自动查重

This commit is contained in:
wangjinlei
2026-05-18 18:34:48 +08:00
parent f99dbc6397
commit 53e6ddbd9e
7 changed files with 105 additions and 44 deletions

View File

@@ -2692,8 +2692,10 @@ class EmailClient extends Base
* 根据 expert_type 分发选人逻辑
*
* - expert_type = 5从 t_expert 库选人(按领域 / 国家 / 频次)
* 频次e.ltime成功发送后回写+ t_promotion_email_log 中「待发送 state=0 的入队时间 ctime」
* (避免「今日生成任务明日发送」时 ltime 未变导致连续两天选到同一拨人)
* - expert_type ∈ {1,2,3,4}:从系统内部表选人(主编/编委/青年编委/作者fields 与国家筛选忽略;
* 频次按 t_promotion_email_log 中相同 expert_type 维度的最近发送时间扣除
* 频次按 t_promotion_email_log:已发/退信用 send_time待发送队列用 ctime同上
*
* 返回行 shape 已对齐:
* - type=5 行包含 e.* 全部字段(含 expert_id、country_id、ltime 等)
@@ -2741,9 +2743,25 @@ class EmailClient extends Base
if ($noRepeatDays > 0) {
$cutoff = time() - ($noRepeatDays * 86400);
// ltime成功发出后回写与 log 中 state=1 在「已送达」上部分重叠,但保留 ltime 可走索引、且退信 state=3 未必回写 ltime。
$query->where(function ($q) use ($cutoff) {
$q->where('e.ltime', 0)->whereOr('e.ltime', '<', $cutoff);
});
// 一条 NOT EXISTS待发(state=0 按 ctime) 或 已发/退信(按 send_time),避免两段相同 join 的重复感
$query->where(function ($q) use ($cutoff) {
$q->table('t_promotion_email_log')->alias('pl')
->join('t_promotion_task pt', 'pt.task_id = pl.task_id', 'inner')
->where('pt.expert_type', 5)
->where('pl.expert_id', '>', 0)
->whereRaw('pl.expert_id = e.expert_id')
->where(function ($w) use ($cutoff) {
$w->where(function ($a) use ($cutoff) {
$a->where('pl.state', 0)->where('pl.ctime', '>', $cutoff);
})->whereOr(function ($b) use ($cutoff) {
$b->where('pl.state', 'in', [1, 3])->where('pl.send_time', '>', $cutoff);
});
});
}, 'not exists');
}
$countryIds = $this->resolveCountryIds($targetPartitions, $targetCountryIds);
@@ -2762,7 +2780,7 @@ class EmailClient extends Base
* 系统内部受众选人(编委 / 主编 / 青年编委 / 作者)
* 仅按 期刊 + 频次 过滤;领域 / 国家无关
*
* 频次:扣除「同 expert_type 维度no_repeat_days 内已经发出 (state=1) 或退信 (state=3) 的人
* 频次:扣除「同 expert_type 下no_repeat_days 内 (1) 已发出或退信,或 (2) 仍在队列待发送state=0按 ctime的人
*
* @param int $expertType 1=主编 2=编委 3=青年编委 4=作者
* @param int $journalId
@@ -2807,15 +2825,18 @@ class EmailClient extends Base
if ($noRepeatDays > 0) {
$cutoff = intval(time() - ($noRepeatDays * 86400));
$expertTypeSafe = intval($expertType);
// 关联子查询:相对于 NOT IN避免把全部已发 user_id 拉到 PHP 再拼回 SQL
// 配合 t_promotion_email_log(user_id, send_time) 复合索引做半连接探针,常量时间。
$query->where(function ($q) use ($expertTypeSafe, $cutoff) {
$q->table('t_promotion_email_log')->alias('l')
->join('t_promotion_task t', 't.task_id = l.task_id', 'inner')
->where('t.expert_type', $expertTypeSafe)
->where('l.state', 'in', [1, 3])
->where('l.send_time', '>', $cutoff)
->whereRaw('l.user_id = u.user_id');
->join('t_promotion_task t', 't.task_id = l.task_id', 'inner')
->where('t.expert_type', $expertTypeSafe)
->whereRaw('l.user_id = u.user_id')
->where(function ($w) use ($cutoff) {
$w->where(function ($a) use ($cutoff) {
$a->where('l.state', 0)->where('l.ctime', '>', $cutoff);
})->whereOr(function ($b) use ($cutoff) {
$b->where('l.state', 'in', [1, 3])->where('l.send_time', '>', $cutoff);
});
});
}, 'not exists');
}

View File

@@ -81,10 +81,49 @@ class Plagiarism extends Base
return jsonError($rule->getError());
}
$tii = new TurnitinService();
$res = $tii->getSubmission($data['id']);
$res = $tii->parseSubmissionIngestState($data['id']);
return jsonSuccess($res);
}
/**
 * Validates that the POST body carries `checkId`, runs
 * PlagiarismService::runIngestPollStep() for it and returns the
 * result wrapped in a JSON success response.
 *
 * Responds with a JSON error holding the validator message when
 * `checkId` is missing.
 */
public function testcconewait()
{
    $params = $this->request->post();

    $validator = new Validate(["checkId" => "require"]);
    if (!$validator->check($params)) {
        return jsonError($validator->getError());
    }

    $service = new PlagiarismService();
    $outcome = $service->runIngestPollStep($params['checkId']);

    return jsonSuccess($outcome);
}
/**
 * Validates that the POST body carries `checkId`, runs
 * PlagiarismService::runTriggerSimilarityOnly() for it and returns
 * the result wrapped in a JSON success response.
 *
 * Responds with a JSON error holding the validator message when
 * `checkId` is missing.
 */
public function testcconesimilar()
{
    $params = $this->request->post();

    $validator = new Validate(["checkId" => "require"]);
    if (!$validator->check($params)) {
        return jsonError($validator->getError());
    }

    $service = new PlagiarismService();
    $outcome = $service->runTriggerSimilarityOnly($params['checkId']);

    return jsonSuccess($outcome);
}
/**
 * Validates that the POST body carries `checkId`, runs
 * PlagiarismService::runPollStatus() for it and returns the result
 * wrapped in a JSON success response.
 *
 * Responds with a JSON error holding the validator message when
 * `checkId` is missing.
 */
public function testcconelast()
{
    $params = $this->request->post();

    $validator = new Validate(["checkId" => "require"]);
    if (!$validator->check($params)) {
        return jsonError($validator->getError());
    }

    $service = new PlagiarismService();
    $outcome = $service->runPollStatus($params['checkId']);

    return jsonSuccess($outcome);
}
/**
* 重试 = 提交一次新查重(保留历史)
*/

View File

@@ -23,15 +23,16 @@ class PlagiarismPoll
/**
 * Queue handler for the plagiarism poll job.
 *
 * Reads `check_id` (default 0) and `attempt` (default 1) from the payload.
 * A non-positive check id deletes the job and returns immediately; otherwise
 * PlagiarismService::runPollStatus() is called, a log line is written and
 * the job is deleted.
 *
 * @param Job   $job  Queue job handle; deleted before this method returns normally.
 * @param mixed $data Job payload; the `check_id` and `attempt` keys are read when set.
 */
public function fire(Job $job, $data)
{
// Missing keys fall back to 0 / 1; present values are coerced with intval().
$checkId = isset($data['check_id']) ? intval($data['check_id']) : 0;
$attempt = isset($data['attempt']) ? intval($data['attempt']) : 1;
// Nothing to poll without a positive check id: drop the job.
if ($checkId <= 0) {
$job->delete();
return;
}
// NOTE(review): the commented-out block below repeats the live guard above and
// the commented-out runPollStatus() call further down repeats the live call.
// This looks like two revisions of the method merged together; confirm which
// version is intended before removing either.
// $checkId = isset($data['check_id']) ? intval($data['check_id']) : 0;
// $attempt = isset($data['attempt']) ? intval($data['attempt']) : 1;
//
// if ($checkId <= 0) {
// $job->delete();
// return;
// }
$svc = new PlagiarismService();
$svc->runPollStatus($checkId, $attempt);
$svc->log("PlagiarismPoll job is running");
// $svc->runPollStatus($checkId, $attempt);
// Delete the job once the poll step has returned without throwing.
$job->delete();
}
}

View File

@@ -29,7 +29,7 @@ class PlagiarismRun
return;
}
$svc = new PlagiarismService();
$svc->log('PlagiarismRun job act check_id=' . $checkId);
$svc->log('PlagiarismRun job is running');
try {
$svc->runUploadOnly($checkId, $filePath);
} catch (\Throwable $e) {

View File

@@ -23,6 +23,7 @@ class PlagiarismTriggerSimilarity
return;
}
$svc = new PlagiarismService();
$svc->log("PlagiarismTriggerSimilarity job is running");
try {
$svc->runTriggerSimilarityOnly($checkId, $ingestAttempt);
} catch (\Throwable $e) {

View File

@@ -23,6 +23,7 @@ class PlagiarismWaitIngest
return;
}
$svc = new PlagiarismService();
$svc->log("PlagiarismWaitIngest job is running");
try {
$svc->runIngestPollStep($checkId, $attempt);
} catch (\Throwable $e) {

View File

@@ -73,7 +73,7 @@ class PlagiarismService
->where('article_id', $articleId)
->value('journal_id');
$this->log("plagiarism submit is running");
$now = time();
$checkId = Db::name('plagiarism_check')->insertGetId([
'article_id' => $articleId,
@@ -86,7 +86,6 @@ class PlagiarismService
'ctime' => $now,
'utime' => $now,
]);
$this->log("submit service act");
Queue::push(
'app\\api\\job\\PlagiarismRun',
['check_id' => $checkId, 'file_path' => $filePath],
@@ -132,8 +131,6 @@ class PlagiarismService
]);
$tii->uploadFile($submissionId, $filePath, basename($filePath));
$this->log('runUploadOnly uploaded submission_id=' . $submissionId);
$firstDelay = $this->ingestChainFirstDelaySec();
Queue::later(
$firstDelay,
@@ -153,7 +150,7 @@ class PlagiarismService
$this->markFailed($checkId, '[ingest] tii_submission_id empty');
return;
}
$this->log("runIngestPollStep is running");
$maxAttempts = $this->ingestChainMaxAttempts();
$interval = $this->ingestChainPollIntervalSec();
$tii = new TurnitinService();
@@ -198,6 +195,7 @@ class PlagiarismService
return;
}
$this->log("runTriggerSimilarityOnly is running");
$tii = new TurnitinService();
$sid = $check['tii_submission_id'];
@@ -254,7 +252,7 @@ class PlagiarismService
return;
}
try {
// try {
$tii = new TurnitinService();
$statusResp = $tii->getSimilarityStatus($check['tii_submission_id']);
$status = isset($statusResp['status']) ? strtoupper($statusResp['status']) : '';
@@ -301,24 +299,24 @@ class PlagiarismService
['check_id' => $checkId, 'attempt' => $attempt + 1],
self::QUEUE_CHAIN
);
} catch (\Throwable $e) {
// 网络抖动不要直接 fail给一定容错次数
if ($attempt < self::MAX_POLL_ATTEMPTS) {
Queue::later(
self::POLL_INTERVAL,
self::JOB_POLL,
['check_id' => $checkId, 'attempt' => $attempt + 1],
self::QUEUE_CHAIN
);
$this->updateCheck($checkId, [
'attempts' => $attempt,
'error_msg' => '[poll] transient: ' . $e->getMessage(),
]);
return;
}
$this->markFailed($checkId, '[poll] exhausted: ' . $e->getMessage());
throw $e;
}
// } catch (\Throwable $e) {
// // 网络抖动不要直接 fail给一定容错次数
// if ($attempt < self::MAX_POLL_ATTEMPTS) {
// Queue::later(
// self::POLL_INTERVAL,
// self::JOB_POLL,
// ['check_id' => $checkId, 'attempt' => $attempt + 1],
// self::QUEUE_CHAIN
// );
// $this->updateCheck($checkId, [
// 'attempts' => $attempt,
// 'error_msg' => '[poll] transient: ' . $e->getMessage(),
// ]);
// return;
// }
// $this->markFailed($checkId, '[poll] exhausted: ' . $e->getMessage());
// throw $e;
// }
}
/**
@@ -437,7 +435,7 @@ class PlagiarismService
/**
 * Returns the ingest-chain poll interval read from the env key
 * `turnitin.ingest_chain_poll_interval` (fallback 15), cast to int and
 * clamped to a lower bound.
 *
 * @return int Interval value; presumably seconds, going by the method name.
 */
private function ingestChainPollIntervalSec()
{
// As written, this first return is the effective one: lower bound of 5.
return max(5, (int) Env::get('turnitin.ingest_chain_poll_interval', 15));
// NOTE(review): unreachable after the return above. It looks like the newer
// revision of the same statement (lower bound 60); confirm which floor is
// intended. With a floor of 60 the fallback of 15 would never take effect.
return max(60, (int) Env::get('turnitin.ingest_chain_poll_interval', 15));
}
private function ingestChainMaxAttempts()