自动查重
This commit is contained in:
@@ -12,10 +12,14 @@ use think\Exception;
|
||||
* 并维护 t_plagiarism_check 状态机。
|
||||
*
|
||||
* 状态流:
|
||||
* submit() → state=1(上传中),入队 PlagiarismRun
|
||||
* PlagiarismRun.fire → 上传 + 触发 similarity → state=2(比对中),入队 PlagiarismPoll
|
||||
* PlagiarismPoll.fire → 轮询 status,完成后下载 PDF → state=3(完成)
|
||||
* 任意环节抛异常 → state=4(失败),写 error_msg
|
||||
* submit() → state=1(上传中),入队 PlagiarismRun
|
||||
* PlagiarismRun → 创建 submission + 上传文件 → 入队 PlagiarismWaitIngest
|
||||
* PlagiarismWaitIngest → 单次 GET submission 状态;就绪则入队 PlagiarismTriggerSimilarity,否则延迟再入队
|
||||
* PlagiarismTriggerSimilarity → PUT similarity → state=2(比对中),入队 PlagiarismPoll
|
||||
* PlagiarismPoll → 轮询 similarity,完成后下载 PDF → state=3(完成)
|
||||
* 任意环节抛异常 → state=4(失败),写 error_msg
|
||||
*
|
||||
* Worker:请用 `queue:work` 消费队列 **plagiarism**(整条链与轮询均在此队列;若此前单独监听 PlagiarismRun / PlagiarismPoll,需改为 plagiarism)。
|
||||
*/
|
||||
class PlagiarismService
|
||||
{
|
||||
@@ -24,6 +28,13 @@ class PlagiarismService
|
||||
*/
|
||||
const REPORT_DIR = 'public/plagiarism';
|
||||
|
||||
/** Queue name shared by the Run / WaitIngest / TriggerSimilarity / Poll jobs. */
|
||||
const QUEUE_CHAIN = 'plagiarism';
|
||||
|
||||
/** Job class name queued (on QUEUE_CHAIN) to check whether the uploaded submission has been ingested. */
const JOB_WAIT_INGEST = 'app\\api\\job\\PlagiarismWaitIngest';
|
||||
/** Job class name queued (on QUEUE_CHAIN) to trigger the similarity report once ingest is ready. */
const JOB_TRIGGER_SIM = 'app\\api\\job\\PlagiarismTriggerSimilarity';
|
||||
/** Job class name queued (on QUEUE_CHAIN) to poll the similarity report. */
const JOB_POLL = 'app\\api\\job\\PlagiarismPoll';
|
||||
|
||||
/**
|
||||
* 轮询间隔(秒)。Turnitin 一般 1-5 分钟出结果,30 秒一次比较合适
|
||||
*/
|
||||
@@ -76,78 +87,160 @@ class PlagiarismService
|
||||
'utime' => $now,
|
||||
]);
|
||||
$this->log("submit service act");
|
||||
// 入队执行:上传 + 触发 similarity
|
||||
Queue::push(
|
||||
'app\\api\\job\\PlagiarismRun',
|
||||
['check_id' => $checkId, 'file_path' => $filePath],
|
||||
'PlagiarismRun'
|
||||
self::QUEUE_CHAIN
|
||||
);
|
||||
|
||||
return (int)$checkId;
|
||||
}
|
||||
|
||||
/**
 * Job entry point: create the Turnitin submission and upload the file.
 *
 * Similarity is NOT triggered here. Once the upload call returns, a delayed
 * PlagiarismWaitIngest job is queued on the shared chain queue; that job checks
 * the ingest state and hands over to PlagiarismTriggerSimilarity when ready.
 *
 * This method does not catch anything itself: exceptions from the Turnitin
 * client propagate to the caller.
 * NOTE(review): presumably the calling job marks the check as failed — confirm.
 *
 * @param int|string $checkId  Primary key of the plagiarism check row.
 * @param string     $filePath Path passed to TurnitinService::uploadFile(); its
 *                             basename is used as the uploaded file name.
 *
 * @throws Exception When createSubmission() returns no submission id.
 */
public function runUploadOnly($checkId, $filePath)
{
    $row = $this->mustGetCheck($checkId);
    $this->log('runUploadOnly start check_id=' . $checkId);
    $turnitin = new TurnitinService();

    // The submission title is the article title, or a placeholder when none is stored.
    $title = (string) Db::name('article')
        ->where('article_id', $row['article_id'])
        ->value('title');
    if ($title === '') {
        $title = 'Article #' . $row['article_id'];
    }

    // Owner and submitter are both the editor who triggered the check.
    $editor = 'editor_' . $row['triggered_by'];
    $created = $turnitin->createSubmission([
        'title' => mb_substr($title, 0, 250),
        'owner' => $editor,
        'submitter' => $editor,
        'metadata' => [
            'article_id' => (string) $row['article_id'],
            'check_id' => (string) $row['check_id'],
        ],
    ]);

    $submissionId = '';
    if (isset($created['id'])) {
        $submissionId = $created['id'];
    }
    if ($submissionId === '') {
        throw new Exception('Turnitin createSubmission returned empty id: ' . json_encode($created));
    }

    // Persist the submission id before uploading so later chain steps can find it.
    $this->updateCheck($checkId, [
        'tii_submission_id' => $submissionId,
        'raw_response' => json_encode($created, JSON_UNESCAPED_UNICODE),
    ]);

    $turnitin->uploadFile($submissionId, $filePath, basename($filePath));
    $this->log('runUploadOnly uploaded submission_id=' . $submissionId);

    // Start the ingest-wait chain after an initial delay.
    Queue::later(
        $this->ingestChainFirstDelaySec(),
        self::JOB_WAIT_INGEST,
        ['check_id' => $checkId, 'attempt' => 1],
        self::QUEUE_CHAIN
    );
}
|
||||
|
||||
/**
 * Perform ONE ingest-status check for a submission (called by the PlagiarismWaitIngest job).
 *
 * The method never sleeps or loops. Depending on the parsed state it:
 *  - marks the check failed when the submission id is missing, the submission
 *    is reported as failed, or the attempt budget is exhausted;
 *  - pushes PlagiarismTriggerSimilarity immediately when the submission is ready;
 *  - otherwise re-queues PlagiarismWaitIngest with attempt + 1 after the poll interval.
 *
 * A failing status request is treated like "not ready yet" and retried until the
 * attempt budget runs out.
 *
 * @param int|string $checkId Primary key of the plagiarism check row.
 * @param int        $attempt 1-based number of this ingest check.
 *
 * @return void
 */
public function runIngestPollStep($checkId, $attempt = 1)
{
    $check = $this->mustGetCheck($checkId);
    if (empty($check['tii_submission_id'])) {
        $this->markFailed($checkId, '[ingest] tii_submission_id empty');
        return;
    }

    // Queue payloads may deliver the counter as a string; normalise it once.
    $attempt = (int) $attempt;
    $maxAttempts = $this->ingestChainMaxAttempts();
    $interval = $this->ingestChainPollIntervalSec();
    $retryPayload = ['check_id' => $checkId, 'attempt' => $attempt + 1];
    $tii = new TurnitinService();

    try {
        $parsed = $tii->parseSubmissionIngestState($check['tii_submission_id']);
    } catch (\Throwable $e) {
        // A request error should not fail the check unless the budget is used up.
        if ($attempt >= $maxAttempts) {
            $this->markFailed($checkId, '[ingest] request failed after ' . $attempt . ' tries: ' . $e->getMessage());
            return;
        }
        Queue::later($interval, self::JOB_WAIT_INGEST, $retryPayload, self::QUEUE_CHAIN);
        return;
    }

    // Read optional fields defensively so a missing key cannot raise an
    // undefined-index error before the check has been marked as failed.
    $status = isset($parsed['status']) ? (string) $parsed['status'] : '';
    $snippet = isset($parsed['snippet']) ? (string) $parsed['snippet'] : '';

    if (!empty($parsed['failed'])) {
        $this->markFailed($checkId, '[ingest] submission failed status=' . $status . ' ' . $snippet);
        return;
    }

    if (!empty($parsed['ready'])) {
        // Hand the current attempt over so a later 409 can continue the same budget.
        Queue::push(self::JOB_TRIGGER_SIM, ['check_id' => $checkId, 'ingest_attempt' => $attempt], self::QUEUE_CHAIN);
        return;
    }

    if ($attempt >= $maxAttempts) {
        $this->markFailed($checkId, '[ingest] timeout last_status=' . ($status !== '' ? $status : '(empty)'));
        return;
    }

    Queue::later($interval, self::JOB_WAIT_INGEST, $retryPayload, self::QUEUE_CHAIN);
}
|
||||
|
||||
/**
 * Trigger the similarity report once ingest is ready, then queue the first PlagiarismPoll.
 *
 * If the trigger call fails with a message that looks like an HTTP 409 "not
 * completed yet" conflict, the check is not failed: PlagiarismWaitIngest is
 * queued again with the next attempt number, unless that would exceed the
 * ingest attempt budget. Any other exception is re-thrown to the caller.
 *
 * On success the check moves to state 2 with report status PROCESSING.
 *
 * @param int|string $checkId       Primary key of the plagiarism check row.
 * @param int        $ingestAttempt Attempt number handed over by the ingest wait
 *                                  step; used to continue counting on a 409.
 *
 * @return void
 */
public function runTriggerSimilarityOnly($checkId, $ingestAttempt = 1)
{
    $row = $this->mustGetCheck($checkId);
    if (empty($row['tii_submission_id'])) {
        $this->markFailed($checkId, '[similarity] tii_submission_id empty');
        return;
    }

    $turnitin = new TurnitinService();
    $submissionId = $row['tii_submission_id'];

    try {
        $response = $turnitin->triggerSimilarity($submissionId);
    } catch (\Throwable $e) {
        $reason = $e->getMessage();

        // Both conditions must hold: a conflict marker AND a "not completed" hint.
        $looksLikeConflict = stripos($reason, '409') !== false
            || stripos($reason, 'CONFLICT') !== false;
        $saysNotReady = stripos($reason, 'not been completed') !== false
            || stripos($reason, 'completed yet') !== false;
        if (!$looksLikeConflict || !$saysNotReady) {
            throw $e;
        }

        $limit = $this->ingestChainMaxAttempts();
        $nextAttempt = $ingestAttempt + 1;
        if ($nextAttempt > $limit) {
            $this->markFailed($checkId, '[similarity] still not ready after ingest attempts: ' . $reason);
            return;
        }

        // Wait at least 20 seconds before checking ingest again.
        $wait = max($this->ingestChainPollIntervalSec(), 20);
        Queue::later(
            $wait,
            self::JOB_WAIT_INGEST,
            ['check_id' => $checkId, 'attempt' => $nextAttempt],
            self::QUEUE_CHAIN
        );
        return;
    }

    $this->updateCheck($checkId, [
        'state' => 2,
        'tii_report_status' => 'PROCESSING',
        'raw_response' => json_encode($response, JSON_UNESCAPED_UNICODE),
    ]);

    Queue::later(
        self::POLL_INTERVAL,
        self::JOB_POLL,
        ['check_id' => $checkId, 'attempt' => 1],
        self::QUEUE_CHAIN
    );
}
|
||||
|
||||
/**
 * Legacy entry point kept for existing callers.
 *
 * Delegates to runUploadOnly(): it creates the submission and uploads the file
 * only. Waiting for ingest and triggering similarity are handled by the queued
 * PlagiarismWaitIngest / PlagiarismTriggerSimilarity jobs, so this method must
 * not run that flow itself. Doing both here would create a second Turnitin
 * submission and a second job chain for the same check.
 *
 * @deprecated Equivalent to runUploadOnly(); call that method instead.
 *
 * @param int|string $checkId  Primary key of the plagiarism check row.
 * @param string     $filePath Path of the file to upload.
 *
 * @return void
 */
public function runUploadAndTrigger($checkId, $filePath)
{
    $this->runUploadOnly($checkId, $filePath);
}
|
||||
|
||||
/**
|
||||
@@ -204,18 +297,18 @@ class PlagiarismService
|
||||
}
|
||||
Queue::later(
|
||||
self::POLL_INTERVAL,
|
||||
'app\\api\\job\\PlagiarismPoll',
|
||||
self::JOB_POLL,
|
||||
['check_id' => $checkId, 'attempt' => $attempt + 1],
|
||||
'plagiarism'
|
||||
self::QUEUE_CHAIN
|
||||
);
|
||||
} catch (\Throwable $e) {
|
||||
// 网络抖动不要直接 fail,给一定容错次数
|
||||
if ($attempt < self::MAX_POLL_ATTEMPTS) {
|
||||
Queue::later(
|
||||
self::POLL_INTERVAL,
|
||||
'app\\api\\job\\PlagiarismPoll',
|
||||
self::JOB_POLL,
|
||||
['check_id' => $checkId, 'attempt' => $attempt + 1],
|
||||
'plagiarism'
|
||||
self::QUEUE_CHAIN
|
||||
);
|
||||
$this->updateCheck($checkId, [
|
||||
'attempts' => $attempt,
|
||||
@@ -328,15 +421,30 @@ class PlagiarismService
|
||||
Db::name('plagiarism_check')->where('check_id', $checkId)->update($data);
|
||||
}
|
||||
|
||||
/**
 * Move a check to the failed state (state 4) and store the error message.
 *
 * Public so that code outside this class can record a failure.
 * NOTE(review): presumably used by the queue job classes — confirm.
 *
 * @param int|string $checkId Primary key of the plagiarism check row.
 * @param string     $errMsg  Failure description; stored truncated to 1000 characters.
 *
 * @return void
 */
public function markFailed($checkId, $errMsg)
{
    $this->log('markFailed check_id=' . $checkId);
    $this->updateCheck($checkId, [
        'state' => 4,
        // Cast first so a non-string message cannot break mb_substr().
        'error_msg' => mb_substr((string) $errMsg, 0, 1000),
    ]);
}
|
||||
|
||||
/**
 * Delay, in seconds, before the first ingest check is queued after an upload.
 *
 * Read from the `turnitin.ingest_chain_first_delay` env setting (default 10)
 * and never lower than 3.
 *
 * @return int
 */
private function ingestChainFirstDelaySec()
{
    $configured = (int) Env::get('turnitin.ingest_chain_first_delay', 10);

    return $configured < 3 ? 3 : $configured;
}
|
||||
|
||||
/**
 * Delay, in seconds, between two consecutive ingest checks.
 *
 * Read from the `turnitin.ingest_chain_poll_interval` env setting (default 15)
 * and never lower than 5.
 *
 * @return int
 */
private function ingestChainPollIntervalSec()
{
    $configured = (int) Env::get('turnitin.ingest_chain_poll_interval', 15);

    return $configured < 5 ? 5 : $configured;
}
|
||||
|
||||
/**
 * Maximum number of ingest checks before the check is marked as failed.
 *
 * Read from the `turnitin.ingest_chain_max_attempts` env setting (default 80)
 * and never lower than 10.
 *
 * @return int
 */
private function ingestChainMaxAttempts()
{
    $configured = (int) Env::get('turnitin.ingest_chain_max_attempts', 80);

    return $configured < 10 ? 10 : $configured;
}
|
||||
|
||||
/**
|
||||
* 从 t_article_file 找到投稿主稿(manuscirpt)的本地绝对路径。
|
||||
* file_url 在系统里可能是 URL 或相对路径,调用方负责保证可读。
|
||||
|
||||
Reference in New Issue
Block a user