自动查重
This commit is contained in:
@@ -2,9 +2,11 @@
|
||||
|
||||
namespace app\api\controller;
|
||||
|
||||
use app\common\TurnitinService;
|
||||
use think\Db;
|
||||
use think\Response;
|
||||
use app\common\PlagiarismService;
|
||||
use think\Validate;
|
||||
|
||||
/**
|
||||
* 论文查重(Turnitin / Crossref Similarity Check)控制器。
|
||||
@@ -51,7 +53,6 @@ class Plagiarism extends Base
|
||||
$localPath = $fileUrl !== ''
|
||||
? $svc->resolveFileUrlToLocal($fileUrl)
|
||||
: $svc->locateArticleManuscript($articleId);
|
||||
echo $localPath;
|
||||
$checkId = $svc->submit($articleId, $localPath, $editorId, 'manual');
|
||||
return jsonSuccess(['check_id' => $checkId]);
|
||||
} catch (\Throwable $e) {
|
||||
@@ -60,12 +61,28 @@ class Plagiarism extends Base
|
||||
}
|
||||
|
||||
|
||||
/**
 * Debug helper: exercises the same queue chain as production
 * (upload → wait for ingest → trigger similarity → poll).
 * A worker must be consuming the "plagiarism" queue for the chain to advance.
 *
 * NOTE(review): the check id and manuscript path below are hard-coded debugging
 * values — confirm this action is not routable in production.
 */
public function testccone()
{
    $svc = new PlagiarismService();
    $checkId = 9;
    $filePath = '/home/wwwroot/api.tmrjournals.com/public/manuscirpt/20260509/6832a56e8ace38fe99df390ab5221deb.docx';

    // Only the upload step runs here; the later steps are queued by the service.
    $svc->runUploadOnly($checkId, $filePath);
}
|
||||
|
||||
/**
 * Debug helper: returns the raw Turnitin submission for the posted "id".
 *
 * Responds with jsonError() when "id" is missing from the POST body,
 * otherwise with jsonSuccess() wrapping TurnitinService::getSubmission().
 */
public function testcconegetstatus()
{
    $post = $this->request->post();

    $validator = new Validate(['id' => 'require']);
    if (!$validator->check($post)) {
        return jsonError($validator->getError());
    }

    $turnitin = new TurnitinService();

    return jsonSuccess($turnitin->getSubmission($post['id']));
}
|
||||
|
||||
/**
|
||||
|
||||
@@ -6,9 +6,9 @@ use think\queue\Job;
|
||||
use app\common\PlagiarismService;
|
||||
|
||||
/**
|
||||
* 队列任务:上传论文到 Turnitin + 触发 similarity 检测。
|
||||
* 队列任务:创建 Turnitin submission 并上传原稿;ingest 轮询与触发 similarity 由后续 Job 完成。
|
||||
*
|
||||
* 完成后会自动入队 PlagiarismPoll 进行后续轮询。
|
||||
* 链:PlagiarismRun → PlagiarismWaitIngest → PlagiarismTriggerSimilarity → PlagiarismPoll
|
||||
*
|
||||
* data:
|
||||
* - check_id t_plagiarism_check.check_id
|
||||
@@ -29,8 +29,12 @@ class PlagiarismRun
|
||||
return;
|
||||
}
|
||||
$svc = new PlagiarismService();
|
||||
$svc->log("PlagiarismRun job act!!");
|
||||
$svc->runUploadAndTrigger($checkId, $filePath);
|
||||
$svc->log('PlagiarismRun job act check_id=' . $checkId);
|
||||
try {
|
||||
$svc->runUploadOnly($checkId, $filePath);
|
||||
} catch (\Throwable $e) {
|
||||
$svc->markFailed($checkId, '[upload] ' . $e->getMessage());
|
||||
}
|
||||
$job->delete();
|
||||
}
|
||||
}
|
||||
|
||||
33
application/api/job/PlagiarismTriggerSimilarity.php
Normal file
33
application/api/job/PlagiarismTriggerSimilarity.php
Normal file
@@ -0,0 +1,33 @@
|
||||
<?php
|
||||
|
||||
namespace app\api\job;
|
||||
|
||||
use think\queue\Job;
|
||||
use app\common\PlagiarismService;
|
||||
|
||||
/**
 * Queue job: once ingest is ready, calls PUT /similarity and enqueues PlagiarismPoll.
 *
 * data:
 *   - check_id        t_plagiarism_check.check_id
 *   - ingest_attempt  attempt counter carried over from PlagiarismWaitIngest
 *                     (used to resume ingest polling when the trigger returns 409)
 */
class PlagiarismTriggerSimilarity
{
    /**
     * Runs the similarity trigger for one check and always removes the job afterwards.
     *
     * @param Job   $job  queue job handle
     * @param mixed $data job payload (see class docblock)
     */
    public function fire(Job $job, $data)
    {
        $checkId = (int) ($data['check_id'] ?? 0);
        $ingestAttempt = (int) ($data['ingest_attempt'] ?? 1);

        // A payload without a usable check id is simply dropped.
        if ($checkId > 0) {
            $service = new PlagiarismService();
            try {
                $service->runTriggerSimilarityOnly($checkId, $ingestAttempt);
            } catch (\Throwable $error) {
                $service->markFailed($checkId, '[similarity] ' . $error->getMessage());
            }
        }

        $job->delete();
    }
}
|
||||
33
application/api/job/PlagiarismWaitIngest.php
Normal file
33
application/api/job/PlagiarismWaitIngest.php
Normal file
@@ -0,0 +1,33 @@
|
||||
<?php
|
||||
|
||||
namespace app\api\job;
|
||||
|
||||
use think\queue\Job;
|
||||
use app\common\PlagiarismService;
|
||||
|
||||
/**
 * Queue job: performs a single check of whether Turnitin has finished parsing
 * (ingesting) the submission; if not, the service re-enqueues this job with a delay.
 *
 * data:
 *   - check_id  t_plagiarism_check.check_id
 *   - attempt   counter starting at 1 and incremented per step
 */
class PlagiarismWaitIngest
{
    /**
     * Runs one ingest poll step for the check and always removes the job afterwards.
     *
     * @param Job   $job  queue job handle
     * @param mixed $data job payload (see class docblock)
     */
    public function fire(Job $job, $data)
    {
        $checkId = (int) ($data['check_id'] ?? 0);
        $attempt = (int) ($data['attempt'] ?? 1);

        // A payload without a usable check id is simply dropped.
        if ($checkId > 0) {
            $service = new PlagiarismService();
            try {
                $service->runIngestPollStep($checkId, $attempt);
            } catch (\Throwable $error) {
                $service->markFailed($checkId, '[ingest] ' . $error->getMessage());
            }
        }

        $job->delete();
    }
}
|
||||
@@ -12,10 +12,14 @@ use think\Exception;
|
||||
* 并维护 t_plagiarism_check 状态机。
|
||||
*
|
||||
* 状态流:
|
||||
* submit() → state=1(上传中),入队 PlagiarismRun
|
||||
* PlagiarismRun.fire → 上传 + 触发 similarity → state=2(比对中),入队 PlagiarismPoll
|
||||
* PlagiarismPoll.fire → 轮询 status,完成后下载 PDF → state=3(完成)
|
||||
* 任意环节抛异常 → state=4(失败),写 error_msg
|
||||
* submit() → state=1(上传中),入队 PlagiarismRun
|
||||
* PlagiarismRun → 创建 submission + 上传文件 → 入队 PlagiarismWaitIngest
|
||||
* PlagiarismWaitIngest → 单次 GET submission 状态;就绪则入队 PlagiarismTriggerSimilarity,否则延迟再入队
|
||||
* PlagiarismTriggerSimilarity → PUT similarity → state=2(比对中),入队 PlagiarismPoll
|
||||
* PlagiarismPoll → 轮询 similarity,完成后下载 PDF → state=3(完成)
|
||||
* 任意环节抛异常 → state=4(失败),写 error_msg
|
||||
*
|
||||
* Worker:请用 `queue:work` 消费队列 **plagiarism**(整条链与轮询均在此队列;若此前单独监听 PlagiarismRun / PlagiarismPoll,需改为 plagiarism)。
|
||||
*/
|
||||
class PlagiarismService
|
||||
{
|
||||
@@ -24,6 +28,13 @@ class PlagiarismService
|
||||
*/
|
||||
const REPORT_DIR = 'public/plagiarism';
|
||||
|
||||
/** Run / WaitIngest / TriggerSimilarity / Poll 共用队列名 */
|
||||
const QUEUE_CHAIN = 'plagiarism';
|
||||
|
||||
const JOB_WAIT_INGEST = 'app\\api\\job\\PlagiarismWaitIngest';
|
||||
const JOB_TRIGGER_SIM = 'app\\api\\job\\PlagiarismTriggerSimilarity';
|
||||
const JOB_POLL = 'app\\api\\job\\PlagiarismPoll';
|
||||
|
||||
/**
|
||||
* 轮询间隔(秒)。Turnitin 一般 1-5 分钟出结果,30 秒一次比较合适
|
||||
*/
|
||||
@@ -76,78 +87,160 @@ class PlagiarismService
|
||||
'utime' => $now,
|
||||
]);
|
||||
$this->log("submit service act");
|
||||
// 入队执行:上传 + 触发 similarity
|
||||
Queue::push(
|
||||
'app\\api\\job\\PlagiarismRun',
|
||||
['check_id' => $checkId, 'file_path' => $filePath],
|
||||
'PlagiarismRun'
|
||||
self::QUEUE_CHAIN
|
||||
);
|
||||
|
||||
return (int)$checkId;
|
||||
}
|
||||
|
||||
/**
 * Called by the PlagiarismRun job: creates the Turnitin submission and uploads the
 * manuscript, nothing else. Ingest polling is then chained through
 * PlagiarismWaitIngest, followed by PlagiarismTriggerSimilarity.
 *
 * @param int    $checkId  t_plagiarism_check.check_id
 * @param string $filePath local path of the manuscript to upload
 * @throws \Throwable when the file is missing/unreadable, the submission cannot be
 *                    created, or the upload fails; this method does not mark the
 *                    check as failed itself
 */
public function runUploadOnly($checkId, $filePath)
{
    $check = $this->mustGetCheck($checkId);
    $this->log('runUploadOnly start check_id=' . $checkId);

    // Fail fast before talking to Turnitin: otherwise a missing local file is only
    // discovered after a remote submission has already been created.
    if (!is_string($filePath) || !is_file($filePath) || !is_readable($filePath)) {
        throw new Exception(
            'manuscript file missing or unreadable: ' . (is_string($filePath) ? $filePath : gettype($filePath))
        );
    }

    $tii = new TurnitinService();

    $articleTitle = (string) Db::name('article')
        ->where('article_id', $check['article_id'])
        ->value('title');
    if ($articleTitle === '') {
        // Fall back to a synthetic title when the article has none.
        $articleTitle = 'Article #' . $check['article_id'];
    }

    $createResp = $tii->createSubmission([
        'title'     => mb_substr($articleTitle, 0, 250),
        'owner'     => 'editor_' . $check['triggered_by'],
        'submitter' => 'editor_' . $check['triggered_by'],
        'metadata'  => [
            'article_id' => (string) $check['article_id'],
            'check_id'   => (string) $check['check_id'],
        ],
    ]);
    $submissionId = isset($createResp['id']) ? $createResp['id'] : '';
    if ($submissionId === '') {
        throw new Exception('Turnitin createSubmission returned empty id: ' . json_encode($createResp));
    }

    // Persist the submission id before uploading so later chain steps can find it.
    $this->updateCheck($checkId, [
        'tii_submission_id' => $submissionId,
        'raw_response'      => json_encode($createResp, JSON_UNESCAPED_UNICODE),
    ]);

    $tii->uploadFile($submissionId, $filePath, basename($filePath));
    $this->log('runUploadOnly uploaded submission_id=' . $submissionId);

    // First ingest check is delayed to give Turnitin time to start parsing.
    $firstDelay = $this->ingestChainFirstDelaySec();
    Queue::later(
        $firstDelay,
        self::JOB_WAIT_INGEST,
        ['check_id' => $checkId, 'attempt' => 1],
        self::QUEUE_CHAIN
    );
}
|
||||
|
||||
/**
 * One ingest check, invoked by PlagiarismWaitIngest. This method never sleeps or
 * loops; waiting is expressed by re-enqueueing the job with a delay.
 *
 * Outcomes: marks the check failed (missing submission id, failed status, or
 * attempts exhausted), enqueues PlagiarismTriggerSimilarity when ready, or
 * re-enqueues PlagiarismWaitIngest with attempt + 1.
 *
 * @param int $checkId t_plagiarism_check.check_id
 * @param int $attempt current step number, starting at 1
 */
public function runIngestPollStep($checkId, $attempt = 1)
{
    $row = $this->mustGetCheck($checkId);
    if (empty($row['tii_submission_id'])) {
        $this->markFailed($checkId, '[ingest] tii_submission_id empty');
        return;
    }

    $attemptCap = $this->ingestChainMaxAttempts();
    $pause = $this->ingestChainPollIntervalSec();
    $client = new TurnitinService();

    // Schedules the next ingest check on the shared chain queue.
    $retryLater = function () use ($pause, $checkId, $attempt) {
        Queue::later(
            $pause,
            self::JOB_WAIT_INGEST,
            ['check_id' => $checkId, 'attempt' => $attempt + 1],
            self::QUEUE_CHAIN
        );
    };

    try {
        $state = $client->parseSubmissionIngestState($row['tii_submission_id']);
    } catch (\Throwable $error) {
        // A failed request is retried until the attempt budget is used up.
        if ($attempt < $attemptCap) {
            $retryLater();
        } else {
            $this->markFailed(
                $checkId,
                '[ingest] request failed after ' . $attempt . ' tries: ' . $error->getMessage()
            );
        }
        return;
    }

    if (!empty($state['failed'])) {
        $this->markFailed(
            $checkId,
            '[ingest] submission failed status=' . $state['status'] . ' ' . $state['snippet']
        );
        return;
    }

    if (!empty($state['ready'])) {
        Queue::push(
            self::JOB_TRIGGER_SIM,
            ['check_id' => $checkId, 'ingest_attempt' => $attempt],
            self::QUEUE_CHAIN
        );
        return;
    }

    if ($attempt >= $attemptCap) {
        $lastStatus = $state['status'] !== '' ? $state['status'] : '(empty)';
        $this->markFailed($checkId, '[ingest] timeout last_status=' . $lastStatus);
        return;
    }

    $retryLater();
}
|
||||
|
||||
/**
 * Triggers similarity once ingest is ready and enqueues PlagiarismPoll.
 * If the trigger is still rejected with a 409 "not completed" error, the check is
 * put back on PlagiarismWaitIngest instead of throwing, so it is not marked failed
 * by mistake.
 *
 * @param int $checkId       t_plagiarism_check.check_id
 * @param int $ingestAttempt attempt counter from WaitIngest, used to continue polling on 409
 * @throws \Throwable any trigger error that is not the 409 "not completed" case
 */
public function runTriggerSimilarityOnly($checkId, $ingestAttempt = 1)
{
    $row = $this->mustGetCheck($checkId);
    if (empty($row['tii_submission_id'])) {
        $this->markFailed($checkId, '[similarity] tii_submission_id empty');
        return;
    }

    $client = new TurnitinService();
    $submissionId = $row['tii_submission_id'];

    try {
        $response = $client->triggerSimilarity($submissionId);
    } catch (\Throwable $error) {
        $text = $error->getMessage();

        // The 409 case is recognised from the exception message text only.
        $mentionsConflict = stripos($text, '409') !== false || stripos($text, 'CONFLICT') !== false;
        $mentionsNotReady = stripos($text, 'not been completed') !== false
            || stripos($text, 'completed yet') !== false;
        if (!($mentionsConflict && $mentionsNotReady)) {
            throw $error;
        }

        $attemptCap = $this->ingestChainMaxAttempts();
        $nextAttempt = $ingestAttempt + 1;
        if ($nextAttempt > $attemptCap) {
            $this->markFailed($checkId, '[similarity] still not ready after ingest attempts: ' . $text);
            return;
        }

        // Wait at least 20 seconds before checking ingest again.
        $pause = max($this->ingestChainPollIntervalSec(), 20);
        Queue::later(
            $pause,
            self::JOB_WAIT_INGEST,
            ['check_id' => $checkId, 'attempt' => $nextAttempt],
            self::QUEUE_CHAIN
        );
        return;
    }

    // state=2 means "comparison in progress".
    $this->updateCheck($checkId, [
        'state'             => 2,
        'tii_report_status' => 'PROCESSING',
        'raw_response'      => json_encode($response, JSON_UNESCAPED_UNICODE),
    ]);

    Queue::later(
        self::POLL_INTERVAL,
        self::JOB_POLL,
        ['check_id' => $checkId, 'attempt' => 1],
        self::QUEUE_CHAIN
    );
}
|
||||
|
||||
/**
 * @deprecated Equivalent to runUploadOnly(). The long-running ingest wait has been
 *             moved to the PlagiarismWaitIngest queue job; do not wait synchronously here.
 *
 * Kept only so existing callers keep working. It no longer triggers similarity or
 * enqueues polling itself: doing so right after upload would create a second
 * submission and hit Turnitin before ingest has finished.
 *
 * @param int    $checkId  t_plagiarism_check.check_id
 * @param string $filePath local path of the manuscript to upload
 * @throws \Throwable whatever runUploadOnly() throws
 */
public function runUploadAndTrigger($checkId, $filePath)
{
    $this->runUploadOnly($checkId, $filePath);
}
|
||||
|
||||
/**
|
||||
@@ -204,18 +297,18 @@ class PlagiarismService
|
||||
}
|
||||
Queue::later(
|
||||
self::POLL_INTERVAL,
|
||||
'app\\api\\job\\PlagiarismPoll',
|
||||
self::JOB_POLL,
|
||||
['check_id' => $checkId, 'attempt' => $attempt + 1],
|
||||
'plagiarism'
|
||||
self::QUEUE_CHAIN
|
||||
);
|
||||
} catch (\Throwable $e) {
|
||||
// 网络抖动不要直接 fail,给一定容错次数
|
||||
if ($attempt < self::MAX_POLL_ATTEMPTS) {
|
||||
Queue::later(
|
||||
self::POLL_INTERVAL,
|
||||
'app\\api\\job\\PlagiarismPoll',
|
||||
self::JOB_POLL,
|
||||
['check_id' => $checkId, 'attempt' => $attempt + 1],
|
||||
'plagiarism'
|
||||
self::QUEUE_CHAIN
|
||||
);
|
||||
$this->updateCheck($checkId, [
|
||||
'attempts' => $attempt,
|
||||
@@ -328,15 +421,30 @@ class PlagiarismService
|
||||
Db::name('plagiarism_check')->where('check_id', $checkId)->update($data);
|
||||
}
|
||||
|
||||
/**
 * Marks a check as failed: sets state=4 and stores the error message,
 * truncated to 1000 characters.
 *
 * Public because the queue jobs call it directly when a chain step throws.
 *
 * @param int    $checkId t_plagiarism_check.check_id
 * @param string $errMsg  human-readable failure reason
 */
public function markFailed($checkId, $errMsg)
{
    $this->log('markFailed check_id=' . $checkId);
    $this->updateCheck($checkId, [
        'state'     => 4,
        'error_msg' => mb_substr((string) $errMsg, 0, 1000),
    ]);
}
|
||||
|
||||
/**
 * Delay in seconds before the first ingest check after upload.
 * Read from env key turnitin.ingest_chain_first_delay (default 10), never below 3.
 *
 * @return int
 */
private function ingestChainFirstDelaySec()
{
    $configured = (int) Env::get('turnitin.ingest_chain_first_delay', 10);

    return $configured < 3 ? 3 : $configured;
}
|
||||
|
||||
/**
 * Delay in seconds between two ingest checks.
 * Read from env key turnitin.ingest_chain_poll_interval (default 15), never below 5.
 *
 * @return int
 */
private function ingestChainPollIntervalSec()
{
    $configured = (int) Env::get('turnitin.ingest_chain_poll_interval', 15);

    return $configured < 5 ? 5 : $configured;
}
|
||||
|
||||
/**
 * Maximum number of ingest check steps before the check is given up.
 * Read from env key turnitin.ingest_chain_max_attempts (default 80), never below 10.
 *
 * @return int
 */
private function ingestChainMaxAttempts()
{
    $configured = (int) Env::get('turnitin.ingest_chain_max_attempts', 80);

    return $configured < 10 ? 10 : $configured;
}
|
||||
|
||||
/**
|
||||
* 从 t_article_file 找到投稿主稿(manuscirpt)的本地绝对路径。
|
||||
* file_url 在系统里可能是 URL 或相对路径,调用方负责保证可读。
|
||||
|
||||
@@ -18,6 +18,11 @@ use think\Exception;
|
||||
* API_KEY 生成的 Bearer token
|
||||
* INTEGRATION_NAME Scope Name(创建 integration 时填的名字)
|
||||
* INTEGRATION_VERSION 自定义版本号,便于审计 e.g. 1.0.0
|
||||
* SUBMISSION_INGEST_MAX_WAIT 上传后轮询 submission 就绪的最长秒数,默认 600(仅 waitAfterUploadForSimilarity 同步用)
|
||||
* SUBMISSION_INGEST_POLL_INTERVAL 同步轮询间隔秒数,默认 3
|
||||
* INGEST_CHAIN_FIRST_DELAY 上传后首次 ingest 检查延迟秒数,默认 10(队列链)
|
||||
* INGEST_CHAIN_POLL_INTERVAL ingest 链每步间隔秒数,默认 15
|
||||
* INGEST_CHAIN_MAX_ATTEMPTS ingest 链最大步数,默认 80
|
||||
*
|
||||
* API 文档:https://developers.turnitin.com/docs/tca
|
||||
*
|
||||
@@ -36,8 +41,8 @@ class TurnitinService
|
||||
|
||||
public function __construct()
|
||||
{
|
||||
$this->baseUrl = rtrim(trim((string)Env::get('turnitin.base_url', '')), '/');
|
||||
$this->apiKey = trim((string)Env::get('turnitin.api_key', ''));
|
||||
$this->baseUrl = rtrim(trim((string)Env::get('turnitin.base_url', 'https://crossref-20794.turnitin.com/api/v1')), '/');
|
||||
$this->apiKey = trim((string)Env::get('turnitin.api_key', 'c6315e8291a4433dae09ad5efdb8a89c'));
|
||||
$this->integrationName = trim((string)Env::get('turnitin.integration_name', 'tmr'));
|
||||
$this->integrationVersion = trim((string)Env::get('turnitin.integration_version', '1.0.0'));
|
||||
|
||||
@@ -80,11 +85,13 @@ class TurnitinService
|
||||
|
||||
/**
|
||||
* 上传文件到 submission
|
||||
* PUT /submissions/{id}/original/{filename}
|
||||
*
|
||||
* TCA 文档路径为 PUT /submissions/{id}/original(文件名仅通过 Content-Disposition 传递,
|
||||
* 不要再拼在 URL 末尾;否则网关会 404,错误里常见 path 形如 //v1/submissions/.../original/xxx.docx)。
|
||||
*
|
||||
* @param string $submissionId
|
||||
* @param string $filePath 本地 PDF/DOCX 路径
|
||||
* @param string $filename 传给 Turnitin 的文件名(用于报告显示)
|
||||
* @param string $filename 传给 Turnitin 的展示文件名(默认取 basename)
|
||||
* @return array
|
||||
*/
|
||||
public function uploadFile($submissionId, $filePath, $filename = '')
|
||||
@@ -95,15 +102,20 @@ class TurnitinService
|
||||
if ($filename === '') {
|
||||
$filename = basename($filePath);
|
||||
}
|
||||
// Content-Disposition 里避免未转义的双引号
|
||||
$safeName = str_replace(['"', "\r", "\n"], '', $filename);
|
||||
if ($safeName === '') {
|
||||
$safeName = 'document.bin';
|
||||
}
|
||||
$body = file_get_contents($filePath);
|
||||
|
||||
return $this->request(
|
||||
'PUT',
|
||||
'/submissions/' . urlencode($submissionId) . '/original/' . rawurlencode($filename),
|
||||
'/submissions/' . rawurlencode($submissionId) . '/original',
|
||||
$body,
|
||||
[
|
||||
'Content-Type' => 'binary/octet-stream',
|
||||
'Content-Disposition' => 'inline; filename="' . $filename . '"',
|
||||
'Content-Type' => 'application/octet-stream',
|
||||
'Content-Disposition' => 'attachment; filename="' . $safeName . '"',
|
||||
]
|
||||
);
|
||||
}
|
||||
@@ -114,9 +126,10 @@ class TurnitinService
|
||||
*
|
||||
* @param string $submissionId
|
||||
* @param array $opts
|
||||
* - generation_settings.search_repositories 默认 ['INTERNET','PUBLICATION','CROSSREF','CROSSREF_POSTED_CONTENT','SUBMITTED_WORK']
|
||||
* - generation_settings.submission_auto_excludes bool
|
||||
* - view_settings.exclude_quotes / exclude_bibliography / exclude_citations / exclude_abstract / exclude_methods bool
|
||||
* - generation_settings.search_repositories 默认 ['INTERNET','PUBLICATION',...]
|
||||
* - generation_settings.submission_auto_excludes **字符串数组**(如 [] 或具体仓库键),不可传 boolean(否则会 400)
|
||||
* - generation_settings.auto_exclude_self_matching_scope 可选,如 'GROUP_CONTEXT'
|
||||
* - view_settings.exclude_* 布尔排除项(与 TCA 文档一致)
|
||||
* - indexing_settings.add_to_index bool 是否把本文加进 SUBMITTED_WORK 索引(一般 true)
|
||||
* @return array
|
||||
*/
|
||||
@@ -125,7 +138,8 @@ class TurnitinService
|
||||
$body = array_merge([
|
||||
'generation_settings' => [
|
||||
'search_repositories' => ['INTERNET', 'PUBLICATION', 'CROSSREF', 'CROSSREF_POSTED_CONTENT', 'SUBMITTED_WORK'],
|
||||
'submission_auto_excludes' => true,
|
||||
// 服务端类型为 List<String>,传 true 会 400:Cannot deserialize ... from Boolean
|
||||
'submission_auto_excludes' => [],
|
||||
'auto_exclude_self_matching_scope' => 'GROUP_CONTEXT',
|
||||
],
|
||||
'view_settings' => [
|
||||
@@ -140,11 +154,122 @@ class TurnitinService
|
||||
|
||||
return $this->request(
|
||||
'PUT',
|
||||
'/submissions/' . urlencode($submissionId) . '/similarity',
|
||||
'/submissions/' . rawurlencode($submissionId) . '/similarity',
|
||||
$body
|
||||
);
|
||||
}
|
||||
|
||||
/**
 * Fetches submission details; used after upload to poll whether parsing has finished.
 * GET /submissions/{id}
 *
 * @param string $submissionId
 * @return array decoded JSON (per the original note, commonly status=ok with a
 *               "message" object holding id/status)
 */
public function getSubmission($submissionId)
{
    $path = '/submissions/' . rawurlencode($submissionId);

    return $this->request('GET', $path);
}
|
||||
|
||||
/**
 * Performs one GET /submissions/{id} and classifies the result to decide whether
 * PUT /similarity may be called. Does not sleep, so the queue chain can poll step by step.
 *
 * The status is upper-cased and trimmed before being matched against the ready and
 * failed lists; any other value (including empty) yields ready=false, failed=false.
 *
 * @param string $submissionId
 * @return array{ready:bool, failed:bool, status:string, snippet:string, message:array}
 */
public function parseSubmissionIngestState($submissionId)
{
    $payload = self::unwrapSubmissionPayload($this->getSubmission($submissionId));
    $status = strtoupper(trim((string) self::pickSubmissionStatus($payload)));

    $readyStatuses = [
        'COMPLETE', 'COMPLETED', 'PROCESSED', 'READY', 'SUCCEEDED',
        'COMPLETE_PROCESSING',
    ];
    $failedStatuses = ['ERROR', 'FAILED', 'CANCELLED', 'CANCELED', 'DELETED'];

    return [
        // An empty status is in neither list, so both flags stay false for it.
        'ready'   => in_array($status, $readyStatuses, true),
        'failed'  => in_array($status, $failedStatuses, true),
        'status'  => $status,
        // First 400 characters of the payload, for error messages and logs.
        'snippet' => mb_substr(json_encode($payload, JSON_UNESCAPED_UNICODE), 0, 400),
        'message' => $payload,
    ];
}
|
||||
|
||||
/**
 * Blocking wait for Turnitin to finish parsing an uploaded file. Intended for
 * CLI/debug use only; production should use the PlagiarismWaitIngest queue chain.
 *
 * @param string $submissionId
 * @param int    $maxWaitSec  maximum seconds to wait, default 600; values below 30 are raised to 30
 * @param int    $intervalSec seconds between polls, default 3; values below 1 are raised to 1
 * @throws Exception on timeout or when the submission reaches a failed status
 */
public function waitAfterUploadForSimilarity($submissionId, $maxWaitSec = 600, $intervalSec = 3)
{
    $giveUpAt = time() + max(30, (int)$maxWaitSec);
    $pause = max(1, (int)$intervalSec);
    $status = '';
    $excerpt = '';

    while (time() < $giveUpAt) {
        $state = $this->parseSubmissionIngestState($submissionId);
        $status = $state['status'];
        $excerpt = $state['snippet'];

        if (!empty($state['ready'])) {
            return;
        }
        if (!empty($state['failed'])) {
            throw new Exception('Turnitin submission failed, status=' . $status . ' body=' . $excerpt);
        }

        sleep($pause);
    }

    $shownStatus = $status ?: '(empty)';

    throw new Exception(
        'Timeout waiting for Turnitin submission ingest (last status=' . $shownStatus . ') snippet=' . $excerpt
    );
}
|
||||
|
||||
/**
 * Returns the submission object from a decoded response: the nested "message"
 * array when present, otherwise the response itself. Non-array input yields [].
 *
 * @param mixed $decoded
 * @return array
 */
private static function unwrapSubmissionPayload($decoded)
{
    if (!is_array($decoded)) {
        return [];
    }

    $inner = isset($decoded['message']) ? $decoded['message'] : null;

    return is_array($inner) ? $inner : $decoded;
}
|
||||
|
||||
/**
 * Finds the first non-empty status-like field, looking at the payload itself and
 * then at its nested "submission" array (if any). Returns '' when none is set.
 *
 * @param array $msg
 * @return string
 */
private static function pickSubmissionStatus(array $msg)
{
    // Checked in this order within each source.
    $statusKeys = ['status', 'workflow_status', 'submission_status', 'processing_status', 'paper_status'];

    $sources = [$msg];
    if (isset($msg['submission']) && is_array($msg['submission'])) {
        $sources[] = $msg['submission'];
    }

    foreach ($sources as $source) {
        foreach ($statusKeys as $key) {
            if (empty($source[$key])) {
                continue;
            }
            return (string)$source[$key];
        }
    }

    return '';
}
|
||||
|
||||
/**
|
||||
* 查询 similarity 状态
|
||||
* GET /submissions/{id}/similarity
|
||||
@@ -156,7 +281,7 @@ class TurnitinService
|
||||
{
|
||||
return $this->request(
|
||||
'GET',
|
||||
'/submissions/' . urlencode($submissionId) . '/similarity'
|
||||
'/submissions/' . rawurlencode($submissionId) . '/similarity'
|
||||
);
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user