Compare commits
16 Commits
a68742d2c2
...
checkrefer
| Author | SHA1 | Date | |
|---|---|---|---|
| c1107780a7 | |||
| 68cf1867d8 | |||
| 44f3383887 | |||
| f118a799c2 | |||
| d9c3243053 | |||
| 8cd033a56d | |||
| 3663dd4ea6 | |||
| 6f76c483ec | |||
| 867621232b | |||
| 74383d24ea | |||
| 7e5a087a4e | |||
| 4aab7f5b7e | |||
|
|
fa878334cd | ||
|
|
c36eba77b1 | ||
|
|
336fa08a18 | ||
|
|
b1e978ed73 |
3
.env
3
.env
@@ -33,6 +33,9 @@ UNSUBSCRIBE_BASE_URL=https://submission.tmrjournals.com/api/Unsubscribe/index
|
||||
[yboard]
|
||||
APPLY_URL="https://submission.tmrjournals.com/youthBoardRegister"
|
||||
|
||||
[plagiarism]
|
||||
static_root="/home/wwwroot/api.tmrjournals.com/public"
|
||||
|
||||
[journal]
|
||||
;官网服务器地址
|
||||
base_url = http://journalapi.tmrjournals.com/public/index.php
|
||||
|
||||
@@ -271,6 +271,14 @@ class Base extends Controller
|
||||
}
|
||||
$this->production_article_refer_obj->where('p_article_id', $refer_info['p_article_id'])->where('index', ">", $refer_info['index'])->where('state', 0)->setDec('index');
|
||||
$this->production_article_refer_obj->where('p_refer_id', $p_refer_id)->update(['state' => 1]);
|
||||
|
||||
// 文献集合已变更,原校对结果的 reference_no 已全部错位,整篇标记为未校对
|
||||
try {
|
||||
(new \app\common\ReferenceCheckService())
|
||||
->clearArticleChecksByPArticleId(intval($refer_info['p_article_id']));
|
||||
} catch (\Exception $e) {
|
||||
\think\Log::error('delOneRefer clearArticleChecksByPArticleId p_refer_id=' . $p_refer_id . ' ' . $e->getMessage());
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -1761,20 +1761,9 @@ class EmailClient extends Base
|
||||
$service = new PromotionService();
|
||||
$taskId = intval($data['id']);
|
||||
|
||||
// 调用前快照:用于解释"为什么没入队"
|
||||
$task = \think\Db::name('promotion_task')->where('task_id', $taskId)->find();
|
||||
$pending = \think\Db::name('promotion_email_log')
|
||||
->where('task_id', $taskId)
|
||||
->where('state', 0)
|
||||
->where('prepared_at', 0)
|
||||
->count();
|
||||
|
||||
$result = $service->dispatchPrepareEmails($taskId);
|
||||
|
||||
return jsonSuccess([
|
||||
'task_id' => $taskId,
|
||||
'task_state' => $task ? intval($task['state']) : null, // 0 才能 dispatch;5 已准备完
|
||||
'pending_before' => intval($pending), // 调用前还能入队的 log 数
|
||||
'dispatch_result' => $result, // ['dispatched' => N, ...]
|
||||
]);
|
||||
}
|
||||
@@ -1793,6 +1782,20 @@ class EmailClient extends Base
|
||||
return jsonSuccess($result);
|
||||
}
|
||||
|
||||
/**
 * Ad-hoc endpoint: validates that an "id" was posted and hands it to
 * PromotionService::enqueuePrepareEmail().
 *
 * POST parameters:
 *   id  required; cast to int before being passed on
 *
 * NOTE(review): the method name suggests a temporary test hook — confirm
 * whether it should remain publicly routable.
 *
 * @return mixed result of jsonError() on validation failure, otherwise jsonSuccess()
 */
public function mytestqqq()
{
    $post = $this->request->post();

    $validator = new Validate(['id' => 'require']);
    if (!$validator->check($post)) {
        return jsonError($validator->getError());
    }

    $id = intval($post['id']);
    (new PromotionService())->enqueuePrepareEmail($id);

    return jsonSuccess();
}
|
||||
|
||||
|
||||
/**
|
||||
* 队列调试:查看 Redis 里队列长度(不依赖 redis-cli)。
|
||||
*
|
||||
|
||||
226
application/api/controller/Plagiarism.php
Normal file
226
application/api/controller/Plagiarism.php
Normal file
@@ -0,0 +1,226 @@
|
||||
<?php
|
||||
|
||||
namespace app\api\controller;
|
||||
|
||||
use think\Db;
|
||||
use think\Response;
|
||||
use app\common\PlagiarismService;
|
||||
|
||||
/**
 * Plagiarism check (Turnitin / Crossref Similarity Check) controller.
 *
 * Trigger: manual only (an editor presses the "check" button in the back office).
 * Report strategy: temporarily signed online viewer URL, plus a PDF kept
 * permanently on disk under runtime/plagiarism/.
 *
 * Main endpoints:
 *   POST submit          start a check
 *   GET  getStatus       poll the state of one check (front-end ajax)
 *   GET  getList         list every check recorded for an article
 *   GET  getReportUrl    get / refresh the online viewer URL
 *   GET  downloadReport  download the locally stored PDF
 *   POST retry           start again (creates a new row)
 *   GET  features        liveness probe (development / debugging)
 */
class Plagiarism extends Base
{
    public function __construct(\think\Request $request = null)
    {
        parent::__construct($request);
    }

    /**
     * Start a plagiarism check.
     *
     * Request parameters:
     *   article_id  required, must be > 0
     *   file_url    optional; when empty the manuscript is located from article_id
     *               via PlagiarismService::locateArticleManuscript()
     *   editor_id   optional; user_id of the person triggering the check (0 is accepted)
     *
     * Any \Throwable raised by the service is converted into a jsonError()
     * carrying the exception message.
     *
     * @return mixed jsonSuccess(['check_id' => ...]) or jsonError(...)
     */
    public function submit()
    {
        $articleId = intval($this->request->param('article_id', 0));
        $fileUrl = trim($this->request->param('file_url', ''));
        $editorId = intval($this->request->param('editor_id', 0));

        if ($articleId <= 0) {
            return jsonError('article_id required');
        }

        try {
            $svc = new PlagiarismService();
            $localPath = $fileUrl !== ''
                ? $svc->resolveFileUrlToLocal($fileUrl)
                : $svc->locateArticleManuscript($articleId);
            // The resolved path is deliberately NOT echoed: writing it to the
            // output would prepend a server-side path to the JSON response.
            $checkId = $svc->submit($articleId, $localPath, $editorId, 'manual');
            return jsonSuccess(['check_id' => $checkId]);
        } catch (\Throwable $e) {
            return jsonError($e->getMessage());
        }
    }

    /**
     * Debug helper: runs the upload-and-trigger step for a hard-coded check id
     * and a hard-coded manuscript path.
     *
     * NOTE(review): this is a public controller method with fixed test data;
     * confirm whether it should be removed or protected before release.
     */
    public function testccone()
    {
        $svc = new PlagiarismService();
        $checkId = 9;
        $filePath = "/home/wwwroot/api.tmrjournals.com/public/manuscirpt/20260509/6832a56e8ace38fe99df390ab5221deb.docx";
        $svc->runUploadAndTrigger($checkId, $filePath);
    }

    /**
     * Retry = submit a brand-new check (history rows are kept).
     *
     * @return mixed same result as submit()
     */
    public function retry()
    {
        return $this->submit();
    }

    /**
     * Return the state of a single check.
     *
     * Request parameters:
     *   check_id  required, must be > 0
     *
     * @return mixed jsonSuccess(formatted row) or jsonError(...)
     */
    public function getStatus()
    {
        $checkId = intval($this->request->param('check_id', 0));
        if ($checkId <= 0) {
            return jsonError('check_id required');
        }
        $row = Db::name('plagiarism_check')->where('check_id', $checkId)->find();
        if (!$row) {
            return jsonError('not found');
        }
        return jsonSuccess($this->formatRow($row));
    }

    /**
     * List every check of one article, newest first (ordered by check_id desc).
     *
     * Request parameters:
     *   article_id  required, must be > 0
     *
     * @return mixed jsonSuccess(['list' => [...]]) or jsonError(...)
     */
    public function getList()
    {
        $articleId = intval($this->request->param('article_id', 0));
        if ($articleId <= 0) {
            return jsonError('article_id required');
        }
        $rows = Db::name('plagiarism_check')
            ->where('article_id', $articleId)
            ->order('check_id desc')
            ->select();
        $out = [];
        foreach ($rows as $r) {
            $out[] = $this->formatRow($r);
        }
        return jsonSuccess(['list' => $out]);
    }

    /**
     * Return the online viewer URL, refreshing it when it is missing or about
     * to expire.
     *
     * Only checks in state 3 (labelled "完成" by stateLabel()) are served.
     * The stored URL is considered stale when it is empty or expires within
     * the next 60 seconds.
     *
     * Request parameters:
     *   check_id  required, must be > 0
     *
     * @return mixed jsonSuccess(['view_only_url' => ..., 'expire' => ...]) or jsonError(...)
     */
    public function getReportUrl()
    {
        $checkId = intval($this->request->param('check_id', 0));
        if ($checkId <= 0) {
            return jsonError('check_id required');
        }
        try {
            $row = Db::name('plagiarism_check')->where('check_id', $checkId)->find();
            if (!$row) {
                return jsonError('not found');
            }
            if ($row['state'] != 3) {
                return jsonError('check not completed yet, state=' . $row['state']);
            }
            $needRefresh = empty($row['view_only_url'])
                || intval($row['view_only_url_expire']) < time() + 60;

            if ($needRefresh) {
                $svc = new PlagiarismService();
                $info = $svc->refreshViewerUrlFor($checkId);
                return jsonSuccess([
                    'view_only_url' => $info['url'],
                    'expire' => $info['expire'],
                ]);
            }
            return jsonSuccess([
                'view_only_url' => $row['view_only_url'],
                'expire' => intval($row['view_only_url_expire']),
            ]);
        } catch (\Throwable $e) {
            return jsonError($e->getMessage());
        }
    }

    /**
     * Stream the locally stored PDF report to the browser as a download.
     *
     * The stored pdf_local_path is resolved relative to ROOT_PATH (falling
     * back to three directories above this file when ROOT_PATH is empty).
     *
     * Request parameters:
     *   check_id  required, must be > 0
     *
     * @return mixed a Response carrying the PDF bytes, or jsonError(...)
     */
    public function downloadReport()
    {
        $checkId = intval($this->request->param('check_id', 0));
        if ($checkId <= 0) {
            return jsonError('check_id required');
        }
        $row = Db::name('plagiarism_check')->where('check_id', $checkId)->find();
        if (!$row || empty($row['pdf_local_path'])) {
            return jsonError('report not available');
        }
        $rootDir = ROOT_PATH ?: dirname(dirname(dirname(__DIR__)));
        $abs = rtrim($rootDir, '/\\') . DIRECTORY_SEPARATOR . str_replace(['/', '\\'], DIRECTORY_SEPARATOR, $row['pdf_local_path']);
        if (!is_file($abs)) {
            return jsonError('pdf file missing on disk: ' . $row['pdf_local_path']);
        }
        // Read once and verify the read: the file may exist yet be unreadable,
        // and it may change between separate is_file()/filesize() calls.
        $pdf = is_readable($abs) ? file_get_contents($abs) : false;
        if ($pdf === false) {
            return jsonError('pdf file unreadable: ' . $row['pdf_local_path']);
        }
        $filename = sprintf('plagiarism_check_%d_article_%d.pdf', $row['check_id'], $row['article_id']);
        return Response::create($pdf, 'html', 200, [
            'Content-Type' => 'application/pdf',
            'Content-Disposition' => 'attachment; filename="' . $filename . '"',
            // Length of the bytes actually sent, not a second stat of the file.
            'Content-Length' => (string)strlen($pdf),
        ]);
    }

    /**
     * Turnitin liveness probe (development / debugging).
     *
     * @return mixed jsonSuccess(result of TurnitinService::featuresEnabled()) or jsonError(...)
     */
    public function features()
    {
        try {
            $tii = new \app\common\TurnitinService();
            return jsonSuccess($tii->featuresEnabled());
        } catch (\Throwable $e) {
            return jsonError($e->getMessage());
        }
    }

    // ---------- internal helpers ----------

    /**
     * Convert a plagiarism_check row into the API output shape, casting each
     * column to a scalar type and deriving state_label / has_pdf / has_viewer_url.
     *
     * @param array $r one row of the plagiarism_check table
     * @return array
     */
    private function formatRow($r)
    {
        return [
            'check_id' => intval($r['check_id']),
            'article_id' => intval($r['article_id']),
            'journal_id' => intval($r['journal_id']),
            'state' => intval($r['state']),
            'state_label' => $this->stateLabel($r['state']),
            'similarity_score' => floatval($r['similarity_score']),
            'tii_report_status' => (string)$r['tii_report_status'],
            'has_pdf' => !empty($r['pdf_local_path']),
            // Unlike getReportUrl(), no 60-second safety margin is applied here.
            'has_viewer_url' => !empty($r['view_only_url']) && intval($r['view_only_url_expire']) > time(),
            'attempts' => intval($r['attempts']),
            'error_msg' => (string)$r['error_msg'],
            'source_file_name' => (string)$r['source_file_name'],
            'trigger_source' => (string)$r['trigger_source'],
            'triggered_by' => intval($r['triggered_by']),
            'ctime' => intval($r['ctime']),
            'utime' => intval($r['utime']),
        ];
    }

    /**
     * Map a numeric state to its display label; unknown states yield 'unknown'.
     *
     * @param int|string $state
     * @return string
     */
    private function stateLabel($state)
    {
        $map = [
            0 => '待上传',
            1 => '上传中',
            2 => '比对中',
            3 => '完成',
            4 => '失败',
        ];
        return isset($map[$state]) ? $map[$state] : 'unknown';
    }
}
|
||||
@@ -6,6 +6,8 @@ use think\Db;
|
||||
use think\Env;
|
||||
use think\Queue;
|
||||
use think\Validate;
|
||||
use app\common\CrossrefService;
|
||||
use app\common\ReferenceCheckService;
|
||||
|
||||
class Preaccept extends Base
|
||||
{
|
||||
@@ -14,6 +16,26 @@ class Preaccept extends Base
|
||||
parent::__construct($request);
|
||||
}
|
||||
|
||||
/**
 * After a reference was added / changed so that the reference set differs,
 * wipe all check details of the article so it returns to "not checked".
 * A failure is only logged; it never interrupts the caller's main flow.
 *
 * @param mixed  $pArticleId production article id (cast to int; values <= 0 are ignored)
 * @param string $sourceTag  label of the calling action, used in the log line only
 * @return void
 */
private function resetArticleChecksOnReferChange($pArticleId, $sourceTag = '')
{
    $pArticleId = intval($pArticleId);
    if ($pArticleId > 0) {
        try {
            $checker = new ReferenceCheckService();
            $checker->clearArticleChecksByPArticleId($pArticleId);
        } catch (\Exception $ex) {
            $logLine = 'resetArticleChecksOnReferChange[' . $sourceTag . '] p_article_id='
                . $pArticleId . ' ' . $ex->getMessage();
            \think\Log::error($logLine);
        }
    }
}
|
||||
|
||||
|
||||
/**获取文章参考文献列表
|
||||
* @return \think\response\Json
|
||||
@@ -91,6 +113,7 @@ class Preaccept extends Base
|
||||
return jsonError($rule->getError());
|
||||
}
|
||||
$this->production_article_refer_obj->where('p_article_id',$data['p_article_id'])->update(["state"=>1]);
|
||||
$this->resetArticleChecksOnReferChange(intval($data['p_article_id']), 'discardRefersByParticleid');
|
||||
return jsonSuccess([]);
|
||||
}
|
||||
|
||||
@@ -141,6 +164,7 @@ class Preaccept extends Base
|
||||
}
|
||||
$adId= $this->production_article_refer_obj->insertGetId($insert);
|
||||
$this->production_article_refer_obj->where('p_article_id', $p_info['p_article_id'])->where("p_refer_id", "<>", $adId)->where("index", ">", $pre_refer['index'])->where('state', 0)->setInc('index');
|
||||
$this->resetArticleChecksOnReferChange(intval($p_info['p_article_id']), 'addRefer');
|
||||
return jsonSuccess([]);
|
||||
|
||||
|
||||
@@ -197,6 +221,7 @@ class Preaccept extends Base
|
||||
}
|
||||
$adId= $this->production_article_refer_obj->insertGetId($insert);
|
||||
$this->production_article_refer_obj->where('p_article_id', $p_info['p_article_id'])->where("p_refer_id", "<>", $adId)->where("index", ">", $pre_refer['index'])->where('state', 0)->setInc('index');
|
||||
$this->resetArticleChecksOnReferChange(intval($p_info['p_article_id']), 'addReferByParticleid');
|
||||
return jsonSuccess([]);
|
||||
}
|
||||
|
||||
@@ -232,6 +257,7 @@ class Preaccept extends Base
|
||||
$insert['cs'] = 1;
|
||||
$adId = $this->production_article_refer_obj->insertGetId($insert);
|
||||
$this->production_article_refer_obj->where('p_article_id', $p_info['p_article_id'])->where("p_refer_id", "<>", $adId)->where("index", ">", $pre_refer['index'])->where('state', 0)->setInc('index');
|
||||
$this->resetArticleChecksOnReferChange(intval($p_info['p_article_id']), 'addReferNotdoi');
|
||||
return jsonSuccess([]);
|
||||
}
|
||||
|
||||
@@ -461,6 +487,17 @@ class Preaccept extends Base
|
||||
// }
|
||||
// $this->production_article_refer_obj->where('p_refer_id', $data['p_refer_id'])->update(['refer_doi' => $data['doi']]);
|
||||
// my_doiToFrag2($this->production_article_refer_obj->where('p_refer_id', $data['p_refer_id'])->find());
|
||||
|
||||
//文献内容更新成功后异步重检该文献对应的全部校对明细(失败不阻塞主流程)
|
||||
try {
|
||||
(new ReferenceCheckService())->enqueueRecheckByPReferId(
|
||||
intval($data['p_refer_id']),
|
||||
intval($old_refer_info['p_article_id'])
|
||||
);
|
||||
} catch (\Exception $e) {
|
||||
\think\Log::error('editRefer enqueueRecheckByPReferId p_refer_id=' . $data['p_refer_id'] . ' ' . $e->getMessage());
|
||||
}
|
||||
|
||||
return jsonSuccess([]);
|
||||
}
|
||||
|
||||
@@ -708,36 +745,66 @@ class Preaccept extends Base
|
||||
}
|
||||
|
||||
|
||||
/**
 * Fetch reference metadata for a DOI through the Crossref REST API.
 *
 * POST parameters:
 *   doi  required; either a bare DOI (10.xxxx/...) or https://doi.org/10.xxxx/...
 *
 * Response:
 *   data.formate   fields compatible with the previous version:
 *                  author, title, joura, dateno, doilink
 *   data.crossref  the summary returned by CrossrefService, without its "raw"
 *                  entry (kept out to limit the payload size)
 *   data.doi       the DOI reported in the summary
 *
 * @return mixed jsonSuccess([...]) or jsonError(...)
 */
public function searchDoi()
{
    $data = $this->request->post();
    $rule = new Validate([
        'doi' => 'require',
    ]);
    if (!$rule->check($data)) {
        return jsonError($rule->getError());
    }

    $doiInput = trim((string)$data['doi']);
    if ($doiInput === '') {
        return jsonError('doi empty');
    }
    // Strip the resolver URL prefix to obtain the bare DOI.
    $doiNorm = preg_replace('#^https?://(dx\.)?doi\.org/#i', '', $doiInput);
    $doiNorm = trim((string)$doiNorm, " \t\n\r\0\x0B/");
    if ($doiNorm === '') {
        // Input consisted of the resolver prefix only, e.g. "https://doi.org/".
        return jsonError('doi empty');
    }

    $svc = new CrossrefService([
        'mailto' => trim((string)Env::get('crossref_mailto', '')),
    ]);
    $summary = $svc->fetchWorkSummary($doiNorm);
    if ($summary === null || empty($summary['doi'])) {
        return jsonError('DOI not found or invalid (Crossref)');
    }

    $title = trim((string)($summary['title'] ?? ''));
    $jouraRaw = trim((string)($summary['joura'] ?? ''));
    $authorStr = trim((string)($summary['author_str'] ?? ''));
    $dateno = trim((string)($summary['dateno'] ?? ''));
    $doilink = trim((string)($summary['doilink'] ?? ''));
    if ($doilink === '') {
        $doilink = 'https://doi.org/' . $summary['doi'];
    }

    $f = [
        'author' => $authorStr !== '' ? prgeAuthor($authorStr) : '',
        'title' => $title,
        'joura' => $jouraRaw !== '' ? formateJournal($jouraRaw) : '',
        // Hyphens become en dashes and all spaces are removed.
        'dateno' => str_replace(' ', '', str_replace('-', '–', $dateno)),
        'doilink' => $doilink,
    ];

    $crossrefOut = $summary;
    unset($crossrefOut['raw']);

    return jsonSuccess([
        'formate' => $f,
        'crossref' => $crossrefOut,
        'doi' => $summary['doi'],
    ]);
}
|
||||
|
||||
|
||||
@@ -1422,6 +1489,7 @@ class Preaccept extends Base
|
||||
return jsonError($rule->getError());
|
||||
}
|
||||
$refer_info = $this->production_article_refer_obj->where('p_refer_id', $data['p_refer_id'])->find();
|
||||
$sibling_p_refer_id = 0;
|
||||
if ($data['act'] == "up") {
|
||||
$up_info = $this->production_article_refer_obj->where('p_article_id', $refer_info['p_article_id'])->where('index', $refer_info['index'] - 1)->where('state', 0)->find();
|
||||
if (!$up_info) {
|
||||
@@ -1429,6 +1497,7 @@ class Preaccept extends Base
|
||||
}
|
||||
$this->production_article_refer_obj->where('p_refer_id', $up_info['p_refer_id'])->setInc("index");
|
||||
$this->production_article_refer_obj->where('p_refer_id', $refer_info['p_refer_id'])->setDec("index");
|
||||
$sibling_p_refer_id = intval($up_info['p_refer_id']);
|
||||
} else {
|
||||
$down_info = $this->production_article_refer_obj->where('p_article_id', $refer_info['p_article_id'])->where('index', $refer_info['index'] + 1)->where('state', 0)->find();
|
||||
if (!$down_info) {
|
||||
@@ -1436,7 +1505,19 @@ class Preaccept extends Base
|
||||
}
|
||||
$this->production_article_refer_obj->where('p_refer_id', $refer_info['p_refer_id'])->setInc("index");
|
||||
$this->production_article_refer_obj->where('p_refer_id', $down_info['p_refer_id'])->setDec("index");
|
||||
$sibling_p_refer_id = intval($down_info['p_refer_id']);
|
||||
}
|
||||
|
||||
// 仅同步本次交换的两条 p_refer_id 对应的校对明细 reference_no / refer_index
|
||||
try {
|
||||
(new ReferenceCheckService())->syncReferenceNoByPReferIds(
|
||||
[intval($refer_info['p_refer_id']), $sibling_p_refer_id],
|
||||
intval($refer_info['p_article_id'])
|
||||
);
|
||||
} catch (\Exception $e) {
|
||||
\think\Log::error('sortRefer syncReferenceNoByPReferIds: ' . $e->getMessage());
|
||||
}
|
||||
|
||||
return jsonSuccess([]);
|
||||
}
|
||||
|
||||
|
||||
@@ -10,6 +10,7 @@ use think\Db;
|
||||
use think\Queue;
|
||||
use think\Validate;
|
||||
use think\log;
|
||||
use app\common\ArticleSymbolNormalizer;
|
||||
|
||||
/**
|
||||
* @title 公共管理相关
|
||||
@@ -1380,6 +1381,10 @@ class Production extends Base
|
||||
return $html;
|
||||
}
|
||||
|
||||
/**
 * Debug hook: calls ArticleSymbolNormalizer::normalize() with an empty
 * string and discards the result.
 *
 * NOTE(review): looks like a leftover smoke test — confirm whether this
 * public action is still needed.
 */
public function testsym()
{
    $emptyInput = "";
    ArticleSymbolNormalizer::normalize($emptyInput);
}
|
||||
|
||||
|
||||
public function doTypeSettingNew()
|
||||
{
|
||||
@@ -1399,7 +1404,7 @@ class Production extends Base
|
||||
$editor_info = $this->user_obj->where('user_id', $journal_info['editor_id'])->find();
|
||||
|
||||
$typesetInfo = [];
|
||||
$typesetInfo['info_title'] = $p_info['title'];
|
||||
$typesetInfo['info_title'] = ArticleSymbolNormalizer::normalize($p_info['title']);
|
||||
$typesetInfo['info_type'] = $p_info['type'];
|
||||
$typesetInfo['doi'] = $p_info['doi'];
|
||||
$typesetInfo['topic'] = '';
|
||||
|
||||
@@ -1307,4 +1307,231 @@ class References extends Base
|
||||
}
|
||||
return json_encode(['status' => 8,'msg' => 'fail']);
|
||||
}
|
||||
/**
 * First-pass reference check for a whole article.
 *
 * POST parameters:
 *   p_article_id  required
 *
 * Flow: look up the production article (state 0 or 2), require that
 * checkReferStatus() reports the references as ready, refuse when check
 * records already exist for the article, then enqueue the check through
 * ReferenceCheckService::enqueueByPArticle().
 *
 * @return mixed a json_encode()d status string for the early validation
 *               failures, otherwise the result of jsonError() / jsonSuccess()
 */
public function allReferenceCheckAI()
{
    // Read the request parameters.
    $aParam = $this->request->post();

    // Required-value validation.
    $iPArticleId = empty($aParam['p_article_id']) ? '' : $aParam['p_article_id'];
    if (empty($iPArticleId)) {
        return json_encode(array('status' => 2, 'msg' => 'Please select an article'));
    }
    // Load the article; both p_article_id and article_id are selected because
    // the downstream service methods use both.
    $aWhere = ['p_article_id' => $iPArticleId, 'state' => ['in', [0, 2]]];
    $aProductionArticle = Db::name('production_article')->field('p_article_id,article_id')->where($aWhere)->find();
    if (empty($aProductionArticle)) {
        return json_encode(array('status' => 3, 'msg' => 'No articles found'));
    }

    // checkReferStatus() returns 0/1, or a ready-made error response when the
    // article has no active references. Comparing that response with "== 0"
    // is false, which previously let the request fall through to enqueueing.
    $mReferStatus = $this->checkReferStatus($iPArticleId);
    if (!is_int($mReferStatus)) {
        return $mReferStatus;
    }
    if ($mReferStatus === 0) {
        return jsonError('请修正完文献内容再进行校对。');
    }

    // A first-pass check must not run twice; existing records mean the caller
    // has to use the reset endpoint instead.
    $iExisting = Db::name('article_reference_check_result')
        ->where('p_article_id', $iPArticleId)
        ->count();
    if (intval($iExisting) > 0) {
        return jsonError('该文章已存在校对记录,请使用"重置校对"接口重新校对。');
    }

    try {
        $svc = new ReferenceCheckService();
        $result = $svc->enqueueByPArticle($aProductionArticle);
        return jsonSuccess($result);
    } catch (\Exception $e) {
        return jsonError($e->getMessage());
    }
}
|
||||
/**
 * Reset the reference check of an article: delegates to
 * ReferenceCheckService::resetAndRecheckByArticle(), which per the original
 * note deletes the existing check details and re-enqueues the whole article.
 *
 * POST parameters:
 *   p_article_id  required
 *
 * NOTE(review): the previous docblock named the parameter "article_id" and
 * listed @url /api/Article/referenceCheckReset; the code reads p_article_id —
 * confirm the actual route.
 *
 * @return mixed a json_encode()d status string for the early validation
 *               failures, otherwise the result of jsonError() / jsonSuccess()
 */
public function referenceCheckResetAI()
{
    // Read the request parameters.
    $aParam = $this->request->post();

    // Required-value validation.
    $iPArticleId = empty($aParam['p_article_id']) ? '' : $aParam['p_article_id'];
    if (empty($iPArticleId)) {
        return json_encode(array('status' => 2, 'msg' => 'Please select an article'));
    }
    // Load the article; both p_article_id and article_id are selected because
    // the downstream service methods use both.
    $aWhere = ['p_article_id' => $iPArticleId, 'state' => ['in', [0, 2]]];
    $aProductionArticle = Db::name('production_article')->field('p_article_id,article_id')->where($aWhere)->find();
    if (empty($aProductionArticle)) {
        return json_encode(array('status' => 3, 'msg' => 'No articles found'));
    }

    // checkReferStatus() returns 0/1, or a ready-made error response when the
    // article has no active references. Comparing that response with "== 0"
    // is false, which previously let the reset proceed anyway.
    $mReferStatus = $this->checkReferStatus($iPArticleId);
    if (!is_int($mReferStatus)) {
        return $mReferStatus;
    }
    if ($mReferStatus === 0) {
        return jsonError('请修正完文献内容再进行校对。');
    }

    // The production article must be bound to an article.
    if (empty($aProductionArticle['article_id'])) {
        return json_encode(array('status' => 4, 'msg' => 'Unbound article'));
    }

    try {
        $result = (new ReferenceCheckService())->resetAndRecheckByArticle($aProductionArticle);
        return jsonSuccess($result);
    } catch (\Exception $e) {
        return jsonError($e->getMessage());
    }
}
|
||||
|
||||
/**
 * Delete every reference-check record of one article without re-enqueueing.
 *
 * Difference from referenceCheckResetAI: reset means "clear + check again";
 * this endpoint performs only the "clear" step, so the article goes back to
 * "not checked" until the user triggers a check manually.
 *
 * POST/GET parameters:
 *   p_article_id  required
 *
 * @return mixed a json_encode()d status string for validation failures,
 *               otherwise the result of jsonSuccess() / jsonError()
 */
public function referenceCheckClearAI()
{
    // POST body first; fall back to the merged request parameters.
    $input = $this->request->post() ?: $this->request->param();

    $pArticleId = 0;
    if (!empty($input['p_article_id'])) {
        $pArticleId = intval($input['p_article_id']);
    }
    if ($pArticleId <= 0) {
        return json_encode(['status' => 2, 'msg' => 'Please select an article']);
    }

    // The article must exist, using the same rule as the other check
    // endpoints: state in [0, 2].
    $articleFilter = ['p_article_id' => $pArticleId, 'state' => ['in', [0, 2]]];
    $article = Db::name('production_article')
        ->field('p_article_id,article_id')
        ->where($articleFilter)
        ->find();
    if (empty($article)) {
        return json_encode(['status' => 3, 'msg' => 'No articles found']);
    }

    try {
        $checker = new ReferenceCheckService();
        $removed = $checker->clearArticleChecksByPArticleId($pArticleId);
        $payload = [
            'p_article_id' => $pArticleId,
            'deleted' => intval($removed),
        ];
        return jsonSuccess($payload);
    } catch (\Exception $ex) {
        return jsonError($ex->getMessage());
    }
}
|
||||
|
||||
/**
 * Report the reference-check progress of a whole article, looked up by
 * p_article_id (per the original note: aggregated per reference_no).
 *
 * POST/GET parameters:
 *   p_article_id  required
 *
 * Per the original note, each list item carries: reference_no, p_refer_id,
 * status (numeric), total, pending, done, failed, pass, is_pass,
 * last_updated_at, records.
 *
 * status values (per the original note):
 *   0 = waiting   1 = checking   2 = finished   3 = failed
 *
 * @return mixed a json_encode()d status string when p_article_id is missing,
 *               otherwise the result of jsonSuccess() / jsonError()
 */
public function referenceCheckProgressAI()
{
    // POST body first; fall back to the merged request parameters.
    $input = $this->request->post() ?: $this->request->param();

    $pArticleId = 0;
    if (!empty($input['p_article_id'])) {
        $pArticleId = intval($input['p_article_id']);
    }
    if ($pArticleId <= 0) {
        return json_encode(['status' => 2, 'msg' => 'Please select an article']);
    }

    try {
        $checker = new ReferenceCheckService();
        return jsonSuccess($checker->getProgressByPArticleId($pArticleId));
    } catch (\Exception $ex) {
        return jsonError($ex->getMessage());
    }
}
|
||||
|
||||
/**
 * Report the overall reference-check status of an article, looked up by
 * p_article_id (used by the front end to decide which button to show).
 *
 * POST/GET parameters:
 *   p_article_id  required
 *
 * The notes below are carried over from the original docblock; the figures
 * are produced by ReferenceCheckService::getArticleProgressStatusByPArticleId().
 *
 * Counting unit is the reference (grouped by reference_no), not the single
 * check-detail row. Example: 50 references backed by 111 detail rows give
 * total = 50.
 *
 * status values (whole article):
 *   0 = not checked (no record at all)
 *   1 = checking    (at least one reference still has unfinished details)
 *   2 = finished    (every detail of every reference has ended)
 *
 * Returned fields: p_article_id, status, total, pending, done, failed,
 * progress_percent
 *   total    number of references
 *   pending  references that still have unfinished details (including
 *            partially finished ones)
 *   done     references whose details all have status=1
 *   failed   references that are fully finished with at least one status=2
 *   pending + done + failed = total; progress_percent = (done+failed)/total
 *
 * For the per-reference breakdown use referenceCheckProgressAI.
 *
 * @return mixed a json_encode()d status string when p_article_id is missing,
 *               otherwise the result of jsonSuccess() / jsonError()
 */
public function referenceCheckArticleStatusAI()
{
    // POST body first; fall back to the merged request parameters.
    $input = $this->request->post() ?: $this->request->param();

    $pArticleId = 0;
    if (!empty($input['p_article_id'])) {
        $pArticleId = intval($input['p_article_id']);
    }
    if ($pArticleId <= 0) {
        return json_encode(['status' => 2, 'msg' => 'Please select an article']);
    }

    try {
        $checker = new ReferenceCheckService();
        return jsonSuccess($checker->getArticleProgressStatusByPArticleId($pArticleId));
    } catch (\Exception $ex) {
        return jsonError($ex->getMessage());
    }
}
|
||||
|
||||
/**
 * Return the check details of a single reference, looked up by p_refer_id.
 *
 * POST/GET parameters:
 *   p_refer_id  required
 *
 * Per the original note, each list item carries: am_id, confidence, reason,
 * is_match, is_pass; the response also includes the context fields
 * p_refer_id, p_article_id, reference_no and total.
 *
 * @return mixed a json_encode()d status string when p_refer_id is missing,
 *               otherwise the result of jsonSuccess() / jsonError()
 */
public function referenceCheckDetailsAI()
{
    // POST body first; fall back to the merged request parameters.
    $input = $this->request->post() ?: $this->request->param();

    $pReferId = 0;
    if (!empty($input['p_refer_id'])) {
        $pReferId = intval($input['p_refer_id']);
    }
    if ($pReferId <= 0) {
        return json_encode(['status' => 2, 'msg' => 'Please select a reference']);
    }

    try {
        $checker = new ReferenceCheckService();
        return jsonSuccess($checker->getCheckDetailsByPReferId($pReferId));
    } catch (\Exception $ex) {
        return jsonError($ex->getMessage());
    }
}
|
||||
|
||||
/**
 * Tell whether every active reference (state = 0) of an article has a
 * non-zero "cs" flag.
 *
 * Note the mixed return type: callers must not assume an integer.
 *
 * @param mixed $p_article_id production article id
 * @return mixed 1 when no active reference has cs == 0, 0 when at least one
 *               does, or the result of jsonError('references error') when the
 *               article has no active references
 */
public function checkReferStatus($p_article_id)
{
    $activeRefs = $this->production_article_refer_obj
        ->where('p_article_id', $p_article_id)
        ->where('state', 0)
        ->select();
    if (!$activeRefs) {
        return jsonError('references error');
    }

    // The first reference with cs == 0 decides the outcome.
    foreach ($activeRefs as $ref) {
        if ($ref['cs'] == 0) {
            return 0;
        }
    }
    return 1;
}
|
||||
}
|
||||
|
||||
@@ -4,7 +4,6 @@ namespace app\api\job;
|
||||
|
||||
use think\queue\Job;
|
||||
use app\common\ExpertFinderService;
|
||||
use app\common\QueueJob;
|
||||
|
||||
/**
|
||||
* 专家抓取队列任务。
|
||||
@@ -16,25 +15,16 @@ use app\common\QueueJob;
|
||||
*/
|
||||
class FetchExperts
|
||||
{
|
||||
private $oQueueJob;
|
||||
|
||||
public function __construct()
|
||||
{
|
||||
$this->oQueueJob = new QueueJob();
|
||||
}
|
||||
|
||||
public function fire(Job $job, $data)
|
||||
{
|
||||
$this->oQueueJob->init($job);
|
||||
|
||||
$field = isset($data['field']) ? (string)$data['field'] : '';
|
||||
if ($field === '') {
|
||||
$this->oQueueJob->log("FetchExperts 无效的 field,删除任务");
|
||||
$job->delete();
|
||||
return;
|
||||
}
|
||||
|
||||
try {
|
||||
$service = new ExpertFinderService();
|
||||
$service->doFetchForField(
|
||||
$field,
|
||||
@@ -42,14 +32,6 @@ class FetchExperts
|
||||
isset($data['per_page']) ? intval($data['per_page']) : 100,
|
||||
isset($data['min_year']) ? $data['min_year'] : null
|
||||
);
|
||||
$this->oQueueJob->log("FetchExperts 完成 | field={$field}");
|
||||
$job->delete();
|
||||
} catch (\Exception $e) {
|
||||
$this->oQueueJob->handleException($e, $job, "field={$field}");
|
||||
} catch (\Throwable $e) {
|
||||
$this->oQueueJob->handleException($e, $job, "field={$field}");
|
||||
} finally {
|
||||
$this->oQueueJob->finnal();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -4,7 +4,6 @@ namespace app\api\job;
|
||||
|
||||
use think\queue\Job;
|
||||
use app\common\ExpertFinderService;
|
||||
use app\common\QueueJob;
|
||||
|
||||
/**
|
||||
* 队列任务:用本地大模型从 affiliation 推断国家,写入 expert.country_id / country。
|
||||
@@ -17,16 +16,9 @@ use app\common\QueueJob;
|
||||
*/
|
||||
class FillExpertCountry
|
||||
{
|
||||
private $oQueueJob;
|
||||
|
||||
public function __construct()
|
||||
{
|
||||
$this->oQueueJob = new QueueJob();
|
||||
}
|
||||
|
||||
public function fire(Job $job, $data)
|
||||
{
|
||||
$this->oQueueJob->init($job);
|
||||
|
||||
$expertId = intval(isset($data['expert_id']) ? $data['expert_id'] : 0);
|
||||
$affiliation = isset($data['affiliation']) ? trim((string)$data['affiliation']) : '';
|
||||
@@ -35,7 +27,6 @@ class FillExpertCountry
|
||||
|
||||
$service = new ExpertFinderService();
|
||||
|
||||
try {
|
||||
if ($expertId && $affiliation !== '') {
|
||||
$service->fillExpertCountry($expertId, $affiliation, $chatUrl);
|
||||
}
|
||||
@@ -43,12 +34,5 @@ class FillExpertCountry
|
||||
|
||||
// 链式:处理完当前专家立刻拉下一个进来
|
||||
$service->enqueueNextCountryFill(1, $queue, $chatUrl);
|
||||
} catch (\Exception $e) {
|
||||
$this->oQueueJob->handleException($e, $job, "expert_id={$expertId} queue={$queue}");
|
||||
} catch (\Throwable $e) {
|
||||
$this->oQueueJob->handleException($e, $job, "expert_id={$expertId} queue={$queue}");
|
||||
} finally {
|
||||
$this->oQueueJob->finnal();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
37
application/api/job/PlagiarismPoll.php
Normal file
37
application/api/job/PlagiarismPoll.php
Normal file
@@ -0,0 +1,37 @@
|
||||
<?php
|
||||
|
||||
namespace app\api\job;
|
||||
|
||||
use think\queue\Job;
|
||||
use app\common\PlagiarismService;
|
||||
|
||||
/**
 * Queue job: poll the Turnitin similarity status of one check.
 *
 * Per the original note, an unfinished check is enqueued again (chained,
 * delayed) and a finished one has its PDF report downloaded and stored
 * locally; that logic lives in PlagiarismService::runPollStatus(), not here.
 *
 * data:
 *   - check_id  t_plagiarism_check.check_id
 *   - attempt   current poll count (1 on the first run)
 *
 * NOTE(review): the original comment says a QueueJob helper restarts the
 * long-running worker (exit(1) after 6h or on fatal DB errors); this class
 * does not reference QueueJob — confirm where that happens.
 */
class PlagiarismPoll
{
    /**
     * Run one poll and then delete the job. A payload without a positive
     * check_id is deleted without polling.
     *
     * @param Job   $job  the queue job being processed
     * @param mixed $data job payload (see class docblock)
     * @return void
     */
    public function fire(Job $job, $data)
    {
        $checkId = isset($data['check_id']) ? intval($data['check_id']) : 0;
        $attempt = isset($data['attempt']) ? intval($data['attempt']) : 1;

        if ($checkId > 0) {
            $service = new PlagiarismService();
            $service->runPollStatus($checkId, $attempt);
        }

        // Reached for both the invalid-payload and the processed case.
        $job->delete();
    }
}
|
||||
36
application/api/job/PlagiarismRun.php
Normal file
36
application/api/job/PlagiarismRun.php
Normal file
@@ -0,0 +1,36 @@
|
||||
<?php
|
||||
|
||||
namespace app\api\job;
|
||||
|
||||
use think\queue\Job;
|
||||
use app\common\PlagiarismService;
|
||||
|
||||
/**
 * Queue job: uploads a manuscript to Turnitin and triggers the similarity check.
 *
 * The work is delegated to PlagiarismService::runUploadAndTrigger(), which is
 * also responsible for scheduling the follow-up PlagiarismPoll job.
 *
 * Payload ($data):
 *   - check_id   t_plagiarism_check.check_id
 *   - file_path  absolute local path of a readable PDF/DOCX
 *
 * NOTE(review): the original header states that long-running workers are
 * recycled by QueueJob (exit(1) after 6h or on fatal DB errors). This class
 * does not reference QueueJob itself — confirm where that happens.
 */
class PlagiarismRun
{
    /**
     * Run the upload for the check named in the payload, then delete the job.
     *
     * A payload lacking a positive check_id or a non-empty file_path is
     * dropped without touching the service. If the service call throws, the
     * job is NOT deleted here and the exception propagates to the worker.
     *
     * @param Job   $job  queue job handle
     * @param mixed $data job payload (array with check_id / file_path)
     * @return void
     */
    public function fire(Job $job, $data)
    {
        $checkId = 0;
        if (isset($data['check_id'])) {
            $checkId = intval($data['check_id']);
        }

        $filePath = '';
        if (isset($data['file_path'])) {
            $filePath = (string)$data['file_path'];
        }

        $payloadIsUsable = $checkId > 0 && $filePath !== '';
        if ($payloadIsUsable) {
            $uploader = new PlagiarismService();
            $uploader->log("PlagiarismRun job act!!");
            $uploader->runUploadAndTrigger($checkId, $filePath);
        }

        $job->delete();
    }
}
|
||||
@@ -4,7 +4,6 @@ namespace app\api\job;
|
||||
|
||||
use think\queue\Job;
|
||||
use app\common\PromotionService;
|
||||
use app\common\QueueJob;
|
||||
|
||||
/**
|
||||
* 【已废弃 / 兼容保留】
|
||||
@@ -16,35 +15,17 @@ use app\common\QueueJob;
|
||||
*/
|
||||
class PromotionPrepare
|
||||
{
|
||||
private $oQueueJob;
|
||||
|
||||
public function __construct()
|
||||
{
|
||||
$this->oQueueJob = new QueueJob();
|
||||
}
|
||||
|
||||
public function fire(Job $job, $data)
|
||||
{
|
||||
$this->oQueueJob->init($job);
|
||||
|
||||
$taskId = isset($data['task_id']) ? intval($data['task_id']) : 0;
|
||||
if ($taskId <= 0) {
|
||||
$this->oQueueJob->log("PromotionPrepare[deprecated] 无效的 task_id,删除任务");
|
||||
$job->delete();
|
||||
return;
|
||||
}
|
||||
|
||||
try {
|
||||
$service = new PromotionService();
|
||||
$service->enqueuePrepareTask($taskId);
|
||||
$this->oQueueJob->log("PromotionPrepare[deprecated] forwarded task_id={$taskId} -> PromotionPrepareTask");
|
||||
$job->delete();
|
||||
} catch (\Exception $e) {
|
||||
$this->oQueueJob->handleException($e, $job, "[deprecated] task_id={$taskId}");
|
||||
} catch (\Throwable $e) {
|
||||
$this->oQueueJob->handleException($e, $job, "[deprecated] task_id={$taskId}");
|
||||
} finally {
|
||||
$this->oQueueJob->finnal();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -4,7 +4,6 @@ namespace app\api\job;
|
||||
|
||||
use think\queue\Job;
|
||||
use app\common\PromotionService;
|
||||
use app\common\QueueJob;
|
||||
|
||||
/**
|
||||
* 队列任务:单封邮件 prepare(调用 LLM 生成个性化描述 + 渲染模板 + 写入 log)。
|
||||
@@ -18,41 +17,13 @@ use app\common\QueueJob;
|
||||
*/
|
||||
class PromotionPrepareEmail
|
||||
{
|
||||
// private $oQueueJob;
|
||||
|
||||
// public function __construct()
|
||||
// {
|
||||
// $this->oQueueJob = new QueueJob();
|
||||
// }
|
||||
|
||||
public function fire(Job $job, $data)
|
||||
{
|
||||
// $this->oQueueJob->init($job);
|
||||
//
|
||||
$logId = isset($data['log_id']) ? intval($data['log_id']) : 0;
|
||||
// if ($logId <= 0) {
|
||||
// $this->oQueueJob->log("PromotionPrepareEmail 无效的 log_id,删除任务");
|
||||
// $job->delete();
|
||||
// return;
|
||||
// }
|
||||
//
|
||||
// try {
|
||||
$service = new PromotionService();
|
||||
$service->log("id:".$logId);
|
||||
// $result = $service->prepareSingleEmail($logId);
|
||||
//
|
||||
// $code = isset($result['code']) ? $result['code'] : '';
|
||||
// $msg = isset($result['msg']) ? $result['msg'] : '';
|
||||
// $llm = isset($result['llm_status']) ? $result['llm_status'] : '';
|
||||
// $this->oQueueJob->log("PromotionPrepareEmail 完成 | log_id={$logId} code={$code} llm_status={$llm} msg={$msg}");
|
||||
|
||||
$result = $service->prepareSingleEmail($logId);
|
||||
$job->delete();
|
||||
// } catch (\Exception $e) {
|
||||
// $this->oQueueJob->handleException($e, $job, "log_id={$logId}");
|
||||
// } catch (\Throwable $e) {
|
||||
// $this->oQueueJob->handleException($e, $job, "log_id={$logId}");
|
||||
// } finally {
|
||||
// $this->oQueueJob->finnal();
|
||||
// }
|
||||
}
|
||||
}
|
||||
|
||||
@@ -17,40 +17,17 @@ use app\common\QueueJob;
|
||||
*/
|
||||
class PromotionPrepareTask
|
||||
{
|
||||
private $oQueueJob;
|
||||
|
||||
public function __construct()
|
||||
{
|
||||
$this->oQueueJob = new QueueJob();
|
||||
}
|
||||
|
||||
public function fire(Job $job, $data)
|
||||
{
|
||||
$this->oQueueJob->init($job);
|
||||
|
||||
$taskId = isset($data['task_id']) ? intval($data['task_id']) : 0;
|
||||
if ($taskId <= 0) {
|
||||
$this->oQueueJob->log("PromotionPrepareTask 无效的 task_id,删除任务");
|
||||
$job->delete();
|
||||
return;
|
||||
}
|
||||
|
||||
try {
|
||||
$service = new PromotionService();
|
||||
$result = $service->dispatchPrepareEmails($taskId);
|
||||
|
||||
$dispatched = isset($result['dispatched']) ? $result['dispatched'] : 0;
|
||||
$alreadyDone = isset($result['already_done']) ? $result['already_done'] : 0;
|
||||
$err = isset($result['error']) ? $result['error'] : '';
|
||||
$this->oQueueJob->log("PromotionPrepareTask 完成 | task_id={$taskId} dispatched={$dispatched} already_done={$alreadyDone} error={$err}");
|
||||
|
||||
$job->delete();
|
||||
} catch (\Exception $e) {
|
||||
$this->oQueueJob->handleException($e, $job, "task_id={$taskId}");
|
||||
} catch (\Throwable $e) {
|
||||
$this->oQueueJob->handleException($e, $job, "task_id={$taskId}");
|
||||
} finally {
|
||||
$this->oQueueJob->finnal();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -4,7 +4,6 @@ namespace app\api\job;
|
||||
|
||||
use think\queue\Job;
|
||||
use app\common\PromotionService;
|
||||
use app\common\QueueJob;
|
||||
|
||||
/**
|
||||
* 队列任务:发送 task 下"已 prepare"的邮件,按 min/max_interval 控制节奏。
|
||||
@@ -16,39 +15,18 @@ use app\common\QueueJob;
|
||||
*/
|
||||
class PromotionSend
|
||||
{
|
||||
private $oQueueJob;
|
||||
|
||||
public function __construct()
|
||||
{
|
||||
$this->oQueueJob = new QueueJob();
|
||||
}
|
||||
|
||||
public function fire(Job $job, $data)
|
||||
{
|
||||
$this->oQueueJob->init($job);
|
||||
|
||||
$taskId = isset($data['task_id']) ? intval($data['task_id']) : 0;
|
||||
if ($taskId <= 0) {
|
||||
$this->oQueueJob->log("PromotionSend 无效的 task_id,删除任务");
|
||||
$job->delete();
|
||||
return;
|
||||
}
|
||||
|
||||
try {
|
||||
$service = new PromotionService();
|
||||
$result = $service->processNextEmail($taskId);
|
||||
|
||||
$done = !empty($result['done']) ? 1 : 0;
|
||||
$reason = isset($result['reason']) ? $result['reason'] : '';
|
||||
$this->oQueueJob->log("PromotionSend 完成 | task_id={$taskId} done={$done} reason={$reason}");
|
||||
|
||||
$job->delete();
|
||||
} catch (\Exception $e) {
|
||||
$this->oQueueJob->handleException($e, $job, "task_id={$taskId}");
|
||||
} catch (\Throwable $e) {
|
||||
$this->oQueueJob->handleException($e, $job, "task_id={$taskId}");
|
||||
} finally {
|
||||
$this->oQueueJob->finnal();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
114
application/api/job/ReferenceCheck.php
Normal file
114
application/api/job/ReferenceCheck.php
Normal file
@@ -0,0 +1,114 @@
|
||||
<?php
|
||||
namespace app\api\job;
|
||||
|
||||
use think\Db;
|
||||
use think\queue\Job;
|
||||
use app\common\QueueJob;
|
||||
use app\common\QueueRedis;
|
||||
use app\common\ReferenceCheckService;
|
||||
|
||||
/**
 * Queue job: runs one reference check (article_reference_check_result row)
 * through ReferenceCheckService::runReferenceCheckOnce().
 */
class ReferenceCheck
{
    /** @var QueueJob shared job helper (logging, locking, exception handling) */
    private $oQueueJob;

    /** @var QueueRedis Redis helper used to mark the job as finished */
    private $queueRedis;

    /** @var int expiry in seconds passed to QueueRedis::finishJob() for the "completed" marker */
    private $completedTtl = 3600;

    public function __construct()
    {
        $this->oQueueJob = new QueueJob();
        $this->queueRedis = QueueRedis::getInstance();
    }

    /**
     * Execute the reference check identified by check_id.
     *
     * Flow: resolve check_id (payload first, raw job body as fallback) →
     * load the row → skip rows that are missing or already at status 1 →
     * clear any previous queue lock and acquire a new one → run the check →
     * sync the article-level status → mark the job finished and delete it.
     *
     * Failures of the check itself are retried with a 30 second delay; from
     * the third attempt on the row is marked failed and the job is deleted.
     *
     * @param Job   $job  queue job handle
     * @param mixed $data job payload (array with check_id)
     * @return void
     */
    public function fire(Job $job, $data)
    {
        $this->oQueueJob->init($job);

        $body = $job->getRawBody();
        $envelope = [];
        if (!empty($body)) {
            $envelope = json_decode($body, true);
        }
        $jobId = 'unknown';
        if (!empty($envelope['id'])) {
            $jobId = $envelope['id'];
        }

        $lockKey = '';
        $lockToken = '';

        $this->oQueueJob->log("-----------队列任务开始-----------");
        $this->oQueueJob->log("当前任务ID: {$jobId}, 尝试次数: {$job->attempts()}");

        try {
            $checkId = $this->extractCheckId($data, $envelope);
            if ($checkId <= 0) {
                $job->delete();
                return;
            }

            $record = Db::name('article_reference_check_result')->where('id', $checkId)->find();
            // Nothing to do when the row is gone or already completed (status 1).
            if (empty($record) || intval($record['status']) === 1) {
                $job->delete();
                return;
            }

            $className = get_class($this);
            $lockKey = "queue_job:{$className}:{$checkId}";
            $lockToken = uniqid() . '_' . getmypid();

            $checker = new ReferenceCheckService();
            $checker->clearReferenceCheckQueueLock($checkId);

            $locked = $this->oQueueJob->acquireLock($lockKey, $lockToken, $job);
            if (!$locked) {
                return;
            }

            try {
                $checker->runReferenceCheckOnce($checkId);

                $amId = isset($record['am_id']) ? intval($record['am_id']) : 0;
                if ($amId > 0) {
                    $checker->syncAmRefCheckStatus($amId);
                }
                $this->queueRedis->finishJob($lockKey, 'completed', $this->completedTtl, $lockToken);
                $job->delete();
                $this->oQueueJob->log("任务执行成功 | 日志ID: {$lockKey}");
            } catch (\Exception $failure) {
                $this->oQueueJob->log('ReferenceCheck error: ' . $failure->getMessage());
                if ($job->attempts() < 3) {
                    $job->release(30);
                } else {
                    $this->markFailed($checkId, $failure->getMessage());
                    $job->delete();
                }
            }
        } catch (\RuntimeException $e) {
            $this->oQueueJob->handleRetryableException($e, $lockKey, $lockToken, $job);
        } catch (\LogicException $e) {
            $this->oQueueJob->handleNonRetryableException($e, $lockKey, $lockToken, $job);
        } catch (\Exception $e) {
            $this->oQueueJob->handleRetryableException($e, $lockKey, $lockToken, $job);
        } finally {
            $this->oQueueJob->finnal();
        }
    }

    /**
     * Resolve check_id from the decoded payload, falling back to the raw job body.
     *
     * @param mixed $data     payload handed to fire()
     * @param mixed $envelope decoded raw job body (array or null)
     * @return int check id, or a value <= 0 when none is present
     */
    private function extractCheckId($data, $envelope)
    {
        $checkId = intval(isset($data['check_id']) ? $data['check_id'] : 0);
        if ($checkId <= 0 && !empty($envelope['data']['check_id'])) {
            $checkId = intval($envelope['data']['check_id']);
        }
        return $checkId;
    }

    /**
     * Mark the check row as failed (status 2) and re-sync the article-level status.
     *
     * A failure while writing the row is logged and does not stop the sync.
     *
     * @param int    $checkId
     * @param string $msg error text stored in error_msg
     * @return void
     */
    private function markFailed($checkId, $msg)
    {
        $record = Db::name('article_reference_check_result')->where('id', $checkId)->find();

        try {
            $failedState = [
                'status' => 2,
                'error_msg' => $msg,
            ];
            (new ReferenceCheckService())->updateCheckResult($checkId, $failedState);
        } catch (\Exception $e) {
            \think\Log::error('ReferenceCheck markFailed: ' . $e->getMessage());
        }

        if (empty($record) || !isset($record['am_id'])) {
            return;
        }
        $amId = intval($record['am_id']);
        if ($amId > 0) {
            (new ReferenceCheckService())->syncAmRefCheckStatus($amId);
        }
    }
}
|
||||
162
application/api/job/ReferenceCheckTwo.php
Normal file
162
application/api/job/ReferenceCheckTwo.php
Normal file
@@ -0,0 +1,162 @@
|
||||
<?php
|
||||
namespace app\api\job;
|
||||
|
||||
use think\Db;
|
||||
use think\queue\Job;
|
||||
use app\common\QueueJob;
|
||||
use app\common\QueueRedis;
|
||||
use app\common\ReferenceCheckService;
|
||||
use app\common\service\LLMService;
|
||||
|
||||
/**
 * Queue job: second-pass ("Crossref recheck") of one reference check row.
 *
 * Re-evaluates an article_reference_check_result row with the LLM, feeding it
 * the DOI block produced by ReferenceCheckService::prepareRecheckPayload().
 */
class ReferenceCheckTwo
{
    /** @var QueueJob shared job helper (logging, locking, exception handling) */
    private $oQueueJob;

    /** @var QueueRedis Redis helper used to mark the job as finished */
    private $QueueRedis;

    /** @var int expiry in seconds passed to QueueRedis::finishJob() for the "completed" marker */
    private $completedExpire = 3600;

    public function __construct()
    {
        $this->oQueueJob = new QueueJob();
        $this->QueueRedis = QueueRedis::getInstance();
    }

    /**
     * Execute the recheck identified by check_id.
     *
     * The payload and the row are validated BEFORE the Redis lock is
     * acquired (same order as the sibling ReferenceCheck job), so an invalid
     * or stale check_id no longer leaves a lock behind on early return.
     *
     * Unlike ReferenceCheck, rows that already have status 1 are not skipped:
     * the recheck is allowed to overwrite a completed result.
     *
     * Failures inside the recheck are retried with a 30 second delay; from the
     * third attempt on the row is marked failed and the job is deleted.
     *
     * @param Job   $job  queue job handle
     * @param mixed $data job payload (array with check_id)
     * @return void
     */
    public function fire(Job $job, $data)
    {
        $this->oQueueJob->init($job);

        $rawBody = empty($job->getRawBody()) ? '' : $job->getRawBody();
        $jobData = empty($rawBody) ? [] : json_decode($rawBody, true);
        $jobId = empty($jobData['id']) ? 'unknown' : $jobData['id'];

        $sRedisKey = '';
        $sRedisValue = '';

        $this->oQueueJob->log("-----------队列任务开始-----------");
        $this->oQueueJob->log("当前任务ID: {$jobId}, 尝试次数: {$job->attempts()}");

        try {
            $checkId = intval(isset($data['check_id']) ? $data['check_id'] : 0);
            if ($checkId <= 0 && !empty($jobData['data']['check_id'])) {
                $checkId = intval($jobData['data']['check_id']);
            }
            if ($checkId <= 0) {
                $job->delete();
                return;
            }

            $row = Db::name('article_reference_check_result')->where('id', $checkId)->find();
            if (empty($row)) {
                $job->delete();
                return;
            }

            // Only take the lock once we know there is real work to do.
            $sClassName = get_class($this);
            $sRedisKey = "queue_job_two:{$sClassName}:{$checkId}";
            $sRedisValue = uniqid() . '_' . getmypid();

            if (!$this->oQueueJob->acquireLock($sRedisKey, $sRedisValue, $job)) {
                return;
            }

            try {
                $svc = new ReferenceCheckService();

                $contentA = $svc->resolveMainContentForJob($row);
                $referText = trim((string)(isset($row['refer_text']) ? $row['refer_text'] : ''));

                // Validate the inputs before building the recheck payload, so no
                // lookup work is spent on a row that is about to be failed.
                if ($contentA === '' || $referText === '') {
                    $this->markFailed($checkId, 'Missing article_main.content or refer_text');
                    $job->delete();
                    return;
                }

                $refer = null;
                $pReferId = isset($row['p_refer_id']) ? intval($row['p_refer_id']) : 0;
                if ($pReferId > 0) {
                    $refer = Db::name('production_article_refer')
                        ->where('p_refer_id', $pReferId)
                        ->where('state', 0)
                        ->find();
                }

                $payload = $svc->prepareRecheckPayload(is_array($refer) ? $refer : [], $referText);
                $doiBlock = $payload['doi_block'];

                $llm = new LLMService();
                $llmResult = $llm->checkReference($contentA, $referText, true, $doiBlock);

                $requestFailed = !empty($llmResult['request_failed']);
                $canSupport = $svc->parseLlmCanSupport($llmResult);
                // A failed LLM request may not carry a confidence value at all.
                $confidence = isset($llmResult['confidence']) ? floatval($llmResult['confidence']) : 0.0;
                $tag = $payload['has_abstract']
                    ? ('[Crossref复核' . ($payload['doi_used'] !== '' ? ' ' . $payload['doi_used'] : '') . ']')
                    : '[Crossref复核-无摘要]';
                $reason = $tag . ' ' . (isset($llmResult['reason']) ? $llmResult['reason'] : '');

                // LLM transport failure: record status 2, then throw so the
                // retry logic in the catch below takes over.
                if ($requestFailed) {
                    $failureText = isset($llmResult['reason']) ? $llmResult['reason'] : 'LLM request failed';
                    $svc->updateCheckResult($checkId, [
                        'confidence' => $confidence,
                        'reason' => $reason,
                        'status' => 2,
                        'error_msg' => $failureText,
                    ]);
                    throw new \RuntimeException($failureText);
                }

                $affected = $svc->updateCheckResult($checkId, [
                    'can_support' => $canSupport ? 1 : 0,
                    'is_match' => $canSupport ? 1 : 0,
                    'confidence' => $confidence,
                    'reason' => $reason,
                    'status' => 1,
                    'error_msg' => '',
                ]);
                $this->oQueueJob->log("Crossref复核写入 id={$checkId} affected={$affected} can_support=" . ($canSupport ? 1 : 0) . " confidence=" . $confidence);

                $amId = intval(isset($row['am_id']) ? $row['am_id'] : 0);
                if ($amId > 0) {
                    $svc->syncAmRefCheckStatus($amId);
                }
                $this->QueueRedis->finishJob($sRedisKey, 'completed', $this->completedExpire, $sRedisValue);
                $job->delete();
                $this->oQueueJob->log("任务执行成功 | 日志ID: {$sRedisKey}");
            } catch (\Exception $e) {
                $this->oQueueJob->log('ReferenceCheckTwo error: ' . $e->getMessage());
                if ($job->attempts() >= 3) {
                    $this->markFailed($checkId, $e->getMessage());
                    $job->delete();
                    return;
                }
                $job->release(30);
            }
        } catch (\RuntimeException $e) {
            $this->oQueueJob->handleRetryableException($e, $sRedisKey, $sRedisValue, $job);
        } catch (\LogicException $e) {
            $this->oQueueJob->handleNonRetryableException($e, $sRedisKey, $sRedisValue, $job);
        } catch (\Exception $e) {
            $this->oQueueJob->handleRetryableException($e, $sRedisKey, $sRedisValue, $job);
        } finally {
            $this->oQueueJob->finnal();
        }
    }

    /**
     * Mark the check row as failed (status 2) and re-sync the article-level status.
     *
     * A failure while writing the row is logged and does not stop the sync.
     *
     * @param int    $checkId
     * @param string $msg error text stored in error_msg
     * @return void
     */
    private function markFailed($checkId, $msg)
    {
        $row = Db::name('article_reference_check_result')->where('id', $checkId)->find();
        try {
            (new ReferenceCheckService())->updateCheckResult($checkId, [
                'status' => 2,
                'error_msg' => $msg,
            ]);
        } catch (\Exception $e) {
            \think\Log::error('ReferenceCheckTwo markFailed: ' . $e->getMessage());
        }
        $amId = empty($row) ? 0 : intval(isset($row['am_id']) ? $row['am_id'] : 0);
        if ($amId > 0) {
            (new ReferenceCheckService())->syncAmRefCheckStatus($amId);
        }
    }
}
|
||||
31
application/api/job/myQueue.php
Normal file
31
application/api/job/myQueue.php
Normal file
@@ -0,0 +1,31 @@
|
||||
<?php
|
||||
|
||||
namespace app\api\job;
|
||||
|
||||
use think\queue\Job;
|
||||
/**
 * Placeholder queue job: performs no business logic and simply removes
 * itself from the queue.
 */
class myQueue
{
    /**
     * Delete the job. If the first delete throws, the error is discarded and
     * the delete is attempted once more (an error from that second attempt
     * propagates to the caller).
     *
     * @param Job   $job  queue job handle
     * @param mixed $data job payload (unused)
     * @return void
     */
    public function fire(Job $job, $data)
    {
        try {
            // Business logic would go here; currently there is none.
            $job->delete();
            return;
        } catch (\Throwable $ignored) {
            // No retry is wanted: fall through and drop the job.
        }

        $job->delete();
    }

    /**
     * Optional failure hook: intentionally does nothing.
     *
     * @param mixed $data job payload (unused)
     * @return void
     */
    public function failed($data)
    {
    }
}
|
||||
194
application/common/ArticleSymbolNormalizer.php
Normal file
194
application/common/ArticleSymbolNormalizer.php
Normal file
@@ -0,0 +1,194 @@
|
||||
<?php
|
||||
|
||||
namespace app\common;
|
||||
|
||||
/**
 * Symbol-level proofreading for journal article text: adjusts punctuation,
 * whitespace and full-width/half-width forms only; never adds or removes
 * words.
 *
 * Design notes:
 * - Rules are conservative and can be switched off one by one via $options.
 * - Use normalize() for plain text and normalizeHtml() for HTML (only the
 *   text between tags is processed, so URLs in attributes stay intact).
 * - Abstracts are often stored entity-escaped (&gt; &lt; &amp; ...);
 *   normalizeAbstract() decodes entities before normalizing.
 * - English journals normally contain no Chinese: english_journal=true (or
 *   normalizeEnglishAbstract) turns off the Han-only rules.
 *
 * All non-ASCII punctuation is written as explicit code-point escapes so the
 * rules survive editors/tools that fold full-width characters to ASCII.
 */
class ArticleSymbolNormalizer
{
    /** @var string PCRE character-class fragment: U+4E00–U+9FFF plus U+3400–U+4DBF */
    private static $han = '\x{4E00}-\x{9FFF}\x{3400}-\x{4DBF}';

    /**
     * Normalize symbols in plain text.
     *
     * @param string $text
     * @param array  $options boolean switches:
     *   - line_endings         CRLF / CR → LF (default true)
     *   - fullwidth_space      U+3000 ideographic space → ASCII space (default true)
     *   - collapse_spaces      runs of 2+ spaces/tabs (not newlines) → one space (default true)
     *   - remove_zwsp          strip U+200B–U+200D, U+FEFF and U+00AD (default true)
     *   - comma_cjk            ASCII comma between two Han characters → U+FF0C (default true)
     *   - comma_latin          U+FF0C between two ASCII letters/digits → ASCII comma (default true)
     *   - period_cjk           U+FF0E directly after a Han character → U+3002 (default true)
     *   - bracket_latin        U+FF08 ... U+FF09 around ASCII-only content → ( ... ) (default false)
     *   - decode_html_entities decode HTML entities first (default false)
     *   - english_journal      disables comma_cjk and period_cjk unless the
     *                          caller set them explicitly (default false)
     *
     * @return string
     */
    public static function normalize($text, array $options = [])
    {
        $text = (string)$text;
        if ($text === '') {
            return '';
        }

        $o = array_merge([
            'line_endings' => true,
            'fullwidth_space' => true,
            'collapse_spaces' => true,
            'remove_zwsp' => true,
            'comma_cjk' => true,
            'comma_latin' => true,
            'period_cjk' => true,
            'bracket_latin' => false,
            'decode_html_entities' => false,
            'english_journal' => false,
        ], $options);

        // english_journal only overrides rules the caller did not set explicitly.
        if (!empty($o['english_journal'])) {
            if (!array_key_exists('comma_cjk', $options)) {
                $o['comma_cjk'] = false;
            }
            if (!array_key_exists('period_cjk', $options)) {
                $o['period_cjk'] = false;
            }
        }

        if (!empty($o['decode_html_entities'])) {
            $text = html_entity_decode($text, ENT_QUOTES | ENT_HTML5, 'UTF-8');
        }

        if (!empty($o['line_endings'])) {
            $text = str_replace(["\r\n", "\r"], "\n", $text);
        }
        if (!empty($o['fullwidth_space'])) {
            $text = str_replace("\u{3000}", ' ', $text);
        }
        if (!empty($o['remove_zwsp'])) {
            // Zero-width space/joiners, BOM and soft hyphen: invisible, so removal keeps visible text.
            $text = self::replaceOrKeep('/[\x{200B}-\x{200D}\x{FEFF}\x{00AD}]/u', '', $text);
        }
        if (!empty($o['collapse_spaces'])) {
            $text = self::replaceOrKeep('/[ \t]{2,}/u', ' ', $text);
        }

        $han = self::$han;

        if (!empty($o['comma_cjk'])) {
            // Han , Han → Han U+FF0C Han
            $text = self::replaceOrKeep('/(?<=[' . $han . ']),(?=[' . $han . '])/u', "\u{FF0C}", $text);
        }
        if (!empty($o['comma_latin'])) {
            // alnum U+FF0C alnum → alnum , alnum
            $text = self::replaceOrKeep('/(?<=[0-9A-Za-z])\x{FF0C}(?=[0-9A-Za-z])/u', ',', $text);
        }
        if (!empty($o['period_cjk'])) {
            // Full-width full stop U+FF0E after a Han character → ideographic full stop U+3002.
            // The code point must be explicit: a bare "." would match ANY character.
            $text = self::replaceOrKeep('/(?<=[' . $han . '])\x{FF0E}/u', "\u{3002}", $text);
        }
        if (!empty($o['bracket_latin'])) {
            // Full-width parentheses whose content is ASCII letters/digits, whitespace and
            // common punctuation only. "~" is the delimiter because "/" is in the class.
            $converted = preg_replace_callback(
                '~\x{FF08}([0-9A-Za-z\s.,;:\-+/=]+)\x{FF09}~u',
                static function ($m) {
                    return '(' . $m[1] . ')';
                },
                $text
            );
            if ($converted !== null) {
                $text = $converted;
            }
        }

        return $text;
    }

    /**
     * Normalize symbols in an HTML fragment: only text outside tags is
     * changed; tag names and attribute values are left as they are.
     *
     * The input is split on `<...>`; text chunks go through normalize(), tag
     * chunks are copied verbatim. Malformed HTML or an unescaped `<` inside
     * an attribute value can be mis-split — extract plain text first for
     * complex input.
     *
     * @param string $html
     * @param array  $options same as normalize()
     * @return string
     */
    public static function normalizeHtml($html, array $options = [])
    {
        $html = (string)$html;
        if ($html === '') {
            return '';
        }

        $parts = preg_split('/(<[^>]*>)/u', $html, -1, PREG_SPLIT_DELIM_CAPTURE);
        if ($parts === false) {
            return self::normalize($html, $options);
        }

        $out = '';
        foreach ($parts as $i => $chunk) {
            if ($chunk === '') {
                continue;
            }
            // With PREG_SPLIT_DELIM_CAPTURE, odd indexes hold the captured tags.
            if ($i % 2 === 1 && isset($chunk[0]) && $chunk[0] === '<') {
                $out .= $chunk;
            } else {
                $out .= self::normalize($chunk, $options);
            }
        }

        return $out;
    }

    /**
     * Abstract helper: decode HTML entities, then apply the normal rules.
     *
     * Intended for abstracts stored in htmlspecialchars form. When the
     * abstract holds real HTML tags that must survive, use normalizeHtml()
     * and pass decode_html_entities yourself.
     *
     * @param string $abstract
     * @param array  $options same as normalize(); decode_html_entities defaults
     *                        to true here and can be overridden with false
     * @return string
     */
    public static function normalizeAbstract($abstract, array $options = [])
    {
        $opts = array_merge(['decode_html_entities' => true], $options);
        return self::normalize($abstract, $opts);
    }

    /**
     * Abstract with HTML tags: entity decoding and normalization are applied
     * to the text outside tags only.
     *
     * @param string $html
     * @param array  $options same as normalize(); decode_html_entities defaults to true
     * @return string
     */
    public static function normalizeAbstractHtml($html, array $options = [])
    {
        $opts = array_merge(['decode_html_entities' => true], $options);
        return self::normalizeHtml($html, $opts);
    }

    /**
     * English-journal abstract: entity decoding plus normalization, with the
     * Han-only punctuation rules off by default.
     *
     * @param string $abstract
     * @param array  $options same as normalize()
     * @return string
     */
    public static function normalizeEnglishAbstract($abstract, array $options = [])
    {
        return self::normalizeAbstract($abstract, array_merge(['english_journal' => true], $options));
    }

    /**
     * English-journal abstract containing HTML: like normalizeAbstractHtml(),
     * with the Han-only punctuation rules off by default.
     *
     * @param string $html
     * @param array  $options same as normalize()
     * @return string
     */
    public static function normalizeEnglishAbstractHtml($html, array $options = [])
    {
        return self::normalizeAbstractHtml($html, array_merge(['english_journal' => true], $options));
    }

    /**
     * preg_replace() that returns the subject unchanged when PCRE fails
     * (preg_replace() returns null on error, e.g. for invalid UTF-8 with /u),
     * so one failing rule cannot wipe the whole text.
     *
     * @param string $pattern
     * @param string $replacement
     * @param string $subject
     * @return string
     */
    private static function replaceOrKeep($pattern, $replacement, $subject)
    {
        $result = preg_replace($pattern, $replacement, $subject);
        return $result === null ? $subject : $result;
    }
}
|
||||
440
application/common/PlagiarismService.php
Normal file
440
application/common/PlagiarismService.php
Normal file
@@ -0,0 +1,440 @@
|
||||
<?php
|
||||
|
||||
namespace app\common;
|
||||
|
||||
use think\Db;
|
||||
use think\Env;
|
||||
use think\Queue;
|
||||
use think\Exception;
|
||||
|
||||
/**
|
||||
* 查重业务层:把 TurnitinService 的低层调用包装成"按 article 查重"的高层流程,
|
||||
* 并维护 t_plagiarism_check 状态机。
|
||||
*
|
||||
* 状态流:
|
||||
* submit() → state=1(上传中),入队 PlagiarismRun
|
||||
* PlagiarismRun.fire → 上传 + 触发 similarity → state=2(比对中),入队 PlagiarismPoll
|
||||
* PlagiarismPoll.fire → 轮询 status,完成后下载 PDF → state=3(完成)
|
||||
* 任意环节抛异常 → state=4(失败),写 error_msg
|
||||
*/
|
||||
class PlagiarismService
|
||||
{
|
||||
/**
|
||||
* 报告 PDF 本地保存目录(相对于项目根,永久保留)
|
||||
*/
|
||||
const REPORT_DIR = 'public/plagiarism';
|
||||
|
||||
/**
|
||||
* 轮询间隔(秒)。Turnitin 一般 1-5 分钟出结果,30 秒一次比较合适
|
||||
*/
|
||||
const POLL_INTERVAL = 30;
|
||||
|
||||
/**
|
||||
* 最长轮询次数(30s × 60 = 30 分钟)
|
||||
*/
|
||||
const MAX_POLL_ATTEMPTS = 60;
|
||||
|
||||
private $logFile;
|
||||
|
||||
/**
 * Point the service log at plagiarism_task.log inside the project's
 * runtime directory.
 */
public function __construct()
{
    $runtimeDir = ROOT_PATH . 'runtime';
    $this->logFile = $runtimeDir . DS . 'plagiarism_task.log';
}
|
||||
|
||||
// ---------- 顶层入口 ----------
|
||||
|
||||
/**
 * Submit a plagiarism check: create the plagiarism_check row (state 1,
 * "uploading"), queue a PlagiarismRun job for it and return immediately.
 *
 * @param int    $articleId   submission (article) id
 * @param string $filePath    absolute local path of a readable PDF/DOCX
 * @param int    $triggeredBy user_id of whoever triggered the check (the editor for manual runs)
 * @param string $source      'manual' / 'auto_xxx'
 * @return int check_id of the new row
 * @throws Exception when $filePath is not a readable file
 */
public function submit($articleId, $filePath, $triggeredBy = 0, $source = 'manual')
{
    $fileUsable = is_file($filePath) && is_readable($filePath);
    if (!$fileUsable) {
        throw new Exception("File not readable: {$filePath}");
    }

    $journalId = (int) Db::name('article')
        ->where('article_id', $articleId)
        ->value('journal_id');

    $timestamp = time();
    $record = [
        'article_id'       => $articleId,
        'journal_id'       => $journalId,
        'triggered_by'     => $triggeredBy,
        'trigger_source'   => $source,
        'state'            => 1, // uploading
        'source_file_name' => basename($filePath),
        'source_file_size' => filesize($filePath) ?: 0,
        'ctime'            => $timestamp,
        'utime'            => $timestamp,
    ];
    $checkId = Db::name('plagiarism_check')->insertGetId($record);
    $this->log("submit service act");

    // Hand the upload + similarity trigger over to the queue worker.
    $jobPayload = ['check_id' => $checkId, 'file_path' => $filePath];
    Queue::push('app\\api\\job\\PlagiarismRun', $jobPayload, 'PlagiarismRun');

    return (int)$checkId;
}
|
||||
|
||||
/**
 * Job entry point: create a Turnitin submission for the check, upload the
 * file, trigger the similarity report and schedule the first PlagiarismPoll.
 *
 * On success the row ends in state 2 ("comparing"). Any error marks the check
 * as failed with an "[upload]" prefix and is re-thrown to the caller.
 *
 * @param int    $checkId  plagiarism_check.check_id
 * @param string $filePath absolute local path of the file to upload
 * @return void
 * @throws \Throwable whatever the Turnitin calls or DB updates raise
 */
public function runUploadAndTrigger($checkId, $filePath)
{
    $check = $this->mustGetCheck($checkId);
    $this->log("runUploadAndTrigger is act0");

    try {
        $tii = new TurnitinService();

        // 1. Create the submission; fall back to a generated title when the article has none.
        $title = (string) Db::name('article')
            ->where('article_id', $check['article_id'])
            ->value('title');
        if ($title === '') {
            $title = 'Article #' . $check['article_id'];
        }
        $this->log("runUploadAndTrigger is act1");

        $editorRef = 'editor_' . $check['triggered_by'];
        $submissionRequest = [
            'title'     => mb_substr($title, 0, 250),
            'owner'     => $editorRef,
            'submitter' => $editorRef,
            'metadata'  => [
                'article_id' => (string)$check['article_id'],
                'check_id'   => (string)$check['check_id'],
            ],
        ];
        $created = $tii->createSubmission($submissionRequest);

        if (isset($created['id'])) {
            $submissionId = $created['id'];
        } else {
            $submissionId = '';
        }
        if ($submissionId === '') {
            throw new Exception('Turnitin createSubmission returned empty id: ' . json_encode($created));
        }

        $this->updateCheck($checkId, [
            'tii_submission_id' => $submissionId,
            'raw_response'      => json_encode($created, JSON_UNESCAPED_UNICODE),
        ]);
        $this->log("runUploadAndTrigger is act2");

        // 2. Upload the file.
        $tii->uploadFile($submissionId, $filePath, basename($filePath));

        // 3. Trigger the similarity report.
        $triggered = $tii->triggerSimilarity($submissionId);
        $this->updateCheck($checkId, [
            'state'             => 2, // comparing
            'tii_report_status' => 'PROCESSING',
            'raw_response'      => json_encode($triggered, JSON_UNESCAPED_UNICODE),
        ]);
        $this->log("runUploadAndTrigger is act3");

        // 4. Schedule the first poll with a delay so Turnitin has time to start processing.
        $firstPoll = ['check_id' => $checkId, 'attempt' => 1];
        Queue::later(self::POLL_INTERVAL, 'app\\api\\job\\PlagiarismPoll', $firstPoll, 'PlagiarismPoll');
    } catch (\Throwable $error) {
        $this->markFailed($checkId, '[upload] ' . $error->getMessage());
        throw $error;
    }
}
|
||||
|
||||
/**
 * Job entry point: poll the Turnitin similarity status of a check.
 *
 * - COMPLETE: store the score, download the PDF report, refresh the viewer
 *   URL and move the check to state 3.
 * - ERROR: mark the check as failed.
 * - anything else: schedule the next poll after POLL_INTERVAL seconds, until
 *   MAX_POLL_ATTEMPTS is reached (then mark the check as failed).
 *
 * Exceptions are treated as transient while attempts remain: the next poll is
 * scheduled and the message is stored in error_msg. Once attempts are
 * exhausted the check is marked failed and the exception is re-thrown.
 *
 * Follow-up polls are queued on 'PlagiarismPoll', the same queue the first
 * poll is pushed to by runUploadAndTrigger(); the original used 'plagiarism'
 * here, which moved the chain to a different queue after one iteration.
 *
 * @param int $checkId plagiarism_check.check_id
 * @param int $attempt 1-based poll attempt number
 * @return void
 * @throws \Throwable the last error once MAX_POLL_ATTEMPTS is exhausted
 */
public function runPollStatus($checkId, $attempt = 1)
{
    $pollJob = 'app\\api\\job\\PlagiarismPoll';
    $pollQueue = 'PlagiarismPoll';

    $check = $this->mustGetCheck($checkId);
    if (empty($check['tii_submission_id'])) {
        $this->markFailed($checkId, '[poll] tii_submission_id empty');
        return;
    }

    try {
        $tii = new TurnitinService();
        $statusResp = $tii->getSimilarityStatus($check['tii_submission_id']);
        $status = isset($statusResp['status']) ? strtoupper($statusResp['status']) : '';

        $this->updateCheck($checkId, [
            'tii_report_status' => $status,
            'attempts'          => $attempt,
            'raw_response'      => json_encode($statusResp, JSON_UNESCAPED_UNICODE),
        ]);

        if ($status === 'COMPLETE') {
            $score = isset($statusResp['overall_match_percentage'])
                ? floatval($statusResp['overall_match_percentage']) : 0;

            // Download the PDF report and fetch the online viewer URL.
            $localPdf = $this->downloadAndStorePdf($tii, $check['tii_submission_id'], $checkId);
            $viewerInfo = $this->refreshViewerUrl($tii, $check['tii_submission_id']);

            $this->updateCheck($checkId, [
                'state'                => 3,
                'similarity_score'     => $score,
                'pdf_local_path'       => $localPdf,
                'view_only_url'        => $viewerInfo['url'],
                'view_only_url_expire' => $viewerInfo['expire'],
                'error_msg'            => '',
            ]);
            return;
        }

        if ($status === 'ERROR') {
            $errMsg = isset($statusResp['error_code']) ? (string)$statusResp['error_code'] : 'Turnitin reported ERROR';
            $this->markFailed($checkId, '[poll] ' . $errMsg);
            return;
        }

        // PROCESSING or any other intermediate status: keep polling.
        if ($attempt >= self::MAX_POLL_ATTEMPTS) {
            $this->markFailed($checkId, '[poll] timeout after ' . $attempt . ' attempts');
            return;
        }
        Queue::later(
            self::POLL_INTERVAL,
            $pollJob,
            ['check_id' => $checkId, 'attempt' => $attempt + 1],
            $pollQueue
        );
    } catch (\Throwable $e) {
        // Tolerate transient failures (e.g. network hiccups) while attempts remain.
        if ($attempt < self::MAX_POLL_ATTEMPTS) {
            Queue::later(
                self::POLL_INTERVAL,
                $pollJob,
                ['check_id' => $checkId, 'attempt' => $attempt + 1],
                $pollQueue
            );
            $this->updateCheck($checkId, [
                'attempts'  => $attempt,
                'error_msg' => '[poll] transient: ' . $e->getMessage(),
            ]);
            return;
        }
        $this->markFailed($checkId, '[poll] exhausted: ' . $e->getMessage());
        throw $e;
    }
}
|
||||
|
||||
/**
 * Generate a fresh online-viewer URL for a check (for use when the stored
 * one has expired) and persist it on the check row.
 *
 * @param int $checkId plagiarism_check.check_id
 * @return array{url:string, expire:int, local_pdf:string}
 * @throws Exception when the check has no Turnitin submission id
 */
public function refreshViewerUrlFor($checkId)
{
    $check = $this->mustGetCheck($checkId);
    $submissionId = $check['tii_submission_id'];
    if (empty($submissionId)) {
        throw new Exception('check has no tii_submission_id');
    }

    $viewer = $this->refreshViewerUrl(new TurnitinService(), $check['tii_submission_id']);
    $viewerUrl = $viewer['url'];
    $expiresAt = $viewer['expire'];

    $this->updateCheck($checkId, [
        'view_only_url'        => $viewerUrl,
        'view_only_url_expire' => $expiresAt,
    ]);

    return [
        'url'       => $viewerUrl,
        'expire'    => $expiresAt,
        'local_pdf' => $check['pdf_local_path'],
    ];
}
|
||||
|
||||
// ---------- 内部 ----------
|
||||
|
||||
/**
 * Request a viewer URL from Turnitin and normalise the answer.
 *
 * The URL is read from the "viewer_url" field of the response, falling back to "url";
 * it is an empty string when neither field is present.
 *
 * @param object $tii          client exposing getViewerUrl()
 * @param string $submissionId Turnitin submission id
 * @return array{url:string, expire:int}
 */
private function refreshViewerUrl($tii, $submissionId)
{
    $response = $tii->getViewerUrl($submissionId);

    $viewerUrl = '';
    foreach (['viewer_url', 'url'] as $field) {
        if (isset($response[$field])) {
            $viewerUrl = (string)$response[$field];
            break;
        }
    }

    // Conservative default lifetime: two hours from now.
    $expiresAt = time() + 7200;

    return ['url' => $viewerUrl, 'expire' => $expiresAt];
}
|
||||
|
||||
/**
 * Request the PDF similarity report, wait for it inline, download it and save it locally.
 *
 * @param object $tii          client exposing requestPdfReport(), getPdfReportStatus()
 *                             and downloadPdfReport()
 * @param string $submissionId Turnitin submission id
 * @param int    $checkId      check id, used in the generated file name
 * @return string path of the stored PDF, built as self::REPORT_DIR . '/' . <file name>
 * @throws Exception when the request returns no id, generation fails or never reaches
 *                   SUCCESS within the polling window, the download is empty, or the
 *                   file cannot be written
 */
private function downloadAndStorePdf($tii, $submissionId, $checkId)
{
    // 1. Ask Turnitin to generate the report.
    $req = $tii->requestPdfReport($submissionId);
    $pdfId = isset($req['id']) ? $req['id'] : '';
    if ($pdfId === '') {
        throw new Exception('requestPdfReport empty id: ' . json_encode($req));
    }

    // 2. Poll the generation status inline: up to 30 probes, 6 seconds apart.
    $maxLoops = 30;
    $ready = false;
    for ($i = 0; $i < $maxLoops; $i++) {
        $st = $tii->getPdfReportStatus($submissionId, $pdfId);
        $stCode = isset($st['status']) ? strtoupper($st['status']) : '';
        if ($stCode === 'SUCCESS') {
            $ready = true;
            break;
        }
        if ($stCode === 'FAILED') {
            throw new Exception('PDF report generation failed: ' . json_encode($st));
        }
        // No point in sleeping after the final probe.
        if ($i < $maxLoops - 1) {
            sleep(6);
        }
    }
    // Without this guard a report that never became ready would still be "downloaded".
    if (!$ready) {
        throw new Exception('PDF report not ready after ' . $maxLoops . ' status checks');
    }

    // 3. Download the binary.
    $binary = $tii->downloadPdfReport($submissionId, $pdfId);
    if (!is_string($binary) || strlen($binary) < 100) {
        throw new Exception('downloaded pdf is empty/too small');
    }

    // 4. Write it to disk.
    $rootDir = ROOT_PATH ?: dirname(dirname(__DIR__));
    $absDir = rtrim($rootDir, '/\\') . DIRECTORY_SEPARATOR . self::REPORT_DIR;
    // The second is_dir() covers a concurrent worker creating the directory first.
    if (!is_dir($absDir) && !@mkdir($absDir, 0755, true) && !is_dir($absDir)) {
        throw new Exception('failed to create report dir ' . $absDir);
    }

    $filename = sprintf('check_%d_%s.pdf', $checkId, date('Ymd_His'));
    $absPath = $absDir . DIRECTORY_SEPARATOR . $filename;
    $bytes = file_put_contents($absPath, $binary);
    if ($bytes === false || $bytes < 100) {
        throw new Exception('failed to save pdf to ' . $absPath);
    }

    return self::REPORT_DIR . '/' . $filename;
}
|
||||
|
||||
/**
 * Fetch a plagiarism_check row by id, failing when it does not exist.
 *
 * @param int $checkId
 * @return array the row
 * @throws Exception when no row matches
 */
private function mustGetCheck($checkId)
{
    $record = Db::name('plagiarism_check')
        ->where('check_id', $checkId)
        ->find();

    if ($record) {
        return $record;
    }

    throw new Exception("plagiarism_check #{$checkId} not found");
}
|
||||
|
||||
/**
 * Update columns of one plagiarism_check row; `utime` is always set to the current time.
 *
 * @param int   $checkId
 * @param array $data column => value pairs
 */
private function updateCheck($checkId, array $data)
{
    $payload = $data;
    $payload['utime'] = time();

    Db::name('plagiarism_check')
        ->where('check_id', $checkId)
        ->update($payload);
}
|
||||
|
||||
/**
 * Mark a check as failed: state becomes 4 and the error message is stored,
 * truncated to 1000 characters.
 *
 * @param int    $checkId
 * @param string $errMsg
 */
private function markFailed($checkId, $errMsg)
{
    $this->log("markFailed act");

    $truncated = mb_substr($errMsg, 0, 1000);
    $this->updateCheck($checkId, ['state' => 4, 'error_msg' => $truncated]);
}
|
||||
|
||||
/**
 * Locate the main manuscript of an article and return its local path.
 *
 * Picks the article_file row with the highest file_id whose type_name is
 * 'manuscirpt' (sic: the misspelling is the value stored in the database) and
 * hands its file_url to resolveFileUrlToLocal().
 *
 * @param int $articleId
 * @return string local file path
 * @throws Exception when the article has no such file or the path cannot be resolved
 */
public function locateArticleManuscript($articleId)
{
    $query = Db::name('article_file')
        ->where('article_id', $articleId)
        ->where('type_name', 'manuscirpt') // historical spelling
        ->order('file_id desc');
    $file = $query->find();

    $hasUrl = $file && !empty($file['file_url']);
    if (!$hasUrl) {
        throw new Exception("article #{$articleId} has no manuscirpt file");
    }

    return $this->resolveFileUrlToLocal($file['file_url']);
}
|
||||
|
||||
/**
 * Resolve a file_url (absolute path, http(s) URL, or relative path) to a local file path.
 *
 * Resolution order:
 *   1. An absolute path (POSIX "/..." or Windows "C:\...") that exists is returned as is.
 *   2. An http(s) URL that starts with plagiarism.cdn_prefix is mapped under
 *      plagiarism.static_root; if that file does not exist, or the URL does not match
 *      the prefix, the URL is downloaded via downloadRemoteFile().
 *   3. Anything else is treated as a path relative to plagiarism.static_root.
 *
 * @param string $fileUrl
 * @return string path of an existing local file
 * @throws Exception when $fileUrl is empty or cannot be resolved
 */
public function resolveFileUrlToLocal($fileUrl)
{
    $fileUrl = trim((string)$fileUrl);
    if ($fileUrl === '') {
        throw new Exception('empty file_url');
    }

    // Already an absolute path.
    if (preg_match('/^([a-zA-Z]:[\\\\\/]|\/)/', $fileUrl) && is_file($fileUrl)) {
        return $fileUrl;
    }

    $staticRoot = trim((string)Env::get('plagiarism.static_root', ''));
    $cdnPrefix = trim((string)Env::get('plagiarism.cdn_prefix', ''));

    // http(s) URL: try to strip the CDN prefix and map onto the local static root.
    if (preg_match('#^https?://#i', $fileUrl)) {
        // static_root must be configured too; with an empty root the URL path would be
        // resolved against the filesystem root.
        if ($staticRoot !== '' && $cdnPrefix !== '' && stripos($fileUrl, $cdnPrefix) === 0) {
            $rel = ltrim(substr($fileUrl, strlen($cdnPrefix)), '/');
            $local = rtrim($staticRoot, '/\\') . DIRECTORY_SEPARATOR . $rel;
            if (is_file($local)) {
                return $local;
            }
        }
        // Last resort: download into the temporary directory.
        return $this->downloadRemoteFile($fileUrl);
    }

    // Relative path: prepend static_root.
    if ($staticRoot !== '') {
        $local = rtrim($staticRoot, '/\\') . DIRECTORY_SEPARATOR . ltrim($fileUrl, '/\\');
        if (is_file($local)) {
            return $local;
        }
    }

    throw new Exception("cannot resolve file_url to local path: {$fileUrl} (set [plagiarism] STATIC_ROOT/CDN_PREFIX in .env)");
}
|
||||
|
||||
/**
 * Download a remote file into <root>/<REPORT_DIR>/tmp and return the local path.
 *
 * The file name is md5(url) + '_' + current timestamp, with the extension taken from
 * the URL path ('pdf' when the URL has no plain alphanumeric extension).
 *
 * @param string $url
 * @return string local path of the downloaded file
 * @throws Exception when the temp directory or file cannot be created, or when the
 *                   transfer fails, the HTTP status is not 200, or the file is
 *                   smaller than 100 bytes
 */
private function downloadRemoteFile($url)
{
    $rootDir = ROOT_PATH ?: dirname(dirname(__DIR__));
    $tmpDir = rtrim($rootDir, '/\\') . DIRECTORY_SEPARATOR . self::REPORT_DIR . DIRECTORY_SEPARATOR . 'tmp';
    // The second is_dir() covers a concurrent worker creating the directory first.
    if (!is_dir($tmpDir) && !@mkdir($tmpDir, 0755, true) && !is_dir($tmpDir)) {
        throw new Exception("cannot create tmp dir {$tmpDir}");
    }

    // parse_url() may return null/false; the extension comes from an external URL, so
    // only accept a short alphanumeric one before using it in a file name.
    $ext = pathinfo((string)parse_url($url, PHP_URL_PATH), PATHINFO_EXTENSION);
    if (!preg_match('/^[A-Za-z0-9]{1,10}$/', (string)$ext)) {
        $ext = 'pdf';
    }
    $local = $tmpDir . DIRECTORY_SEPARATOR . md5($url) . '_' . time() . '.' . $ext;

    $fh = @fopen($local, 'wb');
    if ($fh === false) {
        throw new Exception("cannot open {$local} for writing");
    }
    $ch = curl_init($url);
    if ($ch === false) {
        fclose($fh);
        @unlink($local);
        throw new Exception("download failed url={$url} http=0");
    }

    curl_setopt_array($ch, [
        CURLOPT_FILE => $fh,
        CURLOPT_FOLLOWLOCATION => true,
        CURLOPT_TIMEOUT => 120,
        // NOTE(review): certificate verification is disabled here, so the download can be
        // tampered with in transit. Kept as is because the certificate setup of the file
        // hosts is not visible from this code; enable it if those hosts have valid certs.
        CURLOPT_SSL_VERIFYPEER => false,
    ]);
    $ok = curl_exec($ch);
    $code = curl_getinfo($ch, CURLINFO_HTTP_CODE);
    curl_close($ch);
    fclose($fh);

    // Make sure filesize() reflects what was just written.
    clearstatcache(true, $local);
    if (!$ok || $code !== 200 || filesize($local) < 100) {
        @unlink($local);
        throw new Exception("download failed url={$url} http={$code}");
    }

    return $local;
}
|
||||
|
||||
/**
 * Fetch one plagiarism_check row by id; returns whatever the query's find() yields
 * when nothing matches (no exception is raised here).
 *
 * @param int $checkId
 */
public function getCheck($checkId)
{
    $query = Db::name('plagiarism_check')->where('check_id', $checkId);

    return $query->find();
}
|
||||
|
||||
/**
 * Append one timestamped line to $this->logFile. Write errors are suppressed.
 *
 * @param string $msg
 */
public function log($msg)
{
    $stamp = date('Y-m-d H:i:s');
    $entry = $stamp . ' ' . $msg . PHP_EOL;

    @file_put_contents($this->logFile, $entry, FILE_APPEND);
}
|
||||
}
|
||||
@@ -319,19 +319,27 @@ class PromotionService
|
||||
$this->log("dispatchPrepareEmails task_id={$taskId} no_logs -> state=5");
|
||||
return ['dispatched' => 0, 'already_done' => true, 'error' => null];
|
||||
}
|
||||
|
||||
// return $logIds;
|
||||
|
||||
|
||||
foreach ($logIds as $logId) {
|
||||
echo $logId."----";
|
||||
$this->enqueuePrepareEmail(intval($logId));
|
||||
}
|
||||
|
||||
$this->log("dispatchPrepareEmails task_id={$taskId} dispatched=" . count($logIds));
|
||||
return ['dispatched' => count($logIds), 'already_done' => false, 'error' => null];
|
||||
}
|
||||
|
||||
/**
 * Debug helper: look up one promotion_email_log row and return it wrapped by jsonSuccess().
 *
 * A follow-up step that loaded the owning promotion_task and marked the log as failed
 * ('Task not found') when it was missing is currently disabled.
 *
 * @param int $logId
 */
public function prepareSingleEmailTest($logId)
{
    $query = Db::name('promotion_email_log')->where('log_id', $logId);
    $row = $query->find();

    return jsonSuccess($row);
}
|
||||
|
||||
/**
|
||||
* 对单封邮件执行准备:拉取 expert / journal,调 LLM 生成描述,渲染模板,写回 log。
|
||||
*
|
||||
@@ -768,16 +776,14 @@ class PromotionService
|
||||
* 队列名:promotion_email
|
||||
* 启动 worker:php think queue:listen --queue promotion_email
|
||||
*/
|
||||
public function enqueuePrepareEmail($logId, $delay = 0)
|
||||
public function enqueuePrepareEmail($logId)
|
||||
{
|
||||
$jobClass = 'app\api\job\PromotionPrepareEmail@fire';
|
||||
$data = ['log_id' => intval($logId)];
|
||||
|
||||
if ($delay > 0) {
|
||||
Queue::later($delay, $jobClass, $data, 'PromotionPrepareEmail');
|
||||
} else {
|
||||
Queue::push($jobClass, $data, 'PromotionPrepareEmail');
|
||||
}
|
||||
|
||||
$res =Queue::push($jobClass, $data, 'PromotionPrepareEmail');
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
|
||||
@@ -80,6 +80,25 @@ class QueueRedis
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Delete one or more Redis keys (used to clear a queue job's "completed" marker
 * before a re-check).
 *
 * Null and empty-string entries are ignored. Returns true when there is nothing to
 * delete or the delete call did not throw, false when it threw an \Exception.
 *
 * @param array $keys
 * @return bool
 */
public function deleteRedisKeys(array $keys)
{
    $targets = [];
    foreach ($keys as $candidate) {
        if ($candidate === null || $candidate === '') {
            continue;
        }
        $targets[] = $candidate;
    }

    if (!$targets) {
        return true;
    }

    try {
        $this->connect()->del(...$targets);
    } catch (\Exception $e) {
        return false;
    }

    return true;
}
|
||||
|
||||
// 安全释放锁(仅当值匹配时删除)
|
||||
public function releaseRedisLock($key, $value)
|
||||
|
||||
2578
application/common/ReferenceCheckService.php
Normal file
2578
application/common/ReferenceCheckService.php
Normal file
File diff suppressed because it is too large
Load Diff
322
application/common/TurnitinService.php
Normal file
322
application/common/TurnitinService.php
Normal file
@@ -0,0 +1,322 @@
|
||||
<?php
|
||||
|
||||
namespace app\common;
|
||||
|
||||
use think\Env;
|
||||
use think\Exception;
|
||||
|
||||
/**
 * REST client wrapper for the Turnitin Core API (TCA).
 *
 * Intended for the Crossref Similarity Check channel as well as a standard TCA integration.
 *
 * Every request sends:
 *   Authorization: Bearer <API_KEY>
 *   X-Turnitin-Integration-Name / X-Turnitin-Integration-Version
 *
 * .env configuration ([turnitin] section):
 *   BASE_URL             e.g. https://crossref-12345.turnitin.com/api/v1 (a trailing slash is stripped)
 *   API_KEY              bearer token
 *   INTEGRATION_NAME     integration name, defaults to "tmr"
 *   INTEGRATION_VERSION  free-form version string, defaults to "1.0.0"
 *
 * API documentation: https://developers.turnitin.com/docs/tca
 *
 * Notes:
 *   - Methods return the JSON-decoded response (or the raw body when it is not JSON);
 *     transport errors and non-2xx HTTP statuses raise Exception.
 *   - No business logic lives here.
 *   - Nothing is cached; the bearer token is sent with every request.
 */
class TurnitinService
{
    private $baseUrl;
    private $apiKey;
    private $integrationName;
    private $integrationVersion;
    private $timeout = 60;

    /**
     * Read the [turnitin] settings from the environment.
     *
     * @throws Exception when base_url or api_key is missing
     */
    public function __construct()
    {
        $this->baseUrl = rtrim(trim((string)Env::get('turnitin.base_url', '')), '/');
        $this->apiKey = trim((string)Env::get('turnitin.api_key', ''));
        $this->integrationName = trim((string)Env::get('turnitin.integration_name', 'tmr'));
        $this->integrationVersion = trim((string)Env::get('turnitin.integration_version', '1.0.0'));

        if ($this->baseUrl === '' || $this->apiKey === '') {
            throw new Exception('Turnitin not configured: missing BASE_URL or API_KEY in .env [turnitin] section');
        }
    }

    // ==================== Public API ====================

    /**
     * Health probe / account capabilities.
     * GET /features-enabled
     */
    public function featuresEnabled()
    {
        return $this->request('GET', '/features-enabled');
    }

    /**
     * Create a submission (its id is needed before a file can be uploaded).
     * POST /submissions
     *
     * @param array $meta request body, sent as JSON unchanged. Per the original notes:
     *   - title      paper title
     *   - owner      submission owner identifier (custom string, e.g. a user id)
     *   - submitter  submitter identifier
     *   - eula       (optional) ['version' => '...', 'language' => 'en-US', 'accepted_timestamp' => ISO8601]
     *   - extract_text_only (optional) bool
     *   - metadata   (optional) custom key/value pairs
     * @return array decoded response
     */
    public function createSubmission($meta)
    {
        return $this->request('POST', '/submissions', $meta);
    }

    /**
     * Upload the file of a submission.
     * PUT /submissions/{id}/original/{filename}
     *
     * @param string $submissionId
     * @param string $filePath local path of the file to upload
     * @param string $filename name sent to Turnitin; defaults to basename($filePath)
     * @return array
     * @throws Exception when the file is missing, unreadable or cannot be read
     */
    public function uploadFile($submissionId, $filePath, $filename = '')
    {
        if (!is_file($filePath) || !is_readable($filePath)) {
            throw new Exception("File not found or not readable: {$filePath}");
        }
        if ($filename === '') {
            $filename = basename($filePath);
        }

        $body = file_get_contents($filePath);
        if ($body === false) {
            throw new Exception("Failed to read file: {$filePath}");
        }

        // A header value must not contain CR/LF, and a quote or backslash would break the
        // quoted filename parameter. The URL segment below still carries the exact name.
        $headerFilename = str_replace(["\r", "\n", '"', '\\'], '_', $filename);

        return $this->request(
            'PUT',
            $this->submissionPath($submissionId, '/original/' . rawurlencode($filename)),
            $body,
            [
                'Content-Type' => 'binary/octet-stream',
                'Content-Disposition' => 'inline; filename="' . $headerFilename . '"',
            ]
        );
    }

    /**
     * Trigger similarity generation.
     * PUT /submissions/{id}/similarity
     *
     * $opts is merged over the defaults with array_merge(), which is shallow: supplying a
     * top-level key (generation_settings, view_settings, indexing_settings) replaces that
     * whole default sub-array.
     *
     * @param string $submissionId
     * @param array  $opts
     * @return array
     */
    public function triggerSimilarity($submissionId, $opts = [])
    {
        $defaults = [
            'generation_settings' => [
                'search_repositories' => ['INTERNET', 'PUBLICATION', 'CROSSREF', 'CROSSREF_POSTED_CONTENT', 'SUBMITTED_WORK'],
                'submission_auto_excludes' => true,
                'auto_exclude_self_matching_scope' => 'GROUP_CONTEXT',
            ],
            'view_settings' => [
                'exclude_quotes' => true,
                'exclude_bibliography' => true,
                'exclude_citations' => true,
            ],
            'indexing_settings' => [
                'add_to_index' => true,
            ],
        ];

        return $this->request(
            'PUT',
            $this->submissionPath($submissionId, '/similarity'),
            array_merge($defaults, $opts)
        );
    }

    /**
     * Query the similarity status.
     * GET /submissions/{id}/similarity
     *
     * Per the original notes the response carries status PROCESSING / COMPLETE / ERROR,
     * plus overall_match_percentage / time_requested / time_generated once COMPLETE.
     */
    public function getSimilarityStatus($submissionId)
    {
        return $this->request('GET', $this->submissionPath($submissionId, '/similarity'));
    }

    /**
     * Obtain a temporary URL for viewing the report online.
     * POST /submissions/{id}/viewer-url
     *
     * @param string $submissionId
     * @param array  $viewer optional viewer settings, shallow-merged over the defaults,
     *                       e.g. ['viewer_default_permission_set' => 'INSTRUCTOR']
     */
    public function getViewerUrl($submissionId, $viewer = [])
    {
        $defaults = [
            'viewer_default_permission_set' => 'INSTRUCTOR',
            'similarity' => [
                'default_mode' => 'MATCH_OVERVIEW',
                'view_settings' => ['save_changes' => true],
                'modes' => ['match_overview' => true, 'all_sources' => true],
            ],
            'locale' => 'en-US',
        ];

        return $this->request(
            'POST',
            $this->submissionPath($submissionId, '/viewer-url'),
            array_merge($defaults, $viewer)
        );
    }

    /**
     * Request generation of the PDF report (asynchronous; poll getPdfReportStatus()).
     * POST /submissions/{id}/similarity/pdf
     *
     * @param string $submissionId
     * @param array  $opts merged over ['locale' => 'en-US']
     */
    public function requestPdfReport($submissionId, $opts = [])
    {
        return $this->request(
            'POST',
            $this->submissionPath($submissionId, '/similarity/pdf'),
            array_merge(['locale' => 'en-US'], $opts)
        );
    }

    /**
     * Query the PDF report status.
     * GET /submissions/{id}/similarity/pdf/{pdf_id}/status
     *
     * Per the original notes: status is PENDING / SUCCESS / FAILED.
     */
    public function getPdfReportStatus($submissionId, $pdfId)
    {
        return $this->request(
            'GET',
            $this->submissionPath($submissionId, '/similarity/pdf/' . rawurlencode((string)$pdfId) . '/status')
        );
    }

    /**
     * Download the PDF report (call once its status is SUCCESS).
     * GET /submissions/{id}/similarity/pdf/{pdf_id}
     *
     * @return string raw PDF bytes; the caller is responsible for storing them
     */
    public function downloadPdfReport($submissionId, $pdfId)
    {
        return $this->request(
            'GET',
            $this->submissionPath($submissionId, '/similarity/pdf/' . rawurlencode((string)$pdfId)),
            null,
            [],
            true // raw response, no json_decode
        );
    }

    // ==================== Internal HTTP layer ====================

    /**
     * Build "/submissions/{id}{suffix}" with the id percent-encoded as a path segment.
     */
    private function submissionPath($submissionId, $suffix = '')
    {
        return '/submissions/' . rawurlencode((string)$submissionId) . $suffix;
    }

    /**
     * Single entry point for HTTP calls.
     *
     * @param string $method       GET/POST/PUT/DELETE
     * @param string $path         path starting with "/", appended to the base URL
     * @param mixed  $body         array => JSON-encoded; string => sent as raw body; null => no body
     * @param array  $extraHeaders additional headers as name => value
     * @param bool   $rawResponse  true => return the raw body; false => json_decode it
     * @return mixed decoded array, [] for an empty body, or the raw body when it is not JSON
     * @throws Exception on encoding failure, curl failure or a non-2xx status
     */
    private function request($method, $path, $body = null, $extraHeaders = [], $rawResponse = false)
    {
        $url = $this->baseUrl . $path;

        $headers = [
            'Authorization: Bearer ' . $this->apiKey,
            'X-Turnitin-Integration-Name: ' . $this->integrationName,
            'X-Turnitin-Integration-Version: ' . $this->integrationVersion,
        ];

        $payload = null;
        if ($body !== null) {
            if (is_array($body)) {
                $payload = json_encode($body, JSON_UNESCAPED_UNICODE | JSON_UNESCAPED_SLASHES);
                if ($payload === false) {
                    throw new Exception('Turnitin request body is not JSON-encodable: ' . json_last_error_msg());
                }
                $headers[] = 'Content-Type: application/json';
            } else {
                $payload = $body;
                // Only add the generic type when the caller did not supply its own.
                if (!isset($extraHeaders['Content-Type'])) {
                    $headers[] = 'Content-Type: application/octet-stream';
                }
            }
        }
        foreach ($extraHeaders as $k => $v) {
            $headers[] = $k . ': ' . $v;
        }

        $ch = curl_init();
        if ($ch === false) {
            throw new Exception("Turnitin curl error: curl_init failed (url={$url})");
        }
        curl_setopt_array($ch, [
            CURLOPT_URL => $url,
            CURLOPT_CUSTOMREQUEST => strtoupper($method),
            CURLOPT_RETURNTRANSFER => true,
            CURLOPT_HTTPHEADER => $headers,
            CURLOPT_TIMEOUT => $this->timeout,
            CURLOPT_CONNECTTIMEOUT => 15,
            CURLOPT_SSL_VERIFYPEER => true,
            CURLOPT_SSL_VERIFYHOST => 2,
        ]);
        if ($payload !== null) {
            curl_setopt($ch, CURLOPT_POSTFIELDS, $payload);
        }

        $resp = curl_exec($ch);
        $httpCode = curl_getinfo($ch, CURLINFO_HTTP_CODE);
        $err = curl_error($ch);
        curl_close($ch);

        if ($resp === false) {
            throw new Exception("Turnitin curl error: {$err} (url={$url})");
        }
        if ($httpCode < 200 || $httpCode >= 300) {
            // Include the first 1000 characters of the body to ease troubleshooting.
            $excerpt = mb_substr((string)$resp, 0, 1000);
            throw new Exception("Turnitin HTTP {$httpCode} {$method} {$path}: {$excerpt}");
        }

        if ($rawResponse) {
            return $resp;
        }
        // Some responses have no body (e.g. 204 No Content).
        if ($resp === '' || $resp === null) {
            return [];
        }
        $data = json_decode($resp, true);
        if (json_last_error() !== JSON_ERROR_NONE) {
            // Not JSON: hand back the body as received.
            return $resp;
        }
        return $data;
    }
}
|
||||
1271
application/common/service/LLMService.php
Normal file
1271
application/common/service/LLMService.php
Normal file
File diff suppressed because it is too large
Load Diff
44
sql/add_plagiarism_check_table.sql
Normal file
44
sql/add_plagiarism_check_table.sql
Normal file
@@ -0,0 +1,44 @@
|
||||
-- Plagiarism-check task table (Turnitin TCA / Crossref Similarity Check)
--
-- An article may be checked several times; the most recent check of an article is the
-- one shown on the editor's detail page.
-- state flow: 0 waiting for upload -> 1 uploading -> 2 comparing -> 3 done -> 4 failed
-- A failed check can be triggered again, which creates a new row (history is kept).
--
-- Reports are kept permanently: pdf_local_path points to the local PDF under
-- runtime/plagiarism/; view_only_url is a temporary signed Turnitin URL that expires
-- after a few hours and has to be regenerated.
--
-- The table is created only when missing, so re-running this script never drops
-- existing check history.

CREATE TABLE IF NOT EXISTS `t_plagiarism_check` (
  `check_id` INT NOT NULL AUTO_INCREMENT,
  `article_id` INT NOT NULL DEFAULT 0 COMMENT '关联投稿 t_article.article_id',
  `journal_id` INT NOT NULL DEFAULT 0 COMMENT '所属期刊(冗余便于按期刊统计)',
  `triggered_by` INT NOT NULL DEFAULT 0 COMMENT '触发人 user_id(手工触发时编辑的 user_id)',
  `trigger_source` VARCHAR(32) NOT NULL DEFAULT 'manual' COMMENT 'manual/auto_initial_review/...',
  `state` TINYINT NOT NULL DEFAULT 0 COMMENT '0待上传 1上传中 2比对中 3完成 4失败',

  -- Entity ids on the Turnitin side
  `tii_submission_id` VARCHAR(64) NOT NULL DEFAULT '' COMMENT 'Turnitin submission UUID',
  `tii_report_status` VARCHAR(32) NOT NULL DEFAULT '' COMMENT 'PROCESSING/COMPLETE/ERROR',

  -- Result
  `similarity_score` DECIMAL(5,2) NOT NULL DEFAULT 0 COMMENT '总相似度 %(如 12.34)',
  `view_only_url` VARCHAR(1024) NOT NULL DEFAULT '' COMMENT '在线查看报告 URL(临时签名)',
  `view_only_url_expire` INT NOT NULL DEFAULT 0 COMMENT '在线查看 URL 过期时间戳',
  `pdf_local_path` VARCHAR(255) NOT NULL DEFAULT '' COMMENT '本地缓存的 PDF 报告相对路径',

  -- File metadata (recorded at upload time for traceability)
  `source_file_name` VARCHAR(255) NOT NULL DEFAULT '' COMMENT '原始 PDF 文件名',
  `source_file_size` INT NOT NULL DEFAULT 0 COMMENT '原始 PDF 字节数',

  -- Debugging and retries
  `attempts` INT NOT NULL DEFAULT 0 COMMENT '总轮询/重试次数',
  `error_msg` VARCHAR(1024) NOT NULL DEFAULT '' COMMENT '失败原因',
  `raw_response` MEDIUMTEXT COMMENT '最近一次 Turnitin API 原始返回(调试用)',

  `ctime` INT NOT NULL DEFAULT 0,
  `utime` INT NOT NULL DEFAULT 0,
  PRIMARY KEY (`check_id`),
  KEY `idx_article` (`article_id`, `state`),
  KEY `idx_state` (`state`),
  KEY `idx_tii_submission` (`tii_submission_id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb3 COMMENT='Turnitin 查重任务表';
|
||||
102
test_plagiarism_e2e.php
Normal file
102
test_plagiarism_e2e.php
Normal file
@@ -0,0 +1,102 @@
|
||||
<?php
/**
 * End-to-end connectivity test for Turnitin TCA.
 *
 * Usage (run from the project root):
 *   php test_plagiarism_e2e.php features              # health probe
 *   php test_plagiarism_e2e.php submit <article_id>   # submit the article's main manuscript
 *   php test_plagiarism_e2e.php submit-file <pdf>     # submit a local PDF (not bound to an article)
 *   php test_plagiarism_e2e.php status <check_id>     # print the check row
 *   php test_plagiarism_e2e.php list <article_id>     # list the checks of an article
 *   php test_plagiarism_e2e.php viewer <check_id>     # obtain the online viewer URL
 *
 * Notes:
 *   submit prints the new check_id; use `status` afterwards to follow it.
 *   NOTE(review): the original note said submit-file does not persist a row, yet it calls
 *   the same PlagiarismService::submit() as `submit`, with article_id = 0 - confirm.
 */

define('IS_CLI', true);

require __DIR__ . '/thinkphp/start.php';

use think\Db;
use app\common\PlagiarismService;
use app\common\TurnitinService;

if ($argc < 2) {
    echo "Usage: php test_plagiarism_e2e.php <command> [args...]\n";
    exit(1);
}
$cmd = $argv[1];

// Abort with the command's usage line when its single argument is missing.
$requireArg = function ($usage) use ($argc) {
    if ($argc < 3) {
        echo "Usage: ... {$usage}\n";
        exit(1);
    }
};

try {
    if ($cmd === 'features') {
        $client = new TurnitinService();
        print_r($client->featuresEnabled());
    } elseif ($cmd === 'submit') {
        $requireArg('submit <article_id>');
        $articleId = intval($argv[2]);
        $service = new PlagiarismService();
        $local = $service->locateArticleManuscript($articleId);
        echo "manuscript local path: {$local}\n";
        $checkId = $service->submit($articleId, $local, 0, 'cli_test');
        echo "submitted, check_id = {$checkId}\n";
        echo "now run: php think queue:work --queue plagiarism --tries=1 -v\n";
    } elseif ($cmd === 'submit-file') {
        $requireArg('submit-file <pdf_path>');
        $path = $argv[2];
        if (!is_file($path)) {
            echo "file not exists: {$path}\n";
            exit(1);
        }
        $service = new PlagiarismService();
        $checkId = $service->submit(0, $path, 0, 'cli_test_file');
        echo "submitted, check_id = {$checkId}\n";
    } elseif ($cmd === 'status') {
        $requireArg('status <check_id>');
        $row = Db::name('plagiarism_check')->where('check_id', intval($argv[2]))->find();
        print_r($row);
    } elseif ($cmd === 'list') {
        $requireArg('list <article_id>');
        $rows = Db::name('plagiarism_check')
            ->where('article_id', intval($argv[2]))
            ->order('check_id desc')
            ->select();
        print_r($rows);
    } elseif ($cmd === 'viewer') {
        $requireArg('viewer <check_id>');
        $service = new PlagiarismService();
        print_r($service->refreshViewerUrlFor(intval($argv[2])));
    } else {
        echo "unknown command: {$cmd}\n";
        exit(1);
    }
} catch (\Throwable $e) {
    echo "ERROR: " . $e->getMessage() . "\n" . $e->getTraceAsString() . "\n";
    exit(1);
}
|
||||
Reference in New Issue
Block a user