Compare commits

..

16 Commits

Author SHA1 Message Date
wyn
c1107780a7 参考文献本地大模型校对 2026-05-26 17:33:34 +08:00
wyn
68cf1867d8 已经完成一个文章校对了,但换个文章id就报错了,排查前备份 2026-05-22 16:58:07 +08:00
wyn
44f3383887 Changes 2026-05-21 17:28:36 +08:00
wyn
f118a799c2 此节点之后改成不拆分原文内容,直接用参考文献和整段进行对比 2026-05-21 16:28:28 +08:00
wyn
d9c3243053 Changes 2026-05-21 16:24:34 +08:00
wyn
8cd033a56d Changes
原文内容截取的已经很好了
2026-05-21 15:19:07 +08:00
wyn
3663dd4ea6 Changes 2026-05-21 14:37:04 +08:00
wyn
6f76c483ec 还不错,挺完美,唯一不足The results of the linear regression analysis in this study show that work immersion among emergency department nurses is an important influencing factor for organizational silence (P < 0.05). Organizational silence among emergency department nurses is a process influenced by both individual motivation [23] and external environment [24]. The higher the immersion scores of emergency department nurses, the more likely they are to feel intrinsically motivated [25] and willing to speak up. 24截取成了后面的he higher the immersion scores of emergency department nurses, the more likely they are to feel intrinsically motivated 2026-05-21 14:14:34 +08:00
wyn
867621232b Changes 2026-05-21 13:55:13 +08:00
wyn
74383d24ea Changes 2026-05-21 11:31:19 +08:00
wyn
7e5a087a4e Changes 2026-05-21 11:30:46 +08:00
wyn
4aab7f5b7e 文章引用文献校验 2026-05-21 10:02:05 +08:00
wangjinlei
fa878334cd 修改自动推广的相关任务 2026-05-13 12:26:28 +08:00
wangjinlei
c36eba77b1 修改自动推广的相关任务 2026-05-08 17:59:08 +08:00
wangjinlei
336fa08a18 修改自动推广的相关任务 2026-05-08 15:38:15 +08:00
wangjinlei
b1e978ed73 修改自动推广的相关任务 2026-05-07 11:45:55 +08:00
27 changed files with 5954 additions and 172 deletions

3
.env
View File

@@ -33,6 +33,9 @@ UNSUBSCRIBE_BASE_URL=https://submission.tmrjournals.com/api/Unsubscribe/index
[yboard]
APPLY_URL="https://submission.tmrjournals.com/youthBoardRegister"
[plagiarism]
static_root="/home/wwwroot/api.tmrjournals.com/public"
[journal]
;官网服务器地址
base_url = http://journalapi.tmrjournals.com/public/index.php

View File

@@ -271,6 +271,14 @@ class Base extends Controller
}
$this->production_article_refer_obj->where('p_article_id', $refer_info['p_article_id'])->where('index', ">", $refer_info['index'])->where('state', 0)->setDec('index');
$this->production_article_refer_obj->where('p_refer_id', $p_refer_id)->update(['state' => 1]);
// 文献集合已变更,原校对结果的 reference_no 已全部错位,整篇标记为未校对
try {
(new \app\common\ReferenceCheckService())
->clearArticleChecksByPArticleId(intval($refer_info['p_article_id']));
} catch (\Exception $e) {
\think\Log::error('delOneRefer clearArticleChecksByPArticleId p_refer_id=' . $p_refer_id . ' ' . $e->getMessage());
}
}

View File

@@ -1761,20 +1761,9 @@ class EmailClient extends Base
$service = new PromotionService();
$taskId = intval($data['id']);
// 调用前快照:用于解释"为什么没入队"
$task = \think\Db::name('promotion_task')->where('task_id', $taskId)->find();
$pending = \think\Db::name('promotion_email_log')
->where('task_id', $taskId)
->where('state', 0)
->where('prepared_at', 0)
->count();
$result = $service->dispatchPrepareEmails($taskId);
return jsonSuccess([
'task_id' => $taskId,
'task_state' => $task ? intval($task['state']) : null, // 0 才能 dispatch5 已准备完
'pending_before' => intval($pending), // 调用前还能入队的 log 数
'dispatch_result' => $result, // ['dispatched' => N, ...]
]);
}
@@ -1793,6 +1782,20 @@ class EmailClient extends Base
return jsonSuccess($result);
}
/**
 * Debug helper: checks that an "id" was posted and enqueues one
 * prepare-email job for it through PromotionService::enqueuePrepareEmail().
 *
 * POST params:
 *   id  required; cast to int before being handed to the service
 *
 * NOTE(review): the name suggests a temporary test action — confirm whether
 * it should stay reachable.
 */
public function mytestqqq()
{
    $post = $this->request->post();
    $validator = new Validate(['id' => 'require']);
    if (!$validator->check($post)) {
        return jsonError($validator->getError());
    }

    (new PromotionService())->enqueuePrepareEmail(intval($post['id']));

    return jsonSuccess();
}
/**
* 队列调试:查看 Redis 里队列长度(不依赖 redis-cli
*

View File

@@ -0,0 +1,226 @@
<?php
namespace app\api\controller;
use think\Db;
use think\Response;
use app\common\PlagiarismService;
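/**
 * Plagiarism-check (Turnitin / Crossref Similarity Check) controller.
 *
 * Trigger: manual only (an editor clicks the "check" button in the back office).
 * Report strategy: temporarily signed online viewer URL + PDF stored permanently under runtime/plagiarism/.
 *
 * Main endpoints:
 * POST submit          trigger a check
 * GET  getStatus       poll the state of a single check (front-end ajax)
 * GET  getList         list every check record of an article
 * GET  getReportUrl    get / refresh the online viewer URL
 * GET  downloadReport  download the locally stored PDF
 * POST retry           trigger again (creates a new row)
 * GET  features        liveness probe (development / debugging)
 */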
class Plagiarism extends Base
{
/**
 * Forwards the optional request object to the Base controller constructor;
 * no additional set-up happens here.
 */
public function __construct(\think\Request $request = null)
{
parent::__construct($request);
}
/**
 * Trigger a plagiarism check for an article.
 *
 * Request params:
 *   article_id  required; must be a positive integer
 *   file_url    optional; when empty the manuscript is located from article_id
 *               via PlagiarismService::locateArticleManuscript()
 *   editor_id   optional; user_id of whoever triggered the check (defaults to 0)
 *
 * Responds with jsonSuccess(['check_id' => ...]) on success, or with
 * jsonError() carrying the exception message when the service layer throws.
 */
public function submit()
{
    $articleId = intval($this->request->param('article_id', 0));
    $fileUrl   = trim($this->request->param('file_url', ''));
    $editorId  = intval($this->request->param('editor_id', 0));
    if ($articleId <= 0) {
        return jsonError('article_id required');
    }
    try {
        $svc = new PlagiarismService();
        $localPath = $fileUrl !== ''
            ? $svc->resolveFileUrlToLocal($fileUrl)
            : $svc->locateArticleManuscript($articleId);
        // The resolved path is deliberately NOT echoed: printing it would put
        // raw text in front of the JSON body and expose a server-side path.
        $checkId = $svc->submit($articleId, $localPath, $editorId, 'manual');
        return jsonSuccess(['check_id' => $checkId]);
    } catch (\Throwable $e) {
        return jsonError($e->getMessage());
    }
}
/**
 * Debug helper: runs the upload-and-trigger step for a hard-coded check
 * (check_id 9) against a hard-coded manuscript path. Returns nothing.
 *
 * NOTE(review): looks like a development leftover — confirm before release.
 */
public function testccone()
{
    (new PlagiarismService())->runUploadAndTrigger(
        9,
        "/home/wwwroot/api.tmrjournals.com/public/manuscirpt/20260509/6832a56e8ace38fe99df390ab5221deb.docx"
    );
}
/**
 * Retry = submit a brand-new check; delegates straight to submit() and
 * therefore accepts the same request params.
 * (Per the original note, history is kept — presumably because submit()
 * creates a new record instead of updating one; verify in PlagiarismService.)
 */
public function retry()
{
return $this->submit();
}
/**
 * Return the current state of a single plagiarism check.
 *
 * Request params:
 *   check_id  required; must be a positive integer
 *
 * Responds with the formatRow() view of the row, or jsonError('not found').
 */
public function getStatus()
{
    $id = intval($this->request->param('check_id', 0));
    if ($id <= 0) {
        return jsonError('check_id required');
    }

    $record = Db::name('plagiarism_check')->where('check_id', $id)->find();

    return $record
        ? jsonSuccess($this->formatRow($record))
        : jsonError('not found');
}
/**
 * List every plagiarism-check record of an article, ordered by check_id
 * descending (newest first, assuming check_id grows over time — TODO confirm).
 *
 * Request params:
 *   article_id  required; must be a positive integer
 *
 * Responds with jsonSuccess(['list' => [...formatRow() items...]]).
 */
public function getList()
{
    $articleId = intval($this->request->param('article_id', 0));
    if ($articleId <= 0) {
        return jsonError('article_id required');
    }

    $query = Db::name('plagiarism_check')
        ->where('article_id', $articleId)
        ->order('check_id desc');

    $formatted = [];
    foreach ($query->select() as $record) {
        $formatted[] = $this->formatRow($record);
    }

    return jsonSuccess(['list' => $formatted]);
}
/**
 * Return the online viewer URL of a completed check, refreshing it through
 * PlagiarismService::refreshViewerUrlFor() when it is missing or expires
 * within the next 60 seconds.
 *
 * Request params:
 *   check_id  required; must be a positive integer
 *
 * Only rows with state 3 are served; any other state yields an error.
 */
public function getReportUrl()
{
    $id = intval($this->request->param('check_id', 0));
    if ($id <= 0) {
        return jsonError('check_id required');
    }

    try {
        $record = Db::name('plagiarism_check')->where('check_id', $id)->find();
        if (!$record) {
            return jsonError('not found');
        }
        if ($record['state'] != 3) {
            return jsonError('check not completed yet, state=' . $record['state']);
        }

        // A stored URL is reused only while it stays valid for at least one more minute.
        $stillValid = !empty($record['view_only_url'])
            && intval($record['view_only_url_expire']) >= time() + 60;
        if ($stillValid) {
            return jsonSuccess([
                'view_only_url' => $record['view_only_url'],
                'expire'        => intval($record['view_only_url_expire']),
            ]);
        }

        $fresh = (new PlagiarismService())->refreshViewerUrlFor($id);
        return jsonSuccess([
            'view_only_url' => $fresh['url'],
            'expire'        => $fresh['expire'],
        ]);
    } catch (\Throwable $e) {
        return jsonError($e->getMessage());
    }
}
/**
 * Stream the locally stored PDF report of a check to the browser as a download.
 *
 * Request params:
 *   check_id  required; must be a positive integer
 *
 * The stored pdf_local_path is resolved relative to ROOT_PATH (falling back
 * to three directories above this file when ROOT_PATH is empty).
 */
public function downloadReport()
{
    $id = intval($this->request->param('check_id', 0));
    if ($id <= 0) {
        return jsonError('check_id required');
    }

    $record = Db::name('plagiarism_check')->where('check_id', $id)->find();
    if (!$record || empty($record['pdf_local_path'])) {
        return jsonError('report not available');
    }

    // Normalise both slash flavours so the stored relative path works on any OS.
    $base     = ROOT_PATH ?: dirname(dirname(dirname(__DIR__)));
    $relative = str_replace(['/', '\\'], DIRECTORY_SEPARATOR, $record['pdf_local_path']);
    $pdfPath  = rtrim($base, '/\\') . DIRECTORY_SEPARATOR . $relative;
    if (!is_file($pdfPath)) {
        return jsonError('pdf file missing on disk: ' . $record['pdf_local_path']);
    }

    $downloadName = sprintf('plagiarism_check_%d_article_%d.pdf', $record['check_id'], $record['article_id']);
    $body = file_get_contents($pdfPath);
    $headers = [
        'Content-Type'        => 'application/pdf',
        'Content-Disposition' => 'attachment; filename="' . $downloadName . '"',
        'Content-Length'      => (string)filesize($pdfPath),
    ];

    return Response::create($body, 'html', 200, $headers);
}
/**
 * Turnitin liveness probe (development / debugging): returns whatever
 * TurnitinService::featuresEnabled() reports, or the exception message.
 */
public function features()
{
    try {
        return jsonSuccess((new \app\common\TurnitinService())->featuresEnabled());
    } catch (\Throwable $e) {
        return jsonError($e->getMessage());
    }
}
// ---------- internal helpers ----------

/**
 * Convert a plagiarism_check row into the typed array sent to the client.
 *
 * has_pdf is true when pdf_local_path is non-empty; has_viewer_url is true
 * when a viewer URL is stored and its expiry lies in the future.
 */
private function formatRow($r)
{
    $state = $r['state'];
    $pdfStored = !empty($r['pdf_local_path']);
    $viewerUrlUsable = !empty($r['view_only_url'])
        && intval($r['view_only_url_expire']) > time();

    return [
        'check_id'          => intval($r['check_id']),
        'article_id'        => intval($r['article_id']),
        'journal_id'        => intval($r['journal_id']),
        'state'             => intval($state),
        'state_label'       => $this->stateLabel($state),
        'similarity_score'  => floatval($r['similarity_score']),
        'tii_report_status' => (string)$r['tii_report_status'],
        'has_pdf'           => $pdfStored,
        'has_viewer_url'    => $viewerUrlUsable,
        'attempts'          => intval($r['attempts']),
        'error_msg'         => (string)$r['error_msg'],
        'source_file_name'  => (string)$r['source_file_name'],
        'trigger_source'    => (string)$r['trigger_source'],
        'triggered_by'      => intval($r['triggered_by']),
        'ctime'             => intval($r['ctime']),
        'utime'             => intval($r['utime']),
    ];
}
/**
 * Map a numeric check state to its display label
 * (0 pending upload, 1 uploading, 2 comparing, 3 completed, 4 failed);
 * any other value yields 'unknown'.
 */
private function stateLabel($state)
{
    $labels = [
        0 => '待上传',
        1 => '上传中',
        2 => '比对中',
        3 => '完成',
        4 => '失败',
    ];
    if (!isset($labels[$state])) {
        return 'unknown';
    }
    return $labels[$state];
}
}

View File

@@ -6,6 +6,8 @@ use think\Db;
use think\Env;
use think\Queue;
use think\Validate;
use app\common\CrossrefService;
use app\common\ReferenceCheckService;
class Preaccept extends Base
{
@@ -14,6 +16,26 @@ class Preaccept extends Base
parent::__construct($request);
}
/**
 * Clear all reference-check detail rows of an article after its reference
 * set changed, via ReferenceCheckService::clearArticleChecksByPArticleId().
 * Per the original note this puts the article back into the "not checked" state.
 * Best effort: a failure is only logged and never interrupts the caller.
 *
 * @param mixed  $pArticleId production article id; nothing happens when it is <= 0 after intval()
 * @param string $sourceTag  name of the calling action, used in the log line only
 */
private function resetArticleChecksOnReferChange($pArticleId, $sourceTag = '')
{
    $articleKey = intval($pArticleId);
    if ($articleKey > 0) {
        try {
            $service = new ReferenceCheckService();
            $service->clearArticleChecksByPArticleId($articleKey);
        } catch (\Exception $e) {
            $message = 'resetArticleChecksOnReferChange[' . $sourceTag . '] p_article_id='
                . $articleKey . ' ' . $e->getMessage();
            \think\Log::error($message);
        }
    }
}
/**获取文章参考文献列表
* @return \think\response\Json
@@ -91,6 +113,7 @@ class Preaccept extends Base
return jsonError($rule->getError());
}
$this->production_article_refer_obj->where('p_article_id',$data['p_article_id'])->update(["state"=>1]);
$this->resetArticleChecksOnReferChange(intval($data['p_article_id']), 'discardRefersByParticleid');
return jsonSuccess([]);
}
@@ -141,6 +164,7 @@ class Preaccept extends Base
}
$adId= $this->production_article_refer_obj->insertGetId($insert);
$this->production_article_refer_obj->where('p_article_id', $p_info['p_article_id'])->where("p_refer_id", "<>", $adId)->where("index", ">", $pre_refer['index'])->where('state', 0)->setInc('index');
$this->resetArticleChecksOnReferChange(intval($p_info['p_article_id']), 'addRefer');
return jsonSuccess([]);
@@ -197,6 +221,7 @@ class Preaccept extends Base
}
$adId= $this->production_article_refer_obj->insertGetId($insert);
$this->production_article_refer_obj->where('p_article_id', $p_info['p_article_id'])->where("p_refer_id", "<>", $adId)->where("index", ">", $pre_refer['index'])->where('state', 0)->setInc('index');
$this->resetArticleChecksOnReferChange(intval($p_info['p_article_id']), 'addReferByParticleid');
return jsonSuccess([]);
}
@@ -232,6 +257,7 @@ class Preaccept extends Base
$insert['cs'] = 1;
$adId = $this->production_article_refer_obj->insertGetId($insert);
$this->production_article_refer_obj->where('p_article_id', $p_info['p_article_id'])->where("p_refer_id", "<>", $adId)->where("index", ">", $pre_refer['index'])->where('state', 0)->setInc('index');
$this->resetArticleChecksOnReferChange(intval($p_info['p_article_id']), 'addReferNotdoi');
return jsonSuccess([]);
}
@@ -461,6 +487,17 @@ class Preaccept extends Base
// }
// $this->production_article_refer_obj->where('p_refer_id', $data['p_refer_id'])->update(['refer_doi' => $data['doi']]);
// my_doiToFrag2($this->production_article_refer_obj->where('p_refer_id', $data['p_refer_id'])->find());
//文献内容更新成功后异步重检该文献对应的全部校对明细(失败不阻塞主流程)
try {
(new ReferenceCheckService())->enqueueRecheckByPReferId(
intval($data['p_refer_id']),
intval($old_refer_info['p_article_id'])
);
} catch (\Exception $e) {
\think\Log::error('editRefer enqueueRecheckByPReferId p_refer_id=' . $data['p_refer_id'] . ' ' . $e->getMessage());
}
return jsonSuccess([]);
}
@@ -708,36 +745,66 @@ class Preaccept extends Base
}
/**
* 通过 DOI 获取文献元数据Crossref REST API
*
* POST 参数:
* doi 必填,可为纯 DOI10.xxxx/...)或 https://doi.org/10.xxxx/...
*
* 返回 data.formate 与旧版字段兼容: author, title, joura, dateno, doilink
* 另附 data.crossref: 原始摘要字段(不含 raw message避免体积过大
*/
public function searchDoi()
{
$data = $this->request->post();
$rule = new Validate([
"doi" => "require"
'doi' => 'require',
]);
if (!$rule->check($data)) {
return jsonError($rule->getError());
}
$doi = str_replace('/', '%2F', $data['doi']);
// $url = "https://citation.crosscite.org/format?doi=$doi&style=cancer-translational-medicine&lang=en-US";
$url = "https://citation.doi.org/format?doi=$doi&style=cancer-translational-medicine&lang=en-US";
$res = myGet($url);
$frag = trim(substr($res, strpos($res, '.') + 1));
if ($frag == "") {
return jsonError("not find");
$doiInput = trim((string)$data['doi']);
if ($doiInput === '') {
return jsonError('doi empty');
}
if (mb_substr_count($frag, '.') != 3) {
return jsonError("formate fail");
// 去掉 URL 前缀,得到裸 DOI
$doiNorm = preg_replace('#^https?://(dx\.)?doi\.org/#i', '', $doiInput);
$doiNorm = trim($doiNorm, " \t\n\r\0\x0B/");
$svc = new CrossrefService([
'mailto' => trim((string)Env::get('crossref_mailto', '')),
]);
$summary = $svc->fetchWorkSummary($doiNorm);
if ($summary === null || empty($summary['doi'])) {
return jsonError('DOI not found or invalid (Crossref)');
}
$res = explode('.', $frag);
$f['author'] = prgeAuthor($res[0]);
$f['title'] = trim($res[1]);
$bj = bekjournal($res[2]);
$joura = formateJournal(trim($bj[0]));
$f['joura'] = $joura;
$f['dateno'] = str_replace(' ', '', str_replace('-', '', trim($bj[1])));
$f['doilink'] = strpos($data['doi'], "http") === false ? "http://doi.org/" . $data['doi'] : $data['doi'];
$re['formate'] = $f;
return jsonSuccess($re);
$title = trim((string)($summary['title'] ?? ''));
$jouraRaw = trim((string)($summary['joura'] ?? ''));
$authorStr = trim((string)($summary['author_str'] ?? ''));
$dateno = trim((string)($summary['dateno'] ?? ''));
$doilink = trim((string)($summary['doilink'] ?? ''));
if ($doilink === '') {
$doilink = 'https://doi.org/' . $summary['doi'];
}
$f = [
'author' => $authorStr !== '' ? prgeAuthor($authorStr) : '',
'title' => $title,
'joura' => $jouraRaw !== '' ? formateJournal($jouraRaw) : '',
'dateno' => str_replace(' ', '', str_replace('-', '', $dateno)),
'doilink' => $doilink,
];
$crossrefOut = $summary;
unset($crossrefOut['raw']);
return jsonSuccess([
'formate' => $f,
'crossref' => $crossrefOut,
'doi' => $summary['doi'],
]);
}
@@ -1422,6 +1489,7 @@ class Preaccept extends Base
return jsonError($rule->getError());
}
$refer_info = $this->production_article_refer_obj->where('p_refer_id', $data['p_refer_id'])->find();
$sibling_p_refer_id = 0;
if ($data['act'] == "up") {
$up_info = $this->production_article_refer_obj->where('p_article_id', $refer_info['p_article_id'])->where('index', $refer_info['index'] - 1)->where('state', 0)->find();
if (!$up_info) {
@@ -1429,6 +1497,7 @@ class Preaccept extends Base
}
$this->production_article_refer_obj->where('p_refer_id', $up_info['p_refer_id'])->setInc("index");
$this->production_article_refer_obj->where('p_refer_id', $refer_info['p_refer_id'])->setDec("index");
$sibling_p_refer_id = intval($up_info['p_refer_id']);
} else {
$down_info = $this->production_article_refer_obj->where('p_article_id', $refer_info['p_article_id'])->where('index', $refer_info['index'] + 1)->where('state', 0)->find();
if (!$down_info) {
@@ -1436,7 +1505,19 @@ class Preaccept extends Base
}
$this->production_article_refer_obj->where('p_refer_id', $refer_info['p_refer_id'])->setInc("index");
$this->production_article_refer_obj->where('p_refer_id', $down_info['p_refer_id'])->setDec("index");
$sibling_p_refer_id = intval($down_info['p_refer_id']);
}
// 仅同步本次交换的两条 p_refer_id 对应的校对明细 reference_no / refer_index
try {
(new ReferenceCheckService())->syncReferenceNoByPReferIds(
[intval($refer_info['p_refer_id']), $sibling_p_refer_id],
intval($refer_info['p_article_id'])
);
} catch (\Exception $e) {
\think\Log::error('sortRefer syncReferenceNoByPReferIds: ' . $e->getMessage());
}
return jsonSuccess([]);
}

View File

@@ -10,6 +10,7 @@ use think\Db;
use think\Queue;
use think\Validate;
use think\log;
use app\common\ArticleSymbolNormalizer;
/**
* @title 公共管理相关
@@ -1380,6 +1381,10 @@ class Production extends Base
return $html;
}
/**
 * Debug helper: calls ArticleSymbolNormalizer::normalize() with an empty
 * string and discards the result.
 * NOTE(review): looks like a development leftover — confirm whether it
 * should remain in the controller.
 */
public function testsym(){
ArticleSymbolNormalizer::normalize("");
}
public function doTypeSettingNew()
{
@@ -1399,7 +1404,7 @@ class Production extends Base
$editor_info = $this->user_obj->where('user_id', $journal_info['editor_id'])->find();
$typesetInfo = [];
$typesetInfo['info_title'] = $p_info['title'];
$typesetInfo['info_title'] = ArticleSymbolNormalizer::normalize($p_info['title']);
$typesetInfo['info_type'] = $p_info['type'];
$typesetInfo['doi'] = $p_info['doi'];
$typesetInfo['topic'] = '';

View File

@@ -1307,4 +1307,231 @@ class References extends Base
}
return json_encode(['status' => 8,'msg' => 'fail']);
}
/**
 * First-time reference check of an article: validates the request, then
 * enqueues the whole article through ReferenceCheckService::enqueueByPArticle().
 *
 * POST params:
 *   p_article_id  required
 *
 * The request is rejected when the production article is not found
 * (state 0 or 2), when checkReferStatus() reports missing or uncorrected
 * references, or when check rows already exist for the article.
 *
 * @return mixed JSON string for the legacy status 2/3 errors, otherwise a jsonError()/jsonSuccess() response
 */
public function allReferenceCheckAI(){
    //Read parameters
    $aParam = $this->request->post();
    //Required-field validation
    $iPArticleId = empty($aParam['p_article_id']) ? '' : $aParam['p_article_id'];
    if(empty($iPArticleId)){
        return json_encode(array('status' => 2,'msg' => 'Please select an article' ));
    }
    //Load the article (both p_article_id and article_id are passed on to the service)
    $aWhere = ['p_article_id' => $iPArticleId,'state' => ['in',[0,2]]];
    $aProductionArticle = Db::name('production_article')->field('p_article_id,article_id')->where($aWhere)->find();
    if(empty($aProductionArticle)){
        return json_encode(array('status' => 3,'msg' => 'No articles found' ));
    }
    // checkReferStatus() returns a jsonError() response (not an int) when the
    // article has no active references. Comparing that object with 0 never
    // matches, so the error must be passed through explicitly.
    $mReferStatus = $this->checkReferStatus($iPArticleId);
    if(!is_int($mReferStatus)){
        return $mReferStatus;
    }
    if($mReferStatus === 0){
        return jsonError('请修正完文献内容再进行校对。');
    }
    //Existing check rows block a second "first" check; the reset endpoint must be used instead
    $iExisting = Db::name('article_reference_check_result')
        ->where('p_article_id', $iPArticleId)
        ->count();
    if(intval($iExisting) > 0){
        return jsonError('该文章已存在校对记录,请使用"重置校对"接口重新校对。');
    }
    try {
        $svc = new ReferenceCheckService();
        $result = $svc->enqueueByPArticle($aProductionArticle);
        return jsonSuccess($result);
    } catch (\Exception $e) {
        return jsonError($e->getMessage());
    }
}
/**
 * Reset the reference check of an article: hands the article to
 * ReferenceCheckService::resetAndRecheckByArticle(), which per the original
 * note deletes the existing check rows and re-enqueues the whole article.
 *
 * POST params:
 *   p_article_id  required
 *
 * NOTE(review): the original header listed "article_id" as the required
 * param and "@url /api/Article/referenceCheckReset"; the code reads
 * p_article_id and the method is named referenceCheckResetAI — confirm the route.
 *
 * @return mixed JSON string for the legacy status 2/3/4 errors, otherwise a jsonError()/jsonSuccess() response
 */
public function referenceCheckResetAI()
{
    //Read parameters
    $aParam = $this->request->post();
    //Required-field validation
    $iPArticleId = empty($aParam['p_article_id']) ? '' : $aParam['p_article_id'];
    if(empty($iPArticleId)){
        return json_encode(array('status' => 2,'msg' => 'Please select an article' ));
    }
    //Load the article (both p_article_id and article_id are passed on to the service)
    $aWhere = ['p_article_id' => $iPArticleId,'state' => ['in',[0,2]]];
    $aProductionArticle = Db::name('production_article')->field('p_article_id,article_id')->where($aWhere)->find();
    if(empty($aProductionArticle)){
        return json_encode(array('status' => 3,'msg' => 'No articles found' ));
    }
    // checkReferStatus() returns a jsonError() response (not an int) when the
    // article has no active references. Comparing that object with 0 never
    // matches, so the error must be passed through explicitly.
    $mReferStatus = $this->checkReferStatus($iPArticleId);
    if(!is_int($mReferStatus)){
        return $mReferStatus;
    }
    if($mReferStatus === 0){
        return jsonError('请修正完文献内容再进行校对。');
    }
    //The production article must be bound to an article
    $iArticleId = empty($aProductionArticle['article_id']) ? 0 : $aProductionArticle['article_id'];
    if(empty($iArticleId)){
        return json_encode(array('status' => 4,'msg' => 'Unbound article' ));
    }
    try {
        $result = (new ReferenceCheckService())->resetAndRecheckByArticle($aProductionArticle);
        return jsonSuccess($result);
    } catch (\Exception $e) {
        return jsonError($e->getMessage());
    }
}
/**
 * Clear every reference-check row of an article without re-enqueueing.
 *
 * Unlike referenceCheckResetAI (clear + re-check) this only calls
 * ReferenceCheckService::clearArticleChecksByPArticleId(); a new check has
 * to be triggered separately.
 *
 * POST/GET params (POST body is read first, all params as a fallback):
 *   p_article_id  required; must be a positive integer
 *
 * Success payload: p_article_id and deleted (the service's return value as int).
 */
public function referenceCheckClearAI()
{
    $aInput = $this->request->post();
    if (empty($aInput)) {
        $aInput = $this->request->param();
    }

    $iTarget = intval(empty($aInput['p_article_id']) ? 0 : $aInput['p_article_id']);
    if ($iTarget <= 0) {
        return json_encode(array('status' => 2, 'msg' => 'Please select an article'));
    }

    // Same article filter as the other check endpoints: state in [0, 2]
    $aCondition = ['p_article_id' => $iTarget, 'state' => ['in', [0, 2]]];
    $aArticle = Db::name('production_article')
        ->field('p_article_id,article_id')
        ->where($aCondition)
        ->find();
    if (empty($aArticle)) {
        return json_encode(array('status' => 3, 'msg' => 'No articles found'));
    }

    try {
        $oService = new ReferenceCheckService();
        $iRemoved = intval($oService->clearArticleChecksByPArticleId($iTarget));
        return jsonSuccess([
            'p_article_id' => $iTarget,
            'deleted' => $iRemoved,
        ]);
    } catch (\Exception $e) {
        return jsonError($e->getMessage());
    }
}
/**
 * Reference-check progress of a whole article, as returned by
 * ReferenceCheckService::getProgressByPArticleId().
 *
 * POST/GET params (POST body is read first, all params as a fallback):
 *   p_article_id  required; must be a positive integer
 *
 * Per the original note (produced by the service, not visible here), each
 * list item is grouped by reference_no and carries: reference_no, p_refer_id,
 * status, total, pending, done, failed, pass, is_pass, last_updated_at, records;
 * status: 0 = waiting, 1 = checking, 2 = finished, 3 = failed.
 */
public function referenceCheckProgressAI()
{
    $aInput = $this->request->post();
    if (empty($aInput)) {
        $aInput = $this->request->param();
    }

    $iTarget = intval(empty($aInput['p_article_id']) ? 0 : $aInput['p_article_id']);
    if ($iTarget <= 0) {
        return json_encode(array('status' => 2, 'msg' => 'Please select an article'));
    }

    try {
        $oService = new ReferenceCheckService();
        return jsonSuccess($oService->getProgressByPArticleId($iTarget));
    } catch (\Exception $e) {
        return jsonError($e->getMessage());
    }
}
/**
 * Overall reference-check status of an article (used by the front end to
 * decide which button to show), as returned by
 * ReferenceCheckService::getArticleProgressStatusByPArticleId().
 *
 * POST/GET params (POST body is read first, all params as a fallback):
 *   p_article_id  required; must be a positive integer
 *
 * Per the original note (produced by the service, not visible here):
 *   - counting is per reference (grouped by reference_no), not per detail row;
 *     e.g. 50 references backed by 111 detail rows give total = 50
 *   - status: 0 = not checked (no rows), 1 = checking (at least one reference
 *     still has unfinished rows), 2 = finished (all rows of all references done)
 *   - fields: p_article_id, status, total, pending, done, failed, progress_percent
 *       total    number of references
 *       pending  references that still have unfinished rows (including partly finished)
 *       done     references whose rows all have status = 1
 *       failed   references fully processed with at least one row of status = 2
 *     pending + done + failed = total; progress_percent = (done + failed) / total
 *
 * Use referenceCheckProgressAI for the per-reference breakdown.
 */
public function referenceCheckArticleStatusAI()
{
    $aInput = $this->request->post();
    if (empty($aInput)) {
        $aInput = $this->request->param();
    }

    $iTarget = intval(empty($aInput['p_article_id']) ? 0 : $aInput['p_article_id']);
    if ($iTarget <= 0) {
        return json_encode(array('status' => 2, 'msg' => 'Please select an article'));
    }

    try {
        $oService = new ReferenceCheckService();
        return jsonSuccess($oService->getArticleProgressStatusByPArticleId($iTarget));
    } catch (\Exception $e) {
        return jsonError($e->getMessage());
    }
}
/**
 * Check details of a single reference, as returned by
 * ReferenceCheckService::getCheckDetailsByPReferId().
 *
 * POST/GET params (POST body is read first, all params as a fallback):
 *   p_refer_id  required; must be a positive integer
 *
 * Per the original note (produced by the service, not visible here), each
 * list item carries am_id, confidence, reason, is_match, is_pass, and the
 * result also includes p_refer_id, p_article_id, reference_no and total.
 */
public function referenceCheckDetailsAI()
{
    $aInput = $this->request->post();
    if (empty($aInput)) {
        $aInput = $this->request->param();
    }

    $iReferKey = intval(empty($aInput['p_refer_id']) ? 0 : $aInput['p_refer_id']);
    if ($iReferKey <= 0) {
        return json_encode(array('status' => 2, 'msg' => 'Please select a reference'));
    }

    try {
        $oService = new ReferenceCheckService();
        return jsonSuccess($oService->getCheckDetailsByPReferId($iReferKey));
    } catch (\Exception $e) {
        return jsonError($e->getMessage());
    }
}
/**
 * Report whether all active references (state = 0) of an article have a
 * non-zero "cs" flag.
 *
 * @param mixed $p_article_id production article id
 * @return mixed 1 when no active reference has cs == 0, 0 as soon as one does;
 *               a jsonError('references error') response when the article has
 *               no active references at all.
 *               NOTE(review): because of the mixed return type, callers
 *               should test is_int() before comparing the result with 0.
 */
public function checkReferStatus($p_article_id){
    $aRefers = $this->production_article_refer_obj
        ->where('p_article_id', $p_article_id)
        ->where('state', 0)
        ->select();
    if (!$aRefers) {
        return jsonError('references error');
    }
    foreach ($aRefers as $aRefer) {
        if ($aRefer['cs'] == 0) {
            return 0;
        }
    }
    return 1;
}
}

View File

@@ -4,7 +4,6 @@ namespace app\api\job;
use think\queue\Job;
use app\common\ExpertFinderService;
use app\common\QueueJob;
/**
* 专家抓取队列任务。
@@ -16,25 +15,16 @@ use app\common\QueueJob;
*/
class FetchExperts
{
private $oQueueJob;
public function __construct()
{
$this->oQueueJob = new QueueJob();
}
public function fire(Job $job, $data)
{
$this->oQueueJob->init($job);
$field = isset($data['field']) ? (string)$data['field'] : '';
if ($field === '') {
$this->oQueueJob->log("FetchExperts 无效的 field删除任务");
$job->delete();
return;
}
try {
$service = new ExpertFinderService();
$service->doFetchForField(
$field,
@@ -42,14 +32,6 @@ class FetchExperts
isset($data['per_page']) ? intval($data['per_page']) : 100,
isset($data['min_year']) ? $data['min_year'] : null
);
$this->oQueueJob->log("FetchExperts 完成 | field={$field}");
$job->delete();
} catch (\Exception $e) {
$this->oQueueJob->handleException($e, $job, "field={$field}");
} catch (\Throwable $e) {
$this->oQueueJob->handleException($e, $job, "field={$field}");
} finally {
$this->oQueueJob->finnal();
}
}
}

View File

@@ -4,7 +4,6 @@ namespace app\api\job;
use think\queue\Job;
use app\common\ExpertFinderService;
use app\common\QueueJob;
/**
* 队列任务:用本地大模型从 affiliation 推断国家,写入 expert.country_id / country。
@@ -17,16 +16,9 @@ use app\common\QueueJob;
*/
class FillExpertCountry
{
private $oQueueJob;
public function __construct()
{
$this->oQueueJob = new QueueJob();
}
public function fire(Job $job, $data)
{
$this->oQueueJob->init($job);
$expertId = intval(isset($data['expert_id']) ? $data['expert_id'] : 0);
$affiliation = isset($data['affiliation']) ? trim((string)$data['affiliation']) : '';
@@ -35,7 +27,6 @@ class FillExpertCountry
$service = new ExpertFinderService();
try {
if ($expertId && $affiliation !== '') {
$service->fillExpertCountry($expertId, $affiliation, $chatUrl);
}
@@ -43,12 +34,5 @@ class FillExpertCountry
// 链式:处理完当前专家立刻拉下一个进来
$service->enqueueNextCountryFill(1, $queue, $chatUrl);
} catch (\Exception $e) {
$this->oQueueJob->handleException($e, $job, "expert_id={$expertId} queue={$queue}");
} catch (\Throwable $e) {
$this->oQueueJob->handleException($e, $job, "expert_id={$expertId} queue={$queue}");
} finally {
$this->oQueueJob->finnal();
}
}
}

View File

@@ -0,0 +1,37 @@
<?php
namespace app\api\job;
use think\queue\Job;
use app\common\PlagiarismService;
/**
* 队列任务:轮询 Turnitin similarity 状态。
*
* 未完成会再次入队(链式延迟),完成后下载 PDF 报告并写本地永久保留。
*
* data:
* - check_id t_plagiarism_check.check_id
* - attempt 当前轮询次数(首次为 1
*
* 注意:单条 job 通常很短1 个 HTTP 请求),但会反复入队,常驻 worker 长时间运行
* 由 QueueJob 在进程超 6h 或致命 DB 错误时主动 exit(1) 让 supervisor 拉起新进程。
*/
class PlagiarismPoll
{
public function fire(Job $job, $data)
{
$checkId = isset($data['check_id']) ? intval($data['check_id']) : 0;
$attempt = isset($data['attempt']) ? intval($data['attempt']) : 1;
if ($checkId <= 0) {
$job->delete();
return;
}
$svc = new PlagiarismService();
$svc->runPollStatus($checkId, $attempt);
$job->delete();
}
}

View File

@@ -0,0 +1,36 @@
<?php
namespace app\api\job;
use think\queue\Job;
use app\common\PlagiarismService;
/**
* 队列任务:上传论文到 Turnitin + 触发 similarity 检测。
*
* 完成后会自动入队 PlagiarismPoll 进行后续轮询。
*
* data:
* - check_id t_plagiarism_check.check_id
* - file_path 本地可读的 PDF/DOCX 绝对路径
*
* 注意:上传单个 PDF 可能耗时数十秒,常驻 worker 由 QueueJob 在进程超 6h 或致命 DB
* 错误时主动 exit(1) 让 supervisor 拉起新进程。
*/
class PlagiarismRun
{
public function fire(Job $job, $data)
{
$checkId = isset($data['check_id']) ? intval($data['check_id']) : 0;
$filePath = isset($data['file_path']) ? (string)$data['file_path'] : '';
if ($checkId <= 0 || $filePath === '') {
$job->delete();
return;
}
$svc = new PlagiarismService();
$svc->log("PlagiarismRun job act!!");
$svc->runUploadAndTrigger($checkId, $filePath);
$job->delete();
}
}

View File

@@ -4,7 +4,6 @@ namespace app\api\job;
use think\queue\Job;
use app\common\PromotionService;
use app\common\QueueJob;
/**
* 【已废弃 / 兼容保留】
@@ -16,35 +15,17 @@ use app\common\QueueJob;
*/
class PromotionPrepare
{
private $oQueueJob;
public function __construct()
{
$this->oQueueJob = new QueueJob();
}
public function fire(Job $job, $data)
{
$this->oQueueJob->init($job);
$taskId = isset($data['task_id']) ? intval($data['task_id']) : 0;
if ($taskId <= 0) {
$this->oQueueJob->log("PromotionPrepare[deprecated] 无效的 task_id删除任务");
$job->delete();
return;
}
try {
$service = new PromotionService();
$service->enqueuePrepareTask($taskId);
$this->oQueueJob->log("PromotionPrepare[deprecated] forwarded task_id={$taskId} -> PromotionPrepareTask");
$job->delete();
} catch (\Exception $e) {
$this->oQueueJob->handleException($e, $job, "[deprecated] task_id={$taskId}");
} catch (\Throwable $e) {
$this->oQueueJob->handleException($e, $job, "[deprecated] task_id={$taskId}");
} finally {
$this->oQueueJob->finnal();
}
}
}

View File

@@ -4,7 +4,6 @@ namespace app\api\job;
use think\queue\Job;
use app\common\PromotionService;
use app\common\QueueJob;
/**
* 队列任务:单封邮件 prepare调用 LLM 生成个性化描述 + 渲染模板 + 写入 log
@@ -18,41 +17,13 @@ use app\common\QueueJob;
*/
class PromotionPrepareEmail
{
// private $oQueueJob;
// public function __construct()
// {
// $this->oQueueJob = new QueueJob();
// }
public function fire(Job $job, $data)
{
// $this->oQueueJob->init($job);
//
$logId = isset($data['log_id']) ? intval($data['log_id']) : 0;
// if ($logId <= 0) {
// $this->oQueueJob->log("PromotionPrepareEmail 无效的 log_id删除任务");
// $job->delete();
// return;
// }
//
// try {
$service = new PromotionService();
$service->log("id:".$logId);
// $result = $service->prepareSingleEmail($logId);
//
// $code = isset($result['code']) ? $result['code'] : '';
// $msg = isset($result['msg']) ? $result['msg'] : '';
// $llm = isset($result['llm_status']) ? $result['llm_status'] : '';
// $this->oQueueJob->log("PromotionPrepareEmail 完成 | log_id={$logId} code={$code} llm_status={$llm} msg={$msg}");
$result = $service->prepareSingleEmail($logId);
$job->delete();
// } catch (\Exception $e) {
// $this->oQueueJob->handleException($e, $job, "log_id={$logId}");
// } catch (\Throwable $e) {
// $this->oQueueJob->handleException($e, $job, "log_id={$logId}");
// } finally {
// $this->oQueueJob->finnal();
// }
}
}

View File

@@ -17,40 +17,17 @@ use app\common\QueueJob;
*/
class PromotionPrepareTask
{
private $oQueueJob;
public function __construct()
{
$this->oQueueJob = new QueueJob();
}
public function fire(Job $job, $data)
{
$this->oQueueJob->init($job);
$taskId = isset($data['task_id']) ? intval($data['task_id']) : 0;
if ($taskId <= 0) {
$this->oQueueJob->log("PromotionPrepareTask 无效的 task_id删除任务");
$job->delete();
return;
}
try {
$service = new PromotionService();
$result = $service->dispatchPrepareEmails($taskId);
$dispatched = isset($result['dispatched']) ? $result['dispatched'] : 0;
$alreadyDone = isset($result['already_done']) ? $result['already_done'] : 0;
$err = isset($result['error']) ? $result['error'] : '';
$this->oQueueJob->log("PromotionPrepareTask 完成 | task_id={$taskId} dispatched={$dispatched} already_done={$alreadyDone} error={$err}");
$job->delete();
} catch (\Exception $e) {
$this->oQueueJob->handleException($e, $job, "task_id={$taskId}");
} catch (\Throwable $e) {
$this->oQueueJob->handleException($e, $job, "task_id={$taskId}");
} finally {
$this->oQueueJob->finnal();
}
}
}

View File

@@ -4,7 +4,6 @@ namespace app\api\job;
use think\queue\Job;
use app\common\PromotionService;
use app\common\QueueJob;
/**
* 队列任务:发送 task 下"已 prepare"的邮件,按 min/max_interval 控制节奏。
@@ -16,39 +15,18 @@ use app\common\QueueJob;
*/
class PromotionSend
{
private $oQueueJob;
public function __construct()
{
$this->oQueueJob = new QueueJob();
}
public function fire(Job $job, $data)
{
$this->oQueueJob->init($job);
$taskId = isset($data['task_id']) ? intval($data['task_id']) : 0;
if ($taskId <= 0) {
$this->oQueueJob->log("PromotionSend 无效的 task_id删除任务");
$job->delete();
return;
}
try {
$service = new PromotionService();
$result = $service->processNextEmail($taskId);
$done = !empty($result['done']) ? 1 : 0;
$reason = isset($result['reason']) ? $result['reason'] : '';
$this->oQueueJob->log("PromotionSend 完成 | task_id={$taskId} done={$done} reason={$reason}");
$job->delete();
} catch (\Exception $e) {
$this->oQueueJob->handleException($e, $job, "task_id={$taskId}");
} catch (\Throwable $e) {
$this->oQueueJob->handleException($e, $job, "task_id={$taskId}");
} finally {
$this->oQueueJob->finnal();
}
}
}

View File

@@ -0,0 +1,114 @@
<?php
namespace app\api\job;
use think\Db;
use think\queue\Job;
use app\common\QueueJob;
use app\common\QueueRedis;
use app\common\ReferenceCheckService;
class ReferenceCheck
{
    /** Shared queue-job helper (logging, locking, exception handling, cleanup). */
    private $oQueueJob;

    /** Redis helper used to mark the job as finished. */
    private $QueueRedis;

    /** Expiry handed to QueueRedis::finishJob() for the "completed" marker (presumably seconds). */
    private $completedExprie = 3600;

    public function __construct()
    {
        $this->oQueueJob = new QueueJob();
        $this->QueueRedis = QueueRedis::getInstance();
    }

    /**
     * Queue entry point: run one reference check identified by `check_id`.
     *
     * The job is deleted without further work when the id is invalid, the result row
     * does not exist, or the row already has status 1. Otherwise the check is run under
     * a Redis lock; on failure the job is released for retry (30s) until the third
     * attempt, after which the row is marked as failed and the job is deleted.
     *
     * @param Job   $job  queue job being executed
     * @param array $data job payload; `check_id` is read from it
     * @return void
     */
    public function fire(Job $job, $data)
    {
        $this->oQueueJob->init($job);

        $body = $job->getRawBody();
        $envelope = empty($body) ? [] : json_decode($body, true);
        $jobId = empty($envelope['id']) ? 'unknown' : $envelope['id'];

        // Kept outside the try block so the outer exception handlers can always see them.
        $sRedisKey = '';
        $sRedisValue = '';

        $this->oQueueJob->log("-----------队列任务开始-----------");
        $this->oQueueJob->log("当前任务ID: {$jobId}, 尝试次数: {$job->attempts()}");

        try {
            $checkId = $this->resolveCheckId($data, $envelope);
            if ($checkId <= 0) {
                $job->delete();
                return;
            }

            $row = Db::name('article_reference_check_result')->where('id', $checkId)->find();
            // Missing row, or status 1: there is nothing to do for this job.
            if (empty($row) || intval($row['status']) === 1) {
                $job->delete();
                return;
            }

            $sClassName = get_class($this);
            $sRedisKey = "queue_job:{$sClassName}:{$checkId}";
            $sRedisValue = uniqid() . '_' . getmypid();

            $service = new ReferenceCheckService();
            $service->clearReferenceCheckQueueLock($checkId);
            if (!$this->oQueueJob->acquireLock($sRedisKey, $sRedisValue, $job)) {
                return;
            }

            try {
                $service->runReferenceCheckOnce($checkId);

                $amId = intval(isset($row['am_id']) ? $row['am_id'] : 0);
                if ($amId > 0) {
                    $service->syncAmRefCheckStatus($amId);
                }

                $this->QueueRedis->finishJob($sRedisKey, 'completed', $this->completedExprie, $sRedisValue);
                $job->delete();
                $this->oQueueJob->log("任务执行成功 | 日志ID: {$sRedisKey}");
            } catch (\Exception $failure) {
                $this->oQueueJob->log('ReferenceCheck error: ' . $failure->getMessage());
                if ($job->attempts() < 3) {
                    // Still within the retry budget: hand the job back to the queue.
                    $job->release(30);
                } else {
                    $this->markFailed($checkId, $failure->getMessage());
                    $job->delete();
                }
            }
        } catch (\RuntimeException $e) {
            $this->oQueueJob->handleRetryableException($e, $sRedisKey, $sRedisValue, $job);
        } catch (\LogicException $e) {
            $this->oQueueJob->handleNonRetryableException($e, $sRedisKey, $sRedisValue, $job);
        } catch (\Exception $e) {
            $this->oQueueJob->handleRetryableException($e, $sRedisKey, $sRedisValue, $job);
        } finally {
            $this->oQueueJob->finnal();
        }
    }

    /**
     * Read check_id from the payload passed to fire(), falling back to the decoded raw job body.
     *
     * @param mixed $data     payload handed to fire()
     * @param mixed $envelope decoded raw job body
     * @return int check id, or a value <= 0 when none could be found
     */
    private function resolveCheckId($data, $envelope)
    {
        $checkId = intval(isset($data['check_id']) ? $data['check_id'] : 0);
        if ($checkId <= 0 && !empty($envelope['data']['check_id'])) {
            $checkId = intval($envelope['data']['check_id']);
        }
        return $checkId;
    }

    /**
     * Flag the check row as failed (status 2) and refresh the owning am record's status.
     *
     * A failure while writing the row is only logged; the am sync is still attempted.
     *
     * @param int    $checkId id in article_reference_check_result
     * @param string $msg     error message stored in error_msg
     * @return void
     */
    private function markFailed($checkId, $msg)
    {
        $row = Db::name('article_reference_check_result')->where('id', $checkId)->find();

        try {
            $update = [
                'status' => 2,
                'error_msg' => $msg,
            ];
            (new ReferenceCheckService())->updateCheckResult($checkId, $update);
        } catch (\Exception $e) {
            \think\Log::error('ReferenceCheck markFailed: ' . $e->getMessage());
        }

        $amId = (!empty($row) && isset($row['am_id'])) ? intval($row['am_id']) : 0;
        if ($amId > 0) {
            (new ReferenceCheckService())->syncAmRefCheckStatus($amId);
        }
    }
}

View File

@@ -0,0 +1,162 @@
<?php
namespace app\api\job;
use think\Db;
use think\queue\Job;
use app\common\QueueJob;
use app\common\QueueRedis;
use app\common\ReferenceCheckService;
use app\common\service\LLMService;
class ReferenceCheckTwo
{
    /** Shared queue-job helper (logging, locking, exception handling, cleanup). */
    private $oQueueJob;

    /** Redis helper used to mark the job as finished. */
    private $QueueRedis;

    /** Expiry handed to QueueRedis::finishJob() for the "completed" marker (presumably seconds). */
    private $completedExprie = 3600;

    public function __construct()
    {
        $this->oQueueJob = new QueueJob();
        $this->QueueRedis = QueueRedis::getInstance();
    }

    /**
     * Queue entry point: re-check one reference result with the LLM, using the
     * payload built by ReferenceCheckService::prepareRecheckPayload().
     *
     * Flow:
     *  1. Resolve check_id and load the result row; invalid id / missing row deletes the job.
     *  2. Acquire the per-check Redis lock (only after validation, so early exits never
     *     leave a lock behind).
     *  3. Resolve the article content and reference text; if either is empty the row is
     *     marked failed and the job deleted.
     *  4. Call the LLM. A transport failure writes status=2 and throws so the retry
     *     logic below kicks in; otherwise the verdict is written with status=1.
     *
     * Rows whose status is already 1 are NOT skipped here (that check is disabled in
     * this job), so an already-checked row can be re-checked.
     *
     * @param Job   $job  queue job being executed
     * @param array $data job payload; `check_id` is read from it
     * @return void
     */
    public function fire(Job $job, $data)
    {
        $this->oQueueJob->init($job);
        $rawBody = empty($job->getRawBody()) ? '' : $job->getRawBody();
        $jobData = empty($rawBody) ? [] : json_decode($rawBody, true);
        $jobId = empty($jobData['id']) ? 'unknown' : $jobData['id'];
        $sRedisKey = '';
        $sRedisValue = '';
        $this->oQueueJob->log("-----------队列任务开始-----------");
        $this->oQueueJob->log("当前任务ID: {$jobId}, 尝试次数: {$job->attempts()}");
        try {
            $checkId = intval(isset($data['check_id']) ? $data['check_id'] : 0);
            if ($checkId <= 0 && !empty($jobData['data']['check_id'])) {
                $checkId = intval($jobData['data']['check_id']);
            }

            // Validate before locking: these exits do not release the lock, so taking
            // it first would leave a stale lock for an id that can never be processed.
            if ($checkId <= 0) {
                $job->delete();
                return;
            }
            $row = Db::name('article_reference_check_result')->where('id', $checkId)->find();
            if (empty($row)) {
                $job->delete();
                return;
            }

            $sClassName = get_class($this);
            $sRedisKey = "queue_job_two:{$sClassName}:{$checkId}";
            $sRedisValue = uniqid() . '_' . getmypid();
            if (!$this->oQueueJob->acquireLock($sRedisKey, $sRedisValue, $job)) {
                return;
            }

            try {
                $svc = new ReferenceCheckService();
                $contentA = $svc->resolveMainContentForJob($row);
                $referText = trim((string)(isset($row['refer_text']) ? $row['refer_text'] : ''));

                // Without both texts there is nothing to compare; fail fast before
                // doing the reference lookup and payload preparation.
                if ($contentA === '' || $referText === '') {
                    $this->markFailed($checkId, 'Missing article_main.content or refer_text');
                    $job->delete();
                    return;
                }

                $refer = null;
                $pReferId = intval(isset($row['p_refer_id']) ? $row['p_refer_id'] : 0);
                if ($pReferId > 0) {
                    $refer = Db::name('production_article_refer')
                        ->where('p_refer_id', $pReferId)
                        ->where('state', 0)
                        ->find();
                }
                $payload = $svc->prepareRecheckPayload(is_array($refer) ? $refer : [], $referText);
                $doiBlock = $payload['doi_block'];

                $llm = new LLMService();
                $llmResult = $llm->checkReference($contentA, $referText, true, $doiBlock);
                $requestFailed = !empty($llmResult['request_failed']);
                $canSupport = $svc->parseLlmCanSupport($llmResult);
                // A failed request may not carry a confidence value; default to 0 instead
                // of triggering an undefined-index error that would mask the real reason.
                $confidence = floatval(isset($llmResult['confidence']) ? $llmResult['confidence'] : 0);
                $llmReason = isset($llmResult['reason']) ? $llmResult['reason'] : '';

                $tag = $payload['has_abstract']
                    ? ('[Crossref复核' . ($payload['doi_used'] !== '' ? ' ' . $payload['doi_used'] : '') . ']')
                    : '[Crossref复核-无摘要]';
                $reason = $tag . ' ' . $llmReason;

                // LLM communication failure: record status=2, then throw so the catch
                // block below decides between retry and final failure.
                if ($requestFailed) {
                    $failMsg = isset($llmResult['reason']) ? $llmResult['reason'] : 'LLM request failed';
                    $svc->updateCheckResult($checkId, [
                        'confidence' => $confidence,
                        'reason' => $reason,
                        'status' => 2,
                        'error_msg' => $failMsg,
                    ]);
                    throw new \RuntimeException($failMsg);
                }

                $affected = $svc->updateCheckResult($checkId, [
                    'can_support' => $canSupport ? 1 : 0,
                    'is_match' => $canSupport ? 1 : 0,
                    'confidence' => $confidence,
                    'reason' => $reason,
                    'status' => 1,
                    'error_msg' => '',
                ]);
                $this->oQueueJob->log("Crossref复核写入 id={$checkId} affected={$affected} can_support=" . ($canSupport ? 1 : 0) . " confidence=" . $confidence);

                $amId = intval(isset($row['am_id']) ? $row['am_id'] : 0);
                if ($amId > 0) {
                    $svc->syncAmRefCheckStatus($amId);
                }
                $this->QueueRedis->finishJob($sRedisKey, 'completed', $this->completedExprie, $sRedisValue);
                $job->delete();
                $this->oQueueJob->log("任务执行成功 | 日志ID: {$sRedisKey}");
            } catch (\Exception $e) {
                $this->oQueueJob->log('ReferenceCheckTwo error: ' . $e->getMessage());
                if ($job->attempts() >= 3) {
                    // Retry budget exhausted: persist the failure and drop the job.
                    $this->markFailed($checkId, $e->getMessage());
                    $job->delete();
                    return;
                }
                $job->release(30);
            }
        } catch (\RuntimeException $e) {
            $this->oQueueJob->handleRetryableException($e, $sRedisKey, $sRedisValue, $job);
        } catch (\LogicException $e) {
            $this->oQueueJob->handleNonRetryableException($e, $sRedisKey, $sRedisValue, $job);
        } catch (\Exception $e) {
            $this->oQueueJob->handleRetryableException($e, $sRedisKey, $sRedisValue, $job);
        } finally {
            $this->oQueueJob->finnal();
        }
    }

    /**
     * Flag the check row as failed (status 2) and refresh the owning am record's status.
     *
     * A failure while writing the row is only logged; the am sync is still attempted.
     *
     * @param int    $checkId id in article_reference_check_result
     * @param string $msg     error message stored in error_msg
     * @return void
     */
    private function markFailed($checkId, $msg)
    {
        $row = Db::name('article_reference_check_result')->where('id', $checkId)->find();
        try {
            (new ReferenceCheckService())->updateCheckResult($checkId, [
                'status' => 2,
                'error_msg' => $msg,
            ]);
        } catch (\Exception $e) {
            \think\Log::error('ReferenceCheckTwo markFailed: ' . $e->getMessage());
        }
        $amId = empty($row) ? 0 : intval(isset($row['am_id']) ? $row['am_id'] : 0);
        if ($amId > 0) {
            (new ReferenceCheckService())->syncAmRefCheckStatus($amId);
        }
    }
}

View File

@@ -0,0 +1,31 @@
<?php
namespace app\api\job;
use think\queue\Job;
class myQueue
{
/**
 * Placeholder job: contains no business logic and always deletes the job,
 * whether or not something was thrown.
 *
 * @param Job   $job  queue job being executed
 * @param mixed $data job payload (unused)
 * @return void
 */
public function fire(Job $job, $data)
{
try {
// Business logic goes here (currently empty).
// Any code may be placed here.
// On success, delete the job.
$job->delete();
} catch (\Throwable $e) {
// No retry is wanted, so the job is deleted on failure as well.
$job->delete();
}
}
// Optional hook, presumably invoked by the queue when the job fails — confirm against think-queue.
public function failed($data)
{
// Nothing is done on failure; the job is simply skipped.
}
}

View File

@@ -0,0 +1,194 @@
<?php
namespace app\common;
/**
* 期刊文章内容「符号层」校对:只调整标点、空白、全角半角等,不增删语义文字。
*
* 设计原则:
* - 默认规则保守,可通过 $options 逐项关闭;
* - 纯文本用 normalize();含 HTML 时用 normalizeHtml()(仅处理标签之间的文本段,避免破坏属性里的 URL
* - Abstract 常用:存储时被转义为 &gt; &lt; &amp; 等,可用 normalizeAbstract() 先解码再符号校对。
* - 英文期刊正文/摘要通常不含中文:设 english_journal=true或 normalizeEnglishAbstract可关闭仅针对汉字的规则。
*/
class ArticleSymbolNormalizer
{
    /** @var string PCRE character-class body for common Han characters (U+4E00-9FFF, U+3400-4DBF). */
    private static $han = '\x{4E00}-\x{9FFF}\x{3400}-\x{4DBF}';

    /**
     * Symbol-level normalisation of plain text.
     *
     * Full-width characters are written as code-point escapes in the patterns below so
     * the rules keep working even if the source file's encoding gets mangled.
     *
     * @param string $text
     * @param array  $options boolean switches:
     *   - line_endings         (default true)  CRLF / CR -> LF
     *   - fullwidth_space      (default true)  U+3000 ideographic space -> ASCII space
     *   - collapse_spaces      (default true)  runs of spaces/tabs (not newlines) -> one space
     *   - remove_zwsp          (default true)  drop U+200B-U+200D, U+FEFF, U+00AD
     *   - comma_cjk            (default true)  ASCII "," between two Han characters -> U+FF0C
     *   - comma_latin          (default true)  U+FF0C between two ASCII letters/digits -> ","
     *   - period_cjk           (default true)  U+FF0E directly after a Han character -> U+3002
     *   - bracket_latin        (default false) U+FF08 ... U+FF09 around ASCII-only content -> "(...)"
     *   - decode_html_entities (default false) decode &gt; &lt; &amp; &quot; &#39; and numeric entities first
     *   - english_journal      (default false) turns comma_cjk and period_cjk off unless the
     *                                          caller passed those keys explicitly
     * @return string normalised text; a rule whose regex fails (e.g. on invalid UTF-8)
     *                leaves the text unchanged instead of wiping it
     */
    public static function normalize($text, array $options = [])
    {
        $text = (string)$text;
        if ($text === '') {
            return '';
        }
        $o = array_merge([
            'line_endings' => true,
            'fullwidth_space' => true,
            'collapse_spaces' => true,
            'remove_zwsp' => true,
            'comma_cjk' => true,
            'comma_latin' => true,
            'period_cjk' => true,
            'bracket_latin' => false,
            'decode_html_entities' => false,
            'english_journal' => false,
        ], $options);

        // English journals: disable the Han-only rules, but respect explicit caller choices.
        if (!empty($o['english_journal'])) {
            if (!array_key_exists('comma_cjk', $options)) {
                $o['comma_cjk'] = false;
            }
            if (!array_key_exists('period_cjk', $options)) {
                $o['period_cjk'] = false;
            }
        }

        if (!empty($o['decode_html_entities'])) {
            $text = html_entity_decode($text, ENT_QUOTES | ENT_HTML5, 'UTF-8');
        }
        if (!empty($o['line_endings'])) {
            $text = str_replace(["\r\n", "\r"], "\n", $text);
        }
        if (!empty($o['fullwidth_space'])) {
            $text = str_replace("\u{3000}", ' ', $text);
        }
        if (!empty($o['remove_zwsp'])) {
            // Zero-width space/joiners, BOM and soft hyphen: invisible, so removing them
            // does not change the visible text.
            $text = self::replaceOrKeep('/[\x{200B}-\x{200D}\x{FEFF}\x{00AD}]/u', '', $text);
        }
        if (!empty($o['collapse_spaces'])) {
            $text = self::replaceOrKeep('/[ \t]{2,}/u', ' ', $text);
        }

        $han = self::$han;
        if (!empty($o['comma_cjk'])) {
            // Han "," Han -> Han U+FF0C Han
            $text = self::replaceOrKeep('/(?<=[' . $han . ']),(?=[' . $han . '])/u', "\u{FF0C}", $text);
        }
        if (!empty($o['comma_latin'])) {
            // alnum U+FF0C alnum -> alnum "," alnum
            $text = self::replaceOrKeep('/(?<=[0-9A-Za-z])\x{FF0C}(?=[0-9A-Za-z])/u', ',', $text);
        }
        if (!empty($o['period_cjk'])) {
            // Full-width full stop U+FF0E after a Han character -> ideographic full stop U+3002
            $text = self::replaceOrKeep('/(?<=[' . $han . '])\x{FF0E}/u', "\u{3002}", $text);
        }
        if (!empty($o['bracket_latin'])) {
            // Only when the bracketed content is ASCII letters/digits, whitespace and
            // common punctuation. The "/" inside the class must be escaped because "/"
            // is also the pattern delimiter.
            $converted = preg_replace_callback(
                '/\x{FF08}([0-9A-Za-z\s\.,;:\-\+\/=]+)\x{FF09}/u',
                static function ($m) {
                    return '(' . $m[1] . ')';
                },
                $text
            );
            if ($converted !== null) {
                $text = $converted;
            }
        }
        return $text;
    }

    /**
     * preg_replace() that returns the subject unchanged when PCRE reports an error
     * (preg_replace returns null in that case, e.g. for invalid UTF-8 with the /u flag).
     *
     * @param string $pattern
     * @param string $replacement
     * @param string $text
     * @return string
     */
    private static function replaceOrKeep($pattern, $replacement, $text)
    {
        $result = preg_replace($pattern, $replacement, $text);
        return $result === null ? $text : $result;
    }

    /**
     * Normalise an HTML fragment: only text outside tags is changed; tag names and
     * attribute values are kept verbatim.
     *
     * The input is split on `<...>`; text segments go through normalize(), tag segments
     * are copied as-is. Malformed HTML or an unescaped `<` inside an attribute value can
     * be mis-split; for complex input extract the plain text first.
     *
     * @param string $html
     * @param array  $options same as normalize()
     * @return string
     */
    public static function normalizeHtml($html, array $options = [])
    {
        $html = (string)$html;
        if ($html === '') {
            return '';
        }
        $parts = preg_split('/(<[^>]*>)/u', $html, -1, PREG_SPLIT_DELIM_CAPTURE);
        if ($parts === false) {
            // Splitting failed: fall back to treating the input as plain text.
            return self::normalize($html, $options);
        }
        $out = '';
        foreach ($parts as $i => $chunk) {
            if ($chunk === '') {
                continue;
            }
            // Even indexes are text; odd indexes starting with "<" are captured tags.
            if ($i % 2 === 1 && isset($chunk[0]) && $chunk[0] === '<') {
                $out .= $chunk;
            } else {
                $out .= self::normalize($chunk, $options);
            }
        }
        return $out;
    }

    /**
     * Abstract helper: decode HTML entities (&gt; -> > etc.), then apply normalize().
     *
     * Intended for abstracts stored in htmlspecialchars form. If the abstract contains
     * real HTML tags that must be preserved, use normalizeAbstractHtml() instead.
     *
     * @param string $abstract
     * @param array  $options same as normalize(); decode_html_entities defaults to true
     *                        here and can be overridden with an explicit false
     * @return string
     */
    public static function normalizeAbstract($abstract, array $options = [])
    {
        $opts = array_merge(['decode_html_entities' => true], $options);
        return self::normalize($abstract, $opts);
    }

    /**
     * Abstract with HTML tags: entity decoding + normalisation on text outside tags only.
     *
     * @param string $html
     * @param array  $options same as normalize(); decode_html_entities defaults to true
     * @return string
     */
    public static function normalizeAbstractHtml($html, array $options = [])
    {
        $opts = array_merge(['decode_html_entities' => true], $options);
        return self::normalizeHtml($html, $opts);
    }

    /**
     * English-journal abstract: entity decoding + normalisation with the Han-only
     * punctuation rules off by default.
     *
     * @param string $abstract
     * @param array  $options same as normalize()
     * @return string
     */
    public static function normalizeEnglishAbstract($abstract, array $options = [])
    {
        return self::normalizeAbstract($abstract, array_merge(['english_journal' => true], $options));
    }

    /**
     * English-journal abstract containing HTML: same as normalizeEnglishAbstract(), but
     * only text outside tags is touched.
     *
     * @param string $html
     * @param array  $options same as normalize()
     * @return string
     */
    public static function normalizeEnglishAbstractHtml($html, array $options = [])
    {
        return self::normalizeAbstractHtml($html, array_merge(['english_journal' => true], $options));
    }
}

View File

@@ -0,0 +1,440 @@
<?php
namespace app\common;
use think\Db;
use think\Env;
use think\Queue;
use think\Exception;
/**
* 查重业务层:把 TurnitinService 的低层调用包装成"按 article 查重"的高层流程,
* 并维护 t_plagiarism_check 状态机。
*
* 状态流:
* submit() → state=1上传中入队 PlagiarismRun
* PlagiarismRun.fire → 上传 + 触发 similarity → state=2比对中入队 PlagiarismPoll
* PlagiarismPoll.fire → 轮询 status完成后下载 PDF → state=3完成
* 任意环节抛异常 → state=4失败写 error_msg
*/
class PlagiarismService
{
    /**
     * Directory (relative to the project root) where report PDFs are stored.
     */
    const REPORT_DIR = 'public/plagiarism';

    /**
     * Delay passed to Queue::later() between two similarity-status polls.
     */
    const POLL_INTERVAL = 30;

    /**
     * Maximum number of poll attempts before the check is marked as failed.
     */
    const MAX_POLL_ATTEMPTS = 60;

    /**
     * Queue that receives the upload job (PlagiarismRun).
     */
    const QUEUE_RUN = 'PlagiarismRun';

    /**
     * Queue that receives EVERY poll job (PlagiarismPoll). The first poll and all
     * re-polls must share one queue, otherwise re-polls land where no worker for the
     * first queue will ever pick them up.
     */
    const QUEUE_POLL = 'PlagiarismPoll';

    /** Absolute path of the service's own log file. */
    private $logFile;

    public function __construct()
    {
        $this->logFile = ROOT_PATH . 'runtime' . DS . 'plagiarism_task.log';
    }

    // ---------- Top-level entry points ----------

    /**
     * Submit a plagiarism check: insert a plagiarism_check row (state=1) and enqueue
     * the PlagiarismRun job. Returns immediately.
     *
     * @param int    $articleId   article id
     * @param string $filePath    locally readable manuscript path
     * @param int    $triggeredBy user id of whoever triggered the check
     * @param string $source      trigger source, e.g. 'manual' / 'auto_xxx'
     * @return int check_id of the new row
     * @throws Exception when the file is missing or unreadable
     */
    public function submit($articleId, $filePath, $triggeredBy = 0, $source = 'manual')
    {
        if (!is_file($filePath) || !is_readable($filePath)) {
            throw new Exception("File not readable: {$filePath}");
        }
        $journalId = (int) Db::name('article')
            ->where('article_id', $articleId)
            ->value('journal_id');
        $now = time();
        $checkId = Db::name('plagiarism_check')->insertGetId([
            'article_id' => $articleId,
            'journal_id' => $journalId,
            'triggered_by' => $triggeredBy,
            'trigger_source' => $source,
            'state' => 1, // uploading
            'source_file_name' => basename($filePath),
            'source_file_size' => filesize($filePath) ?: 0,
            'ctime' => $now,
            'utime' => $now,
        ]);
        $this->log("submit service act");
        // Queue the actual work: upload + trigger similarity.
        Queue::push(
            'app\\api\\job\\PlagiarismRun',
            ['check_id' => $checkId, 'file_path' => $filePath],
            self::QUEUE_RUN
        );
        return (int)$checkId;
    }

    /**
     * Called by the PlagiarismRun job: create a Turnitin submission, upload the file,
     * trigger the similarity report (state=2) and schedule the first poll.
     *
     * @param int    $checkId
     * @param string $filePath local path of the file to upload
     * @return void
     * @throws \Throwable any failure is recorded via markFailed() and then re-thrown
     */
    public function runUploadAndTrigger($checkId, $filePath)
    {
        $check = $this->mustGetCheck($checkId);
        $this->log("runUploadAndTrigger is act0");
        try {
            $tii = new TurnitinService();

            // 1. Create the submission.
            $articleTitle = (string) Db::name('article')
                ->where('article_id', $check['article_id'])
                ->value('title');
            if ($articleTitle === '') {
                $articleTitle = 'Article #' . $check['article_id'];
            }
            $this->log("runUploadAndTrigger is act1");
            $createResp = $tii->createSubmission([
                'title' => mb_substr($articleTitle, 0, 250),
                'owner' => 'editor_' . $check['triggered_by'],
                'submitter' => 'editor_' . $check['triggered_by'],
                'metadata' => [
                    'article_id' => (string)$check['article_id'],
                    'check_id' => (string)$check['check_id'],
                ],
            ]);
            $submissionId = isset($createResp['id']) ? $createResp['id'] : '';
            if ($submissionId === '') {
                throw new Exception('Turnitin createSubmission returned empty id: ' . json_encode($createResp));
            }
            $this->updateCheck($checkId, [
                'tii_submission_id' => $submissionId,
                'raw_response' => json_encode($createResp, JSON_UNESCAPED_UNICODE),
            ]);
            $this->log("runUploadAndTrigger is act2");

            // 2. Upload the file.
            $tii->uploadFile($submissionId, $filePath, basename($filePath));

            // 3. Trigger the similarity report.
            $simResp = $tii->triggerSimilarity($submissionId);
            $this->updateCheck($checkId, [
                'state' => 2, // comparing
                'tii_report_status' => 'PROCESSING',
                'raw_response' => json_encode($simResp, JSON_UNESCAPED_UNICODE),
            ]);
            $this->log("runUploadAndTrigger is act3");

            // 4. Schedule the first poll (delayed so Turnitin has time to start processing).
            $this->enqueuePoll($checkId, 1);
        } catch (\Throwable $e) {
            $this->markFailed($checkId, '[upload] ' . $e->getMessage());
            throw $e;
        }
    }

    /**
     * Called by the PlagiarismPoll job: poll the similarity status. On COMPLETE the PDF
     * is downloaded and the row moves to state=3; on ERROR the row is marked failed;
     * otherwise another poll is scheduled until MAX_POLL_ATTEMPTS is reached.
     *
     * @param int $checkId
     * @param int $attempt 1-based attempt counter carried through the queue payload
     * @return void
     * @throws \Throwable only when an error occurs on the final allowed attempt
     */
    public function runPollStatus($checkId, $attempt = 1)
    {
        $check = $this->mustGetCheck($checkId);
        if (empty($check['tii_submission_id'])) {
            $this->markFailed($checkId, '[poll] tii_submission_id empty');
            return;
        }
        try {
            $tii = new TurnitinService();
            $statusResp = $tii->getSimilarityStatus($check['tii_submission_id']);
            $status = isset($statusResp['status']) ? strtoupper($statusResp['status']) : '';
            $this->updateCheck($checkId, [
                'tii_report_status' => $status,
                'attempts' => $attempt,
                'raw_response' => json_encode($statusResp, JSON_UNESCAPED_UNICODE),
            ]);

            if ($status === 'COMPLETE') {
                $score = isset($statusResp['overall_match_percentage'])
                    ? floatval($statusResp['overall_match_percentage']) : 0;
                // Download the PDF and fetch the online viewer URL.
                $localPdf = $this->downloadAndStorePdf($tii, $check['tii_submission_id'], $checkId);
                $viewerInfo = $this->refreshViewerUrl($tii, $check['tii_submission_id']);
                $this->updateCheck($checkId, [
                    'state' => 3,
                    'similarity_score' => $score,
                    'pdf_local_path' => $localPdf,
                    'view_only_url' => $viewerInfo['url'],
                    'view_only_url_expire' => $viewerInfo['expire'],
                    'error_msg' => '',
                ]);
                return;
            }

            if ($status === 'ERROR') {
                $errMsg = isset($statusResp['error_code']) ? (string)$statusResp['error_code'] : 'Turnitin reported ERROR';
                $this->markFailed($checkId, '[poll] ' . $errMsg);
                return;
            }

            // PROCESSING or any other intermediate status: keep polling.
            if ($attempt >= self::MAX_POLL_ATTEMPTS) {
                $this->markFailed($checkId, '[poll] timeout after ' . $attempt . ' attempts');
                return;
            }
            $this->enqueuePoll($checkId, $attempt + 1);
        } catch (\Throwable $e) {
            // Do not fail on a transient error; retry while attempts remain.
            if ($attempt < self::MAX_POLL_ATTEMPTS) {
                $this->enqueuePoll($checkId, $attempt + 1);
                $this->updateCheck($checkId, [
                    'attempts' => $attempt,
                    'error_msg' => '[poll] transient: ' . $e->getMessage(),
                ]);
                return;
            }
            $this->markFailed($checkId, '[poll] exhausted: ' . $e->getMessage());
            throw $e;
        }
    }

    /**
     * Generate a fresh online viewer URL (for when the stored one has expired) and
     * persist it on the check row.
     *
     * @param int $checkId
     * @return array ['url' => string, 'expire' => int, 'local_pdf' => mixed]
     * @throws Exception when the check has no Turnitin submission id
     */
    public function refreshViewerUrlFor($checkId)
    {
        $check = $this->mustGetCheck($checkId);
        if (empty($check['tii_submission_id'])) {
            throw new Exception('check has no tii_submission_id');
        }
        $tii = new TurnitinService();
        $info = $this->refreshViewerUrl($tii, $check['tii_submission_id']);
        $this->updateCheck($checkId, [
            'view_only_url' => $info['url'],
            'view_only_url_expire' => $info['expire'],
        ]);
        return [
            'url' => $info['url'],
            'expire' => $info['expire'],
            'local_pdf' => $check['pdf_local_path'],
        ];
    }

    // ---------- Internals ----------

    /**
     * Schedule a PlagiarismPoll job after POLL_INTERVAL on the single poll queue.
     *
     * @param int $checkId
     * @param int $attempt attempt number the scheduled job will run as
     * @return void
     */
    private function enqueuePoll($checkId, $attempt)
    {
        Queue::later(
            self::POLL_INTERVAL,
            'app\\api\\job\\PlagiarismPoll',
            ['check_id' => $checkId, 'attempt' => $attempt],
            self::QUEUE_POLL
        );
    }

    /**
     * Ask Turnitin for a viewer URL; accepts either `viewer_url` or `url` in the response.
     *
     * @param TurnitinService $tii
     * @param string          $submissionId
     * @return array ['url' => string ('' when absent), 'expire' => int unix timestamp]
     */
    private function refreshViewerUrl($tii, $submissionId)
    {
        $resp = $tii->getViewerUrl($submissionId);
        $url = '';
        if (isset($resp['viewer_url'])) {
            $url = (string)$resp['viewer_url'];
        } elseif (isset($resp['url'])) {
            $url = (string)$resp['url'];
        }
        // Conservative assumption: treat the URL as valid for 2 hours.
        return ['url' => $url, 'expire' => time() + 7200];
    }

    /**
     * Request PDF generation, poll until it is ready, download it and store it locally.
     *
     * @param TurnitinService $tii
     * @param string          $submissionId
     * @param int             $checkId used in the file name
     * @return string path of the stored PDF relative to the project root
     * @throws Exception when generation fails, never becomes ready, or the file cannot be saved
     */
    private function downloadAndStorePdf($tii, $submissionId, $checkId)
    {
        // 1. Request generation.
        $req = $tii->requestPdfReport($submissionId);
        $pdfId = isset($req['id']) ? $req['id'] : '';
        if ($pdfId === '') {
            throw new Exception('requestPdfReport empty id: ' . json_encode($req));
        }

        // 2. Inline poll of the PDF status (up to 30 checks, 6 seconds apart).
        $maxLoops = 30;
        $ready = false;
        for ($i = 0; $i < $maxLoops; $i++) {
            $st = $tii->getPdfReportStatus($submissionId, $pdfId);
            $stCode = isset($st['status']) ? strtoupper($st['status']) : '';
            if ($stCode === 'SUCCESS') {
                $ready = true;
                break;
            }
            if ($stCode === 'FAILED') {
                throw new Exception('PDF report generation failed: ' . json_encode($st));
            }
            sleep(6);
        }
        // The PDF may only be downloaded once its status is SUCCESS; without this guard
        // a timed-out wait would fall through to a download of a report that is not ready.
        if (!$ready) {
            throw new Exception('PDF report not ready after ' . $maxLoops . ' status checks');
        }

        // 3. Download.
        $binary = $tii->downloadPdfReport($submissionId, $pdfId);
        if (!is_string($binary) || strlen($binary) < 100) {
            throw new Exception('downloaded pdf is empty/too small');
        }

        // 4. Write to disk.
        $rootDir = ROOT_PATH ?: dirname(dirname(__DIR__));
        $absDir = rtrim($rootDir, '/\\') . DIRECTORY_SEPARATOR . self::REPORT_DIR;
        if (!is_dir($absDir) && !@mkdir($absDir, 0755, true) && !is_dir($absDir)) {
            throw new Exception('failed to create report dir ' . $absDir);
        }
        $filename = sprintf('check_%d_%s.pdf', $checkId, date('Ymd_His'));
        $absPath = $absDir . DIRECTORY_SEPARATOR . $filename;
        $bytes = file_put_contents($absPath, $binary);
        if ($bytes === false || $bytes < 100) {
            throw new Exception('failed to save pdf to ' . $absPath);
        }
        return self::REPORT_DIR . '/' . $filename;
    }

    /**
     * Load a plagiarism_check row or throw.
     *
     * @param int $checkId
     * @return array
     * @throws Exception when the row does not exist
     */
    private function mustGetCheck($checkId)
    {
        $row = Db::name('plagiarism_check')->where('check_id', $checkId)->find();
        if (!$row) {
            throw new Exception("plagiarism_check #{$checkId} not found");
        }
        return $row;
    }

    /**
     * Update a plagiarism_check row; `utime` is always refreshed.
     *
     * @param int   $checkId
     * @param array $data column => value
     * @return void
     */
    private function updateCheck($checkId, array $data)
    {
        $data['utime'] = time();
        Db::name('plagiarism_check')->where('check_id', $checkId)->update($data);
    }

    /**
     * Move the check to state=4 (failed) and store a truncated error message.
     *
     * @param int    $checkId
     * @param string $errMsg
     * @return void
     */
    private function markFailed($checkId, $errMsg)
    {
        $this->log("markFailed act");
        $this->updateCheck($checkId, [
            'state' => 4,
            'error_msg' => mb_substr($errMsg, 0, 1000),
        ]);
    }

    /**
     * Find the newest manuscript file of an article in article_file and resolve it to a
     * local path. The type_name value 'manuscirpt' is the spelling stored in the data.
     *
     * @param int $articleId
     * @return string local file path
     * @throws Exception when no manuscript row exists or the path cannot be resolved
     */
    public function locateArticleManuscript($articleId)
    {
        $row = Db::name('article_file')
            ->where('article_id', $articleId)
            ->where('type_name', 'manuscirpt') // historical spelling
            ->order('file_id desc')
            ->find();
        if (!$row || empty($row['file_url'])) {
            throw new Exception("article #{$articleId} has no manuscirpt file");
        }
        return $this->resolveFileUrlToLocal($row['file_url']);
    }

    /**
     * Resolve a file_url (absolute path, http(s) URL or relative path) to a local path.
     *
     * Uses the `plagiarism.static_root` and `plagiarism.cdn_prefix` settings: URLs under
     * the CDN prefix are mapped onto static_root, other URLs are downloaded, relative
     * paths are joined onto static_root.
     *
     * @param string $fileUrl
     * @return string local file path
     * @throws Exception when the value is empty or cannot be resolved
     */
    public function resolveFileUrlToLocal($fileUrl)
    {
        $fileUrl = trim((string)$fileUrl);
        if ($fileUrl === '') {
            throw new Exception('empty file_url');
        }
        // Already an absolute path to an existing file.
        if (preg_match('/^([a-zA-Z]:[\\\\\/]|\/)/', $fileUrl) && is_file($fileUrl)) {
            return $fileUrl;
        }
        $staticRoot = trim((string)Env::get('plagiarism.static_root', ''));
        $cdnPrefix = trim((string)Env::get('plagiarism.cdn_prefix', ''));

        // http(s) URL: try stripping the CDN prefix and mapping to a local file first.
        if (preg_match('#^https?://#i', $fileUrl)) {
            if ($cdnPrefix !== '' && stripos($fileUrl, $cdnPrefix) === 0) {
                $rel = ltrim(substr($fileUrl, strlen($cdnPrefix)), '/');
                $local = rtrim($staticRoot, '/\\') . DIRECTORY_SEPARATOR . $rel;
                if (is_file($local)) {
                    return $local;
                }
            }
            // Last resort: download into the tmp directory under REPORT_DIR.
            return $this->downloadRemoteFile($fileUrl);
        }

        // Relative path: join onto static_root.
        if ($staticRoot !== '') {
            $local = rtrim($staticRoot, '/\\') . DIRECTORY_SEPARATOR . ltrim($fileUrl, '/\\');
            if (is_file($local)) {
                return $local;
            }
        }
        throw new Exception("cannot resolve file_url to local path: {$fileUrl} (set [plagiarism] STATIC_ROOT/CDN_PREFIX in .env)");
    }

    /**
     * Download a remote file into REPORT_DIR/tmp and return its local path.
     *
     * NOTE(review): the target directory lives under REPORT_DIR ('public/...'), so
     * downloaded manuscripts may be web-accessible — confirm this is intended.
     * NOTE(review): TLS peer verification is disabled for this download.
     *
     * @param string $url
     * @return string local file path
     * @throws Exception when the temp file cannot be opened or the download fails
     */
    private function downloadRemoteFile($url)
    {
        $rootDir = ROOT_PATH ?: dirname(dirname(__DIR__));
        $tmpDir = rtrim($rootDir, '/\\') . DIRECTORY_SEPARATOR . self::REPORT_DIR . DIRECTORY_SEPARATOR . 'tmp';
        if (!is_dir($tmpDir)) {
            @mkdir($tmpDir, 0755, true);
        }
        $ext = pathinfo((string)parse_url($url, PHP_URL_PATH), PATHINFO_EXTENSION) ?: 'pdf';
        $local = $tmpDir . DIRECTORY_SEPARATOR . md5($url) . '_' . time() . '.' . $ext;

        // Fail with a clear message instead of handing an invalid handle to curl.
        $fh = @fopen($local, 'wb');
        if ($fh === false) {
            throw new Exception("download failed url={$url} cannot open {$local}");
        }
        $ch = curl_init($url);
        curl_setopt_array($ch, [
            CURLOPT_FILE => $fh,
            CURLOPT_FOLLOWLOCATION => true,
            CURLOPT_TIMEOUT => 120,
            CURLOPT_SSL_VERIFYPEER => false,
        ]);
        $ok = curl_exec($ch);
        $code = curl_getinfo($ch, CURLINFO_HTTP_CODE);
        curl_close($ch);
        fclose($fh);
        if (!$ok || $code !== 200 || filesize($local) < 100) {
            @unlink($local);
            throw new Exception("download failed url={$url} http={$code}");
        }
        return $local;
    }

    /**
     * Fetch a plagiarism_check row without throwing when it is missing.
     *
     * @param int $checkId
     * @return mixed whatever find() returns for that check_id
     */
    public function getCheck($checkId)
    {
        return Db::name('plagiarism_check')->where('check_id', $checkId)->find();
    }

    /**
     * Append a timestamped line to the service log file (write errors are suppressed).
     *
     * @param string $msg
     * @return void
     */
    public function log($msg)
    {
        $line = date('Y-m-d H:i:s') . ' ' . $msg . PHP_EOL;
        @file_put_contents($this->logFile, $line, FILE_APPEND);
    }
}

View File

@@ -319,19 +319,27 @@ class PromotionService
$this->log("dispatchPrepareEmails task_id={$taskId} no_logs -> state=5");
return ['dispatched' => 0, 'already_done' => true, 'error' => null];
}
// return $logIds;
foreach ($logIds as $logId) {
echo $logId."----";
$this->enqueuePrepareEmail(intval($logId));
}
$this->log("dispatchPrepareEmails task_id={$taskId} dispatched=" . count($logIds));
return ['dispatched' => count($logIds), 'already_done' => false, 'error' => null];
}
/**
 * Look up one promotion_email_log row by log_id and return it wrapped by jsonSuccess().
 *
 * Appears to be a test/debug helper: no task lookup or state change is performed.
 *
 * @param int $logId
 * @return mixed result of jsonSuccess()
 */
public function prepareSingleEmailTest($logId)
{
    $logRow = Db::name('promotion_email_log')
        ->where('log_id', $logId)
        ->find();

    return jsonSuccess($logRow);
}
/**
* 对单封邮件执行准备:拉取 expert / journal调 LLM 生成描述,渲染模板,写回 log。
*
@@ -768,16 +776,14 @@ class PromotionService
* 队列名promotion_email
* 启动 workerphp think queue:listen --queue promotion_email
*/
public function enqueuePrepareEmail($logId, $delay = 0)
public function enqueuePrepareEmail($logId)
{
$jobClass = 'app\api\job\PromotionPrepareEmail@fire';
$data = ['log_id' => intval($logId)];
if ($delay > 0) {
Queue::later($delay, $jobClass, $data, 'PromotionPrepareEmail');
} else {
Queue::push($jobClass, $data, 'PromotionPrepareEmail');
}
$res =Queue::push($jobClass, $data, 'PromotionPrepareEmail');
}
/**

View File

@@ -80,6 +80,25 @@ class QueueRedis
return null;
}
}
/**
 * Delete one or more Redis keys (used to clear a queue job's "completed" marker
 * before a re-check).
 *
 * @param array $keys key names; null and '' entries are ignored
 * @return bool true when nothing had to be deleted or the DEL call went through,
 *              false when an \Exception was thrown
 */
public function deleteRedisKeys(array $keys)
{
    $targets = [];
    foreach ($keys as $candidate) {
        if ($candidate === null || $candidate === '') {
            continue;
        }
        $targets[] = $candidate;
    }

    if (count($targets) === 0) {
        return true;
    }

    try {
        $this->connect()->del(...$targets);
    } catch (\Exception $e) {
        return false;
    }

    return true;
}
// 安全释放锁(仅当值匹配时删除)
public function releaseRedisLock($key, $value)

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,322 @@
<?php
namespace app\common;
use think\Env;
use think\Exception;
/**
* Turnitin Core API (TCA) REST 客户端封装。
*
* 适用 Crossref Similarity Check 通道product_name=Crossref以及标准 TCA 接入。
*
* 鉴权Authorization: Bearer <API_KEY>
* X-Turnitin-Integration-Name / X-Turnitin-Integration-Version 用于审计
*
* .env 配置([turnitin] 段):
* BASE_URL 形如 https://crossref-12345.turnitin.com/api/v1不带尾斜杠
* API_KEY 生成的 Bearer token
* INTEGRATION_NAME Scope Name创建 integration 时填的名字)
* INTEGRATION_VERSION 自定义版本号,便于审计 e.g. 1.0.0
*
* API 文档https://developers.turnitin.com/docs/tca
*
* 注意:
* - 所有方法返回原始 decode 后的数组HTTP 错误抛 Exception
* - 不做任何业务层逻辑(业务层在 PlagiarismService 里)
* - 不缓存 tokenBearer 不需要登录,每次请求自带)
*/
class TurnitinService
{
private $baseUrl;
private $apiKey;
private $integrationName;
private $integrationVersion;
private $timeout = 60;
/**
 * Load the [turnitin] settings from the environment.
 *
 * @throws Exception when base_url or api_key is empty
 */
public function __construct()
{
    // Every setting is read the same way: string-cast, then trimmed.
    $setting = static function ($name, $default) {
        return trim((string)Env::get('turnitin.' . $name, $default));
    };

    $this->baseUrl = rtrim($setting('base_url', ''), '/');
    $this->apiKey = $setting('api_key', '');
    $this->integrationName = $setting('integration_name', 'tmr');
    $this->integrationVersion = $setting('integration_version', '1.0.0');

    if ($this->baseUrl !== '' && $this->apiKey !== '') {
        return;
    }
    throw new Exception('Turnitin not configured: missing BASE_URL or API_KEY in .env [turnitin] section');
}
// ==================== Public API ====================
/**
 * Health check / fetch the capabilities enabled for the account.
 * GET /features-enabled
 *
 * @return mixed decoded JSON response as returned by request()
 */
public function featuresEnabled()
{
return $this->request('GET', '/features-enabled');
}
/**
 * Create a submission (its id is required before a file can be uploaded).
 * POST /submissions
 *
 * @param array $meta Required fields:
 *   - title      paper title
 *   - owner      submission owner identifier (custom string, e.g. the submission system's user_id)
 *   - submitter  submitter identifier (same kind of value)
 *   - eula       (optional) ['version' => '...', 'language' => 'en-US', 'accepted_timestamp' => ISO8601];
 *                may be omitted when features-enabled reports require_eula=false
 *   Optional fields:
 *   - extract_text_only  bool
 *   - metadata           array of custom key/values for later tracing
 *
 * @return array response containing id (submission UUID), status, owner, ...
 */
public function createSubmission($meta)
{
return $this->request('POST', '/submissions', $meta);
}
/**
 * Upload a file to an existing submission.
 * PUT /submissions/{id}/original/{filename}
 *
 * @param string $submissionId
 * @param string $filePath local path of the file to upload
 * @param string $filename name sent to Turnitin (shown in the report); defaults to
 *                         basename($filePath)
 * @return array decoded response
 * @throws Exception when the file is missing, unreadable or cannot be read
 */
public function uploadFile($submissionId, $filePath, $filename = '')
{
    if (!is_file($filePath) || !is_readable($filePath)) {
        throw new Exception("File not found or not readable: {$filePath}");
    }
    if ($filename === '') {
        $filename = basename($filePath);
    }

    $body = file_get_contents($filePath);
    if ($body === false) {
        // Without this check `false` would be passed on and sent as the request body.
        throw new Exception("Failed to read file: {$filePath}");
    }

    // The name is placed inside a quoted header value: replace control characters,
    // double quotes and backslashes so it cannot break out of the header.
    $headerFilename = preg_replace('/[\x00-\x1F\x7F"\\\\]/', '_', $filename);

    return $this->request(
        'PUT',
        '/submissions/' . urlencode($submissionId) . '/original/' . rawurlencode($filename),
        $body,
        [
            'Content-Type' => 'binary/octet-stream',
            'Content-Disposition' => 'inline; filename="' . $headerFilename . '"',
        ]
    );
}
/**
 * Trigger the similarity comparison for a submission.
 * PUT /submissions/{id}/similarity
 *
 * $opts is merged over the defaults with array_merge(), i.e. per TOP-LEVEL key: passing
 * e.g. 'view_settings' replaces the whole default 'view_settings' array.
 *
 * @param string $submissionId
 * @param array  $opts top-level keys:
 *   - generation_settings  search_repositories, submission_auto_excludes, ...
 *   - view_settings        exclude_quotes / exclude_bibliography / exclude_citations / ...
 *   - indexing_settings    add_to_index
 * @return array decoded response
 */
public function triggerSimilarity($submissionId, $opts = [])
{
    $generation = [
        'search_repositories' => ['INTERNET', 'PUBLICATION', 'CROSSREF', 'CROSSREF_POSTED_CONTENT', 'SUBMITTED_WORK'],
        'submission_auto_excludes' => true,
        'auto_exclude_self_matching_scope' => 'GROUP_CONTEXT',
    ];
    $view = [
        'exclude_quotes' => true,
        'exclude_bibliography' => true,
        'exclude_citations' => true,
    ];
    $indexing = [
        'add_to_index' => true,
    ];

    $defaults = [
        'generation_settings' => $generation,
        'view_settings' => $view,
        'indexing_settings' => $indexing,
    ];
    $endpoint = '/submissions/' . urlencode($submissionId) . '/similarity';

    return $this->request('PUT', $endpoint, array_merge($defaults, $opts));
}
/**
 * Query the similarity status of a submission.
 * GET /submissions/{id}/similarity
 *
 * The response carries status: PROCESSING / COMPLETE / ERROR.
 * On COMPLETE it also carries overall_match_percentage / time_requested / time_generated.
 *
 * @param string $submissionId
 * @return mixed decoded JSON response as returned by request()
 */
public function getSimilarityStatus($submissionId)
{
return $this->request(
'GET',
'/submissions/' . urlencode($submissionId) . '/similarity'
);
}
/**
 * Obtain a temporary URL for viewing the report online.
 * POST /submissions/{id}/viewer-url
 *
 * The response carries viewer_url (valid for a few hours). $viewer is merged over the
 * defaults with array_merge(), i.e. per top-level key.
 *
 * @param string $submissionId
 * @param array  $viewer optional viewer settings, e.g. ['viewer_default_permission_set' => 'INSTRUCTOR']
 * @return array decoded response
 */
public function getViewerUrl($submissionId, $viewer = [])
{
    $similarity = [
        'default_mode' => 'MATCH_OVERVIEW',
        'view_settings' => ['save_changes' => true],
        'modes' => ['match_overview' => true, 'all_sources' => true],
    ];
    $defaults = [
        'viewer_default_permission_set' => 'INSTRUCTOR',
        'similarity' => $similarity,
        'locale' => 'en-US',
    ];
    $endpoint = '/submissions/' . urlencode($submissionId) . '/viewer-url';

    return $this->request('POST', $endpoint, array_merge($defaults, $viewer));
}
/**
 * Request generation of the PDF report (asynchronous; its status is polled separately).
 * POST /submissions/{id}/similarity/pdf
 *
 * The response carries id (the PDF report id).
 *
 * @param string $submissionId
 * @param array  $opts merged over the default ['locale' => 'en-US'] with array_merge()
 * @return mixed decoded JSON response as returned by request()
 */
public function requestPdfReport($submissionId, $opts = [])
{
$body = array_merge([
'locale' => 'en-US',
], $opts);
return $this->request(
'POST',
'/submissions/' . urlencode($submissionId) . '/similarity/pdf',
$body
);
}
/**
 * Query the status of a PDF report.
 * GET /submissions/{id}/similarity/pdf/{pdf_id}/status
 *
 * status: PENDING / SUCCESS / FAILED
 *
 * @param string $submissionId
 * @param string $pdfId id returned by requestPdfReport()
 * @return mixed decoded JSON response as returned by request()
 */
public function getPdfReportStatus($submissionId, $pdfId)
{
return $this->request(
'GET',
'/submissions/' . urlencode($submissionId) . '/similarity/pdf/' . urlencode($pdfId) . '/status'
);
}
/**
 * Download the PDF report content (may only be called once status=SUCCESS).
 * GET /submissions/{id}/similarity/pdf/{pdf_id}
 *
 * Returns the raw PDF binary as a string; the caller is responsible for saving it.
 *
 * @param string $submissionId
 * @param string $pdfId id returned by requestPdfReport()
 * @return mixed raw response body (request() is called with $rawResponse = true)
 */
public function downloadPdfReport($submissionId, $pdfId)
{
return $this->request(
'GET',
'/submissions/' . urlencode($submissionId) . '/similarity/pdf/' . urlencode($pdfId),
null,
[],
true // raw response (no json_decode)
);
}
// ==================== Internal HTTP layer ====================
/**
 * Single entry point for every HTTP call made by this client.
 *
 * @param string $method       GET/POST/PUT/DELETE
 * @param string $path         relative path starting with "/", appended to baseUrl
 * @param mixed  $body         array => JSON-encoded; any other non-null value => sent as the raw body; null => no body
 * @param array  $extraHeaders additional headers as name => value
 * @param bool   $rawResponse  true = return the raw response string; false = json_decode it
 * @return mixed decoded JSON, [] for an empty response body, or the raw string
 *               (raw mode, or a body that is not valid JSON)
 * @throws Exception when the body cannot be JSON-encoded, curl fails, or the HTTP status is not 2xx
 */
private function request($method, $path, $body = null, $extraHeaders = [], $rawResponse = false)
{
    $url = $this->baseUrl . $path;
    $headers = [
        'Authorization: Bearer ' . $this->apiKey,
        'X-Turnitin-Integration-Name: ' . $this->integrationName,
        'X-Turnitin-Integration-Version: ' . $this->integrationVersion,
    ];

    $payload = null;
    if ($body !== null) {
        if (is_array($body)) {
            $payload = json_encode($body, JSON_UNESCAPED_UNICODE | JSON_UNESCAPED_SLASHES);
            // json_encode() returns false on failure (e.g. invalid UTF-8); without this check
            // the request would go out with an empty body and a JSON Content-Type.
            if ($payload === false) {
                throw new Exception(
                    "Turnitin request body is not JSON-encodable: " . json_last_error_msg() . " ({$method} {$path})"
                );
            }
            $headers[] = 'Content-Type: application/json';
        } else {
            $payload = $body;
            // Raw bodies default to octet-stream unless the caller supplies a Content-Type.
            if (!isset($extraHeaders['Content-Type'])) {
                $headers[] = 'Content-Type: application/octet-stream';
            }
        }
    }
    foreach ($extraHeaders as $k => $v) {
        $headers[] = $k . ': ' . $v;
    }

    $ch = curl_init();
    if ($ch === false) {
        throw new Exception("Turnitin curl init failed (url={$url})");
    }
    curl_setopt_array($ch, [
        CURLOPT_URL => $url,
        CURLOPT_CUSTOMREQUEST => strtoupper($method),
        CURLOPT_RETURNTRANSFER => true,
        CURLOPT_HTTPHEADER => $headers,
        CURLOPT_TIMEOUT => $this->timeout,
        CURLOPT_CONNECTTIMEOUT => 15,
        CURLOPT_SSL_VERIFYPEER => true,
        CURLOPT_SSL_VERIFYHOST => 2,
    ]);
    if ($payload !== null) {
        curl_setopt($ch, CURLOPT_POSTFIELDS, $payload);
    }

    $resp = curl_exec($ch);
    // Read status and error before closing the handle.
    $httpCode = curl_getinfo($ch, CURLINFO_HTTP_CODE);
    $err = curl_error($ch);
    curl_close($ch);

    if ($resp === false) {
        throw new Exception("Turnitin curl error: {$err} (url={$url})");
    }
    if ($httpCode < 200 || $httpCode >= 300) {
        // Include the first 1k characters of the response body to make troubleshooting easier.
        $excerpt = mb_substr((string)$resp, 0, 1000);
        throw new Exception("Turnitin HTTP {$httpCode} {$method} {$path}: {$excerpt}");
    }
    if ($rawResponse) {
        return $resp;
    }
    // Some responses may be 204 No Content.
    if ($resp === '' || $resp === null) {
        return [];
    }
    $data = json_decode($resp, true);
    if (json_last_error() !== JSON_ERROR_NONE) {
        // Not JSON: hand the original text back unchanged.
        return $resp;
    }
    return $data;
}
}

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,44 @@
-- Plagiarism-check task table (Turnitin TCA / Crossref Similarity Check)
--
-- One article may be checked repeatedly; the most recent check for an article is the one
-- shown on the editor detail page.
-- state flow: 0 pending upload -> 1 uploading -> 2 comparing -> 3 done -> 4 failed
-- A failed check can be re-triggered, which creates a new row (history is kept).
--
-- Reports are kept permanently: pdf_local_path points at the local PDF under runtime/plagiarism/.
-- view_only_url is a temporary signed Turnitin URL (expires after a few hours) and has to be
-- regenerated once it has expired.
--
-- The table is created only when it does not exist yet: an unconditional DROP TABLE here
-- would erase the whole check history every time this script is re-run.
CREATE TABLE IF NOT EXISTS `t_plagiarism_check` (
    `check_id` INT NOT NULL AUTO_INCREMENT,
    `article_id` INT NOT NULL DEFAULT 0 COMMENT '关联投稿 t_article.article_id',
    `journal_id` INT NOT NULL DEFAULT 0 COMMENT '所属期刊(冗余便于按期刊统计)',
    `triggered_by` INT NOT NULL DEFAULT 0 COMMENT '触发人 user_id手工触发时编辑的 user_id',
    `trigger_source` VARCHAR(32) NOT NULL DEFAULT 'manual' COMMENT 'manual/auto_initial_review/...',
    `state` TINYINT NOT NULL DEFAULT 0 COMMENT '0待上传 1上传中 2比对中 3完成 4失败',
    -- Entity IDs on the Turnitin side
    `tii_submission_id` VARCHAR(64) NOT NULL DEFAULT '' COMMENT 'Turnitin submission UUID',
    `tii_report_status` VARCHAR(32) NOT NULL DEFAULT '' COMMENT 'PROCESSING/COMPLETE/ERROR',
    -- Result
    `similarity_score` DECIMAL(5,2) NOT NULL DEFAULT 0 COMMENT '总相似度 %(如 12.34',
    `view_only_url` VARCHAR(1024) NOT NULL DEFAULT '' COMMENT '在线查看报告 URL临时签名',
    `view_only_url_expire` INT NOT NULL DEFAULT 0 COMMENT '在线查看 URL 过期时间戳',
    `pdf_local_path` VARCHAR(255) NOT NULL DEFAULT '' COMMENT '本地缓存的 PDF 报告相对路径',
    -- File metadata (recorded at upload time for traceability)
    `source_file_name` VARCHAR(255) NOT NULL DEFAULT '' COMMENT '原始 PDF 文件名',
    `source_file_size` INT NOT NULL DEFAULT 0 COMMENT '原始 PDF 字节数',
    -- Debugging and retries
    `attempts` INT NOT NULL DEFAULT 0 COMMENT '总轮询/重试次数',
    `error_msg` VARCHAR(1024) NOT NULL DEFAULT '' COMMENT '失败原因',
    `raw_response` MEDIUMTEXT COMMENT '最近一次 Turnitin API 原始返回(调试用)',
    `ctime` INT NOT NULL DEFAULT 0,
    `utime` INT NOT NULL DEFAULT 0,
    PRIMARY KEY (`check_id`),
    KEY `idx_article` (`article_id`, `state`),
    KEY `idx_state` (`state`),
    KEY `idx_tii_submission` (`tii_submission_id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb3 COMMENT='Turnitin 查重任务表';

102
test_plagiarism_e2e.php Normal file
View File

@@ -0,0 +1,102 @@
<?php
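/**
 * End-to-end connectivity test for Turnitin TCA.
 *
 * Usage (run from the project root):
 *   php test_plagiarism_e2e.php features               # liveness probe
 *   php test_plagiarism_e2e.php submit <article_id>    # submit the article's main manuscript for checking (manual trigger)
 *   php test_plagiarism_e2e.php submit-file <pdf>      # submit a local PDF (not bound to an article)
 *   php test_plagiarism_e2e.php status <check_id>      # query the status
 *   php test_plagiarism_e2e.php list <article_id>      # list the check records of an article
 *   php test_plagiarism_e2e.php viewer <check_id>      # get the online viewer URL
 *
 * Notes:
 *   submit-file does not really persist anything (it is only for connectivity verification);
 *   it runs the complete flow with article_id=0.
 *   submit writes to t_plagiarism_check and prints the check_id; poll it yourself with status.
 */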
define('IS_CLI', true);
require __DIR__ . '/thinkphp/start.php';
use think\Db;
use app\common\PlagiarismService;
use app\common\TurnitinService;
// A sub-command is mandatory.
if ($argc < 2) {
    echo "Usage: php test_plagiarism_e2e.php <command> [args...]\n";
    exit(1);
}
$cmd = $argv[1];

// Returns the first argument after the sub-command; when it is missing, prints the
// given usage line and terminates with exit status 1.
$requireArg = function ($usageLine) use ($argc, $argv) {
    if ($argc < 3) {
        echo $usageLine;
        exit(1);
    }
    return $argv[2];
};

// One handler per sub-command.
$handlers = [
    // Print the result of TurnitinService::featuresEnabled().
    'features' => function () {
        $client = new TurnitinService();
        print_r($client->featuresEnabled());
    },
    // Locate the manuscript of an article and submit it for checking.
    'submit' => function () use ($requireArg) {
        $articleId = intval($requireArg("Usage: ... submit <article_id>\n"));
        $service = new PlagiarismService();
        $local = $service->locateArticleManuscript($articleId);
        echo "manuscript local path: {$local}\n";
        $checkId = $service->submit($articleId, $local, 0, 'cli_test');
        echo "submitted, check_id = {$checkId}\n";
        echo "now run: php think queue:work --queue plagiarism --tries=1 -v\n";
    },
    // Submit an arbitrary local file with article_id = 0.
    'submit-file' => function () use ($requireArg) {
        $path = $requireArg("Usage: ... submit-file <pdf_path>\n");
        if (!is_file($path)) {
            echo "file not exists: {$path}\n";
            exit(1);
        }
        $service = new PlagiarismService();
        $checkId = $service->submit(0, $path, 0, 'cli_test_file');
        echo "submitted, check_id = {$checkId}\n";
    },
    // Dump the plagiarism_check row for one check_id.
    'status' => function () use ($requireArg) {
        $checkId = intval($requireArg("Usage: ... status <check_id>\n"));
        $row = Db::name('plagiarism_check')->where('check_id', $checkId)->find();
        print_r($row);
    },
    // Dump every plagiarism_check row of an article, newest check_id first.
    'list' => function () use ($requireArg) {
        $articleId = intval($requireArg("Usage: ... list <article_id>\n"));
        $rows = Db::name('plagiarism_check')->where('article_id', $articleId)->order('check_id desc')->select();
        print_r($rows);
    },
    // Print the result of PlagiarismService::refreshViewerUrlFor() for one check_id.
    'viewer' => function () use ($requireArg) {
        $checkId = intval($requireArg("Usage: ... viewer <check_id>\n"));
        $service = new PlagiarismService();
        print_r($service->refreshViewerUrlFor($checkId));
    },
];

try {
    if (!isset($handlers[$cmd])) {
        echo "unknown command: {$cmd}\n";
        exit(1);
    }
    $handlers[$cmd]();
} catch (\Throwable $e) {
    // Any failure is reported with its stack trace and a non-zero exit status.
    echo "ERROR: " . $e->getMessage() . "\n" . $e->getTraceAsString() . "\n";
    exit(1);
}