This commit is contained in:
wyn
2026-05-26 18:02:54 +08:00
28 changed files with 2434 additions and 197 deletions

22
.env
View File

@@ -19,6 +19,16 @@ client_id = 616562
client_secret = CfMDrllyqBTFKrUkO2XaE7OmWTYqP3yd client_secret = CfMDrllyqBTFKrUkO2XaE7OmWTYqP3yd
hmac = 8aU8WnITYhwaGTXH hmac = 8aU8WnITYhwaGTXH
[base]
model_url=http://chat.taimed.cn
model_url1=http://125.39.141.154:10002/v1/chat/completions
model=DeepSeek-Coder-V2-Instruct
[user_field_ai]
; 留空则依次用 promotion PROMOTION_LLM_URL、citation 等;仅写根地址时会自动补 /v1/chat/completions
;chat_url=http://chat.taimed.cn/v1/chat/completions
;chat_model=DeepSeek-Coder-V2-Instruct
[promotion] [promotion]
PROMOTION_LLM_URL=http://chat.taimed.cn/v1/chat/completions PROMOTION_LLM_URL=http://chat.taimed.cn/v1/chat/completions
PROMOTION_LLM_MODEL=DeepSeek-Coder-V2-Instruct PROMOTION_LLM_MODEL=DeepSeek-Coder-V2-Instruct
@@ -33,6 +43,18 @@ UNSUBSCRIBE_BASE_URL=https://submission.tmrjournals.com/api/Unsubscribe/index
[yboard] [yboard]
APPLY_URL="https://submission.tmrjournals.com/youthBoardRegister" APPLY_URL="https://submission.tmrjournals.com/youthBoardRegister"
[turnitin]
viewer_permission_set=ADMINISTRATOR
viewer_locale=en-US
; viewer-url 必填 viewer_user_id;默认用查重记录的 triggered_by → editor_{id},也可写死:
;viewer_user_id=editor_1
; 与 Crossref 网页手动查重对齐:三项默认 0(不排除引用/参考文献/引文)。若只要正文相似度可改为 1
exclude_quotes=0
exclude_bibliography=0
exclude_citations=0
; 在线报告默认视图:all_sources=按来源库分类(与 Crossref 手动后台一致);match_overview=匹配总览(文中编号易都显示为 1)
viewer_default_mode=all_sources
[plagiarism] [plagiarism]
static_root="/home/wwwroot/api.tmrjournals.com/public" static_root="/home/wwwroot/api.tmrjournals.com/public"

View File

@@ -598,6 +598,10 @@ class Article extends Base
$article_res['is_draft'] = 1; $article_res['is_draft'] = 1;
} }
} }
//新增是否存在生产实例 20260204 start
$article_res['has_produce'] = $this->production_article_obj->where('article_id', $data['articleId'])->where('state', 0)->find()?1:0;
//新增是否是草稿删除 20260204 end //新增是否是草稿删除 20260204 end
return json(['article' => $article_res, 'msg' => $article_msg, 'authors' => $author_res, 'suggest' => $suggest, 'transfer' => $transfer_res, 'transinfo' => $transfer_info, "major" => $major,'suggest_final' => $aFinal]); return json(['article' => $article_res, 'msg' => $article_msg, 'authors' => $author_res, 'suggest' => $suggest, 'transfer' => $transfer_res, 'transinfo' => $transfer_info, "major" => $major,'suggest_final' => $aFinal]);
} }

View File

@@ -567,6 +567,8 @@ class Board extends Base {
return jsonError($rule->getError()); return jsonError($rule->getError());
} }
$check = $this->board_to_journal_obj->where('user_id',$data['user_id'])->where('state',0)->find(); $check = $this->board_to_journal_obj->where('user_id',$data['user_id'])->where('state',0)->find();
$journal_info = $this->journal_obj->where('journal_id',$data['journal_id'])->find();
$user_info = $this->user_obj->where('user_id',$data['user_id'])->find();
if($check){ if($check){
return jsonError("According to TMR Publishing Group Policy, scientists are not allowed to serve on the editorial board of more than one journal at the same time."); return jsonError("According to TMR Publishing Group Policy, scientists are not allowed to serve on the editorial board of more than one journal at the same time.");
} }
@@ -578,6 +580,18 @@ class Board extends Base {
$insert['board_group_id'] = $data['board_group_id']; $insert['board_group_id'] = $data['board_group_id'];
$insert['research_areas'] = trim($data['research_areas']); $insert['research_areas'] = trim($data['research_areas']);
$this->board_to_journal_obj->insert($insert); $this->board_to_journal_obj->insert($insert);
$reviewer_journal = $this->reviewer_to_journal_obj->where("reviewer_id",$user_info['user_id'])->where("journal_id",$journal_info['journal_id'])->find();
if(!$reviewer_journal){
$insert_reviewer['reviewer_id'] = $user_info['user_id'];
$insert_reviewer['journal_id'] = $journal_info['journal_id'];
$insert_reviewer['account'] = $user_info['account'];
$insert_reviewer['journal_title'] = $journal_info['title'];
$insert_reviewer['ctime'] = time();
$this->reviewer_to_journal_obj->insert($insert_reviewer);
}
return jsonSuccess([]); return jsonSuccess([]);
} }

View File

@@ -115,7 +115,6 @@ class Email extends Base
public function pushEmailOnTemplate() public function pushEmailOnTemplate()
{ {
die();
$data = $this->request->post(); $data = $this->request->post();
$rule = new Validate([ $rule = new Validate([

View File

@@ -1406,8 +1406,8 @@ class EmailClient extends Base
return jsonError('Factory is disabled'); return jsonError('Factory is disabled');
} }
$expertType = intval($factory['expert_type']); $expertType = intval($factory['expert_type']);
if (!in_array($expertType, [2, 3, 5], true)) { if (!in_array($expertType, [2, 3, 4, 5, 6], true)) {
return jsonError('Only expert_type=2(Editorial Board), 3(Young Editorial Board) or 5(Expert pool) is supported currently'); return jsonError('Unsupported expert_type; supported: 2=编委, 3=青年编委, 4=作者, 5=专家库, 6=往期青年编委');
} }
$journalId = intval($factory['journal_id']); $journalId = intval($factory['journal_id']);
@@ -2260,7 +2260,7 @@ class EmailClient extends Base
* 每日自动生成推广任务(由 Linux crontab 调用) * 每日自动生成推广任务(由 Linux crontab 调用)
* *
* 逻辑: * 逻辑:
* 1. 查询所有 state=0 的任务工厂(支持 expert_type=2 编委 / =5 expert 库;其他类型预留 * 1. 查询所有 state=0 的任务工厂(支持 expert_type=2/3/4/5/6
* 2. JOIN journal 确认期刊有效state=0, start_promotion=1 * 2. JOIN journal 确认期刊有效state=0, start_promotion=1
* 3. 按 factory_id + send_date 检查去重 * 3. 按 factory_id + send_date 检查去重
* 4. template/style: 工厂 > 0 用工厂的,否则用期刊默认 * 4. template/style: 工厂 > 0 用工厂的,否则用期刊默认
@@ -2282,7 +2282,7 @@ class EmailClient extends Base
->alias('f') ->alias('f')
->join('t_journal j', 'j.journal_id = f.journal_id', 'inner') ->join('t_journal j', 'j.journal_id = f.journal_id', 'inner')
->where('f.state', 0) ->where('f.state', 0)
->where('f.expert_type', 'in', [2, 3, 5]) ->where('f.expert_type', 'in', [2, 3, 4, 5, 6])
->where('j.state', 0) ->where('j.state', 0)
->where('f.start_promotion', 1) ->where('f.start_promotion', 1)
->field('f.*, j.title as journal_title, j.default_template_id, j.default_style_id') ->field('f.*, j.title as journal_title, j.default_template_id, j.default_style_id')
@@ -2684,6 +2684,7 @@ class EmailClient extends Base
3 => 'Young Editorial Board', 3 => 'Young Editorial Board',
4 => 'Author', 4 => 'Author',
5 => 'Expert Pool', 5 => 'Expert Pool',
6 => 'Past Young Editorial Board',
]; ];
return isset($map[intval($t)]) ? $map[intval($t)] : 'Unknown'; return isset($map[intval($t)]) ? $map[intval($t)] : 'Unknown';
} }
@@ -2692,8 +2693,10 @@ class EmailClient extends Base
* 根据 expert_type 分发选人逻辑 * 根据 expert_type 分发选人逻辑
* *
* - expert_type = 5从 t_expert 库选人(按领域 / 国家 / 频次) * - expert_type = 5从 t_expert 库选人(按领域 / 国家 / 频次)
* - expert_type ∈ {1,2,3,4}:从系统内部表选人(主编/编委/青年编委/作者fields 与国家筛选忽略; * 频次e.ltime成功发送后回写+ t_promotion_email_log 中「待发送 state=0 的入队时间 ctime」
* 频次按 t_promotion_email_log 中相同 expert_type 维度的最近发送时间扣除 * (避免「今日生成任务明日发送」时 ltime 未变导致连续两天选到同一拨人)
* - expert_type ∈ {1,2,3,4,6}:从系统内部表选人(主编/编委/青年编委/作者/往期青年编委fields 与国家筛选忽略;
* 频次按 t_promotion_email_log已发/退信用 send_time待发送队列用 ctime同上
* *
* 返回行 shape 已对齐: * 返回行 shape 已对齐:
* - type=5 行包含 e.* 全部字段(含 expert_id、country_id、ltime 等) * - type=5 行包含 e.* 全部字段(含 expert_id、country_id、ltime 等)
@@ -2741,9 +2744,25 @@ class EmailClient extends Base
if ($noRepeatDays > 0) { if ($noRepeatDays > 0) {
$cutoff = time() - ($noRepeatDays * 86400); $cutoff = time() - ($noRepeatDays * 86400);
// ltime成功发出后回写与 log 中 state=1 在「已送达」上部分重叠,但保留 ltime 可走索引、且退信 state=3 未必回写 ltime。
$query->where(function ($q) use ($cutoff) { $query->where(function ($q) use ($cutoff) {
$q->where('e.ltime', 0)->whereOr('e.ltime', '<', $cutoff); $q->where('e.ltime', 0)->whereOr('e.ltime', '<', $cutoff);
}); });
// 一条 NOT EXISTS待发(state=0 按 ctime) 或 已发/退信(按 send_time),避免两段相同 join 的重复感
$query->where(function ($q) use ($cutoff) {
$q->table('t_promotion_email_log')->alias('pl')
->join('t_promotion_task pt', 'pt.task_id = pl.task_id', 'inner')
->where('pt.expert_type', 5)
->where('pl.expert_id', '>', 0)
->whereRaw('pl.expert_id = e.expert_id')
->where(function ($w) use ($cutoff) {
$w->where(function ($a) use ($cutoff) {
$a->where('pl.state', 0)->where('pl.ctime', '>', $cutoff);
})->whereOr(function ($b) use ($cutoff) {
$b->where('pl.state', 'in', [1, 3])->where('pl.send_time', '>', $cutoff);
});
});
}, 'not exists');
} }
$countryIds = $this->resolveCountryIds($targetPartitions, $targetCountryIds); $countryIds = $this->resolveCountryIds($targetPartitions, $targetCountryIds);
@@ -2762,9 +2781,9 @@ class EmailClient extends Base
* 系统内部受众选人(编委 / 主编 / 青年编委 / 作者) * 系统内部受众选人(编委 / 主编 / 青年编委 / 作者)
* 仅按 期刊 + 频次 过滤;领域 / 国家无关 * 仅按 期刊 + 频次 过滤;领域 / 国家无关
* *
* 频次:扣除「同 expert_type 维度no_repeat_days 内已经发出 (state=1) 或退信 (state=3) 的人 * 频次:扣除「同 expert_type 下no_repeat_days 内 (1) 已发出或退信,或 (2) 仍在队列待发送state=0按 ctime的人
* *
* @param int $expertType 1=主编 2=编委 3=青年编委 4=作者 * @param int $expertType 1=主编 2=编委 3=青年编委 4=作者 6=往期青年编委
* @param int $journalId * @param int $journalId
* @param int $noRepeatDays * @param int $noRepeatDays
* @param int $limit * @param int $limit
@@ -2799,23 +2818,52 @@ class EmailClient extends Base
break; break;
case 1: // 主编(预留,本期不实现) case 1: // 主编(预留,本期不实现)
case 4: // 作者(预留) return [];
case 4: // 作者:该刊投稿作者(按邮箱关联 t_user
$query = Db::name('article_author')->alias('aa')
->join('t_user u', 'u.email = aa.email', 'inner')
->join('t_article a', 'a.article_id = aa.article_id', 'inner')
->join('t_user_reviewer_info uri', 'uri.reviewer_id = u.user_id', 'left')
->where('a.journal_id', $journalId)
->where('u.email', '<>', '')
->where('u.unsubscribed', 0);
break;
case 6: //获取往期的青年编委2025年以前的,中国人
$now = strtotime('2025-01-01');
$query = Db::name('user_to_yboard')->alias('y')
->join('t_user u', 'u.user_id = y.user_id', 'inner')
->join('t_user_reviewer_info uri', 'uri.reviewer_id = u.user_id', 'left')
->where('y.journal_id', $journalId)
->where('y.state', 0)
->where('y.start_date', '<=', $now)
->where('uri.country', 'China')
->where('u.email', '<>', '')
->where('u.unsubscribed', 0);
break;//
default: default:
return []; return [];
} }
if (!isset($query)) {
return [];
}
if ($noRepeatDays > 0) { if ($noRepeatDays > 0) {
$cutoff = intval(time() - ($noRepeatDays * 86400)); $cutoff = intval(time() - ($noRepeatDays * 86400));
$expertTypeSafe = intval($expertType); $expertTypeSafe = intval($expertType);
// 关联子查询:相对于 NOT IN避免把全部已发 user_id 拉到 PHP 再拼回 SQL
// 配合 t_promotion_email_log(user_id, send_time) 复合索引做半连接探针,常量时间。
$query->where(function ($q) use ($expertTypeSafe, $cutoff) { $query->where(function ($q) use ($expertTypeSafe, $cutoff) {
$q->table('t_promotion_email_log')->alias('l') $q->table('t_promotion_email_log')->alias('l')
->join('t_promotion_task t', 't.task_id = l.task_id', 'inner') ->join('t_promotion_task t', 't.task_id = l.task_id', 'inner')
->where('t.expert_type', $expertTypeSafe) ->where('t.expert_type', $expertTypeSafe)
->where('l.state', 'in', [1, 3]) ->whereRaw('l.user_id = u.user_id')
->where('l.send_time', '>', $cutoff) ->where(function ($w) use ($cutoff) {
->whereRaw('l.user_id = u.user_id'); $w->where(function ($a) use ($cutoff) {
$a->where('l.state', 0)->where('l.ctime', '>', $cutoff);
})->whereOr(function ($b) use ($cutoff) {
$b->where('l.state', 'in', [1, 3])->where('l.send_time', '>', $cutoff);
});
});
}, 'not exists'); }, 'not exists');
} }

View File

@@ -2,15 +2,17 @@
namespace app\api\controller; namespace app\api\controller;
use app\common\TurnitinService;
use think\Db; use think\Db;
use think\Response; use think\Response;
use app\common\PlagiarismService; use app\common\PlagiarismService;
use think\Validate;
/** /**
* 论文查重Turnitin / Crossref Similarity Check控制器。 * 论文查重Turnitin / Crossref Similarity Check控制器。
* *
* 触发方式:纯手工(编辑后台点"查重"按钮)。 * 触发方式:纯手工(编辑后台点"查重"按钮)。
* 报告策略:在线 viewer URL 临时签名 + PDF 永久落盘 runtime/plagiarism/ * 报告策略:PDF 在 poll 完成时落盘;在线 viewer URL 通过 getReportUrl 按需生成(临时签名)
* *
* 主要接口: * 主要接口:
* POST submit 触发查重 * POST submit 触发查重
@@ -35,12 +37,14 @@ class Plagiarism extends Base
* article_id 必填 * article_id 必填
* file_url 选填;不传则按 article_id 在 t_article_file 找 manuscirpt * file_url 选填;不传则按 article_id 在 t_article_file 找 manuscirpt
* editor_id 选填;触发人 user_id前端拿不到也可以传 0 * editor_id 选填;触发人 user_id前端拿不到也可以传 0
* check_type 选填full默认全文| body_only正文| both各提交一条
*/ */
public function submit() public function submit()
{ {
$articleId = intval($this->request->param('article_id', 0)); $articleId = intval($this->request->param('article_id', 0));
$fileUrl = trim($this->request->param('file_url', '')); $fileUrl = trim($this->request->param('file_url', ''));
$editorId = intval($this->request->param('editor_id', 0)); $editorId = intval($this->request->param('editor_id', 0));
$checkType = trim($this->request->param('check_type', 'full'));
if ($articleId <= 0) { if ($articleId <= 0) {
return jsonError('article_id required'); return jsonError('article_id required');
@@ -51,21 +55,79 @@ class Plagiarism extends Base
$localPath = $fileUrl !== '' $localPath = $fileUrl !== ''
? $svc->resolveFileUrlToLocal($fileUrl) ? $svc->resolveFileUrlToLocal($fileUrl)
: $svc->locateArticleManuscript($articleId); : $svc->locateArticleManuscript($articleId);
echo $localPath; if (strtolower($checkType) === 'both') {
$checkId = $svc->submit($articleId, $localPath, $editorId, 'manual'); $ids = $svc->submitBoth($articleId, $localPath, $editorId, 'manual');
return jsonSuccess(['check_id' => $checkId]); return jsonSuccess($ids);
}
$checkId = $svc->submit($articleId, $localPath, $editorId, 'manual', $checkType);
return jsonSuccess(['check_id' => $checkId, 'check_type' => strtolower($checkType) ?: 'full']);
} catch (\Throwable $e) { } catch (\Throwable $e) {
return jsonError($e->getMessage()); return jsonError($e->getMessage());
} }
} }
public function testccone(){ /**
* 调试与线上一致走队列链upload → wait ingest → trigger → poll需 worker 消费 plagiarism 队列。
*/
public function testccone()
{
$svc = new PlagiarismService(); $svc = new PlagiarismService();
$checkId = 9; $checkId = 9;
$filePath = "/home/wwwroot/api.tmrjournals.com/public/manuscirpt/20260509/6832a56e8ace38fe99df390ab5221deb.docx"; $filePath = '/home/wwwroot/api.tmrjournals.com/public/manuscirpt/20260509/6832a56e8ace38fe99df390ab5221deb.docx';
$svc->runUploadAndTrigger($checkId,$filePath); $svc->runUploadOnly($checkId, $filePath);
}
/**
 * Debug endpoint: forwards the posted "id" to
 * TurnitinService::parseSubmissionIngestState() and returns its result.
 *
 * POST params:
 *   id  required (only presence is validated)
 */
public function testcconegetstatus()
{
    $post = $this->request->post();
    $validator = new Validate([
        'id' => 'require',
    ]);

    if ($validator->check($post)) {
        $turnitin = new TurnitinService();
        $state = $turnitin->parseSubmissionIngestState($post['id']);
        return jsonSuccess($state);
    }

    return jsonError($validator->getError());
}
/**
 * Debug endpoint: runs PlagiarismService::runIngestPollStep() for the posted
 * "checkId" and returns whatever that step returns.
 *
 * POST params:
 *   checkId  required (only presence is validated)
 */
public function testcconewait()
{
    $post = $this->request->post();
    $validator = new Validate([
        'checkId' => 'require',
    ]);

    if ($validator->check($post)) {
        $service = new PlagiarismService();
        $stepResult = $service->runIngestPollStep($post['checkId']);
        return jsonSuccess($stepResult);
    }

    return jsonError($validator->getError());
}
/**
 * Debug endpoint: runs PlagiarismService::runTriggerSimilarityOnly() for the
 * posted "checkId" and returns whatever that call returns.
 *
 * POST params:
 *   checkId  required (only presence is validated)
 */
public function testcconesimilar()
{
    $post = $this->request->post();
    $validator = new Validate([
        'checkId' => 'require',
    ]);

    if ($validator->check($post)) {
        $service = new PlagiarismService();
        $triggerResult = $service->runTriggerSimilarityOnly($post['checkId']);
        return jsonSuccess($triggerResult);
    }

    return jsonError($validator->getError());
}
public function testcconelast(){
$data = $this->request->post();
$rule = new Validate([
"checkId"=>"require"
]);
if(!$rule->check($data)){
return jsonError($rule->getError());
}
$svc = new PlagiarismService();
$re = $svc->runPollStatus($data['checkId']);
return jsonSuccess($re);
} }
/** /**
@@ -89,7 +151,15 @@ class Plagiarism extends Base
if (!$row) { if (!$row) {
return jsonError('not found'); return jsonError('not found');
} }
return jsonSuccess($this->formatRow($row)); $out = $this->formatRow($row);
if (!empty($row['raw_response'])) {
$raw = json_decode($row['raw_response'], true);
if (is_array($raw)) {
$out['similarity_meta'] = \app\common\TurnitinService::parseSimilarityReportMeta($raw);
}
}
$out['report_view_hint'] = 'PDF 多为 Match Overview 汇总样式;按来源库(Internet/Publication/Crossref)分类请用 getReportUrl 打开在线报告并切到 All Sources';
return jsonSuccess($out);
} }
/** /**
@@ -113,11 +183,18 @@ class Plagiarism extends Base
} }
/** /**
* 取在线查看 URL;过期则自动刷新 * 取在线查看 URLTurnitin 一次性会话链接,关闭报告页后勿复用旧 URL
*
* 入参:
* check_id 必填
* editor_id 选填,当前打开报告的编辑 user_id与 viewer_user_id 对应,避免 session 认证失败)
* reuse 选填1=在未过期时复用库内缓存;默认 0每次调用重新向 Turnitin 申请
*/ */
public function getReportUrl() public function getReportUrl()
{ {
$checkId = intval($this->request->param('check_id', 0)); $checkId = intval($this->request->param('check_id', 0));
$editorId = intval($this->request->param('editor_id', 0));
$reuse = intval($this->request->param('reuse', 0)) === 1;
if ($checkId <= 0) { if ($checkId <= 0) {
return jsonError('check_id required'); return jsonError('check_id required');
} }
@@ -129,22 +206,48 @@ class Plagiarism extends Base
if ($row['state'] != 3) { if ($row['state'] != 3) {
return jsonError('check not completed yet, state=' . $row['state']); return jsonError('check not completed yet, state=' . $row['state']);
} }
$needRefresh = empty($row['view_only_url']) $viewerContext = [];
if ($editorId > 0) {
$viewerContext['editor_id'] = $editorId;
}
$needRefresh = !$reuse
|| empty($row['view_only_url'])
|| intval($row['view_only_url_expire']) < time() + 60; || intval($row['view_only_url_expire']) < time() + 60;
$usageHint = '每次打开请先调用本接口获取新链接;勿收藏或再次打开旧链接。请在新标签页打开,并允许 Turnitin 域名 Cookie。';
if ($needRefresh) { if ($needRefresh) {
$svc = new PlagiarismService(); $svc = new PlagiarismService();
$info = $svc->refreshViewerUrlFor($checkId); $info = $svc->refreshViewerUrlFor($checkId, $viewerContext);
if ($info['url'] === '') {
return jsonError('Turnitin returned empty viewer_url');
}
return jsonSuccess([ return jsonSuccess([
'view_only_url' => $info['url'], 'view_only_url' => $info['url'],
'expire' => $info['expire'], 'expire' => $info['expire'],
'has_pdf' => !empty($info['local_pdf']),
'viewer_user_id' => $info['viewer_user_id'],
'refreshed' => true,
'usage_hint' => $usageHint,
]); ]);
} }
return jsonSuccess([ return jsonSuccess([
'view_only_url' => $row['view_only_url'], 'view_only_url' => $row['view_only_url'],
'expire' => intval($row['view_only_url_expire']), 'expire' => intval($row['view_only_url_expire']),
'has_pdf' => !empty($row['pdf_local_path']),
'refreshed' => false,
'usage_hint' => $usageHint,
]); ]);
} catch (\Throwable $e) { } catch (\Throwable $e) {
if (!empty($row['pdf_local_path'])) {
return jsonSuccess([
'view_only_url' => '',
'expire' => 0,
'has_pdf' => true,
'viewer_error' => $e->getMessage(),
'hint' => '在线报告暂不可用,请使用 downloadReport 下载 PDF',
]);
}
return jsonError($e->getMessage()); return jsonError($e->getMessage());
} }
} }
@@ -201,10 +304,14 @@ class Plagiarism extends Base
'similarity_score' => floatval($r['similarity_score']), 'similarity_score' => floatval($r['similarity_score']),
'tii_report_status' => (string)$r['tii_report_status'], 'tii_report_status' => (string)$r['tii_report_status'],
'has_pdf' => !empty($r['pdf_local_path']), 'has_pdf' => !empty($r['pdf_local_path']),
'local_pdf_url' => $r['pdf_local_path'],
'has_viewer_url' => !empty($r['view_only_url']) && intval($r['view_only_url_expire']) > time(), 'has_viewer_url' => !empty($r['view_only_url']) && intval($r['view_only_url_expire']) > time(),
'attempts' => intval($r['attempts']), 'attempts' => intval($r['attempts']),
'error_msg' => (string)$r['error_msg'], 'error_msg' => (string)$r['error_msg'],
'source_file_name' => (string)$r['source_file_name'], 'source_file_name' => (string)$r['source_file_name'],
'check_type' => (string)($r['check_type'] ?? 'full'),
'check_type_label' => $this->checkTypeLabel($r['check_type'] ?? 'full'),
'derived_file_path'=> (string)($r['derived_file_path'] ?? ''),
'trigger_source' => (string)$r['trigger_source'], 'trigger_source' => (string)$r['trigger_source'],
'triggered_by' => intval($r['triggered_by']), 'triggered_by' => intval($r['triggered_by']),
'ctime' => intval($r['ctime']), 'ctime' => intval($r['ctime']),
@@ -212,6 +319,15 @@ class Plagiarism extends Base
]; ];
} }
/**
 * Map a check_type value to its display label.
 *
 * The value is trimmed and lower-cased first; "body_only" and "body" map to
 * the body-only label, every other value (including empty) maps to the
 * full-text label.
 *
 * @param mixed $checkType raw check_type value
 * @return string display label
 */
private function checkTypeLabel($checkType)
{
    $normalized = strtolower(trim((string) $checkType));
    $isBodyOnly = in_array($normalized, ['body_only', 'body'], true);

    return $isBodyOnly ? '正文查重' : '全文查重';
}
private function stateLabel($state) private function stateLabel($state)
{ {
$map = [ $map = [

View File

@@ -1030,6 +1030,7 @@ class References extends Base
* AI检测 * AI检测
*/ */
public function checkByAi($aParam = []){ public function checkByAi($aParam = []){
return jsonError("service is stop!");
//获取参数 //获取参数
$aParam = empty($aParam) ? $this->request->post() : $aParam; $aParam = empty($aParam) ? $this->request->post() : $aParam;

View File

@@ -2299,14 +2299,14 @@ class Reviewer extends Base
->count(); ->count();
if(empty($count)){ if(empty($count)){
return jsonSuccess(['reviewers' => [],'count' => 0]); return jsonSuccess(['reviewers' => [],'count' => 0,"sql"=>$this->reviewer_to_journal_obj->getLastSql()]);
} }
//获取数据 //获取数据
$list = $this->reviewer_to_journal_obj $list = $this->reviewer_to_journal_obj
->join("t_user", "t_user.user_id = t_reviewer_to_journal.reviewer_id", "left") ->join("t_user", "t_user.user_id = t_reviewer_to_journal.reviewer_id", "left")
->join("t_user_reviewer_info", "t_user_reviewer_info.reviewer_id = t_reviewer_to_journal.reviewer_id", "left") ->join("t_user_reviewer_info", "t_user_reviewer_info.reviewer_id = t_reviewer_to_journal.reviewer_id", "left")
->field('t_user.account,t_user.email,t_user.realname,t_user_reviewer_info.company,t_user_reviewer_info.field,t_user.user_id,t_user.rs_num') ->field('t_user.account,t_user.email,t_user.realname,t_user_reviewer_info.company,t_user_reviewer_info.field,t_user_reviewer_info.last_invite_time,t_user.user_id,t_user.rs_num')
->where($where)->where(function($query) use ($iTeenDaysLater) { ->where($where)->where(function($query) use ($iTeenDaysLater) {
$query->where('t_user_reviewer_info.last_invite_time', '<', $iTeenDaysLater) $query->where('t_user_reviewer_info.last_invite_time', '<', $iTeenDaysLater)
->whereOr('t_user_reviewer_info.last_invite_time', '=', 0); ->whereOr('t_user_reviewer_info.last_invite_time', '=', 0);

View File

@@ -214,6 +214,21 @@ class User extends Base
$insert['ctime'] = time(); $insert['ctime'] = time();
$this->user_to_yboard_obj->insert($insert); $this->user_to_yboard_obj->insert($insert);
//将此人添加到审稿人中
$reviewer_journal = $this->reviewer_to_journal_obj->where("reviewer_id",$user_info['user_id'])->where("journal_id",$journal_info['journal_id'])->find();
if($reviewer_journal){
$this->reviewer_to_journal_obj->where("rtj_id",$reviewer_journal['rtj_id'])->update(['is_yboard'=>1]);
}else{
$insert_reviewer['reviewer_id'] = $user_info['user_id'];
$insert_reviewer['journal_id'] = $journal_info['journal_id'];
$insert_reviewer['account'] = $user_info['account'];
$insert_reviewer['journal_title'] = $journal_info['title'];
$insert_reviewer['is_yboard'] = 1;
$insert_reviewer['ctime'] = time();
$this->reviewer_to_journal_obj->insert($insert_reviewer);
}
//发送通知邮件给用户 //发送通知邮件给用户
$tt = 'Dear Dr. ' . ($user_info['realname'] == '' ? $user_info['account'] : $user_info['realname']) . ',<br><br>'; $tt = 'Dear Dr. ' . ($user_info['realname'] == '' ? $user_info['account'] : $user_info['realname']) . ',<br><br>';
$tt .= "Thanks for your support to the journal ".$journal_info['title'].", Please note that your account of ".$journal_info['title']." has been created. The login credentials in the system is as below:<br><br>"; $tt .= "Thanks for your support to the journal ".$journal_info['title'].", Please note that your account of ".$journal_info['title']." has been created. The login credentials in the system is as below:<br><br>";

View File

@@ -0,0 +1,92 @@
<?php
namespace app\api\controller;
use think\Db;
use think\Validate;
use app\common\UserFieldAiService;
/**
 * Controller for the AI summary of a user's main research field
 * (the field_ai columns of the user_reviewer_info table).
 *
 * Endpoints:
 *   startChain  start the chained queue run via UserFieldAiService::startChain()
 *   processOne  process a single user_id synchronously (debugging)
 *   preview     report eligibility flags and the stored field_ai state for a user
 */
class UserFieldAi extends Base
{
    /**
     * Start the chained processing run.
     *
     * Request params:
     *   force  optional; only the value 1 turns force mode on
     *   delay  optional, default 1; negative values are clamped to 0
     *
     * The original note states a worker must be running:
     *   php think queue:work --queue UserFieldAi
     */
    public function startChain()
    {
        $request = $this->request;
        $forceAll = intval($request->param('force', 0)) === 1;

        $chainDelay = intval($request->param('delay', 1));
        if ($chainDelay < 0) {
            $chainDelay = 0;
        }

        $service = new UserFieldAiService();
        $wasStarted = $service->startChain($forceAll, $chainDelay);

        $message = 'no pending users';
        if ($wasStarted) {
            $message = 'chain enqueued';
        }

        return jsonSuccess([
            'started' => $wasStarted,
            'queue'   => UserFieldAiService::QUEUE_NAME,
            'force'   => $forceAll,
            'msg'     => $message,
        ]);
    }

    /**
     * Process one user synchronously, without going through the queue.
     *
     * Request params:
     *   user_id  required, must be > 0
     *   force    optional; only the value 1 turns force mode on
     *
     * Responds with the service result when its "ok" entry is non-empty,
     * otherwise with the result's "error" entry (or "failed" when absent).
     */
    public function processOne()
    {
        $request = $this->request;
        $targetUserId = intval($request->param('user_id', 0));
        $forceAll = intval($request->param('force', 0)) === 1;

        if ($targetUserId <= 0) {
            return jsonError('user_id required');
        }

        $service = new UserFieldAiService();
        $outcome = $service->processUser($targetUserId, $forceAll);

        if (!empty($outcome['ok'])) {
            return jsonSuccess($outcome);
        }

        $reason = 'failed';
        if (isset($outcome['error'])) {
            $reason = $outcome['error'];
        }
        return jsonError($reason);
    }

    /**
     * Preview a user's eligibility and current field_ai state.
     *
     * Note: this calls ensureReviewerInfoRow() before reading, so the request
     * is not guaranteed to be read-only (depends on that service method).
     *
     * Request params:
     *   user_id  required, must be > 0
     */
    public function preview()
    {
        $targetUserId = intval($this->request->param('user_id', 0));
        if ($targetUserId <= 0) {
            return jsonError('user_id required');
        }

        $service = new UserFieldAiService();
        $service->ensureReviewerInfoRow($targetUserId);
        $profileRow = Db::name('user_reviewer_info')->where('reviewer_id', $targetUserId)->find();

        // Service checks run in the same order as the response keys they feed.
        $hasArticles = $service->hasSubmittedArticles($targetUserId);
        $profileComplete = $service->isReviewerProfileComplete($profileRow);
        $eligible = $service->isEligible($targetUserId, $profileRow);

        // Defaults apply when no reviewer-info row was found.
        $fieldAi = '';
        $statusCode = 0;
        $updatedAt = 0;
        if ($profileRow) {
            $fieldAi = (string) $profileRow['field_ai'];
            $statusCode = intval($profileRow['field_ai_status']);
            $updatedAt = intval($profileRow['field_ai_utime']);
        }

        return jsonSuccess([
            'user_id'              => $targetUserId,
            'has_articles'         => $hasArticles,
            'profile_complete'     => $profileComplete,
            'eligible'             => $eligible,
            'field_ai'             => $fieldAi,
            'field_ai_status'      => $statusCode,
            'field_ai_utime'       => $updatedAt,
            'field_ai_status_text' => $this->statusLabel($statusCode),
        ]);
    }

    /**
     * Translate a field_ai_status code into a short text label.
     *
     * @param int $status one of the UserFieldAiService::STATUS_* values
     * @return string label, or "unknown" for any other value
     */
    private function statusLabel($status)
    {
        $labels = [
            UserFieldAiService::STATUS_PENDING      => 'pending',
            UserFieldAiService::STATUS_DONE         => 'done',
            UserFieldAiService::STATUS_INSUFFICIENT => 'insufficient',
            UserFieldAiService::STATUS_FAILED       => 'failed',
        ];

        if (!isset($labels[$status])) {
            return 'unknown';
        }
        return $labels[$status];
    }
}

View File

@@ -0,0 +1,85 @@
<?php
namespace app\api\job;
use think\queue\Job;
use app\common\QueueJob;
use app\common\QueueRedis;
use think\Db;
/**
 * Queue job: runs the AI reference check for one reference of a production
 * article by calling References::getCheckByAiResult() with the job payload.
 *
 * Expected payload keys:
 *   p_article_id  production article id (required)
 *   p_refer_id    reference id (required)
 */
class AiCheckReferByDoi
{
    private $oQueueJob;
    private $QueueRedis;
    // Expiry handed to QueueRedis::finishJob() for the "completed" marker
    // (presumably seconds; confirm against QueueRedis).
    private $completedExprie = 3600;

    public function __construct()
    {
        $this->oQueueJob = new QueueJob;
        $this->QueueRedis = QueueRedis::getInstance();
    }

    /**
     * Job entry point called by the queue worker.
     *
     * @param Job   $job  the queue job being processed
     * @param mixed $data job payload (see class doc for the expected keys)
     * @return void
     */
    public function fire(Job $job, $data)
    {
        // Start-of-job bookkeeping
        $this->oQueueJob->init($job);

        // Read the raw payload only to extract the job id for logging
        $rawBody = empty($job->getRawBody()) ? '' : $job->getRawBody();
        $jobData = empty($rawBody) ? [] : json_decode($rawBody, true);
        $jobId = empty($jobData['id']) ? 'unknown' : $jobData['id'];

        $this->oQueueJob->log("-----------队列任务开始-----------");
        $this->oQueueJob->log("当前任务ID: {$jobId}, 尝试次数: {$job->attempts()}");

        // Production article id is mandatory; drop the job when it is missing
        $iPArticleId = empty($data['p_article_id']) ? 0 : $data['p_article_id'];
        if (empty($iPArticleId)) {
            $this->oQueueJob->log("无效的p_article_id删除任务");
            $job->delete();
            return;
        }

        // Reference id is mandatory as well. The previous version re-tested
        // $iPArticleId here, so a missing p_refer_id was never rejected and
        // all such jobs shared the same lock key suffix ":0".
        $iPReferId = empty($data['p_refer_id']) ? 0 : $data['p_refer_id'];
        if (empty($iPReferId)) {
            $this->oQueueJob->log("无效的p_refer_id删除任务");
            $job->delete();
            return;
        }

        try {
            // Build the per-(article, reference) lock key and try to take the lock
            $sClassName = get_class($this);
            $sRedisKey = "queue_job:{$sClassName}:{$iPArticleId}:{$iPReferId}";
            $sRedisValue = uniqid() . '_' . getmypid();
            if (!$this->oQueueJob->acquireLock($sRedisKey, $sRedisValue, $job)) {
                return; // lock not acquired; acquireLock has already dealt with the job
            }

            // Run the actual check
            $oProductionArticleRefer = new \app\api\controller\References;
            $response = $oProductionArticleRefer->getCheckByAiResult($data);

            // An empty response is treated as a retryable failure
            if (empty($response)) {
                throw new \RuntimeException("OpenAI API返回空结果");
            }

            // The response must be valid JSON
            $aResult = json_decode($response, true);
            if (json_last_error() !== JSON_ERROR_NONE) {
                throw new \RuntimeException("解析OpenAI响应失败: " . json_last_error_msg() . " | 原始响应: {$response}");
            }
            $sMsg = empty($aResult['msg']) ? 'success' : $aResult['msg'];

            // Mark the job as completed, then remove it from the queue
            $this->QueueRedis->finishJob($sRedisKey, 'completed', $this->completedExprie,$sRedisValue);
            $job->delete();
            $this->oQueueJob->log("任务执行成功 | 日志ID: {$sRedisKey} | 执行日志:{$sMsg}");

        } catch (\RuntimeException $e) {
            $this->oQueueJob->handleRetryableException($e,$sRedisKey,$sRedisValue, $job);
        } catch (\LogicException $e) {
            $this->oQueueJob->handleNonRetryableException($e,$sRedisKey,$sRedisValue, $job);
        } catch (\Exception $e) {
            $this->oQueueJob->handleRetryableException($e,$sRedisKey,$sRedisValue, $job);
        } finally {
            // Runs on every exit path from the try block, including the early return
            $this->oQueueJob->finnal();
        }
    }
}

View File

@@ -0,0 +1,92 @@
<?php
namespace app\api\job;
use think\queue\Job;
use app\common\QueueJob;
use app\common\QueueRedis;
use app\common\ProductionArticleRefer;
use think\Db;
/**
 * Queue job: calls ProductionArticleRefer::get() with the job payload for one
 * reference of a production article.
 *
 * Expected payload keys:
 *   p_article_id  production article id (required)
 *   p_refer_id    reference id (required)
 */
class ArticleReferDetailQueue
{
    private $jobHelper;
    private $redis;
    // Expiry handed to QueueRedis::finishJob() for the "completed" marker.
    private $completedExpire = 3600;

    public function __construct()
    {
        $this->jobHelper = new QueueJob;
        $this->redis = QueueRedis::getInstance();
    }

    /**
     * Job entry point called by the queue worker.
     *
     * @param Job   $job  the queue job being processed
     * @param mixed $data job payload (see class doc for the expected keys)
     * @return void
     */
    public function fire(Job $job, $data)
    {
        $this->jobHelper->init($job);

        $jobId = $this->extractJobId($job);
        $this->jobHelper->log("-----------队列任务开始-----------");
        $this->jobHelper->log("当前任务ID: {$jobId}, 尝试次数: {$job->attempts()}");

        // Only p_article_id and p_refer_id are validated; article_id is not required.
        if (empty($data['p_article_id'])) {
            $this->jobHelper->log("无效的p_article_id删除任务");
            $job->delete();
            return;
        }
        $productionArticleId = $data['p_article_id'];

        if (empty($data['p_refer_id'])) {
            $this->jobHelper->log("无效的p_refer_id删除任务");
            $job->delete();
            return;
        }
        $referenceId = $data['p_refer_id'];

        try {
            // Per-(article, reference) lock key plus a per-attempt token
            $lockKey = sprintf('queue_job:%s:%s:%s', static::class, $productionArticleId, $referenceId);
            $lockToken = uniqid() . '_' . getmypid();

            $locked = $this->jobHelper->acquireLock($lockKey, $lockToken, $job);
            if (!$locked) {
                return; // lock not acquired; acquireLock has already dealt with the job
            }

            $referService = new ProductionArticleRefer;
            $response = $referService->get($data);

            // Empty or non-JSON responses are reported as retryable failures
            if (empty($response)) {
                throw new \RuntimeException("返回空结果");
            }
            $decoded = json_decode($response, true);
            if (json_last_error() !== JSON_ERROR_NONE) {
                throw new \RuntimeException("解析响应失败: " . json_last_error_msg() . " | 原始响应: {$response}");
            }

            $resultMsg = 'success';
            if (!empty($decoded['msg'])) {
                $resultMsg = $decoded['msg'];
            }

            // Mark as completed, then remove the job from the queue
            $this->redis->finishJob($lockKey, 'completed', $this->completedExpire, $lockToken);
            $job->delete();
            $this->jobHelper->log("任务执行成功 | 日志ID: {$lockKey} | 执行日志:{$resultMsg}");
        } catch (\LogicException $e) {
            // LogicException family: not retried
            $this->jobHelper->handleNonRetryableException($e, $lockKey, $lockToken, $job);
        } catch (\Exception $e) {
            // RuntimeException and every other Exception: retried
            $this->jobHelper->handleRetryableException($e, $lockKey, $lockToken, $job);
        } finally {
            $this->jobHelper->finnal();
        }
    }

    /**
     * Read the "id" entry from the job's raw JSON body, for logging only.
     *
     * @param Job $job
     * @return mixed the id, or "unknown" when the body is empty or has no id
     */
    private function extractJobId(Job $job)
    {
        $rawBody = $job->getRawBody();
        $decoded = empty($rawBody) ? [] : json_decode($rawBody, true);

        return empty($decoded['id']) ? 'unknown' : $decoded['id'];
    }
}

View File

@@ -0,0 +1,85 @@
<?php
namespace app\api\job;
use think\queue\Job;
use app\common\QueueJob;
use app\common\QueueRedis;
use app\common\ProductionArticleRefer;
use think\Db;
/**
 * Queue job: calls ProductionArticleRefer::top() with the job payload for one
 * article / production-article pair.
 *
 * Expected payload keys:
 *   article_id    article id (required)
 *   p_article_id  production article id (required)
 */
class ArticleReferQueue
{
    private $jobHelper;
    private $redis;
    // Expiry handed to QueueRedis::finishJob() for the "completed" marker.
    private $completedExpire = 180;

    public function __construct()
    {
        $this->jobHelper = new QueueJob;
        $this->redis = QueueRedis::getInstance();
    }

    /**
     * Job entry point called by the queue worker.
     *
     * @param Job   $job  the queue job being processed
     * @param mixed $data job payload (see class doc for the expected keys)
     * @return void
     */
    public function fire(Job $job, $data)
    {
        $this->jobHelper->init($job);

        $jobId = $this->extractJobId($job);
        $this->jobHelper->log("-----------队列任务开始-----------");
        $this->jobHelper->log("当前任务ID: {$jobId}, 尝试次数: {$job->attempts()}");

        // Both ids are mandatory; a job missing either one is dropped.
        if (empty($data['article_id'])) {
            $this->jobHelper->log("无效的article_id删除任务");
            $job->delete();
            return;
        }
        $articleId = $data['article_id'];

        if (empty($data['p_article_id'])) {
            $this->jobHelper->log("无效的p_article_id删除任务");
            $job->delete();
            return;
        }
        $productionArticleId = $data['p_article_id'];

        try {
            // Per-(article, production article) lock key plus a per-attempt token
            $lockKey = sprintf('queue_job:%s:%s:%s', static::class, $articleId, $productionArticleId);
            $lockToken = uniqid() . '_' . getmypid();

            $locked = $this->jobHelper->acquireLock($lockKey, $lockToken, $job);
            if (!$locked) {
                return; // lock not acquired; acquireLock has already dealt with the job
            }

            $referService = new ProductionArticleRefer;
            $response = $referService->top($data);

            // Empty or non-JSON responses are reported as retryable failures
            if (empty($response)) {
                throw new \RuntimeException("OpenAI API返回空结果");
            }
            $decoded = json_decode($response, true);
            if (json_last_error() !== JSON_ERROR_NONE) {
                throw new \RuntimeException("解析OpenAI响应失败: " . json_last_error_msg() . " | 原始响应: {$response}");
            }

            $resultMsg = 'success';
            if (!empty($decoded['msg'])) {
                $resultMsg = $decoded['msg'];
            }

            // Mark as completed, then remove the job from the queue
            $this->redis->finishJob($lockKey, 'completed', $this->completedExpire, $lockToken);
            $job->delete();
            $this->jobHelper->log("任务执行成功 | 日志ID: {$lockKey} | 执行日志:{$resultMsg}");
        } catch (\LogicException $e) {
            // LogicException family: not retried
            $this->jobHelper->handleNonRetryableException($e, $lockKey, $lockToken, $job);
        } catch (\Exception $e) {
            // RuntimeException and every other Exception: retried
            $this->jobHelper->handleRetryableException($e, $lockKey, $lockToken, $job);
        } finally {
            $this->jobHelper->finnal();
        }
    }

    /**
     * Read the "id" entry from the job's raw JSON body, for logging only.
     *
     * @param Job $job
     * @return mixed the id, or "unknown" when the body is empty or has no id
     */
    private function extractJobId(Job $job)
    {
        $rawBody = $job->getRawBody();
        $decoded = empty($rawBody) ? [] : json_decode($rawBody, true);

        return empty($decoded['id']) ? 'unknown' : $decoded['id'];
    }
}

View File

@@ -31,6 +31,7 @@ class PlagiarismPoll
return; return;
} }
$svc = new PlagiarismService(); $svc = new PlagiarismService();
$svc->log("PlagiarismPoll job is running");
$svc->runPollStatus($checkId, $attempt); $svc->runPollStatus($checkId, $attempt);
$job->delete(); $job->delete();
} }

View File

@@ -6,9 +6,9 @@ use think\queue\Job;
use app\common\PlagiarismService; use app\common\PlagiarismService;
/** /**
* 队列任务:上传论文到 Turnitin + 触发 similarity 检测 * 队列任务:创建 Turnitin submission 并上传原稿ingest 轮询与触发 similarity 由后续 Job 完成
* *
* 完成后会自动入队 PlagiarismPoll 进行后续轮询。 * PlagiarismRun → PlagiarismWaitIngest → PlagiarismTriggerSimilarity → PlagiarismPoll
* *
* data: * data:
* - check_id t_plagiarism_check.check_id * - check_id t_plagiarism_check.check_id
@@ -29,8 +29,12 @@ class PlagiarismRun
return; return;
} }
$svc = new PlagiarismService(); $svc = new PlagiarismService();
$svc->log("PlagiarismRun job act!!"); $svc->log('PlagiarismRun job is running');
$svc->runUploadAndTrigger($checkId, $filePath); try {
$svc->runUploadOnly($checkId, $filePath);
} catch (\Throwable $e) {
$svc->markFailed($checkId, '[upload] ' . $e->getMessage());
}
$job->delete(); $job->delete();
} }
} }

View File

@@ -0,0 +1,34 @@
<?php
namespace app\api\job;
use think\queue\Job;
use app\common\PlagiarismService;
/**
 * Queue job: runs the "trigger similarity" step of a plagiarism check by
 * delegating to PlagiarismService::runTriggerSimilarityOnly(). According to
 * the original note, that step issues PUT /similarity once ingest is ready
 * and then enqueues PlagiarismPoll.
 *
 * data:
 *   - check_id        t_plagiarism_check.check_id
 *   - ingest_attempt  attempt counter carried over from PlagiarismWaitIngest;
 *                     used to resume ingest polling on a 409
 */
class PlagiarismTriggerSimilarity
{
    /**
     * @param Job   $job  queue job handle; always deleted before returning
     * @param mixed $data job payload (see class comment)
     * @return void
     */
    public function fire(Job $job, $data)
    {
        $id = (int) ($data['check_id'] ?? 0);
        $carriedAttempt = (int) ($data['ingest_attempt'] ?? 1);

        // Without a positive check id there is nothing to process; just drop the job.
        if ($id > 0) {
            $service = new PlagiarismService();
            $service->log('PlagiarismTriggerSimilarity job is running');
            try {
                $service->runTriggerSimilarityOnly($id, $carriedAttempt);
            } catch (\Throwable $error) {
                // Any failure is recorded on the check row instead of being rethrown.
                $service->markFailed($id, '[similarity] ' . $error->getMessage());
            }
        }

        $job->delete();
    }
}

View File

@@ -0,0 +1,34 @@
<?php
namespace app\api\job;
use think\queue\Job;
use app\common\PlagiarismService;
/**
 * Queue job: performs a single ingest check for a plagiarism submission by
 * delegating to PlagiarismService::runIngestPollStep(). According to the
 * original note, an unfinished ingest is re-queued with a delay.
 *
 * data:
 *   - check_id  t_plagiarism_check.check_id
 *   - attempt   counter starting at 1
 */
class PlagiarismWaitIngest
{
    /**
     * @param Job   $job  queue job handle; always deleted before returning
     * @param mixed $data job payload (see class comment)
     * @return void
     */
    public function fire(Job $job, $data)
    {
        $id = (int) ($data['check_id'] ?? 0);
        $tryNo = (int) ($data['attempt'] ?? 1);

        // Without a positive check id there is nothing to process; just drop the job.
        if ($id > 0) {
            $service = new PlagiarismService();
            $service->log('PlagiarismWaitIngest job is running');
            try {
                $service->runIngestPollStep($id, $tryNo);
            } catch (\Throwable $error) {
                // Any failure is recorded on the check row instead of being rethrown.
                $service->markFailed($id, '[ingest] ' . $error->getMessage());
            }
        }

        $job->delete();
    }
}

View File

@@ -0,0 +1,101 @@
<?php
namespace app\api\job;
use think\queue\Job;
use app\common\QueueJob;
use app\common\QueueRedis;
use app\api\controller\Cronreview;
/**
 * Queue consumer that sends a review reminder e-mail by calling
 * Cronreview::reminder(), guarded by a Redis lock keyed on
 * (article, reviewer, review record, e-mail type).
 */
class ReminderEmailToReviewer
{
    /** @var QueueJob shared queue helper (logging, locking, exception handling) */
    private $oQueueJob;

    /** @var QueueRedis Redis helper used to mark the job as finished */
    private $QueueRedis;

    /** @var int third argument handed to QueueRedis::finishJob(); the original comment calls it the "completed" state expiry */
    private $completedExprie = 3600;

    public function __construct()
    {
        $this->oQueueJob = new QueueJob;
        $this->QueueRedis = QueueRedis::getInstance();
    }

    /**
     * Queue entry point.
     *
     * @param Job   $job  queue job handle
     * @param mixed $data job payload; reads article_id, art_rev_id, reviewer_id, email_type
     * @return void
     */
    public function fire(Job $job, $data)
    {
        // Per-job bookkeeping handled by the shared helper.
        $this->oQueueJob->init($job);

        // Pull the job id out of the raw queue payload, for logging only.
        $rawBody = empty($job->getRawBody()) ? '' : $job->getRawBody();
        $jobData = empty($rawBody) ? [] : json_decode($rawBody, true);
        $jobId = empty($jobData['id']) ? 'unknown' : $jobData['id'];
        $this->oQueueJob->log("-----------队列任务开始-----------");
        $this->oQueueJob->log("当前任务ID: {$jobId}, 尝试次数: {$job->attempts()}");

        // Defined up front so the exception handlers below always receive
        // defined values, even if something throws before the lock key is built.
        $sRedisKey = '';
        $sRedisValue = '';

        try {
            // Validate the payload: article id, review-record primary key,
            // reviewer id and e-mail type are all mandatory.
            $iArticleId = empty($data['article_id']) ? 0 : $data['article_id'];
            $art_rev_id = empty($data['art_rev_id']) ? 0 : $data['art_rev_id'];
            $reviewer_id = empty($data['reviewer_id']) ? 0 : $data['reviewer_id'];
            $email_type = empty($data['email_type']) ? 0 : $data['email_type'];

            if (empty($iArticleId)) {
                $this->oQueueJob->log("无效的article_id删除任务");
                $job->delete();
                return;
            }
            if (empty($art_rev_id)) {
                $this->oQueueJob->log("无效的art_rev_id删除任务");
                $job->delete();
                return;
            }
            if (empty($reviewer_id)) {
                $this->oQueueJob->log("无效的reviewer_id删除任务");
                $job->delete();
                return;
            }
            if (empty($email_type)) {
                $this->oQueueJob->log("无效的email_type删除任务");
                $job->delete();
                return;
            }

            // Build the unique task key and try to take the lock.
            $sClassName = get_class($this);
            $sRedisKey = "queue_job:{$sClassName}:{$iArticleId}:{$reviewer_id}:{$art_rev_id}:{$email_type}";
            $sRedisValue = uniqid() . '_' . getmypid();
            if (!$this->oQueueJob->acquireLock($sRedisKey, $sRedisValue, $job)) {
                // Lock not obtained; acquireLock() has already dealt with the job.
                return;
            }

            // Core work: delegate to the reminder routine.
            $oCronreview = new Cronreview;
            $response = $oCronreview->reminder($data);
            if (empty($response)) {
                throw new \RuntimeException("OpenAI API返回空结果");
            }

            // The response must be valid JSON.
            $aResult = json_decode($response, true);
            if (json_last_error() !== JSON_ERROR_NONE) {
                throw new \RuntimeException("解析OpenAI响应失败: " . json_last_error_msg() . " | 原始响应: {$response}");
            }
            $sMsg = empty($aResult['msg']) ? 'success' : $aResult['msg'];

            // Flag completion, then remove the job from the queue.
            $this->QueueRedis->finishJob($sRedisKey, 'completed', $this->completedExprie, $sRedisValue);
            $job->delete();
            $this->oQueueJob->log("任务执行成功 | 日志ID: {$sRedisKey} | 执行日志:{$sMsg}");
        } catch (\RuntimeException $e) {
            // The leading backslash matters: this file lives in a namespace and
            // imports no exception classes, so an unqualified name would resolve
            // to a non-existent namespaced class and the clause would never match.
            $this->oQueueJob->handleRetryableException($e, $sRedisKey, $sRedisValue, $job);
        } catch (\LogicException $e) {
            $this->oQueueJob->handleNonRetryableException($e, $sRedisKey, $sRedisValue, $job);
        } catch (\Exception $e) {
            $this->oQueueJob->handleRetryableException($e, $sRedisKey, $sRedisValue, $job);
        } finally {
            $this->oQueueJob->finnal();
        }
    }
}

View File

@@ -0,0 +1,35 @@
<?php
namespace app\api\job;
use think\queue\Job;
use app\common\UserFieldAiService;
/**
 * Chained job: generates field_ai for a single user via
 * UserFieldAiService::processUser().
 *
 * data:
 *   - user_id  user to process
 *   - queue    queue name (defaults to UserFieldAiService::QUEUE_NAME)
 *   - force    1 = force recalculation
 *   - delay    delay intended for the follow-up job (defaults to 1)
 *
 * Worker: php think queue:work --queue UserFieldAi
 *
 * NOTE: the call that enqueues the next user is commented out below, so the
 * chain does not continue on its own at the moment.
 */
class UserFieldAiFill
{
    /**
     * @param Job   $job  queue job handle; deleted after processing
     * @param mixed $data job payload (see class comment)
     * @return void
     */
    public function fire(Job $job, $data)
    {
        $userId = 0;
        if (isset($data['user_id'])) {
            $userId = intval($data['user_id']);
        }
        $queue = UserFieldAiService::QUEUE_NAME;
        if (isset($data['queue'])) {
            $queue = (string) $data['queue'];
        }
        $force = !empty($data['force']);

        $svc = new UserFieldAiService();
        if ($userId > 0) {
            $svc->processUser($userId, $force);
        }
        $job->delete();

        // Negative delays are clamped to 0; only used by the disabled call below.
        $requestedDelay = isset($data['delay']) ? $data['delay'] : 1;
        $delay = max(0, (int) $requestedDelay);
        // $svc->enqueueNextFieldAi($delay, $queue, $userId, $force);
    }
}

View File

@@ -15,7 +15,7 @@ class mail {
public function tgpu(Job $job, $data){ public function tgpu(Job $job, $data){
// my_tg_pushmail($data); my_tg_pushmail($data);
$job->delete(); $job->delete();
} }

View File

@@ -915,7 +915,7 @@ function prgeAuthor($author)
function my_tg_pushmail($data) function my_tg_pushmail($data)
{ {
// $res = sendEmail($data['email'], $data['title'], $data['title'], $data['content'], $data['tmail'], $data['tpassword'], $data['attachmentFile']); sendEmail($data['email'], $data['title'], $data['title'], $data['content'], $data['tmail'], $data['tpassword'], $data['attachmentFile']);
// if (isset($res['status'])) { // if (isset($res['status'])) {
// $log_obj = Db::name('email_log'); // $log_obj = Db::name('email_log');
// $insert['article_id'] = $data['article_id']; // $insert['article_id'] = $data['article_id'];

View File

@@ -1153,12 +1153,12 @@ class ArticleParserService
} }
/** /**
* 提取 Word 文档中的参考文献列表(仅返回数组,不做入库 * 按段落提取 Word 全文行(供正文裁切、参考文献识别等复用
* @return array 每条为一个参考文献的纯文本字符串 * @return array<int,string>
*/ */
public static function getReferencesFromWord($filePath): array public static function collectParagraphLines($filePath): array
{ {
$othis = new self($filePath) ; $othis = new self($filePath);
if (empty($othis->sections)) { if (empty($othis->sections)) {
return []; return [];
} }
@@ -1166,13 +1166,26 @@ class ArticleParserService
$lines = []; $lines = [];
foreach ($othis->sections as $section) { foreach ($othis->sections as $section) {
foreach ($section->getElements() as $element) { foreach ($section->getElements() as $element) {
$text = $othis->getTextFromElement($element); $text = trim((string) $othis->getTextFromElement($element));
$text = trim((string)$text); if ($text === '') {
if ($text === '') continue; continue;
$lines[] = $text; }
if (!mb_check_encoding($text, 'UTF-8')) {
$text = mb_convert_encoding($text, 'UTF-8', 'GBK');
}
$lines[] = preg_replace('/\s+/u', ' ', $text);
} }
} }
return $lines;
}
/**
* 提取 Word 文档中的参考文献列表(仅返回数组,不做入库)
* @return array 每条为一个参考文献的纯文本字符串
*/
public static function getReferencesFromWord($filePath): array
{
$lines = self::collectParagraphLines($filePath);
if (empty($lines)) { if (empty($lines)) {
return []; return [];
} }

View File

@@ -0,0 +1,356 @@
<?php
namespace app\common;
use DOMDocument;
use DOMElement;
use DOMXPath;
use think\Exception;
use ZipArchive;
/**
 * Builds a "body only" DOCX from a submitted Word manuscript.
 *
 * The source file is copied and the top-level children of w:body in
 * word/document.xml that lie before the detected body start or at/after the
 * detected references start are removed; everything inside the kept blocks
 * (tables, images, styles) is left as it was.
 * Boundary detection uses visible text only (w:t); field instructions such as
 * the JSON that Zotero / EndNote store in w:instrText are not read.
 */
class ManuscriptBodyExtractor
{
    const BODY_SUBDIR = 'public/plagiarism/body_only';
    const W_NS = 'http://schemas.openxmlformats.org/wordprocessingml/2006/main';

    /** @var DOMDocument */
    private $dom;

    /** @var DOMElement */
    private $bodyNode;

    /** @var array<int,DOMElement> top-level body blocks (w:sectPr excluded) */
    private $blocks = [];

    /** @var array<int,string> visible text of each block, same indexes as $blocks */
    private $blockTexts = [];

    /**
     * Create the body-only copy of a DOCX manuscript.
     *
     * @param string $sourcePath path of the source .docx
     * @param int    $articleId  used only in the generated file name
     * @return array{path:string, rel_path:string, line_count:int, ref_start:int, body_start:int, warnings:array}
     * @throws Exception when the file is unreadable or not .docx, has no blocks,
     *                   the body cannot be located, fewer than 3 non-empty blocks
     *                   remain, or the output file cannot be written
     */
    public function buildBodyOnlyDocx($sourcePath, $articleId = 0)
    {
        $sourcePath = trim((string) $sourcePath);
        if (!is_file($sourcePath) || !is_readable($sourcePath)) {
            throw new Exception('Manuscript not readable: ' . $sourcePath);
        }
        $ext = strtolower(pathinfo($sourcePath, PATHINFO_EXTENSION));
        if ($ext !== 'docx') {
            throw new Exception('body_only check requires DOCX manuscript, got: ' . $ext);
        }

        $this->loadDocumentBlocks($sourcePath);
        if (empty($this->blocks)) {
            throw new Exception('No content blocks in manuscript');
        }

        $refStart = $this->findReferenceStartIndex();
        $bodyStart = $this->findBodyStartIndex();
        $warnings = [];
        if ($refStart < 0) {
            // No references heading found: keep everything up to the end.
            $warnings[] = 'references_heading_not_found; using document end';
            $refStart = count($this->blocks);
        }
        if ($bodyStart >= $refStart) {
            throw new Exception('Could not locate main body (front matter may include entire document)');
        }

        // Count non-empty kept blocks so an almost-empty result is rejected.
        $kept = 0;
        for ($i = $bodyStart; $i < $refStart; $i++) {
            if (trim($this->blockTexts[$i]) !== '') {
                $kept++;
            }
        }
        if ($kept < 3) {
            throw new Exception('Body content too short after extraction (' . $kept . ' non-empty blocks)');
        }

        $relPath = $this->sliceDocxToNewFile($sourcePath, $articleId, $bodyStart, $refStart);
        $absPath = $this->resolveRootDir() . DIRECTORY_SEPARATOR
            . str_replace(['/', '\\'], DIRECTORY_SEPARATOR, $relPath);

        return [
            'path' => $absPath,
            'rel_path' => $relPath,
            'line_count' => $kept,
            'ref_start' => $refStart,
            'body_start' => $bodyStart,
            'warnings' => $warnings,
        ];
    }

    /**
     * Project root without a trailing separator. Uses ROOT_PATH when that
     * constant is defined and non-empty, otherwise two directories above this file.
     *
     * @return string
     */
    private function resolveRootDir()
    {
        // defined() guards against the Error PHP 8 raises for an undefined constant.
        $root = (defined('ROOT_PATH') && ROOT_PATH) ? ROOT_PATH : dirname(dirname(__DIR__));

        return rtrim($root, '/\\');
    }

    /**
     * Read word/document.xml and fill $blocks / $blockTexts with the element
     * children of w:body (w:sectPr is skipped).
     *
     * @param string $sourcePath
     * @return void
     * @throws Exception when the archive or its document.xml cannot be read or parsed
     */
    private function loadDocumentBlocks($sourcePath)
    {
        $zip = new ZipArchive();
        if ($zip->open($sourcePath) !== true) {
            throw new Exception('Cannot open docx: ' . $sourcePath);
        }
        $xml = $zip->getFromName('word/document.xml');
        $zip->close();
        if ($xml === false || $xml === '') {
            throw new Exception('word/document.xml missing in docx');
        }

        $this->dom = new DOMDocument();
        $this->dom->preserveWhiteSpace = false;
        $this->dom->formatOutput = false;
        if (@$this->dom->loadXML($xml) === false) {
            throw new Exception('Invalid word/document.xml');
        }

        $xpath = new DOMXPath($this->dom);
        $xpath->registerNamespace('w', self::W_NS);
        $body = $xpath->query('//w:body')->item(0);
        if (!$body instanceof DOMElement) {
            throw new Exception('w:body not found');
        }

        $this->bodyNode = $body;
        $this->blocks = [];
        $this->blockTexts = [];
        foreach ($body->childNodes as $child) {
            if ($child->nodeType !== XML_ELEMENT_NODE) {
                continue;
            }
            /** @var DOMElement $child */
            if ($child->localName === 'sectPr') {
                continue;
            }
            $this->blocks[] = $child;
            $this->blockTexts[] = $this->extractVisibleTextFromBlock($child);
        }
    }

    /**
     * Concatenate only the w:t text of a block, ignoring field instructions
     * such as w:instrText, with whitespace runs collapsed to single spaces.
     *
     * @param DOMElement $block
     * @return string
     */
    private function extractVisibleTextFromBlock(DOMElement $block)
    {
        $xpath = new DOMXPath($block->ownerDocument);
        $xpath->registerNamespace('w', self::W_NS);
        $nodes = $xpath->query('.//w:t', $block);
        if (!$nodes || $nodes->length === 0) {
            return '';
        }
        $parts = [];
        foreach ($nodes as $node) {
            $parts[] = $node->textContent;
        }
        $text = preg_replace('/\s+/u', ' ', implode('', $parts));

        return trim((string) $text);
    }

    /**
     * Copy the source docx into BODY_SUBDIR and strip the blocks outside
     * [$bodyStart, $refStart) from the copy.
     *
     * The copy is deleted again on any failure, so a full, unsliced manuscript
     * is never left behind in the output directory.
     *
     * @param string $sourcePath
     * @param int    $articleId
     * @param int    $bodyStart first block index to keep
     * @param int    $refStart  first block index to drop again
     * @return string path of the new file relative to the project root
     * @throws Exception when the output cannot be created or rewritten
     */
    private function sliceDocxToNewFile($sourcePath, $articleId, $bodyStart, $refStart)
    {
        $dir = $this->resolveRootDir() . DIRECTORY_SEPARATOR . self::BODY_SUBDIR;
        // The second is_dir() covers another process creating the directory in between.
        if (!is_dir($dir) && !@mkdir($dir, 0755, true) && !is_dir($dir)) {
            throw new Exception('Cannot create output directory: ' . $dir);
        }

        $name = sprintf('body_article_%d_%s.docx', intval($articleId), date('Ymd_His'));
        $absPath = $dir . DIRECTORY_SEPARATOR . $name;
        if (!copy($sourcePath, $absPath)) {
            throw new Exception('Failed to copy source docx');
        }

        try {
            $this->rewriteDocumentXml($absPath, $bodyStart, $refStart);
            clearstatcache(true, $absPath);
            if (!is_file($absPath) || filesize($absPath) < 200) {
                throw new Exception('Failed to write body-only docx');
            }
        } catch (\Throwable $e) {
            @unlink($absPath);
            throw $e;
        }

        return self::BODY_SUBDIR . '/' . $name;
    }

    /**
     * Replace word/document.xml inside the docx at $absPath with a version that
     * only contains the body blocks in [$bodyStart, $refStart) plus w:sectPr.
     *
     * @param string $absPath
     * @param int    $bodyStart
     * @param int    $refStart
     * @return void
     * @throws Exception when the archive cannot be read, parsed or written back
     */
    private function rewriteDocumentXml($absPath, $bodyStart, $refStart)
    {
        $zip = new ZipArchive();
        if ($zip->open($absPath) !== true) {
            throw new Exception('Cannot open output docx');
        }

        $closed = false;
        try {
            $xml = $zip->getFromName('word/document.xml');
            if ($xml === false) {
                throw new Exception('document.xml missing in output docx');
            }

            $outDom = new DOMDocument();
            $outDom->preserveWhiteSpace = false;
            $outDom->formatOutput = false;
            if (@$outDom->loadXML($xml) === false) {
                throw new Exception('Invalid document.xml in output docx');
            }

            $xpath = new DOMXPath($outDom);
            $xpath->registerNamespace('w', self::W_NS);
            $body = $xpath->query('//w:body')->item(0);
            if (!$body instanceof DOMElement) {
                throw new Exception('w:body not found in output docx');
            }

            // Snapshot the children first: removing nodes while iterating the
            // live childNodes list would skip elements.
            $children = [];
            foreach ($body->childNodes as $child) {
                if ($child->nodeType === XML_ELEMENT_NODE) {
                    $children[] = $child;
                }
            }

            $blockIdx = 0;
            foreach ($children as $child) {
                if (!($child instanceof DOMElement) || $child->localName === 'sectPr') {
                    continue;
                }
                $outside = $blockIdx < $bodyStart || $blockIdx >= $refStart;
                if ($outside && $child->parentNode) {
                    $child->parentNode->removeChild($child);
                }
                $blockIdx++;
            }

            // The copy must expose exactly the blocks that were indexed while
            // loading, otherwise the indexes would cut at the wrong places.
            if ($blockIdx !== count($this->blocks)) {
                throw new Exception('Document block count mismatch during slice');
            }

            $newXml = $outDom->saveXML();
            if ($newXml === false || !$zip->addFromString('word/document.xml', $newXml)) {
                throw new Exception('Failed to write body-only docx');
            }

            // close() is what actually writes the archive; if it fails the file
            // on disk is still the full manuscript, so it must not be accepted.
            $closed = true;
            if (!$zip->close()) {
                throw new Exception('Failed to write body-only docx');
            }
        } finally {
            if (!$closed) {
                // Error path: drop pending changes and release the handle.
                @$zip->unchangeAll();
                @$zip->close();
            }
        }
    }

    /**
     * Index of the first block that ends the main body: a references heading,
     * or a stop-section heading (acknowledgements, funding, ...) located past
     * 40% of the document.
     *
     * NOTE(review): the references pattern is anchored only at the start of the
     * line, so a body paragraph that begins with the word "Reference" also
     * matches - confirm that this is acceptable for real manuscripts.
     *
     * @return int block index, or -1 when nothing matched
     */
    private function findReferenceStartIndex()
    {
        $stopKeywords = [
            'acknowledgements', 'acknowledgments', 'funding', 'appendix', 'supplementary',
            'conflict of interest', 'competing interests', 'author contributions',
            '致谢', '基金', '附录', '补充材料', '利益冲突', '作者贡献',
        ];
        // Stop headings only count in the later part of the document.
        $minStopIndex = count($this->blockTexts) * 0.4;

        foreach ($this->blockTexts as $i => $line) {
            $t = trim($line);
            if ($t === '') {
                continue;
            }
            if (preg_match('/^\s*(references|reference|bibliography|参考文献|文献)\b\s*[:]?\s*/iu', $t)) {
                return $i;
            }
            if ($i <= $minStopIndex) {
                continue;
            }
            // Multibyte-safe lowercasing; the block text may contain CJK.
            $lower = mb_strtolower($t, 'UTF-8');
            foreach ($stopKeywords as $sk) {
                // Heading alone, or followed by an ASCII or full-width colon.
                if ($lower === $sk || $lower === $sk . ':' || $lower === $sk . '：') {
                    return $i;
                }
            }
        }

        return -1;
    }

    /**
     * Index of the first main-body block: the first introduction-like heading,
     * else the first substantial block after the keywords line, else the
     * front-matter fallback.
     *
     * @return int
     */
    private function findBodyStartIndex()
    {
        $n = count($this->blockTexts);
        $introIdx = -1;
        $keywordsIdx = -1;
        for ($i = 0; $i < $n; $i++) {
            $t = trim($this->blockTexts[$i]);
            if ($t === '') {
                continue;
            }
            if ($introIdx < 0 && $this->isIntroductionHeading($t)) {
                $introIdx = $i;
            }
            if ($keywordsIdx < 0 && preg_match('/^\s*keywords?\b\s*[:]?/iu', $t)) {
                $keywordsIdx = $i;
            }
        }

        if ($introIdx >= 0) {
            return $introIdx;
        }
        if ($keywordsIdx >= 0) {
            $afterKw = $this->indexAfterKeywordsBlock($keywordsIdx);
            if ($afterKw < $n) {
                return $afterKw;
            }
        }

        return $this->indexAfterFrontMatterFallback();
    }

    /**
     * Whether a line starts like an introduction / background / methods
     * heading (English or Chinese, optionally numbered "1.").
     *
     * @param string $t trimmed block text
     * @return bool
     */
    private function isIntroductionHeading($t)
    {
        if (preg_match('/^\s*(introduction|background|materials and methods|materials & methods|methods and materials)\b\s*[:]?/iu', $t)) {
            return true;
        }
        if (preg_match('/^\s*(引言|前言|背景|材料与方法|资料与方法|研究方法)\b\s*[:]?/iu', $t)) {
            return true;
        }
        if (preg_match('/^\s*1[\.\s、]+(introduction|引言|前言)\b/iu', $t)) {
            return true;
        }

        return false;
    }

    /**
     * First block after the keywords line that is an introduction heading, or
     * is at least 30 characters long and does not look like an affiliation line.
     *
     * @param int $kwIdx index of the keywords block
     * @return int
     */
    private function indexAfterKeywordsBlock($kwIdx)
    {
        $n = count($this->blockTexts);
        for ($i = $kwIdx + 1; $i < $n; $i++) {
            $t = trim($this->blockTexts[$i]);
            if ($t === '') {
                continue;
            }
            if ($this->isIntroductionHeading($t)) {
                return $i;
            }
            if (preg_match('/^\s*abstract\b/iu', $t)) {
                continue;
            }
            if (mb_strlen($t) >= 30 && !$this->looksLikeAffiliationLine($t)) {
                return $i;
            }
        }

        return min($kwIdx + 1, $n - 1);
    }

    /**
     * Last-resort body start: an introduction heading within the first
     * min(20, 15% of blocks) blocks, otherwise block 8 (or the last block of a
     * shorter document).
     *
     * @return int
     */
    private function indexAfterFrontMatterFallback()
    {
        $n = count($this->blockTexts);
        $maxSkip = min(20, (int) floor($n * 0.15));
        for ($i = 0; $i < $maxSkip && $i < $n; $i++) {
            $t = trim($this->blockTexts[$i]);
            if ($t === '') {
                continue;
            }
            if ($this->isIntroductionHeading($t)) {
                return $i;
            }
        }

        return min(8, max(0, $n - 1));
    }

    /**
     * Heuristic: does the line look like front matter (e-mail, affiliation,
     * abstract or keywords line) rather than body text?
     *
     * @param string $t trimmed block text
     * @return bool
     */
    private function looksLikeAffiliationLine($t)
    {
        if (preg_match('/@|mailto:|correspond|univ|university|hospital|institute|department|^\d+[\s,]/iu', $t)) {
            return true;
        }
        if (preg_match('/^\s*abstract\b/iu', $t) || preg_match('/^\s*keywords?\b/iu', $t)) {
            return true;
        }

        return false;
    }
}

View File

@@ -12,10 +12,14 @@ use think\Exception;
* 并维护 t_plagiarism_check 状态机。 * 并维护 t_plagiarism_check 状态机。
* *
* 状态流: * 状态流:
* submit() → state=1上传中入队 PlagiarismRun * submit() → state=1上传中入队 PlagiarismRun
* PlagiarismRun.fire上传 + 触发 similarity → state=2比对中入队 PlagiarismPoll * PlagiarismRun 创建 submission + 上传文件 → 入队 PlagiarismWaitIngest
* PlagiarismPoll.fire → 轮询 status完成后下载 PDF → state=3完成 * PlagiarismWaitIngest → 单次 GET submission 状态;就绪则入队 PlagiarismTriggerSimilarity否则延迟再入队
* 任意环节抛异常 → state=4失败写 error_msg * PlagiarismTriggerSimilarity → PUT similarity → state=2比对中入队 PlagiarismPoll
* PlagiarismPoll → 轮询 similarity完成后下载 PDF → state=3完成在线 viewer URL 按需 getReportUrl 调用 refreshViewerUrlFor
* 任意环节抛异常 → state=4失败写 error_msg
*
* Worker请用 `queue:work` 消费队列 **plagiarism**(整条链与轮询均在此队列;若此前单独监听 PlagiarismRun / PlagiarismPoll需改为 plagiarism
*/ */
class PlagiarismService class PlagiarismService
{ {
@@ -24,6 +28,16 @@ class PlagiarismService
*/ */
const REPORT_DIR = 'public/plagiarism'; const REPORT_DIR = 'public/plagiarism';
/** Run / WaitIngest / TriggerSimilarity / Poll 共用队列名 */
const QUEUE_CHAIN = 'plagiarism';
const CHECK_TYPE_FULL = 'full';
const CHECK_TYPE_BODY = 'body_only';
const JOB_WAIT_INGEST = 'app\\api\\job\\PlagiarismWaitIngest';
const JOB_TRIGGER_SIM = 'app\\api\\job\\PlagiarismTriggerSimilarity';
const JOB_POLL = 'app\\api\\job\\PlagiarismPoll';
/** /**
* 轮询间隔。Turnitin 一般 1-5 分钟出结果30 秒一次比较合适 * 轮询间隔。Turnitin 一般 1-5 分钟出结果30 秒一次比较合适
*/ */
@@ -50,104 +64,229 @@ class PlagiarismService
* @param string $filePath 本地可读的 PDF/DOCX 绝对路径 * @param string $filePath 本地可读的 PDF/DOCX 绝对路径
* @param int $triggeredBy 触发人 user_id手工触发时编辑后台的 user_id * @param int $triggeredBy 触发人 user_id手工触发时编辑后台的 user_id
* @param string $source 'manual' / 'auto_xxx' * @param string $source 'manual' / 'auto_xxx'
* @param string $checkType full | body_only
* @return int check_id * @return int check_id
*/ */
public function submit($articleId, $filePath, $triggeredBy = 0, $source = 'manual') public function submit($articleId, $filePath, $triggeredBy = 0, $source = 'manual', $checkType = self::CHECK_TYPE_FULL)
{ {
if (!is_file($filePath) || !is_readable($filePath)) { if (!is_file($filePath) || !is_readable($filePath)) {
throw new Exception("File not readable: {$filePath}"); throw new Exception("File not readable: {$filePath}");
} }
$checkType = $this->normalizeCheckType($checkType);
$uploadPath = $filePath;
$derivedRel = '';
$sourceName = basename($filePath);
if ($checkType === self::CHECK_TYPE_BODY) {
$built = (new ManuscriptBodyExtractor())->buildBodyOnlyDocx($filePath, $articleId);
$uploadPath = $built['path'];
$derivedRel = (string) $built['rel_path'];
$sourceName = basename($uploadPath);
if (!empty($built['warnings'])) {
$this->log('body_only warnings check article=' . $articleId . ' ' . implode('; ', $built['warnings']));
}
}
$journalId = (int) Db::name('article') $journalId = (int) Db::name('article')
->where('article_id', $articleId) ->where('article_id', $articleId)
->value('journal_id'); ->value('journal_id');
$this->log("plagiarism submit type={$checkType} article={$articleId}");
$now = time(); $now = time();
$checkId = Db::name('plagiarism_check')->insertGetId([ $row = [
'article_id' => $articleId, 'article_id' => $articleId,
'journal_id' => $journalId, 'journal_id' => $journalId,
'triggered_by' => $triggeredBy, 'triggered_by' => $triggeredBy,
'trigger_source' => $source, 'trigger_source' => $source,
'state' => 1, // 上传中 'check_type' => $checkType,
'source_file_name' => basename($filePath), 'state' => 1,
'source_file_size' => filesize($filePath) ?: 0, 'source_file_name' => $sourceName,
'source_file_size' => filesize($uploadPath) ?: 0,
'ctime' => $now, 'ctime' => $now,
'utime' => $now, 'utime' => $now,
]); ];
$this->log("submit service act"); if ($derivedRel !== '') {
// 入队执行:上传 + 触发 similarity $row['derived_file_path'] = $derivedRel;
}
$checkId = Db::name('plagiarism_check')->insertGetId($row);
Queue::push( Queue::push(
'app\\api\\job\\PlagiarismRun', 'app\\api\\job\\PlagiarismRun',
['check_id' => $checkId, 'file_path' => $filePath], ['check_id' => $checkId, 'file_path' => $uploadPath],
'PlagiarismRun' self::QUEUE_CHAIN
); );
return (int)$checkId; return (int) $checkId;
} }
/** /**
* Job 调用:上传文件到 Turnitin 并触发 similarity然后入队 PlagiarismPoll * 同时提交全文 + 正文两次查重
* @return array{full:int, body_only:int}
*/
public function submitBoth($articleId, $filePath, $triggeredBy = 0, $source = 'manual')
{
return [
'full' => $this->submit($articleId, $filePath, $triggeredBy, $source, self::CHECK_TYPE_FULL),
'body_only' => $this->submit($articleId, $filePath, $triggeredBy, $source, self::CHECK_TYPE_BODY),
];
}
private function normalizeCheckType($checkType)
{
$t = strtolower(trim((string) $checkType));
if ($t === '' || $t === self::CHECK_TYPE_FULL || $t === 'full') {
return self::CHECK_TYPE_FULL;
}
if ($t === self::CHECK_TYPE_BODY || $t === 'body' || $t === 'bodyonly') {
return self::CHECK_TYPE_BODY;
}
throw new Exception('invalid check_type, use full or body_only');
}
/**
* Job 调用:仅创建 submission + 上传文件,随后由 PlagiarismWaitIngest 链式轮询 ingest再 PlagiarismTriggerSimilarity。
*/
public function runUploadOnly($checkId, $filePath)
{
$check = $this->mustGetCheck($checkId);
$this->log('runUploadOnly start check_id=' . $checkId);
$tii = new TurnitinService();
$articleTitle = (string) Db::name('article')
->where('article_id', $check['article_id'])
->value('title');
if ($articleTitle === '') {
$articleTitle = 'Article #' . $check['article_id'];
}
$createResp = $tii->createSubmission([
'title' => mb_substr($articleTitle, 0, 250),
'owner' => 'editor_' . $check['triggered_by'],
'submitter' => 'editor_' . $check['triggered_by'],
'metadata' => [
'article_id' => (string) $check['article_id'],
'check_id' => (string) $check['check_id'],
],
]);
$submissionId = isset($createResp['id']) ? $createResp['id'] : '';
if ($submissionId === '') {
throw new Exception('Turnitin createSubmission returned empty id: ' . json_encode($createResp));
}
$this->updateCheck($checkId, [
'tii_submission_id' => $submissionId,
'raw_response' => json_encode($createResp, JSON_UNESCAPED_UNICODE),
]);
$tii->uploadFile($submissionId, $filePath, basename($filePath));
$firstDelay = $this->ingestChainFirstDelaySec();
Queue::later(
$firstDelay,
self::JOB_WAIT_INGEST,
['check_id' => $checkId, 'attempt' => 1],
self::QUEUE_CHAIN
);
}
/**
* 单次 ingest 检查(由 PlagiarismWaitIngest 调用)。不在本方法内 sleep 长循环。
*/
public function runIngestPollStep($checkId, $attempt = 1)
{
$check = $this->mustGetCheck($checkId);
if (empty($check['tii_submission_id'])) {
$this->markFailed($checkId, '[ingest] tii_submission_id empty');
return;
}
$this->log("runIngestPollStep is running");
$maxAttempts = $this->ingestChainMaxAttempts();
$interval = $this->ingestChainPollIntervalSec();
$tii = new TurnitinService();
try {
$parsed = $tii->parseSubmissionIngestState($check['tii_submission_id']);
} catch (\Throwable $e) {
if ($attempt >= $maxAttempts) {
$this->markFailed($checkId, '[ingest] request failed after ' . $attempt . ' tries: ' . $e->getMessage());
return;
}
Queue::later($interval, self::JOB_WAIT_INGEST, ['check_id' => $checkId, 'attempt' => $attempt + 1], self::QUEUE_CHAIN);
return;
}
if (!empty($parsed['failed'])) {
$this->markFailed($checkId, '[ingest] submission failed status=' . $parsed['status'] . ' ' . $parsed['snippet']);
return;
}
if (!empty($parsed['ready'])) {
Queue::push(self::JOB_TRIGGER_SIM, ['check_id' => $checkId, 'ingest_attempt' => $attempt], self::QUEUE_CHAIN);
return;
}
if ($attempt >= $maxAttempts) {
$this->markFailed($checkId, '[ingest] timeout last_status=' . ($parsed['status'] !== '' ? $parsed['status'] : '(empty)'));
return;
}
Queue::later($interval, self::JOB_WAIT_INGEST, ['check_id' => $checkId, 'attempt' => $attempt + 1], self::QUEUE_CHAIN);
}
/**
* 在 ingest 就绪后触发 similarity并入队 PlagiarismPoll。
* 若仍返回 409则重新入队 PlagiarismWaitIngest不抛异常避免误标失败
*
* @param int $ingestAttempt 来自 WaitIngest 的 attempt供 409 时继续轮询
*/
public function runTriggerSimilarityOnly($checkId, $ingestAttempt = 1)
{
$check = $this->mustGetCheck($checkId);
if (empty($check['tii_submission_id'])) {
$this->markFailed($checkId, '[similarity] tii_submission_id empty');
return;
}
$this->log("runTriggerSimilarityOnly is running");
$tii = new TurnitinService();
$sid = $check['tii_submission_id'];
try {
$simResp = $tii->triggerSimilarity($sid);
} catch (\Throwable $e) {
$msg = $e->getMessage();
$is409 = (stripos($msg, '409') !== false || stripos($msg, 'CONFLICT') !== false)
&& (stripos($msg, 'not been completed') !== false || stripos($msg, 'completed yet') !== false);
if ($is409) {
$maxAttempts = $this->ingestChainMaxAttempts();
$next = $ingestAttempt + 1;
if ($next > $maxAttempts) {
$this->markFailed($checkId, '[similarity] still not ready after ingest attempts: ' . $msg);
return;
}
$delay = max($this->ingestChainPollIntervalSec(), 20);
Queue::later($delay, self::JOB_WAIT_INGEST, ['check_id' => $checkId, 'attempt' => $next], self::QUEUE_CHAIN);
return;
}
throw $e;
}
$this->updateCheck($checkId, [
'state' => 2,
'tii_report_status' => 'PROCESSING',
'raw_response' => json_encode($simResp, JSON_UNESCAPED_UNICODE),
]);
Queue::later(
self::POLL_INTERVAL,
self::JOB_POLL,
['check_id' => $checkId, 'attempt' => 1],
self::QUEUE_CHAIN
);
}
/**
* @deprecated 与 runUploadOnly 等价;长耗时 ingest 已拆到队列 PlagiarismWaitIngest勿在本方法内同步 wait。
*/ */
public function runUploadAndTrigger($checkId, $filePath) public function runUploadAndTrigger($checkId, $filePath)
{ {
$check = $this->mustGetCheck($checkId); $this->runUploadOnly($checkId, $filePath);
$this->log("runUploadAndTrigger is act0");
try {
$tii = new TurnitinService();
// 1. 创建 submission
$articleTitle = (string) Db::name('article')
->where('article_id', $check['article_id'])
->value('title');
if ($articleTitle === '') {
$articleTitle = 'Article #' . $check['article_id'];
}
$this->log("runUploadAndTrigger is act1");
$createResp = $tii->createSubmission([
'title' => mb_substr($articleTitle, 0, 250),
'owner' => 'editor_' . $check['triggered_by'],
'submitter' => 'editor_' . $check['triggered_by'],
'metadata' => [
'article_id' => (string)$check['article_id'],
'check_id' => (string)$check['check_id'],
],
]);
$submissionId = isset($createResp['id']) ? $createResp['id'] : '';
if ($submissionId === '') {
throw new Exception('Turnitin createSubmission returned empty id: ' . json_encode($createResp));
}
$this->updateCheck($checkId, [
'tii_submission_id' => $submissionId,
'raw_response' => json_encode($createResp, JSON_UNESCAPED_UNICODE),
]);
$this->log("runUploadAndTrigger is act2");
// 2. 上传文件
$tii->uploadFile($submissionId, $filePath, basename($filePath));
// 3. 触发 similarity
$simResp = $tii->triggerSimilarity($submissionId);
$this->updateCheck($checkId, [
'state' => 2, // 比对中
'tii_report_status' => 'PROCESSING',
'raw_response' => json_encode($simResp, JSON_UNESCAPED_UNICODE),
]);
$this->log("runUploadAndTrigger is act3");
// 4. 排队首次轮询(晚一点开始,让 Turnitin 先处理)
Queue::later(
self::POLL_INTERVAL,
'app\\api\\job\\PlagiarismPoll',
['check_id' => $checkId, 'attempt' => 1],
'PlagiarismPoll'
);
} catch (\Throwable $e) {
$this->markFailed($checkId, '[upload] ' . $e->getMessage());
throw $e;
}
} }
/** /**
@@ -173,20 +312,25 @@ class PlagiarismService
]); ]);
if ($status === 'COMPLETE') { if ($status === 'COMPLETE') {
$score = isset($statusResp['overall_match_percentage']) $score = TurnitinService::extractOverallMatchPercentage($statusResp);
? floatval($statusResp['overall_match_percentage']) : 0; if ($score <= 0 && isset($statusResp['overall_match_percentage'])) {
$score = floatval($statusResp['overall_match_percentage']);
}
$this->log('poll complete check_id=' . $checkId . ' score=' . $score
. ' check_type=' . ($check['check_type'] ?? 'full'));
// 下载 PDF + 取在线查看 URL
$localPdf = $this->downloadAndStorePdf($tii, $check['tii_submission_id'], $checkId); $localPdf = $this->downloadAndStorePdf($tii, $check['tii_submission_id'], $checkId);
$viewerInfo = $this->refreshViewerUrl($tii, $check['tii_submission_id']);
$meta = TurnitinService::parseSimilarityReportMeta($statusResp);
if ($meta['score'] > 0) {
$score = $meta['score'];
}
$this->updateCheck($checkId, [ $this->updateCheck($checkId, [
'state' => 3, 'state' => 3,
'similarity_score' => $score, 'similarity_score' => $score,
'pdf_local_path' => $localPdf, 'pdf_local_path' => $localPdf,
'view_only_url' => $viewerInfo['url'], 'error_msg' => '',
'view_only_url_expire' => $viewerInfo['expire'],
'error_msg' => '',
]); ]);
return; return;
} }
@@ -197,25 +341,23 @@ class PlagiarismService
return; return;
} }
// PROCESSING 或其它中间态:继续轮询
if ($attempt >= self::MAX_POLL_ATTEMPTS) { if ($attempt >= self::MAX_POLL_ATTEMPTS) {
$this->markFailed($checkId, '[poll] timeout after ' . $attempt . ' attempts'); $this->markFailed($checkId, '[poll] timeout after ' . $attempt . ' attempts');
return; return;
} }
Queue::later( Queue::later(
self::POLL_INTERVAL, self::POLL_INTERVAL,
'app\\api\\job\\PlagiarismPoll', self::JOB_POLL,
['check_id' => $checkId, 'attempt' => $attempt + 1], ['check_id' => $checkId, 'attempt' => $attempt + 1],
'plagiarism' self::QUEUE_CHAIN
); );
} catch (\Throwable $e) { } catch (\Throwable $e) {
// 网络抖动不要直接 fail给一定容错次数
if ($attempt < self::MAX_POLL_ATTEMPTS) { if ($attempt < self::MAX_POLL_ATTEMPTS) {
Queue::later( Queue::later(
self::POLL_INTERVAL, self::POLL_INTERVAL,
'app\\api\\job\\PlagiarismPoll', self::JOB_POLL,
['check_id' => $checkId, 'attempt' => $attempt + 1], ['check_id' => $checkId, 'attempt' => $attempt + 1],
'plagiarism' self::QUEUE_CHAIN
); );
$this->updateCheck($checkId, [ $this->updateCheck($checkId, [
'attempts' => $attempt, 'attempts' => $attempt,
@@ -229,42 +371,71 @@ class PlagiarismService
} }
/** /**
* 重新生成在线查看 URL已有的过期了用 * 按需获取/刷新 Turnitin 在线报告 URL与 poll 解耦,避免 viewer-url 失败拖死查重完成)。
* *
* @return array{url:string, expire:int, local_pdf:string} * @param array $viewerContext editor_id=当前打开报告的编辑 user_idviewer_user_id 可显式指定
* @return array{url:string, expire:int, local_pdf:string, viewer_user_id:string}
*/ */
public function refreshViewerUrlFor($checkId) public function refreshViewerUrlFor($checkId, array $viewerContext = [])
{ {
$check = $this->mustGetCheck($checkId); $check = $this->mustGetCheck($checkId);
if (empty($check['tii_submission_id'])) { if (empty($check['tii_submission_id'])) {
throw new Exception('check has no tii_submission_id'); throw new Exception('check has no tii_submission_id');
} }
$tii = new TurnitinService(); $tii = new TurnitinService();
$info = $this->refreshViewerUrl($tii, $check['tii_submission_id']); $info = $this->refreshViewerUrl($tii, $check['tii_submission_id'], $check, $viewerContext);
$this->updateCheck($checkId, [ $this->updateCheck($checkId, [
'view_only_url' => $info['url'], 'view_only_url' => $info['url'],
'view_only_url_expire' => $info['expire'], 'view_only_url_expire' => $info['expire'],
]); ]);
return [ return [
'url' => $info['url'], 'url' => $info['url'],
'expire' => $info['expire'], 'expire' => $info['expire'],
'local_pdf' => $check['pdf_local_path'], 'local_pdf' => $check['pdf_local_path'],
'viewer_user_id' => $info['viewer_user_id'],
]; ];
} }
// ---------- 内部 ---------- // ---------- 内部 ----------
private function refreshViewerUrl($tii, $submissionId) /**
* 调用 Turnitin POST viewer-url仅由 refreshViewerUrlFor / getReportUrl 触发。
*/
private function refreshViewerUrl($tii, $submissionId, array $check = [], array $viewerContext = [])
{ {
$resp = $tii->getViewerUrl($submissionId); $viewerOpts = $viewerContext;
if (!isset($viewerOpts['editor_id']) && !empty($check['triggered_by'])) {
$viewerOpts['triggered_by'] = intval($check['triggered_by']);
}
$viewerUserId = $tii->resolveViewerUserId($viewerOpts);
$resp = $tii->getViewerUrl($submissionId, $viewerOpts);
$url = ''; $url = '';
if (isset($resp['viewer_url'])) { if (isset($resp['viewer_url'])) {
$url = (string)$resp['viewer_url']; $url = (string) $resp['viewer_url'];
} elseif (isset($resp['url'])) { } elseif (isset($resp['url'])) {
$url = (string)$resp['url']; $url = (string) $resp['url'];
} elseif (isset($resp['launch_url'])) {
$url = (string) $resp['launch_url'];
} }
// 默认 2 小时过期,保守起见 if ($url === '') {
return ['url' => $url, 'expire' => time() + 7200]; throw new Exception('viewer-url response has no url: ' . json_encode($resp, JSON_UNESCAPED_UNICODE));
}
$expire = time() + 7200;
foreach (['viewer_url_expires', 'expires_at', 'expiration_time', 'expire_time'] as $k) {
if (empty($resp[$k])) {
continue;
}
$ts = is_numeric($resp[$k]) ? intval($resp[$k]) : strtotime((string) $resp[$k]);
if ($ts > time()) {
$expire = $ts;
break;
}
}
return [
'url' => $url,
'expire' => $expire,
'viewer_user_id' => $viewerUserId,
];
} }
/** /**
@@ -328,15 +499,30 @@ class PlagiarismService
Db::name('plagiarism_check')->where('check_id', $checkId)->update($data); Db::name('plagiarism_check')->where('check_id', $checkId)->update($data);
} }
private function markFailed($checkId, $errMsg) public function markFailed($checkId, $errMsg)
{ {
$this->log("markFailed act"); $this->log('markFailed check_id=' . $checkId);
$this->updateCheck($checkId, [ $this->updateCheck($checkId, [
'state' => 4, 'state' => 4,
'error_msg' => mb_substr($errMsg, 0, 1000), 'error_msg' => mb_substr($errMsg, 0, 1000),
]); ]);
} }
/**
 * Delay, in seconds, before the first ingest check of the queue chain.
 *
 * Reads turnitin.ingest_chain_first_delay (default 10) and never returns less than 3.
 *
 * @return int
 */
private function ingestChainFirstDelaySec()
{
    $configured = (int) Env::get('turnitin.ingest_chain_first_delay', 10);
    return $configured < 3 ? 3 : $configured;
}
/**
 * Interval, in seconds, between two steps of the ingest queue chain.
 *
 * Reads turnitin.ingest_chain_poll_interval (default 15). The lower bound is a
 * safety floor against hammering the API; it must stay below the default,
 * otherwise the default (and every configured value under the floor) can never
 * take effect. The previous floor of 60 silently turned the 15 s default into 60 s.
 *
 * @return int
 */
private function ingestChainPollIntervalSec()
{
    return max(5, (int) Env::get('turnitin.ingest_chain_poll_interval', 15));
}
/**
 * Maximum number of steps the ingest queue chain may take.
 *
 * Reads turnitin.ingest_chain_max_attempts (default 80) and never returns less than 10.
 *
 * @return int
 */
private function ingestChainMaxAttempts()
{
    $configured = (int) Env::get('turnitin.ingest_chain_max_attempts', 80);
    return $configured < 10 ? 10 : $configured;
}
/** /**
* 从 t_article_file 找到投稿主稿manuscirpt的本地绝对路径。 * 从 t_article_file 找到投稿主稿manuscirpt的本地绝对路径。
* file_url 在系统里可能是 URL 或相对路径,调用方负责保证可读。 * file_url 在系统里可能是 URL 或相对路径,调用方负责保证可读。

View File

@@ -18,6 +18,14 @@ use think\Exception;
* API_KEY 生成的 Bearer token * API_KEY 生成的 Bearer token
* INTEGRATION_NAME Scope Name创建 integration 时填的名字) * INTEGRATION_NAME Scope Name创建 integration 时填的名字)
* INTEGRATION_VERSION 自定义版本号,便于审计 e.g. 1.0.0 * INTEGRATION_VERSION 自定义版本号,便于审计 e.g. 1.0.0
* SUBMISSION_INGEST_MAX_WAIT 上传后轮询 submission 就绪的最长秒数,默认 600仅 waitAfterUploadForSimilarity 同步用)
* SUBMISSION_INGEST_POLL_INTERVAL 同步轮询间隔秒数,默认 3
* INGEST_CHAIN_FIRST_DELAY 上传后首次 ingest 检查延迟秒数,默认 10队列链
* INGEST_CHAIN_POLL_INTERVAL ingest 链每步间隔秒数,默认 15
* INGEST_CHAIN_MAX_ATTEMPTS ingest 链最大步数,默认 80
* EXCLUDE_QUOTES / EXCLUDE_BIBLIOGRAPHY / EXCLUDE_CITATIONS 0|1默认 0与 Crossref 网页手动查重更接近)
* VIEWER_DEFAULT_MODE match_overview | all_sources默认 all_sources便于按来源库分类查看
* ADD_TO_INDEX 0|1默认 1
* *
* API 文档https://developers.turnitin.com/docs/tca * API 文档https://developers.turnitin.com/docs/tca
* *
@@ -36,8 +44,8 @@ class TurnitinService
public function __construct() public function __construct()
{ {
$this->baseUrl = rtrim(trim((string)Env::get('turnitin.base_url', '')), '/'); $this->baseUrl = rtrim(trim((string)Env::get('turnitin.base_url', 'https://crossref-20794.turnitin.com/api/v1')), '/');
$this->apiKey = trim((string)Env::get('turnitin.api_key', '')); $this->apiKey = trim((string)Env::get('turnitin.api_key', 'c6315e8291a4433dae09ad5efdb8a89c'));
$this->integrationName = trim((string)Env::get('turnitin.integration_name', 'tmr')); $this->integrationName = trim((string)Env::get('turnitin.integration_name', 'tmr'));
$this->integrationVersion = trim((string)Env::get('turnitin.integration_version', '1.0.0')); $this->integrationVersion = trim((string)Env::get('turnitin.integration_version', '1.0.0'));
@@ -80,11 +88,13 @@ class TurnitinService
/** /**
* 上传文件到 submission * 上传文件到 submission
* PUT /submissions/{id}/original/{filename} *
* TCA 文档路径为 PUT /submissions/{id}/original文件名仅通过 Content-Disposition 传递,
* 不要再拼在 URL 末尾;否则网关会 404错误里常见 path 形如 //v1/submissions/.../original/xxx.docx
* *
* @param string $submissionId * @param string $submissionId
* @param string $filePath 本地 PDF/DOCX 路径 * @param string $filePath 本地 PDF/DOCX 路径
* @param string $filename 传给 Turnitin 的文件名(用于报告显示 * @param string $filename 传给 Turnitin 的展示文件名(默认取 basename
* @return array * @return array
*/ */
public function uploadFile($submissionId, $filePath, $filename = '') public function uploadFile($submissionId, $filePath, $filename = '')
@@ -95,15 +105,20 @@ class TurnitinService
if ($filename === '') { if ($filename === '') {
$filename = basename($filePath); $filename = basename($filePath);
} }
// Content-Disposition 里避免未转义的双引号
$safeName = str_replace(['"', "\r", "\n"], '', $filename);
if ($safeName === '') {
$safeName = 'document.bin';
}
$body = file_get_contents($filePath); $body = file_get_contents($filePath);
return $this->request( return $this->request(
'PUT', 'PUT',
'/submissions/' . urlencode($submissionId) . '/original/' . rawurlencode($filename), '/submissions/' . rawurlencode($submissionId) . '/original',
$body, $body,
[ [
'Content-Type' => 'binary/octet-stream', 'Content-Type' => 'application/octet-stream',
'Content-Disposition' => 'inline; filename="' . $filename . '"', 'Content-Disposition' => 'attachment; filename="' . $safeName . '"',
] ]
); );
} }
@@ -114,37 +129,340 @@ class TurnitinService
* *
* @param string $submissionId * @param string $submissionId
* @param array $opts * @param array $opts
* - generation_settings.search_repositories 默认 ['INTERNET','PUBLICATION','CROSSREF','CROSSREF_POSTED_CONTENT','SUBMITTED_WORK'] * - generation_settings.search_repositories 默认 ['INTERNET','PUBLICATION',...]
* - generation_settings.submission_auto_excludes bool * - generation_settings.submission_auto_excludes **字符串数组**(如 [] 或具体仓库键),不可传 boolean否则会 400
* - view_settings.exclude_quotes / exclude_bibliography / exclude_citations / exclude_abstract / exclude_methods bool * - generation_settings.auto_exclude_self_matching_scope 可选,如 'GROUP_CONTEXT'
* - view_settings.exclude_* 布尔排除项(与 TCA 文档一致)
* - indexing_settings.add_to_index bool 是否把本文加进 SUBMITTED_WORK 索引(一般 true * - indexing_settings.add_to_index bool 是否把本文加进 SUBMITTED_WORK 索引(一般 true
* @return array * @return array
*/ */
public function triggerSimilarity($submissionId, $opts = []) public function triggerSimilarity($submissionId, $opts = [])
{ {
$body = array_merge([ $body = array_merge($this->defaultSimilarityPayload(), $opts);
'generation_settings' => [
'search_repositories' => ['INTERNET', 'PUBLICATION', 'CROSSREF', 'CROSSREF_POSTED_CONTENT', 'SUBMITTED_WORK'],
'submission_auto_excludes' => true,
'auto_exclude_self_matching_scope' => 'GROUP_CONTEXT',
],
'view_settings' => [
'exclude_quotes' => true,
'exclude_bibliography' => true,
'exclude_citations' => true,
],
'indexing_settings' => [
'add_to_index' => true,
],
], $opts);
return $this->request( return $this->request(
'PUT', 'PUT',
'/submissions/' . urlencode($submissionId) . '/similarity', '/submissions/' . rawurlencode($submissionId) . '/similarity',
$body $body
); );
} }
/**
 * Default request payload shared by PUT /similarity and the PDF export.
 *
 * Per the original author's note: earlier versions hard-coded exclude_*=true,
 * which made the overall similarity lower than a manual check in the Crossref
 * web UI; the view settings are now driven by configuration.
 *
 * @return array payload with generation_settings, view_settings and indexing_settings
 */
public function defaultSimilarityPayload()
{
    $generationSettings = [
        'search_repositories' => ['INTERNET', 'PUBLICATION', 'CROSSREF', 'CROSSREF_POSTED_CONTENT', 'SUBMITTED_WORK'],
        'submission_auto_excludes' => [],
    ];
    // The self-matching scope is only sent when the configured value is non-empty.
    $selfMatchScope = trim((string) Env::get('turnitin.auto_exclude_self_matching_scope', 'GROUP_CONTEXT'));
    if ($selfMatchScope !== '') {
        $generationSettings['auto_exclude_self_matching_scope'] = $selfMatchScope;
    }
    return [
        'generation_settings' => $generationSettings,
        'view_settings' => $this->defaultViewSettings(),
        'indexing_settings' => [
            'add_to_index' => $this->envBool('turnitin.add_to_index', true),
        ],
    ];
}
/**
 * Default view_settings block: the three exclude_* flags, each read from the
 * turnitin.* configuration and defaulting to false.
 *
 * @return array{exclude_quotes:bool, exclude_bibliography:bool, exclude_citations:bool}
 */
public function defaultViewSettings()
{
    $settings = [];
    foreach (['exclude_quotes', 'exclude_bibliography', 'exclude_citations'] as $flag) {
        $settings[$flag] = $this->envBool('turnitin.' . $flag, false);
    }
    return $settings;
}
/**
 * Extract the overall similarity (0-100) from a GET /similarity response.
 *
 * Looks at the top level, at a nested "message" array and at a nested
 * "similarity" array, and uses the first numeric, non-negative candidate.
 *
 * Values strictly between 0 and 1 are treated as a ratio and scaled to a
 * percentage. A value of exactly 1 is kept as 1 %: the previous condition
 * (scaled > 1.0 || n < 0.05) was true for every value in (0, 1], so a report
 * with 1 % similarity was stored as 100 %.
 * NOTE(review): assumes the API reports whole-number percentages, so only
 * fractional values can be ratios — confirm against the Turnitin response schema.
 *
 * @param array $statusResp decoded response body
 * @return float percentage rounded to two decimals, 0.0 when nothing usable is found
 */
public static function extractOverallMatchPercentage(array $statusResp)
{
    $payload = (isset($statusResp['message']) && is_array($statusResp['message']))
        ? $statusResp['message']
        : $statusResp;
    $similarity = (isset($payload['similarity']) && is_array($payload['similarity']))
        ? $payload['similarity']
        : [];

    // Candidate order defines priority: top level first, then message, then message.similarity.
    $rawCandidates = [
        $statusResp['overall_match_percentage'] ?? null,
        $statusResp['overall_match'] ?? null,
        $statusResp['similarity_percentage'] ?? null,
        $payload['overall_match_percentage'] ?? null,
        $payload['overall_match'] ?? null,
        $similarity['overall_match_percentage'] ?? null,
        $similarity['overall_match'] ?? null,
    ];

    foreach ($rawCandidates as $raw) {
        if ($raw === null || $raw === '' || !is_numeric($raw)) {
            continue;
        }
        $value = floatval($raw);
        if ($value < 0) {
            continue;
        }
        if ($value > 0 && $value < 1.0) {
            // Fractional value: interpret as a 0-1 ratio.
            return round($value * 100, 2);
        }
        return round($value, 2);
    }
    return 0.0;
}
/**
 * Build a per-source summary from a GET /similarity response, for list display.
 * Per the original author's note, the full detail remains in the Turnitin online report.
 *
 * Source nodes are gathered from the whole response and again from a nested
 * "message" array; entries with the same repository / percentage / word count
 * are reported once.
 *
 * @param array $statusResp decoded response body
 * @return array{score:float,sources:array<int,array<string,mixed>>}
 */
public static function parseSimilarityReportMeta(array $statusResp)
{
    $nodes = [];
    self::collectSimilaritySourceNodes($statusResp, $nodes, 0);
    if (isset($statusResp['message']) && is_array($statusResp['message'])) {
        self::collectSimilaritySourceNodes($statusResp['message'], $nodes, 0);
    }

    $percentageKeys = ['percentage', 'match_percentage', 'overall_match_percentage', 'similarity_percentage'];
    $repositoryKeys = ['repository', 'repository_name', 'collection', 'source_type', 'type', 'database', 'category'];

    $sources = [];
    $alreadyListed = [];
    foreach ($nodes as $node) {
        if (!is_array($node)) {
            continue;
        }

        // First numeric percentage-like field wins.
        $percentage = null;
        foreach ($percentageKeys as $field) {
            if (isset($node[$field]) && is_numeric($node[$field])) {
                $percentage = floatval($node[$field]);
                break;
            }
        }

        // First non-empty repository-like field wins, normalised to upper case.
        $repository = '';
        foreach ($repositoryKeys as $field) {
            if (!empty($node[$field])) {
                $repository = strtoupper(trim((string) $node[$field]));
                break;
            }
        }

        $wordCount = intval($node['matched_word_count'] ?? ($node['word_count'] ?? 0));

        $fingerprint = $repository . '|' . ($percentage !== null ? $percentage : '') . '|' . $wordCount;
        if (isset($alreadyListed[$fingerprint])) {
            continue;
        }
        $alreadyListed[$fingerprint] = true;

        // Only fields that carry a value are emitted.
        $entry = [];
        if ($repository !== '') {
            $entry['repository'] = $repository;
        }
        if ($percentage !== null) {
            $entry['match_percentage'] = $percentage;
        }
        if ($wordCount > 0) {
            $entry['matched_word_count'] = $wordCount;
        }
        $sources[] = $entry;
    }

    return [
        'score' => self::extractOverallMatchPercentage($statusResp),
        'sources' => $sources,
    ];
}
/**
 * Recursively collect every array node that carries a non-empty repository-like
 * key (repository, repository_name, collection, source_type).
 *
 * Recursion stops once $depth exceeds 8. A list of arrays does not consume a
 * depth level itself: its items are visited at $depth + 1.
 *
 * @param array<string,mixed>|mixed $node  current node; non-arrays are ignored
 * @param array<int,mixed>          $out   accumulator, appended to in place
 * @param int                       $depth current recursion depth
 */
private static function collectSimilaritySourceNodes($node, array &$out, $depth)
{
    if (!is_array($node) || $depth > 8) {
        return;
    }

    foreach (['repository', 'repository_name', 'collection', 'source_type'] as $repoKey) {
        if (!empty($node[$repoKey])) {
            $out[] = $node;
            break;
        }
    }

    foreach ($node as $child) {
        if (!is_array($child)) {
            continue;
        }
        $isListOfArrays = isset($child[0]) && is_array($child[0]);
        $targets = $isListOfArrays ? $child : [$child];
        foreach ($targets as $target) {
            self::collectSimilaritySourceNodes($target, $out, $depth + 1);
        }
    }
}
/**
 * Default "similarity" view block for the online Similarity Report.
 *
 * The default mode comes from turnitin.viewer_default_mode; anything other than
 * match_overview / all_sources falls back to all_sources. Both modes are enabled.
 *
 * @return array{default_mode:string, modes:array{match_overview:bool, all_sources:bool}}
 */
public function defaultViewerSimilarityBlock()
{
    $allowedModes = ['match_overview', 'all_sources'];
    $requested = strtolower(trim((string) Env::get('turnitin.viewer_default_mode', 'all_sources')));
    $defaultMode = in_array($requested, $allowedModes, true) ? $requested : 'all_sources';

    return [
        'default_mode' => $defaultMode,
        'modes' => [
            'match_overview' => true,
            'all_sources' => true,
        ],
    ];
}
/**
 * Read a configuration value as a boolean.
 *
 * Real booleans are returned unchanged; otherwise the value is trimmed,
 * lower-cased and considered true only for '1', 'true', 'yes' or 'on'.
 *
 * @param string $name    configuration key, e.g. 'turnitin.add_to_index'
 * @param bool   $default value used when the key is not set
 * @return bool
 */
private function envBool($name, $default = false)
{
    $raw = Env::get($name, $default ? '1' : '0');
    if (is_bool($raw)) {
        return $raw;
    }
    $normalized = strtolower(trim((string) $raw));
    return in_array($normalized, ['1', 'true', 'yes', 'on'], true);
}
/**
 * Fetch the submission details (used after upload to poll whether parsing finished).
 * GET /submissions/{id}
 *
 * @param string $submissionId
 * @return array decoded JSON as returned by request()
 */
public function getSubmission($submissionId)
{
    $path = '/submissions/' . rawurlencode($submissionId);
    return $this->request('GET', $path);
}
/**
 * Query GET /submissions/{id} once and classify the ingest state, without
 * sleeping, so a queue chain can poll step by step.
 *
 * @param string $submissionId
 * @return array{ready:bool, failed:bool, status:string, snippet:string, message:array}
 *         status is the upper-cased status string, snippet is at most 400 characters
 *         of the JSON-encoded payload, message is the unwrapped payload
 */
public function parseSubmissionIngestState($submissionId)
{
    $payload = self::unwrapSubmissionPayload($this->getSubmission($submissionId));
    $status = strtoupper(trim((string) self::pickSubmissionStatus($payload)));
    $excerpt = mb_substr(json_encode($payload, JSON_UNESCAPED_UNICODE), 0, 400);

    $readyStatuses = [
        'COMPLETE', 'COMPLETED', 'PROCESSED', 'READY', 'SUCCEEDED',
        'COMPLETE_PROCESSING',
    ];
    $failedStatuses = ['ERROR', 'FAILED', 'CANCELLED', 'CANCELED', 'DELETED'];

    // An empty status is neither ready nor failed, so the caller keeps polling.
    $hasStatus = $status !== '';

    return [
        'ready' => $hasStatus && in_array($status, $readyStatuses, true),
        'failed' => $hasStatus && in_array($status, $failedStatuses, true),
        'status' => $status,
        'snippet' => $excerpt,
        'message' => $payload,
    ];
}
/**
 * Block until Turnitin has finished ingesting an uploaded submission.
 * Per the original author's note this synchronous variant is meant for CLI /
 * debugging; production should use the queue chain instead.
 *
 * @param string $submissionId
 * @param int    $maxWaitSec   maximum wait in seconds (default 600, at least 30)
 * @param int    $intervalSec  pause between polls in seconds (default 3, at least 1)
 * @throws Exception when the submission reaches a failed state or the wait times out
 */
public function waitAfterUploadForSimilarity($submissionId, $maxWaitSec = 600, $intervalSec = 3)
{
    $giveUpAt = time() + max(30, (int)$maxWaitSec);
    $pause = max(1, (int)$intervalSec);
    $status = '';
    $excerpt = '';

    // The pause happens after each unsuccessful poll, before the deadline is re-checked.
    for (; time() < $giveUpAt; sleep($pause)) {
        $state = $this->parseSubmissionIngestState($submissionId);
        $status = $state['status'];
        $excerpt = $state['snippet'];
        if (!empty($state['ready'])) {
            return;
        }
        if (!empty($state['failed'])) {
            throw new Exception('Turnitin submission failed, status=' . $status . ' body=' . $excerpt);
        }
    }

    throw new Exception(
        'Timeout waiting for Turnitin submission ingest (last status=' . ($status ?: '(empty)') . ') snippet=' . $excerpt
    );
}
/**
 * Return the inner "message" array of a decoded response when present,
 * otherwise the response itself; non-array input yields an empty array.
 *
 * @param mixed $decoded
 * @return array
 */
private static function unwrapSubmissionPayload($decoded)
{
    if (!is_array($decoded)) {
        return [];
    }
    $hasEnvelope = isset($decoded['message']) && is_array($decoded['message']);
    return $hasEnvelope ? $decoded['message'] : $decoded;
}
/**
 * Pick the first non-empty status-like field from the payload, checking the
 * payload itself first and then a nested "submission" array.
 *
 * @param array $msg unwrapped submission payload
 * @return string the status as found, or '' when none is present
 */
private static function pickSubmissionStatus(array $msg)
{
    $statusKeys = ['status', 'workflow_status', 'submission_status', 'processing_status', 'paper_status'];

    $scopes = [$msg];
    if (isset($msg['submission']) && is_array($msg['submission'])) {
        $scopes[] = $msg['submission'];
    }

    foreach ($scopes as $scope) {
        foreach ($statusKeys as $field) {
            if (empty($scope[$field])) {
                continue;
            }
            return (string)$scope[$field];
        }
    }
    return '';
}
/** /**
* 查询 similarity 状态 * 查询 similarity 状态
* GET /submissions/{id}/similarity * GET /submissions/{id}/similarity
@@ -156,7 +474,7 @@ class TurnitinService
{ {
return $this->request( return $this->request(
'GET', 'GET',
'/submissions/' . urlencode($submissionId) . '/similarity' '/submissions/' . rawurlencode($submissionId) . '/similarity'
); );
} }
@@ -166,25 +484,134 @@ class TurnitinService
* *
* 返回 viewer_url数小时有效 * 返回 viewer_url数小时有效
* *
* @param array $viewer 可选 viewer 设置 e.g. ['viewer_default_permission_set' => 'INSTRUCTOR'] * TCA 要求 default_mode 为小写(如 match_overviewsave_changes 等 LTI 字段会导致 400。
* Crossref 通道常用 ADMINISTRATOR/USER非 INSTRUCTOR。可在 .env 配置:
* turnitin.viewer_permission_set=ADMINISTRATOR
*
* @param array $viewer 可选viewer_user_id、triggered_by映射为 editor_{id})、或完整请求体覆盖
*/ */
public function getViewerUrl($submissionId, $viewer = []) public function getViewerUrl($submissionId, $viewer = [])
{ {
$body = array_merge([ $submissionId = trim((string) $submissionId);
'viewer_default_permission_set' => 'INSTRUCTOR', if ($submissionId === '') {
'similarity' => [ throw new Exception('submissionId required for viewer-url');
'default_mode' => 'MATCH_OVERVIEW', }
'view_settings' => ['save_changes' => true],
'modes' => ['match_overview' => true, 'all_sources' => true],
],
'locale' => 'en-US',
], $viewer);
return $this->request( $statusResp = $this->getSimilarityStatus($submissionId);
'POST', $st = strtoupper(trim((string) ($statusResp['status'] ?? '')));
'/submissions/' . urlencode($submissionId) . '/viewer-url', if ($st !== '' && $st !== 'COMPLETE') {
$body throw new Exception('similarity report not ready for viewer-url, status=' . $st);
); }
$path = '/submissions/' . rawurlencode($submissionId) . '/viewer-url';
$lastError = null;
foreach ($this->buildViewerUrlBodies($viewer) as $body) {
try {
return $this->request('POST', $path, $body);
} catch (Exception $e) {
$lastError = $e;
if (strpos($e->getMessage(), 'HTTP 400') === false) {
throw $e;
}
}
}
throw $lastError ?: new Exception('viewer-url failed');
}
/**
 * Build the viewer-url request bodies to try, in priority order. Per the
 * original author's note, the caller moves on to the next body when one is
 * rejected with HTTP 400.
 *
 * When the overrides already contain viewer_default_permission_set they are
 * used as the single body (viewer_user_id is filled in if missing). Otherwise
 * three bodies are produced per permission set: view settings only, view
 * settings plus the default mode block, and no similarity block at all.
 *
 * @param array $viewerOverrides caller-supplied options or a complete body
 * @return array<int,array>
 */
private function buildViewerUrlBodies(array $viewerOverrides)
{
    if (isset($viewerOverrides['viewer_default_permission_set'])) {
        $explicit = $viewerOverrides;
        if (empty($explicit['viewer_user_id'])) {
            $explicit['viewer_user_id'] = $this->resolveViewerUserId($viewerOverrides);
        }
        return [$explicit];
    }

    $locale = trim((string) Env::get('turnitin.viewer_locale', 'en-US')) ?: 'en-US';
    $configuredSets = trim((string) Env::get('turnitin.viewer_permission_set', ''));
    if ($configuredSets !== '') {
        // A comma-separated list may be configured.
        $permissionSets = array_map('trim', explode(',', $configuredSets));
    } else {
        $permissionSets = $this->defaultViewerPermissionSets();
    }
    $viewerUserId = $this->resolveViewerUserId($viewerOverrides);
    $viewSettings = ['view_settings' => ['save_changes' => $this->envBool('turnitin.viewer_save_changes', false)]];
    $modeBlock = $this->defaultViewerSimilarityBlock();

    $bodies = [];
    foreach ($permissionSets as $permissionSet) {
        if ($permissionSet === '') {
            continue;
        }
        // Per the original author's note, viewer_user_id is mandatory; omitting it caused HTTP 400.
        $common = [
            'viewer_user_id' => $viewerUserId,
            'locale' => $locale,
            'viewer_default_permission_set' => $permissionSet,
        ];
        $bodies[] = $common + ['similarity' => $viewSettings];
        $bodies[] = $common + ['similarity' => array_merge($modeBlock, $viewSettings)];
        $bodies[] = $common;
    }
    return $bodies;
}
/**
 * Determine the viewer_user_id sent with a viewer-url request.
 *
 * Priority: explicit viewer_user_id, then editor_{editor_id}, then
 * editor_{triggered_by}, then the turnitin.viewer_user_id setting, and finally
 * "{integrationName}_viewer" ("tmr_viewer" when the integration name is empty).
 * Per the original author's note, the id should match the editor who opens the
 * report, otherwise session authentication tends to fail.
 *
 * @param array $opts may contain viewer_user_id, editor_id, triggered_by
 * @return string
 */
public function resolveViewerUserId(array $opts = [])
{
    if (!empty($opts['viewer_user_id'])) {
        return trim((string) $opts['viewer_user_id']);
    }

    // editor_id takes precedence over triggered_by; both map to the editor_{id} namespace.
    foreach (['editor_id', 'triggered_by'] as $idKey) {
        $id = isset($opts[$idKey]) ? intval($opts[$idKey]) : 0;
        if ($id > 0) {
            return 'editor_' . $id;
        }
    }

    $configured = trim((string) Env::get('turnitin.viewer_user_id', ''));
    if ($configured !== '') {
        return $configured;
    }

    $prefix = trim((string) $this->integrationName);
    if ($prefix === '') {
        $prefix = 'tmr';
    }
    return $prefix . '_viewer';
}
/**
* Crossref Similarity Check 通常不用 INSTRUCTOR按常见可用角色排序尝试。
*
* @return array<int,string>
*/
private function defaultViewerPermissionSets()
{
if (stripos($this->baseUrl, 'crossref') !== false) {
return ['ADMINISTRATOR', 'USER', 'EDITOR', 'INSTRUCTOR'];
}
return ['INSTRUCTOR', 'ADMINISTRATOR', 'USER'];
} }
/** /**
@@ -196,12 +623,13 @@ class TurnitinService
public function requestPdfReport($submissionId, $opts = []) public function requestPdfReport($submissionId, $opts = [])
{ {
$body = array_merge([ $body = array_merge([
'locale' => 'en-US', 'locale' => trim((string) Env::get('turnitin.viewer_locale', 'en-US')) ?: 'en-US',
'view_settings' => $this->defaultViewSettings(),
], $opts); ], $opts);
return $this->request( return $this->request(
'POST', 'POST',
'/submissions/' . urlencode($submissionId) . '/similarity/pdf', '/submissions/' . rawurlencode($submissionId) . '/similarity/pdf',
$body $body
); );
} }

View File

@@ -0,0 +1,463 @@
<?php
namespace app\common;
use think\Db;
use think\Env;
use think\Exception;
use think\Queue;
/**
* 根据投稿记录 / 审稿人资料,用大模型总结用户主领域(中文)写入 field_ai。
* 队列链UserFieldAiFill → 处理一条 → enqueueNextFieldAi → 下一条。
*/
class UserFieldAiService
{
const QUEUE_NAME = 'UserFieldAi';
const STATUS_PENDING = 0;
const STATUS_DONE = 1;
const STATUS_INSUFFICIENT = 2;
const STATUS_FAILED = 3;
private $logFile;
/**
 * Point the service log at runtime/user_field_ai.log under the application root.
 */
public function __construct()
{
    $runtimeDir = ROOT_PATH . 'runtime';
    $this->logFile = $runtimeDir . DS . 'user_field_ai.log';
}
/**
 * Start the chained processing from the beginning (first pending user with user_id > 0).
 *
 * @param bool   $force true to recompute users that already have a result
 * @param int    $delay delay in seconds before the first job runs
 * @param string $queue queue name; '' selects the default queue
 * @return bool whether a first job was enqueued
 */
public function startChain($force = false, $delay = 1, $queue = '')
{
    $startAfterUserId = 0;
    return $this->enqueueNextFieldAi($delay, $queue, $startAfterUserId, $force);
}
/**
 * Chain step: find the next pending user with user_id > $afterUserId and enqueue a job for it.
 *
 * @param int    $delay       seconds to delay the job; 0 or less pushes it immediately
 * @param string $queue       queue name; '' selects self::QUEUE_NAME
 * @param int    $afterUserId cursor: only users with a larger id are considered
 * @param bool   $force       true to include users that already have a result
 * @return bool true when a job was enqueued, false when no user is left
 */
public function enqueueNextFieldAi($delay = 1, $queue = '', $afterUserId = 0, $force = false)
{
    $targetQueue = ($queue === '') ? self::QUEUE_NAME : $queue;
    $cursor = intval($afterUserId);

    $nextUserId = $this->findNextPendingUserId($cursor, $force);
    if ($nextUserId <= 0) {
        $this->log('[FieldAi] chain finished after user_id=' . $cursor . ' force=' . ($force ? '1' : '0'));
        return false;
    }

    $payload = [
        'user_id' => $nextUserId,
        'queue' => $targetQueue,
        'force' => $force ? 1 : 0,
    ];
    $handler = 'app\\api\\job\\UserFieldAiFill@fire';
    if ($delay > 0) {
        Queue::later($delay, $handler, $payload, $targetQueue);
    } else {
        Queue::push($handler, $payload, $targetQueue);
    }

    $this->log('[FieldAi] enqueued user_id=' . $nextUserId . ' queue=' . $targetQueue);
    return true;
}
/**
 * Process a single user (called from the queue job or synchronously for debugging).
 *
 * Outcomes: skipped when a result already exists and $force is false; marked
 * insufficient when the user has neither articles nor a sufficiently filled
 * profile; otherwise the LLM summary is stored, or the failure is recorded.
 *
 * @param int  $userId
 * @param bool $force  true to recompute even when a result already exists
 * @return array{ok:bool, skipped?:bool, insufficient?:bool, field_ai?:string, error?:string}
 */
public function processUser($userId, $force = false)
{
    $uid = intval($userId);
    if ($uid <= 0) {
        return ['ok' => false, 'error' => 'invalid user_id'];
    }

    $this->ensureReviewerInfoRow($uid);
    $profile = Db::name('user_reviewer_info')->where('reviewer_id', $uid)->find();
    if (!$profile) {
        return ['ok' => false, 'error' => 'reviewer_info missing'];
    }

    if (!$force) {
        $isDone = intval($profile['field_ai_status']) === self::STATUS_DONE;
        if ($isDone && trim((string)$profile['field_ai']) !== '') {
            return ['ok' => true, 'skipped' => true, 'field_ai' => (string)$profile['field_ai']];
        }
    }

    if (!$this->isEligible($uid, $profile)) {
        $this->updateFieldAi($uid, '', self::STATUS_INSUFFICIENT, 'insufficient profile/articles');
        return ['ok' => true, 'insufficient' => true];
    }

    try {
        $summary = $this->summarizeWithLlm($this->buildContext($uid, $profile));
        if ($summary === '') {
            throw new Exception('LLM returned empty field');
        }
        $this->updateFieldAi($uid, $summary, self::STATUS_DONE, '');
        return ['ok' => true, 'field_ai' => $summary];
    } catch (\Throwable $failure) {
        // Record the failure on the row (message capped at 500 characters) and in the log.
        $reason = $failure->getMessage();
        $this->updateFieldAi($uid, '', self::STATUS_FAILED, mb_substr($reason, 0, 500));
        $this->log('[FieldAi] user_id=' . $uid . ' fail: ' . $reason);
        return ['ok' => false, 'error' => $reason];
    }
}
/**
 * Whether the user can be summarised: has submitted articles, or has a
 * sufficiently filled reviewer profile.
 *
 * @param int        $userId
 * @param array|null $uri    reviewer profile row; loaded from the database when null
 * @return bool
 */
public function isEligible($userId, $uri = null)
{
    if ($this->hasSubmittedArticles($userId)) {
        return true;
    }
    $profile = ($uri !== null)
        ? $uri
        : Db::name('user_reviewer_info')->where('reviewer_id', $userId)->find();
    return $this->isReviewerProfileComplete($profile);
}
/**
 * Whether the user owns at least one article row with a non-empty title.
 *
 * @param int $userId
 * @return bool
 */
public function hasSubmittedArticles($userId)
{
    $titledArticles = Db::name('article')
        ->where('user_id', intval($userId))
        ->where('title', '<>', '');
    return $titledArticles->count() > 0;
}
/**
 * A reviewer profile counts as "sufficiently filled" when the number of
 * populated items reaches the threshold (user_field_ai.min_profile_fields,
 * default 4, never below 3).
 *
 * Counted items: seven plain text columns, the legacy "major" column (when not
 * empty and not '0'), and the presence of active major_to_user links.
 *
 * @param array|mixed $uri reviewer profile row
 * @return bool
 */
public function isReviewerProfileComplete($uri)
{
    if (!is_array($uri) || !$uri) {
        return false;
    }

    $threshold = max(3, (int) Env::get('user_field_ai.min_profile_fields', 4));
    $score = 0;

    foreach (['field', 'company', 'country', 'technical', 'introduction', 'department', 'website'] as $column) {
        if (empty($uri[$column])) {
            continue;
        }
        if (trim((string)$uri[$column]) === '') {
            continue;
        }
        $score++;
    }

    $legacyMajor = empty($uri['major']) ? '' : trim((string)$uri['major']);
    if ($legacyMajor !== '' && $legacyMajor !== '0') {
        $score++;
    }

    $activeLinks = Db::name('major_to_user')
        ->where('user_id', intval($uri['reviewer_id']))
        ->where('state', 0)
        ->count();
    if ($activeLinks > 0) {
        $score++;
    }

    return $score >= $threshold;
}
/**
 * Scan users in ascending id order, in batches, and return the first one after
 * $afterUserId that still needs a field summary and is eligible for one.
 *
 * Side effect: when $force is false, users found to be ineligible are marked
 * STATUS_INSUFFICIENT so later scans skip them.
 *
 * A missing reviewer-info row (ensureReviewerInfoRow() did not produce one) is
 * treated as "pending" instead of being dereferenced, consistent with
 * processUser(), which also tolerates a missing row.
 *
 * @param int  $afterUserId cursor: only users with a larger id are considered
 * @param bool $force       true to consider every user regardless of current status
 * @return int the next user id, or 0 when no user is left
 */
private function findNextPendingUserId($afterUserId, $force)
{
    $batch = max(20, (int) Env::get('user_field_ai.scan_batch', 80));
    $cursor = intval($afterUserId);

    while (true) {
        $query = Db::name('user')->alias('u')
            ->join('t_user_reviewer_info uri', 'uri.reviewer_id = u.user_id', "left")
            ->where('u.user_id', '>', $cursor);
        if (!$force) {
            // Pending, failed, or no reviewer-info row yet.
            // NOTE(review): relies on the query builder treating the literal 'null' as IS NULL — confirm for the framework version in use.
            $query->where(function ($q) {
                $q->where('uri.field_ai_status', self::STATUS_PENDING)
                    ->whereOr('uri.field_ai_status', self::STATUS_FAILED)
                    ->whereOr('uri.reviewer_info_id', 'null');
            });
        }
        $ids = $query->order('u.user_id asc')->limit($batch)->column('u.user_id');
        if (empty($ids)) {
            return 0;
        }

        foreach ($ids as $uid) {
            $uid = intval($uid);
            // Advance the cursor first so the next batch always starts after this user.
            $cursor = $uid;

            $this->ensureReviewerInfoRow($uid);
            $uri = Db::name('user_reviewer_info')->where('reviewer_id', $uid)->find();
            $hasRow = is_array($uri);
            $status = $hasRow
                ? intval($uri['field_ai_status'] ?? self::STATUS_PENDING)
                : self::STATUS_PENDING;

            if (!$force && ($status === self::STATUS_DONE || $status === self::STATUS_INSUFFICIENT)) {
                continue;
            }
            if ($this->isEligible($uid, $hasRow ? $uri : null)) {
                return $uid;
            }
            if (!$force) {
                $this->updateFieldAi($uid, '', self::STATUS_INSUFFICIENT, 'auto skip: insufficient data');
            }
        }
    }
}
/**
 * Assemble the data sent to the LLM: basic user identity, the reviewer profile
 * and the user's most recent titled articles.
 *
 * The article count is capped by user_field_ai.max_articles (default 5, clamped
 * to 1-10); abstracts are cut to 800 characters and the introduction to 1200.
 * NOTE(review): realname and email are part of the payload sent to the model
 * endpoint — confirm this is intended.
 *
 * @param int   $userId
 * @param array $uri    reviewer profile row
 * @return array{user:array, profile:array, articles:array}
 */
private function buildContext($userId, array $uri)
{
    $account = Db::name('user')->where('user_id', $userId)->field('user_id,realname,email,account')->find();
    $majors = $this->resolveMajorTitles($userId, $uri);

    $articleLimit = max(1, min(10, (int) Env::get('user_field_ai.max_articles', 5)));
    $recentArticles = Db::name('article')
        ->where('user_id', $userId)
        ->where('title', '<>', '')
        ->order('article_id desc')
        ->limit($articleLimit)
        ->field('article_id,title,keywords,abstrart,journal_id,ctime')
        ->select();

    // Resolve journal titles in one query, keyed by journal_id.
    $journalTitles = [];
    $journalIds = empty($recentArticles)
        ? []
        : array_unique(array_filter(array_column($recentArticles, 'journal_id')));
    if (!empty($journalIds)) {
        $journalTitles = Db::name('journal')->where('journal_id', 'in', $journalIds)->column('title', 'journal_id');
    }

    $articleSummaries = [];
    foreach ($recentArticles as $article) {
        $journalId = intval($article['journal_id']);
        $journalTitle = isset($journalTitles[$journalId]) ? (string) $journalTitles[$journalId] : '';
        $articleSummaries[] = [
            'title' => (string) $article['title'],
            'journal' => $journalTitle,
            'keywords' => (string) ($article['keywords'] ?? ''),
            'abstract' => mb_substr(trim((string) ($article['abstrart'] ?? '')), 0, 800),
        ];
    }

    $identity = [
        'realname' => $account ? (string) $account['realname'] : '',
        'email' => $account ? (string) $account['email'] : '',
    ];
    $profile = [
        'field' => trim((string) ($uri['field'] ?? '')),
        'technical' => trim((string) ($uri['technical'] ?? '')),
        'company' => trim((string) ($uri['company'] ?? '')),
        'department' => trim((string) ($uri['department'] ?? '')),
        'country' => trim((string) ($uri['country'] ?? '')),
        'introduction' => mb_substr(trim((string) ($uri['introduction'] ?? '')), 0, 1200),
        'website' => trim((string) ($uri['website'] ?? '')),
        'majors' => $majors,
    ];

    return [
        'user' => $identity,
        'profile' => $profile,
        'articles' => $articleSummaries,
    ];
}
/**
 * Titles of the user's majors: taken from the active major_to_user links, or,
 * when those yield nothing, from the comma-separated legacy "major" column.
 *
 * @param int   $userId
 * @param array $uri    reviewer profile row
 * @return array<int,string> trimmed, de-duplicated, non-empty titles
 */
private function resolveMajorTitles($userId, array $uri)
{
    $found = [];

    $linkedIds = Db::name('major_to_user')->where('user_id', $userId)->where('state', 0)->column('major_id');
    if (!empty($linkedIds)) {
        $found = Db::name('reviewer_major')->where('major_id', 'in', $linkedIds)->where('state', 0)->column('title');
    }

    if (empty($found) && !empty($uri['major'])) {
        $legacyIds = array_filter(array_map('intval', explode(',', (string) $uri['major'])));
        if (!empty($legacyIds)) {
            $found = Db::name('reviewer_major')->where('major_id', 'in', $legacyIds)->column('title');
        }
    }

    $cleaned = array_filter(array_map('trim', $found));
    return array_values(array_unique($cleaned));
}
/**
 * Resolve the full OpenAI-compatible chat/completions URL.
 *
 * Priority: user_field_ai.chat_url (dedicated override), then base.model_url,
 * then promotion.promotion_llm_url as a last fallback. The dedicated override
 * used to be commented out, so setting it had no effect even though the error
 * raised for a missing URL tells the operator to set it; the order now mirrors
 * resolveLlmModel(). Installations that only set base.model_url resolve to the
 * same URL as before.
 *
 * A value that is only a site root (e.g. http://chat.taimed.cn) is completed
 * with /v1/chat/completions by normalizeChatCompletionsUrl().
 *
 * @return string the URL, or '' when nothing is configured
 */
private function resolveLlmChatUrl()
{
    $candidates = [
        Env::get('user_field_ai.chat_url', ''),
        Env::get('base.model_url', ''),
        Env::get('promotion.promotion_llm_url', ''),
    ];
    foreach ($candidates as $candidate) {
        $candidate = trim((string) $candidate);
        if ($candidate === '') {
            continue;
        }
        $normalized = $this->normalizeChatCompletionsUrl($candidate);
        if ($normalized !== '') {
            return $normalized;
        }
    }
    return '';
}
/**
 * Ensure a URL points at the chat/completions endpoint.
 *
 * A URL already containing "chat/completions" (case-insensitive) is returned
 * trimmed; any other non-empty URL gets "/v1/chat/completions" appended after
 * removing trailing slashes. Empty input yields ''.
 *
 * @param string $url
 * @return string
 */
private function normalizeChatCompletionsUrl($url)
{
    $endpoint = trim((string) $url);
    if ($endpoint === '') {
        return '';
    }
    $alreadyComplete = stripos($endpoint, 'chat/completions') !== false;
    return $alreadyComplete ? $endpoint : rtrim($endpoint, '/') . '/v1/chat/completions';
}
/**
 * Resolve the model name: the first configured value that is non-empty and not
 * the placeholder "your-model-name", with 'gpt-4.1' as the built-in last resort.
 *
 * @return string
 */
private function resolveLlmModel()
{
    $configKeys = [
        'user_field_ai.chat_model',
        'base.model',
        'promotion.promotion_llm_model',
        'expert_country_chat_model',
        'citation_chat_model',
    ];

    $candidates = [];
    foreach ($configKeys as $configKey) {
        $candidates[] = Env::get($configKey, '');
    }
    $candidates[] = 'gpt-4.1';

    foreach ($candidates as $candidate) {
        $name = trim((string) $candidate);
        if ($name === '' || strtolower($name) === 'your-model-name') {
            continue;
        }
        return $name;
    }
    return '';
}
/**
 * Ask the configured OpenAI-compatible chat endpoint for a summary of the
 * user's main research field.
 *
 * $context is JSON-encoded into the user message. The reply is expected to be
 * {"field_ai":"..."}; a plain-text reply is accepted as a fallback. A JSON
 * answer that carries no usable field_ai is never stored as the field text.
 *
 * @param array $context Data describing the user; sent verbatim as JSON.
 * @return string Cleaned field summary, or '' when the model produced nothing usable.
 * @throws Exception When the endpoint/model is not configured, the request
 *                   cannot be JSON-encoded, curl fails, the HTTP status is
 *                   not 2xx, or a JSON response has no choices[0].message.content.
 */
private function summarizeWithLlm(array $context)
{
    $url = $this->resolveLlmChatUrl();
    $model = $this->resolveLlmModel();
    $apiKey = trim((string) Env::get('user_field_ai.chat_api_key', Env::get('expert_country_chat_api_key', Env::get('citation_chat_api_key', ''))));
    if ($url === '' || $model === '') {
        throw new Exception('user_field_ai chat not configured (set user_field_ai.chat_url or promotion PROMOTION_LLM_URL / base.model_url)');
    }

    // json_encode() returns false on e.g. invalid UTF-8; fail loudly instead
    // of sending a prompt with an empty payload.
    $payloadJson = json_encode($context, JSON_UNESCAPED_UNICODE | JSON_UNESCAPED_SLASHES);
    if ($payloadJson === false) {
        throw new Exception('LLM context json_encode failed: ' . json_last_error_msg());
    }

    $messages = [
        [
            'role' => 'system',
            'content' => '你是学术领域分类助手。根据用户的投稿与个人资料,用简体中文给出该用户最主要的研究领域总结。'
                . '要求精确、简洁13 个中文领域词或短短语,用顿号分隔;不要解释、不要英文、不要 JSON 以外的多余文字。'
                . '只输出 JSON{"field_ai":"..."}。',
        ],
        [
            'role' => 'user',
            'content' => "请根据以下 JSON 资料总结该用户的主要研究领域:\n" . $payloadJson,
        ],
    ];
    $body = [
        'model' => $model,
        'temperature' => 0.2,
        'messages' => $messages,
    ];
    $requestJson = json_encode($body, JSON_UNESCAPED_UNICODE);
    if ($requestJson === false) {
        throw new Exception('LLM request json_encode failed: ' . json_last_error_msg());
    }

    $headers = ['Content-Type: application/json'];
    if ($apiKey !== '') {
        $headers[] = 'Authorization: Bearer ' . $apiKey;
    }

    $ch = curl_init();
    curl_setopt_array($ch, [
        CURLOPT_URL => $url,
        CURLOPT_POST => true,
        CURLOPT_POSTFIELDS => $requestJson,
        CURLOPT_RETURNTRANSFER => true,
        CURLOPT_CONNECTTIMEOUT => 15,
        // Total timeout is configurable but never below 30 seconds.
        CURLOPT_TIMEOUT => max(30, (int) Env::get('user_field_ai.timeout', 90)),
        CURLOPT_HTTPHEADER => $headers,
    ]);
    $raw = curl_exec($ch);
    $code = (int) curl_getinfo($ch, CURLINFO_HTTP_CODE);
    $err = curl_error($ch);
    curl_close($ch);

    if ($raw === false) {
        throw new Exception('LLM curl error: ' . $err);
    }
    if ($code < 200 || $code >= 300) {
        $hint = ($code === 404 && stripos($url, 'chat/completions') === false)
            ? ' (chat_url may be missing /v1/chat/completions)'
            : '';
        throw new Exception('LLM HTTP ' . $code . $hint . ': ' . mb_substr((string) $raw, 0, 400));
    }

    $data = json_decode($raw, true);
    $hasChoice = is_array($data) && isset($data['choices'][0]['message']['content']);
    // Without a standard chat envelope the raw body is inspected instead, so a
    // bare {"field_ai": ...} or plain-text body still works.
    $content = $hasChoice
        ? trim((string) $data['choices'][0]['message']['content'])
        : trim((string) $raw);

    $fieldAi = $this->parseFieldAiFromContent($content);
    if ($fieldAi !== '' || $content === '') {
        return $fieldAi;
    }

    // A JSON body with neither chat content nor field_ai (typically an error
    // object delivered with a 2xx status) must not be persisted as field text.
    if (!$hasChoice && is_array($data)) {
        throw new Exception('LLM response has no choices[0].message.content: ' . mb_substr($content, 0, 400));
    }

    // The model answered with JSON-shaped text but no usable field_ai.
    if (strpos($content, '{') !== false) {
        return '';
    }

    // Plain-text answer: use it after cleaning.
    return $this->cleanFieldAiText($content);
}
/**
 * Extract the "field_ai" value from a model reply.
 *
 * Markdown code fences are stripped, then JSON is looked for in this order:
 * the widest {...} span, the first (shortest) {...} span, and finally the
 * whole text. The first candidate that decodes to an array with a non-empty
 * "field_ai" wins. A list-valued "field_ai" is joined with "、".
 *
 * @param mixed $content Raw model output (cast to string).
 * @return string Cleaned field text, or '' when no field_ai could be extracted.
 */
private function parseFieldAiFromContent($content)
{
    $content = trim((string) $content);
    if ($content === '') {
        return '';
    }

    // preg_replace() returns null on a PCRE error; keep the original text then.
    $unfenced = preg_replace('/^```[a-zA-Z]*\s*|```$/m', '', $content);
    if (is_string($unfenced)) {
        $content = $unfenced;
    }

    $candidates = [];
    if (preg_match('/\{.*\}/s', $content, $m)) {
        $candidates[] = $m[0];
    }
    // The non-greedy span rescues replies such as `{"field_ai":"x"} note {y}`,
    // where the greedy span is not valid JSON.
    if (preg_match('/\{.*?\}/s', $content, $m)) {
        $candidates[] = $m[0];
    }
    $candidates[] = $content;

    foreach (array_unique($candidates) as $json) {
        $obj = json_decode($json, true);
        if (!is_array($obj) || empty($obj['field_ai'])) {
            continue;
        }
        $value = $obj['field_ai'];
        if (is_array($value)) {
            // Casting an array to string would yield the literal "Array".
            $parts = [];
            foreach ($value as $item) {
                if (is_string($item) || is_int($item) || is_float($item)) {
                    $item = trim((string) $item);
                    if ($item !== '') {
                        $parts[] = $item;
                    }
                }
            }
            $value = implode('、', $parts);
        }
        return $this->cleanFieldAiText((string) $value);
    }

    return '';
}
/**
 * Normalize a field summary: strip surrounding quotes and whitespace, remove
 * all inner whitespace, and cap the length at 200 characters.
 *
 * Always returns a string. Invalid UTF-8 is scrubbed first, because
 * preg_replace() with the /u modifier returns null on malformed input.
 *
 * @param mixed $text Raw text (cast to string).
 * @return string Cleaned text, at most 200 characters long.
 */
private function cleanFieldAiText($text)
{
    $text = trim((string) $text);
    $text = trim($text, "\"' \t\n\r");

    // Converting UTF-8 to UTF-8 replaces malformed byte sequences with the
    // mbstring substitute character.
    if (!mb_check_encoding($text, 'UTF-8')) {
        $text = (string) mb_convert_encoding($text, 'UTF-8', 'UTF-8');
    }

    $compact = preg_replace('/\s+/u', '', $text);
    if (!is_string($compact)) {
        // Last resort if the Unicode-aware pattern still fails.
        $compact = (string) preg_replace('/\s+/', '', $text);
    }

    if (mb_strlen($compact) > 200) {
        $compact = mb_substr($compact, 0, 200);
    }
    return $compact;
}
/**
 * Make sure a `user_reviewer_info` row exists for the given user.
 *
 * When no row with reviewer_id = $userId is found, one is inserted with
 * state = 0; otherwise nothing is written.
 *
 * @param mixed $userId Value stored in / matched against reviewer_id.
 * @return void
 */
public function ensureReviewerInfoRow($userId)
{
    $row = Db::name('user_reviewer_info')
        ->where('reviewer_id', $userId)
        ->find();

    if (!$row) {
        $fresh = [
            'reviewer_id' => $userId,
            'state' => 0,
        ];
        Db::name('user_reviewer_info')->insert($fresh);
    }
}
/**
 * Write the AI field summary and its status to the user's `user_reviewer_info` row.
 *
 * The summary is trimmed and cut to 512 characters, the status is cast to int
 * and field_ai_utime is set to the current Unix time. A log line is written
 * unless $note is exactly ''.
 *
 * @param mixed $userId  Value matched against reviewer_id.
 * @param mixed $fieldAi Summary text (cast to string).
 * @param mixed $status  Status code stored in field_ai_status.
 * @param mixed $note    Message appended to the log line.
 * @return void
 */
private function updateFieldAi($userId, $fieldAi, $status, $note)
{
    $summary = mb_substr(trim((string) $fieldAi), 0, 512);

    Db::name('user_reviewer_info')
        ->where('reviewer_id', $userId)
        ->update([
            'field_ai' => $summary,
            'field_ai_status' => intval($status),
            'field_ai_utime' => time(),
        ]);

    if ($note === '') {
        return;
    }

    $entry = '[FieldAi] user_id=' . $userId . ' status=' . $status . ' note=' . $note;
    $this->log($entry);
}
/**
 * Append a timestamped line to the file at $this->logFile.
 *
 * Write errors are suppressed with `@`, so a failed write is ignored.
 *
 * @param mixed $msg Message text; prefixed with "Y-m-d H:i:s " and followed by PHP_EOL.
 * @return void
 */
public function log($msg)
{
    $stamp = date('Y-m-d H:i:s');
    $entry = $stamp . ' ' . $msg . PHP_EOL;

    @file_put_contents($this->logFile, $entry, FILE_APPEND);
}
}

View File

@@ -0,0 +1,5 @@
-- AI-generated summary of a user's main research field (in Chinese), written by the chained queue job.
-- Adds three columns to t_user_reviewer_info:
--   field_ai        : the summary text (up to 512 characters), placed after `field`
--   field_ai_status : 0 = pending, 1 = generated, 2 = skipped (insufficient data), 3 = failed
--   field_ai_utime  : time field_ai was last updated (INT)
ALTER TABLE `t_user_reviewer_info`
ADD COLUMN `field_ai` VARCHAR(512) NOT NULL DEFAULT '' COMMENT 'AI总结的主要研究领域(中文)' AFTER `field`,
ADD COLUMN `field_ai_status` TINYINT NOT NULL DEFAULT 0 COMMENT '0待处理 1已生成 2资料不足跳过 3失败' AFTER `field_ai`,
ADD COLUMN `field_ai_utime` INT NOT NULL DEFAULT 0 COMMENT 'field_ai 更新时间' AFTER `field_ai_status`;

View File

@@ -0,0 +1,4 @@
-- Plagiarism-check type: `full` = whole manuscript, `body_only` = body text only
-- (title, authors and references are cropped before upload).
-- Adds two columns to t_plagiarism_check:
--   check_type        : 'full' (default) or 'body_only'
--   derived_file_path : relative path of the temporary manuscript generated for body_only checks
ALTER TABLE `t_plagiarism_check`
ADD COLUMN `check_type` VARCHAR(16) NOT NULL DEFAULT 'full' COMMENT 'full=全文 body_only=仅正文' AFTER `trigger_source`,
ADD COLUMN `derived_file_path` VARCHAR(255) NOT NULL DEFAULT '' COMMENT 'body_only 时生成的临时稿件相对路径' AFTER `source_file_size`;