diff --git a/.env b/.env index 1cae3940..01c668b8 100644 --- a/.env +++ b/.env @@ -19,6 +19,16 @@ client_id = 616562 client_secret = CfMDrllyqBTFKrUkO2XaE7OmWTYqP3yd hmac = 8aU8WnITYhwaGTXH +[base] +model_url=http://chat.taimed.cn +model_url1=http://125.39.141.154:10002/v1/chat/completions +model=DeepSeek-Coder-V2-Instruct + +[user_field_ai] +; 留空则依次用 promotion PROMOTION_LLM_URL、citation 等;仅写根地址时会自动补 /v1/chat/completions +;chat_url=http://chat.taimed.cn/v1/chat/completions +;chat_model=DeepSeek-Coder-V2-Instruct + [promotion] PROMOTION_LLM_URL=http://chat.taimed.cn/v1/chat/completions PROMOTION_LLM_MODEL=DeepSeek-Coder-V2-Instruct @@ -33,6 +43,18 @@ UNSUBSCRIBE_BASE_URL=https://submission.tmrjournals.com/api/Unsubscribe/index [yboard] APPLY_URL="https://submission.tmrjournals.com/youthBoardRegister" +[turnitin] +viewer_permission_set=ADMINISTRATOR +viewer_locale=en-US +; viewer-url 必填 viewer_user_id;默认用查重记录的 triggered_by → editor_{id},也可写死: +;viewer_user_id=editor_1 +; 与 Crossref 网页手动查重对齐:三项默认 0(不排除引用/参考文献/引文)。若只要正文相似度可改为 1 +exclude_quotes=0 +exclude_bibliography=0 +exclude_citations=0 +; 在线报告默认视图:all_sources=按来源库分类(与 Crossref 手动后台一致);match_overview=匹配总览(文中编号易都显示为 1) +viewer_default_mode=all_sources + [plagiarism] static_root="/home/wwwroot/api.tmrjournals.com/public" diff --git a/application/api/controller/Article.php b/application/api/controller/Article.php index e47a0473..39572f85 100644 --- a/application/api/controller/Article.php +++ b/application/api/controller/Article.php @@ -598,6 +598,10 @@ class Article extends Base $article_res['is_draft'] = 1; } } + //新增是否存在生产实例 20260204 start + $article_res['has_produce'] = $this->production_article_obj->where('article_id', $data['articleId'])->where('state', 0)->find()?1:0; + + //新增是否是草稿删除 20260204 end return json(['article' => $article_res, 'msg' => $article_msg, 'authors' => $author_res, 'suggest' => $suggest, 'transfer' => $transfer_res, 'transinfo' => $transfer_info, "major" => $major,'suggest_final' => $aFinal]); 
} diff --git a/application/api/controller/Board.php b/application/api/controller/Board.php index 1f699cad..46bd55db 100644 --- a/application/api/controller/Board.php +++ b/application/api/controller/Board.php @@ -567,6 +567,8 @@ class Board extends Base { return jsonError($rule->getError()); } $check = $this->board_to_journal_obj->where('user_id',$data['user_id'])->where('state',0)->find(); + $journal_info = $this->journal_obj->where('journal_id',$data['journal_id'])->find(); + $user_info = $this->user_obj->where('user_id',$data['user_id'])->find(); if($check){ return jsonError("According to TMR Publishing Group Policy, scientists are not allowed to serve on the editorial board of more than one journal at the same time."); } @@ -578,6 +580,18 @@ class Board extends Base { $insert['board_group_id'] = $data['board_group_id']; $insert['research_areas'] = trim($data['research_areas']); $this->board_to_journal_obj->insert($insert); + + $reviewer_journal = $this->reviewer_to_journal_obj->where("reviewer_id",$user_info['user_id'])->where("journal_id",$journal_info['journal_id'])->find(); + if(!$reviewer_journal){ + $insert_reviewer['reviewer_id'] = $user_info['user_id']; + $insert_reviewer['journal_id'] = $journal_info['journal_id']; + $insert_reviewer['account'] = $user_info['account']; + $insert_reviewer['journal_title'] = $journal_info['title']; + $insert_reviewer['ctime'] = time(); + $this->reviewer_to_journal_obj->insert($insert_reviewer); + } + + return jsonSuccess([]); } diff --git a/application/api/controller/Email.php b/application/api/controller/Email.php index c1f08261..6b5a2d6a 100644 --- a/application/api/controller/Email.php +++ b/application/api/controller/Email.php @@ -115,7 +115,6 @@ class Email extends Base public function pushEmailOnTemplate() { - die(); $data = $this->request->post(); $rule = new Validate([ diff --git a/application/api/controller/EmailClient.php b/application/api/controller/EmailClient.php index 27091ab3..63841fc0 100644 --- 
a/application/api/controller/EmailClient.php +++ b/application/api/controller/EmailClient.php @@ -1406,8 +1406,8 @@ class EmailClient extends Base return jsonError('Factory is disabled'); } $expertType = intval($factory['expert_type']); - if (!in_array($expertType, [2, 3, 5], true)) { - return jsonError('Only expert_type=2(Editorial Board), 3(Young Editorial Board) or 5(Expert pool) is supported currently'); + if (!in_array($expertType, [2, 3, 4, 5, 6], true)) { + return jsonError('Unsupported expert_type; supported: 2=编委, 3=青年编委, 4=作者, 5=专家库, 6=往期青年编委'); } $journalId = intval($factory['journal_id']); @@ -2260,7 +2260,7 @@ class EmailClient extends Base * 每日自动生成推广任务(由 Linux crontab 调用) * * 逻辑: - * 1. 查询所有 state=0 的任务工厂(支持 expert_type=2 编委 / =5 expert 库;其他类型预留) + * 1. 查询所有 state=0 的任务工厂(支持 expert_type=2/3/4/5/6) * 2. JOIN journal 确认期刊有效(state=0, start_promotion=1) * 3. 按 factory_id + send_date 检查去重 * 4. template/style: 工厂 > 0 用工厂的,否则用期刊默认 @@ -2282,7 +2282,7 @@ class EmailClient extends Base ->alias('f') ->join('t_journal j', 'j.journal_id = f.journal_id', 'inner') ->where('f.state', 0) - ->where('f.expert_type', 'in', [2, 3, 5]) + ->where('f.expert_type', 'in', [2, 3, 4, 5, 6]) ->where('j.state', 0) ->where('f.start_promotion', 1) ->field('f.*, j.title as journal_title, j.default_template_id, j.default_style_id') @@ -2684,6 +2684,7 @@ class EmailClient extends Base 3 => 'Young Editorial Board', 4 => 'Author', 5 => 'Expert Pool', + 6 => 'Past Young Editorial Board', ]; return isset($map[intval($t)]) ? 
$map[intval($t)] : 'Unknown'; } @@ -2692,8 +2693,10 @@ class EmailClient extends Base * 根据 expert_type 分发选人逻辑 * * - expert_type = 5:从 t_expert 库选人(按领域 / 国家 / 频次) - * - expert_type ∈ {1,2,3,4}:从系统内部表选人(主编/编委/青年编委/作者),fields 与国家筛选忽略; - * 频次按 t_promotion_email_log 中相同 expert_type 维度的最近发送时间扣除 + * 频次:e.ltime(成功发送后回写)+ t_promotion_email_log 中「待发送 state=0 的入队时间 ctime」 + * (避免「今日生成任务明日发送」时 ltime 未变导致连续两天选到同一拨人) + * - expert_type ∈ {1,2,3,4,6}:从系统内部表选人(主编/编委/青年编委/作者/往期青年编委),fields 与国家筛选忽略; + * 频次按 t_promotion_email_log:已发/退信用 send_time;待发送队列用 ctime(同上) * * 返回行 shape 已对齐: * - type=5 行包含 e.* 全部字段(含 expert_id、country_id、ltime 等) @@ -2741,9 +2744,25 @@ class EmailClient extends Base if ($noRepeatDays > 0) { $cutoff = time() - ($noRepeatDays * 86400); + // ltime:成功发出后回写;与 log 中 state=1 在「已送达」上部分重叠,但保留 ltime 可走索引、且退信 state=3 未必回写 ltime。 $query->where(function ($q) use ($cutoff) { $q->where('e.ltime', 0)->whereOr('e.ltime', '<', $cutoff); }); + // 一条 NOT EXISTS:待发(state=0 按 ctime) 或 已发/退信(按 send_time),避免两段相同 join 的重复感 + $query->where(function ($q) use ($cutoff) { + $q->table('t_promotion_email_log')->alias('pl') + ->join('t_promotion_task pt', 'pt.task_id = pl.task_id', 'inner') + ->where('pt.expert_type', 5) + ->where('pl.expert_id', '>', 0) + ->whereRaw('pl.expert_id = e.expert_id') + ->where(function ($w) use ($cutoff) { + $w->where(function ($a) use ($cutoff) { + $a->where('pl.state', 0)->where('pl.ctime', '>', $cutoff); + })->whereOr(function ($b) use ($cutoff) { + $b->where('pl.state', 'in', [1, 3])->where('pl.send_time', '>', $cutoff); + }); + }); + }, 'not exists'); } $countryIds = $this->resolveCountryIds($targetPartitions, $targetCountryIds); @@ -2762,9 +2781,9 @@ class EmailClient extends Base * 系统内部受众选人(编委 / 主编 / 青年编委 / 作者) * 仅按 期刊 + 频次 过滤;领域 / 国家无关 * - * 频次:扣除「同 expert_type 维度下,no_repeat_days 内已经发出 (state=1) 或退信 (state=3) 的人」 + * 频次:扣除「同 expert_type 下,no_repeat_days 内 (1) 已发出或退信,或 (2) 仍在队列待发送(state=0,按 ctime)」的人 * - * @param int $expertType 1=主编 2=编委 3=青年编委 4=作者 + * 
@param int $expertType 1=主编 2=编委 3=青年编委 4=作者 6=往期青年编委 * @param int $journalId * @param int $noRepeatDays * @param int $limit @@ -2799,23 +2818,52 @@ class EmailClient extends Base break; case 1: // 主编(预留,本期不实现) - case 4: // 作者(预留) + return []; + + case 4: // 作者:该刊投稿作者(按邮箱关联 t_user) + $query = Db::name('article_author')->alias('aa') + ->join('t_user u', 'u.email = aa.email', 'inner') + ->join('t_article a', 'a.article_id = aa.article_id', 'inner') + ->join('t_user_reviewer_info uri', 'uri.reviewer_id = u.user_id', 'left') + ->where('a.journal_id', $journalId) + ->where('u.email', '<>', '') + ->where('u.unsubscribed', 0); + break; + case 6: //获取往期的青年编委2025年以前的,中国人 + $now = strtotime('2025-01-01'); + $query = Db::name('user_to_yboard')->alias('y') + ->join('t_user u', 'u.user_id = y.user_id', 'inner') + ->join('t_user_reviewer_info uri', 'uri.reviewer_id = u.user_id', 'left') + ->where('y.journal_id', $journalId) + ->where('y.state', 0) + ->where('y.start_date', '<=', $now) + ->where('uri.country', 'China') + ->where('u.email', '<>', '') + ->where('u.unsubscribed', 0); + break;// default: return []; } + if (!isset($query)) { + return []; + } + if ($noRepeatDays > 0) { $cutoff = intval(time() - ($noRepeatDays * 86400)); $expertTypeSafe = intval($expertType); - // 关联子查询:相对于 NOT IN,避免把全部已发 user_id 拉到 PHP 再拼回 SQL; - // 配合 t_promotion_email_log(user_id, send_time) 复合索引做半连接探针,常量时间。 $query->where(function ($q) use ($expertTypeSafe, $cutoff) { $q->table('t_promotion_email_log')->alias('l') - ->join('t_promotion_task t', 't.task_id = l.task_id', 'inner') - ->where('t.expert_type', $expertTypeSafe) - ->where('l.state', 'in', [1, 3]) - ->where('l.send_time', '>', $cutoff) - ->whereRaw('l.user_id = u.user_id'); + ->join('t_promotion_task t', 't.task_id = l.task_id', 'inner') + ->where('t.expert_type', $expertTypeSafe) + ->whereRaw('l.user_id = u.user_id') + ->where(function ($w) use ($cutoff) { + $w->where(function ($a) use ($cutoff) { + $a->where('l.state', 0)->where('l.ctime', 
'>', $cutoff); + })->whereOr(function ($b) use ($cutoff) { + $b->where('l.state', 'in', [1, 3])->where('l.send_time', '>', $cutoff); + }); + }); }, 'not exists'); } diff --git a/application/api/controller/Plagiarism.php b/application/api/controller/Plagiarism.php index febfbde1..9d0020b7 100644 --- a/application/api/controller/Plagiarism.php +++ b/application/api/controller/Plagiarism.php @@ -2,15 +2,17 @@ namespace app\api\controller; +use app\common\TurnitinService; use think\Db; use think\Response; use app\common\PlagiarismService; +use think\Validate; /** * 论文查重(Turnitin / Crossref Similarity Check)控制器。 * * 触发方式:纯手工(编辑后台点"查重"按钮)。 - * 报告策略:在线 viewer URL 临时签名 + PDF 永久落盘 runtime/plagiarism/。 + * 报告策略:PDF 在 poll 完成时落盘;在线 viewer URL 通过 getReportUrl 按需生成(临时签名)。 * * 主要接口: * POST submit 触发查重 @@ -35,12 +37,14 @@ class Plagiarism extends Base * article_id 必填 * file_url 选填;不传则按 article_id 在 t_article_file 找 manuscirpt * editor_id 选填;触发人 user_id(前端拿不到也可以传 0) + * check_type 选填;full(默认全文)| body_only(正文)| both(各提交一条) */ public function submit() { $articleId = intval($this->request->param('article_id', 0)); $fileUrl = trim($this->request->param('file_url', '')); $editorId = intval($this->request->param('editor_id', 0)); + $checkType = trim($this->request->param('check_type', 'full')); if ($articleId <= 0) { return jsonError('article_id required'); @@ -51,21 +55,79 @@ class Plagiarism extends Base $localPath = $fileUrl !== '' ? 
$svc->resolveFileUrlToLocal($fileUrl) : $svc->locateArticleManuscript($articleId); - echo $localPath; - $checkId = $svc->submit($articleId, $localPath, $editorId, 'manual'); - return jsonSuccess(['check_id' => $checkId]); + if (strtolower($checkType) === 'both') { + $ids = $svc->submitBoth($articleId, $localPath, $editorId, 'manual'); + return jsonSuccess($ids); + } + $checkId = $svc->submit($articleId, $localPath, $editorId, 'manual', $checkType); + return jsonSuccess(['check_id' => $checkId, 'check_type' => strtolower($checkType) ?: 'full']); } catch (\Throwable $e) { return jsonError($e->getMessage()); } } - public function testccone(){ + /** + * 调试:与线上一致走队列链(upload → wait ingest → trigger → poll),需 worker 消费 plagiarism 队列。 + */ + public function testccone() + { $svc = new PlagiarismService(); $checkId = 9; - $filePath = "/home/wwwroot/api.tmrjournals.com/public/manuscirpt/20260509/6832a56e8ace38fe99df390ab5221deb.docx"; - $svc->runUploadAndTrigger($checkId,$filePath); + $filePath = '/home/wwwroot/api.tmrjournals.com/public/manuscirpt/20260509/6832a56e8ace38fe99df390ab5221deb.docx'; + $svc->runUploadOnly($checkId, $filePath); + } + public function testcconegetstatus(){ + $data = $this->request->post(); + $rule = new Validate([ + "id"=>"require" + ]); + if(!$rule->check($data)){ + return jsonError($rule->getError()); + } + $tii = new TurnitinService(); + $res = $tii->parseSubmissionIngestState($data['id']); + return jsonSuccess($res); + } + + public function testcconewait(){ + $data = $this->request->post(); + $rule = new Validate([ + "checkId"=>"require" + ]); + if(!$rule->check($data)){ + return jsonError($rule->getError()); + } + $svc = new PlagiarismService(); + $res = $svc->runIngestPollStep($data['checkId']); + return jsonSuccess($res); + } + + public function testcconesimilar(){ + $data = $this->request->post(); + $rule = new Validate([ + "checkId"=>"require" + ]); + if(!$rule->check($data)){ + return jsonError($rule->getError()); + } + $svc = new 
PlagiarismService(); + $res = $svc->runTriggerSimilarityOnly($data['checkId']); + return jsonSuccess($res); + } + + public function testcconelast(){ + $data = $this->request->post(); + $rule = new Validate([ + "checkId"=>"require" + ]); + if(!$rule->check($data)){ + return jsonError($rule->getError()); + } + $svc = new PlagiarismService(); + $re = $svc->runPollStatus($data['checkId']); + return jsonSuccess($re); } /** @@ -89,7 +151,15 @@ class Plagiarism extends Base if (!$row) { return jsonError('not found'); } - return jsonSuccess($this->formatRow($row)); + $out = $this->formatRow($row); + if (!empty($row['raw_response'])) { + $raw = json_decode($row['raw_response'], true); + if (is_array($raw)) { + $out['similarity_meta'] = \app\common\TurnitinService::parseSimilarityReportMeta($raw); + } + } + $out['report_view_hint'] = 'PDF 多为 Match Overview 汇总样式;按来源库(Internet/Publication/Crossref)分类请用 getReportUrl 打开在线报告并切到 All Sources'; + return jsonSuccess($out); } /** @@ -113,11 +183,18 @@ class Plagiarism extends Base } /** - * 取在线查看 URL;过期则自动刷新 + * 取在线查看 URL(Turnitin 一次性会话链接,关闭报告页后勿复用旧 URL) + * + * 入参: + * check_id 必填 + * editor_id 选填,当前打开报告的编辑 user_id(与 viewer_user_id 对应,避免 session 认证失败) + * reuse 选填,1=在未过期时复用库内缓存;默认 0,每次调用重新向 Turnitin 申请 */ public function getReportUrl() { $checkId = intval($this->request->param('check_id', 0)); + $editorId = intval($this->request->param('editor_id', 0)); + $reuse = intval($this->request->param('reuse', 0)) === 1; if ($checkId <= 0) { return jsonError('check_id required'); } @@ -129,22 +206,48 @@ class Plagiarism extends Base if ($row['state'] != 3) { return jsonError('check not completed yet, state=' . 
$row['state']); } - $needRefresh = empty($row['view_only_url']) + $viewerContext = []; + if ($editorId > 0) { + $viewerContext['editor_id'] = $editorId; + } + $needRefresh = !$reuse + || empty($row['view_only_url']) || intval($row['view_only_url_expire']) < time() + 60; + $usageHint = '每次打开请先调用本接口获取新链接;勿收藏或再次打开旧链接。请在新标签页打开,并允许 Turnitin 域名 Cookie。'; + if ($needRefresh) { $svc = new PlagiarismService(); - $info = $svc->refreshViewerUrlFor($checkId); + $info = $svc->refreshViewerUrlFor($checkId, $viewerContext); + if ($info['url'] === '') { + return jsonError('Turnitin returned empty viewer_url'); + } return jsonSuccess([ - 'view_only_url' => $info['url'], - 'expire' => $info['expire'], + 'view_only_url' => $info['url'], + 'expire' => $info['expire'], + 'has_pdf' => !empty($info['local_pdf']), + 'viewer_user_id' => $info['viewer_user_id'], + 'refreshed' => true, + 'usage_hint' => $usageHint, ]); } return jsonSuccess([ - 'view_only_url' => $row['view_only_url'], - 'expire' => intval($row['view_only_url_expire']), + 'view_only_url' => $row['view_only_url'], + 'expire' => intval($row['view_only_url_expire']), + 'has_pdf' => !empty($row['pdf_local_path']), + 'refreshed' => false, + 'usage_hint' => $usageHint, ]); } catch (\Throwable $e) { + if (!empty($row['pdf_local_path'])) { + return jsonSuccess([ + 'view_only_url' => '', + 'expire' => 0, + 'has_pdf' => true, + 'viewer_error' => $e->getMessage(), + 'hint' => '在线报告暂不可用,请使用 downloadReport 下载 PDF', + ]); + } return jsonError($e->getMessage()); } } @@ -201,10 +304,14 @@ class Plagiarism extends Base 'similarity_score' => floatval($r['similarity_score']), 'tii_report_status' => (string)$r['tii_report_status'], 'has_pdf' => !empty($r['pdf_local_path']), + 'local_pdf_url' => $r['pdf_local_path'], 'has_viewer_url' => !empty($r['view_only_url']) && intval($r['view_only_url_expire']) > time(), 'attempts' => intval($r['attempts']), 'error_msg' => (string)$r['error_msg'], 'source_file_name' => (string)$r['source_file_name'], + 
'check_type' => (string)($r['check_type'] ?? 'full'), + 'check_type_label' => $this->checkTypeLabel($r['check_type'] ?? 'full'), + 'derived_file_path'=> (string)($r['derived_file_path'] ?? ''), 'trigger_source' => (string)$r['trigger_source'], 'triggered_by' => intval($r['triggered_by']), 'ctime' => intval($r['ctime']), @@ -212,6 +319,15 @@ class Plagiarism extends Base ]; } + private function checkTypeLabel($checkType) + { + $t = strtolower(trim((string) $checkType)); + if ($t === 'body_only' || $t === 'body') { + return '正文查重'; + } + return '全文查重'; + } + private function stateLabel($state) { $map = [ diff --git a/application/api/controller/References.php b/application/api/controller/References.php index 47ae2328..3f9a32b2 100644 --- a/application/api/controller/References.php +++ b/application/api/controller/References.php @@ -1030,6 +1030,7 @@ class References extends Base * AI检测 */ public function checkByAi($aParam = []){ + return jsonError("service is stop!"); //获取参数 $aParam = empty($aParam) ? 
$this->request->post() : $aParam; diff --git a/application/api/controller/Reviewer.php b/application/api/controller/Reviewer.php index fc2cbcc3..a031a419 100644 --- a/application/api/controller/Reviewer.php +++ b/application/api/controller/Reviewer.php @@ -2299,14 +2299,14 @@ class Reviewer extends Base ->count(); if(empty($count)){ - return jsonSuccess(['reviewers' => [],'count' => 0]); + return jsonSuccess(['reviewers' => [],'count' => 0,"sql"=>$this->reviewer_to_journal_obj->getLastSql()]); } //获取数据 $list = $this->reviewer_to_journal_obj ->join("t_user", "t_user.user_id = t_reviewer_to_journal.reviewer_id", "left") ->join("t_user_reviewer_info", "t_user_reviewer_info.reviewer_id = t_reviewer_to_journal.reviewer_id", "left") - ->field('t_user.account,t_user.email,t_user.realname,t_user_reviewer_info.company,t_user_reviewer_info.field,t_user.user_id,t_user.rs_num') + ->field('t_user.account,t_user.email,t_user.realname,t_user_reviewer_info.company,t_user_reviewer_info.field,t_user_reviewer_info.last_invite_time,t_user.user_id,t_user.rs_num') ->where($where)->where(function($query) use ($iTeenDaysLater) { $query->where('t_user_reviewer_info.last_invite_time', '<', $iTeenDaysLater) ->whereOr('t_user_reviewer_info.last_invite_time', '=', 0); diff --git a/application/api/controller/User.php b/application/api/controller/User.php index 25fc120f..27a9a750 100644 --- a/application/api/controller/User.php +++ b/application/api/controller/User.php @@ -214,6 +214,21 @@ class User extends Base $insert['ctime'] = time(); $this->user_to_yboard_obj->insert($insert); + //将此人添加到审稿人中 + $reviewer_journal = $this->reviewer_to_journal_obj->where("reviewer_id",$user_info['user_id'])->where("journal_id",$journal_info['journal_id'])->find(); + if($reviewer_journal){ + $this->reviewer_to_journal_obj->where("rtj_id",$reviewer_journal['rtj_id'])->update(['is_yboard'=>1]); + }else{ + $insert_reviewer['reviewer_id'] = $user_info['user_id']; + $insert_reviewer['journal_id'] = 
$journal_info['journal_id']; + $insert_reviewer['account'] = $user_info['account']; + $insert_reviewer['journal_title'] = $journal_info['title']; + $insert_reviewer['is_yboard'] = 1; + $insert_reviewer['ctime'] = time(); + $this->reviewer_to_journal_obj->insert($insert_reviewer); + } + + //发送通知邮件给用户 $tt = 'Dear Dr. ' . ($user_info['realname'] == '' ? $user_info['account'] : $user_info['realname']) . ',

'; $tt .= "Thanks for your support to the journal ".$journal_info['title'].", Please note that your account of ".$journal_info['title']." has been created. The login credentials in the system is as below:

"; diff --git a/application/api/controller/UserFieldAi.php b/application/api/controller/UserFieldAi.php new file mode 100644 index 00000000..78680155 --- /dev/null +++ b/application/api/controller/UserFieldAi.php @@ -0,0 +1,92 @@ +request->param('force', 0)) === 1; + $delay = max(0, intval($this->request->param('delay', 1))); + + $svc = new UserFieldAiService(); + $started = $svc->startChain($force, $delay); + + return jsonSuccess([ + 'started' => $started, + 'queue' => UserFieldAiService::QUEUE_NAME, + 'force' => $force, + 'msg' => $started ? 'chain enqueued' : 'no pending users', + ]); + } + + /** + * 同步处理单个用户(不调队列)。 + */ + public function processOne() + { + $userId = intval($this->request->param('user_id', 0)); + $force = intval($this->request->param('force', 0)) === 1; + if ($userId <= 0) { + return jsonError('user_id required'); + } + + $svc = new UserFieldAiService(); + $result = $svc->processUser($userId, $force); + if (empty($result['ok'])) { + return jsonError(isset($result['error']) ? $result['error'] : 'failed'); + } + return jsonSuccess($result); + } + + /** + * 预览:是否满足条件、当前 field_ai 状态。 + */ + public function preview() + { + $userId = intval($this->request->param('user_id', 0)); + if ($userId <= 0) { + return jsonError('user_id required'); + } + + $svc = new UserFieldAiService(); + $svc->ensureReviewerInfoRow($userId); + $uri = Db::name('user_reviewer_info')->where('reviewer_id', $userId)->find(); + + return jsonSuccess([ + 'user_id' => $userId, + 'has_articles' => $svc->hasSubmittedArticles($userId), + 'profile_complete' => $svc->isReviewerProfileComplete($uri), + 'eligible' => $svc->isEligible($userId, $uri), + 'field_ai' => $uri ? (string) $uri['field_ai'] : '', + 'field_ai_status' => $uri ? intval($uri['field_ai_status']) : 0, + 'field_ai_utime' => $uri ? intval($uri['field_ai_utime']) : 0, + 'field_ai_status_text' => $this->statusLabel($uri ? 
intval($uri['field_ai_status']) : 0), + ]); + } + + private function statusLabel($status) + { + $map = [ + UserFieldAiService::STATUS_PENDING => 'pending', + UserFieldAiService::STATUS_DONE => 'done', + UserFieldAiService::STATUS_INSUFFICIENT => 'insufficient', + UserFieldAiService::STATUS_FAILED => 'failed', + ]; + return isset($map[$status]) ? $map[$status] : 'unknown'; + } +} diff --git a/application/api/job/AiCheckReferByDoi.php b/application/api/job/AiCheckReferByDoi.php new file mode 100644 index 00000000..750b6374 --- /dev/null +++ b/application/api/job/AiCheckReferByDoi.php @@ -0,0 +1,85 @@ +oQueueJob = new QueueJob; + $this->QueueRedis = QueueRedis::getInstance(); + } + + public function fire(Job $job, $data) + { + //任务开始判断 + $this->oQueueJob->init($job); + + // 获取 Redis 任务的原始数据 + $rawBody = empty($job->getRawBody()) ? '' : $job->getRawBody(); + $jobData = empty($rawBody) ? [] : json_decode($rawBody, true); + $jobId = empty($jobData['id']) ? 'unknown' : $jobData['id']; + + $this->oQueueJob->log("-----------队列任务开始-----------"); + $this->oQueueJob->log("当前任务ID: {$jobId}, 尝试次数: {$job->attempts()}"); + + + // 获取生产文章ID + $iPArticleId = empty($data['p_article_id']) ? 0 : $data['p_article_id']; + if (empty($iPArticleId)) { + $this->oQueueJob->log("无效的p_article_id,删除任务"); + $job->delete(); + return; + } + // 获取参考文献ID + $iPReferId = empty($data['p_refer_id']) ? 0 : $data['p_refer_id']; + if (empty($iPArticleId)) { + $this->oQueueJob->log("无效的p_article_id,删除任务"); + $job->delete(); + return; + } + try { + + // 生成Redis键并尝试获取锁 + $sClassName = get_class($this); + $sRedisKey = "queue_job:{$sClassName}:{$iPArticleId}:{$iPReferId}"; + $sRedisValue = uniqid() . '_' . 
getmypid(); + if (!$this->oQueueJob->acquireLock($sRedisKey, $sRedisValue, $job)) { + return; // 未获取到锁,已处理 + } + + //生成内容 + $oProductionArticleRefer = new \app\api\controller\References; + $response = $oProductionArticleRefer->getCheckByAiResult($data); + // 验证API响应 + if (empty($response)) { + throw new \RuntimeException("OpenAI API返回空结果"); + } + // 检查JSON解析错误 + $aResult = json_decode($response, true); + if (json_last_error() !== JSON_ERROR_NONE) { + throw new \RuntimeException("解析OpenAI响应失败: " . json_last_error_msg() . " | 原始响应: {$response}"); + } + $sMsg = empty($aResult['msg']) ? 'success' : $aResult['msg']; + //更新完成标识 + $this->QueueRedis->finishJob($sRedisKey, 'completed', $this->completedExprie,$sRedisValue); + $job->delete(); + $this->oQueueJob->log("任务执行成功 | 日志ID: {$sRedisKey} | 执行日志:{$sMsg}"); + + } catch (\RuntimeException $e) { + $this->oQueueJob->handleRetryableException($e,$sRedisKey,$sRedisValue, $job); + } catch (\LogicException $e) { + $this->oQueueJob->handleNonRetryableException($e,$sRedisKey,$sRedisValue, $job); + } catch (\Exception $e) { + $this->oQueueJob->handleRetryableException($e,$sRedisKey,$sRedisValue, $job); + } finally { + $this->oQueueJob->finnal(); + } + } +} \ No newline at end of file diff --git a/application/api/job/ArticleReferDetailQueue.php b/application/api/job/ArticleReferDetailQueue.php new file mode 100644 index 00000000..12190846 --- /dev/null +++ b/application/api/job/ArticleReferDetailQueue.php @@ -0,0 +1,92 @@ +oQueueJob = new QueueJob; + $this->QueueRedis = QueueRedis::getInstance(); + } + + public function fire(Job $job, $data) + { + //任务开始判断 + $this->oQueueJob->init($job); + + // 获取 Redis 任务的原始数据 + $rawBody = empty($job->getRawBody()) ? '' : $job->getRawBody(); + $jobData = empty($rawBody) ? [] : json_decode($rawBody, true); + $jobId = empty($jobData['id']) ? 
'unknown' : $jobData['id']; + + $this->oQueueJob->log("-----------队列任务开始-----------"); + $this->oQueueJob->log("当前任务ID: {$jobId}, 尝试次数: {$job->attempts()}"); + + // // 获取文章ID + // $iArticleId = empty($data['article_id']) ? 0 : $data['article_id']; + // if (empty($iArticleId)) { + // $this->oQueueJob->log("无效的article_id,删除任务"); + // $job->delete(); + // return; + // } + // 获取生产文章ID + $iPArticleId = empty($data['p_article_id']) ? 0 : $data['p_article_id']; + if (empty($iPArticleId)) { + $this->oQueueJob->log("无效的p_article_id,删除任务"); + $job->delete(); + return; + } + // 获取生产文章ID + $iPReferId = empty($data['p_refer_id']) ? 0 : $data['p_refer_id']; + if (empty($iPReferId)) { + $this->oQueueJob->log("无效的p_refer_id,删除任务"); + $job->delete(); + return; + } + try { + + // 生成Redis键并尝试获取锁 + $sClassName = get_class($this); + $sRedisKey = "queue_job:{$sClassName}:{$iPArticleId}:{$iPReferId}"; + $sRedisValue = uniqid() . '_' . getmypid(); + if (!$this->oQueueJob->acquireLock($sRedisKey, $sRedisValue, $job)) { + return; // 未获取到锁,已处理 + } + + //生成内容 + $oProductionArticleRefer = new ProductionArticleRefer; + $response = $oProductionArticleRefer->get($data); + // 验证API响应 + if (empty($response)) { + throw new \RuntimeException("返回空结果"); + } + // 检查JSON解析错误 + $aResult = json_decode($response, true); + if (json_last_error() !== JSON_ERROR_NONE) { + throw new \RuntimeException("解析响应失败: " . json_last_error_msg() . " | 原始响应: {$response}"); + } + $sMsg = empty($aResult['msg']) ? 
'success' : $aResult['msg']; + //更新完成标识 + $this->QueueRedis->finishJob($sRedisKey, 'completed', $this->completedExprie,$sRedisValue); + $job->delete(); + $this->oQueueJob->log("任务执行成功 | 日志ID: {$sRedisKey} | 执行日志:{$sMsg}"); + + } catch (\RuntimeException $e) { + $this->oQueueJob->handleRetryableException($e,$sRedisKey,$sRedisValue, $job); + } catch (\LogicException $e) { + $this->oQueueJob->handleNonRetryableException($e,$sRedisKey,$sRedisValue, $job); + } catch (\Exception $e) { + $this->oQueueJob->handleRetryableException($e,$sRedisKey,$sRedisValue, $job); + } finally { + $this->oQueueJob->finnal(); + } + } +} \ No newline at end of file diff --git a/application/api/job/ArticleReferQueue.php b/application/api/job/ArticleReferQueue.php new file mode 100644 index 00000000..e35ecc5a --- /dev/null +++ b/application/api/job/ArticleReferQueue.php @@ -0,0 +1,85 @@ +oQueueJob = new QueueJob; + $this->QueueRedis = QueueRedis::getInstance(); + } + + public function fire(Job $job, $data) + { + //任务开始判断 + $this->oQueueJob->init($job); + + // 获取 Redis 任务的原始数据 + $rawBody = empty($job->getRawBody()) ? '' : $job->getRawBody(); + $jobData = empty($rawBody) ? [] : json_decode($rawBody, true); + $jobId = empty($jobData['id']) ? 'unknown' : $jobData['id']; + + $this->oQueueJob->log("-----------队列任务开始-----------"); + $this->oQueueJob->log("当前任务ID: {$jobId}, 尝试次数: {$job->attempts()}"); + + // 获取文章ID + $iArticleId = empty($data['article_id']) ? 0 : $data['article_id']; + if (empty($iArticleId)) { + $this->oQueueJob->log("无效的article_id,删除任务"); + $job->delete(); + return; + } + // 获取生产文章ID + $iPArticleId = empty($data['p_article_id']) ? 0 : $data['p_article_id']; + if (empty($iPArticleId)) { + $this->oQueueJob->log("无效的p_article_id,删除任务"); + $job->delete(); + return; + } + try { + + // 生成Redis键并尝试获取锁 + $sClassName = get_class($this); + $sRedisKey = "queue_job:{$sClassName}:{$iArticleId}:{$iPArticleId}"; + $sRedisValue = uniqid() . '_' . 
getmypid(); + if (!$this->oQueueJob->acquireLock($sRedisKey, $sRedisValue, $job)) { + return; // 未获取到锁,已处理 + } + + //生成内容 + $oProductionArticleRefer = new ProductionArticleRefer; + $response = $oProductionArticleRefer->top($data); + // 验证API响应 + if (empty($response)) { + throw new \RuntimeException("OpenAI API返回空结果"); + } + // 检查JSON解析错误 + $aResult = json_decode($response, true); + if (json_last_error() !== JSON_ERROR_NONE) { + throw new \RuntimeException("解析OpenAI响应失败: " . json_last_error_msg() . " | 原始响应: {$response}"); + } + $sMsg = empty($aResult['msg']) ? 'success' : $aResult['msg']; + //更新完成标识 + $this->QueueRedis->finishJob($sRedisKey, 'completed', $this->completedExprie,$sRedisValue); + $job->delete(); + $this->oQueueJob->log("任务执行成功 | 日志ID: {$sRedisKey} | 执行日志:{$sMsg}"); + + } catch (\RuntimeException $e) { + $this->oQueueJob->handleRetryableException($e,$sRedisKey,$sRedisValue, $job); + } catch (\LogicException $e) { + $this->oQueueJob->handleNonRetryableException($e,$sRedisKey,$sRedisValue, $job); + } catch (\Exception $e) { + $this->oQueueJob->handleRetryableException($e,$sRedisKey,$sRedisValue, $job); + } finally { + $this->oQueueJob->finnal(); + } + } +} \ No newline at end of file diff --git a/application/api/job/PlagiarismPoll.php b/application/api/job/PlagiarismPoll.php index 560922e3..f4e1b214 100644 --- a/application/api/job/PlagiarismPoll.php +++ b/application/api/job/PlagiarismPoll.php @@ -31,6 +31,7 @@ class PlagiarismPoll return; } $svc = new PlagiarismService(); + $svc->log("PlagiarismPoll job is running"); $svc->runPollStatus($checkId, $attempt); $job->delete(); } diff --git a/application/api/job/PlagiarismRun.php b/application/api/job/PlagiarismRun.php index 74d18d79..757935e3 100644 --- a/application/api/job/PlagiarismRun.php +++ b/application/api/job/PlagiarismRun.php @@ -6,9 +6,9 @@ use think\queue\Job; use app\common\PlagiarismService; /** - * 队列任务:上传论文到 Turnitin + 触发 similarity 检测。 + * 队列任务:创建 Turnitin submission 并上传原稿;ingest 轮询与触发 
similarity 由后续 Job 完成。 * - * 完成后会自动入队 PlagiarismPoll 进行后续轮询。 + * 链:PlagiarismRun → PlagiarismWaitIngest → PlagiarismTriggerSimilarity → PlagiarismPoll * * data: * - check_id t_plagiarism_check.check_id @@ -29,8 +29,12 @@ class PlagiarismRun return; } $svc = new PlagiarismService(); - $svc->log("PlagiarismRun job act!!"); - $svc->runUploadAndTrigger($checkId, $filePath); + $svc->log('PlagiarismRun job is running'); + try { + $svc->runUploadOnly($checkId, $filePath); + } catch (\Throwable $e) { + $svc->markFailed($checkId, '[upload] ' . $e->getMessage()); + } $job->delete(); } } diff --git a/application/api/job/PlagiarismTriggerSimilarity.php b/application/api/job/PlagiarismTriggerSimilarity.php new file mode 100644 index 00000000..f06f65ad --- /dev/null +++ b/application/api/job/PlagiarismTriggerSimilarity.php @@ -0,0 +1,34 @@ +delete(); + return; + } + $svc = new PlagiarismService(); + $svc->log("PlagiarismTriggerSimilarity job is running"); + try { + $svc->runTriggerSimilarityOnly($checkId, $ingestAttempt); + } catch (\Throwable $e) { + $svc->markFailed($checkId, '[similarity] ' . $e->getMessage()); + } + $job->delete(); + } +} diff --git a/application/api/job/PlagiarismWaitIngest.php b/application/api/job/PlagiarismWaitIngest.php new file mode 100644 index 00000000..c47f3830 --- /dev/null +++ b/application/api/job/PlagiarismWaitIngest.php @@ -0,0 +1,34 @@ +delete(); + return; + } + $svc = new PlagiarismService(); + $svc->log("PlagiarismWaitIngest job is running"); + try { + $svc->runIngestPollStep($checkId, $attempt); + } catch (\Throwable $e) { + $svc->markFailed($checkId, '[ingest] ' . 
$e->getMessage()); + } + $job->delete(); + } +} diff --git a/application/api/job/ReminderEmailToReviewer.php b/application/api/job/ReminderEmailToReviewer.php new file mode 100644 index 00000000..312bc8ac --- /dev/null +++ b/application/api/job/ReminderEmailToReviewer.php @@ -0,0 +1,101 @@ +oQueueJob = new QueueJob; + $this->QueueRedis = QueueRedis::getInstance(); + } + + public function fire(Job $job, $data) + { + //任务开始判断 + $this->oQueueJob->init($job); + + // 获取 Redis 任务的原始数据 + $rawBody = empty($job->getRawBody()) ? '' : $job->getRawBody(); + $jobData = empty($rawBody) ? [] : json_decode($rawBody, true); + $jobId = empty($jobData['id']) ? 'unknown' : $jobData['id']; + + $this->oQueueJob->log("-----------队列任务开始-----------"); + $this->oQueueJob->log("当前任务ID: {$jobId}, 尝试次数: {$job->attempts()}"); + + try { + + // 验证任务数据完整性 + // 获取文章ID + $iArticleId = empty($data['article_id']) ? 0 : $data['article_id']; + //审稿记录表主键ID + $art_rev_id = empty($data['art_rev_id']) ? 0 : $data['art_rev_id']; + //审稿人ID + $reviewer_id = empty($data['reviewer_id']) ? 0 : $data['reviewer_id']; + //邮件类型 + $email_type = empty($data['email_type']) ? 0 : $data['email_type']; + if (empty($iArticleId)) { + $this->oQueueJob->log("无效的article_id,删除任务"); + $job->delete(); + return; + } + if (empty($art_rev_id)) { + $this->oQueueJob->log("无效的art_rev_id,删除任务"); + $job->delete(); + return; + } + if (empty($reviewer_id)) { + $this->oQueueJob->log("无效的reviewer_id,删除任务"); + $job->delete(); + return; + } + if (empty($email_type)) { + $this->oQueueJob->log("无效的email_type,删除任务"); + $job->delete(); + return; + } + // 生成唯一任务标识 + $sClassName = get_class($this); + $sRedisKey = "queue_job:{$sClassName}:{$iArticleId}:{$reviewer_id}:{$art_rev_id}:{$email_type}"; + $sRedisValue = uniqid() . '_' . 
getmypid(); + if (!$this->oQueueJob->acquireLock($sRedisKey, $sRedisValue, $job)) { + return; // 未获取到锁,已处理 + } + + // 执行核心任务 + //查询是否发送过邮件 + $oCronreview = new Cronreview; + $response = $oCronreview->reminder($data); + // 验证API响应 + if (empty($response)) { + throw new \RuntimeException("OpenAI API返回空结果"); + } + // 检查JSON解析错误 + $aResult = json_decode($response, true); + if (json_last_error() !== JSON_ERROR_NONE) { + throw new \RuntimeException("解析OpenAI响应失败: " . json_last_error_msg() . " | 原始响应: {$response}"); + } + $sMsg = empty($aResult['msg']) ? 'success' : $aResult['msg']; + //更新完成标识 + $this->QueueRedis->finishJob($sRedisKey, 'completed', $this->completedExprie,$sRedisValue); + $job->delete(); + $this->oQueueJob->log("任务执行成功 | 日志ID: {$sRedisKey} | 执行日志:{$sMsg}"); + + } catch (RuntimeException $e) { + $this->oQueueJob->handleRetryableException($e,$sRedisKey,$sRedisValue, $job); + } catch (LogicException $e) { + $this->oQueueJob->handleNonRetryableException($e,$sRedisKey,$sRedisValue, $job); + } catch (Exception $e) { + $this->oQueueJob->handleRetryableException($e,$sRedisKey,$sRedisValue, $job); + } finally { + $this->oQueueJob->finnal(); + } + } +} \ No newline at end of file diff --git a/application/api/job/UserFieldAiFill.php b/application/api/job/UserFieldAiFill.php new file mode 100644 index 00000000..deb1b447 --- /dev/null +++ b/application/api/job/UserFieldAiFill.php @@ -0,0 +1,35 @@ + 0) { + $svc->processUser($userId, $force); + } + $job->delete(); + + $delay = max(0, (int) (isset($data['delay']) ? 
$data['delay'] : 1)); +// $svc->enqueueNextFieldAi($delay, $queue, $userId, $force); + } +} diff --git a/application/api/job/mail.php b/application/api/job/mail.php index bbb23ccf..39f96c23 100644 --- a/application/api/job/mail.php +++ b/application/api/job/mail.php @@ -15,7 +15,7 @@ class mail { public function tgpu(Job $job, $data){ -// my_tg_pushmail($data); + my_tg_pushmail($data); $job->delete(); } diff --git a/application/common.php b/application/common.php index f4fb3041..31dd9c06 100644 --- a/application/common.php +++ b/application/common.php @@ -915,7 +915,7 @@ function prgeAuthor($author) function my_tg_pushmail($data) { -// $res = sendEmail($data['email'], $data['title'], $data['title'], $data['content'], $data['tmail'], $data['tpassword'], $data['attachmentFile']); + sendEmail($data['email'], $data['title'], $data['title'], $data['content'], $data['tmail'], $data['tpassword'], $data['attachmentFile']); // if (isset($res['status'])) { // $log_obj = Db::name('email_log'); // $insert['article_id'] = $data['article_id']; diff --git a/application/common/ArticleParserService.php b/application/common/ArticleParserService.php index 2d4619c8..2996ed01 100644 --- a/application/common/ArticleParserService.php +++ b/application/common/ArticleParserService.php @@ -1153,12 +1153,12 @@ class ArticleParserService } /** - * 提取 Word 文档中的参考文献列表(仅返回数组,不做入库) - * @return array 每条为一个参考文献的纯文本字符串 + * 按段落提取 Word 全文行(供正文裁切、参考文献识别等复用) + * @return array */ - public static function getReferencesFromWord($filePath): array + public static function collectParagraphLines($filePath): array { - $othis = new self($filePath) ; + $othis = new self($filePath); if (empty($othis->sections)) { return []; } @@ -1166,13 +1166,26 @@ class ArticleParserService $lines = []; foreach ($othis->sections as $section) { foreach ($section->getElements() as $element) { - $text = $othis->getTextFromElement($element); - $text = trim((string)$text); - if ($text === '') continue; - $lines[] = $text; + $text = 
trim((string) $othis->getTextFromElement($element)); + if ($text === '') { + continue; + } + if (!mb_check_encoding($text, 'UTF-8')) { + $text = mb_convert_encoding($text, 'UTF-8', 'GBK'); + } + $lines[] = preg_replace('/\s+/u', ' ', $text); } } + return $lines; + } + /** + * 提取 Word 文档中的参考文献列表(仅返回数组,不做入库) + * @return array 每条为一个参考文献的纯文本字符串 + */ + public static function getReferencesFromWord($filePath): array + { + $lines = self::collectParagraphLines($filePath); if (empty($lines)) { return []; } diff --git a/application/common/ManuscriptBodyExtractor.php b/application/common/ManuscriptBodyExtractor.php new file mode 100644 index 00000000..3115bf2d --- /dev/null +++ b/application/common/ManuscriptBodyExtractor.php @@ -0,0 +1,356 @@ + */ + private $blocks = []; + + /** @var array */ + private $blockTexts = []; + + /** + * @return array{path:string, rel_path:string, line_count:int, ref_start:int, body_start:int, warnings:array} + */ + public function buildBodyOnlyDocx($sourcePath, $articleId = 0) + { + $sourcePath = trim((string) $sourcePath); + if (!is_file($sourcePath) || !is_readable($sourcePath)) { + throw new Exception('Manuscript not readable: ' . $sourcePath); + } + $ext = strtolower(pathinfo($sourcePath, PATHINFO_EXTENSION)); + if ($ext !== 'docx') { + throw new Exception('body_only check requires DOCX manuscript, got: ' . 
$ext); + } + + $this->loadDocumentBlocks($sourcePath); + if (empty($this->blocks)) { + throw new Exception('No content blocks in manuscript'); + } + + $refStart = $this->findReferenceStartIndex(); + $bodyStart = $this->findBodyStartIndex(); + $warnings = []; + + if ($refStart < 0) { + $warnings[] = 'references_heading_not_found; using document end'; + $refStart = count($this->blocks); + } + if ($bodyStart >= $refStart) { + throw new Exception('Could not locate main body (front matter may include entire document)'); + } + + $kept = 0; + for ($i = $bodyStart; $i < $refStart; $i++) { + if (trim($this->blockTexts[$i]) !== '') { + $kept++; + } + } + if ($kept < 3) { + throw new Exception('Body content too short after extraction (' . $kept . ' non-empty blocks)'); + } + + $relPath = $this->sliceDocxToNewFile($sourcePath, $articleId, $bodyStart, $refStart); + $rootDir = rtrim(ROOT_PATH ?: dirname(dirname(__DIR__)), '/\\'); + $absPath = $rootDir . DIRECTORY_SEPARATOR . str_replace(['/', '\\'], DIRECTORY_SEPARATOR, $relPath); + + return [ + 'path' => $absPath, + 'rel_path' => $relPath, + 'line_count' => $kept, + 'ref_start' => $refStart, + 'body_start' => $bodyStart, + 'warnings' => $warnings, + ]; + } + + private function loadDocumentBlocks($sourcePath) + { + $zip = new ZipArchive(); + if ($zip->open($sourcePath) !== true) { + throw new Exception('Cannot open docx: ' . 
$sourcePath); + } + $xml = $zip->getFromName('word/document.xml'); + $zip->close(); + if ($xml === false || $xml === '') { + throw new Exception('word/document.xml missing in docx'); + } + + $this->dom = new DOMDocument(); + $this->dom->preserveWhiteSpace = false; + $this->dom->formatOutput = false; + if (@$this->dom->loadXML($xml) === false) { + throw new Exception('Invalid word/document.xml'); + } + + $xpath = new DOMXPath($this->dom); + $xpath->registerNamespace('w', self::W_NS); + $body = $xpath->query('//w:body')->item(0); + if (!$body instanceof DOMElement) { + throw new Exception('w:body not found'); + } + + $this->bodyNode = $body; + $this->blocks = []; + $this->blockTexts = []; + + foreach ($body->childNodes as $child) { + if ($child->nodeType !== XML_ELEMENT_NODE) { + continue; + } + /** @var DOMElement $child */ + if ($child->localName === 'sectPr') { + continue; + } + $this->blocks[] = $child; + $this->blockTexts[] = $this->extractVisibleTextFromBlock($child); + } + } + + /** + * 仅拼接 w:t 可见文本,忽略 w:instrText 等域指令(避免 Zotero JSON 参与裁切判断)。 + */ + private function extractVisibleTextFromBlock(DOMElement $block) + { + $xpath = new DOMXPath($block->ownerDocument); + $xpath->registerNamespace('w', self::W_NS); + $nodes = $xpath->query('.//w:t', $block); + if (!$nodes || $nodes->length === 0) { + return ''; + } + $parts = []; + foreach ($nodes as $node) { + $parts[] = $node->textContent; + } + $text = preg_replace('/\s+/u', ' ', implode('', $parts)); + return trim((string) $text); + } + + private function sliceDocxToNewFile($sourcePath, $articleId, $bodyStart, $refStart) + { + $rootDir = rtrim(ROOT_PATH ?: dirname(dirname(__DIR__)), '/\\'); + $dir = $rootDir . DIRECTORY_SEPARATOR . self::BODY_SUBDIR; + if (!is_dir($dir)) { + @mkdir($dir, 0755, true); + } + + $name = sprintf('body_article_%d_%s.docx', intval($articleId), date('Ymd_His')); + $absPath = $dir . DIRECTORY_SEPARATOR . 
$name; + + if (!copy($sourcePath, $absPath)) { + throw new Exception('Failed to copy source docx'); + } + + $n = count($this->blocks); + + $zip = new ZipArchive(); + if ($zip->open($absPath) !== true) { + throw new Exception('Cannot open output docx'); + } + + $xml = $zip->getFromName('word/document.xml'); + if ($xml === false) { + $zip->close(); + throw new Exception('document.xml missing in output docx'); + } + + $outDom = new DOMDocument(); + $outDom->preserveWhiteSpace = false; + $outDom->formatOutput = false; + if (@$outDom->loadXML($xml) === false) { + $zip->close(); + throw new Exception('Invalid document.xml in output docx'); + } + + $xpath = new DOMXPath($outDom); + $xpath->registerNamespace('w', self::W_NS); + $body = $xpath->query('//w:body')->item(0); + if (!$body instanceof DOMElement) { + $zip->close(); + throw new Exception('w:body not found in output docx'); + } + + $children = []; + foreach ($body->childNodes as $child) { + if ($child->nodeType === XML_ELEMENT_NODE) { + $children[] = $child; + } + } + + $blockIdx = 0; + foreach ($children as $child) { + if (!($child instanceof DOMElement)) { + continue; + } + if ($child->localName === 'sectPr') { + continue; + } + if ($blockIdx < $bodyStart || $blockIdx >= $refStart) { + if ($child->parentNode) { + $child->parentNode->removeChild($child); + } + } + $blockIdx++; + } + + if ($blockIdx !== $n) { + $zip->close(); + @unlink($absPath); + throw new Exception('Document block count mismatch during slice'); + } + + $zip->addFromString('word/document.xml', $outDom->saveXML()); + $zip->close(); + + if (!is_file($absPath) || filesize($absPath) < 200) { + throw new Exception('Failed to write body-only docx'); + } + + return self::BODY_SUBDIR . '/' . 
$name; + } + + private function findReferenceStartIndex() + { + $stopKeywords = [ + 'acknowledgements', 'acknowledgments', 'funding', 'appendix', 'supplementary', + 'conflict of interest', 'competing interests', 'author contributions', + '致谢', '基金', '附录', '补充材料', '利益冲突', '作者贡献', + ]; + + foreach ($this->blockTexts as $i => $line) { + $t = trim($line); + if ($t === '') { + continue; + } + if (preg_match('/^\s*(references|reference|bibliography|参考文献|文献)\b\s*[::]?\s*/iu', $t)) { + return $i; + } + $lower = strtolower($t); + foreach ($stopKeywords as $sk) { + $skLower = strtolower($sk); + if ($lower === $skLower || $lower === $skLower . ':' || $lower === $skLower . ':') { + if ($i > count($this->blockTexts) * 0.4) { + return $i; + } + } + } + } + return -1; + } + + private function findBodyStartIndex() + { + $n = count($this->blockTexts); + $introIdx = -1; + $keywordsIdx = -1; + + for ($i = 0; $i < $n; $i++) { + $t = trim($this->blockTexts[$i]); + if ($t === '') { + continue; + } + if ($introIdx < 0 && $this->isIntroductionHeading($t)) { + $introIdx = $i; + } + if ($keywordsIdx < 0 && preg_match('/^\s*keywords?\b\s*[::]?/iu', $t)) { + $keywordsIdx = $i; + } + } + + if ($introIdx >= 0) { + return $introIdx; + } + + if ($keywordsIdx >= 0) { + $afterKw = $this->indexAfterKeywordsBlock($keywordsIdx); + if ($afterKw < $n) { + return $afterKw; + } + } + + return $this->indexAfterFrontMatterFallback(); + } + + private function isIntroductionHeading($t) + { + if (preg_match('/^\s*(introduction|background|materials and methods|materials & methods|methods and materials)\b\s*[::]?/iu', $t)) { + return true; + } + if (preg_match('/^\s*(引言|前言|背景|材料与方法|资料与方法|研究方法)\b\s*[::]?/iu', $t)) { + return true; + } + if (preg_match('/^\s*1[\.\s、]+(introduction|引言|前言)\b/iu', $t)) { + return true; + } + return false; + } + + private function indexAfterKeywordsBlock($kwIdx) + { + $n = count($this->blockTexts); + for ($i = $kwIdx + 1; $i < $n; $i++) { + $t = trim($this->blockTexts[$i]); + if ($t 
=== '') { + continue; + } + if ($this->isIntroductionHeading($t)) { + return $i; + } + if (preg_match('/^\s*abstract\b/iu', $t)) { + continue; + } + if (mb_strlen($t) >= 30 && !$this->looksLikeAffiliationLine($t)) { + return $i; + } + } + return min($kwIdx + 1, $n - 1); + } + + private function indexAfterFrontMatterFallback() + { + $n = count($this->blockTexts); + $maxSkip = min(20, (int) floor($n * 0.15)); + for ($i = 0; $i < $maxSkip && $i < $n; $i++) { + $t = trim($this->blockTexts[$i]); + if ($t === '') { + continue; + } + if ($this->isIntroductionHeading($t)) { + return $i; + } + } + return min(8, max(0, $n - 1)); + } + + private function looksLikeAffiliationLine($t) + { + if (preg_match('/@|mailto:|correspond|univ|university|hospital|institute|department|^\d+[\s,,]/iu', $t)) { + return true; + } + if (preg_match('/^\s*abstract\b/iu', $t) || preg_match('/^\s*keywords?\b/iu', $t)) { + return true; + } + return false; + } +} diff --git a/application/common/PlagiarismService.php b/application/common/PlagiarismService.php index a067a3de..1927c124 100644 --- a/application/common/PlagiarismService.php +++ b/application/common/PlagiarismService.php @@ -12,10 +12,14 @@ use think\Exception; * 并维护 t_plagiarism_check 状态机。 * * 状态流: - * submit() → state=1(上传中),入队 PlagiarismRun - * PlagiarismRun.fire → 上传 + 触发 similarity → state=2(比对中),入队 PlagiarismPoll - * PlagiarismPoll.fire → 轮询 status,完成后下载 PDF → state=3(完成) - * 任意环节抛异常 → state=4(失败),写 error_msg + * submit() → state=1(上传中),入队 PlagiarismRun + * PlagiarismRun → 创建 submission + 上传文件 → 入队 PlagiarismWaitIngest + * PlagiarismWaitIngest → 单次 GET submission 状态;就绪则入队 PlagiarismTriggerSimilarity,否则延迟再入队 + * PlagiarismTriggerSimilarity → PUT similarity → state=2(比对中),入队 PlagiarismPoll + * PlagiarismPoll → 轮询 similarity,完成后下载 PDF → state=3(完成);在线 viewer URL 按需 getReportUrl 调用 refreshViewerUrlFor + * 任意环节抛异常 → state=4(失败),写 error_msg + * + * Worker:请用 `queue:work` 消费队列 **plagiarism**(整条链与轮询均在此队列;若此前单独监听 PlagiarismRun / 
PlagiarismPoll,需改为 plagiarism)。 */ class PlagiarismService { @@ -24,6 +28,16 @@ class PlagiarismService */ const REPORT_DIR = 'public/plagiarism'; + /** Run / WaitIngest / TriggerSimilarity / Poll 共用队列名 */ + const QUEUE_CHAIN = 'plagiarism'; + + const CHECK_TYPE_FULL = 'full'; + const CHECK_TYPE_BODY = 'body_only'; + + const JOB_WAIT_INGEST = 'app\\api\\job\\PlagiarismWaitIngest'; + const JOB_TRIGGER_SIM = 'app\\api\\job\\PlagiarismTriggerSimilarity'; + const JOB_POLL = 'app\\api\\job\\PlagiarismPoll'; + /** * 轮询间隔(秒)。Turnitin 一般 1-5 分钟出结果,30 秒一次比较合适 */ @@ -50,104 +64,229 @@ class PlagiarismService * @param string $filePath 本地可读的 PDF/DOCX 绝对路径 * @param int $triggeredBy 触发人 user_id(手工触发时编辑后台的 user_id) * @param string $source 'manual' / 'auto_xxx' + * @param string $checkType full | body_only * @return int check_id */ - public function submit($articleId, $filePath, $triggeredBy = 0, $source = 'manual') + public function submit($articleId, $filePath, $triggeredBy = 0, $source = 'manual', $checkType = self::CHECK_TYPE_FULL) { if (!is_file($filePath) || !is_readable($filePath)) { throw new Exception("File not readable: {$filePath}"); } + $checkType = $this->normalizeCheckType($checkType); + $uploadPath = $filePath; + $derivedRel = ''; + $sourceName = basename($filePath); + + if ($checkType === self::CHECK_TYPE_BODY) { + $built = (new ManuscriptBodyExtractor())->buildBodyOnlyDocx($filePath, $articleId); + $uploadPath = $built['path']; + $derivedRel = (string) $built['rel_path']; + $sourceName = basename($uploadPath); + if (!empty($built['warnings'])) { + $this->log('body_only warnings check article=' . $articleId . ' ' . 
implode('; ', $built['warnings'])); + } + } + $journalId = (int) Db::name('article') ->where('article_id', $articleId) ->value('journal_id'); - + $this->log("plagiarism submit type={$checkType} article={$articleId}"); $now = time(); - $checkId = Db::name('plagiarism_check')->insertGetId([ + $row = [ 'article_id' => $articleId, 'journal_id' => $journalId, 'triggered_by' => $triggeredBy, 'trigger_source' => $source, - 'state' => 1, // 上传中 - 'source_file_name' => basename($filePath), - 'source_file_size' => filesize($filePath) ?: 0, + 'check_type' => $checkType, + 'state' => 1, + 'source_file_name' => $sourceName, + 'source_file_size' => filesize($uploadPath) ?: 0, 'ctime' => $now, 'utime' => $now, - ]); - $this->log("submit service act"); - // 入队执行:上传 + 触发 similarity + ]; + if ($derivedRel !== '') { + $row['derived_file_path'] = $derivedRel; + } + $checkId = Db::name('plagiarism_check')->insertGetId($row); Queue::push( 'app\\api\\job\\PlagiarismRun', - ['check_id' => $checkId, 'file_path' => $filePath], - 'PlagiarismRun' + ['check_id' => $checkId, 'file_path' => $uploadPath], + self::QUEUE_CHAIN ); - return (int)$checkId; + return (int) $checkId; } /** - * Job 调用:上传文件到 Turnitin 并触发 similarity,然后入队 PlagiarismPoll + * 同时提交全文 + 正文两次查重 + * @return array{full:int, body_only:int} + */ + public function submitBoth($articleId, $filePath, $triggeredBy = 0, $source = 'manual') + { + return [ + 'full' => $this->submit($articleId, $filePath, $triggeredBy, $source, self::CHECK_TYPE_FULL), + 'body_only' => $this->submit($articleId, $filePath, $triggeredBy, $source, self::CHECK_TYPE_BODY), + ]; + } + + private function normalizeCheckType($checkType) + { + $t = strtolower(trim((string) $checkType)); + if ($t === '' || $t === self::CHECK_TYPE_FULL || $t === 'full') { + return self::CHECK_TYPE_FULL; + } + if ($t === self::CHECK_TYPE_BODY || $t === 'body' || $t === 'bodyonly') { + return self::CHECK_TYPE_BODY; + } + throw new Exception('invalid check_type, use full or body_only'); + } 
+ + /** + * Job 调用:仅创建 submission + 上传文件,随后由 PlagiarismWaitIngest 链式轮询 ingest,再 PlagiarismTriggerSimilarity。 + */ + public function runUploadOnly($checkId, $filePath) + { + $check = $this->mustGetCheck($checkId); + $this->log('runUploadOnly start check_id=' . $checkId); + $tii = new TurnitinService(); + + $articleTitle = (string) Db::name('article') + ->where('article_id', $check['article_id']) + ->value('title'); + if ($articleTitle === '') { + $articleTitle = 'Article #' . $check['article_id']; + } + + $createResp = $tii->createSubmission([ + 'title' => mb_substr($articleTitle, 0, 250), + 'owner' => 'editor_' . $check['triggered_by'], + 'submitter' => 'editor_' . $check['triggered_by'], + 'metadata' => [ + 'article_id' => (string) $check['article_id'], + 'check_id' => (string) $check['check_id'], + ], + ]); + $submissionId = isset($createResp['id']) ? $createResp['id'] : ''; + if ($submissionId === '') { + throw new Exception('Turnitin createSubmission returned empty id: ' . json_encode($createResp)); + } + + $this->updateCheck($checkId, [ + 'tii_submission_id' => $submissionId, + 'raw_response' => json_encode($createResp, JSON_UNESCAPED_UNICODE), + ]); + + $tii->uploadFile($submissionId, $filePath, basename($filePath)); + $firstDelay = $this->ingestChainFirstDelaySec(); + Queue::later( + $firstDelay, + self::JOB_WAIT_INGEST, + ['check_id' => $checkId, 'attempt' => 1], + self::QUEUE_CHAIN + ); + } + + /** + * 单次 ingest 检查(由 PlagiarismWaitIngest 调用)。不在本方法内 sleep 长循环。 + */ + public function runIngestPollStep($checkId, $attempt = 1) + { + $check = $this->mustGetCheck($checkId); + if (empty($check['tii_submission_id'])) { + $this->markFailed($checkId, '[ingest] tii_submission_id empty'); + return; + } + $this->log("runIngestPollStep is running"); + $maxAttempts = $this->ingestChainMaxAttempts(); + $interval = $this->ingestChainPollIntervalSec(); + $tii = new TurnitinService(); + + try { + $parsed = $tii->parseSubmissionIngestState($check['tii_submission_id']); + } 
catch (\Throwable $e) { + if ($attempt >= $maxAttempts) { + $this->markFailed($checkId, '[ingest] request failed after ' . $attempt . ' tries: ' . $e->getMessage()); + return; + } + Queue::later($interval, self::JOB_WAIT_INGEST, ['check_id' => $checkId, 'attempt' => $attempt + 1], self::QUEUE_CHAIN); + return; + } + + if (!empty($parsed['failed'])) { + $this->markFailed($checkId, '[ingest] submission failed status=' . $parsed['status'] . ' ' . $parsed['snippet']); + return; + } + if (!empty($parsed['ready'])) { + Queue::push(self::JOB_TRIGGER_SIM, ['check_id' => $checkId, 'ingest_attempt' => $attempt], self::QUEUE_CHAIN); + return; + } + if ($attempt >= $maxAttempts) { + $this->markFailed($checkId, '[ingest] timeout last_status=' . ($parsed['status'] !== '' ? $parsed['status'] : '(empty)')); + return; + } + Queue::later($interval, self::JOB_WAIT_INGEST, ['check_id' => $checkId, 'attempt' => $attempt + 1], self::QUEUE_CHAIN); + } + + /** + * 在 ingest 就绪后触发 similarity,并入队 PlagiarismPoll。 + * 若仍返回 409,则重新入队 PlagiarismWaitIngest(不抛异常,避免误标失败)。 + * + * @param int $ingestAttempt 来自 WaitIngest 的 attempt,供 409 时继续轮询 + */ + public function runTriggerSimilarityOnly($checkId, $ingestAttempt = 1) + { + $check = $this->mustGetCheck($checkId); + if (empty($check['tii_submission_id'])) { + $this->markFailed($checkId, '[similarity] tii_submission_id empty'); + return; + } + + $this->log("runTriggerSimilarityOnly is running"); + $tii = new TurnitinService(); + $sid = $check['tii_submission_id']; + + try { + $simResp = $tii->triggerSimilarity($sid); + } catch (\Throwable $e) { + $msg = $e->getMessage(); + $is409 = (stripos($msg, '409') !== false || stripos($msg, 'CONFLICT') !== false) + && (stripos($msg, 'not been completed') !== false || stripos($msg, 'completed yet') !== false); + if ($is409) { + $maxAttempts = $this->ingestChainMaxAttempts(); + $next = $ingestAttempt + 1; + if ($next > $maxAttempts) { + $this->markFailed($checkId, '[similarity] still not ready after ingest 
attempts: ' . $msg); + return; + } + $delay = max($this->ingestChainPollIntervalSec(), 20); + Queue::later($delay, self::JOB_WAIT_INGEST, ['check_id' => $checkId, 'attempt' => $next], self::QUEUE_CHAIN); + return; + } + throw $e; + } + + $this->updateCheck($checkId, [ + 'state' => 2, + 'tii_report_status' => 'PROCESSING', + 'raw_response' => json_encode($simResp, JSON_UNESCAPED_UNICODE), + ]); + + Queue::later( + self::POLL_INTERVAL, + self::JOB_POLL, + ['check_id' => $checkId, 'attempt' => 1], + self::QUEUE_CHAIN + ); + } + + /** + * @deprecated 与 runUploadOnly 等价;长耗时 ingest 已拆到队列 PlagiarismWaitIngest,勿在本方法内同步 wait。 */ public function runUploadAndTrigger($checkId, $filePath) { - $check = $this->mustGetCheck($checkId); - $this->log("runUploadAndTrigger is act0"); - try { - $tii = new TurnitinService(); - - // 1. 创建 submission - $articleTitle = (string) Db::name('article') - ->where('article_id', $check['article_id']) - ->value('title'); - if ($articleTitle === '') { - $articleTitle = 'Article #' . $check['article_id']; - } - $this->log("runUploadAndTrigger is act1"); - $createResp = $tii->createSubmission([ - 'title' => mb_substr($articleTitle, 0, 250), - 'owner' => 'editor_' . $check['triggered_by'], - 'submitter' => 'editor_' . $check['triggered_by'], - 'metadata' => [ - 'article_id' => (string)$check['article_id'], - 'check_id' => (string)$check['check_id'], - ], - ]); - $submissionId = isset($createResp['id']) ? $createResp['id'] : ''; - if ($submissionId === '') { - throw new Exception('Turnitin createSubmission returned empty id: ' . json_encode($createResp)); - } - - $this->updateCheck($checkId, [ - 'tii_submission_id' => $submissionId, - 'raw_response' => json_encode($createResp, JSON_UNESCAPED_UNICODE), - ]); - $this->log("runUploadAndTrigger is act2"); - // 2. 上传文件 - $tii->uploadFile($submissionId, $filePath, basename($filePath)); - - // 3. 
触发 similarity - $simResp = $tii->triggerSimilarity($submissionId); - - $this->updateCheck($checkId, [ - 'state' => 2, // 比对中 - 'tii_report_status' => 'PROCESSING', - 'raw_response' => json_encode($simResp, JSON_UNESCAPED_UNICODE), - ]); - - $this->log("runUploadAndTrigger is act3"); - - // 4. 排队首次轮询(晚一点开始,让 Turnitin 先处理) - Queue::later( - self::POLL_INTERVAL, - 'app\\api\\job\\PlagiarismPoll', - ['check_id' => $checkId, 'attempt' => 1], - 'PlagiarismPoll' - ); - } catch (\Throwable $e) { - $this->markFailed($checkId, '[upload] ' . $e->getMessage()); - throw $e; - } + $this->runUploadOnly($checkId, $filePath); } /** @@ -173,20 +312,25 @@ class PlagiarismService ]); if ($status === 'COMPLETE') { - $score = isset($statusResp['overall_match_percentage']) - ? floatval($statusResp['overall_match_percentage']) : 0; + $score = TurnitinService::extractOverallMatchPercentage($statusResp); + if ($score <= 0 && isset($statusResp['overall_match_percentage'])) { + $score = floatval($statusResp['overall_match_percentage']); + } + $this->log('poll complete check_id=' . $checkId . ' score=' . $score + . ' check_type=' . ($check['check_type'] ?? 'full')); - // 下载 PDF + 取在线查看 URL $localPdf = $this->downloadAndStorePdf($tii, $check['tii_submission_id'], $checkId); - $viewerInfo = $this->refreshViewerUrl($tii, $check['tii_submission_id']); + + $meta = TurnitinService::parseSimilarityReportMeta($statusResp); + if ($meta['score'] > 0) { + $score = $meta['score']; + } $this->updateCheck($checkId, [ - 'state' => 3, - 'similarity_score' => $score, - 'pdf_local_path' => $localPdf, - 'view_only_url' => $viewerInfo['url'], - 'view_only_url_expire' => $viewerInfo['expire'], - 'error_msg' => '', + 'state' => 3, + 'similarity_score' => $score, + 'pdf_local_path' => $localPdf, + 'error_msg' => '', ]); return; } @@ -197,25 +341,23 @@ class PlagiarismService return; } - // PROCESSING 或其它中间态:继续轮询 if ($attempt >= self::MAX_POLL_ATTEMPTS) { $this->markFailed($checkId, '[poll] timeout after ' . 
$attempt . ' attempts'); return; } Queue::later( self::POLL_INTERVAL, - 'app\\api\\job\\PlagiarismPoll', + self::JOB_POLL, ['check_id' => $checkId, 'attempt' => $attempt + 1], - 'plagiarism' + self::QUEUE_CHAIN ); } catch (\Throwable $e) { - // 网络抖动不要直接 fail,给一定容错次数 if ($attempt < self::MAX_POLL_ATTEMPTS) { Queue::later( self::POLL_INTERVAL, - 'app\\api\\job\\PlagiarismPoll', + self::JOB_POLL, ['check_id' => $checkId, 'attempt' => $attempt + 1], - 'plagiarism' + self::QUEUE_CHAIN ); $this->updateCheck($checkId, [ 'attempts' => $attempt, @@ -229,42 +371,71 @@ class PlagiarismService } /** - * 重新生成在线查看 URL(已有的过期了用) + * 按需获取/刷新 Turnitin 在线报告 URL(与 poll 解耦,避免 viewer-url 失败拖死查重完成)。 * - * @return array{url:string, expire:int, local_pdf:string} + * @param array $viewerContext editor_id=当前打开报告的编辑 user_id;viewer_user_id 可显式指定 + * @return array{url:string, expire:int, local_pdf:string, viewer_user_id:string} */ - public function refreshViewerUrlFor($checkId) + public function refreshViewerUrlFor($checkId, array $viewerContext = []) { $check = $this->mustGetCheck($checkId); if (empty($check['tii_submission_id'])) { throw new Exception('check has no tii_submission_id'); } $tii = new TurnitinService(); - $info = $this->refreshViewerUrl($tii, $check['tii_submission_id']); + $info = $this->refreshViewerUrl($tii, $check['tii_submission_id'], $check, $viewerContext); $this->updateCheck($checkId, [ 'view_only_url' => $info['url'], 'view_only_url_expire' => $info['expire'], ]); return [ - 'url' => $info['url'], - 'expire' => $info['expire'], - 'local_pdf' => $check['pdf_local_path'], + 'url' => $info['url'], + 'expire' => $info['expire'], + 'local_pdf' => $check['pdf_local_path'], + 'viewer_user_id' => $info['viewer_user_id'], ]; } // ---------- 内部 ---------- - private function refreshViewerUrl($tii, $submissionId) + /** + * 调用 Turnitin POST viewer-url;仅由 refreshViewerUrlFor / getReportUrl 触发。 + */ + private function refreshViewerUrl($tii, $submissionId, array $check = [], array 
$viewerContext = []) { - $resp = $tii->getViewerUrl($submissionId); + $viewerOpts = $viewerContext; + if (!isset($viewerOpts['editor_id']) && !empty($check['triggered_by'])) { + $viewerOpts['triggered_by'] = intval($check['triggered_by']); + } + $viewerUserId = $tii->resolveViewerUserId($viewerOpts); + $resp = $tii->getViewerUrl($submissionId, $viewerOpts); $url = ''; if (isset($resp['viewer_url'])) { - $url = (string)$resp['viewer_url']; + $url = (string) $resp['viewer_url']; } elseif (isset($resp['url'])) { - $url = (string)$resp['url']; + $url = (string) $resp['url']; + } elseif (isset($resp['launch_url'])) { + $url = (string) $resp['launch_url']; } - // 默认 2 小时过期,保守起见 - return ['url' => $url, 'expire' => time() + 7200]; + if ($url === '') { + throw new Exception('viewer-url response has no url: ' . json_encode($resp, JSON_UNESCAPED_UNICODE)); + } + $expire = time() + 7200; + foreach (['viewer_url_expires', 'expires_at', 'expiration_time', 'expire_time'] as $k) { + if (empty($resp[$k])) { + continue; + } + $ts = is_numeric($resp[$k]) ? intval($resp[$k]) : strtotime((string) $resp[$k]); + if ($ts > time()) { + $expire = $ts; + break; + } + } + return [ + 'url' => $url, + 'expire' => $expire, + 'viewer_user_id' => $viewerUserId, + ]; } /** @@ -328,15 +499,30 @@ class PlagiarismService Db::name('plagiarism_check')->where('check_id', $checkId)->update($data); } - private function markFailed($checkId, $errMsg) + public function markFailed($checkId, $errMsg) { - $this->log("markFailed act"); + $this->log('markFailed check_id=' . 
$checkId); $this->updateCheck($checkId, [ 'state' => 4, 'error_msg' => mb_substr($errMsg, 0, 1000), ]); } + private function ingestChainFirstDelaySec() + { + return max(3, (int) Env::get('turnitin.ingest_chain_first_delay', 10)); + } + + private function ingestChainPollIntervalSec() + { + return max(60, (int) Env::get('turnitin.ingest_chain_poll_interval', 15)); + } + + private function ingestChainMaxAttempts() + { + return max(10, (int) Env::get('turnitin.ingest_chain_max_attempts', 80)); + } + /** * 从 t_article_file 找到投稿主稿(manuscirpt)的本地绝对路径。 * file_url 在系统里可能是 URL 或相对路径,调用方负责保证可读。 diff --git a/application/common/TurnitinService.php b/application/common/TurnitinService.php index 5b1a8898..0e1dc9aa 100644 --- a/application/common/TurnitinService.php +++ b/application/common/TurnitinService.php @@ -18,6 +18,14 @@ use think\Exception; * API_KEY 生成的 Bearer token * INTEGRATION_NAME Scope Name(创建 integration 时填的名字) * INTEGRATION_VERSION 自定义版本号,便于审计 e.g. 1.0.0 + * SUBMISSION_INGEST_MAX_WAIT 上传后轮询 submission 就绪的最长秒数,默认 600(仅 waitAfterUploadForSimilarity 同步用) + * SUBMISSION_INGEST_POLL_INTERVAL 同步轮询间隔秒数,默认 3 + * INGEST_CHAIN_FIRST_DELAY 上传后首次 ingest 检查延迟秒数,默认 10(队列链) + * INGEST_CHAIN_POLL_INTERVAL ingest 链每步间隔秒数,默认 15 + * INGEST_CHAIN_MAX_ATTEMPTS ingest 链最大步数,默认 80 + * EXCLUDE_QUOTES / EXCLUDE_BIBLIOGRAPHY / EXCLUDE_CITATIONS 0|1,默认 0(与 Crossref 网页手动查重更接近) + * VIEWER_DEFAULT_MODE match_overview | all_sources(默认 all_sources,便于按来源库分类查看) + * ADD_TO_INDEX 0|1,默认 1 * * API 文档:https://developers.turnitin.com/docs/tca * @@ -36,8 +44,8 @@ class TurnitinService public function __construct() { - $this->baseUrl = rtrim(trim((string)Env::get('turnitin.base_url', '')), '/'); - $this->apiKey = trim((string)Env::get('turnitin.api_key', '')); + $this->baseUrl = rtrim(trim((string)Env::get('turnitin.base_url', 'https://crossref-20794.turnitin.com/api/v1')), '/'); + $this->apiKey = trim((string)Env::get('turnitin.api_key', 'c6315e8291a4433dae09ad5efdb8a89c')); $this->integrationName = 
trim((string)Env::get('turnitin.integration_name', 'tmr')); $this->integrationVersion = trim((string)Env::get('turnitin.integration_version', '1.0.0')); @@ -80,11 +88,13 @@ class TurnitinService /** * 上传文件到 submission - * PUT /submissions/{id}/original/{filename} + * + * TCA 文档路径为 PUT /submissions/{id}/original(文件名仅通过 Content-Disposition 传递, + * 不要再拼在 URL 末尾;否则网关会 404,错误里常见 path 形如 //v1/submissions/.../original/xxx.docx)。 * * @param string $submissionId * @param string $filePath 本地 PDF/DOCX 路径 - * @param string $filename 传给 Turnitin 的文件名(用于报告显示) + * @param string $filename 传给 Turnitin 的展示文件名(默认取 basename) * @return array */ public function uploadFile($submissionId, $filePath, $filename = '') @@ -95,15 +105,20 @@ class TurnitinService if ($filename === '') { $filename = basename($filePath); } + // Content-Disposition 里避免未转义的双引号 + $safeName = str_replace(['"', "\r", "\n"], '', $filename); + if ($safeName === '') { + $safeName = 'document.bin'; + } $body = file_get_contents($filePath); return $this->request( 'PUT', - '/submissions/' . urlencode($submissionId) . '/original/' . rawurlencode($filename), + '/submissions/' . rawurlencode($submissionId) . '/original', $body, [ - 'Content-Type' => 'binary/octet-stream', - 'Content-Disposition' => 'inline; filename="' . $filename . '"', + 'Content-Type' => 'application/octet-stream', + 'Content-Disposition' => 'attachment; filename="' . $safeName . '"', ] ); } @@ -114,37 +129,340 @@ class TurnitinService * * @param string $submissionId * @param array $opts - * - generation_settings.search_repositories 默认 ['INTERNET','PUBLICATION','CROSSREF','CROSSREF_POSTED_CONTENT','SUBMITTED_WORK'] - * - generation_settings.submission_auto_excludes bool - * - view_settings.exclude_quotes / exclude_bibliography / exclude_citations / exclude_abstract / exclude_methods bool + * - generation_settings.search_repositories 默认 ['INTERNET','PUBLICATION',...] 
+ * - generation_settings.submission_auto_excludes **字符串数组**(如 [] 或具体仓库键),不可传 boolean(否则会 400) + * - generation_settings.auto_exclude_self_matching_scope 可选,如 'GROUP_CONTEXT' + * - view_settings.exclude_* 布尔排除项(与 TCA 文档一致) * - indexing_settings.add_to_index bool 是否把本文加进 SUBMITTED_WORK 索引(一般 true) * @return array */ public function triggerSimilarity($submissionId, $opts = []) { - $body = array_merge([ - 'generation_settings' => [ - 'search_repositories' => ['INTERNET', 'PUBLICATION', 'CROSSREF', 'CROSSREF_POSTED_CONTENT', 'SUBMITTED_WORK'], - 'submission_auto_excludes' => true, - 'auto_exclude_self_matching_scope' => 'GROUP_CONTEXT', - ], - 'view_settings' => [ - 'exclude_quotes' => true, - 'exclude_bibliography' => true, - 'exclude_citations' => true, - ], - 'indexing_settings' => [ - 'add_to_index' => true, - ], - ], $opts); + $body = array_merge($this->defaultSimilarityPayload(), $opts); return $this->request( 'PUT', - '/submissions/' . urlencode($submissionId) . '/similarity', + '/submissions/' . rawurlencode($submissionId) . 
'/similarity', $body ); } + /** + * PUT /similarity 与 PDF 导出共用的默认参数。 + * 此前固定 exclude_*=true 时,总相似度会低于 Crossref 网页手动查重(与「匹配来源编号/类型」无关)。 + */ + public function defaultSimilarityPayload() + { + $scope = trim((string) Env::get('turnitin.auto_exclude_self_matching_scope', 'GROUP_CONTEXT')); + if ($scope === '') { + unset($scope); + } + + $generation = [ + 'search_repositories' => ['INTERNET', 'PUBLICATION', 'CROSSREF', 'CROSSREF_POSTED_CONTENT', 'SUBMITTED_WORK'], + 'submission_auto_excludes' => [], + ]; + if (isset($scope)) { + $generation['auto_exclude_self_matching_scope'] = $scope; + } + + return [ + 'generation_settings' => $generation, + 'view_settings' => $this->defaultViewSettings(), + 'indexing_settings' => [ + 'add_to_index' => $this->envBool('turnitin.add_to_index', true), + ], + ]; + } + + public function defaultViewSettings() + { + return [ + 'exclude_quotes' => $this->envBool('turnitin.exclude_quotes', false), + 'exclude_bibliography' => $this->envBool('turnitin.exclude_bibliography', false), + 'exclude_citations' => $this->envBool('turnitin.exclude_citations', false), + ]; + } + + /** + * 从 GET /similarity 响应解析总相似度(0–100)。 + * 兼容 overall_match_percentage 在 message 嵌套、以及 0–1 小数形式。 + */ + public static function extractOverallMatchPercentage(array $statusResp) + { + $candidates = []; + + $push = function ($v) use (&$candidates) { + if ($v === null || $v === '') { + return; + } + if (is_numeric($v)) { + $candidates[] = floatval($v); + } + }; + + $push($statusResp['overall_match_percentage'] ?? null); + $push($statusResp['overall_match'] ?? null); + $push($statusResp['similarity_percentage'] ?? null); + + $msg = $statusResp; + if (isset($statusResp['message']) && is_array($statusResp['message'])) { + $msg = $statusResp['message']; + } + $push($msg['overall_match_percentage'] ?? null); + $push($msg['overall_match'] ?? 
null); + if (isset($msg['similarity']) && is_array($msg['similarity'])) { + $sim = $msg['similarity']; + $push($sim['overall_match_percentage'] ?? null); + $push($sim['overall_match'] ?? null); + } + + foreach ($candidates as $n) { + if ($n > 0 && $n < 1.0) { + $scaled = round($n * 100, 2); + if ($scaled > 1.0 || $n < 0.05) { + return $scaled; + } + } + if ($n >= 0) { + return round($n, 2); + } + } + + return 0.0; + } + + /** + * 从 GET /similarity 响应中尽量提取「按来源」的摘要(供列表展示;完整明细仍在 Turnitin 在线报告里)。 + * + * @return array{score:float,sources:array>} + */ + public static function parseSimilarityReportMeta(array $statusResp) + { + $meta = [ + 'score' => self::extractOverallMatchPercentage($statusResp), + 'sources' => [], + ]; + + $candidates = []; + self::collectSimilaritySourceNodes($statusResp, $candidates, 0); + if (isset($statusResp['message']) && is_array($statusResp['message'])) { + self::collectSimilaritySourceNodes($statusResp['message'], $candidates, 0); + } + + $seen = []; + foreach ($candidates as $node) { + if (!is_array($node)) { + continue; + } + $pct = null; + foreach (['percentage', 'match_percentage', 'overall_match_percentage', 'similarity_percentage'] as $k) { + if (isset($node[$k]) && is_numeric($node[$k])) { + $pct = floatval($node[$k]); + break; + } + } + $repo = ''; + foreach (['repository', 'repository_name', 'collection', 'source_type', 'type', 'database', 'category'] as $k) { + if (!empty($node[$k])) { + $repo = strtoupper(trim((string) $node[$k])); + break; + } + } + $words = isset($node['matched_word_count']) ? intval($node['matched_word_count']) + : (isset($node['word_count']) ? intval($node['word_count']) : 0); + $key = $repo . '|' . ($pct !== null ? $pct : '') . '|' . $words; + if (isset($seen[$key])) { + continue; + } + $seen[$key] = true; + $meta['sources'][] = array_filter([ + 'repository' => $repo, + 'match_percentage' => $pct, + 'matched_word_count' => $words > 0 ? 
$words : null, + ], function ($v) { + return $v !== null && $v !== ''; + }); + } + + return $meta; + } + + /** + * @param array $node + * @param array $out + */ + private static function collectSimilaritySourceNodes($node, array &$out, $depth) + { + if ($depth > 8 || !is_array($node)) { + return; + } + $hasRepo = false; + foreach (['repository', 'repository_name', 'collection', 'source_type'] as $k) { + if (!empty($node[$k])) { + $hasRepo = true; + break; + } + } + if ($hasRepo) { + $out[] = $node; + } + foreach ($node as $v) { + if (is_array($v)) { + if (isset($v[0]) && is_array($v[0])) { + foreach ($v as $item) { + self::collectSimilaritySourceNodes($item, $out, $depth + 1); + } + } else { + self::collectSimilaritySourceNodes($v, $out, $depth + 1); + } + } + } + } + + /** + * 在线 Similarity Report 默认视图(与 Crossref 后台「按来源查看」对齐)。 + */ + public function defaultViewerSimilarityBlock() + { + $mode = strtolower(trim((string) Env::get('turnitin.viewer_default_mode', 'all_sources'))); + if (!in_array($mode, ['match_overview', 'all_sources'], true)) { + $mode = 'all_sources'; + } + + return [ + 'default_mode' => $mode, + 'modes' => [ + 'match_overview' => true, + 'all_sources' => true, + ], + ]; + } + + private function envBool($name, $default = false) + { + $v = Env::get($name, $default ? '1' : '0'); + if ($v === true) { + return true; + } + if ($v === false) { + return false; + } + $v = strtolower(trim((string) $v)); + return in_array($v, ['1', 'true', 'yes', 'on'], true); + } + + /** + * 查询 submission 详情(上传后用于轮询是否解析完成)。 + * GET /submissions/{id} + * + * @return array 解码后的 JSON(常见为 status=ok + message 内含 id/status) + */ + public function getSubmission($submissionId) + { + return $this->request('GET', '/submissions/' . 
rawurlencode($submissionId)); + } + + /** + * 单次解析 GET /submissions/{id},判断是否可调用 PUT /similarity(不 sleep,供队列链逐步轮询)。 + * + * @return array{ready:bool, failed:bool, status:string, snippet:string, message:array} + */ + public function parseSubmissionIngestState($submissionId) + { + $raw = $this->getSubmission($submissionId); + $msg = self::unwrapSubmissionPayload($raw); + $st = strtoupper(trim((string) self::pickSubmissionStatus($msg))); + $snippet = mb_substr(json_encode($msg, JSON_UNESCAPED_UNICODE), 0, 400); + + $ready = [ + 'COMPLETE', 'COMPLETED', 'PROCESSED', 'READY', 'SUCCEEDED', + 'COMPLETE_PROCESSING', + ]; + $failed = ['ERROR', 'FAILED', 'CANCELLED', 'CANCELED', 'DELETED']; + + $readyFlag = $st !== '' && in_array($st, $ready, true); + $failedFlag = $st !== '' && in_array($st, $failed, true); + + return [ + 'ready' => $readyFlag, + 'failed' => $failedFlag, + 'status' => $st, + 'snippet' => $snippet, + 'message' => $msg, + ]; + } + + /** + * 上传完成后需等待 Turnitin 异步完成文本解析(同步阻塞版,仅 CLI/调试;线上请用队列链 PlagiarismWaitIngest)。 + * + * @param string $submissionId + * @param int $maxWaitSec 最长等待秒数,默认 600(10 分钟) + * @param int $intervalSec 轮询间隔秒数,默认 3 + * @throws Exception 超时或终态为失败 + */ + public function waitAfterUploadForSimilarity($submissionId, $maxWaitSec = 600, $intervalSec = 3) + { + $deadline = time() + max(30, (int)$maxWaitSec); + $intervalSec = max(1, (int)$intervalSec); + $lastStatus = ''; + $lastSnippet = ''; + + while (time() < $deadline) { + $parsed = $this->parseSubmissionIngestState($submissionId); + $lastStatus = $parsed['status']; + $lastSnippet = $parsed['snippet']; + + if (!empty($parsed['ready'])) { + return; + } + if (!empty($parsed['failed'])) { + throw new Exception('Turnitin submission failed, status=' . $lastStatus . ' body=' . $lastSnippet); + } + + sleep($intervalSec); + } + + throw new Exception( + 'Timeout waiting for Turnitin submission ingest (last status=' . ($lastStatus ?: '(empty)') . ') snippet=' . 
$lastSnippet + ); + } + + /** + * @param mixed $decoded + * @return array + */ + private static function unwrapSubmissionPayload($decoded) + { + if (!is_array($decoded)) { + return []; + } + if (isset($decoded['message']) && is_array($decoded['message'])) { + return $decoded['message']; + } + return $decoded; + } + + /** + * @param array $msg + * @return string + */ + private static function pickSubmissionStatus(array $msg) + { + $candidates = [$msg]; + if (isset($msg['submission']) && is_array($msg['submission'])) { + $candidates[] = $msg['submission']; + } + foreach ($candidates as $m) { + foreach (['status', 'workflow_status', 'submission_status', 'processing_status', 'paper_status'] as $k) { + if (!empty($m[$k])) { + return (string)$m[$k]; + } + } + } + return ''; + } + /** * 查询 similarity 状态 * GET /submissions/{id}/similarity @@ -156,7 +474,7 @@ class TurnitinService { return $this->request( 'GET', - '/submissions/' . urlencode($submissionId) . '/similarity' + '/submissions/' . rawurlencode($submissionId) . '/similarity' ); } @@ -166,25 +484,134 @@ class TurnitinService * * 返回 viewer_url(数小时有效) * - * @param array $viewer 可选 viewer 设置 e.g. 
['viewer_default_permission_set' => 'INSTRUCTOR'] + * TCA 要求 default_mode 为小写(如 match_overview);save_changes 等 LTI 字段会导致 400。 + * Crossref 通道常用 ADMINISTRATOR/USER,非 INSTRUCTOR。可在 .env 配置: + * turnitin.viewer_permission_set=ADMINISTRATOR + * + * @param array $viewer 可选:viewer_user_id、triggered_by(映射为 editor_{id})、或完整请求体覆盖 */ public function getViewerUrl($submissionId, $viewer = []) { - $body = array_merge([ - 'viewer_default_permission_set' => 'INSTRUCTOR', - 'similarity' => [ - 'default_mode' => 'MATCH_OVERVIEW', - 'view_settings' => ['save_changes' => true], - 'modes' => ['match_overview' => true, 'all_sources' => true], - ], - 'locale' => 'en-US', - ], $viewer); + $submissionId = trim((string) $submissionId); + if ($submissionId === '') { + throw new Exception('submissionId required for viewer-url'); + } - return $this->request( - 'POST', - '/submissions/' . urlencode($submissionId) . '/viewer-url', - $body - ); + $statusResp = $this->getSimilarityStatus($submissionId); + $st = strtoupper(trim((string) ($statusResp['status'] ?? ''))); + if ($st !== '' && $st !== 'COMPLETE') { + throw new Exception('similarity report not ready for viewer-url, status=' . $st); + } + + $path = '/submissions/' . rawurlencode($submissionId) . 
'/viewer-url'; + $lastError = null; + + foreach ($this->buildViewerUrlBodies($viewer) as $body) { + try { + return $this->request('POST', $path, $body); + } catch (Exception $e) { + $lastError = $e; + if (strpos($e->getMessage(), 'HTTP 400') === false) { + throw $e; + } + } + } + + throw $lastError ?: new Exception('viewer-url failed'); + } + + /** + * 按优先级生成若干合法请求体(前者失败且为 400 时尝试后者)。 + * + * @return array + */ + private function buildViewerUrlBodies(array $viewerOverrides) + { + if (!empty($viewerOverrides) && isset($viewerOverrides['viewer_default_permission_set'])) { + $body = $viewerOverrides; + if (empty($body['viewer_user_id'])) { + $body['viewer_user_id'] = $this->resolveViewerUserId($viewerOverrides); + } + return [$body]; + } + + $locale = trim((string) Env::get('turnitin.viewer_locale', 'en-US')) ?: 'en-US'; + $configured = trim((string) Env::get('turnitin.viewer_permission_set', '')); + $permissionSets = $configured !== '' + ? array_map('trim', explode(',', $configured)) + : $this->defaultViewerPermissionSets(); + $viewerUserId = $this->resolveViewerUserId($viewerOverrides); + $saveChanges = $this->envBool('turnitin.viewer_save_changes', false); + $simModes = $this->defaultViewerSimilarityBlock(); + + $bodies = []; + foreach ($permissionSets as $perm) { + if ($perm === '') { + continue; + } + // TCA 认证要求:必须带 viewer_user_id(此前缺失会导致 400 Bad request) + $bodies[] = [ + 'viewer_user_id' => $viewerUserId, + 'locale' => $locale, + 'viewer_default_permission_set' => $perm, + 'similarity' => [ + 'view_settings' => ['save_changes' => $saveChanges], + ], + ]; + $bodies[] = [ + 'viewer_user_id' => $viewerUserId, + 'locale' => $locale, + 'viewer_default_permission_set' => $perm, + 'similarity' => array_merge($simModes, [ + 'view_settings' => ['save_changes' => $saveChanges], + ]), + ]; + $bodies[] = [ + 'viewer_user_id' => $viewerUserId, + 'locale' => $locale, + 'viewer_default_permission_set' => $perm, + ]; + } + + return $bodies; + } + + /** + * viewer-url 必填:与 
createSubmission 的 owner/submitter 同一命名空间(editor_{user_id})。 + */ + public function resolveViewerUserId(array $opts = []) + { + if (!empty($opts['viewer_user_id'])) { + return trim((string) $opts['viewer_user_id']); + } + // 打开报告的人(当前编辑)须与申请 viewer-url 时一致,否则易出现 session 认证失败 + $editorId = isset($opts['editor_id']) ? intval($opts['editor_id']) : 0; + if ($editorId > 0) { + return 'editor_' . $editorId; + } + $triggeredBy = isset($opts['triggered_by']) ? intval($opts['triggered_by']) : 0; + if ($triggeredBy > 0) { + return 'editor_' . $triggeredBy; + } + $custom = trim((string) Env::get('turnitin.viewer_user_id', '')); + if ($custom !== '') { + return $custom; + } + $name = trim((string) $this->integrationName); + return ($name !== '' ? $name : 'tmr') . '_viewer'; + } + + /** + * Crossref Similarity Check 通常不用 INSTRUCTOR;按常见可用角色排序尝试。 + * + * @return array + */ + private function defaultViewerPermissionSets() + { + if (stripos($this->baseUrl, 'crossref') !== false) { + return ['ADMINISTRATOR', 'USER', 'EDITOR', 'INSTRUCTOR']; + } + return ['INSTRUCTOR', 'ADMINISTRATOR', 'USER']; } /** @@ -196,12 +623,13 @@ class TurnitinService public function requestPdfReport($submissionId, $opts = []) { $body = array_merge([ - 'locale' => 'en-US', + 'locale' => trim((string) Env::get('turnitin.viewer_locale', 'en-US')) ?: 'en-US', + 'view_settings' => $this->defaultViewSettings(), ], $opts); return $this->request( 'POST', - '/submissions/' . urlencode($submissionId) . '/similarity/pdf', + '/submissions/' . rawurlencode($submissionId) . '/similarity/pdf', $body ); } diff --git a/application/common/UserFieldAiService.php b/application/common/UserFieldAiService.php new file mode 100644 index 00000000..3bc023ec --- /dev/null +++ b/application/common/UserFieldAiService.php @@ -0,0 +1,463 @@ +logFile = ROOT_PATH . 'runtime' . DS . 
'user_field_ai.log'; + } + + /** + * 启动链式处理(从 user_id=0 之后找第一个待处理用户)。 + * + * @param bool $force true 时重算已生成用户 + * @return bool 是否已推入首条 job + */ + public function startChain($force = false, $delay = 1, $queue = '') + { + return $this->enqueueNextFieldAi($delay, $queue, 0, $force); + } + + /** + * 链式:找 user_id > $afterUserId 的下一位待处理用户并入队。 + */ + public function enqueueNextFieldAi($delay = 1, $queue = '', $afterUserId = 0, $force = false) + { + if ($queue === '') { + $queue = self::QUEUE_NAME; + } + $afterUserId = intval($afterUserId); + $userId = $this->findNextPendingUserId($afterUserId, $force); + if ($userId <= 0) { + $this->log('[FieldAi] chain finished after user_id=' . $afterUserId . ' force=' . ($force ? '1' : '0')); + return false; + } + + $data = [ + 'user_id' => $userId, + 'queue' => $queue, + 'force' => $force ? 1 : 0, + ]; + $jobClass = 'app\\api\\job\\UserFieldAiFill@fire'; + if ($delay > 0) { + Queue::later($delay, $jobClass, $data, $queue); + } else { + Queue::push($jobClass, $data, $queue); + } + $this->log('[FieldAi] enqueued user_id=' . $userId . ' queue=' . 
$queue); + return true; + } + + /** + * 处理单个用户(队列 Job 或同步调试)。 + * + * @return array{ok:bool, skipped?:bool, insufficient?:bool, field_ai?:string, error?:string} + */ + public function processUser($userId, $force = false) + { + $userId = intval($userId); + if ($userId <= 0) { + return ['ok' => false, 'error' => 'invalid user_id']; + } + + $this->ensureReviewerInfoRow($userId); + $uri = Db::name('user_reviewer_info')->where('reviewer_id', $userId)->find(); + if (!$uri) { + return ['ok' => false, 'error' => 'reviewer_info missing']; + } + + if (!$force && intval($uri['field_ai_status']) === self::STATUS_DONE && trim((string)$uri['field_ai']) !== '') { + return ['ok' => true, 'skipped' => true, 'field_ai' => (string)$uri['field_ai']]; + } + + if (!$this->isEligible($userId, $uri)) { + $this->updateFieldAi($userId, '', self::STATUS_INSUFFICIENT, 'insufficient profile/articles'); + return ['ok' => true, 'insufficient' => true]; + } + + try { + $context = $this->buildContext($userId, $uri); + $fieldAi = $this->summarizeWithLlm($context); + if ($fieldAi === '') { + throw new Exception('LLM returned empty field'); + } + $this->updateFieldAi($userId, $fieldAi, self::STATUS_DONE, ''); + return ['ok' => true, 'field_ai' => $fieldAi]; + } catch (\Throwable $e) { + $this->updateFieldAi($userId, '', self::STATUS_FAILED, mb_substr($e->getMessage(), 0, 500)); + $this->log('[FieldAi] user_id=' . $userId . ' fail: ' . 
$e->getMessage()); + return ['ok' => false, 'error' => $e->getMessage()]; + } + } + + /** + * 是否满足「可总结」:有投稿 或 审稿人资料较全。 + */ + public function isEligible($userId, $uri = null) + { + if ($this->hasSubmittedArticles($userId)) { + return true; + } + if ($uri === null) { + $uri = Db::name('user_reviewer_info')->where('reviewer_id', $userId)->find(); + } + return $this->isReviewerProfileComplete($uri); + } + + public function hasSubmittedArticles($userId) + { + $n = Db::name('article') + ->where('user_id', intval($userId)) + ->where('title', '<>', '') + ->count(); + return $n > 0; + } + + /** + * 审稿人资料字段填充数达到阈值视为「较全」。 + */ + public function isReviewerProfileComplete($uri) + { + if (!$uri || !is_array($uri)) { + return false; + } + $minFilled = max(3, (int) Env::get('user_field_ai.min_profile_fields', 4)); + $keys = ['field', 'company', 'country', 'technical', 'introduction', 'department', 'website']; + $filled = 0; + foreach ($keys as $k) { + if (!empty($uri[$k]) && trim((string)$uri[$k]) !== '') { + $filled++; + } + } + if (!empty($uri['major']) && trim((string)$uri['major']) !== '' && trim((string)$uri['major']) !== '0') { + $filled++; + } + $majorCount = Db::name('major_to_user')->where('user_id', intval($uri['reviewer_id']))->where('state', 0)->count(); + if ($majorCount > 0) { + $filled++; + } + return $filled >= $minFilled; + } + + private function findNextPendingUserId($afterUserId, $force) + { + $batch = max(20, (int) Env::get('user_field_ai.scan_batch', 80)); + $cursor = intval($afterUserId); + + while (true) { + $query = Db::name('user')->alias('u') + ->join('t_user_reviewer_info uri', 'uri.reviewer_id = u.user_id',"left") + ->where('u.user_id', '>', $cursor); + if (!$force) { + $query->where(function ($q) { + $q->where('uri.field_ai_status', self::STATUS_PENDING) + ->whereOr('uri.field_ai_status', self::STATUS_FAILED) + ->whereOr('uri.reviewer_info_id', 'null'); + }); + } + $ids = $query->order('u.user_id asc')->limit($batch)->column('u.user_id'); + + if 
(empty($ids)) { + return 0; + } + + foreach ($ids as $uid) { + $uid = intval($uid); + $cursor = $uid; + $this->ensureReviewerInfoRow($uid); + $uri = Db::name('user_reviewer_info')->where('reviewer_id', $uid)->find(); + if (!$force && intval($uri['field_ai_status']) === self::STATUS_DONE) { + continue; + } + if (!$force && intval($uri['field_ai_status']) === self::STATUS_INSUFFICIENT) { + continue; + } + if ($this->isEligible($uid, $uri)) { + return $uid; + } + if (!$force) { + $this->updateFieldAi($uid, '', self::STATUS_INSUFFICIENT, 'auto skip: insufficient data'); + } + } + } + } + + private function buildContext($userId, array $uri) + { + $user = Db::name('user')->where('user_id', $userId)->field('user_id,realname,email,account')->find(); + $majorTitles = $this->resolveMajorTitles($userId, $uri); + + $maxArticles = max(1, min(10, (int) Env::get('user_field_ai.max_articles', 5))); + $articles = Db::name('article') + ->where('user_id', $userId) + ->where('title', '<>', '') + ->order('article_id desc') + ->limit($maxArticles) + ->field('article_id,title,keywords,abstrart,journal_id,ctime') + ->select(); + + $journalNames = []; + if (!empty($articles)) { + $jids = array_unique(array_filter(array_column($articles, 'journal_id'))); + if (!empty($jids)) { + $journalNames = Db::name('journal')->where('journal_id', 'in', $jids)->column('title', 'journal_id'); + } + } + + $articleBlocks = []; + foreach ($articles as $a) { + $jid = intval($a['journal_id']); + $articleBlocks[] = [ + 'title' => (string) $a['title'], + 'journal' => isset($journalNames[$jid]) ? (string) $journalNames[$jid] : '', + 'keywords' => (string) ($a['keywords'] ?? ''), + 'abstract' => mb_substr(trim((string) ($a['abstrart'] ?? '')), 0, 800), + ]; + } + + return [ + 'user' => [ + 'realname' => $user ? (string) $user['realname'] : '', + 'email' => $user ? (string) $user['email'] : '', + ], + 'profile' => [ + 'field' => trim((string) ($uri['field'] ?? 
'')), + 'technical' => trim((string) ($uri['technical'] ?? '')), + 'company' => trim((string) ($uri['company'] ?? '')), + 'department' => trim((string) ($uri['department'] ?? '')), + 'country' => trim((string) ($uri['country'] ?? '')), + 'introduction' => mb_substr(trim((string) ($uri['introduction'] ?? '')), 0, 1200), + 'website' => trim((string) ($uri['website'] ?? '')), + 'majors' => $majorTitles, + ], + 'articles' => $articleBlocks, + ]; + } + + private function resolveMajorTitles($userId, array $uri) + { + $titles = []; + $ids = Db::name('major_to_user')->where('user_id', $userId)->where('state', 0)->column('major_id'); + if (!empty($ids)) { + $titles = Db::name('reviewer_major')->where('major_id', 'in', $ids)->where('state', 0)->column('title'); + } + if (empty($titles) && !empty($uri['major'])) { + $legacy = array_filter(array_map('intval', explode(',', (string) $uri['major']))); + if (!empty($legacy)) { + $titles = Db::name('reviewer_major')->where('major_id', 'in', $legacy)->column('title'); + } + } + return array_values(array_unique(array_filter(array_map('trim', $titles)))); + } + + /** + * 解析 OpenAI 兼容 chat/completions 完整 URL。 + * base.model_url 常为站点根(如 http://chat.taimed.cn),直接 POST 会 404。 + */ + private function resolveLlmChatUrl() + { + $candidates = [ +// Env::get('user_field_ai.chat_url', ''), +// Env::get('promotion.promotion_llm_url', ''), +// Env::get('expert_country_chat_url', ''), +// Env::get('citation_chat_url', ''), + Env::get('base.model_url', ''), + ]; + foreach ($candidates as $u) { + $u = trim((string) $u); + if ($u === '') { + continue; + } + $normalized = $this->normalizeChatCompletionsUrl($u); + if ($normalized !== '') { + return $normalized; + } + } + return ''; + } + + private function normalizeChatCompletionsUrl($url) + { + $url = trim((string) $url); + if ($url === '') { + return ''; + } + if (stripos($url, 'chat/completions') !== false) { + return $url; + } + return rtrim($url, '/') . 
'/v1/chat/completions'; + } + + private function resolveLlmModel() + { + $candidates = [ + Env::get('user_field_ai.chat_model', ''), + Env::get('base.model', ''), + Env::get('promotion.promotion_llm_model', ''), + Env::get('expert_country_chat_model', ''), + Env::get('citation_chat_model', ''), + 'gpt-4.1', + ]; + foreach ($candidates as $m) { + $m = trim((string) $m); + if ($m !== '' && strtolower($m) !== 'your-model-name') { + return $m; + } + } + return ''; + } + + private function summarizeWithLlm(array $context) + { + $url = $this->resolveLlmChatUrl(); + $model = $this->resolveLlmModel(); + $apiKey = trim((string) Env::get('user_field_ai.chat_api_key', Env::get('expert_country_chat_api_key', Env::get('citation_chat_api_key', '')))); + if ($url === '' || $model === '') { + throw new Exception('user_field_ai chat not configured (set user_field_ai.chat_url or promotion PROMOTION_LLM_URL / base.model_url)'); + } + + $payloadJson = json_encode($context, JSON_UNESCAPED_UNICODE | JSON_UNESCAPED_SLASHES); + $messages = [ + [ + 'role' => 'system', + 'content' => '你是学术领域分类助手。根据用户的投稿与个人资料,用简体中文给出该用户最主要的研究领域总结。' + . '要求:精确、简洁,1~3 个中文领域词或短短语,用顿号分隔;不要解释、不要英文、不要 JSON 以外的多余文字。' + . '只输出 JSON:{"field_ai":"..."}。', + ], + [ + 'role' => 'user', + 'content' => "请根据以下 JSON 资料总结该用户的主要研究领域:\n" . $payloadJson, + ], + ]; + + $body = [ + 'model' => $model, + 'temperature' => 0.2, + 'messages' => $messages, + ]; + + $ch = curl_init(); + curl_setopt_array($ch, [ + CURLOPT_URL => $url, + CURLOPT_POST => true, + CURLOPT_POSTFIELDS => json_encode($body, JSON_UNESCAPED_UNICODE), + CURLOPT_RETURNTRANSFER => true, + CURLOPT_CONNECTTIMEOUT => 15, + CURLOPT_TIMEOUT => max(30, (int) Env::get('user_field_ai.timeout', 90)), + CURLOPT_HTTPHEADER => array_filter([ + 'Content-Type: application/json', + $apiKey !== '' ? 'Authorization: Bearer ' . 
$apiKey : null, + ]), + ]); + $raw = curl_exec($ch); + $code = (int) curl_getinfo($ch, CURLINFO_HTTP_CODE); + $err = curl_error($ch); + curl_close($ch); + + if ($raw === false) { + throw new Exception('LLM curl error: ' . $err); + } + if ($code < 200 || $code >= 300) { + $hint = ($code === 404 && stripos($url, 'chat/completions') === false) + ? ' (chat_url may be missing /v1/chat/completions)' + : ''; + throw new Exception('LLM HTTP ' . $code . $hint . ': ' . mb_substr((string) $raw, 0, 400)); + } + + $data = json_decode($raw, true); + $content = ''; + if (is_array($data) && isset($data['choices'][0]['message']['content'])) { + $content = trim((string) $data['choices'][0]['message']['content']); + } elseif (is_string($raw)) { + $content = trim($raw); + } + + $fieldAi = $this->parseFieldAiFromContent($content); + if ($fieldAi === '' && $content !== '') { + $fieldAi = $this->cleanFieldAiText($content); + } + return $fieldAi; + } + + private function parseFieldAiFromContent($content) + { + $content = trim((string) $content); + if ($content === '') { + return ''; + } + $content = preg_replace('/^```[a-zA-Z]*\s*|```$/m', '', $content); + if (preg_match('/\{.*\}/s', $content, $m)) { + $obj = json_decode($m[0], true); + if (is_array($obj) && !empty($obj['field_ai'])) { + return $this->cleanFieldAiText((string) $obj['field_ai']); + } + } + $obj = json_decode($content, true); + if (is_array($obj) && !empty($obj['field_ai'])) { + return $this->cleanFieldAiText((string) $obj['field_ai']); + } + return ''; + } + + private function cleanFieldAiText($text) + { + $text = trim((string) $text); + $text = trim($text, "\"' \t\n\r"); + $text = preg_replace('/\s+/u', '', $text); + if (mb_strlen($text) > 200) { + $text = mb_substr($text, 0, 200); + } + return $text; + } + + public function ensureReviewerInfoRow($userId) + { + $exists = Db::name('user_reviewer_info')->where('reviewer_id', $userId)->find(); + if ($exists) { + return; + } + Db::name('user_reviewer_info')->insert([ + 
'reviewer_id' => $userId, + 'state' => 0, + ]); + } + + private function updateFieldAi($userId, $fieldAi, $status, $note) + { + $data = [ + 'field_ai' => mb_substr(trim((string) $fieldAi), 0, 512), + 'field_ai_status' => intval($status), + 'field_ai_utime' => time(), + ]; + Db::name('user_reviewer_info')->where('reviewer_id', $userId)->update($data); + if ($note !== '') { + $this->log('[FieldAi] user_id=' . $userId . ' status=' . $status . ' note=' . $note); + } + } + + public function log($msg) + { + $line = date('Y-m-d H:i:s') . ' ' . $msg . PHP_EOL; + @file_put_contents($this->logFile, $line, FILE_APPEND); + } +} diff --git a/sql/add_field_ai_to_user_reviewer_info.sql b/sql/add_field_ai_to_user_reviewer_info.sql new file mode 100644 index 00000000..78dfea26 --- /dev/null +++ b/sql/add_field_ai_to_user_reviewer_info.sql @@ -0,0 +1,5 @@ +-- 用户主领域 AI 总结(中文),由队列链式任务写入 +ALTER TABLE `t_user_reviewer_info` + ADD COLUMN `field_ai` VARCHAR(512) NOT NULL DEFAULT '' COMMENT 'AI总结的主要研究领域(中文)' AFTER `field`, + ADD COLUMN `field_ai_status` TINYINT NOT NULL DEFAULT 0 COMMENT '0待处理 1已生成 2资料不足跳过 3失败' AFTER `field_ai`, + ADD COLUMN `field_ai_utime` INT NOT NULL DEFAULT 0 COMMENT 'field_ai 更新时间' AFTER `field_ai_status`; diff --git a/sql/add_plagiarism_check_type.sql b/sql/add_plagiarism_check_type.sql new file mode 100644 index 00000000..f24ab004 --- /dev/null +++ b/sql/add_plagiarism_check_type.sql @@ -0,0 +1,4 @@ +-- 查重类型:全文 full / 正文 body_only(裁切题名、作者、参考文献后上传) +ALTER TABLE `t_plagiarism_check` + ADD COLUMN `check_type` VARCHAR(16) NOT NULL DEFAULT 'full' COMMENT 'full=全文 body_only=仅正文' AFTER `trigger_source`, + ADD COLUMN `derived_file_path` VARCHAR(255) NOT NULL DEFAULT '' COMMENT 'body_only 时生成的临时稿件相对路径' AFTER `source_file_size`;