From c1107780a7c12e47303bab5c9b56e90788d81d39 Mon Sep 17 00:00:00 2001 From: wyn <1074145239@qq.com> Date: Tue, 26 May 2026 17:33:34 +0800 Subject: [PATCH] =?UTF-8?q?=E5=8F=82=E8=80=83=E6=96=87=E7=8C=AE=E6=9C=AC?= =?UTF-8?q?=E5=9C=B0=E5=A4=A7=E6=A8=A1=E5=9E=8B=E6=A0=A1=E5=AF=B9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- application/api/controller/Article.php | 427 -------- application/api/controller/Base.php | 8 + application/api/controller/Preaccept.php | 50 + application/api/controller/References.php | 227 ++++ application/api/job/ReferenceCheck.php | 58 +- application/api/job/ReferenceCheckTwo.php | 12 + application/common/QueueRedis.php | 19 + application/common/ReferenceCheckService.php | 1034 +++++++++++++++++- application/common/service/LLMService.php | 26 +- 9 files changed, 1357 insertions(+), 504 deletions(-) diff --git a/application/api/controller/Article.php b/application/api/controller/Article.php index 456fe59c..e47a0473 100644 --- a/application/api/controller/Article.php +++ b/application/api/controller/Article.php @@ -10,7 +10,6 @@ use PhpOffice\PhpWord\IOFactory; use app\common\OpenAi; use app\common\CrossrefService; use app\common\PubmedService; -use app\common\ReferenceCheckService; /** * @title 文章接口 @@ -6392,430 +6391,4 @@ class Article extends Base Db::commit(); return json_encode(['status' => 1,'msg' => 'success']); } - /** - * 调试:预览 article_main 中提取的 blue 引用(不入队) - * POST: article_id - */ - public function citationReview() - { - $articleId = 7821;//intval($this->request->post('article_id', 0)); - if ($articleId <= 0) { - return jsonError('article_id is required'); - } - - $svc = new ReferenceCheckService(); - $mains = Db::name('article_main') - ->field('am_id,content') - ->where('article_id', $articleId) - ->where('am_id', 127448) - //->whereIn('state', [0, 2]) - ->order('sort asc') - ->select(); - - $preview = []; - foreach ($mains as $item) { - $preview[] = [ - 'am_id' => $item['am_id'], 
- 'citations' => $svc->extractReferences((string)$item['content']), - ]; - break; - } - return jsonSuccess(['article_id' => $articleId, 'sections' => $preview]); - } - /** - * 提取文献引用 - * - * @param string $content 原始内容 - * @return array - */ - function extractReferences($content) - { - $result = []; - - // 匹配 [57][74-79][72, 45] - preg_match_all( - '/\[([\d,\-\s]+)\]<\/blue>/', - $content, - $matches, - PREG_OFFSET_CAPTURE - ); - - if (empty($matches[0])) { - return []; - } - - foreach ($matches[0] as $index => $match) { - - // 完整标签 - $fullTag = $match[0]; - - // 标签开始位置 - $tagStart = $match[1]; - - // 标签结束位置 - $tagEnd = $tagStart + strlen($fullTag); - - // 文献号原始字符串 - $rawRef = trim($matches[1][$index][0]); - - // 展开文献号 - $referenceNumbers = $this->expandReferenceNumbers($rawRef); - - /** - * 获取原文内容 - * 这里按句号切分: - * 找当前引用所在句子的开始和结束位置 - */ - $sentenceStart = $this->findSentenceStart($content, $tagStart); - $sentenceEnd = $this->findSentenceEnd($content, $tagEnd); - - $originalText = mb_substr( - $content, - $sentenceStart, - $sentenceEnd - $sentenceStart - ); - - // 去掉 blue 标签 - $originalText = preg_replace( - '/\[[\d,\-\s]+\]<\/blue>/', - '', - $originalText - ); - - $originalText = trim($originalText); - - $result[] = [ - 'reference_raw' => $rawRef, - 'reference_numbers' => $referenceNumbers, - 'original_text' => $originalText, - - // blue标签在整段中的位置 - 'reference_start' => $tagStart, - 'reference_end' => $tagEnd, - - // 原文位置 - 'text_start' => $sentenceStart, - 'text_end' => $sentenceEnd, - ]; - } - - return $result; - } - - /** - * 展开文献号 - * 11-15 => [11,12,13,14,15] - * 72,45 => [72,45] - * 74-79,81 => [74,75,76,77,78,79,81] - */ - function expandReferenceNumbers($refStr) - { - $numbers = []; - - $parts = explode(',', $refStr); - - foreach ($parts as $part) { - - $part = trim($part); - - // 范围 - if (strpos($part, '-') !== false) { - - list($start, $end) = explode('-', $part); - - $start = intval(trim($start)); - $end = intval(trim($end)); - - if ($start <= $end) { - 
$numbers = array_merge( - $numbers, - range($start, $end) - ); - } - - } else { - - // 单个数字 - if (is_numeric($part)) { - $numbers[] = intval($part); - } - } - } - - return array_values(array_unique($numbers)); - } - - /** - * 查找句子开始位置 - */ - function findSentenceStart($content, $position) - { - $delimiters = ['.', '。', '!', '?', "\n"]; - - $start = 0; - - foreach ($delimiters as $delimiter) { - - $pos = strrpos( - substr($content, 0, $position), - $delimiter - ); - - if ($pos !== false) { - $start = max($start, $pos + 1); - } - } - - return $start; - } - - /** - * 查找句子结束位置 - */ - function findSentenceEnd($content, $position) - { - $length = strlen($content); - - $endPositions = []; - - foreach (['.', '。', '!', '?', "\n"] as $delimiter) { - - $pos = strpos($content, $delimiter, $position); - - if ($pos !== false) { - $endPositions[] = $pos + 1; - } - } - - return empty($endPositions) - ? $length - : min($endPositions); - } - - /** - * 引用相关性:提交单条到队列(异步调用 promotion 同款本地大模型) - * POST: content_a(必填), content_b(可选), article_id, reference_no(n=index+1), am_id - */ - public function referenceCheckEnqueue() - { - $data = $this->request->post(); - $contentA = trim((string)(isset($data['content_a']) ? $data['content_a'] : '')); - $contentB = trim((string)(isset($data['content_b']) ? $data['content_b'] : '')); - $articleId = intval(isset($data['article_id']) ? $data['article_id'] : 0); - $referenceNo = intval(isset($data['reference_no']) ? $data['reference_no'] : 0); - - if ($contentA === '') { - return jsonError('content_a is required'); - } - - try { - $svc = new ReferenceCheckService(); - $extra = [ - 'reference_no' => $referenceNo, - 'article_id' => $articleId, - 'am_id' => intval(isset($data['am_id']) ? 
$data['am_id'] : 0), - ]; - - if ($contentB === '' && $articleId > 0 && $referenceNo > 0) { - $prod = Db::name('production_article') - ->where('article_id', $articleId) - ->where('state', 0) - ->find(); - if ($prod) { - $referMap = $svc->loadReferMapByPArticleId(intval($prod['p_article_id'])); - $referIndex = $referenceNo - 1; - if (isset($referMap[$referIndex])) { - $refer = $referMap[$referIndex]; - $contentB = $svc->formatReferForLlm($refer); - $extra['p_article_id'] = intval($prod['p_article_id']); - $extra['p_refer_id'] = intval($refer['p_refer_id']); - $extra['refer_index'] = $referIndex; - } - } - } - - $result = $svc->enqueue($contentA, $contentB, $extra); - return jsonSuccess($result); - } catch (\Exception $e) { - return jsonError($e->getMessage()); - } - } - public function checkOne(){ - $articleId = intval($this->request->param('article_id', 7414)); - $svc = new ReferenceCheckService(); - return jsonSuccess($svc->enqueueSecondPassByArticle($articleId)); - } - public function referenceCheckEnqueueArticleMain(){ - $amId = 127448; - $svc = new ReferenceCheckService(); - $main = Db::name('article_main') - ->field('am_id,content,article_id') - ->where('am_id', $amId) - ->whereIn('state', [0, 2]) - ->find(); - $result = $svc->enqueueByArticleMain($main); - return jsonSuccess($result); - } - public function referenceCheckEnqueueArticle(){ - $data = $this->request->get(); - $articleId = intval(isset($data['article_id']) ? 
$data['article_id'] : 0); - var_dump($articleId); - if ($articleId <= 0) { - return jsonError('article_id is required'); - } - try { - $svc = new ReferenceCheckService(); - $result = $svc->enqueueByArticle($articleId); - return jsonSuccess($result); - } catch (\Exception $e) { - return jsonError($e->getMessage()); - } - } - /** - * 按文章批量入队:从 article_main 提取 blue 引用与文献号 - * POST: article_id, clear_previous=1(默认清空该文旧明细后重检) - */ - public function referenceCheckEnqueueArticle2() - { - $data = $this->request->post(); - $articleId = intval(isset($data['article_id']) ? $data['article_id'] : 0); - if ($articleId <= 0) { - return jsonError('article_id is required'); - } - - try { - $svc = new ReferenceCheckService(); - $clear = !isset($data['clear_previous']) || intval($data['clear_previous']) === 1; - $result = $svc->enqueueByArticle($articleId, $clear); - return jsonSuccess($result); - } catch (\Exception $e) { - return jsonError($e->getMessage()); - } - } - - /** - * 查询单条引用相关性检测结果 - * GET/POST: check_id - */ - public function referenceCheckResult() - { - $checkId = intval($this->request->param('check_id', 0)); - if ($checkId <= 0) { - return jsonError('check_id is required'); - } - - $row = (new ReferenceCheckService())->getResult($checkId); - if (!$row) { - return jsonError('result not found'); - } - - return jsonSuccess($this->formatReferenceCheckRow($row)); - } - - /** - * 稿件预览:带不合理引用标记的 content(序号 + 引用句) - * GET/POST: article_id, am_id(可选,只预览某一节) - */ - public function referenceCheckPreview() - { - $articleId = intval($this->request->param('article_id', 0)); - if ($articleId <= 0) { - return jsonError('article_id is required'); - } - $amId = intval($this->request->param('am_id', 0)); - - try { - $data = (new ReferenceCheckService())->buildArticlePreview($articleId, $amId); - $data['markup_hint'] = [ - 'ref_no' => '.ref-no-error — 不合理的文献序号(如 70-73 中单独的 70)', - 'ref_cite' => '.ref-cite-tag.ref-cite-error — 含不合理序号的 blue 引用块', - 'ref_context'=> '.ref-context-error — 
不合理的引用句/上下文', - ]; - $data['preview_css'] = '.ref-no-error{color:#c00;font-weight:bold;border-bottom:2px wavy #c00}' - . '.ref-cite-tag.ref-cite-error{background:#ffecec}' - . '.ref-context-error{background:#fff3cd;outline:1px dashed #e6a700}'; - return jsonSuccess($data); - } catch (\Exception $e) { - return jsonError($e->getMessage()); - } - } - - /** - * 按文章列出引用校对结果([70-73] 为 4 条,reference_no 分别为 70,71,72,73) - * GET/POST: article_id, status(可选), only_mismatch=1 仅不合理 - */ - public function referenceCheckList() - { - $articleId = intval($this->request->param('article_id', 0)); - if ($articleId <= 0) { - return jsonError('article_id is required'); - } - - $status = $this->request->param('status', ''); - $statusFilter = ($status === '' || $status === null) ? -1 : intval($status); - $onlyMismatch = intval($this->request->param('only_mismatch', 0)) === 1; - $rows = (new ReferenceCheckService())->listByArticle($articleId, $statusFilter, $onlyMismatch); - - $list = []; - foreach ($rows as $row) { - $list[] = $this->formatReferenceCheckRow($row); - } - - $mains = Db::name('article_main') - ->field('am_id,ref_check_status,sort') - ->where('article_id', $articleId) - ->whereIn('state', [0, 2]) - ->order('sort asc') - ->select(); - $sections = []; - foreach ($mains as $m) { - $st = intval(isset($m['ref_check_status']) ? $m['ref_check_status'] : 0); - $sections[] = [ - 'am_id' => intval($m['am_id']), - 'ref_check_status' => $st, - 'ref_check_pass' => $st === ReferenceCheckService::AM_STATUS_PASS, - 'ref_check_label' => ReferenceCheckService::amStatusLabel($st), - ]; - } - - return jsonSuccess([ - 'article_id' => $articleId, - 'total' => count($list), - 'list' => $list, - 'sections' => $sections, - ]); - } - - private function formatReferenceCheckRow($row) - { - $statusMap = array(0 => 'pending', 1 => 'done', 2 => 'failed'); - $amId = intval(isset($row['am_id']) ? $row['am_id'] : 0); - $citeStart = intval(isset($row['cite_tag_start']) ? 
$row['cite_tag_start'] : 0); - $rowStatus = intval($row['status']); - return array( - 'check_id' => intval(isset($row['id']) ? $row['id'] : (isset($row['check_id']) ? $row['check_id'] : 0)), - 'article_id' => intval(isset($row['article_id']) ? $row['article_id'] : 0), - 'am_id' => $amId, - 'cite_group_key' => $amId . '_' . $citeStart, - 'p_refer_id' => intval(isset($row['p_refer_id']) ? $row['p_refer_id'] : 0), - 'refer_index' => intval(isset($row['refer_index']) ? $row['refer_index'] : 0), - 'reference_no' => intval(isset($row['reference_no']) ? $row['reference_no'] : 0), - 'reference_raw' => isset($row['reference_raw']) ? $row['reference_raw'] : '', - 'cite_tag_start' => $citeStart, - 'cite_tag_end' => intval(isset($row['cite_tag_end']) ? $row['cite_tag_end'] : 0), - 'text_start' => intval(isset($row['text_start']) ? $row['text_start'] : 0), - 'text_end' => intval(isset($row['text_end']) ? $row['text_end'] : 0), - 'status' => isset($statusMap[$rowStatus]) ? $statusMap[$rowStatus] : 'unknown', - 'is_match' => intval($row['is_match']), - 'can_support' => intval(isset($row['can_support']) ? $row['can_support'] : $row['is_match']), - 'is_reasonable' => intval(isset($row['can_support']) ? $row['can_support'] : $row['is_match']) === 1, - 'confidence' => floatval($row['confidence']), - 'reason' => isset($row['reason']) ? $row['reason'] : '', - 'error_msg' => isset($row['error_msg']) ? $row['error_msg'] : '', - 'content_a' => isset($row['content_a']) ? $row['content_a'] : '', - 'content_b' => isset($row['content_b']) ? $row['content_b'] : '', - 'updated_at' => isset($row['updated_at']) ? 
$row['updated_at'] : '', - ); - } - } diff --git a/application/api/controller/Base.php b/application/api/controller/Base.php index 77e1da7b..3b2c4627 100644 --- a/application/api/controller/Base.php +++ b/application/api/controller/Base.php @@ -271,6 +271,14 @@ class Base extends Controller } $this->production_article_refer_obj->where('p_article_id', $refer_info['p_article_id'])->where('index', ">", $refer_info['index'])->where('state', 0)->setDec('index'); $this->production_article_refer_obj->where('p_refer_id', $p_refer_id)->update(['state' => 1]); + + // 文献集合已变更,原校对结果的 reference_no 已全部错位,整篇标记为未校对 + try { + (new \app\common\ReferenceCheckService()) + ->clearArticleChecksByPArticleId(intval($refer_info['p_article_id'])); + } catch (\Exception $e) { + \think\Log::error('delOneRefer clearArticleChecksByPArticleId p_refer_id=' . $p_refer_id . ' ' . $e->getMessage()); + } } diff --git a/application/api/controller/Preaccept.php b/application/api/controller/Preaccept.php index 9b4867c7..166af09f 100644 --- a/application/api/controller/Preaccept.php +++ b/application/api/controller/Preaccept.php @@ -7,6 +7,7 @@ use think\Env; use think\Queue; use think\Validate; use app\common\CrossrefService; +use app\common\ReferenceCheckService; class Preaccept extends Base { @@ -15,6 +16,26 @@ class Preaccept extends Base parent::__construct($request); } + /** + * 新增/修改导致文献集合改变后,清空整篇校对明细,使文章状态回到"未校对"。 + * 失败仅记日志,不阻塞主流程。 + */ + private function resetArticleChecksOnReferChange($pArticleId, $sourceTag = '') + { + $pArticleId = intval($pArticleId); + if ($pArticleId <= 0) { + return; + } + try { + (new ReferenceCheckService())->clearArticleChecksByPArticleId($pArticleId); + } catch (\Exception $e) { + \think\Log::error( + 'resetArticleChecksOnReferChange[' . $sourceTag . '] p_article_id=' + . $pArticleId . ' ' . 
$e->getMessage() + ); + } + } + /**获取文章参考文献列表 * @return \think\response\Json @@ -92,6 +113,7 @@ class Preaccept extends Base return jsonError($rule->getError()); } $this->production_article_refer_obj->where('p_article_id',$data['p_article_id'])->update(["state"=>1]); + $this->resetArticleChecksOnReferChange(intval($data['p_article_id']), 'discardRefersByParticleid'); return jsonSuccess([]); } @@ -142,6 +164,7 @@ class Preaccept extends Base } $adId= $this->production_article_refer_obj->insertGetId($insert); $this->production_article_refer_obj->where('p_article_id', $p_info['p_article_id'])->where("p_refer_id", "<>", $adId)->where("index", ">", $pre_refer['index'])->where('state', 0)->setInc('index'); + $this->resetArticleChecksOnReferChange(intval($p_info['p_article_id']), 'addRefer'); return jsonSuccess([]); @@ -198,6 +221,7 @@ class Preaccept extends Base } $adId= $this->production_article_refer_obj->insertGetId($insert); $this->production_article_refer_obj->where('p_article_id', $p_info['p_article_id'])->where("p_refer_id", "<>", $adId)->where("index", ">", $pre_refer['index'])->where('state', 0)->setInc('index'); + $this->resetArticleChecksOnReferChange(intval($p_info['p_article_id']), 'addReferByParticleid'); return jsonSuccess([]); } @@ -233,6 +257,7 @@ class Preaccept extends Base $insert['cs'] = 1; $adId = $this->production_article_refer_obj->insertGetId($insert); $this->production_article_refer_obj->where('p_article_id', $p_info['p_article_id'])->where("p_refer_id", "<>", $adId)->where("index", ">", $pre_refer['index'])->where('state', 0)->setInc('index'); + $this->resetArticleChecksOnReferChange(intval($p_info['p_article_id']), 'addReferNotdoi'); return jsonSuccess([]); } @@ -462,6 +487,17 @@ class Preaccept extends Base // } // $this->production_article_refer_obj->where('p_refer_id', $data['p_refer_id'])->update(['refer_doi' => $data['doi']]); // my_doiToFrag2($this->production_article_refer_obj->where('p_refer_id', $data['p_refer_id'])->find()); + + 
//文献内容更新成功后异步重检该文献对应的全部校对明细(失败不阻塞主流程) + try { + (new ReferenceCheckService())->enqueueRecheckByPReferId( + intval($data['p_refer_id']), + intval($old_refer_info['p_article_id']) + ); + } catch (\Exception $e) { + \think\Log::error('editRefer enqueueRecheckByPReferId p_refer_id=' . $data['p_refer_id'] . ' ' . $e->getMessage()); + } + return jsonSuccess([]); } @@ -1453,6 +1489,7 @@ class Preaccept extends Base return jsonError($rule->getError()); } $refer_info = $this->production_article_refer_obj->where('p_refer_id', $data['p_refer_id'])->find(); + $sibling_p_refer_id = 0; if ($data['act'] == "up") { $up_info = $this->production_article_refer_obj->where('p_article_id', $refer_info['p_article_id'])->where('index', $refer_info['index'] - 1)->where('state', 0)->find(); if (!$up_info) { @@ -1460,6 +1497,7 @@ class Preaccept extends Base } $this->production_article_refer_obj->where('p_refer_id', $up_info['p_refer_id'])->setInc("index"); $this->production_article_refer_obj->where('p_refer_id', $refer_info['p_refer_id'])->setDec("index"); + $sibling_p_refer_id = intval($up_info['p_refer_id']); } else { $down_info = $this->production_article_refer_obj->where('p_article_id', $refer_info['p_article_id'])->where('index', $refer_info['index'] + 1)->where('state', 0)->find(); if (!$down_info) { @@ -1467,7 +1505,19 @@ class Preaccept extends Base } $this->production_article_refer_obj->where('p_refer_id', $refer_info['p_refer_id'])->setInc("index"); $this->production_article_refer_obj->where('p_refer_id', $down_info['p_refer_id'])->setDec("index"); + $sibling_p_refer_id = intval($down_info['p_refer_id']); } + + // 仅同步本次交换的两条 p_refer_id 对应的校对明细 reference_no / refer_index + try { + (new ReferenceCheckService())->syncReferenceNoByPReferIds( + [intval($refer_info['p_refer_id']), $sibling_p_refer_id], + intval($refer_info['p_article_id']) + ); + } catch (\Exception $e) { + \think\Log::error('sortRefer syncReferenceNoByPReferIds: ' . 
$e->getMessage()); + } + return jsonSuccess([]); } diff --git a/application/api/controller/References.php b/application/api/controller/References.php index 47ae2328..759c63bf 100644 --- a/application/api/controller/References.php +++ b/application/api/controller/References.php @@ -1307,4 +1307,231 @@ class References extends Base } return json_encode(['status' => 8,'msg' => 'fail']); } + /** + * 参考文献第一次校对 + * @return \think\response\Json + */ + public function allReferenceCheckAI(){ + //获取参数 + $aParam = empty($aParam) ? $this->request->post() : $aParam; + + //必填值验证 + $iPArticleId = empty($aParam['p_article_id']) ? '' : $aParam['p_article_id']; + if(empty($iPArticleId)){ + return json_encode(array('status' => 2,'msg' => 'Please select an article' )); + } + //查询文章(p_article_id 与 article_id 都要带,下游服务方法两者都用) + $aWhere = ['p_article_id' => $iPArticleId,'state' => ['in',[0,2]]]; + $aProductionArticle = Db::name('production_article')->field('p_article_id,article_id')->where($aWhere)->find(); + if(empty($aProductionArticle)){ + return json_encode(array('status' => 3,'msg' => 'No articles found' )); + } + if($this->checkReferStatus($iPArticleId)==0){ + return jsonError('请修正完文献内容再进行校对。'); + } + //已存在校对记录则禁止重复执行第一次校对,提示走重置接口 + $iExisting = Db::name('article_reference_check_result') + ->where('p_article_id', $iPArticleId) + ->count(); + if(intval($iExisting) > 0){ + return jsonError('该文章已存在校对记录,请使用"重置校对"接口重新校对。'); + } + try { + $svc = new ReferenceCheckService(); + $result = $svc->enqueueByPArticle($aProductionArticle); + return jsonSuccess($result); + } catch (\Exception $e) { + return jsonError($e->getMessage()); + } + } + /** + * 文献校对重置:删除该文章已有的全部校对明细,并重新入队整篇校对 + * POST/GET: article_id(必填) + * @url /api/Article/referenceCheckReset + */ + public function referenceCheckResetAI() + { + //获取参数 + $aParam = empty($aParam) ? $this->request->post() : $aParam; + + //必填值验证 + $iPArticleId = empty($aParam['p_article_id']) ? 
'' : $aParam['p_article_id']; + if(empty($iPArticleId)){ + return json_encode(array('status' => 2,'msg' => 'Please select an article' )); + } + //查询文章(p_article_id 与 article_id 都要带,下游服务方法两者都用) + $aWhere = ['p_article_id' => $iPArticleId,'state' => ['in',[0,2]]]; + $aProductionArticle = Db::name('production_article')->field('p_article_id,article_id')->where($aWhere)->find(); + if(empty($aProductionArticle)){ + return json_encode(array('status' => 3,'msg' => 'No articles found' )); + } + if($this->checkReferStatus($iPArticleId)==0){ + return jsonError('请修正完文献内容再进行校对。'); + } + $iArticleId = empty($aProductionArticle['article_id']) ? 0 : $aProductionArticle['article_id']; + if(empty($iArticleId)){ + return json_encode(array('status' => 4,'msg' => 'Unbound article' )); + } + try { + $result = (new ReferenceCheckService())->resetAndRecheckByArticle($aProductionArticle); + return jsonSuccess($result); + } catch (\Exception $e) { + return jsonError($e->getMessage()); + } + } + + /** + * 清空某篇文章下的全部参考文献校对记录(不重新入队) + * + * 与 referenceCheckResetAI 的区别:reset 是「清空 + 重新校对」, + * 这里只做「清空」一步,校对状态回到未校对,等待用户手动再触发。 + * + * POST/GET: p_article_id(必填) + */ + public function referenceCheckClearAI() + { + $aParam = $this->request->post(); + if (empty($aParam)) { + $aParam = $this->request->param(); + } + + $iPArticleId = empty($aParam['p_article_id']) ? 
0 : intval($aParam['p_article_id']); + if ($iPArticleId <= 0) { + return json_encode(array('status' => 2, 'msg' => 'Please select an article')); + } + + // 校验文章存在(与其它校对接口口径一致:state in [0,2]) + $aProductionArticle = Db::name('production_article') + ->field('p_article_id,article_id') + ->where(['p_article_id' => $iPArticleId, 'state' => ['in', [0, 2]]]) + ->find(); + if (empty($aProductionArticle)) { + return json_encode(array('status' => 3, 'msg' => 'No articles found')); + } + + try { + $deleted = (new ReferenceCheckService())->clearArticleChecksByPArticleId($iPArticleId); + return jsonSuccess([ + 'p_article_id' => $iPArticleId, + 'deleted' => intval($deleted), + ]); + } catch (\Exception $e) { + return jsonError($e->getMessage()); + } + } + + /** + * 按 p_article_id 查整篇引用校对进度(按 reference_no 分组聚合) + * + * POST/GET: p_article_id(必填) + * + * 返回 list 中每项含:reference_no、p_refer_id、status(数值)、 + * total、pending、done、failed、pass、is_pass、last_updated_at、records + * + * status 数值含义: + * 0 = 待校验 1 = 校对中 2 = 校对完成 3 = 校对失败 + */ + public function referenceCheckProgressAI() + { + $aParam = $this->request->post(); + if (empty($aParam)) { + $aParam = $this->request->param(); + } + + $iPArticleId = empty($aParam['p_article_id']) ? 
0 : intval($aParam['p_article_id']); + if ($iPArticleId <= 0) { + return json_encode(array('status' => 2, 'msg' => 'Please select an article')); + } + try { + $result = (new ReferenceCheckService())->getProgressByPArticleId($iPArticleId); + return jsonSuccess($result); + } catch (\Exception $e) { + return jsonError($e->getMessage()); + } + } + + /** + * 按 p_article_id 查整篇文章引用校对总状态(用于前端按钮分流) + * + * POST/GET: p_article_id(必填) + * + * 计数维度是「参考文献」(按 reference_no 分组),不是单条校对明细行。 + * 例:50 条参考文献、底层 111 条校对明细时,total = 50。 + * + * 返回 status 数值含义(整篇): + * 0 = 未校对(一条记录都没有) + * 1 = 校对中(至少 1 条参考文献仍有未跑完的明细) + * 2 = 校对完成(所有参考文献全部明细已结束) + * + * 返回字段:p_article_id、status、total、pending、done、failed、progress_percent + * total —— 参考文献条数 + * pending —— 该条参考文献仍有未跑完明细的数量(含"部分跑完") + * done —— 该条参考文献所有明细都 status=1 的数量 + * failed —— 该条参考文献全部跑完且至少 1 条 status=2 的数量 + * pending + done + failed = total;progress_percent = (done+failed)/total + * + * 分组明细请走 referenceCheckProgressAI。 + */ + public function referenceCheckArticleStatusAI() + { + $aParam = $this->request->post(); + if (empty($aParam)) { + $aParam = $this->request->param(); + } + + $iPArticleId = empty($aParam['p_article_id']) ? 0 : intval($aParam['p_article_id']); + if ($iPArticleId <= 0) { + return json_encode(array('status' => 2, 'msg' => 'Please select an article')); + } + + try { + $result = (new ReferenceCheckService())->getArticleProgressStatusByPArticleId($iPArticleId); + return jsonSuccess($result); + } catch (\Exception $e) { + return jsonError($e->getMessage()); + } + } + + /** + * 按 p_refer_id 查单条参考文献的校对明细 + * + * POST/GET: p_refer_id(必填) + * + * 返回 list 中每项含:am_id、confidence、reason、is_match、is_pass + * 同时附带上下文:p_refer_id、p_article_id、reference_no、total + */ + public function referenceCheckDetailsAI() + { + $aParam = $this->request->post(); + if (empty($aParam)) { + $aParam = $this->request->param(); + } + + $iPReferId = empty($aParam['p_refer_id']) ? 
0 : intval($aParam['p_refer_id']); + if ($iPReferId <= 0) { + return json_encode(array('status' => 2, 'msg' => 'Please select a reference')); + } + + try { + $result = (new ReferenceCheckService())->getCheckDetailsByPReferId($iPReferId); + return jsonSuccess($result); + } catch (\Exception $e) { + return jsonError($e->getMessage()); + } + } + + public function checkReferStatus($p_article_id){ + $list = $this->production_article_refer_obj->where('p_article_id', $p_article_id)->where('state', 0)->select(); + if (!$list) { + return jsonError('references error'); + } + $frag = 1; + foreach ($list as $v) { + if ($v['cs'] == 0) { + $frag = 0; + break; + } + } + return $frag; + } } diff --git a/application/api/job/ReferenceCheck.php b/application/api/job/ReferenceCheck.php index 3b15e6a1..89c5c67d 100644 --- a/application/api/job/ReferenceCheck.php +++ b/application/api/job/ReferenceCheck.php @@ -6,7 +6,6 @@ use think\queue\Job; use app\common\QueueJob; use app\common\QueueRedis; use app\common\ReferenceCheckService; -use app\common\service\LLMService; class ReferenceCheck { @@ -39,14 +38,6 @@ class ReferenceCheck if ($checkId <= 0 && !empty($jobData['data']['check_id'])) { $checkId = intval($jobData['data']['check_id']); } - $sClassName = get_class($this); - $sRedisKey = "queue_job:{$sClassName}:{$checkId}"; - $sRedisValue = uniqid() . '_' . getmypid(); - - if (!$this->oQueueJob->acquireLock($sRedisKey, $sRedisValue, $job)) { - return; - } - if ($checkId <= 0) { $job->delete(); return; @@ -63,44 +54,19 @@ class ReferenceCheck return; } + $sClassName = get_class($this); + $sRedisKey = "queue_job:{$sClassName}:{$checkId}"; + $sRedisValue = uniqid() . '_' . 
getmypid(); + + $svc = new ReferenceCheckService(); + $svc->clearReferenceCheckQueueLock($checkId); + + if (!$this->oQueueJob->acquireLock($sRedisKey, $sRedisValue, $job)) { + return; + } + try { - $svc = new ReferenceCheckService(); - - $contentA = $svc->resolveMainContentForJob($row); - $contentB = trim((string)(isset($row['refer_text']) ? $row['refer_text'] : '')); - $refer = null; - - if (intval($row['p_refer_id']) > 0) { - $refer = Db::name('production_article_refer') - ->where('p_refer_id', intval($row['p_refer_id'])) - ->where('state', 0) - ->find(); - if ($refer && $contentB === '') { - $contentB = $svc->formatReferForLlm($refer); - } - } - - if ($contentA === '' || $contentB === '') { - $this->markFailed($checkId, 'Missing article_main.content or refer_text'); - $job->delete(); - return; - } - - $llm = new LLMService(); - $llmResult = $llm->checkReference($contentA, $contentB, false); - $canSupport = $svc->parseLlmCanSupport($llmResult); - $confidence = floatval($llmResult['confidence']); - - $svc->updateCheckResult($checkId, [ - 'can_support' => $canSupport ? 1 : 0, - 'is_match' => $canSupport ? 1 : 0, - 'confidence' => $confidence, - 'reason' => isset($llmResult['reason']) ? $llmResult['reason'] : '', - 'status' => 1, - 'error_msg' => '', - ]); - - $svc->maybeEnqueueSecondPass($checkId, $confidence); + $svc->runReferenceCheckOnce($checkId); $amId = intval(isset($row['am_id']) ? $row['am_id'] : 0); if ($amId > 0) { diff --git a/application/api/job/ReferenceCheckTwo.php b/application/api/job/ReferenceCheckTwo.php index b28c9f6c..564af204 100644 --- a/application/api/job/ReferenceCheckTwo.php +++ b/application/api/job/ReferenceCheckTwo.php @@ -88,12 +88,24 @@ class ReferenceCheckTwo $llm = new LLMService(); $llmResult = $llm->checkReference($contentA, $referText, true, $doiBlock); + $requestFailed = !empty($llmResult['request_failed']); $canSupport = $svc->parseLlmCanSupport($llmResult); $tag = $payload['has_abstract'] ? ('[Crossref复核' . 
($payload['doi_used'] !== '' ? ' ' . $payload['doi_used'] : '') . ']') : '[Crossref复核-无摘要]'; $reason = $tag . ' ' . (isset($llmResult['reason']) ? $llmResult['reason'] : ''); + // LLM 通讯失败:写 status=2 并抛异常触发队列重试 + if ($requestFailed) { + $svc->updateCheckResult($checkId, [ + 'confidence' => floatval($llmResult['confidence']), + 'reason' => $reason, + 'status' => 2, + 'error_msg' => isset($llmResult['reason']) ? $llmResult['reason'] : 'LLM request failed', + ]); + throw new \RuntimeException(isset($llmResult['reason']) ? $llmResult['reason'] : 'LLM request failed'); + } + $affected = $svc->updateCheckResult($checkId, [ 'can_support' => $canSupport ? 1 : 0, 'is_match' => $canSupport ? 1 : 0, diff --git a/application/common/QueueRedis.php b/application/common/QueueRedis.php index fb9fb5fb..4412d1ba 100644 --- a/application/common/QueueRedis.php +++ b/application/common/QueueRedis.php @@ -80,6 +80,25 @@ class QueueRedis return null; } } + + /** + * 删除一个或多个 Redis 键(用于重检前清除队列任务 completed 标记) + */ + public function deleteRedisKeys(array $keys) + { + $keys = array_values(array_filter($keys, function ($k) { + return $k !== null && $k !== ''; + })); + if (empty($keys)) { + return true; + } + try { + $this->connect()->del(...$keys); + return true; + } catch (\Exception $e) { + return false; + } + } // 安全释放锁(仅当值匹配时删除) public function releaseRedisLock($key, $value) diff --git a/application/common/ReferenceCheckService.php b/application/common/ReferenceCheckService.php index 593f1548..77b44e9d 100644 --- a/application/common/ReferenceCheckService.php +++ b/application/common/ReferenceCheckService.php @@ -5,6 +5,7 @@ namespace app\common; use think\Db; use think\Env; use think\Queue; +use app\common\service\LLMService; /** * 正文 <blue>[n]</blue> 引用与 t_production_article_refer(index+1=n)相关性校对。 @@ -20,6 +21,48 @@ class ReferenceCheckService const AM_STATUS_FAIL = 2; const AM_STATUS_RUNNING = 3; + /** 引用校对进度(按 reference_no 分组聚合后的对外状态) */ + const PROGRESS_PENDING = 0; // 待校验:分组内全部明细 
status=0 + const PROGRESS_CHECKING = 1; // 校对中:分组内部分明细已结束、部分仍为 0 + const PROGRESS_COMPLETED = 2; // 校对完成:分组内全部明细 status=1 + const PROGRESS_FAILED = 3; // 校对失败:分组内全部明细已结束,且至少 1 条 status=2 + + /** 整篇文章的引用校对状态(对外整体状态,用于"开始/重置"按钮分流) */ + const ARTICLE_PROGRESS_NONE = 0; // 还没有任何校对记录 + const ARTICLE_PROGRESS_RUNNING = 1; // 至少 1 条 status=0(队列里还有未跑完的) + const ARTICLE_PROGRESS_COMPLETED = 2; // 所有明细 status != 0(全部已完成或失败) + + /** + * 单条校对明细的对外状态(getProgressByPArticleId 返回的 records[i].status) + * + * DB 里 article_reference_check_result.status 只有 0/1/2 三种值; + * RECORD_PROCESSING 是基于 Redis 队列锁 :status='processing' 的瞬时态, + * 并不持久化。worker 进入 LLM 调用期间 DB.status 仍是 0,需要靠队列锁识别。 + */ + const RECORD_PENDING = 0; // 待校对,已入队但还没被 worker 拾起 + const RECORD_COMPLETED = 1; // 校对完成 + const RECORD_FAILED = 2; // 校对失败 + const RECORD_PROCESSING = 3; // 处理中:worker 正在跑 LLM(Redis :status='processing') + + /** LLM 评分(confidence)通过阈值:>= 该值视为"通过" */ + const PASS_CONFIDENCE_THRESHOLD = 0.65; + + /** + * [...] 引用标签内允许的字符类(带 /u 修饰符使用)。 + * + * 除 ASCII 数字、半角逗号、半角连字符、空白外,还兼容常见排版变体: + * , U+FF0C 全角逗号 + * – U+2013 EN DASH + * — U+2014 EM DASH + * − U+2212 MINUS SIGN + * ‐ U+2010 HYPHEN + * ‑ U+2011 NON-BREAKING HYPHEN + * + * 若不支持变体连字符,会导致 [19–21] 这种区间引用整段被 preg 漏掉, + * 进而丢失对应的 reference_no 校对记录。 + */ + const BLUE_TAG_REGEX = '/\[([\d,,\-\x{2013}\x{2014}\x{2212}\x{2010}\x{2011}\s]+)\]<\/blue>/u'; + /** * 兼容无 ?? 
的 PHP 版本 */ @@ -166,13 +209,94 @@ class ReferenceCheckService 'queued' => count($checkIds2), ]; } + public function enqueueByPArticle($prod){ + if (empty($prod)) { + throw new \RuntimeException('production_article not found'); + } + $pArticleId = intval($prod['p_article_id']); + $articleId = intval($prod['article_id']); + $referMap = $this->loadReferMapByPArticleId($pArticleId); + + $mains = Db::name('article_main') + ->field('am_id,content,article_id') + ->where('article_id', $articleId) + ->whereIn('state', [0, 2]) + ->order('sort asc') + ->select(); + if (empty($mains)) { + throw new \RuntimeException('article_main is empty'); + } + $queued = 0; + $skipped = 0; + $pendingJobs = []; + $amIdsWithJobs = []; + $now = date('Y-m-d H:i:s'); + foreach ($mains as $main) { + $amId = intval($main['am_id']); + $citations = $this->extractReferences((string)$main['content']); + if (empty($citations)) { + $this->setAmRefCheckStatus($amId, self::AM_STATUS_NONE); + continue; + } + foreach ($citations as $cite) { + foreach ($cite['reference_numbers'] as $refNo) { + $referIndex = $refNo - 1; + if ($referIndex < 0 || !isset($referMap[$referIndex])) { + $skipped++; + continue; + } + $refer = $referMap[$referIndex]; + $referText = $this->formatReferForLlm($refer); + + // [70-73] 展开为 reference_no=70,71,72,73 共 4 条记录;先入队表,再按文献号正序校对 + $checkId = Db::name('article_reference_check_result')->insertGetId([ + 'article_id' => $main['article_id'], + 'p_article_id' => $pArticleId, + 'am_id' => $amId, + 'reference_no' => $refNo, + 'refer_index' => $refNo, + 'origin_text' => $cite['original_text'], + 'refer_text' => $referText, + 'p_refer_id' => $referMap[$referIndex]['p_refer_id'], + 'text_start' => $cite['text_start'], + 'text_end' => $cite['text_end'], + 'created_at' => $now, + 'updated_at' => $now, + ]); + + $pendingJobs[] = [ + 'check_id' => intval($checkId), + 'reference_no' => intval($refNo), + 'am_id' => $amId, + 'text_start' => intval($cite['text_start']), + ]; + $queued++; + 
$amIdsWithJobs[$amId] = true; + } + } + } + + $checkIds = $this->pushJobsSortedByReferenceNo($pendingJobs); + foreach (array_keys($amIdsWithJobs) as $amId) { + $this->setAmRefCheckStatus($amId, self::AM_STATUS_RUNNING); + } + + return [ + 'article_id' => $articleId, + 'p_article_id' => $pArticleId, + 'queued' => $queued, + 'skipped' => $skipped, + 'check_ids' => $checkIds, + 'queue' => self::QUEUE_NAME, + ]; + } public function enqueueByArticle($articleId){ if ($articleId <= 0) { throw new \InvalidArgumentException('article_id is required'); } $prod = Db::name('production_article') ->where('article_id', $articleId) - ->where('state', [0, 2]) + ->whereIn('state', [0, 2]) ->find(); if (empty($prod)) { throw new \RuntimeException('production_article not found for article_id=' . $articleId); @@ -191,10 +315,9 @@ class ReferenceCheckService } $queued = 0; $skipped = 0; - $checkIds = []; - $delay = 0; + $pendingJobs = []; $amIdsWithJobs = []; - + $now = date('Y-m-d H:i:s'); foreach ($mains as $main) { $amId = intval($main['am_id']); $citations = $this->extractReferences((string)$main['content']); @@ -212,12 +335,11 @@ class ReferenceCheckService $refer = $referMap[$referIndex]; $referText = $this->formatReferForLlm($refer); - $now = date('Y-m-d H:i:s'); - // [70-73] 展开为 reference_no=70,71,72,73 共 4 条记录 + // [70-73] 展开为 reference_no=70,71,72,73 共 4 条记录;先入队表,再按文献号正序校对 $checkId = Db::name('article_reference_check_result')->insertGetId([ 'article_id' => $main['article_id'], 'p_article_id' => $pArticleId, - 'am_id' => intval($main['am_id']), + 'am_id' => $amId, 'reference_no' => $refNo, 'refer_index' => $refNo, 'origin_text' => $cite['original_text'], @@ -229,14 +351,19 @@ class ReferenceCheckService 'updated_at' => $now, ]); - $this->pushJob(intval($checkId), $delay); - $checkIds[] = $checkId; + $pendingJobs[] = [ + 'check_id' => intval($checkId), + 'reference_no' => intval($refNo), + 'am_id' => $amId, + 'text_start' => intval($cite['text_start']), + ]; $queued++; - $delay 
+= 1; $amIdsWithJobs[$amId] = true; } } } + + $checkIds = $this->pushJobsSortedByReferenceNo($pendingJobs); foreach (array_keys($amIdsWithJobs) as $amId) { $this->setAmRefCheckStatus($amId, self::AM_STATUS_RUNNING); } @@ -308,13 +435,464 @@ class ReferenceCheckService ]); } + /** + * 按 p_article_id 清空整篇文章的引用校对明细 + 重置节级 ref_check_status。 + * + * 用于新增/删除文献后,旧的 reference_no 全部错位、原校对结果失效的场景: + * 物理删除后,整篇状态查询自然回到 ARTICLE_PROGRESS_NONE(未校对)。 + * + * @return int 被删除的明细条数 + */ + public function clearArticleChecksByPArticleId($pArticleId) + { + $pArticleId = intval($pArticleId); + if ($pArticleId <= 0) { + return 0; + } + + // 先反查 article_id(用于重置 article_main.ref_check_status 节级状态) + $articleId = intval(Db::name('production_article') + ->where('p_article_id', $pArticleId) + ->whereIn('state', [0, 2]) + ->value('article_id')); + + // 先清掉旧记录对应的队列 Redis 锁,避免在途 worker 写回数据 + $oldIds = Db::name('article_reference_check_result') + ->where('p_article_id', $pArticleId) + ->column('id'); + foreach ($oldIds as $oldId) { + $this->clearReferenceCheckQueueLock(intval($oldId)); + } + + $deleted = Db::name('article_reference_check_result') + ->where('p_article_id', $pArticleId) + ->delete(); + + if ($articleId > 0) { + Db::name('article_main') + ->where('article_id', $articleId) + ->whereIn('state', [0, 2]) + ->update(['ref_check_status' => self::AM_STATUS_NONE]); + } + + return intval($deleted); + } + public function clearArticleChecks($articleId) { - Db::name('article_reference_check_result')->where('article_id', $articleId)->delete(); + $articleId = intval($articleId); + if ($articleId <= 0) { + return 0; + } + + // 先清掉旧记录对应的队列 Redis 锁,否则同 check_id 在 TTL 内不会再次执行 + $oldIds = Db::name('article_reference_check_result') + ->where('article_id', $articleId) + ->column('id'); + foreach ($oldIds as $oldId) { + $this->clearReferenceCheckQueueLock(intval($oldId)); + } + + $deleted = Db::name('article_reference_check_result')->where('article_id', $articleId)->delete(); Db::name('article_main') 
->where('article_id', $articleId) ->whereIn('state', [0, 2]) ->update(['ref_check_status' => self::AM_STATUS_NONE]); + + return intval($deleted); + } + + /** + * 文献列表局部挪动后,仅刷新指定 p_refer_id 对应的校对明细 reference_no / refer_index。 + * + * 读 production_article_refer 的最新 index 来算新序号(index + 1),避免外部传入过期值。 + * 仅更新受影响的两条左右记录,降低与并发挪动互相覆盖的风险。 + * + * @param int[] $pReferIds 受影响的 p_refer_id(一般为 2 个:被挪条目 + 其相邻条目) + * @param int $pArticleId 可选:附加 p_article_id 限定,进一步缩小行锁范围 + * @return array{p_refer_ids:int[], affected_rows:int, changes:array} + */ + public function syncReferenceNoByPReferIds(array $pReferIds, $pArticleId = 0) + { + $pReferIds = array_values(array_unique(array_filter(array_map('intval', $pReferIds)))); + $pArticleId = intval($pArticleId); + if (empty($pReferIds)) { + return [ + 'p_refer_ids' => [], + 'affected_rows' => 0, + 'changes' => [], + ]; + } + + $referQuery = Db::name('production_article_refer') + ->field('p_refer_id,p_article_id,index') + ->whereIn('p_refer_id', $pReferIds) + ->where('state', 0); + if ($pArticleId > 0) { + $referQuery->where('p_article_id', $pArticleId); + } + $refers = $referQuery->select(); + if (empty($refers)) { + return [ + 'p_refer_ids' => $pReferIds, + 'affected_rows' => 0, + 'changes' => [], + ]; + } + + $now = date('Y-m-d H:i:s'); + $affected = 0; + $changes = []; + + foreach ($refers as $refer) { + $pReferId = intval($refer['p_refer_id']); + $newNo = intval($refer['index']) + 1; + + $updateQuery = Db::name('article_reference_check_result') + ->where('p_refer_id', $pReferId) + ->where('reference_no', '<>', $newNo); + if ($pArticleId > 0) { + $updateQuery->where('p_article_id', $pArticleId); + } + $rows = $updateQuery->update([ + 'reference_no' => $newNo, + 'refer_index' => $newNo, + 'updated_at' => $now, + ]); + + if ($rows > 0) { + $affected += intval($rows); + $changes[] = [ + 'p_refer_id' => $pReferId, + 'new_ref_no' => $newNo, + 'affected_rows' => intval($rows), + ]; + } + } + + return [ + 'p_refer_ids' => $pReferIds, + 
'affected_rows' => $affected, + 'changes' => $changes, + ]; + } + + /** + * 重置整篇稿件的引用校对:删除旧明细 + 清理队列锁 + 全文重新入队校对 + * + * @return array + */ + /** + * 按 p_article_id 查整篇文章的引用校对总状态。 + * + * 统计维度是"参考文献"(按 reference_no 分组),不是单条校对明细行。 + * 例如 50 条参考文献、底层明细 111 条时,total 返回 50。 + * + * 返回 status 数值含义(整篇): + * 0 = ARTICLE_PROGRESS_NONE 一条校对记录都没有 + * 1 = ARTICLE_PROGRESS_RUNNING 至少 1 条参考文献仍有未跑完的明细 + * 2 = ARTICLE_PROGRESS_COMPLETED 所有参考文献的全部明细都已结束 + * + * 每条参考文献按其明细 status 分布落桶(互斥): + * pending —— 组内任一明细 status=0(含部分跑完的"校对中"也归此桶) + * done —— 组内全部明细 status=1 + * failed —— 组内全部明细已结束、至少 1 条 status=2 + * + * pending + done + failed = total;progress_percent = (done + failed) / total。 + * 分组明细请走 getProgressByPArticleId(控制器 referenceCheckProgressAI)。 + * + * @return array{p_article_id:int, status:int, total:int, pending:int, done:int, failed:int, progress_percent:float} + */ + public function getArticleProgressStatusByPArticleId($pArticleId) + { + $pArticleId = intval($pArticleId); + if ($pArticleId <= 0) { + throw new \InvalidArgumentException('p_article_id is required'); + } + + // 一条 SQL 按 reference_no 聚合,组内 status 分布一并算出来; + // 50 条参考文献 → 返回 50 行,PHP 走一次循环分桶即可 + $rows = Db::name('article_reference_check_result') + ->field('reference_no' + . ', SUM(CASE WHEN status = 0 THEN 1 ELSE 0 END) AS pending_cnt' + . 
', SUM(CASE WHEN status = 2 THEN 1 ELSE 0 END) AS failed_cnt') + ->where('p_article_id', $pArticleId) + ->group('reference_no') + ->select(); + + if (empty($rows)) { + return [ + 'p_article_id' => $pArticleId, + 'status' => self::ARTICLE_PROGRESS_NONE, + 'total' => 0, + 'pending' => 0, + 'done' => 0, + 'failed' => 0, + 'progress_percent' => 0, + ]; + } + + $pending = 0; + $done = 0; + $failed = 0; + foreach ($rows as $row) { + $pendingCnt = intval($this->arrGet($row, 'pending_cnt', 0)); + $failedCnt = intval($this->arrGet($row, 'failed_cnt', 0)); + if ($pendingCnt > 0) { + $pending++; + } elseif ($failedCnt > 0) { + $failed++; + } else { + $done++; + } + } + + $total = count($rows); + $articleStatus = $pending > 0 + ? self::ARTICLE_PROGRESS_RUNNING + : self::ARTICLE_PROGRESS_COMPLETED; + $finished = $done + $failed; + $progressPercent = round($finished / $total * 100, 1); + + return [ + 'p_article_id' => $pArticleId, + 'status' => $articleStatus, + 'total' => $total, + 'pending' => $pending, + 'done' => $done, + 'failed' => $failed, + 'progress_percent' => $progressPercent, + ]; + } + + /** + * 按 p_article_id 查整篇引用校对进度,按 reference_no 分组聚合状态,并展开每条明细。 + * + * 单条 article_reference_check_result.status: + * 0 = 待校验 1 = 校对完成 2 = 校对失败 + * + * 分组(reference_no)状态(返回字段 status,数值类型): + * 0 = PROGRESS_PENDING 待校验 :分组内全部明细 status=0 + * 1 = PROGRESS_CHECKING 校对中 :分组内部分明细已结束、部分仍为 0 + * 2 = PROGRESS_COMPLETED 校对完成:分组内全部明细 status=1 + * 3 = PROGRESS_FAILED 校对失败:分组内全部明细已结束,且至少 1 条 status=2 + * + * 每个分组还会展开 records 子数组,给出该 reference_no 下每条 check 明细的: + * - status(同上 0/1/2) + * - confidence 评分 + * - is_pass(confidence >= PASS_CONFIDENCE_THRESHOLD 视为通过) + * + * @return array{p_article_id:int, total_groups:int, summary:array, list:array} + */ + public function getProgressByPArticleId($pArticleId) + { + $pArticleId = intval($pArticleId); + if ($pArticleId <= 0) { + throw new \InvalidArgumentException('p_article_id is required'); + } + + $rows = Db::name('article_reference_check_result') + 
->field('id,p_refer_id,reference_no,am_id,status,confidence,is_match,reason,text_start,text_end,updated_at') + ->where('p_article_id', $pArticleId) + ->order('reference_no asc, id asc') + ->select(); + + // summary 用数值键,0/1/2/3 对应 PROGRESS_* 常量 + $summary = [ + self::PROGRESS_PENDING => 0, + self::PROGRESS_CHECKING => 0, + self::PROGRESS_COMPLETED => 0, + self::PROGRESS_FAILED => 0, + ]; + if (empty($rows)) { + return [ + 'p_article_id' => $pArticleId, + 'total_groups' => 0, + 'summary' => $summary, + 'list' => [], + ]; + } + + $groups = []; + foreach ($rows as $row) { + $refNo = intval($this->arrGet($row, 'reference_no', 0)); + $pReferId = intval($this->arrGet($row, 'p_refer_id', 0)); + if (!isset($groups[$refNo])) { + $groups[$refNo] = [ + 'reference_no' => $refNo, + 'p_refer_id' => $pReferId, + 'total' => 0, + 'pending' => 0, + 'done' => 0, + 'failed' => 0, + 'pass' => 0, + 'last_updated_at' => '', + 'records' => [], + ]; + } + // 同一 reference_no 理论上只对应一个 p_refer_id;如果出现混淆,保留首次出现的非空 id + if ($groups[$refNo]['p_refer_id'] <= 0 && $pReferId > 0) { + $groups[$refNo]['p_refer_id'] = $pReferId; + } + + $groups[$refNo]['total']++; + $st = intval($this->arrGet($row, 'status', 0)); + if ($st === 0) { + $groups[$refNo]['pending']++; + } elseif ($st === 1) { + $groups[$refNo]['done']++; + } elseif ($st === 2) { + $groups[$refNo]['failed']++; + } + + $upd = (string)$this->arrGet($row, 'updated_at', ''); + if ($upd > $groups[$refNo]['last_updated_at']) { + $groups[$refNo]['last_updated_at'] = $upd; + } + + $confidence = floatval($this->arrGet($row, 'confidence', 0)); + $isPass = $confidence >= self::PASS_CONFIDENCE_THRESHOLD; + if ($isPass) { + $groups[$refNo]['pass']++; + } + + $groups[$refNo]['records'][] = [ + 'check_id' => intval($this->arrGet($row, 'id', 0)), + 'am_id' => intval($this->arrGet($row, 'am_id', 0)), + 'status' => $st, + 'confidence' => $confidence, + 'is_pass' => $isPass, + 'is_match' => intval($this->arrGet($row, 'is_match', 0)), + 'reason' => 
(string)$this->arrGet($row, 'reason', ''), + 'text_start' => intval($this->arrGet($row, 'text_start', 0)), + 'text_end' => intval($this->arrGet($row, 'text_end', 0)), + 'last_updated_at' => $upd, + ]; + } + + $list = []; + foreach ($groups as $g) { + $total = $g['total']; + $pending = $g['pending']; + $failed = $g['failed']; + $pass = $g['pass']; + + if ($pending === $total) { + $status = self::PROGRESS_PENDING; + } elseif ($pending === 0) { + $status = $failed > 0 ? self::PROGRESS_FAILED : self::PROGRESS_COMPLETED; + } else { + $status = self::PROGRESS_CHECKING; + } + + // 整体通过校验:分组已全部完成(无 pending、无 failed),且每条 confidence >= 0.65 + $g['is_pass'] = ( + $status === self::PROGRESS_COMPLETED + && $total > 0 + && $pass === $total + ); + + $summary[$status]++; + $g['status'] = $status; + $list[] = $g; + } + + usort($list, function ($a, $b) { + return $a['reference_no'] - $b['reference_no']; + }); + + return [ + 'p_article_id' => $pArticleId, + 'total_groups' => count($list), + 'summary' => $summary, + 'list' => $list, + ]; + } + + /** + * 按 p_refer_id 查这条参考文献的所有校对明细。 + * + * 每条 record 返回: + * - am_id 命中的 article_main 主键 + * - confidence 匹配置信度(0~1) + * - reason LLM 给出的判定理由 + * - is_match 是否匹配(来自 article_reference_check_result.is_match) + * - is_pass 是否通过校验(confidence >= PASS_CONFIDENCE_THRESHOLD) + * + * @param int $pReferId production_article_refer.p_refer_id + * @return array{p_refer_id:int, p_article_id:int, reference_no:int, total:int, list:array} + */ + public function getCheckDetailsByPReferId($pReferId) + { + $pReferId = intval($pReferId); + if ($pReferId <= 0) { + throw new \InvalidArgumentException('p_refer_id is required'); + } + + $rows = Db::name('article_reference_check_result') + ->field('id,p_article_id,reference_no,am_id,confidence,is_match,reason') + ->where('p_refer_id', $pReferId) + ->order('id asc') + ->select(); + + $list = []; + $pArticleId = 0; + $referenceNo = 0; + foreach ($rows as $row) { + // 取首条出现的 p_article_id / reference_no 作为该 refer 的上下文 + 
if ($pArticleId <= 0) { + $pArticleId = intval($this->arrGet($row, 'p_article_id', 0)); + } + if ($referenceNo <= 0) { + $referenceNo = intval($this->arrGet($row, 'reference_no', 0)); + } + + $confidence = floatval($this->arrGet($row, 'confidence', 0)); + $list[] = [ + 'am_id' => intval($this->arrGet($row, 'am_id', 0)), + 'confidence' => $confidence, + 'reason' => (string)$this->arrGet($row, 'reason', ''), + 'is_match' => intval($this->arrGet($row, 'is_match', 0)), + 'is_pass' => $confidence >= self::PASS_CONFIDENCE_THRESHOLD, + ]; + } + + return [ + 'p_refer_id' => $pReferId, + 'p_article_id' => $pArticleId, + 'reference_no' => $referenceNo, + 'total' => count($list), + 'list' => $list, + ]; + } + + public function resetAndRecheckByArticle($aProductionArticle) + { + if (empty($aProductionArticle) || !is_array($aProductionArticle)) { + throw new \InvalidArgumentException('production_article is required'); + } + $pArticleId = intval($this->arrGet($aProductionArticle, 'p_article_id', 0)); + $articleId = intval($this->arrGet($aProductionArticle, 'article_id', 0)); + if ($pArticleId <= 0 || $articleId <= 0) { + throw new \InvalidArgumentException('production_article requires both p_article_id and article_id'); + } + + $existing = Db::name('article_reference_check_result') + ->where('p_article_id', $pArticleId) + ->count(); + if (intval($existing) <= 0) { + throw new \RuntimeException('no existing reference check records for p_article_id=' . 
$pArticleId); + } + + $cleared = $this->clearArticleChecks($articleId); + $enqueueResult = $this->enqueueByArticle($articleId); + + if (!is_array($enqueueResult)) { + $enqueueResult = []; + } + $enqueueResult['cleared'] = $cleared; + $enqueueResult['reset'] = 1; + return $enqueueResult; } public static function amStatusLabel($status) @@ -571,7 +1149,7 @@ class ReferenceCheckService // 1) 先标记 blue 内各序号(在原文上操作,[70-73] 仅标不合理者如 70、71) preg_match_all( - '/\[([\d,\-\s]+)\]<\/blue>/', + self::BLUE_TAG_REGEX, $html, $matches, PREG_OFFSET_CAPTURE @@ -619,7 +1197,9 @@ class ReferenceCheckService $tagClass = !empty($badNums) ? ' ref-cite-error' : ''; $groupIds = !empty($badNums) - ? implode(',', array_map('intval', array_column($badNums, 'check_id'))) + ? implode(',', array_map(function ($row) { + return (int) $this->resolveCheckRowId($row); + }, $badNums)) : ''; $newHtml = '[' . $innerMarked . ']'; @@ -718,13 +1298,388 @@ class ReferenceCheckService $parts[] = ucfirst($f) . ': ' . $v; } } + $frag = trim((string)$this->arrGet($refer, 'refer_frag', '')); $content = trim((string)$this->arrGet($refer, 'refer_content', '')); - if ($content !== '') { + if ($frag !== '') { + $parts[] = 'Reference: ' . $frag; + } elseif ($content !== '') { $parts[] = 'Reference: ' . 
$content; } return implode("\n", $parts); } + /** + * 前端修改参考文献后重新校对:仅处理已有校对记录,刷新 refer_text、重置结果并入队;无记录直接返回 + * + * @param int $articleId + * @param int $pReferId t_production_article_refer.p_refer_id(优先) + * @param int $referenceNo 文献序号 index+1(无 p_refer_id 时用) + * @return array + */ + /** + * 编辑某条文献内容后,按 p_refer_id 异步重新校对该文献对应的全部 check 明细 + * + * 流程:刷新 refer_text/refer_index → 重置 status/is_match/confidence/reason + * → 设节级 ref_check_status=RUNNING → 投递到 ReferenceCheck 队列 + * + * 与 recheckByRefer 的差异:本方法**不**在请求内同步跑 LLM,仅入队,立即返回。 + * 前端可调 getProgressByPArticleId 轮询进度。 + * + * @param int $pReferId t_production_article_refer.p_refer_id(必填) + * @param int $pArticleId 可选:传入跳过 refer 表二次查表 + * @return array{p_refer_id:int, p_article_id:int, reference_no:int, reset:int, queued:int, check_ids:int[], queue:string} + */ + public function enqueueRecheckByPReferId($pReferId, $pArticleId = 0) + { + $pReferId = intval($pReferId); + if ($pReferId <= 0) { + throw new \InvalidArgumentException('p_refer_id is required'); + } + + $refer = Db::name('production_article_refer') + ->where('p_refer_id', $pReferId) + ->where('state', 0) + ->find(); + if (empty($refer)) { + throw new \RuntimeException('production_article_refer not found, p_refer_id=' . $pReferId); + } + + $pArticleId = intval($pArticleId); + if ($pArticleId <= 0) { + $pArticleId = intval($this->arrGet($refer, 'p_article_id', 0)); + } + if ($pArticleId <= 0) { + throw new \RuntimeException('p_article_id is missing for p_refer_id=' . 
$pReferId); + } + + $referenceNo = intval($this->arrGet($refer, 'index', 0)) + 1; + $referText = $this->formatReferForLlm($refer); + $now = date('Y-m-d H:i:s'); + + $rows = Db::name('article_reference_check_result') + ->where('p_article_id', $pArticleId) + ->where('p_refer_id', $pReferId) + ->select(); + + if (empty($rows)) { + return [ + 'p_refer_id' => $pReferId, + 'p_article_id' => $pArticleId, + 'reference_no' => $referenceNo, + 'reset' => 0, + 'queued' => 0, + 'check_ids' => [], + 'queue' => self::QUEUE_NAME, + ]; + } + + $resetFields = [ + 'refer_text' => $referText, + 'refer_index' => $referenceNo, + 'reference_no' => $referenceNo, + 'status' => 0, + 'is_match' => 0, + 'can_support' => 0, + 'confidence' => 0, + 'reason' => '', + 'error_msg' => '', + 'updated_at' => $now, + ]; + + $pendingJobs = []; + $amIds = []; + foreach ($rows as $row) { + $checkId = $this->resolveCheckRowId($row); + Db::name('article_reference_check_result')->where('id', $checkId)->update($resetFields); + // 旧的队列完成标记必须清掉,否则同 check_id 再次投递会被 acquireLock 静默丢弃 + $this->clearReferenceCheckQueueLock($checkId); + $pendingJobs[] = [ + 'check_id' => $checkId, + 'reference_no' => $referenceNo, + 'am_id' => intval($this->arrGet($row, 'am_id', 0)), + 'text_start' => intval($this->arrGet($row, 'text_start', 0)), + ]; + $amId = intval($this->arrGet($row, 'am_id', 0)); + if ($amId > 0) { + $amIds[$amId] = true; + } + } + + foreach (array_keys($amIds) as $amId) { + $this->setAmRefCheckStatus($amId, self::AM_STATUS_RUNNING); + } + + $checkIds = $this->pushJobsSortedByReferenceNo($pendingJobs); + + return [ + 'p_refer_id' => $pReferId, + 'p_article_id' => $pArticleId, + 'reference_no' => $referenceNo, + 'reset' => count($rows), + 'queued' => count($checkIds), + 'check_ids' => $checkIds, + 'queue' => self::QUEUE_NAME, + ]; + } + + public function recheckByRefer($articleId, $pReferId = 0, $referenceNo = 0) + { + $articleId = intval($articleId); + if ($articleId <= 0) { + throw new 
\InvalidArgumentException('article_id is required'); + } + + $ctx = $this->resolveReferForRecheck($articleId, intval($pReferId), intval($referenceNo)); + $refer = $ctx['refer']; + $pReferId = $ctx['p_refer_id']; + $referenceNo = $ctx['reference_no']; + $pArticleId = $ctx['p_article_id']; + $referText = $this->formatReferForLlm($refer); + $now = date('Y-m-d H:i:s'); + + $rows = Db::name('article_reference_check_result') + ->where('article_id', $articleId) + ->where(function ($query) use ($pReferId, $referenceNo) { + $query->where('p_refer_id', $pReferId)->whereOr('reference_no', $referenceNo); + }) + ->select(); + + if (empty($rows)) { + return [ + 'article_id' => $articleId, + 'p_refer_id' => $pReferId, + 'reference_no' => $referenceNo, + 'reset' => 0, + 'queued' => 0, + 'check_ids' => [], + 'queue' => self::QUEUE_NAME, + ]; + } + + $resetFields = [ + 'refer_text' => $referText, + 'p_refer_id' => $pReferId, + 'p_article_id' => $pArticleId, + 'refer_index' => $referenceNo, + 'status' => 0, + 'is_match' => 0, + 'can_support' => 0, + 'confidence' => 0, + 'reason' => '', + 'error_msg' => '', + 'updated_at' => $now, + ]; + + $pendingJobs = []; + $amIds = []; + foreach ($rows as $row) { + $checkId = $this->resolveCheckRowId($row); + Db::name('article_reference_check_result')->where('id', $checkId)->update($resetFields); + $pendingJobs[] = [ + 'check_id' => $checkId, + 'reference_no' => $referenceNo, + 'am_id' => intval($row['am_id']), + 'text_start' => intval(isset($row['text_start']) ? 
$row['text_start'] : 0), + ]; + $amId = intval($row['am_id']); + if ($amId > 0) { + $amIds[$amId] = true; + } + } + + foreach (array_keys($amIds) as $amId) { + $this->setAmRefCheckStatus($amId, self::AM_STATUS_RUNNING); + } + + usort($pendingJobs, function ($a, $b) { + if ($a['reference_no'] !== $b['reference_no']) { + return $a['reference_no'] - $b['reference_no']; + } + if ($a['am_id'] !== $b['am_id']) { + return $a['am_id'] - $b['am_id']; + } + return $a['text_start'] - $b['text_start']; + }); + + $checkIds = []; + $results = []; + $failed = []; + foreach ($pendingJobs as $job) { + $checkId = intval($job['check_id']); + $checkIds[] = $checkId; + $this->clearReferenceCheckQueueLock($checkId); + try { + $results[] = $this->runReferenceCheckOnce($checkId); + } catch (\Exception $e) { + $failed[] = [ + 'check_id' => $checkId, + 'error' => $e->getMessage(), + ]; + \think\Log::error('recheckByRefer check_id=' . $checkId . ' ' . $e->getMessage()); + } + } + + foreach (array_keys($amIds) as $amId) { + $this->syncAmRefCheckStatus($amId); + } + + return [ + 'article_id' => $articleId, + 'p_refer_id' => $pReferId, + 'reference_no' => $referenceNo, + 'reset' => count($rows), + 'checked' => count($results), + 'failed' => count($failed), + 'check_ids' => $checkIds, + 'results' => $results, + 'errors' => $failed, + ]; + } + + /** + * 清除队列 Redis 完成标记,避免重检任务被 acquireLock 静默丢弃 + */ + public function clearReferenceCheckQueueLock($checkId) + { + $checkId = intval($checkId); + if ($checkId <= 0) { + return; + } + try { + $keys = []; + foreach (['queue_job', 'queue_job_two'] as $prefix) { + $class = $prefix === 'queue_job_two' + ? 'app\\api\\job\\ReferenceCheckTwo' + : 'app\\api\\job\\ReferenceCheck'; + $base = $prefix . ':' . $class . ':' . $checkId; + $keys[] = $base; + $keys[] = $base . ':status'; + } + QueueRedis::getInstance()->deleteRedisKeys($keys); + } catch (\Exception $e) { + \think\Log::warning('clearReferenceCheckQueueLock id=' . $checkId . ' ' . 
$e->getMessage()); + } + } + + /** + * 执行一次引用 LLM 校对(同步,写回 article_reference_check_result) + */ + public function runReferenceCheckOnce($checkId) + { + $checkId = intval($checkId); + $row = Db::name('article_reference_check_result')->where('id', $checkId)->find(); + if (empty($row)) { + throw new \RuntimeException('article_reference_check_result not found, id=' . $checkId); + } + + $contentA = $this->resolveMainContentForJob($row); + $refer = null; + if (intval($row['p_refer_id']) > 0) { + $refer = Db::name('production_article_refer') + ->where('p_refer_id', intval($row['p_refer_id'])) + ->where('state', 0) + ->find(); + } + + if ($refer) { + $contentB = $this->formatReferForLlm($refer); + } else { + $contentB = trim((string)$this->arrGet($row, 'refer_text', '')); + } + + if ($contentA === '' || $contentB === '') { + $this->updateCheckResult($checkId, [ + 'status' => 2, + 'error_msg' => 'Missing article_main.content or refer_text', + ]); + throw new \RuntimeException('Missing article_main.content or refer_text'); + } + + $llmResult = (new LLMService())->checkReference($contentA, $contentB, false); + $requestFailed = !empty($llmResult['request_failed']); + $canSupport = $this->parseLlmCanSupport($llmResult); + $confidence = floatval(isset($llmResult['confidence']) ? $llmResult['confidence'] : 0); + $reason = isset($llmResult['reason']) ? $llmResult['reason'] : ''; + + // LLM 通讯失败:写 status=2(校对失败) + error_msg,抛异常让队列 worker 走 release(30) 重试; + // 重试 3 次后 ReferenceCheck::markFailed 会保持 status=2 收尾 + if ($requestFailed) { + $this->updateCheckResult($checkId, [ + 'confidence' => $confidence, + 'reason' => $reason, + 'status' => 2, + 'error_msg' => $reason, + ]); + $this->clearReferenceCheckQueueLock($checkId); + throw new \RuntimeException($reason !== '' ? $reason : 'LLM request failed'); + } + + $this->updateCheckResult($checkId, [ + 'can_support' => $canSupport ? 1 : 0, + 'is_match' => $canSupport ? 
1 : 0, + 'confidence' => $confidence, + 'reason' => $reason, + 'status' => 1, + 'error_msg' => '', + ]); + + $this->clearReferenceCheckQueueLock($checkId); + $this->maybeEnqueueSecondPass($checkId, $confidence); + + return [ + 'check_id' => $checkId, + 'can_support' => $canSupport ? 1 : 0, + 'is_match' => $canSupport ? 1 : 0, + 'confidence' => $confidence, + 'reason' => $reason, + ]; + } + + /** + * @return array{refer: array, p_article_id: int, p_refer_id: int, reference_no: int} + */ + private function resolveReferForRecheck($articleId, $pReferId, $referenceNo) + { + $prod = Db::name('production_article') + ->where('article_id', $articleId) + ->whereIn('state', [0, 2]) + ->find(); + if (empty($prod)) { + throw new \RuntimeException('production_article not found for article_id=' . $articleId); + } + + $pArticleId = intval($prod['p_article_id']); + $refer = null; + + if ($pReferId > 0) { + $refer = Db::name('production_article_refer') + ->where('p_refer_id', $pReferId) + ->where('p_article_id', $pArticleId) + ->where('state', 0) + ->find(); + } elseif ($referenceNo > 0) { + $referMap = $this->loadReferMapByPArticleId($pArticleId); + $referIndex = $referenceNo - 1; + if (isset($referMap[$referIndex])) { + $refer = $referMap[$referIndex]; + $pReferId = intval($refer['p_refer_id']); + } + } else { + throw new \InvalidArgumentException('p_refer_id or reference_no is required'); + } + + if (empty($refer)) { + throw new \RuntimeException('production_article_refer not found'); + } + + return [ + 'refer' => $refer, + 'p_article_id' => $pArticleId, + 'p_refer_id' => intval($refer['p_refer_id']), + 'reference_no' => intval($refer['index']) + 1, + ]; + } + /** * 仅使用 refer_doi 字段(二次 Crossref 摘要用) */ @@ -804,7 +1759,7 @@ class ReferenceCheckService return ''; } - $text = preg_replace('/\[([\d,\-\s]+)\]<\/blue>/', '[$1]', $text); + $text = preg_replace(self::BLUE_TAG_REGEX, '[$1]', $text); $text = strip_tags($text); $text = html_entity_decode($text, ENT_QUOTES | ENT_HTML5, 
'UTF-8'); $text = preg_replace('/\s+/u', ' ', $text); @@ -1163,6 +2118,7 @@ class ReferenceCheckService return false; } + $this->clearReferenceCheckQueueLock($checkId); $this->pushJob2($checkId, 5); return true; } @@ -1173,7 +2129,7 @@ class ReferenceCheckService public function extractReferences($content) { $result = []; - preg_match_all('/\[([\d,\-\s]+)\]<\/blue>/', $content, $matches, PREG_OFFSET_CAPTURE); + preg_match_all(self::BLUE_TAG_REGEX, $content, $matches, PREG_OFFSET_CAPTURE); if (empty($matches[0])) { return []; } @@ -1353,7 +2309,7 @@ class ReferenceCheckService private function buildCitationContextText($content, $start, $end) { $text = $this->byteSubstr($content, $start, $end); - $text = preg_replace('/\[[\d,\-\s]+\]<\/blue>/', '', $text); + $text = preg_replace(self::BLUE_TAG_REGEX, '', $text); $text = trim(strip_tags($text)); $text = preg_replace('/\s+/u', ' ', $text); $text = ltrim($text, "\xEF\xBB\xBF"); @@ -1493,8 +2449,7 @@ class ReferenceCheckService $start = $tagStart - $maxBytes; $slice = substr($content, $start, $tagStart - $start); if (preg_match('/[.!?。!?]\s+/u', $slice, $m, PREG_OFFSET_CAPTURE)) { - $last = end($m[0]); - $rel = $last[1] + strlen($last[0]); + $rel = $m[0][1] + strlen($m[0][0]); return $start + $rel; } @@ -1540,7 +2495,7 @@ class ReferenceCheckService } $gap = substr($content, $tagEnd, $end - $tagEnd); - $gapText = trim(strip_tags(preg_replace('/\[[\d,\-\s]+\]<\/blue>/', '', $gap))); + $gapText = trim(strip_tags(preg_replace(self::BLUE_TAG_REGEX, '', $gap))); if ($gapText !== '' && !$this->isOnlyPunctuationOrSpace($gapText)) { return $end; } @@ -1551,8 +2506,47 @@ class ReferenceCheckService return $length; } + /** + * 已入库记录按文献编号正序入队(同号按 am_id、正文位置稳定排序) + * + * @param array $rows 元素含 check_id、reference_no,可选 am_id、text_start + */ + private function pushJobsSortedByReferenceNo(array $rows) + { + if (empty($rows)) { + return []; + } + + usort($rows, function ($a, $b) { + if ($a['reference_no'] !== $b['reference_no']) { + 
return $a['reference_no'] - $b['reference_no']; + } + $amA = isset($a['am_id']) ? intval($a['am_id']) : 0; + $amB = isset($b['am_id']) ? intval($b['am_id']) : 0; + if ($amA !== $amB) { + return $amA - $amB; + } + $posA = isset($a['text_start']) ? intval($a['text_start']) : 0; + $posB = isset($b['text_start']) ? intval($b['text_start']) : 0; + return $posA - $posB; + }); + + $checkIds = []; + $delay = 0; + foreach ($rows as $row) { + $checkId = intval($row['check_id']); + $checkIds[] = $checkId; + $this->pushJob($checkId, $delay); + $delay++; + } + + return $checkIds; + } + private function pushJob($checkId, $delaySeconds = 0) { + $checkId = intval($checkId); + $this->clearReferenceCheckQueueLock($checkId); $jobClass = 'app\api\job\ReferenceCheck@fire'; $data = ['check_id' => $checkId]; try { diff --git a/application/common/service/LLMService.php b/application/common/service/LLMService.php index 01a755df..69f5e61c 100644 --- a/application/common/service/LLMService.php +++ b/application/common/service/LLMService.php @@ -32,11 +32,14 @@ class LLMService */ public function checkReference($contextText, $referText, $isAgain = false, $doiBlock = null) { + // request_failed=true 表示"LLM 通讯/解析层面的失败"(可重试,区别于业务上的"未命中"); + // 上游 runReferenceCheckOnce 会据此把 DB.status 置为 2(失败) 并抛异常触发队列重试 $fallback = [ - 'can_support' => false, - 'is_match' => false, - 'confidence' => 0.0, - 'reason' => 'LLM not configured or request failed', + 'can_support' => false, + 'is_match' => false, + 'confidence' => 0.0, + 'reason' => 'LLM not configured or request failed', + 'request_failed' => true, ]; if ($this->url === '' || $this->model === '') { \think\Log::warning('ReferenceCheck LLM: url or model not configured'); @@ -47,6 +50,7 @@ class LLMService $referText = trim($referText); $doiBlock = trim((string)$doiBlock); if ($contextText === '' || $referText === '') { + // 空文本是入参问题,不是 LLM 故障,不需要重试 return [ 'can_support' => false, 'is_match' => false, @@ -149,10 +153,10 @@ class LLMService 
你是文献引用校对助手。判断【正文全文】与【参考文献书目】是否相关、能否用于支撑正文中的引用。 【核心原则:从宽判断,避免误杀】 -默认倾向 can_support=true。只要文献与正文不是「驴唇不对马嘴」,即判为相关、能支撑。 +默认倾向 can_support=true。只要文献与正文不是「风马牛不相及」,即判为相关、能支撑。 不要求变量一致、不要求结论逐条对应、不要求研究设计相同。 -【仅当以下情况才判 can_support=false(驴唇不对马嘴)】 +【仅当以下情况才判 can_support=false(与正文明显无关)】 - 学科/主题完全无关(如正文讲深度学习聚类,文献是糖尿病步态检测)。 - 明显张冠李戴(正文断言 A 疗法的效果,文献研究的是完全不同的 B 问题且无关联)。 - 文献条目与正文讨论的对象/场景毫无交集,且无法作背景或理论引用。 @@ -164,7 +168,7 @@ class LLMService 【confidence 固定档位(禁止其它小数)】 can_support=true:0.65(有关联但较泛)/ 0.78 / 0.85 / 0.92 / 0.98(非常确定相关) -can_support=false:0.15(明确驴唇不对马嘴)/ 0.25 / 0.35 / 0.45(仅当实在无法建立任何合理关联) +can_support=false:0.15(明确风马牛不相及)/ 0.25 / 0.35 / 0.45(仅当实在无法建立任何合理关联) 【输出】仅一行 minified JSON,无 markdown: {"can_support":true|false,"is_match":true|false,"confidence":0.15|0.25|0.35|0.45|0.65|0.78|0.85|0.92|0.98,"reason":"30-80字简体中文"} @@ -176,7 +180,7 @@ PROMPT; { return "【正文全文 article_main.content】\n" . $contextText . "\n\n【参考文献书目 refer_text】\n" . $referText - . "\n\n请从宽判断:非驴唇不对马嘴即 can_support=true,只返回 JSON。"; + . "\n\n请从宽判断:文献与正文非风马牛不相及即可判 can_support=true,只返回 JSON。"; } /** 第二次校对:Crossref 摘要(Refer_doi) */ @@ -186,12 +190,12 @@ PROMPT; 你是文献引用二次校对助手。已根据 Refer_doi 从 Crossref(https://api.crossref.org/works/)获取摘要,请结合【正文全文】复核该文献是否相关。 【核心原则:与第一次相同,从宽判断】 -默认倾向 can_support=true。只要 Crossref 摘要(或书目)与正文不是驴唇不对马嘴,即判相关、能支撑。 +默认倾向 can_support=true。只要 Crossref 摘要(或书目)与正文不是风马牛不相及,即判相关、能支撑。 以【Crossref 摘要】为准;摘要与书目冲突时以摘要为准。 【仅当以下情况才判 can_support=false】 - 摘要显示的研究主题/对象/方法与正文讨论内容完全风马牛不相及。 -- 典型驴唇不对马嘴、张冠李戴,且无法解释为背景或泛化引用。 +- 典型风马牛不相及、张冠李戴,且无法解释为背景或泛化引用。 【以下情况均应 can_support=true】 - 摘要与正文属同领域或相近方向,能作背景、理论或方向性支撑。 @@ -217,7 +221,7 @@ PROMPT; . "\n\n【参考文献书目 refer_text】\n" . $referText . "\n\n【Crossref 摘要】(Refer_doi → api.crossref.org/works/)\n" . ($doiBlock !== '' ? $doiBlock : '(未获取到摘要,请结合 refer_text 从宽判断)') - . "\n\n非驴唇不对马嘴即 can_support=true,只返回 JSON。"; + . "\n\n文献与正文非风马牛不相及即可判 can_support=true,只返回 JSON。"; } private function buildReferenceCheckSystemPrompt3() {