From c1107780a7c12e47303bab5c9b56e90788d81d39 Mon Sep 17 00:00:00 2001
From: wyn <1074145239@qq.com>
Date: Tue, 26 May 2026 17:33:34 +0800
Subject: [PATCH] =?UTF-8?q?=E5=8F=82=E8=80=83=E6=96=87=E7=8C=AE=E6=9C=AC?=
=?UTF-8?q?=E5=9C=B0=E5=A4=A7=E6=A8=A1=E5=9E=8B=E6=A0=A1=E5=AF=B9?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
application/api/controller/Article.php | 427 --------
application/api/controller/Base.php | 8 +
application/api/controller/Preaccept.php | 50 +
application/api/controller/References.php | 227 ++++
application/api/job/ReferenceCheck.php | 58 +-
application/api/job/ReferenceCheckTwo.php | 12 +
application/common/QueueRedis.php | 19 +
application/common/ReferenceCheckService.php | 1034 +++++++++++++++++-
application/common/service/LLMService.php | 26 +-
9 files changed, 1357 insertions(+), 504 deletions(-)
diff --git a/application/api/controller/Article.php b/application/api/controller/Article.php
index 456fe59c..e47a0473 100644
--- a/application/api/controller/Article.php
+++ b/application/api/controller/Article.php
@@ -10,7 +10,6 @@ use PhpOffice\PhpWord\IOFactory;
use app\common\OpenAi;
use app\common\CrossrefService;
use app\common\PubmedService;
-use app\common\ReferenceCheckService;
/**
* @title 文章接口
@@ -6392,430 +6391,4 @@ class Article extends Base
Db::commit();
return json_encode(['status' => 1,'msg' => 'success']);
}
- /**
- * 调试:预览 article_main 中提取的 blue 引用(不入队)
- * POST: article_id
- */
- public function citationReview()
- {
- $articleId = 7821;//intval($this->request->post('article_id', 0));
- if ($articleId <= 0) {
- return jsonError('article_id is required');
- }
-
- $svc = new ReferenceCheckService();
- $mains = Db::name('article_main')
- ->field('am_id,content')
- ->where('article_id', $articleId)
- ->where('am_id', 127448)
- //->whereIn('state', [0, 2])
- ->order('sort asc')
- ->select();
-
- $preview = [];
- foreach ($mains as $item) {
- $preview[] = [
- 'am_id' => $item['am_id'],
- 'citations' => $svc->extractReferences((string)$item['content']),
- ];
- break;
- }
- return jsonSuccess(['article_id' => $articleId, 'sections' => $preview]);
- }
- /**
- * 提取文献引用
- *
- * @param string $content 原始内容
- * @return array
- */
- function extractReferences($content)
- {
- $result = [];
-
- // 匹配 [57]、[74-79]、[72, 45]
- preg_match_all(
- '/\[([\d,\-\s]+)\]<\/blue>/',
- $content,
- $matches,
- PREG_OFFSET_CAPTURE
- );
-
- if (empty($matches[0])) {
- return [];
- }
-
- foreach ($matches[0] as $index => $match) {
-
- // 完整标签
- $fullTag = $match[0];
-
- // 标签开始位置
- $tagStart = $match[1];
-
- // 标签结束位置
- $tagEnd = $tagStart + strlen($fullTag);
-
- // 文献号原始字符串
- $rawRef = trim($matches[1][$index][0]);
-
- // 展开文献号
- $referenceNumbers = $this->expandReferenceNumbers($rawRef);
-
- /**
- * 获取原文内容
- * 这里按句号切分:
- * 找当前引用所在句子的开始和结束位置
- */
- $sentenceStart = $this->findSentenceStart($content, $tagStart);
- $sentenceEnd = $this->findSentenceEnd($content, $tagEnd);
-
- $originalText = mb_substr(
- $content,
- $sentenceStart,
- $sentenceEnd - $sentenceStart
- );
-
- // 去掉 blue 标签
- $originalText = preg_replace(
- '/\[[\d,\-\s]+\]<\/blue>/',
- '',
- $originalText
- );
-
- $originalText = trim($originalText);
-
- $result[] = [
- 'reference_raw' => $rawRef,
- 'reference_numbers' => $referenceNumbers,
- 'original_text' => $originalText,
-
- // blue标签在整段中的位置
- 'reference_start' => $tagStart,
- 'reference_end' => $tagEnd,
-
- // 原文位置
- 'text_start' => $sentenceStart,
- 'text_end' => $sentenceEnd,
- ];
- }
-
- return $result;
- }
-
- /**
- * 展开文献号
- * 11-15 => [11,12,13,14,15]
- * 72,45 => [72,45]
- * 74-79,81 => [74,75,76,77,78,79,81]
- */
- function expandReferenceNumbers($refStr)
- {
- $numbers = [];
-
- $parts = explode(',', $refStr);
-
- foreach ($parts as $part) {
-
- $part = trim($part);
-
- // 范围
- if (strpos($part, '-') !== false) {
-
- list($start, $end) = explode('-', $part);
-
- $start = intval(trim($start));
- $end = intval(trim($end));
-
- if ($start <= $end) {
- $numbers = array_merge(
- $numbers,
- range($start, $end)
- );
- }
-
- } else {
-
- // 单个数字
- if (is_numeric($part)) {
- $numbers[] = intval($part);
- }
- }
- }
-
- return array_values(array_unique($numbers));
- }
-
- /**
- * 查找句子开始位置
- */
- function findSentenceStart($content, $position)
- {
- $delimiters = ['.', '。', '!', '?', "\n"];
-
- $start = 0;
-
- foreach ($delimiters as $delimiter) {
-
- $pos = strrpos(
- substr($content, 0, $position),
- $delimiter
- );
-
- if ($pos !== false) {
- $start = max($start, $pos + 1);
- }
- }
-
- return $start;
- }
-
- /**
- * 查找句子结束位置
- */
- function findSentenceEnd($content, $position)
- {
- $length = strlen($content);
-
- $endPositions = [];
-
- foreach (['.', '。', '!', '?', "\n"] as $delimiter) {
-
- $pos = strpos($content, $delimiter, $position);
-
- if ($pos !== false) {
- $endPositions[] = $pos + 1;
- }
- }
-
- return empty($endPositions)
- ? $length
- : min($endPositions);
- }
-
- /**
- * 引用相关性:提交单条到队列(异步调用 promotion 同款本地大模型)
- * POST: content_a(必填), content_b(可选), article_id, reference_no(n=index+1), am_id
- */
- public function referenceCheckEnqueue()
- {
- $data = $this->request->post();
- $contentA = trim((string)(isset($data['content_a']) ? $data['content_a'] : ''));
- $contentB = trim((string)(isset($data['content_b']) ? $data['content_b'] : ''));
- $articleId = intval(isset($data['article_id']) ? $data['article_id'] : 0);
- $referenceNo = intval(isset($data['reference_no']) ? $data['reference_no'] : 0);
-
- if ($contentA === '') {
- return jsonError('content_a is required');
- }
-
- try {
- $svc = new ReferenceCheckService();
- $extra = [
- 'reference_no' => $referenceNo,
- 'article_id' => $articleId,
- 'am_id' => intval(isset($data['am_id']) ? $data['am_id'] : 0),
- ];
-
- if ($contentB === '' && $articleId > 0 && $referenceNo > 0) {
- $prod = Db::name('production_article')
- ->where('article_id', $articleId)
- ->where('state', 0)
- ->find();
- if ($prod) {
- $referMap = $svc->loadReferMapByPArticleId(intval($prod['p_article_id']));
- $referIndex = $referenceNo - 1;
- if (isset($referMap[$referIndex])) {
- $refer = $referMap[$referIndex];
- $contentB = $svc->formatReferForLlm($refer);
- $extra['p_article_id'] = intval($prod['p_article_id']);
- $extra['p_refer_id'] = intval($refer['p_refer_id']);
- $extra['refer_index'] = $referIndex;
- }
- }
- }
-
- $result = $svc->enqueue($contentA, $contentB, $extra);
- return jsonSuccess($result);
- } catch (\Exception $e) {
- return jsonError($e->getMessage());
- }
- }
- public function checkOne(){
- $articleId = intval($this->request->param('article_id', 7414));
- $svc = new ReferenceCheckService();
- return jsonSuccess($svc->enqueueSecondPassByArticle($articleId));
- }
- public function referenceCheckEnqueueArticleMain(){
- $amId = 127448;
- $svc = new ReferenceCheckService();
- $main = Db::name('article_main')
- ->field('am_id,content,article_id')
- ->where('am_id', $amId)
- ->whereIn('state', [0, 2])
- ->find();
- $result = $svc->enqueueByArticleMain($main);
- return jsonSuccess($result);
- }
- public function referenceCheckEnqueueArticle(){
- $data = $this->request->get();
- $articleId = intval(isset($data['article_id']) ? $data['article_id'] : 0);
- var_dump($articleId);
- if ($articleId <= 0) {
- return jsonError('article_id is required');
- }
- try {
- $svc = new ReferenceCheckService();
- $result = $svc->enqueueByArticle($articleId);
- return jsonSuccess($result);
- } catch (\Exception $e) {
- return jsonError($e->getMessage());
- }
- }
- /**
- * 按文章批量入队:从 article_main 提取 blue 引用与文献号
- * POST: article_id, clear_previous=1(默认清空该文旧明细后重检)
- */
- public function referenceCheckEnqueueArticle2()
- {
- $data = $this->request->post();
- $articleId = intval(isset($data['article_id']) ? $data['article_id'] : 0);
- if ($articleId <= 0) {
- return jsonError('article_id is required');
- }
-
- try {
- $svc = new ReferenceCheckService();
- $clear = !isset($data['clear_previous']) || intval($data['clear_previous']) === 1;
- $result = $svc->enqueueByArticle($articleId, $clear);
- return jsonSuccess($result);
- } catch (\Exception $e) {
- return jsonError($e->getMessage());
- }
- }
-
- /**
- * 查询单条引用相关性检测结果
- * GET/POST: check_id
- */
- public function referenceCheckResult()
- {
- $checkId = intval($this->request->param('check_id', 0));
- if ($checkId <= 0) {
- return jsonError('check_id is required');
- }
-
- $row = (new ReferenceCheckService())->getResult($checkId);
- if (!$row) {
- return jsonError('result not found');
- }
-
- return jsonSuccess($this->formatReferenceCheckRow($row));
- }
-
- /**
- * 稿件预览:带不合理引用标记的 content(序号 + 引用句)
- * GET/POST: article_id, am_id(可选,只预览某一节)
- */
- public function referenceCheckPreview()
- {
- $articleId = intval($this->request->param('article_id', 0));
- if ($articleId <= 0) {
- return jsonError('article_id is required');
- }
- $amId = intval($this->request->param('am_id', 0));
-
- try {
- $data = (new ReferenceCheckService())->buildArticlePreview($articleId, $amId);
- $data['markup_hint'] = [
- 'ref_no' => '.ref-no-error — 不合理的文献序号(如 70-73 中单独的 70)',
- 'ref_cite' => '.ref-cite-tag.ref-cite-error — 含不合理序号的 blue 引用块',
- 'ref_context'=> '.ref-context-error — 不合理的引用句/上下文',
- ];
- $data['preview_css'] = '.ref-no-error{color:#c00;font-weight:bold;border-bottom:2px wavy #c00}'
- . '.ref-cite-tag.ref-cite-error{background:#ffecec}'
- . '.ref-context-error{background:#fff3cd;outline:1px dashed #e6a700}';
- return jsonSuccess($data);
- } catch (\Exception $e) {
- return jsonError($e->getMessage());
- }
- }
-
- /**
- * 按文章列出引用校对结果([70-73] 为 4 条,reference_no 分别为 70,71,72,73)
- * GET/POST: article_id, status(可选), only_mismatch=1 仅不合理
- */
- public function referenceCheckList()
- {
- $articleId = intval($this->request->param('article_id', 0));
- if ($articleId <= 0) {
- return jsonError('article_id is required');
- }
-
- $status = $this->request->param('status', '');
- $statusFilter = ($status === '' || $status === null) ? -1 : intval($status);
- $onlyMismatch = intval($this->request->param('only_mismatch', 0)) === 1;
- $rows = (new ReferenceCheckService())->listByArticle($articleId, $statusFilter, $onlyMismatch);
-
- $list = [];
- foreach ($rows as $row) {
- $list[] = $this->formatReferenceCheckRow($row);
- }
-
- $mains = Db::name('article_main')
- ->field('am_id,ref_check_status,sort')
- ->where('article_id', $articleId)
- ->whereIn('state', [0, 2])
- ->order('sort asc')
- ->select();
- $sections = [];
- foreach ($mains as $m) {
- $st = intval(isset($m['ref_check_status']) ? $m['ref_check_status'] : 0);
- $sections[] = [
- 'am_id' => intval($m['am_id']),
- 'ref_check_status' => $st,
- 'ref_check_pass' => $st === ReferenceCheckService::AM_STATUS_PASS,
- 'ref_check_label' => ReferenceCheckService::amStatusLabel($st),
- ];
- }
-
- return jsonSuccess([
- 'article_id' => $articleId,
- 'total' => count($list),
- 'list' => $list,
- 'sections' => $sections,
- ]);
- }
-
- private function formatReferenceCheckRow($row)
- {
- $statusMap = array(0 => 'pending', 1 => 'done', 2 => 'failed');
- $amId = intval(isset($row['am_id']) ? $row['am_id'] : 0);
- $citeStart = intval(isset($row['cite_tag_start']) ? $row['cite_tag_start'] : 0);
- $rowStatus = intval($row['status']);
- return array(
- 'check_id' => intval(isset($row['id']) ? $row['id'] : (isset($row['check_id']) ? $row['check_id'] : 0)),
- 'article_id' => intval(isset($row['article_id']) ? $row['article_id'] : 0),
- 'am_id' => $amId,
- 'cite_group_key' => $amId . '_' . $citeStart,
- 'p_refer_id' => intval(isset($row['p_refer_id']) ? $row['p_refer_id'] : 0),
- 'refer_index' => intval(isset($row['refer_index']) ? $row['refer_index'] : 0),
- 'reference_no' => intval(isset($row['reference_no']) ? $row['reference_no'] : 0),
- 'reference_raw' => isset($row['reference_raw']) ? $row['reference_raw'] : '',
- 'cite_tag_start' => $citeStart,
- 'cite_tag_end' => intval(isset($row['cite_tag_end']) ? $row['cite_tag_end'] : 0),
- 'text_start' => intval(isset($row['text_start']) ? $row['text_start'] : 0),
- 'text_end' => intval(isset($row['text_end']) ? $row['text_end'] : 0),
- 'status' => isset($statusMap[$rowStatus]) ? $statusMap[$rowStatus] : 'unknown',
- 'is_match' => intval($row['is_match']),
- 'can_support' => intval(isset($row['can_support']) ? $row['can_support'] : $row['is_match']),
- 'is_reasonable' => intval(isset($row['can_support']) ? $row['can_support'] : $row['is_match']) === 1,
- 'confidence' => floatval($row['confidence']),
- 'reason' => isset($row['reason']) ? $row['reason'] : '',
- 'error_msg' => isset($row['error_msg']) ? $row['error_msg'] : '',
- 'content_a' => isset($row['content_a']) ? $row['content_a'] : '',
- 'content_b' => isset($row['content_b']) ? $row['content_b'] : '',
- 'updated_at' => isset($row['updated_at']) ? $row['updated_at'] : '',
- );
- }
-
}
diff --git a/application/api/controller/Base.php b/application/api/controller/Base.php
index 77e1da7b..3b2c4627 100644
--- a/application/api/controller/Base.php
+++ b/application/api/controller/Base.php
@@ -271,6 +271,14 @@ class Base extends Controller
}
$this->production_article_refer_obj->where('p_article_id', $refer_info['p_article_id'])->where('index', ">", $refer_info['index'])->where('state', 0)->setDec('index');
$this->production_article_refer_obj->where('p_refer_id', $p_refer_id)->update(['state' => 1]);
+
+ // 文献集合已变更,原校对结果的 reference_no 已全部错位,整篇标记为未校对
+ try {
+ (new \app\common\ReferenceCheckService())
+ ->clearArticleChecksByPArticleId(intval($refer_info['p_article_id']));
+ } catch (\Exception $e) {
+ \think\Log::error('delOneRefer clearArticleChecksByPArticleId p_refer_id=' . $p_refer_id . ' ' . $e->getMessage());
+ }
}
diff --git a/application/api/controller/Preaccept.php b/application/api/controller/Preaccept.php
index 9b4867c7..166af09f 100644
--- a/application/api/controller/Preaccept.php
+++ b/application/api/controller/Preaccept.php
@@ -7,6 +7,7 @@ use think\Env;
use think\Queue;
use think\Validate;
use app\common\CrossrefService;
+use app\common\ReferenceCheckService;
class Preaccept extends Base
{
@@ -15,6 +16,26 @@ class Preaccept extends Base
parent::__construct($request);
}
+ /**
+ * Called after an add/modify changed the article's reference set: asks ReferenceCheckService to clear the article's check details so it returns to the "not checked" state.
+ * A failure is only logged and never blocks the calling flow; a non-positive $pArticleId is ignored. $sourceTag only labels the log line.
+ */
+ private function resetArticleChecksOnReferChange($pArticleId, $sourceTag = '')
+ {
+ $pArticleId = intval($pArticleId);
+ if ($pArticleId <= 0) {
+ return;
+ }
+ try {
+ (new ReferenceCheckService())->clearArticleChecksByPArticleId($pArticleId);
+ } catch (\Exception $e) {
+ \think\Log::error(
+ 'resetArticleChecksOnReferChange[' . $sourceTag . '] p_article_id='
+ . $pArticleId . ' ' . $e->getMessage()
+ );
+ }
+ }
+
/**获取文章参考文献列表
* @return \think\response\Json
@@ -92,6 +113,7 @@ class Preaccept extends Base
return jsonError($rule->getError());
}
$this->production_article_refer_obj->where('p_article_id',$data['p_article_id'])->update(["state"=>1]);
+ $this->resetArticleChecksOnReferChange(intval($data['p_article_id']), 'discardRefersByParticleid');
return jsonSuccess([]);
}
@@ -142,6 +164,7 @@ class Preaccept extends Base
}
$adId= $this->production_article_refer_obj->insertGetId($insert);
$this->production_article_refer_obj->where('p_article_id', $p_info['p_article_id'])->where("p_refer_id", "<>", $adId)->where("index", ">", $pre_refer['index'])->where('state', 0)->setInc('index');
+ $this->resetArticleChecksOnReferChange(intval($p_info['p_article_id']), 'addRefer');
return jsonSuccess([]);
@@ -198,6 +221,7 @@ class Preaccept extends Base
}
$adId= $this->production_article_refer_obj->insertGetId($insert);
$this->production_article_refer_obj->where('p_article_id', $p_info['p_article_id'])->where("p_refer_id", "<>", $adId)->where("index", ">", $pre_refer['index'])->where('state', 0)->setInc('index');
+ $this->resetArticleChecksOnReferChange(intval($p_info['p_article_id']), 'addReferByParticleid');
return jsonSuccess([]);
}
@@ -233,6 +257,7 @@ class Preaccept extends Base
$insert['cs'] = 1;
$adId = $this->production_article_refer_obj->insertGetId($insert);
$this->production_article_refer_obj->where('p_article_id', $p_info['p_article_id'])->where("p_refer_id", "<>", $adId)->where("index", ">", $pre_refer['index'])->where('state', 0)->setInc('index');
+ $this->resetArticleChecksOnReferChange(intval($p_info['p_article_id']), 'addReferNotdoi');
return jsonSuccess([]);
}
@@ -462,6 +487,17 @@ class Preaccept extends Base
// }
// $this->production_article_refer_obj->where('p_refer_id', $data['p_refer_id'])->update(['refer_doi' => $data['doi']]);
// my_doiToFrag2($this->production_article_refer_obj->where('p_refer_id', $data['p_refer_id'])->find());
+
+ //文献内容更新成功后异步重检该文献对应的全部校对明细(失败不阻塞主流程)
+ try {
+ (new ReferenceCheckService())->enqueueRecheckByPReferId(
+ intval($data['p_refer_id']),
+ intval($old_refer_info['p_article_id'])
+ );
+ } catch (\Exception $e) {
+ \think\Log::error('editRefer enqueueRecheckByPReferId p_refer_id=' . $data['p_refer_id'] . ' ' . $e->getMessage());
+ }
+
return jsonSuccess([]);
}
@@ -1453,6 +1489,7 @@ class Preaccept extends Base
return jsonError($rule->getError());
}
$refer_info = $this->production_article_refer_obj->where('p_refer_id', $data['p_refer_id'])->find();
+ $sibling_p_refer_id = 0;
if ($data['act'] == "up") {
$up_info = $this->production_article_refer_obj->where('p_article_id', $refer_info['p_article_id'])->where('index', $refer_info['index'] - 1)->where('state', 0)->find();
if (!$up_info) {
@@ -1460,6 +1497,7 @@ class Preaccept extends Base
}
$this->production_article_refer_obj->where('p_refer_id', $up_info['p_refer_id'])->setInc("index");
$this->production_article_refer_obj->where('p_refer_id', $refer_info['p_refer_id'])->setDec("index");
+ $sibling_p_refer_id = intval($up_info['p_refer_id']);
} else {
$down_info = $this->production_article_refer_obj->where('p_article_id', $refer_info['p_article_id'])->where('index', $refer_info['index'] + 1)->where('state', 0)->find();
if (!$down_info) {
@@ -1467,7 +1505,19 @@ class Preaccept extends Base
}
$this->production_article_refer_obj->where('p_refer_id', $refer_info['p_refer_id'])->setInc("index");
$this->production_article_refer_obj->where('p_refer_id', $down_info['p_refer_id'])->setDec("index");
+ $sibling_p_refer_id = intval($down_info['p_refer_id']);
}
+
+ // 仅同步本次交换的两条 p_refer_id 对应的校对明细 reference_no / refer_index
+ try {
+ (new ReferenceCheckService())->syncReferenceNoByPReferIds(
+ [intval($refer_info['p_refer_id']), $sibling_p_refer_id],
+ intval($refer_info['p_article_id'])
+ );
+ } catch (\Exception $e) {
+ \think\Log::error('sortRefer syncReferenceNoByPReferIds: ' . $e->getMessage());
+ }
+
return jsonSuccess([]);
}
diff --git a/application/api/controller/References.php b/application/api/controller/References.php
index 47ae2328..759c63bf 100644
--- a/application/api/controller/References.php
+++ b/application/api/controller/References.php
@@ -1307,4 +1307,231 @@ class References extends Base
}
return json_encode(['status' => 8,'msg' => 'fail']);
}
+ /**
+ * First-pass reference proofreading: validates the production article, then enqueues the whole-article check via ReferenceCheckService::enqueueByPArticle().
+ * @return mixed json_encode() string when p_article_id is missing or the article is not found, otherwise the value of jsonError()/jsonSuccess()
+ */
+ public function allReferenceCheckAI(){
+ // Read the request parameters ($aParam is not defined before this line, so the POST body is always used)
+ $aParam = empty($aParam) ? $this->request->post() : $aParam;
+
+ // Required-field validation
+ $iPArticleId = empty($aParam['p_article_id']) ? '' : $aParam['p_article_id'];
+ if(empty($iPArticleId)){
+ return json_encode(array('status' => 2,'msg' => 'Please select an article' ));
+ }
+ // Look up the article (both p_article_id and article_id are selected; per the author the downstream service method uses both)
+ $aWhere = ['p_article_id' => $iPArticleId,'state' => ['in',[0,2]]];
+ $aProductionArticle = Db::name('production_article')->field('p_article_id,article_id')->where($aWhere)->find();
+ if(empty($aProductionArticle)){
+ return json_encode(array('status' => 3,'msg' => 'No articles found' ));
+ }
+ if($this->checkReferStatus($iPArticleId)==0){
+ return jsonError('请修正完文献内容再进行校对。');
+ }
+ // If check records already exist, refuse to run the first pass again and point the caller to the reset endpoint
+ $iExisting = Db::name('article_reference_check_result')
+ ->where('p_article_id', $iPArticleId)
+ ->count();
+ if(intval($iExisting) > 0){
+ return jsonError('该文章已存在校对记录,请使用"重置校对"接口重新校对。');
+ }
+ try {
+ $svc = new ReferenceCheckService();
+ $result = $svc->enqueueByPArticle($aProductionArticle);
+ return jsonSuccess($result);
+ } catch (\Exception $e) {
+ return jsonError($e->getMessage());
+ }
+ }
+ /**
+ * Reference-check reset: delegates to ReferenceCheckService::resetAndRecheckByArticle(), which per the author deletes all existing check details of the article and re-enqueues the whole-article check.
+ * POST: p_article_id (required) - only the POST body is read here.
+ * @url presumably /api/References/referenceCheckResetAI - TODO confirm against the route configuration
+ */
+ public function referenceCheckResetAI()
+ {
+ // Read the request parameters ($aParam is not defined before this line, so the POST body is always used)
+ $aParam = empty($aParam) ? $this->request->post() : $aParam;
+
+ // Required-field validation
+ $iPArticleId = empty($aParam['p_article_id']) ? '' : $aParam['p_article_id'];
+ if(empty($iPArticleId)){
+ return json_encode(array('status' => 2,'msg' => 'Please select an article' ));
+ }
+ // Look up the article (both p_article_id and article_id are selected; per the author the downstream service method uses both)
+ $aWhere = ['p_article_id' => $iPArticleId,'state' => ['in',[0,2]]];
+ $aProductionArticle = Db::name('production_article')->field('p_article_id,article_id')->where($aWhere)->find();
+ if(empty($aProductionArticle)){
+ return json_encode(array('status' => 3,'msg' => 'No articles found' ));
+ }
+ if($this->checkReferStatus($iPArticleId)==0){
+ return jsonError('请修正完文献内容再进行校对。');
+ }
+ $iArticleId = empty($aProductionArticle['article_id']) ? 0 : $aProductionArticle['article_id'];
+ if(empty($iArticleId)){
+ return json_encode(array('status' => 4,'msg' => 'Unbound article' ));
+ }
+ try {
+ $result = (new ReferenceCheckService())->resetAndRecheckByArticle($aProductionArticle);
+ return jsonSuccess($result);
+ } catch (\Exception $e) {
+ return jsonError($e->getMessage());
+ }
+ }
+
+ /**
+ * Clears all reference-check records of one article without re-enqueueing anything.
+ *
+ * Difference from referenceCheckResetAI: reset is "clear + check again",
+ * this action performs only the "clear" step; per the author the article returns to "not checked" until the user triggers a check manually.
+ *
+ * POST/GET: p_article_id (required); the POST body is read first, request->param() is the fallback when it is empty.
+ */
+ public function referenceCheckClearAI()
+ {
+ $aParam = $this->request->post();
+ if (empty($aParam)) {
+ $aParam = $this->request->param();
+ }
+
+ $iPArticleId = empty($aParam['p_article_id']) ? 0 : intval($aParam['p_article_id']);
+ if ($iPArticleId <= 0) {
+ return json_encode(array('status' => 2, 'msg' => 'Please select an article'));
+ }
+
+ // Verify the article exists (same filter as the other check endpoints: state in [0,2])
+ $aProductionArticle = Db::name('production_article')
+ ->field('p_article_id,article_id')
+ ->where(['p_article_id' => $iPArticleId, 'state' => ['in', [0, 2]]])
+ ->find();
+ if (empty($aProductionArticle)) {
+ return json_encode(array('status' => 3, 'msg' => 'No articles found'));
+ }
+
+ try {
+ $deleted = (new ReferenceCheckService())->clearArticleChecksByPArticleId($iPArticleId);
+ return jsonSuccess([
+ 'p_article_id' => $iPArticleId,
+ 'deleted' => intval($deleted),
+ ]);
+ } catch (\Exception $e) {
+ return jsonError($e->getMessage());
+ }
+ }
+
+ /**
+ * Returns the whole-article reference-check progress for a p_article_id (per the author, aggregated per reference_no).
+ *
+ * POST/GET: p_article_id (required); the POST body is read first, request->param() is the fallback when it is empty.
+ *
+ * Per the author each list item contains: reference_no, p_refer_id, status (numeric),
+ * total, pending, done, failed, pass, is_pass, last_updated_at, records (built in ReferenceCheckService::getProgressByPArticleId).
+ *
+ * Numeric status values:
+ * 0 = waiting for check, 1 = checking, 2 = check completed, 3 = check failed
+ */
+ public function referenceCheckProgressAI()
+ {
+ $aParam = $this->request->post();
+ if (empty($aParam)) {
+ $aParam = $this->request->param();
+ }
+
+ $iPArticleId = empty($aParam['p_article_id']) ? 0 : intval($aParam['p_article_id']);
+ if ($iPArticleId <= 0) {
+ return json_encode(array('status' => 2, 'msg' => 'Please select an article'));
+ }
+ try {
+ $result = (new ReferenceCheckService())->getProgressByPArticleId($iPArticleId);
+ return jsonSuccess($result);
+ } catch (\Exception $e) {
+ return jsonError($e->getMessage());
+ }
+ }
+
+ /**
+ * Returns the overall reference-check status of an article for a p_article_id (used by the front end to choose which button to show).
+ *
+ * POST/GET: p_article_id (required); the POST body is read first, request->param() is the fallback when it is empty.
+ *
+ * Per the author the counting unit is the reference (grouped by reference_no), not the individual check-detail row.
+ * Example: 50 references backed by 111 check-detail rows give total = 50.
+ *
+ * Meaning of the returned numeric status (whole article):
+ * 0 = not checked (no record at all)
+ * 1 = checking (at least one reference still has unfinished detail rows)
+ * 2 = check completed (all detail rows of all references have finished)
+ *
+ * Returned fields as described by the author: p_article_id, status, total, pending, done, failed, progress_percent
+ * total - number of references
+ * pending - number of references that still have unfinished detail rows (including "partly finished")
+ * done - number of references whose detail rows all have status=1
+ * failed - number of references whose detail rows have all finished with at least one status=2
+ * pending + done + failed = total; progress_percent = (done+failed)/total
+ *
+ * Use referenceCheckProgressAI for the per-reference breakdown.
+ */
+ public function referenceCheckArticleStatusAI()
+ {
+ $aParam = $this->request->post();
+ if (empty($aParam)) {
+ $aParam = $this->request->param();
+ }
+
+ $iPArticleId = empty($aParam['p_article_id']) ? 0 : intval($aParam['p_article_id']);
+ if ($iPArticleId <= 0) {
+ return json_encode(array('status' => 2, 'msg' => 'Please select an article'));
+ }
+
+ try {
+ $result = (new ReferenceCheckService())->getArticleProgressStatusByPArticleId($iPArticleId);
+ return jsonSuccess($result);
+ } catch (\Exception $e) {
+ return jsonError($e->getMessage());
+ }
+ }
+
+ /**
+ * Returns the check details of a single reference, looked up by p_refer_id.
+ *
+ * POST/GET: p_refer_id (required); the POST body is read first, request->param() is the fallback when it is empty.
+ *
+ * Per the author each list item contains: am_id, confidence, reason, is_match, is_pass,
+ * and the result also carries context: p_refer_id, p_article_id, reference_no, total (built in ReferenceCheckService::getCheckDetailsByPReferId).
+ */
+ public function referenceCheckDetailsAI()
+ {
+ $aParam = $this->request->post();
+ if (empty($aParam)) {
+ $aParam = $this->request->param();
+ }
+
+ $iPReferId = empty($aParam['p_refer_id']) ? 0 : intval($aParam['p_refer_id']);
+ if ($iPReferId <= 0) {
+ return json_encode(array('status' => 2, 'msg' => 'Please select a reference'));
+ }
+
+ try {
+ $result = (new ReferenceCheckService())->getCheckDetailsByPReferId($iPReferId);
+ return jsonSuccess($result);
+ } catch (\Exception $e) {
+ return jsonError($e->getMessage());
+ }
+ }
+
+    /** Returns 1 when every active reference (state=0) of the article has cs != 0, otherwise 0; an article without active references also yields 0. */
+    public function checkReferStatus($p_article_id){
+        $list = $this->production_article_refer_obj->where('p_article_id', $p_article_id)->where('state', 0)->select();
+        // Callers compare the result with 0, so "no references" must be the integer 0 rather than a jsonError() response value.
+        if (!$list) {
+            return 0;
+        }
+        foreach ($list as $v) {
+            if ($v['cs'] == 0) {
+                return 0;
+            }
+        }
+        return 1;
+    }
}
diff --git a/application/api/job/ReferenceCheck.php b/application/api/job/ReferenceCheck.php
index 3b15e6a1..89c5c67d 100644
--- a/application/api/job/ReferenceCheck.php
+++ b/application/api/job/ReferenceCheck.php
@@ -6,7 +6,6 @@ use think\queue\Job;
use app\common\QueueJob;
use app\common\QueueRedis;
use app\common\ReferenceCheckService;
-use app\common\service\LLMService;
class ReferenceCheck
{
@@ -39,14 +38,6 @@ class ReferenceCheck
if ($checkId <= 0 && !empty($jobData['data']['check_id'])) {
$checkId = intval($jobData['data']['check_id']);
}
- $sClassName = get_class($this);
- $sRedisKey = "queue_job:{$sClassName}:{$checkId}";
- $sRedisValue = uniqid() . '_' . getmypid();
-
- if (!$this->oQueueJob->acquireLock($sRedisKey, $sRedisValue, $job)) {
- return;
- }
-
if ($checkId <= 0) {
$job->delete();
return;
@@ -63,44 +54,19 @@ class ReferenceCheck
return;
}
+ $sClassName = get_class($this);
+ $sRedisKey = "queue_job:{$sClassName}:{$checkId}";
+ $sRedisValue = uniqid() . '_' . getmypid();
+
+ $svc = new ReferenceCheckService();
+ $svc->clearReferenceCheckQueueLock($checkId);
+
+ if (!$this->oQueueJob->acquireLock($sRedisKey, $sRedisValue, $job)) {
+ return;
+ }
+
try {
- $svc = new ReferenceCheckService();
-
- $contentA = $svc->resolveMainContentForJob($row);
- $contentB = trim((string)(isset($row['refer_text']) ? $row['refer_text'] : ''));
- $refer = null;
-
- if (intval($row['p_refer_id']) > 0) {
- $refer = Db::name('production_article_refer')
- ->where('p_refer_id', intval($row['p_refer_id']))
- ->where('state', 0)
- ->find();
- if ($refer && $contentB === '') {
- $contentB = $svc->formatReferForLlm($refer);
- }
- }
-
- if ($contentA === '' || $contentB === '') {
- $this->markFailed($checkId, 'Missing article_main.content or refer_text');
- $job->delete();
- return;
- }
-
- $llm = new LLMService();
- $llmResult = $llm->checkReference($contentA, $contentB, false);
- $canSupport = $svc->parseLlmCanSupport($llmResult);
- $confidence = floatval($llmResult['confidence']);
-
- $svc->updateCheckResult($checkId, [
- 'can_support' => $canSupport ? 1 : 0,
- 'is_match' => $canSupport ? 1 : 0,
- 'confidence' => $confidence,
- 'reason' => isset($llmResult['reason']) ? $llmResult['reason'] : '',
- 'status' => 1,
- 'error_msg' => '',
- ]);
-
- $svc->maybeEnqueueSecondPass($checkId, $confidence);
+ $svc->runReferenceCheckOnce($checkId);
$amId = intval(isset($row['am_id']) ? $row['am_id'] : 0);
if ($amId > 0) {
diff --git a/application/api/job/ReferenceCheckTwo.php b/application/api/job/ReferenceCheckTwo.php
index b28c9f6c..564af204 100644
--- a/application/api/job/ReferenceCheckTwo.php
+++ b/application/api/job/ReferenceCheckTwo.php
@@ -88,12 +88,24 @@ class ReferenceCheckTwo
$llm = new LLMService();
$llmResult = $llm->checkReference($contentA, $referText, true, $doiBlock);
+ $requestFailed = !empty($llmResult['request_failed']);
$canSupport = $svc->parseLlmCanSupport($llmResult);
$tag = $payload['has_abstract']
? ('[Crossref复核' . ($payload['doi_used'] !== '' ? ' ' . $payload['doi_used'] : '') . ']')
: '[Crossref复核-无摘要]';
$reason = $tag . ' ' . (isset($llmResult['reason']) ? $llmResult['reason'] : '');
+ // LLM 通讯失败:写 status=2 并抛异常触发队列重试
+ if ($requestFailed) {
+ $svc->updateCheckResult($checkId, [
+ 'confidence' => floatval($llmResult['confidence']),
+ 'reason' => $reason,
+ 'status' => 2,
+ 'error_msg' => isset($llmResult['reason']) ? $llmResult['reason'] : 'LLM request failed',
+ ]);
+ throw new \RuntimeException(isset($llmResult['reason']) ? $llmResult['reason'] : 'LLM request failed');
+ }
+
$affected = $svc->updateCheckResult($checkId, [
'can_support' => $canSupport ? 1 : 0,
'is_match' => $canSupport ? 1 : 0,
diff --git a/application/common/QueueRedis.php b/application/common/QueueRedis.php
index fb9fb5fb..4412d1ba 100644
--- a/application/common/QueueRedis.php
+++ b/application/common/QueueRedis.php
@@ -80,6 +80,25 @@ class QueueRedis
return null;
}
}
+
+ /**
+ * Deletes one or more Redis keys (per the author: clears a queue job's "completed" marker before a re-check). Null and empty-string keys are dropped first; returns true when nothing is left to delete or the DEL call succeeds, false when it throws.
+ */
+ public function deleteRedisKeys(array $keys)
+ {
+ $keys = array_values(array_filter($keys, function ($k) {
+ return $k !== null && $k !== '';
+ }));
+ if (empty($keys)) {
+ return true;
+ }
+ try {
+ $this->connect()->del(...$keys);
+ return true;
+ } catch (\Exception $e) {
+ return false;
+ }
+ }
// 安全释放锁(仅当值匹配时删除)
public function releaseRedisLock($key, $value)
diff --git a/application/common/ReferenceCheckService.php b/application/common/ReferenceCheckService.php
index 593f1548..77b44e9d 100644
--- a/application/common/ReferenceCheckService.php
+++ b/application/common/ReferenceCheckService.php
@@ -5,6 +5,7 @@ namespace app\common;
use think\Db;
use think\Env;
use think\Queue;
+use app\common\service\LLMService;
/**
* 正文 <blue>[n]</blue> 引用与 t_production_article_refer(index+1=n)相关性校对。
@@ -20,6 +21,48 @@ class ReferenceCheckService
const AM_STATUS_FAIL = 2;
const AM_STATUS_RUNNING = 3;
+ /** Reference-check progress (public status after aggregating detail rows per reference_no) */
+ const PROGRESS_PENDING = 0; // waiting: every detail row in the group has status=0
+ const PROGRESS_CHECKING = 1; // checking: some detail rows in the group have finished, others are still 0
+ const PROGRESS_COMPLETED = 2; // completed: every detail row in the group has status=1
+ const PROGRESS_FAILED = 3; // failed: every detail row in the group has finished and at least one has status=2
+
+ /** Reference-check status of the whole article (public overall status, used to choose between the "start" and "reset" buttons) */
+ const ARTICLE_PROGRESS_NONE = 0; // no check record exists yet
+ const ARTICLE_PROGRESS_RUNNING = 1; // at least one row has status=0 (unfinished work is still in the queue)
+ const ARTICLE_PROGRESS_COMPLETED = 2; // every detail row has status != 0 (all completed or failed)
+
+ /**
+ * Public status of a single check-detail row (records[i].status as returned by getProgressByPArticleId)
+ *
+ * In the database, article_reference_check_result.status only takes the values 0/1/2;
+ * RECORD_PROCESSING is a transient state derived from the Redis queue lock (:status='processing')
+ * and is not persisted. While a worker is inside the LLM call the DB status is still 0, so the queue lock is needed to detect it.
+ */
+ const RECORD_PENDING = 0; // waiting: enqueued but not yet picked up by a worker
+ const RECORD_COMPLETED = 1; // check completed
+ const RECORD_FAILED = 2; // check failed
+ const RECORD_PROCESSING = 3; // processing: a worker is running the LLM call (Redis :status='processing')
+
+ /** Pass threshold for the LLM score (confidence): a value >= this threshold counts as "pass" */
+ const PASS_CONFIDENCE_THRESHOLD = 0.65;
+
+    /**
+     * Pattern for a "[...]</blue>" citation tag; group 1 captures the text between the brackets (requires the /u modifier).
+     *
+     * Besides ASCII digits, the ASCII comma, the ASCII hyphen and whitespace, the class accepts these typographic variants:
+     *   U+FF0C FULLWIDTH COMMA
+     *   U+2013 EN DASH
+     *   U+2014 EM DASH
+     *   U+2212 MINUS SIGN
+     *   U+2010 HYPHEN
+     *   U+2011 NON-BREAKING HYPHEN
+     *
+     * Without the dash variants a range citation written with an EN DASH (19, EN DASH, 21) would not match at all, and the
+     * check records for its reference numbers would be lost. Every non-ASCII character is a \x{...} escape so re-encoding this file cannot alter the class.
+     */
+    const BLUE_TAG_REGEX = '/\[([\d,\x{FF0C}\-\x{2013}\x{2014}\x{2212}\x{2010}\x{2011}\s]+)\]<\/blue>/u';
+
/**
* 兼容无 ?? 的 PHP 版本
*/
@@ -166,13 +209,94 @@ class ReferenceCheckService
'queued' => count($checkIds2),
];
}
+ public function enqueueByPArticle($prod){ // Insert one check row per cited reference number of the given production_article row, then push the jobs.
+ if (empty($prod)) {
+ throw new \RuntimeException('production_article not found');
+ }
+ $pArticleId = intval($prod['p_article_id']);
+ $articleId = intval($prod['article_id']);
+ $referMap = $this->loadReferMapByPArticleId($pArticleId); // looked up below by (reference number - 1)
+
+ $mains = Db::name('article_main')
+ ->field('am_id,content,article_id')
+ ->where('article_id', $articleId)
+ ->whereIn('state', [0, 2])
+ ->order('sort asc')
+ ->select();
+ if (empty($mains)) {
+ throw new \RuntimeException('article_main is empty');
+ }
+ $queued = 0; // check rows inserted
+ $skipped = 0; // cited numbers with no entry in $referMap
+ $pendingJobs = [];
+ $amIdsWithJobs = []; // set of am_id values that received at least one job
+ $now = date('Y-m-d H:i:s');
+ foreach ($mains as $main) {
+ $amId = intval($main['am_id']);
+ $citations = $this->extractReferences((string)$main['content']);
+ if (empty($citations)) {
+ $this->setAmRefCheckStatus($amId, self::AM_STATUS_NONE); // section has no citations
+ continue;
+ }
+ foreach ($citations as $cite) {
+ foreach ($cite['reference_numbers'] as $refNo) {
+ $referIndex = $refNo - 1; // reference numbers are 1-based, $referMap keys are 0-based
+ if ($referIndex < 0 || !isset($referMap[$referIndex])) {
+ $skipped++;
+ continue;
+ }
+ $refer = $referMap[$referIndex];
+ $referText = $this->formatReferForLlm($refer);
+
+ // [70-73] expands into 4 rows with reference_no=70,71,72,73; insert the rows first, then check in ascending reference-number order
+ $checkId = Db::name('article_reference_check_result')->insertGetId([
+ 'article_id' => $main['article_id'],
+ 'p_article_id' => $pArticleId,
+ 'am_id' => $amId,
+ 'reference_no' => $refNo,
+ 'refer_index' => $refNo, // NOTE(review): stores the 1-based number, not $referIndex — same as enqueueByArticle
+ 'origin_text' => $cite['original_text'],
+ 'refer_text' => $referText,
+ 'p_refer_id' => $referMap[$referIndex]['p_refer_id'],
+ 'text_start' => $cite['text_start'],
+ 'text_end' => $cite['text_end'],
+ 'created_at' => $now,
+ 'updated_at' => $now,
+ ]);
+
+ $pendingJobs[] = [
+ 'check_id' => intval($checkId),
+ 'reference_no' => intval($refNo),
+ 'am_id' => $amId,
+ 'text_start' => intval($cite['text_start']),
+ ];
+ $queued++;
+ $amIdsWithJobs[$amId] = true;
+ }
+ }
+ }
+
+ $checkIds = $this->pushJobsSortedByReferenceNo($pendingJobs); // returns the check ids in push order
+ foreach (array_keys($amIdsWithJobs) as $amId) {
+ $this->setAmRefCheckStatus($amId, self::AM_STATUS_RUNNING);
+ }
+
+ return [
+ 'article_id' => $articleId,
+ 'p_article_id' => $pArticleId,
+ 'queued' => $queued,
+ 'skipped' => $skipped,
+ 'check_ids' => $checkIds,
+ 'queue' => self::QUEUE_NAME,
+ ];
+ }
public function enqueueByArticle($articleId){
if ($articleId <= 0) {
throw new \InvalidArgumentException('article_id is required');
}
$prod = Db::name('production_article')
->where('article_id', $articleId)
- ->where('state', [0, 2])
+ ->whereIn('state', [0, 2])
->find();
if (empty($prod)) {
throw new \RuntimeException('production_article not found for article_id=' . $articleId);
@@ -191,10 +315,9 @@ class ReferenceCheckService
}
$queued = 0;
$skipped = 0;
- $checkIds = [];
- $delay = 0;
+ $pendingJobs = [];
$amIdsWithJobs = [];
-
+ $now = date('Y-m-d H:i:s');
foreach ($mains as $main) {
$amId = intval($main['am_id']);
$citations = $this->extractReferences((string)$main['content']);
@@ -212,12 +335,11 @@ class ReferenceCheckService
$refer = $referMap[$referIndex];
$referText = $this->formatReferForLlm($refer);
- $now = date('Y-m-d H:i:s');
- // [70-73] 展开为 reference_no=70,71,72,73 共 4 条记录
+ // [70-73] 展开为 reference_no=70,71,72,73 共 4 条记录;先入队表,再按文献号正序校对
$checkId = Db::name('article_reference_check_result')->insertGetId([
'article_id' => $main['article_id'],
'p_article_id' => $pArticleId,
- 'am_id' => intval($main['am_id']),
+ 'am_id' => $amId,
'reference_no' => $refNo,
'refer_index' => $refNo,
'origin_text' => $cite['original_text'],
@@ -229,14 +351,19 @@ class ReferenceCheckService
'updated_at' => $now,
]);
- $this->pushJob(intval($checkId), $delay);
- $checkIds[] = $checkId;
+ $pendingJobs[] = [
+ 'check_id' => intval($checkId),
+ 'reference_no' => intval($refNo),
+ 'am_id' => $amId,
+ 'text_start' => intval($cite['text_start']),
+ ];
$queued++;
- $delay += 1;
$amIdsWithJobs[$amId] = true;
}
}
}
+
+ $checkIds = $this->pushJobsSortedByReferenceNo($pendingJobs);
foreach (array_keys($amIdsWithJobs) as $amId) {
$this->setAmRefCheckStatus($amId, self::AM_STATUS_RUNNING);
}
@@ -308,13 +435,464 @@ class ReferenceCheckService
]);
}
+ /**
+ * Delete every reference-check detail row of an article by p_article_id and reset the section-level ref_check_status.
+ *
+ * Intended for use after references are added/removed, when all old reference_no values shift and earlier results are invalid:
+ * after the physical delete, the article-level status query naturally returns ARTICLE_PROGRESS_NONE (not checked).
+ *
+ * @return int number of detail rows deleted
+ */
+ public function clearArticleChecksByPArticleId($pArticleId)
+ {
+ $pArticleId = intval($pArticleId);
+ if ($pArticleId <= 0) {
+ return 0;
+ }
+
+ // Look up article_id first (needed to reset the section-level article_main.ref_check_status)
+ $articleId = intval(Db::name('production_article')
+ ->where('p_article_id', $pArticleId)
+ ->whereIn('state', [0, 2])
+ ->value('article_id'));
+
+ // Clear the queue Redis locks of the old rows first (original note: to keep in-flight workers from writing data back)
+ $oldIds = Db::name('article_reference_check_result')
+ ->where('p_article_id', $pArticleId)
+ ->column('id');
+ foreach ($oldIds as $oldId) {
+ $this->clearReferenceCheckQueueLock(intval($oldId));
+ }
+
+ $deleted = Db::name('article_reference_check_result')
+ ->where('p_article_id', $pArticleId)
+ ->delete();
+
+ if ($articleId > 0) { // skipped when no production_article row with state 0/2 was found
+ Db::name('article_main')
+ ->where('article_id', $articleId)
+ ->whereIn('state', [0, 2])
+ ->update(['ref_check_status' => self::AM_STATUS_NONE]);
+ }
+
+ return intval($deleted);
+ }
+
public function clearArticleChecks($articleId)
{
- Db::name('article_reference_check_result')->where('article_id', $articleId)->delete();
+ $articleId = intval($articleId);
+ if ($articleId <= 0) {
+ return 0; // nothing to delete for an invalid id
+ }
+
+ // Clear the queue Redis locks of the old rows first; otherwise the same check_id would not run again within the lock TTL
+ $oldIds = Db::name('article_reference_check_result')
+ ->where('article_id', $articleId)
+ ->column('id');
+ foreach ($oldIds as $oldId) {
+ $this->clearReferenceCheckQueueLock(intval($oldId));
+ }
+
+ $deleted = Db::name('article_reference_check_result')->where('article_id', $articleId)->delete();
Db::name('article_main')
->where('article_id', $articleId)
->whereIn('state', [0, 2])
->update(['ref_check_status' => self::AM_STATUS_NONE]);
+
+ return intval($deleted); // number of detail rows deleted
+ }
+
+ /**
+ * After a local move inside the reference list, refresh reference_no / refer_index of the check rows of the given p_refer_id values only.
+ *
+ * The new number is computed from the current production_article_refer.index (index + 1), so stale caller-supplied values are avoided.
+ * Only the rows of the affected references are updated (original note: to lower the risk of concurrent moves overwriting each other).
+ *
+ * @param int[] $pReferIds affected p_refer_id values (typically 2: the moved entry and its neighbour)
+ * @param int $pArticleId optional: when > 0, adds a p_article_id filter to both the refer lookup and the update
+ * @return array{p_refer_ids:int[], affected_rows:int, changes:array}
+ */
+ public function syncReferenceNoByPReferIds(array $pReferIds, $pArticleId = 0)
+ {
+ $pReferIds = array_values(array_unique(array_filter(array_map('intval', $pReferIds)))); // ints, zeros dropped, de-duplicated
+ $pArticleId = intval($pArticleId);
+ if (empty($pReferIds)) {
+ return [
+ 'p_refer_ids' => [],
+ 'affected_rows' => 0,
+ 'changes' => [],
+ ];
+ }
+
+ $referQuery = Db::name('production_article_refer')
+ ->field('p_refer_id,p_article_id,index')
+ ->whereIn('p_refer_id', $pReferIds)
+ ->where('state', 0);
+ if ($pArticleId > 0) {
+ $referQuery->where('p_article_id', $pArticleId);
+ }
+ $refers = $referQuery->select();
+ if (empty($refers)) {
+ return [
+ 'p_refer_ids' => $pReferIds,
+ 'affected_rows' => 0,
+ 'changes' => [],
+ ];
+ }
+
+ $now = date('Y-m-d H:i:s');
+ $affected = 0;
+ $changes = [];
+
+ foreach ($refers as $refer) {
+ $pReferId = intval($refer['p_refer_id']);
+ $newNo = intval($refer['index']) + 1; // index is 0-based, reference numbers are 1-based
+
+ $updateQuery = Db::name('article_reference_check_result')
+ ->where('p_refer_id', $pReferId)
+ ->where('reference_no', '<>', $newNo); // only touch rows whose number actually changes
+ if ($pArticleId > 0) {
+ $updateQuery->where('p_article_id', $pArticleId);
+ }
+ $rows = $updateQuery->update([
+ 'reference_no' => $newNo,
+ 'refer_index' => $newNo,
+ 'updated_at' => $now,
+ ]);
+
+ if ($rows > 0) {
+ $affected += intval($rows);
+ $changes[] = [
+ 'p_refer_id' => $pReferId,
+ 'new_ref_no' => $newNo,
+ 'affected_rows' => intval($rows),
+ ];
+ }
+ }
+
+ return [
+ 'p_refer_ids' => $pReferIds,
+ 'affected_rows' => $affected,
+ 'changes' => $changes,
+ ];
+ }
+
+ /**
+ * NOTE(review): orphaned docblock — it describes resetAndRecheckByArticle() (defined later in this class), not the method below:
+ * reset an article's reference check by deleting the old detail rows, clearing queue locks and re-enqueueing the full text.
+ * @return array
+ */
+ /**
+ * Get the overall reference-check status of an article by p_article_id.
+ *
+ * The unit counted is the "reference" (rows grouped by reference_no), not the individual check detail row.
+ * E.g. with 50 references backed by 111 detail rows, total is 50.
+ *
+ * Meaning of the returned status value (whole article):
+ * 0 = ARTICLE_PROGRESS_NONE no check record exists at all
+ * 1 = ARTICLE_PROGRESS_RUNNING at least one reference still has unfinished detail rows
+ * 2 = ARTICLE_PROGRESS_COMPLETED every detail row of every reference has finished
+ *
+ * Each reference falls into exactly one bucket according to the status distribution of its detail rows:
+ * pending —— any detail row in the group has status=0 (a partly finished "checking" group also lands here)
+ * done —— every detail row in the group has status=1
+ * failed —— every detail row in the group has finished and at least one has status=2
+ *
+ * pending + done + failed = total; progress_percent = (done + failed) / total.
+ * For per-group details use getProgressByPArticleId (controller referenceCheckProgressAI, per the original note).
+ *
+ * @return array{p_article_id:int, status:int, total:int, pending:int, done:int, failed:int, progress_percent:float}
+ */
+ public function getArticleProgressStatusByPArticleId($pArticleId)
+ {
+ $pArticleId = intval($pArticleId);
+ if ($pArticleId <= 0) {
+ throw new \InvalidArgumentException('p_article_id is required');
+ }
+
+ // A single SQL statement aggregates by reference_no and computes the per-group status counts;
+ // 50 references → 50 rows, so PHP only needs one loop to bucket them
+ $rows = Db::name('article_reference_check_result')
+ ->field('reference_no'
+ . ', SUM(CASE WHEN status = 0 THEN 1 ELSE 0 END) AS pending_cnt'
+ . ', SUM(CASE WHEN status = 2 THEN 1 ELSE 0 END) AS failed_cnt')
+ ->where('p_article_id', $pArticleId)
+ ->group('reference_no')
+ ->select();
+
+ if (empty($rows)) {
+ return [
+ 'p_article_id' => $pArticleId,
+ 'status' => self::ARTICLE_PROGRESS_NONE,
+ 'total' => 0,
+ 'pending' => 0,
+ 'done' => 0,
+ 'failed' => 0,
+ 'progress_percent' => 0,
+ ];
+ }
+
+ $pending = 0;
+ $done = 0;
+ $failed = 0;
+ foreach ($rows as $row) {
+ $pendingCnt = intval($this->arrGet($row, 'pending_cnt', 0));
+ $failedCnt = intval($this->arrGet($row, 'failed_cnt', 0));
+ if ($pendingCnt > 0) { // pending takes precedence over failed
+ $pending++;
+ } elseif ($failedCnt > 0) {
+ $failed++;
+ } else {
+ $done++;
+ }
+ }
+
+ $total = count($rows); // > 0 here because the empty case returned above
+ $articleStatus = $pending > 0
+ ? self::ARTICLE_PROGRESS_RUNNING
+ : self::ARTICLE_PROGRESS_COMPLETED;
+ $finished = $done + $failed;
+ $progressPercent = round($finished / $total * 100, 1); // percentage with one decimal place
+
+ return [
+ 'p_article_id' => $pArticleId,
+ 'status' => $articleStatus,
+ 'total' => $total,
+ 'pending' => $pending,
+ 'done' => $done,
+ 'failed' => $failed,
+ 'progress_percent' => $progressPercent,
+ ];
+ }
+
+ /**
+ * Get the reference-check progress of an article by p_article_id, aggregated per reference_no, with every detail row expanded.
+ *
+ * Single article_reference_check_result.status:
+ * 0 = pending 1 = check completed 2 = check failed
+ *
+ * Group (reference_no) status (returned field status, numeric):
+ * 0 = PROGRESS_PENDING pending: every detail row in the group has status=0
+ * 1 = PROGRESS_CHECKING checking: some detail rows have finished, some are still 0
+ * 2 = PROGRESS_COMPLETED completed: every detail row in the group has status=1
+ * 3 = PROGRESS_FAILED failed: every detail row has finished and at least one has status=2
+ *
+ * Each group also carries a records sub-array giving, for every check row under that reference_no:
+ * - status (0/1/2 as above)
+ * - confidence score
+ * - is_pass (confidence >= PASS_CONFIDENCE_THRESHOLD counts as passed)
+ *
+ * @return array{p_article_id:int, total_groups:int, summary:array, list:array}
+ */
+ public function getProgressByPArticleId($pArticleId)
+ {
+ $pArticleId = intval($pArticleId);
+ if ($pArticleId <= 0) {
+ throw new \InvalidArgumentException('p_article_id is required');
+ }
+
+ $rows = Db::name('article_reference_check_result')
+ ->field('id,p_refer_id,reference_no,am_id,status,confidence,is_match,reason,text_start,text_end,updated_at')
+ ->where('p_article_id', $pArticleId)
+ ->order('reference_no asc, id asc')
+ ->select();
+
+ // summary uses numeric keys; 0/1/2/3 correspond to the PROGRESS_* constants
+ $summary = [
+ self::PROGRESS_PENDING => 0,
+ self::PROGRESS_CHECKING => 0,
+ self::PROGRESS_COMPLETED => 0,
+ self::PROGRESS_FAILED => 0,
+ ];
+ if (empty($rows)) {
+ return [
+ 'p_article_id' => $pArticleId,
+ 'total_groups' => 0,
+ 'summary' => $summary,
+ 'list' => [],
+ ];
+ }
+
+ $groups = []; // keyed by reference_no
+ foreach ($rows as $row) {
+ $refNo = intval($this->arrGet($row, 'reference_no', 0));
+ $pReferId = intval($this->arrGet($row, 'p_refer_id', 0));
+ if (!isset($groups[$refNo])) {
+ $groups[$refNo] = [
+ 'reference_no' => $refNo,
+ 'p_refer_id' => $pReferId,
+ 'total' => 0,
+ 'pending' => 0,
+ 'done' => 0,
+ 'failed' => 0,
+ 'pass' => 0,
+ 'last_updated_at' => '',
+ 'records' => [],
+ ];
+ }
+ // One reference_no should map to a single p_refer_id; if they are mixed, keep the first non-empty id seen
+ if ($groups[$refNo]['p_refer_id'] <= 0 && $pReferId > 0) {
+ $groups[$refNo]['p_refer_id'] = $pReferId;
+ }
+
+ $groups[$refNo]['total']++;
+ $st = intval($this->arrGet($row, 'status', 0));
+ if ($st === 0) {
+ $groups[$refNo]['pending']++;
+ } elseif ($st === 1) {
+ $groups[$refNo]['done']++;
+ } elseif ($st === 2) {
+ $groups[$refNo]['failed']++;
+ }
+
+ $upd = (string)$this->arrGet($row, 'updated_at', '');
+ if ($upd > $groups[$refNo]['last_updated_at']) { // string comparison; keeps the greatest updated_at of the group
+ $groups[$refNo]['last_updated_at'] = $upd;
+ }
+
+ $confidence = floatval($this->arrGet($row, 'confidence', 0));
+ $isPass = $confidence >= self::PASS_CONFIDENCE_THRESHOLD; // evaluated from confidence alone, whatever the row status
+ if ($isPass) {
+ $groups[$refNo]['pass']++;
+ }
+
+ $groups[$refNo]['records'][] = [
+ 'check_id' => intval($this->arrGet($row, 'id', 0)),
+ 'am_id' => intval($this->arrGet($row, 'am_id', 0)),
+ 'status' => $st,
+ 'confidence' => $confidence,
+ 'is_pass' => $isPass,
+ 'is_match' => intval($this->arrGet($row, 'is_match', 0)),
+ 'reason' => (string)$this->arrGet($row, 'reason', ''),
+ 'text_start' => intval($this->arrGet($row, 'text_start', 0)),
+ 'text_end' => intval($this->arrGet($row, 'text_end', 0)),
+ 'last_updated_at' => $upd,
+ ];
+ }
+
+ $list = [];
+ foreach ($groups as $g) {
+ $total = $g['total'];
+ $pending = $g['pending'];
+ $failed = $g['failed'];
+ $pass = $g['pass'];
+
+ if ($pending === $total) {
+ $status = self::PROGRESS_PENDING;
+ } elseif ($pending === 0) {
+ $status = $failed > 0 ? self::PROGRESS_FAILED : self::PROGRESS_COMPLETED;
+ } else {
+ $status = self::PROGRESS_CHECKING;
+ }
+
+ // Group passes overall: fully completed (no pending, no failed) and every row has confidence >= 0.65
+ $g['is_pass'] = (
+ $status === self::PROGRESS_COMPLETED
+ && $total > 0
+ && $pass === $total
+ );
+
+ $summary[$status]++;
+ $g['status'] = $status;
+ $list[] = $g;
+ }
+
+ usort($list, function ($a, $b) { // ascending reference_no
+ return $a['reference_no'] - $b['reference_no'];
+ });
+
+ return [
+ 'p_article_id' => $pArticleId,
+ 'total_groups' => count($list),
+ 'summary' => $summary,
+ 'list' => $list,
+ ];
+ }
+
+ /**
+ * Get every check detail row of one reference by p_refer_id.
+ *
+ * Each record returns:
+ * - am_id primary key of the matched article_main row
+ * - confidence match confidence (0~1)
+ * - reason the justification given by the LLM
+ * - is_match whether it matches (from article_reference_check_result.is_match)
+ * - is_pass whether the check passed (confidence >= PASS_CONFIDENCE_THRESHOLD)
+ *
+ * @param int $pReferId production_article_refer.p_refer_id
+ * @return array{p_refer_id:int, p_article_id:int, reference_no:int, total:int, list:array}
+ */
+ public function getCheckDetailsByPReferId($pReferId)
+ {
+ $pReferId = intval($pReferId);
+ if ($pReferId <= 0) {
+ throw new \InvalidArgumentException('p_refer_id is required');
+ }
+
+ $rows = Db::name('article_reference_check_result')
+ ->field('id,p_article_id,reference_no,am_id,confidence,is_match,reason')
+ ->where('p_refer_id', $pReferId)
+ ->order('id asc')
+ ->select();
+
+ $list = [];
+ $pArticleId = 0; // both stay 0 when no row is found
+ $referenceNo = 0;
+ foreach ($rows as $row) {
+ // Use the first positive p_article_id / reference_no encountered as the context of this reference
+ if ($pArticleId <= 0) {
+ $pArticleId = intval($this->arrGet($row, 'p_article_id', 0));
+ }
+ if ($referenceNo <= 0) {
+ $referenceNo = intval($this->arrGet($row, 'reference_no', 0));
+ }
+
+ $confidence = floatval($this->arrGet($row, 'confidence', 0));
+ $list[] = [
+ 'am_id' => intval($this->arrGet($row, 'am_id', 0)),
+ 'confidence' => $confidence,
+ 'reason' => (string)$this->arrGet($row, 'reason', ''),
+ 'is_match' => intval($this->arrGet($row, 'is_match', 0)),
+ 'is_pass' => $confidence >= self::PASS_CONFIDENCE_THRESHOLD,
+ ];
+ }
+
+ return [
+ 'p_refer_id' => $pReferId,
+ 'p_article_id' => $pArticleId,
+ 'reference_no' => $referenceNo,
+ 'total' => count($list),
+ 'list' => $list,
+ ];
+ }
+
+ public function resetAndRecheckByArticle($aProductionArticle) // Reset an article's reference check: clear existing rows, then re-enqueue the whole article.
+ {
+ if (empty($aProductionArticle) || !is_array($aProductionArticle)) {
+ throw new \InvalidArgumentException('production_article is required');
+ }
+ $pArticleId = intval($this->arrGet($aProductionArticle, 'p_article_id', 0));
+ $articleId = intval($this->arrGet($aProductionArticle, 'article_id', 0));
+ if ($pArticleId <= 0 || $articleId <= 0) {
+ throw new \InvalidArgumentException('production_article requires both p_article_id and article_id');
+ }
+
+ $existing = Db::name('article_reference_check_result') // a reset is only allowed when check rows already exist
+ ->where('p_article_id', $pArticleId)
+ ->count();
+ if (intval($existing) <= 0) {
+ throw new \RuntimeException('no existing reference check records for p_article_id=' . $pArticleId);
+ }
+
+ $cleared = $this->clearArticleChecks($articleId); // NOTE(review): existence is checked by p_article_id but rows are cleared by article_id — confirm the two always select the same rows
+ $enqueueResult = $this->enqueueByArticle($articleId);
+
+ if (!is_array($enqueueResult)) {
+ $enqueueResult = [];
+ }
+ $enqueueResult['cleared'] = $cleared; // number of rows deleted by clearArticleChecks
+ $enqueueResult['reset'] = 1;
+ return $enqueueResult;
}
public static function amStatusLabel($status)
@@ -571,7 +1149,7 @@ class ReferenceCheckService
// 1) 先标记 blue 内各序号(在原文上操作,[70-73] 仅标不合理者如 70、71)
preg_match_all(
- '/\[([\d,\-\s]+)\]<\/blue>/',
+ self::BLUE_TAG_REGEX,
$html,
$matches,
PREG_OFFSET_CAPTURE
@@ -619,7 +1197,9 @@ class ReferenceCheckService
$tagClass = !empty($badNums) ? ' ref-cite-error' : '';
$groupIds = !empty($badNums)
- ? implode(',', array_map('intval', array_column($badNums, 'check_id')))
+ ? implode(',', array_map(function ($row) {
+ return (int) $this->resolveCheckRowId($row);
+ }, $badNums))
: '';
$newHtml = '[' . $innerMarked . ']';
@@ -718,13 +1298,388 @@ class ReferenceCheckService
$parts[] = ucfirst($f) . ': ' . $v;
}
}
+ $frag = trim((string)$this->arrGet($refer, 'refer_frag', ''));
$content = trim((string)$this->arrGet($refer, 'refer_content', ''));
- if ($content !== '') {
+ if ($frag !== '') {
+ $parts[] = 'Reference: ' . $frag;
+ } elseif ($content !== '') {
$parts[] = 'Reference: ' . $content;
}
return implode("\n", $parts);
}
+ /**
+ * NOTE(review): orphaned docblock — its parameters match recheckByRefer() (defined below), not the method that follows.
+ * Re-check after the front end edits a reference: only existing check rows are handled (refer_text refreshed, results reset); returns at once when none exist.
+ * @param int $articleId
+ * @param int $pReferId t_production_article_refer.p_refer_id (preferred)
+ * @param int $referenceNo reference number, index+1 (used when p_refer_id is not given)
+ * @return array
+ */
+ /**
+ * After a reference is edited, asynchronously re-check every check row of that reference, selected by p_refer_id.
+ *
+ * Flow: refresh refer_text/refer_index → reset status/is_match/confidence/reason
+ * → set the section-level ref_check_status to RUNNING → push onto the ReferenceCheck queue
+ *
+ * Difference from recheckByRefer: this method does NOT run the LLM inside the request; it only enqueues and returns immediately.
+ * The front end can poll getProgressByPArticleId for progress.
+ *
+ * @param int $pReferId t_production_article_refer.p_refer_id (required)
+ * @param int $pArticleId optional: when > 0 it is used instead of the p_article_id stored on the refer row
+ * @return array{p_refer_id:int, p_article_id:int, reference_no:int, reset:int, queued:int, check_ids:int[], queue:string}
+ */
+ public function enqueueRecheckByPReferId($pReferId, $pArticleId = 0)
+ {
+ $pReferId = intval($pReferId);
+ if ($pReferId <= 0) {
+ throw new \InvalidArgumentException('p_refer_id is required');
+ }
+
+ $refer = Db::name('production_article_refer')
+ ->where('p_refer_id', $pReferId)
+ ->where('state', 0)
+ ->find();
+ if (empty($refer)) {
+ throw new \RuntimeException('production_article_refer not found, p_refer_id=' . $pReferId);
+ }
+
+ $pArticleId = intval($pArticleId);
+ if ($pArticleId <= 0) {
+ $pArticleId = intval($this->arrGet($refer, 'p_article_id', 0)); // fall back to the refer row's own p_article_id
+ }
+ if ($pArticleId <= 0) {
+ throw new \RuntimeException('p_article_id is missing for p_refer_id=' . $pReferId);
+ }
+
+ $referenceNo = intval($this->arrGet($refer, 'index', 0)) + 1; // index is 0-based, reference numbers are 1-based
+ $referText = $this->formatReferForLlm($refer);
+ $now = date('Y-m-d H:i:s');
+
+ $rows = Db::name('article_reference_check_result')
+ ->where('p_article_id', $pArticleId)
+ ->where('p_refer_id', $pReferId)
+ ->select();
+
+ if (empty($rows)) {
+ return [
+ 'p_refer_id' => $pReferId,
+ 'p_article_id' => $pArticleId,
+ 'reference_no' => $referenceNo,
+ 'reset' => 0,
+ 'queued' => 0,
+ 'check_ids' => [],
+ 'queue' => self::QUEUE_NAME,
+ ];
+ }
+
+ $resetFields = [
+ 'refer_text' => $referText,
+ 'refer_index' => $referenceNo,
+ 'reference_no' => $referenceNo,
+ 'status' => 0,
+ 'is_match' => 0,
+ 'can_support' => 0,
+ 'confidence' => 0,
+ 'reason' => '',
+ 'error_msg' => '',
+ 'updated_at' => $now,
+ ];
+
+ $pendingJobs = [];
+ $amIds = []; // set of affected am_id values
+ foreach ($rows as $row) {
+ $checkId = $this->resolveCheckRowId($row);
+ Db::name('article_reference_check_result')->where('id', $checkId)->update($resetFields);
+ // The old queue completion marker must be cleared, otherwise re-pushing the same check_id is silently dropped by acquireLock
+ $this->clearReferenceCheckQueueLock($checkId);
+ $pendingJobs[] = [
+ 'check_id' => $checkId,
+ 'reference_no' => $referenceNo,
+ 'am_id' => intval($this->arrGet($row, 'am_id', 0)),
+ 'text_start' => intval($this->arrGet($row, 'text_start', 0)),
+ ];
+ $amId = intval($this->arrGet($row, 'am_id', 0));
+ if ($amId > 0) {
+ $amIds[$amId] = true;
+ }
+ }
+
+ foreach (array_keys($amIds) as $amId) {
+ $this->setAmRefCheckStatus($amId, self::AM_STATUS_RUNNING);
+ }
+
+ $checkIds = $this->pushJobsSortedByReferenceNo($pendingJobs);
+
+ return [
+ 'p_refer_id' => $pReferId,
+ 'p_article_id' => $pArticleId,
+ 'reference_no' => $referenceNo,
+ 'reset' => count($rows),
+ 'queued' => count($checkIds),
+ 'check_ids' => $checkIds,
+ 'queue' => self::QUEUE_NAME,
+ ];
+ }
+
+ /** Synchronously re-check every existing check row of one edited reference: the LLM runs inside this call, nothing is queued. */
+ public function recheckByRefer($articleId, $pReferId = 0, $referenceNo = 0)
+ {
+ $articleId = intval($articleId);
+ if ($articleId <= 0) {
+ throw new \InvalidArgumentException('article_id is required');
+ }
+
+ $ctx = $this->resolveReferForRecheck($articleId, intval($pReferId), intval($referenceNo)); // throws when the reference cannot be resolved
+ $refer = $ctx['refer'];
+ $pReferId = $ctx['p_refer_id'];
+ $referenceNo = $ctx['reference_no']; // always recomputed from the refer row (index + 1)
+ $pArticleId = $ctx['p_article_id'];
+ $referText = $this->formatReferForLlm($refer);
+ $now = date('Y-m-d H:i:s');
+
+ $rows = Db::name('article_reference_check_result')
+ ->where('article_id', $articleId)
+ ->where(function ($query) use ($pReferId, $referenceNo) {
+ $query->where('p_refer_id', $pReferId)->whereOr('reference_no', $referenceNo);
+ })
+ ->select();
+
+ if (empty($rows)) {
+ return [
+ 'article_id' => $articleId,
+ 'p_refer_id' => $pReferId,
+ 'reference_no' => $referenceNo,
+ 'reset' => 0,
+ 'queued' => 0,
+ 'check_ids' => [],
+ 'queue' => self::QUEUE_NAME,
+ ];
+ }
+
+ // Rows matched through p_refer_id may carry a stale reference_no, so it is rewritten together with refer_index.
+ $resetFields = [
+ 'refer_text' => $referText,
+ 'p_refer_id' => $pReferId,
+ 'p_article_id' => $pArticleId,
+ 'refer_index' => $referenceNo,
+ 'reference_no' => $referenceNo, // kept in step with refer_index, as enqueueRecheckByPReferId does
+ 'status' => 0,
+ 'is_match' => 0,
+ 'can_support' => 0,
+ 'confidence' => 0,
+ 'reason' => '',
+ 'error_msg' => '',
+ 'updated_at' => $now,
+ ];
+
+ $pendingJobs = [];
+ $amIds = []; // set of affected am_id values
+ foreach ($rows as $row) {
+ $checkId = $this->resolveCheckRowId($row);
+ Db::name('article_reference_check_result')->where('id', $checkId)->update($resetFields);
+ $pendingJobs[] = [
+ 'check_id' => $checkId,
+ 'reference_no' => $referenceNo,
+ 'am_id' => intval($row['am_id']),
+ 'text_start' => intval(isset($row['text_start']) ? $row['text_start'] : 0),
+ ];
+ $amId = intval($row['am_id']);
+ if ($amId > 0) {
+ $amIds[$amId] = true;
+ }
+ }
+
+ foreach (array_keys($amIds) as $amId) {
+ $this->setAmRefCheckStatus($amId, self::AM_STATUS_RUNNING);
+ }
+
+ usort($pendingJobs, function ($a, $b) { // every job shares $referenceNo, so order by am_id, then text position
+ if ($a['am_id'] !== $b['am_id']) {
+ return $a['am_id'] - $b['am_id'];
+ }
+ return $a['text_start'] - $b['text_start'];
+ });
+
+ $checkIds = [];
+ $results = [];
+ $failed = [];
+ foreach ($pendingJobs as $job) {
+ $checkId = intval($job['check_id']);
+ $checkIds[] = $checkId;
+ $this->clearReferenceCheckQueueLock($checkId); // drop any stale queue lock / completion marker for this row
+ try {
+ $results[] = $this->runReferenceCheckOnce($checkId);
+ } catch (\Exception $e) { // one failing row must not abort the remaining rows
+ $failed[] = [
+ 'check_id' => $checkId,
+ 'error' => $e->getMessage(),
+ ];
+ \think\Log::error('recheckByRefer check_id=' . $checkId . ' ' . $e->getMessage());
+ }
+ }
+
+ foreach (array_keys($amIds) as $amId) {
+ $this->syncAmRefCheckStatus($amId); // replace the temporary RUNNING state set above
+ }
+
+ return [
+ 'article_id' => $articleId,
+ 'p_refer_id' => $pReferId,
+ 'reference_no' => $referenceNo,
+ 'reset' => count($rows),
+ 'checked' => count($results),
+ 'failed' => count($failed),
+ 'check_ids' => $checkIds,
+ 'results' => $results,
+ 'errors' => $failed,
+ ];
+ }
+
+ /**
+ * Clear the queue's Redis completion markers so that a re-check job is not silently dropped by acquireLock.
+ */
+ public function clearReferenceCheckQueueLock($checkId)
+ {
+ $checkId = intval($checkId);
+ if ($checkId <= 0) {
+ return;
+ }
+ try {
+ $keys = [];
+ foreach (['queue_job', 'queue_job_two'] as $prefix) { // one key pair per job class (first pass / second pass)
+ $class = $prefix === 'queue_job_two'
+ ? 'app\\api\\job\\ReferenceCheckTwo'
+ : 'app\\api\\job\\ReferenceCheck';
+ $base = $prefix . ':' . $class . ':' . $checkId;
+ $keys[] = $base;
+ $keys[] = $base . ':status';
+ }
+ QueueRedis::getInstance()->deleteRedisKeys($keys);
+ } catch (\Exception $e) { // best effort: a Redis failure is logged, never propagated
+ \think\Log::warning('clearReferenceCheckQueueLock id=' . $checkId . ' ' . $e->getMessage());
+ }
+ }
+
+ /**
+ * Run one reference LLM check synchronously and write the result back to article_reference_check_result.
+ */
+ public function runReferenceCheckOnce($checkId)
+ {
+ $checkId = intval($checkId);
+ $row = Db::name('article_reference_check_result')->where('id', $checkId)->find();
+ if (empty($row)) {
+ throw new \RuntimeException('article_reference_check_result not found, id=' . $checkId);
+ }
+
+ $contentA = $this->resolveMainContentForJob($row);
+ $refer = null;
+ if (intval($row['p_refer_id']) > 0) {
+ $refer = Db::name('production_article_refer')
+ ->where('p_refer_id', intval($row['p_refer_id']))
+ ->where('state', 0)
+ ->find();
+ }
+
+ if ($refer) {
+ $contentB = $this->formatReferForLlm($refer); // prefer the current refer row over the stored snapshot
+ } else {
+ $contentB = trim((string)$this->arrGet($row, 'refer_text', '')); // fall back to the refer_text saved on the check row
+ }
+
+ if ($contentA === '' || $contentB === '') {
+ $this->updateCheckResult($checkId, [
+ 'status' => 2,
+ 'error_msg' => 'Missing article_main.content or refer_text',
+ ]);
+ throw new \RuntimeException('Missing article_main.content or refer_text');
+ }
+
+ $llmResult = (new LLMService())->checkReference($contentA, $contentB, false);
+ $requestFailed = !empty($llmResult['request_failed']);
+ $canSupport = $this->parseLlmCanSupport($llmResult);
+ $confidence = floatval(isset($llmResult['confidence']) ? $llmResult['confidence'] : 0);
+ $reason = isset($llmResult['reason']) ? $llmResult['reason'] : '';
+
+ // LLM communication failure: write status=2 (check failed) + error_msg and throw. Per the original note the queue worker then
+ // retries via release(30), and after 3 retries ReferenceCheck::markFailed keeps status=2 (worker code is not shown here)
+ if ($requestFailed) {
+ $this->updateCheckResult($checkId, [
+ 'confidence' => $confidence,
+ 'reason' => $reason,
+ 'status' => 2,
+ 'error_msg' => $reason,
+ ]);
+ $this->clearReferenceCheckQueueLock($checkId);
+ throw new \RuntimeException($reason !== '' ? $reason : 'LLM request failed');
+ }
+
+ $this->updateCheckResult($checkId, [
+ 'can_support' => $canSupport ? 1 : 0,
+ 'is_match' => $canSupport ? 1 : 0, // is_match mirrors can_support
+ 'confidence' => $confidence,
+ 'reason' => $reason,
+ 'status' => 1,
+ 'error_msg' => '',
+ ]);
+
+ $this->clearReferenceCheckQueueLock($checkId);
+ $this->maybeEnqueueSecondPass($checkId, $confidence);
+
+ return [
+ 'check_id' => $checkId,
+ 'can_support' => $canSupport ? 1 : 0,
+ 'is_match' => $canSupport ? 1 : 0,
+ 'confidence' => $confidence,
+ 'reason' => $reason,
+ ];
+ }
+
+ /** Resolve the refer row for a re-check from p_refer_id (preferred) or reference_no, scoped to the article's production_article.
+ * @return array{refer: array, p_article_id: int, p_refer_id: int, reference_no: int}
+ */
+ private function resolveReferForRecheck($articleId, $pReferId, $referenceNo)
+ {
+ $prod = Db::name('production_article')
+ ->where('article_id', $articleId)
+ ->whereIn('state', [0, 2])
+ ->find();
+ if (empty($prod)) {
+ throw new \RuntimeException('production_article not found for article_id=' . $articleId);
+ }
+
+ $pArticleId = intval($prod['p_article_id']);
+ $refer = null;
+
+ if ($pReferId > 0) {
+ $refer = Db::name('production_article_refer')
+ ->where('p_refer_id', $pReferId)
+ ->where('p_article_id', $pArticleId)
+ ->where('state', 0)
+ ->find();
+ } elseif ($referenceNo > 0) {
+ $referMap = $this->loadReferMapByPArticleId($pArticleId);
+ $referIndex = $referenceNo - 1; // reference numbers are 1-based, map keys are 0-based
+ if (isset($referMap[$referIndex])) {
+ $refer = $referMap[$referIndex];
+ $pReferId = intval($refer['p_refer_id']);
+ }
+ } else {
+ throw new \InvalidArgumentException('p_refer_id or reference_no is required');
+ }
+
+ if (empty($refer)) {
+ throw new \RuntimeException('production_article_refer not found');
+ }
+
+ return [
+ 'refer' => $refer,
+ 'p_article_id' => $pArticleId,
+ 'p_refer_id' => intval($refer['p_refer_id']),
+ 'reference_no' => intval($refer['index']) + 1, // derived from the row itself, not from the caller's value
+ ];
+ }
+
/**
* 仅使用 refer_doi 字段(二次 Crossref 摘要用)
*/
@@ -804,7 +1759,7 @@ class ReferenceCheckService
return '';
}
- $text = preg_replace('/\[([\d,\-\s]+)\]<\/blue>/', '[$1]', $text);
+ $text = preg_replace(self::BLUE_TAG_REGEX, '[$1]', $text);
$text = strip_tags($text);
$text = html_entity_decode($text, ENT_QUOTES | ENT_HTML5, 'UTF-8');
$text = preg_replace('/\s+/u', ' ', $text);
@@ -1163,6 +2118,7 @@ class ReferenceCheckService
return false;
}
+ $this->clearReferenceCheckQueueLock($checkId);
$this->pushJob2($checkId, 5);
return true;
}
@@ -1173,7 +2129,7 @@ class ReferenceCheckService
public function extractReferences($content)
{
$result = [];
- preg_match_all('/\[([\d,\-\s]+)\]<\/blue>/', $content, $matches, PREG_OFFSET_CAPTURE);
+ preg_match_all(self::BLUE_TAG_REGEX, $content, $matches, PREG_OFFSET_CAPTURE);
if (empty($matches[0])) {
return [];
}
@@ -1353,7 +2309,7 @@ class ReferenceCheckService
private function buildCitationContextText($content, $start, $end)
{
$text = $this->byteSubstr($content, $start, $end);
- $text = preg_replace('/\[[\d,\-\s]+\]<\/blue>/', '', $text);
+ $text = preg_replace(self::BLUE_TAG_REGEX, '', $text);
$text = trim(strip_tags($text));
$text = preg_replace('/\s+/u', ' ', $text);
$text = ltrim($text, "\xEF\xBB\xBF");
@@ -1493,8 +2449,7 @@ class ReferenceCheckService
$start = $tagStart - $maxBytes;
$slice = substr($content, $start, $tagStart - $start);
if (preg_match('/[.!?。!?]\s+/u', $slice, $m, PREG_OFFSET_CAPTURE)) {
- $last = end($m[0]);
- $rel = $last[1] + strlen($last[0]);
+ $rel = $m[0][1] + strlen($m[0][0]);
return $start + $rel;
}
@@ -1540,7 +2495,7 @@ class ReferenceCheckService
}
$gap = substr($content, $tagEnd, $end - $tagEnd);
- $gapText = trim(strip_tags(preg_replace('/\[[\d,\-\s]+\]<\/blue>/', '', $gap)));
+ $gapText = trim(strip_tags(preg_replace(self::BLUE_TAG_REGEX, '', $gap)));
if ($gapText !== '' && !$this->isOnlyPunctuationOrSpace($gapText)) {
return $end;
}
@@ -1551,8 +2506,47 @@ class ReferenceCheckService
return $length;
}
+ /**
+ * Push already-inserted check rows onto the queue in ascending reference-number order (ties ordered by am_id, then text position).
+ *
+ * @param array $rows each element has check_id and reference_no, optionally am_id and text_start
+ */
+ private function pushJobsSortedByReferenceNo(array $rows)
+ {
+ if (empty($rows)) {
+ return [];
+ }
+
+ usort($rows, function ($a, $b) {
+ if ($a['reference_no'] !== $b['reference_no']) {
+ return $a['reference_no'] - $b['reference_no'];
+ }
+ $amA = isset($a['am_id']) ? intval($a['am_id']) : 0; // a missing am_id sorts as 0
+ $amB = isset($b['am_id']) ? intval($b['am_id']) : 0;
+ if ($amA !== $amB) {
+ return $amA - $amB;
+ }
+ $posA = isset($a['text_start']) ? intval($a['text_start']) : 0; // a missing text_start sorts as 0
+ $posB = isset($b['text_start']) ? intval($b['text_start']) : 0;
+ return $posA - $posB;
+ });
+
+ $checkIds = [];
+ $delay = 0;
+ foreach ($rows as $row) {
+ $checkId = intval($row['check_id']);
+ $checkIds[] = $checkId;
+ $this->pushJob($checkId, $delay);
+ $delay++; // each later job gets a delay one larger (passed to pushJob as $delaySeconds)
+ }
+
+ return $checkIds; // check ids in push order
+ }
+
private function pushJob($checkId, $delaySeconds = 0)
{
+ $checkId = intval($checkId);
+ $this->clearReferenceCheckQueueLock($checkId);
$jobClass = 'app\api\job\ReferenceCheck@fire';
$data = ['check_id' => $checkId];
try {
diff --git a/application/common/service/LLMService.php b/application/common/service/LLMService.php
index 01a755df..69f5e61c 100644
--- a/application/common/service/LLMService.php
+++ b/application/common/service/LLMService.php
@@ -32,11 +32,14 @@ class LLMService
*/
public function checkReference($contextText, $referText, $isAgain = false, $doiBlock = null)
{
+ // request_failed=true 表示"LLM 通讯/解析层面的失败"(可重试,区别于业务上的"未命中");
+ // 上游 runReferenceCheckOnce 会据此把 DB.status 置为 2(失败) 并抛异常触发队列重试
$fallback = [
- 'can_support' => false,
- 'is_match' => false,
- 'confidence' => 0.0,
- 'reason' => 'LLM not configured or request failed',
+ 'can_support' => false,
+ 'is_match' => false,
+ 'confidence' => 0.0,
+ 'reason' => 'LLM not configured or request failed',
+ 'request_failed' => true,
];
if ($this->url === '' || $this->model === '') {
\think\Log::warning('ReferenceCheck LLM: url or model not configured');
@@ -47,6 +50,7 @@ class LLMService
$referText = trim($referText);
$doiBlock = trim((string)$doiBlock);
if ($contextText === '' || $referText === '') {
+ // 空文本是入参问题,不是 LLM 故障,不需要重试
return [
'can_support' => false,
'is_match' => false,
@@ -149,10 +153,10 @@ class LLMService
你是文献引用校对助手。判断【正文全文】与【参考文献书目】是否相关、能否用于支撑正文中的引用。
【核心原则:从宽判断,避免误杀】
-默认倾向 can_support=true。只要文献与正文不是「驴唇不对马嘴」,即判为相关、能支撑。
+默认倾向 can_support=true。只要文献与正文不是「风马牛不相及」,即判为相关、能支撑。
不要求变量一致、不要求结论逐条对应、不要求研究设计相同。
-【仅当以下情况才判 can_support=false(驴唇不对马嘴)】
+【仅当以下情况才判 can_support=false(与正文明显无关)】
- 学科/主题完全无关(如正文讲深度学习聚类,文献是糖尿病步态检测)。
- 明显张冠李戴(正文断言 A 疗法的效果,文献研究的是完全不同的 B 问题且无关联)。
- 文献条目与正文讨论的对象/场景毫无交集,且无法作背景或理论引用。
@@ -164,7 +168,7 @@ class LLMService
【confidence 固定档位(禁止其它小数)】
can_support=true:0.65(有关联但较泛)/ 0.78 / 0.85 / 0.92 / 0.98(非常确定相关)
-can_support=false:0.15(明确驴唇不对马嘴)/ 0.25 / 0.35 / 0.45(仅当实在无法建立任何合理关联)
+can_support=false:0.15(明确风马牛不相及)/ 0.25 / 0.35 / 0.45(仅当实在无法建立任何合理关联)
【输出】仅一行 minified JSON,无 markdown:
{"can_support":true|false,"is_match":true|false,"confidence":0.15|0.25|0.35|0.45|0.65|0.78|0.85|0.92|0.98,"reason":"30-80字简体中文"}
@@ -176,7 +180,7 @@ PROMPT;
{
return "【正文全文 article_main.content】\n" . $contextText
. "\n\n【参考文献书目 refer_text】\n" . $referText
- . "\n\n请从宽判断:非驴唇不对马嘴即 can_support=true,只返回 JSON。";
+ . "\n\n请从宽判断:文献与正文非风马牛不相及即可判 can_support=true,只返回 JSON。";
}
/** 第二次校对:Crossref 摘要(Refer_doi) */
@@ -186,12 +190,12 @@ PROMPT;
你是文献引用二次校对助手。已根据 Refer_doi 从 Crossref(https://api.crossref.org/works/)获取摘要,请结合【正文全文】复核该文献是否相关。
【核心原则:与第一次相同,从宽判断】
-默认倾向 can_support=true。只要 Crossref 摘要(或书目)与正文不是驴唇不对马嘴,即判相关、能支撑。
+默认倾向 can_support=true。只要 Crossref 摘要(或书目)与正文不是风马牛不相及,即判相关、能支撑。
以【Crossref 摘要】为准;摘要与书目冲突时以摘要为准。
【仅当以下情况才判 can_support=false】
- 摘要显示的研究主题/对象/方法与正文讨论内容完全风马牛不相及。
-- 典型驴唇不对马嘴、张冠李戴,且无法解释为背景或泛化引用。
+- 典型风马牛不相及、张冠李戴,且无法解释为背景或泛化引用。
【以下情况均应 can_support=true】
- 摘要与正文属同领域或相近方向,能作背景、理论或方向性支撑。
@@ -217,7 +221,7 @@ PROMPT;
. "\n\n【参考文献书目 refer_text】\n" . $referText
. "\n\n【Crossref 摘要】(Refer_doi → api.crossref.org/works/)\n"
. ($doiBlock !== '' ? $doiBlock : '(未获取到摘要,请结合 refer_text 从宽判断)')
- . "\n\n非驴唇不对马嘴即 can_support=true,只返回 JSON。";
+ . "\n\n文献与正文非风马牛不相及即可判 can_support=true,只返回 JSON。";
}
private function buildReferenceCheckSystemPrompt3()
{