From 6fdc4efb6f88bb95b3119689e40fd30b1ed0aa2c Mon Sep 17 00:00:00 2001
From: wyn <1074145239@qq.com>
Date: Mon, 29 Jun 2026 10:23:27 +0800
Subject: [PATCH] =?UTF-8?q?=E5=8F=82=E8=80=83=E6=96=87=E7=8C=AE=E6=A0=A1?=
=?UTF-8?q?=E5=AF=B9=E5=8D=87=E7=BA=A7?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
application/api/controller/Base.php | 101 +
application/api/controller/Preaccept.php | 45 +-
application/api/controller/References.php | 264 ++-
application/command.php | 1 +
application/common/PubmedService.php | 62 +
application/common/ReferenceCheckService.php | 1721 ++++++++++++++---
application/common/mq/RabbitMqConfig.php | 6 +
.../common/mq/ReferenceCheckArticleWorker.php | 4 +
application/common/service/LLMService.php | 843 ++++++--
.../service/ReferenceRelevanceLlmService.php | 6 +
application/extra/rabbitmq.php | 7 +
11 files changed, 2680 insertions(+), 380 deletions(-)
diff --git a/application/api/controller/Base.php b/application/api/controller/Base.php
index 380066a2..6d76cb01 100644
--- a/application/api/controller/Base.php
+++ b/application/api/controller/Base.php
@@ -1178,6 +1178,107 @@ class Base extends Controller
return $ids;
}
+ /**
+ * Expands the inside of a bracketed citation (e.g. "1,2" or "3-5") into reference numbers.
+ *
+ * Full-width commas and dash look-alikes (en/em dash, minus sign, Unicode hyphens) are
+ * normalized before parsing. Ranges are expanded inclusively in ascending order and a
+ * reversed range ("5-3") is swapped. Only positive numbers are produced: a range that
+ * starts at 0 is clamped to 1, in line with the single-number branch which drops values <= 0.
+ * A range spanning $maxRangeSpan numbers or more is ignored, so a stray "[1-99999999]" in
+ * edited text cannot exhaust memory.
+ *
+ * @param string $referencePart Text found between "[" and "]".
+ * @return int[] Reference numbers in order of appearance (duplicates are kept).
+ */
+ protected function expandCitationBracketNumbers(string $referencePart): array
+ {
+ // Upper bound on how many numbers a single "a-b" range may expand to.
+ $maxRangeSpan = 1000;
+ // Normalize separators once, up front.
+ $referencePart = str_replace(
+ [',', '–', '—', '−', '‐', '‑'],
+ [',', '-', '-', '-', '-', '-'],
+ trim($referencePart)
+ );
+ if ($referencePart === '') {
+ return [];
+ }
+ $out = [];
+ foreach (explode(',', $referencePart) as $seg) {
+ $seg = trim($seg);
+ if ($seg === '') {
+ continue;
+ }
+ if (preg_match('/^(\d+)\s*-\s*(\d+)$/', $seg, $m)) {
+ $first = intval($m[1]);
+ $second = intval($m[2]);
+ // Reference numbers are 1-based, so never start below 1.
+ $lo = max(1, min($first, $second));
+ $hi = max($first, $second);
+ if ($hi < $lo || $hi - $lo >= $maxRangeSpan) {
+ continue;
+ }
+ for ($i = $lo; $i <= $hi; $i++) {
+ $out[] = $i;
+ }
+ continue;
+ }
+ $n = intval($seg);
+ if ($n > 0) {
+ $out[] = $n;
+ }
+ }
+ return $out;
+ }
+
+ /**
+ * Extracts the reference numbers cited in a fragment of main-body content (reference_no = index + 1).
+ *
+ * Two citation forms are recognised and merged:
+ * - ids returned by extractMyciteIds(), resolved to numbers through production_article_refer
+ * (only when $pArticleId > 0; rows with state = 0);
+ * - numeric brackets such as [n], [1,2] or [3-5], with or without a surrounding <blue> tag.
+ *
+ * @param string $text Main-body text to scan.
+ * @param int $pArticleId Production article id used to resolve ids to numbers; 0 skips that lookup.
+ * @return int[] Unique reference numbers in ascending order.
+ */
+ protected function extractCitationRefNosFromMainContent(string $text, int $pArticleId = 0): array
+ {
+ if ($text === '') {
+ return [];
+ }
+
+ $nos = [];
+
+ // Form 1: ids found by extractMyciteIds(), mapped to reference numbers via the refer table.
+ $pReferIds = $this->extractMyciteIds($text);
+ if (!empty($pReferIds) && $pArticleId > 0) {
+ $refers = Db::name('production_article_refer')
+ ->where('p_article_id', $pArticleId)
+ ->whereIn('p_refer_id', $pReferIds)
+ ->where('state', 0)
+ ->field('p_refer_id,index')
+ ->select();
+ $idToNo = [];
+ foreach ($refers as $row) {
+ // The stored index is 0-based; reference numbers shown in text are 1-based.
+ $idToNo[intval($row['p_refer_id'])] = intval($row['index']) + 1;
+ }
+ foreach ($pReferIds as $pid) {
+ if (isset($idToNo[$pid])) {
+ $nos[] = $idToNo[$pid];
+ }
+ }
+ }
+
+ // Form 2: "[...]" optionally wrapped in <blue ...>...</blue>.
+ if (preg_match_all('/(?:<\s*blue[^>]*>)?\[([^\]]+)\](?:<\/\s*blue\s*>)?/iu', $text, $m)) {
+ foreach ($m[1] as $inner) {
+ $innerNorm = str_replace(
+ [',', '–', '—', '−', '‐', '‑'],
+ [',', '-', '-', '-', '-', '-'],
+ trim((string)$inner)
+ );
+ // Skip brackets that hold anything other than digits, whitespace, commas and dashes.
+ if (!preg_match('/^[\d\s,\-]+$/u', $innerNorm)) {
+ continue;
+ }
+ foreach ($this->expandCitationBracketNumbers($innerNorm) as $n) {
+ if ($n > 0) {
+ $nos[] = $n;
+ }
+ }
+ }
+ }
+
+ // De-duplicate, reindex and sort numerically.
+ $nos = array_values(array_unique($nos));
+ sort($nos, SORT_NUMERIC);
+ return $nos;
+ }
+
/**
* table_data:二维数组 JSON [[{text,colspan,rowspan},...],...];支持双重 JSON 字符串编码。
*
diff --git a/application/api/controller/Preaccept.php b/application/api/controller/Preaccept.php
index 79794434..9f89b31d 100644
--- a/application/api/controller/Preaccept.php
+++ b/application/api/controller/Preaccept.php
@@ -7,7 +7,7 @@ use think\Env;
use think\Queue;
use think\Validate;
use app\common\CrossrefService;
-use app\common\ReferenceCheckService;
+use app\common\ReferenceRelevanceCheckService;
class Preaccept extends Base
{
@@ -27,7 +27,7 @@ class Preaccept extends Base
return;
}
try {
- (new ReferenceCheckService())->clearArticleChecksByPArticleId($pArticleId);
+ (new ReferenceRelevanceCheckService())->clearArticleChecksByPArticleId($pArticleId);
} catch (\Exception $e) {
\think\Log::error(
'resetArticleChecksOnReferChange[' . $sourceTag . '] p_article_id='
@@ -1220,6 +1220,14 @@ class Preaccept extends Base
$insert['ctime'] = time();
$this->article_main_log_obj->insert($insert);
+// $articleId = intval($am_info['article_id']);
+// $amId = intval($data['am_id']);
+//
+// // 本段引用集合变化(如 10,11 → 11,12)时仅清空该 am_id 下的校对明细
+// if ($this->hasMainCitationChange($old_content, $new_raw_content, $articleId)) {
+// $this->clearMainChecksOnCitationChange($articleId, $amId);
+// }
+
// 判断是否存在“引用删除”(新 content 相对旧 content 缺少 )
$hasCitationDeletion = $this->hasMyciteDeletion($old_content, $new_raw_content);
@@ -1245,6 +1253,39 @@ class Preaccept extends Base
//返回更新数据 20260119 end
}
+ /**
+ * After a single main-body section is saved, clears the citation-check detail rows that
+ * already exist for that am_id (located by article_id).
+ *
+ * Best effort: a failure is logged and never propagated to the caller.
+ * The service is named by its fully-qualified class name because this controller does not
+ * import app\common\ReferenceCheckService; the short name would resolve to
+ * app\api\controller\ReferenceCheckService and raise an uncaught "class not found" Error.
+ *
+ * @param int $articleId Article id; non-positive values are ignored.
+ * @param int $amId Main-body section id; non-positive values are ignored.
+ */
+ private function clearMainChecksOnCitationChange(int $articleId, int $amId)
+ {
+ if ($articleId <= 0 || $amId <= 0) {
+ return;
+ }
+ try {
+ (new \app\common\ReferenceCheckService())->clearChecksByAmId($articleId, $amId);
+ } catch (\Exception $e) {
+ \think\Log::error(
+ 'clearMainChecksOnCitationChange article_id=' . $articleId
+ . ' am_id=' . $amId . ' ' . $e->getMessage()
+ );
+ }
+ }
+
+ /**
+ * Tells whether the set of references cited by one main-body section changed
+ * (any addition, removal or replacement yields true).
+ * Stored content usually carries bracketed [n] citations while editor submissions usually
+ * carry citation tags; both are resolved to reference numbers before comparing.
+ */
+ private function hasMainCitationChange(string $oldContent, string $newContent, int $articleId): bool
+ {
+ $productionQuery = Db::name('production_article')
+ ->where('article_id', $articleId)
+ ->whereIn('state', [0, 2]);
+ $pArticleId = intval($productionQuery->value('p_article_id'));
+ $before = $this->extractCitationRefNosFromMainContent($oldContent, $pArticleId);
+ $after = $this->extractCitationRefNosFromMainContent($newContent, $pArticleId);
+ if ($before === $after) {
+ return false;
+ }
+ return true;
+ }
+
/**
* 是否发生 删除(new 相对 old 少了任意引用 id)
*/
diff --git a/application/api/controller/References.php b/application/api/controller/References.php
index 331edd62..ea539ed6 100644
--- a/application/api/controller/References.php
+++ b/application/api/controller/References.php
@@ -12,6 +12,8 @@ use think\Db;
use think\Env;
use think\Queue;
use app\common\ReferenceCheckService;
+use app\common\ReferenceRelevanceCheckService;
+use app\common\DbReconnectHelper;
/**
* @title 参考文献
* @description 相关方法汇总
@@ -1309,11 +1311,195 @@ class References extends Base
}
return json_encode(['status' => 8,'msg' => 'fail']);
}
+ // ============================================================
+ // 参考文献「主题相关性」校对(独立模块,RabbitMQ 链式消费)
+ // 表:t_article_reference_relevance_check_result / t_article_reference_relevance_check_batch
+ // 消费:php think reference_relevance:mq-consume
+ // ============================================================
+
/**
- * 参考文献第一次校对
+ * 启动整篇参考文献相关性校对
+ * POST: p_article_id(必填)
+ *
+ * 文献摘要/内容优先读 t_production_article_refer.abstract_text、refer_content_cleaned;
+ * 二者都为空时在校对执行阶段抓取并回写 refer 表,校对时始终从 refer 表读取。
+ */
+ public function allReferenceCheckAI()
+ {
+ // Read and validate the required p_article_id POST parameter.
+ $aParam = $this->request->post();
+ $iPArticleId = empty($aParam['p_article_id']) ? 0 : intval($aParam['p_article_id']);
+ if ($iPArticleId <= 0) {
+ return jsonError('Please select an article');
+ }
+
+ // The production article must exist with state 0 or 2.
+ $aProductionArticle = Db::name('production_article')
+ ->field('p_article_id,article_id')
+ ->where(['p_article_id' => $iPArticleId, 'state' => ['in', [0, 2]]])
+ ->find();
+ if (empty($aProductionArticle)) {
+ return jsonError('No articles found');
+ }
+ // A checkReferStatus() result of 0 blocks the run until the reference content is corrected.
+ if ($this->checkReferStatus($iPArticleId) == 0) {
+ return jsonError('Please correct the reference content before running the check.');
+ }
+
+ // Refuse to start when result rows already exist; a rerun goes through the reset endpoint.
+ $existing = Db::name('article_reference_relevance_check_result')
+ ->where('p_article_id', $iPArticleId)
+ ->count();
+ if (intval($existing) > 0) {
+ return jsonError('This article already has relevance check records. Use referenceRelevanceCheckResetAI to rerun.');
+ }
+
+ try {
+ DbReconnectHelper::ensure();
+ $result = (new ReferenceRelevanceCheckService())->enqueueByPArticle($aProductionArticle);
+ // An empty check_ids list is reported as "no citations found" rather than success.
+ if (empty($result['check_ids'])) {
+ return jsonError('No reference citations were found in the article.');
+ }
+ return jsonSuccess($result);
+ } catch (\Exception $e) {
+ return jsonError($e->getMessage());
+ }
+ }
+
+ /**
+ * Reports the progress of the relevance check for one article.
+ * POST: p_article_id
+ */
+ public function referenceRelevanceCheckProgressAI()
+ {
+ $post = $this->request->post();
+ $pArticleId = 0;
+ if (!empty($post['p_article_id'])) {
+ $pArticleId = intval($post['p_article_id']);
+ }
+ if ($pArticleId <= 0) {
+ return jsonError('p_article_id is required');
+ }
+ try {
+ $service = new ReferenceRelevanceCheckService();
+ return jsonSuccess($service->getProgressByPArticleId($pArticleId));
+ } catch (\Exception $e) {
+ return jsonError($e->getMessage());
+ }
+ }
+
+ /**
+ * Returns the overall relevance-check status of a whole article by p_article_id
+ * (used by the front end to decide which button flow to offer).
+ *
+ * POST/GET: p_article_id (required)
+ *
+ * Returned status: 0 = not checked, 1 = checking, 2 = finished.
+ * Counting is per reference (grouped by reference_no), the same dimension as
+ * referenceRelevanceCheckProgressAI.
+ */
+ public function referenceRelevanceCheckArticleStatusAI()
+ {
+ // Prefer the POST body; fall back to the merged request parameters when it is empty.
+ $input = $this->request->post();
+ if (empty($input)) {
+ $input = $this->request->param();
+ }
+
+ $pArticleId = 0;
+ if (!empty($input['p_article_id'])) {
+ $pArticleId = intval($input['p_article_id']);
+ }
+ if ($pArticleId <= 0) {
+ return jsonError('p_article_id is required');
+ }
+
+ try {
+ $service = new ReferenceRelevanceCheckService();
+ return jsonSuccess($service->getArticleProgressStatusByPArticleId($pArticleId));
+ } catch (\Exception $e) {
+ return jsonError($e->getMessage());
+ }
+ }
+
+ /**
+ * Returns the relevance-check detail rows of one reference.
+ * POST: p_refer_id
+ */
+ public function referenceRelevanceCheckDetailsAI()
+ {
+ $post = $this->request->post();
+ $pReferId = 0;
+ if (!empty($post['p_refer_id'])) {
+ $pReferId = intval($post['p_refer_id']);
+ }
+ if ($pReferId <= 0) {
+ return jsonError('p_refer_id is required');
+ }
+ try {
+ $service = new ReferenceRelevanceCheckService();
+ return jsonSuccess($service->getDetailsByPReferId($pReferId));
+ } catch (\Exception $e) {
+ return jsonError($e->getMessage());
+ }
+ }
+
+ /**
+ * Clears the relevance-check records of an article and runs the check again.
+ * POST: p_article_id
+ */
+ public function referenceRelevanceCheckResetAI()
+ {
+ $post = $this->request->post();
+ $pArticleId = 0;
+ if (!empty($post['p_article_id'])) {
+ $pArticleId = intval($post['p_article_id']);
+ }
+ if ($pArticleId <= 0) {
+ return jsonError('Please select an article');
+ }
+ // The production article must exist with state 0 or 2.
+ $articleFilter = ['p_article_id' => $pArticleId, 'state' => ['in', [0, 2]]];
+ $productionArticle = Db::name('production_article')
+ ->field('p_article_id,article_id')
+ ->where($articleFilter)
+ ->find();
+ if (empty($productionArticle)) {
+ return jsonError('No articles found');
+ }
+ if ($this->checkReferStatus($pArticleId) == 0) {
+ return jsonError('Please correct the reference content before running the check.');
+ }
+ try {
+ $service = new ReferenceRelevanceCheckService();
+ return jsonSuccess($service->resetAndRecheckByArticle($productionArticle));
+ } catch (\Exception $e) {
+ return jsonError($e->getMessage());
+ }
+ }
+
+ /**
+ * Clears the relevance-check records of an article without rerunning the check.
+ * POST: p_article_id
+ */
+ public function referenceRelevanceCheckClearAI()
+ {
+ $post = $this->request->post();
+ $pArticleId = 0;
+ if (!empty($post['p_article_id'])) {
+ $pArticleId = intval($post['p_article_id']);
+ }
+ if ($pArticleId <= 0) {
+ return jsonError('p_article_id is required');
+ }
+ try {
+ $removed = (new ReferenceRelevanceCheckService())->clearByPArticleId($pArticleId);
+ $payload = [
+ 'p_article_id' => $pArticleId,
+ 'deleted' => intval($removed),
+ ];
+ return jsonSuccess($payload);
+ } catch (\Exception $e) {
+ return jsonError($e->getMessage());
+ }
+ }
+
+ /**
+ * Reruns only the relevance records whose status is 0
+ * (nothing is cleared, no abstracts are fetched, no reference content is cleaned).
+ * POST: p_article_id
+ */
+ public function referenceRelevanceCheckRecheckPendingAI()
+ {
+ $post = $this->request->post();
+ $pArticleId = 0;
+ if (!empty($post['p_article_id'])) {
+ $pArticleId = intval($post['p_article_id']);
+ }
+ if ($pArticleId <= 0) {
+ return jsonError('p_article_id is required');
+ }
+ try {
+ $service = new ReferenceRelevanceCheckService();
+ return jsonSuccess($service->recheckPendingOnlyByArticle($pArticleId));
+ } catch (\Exception $e) {
+ return jsonError($e->getMessage());
+ }
+ }
+
+ /**
+ * 参考文献第一次校对(支撑力度)
* @return \think\response\Json
*/
- public function allReferenceCheckAI(){
+ public function allReferenceCheckAI2(){
//获取参数
$aParam = empty($aParam) ? $this->request->post() : $aParam;
@@ -1537,7 +1723,6 @@ class References extends Base
* p_article_id(可选)
*
* 仅重跑 status=3(校对失败)的记录;不改动 refer_text,只重置结果字段后入 RabbitMQ 批次队列。
- * 返回:p_refer_id、p_article_id、reset、queued、check_ids、queue
*/
public function referenceCheckRecheckFailedAI()
{
@@ -1561,6 +1746,36 @@ class References extends Base
}
}
+ /**
+ * Reruns the failed checks of one reference and, together with it, every check that
+ * belongs to the same citation-tag group (e.g. [1,2]); the work is queued asynchronously.
+ *
+ * POST/GET: p_refer_id (required)
+ * p_article_id (optional)
+ *
+ * Returns: p_refer_id, p_article_id, reset, queued, check_ids, queue
+ */
+ public function referenceCheckRecheckFailedWithGroupAI()
+ {
+ // Prefer the POST body; fall back to the merged request parameters when it is empty.
+ $input = $this->request->post();
+ if (empty($input)) {
+ $input = $this->request->param();
+ }
+
+ $pReferId = 0;
+ if (!empty($input['p_refer_id'])) {
+ $pReferId = intval($input['p_refer_id']);
+ }
+ if ($pReferId <= 0) {
+ return json_encode(['status' => 2, 'msg' => 'Please select a reference']);
+ }
+
+ $pArticleId = 0;
+ if (!empty($input['p_article_id'])) {
+ $pArticleId = intval($input['p_article_id']);
+ }
+
+ try {
+ $service = new ReferenceCheckService();
+ return jsonSuccess($service->enqueueRecheckFailedByPReferIdWithGroup($pReferId, $pArticleId));
+ } catch (\Exception $e) {
+ return jsonError($e->getMessage());
+ }
+ }
+
/**
* 按 p_refer_id 查单条参考文献的校对明细与进度
*
@@ -1590,6 +1805,47 @@ class References extends Base
}
}
+ /**
+ * Rescans the full text for references that never appeared in the check details
+ * (set difference on p_refer_id) and queues checks for the ones that are now cited.
+ *
+ * POST/GET: p_article_id (required)
+ *
+ * Difference: production_article_refer (state=0) minus the p_refer_id values already
+ * present in article_reference_check_result.
+ * Use cases: citations missed by the first check, tables uploaded later, citations
+ * added to the body afterwards. Existing detail rows are not reset.
+ * Precondition: the first check has already been run (check records exist).
+ *
+ * Returns: missing_p_refer_ids, matched_p_refer_ids, still_unmatched_p_refer_ids,
+ * queued, new_reference_nos, check_ids, queue
+ */
+ public function referenceCheckRematchNewAI()
+ {
+ // Prefer the POST body; fall back to the merged request parameters when it is empty.
+ $input = $this->request->post();
+ if (empty($input)) {
+ $input = $this->request->param();
+ }
+
+ $pArticleId = 0;
+ if (!empty($input['p_article_id'])) {
+ $pArticleId = intval($input['p_article_id']);
+ }
+ if ($pArticleId <= 0) {
+ return json_encode(['status' => 2, 'msg' => 'Please select an article']);
+ }
+
+ // The production article must exist with state 0 or 2.
+ $productionArticle = Db::name('production_article')
+ ->field('p_article_id,article_id')
+ ->where(['p_article_id' => $pArticleId, 'state' => ['in', [0, 2]]])
+ ->find();
+ if (empty($productionArticle)) {
+ return json_encode(['status' => 3, 'msg' => 'No articles found']);
+ }
+ if ($this->checkReferStatus($pArticleId) == 0) {
+ return jsonError('Please correct the reference content before running the check.');
+ }
+
+ try {
+ $service = new ReferenceCheckService();
+ return jsonSuccess($service->enqueueNewlyMatchedByPArticle($productionArticle));
+ } catch (\Exception $e) {
+ return jsonError($e->getMessage());
+ }
+ }
+
public function checkReferStatus($p_article_id){
$list = $this->production_article_refer_obj->where('p_article_id', $p_article_id)->where('state', 0)->select();
if (!$list) {
@@ -1604,4 +1860,6 @@ class References extends Base
}
return $frag;
}
+
+
}
diff --git a/application/command.php b/application/command.php
index 43892e98..cbea1b3b 100644
--- a/application/command.php
+++ b/application/command.php
@@ -11,4 +11,5 @@
return [
'app\\command\\ReferenceCheckMqConsume',
+ 'app\\command\\ReferenceRelevanceMqConsume',
];
diff --git a/application/common/PubmedService.php b/application/common/PubmedService.php
index ad17e2da..50f565ec 100644
--- a/application/common/PubmedService.php
+++ b/application/common/PubmedService.php
@@ -96,6 +96,68 @@ class PubmedService
return $info;
}
+ /**
+ * Looks a record up on PubMed from bibliographic data: title, first author and year.
+ *
+ * The title is mandatory. The author clause uses the text before the first "," or ";".
+ * The year clause is added only for a four-digit year starting with 19 or 20.
+ * Returns null when the title is empty, the search yields no PMID or the record
+ * cannot be fetched; otherwise the fetched record with 'pmid' and 'doi' set.
+ */
+ public function searchByBibliographic($title, $author = '', $year = ''): ?array
+ {
+ $cleanTitle = trim((string)$title);
+ if ($cleanTitle === '') {
+ return null;
+ }
+
+ $clauses = [];
+ $clauses[] = '(' . $this->quoteTerm($cleanTitle) . '[Title])';
+
+ $authorText = trim((string)$author);
+ if ($authorText !== '') {
+ $pieces = preg_split('/[,;]/', $authorText);
+ $leadAuthor = trim((string)($pieces[0] ?? ''));
+ if ($leadAuthor !== '') {
+ $clauses[] = '(' . $this->quoteTerm($leadAuthor) . '[Author])';
+ }
+ }
+
+ $yearText = trim((string)$year);
+ if ($yearText !== '' && preg_match('/^(19|20)\d{2}$/', $yearText)) {
+ $clauses[] = '(' . $yearText . '[pdat])';
+ }
+
+ $query = implode(' AND ', $clauses);
+ $pmid = $this->esearch($query);
+ if (!$pmid) {
+ return null;
+ }
+
+ $record = $this->fetchByPmid($pmid);
+ if (!$record) {
+ return null;
+ }
+ $record['pmid'] = $pmid;
+ $record['doi'] = $this->extractDoiFromPmidRecord($pmid);
+ return $record;
+ }
+
+ /**
+ * Prepares free text for use inside a search term: trims it and drops every double quote.
+ */
+ private function quoteTerm($text)
+ {
+ $trimmed = trim((string)$text);
+ return str_replace('"', '', $trimmed);
+ }
+
+ /**
+ * Fetches the PubMed XML record of a PMID and extracts the article's own DOI.
+ *
+ * A record lists several ArticleId elements (pubmed, pmc, pii, doi ...), so the pattern
+ * requires IdType="doi" instead of taking the first ArticleId. Cited references carry
+ * their own ArticleIdList inside ReferenceList, so only the part of the document before
+ * the first ReferenceList is searched. ELocationID with EIdType="doi" is the fallback.
+ *
+ * @param string|int $pmid PubMed identifier.
+ * @return string The DOI, or '' when the request returns nothing or no DOI is present.
+ */
+ private function extractDoiFromPmidRecord($pmid)
+ {
+ $url = $this->base . 'efetch.fcgi?' . http_build_query([
+ 'db' => 'pubmed',
+ 'id' => $pmid,
+ 'retmode' => 'xml',
+ 'tool' => $this->tool,
+ 'email' => $this->email,
+ ]);
+ $xml = (string)$this->httpGet($url);
+ if ($xml === '') {
+ return '';
+ }
+ // Ignore the cited references: their DOIs must never be mistaken for the article's DOI.
+ $refListPos = stripos($xml, '<ReferenceList');
+ $head = $refListPos === false ? $xml : substr($xml, 0, $refListPos);
+ $patterns = [
+ '/<ArticleId\b[^>]*\bIdType\s*=\s*["\']doi["\'][^>]*>\s*([^<]+?)\s*<\/ArticleId>/i',
+ '/<ELocationID\b[^>]*\bEIdType\s*=\s*["\']doi["\'][^>]*>\s*([^<]+?)\s*<\/ELocationID>/i',
+ ];
+ foreach ($patterns as $pattern) {
+ if (preg_match($pattern, $head, $m)) {
+ // XML escapes characters such as "<" and "&" that may occur in a DOI.
+ return trim(html_entity_decode($m[1], ENT_QUOTES | ENT_XML1, 'UTF-8'));
+ }
+ }
+ return '';
+ }
+
// ----------------- Internals -----------------
private function esearch(string $term): ?string
diff --git a/application/common/ReferenceCheckService.php b/application/common/ReferenceCheckService.php
index e551a482..0cf20986 100644
--- a/application/common/ReferenceCheckService.php
+++ b/application/common/ReferenceCheckService.php
@@ -9,6 +9,8 @@ use app\common\mq\ReferenceCheckMqPublisher;
/**
* 正文 <blue>[n]</blue> 引用与 t_production_article_refer(index+1=n)相关性校对。
+ * 校对上下文取 t_article_main 一条记录(正文 content 或表格 table_data 展平)。
+ * 同一引用标签 [1,2]、[4-6] 联合校对,cite_group_refs 存展开序号。
* LLM 配置与 PromotionLlmService 相同;异步任务走 RabbitMQ(一篇一条消息)。
*/
class ReferenceCheckService
@@ -69,6 +71,9 @@ class ReferenceCheckService
/** LLM 评分(confidence)通过阈值:>= 该值视为"通过" */
const PASS_CONFIDENCE_THRESHOLD = 0.65;
+ /** 是否启用二轮 DOI/Crossref 复核(暂时关闭时设为 false) */
+ const SECOND_PASS_ENABLED = false;
+
/**
* 正文引用标签两种排版(带 /u):
* 1) [8, 9]、[13-15] —— 方括号在 blue 内
@@ -93,6 +98,11 @@ class ReferenceCheckService
return isset($arr[$key]) ? $arr[$key] : $default;
}
+ /**
+ * Reports the SECOND_PASS_ENABLED switch of this class.
+ */
+ private function isSecondPassEnabled()
+ {
+ $enabled = self::SECOND_PASS_ENABLED;
+ return $enabled;
+ }
+
/** 新建/重置校对明细时的队列初始字段 */
private function newCheckRecordFields(array $fields, $queueStatus = self::QUEUE_PENDING, $retryCount = 0)
{
@@ -101,8 +111,26 @@ class ReferenceCheckService
return $fields;
}
+ /**
+ * Builds the column values written when a check result is reset
+ * (including the support-strength extension columns).
+ *
+ * @param array $extra Additional columns; a key that also exists in the defaults overrides it.
+ * @return array
+ */
+ private function referenceCheckResultResetFields(array $extra = [])
+ {
+ $defaults = [
+ 'status' => self::RECORD_PENDING,
+ 'is_match' => 0,
+ 'can_support' => 0,
+ 'confidence' => 0,
+ 'reason' => '',
+ 'support_role' => '',
+ 'combined_can_support' => 0,
+ 'combined_confidence' => 0,
+ 'combined_reason' => '',
+ 'error_msg' => '',
+ ];
+ return array_merge($defaults, $extra);
+ }
+
public function markQueueRuntime($checkId, $queueStatus, $retryCount = null)
{
+ DbReconnectHelper::ensure();
$checkId = intval($checkId);
if ($checkId <= 0) {
return 0;
@@ -113,6 +141,84 @@ class ReferenceCheckService
}
return Db::name('article_reference_check_result')->where('id', $checkId)->update($fields);
}
+ /**
+ * Scans every main-body section of a production article for citations, creates one check
+ * record per cited reference number and queues the checks.
+ *
+ * Sections without citations get AM_STATUS_NONE; sections that produced at least one
+ * record get AM_STATUS_RUNNING once the jobs are queued. A cited number with no entry in
+ * the refer map, or whose record could not be inserted, is counted as skipped.
+ *
+ * @param array $prod production_article row (p_article_id and article_id are read).
+ * @return array article_id, p_article_id, queued, skipped, check_ids, queue
+ * @throws \RuntimeException when $prod is empty or the article has no article_main rows
+ */
+ public function enqueueByPArticle($prod){
+ if (empty($prod)) {
+ throw new \RuntimeException('production_article not found');
+ }
+ $pArticleId = intval($prod['p_article_id']);
+ $articleId = intval($prod['article_id']);
+ $referMap = $this->loadReferMapByPArticleId($pArticleId);
+
+ $mains = Db::name('article_main')
+ ->field('am_id,content,article_id,type,amt_id')
+ ->where('article_id', $articleId)
+ ->whereIn('state', [0, 2])
+ ->order('sort asc')
+ ->select();
+ if (empty($mains)) {
+ throw new \RuntimeException('article_main is empty');
+ }
+ $queued = 0;
+ $skipped = 0;
+ $pendingJobs = [];
+ $amIdsWithJobs = [];
+ $now = date('Y-m-d H:i:s');
+ foreach ($mains as $main) {
+ $amId = intval($main['am_id']);
+ $citations = $this->extractReferencesForArticleMain($main);
+ if (empty($citations)) {
+ $this->setAmRefCheckStatus($amId, self::AM_STATUS_NONE);
+ continue;
+ }
+ $sectionText = $this->resolveSectionTextForArticleMain($main);
+ foreach ($citations as $cite) {
+ // A tag such as [70-73] yields one record per expanded reference number.
+ foreach ($cite['reference_numbers'] as $refNo) {
+ $referIndex = $refNo - 1;
+ if ($referIndex < 0 || !isset($referMap[$referIndex])) {
+ $skipped++;
+ continue;
+ }
+ $scope = [
+ 'article_id' => $main['article_id'],
+ 'p_article_id' => $pArticleId,
+ 'am_id' => $amId,
+ 'section_text' => $sectionText,
+ ];
+ $checkId = $this->insertCitationCheckRecord($scope, $cite, $refNo, $referMap[$referIndex], $now);
+ if ($checkId <= 0) {
+ $skipped++;
+ continue;
+ }
+
+ $this->appendCitationPendingJob($pendingJobs, $checkId, $refNo, $amId, $cite['text_start']);
+ $queued++;
+ $amIdsWithJobs[$amId] = true;
+ }
+ }
+ // No early exit here: every section must be scanned, not only the first one with citations.
+ }
+ $checkIds = $this->enqueueChecksSortedByReferenceNo($pendingJobs, $pArticleId, 'enqueue');
+ foreach (array_keys($amIdsWithJobs) as $amId) {
+ $this->setAmRefCheckStatus($amId, self::AM_STATUS_RUNNING);
+ }
+
+ return [
+ 'article_id' => $articleId,
+ 'p_article_id' => $pArticleId,
+ 'queued' => $queued,
+ 'skipped' => $skipped,
+ 'check_ids' => $checkIds,
+ 'queue' => self::TRANSPORT_RABBITMQ,
+ ];
+ }
+
+
+
+
+
+
+
+
/**
* 合并匹配两种 blue 引用排版,按在正文中的起始位置排序。
@@ -173,6 +279,7 @@ class ReferenceCheckService
'refer_index' => intval($this->arrGet($extra, 'refer_index', 0)),
'reference_no' => intval($this->arrGet($extra, 'reference_no', 0)),
'reference_raw' => (string)$this->arrGet($extra, 'reference_raw', ''),
+ 'cite_group_refs' => (string)$this->arrGet($extra, 'cite_group_refs', ''),
'cite_tag_start' => intval($this->arrGet($extra, 'cite_tag_start', 0)),
'cite_tag_end' => intval($this->arrGet($extra, 'cite_tag_end', 0)),
'text_start' => intval($this->arrGet($extra, 'text_start', 0)),
@@ -229,6 +336,7 @@ class ReferenceCheckService
$skipped = 0;
$pendingJobs = [];
$now = date('Y-m-d H:i:s');
+ $sectionText = $this->resolveSectionTextForArticleMain($main);
foreach ($citations as $cite) {
foreach ($cite['reference_numbers'] as $refNo) {
$referIndex = $refNo - 1;
@@ -236,30 +344,24 @@ class ReferenceCheckService
$skipped++;
continue;
}
- $refer = $referMap[$referIndex];
- $referText = $this->formatReferForLlm($refer);
-
- $checkId = Db::name('article_reference_check_result')->insertGetId($this->newCheckRecordFields([
- 'article_id' => $main['article_id'],
- 'p_article_id' => $pArticleId,
- 'am_id' => intval($main['am_id']),
- 'reference_no' => $refNo,
- 'refer_index' => $refNo,
- 'origin_text' => $cite['original_text'],
- 'refer_text' => $referText,
- 'p_refer_id' => $referMap[$referIndex]['p_refer_id'],
- 'text_start' => $cite['text_start'],
- 'text_end' => $cite['text_end'],
- 'status' => self::RECORD_PENDING,
- 'created_at' => $now,
- 'updated_at' => $now,
- ]));
- $pendingJobs[] = [
- 'check_id' => intval($checkId),
- 'reference_no' => intval($refNo),
+ $scope = [
+ 'article_id' => $main['article_id'],
+ 'p_article_id' => $pArticleId,
'am_id' => intval($main['am_id']),
- 'text_start' => intval($cite['text_start']),
+ 'section_text' => $sectionText,
];
+ $checkId = $this->insertCitationCheckRecord($scope, $cite, $refNo, $referMap[$referIndex], $now);
+ if ($checkId <= 0) {
+ $skipped++;
+ continue;
+ }
+ $this->appendCitationPendingJob(
+ $pendingJobs,
+ $checkId,
+ $refNo,
+ intval($main['am_id']),
+ $cite['text_start']
+ );
}
}
@@ -276,6 +378,14 @@ class ReferenceCheckService
throw new \InvalidArgumentException('article_id is required');
}
+ if (!$this->isSecondPassEnabled()) {
+ return [
+ 'article_id' => $articleId,
+ 'check_ids2' => [],
+ 'queued' => 0,
+ ];
+ }
+
$rows = Db::name('article_reference_check_result')
->where('article_id', $articleId)
->where('status', self::RECORD_COMPLETED)
@@ -300,87 +410,6 @@ class ReferenceCheckService
'queued' => count($checkIds2),
];
}
- public function enqueueByPArticle($prod){
- if (empty($prod)) {
- throw new \RuntimeException('production_article not found');
- }
- $pArticleId = intval($prod['p_article_id']);
- $articleId = intval($prod['article_id']);
- $referMap = $this->loadReferMapByPArticleId($pArticleId);
-
- $mains = Db::name('article_main')
- ->field('am_id,content,article_id,type,amt_id')
- ->where('article_id', $articleId)
- ->whereIn('state', [0, 2])
- ->order('sort asc')
- ->select();
- if (empty($mains)) {
- throw new \RuntimeException('article_main is empty');
- }
- $queued = 0;
- $skipped = 0;
- $pendingJobs = [];
- $amIdsWithJobs = [];
- $now = date('Y-m-d H:i:s');
- foreach ($mains as $main) {
- $amId = intval($main['am_id']);
- $citations = $this->extractReferencesForArticleMain($main);
- if (empty($citations)) {
- $this->setAmRefCheckStatus($amId, self::AM_STATUS_NONE);
- continue;
- }
- foreach ($citations as $cite) {
- foreach ($cite['reference_numbers'] as $refNo) {
- $referIndex = $refNo - 1;
- if ($referIndex < 0 || !isset($referMap[$referIndex])) {
- $skipped++;
- continue;
- }
- $refer = $referMap[$referIndex];
- $referText = $this->formatReferForLlm($refer);
-
- // [70-73] 展开为 reference_no=70,71,72,73 共 4 条记录;先入队表,再按文献号正序校对
- $checkId = Db::name('article_reference_check_result')->insertGetId($this->newCheckRecordFields([
- 'article_id' => $main['article_id'],
- 'p_article_id' => $pArticleId,
- 'am_id' => $amId,
- 'reference_no' => $refNo,
- 'refer_index' => $refNo,
- 'origin_text' => $cite['original_text'],
- 'refer_text' => $referText,
- 'p_refer_id' => $referMap[$referIndex]['p_refer_id'],
- 'text_start' => $cite['text_start'],
- 'text_end' => $cite['text_end'],
- 'status' => self::RECORD_PENDING,
- 'created_at' => $now,
- 'updated_at' => $now,
- ]));
-
- $pendingJobs[] = [
- 'check_id' => intval($checkId),
- 'reference_no' => intval($refNo),
- 'am_id' => $amId,
- 'text_start' => intval($cite['text_start']),
- ];
- $queued++;
- $amIdsWithJobs[$amId] = true;
- }
- }
- }
- $checkIds = $this->enqueueChecksSortedByReferenceNo($pendingJobs, $pArticleId, 'enqueue');
- foreach (array_keys($amIdsWithJobs) as $amId) {
- $this->setAmRefCheckStatus($amId, self::AM_STATUS_RUNNING);
- }
-
- return [
- 'article_id' => $articleId,
- 'p_article_id' => $pArticleId,
- 'queued' => $queued,
- 'skipped' => $skipped,
- 'check_ids' => $checkIds,
- 'queue' => self::TRANSPORT_RABBITMQ,
- ];
- }
public function enqueueByArticle($articleId){
if ($articleId <= 0) {
throw new \InvalidArgumentException('article_id is required');
@@ -416,6 +445,7 @@ class ReferenceCheckService
$this->setAmRefCheckStatus($amId, self::AM_STATUS_NONE);
continue;
}
+ $sectionText = $this->resolveSectionTextForArticleMain($main);
foreach ($citations as $cite) {
foreach ($cite['reference_numbers'] as $refNo) {
$referIndex = $refNo - 1;
@@ -423,32 +453,19 @@ class ReferenceCheckService
$skipped++;
continue;
}
- $refer = $referMap[$referIndex];
- $referText = $this->formatReferForLlm($refer);
-
- // [70-73] 展开为 reference_no=70,71,72,73 共 4 条记录;先入队表,再按文献号正序校对
- $checkId = Db::name('article_reference_check_result')->insertGetId($this->newCheckRecordFields([
- 'article_id' => $main['article_id'],
- 'p_article_id' => $pArticleId,
- 'am_id' => $amId,
- 'reference_no' => $refNo,
- 'refer_index' => $refNo,
- 'origin_text' => $cite['original_text'],
- 'refer_text' => $referText,
- 'p_refer_id' => $referMap[$referIndex]['p_refer_id'],
- 'text_start' => $cite['text_start'],
- 'text_end' => $cite['text_end'],
- 'status' => self::RECORD_PENDING,
- 'created_at' => $now,
- 'updated_at' => $now,
- ]));
-
- $pendingJobs[] = [
- 'check_id' => intval($checkId),
- 'reference_no' => intval($refNo),
+ $scope = [
+ 'article_id' => $main['article_id'],
+ 'p_article_id' => $pArticleId,
'am_id' => $amId,
- 'text_start' => intval($cite['text_start']),
+ 'section_text' => $sectionText,
];
+ $checkId = $this->insertCitationCheckRecord($scope, $cite, $refNo, $referMap[$referIndex], $now);
+ if ($checkId <= 0) {
+ $skipped++;
+ continue;
+ }
+
+ $this->appendCitationPendingJob($pendingJobs, $checkId, $refNo, $amId, $cite['text_start']);
$queued++;
$amIdsWithJobs[$amId] = true;
}
@@ -470,6 +487,200 @@ class ReferenceCheckService
];
}
+ /**
+ * Rescans the full text for references that exist in the reference table but whose
+ * p_refer_id never appeared in the check details, and queues checks for them.
+ *
+ * Difference: the p_refer_id values of production_article_refer (state=0)
+ * minus the p_refer_id values already present in article_reference_check_result.
+ * Only those missing references are looked up in the article sections (tables included);
+ * each hit adds a new detail row and queues it.
+ * Existing detail rows are neither deleted nor rerun.
+ *
+ * @param array $prod production_article row (must contain p_article_id and article_id)
+ * @return array
+ * @throws \RuntimeException when $prod is empty/not an array, when no check records exist
+ * yet for the article, or when the article has no article_main rows
+ * @throws \InvalidArgumentException when p_article_id or article_id is not positive
+ */
+ public function enqueueNewlyMatchedByPArticle($prod)
+ {
+ if (empty($prod) || !is_array($prod)) {
+ throw new \RuntimeException('production_article not found');
+ }
+ $pArticleId = intval($this->arrGet($prod, 'p_article_id', 0));
+ $articleId = intval($this->arrGet($prod, 'article_id', 0));
+ if ($pArticleId <= 0 || $articleId <= 0) {
+ throw new \InvalidArgumentException('production_article requires both p_article_id and article_id');
+ }
+
+ // Precondition: a first check must already have produced records for this article.
+ $existingCount = Db::name('article_reference_check_result')
+ ->where('p_article_id', $pArticleId)
+ ->count();
+ if (intval($existingCount) <= 0) {
+ throw new \RuntimeException('no existing reference check records for p_article_id=' . $pArticleId . '; please run the first check first');
+ }
+
+ $missingCtx = $this->loadMissingPReferIdsByPArticleId($pArticleId);
+ $missingPReferIds = $missingCtx['missing_p_refer_ids'];
+ $missingRefNos = $missingCtx['missing_reference_nos'];
+
+ // Nothing is missing: return the same result shape with empty lists.
+ if (empty($missingPReferIds)) {
+ return [
+ 'article_id' => $articleId,
+ 'p_article_id' => $pArticleId,
+ 'queued' => 0,
+ 'skipped' => 0,
+ 'existing' => intval($existingCount),
+ 'missing_p_refer_ids' => [],
+ 'matched_p_refer_ids' => [],
+ 'still_unmatched_p_refer_ids' => [],
+ 'new_reference_nos' => [],
+ 'check_ids' => [],
+ 'queue' => self::TRANSPORT_RABBITMQ,
+ ];
+ }
+
+ $referMap = $this->loadReferMapByPArticleId($pArticleId);
+
+ $mains = Db::name('article_main')
+ ->field('am_id,content,article_id,type,amt_id')
+ ->where('article_id', $articleId)
+ ->whereIn('state', [0, 2])
+ ->order('sort asc')
+ ->select();
+ if (empty($mains)) {
+ throw new \RuntimeException('article_main is empty');
+ }
+
+ $queued = 0;
+ $skipped = 0;
+ $pendingJobs = [];
+ $amIdsWithJobs = [];
+ $newReferenceNos = [];
+ $matchedPReferIds = [];
+ $now = date('Y-m-d H:i:s');
+
+ foreach ($mains as $main) {
+ $amId = intval($main['am_id']);
+ $citations = $this->extractReferencesForArticleMain($main);
+ if (empty($citations)) {
+ continue;
+ }
+ $sectionText = $this->resolveSectionTextForArticleMain($main);
+ foreach ($citations as $cite) {
+ foreach ($cite['reference_numbers'] as $refNo) {
+ // Citations of references that are not in the missing set count as skipped.
+ if (!isset($missingRefNos[$refNo])) {
+ $skipped++;
+ continue;
+ }
+
+ $referIndex = $refNo - 1;
+ if ($referIndex < 0 || !isset($referMap[$referIndex])) {
+ $skipped++;
+ continue;
+ }
+
+ $refer = $referMap[$referIndex];
+ $pReferId = intval($this->arrGet($refer, 'p_refer_id', 0));
+ $scope = [
+ 'article_id' => $main['article_id'],
+ 'p_article_id' => $pArticleId,
+ 'am_id' => $amId,
+ 'section_text' => $sectionText,
+ ];
+ $checkId = $this->insertCitationCheckRecord($scope, $cite, $refNo, $refer, $now);
+ if ($checkId <= 0) {
+ $skipped++;
+ continue;
+ }
+
+ $this->appendCitationPendingJob($pendingJobs, $checkId, $refNo, $amId, $cite['text_start']);
+ $queued++;
+ $amIdsWithJobs[$amId] = true;
+ // Array keys are used as sets so repeated citations are recorded once.
+ $newReferenceNos[$refNo] = true;
+ if ($pReferId > 0) {
+ $matchedPReferIds[$pReferId] = true;
+ }
+ }
+ }
+ }
+
+ $checkIds = $this->enqueueChecksSortedByReferenceNo($pendingJobs, $pArticleId, 'rematch_new');
+ foreach (array_keys($amIdsWithJobs) as $amId) {
+ $this->setAmRefCheckStatus($amId, self::AM_STATUS_RUNNING);
+ }
+
+ $newRefList = array_keys($newReferenceNos);
+ sort($newRefList, SORT_NUMERIC);
+
+ $matchedList = array_keys($matchedPReferIds);
+ sort($matchedList, SORT_NUMERIC);
+
+ // Missing references that were not cited anywhere in the scanned sections.
+ $stillUnmatched = array_values(array_diff($missingPReferIds, $matchedList));
+ sort($stillUnmatched, SORT_NUMERIC);
+
+ return [
+ 'article_id' => $articleId,
+ 'p_article_id' => $pArticleId,
+ 'queued' => $queued,
+ 'skipped' => $skipped,
+ 'existing' => intval($existingCount),
+ 'missing_p_refer_ids' => $missingPReferIds,
+ 'matched_p_refer_ids' => $matchedList,
+ 'still_unmatched_p_refer_ids' => $stillUnmatched,
+ 'new_reference_nos' => $newRefList,
+ 'check_ids' => $checkIds,
+ 'queue' => self::TRANSPORT_RABBITMQ,
+ ];
+ }
+
+ /**
+ * Computes which references of an article never appeared in the check details:
+ * the reference table (state=0) minus the p_refer_id values already present in
+ * article_reference_check_result.
+ *
+ * 'missing_reference_nos' maps reference number (index + 1) to p_refer_id.
+ *
+ * @return array{missing_p_refer_ids:int[], missing_reference_nos:array}
+ */
+ private function loadMissingPReferIdsByPArticleId($pArticleId)
+ {
+ $result = [
+ 'missing_p_refer_ids' => [],
+ 'missing_reference_nos' => [],
+ ];
+
+ $pArticleId = intval($pArticleId);
+ if ($pArticleId <= 0) {
+ return $result;
+ }
+
+ $referRows = Db::name('production_article_refer')
+ ->field('p_refer_id,index')
+ ->where('p_article_id', $pArticleId)
+ ->where('state', 0)
+ ->order('index asc')
+ ->select();
+
+ $alreadyChecked = Db::name('article_reference_check_result')
+ ->where('p_article_id', $pArticleId)
+ ->where('p_refer_id', '>', 0)
+ ->group('p_refer_id')
+ ->column('p_refer_id');
+
+ // Keyed lookup of the ids that already have at least one detail row.
+ $seen = [];
+ foreach ($alreadyChecked as $checkedId) {
+ $seen[intval($checkedId)] = true;
+ }
+
+ foreach ($referRows as $row) {
+ $id = intval($this->arrGet($row, 'p_refer_id', 0));
+ if ($id > 0 && !isset($seen[$id])) {
+ $number = intval($this->arrGet($row, 'index', 0)) + 1;
+ $result['missing_p_refer_ids'][] = $id;
+ $result['missing_reference_nos'][$number] = $id;
+ }
+ }
+
+ return $result;
+ }
+
/**
* 根据该节全部明细行汇总更新 t_article_main.ref_check_status
*/
@@ -553,7 +764,7 @@ class ReferenceCheckService
*
* @return int 被删除的明细条数
*/
- public function clearArticleChecksByPArticleId($pArticleId)
+ public function clearArticleChecksByPArticleId($pArticleId,$articleId=0)
{
$pArticleId = intval($pArticleId);
if ($pArticleId <= 0) {
@@ -561,10 +772,12 @@ class ReferenceCheckService
}
// 先反查 article_id(用于重置 article_main.ref_check_status 节级状态)
- $articleId = intval(Db::name('production_article')
- ->where('p_article_id', $pArticleId)
- ->whereIn('state', [0, 2])
- ->value('article_id'));
+ if($articleId==0){
+ $articleId = intval(Db::name('production_article')
+ ->where('p_article_id', $pArticleId)
+ ->whereIn('state', [0, 2])
+ ->value('article_id'));
+ }
$deleted = Db::name('article_reference_check_result')
->where('p_article_id', $pArticleId)
@@ -598,6 +811,29 @@ class ReferenceCheckService
return intval($deleted);
}
+    /**
+     * Deletes the citation-check rows of one body section, then refreshes that section's status.
+     *
+     * Rows are matched on article_id + am_id in `article_reference_check_result`;
+     * afterwards syncAmRefCheckStatus() is invoked for the section.
+     *
+     * @param int $articleId article id; must be > 0
+     * @param int $amId      section id (am_id); must be > 0
+     * @return int number of deleted rows, or 0 when either id is not positive
+     */
+    public function clearChecksByAmId($articleId, $amId)
+    {
+        $articleId = intval($articleId);
+        $amId = intval($amId);
+        if (!($articleId > 0 && $amId > 0)) {
+            return 0;
+        }
+
+        $removedCount = Db::name('article_reference_check_result')
+            ->where('article_id', $articleId)
+            ->where('am_id', $amId)
+            ->delete();
+
+        // Re-derive the section status now that its detail rows are gone.
+        $this->syncAmRefCheckStatus($amId);
+
+        return intval($removedCount);
+    }
+
/**
* 文献列表局部挪动后,仅刷新指定 p_refer_id 对应的校对明细 reference_no / refer_index。
*
@@ -850,7 +1086,7 @@ class ReferenceCheckService
}
$rows = Db::name('article_reference_check_result')
- ->field('id,p_refer_id,reference_no,am_id,status,confidence,is_match,reason,text_start,text_end,updated_at')
+ ->field('id,p_refer_id,reference_no,am_id,status,confidence,is_match,reason,support_role,combined_can_support,combined_confidence,combined_reason,text_start,text_end,cite_group_refs,updated_at')
->where('p_article_id', $pArticleId)
->order('reference_no asc, id asc')
->select();
@@ -916,16 +1152,22 @@ class ReferenceCheckService
}
$groups[$refNo]['records'][] = [
- 'check_id' => intval($this->arrGet($row, 'id', 0)),
- 'am_id' => intval($this->arrGet($row, 'am_id', 0)),
- 'status' => $st,
- 'confidence' => $confidence,
- 'is_pass' => $isPass,
- 'is_match' => intval($this->arrGet($row, 'is_match', 0)),
- 'reason' => (string)$this->arrGet($row, 'reason', ''),
- 'text_start' => intval($this->arrGet($row, 'text_start', 0)),
- 'text_end' => intval($this->arrGet($row, 'text_end', 0)),
- 'last_updated_at' => $upd,
+ 'check_id' => intval($this->arrGet($row, 'id', 0)),
+ 'am_id' => intval($this->arrGet($row, 'am_id', 0)),
+ 'status' => $st,
+ 'confidence' => $confidence,
+ 'is_pass' => $isPass,
+ 'is_match' => intval($this->arrGet($row, 'is_match', 0)),
+ 'reason' => (string)$this->arrGet($row, 'reason', ''),
+ 'support_role' => (string)$this->arrGet($row, 'support_role', ''),
+ 'combined_can_support' => intval($this->arrGet($row, 'combined_can_support', 0)),
+ 'combined_confidence' => floatval($this->arrGet($row, 'combined_confidence', 0)),
+ 'combined_reason' => (string)$this->arrGet($row, 'combined_reason', ''),
+ 'cite_group_refs' => (string)$this->arrGet($row, 'cite_group_refs', ''),
+ 'cite_check_mode' => $this->isJointCiteGroupRefs($this->arrGet($row, 'cite_group_refs', '')) ? 'joint' : 'single',
+ 'text_start' => intval($this->arrGet($row, 'text_start', 0)),
+ 'text_end' => intval($this->arrGet($row, 'text_end', 0)),
+ 'last_updated_at' => $upd,
];
}
@@ -993,7 +1235,7 @@ class ReferenceCheckService
}
$rows = Db::name('article_reference_check_result')
- ->field('id,p_article_id,reference_no,am_id,status,confidence,is_match,reason,updated_at')
+ ->field('id,p_article_id,reference_no,am_id,status,confidence,is_match,reason,support_role,combined_can_support,combined_confidence,combined_reason,cite_group_refs,updated_at')
->where('p_refer_id', $pReferId)
->order('id asc')
->select();
@@ -1036,13 +1278,19 @@ class ReferenceCheckService
}
$list[] = [
- 'check_id' => intval($this->arrGet($row, 'id', 0)),
- 'am_id' => intval($this->arrGet($row, 'am_id', 0)),
- 'status' => $st,
- 'confidence' => $confidence,
- 'reason' => (string)$this->arrGet($row, 'reason', ''),
- 'is_match' => intval($this->arrGet($row, 'is_match', 0)),
- 'is_pass' => $isPass,
+ 'check_id' => intval($this->arrGet($row, 'id', 0)),
+ 'am_id' => intval($this->arrGet($row, 'am_id', 0)),
+ 'status' => $st,
+ 'confidence' => $confidence,
+ 'reason' => (string)$this->arrGet($row, 'reason', ''),
+ 'is_match' => intval($this->arrGet($row, 'is_match', 0)),
+ 'is_pass' => $isPass,
+ 'support_role' => (string)$this->arrGet($row, 'support_role', ''),
+ 'combined_can_support' => intval($this->arrGet($row, 'combined_can_support', 0)),
+ 'combined_confidence' => floatval($this->arrGet($row, 'combined_confidence', 0)),
+ 'combined_reason' => (string)$this->arrGet($row, 'combined_reason', ''),
+ 'cite_group_refs' => (string)$this->arrGet($row, 'cite_group_refs', ''),
+ 'cite_check_mode' => $this->isJointCiteGroupRefs($this->arrGet($row, 'cite_group_refs', '')) ? 'joint' : 'single',
];
}
@@ -1117,8 +1365,8 @@ class ReferenceCheckService
throw new \RuntimeException('no existing reference check records for p_article_id=' . $pArticleId);
}
- $cleared = $this->clearArticleChecks($articleId);
- $enqueueResult = $this->enqueueByArticle($articleId);
+ $cleared = $this->clearArticleChecksByPArticleId($aProductionArticle['p_article_id'],$aProductionArticle['article_id']);
+ $enqueueResult = $this->enqueueByPArticle($aProductionArticle);
if (!is_array($enqueueResult)) {
$enqueueResult = [];
@@ -1178,6 +1426,7 @@ class ReferenceCheckService
*/
public function updateCheckResult($checkId, array $fields)
{
+ DbReconnectHelper::ensure();
$checkId = intval($checkId);
if ($checkId <= 0) {
throw new \InvalidArgumentException('invalid check id');
@@ -1186,6 +1435,9 @@ class ReferenceCheckService
if (isset($fields['reason'])) {
$fields['reason'] = mb_substr(trim((string)$fields['reason']), 0, 512);
}
+ if (isset($fields['combined_reason'])) {
+ $fields['combined_reason'] = mb_substr(trim((string)$fields['combined_reason']), 0, 512);
+ }
if (isset($fields['error_msg'])) {
$fields['error_msg'] = mb_substr(trim((string)$fields['error_msg']), 0, 512);
}
@@ -1540,6 +1792,581 @@ class ReferenceCheckService
return implode("\n", $parts);
}
+    /**
+     * Pulls the citation-tag metadata out of one extractReferences result item.
+     *
+     * Every check row expanded from the same tag (e.g. [1,2] or [70-73]) shares these values.
+     *
+     * @param array $cite one extracted citation item
+     * @return array keys: reference_raw, cite_group_refs, cite_tag_start, cite_tag_end,
+     *               origin_text, text_start, text_end
+     */
+    private function citationMetaFromExtract(array $cite)
+    {
+        $meta = [];
+        $meta['reference_raw'] = (string)$this->arrGet($cite, 'reference_raw', '');
+
+        $referenceNumbers = (array)$this->arrGet($cite, 'reference_numbers', []);
+        $meta['cite_group_refs'] = $this->formatCiteGroupRefs($referenceNumbers);
+
+        // Offsets of the tag itself come from reference_start / reference_end.
+        $meta['cite_tag_start'] = intval($this->arrGet($cite, 'reference_start', 0));
+        $meta['cite_tag_end'] = intval($this->arrGet($cite, 'reference_end', 0));
+
+        // The extractor calls the cited passage "original_text"; it is exposed as origin_text.
+        $meta['origin_text'] = (string)$this->arrGet($cite, 'original_text', '');
+        $meta['text_start'] = intval($this->arrGet($cite, 'text_start', 0));
+        $meta['text_end'] = intval($this->arrGet($cite, 'text_end', 0));
+
+        return $meta;
+    }
+
+    /**
+     * Formats the expanded reference numbers of a citation group, e.g. "1,2", "4,5,6" or "3".
+     *
+     * Values are cast to int; non-positive numbers are dropped, duplicates collapsed and
+     * the rest sorted ascending before being joined with commas.
+     *
+     * @param array $refNumbers raw reference numbers
+     * @return string comma-joined list, or '' when no positive number remains
+     */
+    private function formatCiteGroupRefs(array $refNumbers)
+    {
+        $positive = array_filter(
+            array_map('intval', $refNumbers),
+            function ($number) {
+                return $number > 0;
+            }
+        );
+        if (empty($positive)) {
+            return '';
+        }
+
+        $ordered = array_values(array_unique($positive));
+        sort($ordered, SORT_NUMERIC);
+
+        return implode(',', $ordered);
+    }
+
+    /**
+     * Tells whether a cite_group_refs value names more than one reference.
+     *
+     * Only the presence of a comma in the string form of the value is inspected.
+     *
+     * @param mixed $citeGroupRefs value, cast to string before inspection
+     * @return bool true when the string contains at least one comma
+     */
+    private function isJointCiteGroupRefs($citeGroupRefs)
+    {
+        return substr_count((string)$citeGroupRefs, ',') > 0;
+    }
+
+    /**
+     * Resolves the comma-joined reference numbers of the citation group a check row belongs to.
+     *
+     * Resolution order: the row's stored cite_group_refs; otherwise the positive reference_no
+     * values of the group rows (deduplicated, ascending); otherwise the row's own reference_no.
+     *
+     * @param array      $row       check-result row
+     * @param array|null $groupRows rows of the same citation tag; looked up via
+     *                              findCitationGroupRows() when null
+     * @return string e.g. "1,2", "3", or '' when nothing usable is found
+     */
+    private function resolveCiteGroupRefsFromRow(array $row, array $groupRows = null)
+    {
+        $stored = trim((string)$this->arrGet($row, 'cite_group_refs', ''));
+        if ($stored !== '') {
+            return $stored;
+        }
+
+        if ($groupRows === null) {
+            $groupRows = $this->findCitationGroupRows($row);
+        }
+
+        // formatCiteGroupRefs() performs the int cast, >0 filter, dedupe, sort and join.
+        $memberRefNos = [];
+        foreach ($groupRows as $member) {
+            $memberRefNos[] = $this->arrGet($member, 'reference_no', 0);
+        }
+        $joined = $this->formatCiteGroupRefs($memberRefNos);
+        if ($joined !== '') {
+            return $joined;
+        }
+
+        $ownRefNo = intval($this->arrGet($row, 'reference_no', 0));
+
+        return $ownRefNo > 0 ? (string)$ownRefNo : '';
+    }
+
+    /**
+     * Reports whether the row's reason text already carries a second-pass marker.
+     *
+     * @param array $row check-result row
+     * @return bool true when `reason` contains "[DOI复核" or "[Crossref复核" (case-insensitive)
+     */
+    private function hasSecondPassCompleted(array $row)
+    {
+        $reasonText = (string)$this->arrGet($row, 'reason', '');
+        foreach (['[DOI复核', '[Crossref复核'] as $marker) {
+            if (stripos($reasonText, $marker) !== false) {
+                return true;
+            }
+        }
+
+        return false;
+    }
+
+    /**
+     * Builds the bracketed tag that prefixes a second-pass (DOI recheck) reason.
+     *
+     * Shape: "[DOI复核" + optional " 文献<refs>" + optional " <doi_used>" + "]".
+     *
+     * @param array      $row       check-result row
+     * @param array      $payload   recheck payload; only `doi_used` is read
+     * @param array|null $groupRows rows of the same citation tag, forwarded to
+     *                              resolveCiteGroupRefsFromRow()
+     * @return string the tag text
+     */
+    private function buildSecondPassReasonTag(array $row, array $payload, array $groupRows = null)
+    {
+        $pieces = ['[DOI复核'];
+
+        $citeGroupRefs = $this->resolveCiteGroupRefsFromRow($row, $groupRows);
+        if ($citeGroupRefs !== '') {
+            $pieces[] = '文献' . $citeGroupRefs;
+        }
+
+        $doiUsed = trim((string)$this->arrGet($payload, 'doi_used', ''));
+        if ($doiUsed !== '') {
+            $pieces[] = $doiUsed;
+        }
+
+        return implode(' ', $pieces) . ']';
+    }
+
+    /**
+     * Fetches the literature text for a reference row and reports whether it carries an abstract.
+     *
+     * The text comes from fetchDoiLiteratureBlock().
+     * NOTE(review): the previous comment said that helper prefers PubMed and falls back to
+     * Crossref — that is not visible in this method; confirm in the helper.
+     *
+     * @param mixed $refer reference row; anything but a non-empty array yields the empty result
+     * @return array{text:string, has_abstract:bool, doi:string}
+     */
+    private function resolveDoiRecheckFromRefer($refer)
+    {
+        if (is_array($refer) && !empty($refer)) {
+            $literature = trim($this->fetchDoiLiteratureBlock($refer));
+
+            // An abstract counts only when "Abstract:" is followed by non-blank content.
+            $abstractFound = false;
+            if ($literature !== '') {
+                $abstractFound = (bool)preg_match('/Abstract:\s*\S/u', $literature);
+            }
+
+            return [
+                'text'         => $literature,
+                'has_abstract' => $abstractFound,
+                'doi'          => $this->extractDoiFromRefer($refer),
+            ];
+        }
+
+        return ['text' => '', 'has_abstract' => false, 'doi' => ''];
+    }
+
+    /**
+     * Returns the body text used as the manuscript side of a check job.
+     *
+     * Thin alias of resolveMainContentForJob(); per the original note the text is one
+     * article_main record (body text or flattened table text).
+     *
+     * @param array $row      check-result row
+     * @param int   $maxChars length cap forwarded to resolveMainContentForJob()
+     * @return mixed whatever resolveMainContentForJob() returns
+     */
+    public function resolveParagraphContextForJob(array $row, $maxChars = 8000)
+    {
+        $content = $this->resolveMainContentForJob($row, $maxChars);
+
+        return $content;
+    }
+
+    /**
+     * Produces the normalized check text of one article_main record.
+     *
+     * The raw text comes from resolveArticleMainCheckContent() and is passed through
+     * normalizeCheckContentForLlm() with the given length cap.
+     *
+     * @param array $main     article_main row
+     * @param int   $maxChars length cap forwarded to normalizeCheckContentForLlm()
+     * @return mixed '' when the section has no content, otherwise the normalized text
+     */
+    private function resolveSectionTextForArticleMain(array $main, $maxChars = 8000)
+    {
+        $sectionRaw = trim($this->resolveArticleMainCheckContent($main));
+
+        return $sectionRaw === ''
+            ? ''
+            : $this->normalizeCheckContentForLlm($sectionRaw, $maxChars);
+    }
+
+    /**
+     * Inserts the pending check row for ONE reference number of a citation tag (e.g. [1,2]).
+     *
+     * @param array  $scope article_id / p_article_id / am_id / section_text of the section
+     * @param array  $cite  extracted citation item (see citationMetaFromExtract())
+     * @param int    $refNo one-based reference number; values < 1 are rejected
+     * @param array  $refer reference row; supplies p_refer_id and the formatted refer_text
+     * @param string $now   timestamp written to created_at / updated_at
+     * @return int|null id of the new check row, or null when $refNo is below 1
+     */
+    private function insertCitationCheckRecord(array $scope, array $cite, $refNo, array $refer, $now)
+    {
+        $refNo = intval($refNo);
+        if ($refNo < 1) {
+            return null;
+        }
+
+        $meta = $this->citationMetaFromExtract($cite);
+        $referText = $this->formatReferForLlm($refer);
+
+        // origin_text holds the local context around the citation, not the whole section;
+        // the section text is only a fallback when the extractor supplied nothing.
+        $originText = trim((string)$meta['origin_text']);
+        if ($originText === '') {
+            $originText = trim((string)$this->arrGet($scope, 'section_text', ''));
+        }
+
+        $fields = [
+            'article_id'      => intval($this->arrGet($scope, 'article_id', 0)),
+            'p_article_id'    => intval($this->arrGet($scope, 'p_article_id', 0)),
+            'am_id'           => intval($this->arrGet($scope, 'am_id', 0)),
+            'reference_no'    => $refNo,
+            'refer_index'     => $refNo,
+            'origin_text'     => $originText,
+            'refer_text'      => $referText,
+            'p_refer_id'      => intval($this->arrGet($refer, 'p_refer_id', 0)),
+            'reference_raw'   => $meta['reference_raw'],
+            'cite_group_refs' => $meta['cite_group_refs'],
+            'cite_tag_start'  => $meta['cite_tag_start'],
+            'cite_tag_end'    => $meta['cite_tag_end'],
+            'text_start'      => $meta['text_start'],
+            'text_end'        => $meta['text_end'],
+            'status'          => self::RECORD_PENDING,
+            'created_at'      => $now,
+            'updated_at'      => $now,
+        ];
+        $record = $this->newCheckRecordFields($fields);
+
+        return intval(Db::name('article_reference_check_result')->insertGetId($record));
+    }
+
+    /**
+     * Appends one job descriptor to the pending-job list.
+     *
+     * @param array $pendingJobs list to append to (modified in place)
+     * @param mixed $checkId     check row id, stored as int
+     * @param mixed $refNo       reference number, stored as int
+     * @param mixed $amId        section id, stored as int
+     * @param mixed $textStart   start offset of the cited text, stored as int
+     * @return void
+     */
+    private function appendCitationPendingJob(array &$pendingJobs, $checkId, $refNo, $amId, $textStart)
+    {
+        $job = [];
+        $job['check_id'] = intval($checkId);
+        $job['reference_no'] = intval($refNo);
+        $job['am_id'] = intval($amId);
+        $job['text_start'] = intval($textStart);
+
+        array_push($pendingJobs, $job);
+    }
+
+    /**
+     * Loads every check row that was expanded from the same citation tag as $row.
+     *
+     * Rows of one tag (e.g. [1,2]) differ in reference_no but share the section and the
+     * tag position. Matching is done on am_id plus cite_tag_start / cite_tag_end when the
+     * row has a usable tag span; otherwise on text_start / text_end, narrowed by
+     * reference_raw or, failing that, cite_group_refs when either is non-empty.
+     *
+     * @param array $row check-result row
+     * @return array[] matching rows ordered by reference_no; [$row] when the row has no
+     *                 am_id or the query finds nothing
+     */
+    private function findCitationGroupRows(array $row)
+    {
+        $amId = intval($this->arrGet($row, 'am_id', 0));
+        if ($amId <= 0) {
+            return [$row];
+        }
+
+        $tagStart = intval($this->arrGet($row, 'cite_tag_start', 0));
+        $tagEnd = intval($this->arrGet($row, 'cite_tag_end', 0));
+
+        // Collect the extra equality filters first, then apply them in order.
+        $filters = [];
+        if ($tagStart > 0 && $tagEnd > $tagStart) {
+            $filters['cite_tag_start'] = $tagStart;
+            $filters['cite_tag_end'] = $tagEnd;
+        } else {
+            $filters['text_start'] = intval($this->arrGet($row, 'text_start', 0));
+            $filters['text_end'] = intval($this->arrGet($row, 'text_end', 0));
+
+            $referenceRaw = trim((string)$this->arrGet($row, 'reference_raw', ''));
+            $citeGroupRefs = trim((string)$this->arrGet($row, 'cite_group_refs', ''));
+            if ($referenceRaw !== '') {
+                $filters['reference_raw'] = $referenceRaw;
+            } elseif ($citeGroupRefs !== '') {
+                $filters['cite_group_refs'] = $citeGroupRefs;
+            }
+        }
+
+        $query = Db::name('article_reference_check_result')->where('am_id', $amId);
+        foreach ($filters as $column => $value) {
+            $query->where($column, $value);
+        }
+        $found = $query->order('reference_no asc')->select();
+
+        if (empty($found)) {
+            return [$row];
+        }
+
+        return $found;
+    }
+
+    /**
+     * Picks the leader of a citation group: its smallest positive reference_no.
+     *
+     * @param array $groupRows rows of one citation tag
+     * @return int smallest positive reference_no, or 0 when no row has one
+     */
+    private function resolveCitationGroupLeaderRefNo(array $groupRows)
+    {
+        // PHP_INT_MAX is the "nothing found" sentinel and is mapped back to 0 below.
+        $candidates = [PHP_INT_MAX];
+        foreach ($groupRows as $member) {
+            $memberRefNo = intval($this->arrGet($member, 'reference_no', 0));
+            if ($memberRefNo > 0) {
+                $candidates[] = $memberRefNo;
+            }
+        }
+        $smallest = min($candidates);
+
+        return $smallest === PHP_INT_MAX ? 0 : $smallest;
+    }
+
+    /**
+     * Finds the first group row whose reference_no equals the given number.
+     *
+     * @param array $groupRows rows of one citation tag
+     * @param mixed $refNo     reference number, compared as int
+     * @return array|null the matching row, or null when none matches
+     */
+    private function findCitationGroupRowByRefNo(array $groupRows, $refNo)
+    {
+        $wanted = intval($refNo);
+        $match = null;
+        foreach ($groupRows as $candidate) {
+            if (intval($this->arrGet($candidate, 'reference_no', 0)) !== $wanted) {
+                continue;
+            }
+            $match = $candidate;
+            break;
+        }
+
+        return $match;
+    }
+
+    /**
+     * Tells whether the rows form a joint citation group (two or more rows).
+     *
+     * @param array $groupRows rows of one citation tag
+     * @return bool true when there are at least two rows
+     */
+    private function isCitationGroupCheck(array $groupRows)
+    {
+        return count($groupRows) >= 2;
+    }
+
+    /**
+     * Chooses the reference text for a check row.
+     *
+     * @param array $row   check-result row; its stored refer_text is the fallback
+     * @param mixed $refer live reference row; used when it is a non-empty array
+     * @return mixed formatReferForLlm($refer) when $refer is usable, otherwise the trimmed
+     *               refer_text stored on the row
+     */
+    private function resolveReferTextForCheckRow(array $row, $refer = null)
+    {
+        if (!is_array($refer) || empty($refer)) {
+            return trim((string)$this->arrGet($row, 'refer_text', ''));
+        }
+
+        return $this->formatReferForLlm($refer);
+    }
+
+    /**
+     * Joins the bibliography entries of one citation tag into a single refer_text.
+     *
+     * Each row with a positive reference_no contributes a block headed by
+     * "【参考文献 N】". The entry text comes from the live state=0 reference row when the
+     * check row has a p_refer_id and that row exists, otherwise from the refer_text
+     * stored on the check row. Rows with no text are skipped.
+     *
+     * @param array $groupRows rows of one citation tag
+     * @return string blocks separated by a blank line; '' when nothing was collected
+     */
+    private function buildCombinedReferTextForCitationGroup(array $groupRows)
+    {
+        $sections = [];
+        foreach ($groupRows as $member) {
+            $refNo = intval($this->arrGet($member, 'reference_no', 0));
+            if ($refNo <= 0) {
+                continue;
+            }
+
+            $pReferId = intval($this->arrGet($member, 'p_refer_id', 0));
+            $refer = $pReferId > 0
+                ? Db::name('production_article_refer')
+                    ->where('p_refer_id', $pReferId)
+                    ->where('state', 0)
+                    ->find()
+                : null;
+
+            $entryText = $this->resolveReferTextForCheckRow($member, $refer);
+            if ($entryText !== '') {
+                $sections[] = '【参考文献 ' . $refNo . '】' . "\n" . $entryText;
+            }
+        }
+
+        return implode("\n\n", $sections);
+    }
+
+    /**
+     * Builds the recheck payload for one citation group.
+     *
+     * Produces the combined bibliography text, and for every row with a positive
+     * reference_no and p_refer_id whose state=0 reference row exists: fetches the cleaned
+     * literature bundle, persists it on the check row, and collects its literature block
+     * and DOI.
+     *
+     * @param array $groupRows rows of one citation tag
+     * @return array{refer_text:string, doi_block:string, has_abstract:bool, doi_used:string}
+     */
+    private function prepareRecheckPayloadForCitationGroup(array $groupRows)
+    {
+        // runReferenceCheckOnce() calls this right after DbReconnectHelper::release(), and the
+        // combined refer text below queries the database, so make sure the connection is
+        // usable before the first query instead of only inside the loop.
+        // NOTE(review): assumes ensure() is safe to call repeatedly, as the loop already does.
+        DbReconnectHelper::ensure();
+        $referText = $this->buildCombinedReferTextForCitationGroup($groupRows);
+        $doiParts = [];
+        $doiUsed = [];
+        $hasAbstract = false;
+
+        foreach ($groupRows as $gr) {
+            $refNo = intval($this->arrGet($gr, 'reference_no', 0));
+            if ($refNo <= 0 || intval($this->arrGet($gr, 'p_refer_id', 0)) <= 0) {
+                continue;
+            }
+            // Re-check the connection on every iteration: the fetch below runs in between
+            // and its duration is not visible from here.
+            DbReconnectHelper::ensure();
+            $refer = Db::name('production_article_refer')
+                ->where('p_refer_id', intval($gr['p_refer_id']))
+                ->where('state', 0)
+                ->find();
+            if (empty($refer)) {
+                continue;
+            }
+            $bundle = (new ReferenceLiteratureFetchService())->fetchAndCleanForRefer($refer);
+            // Rows may carry their primary key as `id` or as `check_id`.
+            $checkId = intval($this->arrGet($gr, 'id', $this->arrGet($gr, 'check_id', 0)));
+            if ($checkId > 0) {
+                $this->persistLiteratureOnCheckRow($checkId, $bundle);
+            }
+
+            $block = $this->buildLiteratureBlockFromBundle($refNo, $bundle);
+            if ($block === '') {
+                continue;
+            }
+            // One abstract anywhere in the group is enough to flag the payload.
+            if (trim((string)($bundle['abstract_final'] ?? '')) !== '') {
+                $hasAbstract = true;
+            }
+            $doiParts[] = $block;
+            $doi = trim((string)($bundle['doi'] ?? ''));
+            if ($doi !== '') {
+                $doiUsed[] = $doi;
+            }
+        }
+
+        return [
+            'refer_text'   => $referText,
+            'doi_block'    => implode("\n\n", $doiParts),
+            'has_abstract' => $hasAbstract,
+            'doi_used'     => implode(',', $doiUsed),
+        ];
+    }
+
+    /**
+     * Copies the verdict fields of $sourceRow onto another check row and marks it completed.
+     *
+     * @param int   $checkId   id of the row to update
+     * @param array $sourceRow row supplying can_support, is_match, confidence, reason,
+     *                         support_role and the combined_* fields
+     * @return void
+     */
+    private function applyCheckResultFromRow($checkId, array $sourceRow)
+    {
+        $verdict = [
+            'can_support'          => intval($this->arrGet($sourceRow, 'can_support', 0)),
+            'is_match'             => intval($this->arrGet($sourceRow, 'is_match', 0)),
+            'confidence'           => floatval($this->arrGet($sourceRow, 'confidence', 0)),
+            'reason'               => (string)$this->arrGet($sourceRow, 'reason', ''),
+            'support_role'         => (string)$this->arrGet($sourceRow, 'support_role', ''),
+            'combined_can_support' => intval($this->arrGet($sourceRow, 'combined_can_support', 0)),
+            'combined_confidence'  => floatval($this->arrGet($sourceRow, 'combined_confidence', 0)),
+            'combined_reason'      => (string)$this->arrGet($sourceRow, 'combined_reason', ''),
+        ];
+        // Completion clears any earlier error message.
+        $verdict['status'] = self::RECORD_COMPLETED;
+        $verdict['error_msg'] = '';
+
+        $this->updateCheckResult($checkId, $verdict);
+    }
+
+    /**
+     * Writes the per-reference items of an LLM `results` array onto the rows of one citation group.
+     *
+     * Items are matched to rows by reference_no. Each matched row is updated and marked
+     * completed as soon as it is processed.
+     *
+     * @param array  $groupRows    rows of one citation tag
+     * @param array  $llmResponse  LLM response; only `results` is read
+     * @param string $reasonPrefix optional text put in front of every reason
+     * @return bool true only when at least one row has a positive reference_no and every
+     *              such row received a result; false when `results` is missing or empty
+     */
+    private function applyCitationGroupCheckResults(array $groupRows, array $llmResponse, $reasonPrefix = '')
+    {
+        $results = [];
+        if (isset($llmResponse['results']) && is_array($llmResponse['results'])) {
+            $results = $llmResponse['results'];
+        }
+        if (empty($results)) {
+            return false;
+        }
+
+        // Index the items by reference number; a later duplicate overrides an earlier one.
+        $itemsByRefNo = [];
+        foreach ($results as $candidate) {
+            if (is_array($candidate)) {
+                $candidateRefNo = intval($this->arrGet($candidate, 'reference_no', 0));
+                if ($candidateRefNo > 0) {
+                    $itemsByRefNo[$candidateRefNo] = $candidate;
+                }
+            }
+        }
+
+        $prefix = trim((string)$reasonPrefix);
+        $expected = 0;
+        $missing = 0;
+        foreach ($groupRows as $member) {
+            $refNo = intval($this->arrGet($member, 'reference_no', 0));
+            if ($refNo <= 0) {
+                continue;
+            }
+            $expected++;
+            if (!isset($itemsByRefNo[$refNo])) {
+                $missing++;
+                continue;
+            }
+
+            $item = $itemsByRefNo[$refNo];
+            $canSupport = !empty($item['can_support']) ? 1 : 0;
+            // is_match falls back to can_support when the item does not carry the key.
+            if (array_key_exists('is_match', $item)) {
+                $isMatch = !empty($item['is_match']) ? 1 : 0;
+            } else {
+                $isMatch = $canSupport;
+            }
+            $reason = trim($prefix . ' ' . (string)$this->arrGet($item, 'reason', ''));
+
+            $this->updateCheckResult($this->resolveCheckRowId($member), [
+                'can_support'          => $canSupport,
+                'is_match'             => $isMatch,
+                'confidence'           => floatval($this->arrGet($item, 'confidence', 0)),
+                'reason'               => $reason,
+                'support_role'         => (string)$this->arrGet($item, 'support_role', ''),
+                'combined_can_support' => !empty($item['combined_can_support']) ? 1 : 0,
+                'combined_confidence'  => floatval($this->arrGet($item, 'combined_confidence', 0)),
+                'combined_reason'      => (string)$this->arrGet($item, 'combined_reason', ''),
+                'status'               => self::RECORD_COMPLETED,
+                'error_msg'            => '',
+            ]);
+        }
+
+        return $expected > 0 && $missing === 0;
+    }
+
+    /**
+     * Finds the LLM result item that belongs to a check row, matched on reference_no.
+     *
+     * @param array $llmResponse LLM response; only `results` is read
+     * @param array $row         check-result row
+     * @return array|null first array item with the row's reference_no, or null
+     */
+    private function findLlmResultItemForRow(array $llmResponse, array $row)
+    {
+        $results = [];
+        if (isset($llmResponse['results']) && is_array($llmResponse['results'])) {
+            $results = $llmResponse['results'];
+        }
+
+        $wanted = intval($this->arrGet($row, 'reference_no', 0));
+        foreach ($results as $candidate) {
+            if (!is_array($candidate)) {
+                continue;
+            }
+            if (intval($this->arrGet($candidate, 'reference_no', 0)) === $wanted) {
+                return $candidate;
+            }
+        }
+
+        return null;
+    }
+
+    /**
+     * Shapes a stored check row into the result array handed back to callers.
+     *
+     * @param array $row check-result row
+     * @return array check_id plus the typed verdict fields (can_support, is_match, confidence,
+     *               reason, support_role, combined_can_support, combined_confidence,
+     *               combined_reason)
+     */
+    private function formatCheckReturnFromRow(array $row)
+    {
+        $formatted = ['check_id' => $this->resolveCheckRowId($row)];
+
+        $formatted['can_support'] = intval($this->arrGet($row, 'can_support', 0));
+        $formatted['is_match'] = intval($this->arrGet($row, 'is_match', 0));
+        $formatted['confidence'] = floatval($this->arrGet($row, 'confidence', 0));
+        $formatted['reason'] = (string)$this->arrGet($row, 'reason', '');
+        $formatted['support_role'] = (string)$this->arrGet($row, 'support_role', '');
+        $formatted['combined_can_support'] = intval($this->arrGet($row, 'combined_can_support', 0));
+        $formatted['combined_confidence'] = floatval($this->arrGet($row, 'combined_confidence', 0));
+        $formatted['combined_reason'] = (string)$this->arrGet($row, 'combined_reason', '');
+
+        return $formatted;
+    }
+
+    /**
+     * Decides whether the first-pass LLM results call for a second pass.
+     *
+     * A second pass is wanted when the feature is enabled and, for any group row that has a
+     * matching result item, either `confidence` or `combined_confidence` is at or below
+     * PASS_CONFIDENCE_THRESHOLD.
+     *
+     * @param array $groupRows   rows of one citation tag
+     * @param array $llmResponse LLM response; only `results` is read
+     * @return bool
+     */
+    private function shouldRunSecondPassForLlmResults(array $groupRows, array $llmResponse)
+    {
+        if (!$this->isSecondPassEnabled()) {
+            return false;
+        }
+
+        $results = [];
+        if (isset($llmResponse['results']) && is_array($llmResponse['results'])) {
+            $results = $llmResponse['results'];
+        }
+        if (empty($results)) {
+            return false;
+        }
+
+        // Index the items by reference number; a later duplicate overrides an earlier one.
+        $itemsByRefNo = [];
+        foreach ($results as $candidate) {
+            if (is_array($candidate)) {
+                $candidateRefNo = intval($this->arrGet($candidate, 'reference_no', 0));
+                if ($candidateRefNo > 0) {
+                    $itemsByRefNo[$candidateRefNo] = $candidate;
+                }
+            }
+        }
+
+        foreach ($groupRows as $member) {
+            $refNo = intval($this->arrGet($member, 'reference_no', 0));
+            if ($refNo > 0 && isset($itemsByRefNo[$refNo])) {
+                $item = $itemsByRefNo[$refNo];
+                $single = floatval($this->arrGet($item, 'confidence', 0));
+                $combined = floatval($this->arrGet($item, 'combined_confidence', 0));
+                if ($single <= self::PASS_CONFIDENCE_THRESHOLD || $combined <= self::PASS_CONFIDENCE_THRESHOLD) {
+                    return true;
+                }
+            }
+        }
+
+        return false;
+    }
+
+    /**
+     * Returns the text around this citation, so the LLM can tell which sentence it supports.
+     *
+     * The section content is loaded from `article_main` by am_id, cut with
+     * buildCitationContextText() using the row's text_start / text_end, and normalized.
+     *
+     * @param array $row      check-result row
+     * @param int   $maxChars length cap forwarded to normalizeCheckContentForLlm()
+     * @return mixed '' when the row has no am_id, an empty or inverted text span, no section
+     *               row, no content, or a blank slice; otherwise the normalized slice
+     */
+    public function resolveCitationLocalContextForJob(array $row, $maxChars = 3000)
+    {
+        $spanStart = intval($this->arrGet($row, 'text_start', 0));
+        $spanEnd = intval($this->arrGet($row, 'text_end', 0));
+        $amId = intval($this->arrGet($row, 'am_id', 0));
+        if (!($amId > 0 && $spanEnd > $spanStart)) {
+            return '';
+        }
+
+        $section = Db::name('article_main')
+            ->field('content,type,amt_id,article_id')
+            ->where('am_id', $amId)
+            ->find();
+        if (empty($section)) {
+            return '';
+        }
+
+        $sectionRaw = trim($this->resolveArticleMainCheckContent($section));
+        if ($sectionRaw === '') {
+            return '';
+        }
+
+        $localSlice = $this->buildCitationContextText($sectionRaw, $spanStart, $spanEnd);
+
+        return trim($localSlice) === ''
+            ? ''
+            : $this->normalizeCheckContentForLlm($localSlice, $maxChars);
+    }
+
+    /**
+     * Local context for the relevance check, optionally widened backwards.
+     *
+     * When an earlier citation tag ends inside the same paragraph, the start of the slice
+     * is moved back via extendContextStartBackward() by up to $extraSentences sentences,
+     * bounded by the end of that earlier tag. This pulls in the claim just before the
+     * citation without mixing in text that belongs to other citations.
+     * Any early exit returns the row's stored origin_text instead.
+     *
+     * @param array $row            check-result row
+     * @param int   $maxChars       length cap forwarded to normalizeCheckContentForLlm()
+     * @param int   $extraSentences how many sentences to extend backwards
+     * @return mixed the normalized slice, or the trimmed origin_text fallback
+     */
+    public function resolveCitationLocalContextForRelevanceJob(array $row, $maxChars = 3000, $extraSentences = 2)
+    {
+        DbReconnectHelper::ensure();
+        $spanStart = intval($this->arrGet($row, 'text_start', 0));
+        $spanEnd = intval($this->arrGet($row, 'text_end', 0));
+        $amId = intval($this->arrGet($row, 'am_id', 0));
+        $tagStart = intval($this->arrGet($row, 'cite_tag_start', 0));
+        $storedContext = trim((string)$this->arrGet($row, 'origin_text', ''));
+
+        if (!($amId > 0 && $spanEnd > $spanStart)) {
+            return $storedContext;
+        }
+
+        $section = Db::name('article_main')
+            ->field('content,type,amt_id,article_id')
+            ->where('am_id', $amId)
+            ->find();
+        if (empty($section)) {
+            return $storedContext;
+        }
+
+        $sectionRaw = trim($this->resolveArticleMainCheckContent($section));
+        if ($sectionRaw === '') {
+            return $storedContext;
+        }
+
+        // Without a tag offset both bounds collapse to 0 and no widening happens.
+        if ($tagStart > 0) {
+            $paragraphStart = $this->findParagraphStart($sectionRaw, $tagStart);
+            $prevTagEnd = $this->resolvePriorCitationTagEnd($sectionRaw, $tagStart);
+        } else {
+            $paragraphStart = 0;
+            $prevTagEnd = $paragraphStart;
+        }
+
+        $sliceStart = $spanStart;
+        if ($prevTagEnd > $paragraphStart) {
+            $lowerBound = max($paragraphStart, $prevTagEnd);
+            $sliceStart = $this->extendContextStartBackward($sectionRaw, $spanStart, $lowerBound, $extraSentences);
+        }
+
+        $localSlice = $this->buildCitationContextText($sectionRaw, $sliceStart, $spanEnd);
+        // Drop a leading period / whitespace left over from the previous sentence.
+        $localSlice = ltrim($localSlice, ". \t\n\r");
+        if (trim($localSlice) === '') {
+            return $storedContext;
+        }
+
+        return $this->normalizeCheckContentForLlm($localSlice, $maxChars);
+    }
+
+    /**
+     * Writes the same field set onto every row of one citation tag.
+     *
+     * @param array $groupRows rows of one citation tag; rows whose id resolves to a
+     *                         non-positive value are skipped
+     * @param array $fields    fields forwarded to updateCheckResult()
+     * @return void
+     */
+    private function applyCheckResultToCitationGroup(array $groupRows, array $fields)
+    {
+        foreach ($groupRows as $member) {
+            $memberId = $this->resolveCheckRowId($member);
+            if ($memberId <= 0) {
+                continue;
+            }
+            $this->updateCheckResult($memberId, $fields);
+        }
+    }
+
/**
* 编辑某条文献内容后,按 p_refer_id 异步重新校对该文献对应的全部 check 明细
*
@@ -1597,18 +2424,12 @@ class ReferenceCheckService
];
}
- $resetFields = $this->newCheckRecordFields([
+ $resetFields = $this->newCheckRecordFields($this->referenceCheckResultResetFields([
'refer_text' => $referText,
'refer_index' => $referenceNo,
'reference_no' => $referenceNo,
- 'status' => self::RECORD_PENDING,
- 'is_match' => 0,
- 'can_support' => 0,
- 'confidence' => 0,
- 'reason' => '',
- 'error_msg' => '',
'updated_at' => $now,
- ], self::QUEUE_PENDING, 0);
+ ]), self::QUEUE_PENDING, 0);
$pendingJobs = [];
$amIds = [];
@@ -1686,15 +2507,9 @@ class ReferenceCheckService
}
$now = date('Y-m-d H:i:s');
- $resetFields = $this->newCheckRecordFields([
- 'status' => self::RECORD_PENDING,
- 'is_match' => 0,
- 'can_support' => 0,
- 'confidence' => 0,
- 'reason' => '',
- 'error_msg' => '',
- 'updated_at' => $now,
- ], self::QUEUE_PENDING, 0);
+ $resetFields = $this->newCheckRecordFields($this->referenceCheckResultResetFields([
+ 'updated_at' => $now,
+ ]), self::QUEUE_PENDING, 0);
$pendingJobs = [];
$amIds = [];
@@ -1729,6 +2544,93 @@ class ReferenceCheckService
];
}
+    /**
+     * Re-queues the failed checks of one reference together with their whole citation group.
+     *
+     * For every failed row of the reference, all rows sharing its citation tag (e.g. [1,2])
+     * are reset to pending and enqueued again, whatever their own status was.
+     *
+     * @param int $pReferId   reference id; must be > 0
+     * @param int $pArticleId optional production article filter; when not positive it is
+     *                        taken from the first failed row
+     * @return array{p_refer_id:int, p_article_id:int, reset:int, queued:int, check_ids:int[], queue:string}
+     * @throws \InvalidArgumentException when $pReferId is not positive
+     */
+    public function enqueueRecheckFailedByPReferIdWithGroup($pReferId, $pArticleId = 0)
+    {
+        $pReferId = intval($pReferId);
+        if ($pReferId <= 0) {
+            throw new \InvalidArgumentException('p_refer_id is required');
+        }
+        $pArticleId = intval($pArticleId);
+
+        $failedQuery = Db::name('article_reference_check_result')
+            ->where('p_refer_id', $pReferId)
+            ->where('status', self::RECORD_FAILED);
+        if ($pArticleId > 0) {
+            $failedQuery->where('p_article_id', $pArticleId);
+        }
+        $failedRows = $failedQuery->select();
+
+        $summary = [
+            'p_refer_id'   => $pReferId,
+            'p_article_id' => $pArticleId,
+            'reset'        => 0,
+            'queued'       => 0,
+            'check_ids'    => [],
+            'queue'        => self::TRANSPORT_RABBITMQ,
+        ];
+        if (empty($failedRows)) {
+            return $summary;
+        }
+
+        if ($pArticleId <= 0) {
+            $pArticleId = intval($this->arrGet($failedRows[0], 'p_article_id', 0));
+            $summary['p_article_id'] = $pArticleId;
+        }
+
+        $now = date('Y-m-d H:i:s');
+        $resetFields = $this->newCheckRecordFields($this->referenceCheckResultResetFields([
+            'updated_at' => $now,
+        ]), self::QUEUE_PENDING, 0);
+
+        // Expand each failed row to its citation group; keying by id removes duplicates.
+        $targets = [];
+        foreach ($failedRows as $failedRow) {
+            foreach ($this->findCitationGroupRows($failedRow) as $member) {
+                $memberId = $this->resolveCheckRowId($member);
+                if ($memberId > 0) {
+                    $targets[$memberId] = $member;
+                }
+            }
+        }
+
+        $pendingJobs = [];
+        $touchedAmIds = [];
+        foreach ($targets as $target) {
+            $checkId = $this->resolveCheckRowId($target);
+            $amId = intval($this->arrGet($target, 'am_id', 0));
+
+            Db::name('article_reference_check_result')->where('id', $checkId)->update($resetFields);
+            $pendingJobs[] = [
+                'check_id'     => $checkId,
+                'reference_no' => intval($this->arrGet($target, 'reference_no', 0)),
+                'am_id'        => $amId,
+                'text_start'   => intval($this->arrGet($target, 'text_start', 0)),
+            ];
+            if ($amId > 0) {
+                $touchedAmIds[$amId] = true;
+            }
+        }
+
+        // Every affected section goes back to "running" before the jobs are queued.
+        foreach (array_keys($touchedAmIds) as $touchedAmId) {
+            $this->setAmRefCheckStatus($touchedAmId, self::AM_STATUS_RUNNING);
+        }
+
+        $checkIds = $this->enqueueChecksSortedByReferenceNo($pendingJobs, $pArticleId, 'recheck_failed');
+
+        $summary['reset'] = count($targets);
+        $summary['queued'] = count($checkIds);
+        $summary['check_ids'] = $checkIds;
+
+        return $summary;
+    }
+
public function recheckByRefer($articleId, $pReferId = 0, $referenceNo = 0)
{
$articleId = intval($articleId);
@@ -1763,19 +2665,13 @@ class ReferenceCheckService
];
}
- $resetFields = $this->newCheckRecordFields([
+ $resetFields = $this->newCheckRecordFields($this->referenceCheckResultResetFields([
'refer_text' => $referText,
'p_refer_id' => $pReferId,
'p_article_id' => $pArticleId,
'refer_index' => $referenceNo,
- 'status' => 0,
- 'is_match' => 0,
- 'can_support' => 0,
- 'confidence' => 0,
- 'reason' => '',
- 'error_msg' => '',
'updated_at' => $now,
- ], self::QUEUE_PENDING, 0);
+ ]), self::QUEUE_PENDING, 0);
$pendingJobs = [];
$amIds = [];
@@ -1847,13 +2743,34 @@ class ReferenceCheckService
*/
public function runReferenceCheckOnce($checkId)
{
+ DbReconnectHelper::ensure();
$checkId = intval($checkId);
$row = Db::name('article_reference_check_result')->where('id', $checkId)->find();
if (empty($row)) {
throw new \RuntimeException('article_reference_check_result not found, id=' . $checkId);
}
- $contentA = $this->resolveMainContentForJob($row);
+ if (intval($row['status']) === self::RECORD_COMPLETED) {
+ return $this->formatCheckReturnFromRow($row);
+ }
+
+ $groupRows = $this->findCitationGroupRows($row);
+ $isGroup = $this->isCitationGroupCheck($groupRows);
+ if ($isGroup) {
+ $leaderRefNo = $this->resolveCitationGroupLeaderRefNo($groupRows);
+ $currentRefNo = intval($this->arrGet($row, 'reference_no', 0));
+ if ($currentRefNo !== $leaderRefNo) {
+ $freshRow = Db::name('article_reference_check_result')->where('id', $checkId)->find();
+ if (!empty($freshRow) && intval($freshRow['status']) === self::RECORD_COMPLETED) {
+ return $this->formatCheckReturnFromRow($freshRow);
+ }
+ throw new \RuntimeException('Citation group leader check not finished yet, reference_no=' . $leaderRefNo);
+ }
+ }
+
+ $contentA = $this->resolveParagraphContextForJob($row);
+ $localContext = $this->resolveCitationLocalContextForJob($row);
+ $citeGroupRefs = $this->resolveCiteGroupRefsFromRow($row, $groupRows);
$refer = null;
if (intval($row['p_refer_id']) > 0) {
$refer = Db::name('production_article_refer')
@@ -1862,95 +2779,151 @@ class ReferenceCheckService
->find();
}
- if ($refer) {
- $contentB = $this->formatReferForLlm($refer);
- } else {
- $contentB = trim((string)$this->arrGet($row, 'refer_text', ''));
- }
+ $contentB = $this->buildCombinedReferTextForCitationGroup($groupRows);
+ DbReconnectHelper::release();
+ $doiPayload = $this->prepareRecheckPayloadForCitationGroup($groupRows);
+ $doiBlock = trim((string)$this->arrGet($doiPayload, 'doi_block', ''));
+ DbReconnectHelper::ensure();
if ($contentA === '' || $contentB === '') {
- $this->updateCheckResult($checkId, [
+ $failFields = [
'status' => self::RECORD_FAILED,
'error_msg' => 'Missing section content (text/table) or refer_text',
- ]);
+ ];
+ if ($isGroup) {
+ $this->applyCheckResultToCitationGroup($groupRows, $failFields);
+ } else {
+ $this->updateCheckResult($checkId, $failFields);
+ }
throw new \RuntimeException('Missing section content (text/table) or refer_text');
}
- $llmResult = (new LLMService())->checkReference($contentA, $contentB, false);
+ DbReconnectHelper::release();
+ $llmResult = (new LLMService())->checkReference(
+ $contentA,
+ $contentB,
+ false,
+ $doiBlock !== '' ? $doiBlock : null,
+ $citeGroupRefs,
+ $localContext
+ );
+ DbReconnectHelper::ensure();
$requestFailed = !empty($llmResult['request_failed']);
- $canSupport = $this->parseLlmCanSupport($llmResult);
- $confidence = floatval(isset($llmResult['confidence']) ? $llmResult['confidence'] : 0);
- $reason = isset($llmResult['reason']) ? $llmResult['reason'] : '';
- // LLM 通讯失败:写 status=RECORD_FAILED(3) + error_msg,抛异常由 MQ worker 重试
- if ($requestFailed) {
- $this->updateCheckResult($checkId, [
- 'confidence' => $confidence,
- 'reason' => $reason,
- 'status' => self::RECORD_FAILED,
- 'error_msg' => $reason,
- ]);
- throw new \RuntimeException($reason !== '' ? $reason : 'LLM request failed');
+ if ($requestFailed || !$this->applyCitationGroupCheckResults($groupRows, $llmResult)) {
+ $failReason = isset($llmResult['reason']) ? (string)$llmResult['reason'] : 'LLM request failed or empty results';
+ $failFields = [
+ 'status' => self::RECORD_FAILED,
+ 'error_msg' => $failReason,
+ ];
+ if ($isGroup) {
+ $this->applyCheckResultToCitationGroup($groupRows, $failFields);
+ } else {
+ $this->updateCheckResult($checkId, $failFields);
+ }
+ throw new \RuntimeException($failReason !== '' ? $failReason : 'LLM request failed');
}
- $this->updateCheckResult($checkId, [
- 'can_support' => $canSupport ? 1 : 0,
- 'is_match' => $canSupport ? 1 : 0,
- 'confidence' => $confidence,
- 'reason' => $reason,
- 'status' => self::RECORD_COMPLETED,
- 'error_msg' => '',
- ]);
-
- if ($confidence <= self::PASS_CONFIDENCE_THRESHOLD) {
- $this->runSecondPassBlocking($checkId, $row, $contentA, $refer, $contentB);
+ if ($this->shouldRunSecondPassForLlmResults($groupRows, $llmResult)) {
+ $this->runSecondPassBlocking($checkId, $row, $contentA, $refer, $contentB, $groupRows);
}
- return [
- 'check_id' => $checkId,
- 'can_support' => $canSupport ? 1 : 0,
- 'is_match' => $canSupport ? 1 : 0,
- 'confidence' => $confidence,
- 'reason' => $reason,
- ];
+ $freshRow = Db::name('article_reference_check_result')->where('id', $checkId)->find();
+ return $this->formatCheckReturnFromRow(!empty($freshRow) ? $freshRow : $row);
}
/**
* 低分结果的二轮 DOI 复核(同步阻塞执行;失败重试一次)
*/
- public function runSecondPassBlocking($checkId, array $row, $contentA, $refer, $referText)
+ public function runSecondPassBlocking($checkId, array $row, $contentA, $refer, $referText, array $groupRows = null)
{
+ if (!$this->isSecondPassEnabled()) {
+ return false;
+ }
+
+ DbReconnectHelper::ensure();
$checkId = intval($checkId);
if ($checkId <= 0) {
return false;
}
- $payload = $this->prepareRecheckPayload(is_array($refer) ? $refer : [], trim((string)$referText));
+ if ($groupRows === null) {
+ $groupRows = $this->findCitationGroupRows($row);
+ }
+ $isGroup = $this->isCitationGroupCheck($groupRows);
+
+ if ($isGroup) {
+ $leaderRefNo = $this->resolveCitationGroupLeaderRefNo($groupRows);
+ $currentRefNo = intval($this->arrGet($row, 'reference_no', 0));
+ if ($currentRefNo !== $leaderRefNo) {
+ $freshRow = Db::name('article_reference_check_result')->where('id', $checkId)->find();
+ if (!empty($freshRow) && $this->hasSecondPassCompleted($freshRow)) {
+ return true;
+ }
+ return false;
+ }
+ }
+
+ if (trim((string)$contentA) === '') {
+ $contentA = $this->resolveParagraphContextForJob($row);
+ }
+ $localContext = $this->resolveCitationLocalContextForJob($row);
+
+ DbReconnectHelper::release();
+ if ($isGroup) {
+ $payload = $this->prepareRecheckPayloadForCitationGroup($groupRows);
+ $referText = trim((string)$payload['refer_text']);
+ } else {
+ if (trim((string)$referText) === '') {
+ $referText = $this->resolveReferTextForCheckRow($row, is_array($refer) ? $refer : null);
+ }
+ $payload = $this->prepareRecheckPayload(is_array($refer) ? $refer : [], trim((string)$referText));
+ }
+ DbReconnectHelper::ensure();
if (empty($payload['has_abstract']) || trim((string)$payload['doi_block']) === '') {
return false;
}
+ $citeGroupRefs = $this->resolveCiteGroupRefsFromRow($row, $groupRows);
$lastError = '';
for ($attempt = 0; $attempt < 2; $attempt++) {
try {
- $llmResult = (new LLMService())->checkReference($contentA, trim((string)$referText), true, $payload['doi_block']);
+ DbReconnectHelper::release();
+ $llmResult = (new LLMService())->checkReference(
+ $contentA,
+ trim((string)$referText),
+ true,
+ $payload['doi_block'],
+ $citeGroupRefs,
+ $localContext
+ );
+ DbReconnectHelper::ensure();
$requestFailed = !empty($llmResult['request_failed']);
- $canSupport = $this->parseLlmCanSupport($llmResult);
- $confidence = floatval(isset($llmResult['confidence']) ? $llmResult['confidence'] : 0);
- $tag = '[Crossref复核' . (trim((string)$payload['doi_used']) !== '' ? (' ' . trim((string)$payload['doi_used'])) : '') . ']';
- $reason = $tag . ' ' . (isset($llmResult['reason']) ? $llmResult['reason'] : '');
+ $tag = $this->buildSecondPassReasonTag($row, $payload, $groupRows);
+ if ($tag !== '' && !empty($llmResult['results']) && is_array($llmResult['results'])) {
+ foreach ($llmResult['results'] as &$one) {
+ if (!is_array($one)) {
+ continue;
+ }
+ $one['reason'] = $tag . ' ' . (isset($one['reason']) ? (string)$one['reason'] : '');
+ }
+ unset($one);
+ }
- if ($requestFailed) {
- $lastError = isset($llmResult['reason']) ? (string)$llmResult['reason'] : 'LLM request failed';
+ if ($requestFailed || !$this->applyCitationGroupCheckResults($groupRows, $llmResult)) {
+ $lastError = isset($llmResult['reason']) ? (string)$llmResult['reason'] : 'LLM request failed or empty results';
if ($attempt < 1) {
continue;
}
- $this->updateCheckResult($checkId, [
- 'confidence' => $confidence,
- 'reason' => $reason,
- 'status' => self::RECORD_FAILED,
- 'error_msg' => $lastError,
- ]);
+ $failFields = [
+ 'status' => self::RECORD_FAILED,
+ 'error_msg' => $lastError,
+ ];
+ if ($isGroup) {
+ $this->applyCheckResultToCitationGroup($groupRows, $failFields);
+ } else {
+ $this->updateCheckResult($checkId, $failFields);
+ }
$amId = intval(isset($row['am_id']) ? $row['am_id'] : 0);
if ($amId > 0) {
$this->syncAmRefCheckStatus($amId);
@@ -1958,14 +2931,6 @@ class ReferenceCheckService
return false;
}
- $this->updateCheckResult($checkId, [
- 'can_support' => $canSupport ? 1 : 0,
- 'is_match' => $canSupport ? 1 : 0,
- 'confidence' => $confidence,
- 'reason' => $reason,
- 'status' => self::RECORD_COMPLETED,
- 'error_msg' => '',
- ]);
$amId = intval(isset($row['am_id']) ? $row['am_id'] : 0);
if ($amId > 0) {
$this->syncAmRefCheckStatus($amId);
@@ -1976,10 +2941,15 @@ class ReferenceCheckService
if ($attempt < 1) {
continue;
}
- $this->updateCheckResult($checkId, [
+ $failFields = [
'status' => self::RECORD_FAILED,
'error_msg' => $lastError,
- ]);
+ ];
+ if ($isGroup) {
+ $this->applyCheckResultToCitationGroup($groupRows, $failFields);
+ } else {
+ $this->updateCheckResult($checkId, $failFields);
+ }
$amId = intval(isset($row['am_id']) ? $row['am_id'] : 0);
if ($amId > 0) {
$this->syncAmRefCheckStatus($amId);
@@ -2087,6 +3057,18 @@ class ReferenceCheckService
if (!is_array($llmResult)) {
return false;
}
+ if (!empty($llmResult['results']) && is_array($llmResult['results'])) {
+ foreach ($llmResult['results'] as $item) {
+ if (!is_array($item)) {
+ continue;
+ }
+ if (!empty($item['can_support']) || !empty($item['is_match'])) {
+ return true;
+ }
+ }
+
+ return false;
+ }
if (array_key_exists('can_support', $llmResult)) {
return $this->parseLlmIsMatch($llmResult['can_support']);
}
@@ -2098,6 +3080,7 @@ class ReferenceCheckService
*/
public function resolveMainContentForJob(array $row, $maxChars = 8000)
{
+ DbReconnectHelper::ensure();
$amId = intval($this->arrGet($row, 'am_id', 0));
if ($amId <= 0) {
return '';
@@ -2387,10 +3370,14 @@ class ReferenceCheckService
}
/**
- * 引用处局部上下文(origin_text),供其它场景使用
+ * 引用处局部上下文:优先按 text_start/text_end 从节正文重算,回落 origin_text 快照
*/
public function resolveCitationContextForJob(array $row)
{
+ $local = $this->resolveCitationLocalContextForJob($row);
+ if ($local !== '') {
+ return $local;
+ }
$text = trim((string)$this->arrGet($row, 'origin_text', ''));
if ($text === '') {
$text = trim((string)$this->arrGet($row, 'content_a', ''));
@@ -2503,6 +3490,8 @@ class ReferenceCheckService
*/
public function fetchDoiLiteratureBlock($refer)
{
+ DbReconnectHelper::release();
+
$candidates = $this->extractAllDoiCandidatesFromRefer($refer);
if (empty($candidates)) {
return '';
@@ -2670,15 +3659,80 @@ class ReferenceCheckService
public function prepareRecheckPayload($refer, $referText = '')
{
$base = trim($referText) !== '' ? trim($referText) : $this->formatReferForLlm($refer);
- $cr = $this->fetchCrossrefAbstractByReferDoi($refer);
+ $bundle = (new ReferenceLiteratureFetchService())->fetchAndCleanForRefer(is_array($refer) ? $refer : []);
+ $block = $this->buildLiteratureBlockFromBundle(0, $bundle);
+ if ($block === '') {
+ $cr = $this->resolveDoiRecheckFromRefer(is_array($refer) ? $refer : []);
+ return [
+ 'refer_text' => $base,
+ 'doi_block' => $cr['text'],
+ 'has_abstract' => $cr['has_abstract'],
+ 'doi_used' => $cr['doi'],
+ ];
+ }
return [
'refer_text' => $base,
- 'doi_block' => $cr['text'],
- 'has_abstract' => $cr['has_abstract'],
- 'doi_used' => $cr['doi'],
+ 'doi_block' => $block,
+ 'has_abstract' => trim((string)($bundle['abstract_final'] ?? '')) !== '',
+ 'doi_used' => trim((string)($bundle['doi'] ?? '')),
];
}
+    /**
+     * Renders a fetched-literature bundle as one text block (heading with optional DOI, abstract, body text, sources); returns '' when the bundle has neither abstract nor body text.
+     */
+    private function buildLiteratureBlockFromBundle($refNo, array $bundle)
+    {
+        $summary = trim((string)($bundle['abstract_final'] ?? $bundle['abstract'] ?? ''));
+        $body = trim((string)($bundle['content_cleaned'] ?? ''));
+        if ($body === '') {
+            // No cleaned text: use the first 6000 characters of the trimmed raw content instead.
+            $body = mb_substr(trim((string)($bundle['raw_content'] ?? '')), 0, 6000);
+        }
+        if ($summary === '' && $body === '') {
+            return '';
+        }
+
+        $title = $refNo > 0 ? ('【参考文献 ' . intval($refNo) . '】') : '【文献内容】';
+        $doi = trim((string)($bundle['doi'] ?? ''));
+        $sections = [$doi === '' ? $title : $title . ' DOI: ' . $doi];
+        if ($summary !== '') {
+            $sections[] = '【摘要】' . "\n" . $summary;
+        }
+        if ($body !== '') {
+            $sections[] = '【清洗后文献内容】' . "\n" . $body;
+        }
+        $sourceList = $bundle['sources'] ?? null;
+        if (is_array($sourceList) && implode(',', $sourceList) !== '') {
+            $sections[] = 'Sources: ' . implode(',', $sourceList);
+        }
+        return implode("\n\n", $sections);
+    }
+
+    /** Best-effort write of the bundle's abstract and cleaned text onto one check-result row; a failure is only logged. */
+    private function persistLiteratureOnCheckRow($checkId, array $bundle)
+    {
+        $rowId = intval($checkId);
+        if ($rowId <= 0) {
+            return;
+        }
+        $fields = [
+            'abstract_text' => trim((string)($bundle['abstract_final'] ?? $bundle['abstract'] ?? '')),
+            'refer_content_cleaned' => trim((string)($bundle['content_cleaned'] ?? '')),
+        ];
+        // Without cleaned text, store the first 6000 characters of the trimmed raw content.
+        if ($fields['refer_content_cleaned'] === '') {
+            $fields['refer_content_cleaned'] = mb_substr(trim((string)($bundle['raw_content'] ?? '')), 0, 6000);
+        }
+        try {
+            DbReconnectHelper::ensure();
+            $fields['updated_at'] = date('Y-m-d H:i:s');
+            Db::name('article_reference_check_result')->where('id', $rowId)->update($fields);
+        } catch (\Throwable $e) {
+            \think\Log::warning('persistLiteratureOnCheckRow: ' . $e->getMessage());
+        }
+    }
+
/**
* 旧接口:拼接成单块文本(向后兼容,建议调用方改用 prepareRecheckPayload)
*/
@@ -2710,6 +3764,24 @@ class ReferenceCheckService
return false;
}
+ if ($this->hasSecondPassCompleted($row)) {
+ return true;
+ }
+
+ $groupRows = $this->findCitationGroupRows($row);
+ $isGroup = $this->isCitationGroupCheck($groupRows);
+ if ($isGroup) {
+ $leaderRefNo = $this->resolveCitationGroupLeaderRefNo($groupRows);
+ $currentRefNo = intval($this->arrGet($row, 'reference_no', 0));
+ if ($currentRefNo !== $leaderRefNo) {
+ $freshRow = Db::name('article_reference_check_result')->where('id', $checkId)->find();
+ if (!empty($freshRow) && $this->hasSecondPassCompleted($freshRow)) {
+ return true;
+ }
+ return false;
+ }
+ }
+
$refer = null;
if (intval($row['p_refer_id']) > 0) {
$refer = Db::name('production_article_refer')
@@ -2717,22 +3789,15 @@ class ReferenceCheckService
->where('state', 0)
->find();
}
- if (empty($refer) || $this->extractReferDoiOnly($refer) === '') {
- return false;
+
+ $contentA = $this->resolveParagraphContextForJob($row);
+ if ($isGroup) {
+ $referText = $this->buildCombinedReferTextForCitationGroup($groupRows);
+ } else {
+ $referText = $this->resolveReferTextForCheckRow($row, $refer);
}
- $cr = $this->fetchCrossrefAbstractByReferDoi($refer);
- if (empty($cr['has_abstract'])) {
- return false;
- }
-
- $contentA = $this->resolveMainContentForJob($row);
- $referText = trim((string)$this->arrGet($row, 'refer_text', ''));
- if ($referText === '' && is_array($refer)) {
- $referText = $this->formatReferForLlm($refer);
- }
-
- return $this->runSecondPassBlocking($checkId, $row, $contentA, $refer, $referText);
+ return $this->runSecondPassBlocking($checkId, $row, $contentA, $refer, $referText, $groupRows);
}
/**
@@ -2787,6 +3852,27 @@ class ReferenceCheckService
return $result;
}
+    /**
+     * Paragraph-level citation context: returns [start, end, text] for the paragraph around the tag, deferring to extractLocalCitationContext() when that text is not meaningful.
+     */
+    private function extractParagraphCitationContext($content, $tagStart, $tagEnd, array $tagSpans)
+    {
+        $from = $this->findParagraphStart($content, $tagStart);
+        $to = $this->findParagraphEnd($content, $tagEnd);
+        $paragraphText = $this->buildCitationContextText($content, $from, $to);
+
+        // A usable paragraph wins: citations inside one paragraph share its text.
+        if ($this->isMeaningfulCitationContext($paragraphText)) {
+            return [$from, $to, $paragraphText];
+        }
+
+        // Paragraph text was rejected (e.g. nothing but punctuation is left),
+        // so hand over to the narrower, tag-local extraction.
+        return $this->extractLocalCitationContext(
+            $content, $tagStart, $tagEnd, $tagSpans
+        );
+    }
+
/**
* 按引用位置截取局部上下文:优先取标签前叙述;同句多引时后续引用从上一标签后开始。
*/
@@ -2929,6 +4015,89 @@ class ReferenceCheckService
return $text;
}
+    /**
+     * Byte offset at which the closest earlier citation tag in the same paragraph ends; the paragraph start when there is none.
+     */
+    private function resolvePriorCitationTagEnd($content, $tagStart)
+    {
+        $tagStart = intval($tagStart);
+        $floor = $this->findParagraphStart($content, $tagStart);
+        $found = $this->collectBlueTagMatches($content);
+        if (empty($found[0])) {
+            return $floor;
+        }
+
+        // Keep the largest tag end that lies past the paragraph start but not beyond the current tag.
+        $best = $floor;
+        foreach ($found[0] as $hit) {
+            $stop = intval($hit[1]) + strlen($hit[0]);
+            if ($stop > $best && $stop <= $tagStart) {
+                $best = $stop;
+            }
+        }
+
+        return $best;
+    }
+
+ /**
+ * Given the byte offset where the current sentence starts, returns the start offset of the sentence before it (0 when there is none).
+ */
+ private function findPreviousSentenceStart($content, $sentenceStart)
+ {
+ $sentenceStart = intval($sentenceStart);
+ if ($sentenceStart <= 0) {
+ return 0;
+ }
+
+ $pos = $sentenceStart - 1; // byte just before the current sentence
+ while ($pos > 0 && isset($content[$pos]) && ctype_space($content[$pos])) { // skip whitespace backwards, byte by byte
+ $pos--;
+ }
+ if ($pos <= 0) {
+ return 0;
+ }
+
+ $prev = $this->findSentenceStart($content, $pos);
+ if ($prev >= $sentenceStart) { // no progress: step one more byte back and retry once
+ $pos--;
+ while ($pos > 0 && isset($content[$pos]) && ctype_space($content[$pos])) {
+ $pos--;
+ }
+ if ($pos <= 0) {
+ return 0;
+ }
+ $prev = $this->findSentenceStart($content, $pos);
+ }
+
+ return max(0, $prev); // never report a negative offset
+ }
+
+    /**
+     * Moves a context start backwards by up to $extraSentences whole sentences, never going before $minStart.
+     */
+    private function extendContextStartBackward($content, $start, $minStart, $extraSentences = 2)
+    {
+        $minStart = max(0, intval($minStart));
+        $cursor = intval($start);
+        $remaining = max(0, intval($extraSentences));
+        if ($remaining === 0 || $cursor <= $minStart) {
+            return max($minStart, $cursor);
+        }
+
+        // Walk back one sentence per pass; stop at the floor or as soon as
+        // findPreviousSentenceStart() no longer yields an earlier offset.
+        while ($remaining > 0 && $cursor > $minStart) {
+            $earlier = $this->findPreviousSentenceStart($content, $cursor);
+            if ($earlier >= $cursor) {
+                break;
+            }
+            $cursor = max($minStart, $earlier);
+            $remaining--;
+        }
+
+        return $cursor;
+    }
+
/**
* 过滤仅标点、过短或无字母/汉字的上下文(如去掉标签后只剩 ".")
*/
@@ -3049,6 +4218,36 @@ class ReferenceCheckService
return $best;
}
+    /**
+     * Byte offset where the paragraph around $tagEnd ends: just past the next </p> or <br><br> pair, or just before the next <p ...> tag or blank line ("\n\n"), whichever comes first; never less than $tagEnd.
+     */
+    private function findParagraphEnd($content, $tagEnd)
+    {
+        $length = strlen($content);
+        $pos = max(0, intval($tagEnd));
+        if ($pos >= $length) {
+            return $length;
+        }
+
+        $candidates = [$length]; // end of content is always a valid boundary
+
+        if (preg_match('/<\/p>/i', $content, $m, PREG_OFFSET_CAPTURE, $pos)) {
+            $candidates[] = intval($m[0][1]) + strlen($m[0][0]); // include the closing tag itself
+        }
+        if (preg_match('/<p[^>]*>/i', $content, $m, PREG_OFFSET_CAPTURE, $pos + 1)) {
+            $candidates[] = intval($m[0][1]); // stop before the next paragraph opens
+        }
+        if (preg_match('/\n\n/', $content, $m, PREG_OFFSET_CAPTURE, $pos)) {
+            $candidates[] = intval($m[0][1]);
+        }
+        if (preg_match('/<br\s*\/?>\s*<br\s*\/?>/i', $content, $m, PREG_OFFSET_CAPTURE, $pos)) {
+            $candidates[] = intval($m[0][1]) + strlen($m[0][0]); // include the double line break
+        }
+
+        $end = min($candidates);
+        return max($pos, $end);
+    }
+
/**
* 段落过长时从引用处向前截取上限,避免单次 LLM 上下文过大
*/
diff --git a/application/common/mq/RabbitMqConfig.php b/application/common/mq/RabbitMqConfig.php
index df30aa5e..84e19c17 100644
--- a/application/common/mq/RabbitMqConfig.php
+++ b/application/common/mq/RabbitMqConfig.php
@@ -21,4 +21,10 @@ class RabbitMqConfig
$rc = self::get('reference_check', []);
return is_array($rc) ? $rc : [];
}
+
+    public static function referenceRelevance()
+    {
+        $section = self::get('reference_relevance', []); // a non-array value is replaced by [] below
+        return !is_array($section) ? [] : $section;
+    }
}
diff --git a/application/common/mq/ReferenceCheckArticleWorker.php b/application/common/mq/ReferenceCheckArticleWorker.php
index e71da22d..46f61026 100644
--- a/application/common/mq/ReferenceCheckArticleWorker.php
+++ b/application/common/mq/ReferenceCheckArticleWorker.php
@@ -3,6 +3,7 @@
namespace app\common\mq;
use think\Db;
+use app\common\DbReconnectHelper;
use app\common\ReferenceCheckService;
/**
@@ -25,6 +26,7 @@ class ReferenceCheckArticleWorker
public function handleMessage(array $payload)
{
+ DbReconnectHelper::ensure();
$pArticleId = intval(isset($payload['p_article_id']) ? $payload['p_article_id'] : 0);
$batchId = intval(isset($payload['batch_id']) ? $payload['batch_id'] : 0);
if ($pArticleId <= 0 || $batchId <= 0) {
@@ -115,6 +117,7 @@ class ReferenceCheckArticleWorker
*/
private function processOneRow($checkId, array $row)
{
+ DbReconnectHelper::ensure();
$claimed = Db::name('article_reference_check_result')
->where('id', intval($checkId))
->where('queue_status', ReferenceCheckService::QUEUE_PENDING)
@@ -134,6 +137,7 @@ class ReferenceCheckArticleWorker
return 'ok';
} catch (\Exception $e) {
$this->svc->log('ReferenceCheckArticleWorker check_id=' . $checkId . ' err=' . $e->getMessage());
+ DbReconnectHelper::ensure();
if ($retryCount < ReferenceCheckService::QUEUE_MAX_RETRY) {
$this->svc->markQueueRuntime($checkId, ReferenceCheckService::QUEUE_PENDING, $retryCount + 1);
return $this->processOneRow($checkId, array_merge($row, ['retry_count' => $retryCount + 1]));
diff --git a/application/common/service/LLMService.php b/application/common/service/LLMService.php
index 20e25fc1..3daca96a 100644
--- a/application/common/service/LLMService.php
+++ b/application/common/service/LLMService.php
@@ -28,18 +28,17 @@ class LLMService
* @param string $contextText 正文引用处句子
* @param string $referText 参考文献条目(或 refer 格式化文本)
* @param bool $isAgain 是否为 DOI 二次复核
- * @param string|null $doiBlock 可选:系统抓取到的 DOI 真实文献内容(仅二次复核使用)
+ * @param string|null $doiBlock 可选:系统抓取到的 DOI 真实文献内容(仅二次复核使用)
+ * @param string $citeGroupRefs 引用文献组,如 1,2 或 4,5,6
+ * @param string $localContext 本引用位置附近上下文(可选)
+ * @return array{results:array,request_failed?:bool}
*/
- public function checkReference($contextText, $referText, $isAgain = false, $doiBlock = null)
+ public function checkReference($contextText, $referText, $isAgain = false, $doiBlock = null, $citeGroupRefs = '', $localContext = '')
{
- // request_failed=true 表示"LLM 通讯/解析层面的失败"(可重试,区别于业务上的"未命中");
- // 上游 runReferenceCheckOnce 会据此把 DB.status 置为 3(失败) 并抛异常触发 MQ worker 重试
$fallback = [
- 'can_support' => false,
- 'is_match' => false,
- 'confidence' => 0.0,
- 'reason' => 'LLM not configured or request failed',
+ 'results' => [],
'request_failed' => true,
+ 'reason' => 'LLM not configured or request failed',
];
if ($this->url === '' || $this->model === '') {
\think\Log::warning('ReferenceCheck LLM: url or model not configured');
@@ -47,15 +46,16 @@ class LLMService
}
$contextText = trim($contextText);
+ \think\Log::info('llm checkReference:' . $contextText);
$referText = trim($referText);
+ \think\Log::info('llm referText:' . $referText);
$doiBlock = trim((string)$doiBlock);
+ $citeGroupRefs = trim((string)$citeGroupRefs);
+ $localContext = trim((string)$localContext);
if ($contextText === '' || $referText === '') {
- // 空文本是入参问题,不是 LLM 故障,不需要重试
return [
- 'can_support' => false,
- 'is_match' => false,
- 'confidence' => 0.0,
- 'reason' => 'Empty citation context or reference text',
+ 'results' => [],
+ 'reason' => 'Empty citation context or reference text',
];
}
@@ -63,27 +63,30 @@ class LLMService
if (mb_strlen($contextText) > $maxContextLen) {
$contextText = mb_substr($contextText, 0, $maxContextLen);
}
- if (mb_strlen($referText) > 4000) {
- $referText = mb_substr($referText, 0, 4000);
+ if (mb_strlen($localContext) > 3000) {
+ $localContext = mb_substr($localContext, 0, 3000);
}
- if (mb_strlen($doiBlock) > 4000) {
- $doiBlock = mb_substr($doiBlock, 0, 4000);
+ if (mb_strlen($referText) > 6000) {
+ $referText = mb_substr($referText, 0, 6000);
+ }
+ if (mb_strlen($doiBlock) > 8000) {
+ $doiBlock = mb_substr($doiBlock, 0, 8000);
}
if ($isAgain) {
$system = $this->buildReferenceCheckSecondPassPrompt();
- $user = $this->buildReferenceCheckSecondPassUserPrompt($contextText, $referText, $doiBlock);
+ $user = $this->buildReferenceCheckSecondPassUserPrompt($contextText, $referText, $doiBlock, $citeGroupRefs, $localContext);
} else {
$system = $this->buildReferenceCheckFirstPassPrompt();
- $user = $this->buildReferenceCheckFirstPassUserPrompt($contextText, $referText);
+ $user = $this->buildReferenceCheckFirstPassUserPrompt($contextText, $referText, $citeGroupRefs, $localContext, $doiBlock);
}
- \think\Log::info('ReferenceCheck system head: ' . mb_substr($system, 0, 200));
- \think\Log::info('ReferenceCheck user head: ' . mb_substr($user, 0, 600));
+// \think\Log::info('ReferenceCheck system head: ' . mb_substr($system, 0, 200));
+// \think\Log::info('ReferenceCheck user head: ' . mb_substr($user, 0, 600));
$payload = [
- 'model' => $this->model,
+ 'model' => $this->model,
'temperature' => 0,
- 'messages' => [
+ 'messages' => [
['role' => 'system', 'content' => $system],
['role' => 'user', 'content' => $user],
],
@@ -101,23 +104,14 @@ class LLMService
return $fallback;
}
- $canSupport = $this->parseCanSupportFromParsed($parsed);
- $confidence = $this->snapReferenceCheckConfidence(
- $this->normalizeConfidence(isset($parsed['confidence']) ? $parsed['confidence'] : 0),
- $canSupport
- );
- $reason = $this->cleanReason((string)(isset($parsed['reason']) ? $parsed['reason'] : ''));
- \think\Log::info(
- 'ReferenceCheck result: can_support=' . ($canSupport ? '1' : '0')
- . ', confidence=' . $confidence
- . ', reason=' . $reason
- );
- return [
- 'can_support' => $canSupport,
- 'is_match' => $canSupport,
- 'confidence' => $confidence,
- 'reason' => $reason,
- ];
+ $results = $this->parseReferenceCheckResultsFromParsed($parsed, $citeGroupRefs, $localContext, $doiBlock);
+ if (empty($results)) {
+ \think\Log::warning('ReferenceCheck LLM: empty results array');
+ return $fallback;
+ }
+
+ \think\Log::info($results);
+ return ['results' => $results];
}
/**
@@ -174,83 +168,541 @@ class LLMService
$s = strtolower(trim((string)$value));
return in_array($s, ['1', 'true', 'yes', 'support', 'supported'], true);
}
+ private function bulidReferenceCheckFirstPassPrompt(){
+ return <<<'PROMPT'
+你是一名护理、医学与科研期刊的资深文献编辑,专门校对「正文引用句」与「对应参考文献条目」是否匹配。
- /** 第一次校对:书目条目 vs 正文全文 */
+你的目标是严格识别错引、张冠李戴、方法不符、对象不符、结论不成立的问题。
+
+宁可少判 true,也不要漏掉错引。
+
+你只能依据用户提供的内容判断:
+1. 正文引用句
+2. 当前对应参考文献条目
+
+禁止假设已阅读全文。
+禁止联网。
+禁止脑补文献内容。
+禁止根据学科常识推断研究结果。
+
+====================
+【核心任务】
+
+判断:
+
+正文在该引用位置表达的核心观点、结论、方法、数据、定义、模型、研究发现、指南依据等,
+
+是否能够被该条参考文献合理支撑。
+
+你判断的是:
+
+“引用是否成立”
+
+不是:
+
+“正文是否正确”。
+
+====================
+【总原则(最高优先级)】
+
+采用严格审稿标准:
+
+边界不清时,一律判 false。
+
+宁可误杀(人工复核),不要漏掉错引。
+
+同领域 ≠ 匹配。
+
+同关键词 ≠ 匹配。
+
+相关 ≠ 能支撑。
+
+====================
+【强制规则】
+
+1. 严禁关键词硬匹配
+
+不能因为出现:
+患者、护理、治疗、研究、模型、算法、深度学习、机器学习、焦虑、效果
+
+等泛化词汇就判定匹配。
+
+必须看:
+
+- 核心对象
+- 研究问题
+- 方法
+- 场景
+- 结局指标
+- 核心论点
+
+是否一致。
+
+====================
+2. 方法学必须严格一致(极重要)
+
+若正文明确提到:
+
+- 算法
+- 模型
+- 聚类方法
+- 深度学习架构
+- 统计方法
+- 数学模型
+- 评价指标
+
+必须要求文献与其存在明确关联。
+
+例如:
+
+不匹配:
+- fuzzy clustering ≠ deep learning
+- CNN ≠ LSTM
+- random forest ≠ SVM
+- 聚类 ≠ 分类
+- 特征选择 ≠ 分类预测
+- 风险因素分析 ≠ 干预研究
+
+仅属于同一“大领域(AI/ML)”
+不能判定匹配。
+
+若方法体系不同:
+
+优先判 false + 0.10。
+
+====================
+3. 医学护理引用严格一致
+
+若正文涉及:
+
+- 疾病
+- 人群
+- 护理场景
+- 干预措施
+- 结局指标
+
+必须基本一致。
+
+例如:
+
+不匹配:
+- ICU ≠ 普通病房
+- 老年人 ≠ 儿童
+- 糖尿病 ≠ 高血压
+- 心理护理 ≠ 运动干预
+- 焦虑改善 ≠ 生存率提高
+
+====================
+4. 强结论必须强证据
+
+正文若出现:
+
+- 显著改善
+- 明显降低
+- 证实
+- 优于
+- 有效预测
+- 危险因素
+- 因果关系
+
+文献必须能合理支撑该强结论。
+
+仅“应用研究”“相关研究”“观察研究”
+不能自动支持强结论。
+
+否则 false。
+
+====================
+5. 特定证据类型必须一致
+
+正文若明确写:
+
+- RCT/randomized trial
+- Meta-analysis
+- Guideline
+- Systematic review
+- Expert consensus
+
+而参考文献类型明显不符:
+
+直接 false。
+
+====================
+6. 信息不足从严
+
+若参考文献只有:
+
+作者 + 年份
+
+或信息过少,
+
+无法建立明确关联:
+
+false + 0.30
+
+====================
+【判定逻辑】
+
+只有同时满足以下条件,才能 true:
+
+1. 主题一致
+2. 核心对象一致
+3. 核心论点一致
+4. 方法/研究方向一致
+5. 无明显错引风险
+
+任意一点明显不符:
+
+false。
+
+====================
+【评分(只能四选一)】
+
+只能输出:
+
+0.90
+0.75
+0.30
+0.10
+
+禁止任何其他分数。
+
+评分规则:
+
+0.90
+明确匹配:
+主题、对象、方法、核心论点均明显一致。
+
+0.75
+基本匹配:
+整体支撑成立,但存在轻微概括或小范围表述差异。
+
+0.30
+存疑:
+同领域但支撑不足;
+信息不足;
+需人工复核。
+
+0.10
+明确错引:
+主题、对象、方法或核心论点明显不符。
+
+硬规则:
+
+is_match=true
+只能:
+0.75 或 0.90
+
+is_match=false
+只能:
+0.10 或 0.30
+
+====================
+【reason 要求】
+
+仅说明:
+
+1. 是否主题一致;
+2. 核心论点/方法是否能支撑。
+
+禁止模糊措辞:
+“可能”
+“看起来”
+“应该”
+“疑似”
+
+长度:
+
+20~60字。
+
+====================
+【输出要求】
+
+仅输出一行 minified JSON。
+
+禁止 markdown。
+禁止解释。
+禁止换行。
+禁止任何额外内容。
+
+格式:
+
+{"is_match":true|false,"confidence":0.10|0.30|0.75|0.90,"reason":"简体中文说明"}
+PROMPT;
+
+ }
+ /** 第一次校对:参考文献真实性与支撑力度 */
private function buildReferenceCheckFirstPassPrompt()
{
- return <<<'PROMPT'
-你是文献引用校对助手。判断【正文全文】与【参考文献书目】是否相关、能否用于支撑正文中的引用。
-
-【核心原则:从宽判断,避免误杀】
-默认倾向 can_support=true。只要文献与正文不是「风马牛不相及」,即判为相关、能支撑。
-不要求变量一致、不要求结论逐条对应、不要求研究设计相同。
-
-【仅当以下情况才判 can_support=false(与正文明显无关)】
-- 学科/主题完全无关(如正文讲深度学习聚类,文献是糖尿病步态检测)。
-- 明显张冠李戴(正文断言 A 疗法的效果,文献研究的是完全不同的 B 问题且无关联)。
-- 文献条目与正文讨论的对象/场景毫无交集,且无法作背景或理论引用。
-
-【以下情况均应 can_support=true】
-- 同一大领域或相邻方向(如护理、心理、管理、医学、统计、AI 等相近子领域)。
-- 可作背景文献、综述性引用、理论或方法的一般性依据。
-- 表述略宽、略有概括、变量名不完全一致,但大方向说得通。
-
-【confidence 固定档位(禁止其它小数)】
-can_support=true:0.65(有关联但较泛)/ 0.78 / 0.85 / 0.92 / 0.98(非常确定相关)
-can_support=false:0.15(明确风马牛不相及)/ 0.25 / 0.35 / 0.45(仅当实在无法建立任何合理关联)
-
-【输出】仅一行 minified JSON,无 markdown:
-{"can_support":true|false,"is_match":true|false,"confidence":0.15|0.25|0.35|0.45|0.65|0.78|0.85|0.92|0.98,"reason":"30-80字简体中文"}
-is_match 必须与 can_support 相同。
-PROMPT;
+ return $this->buildReferenceCheckSupportSystemPrompt(false);
}
- private function buildReferenceCheckFirstPassUserPrompt($contextText, $referText)
+ private function buildReferenceCheckSupportSystemPrompt($isSecondPass = false)
{
- return "【正文全文 article_main.content】\n" . $contextText
- . "\n\n【参考文献书目 refer_text】\n" . $referText
- . "\n\n请从宽判断:文献与正文非风马牛不相即可判 can_support=true,只返回 JSON。";
+ $prompt = <<<'PROMPT'
+你是一名护理、医学、生物医学与科研期刊的资深学术编辑,正在执行“参考文献真实性与支撑力度校对”。
+
+你的任务不是判断“主题是否相关”,而是判断:
+【稿件正文中某段被引用内容】是否真的能被【对应编号的参考文献】直接或充分支撑。
+
+你必须严格基于用户提供的材料作出判断,不得凭常识、不得脑补、不得假设参考文献中“可能写过但未提供”的内容。
+
+==================================================
+【一、任务目标】
+你需要判断:
+“正文引用位置的核心论点、结论、背景陈述、机制解释、疗效描述、数据表达或因果表述,
+是否能被对应参考文献真实支持。”
+
+这里的“支持”不是指“文献主题相关”或“研究领域接近”,而是指:
+参考文献中确实包含足以支持正文该处表述的内容。
+
+==================================================
+【二、输出原则:结果必须直接对应数据库行】
+
+你输出的结果将直接写入数据库表 t_article_reference_check_result。
+
+因此:
+## 输出必须是 results 数组,数组中的每一个对象对应数据库中的一行,也就是“一个引用位置中的一条参考文献结果”。
+
+换句话说:
+- 如果某个引用位置是 [3],则输出 1 条 result(reference_no=3)
+- 如果某个引用位置是 [1,2],则输出 2 条 result:
+ - 一条对应 reference_no=1
+ - 一条对应 reference_no=2
+
+每条 result 都必须给出该参考文献“单独”对正文引用句的支撑判断。
+如果该引用位置是联合引用(citation group 中有多篇文献),则除了单条判断外,还必须给出该引用组整体的联合判断(combined_* 字段)。
+
+==================================================
+【三、最重要原则:只看“是否支撑正文核心断言”,不是看“主题是否沾边”】
+
+以下情况不能判为强支撑:
+1. 参考文献只和主题大致相关,但没有明确支持正文中的关键表述
+2. 正文说的是“疗效提升/死亡率下降/全球高发/耐药/多通路机制”等明确论点,而文献只是在背景里泛泛提到疾病
+3. 正文是多层复合句,文献只支撑其中一小部分
+4. 正文有因果、比较、趋势、机制、疗效强度等强表述,而文献没有明确证据
+5. 文献是基础机制研究,但正文引用它来支撑宏观流行病学、临床治疗现状或指南式结论
+6. 文献可以“推测支持”但不是“直接/明确支持”
+
+==================================================
+【三b、多 claim 复合句 → 0.78 部分支撑(勿误降到 0.45)】
+
+正文常为 2~4 个连续 claim 的复合句。须逐 claim 比对后综合给分:
+
+- 若文献(含 DOI 摘要)能**明确支撑多数关键概念**(如遗传异质性/多基因改变、多 survival pathway 并存、耐药或治疗挑战),
+ 但**未逐字写出**正文完整因果链(如「异质性→多通路→单靶点疗效下降」),
+ → 应判 **partial_support**,confidence 通常 **0.78**(边界情况 0.65),**不得**仅因文献主标题聚焦某化合物/干预就降到 0.45。
+
+- 0.45 仅用于:文献与 claim 方向明显不符、仅同病沾边、或几乎无可用证据。
+
+**校准样例(单条 [4],须接近此逻辑):**
+
+引用句:
+Furthermore, the genomic heterogeneity of colorectal cancer (CRC) presents additional difficulties because tumors frequently make use of several survival pathways at once, which reduces the efficacy of single-target treatments [4].
+
+文献4(Sheikhnia et al., thymoquinone CRC 机制综述):
+- Claim1 遗传异质性/多基因改变:文献有 APC/KRAS/TP53、MSI/CIN 等 → 支撑较强
+- Claim2 多 survival pathway:文献列举 PI3K/Akt、Wnt、STAT3、NF-κB 等多通路 → 支撑较强
+- Claim3 单靶点疗效下降:文献有 drug resistance/治疗挑战,但未直述因果链 → 部分支撑
+- **输出**:can_support=1, confidence=**0.78**, support_role=supplementary_support(**不是 0.45**)
+
+用户消息中若提供【DOI 真实文献内容】,**必须结合摘要判断**,不得仅凭书目标题给分。
+
+==================================================
+【四、评分规则】
+
+你必须使用以下 8 个固定分值之一:
+0.98 / 0.92 / 0.85 / 0.78 / 0.65 / 0.45 / 0.25 / 0.15
+
+判定含义:
+- 0.98 / 0.92 / 0.85 => 强支撑(strong_support)
+- 0.78 / 0.65 => 部分支撑(partial_support)
+- 0.45 / 0.25 => 支撑不足(insufficient_support)
+- 0.15 => 不支撑(not_support)
+
+can_support 取值规则:
+- 若该文献/联合引文整体可判为 strong_support 或 partial_support,则 can_support = 1
+- 若判为 insufficient_support 或 not_support,则 can_support = 0
+
+==================================================
+【五、单条文献结果如何判断】
+
+对于每一条参考文献,你必须判断它“单独”能否支撑该引用位置的正文内容,并输出:
+- can_support
+- confidence
+- reason
+- support_role
+
+其中:
+### support_role 只能取以下值之一
+- primary_support:该文献本身就是主要证据来源,能支撑引用句核心内容
+- supplementary_support:能支撑部分重要内容,但不是主要来源
+- minimal_support:只提供少量背景或边缘支撑
+- no_meaningful_support:几乎不能支撑该引用句
+
+### reason 的写法要求
+必须使用中文,明确写出:
+1. 这篇文献具体支撑正文的哪一部分
+2. 哪些部分没有支撑到
+3. 是否存在文献类型与引用用途不匹配的问题
+4. 为什么给这个分值,而不是更高或更低
+
+==================================================
+【六、联合引用的判断规则】
+
+当同一个引用位置包含多篇参考文献时(例如 [1,2] / [4,5,6]),除了逐条给单条结果外,还要额外判断:
+“这些文献合起来,是否足以支撑该引用位置的正文内容?”
+
+联合结论输出到:
+- combined_can_support
+- combined_confidence
+- combined_reason
+
+规则:
+1. 联合评分不是单条评分平均值
+2. 如果其中一篇文献已强支撑,其他文献只是补充,则联合评分可接近主支撑文献
+3. 如果多篇文献分别覆盖不同部分,合起来能较完整支撑正文,则联合评分可以高于某些单条评分
+4. 但如果最关键的核心断言没有被任何文献明确支撑,则联合评分不能虚高
+5. 如果多篇文献都只是零散相关,需要大量推断才能拼出正文结论,则联合评分通常不应过高
+
+==================================================
+【七、单引文的 combined_* 字段处理规则】
+
+即使某个引用位置只有 1 条参考文献,也仍然必须输出 combined_* 字段。
+此时:
+- combined_can_support = can_support
+- combined_confidence = confidence
+- combined_reason = “该引用位置仅包含单条文献,联合结论等同于该文献的单条结论。” 或等价表述
+
+这样可以保证输出结构统一,便于数据库写入。
+
+==================================================
+【八、输出 JSON 结构】
+
+你必须输出合法 JSON,且只能输出以下结构:
+
+{
+ "results": [
+ {
+ "reference_no": 1,
+ "cite_group_refs": "1,2",
+ "can_support": 0,
+ "confidence": 0.65,
+ "reason": "中文,单条文献结论",
+ "support_role": "supplementary_support",
+ "combined_can_support": 1,
+ "combined_confidence": 0.85,
+ "combined_reason": "中文,联合引用整体结论"
+ }
+ ]
+}
+
+==================================================
+【九、字段约束】
+
+### 1)results 中每个对象都必须包含以下字段:
+- reference_no
+- cite_group_refs
+- can_support
+- confidence
+- reason
+- support_role
+- combined_can_support
+- combined_confidence
+- combined_reason
+
+### 2)reference_no
+必须对应当前引用位置中的某一条参考文献编号。
+
+### 3)cite_group_refs
+必须是该引用位置的完整引文组,格式如:
+- "3"
+- "1,2"
+- "4,5,6"
+
+### 4)同一引用位置若包含多条参考文献,则必须输出多条 result
+例如 cite_group_refs = "1,2" 时,必须输出:
+- 一条 reference_no=1
+- 一条 reference_no=2
+
+### 5)同一引用位置下的 combined_* 必须一致
+例如同属 "1,2" 的两条 result,它们的:
+- combined_can_support
+- combined_confidence
+- combined_reason
+必须完全一致。
+
+==================================================
+【十、禁止事项】
+你绝对不能:
+- 杜撰文献中不存在的结论
+- 把“主题相关”当作“内容支撑”
+- 因为是同一疾病就默认支持
+- 输出 JSON 以外的任何内容
+
+现在开始,读取用户提供的引用位置正文、参考文献信息和文献内容,输出结果。
+PROMPT;
+
+ if ($isSecondPass) {
+ $prompt .= <<<'PROMPT'
+
+
+==================================================
+【二次校对补充(DOI 真实文献内容)】
+用户消息中会提供【DOI 真实文献内容(PubMed/Crossref)】。
+必须以 DOI 真实内容为准复核支撑力度;书目信息与 DOI 冲突时以 DOI 为准。
+仍须输出完整 results 数组,逐条给出单文献判断与联合判断。
+PROMPT;
+ }
+
+ return $prompt;
}
- /** 第二次校对:Crossref 摘要(Refer_doi) */
+ private function buildReferenceCheckFirstPassUserPrompt($contextText, $referText, $citeGroupRefs = '', $localContext = '', $doiBlock = '') // first-pass user prompt
+ {
+ return $this->buildReferenceCheckSupportUserPrompt($contextText, $referText, $citeGroupRefs, $localContext, $doiBlock); // pure delegation, arguments passed through unchanged
+ }
+
+    /** Assembles the user message: section text, optional cite group and local context, reference list, optional DOI block, then the closing output instruction. */
+    private function buildReferenceCheckSupportUserPrompt($contextText, $referText, $citeGroupRefs, $localContext, $doiBlock)
+    {
+        $group = trim((string)$citeGroupRefs);
+        $nearby = trim((string)$localContext);
+        $doiText = trim((string)$doiBlock);
+
+        $sections = ["【正文节 t_article_main】\n" . $contextText];
+        if ($group !== '') {
+            // A comma in the group marks a joint citation; without one it is a single citation.
+            $mode = strpos($group, ',') === false ? '单独引用' : '联合引用';
+            $sections[] = "【引用文献组 cite_group_refs】{$group}({$mode})";
+        }
+        if ($nearby !== '') {
+            $sections[] = "【本引用位置附近上下文】\n" . $nearby;
+        }
+        $sections[] = "【参考文献书目(按编号列出)】\n" . $referText;
+        if ($doiText !== '') {
+            $sections[] = "【DOI 真实文献内容(PubMed/Crossref,一轮校对已提供)】\n" . $doiText;
+        }
+        $sections[] = '请严格按 system 要求输出 results 数组 JSON,每条 result 对应一个 reference_no,并包含 combined_* 字段。';
+
+        return implode("\n\n", $sections);
+    }
+
+ /**
+  * Second pass: system prompt for re-checking against the real DOI content.
+  *
+  * Delegates to buildReferenceCheckSupportSystemPrompt() with its argument set to true
+  * (presumably the second-pass flag that appends the DOI re-check supplement).
+  */
private function buildReferenceCheckSecondPassPrompt()
{
- return <<<'PROMPT'
-你是文献引用二次校对助手。已根据 Refer_doi 从 Crossref(https://api.crossref.org/works/)获取摘要,请结合【正文全文】复核该文献是否相关。
-
-【核心原则:与第一次相同,从宽判断】
-默认倾向 can_support=true。只要 Crossref 摘要(或书目)与正文不是风马牛不相及,即判相关、能支撑。
-以【Crossref 摘要】为准;摘要与书目冲突时以摘要为准。
-
-【仅当以下情况才判 can_support=false】
-- 摘要显示的研究主题/对象/方法与正文讨论内容完全风马牛不相及。
-- 典型风马牛不相及、张冠李戴,且无法解释为背景或泛化引用。
-
-【以下情况均应 can_support=true】
-- 摘要与正文属同领域或相近方向,能作背景、理论或方向性支撑。
-- 细节不完全一致,但不存在明显矛盾。
-
-【无 Crossref 摘要时】
-结合 refer_text 从宽判断;非明显无关仍可 can_support=true,confidence 建议 0.65。
-
-【confidence 固定档位(禁止其它小数)】
-can_support=true:0.65 / 0.78 / 0.85 / 0.92 / 0.98
-can_support=false:0.15 / 0.25 / 0.35 / 0.45
-
-【输出】仅一行 minified JSON:
-{"can_support":true|false,"is_match":true|false,"confidence":0.15|0.25|0.35|0.45|0.65|0.78|0.85|0.92|0.98,"reason":"30-80字简体中文"}
-is_match 必须与 can_support 相同。
-PROMPT;
+ return $this->buildReferenceCheckSupportSystemPrompt(true);
}
- private function buildReferenceCheckSecondPassUserPrompt($contextText, $referText, $doiBlock)
+ /**
+  * Build the user message for the second-pass (DOI-based) reference check.
+  *
+  * Delegates to buildReferenceCheckSupportUserPrompt(). When no DOI content is
+  * available, a placeholder notice is sent in the DOI section instead, so the model
+  * is told explicitly that it has to judge from the bibliography entry alone.
+  *
+  * @param mixed $contextText   article section text
+  * @param mixed $referText     bibliography entries
+  * @param mixed $doiBlock      DOI-derived content; null, empty or whitespace-only
+  *                             values all count as "not available"
+  * @param mixed $citeGroupRefs citation group, e.g. "1,2"
+  * @param mixed $localContext  text around the citation position
+  */
+ private function buildReferenceCheckSecondPassUserPrompt($contextText, $referText, $doiBlock, $citeGroupRefs = '', $localContext = '')
{
- $doiBlock = trim((string)$doiBlock);
- return "【正文全文 article_main.content】\n" . $contextText
- . "\n\n【参考文献书目 refer_text】\n" . $referText
- . "\n\n【Crossref 摘要】(Refer_doi → api.crossref.org/works/)\n"
- . ($doiBlock !== '' ? $doiBlock : '(未获取到摘要,请结合 refer_text 从宽判断)')
- . "\n\n文献与正文非风马牛不相即可判 can_support=true,只返回 JSON。";
+     // Normalise before testing for emptiness. The downstream builder trims the DOI
+     // block and drops the section when it is empty, so a null or whitespace-only
+     // value compared raw against '' would bypass the placeholder and the DOI
+     // section would vanish silently.
+     $doiBlock = trim((string)$doiBlock);
+     if ($doiBlock === '') {
+         $doiBlock = '(未获取到 DOI 摘要或元数据,请结合书目条目从严判断)';
+     }
+
+     return $this->buildReferenceCheckSupportUserPrompt(
+         $contextText,
+         $referText,
+         $citeGroupRefs,
+         $localContext,
+         $doiBlock
+     );
}
private function buildReferenceCheckSystemPrompt3()
{
@@ -1169,13 +1621,174 @@ PROMPT;
private function buildReferenceCheckRecheckUserPrompt($contextText, $referText, $doiBlock)
{
- return $this->buildReferenceCheckSecondPassUserPrompt($contextText, $referText, $doiBlock);
+ return $this->buildReferenceCheckSecondPassUserPrompt($contextText, $referText, $doiBlock, '', '');
}
/**
- * 与 buildReferenceCheckSystemPrompt3 一致的 confidence 档位
+ * Normalise a decoded model reply into a list of per-reference result rows.
+ *
+ * Accepts either a "results" array or a single flat result (recognised by a
+ * reference_no or confidence key). Rows that are not arrays, or whose reference_no
+ * is not a positive integer, are dropped. Confidence values are normalised and
+ * snapped to the allowed bands; can_support is taken from the row when present
+ * (can_support first, then is_match) and otherwise derived from the confidence.
+ * The partial-support floor is applied before the combined_* values are resolved,
+ * so a missing combined_confidence falls back to the adjusted confidence.
+ *
+ * @param array $parsed               decoded model output
+ * @param mixed $defaultCiteGroupRefs citation group used when a row has none
+ * @param mixed $localContext         text around the citation, passed to the floor rule
+ * @param mixed $doiBlock             DOI-derived content, passed to the floor rule
+ * @return array
*/
- private function getReferenceCheckConfidenceBands($isMatch)
+ private function parseReferenceCheckResultsFromParsed(array $parsed, $defaultCiteGroupRefs = '', $localContext = '', $doiBlock = '')
+ {
+     if (isset($parsed['results']) && is_array($parsed['results'])) {
+         $candidates = $parsed['results'];
+     } elseif (isset($parsed['reference_no']) || isset($parsed['confidence'])) {
+         // A single flat result object is treated as a one-element list.
+         $candidates = [$parsed];
+     } else {
+         $candidates = [];
+     }
+
+     // isset-based lookup: a missing key and an explicit null both yield the fallback.
+     $pick = function (array $row, $key, $fallback) {
+         return isset($row[$key]) ? $row[$key] : $fallback;
+     };
+
+     $output = [];
+     foreach ($candidates as $entry) {
+         if (!is_array($entry)) {
+             continue;
+         }
+
+         $referenceNo = intval($pick($entry, 'reference_no', 0));
+         if ($referenceNo <= 0) {
+             continue;
+         }
+
+         $score = $this->snapReferenceCheckConfidenceValue(
+             $this->normalizeConfidence($pick($entry, 'confidence', 0))
+         );
+         $supports = $this->canSupportFromConfidence($score);
+         if (array_key_exists('can_support', $entry)) {
+             $supports = $this->boolFromLlmValue($entry['can_support']);
+         } elseif (array_key_exists('is_match', $entry)) {
+             $supports = $this->boolFromLlmValue($entry['is_match']);
+         }
+
+         $why = $this->cleanReason((string)$pick($entry, 'reason', ''));
+         $role = $this->normalizeSupportRole($pick($entry, 'support_role', ''));
+         list($score, $supports, $role) = $this->applyMultiClaimPartialSupportFloor(
+             $localContext,
+             $doiBlock,
+             $score,
+             $supports,
+             $role,
+             $why
+         );
+
+         $jointScore = $this->snapReferenceCheckConfidenceValue(
+             $this->normalizeConfidence($pick($entry, 'combined_confidence', $score))
+         );
+         $jointSupports = $this->canSupportFromConfidence($jointScore);
+         if (array_key_exists('combined_can_support', $entry)) {
+             $jointSupports = $this->boolFromLlmValue($entry['combined_can_support']);
+         }
+
+         $groupRefs = trim((string)$pick($entry, 'cite_group_refs', $defaultCiteGroupRefs));
+         if ($groupRefs === '' && $defaultCiteGroupRefs !== '') {
+             $groupRefs = trim((string)$defaultCiteGroupRefs);
+         }
+
+         $output[] = [
+             'reference_no' => $referenceNo,
+             'cite_group_refs' => $groupRefs,
+             'can_support' => $supports,
+             'is_match' => $supports,
+             'confidence' => $score,
+             'reason' => $why,
+             'support_role' => $role,
+             'combined_can_support' => $jointSupports,
+             'combined_confidence' => $jointScore,
+             'combined_reason' => $this->cleanReason((string)$pick($entry, 'combined_reason', '')),
+         ];
+     }
+
+     return $output;
+ }
+
+ /**
+  * Map a model-supplied support_role onto the closed set of known roles.
+  *
+  * The value is cast to string, trimmed and lower-cased; anything outside the
+  * known set becomes 'no_meaningful_support'.
+  *
+  * @param mixed $role
+  * @return string
+  */
+ private function normalizeSupportRole($role)
+ {
+     $candidate = strtolower(trim((string)$role));
+     $known = [
+         'primary_support' => true,
+         'supplementary_support' => true,
+         'minimal_support' => true,
+         'no_meaningful_support' => true,
+     ];
+
+     if (isset($known[$candidate])) {
+         return $candidate;
+     }
+
+     return 'no_meaningful_support';
+ }
+
+ /**
+  * Derive the can_support flag from a confidence value.
+  *
+  * @param mixed $confidence converted with floatval()
+  * @return bool true when the value reaches 0.65, with a 0.001 tolerance
+  */
+ private function canSupportFromConfidence($confidence)
+ {
+     $score = floatval($confidence);
+     $threshold = 0.65 - 0.001;
+
+     return $score >= $threshold;
+ }
+
+ /**
+  * Floor for partially supported multi-pathway / heterogeneity claims.
+  *
+  * A confidence of 0.45 or lower is lifted to 0.78 (and can_support forced to true)
+  * when both of the following hold:
+  *  - the citing context makes a mechanism-type claim (heterogeneity, survival
+  *    pathways, single-target limits, drug resistance, ...);
+  *  - the DOI content or the model's reason mentions pathway-level evidence.
+  * A support_role of no_meaningful_support / minimal_support is then promoted to
+  * supplementary_support. In every other case the inputs are returned as given,
+  * with the confidence cast to float.
+  *
+  * Keyword matching: Latin keywords are delimited by ASCII-alphanumeric lookarounds
+  * and CJK keywords are matched as plain substrings. A \b boundary is unusable for
+  * this text: there is no word boundary between adjacent CJK characters, so
+  * \b-wrapped CJK keywords do not match inside running Chinese text, and under the
+  * u modifier the same applies to Latin keywords written directly next to CJK
+  * characters. Plural forms ("pathways", "genetic alterations") are accepted too.
+  *
+  * NOTE(review): $reason is model-generated and may name these keywords in a
+  * negative sense (e.g. "does not cover any pathway"); consider limiting the
+  * evidence corpus to $doiBlock.
+  *
+  * @param mixed $localContext text around the citation; cast to string
+  * @param mixed $doiBlock     DOI-derived content; cast to string
+  * @param mixed $confidence   current confidence; converted with floatval()
+  * @param mixed $canSupport   current can_support flag
+  * @param mixed $supportRole  current support role
+  * @param mixed $reason       model-supplied reason; cast to string
+  * @return array [confidence, can_support, support_role]
+  */
+ private function applyMultiClaimPartialSupportFloor($localContext, $doiBlock, $confidence, $canSupport, $supportRole, $reason)
+ {
+     $confidence = floatval($confidence);
+     $unchanged = [$confidence, $canSupport, $supportRole];
+
+     // Only low scores are candidates for the floor.
+     if ($confidence > 0.45) {
+         return $unchanged;
+     }
+
+     $claimText = trim((string)$localContext);
+     if ($claimText === '') {
+         return $unchanged;
+     }
+
+     $claimIsMechanism = (bool)preg_match(
+         '/(?<![a-z0-9])(?:genomic heterogeneity|heterogeneity|survival pathways?|pathways at once|single-target|multi.?pathways?|genetic alterations?|drug resistance)(?![a-z0-9])|异质性|生存通路|多.*通路|单靶点|耐药/ui',
+         $claimText
+     );
+     if (!$claimIsMechanism) {
+         return $unchanged;
+     }
+
+     // Trim the joined text so that "no DOI content and no reason" is really empty.
+     $corpus = trim(trim((string)$doiBlock) . ' ' . trim((string)$reason));
+     if ($corpus === '') {
+         return $unchanged;
+     }
+
+     $refHasPathwayEvidence = (bool)preg_match(
+         '/(?<![a-z0-9])(?:pathways?|PI3K|Akt|mTOR|Wnt|STAT3|NF-κB|NF-kB|genetic alterations?|MSI|CIN|drug resistance|signaling|multiple|APC|KRAS|TP53)(?![a-z0-9])|通路|耐药|信号/ui',
+         $corpus
+     );
+     if (!$refHasPathwayEvidence) {
+         return $unchanged;
+     }
+
+     // Partial support: align with the 0.78 band instead of 0.45.
+     if ($supportRole === 'no_meaningful_support' || $supportRole === 'minimal_support') {
+         $supportRole = 'supplementary_support';
+     }
+
+     return [0.78, true, $supportRole];
+ }
+
+ /**
+  * Allowed confidence bands, in ascending order.
+  *
+  * @return float[]
+  */
+ private function getReferenceCheckConfidenceBands()
+ {
+     // Bands below 0.65 fall under the can_support threshold; the rest reach it.
+     $belowThreshold = [0.15, 0.25, 0.45];
+     $atOrAboveThreshold = [0.65, 0.78, 0.85, 0.92, 0.98];
+
+     return array_merge($belowThreshold, $atOrAboveThreshold);
+ }
+
+ /**
+  * Snap a confidence value to the closest allowed band.
+  *
+  * On an exact tie the lower band wins, because a later band only replaces the
+  * current choice when it is strictly closer.
+  *
+  * @param mixed $confidence numeric confidence
+  * @return float one of the values from getReferenceCheckConfidenceBands()
+  */
+ private function snapReferenceCheckConfidenceValue($confidence)
+ {
+     $closest = null;
+     $smallestGap = null;
+
+     foreach ($this->getReferenceCheckConfidenceBands() as $candidate) {
+         $gap = abs($confidence - $candidate);
+         if ($smallestGap === null || $gap < $smallestGap) {
+             $smallestGap = $gap;
+             $closest = $candidate;
+         }
+     }
+
+     return $closest;
+ }
+
+ /**
+ * @deprecated 兼容旧逻辑
+ */
+ private function getReferenceCheckConfidenceBandsLegacy($isMatch)
{
return $isMatch
? [0.65, 0.78, 0.85, 0.92, 0.98]
@@ -1183,22 +1796,24 @@ PROMPT;
}
/**
- * 将模型输出的 confidence 吸附到合法档位(如 0.95 → 0.92,0.75 → 0.78)
+ * 将模型输出的 confidence 吸附到合法档位
*/
private function snapReferenceCheckConfidence($confidence, $isMatch)
{
- $bands = $this->getReferenceCheckConfidenceBands($isMatch);
-
+ $snapped = $this->snapReferenceCheckConfidenceValue($confidence);
+ $bands = $this->getReferenceCheckConfidenceBandsLegacy($isMatch);
+ if (in_array($snapped, $bands, true)) {
+ return $snapped;
+ }
foreach ($bands as $band) {
- if (abs($confidence - $band) < 0.001) {
+ if (abs($snapped - $band) < 0.001) {
return $band;
}
}
-
$nearest = $bands[0];
- $minDiff = abs($confidence - $nearest);
+ $minDiff = abs($snapped - $nearest);
foreach ($bands as $band) {
- $diff = abs($confidence - $band);
+ $diff = abs($snapped - $band);
if ($diff < $minDiff) {
$minDiff = $diff;
$nearest = $band;
diff --git a/application/common/service/ReferenceRelevanceLlmService.php b/application/common/service/ReferenceRelevanceLlmService.php
index 90e1fdef..f37fb7d1 100644
--- a/application/common/service/ReferenceRelevanceLlmService.php
+++ b/application/common/service/ReferenceRelevanceLlmService.php
@@ -138,12 +138,18 @@ class ReferenceRelevanceLlmService
- **「覆盖部分结局」不足以进入 0.78**:原句点名了多条通路 + 多个结局,文献仅命中其中 1~2 个结局(如仅凋亡/增殖),且**点名通路在本文结果中全部缺失(仅讨论转引)**或主语层级不对 → 单条 **限 0.45(weakly_related / minimal_relevance)**,不得给 0.65~0.78
- 仅同领域沾边 1–2 项、主语或机制层级不对 → **0.45**
- **进入 0.65~0.78 的前提**:主语对齐(X 单体)+ 本文自身结果命中原句点名通路/结局的多数项;几乎全部明确对应 → **0.85+**
+11. **文献「主题粒度」必须匹配 claim「主题粒度」**:引用处为**疾病总论型 claim**(流行病学负担、标准/多模态治疗现状与局限、基因组异质性、单靶点治疗受限、亟需新策略等总体背景)时:
+ - 最适合的来源是**疾病总体综述 / 分子病理综述 / 精准肿瘤学 / 耐药综述**;此类文献正面、系统地为该总论 claim 提供依据 → 可 **0.85+**
+ - **单一药物 / 单一成分 / 单一通路的专题综述**(如「某化合物抗某癌:A review」),即使同病、同大方向,也只是专题视角、并非为该总论 claim 做系统总结 → 通常 **partially_related(0.72~0.78)**,**不得给 0.85+**
+ - **单基因 / 单通路的机制原始研究**对纯流行病学负担 claim → 仍按规则 3 给 **0.45**
+ - 判断要点:文献类型是否「为该总论 claim 本身做系统综述/总论」;仅同病同方向、或只支撑整段中某一两句(如「需要更安全的新策略」),不足以进入 highly_related
==================================================
【一、必须先拆解 claim】
从【本引用位置附近上下文】中提炼最小主张单元(Claim A, Claim B…),**不要**把整句笼统归为「大概讲抗癌」。例如:
- **主语/研究对象**(化合物单体 vs 植物提取物 vs 其他物种;是否「X has been demonstrated」)
- **证据语气与层级**(demonstrated / mechanistically vs predict / suggest;本文结果 vs 讨论转引)
+- **claim 主题粒度**:是否为疾病总论型(流行病学负担 / 治疗现状与局限 / 基因组异质性 / 单靶点受限 / 亟需新策略);若是,要求「总体综述 / 分子病理 / 精准肿瘤学 / 耐药综述」类来源,单一药物专题综述只算 partially_related
- 疾病流行病学(高发、死亡率)
- **点名通路/分子机制**(PI3K/AKT、MAPK、NF-κB 等,须逐项)
- **点名功能结局**(抑制增殖、凋亡、血管生成、炎症信号等,须逐项)
diff --git a/application/extra/rabbitmq.php b/application/extra/rabbitmq.php
index 05aa89b4..4b41a649 100644
--- a/application/extra/rabbitmq.php
+++ b/application/extra/rabbitmq.php
@@ -13,4 +13,11 @@ return [
'dlq' => 'ref_check.article.dlq',
'route_key' => 'article.start',
],
+
+ 'reference_relevance' => [
+ 'exchange' => 'reference_relevance',
+ 'queue' => 'ref_relevance.article',
+ 'dlq' => 'ref_relevance.article.dlq',
+ 'route_key' => 'article.start',
+ ],
];