diff --git a/application/api/controller/Base.php b/application/api/controller/Base.php index 380066a2..6d76cb01 100644 --- a/application/api/controller/Base.php +++ b/application/api/controller/Base.php @@ -1178,6 +1178,107 @@ class Base extends Controller return $ids; } + /** + * 解析方括号引用内层(如 1,2 / 3-5),展开为文献序号列表。 + * + * @return int[] + */ + protected function expandCitationBracketNumbers(string $referencePart): array + { + $referencePart = trim($referencePart); + if ($referencePart === '') { + return []; + } + $referencePart = str_replace( + [',', '–', '—', '−', '‐', '‑'], + [',', '-', '-', '-', '-', '-'], + $referencePart + ); + $out = []; + $segments = preg_split('/\s*,\s*/', $referencePart); + foreach ($segments as $seg) { + $seg = trim((string)$seg); + if ($seg === '') { + continue; + } + $seg = str_replace(['–', '—', '−', '‐', '‑'], '-', $seg); + if (preg_match('/^(\d+)\s*-\s*(\d+)$/', $seg, $m)) { + $a = intval($m[1]); + $b = intval($m[2]); + if ($a > $b) { + $t = $a; + $a = $b; + $b = $t; + } + for ($i = $a; $i <= $b; $i++) { + $out[] = $i; + } + } else { + $n = intval($seg); + if ($n > 0) { + $out[] = $n; + } + } + } + return $out; + } + + /** + * 从正文片段提取被引用的文献序号(reference_no = index+1)。 + * 兼容 [n] / [n] 两种形态。 + * + * @return int[] + */ + protected function extractCitationRefNosFromMainContent(string $text, int $pArticleId = 0): array + { + if ($text === '') { + return []; + } + + $nos = []; + + $pReferIds = $this->extractMyciteIds($text); + if (!empty($pReferIds) && $pArticleId > 0) { + $refers = Db::name('production_article_refer') + ->where('p_article_id', $pArticleId) + ->whereIn('p_refer_id', $pReferIds) + ->where('state', 0) + ->field('p_refer_id,index') + ->select(); + $idToNo = []; + foreach ($refers as $row) { + $idToNo[intval($row['p_refer_id'])] = intval($row['index']) + 1; + } + foreach ($pReferIds as $pid) { + if (isset($idToNo[$pid])) { + $nos[] = $idToNo[$pid]; + } + } + } + + if 
(preg_match_all('/(?:<\s*blue[^>]*>)?\[([^\]]+)\](?:<\/\s*blue\s*>)?/iu', $text, $m)) { + foreach ($m[1] as $inner) { + $innerNorm = str_replace( + [',', '–', '—', '−', '‐', '‑'], + [',', '-', '-', '-', '-', '-'], + trim((string)$inner) + ); + if (!preg_match('/^[\d\s,\-]+$/u', $innerNorm)) { + continue; + } + foreach ($this->expandCitationBracketNumbers($innerNorm) as $n) { + if ($n > 0) { + $nos[] = $n; + } + } + } + } + + $nos = array_values(array_unique($nos)); + sort($nos, SORT_NUMERIC); + return $nos; + } + /** * table_data:二维数组 JSON [[{text,colspan,rowspan},...],...];支持双重 JSON 字符串编码。 * diff --git a/application/api/controller/Preaccept.php b/application/api/controller/Preaccept.php index 79794434..9f89b31d 100644 --- a/application/api/controller/Preaccept.php +++ b/application/api/controller/Preaccept.php @@ -7,7 +7,7 @@ use think\Env; use think\Queue; use think\Validate; use app\common\CrossrefService; -use app\common\ReferenceCheckService; +use app\common\ReferenceRelevanceCheckService; class Preaccept extends Base { @@ -27,7 +27,7 @@ class Preaccept extends Base return; } try { - (new ReferenceCheckService())->clearArticleChecksByPArticleId($pArticleId); + (new ReferenceRelevanceCheckService())->clearArticleChecksByPArticleId($pArticleId); } catch (\Exception $e) { \think\Log::error( 'resetArticleChecksOnReferChange[' . $sourceTag . 
'] p_article_id=' @@ -1220,6 +1220,14 @@ class Preaccept extends Base $insert['ctime'] = time(); $this->article_main_log_obj->insert($insert); +// $articleId = intval($am_info['article_id']); +// $amId = intval($data['am_id']); +// +// // 本段引用集合变化(如 10,11 → 11,12)时仅清空该 am_id 下的校对明细 +// if ($this->hasMainCitationChange($old_content, $new_raw_content, $articleId)) { +// $this->clearMainChecksOnCitationChange($articleId, $amId); +// } + // 判断是否存在“引用删除”(新 content 相对旧 content 缺少 ) $hasCitationDeletion = $this->hasMyciteDeletion($old_content, $new_raw_content); @@ -1245,6 +1253,39 @@ class Preaccept extends Base //返回更新数据 20260119 end } + /** + * After a single main-text section is saved, clear only the citation-check rows already stored for that am_id (located by article_id). Does nothing when either id is <= 0; a failure is logged and not rethrown. + */ + private function clearMainChecksOnCitationChange(int $articleId, int $amId) + { + if ($articleId <= 0 || $amId <= 0) { + return; + } + try { + (new \app\common\ReferenceCheckService())->clearChecksByAmId($articleId, $amId); /* fully qualified: this file no longer imports ReferenceCheckService, and a class-not-found Error would bypass the \Exception catch below */ + } catch (\Exception $e) { + \think\Log::error( + 'clearMainChecksOnCitationChange article_id=' . $articleId + . ' am_id=' . $amId . ' ' . 
$e->getMessage() + ); + } + } + + /** + * 本段正文引用集合是否变化(增删改任一即 true)。 + * old 多为库内 [n],new 多为编辑器提交的 。 + */ + private function hasMainCitationChange(string $oldContent, string $newContent, int $articleId): bool + { + $pArticleId = intval(Db::name('production_article') + ->where('article_id', $articleId) + ->whereIn('state', [0, 2]) + ->value('p_article_id')); + $oldNos = $this->extractCitationRefNosFromMainContent($oldContent, $pArticleId); + $newNos = $this->extractCitationRefNosFromMainContent($newContent, $pArticleId); + return $oldNos !== $newNos; + } + /** * 是否发生 删除(new 相对 old 少了任意引用 id) */ diff --git a/application/api/controller/References.php b/application/api/controller/References.php index 331edd62..ea539ed6 100644 --- a/application/api/controller/References.php +++ b/application/api/controller/References.php @@ -12,6 +12,8 @@ use think\Db; use think\Env; use think\Queue; use app\common\ReferenceCheckService; +use app\common\ReferenceRelevanceCheckService; +use app\common\DbReconnectHelper; /** * @title 参考文献 * @description 相关方法汇总 @@ -1309,11 +1311,195 @@ class References extends Base } return json_encode(['status' => 8,'msg' => 'fail']); } + // ============================================================ + // 参考文献「主题相关性」校对(独立模块,RabbitMQ 链式消费) + // 表:t_article_reference_relevance_check_result / t_article_reference_relevance_check_batch + // 消费:php think reference_relevance:mq-consume + // ============================================================ + /** - * 参考文献第一次校对 + * 启动整篇参考文献相关性校对 + * POST: p_article_id(必填) + * + * 文献摘要/内容优先读 t_production_article_refer.abstract_text、refer_content_cleaned; + * 二者都为空时在校对执行阶段抓取并回写 refer 表,校对时始终从 refer 表读取。 + */ + public function allReferenceCheckAI() + { + $aParam = $this->request->post(); + $iPArticleId = empty($aParam['p_article_id']) ? 
0 : intval($aParam['p_article_id']); + if ($iPArticleId <= 0) { + return jsonError('Please select an article'); + } + + $aProductionArticle = Db::name('production_article') + ->field('p_article_id,article_id') + ->where(['p_article_id' => $iPArticleId, 'state' => ['in', [0, 2]]]) + ->find(); + if (empty($aProductionArticle)) { + return jsonError('No articles found'); + } + if ($this->checkReferStatus($iPArticleId) == 0) { + return jsonError('Please correct the reference content before running the check.'); + } + + $existing = Db::name('article_reference_relevance_check_result') + ->where('p_article_id', $iPArticleId) + ->count(); + if (intval($existing) > 0) { + return jsonError('This article already has relevance check records. Use referenceRelevanceCheckResetAI to rerun.'); + } + + try { + DbReconnectHelper::ensure(); + $result = (new ReferenceRelevanceCheckService())->enqueueByPArticle($aProductionArticle); + if (empty($result['check_ids'])) { + return jsonError('No reference citations were found in the article.'); + } + return jsonSuccess($result); + } catch (\Exception $e) { + return jsonError($e->getMessage()); + } + } + + /** + * 相关性校对进度 + * POST: p_article_id + */ + public function referenceRelevanceCheckProgressAI() + { + $aParam = $this->request->post(); + $iPArticleId = empty($aParam['p_article_id']) ? 
0 : intval($aParam['p_article_id']); + if ($iPArticleId <= 0) { + return jsonError('p_article_id is required'); + } + try { + $result = (new ReferenceRelevanceCheckService())->getProgressByPArticleId($iPArticleId); + return jsonSuccess($result); + } catch (\Exception $e) { + return jsonError($e->getMessage()); + } + } + + /** + * 按 p_article_id 查整篇文章相关性校对总状态(用于前端按钮分流) + * + * POST/GET: p_article_id(必填) + * + * 返回 status:0=未校对 1=校对中 2=校对完成 + * 计数维度为参考文献(按 reference_no 分组),与 referenceRelevanceCheckProgressAI 一致。 + */ + public function referenceRelevanceCheckArticleStatusAI() + { + $aParam = $this->request->post(); + if (empty($aParam)) { + $aParam = $this->request->param(); + } + + $iPArticleId = empty($aParam['p_article_id']) ? 0 : intval($aParam['p_article_id']); + if ($iPArticleId <= 0) { + return jsonError('p_article_id is required'); + } + + try { + $result = (new ReferenceRelevanceCheckService())->getArticleProgressStatusByPArticleId($iPArticleId); + return jsonSuccess($result); + } catch (\Exception $e) { + return jsonError($e->getMessage()); + } + } + + /** + * 按 p_refer_id 查相关性校对明细 + * POST: p_refer_id + */ + public function referenceRelevanceCheckDetailsAI() + { + $aParam = $this->request->post(); + $iPReferId = empty($aParam['p_refer_id']) ? 0 : intval($aParam['p_refer_id']); + if ($iPReferId <= 0) { + return jsonError('p_refer_id is required'); + } + try { + $result = (new ReferenceRelevanceCheckService())->getDetailsByPReferId($iPReferId); + return jsonSuccess($result); + } catch (\Exception $e) { + return jsonError($e->getMessage()); + } + } + + /** + * 清空并重新执行相关性校对 + * POST: p_article_id + */ + public function referenceRelevanceCheckResetAI() + { + $aParam = $this->request->post(); + $iPArticleId = empty($aParam['p_article_id']) ? 
0 : intval($aParam['p_article_id']); + if ($iPArticleId <= 0) { + return jsonError('Please select an article'); + } + $aProductionArticle = Db::name('production_article') + ->field('p_article_id,article_id') + ->where(['p_article_id' => $iPArticleId, 'state' => ['in', [0, 2]]]) + ->find(); + if (empty($aProductionArticle)) { + return jsonError('No articles found'); + } + if ($this->checkReferStatus($iPArticleId) == 0) { + return jsonError('Please correct the reference content before running the check.'); + } + try { + $result = (new ReferenceRelevanceCheckService())->resetAndRecheckByArticle($aProductionArticle); + return jsonSuccess($result); + } catch (\Exception $e) { + return jsonError($e->getMessage()); + } + } + + /** + * 仅清空相关性校对记录(不重跑) + * POST: p_article_id + */ + public function referenceRelevanceCheckClearAI() + { + $aParam = $this->request->post(); + $iPArticleId = empty($aParam['p_article_id']) ? 0 : intval($aParam['p_article_id']); + if ($iPArticleId <= 0) { + return jsonError('p_article_id is required'); + } + try { + $deleted = (new ReferenceRelevanceCheckService())->clearByPArticleId($iPArticleId); + return jsonSuccess(['p_article_id' => $iPArticleId, 'deleted' => intval($deleted)]); + } catch (\Exception $e) { + return jsonError($e->getMessage()); + } + } + + /** + * 仅重跑相关性 status=0 的记录(不清空,不抓摘要,不清洗文献内容) + * POST: p_article_id + */ + public function referenceRelevanceCheckRecheckPendingAI() + { + $aParam = $this->request->post(); + $iPArticleId = empty($aParam['p_article_id']) ? 
0 : intval($aParam['p_article_id']); + if ($iPArticleId <= 0) { + return jsonError('p_article_id is required'); + } + try { + $result = (new ReferenceRelevanceCheckService())->recheckPendingOnlyByArticle($iPArticleId); + return jsonSuccess($result); + } catch (\Exception $e) { + return jsonError($e->getMessage()); + } + } + + /** + * 参考文献第一次校对(支撑力度) * @return \think\response\Json */ - public function allReferenceCheckAI(){ + public function allReferenceCheckAI2(){ //获取参数 $aParam = empty($aParam) ? $this->request->post() : $aParam; @@ -1537,7 +1723,6 @@ class References extends Base * p_article_id(可选) * * 仅重跑 status=3(校对失败)的记录;不改动 refer_text,只重置结果字段后入 RabbitMQ 批次队列。 - * 返回:p_refer_id、p_article_id、reset、queued、check_ids、queue */ public function referenceCheckRecheckFailedAI() { @@ -1561,6 +1746,36 @@ class References extends Base } } + /** + * 某条参考文献下「校对失败」重跑,并联动同一引用标签分组(如 [1,2])全部重跑(异步) + * + * POST/GET: p_refer_id(必填) + * p_article_id(可选) + * + * 返回:p_refer_id、p_article_id、reset、queued、check_ids、queue + */ + public function referenceCheckRecheckFailedWithGroupAI() + { + $aParam = $this->request->post(); + if (empty($aParam)) { + $aParam = $this->request->param(); + } + + $iPReferId = empty($aParam['p_refer_id']) ? 0 : intval($aParam['p_refer_id']); + if ($iPReferId <= 0) { + return json_encode(array('status' => 2, 'msg' => 'Please select a reference')); + } + + $iPArticleId = empty($aParam['p_article_id']) ? 
0 : intval($aParam['p_article_id']); + + try { + $result = (new ReferenceCheckService())->enqueueRecheckFailedByPReferIdWithGroup($iPReferId, $iPArticleId); + return jsonSuccess($result); + } catch (\Exception $e) { + return jsonError($e->getMessage()); + } + } + /** * 按 p_refer_id 查单条参考文献的校对明细与进度 * @@ -1590,6 +1805,47 @@ class References extends Base } } + /** + * 对校对明细中从未出现过的参考文献(p_refer_id 差集)重新扫描全文并入队校对 + * + * POST/GET: p_article_id(必填) + * + * 差集:production_article_refer(state=0) 减去 article_reference_check_result 已出现的 p_refer_id。 + * 适用:首次校对漏匹配、表格后上传、正文补标等场景。不重置已有明细。 + * 前置:须已执行过第一次校对(库中已有校对记录)。 + * + * 返回:missing_p_refer_ids、matched_p_refer_ids、still_unmatched_p_refer_ids、 + * queued、new_reference_nos、check_ids、queue + */ + public function referenceCheckRematchNewAI() + { + $aParam = $this->request->post(); + if (empty($aParam)) { + $aParam = $this->request->param(); + } + + $iPArticleId = empty($aParam['p_article_id']) ? 0 : intval($aParam['p_article_id']); + if ($iPArticleId <= 0) { + return json_encode(array('status' => 2, 'msg' => 'Please select an article')); + } + + $aWhere = ['p_article_id' => $iPArticleId, 'state' => ['in', [0, 2]]]; + $aProductionArticle = Db::name('production_article')->field('p_article_id,article_id')->where($aWhere)->find(); + if (empty($aProductionArticle)) { + return json_encode(array('status' => 3, 'msg' => 'No articles found')); + } + if ($this->checkReferStatus($iPArticleId) == 0) { + return jsonError('Please correct the reference content before running the check.'); + } + + try { + $result = (new ReferenceCheckService())->enqueueNewlyMatchedByPArticle($aProductionArticle); + return jsonSuccess($result); + } catch (\Exception $e) { + return jsonError($e->getMessage()); + } + } + public function checkReferStatus($p_article_id){ $list = $this->production_article_refer_obj->where('p_article_id', $p_article_id)->where('state', 0)->select(); if (!$list) { @@ -1604,4 +1860,6 @@ class References extends Base } return $frag; } + + } 
diff --git a/application/command.php b/application/command.php index 43892e98..cbea1b3b 100644 --- a/application/command.php +++ b/application/command.php @@ -11,4 +11,5 @@ return [ 'app\\command\\ReferenceCheckMqConsume', + 'app\\command\\ReferenceRelevanceMqConsume', ]; diff --git a/application/common/PubmedService.php b/application/common/PubmedService.php index ad17e2da..50f565ec 100644 --- a/application/common/PubmedService.php +++ b/application/common/PubmedService.php @@ -96,6 +96,68 @@ class PubmedService return $info; } + /** + * 按书目信息检索 PubMed(标题 + 第一作者 + 年份) + */ + public function searchByBibliographic($title, $author = '', $year = ''): ?array + { + $title = trim((string)$title); + if ($title === '') { + return null; + } + + $terms = ['(' . $this->quoteTerm($title) . '[Title])']; + $author = trim((string)$author); + if ($author !== '') { + $parts = preg_split('/[,;]/', $author); + $first = trim((string)($parts[0] ?? '')); + if ($first !== '') { + $terms[] = '(' . $this->quoteTerm($first) . '[Author])'; + } + } + $year = trim((string)$year); + if ($year !== '' && preg_match('/^(19|20)\d{2}$/', $year)) { + $terms[] = '(' . $year . '[pdat])'; + } + + $pmid = $this->esearch(implode(' AND ', $terms)); + if (!$pmid) { + return null; + } + + $info = $this->fetchByPmid($pmid); + if (!$info) { + return null; + } + $info['pmid'] = $pmid; + $info['doi'] = $this->extractDoiFromPmidRecord($pmid); + return $info; + } + + private function quoteTerm($text) + { + return str_replace('"', '', trim((string)$text)); + } + + private function extractDoiFromPmidRecord($pmid) + { + $url = $this->base . 'efetch.fcgi?' . 
http_build_query([ + 'db' => 'pubmed', + 'id' => $pmid, + 'retmode' => 'xml', + 'tool' => $this->tool, + 'email' => $this->email, + ]); + $xml = $this->httpGet($url); + if ($xml === '') { + return ''; + } + if (preg_match('/<ArticleId\b[^>]*\bIdType="doi"[^>]*>([^<]+)<\/ArticleId>/i', $xml, $m)) { /* match only the DOI-typed ArticleId; an untyped pattern captures the first ArticleId of any type */ + return trim($m[1]); + } + return ''; + } + // ----------------- Internals ----------------- private function esearch(string $term): ?string diff --git a/application/common/ReferenceCheckService.php b/application/common/ReferenceCheckService.php index e551a482..0cf20986 100644 --- a/application/common/ReferenceCheckService.php +++ b/application/common/ReferenceCheckService.php @@ -9,6 +9,8 @@ use app\common\mq\ReferenceCheckMqPublisher; /** * 正文 <blue>[n]</blue> 引用与 t_production_article_refer(index+1=n)相关性校对。 + * 校对上下文取 t_article_main 一条记录(正文 content 或表格 table_data 展平)。 + * 同一引用标签 [1,2]、[4-6] 联合校对,cite_group_refs 存展开序号。 * LLM 配置与 PromotionLlmService 相同;异步任务走 RabbitMQ(一篇一条消息)。 */ class ReferenceCheckService @@ -69,6 +71,9 @@ class ReferenceCheckService /** LLM 评分(confidence)通过阈值:>= 该值视为"通过" */ const PASS_CONFIDENCE_THRESHOLD = 0.65; + /** 是否启用二轮 DOI/Crossref 复核(暂时关闭时设为 false) */ + const SECOND_PASS_ENABLED = false; + /** * 正文引用标签两种排版(带 /u): * 1) [8, 9][13-15] —— 方括号在 blue 内 @@ -93,6 +98,11 @@ class ReferenceCheckService return isset($arr[$key]) ? 
$arr[$key] : $default; } + private function isSecondPassEnabled() + { + return self::SECOND_PASS_ENABLED; + } + /** 新建/重置校对明细时的队列初始字段 */ private function newCheckRecordFields(array $fields, $queueStatus = self::QUEUE_PENDING, $retryCount = 0) { @@ -101,8 +111,26 @@ class ReferenceCheckService return $fields; } + /** 重置校对结果时清零的字段(含支撑力度扩展字段) */ + private function referenceCheckResultResetFields(array $extra = []) + { + return array_merge([ + 'status' => self::RECORD_PENDING, + 'is_match' => 0, + 'can_support' => 0, + 'confidence' => 0, + 'reason' => '', + 'support_role' => '', + 'combined_can_support' => 0, + 'combined_confidence' => 0, + 'combined_reason' => '', + 'error_msg' => '', + ], $extra); + } + public function markQueueRuntime($checkId, $queueStatus, $retryCount = null) { + DbReconnectHelper::ensure(); $checkId = intval($checkId); if ($checkId <= 0) { return 0; @@ -113,6 +141,84 @@ class ReferenceCheckService } return Db::name('article_reference_check_result')->where('id', $checkId)->update($fields); } + public function enqueueByPArticle($prod){ + if (empty($prod)) { + throw new \RuntimeException('production_article not found'); + } + $pArticleId = intval($prod['p_article_id']); + $articleId = intval($prod['article_id']); + $referMap = $this->loadReferMapByPArticleId($pArticleId); + + $mains = Db::name('article_main') + ->field('am_id,content,article_id,type,amt_id') + ->where('article_id', $articleId) + ->whereIn('state', [0, 2]) + ->order('sort asc') + ->select(); + if (empty($mains)) { + throw new \RuntimeException('article_main is empty'); + } + $queued = 0; + $skipped = 0; + $pendingJobs = []; + $amIdsWithJobs = []; + $now = date('Y-m-d H:i:s'); + foreach ($mains as $main) { + $amId = intval($main['am_id']); + $citations = $this->extractReferencesForArticleMain($main); + if (empty($citations)) { + $this->setAmRefCheckStatus($amId, self::AM_STATUS_NONE); + continue; + } + $sectionText = $this->resolveSectionTextForArticleMain($main); + foreach 
($citations as $cite) { + foreach ($cite['reference_numbers'] as $refNo) { + $referIndex = $refNo - 1; + if ($referIndex < 0 || !isset($referMap[$referIndex])) { + $skipped++; + continue; + } + $scope = [ + 'article_id' => $main['article_id'], + 'p_article_id' => $pArticleId, + 'am_id' => $amId, + 'section_text' => $sectionText, + ]; + $checkId = $this->insertCitationCheckRecord($scope, $cite, $refNo, $referMap[$referIndex], $now); + if ($checkId <= 0) { + $skipped++; + continue; + } + + $this->appendCitationPendingJob($pendingJobs, $checkId, $refNo, $amId, $cite['text_start']); + $queued++; + $amIdsWithJobs[$amId] = true; + } + } + /* no early exit: every article_main section must be scanned, as in enqueueByArticle and enqueueNewlyMatchedByPArticle; a break here dropped all sections after the first one containing citations */ + } + $checkIds = $this->enqueueChecksSortedByReferenceNo($pendingJobs, $pArticleId, 'enqueue'); + foreach (array_keys($amIdsWithJobs) as $amId) { + $this->setAmRefCheckStatus($amId, self::AM_STATUS_RUNNING); + } + + return [ + 'article_id' => $articleId, + 'p_article_id' => $pArticleId, + 'queued' => $queued, + 'skipped' => $skipped, + 'check_ids' => $checkIds, + 'queue' => self::TRANSPORT_RABBITMQ, + ]; + } + + + + + + + + /** * 合并匹配两种 blue 引用排版,按在正文中的起始位置排序。 @@ -173,6 +279,7 @@ class ReferenceCheckService 'refer_index' => intval($this->arrGet($extra, 'refer_index', 0)), 'reference_no' => intval($this->arrGet($extra, 'reference_no', 0)), 'reference_raw' => (string)$this->arrGet($extra, 'reference_raw', ''), + 'cite_group_refs' => (string)$this->arrGet($extra, 'cite_group_refs', ''), 'cite_tag_start' => intval($this->arrGet($extra, 'cite_tag_start', 0)), 'cite_tag_end' => intval($this->arrGet($extra, 'cite_tag_end', 0)), 'text_start' => intval($this->arrGet($extra, 'text_start', 0)), @@ -229,6 +336,7 @@ class ReferenceCheckService $skipped = 0; $pendingJobs = []; $now = date('Y-m-d H:i:s'); + $sectionText = $this->resolveSectionTextForArticleMain($main); foreach ($citations as $cite) { foreach ($cite['reference_numbers'] as $refNo) { $referIndex = $refNo - 1; @@ -236,30 +344,24 @@ class ReferenceCheckService $skipped++; 
continue; } - $refer = $referMap[$referIndex]; - $referText = $this->formatReferForLlm($refer); - - $checkId = Db::name('article_reference_check_result')->insertGetId($this->newCheckRecordFields([ - 'article_id' => $main['article_id'], - 'p_article_id' => $pArticleId, - 'am_id' => intval($main['am_id']), - 'reference_no' => $refNo, - 'refer_index' => $refNo, - 'origin_text' => $cite['original_text'], - 'refer_text' => $referText, - 'p_refer_id' => $referMap[$referIndex]['p_refer_id'], - 'text_start' => $cite['text_start'], - 'text_end' => $cite['text_end'], - 'status' => self::RECORD_PENDING, - 'created_at' => $now, - 'updated_at' => $now, - ])); - $pendingJobs[] = [ - 'check_id' => intval($checkId), - 'reference_no' => intval($refNo), + $scope = [ + 'article_id' => $main['article_id'], + 'p_article_id' => $pArticleId, 'am_id' => intval($main['am_id']), - 'text_start' => intval($cite['text_start']), + 'section_text' => $sectionText, ]; + $checkId = $this->insertCitationCheckRecord($scope, $cite, $refNo, $referMap[$referIndex], $now); + if ($checkId <= 0) { + $skipped++; + continue; + } + $this->appendCitationPendingJob( + $pendingJobs, + $checkId, + $refNo, + intval($main['am_id']), + $cite['text_start'] + ); } } @@ -276,6 +378,14 @@ class ReferenceCheckService throw new \InvalidArgumentException('article_id is required'); } + if (!$this->isSecondPassEnabled()) { + return [ + 'article_id' => $articleId, + 'check_ids2' => [], + 'queued' => 0, + ]; + } + $rows = Db::name('article_reference_check_result') ->where('article_id', $articleId) ->where('status', self::RECORD_COMPLETED) @@ -300,87 +410,6 @@ class ReferenceCheckService 'queued' => count($checkIds2), ]; } - public function enqueueByPArticle($prod){ - if (empty($prod)) { - throw new \RuntimeException('production_article not found'); - } - $pArticleId = intval($prod['p_article_id']); - $articleId = intval($prod['article_id']); - $referMap = $this->loadReferMapByPArticleId($pArticleId); - - $mains = 
Db::name('article_main') - ->field('am_id,content,article_id,type,amt_id') - ->where('article_id', $articleId) - ->whereIn('state', [0, 2]) - ->order('sort asc') - ->select(); - if (empty($mains)) { - throw new \RuntimeException('article_main is empty'); - } - $queued = 0; - $skipped = 0; - $pendingJobs = []; - $amIdsWithJobs = []; - $now = date('Y-m-d H:i:s'); - foreach ($mains as $main) { - $amId = intval($main['am_id']); - $citations = $this->extractReferencesForArticleMain($main); - if (empty($citations)) { - $this->setAmRefCheckStatus($amId, self::AM_STATUS_NONE); - continue; - } - foreach ($citations as $cite) { - foreach ($cite['reference_numbers'] as $refNo) { - $referIndex = $refNo - 1; - if ($referIndex < 0 || !isset($referMap[$referIndex])) { - $skipped++; - continue; - } - $refer = $referMap[$referIndex]; - $referText = $this->formatReferForLlm($refer); - - // [70-73] 展开为 reference_no=70,71,72,73 共 4 条记录;先入队表,再按文献号正序校对 - $checkId = Db::name('article_reference_check_result')->insertGetId($this->newCheckRecordFields([ - 'article_id' => $main['article_id'], - 'p_article_id' => $pArticleId, - 'am_id' => $amId, - 'reference_no' => $refNo, - 'refer_index' => $refNo, - 'origin_text' => $cite['original_text'], - 'refer_text' => $referText, - 'p_refer_id' => $referMap[$referIndex]['p_refer_id'], - 'text_start' => $cite['text_start'], - 'text_end' => $cite['text_end'], - 'status' => self::RECORD_PENDING, - 'created_at' => $now, - 'updated_at' => $now, - ])); - - $pendingJobs[] = [ - 'check_id' => intval($checkId), - 'reference_no' => intval($refNo), - 'am_id' => $amId, - 'text_start' => intval($cite['text_start']), - ]; - $queued++; - $amIdsWithJobs[$amId] = true; - } - } - } - $checkIds = $this->enqueueChecksSortedByReferenceNo($pendingJobs, $pArticleId, 'enqueue'); - foreach (array_keys($amIdsWithJobs) as $amId) { - $this->setAmRefCheckStatus($amId, self::AM_STATUS_RUNNING); - } - - return [ - 'article_id' => $articleId, - 'p_article_id' => $pArticleId, - 
'queued' => $queued, - 'skipped' => $skipped, - 'check_ids' => $checkIds, - 'queue' => self::TRANSPORT_RABBITMQ, - ]; - } public function enqueueByArticle($articleId){ if ($articleId <= 0) { throw new \InvalidArgumentException('article_id is required'); @@ -416,6 +445,7 @@ class ReferenceCheckService $this->setAmRefCheckStatus($amId, self::AM_STATUS_NONE); continue; } + $sectionText = $this->resolveSectionTextForArticleMain($main); foreach ($citations as $cite) { foreach ($cite['reference_numbers'] as $refNo) { $referIndex = $refNo - 1; @@ -423,32 +453,19 @@ class ReferenceCheckService $skipped++; continue; } - $refer = $referMap[$referIndex]; - $referText = $this->formatReferForLlm($refer); - - // [70-73] 展开为 reference_no=70,71,72,73 共 4 条记录;先入队表,再按文献号正序校对 - $checkId = Db::name('article_reference_check_result')->insertGetId($this->newCheckRecordFields([ - 'article_id' => $main['article_id'], - 'p_article_id' => $pArticleId, - 'am_id' => $amId, - 'reference_no' => $refNo, - 'refer_index' => $refNo, - 'origin_text' => $cite['original_text'], - 'refer_text' => $referText, - 'p_refer_id' => $referMap[$referIndex]['p_refer_id'], - 'text_start' => $cite['text_start'], - 'text_end' => $cite['text_end'], - 'status' => self::RECORD_PENDING, - 'created_at' => $now, - 'updated_at' => $now, - ])); - - $pendingJobs[] = [ - 'check_id' => intval($checkId), - 'reference_no' => intval($refNo), + $scope = [ + 'article_id' => $main['article_id'], + 'p_article_id' => $pArticleId, 'am_id' => $amId, - 'text_start' => intval($cite['text_start']), + 'section_text' => $sectionText, ]; + $checkId = $this->insertCitationCheckRecord($scope, $cite, $refNo, $referMap[$referIndex], $now); + if ($checkId <= 0) { + $skipped++; + continue; + } + + $this->appendCitationPendingJob($pendingJobs, $checkId, $refNo, $amId, $cite['text_start']); $queued++; $amIdsWithJobs[$amId] = true; } @@ -470,6 +487,200 @@ class ReferenceCheckService ]; } + /** + * 对「参考文献表中有、校对明细中从未出现过的 p_refer_id」重新扫描全文并入队校对。 + * + * 
差集:production_article_refer(state=0) 的 p_refer_id + * 减去 article_reference_check_result 中已出现过的 p_refer_id。 + * 仅对上述缺失文献在全文(含表格)中查找引用标签,命中则新增明细并入队。 + * 不删除、不重跑已有明细。 + * + * @param array $prod production_article 行(需含 p_article_id、article_id) + * @return array + */ + public function enqueueNewlyMatchedByPArticle($prod) + { + if (empty($prod) || !is_array($prod)) { + throw new \RuntimeException('production_article not found'); + } + $pArticleId = intval($this->arrGet($prod, 'p_article_id', 0)); + $articleId = intval($this->arrGet($prod, 'article_id', 0)); + if ($pArticleId <= 0 || $articleId <= 0) { + throw new \InvalidArgumentException('production_article requires both p_article_id and article_id'); + } + + $existingCount = Db::name('article_reference_check_result') + ->where('p_article_id', $pArticleId) + ->count(); + if (intval($existingCount) <= 0) { + throw new \RuntimeException('no existing reference check records for p_article_id=' . $pArticleId . '; please run the first check first'); + } + + $missingCtx = $this->loadMissingPReferIdsByPArticleId($pArticleId); + $missingPReferIds = $missingCtx['missing_p_refer_ids']; + $missingRefNos = $missingCtx['missing_reference_nos']; + + if (empty($missingPReferIds)) { + return [ + 'article_id' => $articleId, + 'p_article_id' => $pArticleId, + 'queued' => 0, + 'skipped' => 0, + 'existing' => intval($existingCount), + 'missing_p_refer_ids' => [], + 'matched_p_refer_ids' => [], + 'still_unmatched_p_refer_ids' => [], + 'new_reference_nos' => [], + 'check_ids' => [], + 'queue' => self::TRANSPORT_RABBITMQ, + ]; + } + + $referMap = $this->loadReferMapByPArticleId($pArticleId); + + $mains = Db::name('article_main') + ->field('am_id,content,article_id,type,amt_id') + ->where('article_id', $articleId) + ->whereIn('state', [0, 2]) + ->order('sort asc') + ->select(); + if (empty($mains)) { + throw new \RuntimeException('article_main is empty'); + } + + $queued = 0; + $skipped = 0; + $pendingJobs = []; + $amIdsWithJobs = []; + 
$newReferenceNos = []; + $matchedPReferIds = []; + $now = date('Y-m-d H:i:s'); + + foreach ($mains as $main) { + $amId = intval($main['am_id']); + $citations = $this->extractReferencesForArticleMain($main); + if (empty($citations)) { + continue; + } + $sectionText = $this->resolveSectionTextForArticleMain($main); + foreach ($citations as $cite) { + foreach ($cite['reference_numbers'] as $refNo) { + if (!isset($missingRefNos[$refNo])) { + $skipped++; + continue; + } + + $referIndex = $refNo - 1; + if ($referIndex < 0 || !isset($referMap[$referIndex])) { + $skipped++; + continue; + } + + $refer = $referMap[$referIndex]; + $pReferId = intval($this->arrGet($refer, 'p_refer_id', 0)); + $scope = [ + 'article_id' => $main['article_id'], + 'p_article_id' => $pArticleId, + 'am_id' => $amId, + 'section_text' => $sectionText, + ]; + $checkId = $this->insertCitationCheckRecord($scope, $cite, $refNo, $refer, $now); + if ($checkId <= 0) { + $skipped++; + continue; + } + + $this->appendCitationPendingJob($pendingJobs, $checkId, $refNo, $amId, $cite['text_start']); + $queued++; + $amIdsWithJobs[$amId] = true; + $newReferenceNos[$refNo] = true; + if ($pReferId > 0) { + $matchedPReferIds[$pReferId] = true; + } + } + } + } + + $checkIds = $this->enqueueChecksSortedByReferenceNo($pendingJobs, $pArticleId, 'rematch_new'); + foreach (array_keys($amIdsWithJobs) as $amId) { + $this->setAmRefCheckStatus($amId, self::AM_STATUS_RUNNING); + } + + $newRefList = array_keys($newReferenceNos); + sort($newRefList, SORT_NUMERIC); + + $matchedList = array_keys($matchedPReferIds); + sort($matchedList, SORT_NUMERIC); + + $stillUnmatched = array_values(array_diff($missingPReferIds, $matchedList)); + sort($stillUnmatched, SORT_NUMERIC); + + return [ + 'article_id' => $articleId, + 'p_article_id' => $pArticleId, + 'queued' => $queued, + 'skipped' => $skipped, + 'existing' => intval($existingCount), + 'missing_p_refer_ids' => $missingPReferIds, + 'matched_p_refer_ids' => $matchedList, + 
'still_unmatched_p_refer_ids' => $stillUnmatched, + 'new_reference_nos' => $newRefList, + 'check_ids' => $checkIds, + 'queue' => self::TRANSPORT_RABBITMQ, + ]; + } + + /** + * 参考文献表(state=0) 与校对明细中已出现的 p_refer_id 做差集。 + * + * @return array{missing_p_refer_ids:int[], missing_reference_nos:array} + */ + private function loadMissingPReferIdsByPArticleId($pArticleId) + { + $pArticleId = intval($pArticleId); + $missingPReferIds = []; + $missingRefNos = []; + + if ($pArticleId <= 0) { + return [ + 'missing_p_refer_ids' => $missingPReferIds, + 'missing_reference_nos' => $missingRefNos, + ]; + } + + $refers = Db::name('production_article_refer') + ->field('p_refer_id,index') + ->where('p_article_id', $pArticleId) + ->where('state', 0) + ->order('index asc') + ->select(); + + $checkedIds = Db::name('article_reference_check_result') + ->where('p_article_id', $pArticleId) + ->where('p_refer_id', '>', 0) + ->group('p_refer_id') + ->column('p_refer_id'); + $checkedSet = []; + foreach ($checkedIds as $id) { + $checkedSet[intval($id)] = true; + } + + foreach ($refers as $refer) { + $pReferId = intval($this->arrGet($refer, 'p_refer_id', 0)); + if ($pReferId <= 0 || isset($checkedSet[$pReferId])) { + continue; + } + $refNo = intval($this->arrGet($refer, 'index', 0)) + 1; + $missingPReferIds[] = $pReferId; + $missingRefNos[$refNo] = $pReferId; + } + + return [ + 'missing_p_refer_ids' => $missingPReferIds, + 'missing_reference_nos' => $missingRefNos, + ]; + } + /** * 根据该节全部明细行汇总更新 t_article_main.ref_check_status */ @@ -553,7 +764,7 @@ class ReferenceCheckService * * @return int 被删除的明细条数 */ - public function clearArticleChecksByPArticleId($pArticleId) + public function clearArticleChecksByPArticleId($pArticleId,$articleId=0) { $pArticleId = intval($pArticleId); if ($pArticleId <= 0) { @@ -561,10 +772,12 @@ class ReferenceCheckService } // 先反查 article_id(用于重置 article_main.ref_check_status 节级状态) - $articleId = intval(Db::name('production_article') - ->where('p_article_id', $pArticleId) 
- ->whereIn('state', [0, 2]) - ->value('article_id')); + if($articleId==0){ + $articleId = intval(Db::name('production_article') + ->where('p_article_id', $pArticleId) + ->whereIn('state', [0, 2]) + ->value('article_id')); + } $deleted = Db::name('article_reference_check_result') ->where('p_article_id', $pArticleId) @@ -598,6 +811,29 @@ class ReferenceCheckService return intval($deleted); } + /** + * 按 article_id + am_id 删除单节正文下的引用校对明细,并同步该节 ref_check_status。 + * + * @return int 被删除的明细条数 + */ + public function clearChecksByAmId($articleId, $amId) + { + $articleId = intval($articleId); + $amId = intval($amId); + if ($articleId <= 0 || $amId <= 0) { + return 0; + } + + $deleted = Db::name('article_reference_check_result') + ->where('article_id', $articleId) + ->where('am_id', $amId) + ->delete(); + + $this->syncAmRefCheckStatus($amId); + + return intval($deleted); + } + /** * 文献列表局部挪动后,仅刷新指定 p_refer_id 对应的校对明细 reference_no / refer_index。 * @@ -850,7 +1086,7 @@ class ReferenceCheckService } $rows = Db::name('article_reference_check_result') - ->field('id,p_refer_id,reference_no,am_id,status,confidence,is_match,reason,text_start,text_end,updated_at') + ->field('id,p_refer_id,reference_no,am_id,status,confidence,is_match,reason,support_role,combined_can_support,combined_confidence,combined_reason,text_start,text_end,cite_group_refs,updated_at') ->where('p_article_id', $pArticleId) ->order('reference_no asc, id asc') ->select(); @@ -916,16 +1152,22 @@ class ReferenceCheckService } $groups[$refNo]['records'][] = [ - 'check_id' => intval($this->arrGet($row, 'id', 0)), - 'am_id' => intval($this->arrGet($row, 'am_id', 0)), - 'status' => $st, - 'confidence' => $confidence, - 'is_pass' => $isPass, - 'is_match' => intval($this->arrGet($row, 'is_match', 0)), - 'reason' => (string)$this->arrGet($row, 'reason', ''), - 'text_start' => intval($this->arrGet($row, 'text_start', 0)), - 'text_end' => intval($this->arrGet($row, 'text_end', 0)), - 'last_updated_at' => $upd, + 'check_id' 
=> intval($this->arrGet($row, 'id', 0)), + 'am_id' => intval($this->arrGet($row, 'am_id', 0)), + 'status' => $st, + 'confidence' => $confidence, + 'is_pass' => $isPass, + 'is_match' => intval($this->arrGet($row, 'is_match', 0)), + 'reason' => (string)$this->arrGet($row, 'reason', ''), + 'support_role' => (string)$this->arrGet($row, 'support_role', ''), + 'combined_can_support' => intval($this->arrGet($row, 'combined_can_support', 0)), + 'combined_confidence' => floatval($this->arrGet($row, 'combined_confidence', 0)), + 'combined_reason' => (string)$this->arrGet($row, 'combined_reason', ''), + 'cite_group_refs' => (string)$this->arrGet($row, 'cite_group_refs', ''), + 'cite_check_mode' => $this->isJointCiteGroupRefs($this->arrGet($row, 'cite_group_refs', '')) ? 'joint' : 'single', + 'text_start' => intval($this->arrGet($row, 'text_start', 0)), + 'text_end' => intval($this->arrGet($row, 'text_end', 0)), + 'last_updated_at' => $upd, ]; } @@ -993,7 +1235,7 @@ class ReferenceCheckService } $rows = Db::name('article_reference_check_result') - ->field('id,p_article_id,reference_no,am_id,status,confidence,is_match,reason,updated_at') + ->field('id,p_article_id,reference_no,am_id,status,confidence,is_match,reason,support_role,combined_can_support,combined_confidence,combined_reason,cite_group_refs,updated_at') ->where('p_refer_id', $pReferId) ->order('id asc') ->select(); @@ -1036,13 +1278,19 @@ class ReferenceCheckService } $list[] = [ - 'check_id' => intval($this->arrGet($row, 'id', 0)), - 'am_id' => intval($this->arrGet($row, 'am_id', 0)), - 'status' => $st, - 'confidence' => $confidence, - 'reason' => (string)$this->arrGet($row, 'reason', ''), - 'is_match' => intval($this->arrGet($row, 'is_match', 0)), - 'is_pass' => $isPass, + 'check_id' => intval($this->arrGet($row, 'id', 0)), + 'am_id' => intval($this->arrGet($row, 'am_id', 0)), + 'status' => $st, + 'confidence' => $confidence, + 'reason' => (string)$this->arrGet($row, 'reason', ''), + 'is_match' => 
intval($this->arrGet($row, 'is_match', 0)), + 'is_pass' => $isPass, + 'support_role' => (string)$this->arrGet($row, 'support_role', ''), + 'combined_can_support' => intval($this->arrGet($row, 'combined_can_support', 0)), + 'combined_confidence' => floatval($this->arrGet($row, 'combined_confidence', 0)), + 'combined_reason' => (string)$this->arrGet($row, 'combined_reason', ''), + 'cite_group_refs' => (string)$this->arrGet($row, 'cite_group_refs', ''), + 'cite_check_mode' => $this->isJointCiteGroupRefs($this->arrGet($row, 'cite_group_refs', '')) ? 'joint' : 'single', ]; } @@ -1117,8 +1365,8 @@ class ReferenceCheckService throw new \RuntimeException('no existing reference check records for p_article_id=' . $pArticleId); } - $cleared = $this->clearArticleChecks($articleId); - $enqueueResult = $this->enqueueByArticle($articleId); + $cleared = $this->clearArticleChecksByPArticleId($aProductionArticle['p_article_id'],$aProductionArticle['article_id']); + $enqueueResult = $this->enqueueByPArticle($aProductionArticle); if (!is_array($enqueueResult)) { $enqueueResult = []; @@ -1178,6 +1426,7 @@ class ReferenceCheckService */ public function updateCheckResult($checkId, array $fields) { + DbReconnectHelper::ensure(); $checkId = intval($checkId); if ($checkId <= 0) { throw new \InvalidArgumentException('invalid check id'); @@ -1186,6 +1435,9 @@ class ReferenceCheckService if (isset($fields['reason'])) { $fields['reason'] = mb_substr(trim((string)$fields['reason']), 0, 512); } + if (isset($fields['combined_reason'])) { + $fields['combined_reason'] = mb_substr(trim((string)$fields['combined_reason']), 0, 512); + } if (isset($fields['error_msg'])) { $fields['error_msg'] = mb_substr(trim((string)$fields['error_msg']), 0, 512); } @@ -1540,6 +1792,581 @@ class ReferenceCheckService return implode("\n", $parts); } + /** + * 从 extractReferences 结果提取引用标签元数据([1,2]、[70-73] 等同标签下各明细共用) + */ + private function citationMetaFromExtract(array $cite) + { + return [ + 'reference_raw' => 
(string)$this->arrGet($cite, 'reference_raw', ''), + 'cite_group_refs' => $this->formatCiteGroupRefs((array)$this->arrGet($cite, 'reference_numbers', [])), + 'cite_tag_start' => intval($this->arrGet($cite, 'reference_start', 0)), + 'cite_tag_end' => intval($this->arrGet($cite, 'reference_end', 0)), + 'origin_text' => (string)$this->arrGet($cite, 'original_text', ''), + 'text_start' => intval($this->arrGet($cite, 'text_start', 0)), + 'text_end' => intval($this->arrGet($cite, 'text_end', 0)), + ]; + } + + /** + * 引用组展开序号:1,2 或 4,5,6 或 3 + */ + private function formatCiteGroupRefs(array $refNumbers) + { + $nums = []; + foreach ($refNumbers as $n) { + $n = intval($n); + if ($n > 0) { + $nums[$n] = $n; + } + } + if (empty($nums)) { + return ''; + } + $list = array_values($nums); + sort($list, SORT_NUMERIC); + + return implode(',', $list); + } + + private function isJointCiteGroupRefs($citeGroupRefs) + { + return strpos((string)$citeGroupRefs, ',') !== false; + } + + private function resolveCiteGroupRefsFromRow(array $row, array $groupRows = null) + { + $refs = trim((string)$this->arrGet($row, 'cite_group_refs', '')); + if ($refs !== '') { + return $refs; + } + if ($groupRows === null) { + $groupRows = $this->findCitationGroupRows($row); + } + $nums = []; + foreach ($groupRows as $gr) { + $n = intval($this->arrGet($gr, 'reference_no', 0)); + if ($n > 0) { + $nums[$n] = $n; + } + } + if (empty($nums)) { + $n = intval($this->arrGet($row, 'reference_no', 0)); + return $n > 0 ? 
(string)$n : ''; + } + $list = array_values($nums); + sort($list, SORT_NUMERIC); + + return implode(',', $list); + } + + private function hasSecondPassCompleted(array $row) + { + $reason = (string)$this->arrGet($row, 'reason', ''); + return stripos($reason, '[DOI复核') !== false || stripos($reason, '[Crossref复核') !== false; + } + + private function buildSecondPassReasonTag(array $row, array $payload, array $groupRows = null) + { + $citeGroupRefs = $this->resolveCiteGroupRefsFromRow($row, $groupRows); + $tag = '[DOI复核'; + if ($citeGroupRefs !== '') { + $tag .= ' 文献' . $citeGroupRefs; + } + if (trim((string)$this->arrGet($payload, 'doi_used', '')) !== '') { + $tag .= ' ' . trim((string)$payload['doi_used']); + } + $tag .= ']'; + + return $tag; + } + + /** + * 从 refer 抓取 DOI 真实文献块(PubMed 优先,回落 Crossref) + * + * @return array{text:string, has_abstract:bool, doi:string} + */ + private function resolveDoiRecheckFromRefer($refer) + { + if (!is_array($refer) || empty($refer)) { + return ['text' => '', 'has_abstract' => false, 'doi' => '']; + } + $text = trim($this->fetchDoiLiteratureBlock($refer)); + $hasAbstract = $text !== '' && preg_match('/Abstract:\s*\S/u', $text); + + return [ + 'text' => $text, + 'has_abstract' => $hasAbstract, + 'doi' => $this->extractDoiFromRefer($refer), + ]; + } + + /** + * 校对时使用的正文:t_article_main 一条记录(正文或表格展平文本) + */ + public function resolveParagraphContextForJob(array $row, $maxChars = 8000) + { + return $this->resolveMainContentForJob($row, $maxChars); + } + + /** + * 入队时快照引用处局部上下文,写入 origin_text(与 text_start/text_end 对应) + */ + private function resolveSectionTextForArticleMain(array $main, $maxChars = 8000) + { + $raw = trim($this->resolveArticleMainCheckContent($main)); + if ($raw === '') { + return ''; + } + + return $this->normalizeCheckContentForLlm($raw, $maxChars); + } + + /** + * 同一 blue 引用标签(如 [1,2])下为单个文献号写入校对明细 + * + * @return int|null check_id + */ + private function insertCitationCheckRecord(array $scope, array $cite, $refNo, 
array $refer, $now) + { + $refNo = intval($refNo); + $referIndex = $refNo - 1; + if ($referIndex < 0) { + return null; + } + + $meta = $this->citationMetaFromExtract($cite); + $referText = $this->formatReferForLlm($refer); + // origin_text 存引用处局部上下文(extractLocalCitationContext),非整节 am 正文 + $originText = trim((string)$meta['origin_text']); + if ($originText === '') { + $originText = trim((string)$this->arrGet($scope, 'section_text', '')); + } + + return intval(Db::name('article_reference_check_result')->insertGetId($this->newCheckRecordFields([ + 'article_id' => intval($this->arrGet($scope, 'article_id', 0)), + 'p_article_id' => intval($this->arrGet($scope, 'p_article_id', 0)), + 'am_id' => intval($this->arrGet($scope, 'am_id', 0)), + 'reference_no' => $refNo, + 'refer_index' => $refNo, + 'origin_text' => $originText, + 'refer_text' => $referText, + 'p_refer_id' => intval($this->arrGet($refer, 'p_refer_id', 0)), + 'reference_raw' => $meta['reference_raw'], + 'cite_group_refs' => $meta['cite_group_refs'], + 'cite_tag_start' => $meta['cite_tag_start'], + 'cite_tag_end' => $meta['cite_tag_end'], + 'text_start' => $meta['text_start'], + 'text_end' => $meta['text_end'], + 'status' => self::RECORD_PENDING, + 'created_at' => $now, + 'updated_at' => $now, + ]))); + } + + private function appendCitationPendingJob(array &$pendingJobs, $checkId, $refNo, $amId, $textStart) + { + $pendingJobs[] = [ + 'check_id' => intval($checkId), + 'reference_no' => intval($refNo), + 'am_id' => intval($amId), + 'text_start' => intval($textStart), + ]; + } + + /** + * 同一引用标签下的全部校对明细([1,2] 展开后 reference_no 不同但 cite_tag_* 相同) + * + * @return array[] + */ + private function findCitationGroupRows(array $row) + { + $amId = intval($this->arrGet($row, 'am_id', 0)); + if ($amId <= 0) { + return [$row]; + } + + $citeTagStart = intval($this->arrGet($row, 'cite_tag_start', 0)); + $citeTagEnd = intval($this->arrGet($row, 'cite_tag_end', 0)); + $q = Db::name('article_reference_check_result')->where('am_id', 
$amId); + + if ($citeTagStart > 0 && $citeTagEnd > $citeTagStart) { + $q->where('cite_tag_start', $citeTagStart)->where('cite_tag_end', $citeTagEnd); + } else { + $textStart = intval($this->arrGet($row, 'text_start', 0)); + $textEnd = intval($this->arrGet($row, 'text_end', 0)); + $referenceRaw = trim((string)$this->arrGet($row, 'reference_raw', '')); + $citeGroupRefs = trim((string)$this->arrGet($row, 'cite_group_refs', '')); + $q->where('text_start', $textStart)->where('text_end', $textEnd); + if ($referenceRaw !== '') { + $q->where('reference_raw', $referenceRaw); + } elseif ($citeGroupRefs !== '') { + $q->where('cite_group_refs', $citeGroupRefs); + } + } + + $rows = $q->order('reference_no asc')->select(); + return empty($rows) ? [$row] : $rows; + } + + private function resolveCitationGroupLeaderRefNo(array $groupRows) + { + $leader = PHP_INT_MAX; + foreach ($groupRows as $gr) { + $refNo = intval($this->arrGet($gr, 'reference_no', 0)); + if ($refNo > 0 && $refNo < $leader) { + $leader = $refNo; + } + } + + return $leader === PHP_INT_MAX ? 
0 : $leader; + } + + private function findCitationGroupRowByRefNo(array $groupRows, $refNo) + { + $refNo = intval($refNo); + foreach ($groupRows as $gr) { + if (intval($this->arrGet($gr, 'reference_no', 0)) === $refNo) { + return $gr; + } + } + + return null; + } + + private function isCitationGroupCheck(array $groupRows) + { + return count($groupRows) > 1; + } + + private function resolveReferTextForCheckRow(array $row, $refer = null) + { + if (is_array($refer) && !empty($refer)) { + return $this->formatReferForLlm($refer); + } + + return trim((string)$this->arrGet($row, 'refer_text', '')); + } + + /** + * 将同一引用标签下多条文献书目拼成一次 LLM 校对的 refer_text + */ + private function buildCombinedReferTextForCitationGroup(array $groupRows) + { + $blocks = []; + foreach ($groupRows as $gr) { + $refNo = intval($this->arrGet($gr, 'reference_no', 0)); + if ($refNo <= 0) { + continue; + } + + $refer = null; + if (intval($this->arrGet($gr, 'p_refer_id', 0)) > 0) { + $refer = Db::name('production_article_refer') + ->where('p_refer_id', intval($gr['p_refer_id'])) + ->where('state', 0) + ->find(); + } + $text = $this->resolveReferTextForCheckRow($gr, $refer); + if ($text === '') { + continue; + } + $blocks[] = '【参考文献 ' . $refNo . '】' . "\n" . 
$text; + } + + return implode("\n\n", $blocks); + } + + /** + * @return array{refer_text:string, doi_block:string, has_abstract:bool, doi_used:string} + */ + private function prepareRecheckPayloadForCitationGroup(array $groupRows) + { + $referText = $this->buildCombinedReferTextForCitationGroup($groupRows); + $doiParts = []; + $doiUsed = []; + $hasAbstract = false; + + foreach ($groupRows as $gr) { + $refNo = intval($this->arrGet($gr, 'reference_no', 0)); + if ($refNo <= 0 || intval($this->arrGet($gr, 'p_refer_id', 0)) <= 0) { + continue; + } + DbReconnectHelper::ensure(); + $refer = Db::name('production_article_refer') + ->where('p_refer_id', intval($gr['p_refer_id'])) + ->where('state', 0) + ->find(); + if (empty($refer)) { + continue; + } + $bundle = (new ReferenceLiteratureFetchService())->fetchAndCleanForRefer($refer); + $checkId = intval($this->arrGet($gr, 'id', $this->arrGet($gr, 'check_id', 0))); + if ($checkId > 0) { + $this->persistLiteratureOnCheckRow($checkId, $bundle); + } + + $block = $this->buildLiteratureBlockFromBundle($refNo, $bundle); + if ($block === '') { + continue; + } + if (trim((string)($bundle['abstract_final'] ?? '')) !== '') { + $hasAbstract = true; + } + $doiParts[] = $block; + $doi = trim((string)($bundle['doi'] ?? 
'')); + if ($doi !== '') { + $doiUsed[] = $doi; + } + } + + return [ + 'refer_text' => $referText, + 'doi_block' => implode("\n\n", $doiParts), + 'has_abstract' => $hasAbstract, + 'doi_used' => implode(',', $doiUsed), + ]; + } + + private function applyCheckResultFromRow($checkId, array $sourceRow) + { + $this->updateCheckResult($checkId, [ + 'can_support' => intval($this->arrGet($sourceRow, 'can_support', 0)), + 'is_match' => intval($this->arrGet($sourceRow, 'is_match', 0)), + 'confidence' => floatval($this->arrGet($sourceRow, 'confidence', 0)), + 'reason' => (string)$this->arrGet($sourceRow, 'reason', ''), + 'support_role' => (string)$this->arrGet($sourceRow, 'support_role', ''), + 'combined_can_support' => intval($this->arrGet($sourceRow, 'combined_can_support', 0)), + 'combined_confidence' => floatval($this->arrGet($sourceRow, 'combined_confidence', 0)), + 'combined_reason' => (string)$this->arrGet($sourceRow, 'combined_reason', ''), + 'status' => self::RECORD_COMPLETED, + 'error_msg' => '', + ]); + } + + /** + * 将 LLM results 数组按 reference_no 写入同一引用组内的各行 + */ + private function applyCitationGroupCheckResults(array $groupRows, array $llmResponse, $reasonPrefix = '') + { + $results = isset($llmResponse['results']) && is_array($llmResponse['results']) + ? $llmResponse['results'] : []; + if (empty($results)) { + return false; + } + + $byRefNo = []; + foreach ($results as $item) { + if (!is_array($item)) { + continue; + } + $refNo = intval($this->arrGet($item, 'reference_no', 0)); + if ($refNo > 0) { + $byRefNo[$refNo] = $item; + } + } + + $applied = 0; + $expected = 0; + $reasonPrefix = trim((string)$reasonPrefix); + foreach ($groupRows as $gr) { + $refNo = intval($this->arrGet($gr, 'reference_no', 0)); + if ($refNo <= 0) { + continue; + } + $expected++; + if (!isset($byRefNo[$refNo])) { + continue; + } + $item = $byRefNo[$refNo]; + $canSupport = !empty($item['can_support']) ? 1 : 0; + $reason = trim($reasonPrefix . ' ' . 
(string)$this->arrGet($item, 'reason', '')); + $this->updateCheckResult($this->resolveCheckRowId($gr), [ + 'can_support' => $canSupport, + 'is_match' => array_key_exists('is_match', $item) ? (!empty($item['is_match']) ? 1 : 0) : $canSupport, + 'confidence' => floatval($this->arrGet($item, 'confidence', 0)), + 'reason' => $reason, + 'support_role' => (string)$this->arrGet($item, 'support_role', ''), + 'combined_can_support' => !empty($item['combined_can_support']) ? 1 : 0, + 'combined_confidence' => floatval($this->arrGet($item, 'combined_confidence', 0)), + 'combined_reason' => (string)$this->arrGet($item, 'combined_reason', ''), + 'status' => self::RECORD_COMPLETED, + 'error_msg' => '', + ]); + $applied++; + } + + return $expected > 0 && $applied === $expected; + } + + private function findLlmResultItemForRow(array $llmResponse, array $row) + { + $results = isset($llmResponse['results']) && is_array($llmResponse['results']) + ? $llmResponse['results'] : []; + $refNo = intval($this->arrGet($row, 'reference_no', 0)); + foreach ($results as $item) { + if (is_array($item) && intval($this->arrGet($item, 'reference_no', 0)) === $refNo) { + return $item; + } + } + + return null; + } + + private function formatCheckReturnFromRow(array $row) + { + $checkId = $this->resolveCheckRowId($row); + + return [ + 'check_id' => $checkId, + 'can_support' => intval($this->arrGet($row, 'can_support', 0)), + 'is_match' => intval($this->arrGet($row, 'is_match', 0)), + 'confidence' => floatval($this->arrGet($row, 'confidence', 0)), + 'reason' => (string)$this->arrGet($row, 'reason', ''), + 'support_role' => (string)$this->arrGet($row, 'support_role', ''), + 'combined_can_support' => intval($this->arrGet($row, 'combined_can_support', 0)), + 'combined_confidence' => floatval($this->arrGet($row, 'combined_confidence', 0)), + 'combined_reason' => (string)$this->arrGet($row, 'combined_reason', ''), + ]; + } + + private function shouldRunSecondPassForLlmResults(array $groupRows, array 
$llmResponse) + { + if (!$this->isSecondPassEnabled()) { + return false; + } + + $results = isset($llmResponse['results']) && is_array($llmResponse['results']) + ? $llmResponse['results'] : []; + if (empty($results)) { + return false; + } + + $byRefNo = []; + foreach ($results as $item) { + if (!is_array($item)) { + continue; + } + $refNo = intval($this->arrGet($item, 'reference_no', 0)); + if ($refNo > 0) { + $byRefNo[$refNo] = $item; + } + } + + foreach ($groupRows as $gr) { + $refNo = intval($this->arrGet($gr, 'reference_no', 0)); + if ($refNo <= 0 || !isset($byRefNo[$refNo])) { + continue; + } + $item = $byRefNo[$refNo]; + if (floatval($this->arrGet($item, 'confidence', 0)) <= self::PASS_CONFIDENCE_THRESHOLD) { + return true; + } + if (floatval($this->arrGet($item, 'combined_confidence', 0)) <= self::PASS_CONFIDENCE_THRESHOLD) { + return true; + } + } + + return false; + } + + /** + * 本引用位置附近上下文(用于 LLM 判断具体支撑哪句) + */ + public function resolveCitationLocalContextForJob(array $row, $maxChars = 3000) + { + $textStart = intval($this->arrGet($row, 'text_start', 0)); + $textEnd = intval($this->arrGet($row, 'text_end', 0)); + $amId = intval($this->arrGet($row, 'am_id', 0)); + if ($amId <= 0 || $textEnd <= $textStart) { + return ''; + } + + $main = Db::name('article_main') + ->field('content,type,amt_id,article_id') + ->where('am_id', $amId) + ->find(); + if (empty($main)) { + return ''; + } + + $raw = trim($this->resolveArticleMainCheckContent($main)); + if ($raw === '') { + return ''; + } + + $slice = $this->buildCitationContextText($raw, $textStart, $textEnd); + if (trim($slice) === '') { + return ''; + } + + return $this->normalizeCheckContentForLlm($slice, $maxChars); + } + + /** + * 相关性校对专用:段内后续引用在「本句」基础上再向前扩展 1–2 句(不早于上一引用标签), + * 以便覆盖紧邻的前置 claim,同时避免整段混用多个引用点的论述。 + */ + public function resolveCitationLocalContextForRelevanceJob(array $row, $maxChars = 3000, $extraSentences = 2) + { + DbReconnectHelper::ensure(); + $textStart = intval($this->arrGet($row, 
'text_start', 0)); + $textEnd = intval($this->arrGet($row, 'text_end', 0)); + $amId = intval($this->arrGet($row, 'am_id', 0)); + $tagStart = intval($this->arrGet($row, 'cite_tag_start', 0)); + $fallback = trim((string)$this->arrGet($row, 'origin_text', '')); + + if ($amId <= 0 || $textEnd <= $textStart) { + return $fallback; + } + + $main = Db::name('article_main') + ->field('content,type,amt_id,article_id') + ->where('am_id', $amId) + ->find(); + if (empty($main)) { + return $fallback; + } + + $raw = trim($this->resolveArticleMainCheckContent($main)); + if ($raw === '') { + return $fallback; + } + + $paragraphStart = $tagStart > 0 ? $this->findParagraphStart($raw, $tagStart) : 0; + $prevTagEnd = $tagStart > 0 ? $this->resolvePriorCitationTagEnd($raw, $tagStart) : $paragraphStart; + $extendedStart = $textStart; + if ($prevTagEnd > $paragraphStart) { + $extendedStart = $this->extendContextStartBackward( + $raw, + $textStart, + max($paragraphStart, $prevTagEnd), + $extraSentences + ); + } + + $slice = $this->buildCitationContextText($raw, $extendedStart, $textEnd); + $slice = ltrim($slice, ". 
\t\n\r"); + if (trim($slice) === '') { + return $fallback; + } + + return $this->normalizeCheckContentForLlm($slice, $maxChars); + } + + /** + * 联合校对结果写回同一引用标签下的全部明细 + */ + private function applyCheckResultToCitationGroup(array $groupRows, array $fields) + { + foreach ($groupRows as $gr) { + $gid = $this->resolveCheckRowId($gr); + if ($gid > 0) { + $this->updateCheckResult($gid, $fields); + } + } + } + /** * 编辑某条文献内容后,按 p_refer_id 异步重新校对该文献对应的全部 check 明细 * @@ -1597,18 +2424,12 @@ class ReferenceCheckService ]; } - $resetFields = $this->newCheckRecordFields([ + $resetFields = $this->newCheckRecordFields($this->referenceCheckResultResetFields([ 'refer_text' => $referText, 'refer_index' => $referenceNo, 'reference_no' => $referenceNo, - 'status' => self::RECORD_PENDING, - 'is_match' => 0, - 'can_support' => 0, - 'confidence' => 0, - 'reason' => '', - 'error_msg' => '', 'updated_at' => $now, - ], self::QUEUE_PENDING, 0); + ]), self::QUEUE_PENDING, 0); $pendingJobs = []; $amIds = []; @@ -1686,15 +2507,9 @@ class ReferenceCheckService } $now = date('Y-m-d H:i:s'); - $resetFields = $this->newCheckRecordFields([ - 'status' => self::RECORD_PENDING, - 'is_match' => 0, - 'can_support' => 0, - 'confidence' => 0, - 'reason' => '', - 'error_msg' => '', - 'updated_at' => $now, - ], self::QUEUE_PENDING, 0); + $resetFields = $this->newCheckRecordFields($this->referenceCheckResultResetFields([ + 'updated_at' => $now, + ]), self::QUEUE_PENDING, 0); $pendingJobs = []; $amIds = []; @@ -1729,6 +2544,93 @@ class ReferenceCheckService ]; } + /** + * 某条参考文献下「校对失败」重跑,并将失败行所在同一引用标签分组(如 [1,2])全部一并重跑。 + * + * @param int $pReferId + * @param int $pArticleId + * @return array{p_refer_id:int, p_article_id:int, reset:int, queued:int, check_ids:int[], queue:string} + */ + public function enqueueRecheckFailedByPReferIdWithGroup($pReferId, $pArticleId = 0) + { + $pReferId = intval($pReferId); + if ($pReferId <= 0) { + throw new \InvalidArgumentException('p_refer_id is required'); + } + + $q = 
Db::name('article_reference_check_result') + ->where('p_refer_id', $pReferId) + ->where('status', self::RECORD_FAILED); + $pArticleId = intval($pArticleId); + if ($pArticleId > 0) { + $q->where('p_article_id', $pArticleId); + } + + $rows = $q->select(); + if (empty($rows)) { + return [ + 'p_refer_id' => $pReferId, + 'p_article_id' => $pArticleId, + 'reset' => 0, + 'queued' => 0, + 'check_ids' => [], + 'queue' => self::TRANSPORT_RABBITMQ, + ]; + } + + if ($pArticleId <= 0) { + $pArticleId = intval($this->arrGet($rows[0], 'p_article_id', 0)); + } + + $now = date('Y-m-d H:i:s'); + $resetFields = $this->newCheckRecordFields($this->referenceCheckResultResetFields([ + 'updated_at' => $now, + ]), self::QUEUE_PENDING, 0); + + $targetRows = []; + foreach ($rows as $row) { + $groupRows = $this->findCitationGroupRows($row); + foreach ($groupRows as $gr) { + $checkId = $this->resolveCheckRowId($gr); + if ($checkId > 0) { + $targetRows[$checkId] = $gr; + } + } + } + + $pendingJobs = []; + $amIds = []; + foreach ($targetRows as $row) { + $checkId = $this->resolveCheckRowId($row); + Db::name('article_reference_check_result')->where('id', $checkId)->update($resetFields); + $pendingJobs[] = [ + 'check_id' => $checkId, + 'reference_no' => intval($this->arrGet($row, 'reference_no', 0)), + 'am_id' => intval($this->arrGet($row, 'am_id', 0)), + 'text_start' => intval($this->arrGet($row, 'text_start', 0)), + ]; + $amId = intval($this->arrGet($row, 'am_id', 0)); + if ($amId > 0) { + $amIds[$amId] = true; + } + } + + foreach (array_keys($amIds) as $amId) { + $this->setAmRefCheckStatus($amId, self::AM_STATUS_RUNNING); + } + + $checkIds = $this->enqueueChecksSortedByReferenceNo($pendingJobs, $pArticleId, 'recheck_failed'); + + return [ + 'p_refer_id' => $pReferId, + 'p_article_id' => $pArticleId, + 'reset' => count($targetRows), + 'queued' => count($checkIds), + 'check_ids' => $checkIds, + 'queue' => self::TRANSPORT_RABBITMQ, + ]; + } + public function recheckByRefer($articleId, $pReferId = 
0, $referenceNo = 0) { $articleId = intval($articleId); @@ -1763,19 +2665,13 @@ class ReferenceCheckService ]; } - $resetFields = $this->newCheckRecordFields([ + $resetFields = $this->newCheckRecordFields($this->referenceCheckResultResetFields([ 'refer_text' => $referText, 'p_refer_id' => $pReferId, 'p_article_id' => $pArticleId, 'refer_index' => $referenceNo, - 'status' => 0, - 'is_match' => 0, - 'can_support' => 0, - 'confidence' => 0, - 'reason' => '', - 'error_msg' => '', 'updated_at' => $now, - ], self::QUEUE_PENDING, 0); + ]), self::QUEUE_PENDING, 0); $pendingJobs = []; $amIds = []; @@ -1847,13 +2743,34 @@ class ReferenceCheckService */ public function runReferenceCheckOnce($checkId) { + DbReconnectHelper::ensure(); $checkId = intval($checkId); $row = Db::name('article_reference_check_result')->where('id', $checkId)->find(); if (empty($row)) { throw new \RuntimeException('article_reference_check_result not found, id=' . $checkId); } - $contentA = $this->resolveMainContentForJob($row); + if (intval($row['status']) === self::RECORD_COMPLETED) { + return $this->formatCheckReturnFromRow($row); + } + + $groupRows = $this->findCitationGroupRows($row); + $isGroup = $this->isCitationGroupCheck($groupRows); + if ($isGroup) { + $leaderRefNo = $this->resolveCitationGroupLeaderRefNo($groupRows); + $currentRefNo = intval($this->arrGet($row, 'reference_no', 0)); + if ($currentRefNo !== $leaderRefNo) { + $freshRow = Db::name('article_reference_check_result')->where('id', $checkId)->find(); + if (!empty($freshRow) && intval($freshRow['status']) === self::RECORD_COMPLETED) { + return $this->formatCheckReturnFromRow($freshRow); + } + throw new \RuntimeException('Citation group leader check not finished yet, reference_no=' . 
$leaderRefNo); + } + } + + $contentA = $this->resolveParagraphContextForJob($row); + $localContext = $this->resolveCitationLocalContextForJob($row); + $citeGroupRefs = $this->resolveCiteGroupRefsFromRow($row, $groupRows); $refer = null; if (intval($row['p_refer_id']) > 0) { $refer = Db::name('production_article_refer') @@ -1862,95 +2779,151 @@ class ReferenceCheckService ->find(); } - if ($refer) { - $contentB = $this->formatReferForLlm($refer); - } else { - $contentB = trim((string)$this->arrGet($row, 'refer_text', '')); - } + $contentB = $this->buildCombinedReferTextForCitationGroup($groupRows); + DbReconnectHelper::release(); + $doiPayload = $this->prepareRecheckPayloadForCitationGroup($groupRows); + $doiBlock = trim((string)$this->arrGet($doiPayload, 'doi_block', '')); + DbReconnectHelper::ensure(); if ($contentA === '' || $contentB === '') { - $this->updateCheckResult($checkId, [ + $failFields = [ 'status' => self::RECORD_FAILED, 'error_msg' => 'Missing section content (text/table) or refer_text', - ]); + ]; + if ($isGroup) { + $this->applyCheckResultToCitationGroup($groupRows, $failFields); + } else { + $this->updateCheckResult($checkId, $failFields); + } throw new \RuntimeException('Missing section content (text/table) or refer_text'); } - $llmResult = (new LLMService())->checkReference($contentA, $contentB, false); + DbReconnectHelper::release(); + $llmResult = (new LLMService())->checkReference( + $contentA, + $contentB, + false, + $doiBlock !== '' ? $doiBlock : null, + $citeGroupRefs, + $localContext + ); + DbReconnectHelper::ensure(); $requestFailed = !empty($llmResult['request_failed']); - $canSupport = $this->parseLlmCanSupport($llmResult); - $confidence = floatval(isset($llmResult['confidence']) ? $llmResult['confidence'] : 0); - $reason = isset($llmResult['reason']) ? 
$llmResult['reason'] : ''; - // LLM 通讯失败:写 status=RECORD_FAILED(3) + error_msg,抛异常由 MQ worker 重试 - if ($requestFailed) { - $this->updateCheckResult($checkId, [ - 'confidence' => $confidence, - 'reason' => $reason, - 'status' => self::RECORD_FAILED, - 'error_msg' => $reason, - ]); - throw new \RuntimeException($reason !== '' ? $reason : 'LLM request failed'); + if ($requestFailed || !$this->applyCitationGroupCheckResults($groupRows, $llmResult)) { + $failReason = isset($llmResult['reason']) ? (string)$llmResult['reason'] : 'LLM request failed or empty results'; + $failFields = [ + 'status' => self::RECORD_FAILED, + 'error_msg' => $failReason, + ]; + if ($isGroup) { + $this->applyCheckResultToCitationGroup($groupRows, $failFields); + } else { + $this->updateCheckResult($checkId, $failFields); + } + throw new \RuntimeException($failReason !== '' ? $failReason : 'LLM request failed'); } - $this->updateCheckResult($checkId, [ - 'can_support' => $canSupport ? 1 : 0, - 'is_match' => $canSupport ? 1 : 0, - 'confidence' => $confidence, - 'reason' => $reason, - 'status' => self::RECORD_COMPLETED, - 'error_msg' => '', - ]); - - if ($confidence <= self::PASS_CONFIDENCE_THRESHOLD) { - $this->runSecondPassBlocking($checkId, $row, $contentA, $refer, $contentB); + if ($this->shouldRunSecondPassForLlmResults($groupRows, $llmResult)) { + $this->runSecondPassBlocking($checkId, $row, $contentA, $refer, $contentB, $groupRows); } - return [ - 'check_id' => $checkId, - 'can_support' => $canSupport ? 1 : 0, - 'is_match' => $canSupport ? 1 : 0, - 'confidence' => $confidence, - 'reason' => $reason, - ]; + $freshRow = Db::name('article_reference_check_result')->where('id', $checkId)->find(); + return $this->formatCheckReturnFromRow(!empty($freshRow) ? 
$freshRow : $row); } /** * 低分结果的二轮 DOI 复核(同步阻塞执行;失败重试一次) */ - public function runSecondPassBlocking($checkId, array $row, $contentA, $refer, $referText) + public function runSecondPassBlocking($checkId, array $row, $contentA, $refer, $referText, array $groupRows = null) { + if (!$this->isSecondPassEnabled()) { + return false; + } + + DbReconnectHelper::ensure(); $checkId = intval($checkId); if ($checkId <= 0) { return false; } - $payload = $this->prepareRecheckPayload(is_array($refer) ? $refer : [], trim((string)$referText)); + if ($groupRows === null) { + $groupRows = $this->findCitationGroupRows($row); + } + $isGroup = $this->isCitationGroupCheck($groupRows); + + if ($isGroup) { + $leaderRefNo = $this->resolveCitationGroupLeaderRefNo($groupRows); + $currentRefNo = intval($this->arrGet($row, 'reference_no', 0)); + if ($currentRefNo !== $leaderRefNo) { + $freshRow = Db::name('article_reference_check_result')->where('id', $checkId)->find(); + if (!empty($freshRow) && $this->hasSecondPassCompleted($freshRow)) { + return true; + } + return false; + } + } + + if (trim((string)$contentA) === '') { + $contentA = $this->resolveParagraphContextForJob($row); + } + $localContext = $this->resolveCitationLocalContextForJob($row); + + DbReconnectHelper::release(); + if ($isGroup) { + $payload = $this->prepareRecheckPayloadForCitationGroup($groupRows); + $referText = trim((string)$payload['refer_text']); + } else { + if (trim((string)$referText) === '') { + $referText = $this->resolveReferTextForCheckRow($row, is_array($refer) ? $refer : null); + } + $payload = $this->prepareRecheckPayload(is_array($refer) ? 
$refer : [], trim((string)$referText)); + } + DbReconnectHelper::ensure(); if (empty($payload['has_abstract']) || trim((string)$payload['doi_block']) === '') { return false; } + $citeGroupRefs = $this->resolveCiteGroupRefsFromRow($row, $groupRows); $lastError = ''; for ($attempt = 0; $attempt < 2; $attempt++) { try { - $llmResult = (new LLMService())->checkReference($contentA, trim((string)$referText), true, $payload['doi_block']); + DbReconnectHelper::release(); + $llmResult = (new LLMService())->checkReference( + $contentA, + trim((string)$referText), + true, + $payload['doi_block'], + $citeGroupRefs, + $localContext + ); + DbReconnectHelper::ensure(); $requestFailed = !empty($llmResult['request_failed']); - $canSupport = $this->parseLlmCanSupport($llmResult); - $confidence = floatval(isset($llmResult['confidence']) ? $llmResult['confidence'] : 0); - $tag = '[Crossref复核' . (trim((string)$payload['doi_used']) !== '' ? (' ' . trim((string)$payload['doi_used'])) : '') . ']'; - $reason = $tag . ' ' . (isset($llmResult['reason']) ? $llmResult['reason'] : ''); + $tag = $this->buildSecondPassReasonTag($row, $payload, $groupRows); + if ($tag !== '' && !empty($llmResult['results']) && is_array($llmResult['results'])) { + foreach ($llmResult['results'] as &$one) { + if (!is_array($one)) { + continue; + } + $one['reason'] = $tag . ' ' . (isset($one['reason']) ? (string)$one['reason'] : ''); + } + unset($one); + } - if ($requestFailed) { - $lastError = isset($llmResult['reason']) ? (string)$llmResult['reason'] : 'LLM request failed'; + if ($requestFailed || !$this->applyCitationGroupCheckResults($groupRows, $llmResult)) { + $lastError = isset($llmResult['reason']) ? 
(string)$llmResult['reason'] : 'LLM request failed or empty results'; if ($attempt < 1) { continue; } - $this->updateCheckResult($checkId, [ - 'confidence' => $confidence, - 'reason' => $reason, - 'status' => self::RECORD_FAILED, - 'error_msg' => $lastError, - ]); + $failFields = [ + 'status' => self::RECORD_FAILED, + 'error_msg' => $lastError, + ]; + if ($isGroup) { + $this->applyCheckResultToCitationGroup($groupRows, $failFields); + } else { + $this->updateCheckResult($checkId, $failFields); + } $amId = intval(isset($row['am_id']) ? $row['am_id'] : 0); if ($amId > 0) { $this->syncAmRefCheckStatus($amId); @@ -1958,14 +2931,6 @@ class ReferenceCheckService return false; } - $this->updateCheckResult($checkId, [ - 'can_support' => $canSupport ? 1 : 0, - 'is_match' => $canSupport ? 1 : 0, - 'confidence' => $confidence, - 'reason' => $reason, - 'status' => self::RECORD_COMPLETED, - 'error_msg' => '', - ]); $amId = intval(isset($row['am_id']) ? $row['am_id'] : 0); if ($amId > 0) { $this->syncAmRefCheckStatus($amId); @@ -1976,10 +2941,15 @@ class ReferenceCheckService if ($attempt < 1) { continue; } - $this->updateCheckResult($checkId, [ + $failFields = [ 'status' => self::RECORD_FAILED, 'error_msg' => $lastError, - ]); + ]; + if ($isGroup) { + $this->applyCheckResultToCitationGroup($groupRows, $failFields); + } else { + $this->updateCheckResult($checkId, $failFields); + } $amId = intval(isset($row['am_id']) ? 
$row['am_id'] : 0); if ($amId > 0) { $this->syncAmRefCheckStatus($amId); @@ -2087,6 +3057,18 @@ class ReferenceCheckService if (!is_array($llmResult)) { return false; } + if (!empty($llmResult['results']) && is_array($llmResult['results'])) { + foreach ($llmResult['results'] as $item) { + if (!is_array($item)) { + continue; + } + if (!empty($item['can_support']) || !empty($item['is_match'])) { + return true; + } + } + + return false; + } if (array_key_exists('can_support', $llmResult)) { return $this->parseLlmIsMatch($llmResult['can_support']); } @@ -2098,6 +3080,7 @@ class ReferenceCheckService */ public function resolveMainContentForJob(array $row, $maxChars = 8000) { + DbReconnectHelper::ensure(); $amId = intval($this->arrGet($row, 'am_id', 0)); if ($amId <= 0) { return ''; @@ -2387,10 +3370,14 @@ class ReferenceCheckService } /** - * 引用处局部上下文(origin_text),供其它场景使用 + * 引用处局部上下文:优先按 text_start/text_end 从节正文重算,回落 origin_text 快照 */ public function resolveCitationContextForJob(array $row) { + $local = $this->resolveCitationLocalContextForJob($row); + if ($local !== '') { + return $local; + } $text = trim((string)$this->arrGet($row, 'origin_text', '')); if ($text === '') { $text = trim((string)$this->arrGet($row, 'content_a', '')); @@ -2503,6 +3490,8 @@ class ReferenceCheckService */ public function fetchDoiLiteratureBlock($refer) { + DbReconnectHelper::release(); + $candidates = $this->extractAllDoiCandidatesFromRefer($refer); if (empty($candidates)) { return ''; @@ -2670,15 +3659,80 @@ class ReferenceCheckService public function prepareRecheckPayload($refer, $referText = '') { $base = trim($referText) !== '' ? trim($referText) : $this->formatReferForLlm($refer); - $cr = $this->fetchCrossrefAbstractByReferDoi($refer); + $bundle = (new ReferenceLiteratureFetchService())->fetchAndCleanForRefer(is_array($refer) ? $refer : []); + $block = $this->buildLiteratureBlockFromBundle(0, $bundle); + if ($block === '') { + $cr = $this->resolveDoiRecheckFromRefer(is_array($refer) ? 
$refer : []); + return [ + 'refer_text' => $base, + 'doi_block' => $cr['text'], + 'has_abstract' => $cr['has_abstract'], + 'doi_used' => $cr['doi'], + ]; + } return [ 'refer_text' => $base, - 'doi_block' => $cr['text'], - 'has_abstract' => $cr['has_abstract'], - 'doi_used' => $cr['doi'], + 'doi_block' => $block, + 'has_abstract' => trim((string)($bundle['abstract_final'] ?? '')) !== '', + 'doi_used' => trim((string)($bundle['doi'] ?? '')), ]; } + private function buildLiteratureBlockFromBundle($refNo, array $bundle) + { + $abstract = trim((string)($bundle['abstract_final'] ?? $bundle['abstract'] ?? '')); + $cleaned = trim((string)($bundle['content_cleaned'] ?? '')); + $raw = trim((string)($bundle['raw_content'] ?? '')); + if ($cleaned === '' && $raw !== '') { + $cleaned = mb_substr($raw, 0, 6000); + } + if ($abstract === '' && $cleaned === '') { + return ''; + } + + $head = $refNo > 0 ? ('【参考文献 ' . intval($refNo) . '】') : '【文献内容】'; + $doi = trim((string)($bundle['doi'] ?? '')); + if ($doi !== '') { + $head .= ' DOI: ' . $doi; + } + $parts = [$head]; + if ($abstract !== '') { + $parts[] = '【摘要】' . "\n" . $abstract; + } + if ($cleaned !== '') { + $parts[] = '【清洗后文献内容】' . "\n" . $cleaned; + } + $sources = isset($bundle['sources']) && is_array($bundle['sources']) ? implode(',', $bundle['sources']) : ''; + if ($sources !== '') { + $parts[] = 'Sources: ' . $sources; + } + return implode("\n\n", $parts); + } + + private function persistLiteratureOnCheckRow($checkId, array $bundle) + { + $checkId = intval($checkId); + if ($checkId <= 0) { + return; + } + $abstract = trim((string)($bundle['abstract_final'] ?? $bundle['abstract'] ?? '')); + $raw = trim((string)($bundle['raw_content'] ?? '')); + $cleaned = trim((string)($bundle['content_cleaned'] ?? 
'')); + if ($cleaned === '' && $raw !== '') { + $cleaned = mb_substr($raw, 0, 6000); + } + try { + DbReconnectHelper::ensure(); + Db::name('article_reference_check_result')->where('id', $checkId)->update([ + 'abstract_text' => $abstract, + 'refer_content_cleaned' => $cleaned, + 'updated_at' => date('Y-m-d H:i:s'), + ]); + } catch (\Throwable $e) { + \think\Log::warning('persistLiteratureOnCheckRow: ' . $e->getMessage()); + } + } + /** * 旧接口:拼接成单块文本(向后兼容,建议调用方改用 prepareRecheckPayload) */ @@ -2710,6 +3764,24 @@ class ReferenceCheckService return false; } + if ($this->hasSecondPassCompleted($row)) { + return true; + } + + $groupRows = $this->findCitationGroupRows($row); + $isGroup = $this->isCitationGroupCheck($groupRows); + if ($isGroup) { + $leaderRefNo = $this->resolveCitationGroupLeaderRefNo($groupRows); + $currentRefNo = intval($this->arrGet($row, 'reference_no', 0)); + if ($currentRefNo !== $leaderRefNo) { + $freshRow = Db::name('article_reference_check_result')->where('id', $checkId)->find(); + if (!empty($freshRow) && $this->hasSecondPassCompleted($freshRow)) { + return true; + } + return false; + } + } + $refer = null; if (intval($row['p_refer_id']) > 0) { $refer = Db::name('production_article_refer') @@ -2717,22 +3789,15 @@ class ReferenceCheckService ->where('state', 0) ->find(); } - if (empty($refer) || $this->extractReferDoiOnly($refer) === '') { - return false; + + $contentA = $this->resolveParagraphContextForJob($row); + if ($isGroup) { + $referText = $this->buildCombinedReferTextForCitationGroup($groupRows); + } else { + $referText = $this->resolveReferTextForCheckRow($row, $refer); } - $cr = $this->fetchCrossrefAbstractByReferDoi($refer); - if (empty($cr['has_abstract'])) { - return false; - } - - $contentA = $this->resolveMainContentForJob($row); - $referText = trim((string)$this->arrGet($row, 'refer_text', '')); - if ($referText === '' && is_array($refer)) { - $referText = $this->formatReferForLlm($refer); - } - - return 
$this->runSecondPassBlocking($checkId, $row, $contentA, $refer, $referText); + return $this->runSecondPassBlocking($checkId, $row, $contentA, $refer, $referText, $groupRows); } /** @@ -2787,6 +3852,27 @@ class ReferenceCheckService return $result; } + /** + * 按段落截取引用上下文:同一段落内各处引用共用段落文本,分别按 cite_group_refs 校对。 + */ + private function extractParagraphCitationContext($content, $tagStart, $tagEnd, array $tagSpans) + { + $paragraphStart = $this->findParagraphStart($content, $tagStart); + $paragraphEnd = $this->findParagraphEnd($content, $tagEnd); + $originalText = $this->buildCitationContextText($content, $paragraphStart, $paragraphEnd); + + if (!$this->isMeaningfulCitationContext($originalText)) { + return $this->extractLocalCitationContext( + $content, + $tagStart, + $tagEnd, + $tagSpans + ); + } + + return [$paragraphStart, $paragraphEnd, $originalText]; + } + /** * 按引用位置截取局部上下文:优先取标签前叙述;同句多引时后续引用从上一标签后开始。 */ @@ -2929,6 +4015,89 @@ class ReferenceCheckService return $text; } + /** + * 同段落内、当前引用标签之前最近一个引用标签的结束字节位置 + */ + private function resolvePriorCitationTagEnd($content, $tagStart) + { + $tagStart = intval($tagStart); + $paragraphStart = $this->findParagraphStart($content, $tagStart); + $prevTagEnd = $paragraphStart; + + $matches = $this->collectBlueTagMatches($content); + if (empty($matches[0])) { + return $paragraphStart; + } + + foreach ($matches[0] as $match) { + $end = intval($match[1]) + strlen($match[0]); + if ($end <= $tagStart && $end > $prevTagEnd) { + $prevTagEnd = $end; + } + } + + return $prevTagEnd; + } + + /** + * 给定当前句起点,返回上一句起点 + */ + private function findPreviousSentenceStart($content, $sentenceStart) + { + $sentenceStart = intval($sentenceStart); + if ($sentenceStart <= 0) { + return 0; + } + + $pos = $sentenceStart - 1; + while ($pos > 0 && isset($content[$pos]) && ctype_space($content[$pos])) { + $pos--; + } + if ($pos <= 0) { + return 0; + } + + $prev = $this->findSentenceStart($content, $pos); + if ($prev >= $sentenceStart) { + $pos--; + 
while ($pos > 0 && isset($content[$pos]) && ctype_space($content[$pos])) { + $pos--; + } + if ($pos <= 0) { + return 0; + } + $prev = $this->findSentenceStart($content, $pos); + } + + return max(0, $prev); + } + + /** + * 从当前句起点向前扩展若干完整句子,但不早于 $minStart + */ + private function extendContextStartBackward($content, $start, $minStart, $extraSentences = 2) + { + $start = intval($start); + $minStart = max(0, intval($minStart)); + $extraSentences = max(0, intval($extraSentences)); + if ($extraSentences === 0 || $start <= $minStart) { + return max($minStart, $start); + } + + for ($i = 0; $i < $extraSentences; $i++) { + if ($start <= $minStart) { + break; + } + $prev = $this->findPreviousSentenceStart($content, $start); + if ($prev >= $start) { + break; + } + $start = max($minStart, $prev); + } + + return $start; + } + /** * 过滤仅标点、过短或无字母/汉字的上下文(如去掉标签后只剩 ".") */ @@ -3049,6 +4218,36 @@ class ReferenceCheckService return $best; } + /** + * 段落结束(

</p>、双换行、下一段<p>

之前) + */ + private function findParagraphEnd($content, $tagEnd) + { + $length = strlen($content); + $pos = max(0, intval($tagEnd)); + if ($pos >= $length) { + return $length; + } + + $candidates = [$length]; + + if (preg_match('/<\/p>/i', $content, $m, PREG_OFFSET_CAPTURE, $pos)) { + $candidates[] = intval($m[0][1]) + strlen($m[0][0]); + } + if (preg_match('/]*>/i', $content, $m, PREG_OFFSET_CAPTURE, $pos + 1)) { + $candidates[] = intval($m[0][1]); + } + if (preg_match('/\n\n/', $content, $m, PREG_OFFSET_CAPTURE, $pos)) { + $candidates[] = intval($m[0][1]); + } + if (preg_match('/\s*/i', $content, $m, PREG_OFFSET_CAPTURE, $pos)) { + $candidates[] = intval($m[0][1]) + strlen($m[0][0]); + } + + $end = min($candidates); + return max($pos, $end); + } + /** * 段落过长时从引用处向前截取上限,避免单次 LLM 上下文过大 */ diff --git a/application/common/mq/RabbitMqConfig.php b/application/common/mq/RabbitMqConfig.php index df30aa5e..84e19c17 100644 --- a/application/common/mq/RabbitMqConfig.php +++ b/application/common/mq/RabbitMqConfig.php @@ -21,4 +21,10 @@ class RabbitMqConfig $rc = self::get('reference_check', []); return is_array($rc) ? $rc : []; } + + public static function referenceRelevance() + { + $rc = self::get('reference_relevance', []); + return is_array($rc) ? $rc : []; + } } diff --git a/application/common/mq/ReferenceCheckArticleWorker.php b/application/common/mq/ReferenceCheckArticleWorker.php index e71da22d..46f61026 100644 --- a/application/common/mq/ReferenceCheckArticleWorker.php +++ b/application/common/mq/ReferenceCheckArticleWorker.php @@ -3,6 +3,7 @@ namespace app\common\mq; use think\Db; +use app\common\DbReconnectHelper; use app\common\ReferenceCheckService; /** @@ -25,6 +26,7 @@ class ReferenceCheckArticleWorker public function handleMessage(array $payload) { + DbReconnectHelper::ensure(); $pArticleId = intval(isset($payload['p_article_id']) ? $payload['p_article_id'] : 0); $batchId = intval(isset($payload['batch_id']) ? 
$payload['batch_id'] : 0); if ($pArticleId <= 0 || $batchId <= 0) { @@ -115,6 +117,7 @@ class ReferenceCheckArticleWorker */ private function processOneRow($checkId, array $row) { + DbReconnectHelper::ensure(); $claimed = Db::name('article_reference_check_result') ->where('id', intval($checkId)) ->where('queue_status', ReferenceCheckService::QUEUE_PENDING) @@ -134,6 +137,7 @@ class ReferenceCheckArticleWorker return 'ok'; } catch (\Exception $e) { $this->svc->log('ReferenceCheckArticleWorker check_id=' . $checkId . ' err=' . $e->getMessage()); + DbReconnectHelper::ensure(); if ($retryCount < ReferenceCheckService::QUEUE_MAX_RETRY) { $this->svc->markQueueRuntime($checkId, ReferenceCheckService::QUEUE_PENDING, $retryCount + 1); return $this->processOneRow($checkId, array_merge($row, ['retry_count' => $retryCount + 1])); diff --git a/application/common/service/LLMService.php b/application/common/service/LLMService.php index 20e25fc1..3daca96a 100644 --- a/application/common/service/LLMService.php +++ b/application/common/service/LLMService.php @@ -28,18 +28,17 @@ class LLMService * @param string $contextText 正文引用处句子 * @param string $referText 参考文献条目(或 refer 格式化文本) * @param bool $isAgain 是否为 DOI 二次复核 - * @param string|null $doiBlock 可选:系统抓取到的 DOI 真实文献内容(仅二次复核使用) + * @param string|null $doiBlock 可选:系统抓取到的 DOI 真实文献内容(仅二次复核使用) + * @param string $citeGroupRefs 引用文献组,如 1,2 或 4,5,6 + * @param string $localContext 本引用位置附近上下文(可选) + * @return array{results:array,request_failed?:bool} */ - public function checkReference($contextText, $referText, $isAgain = false, $doiBlock = null) + public function checkReference($contextText, $referText, $isAgain = false, $doiBlock = null, $citeGroupRefs = '', $localContext = '') { - // request_failed=true 表示"LLM 通讯/解析层面的失败"(可重试,区别于业务上的"未命中"); - // 上游 runReferenceCheckOnce 会据此把 DB.status 置为 3(失败) 并抛异常触发 MQ worker 重试 $fallback = [ - 'can_support' => false, - 'is_match' => false, - 'confidence' => 0.0, - 'reason' => 'LLM not configured or request 
failed', + 'results' => [], 'request_failed' => true, + 'reason' => 'LLM not configured or request failed', ]; if ($this->url === '' || $this->model === '') { \think\Log::warning('ReferenceCheck LLM: url or model not configured'); @@ -47,15 +46,16 @@ class LLMService } $contextText = trim($contextText); + \think\Log::info('llm checkReference:' . $contextText); $referText = trim($referText); + \think\Log::info('llm referText:' . $referText); $doiBlock = trim((string)$doiBlock); + $citeGroupRefs = trim((string)$citeGroupRefs); + $localContext = trim((string)$localContext); if ($contextText === '' || $referText === '') { - // 空文本是入参问题,不是 LLM 故障,不需要重试 return [ - 'can_support' => false, - 'is_match' => false, - 'confidence' => 0.0, - 'reason' => 'Empty citation context or reference text', + 'results' => [], + 'reason' => 'Empty citation context or reference text', ]; } @@ -63,27 +63,30 @@ class LLMService if (mb_strlen($contextText) > $maxContextLen) { $contextText = mb_substr($contextText, 0, $maxContextLen); } - if (mb_strlen($referText) > 4000) { - $referText = mb_substr($referText, 0, 4000); + if (mb_strlen($localContext) > 3000) { + $localContext = mb_substr($localContext, 0, 3000); } - if (mb_strlen($doiBlock) > 4000) { - $doiBlock = mb_substr($doiBlock, 0, 4000); + if (mb_strlen($referText) > 6000) { + $referText = mb_substr($referText, 0, 6000); + } + if (mb_strlen($doiBlock) > 8000) { + $doiBlock = mb_substr($doiBlock, 0, 8000); } if ($isAgain) { $system = $this->buildReferenceCheckSecondPassPrompt(); - $user = $this->buildReferenceCheckSecondPassUserPrompt($contextText, $referText, $doiBlock); + $user = $this->buildReferenceCheckSecondPassUserPrompt($contextText, $referText, $doiBlock, $citeGroupRefs, $localContext); } else { $system = $this->buildReferenceCheckFirstPassPrompt(); - $user = $this->buildReferenceCheckFirstPassUserPrompt($contextText, $referText); + $user = $this->buildReferenceCheckFirstPassUserPrompt($contextText, $referText, $citeGroupRefs, 
$localContext, $doiBlock); } - \think\Log::info('ReferenceCheck system head: ' . mb_substr($system, 0, 200)); - \think\Log::info('ReferenceCheck user head: ' . mb_substr($user, 0, 600)); +// \think\Log::info('ReferenceCheck system head: ' . mb_substr($system, 0, 200)); +// \think\Log::info('ReferenceCheck user head: ' . mb_substr($user, 0, 600)); $payload = [ - 'model' => $this->model, + 'model' => $this->model, 'temperature' => 0, - 'messages' => [ + 'messages' => [ ['role' => 'system', 'content' => $system], ['role' => 'user', 'content' => $user], ], @@ -101,23 +104,14 @@ class LLMService return $fallback; } - $canSupport = $this->parseCanSupportFromParsed($parsed); - $confidence = $this->snapReferenceCheckConfidence( - $this->normalizeConfidence(isset($parsed['confidence']) ? $parsed['confidence'] : 0), - $canSupport - ); - $reason = $this->cleanReason((string)(isset($parsed['reason']) ? $parsed['reason'] : '')); - \think\Log::info( - 'ReferenceCheck result: can_support=' . ($canSupport ? '1' : '0') - . ', confidence=' . $confidence - . ', reason=' . $reason - ); - return [ - 'can_support' => $canSupport, - 'is_match' => $canSupport, - 'confidence' => $confidence, - 'reason' => $reason, - ]; + $results = $this->parseReferenceCheckResultsFromParsed($parsed, $citeGroupRefs, $localContext, $doiBlock); + if (empty($results)) { + \think\Log::warning('ReferenceCheck LLM: empty results array'); + return $fallback; + } + + \think\Log::info($results); + return ['results' => $results]; } /** @@ -174,83 +168,541 @@ class LLMService $s = strtolower(trim((string)$value)); return in_array($s, ['1', 'true', 'yes', 'support', 'supported'], true); } + private function bulidReferenceCheckFirstPassPrompt(){ + return <<<'PROMPT' +你是一名护理、医学与科研期刊的资深文献编辑,专门校对「正文引用句」与「对应参考文献条目」是否匹配。 - /** 第一次校对:书目条目 vs 正文全文 */ +你的目标是严格识别错引、张冠李戴、方法不符、对象不符、结论不成立的问题。 + +宁可少判 true,也不要漏掉错引。 + +你只能依据用户提供的内容判断: +1. 正文引用句 +2. 
当前对应参考文献条目 + +禁止假设已阅读全文。 +禁止联网。 +禁止脑补文献内容。 +禁止根据学科常识推断研究结果。 + +==================== +【核心任务】 + +判断: + +正文在该引用位置表达的核心观点、结论、方法、数据、定义、模型、研究发现、指南依据等, + +是否能够被该条参考文献合理支撑。 + +你判断的是: + +“引用是否成立” + +不是: + +“正文是否正确”。 + +==================== +【总原则(最高优先级)】 + +采用严格审稿标准: + +边界不清时,一律判 false。 + +宁可误杀(人工复核),不要漏掉错引。 + +同领域 ≠ 匹配。 + +同关键词 ≠ 匹配。 + +相关 ≠ 能支撑。 + +==================== +【强制规则】 + +1. 严禁关键词硬匹配 + +不能因为出现: +患者、护理、治疗、研究、模型、算法、深度学习、机器学习、焦虑、效果 + +等泛化词汇就判定匹配。 + +必须看: + +- 核心对象 +- 研究问题 +- 方法 +- 场景 +- 结局指标 +- 核心论点 + +是否一致。 + +==================== +2. 方法学必须严格一致(极重要) + +若正文明确提到: + +- 算法 +- 模型 +- 聚类方法 +- 深度学习架构 +- 统计方法 +- 数学模型 +- 评价指标 + +必须要求文献与其存在明确关联。 + +例如: + +不匹配: +- fuzzy clustering ≠ deep learning +- CNN ≠ LSTM +- random forest ≠ SVM +- 聚类 ≠ 分类 +- 特征选择 ≠ 分类预测 +- 风险因素分析 ≠ 干预研究 + +仅属于同一“大领域(AI/ML)” +不能判定匹配。 + +若方法体系不同: + +优先判 false + 0.10。 + +==================== +3. 医学护理引用严格一致 + +若正文涉及: + +- 疾病 +- 人群 +- 护理场景 +- 干预措施 +- 结局指标 + +必须基本一致。 + +例如: + +不匹配: +- ICU ≠ 普通病房 +- 老年人 ≠ 儿童 +- 糖尿病 ≠ 高血压 +- 心理护理 ≠ 运动干预 +- 焦虑改善 ≠ 生存率提高 + +==================== +4. 强结论必须强证据 + +正文若出现: + +- 显著改善 +- 明显降低 +- 证实 +- 优于 +- 有效预测 +- 危险因素 +- 因果关系 + +文献必须能合理支撑该强结论。 + +仅“应用研究”“相关研究”“观察研究” +不能自动支持强结论。 + +否则 false。 + +==================== +5. 特定证据类型必须一致 + +正文若明确写: + +- RCT/randomized trial +- Meta-analysis +- Guideline +- Systematic review +- Expert consensus + +而参考文献类型明显不符: + +直接 false。 + +==================== +6. 信息不足从严 + +若参考文献只有: + +作者 + 年份 + +或信息过少, + +无法建立明确关联: + +false + 0.30 + +==================== +【判定逻辑】 + +只有同时满足以下条件,才能 true: + +1. 主题一致 +2. 核心对象一致 +3. 核心论点一致 +4. 方法/研究方向一致 +5. 无明显错引风险 + +任意一点明显不符: + +false。 + +==================== +【评分(只能四选一)】 + +只能输出: + +0.90 +0.75 +0.30 +0.10 + +禁止任何其他分数。 + +评分规则: + +0.90 +明确匹配: +主题、对象、方法、核心论点均明显一致。 + +0.75 +基本匹配: +整体支撑成立,但存在轻微概括或小范围表述差异。 + +0.30 +存疑: +同领域但支撑不足; +信息不足; +需人工复核。 + +0.10 +明确错引: +主题、对象、方法或核心论点明显不符。 + +硬规则: + +is_match=true +只能: +0.75 或 0.90 + +is_match=false +只能: +0.10 或 0.30 + +==================== +【reason 要求】 + +仅说明: + +1. 是否主题一致; +2. 
核心论点/方法是否能支撑。 + +禁止模糊措辞: +“可能” +“看起来” +“应该” +“疑似” + +长度: + +20~60字。 + +==================== +【输出要求】 + +仅输出一行 minified JSON。 + +禁止 markdown。 +禁止解释。 +禁止换行。 +禁止任何额外内容。 + +格式: + +{"is_match":true|false,"confidence":0.10|0.30|0.75|0.90,"reason":"简体中文说明"} +PROMPT; + + } + /** 第一次校对:参考文献真实性与支撑力度 */ private function buildReferenceCheckFirstPassPrompt() { - return <<<'PROMPT' -你是文献引用校对助手。判断【正文全文】与【参考文献书目】是否相关、能否用于支撑正文中的引用。 - -【核心原则:从宽判断,避免误杀】 -默认倾向 can_support=true。只要文献与正文不是「风马牛不相及」,即判为相关、能支撑。 -不要求变量一致、不要求结论逐条对应、不要求研究设计相同。 - -【仅当以下情况才判 can_support=false(与正文明显无关)】 -- 学科/主题完全无关(如正文讲深度学习聚类,文献是糖尿病步态检测)。 -- 明显张冠李戴(正文断言 A 疗法的效果,文献研究的是完全不同的 B 问题且无关联)。 -- 文献条目与正文讨论的对象/场景毫无交集,且无法作背景或理论引用。 - -【以下情况均应 can_support=true】 -- 同一大领域或相邻方向(如护理、心理、管理、医学、统计、AI 等相近子领域)。 -- 可作背景文献、综述性引用、理论或方法的一般性依据。 -- 表述略宽、略有概括、变量名不完全一致,但大方向说得通。 - -【confidence 固定档位(禁止其它小数)】 -can_support=true:0.65(有关联但较泛)/ 0.78 / 0.85 / 0.92 / 0.98(非常确定相关) -can_support=false:0.15(明确风马牛不相及)/ 0.25 / 0.35 / 0.45(仅当实在无法建立任何合理关联) - -【输出】仅一行 minified JSON,无 markdown: -{"can_support":true|false,"is_match":true|false,"confidence":0.15|0.25|0.35|0.45|0.65|0.78|0.85|0.92|0.98,"reason":"30-80字简体中文"} -is_match 必须与 can_support 相同。 -PROMPT; + return $this->buildReferenceCheckSupportSystemPrompt(false); } - private function buildReferenceCheckFirstPassUserPrompt($contextText, $referText) + private function buildReferenceCheckSupportSystemPrompt($isSecondPass = false) { - return "【正文全文 article_main.content】\n" . $contextText - . "\n\n【参考文献书目 refer_text】\n" . $referText - . 
"\n\n请从宽判断:文献与正文非风马牛不相即可判 can_support=true,只返回 JSON。"; + $prompt = <<<'PROMPT' +你是一名护理、医学、生物医学与科研期刊的资深学术编辑,正在执行“参考文献真实性与支撑力度校对”。 + +你的任务不是判断“主题是否相关”,而是判断: +【稿件正文中某段被引用内容】是否真的能被【对应编号的参考文献】直接或充分支撑。 + +你必须严格基于用户提供的材料作出判断,不得凭常识、不得脑补、不得假设参考文献中“可能写过但未提供”的内容。 + +================================================== +【一、任务目标】 +你需要判断: +“正文引用位置的核心论点、结论、背景陈述、机制解释、疗效描述、数据表达或因果表述, +是否能被对应参考文献真实支持。” + +这里的“支持”不是指“文献主题相关”或“研究领域接近”,而是指: +参考文献中确实包含足以支持正文该处表述的内容。 + +================================================== +【二、输出原则:结果必须直接对应数据库行】 + +你输出的结果将直接写入数据库表 t_article_reference_check_result。 + +因此: +## 输出必须是 results 数组,数组中的每一个对象对应数据库中的一行,也就是“一个引用位置中的一条参考文献结果”。 + +换句话说: +- 如果某个引用位置是 [3],则输出 1 条 result(reference_no=3) +- 如果某个引用位置是 [1,2],则输出 2 条 result: + - 一条对应 reference_no=1 + - 一条对应 reference_no=2 + +每条 result 都必须给出该参考文献“单独”对正文引用句的支撑判断。 +如果该引用位置是联合引用(citation group 中有多篇文献),则除了单条判断外,还必须给出该引用组整体的联合判断(combined_* 字段)。 + +================================================== +【三、最重要原则:只看“是否支撑正文核心断言”,不是看“主题是否沾边”】 + +以下情况不能判为强支撑: +1. 参考文献只和主题大致相关,但没有明确支持正文中的关键表述 +2. 正文说的是“疗效提升/死亡率下降/全球高发/耐药/多通路机制”等明确论点,而文献只是在背景里泛泛提到疾病 +3. 正文是多层复合句,文献只支撑其中一小部分 +4. 正文有因果、比较、趋势、机制、疗效强度等强表述,而文献没有明确证据 +5. 文献是基础机制研究,但正文引用它来支撑宏观流行病学、临床治疗现状或指南式结论 +6. 文献可以“推测支持”但不是“直接/明确支持” + +================================================== +【三b、多 claim 复合句 → 0.78 部分支撑(勿误降到 0.45)】 + +正文常为 2~4 个连续 claim 的复合句。须逐 claim 比对后综合给分: + +- 若文献(含 DOI 摘要)能**明确支撑多数关键概念**(如遗传异质性/多基因改变、多 survival pathway 并存、耐药或治疗挑战), + 但**未逐字写出**正文完整因果链(如「异质性→多通路→单靶点疗效下降」), + → 应判 **partial_support**,confidence 通常 **0.78**(边界情况 0.65),**不得**仅因文献主标题聚焦某化合物/干预就降到 0.45。 + +- 0.45 仅用于:文献与 claim 方向明显不符、仅同病沾边、或几乎无可用证据。 + +**校准样例(单条 [4],须接近此逻辑):** + +引用句: +Furthermore, the genomic heterogeneity of colorectal cancer (CRC) presents additional difficulties because tumors frequently make use of several survival pathways at once, which reduces the efficacy of single-target treatments [4]. 
+ +文献4(Sheikhnia et al., thymoquinone CRC 机制综述): +- Claim1 遗传异质性/多基因改变:文献有 APC/KRAS/TP53、MSI/CIN 等 → 支撑较强 +- Claim2 多 survival pathway:文献列举 PI3K/Akt、Wnt、STAT3、NF-κB 等多通路 → 支撑较强 +- Claim3 单靶点疗效下降:文献有 drug resistance/治疗挑战,但未直述因果链 → 部分支撑 +- **输出**:can_support=1, confidence=**0.78**, support_role=supplementary_support(**不是 0.45**) + +用户消息中若提供【DOI 真实文献内容】,**必须结合摘要判断**,不得仅凭书目标题给分。 + +================================================== +【四、评分规则】 + +你必须使用以下 8 个固定分值之一: +0.98 / 0.92 / 0.85 / 0.78 / 0.65 / 0.45 / 0.25 / 0.15 + +判定含义: +- 0.98 / 0.92 / 0.85 => 强支撑(strong_support) +- 0.78 / 0.65 => 部分支撑(partial_support) +- 0.45 / 0.25 => 支撑不足(insufficient_support) +- 0.15 => 不支撑(not_support) + +can_support 取值规则: +- 若该文献/联合引文整体可判为 strong_support 或 partial_support,则 can_support = 1 +- 若判为 insufficient_support 或 not_support,则 can_support = 0 + +================================================== +【五、单条文献结果如何判断】 + +对于每一条参考文献,你必须判断它“单独”能否支撑该引用位置的正文内容,并输出: +- can_support +- confidence +- reason +- support_role + +其中: +### support_role 只能取以下值之一 +- primary_support:该文献本身就是主要证据来源,能支撑引用句核心内容 +- supplementary_support:能支撑部分重要内容,但不是主要来源 +- minimal_support:只提供少量背景或边缘支撑 +- no_meaningful_support:几乎不能支撑该引用句 + +### reason 的写法要求 +必须使用中文,明确写出: +1. 这篇文献具体支撑正文的哪一部分 +2. 哪些部分没有支撑到 +3. 是否存在文献类型与引用用途不匹配的问题 +4. 为什么给这个分值,而不是更高或更低 + +================================================== +【六、联合引用的判断规则】 + +当同一个引用位置包含多篇参考文献时(例如 [1,2] / [4,5,6]),除了逐条给单条结果外,还要额外判断: +“这些文献合起来,是否足以支撑该引用位置的正文内容?” + +联合结论输出到: +- combined_can_support +- combined_confidence +- combined_reason + +规则: +1. 联合评分不是单条评分平均值 +2. 如果其中一篇文献已强支撑,其他文献只是补充,则联合评分可接近主支撑文献 +3. 如果多篇文献分别覆盖不同部分,合起来能较完整支撑正文,则联合评分可以高于某些单条评分 +4. 但如果最关键的核心断言没有被任何文献明确支撑,则联合评分不能虚高 +5. 
如果多篇文献都只是零散相关,需要大量推断才能拼出正文结论,则联合评分通常不应过高 + +================================================== +【七、单引文的 combined_* 字段处理规则】 + +即使某个引用位置只有 1 条参考文献,也仍然必须输出 combined_* 字段。 +此时: +- combined_can_support = can_support +- combined_confidence = confidence +- combined_reason = “该引用位置仅包含单条文献,联合结论等同于该文献的单条结论。” 或等价表述 + +这样可以保证输出结构统一,便于数据库写入。 + +================================================== +【八、输出 JSON 结构】 + +你必须输出合法 JSON,且只能输出以下结构: + +{ + "results": [ + { + "reference_no": 1, + "cite_group_refs": "1,2", + "can_support": 0, + "confidence": 0.65, + "reason": "中文,单条文献结论", + "support_role": "supplementary_support", + "combined_can_support": 1, + "combined_confidence": 0.85, + "combined_reason": "中文,联合引用整体结论" + } + ] +} + +================================================== +【九、字段约束】 + +### 1)results 中每个对象都必须包含以下字段: +- reference_no +- cite_group_refs +- can_support +- confidence +- reason +- support_role +- combined_can_support +- combined_confidence +- combined_reason + +### 2)reference_no +必须对应当前引用位置中的某一条参考文献编号。 + +### 3)cite_group_refs +必须是该引用位置的完整引文组,格式如: +- "3" +- "1,2" +- "4,5,6" + +### 4)同一引用位置若包含多条参考文献,则必须输出多条 result +例如 cite_group_refs = "1,2" 时,必须输出: +- 一条 reference_no=1 +- 一条 reference_no=2 + +### 5)同一引用位置下的 combined_* 必须一致 +例如同属 "1,2" 的两条 result,它们的: +- combined_can_support +- combined_confidence +- combined_reason +必须完全一致。 + +================================================== +【十、禁止事项】 +你绝对不能: +- 杜撰文献中不存在的结论 +- 把“主题相关”当作“内容支撑” +- 因为是同一疾病就默认支持 +- 输出 JSON 以外的任何内容 + +现在开始,读取用户提供的引用位置正文、参考文献信息和文献内容,输出结果。 +PROMPT; + + if ($isSecondPass) { + $prompt .= <<<'PROMPT' + + +================================================== +【二次校对补充(DOI 真实文献内容)】 +用户消息中会提供【DOI 真实文献内容(PubMed/Crossref)】。 +必须以 DOI 真实内容为准复核支撑力度;书目信息与 DOI 冲突时以 DOI 为准。 +仍须输出完整 results 数组,逐条给出单文献判断与联合判断。 +PROMPT; + } + + return $prompt; } - /** 第二次校对:Crossref 摘要(Refer_doi) */ + private function buildReferenceCheckFirstPassUserPrompt($contextText, $referText, $citeGroupRefs = '', $localContext = '', $doiBlock = '') + { 
+ return $this->buildReferenceCheckSupportUserPrompt($contextText, $referText, $citeGroupRefs, $localContext, $doiBlock); + } + + private function buildReferenceCheckSupportUserPrompt($contextText, $referText, $citeGroupRefs, $localContext, $doiBlock) + { + $citeGroupRefs = trim((string)$citeGroupRefs); + $localContext = trim((string)$localContext); + $doiBlock = trim((string)$doiBlock); + + $parts = [ + "【正文节 t_article_main】\n" . $contextText, + ]; + if ($citeGroupRefs !== '') { + $mode = strpos($citeGroupRefs, ',') !== false ? '联合引用' : '单独引用'; + $parts[] = "【引用文献组 cite_group_refs】{$citeGroupRefs}({$mode})"; + } + if ($localContext !== '') { + $parts[] = "【本引用位置附近上下文】\n" . $localContext; + } + $parts[] = "【参考文献书目(按编号列出)】\n" . $referText; + if ($doiBlock !== '') { + $parts[] = "【DOI 真实文献内容(PubMed/Crossref,一轮校对已提供)】\n" . $doiBlock; + } + $parts[] = '请严格按 system 要求输出 results 数组 JSON,每条 result 对应一个 reference_no,并包含 combined_* 字段。'; + + return implode("\n\n", $parts); + } + + /** 第二次校对:DOI 真实文献内容复核 */ private function buildReferenceCheckSecondPassPrompt() { - return <<<'PROMPT' -你是文献引用二次校对助手。已根据 Refer_doi 从 Crossref(https://api.crossref.org/works/)获取摘要,请结合【正文全文】复核该文献是否相关。 - -【核心原则:与第一次相同,从宽判断】 -默认倾向 can_support=true。只要 Crossref 摘要(或书目)与正文不是风马牛不相及,即判相关、能支撑。 -以【Crossref 摘要】为准;摘要与书目冲突时以摘要为准。 - -【仅当以下情况才判 can_support=false】 -- 摘要显示的研究主题/对象/方法与正文讨论内容完全风马牛不相及。 -- 典型风马牛不相及、张冠李戴,且无法解释为背景或泛化引用。 - -【以下情况均应 can_support=true】 -- 摘要与正文属同领域或相近方向,能作背景、理论或方向性支撑。 -- 细节不完全一致,但不存在明显矛盾。 - -【无 Crossref 摘要时】 -结合 refer_text 从宽判断;非明显无关仍可 can_support=true,confidence 建议 0.65。 - -【confidence 固定档位(禁止其它小数)】 -can_support=true:0.65 / 0.78 / 0.85 / 0.92 / 0.98 -can_support=false:0.15 / 0.25 / 0.35 / 0.45 - -【输出】仅一行 minified JSON: -{"can_support":true|false,"is_match":true|false,"confidence":0.15|0.25|0.35|0.45|0.65|0.78|0.85|0.92|0.98,"reason":"30-80字简体中文"} -is_match 必须与 can_support 相同。 -PROMPT; + return $this->buildReferenceCheckSupportSystemPrompt(true); } - private function 
buildReferenceCheckSecondPassUserPrompt($contextText, $referText, $doiBlock) + private function buildReferenceCheckSecondPassUserPrompt($contextText, $referText, $doiBlock, $citeGroupRefs = '', $localContext = '') { - $doiBlock = trim((string)$doiBlock); - return "【正文全文 article_main.content】\n" . $contextText - . "\n\n【参考文献书目 refer_text】\n" . $referText - . "\n\n【Crossref 摘要】(Refer_doi → api.crossref.org/works/)\n" - . ($doiBlock !== '' ? $doiBlock : '(未获取到摘要,请结合 refer_text 从宽判断)') - . "\n\n文献与正文非风马牛不相即可判 can_support=true,只返回 JSON。"; + return $this->buildReferenceCheckSupportUserPrompt( + $contextText, + $referText, + $citeGroupRefs, + $localContext, + $doiBlock !== '' ? $doiBlock : '(未获取到 DOI 摘要或元数据,请结合书目条目从严判断)' + ); } private function buildReferenceCheckSystemPrompt3() { @@ -1169,13 +1621,174 @@ PROMPT; private function buildReferenceCheckRecheckUserPrompt($contextText, $referText, $doiBlock) { - return $this->buildReferenceCheckSecondPassUserPrompt($contextText, $referText, $doiBlock); + return $this->buildReferenceCheckSecondPassUserPrompt($contextText, $referText, $doiBlock, '', ''); } /** - * 与 buildReferenceCheckSystemPrompt3 一致的 confidence 档位 + * @return array */ - private function getReferenceCheckConfidenceBands($isMatch) + private function parseReferenceCheckResultsFromParsed(array $parsed, $defaultCiteGroupRefs = '', $localContext = '', $doiBlock = '') + { + $rows = []; + if (isset($parsed['results']) && is_array($parsed['results'])) { + $rows = $parsed['results']; + } elseif (isset($parsed['reference_no']) || isset($parsed['confidence'])) { + $rows = [$parsed]; + } + + $normalized = []; + foreach ($rows as $item) { + if (!is_array($item)) { + continue; + } + $refNo = intval(isset($item['reference_no']) ? $item['reference_no'] : 0); + if ($refNo <= 0) { + continue; + } + + $confidence = $this->snapReferenceCheckConfidenceValue( + $this->normalizeConfidence(isset($item['confidence']) ? 
$item['confidence'] : 0) + ); + $canSupport = $this->canSupportFromConfidence($confidence); + if (array_key_exists('can_support', $item)) { + $canSupport = $this->boolFromLlmValue($item['can_support']); + } elseif (array_key_exists('is_match', $item)) { + $canSupport = $this->boolFromLlmValue($item['is_match']); + } + + $reason = $this->cleanReason((string)(isset($item['reason']) ? $item['reason'] : '')); + $supportRole = $this->normalizeSupportRole(isset($item['support_role']) ? $item['support_role'] : ''); + list($confidence, $canSupport, $supportRole) = $this->applyMultiClaimPartialSupportFloor( + $localContext, + $doiBlock, + $confidence, + $canSupport, + $supportRole, + $reason + ); + + $combinedConfidence = $this->snapReferenceCheckConfidenceValue( + $this->normalizeConfidence(isset($item['combined_confidence']) ? $item['combined_confidence'] : $confidence) + ); + $combinedCanSupport = $this->canSupportFromConfidence($combinedConfidence); + if (array_key_exists('combined_can_support', $item)) { + $combinedCanSupport = $this->boolFromLlmValue($item['combined_can_support']); + } + + $citeGroupRefs = trim((string)(isset($item['cite_group_refs']) ? $item['cite_group_refs'] : $defaultCiteGroupRefs)); + if ($citeGroupRefs === '' && $defaultCiteGroupRefs !== '') { + $citeGroupRefs = trim((string)$defaultCiteGroupRefs); + } + + $normalized[] = [ + 'reference_no' => $refNo, + 'cite_group_refs' => $citeGroupRefs, + 'can_support' => $canSupport, + 'is_match' => $canSupport, + 'confidence' => $confidence, + 'reason' => $reason, + 'support_role' => $supportRole, + 'combined_can_support' => $combinedCanSupport, + 'combined_confidence' => $combinedConfidence, + 'combined_reason' => $this->cleanReason((string)(isset($item['combined_reason']) ? 
$item['combined_reason'] : '')), + ]; + } + + return $normalized; + } + + private function normalizeSupportRole($role) + { + $role = strtolower(trim((string)$role)); + $allowed = [ + 'primary_support', + 'supplementary_support', + 'minimal_support', + 'no_meaningful_support', + ]; + return in_array($role, $allowed, true) ? $role : 'no_meaningful_support'; + } + + private function canSupportFromConfidence($confidence) + { + return floatval($confidence) >= 0.65 - 0.001; + } + + /** + * 多通路/异质性 claim + DOI 有多通路证据时,防止误打 0.45(应对齐 0.78 部分支撑) + */ + private function applyMultiClaimPartialSupportFloor($localContext, $doiBlock, $confidence, $canSupport, $supportRole, $reason) + { + $confidence = floatval($confidence); + if ($confidence > 0.45) { + return [$confidence, $canSupport, $supportRole]; + } + + $claimText = trim((string)$localContext); + if ($claimText === '') { + return [$confidence, $canSupport, $supportRole]; + } + + $claimIsMechanism = (bool)preg_match( + '/\b(genomic heterogeneity|heterogeneity|survival pathway|pathways at once|single-target|multi.?pathway|genetic alteration|drug resistance|异质性|生存通路|多.*通路|单靶点|耐药)\b/ui', + $claimText + ); + if (!$claimIsMechanism) { + return [$confidence, $canSupport, $supportRole]; + } + + $corpus = trim((string)$doiBlock) . ' ' . 
trim((string)$reason); + if ($corpus === '') { + return [$confidence, $canSupport, $supportRole]; + } + + $refHasPathwayEvidence = (bool)preg_match( + '/\b(pathway|PI3K|Akt|mTOR|Wnt|STAT3|NF-κB|NF-kB|genetic alteration|MSI|CIN|drug resistance|signaling|multiple|APC|KRAS|TP53|通路|耐药|信号)\b/ui', + $corpus + ); + if (!$refHasPathwayEvidence) { + return [$confidence, $canSupport, $supportRole]; + } + + $confidence = 0.78; + $canSupport = true; + if ($supportRole === 'no_meaningful_support' || $supportRole === 'minimal_support') { + $supportRole = 'supplementary_support'; + } + + return [$confidence, $canSupport, $supportRole]; + } + + private function getReferenceCheckConfidenceBands() + { + return [0.15, 0.25, 0.45, 0.65, 0.78, 0.85, 0.92, 0.98]; + } + + private function snapReferenceCheckConfidenceValue($confidence) + { + $bands = $this->getReferenceCheckConfidenceBands(); + foreach ($bands as $band) { + if (abs($confidence - $band) < 0.001) { + return $band; + } + } + $nearest = $bands[0]; + $minDiff = abs($confidence - $nearest); + foreach ($bands as $band) { + $diff = abs($confidence - $band); + if ($diff < $minDiff) { + $minDiff = $diff; + $nearest = $band; + } + } + + return $nearest; + } + + /** + * @deprecated 兼容旧逻辑 + */ + private function getReferenceCheckConfidenceBandsLegacy($isMatch) { return $isMatch ? 
[0.65, 0.78, 0.85, 0.92, 0.98] @@ -1183,22 +1796,24 @@ PROMPT; } /** - * 将模型输出的 confidence 吸附到合法档位(如 0.95 → 0.92,0.75 → 0.78) + * 将模型输出的 confidence 吸附到合法档位 */ private function snapReferenceCheckConfidence($confidence, $isMatch) { - $bands = $this->getReferenceCheckConfidenceBands($isMatch); - + $snapped = $this->snapReferenceCheckConfidenceValue($confidence); + $bands = $this->getReferenceCheckConfidenceBandsLegacy($isMatch); + if (in_array($snapped, $bands, true)) { + return $snapped; + } foreach ($bands as $band) { - if (abs($confidence - $band) < 0.001) { + if (abs($snapped - $band) < 0.001) { return $band; } } - $nearest = $bands[0]; - $minDiff = abs($confidence - $nearest); + $minDiff = abs($snapped - $nearest); foreach ($bands as $band) { - $diff = abs($confidence - $band); + $diff = abs($snapped - $band); if ($diff < $minDiff) { $minDiff = $diff; $nearest = $band; diff --git a/application/common/service/ReferenceRelevanceLlmService.php b/application/common/service/ReferenceRelevanceLlmService.php index 90e1fdef..f37fb7d1 100644 --- a/application/common/service/ReferenceRelevanceLlmService.php +++ b/application/common/service/ReferenceRelevanceLlmService.php @@ -138,12 +138,18 @@ class ReferenceRelevanceLlmService - **「覆盖部分结局」不足以进入 0.78**:原句点名了多条通路 + 多个结局,文献仅命中其中 1~2 个结局(如仅凋亡/增殖),且**点名通路在本文结果中全部缺失(仅讨论转引)**或主语层级不对 → 单条 **限 0.45(weakly_related / minimal_relevance)**,不得给 0.65~0.78 - 仅同领域沾边 1–2 项、主语或机制层级不对 → **0.45** - **进入 0.65~0.78 的前提**:主语对齐(X 单体)+ 本文自身结果命中原句点名通路/结局的多数项;几乎全部明确对应 → **0.85+** +11. 
**文献「主题粒度」必须匹配 claim「主题粒度」**:引用处为**疾病总论型 claim**(流行病学负担、标准/多模态治疗现状与局限、基因组异质性、单靶点治疗受限、亟需新策略等总体背景)时: + - 最适合的来源是**疾病总体综述 / 分子病理综述 / 精准肿瘤学 / 耐药综述**;此类文献正面、系统地为该总论 claim 提供依据 → 可 **0.85+** + - **单一药物 / 单一成分 / 单一通路的专题综述**(如「某化合物抗某癌:A review」),即使同病、同大方向,也只是专题视角、并非为该总论 claim 做系统总结 → 通常 **partially_related(0.72~0.78)**,**不得给 0.85+** + - **单基因 / 单通路的机制原始研究**对纯流行病学负担 claim → 仍按规则 3 给 **0.45** + - 判断要点:文献类型是否「为该总论 claim 本身做系统综述/总论」;仅同病同方向、或只支撑整段中某一两句(如「需要更安全的新策略」),不足以进入 highly_related ================================================== 【一、必须先拆解 claim】 从【本引用位置附近上下文】中提炼最小主张单元(Claim A, Claim B…),**不要**把整句笼统归为「大概讲抗癌」。例如: - **主语/研究对象**(化合物单体 vs 植物提取物 vs 其他物种;是否「X has been demonstrated」) - **证据语气与层级**(demonstrated / mechanistically vs predict / suggest;本文结果 vs 讨论转引) +- **claim 主题粒度**:是否为疾病总论型(流行病学负担 / 治疗现状与局限 / 基因组异质性 / 单靶点受限 / 亟需新策略);若是,要求「总体综述 / 分子病理 / 精准肿瘤学 / 耐药综述」类来源,单一药物专题综述只算 partially_related - 疾病流行病学(高发、死亡率) - **点名通路/分子机制**(PI3K/AKT、MAPK、NF-κB 等,须逐项) - **点名功能结局**(抑制增殖、凋亡、血管生成、炎症信号等,须逐项) diff --git a/application/extra/rabbitmq.php b/application/extra/rabbitmq.php index 05aa89b4..4b41a649 100644 --- a/application/extra/rabbitmq.php +++ b/application/extra/rabbitmq.php @@ -13,4 +13,11 @@ return [ 'dlq' => 'ref_check.article.dlq', 'route_key' => 'article.start', ], + + 'reference_relevance' => [ + 'exchange' => 'reference_relevance', + 'queue' => 'ref_relevance.article', + 'dlq' => 'ref_relevance.article.dlq', + 'route_key' => 'article.start', + ], ];