From 4aab7f5b7e5de33ae77a86f7b9ce9c0938ee45b4 Mon Sep 17 00:00:00 2001 From: wyn <1074145239@qq.com> Date: Thu, 21 May 2026 10:02:05 +0800 Subject: [PATCH 01/12] =?UTF-8?q?=E6=96=87=E7=AB=A0=E5=BC=95=E7=94=A8?= =?UTF-8?q?=E6=96=87=E7=8C=AE=E6=A0=A1=E9=AA=8C?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- application/api/controller/Article.php | 414 ++++++++++ application/api/job/ReferenceCheck.php | 135 ++++ application/common/ReferenceCheckService.php | 751 +++++++++++++++++++ application/common/service/LLMService.php | 490 ++++++++++++ 4 files changed, 1790 insertions(+) create mode 100644 application/api/job/ReferenceCheck.php create mode 100644 application/common/ReferenceCheckService.php create mode 100644 application/common/service/LLMService.php diff --git a/application/api/controller/Article.php b/application/api/controller/Article.php index e47a0473..e346c264 100644 --- a/application/api/controller/Article.php +++ b/application/api/controller/Article.php @@ -10,6 +10,7 @@ use PhpOffice\PhpWord\IOFactory; use app\common\OpenAi; use app\common\CrossrefService; use app\common\PubmedService; +use app\common\ReferenceCheckService; /** * @title 文章接口 @@ -6391,4 +6392,417 @@ class Article extends Base Db::commit(); return json_encode(['status' => 1,'msg' => 'success']); } + /** + * 调试:预览 article_main 中提取的 blue 引用(不入队) + * POST: article_id + */ + public function citationReview() + { + $articleId = 7821;//intval($this->request->post('article_id', 0)); + if ($articleId <= 0) { + return jsonError('article_id is required'); + } + + $svc = new ReferenceCheckService(); + $mains = Db::name('article_main') + ->field('am_id,content') + ->where('article_id', $articleId) + ->where('am_id', 127448) + //->whereIn('state', [0, 2]) + ->order('sort asc') + ->select(); + + $preview = []; + foreach ($mains as $item) { + $preview[] = [ + 'am_id' => $item['am_id'], + 'citations' => $svc->extractReferences((string)$item['content']), + ]; 
+ break; + } + return jsonSuccess(['article_id' => $articleId, 'sections' => $preview]); + } + /** + * 提取文献引用 + * + * @param string $content 原始内容 + * @return array + */ + function extractReferences($content) + { + $result = []; + + // 匹配 [57][74-79][72, 45] + preg_match_all( + '/\[([\d,\-\s]+)\]<\/blue>/', + $content, + $matches, + PREG_OFFSET_CAPTURE + ); + + if (empty($matches[0])) { + return []; + } + + foreach ($matches[0] as $index => $match) { + + // 完整标签 + $fullTag = $match[0]; + + // 标签开始位置 + $tagStart = $match[1]; + + // 标签结束位置 + $tagEnd = $tagStart + strlen($fullTag); + + // 文献号原始字符串 + $rawRef = trim($matches[1][$index][0]); + + // 展开文献号 + $referenceNumbers = $this->expandReferenceNumbers($rawRef); + + /** + * 获取原文内容 + * 这里按句号切分: + * 找当前引用所在句子的开始和结束位置 + */ + $sentenceStart = $this->findSentenceStart($content, $tagStart); + $sentenceEnd = $this->findSentenceEnd($content, $tagEnd); + + $originalText = mb_substr( + $content, + $sentenceStart, + $sentenceEnd - $sentenceStart + ); + + // 去掉 blue 标签 + $originalText = preg_replace( + '/\[[\d,\-\s]+\]<\/blue>/', + '', + $originalText + ); + + $originalText = trim($originalText); + + $result[] = [ + 'reference_raw' => $rawRef, + 'reference_numbers' => $referenceNumbers, + 'original_text' => $originalText, + + // blue标签在整段中的位置 + 'reference_start' => $tagStart, + 'reference_end' => $tagEnd, + + // 原文位置 + 'text_start' => $sentenceStart, + 'text_end' => $sentenceEnd, + ]; + } + + return $result; + } + + /** + * 展开文献号 + * 11-15 => [11,12,13,14,15] + * 72,45 => [72,45] + * 74-79,81 => [74,75,76,77,78,79,81] + */ + function expandReferenceNumbers($refStr) + { + $numbers = []; + + $parts = explode(',', $refStr); + + foreach ($parts as $part) { + + $part = trim($part); + + // 范围 + if (strpos($part, '-') !== false) { + + list($start, $end) = explode('-', $part); + + $start = intval(trim($start)); + $end = intval(trim($end)); + + if ($start <= $end) { + $numbers = array_merge( + $numbers, + range($start, $end) + ); + } + + } 
else { + + // 单个数字 + if (is_numeric($part)) { + $numbers[] = intval($part); + } + } + } + + return array_values(array_unique($numbers)); + } + + /** + * 查找句子开始位置 + */ + function findSentenceStart($content, $position) + { + $delimiters = ['.', '。', '!', '?', "\n"]; + + $start = 0; + + foreach ($delimiters as $delimiter) { + + $pos = strrpos( + substr($content, 0, $position), + $delimiter + ); + + if ($pos !== false) { + $start = max($start, $pos + 1); + } + } + + return $start; + } + + /** + * 查找句子结束位置 + */ + function findSentenceEnd($content, $position) + { + $length = strlen($content); + + $endPositions = []; + + foreach (['.', '。', '!', '?', "\n"] as $delimiter) { + + $pos = strpos($content, $delimiter, $position); + + if ($pos !== false) { + $endPositions[] = $pos + 1; + } + } + + return empty($endPositions) + ? $length + : min($endPositions); + } + + /** + * 引用相关性:提交单条到队列(异步调用 promotion 同款本地大模型) + * POST: content_a(必填), content_b(可选), article_id, reference_no(n=index+1), am_id + */ + public function referenceCheckEnqueue() + { + $data = $this->request->post(); + $contentA = trim((string)(isset($data['content_a']) ? $data['content_a'] : '')); + $contentB = trim((string)(isset($data['content_b']) ? $data['content_b'] : '')); + $articleId = intval(isset($data['article_id']) ? $data['article_id'] : 0); + $referenceNo = intval(isset($data['reference_no']) ? $data['reference_no'] : 0); + + if ($contentA === '') { + return jsonError('content_a is required'); + } + + try { + $svc = new ReferenceCheckService(); + $extra = [ + 'reference_no' => $referenceNo, + 'article_id' => $articleId, + 'am_id' => intval(isset($data['am_id']) ? 
$data['am_id'] : 0), + ]; + + if ($contentB === '' && $articleId > 0 && $referenceNo > 0) { + $prod = Db::name('production_article') + ->where('article_id', $articleId) + ->where('state', 0) + ->find(); + if ($prod) { + $referMap = $svc->loadReferMapByPArticleId(intval($prod['p_article_id'])); + $referIndex = $referenceNo - 1; + if (isset($referMap[$referIndex])) { + $refer = $referMap[$referIndex]; + $contentB = $svc->formatReferForLlm($refer); + $extra['p_article_id'] = intval($prod['p_article_id']); + $extra['p_refer_id'] = intval($refer['p_refer_id']); + $extra['refer_index'] = $referIndex; + } + } + } + + $result = $svc->enqueue($contentA, $contentB, $extra); + return jsonSuccess($result); + } catch (\Exception $e) { + return jsonError($e->getMessage()); + } + } + + public function referenceCheckEnqueueArticleMain(){ + $data = $this->request->post(); + $articleId = intval(isset($data['article_id']) ? $data['article_id'] : 0); + if ($articleId <= 0) { + return jsonError('article_id is required'); + } + $mainsList = Db::name('article_main') + ->field('am_id,content,article_id') + ->where('article_id', $articleId) + ->whereIn('state', [0, 2]) + ->order('sort asc') + ->select(); + + $svc = new ReferenceCheckService(); + foreach ($mainsList as $mainInfo ){ + $svc->enqueueByArticleMain($mainInfo); + } + } + /** + * 按文章批量入队:从 article_main 提取 blue 引用与文献号 + * POST: article_id, clear_previous=1(默认清空该文旧明细后重检) + */ + public function referenceCheckEnqueueArticle() + { + $data = $this->request->post(); + $articleId = intval(isset($data['article_id']) ? 
$data['article_id'] : 0); + if ($articleId <= 0) { + return jsonError('article_id is required'); + } + + try { + $svc = new ReferenceCheckService(); + $clear = !isset($data['clear_previous']) || intval($data['clear_previous']) === 1; + $result = $svc->enqueueByArticle($articleId, $clear); + return jsonSuccess($result); + } catch (\Exception $e) { + return jsonError($e->getMessage()); + } + } + + /** + * 查询单条引用相关性检测结果 + * GET/POST: check_id + */ + public function referenceCheckResult() + { + $checkId = intval($this->request->param('check_id', 0)); + if ($checkId <= 0) { + return jsonError('check_id is required'); + } + + $row = (new ReferenceCheckService())->getResult($checkId); + if (!$row) { + return jsonError('result not found'); + } + + return jsonSuccess($this->formatReferenceCheckRow($row)); + } + + /** + * 稿件预览:带不合理引用标记的 content(序号 + 引用句) + * GET/POST: article_id, am_id(可选,只预览某一节) + */ + public function referenceCheckPreview() + { + $articleId = intval($this->request->param('article_id', 0)); + if ($articleId <= 0) { + return jsonError('article_id is required'); + } + $amId = intval($this->request->param('am_id', 0)); + + try { + $data = (new ReferenceCheckService())->buildArticlePreview($articleId, $amId); + $data['markup_hint'] = [ + 'ref_no' => '.ref-no-error — 不合理的文献序号(如 70-73 中单独的 70)', + 'ref_cite' => '.ref-cite-tag.ref-cite-error — 含不合理序号的 blue 引用块', + 'ref_context'=> '.ref-context-error — 不合理的引用句/上下文', + ]; + $data['preview_css'] = '.ref-no-error{color:#c00;font-weight:bold;border-bottom:2px wavy #c00}' + . '.ref-cite-tag.ref-cite-error{background:#ffecec}' + . 
'.ref-context-error{background:#fff3cd;outline:1px dashed #e6a700}'; + return jsonSuccess($data); + } catch (\Exception $e) { + return jsonError($e->getMessage()); + } + } + + /** + * 按文章列出引用校对结果([70-73] 为 4 条,reference_no 分别为 70,71,72,73) + * GET/POST: article_id, status(可选), only_mismatch=1 仅不合理 + */ + public function referenceCheckList() + { + $articleId = intval($this->request->param('article_id', 0)); + if ($articleId <= 0) { + return jsonError('article_id is required'); + } + + $status = $this->request->param('status', ''); + $statusFilter = ($status === '' || $status === null) ? -1 : intval($status); + $onlyMismatch = intval($this->request->param('only_mismatch', 0)) === 1; + $rows = (new ReferenceCheckService())->listByArticle($articleId, $statusFilter, $onlyMismatch); + + $list = []; + foreach ($rows as $row) { + $list[] = $this->formatReferenceCheckRow($row); + } + + $mains = Db::name('article_main') + ->field('am_id,ref_check_status,sort') + ->where('article_id', $articleId) + ->whereIn('state', [0, 2]) + ->order('sort asc') + ->select(); + $sections = []; + foreach ($mains as $m) { + $st = intval(isset($m['ref_check_status']) ? $m['ref_check_status'] : 0); + $sections[] = [ + 'am_id' => intval($m['am_id']), + 'ref_check_status' => $st, + 'ref_check_pass' => $st === ReferenceCheckService::AM_STATUS_PASS, + 'ref_check_label' => ReferenceCheckService::amStatusLabel($st), + ]; + } + + return jsonSuccess([ + 'article_id' => $articleId, + 'total' => count($list), + 'list' => $list, + 'sections' => $sections, + ]); + } + + private function formatReferenceCheckRow($row) + { + $statusMap = array(0 => 'pending', 1 => 'done', 2 => 'failed'); + $amId = intval(isset($row['am_id']) ? $row['am_id'] : 0); + $citeStart = intval(isset($row['cite_tag_start']) ? $row['cite_tag_start'] : 0); + $rowStatus = intval($row['status']); + return array( + 'check_id' => intval($row['check_id']), + 'article_id' => intval(isset($row['article_id']) ? 
$row['article_id'] : 0), + 'am_id' => $amId, + 'cite_group_key' => $amId . '_' . $citeStart, + 'p_refer_id' => intval(isset($row['p_refer_id']) ? $row['p_refer_id'] : 0), + 'refer_index' => intval(isset($row['refer_index']) ? $row['refer_index'] : 0), + 'reference_no' => intval(isset($row['reference_no']) ? $row['reference_no'] : 0), + 'reference_raw' => isset($row['reference_raw']) ? $row['reference_raw'] : '', + 'cite_tag_start' => $citeStart, + 'cite_tag_end' => intval(isset($row['cite_tag_end']) ? $row['cite_tag_end'] : 0), + 'text_start' => intval(isset($row['text_start']) ? $row['text_start'] : 0), + 'text_end' => intval(isset($row['text_end']) ? $row['text_end'] : 0), + 'status' => isset($statusMap[$rowStatus]) ? $statusMap[$rowStatus] : 'unknown', + 'is_match' => intval($row['is_match']), + 'is_reasonable' => intval($row['is_match']) === 1, + 'confidence' => floatval($row['confidence']), + 'reason' => isset($row['reason']) ? $row['reason'] : '', + 'error_msg' => isset($row['error_msg']) ? $row['error_msg'] : '', + 'content_a' => isset($row['content_a']) ? $row['content_a'] : '', + 'content_b' => isset($row['content_b']) ? $row['content_b'] : '', + 'updated_at' => isset($row['updated_at']) ? $row['updated_at'] : '', + ); + } + } diff --git a/application/api/job/ReferenceCheck.php b/application/api/job/ReferenceCheck.php new file mode 100644 index 00000000..5058bdc1 --- /dev/null +++ b/application/api/job/ReferenceCheck.php @@ -0,0 +1,135 @@ +oQueueJob = new QueueJob(); + $this->QueueRedis = QueueRedis::getInstance(); + } + + public function fire(Job $job, $data) + { + $this->oQueueJob->init($job); + + $rawBody = empty($job->getRawBody()) ? '' : $job->getRawBody(); + $jobData = empty($rawBody) ? [] : json_decode($rawBody, true); + $jobId = empty($jobData['id']) ? 
'unknown' : $jobData['id']; + + $sRedisKey = ''; + $sRedisValue = ''; + + $this->oQueueJob->log("-----------队列任务开始-----------"); + $this->oQueueJob->log("当前任务ID: {$jobId}, 尝试次数: {$job->attempts()}"); + + try { + $checkId = intval(isset($data['check_id']) ? $data['check_id'] : 0); + $sClassName = get_class($this); + $sRedisKey = "queue_job:{$sClassName}:{$checkId}"; + $sRedisValue = uniqid() . '_' . getmypid(); + + if (!$this->oQueueJob->acquireLock($sRedisKey, $sRedisValue, $job)) { + return; + } + + if ($checkId <= 0) { + $job->delete(); + return; + } + + $row = Db::name('article_reference_check_result')->where('id', $checkId)->find(); + if (empty($row)) { + $job->delete(); + return; + } + + if (intval($row['status']) === 1) { + $job->delete(); + return; + } + + try { + $contentA = trim((string)(isset($row['origin_text']) ? $row['origin_text'] : '')); + $contentB = trim((string)(isset($row['refer_text']) ? $row['refer_text'] : '')); + + if ($contentB === '' && intval($row['p_refer_id']) > 0) { + $refer = Db::name('production_article_refer') + ->where('p_refer_id', intval($row['p_refer_id'])) + ->where('status', 0) + ->find(); + if ($refer) { + $contentB = (new ReferenceCheckService())->formatReferForLlm($refer); + } + } + + if ($contentA === '' || $contentB === '') { + $this->markFailed($checkId, 'Missing content_a or reference text'); + $job->delete(); + return; + } + + $llm = new LLMService(); + $llmResult = $llm->checkReference($contentA, $contentB); + + Db::name('article_reference_check_result')->where('id', $checkId)->update([ + 'is_match' => !empty($llmResult['is_match']) ? 1 : 0, + 'confidence' => $llmResult['confidence'], + 'reason' => $llmResult['reason'], + 'status' => 1, + 'error_msg' => '', + 'updated_at' => date('Y-m-d H:i:s'), + ]); + + $amId = intval(isset($row['am_id']) ? 
$row['am_id'] : 0); + if ($amId > 0) { + (new ReferenceCheckService())->syncAmRefCheckStatus($amId); + } + $this->QueueRedis->finishJob($sRedisKey, 'completed', $this->completedExprie, $sRedisValue); + $job->delete(); + $this->oQueueJob->log("任务执行成功 | 日志ID: {$sRedisKey}"); + } catch (\Exception $e) { + var_dump($e->getMessage()); + if ($job->attempts() >= 3) { + $this->markFailed($checkId, $e->getMessage()); + $job->delete(); + return; + } + $job->release(30); + } + } catch (\RuntimeException $e) { + $this->oQueueJob->handleRetryableException($e, $sRedisKey, $sRedisValue, $job); + } catch (\LogicException $e) { + $this->oQueueJob->handleNonRetryableException($e, $sRedisKey, $sRedisValue, $job); + } catch (\Exception $e) { + $this->oQueueJob->handleRetryableException($e, $sRedisKey, $sRedisValue, $job); + } finally { + $this->oQueueJob->finnal(); + } + } + + private function markFailed($checkId, $msg) + { + $row = Db::name('article_reference_check_result')->where('id', $checkId)->find(); + Db::name('article_reference_check_result')->where('id', $checkId)->update([ + 'status' => 2, + 'error_msg' => mb_substr($msg, 0, 500), + 'updated_at' => date('Y-m-d H:i:s'), + ]); + $amId = empty($row) ? 0 : intval(isset($row['am_id']) ? 
$row['am_id'] : 0); + if ($amId > 0) { + (new ReferenceCheckService())->syncAmRefCheckStatus($amId); + } + } +} diff --git a/application/common/ReferenceCheckService.php b/application/common/ReferenceCheckService.php new file mode 100644 index 00000000..f913e8e1 --- /dev/null +++ b/application/common/ReferenceCheckService.php @@ -0,0 +1,751 @@ +insertGetId([ + 'article_id' => intval($this->arrGet($extra, 'article_id', 0)), + 'am_id' => intval($this->arrGet($extra, 'am_id', 0)), + 'p_article_id' => intval($this->arrGet($extra, 'p_article_id', 0)), + 'p_refer_id' => intval($this->arrGet($extra, 'p_refer_id', 0)), + 'refer_index' => intval($this->arrGet($extra, 'refer_index', 0)), + 'reference_no' => intval($this->arrGet($extra, 'reference_no', 0)), + 'reference_raw' => (string)$this->arrGet($extra, 'reference_raw', ''), + 'cite_tag_start' => intval($this->arrGet($extra, 'cite_tag_start', 0)), + 'cite_tag_end' => intval($this->arrGet($extra, 'cite_tag_end', 0)), + 'text_start' => intval($this->arrGet($extra, 'text_start', 0)), + 'text_end' => intval($this->arrGet($extra, 'text_end', 0)), + 'content_a' => $contentA, + 'content_b' => trim($contentB), + 'status' => 0, + 'created_at' => $now, + 'updated_at' => $now, + ]); + + $amId = intval($this->arrGet($extra, 'am_id', 0)); + if ($amId > 0) { + $this->setAmRefCheckStatus($amId, self::AM_STATUS_RUNNING); + } + + $this->pushJob(intval($checkId), intval($this->arrGet($extra, 'queue_delay', 0))); + + return ['check_id' => $checkId, 'queued' => 1]; + } + public function enqueueByArticleMain($main){ + $amId = $main['am_id']; +// $main = Db::name('article_main') +// ->field('am_id,content,article_id') +// ->where('am_id', $amId) +// ->whereIn('state', [0, 2]) +// ->find(); + $citations = $this->extractReferences((string)$main['content']); +// return $citations; + + $prod = Db::name('production_article') + ->where('article_id', $main['article_id']) + ->where('state', 0) + ->find(); + if (empty($prod)) { + throw new 
\RuntimeException('production_article not found for article_id=' . $main['article_id']); + } + + $pArticleId = intval($prod['p_article_id']); + $referMap = $this->loadReferMapByPArticleId($pArticleId); + + if (empty($citations)) { + $this->setAmRefCheckStatus($amId, self::AM_STATUS_PASS); + return; + } + + $skipped = 0; + $delay = 0; + foreach ($citations as $cite) { + foreach ($cite['reference_numbers'] as $refNo) { + $referIndex = $refNo - 1; + if ($referIndex < 0 || !isset($referMap[$referIndex])) { + $skipped++; + continue; + } + $refer = $referMap[$referIndex]; + $referText = $this->formatReferForLlm($refer); + + $now = date('Y-m-d H:i:s'); + // [70-73] 展开为 reference_no=70,71,72,73 共 4 条记录 + $checkId = Db::name('article_reference_check_result')->insertGetId([ + 'article_id' => $main['article_id'], + 'p_article_id' => $pArticleId, + 'am_id' => intval($main['am_id']), + 'reference_no' => $refNo, + 'refer_index' => $refNo, + 'origin_text' => $cite['original_text'], + 'refer_text' => $referText, + 'p_refer_id' => $referMap[$referIndex]['p_refer_id'], + 'text_start' => $cite['text_start'], + 'text_end' => $cite['text_end'], + 'created_at' => $now, + 'updated_at' => $now, + ]); + $this->pushJob(intval($checkId), $delay); + $checkIds[] = $checkId; + $delay += 1; + } + } + + $this->setAmRefCheckStatus($amId, self::AM_STATUS_RUNNING); + } + /** + * 按 article_id 扫描 t_article_main,为每个 blue 引用 × 文献号入队 + */ + public function enqueueByArticle($articleId, $clearPrevious = true) + { + if ($articleId <= 0) { + throw new \InvalidArgumentException('article_id is required'); + } + + $prod = Db::name('production_article') + ->where('article_id', $articleId) + ->where('state', 0) + ->find(); + if (empty($prod)) { + throw new \RuntimeException('production_article not found for article_id=' . 
$articleId); + } + + $pArticleId = intval($prod['p_article_id']); + $referMap = $this->loadReferMapByPArticleId($pArticleId); + + $mains = Db::name('article_main') + ->field('am_id,content') + ->where('article_id', $articleId) + ->whereIn('state', [0, 2]) + ->order('sort asc') + ->select(); + + if (empty($mains)) { + throw new \RuntimeException('article_main is empty'); + } + + if ($clearPrevious) { + $this->clearArticleChecks($articleId); + } + + $queued = 0; + $skipped = 0; + $checkIds = []; + $delay = 0; + $amIdsWithJobs = []; + + foreach ($mains as $main) { + $amId = intval($main['am_id']); + $citations = $this->extractReferences((string)$main['content']); + if (empty($citations)) { + $this->setAmRefCheckStatus($amId, self::AM_STATUS_NONE); + continue; + } + foreach ($citations as $cite) { + foreach ($cite['reference_numbers'] as $refNo) { + $referIndex = $refNo - 1; + if ($referIndex < 0 || !isset($referMap[$referIndex])) { + $skipped++; + continue; + } + $refer = $referMap[$referIndex]; + $referText = $this->formatReferForLlm($refer); + + $now = date('Y-m-d H:i:s'); + // [70-73] 展开为 reference_no=70,71,72,73 共 4 条记录 + $checkId = Db::name('article_reference_check_result')->insertGetId([ + 'article_id' => $articleId, + 'am_id' => intval($main['am_id']), + 'p_article_id' => $pArticleId, + 'p_refer_id' => intval($refer['p_refer_id']), + 'refer_index' => $referIndex, + 'reference_no' => $refNo, + 'reference_raw' => $cite['reference_raw'], + 'cite_tag_start' => intval($cite['reference_start']), + 'cite_tag_end' => intval($cite['reference_end']), + 'text_start' => intval($cite['text_start']), + 'text_end' => intval($cite['text_end']), + 'content_a' => $cite['original_text'], + 'content_b' => $referText, + 'status' => 0, + 'created_at' => $now, + 'updated_at' => $now, + ]); + + $this->pushJob(intval($checkId), $delay); + $checkIds[] = $checkId; + $queued++; + $delay += 1; + $amIdsWithJobs[$amId] = true; + } + } + } + + foreach (array_keys($amIdsWithJobs) as $amId) { + 
$this->setAmRefCheckStatus($amId, self::AM_STATUS_RUNNING); + } + + return [ + 'article_id' => $articleId, + 'p_article_id' => $pArticleId, + 'queued' => $queued, + 'skipped' => $skipped, + 'check_ids' => $checkIds, + 'queue' => self::QUEUE_NAME, + ]; + } + + /** + * 根据该节全部明细行汇总更新 t_article_main.ref_check_status + */ + public function syncAmRefCheckStatus($amId) + { + if ($amId <= 0) { + return self::AM_STATUS_NONE; + } + + $rows = Db::name('article_reference_check_result')->where('am_id', $amId)->select(); + if (empty($rows)) { + $this->setAmRefCheckStatus($amId, self::AM_STATUS_NONE); + return self::AM_STATUS_NONE; + } + + $pending = 0; + $hasFail = false; + $done = 0; + + foreach ($rows as $row) { + $st = intval($row['status']); + if ($st === 0) { + $pending++; + continue; + } + if ($st === 2 || ($st === 1 && intval($row['is_match']) === 0)) { + $hasFail = true; + } + if ($st === 1) { + $done++; + } + } + + if ($pending > 0) { + $status = self::AM_STATUS_RUNNING; + } elseif ($hasFail) { + $status = self::AM_STATUS_FAIL; + } elseif ($done === count($rows)) { + $status = self::AM_STATUS_PASS; + } else { + $status = self::AM_STATUS_FAIL; + } + + $this->setAmRefCheckStatus($amId, $status); + return $status; + } + + public function setAmRefCheckStatus($amId, $status) + { + if ($amId <= 0) { + return; + } + Db::name('article_main')->where('am_id', $amId)->update([ + 'ref_check_status' => $status, + ]); + } + + public function clearArticleChecks($articleId) + { + Db::name('article_reference_check_result')->where('article_id', $articleId)->delete(); + Db::name('article_main') + ->where('article_id', $articleId) + ->whereIn('state', [0, 2]) + ->update(['ref_check_status' => self::AM_STATUS_NONE]); + } + + public static function amStatusLabel($status) + { + $map = [ + self::AM_STATUS_NONE => 'none', + self::AM_STATUS_PASS => 'pass', + self::AM_STATUS_FAIL => 'fail', + self::AM_STATUS_RUNNING => 'running', + ]; + return isset($map[$status]) ? 
$map[$status] : 'unknown'; + } + + public function getResult($checkId) + { + if ($checkId <= 0) { + return null; + } + $row = Db::name('article_reference_check_result')->where('check_id', $checkId)->find(); + return $row ?: null; + } + + public function listByArticle($articleId, $status = -1, $onlyMismatch = false) + { + $q = Db::name('article_reference_check_result')->where('article_id', $articleId); + if ($status >= 0) { + $q->where('status', $status); + } + if ($onlyMismatch) { + $q->where('status', 1)->where('is_match', 0); + } + return $q->order('am_id asc, cite_tag_start asc, reference_no asc')->select(); + } + + /** + * 稿件预览:在 content 上标记不合理引用序号与引用句 + * + * @return array{sections: array, issues: array, stats: array} + */ + public function buildArticlePreview($articleId, $amId = 0) + { + $q = Db::name('article_main') + ->field('am_id,content,sort,ref_check_status') + ->where('article_id', $articleId) + ->whereIn('state', [0, 2]); + if ($amId > 0) { + $q->where('am_id', $amId); + } + $mains = $q->order('sort asc')->select(); + + $rows = $this->listByArticle($articleId, 1); + $badByAm = $this->indexBadResults($rows); + + $sections = []; + $issues = []; + $stats = ['total' => 0, 'mismatch' => 0, 'match' => 0, 'pending' => 0]; + + foreach ($this->listByArticle($articleId, -1) as $r) { + $stats['total']++; + if (intval($r['status']) === 0) { + $stats['pending']++; + } elseif (intval($r['is_match']) === 1) { + $stats['match']++; + } else { + $stats['mismatch']++; + } + } + + foreach ($mains as $main) { + $id = intval($main['am_id']); + $content = (string)$main['content']; + $badIndex = isset($badByAm[$id]) ? 
$badByAm[$id] : array(); + $marked = $this->markContentForPreview($content, $id, $badIndex); + $amStatus = intval($this->arrGet($main, 'ref_check_status', 0)); + $sections[] = [ + 'am_id' => $id, + 'ref_check_status' => $amStatus, + 'ref_check_pass' => $amStatus === self::AM_STATUS_PASS, + 'ref_check_label' => self::amStatusLabel($amStatus), + 'content' => $content, + 'content_marked' => $marked['html'], + 'issue_count' => $marked['issue_count'], + ]; + foreach ($marked['issues'] as $issue) { + $issues[] = $issue; + } + } + + $articlePass = $this->resolveArticlePass($sections); + + return [ + 'article_id' => $articleId, + 'article_ref_check_pass' => $articlePass, + 'sections' => $sections, + 'issues' => $issues, + 'stats' => $stats, + ]; + } + + /** + * 全文是否通过:各节均为 pass,且无 running/fail(无引用节忽略) + */ + private function resolveArticlePass($sections) + { + $hasChecked = false; + foreach ($sections as $sec) { + $st = intval($this->arrGet($sec, 'ref_check_status', 0)); + if ($st === self::AM_STATUS_NONE) { + continue; + } + $hasChecked = true; + if ($st !== self::AM_STATUS_PASS) { + return false; + } + } + return $hasChecked ? true : null; + } + + /** + * @param array $rows status=1 的检测结果 + * @return array am_id => indexed bad map + */ + private function indexBadResults($rows) + { + $byAm = []; + foreach ($rows as $row) { + if (intval($row['status']) !== 1 || intval($row['is_match']) === 1) { + continue; + } + $amId = intval($row['am_id']); + $refNo = intval($row['reference_no']); + if ($amId <= 0 || $refNo <= 0) { + continue; + } + if (!isset($byAm[$amId])) { + $byAm[$amId] = ['by_raw' => [], 'contexts' => []]; + } + $rawKey = $this->normalizeRefRawKey((string)$this->arrGet($row, 'reference_raw', '')); + if ($rawKey !== '') { + $byAm[$amId]['by_raw'][$rawKey][$refNo] = $row; + } + + $ctxKey = intval($row['text_start']) . '_' . 
intval($row['text_end']); + if (!isset($byAm[$amId]['contexts'][$ctxKey])) { + $byAm[$amId]['contexts'][$ctxKey] = [ + 'text_start' => intval($row['text_start']), + 'text_end' => intval($row['text_end']), + 'check_ids' => [], + 'reasons' => [], + 'ref_nos' => [], + ]; + } + $byAm[$amId]['contexts'][$ctxKey]['check_ids'][] = intval($row['check_id']); + $byAm[$amId]['contexts'][$ctxKey]['ref_nos'][] = $refNo; + $reason = trim((string)$this->arrGet($row, 'reason', '')); + if ($reason !== '') { + $byAm[$amId]['contexts'][$ctxKey]['reasons'][$refNo] = $reason; + } + } + return $byAm; + } + + private function normalizeRefRawKey($raw) + { + $raw = str_replace( + [',', '–', '—', '−', '‐', '‑', ' '], + [',', '-', '-', '-', '-', '-', ''], + trim($raw) + ); + return strtolower($raw); + } + + /** + * @param array $badIndex indexBadResults 中单 am 的结构 + */ + private function markContentForPreview($content, $amId, $badIndex) + { + $badByRaw = isset($badIndex['by_raw']) ? $badIndex['by_raw'] : array(); + $contexts = isset($badIndex['contexts']) ? $badIndex['contexts'] : array(); + $issues = array(); + $issueCount = 0; + + if ($content === '' || (empty($badByRaw) && empty($contexts))) { + return array('html' => $content, 'issues' => array(), 'issue_count' => 0); + } + + $html = $content; + + // 1) 先标记 blue 内各序号(在原文上操作,[70-73] 仅标不合理者如 70、71) + preg_match_all( + '/\[([\d,\-\s]+)\]<\/blue>/', + $html, + $matches, + PREG_OFFSET_CAPTURE + ); + $citeDeltas = []; + if (!empty($matches[0])) { + $replacements = []; + foreach ($matches[0] as $idx => $match) { + $fullTag = $match[0]; + $tagStart = $match[1]; + $tagEnd = $tagStart + strlen($fullTag); + $inner = $matches[1][$idx][0]; + $rawKey = $this->normalizeRefRawKey($inner); + $badNums = isset($badByRaw[$rawKey]) ? 
$badByRaw[$rawKey] : array(); + + $innerMarked = preg_replace_callback( + '/\d+/', + function ($numMatch) use ($badNums, &$issues, &$issueCount, $amId, $inner) { + $num = intval($numMatch[0]); + if (!isset($badNums[$num])) { + return $numMatch[0]; + } + $row = $badNums[$num]; + $rowReason = isset($row['reason']) ? $row['reason'] : ''; + $issueCount++; + $issues[] = array( + 'am_id' => $amId, + 'check_id' => intval($row['check_id']), + 'reference_no' => $num, + 'reference_raw' => $inner, + 'reason' => $rowReason, + 'confidence' => floatval(isset($row['confidence']) ? $row['confidence'] : 0), + ); + $title = htmlspecialchars( + '引用[' . $num . ']不合理: ' . $rowReason, + ENT_QUOTES, + 'UTF-8' + ); + return '' + . $numMatch[0] . ''; + }, + $inner + ); + + $tagClass = !empty($badNums) ? ' ref-cite-error' : ''; + $groupIds = !empty($badNums) + ? implode(',', array_map('intval', array_column($badNums, 'check_id'))) + : ''; + $newHtml = '[' . $innerMarked . ']'; + $replacements[] = [ + 'start' => $tagStart, + 'end' => $tagEnd, + 'html' => $newHtml, + 'delta' => strlen($newHtml) - ($tagEnd - $tagStart), + ]; + } + usort($replacements, function ($a, $b) { + return $b['start'] - $a['start']; + }); + foreach ($replacements as $rep) { + $html = substr($html, 0, $rep['start']) . $rep['html'] . 
substr($html, $rep['end']); + $citeDeltas[] = ['start' => $rep['start'], 'delta' => $rep['delta']]; + } + } + + $shiftByCite = function ($pos) use ($citeDeltas) { + $d = 0; + foreach ($citeDeltas as $cd) { + if ($cd['start'] < $pos) { + $d += $cd['delta']; + } + } + return $pos + $d; + }; + + // 2) 再标记引用句(从后往前) + if (!empty($contexts)) { + $spans = array_values($contexts); + usort($spans, function ($a, $b) { + return $b['text_start'] - $a['text_start']; + }); + foreach ($spans as $span) { + $start = $span['text_start']; + $end = $span['text_end']; + if ($start < 0 || $end <= $start) { + continue; + } + $s = $shiftByCite($start); + $e = $shiftByCite($end); + if ($e > strlen($html)) { + $e = strlen($html); + } + $checkIds = array_values(array_unique($span['check_ids'])); + $refNos = array_values(array_unique($span['ref_nos'])); + sort($refNos); + $reasonParts = []; + foreach ($refNos as $rn) { + if (!empty($span['reasons'][$rn])) { + $reasonParts[] = '[' . $rn . '] ' . $span['reasons'][$rn]; + } + } + $title = htmlspecialchars( + '引用句可能不合理: ' . implode('; ', $reasonParts), + ENT_QUOTES, + 'UTF-8' + ); + $open = ''; + $close = ''; + $html = substr($html, 0, $s) . $open . substr($html, $s, $e - $s) . $close . substr($html, $e); + } + } + + return ['html' => $html, 'issues' => $issues, 'issue_count' => $issueCount]; + } + + /** + * @return array refer_index => row + */ + public function loadReferMapByPArticleId($pArticleId) + { + $map = []; + if ($pArticleId <= 0) { + return $map; + } + $rows = Db::name('production_article_refer') + ->where('p_article_id', $pArticleId) + ->where('state', 0) + ->order('index asc') + ->select(); + foreach ($rows as $row) { + $map[intval($row['index'])] = $row; + } + return $map; + } + public function formatReferForLlm($refer) + { + $parts = []; + foreach (['title', 'author', 'joura', 'dateno', 'refer_doi', 'doilink'] as $f) { + $v = trim((string)$this->arrGet($refer, $f, '')); + if ($v !== '') { + $parts[] = ucfirst($f) . ': ' . 
$v; + } + } + $content = trim((string)$this->arrGet($refer, 'refer_content', '')); + if ($content !== '') { + $parts[] = 'Reference: ' . $content; + } + return implode("\n", $parts); + } + + /** + * 从 article_main.content 提取 blue 引用 + */ + public function extractReferences($content) + { + $result = []; + preg_match_all('/\[([\d,\-\s]+)\]<\/blue>/', $content, $matches,PREG_OFFSET_CAPTURE); + if (empty($matches[0])) { + return []; + } + + foreach ($matches[0] as $index => $match) { + + $fullTag = $match[0]; + $tagStart = $match[1]; + $tagEnd = $tagStart + strlen($fullTag); + $rawRef = trim($matches[1][$index][0]); + $referenceNumbers = $this->expandReferenceNumbers($rawRef); + + $sentenceStart = $this->findSentenceStart($content, $tagStart); + $sentenceEnd = $this->findSentenceEnd($content, $tagEnd); + $originalText = mb_substr($content, $sentenceStart, $sentenceEnd - $sentenceStart); + $originalText = preg_replace('/\[[\d,\-\s]+\]<\/blue>/', '', $originalText); + $originalText = trim(strip_tags($originalText)); + + if ($originalText === '' || empty($referenceNumbers)) { + continue; + } + + $result[] = [ + 'reference_raw' => $rawRef, + 'reference_numbers' => $referenceNumbers, + 'original_text' => $originalText, + 'reference_start' => $tagStart, + 'reference_end' => $tagEnd, + 'text_start' => $sentenceStart, + 'text_end' => $sentenceEnd, + ]; + } + + return $result; + } + + public function expandReferenceNumbers($refStr) + { + $refStr = str_replace( + [',', '–', '—', '−', '‐', '‑'], + [',', '-', '-', '-', '-', '-'], + trim($refStr) + ); + $numbers = []; + foreach (explode(',', $refStr) as $part) { + $part = trim($part); + if ($part === '') { + continue; + } + if (preg_match('/^(\d+)\s*-\s*(\d+)$/', $part, $m)) { + $start = intval($m[1]); + $end = intval($m[2]); + if ($start <= $end) { + $numbers = array_merge($numbers, range($start, $end)); + } + } elseif (ctype_digit($part)) { + $numbers[] = intval($part); + } + } + return array_values(array_unique($numbers)); + } + 
+ private function findSentenceStart($content, $position) + { + $start = 0; + foreach (['.', '。', '!', '?', "\n"] as $delimiter) { + $pos = strrpos(substr($content, 0, $position), $delimiter); + if ($pos !== false) { + $start = max($start, $pos + 1); + } + } + return $start; + } + + private function findSentenceEnd($content, $position) + { + $length = strlen($content); + $endPositions = []; + foreach (['.', '。', '!', '?', "\n"] as $delimiter) { + $pos = strpos($content, $delimiter, $position); + if ($pos !== false) { + $endPositions[] = $pos + 1; + } + } + return empty($endPositions) ? $length : min($endPositions); + } + + private function pushJob($checkId, $delaySeconds = 0) + { + $jobClass = 'app\api\job\ReferenceCheck@fire'; + $data = ['check_id' => $checkId]; + try { + if ($delaySeconds > 0) { + $jobId = Queue::later($delaySeconds, $jobClass, $data, self::QUEUE_NAME); + } else { + $jobId = Queue::push($jobClass, $data, self::QUEUE_NAME); + } + var_dump("=====jobId:".$jobId); + } catch (\Exception $e) { + \think\Log::error('ReferenceCheck pushJob failed check_id=' . $checkId . ' ' . 
$e->getMessage()); + throw $e; + } + } +} diff --git a/application/common/service/LLMService.php b/application/common/service/LLMService.php new file mode 100644 index 00000000..2e056297 --- /dev/null +++ b/application/common/service/LLMService.php @@ -0,0 +1,490 @@ +url = trim((string)Env::get('promotion.promotion_llm_url', '')); + $this->model = trim((string)Env::get('promotion.promotion_llm_model', '')); + $this->apiKey = trim((string)Env::get('promotion.promotion_llm_api_key', '')); + $this->timeout = max(30, intval(Env::get('promotion.promotion_llm_timeout', 120))); + } + + /** + * @param string $contextText 正文引用处句子 + * @param string $referText 参考文献条目(或 refer 格式化文本) + */ + public function checkReference($contextText, $referText) + { + $fallback = [ + 'is_match' => false, + 'confidence' => 0.0, + 'reason' => 'LLM not configured or request failed', + ]; + \think\Log::info('llmUrl:'.$this->url); + var_dump("in URL====".$this->url); + if ($this->url === '' || $this->model === '') { + return $fallback; + } + + $contextText = trim($contextText); + $referText = trim($referText); + if ($contextText === '' || $referText === '') { + return [ + 'is_match' => false, + 'confidence' => 0.0, + 'reason' => 'Empty citation context or reference text', + ]; + } + + if (mb_strlen($contextText) > 2000) { + $contextText = mb_substr($contextText, 0, 2000); + } + if (mb_strlen($referText) > 4000) { + $referText = mb_substr($referText, 0, 4000); + } + + $system = $this->buildReferenceCheckSystemPrompt(); + \think\Log::info('system:' . $system); + + $user = $this->buildReferenceCheckUserPrompt($contextText, $referText); + \think\Log::info('user:' . 
$user); + $payload = [ + 'model' => $this->model, + 'temperature' => 0, + 'messages' => [ + ['role' => 'system', 'content' => $system], + ['role' => 'user', 'content' => $user], + ], + ]; + + $content = $this->postChat($payload); + if ($content === null) { + return $fallback; + } + + $parsed = $this->parseJson($content); + if ($parsed === null) { + return $fallback; + } + + $isMatch = !empty($parsed['is_match']); + $confidence = $this->snapReferenceCheckConfidence( + $this->normalizeConfidence(isset($parsed['confidence']) ? $parsed['confidence'] : 0), + $isMatch + ); + + return [ + 'is_match' => $isMatch, + 'confidence' => $confidence, + 'reason' => $this->cleanReason((string)(isset($parsed['reason']) ? $parsed['reason'] : '')), + ]; + } + private function buildReferenceCheckSystemPrompt() + { + return <<<'PROMPT' +你是一名护理与医学期刊的资深编辑,专门校对「正文引用句」与「对应参考文献条目」是否匹配。 + +你的职责是判断:作者在该引用位置引用的观点/数据/结论/方法/定义,是否能够被该条参考文献合理支撑。 + +你只能依据用户提供的两段文本判断,不得假设已阅读全文,不得联网,不得编造文献中未出现的信息。 + +【输入内容】 +你将收到: +1. 正文引用句(引用位置附近的一句话或一段话) +2. 当前对应的参考文献条目(仅当前编号,不是整篇参考文献列表) + +你必须严格只评估「当前这一条参考文献」与引用句的关系。 + +==================== +【核心判断目标】 +判断: +正文中的核心论点、事实、数据、定义、护理措施、医学结论、研究发现、理论依据、政策依据等,是否可由该条参考文献合理支撑。 + +你评估的是“引用是否成立”,不是“句子是否正确”。 + +==================== +【强制约束(必须遵守)】 + +1. 只能依据用户提供的信息判断 +- 不得假设你看过全文。 +- 不得根据常识补全文献内容。 +- 不得根据作者、期刊名或研究热点脑补研究结果。 +- 不得把“可能研究了”视为“能够支撑”。 + +2. 严禁串号判断 +- 仅允许依据「当前引用句」与「当前参考文献条目」判断。 +- 严禁利用其它参考文献编号或上下文内容推断当前文献。 + +3. 不得关键词硬匹配 +- 不得因为标题里出现相同关键词(如护理、患者、干预、效果、治疗、心理)就直接判定匹配。 +- 必须关注:对象、人群、疾病、干预方式、研究主题、核心结论是否一致。 + +4. 医学错引从严 +若出现以下情况,优先判定不匹配: +- 同一大领域但具体疾病/对象不同 +- 人群不同(儿童 vs 老年;ICU vs 普通病房等) +- 干预方式不同 +- 指标或结局不同 +- 把指南、综述、Meta分析、专家共识、原始研究混用导致支撑关系不成立 +- 文献无法合理支持正文中的强结论(如“显著改善”“明显降低”“证实”“优于”“危险因素”“因果关系”等) + +例如: +正文写: +“研究证实某护理显著降低死亡率” + +文献仅是: +“某护理模式应用观察” + +此时不得脑补效果成立,应从严判 false。 + +5. 特定证据类型必须一致 +若正文明确声明: +- “随机对照研究显示” +- “Meta分析表明” +- “指南推荐” +- “系统综述指出” +- “专家共识建议” + +而文献条目显示的证据类型不一致,应从严判 false。 + +6. 
信息不足从严 +若参考文献条目信息过少(仅作者+年份等): +- 只有在能够建立明确合理关联时才判 true。 +- 无法建立明确关联时,判 false(confidence=0.35)。 + +==================== +【评估步骤(按顺序在心里完成)】 + +第一步:主题域一致性 +判断正文句子的核心主题是否与文献属于同一专业领域,包括但不限于: +- 疾病/诊断 +- 护理问题 +- 患者人群 +- 医疗场景 +- 干预措施 +- 指标/结局 +- 理论模型 +- 政策/指南 + +第二步:关键断言对齐 +判断正文中的核心断言是否可被文献合理支撑: + +允许: +- 合理概括性引用 +- 轻度表述扩展 + +不允许: +- 张冠李戴 +- 过度推断 +- 用弱证据支撑强结论 +- 用相关性支撑因果性 +- 用观察研究支撑RCT级别表述 + +第三步:错引排查 +重点检查: +- 对象错 +- 疾病错 +- 场景错 +- 指标错 +- 方法错 +- 证据类型错 +- 研究层级不匹配 + +==================== +【最终判定规则】 + +is_match(二选一,必须一致) + +true: +满足以下全部条件: +- 主题明确相关 +- 核心对象基本一致 +- 正文关键论点能够被该文献合理支撑 +- 不存在明显错引风险 + +false: +任一情况满足即判 false: +- 主题无关 +- 具体对象明显不同 +- 核心结论对不上 +- 文献无法支撑正文强结论 +- 证据类型不匹配 +- 无法建立明确合理关联 +- 信息不足且无法确认 + +边界不清时,从严判 false。 + +==================== +【confidence 固定评分规则】 + +只能输出以下 6 个固定值之一: +0.95 +0.85 +0.75 +0.35 +0.25 +0.15 + +禁止输出: +0.5、0.6、0.7、0.8、0.9 等任何其它数字。 + +评分标准: + +0.95 +高度匹配: +主题、对象、研究方向、关键论点均明确对应。 + +0.85 +较匹配: +主题与核心论点一致,存在轻微概括,但仍合理支撑。 + +0.75 +基本匹配: +大方向一致,但有一定表述泛化或轻微不精确。 + +0.35 +存疑: +同领域但具体对象/结论不够明确; +或参考文献信息不足,建议人工复核。 + +0.25 +较可能错引: +主题相关但核心论点明显偏离; +对象、场景、结局存在明显差异。 + +0.15 +明确错引: +主题无关; +典型张冠李戴; +明显无法支撑正文内容。 + +硬性规则: +- is_match=true 时,confidence 只能是: +0.75 / 0.85 / 0.95 + +- is_match=false 时,confidence 只能是: +0.15 / 0.25 / 0.35 + +==================== +【评分稳定原则】 + +- 相同输入必须得到相同结论。 +- 优先依据“主题 + 核心断言”。 +- 不要被单个关键词误导。 +- 一句多引时,仅评价当前这一条文献。 +- 边界情况从严,降低漏报错引风险。 + +==================== +【reason 输出要求】 + +- 使用简体中文。 +- 仅说明: + 1)主题是否一致; + 2)核心论点是否能够支撑。 + +- 禁止模糊措辞: +“可能有关” +“看起来一致” +“应该支持” + +- 长度控制在 30~80 字。 + +==================== +【输出格式(绝对严格)】 + +仅输出一行 minified JSON。 +禁止 markdown。 +禁止代码块。 +禁止解释说明。 +禁止换行。 +禁止任何额外文字。 + +格式如下: + +{"is_match":true|false,"confidence":0.15|0.25|0.35|0.75|0.85|0.95,"reason":"简体中文原因说明"} + +【示例输出】 + +{"is_match":true,"confidence":0.95,"reason":"正文讨论的护理干预与文献研究对象、场景及核心结论一致,可合理支撑该引用。"} +PROMPT; + } + + /** + * 护理/医学期刊:正文引用句与参考文献条目的相关性校对 + */ + private function buildReferenceCheckSystemPrompt2() + { + return <<<'PROMPT' 
+你是一名护理与医学期刊的资深编辑,专门校对「正文引用句」与「对应参考文献条目」是否匹配。 +你只能依据用户提供的两段文本判断,不得假设已阅读全文,不得编造文献中未出现的信息。 + +## 校对目标 +判断:作者在该引用位置引用的观点/数据/结论/方法/定义,是否可由该条参考文献合理支撑(主题与论证层面是否对得上)。 + +## 评估步骤(按顺序,在心里完成即可) +1. 主题域:正文句子的核心主题(疾病、人群、干预、结局、理论、政策等)与文献题目/作者/期刊/年份/条目内容是否属于同一专业领域。 +2. 论点对齐:正文句子的关键断言,是否与该文献可能报告的内容方向一致(允许概括性引用,但不可张冠李戴)。 +3. 错引排查:是否出现「仅同一大领域但具体对象不同」「人群/场景/指标明显不符」「把指南/综述/原始研究混用导致支撑关系不成立」等常见错引。 +4. 信息不足:若文献条目过简(仅作者+年份等),只能做粗判;若完全无法建立合理关联,按不匹配处理。 + +## is_match 判定(二选一,必须一致) +- true:主题明确相关,且引用句的核心信息与该文献可能内容高度吻合或可被其合理概括支撑。 +- false:主题无关、明显错引、具体论点对不上、或无法建立合理关联。边界不清时从严标 false(降低漏报错引风险)。 + +## confidence 评分(稳定性要求:只能使用下列 6 个固定值之一,禁止 0.72、0.8 等其它小数) +| 分值 | 含义 | 通常配合 is_match | +| 0.95 | 高度匹配:主题、对象、论点均清晰对应 | true | +| 0.85 | 较匹配:主题与论点一致,表述略宽但仍可接受 | true | +| 0.75 | 基本匹配:大方向对,有轻微不精确或概括过度 | true | +| 0.35 | 存疑:同领域但具体对不上,或信息不足,建议人工复核 | false | +| 0.25 | 较可能错引:主题或论点明显偏离 | false | +| 0.15 | 明确错引:主题无关或典型张冠李戴 | false | + +硬性规则(必须遵守,否则视为无效输出): +- is_match=true 时,confidence 只能是 0.75、0.85 或 0.95。 +- is_match=false 时,confidence 只能是 0.15、0.25 或 0.35。 +- 禁止输出 0.5、0.6、0.9 等未列出的 confidence 值。 + +## 评分稳定原则 +- 相同输入应得到相同结论;不要因措辞风格波动而改变档位。 +- 优先依据「主题 + 关键断言」而非个别泛化词(如「研究」「护理」「患者」)判匹配。 +- 一句多引时,只评价当前这一条文献与引用句的关系,勿与其它序号混淆。 + +## 输出格式(仅输出一行 minified JSON,无 markdown、无前后说明) +{"is_match":true|false,"confidence":0.15|0.25|0.35|0.75|0.85|0.95,"reason":"1-2句简体中文,说明匹配或不匹配的关键依据"} +PROMPT; + } + + private function buildReferenceCheckUserPrompt($contextText, $referText) + { + return "【正文引用句】(含该处引用所要支撑的观点,可能为中文或英文)\n" + . $contextText + . "\n\n【对应参考文献条目】(书目信息,可能不完整)\n" + . $referText + . "\n\n请按 system 中的步骤与评分表完成校对,只返回 JSON。"; + } + + /** + * 将模型输出的 confidence 吸附到固定档位,并与 is_match 规则对齐 + */ + private function snapReferenceCheckConfidence($confidence, $isMatch) + { + $matchBands = [0.75, 0.85, 0.95]; + $mismatchBands = [0.15, 0.25, 0.35]; + $bands = $isMatch ? 
$matchBands : $mismatchBands; + + $nearest = $bands[0]; + $minDiff = abs($confidence - $nearest); + foreach ($bands as $band) { + $diff = abs($confidence - $band); + if ($diff < $minDiff) { + $minDiff = $diff; + $nearest = $band; + } + } + return $nearest; + } + + private function postChat(array $payload) + { + try{ + $ch = curl_init(); + curl_setopt($ch, CURLOPT_URL, $this->url); + curl_setopt($ch, CURLOPT_POST, true); + curl_setopt($ch, CURLOPT_POSTFIELDS, json_encode($payload, JSON_UNESCAPED_UNICODE)); + curl_setopt($ch, CURLOPT_RETURNTRANSFER, true); + curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, min(15, $this->timeout)); + curl_setopt($ch, CURLOPT_TIMEOUT, $this->timeout); + + $headers = ['Content-Type: application/json']; + if ($this->apiKey !== '') { + $headers[] = 'Authorization: Bearer ' . $this->apiKey; + } + curl_setopt($ch, CURLOPT_HTTPHEADER, $headers); + + $raw = curl_exec($ch); + if ($raw === false) { + curl_close($ch); + return null; + } + $httpCode = intval(curl_getinfo($ch, CURLINFO_HTTP_CODE)); + \think\Log::info('httpCode:'.$httpCode); + curl_close($ch); + if ($httpCode < 200 || $httpCode >= 300) { + return null; + } + + $data = json_decode($raw, true); + if (!is_array($data)) { + return null; + } + if (isset($data['choices'][0]['message']['content'])) { + return (string)$data['choices'][0]['message']['content']; + } + if (isset($data['content'])) { + return (string)$data['content']; + } + }catch (Exception $exception){ + var_dump($exception->getMessage()); + } + + return null; + } + + private function parseJson($raw) + { + $raw = trim($raw); + if ($raw === '') { + return null; + } + $raw = preg_replace('/^```[a-zA-Z]*\s*|```$/m', '', $raw); + $raw = trim($raw); + + $obj = json_decode($raw, true); + if (is_array($obj)) { + return $obj; + } + if (preg_match('/\{.*\}/s', $raw, $m)) { + $obj = json_decode($m[0], true); + if (is_array($obj)) { + return $obj; + } + } + return null; + } + + private function normalizeConfidence($value) + { + if 
(!is_numeric($value)) { + return 0.0; + } + $v = (float)$value; + if ($v > 1.0 && $v <= 100.0) { + $v = $v / 100.0; + } + return round(max(0.0, min(1.0, $v)), 4); + } + + private function cleanReason($text) + { + $text = trim(preg_replace('/\s+/', ' ', $text)); + if (mb_strlen($text) > 500) { + $text = mb_substr($text, 0, 500); + } + return $text !== '' ? $text : 'No reason provided'; + } +} From 7e5a087a4eaffd59cd5f138f1cb228a93d249fe8 Mon Sep 17 00:00:00 2001 From: wyn <1074145239@qq.com> Date: Thu, 21 May 2026 11:30:46 +0800 Subject: [PATCH 02/12] Changes --- application/api/controller/Article.php | 33 +- application/common/ReferenceCheckService.php | 184 ++++++++--- application/common/service/LLMService.php | 330 +++++++++++++++++++ 3 files changed, 494 insertions(+), 53 deletions(-) diff --git a/application/api/controller/Article.php b/application/api/controller/Article.php index e346c264..e2875d01 100644 --- a/application/api/controller/Article.php +++ b/application/api/controller/Article.php @@ -6640,30 +6640,37 @@ class Article extends Base return jsonError($e->getMessage()); } } - public function referenceCheckEnqueueArticleMain(){ - $data = $this->request->post(); + $amId = 127448; + $svc = new ReferenceCheckService(); + $main = Db::name('article_main') + ->field('am_id,content,article_id') + ->where('am_id', $amId) + ->whereIn('state', [0, 2]) + ->find(); + $result = $svc->enqueueByArticleMain($main); + return jsonSuccess($result); + } + public function referenceCheckEnqueueArticle(){ + $data = $this->request->get(); $articleId = intval(isset($data['article_id']) ? 
$data['article_id'] : 0); + var_dump($articleId); if ($articleId <= 0) { return jsonError('article_id is required'); } - $mainsList = Db::name('article_main') - ->field('am_id,content,article_id') - ->where('article_id', $articleId) - ->whereIn('state', [0, 2]) - ->order('sort asc') - ->select(); - - $svc = new ReferenceCheckService(); - foreach ($mainsList as $mainInfo ){ - $svc->enqueueByArticleMain($mainInfo); + try { + $svc = new ReferenceCheckService(); + $result = $svc->enqueueByArticle($articleId); + return jsonSuccess($result); + } catch (\Exception $e) { + return jsonError($e->getMessage()); } } /** * 按文章批量入队:从 article_main 提取 blue 引用与文献号 * POST: article_id, clear_previous=1(默认清空该文旧明细后重检) */ - public function referenceCheckEnqueueArticle() + public function referenceCheckEnqueueArticle2() { $data = $this->request->post(); $articleId = intval(isset($data['article_id']) ? $data['article_id'] : 0); diff --git a/application/common/ReferenceCheckService.php b/application/common/ReferenceCheckService.php index f913e8e1..194f60cd 100644 --- a/application/common/ReferenceCheckService.php +++ b/application/common/ReferenceCheckService.php @@ -75,7 +75,10 @@ class ReferenceCheckService // ->find(); $citations = $this->extractReferences((string)$main['content']); // return $citations; - + if (empty($citations)) { + $this->setAmRefCheckStatus($amId, self::AM_STATUS_NONE); + return; + } $prod = Db::name('production_article') ->where('article_id', $main['article_id']) ->where('state', 0) @@ -128,15 +131,10 @@ class ReferenceCheckService $this->setAmRefCheckStatus($amId, self::AM_STATUS_RUNNING); } - /** - * 按 article_id 扫描 t_article_main,为每个 blue 引用 × 文献号入队 - */ - public function enqueueByArticle($articleId, $clearPrevious = true) - { + public function enqueueByArticle($articleId){ if ($articleId <= 0) { throw new \InvalidArgumentException('article_id is required'); } - $prod = Db::name('production_article') ->where('article_id', $articleId) ->where('state', 0) @@ 
-144,25 +142,18 @@ class ReferenceCheckService if (empty($prod)) { throw new \RuntimeException('production_article not found for article_id=' . $articleId); } - $pArticleId = intval($prod['p_article_id']); $referMap = $this->loadReferMapByPArticleId($pArticleId); $mains = Db::name('article_main') - ->field('am_id,content') + ->field('am_id,content,article_id') ->where('article_id', $articleId) ->whereIn('state', [0, 2]) ->order('sort asc') ->select(); - if (empty($mains)) { throw new \RuntimeException('article_main is empty'); } - - if ($clearPrevious) { - $this->clearArticleChecks($articleId); - } - $queued = 0; $skipped = 0; $checkIds = []; @@ -189,20 +180,16 @@ class ReferenceCheckService $now = date('Y-m-d H:i:s'); // [70-73] 展开为 reference_no=70,71,72,73 共 4 条记录 $checkId = Db::name('article_reference_check_result')->insertGetId([ - 'article_id' => $articleId, - 'am_id' => intval($main['am_id']), + 'article_id' => $main['article_id'], 'p_article_id' => $pArticleId, - 'p_refer_id' => intval($refer['p_refer_id']), - 'refer_index' => $referIndex, + 'am_id' => intval($main['am_id']), 'reference_no' => $refNo, - 'reference_raw' => $cite['reference_raw'], - 'cite_tag_start' => intval($cite['reference_start']), - 'cite_tag_end' => intval($cite['reference_end']), - 'text_start' => intval($cite['text_start']), - 'text_end' => intval($cite['text_end']), - 'content_a' => $cite['original_text'], - 'content_b' => $referText, - 'status' => 0, + 'refer_index' => $refNo, + 'origin_text' => $cite['original_text'], + 'refer_text' => $referText, + 'p_refer_id' => $referMap[$referIndex]['p_refer_id'], + 'text_start' => $cite['text_start'], + 'text_end' => $cite['text_end'], 'created_at' => $now, 'updated_at' => $now, ]); @@ -658,12 +645,21 @@ class ReferenceCheckService $referenceNumbers = $this->expandReferenceNumbers($rawRef); $sentenceStart = $this->findSentenceStart($content, $tagStart); - $sentenceEnd = $this->findSentenceEnd($content, $tagEnd); - $originalText = 
mb_substr($content, $sentenceStart, $sentenceEnd - $sentenceStart); - $originalText = preg_replace('/\[[\d,\-\s]+\]<\/blue>/', '', $originalText); - $originalText = trim(strip_tags($originalText)); + $sentenceEnd = $this->findSentenceEnd($content, $tagEnd, $tagEnd); + $originalText = $this->buildCitationContextText($content, $sentenceStart, $sentenceEnd); - if ($originalText === '' || empty($referenceNumbers)) { + if (!$this->isMeaningfulCitationContext($originalText)) { + list($sentenceStart, $sentenceEnd) = $this->widenCitationContextBounds( + $content, + $tagStart, + $tagEnd, + $sentenceStart, + $sentenceEnd + ); + $originalText = $this->buildCitationContextText($content, $sentenceStart, $sentenceEnd); + } + + if (!$this->isMeaningfulCitationContext($originalText) || empty($referenceNumbers)) { continue; } @@ -707,29 +703,137 @@ class ReferenceCheckService return array_values(array_unique($numbers)); } + private function buildCitationContextText($content, $start, $end) + { + $text = mb_substr($content, $start, max(0, $end - $start)); + $text = preg_replace('/\[[\d,\-\s]+\]<\/blue>/', '', $text); + $text = trim(strip_tags($text)); + $text = preg_replace('/\s+/u', ' ', $text); + + return $text; + } + + /** + * 过滤仅标点、过短或无字母/汉字的上下文(如去掉标签后只剩 ".") + */ + private function isMeaningfulCitationContext($text) + { + $text = trim($text); + if ($text === '') { + return false; + } + if ($this->isOnlyPunctuationOrSpace($text)) { + return false; + } + if (!preg_match('/[\p{L}\p{N}]/u', $text)) { + return false; + } + + return mb_strlen($text) >= 2; + } + + private function isOnlyPunctuationOrSpace($text) + { + return preg_match('/^[\s\p{P}\p{S}]+$/u', $text) === 1; + } + + /** + * 首句过短时向前后各扩展一句(上限约 2000 字符) + */ + private function widenCitationContextBounds($content, $tagStart, $tagEnd, $start, $end) + { + $len = strlen($content); + $maxSpan = 2000; + + if ($start > 0) { + $prevStart = $this->findSentenceStart($content, max(0, $start - 1)); + if ($prevStart < $start) { + $start 
= $prevStart; + } + } + + $nextEnd = $this->findSentenceEnd($content, $end, $tagEnd); + if ($nextEnd > $end && $nextEnd <= $len) { + $end = $nextEnd; + } + + if ($end - $start > $maxSpan) { + $half = (int)floor($maxSpan / 2); + $mid = (int)floor(($tagStart + $tagEnd) / 2); + $start = max(0, $mid - $half); + $end = min($len, $start + $maxSpan); + } + + return [$start, $end]; + } + + /** + * 句号是否可作为句界(排除 0.95、3.14 等小数点) + */ + private function isSentenceDelimiterAt($content, $pos, $delimiter) + { + $len = strlen($content); + if ($delimiter !== '.' || $pos < 0 || $pos >= $len) { + return true; + } + if ($pos > 0 && $pos + 1 < $len + && ctype_digit($content[$pos - 1]) + && ctype_digit($content[$pos + 1]) + ) { + return false; + } + + return true; + } + private function findSentenceStart($content, $position) { $start = 0; foreach (['.', '。', '!', '?', "\n"] as $delimiter) { $pos = strrpos(substr($content, 0, $position), $delimiter); - if ($pos !== false) { + if ($pos !== false && $this->isSentenceDelimiterAt($content, $pos, $delimiter)) { $start = max($start, $pos + 1); } } return $start; } - private function findSentenceEnd($content, $position) + /** + * @param int $searchFrom 从该字节位置起查找句末 + * @param int $tagEnd 引用标签结束位置;用于跳过 后紧跟的孤立句号 + */ + private function findSentenceEnd($content, $searchFrom, $tagEnd = 0) { $length = strlen($content); - $endPositions = []; - foreach (['.', '。', '!', '?', "\n"] as $delimiter) { - $pos = strpos($content, $delimiter, $position); - if ($pos !== false) { - $endPositions[] = $pos + 1; + $minPos = max(0, $searchFrom); + + while ($minPos < $length) { + $endPositions = []; + foreach (['.', '。', '!', '?', "\n"] as $delimiter) { + $pos = strpos($content, $delimiter, $minPos); + if ($pos !== false && $this->isSentenceDelimiterAt($content, $pos, $delimiter)) { + $endPositions[] = $pos + 1; + } } + if (empty($endPositions)) { + return $length; + } + + $end = min($endPositions); + if ($tagEnd <= 0 || $end <= $tagEnd) { + return $end; + } + + $gap 
= substr($content, $tagEnd, $end - $tagEnd); + $gapText = trim(strip_tags(preg_replace('/\[[\d,\-\s]+\]<\/blue>/', '', $gap))); + if ($gapText !== '' && !$this->isOnlyPunctuationOrSpace($gapText)) { + return $end; + } + + $minPos = $end; } - return empty($endPositions) ? $length : min($endPositions); + + return $length; } private function pushJob($checkId, $delaySeconds = 0) diff --git a/application/common/service/LLMService.php b/application/common/service/LLMService.php index 2e056297..ab5f9e72 100644 --- a/application/common/service/LLMService.php +++ b/application/common/service/LLMService.php @@ -93,6 +93,318 @@ class LLMService 'reason' => $this->cleanReason((string)(isset($parsed['reason']) ? $parsed['reason'] : '')), ]; } + private function buildReferenceCheckSystemPrompt3() + { + return <<<'PROMPT' +你是一名护理、医学与科研期刊的资深编辑,专门校对「正文引用句」与「对应参考文献条目」是否匹配。 + +你的职责是判断:作者在该引用位置引用的观点、数据、结论、方法、定义、理论或证据,是否能够被该条参考文献合理支撑。 + +你只能依据用户提供的两段文本判断,不得假设已阅读全文,不得联网,不得编造文献中未出现的信息。 + +【输入内容】 +你将收到: + +1. 正文引用句(引用位置附近的一句话或一段话) + +2. 当前对应的参考文献条目(仅当前编号,不是整篇参考文献列表) + +你必须严格只评估「当前这一条参考文献」与引用句的关系。 + +==================== +【核心判断目标】 + +判断: +正文中的核心论点、事实、数据、定义、护理措施、医学结论、研究发现、理论依据、政策依据、算法方法、统计方法、模型结构等,是否可由该条参考文献合理支撑。 + +你评估的是“引用是否成立”,不是“句子是否正确”。 + +==================== +【硬性约束(必须遵守)】 + +1. 只能依据用户提供的信息判断 +- 不得假设看过全文。 +- 不得联网。 +- 不得根据常识补全文献内容。 +- 不得根据作者、期刊名、热点方向脑补研究结果。 +- 不得把“可能研究了”视为“能够支撑”。 + +2. 严禁串号判断 +- 仅允许依据「当前引用句」与「当前参考文献条目」判断。 +- 严禁利用其它参考文献编号或上下文内容推断当前文献。 + +3. 不得关键词硬匹配 +禁止因为出现相同关键词就判匹配,例如: +“护理”“患者”“治疗”“效果”“心理”“机器学习”“深度学习”“模型”等。 + +必须重点判断: +- 对象是否一致 +- 疾病/场景是否一致 +- 人群是否一致 +- 干预方式是否一致 +- 方法学是否一致 +- 关键结论是否一致 + +4. 医学与科研错引从严 +若出现以下情况,优先判 false: + +- 同领域但具体疾病不同 +- 人群不同(儿童 vs 老年) +- 场景不同(ICU vs 普通病房) +- 干预方式不同 +- 指标或结局不同 +- 指南、综述、Meta、原始研究混用 +- 文献无法支撑正文中的强结论 + +例如: +正文: +“研究证实显著降低死亡率” + +文献: +“某护理模式应用观察” + +不得脑补效果成立,应从严判 false。 + +5. 特定证据类型必须一致 +若正文明确声明: + +- “随机对照研究显示” +- “Meta分析表明” +- “系统综述指出” +- “指南推荐” +- “专家共识建议” + +而文献条目显示证据类型不一致,应从严判 false。 + +6. 
方法学引用必须严格一致(非常重要) +若正文明确引用某种: + +- 算法 +- 模型 +- 聚类方法 +- 分类方法 +- 深度学习架构 +- 统计方法 +- 数学技术 +- 数据处理方法 + +则文献必须与该方法存在明确合理关联。 + +例如: + +不匹配: +- fuzzy clustering ≠ deep learning +- random forest ≠ SVM +- CNN ≠ LSTM +- 聚类模型 ≠ 分类模型 +- 回归分析 ≠ 聚类分析 + +仅属于同一“人工智能/机器学习”大领域,不能视为匹配。 + +若方法体系明显不同: +优先判 false + confidence=0.15。 + +7. 信息不足从严 +若参考文献条目信息过少(仅作者+年份等): + +只有在能够建立明确关联时才可判 true。 + +无法建立明确关联: +判 false。 + +==================== +【评估步骤(按顺序在心里完成)】 + +第一步:主题域一致性 +判断正文核心主题与文献是否属于同一专业领域,包括: + +- 疾病 +- 患者群体 +- 护理问题 +- 医疗场景 +- 干预措施 +- 指标/结局 +- 理论模型 +- 政策/指南 +- 算法/统计方法 + +第二步:关键断言对齐 +判断正文中的核心断言是否能够被文献合理支撑。 + +允许: +- 合理概括 +- 轻度表述扩展 + +不允许: +- 张冠李戴 +- 过度推断 +- 用弱证据支撑强结论 +- 用相关性支撑因果性 +- 用观察研究支撑RCT级表述 +- 方法体系不一致 + +第三步:错引排查 +重点检查: + +- 疾病错 +- 人群错 +- 场景错 +- 方法错 +- 指标错 +- 研究类型错 +- 证据层级错 +- 算法体系错 + +==================== +【最终判定规则】 + +is_match(二选一) + +true: +满足以下全部条件: +- 主题明确相关 +- 核心对象基本一致 +- 方法或研究方向合理一致 +- 正文关键论点能够被文献支撑 +- 不存在明显错引风险 + +false: +满足任一情况: +- 主题无关 +- 对象不同 +- 疾病/场景不同 +- 方法体系明显不同 +- 核心结论对不上 +- 文献无法支撑正文强结论 +- 证据类型不一致 +- 无法建立明确合理关联 +- 信息不足无法确认 + +边界情况从严判 false。 + +==================== +【confidence 固定评分规则】 + +只能输出以下固定值之一: + +0.98 +0.92 +0.85 +0.78 +0.65 +0.45 +0.35 +0.25 +0.15 + +禁止输出任何其它数字。 + +-------------------- +【true 档位】 + +0.98(几乎完全一致) +主题、对象、方法、核心结论高度一致。 + +0.92(高度匹配) +主题与关键论点明确一致,仅存在轻微概括。 + +0.85(较匹配) +主题和核心结论一致,但表述略宽。 + +0.78(基本匹配) +大方向一致,但存在轻微泛化或不精确。 + +0.65(边界匹配) +存在一定支撑关系,但结论略强或关联较弱。 + +-------------------- +【false 档位】 + +0.45(人工复核) +信息不足、标题过泛、同领域但无法确认。 + +0.35(较可能错引) +同领域但对象、场景、结论存在明显偏差。 + +0.25(明显不匹配) +主题相关但核心论点明显不一致。 + +0.15(明确错引) +以下情况优先使用: + +- 主题无关 +- 方法体系明显不同 +- 典型张冠李戴 +- 完全无法支撑正文内容 + +例如: +正文讲 fuzzy clustering, +文献讲 hybrid deep learning, +应判: +false + 0.15。 + +==================== +【硬性规则】 + +- is_match=true 时: +confidence 只能是: +0.65 / 0.78 / 0.85 / 0.92 / 0.98 + +- is_match=false 时: +confidence 只能是: +0.15 / 0.25 / 0.35 / 0.45 + +禁止违反。 + +==================== +【评分稳定原则】 + +- 相同输入必须得到相同结果。 +- 优先依据“主题 + 核心断言”。 +- 不要被单个关键词误导。 +- 一句多引时,仅评价当前这一条文献。 +- 边界情况从严,降低漏报错引风险。 +- 
方法学不一致时优先 false。 + +==================== +【reason 输出要求】 + +- 使用简体中文。 +- 长度控制在 30~80 字。 +- 只说明两件事: + 1)主题/对象/方法是否一致; + 2)核心论点是否能够支撑。 + +禁止模糊措辞: +- “可能有关” +- “看起来一致” +- “应该支持” +- “似乎” + +应明确表达: +一致 / 不一致 / 无法支撑。 + +==================== +【输出格式(绝对严格)】 + +仅输出一行 minified JSON。 + +禁止: +- markdown +- 代码块 +- 换行 +- 解释说明 +- 前后文字 + +格式: + +{"is_match":true|false,"confidence":0.15|0.25|0.35|0.45|0.65|0.78|0.85|0.92|0.98,"reason":"简体中文原因"} + +【示例输出】 + +{"is_match":false,"confidence":0.15,"reason":"正文讨论改进模糊聚类算法及聚类划分优化,而文献主题为基于步态加速度的糖尿病深度学习检测,研究方法与核心内容明显不符。"} +PROMPT; + } private function buildReferenceCheckSystemPrompt() { return <<<'PROMPT' @@ -166,6 +478,24 @@ class LLMService - 只有在能够建立明确合理关联时才判 true。 - 无法建立明确关联时,判 false(confidence=0.35)。 +7. 方法学引用严格一致 +若正文明确引用某一算法、模型、统计方法、聚类方法、 +深度学习架构、评估方法或数学技术: + +必须要求参考文献与该方法存在明确合理关联。 + +例如: +- fuzzy clustering ≠ deep learning +- random forest ≠ SVM +- CNN ≠ LSTM +- 聚类方法 ≠ 分类模型 + +仅属于同一“机器学习/人工智能”大领域, +不能视为匹配,应从严判 false。 + +若方法体系明显不同,优先判: +confidence=0.15 + ==================== 【评估步骤(按顺序在心里完成)】 From 74383d24ea763d2fa3c01c3c14781eeeac4a6bea Mon Sep 17 00:00:00 2001 From: wyn <1074145239@qq.com> Date: Thu, 21 May 2026 11:31:19 +0800 Subject: [PATCH 03/12] Changes --- application/common/service/LLMService.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/application/common/service/LLMService.php b/application/common/service/LLMService.php index ab5f9e72..ce66056c 100644 --- a/application/common/service/LLMService.php +++ b/application/common/service/LLMService.php @@ -57,7 +57,7 @@ class LLMService $referText = mb_substr($referText, 0, 4000); } - $system = $this->buildReferenceCheckSystemPrompt(); + $system = $this->buildReferenceCheckSystemPrompt3(); \think\Log::info('system:' . 
$system); $user = $this->buildReferenceCheckUserPrompt($contextText, $referText); From 867621232bc318780f05b19f751aa320087006ce Mon Sep 17 00:00:00 2001 From: wyn <1074145239@qq.com> Date: Thu, 21 May 2026 13:55:13 +0800 Subject: [PATCH 04/12] Changes --- application/common/ReferenceCheckService.php | 152 ++++++++++++++++--- 1 file changed, 130 insertions(+), 22 deletions(-) diff --git a/application/common/ReferenceCheckService.php b/application/common/ReferenceCheckService.php index 194f60cd..27ff9d8c 100644 --- a/application/common/ReferenceCheckService.php +++ b/application/common/ReferenceCheckService.php @@ -193,7 +193,7 @@ class ReferenceCheckService 'created_at' => $now, 'updated_at' => $now, ]); - + continue; $this->pushJob(intval($checkId), $delay); $checkIds[] = $checkId; $queued++; @@ -631,33 +631,33 @@ class ReferenceCheckService public function extractReferences($content) { $result = []; - preg_match_all('/\[([\d,\-\s]+)\]<\/blue>/', $content, $matches,PREG_OFFSET_CAPTURE); + preg_match_all('/\[([\d,\-\s]+)\]<\/blue>/', $content, $matches, PREG_OFFSET_CAPTURE); if (empty($matches[0])) { return []; } + $tagSpans = []; foreach ($matches[0] as $index => $match) { + $tagSpans[] = [ + 'start' => $match[1], + 'end' => $match[1] + strlen($match[0]), + 'index' => $index, + ]; + } + foreach ($matches[0] as $index => $match) { $fullTag = $match[0]; $tagStart = $match[1]; $tagEnd = $tagStart + strlen($fullTag); $rawRef = trim($matches[1][$index][0]); $referenceNumbers = $this->expandReferenceNumbers($rawRef); - $sentenceStart = $this->findSentenceStart($content, $tagStart); - $sentenceEnd = $this->findSentenceEnd($content, $tagEnd, $tagEnd); - $originalText = $this->buildCitationContextText($content, $sentenceStart, $sentenceEnd); - - if (!$this->isMeaningfulCitationContext($originalText)) { - list($sentenceStart, $sentenceEnd) = $this->widenCitationContextBounds( - $content, - $tagStart, - $tagEnd, - $sentenceStart, - $sentenceEnd - ); - $originalText = 
$this->buildCitationContextText($content, $sentenceStart, $sentenceEnd); - } + list($localStart, $localEnd, $originalText) = $this->extractLocalCitationContext( + $content, + $tagStart, + $tagEnd, + $tagSpans + ); if (!$this->isMeaningfulCitationContext($originalText) || empty($referenceNumbers)) { continue; @@ -669,14 +669,81 @@ class ReferenceCheckService 'original_text' => $originalText, 'reference_start' => $tagStart, 'reference_end' => $tagEnd, - 'text_start' => $sentenceStart, - 'text_end' => $sentenceEnd, + 'text_start' => $localStart, + 'text_end' => $localEnd, ]; } return $result; } + /** + * 按引用位置截取局部上下文:优先取标签前叙述;同句多引时后续引用从上一标签后开始。 + */ + private function extractLocalCitationContext($content, $tagStart, $tagEnd, array $tagSpans) + { + $sentenceStart = $this->findSentenceStart($content, $tagStart); + $sentenceEnd = $this->findSentenceEnd($content, $tagEnd, $tagEnd); + + $prevTagEnd = $sentenceStart; + $nextTagStart = $sentenceEnd; + foreach ($tagSpans as $span) { + if ($span['end'] <= $tagStart && $span['end'] > $prevTagEnd && $span['end'] >= $sentenceStart) { + $prevTagEnd = $span['end']; + } + if ($span['start'] > $tagEnd && $span['start'] < $nextTagStart) { + $nextTagStart = $span['start']; + } + } + + $hasPriorCiteInSentence = ($prevTagEnd > $sentenceStart); + // 同句后续引用:从上一 标签后开始;首个引用:从整句开头到本标签前 + $localStart = $hasPriorCiteInSentence ? $prevTagEnd : $sentenceStart; + + // 默认:引用标签前的论述(如 Yin et al. [13] → 含 “higher than … Yin et al.”) + $localEnd = $tagStart; + $originalText = $this->buildCitationContextText($content, $localStart, $localEnd); + + // 标签前几乎无正文(如句末 … ICU nurses [14])→ 改用标签后至下一引用或句末 + if (!$this->isMeaningfulCitationContext($originalText) + || $this->shouldUseTrailingCitationContext($content, $localStart, $tagStart, $tagEnd) + ) { + $trailEnd = ($nextTagStart < $sentenceEnd) ? 
$nextTagStart : $sentenceEnd; + $trailText = $this->buildCitationContextText($content, $tagEnd, $trailEnd); + if ($this->isMeaningfulCitationContext($trailText)) { + $localStart = $tagEnd; + $localEnd = $trailEnd; + $originalText = $trailText; + } + } + + if (!$this->isMeaningfulCitationContext($originalText)) { + list($localStart, $localEnd) = $this->widenCitationContextBounds( + $content, + $tagStart, + $tagEnd, + $localStart, + $localEnd + ); + $originalText = $this->buildCitationContextText($content, $localStart, $localEnd); + } + + return [$localStart, $localEnd, $originalText]; + } + + /** + * 标签前仅有作者缩写等极短片段时,改用标签后上下文 + */ + private function shouldUseTrailingCitationContext($content, $localStart, $tagStart, $tagEnd) + { + $before = $this->buildCitationContextText($content, $localStart, $tagStart); + if (!$this->isMeaningfulCitationContext($before)) { + return true; + } + + return mb_strlen($before) < 25; + } + public function expandReferenceNumbers($refStr) { $refStr = str_replace( @@ -703,12 +770,43 @@ class ReferenceCheckService return array_values(array_unique($numbers)); } + /** + * 返回 $bytePos 处 UTF-8 码点占用的最后一字节之后的位置(下一字符起始) + */ + private function utf8CharEnd($content, $bytePos) + { + $len = strlen($content); + if ($bytePos < 0 || $bytePos >= $len) { + return max(0, min($len, $bytePos + 1)); + } + $next = $bytePos + 1; + while ($next < $len && (ord($content[$next]) & 0xC0) === 0x80) { + $next++; + } + + return $next; + } + + /** + * 按字节偏移截取(与 strpos/strlen 一致);勿用 mb_substr,否则遇中文前缀会截断英文词头 + */ + private function byteSubstr($content, $start, $end) + { + $length = max(0, $end - $start); + if ($length === 0) { + return ''; + } + + return (string)mb_strcut($content, $start, $length, 'UTF-8'); + } + private function buildCitationContextText($content, $start, $end) { - $text = mb_substr($content, $start, max(0, $end - $start)); + $text = $this->byteSubstr($content, $start, $end); $text = preg_replace('/\[[\d,\-\s]+\]<\/blue>/', '', $text); $text = 
trim(strip_tags($text)); $text = preg_replace('/\s+/u', ' ', $text); + $text = ltrim($text, "\xEF\xBB\xBF"); return $text; } @@ -768,7 +866,7 @@ class ReferenceCheckService } /** - * 句号是否可作为句界(排除 0.95、3.14 等小数点) + * 句号是否可作为句界(排除小数点、et al. 等缩写) */ private function isSentenceDelimiterAt($content, $pos, $delimiter) { @@ -783,6 +881,16 @@ class ReferenceCheckService return false; } + $before = substr($content, max(0, $pos - 12), min(12, $pos)); + if (preg_match('/\b(et\s+al|e\.g|i\.e|vs|etc|fig|no)\s*\.?\s*$/i', $before)) { + return false; + } + + $after = substr($content, $pos + 1, 24); + if (preg_match('/^\s*\s*\[/', $after)) { + return false; + } + return true; } @@ -792,7 +900,7 @@ class ReferenceCheckService foreach (['.', '。', '!', '?', "\n"] as $delimiter) { $pos = strrpos(substr($content, 0, $position), $delimiter); if ($pos !== false && $this->isSentenceDelimiterAt($content, $pos, $delimiter)) { - $start = max($start, $pos + 1); + $start = max($start, $this->utf8CharEnd($content, $pos)); } } return $start; @@ -812,7 +920,7 @@ class ReferenceCheckService foreach (['.', '。', '!', '?', "\n"] as $delimiter) { $pos = strpos($content, $delimiter, $minPos); if ($pos !== false && $this->isSentenceDelimiterAt($content, $pos, $delimiter)) { - $endPositions[] = $pos + 1; + $endPositions[] = $this->utf8CharEnd($content, $pos); } } if (empty($endPositions)) { From 6f76c483ec415c8e187ba5fff3d55a45ef6f6507 Mon Sep 17 00:00:00 2001 From: wyn <1074145239@qq.com> Date: Thu, 21 May 2026 14:14:34 +0800 Subject: [PATCH 05/12] =?UTF-8?q?=E8=BF=98=E4=B8=8D=E9=94=99=EF=BC=8C?= =?UTF-8?q?=E6=8C=BA=E5=AE=8C=E7=BE=8E=EF=BC=8C=E5=94=AF=E4=B8=80=E4=B8=8D?= =?UTF-8?q?=E8=B6=B3The=20results=20of=20the=20linear=20regression=20analy?= =?UTF-8?q?sis=20in=20this=20study=20show=20that=20work=20immersion=20amon?= =?UTF-8?q?g=20emergency=20department=20nurses=20is=20an=20important=20inf?= =?UTF-8?q?luencing=20factor=20for=20organizational=20silence=20(P=20<=200?= 
=?UTF-8?q?.05).=20Organizational=20silence=20among=20emergency=20departme?= =?UTF-8?q?nt=20nurses=20is=20a=20process=20influenced=20by=20both=20indiv?= =?UTF-8?q?idual=20motivation=20[23]=20and=20external=20environment=20[24]?= =?UTF-8?q?.=20The=20higher=20the=20immersion=20scores=20of=20emergency=20?= =?UTF-8?q?department=20nurses,=20the=20more=20likely=20they=20are=20to=20?= =?UTF-8?q?feel=20intrinsically=20motivated=20[25]=20and=20willing=20to=20?= =?UTF-8?q?speak=20up.=2024=E6=88=AA=E5=8F=96=E6=88=90=E4=BA=86=E5=90=8E?= =?UTF-8?q?=E9=9D=A2=E7=9A=84he=20higher=20the=20immersion=20scores=20of?= =?UTF-8?q?=20emergency=20department=20nurses,=20the=20more=20likely=20the?= =?UTF-8?q?y=20are=20to=20feel=20intrinsically=20motivated?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- application/common/ReferenceCheckService.php | 75 ++++++++++++++++++-- 1 file changed, 68 insertions(+), 7 deletions(-) diff --git a/application/common/ReferenceCheckService.php b/application/common/ReferenceCheckService.php index 27ff9d8c..be13d089 100644 --- a/application/common/ReferenceCheckService.php +++ b/application/common/ReferenceCheckService.php @@ -193,7 +193,7 @@ class ReferenceCheckService 'created_at' => $now, 'updated_at' => $now, ]); - continue; + $this->pushJob(intval($checkId), $delay); $checkIds[] = $checkId; $queued++; @@ -682,13 +682,13 @@ class ReferenceCheckService */ private function extractLocalCitationContext($content, $tagStart, $tagEnd, array $tagSpans) { - $sentenceStart = $this->findSentenceStart($content, $tagStart); + $paragraphStart = $this->findParagraphStart($content, $tagStart); $sentenceEnd = $this->findSentenceEnd($content, $tagEnd, $tagEnd); - $prevTagEnd = $sentenceStart; + $prevTagEnd = $paragraphStart; $nextTagStart = $sentenceEnd; foreach ($tagSpans as $span) { - if ($span['end'] <= $tagStart && $span['end'] > $prevTagEnd && $span['end'] >= $sentenceStart) { + if ($span['end'] <= $tagStart && 
$span['end'] > $prevTagEnd) { $prevTagEnd = $span['end']; } if ($span['start'] > $tagEnd && $span['start'] < $nextTagStart) { @@ -696,9 +696,13 @@ class ReferenceCheckService } } - $hasPriorCiteInSentence = ($prevTagEnd > $sentenceStart); - // 同句后续引用:从上一 标签后开始;首个引用:从整句开头到本标签前 - $localStart = $hasPriorCiteInSentence ? $prevTagEnd : $sentenceStart; + $hasPriorCiteInParagraph = ($prevTagEnd > $paragraphStart); + // 同段后续引用:从上一 后开始;段内首个引用:从段落开头到本标签前(非仅最后一句) + if ($hasPriorCiteInParagraph) { + $localStart = $prevTagEnd; + } else { + $localStart = $this->capContextStartBeforeTag($content, $tagStart, $paragraphStart); + } // 默认:引用标签前的论述(如 Yin et al. [13] → 含 “higher than … Yin et al.”) $localEnd = $tagStart; @@ -894,6 +898,63 @@ class ReferenceCheckService return true; } + /** + * 段落起始(HTML / 换行),避免英文多句段落只取到最后一个句号后的一句 + */ + private function findParagraphStart($content, $tagStart) + { + $search = substr($content, 0, max(0, $tagStart)); + if ($search === '') { + return 0; + } + + $best = 0; + + if (preg_match_all('/]*>/i', $search, $m, PREG_OFFSET_CAPTURE)) { + $last = end($m[0]); + $best = max($best, $last[1] + strlen($last[0])); + } + if (preg_match_all('/<\/p>\s*/i', $search, $m, PREG_OFFSET_CAPTURE)) { + $last = end($m[0]); + $best = max($best, $last[1] + strlen($last[0])); + } + if (preg_match_all('/\s*/i', $search, $m, PREG_OFFSET_CAPTURE)) { + $last = end($m[0]); + $best = max($best, $last[1] + strlen($last[0])); + } + + $pos = strrpos($search, "\n\n"); + if ($pos !== false) { + $best = max($best, $pos + 2); + } + $pos = strrpos($search, "\n"); + if ($pos !== false) { + $best = max($best, $pos + 1); + } + + return $best; + } + + /** + * 段落过长时从引用处向前截取上限,避免单次 LLM 上下文过大 + */ + private function capContextStartBeforeTag($content, $tagStart, $paragraphStart, $maxBytes = 2500) + { + if ($tagStart - $paragraphStart <= $maxBytes) { + return $paragraphStart; + } + + $start = $tagStart - $maxBytes; + $slice = substr($content, $start, $tagStart - $start); + if 
(preg_match('/[.!?。!?]\s+/u', $slice, $m, PREG_OFFSET_CAPTURE)) { + $last = end($m[0]); + $rel = $last[1] + strlen($last[0]); + return $start + $rel; + } + + return max($paragraphStart, $start); + } + private function findSentenceStart($content, $position) { $start = 0; From 3663dd4ea69be8f4010bdcc2838d563f6d23a092 Mon Sep 17 00:00:00 2001 From: wyn <1074145239@qq.com> Date: Thu, 21 May 2026 14:37:04 +0800 Subject: [PATCH 06/12] Changes --- application/common/ReferenceCheckService.php | 25 +++++++++++++++----- application/common/service/LLMService.php | 23 ++++++++++++++---- 2 files changed, 38 insertions(+), 10 deletions(-) diff --git a/application/common/ReferenceCheckService.php b/application/common/ReferenceCheckService.php index be13d089..65ee9abc 100644 --- a/application/common/ReferenceCheckService.php +++ b/application/common/ReferenceCheckService.php @@ -193,7 +193,7 @@ class ReferenceCheckService 'created_at' => $now, 'updated_at' => $now, ]); - +// continue; $this->pushJob(intval($checkId), $delay); $checkIds[] = $checkId; $queued++; @@ -697,20 +697,32 @@ class ReferenceCheckService } $hasPriorCiteInParagraph = ($prevTagEnd > $paragraphStart); - // 同段后续引用:从上一 后开始;段内首个引用:从段落开头到本标签前(非仅最后一句) + // 同段后续引用:从上一 后开始;段内首个引用:从段落开头到本标签前 if ($hasPriorCiteInParagraph) { $localStart = $prevTagEnd; } else { - $localStart = $this->capContextStartBeforeTag($content, $tagStart, $paragraphStart); + $sentenceStart = $this->findSentenceStart($content, $tagStart); + $localStart = $this->capContextStartBeforeTag( + $content, + $tagStart, + max($paragraphStart, $sentenceStart) + ); } - // 默认:引用标签前的论述(如 Yin et al. 
[13] → 含 “higher than … Yin et al.”) + // 默认:引用标签前的论述 $localEnd = $tagStart; $originalText = $this->buildCitationContextText($content, $localStart, $localEnd); - // 标签前几乎无正文(如句末 … ICU nurses [14])→ 改用标签后至下一引用或句末 + // 同句多引(如 …[23] and external environment [24]):上一标签后仅几个词,回退到本句开头 + if ($hasPriorCiteInParagraph && mb_strlen($originalText) < 50) { + $sentenceStart = $this->findSentenceStart($content, $tagStart); + $localStart = max($paragraphStart, $sentenceStart); + $originalText = $this->buildCitationContextText($content, $localStart, $localEnd); + } + + // 仅段内首个引用且标签前极短时才用标签后文(避免 [24] 误截到 [25] 所在句) if (!$this->isMeaningfulCitationContext($originalText) - || $this->shouldUseTrailingCitationContext($content, $localStart, $tagStart, $tagEnd) + || (!$hasPriorCiteInParagraph && $this->shouldUseTrailingCitationContext($content, $localStart, $tagStart, $tagEnd)) ) { $trailEnd = ($nextTagStart < $sentenceEnd) ? $nextTagStart : $sentenceEnd; $trailText = $this->buildCitationContextText($content, $tagEnd, $trailEnd); @@ -811,6 +823,7 @@ class ReferenceCheckService $text = trim(strip_tags($text)); $text = preg_replace('/\s+/u', ' ', $text); $text = ltrim($text, "\xEF\xBB\xBF"); + $text = preg_replace('/^[\s.,。;、:!?]+/u', '', $text); return $text; } diff --git a/application/common/service/LLMService.php b/application/common/service/LLMService.php index ce66056c..4ffe0cda 100644 --- a/application/common/service/LLMService.php +++ b/application/common/service/LLMService.php @@ -709,13 +709,27 @@ PROMPT; } /** - * 将模型输出的 confidence 吸附到固定档位,并与 is_match 规则对齐 + * 与 buildReferenceCheckSystemPrompt3 一致的 confidence 档位 + */ + private function getReferenceCheckConfidenceBands($isMatch) + { + return $isMatch + ? 
[0.65, 0.78, 0.85, 0.92, 0.98] + : [0.15, 0.25, 0.35, 0.45]; + } + + /** + * 将模型输出的 confidence 吸附到合法档位(如 0.95 → 0.92,0.75 → 0.78) */ private function snapReferenceCheckConfidence($confidence, $isMatch) { - $matchBands = [0.75, 0.85, 0.95]; - $mismatchBands = [0.15, 0.25, 0.35]; - $bands = $isMatch ? $matchBands : $mismatchBands; + $bands = $this->getReferenceCheckConfidenceBands($isMatch); + + foreach ($bands as $band) { + if (abs($confidence - $band) < 0.001) { + return $band; + } + } $nearest = $bands[0]; $minDiff = abs($confidence - $nearest); @@ -726,6 +740,7 @@ PROMPT; $nearest = $band; } } + return $nearest; } From 8cd033a56da243a6e7d1563705f498c38a486d39 Mon Sep 17 00:00:00 2001 From: wyn <1074145239@qq.com> Date: Thu, 21 May 2026 15:19:07 +0800 Subject: [PATCH 07/12] =?UTF-8?q?Changes=20=E5=8E=9F=E6=96=87=E5=86=85?= =?UTF-8?q?=E5=AE=B9=E6=88=AA=E5=8F=96=E7=9A=84=E5=B7=B2=E7=BB=8F=E5=BE=88?= =?UTF-8?q?=E5=A5=BD=E4=BA=86?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- application/common/ReferenceCheckService.php | 65 +++++++++++++------- 1 file changed, 42 insertions(+), 23 deletions(-) diff --git a/application/common/ReferenceCheckService.php b/application/common/ReferenceCheckService.php index 65ee9abc..d56c0af0 100644 --- a/application/common/ReferenceCheckService.php +++ b/application/common/ReferenceCheckService.php @@ -193,7 +193,7 @@ class ReferenceCheckService 'created_at' => $now, 'updated_at' => $now, ]); -// continue; + continue; $this->pushJob(intval($checkId), $delay); $checkIds[] = $checkId; $queued++; @@ -677,6 +677,9 @@ class ReferenceCheckService return $result; } + /** 与上一引用间距低于此值(字符)时视为同句并列,从整句开头截取而非仅取两标签之间 */ + const CITE_GAP_SENTENCE_THRESHOLD = 60; + /** * 按引用位置截取局部上下文:优先取标签前叙述;同句多引时后续引用从上一标签后开始。 */ @@ -697,32 +700,37 @@ class ReferenceCheckService } $hasPriorCiteInParagraph = ($prevTagEnd > $paragraphStart); - // 同段后续引用:从上一 后开始;段内首个引用:从段落开头到本标签前 if ($hasPriorCiteInParagraph) { - 
$localStart = $prevTagEnd; + $gapText = $this->buildCitationContextText($content, $prevTagEnd, $tagStart); + // 如 motivation [23] and external environment [24]:间距短,取整句而非仅 “and external environment” + if (mb_strlen($gapText) < self::CITE_GAP_SENTENCE_THRESHOLD) { + $sentenceStart = $this->findSentenceStart($content, $tagStart); + $localStart = $this->capContextStartBeforeTag( + $content, + $tagStart, + max($paragraphStart, $sentenceStart) + ); + } else { + // 如 … Yin et al. [13] on oncology nurses, but … Yang [14]:间距较长,取上一标签后至本标签前 + $localStart = $prevTagEnd; + } } else { - $sentenceStart = $this->findSentenceStart($content, $tagStart); - $localStart = $this->capContextStartBeforeTag( - $content, - $tagStart, - max($paragraphStart, $sentenceStart) - ); + $localStart = $this->capContextStartBeforeTag($content, $tagStart, $paragraphStart); } - // 默认:引用标签前的论述 + // 默认:引用标签前的论述(如 Yin et al. [13] → 含 “higher than … Yin et al.”) $localEnd = $tagStart; $originalText = $this->buildCitationContextText($content, $localStart, $localEnd); - // 同句多引(如 …[23] and external environment [24]):上一标签后仅几个词,回退到本句开头 - if ($hasPriorCiteInParagraph && mb_strlen($originalText) < 50) { - $sentenceStart = $this->findSentenceStart($content, $tagStart); - $localStart = max($paragraphStart, $sentenceStart); - $originalText = $this->buildCitationContextText($content, $localStart, $localEnd); - } - - // 仅段内首个引用且标签前极短时才用标签后文(避免 [24] 误截到 [25] 所在句) + // 标签前几乎无正文(如句末 … ICU nurses [14])→ 改用标签后至下一引用或句末 if (!$this->isMeaningfulCitationContext($originalText) - || (!$hasPriorCiteInParagraph && $this->shouldUseTrailingCitationContext($content, $localStart, $tagStart, $tagEnd)) + || $this->shouldUseTrailingCitationContext( + $content, + $localStart, + $tagStart, + $tagEnd, + $hasPriorCiteInParagraph + ) ) { $trailEnd = ($nextTagStart < $sentenceEnd) ? 
$nextTagStart : $sentenceEnd; $trailText = $this->buildCitationContextText($content, $tagEnd, $trailEnd); @@ -749,12 +757,23 @@ class ReferenceCheckService /** * 标签前仅有作者缩写等极短片段时,改用标签后上下文 + * + * @param bool $hasPriorCiteInParagraph 同段多引时,短片段常为并列成分,不应误取标签后下一句 */ - private function shouldUseTrailingCitationContext($content, $localStart, $tagStart, $tagEnd) - { + private function shouldUseTrailingCitationContext( + $content, + $localStart, + $tagStart, + $tagEnd, + $hasPriorCiteInParagraph = false + ) { $before = $this->buildCitationContextText($content, $localStart, $tagStart); if (!$this->isMeaningfulCitationContext($before)) { - return true; + return !$hasPriorCiteInParagraph; + } + + if ($hasPriorCiteInParagraph) { + return false; } return mb_strlen($before) < 25; @@ -823,7 +842,7 @@ class ReferenceCheckService $text = trim(strip_tags($text)); $text = preg_replace('/\s+/u', ' ', $text); $text = ltrim($text, "\xEF\xBB\xBF"); - $text = preg_replace('/^[\s.,。;、:!?]+/u', '', $text); + $text = preg_replace('/^[\s.!?。!?,,、;:]+/u', '', $text); return $text; } From d9c32430538004e44eea249ad72491f9376e8224 Mon Sep 17 00:00:00 2001 From: wyn <1074145239@qq.com> Date: Thu, 21 May 2026 16:24:34 +0800 Subject: [PATCH 08/12] Changes --- application/api/controller/Article.php | 4 ++++ application/api/job/ReferenceCheck.php | 9 +++++++-- application/common/ReferenceCheckService.php | 3 +++ 3 files changed, 14 insertions(+), 2 deletions(-) diff --git a/application/api/controller/Article.php b/application/api/controller/Article.php index e2875d01..b217e4c1 100644 --- a/application/api/controller/Article.php +++ b/application/api/controller/Article.php @@ -6640,6 +6640,10 @@ class Article extends Base return jsonError($e->getMessage()); } } + public function checkOne(){ + $svc = new ReferenceCheckService(); + $svc->checkOne(); + } public function referenceCheckEnqueueArticleMain(){ $amId = 127448; $svc = new ReferenceCheckService(); diff --git 
a/application/api/job/ReferenceCheck.php b/application/api/job/ReferenceCheck.php index 5058bdc1..0c15c4f5 100644 --- a/application/api/job/ReferenceCheck.php +++ b/application/api/job/ReferenceCheck.php @@ -82,10 +82,15 @@ class ReferenceCheck $llm = new LLMService(); $llmResult = $llm->checkReference($contentA, $contentB); + $isMatch = !empty($llmResult['is_match']); + $confidence = $llm->enforceReferenceCheckConfidence( + isset($llmResult['confidence']) ? $llmResult['confidence'] : 0, + $isMatch + ); Db::name('article_reference_check_result')->where('id', $checkId)->update([ - 'is_match' => !empty($llmResult['is_match']) ? 1 : 0, - 'confidence' => $llmResult['confidence'], + 'is_match' => $isMatch ? 1 : 0, + 'confidence' => $confidence, 'reason' => $llmResult['reason'], 'status' => 1, 'error_msg' => '', diff --git a/application/common/ReferenceCheckService.php b/application/common/ReferenceCheckService.php index d56c0af0..f1903ca4 100644 --- a/application/common/ReferenceCheckService.php +++ b/application/common/ReferenceCheckService.php @@ -131,6 +131,9 @@ class ReferenceCheckService $this->setAmRefCheckStatus($amId, self::AM_STATUS_RUNNING); } + public function checkOne(){ + $this->pushJob(intval(722), 0); + } public function enqueueByArticle($articleId){ if ($articleId <= 0) { throw new \InvalidArgumentException('article_id is required'); From f118a799c22c18c49e4779b438832285631a091a Mon Sep 17 00:00:00 2001 From: wyn <1074145239@qq.com> Date: Thu, 21 May 2026 16:28:28 +0800 Subject: [PATCH 09/12] =?UTF-8?q?=E6=AD=A4=E8=8A=82=E7=82=B9=E4=B9=8B?= =?UTF-8?q?=E5=90=8E=E6=94=B9=E6=88=90=E4=B8=8D=E6=8B=86=E5=88=86=E5=8E=9F?= =?UTF-8?q?=E6=96=87=E5=86=85=E5=AE=B9=EF=BC=8C=E7=9B=B4=E6=8E=A5=E7=94=A8?= =?UTF-8?q?=E5=8F=82=E8=80=83=E6=96=87=E7=8C=AE=E5=92=8C=E6=95=B4=E6=AE=B5?= =?UTF-8?q?=E8=BF=9B=E8=A1=8C=E5=AF=B9=E6=AF=94?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- application/api/job/ReferenceCheck.php | 1 + 1 
file changed, 1 insertion(+) diff --git a/application/api/job/ReferenceCheck.php b/application/api/job/ReferenceCheck.php index 0c15c4f5..1078b8ca 100644 --- a/application/api/job/ReferenceCheck.php +++ b/application/api/job/ReferenceCheck.php @@ -88,6 +88,7 @@ class ReferenceCheck $isMatch ); + Db::name('article_reference_check_result')->where('id', $checkId)->update([ 'is_match' => $isMatch ? 1 : 0, 'confidence' => $confidence, From 44f3383887df5187ad422dfc6a44dc6e8eab410b Mon Sep 17 00:00:00 2001 From: wyn <1074145239@qq.com> Date: Thu, 21 May 2026 17:28:36 +0800 Subject: [PATCH 10/12] Changes --- application/api/job/ReferenceCheck.php | 15 +++-- application/common/ReferenceCheckService.php | 61 +++++--------------- application/common/service/LLMService.php | 21 ++++--- 3 files changed, 37 insertions(+), 60 deletions(-) diff --git a/application/api/job/ReferenceCheck.php b/application/api/job/ReferenceCheck.php index 1078b8ca..704d692d 100644 --- a/application/api/job/ReferenceCheck.php +++ b/application/api/job/ReferenceCheck.php @@ -61,7 +61,11 @@ class ReferenceCheck } try { - $contentA = trim((string)(isset($row['origin_text']) ? $row['origin_text'] : '')); + $mainInfo = Db::name('article_main')->where('am_id', $row['am_id'])->find(); + $contentA = trim($mainInfo['content']);//trim((string)(isset($row['origin_text']) ? $row['origin_text'] : '')); + if ($contentA === '' && !empty($row['content_a'])) { + $contentA = trim((string)$row['content_a']); + } $contentB = trim((string)(isset($row['refer_text']) ? $row['refer_text'] : '')); if ($contentB === '' && intval($row['p_refer_id']) > 0) { @@ -83,15 +87,10 @@ class ReferenceCheck $llm = new LLMService(); $llmResult = $llm->checkReference($contentA, $contentB); $isMatch = !empty($llmResult['is_match']); - $confidence = $llm->enforceReferenceCheckConfidence( - isset($llmResult['confidence']) ? 
$llmResult['confidence'] : 0, - $isMatch - ); - Db::name('article_reference_check_result')->where('id', $checkId)->update([ 'is_match' => $isMatch ? 1 : 0, - 'confidence' => $confidence, + 'confidence' => $llmResult['confidence'], 'reason' => $llmResult['reason'], 'status' => 1, 'error_msg' => '', @@ -106,7 +105,7 @@ class ReferenceCheck $job->delete(); $this->oQueueJob->log("任务执行成功 | 日志ID: {$sRedisKey}"); } catch (\Exception $e) { - var_dump($e->getMessage()); + $this->oQueueJob->log('ReferenceCheck error: ' . $e->getMessage()); if ($job->attempts() >= 3) { $this->markFailed($checkId, $e->getMessage()); $job->delete(); diff --git a/application/common/ReferenceCheckService.php b/application/common/ReferenceCheckService.php index f1903ca4..9aab409e 100644 --- a/application/common/ReferenceCheckService.php +++ b/application/common/ReferenceCheckService.php @@ -132,7 +132,7 @@ class ReferenceCheckService $this->setAmRefCheckStatus($amId, self::AM_STATUS_RUNNING); } public function checkOne(){ - $this->pushJob(intval(722), 0); + $this->pushJob(intval(724), 0); } public function enqueueByArticle($articleId){ if ($articleId <= 0) { @@ -196,7 +196,7 @@ class ReferenceCheckService 'created_at' => $now, 'updated_at' => $now, ]); - continue; + $this->pushJob(intval($checkId), $delay); $checkIds[] = $checkId; $queued++; @@ -205,7 +205,6 @@ class ReferenceCheckService } } } - foreach (array_keys($amIdsWithJobs) as $amId) { $this->setAmRefCheckStatus($amId, self::AM_STATUS_RUNNING); } @@ -680,9 +679,6 @@ class ReferenceCheckService return $result; } - /** 与上一引用间距低于此值(字符)时视为同句并列,从整句开头截取而非仅取两标签之间 */ - const CITE_GAP_SENTENCE_THRESHOLD = 60; - /** * 按引用位置截取局部上下文:优先取标签前叙述;同句多引时后续引用从上一标签后开始。 */ @@ -703,38 +699,25 @@ class ReferenceCheckService } $hasPriorCiteInParagraph = ($prevTagEnd > $paragraphStart); + $sentenceStart = $this->findSentenceStart($content, $tagStart); + + // 段内首个引用:整段到标签前;后续引用:取「本句」起点(可早于上一标签),避免只剩 “and external environment” 再误用标签后文本 if ($hasPriorCiteInParagraph) { 
- $gapText = $this->buildCitationContextText($content, $prevTagEnd, $tagStart); - // 如 motivation [23] and external environment [24]:间距短,取整句而非仅 “and external environment” - if (mb_strlen($gapText) < self::CITE_GAP_SENTENCE_THRESHOLD) { - $sentenceStart = $this->findSentenceStart($content, $tagStart); - $localStart = $this->capContextStartBeforeTag( - $content, - $tagStart, - max($paragraphStart, $sentenceStart) - ); - } else { - // 如 … Yin et al. [13] on oncology nurses, but … Yang [14]:间距较长,取上一标签后至本标签前 - $localStart = $prevTagEnd; - } + $localStart = max($paragraphStart, $sentenceStart); } else { $localStart = $this->capContextStartBeforeTag($content, $tagStart, $paragraphStart); } - // 默认:引用标签前的论述(如 Yin et al. [13] → 含 “higher than … Yin et al.”) + // 默认:引用标签前的论述 $localEnd = $tagStart; $originalText = $this->buildCitationContextText($content, $localStart, $localEnd); - // 标签前几乎无正文(如句末 … ICU nurses [14])→ 改用标签后至下一引用或句末 - if (!$this->isMeaningfulCitationContext($originalText) - || $this->shouldUseTrailingCitationContext( - $content, - $localStart, - $tagStart, - $tagEnd, - $hasPriorCiteInParagraph - ) - ) { + // 仅段内首个引用、且标签前极短(如句末 ICU nurses [14])时,才改用标签后片段;同段多引禁止标签后截取(会错取下一句) + $allowTrailing = !$hasPriorCiteInParagraph; + if ($allowTrailing && ( + !$this->isMeaningfulCitationContext($originalText) + || $this->shouldUseTrailingCitationContext($content, $localStart, $tagStart, $tagEnd) + )) { $trailEnd = ($nextTagStart < $sentenceEnd) ? 
$nextTagStart : $sentenceEnd; $trailText = $this->buildCitationContextText($content, $tagEnd, $trailEnd); if ($this->isMeaningfulCitationContext($trailText)) { @@ -760,23 +743,12 @@ class ReferenceCheckService /** * 标签前仅有作者缩写等极短片段时,改用标签后上下文 - * - * @param bool $hasPriorCiteInParagraph 同段多引时,短片段常为并列成分,不应误取标签后下一句 */ - private function shouldUseTrailingCitationContext( - $content, - $localStart, - $tagStart, - $tagEnd, - $hasPriorCiteInParagraph = false - ) { + private function shouldUseTrailingCitationContext($content, $localStart, $tagStart, $tagEnd) + { $before = $this->buildCitationContextText($content, $localStart, $tagStart); if (!$this->isMeaningfulCitationContext($before)) { - return !$hasPriorCiteInParagraph; - } - - if ($hasPriorCiteInParagraph) { - return false; + return true; } return mb_strlen($before) < 25; @@ -845,7 +817,6 @@ class ReferenceCheckService $text = trim(strip_tags($text)); $text = preg_replace('/\s+/u', ' ', $text); $text = ltrim($text, "\xEF\xBB\xBF"); - $text = preg_replace('/^[\s.!?。!?,,、;:]+/u', '', $text); return $text; } diff --git a/application/common/service/LLMService.php b/application/common/service/LLMService.php index 4ffe0cda..d8734596 100644 --- a/application/common/service/LLMService.php +++ b/application/common/service/LLMService.php @@ -20,7 +20,8 @@ class LLMService $this->url = trim((string)Env::get('promotion.promotion_llm_url', '')); $this->model = trim((string)Env::get('promotion.promotion_llm_model', '')); $this->apiKey = trim((string)Env::get('promotion.promotion_llm_api_key', '')); - $this->timeout = max(30, intval(Env::get('promotion.promotion_llm_timeout', 120))); + // 引用校对 system 提示词较长,请求常超过 30s,至少 120s + $this->timeout = max(120, intval(Env::get('promotion.promotion_llm_timeout', 120))); } /** @@ -34,9 +35,8 @@ class LLMService 'confidence' => 0.0, 'reason' => 'LLM not configured or request failed', ]; - \think\Log::info('llmUrl:'.$this->url); - var_dump("in URL====".$this->url); if ($this->url === '' || 
$this->model === '') { + \think\Log::warning('ReferenceCheck LLM: url or model not configured'); return $fallback; } @@ -73,11 +73,13 @@ class LLMService $content = $this->postChat($payload); if ($content === null) { + \think\Log::warning('ReferenceCheck LLM: postChat returned null'); return $fallback; } $parsed = $this->parseJson($content); if ($parsed === null) { + \think\Log::warning('ReferenceCheck LLM: parseJson failed, raw=' . mb_substr($content, 0, 500)); return $fallback; } @@ -86,7 +88,11 @@ class LLMService $this->normalizeConfidence(isset($parsed['confidence']) ? $parsed['confidence'] : 0), $isMatch ); - + \think\Log::info("confidence:".$confidence,[ + 'is_match' => $isMatch, + 'confidence' => $confidence, + 'reason' => $this->cleanReason((string)(isset($parsed['reason']) ? $parsed['reason'] : '')), + ]); return [ 'is_match' => $isMatch, 'confidence' => $confidence, @@ -763,13 +769,14 @@ PROMPT; $raw = curl_exec($ch); if ($raw === false) { + \think\Log::warning('ReferenceCheck LLM curl error: ' . curl_error($ch)); curl_close($ch); return null; } $httpCode = intval(curl_getinfo($ch, CURLINFO_HTTP_CODE)); - \think\Log::info('httpCode:'.$httpCode); curl_close($ch); if ($httpCode < 200 || $httpCode >= 300) { + \think\Log::warning('ReferenceCheck LLM http ' . $httpCode . ': ' . mb_substr((string)$raw, 0, 500)); return null; } @@ -783,8 +790,8 @@ PROMPT; if (isset($data['content'])) { return (string)$data['content']; } - }catch (Exception $exception){ - var_dump($exception->getMessage()); + } catch (Exception $exception) { + \think\Log::warning('ReferenceCheck LLM exception: ' . 
$exception->getMessage()); } return null; From 68cf1867d896ff7de9de1dce79720918939ea3f7 Mon Sep 17 00:00:00 2001 From: wyn <1074145239@qq.com> Date: Fri, 22 May 2026 16:58:07 +0800 Subject: [PATCH 11/12] =?UTF-8?q?=E5=B7=B2=E7=BB=8F=E5=AE=8C=E6=88=90?= =?UTF-8?q?=E4=B8=80=E4=B8=AA=E6=96=87=E7=AB=A0=E6=A0=A1=E5=AF=B9=E4=BA=86?= =?UTF-8?q?=EF=BC=8C=E4=BD=86=E6=8D=A2=E4=B8=AA=E6=96=87=E7=AB=A0id?= =?UTF-8?q?=E5=B0=B1=E6=8A=A5=E9=94=99=E4=BA=86=EF=BC=8C=E6=8E=92=E6=9F=A5?= =?UTF-8?q?=E5=89=8D=E5=A4=87=E4=BB=BD?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- application/api/controller/Article.php | 8 +- application/api/job/ReferenceCheck.php | 56 +- application/api/job/ReferenceCheckTwo.php | 150 ++ application/common/ReferenceCheckService.php | 570 ++++++- application/common/service/LLMService.php | 1587 +++++++++++------- 5 files changed, 1755 insertions(+), 616 deletions(-) create mode 100644 application/api/job/ReferenceCheckTwo.php diff --git a/application/api/controller/Article.php b/application/api/controller/Article.php index b217e4c1..456fe59c 100644 --- a/application/api/controller/Article.php +++ b/application/api/controller/Article.php @@ -6641,8 +6641,9 @@ class Article extends Base } } public function checkOne(){ + $articleId = intval($this->request->param('article_id', 7414)); $svc = new ReferenceCheckService(); - $svc->checkOne(); + return jsonSuccess($svc->enqueueSecondPassByArticle($articleId)); } public function referenceCheckEnqueueArticleMain(){ $amId = 127448; @@ -6792,7 +6793,7 @@ class Article extends Base $citeStart = intval(isset($row['cite_tag_start']) ? $row['cite_tag_start'] : 0); $rowStatus = intval($row['status']); return array( - 'check_id' => intval($row['check_id']), + 'check_id' => intval(isset($row['id']) ? $row['id'] : (isset($row['check_id']) ? $row['check_id'] : 0)), 'article_id' => intval(isset($row['article_id']) ? 
$row['article_id'] : 0), 'am_id' => $amId, 'cite_group_key' => $amId . '_' . $citeStart, @@ -6806,7 +6807,8 @@ class Article extends Base 'text_end' => intval(isset($row['text_end']) ? $row['text_end'] : 0), 'status' => isset($statusMap[$rowStatus]) ? $statusMap[$rowStatus] : 'unknown', 'is_match' => intval($row['is_match']), - 'is_reasonable' => intval($row['is_match']) === 1, + 'can_support' => intval(isset($row['can_support']) ? $row['can_support'] : $row['is_match']), + 'is_reasonable' => intval(isset($row['can_support']) ? $row['can_support'] : $row['is_match']) === 1, 'confidence' => floatval($row['confidence']), 'reason' => isset($row['reason']) ? $row['reason'] : '', 'error_msg' => isset($row['error_msg']) ? $row['error_msg'] : '', diff --git a/application/api/job/ReferenceCheck.php b/application/api/job/ReferenceCheck.php index 704d692d..3b15e6a1 100644 --- a/application/api/job/ReferenceCheck.php +++ b/application/api/job/ReferenceCheck.php @@ -36,6 +36,9 @@ class ReferenceCheck try { $checkId = intval(isset($data['check_id']) ? $data['check_id'] : 0); + if ($checkId <= 0 && !empty($jobData['data']['check_id'])) { + $checkId = intval($jobData['data']['check_id']); + } $sClassName = get_class($this); $sRedisKey = "queue_job:{$sClassName}:{$checkId}"; $sRedisValue = uniqid() . '_' . getmypid(); @@ -61,45 +64,47 @@ class ReferenceCheck } try { - $mainInfo = Db::name('article_main')->where('am_id', $row['am_id'])->find(); - $contentA = trim($mainInfo['content']);//trim((string)(isset($row['origin_text']) ? $row['origin_text'] : '')); - if ($contentA === '' && !empty($row['content_a'])) { - $contentA = trim((string)$row['content_a']); - } - $contentB = trim((string)(isset($row['refer_text']) ? $row['refer_text'] : '')); + $svc = new ReferenceCheckService(); - if ($contentB === '' && intval($row['p_refer_id']) > 0) { + $contentA = $svc->resolveMainContentForJob($row); + $contentB = trim((string)(isset($row['refer_text']) ? 
$row['refer_text'] : '')); + $refer = null; + + if (intval($row['p_refer_id']) > 0) { $refer = Db::name('production_article_refer') ->where('p_refer_id', intval($row['p_refer_id'])) - ->where('status', 0) + ->where('state', 0) ->find(); - if ($refer) { - $contentB = (new ReferenceCheckService())->formatReferForLlm($refer); + if ($refer && $contentB === '') { + $contentB = $svc->formatReferForLlm($refer); } } if ($contentA === '' || $contentB === '') { - $this->markFailed($checkId, 'Missing content_a or reference text'); + $this->markFailed($checkId, 'Missing article_main.content or refer_text'); $job->delete(); return; } $llm = new LLMService(); - $llmResult = $llm->checkReference($contentA, $contentB); - $isMatch = !empty($llmResult['is_match']); + $llmResult = $llm->checkReference($contentA, $contentB, false); + $canSupport = $svc->parseLlmCanSupport($llmResult); + $confidence = floatval($llmResult['confidence']); - Db::name('article_reference_check_result')->where('id', $checkId)->update([ - 'is_match' => $isMatch ? 1 : 0, - 'confidence' => $llmResult['confidence'], - 'reason' => $llmResult['reason'], + $svc->updateCheckResult($checkId, [ + 'can_support' => $canSupport ? 1 : 0, + 'is_match' => $canSupport ? 1 : 0, + 'confidence' => $confidence, + 'reason' => isset($llmResult['reason']) ? $llmResult['reason'] : '', 'status' => 1, 'error_msg' => '', - 'updated_at' => date('Y-m-d H:i:s'), ]); + $svc->maybeEnqueueSecondPass($checkId, $confidence); + $amId = intval(isset($row['am_id']) ? 
$row['am_id'] : 0); if ($amId > 0) { - (new ReferenceCheckService())->syncAmRefCheckStatus($amId); + $svc->syncAmRefCheckStatus($amId); } $this->QueueRedis->finishJob($sRedisKey, 'completed', $this->completedExprie, $sRedisValue); $job->delete(); @@ -127,11 +132,14 @@ class ReferenceCheck private function markFailed($checkId, $msg) { $row = Db::name('article_reference_check_result')->where('id', $checkId)->find(); - Db::name('article_reference_check_result')->where('id', $checkId)->update([ - 'status' => 2, - 'error_msg' => mb_substr($msg, 0, 500), - 'updated_at' => date('Y-m-d H:i:s'), - ]); + try { + (new ReferenceCheckService())->updateCheckResult($checkId, [ + 'status' => 2, + 'error_msg' => $msg, + ]); + } catch (\Exception $e) { + \think\Log::error('ReferenceCheck markFailed: ' . $e->getMessage()); + } $amId = empty($row) ? 0 : intval(isset($row['am_id']) ? $row['am_id'] : 0); if ($amId > 0) { (new ReferenceCheckService())->syncAmRefCheckStatus($amId); diff --git a/application/api/job/ReferenceCheckTwo.php b/application/api/job/ReferenceCheckTwo.php new file mode 100644 index 00000000..b28c9f6c --- /dev/null +++ b/application/api/job/ReferenceCheckTwo.php @@ -0,0 +1,150 @@ +oQueueJob = new QueueJob(); + $this->QueueRedis = QueueRedis::getInstance(); + } + + public function fire(Job $job, $data) + { + $this->oQueueJob->init($job); + + $rawBody = empty($job->getRawBody()) ? '' : $job->getRawBody(); + $jobData = empty($rawBody) ? [] : json_decode($rawBody, true); + $jobId = empty($jobData['id']) ? 'unknown' : $jobData['id']; + + $sRedisKey = ''; + $sRedisValue = ''; + + $this->oQueueJob->log("-----------队列任务开始-----------"); + $this->oQueueJob->log("当前任务ID: {$jobId}, 尝试次数: {$job->attempts()}"); + + try { + $checkId = intval(isset($data['check_id']) ? 
$data['check_id'] : 0); + if ($checkId <= 0 && !empty($jobData['data']['check_id'])) { + $checkId = intval($jobData['data']['check_id']); + } + $sClassName = get_class($this); + $sRedisKey = "queue_job_two:{$sClassName}:{$checkId}"; + $sRedisValue = uniqid() . '_' . getmypid(); + + if (!$this->oQueueJob->acquireLock($sRedisKey, $sRedisValue, $job)) { + return; + } + + if ($checkId <= 0) { + $job->delete(); + return; + } + + $row = Db::name('article_reference_check_result')->where('id', $checkId)->find(); + if (empty($row)) { + $job->delete(); + return; + } + +// if (intval($row['status']) === 1) { +// $job->delete(); +// return; +// } + + try { + $svc = new ReferenceCheckService(); + + $contentA = $svc->resolveMainContentForJob($row); + $referText = trim((string)(isset($row['refer_text']) ? $row['refer_text'] : '')); + $refer = null; + + if (intval($row['p_refer_id']) > 0) { + $refer = Db::name('production_article_refer') + ->where('p_refer_id', intval($row['p_refer_id'])) + ->where('state', 0) + ->find(); + } + + $payload = $svc->prepareRecheckPayload(is_array($refer) ? $refer : [], $referText); + $doiBlock = $payload['doi_block']; + + if ($contentA === '' || $referText === '') { + $this->markFailed($checkId, 'Missing article_main.content or refer_text'); + $job->delete(); + return; + } + $llm = new LLMService(); + $llmResult = $llm->checkReference($contentA, $referText, true, $doiBlock); + + $canSupport = $svc->parseLlmCanSupport($llmResult); + $tag = $payload['has_abstract'] + ? ('[Crossref复核' . ($payload['doi_used'] !== '' ? ' ' . $payload['doi_used'] : '') . ']') + : '[Crossref复核-无摘要]'; + $reason = $tag . ' ' . (isset($llmResult['reason']) ? $llmResult['reason'] : ''); + + $affected = $svc->updateCheckResult($checkId, [ + 'can_support' => $canSupport ? 1 : 0, + 'is_match' => $canSupport ? 
1 : 0, + 'confidence' => floatval($llmResult['confidence']), + 'reason' => $reason, + 'status' => 1, + 'error_msg' => '', + ]); + $this->oQueueJob->log("Crossref复核写入 id={$checkId} affected={$affected} can_support=" . ($canSupport ? 1 : 0) . " confidence=" . floatval($llmResult['confidence'])); + + $amId = intval(isset($row['am_id']) ? $row['am_id'] : 0); + if ($amId > 0) { + $svc->syncAmRefCheckStatus($amId); + } + $this->QueueRedis->finishJob($sRedisKey, 'completed', $this->completedExprie, $sRedisValue); + $job->delete(); + $this->oQueueJob->log("任务执行成功 | 日志ID: {$sRedisKey}"); + } catch (\Exception $e) { + $this->oQueueJob->log('ReferenceCheckTwo error: ' . $e->getMessage()); + if ($job->attempts() >= 3) { + $this->markFailed($checkId, $e->getMessage()); + $job->delete(); + return; + } + $job->release(30); + } + } catch (\RuntimeException $e) { + $this->oQueueJob->handleRetryableException($e, $sRedisKey, $sRedisValue, $job); + } catch (\LogicException $e) { + $this->oQueueJob->handleNonRetryableException($e, $sRedisKey, $sRedisValue, $job); + } catch (\Exception $e) { + $this->oQueueJob->handleRetryableException($e, $sRedisKey, $sRedisValue, $job); + } finally { + $this->oQueueJob->finnal(); + } + } + + private function markFailed($checkId, $msg) + { + $row = Db::name('article_reference_check_result')->where('id', $checkId)->find(); + try { + (new ReferenceCheckService())->updateCheckResult($checkId, [ + 'status' => 2, + 'error_msg' => $msg, + ]); + } catch (\Exception $e) { + \think\Log::error('ReferenceCheckTwo markFailed: ' . $e->getMessage()); + } + $amId = empty($row) ? 0 : intval(isset($row['am_id']) ? 
$row['am_id'] : 0); + if ($amId > 0) { + (new ReferenceCheckService())->syncAmRefCheckStatus($amId); + } + } +} diff --git a/application/common/ReferenceCheckService.php b/application/common/ReferenceCheckService.php index 9aab409e..593f1548 100644 --- a/application/common/ReferenceCheckService.php +++ b/application/common/ReferenceCheckService.php @@ -3,6 +3,7 @@ namespace app\common; use think\Db; +use think\Env; use think\Queue; /** @@ -131,8 +132,39 @@ class ReferenceCheckService $this->setAmRefCheckStatus($amId, self::AM_STATUS_RUNNING); } - public function checkOne(){ - $this->pushJob(intval(724), 0); + /** + * 手工触发:对已完成且 confidence<=0.65 的记录入队 DOI 第二轮复核 + */ + public function enqueueSecondPassByArticle($articleId) + { + $articleId = intval($articleId); + if ($articleId <= 0) { + throw new \InvalidArgumentException('article_id is required'); + } + + $rows = Db::name('article_reference_check_result') + ->where('article_id', $articleId) + ->where('status', 1) + ->where('confidence', '<=', 0.65) + ->orderRaw('rand()') + ->limit(2) + ->select(); + + $checkIds2 = []; + $delay2 = 0; + foreach ($rows as $checkLog) { + $rowId = $this->resolveCheckRowId($checkLog); + if ($this->maybeEnqueueSecondPass($rowId, floatval($checkLog['confidence']))) { + $checkIds2[] = $rowId; + $delay2 += 1; + } + } + + return [ + 'article_id' => $articleId, + 'check_ids2' => $checkIds2, + 'queued' => count($checkIds2), + ]; } public function enqueueByArticle($articleId){ if ($articleId <= 0) { @@ -140,7 +172,7 @@ class ReferenceCheckService } $prod = Db::name('production_article') ->where('article_id', $articleId) - ->where('state', 0) + ->where('state', [0, 2]) ->find(); if (empty($prod)) { throw new \RuntimeException('production_article not found for article_id=' . $articleId); @@ -296,12 +328,78 @@ class ReferenceCheckService return isset($map[$status]) ? 
$map[$status] : 'unknown'; } + /** + * 表主键为 id(对外 API 参数名仍叫 check_id) + */ + public function resolveCheckRowId($row) + { + if (!is_array($row)) { + return 0; + } + if (isset($row['id']) && intval($row['id']) > 0) { + return intval($row['id']); + } + if (isset($row['check_id']) && intval($row['check_id']) > 0) { + return intval($row['check_id']); + } + return 0; + } + + /** + * 解析 LLM 返回的 is_match(兼容 bool / 0|1 / "true"|"false" 字符串) + */ + public function parseLlmIsMatch($value) + { + if (is_bool($value)) { + return $value; + } + if (is_int($value) || is_float($value)) { + return intval($value) === 1; + } + $s = strtolower(trim((string)$value)); + return in_array($s, ['1', 'true', 'yes', 'match', 'matched'], true); + } + + /** + * 写入单条校对结果(统一截断 reason/error_msg,避免 varchar(512) 导致 UPDATE 失败) + * + * @throws \RuntimeException + */ + public function updateCheckResult($checkId, array $fields) + { + $checkId = intval($checkId); + if ($checkId <= 0) { + throw new \InvalidArgumentException('invalid check id'); + } + + if (isset($fields['reason'])) { + $fields['reason'] = mb_substr(trim((string)$fields['reason']), 0, 512); + } + if (isset($fields['error_msg'])) { + $fields['error_msg'] = mb_substr(trim((string)$fields['error_msg']), 0, 512); + } + $fields['updated_at'] = date('Y-m-d H:i:s'); + + $exists = Db::name('article_reference_check_result')->where('id', $checkId)->find(); + if (empty($exists)) { + throw new \RuntimeException('article_reference_check_result not found, id=' . $checkId); + } + + $affected = Db::name('article_reference_check_result')->where('id', $checkId)->update($fields); + if ($affected === false) { + throw new \RuntimeException('article_reference_check_result update failed, id=' . $checkId); + } + + \think\Log::info('updateCheckResult id=' . $checkId . ' affected=' . 
intval($affected)); + return intval($affected); + } + public function getResult($checkId) { if ($checkId <= 0) { return null; } - $row = Db::name('article_reference_check_result')->where('check_id', $checkId)->find(); + $row = Db::name('article_reference_check_result')->where('id', $checkId)->find(); return $row ?: null; } @@ -435,7 +533,7 @@ class ReferenceCheckService 'ref_nos' => [], ]; } - $byAm[$amId]['contexts'][$ctxKey]['check_ids'][] = intval($row['check_id']); + $byAm[$amId]['contexts'][$ctxKey]['check_ids'][] = $this->resolveCheckRowId($row); $byAm[$amId]['contexts'][$ctxKey]['ref_nos'][] = $refNo; $reason = trim((string)$this->arrGet($row, 'reason', '')); if ($reason !== '') { @@ -501,7 +599,7 @@ class ReferenceCheckService $issueCount++; $issues[] = array( 'am_id' => $amId, - 'check_id' => intval($row['check_id']), + 'check_id' => $this->resolveCheckRowId($row), 'reference_no' => $num, 'reference_raw' => $inner, 'reason' => $rowReason, @@ -512,7 +610,7 @@ class ReferenceCheckService ENT_QUOTES, 'UTF-8' ); - return '' . $numMatch[0] . ''; }, @@ -627,6 +725,448 @@ class ReferenceCheckService return implode("\n", $parts); } + /** + * 仅使用 refer_doi 字段(二次 Crossref 摘要用) + */ + public function extractReferDoiOnly($refer) + { + if (!is_array($refer)) { + return ''; + } + $raw = trim((string)$this->arrGet($refer, 'refer_doi', '')); + if ($raw === '' || stripos($raw, 'not available') !== false) { + return ''; + } + $dois = $this->extractDoisFromString($raw); + return empty($dois) ? 
'' : $dois[0]; + } + + /** + * 根据 refer_doi 调用 Crossref works API 获取摘要(二次校对专用) + * + * @return array{text:string, has_abstract:bool, doi:string} + */ + public function fetchCrossrefAbstractByReferDoi($refer) + { + $doi = $this->extractReferDoiOnly($refer); + if ($doi === '') { + return ['text' => '', 'has_abstract' => false, 'doi' => '']; + } + + $crossref = new CrossrefService([ + 'mailto' => trim((string)Env::get('crossref_mailto', '')), + ]); + $block = $this->extractCrossrefBlock($doi, $crossref); + if ($block === null) { + return ['text' => '', 'has_abstract' => false, 'doi' => $doi]; + } + + return [ + 'text' => $block['text'], + 'has_abstract' => !empty($block['has_abstract']), + 'doi' => $doi, + ]; + } + + /** + * 解析 LLM 返回的 can_support + */ + public function parseLlmCanSupport($llmResult) + { + if (!is_array($llmResult)) { + return false; + } + if (array_key_exists('can_support', $llmResult)) { + return $this->parseLlmIsMatch($llmResult['can_support']); + } + return $this->parseLlmIsMatch(isset($llmResult['is_match']) ? $llmResult['is_match'] : false); + } + + /** + * 第一次校对:取 article_main.content(整节正文) + */ + public function resolveMainContentForJob(array $row, $maxChars = 8000) + { + $amId = intval($this->arrGet($row, 'am_id', 0)); + if ($amId <= 0) { + return ''; + } + $main = Db::name('article_main') + ->field('content') + ->where('am_id', $amId) + ->find(); + if (empty($main)) { + return ''; + } + + $text = trim((string)$this->arrGet($main, 'content', '')); + if ($text === '') { + return ''; + } + + $text = preg_replace('/\[([\d,\-\s]+)\]<\/blue>/', '[$1]', $text); + $text = strip_tags($text); + $text = html_entity_decode($text, ENT_QUOTES | ENT_HTML5, 'UTF-8'); + $text = preg_replace('/\s+/u', ' ', $text); + $text = trim($text); + + $maxChars = max(500, intval($maxChars)); + if (mb_strlen($text) > $maxChars) { + $text = mb_substr($text, 0, $maxChars) . 
'...'; + } + + return $text; + } + + /** + * 引用处局部上下文(origin_text),供其它场景使用 + */ + public function resolveCitationContextForJob(array $row) + { + $text = trim((string)$this->arrGet($row, 'origin_text', '')); + if ($text === '') { + $text = trim((string)$this->arrGet($row, 'content_a', '')); + } + return $text; + } + + /** + * 从 refer 行提取标准 DOI(10.xxxx/...) + * + * 优先级:refer_content(原始引用文本里的 DOI 最贴近实际被引用的文献) + * > refer_doi > doi > doilink + */ + public function extractDoiFromRefer($refer) + { + $list = $this->extractAllDoiCandidatesFromRefer($refer); + return empty($list) ? '' : $list[0]; + } + + /** + * 返回 refer 行可能对应的全部 DOI 候选(去重,按优先级排序) + * + * 用于第二轮 DOI 复核场景:当 metadata 的 refer_doi 与原始引用文本里的 DOI + * 不一致时(数据漂移),优先尝试原始引用文本里的 DOI 抓真实摘要。 + * + * @return string[] + */ + public function extractAllDoiCandidatesFromRefer($refer) + { + if (!is_array($refer)) { + return []; + } + $ordered = [ + (string)$this->arrGet($refer, 'refer_content', ''), + (string)$this->arrGet($refer, 'refer_doi', ''), + (string)$this->arrGet($refer, 'doi', ''), + (string)$this->arrGet($refer, 'doilink', ''), + ]; + + $result = []; + foreach ($ordered as $raw) { + foreach ($this->extractDoisFromString($raw) as $doi) { + if (!in_array($doi, $result, true)) { + $result[] = $doi; + } + } + } + return $result; + } + + /** + * 从任意文本里抽取所有形如 10.xxxx/yyy 的 DOI + * @return string[] + */ + private function extractDoisFromString($text) + { + $text = trim((string)$text); + if ($text === '' || stripos($text, 'not available') !== false) { + return []; + } + + $dois = []; + + if (preg_match_all('~doi\.org/([^\s?#"\'<>]+)~i', $text, $m)) { + foreach ($m[1] as $cand) { + $cand = $this->trimDoiTail(trim($cand)); + if ($this->isValidDoi($cand)) { + $dois[] = $cand; + } + } + } + + if (preg_match_all('~\b(10\.\d{3,9}/[^\s?#"\'<>]+)~i', $text, $m)) { + foreach ($m[1] as $cand) { + $cand = $this->trimDoiTail(trim($cand)); + if ($this->isValidDoi($cand)) { + $dois[] = $cand; + } + } + } + + if ($dois === [] && 
strpos($text, '10.') === 0) { + $cand = $this->trimDoiTail($text); + if ($this->isValidDoi($cand)) { + $dois[] = $cand; + } + } + + return array_values(array_unique($dois)); + } + + private function trimDoiTail($doi) + { + return rtrim($doi, ".,;:)]}>\"'\\ \t\n\r"); + } + + private function isValidDoi($doi) + { + return (bool)preg_match('~^10\.\d{3,9}/[^\s]+$~i', (string)$doi); + } + + /** + * 通过 PubMed / Crossref 拉取 DOI 对应文献内容(本地 LLM 无法打开网页,须预先抓取) + * + * 行为: + * - 尝试 refer 行内所有 DOI 候选(refer_content > refer_doi > doi > doilink) + * - 优先采用第一个能拿到 abstract 的 DOI + * - PubMed 无摘要时回落到 Crossref raw 解析摘要(清理 JATS 标签) + * - 全部失败则返回空字符串(调用方据此跳过二次复核) + */ + public function fetchDoiLiteratureBlock($refer) + { + $candidates = $this->extractAllDoiCandidatesFromRefer($refer); + if (empty($candidates)) { + return ''; + } + + $pubmed = new PubmedService([ + 'email' => trim((string)Env::get('pubmed_email', '')), + 'tool' => trim((string)Env::get('pubmed_tool', 'tmrjournals')), + ]); + $crossref = new CrossrefService([ + 'mailto' => trim((string)Env::get('crossref_mailto', '')), + ]); + + $best = null; + $fallback = null; + + foreach ($candidates as $doi) { + $block = $this->buildDoiBlockFromSources($doi, $pubmed, $crossref); + if ($block === null) { + continue; + } + if (!empty($block['has_abstract'])) { + $best = $block; + break; + } + if ($fallback === null) { + $fallback = $block; + } + } + + $chosen = $best ?: $fallback; + if ($chosen === null) { + return ''; + } + return $chosen['text']; + } + + /** + * 拉单个 DOI 的真实内容,返回 ['text' => string, 'has_abstract' => bool] 或 null + */ + private function buildDoiBlockFromSources($doi, PubmedService $pubmed, CrossrefService $crossref) + { + $doi = trim((string)$doi); + if ($doi === '') { + return null; + } + + $pub = $pubmed->fetchByDoi($doi); + $pubAbstract = is_array($pub) ? 
trim((string)$this->arrGet($pub, 'abstract', '')) : ''; + + if (is_array($pub) && ($pubAbstract !== '' || trim((string)$this->arrGet($pub, 'title', '')) !== '')) { + $lines = ['Source: PubMed (DOI ' . $doi . ')']; + if (!empty($pub['title'])) { + $lines[] = 'Actual Title: ' . trim((string)$pub['title']); + } + if (!empty($pub['journal'])) { + $lines[] = 'Journal: ' . trim((string)$pub['journal']); + } + if (!empty($pub['year'])) { + $lines[] = 'Year: ' . trim((string)$pub['year']); + } + if (!empty($pub['publication_types'])) { + $lines[] = 'Publication Types: ' . implode('; ', (array)$pub['publication_types']); + } + if (!empty($pub['mesh_terms'])) { + $lines[] = 'MeSH: ' . implode('; ', (array)$pub['mesh_terms']); + } + if ($pubAbstract !== '') { + $lines[] = 'Abstract: ' . $this->truncate($pubAbstract, 3500); + } + + if ($pubAbstract === '') { + $cr = $this->extractCrossrefBlock($doi, $crossref); + if ($cr !== null && $cr['has_abstract']) { + $lines[] = "\n--- Crossref 补充 ---\n" . $cr['text']; + return ['text' => implode("\n", $lines), 'has_abstract' => true]; + } + } + + return ['text' => implode("\n", $lines), 'has_abstract' => $pubAbstract !== '']; + } + + return $this->extractCrossrefBlock($doi, $crossref); + } + + /** + * 从 Crossref 拉取标题/期刊/作者/摘要(abstract 通常包裹 JATS XML,需清洗) + * @return array|null ['text' => string, 'has_abstract' => bool] + */ + private function extractCrossrefBlock($doi, CrossrefService $crossref) + { + $msg = $crossref->fetchWork($doi); + if (!is_array($msg)) { + return null; + } + + $summary = $crossref->fetchWorkSummary($doi); + if (!is_array($summary)) { + $summary = []; + } + + $lines = ['Source: Crossref api.crossref.org/works/' . rawurlencode($doi)]; + $title = isset($msg['title'][0]) ? trim((string)$msg['title'][0]) : trim((string)$this->arrGet($summary, 'title', '')); + if ($title !== '') { + $lines[] = 'Actual Title: ' . $title; + } + if (!empty($summary['joura'])) { + $lines[] = 'Journal: ' . 
trim((string)$summary['joura']); + } + if (!empty($summary['author_str'])) { + $lines[] = 'Authors: ' . trim((string)$summary['author_str']); + } + if (!empty($summary['dateno'])) { + $lines[] = 'Publication: ' . trim((string)$summary['dateno']); + } + if (!empty($summary['doilink'])) { + $lines[] = 'DOI Link: ' . trim((string)$summary['doilink']); + } + if (!empty($summary['is_retracted'])) { + $lines[] = 'Retraction: yes - ' . trim((string)$this->arrGet($summary, 'retract_reason', '')); + } + + $abstract = $this->cleanCrossrefAbstract((string)$this->arrGet($msg, 'abstract', '')); + $hasAbstract = $abstract !== ''; + if ($hasAbstract) { + $lines[] = 'Abstract: ' . $this->truncate($abstract, 3500); + } else { + $lines[] = 'Note: Crossref 未返回摘要,请结合标题/期刊/作者与正文谨慎判断。'; + } + + return ['text' => implode("\n", $lines), 'has_abstract' => $hasAbstract]; + } + + private function cleanCrossrefAbstract($raw) + { + $raw = trim((string)$raw); + if ($raw === '') { + return ''; + } + $raw = preg_replace('~]*>.*?~is', '', $raw); + $raw = preg_replace('~]*>~i', "\n", $raw); + $raw = preg_replace('~~i', '', $raw); + $raw = preg_replace('~]+>~i', '', $raw); + $raw = strip_tags($raw); + $raw = preg_replace('/[ \t]+/u', ' ', $raw); + $raw = preg_replace("/\r\n|\r/u", "\n", $raw); + $raw = preg_replace("/\n{2,}/u", "\n", $raw); + return trim($raw); + } + + private function truncate($text, $max) + { + $text = (string)$text; + if (mb_strlen($text) <= $max) { + return $text; + } + return mb_substr($text, 0, $max) . '...'; + } + + /** + * 第二次 DOI 复核数据准备:返回书目信息 + 真实抓取内容 + * + * @return array{refer_text:string, doi_block:string, has_abstract:bool, doi_used:string} + */ + public function prepareRecheckPayload($refer, $referText = '') + { + $base = trim($referText) !== '' ? 
trim($referText) : $this->formatReferForLlm($refer); + $cr = $this->fetchCrossrefAbstractByReferDoi($refer); + return [ + 'refer_text' => $base, + 'doi_block' => $cr['text'], + 'has_abstract' => $cr['has_abstract'], + 'doi_used' => $cr['doi'], + ]; + } + + /** + * 旧接口:拼接成单块文本(向后兼容,建议调用方改用 prepareRecheckPayload) + */ + public function formatReferForDoiRecheck($refer, $referText = '') + { + $payload = $this->prepareRecheckPayload($refer, $referText); + if ($payload['doi_block'] === '') { + return $payload['refer_text'] + . "\n\n【DOI 文献真实内容】\n未能从 PubMed/Crossref 获取该 DOI 的摘要或元数据,请依据书目条目与正文谨慎判断。"; + } + return $payload['refer_text'] + . "\n\n【Crossref 摘要(依据 Refer_doi 从 api.crossref.org/works 获取)】\n" + . $payload['doi_block']; + } + + /** + * 第一轮 confidence<=0.65 且能抓到 DOI 真实内容时,延迟入队第二轮复核 + * + * 跳过条件(避免无意义重跑得到相同结果): + * - check_id 不合法 / 一次置信度高于阈值 + * - refer 行不存在 + * - refer_doi 为空或 Crossref 未返回摘要 + */ + public function maybeEnqueueSecondPass($checkId, $confidence) + { + $checkId = intval($checkId); + $confidence = floatval($confidence); + if ($checkId <= 0 || $confidence > 0.65) { + return false; + } + + $row = Db::name('article_reference_check_result')->where('id', $checkId)->find(); + if (empty($row)) { + return false; + } + + $refer = null; + if (intval($row['p_refer_id']) > 0) { + $refer = Db::name('production_article_refer') + ->where('p_refer_id', intval($row['p_refer_id'])) + ->where('state', 0) + ->find(); + } + if (empty($refer) || $this->extractReferDoiOnly($refer) === '') { + return false; + } + + $cr = $this->fetchCrossrefAbstractByReferDoi($refer); + if (empty($cr['has_abstract'])) { + return false; + } + + $this->pushJob2($checkId, 5); + return true; + } + /** * 从 article_main.content 提取 blue 引用 */ @@ -1021,10 +1561,24 @@ class ReferenceCheckService } else { $jobId = Queue::push($jobClass, $data, self::QUEUE_NAME); } - var_dump("=====jobId:".$jobId); } catch (\Exception $e) { \think\Log::error('ReferenceCheck pushJob failed check_id=' . $checkId . ' ' . 
$e->getMessage()); throw $e; } } + private function pushJob2($checkId, $delaySeconds = 0) + { + $jobClass = 'app\api\job\ReferenceCheckTwo@fire'; + $data = ['check_id' => $checkId]; + try { + if ($delaySeconds > 0) { + $jobId = Queue::later($delaySeconds, $jobClass, $data, self::QUEUE_NAME); + } else { + $jobId = Queue::push($jobClass, $data, self::QUEUE_NAME); + } + } catch (\Exception $e) { + \think\Log::error('ReferenceCheckTwo pushJob failed check_id=' . $checkId . ' ' . $e->getMessage()); + throw $e; + } + } } diff --git a/application/common/service/LLMService.php b/application/common/service/LLMService.php index d8734596..01a755df 100644 --- a/application/common/service/LLMService.php +++ b/application/common/service/LLMService.php @@ -25,15 +25,18 @@ class LLMService } /** - * @param string $contextText 正文引用处句子 - * @param string $referText 参考文献条目(或 refer 格式化文本) + * @param string $contextText 正文引用处句子 + * @param string $referText 参考文献条目(或 refer 格式化文本) + * @param bool $isAgain 是否为 DOI 二次复核 + * @param string|null $doiBlock 可选:系统抓取到的 DOI 真实文献内容(仅二次复核使用) */ - public function checkReference($contextText, $referText) + public function checkReference($contextText, $referText, $isAgain = false, $doiBlock = null) { $fallback = [ - 'is_match' => false, - 'confidence' => 0.0, - 'reason' => 'LLM not configured or request failed', + 'can_support' => false, + 'is_match' => false, + 'confidence' => 0.0, + 'reason' => 'LLM not configured or request failed', ]; if ($this->url === '' || $this->model === '') { \think\Log::warning('ReferenceCheck LLM: url or model not configured'); @@ -42,26 +45,37 @@ class LLMService $contextText = trim($contextText); $referText = trim($referText); + $doiBlock = trim((string)$doiBlock); if ($contextText === '' || $referText === '') { return [ - 'is_match' => false, - 'confidence' => 0.0, - 'reason' => 'Empty citation context or reference text', + 'can_support' => false, + 'is_match' => false, + 'confidence' => 0.0, + 'reason' => 'Empty citation 
context or reference text', ]; } - if (mb_strlen($contextText) > 2000) { - $contextText = mb_substr($contextText, 0, 2000); + $maxContextLen = 8000; + if (mb_strlen($contextText) > $maxContextLen) { + $contextText = mb_substr($contextText, 0, $maxContextLen); } if (mb_strlen($referText) > 4000) { $referText = mb_substr($referText, 0, 4000); } + if (mb_strlen($doiBlock) > 4000) { + $doiBlock = mb_substr($doiBlock, 0, 4000); + } - $system = $this->buildReferenceCheckSystemPrompt3(); - \think\Log::info('system:' . $system); + if ($isAgain) { + $system = $this->buildReferenceCheckSecondPassPrompt(); + $user = $this->buildReferenceCheckSecondPassUserPrompt($contextText, $referText, $doiBlock); + } else { + $system = $this->buildReferenceCheckFirstPassPrompt(); + $user = $this->buildReferenceCheckFirstPassUserPrompt($contextText, $referText); + } - $user = $this->buildReferenceCheckUserPrompt($contextText, $referText); - \think\Log::info('user:' . $user); + \think\Log::info('ReferenceCheck system head: ' . mb_substr($system, 0, 200)); + \think\Log::info('ReferenceCheck user head: ' . mb_substr($user, 0, 600)); $payload = [ 'model' => $this->model, 'temperature' => 0, @@ -83,580 +97,131 @@ class LLMService return $fallback; } - $isMatch = !empty($parsed['is_match']); + $canSupport = $this->parseCanSupportFromParsed($parsed); $confidence = $this->snapReferenceCheckConfidence( $this->normalizeConfidence(isset($parsed['confidence']) ? $parsed['confidence'] : 0), - $isMatch + $canSupport + ); + $reason = $this->cleanReason((string)(isset($parsed['reason']) ? $parsed['reason'] : '')); + \think\Log::info( + 'ReferenceCheck result: can_support=' . ($canSupport ? '1' : '0') + . ', confidence=' . $confidence + . ', reason=' . $reason ); - \think\Log::info("confidence:".$confidence,[ - 'is_match' => $isMatch, - 'confidence' => $confidence, - 'reason' => $this->cleanReason((string)(isset($parsed['reason']) ? 
$parsed['reason'] : '')), - ]); return [ - 'is_match' => $isMatch, - 'confidence' => $confidence, - 'reason' => $this->cleanReason((string)(isset($parsed['reason']) ? $parsed['reason'] : '')), + 'can_support' => $canSupport, + 'is_match' => $canSupport, + 'confidence' => $confidence, + 'reason' => $reason, ]; } + + /** + * 解析 can_support;兼容 is_match 字段 + */ + private function parseCanSupportFromParsed(array $parsed) + { + if (array_key_exists('can_support', $parsed)) { + return $this->boolFromLlmValue($parsed['can_support']); + } + if (array_key_exists('is_match', $parsed)) { + return $this->boolFromLlmValue($parsed['is_match']); + } + return false; + } + + private function boolFromLlmValue($value) + { + if (is_bool($value)) { + return $value; + } + if (is_int($value) || is_float($value)) { + return intval($value) === 1; + } + $s = strtolower(trim((string)$value)); + return in_array($s, ['1', 'true', 'yes', 'support', 'supported'], true); + } + + /** 第一次校对:书目条目 vs 正文全文 */ + private function buildReferenceCheckFirstPassPrompt() + { + return <<<'PROMPT' +你是文献引用校对助手。判断【正文全文】与【参考文献书目】是否相关、能否用于支撑正文中的引用。 + +【核心原则:从宽判断,避免误杀】 +默认倾向 can_support=true。只要文献与正文不是「驴唇不对马嘴」,即判为相关、能支撑。 +不要求变量一致、不要求结论逐条对应、不要求研究设计相同。 + +【仅当以下情况才判 can_support=false(驴唇不对马嘴)】 +- 学科/主题完全无关(如正文讲深度学习聚类,文献是糖尿病步态检测)。 +- 明显张冠李戴(正文断言 A 疗法的效果,文献研究的是完全不同的 B 问题且无关联)。 +- 文献条目与正文讨论的对象/场景毫无交集,且无法作背景或理论引用。 + +【以下情况均应 can_support=true】 +- 同一大领域或相邻方向(如护理、心理、管理、医学、统计、AI 等相近子领域)。 +- 可作背景文献、综述性引用、理论或方法的一般性依据。 +- 表述略宽、略有概括、变量名不完全一致,但大方向说得通。 + +【confidence 固定档位(禁止其它小数)】 +can_support=true:0.65(有关联但较泛)/ 0.78 / 0.85 / 0.92 / 0.98(非常确定相关) +can_support=false:0.15(明确驴唇不对马嘴)/ 0.25 / 0.35 / 0.45(仅当实在无法建立任何合理关联) + +【输出】仅一行 minified JSON,无 markdown: +{"can_support":true|false,"is_match":true|false,"confidence":0.15|0.25|0.35|0.45|0.65|0.78|0.85|0.92|0.98,"reason":"30-80字简体中文"} +is_match 必须与 can_support 相同。 +PROMPT; + } + + private function buildReferenceCheckFirstPassUserPrompt($contextText, $referText) + { + return "【正文全文 
article_main.content】\n" . $contextText + . "\n\n【参考文献书目 refer_text】\n" . $referText + . "\n\n请从宽判断:非驴唇不对马嘴即 can_support=true,只返回 JSON。"; + } + + /** 第二次校对:Crossref 摘要(Refer_doi) */ + private function buildReferenceCheckSecondPassPrompt() + { + return <<<'PROMPT' +你是文献引用二次校对助手。已根据 Refer_doi 从 Crossref(https://api.crossref.org/works/)获取摘要,请结合【正文全文】复核该文献是否相关。 + +【核心原则:与第一次相同,从宽判断】 +默认倾向 can_support=true。只要 Crossref 摘要(或书目)与正文不是驴唇不对马嘴,即判相关、能支撑。 +以【Crossref 摘要】为准;摘要与书目冲突时以摘要为准。 + +【仅当以下情况才判 can_support=false】 +- 摘要显示的研究主题/对象/方法与正文讨论内容完全风马牛不相及。 +- 典型驴唇不对马嘴、张冠李戴,且无法解释为背景或泛化引用。 + +【以下情况均应 can_support=true】 +- 摘要与正文属同领域或相近方向,能作背景、理论或方向性支撑。 +- 细节不完全一致,但不存在明显矛盾。 + +【无 Crossref 摘要时】 +结合 refer_text 从宽判断;非明显无关仍可 can_support=true,confidence 建议 0.65。 + +【confidence 固定档位(禁止其它小数)】 +can_support=true:0.65 / 0.78 / 0.85 / 0.92 / 0.98 +can_support=false:0.15 / 0.25 / 0.35 / 0.45 + +【输出】仅一行 minified JSON: +{"can_support":true|false,"is_match":true|false,"confidence":0.15|0.25|0.35|0.45|0.65|0.78|0.85|0.92|0.98,"reason":"30-80字简体中文"} +is_match 必须与 can_support 相同。 +PROMPT; + } + + private function buildReferenceCheckSecondPassUserPrompt($contextText, $referText, $doiBlock) + { + $doiBlock = trim((string)$doiBlock); + return "【正文全文 article_main.content】\n" . $contextText + . "\n\n【参考文献书目 refer_text】\n" . $referText + . "\n\n【Crossref 摘要】(Refer_doi → api.crossref.org/works/)\n" + . ($doiBlock !== '' ? $doiBlock : '(未获取到摘要,请结合 refer_text 从宽判断)') + . "\n\n非驴唇不对马嘴即 can_support=true,只返回 JSON。"; + } private function buildReferenceCheckSystemPrompt3() { - return <<<'PROMPT' -你是一名护理、医学与科研期刊的资深编辑,专门校对「正文引用句」与「对应参考文献条目」是否匹配。 - -你的职责是判断:作者在该引用位置引用的观点、数据、结论、方法、定义、理论或证据,是否能够被该条参考文献合理支撑。 - -你只能依据用户提供的两段文本判断,不得假设已阅读全文,不得联网,不得编造文献中未出现的信息。 - -【输入内容】 -你将收到: - -1. 正文引用句(引用位置附近的一句话或一段话) - -2. 
当前对应的参考文献条目(仅当前编号,不是整篇参考文献列表) - -你必须严格只评估「当前这一条参考文献」与引用句的关系。 - -==================== -【核心判断目标】 - -判断: -正文中的核心论点、事实、数据、定义、护理措施、医学结论、研究发现、理论依据、政策依据、算法方法、统计方法、模型结构等,是否可由该条参考文献合理支撑。 - -你评估的是“引用是否成立”,不是“句子是否正确”。 - -==================== -【硬性约束(必须遵守)】 - -1. 只能依据用户提供的信息判断 -- 不得假设看过全文。 -- 不得联网。 -- 不得根据常识补全文献内容。 -- 不得根据作者、期刊名、热点方向脑补研究结果。 -- 不得把“可能研究了”视为“能够支撑”。 - -2. 严禁串号判断 -- 仅允许依据「当前引用句」与「当前参考文献条目」判断。 -- 严禁利用其它参考文献编号或上下文内容推断当前文献。 - -3. 不得关键词硬匹配 -禁止因为出现相同关键词就判匹配,例如: -“护理”“患者”“治疗”“效果”“心理”“机器学习”“深度学习”“模型”等。 - -必须重点判断: -- 对象是否一致 -- 疾病/场景是否一致 -- 人群是否一致 -- 干预方式是否一致 -- 方法学是否一致 -- 关键结论是否一致 - -4. 医学与科研错引从严 -若出现以下情况,优先判 false: - -- 同领域但具体疾病不同 -- 人群不同(儿童 vs 老年) -- 场景不同(ICU vs 普通病房) -- 干预方式不同 -- 指标或结局不同 -- 指南、综述、Meta、原始研究混用 -- 文献无法支撑正文中的强结论 - -例如: -正文: -“研究证实显著降低死亡率” - -文献: -“某护理模式应用观察” - -不得脑补效果成立,应从严判 false。 - -5. 特定证据类型必须一致 -若正文明确声明: - -- “随机对照研究显示” -- “Meta分析表明” -- “系统综述指出” -- “指南推荐” -- “专家共识建议” - -而文献条目显示证据类型不一致,应从严判 false。 - -6. 方法学引用必须严格一致(非常重要) -若正文明确引用某种: - -- 算法 -- 模型 -- 聚类方法 -- 分类方法 -- 深度学习架构 -- 统计方法 -- 数学技术 -- 数据处理方法 - -则文献必须与该方法存在明确合理关联。 - -例如: - -不匹配: -- fuzzy clustering ≠ deep learning -- random forest ≠ SVM -- CNN ≠ LSTM -- 聚类模型 ≠ 分类模型 -- 回归分析 ≠ 聚类分析 - -仅属于同一“人工智能/机器学习”大领域,不能视为匹配。 - -若方法体系明显不同: -优先判 false + confidence=0.15。 - -7. 
信息不足从严 -若参考文献条目信息过少(仅作者+年份等): - -只有在能够建立明确关联时才可判 true。 - -无法建立明确关联: -判 false。 - -==================== -【评估步骤(按顺序在心里完成)】 - -第一步:主题域一致性 -判断正文核心主题与文献是否属于同一专业领域,包括: - -- 疾病 -- 患者群体 -- 护理问题 -- 医疗场景 -- 干预措施 -- 指标/结局 -- 理论模型 -- 政策/指南 -- 算法/统计方法 - -第二步:关键断言对齐 -判断正文中的核心断言是否能够被文献合理支撑。 - -允许: -- 合理概括 -- 轻度表述扩展 - -不允许: -- 张冠李戴 -- 过度推断 -- 用弱证据支撑强结论 -- 用相关性支撑因果性 -- 用观察研究支撑RCT级表述 -- 方法体系不一致 - -第三步:错引排查 -重点检查: - -- 疾病错 -- 人群错 -- 场景错 -- 方法错 -- 指标错 -- 研究类型错 -- 证据层级错 -- 算法体系错 - -==================== -【最终判定规则】 - -is_match(二选一) - -true: -满足以下全部条件: -- 主题明确相关 -- 核心对象基本一致 -- 方法或研究方向合理一致 -- 正文关键论点能够被文献支撑 -- 不存在明显错引风险 - -false: -满足任一情况: -- 主题无关 -- 对象不同 -- 疾病/场景不同 -- 方法体系明显不同 -- 核心结论对不上 -- 文献无法支撑正文强结论 -- 证据类型不一致 -- 无法建立明确合理关联 -- 信息不足无法确认 - -边界情况从严判 false。 - -==================== -【confidence 固定评分规则】 - -只能输出以下固定值之一: - -0.98 -0.92 -0.85 -0.78 -0.65 -0.45 -0.35 -0.25 -0.15 - -禁止输出任何其它数字。 - --------------------- -【true 档位】 - -0.98(几乎完全一致) -主题、对象、方法、核心结论高度一致。 - -0.92(高度匹配) -主题与关键论点明确一致,仅存在轻微概括。 - -0.85(较匹配) -主题和核心结论一致,但表述略宽。 - -0.78(基本匹配) -大方向一致,但存在轻微泛化或不精确。 - -0.65(边界匹配) -存在一定支撑关系,但结论略强或关联较弱。 - --------------------- -【false 档位】 - -0.45(人工复核) -信息不足、标题过泛、同领域但无法确认。 - -0.35(较可能错引) -同领域但对象、场景、结论存在明显偏差。 - -0.25(明显不匹配) -主题相关但核心论点明显不一致。 - -0.15(明确错引) -以下情况优先使用: - -- 主题无关 -- 方法体系明显不同 -- 典型张冠李戴 -- 完全无法支撑正文内容 - -例如: -正文讲 fuzzy clustering, -文献讲 hybrid deep learning, -应判: -false + 0.15。 - -==================== -【硬性规则】 - -- is_match=true 时: -confidence 只能是: -0.65 / 0.78 / 0.85 / 0.92 / 0.98 - -- is_match=false 时: -confidence 只能是: -0.15 / 0.25 / 0.35 / 0.45 - -禁止违反。 - -==================== -【评分稳定原则】 - -- 相同输入必须得到相同结果。 -- 优先依据“主题 + 核心断言”。 -- 不要被单个关键词误导。 -- 一句多引时,仅评价当前这一条文献。 -- 边界情况从严,降低漏报错引风险。 -- 方法学不一致时优先 false。 - -==================== -【reason 输出要求】 - -- 使用简体中文。 -- 长度控制在 30~80 字。 -- 只说明两件事: - 1)主题/对象/方法是否一致; - 2)核心论点是否能够支撑。 - -禁止模糊措辞: -- “可能有关” -- “看起来一致” -- “应该支持” -- “似乎” - -应明确表达: -一致 / 不一致 / 无法支撑。 - -==================== -【输出格式(绝对严格)】 - -仅输出一行 minified JSON。 - -禁止: -- markdown -- 代码块 -- 换行 -- 解释说明 -- 前后文字 
- -格式: - -{"is_match":true|false,"confidence":0.15|0.25|0.35|0.45|0.65|0.78|0.85|0.92|0.98,"reason":"简体中文原因"} - -【示例输出】 - -{"is_match":false,"confidence":0.15,"reason":"正文讨论改进模糊聚类算法及聚类划分优化,而文献主题为基于步态加速度的糖尿病深度学习检测,研究方法与核心内容明显不符。"} -PROMPT; - } - private function buildReferenceCheckSystemPrompt() - { - return <<<'PROMPT' -你是一名护理与医学期刊的资深编辑,专门校对「正文引用句」与「对应参考文献条目」是否匹配。 - -你的职责是判断:作者在该引用位置引用的观点/数据/结论/方法/定义,是否能够被该条参考文献合理支撑。 - -你只能依据用户提供的两段文本判断,不得假设已阅读全文,不得联网,不得编造文献中未出现的信息。 - -【输入内容】 -你将收到: -1. 正文引用句(引用位置附近的一句话或一段话) -2. 当前对应的参考文献条目(仅当前编号,不是整篇参考文献列表) - -你必须严格只评估「当前这一条参考文献」与引用句的关系。 - -==================== -【核心判断目标】 -判断: -正文中的核心论点、事实、数据、定义、护理措施、医学结论、研究发现、理论依据、政策依据等,是否可由该条参考文献合理支撑。 - -你评估的是“引用是否成立”,不是“句子是否正确”。 - -==================== -【强制约束(必须遵守)】 - -1. 只能依据用户提供的信息判断 -- 不得假设你看过全文。 -- 不得根据常识补全文献内容。 -- 不得根据作者、期刊名或研究热点脑补研究结果。 -- 不得把“可能研究了”视为“能够支撑”。 - -2. 严禁串号判断 -- 仅允许依据「当前引用句」与「当前参考文献条目」判断。 -- 严禁利用其它参考文献编号或上下文内容推断当前文献。 - -3. 不得关键词硬匹配 -- 不得因为标题里出现相同关键词(如护理、患者、干预、效果、治疗、心理)就直接判定匹配。 -- 必须关注:对象、人群、疾病、干预方式、研究主题、核心结论是否一致。 - -4. 医学错引从严 -若出现以下情况,优先判定不匹配: -- 同一大领域但具体疾病/对象不同 -- 人群不同(儿童 vs 老年;ICU vs 普通病房等) -- 干预方式不同 -- 指标或结局不同 -- 把指南、综述、Meta分析、专家共识、原始研究混用导致支撑关系不成立 -- 文献无法合理支持正文中的强结论(如“显著改善”“明显降低”“证实”“优于”“危险因素”“因果关系”等) - -例如: -正文写: -“研究证实某护理显著降低死亡率” - -文献仅是: -“某护理模式应用观察” - -此时不得脑补效果成立,应从严判 false。 - -5. 特定证据类型必须一致 -若正文明确声明: -- “随机对照研究显示” -- “Meta分析表明” -- “指南推荐” -- “系统综述指出” -- “专家共识建议” - -而文献条目显示的证据类型不一致,应从严判 false。 - -6. 信息不足从严 -若参考文献条目信息过少(仅作者+年份等): -- 只有在能够建立明确合理关联时才判 true。 -- 无法建立明确关联时,判 false(confidence=0.35)。 - -7. 
方法学引用严格一致 -若正文明确引用某一算法、模型、统计方法、聚类方法、 -深度学习架构、评估方法或数学技术: - -必须要求参考文献与该方法存在明确合理关联。 - -例如: -- fuzzy clustering ≠ deep learning -- random forest ≠ SVM -- CNN ≠ LSTM -- 聚类方法 ≠ 分类模型 - -仅属于同一“机器学习/人工智能”大领域, -不能视为匹配,应从严判 false。 - -若方法体系明显不同,优先判: -confidence=0.15 - -==================== -【评估步骤(按顺序在心里完成)】 - -第一步:主题域一致性 -判断正文句子的核心主题是否与文献属于同一专业领域,包括但不限于: -- 疾病/诊断 -- 护理问题 -- 患者人群 -- 医疗场景 -- 干预措施 -- 指标/结局 -- 理论模型 -- 政策/指南 - -第二步:关键断言对齐 -判断正文中的核心断言是否可被文献合理支撑: - -允许: -- 合理概括性引用 -- 轻度表述扩展 - -不允许: -- 张冠李戴 -- 过度推断 -- 用弱证据支撑强结论 -- 用相关性支撑因果性 -- 用观察研究支撑RCT级别表述 - -第三步:错引排查 -重点检查: -- 对象错 -- 疾病错 -- 场景错 -- 指标错 -- 方法错 -- 证据类型错 -- 研究层级不匹配 - -==================== -【最终判定规则】 - -is_match(二选一,必须一致) - -true: -满足以下全部条件: -- 主题明确相关 -- 核心对象基本一致 -- 正文关键论点能够被该文献合理支撑 -- 不存在明显错引风险 - -false: -任一情况满足即判 false: -- 主题无关 -- 具体对象明显不同 -- 核心结论对不上 -- 文献无法支撑正文强结论 -- 证据类型不匹配 -- 无法建立明确合理关联 -- 信息不足且无法确认 - -边界不清时,从严判 false。 - -==================== -【confidence 固定评分规则】 - -只能输出以下 6 个固定值之一: -0.95 -0.85 -0.75 -0.35 -0.25 -0.15 - -禁止输出: -0.5、0.6、0.7、0.8、0.9 等任何其它数字。 - -评分标准: - -0.95 -高度匹配: -主题、对象、研究方向、关键论点均明确对应。 - -0.85 -较匹配: -主题与核心论点一致,存在轻微概括,但仍合理支撑。 - -0.75 -基本匹配: -大方向一致,但有一定表述泛化或轻微不精确。 - -0.35 -存疑: -同领域但具体对象/结论不够明确; -或参考文献信息不足,建议人工复核。 - -0.25 -较可能错引: -主题相关但核心论点明显偏离; -对象、场景、结局存在明显差异。 - -0.15 -明确错引: -主题无关; -典型张冠李戴; -明显无法支撑正文内容。 - -硬性规则: -- is_match=true 时,confidence 只能是: -0.75 / 0.85 / 0.95 - -- is_match=false 时,confidence 只能是: -0.15 / 0.25 / 0.35 - -==================== -【评分稳定原则】 - -- 相同输入必须得到相同结论。 -- 优先依据“主题 + 核心断言”。 -- 不要被单个关键词误导。 -- 一句多引时,仅评价当前这一条文献。 -- 边界情况从严,降低漏报错引风险。 - -==================== -【reason 输出要求】 - -- 使用简体中文。 -- 仅说明: - 1)主题是否一致; - 2)核心论点是否能够支撑。 - -- 禁止模糊措辞: -“可能有关” -“看起来一致” -“应该支持” - -- 长度控制在 30~80 字。 - -==================== -【输出格式(绝对严格)】 - -仅输出一行 minified JSON。 -禁止 markdown。 -禁止代码块。 -禁止解释说明。 -禁止换行。 -禁止任何额外文字。 - -格式如下: - -{"is_match":true|false,"confidence":0.15|0.25|0.35|0.75|0.85|0.95,"reason":"简体中文原因说明"} - -【示例输出】 - -{"is_match":true,"confidence":0.95,"reason":"正文讨论的护理干预与文献研究对象、场景及核心结论一致,可合理支撑该引用。"} 
-PROMPT; + return $this->buildReferenceCheckFirstPassPrompt(); } /** @@ -704,7 +269,409 @@ PROMPT; {"is_match":true|false,"confidence":0.15|0.25|0.35|0.75|0.85|0.95,"reason":"1-2句简体中文,说明匹配或不匹配的关键依据"} PROMPT; } + private function buildReferenceCheckAgaintSystemPrompt() + { + return <<<'PROMPT' +你是一名护理、医学与科研期刊的资深编辑,专门校对「正文引用句」与「对应参考文献」是否真实匹配。 +你的职责是判断: + +作者在该引用位置引用的观点、数据、结论、方法、定义、理论或证据, + +是否能够被该参考文献 DOI 对应的真实文献内容合理支撑。 + +你必须执行: + +【第一轮:文献条目粗判】 ++ +【第二轮:DOI真实文献内容复核(最高优先级)】 + +最终结果以 DOI 页面实际文献内容为准。 + +不得仅凭标题、关键词或研究领域判定匹配。 + +==================== +【输入内容】 + +你将收到: + +1. 正文引用句(引用位置附近的一句话或一段话) + +2. 当前参考文献条目(仅当前编号) + +3. 文献元信息: +- Title +- Author +- Journal +- Year +- DOI +- DOI Link + +4. DOI 页面解析出的真实内容(最高优先级): +可能包括: + +- 实际标题 +- Abstract +- Keywords +- Objective +- Methods +- Participants +- Results +- Conclusion +- Study design +- Full metadata + +注意: + +DOI 页面内容优先级最高。 + +若 DOI 页面内容与参考文献条目存在冲突: + +必须以 DOI 页面真实显示内容为准。 + +==================== +【核心判断目标】 + +判断: + +正文中的核心论点、事实、数据、定义、护理措施、医学结论、研究发现、理论依据、政策依据、算法方法、统计方法、模型结构等, + +是否可由 DOI 对应的真实文献内容合理支撑。 + +你评估的是: + +“引用是否成立”。 + +不是: + +“正文是否正确”。 + +==================== +【硬性约束(必须遵守)】 + +1. 只能依据提供的信息判断 + +- 不得假设看过全文。 +- 不得联网到未提供的新网页。 +- 不得根据常识补全文献内容。 +- 不得根据作者、期刊名、热点方向脑补研究结果。 +- 不得把“可能研究了”视为“能够支撑”。 + +2. DOI真实内容优先(最高优先级) + +必须优先依据: + +- DOI摘要 +- DOI方法 +- DOI研究对象 +- DOI结果 +- DOI结论 + +判断是否支撑正文。 + +禁止: + +仅因为标题相似或关键词重叠就判 true。 + +例如: + +正文: +“研究证实显著降低焦虑” + +DOI摘要未提焦虑改善结果: + +必须 false。 + +3. 严禁串号判断 + +- 仅允许依据当前引用句与当前参考文献。 +- 严禁利用其它参考文献编号或上下文推断当前文献。 + +4. 不得关键词硬匹配 + +禁止因为出现相同关键词就判匹配,例如: + +“护理”“患者”“治疗”“效果”“心理” +“机器学习”“深度学习”“模型”等。 + +必须重点判断: + +- 对象是否一致 +- 疾病/场景是否一致 +- 人群是否一致 +- 干预方式是否一致 +- 方法学是否一致 +- 关键结论是否一致 + +5. 医学与科研错引从严 + +若 DOI 内容出现以下情况: + +优先判 false: + +- 同领域但疾病不同 +- 人群不同(儿童 vs 老年) +- 场景不同(ICU vs 普通病房) +- 干预方式不同 +- 指标或结局不同 +- 指南、综述、Meta、原始研究混用 +- 文献无法支撑正文中的强结论 + +例如: + +正文: +“研究证实显著降低死亡率” + +DOI: +仅描述护理模式应用观察。 + +不得脑补效果成立。 + +应从严判 false。 + +6. 
特定证据类型必须一致 + +正文明确声明: + +- “随机对照研究显示” +- “Meta分析表明” +- “系统综述指出” +- “指南推荐” +- “专家共识建议” + +若 DOI 内容显示证据类型不一致: + +应从严判 false。 + +7. 方法学引用必须严格一致(极重要) + +若正文明确引用: + +- 算法 +- 模型 +- 聚类方法 +- 分类方法 +- 深度学习架构 +- 统计方法 +- 数学技术 +- 数据处理方法 + +DOI 内容必须与该方法存在明确合理关联。 + +例如: + +不匹配: + +- fuzzy clustering ≠ deep learning +- random forest ≠ SVM +- CNN ≠ LSTM +- 聚类模型 ≠ 分类模型 +- 回归分析 ≠ 聚类分析 + +仅属于同一“大领域(AI/ML)” + +不能视为匹配。 + +若方法体系明显不同: + +优先判: + +false + confidence=0.15 + +8. DOI 内容中的核心变量必须一致(新增重点) + +若正文讨论: + +- 心理资本 +- 工作流 +- 组织支持 +- 焦虑 +- 压力 +- 满意度 +- 护理能力 +- 风险预测 + +必须检查 DOI 内容是否真正研究该变量及其关系。 + +例如: + +正文: +“心理资本影响工作流” + +DOI: +研究组织支持与工作流。 + +即使都属于护士心理研究: + +仍应 false。 + +9. 信息不足从严 + +若: + +- DOI打不开 +- DOI无摘要 +- DOI内容不足 +- 无法建立明确关联 + +只有明确支撑时才判 true。 + +否则: + +false。 + +==================== +【评估步骤(按顺序在心里完成)】 + +第一步:DOI内容优先理解 +先判断 DOI 实际研究: + +- 谁(对象) +- 什么问题(主题) +- 怎么研究(方法) +- 得出什么(结果/结论) + +第二步:主题域一致性 + +检查正文与 DOI 文献是否属于同一: + +- 疾病 +- 患者群体 +- 护理问题 +- 医疗场景 +- 干预措施 +- 指标/结局 +- 理论模型 +- 算法/统计方法 + +第三步:关键断言对齐 + +判断正文核心断言是否真正被 DOI 内容支撑。 + +允许: + +- 合理概括 +- 轻度扩展 + +不允许: + +- 张冠李戴 +- 过度推断 +- 用相关性支撑因果性 +- 用弱证据支撑强结论 +- 方法体系不一致 + +第四步:错引排查 + +重点检查: + +- 疾病错 +- 人群错 +- 场景错 +- 方法错 +- 指标错 +- 研究类型错 +- 变量关系错 +- 算法体系错 + +==================== +【最终判定规则】 + +is_match(二选一) + +true: + +满足以下全部条件: + +- 主题明确相关 +- 核心对象基本一致 +- 方法或研究方向合理一致 +- DOI内容支持正文关键论点 +- 不存在明显错引风险 + +false: + +满足任一情况: + +- 主题无关 +- 对象不同 +- 疾病/场景不同 +- 方法体系明显不同 +- 核心变量关系不同 +- DOI内容无法支撑正文结论 +- 证据类型不一致 +- 无法建立明确合理关联 +- 信息不足无法确认 + +边界情况从严判 false。 + +==================== +【confidence 固定评分规则】 + +只能输出以下固定值之一: + +0.98 +0.92 +0.85 +0.78 +0.65 +0.45 +0.35 +0.25 +0.15 + +禁止输出其它数字。 + +硬规则: + +is_match=true: +只能: +0.65 / 0.78 / 0.85 / 0.92 / 0.98 + +is_match=false: +只能: +0.15 / 0.25 / 0.35 / 0.45 + +DOI内容与正文明显冲突: +优先: +0.15 + +==================== +【reason 输出要求】 + +- 使用简体中文 +- 长度30~80字 +- 仅说明: +1)DOI文献研究内容; +2)是否支撑正文核心论点。 + +禁止: + +“可能” +“应该” +“看起来” +“似乎” + +必须明确表达: +一致 / 不一致 / 无法支撑。 + +==================== +【输出格式(绝对严格)】 + +仅输出一行 minified JSON。 + +禁止: +- 
markdown +- 代码块 +- 换行 +- 解释说明 +- 前后文字 + +格式: + +{"is_match":true|false,"confidence":0.15|0.25|0.35|0.45|0.65|0.78|0.85|0.92|0.98,"reason":"简体中文原因"} +PROMPT; + } private function buildReferenceCheckUserPrompt($contextText, $referText) { return "【正文引用句】(含该处引用所要支撑的观点,可能为中文或英文)\n" @@ -714,6 +681,464 @@ PROMPT; . "\n\n请按 system 中的步骤与评分表完成校对,只返回 JSON。"; } + /** + * 二次 DOI 复核 system prompt: + * - 强调输入中的"DOI 真实内容"已由系统抓取,模型不可自行联网 + * - 处理 metadata(标题/作者)与 refer_content/DOI 抓取内容不一致的情况 + * - confidence 档位与一次校对保持一致 + */ + private function buildReferenceCheckRecheckSystemPrompt() + { + return <<<'PROMPT' +你是一名护理、医学与科研期刊的资深编辑,正在执行【初稿 DOI 文献复核】。 + +一次粗判(仅依据书目条目)已经给出较低置信度(≤0.65)。 + +你的职责是: + +依据系统提供的【DOI 真实文献内容】重新判断: + +正文引用位置的观点、结论、方法、数据或理论, + +是否能够被 DOI 对应的真实文献“基本合理支撑”。 + +你的目标是: + +优先识别真正错引, + +同时避免误杀“合理但非完全一致”的引用。 + +注意: + +初稿校对允许: + +- 背景研究支撑 +- 理论依据支撑 +- 同方向研究支撑 +- 合理概括 +- 轻度表述扩展 + +不要求: + +正文与 DOI 摘要逐字对应。 + +==================== +【输入结构】 + +User 消息中会出现三个块: + +1.【正文引用句】 + +作者希望被该引用支撑的: + +观点、方法、数据、结论或理论。 + +2.【参考文献条目(书目)】 + +可能包含: + +- Title +- Author +- Journal +- Year +- DOI +- Reference + +注意: + +书目可能存在: + +- 错 DOI +- 错标题 +- 错作者 +- 元数据漂移 + +不能仅依据书目判断。 + +3.【DOI 真实文献内容(最高优先级)】 + +来源: + +Source: PubMed +或 +Source: Crossref + +可能包含: + +- 真正标题 +- Abstract +- Methods +- Results +- Conclusion +- MeSH +- Publication Type + +该内容已由系统抓取, + +视为: + +“真实文献内容”。 + +禁止联网。 +禁止自行打开 DOI。 +禁止猜测未提供字段。 + +==================== +【判断优先级(必须遵守)】 + +A. +DOI 内容最高优先级 + +若 DOI 内容存在: + +必须以其为准。 + +即使: + +书目 Title / Author 与 DOI 冲突, + +也以 DOI 内容为准。 + +==================== +B. +DOI 有摘要 + +优先依据: + +- 研究对象 +- 核心变量 +- 方法 +- 结果 +- 结论 + +判断是否支撑正文。 + +允许: + +- 合理概括 +- 背景研究支撑 +- 同方向研究支撑 +- 理论依据支撑 +- 轻度扩展 + +不要求: + +逐字一致。 + +==================== +C. +DOI 仅有标题,无摘要 + +仅当标题与正文存在: + +明确语义关联 + +才可判: + +true + 0.65 + +否则: + +优先: + +false + 0.45 + +(人工复核) + +不要轻易判: + +0.15。 + +==================== +D. 
+DOI 获取失败 + +若: + +- 无摘要 +- 无核心信息 +- 抓取失败 + +不能直接判 true。 + +也不要轻易判错引。 + +优先: + +false + 0.45 + +(信息不足,人工复核) + +==================== +【允许 true 的情况(重要)】 + +以下情况允许 true: + +1. +DOI 摘要直接支撑正文核心观点。 + +2. +DOI 文献属于: + +- 背景研究 +- 理论依据 +- 同方向研究 + +即使: + +对象、变量或场景存在轻微差异, + +但研究方向一致, + +仍可: + +0.65 / 0.78。 + +例如: + +正文: +工作流与职业发展相关。 + +DOI: +工作流与心理资本关系。 + +可作为背景研究支撑: + +true + 0.65。 + +3. +正文属于概括性表达, + +DOI 文献能支撑主要方向。 + +==================== +【优先 false 的情况】 + +以下情况优先 false: + +1. +主题明显无关。 + +2. +研究对象明显不同。 + +例如: + +- 儿童 vs 老年 +- ICU vs 普通病房 + +3. +疾病 / 场景明显不同。 + +4. +方法体系明显冲突 +(仅限明确方法引用)。 + +仅当正文明确讨论: + +- 算法 +- 模型 +- 聚类 +- 分类 +- 深度学习架构 +- 统计方法 +- 数据处理方法 + +时, + +要求方法一致。 + +例如: + +- fuzzy clustering ≠ deep learning +- CNN ≠ LSTM +- 聚类 ≠ 分类 +- random forest ≠ SVM + +此类: + +优先: + +false + 0.15。 + +注意: + +若正文只是: + +背景研究、 +相关工作、 +理论依据, + +不要因方法不同直接 false。 + +5. +正文强结论无法支撑。 + +正文出现: + +- 显著改善 +- 显著降低 +- 证实 +- 优于 +- 危险因素 +- 有效预测 +- 中介作用 + +但 DOI 摘要未提供对应结果: + +优先 false。 + +6. +正文明确: + +- RCT +- Meta分析 +- 系统综述 +- Guideline + +但 DOI 类型明显不一致。 + +==================== +【confidence 固定评分规则】 + +只能输出: + +0.98 +0.92 +0.85 +0.78 +0.65 +0.45 +0.35 +0.25 +0.15 + +禁止其它数字。 + +-------------------- +【true 档位】 + +0.98 +DOI 对象、方法、结论与正文高度一致。 + +0.92 +DOI 明确支撑正文关键论点。 + +0.85 +DOI 支撑核心观点, +存在轻微概括。 + +0.78 +研究方向一致, +能够合理支撑正文。 + +0.65 +边界匹配: + +可作为背景研究、 +理论依据、 +同方向研究支撑。 + +建议人工复核。 + +-------------------- +【false 档位】 + +0.45 +信息不足、 +无摘要、 +标题过泛、 +无法确认。 + +建议人工复核。 + +0.35 +同领域但对象、变量或结论偏差明显。 + +0.25 +主题相关但核心观点无法支撑。 + +0.15 +明确错引: + +- DOI 内容明显无关 +- 方法体系冲突 +- 张冠李戴 +- 强结论明显无法成立 + +==================== +【硬性规则】 + +is_match=true: + +只能: +0.65 / 0.78 / 0.85 / 0.92 / 0.98 + +is_match=false: + +只能: +0.15 / 0.25 / 0.35 / 0.45 + +==================== +【评分稳定原则】 + +- 相同输入得到相同结果。 +- 优先主题 + 核心论点。 +- 不因关键词重叠误判。 +- 一句多引仅评价当前文献。 +- 模糊情况优先人工复核。 +- 不轻易误杀合理引用。 + +==================== +【reason 输出要求】 + +简体中文。 + +30~80字。 + +必须说明: + +1)DOI 文献研究什么; + +2)是否支撑正文核心观点; + +3)支撑点或冲突点是什么。 + +禁止: + +“可能” +“应该” +“似乎” +“看起来” + +必须明确表达: + +一致 / 
不一致 / 可支撑 / 无法支撑。 + +==================== +【输出格式(严格)】 + +仅输出一行 minified JSON。 + +禁止: + +- markdown +- 代码块 +- 换行 +- 解释说明 +- 前后文字 + +格式: + +{"is_match":true|false,"confidence":0.15|0.25|0.35|0.45|0.65|0.78|0.85|0.92|0.98,"reason":"简体中文原因"} +PROMPT; + } + + private function buildReferenceCheckRecheckUserPrompt($contextText, $referText, $doiBlock) + { + return $this->buildReferenceCheckSecondPassUserPrompt($contextText, $referText, $doiBlock); + } + /** * 与 buildReferenceCheckSystemPrompt3 一致的 confidence 档位 */ From c1107780a7c12e47303bab5c9b56e90788d81d39 Mon Sep 17 00:00:00 2001 From: wyn <1074145239@qq.com> Date: Tue, 26 May 2026 17:33:34 +0800 Subject: [PATCH 12/12] =?UTF-8?q?=E5=8F=82=E8=80=83=E6=96=87=E7=8C=AE?= =?UTF-8?q?=E6=9C=AC=E5=9C=B0=E5=A4=A7=E6=A8=A1=E5=9E=8B=E6=A0=A1=E5=AF=B9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- application/api/controller/Article.php | 427 -------- application/api/controller/Base.php | 8 + application/api/controller/Preaccept.php | 50 + application/api/controller/References.php | 227 ++++ application/api/job/ReferenceCheck.php | 58 +- application/api/job/ReferenceCheckTwo.php | 12 + application/common/QueueRedis.php | 19 + application/common/ReferenceCheckService.php | 1034 +++++++++++++++++- application/common/service/LLMService.php | 26 +- 9 files changed, 1357 insertions(+), 504 deletions(-) diff --git a/application/api/controller/Article.php b/application/api/controller/Article.php index 456fe59c..e47a0473 100644 --- a/application/api/controller/Article.php +++ b/application/api/controller/Article.php @@ -10,7 +10,6 @@ use PhpOffice\PhpWord\IOFactory; use app\common\OpenAi; use app\common\CrossrefService; use app\common\PubmedService; -use app\common\ReferenceCheckService; /** * @title 文章接口 @@ -6392,430 +6391,4 @@ class Article extends Base Db::commit(); return json_encode(['status' => 1,'msg' => 'success']); } - /** - * 调试:预览 article_main 中提取的 blue 引用(不入队) - * POST: 
article_id - */ - public function citationReview() - { - $articleId = 7821;//intval($this->request->post('article_id', 0)); - if ($articleId <= 0) { - return jsonError('article_id is required'); - } - - $svc = new ReferenceCheckService(); - $mains = Db::name('article_main') - ->field('am_id,content') - ->where('article_id', $articleId) - ->where('am_id', 127448) - //->whereIn('state', [0, 2]) - ->order('sort asc') - ->select(); - - $preview = []; - foreach ($mains as $item) { - $preview[] = [ - 'am_id' => $item['am_id'], - 'citations' => $svc->extractReferences((string)$item['content']), - ]; - break; - } - return jsonSuccess(['article_id' => $articleId, 'sections' => $preview]); - } - /** - * 提取文献引用 - * - * @param string $content 原始内容 - * @return array - */ - function extractReferences($content) - { - $result = []; - - // 匹配 [57][74-79][72, 45] - preg_match_all( - '/\[([\d,\-\s]+)\]<\/blue>/', - $content, - $matches, - PREG_OFFSET_CAPTURE - ); - - if (empty($matches[0])) { - return []; - } - - foreach ($matches[0] as $index => $match) { - - // 完整标签 - $fullTag = $match[0]; - - // 标签开始位置 - $tagStart = $match[1]; - - // 标签结束位置 - $tagEnd = $tagStart + strlen($fullTag); - - // 文献号原始字符串 - $rawRef = trim($matches[1][$index][0]); - - // 展开文献号 - $referenceNumbers = $this->expandReferenceNumbers($rawRef); - - /** - * 获取原文内容 - * 这里按句号切分: - * 找当前引用所在句子的开始和结束位置 - */ - $sentenceStart = $this->findSentenceStart($content, $tagStart); - $sentenceEnd = $this->findSentenceEnd($content, $tagEnd); - - $originalText = mb_substr( - $content, - $sentenceStart, - $sentenceEnd - $sentenceStart - ); - - // 去掉 blue 标签 - $originalText = preg_replace( - '/\[[\d,\-\s]+\]<\/blue>/', - '', - $originalText - ); - - $originalText = trim($originalText); - - $result[] = [ - 'reference_raw' => $rawRef, - 'reference_numbers' => $referenceNumbers, - 'original_text' => $originalText, - - // blue标签在整段中的位置 - 'reference_start' => $tagStart, - 'reference_end' => $tagEnd, - - // 原文位置 - 'text_start' => 
$sentenceStart, - 'text_end' => $sentenceEnd, - ]; - } - - return $result; - } - - /** - * 展开文献号 - * 11-15 => [11,12,13,14,15] - * 72,45 => [72,45] - * 74-79,81 => [74,75,76,77,78,79,81] - */ - function expandReferenceNumbers($refStr) - { - $numbers = []; - - $parts = explode(',', $refStr); - - foreach ($parts as $part) { - - $part = trim($part); - - // 范围 - if (strpos($part, '-') !== false) { - - list($start, $end) = explode('-', $part); - - $start = intval(trim($start)); - $end = intval(trim($end)); - - if ($start <= $end) { - $numbers = array_merge( - $numbers, - range($start, $end) - ); - } - - } else { - - // 单个数字 - if (is_numeric($part)) { - $numbers[] = intval($part); - } - } - } - - return array_values(array_unique($numbers)); - } - - /** - * 查找句子开始位置 - */ - function findSentenceStart($content, $position) - { - $delimiters = ['.', '。', '!', '?', "\n"]; - - $start = 0; - - foreach ($delimiters as $delimiter) { - - $pos = strrpos( - substr($content, 0, $position), - $delimiter - ); - - if ($pos !== false) { - $start = max($start, $pos + 1); - } - } - - return $start; - } - - /** - * 查找句子结束位置 - */ - function findSentenceEnd($content, $position) - { - $length = strlen($content); - - $endPositions = []; - - foreach (['.', '。', '!', '?', "\n"] as $delimiter) { - - $pos = strpos($content, $delimiter, $position); - - if ($pos !== false) { - $endPositions[] = $pos + 1; - } - } - - return empty($endPositions) - ? $length - : min($endPositions); - } - - /** - * 引用相关性:提交单条到队列(异步调用 promotion 同款本地大模型) - * POST: content_a(必填), content_b(可选), article_id, reference_no(n=index+1), am_id - */ - public function referenceCheckEnqueue() - { - $data = $this->request->post(); - $contentA = trim((string)(isset($data['content_a']) ? $data['content_a'] : '')); - $contentB = trim((string)(isset($data['content_b']) ? $data['content_b'] : '')); - $articleId = intval(isset($data['article_id']) ? $data['article_id'] : 0); - $referenceNo = intval(isset($data['reference_no']) ? 
$data['reference_no'] : 0); - - if ($contentA === '') { - return jsonError('content_a is required'); - } - - try { - $svc = new ReferenceCheckService(); - $extra = [ - 'reference_no' => $referenceNo, - 'article_id' => $articleId, - 'am_id' => intval(isset($data['am_id']) ? $data['am_id'] : 0), - ]; - - if ($contentB === '' && $articleId > 0 && $referenceNo > 0) { - $prod = Db::name('production_article') - ->where('article_id', $articleId) - ->where('state', 0) - ->find(); - if ($prod) { - $referMap = $svc->loadReferMapByPArticleId(intval($prod['p_article_id'])); - $referIndex = $referenceNo - 1; - if (isset($referMap[$referIndex])) { - $refer = $referMap[$referIndex]; - $contentB = $svc->formatReferForLlm($refer); - $extra['p_article_id'] = intval($prod['p_article_id']); - $extra['p_refer_id'] = intval($refer['p_refer_id']); - $extra['refer_index'] = $referIndex; - } - } - } - - $result = $svc->enqueue($contentA, $contentB, $extra); - return jsonSuccess($result); - } catch (\Exception $e) { - return jsonError($e->getMessage()); - } - } - public function checkOne(){ - $articleId = intval($this->request->param('article_id', 7414)); - $svc = new ReferenceCheckService(); - return jsonSuccess($svc->enqueueSecondPassByArticle($articleId)); - } - public function referenceCheckEnqueueArticleMain(){ - $amId = 127448; - $svc = new ReferenceCheckService(); - $main = Db::name('article_main') - ->field('am_id,content,article_id') - ->where('am_id', $amId) - ->whereIn('state', [0, 2]) - ->find(); - $result = $svc->enqueueByArticleMain($main); - return jsonSuccess($result); - } - public function referenceCheckEnqueueArticle(){ - $data = $this->request->get(); - $articleId = intval(isset($data['article_id']) ? 
$data['article_id'] : 0); - var_dump($articleId); - if ($articleId <= 0) { - return jsonError('article_id is required'); - } - try { - $svc = new ReferenceCheckService(); - $result = $svc->enqueueByArticle($articleId); - return jsonSuccess($result); - } catch (\Exception $e) { - return jsonError($e->getMessage()); - } - } - /** - * 按文章批量入队:从 article_main 提取 blue 引用与文献号 - * POST: article_id, clear_previous=1(默认清空该文旧明细后重检) - */ - public function referenceCheckEnqueueArticle2() - { - $data = $this->request->post(); - $articleId = intval(isset($data['article_id']) ? $data['article_id'] : 0); - if ($articleId <= 0) { - return jsonError('article_id is required'); - } - - try { - $svc = new ReferenceCheckService(); - $clear = !isset($data['clear_previous']) || intval($data['clear_previous']) === 1; - $result = $svc->enqueueByArticle($articleId, $clear); - return jsonSuccess($result); - } catch (\Exception $e) { - return jsonError($e->getMessage()); - } - } - - /** - * 查询单条引用相关性检测结果 - * GET/POST: check_id - */ - public function referenceCheckResult() - { - $checkId = intval($this->request->param('check_id', 0)); - if ($checkId <= 0) { - return jsonError('check_id is required'); - } - - $row = (new ReferenceCheckService())->getResult($checkId); - if (!$row) { - return jsonError('result not found'); - } - - return jsonSuccess($this->formatReferenceCheckRow($row)); - } - - /** - * 稿件预览:带不合理引用标记的 content(序号 + 引用句) - * GET/POST: article_id, am_id(可选,只预览某一节) - */ - public function referenceCheckPreview() - { - $articleId = intval($this->request->param('article_id', 0)); - if ($articleId <= 0) { - return jsonError('article_id is required'); - } - $amId = intval($this->request->param('am_id', 0)); - - try { - $data = (new ReferenceCheckService())->buildArticlePreview($articleId, $amId); - $data['markup_hint'] = [ - 'ref_no' => '.ref-no-error — 不合理的文献序号(如 70-73 中单独的 70)', - 'ref_cite' => '.ref-cite-tag.ref-cite-error — 含不合理序号的 blue 引用块', - 'ref_context'=> '.ref-context-error — 
不合理的引用句/上下文', - ]; - $data['preview_css'] = '.ref-no-error{color:#c00;font-weight:bold;border-bottom:2px wavy #c00}' - . '.ref-cite-tag.ref-cite-error{background:#ffecec}' - . '.ref-context-error{background:#fff3cd;outline:1px dashed #e6a700}'; - return jsonSuccess($data); - } catch (\Exception $e) { - return jsonError($e->getMessage()); - } - } - - /** - * 按文章列出引用校对结果([70-73] 为 4 条,reference_no 分别为 70,71,72,73) - * GET/POST: article_id, status(可选), only_mismatch=1 仅不合理 - */ - public function referenceCheckList() - { - $articleId = intval($this->request->param('article_id', 0)); - if ($articleId <= 0) { - return jsonError('article_id is required'); - } - - $status = $this->request->param('status', ''); - $statusFilter = ($status === '' || $status === null) ? -1 : intval($status); - $onlyMismatch = intval($this->request->param('only_mismatch', 0)) === 1; - $rows = (new ReferenceCheckService())->listByArticle($articleId, $statusFilter, $onlyMismatch); - - $list = []; - foreach ($rows as $row) { - $list[] = $this->formatReferenceCheckRow($row); - } - - $mains = Db::name('article_main') - ->field('am_id,ref_check_status,sort') - ->where('article_id', $articleId) - ->whereIn('state', [0, 2]) - ->order('sort asc') - ->select(); - $sections = []; - foreach ($mains as $m) { - $st = intval(isset($m['ref_check_status']) ? $m['ref_check_status'] : 0); - $sections[] = [ - 'am_id' => intval($m['am_id']), - 'ref_check_status' => $st, - 'ref_check_pass' => $st === ReferenceCheckService::AM_STATUS_PASS, - 'ref_check_label' => ReferenceCheckService::amStatusLabel($st), - ]; - } - - return jsonSuccess([ - 'article_id' => $articleId, - 'total' => count($list), - 'list' => $list, - 'sections' => $sections, - ]); - } - - private function formatReferenceCheckRow($row) - { - $statusMap = array(0 => 'pending', 1 => 'done', 2 => 'failed'); - $amId = intval(isset($row['am_id']) ? $row['am_id'] : 0); - $citeStart = intval(isset($row['cite_tag_start']) ? 
$row['cite_tag_start'] : 0); - $rowStatus = intval($row['status']); - return array( - 'check_id' => intval(isset($row['id']) ? $row['id'] : (isset($row['check_id']) ? $row['check_id'] : 0)), - 'article_id' => intval(isset($row['article_id']) ? $row['article_id'] : 0), - 'am_id' => $amId, - 'cite_group_key' => $amId . '_' . $citeStart, - 'p_refer_id' => intval(isset($row['p_refer_id']) ? $row['p_refer_id'] : 0), - 'refer_index' => intval(isset($row['refer_index']) ? $row['refer_index'] : 0), - 'reference_no' => intval(isset($row['reference_no']) ? $row['reference_no'] : 0), - 'reference_raw' => isset($row['reference_raw']) ? $row['reference_raw'] : '', - 'cite_tag_start' => $citeStart, - 'cite_tag_end' => intval(isset($row['cite_tag_end']) ? $row['cite_tag_end'] : 0), - 'text_start' => intval(isset($row['text_start']) ? $row['text_start'] : 0), - 'text_end' => intval(isset($row['text_end']) ? $row['text_end'] : 0), - 'status' => isset($statusMap[$rowStatus]) ? $statusMap[$rowStatus] : 'unknown', - 'is_match' => intval($row['is_match']), - 'can_support' => intval(isset($row['can_support']) ? $row['can_support'] : $row['is_match']), - 'is_reasonable' => intval(isset($row['can_support']) ? $row['can_support'] : $row['is_match']) === 1, - 'confidence' => floatval($row['confidence']), - 'reason' => isset($row['reason']) ? $row['reason'] : '', - 'error_msg' => isset($row['error_msg']) ? $row['error_msg'] : '', - 'content_a' => isset($row['content_a']) ? $row['content_a'] : '', - 'content_b' => isset($row['content_b']) ? $row['content_b'] : '', - 'updated_at' => isset($row['updated_at']) ? 
$row['updated_at'] : '', - ); - } - } diff --git a/application/api/controller/Base.php b/application/api/controller/Base.php index 77e1da7b..3b2c4627 100644 --- a/application/api/controller/Base.php +++ b/application/api/controller/Base.php @@ -271,6 +271,14 @@ class Base extends Controller } $this->production_article_refer_obj->where('p_article_id', $refer_info['p_article_id'])->where('index', ">", $refer_info['index'])->where('state', 0)->setDec('index'); $this->production_article_refer_obj->where('p_refer_id', $p_refer_id)->update(['state' => 1]); + + // 文献集合已变更,原校对结果的 reference_no 已全部错位,整篇标记为未校对 + try { + (new \app\common\ReferenceCheckService()) + ->clearArticleChecksByPArticleId(intval($refer_info['p_article_id'])); + } catch (\Exception $e) { + \think\Log::error('delOneRefer clearArticleChecksByPArticleId p_refer_id=' . $p_refer_id . ' ' . $e->getMessage()); + } } diff --git a/application/api/controller/Preaccept.php b/application/api/controller/Preaccept.php index 9b4867c7..166af09f 100644 --- a/application/api/controller/Preaccept.php +++ b/application/api/controller/Preaccept.php @@ -7,6 +7,7 @@ use think\Env; use think\Queue; use think\Validate; use app\common\CrossrefService; +use app\common\ReferenceCheckService; class Preaccept extends Base { @@ -15,6 +16,26 @@ class Preaccept extends Base parent::__construct($request); } + /** + * 新增/修改导致文献集合改变后,清空整篇校对明细,使文章状态回到"未校对"。 + * 失败仅记日志,不阻塞主流程。 + */ + private function resetArticleChecksOnReferChange($pArticleId, $sourceTag = '') + { + $pArticleId = intval($pArticleId); + if ($pArticleId <= 0) { + return; + } + try { + (new ReferenceCheckService())->clearArticleChecksByPArticleId($pArticleId); + } catch (\Exception $e) { + \think\Log::error( + 'resetArticleChecksOnReferChange[' . $sourceTag . '] p_article_id=' + . $pArticleId . ' ' . 
$e->getMessage() + ); + } + } + /**获取文章参考文献列表 * @return \think\response\Json @@ -92,6 +113,7 @@ class Preaccept extends Base return jsonError($rule->getError()); } $this->production_article_refer_obj->where('p_article_id',$data['p_article_id'])->update(["state"=>1]); + $this->resetArticleChecksOnReferChange(intval($data['p_article_id']), 'discardRefersByParticleid'); return jsonSuccess([]); } @@ -142,6 +164,7 @@ class Preaccept extends Base } $adId= $this->production_article_refer_obj->insertGetId($insert); $this->production_article_refer_obj->where('p_article_id', $p_info['p_article_id'])->where("p_refer_id", "<>", $adId)->where("index", ">", $pre_refer['index'])->where('state', 0)->setInc('index'); + $this->resetArticleChecksOnReferChange(intval($p_info['p_article_id']), 'addRefer'); return jsonSuccess([]); @@ -198,6 +221,7 @@ class Preaccept extends Base } $adId= $this->production_article_refer_obj->insertGetId($insert); $this->production_article_refer_obj->where('p_article_id', $p_info['p_article_id'])->where("p_refer_id", "<>", $adId)->where("index", ">", $pre_refer['index'])->where('state', 0)->setInc('index'); + $this->resetArticleChecksOnReferChange(intval($p_info['p_article_id']), 'addReferByParticleid'); return jsonSuccess([]); } @@ -233,6 +257,7 @@ class Preaccept extends Base $insert['cs'] = 1; $adId = $this->production_article_refer_obj->insertGetId($insert); $this->production_article_refer_obj->where('p_article_id', $p_info['p_article_id'])->where("p_refer_id", "<>", $adId)->where("index", ">", $pre_refer['index'])->where('state', 0)->setInc('index'); + $this->resetArticleChecksOnReferChange(intval($p_info['p_article_id']), 'addReferNotdoi'); return jsonSuccess([]); } @@ -462,6 +487,17 @@ class Preaccept extends Base // } // $this->production_article_refer_obj->where('p_refer_id', $data['p_refer_id'])->update(['refer_doi' => $data['doi']]); // my_doiToFrag2($this->production_article_refer_obj->where('p_refer_id', $data['p_refer_id'])->find()); + + 
//文献内容更新成功后异步重检该文献对应的全部校对明细(失败不阻塞主流程) + try { + (new ReferenceCheckService())->enqueueRecheckByPReferId( + intval($data['p_refer_id']), + intval($old_refer_info['p_article_id']) + ); + } catch (\Exception $e) { + \think\Log::error('editRefer enqueueRecheckByPReferId p_refer_id=' . $data['p_refer_id'] . ' ' . $e->getMessage()); + } + return jsonSuccess([]); } @@ -1453,6 +1489,7 @@ class Preaccept extends Base return jsonError($rule->getError()); } $refer_info = $this->production_article_refer_obj->where('p_refer_id', $data['p_refer_id'])->find(); + $sibling_p_refer_id = 0; if ($data['act'] == "up") { $up_info = $this->production_article_refer_obj->where('p_article_id', $refer_info['p_article_id'])->where('index', $refer_info['index'] - 1)->where('state', 0)->find(); if (!$up_info) { @@ -1460,6 +1497,7 @@ class Preaccept extends Base } $this->production_article_refer_obj->where('p_refer_id', $up_info['p_refer_id'])->setInc("index"); $this->production_article_refer_obj->where('p_refer_id', $refer_info['p_refer_id'])->setDec("index"); + $sibling_p_refer_id = intval($up_info['p_refer_id']); } else { $down_info = $this->production_article_refer_obj->where('p_article_id', $refer_info['p_article_id'])->where('index', $refer_info['index'] + 1)->where('state', 0)->find(); if (!$down_info) { @@ -1467,7 +1505,19 @@ class Preaccept extends Base } $this->production_article_refer_obj->where('p_refer_id', $refer_info['p_refer_id'])->setInc("index"); $this->production_article_refer_obj->where('p_refer_id', $down_info['p_refer_id'])->setDec("index"); + $sibling_p_refer_id = intval($down_info['p_refer_id']); } + + // 仅同步本次交换的两条 p_refer_id 对应的校对明细 reference_no / refer_index + try { + (new ReferenceCheckService())->syncReferenceNoByPReferIds( + [intval($refer_info['p_refer_id']), $sibling_p_refer_id], + intval($refer_info['p_article_id']) + ); + } catch (\Exception $e) { + \think\Log::error('sortRefer syncReferenceNoByPReferIds: ' . 
$e->getMessage()); + } + return jsonSuccess([]); } diff --git a/application/api/controller/References.php b/application/api/controller/References.php index 47ae2328..759c63bf 100644 --- a/application/api/controller/References.php +++ b/application/api/controller/References.php @@ -1307,4 +1307,231 @@ class References extends Base } return json_encode(['status' => 8,'msg' => 'fail']); } + /** + * 参考文献第一次校对 + * @return \think\response\Json + */ + public function allReferenceCheckAI(){ + //获取参数 + $aParam = empty($aParam) ? $this->request->post() : $aParam; + + //必填值验证 + $iPArticleId = empty($aParam['p_article_id']) ? '' : $aParam['p_article_id']; + if(empty($iPArticleId)){ + return json_encode(array('status' => 2,'msg' => 'Please select an article' )); + } + //查询文章(p_article_id 与 article_id 都要带,下游服务方法两者都用) + $aWhere = ['p_article_id' => $iPArticleId,'state' => ['in',[0,2]]]; + $aProductionArticle = Db::name('production_article')->field('p_article_id,article_id')->where($aWhere)->find(); + if(empty($aProductionArticle)){ + return json_encode(array('status' => 3,'msg' => 'No articles found' )); + } + if($this->checkReferStatus($iPArticleId)==0){ + return jsonError('请修正完文献内容再进行校对。'); + } + //已存在校对记录则禁止重复执行第一次校对,提示走重置接口 + $iExisting = Db::name('article_reference_check_result') + ->where('p_article_id', $iPArticleId) + ->count(); + if(intval($iExisting) > 0){ + return jsonError('该文章已存在校对记录,请使用"重置校对"接口重新校对。'); + } + try { + $svc = new ReferenceCheckService(); + $result = $svc->enqueueByPArticle($aProductionArticle); + return jsonSuccess($result); + } catch (\Exception $e) { + return jsonError($e->getMessage()); + } + } + /** + * 文献校对重置:删除该文章已有的全部校对明细,并重新入队整篇校对 + * POST/GET: article_id(必填) + * @url /api/Article/referenceCheckReset + */ + public function referenceCheckResetAI() + { + //获取参数 + $aParam = empty($aParam) ? $this->request->post() : $aParam; + + //必填值验证 + $iPArticleId = empty($aParam['p_article_id']) ? 
'' : $aParam['p_article_id']; + if(empty($iPArticleId)){ + return json_encode(array('status' => 2,'msg' => 'Please select an article' )); + } + //查询文章(p_article_id 与 article_id 都要带,下游服务方法两者都用) + $aWhere = ['p_article_id' => $iPArticleId,'state' => ['in',[0,2]]]; + $aProductionArticle = Db::name('production_article')->field('p_article_id,article_id')->where($aWhere)->find(); + if(empty($aProductionArticle)){ + return json_encode(array('status' => 3,'msg' => 'No articles found' )); + } + if($this->checkReferStatus($iPArticleId)==0){ + return jsonError('请修正完文献内容再进行校对。'); + } + $iArticleId = empty($aProductionArticle['article_id']) ? 0 : $aProductionArticle['article_id']; + if(empty($iArticleId)){ + return json_encode(array('status' => 4,'msg' => 'Unbound article' )); + } + try { + $result = (new ReferenceCheckService())->resetAndRecheckByArticle($aProductionArticle); + return jsonSuccess($result); + } catch (\Exception $e) { + return jsonError($e->getMessage()); + } + } + + /** + * 清空某篇文章下的全部参考文献校对记录(不重新入队) + * + * 与 referenceCheckResetAI 的区别:reset 是「清空 + 重新校对」, + * 这里只做「清空」一步,校对状态回到未校对,等待用户手动再触发。 + * + * POST/GET: p_article_id(必填) + */ + public function referenceCheckClearAI() + { + $aParam = $this->request->post(); + if (empty($aParam)) { + $aParam = $this->request->param(); + } + + $iPArticleId = empty($aParam['p_article_id']) ? 
0 : intval($aParam['p_article_id']); + if ($iPArticleId <= 0) { + return json_encode(array('status' => 2, 'msg' => 'Please select an article')); + } + + // 校验文章存在(与其它校对接口口径一致:state in [0,2]) + $aProductionArticle = Db::name('production_article') + ->field('p_article_id,article_id') + ->where(['p_article_id' => $iPArticleId, 'state' => ['in', [0, 2]]]) + ->find(); + if (empty($aProductionArticle)) { + return json_encode(array('status' => 3, 'msg' => 'No articles found')); + } + + try { + $deleted = (new ReferenceCheckService())->clearArticleChecksByPArticleId($iPArticleId); + return jsonSuccess([ + 'p_article_id' => $iPArticleId, + 'deleted' => intval($deleted), + ]); + } catch (\Exception $e) { + return jsonError($e->getMessage()); + } + } + + /** + * 按 p_article_id 查整篇引用校对进度(按 reference_no 分组聚合) + * + * POST/GET: p_article_id(必填) + * + * 返回 list 中每项含:reference_no、p_refer_id、status(数值)、 + * total、pending、done、failed、pass、is_pass、last_updated_at、records + * + * status 数值含义: + * 0 = 待校验 1 = 校对中 2 = 校对完成 3 = 校对失败 + */ + public function referenceCheckProgressAI() + { + $aParam = $this->request->post(); + if (empty($aParam)) { + $aParam = $this->request->param(); + } + + $iPArticleId = empty($aParam['p_article_id']) ? 
0 : intval($aParam['p_article_id']); + if ($iPArticleId <= 0) { + return json_encode(array('status' => 2, 'msg' => 'Please select an article')); + } + try { + $result = (new ReferenceCheckService())->getProgressByPArticleId($iPArticleId); + return jsonSuccess($result); + } catch (\Exception $e) { + return jsonError($e->getMessage()); + } + } + + /** + * 按 p_article_id 查整篇文章引用校对总状态(用于前端按钮分流) + * + * POST/GET: p_article_id(必填) + * + * 计数维度是「参考文献」(按 reference_no 分组),不是单条校对明细行。 + * 例:50 条参考文献、底层 111 条校对明细时,total = 50。 + * + * 返回 status 数值含义(整篇): + * 0 = 未校对(一条记录都没有) + * 1 = 校对中(至少 1 条参考文献仍有未跑完的明细) + * 2 = 校对完成(所有参考文献全部明细已结束) + * + * 返回字段:p_article_id、status、total、pending、done、failed、progress_percent + * total —— 参考文献条数 + * pending —— 该条参考文献仍有未跑完明细的数量(含"部分跑完") + * done —— 该条参考文献所有明细都 status=1 的数量 + * failed —— 该条参考文献全部跑完且至少 1 条 status=2 的数量 + * pending + done + failed = total;progress_percent = (done+failed)/total + * + * 分组明细请走 referenceCheckProgressAI。 + */ + public function referenceCheckArticleStatusAI() + { + $aParam = $this->request->post(); + if (empty($aParam)) { + $aParam = $this->request->param(); + } + + $iPArticleId = empty($aParam['p_article_id']) ? 0 : intval($aParam['p_article_id']); + if ($iPArticleId <= 0) { + return json_encode(array('status' => 2, 'msg' => 'Please select an article')); + } + + try { + $result = (new ReferenceCheckService())->getArticleProgressStatusByPArticleId($iPArticleId); + return jsonSuccess($result); + } catch (\Exception $e) { + return jsonError($e->getMessage()); + } + } + + /** + * 按 p_refer_id 查单条参考文献的校对明细 + * + * POST/GET: p_refer_id(必填) + * + * 返回 list 中每项含:am_id、confidence、reason、is_match、is_pass + * 同时附带上下文:p_refer_id、p_article_id、reference_no、total + */ + public function referenceCheckDetailsAI() + { + $aParam = $this->request->post(); + if (empty($aParam)) { + $aParam = $this->request->param(); + } + + $iPReferId = empty($aParam['p_refer_id']) ? 
0 : intval($aParam['p_refer_id']); + if ($iPReferId <= 0) { + return json_encode(array('status' => 2, 'msg' => 'Please select a reference')); + } + + try { + $result = (new ReferenceCheckService())->getCheckDetailsByPReferId($iPReferId); + return jsonSuccess($result); + } catch (\Exception $e) { + return jsonError($e->getMessage()); + } + } + + public function checkReferStatus($p_article_id){ + $list = $this->production_article_refer_obj->where('p_article_id', $p_article_id)->where('state', 0)->select(); + if (!$list) { + return jsonError('references error'); + } + $frag = 1; + foreach ($list as $v) { + if ($v['cs'] == 0) { + $frag = 0; + break; + } + } + return $frag; + } } diff --git a/application/api/job/ReferenceCheck.php b/application/api/job/ReferenceCheck.php index 3b15e6a1..89c5c67d 100644 --- a/application/api/job/ReferenceCheck.php +++ b/application/api/job/ReferenceCheck.php @@ -6,7 +6,6 @@ use think\queue\Job; use app\common\QueueJob; use app\common\QueueRedis; use app\common\ReferenceCheckService; -use app\common\service\LLMService; class ReferenceCheck { @@ -39,14 +38,6 @@ class ReferenceCheck if ($checkId <= 0 && !empty($jobData['data']['check_id'])) { $checkId = intval($jobData['data']['check_id']); } - $sClassName = get_class($this); - $sRedisKey = "queue_job:{$sClassName}:{$checkId}"; - $sRedisValue = uniqid() . '_' . getmypid(); - - if (!$this->oQueueJob->acquireLock($sRedisKey, $sRedisValue, $job)) { - return; - } - if ($checkId <= 0) { $job->delete(); return; @@ -63,44 +54,19 @@ class ReferenceCheck return; } + $sClassName = get_class($this); + $sRedisKey = "queue_job:{$sClassName}:{$checkId}"; + $sRedisValue = uniqid() . '_' . 
getmypid(); + + $svc = new ReferenceCheckService(); + $svc->clearReferenceCheckQueueLock($checkId); + + if (!$this->oQueueJob->acquireLock($sRedisKey, $sRedisValue, $job)) { + return; + } + try { - $svc = new ReferenceCheckService(); - - $contentA = $svc->resolveMainContentForJob($row); - $contentB = trim((string)(isset($row['refer_text']) ? $row['refer_text'] : '')); - $refer = null; - - if (intval($row['p_refer_id']) > 0) { - $refer = Db::name('production_article_refer') - ->where('p_refer_id', intval($row['p_refer_id'])) - ->where('state', 0) - ->find(); - if ($refer && $contentB === '') { - $contentB = $svc->formatReferForLlm($refer); - } - } - - if ($contentA === '' || $contentB === '') { - $this->markFailed($checkId, 'Missing article_main.content or refer_text'); - $job->delete(); - return; - } - - $llm = new LLMService(); - $llmResult = $llm->checkReference($contentA, $contentB, false); - $canSupport = $svc->parseLlmCanSupport($llmResult); - $confidence = floatval($llmResult['confidence']); - - $svc->updateCheckResult($checkId, [ - 'can_support' => $canSupport ? 1 : 0, - 'is_match' => $canSupport ? 1 : 0, - 'confidence' => $confidence, - 'reason' => isset($llmResult['reason']) ? $llmResult['reason'] : '', - 'status' => 1, - 'error_msg' => '', - ]); - - $svc->maybeEnqueueSecondPass($checkId, $confidence); + $svc->runReferenceCheckOnce($checkId); $amId = intval(isset($row['am_id']) ? $row['am_id'] : 0); if ($amId > 0) { diff --git a/application/api/job/ReferenceCheckTwo.php b/application/api/job/ReferenceCheckTwo.php index b28c9f6c..564af204 100644 --- a/application/api/job/ReferenceCheckTwo.php +++ b/application/api/job/ReferenceCheckTwo.php @@ -88,12 +88,24 @@ class ReferenceCheckTwo $llm = new LLMService(); $llmResult = $llm->checkReference($contentA, $referText, true, $doiBlock); + $requestFailed = !empty($llmResult['request_failed']); $canSupport = $svc->parseLlmCanSupport($llmResult); $tag = $payload['has_abstract'] ? ('[Crossref复核' . 
($payload['doi_used'] !== '' ? ' ' . $payload['doi_used'] : '') . ']') : '[Crossref复核-无摘要]'; $reason = $tag . ' ' . (isset($llmResult['reason']) ? $llmResult['reason'] : ''); + // LLM 通讯失败:写 status=2 并抛异常触发队列重试 + if ($requestFailed) { + $svc->updateCheckResult($checkId, [ + 'confidence' => floatval($llmResult['confidence']), + 'reason' => $reason, + 'status' => 2, + 'error_msg' => isset($llmResult['reason']) ? $llmResult['reason'] : 'LLM request failed', + ]); + throw new \RuntimeException(isset($llmResult['reason']) ? $llmResult['reason'] : 'LLM request failed'); + } + $affected = $svc->updateCheckResult($checkId, [ 'can_support' => $canSupport ? 1 : 0, 'is_match' => $canSupport ? 1 : 0, diff --git a/application/common/QueueRedis.php b/application/common/QueueRedis.php index fb9fb5fb..4412d1ba 100644 --- a/application/common/QueueRedis.php +++ b/application/common/QueueRedis.php @@ -80,6 +80,25 @@ class QueueRedis return null; } } + + /** + * 删除一个或多个 Redis 键(用于重检前清除队列任务 completed 标记) + */ + public function deleteRedisKeys(array $keys) + { + $keys = array_values(array_filter($keys, function ($k) { + return $k !== null && $k !== ''; + })); + if (empty($keys)) { + return true; + } + try { + $this->connect()->del(...$keys); + return true; + } catch (\Exception $e) { + return false; + } + } // 安全释放锁(仅当值匹配时删除) public function releaseRedisLock($key, $value) diff --git a/application/common/ReferenceCheckService.php b/application/common/ReferenceCheckService.php index 593f1548..77b44e9d 100644 --- a/application/common/ReferenceCheckService.php +++ b/application/common/ReferenceCheckService.php @@ -5,6 +5,7 @@ namespace app\common; use think\Db; use think\Env; use think\Queue; +use app\common\service\LLMService; /** * 正文 <blue>[n]</blue> 引用与 t_production_article_refer(index+1=n)相关性校对。 @@ -20,6 +21,48 @@ class ReferenceCheckService const AM_STATUS_FAIL = 2; const AM_STATUS_RUNNING = 3; + /** 引用校对进度(按 reference_no 分组聚合后的对外状态) */ + const PROGRESS_PENDING = 0; // 待校验:分组内全部明细 
status=0 + const PROGRESS_CHECKING = 1; // 校对中:分组内部分明细已结束、部分仍为 0 + const PROGRESS_COMPLETED = 2; // 校对完成:分组内全部明细 status=1 + const PROGRESS_FAILED = 3; // 校对失败:分组内全部明细已结束,且至少 1 条 status=2 + + /** 整篇文章的引用校对状态(对外整体状态,用于"开始/重置"按钮分流) */ + const ARTICLE_PROGRESS_NONE = 0; // 还没有任何校对记录 + const ARTICLE_PROGRESS_RUNNING = 1; // 至少 1 条 status=0(队列里还有未跑完的) + const ARTICLE_PROGRESS_COMPLETED = 2; // 所有明细 status != 0(全部已完成或失败) + + /** + * 单条校对明细的对外状态(getProgressByPArticleId 返回的 records[i].status) + * + * DB 里 article_reference_check_result.status 只有 0/1/2 三种值; + * RECORD_PROCESSING 是基于 Redis 队列锁 :status='processing' 的瞬时态, + * 并不持久化。worker 进入 LLM 调用期间 DB.status 仍是 0,需要靠队列锁识别。 + */ + const RECORD_PENDING = 0; // 待校对,已入队但还没被 worker 拾起 + const RECORD_COMPLETED = 1; // 校对完成 + const RECORD_FAILED = 2; // 校对失败 + const RECORD_PROCESSING = 3; // 处理中:worker 正在跑 LLM(Redis :status='processing') + + /** LLM 评分(confidence)通过阈值:>= 该值视为"通过" */ + const PASS_CONFIDENCE_THRESHOLD = 0.65; + + /** + * [...] 引用标签内允许的字符类(带 /u 修饰符使用)。 + * + * 除 ASCII 数字、半角逗号、半角连字符、空白外,还兼容常见排版变体: + * , U+FF0C 全角逗号 + * – U+2013 EN DASH + * — U+2014 EM DASH + * − U+2212 MINUS SIGN + * ‐ U+2010 HYPHEN + * ‑ U+2011 NON-BREAKING HYPHEN + * + * 若不支持变体连字符,会导致 [19–21] 这种区间引用整段被 preg 漏掉, + * 进而丢失对应的 reference_no 校对记录。 + */ + const BLUE_TAG_REGEX = '/\[([\d,,\-\x{2013}\x{2014}\x{2212}\x{2010}\x{2011}\s]+)\]<\/blue>/u'; + /** * 兼容无 ?? 
的 PHP 版本 */ @@ -166,13 +209,94 @@ class ReferenceCheckService 'queued' => count($checkIds2), ]; } + public function enqueueByPArticle($prod){ + if (empty($prod)) { + throw new \RuntimeException('production_article not found'); + } + $pArticleId = intval($prod['p_article_id']); + $articleId = intval($prod['article_id']); + $referMap = $this->loadReferMapByPArticleId($pArticleId); + + $mains = Db::name('article_main') + ->field('am_id,content,article_id') + ->where('article_id', $articleId) + ->whereIn('state', [0, 2]) + ->order('sort asc') + ->select(); + if (empty($mains)) { + throw new \RuntimeException('article_main is empty'); + } + $queued = 0; + $skipped = 0; + $pendingJobs = []; + $amIdsWithJobs = []; + $now = date('Y-m-d H:i:s'); + foreach ($mains as $main) { + $amId = intval($main['am_id']); + $citations = $this->extractReferences((string)$main['content']); + if (empty($citations)) { + $this->setAmRefCheckStatus($amId, self::AM_STATUS_NONE); + continue; + } + foreach ($citations as $cite) { + foreach ($cite['reference_numbers'] as $refNo) { + $referIndex = $refNo - 1; + if ($referIndex < 0 || !isset($referMap[$referIndex])) { + $skipped++; + continue; + } + $refer = $referMap[$referIndex]; + $referText = $this->formatReferForLlm($refer); + + // [70-73] 展开为 reference_no=70,71,72,73 共 4 条记录;先入队表,再按文献号正序校对 + $checkId = Db::name('article_reference_check_result')->insertGetId([ + 'article_id' => $main['article_id'], + 'p_article_id' => $pArticleId, + 'am_id' => $amId, + 'reference_no' => $refNo, + 'refer_index' => $refNo, + 'origin_text' => $cite['original_text'], + 'refer_text' => $referText, + 'p_refer_id' => $referMap[$referIndex]['p_refer_id'], + 'text_start' => $cite['text_start'], + 'text_end' => $cite['text_end'], + 'created_at' => $now, + 'updated_at' => $now, + ]); + + $pendingJobs[] = [ + 'check_id' => intval($checkId), + 'reference_no' => intval($refNo), + 'am_id' => $amId, + 'text_start' => intval($cite['text_start']), + ]; + $queued++; + 
$amIdsWithJobs[$amId] = true; + } + } + } + + $checkIds = $this->pushJobsSortedByReferenceNo($pendingJobs); + foreach (array_keys($amIdsWithJobs) as $amId) { + $this->setAmRefCheckStatus($amId, self::AM_STATUS_RUNNING); + } + + return [ + 'article_id' => $articleId, + 'p_article_id' => $pArticleId, + 'queued' => $queued, + 'skipped' => $skipped, + 'check_ids' => $checkIds, + 'queue' => self::QUEUE_NAME, + ]; + } public function enqueueByArticle($articleId){ if ($articleId <= 0) { throw new \InvalidArgumentException('article_id is required'); } $prod = Db::name('production_article') ->where('article_id', $articleId) - ->where('state', [0, 2]) + ->whereIn('state', [0, 2]) ->find(); if (empty($prod)) { throw new \RuntimeException('production_article not found for article_id=' . $articleId); @@ -191,10 +315,9 @@ class ReferenceCheckService } $queued = 0; $skipped = 0; - $checkIds = []; - $delay = 0; + $pendingJobs = []; $amIdsWithJobs = []; - + $now = date('Y-m-d H:i:s'); foreach ($mains as $main) { $amId = intval($main['am_id']); $citations = $this->extractReferences((string)$main['content']); @@ -212,12 +335,11 @@ class ReferenceCheckService $refer = $referMap[$referIndex]; $referText = $this->formatReferForLlm($refer); - $now = date('Y-m-d H:i:s'); - // [70-73] 展开为 reference_no=70,71,72,73 共 4 条记录 + // [70-73] 展开为 reference_no=70,71,72,73 共 4 条记录;先入队表,再按文献号正序校对 $checkId = Db::name('article_reference_check_result')->insertGetId([ 'article_id' => $main['article_id'], 'p_article_id' => $pArticleId, - 'am_id' => intval($main['am_id']), + 'am_id' => $amId, 'reference_no' => $refNo, 'refer_index' => $refNo, 'origin_text' => $cite['original_text'], @@ -229,14 +351,19 @@ class ReferenceCheckService 'updated_at' => $now, ]); - $this->pushJob(intval($checkId), $delay); - $checkIds[] = $checkId; + $pendingJobs[] = [ + 'check_id' => intval($checkId), + 'reference_no' => intval($refNo), + 'am_id' => $amId, + 'text_start' => intval($cite['text_start']), + ]; $queued++; - $delay 
+= 1; $amIdsWithJobs[$amId] = true; } } } + + $checkIds = $this->pushJobsSortedByReferenceNo($pendingJobs); foreach (array_keys($amIdsWithJobs) as $amId) { $this->setAmRefCheckStatus($amId, self::AM_STATUS_RUNNING); } @@ -308,13 +435,464 @@ class ReferenceCheckService ]); } + /** + * 按 p_article_id 清空整篇文章的引用校对明细 + 重置节级 ref_check_status。 + * + * 用于新增/删除文献后,旧的 reference_no 全部错位、原校对结果失效的场景: + * 物理删除后,整篇状态查询自然回到 ARTICLE_PROGRESS_NONE(未校对)。 + * + * @return int 被删除的明细条数 + */ + public function clearArticleChecksByPArticleId($pArticleId) + { + $pArticleId = intval($pArticleId); + if ($pArticleId <= 0) { + return 0; + } + + // 先反查 article_id(用于重置 article_main.ref_check_status 节级状态) + $articleId = intval(Db::name('production_article') + ->where('p_article_id', $pArticleId) + ->whereIn('state', [0, 2]) + ->value('article_id')); + + // 先清掉旧记录对应的队列 Redis 锁,避免在途 worker 写回数据 + $oldIds = Db::name('article_reference_check_result') + ->where('p_article_id', $pArticleId) + ->column('id'); + foreach ($oldIds as $oldId) { + $this->clearReferenceCheckQueueLock(intval($oldId)); + } + + $deleted = Db::name('article_reference_check_result') + ->where('p_article_id', $pArticleId) + ->delete(); + + if ($articleId > 0) { + Db::name('article_main') + ->where('article_id', $articleId) + ->whereIn('state', [0, 2]) + ->update(['ref_check_status' => self::AM_STATUS_NONE]); + } + + return intval($deleted); + } + public function clearArticleChecks($articleId) { - Db::name('article_reference_check_result')->where('article_id', $articleId)->delete(); + $articleId = intval($articleId); + if ($articleId <= 0) { + return 0; + } + + // 先清掉旧记录对应的队列 Redis 锁,否则同 check_id 在 TTL 内不会再次执行 + $oldIds = Db::name('article_reference_check_result') + ->where('article_id', $articleId) + ->column('id'); + foreach ($oldIds as $oldId) { + $this->clearReferenceCheckQueueLock(intval($oldId)); + } + + $deleted = Db::name('article_reference_check_result')->where('article_id', $articleId)->delete(); Db::name('article_main') 
->where('article_id', $articleId) ->whereIn('state', [0, 2]) ->update(['ref_check_status' => self::AM_STATUS_NONE]); + + return intval($deleted); + } + + /** + * 文献列表局部挪动后,仅刷新指定 p_refer_id 对应的校对明细 reference_no / refer_index。 + * + * 读 production_article_refer 的最新 index 来算新序号(index + 1),避免外部传入过期值。 + * 仅更新受影响的两条左右记录,降低与并发挪动互相覆盖的风险。 + * + * @param int[] $pReferIds 受影响的 p_refer_id(一般为 2 个:被挪条目 + 其相邻条目) + * @param int $pArticleId 可选:附加 p_article_id 限定,进一步缩小行锁范围 + * @return array{p_refer_ids:int[], affected_rows:int, changes:array} + */ + public function syncReferenceNoByPReferIds(array $pReferIds, $pArticleId = 0) + { + $pReferIds = array_values(array_unique(array_filter(array_map('intval', $pReferIds)))); + $pArticleId = intval($pArticleId); + if (empty($pReferIds)) { + return [ + 'p_refer_ids' => [], + 'affected_rows' => 0, + 'changes' => [], + ]; + } + + $referQuery = Db::name('production_article_refer') + ->field('p_refer_id,p_article_id,index') + ->whereIn('p_refer_id', $pReferIds) + ->where('state', 0); + if ($pArticleId > 0) { + $referQuery->where('p_article_id', $pArticleId); + } + $refers = $referQuery->select(); + if (empty($refers)) { + return [ + 'p_refer_ids' => $pReferIds, + 'affected_rows' => 0, + 'changes' => [], + ]; + } + + $now = date('Y-m-d H:i:s'); + $affected = 0; + $changes = []; + + foreach ($refers as $refer) { + $pReferId = intval($refer['p_refer_id']); + $newNo = intval($refer['index']) + 1; + + $updateQuery = Db::name('article_reference_check_result') + ->where('p_refer_id', $pReferId) + ->where('reference_no', '<>', $newNo); + if ($pArticleId > 0) { + $updateQuery->where('p_article_id', $pArticleId); + } + $rows = $updateQuery->update([ + 'reference_no' => $newNo, + 'refer_index' => $newNo, + 'updated_at' => $now, + ]); + + if ($rows > 0) { + $affected += intval($rows); + $changes[] = [ + 'p_refer_id' => $pReferId, + 'new_ref_no' => $newNo, + 'affected_rows' => intval($rows), + ]; + } + } + + return [ + 'p_refer_ids' => $pReferIds, + 
'affected_rows' => $affected, + 'changes' => $changes, + ]; + } + + /** + * 重置整篇稿件的引用校对:删除旧明细 + 清理队列锁 + 全文重新入队校对 + * + * @return array + */ + /** + * 按 p_article_id 查整篇文章的引用校对总状态。 + * + * 统计维度是"参考文献"(按 reference_no 分组),不是单条校对明细行。 + * 例如 50 条参考文献、底层明细 111 条时,total 返回 50。 + * + * 返回 status 数值含义(整篇): + * 0 = ARTICLE_PROGRESS_NONE 一条校对记录都没有 + * 1 = ARTICLE_PROGRESS_RUNNING 至少 1 条参考文献仍有未跑完的明细 + * 2 = ARTICLE_PROGRESS_COMPLETED 所有参考文献的全部明细都已结束 + * + * 每条参考文献按其明细 status 分布落桶(互斥): + * pending —— 组内任一明细 status=0(含部分跑完的"校对中"也归此桶) + * done —— 组内全部明细 status=1 + * failed —— 组内全部明细已结束、至少 1 条 status=2 + * + * pending + done + failed = total;progress_percent = (done + failed) / total。 + * 分组明细请走 getProgressByPArticleId(控制器 referenceCheckProgressAI)。 + * + * @return array{p_article_id:int, status:int, total:int, pending:int, done:int, failed:int, progress_percent:float} + */ + public function getArticleProgressStatusByPArticleId($pArticleId) + { + $pArticleId = intval($pArticleId); + if ($pArticleId <= 0) { + throw new \InvalidArgumentException('p_article_id is required'); + } + + // 一条 SQL 按 reference_no 聚合,组内 status 分布一并算出来; + // 50 条参考文献 → 返回 50 行,PHP 走一次循环分桶即可 + $rows = Db::name('article_reference_check_result') + ->field('reference_no' + . ', SUM(CASE WHEN status = 0 THEN 1 ELSE 0 END) AS pending_cnt' + . 
', SUM(CASE WHEN status = 2 THEN 1 ELSE 0 END) AS failed_cnt') + ->where('p_article_id', $pArticleId) + ->group('reference_no') + ->select(); + + if (empty($rows)) { + return [ + 'p_article_id' => $pArticleId, + 'status' => self::ARTICLE_PROGRESS_NONE, + 'total' => 0, + 'pending' => 0, + 'done' => 0, + 'failed' => 0, + 'progress_percent' => 0, + ]; + } + + $pending = 0; + $done = 0; + $failed = 0; + foreach ($rows as $row) { + $pendingCnt = intval($this->arrGet($row, 'pending_cnt', 0)); + $failedCnt = intval($this->arrGet($row, 'failed_cnt', 0)); + if ($pendingCnt > 0) { + $pending++; + } elseif ($failedCnt > 0) { + $failed++; + } else { + $done++; + } + } + + $total = count($rows); + $articleStatus = $pending > 0 + ? self::ARTICLE_PROGRESS_RUNNING + : self::ARTICLE_PROGRESS_COMPLETED; + $finished = $done + $failed; + $progressPercent = round($finished / $total * 100, 1); + + return [ + 'p_article_id' => $pArticleId, + 'status' => $articleStatus, + 'total' => $total, + 'pending' => $pending, + 'done' => $done, + 'failed' => $failed, + 'progress_percent' => $progressPercent, + ]; + } + + /** + * 按 p_article_id 查整篇引用校对进度,按 reference_no 分组聚合状态,并展开每条明细。 + * + * 单条 article_reference_check_result.status: + * 0 = 待校验 1 = 校对完成 2 = 校对失败 + * + * 分组(reference_no)状态(返回字段 status,数值类型): + * 0 = PROGRESS_PENDING 待校验 :分组内全部明细 status=0 + * 1 = PROGRESS_CHECKING 校对中 :分组内部分明细已结束、部分仍为 0 + * 2 = PROGRESS_COMPLETED 校对完成:分组内全部明细 status=1 + * 3 = PROGRESS_FAILED 校对失败:分组内全部明细已结束,且至少 1 条 status=2 + * + * 每个分组还会展开 records 子数组,给出该 reference_no 下每条 check 明细的: + * - status(同上 0/1/2) + * - confidence 评分 + * - is_pass(confidence >= PASS_CONFIDENCE_THRESHOLD 视为通过) + * + * @return array{p_article_id:int, total_groups:int, summary:array, list:array} + */ + public function getProgressByPArticleId($pArticleId) + { + $pArticleId = intval($pArticleId); + if ($pArticleId <= 0) { + throw new \InvalidArgumentException('p_article_id is required'); + } + + $rows = Db::name('article_reference_check_result') + 
->field('id,p_refer_id,reference_no,am_id,status,confidence,is_match,reason,text_start,text_end,updated_at') + ->where('p_article_id', $pArticleId) + ->order('reference_no asc, id asc') + ->select(); + + // summary 用数值键,0/1/2/3 对应 PROGRESS_* 常量 + $summary = [ + self::PROGRESS_PENDING => 0, + self::PROGRESS_CHECKING => 0, + self::PROGRESS_COMPLETED => 0, + self::PROGRESS_FAILED => 0, + ]; + if (empty($rows)) { + return [ + 'p_article_id' => $pArticleId, + 'total_groups' => 0, + 'summary' => $summary, + 'list' => [], + ]; + } + + $groups = []; + foreach ($rows as $row) { + $refNo = intval($this->arrGet($row, 'reference_no', 0)); + $pReferId = intval($this->arrGet($row, 'p_refer_id', 0)); + if (!isset($groups[$refNo])) { + $groups[$refNo] = [ + 'reference_no' => $refNo, + 'p_refer_id' => $pReferId, + 'total' => 0, + 'pending' => 0, + 'done' => 0, + 'failed' => 0, + 'pass' => 0, + 'last_updated_at' => '', + 'records' => [], + ]; + } + // 同一 reference_no 理论上只对应一个 p_refer_id;如果出现混淆,保留首次出现的非空 id + if ($groups[$refNo]['p_refer_id'] <= 0 && $pReferId > 0) { + $groups[$refNo]['p_refer_id'] = $pReferId; + } + + $groups[$refNo]['total']++; + $st = intval($this->arrGet($row, 'status', 0)); + if ($st === 0) { + $groups[$refNo]['pending']++; + } elseif ($st === 1) { + $groups[$refNo]['done']++; + } elseif ($st === 2) { + $groups[$refNo]['failed']++; + } + + $upd = (string)$this->arrGet($row, 'updated_at', ''); + if ($upd > $groups[$refNo]['last_updated_at']) { + $groups[$refNo]['last_updated_at'] = $upd; + } + + $confidence = floatval($this->arrGet($row, 'confidence', 0)); + $isPass = $confidence >= self::PASS_CONFIDENCE_THRESHOLD; + if ($isPass) { + $groups[$refNo]['pass']++; + } + + $groups[$refNo]['records'][] = [ + 'check_id' => intval($this->arrGet($row, 'id', 0)), + 'am_id' => intval($this->arrGet($row, 'am_id', 0)), + 'status' => $st, + 'confidence' => $confidence, + 'is_pass' => $isPass, + 'is_match' => intval($this->arrGet($row, 'is_match', 0)), + 'reason' => 
(string)$this->arrGet($row, 'reason', ''), + 'text_start' => intval($this->arrGet($row, 'text_start', 0)), + 'text_end' => intval($this->arrGet($row, 'text_end', 0)), + 'last_updated_at' => $upd, + ]; + } + + $list = []; + foreach ($groups as $g) { + $total = $g['total']; + $pending = $g['pending']; + $failed = $g['failed']; + $pass = $g['pass']; + + if ($pending === $total) { + $status = self::PROGRESS_PENDING; + } elseif ($pending === 0) { + $status = $failed > 0 ? self::PROGRESS_FAILED : self::PROGRESS_COMPLETED; + } else { + $status = self::PROGRESS_CHECKING; + } + + // 整体通过校验:分组已全部完成(无 pending、无 failed),且每条 confidence >= 0.65 + $g['is_pass'] = ( + $status === self::PROGRESS_COMPLETED + && $total > 0 + && $pass === $total + ); + + $summary[$status]++; + $g['status'] = $status; + $list[] = $g; + } + + usort($list, function ($a, $b) { + return $a['reference_no'] - $b['reference_no']; + }); + + return [ + 'p_article_id' => $pArticleId, + 'total_groups' => count($list), + 'summary' => $summary, + 'list' => $list, + ]; + } + + /** + * 按 p_refer_id 查这条参考文献的所有校对明细。 + * + * 每条 record 返回: + * - am_id 命中的 article_main 主键 + * - confidence 匹配置信度(0~1) + * - reason LLM 给出的判定理由 + * - is_match 是否匹配(来自 article_reference_check_result.is_match) + * - is_pass 是否通过校验(confidence >= PASS_CONFIDENCE_THRESHOLD) + * + * @param int $pReferId production_article_refer.p_refer_id + * @return array{p_refer_id:int, p_article_id:int, reference_no:int, total:int, list:array} + */ + public function getCheckDetailsByPReferId($pReferId) + { + $pReferId = intval($pReferId); + if ($pReferId <= 0) { + throw new \InvalidArgumentException('p_refer_id is required'); + } + + $rows = Db::name('article_reference_check_result') + ->field('id,p_article_id,reference_no,am_id,confidence,is_match,reason') + ->where('p_refer_id', $pReferId) + ->order('id asc') + ->select(); + + $list = []; + $pArticleId = 0; + $referenceNo = 0; + foreach ($rows as $row) { + // 取首条出现的 p_article_id / reference_no 作为该 refer 的上下文 + 
if ($pArticleId <= 0) { + $pArticleId = intval($this->arrGet($row, 'p_article_id', 0)); + } + if ($referenceNo <= 0) { + $referenceNo = intval($this->arrGet($row, 'reference_no', 0)); + } + + $confidence = floatval($this->arrGet($row, 'confidence', 0)); + $list[] = [ + 'am_id' => intval($this->arrGet($row, 'am_id', 0)), + 'confidence' => $confidence, + 'reason' => (string)$this->arrGet($row, 'reason', ''), + 'is_match' => intval($this->arrGet($row, 'is_match', 0)), + 'is_pass' => $confidence >= self::PASS_CONFIDENCE_THRESHOLD, + ]; + } + + return [ + 'p_refer_id' => $pReferId, + 'p_article_id' => $pArticleId, + 'reference_no' => $referenceNo, + 'total' => count($list), + 'list' => $list, + ]; + } + + public function resetAndRecheckByArticle($aProductionArticle) + { + if (empty($aProductionArticle) || !is_array($aProductionArticle)) { + throw new \InvalidArgumentException('production_article is required'); + } + $pArticleId = intval($this->arrGet($aProductionArticle, 'p_article_id', 0)); + $articleId = intval($this->arrGet($aProductionArticle, 'article_id', 0)); + if ($pArticleId <= 0 || $articleId <= 0) { + throw new \InvalidArgumentException('production_article requires both p_article_id and article_id'); + } + + $existing = Db::name('article_reference_check_result') + ->where('p_article_id', $pArticleId) + ->count(); + if (intval($existing) <= 0) { + throw new \RuntimeException('no existing reference check records for p_article_id=' . 
$pArticleId); + } + + $cleared = $this->clearArticleChecks($articleId); + $enqueueResult = $this->enqueueByArticle($articleId); + + if (!is_array($enqueueResult)) { + $enqueueResult = []; + } + $enqueueResult['cleared'] = $cleared; + $enqueueResult['reset'] = 1; + return $enqueueResult; } public static function amStatusLabel($status) @@ -571,7 +1149,7 @@ class ReferenceCheckService // 1) 先标记 blue 内各序号(在原文上操作,[70-73] 仅标不合理者如 70、71) preg_match_all( - '/\[([\d,\-\s]+)\]<\/blue>/', + self::BLUE_TAG_REGEX, $html, $matches, PREG_OFFSET_CAPTURE @@ -619,7 +1197,9 @@ class ReferenceCheckService $tagClass = !empty($badNums) ? ' ref-cite-error' : ''; $groupIds = !empty($badNums) - ? implode(',', array_map('intval', array_column($badNums, 'check_id'))) + ? implode(',', array_map(function ($row) { + return (int) $this->resolveCheckRowId($row); + }, $badNums)) : ''; $newHtml = '[' . $innerMarked . ']'; @@ -718,13 +1298,388 @@ class ReferenceCheckService $parts[] = ucfirst($f) . ': ' . $v; } } + $frag = trim((string)$this->arrGet($refer, 'refer_frag', '')); $content = trim((string)$this->arrGet($refer, 'refer_content', '')); - if ($content !== '') { + if ($frag !== '') { + $parts[] = 'Reference: ' . $frag; + } elseif ($content !== '') { $parts[] = 'Reference: ' . 
$content; } return implode("\n", $parts); } + /** + * 前端修改参考文献后重新校对:仅处理已有校对记录,刷新 refer_text、重置结果并入队;无记录直接返回 + * + * @param int $articleId + * @param int $pReferId t_production_article_refer.p_refer_id(优先) + * @param int $referenceNo 文献序号 index+1(无 p_refer_id 时用) + * @return array + */ + /** + * 编辑某条文献内容后,按 p_refer_id 异步重新校对该文献对应的全部 check 明细 + * + * 流程:刷新 refer_text/refer_index → 重置 status/is_match/confidence/reason + * → 设节级 ref_check_status=RUNNING → 投递到 ReferenceCheck 队列 + * + * 与 recheckByRefer 的差异:本方法**不**在请求内同步跑 LLM,仅入队,立即返回。 + * 前端可调 getProgressByPArticleId 轮询进度。 + * + * @param int $pReferId t_production_article_refer.p_refer_id(必填) + * @param int $pArticleId 可选:传入跳过 refer 表二次查表 + * @return array{p_refer_id:int, p_article_id:int, reference_no:int, reset:int, queued:int, check_ids:int[], queue:string} + */ + public function enqueueRecheckByPReferId($pReferId, $pArticleId = 0) + { + $pReferId = intval($pReferId); + if ($pReferId <= 0) { + throw new \InvalidArgumentException('p_refer_id is required'); + } + + $refer = Db::name('production_article_refer') + ->where('p_refer_id', $pReferId) + ->where('state', 0) + ->find(); + if (empty($refer)) { + throw new \RuntimeException('production_article_refer not found, p_refer_id=' . $pReferId); + } + + $pArticleId = intval($pArticleId); + if ($pArticleId <= 0) { + $pArticleId = intval($this->arrGet($refer, 'p_article_id', 0)); + } + if ($pArticleId <= 0) { + throw new \RuntimeException('p_article_id is missing for p_refer_id=' . 
$pReferId); + } + + $referenceNo = intval($this->arrGet($refer, 'index', 0)) + 1; + $referText = $this->formatReferForLlm($refer); + $now = date('Y-m-d H:i:s'); + + $rows = Db::name('article_reference_check_result') + ->where('p_article_id', $pArticleId) + ->where('p_refer_id', $pReferId) + ->select(); + + if (empty($rows)) { + return [ + 'p_refer_id' => $pReferId, + 'p_article_id' => $pArticleId, + 'reference_no' => $referenceNo, + 'reset' => 0, + 'queued' => 0, + 'check_ids' => [], + 'queue' => self::QUEUE_NAME, + ]; + } + + $resetFields = [ + 'refer_text' => $referText, + 'refer_index' => $referenceNo, + 'reference_no' => $referenceNo, + 'status' => 0, + 'is_match' => 0, + 'can_support' => 0, + 'confidence' => 0, + 'reason' => '', + 'error_msg' => '', + 'updated_at' => $now, + ]; + + $pendingJobs = []; + $amIds = []; + foreach ($rows as $row) { + $checkId = $this->resolveCheckRowId($row); + Db::name('article_reference_check_result')->where('id', $checkId)->update($resetFields); + // 旧的队列完成标记必须清掉,否则同 check_id 再次投递会被 acquireLock 静默丢弃 + $this->clearReferenceCheckQueueLock($checkId); + $pendingJobs[] = [ + 'check_id' => $checkId, + 'reference_no' => $referenceNo, + 'am_id' => intval($this->arrGet($row, 'am_id', 0)), + 'text_start' => intval($this->arrGet($row, 'text_start', 0)), + ]; + $amId = intval($this->arrGet($row, 'am_id', 0)); + if ($amId > 0) { + $amIds[$amId] = true; + } + } + + foreach (array_keys($amIds) as $amId) { + $this->setAmRefCheckStatus($amId, self::AM_STATUS_RUNNING); + } + + $checkIds = $this->pushJobsSortedByReferenceNo($pendingJobs); + + return [ + 'p_refer_id' => $pReferId, + 'p_article_id' => $pArticleId, + 'reference_no' => $referenceNo, + 'reset' => count($rows), + 'queued' => count($checkIds), + 'check_ids' => $checkIds, + 'queue' => self::QUEUE_NAME, + ]; + } + + public function recheckByRefer($articleId, $pReferId = 0, $referenceNo = 0) + { + $articleId = intval($articleId); + if ($articleId <= 0) { + throw new 
\InvalidArgumentException('article_id is required'); + } + + $ctx = $this->resolveReferForRecheck($articleId, intval($pReferId), intval($referenceNo)); + $refer = $ctx['refer']; + $pReferId = $ctx['p_refer_id']; + $referenceNo = $ctx['reference_no']; + $pArticleId = $ctx['p_article_id']; + $referText = $this->formatReferForLlm($refer); + $now = date('Y-m-d H:i:s'); + + $rows = Db::name('article_reference_check_result') + ->where('article_id', $articleId) + ->where(function ($query) use ($pReferId, $referenceNo) { + $query->where('p_refer_id', $pReferId)->whereOr('reference_no', $referenceNo); + }) + ->select(); + + if (empty($rows)) { + return [ + 'article_id' => $articleId, + 'p_refer_id' => $pReferId, + 'reference_no' => $referenceNo, + 'reset' => 0, + 'queued' => 0, + 'check_ids' => [], + 'queue' => self::QUEUE_NAME, + ]; + } + + $resetFields = [ + 'refer_text' => $referText, + 'p_refer_id' => $pReferId, + 'p_article_id' => $pArticleId, + 'refer_index' => $referenceNo, + 'status' => 0, + 'is_match' => 0, + 'can_support' => 0, + 'confidence' => 0, + 'reason' => '', + 'error_msg' => '', + 'updated_at' => $now, + ]; + + $pendingJobs = []; + $amIds = []; + foreach ($rows as $row) { + $checkId = $this->resolveCheckRowId($row); + Db::name('article_reference_check_result')->where('id', $checkId)->update($resetFields); + $pendingJobs[] = [ + 'check_id' => $checkId, + 'reference_no' => $referenceNo, + 'am_id' => intval($row['am_id']), + 'text_start' => intval(isset($row['text_start']) ? 
$row['text_start'] : 0), + ]; + $amId = intval($row['am_id']); + if ($amId > 0) { + $amIds[$amId] = true; + } + } + + foreach (array_keys($amIds) as $amId) { + $this->setAmRefCheckStatus($amId, self::AM_STATUS_RUNNING); + } + + usort($pendingJobs, function ($a, $b) { + if ($a['reference_no'] !== $b['reference_no']) { + return $a['reference_no'] - $b['reference_no']; + } + if ($a['am_id'] !== $b['am_id']) { + return $a['am_id'] - $b['am_id']; + } + return $a['text_start'] - $b['text_start']; + }); + + $checkIds = []; + $results = []; + $failed = []; + foreach ($pendingJobs as $job) { + $checkId = intval($job['check_id']); + $checkIds[] = $checkId; + $this->clearReferenceCheckQueueLock($checkId); + try { + $results[] = $this->runReferenceCheckOnce($checkId); + } catch (\Exception $e) { + $failed[] = [ + 'check_id' => $checkId, + 'error' => $e->getMessage(), + ]; + \think\Log::error('recheckByRefer check_id=' . $checkId . ' ' . $e->getMessage()); + } + } + + foreach (array_keys($amIds) as $amId) { + $this->syncAmRefCheckStatus($amId); + } + + return [ + 'article_id' => $articleId, + 'p_refer_id' => $pReferId, + 'reference_no' => $referenceNo, + 'reset' => count($rows), + 'checked' => count($results), + 'failed' => count($failed), + 'check_ids' => $checkIds, + 'results' => $results, + 'errors' => $failed, + ]; + } + + /** + * 清除队列 Redis 完成标记,避免重检任务被 acquireLock 静默丢弃 + */ + public function clearReferenceCheckQueueLock($checkId) + { + $checkId = intval($checkId); + if ($checkId <= 0) { + return; + } + try { + $keys = []; + foreach (['queue_job', 'queue_job_two'] as $prefix) { + $class = $prefix === 'queue_job_two' + ? 'app\\api\\job\\ReferenceCheckTwo' + : 'app\\api\\job\\ReferenceCheck'; + $base = $prefix . ':' . $class . ':' . $checkId; + $keys[] = $base; + $keys[] = $base . ':status'; + } + QueueRedis::getInstance()->deleteRedisKeys($keys); + } catch (\Exception $e) { + \think\Log::warning('clearReferenceCheckQueueLock id=' . $checkId . ' ' . 
$e->getMessage()); + } + } + + /** + * 执行一次引用 LLM 校对(同步,写回 article_reference_check_result) + */ + public function runReferenceCheckOnce($checkId) + { + $checkId = intval($checkId); + $row = Db::name('article_reference_check_result')->where('id', $checkId)->find(); + if (empty($row)) { + throw new \RuntimeException('article_reference_check_result not found, id=' . $checkId); + } + + $contentA = $this->resolveMainContentForJob($row); + $refer = null; + if (intval($row['p_refer_id']) > 0) { + $refer = Db::name('production_article_refer') + ->where('p_refer_id', intval($row['p_refer_id'])) + ->where('state', 0) + ->find(); + } + + if ($refer) { + $contentB = $this->formatReferForLlm($refer); + } else { + $contentB = trim((string)$this->arrGet($row, 'refer_text', '')); + } + + if ($contentA === '' || $contentB === '') { + $this->updateCheckResult($checkId, [ + 'status' => 2, + 'error_msg' => 'Missing article_main.content or refer_text', + ]); + throw new \RuntimeException('Missing article_main.content or refer_text'); + } + + $llmResult = (new LLMService())->checkReference($contentA, $contentB, false); + $requestFailed = !empty($llmResult['request_failed']); + $canSupport = $this->parseLlmCanSupport($llmResult); + $confidence = floatval(isset($llmResult['confidence']) ? $llmResult['confidence'] : 0); + $reason = isset($llmResult['reason']) ? $llmResult['reason'] : ''; + + // LLM 通讯失败:写 status=2(校对失败) + error_msg,抛异常让队列 worker 走 release(30) 重试; + // 重试 3 次后 ReferenceCheck::markFailed 会保持 status=2 收尾 + if ($requestFailed) { + $this->updateCheckResult($checkId, [ + 'confidence' => $confidence, + 'reason' => $reason, + 'status' => 2, + 'error_msg' => $reason, + ]); + $this->clearReferenceCheckQueueLock($checkId); + throw new \RuntimeException($reason !== '' ? $reason : 'LLM request failed'); + } + + $this->updateCheckResult($checkId, [ + 'can_support' => $canSupport ? 1 : 0, + 'is_match' => $canSupport ? 
1 : 0, + 'confidence' => $confidence, + 'reason' => $reason, + 'status' => 1, + 'error_msg' => '', + ]); + + $this->clearReferenceCheckQueueLock($checkId); + $this->maybeEnqueueSecondPass($checkId, $confidence); + + return [ + 'check_id' => $checkId, + 'can_support' => $canSupport ? 1 : 0, + 'is_match' => $canSupport ? 1 : 0, + 'confidence' => $confidence, + 'reason' => $reason, + ]; + } + + /** + * @return array{refer: array, p_article_id: int, p_refer_id: int, reference_no: int} + */ + private function resolveReferForRecheck($articleId, $pReferId, $referenceNo) + { + $prod = Db::name('production_article') + ->where('article_id', $articleId) + ->whereIn('state', [0, 2]) + ->find(); + if (empty($prod)) { + throw new \RuntimeException('production_article not found for article_id=' . $articleId); + } + + $pArticleId = intval($prod['p_article_id']); + $refer = null; + + if ($pReferId > 0) { + $refer = Db::name('production_article_refer') + ->where('p_refer_id', $pReferId) + ->where('p_article_id', $pArticleId) + ->where('state', 0) + ->find(); + } elseif ($referenceNo > 0) { + $referMap = $this->loadReferMapByPArticleId($pArticleId); + $referIndex = $referenceNo - 1; + if (isset($referMap[$referIndex])) { + $refer = $referMap[$referIndex]; + $pReferId = intval($refer['p_refer_id']); + } + } else { + throw new \InvalidArgumentException('p_refer_id or reference_no is required'); + } + + if (empty($refer)) { + throw new \RuntimeException('production_article_refer not found'); + } + + return [ + 'refer' => $refer, + 'p_article_id' => $pArticleId, + 'p_refer_id' => intval($refer['p_refer_id']), + 'reference_no' => intval($refer['index']) + 1, + ]; + } + /** * 仅使用 refer_doi 字段(二次 Crossref 摘要用) */ @@ -804,7 +1759,7 @@ class ReferenceCheckService return ''; } - $text = preg_replace('/\[([\d,\-\s]+)\]<\/blue>/', '[$1]', $text); + $text = preg_replace(self::BLUE_TAG_REGEX, '[$1]', $text); $text = strip_tags($text); $text = html_entity_decode($text, ENT_QUOTES | ENT_HTML5, 
'UTF-8'); $text = preg_replace('/\s+/u', ' ', $text); @@ -1163,6 +2118,7 @@ class ReferenceCheckService return false; } + $this->clearReferenceCheckQueueLock($checkId); $this->pushJob2($checkId, 5); return true; } @@ -1173,7 +2129,7 @@ class ReferenceCheckService public function extractReferences($content) { $result = []; - preg_match_all('/\[([\d,\-\s]+)\]<\/blue>/', $content, $matches, PREG_OFFSET_CAPTURE); + preg_match_all(self::BLUE_TAG_REGEX, $content, $matches, PREG_OFFSET_CAPTURE); if (empty($matches[0])) { return []; } @@ -1353,7 +2309,7 @@ class ReferenceCheckService private function buildCitationContextText($content, $start, $end) { $text = $this->byteSubstr($content, $start, $end); - $text = preg_replace('/\[[\d,\-\s]+\]<\/blue>/', '', $text); + $text = preg_replace(self::BLUE_TAG_REGEX, '', $text); $text = trim(strip_tags($text)); $text = preg_replace('/\s+/u', ' ', $text); $text = ltrim($text, "\xEF\xBB\xBF"); @@ -1493,8 +2449,7 @@ class ReferenceCheckService $start = $tagStart - $maxBytes; $slice = substr($content, $start, $tagStart - $start); if (preg_match('/[.!?。!?]\s+/u', $slice, $m, PREG_OFFSET_CAPTURE)) { - $last = end($m[0]); - $rel = $last[1] + strlen($last[0]); + $rel = $m[0][1] + strlen($m[0][0]); return $start + $rel; } @@ -1540,7 +2495,7 @@ class ReferenceCheckService } $gap = substr($content, $tagEnd, $end - $tagEnd); - $gapText = trim(strip_tags(preg_replace('/\[[\d,\-\s]+\]<\/blue>/', '', $gap))); + $gapText = trim(strip_tags(preg_replace(self::BLUE_TAG_REGEX, '', $gap))); if ($gapText !== '' && !$this->isOnlyPunctuationOrSpace($gapText)) { return $end; } @@ -1551,8 +2506,47 @@ class ReferenceCheckService return $length; } + /** + * 已入库记录按文献编号正序入队(同号按 am_id、正文位置稳定排序) + * + * @param array $rows 元素含 check_id、reference_no,可选 am_id、text_start + */ + private function pushJobsSortedByReferenceNo(array $rows) + { + if (empty($rows)) { + return []; + } + + usort($rows, function ($a, $b) { + if ($a['reference_no'] !== $b['reference_no']) { + 
return $a['reference_no'] - $b['reference_no']; + } + $amA = isset($a['am_id']) ? intval($a['am_id']) : 0; + $amB = isset($b['am_id']) ? intval($b['am_id']) : 0; + if ($amA !== $amB) { + return $amA - $amB; + } + $posA = isset($a['text_start']) ? intval($a['text_start']) : 0; + $posB = isset($b['text_start']) ? intval($b['text_start']) : 0; + return $posA - $posB; + }); + + $checkIds = []; + $delay = 0; + foreach ($rows as $row) { + $checkId = intval($row['check_id']); + $checkIds[] = $checkId; + $this->pushJob($checkId, $delay); + $delay++; + } + + return $checkIds; + } + private function pushJob($checkId, $delaySeconds = 0) { + $checkId = intval($checkId); + $this->clearReferenceCheckQueueLock($checkId); $jobClass = 'app\api\job\ReferenceCheck@fire'; $data = ['check_id' => $checkId]; try { diff --git a/application/common/service/LLMService.php b/application/common/service/LLMService.php index 01a755df..69f5e61c 100644 --- a/application/common/service/LLMService.php +++ b/application/common/service/LLMService.php @@ -32,11 +32,14 @@ class LLMService */ public function checkReference($contextText, $referText, $isAgain = false, $doiBlock = null) { + // request_failed=true 表示"LLM 通讯/解析层面的失败"(可重试,区别于业务上的"未命中"); + // 上游 runReferenceCheckOnce 会据此把 DB.status 置为 2(失败) 并抛异常触发队列重试 $fallback = [ - 'can_support' => false, - 'is_match' => false, - 'confidence' => 0.0, - 'reason' => 'LLM not configured or request failed', + 'can_support' => false, + 'is_match' => false, + 'confidence' => 0.0, + 'reason' => 'LLM not configured or request failed', + 'request_failed' => true, ]; if ($this->url === '' || $this->model === '') { \think\Log::warning('ReferenceCheck LLM: url or model not configured'); @@ -47,6 +50,7 @@ class LLMService $referText = trim($referText); $doiBlock = trim((string)$doiBlock); if ($contextText === '' || $referText === '') { + // 空文本是入参问题,不是 LLM 故障,不需要重试 return [ 'can_support' => false, 'is_match' => false, @@ -149,10 +153,10 @@ class LLMService 
你是文献引用校对助手。判断【正文全文】与【参考文献书目】是否相关、能否用于支撑正文中的引用。 【核心原则:从宽判断,避免误杀】 -默认倾向 can_support=true。只要文献与正文不是「驴唇不对马嘴」,即判为相关、能支撑。 +默认倾向 can_support=true。只要文献与正文不是「风马牛不相及」,即判为相关、能支撑。 不要求变量一致、不要求结论逐条对应、不要求研究设计相同。 -【仅当以下情况才判 can_support=false(驴唇不对马嘴)】 +【仅当以下情况才判 can_support=false(与正文明显无关)】 - 学科/主题完全无关(如正文讲深度学习聚类,文献是糖尿病步态检测)。 - 明显张冠李戴(正文断言 A 疗法的效果,文献研究的是完全不同的 B 问题且无关联)。 - 文献条目与正文讨论的对象/场景毫无交集,且无法作背景或理论引用。 @@ -164,7 +168,7 @@ class LLMService 【confidence 固定档位(禁止其它小数)】 can_support=true:0.65(有关联但较泛)/ 0.78 / 0.85 / 0.92 / 0.98(非常确定相关) -can_support=false:0.15(明确驴唇不对马嘴)/ 0.25 / 0.35 / 0.45(仅当实在无法建立任何合理关联) +can_support=false:0.15(明确风马牛不相及)/ 0.25 / 0.35 / 0.45(仅当实在无法建立任何合理关联) 【输出】仅一行 minified JSON,无 markdown: {"can_support":true|false,"is_match":true|false,"confidence":0.15|0.25|0.35|0.45|0.65|0.78|0.85|0.92|0.98,"reason":"30-80字简体中文"} @@ -176,7 +180,7 @@ PROMPT; { return "【正文全文 article_main.content】\n" . $contextText . "\n\n【参考文献书目 refer_text】\n" . $referText - . "\n\n请从宽判断:非驴唇不对马嘴即 can_support=true,只返回 JSON。"; + . "\n\n请从宽判断:文献与正文非风马牛不相及即可判 can_support=true,只返回 JSON。"; } /** 第二次校对:Crossref 摘要(Refer_doi) */ @@ -186,12 +190,12 @@ PROMPT; 你是文献引用二次校对助手。已根据 Refer_doi 从 Crossref(https://api.crossref.org/works/)获取摘要,请结合【正文全文】复核该文献是否相关。 【核心原则:与第一次相同,从宽判断】 -默认倾向 can_support=true。只要 Crossref 摘要(或书目)与正文不是驴唇不对马嘴,即判相关、能支撑。 +默认倾向 can_support=true。只要 Crossref 摘要(或书目)与正文不是风马牛不相及,即判相关、能支撑。 以【Crossref 摘要】为准;摘要与书目冲突时以摘要为准。 【仅当以下情况才判 can_support=false】 - 摘要显示的研究主题/对象/方法与正文讨论内容完全风马牛不相及。 -- 典型驴唇不对马嘴、张冠李戴,且无法解释为背景或泛化引用。 +- 典型风马牛不相及、张冠李戴,且无法解释为背景或泛化引用。 【以下情况均应 can_support=true】 - 摘要与正文属同领域或相近方向,能作背景、理论或方向性支撑。 @@ -217,7 +221,7 @@ PROMPT; . "\n\n【参考文献书目 refer_text】\n" . $referText . "\n\n【Crossref 摘要】(Refer_doi → api.crossref.org/works/)\n" . ($doiBlock !== '' ? $doiBlock : '(未获取到摘要,请结合 refer_text 从宽判断)') - . "\n\n非驴唇不对马嘴即 can_support=true,只返回 JSON。"; + . "\n\n文献与正文非风马牛不相及即可判 can_support=true,只返回 JSON。"; } private function buildReferenceCheckSystemPrompt3() {