From 4aab7f5b7e5de33ae77a86f7b9ce9c0938ee45b4 Mon Sep 17 00:00:00 2001 From: wyn <1074145239@qq.com> Date: Thu, 21 May 2026 10:02:05 +0800 Subject: [PATCH] =?UTF-8?q?=E6=96=87=E7=AB=A0=E5=BC=95=E7=94=A8=E6=96=87?= =?UTF-8?q?=E7=8C=AE=E6=A0=A1=E9=AA=8C?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- application/api/controller/Article.php | 414 ++++++++++ application/api/job/ReferenceCheck.php | 135 ++++ application/common/ReferenceCheckService.php | 751 +++++++++++++++++++ application/common/service/LLMService.php | 490 ++++++++++++ 4 files changed, 1790 insertions(+) create mode 100644 application/api/job/ReferenceCheck.php create mode 100644 application/common/ReferenceCheckService.php create mode 100644 application/common/service/LLMService.php diff --git a/application/api/controller/Article.php b/application/api/controller/Article.php index e47a0473..e346c264 100644 --- a/application/api/controller/Article.php +++ b/application/api/controller/Article.php @@ -10,6 +10,7 @@ use PhpOffice\PhpWord\IOFactory; use app\common\OpenAi; use app\common\CrossrefService; use app\common\PubmedService; +use app\common\ReferenceCheckService; /** * @title 文章接口 @@ -6391,4 +6392,417 @@ class Article extends Base Db::commit(); return json_encode(['status' => 1,'msg' => 'success']); } + /** + * 调试:预览 article_main 中提取的 blue 引用(不入队) + * POST: article_id + */ + public function citationReview() + { + $articleId = 7821;//intval($this->request->post('article_id', 0)); + if ($articleId <= 0) { + return jsonError('article_id is required'); + } + + $svc = new ReferenceCheckService(); + $mains = Db::name('article_main') + ->field('am_id,content') + ->where('article_id', $articleId) + ->where('am_id', 127448) + //->whereIn('state', [0, 2]) + ->order('sort asc') + ->select(); + + $preview = []; + foreach ($mains as $item) { + $preview[] = [ + 'am_id' => $item['am_id'], + 'citations' => $svc->extractReferences((string)$item['content']), + ]; + 
break; + } + return jsonSuccess(['article_id' => $articleId, 'sections' => $preview]); + } + /** + * 提取文献引用 + * + * @param string $content 原始内容 + * @return array + */ + function extractReferences($content) + { + $result = []; + + // 匹配 [57][74-79][72, 45] + preg_match_all( + '/\[([\d,\-\s]+)\]<\/blue>/', + $content, + $matches, + PREG_OFFSET_CAPTURE + ); + + if (empty($matches[0])) { + return []; + } + + foreach ($matches[0] as $index => $match) { + + // 完整标签 + $fullTag = $match[0]; + + // 标签开始位置 + $tagStart = $match[1]; + + // 标签结束位置 + $tagEnd = $tagStart + strlen($fullTag); + + // 文献号原始字符串 + $rawRef = trim($matches[1][$index][0]); + + // 展开文献号 + $referenceNumbers = $this->expandReferenceNumbers($rawRef); + + /** + * 获取原文内容 + * 这里按句号切分: + * 找当前引用所在句子的开始和结束位置 + */ + $sentenceStart = $this->findSentenceStart($content, $tagStart); + $sentenceEnd = $this->findSentenceEnd($content, $tagEnd); + + $originalText = mb_substr( + $content, + $sentenceStart, + $sentenceEnd - $sentenceStart + ); + + // 去掉 blue 标签 + $originalText = preg_replace( + '/\[[\d,\-\s]+\]<\/blue>/', + '', + $originalText + ); + + $originalText = trim($originalText); + + $result[] = [ + 'reference_raw' => $rawRef, + 'reference_numbers' => $referenceNumbers, + 'original_text' => $originalText, + + // blue标签在整段中的位置 + 'reference_start' => $tagStart, + 'reference_end' => $tagEnd, + + // 原文位置 + 'text_start' => $sentenceStart, + 'text_end' => $sentenceEnd, + ]; + } + + return $result; + } + + /** + * 展开文献号 + * 11-15 => [11,12,13,14,15] + * 72,45 => [72,45] + * 74-79,81 => [74,75,76,77,78,79,81] + */ + function expandReferenceNumbers($refStr) + { + $numbers = []; + + $parts = explode(',', $refStr); + + foreach ($parts as $part) { + + $part = trim($part); + + // 范围 + if (strpos($part, '-') !== false) { + + list($start, $end) = explode('-', $part); + + $start = intval(trim($start)); + $end = intval(trim($end)); + + if ($start <= $end) { + $numbers = array_merge( + $numbers, + range($start, $end) + ); + } + + } 
else { + + // 单个数字 + if (is_numeric($part)) { + $numbers[] = intval($part); + } + } + } + + return array_values(array_unique($numbers)); + } + + /** + * 查找句子开始位置 + */ + function findSentenceStart($content, $position) + { + $delimiters = ['.', '。', '!', '?', "\n"]; + + $start = 0; + + foreach ($delimiters as $delimiter) { + + $pos = strrpos( + substr($content, 0, $position), + $delimiter + ); + + if ($pos !== false) { + $start = max($start, $pos + 1); + } + } + + return $start; + } + + /** + * 查找句子结束位置 + */ + function findSentenceEnd($content, $position) + { + $length = strlen($content); + + $endPositions = []; + + foreach (['.', '。', '!', '?', "\n"] as $delimiter) { + + $pos = strpos($content, $delimiter, $position); + + if ($pos !== false) { + $endPositions[] = $pos + 1; + } + } + + return empty($endPositions) + ? $length + : min($endPositions); + } + + /** + * 引用相关性:提交单条到队列(异步调用 promotion 同款本地大模型) + * POST: content_a(必填), content_b(可选), article_id, reference_no(n=index+1), am_id + */ + public function referenceCheckEnqueue() + { + $data = $this->request->post(); + $contentA = trim((string)(isset($data['content_a']) ? $data['content_a'] : '')); + $contentB = trim((string)(isset($data['content_b']) ? $data['content_b'] : '')); + $articleId = intval(isset($data['article_id']) ? $data['article_id'] : 0); + $referenceNo = intval(isset($data['reference_no']) ? $data['reference_no'] : 0); + + if ($contentA === '') { + return jsonError('content_a is required'); + } + + try { + $svc = new ReferenceCheckService(); + $extra = [ + 'reference_no' => $referenceNo, + 'article_id' => $articleId, + 'am_id' => intval(isset($data['am_id']) ? 
$data['am_id'] : 0), + ]; + + if ($contentB === '' && $articleId > 0 && $referenceNo > 0) { + $prod = Db::name('production_article') + ->where('article_id', $articleId) + ->where('state', 0) + ->find(); + if ($prod) { + $referMap = $svc->loadReferMapByPArticleId(intval($prod['p_article_id'])); + $referIndex = $referenceNo - 1; + if (isset($referMap[$referIndex])) { + $refer = $referMap[$referIndex]; + $contentB = $svc->formatReferForLlm($refer); + $extra['p_article_id'] = intval($prod['p_article_id']); + $extra['p_refer_id'] = intval($refer['p_refer_id']); + $extra['refer_index'] = $referIndex; + } + } + } + + $result = $svc->enqueue($contentA, $contentB, $extra); + return jsonSuccess($result); + } catch (\Exception $e) { + return jsonError($e->getMessage()); + } + } + + public function referenceCheckEnqueueArticleMain(){ + $data = $this->request->post(); + $articleId = intval(isset($data['article_id']) ? $data['article_id'] : 0); + if ($articleId <= 0) { + return jsonError('article_id is required'); + } + $mainsList = Db::name('article_main') + ->field('am_id,content,article_id') + ->where('article_id', $articleId) + ->whereIn('state', [0, 2]) + ->order('sort asc') + ->select(); + + $svc = new ReferenceCheckService(); + foreach ($mainsList as $mainInfo ){ + $svc->enqueueByArticleMain($mainInfo); + } + } + /** + * 按文章批量入队:从 article_main 提取 blue 引用与文献号 + * POST: article_id, clear_previous=1(默认清空该文旧明细后重检) + */ + public function referenceCheckEnqueueArticle() + { + $data = $this->request->post(); + $articleId = intval(isset($data['article_id']) ? 
$data['article_id'] : 0); + if ($articleId <= 0) { + return jsonError('article_id is required'); + } + + try { + $svc = new ReferenceCheckService(); + $clear = !isset($data['clear_previous']) || intval($data['clear_previous']) === 1; + $result = $svc->enqueueByArticle($articleId, $clear); + return jsonSuccess($result); + } catch (\Exception $e) { + return jsonError($e->getMessage()); + } + } + + /** + * 查询单条引用相关性检测结果 + * GET/POST: check_id + */ + public function referenceCheckResult() + { + $checkId = intval($this->request->param('check_id', 0)); + if ($checkId <= 0) { + return jsonError('check_id is required'); + } + + $row = (new ReferenceCheckService())->getResult($checkId); + if (!$row) { + return jsonError('result not found'); + } + + return jsonSuccess($this->formatReferenceCheckRow($row)); + } + + /** + * 稿件预览:带不合理引用标记的 content(序号 + 引用句) + * GET/POST: article_id, am_id(可选,只预览某一节) + */ + public function referenceCheckPreview() + { + $articleId = intval($this->request->param('article_id', 0)); + if ($articleId <= 0) { + return jsonError('article_id is required'); + } + $amId = intval($this->request->param('am_id', 0)); + + try { + $data = (new ReferenceCheckService())->buildArticlePreview($articleId, $amId); + $data['markup_hint'] = [ + 'ref_no' => '.ref-no-error — 不合理的文献序号(如 70-73 中单独的 70)', + 'ref_cite' => '.ref-cite-tag.ref-cite-error — 含不合理序号的 blue 引用块', + 'ref_context'=> '.ref-context-error — 不合理的引用句/上下文', + ]; + $data['preview_css'] = '.ref-no-error{color:#c00;font-weight:bold;border-bottom:2px wavy #c00}' + . '.ref-cite-tag.ref-cite-error{background:#ffecec}' + . 
'.ref-context-error{background:#fff3cd;outline:1px dashed #e6a700}'; + return jsonSuccess($data); + } catch (\Exception $e) { + return jsonError($e->getMessage()); + } + } + + /** + * 按文章列出引用校对结果([70-73] 为 4 条,reference_no 分别为 70,71,72,73) + * GET/POST: article_id, status(可选), only_mismatch=1 仅不合理 + */ + public function referenceCheckList() + { + $articleId = intval($this->request->param('article_id', 0)); + if ($articleId <= 0) { + return jsonError('article_id is required'); + } + + $status = $this->request->param('status', ''); + $statusFilter = ($status === '' || $status === null) ? -1 : intval($status); + $onlyMismatch = intval($this->request->param('only_mismatch', 0)) === 1; + $rows = (new ReferenceCheckService())->listByArticle($articleId, $statusFilter, $onlyMismatch); + + $list = []; + foreach ($rows as $row) { + $list[] = $this->formatReferenceCheckRow($row); + } + + $mains = Db::name('article_main') + ->field('am_id,ref_check_status,sort') + ->where('article_id', $articleId) + ->whereIn('state', [0, 2]) + ->order('sort asc') + ->select(); + $sections = []; + foreach ($mains as $m) { + $st = intval(isset($m['ref_check_status']) ? $m['ref_check_status'] : 0); + $sections[] = [ + 'am_id' => intval($m['am_id']), + 'ref_check_status' => $st, + 'ref_check_pass' => $st === ReferenceCheckService::AM_STATUS_PASS, + 'ref_check_label' => ReferenceCheckService::amStatusLabel($st), + ]; + } + + return jsonSuccess([ + 'article_id' => $articleId, + 'total' => count($list), + 'list' => $list, + 'sections' => $sections, + ]); + } + + private function formatReferenceCheckRow($row) + { + $statusMap = array(0 => 'pending', 1 => 'done', 2 => 'failed'); + $amId = intval(isset($row['am_id']) ? $row['am_id'] : 0); + $citeStart = intval(isset($row['cite_tag_start']) ? $row['cite_tag_start'] : 0); + $rowStatus = intval($row['status']); + return array( + 'check_id' => intval($row['check_id']), + 'article_id' => intval(isset($row['article_id']) ? 
$row['article_id'] : 0), + 'am_id' => $amId, + 'cite_group_key' => $amId . '_' . $citeStart, + 'p_refer_id' => intval(isset($row['p_refer_id']) ? $row['p_refer_id'] : 0), + 'refer_index' => intval(isset($row['refer_index']) ? $row['refer_index'] : 0), + 'reference_no' => intval(isset($row['reference_no']) ? $row['reference_no'] : 0), + 'reference_raw' => isset($row['reference_raw']) ? $row['reference_raw'] : '', + 'cite_tag_start' => $citeStart, + 'cite_tag_end' => intval(isset($row['cite_tag_end']) ? $row['cite_tag_end'] : 0), + 'text_start' => intval(isset($row['text_start']) ? $row['text_start'] : 0), + 'text_end' => intval(isset($row['text_end']) ? $row['text_end'] : 0), + 'status' => isset($statusMap[$rowStatus]) ? $statusMap[$rowStatus] : 'unknown', + 'is_match' => intval($row['is_match']), + 'is_reasonable' => intval($row['is_match']) === 1, + 'confidence' => floatval($row['confidence']), + 'reason' => isset($row['reason']) ? $row['reason'] : '', + 'error_msg' => isset($row['error_msg']) ? $row['error_msg'] : '', + 'content_a' => isset($row['content_a']) ? $row['content_a'] : '', + 'content_b' => isset($row['content_b']) ? $row['content_b'] : '', + 'updated_at' => isset($row['updated_at']) ? $row['updated_at'] : '', + ); + } + } diff --git a/application/api/job/ReferenceCheck.php b/application/api/job/ReferenceCheck.php new file mode 100644 index 00000000..5058bdc1 --- /dev/null +++ b/application/api/job/ReferenceCheck.php @@ -0,0 +1,135 @@ +oQueueJob = new QueueJob(); + $this->QueueRedis = QueueRedis::getInstance(); + } + + public function fire(Job $job, $data) + { + $this->oQueueJob->init($job); + + $rawBody = empty($job->getRawBody()) ? '' : $job->getRawBody(); + $jobData = empty($rawBody) ? [] : json_decode($rawBody, true); + $jobId = empty($jobData['id']) ? 
'unknown' : $jobData['id']; + + $sRedisKey = ''; + $sRedisValue = ''; + + $this->oQueueJob->log("-----------队列任务开始-----------"); + $this->oQueueJob->log("当前任务ID: {$jobId}, 尝试次数: {$job->attempts()}"); + + try { + $checkId = intval(isset($data['check_id']) ? $data['check_id'] : 0); + $sClassName = get_class($this); + $sRedisKey = "queue_job:{$sClassName}:{$checkId}"; + $sRedisValue = uniqid() . '_' . getmypid(); + + if (!$this->oQueueJob->acquireLock($sRedisKey, $sRedisValue, $job)) { + return; + } + + if ($checkId <= 0) { + $job->delete(); + return; + } + + $row = Db::name('article_reference_check_result')->where('id', $checkId)->find(); + if (empty($row)) { + $job->delete(); + return; + } + + if (intval($row['status']) === 1) { + $job->delete(); + return; + } + + try { + $contentA = trim((string)(isset($row['origin_text']) ? $row['origin_text'] : '')); + $contentB = trim((string)(isset($row['refer_text']) ? $row['refer_text'] : '')); + + if ($contentB === '' && intval($row['p_refer_id']) > 0) { + $refer = Db::name('production_article_refer') + ->where('p_refer_id', intval($row['p_refer_id'])) + ->where('status', 0) + ->find(); + if ($refer) { + $contentB = (new ReferenceCheckService())->formatReferForLlm($refer); + } + } + + if ($contentA === '' || $contentB === '') { + $this->markFailed($checkId, 'Missing content_a or reference text'); + $job->delete(); + return; + } + + $llm = new LLMService(); + $llmResult = $llm->checkReference($contentA, $contentB); + + Db::name('article_reference_check_result')->where('id', $checkId)->update([ + 'is_match' => !empty($llmResult['is_match']) ? 1 : 0, + 'confidence' => $llmResult['confidence'], + 'reason' => $llmResult['reason'], + 'status' => 1, + 'error_msg' => '', + 'updated_at' => date('Y-m-d H:i:s'), + ]); + + $amId = intval(isset($row['am_id']) ? 
$row['am_id'] : 0); + if ($amId > 0) { + (new ReferenceCheckService())->syncAmRefCheckStatus($amId); + } + $this->QueueRedis->finishJob($sRedisKey, 'completed', $this->completedExprie, $sRedisValue); + $job->delete(); + $this->oQueueJob->log("任务执行成功 | 日志ID: {$sRedisKey}"); + } catch (\Exception $e) { + var_dump($e->getMessage()); + if ($job->attempts() >= 3) { + $this->markFailed($checkId, $e->getMessage()); + $job->delete(); + return; + } + $job->release(30); + } + } catch (\RuntimeException $e) { + $this->oQueueJob->handleRetryableException($e, $sRedisKey, $sRedisValue, $job); + } catch (\LogicException $e) { + $this->oQueueJob->handleNonRetryableException($e, $sRedisKey, $sRedisValue, $job); + } catch (\Exception $e) { + $this->oQueueJob->handleRetryableException($e, $sRedisKey, $sRedisValue, $job); + } finally { + $this->oQueueJob->finnal(); + } + } + + private function markFailed($checkId, $msg) + { + $row = Db::name('article_reference_check_result')->where('id', $checkId)->find(); + Db::name('article_reference_check_result')->where('id', $checkId)->update([ + 'status' => 2, + 'error_msg' => mb_substr($msg, 0, 500), + 'updated_at' => date('Y-m-d H:i:s'), + ]); + $amId = empty($row) ? 0 : intval(isset($row['am_id']) ? 
$row['am_id'] : 0); + if ($amId > 0) { + (new ReferenceCheckService())->syncAmRefCheckStatus($amId); + } + } +} diff --git a/application/common/ReferenceCheckService.php b/application/common/ReferenceCheckService.php new file mode 100644 index 00000000..f913e8e1 --- /dev/null +++ b/application/common/ReferenceCheckService.php @@ -0,0 +1,751 @@ +insertGetId([ + 'article_id' => intval($this->arrGet($extra, 'article_id', 0)), + 'am_id' => intval($this->arrGet($extra, 'am_id', 0)), + 'p_article_id' => intval($this->arrGet($extra, 'p_article_id', 0)), + 'p_refer_id' => intval($this->arrGet($extra, 'p_refer_id', 0)), + 'refer_index' => intval($this->arrGet($extra, 'refer_index', 0)), + 'reference_no' => intval($this->arrGet($extra, 'reference_no', 0)), + 'reference_raw' => (string)$this->arrGet($extra, 'reference_raw', ''), + 'cite_tag_start' => intval($this->arrGet($extra, 'cite_tag_start', 0)), + 'cite_tag_end' => intval($this->arrGet($extra, 'cite_tag_end', 0)), + 'text_start' => intval($this->arrGet($extra, 'text_start', 0)), + 'text_end' => intval($this->arrGet($extra, 'text_end', 0)), + 'content_a' => $contentA, + 'content_b' => trim($contentB), + 'status' => 0, + 'created_at' => $now, + 'updated_at' => $now, + ]); + + $amId = intval($this->arrGet($extra, 'am_id', 0)); + if ($amId > 0) { + $this->setAmRefCheckStatus($amId, self::AM_STATUS_RUNNING); + } + + $this->pushJob(intval($checkId), intval($this->arrGet($extra, 'queue_delay', 0))); + + return ['check_id' => $checkId, 'queued' => 1]; + } + public function enqueueByArticleMain($main){ + $amId = $main['am_id']; +// $main = Db::name('article_main') +// ->field('am_id,content,article_id') +// ->where('am_id', $amId) +// ->whereIn('state', [0, 2]) +// ->find(); + $citations = $this->extractReferences((string)$main['content']); +// return $citations; + + $prod = Db::name('production_article') + ->where('article_id', $main['article_id']) + ->where('state', 0) + ->find(); + if (empty($prod)) { + throw new 
\RuntimeException('production_article not found for article_id=' . $main['article_id']); + } + + $pArticleId = intval($prod['p_article_id']); + $referMap = $this->loadReferMapByPArticleId($pArticleId); + + if (empty($citations)) { + $this->setAmRefCheckStatus($amId, self::AM_STATUS_PASS); + return; + } + + $skipped = 0; + $delay = 0; + foreach ($citations as $cite) { + foreach ($cite['reference_numbers'] as $refNo) { + $referIndex = $refNo - 1; + if ($referIndex < 0 || !isset($referMap[$referIndex])) { + $skipped++; + continue; + } + $refer = $referMap[$referIndex]; + $referText = $this->formatReferForLlm($refer); + + $now = date('Y-m-d H:i:s'); + // [70-73] 展开为 reference_no=70,71,72,73 共 4 条记录 + $checkId = Db::name('article_reference_check_result')->insertGetId([ + 'article_id' => $main['article_id'], + 'p_article_id' => $pArticleId, + 'am_id' => intval($main['am_id']), + 'reference_no' => $refNo, + 'refer_index' => $refNo, + 'origin_text' => $cite['original_text'], + 'refer_text' => $referText, + 'p_refer_id' => $referMap[$referIndex]['p_refer_id'], + 'text_start' => $cite['text_start'], + 'text_end' => $cite['text_end'], + 'created_at' => $now, + 'updated_at' => $now, + ]); + $this->pushJob(intval($checkId), $delay); + $checkIds[] = $checkId; + $delay += 1; + } + } + + $this->setAmRefCheckStatus($amId, self::AM_STATUS_RUNNING); + } + /** + * 按 article_id 扫描 t_article_main,为每个 blue 引用 × 文献号入队 + */ + public function enqueueByArticle($articleId, $clearPrevious = true) + { + if ($articleId <= 0) { + throw new \InvalidArgumentException('article_id is required'); + } + + $prod = Db::name('production_article') + ->where('article_id', $articleId) + ->where('state', 0) + ->find(); + if (empty($prod)) { + throw new \RuntimeException('production_article not found for article_id=' . 
$articleId); + } + + $pArticleId = intval($prod['p_article_id']); + $referMap = $this->loadReferMapByPArticleId($pArticleId); + + $mains = Db::name('article_main') + ->field('am_id,content') + ->where('article_id', $articleId) + ->whereIn('state', [0, 2]) + ->order('sort asc') + ->select(); + + if (empty($mains)) { + throw new \RuntimeException('article_main is empty'); + } + + if ($clearPrevious) { + $this->clearArticleChecks($articleId); + } + + $queued = 0; + $skipped = 0; + $checkIds = []; + $delay = 0; + $amIdsWithJobs = []; + + foreach ($mains as $main) { + $amId = intval($main['am_id']); + $citations = $this->extractReferences((string)$main['content']); + if (empty($citations)) { + $this->setAmRefCheckStatus($amId, self::AM_STATUS_NONE); + continue; + } + foreach ($citations as $cite) { + foreach ($cite['reference_numbers'] as $refNo) { + $referIndex = $refNo - 1; + if ($referIndex < 0 || !isset($referMap[$referIndex])) { + $skipped++; + continue; + } + $refer = $referMap[$referIndex]; + $referText = $this->formatReferForLlm($refer); + + $now = date('Y-m-d H:i:s'); + // [70-73] 展开为 reference_no=70,71,72,73 共 4 条记录 + $checkId = Db::name('article_reference_check_result')->insertGetId([ + 'article_id' => $articleId, + 'am_id' => intval($main['am_id']), + 'p_article_id' => $pArticleId, + 'p_refer_id' => intval($refer['p_refer_id']), + 'refer_index' => $referIndex, + 'reference_no' => $refNo, + 'reference_raw' => $cite['reference_raw'], + 'cite_tag_start' => intval($cite['reference_start']), + 'cite_tag_end' => intval($cite['reference_end']), + 'text_start' => intval($cite['text_start']), + 'text_end' => intval($cite['text_end']), + 'content_a' => $cite['original_text'], + 'content_b' => $referText, + 'status' => 0, + 'created_at' => $now, + 'updated_at' => $now, + ]); + + $this->pushJob(intval($checkId), $delay); + $checkIds[] = $checkId; + $queued++; + $delay += 1; + $amIdsWithJobs[$amId] = true; + } + } + } + + foreach (array_keys($amIdsWithJobs) as $amId) { + 
$this->setAmRefCheckStatus($amId, self::AM_STATUS_RUNNING); + } + + return [ + 'article_id' => $articleId, + 'p_article_id' => $pArticleId, + 'queued' => $queued, + 'skipped' => $skipped, + 'check_ids' => $checkIds, + 'queue' => self::QUEUE_NAME, + ]; + } + + /** + * 根据该节全部明细行汇总更新 t_article_main.ref_check_status + */ + public function syncAmRefCheckStatus($amId) + { + if ($amId <= 0) { + return self::AM_STATUS_NONE; + } + + $rows = Db::name('article_reference_check_result')->where('am_id', $amId)->select(); + if (empty($rows)) { + $this->setAmRefCheckStatus($amId, self::AM_STATUS_NONE); + return self::AM_STATUS_NONE; + } + + $pending = 0; + $hasFail = false; + $done = 0; + + foreach ($rows as $row) { + $st = intval($row['status']); + if ($st === 0) { + $pending++; + continue; + } + if ($st === 2 || ($st === 1 && intval($row['is_match']) === 0)) { + $hasFail = true; + } + if ($st === 1) { + $done++; + } + } + + if ($pending > 0) { + $status = self::AM_STATUS_RUNNING; + } elseif ($hasFail) { + $status = self::AM_STATUS_FAIL; + } elseif ($done === count($rows)) { + $status = self::AM_STATUS_PASS; + } else { + $status = self::AM_STATUS_FAIL; + } + + $this->setAmRefCheckStatus($amId, $status); + return $status; + } + + public function setAmRefCheckStatus($amId, $status) + { + if ($amId <= 0) { + return; + } + Db::name('article_main')->where('am_id', $amId)->update([ + 'ref_check_status' => $status, + ]); + } + + public function clearArticleChecks($articleId) + { + Db::name('article_reference_check_result')->where('article_id', $articleId)->delete(); + Db::name('article_main') + ->where('article_id', $articleId) + ->whereIn('state', [0, 2]) + ->update(['ref_check_status' => self::AM_STATUS_NONE]); + } + + public static function amStatusLabel($status) + { + $map = [ + self::AM_STATUS_NONE => 'none', + self::AM_STATUS_PASS => 'pass', + self::AM_STATUS_FAIL => 'fail', + self::AM_STATUS_RUNNING => 'running', + ]; + return isset($map[$status]) ? 
$map[$status] : 'unknown'; + } + + public function getResult($checkId) + { + if ($checkId <= 0) { + return null; + } + $row = Db::name('article_reference_check_result')->where('check_id', $checkId)->find(); + return $row ?: null; + } + + public function listByArticle($articleId, $status = -1, $onlyMismatch = false) + { + $q = Db::name('article_reference_check_result')->where('article_id', $articleId); + if ($status >= 0) { + $q->where('status', $status); + } + if ($onlyMismatch) { + $q->where('status', 1)->where('is_match', 0); + } + return $q->order('am_id asc, cite_tag_start asc, reference_no asc')->select(); + } + + /** + * 稿件预览:在 content 上标记不合理引用序号与引用句 + * + * @return array{sections: array, issues: array, stats: array} + */ + public function buildArticlePreview($articleId, $amId = 0) + { + $q = Db::name('article_main') + ->field('am_id,content,sort,ref_check_status') + ->where('article_id', $articleId) + ->whereIn('state', [0, 2]); + if ($amId > 0) { + $q->where('am_id', $amId); + } + $mains = $q->order('sort asc')->select(); + + $rows = $this->listByArticle($articleId, 1); + $badByAm = $this->indexBadResults($rows); + + $sections = []; + $issues = []; + $stats = ['total' => 0, 'mismatch' => 0, 'match' => 0, 'pending' => 0]; + + foreach ($this->listByArticle($articleId, -1) as $r) { + $stats['total']++; + if (intval($r['status']) === 0) { + $stats['pending']++; + } elseif (intval($r['is_match']) === 1) { + $stats['match']++; + } else { + $stats['mismatch']++; + } + } + + foreach ($mains as $main) { + $id = intval($main['am_id']); + $content = (string)$main['content']; + $badIndex = isset($badByAm[$id]) ? 
$badByAm[$id] : array(); + $marked = $this->markContentForPreview($content, $id, $badIndex); + $amStatus = intval($this->arrGet($main, 'ref_check_status', 0)); + $sections[] = [ + 'am_id' => $id, + 'ref_check_status' => $amStatus, + 'ref_check_pass' => $amStatus === self::AM_STATUS_PASS, + 'ref_check_label' => self::amStatusLabel($amStatus), + 'content' => $content, + 'content_marked' => $marked['html'], + 'issue_count' => $marked['issue_count'], + ]; + foreach ($marked['issues'] as $issue) { + $issues[] = $issue; + } + } + + $articlePass = $this->resolveArticlePass($sections); + + return [ + 'article_id' => $articleId, + 'article_ref_check_pass' => $articlePass, + 'sections' => $sections, + 'issues' => $issues, + 'stats' => $stats, + ]; + } + + /** + * 全文是否通过:各节均为 pass,且无 running/fail(无引用节忽略) + */ + private function resolveArticlePass($sections) + { + $hasChecked = false; + foreach ($sections as $sec) { + $st = intval($this->arrGet($sec, 'ref_check_status', 0)); + if ($st === self::AM_STATUS_NONE) { + continue; + } + $hasChecked = true; + if ($st !== self::AM_STATUS_PASS) { + return false; + } + } + return $hasChecked ? true : null; + } + + /** + * @param array $rows status=1 的检测结果 + * @return array am_id => indexed bad map + */ + private function indexBadResults($rows) + { + $byAm = []; + foreach ($rows as $row) { + if (intval($row['status']) !== 1 || intval($row['is_match']) === 1) { + continue; + } + $amId = intval($row['am_id']); + $refNo = intval($row['reference_no']); + if ($amId <= 0 || $refNo <= 0) { + continue; + } + if (!isset($byAm[$amId])) { + $byAm[$amId] = ['by_raw' => [], 'contexts' => []]; + } + $rawKey = $this->normalizeRefRawKey((string)$this->arrGet($row, 'reference_raw', '')); + if ($rawKey !== '') { + $byAm[$amId]['by_raw'][$rawKey][$refNo] = $row; + } + + $ctxKey = intval($row['text_start']) . '_' . 
intval($row['text_end']); + if (!isset($byAm[$amId]['contexts'][$ctxKey])) { + $byAm[$amId]['contexts'][$ctxKey] = [ + 'text_start' => intval($row['text_start']), + 'text_end' => intval($row['text_end']), + 'check_ids' => [], + 'reasons' => [], + 'ref_nos' => [], + ]; + } + $byAm[$amId]['contexts'][$ctxKey]['check_ids'][] = intval($row['check_id']); + $byAm[$amId]['contexts'][$ctxKey]['ref_nos'][] = $refNo; + $reason = trim((string)$this->arrGet($row, 'reason', '')); + if ($reason !== '') { + $byAm[$amId]['contexts'][$ctxKey]['reasons'][$refNo] = $reason; + } + } + return $byAm; + } + + private function normalizeRefRawKey($raw) + { + $raw = str_replace( + [',', '–', '—', '−', '‐', '‑', ' '], + [',', '-', '-', '-', '-', '-', ''], + trim($raw) + ); + return strtolower($raw); + } + + /** + * @param array $badIndex indexBadResults 中单 am 的结构 + */ + private function markContentForPreview($content, $amId, $badIndex) + { + $badByRaw = isset($badIndex['by_raw']) ? $badIndex['by_raw'] : array(); + $contexts = isset($badIndex['contexts']) ? $badIndex['contexts'] : array(); + $issues = array(); + $issueCount = 0; + + if ($content === '' || (empty($badByRaw) && empty($contexts))) { + return array('html' => $content, 'issues' => array(), 'issue_count' => 0); + } + + $html = $content; + + // 1) 先标记 blue 内各序号(在原文上操作,[70-73] 仅标不合理者如 70、71) + preg_match_all( + '/\[([\d,\-\s]+)\]<\/blue>/', + $html, + $matches, + PREG_OFFSET_CAPTURE + ); + $citeDeltas = []; + if (!empty($matches[0])) { + $replacements = []; + foreach ($matches[0] as $idx => $match) { + $fullTag = $match[0]; + $tagStart = $match[1]; + $tagEnd = $tagStart + strlen($fullTag); + $inner = $matches[1][$idx][0]; + $rawKey = $this->normalizeRefRawKey($inner); + $badNums = isset($badByRaw[$rawKey]) ? 
$badByRaw[$rawKey] : array(); + + $innerMarked = preg_replace_callback( + '/\d+/', + function ($numMatch) use ($badNums, &$issues, &$issueCount, $amId, $inner) { + $num = intval($numMatch[0]); + if (!isset($badNums[$num])) { + return $numMatch[0]; + } + $row = $badNums[$num]; + $rowReason = isset($row['reason']) ? $row['reason'] : ''; + $issueCount++; + $issues[] = array( + 'am_id' => $amId, + 'check_id' => intval($row['check_id']), + 'reference_no' => $num, + 'reference_raw' => $inner, + 'reason' => $rowReason, + 'confidence' => floatval(isset($row['confidence']) ? $row['confidence'] : 0), + ); + $title = htmlspecialchars( + '引用[' . $num . ']不合理: ' . $rowReason, + ENT_QUOTES, + 'UTF-8' + ); + return '' + . $numMatch[0] . ''; + }, + $inner + ); + + $tagClass = !empty($badNums) ? ' ref-cite-error' : ''; + $groupIds = !empty($badNums) + ? implode(',', array_map('intval', array_column($badNums, 'check_id'))) + : ''; + $newHtml = '[' . $innerMarked . ']'; + $replacements[] = [ + 'start' => $tagStart, + 'end' => $tagEnd, + 'html' => $newHtml, + 'delta' => strlen($newHtml) - ($tagEnd - $tagStart), + ]; + } + usort($replacements, function ($a, $b) { + return $b['start'] - $a['start']; + }); + foreach ($replacements as $rep) { + $html = substr($html, 0, $rep['start']) . $rep['html'] . 
substr($html, $rep['end']); + $citeDeltas[] = ['start' => $rep['start'], 'delta' => $rep['delta']]; + } + } + + $shiftByCite = function ($pos) use ($citeDeltas) { + $d = 0; + foreach ($citeDeltas as $cd) { + if ($cd['start'] < $pos) { + $d += $cd['delta']; + } + } + return $pos + $d; + }; + + // 2) 再标记引用句(从后往前) + if (!empty($contexts)) { + $spans = array_values($contexts); + usort($spans, function ($a, $b) { + return $b['text_start'] - $a['text_start']; + }); + foreach ($spans as $span) { + $start = $span['text_start']; + $end = $span['text_end']; + if ($start < 0 || $end <= $start) { + continue; + } + $s = $shiftByCite($start); + $e = $shiftByCite($end); + if ($e > strlen($html)) { + $e = strlen($html); + } + $checkIds = array_values(array_unique($span['check_ids'])); + $refNos = array_values(array_unique($span['ref_nos'])); + sort($refNos); + $reasonParts = []; + foreach ($refNos as $rn) { + if (!empty($span['reasons'][$rn])) { + $reasonParts[] = '[' . $rn . '] ' . $span['reasons'][$rn]; + } + } + $title = htmlspecialchars( + '引用句可能不合理: ' . implode('; ', $reasonParts), + ENT_QUOTES, + 'UTF-8' + ); + $open = ''; + $close = ''; + $html = substr($html, 0, $s) . $open . substr($html, $s, $e - $s) . $close . substr($html, $e); + } + } + + return ['html' => $html, 'issues' => $issues, 'issue_count' => $issueCount]; + } + + /** + * @return array refer_index => row + */ + public function loadReferMapByPArticleId($pArticleId) + { + $map = []; + if ($pArticleId <= 0) { + return $map; + } + $rows = Db::name('production_article_refer') + ->where('p_article_id', $pArticleId) + ->where('state', 0) + ->order('index asc') + ->select(); + foreach ($rows as $row) { + $map[intval($row['index'])] = $row; + } + return $map; + } + public function formatReferForLlm($refer) + { + $parts = []; + foreach (['title', 'author', 'joura', 'dateno', 'refer_doi', 'doilink'] as $f) { + $v = trim((string)$this->arrGet($refer, $f, '')); + if ($v !== '') { + $parts[] = ucfirst($f) . ': ' . 
$v; + } + } + $content = trim((string)$this->arrGet($refer, 'refer_content', '')); + if ($content !== '') { + $parts[] = 'Reference: ' . $content; + } + return implode("\n", $parts); + } + + /** + * 从 article_main.content 提取 blue 引用 + */ + public function extractReferences($content) + { + $result = []; + preg_match_all('/\[([\d,\-\s]+)\]<\/blue>/', $content, $matches,PREG_OFFSET_CAPTURE); + if (empty($matches[0])) { + return []; + } + + foreach ($matches[0] as $index => $match) { + + $fullTag = $match[0]; + $tagStart = $match[1]; + $tagEnd = $tagStart + strlen($fullTag); + $rawRef = trim($matches[1][$index][0]); + $referenceNumbers = $this->expandReferenceNumbers($rawRef); + + $sentenceStart = $this->findSentenceStart($content, $tagStart); + $sentenceEnd = $this->findSentenceEnd($content, $tagEnd); + $originalText = mb_substr($content, $sentenceStart, $sentenceEnd - $sentenceStart); + $originalText = preg_replace('/\[[\d,\-\s]+\]<\/blue>/', '', $originalText); + $originalText = trim(strip_tags($originalText)); + + if ($originalText === '' || empty($referenceNumbers)) { + continue; + } + + $result[] = [ + 'reference_raw' => $rawRef, + 'reference_numbers' => $referenceNumbers, + 'original_text' => $originalText, + 'reference_start' => $tagStart, + 'reference_end' => $tagEnd, + 'text_start' => $sentenceStart, + 'text_end' => $sentenceEnd, + ]; + } + + return $result; + } + + public function expandReferenceNumbers($refStr) + { + $refStr = str_replace( + [',', '–', '—', '−', '‐', '‑'], + [',', '-', '-', '-', '-', '-'], + trim($refStr) + ); + $numbers = []; + foreach (explode(',', $refStr) as $part) { + $part = trim($part); + if ($part === '') { + continue; + } + if (preg_match('/^(\d+)\s*-\s*(\d+)$/', $part, $m)) { + $start = intval($m[1]); + $end = intval($m[2]); + if ($start <= $end) { + $numbers = array_merge($numbers, range($start, $end)); + } + } elseif (ctype_digit($part)) { + $numbers[] = intval($part); + } + } + return array_values(array_unique($numbers)); + } + 
/**
 * Find the byte offset at which the sentence containing $position starts.
 *
 * The sentence starts right after the last terminator (".", "。", "!", "?"
 * or a line break) found before $position, or at 0 when there is none.
 *
 * @param string $content
 * @param int $position Byte offset of the citation marker.
 * @return int Byte offset of the first byte after the previous terminator.
 */
private function findSentenceStart($content, $position)
{
    // Only the text in front of the marker can hold the previous terminator.
    $head = (string)substr($content, 0, $position);
    $start = 0;
    foreach (['.', '。', '!', '?', "\n"] as $delimiter) {
        $pos = strrpos($head, $delimiter);
        if ($pos !== false) {
            // Skip the whole terminator: "。" is longer than one byte, so
            // "$pos + 1" would land in the middle of the character.
            $start = max($start, $pos + strlen($delimiter));
        }
    }
    return $start;
}

/**
 * Find the byte offset at which the sentence containing $position ends.
 *
 * The sentence ends right after the first terminator (".", "。", "!", "?"
 * or a line break) found at or after $position, or at the end of the content
 * when there is none.
 *
 * @param string $content
 * @param int $position Byte offset just behind the citation marker.
 * @return int Byte offset one past the terminator (terminator included).
 */
private function findSentenceEnd($content, $position)
{
    $length = strlen($content);
    if ($position >= $length) {
        return $length;
    }

    $end = $length;
    foreach (['.', '。', '!', '?', "\n"] as $delimiter) {
        $pos = strpos($content, $delimiter, $position);
        if ($pos !== false) {
            // Include the complete (possibly multibyte) terminator.
            $end = min($end, $pos + strlen($delimiter));
        }
    }
    return $end;
}

/**
 * Put a reference-check job on the queue.
 *
 * @param mixed $checkId Identifier passed to the job as "check_id".
 * @param int $delaySeconds When greater than 0 the job is scheduled with
 *                          Queue::later(), otherwise pushed immediately.
 * @throws \Exception Re-thrown after logging when the queue call fails.
 */
private function pushJob($checkId, $delaySeconds = 0)
{
    $jobClass = 'app\api\job\ReferenceCheck@fire';
    $data = ['check_id' => $checkId];
    try {
        if ($delaySeconds > 0) {
            $jobId = Queue::later($delaySeconds, $jobClass, $data, self::QUEUE_NAME);
        } else {
            $jobId = Queue::push($jobClass, $data, self::QUEUE_NAME);
        }
        // Log instead of var_dump(): dumping to stdout pollutes API responses
        // and worker output.
        \think\Log::info('ReferenceCheck pushJob queued check_id=' . $checkId . ' job_id=' . var_export($jobId, true));
    } catch (\Exception $e) {
        \think\Log::error('ReferenceCheck pushJob failed check_id=' . $checkId . ' ' . $e->getMessage());
        throw $e;
    }
}
} // end of the class whose header lies before this chunk
diff --git a/application/common/service/LLMService.php b/application/common/service/LLMService.php
new file mode 100644
index 00000000..2e056297
--- /dev/null
+++ b/application/common/service/LLMService.php
@@ -0,0 +1,490 @@
    // NOTE(review): the file header, class declaration and constructor opening
    // are not present in this chunk; the fragment below is kept as found.
    url = trim((string)Env::get('promotion.promotion_llm_url', ''));
    $this->model = trim((string)Env::get('promotion.promotion_llm_model', ''));
    $this->apiKey = trim((string)Env::get('promotion.promotion_llm_api_key', ''));
    $this->timeout = max(30, intval(Env::get('promotion.promotion_llm_timeout', 120)));
}

/**
 * Ask the configured LLM whether a reference entry supports a citing sentence.
 *
 * Returns a fallback "no match" result when the service is not configured,
 * when the request fails, or when the reply cannot be parsed as JSON. Inputs
 * are trimmed and capped at 2000 / 4000 characters before being sent.
 *
 * @param string $contextText Sentence in the article body that carries the citation.
 * @param string $referText Reference entry (or formatted refer text).
 * @return array Array with the keys is_match (bool), confidence (float, one of
 *               the fixed bands) and reason (string).
 */
public function checkReference($contextText, $referText)
{
    $fallback = [
        'is_match' => false,
        'confidence' => 0.0,
        'reason' => 'LLM not configured or request failed',
    ];
    \think\Log::info('llmUrl:'.$this->url);
    if ($this->url === '' || $this->model === '') {
        return $fallback;
    }

    $contextText = trim($contextText);
    $referText = trim($referText);
    if ($contextText === '' || $referText === '') {
        return [
            'is_match' => false,
            'confidence' => 0.0,
            'reason' => 'Empty citation context or reference text',
        ];
    }

    if (mb_strlen($contextText) > 2000) {
        $contextText = mb_substr($contextText, 0, 2000);
    }
    if (mb_strlen($referText) > 4000) {
        $referText = mb_substr($referText, 0, 4000);
    }

    $system = $this->buildReferenceCheckSystemPrompt();
    \think\Log::info('system:' . $system);

    $user = $this->buildReferenceCheckUserPrompt($contextText, $referText);
    \think\Log::info('user:' . $user);
    $payload = [
        'model' => $this->model,
        'temperature' => 0,
        'messages' => [
            ['role' => 'system', 'content' => $system],
            ['role' => 'user', 'content' => $user],
        ],
    ];

    $content = $this->postChat($payload);
    if ($content === null) {
        return $fallback;
    }

    $parsed = $this->parseJson($content);
    if ($parsed === null) {
        return $fallback;
    }

    // A textual "false" is a non-empty string and would count as a match
    // under !empty(); parse string values as booleans explicitly.
    $rawMatch = isset($parsed['is_match']) ? $parsed['is_match'] : false;
    $isMatch = is_string($rawMatch)
        ? filter_var($rawMatch, FILTER_VALIDATE_BOOLEAN)
        : !empty($rawMatch);

    $confidence = $this->snapReferenceCheckConfidence(
        $this->normalizeConfidence(isset($parsed['confidence']) ? $parsed['confidence'] : 0),
        $isMatch
    );

    return [
        'is_match' => $isMatch,
        'confidence' => $confidence,
        'reason' => $this->cleanReason((string)(isset($parsed['reason']) ? $parsed['reason'] : '')),
    ];
}

/**
 * System prompt used by checkReference(): instructs the model to act as a
 * nursing/medical journal editor and to answer with one minified JSON line
 * containing is_match, a confidence from six fixed values, and a reason.
 *
 * @return string
 */
private function buildReferenceCheckSystemPrompt()
{
    return <<<'PROMPT'
你是一名护理与医学期刊的资深编辑,专门校对「正文引用句」与「对应参考文献条目」是否匹配。

你的职责是判断:作者在该引用位置引用的观点/数据/结论/方法/定义,是否能够被该条参考文献合理支撑。

你只能依据用户提供的两段文本判断,不得假设已阅读全文,不得联网,不得编造文献中未出现的信息。

【输入内容】
你将收到:
1. 正文引用句(引用位置附近的一句话或一段话)
2. 当前对应的参考文献条目(仅当前编号,不是整篇参考文献列表)

你必须严格只评估「当前这一条参考文献」与引用句的关系。

====================
【核心判断目标】
判断:
正文中的核心论点、事实、数据、定义、护理措施、医学结论、研究发现、理论依据、政策依据等,是否可由该条参考文献合理支撑。

你评估的是“引用是否成立”,不是“句子是否正确”。

====================
【强制约束(必须遵守)】

1. 只能依据用户提供的信息判断
- 不得假设你看过全文。
- 不得根据常识补全文献内容。
- 不得根据作者、期刊名或研究热点脑补研究结果。
- 不得把“可能研究了”视为“能够支撑”。

2. 严禁串号判断
- 仅允许依据「当前引用句」与「当前参考文献条目」判断。
- 严禁利用其它参考文献编号或上下文内容推断当前文献。

3. 不得关键词硬匹配
- 不得因为标题里出现相同关键词(如护理、患者、干预、效果、治疗、心理)就直接判定匹配。
- 必须关注:对象、人群、疾病、干预方式、研究主题、核心结论是否一致。

4. 医学错引从严
若出现以下情况,优先判定不匹配:
- 同一大领域但具体疾病/对象不同
- 人群不同(儿童 vs 老年;ICU vs 普通病房等)
- 干预方式不同
- 指标或结局不同
- 把指南、综述、Meta分析、专家共识、原始研究混用导致支撑关系不成立
- 文献无法合理支持正文中的强结论(如“显著改善”“明显降低”“证实”“优于”“危险因素”“因果关系”等)

例如:
正文写:
“研究证实某护理显著降低死亡率”

文献仅是:
“某护理模式应用观察”

此时不得脑补效果成立,应从严判 false。

5. 特定证据类型必须一致
若正文明确声明:
- “随机对照研究显示”
- “Meta分析表明”
- “指南推荐”
- “系统综述指出”
- “专家共识建议”

而文献条目显示的证据类型不一致,应从严判 false。

6. 信息不足从严
若参考文献条目信息过少(仅作者+年份等):
- 只有在能够建立明确合理关联时才判 true。
- 无法建立明确关联时,判 false(confidence=0.35)。

====================
【评估步骤(按顺序在心里完成)】

第一步:主题域一致性
判断正文句子的核心主题是否与文献属于同一专业领域,包括但不限于:
- 疾病/诊断
- 护理问题
- 患者人群
- 医疗场景
- 干预措施
- 指标/结局
- 理论模型
- 政策/指南

第二步:关键断言对齐
判断正文中的核心断言是否可被文献合理支撑:

允许:
- 合理概括性引用
- 轻度表述扩展

不允许:
- 张冠李戴
- 过度推断
- 用弱证据支撑强结论
- 用相关性支撑因果性
- 用观察研究支撑RCT级别表述

第三步:错引排查
重点检查:
- 对象错
- 疾病错
- 场景错
- 指标错
- 方法错
- 证据类型错
- 研究层级不匹配

====================
【最终判定规则】

is_match(二选一,必须一致)

true:
满足以下全部条件:
- 主题明确相关
- 核心对象基本一致
- 正文关键论点能够被该文献合理支撑
- 不存在明显错引风险

false:
任一情况满足即判 false:
- 主题无关
- 具体对象明显不同
- 核心结论对不上
- 文献无法支撑正文强结论
- 证据类型不匹配
- 无法建立明确合理关联
- 信息不足且无法确认

边界不清时,从严判 false。

====================
【confidence 固定评分规则】

只能输出以下 6 个固定值之一:
0.95
0.85
0.75
0.35
0.25
0.15

禁止输出:
0.5、0.6、0.7、0.8、0.9 等任何其它数字。

评分标准:

0.95
高度匹配:
主题、对象、研究方向、关键论点均明确对应。

0.85
较匹配:
主题与核心论点一致,存在轻微概括,但仍合理支撑。

0.75
基本匹配:
大方向一致,但有一定表述泛化或轻微不精确。

0.35
存疑:
同领域但具体对象/结论不够明确;
或参考文献信息不足,建议人工复核。

0.25
较可能错引:
主题相关但核心论点明显偏离;
对象、场景、结局存在明显差异。

0.15
明确错引:
主题无关;
典型张冠李戴;
明显无法支撑正文内容。

硬性规则:
- is_match=true 时,confidence 只能是:
0.75 / 0.85 / 0.95

- is_match=false 时,confidence 只能是:
0.15 / 0.25 / 0.35

====================
【评分稳定原则】

- 相同输入必须得到相同结论。
- 优先依据“主题 + 核心断言”。
- 不要被单个关键词误导。
- 一句多引时,仅评价当前这一条文献。
- 边界情况从严,降低漏报错引风险。

====================
【reason 输出要求】

- 使用简体中文。
- 仅说明:
 1)主题是否一致;
 2)核心论点是否能够支撑。

- 禁止模糊措辞:
“可能有关”
“看起来一致”
“应该支持”

- 长度控制在 30~80 字。

====================
【输出格式(绝对严格)】

仅输出一行 minified JSON。
禁止 markdown。
禁止代码块。
禁止解释说明。
禁止换行。
禁止任何额外文字。

格式如下:

{"is_match":true|false,"confidence":0.15|0.25|0.35|0.75|0.85|0.95,"reason":"简体中文原因说明"}

【示例输出】

{"is_match":true,"confidence":0.95,"reason":"正文讨论的护理干预与文献研究对象、场景及核心结论一致,可合理支撑该引用。"}
PROMPT;
}

/**
 * Alternative, shorter system prompt for the same task (nursing/medical
 * journals: relevance check between a citing sentence and a reference entry).
 *
 * No caller is visible in this chunk; checkReference() uses
 * buildReferenceCheckSystemPrompt().
 *
 * @return string
 */
private function buildReferenceCheckSystemPrompt2()
{
    return <<<'PROMPT'
你是一名护理与医学期刊的资深编辑,专门校对「正文引用句」与「对应参考文献条目」是否匹配。
你只能依据用户提供的两段文本判断,不得假设已阅读全文,不得编造文献中未出现的信息。

## 校对目标
判断:作者在该引用位置引用的观点/数据/结论/方法/定义,是否可由该条参考文献合理支撑(主题与论证层面是否对得上)。

## 评估步骤(按顺序,在心里完成即可)
1. 主题域:正文句子的核心主题(疾病、人群、干预、结局、理论、政策等)与文献题目/作者/期刊/年份/条目内容是否属于同一专业领域。
2. 论点对齐:正文句子的关键断言,是否与该文献可能报告的内容方向一致(允许概括性引用,但不可张冠李戴)。
3. 错引排查:是否出现「仅同一大领域但具体对象不同」「人群/场景/指标明显不符」「把指南/综述/原始研究混用导致支撑关系不成立」等常见错引。
4. 信息不足:若文献条目过简(仅作者+年份等),只能做粗判;若完全无法建立合理关联,按不匹配处理。

## is_match 判定(二选一,必须一致)
- true:主题明确相关,且引用句的核心信息与该文献可能内容高度吻合或可被其合理概括支撑。
- false:主题无关、明显错引、具体论点对不上、或无法建立合理关联。边界不清时从严标 false(降低漏报错引风险)。

## confidence 评分(稳定性要求:只能使用下列 6 个固定值之一,禁止 0.72、0.8 等其它小数)
| 分值 | 含义 | 通常配合 is_match |
| 0.95 | 高度匹配:主题、对象、论点均清晰对应 | true |
| 0.85 | 较匹配:主题与论点一致,表述略宽但仍可接受 | true |
| 0.75 | 基本匹配:大方向对,有轻微不精确或概括过度 | true |
| 0.35 | 存疑:同领域但具体对不上,或信息不足,建议人工复核 | false |
| 0.25 | 较可能错引:主题或论点明显偏离 | false |
| 0.15 | 明确错引:主题无关或典型张冠李戴 | false |

硬性规则(必须遵守,否则视为无效输出):
- is_match=true 时,confidence 只能是 0.75、0.85 或 0.95。
- is_match=false 时,confidence 只能是 0.15、0.25 或 0.35。
- 禁止输出 0.5、0.6、0.9 等未列出的 confidence 值。

## 评分稳定原则
- 相同输入应得到相同结论;不要因措辞风格波动而改变档位。
- 优先依据「主题 + 关键断言」而非个别泛化词(如「研究」「护理」「患者」)判匹配。
- 一句多引时,只评价当前这一条文献与引用句的关系,勿与其它序号混淆。

## 输出格式(仅输出一行 minified JSON,无 markdown、无前后说明)
{"is_match":true|false,"confidence":0.15|0.25|0.35|0.75|0.85|0.95,"reason":"1-2句简体中文,说明匹配或不匹配的关键依据"}
PROMPT;
}

/**
 * Build the user message: the citing sentence followed by the reference
 * entry, each under its own heading, plus a closing instruction to return
 * JSON only.
 *
 * @param string $contextText
 * @param string $referText
 * @return string
 */
private function buildReferenceCheckUserPrompt($contextText, $referText)
{
    return "【正文引用句】(含该处引用所要支撑的观点,可能为中文或英文)\n"
        . $contextText
        . "\n\n【对应参考文献条目】(书目信息,可能不完整)\n"
        . $referText
        . "\n\n请按 system 中的步骤与评分表完成校对,只返回 JSON。";
}

/**
 * Snap the model's confidence onto the nearest allowed band and keep it
 * consistent with is_match: 0.75/0.85/0.95 for a match, 0.15/0.25/0.35
 * otherwise. On a tie the lower band wins (first band with the smallest
 * distance).
 *
 * @param float $confidence Value already normalised to 0..1.
 * @param bool $isMatch
 * @return float
 */
private function snapReferenceCheckConfidence($confidence, $isMatch)
{
    $matchBands = [0.75, 0.85, 0.95];
    $mismatchBands = [0.15, 0.25, 0.35];
    $bands = $isMatch ? $matchBands : $mismatchBands;

    $nearest = $bands[0];
    $minDiff = abs($confidence - $nearest);
    foreach ($bands as $band) {
        $diff = abs($confidence - $band);
        if ($diff < $minDiff) {
            $minDiff = $diff;
            $nearest = $band;
        }
    }
    return $nearest;
}

/**
 * POST a chat-completion payload to the configured endpoint.
 *
 * @param array $payload Request body; sent as JSON.
 * @return string|null The reply text taken from choices[0].message.content or
 *                     from a top-level "content" field; null on any encoding,
 *                     transport, HTTP-status or decoding failure.
 */
private function postChat(array $payload)
{
    try {
        $body = json_encode($payload, JSON_UNESCAPED_UNICODE);
        if ($body === false) {
            // Typically invalid UTF-8 in the prompt text.
            \think\Log::error('LLMService postChat json_encode failed, code=' . json_last_error());
            return null;
        }

        $ch = curl_init();
        if ($ch === false) {
            \think\Log::error('LLMService postChat curl_init failed');
            return null;
        }

        $headers = ['Content-Type: application/json'];
        if ($this->apiKey !== '') {
            $headers[] = 'Authorization: Bearer ' . $this->apiKey;
        }

        curl_setopt($ch, CURLOPT_URL, $this->url);
        curl_setopt($ch, CURLOPT_POST, true);
        curl_setopt($ch, CURLOPT_POSTFIELDS, $body);
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
        curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, min(15, $this->timeout));
        curl_setopt($ch, CURLOPT_TIMEOUT, $this->timeout);
        curl_setopt($ch, CURLOPT_HTTPHEADER, $headers);

        $raw = curl_exec($ch);
        // Read everything needed from the handle before closing it.
        $curlError = $raw === false ? curl_error($ch) : '';
        $httpCode = intval(curl_getinfo($ch, CURLINFO_HTTP_CODE));
        curl_close($ch);

        if ($raw === false) {
            \think\Log::error('LLMService postChat curl error: ' . $curlError);
            return null;
        }
        \think\Log::info('httpCode:'.$httpCode);
        if ($httpCode < 200 || $httpCode >= 300) {
            return null;
        }

        $data = json_decode($raw, true);
        if (!is_array($data)) {
            return null;
        }
        if (isset($data['choices'][0]['message']['content'])) {
            return (string)$data['choices'][0]['message']['content'];
        }
        if (isset($data['content'])) {
            return (string)$data['content'];
        }
    } catch (\Exception $exception) {
        // Fully qualified on purpose: an unqualified "Exception" inside a
        // namespace only matches when the class has been imported.
        \think\Log::error('LLMService postChat failed: ' . $exception->getMessage());
    }

    return null;
}

/**
 * Decode the model reply as a JSON object.
 *
 * Markdown code fences are stripped first; if the remaining text is not valid
 * JSON, the span from the first "{" to the last "}" is tried.
 *
 * @param string $raw
 * @return array|null Decoded array, or null when nothing decodable is found.
 */
private function parseJson($raw)
{
    $raw = trim($raw);
    if ($raw === '') {
        return null;
    }
    $raw = preg_replace('/^```[a-zA-Z]*\s*|```$/m', '', $raw);
    $raw = trim($raw);

    $obj = json_decode($raw, true);
    if (is_array($obj)) {
        return $obj;
    }
    if (preg_match('/\{.*\}/s', $raw, $m)) {
        $obj = json_decode($m[0], true);
        if (is_array($obj)) {
            return $obj;
        }
    }
    return null;
}

/**
 * Normalise a confidence value to the range 0..1.
 *
 * Non-numeric input becomes 0.0; values in (1, 100] are read as percentages;
 * the result is clamped to [0, 1] and rounded to four decimals.
 *
 * @param mixed $value
 * @return float
 */
private function normalizeConfidence($value)
{
    if (!is_numeric($value)) {
        return 0.0;
    }
    $v = (float)$value;
    if ($v > 1.0 && $v <= 100.0) {
        $v = $v / 100.0;
    }
    return round(max(0.0, min(1.0, $v)), 4);
}

/**
 * Collapse whitespace in the model's reason and cap it at 500 characters;
 * an empty reason is replaced by a placeholder.
 *
 * @param string $text
 * @return string
 */
private function cleanReason($text)
{
    $text = trim(preg_replace('/\s+/', ' ', $text));
    if (mb_strlen($text) > 500) {
        $text = mb_substr($text, 0, 500);
    }
    return $text !== '' ? $text : 'No reason provided';
}
} // end of class LLMService