参考文献本地大模型校对

2026-05-26 17:33:34 +08:00
parent 68cf1867d8
commit c1107780a7
9 changed files with 1357 additions and 504 deletions
--- a/application/common/service/LLMService.php
+++ b/application/common/service/LLMService.php
@@ -32,11 +32,14 @@ class LLMService
     */
    public function checkReference($contextText, $referText, $isAgain = false, $doiBlock = null)
    {
+        // request_failed=true 表示"LLM 通讯/解析层面的失败"（可重试，区别于业务上的"未命中"）；
+        // 上游 runReferenceCheckOnce 会据此把 DB.status 置为 2(失败) 并抛异常触发队列重试
        $fallback = [
-            'can_support' => false,
-            'is_match'    => false,
-            'confidence'  => 0.0,
-            'reason'      => 'LLM not configured or request failed',
+            'can_support'    => false,
+            'is_match'       => false,
+            'confidence'     => 0.0,
+            'reason'         => 'LLM not configured or request failed',
+            'request_failed' => true,
        ];
        if ($this->url === '' || $this->model === '') {
            \think\Log::warning('ReferenceCheck LLM: url or model not configured');
@@ -47,6 +50,7 @@ class LLMService
        $referText = trim($referText);
        $doiBlock = trim((string)$doiBlock);
        if ($contextText === '' || $referText === '') {
+            // 空文本是入参问题，不是 LLM 故障，不需要重试
            return [
                'can_support' => false,
                'is_match'    => false,
@@ -149,10 +153,10 @@ class LLMService
 你是文献引用校对助手。判断【正文全文】与【参考文献书目】是否相关、能否用于支撑正文中的引用。

 【核心原则：从宽判断，避免误杀】
-默认倾向 can_support=true。只要文献与正文不是「驴唇不对马嘴」，即判为相关、能支撑。
+默认倾向 can_support=true。只要文献与正文不是「风马牛不相及」，即判为相关、能支撑。
 不要求变量一致、不要求结论逐条对应、不要求研究设计相同。

-【仅当以下情况才判 can_support=false（驴唇不对马嘴）】
+【仅当以下情况才判 can_support=false（与正文明显无关）】
 - 学科/主题完全无关（如正文讲深度学习聚类，文献是糖尿病步态检测）。
 - 明显张冠李戴（正文断言 A 疗法的效果，文献研究的是完全不同的 B 问题且无关联）。
 - 文献条目与正文讨论的对象/场景毫无交集，且无法作背景或理论引用。
@@ -164,7 +168,7 @@ class LLMService

 【confidence 固定档位（禁止其它小数）】
 can_support=true：0.65（有关联但较泛）/ 0.78 / 0.85 / 0.92 / 0.98（非常确定相关）
-can_support=false：0.15（明确驴唇不对马嘴）/ 0.25 / 0.35 / 0.45（仅当实在无法建立任何合理关联）
+can_support=false：0.15（明确风马牛不相及）/ 0.25 / 0.35 / 0.45（仅当实在无法建立任何合理关联）

 【输出】仅一行 minified JSON，无 markdown：
 {"can_support":true|false,"is_match":true|false,"confidence":0.15|0.25|0.35|0.45|0.65|0.78|0.85|0.92|0.98,"reason":"30-80字简体中文"}
@@ -176,7 +180,7 @@ PROMPT;
    {
        return "【正文全文 article_main.content】\n" . $contextText
            . "\n\n【参考文献书目 refer_text】\n" . $referText
-            . "\n\n请从宽判断：非驴唇不对马嘴即 can_support=true，只返回 JSON。";
+            . "\n\n请从宽判断：文献与正文非风马牛不相及即可判 can_support=true，只返回 JSON。";
    }

    /** 第二次校对：Crossref 摘要（Refer_doi） */
@@ -186,12 +190,12 @@ PROMPT;
 你是文献引用二次校对助手。已根据 Refer_doi 从 Crossref（https://api.crossref.org/works/）获取摘要，请结合【正文全文】复核该文献是否相关。

 【核心原则：与第一次相同，从宽判断】
-默认倾向 can_support=true。只要 Crossref 摘要（或书目）与正文不是驴唇不对马嘴，即判相关、能支撑。
+默认倾向 can_support=true。只要 Crossref 摘要（或书目）与正文不是风马牛不相及，即判相关、能支撑。
 以【Crossref 摘要】为准；摘要与书目冲突时以摘要为准。

 【仅当以下情况才判 can_support=false】
 - 摘要显示的研究主题/对象/方法与正文讨论内容完全风马牛不相及。
-- 典型驴唇不对马嘴、张冠李戴，且无法解释为背景或泛化引用。
+- 典型风马牛不相及、张冠李戴，且无法解释为背景或泛化引用。

 【以下情况均应 can_support=true】
 - 摘要与正文属同领域或相近方向，能作背景、理论或方向性支撑。
@@ -217,7 +221,7 @@ PROMPT;
            . "\n\n【参考文献书目 refer_text】\n" . $referText
            . "\n\n【Crossref 摘要】（Refer_doi → api.crossref.org/works/）\n"
            . ($doiBlock !== '' ? $doiBlock : '（未获取到摘要，请结合 refer_text 从宽判断）')
-            . "\n\n非驴唇不对马嘴即 can_support=true，只返回 JSON。";
+            . "\n\n文献与正文非风马牛不相及即可判 can_support=true，只返回 JSON。";
    }
    private function buildReferenceCheckSystemPrompt3()
    {