diff --git a/application/common/service/ReferenceRelevanceLlmService.php b/application/common/service/ReferenceRelevanceLlmService.php
new file mode 100644
index 00000000..90e1fdef
--- /dev/null
+++ b/application/common/service/ReferenceRelevanceLlmService.php
@@ -0,0 +1,609 @@
+url = trim((string)Env::get('promotion.promotion_llm_url', ''));
+        $this->model = trim((string)Env::get('promotion.promotion_llm_model', ''));
+        $this->apiKey = trim((string)Env::get('promotion.promotion_llm_api_key', ''));
+        $this->timeout = max(180, intval(Env::get('promotion.promotion_llm_timeout', 180)));
+        // Cap the context length sent to the LLM to reduce per-request inference time (overridable via env).
+        $this->maxSectionChars = max(1500, intval(Env::get('promotion.relevance_llm_max_section_chars', 4500)));
+        $this->maxLocalContextChars = max(600, intval(Env::get('promotion.relevance_llm_max_local_context_chars', 1800)));
+        $this->maxReferChars = max(1500, intval(Env::get('promotion.relevance_llm_max_refer_chars', 3500)));
+        $this->maxAbstractChars = max(1500, intval(Env::get('promotion.relevance_llm_max_abstract_chars', 3500)));
+    }
+
+    /**
+     * Ask the LLM how relevant each cited reference is to the citing passage.
+     *
+     * All text inputs are cast to string, trimmed and truncated to the configured
+     * character limits before being sent. Every non-success path returns an empty
+     * `results` list; `request_failed` is set when the LLM is not configured, the
+     * HTTP request fails, the reply cannot be parsed, or no usable row survives
+     * normalization. An empty section or reference text returns early without
+     * `request_failed`.
+     *
+     * @param mixed $sectionText   Text of the article section that contains the citation.
+     * @param mixed $localContext  Text surrounding the citation position.
+     * @param mixed $referText     Bibliography text of the cited reference(s).
+     * @param mixed $abstractText  Optional abstract / cleaned content of the reference(s).
+     * @param mixed $citeGroupRefs Optional reference numbers cited together, e.g. "1,2".
+     * @return array{results:array,request_failed?:bool,reason?:string}
+     */
+    public function checkRelevance($sectionText, $localContext, $referText, $abstractText = '', $citeGroupRefs = '')
+    {
+        $fallback = [
+            'results' => [],
+            'request_failed' => true,
+            'reason' => 'LLM not configured or request failed',
+        ];
+        if ($this->url === '' || $this->model === '') {
+            return $fallback;
+        }
+
+        $sectionText = trim((string)$sectionText);
+        $localContext = trim((string)$localContext);
+        $referText = trim((string)$referText);
+        $abstractText = trim((string)$abstractText);
+        if ($sectionText === '' || $referText === '') {
+            return ['results' => [], 'reason' => 'Empty section or reference text'];
+        }
+
+        // mb_substr() leaves shorter strings untouched, so no length pre-check is needed.
+        $sectionText = mb_substr($sectionText, 0, $this->maxSectionChars);
+        $localContext = mb_substr($localContext, 0, $this->maxLocalContextChars);
+        $referText = mb_substr($referText, 0, $this->maxReferChars);
+        $abstractText = mb_substr($abstractText, 0, $this->maxAbstractChars);
+
+        $payload = [
+            'model' => $this->model,
+            'temperature' => 0,
+            'messages' => [
+                ['role' => 'system', 'content' => $this->buildSystemPrompt()],
+                ['role' => 'user', 'content' => $this->buildUserPrompt($sectionText, $localContext, $referText, $abstractText, $citeGroupRefs)],
+            ],
+        ];
+
+        $content = $this->postChat($payload);
+        if ($content === null) {
+            $reason = $this->lastPostError !== '' ? $this->lastPostError : 'LLM request failed';
+            return array_merge($fallback, ['reason' => $reason]);
+        }
+
+        $parsed = $this->parseJson($content);
+        if ($parsed === null) {
+            return array_merge($fallback, ['reason' => 'LLM response JSON parse failed']);
+        }
+
+        $results = $this->normalizeResults($parsed, $citeGroupRefs, $localContext, $referText, $abstractText);
+        if (empty($results)) {
+            return array_merge($fallback, ['reason' => 'LLM returned empty or invalid results']);
+        }
+
+        return ['results' => $results];
+    }
+
+    /**
+     * Return the fixed system prompt (Chinese) that defines the scoring rules,
+     * the eight allowed score bands, the level/role vocabulary and the JSON
+     * output schema the model must follow.
+     *
+     * @return string
+     */
+    private function buildSystemPrompt()
+    {
+        return <<<'PROMPT'
+你是一名护理、医学、生物医学与科研期刊的资深学术编辑,正在执行「参考文献主题相关性校对」。
+
+你的任务:判断【引用位置正文表述】与【对应编号参考文献】在主题、研究对象、疾病/场景/结局方向上是否相关,能否作为该处引用的合理来源。
+
+注意:这是「相关性」校对,侧重引用处具体 claim 与文献内容是否匹配;**不是**判断「是否同一疾病/同一领域」。
+
+==================================================
+【零、最硬规则(违反则输出无效)】
+1. **单条 relevance_score 只评价该编号文献单独**与引用处的关系;不得因联合组整体合理而抬高弱相关文献的单条分。
+2. **禁止「同病高分」**:正文与文献都涉及 CRC,不等于单条可给 0.85~0.92。
+   **但若引用处 claim 本身就是机制/通路/异质性/耐药/治疗挑战**,且**研究主语一致**(同一疾病/同一化合物/同一干预对象),文献(含摘要/清洗内容)讨论同病多通路、遗传改变、耐药等,应给 **0.65~0.78**,不得误降到 0.45。
+   **主语不一致时仍适用本条禁止高分**:引用处主语为化合物 X,文献却是其他植物/提取物/计算预测,即使提到 X 或相同通路名,也不得因此给 0.78+。
+3. 引用处若为**流行病学/负担类 claim**(most common、incidence、mortality、burden、全球高发等):
+   - 机制研究、分子通路、细胞增殖/迁移、血管生成等**原始研究** → 单条通常 **0.45 或更低**,`is_relevant=0`,`minimal_relevance`
+   - 不得因摘要提到 colorectal cancer 就给 0.92
+   - 仅当文献为流行病学综述/公共卫生研究,或明确讨论发病率、死亡率、疾病负担时,单条才可 **0.85~0.92**
+4. **联合分写在 combined_relevance_score**,与单条分必须可分离(例如 [1,2] 时文献1=0.45、文献2=0.92、联合=0.92)。
+5. **「来源/化学分类」型句子**(naturally occurring、pentacyclic triterpenoid、found in fruits/vegetables/medicinal plants、并列举具体植物学名):
+   - 先判文献类型:来源综述 / 生物活性综述 最适合;**抗癌治疗综述**对「来源分布」claim 通常仅 **0.65**
+   - 单篇可差异化打分(如 0.92 / 0.92 / 0.65),**不得**因联合而三篇都给高分
+   - 若原句含**具体列举项**(如多个植物学名),而材料未逐一核实全部学名,联合分通常 **≤0.85**(不得给 0.98)
+6. **多要素综括句**(一句同时塞入:药学/研究兴趣 + 大量前临床研究 + 多种活性[抗炎/抗氧化/抗癌等] + 多个癌种/对象列举):
+   - 单篇即使是综述,通常仅 partially_related ~ near-direct(**0.78~0.86**),**不轻易给 0.92**(单篇难逐项覆盖全部要素)
+   - **联合分是整句覆盖度评估,可低于最高单条分**:若整句要素需多篇拼合、且含作者整合概括,联合通常 **0.72~0.78(partially_related)**,不给 0.85+
+7. **联合分不是「取最高单条分」**:当各单篇都只覆盖整句一部分、需互补拼合时,联合分应反映「整句作为一个整体被支撑的完整度」,**允许低于任何一篇单条分**。
+8. **主语/研究对象层级必须对齐**:引用处主语为某化合物/分子(如「X has been demonstrated…」)时,文献核心对象须为 **X 本身**或以 X 为核心的实验/综述。
+   - **植物提取物/混合物**研究、**其他物种/其他植物**的计算预测、成分表中顺带出现 X → 通常 **0.45 或更低**
+   - **关键:提取物即使 X 含量很高(如 50%+)且显示了抗癌/凋亡活性,活性归因于提取物整体而非 X 单体单独验证 → 仍属 weakly_related(≤0.45)、`minimal_relevance`**,不得评为 supplementary_relevance/0.78
+   - 只有当文献**针对 X 单体单独做了验证**(X monomer 处理、X 单独剂量效应等)时,主语才算对齐,方可进入 0.65+
+   - **不得**因摘要/讨论出现与引用句相同的通路名、凋亡、抗癌等词就给 0.78+
+9. **证据层级与 demonstrated / mechanistically**:
+   - 本文实验结果或针对 X 的系统综述 > 计算预测/混合成分推测
+   - **讨论(Discussion)转引他人关于 X 的机制总结 ≠ 该文自身证据**;据此最多 **0.45~0.65**,不得评为 highly_related
+   - in silico / computational prediction 不足以支撑「has been demonstrated to mechanistically…」式强语气 claim 的高分
+10. **点名通路/功能结局须逐项核对**:原句逐条列举通路(如 PI3K/AKT、MAPK、NF-κB)或结局(增殖、凋亡、血管生成、炎症信号等)时,**每一项单独核对是否在本文证据中成立**(非仅背景提及)。
+   - 讨论转述既往文献 ≠ 本文证明该项
+   - 缺原句任一点名项(如 angiogenesis)→ 单条通常 **不得 0.78+**
+   - **「覆盖部分结局」不足以进入 0.78**:原句点名了多条通路 + 多个结局,文献仅命中其中 1~2 个结局(如仅凋亡/增殖),且**点名通路在本文结果中全部缺失(仅讨论转引)**或主语层级不对 → 单条 **限 0.45(weakly_related / minimal_relevance)**,不得给 0.65~0.78
+   - 仅同领域沾边 1–2 项、主语或机制层级不对 → **0.45**
+   - **进入 0.65~0.78 的前提**:主语对齐(X 单体)+ 本文自身结果命中原句点名通路/结局的多数项;几乎全部明确对应 → **0.85+**
+
+==================================================
+【一、必须先拆解 claim】
+从【本引用位置附近上下文】中提炼最小主张单元(Claim A, Claim B…),**不要**把整句笼统归为「大概讲抗癌」。例如:
+- **主语/研究对象**(化合物单体 vs 植物提取物 vs 其他物种;是否「X has been demonstrated」)
+- **证据语气与层级**(demonstrated / mechanistically vs predict / suggest;本文结果 vs 讨论转引)
+- 疾病流行病学(高发、死亡率)
+- **点名通路/分子机制**(PI3K/AKT、MAPK、NF-κB 等,须逐项)
+- **点名功能结局**(抑制增殖、凋亡、血管生成、炎症信号等,须逐项)
+- 治疗/干预现状
+- **化合物化学类别**(如 pentacyclic triterpenoid)
+- **天然来源分布**(fruits / vegetables / medicinal plants)
+- **具体列举项**(植物学名、药名、基因名等,须逐项核对)
+
+==================================================
+【二、逐篇文献单独判断(每条 result 对应一个 reference_no)】
+对 cite_group_refs 中的每一篇文献,单独输出:
+- 该文献与引用处哪些 claim 主题相关、哪些不相关(含具体列举项是否覆盖)
+- 文献类型是否匹配引用用途(来源综述 / 生物活性综述 / 机制研究 / 流行病学综述 / 抗癌治疗综述等)
+- relevance_score:只能使用 0.98 / 0.92 / 0.85 / 0.78 / 0.65 / 0.45 / 0.25 / 0.15
+- relevance_level:highly_related | partially_related | weakly_related | unrelated
+- is_relevant:score>=0.65 为 1,否则 0
+- relevance_role:
+  - primary_relevance:该文献是引用处主题的主要相关来源
+  - supplementary_relevance:部分相关、补充性
+  - minimal_relevance:仅边缘/背景沾边
+  - no_meaningful_relevance:与引用处核心表述基本无关
+- reason:中文,须写明:①文献类型与**核心研究对象** ②**本文自身证据**覆盖了哪些 claim / 哪些未覆盖(含点名通路、功能结局、具体列举项;讨论转引须标明) ③**若仅为同主题不同 claim 或主语层级不对须明确写出** ④为何此分值而非更高或更低
+
+主语/层级不对 → 单条 **0.45**,不得因讨论提及相同通路给 0.78:
+引用处 claim 为「化合物 X 经 PI3K/AKT 等机制 demonstrated…」,文献为其他植物提取物或计算预测、仅在讨论转引他人 X 机制 → 0.45,weakly_related,is_relevant=0。
+
+机制文引用流行病学句 → 单条 **0.45**,不得 0.92:
+文献为 CRC 机制研究,引用处 claim 为全球高发/死亡率,文献无流行病学数据 → 0.45,minimal_relevance,is_relevant=0。
+
+==================================================
+【三、联合引用 combined_*(同一 cite_group_refs 内各行必须一致)】
+当 cite_group_refs 为 "1,2" 等多篇时,除逐篇判断外,必须给出引用组整体结论:
+- 这些文献合起来,是否足以支撑/匹配该引用位置的整体表述?
+- combined_relevance_score:八档固定分值之一,**不是单条平均分**
+- 若一篇已强相关、其余仅弱补充,联合分可接近主相关文献,但**不必等于最高单条分**
+- 若原句含具体列举项(学名等)且材料未逐一核实,联合分通常 **0.85**,不给 0.98
+- 若核心 claim 无任何文献明确覆盖,联合分不能虚高
+- 多篇联合仍缺主语对齐、缺原句点名通路/结局、或主要靠讨论转引 → 联合分通常 **≤0.45~0.65**,不得因单篇讨论出现相同关键词给到 0.78+
+- combined_is_relevant:combined_relevance_score>=0.65 为 1
+- combined_relevance_level:与 combined 分数对应的等级
+- combined_reason:中文综合结论,说明各文献分工(如「文献2综述覆盖流行病学主 claim,文献1仅机制补充」)及最终分值理由
+
+单条引用时:combined_* 与单条一致,combined_reason 可写「」。
+
+==================================================
+【四、评分与等级对照】
+0.98 / 0.92 / 0.85 = highly_related
+文献直接支持整句主旨,大部分关键要素都在文中明确出现
+0.78 / 0.65 = partially_related
+文献只支撑其中一部分,或支撑方式偏间接
+0.45 = weakly_related
+只是同领域文献,但与句子事实对应很弱
+0.25 / 0.15 = unrelated
+基本不支撑该句
+≤0.15 = not_support
+不支撑
+
+==================================================
+【五、输出 JSON(仅 JSON,无 markdown)】
+{
+  "results": [
+    {
+      "reference_no": 1,
+      "cite_group_refs": "1,2",
+      "is_relevant": 0,
+      "relevance_score": 0.45,
+      "relevance_level": "weakly_related",
+      "relevance_role": "minimal_relevance",
+      "reason": "中文单条结论",
+      "combined_is_relevant": 1,
+      "combined_relevance_score": 0.92,
+      "combined_relevance_level": "highly_related",
+      "combined_reason": "中文联合结论"
+    },
+    {
+      "reference_no": 2,
+      "cite_group_refs": "1,2",
+      ...
+    }
+  ]
+}
+PROMPT;
+    }
+
+    /**
+     * Assemble the user message: the section text, the citation group (labelled
+     * joint or single depending on whether it contains a comma), the local
+     * context, the bibliography text, the optional abstract, and a closing
+     * instruction. Empty optional parts are omitted; parts are joined by a
+     * blank line.
+     *
+     * @param string $sectionText   Already trimmed/truncated section text.
+     * @param string $localContext  Already trimmed/truncated local context (may be empty).
+     * @param string $referText     Already trimmed/truncated bibliography text.
+     * @param string $abstractText  Already trimmed/truncated abstract text (may be empty).
+     * @param mixed  $citeGroupRefs Reference numbers cited together, e.g. "1,2".
+     * @return string
+     */
+    private function buildUserPrompt($sectionText, $localContext, $referText, $abstractText, $citeGroupRefs)
+    {
+        // Cast once so strpos() below always receives a string (callers may pass an int).
+        $citeGroupRefs = trim((string)$citeGroupRefs);
+
+        $parts = ["【正文节 t_article_main】\n" . $sectionText];
+        if ($citeGroupRefs !== '') {
+            $mode = strpos($citeGroupRefs, ',') !== false ? '联合引用' : '单独引用';
+            $parts[] = "【引用文献组 cite_group_refs】{$citeGroupRefs}({$mode})";
+        }
+        if ($localContext !== '') {
+            $parts[] = "【本引用位置附近上下文(优先据此拆解 claim)】\n" . $localContext;
+        }
+        $parts[] = "【参考文献书目(按编号)】\n" . $referText;
+        if ($abstractText !== '') {
+            $parts[] = "【文献摘要/清洗后内容(Europe PMC·PubMed·Crossref·PDF)】\n" . $abstractText;
+        }
+        $parts[] = '请先拆解最小主张单元(主语层级、证据来源、点名通路/结局逐项核对),判断每篇文献类型与**本文自身证据**,再**逐篇独立**给出单条 relevance_score(讨论转引、提取物/计算预测不得抬高;弱相关文献不得因联合而高分),最后给出 combined_*。仅输出 results 数组 JSON。';
+
+        return implode("\n\n", $parts);
+    }
+
+    /**
+     * Turn the decoded LLM reply into a list of normalized result rows.
+     *
+     * Accepted reply shapes: an object with a `results` list, a single row
+     * object (has `reference_no` or `relevance_score`), or a bare top-level list
+     * of row objects. Rows that are not arrays or have no positive
+     * `reference_no` are skipped. Scores are snapped to the allowed bands,
+     * single-reference and combined fields are made self-consistent, and the
+     * combined fields are then synchronized across rows of the same citation
+     * group.
+     *
+     * @param array  $parsed               Decoded JSON from the LLM.
+     * @param mixed  $defaultCiteGroupRefs Citation group used when a row omits `cite_group_refs`.
+     * @param string $localContext         Unused; kept so existing calls stay valid.
+     * @param string $referText            Unused; kept so existing calls stay valid.
+     * @param string $abstractText         Unused; kept so existing calls stay valid.
+     * @return array List of normalized rows (possibly empty).
+     */
+    private function normalizeResults(array $parsed, $defaultCiteGroupRefs, $localContext = '', $referText = '', $abstractText = '')
+    {
+        $rows = [];
+        if (isset($parsed['results']) && is_array($parsed['results'])) {
+            $rows = $parsed['results'];
+        } elseif (isset($parsed['reference_no']) || isset($parsed['relevance_score'])) {
+            $rows = [$parsed];
+        } elseif (isset($parsed[0]) && is_array($parsed[0])) {
+            // The user prompt asks for "the results array", so the model may answer
+            // with a bare top-level list instead of {"results": [...]}.
+            $rows = $parsed;
+        }
+
+        $bands = $this->getScoreBands();
+
+        $out = [];
+        foreach ($rows as $item) {
+            if (!is_array($item)) {
+                continue;
+            }
+            $refNo = intval(isset($item['reference_no']) ? $item['reference_no'] : 0);
+            if ($refNo <= 0) {
+                continue;
+            }
+
+            $score = $this->snapScore(floatval(isset($item['relevance_score']) ? $item['relevance_score'] : 0), $bands);
+            // The model's explicit flag, when present, overrides the score-derived default.
+            $isRelevant = $score >= 0.65 - 0.001;
+            if (array_key_exists('is_relevant', $item)) {
+                $isRelevant = $this->boolVal($item['is_relevant']);
+            }
+
+            $level = $this->levelFromScore($score, isset($item['relevance_level']) ? $item['relevance_level'] : '');
+            // A missing or unrecognised role falls back to the role implied by the score;
+            // defaulting it to "no_meaningful_relevance" would cap an otherwise valid
+            // high score at 0.25 in enforceSingleReferenceConsistency().
+            $role = $this->normalizeRelevanceRole(
+                isset($item['relevance_role']) ? $item['relevance_role'] : '',
+                $this->roleFromScore($score)
+            );
+            $reason = $this->cleanReason(isset($item['reason']) ? $item['reason'] : '');
+
+            list($score, $level, $isRelevant, $role) = $this->enforceSingleReferenceConsistency(
+                $score,
+                $level,
+                $isRelevant,
+                $role,
+                $bands
+            );
+
+            // When the model omits the combined score, reuse the (already enforced) single score.
+            $combinedScore = $this->snapScore(
+                floatval(isset($item['combined_relevance_score']) ? $item['combined_relevance_score'] : $score),
+                $bands
+            );
+            $combinedRelevant = $combinedScore >= 0.65 - 0.001;
+            if (array_key_exists('combined_is_relevant', $item)) {
+                $combinedRelevant = $this->boolVal($item['combined_is_relevant']);
+            }
+
+            $combinedLevel = $this->levelFromScore(
+                $combinedScore,
+                isset($item['combined_relevance_level']) ? $item['combined_relevance_level'] : ''
+            );
+            list($combinedScore, $combinedLevel, $combinedRelevant) = $this->enforceCombinedConsistency(
+                $combinedScore,
+                $combinedLevel,
+                $combinedRelevant,
+                $bands
+            );
+
+            $citeGroupRefs = trim((string)(isset($item['cite_group_refs']) ? $item['cite_group_refs'] : $defaultCiteGroupRefs));
+            if ($citeGroupRefs === '' && $defaultCiteGroupRefs !== '') {
+                $citeGroupRefs = trim((string)$defaultCiteGroupRefs);
+            }
+
+            $out[] = [
+                'reference_no' => $refNo,
+                'cite_group_refs' => $citeGroupRefs,
+                'is_relevant' => $isRelevant ? 1 : 0,
+                'relevance_score' => $score,
+                'relevance_level' => $level,
+                'relevance_role' => $role,
+                'reason' => $reason,
+                'combined_is_relevant' => $combinedRelevant ? 1 : 0,
+                'combined_relevance_score' => $combinedScore,
+                'combined_relevance_level' => $combinedLevel,
+                'combined_reason' => $this->cleanReason(isset($item['combined_reason']) ? $item['combined_reason'] : ''),
+            ];
+        }
+
+        return $this->syncCombinedFieldsAcrossGroup($out);
+    }
+
+    /**
+     * Reconcile score, level, relevance flag and role of a single reference.
+     *
+     * Order of application: (1) the role caps or raises the score, (2) the level
+     * then clamps the score into its own band, (3) the relevance flag and the
+     * 0.65 threshold are reconciled, (4) the score is snapped to a band.
+     * Note that levelFromScore() returns a canonical `$level` unchanged, so the
+     * calls below keep the incoming level rather than recomputing it from the
+     * score; the level is only rewritten by the explicit assignments.
+     *
+     * @param float  $score      Snapped relevance score.
+     * @param string $level      Canonical relevance level.
+     * @param bool   $isRelevant Relevance flag.
+     * @param string $role       Canonical relevance role.
+     * @param array  $bands      Allowed score bands.
+     * @return array [score, level, isRelevant, role]
+     */
+    private function enforceSingleReferenceConsistency($score, $level, $isRelevant, $role, array $bands)
+    {
+        $score = floatval($score);
+        if ($role === 'no_meaningful_relevance') {
+            if ($score > 0.25) {
+                $score = 0.25;
+            }
+            $level = 'unrelated';
+            $isRelevant = false;
+        } elseif ($role === 'minimal_relevance') {
+            if ($score > 0.45) {
+                $score = 0.45;
+            }
+            $level = 'weakly_related';
+            $isRelevant = false;
+        } elseif ($role === 'supplementary_relevance') {
+            if ($score > 0.78) {
+                $score = 0.78;
+            }
+            $level = $this->levelFromScore($score, $level);
+        } elseif ($role === 'primary_relevance') {
+            if ($score < 0.85) {
+                $score = 0.85;
+            }
+            $isRelevant = true;
+            $level = $this->levelFromScore($score, $level);
+        }
+
+        // Clamp the score into the band that belongs to the level.
+        if ($level === 'weakly_related' && $score > 0.45) {
+            $score = 0.45;
+            $isRelevant = false;
+        } elseif ($level === 'unrelated' && $score > 0.25) {
+            $score = 0.25;
+            $isRelevant = false;
+        } elseif ($level === 'highly_related' && $score < 0.85) {
+            $score = 0.85;
+            $isRelevant = true;
+        } elseif ($level === 'partially_related') {
+            if ($score > 0.78) {
+                $score = 0.78;
+            }
+            if ($score < 0.65) {
+                $score = 0.65;
+            }
+            $isRelevant = true;
+        }
+
+        // The flag and the 0.65 threshold must agree.
+        if (!$isRelevant && $score >= 0.65) {
+            $score = 0.45;
+            $level = 'weakly_related';
+        }
+        if ($isRelevant && $score < 0.65) {
+            $score = 0.65;
+            $level = 'partially_related';
+        }
+
+        $score = $this->snapScore($score, $bands);
+        $level = $this->levelFromScore($score, $level);
+
+        return [$score, $level, $isRelevant, $role];
+    }
+
+    /**
+     * Make the combined (citation-group) fields agree with the combined score.
+     *
+     * The score is authoritative: it is snapped to a band, and both the level
+     * and the relevance flag are derived from it. The incoming level and flag
+     * are ignored; passing the level through levelFromScore() as a hint would
+     * return it unchanged and could leave e.g. 0.92 paired with
+     * "weakly_related".
+     *
+     * @param float  $combinedScore    Combined relevance score.
+     * @param string $combinedLevel    Ignored; kept so existing calls stay valid.
+     * @param bool   $combinedRelevant Ignored; recomputed from the score.
+     * @param array  $bands            Allowed score bands.
+     * @return array [combinedScore, combinedLevel, combinedRelevant]
+     */
+    private function enforceCombinedConsistency($combinedScore, $combinedLevel, $combinedRelevant, array $bands)
+    {
+        $combinedScore = $this->snapScore(floatval($combinedScore), $bands);
+        $combinedLevel = $this->levelFromScore($combinedScore);
+        $combinedRelevant = $combinedScore >= 0.65 - 0.001;
+
+        return [$combinedScore, $combinedLevel, $combinedRelevant];
+    }
+
+    /**
+     * Give every row of the same citation group identical combined_* fields.
+     *
+     * Rows are grouped by `cite_group_refs` (rows with an empty value form
+     * their own group per reference number). In groups with more than one row,
+     * the combined fields of the row with the highest combined score are copied
+     * to all rows; on ties the later row wins.
+     *
+     * @param array $out Normalized rows.
+     * @return array Rows with synchronized combined fields.
+     */
+    private function syncCombinedFieldsAcrossGroup(array $out)
+    {
+        $groups = [];
+        foreach ($out as $idx => $row) {
+            $key = (string)$row['cite_group_refs'];
+            if ($key === '') {
+                $key = 'ref:' . $row['reference_no'];
+            }
+            $groups[$key][] = $idx;
+        }
+
+        foreach ($groups as $indices) {
+            if (count($indices) <= 1) {
+                continue;
+            }
+            $bestIdx = $indices[0];
+            $bestScore = floatval($out[$bestIdx]['combined_relevance_score']);
+            foreach ($indices as $idx) {
+                $s = floatval($out[$idx]['combined_relevance_score']);
+                if ($s >= $bestScore) {
+                    $bestScore = $s;
+                    $bestIdx = $idx;
+                }
+            }
+            $src = $out[$bestIdx];
+            foreach ($indices as $idx) {
+                $out[$idx]['combined_is_relevant'] = intval($src['combined_is_relevant']);
+                $out[$idx]['combined_relevance_score'] = floatval($src['combined_relevance_score']);
+                $out[$idx]['combined_relevance_level'] = (string)$src['combined_relevance_level'];
+                $out[$idx]['combined_reason'] = (string)$src['combined_reason'];
+            }
+        }
+
+        return $out;
+    }
+
+    /**
+     * The eight score values the prompt allows, in ascending order.
+     *
+     * @return float[]
+     */
+    private function getScoreBands()
+    {
+        return [0.15, 0.25, 0.45, 0.65, 0.78, 0.85, 0.92, 0.98];
+    }
+
+    /**
+     * Snap a score to the nearest allowed band; on an exact tie the lower band wins.
+     *
+     * @param float $score Raw score.
+     * @param array $bands Allowed bands in ascending order (non-empty).
+     * @return float
+     */
+    private function snapScore($score, array $bands)
+    {
+        $nearest = $bands[0];
+        $minDiff = abs($score - $nearest);
+        foreach ($bands as $band) {
+            $diff = abs($score - $band);
+            // Strict "<" keeps the earlier (lower) band when two bands are equally close.
+            if ($diff < $minDiff) {
+                $minDiff = $diff;
+                $nearest = $band;
+            }
+        }
+
+        return $nearest;
+    }
+
+    /**
+     * Resolve a relevance level.
+     *
+     * A recognised hint (canonical name or known alias, case-insensitive) wins;
+     * otherwise the level is derived from the score: >=0.85 highly_related,
+     * >=0.65 partially_related, >=0.45 weakly_related, else unrelated.
+     *
+     * @param float  $score     Relevance score.
+     * @param string $levelHint Level text supplied by the model, if any.
+     * @return string One of highly_related, partially_related, weakly_related, unrelated.
+     */
+    private function levelFromScore($score, $levelHint = '')
+    {
+        $levelHint = strtolower(trim((string)$levelHint));
+        $allowed = ['highly_related', 'partially_related', 'weakly_related', 'unrelated'];
+        if (in_array($levelHint, $allowed, true)) {
+            return $levelHint;
+        }
+        $aliases = [
+            'highly_related' => ['highly_related', 'high_related', 'strong_related', 'strong_relevance'],
+            'partially_related' => ['partially_related', 'partial_related', 'moderate_related'],
+            'weakly_related' => ['weakly_related', 'weak_related', 'low_related', 'insufficient'],
+            'unrelated' => ['unrelated', 'not_related', 'irrelevant', 'no_meaningful_relevance'],
+        ];
+        foreach ($aliases as $canonical => $list) {
+            if (in_array($levelHint, $list, true)) {
+                return $canonical;
+            }
+        }
+        $score = floatval($score);
+        if ($score >= 0.85) {
+            return 'highly_related';
+        }
+        if ($score >= 0.65) {
+            return 'partially_related';
+        }
+        if ($score >= 0.45) {
+            return 'weakly_related';
+        }
+
+        return 'unrelated';
+    }
+
+    /**
+     * Map a role string from the model to a canonical role name.
+     *
+     * @param mixed  $role    Role text supplied by the model.
+     * @param string $default Value returned when the role is empty or not recognised.
+     * @return string Canonical role, or `$default`.
+     */
+    private function normalizeRelevanceRole($role, $default = 'no_meaningful_relevance')
+    {
+        $role = strtolower(trim((string)$role));
+        $map = [
+            'primary_relevance' => ['primary_relevance', 'primary_support', 'primary'],
+            'supplementary_relevance' => ['supplementary_relevance', 'supplementary_support', 'supplementary'],
+            'minimal_relevance' => ['minimal_relevance', 'minimal_support', 'minimal'],
+            'no_meaningful_relevance' => ['no_meaningful_relevance', 'no_meaningful_support', 'none'],
+        ];
+        foreach ($map as $canonical => $aliases) {
+            if ($role === $canonical || in_array($role, $aliases, true)) {
+                return $canonical;
+            }
+        }
+
+        return $default;
+    }
+
+    /**
+     * Role implied by a score, using the same cut-offs that
+     * enforceSingleReferenceConsistency() applies per role
+     * (primary >= 0.85, supplementary >= 0.65, minimal >= 0.45).
+     *
+     * @param float $score Snapped relevance score.
+     * @return string Canonical role name.
+     */
+    private function roleFromScore($score)
+    {
+        $score = floatval($score);
+        if ($score >= 0.85 - 0.001) {
+            return 'primary_relevance';
+        }
+        if ($score >= 0.65 - 0.001) {
+            return 'supplementary_relevance';
+        }
+        if ($score >= 0.45 - 0.001) {
+            return 'minimal_relevance';
+        }
+
+        return 'no_meaningful_relevance';
+    }
+
+    /**
+     * Collapse whitespace runs to single spaces, trim, and cap at 2000 characters.
+     *
+     * @param mixed $reason Reason text supplied by the model.
+     * @return string
+     */
+    private function cleanReason($reason)
+    {
+        if (!is_scalar($reason)) {
+            // Arrays/objects/null carry no usable text; avoid an "Array" string cast.
+            return '';
+        }
+        $text = (string)$reason;
+        $collapsed = preg_replace('/\s+/u', ' ', $text);
+        if ($collapsed === null) {
+            // With the "u" flag preg_replace() returns null on invalid UTF-8;
+            // retry byte-wise instead of silently dropping the whole reason.
+            $collapsed = preg_replace('/\s+/', ' ', $text);
+        }
+
+        return mb_substr(trim((string)$collapsed), 0, 2000);
+    }
+
+    /**
+     * Loose boolean conversion: booleans as-is, numerics by non-zero integer
+     * value, and the strings "1", "true", "yes", "y" (case-insensitive) as true.
+     *
+     * @param mixed $v Value to convert.
+     * @return bool
+     */
+    private function boolVal($v)
+    {
+        if (is_bool($v)) {
+            return $v;
+        }
+        if (is_numeric($v)) {
+            return intval($v) !== 0;
+        }
+        $s = strtolower(trim((string)$v));
+        return in_array($s, ['1', 'true', 'yes', 'y'], true);
+    }
+
+    /**
+     * POST the chat payload as JSON and return the assistant message text.
+     *
+     * Reads the reply from `choices[0].message.content`, falling back to a
+     * top-level `content`. On any failure returns null, stores a description in
+     * `$this->lastPostError` and logs a warning.
+     *
+     * @param array $payload Request body (model, temperature, messages).
+     * @return string|null Message content, or null on failure.
+     */
+    private function postChat(array $payload)
+    {
+        $this->lastPostError = '';
+        try {
+            $body = json_encode($payload, JSON_UNESCAPED_UNICODE);
+            if ($body === false) {
+                // e.g. invalid UTF-8 in the input text; do not send an empty request body.
+                return $this->failPost('LLM payload JSON encode failed (json error ' . json_last_error() . ')');
+            }
+
+            $ch = curl_init();
+            if ($ch === false) {
+                return $this->failPost('LLM curl init failed');
+            }
+            curl_setopt($ch, CURLOPT_URL, $this->url);
+            curl_setopt($ch, CURLOPT_POST, true);
+            curl_setopt($ch, CURLOPT_POSTFIELDS, $body);
+            curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
+            curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, min(15, $this->timeout));
+            curl_setopt($ch, CURLOPT_TIMEOUT, $this->timeout);
+            $headers = ['Content-Type: application/json'];
+            if ($this->apiKey !== '') {
+                $headers[] = 'Authorization: Bearer ' . $this->apiKey;
+            }
+            curl_setopt($ch, CURLOPT_HTTPHEADER, $headers);
+
+            // Collect everything needed from the handle, then release it before any logging.
+            $raw = curl_exec($ch);
+            $curlError = $raw === false ? curl_error($ch) : '';
+            $httpCode = intval(curl_getinfo($ch, CURLINFO_HTTP_CODE));
+            curl_close($ch);
+
+            if ($raw === false) {
+                return $this->failPost('LLM curl error: ' . $curlError);
+            }
+            if ($httpCode < 200 || $httpCode >= 300) {
+                $snippet = mb_substr(trim((string)$raw), 0, 200);
+                return $this->failPost('LLM HTTP ' . $httpCode . ($snippet !== '' ? ': ' . $snippet : ''));
+            }
+            $data = json_decode($raw, true);
+            if (!is_array($data)) {
+                return $this->failPost('LLM response is not valid JSON');
+            }
+            if (isset($data['choices'][0]['message']['content']) && is_scalar($data['choices'][0]['message']['content'])) {
+                return (string)$data['choices'][0]['message']['content'];
+            }
+            if (isset($data['content']) && is_scalar($data['content'])) {
+                return (string)$data['content'];
+            }
+
+            return $this->failPost('LLM response missing content field');
+        } catch (\Exception $e) {
+            $this->lastPostError = 'LLM exception: ' . $e->getMessage();
+            \think\Log::warning('ReferenceRelevanceLlm: ' . $this->lastPostError);
+        }
+
+        return null;
+    }
+
+    /**
+     * Record a postChat() failure: store the message, log it, and return null
+     * so callers can `return $this->failPost(...)`.
+     *
+     * @param string $message Failure description.
+     * @return null
+     */
+    private function failPost($message)
+    {
+        $this->lastPostError = $message;
+        \think\Log::warning('ReferenceRelevanceLlm: ' . $this->lastPostError);
+
+        return null;
+    }
+
+    /**
+     * Decode the model's reply into an array.
+     *
+     * Markdown code fences are stripped first. If the whole text is not valid
+     * JSON, the widest `{...}` span and then the widest `[...]` span are tried,
+     * so replies wrapped in prose or returned as a bare list still decode.
+     *
+     * @param mixed $raw Raw message content.
+     * @return array|null Decoded array, or null when nothing decodes to an array.
+     */
+    private function parseJson($raw)
+    {
+        $raw = trim((string)$raw);
+        if ($raw === '') {
+            return null;
+        }
+        $raw = preg_replace('/^```[a-zA-Z]*\s*|```$/m', '', $raw);
+        $raw = trim((string)$raw);
+        $decoded = json_decode($raw, true);
+        if (is_array($decoded)) {
+            return $decoded;
+        }
+        foreach (['/\{[\s\S]*\}/', '/\[[\s\S]*\]/'] as $pattern) {
+            if (preg_match($pattern, $raw, $m)) {
+                $decoded = json_decode($m[0], true);
+                if (is_array($decoded)) {
+                    return $decoded;
+                }
+            }
+        }
+
+        return null;
+    }
+}