Files
tougao/application/common/service/ReferenceRelevanceLlmService.php
2026-06-29 10:23:27 +08:00

616 lines
30 KiB
PHP
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
<?php
namespace app\common\service;
use think\Env;
/**
* LLM-based "topic relevance" check for cited references (separate from the support-strength check done by LLMService).
*/
class ReferenceRelevanceLlmService
{
/** @var string Chat endpoint URL, read from env key promotion.promotion_llm_url; '' disables the service. */
private $url;
/** @var string Model name placed in the request payload; '' disables the service. */
private $model;
/** @var string Bearer token; the Authorization header is only sent when this is non-empty. */
private $apiKey;
/** @var int Total cURL timeout in seconds (the constructor enforces a floor of 180). */
private $timeout;
/** @var string Description of the most recent postChat() failure; reset to '' at the start of each call. */
private $lastPostError = '';
/** @var int Maximum number of characters of section text sent to the LLM. */
private $maxSectionChars;
/** @var int Maximum number of characters of local citation context sent to the LLM. */
private $maxLocalContextChars;
/** @var int Maximum number of characters of the reference list sent to the LLM. */
private $maxReferChars;
/** @var int Maximum number of characters of abstract/cleaned content sent to the LLM. */
private $maxAbstractChars;
/**
 * Read endpoint settings and prompt-size limits from the environment.
 *
 * Every numeric setting is clamped to a hard minimum, so an env override
 * can raise a value but never push it below its floor.
 */
public function __construct()
{
    $envString = function ($key) {
        return trim((string)Env::get($key, ''));
    };
    $envIntWithFloor = function ($key, $default, $floor) {
        return max($floor, intval(Env::get($key, $default)));
    };

    $this->url = $envString('promotion.promotion_llm_url');
    $this->model = $envString('promotion.promotion_llm_model');
    $this->apiKey = $envString('promotion.promotion_llm_api_key');
    $this->timeout = $envIntWithFloor('promotion.promotion_llm_timeout', 180, 180);

    // Bound the context sent to the LLM to keep single-request latency down (overridable via env).
    $this->maxSectionChars = $envIntWithFloor('promotion.relevance_llm_max_section_chars', 4500, 1500);
    $this->maxLocalContextChars = $envIntWithFloor('promotion.relevance_llm_max_local_context_chars', 1800, 600);
    $this->maxReferChars = $envIntWithFloor('promotion.relevance_llm_max_refer_chars', 3500, 1500);
    $this->maxAbstractChars = $envIntWithFloor('promotion.relevance_llm_max_abstract_chars', 3500, 1500);
}
/**
 * Ask the LLM how relevant each cited reference is to the citing passage.
 *
 * All text inputs are cast to string, trimmed and truncated to the
 * configured character limits before being sent.
 *
 * @param mixed $sectionText   Text of the article section containing the citation.
 * @param mixed $localContext  Text immediately around the citation.
 * @param mixed $referText     Reference list entries for the cited numbers.
 * @param mixed $abstractText  Optional abstract / cleaned content of the references.
 * @param mixed $citeGroupRefs Optional citation group, e.g. "1,2".
 * @return array{results:array,request_failed?:bool,reason?:string}
 */
public function checkRelevance($sectionText, $localContext, $referText, $abstractText = '', $citeGroupRefs = '')
{
    $failure = [
        'results' => [],
        'request_failed' => true,
        'reason' => 'LLM not configured or request failed',
    ];
    if ($this->url === '' || $this->model === '') {
        return $failure;
    }

    $sectionText = trim((string)$sectionText);
    $localContext = trim((string)$localContext);
    $referText = trim((string)$referText);
    $abstractText = trim((string)$abstractText);
    if ($sectionText === '' || $referText === '') {
        // Nothing to compare: not a request failure, just no results.
        return ['results' => [], 'reason' => 'Empty section or reference text'];
    }

    // Truncate each input to its configured character budget.
    $sectionText = mb_strlen($sectionText) > $this->maxSectionChars
        ? mb_substr($sectionText, 0, $this->maxSectionChars)
        : $sectionText;
    $localContext = mb_strlen($localContext) > $this->maxLocalContextChars
        ? mb_substr($localContext, 0, $this->maxLocalContextChars)
        : $localContext;
    $referText = mb_strlen($referText) > $this->maxReferChars
        ? mb_substr($referText, 0, $this->maxReferChars)
        : $referText;
    $abstractText = mb_strlen($abstractText) > $this->maxAbstractChars
        ? mb_substr($abstractText, 0, $this->maxAbstractChars)
        : $abstractText;

    $systemMessage = ['role' => 'system', 'content' => $this->buildSystemPrompt()];
    $userMessage = [
        'role' => 'user',
        'content' => $this->buildUserPrompt($sectionText, $localContext, $referText, $abstractText, $citeGroupRefs),
    ];
    $content = $this->postChat([
        'model' => $this->model,
        'temperature' => 0,
        'messages' => [$systemMessage, $userMessage],
    ]);
    if ($content === null) {
        $failure['reason'] = $this->lastPostError !== '' ? $this->lastPostError : 'LLM request failed';
        return $failure;
    }

    $parsed = $this->parseJson($content);
    if ($parsed === null) {
        $failure['reason'] = 'LLM response JSON parse failed';
        return $failure;
    }

    $results = $this->normalizeResults($parsed, $citeGroupRefs, $localContext, $referText, $abstractText);
    if (empty($results)) {
        $failure['reason'] = 'LLM returned empty or invalid results';
        return $failure;
    }

    return ['results' => $results];
}
/**
 * Return the fixed system prompt for the relevance check.
 *
 * The prompt is a nowdoc (no variable interpolation), written in Chinese.
 * It tells the model to break the citing sentence into claims, score each
 * reference on its own using one of eight fixed values
 * (0.98 / 0.92 / 0.85 / 0.78 / 0.65 / 0.45 / 0.25 / 0.15), assign a
 * relevance_level and relevance_role, give a separate combined_* verdict
 * for the citation group, and answer with a JSON object holding a
 * "results" list.
 *
 * @return string
 */
private function buildSystemPrompt()
{
return <<<'PROMPT'
你是一名护理、医学、生物医学与科研期刊的资深学术编辑,正在执行「参考文献主题相关性校对」。
你的任务:判断【引用位置正文表述】与【对应编号参考文献】在主题、研究对象、疾病/场景/结局方向上是否相关,能否作为该处引用的合理来源。
注意:这是「相关性」校对,侧重引用处具体 claim 与文献内容是否匹配;**不是**判断「是否同一疾病/同一领域」。
==================================================
【零、最硬规则(违反则输出无效)】
1. **单条 relevance_score 只评价该编号文献单独**与引用处的关系;不得因联合组整体合理而抬高弱相关文献的单条分。
2. **禁止「同病高分」**:正文与文献都涉及 CRC不等于单条可给 0.85~0.92。
**但若引用处 claim 本身就是机制/通路/异质性/耐药/治疗挑战**,且**研究主语一致**(同一疾病/同一化合物/同一干预对象),文献(含摘要/清洗内容)讨论同病多通路、遗传改变、耐药等,应给 **0.65~0.78**,不得误降到 0.45。
**主语不一致时仍适用本条禁止高分**:引用处主语为化合物 X文献却是其他植物/提取物/计算预测,即使提到 X 或相同通路名,也不得因此给 0.78+。
3. 引用处若为**流行病学/负担类 claim**most common、incidence、mortality、burden、全球高发等
- 机制研究、分子通路、细胞增殖/迁移、血管生成等**原始研究** → 单条通常 **0.45 或更低**`is_relevant=0``minimal_relevance`
- 不得因摘要提到 colorectal cancer 就给 0.92
- 仅当文献为流行病学综述/公共卫生研究,或明确讨论发病率、死亡率、疾病负担时,单条才可 **0.85~0.92**
4. **联合分写在 combined_relevance_score**,与单条分必须可分离(例如 [1,2] 时文献1=0.45、文献2=0.92、联合=0.92)。
5. **「来源/化学分类」型句子**naturally occurring、pentacyclic triterpenoid、found in fruits/vegetables/medicinal plants、并列举具体植物学名
- 先判文献类型:来源综述 / 生物活性综述 最适合;**抗癌治疗综述**对「来源分布」claim 通常仅 **0.65**
- 单篇可差异化打分(如 0.92 / 0.92 / 0.65**不得**因联合而三篇都给高分
- 若原句含**具体列举项**(如多个植物学名),而材料未逐一核实全部学名,联合分通常 **≤0.85**(不得给 0.98
6. **多要素综括句**(一句同时塞入:药学/研究兴趣 + 大量前临床研究 + 多种活性[抗炎/抗氧化/抗癌等] + 多个癌种/对象列举):
- 单篇即使是综述,通常仅 partially_related ~ near-direct**0.78~0.86****不轻易给 0.92**(单篇难逐项覆盖全部要素)
- **联合分是整句覆盖度评估,可低于最高单条分**:若整句要素需多篇拼合、且含作者整合概括,联合通常 **0.72~0.78partially_related**,不给 0.85+
7. **联合分不是「取最高单条分」**:当各单篇都只覆盖整句一部分、需互补拼合时,联合分应反映「整句作为一个整体被支撑的完整度」,**允许低于任何一篇单条分**。
8. **主语/研究对象层级必须对齐**:引用处主语为某化合物/分子如「X has been demonstrated…」文献核心对象须为 **X 本身**或以 X 为核心的实验/综述。
- **植物提取物/混合物**研究、**其他物种/其他植物**的计算预测、成分表中顺带出现 X → 通常 **0.45 或更低**
- **关键:提取物即使 X 含量很高(如 50%+)且显示了抗癌/凋亡活性,活性归因于提取物整体而非 X 单体单独验证 → 仍属 weakly_related≤0.45)、`minimal_relevance`**,不得评为 supplementary_relevance/0.78
- 只有当文献**针对 X 单体单独做了验证**X monomer 处理、X 单独剂量效应等)时,主语才算对齐,方可进入 0.65+
- **不得**因摘要/讨论出现与引用句相同的通路名、凋亡、抗癌等词就给 0.78+
9. **证据层级与 demonstrated / mechanistically**
- 本文实验结果或针对 X 的系统综述 > 计算预测/混合成分推测
- **讨论Discussion转引他人关于 X 的机制总结 ≠ 该文自身证据**;据此最多 **0.45~0.65**,不得评为 highly_related
- in silico / computational prediction 不足以支撑「has been demonstrated to mechanistically…」式强语气 claim 的高分
10. **点名通路/功能结局须逐项核对**:原句逐条列举通路(如 PI3K/AKT、MAPK、NF-κB或结局增殖、凋亡、血管生成、炎症信号等**每一项单独核对是否在本文证据中成立**(非仅背景提及)。
- 讨论转述既往文献 ≠ 本文证明该项
- 缺原句任一点名项(如 angiogenesis→ 单条通常 **不得 0.78+**
- **「覆盖部分结局」不足以进入 0.78**:原句点名了多条通路 + 多个结局,文献仅命中其中 1~2 个结局(如仅凋亡/增殖),且**点名通路在本文结果中全部缺失(仅讨论转引)**或主语层级不对 → 单条 **限 0.45weakly_related / minimal_relevance**,不得给 0.65~0.78
- 仅同领域沾边 12 项、主语或机制层级不对 → **0.45**
- **进入 0.65~0.78 的前提**主语对齐X 单体)+ 本文自身结果命中原句点名通路/结局的多数项;几乎全部明确对应 → **0.85+**
11. **文献「主题粒度」必须匹配 claim「主题粒度」**:引用处为**疾病总论型 claim**(流行病学负担、标准/多模态治疗现状与局限、基因组异质性、单靶点治疗受限、亟需新策略等总体背景)时:
- 最适合的来源是**疾病总体综述 / 分子病理综述 / 精准肿瘤学 / 耐药综述**;此类文献正面、系统地为该总论 claim 提供依据 → 可 **0.85+**
- **单一药物 / 单一成分 / 单一通路的专题综述**如「某化合物抗某癌A review」即使同病、同大方向也只是专题视角、并非为该总论 claim 做系统总结 → 通常 **partially_related0.72~0.78****不得给 0.85+**
- **单基因 / 单通路的机制原始研究**对纯流行病学负担 claim → 仍按规则 3 给 **0.45**
- 判断要点:文献类型是否「为该总论 claim 本身做系统综述/总论」;仅同病同方向、或只支撑整段中某一两句(如「需要更安全的新策略」),不足以进入 highly_related
==================================================
【一、必须先拆解 claim】
从【本引用位置附近上下文】中提炼最小主张单元Claim A, Claim B…**不要**把整句笼统归为「大概讲抗癌」。例如:
- **主语/研究对象**(化合物单体 vs 植物提取物 vs 其他物种是否「X has been demonstrated」
- **证据语气与层级**demonstrated / mechanistically vs predict / suggest本文结果 vs 讨论转引)
- **claim 主题粒度**:是否为疾病总论型(流行病学负担 / 治疗现状与局限 / 基因组异质性 / 单靶点受限 / 亟需新策略);若是,要求「总体综述 / 分子病理 / 精准肿瘤学 / 耐药综述」类来源,单一药物专题综述只算 partially_related
- 疾病流行病学(高发、死亡率)
- **点名通路/分子机制**PI3K/AKT、MAPK、NF-κB 等,须逐项)
- **点名功能结局**(抑制增殖、凋亡、血管生成、炎症信号等,须逐项)
- 治疗/干预现状
- **化合物化学类别**(如 pentacyclic triterpenoid
- **天然来源分布**fruits / vegetables / medicinal plants
- **具体列举项**(植物学名、药名、基因名等,须逐项核对)
==================================================
【二、逐篇文献单独判断(每条 result 对应一个 reference_no
对 cite_group_refs 中的每一篇文献,单独输出:
- 该文献与引用处哪些 claim 主题相关、哪些不相关(含具体列举项是否覆盖)
- 文献类型是否匹配引用用途(来源综述 / 生物活性综述 / 机制研究 / 流行病学综述 / 抗癌治疗综述等)
- relevance_score只能使用 0.98 / 0.92 / 0.85 / 0.78 / 0.65 / 0.45 / 0.25 / 0.15
- relevance_levelhighly_related | partially_related | weakly_related | unrelated
- is_relevantscore>=0.65 为 1否则 0
- relevance_role
- primary_relevance该文献是引用处主题的主要相关来源
- supplementary_relevance部分相关、补充性
- minimal_relevance仅边缘/背景沾边
- no_meaningful_relevance与引用处核心表述基本无关
- reason中文须写明①文献类型与**核心研究对象** ②**本文自身证据**覆盖了哪些 claim / 哪些未覆盖(含点名通路、功能结局、具体列举项;讨论转引须标明) ③**若仅为同主题不同 claim 或主语层级不对须明确写出** ④为何此分值而非更高或更低
主语/层级不对 → 单条 **0.45**,不得因讨论提及相同通路给 0.78
引用处 claim 为「化合物 X 经 PI3K/AKT 等机制 demonstrated…」文献为其他植物提取物或计算预测、仅在讨论转引他人 X 机制 → 0.45weakly_relatedis_relevant=0。
机制文引用流行病学句 → 单条 **0.45**,不得 0.92
文献为 CRC 机制研究,引用处 claim 为全球高发/死亡率,文献无流行病学数据 → 0.45minimal_relevanceis_relevant=0。
==================================================
【三、联合引用 combined_*(同一 cite_group_refs 内各行必须一致)】
当 cite_group_refs 为 "1,2" 等多篇时,除逐篇判断外,必须给出引用组整体结论:
- 这些文献合起来,是否足以支撑/匹配该引用位置的整体表述?
- combined_relevance_score八档固定分值之一**不是单条平均分**
- 若一篇已强相关、其余仅弱补充,联合分可接近主相关文献,但**不必等于最高单条分**
- 若原句含具体列举项(学名等)且材料未逐一核实,联合分通常 **0.85**,不给 0.98
- 若核心 claim 无任何文献明确覆盖,联合分不能虚高
- 多篇联合仍缺主语对齐、缺原句点名通路/结局、或主要靠讨论转引 → 联合分通常 **≤0.45~0.65**,不得因单篇讨论出现相同关键词给到 0.78+
- combined_is_relevantcombined_relevance_score>=0.65 为 1
- combined_relevance_level与 combined 分数对应的等级
- combined_reason中文综合结论说明各文献分工如「文献2综述覆盖流行病学主 claim文献1仅机制补充」及最终分值理由
单条引用时combined_* 与单条一致combined_reason 可写「」。
==================================================
【四、评分与等级对照】
0.98 / 0.92 / 0.85 = highly_related
文献直接支持整句主旨,大部分关键要素都在文中明确出现
0.78 / 0.65 = partially_related
文献只支撑其中一部分,或支撑方式偏间接
0.45 = weakly_related
只是同领域文献,但与句子事实对应很弱
0.25 / 0.15 = unrelated
基本不支撑该句
≤0.15 = not_support
不支撑
==================================================
【五、输出 JSON仅 JSON无 markdown
{
"results": [
{
"reference_no": 1,
"cite_group_refs": "1,2",
"is_relevant": 0,
"relevance_score": 0.45,
"relevance_level": "weakly_related",
"relevance_role": "minimal_relevance",
"reason": "中文单条结论",
"combined_is_relevant": 1,
"combined_relevance_score": 0.92,
"combined_relevance_level": "highly_related",
"combined_reason": "中文联合结论"
},
{
"reference_no": 2,
"cite_group_refs": "1,2",
...
}
]
}
PROMPT;
}
/**
 * Assemble the user message: labelled sections joined by blank lines.
 *
 * The section text, the reference list and the closing instruction are
 * always present; the citation group, local context and abstract sections
 * are only added when their input is non-empty.
 *
 * @param string $sectionText   Article section text.
 * @param string $localContext  Text around the citation; skipped when ''.
 * @param string $referText     Reference list text.
 * @param string $abstractText  Abstract / cleaned content; skipped when ''.
 * @param mixed  $citeGroupRefs Citation group such as "1,2"; skipped when blank.
 * @return string
 */
private function buildUserPrompt($sectionText, $localContext, $referText, $abstractText, $citeGroupRefs)
{
    $segments = [];
    $segments[] = "【正文节 t_article_main】\n" . $sectionText;

    $hasGroup = trim((string)$citeGroupRefs) !== '';
    if ($hasGroup) {
        // A comma in the group means several references are cited together.
        if (strpos($citeGroupRefs, ',') !== false) {
            $mode = '联合引用';
        } else {
            $mode = '单独引用';
        }
        $segments[] = '【引用文献组 cite_group_refs】' . $citeGroupRefs . $mode;
    }

    if ($localContext !== '') {
        $segments[] = "【本引用位置附近上下文(优先据此拆解 claim\n" . $localContext;
    }

    $segments[] = "【参考文献书目(按编号)】\n" . $referText;

    if ($abstractText !== '') {
        $segments[] = "【文献摘要/清洗后内容Europe PMC·PubMed·Crossref·PDF\n" . $abstractText;
    }

    $segments[] = '请先拆解最小主张单元(主语层级、证据来源、点名通路/结局逐项核对),判断每篇文献类型与**本文自身证据**,再**逐篇独立**给出单条 relevance_score讨论转引、提取物/计算预测不得抬高;弱相关文献不得因联合而高分),最后给出 combined_*。仅输出 results 数组 JSON。';

    return implode("\n\n", $segments);
}
/**
 * Convert the decoded LLM payload into validated per-reference verdict rows.
 *
 * Accepted payload shapes:
 *  - an object with a "results" list;
 *  - a single row object (carrying reference_no or relevance_score);
 *  - a bare list of row objects (the user prompt asks for "the results
 *    array", so some responses omit the wrapping object).
 *
 * Rows that are not arrays or have no positive reference_no are dropped.
 * Scores are snapped to the fixed bands, then score / level / role /
 * is_relevant are reconciled, and the combined_* fields are synchronized
 * inside each citation group.
 *
 * @param array  $parsed               Decoded JSON from the LLM.
 * @param mixed  $defaultCiteGroupRefs Citation group used when a row omits cite_group_refs (string or list).
 * @param string $localContext         Unused; kept so existing call sites stay valid.
 * @param string $referText            Unused; kept so existing call sites stay valid.
 * @param string $abstractText         Unused; kept so existing call sites stay valid.
 * @return array List of normalized rows (empty when nothing usable was returned).
 */
private function normalizeResults(array $parsed, $defaultCiteGroupRefs, $localContext = '', $referText = '', $abstractText = '')
{
    $rows = [];
    if (isset($parsed['results']) && is_array($parsed['results'])) {
        $rows = $parsed['results'];
    } elseif (isset($parsed['reference_no']) || isset($parsed['relevance_score'])) {
        $rows = [$parsed];
    } elseif (isset($parsed[0]) && is_array($parsed[0])) {
        // Bare top-level list of rows.
        $rows = $parsed;
    }

    $bands = $this->getScoreBands();
    $defaultCiteGroupRefs = $this->citeGroupRefsToString($defaultCiteGroupRefs);
    $out = [];
    foreach ($rows as $item) {
        if (!is_array($item)) {
            continue;
        }
        $refNo = intval(isset($item['reference_no']) ? $item['reference_no'] : 0);
        if ($refNo <= 0) {
            continue;
        }

        $score = $this->snapScore(floatval(isset($item['relevance_score']) ? $item['relevance_score'] : 0), $bands);
        $isRelevant = $score >= 0.65 - 0.001;
        if (array_key_exists('is_relevant', $item)) {
            $isRelevant = $this->boolVal($item['is_relevant']);
        }
        $level = $this->levelFromScore($score, isset($item['relevance_level']) ? $item['relevance_level'] : '');

        // A missing/blank role must not be read as "no meaningful relevance":
        // that would clamp an otherwise high score to 0.25. Like level and
        // is_relevant, fall back to what the score implies.
        $rawRole = (isset($item['relevance_role']) && is_scalar($item['relevance_role']))
            ? trim((string)$item['relevance_role'])
            : '';
        $role = $rawRole !== ''
            ? $this->normalizeRelevanceRole($rawRole)
            : $this->roleFromScore($score);

        $reason = $this->cleanReason(isset($item['reason']) ? $item['reason'] : '');
        list($score, $level, $isRelevant, $role) = $this->enforceSingleReferenceConsistency(
            $score,
            $level,
            $isRelevant,
            $role,
            $bands
        );

        // The combined score defaults to the (already reconciled) single score.
        $combinedScore = $this->snapScore(
            floatval(isset($item['combined_relevance_score']) ? $item['combined_relevance_score'] : $score),
            $bands
        );
        $combinedRelevant = $combinedScore >= 0.65 - 0.001;
        if (array_key_exists('combined_is_relevant', $item)) {
            $combinedRelevant = $this->boolVal($item['combined_is_relevant']);
        }
        $combinedLevel = $this->levelFromScore(
            $combinedScore,
            isset($item['combined_relevance_level']) ? $item['combined_relevance_level'] : ''
        );
        list($combinedScore, $combinedLevel, $combinedRelevant) = $this->enforceCombinedConsistency(
            $combinedScore,
            $combinedLevel,
            $combinedRelevant,
            $bands
        );

        // cite_group_refs may come back as "1,2" or as a JSON list [1, 2].
        $citeGroupRefs = isset($item['cite_group_refs'])
            ? $this->citeGroupRefsToString($item['cite_group_refs'])
            : '';
        if ($citeGroupRefs === '') {
            $citeGroupRefs = $defaultCiteGroupRefs;
        }

        $out[] = [
            'reference_no' => $refNo,
            'cite_group_refs' => $citeGroupRefs,
            'is_relevant' => $isRelevant ? 1 : 0,
            'relevance_score' => $score,
            'relevance_level' => $level,
            'relevance_role' => $role,
            'reason' => $reason,
            'combined_is_relevant' => $combinedRelevant ? 1 : 0,
            'combined_relevance_score' => $combinedScore,
            'combined_relevance_level' => $combinedLevel,
            'combined_reason' => $this->cleanReason(isset($item['combined_reason']) ? $item['combined_reason'] : ''),
        ];
    }

    return $this->syncCombinedFieldsAcrossGroup($out);
}

/**
 * Render a citation group as a trimmed string; lists are joined with commas.
 *
 * @param mixed $value String such as "1,2", a list such as [1, 2], or anything else.
 * @return string '' when the value carries no usable reference numbers.
 */
private function citeGroupRefsToString($value)
{
    if (is_array($value)) {
        $refs = [];
        foreach ($value as $ref) {
            if (is_scalar($ref) && trim((string)$ref) !== '') {
                $refs[] = trim((string)$ref);
            }
        }
        return implode(',', $refs);
    }
    return is_scalar($value) ? trim((string)$value) : '';
}

/**
 * Pick the role implied by a snapped score, using the same cut-offs the
 * role clamps apply (primary >= 0.85, supplementary >= 0.65, minimal >= 0.45).
 *
 * @param float $score Snapped relevance score.
 * @return string Canonical relevance_role value.
 */
private function roleFromScore($score)
{
    if ($score >= 0.85) {
        return 'primary_relevance';
    }
    if ($score >= 0.65) {
        return 'supplementary_relevance';
    }
    if ($score >= 0.45) {
        return 'minimal_relevance';
    }
    return 'no_meaningful_relevance';
}
/**
 * Reconcile score, level, relevance flag and role for one reference.
 *
 * Precedence is role, then level, then the relevance flag:
 *  - no_meaningful_relevance: score <= 0.25, level unrelated, not relevant;
 *  - minimal_relevance:       score <= 0.45, level weakly_related, not relevant;
 *  - supplementary_relevance: score <= 0.78, level never highly_related;
 *  - primary_relevance:       score >= 0.85, level highly_related, relevant.
 *
 * The role decides the level in the last two cases as well. Keeping a
 * stale level hint there would let the level-based clamps further down
 * undo the role clamp (supplementary + "highly_related" would be raised
 * back to 0.85; primary + "partially_related" would be lowered to 0.78).
 *
 * @param float  $score      Snapped relevance score.
 * @param string $level      Relevance level (canonical value or alias).
 * @param bool   $isRelevant Relevance flag.
 * @param string $role       Canonical relevance role.
 * @param array  $bands      Allowed score values.
 * @return array [score, level, isRelevant, role]
 */
private function enforceSingleReferenceConsistency($score, $level, $isRelevant, $role, array $bands)
{
    $score = floatval($score);

    // 1) The role bounds the score and, where it is unambiguous, fixes the level.
    if ($role === 'no_meaningful_relevance') {
        if ($score > 0.25) {
            $score = 0.25;
        }
        $level = 'unrelated';
        $isRelevant = false;
    } elseif ($role === 'minimal_relevance') {
        if ($score > 0.45) {
            $score = 0.45;
        }
        $level = 'weakly_related';
        $isRelevant = false;
    } elseif ($role === 'supplementary_relevance') {
        if ($score > 0.78) {
            $score = 0.78;
        }
        $level = $this->levelFromScore($score, $level);
        if ($level === 'highly_related') {
            // Contradicts the role: take the level from the capped score instead.
            $level = $this->levelFromScore($score);
        }
    } elseif ($role === 'primary_relevance') {
        if ($score < 0.85) {
            $score = 0.85;
        }
        $isRelevant = true;
        $level = 'highly_related';
    }

    // 2) The level bounds the score.
    if ($level === 'weakly_related' && $score > 0.45) {
        $score = 0.45;
        $isRelevant = false;
    } elseif ($level === 'unrelated' && $score > 0.25) {
        $score = 0.25;
        $isRelevant = false;
    } elseif ($level === 'highly_related' && $score < 0.85) {
        $score = 0.85;
        $isRelevant = true;
    } elseif ($level === 'partially_related') {
        if ($score > 0.78) {
            $score = 0.78;
        }
        if ($score < 0.65) {
            $score = 0.65;
        }
        $isRelevant = true;
    }

    // 3) The relevance flag and the 0.65 threshold must agree.
    if (!$isRelevant && $score >= 0.65) {
        $score = 0.45;
        $level = 'weakly_related';
    }
    if ($isRelevant && $score < 0.65) {
        $score = 0.65;
        $level = 'partially_related';
    }

    $score = $this->snapScore($score, $bands);
    // Leaves a canonical level untouched; only normalizes an alias or unknown value.
    $level = $this->levelFromScore($score, $level);
    return [$score, $level, $isRelevant, $role];
}
/**
 * Make the combined level and relevance flag agree with the combined score.
 *
 * The snapped score is authoritative: both the level and the flag are
 * derived from it. Keeping the incoming level hint could return
 * contradictory triples such as score 0.45 / highly_related / not relevant.
 *
 * @param float  $combinedScore    Combined relevance score.
 * @param string $combinedLevel    Incoming level; not used, kept for signature compatibility.
 * @param bool   $combinedRelevant Incoming flag; not used, kept for signature compatibility.
 * @param array  $bands            Allowed score values.
 * @return array [combinedScore, combinedLevel, combinedRelevant]
 */
private function enforceCombinedConsistency($combinedScore, $combinedLevel, $combinedRelevant, array $bands)
{
    $combinedScore = $this->snapScore(floatval($combinedScore), $bands);
    $combinedLevel = $this->levelFromScore($combinedScore);
    $combinedRelevant = $combinedScore >= 0.65 - 0.001;
    return [$combinedScore, $combinedLevel, $combinedRelevant];
}
/**
 * Give every row of a citation group the same combined_* values.
 *
 * Rows are grouped by cite_group_refs (rows with an empty group form a
 * group of their own, keyed by reference_no). In groups of two or more
 * rows, the row with the highest combined score supplies the combined_*
 * fields for all rows; on a tie the later row wins.
 *
 * @param array $out Normalized rows.
 * @return array The same rows with combined_* fields synchronized.
 */
private function syncCombinedFieldsAcrossGroup(array $out)
{
    $membersByGroup = [];
    foreach ($out as $position => $entry) {
        $groupKey = (string)$entry['cite_group_refs'];
        if ($groupKey === '') {
            $groupKey = 'ref:' . $entry['reference_no'];
        }
        $membersByGroup[$groupKey][] = $position;
    }

    foreach ($membersByGroup as $members) {
        if (count($members) < 2) {
            continue;
        }

        // Find the row holding the highest combined score (">=" lets later rows win ties).
        $winner = $members[0];
        foreach ($members as $position) {
            $candidateScore = floatval($out[$position]['combined_relevance_score']);
            $winnerScore = floatval($out[$winner]['combined_relevance_score']);
            if ($candidateScore >= $winnerScore) {
                $winner = $position;
            }
        }

        $shared = [
            'combined_is_relevant' => intval($out[$winner]['combined_is_relevant']),
            'combined_relevance_score' => floatval($out[$winner]['combined_relevance_score']),
            'combined_relevance_level' => (string)$out[$winner]['combined_relevance_level'],
            'combined_reason' => (string)$out[$winner]['combined_reason'],
        ];
        foreach ($members as $position) {
            foreach ($shared as $field => $value) {
                $out[$position][$field] = $value;
            }
        }
    }

    return $out;
}
/**
 * The eight score values a verdict may take, in ascending order.
 *
 * @return float[]
 */
private function getScoreBands()
{
    $bands = [0.15, 0.25, 0.45, 0.65, 0.78, 0.85, 0.92, 0.98];
    return $bands;
}
/**
 * Snap a raw score onto one of the allowed bands.
 *
 * A band within 0.001 of the score is returned immediately; otherwise the
 * closest band is returned (the earlier band wins when two are equally close).
 *
 * @param float $score Raw score.
 * @param array $bands Allowed score values.
 * @return float One of $bands.
 */
private function snapScore($score, array $bands)
{
    // Pass 1: accept a band that matches within tolerance.
    foreach ($bands as $candidate) {
        if (abs($score - $candidate) < 0.001) {
            return $candidate;
        }
    }

    // Pass 2: otherwise keep the band with the smallest distance.
    $closest = $bands[0];
    $smallestGap = abs($score - $closest);
    foreach ($bands as $candidate) {
        $gap = abs($score - $candidate);
        if ($gap >= $smallestGap) {
            continue;
        }
        $smallestGap = $gap;
        $closest = $candidate;
    }
    return $closest;
}
/**
 * Resolve a relevance level from a hint, falling back to the score.
 *
 * A recognized hint (canonical value or known alias, case-insensitive,
 * surrounding whitespace ignored) always wins; the score is only used
 * when the hint is empty or unknown.
 *
 * @param float  $score     Relevance score.
 * @param string $levelHint Level label to honor when recognized.
 * @return string highly_related | partially_related | weakly_related | unrelated
 */
private function levelFromScore($score, $levelHint = '')
{
    $hint = strtolower(trim((string)$levelHint));
    $canonicalByHint = [
        'highly_related' => 'highly_related',
        'high_related' => 'highly_related',
        'strong_related' => 'highly_related',
        'strong_relevance' => 'highly_related',
        'partially_related' => 'partially_related',
        'partial_related' => 'partially_related',
        'moderate_related' => 'partially_related',
        'weakly_related' => 'weakly_related',
        'weak_related' => 'weakly_related',
        'low_related' => 'weakly_related',
        'insufficient' => 'weakly_related',
        'unrelated' => 'unrelated',
        'not_related' => 'unrelated',
        'irrelevant' => 'unrelated',
        'no_meaningful_relevance' => 'unrelated',
    ];
    if (isset($canonicalByHint[$hint])) {
        return $canonicalByHint[$hint];
    }

    // No usable hint: map the score through descending thresholds.
    $value = floatval($score);
    $thresholds = [
        [0.85, 'highly_related'],
        [0.65, 'partially_related'],
        [0.45, 'weakly_related'],
    ];
    foreach ($thresholds as $step) {
        if ($value >= $step[0]) {
            return $step[1];
        }
    }
    return 'unrelated';
}
/**
 * Map a role label (canonical value or alias) to its canonical form.
 *
 * Matching is case-insensitive and ignores surrounding whitespace.
 * Anything unrecognized, including '', yields no_meaningful_relevance.
 *
 * @param string $role Role label.
 * @return string Canonical relevance role.
 */
private function normalizeRelevanceRole($role)
{
    $label = strtolower(trim((string)$role));
    $canonicalByLabel = [
        'primary_relevance' => 'primary_relevance',
        'primary_support' => 'primary_relevance',
        'primary' => 'primary_relevance',
        'supplementary_relevance' => 'supplementary_relevance',
        'supplementary_support' => 'supplementary_relevance',
        'supplementary' => 'supplementary_relevance',
        'minimal_relevance' => 'minimal_relevance',
        'minimal_support' => 'minimal_relevance',
        'minimal' => 'minimal_relevance',
        'no_meaningful_relevance' => 'no_meaningful_relevance',
        'no_meaningful_support' => 'no_meaningful_relevance',
        'none' => 'no_meaningful_relevance',
    ];
    return isset($canonicalByLabel[$label]) ? $canonicalByLabel[$label] : 'no_meaningful_relevance';
}
/**
 * Collapse whitespace runs in a reason text and cap it at 2000 characters.
 *
 * @param mixed $reason Reason text; cast to string.
 * @return string
 */
private function cleanReason($reason)
{
    $singleSpaced = preg_replace('/\s+/u', ' ', (string)$reason);
    $trimmed = trim($singleSpaced);
    return mb_substr($trimmed, 0, 2000);
}
/**
 * Interpret a loosely typed flag as a boolean.
 *
 * Booleans pass through; numeric values are true when their integer part
 * is non-zero; other values are true only for "1", "true", "yes" or "y"
 * (case-insensitive, surrounding whitespace ignored).
 *
 * @param mixed $v Flag value.
 * @return bool
 */
private function boolVal($v)
{
    if (is_bool($v) || is_numeric($v)) {
        return is_bool($v) ? $v : intval($v) !== 0;
    }
    $token = strtolower(trim((string)$v));
    return $token === '1' || $token === 'true' || $token === 'yes' || $token === 'y';
}
/**
 * POST a chat payload to the configured endpoint and return the reply text.
 *
 * The reply text is read from choices[0].message.content, or from a
 * top-level "content" field when the former is absent. On any failure the
 * method returns null, stores a description in $this->lastPostError and
 * logs a warning.
 *
 * @param array $payload Request body; encoded as JSON.
 * @return string|null Reply text, or null on failure.
 */
private function postChat(array $payload)
{
    $this->lastPostError = '';
    try {
        // json_encode returns false (e.g. on invalid UTF-8 in the texts); sending
        // that would post an empty body and surface only as an opaque HTTP error.
        $body = json_encode($payload, JSON_UNESCAPED_UNICODE);
        if ($body === false) {
            $this->lastPostError = 'LLM request JSON encode failed: ' . json_last_error_msg();
            \think\Log::warning('ReferenceRelevanceLlm: ' . $this->lastPostError);
            return null;
        }

        $ch = curl_init();
        if ($ch === false) {
            $this->lastPostError = 'LLM curl init failed';
            \think\Log::warning('ReferenceRelevanceLlm: ' . $this->lastPostError);
            return null;
        }
        curl_setopt($ch, CURLOPT_URL, $this->url);
        curl_setopt($ch, CURLOPT_POST, true);
        curl_setopt($ch, CURLOPT_POSTFIELDS, $body);
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
        curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, min(15, $this->timeout));
        curl_setopt($ch, CURLOPT_TIMEOUT, $this->timeout);
        $headers = ['Content-Type: application/json'];
        if ($this->apiKey !== '') {
            $headers[] = 'Authorization: Bearer ' . $this->apiKey;
        }
        curl_setopt($ch, CURLOPT_HTTPHEADER, $headers);

        $raw = curl_exec($ch);
        if ($raw === false) {
            $this->lastPostError = 'LLM curl error: ' . curl_error($ch);
            \think\Log::warning('ReferenceRelevanceLlm: ' . $this->lastPostError);
            curl_close($ch);
            return null;
        }
        $httpCode = intval(curl_getinfo($ch, CURLINFO_HTTP_CODE));
        curl_close($ch);

        if ($httpCode < 200 || $httpCode >= 300) {
            // Keep only a short snippet of the error body for the message.
            $snippet = mb_substr(trim((string)$raw), 0, 200);
            $this->lastPostError = 'LLM HTTP ' . $httpCode . ($snippet !== '' ? ': ' . $snippet : '');
            \think\Log::warning('ReferenceRelevanceLlm: ' . $this->lastPostError);
            return null;
        }

        $data = json_decode($raw, true);
        if (!is_array($data)) {
            $this->lastPostError = 'LLM response is not valid JSON';
            \think\Log::warning('ReferenceRelevanceLlm: ' . $this->lastPostError);
            return null;
        }
        if (isset($data['choices'][0]['message']['content'])) {
            return (string)$data['choices'][0]['message']['content'];
        }
        if (isset($data['content'])) {
            return (string)$data['content'];
        }
        $this->lastPostError = 'LLM response missing content field';
        \think\Log::warning('ReferenceRelevanceLlm: ' . $this->lastPostError);
    } catch (\Exception $e) {
        $this->lastPostError = 'LLM exception: ' . $e->getMessage();
        \think\Log::warning('ReferenceRelevanceLlm: ' . $this->lastPostError);
    }
    return null;
}
/**
 * Decode the JSON carried in an LLM reply.
 *
 * Markdown code fences are stripped first. If the remaining text does not
 * decode to an array, the span from the first "{" to the last "}" is
 * tried instead.
 *
 * @param mixed $raw Reply text; cast to string.
 * @return array|null Decoded array, or null when nothing decodes to an array.
 */
private function parseJson($raw)
{
    $text = trim((string)$raw);
    if ($text === '') {
        return null;
    }

    // Drop ``` / ```json fence markers at line starts and line ends.
    $text = trim(preg_replace('/^```[a-zA-Z]*\s*|```$/m', '', $text));

    $decoded = json_decode($text, true);
    if (!is_array($decoded) && preg_match('/\{[\s\S]*\}/', $text, $braced)) {
        // Surrounding prose: retry with just the outermost {...} span.
        $decoded = json_decode($braced[0], true);
    }
    return is_array($decoded) ? $decoded : null;
}
}