新逻辑参考文献相关性整合之前的逻辑

This commit is contained in:
wyn
2026-06-30 09:30:33 +08:00
parent 9c8f7cc3b6
commit da71dfc04e
8 changed files with 162 additions and 190 deletions

View File

@@ -171,7 +171,10 @@ class ReferenceRelevanceLlmService
- supplementary_relevance部分相关、补充性
- minimal_relevance仅边缘/背景沾边
- no_meaningful_relevance与引用处核心表述基本无关
- reason文,须写明:①文献类型与**核心研究对象** ②**本文自身证据**覆盖了哪些 claim / 哪些未覆盖(含点名通路、功能结局、具体列举项;讨论转引须标明) ③**若仅为同主题不同 claim 或主语层级不对须明确写出** ④为何此分值而非更高或更低
- reason英双语结论,格式固定为两行:
【中文】(中文结论,须写明:①文献类型与**核心研究对象** ②**本文自身证据**覆盖了哪些 claim / 哪些未覆盖 ③主语/claim 不匹配须明确写出 ④为何此分值)
【English】与中文对应的英文结论语义一致
- reason_en仅英文结论与 reason 中【English】段相同勿留空
主语/层级不对 → 单条 **0.45**,不得因讨论提及相同通路给 0.78
引用处 claim 为「化合物 X 经 PI3K/AKT 等机制 demonstrated…」文献为其他植物提取物或计算预测、仅在讨论转引他人 X 机制 → 0.45weakly_relatedis_relevant=0。
@@ -190,9 +193,10 @@ class ReferenceRelevanceLlmService
- 多篇联合仍缺主语对齐、缺原句点名通路/结局、或主要靠讨论转引 → 联合分通常 **≤0.45~0.65**,不得因单篇讨论出现相同关键词给到 0.78+
- combined_is_relevantcombined_relevance_score>=0.65 为 1
- combined_relevance_level与 combined 分数对应的等级
- combined_reason综合结论,说明各文献分工如「文献2综述覆盖流行病学主 claim文献1仅机制补充」及最终分值理由
- combined_reason英双语综合结论,格式同 reason【中文】/【English】说明各文献分工及最终分值理由
- combined_reason_en仅英文综合结论与 combined_reason 中【English】段相同
单条引用时combined_* 与单条一致combined_reason 可写「」
单条引用时combined_* 与单条一致combined_reason / combined_reason_en 可与 reason / reason_en 相同
==================================================
【四、评分与等级对照】
@@ -218,11 +222,13 @@ class ReferenceRelevanceLlmService
"relevance_score": 0.45,
"relevance_level": "weakly_related",
"relevance_role": "minimal_relevance",
"reason": "中文单条结论",
"reason": "【中文】中文单条结论\n【English】English single-reference conclusion",
"reason_en": "English single-reference conclusion",
"combined_is_relevant": 1,
"combined_relevance_score": 0.92,
"combined_relevance_level": "highly_related",
"combined_reason": "中文联合结论"
"combined_reason": "【中文】中文联合结论\n【English】English combined conclusion",
"combined_reason_en": "English combined conclusion"
},
{
"reference_no": 2,
@@ -248,7 +254,7 @@ PROMPT;
if ($abstractText !== '') {
$parts[] = "【文献摘要/清洗后内容Europe PMC·PubMed·Crossref·PDF\n" . $abstractText;
}
$parts[] = '请先拆解最小主张单元(主语层级、证据来源、点名通路/结局逐项核对),判断每篇文献类型与**本文自身证据**,再**逐篇独立**给出单条 relevance_score讨论转引、提取物/计算预测不得抬高;弱相关文献不得因联合而高分),最后给出 combined_*。仅输出 results 数组 JSON。';
$parts[] = '请先拆解最小主张单元(主语层级、证据来源、点名通路/结局逐项核对),判断每篇文献类型与**本文自身证据**,再**逐篇独立**给出单条 relevance_score讨论转引、提取物/计算预测不得抬高;弱相关文献不得因联合而高分),最后给出 combined_*。reason / combined_reason 必须中英双语(【中文】/【English】并分别填写 reason_en / combined_reason_en。仅输出 results 数组 JSON。';
return implode("\n\n", $parts);
}
@@ -285,7 +291,10 @@ PROMPT;
$level = $this->levelFromScore($score, isset($item['relevance_level']) ? $item['relevance_level'] : '');
$role = $this->normalizeRelevanceRole(isset($item['relevance_role']) ? $item['relevance_role'] : '');
$reason = $this->cleanReason(isset($item['reason']) ? $item['reason'] : '');
list($reason, $reasonEn) = $this->normalizeBilingualReason(
isset($item['reason']) ? $item['reason'] : '',
isset($item['reason_en']) ? $item['reason_en'] : ''
);
list($score, $level, $isRelevant, $role) = $this->enforceSingleReferenceConsistency(
$score,
@@ -320,6 +329,14 @@ PROMPT;
$citeGroupRefs = trim((string)$defaultCiteGroupRefs);
}
list($combinedReason, $combinedReasonEn) = $this->normalizeBilingualReason(
isset($item['combined_reason']) ? $item['combined_reason'] : '',
isset($item['combined_reason_en']) ? $item['combined_reason_en'] : ''
);
if ($combinedReason === '' && $combinedReasonEn === '') {
list($combinedReason, $combinedReasonEn) = [$reason, $reasonEn];
}
$out[] = [
'reference_no' => $refNo,
'cite_group_refs' => $citeGroupRefs,
@@ -328,10 +345,12 @@ PROMPT;
'relevance_level' => $level,
'relevance_role' => $role,
'reason' => $reason,
'reason_en' => $reasonEn,
'combined_is_relevant' => $combinedRelevant ? 1 : 0,
'combined_relevance_score' => $combinedScore,
'combined_relevance_level' => $combinedLevel,
'combined_reason' => $this->cleanReason(isset($item['combined_reason']) ? $item['combined_reason'] : ''),
'combined_reason' => $combinedReason,
'combined_reason_en' => $combinedReasonEn,
];
}
@@ -441,6 +460,7 @@ PROMPT;
$out[$idx]['combined_relevance_score'] = floatval($src['combined_relevance_score']);
$out[$idx]['combined_relevance_level'] = (string)$src['combined_relevance_level'];
$out[$idx]['combined_reason'] = (string)$src['combined_reason'];
$out[$idx]['combined_reason_en'] = (string)$src['combined_reason_en'];
}
}
@@ -524,10 +544,45 @@ PROMPT;
/**
 * Normalize a reason string before it is returned to callers.
 *
 * Runs of spaces/tabs are collapsed to a single space and runs of three or
 * more newlines are reduced to one blank line. Single and double line breaks
 * are kept on purpose: the bilingual reason format puts the 【中文】 and
 * 【English】 sections on separate lines, so collapsing all whitespace would
 * merge them into one line.
 *
 * @param mixed $reason Raw reason text; cast to string.
 * @return string Normalized text, truncated to at most 2000 characters.
 */
private function cleanReason($reason)
{
    $text = (string)$reason;
    // Horizontal whitespace only — newlines must survive this step.
    $text = trim(preg_replace('/[ \t]+/u', ' ', $text));
    // Allow at most one empty line between paragraphs.
    $text = trim(preg_replace("/\n{3,}/u", "\n\n", $text));
    return mb_substr($text, 0, 2000);
}
/**
 * Normalize a reason into the bilingual "【中文】…\n【English】…" form and
 * derive the English-only text.
 *
 * Rules:
 *  - The English-only value is $reasonEn when non-empty; otherwise it is the
 *    text following the first 【English】 marker inside $reason (if any).
 *  - When $reason already carries both markers it is kept as written and
 *    only passed through cleanReason().
 *  - When $reason carries at most one marker and both a Chinese part and an
 *    English text are available, the canonical two-line form is rebuilt.
 *    This includes text such as "中文…\n【English】…" where the 【中文】
 *    marker is missing.
 *  - Otherwise $reason is passed through cleanReason() unchanged in shape.
 *
 * Every returned string goes through cleanReason(), so the rebuilt form gets
 * the same whitespace normalization and length cap as all other paths.
 *
 * @param mixed $reason   Reason text, possibly already bilingual.
 * @param mixed $reasonEn English-only reason text; may be empty.
 * @return array{0:string,1:string} [bilingual reason, english only]
 */
private function normalizeBilingualReason($reason, $reasonEn)
{
    $zhTag = '【中文】';
    $enTag = '【English】';

    $reason = trim((string)$reason);
    $reasonEn = $this->cleanReason($reasonEn);

    // strpos/substr work on bytes; the offsets stay consistent because both
    // markers are complete UTF-8 sequences.
    $zhPos = strpos($reason, $zhTag);
    $enPos = strpos($reason, $enTag);

    // Fall back to the English section embedded in $reason.
    if ($reasonEn === '' && $enPos !== false) {
        $reasonEn = $this->cleanReason(substr($reason, $enPos + strlen($enTag)));
    }

    // Already in bilingual form: keep the text as provided.
    if ($zhPos !== false && $enPos !== false) {
        return [$this->cleanReason($reason), $reasonEn];
    }

    // At most one marker is present. The Chinese part is everything before
    // the 【English】 marker (or the whole text), minus a leading 【中文】 marker.
    $zh = $enPos === false ? $reason : substr($reason, 0, $enPos);
    if ($zhPos !== false) {
        $zh = substr($zh, $zhPos + strlen($zhTag));
    }
    $zh = trim($zh);

    // Nothing to pair up: return the cleaned input as-is.
    if ($zh === '' || $reasonEn === '') {
        return [$this->cleanReason($reason), $reasonEn];
    }

    return [$this->cleanReason("{$zhTag}{$zh}\n{$enTag}{$reasonEn}"), $reasonEn];
}
private function boolVal($v)
{
if (is_bool($v)) {