新逻辑参考文献相关性整合之前的逻辑

This commit is contained in:
wyn
2026-06-30 09:30:33 +08:00
parent 9c8f7cc3b6
commit da71dfc04e
8 changed files with 162 additions and 190 deletions

View File

@@ -5,7 +5,6 @@ namespace app\common;
use think\Db;
use think\Env;
use app\common\service\LLMService;
use app\common\mq\ReferenceCheckMqPublisher;
/**
* 正文 <blue>[n]</blue> 引用与 t_production_article_referindex+1=n相关性校对。
@@ -4350,41 +4349,7 @@ class ReferenceCheckService
*/
public function startArticleCheckQueue(array $checkIds, $pArticleId = 0, $trigger = 'enqueue')
{
$checkIds = array_values(array_filter(array_map('intval', $checkIds)));
if (empty($checkIds)) {
return [];
}
$pArticleId = intval($pArticleId);
if ($pArticleId <= 0) {
$firstRow = Db::name('article_reference_check_result')->where('id', $checkIds[0])->find();
$pArticleId = empty($firstRow) ? 0 : intval($this->arrGet($firstRow, 'p_article_id', 0));
}
if ($pArticleId <= 0) {
throw new \RuntimeException('p_article_id is required for reference check queue');
}
$now = date('Y-m-d H:i:s');
$batchId = Db::name('article_reference_check_batch')->insertGetId([
'p_article_id' => $pArticleId,
'batch_status' => 0,
'total_count' => count($checkIds),
'done_count' => 0,
'failed_count' => 0,
'trigger' => (string)$trigger,
'created_at' => $now,
'updated_at' => $now,
]);
$shouldPublish = !$this->hasEarlierWaitingBatch($batchId) && !$this->hasRunningReferenceCheckBatch();
if ($shouldPublish) {
(new ReferenceCheckMqPublisher())->publishArticleStart($pArticleId, intval($batchId), $trigger);
$this->log('startArticleCheckQueue publish p_article_id=' . $pArticleId . ' batch_id=' . $batchId);
} else {
$this->log('startArticleCheckQueue queued batch_id=' . $batchId . ' p_article_id=' . $pArticleId);
}
return $checkIds;
throw new \RuntimeException('Support strength check queue is deprecated. Use ReferenceRelevanceCheckService.');
}
private function hasRunningReferenceCheckBatch()

View File

@@ -21,10 +21,4 @@ class RabbitMqConfig
$rc = self::get('reference_check', []);
return is_array($rc) ? $rc : [];
}
/**
 * Fetch the "reference_relevance" configuration section.
 *
 * @return array The value returned by self::get('reference_relevance', []),
 *               or an empty array when that value is not an array.
 */
public static function referenceRelevance()
{
    $section = self::get('reference_relevance', []);
    if (!is_array($section)) {
        return [];
    }
    return $section;
}
}

View File

@@ -4,10 +4,11 @@ namespace app\common\mq;
use think\Db;
use app\common\DbReconnectHelper;
use app\common\ReferenceCheckService;
use app\common\ReferenceRelevanceCheckService;
/**
* RabbitMQ 消费:按文章串行,文章内 reference_no 升序逐条校对(含低分同步二轮)
* RabbitMQ 消费(队列 reference_check / ref_check.article
* 全局文章串行,文章内 reference_no 升序链式逐条「主题相关性」校对。
*/
class ReferenceCheckArticleWorker
{
@@ -16,12 +17,12 @@ class ReferenceCheckArticleWorker
const BATCH_DONE = 2;
const BATCH_PARTIAL_FAILED = 3;
/** @var ReferenceCheckService */
/** @var ReferenceRelevanceCheckService */
private $svc;
public function __construct()
{
$this->svc = new ReferenceCheckService();
$this->svc = new ReferenceRelevanceCheckService();
}
public function handleMessage(array $payload)
@@ -29,6 +30,7 @@ class ReferenceCheckArticleWorker
DbReconnectHelper::ensure();
$pArticleId = intval(isset($payload['p_article_id']) ? $payload['p_article_id'] : 0);
$batchId = intval(isset($payload['batch_id']) ? $payload['batch_id'] : 0);
$trigger = isset($payload['trigger']) ? (string)$payload['trigger'] : 'enqueue';
if ($pArticleId <= 0 || $batchId <= 0) {
$this->svc->log('ReferenceCheckArticleWorker invalid payload');
return;
@@ -36,7 +38,11 @@ class ReferenceCheckArticleWorker
if (!$this->canStartArticleWork($batchId)) {
$this->svc->log('ReferenceCheckArticleWorker defer batch_id=' . $batchId . ' other article running');
(new ReferenceCheckMqPublisher())->publishArticleStart($pArticleId, $batchId, isset($payload['trigger']) ? $payload['trigger'] : 'enqueue');
(new ReferenceCheckMqPublisher())->publishArticleStart(
$pArticleId,
$batchId,
isset($payload['trigger']) ? $payload['trigger'] : 'enqueue'
);
sleep(3);
return;
}
@@ -48,6 +54,11 @@ class ReferenceCheckArticleWorker
}
}
$this->svc->recoverQueueRowsForArticle($pArticleId);
if ($trigger !== 'recheck_pending_only'
&& ReferenceRelevanceCheckService::PREPARE_LITERATURE_BEFORE_CHECK) {
$this->svc->prepareLiteratureContentByArticle($pArticleId);
}
$this->svc->log('ReferenceCheckArticleWorker start p_article_id=' . $pArticleId . ' batch_id=' . $batchId);
$done = 0;
@@ -61,7 +72,7 @@ class ReferenceCheckArticleWorker
if ($checkId <= 0) {
continue;
}
$result = $this->processOneRow($checkId, $row);
$result = $this->processOneRow($checkId, $row, $trigger === 'recheck_pending_only');
if ($result === 'ok') {
$done++;
} elseif ($result === 'failed') {
@@ -77,7 +88,7 @@ class ReferenceCheckArticleWorker
private function canStartArticleWork($batchId)
{
$running = Db::name('article_reference_check_batch')
$running = Db::name('article_reference_relevance_check_batch')
->where('batch_status', self::BATCH_RUNNING)
->where('id', '<>', intval($batchId))
->count();
@@ -87,7 +98,7 @@ class ReferenceCheckArticleWorker
private function claimBatch($batchId)
{
$now = date('Y-m-d H:i:s');
$affected = Db::name('article_reference_check_batch')
$affected = Db::name('article_reference_relevance_check_batch')
->where('id', intval($batchId))
->whereIn('batch_status', [self::BATCH_WAITING, self::BATCH_RUNNING])
->update([
@@ -99,15 +110,15 @@ class ReferenceCheckArticleWorker
private function getBatch($batchId)
{
return Db::name('article_reference_check_batch')->where('id', intval($batchId))->find();
return Db::name('article_reference_relevance_check_batch')->where('id', intval($batchId))->find();
}
private function fetchNextPendingRow($pArticleId)
{
return Db::name('article_reference_check_result')
return Db::name('article_reference_relevance_check_result')
->where('p_article_id', intval($pArticleId))
->where('queue_status', ReferenceCheckService::QUEUE_PENDING)
->where('status', ReferenceCheckService::RECORD_PENDING)
->where('queue_status', ReferenceRelevanceCheckService::QUEUE_PENDING)
->where('status', ReferenceRelevanceCheckService::RECORD_PENDING)
->order('reference_no asc,am_id asc,text_start asc,id asc')
->find();
}
@@ -115,46 +126,44 @@ class ReferenceCheckArticleWorker
/**
* @return string ok|failed|skip
*/
private function processOneRow($checkId, array $row)
private function processOneRow($checkId, array $row, $skipLiteratureFetch = false)
{
DbReconnectHelper::ensure();
$claimed = Db::name('article_reference_check_result')
$claimed = Db::name('article_reference_relevance_check_result')
->where('id', intval($checkId))
->where('queue_status', ReferenceCheckService::QUEUE_PENDING)
->update(['queue_status' => ReferenceCheckService::QUEUE_RUNNING]);
->where('queue_status', ReferenceRelevanceCheckService::QUEUE_PENDING)
->update(['queue_status' => ReferenceRelevanceCheckService::QUEUE_RUNNING]);
if (intval($claimed) <= 0) {
return 'skip';
}
$retryCount = intval(isset($row['retry_count']) ? $row['retry_count'] : 0);
try {
$this->svc->runReferenceCheckOnce($checkId);
$amId = intval(isset($row['am_id']) ? $row['am_id'] : 0);
if ($amId > 0) {
$this->svc->syncAmRefCheckStatus($amId);
}
$this->svc->markQueueRuntime($checkId, ReferenceCheckService::QUEUE_COMPLETED, $retryCount);
$this->svc->runCheckOnce($checkId, $skipLiteratureFetch);
$this->svc->markQueueRuntime($checkId, ReferenceRelevanceCheckService::QUEUE_COMPLETED, $retryCount);
return 'ok';
} catch (\Exception $e) {
$this->svc->log('ReferenceCheckArticleWorker check_id=' . $checkId . ' err=' . $e->getMessage());
DbReconnectHelper::ensure();
if ($retryCount < ReferenceCheckService::QUEUE_MAX_RETRY) {
$this->svc->markQueueRuntime($checkId, ReferenceCheckService::QUEUE_PENDING, $retryCount + 1);
return $this->processOneRow($checkId, array_merge($row, ['retry_count' => $retryCount + 1]));
if ($retryCount < ReferenceRelevanceCheckService::QUEUE_MAX_RETRY) {
$this->svc->markQueueRuntime($checkId, ReferenceRelevanceCheckService::QUEUE_PENDING, $retryCount + 1);
return $this->processOneRow($checkId, array_merge($row, ['retry_count' => $retryCount + 1]), $skipLiteratureFetch);
}
try {
$this->svc->updateCheckResult($checkId, [
'status' => ReferenceCheckService::RECORD_FAILED,
'error_msg' => $e->getMessage(),
]);
$this->svc->markQueueRuntime($checkId, ReferenceCheckService::QUEUE_FAILED, $retryCount);
$fresh = Db::name('article_reference_relevance_check_result')->where('id', intval($checkId))->find();
$groupRows = !empty($fresh) ? $this->svc->findCitationGroupRowsForWorker($fresh) : [];
if (!empty($groupRows)) {
$this->svc->failGroupWithQueue($groupRows, $e->getMessage(), $retryCount);
} else {
$this->svc->updateCheckResult($checkId, [
'status' => ReferenceRelevanceCheckService::RECORD_FAILED,
'error_msg' => $e->getMessage(),
]);
$this->svc->markQueueRuntime($checkId, ReferenceRelevanceCheckService::QUEUE_FAILED, $retryCount);
}
} catch (\Exception $e2) {
\think\Log::error('ReferenceCheckArticleWorker markFailed: ' . $e2->getMessage());
}
$amId = intval(isset($row['am_id']) ? $row['am_id'] : 0);
if ($amId > 0) {
$this->svc->syncAmRefCheckStatus($amId);
}
return 'failed';
}
}
@@ -170,7 +179,7 @@ class ReferenceCheckArticleWorker
if ($failed > 0) {
$status = self::BATCH_PARTIAL_FAILED;
}
Db::name('article_reference_check_batch')->where('id', intval($batchId))->update([
Db::name('article_reference_relevance_check_batch')->where('id', intval($batchId))->update([
'batch_status' => $status,
'done_count' => intval($done),
'failed_count' => intval($failed),
@@ -183,7 +192,7 @@ class ReferenceCheckArticleWorker
private function publishNextWaitingBatch()
{
$next = Db::name('article_reference_check_batch')
$next = Db::name('article_reference_relevance_check_batch')
->where('batch_status', self::BATCH_WAITING)
->order('id asc')
->find();
@@ -197,8 +206,8 @@ class ReferenceCheckArticleWorker
isset($next['trigger']) ? $next['trigger'] : 'enqueue'
);
} catch (\Exception $e) {
$this->svc->log('publishNextWaitingBatch failed: ' . $e->getMessage());
\think\Log::error('publishNextWaitingBatch: ' . $e->getMessage());
$this->svc->log('ReferenceCheck publishNextWaitingBatch failed: ' . $e->getMessage());
\think\Log::error('ReferenceCheck publishNextWaitingBatch: ' . $e->getMessage());
}
}
}

View File

@@ -171,7 +171,10 @@ class ReferenceRelevanceLlmService
- supplementary_relevance部分相关、补充性
- minimal_relevance仅边缘/背景沾边
- no_meaningful_relevance与引用处核心表述基本无关
- reason文,须写明:①文献类型与**核心研究对象** ②**本文自身证据**覆盖了哪些 claim / 哪些未覆盖(含点名通路、功能结局、具体列举项;讨论转引须标明) ③**若仅为同主题不同 claim 或主语层级不对须明确写出** ④为何此分值而非更高或更低
- reason英双语结论,格式固定为两行:
【中文】(中文结论,须写明:①文献类型与**核心研究对象** ②**本文自身证据**覆盖了哪些 claim / 哪些未覆盖 ③主语/claim 不匹配须明确写出 ④为何此分值)
【English】与中文对应的英文结论语义一致
- reason_en仅英文结论与 reason 中【English】段相同勿留空
主语/层级不对 → 单条 **0.45**,不得因讨论提及相同通路给 0.78
引用处 claim 为「化合物 X 经 PI3K/AKT 等机制 demonstrated…」文献为其他植物提取物或计算预测、仅在讨论转引他人 X 机制 → 0.45weakly_relatedis_relevant=0。
@@ -190,9 +193,10 @@ class ReferenceRelevanceLlmService
- 多篇联合仍缺主语对齐、缺原句点名通路/结局、或主要靠讨论转引 → 联合分通常 **≤0.45~0.65**,不得因单篇讨论出现相同关键词给到 0.78+
- combined_is_relevantcombined_relevance_score>=0.65 为 1
- combined_relevance_level与 combined 分数对应的等级
- combined_reason综合结论,说明各文献分工如「文献2综述覆盖流行病学主 claim文献1仅机制补充」及最终分值理由
- combined_reason英双语综合结论,格式同 reason【中文】/【English】说明各文献分工及最终分值理由
- combined_reason_en仅英文综合结论与 combined_reason 中【English】段相同
单条引用时combined_* 与单条一致combined_reason 可写「」
单条引用时combined_* 与单条一致combined_reason / combined_reason_en 可与 reason / reason_en 相同
==================================================
【四、评分与等级对照】
@@ -218,11 +222,13 @@ class ReferenceRelevanceLlmService
"relevance_score": 0.45,
"relevance_level": "weakly_related",
"relevance_role": "minimal_relevance",
"reason": "中文单条结论",
"reason": "【中文】中文单条结论\n【English】English single-reference conclusion",
"reason_en": "English single-reference conclusion",
"combined_is_relevant": 1,
"combined_relevance_score": 0.92,
"combined_relevance_level": "highly_related",
"combined_reason": "中文联合结论"
"combined_reason": "【中文】中文联合结论\n【English】English combined conclusion",
"combined_reason_en": "English combined conclusion"
},
{
"reference_no": 2,
@@ -248,7 +254,7 @@ PROMPT;
if ($abstractText !== '') {
$parts[] = "【文献摘要/清洗后内容Europe PMC·PubMed·Crossref·PDF\n" . $abstractText;
}
$parts[] = '请先拆解最小主张单元(主语层级、证据来源、点名通路/结局逐项核对),判断每篇文献类型与**本文自身证据**,再**逐篇独立**给出单条 relevance_score讨论转引、提取物/计算预测不得抬高;弱相关文献不得因联合而高分),最后给出 combined_*。仅输出 results 数组 JSON。';
$parts[] = '请先拆解最小主张单元(主语层级、证据来源、点名通路/结局逐项核对),判断每篇文献类型与**本文自身证据**,再**逐篇独立**给出单条 relevance_score讨论转引、提取物/计算预测不得抬高;弱相关文献不得因联合而高分),最后给出 combined_*。reason / combined_reason 必须中英双语(【中文】/【English】并分别填写 reason_en / combined_reason_en。仅输出 results 数组 JSON。';
return implode("\n\n", $parts);
}
@@ -285,7 +291,10 @@ PROMPT;
$level = $this->levelFromScore($score, isset($item['relevance_level']) ? $item['relevance_level'] : '');
$role = $this->normalizeRelevanceRole(isset($item['relevance_role']) ? $item['relevance_role'] : '');
$reason = $this->cleanReason(isset($item['reason']) ? $item['reason'] : '');
list($reason, $reasonEn) = $this->normalizeBilingualReason(
isset($item['reason']) ? $item['reason'] : '',
isset($item['reason_en']) ? $item['reason_en'] : ''
);
list($score, $level, $isRelevant, $role) = $this->enforceSingleReferenceConsistency(
$score,
@@ -320,6 +329,14 @@ PROMPT;
$citeGroupRefs = trim((string)$defaultCiteGroupRefs);
}
list($combinedReason, $combinedReasonEn) = $this->normalizeBilingualReason(
isset($item['combined_reason']) ? $item['combined_reason'] : '',
isset($item['combined_reason_en']) ? $item['combined_reason_en'] : ''
);
if ($combinedReason === '' && $combinedReasonEn === '') {
list($combinedReason, $combinedReasonEn) = [$reason, $reasonEn];
}
$out[] = [
'reference_no' => $refNo,
'cite_group_refs' => $citeGroupRefs,
@@ -328,10 +345,12 @@ PROMPT;
'relevance_level' => $level,
'relevance_role' => $role,
'reason' => $reason,
'reason_en' => $reasonEn,
'combined_is_relevant' => $combinedRelevant ? 1 : 0,
'combined_relevance_score' => $combinedScore,
'combined_relevance_level' => $combinedLevel,
'combined_reason' => $this->cleanReason(isset($item['combined_reason']) ? $item['combined_reason'] : ''),
'combined_reason' => $combinedReason,
'combined_reason_en' => $combinedReasonEn,
];
}
@@ -441,6 +460,7 @@ PROMPT;
$out[$idx]['combined_relevance_score'] = floatval($src['combined_relevance_score']);
$out[$idx]['combined_relevance_level'] = (string)$src['combined_relevance_level'];
$out[$idx]['combined_reason'] = (string)$src['combined_reason'];
$out[$idx]['combined_reason_en'] = (string)$src['combined_reason_en'];
}
}
@@ -524,10 +544,45 @@ PROMPT;
private function cleanReason($reason)
{
$reason = trim(preg_replace('/\s+/u', ' ', (string)$reason));
$reason = trim(preg_replace('/[ \t]+/u', ' ', (string)$reason));
$reason = trim(preg_replace("/\n{3,}/u", "\n\n", $reason));
return mb_substr($reason, 0, 2000);
}
/**
 * Normalise a model-produced reason into a bilingual string plus an
 * English-only string.
 *
 * The canonical bilingual layout is two lines:
 *   "【中文】<Chinese text>" followed by "【English】<English text>".
 *
 * Resolution rules:
 * - English part: $reasonEn after cleanReason(); when that is empty, the text
 *   following the 【English】 tag inside $reason (if present) is used instead.
 * - Chinese part: the text after 【中文】 (up to a following "\n【English】" or
 *   the end of the string); without a 【中文】 tag it is the text preceding
 *   【English】; without either tag it is the whole trimmed reason.
 * - When both parts are known and at least one tag is missing from $reason,
 *   the canonical two-line layout is rebuilt. Otherwise $reason is returned
 *   after cleanReason().
 *
 * @param mixed $reason   Raw "reason" value (cast to string).
 * @param mixed $reasonEn Raw "reason_en" value (passed to cleanReason()).
 * @return array{0:string,1:string} [bilingual reason, english only]
 */
private function normalizeBilingualReason($reason, $reasonEn)
{
    $reason = trim((string)$reason);
    $reasonEn = $this->cleanReason($reasonEn);

    $hasZhTag = strpos($reason, '【中文】') !== false;
    $hasEnTag = strpos($reason, '【English】') !== false;

    // Fall back to the English segment embedded in the bilingual text.
    if ($reasonEn === '' && preg_match('/【English】\s*(.+)$/us', $reason, $m)) {
        $reasonEn = $this->cleanReason($m[1]);
    }

    $zh = '';
    if (preg_match('/【中文】\s*(.*?)(?:\n【English】|$)/us', $reason, $m)) {
        $zh = trim($m[1]);
    } elseif ($hasEnTag) {
        // No 【中文】 tag: treat whatever precedes 【English】 as the Chinese part,
        // so "中文…\n【English】…" can still be rebuilt into the canonical layout.
        $zh = trim((string)strstr($reason, '【English】', true));
    } else {
        // Untagged text is taken as the Chinese conclusion (already trimmed).
        $zh = $reason;
    }

    // Rebuild only when a tag is missing; a fully tagged reason is kept as written.
    if ($zh !== '' && $reasonEn !== '' && (!$hasZhTag || !$hasEnTag)) {
        $reason = "【中文】{$zh}\n【English】{$reasonEn}";
    } else {
        $reason = $this->cleanReason($reason);
    }

    return [$reason, $reasonEn];
}
private function boolVal($v)
{
if (is_bool($v)) {