后开始;段内首个引用:从段落开头到本标签前(非仅最后一句)
+ if ($hasPriorCiteInParagraph) {
+ $localStart = $prevTagEnd;
+ } else {
+ $localStart = $this->capContextStartBeforeTag($content, $tagStart, $paragraphStart);
+ }
// 默认:引用标签前的论述(如 Yin et al. [13] → 含 “higher than … Yin et al.”)
$localEnd = $tagStart;
@@ -894,6 +898,63 @@ class ReferenceCheckService
return true;
}
+    /**
+     * Locates the byte offset where the paragraph containing a citation tag begins.
+     *
+     * The part of $content before $tagStart is scanned for the last paragraph
+     * boundary: an opening <p ...> tag, a closing </p>, a <br> tag, or a newline.
+     * Anchoring on the paragraph (rather than on the last full stop) keeps
+     * multi-sentence English paragraphs from being reduced to their final sentence.
+     *
+     * @param string $content  Article body (HTML or plain text).
+     * @param int    $tagStart Byte offset of the citation tag inside $content.
+     * @return int Byte offset just after the nearest preceding boundary, or 0 when none is found.
+     */
+    private function findParagraphStart($content, $tagStart)
+    {
+        $search = substr($content, 0, max(0, $tagStart));
+        if ($search === '') {
+            return 0;
+        }
+
+        $best = 0;
+
+        // HTML paragraph boundaries. `\b` stops <pre>, <param>, ... from counting as <p>.
+        $boundaryPatterns = [
+            '/<p\b[^>]*>/i',
+            '/<\/p>\s*/i',
+            '/<br\s*\/?>\s*/i',
+        ];
+        foreach ($boundaryPatterns as $pattern) {
+            if (preg_match_all($pattern, $search, $m, PREG_OFFSET_CAPTURE)) {
+                // With preg_match_all, $m[0] is a list of [text, byteOffset]; the last one is nearest to the tag.
+                $last = end($m[0]);
+                $best = max($best, $last[1] + strlen($last[0]));
+            }
+        }
+
+        // Any newline ends a paragraph; this also covers blank-line ("\n\n") breaks,
+        // because the last "\n" is never earlier than the end of the last "\n\n".
+        $pos = strrpos($search, "\n");
+        if ($pos !== false) {
+            $best = max($best, $pos + 1);
+        }
+
+        return $best;
+    }
+
+    /**
+     * Caps how far back the context before a citation tag may reach, so that a
+     * single LLM request does not receive an oversized paragraph.
+     *
+     * @param string $content        Article body.
+     * @param int    $tagStart       Byte offset of the citation tag.
+     * @param int    $paragraphStart Byte offset where the preferred context starts.
+     * @param int    $maxBytes       Maximum number of bytes allowed between the start and the tag.
+     * @return int $paragraphStart when it is within the cap; otherwise an offset at most
+     *             $maxBytes before the tag, moved forward to just after the first sentence
+     *             boundary inside that window when one exists.
+     */
+    private function capContextStartBeforeTag($content, $tagStart, $paragraphStart, $maxBytes = 2500)
+    {
+        if ($tagStart - $paragraphStart <= $maxBytes) {
+            return $paragraphStart;
+        }
+
+        $start = $tagStart - $maxBytes;
+        // The cut is byte-based and may land inside a multi-byte UTF-8 character.
+        // Skip continuation bytes (10xxxxxx) so the slice is valid UTF-8; otherwise
+        // the /u match below fails and the sentence-boundary search is lost.
+        while ($start < $tagStart && isset($content[$start]) && (ord($content[$start]) & 0xC0) === 0x80) {
+            $start++;
+        }
+
+        $slice = substr($content, $start, $tagStart - $start);
+        // preg_match + PREG_OFFSET_CAPTURE gives $m[0] = [text, byteOffset] for the FIRST
+        // boundary in the window, which keeps as many whole sentences as the cap allows.
+        if (preg_match('/[.!?。!?]\s+/u', $slice, $m, PREG_OFFSET_CAPTURE)) {
+            return $start + $m[0][1] + strlen($m[0][0]);
+        }
+
+        return max($paragraphStart, $start);
+    }
+
private function findSentenceStart($content, $position)
{
$start = 0;
From 3663dd4ea69be8f4010bdcc2838d563f6d23a092 Mon Sep 17 00:00:00 2001
From: wyn <1074145239@qq.com>
Date: Thu, 21 May 2026 14:37:04 +0800
Subject: [PATCH 06/12] Changes
---
application/common/ReferenceCheckService.php | 25 +++++++++++++++-----
application/common/service/LLMService.php | 23 ++++++++++++++----
2 files changed, 38 insertions(+), 10 deletions(-)
diff --git a/application/common/ReferenceCheckService.php b/application/common/ReferenceCheckService.php
index be13d089..65ee9abc 100644
--- a/application/common/ReferenceCheckService.php
+++ b/application/common/ReferenceCheckService.php
@@ -193,7 +193,7 @@ class ReferenceCheckService
'created_at' => $now,
'updated_at' => $now,
]);
-
+// continue;
$this->pushJob(intval($checkId), $delay);
$checkIds[] = $checkId;
$queued++;
@@ -697,20 +697,32 @@ class ReferenceCheckService
}
$hasPriorCiteInParagraph = ($prevTagEnd > $paragraphStart);
- // 同段后续引用:从上一 后开始;段内首个引用:从段落开头到本标签前(非仅最后一句)
+ // 同段后续引用:从上一 后开始;段内首个引用:从段落开头到本标签前
if ($hasPriorCiteInParagraph) {
$localStart = $prevTagEnd;
} else {
- $localStart = $this->capContextStartBeforeTag($content, $tagStart, $paragraphStart);
+ $sentenceStart = $this->findSentenceStart($content, $tagStart);
+ $localStart = $this->capContextStartBeforeTag(
+ $content,
+ $tagStart,
+ max($paragraphStart, $sentenceStart)
+ );
}
- // 默认:引用标签前的论述(如 Yin et al. [13] → 含 “higher than … Yin et al.”)
+ // 默认:引用标签前的论述
$localEnd = $tagStart;
$originalText = $this->buildCitationContextText($content, $localStart, $localEnd);
- // 标签前几乎无正文(如句末 … ICU nurses [14])→ 改用标签后至下一引用或句末
+ // 同句多引(如 …[23] and external environment [24]):上一标签后仅几个词,回退到本句开头
+ if ($hasPriorCiteInParagraph && mb_strlen($originalText) < 50) {
+ $sentenceStart = $this->findSentenceStart($content, $tagStart);
+ $localStart = max($paragraphStart, $sentenceStart);
+ $originalText = $this->buildCitationContextText($content, $localStart, $localEnd);
+ }
+
+ // 仅段内首个引用且标签前极短时才用标签后文(避免 [24] 误截到 [25] 所在句)
if (!$this->isMeaningfulCitationContext($originalText)
- || $this->shouldUseTrailingCitationContext($content, $localStart, $tagStart, $tagEnd)
+ || (!$hasPriorCiteInParagraph && $this->shouldUseTrailingCitationContext($content, $localStart, $tagStart, $tagEnd))
) {
$trailEnd = ($nextTagStart < $sentenceEnd) ? $nextTagStart : $sentenceEnd;
$trailText = $this->buildCitationContextText($content, $tagEnd, $trailEnd);
@@ -811,6 +823,7 @@ class ReferenceCheckService
$text = trim(strip_tags($text));
$text = preg_replace('/\s+/u', ' ', $text);
$text = ltrim($text, "\xEF\xBB\xBF");
+ $text = preg_replace('/^[\s.,。;、:!?]+/u', '', $text);
return $text;
}
diff --git a/application/common/service/LLMService.php b/application/common/service/LLMService.php
index ce66056c..4ffe0cda 100644
--- a/application/common/service/LLMService.php
+++ b/application/common/service/LLMService.php
@@ -709,13 +709,27 @@ PROMPT;
}
/**
- * 将模型输出的 confidence 吸附到固定档位,并与 is_match 规则对齐
+ * 与 buildReferenceCheckSystemPrompt3 一致的 confidence 档位
+ */
+    private function getReferenceCheckConfidenceBands($isMatch)
+    {
+        // Bands allowed when the reference is judged to match the citing text.
+        if ($isMatch) {
+            return [0.65, 0.78, 0.85, 0.92, 0.98];
+        }
+
+        // Bands allowed when it is judged not to match.
+        return [0.15, 0.25, 0.35, 0.45];
+    }
+
+ /**
+ * 将模型输出的 confidence 吸附到合法档位(如 0.95 → 0.92,0.75 → 0.78)
*/
private function snapReferenceCheckConfidence($confidence, $isMatch)
{
- $matchBands = [0.75, 0.85, 0.95];
- $mismatchBands = [0.15, 0.25, 0.35];
- $bands = $isMatch ? $matchBands : $mismatchBands;
+ $bands = $this->getReferenceCheckConfidenceBands($isMatch);
+
+ foreach ($bands as $band) {
+ if (abs($confidence - $band) < 0.001) {
+ return $band;
+ }
+ }
$nearest = $bands[0];
$minDiff = abs($confidence - $nearest);
@@ -726,6 +740,7 @@ PROMPT;
$nearest = $band;
}
}
+
return $nearest;
}
From 8cd033a56da243a6e7d1563705f498c38a486d39 Mon Sep 17 00:00:00 2001
From: wyn <1074145239@qq.com>
Date: Thu, 21 May 2026 15:19:07 +0800
Subject: [PATCH 07/12] =?UTF-8?q?Changes=20=E5=8E=9F=E6=96=87=E5=86=85?=
=?UTF-8?q?=E5=AE=B9=E6=88=AA=E5=8F=96=E7=9A=84=E5=B7=B2=E7=BB=8F=E5=BE=88?=
=?UTF-8?q?=E5=A5=BD=E4=BA=86?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
application/common/ReferenceCheckService.php | 65 +++++++++++++-------
1 file changed, 42 insertions(+), 23 deletions(-)
diff --git a/application/common/ReferenceCheckService.php b/application/common/ReferenceCheckService.php
index 65ee9abc..d56c0af0 100644
--- a/application/common/ReferenceCheckService.php
+++ b/application/common/ReferenceCheckService.php
@@ -193,7 +193,7 @@ class ReferenceCheckService
'created_at' => $now,
'updated_at' => $now,
]);
-// continue;
+ continue;
$this->pushJob(intval($checkId), $delay);
$checkIds[] = $checkId;
$queued++;
@@ -677,6 +677,9 @@ class ReferenceCheckService
return $result;
}
+ /** 与上一引用间距低于此值(字符)时视为同句并列,从整句开头截取而非仅取两标签之间 */
+ const CITE_GAP_SENTENCE_THRESHOLD = 60;
+
/**
* 按引用位置截取局部上下文:优先取标签前叙述;同句多引时后续引用从上一标签后开始。
*/
@@ -697,32 +700,37 @@ class ReferenceCheckService
}
$hasPriorCiteInParagraph = ($prevTagEnd > $paragraphStart);
- // 同段后续引用:从上一 后开始;段内首个引用:从段落开头到本标签前
if ($hasPriorCiteInParagraph) {
- $localStart = $prevTagEnd;
+ $gapText = $this->buildCitationContextText($content, $prevTagEnd, $tagStart);
+ // 如 motivation [23] and external environment [24]:间距短,取整句而非仅 “and external environment”
+ if (mb_strlen($gapText) < self::CITE_GAP_SENTENCE_THRESHOLD) {
+ $sentenceStart = $this->findSentenceStart($content, $tagStart);
+ $localStart = $this->capContextStartBeforeTag(
+ $content,
+ $tagStart,
+ max($paragraphStart, $sentenceStart)
+ );
+ } else {
+ // 如 … Yin et al. [13] on oncology nurses, but … Yang [14]:间距较长,取上一标签后至本标签前
+ $localStart = $prevTagEnd;
+ }
} else {
- $sentenceStart = $this->findSentenceStart($content, $tagStart);
- $localStart = $this->capContextStartBeforeTag(
- $content,
- $tagStart,
- max($paragraphStart, $sentenceStart)
- );
+ $localStart = $this->capContextStartBeforeTag($content, $tagStart, $paragraphStart);
}
- // 默认:引用标签前的论述
+ // 默认:引用标签前的论述(如 Yin et al. [13] → 含 “higher than … Yin et al.”)
$localEnd = $tagStart;
$originalText = $this->buildCitationContextText($content, $localStart, $localEnd);
- // 同句多引(如 …[23] and external environment [24]):上一标签后仅几个词,回退到本句开头
- if ($hasPriorCiteInParagraph && mb_strlen($originalText) < 50) {
- $sentenceStart = $this->findSentenceStart($content, $tagStart);
- $localStart = max($paragraphStart, $sentenceStart);
- $originalText = $this->buildCitationContextText($content, $localStart, $localEnd);
- }
-
- // 仅段内首个引用且标签前极短时才用标签后文(避免 [24] 误截到 [25] 所在句)
+ // 标签前几乎无正文(如句末 … ICU nurses [14])→ 改用标签后至下一引用或句末
if (!$this->isMeaningfulCitationContext($originalText)
- || (!$hasPriorCiteInParagraph && $this->shouldUseTrailingCitationContext($content, $localStart, $tagStart, $tagEnd))
+ || $this->shouldUseTrailingCitationContext(
+ $content,
+ $localStart,
+ $tagStart,
+ $tagEnd,
+ $hasPriorCiteInParagraph
+ )
) {
$trailEnd = ($nextTagStart < $sentenceEnd) ? $nextTagStart : $sentenceEnd;
$trailText = $this->buildCitationContextText($content, $tagEnd, $trailEnd);
@@ -749,12 +757,23 @@ class ReferenceCheckService
/**
* 标签前仅有作者缩写等极短片段时,改用标签后上下文
+ *
+ * @param bool $hasPriorCiteInParagraph 同段多引时,短片段常为并列成分,不应误取标签后下一句
*/
- private function shouldUseTrailingCitationContext($content, $localStart, $tagStart, $tagEnd)
- {
+ private function shouldUseTrailingCitationContext(
+ $content,
+ $localStart,
+ $tagStart,
+ $tagEnd,
+ $hasPriorCiteInParagraph = false
+ ) {
$before = $this->buildCitationContextText($content, $localStart, $tagStart);
if (!$this->isMeaningfulCitationContext($before)) {
- return true;
+ return !$hasPriorCiteInParagraph;
+ }
+
+ if ($hasPriorCiteInParagraph) {
+ return false;
}
return mb_strlen($before) < 25;
@@ -823,7 +842,7 @@ class ReferenceCheckService
$text = trim(strip_tags($text));
$text = preg_replace('/\s+/u', ' ', $text);
$text = ltrim($text, "\xEF\xBB\xBF");
- $text = preg_replace('/^[\s.,。;、:!?]+/u', '', $text);
+ $text = preg_replace('/^[\s.!?。!?,,、;:]+/u', '', $text);
return $text;
}
From d9c32430538004e44eea249ad72491f9376e8224 Mon Sep 17 00:00:00 2001
From: wyn <1074145239@qq.com>
Date: Thu, 21 May 2026 16:24:34 +0800
Subject: [PATCH 08/12] Changes
---
application/api/controller/Article.php | 4 ++++
application/api/job/ReferenceCheck.php | 9 +++++++--
application/common/ReferenceCheckService.php | 3 +++
3 files changed, 14 insertions(+), 2 deletions(-)
diff --git a/application/api/controller/Article.php b/application/api/controller/Article.php
index e2875d01..b217e4c1 100644
--- a/application/api/controller/Article.php
+++ b/application/api/controller/Article.php
@@ -6640,6 +6640,10 @@ class Article extends Base
return jsonError($e->getMessage());
}
}
+ public function checkOne(){
+ $svc = new ReferenceCheckService();
+ $svc->checkOne();
+ }
public function referenceCheckEnqueueArticleMain(){
$amId = 127448;
$svc = new ReferenceCheckService();
diff --git a/application/api/job/ReferenceCheck.php b/application/api/job/ReferenceCheck.php
index 5058bdc1..0c15c4f5 100644
--- a/application/api/job/ReferenceCheck.php
+++ b/application/api/job/ReferenceCheck.php
@@ -82,10 +82,15 @@ class ReferenceCheck
$llm = new LLMService();
$llmResult = $llm->checkReference($contentA, $contentB);
+ $isMatch = !empty($llmResult['is_match']);
+ $confidence = $llm->enforceReferenceCheckConfidence(
+ isset($llmResult['confidence']) ? $llmResult['confidence'] : 0,
+ $isMatch
+ );
Db::name('article_reference_check_result')->where('id', $checkId)->update([
- 'is_match' => !empty($llmResult['is_match']) ? 1 : 0,
- 'confidence' => $llmResult['confidence'],
+ 'is_match' => $isMatch ? 1 : 0,
+ 'confidence' => $confidence,
'reason' => $llmResult['reason'],
'status' => 1,
'error_msg' => '',
diff --git a/application/common/ReferenceCheckService.php b/application/common/ReferenceCheckService.php
index d56c0af0..f1903ca4 100644
--- a/application/common/ReferenceCheckService.php
+++ b/application/common/ReferenceCheckService.php
@@ -131,6 +131,9 @@ class ReferenceCheckService
$this->setAmRefCheckStatus($amId, self::AM_STATUS_RUNNING);
}
+ public function checkOne(){
+ $this->pushJob(intval(722), 0);
+ }
public function enqueueByArticle($articleId){
if ($articleId <= 0) {
throw new \InvalidArgumentException('article_id is required');
From f118a799c22c18c49e4779b438832285631a091a Mon Sep 17 00:00:00 2001
From: wyn <1074145239@qq.com>
Date: Thu, 21 May 2026 16:28:28 +0800
Subject: [PATCH 09/12] =?UTF-8?q?=E6=AD=A4=E8=8A=82=E7=82=B9=E4=B9=8B?=
=?UTF-8?q?=E5=90=8E=E6=94=B9=E6=88=90=E4=B8=8D=E6=8B=86=E5=88=86=E5=8E=9F?=
=?UTF-8?q?=E6=96=87=E5=86=85=E5=AE=B9=EF=BC=8C=E7=9B=B4=E6=8E=A5=E7=94=A8?=
=?UTF-8?q?=E5=8F=82=E8=80=83=E6=96=87=E7=8C=AE=E5=92=8C=E6=95=B4=E6=AE=B5?=
=?UTF-8?q?=E8=BF=9B=E8=A1=8C=E5=AF=B9=E6=AF=94?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
application/api/job/ReferenceCheck.php | 1 +
1 file changed, 1 insertion(+)
diff --git a/application/api/job/ReferenceCheck.php b/application/api/job/ReferenceCheck.php
index 0c15c4f5..1078b8ca 100644
--- a/application/api/job/ReferenceCheck.php
+++ b/application/api/job/ReferenceCheck.php
@@ -88,6 +88,7 @@ class ReferenceCheck
$isMatch
);
+
Db::name('article_reference_check_result')->where('id', $checkId)->update([
'is_match' => $isMatch ? 1 : 0,
'confidence' => $confidence,
From 44f3383887df5187ad422dfc6a44dc6e8eab410b Mon Sep 17 00:00:00 2001
From: wyn <1074145239@qq.com>
Date: Thu, 21 May 2026 17:28:36 +0800
Subject: [PATCH 10/12] Changes
---
application/api/job/ReferenceCheck.php | 15 +++--
application/common/ReferenceCheckService.php | 61 +++++---------------
application/common/service/LLMService.php | 21 ++++---
3 files changed, 37 insertions(+), 60 deletions(-)
diff --git a/application/api/job/ReferenceCheck.php b/application/api/job/ReferenceCheck.php
index 1078b8ca..704d692d 100644
--- a/application/api/job/ReferenceCheck.php
+++ b/application/api/job/ReferenceCheck.php
@@ -61,7 +61,11 @@ class ReferenceCheck
}
try {
- $contentA = trim((string)(isset($row['origin_text']) ? $row['origin_text'] : ''));
+ $mainInfo = Db::name('article_main')->where('am_id', $row['am_id'])->find();
+ $contentA = trim($mainInfo['content']);//trim((string)(isset($row['origin_text']) ? $row['origin_text'] : ''));
+ if ($contentA === '' && !empty($row['content_a'])) {
+ $contentA = trim((string)$row['content_a']);
+ }
$contentB = trim((string)(isset($row['refer_text']) ? $row['refer_text'] : ''));
if ($contentB === '' && intval($row['p_refer_id']) > 0) {
@@ -83,15 +87,10 @@ class ReferenceCheck
$llm = new LLMService();
$llmResult = $llm->checkReference($contentA, $contentB);
$isMatch = !empty($llmResult['is_match']);
- $confidence = $llm->enforceReferenceCheckConfidence(
- isset($llmResult['confidence']) ? $llmResult['confidence'] : 0,
- $isMatch
- );
-
Db::name('article_reference_check_result')->where('id', $checkId)->update([
'is_match' => $isMatch ? 1 : 0,
- 'confidence' => $confidence,
+ 'confidence' => $llmResult['confidence'],
'reason' => $llmResult['reason'],
'status' => 1,
'error_msg' => '',
@@ -106,7 +105,7 @@ class ReferenceCheck
$job->delete();
$this->oQueueJob->log("任务执行成功 | 日志ID: {$sRedisKey}");
} catch (\Exception $e) {
- var_dump($e->getMessage());
+ $this->oQueueJob->log('ReferenceCheck error: ' . $e->getMessage());
if ($job->attempts() >= 3) {
$this->markFailed($checkId, $e->getMessage());
$job->delete();
diff --git a/application/common/ReferenceCheckService.php b/application/common/ReferenceCheckService.php
index f1903ca4..9aab409e 100644
--- a/application/common/ReferenceCheckService.php
+++ b/application/common/ReferenceCheckService.php
@@ -132,7 +132,7 @@ class ReferenceCheckService
$this->setAmRefCheckStatus($amId, self::AM_STATUS_RUNNING);
}
public function checkOne(){
- $this->pushJob(intval(722), 0);
+ $this->pushJob(intval(724), 0);
}
public function enqueueByArticle($articleId){
if ($articleId <= 0) {
@@ -196,7 +196,7 @@ class ReferenceCheckService
'created_at' => $now,
'updated_at' => $now,
]);
- continue;
+
$this->pushJob(intval($checkId), $delay);
$checkIds[] = $checkId;
$queued++;
@@ -205,7 +205,6 @@ class ReferenceCheckService
}
}
}
-
foreach (array_keys($amIdsWithJobs) as $amId) {
$this->setAmRefCheckStatus($amId, self::AM_STATUS_RUNNING);
}
@@ -680,9 +679,6 @@ class ReferenceCheckService
return $result;
}
- /** 与上一引用间距低于此值(字符)时视为同句并列,从整句开头截取而非仅取两标签之间 */
- const CITE_GAP_SENTENCE_THRESHOLD = 60;
-
/**
* 按引用位置截取局部上下文:优先取标签前叙述;同句多引时后续引用从上一标签后开始。
*/
@@ -703,38 +699,25 @@ class ReferenceCheckService
}
$hasPriorCiteInParagraph = ($prevTagEnd > $paragraphStart);
+ $sentenceStart = $this->findSentenceStart($content, $tagStart);
+
+ // 段内首个引用:整段到标签前;后续引用:取「本句」起点(可早于上一标签),避免只剩 “and external environment” 再误用标签后文本
if ($hasPriorCiteInParagraph) {
- $gapText = $this->buildCitationContextText($content, $prevTagEnd, $tagStart);
- // 如 motivation [23] and external environment [24]:间距短,取整句而非仅 “and external environment”
- if (mb_strlen($gapText) < self::CITE_GAP_SENTENCE_THRESHOLD) {
- $sentenceStart = $this->findSentenceStart($content, $tagStart);
- $localStart = $this->capContextStartBeforeTag(
- $content,
- $tagStart,
- max($paragraphStart, $sentenceStart)
- );
- } else {
- // 如 … Yin et al. [13] on oncology nurses, but … Yang [14]:间距较长,取上一标签后至本标签前
- $localStart = $prevTagEnd;
- }
+ $localStart = max($paragraphStart, $sentenceStart);
} else {
$localStart = $this->capContextStartBeforeTag($content, $tagStart, $paragraphStart);
}
- // 默认:引用标签前的论述(如 Yin et al. [13] → 含 “higher than … Yin et al.”)
+ // 默认:引用标签前的论述
$localEnd = $tagStart;
$originalText = $this->buildCitationContextText($content, $localStart, $localEnd);
- // 标签前几乎无正文(如句末 … ICU nurses [14])→ 改用标签后至下一引用或句末
- if (!$this->isMeaningfulCitationContext($originalText)
- || $this->shouldUseTrailingCitationContext(
- $content,
- $localStart,
- $tagStart,
- $tagEnd,
- $hasPriorCiteInParagraph
- )
- ) {
+ // 仅段内首个引用、且标签前极短(如句末 ICU nurses [14])时,才改用标签后片段;同段多引禁止标签后截取(会错取下一句)
+ $allowTrailing = !$hasPriorCiteInParagraph;
+ if ($allowTrailing && (
+ !$this->isMeaningfulCitationContext($originalText)
+ || $this->shouldUseTrailingCitationContext($content, $localStart, $tagStart, $tagEnd)
+ )) {
$trailEnd = ($nextTagStart < $sentenceEnd) ? $nextTagStart : $sentenceEnd;
$trailText = $this->buildCitationContextText($content, $tagEnd, $trailEnd);
if ($this->isMeaningfulCitationContext($trailText)) {
@@ -760,23 +743,12 @@ class ReferenceCheckService
/**
* 标签前仅有作者缩写等极短片段时,改用标签后上下文
- *
- * @param bool $hasPriorCiteInParagraph 同段多引时,短片段常为并列成分,不应误取标签后下一句
*/
- private function shouldUseTrailingCitationContext(
- $content,
- $localStart,
- $tagStart,
- $tagEnd,
- $hasPriorCiteInParagraph = false
- ) {
+ private function shouldUseTrailingCitationContext($content, $localStart, $tagStart, $tagEnd)
+ {
$before = $this->buildCitationContextText($content, $localStart, $tagStart);
if (!$this->isMeaningfulCitationContext($before)) {
- return !$hasPriorCiteInParagraph;
- }
-
- if ($hasPriorCiteInParagraph) {
- return false;
+ return true;
}
return mb_strlen($before) < 25;
@@ -845,7 +817,6 @@ class ReferenceCheckService
$text = trim(strip_tags($text));
$text = preg_replace('/\s+/u', ' ', $text);
$text = ltrim($text, "\xEF\xBB\xBF");
- $text = preg_replace('/^[\s.!?。!?,,、;:]+/u', '', $text);
return $text;
}
diff --git a/application/common/service/LLMService.php b/application/common/service/LLMService.php
index 4ffe0cda..d8734596 100644
--- a/application/common/service/LLMService.php
+++ b/application/common/service/LLMService.php
@@ -20,7 +20,8 @@ class LLMService
$this->url = trim((string)Env::get('promotion.promotion_llm_url', ''));
$this->model = trim((string)Env::get('promotion.promotion_llm_model', ''));
$this->apiKey = trim((string)Env::get('promotion.promotion_llm_api_key', ''));
- $this->timeout = max(30, intval(Env::get('promotion.promotion_llm_timeout', 120)));
+ // 引用校对 system 提示词较长,请求常超过 30s,至少 120s
+ $this->timeout = max(120, intval(Env::get('promotion.promotion_llm_timeout', 120)));
}
/**
@@ -34,9 +35,8 @@ class LLMService
'confidence' => 0.0,
'reason' => 'LLM not configured or request failed',
];
- \think\Log::info('llmUrl:'.$this->url);
- var_dump("in URL====".$this->url);
if ($this->url === '' || $this->model === '') {
+ \think\Log::warning('ReferenceCheck LLM: url or model not configured');
return $fallback;
}
@@ -73,11 +73,13 @@ class LLMService
$content = $this->postChat($payload);
if ($content === null) {
+ \think\Log::warning('ReferenceCheck LLM: postChat returned null');
return $fallback;
}
$parsed = $this->parseJson($content);
if ($parsed === null) {
+ \think\Log::warning('ReferenceCheck LLM: parseJson failed, raw=' . mb_substr($content, 0, 500));
return $fallback;
}
@@ -86,7 +88,11 @@ class LLMService
$this->normalizeConfidence(isset($parsed['confidence']) ? $parsed['confidence'] : 0),
$isMatch
);
-
+ \think\Log::info("confidence:".$confidence,[
+ 'is_match' => $isMatch,
+ 'confidence' => $confidence,
+ 'reason' => $this->cleanReason((string)(isset($parsed['reason']) ? $parsed['reason'] : '')),
+ ]);
return [
'is_match' => $isMatch,
'confidence' => $confidence,
@@ -763,13 +769,14 @@ PROMPT;
$raw = curl_exec($ch);
if ($raw === false) {
+ \think\Log::warning('ReferenceCheck LLM curl error: ' . curl_error($ch));
curl_close($ch);
return null;
}
$httpCode = intval(curl_getinfo($ch, CURLINFO_HTTP_CODE));
- \think\Log::info('httpCode:'.$httpCode);
curl_close($ch);
if ($httpCode < 200 || $httpCode >= 300) {
+ \think\Log::warning('ReferenceCheck LLM http ' . $httpCode . ': ' . mb_substr((string)$raw, 0, 500));
return null;
}
@@ -783,8 +790,8 @@ PROMPT;
if (isset($data['content'])) {
return (string)$data['content'];
}
- }catch (Exception $exception){
- var_dump($exception->getMessage());
+ } catch (Exception $exception) {
+ \think\Log::warning('ReferenceCheck LLM exception: ' . $exception->getMessage());
}
return null;
From 68cf1867d896ff7de9de1dce79720918939ea3f7 Mon Sep 17 00:00:00 2001
From: wyn <1074145239@qq.com>
Date: Fri, 22 May 2026 16:58:07 +0800
Subject: [PATCH 11/12] =?UTF-8?q?=E5=B7=B2=E7=BB=8F=E5=AE=8C=E6=88=90?=
=?UTF-8?q?=E4=B8=80=E4=B8=AA=E6=96=87=E7=AB=A0=E6=A0=A1=E5=AF=B9=E4=BA=86?=
=?UTF-8?q?=EF=BC=8C=E4=BD=86=E6=8D=A2=E4=B8=AA=E6=96=87=E7=AB=A0id?=
=?UTF-8?q?=E5=B0=B1=E6=8A=A5=E9=94=99=E4=BA=86=EF=BC=8C=E6=8E=92=E6=9F=A5?=
=?UTF-8?q?=E5=89=8D=E5=A4=87=E4=BB=BD?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
application/api/controller/Article.php | 8 +-
application/api/job/ReferenceCheck.php | 56 +-
application/api/job/ReferenceCheckTwo.php | 150 ++
application/common/ReferenceCheckService.php | 570 ++++++-
application/common/service/LLMService.php | 1587 +++++++++++-------
5 files changed, 1755 insertions(+), 616 deletions(-)
create mode 100644 application/api/job/ReferenceCheckTwo.php
diff --git a/application/api/controller/Article.php b/application/api/controller/Article.php
index b217e4c1..456fe59c 100644
--- a/application/api/controller/Article.php
+++ b/application/api/controller/Article.php
@@ -6641,8 +6641,9 @@ class Article extends Base
}
}
public function checkOne(){
+ $articleId = intval($this->request->param('article_id', 7414));
$svc = new ReferenceCheckService();
- $svc->checkOne();
+ return jsonSuccess($svc->enqueueSecondPassByArticle($articleId));
}
public function referenceCheckEnqueueArticleMain(){
$amId = 127448;
@@ -6792,7 +6793,7 @@ class Article extends Base
$citeStart = intval(isset($row['cite_tag_start']) ? $row['cite_tag_start'] : 0);
$rowStatus = intval($row['status']);
return array(
- 'check_id' => intval($row['check_id']),
+ 'check_id' => intval(isset($row['id']) ? $row['id'] : (isset($row['check_id']) ? $row['check_id'] : 0)),
'article_id' => intval(isset($row['article_id']) ? $row['article_id'] : 0),
'am_id' => $amId,
'cite_group_key' => $amId . '_' . $citeStart,
@@ -6806,7 +6807,8 @@ class Article extends Base
'text_end' => intval(isset($row['text_end']) ? $row['text_end'] : 0),
'status' => isset($statusMap[$rowStatus]) ? $statusMap[$rowStatus] : 'unknown',
'is_match' => intval($row['is_match']),
- 'is_reasonable' => intval($row['is_match']) === 1,
+ 'can_support' => intval(isset($row['can_support']) ? $row['can_support'] : $row['is_match']),
+ 'is_reasonable' => intval(isset($row['can_support']) ? $row['can_support'] : $row['is_match']) === 1,
'confidence' => floatval($row['confidence']),
'reason' => isset($row['reason']) ? $row['reason'] : '',
'error_msg' => isset($row['error_msg']) ? $row['error_msg'] : '',
diff --git a/application/api/job/ReferenceCheck.php b/application/api/job/ReferenceCheck.php
index 704d692d..3b15e6a1 100644
--- a/application/api/job/ReferenceCheck.php
+++ b/application/api/job/ReferenceCheck.php
@@ -36,6 +36,9 @@ class ReferenceCheck
try {
$checkId = intval(isset($data['check_id']) ? $data['check_id'] : 0);
+ if ($checkId <= 0 && !empty($jobData['data']['check_id'])) {
+ $checkId = intval($jobData['data']['check_id']);
+ }
$sClassName = get_class($this);
$sRedisKey = "queue_job:{$sClassName}:{$checkId}";
$sRedisValue = uniqid() . '_' . getmypid();
@@ -61,45 +64,47 @@ class ReferenceCheck
}
try {
- $mainInfo = Db::name('article_main')->where('am_id', $row['am_id'])->find();
- $contentA = trim($mainInfo['content']);//trim((string)(isset($row['origin_text']) ? $row['origin_text'] : ''));
- if ($contentA === '' && !empty($row['content_a'])) {
- $contentA = trim((string)$row['content_a']);
- }
- $contentB = trim((string)(isset($row['refer_text']) ? $row['refer_text'] : ''));
+ $svc = new ReferenceCheckService();
- if ($contentB === '' && intval($row['p_refer_id']) > 0) {
+ $contentA = $svc->resolveMainContentForJob($row);
+ $contentB = trim((string)(isset($row['refer_text']) ? $row['refer_text'] : ''));
+ $refer = null;
+
+ if (intval($row['p_refer_id']) > 0) {
$refer = Db::name('production_article_refer')
->where('p_refer_id', intval($row['p_refer_id']))
- ->where('status', 0)
+ ->where('state', 0)
->find();
- if ($refer) {
- $contentB = (new ReferenceCheckService())->formatReferForLlm($refer);
+ if ($refer && $contentB === '') {
+ $contentB = $svc->formatReferForLlm($refer);
}
}
if ($contentA === '' || $contentB === '') {
- $this->markFailed($checkId, 'Missing content_a or reference text');
+ $this->markFailed($checkId, 'Missing article_main.content or refer_text');
$job->delete();
return;
}
$llm = new LLMService();
- $llmResult = $llm->checkReference($contentA, $contentB);
- $isMatch = !empty($llmResult['is_match']);
+ $llmResult = $llm->checkReference($contentA, $contentB, false);
+ $canSupport = $svc->parseLlmCanSupport($llmResult);
+ $confidence = floatval($llmResult['confidence']);
- Db::name('article_reference_check_result')->where('id', $checkId)->update([
- 'is_match' => $isMatch ? 1 : 0,
- 'confidence' => $llmResult['confidence'],
- 'reason' => $llmResult['reason'],
+ $svc->updateCheckResult($checkId, [
+ 'can_support' => $canSupport ? 1 : 0,
+ 'is_match' => $canSupport ? 1 : 0,
+ 'confidence' => $confidence,
+ 'reason' => isset($llmResult['reason']) ? $llmResult['reason'] : '',
'status' => 1,
'error_msg' => '',
- 'updated_at' => date('Y-m-d H:i:s'),
]);
+ $svc->maybeEnqueueSecondPass($checkId, $confidence);
+
$amId = intval(isset($row['am_id']) ? $row['am_id'] : 0);
if ($amId > 0) {
- (new ReferenceCheckService())->syncAmRefCheckStatus($amId);
+ $svc->syncAmRefCheckStatus($amId);
}
$this->QueueRedis->finishJob($sRedisKey, 'completed', $this->completedExprie, $sRedisValue);
$job->delete();
@@ -127,11 +132,14 @@ class ReferenceCheck
private function markFailed($checkId, $msg)
{
$row = Db::name('article_reference_check_result')->where('id', $checkId)->find();
- Db::name('article_reference_check_result')->where('id', $checkId)->update([
- 'status' => 2,
- 'error_msg' => mb_substr($msg, 0, 500),
- 'updated_at' => date('Y-m-d H:i:s'),
- ]);
+ try {
+ (new ReferenceCheckService())->updateCheckResult($checkId, [
+ 'status' => 2,
+ 'error_msg' => $msg,
+ ]);
+ } catch (\Exception $e) {
+ \think\Log::error('ReferenceCheck markFailed: ' . $e->getMessage());
+ }
$amId = empty($row) ? 0 : intval(isset($row['am_id']) ? $row['am_id'] : 0);
if ($amId > 0) {
(new ReferenceCheckService())->syncAmRefCheckStatus($amId);
diff --git a/application/api/job/ReferenceCheckTwo.php b/application/api/job/ReferenceCheckTwo.php
new file mode 100644
index 00000000..b28c9f6c
--- /dev/null
+++ b/application/api/job/ReferenceCheckTwo.php
@@ -0,0 +1,150 @@
+oQueueJob = new QueueJob();
+ $this->QueueRedis = QueueRedis::getInstance();
+ }
+
+    /**
+     * Queue entry point for the second review pass of one reference-check row.
+     *
+     * Loads the row identified by check_id, resolves the article text and the reference
+     * text, asks the LLM again with the extra DOI block from prepareRecheckPayload(),
+     * and writes the verdict back. A failing attempt is released for retry after 30
+     * seconds; from the third attempt on the row is marked failed instead.
+     *
+     * @param Job   $job  Queue job being processed.
+     * @param mixed $data Job payload; expected to carry `check_id`.
+     * @return void
+     */
+    public function fire(Job $job, $data)
+    {
+        $this->oQueueJob->init($job);
+
+        $rawBody = empty($job->getRawBody()) ? '' : $job->getRawBody();
+        $jobData = empty($rawBody) ? [] : json_decode($rawBody, true);
+        $jobId = empty($jobData['id']) ? 'unknown' : $jobData['id'];
+
+        $sRedisKey = '';
+        $sRedisValue = '';
+
+        $this->oQueueJob->log("-----------队列任务开始-----------");
+        $this->oQueueJob->log("当前任务ID: {$jobId}, 尝试次数: {$job->attempts()}");
+
+        try {
+            // Prefer the payload handed over by the queue; fall back to the raw job body.
+            $checkId = intval(isset($data['check_id']) ? $data['check_id'] : 0);
+            if ($checkId <= 0 && !empty($jobData['data']['check_id'])) {
+                $checkId = intval($jobData['data']['check_id']);
+            }
+            $sClassName = get_class($this);
+            $sRedisKey = "queue_job_two:{$sClassName}:{$checkId}";
+            $sRedisValue = uniqid() . '_' . getmypid();
+
+            if (!$this->oQueueJob->acquireLock($sRedisKey, $sRedisValue, $job)) {
+                return;
+            }
+
+            if ($checkId <= 0) {
+                $job->delete();
+                return;
+            }
+
+            $row = Db::name('article_reference_check_result')->where('id', $checkId)->find();
+            if (empty($row)) {
+                $job->delete();
+                return;
+            }
+
+            // No "already finished" (status === 1) short-circuit here: this pass
+            // deliberately re-processes rows that the first pass has completed.
+
+            try {
+                $svc = new ReferenceCheckService();
+
+                $contentA = $svc->resolveMainContentForJob($row);
+                $referText = trim((string)(isset($row['refer_text']) ? $row['refer_text'] : ''));
+                $refer = null;
+
+                if (intval($row['p_refer_id']) > 0) {
+                    $refer = Db::name('production_article_refer')
+                        ->where('p_refer_id', intval($row['p_refer_id']))
+                        ->where('state', 0)
+                        ->find();
+                }
+
+                // Same fallback as the first-pass job: rebuild the reference text from
+                // the reference record when the row does not store one.
+                if ($referText === '' && !empty($refer)) {
+                    $referText = trim((string)$svc->formatReferForLlm($refer));
+                }
+
+                // Validate before building the recheck payload so no work is spent on rows that cannot be checked.
+                if ($contentA === '' || $referText === '') {
+                    $this->markFailed($checkId, 'Missing article_main.content or refer_text');
+                    $job->delete();
+                    return;
+                }
+
+                $payload = $svc->prepareRecheckPayload(is_array($refer) ? $refer : [], $referText);
+                $doiBlock = $payload['doi_block'];
+
+                $llm = new LLMService();
+                $llmResult = $llm->checkReference($contentA, $referText, true, $doiBlock);
+
+                $canSupport = $svc->parseLlmCanSupport($llmResult);
+                // Guarded read: a result without `confidence` is stored as 0 instead of raising a notice.
+                $confidence = floatval(isset($llmResult['confidence']) ? $llmResult['confidence'] : 0);
+                $tag = $payload['has_abstract']
+                    ? ('[Crossref复核' . ($payload['doi_used'] !== '' ? ' ' . $payload['doi_used'] : '') . ']')
+                    : '[Crossref复核-无摘要]';
+                $reason = $tag . ' ' . (isset($llmResult['reason']) ? $llmResult['reason'] : '');
+
+                $affected = $svc->updateCheckResult($checkId, [
+                    'can_support' => $canSupport ? 1 : 0,
+                    'is_match' => $canSupport ? 1 : 0,
+                    'confidence' => $confidence,
+                    'reason' => $reason,
+                    'status' => 1,
+                    'error_msg' => '',
+                ]);
+                $this->oQueueJob->log("Crossref复核写入 id={$checkId} affected={$affected} can_support=" . ($canSupport ? 1 : 0) . " confidence=" . $confidence);
+
+                $amId = intval(isset($row['am_id']) ? $row['am_id'] : 0);
+                if ($amId > 0) {
+                    $svc->syncAmRefCheckStatus($amId);
+                }
+                $this->QueueRedis->finishJob($sRedisKey, 'completed', $this->completedExprie, $sRedisValue);
+                $job->delete();
+                $this->oQueueJob->log("任务执行成功 | 日志ID: {$sRedisKey}");
+            } catch (\Exception $e) {
+                $this->oQueueJob->log('ReferenceCheckTwo error: ' . $e->getMessage());
+                // Give up from the third attempt on; otherwise retry in 30 seconds.
+                if ($job->attempts() >= 3) {
+                    $this->markFailed($checkId, $e->getMessage());
+                    $job->delete();
+                    return;
+                }
+                $job->release(30);
+            }
+        } catch (\RuntimeException $e) {
+            $this->oQueueJob->handleRetryableException($e, $sRedisKey, $sRedisValue, $job);
+        } catch (\LogicException $e) {
+            $this->oQueueJob->handleNonRetryableException($e, $sRedisKey, $sRedisValue, $job);
+        } catch (\Exception $e) {
+            $this->oQueueJob->handleRetryableException($e, $sRedisKey, $sRedisValue, $job);
+        } finally {
+            $this->oQueueJob->finnal();
+        }
+    }
+
+    /**
+     * Marks one check row as failed (status 2) and re-syncs the status of its article_main row.
+     *
+     * @param int    $checkId Primary key of the check row.
+     * @param string $msg     Error message stored in `error_msg`.
+     * @return void
+     */
+    private function markFailed($checkId, $msg)
+    {
+        // Read the row up front; its am_id is needed after the update attempt.
+        $record = Db::name('article_reference_check_result')->where('id', $checkId)->find();
+
+        try {
+            (new ReferenceCheckService())->updateCheckResult($checkId, [
+                'status' => 2,
+                'error_msg' => $msg,
+            ]);
+        } catch (\Exception $e) {
+            // A failed write is only logged so the status sync below still runs.
+            \think\Log::error('ReferenceCheckTwo markFailed: ' . $e->getMessage());
+        }
+
+        if (empty($record) || !isset($record['am_id'])) {
+            return;
+        }
+
+        $amId = intval($record['am_id']);
+        if ($amId <= 0) {
+            return;
+        }
+
+        (new ReferenceCheckService())->syncAmRefCheckStatus($amId);
+    }
+}
diff --git a/application/common/ReferenceCheckService.php b/application/common/ReferenceCheckService.php
index 9aab409e..593f1548 100644
--- a/application/common/ReferenceCheckService.php
+++ b/application/common/ReferenceCheckService.php
@@ -3,6 +3,7 @@
namespace app\common;
use think\Db;
+use think\Env;
use think\Queue;
/**
@@ -131,8 +132,39 @@ class ReferenceCheckService
$this->setAmRefCheckStatus($amId, self::AM_STATUS_RUNNING);
}
- public function checkOne(){
- $this->pushJob(intval(724), 0);
+    /**
+     * Manual trigger: enqueue the DOI second-pass recheck for finished rows (status = 1) of one
+     * article whose confidence is <= 0.65. Only $limit randomly ordered rows are sampled per call
+     * (2 by default, the previously hard-coded value); pass 0 to process every matching row.
+     *
+     * @param int $limit maximum number of random rows to try; <= 0 disables the sampling
+     * @return array ['article_id' => int, 'check_ids2' => int[], 'queued' => int]
+     * @throws \InvalidArgumentException when $articleId is not a positive integer
+     */
+    public function enqueueSecondPassByArticle($articleId, $limit = 2)
+    {
+        $articleId = intval($articleId);
+        if ($articleId <= 0) {
+            throw new \InvalidArgumentException('article_id is required');
+        }
+
+        $query = Db::name('article_reference_check_result')
+            ->where('article_id', $articleId)
+            ->where('status', 1)
+            ->where('confidence', '<=', 0.65);
+        if (intval($limit) > 0) {
+            $query = $query->orderRaw('rand()')->limit(intval($limit));
+        }
+
+        $checkIds2 = [];
+        foreach ($query->select() as $checkLog) {
+            $rowId = $this->resolveCheckRowId($checkLog);
+            if ($this->maybeEnqueueSecondPass($rowId, floatval($checkLog['confidence']))) {
+                $checkIds2[] = $rowId;
+            }
+        }
+
+        return ['article_id' => $articleId, 'check_ids2' => $checkIds2, 'queued' => count($checkIds2)];
}
public function enqueueByArticle($articleId){
if ($articleId <= 0) {
@@ -140,7 +172,7 @@ class ReferenceCheckService
}
$prod = Db::name('production_article')
->where('article_id', $articleId)
- ->where('state', 0)
+ ->where('state', [0, 2])
->find();
if (empty($prod)) {
throw new \RuntimeException('production_article not found for article_id=' . $articleId);
@@ -296,12 +328,78 @@ class ReferenceCheckService
return isset($map[$status]) ? $map[$status] : 'unknown';
}
+    /**
+     * Resolve the primary key of a check-result row: `id` first, legacy `check_id` as fallback.
+     * (The table key is `id`; the public API parameter is still named check_id.)
+     */
+    public function resolveCheckRowId($row)
+    {
+        if (is_array($row)) {
+            foreach (['id', 'check_id'] as $column) {
+                $candidate = isset($row[$column]) ? intval($row[$column]) : 0;
+                if ($candidate > 0) {
+                    return $candidate;
+                }
+            }
+        }
+        return 0;
+    }
+
+    /**
+     * Parse an LLM yes/no verdict given as bool, 0|1 or a string such as "true" / "false".
+     * The method also parses can_support values, so "support" / "supported" count as true too.
+     */
+    public function parseLlmIsMatch($value)
+    {
+        if (is_bool($value)) {
+            return $value;
+        }
+        if (is_int($value) || is_float($value)) {
+            return intval($value) === 1;
+        }
+        return in_array(strtolower(trim((string)$value)), ['1', 'true', 'yes', 'match', 'matched', 'support', 'supported'], true);
+    }
+
+    /**
+     * Write fields of one check-result row; reason/error_msg are trimmed and cut to 512 chars so the
+     * UPDATE cannot fail on the varchar(512) columns. Returns the affected row count.
+     * @throws \InvalidArgumentException|\RuntimeException invalid id / row not found / update failed
+     */
+    public function updateCheckResult($checkId, array $fields)
+    {
+        $checkId = intval($checkId);
+        if ($checkId <= 0) {
+            throw new \InvalidArgumentException('invalid check id');
+        }
+
+        foreach (['reason', 'error_msg'] as $textColumn) {
+            if (isset($fields[$textColumn])) {
+                $fields[$textColumn] = mb_substr(trim((string)$fields[$textColumn]), 0, 512);
+            }
+        }
+        $fields['updated_at'] = date('Y-m-d H:i:s');
+
+        $existing = Db::name('article_reference_check_result')->where('id', $checkId)->find();
+        if (empty($existing)) {
+            throw new \RuntimeException('article_reference_check_result not found, id=' . $checkId);
+        }
+
+        $affected = Db::name('article_reference_check_result')->where('id', $checkId)->update($fields);
+        if ($affected === false) {
+            throw new \RuntimeException('article_reference_check_result update failed, id=' . $checkId);
+        }
+
+        $affected = intval($affected);
+        \think\Log::info('updateCheckResult id=' . $checkId . ' affected=' . $affected);
+        return $affected;
+    }
+
public function getResult($checkId)
{
if ($checkId <= 0) {
return null;
}
- $row = Db::name('article_reference_check_result')->where('check_id', $checkId)->find();
+ $row = Db::name('article_reference_check_result')->where('id', $checkId)->find();
return $row ?: null;
}
@@ -435,7 +533,7 @@ class ReferenceCheckService
'ref_nos' => [],
];
}
- $byAm[$amId]['contexts'][$ctxKey]['check_ids'][] = intval($row['check_id']);
+ $byAm[$amId]['contexts'][$ctxKey]['check_ids'][] = $this->resolveCheckRowId($row);
$byAm[$amId]['contexts'][$ctxKey]['ref_nos'][] = $refNo;
$reason = trim((string)$this->arrGet($row, 'reason', ''));
if ($reason !== '') {
@@ -501,7 +599,7 @@ class ReferenceCheckService
$issueCount++;
$issues[] = array(
'am_id' => $amId,
- 'check_id' => intval($row['check_id']),
+ 'check_id' => $this->resolveCheckRowId($row),
'reference_no' => $num,
'reference_raw' => $inner,
'reason' => $rowReason,
@@ -512,7 +610,7 @@ class ReferenceCheckService
ENT_QUOTES,
'UTF-8'
);
- return ''
. $numMatch[0] . '';
},
@@ -627,6 +725,448 @@ class ReferenceCheckService
return implode("\n", $parts);
}
+    /**
+     * DOI taken from the refer_doi field only (used for the second-pass Crossref abstract lookup).
+     *
+     * @return string first DOI found in refer_doi, or '' when $refer is not an array / has no DOI
+     */
+    public function extractReferDoiOnly($refer)
+    {
+        $rawDoi = is_array($refer) ? trim((string)$this->arrGet($refer, 'refer_doi', '')) : '';
+        // Blank values and "not available" placeholders count as "no DOI".
+        if ($rawDoi === '' || stripos($rawDoi, 'not available') !== false) {
+            return '';
+        }
+        $found = $this->extractDoisFromString($rawDoi);
+        return empty($found) ? '' : $found[0];
+    }
+
+    /**
+     * Look up the Crossref work behind the row's refer_doi (second pass only).
+     *
+     * `text` stays '' when the row has no usable refer_doi or Crossref returns nothing; `doi` is
+     * still reported whenever one was extracted.
+     *
+     * @return array{text:string, has_abstract:bool, doi:string}
+     */
+    public function fetchCrossrefAbstractByReferDoi($refer)
+    {
+        $result = ['text' => '', 'has_abstract' => false, 'doi' => $this->extractReferDoiOnly($refer)];
+        if ($result['doi'] === '') {
+            return $result;
+        }
+
+        $crossref = new CrossrefService([
+            'mailto' => trim((string)Env::get('crossref_mailto', '')),
+        ]);
+        $block = $this->extractCrossrefBlock($result['doi'], $crossref);
+        if ($block !== null) {
+            $result['text'] = $block['text'];
+            $result['has_abstract'] = !empty($block['has_abstract']);
+        }
+
+        return $result;
+    }
+
+    /**
+     * Read the verdict from an LLM result: can_support when that key exists, otherwise is_match.
+     */
+    public function parseLlmCanSupport($llmResult)
+    {
+        if (!is_array($llmResult)) {
+            return false;
+        }
+        // A present-but-null can_support still wins over is_match and parses as false.
+        $key = array_key_exists('can_support', $llmResult) ? 'can_support' : 'is_match';
+        $verdict = isset($llmResult[$key]) ? $llmResult[$key] : false;
+        return $this->parseLlmIsMatch($verdict);
+    }
+
+    /**
+     * First pass: load article_main.content for the row's am_id and flatten it to plain text.
+     *
+     * Tags are stripped, HTML entities decoded, whitespace runs collapsed to single spaces, and the
+     * result is capped at $maxChars characters (never less than 500) with "..." appended when cut.
+     *
+     * @return string plain text, or '' when am_id is missing, the row is not found or content is blank
+     */
+    public function resolveMainContentForJob(array $row, $maxChars = 8000)
+    {
+        $amId = intval($this->arrGet($row, 'am_id', 0));
+        if ($amId <= 0) {
+            return '';
+        }
+
+        $main = Db::name('article_main')
+            ->field('content')->where('am_id', $amId)
+            ->find();
+        $text = empty($main) ? '' : trim((string)$this->arrGet($main, 'content', ''));
+        if ($text === '') {
+            return '';
+        }
+
+        // Unwrap <blue>[1,3-5]</blue> citation markers, keeping the bracketed numbers.
+        $text = preg_replace('/<blue>\[([\d,\-\s]+)\]<\/blue>/', '[$1]', $text);
+        // Remove all remaining markup, then decode entities and collapse whitespace.
+        $text = html_entity_decode(strip_tags($text), ENT_QUOTES | ENT_HTML5, 'UTF-8');
+        $text = trim(preg_replace('/\s+/u', ' ', $text));
+
+        $limit = max(500, intval($maxChars));
+        if (mb_strlen($text) <= $limit) {
+            return $text;
+        }
+        return mb_substr($text, 0, $limit) . '...';
+    }
+
+    /**
+     * Local citation context of a check row: the trimmed origin_text, or the trimmed content_a
+     * when origin_text is blank.
+     */
+    public function resolveCitationContextForJob(array $row)
+    {
+        $context = trim((string)$this->arrGet($row, 'origin_text', ''));
+        return $context !== ''
+            ? $context
+            : trim((string)$this->arrGet($row, 'content_a', ''));
+    }
+
+    /**
+     * Highest-priority DOI (10.xxxx/...) of a refer row, or '' when none is found.
+     * Candidates are collected from refer_content, refer_doi, doi and doilink, in that order
+     * (the DOI inside the raw citation text is closest to what was actually cited).
+     */
+    public function extractDoiFromRefer($refer)
+    {
+        $candidates = $this->extractAllDoiCandidatesFromRefer($refer);
+        $first = reset($candidates);
+        return $first === false ? '' : $first;
+    }
+
+    /**
+     * All distinct DOI candidates of a refer row, ordered by source priority:
+     * refer_content > refer_doi > doi > doilink.
+     *
+     * Used by the second-pass DOI recheck: when the metadata refer_doi disagrees with the DOI in the
+     * raw citation text (data drift), the DOI from the raw citation text is tried first.
+     *
+     * @return string[]
+     */
+    public function extractAllDoiCandidatesFromRefer($refer)
+    {
+        if (!is_array($refer)) {
+            return [];
+        }
+
+        // Fields are scanned in priority order; a DOI already collected from an earlier
+        // field is not added again, so the first occurrence decides its position.
+        $unique = [];
+        foreach (['refer_content', 'refer_doi', 'doi', 'doilink'] as $field) {
+            $raw = (string)$this->arrGet($refer, $field, '');
+            foreach ($this->extractDoisFromString($raw) as $doi) {
+                if (in_array($doi, $unique, true)) {
+                    continue;
+                }
+                $unique[] = $doi;
+            }
+        }
+
+        return $unique;
+    }
+
+    /**
+     * Collect every DOI-shaped token (10.xxxx/yyy) from free text: doi.org/ URLs first, then bare
+     * DOIs. Trailing punctuation is trimmed and each candidate must pass isValidDoi(). Blank text
+     * or text containing "not available" yields [].
+     * @return string[] unique DOIs in first-seen order
+     */
+    private function extractDoisFromString($text)
+    {
+        $text = trim((string)$text);
+        if ($text === '' || stripos($text, 'not available') !== false) {
+            return [];
+        }
+
+        $patterns = [
+            '~doi\.org/([^\s?#"\'<>]+)~i',
+            '~\b(10\.\d{3,9}/[^\s?#"\'<>]+)~i',
+        ];
+        $dois = [];
+        foreach ($patterns as $pattern) {
+            if (!preg_match_all($pattern, $text, $hits)) {
+                continue;
+            }
+            foreach ($hits[1] as $hit) {
+                $doi = $this->trimDoiTail(trim($hit));
+                if ($this->isValidDoi($doi)) {
+                    $dois[] = $doi;
+                }
+            }
+        }
+
+        // Fallback: nothing valid was found, but the whole text itself starts with "10.".
+        if ($dois === [] && strpos($text, '10.') === 0) {
+            $doi = $this->trimDoiTail($text);
+            if ($this->isValidDoi($doi)) {
+                $dois[] = $doi;
+            }
+        }
+
+        return array_values(array_unique($dois));
+    }
+
+ private function trimDoiTail($doi) // Strip trailing punctuation, closing brackets, quotes, backslashes and whitespace from a DOI candidate.
+ {
+ return rtrim($doi, ".,;:)]}>\"'\\ \t\n\r");
+ }
+
+ private function isValidDoi($doi) // True when the string is "10." + 3-9 digits + "/" + at least one non-whitespace character.
+ {
+ return (bool)preg_match('~^10\.\d{3,9}/[^\s]+$~i', (string)$doi);
+ }
+
+    /**
+     * Fetch the real content behind a refer row's DOI through PubMed / Crossref (the local LLM
+     * cannot open web pages, so the content has to be fetched up front).
+     *
+     * - every DOI candidate of the row is tried (refer_content > refer_doi > doi > doilink)
+     * - the first candidate whose block carries an abstract wins
+     * - otherwise the first candidate that produced any block is used
+     * - '' is returned when nothing could be fetched, so callers can skip the second pass
+     */
+    public function fetchDoiLiteratureBlock($refer)
+    {
+        $candidates = $this->extractAllDoiCandidatesFromRefer($refer);
+        if (empty($candidates)) {
+            return '';
+        }
+
+        $pubmedOptions = [
+            'email' => trim((string)Env::get('pubmed_email', '')),
+            'tool' => trim((string)Env::get('pubmed_tool', 'tmrjournals')),
+        ];
+        $pubmed = new PubmedService($pubmedOptions);
+        $crossref = new CrossrefService([
+            'mailto' => trim((string)Env::get('crossref_mailto', '')),
+        ]);
+
+        // Remember the first block without an abstract in case no candidate has one.
+        $firstBlock = null;
+        foreach ($candidates as $doi) {
+            $block = $this->buildDoiBlockFromSources($doi, $pubmed, $crossref);
+            if ($block === null) {
+                continue;
+            }
+            if (!empty($block['has_abstract'])) {
+                // An abstract was found: stop here, later candidates are not fetched.
+                return $block['text'];
+            }
+            if ($firstBlock === null) {
+                $firstBlock = $block;
+            }
+        }
+
+        // No abstract anywhere: fall back to the first metadata-only block, if any.
+        if ($firstBlock === null) {
+            return '';
+        }
+        return $firstBlock['text'];
+    }
+
+    /**
+     * Build the text block for one DOI: PubMed data first, Crossref as supplement or fallback.
+     *
+     * @return array|null ['text' => string, 'has_abstract' => bool]; null for a blank DOI or when extractCrossrefBlock() returns null
+     */
+    private function buildDoiBlockFromSources($doi, PubmedService $pubmed, CrossrefService $crossref)
+    {
+        $doi = trim((string)$doi);
+        if ($doi === '') {
+            return null;
+        }
+
+        $pub = $pubmed->fetchByDoi($doi);
+        $pubAbstract = is_array($pub) ? trim((string)$this->arrGet($pub, 'abstract', '')) : '';
+        $usable = is_array($pub) && ($pubAbstract !== '' || trim((string)$this->arrGet($pub, 'title', '')) !== '');
+        if (!$usable) {
+            // PubMed gave neither an abstract nor a title: rely on Crossref alone.
+            return $this->extractCrossrefBlock($doi, $crossref);
+        }
+
+        $lines = ['Source: PubMed (DOI ' . $doi . ')'];
+        $scalarLabels = ['title' => 'Actual Title: ', 'journal' => 'Journal: ', 'year' => 'Year: '];
+        foreach ($scalarLabels as $key => $label) {
+            if (!empty($pub[$key])) {
+                $lines[] = $label . trim((string)$pub[$key]);
+            }
+        }
+        $listLabels = ['publication_types' => 'Publication Types: ', 'mesh_terms' => 'MeSH: '];
+        foreach ($listLabels as $key => $label) {
+            if (!empty($pub[$key])) {
+                $lines[] = $label . implode('; ', (array)$pub[$key]);
+            }
+        }
+
+        if ($pubAbstract !== '') {
+            $lines[] = 'Abstract: ' . $this->truncate($pubAbstract, 3500);
+            return ['text' => implode("\n", $lines), 'has_abstract' => true];
+        }
+
+        // No PubMed abstract: append the Crossref block, but only when it carries an abstract.
+        $cr = $this->extractCrossrefBlock($doi, $crossref);
+        $crHasAbstract = $cr !== null && $cr['has_abstract'];
+        if ($crHasAbstract) {
+            $lines[] = "\n--- Crossref 补充 ---\n" . $cr['text'];
+        }
+        return ['text' => implode("\n", $lines), 'has_abstract' => $crHasAbstract];
+    }
+
+    /**
+     * Fetch title / journal / authors / abstract for a DOI from Crossref and render them as labelled
+     * lines. Crossref abstracts are usually wrapped in JATS XML, so they are cleaned first.
+     * @return array|null ['text' => string, 'has_abstract' => bool]; null when fetchWork() gives no array
+     */
+    private function extractCrossrefBlock($doi, CrossrefService $crossref)
+    {
+        $msg = $crossref->fetchWork($doi);
+        if (!is_array($msg)) {
+            return null;
+        }
+        $summary = $crossref->fetchWorkSummary($doi);
+        if (!is_array($summary)) {
+            $summary = [];
+        }
+
+        $lines = ['Source: Crossref api.crossref.org/works/' . rawurlencode($doi)];
+        // The first title of the raw work wins; the summary title is only a fallback.
+        $title = isset($msg['title'][0])
+            ? trim((string)$msg['title'][0])
+            : trim((string)$this->arrGet($summary, 'title', ''));
+        if ($title !== '') {
+            $lines[] = 'Actual Title: ' . $title;
+        }
+
+        $labels = [
+            'joura' => 'Journal: ',
+            'author_str' => 'Authors: ',
+            'dateno' => 'Publication: ',
+            'doilink' => 'DOI Link: ',
+        ];
+        foreach ($labels as $key => $label) {
+            if (!empty($summary[$key])) {
+                $lines[] = $label . trim((string)$summary[$key]);
+            }
+        }
+        if (!empty($summary['is_retracted'])) {
+            $lines[] = 'Retraction: yes - ' . trim((string)$this->arrGet($summary, 'retract_reason', ''));
+        }
+
+        $abstract = $this->cleanCrossrefAbstract((string)$this->arrGet($msg, 'abstract', ''));
+        $lines[] = $abstract !== ''
+            ? 'Abstract: ' . $this->truncate($abstract, 3500)
+            : 'Note: Crossref 未返回摘要,请结合标题/期刊/作者与正文谨慎判断。';
+
+        return ['text' => implode("\n", $lines), 'has_abstract' => $abstract !== ''];
+    }
+
+    // Turn a Crossref abstract (JATS XML such as <jats:p>...</jats:p>) into plain text; '' when blank.
+    private function cleanCrossrefAbstract($raw)
+    {
+        $raw = trim((string)$raw);
+        if ($raw === '') {
+            return '';
+        }
+        $raw = preg_replace('~<jats:title[^>]*>.*?</jats:title>~is', '', $raw); // drop title elements incl. their text
+        $raw = preg_replace('~<jats:p[^>]*>~i', "\n", $raw);                    // paragraph start -> line break
+        $raw = preg_replace('~</?jats:[^>]+>~i', '', $raw);                     // any other jats:* tag, incl. </jats:p>
+        $raw = strip_tags($raw);
+        $raw = preg_replace('/[ \t]+/u', ' ', $raw);
+        $raw = preg_replace("/\r\n|\r/u", "\n", $raw);
+        $raw = preg_replace("/\n{2,}/u", "\n", $raw);
+        return trim($raw);
+    }
+
+    // Cap $text at $max characters, counted with mb_strlen (not bytes).
+    // "..." is appended only when something was actually cut off.
+    private function truncate($text, $max)
+    {
+        $text = (string)$text;
+        $needsCut = mb_strlen($text) > $max;
+        return $needsCut ? mb_substr($text, 0, $max) . '...' : $text;
+    }
+
+    /**
+     * Data for the second (DOI) recheck: bibliographic text ($referText, or formatReferForLlm($refer) when blank) plus the Crossref block.
+     * @return array{refer_text:string, doi_block:string, has_abstract:bool, doi_used:string}
+     */
+    public function prepareRecheckPayload($refer, $referText = '')
+    {
+        $given = trim($referText);
+        $base = $given === '' ? $this->formatReferForLlm($refer) : $given;
+        $cr = $this->fetchCrossrefAbstractByReferDoi($refer);
+        return [
+            'refer_text' => $base,
+            'doi_block' => $cr['text'],
+            'has_abstract' => $cr['has_abstract'],
+            'doi_used' => $cr['doi'],
+        ];
+    }
+
+    /**
+     * Legacy interface: glue the recheck payload into one text block (kept for backward
+     * compatibility; callers should prefer prepareRecheckPayload()).
+     */
+    public function formatReferForDoiRecheck($refer, $referText = '')
+    {
+        $payload = $this->prepareRecheckPayload($refer, $referText);
+        // An empty doi_block gets a fixed "nothing fetched" notice; otherwise the fetched
+        // block is appended under a Crossref heading.
+        $suffix = $payload['doi_block'] === ''
+            ? "\n\n【DOI 文献真实内容】\n未能从 PubMed/Crossref 获取该 DOI 的摘要或元数据,请依据书目条目与正文谨慎判断。"
+            : "\n\n【Crossref 摘要(依据 Refer_doi 从 api.crossref.org/works 获取)】\n" . $payload['doi_block'];
+        return $payload['refer_text'] . $suffix;
+    }
+
+    /**
+     * Queue the second-pass recheck (5 s delay) when the first pass scored confidence <= 0.65 and
+     * Crossref returns an abstract for the row's refer_doi.
+     *
+     * Returns false without queueing (to avoid pointless reruns with the same outcome) when:
+     *   - the check id is invalid or the confidence is above the threshold
+     *   - the check row or its refer row (state = 0) does not exist
+     *   - refer_doi is empty or Crossref returned no abstract
+     */
+    public function maybeEnqueueSecondPass($checkId, $confidence)
+    {
+        $checkId = intval($checkId);
+        if ($checkId <= 0 || floatval($confidence) > 0.65) {
+            return false;
+        }
+
+        $row = Db::name('article_reference_check_result')->where('id', $checkId)->find();
+        // A missing check row and a row without p_refer_id both end here.
+        $referId = empty($row) ? 0 : intval($row['p_refer_id']);
+        if ($referId <= 0) {
+            return false;
+        }
+
+        $refer = Db::name('production_article_refer')
+            ->where('p_refer_id', $referId)
+            ->where('state', 0)
+            ->find();
+        if (empty($refer) || $this->extractReferDoiOnly($refer) === '') {
+            return false;
+        }
+
+        // The Crossref lookup happens here, at enqueue time; no abstract means no job.
+        $cr = $this->fetchCrossrefAbstractByReferDoi($refer);
+        if (empty($cr['has_abstract'])) {
+            return false;
+        }
+
+        $this->pushJob2($checkId, 5);
+        return true;
+    }
+
/**
* 从 article_main.content 提取 blue 引用
*/
@@ -1021,10 +1561,24 @@ class ReferenceCheckService
} else {
$jobId = Queue::push($jobClass, $data, self::QUEUE_NAME);
}
- var_dump("=====jobId:".$jobId);
} catch (\Exception $e) {
\think\Log::error('ReferenceCheck pushJob failed check_id=' . $checkId . ' ' . $e->getMessage());
throw $e;
}
}
+    // Push a ReferenceCheckTwo job for the check id (delayed when $delaySeconds > 0); failures are logged and rethrown.
+    private function pushJob2($checkId, $delaySeconds = 0)
+    {
+        $jobClass = 'app\api\job\ReferenceCheckTwo@fire';
+        try {
+            if ($delaySeconds <= 0) {
+                Queue::push($jobClass, ['check_id' => $checkId], self::QUEUE_NAME);
+                return;
+            }
+            Queue::later($delaySeconds, $jobClass, ['check_id' => $checkId], self::QUEUE_NAME);
+        } catch (\Exception $e) {
+            \think\Log::error('ReferenceCheckTwo pushJob failed check_id=' . $checkId . ' ' . $e->getMessage());
+            throw $e;
+        }
+    }
}
diff --git a/application/common/service/LLMService.php b/application/common/service/LLMService.php
index d8734596..01a755df 100644
--- a/application/common/service/LLMService.php
+++ b/application/common/service/LLMService.php
@@ -25,15 +25,18 @@ class LLMService
}
/**
- * @param string $contextText 正文引用处句子
- * @param string $referText 参考文献条目(或 refer 格式化文本)
+ * @param string $contextText 正文引用处句子
+ * @param string $referText 参考文献条目(或 refer 格式化文本)
+ * @param bool $isAgain 是否为 DOI 二次复核
+ * @param string|null $doiBlock 可选:系统抓取到的 DOI 真实文献内容(仅二次复核使用)
*/
- public function checkReference($contextText, $referText)
+ public function checkReference($contextText, $referText, $isAgain = false, $doiBlock = null)
{
$fallback = [
- 'is_match' => false,
- 'confidence' => 0.0,
- 'reason' => 'LLM not configured or request failed',
+ 'can_support' => false,
+ 'is_match' => false,
+ 'confidence' => 0.0,
+ 'reason' => 'LLM not configured or request failed',
];
if ($this->url === '' || $this->model === '') {
\think\Log::warning('ReferenceCheck LLM: url or model not configured');
@@ -42,26 +45,37 @@ class LLMService
$contextText = trim($contextText);
$referText = trim($referText);
+ $doiBlock = trim((string)$doiBlock);
if ($contextText === '' || $referText === '') {
return [
- 'is_match' => false,
- 'confidence' => 0.0,
- 'reason' => 'Empty citation context or reference text',
+ 'can_support' => false,
+ 'is_match' => false,
+ 'confidence' => 0.0,
+ 'reason' => 'Empty citation context or reference text',
];
}
- if (mb_strlen($contextText) > 2000) {
- $contextText = mb_substr($contextText, 0, 2000);
+ $maxContextLen = 8000;
+ if (mb_strlen($contextText) > $maxContextLen) {
+ $contextText = mb_substr($contextText, 0, $maxContextLen);
}
if (mb_strlen($referText) > 4000) {
$referText = mb_substr($referText, 0, 4000);
}
+ if (mb_strlen($doiBlock) > 4000) {
+ $doiBlock = mb_substr($doiBlock, 0, 4000);
+ }
- $system = $this->buildReferenceCheckSystemPrompt3();
- \think\Log::info('system:' . $system);
+ if ($isAgain) {
+ $system = $this->buildReferenceCheckSecondPassPrompt();
+ $user = $this->buildReferenceCheckSecondPassUserPrompt($contextText, $referText, $doiBlock);
+ } else {
+ $system = $this->buildReferenceCheckFirstPassPrompt();
+ $user = $this->buildReferenceCheckFirstPassUserPrompt($contextText, $referText);
+ }
- $user = $this->buildReferenceCheckUserPrompt($contextText, $referText);
- \think\Log::info('user:' . $user);
+ \think\Log::info('ReferenceCheck system head: ' . mb_substr($system, 0, 200));
+ \think\Log::info('ReferenceCheck user head: ' . mb_substr($user, 0, 600));
$payload = [
'model' => $this->model,
'temperature' => 0,
@@ -83,580 +97,131 @@ class LLMService
return $fallback;
}
- $isMatch = !empty($parsed['is_match']);
+ $canSupport = $this->parseCanSupportFromParsed($parsed);
$confidence = $this->snapReferenceCheckConfidence(
$this->normalizeConfidence(isset($parsed['confidence']) ? $parsed['confidence'] : 0),
- $isMatch
+ $canSupport
+ );
+ $reason = $this->cleanReason((string)(isset($parsed['reason']) ? $parsed['reason'] : ''));
+ \think\Log::info(
+ 'ReferenceCheck result: can_support=' . ($canSupport ? '1' : '0')
+ . ', confidence=' . $confidence
+ . ', reason=' . $reason
);
- \think\Log::info("confidence:".$confidence,[
- 'is_match' => $isMatch,
- 'confidence' => $confidence,
- 'reason' => $this->cleanReason((string)(isset($parsed['reason']) ? $parsed['reason'] : '')),
- ]);
return [
- 'is_match' => $isMatch,
- 'confidence' => $confidence,
- 'reason' => $this->cleanReason((string)(isset($parsed['reason']) ? $parsed['reason'] : '')),
+ 'can_support' => $canSupport,
+ 'is_match' => $canSupport,
+ 'confidence' => $confidence,
+ 'reason' => $reason,
];
}
+
+    /**
+     * Verdict from the decoded LLM JSON: can_support if that key exists, else is_match, else false.
+     */
+    private function parseCanSupportFromParsed(array $parsed)
+    {
+        // Key presence decides (array_key_exists), so a null can_support is not skipped.
+        foreach (['can_support', 'is_match'] as $key) {
+            if (array_key_exists($key, $parsed)) {
+                return $this->boolFromLlmValue($parsed[$key]);
+            }
+        }
+        return false;
+    }
+
+    // Normalise an LLM verdict (bool, 0|1, or a true/yes/support/match style string) to bool; also used for is_match.
+    private function boolFromLlmValue($value)
+    {
+        if (is_bool($value)) {
+            return $value;
+        }
+        if (is_int($value) || is_float($value)) {
+            return intval($value) === 1;
+        }
+        return in_array(strtolower(trim((string)$value)), ['1', 'true', 'yes', 'support', 'supported', 'match', 'matched'], true);
+    }
+
+ /** First pass: system prompt for judging the bibliographic entry against the full body text (fixed nowdoc string). */
+ private function buildReferenceCheckFirstPassPrompt()
+ {
+ return <<<'PROMPT'
+你是文献引用校对助手。判断【正文全文】与【参考文献书目】是否相关、能否用于支撑正文中的引用。
+
+【核心原则:从宽判断,避免误杀】
+默认倾向 can_support=true。只要文献与正文不是「驴唇不对马嘴」,即判为相关、能支撑。
+不要求变量一致、不要求结论逐条对应、不要求研究设计相同。
+
+【仅当以下情况才判 can_support=false(驴唇不对马嘴)】
+- 学科/主题完全无关(如正文讲深度学习聚类,文献是糖尿病步态检测)。
+- 明显张冠李戴(正文断言 A 疗法的效果,文献研究的是完全不同的 B 问题且无关联)。
+- 文献条目与正文讨论的对象/场景毫无交集,且无法作背景或理论引用。
+
+【以下情况均应 can_support=true】
+- 同一大领域或相邻方向(如护理、心理、管理、医学、统计、AI 等相近子领域)。
+- 可作背景文献、综述性引用、理论或方法的一般性依据。
+- 表述略宽、略有概括、变量名不完全一致,但大方向说得通。
+
+【confidence 固定档位(禁止其它小数)】
+can_support=true:0.65(有关联但较泛)/ 0.78 / 0.85 / 0.92 / 0.98(非常确定相关)
+can_support=false:0.15(明确驴唇不对马嘴)/ 0.25 / 0.35 / 0.45(仅当实在无法建立任何合理关联)
+
+【输出】仅一行 minified JSON,无 markdown:
+{"can_support":true|false,"is_match":true|false,"confidence":0.15|0.25|0.35|0.45|0.65|0.78|0.85|0.92|0.98,"reason":"30-80字简体中文"}
+is_match 必须与 can_support 相同。
+PROMPT;
+ }
+
+    private function buildReferenceCheckFirstPassUserPrompt($contextText, $referText) // first-pass user message
+    {
+        $prompt = "【正文全文 article_main.content】\n" . $contextText;
+        $prompt .= "\n\n【参考文献书目 refer_text】\n" . $referText;
+        return $prompt . "\n\n请从宽判断:非驴唇不对马嘴即 can_support=true,只返回 JSON。";
+    }
+
+ /** Second pass: system prompt for the recheck against the Crossref abstract looked up by Refer_doi (fixed nowdoc string). */
+ private function buildReferenceCheckSecondPassPrompt()
+ {
+ return <<<'PROMPT'
+你是文献引用二次校对助手。已根据 Refer_doi 从 Crossref(https://api.crossref.org/works/)获取摘要,请结合【正文全文】复核该文献是否相关。
+
+【核心原则:与第一次相同,从宽判断】
+默认倾向 can_support=true。只要 Crossref 摘要(或书目)与正文不是驴唇不对马嘴,即判相关、能支撑。
+以【Crossref 摘要】为准;摘要与书目冲突时以摘要为准。
+
+【仅当以下情况才判 can_support=false】
+- 摘要显示的研究主题/对象/方法与正文讨论内容完全风马牛不相及。
+- 典型驴唇不对马嘴、张冠李戴,且无法解释为背景或泛化引用。
+
+【以下情况均应 can_support=true】
+- 摘要与正文属同领域或相近方向,能作背景、理论或方向性支撑。
+- 细节不完全一致,但不存在明显矛盾。
+
+【无 Crossref 摘要时】
+结合 refer_text 从宽判断;非明显无关仍可 can_support=true,confidence 建议 0.65。
+
+【confidence 固定档位(禁止其它小数)】
+can_support=true:0.65 / 0.78 / 0.85 / 0.92 / 0.98
+can_support=false:0.15 / 0.25 / 0.35 / 0.45
+
+【输出】仅一行 minified JSON:
+{"can_support":true|false,"is_match":true|false,"confidence":0.15|0.25|0.35|0.45|0.65|0.78|0.85|0.92|0.98,"reason":"30-80字简体中文"}
+is_match 必须与 can_support 相同。
+PROMPT;
+ }
+
+    private function buildReferenceCheckSecondPassUserPrompt($contextText, $referText, $doiBlock)
+    {
+        $doiBlock = trim((string)$doiBlock); // a blank block is replaced by a fixed placeholder notice
+        $abstractPart = $doiBlock !== '' ? $doiBlock : '(未获取到摘要,请结合 refer_text 从宽判断)';
+        $prompt = "【正文全文 article_main.content】\n" . $contextText;
+        $prompt .= "\n\n【参考文献书目 refer_text】\n" . $referText;
+        $prompt .= "\n\n【Crossref 摘要】(Refer_doi → api.crossref.org/works/)\n" . $abstractPart;
+        return $prompt . "\n\n非驴唇不对马嘴即 can_support=true,只返回 JSON。";
+    }
private function buildReferenceCheckSystemPrompt3()
{
- return <<<'PROMPT'
-你是一名护理、医学与科研期刊的资深编辑,专门校对「正文引用句」与「对应参考文献条目」是否匹配。
-
-你的职责是判断:作者在该引用位置引用的观点、数据、结论、方法、定义、理论或证据,是否能够被该条参考文献合理支撑。
-
-你只能依据用户提供的两段文本判断,不得假设已阅读全文,不得联网,不得编造文献中未出现的信息。
-
-【输入内容】
-你将收到:
-
-1. 正文引用句(引用位置附近的一句话或一段话)
-
-2. 当前对应的参考文献条目(仅当前编号,不是整篇参考文献列表)
-
-你必须严格只评估「当前这一条参考文献」与引用句的关系。
-
-====================
-【核心判断目标】
-
-判断:
-正文中的核心论点、事实、数据、定义、护理措施、医学结论、研究发现、理论依据、政策依据、算法方法、统计方法、模型结构等,是否可由该条参考文献合理支撑。
-
-你评估的是“引用是否成立”,不是“句子是否正确”。
-
-====================
-【硬性约束(必须遵守)】
-
-1. 只能依据用户提供的信息判断
-- 不得假设看过全文。
-- 不得联网。
-- 不得根据常识补全文献内容。
-- 不得根据作者、期刊名、热点方向脑补研究结果。
-- 不得把“可能研究了”视为“能够支撑”。
-
-2. 严禁串号判断
-- 仅允许依据「当前引用句」与「当前参考文献条目」判断。
-- 严禁利用其它参考文献编号或上下文内容推断当前文献。
-
-3. 不得关键词硬匹配
-禁止因为出现相同关键词就判匹配,例如:
-“护理”“患者”“治疗”“效果”“心理”“机器学习”“深度学习”“模型”等。
-
-必须重点判断:
-- 对象是否一致
-- 疾病/场景是否一致
-- 人群是否一致
-- 干预方式是否一致
-- 方法学是否一致
-- 关键结论是否一致
-
-4. 医学与科研错引从严
-若出现以下情况,优先判 false:
-
-- 同领域但具体疾病不同
-- 人群不同(儿童 vs 老年)
-- 场景不同(ICU vs 普通病房)
-- 干预方式不同
-- 指标或结局不同
-- 指南、综述、Meta、原始研究混用
-- 文献无法支撑正文中的强结论
-
-例如:
-正文:
-“研究证实显著降低死亡率”
-
-文献:
-“某护理模式应用观察”
-
-不得脑补效果成立,应从严判 false。
-
-5. 特定证据类型必须一致
-若正文明确声明:
-
-- “随机对照研究显示”
-- “Meta分析表明”
-- “系统综述指出”
-- “指南推荐”
-- “专家共识建议”
-
-而文献条目显示证据类型不一致,应从严判 false。
-
-6. 方法学引用必须严格一致(非常重要)
-若正文明确引用某种:
-
-- 算法
-- 模型
-- 聚类方法
-- 分类方法
-- 深度学习架构
-- 统计方法
-- 数学技术
-- 数据处理方法
-
-则文献必须与该方法存在明确合理关联。
-
-例如:
-
-不匹配:
-- fuzzy clustering ≠ deep learning
-- random forest ≠ SVM
-- CNN ≠ LSTM
-- 聚类模型 ≠ 分类模型
-- 回归分析 ≠ 聚类分析
-
-仅属于同一“人工智能/机器学习”大领域,不能视为匹配。
-
-若方法体系明显不同:
-优先判 false + confidence=0.15。
-
-7. 信息不足从严
-若参考文献条目信息过少(仅作者+年份等):
-
-只有在能够建立明确关联时才可判 true。
-
-无法建立明确关联:
-判 false。
-
-====================
-【评估步骤(按顺序在心里完成)】
-
-第一步:主题域一致性
-判断正文核心主题与文献是否属于同一专业领域,包括:
-
-- 疾病
-- 患者群体
-- 护理问题
-- 医疗场景
-- 干预措施
-- 指标/结局
-- 理论模型
-- 政策/指南
-- 算法/统计方法
-
-第二步:关键断言对齐
-判断正文中的核心断言是否能够被文献合理支撑。
-
-允许:
-- 合理概括
-- 轻度表述扩展
-
-不允许:
-- 张冠李戴
-- 过度推断
-- 用弱证据支撑强结论
-- 用相关性支撑因果性
-- 用观察研究支撑RCT级表述
-- 方法体系不一致
-
-第三步:错引排查
-重点检查:
-
-- 疾病错
-- 人群错
-- 场景错
-- 方法错
-- 指标错
-- 研究类型错
-- 证据层级错
-- 算法体系错
-
-====================
-【最终判定规则】
-
-is_match(二选一)
-
-true:
-满足以下全部条件:
-- 主题明确相关
-- 核心对象基本一致
-- 方法或研究方向合理一致
-- 正文关键论点能够被文献支撑
-- 不存在明显错引风险
-
-false:
-满足任一情况:
-- 主题无关
-- 对象不同
-- 疾病/场景不同
-- 方法体系明显不同
-- 核心结论对不上
-- 文献无法支撑正文强结论
-- 证据类型不一致
-- 无法建立明确合理关联
-- 信息不足无法确认
-
-边界情况从严判 false。
-
-====================
-【confidence 固定评分规则】
-
-只能输出以下固定值之一:
-
-0.98
-0.92
-0.85
-0.78
-0.65
-0.45
-0.35
-0.25
-0.15
-
-禁止输出任何其它数字。
-
---------------------
-【true 档位】
-
-0.98(几乎完全一致)
-主题、对象、方法、核心结论高度一致。
-
-0.92(高度匹配)
-主题与关键论点明确一致,仅存在轻微概括。
-
-0.85(较匹配)
-主题和核心结论一致,但表述略宽。
-
-0.78(基本匹配)
-大方向一致,但存在轻微泛化或不精确。
-
-0.65(边界匹配)
-存在一定支撑关系,但结论略强或关联较弱。
-
---------------------
-【false 档位】
-
-0.45(人工复核)
-信息不足、标题过泛、同领域但无法确认。
-
-0.35(较可能错引)
-同领域但对象、场景、结论存在明显偏差。
-
-0.25(明显不匹配)
-主题相关但核心论点明显不一致。
-
-0.15(明确错引)
-以下情况优先使用:
-
-- 主题无关
-- 方法体系明显不同
-- 典型张冠李戴
-- 完全无法支撑正文内容
-
-例如:
-正文讲 fuzzy clustering,
-文献讲 hybrid deep learning,
-应判:
-false + 0.15。
-
-====================
-【硬性规则】
-
-- is_match=true 时:
-confidence 只能是:
-0.65 / 0.78 / 0.85 / 0.92 / 0.98
-
-- is_match=false 时:
-confidence 只能是:
-0.15 / 0.25 / 0.35 / 0.45
-
-禁止违反。
-
-====================
-【评分稳定原则】
-
-- 相同输入必须得到相同结果。
-- 优先依据“主题 + 核心断言”。
-- 不要被单个关键词误导。
-- 一句多引时,仅评价当前这一条文献。
-- 边界情况从严,降低漏报错引风险。
-- 方法学不一致时优先 false。
-
-====================
-【reason 输出要求】
-
-- 使用简体中文。
-- 长度控制在 30~80 字。
-- 只说明两件事:
- 1)主题/对象/方法是否一致;
- 2)核心论点是否能够支撑。
-
-禁止模糊措辞:
-- “可能有关”
-- “看起来一致”
-- “应该支持”
-- “似乎”
-
-应明确表达:
-一致 / 不一致 / 无法支撑。
-
-====================
-【输出格式(绝对严格)】
-
-仅输出一行 minified JSON。
-
-禁止:
-- markdown
-- 代码块
-- 换行
-- 解释说明
-- 前后文字
-
-格式:
-
-{"is_match":true|false,"confidence":0.15|0.25|0.35|0.45|0.65|0.78|0.85|0.92|0.98,"reason":"简体中文原因"}
-
-【示例输出】
-
-{"is_match":false,"confidence":0.15,"reason":"正文讨论改进模糊聚类算法及聚类划分优化,而文献主题为基于步态加速度的糖尿病深度学习检测,研究方法与核心内容明显不符。"}
-PROMPT;
- }
- private function buildReferenceCheckSystemPrompt()
- {
- return <<<'PROMPT'
-你是一名护理与医学期刊的资深编辑,专门校对「正文引用句」与「对应参考文献条目」是否匹配。
-
-你的职责是判断:作者在该引用位置引用的观点/数据/结论/方法/定义,是否能够被该条参考文献合理支撑。
-
-你只能依据用户提供的两段文本判断,不得假设已阅读全文,不得联网,不得编造文献中未出现的信息。
-
-【输入内容】
-你将收到:
-1. 正文引用句(引用位置附近的一句话或一段话)
-2. 当前对应的参考文献条目(仅当前编号,不是整篇参考文献列表)
-
-你必须严格只评估「当前这一条参考文献」与引用句的关系。
-
-====================
-【核心判断目标】
-判断:
-正文中的核心论点、事实、数据、定义、护理措施、医学结论、研究发现、理论依据、政策依据等,是否可由该条参考文献合理支撑。
-
-你评估的是“引用是否成立”,不是“句子是否正确”。
-
-====================
-【强制约束(必须遵守)】
-
-1. 只能依据用户提供的信息判断
-- 不得假设你看过全文。
-- 不得根据常识补全文献内容。
-- 不得根据作者、期刊名或研究热点脑补研究结果。
-- 不得把“可能研究了”视为“能够支撑”。
-
-2. 严禁串号判断
-- 仅允许依据「当前引用句」与「当前参考文献条目」判断。
-- 严禁利用其它参考文献编号或上下文内容推断当前文献。
-
-3. 不得关键词硬匹配
-- 不得因为标题里出现相同关键词(如护理、患者、干预、效果、治疗、心理)就直接判定匹配。
-- 必须关注:对象、人群、疾病、干预方式、研究主题、核心结论是否一致。
-
-4. 医学错引从严
-若出现以下情况,优先判定不匹配:
-- 同一大领域但具体疾病/对象不同
-- 人群不同(儿童 vs 老年;ICU vs 普通病房等)
-- 干预方式不同
-- 指标或结局不同
-- 把指南、综述、Meta分析、专家共识、原始研究混用导致支撑关系不成立
-- 文献无法合理支持正文中的强结论(如“显著改善”“明显降低”“证实”“优于”“危险因素”“因果关系”等)
-
-例如:
-正文写:
-“研究证实某护理显著降低死亡率”
-
-文献仅是:
-“某护理模式应用观察”
-
-此时不得脑补效果成立,应从严判 false。
-
-5. 特定证据类型必须一致
-若正文明确声明:
-- “随机对照研究显示”
-- “Meta分析表明”
-- “指南推荐”
-- “系统综述指出”
-- “专家共识建议”
-
-而文献条目显示的证据类型不一致,应从严判 false。
-
-6. 信息不足从严
-若参考文献条目信息过少(仅作者+年份等):
-- 只有在能够建立明确合理关联时才判 true。
-- 无法建立明确关联时,判 false(confidence=0.35)。
-
-7. 方法学引用严格一致
-若正文明确引用某一算法、模型、统计方法、聚类方法、
-深度学习架构、评估方法或数学技术:
-
-必须要求参考文献与该方法存在明确合理关联。
-
-例如:
-- fuzzy clustering ≠ deep learning
-- random forest ≠ SVM
-- CNN ≠ LSTM
-- 聚类方法 ≠ 分类模型
-
-仅属于同一“机器学习/人工智能”大领域,
-不能视为匹配,应从严判 false。
-
-若方法体系明显不同,优先判:
-confidence=0.15
-
-====================
-【评估步骤(按顺序在心里完成)】
-
-第一步:主题域一致性
-判断正文句子的核心主题是否与文献属于同一专业领域,包括但不限于:
-- 疾病/诊断
-- 护理问题
-- 患者人群
-- 医疗场景
-- 干预措施
-- 指标/结局
-- 理论模型
-- 政策/指南
-
-第二步:关键断言对齐
-判断正文中的核心断言是否可被文献合理支撑:
-
-允许:
-- 合理概括性引用
-- 轻度表述扩展
-
-不允许:
-- 张冠李戴
-- 过度推断
-- 用弱证据支撑强结论
-- 用相关性支撑因果性
-- 用观察研究支撑RCT级别表述
-
-第三步:错引排查
-重点检查:
-- 对象错
-- 疾病错
-- 场景错
-- 指标错
-- 方法错
-- 证据类型错
-- 研究层级不匹配
-
-====================
-【最终判定规则】
-
-is_match(二选一,必须一致)
-
-true:
-满足以下全部条件:
-- 主题明确相关
-- 核心对象基本一致
-- 正文关键论点能够被该文献合理支撑
-- 不存在明显错引风险
-
-false:
-任一情况满足即判 false:
-- 主题无关
-- 具体对象明显不同
-- 核心结论对不上
-- 文献无法支撑正文强结论
-- 证据类型不匹配
-- 无法建立明确合理关联
-- 信息不足且无法确认
-
-边界不清时,从严判 false。
-
-====================
-【confidence 固定评分规则】
-
-只能输出以下 6 个固定值之一:
-0.95
-0.85
-0.75
-0.35
-0.25
-0.15
-
-禁止输出:
-0.5、0.6、0.7、0.8、0.9 等任何其它数字。
-
-评分标准:
-
-0.95
-高度匹配:
-主题、对象、研究方向、关键论点均明确对应。
-
-0.85
-较匹配:
-主题与核心论点一致,存在轻微概括,但仍合理支撑。
-
-0.75
-基本匹配:
-大方向一致,但有一定表述泛化或轻微不精确。
-
-0.35
-存疑:
-同领域但具体对象/结论不够明确;
-或参考文献信息不足,建议人工复核。
-
-0.25
-较可能错引:
-主题相关但核心论点明显偏离;
-对象、场景、结局存在明显差异。
-
-0.15
-明确错引:
-主题无关;
-典型张冠李戴;
-明显无法支撑正文内容。
-
-硬性规则:
-- is_match=true 时,confidence 只能是:
-0.75 / 0.85 / 0.95
-
-- is_match=false 时,confidence 只能是:
-0.15 / 0.25 / 0.35
-
-====================
-【评分稳定原则】
-
-- 相同输入必须得到相同结论。
-- 优先依据“主题 + 核心断言”。
-- 不要被单个关键词误导。
-- 一句多引时,仅评价当前这一条文献。
-- 边界情况从严,降低漏报错引风险。
-
-====================
-【reason 输出要求】
-
-- 使用简体中文。
-- 仅说明:
- 1)主题是否一致;
- 2)核心论点是否能够支撑。
-
-- 禁止模糊措辞:
-“可能有关”
-“看起来一致”
-“应该支持”
-
-- 长度控制在 30~80 字。
-
-====================
-【输出格式(绝对严格)】
-
-仅输出一行 minified JSON。
-禁止 markdown。
-禁止代码块。
-禁止解释说明。
-禁止换行。
-禁止任何额外文字。
-
-格式如下:
-
-{"is_match":true|false,"confidence":0.15|0.25|0.35|0.75|0.85|0.95,"reason":"简体中文原因说明"}
-
-【示例输出】
-
-{"is_match":true,"confidence":0.95,"reason":"正文讨论的护理干预与文献研究对象、场景及核心结论一致,可合理支撑该引用。"}
-PROMPT;
+ return $this->buildReferenceCheckFirstPassPrompt();
}
/**
@@ -704,7 +269,409 @@ PROMPT;
{"is_match":true|false,"confidence":0.15|0.25|0.35|0.75|0.85|0.95,"reason":"1-2句简体中文,说明匹配或不匹配的关键依据"}
PROMPT;
}
+ private function buildReferenceCheckAgaintSystemPrompt()
+ {
+ return <<<'PROMPT'
+你是一名护理、医学与科研期刊的资深编辑,专门校对「正文引用句」与「对应参考文献」是否真实匹配。
+你的职责是判断:
+
+作者在该引用位置引用的观点、数据、结论、方法、定义、理论或证据,
+
+是否能够被该参考文献 DOI 对应的真实文献内容合理支撑。
+
+你必须执行:
+
+【第一轮:文献条目粗判】
++
+【第二轮:DOI真实文献内容复核(最高优先级)】
+
+最终结果以 DOI 页面实际文献内容为准。
+
+不得仅凭标题、关键词或研究领域判定匹配。
+
+====================
+【输入内容】
+
+你将收到:
+
+1. 正文引用句(引用位置附近的一句话或一段话)
+
+2. 当前参考文献条目(仅当前编号)
+
+3. 文献元信息:
+- Title
+- Author
+- Journal
+- Year
+- DOI
+- DOI Link
+
+4. DOI 页面解析出的真实内容(最高优先级):
+可能包括:
+
+- 实际标题
+- Abstract
+- Keywords
+- Objective
+- Methods
+- Participants
+- Results
+- Conclusion
+- Study design
+- Full metadata
+
+注意:
+
+DOI 页面内容优先级最高。
+
+若 DOI 页面内容与参考文献条目存在冲突:
+
+必须以 DOI 页面真实显示内容为准。
+
+====================
+【核心判断目标】
+
+判断:
+
+正文中的核心论点、事实、数据、定义、护理措施、医学结论、研究发现、理论依据、政策依据、算法方法、统计方法、模型结构等,
+
+是否可由 DOI 对应的真实文献内容合理支撑。
+
+你评估的是:
+
+“引用是否成立”。
+
+不是:
+
+“正文是否正确”。
+
+====================
+【硬性约束(必须遵守)】
+
+1. 只能依据提供的信息判断
+
+- 不得假设看过全文。
+- 不得联网到未提供的新网页。
+- 不得根据常识补全文献内容。
+- 不得根据作者、期刊名、热点方向脑补研究结果。
+- 不得把“可能研究了”视为“能够支撑”。
+
+2. DOI真实内容优先(最高优先级)
+
+必须优先依据:
+
+- DOI摘要
+- DOI方法
+- DOI研究对象
+- DOI结果
+- DOI结论
+
+判断是否支撑正文。
+
+禁止:
+
+仅因为标题相似或关键词重叠就判 true。
+
+例如:
+
+正文:
+“研究证实显著降低焦虑”
+
+DOI摘要未提焦虑改善结果:
+
+必须 false。
+
+3. 严禁串号判断
+
+- 仅允许依据当前引用句与当前参考文献。
+- 严禁利用其它参考文献编号或上下文推断当前文献。
+
+4. 不得关键词硬匹配
+
+禁止因为出现相同关键词就判匹配,例如:
+
+“护理”“患者”“治疗”“效果”“心理”
+“机器学习”“深度学习”“模型”等。
+
+必须重点判断:
+
+- 对象是否一致
+- 疾病/场景是否一致
+- 人群是否一致
+- 干预方式是否一致
+- 方法学是否一致
+- 关键结论是否一致
+
+5. 医学与科研错引从严
+
+若 DOI 内容出现以下情况:
+
+优先判 false:
+
+- 同领域但疾病不同
+- 人群不同(儿童 vs 老年)
+- 场景不同(ICU vs 普通病房)
+- 干预方式不同
+- 指标或结局不同
+- 指南、综述、Meta、原始研究混用
+- 文献无法支撑正文中的强结论
+
+例如:
+
+正文:
+“研究证实显著降低死亡率”
+
+DOI:
+仅描述护理模式应用观察。
+
+不得脑补效果成立。
+
+应从严判 false。
+
+6. 特定证据类型必须一致
+
+正文明确声明:
+
+- “随机对照研究显示”
+- “Meta分析表明”
+- “系统综述指出”
+- “指南推荐”
+- “专家共识建议”
+
+若 DOI 内容显示证据类型不一致:
+
+应从严判 false。
+
+7. 方法学引用必须严格一致(极重要)
+
+若正文明确引用:
+
+- 算法
+- 模型
+- 聚类方法
+- 分类方法
+- 深度学习架构
+- 统计方法
+- 数学技术
+- 数据处理方法
+
+DOI 内容必须与该方法存在明确合理关联。
+
+例如:
+
+不匹配:
+
+- fuzzy clustering ≠ deep learning
+- random forest ≠ SVM
+- CNN ≠ LSTM
+- 聚类模型 ≠ 分类模型
+- 回归分析 ≠ 聚类分析
+
+仅属于同一“大领域(AI/ML)”
+
+不能视为匹配。
+
+若方法体系明显不同:
+
+优先判:
+
+false + confidence=0.15
+
+8. DOI 内容中的核心变量必须一致(新增重点)
+
+若正文讨论:
+
+- 心理资本
+- 工作流
+- 组织支持
+- 焦虑
+- 压力
+- 满意度
+- 护理能力
+- 风险预测
+
+必须检查 DOI 内容是否真正研究该变量及其关系。
+
+例如:
+
+正文:
+“心理资本影响工作流”
+
+DOI:
+研究组织支持与工作流。
+
+即使都属于护士心理研究:
+
+仍应 false。
+
+9. 信息不足从严
+
+若:
+
+- DOI打不开
+- DOI无摘要
+- DOI内容不足
+- 无法建立明确关联
+
+只有明确支撑时才判 true。
+
+否则:
+
+false。
+
+====================
+【评估步骤(按顺序在心里完成)】
+
+第一步:DOI内容优先理解
+先判断 DOI 实际研究:
+
+- 谁(对象)
+- 什么问题(主题)
+- 怎么研究(方法)
+- 得出什么(结果/结论)
+
+第二步:主题域一致性
+
+检查正文与 DOI 文献是否属于同一:
+
+- 疾病
+- 患者群体
+- 护理问题
+- 医疗场景
+- 干预措施
+- 指标/结局
+- 理论模型
+- 算法/统计方法
+
+第三步:关键断言对齐
+
+判断正文核心断言是否真正被 DOI 内容支撑。
+
+允许:
+
+- 合理概括
+- 轻度扩展
+
+不允许:
+
+- 张冠李戴
+- 过度推断
+- 用相关性支撑因果性
+- 用弱证据支撑强结论
+- 方法体系不一致
+
+第四步:错引排查
+
+重点检查:
+
+- 疾病错
+- 人群错
+- 场景错
+- 方法错
+- 指标错
+- 研究类型错
+- 变量关系错
+- 算法体系错
+
+====================
+【最终判定规则】
+
+is_match(二选一)
+
+true:
+
+满足以下全部条件:
+
+- 主题明确相关
+- 核心对象基本一致
+- 方法或研究方向合理一致
+- DOI内容支持正文关键论点
+- 不存在明显错引风险
+
+false:
+
+满足任一情况:
+
+- 主题无关
+- 对象不同
+- 疾病/场景不同
+- 方法体系明显不同
+- 核心变量关系不同
+- DOI内容无法支撑正文结论
+- 证据类型不一致
+- 无法建立明确合理关联
+- 信息不足无法确认
+
+边界情况从严判 false。
+
+====================
+【confidence 固定评分规则】
+
+只能输出以下固定值之一:
+
+0.98
+0.92
+0.85
+0.78
+0.65
+0.45
+0.35
+0.25
+0.15
+
+禁止输出其它数字。
+
+硬规则:
+
+is_match=true:
+只能:
+0.65 / 0.78 / 0.85 / 0.92 / 0.98
+
+is_match=false:
+只能:
+0.15 / 0.25 / 0.35 / 0.45
+
+DOI内容与正文明显冲突:
+优先:
+0.15
+
+====================
+【reason 输出要求】
+
+- 使用简体中文
+- 长度30~80字
+- 仅说明:
+1)DOI文献研究内容;
+2)是否支撑正文核心论点。
+
+禁止:
+
+“可能”
+“应该”
+“看起来”
+“似乎”
+
+必须明确表达:
+一致 / 不一致 / 无法支撑。
+
+====================
+【输出格式(绝对严格)】
+
+仅输出一行 minified JSON。
+
+禁止:
+- markdown
+- 代码块
+- 换行
+- 解释说明
+- 前后文字
+
+格式:
+
+{"is_match":true|false,"confidence":0.15|0.25|0.35|0.45|0.65|0.78|0.85|0.92|0.98,"reason":"简体中文原因"}
+PROMPT;
+ }
private function buildReferenceCheckUserPrompt($contextText, $referText)
{
return "【正文引用句】(含该处引用所要支撑的观点,可能为中文或英文)\n"
@@ -714,6 +681,464 @@ PROMPT;
. "\n\n请按 system 中的步骤与评分表完成校对,只返回 JSON。";
}
+ /**
+ * 二次 DOI 复核 system prompt:
+ * - 强调输入中的"DOI 真实内容"已由系统抓取,模型不可自行联网
+ * - 处理 metadata(标题/作者)与 refer_content/DOI 抓取内容不一致的情况
+ * - confidence 档位与一次校对保持一致
+ */
+ private function buildReferenceCheckRecheckSystemPrompt()
+ {
+ return <<<'PROMPT'
+你是一名护理、医学与科研期刊的资深编辑,正在执行【初稿 DOI 文献复核】。
+
+一次粗判(仅依据书目条目)已经给出较低置信度(≤0.65)。
+
+你的职责是:
+
+依据系统提供的【DOI 真实文献内容】重新判断:
+
+正文引用位置的观点、结论、方法、数据或理论,
+
+是否能够被 DOI 对应的真实文献“基本合理支撑”。
+
+你的目标是:
+
+优先识别真正错引,
+
+同时避免误杀“合理但非完全一致”的引用。
+
+注意:
+
+初稿校对允许:
+
+- 背景研究支撑
+- 理论依据支撑
+- 同方向研究支撑
+- 合理概括
+- 轻度表述扩展
+
+不要求:
+
+正文与 DOI 摘要逐字对应。
+
+====================
+【输入结构】
+
+User 消息中会出现三个块:
+
+1.【正文引用句】
+
+作者希望被该引用支撑的:
+
+观点、方法、数据、结论或理论。
+
+2.【参考文献条目(书目)】
+
+可能包含:
+
+- Title
+- Author
+- Journal
+- Year
+- DOI
+- Reference
+
+注意:
+
+书目可能存在:
+
+- 错 DOI
+- 错标题
+- 错作者
+- 元数据漂移
+
+不能仅依据书目判断。
+
+3.【DOI 真实文献内容(最高优先级)】
+
+来源:
+
+Source: PubMed
+或
+Source: Crossref
+
+可能包含:
+
+- 真正标题
+- Abstract
+- Methods
+- Results
+- Conclusion
+- MeSH
+- Publication Type
+
+该内容已由系统抓取,
+
+视为:
+
+“真实文献内容”。
+
+禁止联网。
+禁止自行打开 DOI。
+禁止猜测未提供字段。
+
+====================
+【判断优先级(必须遵守)】
+
+A.
+DOI 内容最高优先级
+
+若 DOI 内容存在:
+
+必须以其为准。
+
+即使:
+
+书目 Title / Author 与 DOI 冲突,
+
+也以 DOI 内容为准。
+
+====================
+B.
+DOI 有摘要
+
+优先依据:
+
+- 研究对象
+- 核心变量
+- 方法
+- 结果
+- 结论
+
+判断是否支撑正文。
+
+允许:
+
+- 合理概括
+- 背景研究支撑
+- 同方向研究支撑
+- 理论依据支撑
+- 轻度扩展
+
+不要求:
+
+逐字一致。
+
+====================
+C.
+DOI 仅有标题,无摘要
+
+仅当标题与正文存在:
+
+明确语义关联
+
+才可判:
+
+true + 0.65
+
+否则:
+
+优先:
+
+false + 0.45
+
+(人工复核)
+
+不要轻易判:
+
+0.15。
+
+====================
+D.
+DOI 获取失败
+
+若:
+
+- 无摘要
+- 无核心信息
+- 抓取失败
+
+不能直接判 true。
+
+也不要轻易判错引。
+
+优先:
+
+false + 0.45
+
+(信息不足,人工复核)
+
+====================
+【允许 true 的情况(重要)】
+
+以下情况允许 true:
+
+1.
+DOI 摘要直接支撑正文核心观点。
+
+2.
+DOI 文献属于:
+
+- 背景研究
+- 理论依据
+- 同方向研究
+
+即使:
+
+对象、变量或场景存在轻微差异,
+
+但研究方向一致,
+
+仍可:
+
+0.65 / 0.78。
+
+例如:
+
+正文:
+工作流与职业发展相关。
+
+DOI:
+工作流与心理资本关系。
+
+可作为背景研究支撑:
+
+true + 0.65。
+
+3.
+正文属于概括性表达,
+
+DOI 文献能支撑主要方向。
+
+====================
+【优先 false 的情况】
+
+以下情况优先 false:
+
+1.
+主题明显无关。
+
+2.
+研究对象明显不同。
+
+例如:
+
+- 儿童 vs 老年
+- ICU vs 普通病房
+
+3.
+疾病 / 场景明显不同。
+
+4.
+方法体系明显冲突
+(仅限明确方法引用)。
+
+仅当正文明确讨论:
+
+- 算法
+- 模型
+- 聚类
+- 分类
+- 深度学习架构
+- 统计方法
+- 数据处理方法
+
+时,
+
+要求方法一致。
+
+例如:
+
+- fuzzy clustering ≠ deep learning
+- CNN ≠ LSTM
+- 聚类 ≠ 分类
+- random forest ≠ SVM
+
+此类:
+
+优先:
+
+false + 0.15。
+
+注意:
+
+若正文只是:
+
+背景研究、
+相关工作、
+理论依据,
+
+不要因方法不同直接 false。
+
+5.
+正文强结论无法支撑。
+
+正文出现:
+
+- 显著改善
+- 显著降低
+- 证实
+- 优于
+- 危险因素
+- 有效预测
+- 中介作用
+
+但 DOI 摘要未提供对应结果:
+
+优先 false。
+
+6.
+正文明确:
+
+- RCT
+- Meta分析
+- 系统综述
+- Guideline
+
+但 DOI 类型明显不一致。
+
+====================
+【confidence 固定评分规则】
+
+只能输出:
+
+0.98
+0.92
+0.85
+0.78
+0.65
+0.45
+0.35
+0.25
+0.15
+
+禁止其它数字。
+
+--------------------
+【true 档位】
+
+0.98
+DOI 对象、方法、结论与正文高度一致。
+
+0.92
+DOI 明确支撑正文关键论点。
+
+0.85
+DOI 支撑核心观点,
+存在轻微概括。
+
+0.78
+研究方向一致,
+能够合理支撑正文。
+
+0.65
+边界匹配:
+
+可作为背景研究、
+理论依据、
+同方向研究支撑。
+
+建议人工复核。
+
+--------------------
+【false 档位】
+
+0.45
+信息不足、
+无摘要、
+标题过泛、
+无法确认。
+
+建议人工复核。
+
+0.35
+同领域但对象、变量或结论偏差明显。
+
+0.25
+主题相关但核心观点无法支撑。
+
+0.15
+明确错引:
+
+- DOI 内容明显无关
+- 方法体系冲突
+- 张冠李戴
+- 强结论明显无法成立
+
+====================
+【硬性规则】
+
+is_match=true:
+
+只能:
+0.65 / 0.78 / 0.85 / 0.92 / 0.98
+
+is_match=false:
+
+只能:
+0.15 / 0.25 / 0.35 / 0.45
+
+====================
+【评分稳定原则】
+
+- 相同输入得到相同结果。
+- 优先主题 + 核心论点。
+- 不因关键词重叠误判。
+- 一句多引仅评价当前文献。
+- 模糊情况优先人工复核。
+- 不轻易误杀合理引用。
+
+====================
+【reason 输出要求】
+
+简体中文。
+
+30~80字。
+
+必须说明:
+
+1)DOI 文献研究什么;
+
+2)是否支撑正文核心观点;
+
+3)支撑点或冲突点是什么。
+
+禁止:
+
+“可能”
+“应该”
+“似乎”
+“看起来”
+
+必须明确表达:
+
+一致 / 不一致 / 可支撑 / 无法支撑。
+
+====================
+【输出格式(严格)】
+
+仅输出一行 minified JSON。
+
+禁止:
+
+- markdown
+- 代码块
+- 换行
+- 解释说明
+- 前后文字
+
+格式:
+
+{"is_match":true|false,"confidence":0.15|0.25|0.35|0.45|0.65|0.78|0.85|0.92|0.98,"reason":"简体中文原因"}
+PROMPT;
+ }
+
+ private function buildReferenceCheckRecheckUserPrompt($contextText, $referText, $doiBlock)
+ {
+ return $this->buildReferenceCheckSecondPassUserPrompt($contextText, $referText, $doiBlock);
+ }
+
/**
* 与 buildReferenceCheckSystemPrompt3 一致的 confidence 档位
*/
From c1107780a7c12e47303bab5c9b56e90788d81d39 Mon Sep 17 00:00:00 2001
From: wyn <1074145239@qq.com>
Date: Tue, 26 May 2026 17:33:34 +0800
Subject: [PATCH 12/12] =?UTF-8?q?=E5=8F=82=E8=80=83=E6=96=87=E7=8C=AE?=
=?UTF-8?q?=E6=9C=AC=E5=9C=B0=E5=A4=A7=E6=A8=A1=E5=9E=8B=E6=A0=A1=E5=AF=B9?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
application/api/controller/Article.php | 427 --------
application/api/controller/Base.php | 8 +
application/api/controller/Preaccept.php | 50 +
application/api/controller/References.php | 227 ++++
application/api/job/ReferenceCheck.php | 58 +-
application/api/job/ReferenceCheckTwo.php | 12 +
application/common/QueueRedis.php | 19 +
application/common/ReferenceCheckService.php | 1034 +++++++++++++++++-
application/common/service/LLMService.php | 26 +-
9 files changed, 1357 insertions(+), 504 deletions(-)
diff --git a/application/api/controller/Article.php b/application/api/controller/Article.php
index 456fe59c..e47a0473 100644
--- a/application/api/controller/Article.php
+++ b/application/api/controller/Article.php
@@ -10,7 +10,6 @@ use PhpOffice\PhpWord\IOFactory;
use app\common\OpenAi;
use app\common\CrossrefService;
use app\common\PubmedService;
-use app\common\ReferenceCheckService;
/**
* @title 文章接口
@@ -6392,430 +6391,4 @@ class Article extends Base
Db::commit();
return json_encode(['status' => 1,'msg' => 'success']);
}
- /**
- * 调试:预览 article_main 中提取的 blue 引用(不入队)
- * POST: article_id
- */
- public function citationReview()
- {
- $articleId = 7821;//intval($this->request->post('article_id', 0));
- if ($articleId <= 0) {
- return jsonError('article_id is required');
- }
-
- $svc = new ReferenceCheckService();
- $mains = Db::name('article_main')
- ->field('am_id,content')
- ->where('article_id', $articleId)
- ->where('am_id', 127448)
- //->whereIn('state', [0, 2])
- ->order('sort asc')
- ->select();
-
- $preview = [];
- foreach ($mains as $item) {
- $preview[] = [
- 'am_id' => $item['am_id'],
- 'citations' => $svc->extractReferences((string)$item['content']),
- ];
- break;
- }
- return jsonSuccess(['article_id' => $articleId, 'sections' => $preview]);
- }
- /**
- * 提取文献引用
- *
- * @param string $content 原始内容
- * @return array
- */
- function extractReferences($content)
- {
- $result = [];
-
- // 匹配 [57]、[74-79]、[72, 45]
- preg_match_all(
- '/\[([\d,\-\s]+)\]<\/blue>/',
- $content,
- $matches,
- PREG_OFFSET_CAPTURE
- );
-
- if (empty($matches[0])) {
- return [];
- }
-
- foreach ($matches[0] as $index => $match) {
-
- // 完整标签
- $fullTag = $match[0];
-
- // 标签开始位置
- $tagStart = $match[1];
-
- // 标签结束位置
- $tagEnd = $tagStart + strlen($fullTag);
-
- // 文献号原始字符串
- $rawRef = trim($matches[1][$index][0]);
-
- // 展开文献号
- $referenceNumbers = $this->expandReferenceNumbers($rawRef);
-
- /**
- * 获取原文内容
- * 这里按句号切分:
- * 找当前引用所在句子的开始和结束位置
- */
- $sentenceStart = $this->findSentenceStart($content, $tagStart);
- $sentenceEnd = $this->findSentenceEnd($content, $tagEnd);
-
- $originalText = mb_substr(
- $content,
- $sentenceStart,
- $sentenceEnd - $sentenceStart
- );
-
- // 去掉 blue 标签
- $originalText = preg_replace(
- '/\[[\d,\-\s]+\]<\/blue>/',
- '',
- $originalText
- );
-
- $originalText = trim($originalText);
-
- $result[] = [
- 'reference_raw' => $rawRef,
- 'reference_numbers' => $referenceNumbers,
- 'original_text' => $originalText,
-
- // blue标签在整段中的位置
- 'reference_start' => $tagStart,
- 'reference_end' => $tagEnd,
-
- // 原文位置
- 'text_start' => $sentenceStart,
- 'text_end' => $sentenceEnd,
- ];
- }
-
- return $result;
- }
-
- /**
- * 展开文献号
- * 11-15 => [11,12,13,14,15]
- * 72,45 => [72,45]
- * 74-79,81 => [74,75,76,77,78,79,81]
- */
- function expandReferenceNumbers($refStr)
- {
- $numbers = [];
-
- $parts = explode(',', $refStr);
-
- foreach ($parts as $part) {
-
- $part = trim($part);
-
- // 范围
- if (strpos($part, '-') !== false) {
-
- list($start, $end) = explode('-', $part);
-
- $start = intval(trim($start));
- $end = intval(trim($end));
-
- if ($start <= $end) {
- $numbers = array_merge(
- $numbers,
- range($start, $end)
- );
- }
-
- } else {
-
- // 单个数字
- if (is_numeric($part)) {
- $numbers[] = intval($part);
- }
- }
- }
-
- return array_values(array_unique($numbers));
- }
-
- /**
- * 查找句子开始位置
- */
- function findSentenceStart($content, $position)
- {
- $delimiters = ['.', '。', '!', '?', "\n"];
-
- $start = 0;
-
- foreach ($delimiters as $delimiter) {
-
- $pos = strrpos(
- substr($content, 0, $position),
- $delimiter
- );
-
- if ($pos !== false) {
- $start = max($start, $pos + 1);
- }
- }
-
- return $start;
- }
-
- /**
- * 查找句子结束位置
- */
- function findSentenceEnd($content, $position)
- {
- $length = strlen($content);
-
- $endPositions = [];
-
- foreach (['.', '。', '!', '?', "\n"] as $delimiter) {
-
- $pos = strpos($content, $delimiter, $position);
-
- if ($pos !== false) {
- $endPositions[] = $pos + 1;
- }
- }
-
- return empty($endPositions)
- ? $length
- : min($endPositions);
- }
-
- /**
- * 引用相关性:提交单条到队列(异步调用 promotion 同款本地大模型)
- * POST: content_a(必填), content_b(可选), article_id, reference_no(n=index+1), am_id
- */
- public function referenceCheckEnqueue()
- {
- $data = $this->request->post();
- $contentA = trim((string)(isset($data['content_a']) ? $data['content_a'] : ''));
- $contentB = trim((string)(isset($data['content_b']) ? $data['content_b'] : ''));
- $articleId = intval(isset($data['article_id']) ? $data['article_id'] : 0);
- $referenceNo = intval(isset($data['reference_no']) ? $data['reference_no'] : 0);
-
- if ($contentA === '') {
- return jsonError('content_a is required');
- }
-
- try {
- $svc = new ReferenceCheckService();
- $extra = [
- 'reference_no' => $referenceNo,
- 'article_id' => $articleId,
- 'am_id' => intval(isset($data['am_id']) ? $data['am_id'] : 0),
- ];
-
- if ($contentB === '' && $articleId > 0 && $referenceNo > 0) {
- $prod = Db::name('production_article')
- ->where('article_id', $articleId)
- ->where('state', 0)
- ->find();
- if ($prod) {
- $referMap = $svc->loadReferMapByPArticleId(intval($prod['p_article_id']));
- $referIndex = $referenceNo - 1;
- if (isset($referMap[$referIndex])) {
- $refer = $referMap[$referIndex];
- $contentB = $svc->formatReferForLlm($refer);
- $extra['p_article_id'] = intval($prod['p_article_id']);
- $extra['p_refer_id'] = intval($refer['p_refer_id']);
- $extra['refer_index'] = $referIndex;
- }
- }
- }
-
- $result = $svc->enqueue($contentA, $contentB, $extra);
- return jsonSuccess($result);
- } catch (\Exception $e) {
- return jsonError($e->getMessage());
- }
- }
- public function checkOne(){
- $articleId = intval($this->request->param('article_id', 7414));
- $svc = new ReferenceCheckService();
- return jsonSuccess($svc->enqueueSecondPassByArticle($articleId));
- }
- public function referenceCheckEnqueueArticleMain(){
- $amId = 127448;
- $svc = new ReferenceCheckService();
- $main = Db::name('article_main')
- ->field('am_id,content,article_id')
- ->where('am_id', $amId)
- ->whereIn('state', [0, 2])
- ->find();
- $result = $svc->enqueueByArticleMain($main);
- return jsonSuccess($result);
- }
- public function referenceCheckEnqueueArticle(){
- $data = $this->request->get();
- $articleId = intval(isset($data['article_id']) ? $data['article_id'] : 0);
- var_dump($articleId);
- if ($articleId <= 0) {
- return jsonError('article_id is required');
- }
- try {
- $svc = new ReferenceCheckService();
- $result = $svc->enqueueByArticle($articleId);
- return jsonSuccess($result);
- } catch (\Exception $e) {
- return jsonError($e->getMessage());
- }
- }
- /**
- * 按文章批量入队:从 article_main 提取 blue 引用与文献号
- * POST: article_id, clear_previous=1(默认清空该文旧明细后重检)
- */
- public function referenceCheckEnqueueArticle2()
- {
- $data = $this->request->post();
- $articleId = intval(isset($data['article_id']) ? $data['article_id'] : 0);
- if ($articleId <= 0) {
- return jsonError('article_id is required');
- }
-
- try {
- $svc = new ReferenceCheckService();
- $clear = !isset($data['clear_previous']) || intval($data['clear_previous']) === 1;
- $result = $svc->enqueueByArticle($articleId, $clear);
- return jsonSuccess($result);
- } catch (\Exception $e) {
- return jsonError($e->getMessage());
- }
- }
-
- /**
- * 查询单条引用相关性检测结果
- * GET/POST: check_id
- */
- public function referenceCheckResult()
- {
- $checkId = intval($this->request->param('check_id', 0));
- if ($checkId <= 0) {
- return jsonError('check_id is required');
- }
-
- $row = (new ReferenceCheckService())->getResult($checkId);
- if (!$row) {
- return jsonError('result not found');
- }
-
- return jsonSuccess($this->formatReferenceCheckRow($row));
- }
-
- /**
- * 稿件预览:带不合理引用标记的 content(序号 + 引用句)
- * GET/POST: article_id, am_id(可选,只预览某一节)
- */
- public function referenceCheckPreview()
- {
- $articleId = intval($this->request->param('article_id', 0));
- if ($articleId <= 0) {
- return jsonError('article_id is required');
- }
- $amId = intval($this->request->param('am_id', 0));
-
- try {
- $data = (new ReferenceCheckService())->buildArticlePreview($articleId, $amId);
- $data['markup_hint'] = [
- 'ref_no' => '.ref-no-error — 不合理的文献序号(如 70-73 中单独的 70)',
- 'ref_cite' => '.ref-cite-tag.ref-cite-error — 含不合理序号的 blue 引用块',
- 'ref_context'=> '.ref-context-error — 不合理的引用句/上下文',
- ];
- $data['preview_css'] = '.ref-no-error{color:#c00;font-weight:bold;border-bottom:2px wavy #c00}'
- . '.ref-cite-tag.ref-cite-error{background:#ffecec}'
- . '.ref-context-error{background:#fff3cd;outline:1px dashed #e6a700}';
- return jsonSuccess($data);
- } catch (\Exception $e) {
- return jsonError($e->getMessage());
- }
- }
-
- /**
- * 按文章列出引用校对结果([70-73] 为 4 条,reference_no 分别为 70,71,72,73)
- * GET/POST: article_id, status(可选), only_mismatch=1 仅不合理
- */
- public function referenceCheckList()
- {
- $articleId = intval($this->request->param('article_id', 0));
- if ($articleId <= 0) {
- return jsonError('article_id is required');
- }
-
- $status = $this->request->param('status', '');
- $statusFilter = ($status === '' || $status === null) ? -1 : intval($status);
- $onlyMismatch = intval($this->request->param('only_mismatch', 0)) === 1;
- $rows = (new ReferenceCheckService())->listByArticle($articleId, $statusFilter, $onlyMismatch);
-
- $list = [];
- foreach ($rows as $row) {
- $list[] = $this->formatReferenceCheckRow($row);
- }
-
- $mains = Db::name('article_main')
- ->field('am_id,ref_check_status,sort')
- ->where('article_id', $articleId)
- ->whereIn('state', [0, 2])
- ->order('sort asc')
- ->select();
- $sections = [];
- foreach ($mains as $m) {
- $st = intval(isset($m['ref_check_status']) ? $m['ref_check_status'] : 0);
- $sections[] = [
- 'am_id' => intval($m['am_id']),
- 'ref_check_status' => $st,
- 'ref_check_pass' => $st === ReferenceCheckService::AM_STATUS_PASS,
- 'ref_check_label' => ReferenceCheckService::amStatusLabel($st),
- ];
- }
-
- return jsonSuccess([
- 'article_id' => $articleId,
- 'total' => count($list),
- 'list' => $list,
- 'sections' => $sections,
- ]);
- }
-
- private function formatReferenceCheckRow($row)
- {
- $statusMap = array(0 => 'pending', 1 => 'done', 2 => 'failed');
- $amId = intval(isset($row['am_id']) ? $row['am_id'] : 0);
- $citeStart = intval(isset($row['cite_tag_start']) ? $row['cite_tag_start'] : 0);
- $rowStatus = intval($row['status']);
- return array(
- 'check_id' => intval(isset($row['id']) ? $row['id'] : (isset($row['check_id']) ? $row['check_id'] : 0)),
- 'article_id' => intval(isset($row['article_id']) ? $row['article_id'] : 0),
- 'am_id' => $amId,
- 'cite_group_key' => $amId . '_' . $citeStart,
- 'p_refer_id' => intval(isset($row['p_refer_id']) ? $row['p_refer_id'] : 0),
- 'refer_index' => intval(isset($row['refer_index']) ? $row['refer_index'] : 0),
- 'reference_no' => intval(isset($row['reference_no']) ? $row['reference_no'] : 0),
- 'reference_raw' => isset($row['reference_raw']) ? $row['reference_raw'] : '',
- 'cite_tag_start' => $citeStart,
- 'cite_tag_end' => intval(isset($row['cite_tag_end']) ? $row['cite_tag_end'] : 0),
- 'text_start' => intval(isset($row['text_start']) ? $row['text_start'] : 0),
- 'text_end' => intval(isset($row['text_end']) ? $row['text_end'] : 0),
- 'status' => isset($statusMap[$rowStatus]) ? $statusMap[$rowStatus] : 'unknown',
- 'is_match' => intval($row['is_match']),
- 'can_support' => intval(isset($row['can_support']) ? $row['can_support'] : $row['is_match']),
- 'is_reasonable' => intval(isset($row['can_support']) ? $row['can_support'] : $row['is_match']) === 1,
- 'confidence' => floatval($row['confidence']),
- 'reason' => isset($row['reason']) ? $row['reason'] : '',
- 'error_msg' => isset($row['error_msg']) ? $row['error_msg'] : '',
- 'content_a' => isset($row['content_a']) ? $row['content_a'] : '',
- 'content_b' => isset($row['content_b']) ? $row['content_b'] : '',
- 'updated_at' => isset($row['updated_at']) ? $row['updated_at'] : '',
- );
- }
-
}
diff --git a/application/api/controller/Base.php b/application/api/controller/Base.php
index 77e1da7b..3b2c4627 100644
--- a/application/api/controller/Base.php
+++ b/application/api/controller/Base.php
@@ -271,6 +271,14 @@ class Base extends Controller
}
$this->production_article_refer_obj->where('p_article_id', $refer_info['p_article_id'])->where('index', ">", $refer_info['index'])->where('state', 0)->setDec('index');
$this->production_article_refer_obj->where('p_refer_id', $p_refer_id)->update(['state' => 1]);
+
+ // 文献集合已变更,原校对结果的 reference_no 已全部错位,整篇标记为未校对
+ try {
+ (new \app\common\ReferenceCheckService())
+ ->clearArticleChecksByPArticleId(intval($refer_info['p_article_id']));
+ } catch (\Exception $e) {
+ \think\Log::error('delOneRefer clearArticleChecksByPArticleId p_refer_id=' . $p_refer_id . ' ' . $e->getMessage());
+ }
}
diff --git a/application/api/controller/Preaccept.php b/application/api/controller/Preaccept.php
index 9b4867c7..166af09f 100644
--- a/application/api/controller/Preaccept.php
+++ b/application/api/controller/Preaccept.php
@@ -7,6 +7,7 @@ use think\Env;
use think\Queue;
use think\Validate;
use app\common\CrossrefService;
+use app\common\ReferenceCheckService;
class Preaccept extends Base
{
@@ -15,6 +16,26 @@ class Preaccept extends Base
parent::__construct($request);
}
+ /**
+ * 新增/修改导致文献集合改变后,清空整篇校对明细,使文章状态回到"未校对"。
+ * 失败仅记日志,不阻塞主流程。
+ */
+ private function resetArticleChecksOnReferChange($pArticleId, $sourceTag = '')
+ {
+ $pArticleId = intval($pArticleId);
+ if ($pArticleId <= 0) {
+ return;
+ }
+ try {
+ (new ReferenceCheckService())->clearArticleChecksByPArticleId($pArticleId);
+ } catch (\Exception $e) {
+ \think\Log::error(
+ 'resetArticleChecksOnReferChange[' . $sourceTag . '] p_article_id='
+ . $pArticleId . ' ' . $e->getMessage()
+ );
+ }
+ }
+
/**获取文章参考文献列表
* @return \think\response\Json
@@ -92,6 +113,7 @@ class Preaccept extends Base
return jsonError($rule->getError());
}
$this->production_article_refer_obj->where('p_article_id',$data['p_article_id'])->update(["state"=>1]);
+ $this->resetArticleChecksOnReferChange(intval($data['p_article_id']), 'discardRefersByParticleid');
return jsonSuccess([]);
}
@@ -142,6 +164,7 @@ class Preaccept extends Base
}
$adId= $this->production_article_refer_obj->insertGetId($insert);
$this->production_article_refer_obj->where('p_article_id', $p_info['p_article_id'])->where("p_refer_id", "<>", $adId)->where("index", ">", $pre_refer['index'])->where('state', 0)->setInc('index');
+ $this->resetArticleChecksOnReferChange(intval($p_info['p_article_id']), 'addRefer');
return jsonSuccess([]);
@@ -198,6 +221,7 @@ class Preaccept extends Base
}
$adId= $this->production_article_refer_obj->insertGetId($insert);
$this->production_article_refer_obj->where('p_article_id', $p_info['p_article_id'])->where("p_refer_id", "<>", $adId)->where("index", ">", $pre_refer['index'])->where('state', 0)->setInc('index');
+ $this->resetArticleChecksOnReferChange(intval($p_info['p_article_id']), 'addReferByParticleid');
return jsonSuccess([]);
}
@@ -233,6 +257,7 @@ class Preaccept extends Base
$insert['cs'] = 1;
$adId = $this->production_article_refer_obj->insertGetId($insert);
$this->production_article_refer_obj->where('p_article_id', $p_info['p_article_id'])->where("p_refer_id", "<>", $adId)->where("index", ">", $pre_refer['index'])->where('state', 0)->setInc('index');
+ $this->resetArticleChecksOnReferChange(intval($p_info['p_article_id']), 'addReferNotdoi');
return jsonSuccess([]);
}
@@ -462,6 +487,17 @@ class Preaccept extends Base
// }
// $this->production_article_refer_obj->where('p_refer_id', $data['p_refer_id'])->update(['refer_doi' => $data['doi']]);
// my_doiToFrag2($this->production_article_refer_obj->where('p_refer_id', $data['p_refer_id'])->find());
+
+ //文献内容更新成功后异步重检该文献对应的全部校对明细(失败不阻塞主流程)
+ try {
+ (new ReferenceCheckService())->enqueueRecheckByPReferId(
+ intval($data['p_refer_id']),
+ intval($old_refer_info['p_article_id'])
+ );
+ } catch (\Exception $e) {
+ \think\Log::error('editRefer enqueueRecheckByPReferId p_refer_id=' . $data['p_refer_id'] . ' ' . $e->getMessage());
+ }
+
return jsonSuccess([]);
}
@@ -1453,6 +1489,7 @@ class Preaccept extends Base
return jsonError($rule->getError());
}
$refer_info = $this->production_article_refer_obj->where('p_refer_id', $data['p_refer_id'])->find();
+ $sibling_p_refer_id = 0;
if ($data['act'] == "up") {
$up_info = $this->production_article_refer_obj->where('p_article_id', $refer_info['p_article_id'])->where('index', $refer_info['index'] - 1)->where('state', 0)->find();
if (!$up_info) {
@@ -1460,6 +1497,7 @@ class Preaccept extends Base
}
$this->production_article_refer_obj->where('p_refer_id', $up_info['p_refer_id'])->setInc("index");
$this->production_article_refer_obj->where('p_refer_id', $refer_info['p_refer_id'])->setDec("index");
+ $sibling_p_refer_id = intval($up_info['p_refer_id']);
} else {
$down_info = $this->production_article_refer_obj->where('p_article_id', $refer_info['p_article_id'])->where('index', $refer_info['index'] + 1)->where('state', 0)->find();
if (!$down_info) {
@@ -1467,7 +1505,19 @@ class Preaccept extends Base
}
$this->production_article_refer_obj->where('p_refer_id', $refer_info['p_refer_id'])->setInc("index");
$this->production_article_refer_obj->where('p_refer_id', $down_info['p_refer_id'])->setDec("index");
+ $sibling_p_refer_id = intval($down_info['p_refer_id']);
}
+
+ // 仅同步本次交换的两条 p_refer_id 对应的校对明细 reference_no / refer_index
+ try {
+ (new ReferenceCheckService())->syncReferenceNoByPReferIds(
+ [intval($refer_info['p_refer_id']), $sibling_p_refer_id],
+ intval($refer_info['p_article_id'])
+ );
+ } catch (\Exception $e) {
+ \think\Log::error('sortRefer syncReferenceNoByPReferIds: ' . $e->getMessage());
+ }
+
return jsonSuccess([]);
}
diff --git a/application/api/controller/References.php b/application/api/controller/References.php
index 47ae2328..759c63bf 100644
--- a/application/api/controller/References.php
+++ b/application/api/controller/References.php
@@ -1307,4 +1307,231 @@ class References extends Base
}
return json_encode(['status' => 8,'msg' => 'fail']);
}
+ /**
+ * 参考文献第一次校对
+ * @return \think\response\Json
+ */
+ public function allReferenceCheckAI(){
+ //获取参数
+ $aParam = empty($aParam) ? $this->request->post() : $aParam;
+
+ //必填值验证
+ $iPArticleId = empty($aParam['p_article_id']) ? '' : $aParam['p_article_id'];
+ if(empty($iPArticleId)){
+ return json_encode(array('status' => 2,'msg' => 'Please select an article' ));
+ }
+ //查询文章(p_article_id 与 article_id 都要带,下游服务方法两者都用)
+ $aWhere = ['p_article_id' => $iPArticleId,'state' => ['in',[0,2]]];
+ $aProductionArticle = Db::name('production_article')->field('p_article_id,article_id')->where($aWhere)->find();
+ if(empty($aProductionArticle)){
+ return json_encode(array('status' => 3,'msg' => 'No articles found' ));
+ }
+ if($this->checkReferStatus($iPArticleId)==0){
+ return jsonError('请修正完文献内容再进行校对。');
+ }
+ //已存在校对记录则禁止重复执行第一次校对,提示走重置接口
+ $iExisting = Db::name('article_reference_check_result')
+ ->where('p_article_id', $iPArticleId)
+ ->count();
+ if(intval($iExisting) > 0){
+ return jsonError('该文章已存在校对记录,请使用"重置校对"接口重新校对。');
+ }
+ try {
+ $svc = new ReferenceCheckService();
+ $result = $svc->enqueueByPArticle($aProductionArticle);
+ return jsonSuccess($result);
+ } catch (\Exception $e) {
+ return jsonError($e->getMessage());
+ }
+ }
+ /**
+ * 文献校对重置:删除该文章已有的全部校对明细,并重新入队整篇校对
+ * POST: p_article_id(必填)
+ * @url /api/References/referenceCheckResetAI
+ */
+ public function referenceCheckResetAI()
+ {
+ //获取参数
+ $aParam = empty($aParam) ? $this->request->post() : $aParam;
+
+ //必填值验证
+ $iPArticleId = empty($aParam['p_article_id']) ? '' : $aParam['p_article_id'];
+ if(empty($iPArticleId)){
+ return json_encode(array('status' => 2,'msg' => 'Please select an article' ));
+ }
+ //查询文章(p_article_id 与 article_id 都要带,下游服务方法两者都用)
+ $aWhere = ['p_article_id' => $iPArticleId,'state' => ['in',[0,2]]];
+ $aProductionArticle = Db::name('production_article')->field('p_article_id,article_id')->where($aWhere)->find();
+ if(empty($aProductionArticle)){
+ return json_encode(array('status' => 3,'msg' => 'No articles found' ));
+ }
+ if($this->checkReferStatus($iPArticleId)==0){
+ return jsonError('请修正完文献内容再进行校对。');
+ }
+ $iArticleId = empty($aProductionArticle['article_id']) ? 0 : $aProductionArticle['article_id'];
+ if(empty($iArticleId)){
+ return json_encode(array('status' => 4,'msg' => 'Unbound article' ));
+ }
+ try {
+ $result = (new ReferenceCheckService())->resetAndRecheckByArticle($aProductionArticle);
+ return jsonSuccess($result);
+ } catch (\Exception $e) {
+ return jsonError($e->getMessage());
+ }
+ }
+
+ /**
+ * 清空某篇文章下的全部参考文献校对记录(不重新入队)
+ *
+ * 与 referenceCheckResetAI 的区别:reset 是「清空 + 重新校对」,
+ * 这里只做「清空」一步,校对状态回到未校对,等待用户手动再触发。
+ *
+ * POST/GET: p_article_id(必填)
+ */
+ public function referenceCheckClearAI()
+ {
+ $aParam = $this->request->post();
+ if (empty($aParam)) {
+ $aParam = $this->request->param();
+ }
+
+ $iPArticleId = empty($aParam['p_article_id']) ? 0 : intval($aParam['p_article_id']);
+ if ($iPArticleId <= 0) {
+ return json_encode(array('status' => 2, 'msg' => 'Please select an article'));
+ }
+
+ // 校验文章存在(与其它校对接口口径一致:state in [0,2])
+ $aProductionArticle = Db::name('production_article')
+ ->field('p_article_id,article_id')
+ ->where(['p_article_id' => $iPArticleId, 'state' => ['in', [0, 2]]])
+ ->find();
+ if (empty($aProductionArticle)) {
+ return json_encode(array('status' => 3, 'msg' => 'No articles found'));
+ }
+
+ try {
+ $deleted = (new ReferenceCheckService())->clearArticleChecksByPArticleId($iPArticleId);
+ return jsonSuccess([
+ 'p_article_id' => $iPArticleId,
+ 'deleted' => intval($deleted),
+ ]);
+ } catch (\Exception $e) {
+ return jsonError($e->getMessage());
+ }
+ }
+
+ /**
+ * 按 p_article_id 查整篇引用校对进度(按 reference_no 分组聚合)
+ *
+ * POST/GET: p_article_id(必填)
+ *
+ * 返回 list 中每项含:reference_no、p_refer_id、status(数值)、
+ * total、pending、done、failed、pass、is_pass、last_updated_at、records
+ *
+ * status 数值含义:
+ * 0 = 待校验 1 = 校对中 2 = 校对完成 3 = 校对失败
+ */
+ public function referenceCheckProgressAI()
+ {
+ $aParam = $this->request->post();
+ if (empty($aParam)) {
+ $aParam = $this->request->param();
+ }
+
+ $iPArticleId = empty($aParam['p_article_id']) ? 0 : intval($aParam['p_article_id']);
+ if ($iPArticleId <= 0) {
+ return json_encode(array('status' => 2, 'msg' => 'Please select an article'));
+ }
+ try {
+ $result = (new ReferenceCheckService())->getProgressByPArticleId($iPArticleId);
+ return jsonSuccess($result);
+ } catch (\Exception $e) {
+ return jsonError($e->getMessage());
+ }
+ }
+
+ /**
+ * 按 p_article_id 查整篇文章引用校对总状态(用于前端按钮分流)
+ *
+ * POST/GET: p_article_id(必填)
+ *
+ * 计数维度是「参考文献」(按 reference_no 分组),不是单条校对明细行。
+ * 例:50 条参考文献、底层 111 条校对明细时,total = 50。
+ *
+ * 返回 status 数值含义(整篇):
+ * 0 = 未校对(一条记录都没有)
+ * 1 = 校对中(至少 1 条参考文献仍有未跑完的明细)
+ * 2 = 校对完成(所有参考文献全部明细已结束)
+ *
+ * 返回字段:p_article_id、status、total、pending、done、failed、progress_percent
+ * total —— 参考文献条数
+ * pending —— 该条参考文献仍有未跑完明细的数量(含"部分跑完")
+ * done —— 该条参考文献所有明细都 status=1 的数量
+ * failed —— 该条参考文献全部跑完且至少 1 条 status=2 的数量
+ * pending + done + failed = total;progress_percent = (done+failed)/total
+ *
+ * 分组明细请走 referenceCheckProgressAI。
+ */
+ public function referenceCheckArticleStatusAI()
+ {
+ $aParam = $this->request->post();
+ if (empty($aParam)) {
+ $aParam = $this->request->param();
+ }
+
+ $iPArticleId = empty($aParam['p_article_id']) ? 0 : intval($aParam['p_article_id']);
+ if ($iPArticleId <= 0) {
+ return json_encode(array('status' => 2, 'msg' => 'Please select an article'));
+ }
+
+ try {
+ $result = (new ReferenceCheckService())->getArticleProgressStatusByPArticleId($iPArticleId);
+ return jsonSuccess($result);
+ } catch (\Exception $e) {
+ return jsonError($e->getMessage());
+ }
+ }
+
+ /**
+ * 按 p_refer_id 查单条参考文献的校对明细
+ *
+ * POST/GET: p_refer_id(必填)
+ *
+ * 返回 list 中每项含:am_id、confidence、reason、is_match、is_pass
+ * 同时附带上下文:p_refer_id、p_article_id、reference_no、total
+ */
+ public function referenceCheckDetailsAI()
+ {
+ $aParam = $this->request->post();
+ if (empty($aParam)) {
+ $aParam = $this->request->param();
+ }
+
+ $iPReferId = empty($aParam['p_refer_id']) ? 0 : intval($aParam['p_refer_id']);
+ if ($iPReferId <= 0) {
+ return json_encode(array('status' => 2, 'msg' => 'Please select a reference'));
+ }
+
+ try {
+ $result = (new ReferenceCheckService())->getCheckDetailsByPReferId($iPReferId);
+ return jsonSuccess($result);
+ } catch (\Exception $e) {
+ return jsonError($e->getMessage());
+ }
+ }
+
+    /** Returns 1 when every active (state=0) reference of the article has cs != 0, otherwise 0 (also 0 when none exist). */
+    public function checkReferStatus($p_article_id){
+        $list = $this->production_article_refer_obj->where('p_article_id', $p_article_id)->where('state', 0)->select();
+        if (!$list) {
+            // Callers test this result with `== 0`; a jsonError() response is not that 0/1 flag, so report "not ready".
+            return 0;
+        }
+        foreach ($list as $v) {
+            if ($v['cs'] == 0) {
+                return 0;
+            }
+        }
+        return 1;
+    }
}
diff --git a/application/api/job/ReferenceCheck.php b/application/api/job/ReferenceCheck.php
index 3b15e6a1..89c5c67d 100644
--- a/application/api/job/ReferenceCheck.php
+++ b/application/api/job/ReferenceCheck.php
@@ -6,7 +6,6 @@ use think\queue\Job;
use app\common\QueueJob;
use app\common\QueueRedis;
use app\common\ReferenceCheckService;
-use app\common\service\LLMService;
class ReferenceCheck
{
@@ -39,14 +38,6 @@ class ReferenceCheck
if ($checkId <= 0 && !empty($jobData['data']['check_id'])) {
$checkId = intval($jobData['data']['check_id']);
}
- $sClassName = get_class($this);
- $sRedisKey = "queue_job:{$sClassName}:{$checkId}";
- $sRedisValue = uniqid() . '_' . getmypid();
-
- if (!$this->oQueueJob->acquireLock($sRedisKey, $sRedisValue, $job)) {
- return;
- }
-
if ($checkId <= 0) {
$job->delete();
return;
@@ -63,44 +54,19 @@ class ReferenceCheck
return;
}
+ $sClassName = get_class($this);
+ $sRedisKey = "queue_job:{$sClassName}:{$checkId}";
+ $sRedisValue = uniqid() . '_' . getmypid();
+
+ $svc = new ReferenceCheckService();
+ $svc->clearReferenceCheckQueueLock($checkId);
+
+ if (!$this->oQueueJob->acquireLock($sRedisKey, $sRedisValue, $job)) {
+ return;
+ }
+
try {
- $svc = new ReferenceCheckService();
-
- $contentA = $svc->resolveMainContentForJob($row);
- $contentB = trim((string)(isset($row['refer_text']) ? $row['refer_text'] : ''));
- $refer = null;
-
- if (intval($row['p_refer_id']) > 0) {
- $refer = Db::name('production_article_refer')
- ->where('p_refer_id', intval($row['p_refer_id']))
- ->where('state', 0)
- ->find();
- if ($refer && $contentB === '') {
- $contentB = $svc->formatReferForLlm($refer);
- }
- }
-
- if ($contentA === '' || $contentB === '') {
- $this->markFailed($checkId, 'Missing article_main.content or refer_text');
- $job->delete();
- return;
- }
-
- $llm = new LLMService();
- $llmResult = $llm->checkReference($contentA, $contentB, false);
- $canSupport = $svc->parseLlmCanSupport($llmResult);
- $confidence = floatval($llmResult['confidence']);
-
- $svc->updateCheckResult($checkId, [
- 'can_support' => $canSupport ? 1 : 0,
- 'is_match' => $canSupport ? 1 : 0,
- 'confidence' => $confidence,
- 'reason' => isset($llmResult['reason']) ? $llmResult['reason'] : '',
- 'status' => 1,
- 'error_msg' => '',
- ]);
-
- $svc->maybeEnqueueSecondPass($checkId, $confidence);
+ $svc->runReferenceCheckOnce($checkId);
$amId = intval(isset($row['am_id']) ? $row['am_id'] : 0);
if ($amId > 0) {
diff --git a/application/api/job/ReferenceCheckTwo.php b/application/api/job/ReferenceCheckTwo.php
index b28c9f6c..564af204 100644
--- a/application/api/job/ReferenceCheckTwo.php
+++ b/application/api/job/ReferenceCheckTwo.php
@@ -88,12 +88,24 @@ class ReferenceCheckTwo
$llm = new LLMService();
$llmResult = $llm->checkReference($contentA, $referText, true, $doiBlock);
+ $requestFailed = !empty($llmResult['request_failed']);
$canSupport = $svc->parseLlmCanSupport($llmResult);
$tag = $payload['has_abstract']
? ('[Crossref复核' . ($payload['doi_used'] !== '' ? ' ' . $payload['doi_used'] : '') . ']')
: '[Crossref复核-无摘要]';
$reason = $tag . ' ' . (isset($llmResult['reason']) ? $llmResult['reason'] : '');
+        // LLM transport failure: persist status=2, then throw so the queue retries the job.
+        if ($requestFailed) {
+            $svc->updateCheckResult($checkId, [
+                'confidence' => floatval(isset($llmResult['confidence']) ? $llmResult['confidence'] : 0), // a failed request may carry no confidence
+                'reason' => $reason,
+                'status' => 2,
+                'error_msg' => isset($llmResult['reason']) ? $llmResult['reason'] : 'LLM request failed',
+            ]);
+            throw new \RuntimeException(isset($llmResult['reason']) ? $llmResult['reason'] : 'LLM request failed');
+        }
+
$affected = $svc->updateCheckResult($checkId, [
'can_support' => $canSupport ? 1 : 0,
'is_match' => $canSupport ? 1 : 0,
diff --git a/application/common/QueueRedis.php b/application/common/QueueRedis.php
index fb9fb5fb..4412d1ba 100644
--- a/application/common/QueueRedis.php
+++ b/application/common/QueueRedis.php
@@ -80,6 +80,25 @@ class QueueRedis
return null;
}
}
+
+    /**
+     * Delete one or more Redis keys (used to drop a queue job's "completed" marker before a re-check).
+     *
+     * @param array $keys key names; null and '' entries are ignored
+     * @return bool true when there was nothing to delete or the delete call returned normally;
+     *              false when an \Exception was thrown while deleting
+     */
+    public function deleteRedisKeys(array $keys)
+    {
+        $targets = [];
+        foreach ($keys as $key) {
+            if ($key !== null && $key !== '') {
+                $targets[] = $key;
+            }
+        }
+        if (empty($targets)) {
+            return true;
+        }
+
+        try {
+            $this->connect()->del(...$targets);
+        } catch (\Exception $e) {
+            // Best effort: the caller only learns that the delete did not go through.
+            return false;
+        }
+        return true;
+    }
// 安全释放锁(仅当值匹配时删除)
public function releaseRedisLock($key, $value)
diff --git a/application/common/ReferenceCheckService.php b/application/common/ReferenceCheckService.php
index 593f1548..77b44e9d 100644
--- a/application/common/ReferenceCheckService.php
+++ b/application/common/ReferenceCheckService.php
@@ -5,6 +5,7 @@ namespace app\common;
use think\Db;
use think\Env;
use think\Queue;
+use app\common\service\LLMService;
/**
* 正文 <blue>[n]</blue> 引用与 t_production_article_refer(index+1=n)相关性校对。
@@ -20,6 +21,48 @@ class ReferenceCheckService
const AM_STATUS_FAIL = 2;
const AM_STATUS_RUNNING = 3;
+ /** Reference-check progress (externally visible status after aggregating detail rows by reference_no) */
+ const PROGRESS_PENDING = 0; // pending: every detail row in the group has status=0
+ const PROGRESS_CHECKING = 1; // checking: some detail rows in the group are finished, some are still 0
+ const PROGRESS_COMPLETED = 2; // completed: every detail row in the group has status=1
+ const PROGRESS_FAILED = 3; // failed: every detail row in the group is finished and at least one has status=2
+
+ /** Reference-check status of a whole article (overall external status, used to choose between the "start" and "reset" buttons) */
+ const ARTICLE_PROGRESS_NONE = 0; // no check rows exist yet
+ const ARTICLE_PROGRESS_RUNNING = 1; // at least one row has status=0 (work is still outstanding in the queue)
+ const ARTICLE_PROGRESS_COMPLETED = 2; // every detail row has status != 0 (all completed or failed)
+
+ /**
+ * Externally visible status of a single check detail row (records[i].status as returned by getProgressByPArticleId).
+ *
+ * In the DB, article_reference_check_result.status only takes the values 0/1/2;
+ * RECORD_PROCESSING is a transient state derived from the Redis queue lock (:status='processing')
+ * and is never persisted. While a worker is inside the LLM call DB.status is still 0, so the queue lock is what identifies it.
+ */
+ const RECORD_PENDING = 0; // pending: queued but not yet picked up by a worker
+ const RECORD_COMPLETED = 1; // check completed
+ const RECORD_FAILED = 2; // check failed
+ const RECORD_PROCESSING = 3; // processing: a worker is running the LLM call (Redis :status='processing')
+
+ /** Pass threshold for the LLM score (confidence): a value >= this counts as "pass" */
+ const PASS_CONFIDENCE_THRESHOLD = 0.65;
+
+    /**
+     * Pattern for a [...] citation tag; the capture group is the tag's inner text (uses the /u modifier).
+     *
+     * Besides ASCII digits, the ASCII comma, the ASCII hyphen and whitespace, the character
+     * class accepts these typographic variants. They are written as \x{...} escapes so the
+     * pattern does not depend on how this file is encoded or normalised:
+     *   U+FF0C FULLWIDTH COMMA
+     *   U+2013 EN DASH
+     *   U+2014 EM DASH
+     *   U+2212 MINUS SIGN
+     *   U+2010 HYPHEN
+     *   U+2011 NON-BREAKING HYPHEN
+     *
+     * Without the dash variants a range citation such as [19–21] is not matched at all,
+     * and the check records for those reference numbers are lost.
+     */
+    const BLUE_TAG_REGEX = '/\[([\d,\x{FF0C}\-\x{2013}\x{2014}\x{2212}\x{2010}\x{2011}\s]+)\]<\/blue>/u';
+
/**
* 兼容无 ?? 的 PHP 版本
*/
@@ -166,13 +209,94 @@ class ReferenceCheckService
'queued' => count($checkIds2),
];
}
+    /**
+     * Create one check row per cited reference number of a production article, then queue the rows.
+     *
+     * @param array $prod production_article row; p_article_id and article_id are read from it
+     * @return array article_id, p_article_id, queued, skipped, check_ids, queue
+     * @throws \RuntimeException when $prod is empty or the article has no article_main rows
+     */
+    public function enqueueByPArticle($prod)
+    {
+        if (empty($prod)) {
+            throw new \RuntimeException('production_article not found');
+        }
+
+        $pArticleId = intval($prod['p_article_id']);
+        $articleId = intval($prod['article_id']);
+        $referMap = $this->loadReferMapByPArticleId($pArticleId);
+
+        $sections = Db::name('article_main')
+            ->field('am_id,content,article_id')
+            ->where('article_id', $articleId)
+            ->whereIn('state', [0, 2])
+            ->order('sort asc')
+            ->select();
+        if (empty($sections)) {
+            throw new \RuntimeException('article_main is empty');
+        }
+
+        $jobs = [];
+        $touchedAmIds = [];
+        $queuedCount = 0;
+        $skippedCount = 0;
+        $timestamp = date('Y-m-d H:i:s');
+
+        foreach ($sections as $section) {
+            $amId = intval($section['am_id']);
+            $citations = $this->extractReferences((string)$section['content']);
+            if (empty($citations)) {
+                // A section without citations has nothing to check.
+                $this->setAmRefCheckStatus($amId, self::AM_STATUS_NONE);
+                continue;
+            }
+
+            foreach ($citations as $citation) {
+                foreach ($citation['reference_numbers'] as $refNo) {
+                    // Reference number n maps to $referMap[n - 1]; numbers without an entry are only counted.
+                    $mapKey = $refNo - 1;
+                    if ($mapKey < 0 || !isset($referMap[$mapKey])) {
+                        $skippedCount++;
+                        continue;
+                    }
+                    $refer = $referMap[$mapKey];
+
+                    // e.g. [70-73] yields one row each for 70, 71, 72, 73. Rows are stored first;
+                    // the jobs are pushed afterwards in reference-number order.
+                    $checkId = Db::name('article_reference_check_result')->insertGetId([
+                        'article_id' => $section['article_id'],
+                        'p_article_id' => $pArticleId,
+                        'am_id' => $amId,
+                        'reference_no' => $refNo,
+                        'refer_index' => $refNo,
+                        'origin_text' => $citation['original_text'],
+                        'refer_text' => $this->formatReferForLlm($refer),
+                        'p_refer_id' => $refer['p_refer_id'],
+                        'text_start' => $citation['text_start'],
+                        'text_end' => $citation['text_end'],
+                        'created_at' => $timestamp,
+                        'updated_at' => $timestamp,
+                    ]);
+
+                    $jobs[] = [
+                        'check_id' => intval($checkId),
+                        'reference_no' => intval($refNo),
+                        'am_id' => $amId,
+                        'text_start' => intval($citation['text_start']),
+                    ];
+                    $touchedAmIds[$amId] = true;
+                    $queuedCount++;
+                }
+            }
+        }
+
+        $checkIds = $this->pushJobsSortedByReferenceNo($jobs);
+        foreach (array_keys($touchedAmIds) as $amId) {
+            $this->setAmRefCheckStatus($amId, self::AM_STATUS_RUNNING);
+        }
+
+        return [
+            'article_id' => $articleId,
+            'p_article_id' => $pArticleId,
+            'queued' => $queuedCount,
+            'skipped' => $skippedCount,
+            'check_ids' => $checkIds,
+            'queue' => self::QUEUE_NAME,
+        ];
+    }
public function enqueueByArticle($articleId){
if ($articleId <= 0) {
throw new \InvalidArgumentException('article_id is required');
}
$prod = Db::name('production_article')
->where('article_id', $articleId)
- ->where('state', [0, 2])
+ ->whereIn('state', [0, 2])
->find();
if (empty($prod)) {
throw new \RuntimeException('production_article not found for article_id=' . $articleId);
@@ -191,10 +315,9 @@ class ReferenceCheckService
}
$queued = 0;
$skipped = 0;
- $checkIds = [];
- $delay = 0;
+ $pendingJobs = [];
$amIdsWithJobs = [];
-
+ $now = date('Y-m-d H:i:s');
foreach ($mains as $main) {
$amId = intval($main['am_id']);
$citations = $this->extractReferences((string)$main['content']);
@@ -212,12 +335,11 @@ class ReferenceCheckService
$refer = $referMap[$referIndex];
$referText = $this->formatReferForLlm($refer);
- $now = date('Y-m-d H:i:s');
- // [70-73] 展开为 reference_no=70,71,72,73 共 4 条记录
+ // [70-73] 展开为 reference_no=70,71,72,73 共 4 条记录;先入队表,再按文献号正序校对
$checkId = Db::name('article_reference_check_result')->insertGetId([
'article_id' => $main['article_id'],
'p_article_id' => $pArticleId,
- 'am_id' => intval($main['am_id']),
+ 'am_id' => $amId,
'reference_no' => $refNo,
'refer_index' => $refNo,
'origin_text' => $cite['original_text'],
@@ -229,14 +351,19 @@ class ReferenceCheckService
'updated_at' => $now,
]);
- $this->pushJob(intval($checkId), $delay);
- $checkIds[] = $checkId;
+ $pendingJobs[] = [
+ 'check_id' => intval($checkId),
+ 'reference_no' => intval($refNo),
+ 'am_id' => $amId,
+ 'text_start' => intval($cite['text_start']),
+ ];
$queued++;
- $delay += 1;
$amIdsWithJobs[$amId] = true;
}
}
}
+
+ $checkIds = $this->pushJobsSortedByReferenceNo($pendingJobs);
foreach (array_keys($amIdsWithJobs) as $amId) {
$this->setAmRefCheckStatus($amId, self::AM_STATUS_RUNNING);
}
@@ -308,13 +435,464 @@ class ReferenceCheckService
]);
}
+    /**
+     * Delete every reference-check detail row of an article (by p_article_id) and reset the
+     * section-level ref_check_status.
+     *
+     * Meant for use after references were added or removed: the old reference_no values no
+     * longer line up, so earlier results are void. With the rows physically deleted, the
+     * article-level status query reports ARTICLE_PROGRESS_NONE again.
+     *
+     * @param int $pArticleId
+     * @return int number of detail rows deleted (0 when $pArticleId is not positive)
+     */
+    public function clearArticleChecksByPArticleId($pArticleId)
+    {
+        $pArticleId = intval($pArticleId);
+        if ($pArticleId <= 0) {
+            return 0;
+        }
+
+        // Resolve article_id first; it is needed to reset article_main.ref_check_status below.
+        $articleId = Db::name('production_article')
+            ->where('p_article_id', $pArticleId)
+            ->whereIn('state', [0, 2])
+            ->value('article_id');
+        $articleId = intval($articleId);
+
+        // Drop the queue Redis keys of the old rows before the rows themselves are deleted.
+        $staleIds = Db::name('article_reference_check_result')
+            ->where('p_article_id', $pArticleId)
+            ->column('id');
+        foreach ($staleIds as $staleId) {
+            $this->clearReferenceCheckQueueLock(intval($staleId));
+        }
+
+        $removed = Db::name('article_reference_check_result')
+            ->where('p_article_id', $pArticleId)
+            ->delete();
+
+        if ($articleId > 0) {
+            Db::name('article_main')
+                ->where('article_id', $articleId)
+                ->whereIn('state', [0, 2])
+                ->update(['ref_check_status' => self::AM_STATUS_NONE]);
+        }
+
+        return intval($removed);
+    }
+
public function clearArticleChecks($articleId)
{
- Db::name('article_reference_check_result')->where('article_id', $articleId)->delete();
+ $articleId = intval($articleId);
+ if ($articleId <= 0) {
+ return 0;
+ }
+
+ // 先清掉旧记录对应的队列 Redis 锁,否则同 check_id 在 TTL 内不会再次执行
+ $oldIds = Db::name('article_reference_check_result')
+ ->where('article_id', $articleId)
+ ->column('id');
+ foreach ($oldIds as $oldId) {
+ $this->clearReferenceCheckQueueLock(intval($oldId));
+ }
+
+ $deleted = Db::name('article_reference_check_result')->where('article_id', $articleId)->delete();
Db::name('article_main')
->where('article_id', $articleId)
->whereIn('state', [0, 2])
->update(['ref_check_status' => self::AM_STATUS_NONE]);
+
+ return intval($deleted);
+ }
+
+    /**
+     * After a local move inside the reference list, refresh reference_no / refer_index on the
+     * check rows that belong to the given p_refer_id values only.
+     *
+     * The new number is derived from the current production_article_refer.index (index + 1)
+     * instead of a caller-supplied value, which could be stale. Only rows whose stored
+     * reference_no differs from the new number are updated.
+     *
+     * @param int[] $pReferIds  affected p_refer_id values (typically two: the moved entry and its neighbour)
+     * @param int   $pArticleId optional extra p_article_id filter, applied to both the lookup and the update
+     * @return array{p_refer_ids:int[], affected_rows:int, changes:array}
+     */
+    public function syncReferenceNoByPReferIds(array $pReferIds, $pArticleId = 0)
+    {
+        $pReferIds = array_values(array_unique(array_filter(array_map('intval', $pReferIds))));
+        $pArticleId = intval($pArticleId);
+
+        // Result skeleton; returned untouched when there is nothing to do.
+        $summary = [
+            'p_refer_ids' => $pReferIds,
+            'affected_rows' => 0,
+            'changes' => [],
+        ];
+        if (empty($pReferIds)) {
+            return $summary;
+        }
+
+        $scoped = $pArticleId > 0;
+        $lookup = Db::name('production_article_refer')
+            ->field('p_refer_id,p_article_id,index')
+            ->whereIn('p_refer_id', $pReferIds)
+            ->where('state', 0);
+        if ($scoped) {
+            $lookup->where('p_article_id', $pArticleId);
+        }
+        $refers = $lookup->select();
+        if (empty($refers)) {
+            return $summary;
+        }
+
+        $timestamp = date('Y-m-d H:i:s');
+        foreach ($refers as $refer) {
+            $pReferId = intval($refer['p_refer_id']);
+            $expectedNo = intval($refer['index']) + 1;
+
+            $update = Db::name('article_reference_check_result')
+                ->where('p_refer_id', $pReferId)
+                ->where('reference_no', '<>', $expectedNo);
+            if ($scoped) {
+                $update->where('p_article_id', $pArticleId);
+            }
+            $changed = $update->update([
+                'reference_no' => $expectedNo,
+                'refer_index' => $expectedNo,
+                'updated_at' => $timestamp,
+            ]);
+            if (!($changed > 0)) {
+                continue;
+            }
+
+            $summary['affected_rows'] += intval($changed);
+            $summary['changes'][] = [
+                'p_refer_id' => $pReferId,
+                'new_ref_no' => $expectedNo,
+                'affected_rows' => intval($changed),
+            ];
+        }
+
+        return $summary;
+    }
+
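+ /**
+ * NOTE: this comment appears to describe resetAndRecheckByArticle() (defined further below), not the method that follows.
+ * Reset the reference check of a whole manuscript: delete the old detail rows, clear the queue locks, re-enqueue the full text for checking.
+ * @return array
+ */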
+    /**
+     * Overall reference-check status of an article, looked up by p_article_id.
+     *
+     * Counting is per reference (detail rows grouped by reference_no), not per detail row:
+     * detail rows covering 50 distinct reference numbers report total = 50.
+     *
+     * Article-level status:
+     *   0 = ARTICLE_PROGRESS_NONE      no check rows exist
+     *   1 = ARTICLE_PROGRESS_RUNNING   at least one reference still has a status=0 row
+     *   2 = ARTICLE_PROGRESS_COMPLETED no reference has a status=0 row left
+     *
+     * Every reference lands in exactly one bucket:
+     *   pending - some row of the group has status=0 (partly finished groups included)
+     *   failed  - no status=0 row and at least one status=2 row
+     *   done    - neither of the above
+     *
+     * pending + done + failed = total; progress_percent = (done + failed) / total * 100,
+     * rounded to one decimal. Per-group details are served by getProgressByPArticleId().
+     *
+     * @param int $pArticleId
+     * @return array{p_article_id:int, status:int, total:int, pending:int, done:int, failed:int, progress_percent:float}
+     * @throws \InvalidArgumentException when $pArticleId is not positive
+     */
+    public function getArticleProgressStatusByPArticleId($pArticleId)
+    {
+        $pArticleId = intval($pArticleId);
+        if ($pArticleId <= 0) {
+            throw new \InvalidArgumentException('p_article_id is required');
+        }
+
+        // One aggregate query: one result row per reference_no with its pending / failed row counts.
+        $aggregateFields = 'reference_no'
+            . ', SUM(CASE WHEN status = 0 THEN 1 ELSE 0 END) AS pending_cnt'
+            . ', SUM(CASE WHEN status = 2 THEN 1 ELSE 0 END) AS failed_cnt';
+        $groups = Db::name('article_reference_check_result')
+            ->field($aggregateFields)
+            ->where('p_article_id', $pArticleId)
+            ->group('reference_no')
+            ->select();
+
+        if (empty($groups)) {
+            return [
+                'p_article_id' => $pArticleId,
+                'status' => self::ARTICLE_PROGRESS_NONE,
+                'total' => 0,
+                'pending' => 0,
+                'done' => 0,
+                'failed' => 0,
+                'progress_percent' => 0,
+            ];
+        }
+
+        $tally = ['pending' => 0, 'done' => 0, 'failed' => 0];
+        foreach ($groups as $group) {
+            $openRows = intval($this->arrGet($group, 'pending_cnt', 0));
+            $failedRows = intval($this->arrGet($group, 'failed_cnt', 0));
+
+            // An open row outranks a failed one; a group with neither counts as done.
+            $bucket = 'done';
+            if ($openRows > 0) {
+                $bucket = 'pending';
+            } elseif ($failedRows > 0) {
+                $bucket = 'failed';
+            }
+            $tally[$bucket]++;
+        }
+
+        $total = count($groups);
+        $settled = $tally['done'] + $tally['failed'];
+
+        return [
+            'p_article_id' => $pArticleId,
+            'status' => $tally['pending'] > 0
+                ? self::ARTICLE_PROGRESS_RUNNING
+                : self::ARTICLE_PROGRESS_COMPLETED,
+            'total' => $total,
+            'pending' => $tally['pending'],
+            'done' => $tally['done'],
+            'failed' => $tally['failed'],
+            'progress_percent' => round($settled / $total * 100, 1),
+        ];
+    }
+
+ /**
+ * Reference-check progress of a whole article by p_article_id: status aggregated per reference_no, with every detail row expanded.
+ *
+ * Single row, article_reference_check_result.status:
+ * 0 = pending 1 = completed 2 = failed
+ *
+ * Group (reference_no) status (returned in the numeric field `status`):
+ * 0 = PROGRESS_PENDING pending: every detail row in the group has status=0
+ * 1 = PROGRESS_CHECKING checking: some detail rows are finished, some are still 0
+ * 2 = PROGRESS_COMPLETED completed: no pending row and no status=2 row in the group
+ * 3 = PROGRESS_FAILED failed: no pending row and at least one status=2 row
+ *
+ * Each group also carries a `records` array with, for every check row under that reference_no:
+ * - status (0/1/2 as above)
+ * - confidence score
+ * - is_pass (confidence >= PASS_CONFIDENCE_THRESHOLD counts as pass)
+ *
+ * @return array{p_article_id:int, total_groups:int, summary:array, list:array}
+ */
+ public function getProgressByPArticleId($pArticleId)
+ {
+ $pArticleId = intval($pArticleId);
+ if ($pArticleId <= 0) {
+ throw new \InvalidArgumentException('p_article_id is required');
+ }
+
+ $rows = Db::name('article_reference_check_result')
+ ->field('id,p_refer_id,reference_no,am_id,status,confidence,is_match,reason,text_start,text_end,updated_at')
+ ->where('p_article_id', $pArticleId)
+ ->order('reference_no asc, id asc')
+ ->select();
+
+ // summary uses numeric keys; 0/1/2/3 correspond to the PROGRESS_* constants
+ $summary = [
+ self::PROGRESS_PENDING => 0,
+ self::PROGRESS_CHECKING => 0,
+ self::PROGRESS_COMPLETED => 0,
+ self::PROGRESS_FAILED => 0,
+ ];
+ if (empty($rows)) {
+ return [
+ 'p_article_id' => $pArticleId,
+ 'total_groups' => 0,
+ 'summary' => $summary,
+ 'list' => [],
+ ];
+ }
+
+ $groups = [];
+ foreach ($rows as $row) {
+ $refNo = intval($this->arrGet($row, 'reference_no', 0));
+ $pReferId = intval($this->arrGet($row, 'p_refer_id', 0));
+ if (!isset($groups[$refNo])) {
+ $groups[$refNo] = [
+ 'reference_no' => $refNo,
+ 'p_refer_id' => $pReferId,
+ 'total' => 0,
+ 'pending' => 0,
+ 'done' => 0,
+ 'failed' => 0,
+ 'pass' => 0,
+ 'last_updated_at' => '',
+ 'records' => [],
+ ];
+ }
+ // One reference_no should map to a single p_refer_id; if they are mixed, keep the first positive id seen
+ if ($groups[$refNo]['p_refer_id'] <= 0 && $pReferId > 0) {
+ $groups[$refNo]['p_refer_id'] = $pReferId;
+ }
+
+ $groups[$refNo]['total']++;
+ $st = intval($this->arrGet($row, 'status', 0));
+ if ($st === 0) {
+ $groups[$refNo]['pending']++;
+ } elseif ($st === 1) {
+ $groups[$refNo]['done']++;
+ } elseif ($st === 2) {
+ $groups[$refNo]['failed']++;
+ }
+
+ // String comparison keeps the greatest updated_at value of the group
+ $upd = (string)$this->arrGet($row, 'updated_at', '');
+ if ($upd > $groups[$refNo]['last_updated_at']) {
+ $groups[$refNo]['last_updated_at'] = $upd;
+ }
+
+ $confidence = floatval($this->arrGet($row, 'confidence', 0));
+ $isPass = $confidence >= self::PASS_CONFIDENCE_THRESHOLD;
+ if ($isPass) {
+ $groups[$refNo]['pass']++;
+ }
+
+ $groups[$refNo]['records'][] = [
+ 'check_id' => intval($this->arrGet($row, 'id', 0)),
+ 'am_id' => intval($this->arrGet($row, 'am_id', 0)),
+ 'status' => $st,
+ 'confidence' => $confidence,
+ 'is_pass' => $isPass,
+ 'is_match' => intval($this->arrGet($row, 'is_match', 0)),
+ 'reason' => (string)$this->arrGet($row, 'reason', ''),
+ 'text_start' => intval($this->arrGet($row, 'text_start', 0)),
+ 'text_end' => intval($this->arrGet($row, 'text_end', 0)),
+ 'last_updated_at' => $upd,
+ ];
+ }
+
+ $list = [];
+ foreach ($groups as $g) {
+ $total = $g['total'];
+ $pending = $g['pending'];
+ $failed = $g['failed'];
+ $pass = $g['pass'];
+
+ if ($pending === $total) {
+ $status = self::PROGRESS_PENDING;
+ } elseif ($pending === 0) {
+ $status = $failed > 0 ? self::PROGRESS_FAILED : self::PROGRESS_COMPLETED;
+ } else {
+ $status = self::PROGRESS_CHECKING;
+ }
+
+ // Group passes overall: it is fully completed (no pending, no failed) and every row's confidence reaches PASS_CONFIDENCE_THRESHOLD
+ $g['is_pass'] = (
+ $status === self::PROGRESS_COMPLETED
+ && $total > 0
+ && $pass === $total
+ );
+
+ $summary[$status]++;
+ $g['status'] = $status;
+ $list[] = $g;
+ }
+
+ usort($list, function ($a, $b) {
+ return $a['reference_no'] - $b['reference_no'];
+ });
+
+ return [
+ 'p_article_id' => $pArticleId,
+ 'total_groups' => count($list),
+ 'summary' => $summary,
+ 'list' => $list,
+ ];
+ }
+
+    /**
+     * List every check detail row recorded for one reference (by p_refer_id), in id order.
+     *
+     * Each list item contains:
+     *   - am_id       article_main primary key stored on the row
+     *   - confidence  match confidence (0~1)
+     *   - reason      verdict explanation given by the LLM
+     *   - is_match    article_reference_check_result.is_match
+     *   - is_pass     confidence >= PASS_CONFIDENCE_THRESHOLD
+     *
+     * p_article_id / reference_no of the result come from the first row that carries a
+     * positive value; both stay 0 when no row does.
+     *
+     * @param int $pReferId production_article_refer.p_refer_id
+     * @return array{p_refer_id:int, p_article_id:int, reference_no:int, total:int, list:array}
+     * @throws \InvalidArgumentException when $pReferId is not positive
+     */
+    public function getCheckDetailsByPReferId($pReferId)
+    {
+        $pReferId = intval($pReferId);
+        if ($pReferId <= 0) {
+            throw new \InvalidArgumentException('p_refer_id is required');
+        }
+
+        $rows = Db::name('article_reference_check_result')
+            ->field('id,p_article_id,reference_no,am_id,confidence,is_match,reason')
+            ->where('p_refer_id', $pReferId)
+            ->order('id asc')
+            ->select();
+
+        $items = [];
+        $pArticleId = 0;
+        $referenceNo = 0;
+        foreach ($rows as $row) {
+            // Context values are latched once a positive one has been seen.
+            $pArticleId = $pArticleId > 0
+                ? $pArticleId
+                : intval($this->arrGet($row, 'p_article_id', 0));
+            $referenceNo = $referenceNo > 0
+                ? $referenceNo
+                : intval($this->arrGet($row, 'reference_no', 0));
+
+            $score = floatval($this->arrGet($row, 'confidence', 0));
+            $items[] = [
+                'am_id' => intval($this->arrGet($row, 'am_id', 0)),
+                'confidence' => $score,
+                'reason' => (string)$this->arrGet($row, 'reason', ''),
+                'is_match' => intval($this->arrGet($row, 'is_match', 0)),
+                'is_pass' => $score >= self::PASS_CONFIDENCE_THRESHOLD,
+            ];
+        }
+
+        return [
+            'p_refer_id' => $pReferId,
+            'p_article_id' => $pArticleId,
+            'reference_no' => $referenceNo,
+            'total' => count($items),
+            'list' => $items,
+        ];
+    }
+
+    /**
+     * Reset the reference check of a whole manuscript and start it again: existing detail rows
+     * are removed through clearArticleChecks(), then enqueueByArticle() queues a fresh run.
+     *
+     * @param array $aProductionArticle production_article row; p_article_id and article_id must both be positive
+     * @return array result of enqueueByArticle() plus 'cleared' (rows deleted) and 'reset' => 1
+     * @throws \InvalidArgumentException when the row is empty, not an array, or lacks either id
+     * @throws \RuntimeException when the article has no existing check rows
+     */
+    public function resetAndRecheckByArticle($aProductionArticle)
+    {
+        if (!is_array($aProductionArticle) || empty($aProductionArticle)) {
+            throw new \InvalidArgumentException('production_article is required');
+        }
+
+        $pArticleId = intval($this->arrGet($aProductionArticle, 'p_article_id', 0));
+        $articleId = intval($this->arrGet($aProductionArticle, 'article_id', 0));
+        if (!($pArticleId > 0 && $articleId > 0)) {
+            throw new \InvalidArgumentException('production_article requires both p_article_id and article_id');
+        }
+
+        // A reset only makes sense when a previous run left rows behind.
+        $existing = Db::name('article_reference_check_result')
+            ->where('p_article_id', $pArticleId)
+            ->count();
+        if (intval($existing) <= 0) {
+            throw new \RuntimeException('no existing reference check records for p_article_id=' . $pArticleId);
+        }
+
+        $cleared = $this->clearArticleChecks($articleId);
+        $outcome = $this->enqueueByArticle($articleId);
+        if (!is_array($outcome)) {
+            $outcome = [];
+        }
+
+        $outcome['cleared'] = $cleared;
+        $outcome['reset'] = 1;
+        return $outcome;
+    }
public static function amStatusLabel($status)
@@ -571,7 +1149,7 @@ class ReferenceCheckService
// 1) 先标记 blue 内各序号(在原文上操作,[70-73] 仅标不合理者如 70、71)
preg_match_all(
- '/\[([\d,\-\s]+)\]<\/blue>/',
+ self::BLUE_TAG_REGEX,
$html,
$matches,
PREG_OFFSET_CAPTURE
@@ -619,7 +1197,9 @@ class ReferenceCheckService
$tagClass = !empty($badNums) ? ' ref-cite-error' : '';
$groupIds = !empty($badNums)
- ? implode(',', array_map('intval', array_column($badNums, 'check_id')))
+ ? implode(',', array_map(function ($row) {
+ return (int) $this->resolveCheckRowId($row);
+ }, $badNums))
: '';
$newHtml = '[' . $innerMarked . ']';
@@ -718,13 +1298,388 @@ class ReferenceCheckService
$parts[] = ucfirst($f) . ': ' . $v;
}
}
+ $frag = trim((string)$this->arrGet($refer, 'refer_frag', ''));
$content = trim((string)$this->arrGet($refer, 'refer_content', ''));
- if ($content !== '') {
+ if ($frag !== '') {
+ $parts[] = 'Reference: ' . $frag;
+ } elseif ($content !== '') {
$parts[] = 'Reference: ' . $content;
}
return implode("\n", $parts);
}
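+ /**
+ * NOTE: the parameters listed here match recheckByRefer() (defined further below), not the method that follows.
+ * Re-check after the front end edits a reference: only existing check rows are processed (refer_text refreshed, results reset, then enqueued); returns immediately when there are no rows.
+ *
+ * @param int $articleId
+ * @param int $pReferId t_production_article_refer.p_refer_id (takes precedence)
+ * @param int $referenceNo reference number, index+1 (used when p_refer_id is absent)
+ * @return array
+ */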
+    /**
+     * After a reference has been edited, asynchronously re-check every check row tied to its
+     * p_refer_id.
+     *
+     * Steps: refresh refer_text / refer_index / reference_no and reset status, is_match,
+     * can_support, confidence, reason and error_msg on each row; mark the affected sections
+     * ref_check_status = RUNNING; hand the rows to pushJobsSortedByReferenceNo().
+     *
+     * Unlike recheckByRefer(), no LLM call happens inside this request; the rows are only
+     * queued. Progress can be polled through getProgressByPArticleId().
+     *
+     * @param int $pReferId   production_article_refer.p_refer_id (required)
+     * @param int $pArticleId optional; when positive it is used instead of the refer row's p_article_id
+     * @return array{p_refer_id:int, p_article_id:int, reference_no:int, reset:int, queued:int, check_ids:int[], queue:string}
+     * @throws \InvalidArgumentException when $pReferId is not positive
+     * @throws \RuntimeException when the state=0 refer row is missing or no p_article_id can be determined
+     */
+    public function enqueueRecheckByPReferId($pReferId, $pArticleId = 0)
+    {
+        $pReferId = intval($pReferId);
+        if ($pReferId <= 0) {
+            throw new \InvalidArgumentException('p_refer_id is required');
+        }
+
+        $refer = Db::name('production_article_refer')
+            ->where('p_refer_id', $pReferId)
+            ->where('state', 0)
+            ->find();
+        if (empty($refer)) {
+            throw new \RuntimeException('production_article_refer not found, p_refer_id=' . $pReferId);
+        }
+
+        $pArticleId = intval($pArticleId);
+        if ($pArticleId <= 0) {
+            $pArticleId = intval($this->arrGet($refer, 'p_article_id', 0));
+        }
+        if ($pArticleId <= 0) {
+            throw new \RuntimeException('p_article_id is missing for p_refer_id=' . $pReferId);
+        }
+
+        $referenceNo = intval($this->arrGet($refer, 'index', 0)) + 1;
+        $referText = $this->formatReferForLlm($refer);
+        $timestamp = date('Y-m-d H:i:s');
+
+        $rows = Db::name('article_reference_check_result')
+            ->where('p_article_id', $pArticleId)
+            ->where('p_refer_id', $pReferId)
+            ->select();
+
+        // Result skeleton; returned as-is when the reference has no check rows yet.
+        $outcome = [
+            'p_refer_id' => $pReferId,
+            'p_article_id' => $pArticleId,
+            'reference_no' => $referenceNo,
+            'reset' => 0,
+            'queued' => 0,
+            'check_ids' => [],
+            'queue' => self::QUEUE_NAME,
+        ];
+        if (empty($rows)) {
+            return $outcome;
+        }
+
+        $blankResult = [
+            'refer_text' => $referText,
+            'refer_index' => $referenceNo,
+            'reference_no' => $referenceNo,
+            'status' => 0,
+            'is_match' => 0,
+            'can_support' => 0,
+            'confidence' => 0,
+            'reason' => '',
+            'error_msg' => '',
+            'updated_at' => $timestamp,
+        ];
+
+        $jobs = [];
+        $touchedAmIds = [];
+        foreach ($rows as $row) {
+            $checkId = $this->resolveCheckRowId($row);
+            $amId = intval($this->arrGet($row, 'am_id', 0));
+
+            Db::name('article_reference_check_result')->where('id', $checkId)->update($blankResult);
+            // The old queue marker has to go, otherwise re-pushing the same check_id is silently dropped by acquireLock.
+            $this->clearReferenceCheckQueueLock($checkId);
+
+            $jobs[] = [
+                'check_id' => $checkId,
+                'reference_no' => $referenceNo,
+                'am_id' => $amId,
+                'text_start' => intval($this->arrGet($row, 'text_start', 0)),
+            ];
+            if ($amId > 0) {
+                $touchedAmIds[$amId] = true;
+            }
+        }
+
+        foreach (array_keys($touchedAmIds) as $amId) {
+            $this->setAmRefCheckStatus($amId, self::AM_STATUS_RUNNING);
+        }
+
+        $checkIds = $this->pushJobsSortedByReferenceNo($jobs);
+
+        $outcome['reset'] = count($rows);
+        $outcome['queued'] = count($checkIds);
+        $outcome['check_ids'] = $checkIds;
+        return $outcome;
+    }
+
+    /**
+     * Synchronously re-check one reference of an article: reset its existing check rows and run
+     * runReferenceCheckOnce() for each of them inside the current request.
+     *
+     * Rows are selected by article_id and (p_refer_id OR reference_no). When nothing matches,
+     * no row is changed and a result with reset = 0 is returned.
+     *
+     * @param int $articleId   article id (required)
+     * @param int $pReferId    production_article_refer.p_refer_id (takes precedence)
+     * @param int $referenceNo reference number, index + 1 (used when $pReferId is not given)
+     * @return array article_id, p_refer_id, reference_no, reset, check_ids and either
+     *               queued/queue (no rows) or checked/failed/results/errors (rows processed)
+     * @throws \InvalidArgumentException when $articleId is not positive
+     */
+    public function recheckByRefer($articleId, $pReferId = 0, $referenceNo = 0)
+    {
+        $articleId = intval($articleId);
+        if ($articleId <= 0) {
+            throw new \InvalidArgumentException('article_id is required');
+        }
+
+        $ctx = $this->resolveReferForRecheck($articleId, intval($pReferId), intval($referenceNo));
+        $refer = $ctx['refer'];
+        $pReferId = $ctx['p_refer_id'];
+        $referenceNo = $ctx['reference_no'];
+        $pArticleId = $ctx['p_article_id'];
+        $referText = $this->formatReferForLlm($refer);
+        $now = date('Y-m-d H:i:s');
+
+        $rows = Db::name('article_reference_check_result')
+            ->where('article_id', $articleId)
+            ->where(function ($query) use ($pReferId, $referenceNo) {
+                $query->where('p_refer_id', $pReferId)->whereOr('reference_no', $referenceNo);
+            })
+            ->select();
+
+        if (empty($rows)) {
+            return [
+                'article_id' => $articleId,
+                'p_refer_id' => $pReferId,
+                'reference_no' => $referenceNo,
+                'reset' => 0,
+                'queued' => 0,
+                'check_ids' => [],
+                'queue' => self::QUEUE_NAME,
+            ];
+        }
+
+        $resetFields = [
+            'refer_text' => $referText,
+            'p_refer_id' => $pReferId,
+            'p_article_id' => $pArticleId,
+            'refer_index' => $referenceNo,
+            // Keep reference_no in step with refer_index: rows matched through p_refer_id may
+            // still carry a number from before the reference was moved.
+            'reference_no' => $referenceNo,
+            'status' => 0,
+            'is_match' => 0,
+            'can_support' => 0,
+            'confidence' => 0,
+            'reason' => '',
+            'error_msg' => '',
+            'updated_at' => $now,
+        ];
+
+        $pendingJobs = [];
+        $amIds = [];
+        foreach ($rows as $row) {
+            $checkId = $this->resolveCheckRowId($row);
+            Db::name('article_reference_check_result')->where('id', $checkId)->update($resetFields);
+            $pendingJobs[] = [
+                'check_id' => $checkId,
+                'reference_no' => $referenceNo,
+                'am_id' => intval($row['am_id']),
+                'text_start' => intval(isset($row['text_start']) ? $row['text_start'] : 0),
+            ];
+            $amId = intval($row['am_id']);
+            if ($amId > 0) {
+                $amIds[$amId] = true;
+            }
+        }
+
+        foreach (array_keys($amIds) as $amId) {
+            $this->setAmRefCheckStatus($amId, self::AM_STATUS_RUNNING);
+        }
+
+        // Process in a stable order: reference number, then section, then position in the text.
+        usort($pendingJobs, function ($a, $b) {
+            if ($a['reference_no'] !== $b['reference_no']) {
+                return $a['reference_no'] - $b['reference_no'];
+            }
+            if ($a['am_id'] !== $b['am_id']) {
+                return $a['am_id'] - $b['am_id'];
+            }
+            return $a['text_start'] - $b['text_start'];
+        });
+
+        $checkIds = [];
+        $results = [];
+        $failed = [];
+        foreach ($pendingJobs as $job) {
+            $checkId = intval($job['check_id']);
+            $checkIds[] = $checkId;
+            $this->clearReferenceCheckQueueLock($checkId);
+            try {
+                $results[] = $this->runReferenceCheckOnce($checkId);
+            } catch (\Exception $e) {
+                // One failing row must not stop the remaining rows; it is reported in 'errors'.
+                $failed[] = [
+                    'check_id' => $checkId,
+                    'error' => $e->getMessage(),
+                ];
+                \think\Log::error('recheckByRefer check_id=' . $checkId . ' ' . $e->getMessage());
+            }
+        }
+
+        foreach (array_keys($amIds) as $amId) {
+            $this->syncAmRefCheckStatus($amId);
+        }
+
+        return [
+            'article_id' => $articleId,
+            'p_refer_id' => $pReferId,
+            'reference_no' => $referenceNo,
+            'reset' => count($rows),
+            'checked' => count($results),
+            'failed' => count($failed),
+            'check_ids' => $checkIds,
+            'results' => $results,
+            'errors' => $failed,
+        ];
+    }
+
+    /**
+     * Remove the Redis queue markers of one check row so that a re-queued job for the same id
+     * is not silently dropped by acquireLock.
+     *
+     * For both job classes the key "<prefix>:<class>:<id>" and its ":status" companion are deleted.
+     * Failures are logged as warnings and never propagated.
+     *
+     * @param int $checkId article_reference_check_result.id; non-positive values are ignored
+     * @return void
+     */
+    public function clearReferenceCheckQueueLock($checkId)
+    {
+        $checkId = intval($checkId);
+        if ($checkId <= 0) {
+            return;
+        }
+
+        // Key prefix => job class name used inside the key.
+        $jobClasses = [
+            'queue_job' => 'app\\api\\job\\ReferenceCheck',
+            'queue_job_two' => 'app\\api\\job\\ReferenceCheckTwo',
+        ];
+
+        try {
+            $keys = [];
+            foreach ($jobClasses as $prefix => $class) {
+                $lockKey = $prefix . ':' . $class . ':' . $checkId;
+                $keys[] = $lockKey;
+                $keys[] = $lockKey . ':status';
+            }
+            QueueRedis::getInstance()->deleteRedisKeys($keys);
+        } catch (\Exception $e) {
+            \think\Log::warning('clearReferenceCheckQueueLock id=' . $checkId . ' ' . $e->getMessage());
+        }
+    }
+
+    /**
+     * Run the LLM reference check for one row synchronously and write the outcome back to
+     * article_reference_check_result.
+     *
+     * The reference text is rebuilt from the live state=0 production_article_refer row when
+     * the check row has a p_refer_id that resolves; otherwise the stored refer_text is used.
+     *
+     * @param int $checkId article_reference_check_result.id
+     * @return array{check_id:int, can_support:int, is_match:int, confidence:float, reason:mixed}
+     * @throws \RuntimeException when the row does not exist, when either text is empty, or
+     *                           when the LLM reports request_failed (the row is set to
+     *                           status=2 before the latter two are thrown)
+     */
+    public function runReferenceCheckOnce($checkId)
+    {
+        $checkId = intval($checkId);
+        $row = Db::name('article_reference_check_result')->where('id', $checkId)->find();
+        if (empty($row)) {
+            throw new \RuntimeException('article_reference_check_result not found, id=' . $checkId);
+        }
+
+        $contentA = $this->resolveMainContentForJob($row);
+        $refer = null;
+        if (intval($row['p_refer_id']) > 0) {
+            $refer = Db::name('production_article_refer')
+                ->where('p_refer_id', intval($row['p_refer_id']))
+                ->where('state', 0)
+                ->find();
+        }
+
+        if ($refer) {
+            $contentB = $this->formatReferForLlm($refer);
+        } else {
+            $contentB = trim((string)$this->arrGet($row, 'refer_text', ''));
+        }
+
+        if ($contentA === '' || $contentB === '') {
+            $this->updateCheckResult($checkId, [
+                'status' => 2,
+                'error_msg' => 'Missing article_main.content or refer_text',
+            ]);
+            throw new \RuntimeException('Missing article_main.content or refer_text');
+        }
+
+        $llmResult = (new LLMService())->checkReference($contentA, $contentB, false);
+        $requestFailed = !empty($llmResult['request_failed']);
+        $canSupport = $this->parseLlmCanSupport($llmResult);
+        $confidence = floatval(isset($llmResult['confidence']) ? $llmResult['confidence'] : 0);
+        $reason = isset($llmResult['reason']) ? $llmResult['reason'] : '';
+
+        // LLM transport failure: store status=2 with an error message, then throw. Per the
+        // original author's note the queue worker retries on this exception and
+        // ReferenceCheck::markFailed keeps status=2 once the retries are used up.
+        if ($requestFailed) {
+            // The stored error_msg and the exception share one message, so a failed row
+            // never ends up with an empty error_msg.
+            $failMsg = $reason !== '' ? $reason : 'LLM request failed';
+            $this->updateCheckResult($checkId, [
+                'confidence' => $confidence,
+                'reason' => $reason,
+                'status' => 2,
+                'error_msg' => $failMsg,
+            ]);
+            $this->clearReferenceCheckQueueLock($checkId);
+            throw new \RuntimeException($failMsg);
+        }
+
+        $this->updateCheckResult($checkId, [
+            'can_support' => $canSupport ? 1 : 0,
+            'is_match' => $canSupport ? 1 : 0,
+            'confidence' => $confidence,
+            'reason' => $reason,
+            'status' => 1,
+            'error_msg' => '',
+        ]);
+
+        $this->clearReferenceCheckQueueLock($checkId);
+        $this->maybeEnqueueSecondPass($checkId, $confidence);
+
+        return [
+            'check_id' => $checkId,
+            'can_support' => $canSupport ? 1 : 0,
+            'is_match' => $canSupport ? 1 : 0,
+            'confidence' => $confidence,
+            'reason' => $reason,
+        ];
+    }
+
+    /**
+     * Resolve the reference row a re-check is about, from either its p_refer_id or its number.
+     *
+     * The production article is looked up first (state 0 or 2). A positive $pReferId wins and
+     * must belong to that article; otherwise $referenceNo - 1 is used as key into the refer map.
+     *
+     * @return array{refer: array, p_article_id: int, p_refer_id: int, reference_no: int}
+     * @throws \RuntimeException when the production article or the reference cannot be found
+     * @throws \InvalidArgumentException when neither $pReferId nor $referenceNo is positive
+     */
+    private function resolveReferForRecheck($articleId, $pReferId, $referenceNo)
+    {
+        $prod = Db::name('production_article')
+            ->where('article_id', $articleId)
+            ->whereIn('state', [0, 2])
+            ->find();
+        if (empty($prod)) {
+            throw new \RuntimeException('production_article not found for article_id=' . $articleId);
+        }
+
+        $pArticleId = intval($prod['p_article_id']);
+        if (!($pReferId > 0) && !($referenceNo > 0)) {
+            throw new \InvalidArgumentException('p_refer_id or reference_no is required');
+        }
+
+        $refer = null;
+        if ($pReferId > 0) {
+            $refer = Db::name('production_article_refer')
+                ->where('p_refer_id', $pReferId)
+                ->where('p_article_id', $pArticleId)
+                ->where('state', 0)
+                ->find();
+        } else {
+            // Reference number n lives at key n - 1 of the refer map.
+            $referMap = $this->loadReferMapByPArticleId($pArticleId);
+            $slot = $referenceNo - 1;
+            if (isset($referMap[$slot])) {
+                $refer = $referMap[$slot];
+            }
+        }
+
+        if (empty($refer)) {
+            throw new \RuntimeException('production_article_refer not found');
+        }
+
+        return [
+            'refer' => $refer,
+            'p_article_id' => $pArticleId,
+            'p_refer_id' => intval($refer['p_refer_id']),
+            'reference_no' => intval($refer['index']) + 1,
+        ];
+    }
+
/**
* 仅使用 refer_doi 字段(二次 Crossref 摘要用)
*/
@@ -804,7 +1759,7 @@ class ReferenceCheckService
return '';
}
- $text = preg_replace('/\[([\d,\-\s]+)\]<\/blue>/', '[$1]', $text);
+ $text = preg_replace(self::BLUE_TAG_REGEX, '[$1]', $text);
$text = strip_tags($text);
$text = html_entity_decode($text, ENT_QUOTES | ENT_HTML5, 'UTF-8');
$text = preg_replace('/\s+/u', ' ', $text);
@@ -1163,6 +2118,7 @@ class ReferenceCheckService
return false;
}
+ $this->clearReferenceCheckQueueLock($checkId);
$this->pushJob2($checkId, 5);
return true;
}
@@ -1173,7 +2129,7 @@ class ReferenceCheckService
public function extractReferences($content)
{
$result = [];
- preg_match_all('/\[([\d,\-\s]+)\]<\/blue>/', $content, $matches, PREG_OFFSET_CAPTURE);
+ preg_match_all(self::BLUE_TAG_REGEX, $content, $matches, PREG_OFFSET_CAPTURE);
if (empty($matches[0])) {
return [];
}
@@ -1353,7 +2309,7 @@ class ReferenceCheckService
private function buildCitationContextText($content, $start, $end)
{
$text = $this->byteSubstr($content, $start, $end);
- $text = preg_replace('/\[[\d,\-\s]+\]<\/blue>/', '', $text);
+ $text = preg_replace(self::BLUE_TAG_REGEX, '', $text);
$text = trim(strip_tags($text));
$text = preg_replace('/\s+/u', ' ', $text);
$text = ltrim($text, "\xEF\xBB\xBF");
@@ -1493,8 +2449,7 @@ class ReferenceCheckService
$start = $tagStart - $maxBytes;
$slice = substr($content, $start, $tagStart - $start);
if (preg_match('/[.!?。!?]\s+/u', $slice, $m, PREG_OFFSET_CAPTURE)) {
- $last = end($m[0]);
- $rel = $last[1] + strlen($last[0]);
+ $rel = $m[0][1] + strlen($m[0][0]);
return $start + $rel;
}
@@ -1540,7 +2495,7 @@ class ReferenceCheckService
}
$gap = substr($content, $tagEnd, $end - $tagEnd);
- $gapText = trim(strip_tags(preg_replace('/\[[\d,\-\s]+\]<\/blue>/', '', $gap)));
+ $gapText = trim(strip_tags(preg_replace(self::BLUE_TAG_REGEX, '', $gap)));
if ($gapText !== '' && !$this->isOnlyPunctuationOrSpace($gapText)) {
return $end;
}
@@ -1551,8 +2506,47 @@ class ReferenceCheckService
return $length;
}
+ /**
+  * Queue already-persisted check rows in ascending reference-number order.
+  *
+  * Rows sharing a reference number are ordered by am_id, then by text_start.
+  * Jobs are pushed with an increasing delay (0, 1, 2, ... passed to pushJob as
+  * its delay-seconds argument) so they are dequeued in the sorted order.
+  *
+  * All three sort keys are normalised with intval() before comparison, so a
+  * reference_no of int 3 and string '3' are treated as equal and the
+  * tie-breakers still apply; a missing key counts as 0.
+  *
+  * @param array $rows each element contains check_id and reference_no; am_id and text_start are optional
+  *
+  * @return array check ids in the order they were queued (empty array for empty input)
+  */
+ private function pushJobsSortedByReferenceNo(array $rows)
+ {
+     if (empty($rows)) {
+         return [];
+     }
+
+     // Build a comparable integer tuple: [reference_no, am_id, text_start].
+     $sortKey = function ($row) {
+         return [
+             isset($row['reference_no']) ? intval($row['reference_no']) : 0,
+             isset($row['am_id']) ? intval($row['am_id']) : 0,
+             isset($row['text_start']) ? intval($row['text_start']) : 0,
+         ];
+     };
+
+     usort($rows, function ($a, $b) use ($sortKey) {
+         $keyA = $sortKey($a);
+         $keyB = $sortKey($b);
+         foreach ($keyA as $i => $valueA) {
+             // Values are ints here, so the strict comparison is type-safe.
+             if ($valueA !== $keyB[$i]) {
+                 return $valueA < $keyB[$i] ? -1 : 1;
+             }
+         }
+         return 0;
+     });
+
+     $checkIds = [];
+     $delay = 0;
+     foreach ($rows as $row) {
+         $checkId = intval($row['check_id']);
+         $checkIds[] = $checkId;
+         $this->pushJob($checkId, $delay);
+         $delay++;
+     }
+
+     return $checkIds;
+ }
+
private function pushJob($checkId, $delaySeconds = 0)
{
+ $checkId = intval($checkId);
+ $this->clearReferenceCheckQueueLock($checkId);
$jobClass = 'app\api\job\ReferenceCheck@fire';
$data = ['check_id' => $checkId];
try {
diff --git a/application/common/service/LLMService.php b/application/common/service/LLMService.php
index 01a755df..69f5e61c 100644
--- a/application/common/service/LLMService.php
+++ b/application/common/service/LLMService.php
@@ -32,11 +32,14 @@ class LLMService
*/
public function checkReference($contextText, $referText, $isAgain = false, $doiBlock = null)
{
+ // request_failed=true 表示"LLM 通讯/解析层面的失败"(可重试,区别于业务上的"未命中");
+ // 上游 runReferenceCheckOnce 会据此把 DB.status 置为 2(失败) 并抛异常触发队列重试
$fallback = [
- 'can_support' => false,
- 'is_match' => false,
- 'confidence' => 0.0,
- 'reason' => 'LLM not configured or request failed',
+ 'can_support' => false,
+ 'is_match' => false,
+ 'confidence' => 0.0,
+ 'reason' => 'LLM not configured or request failed',
+ 'request_failed' => true,
];
if ($this->url === '' || $this->model === '') {
\think\Log::warning('ReferenceCheck LLM: url or model not configured');
@@ -47,6 +50,7 @@ class LLMService
$referText = trim($referText);
$doiBlock = trim((string)$doiBlock);
if ($contextText === '' || $referText === '') {
+ // 空文本是入参问题,不是 LLM 故障,不需要重试
return [
'can_support' => false,
'is_match' => false,
@@ -149,10 +153,10 @@ class LLMService
你是文献引用校对助手。判断【正文全文】与【参考文献书目】是否相关、能否用于支撑正文中的引用。
【核心原则:从宽判断,避免误杀】
-默认倾向 can_support=true。只要文献与正文不是「驴唇不对马嘴」,即判为相关、能支撑。
+默认倾向 can_support=true。只要文献与正文不是「风马牛不相及」,即判为相关、能支撑。
不要求变量一致、不要求结论逐条对应、不要求研究设计相同。
-【仅当以下情况才判 can_support=false(驴唇不对马嘴)】
+【仅当以下情况才判 can_support=false(与正文明显无关)】
- 学科/主题完全无关(如正文讲深度学习聚类,文献是糖尿病步态检测)。
- 明显张冠李戴(正文断言 A 疗法的效果,文献研究的是完全不同的 B 问题且无关联)。
- 文献条目与正文讨论的对象/场景毫无交集,且无法作背景或理论引用。
@@ -164,7 +168,7 @@ class LLMService
【confidence 固定档位(禁止其它小数)】
can_support=true:0.65(有关联但较泛)/ 0.78 / 0.85 / 0.92 / 0.98(非常确定相关)
-can_support=false:0.15(明确驴唇不对马嘴)/ 0.25 / 0.35 / 0.45(仅当实在无法建立任何合理关联)
+can_support=false:0.15(明确风马牛不相及)/ 0.25 / 0.35 / 0.45(仅当实在无法建立任何合理关联)
【输出】仅一行 minified JSON,无 markdown:
{"can_support":true|false,"is_match":true|false,"confidence":0.15|0.25|0.35|0.45|0.65|0.78|0.85|0.92|0.98,"reason":"30-80字简体中文"}
@@ -176,7 +180,7 @@ PROMPT;
{
return "【正文全文 article_main.content】\n" . $contextText
. "\n\n【参考文献书目 refer_text】\n" . $referText
- . "\n\n请从宽判断:非驴唇不对马嘴即 can_support=true,只返回 JSON。";
+ . "\n\n请从宽判断:文献与正文非风马牛不相及即可判 can_support=true,只返回 JSON。";
}
/** 第二次校对:Crossref 摘要(Refer_doi) */
@@ -186,12 +190,12 @@ PROMPT;
你是文献引用二次校对助手。已根据 Refer_doi 从 Crossref(https://api.crossref.org/works/)获取摘要,请结合【正文全文】复核该文献是否相关。
【核心原则:与第一次相同,从宽判断】
-默认倾向 can_support=true。只要 Crossref 摘要(或书目)与正文不是驴唇不对马嘴,即判相关、能支撑。
+默认倾向 can_support=true。只要 Crossref 摘要(或书目)与正文不是风马牛不相及,即判相关、能支撑。
以【Crossref 摘要】为准;摘要与书目冲突时以摘要为准。
【仅当以下情况才判 can_support=false】
- 摘要显示的研究主题/对象/方法与正文讨论内容完全风马牛不相及。
-- 典型驴唇不对马嘴、张冠李戴,且无法解释为背景或泛化引用。
+- 典型风马牛不相及、张冠李戴,且无法解释为背景或泛化引用。
【以下情况均应 can_support=true】
- 摘要与正文属同领域或相近方向,能作背景、理论或方向性支撑。
@@ -217,7 +221,7 @@ PROMPT;
. "\n\n【参考文献书目 refer_text】\n" . $referText
. "\n\n【Crossref 摘要】(Refer_doi → api.crossref.org/works/)\n"
. ($doiBlock !== '' ? $doiBlock : '(未获取到摘要,请结合 refer_text 从宽判断)')
- . "\n\n非驴唇不对马嘴即 can_support=true,只返回 JSON。";
+ . "\n\n文献与正文非风马牛不相及即可判 can_support=true,只返回 JSON。";
}
private function buildReferenceCheckSystemPrompt3()
{