diff --git a/application/common/ReferenceCheckService.php b/application/common/ReferenceCheckService.php index 65ee9abc..d56c0af0 100644 --- a/application/common/ReferenceCheckService.php +++ b/application/common/ReferenceCheckService.php @@ -193,7 +193,7 @@ class ReferenceCheckService 'created_at' => $now, 'updated_at' => $now, ]); -// continue; + continue; $this->pushJob(intval($checkId), $delay); $checkIds[] = $checkId; $queued++; @@ -677,6 +677,9 @@ class ReferenceCheckService return $result; } + /** 与上一引用间距低于此值(字符)时视为同句并列,从整句开头截取而非仅取两标签之间 */ + const CITE_GAP_SENTENCE_THRESHOLD = 60; + /** * 按引用位置截取局部上下文:优先取标签前叙述;同句多引时后续引用从上一标签后开始。 */ @@ -697,32 +700,37 @@ class ReferenceCheckService } $hasPriorCiteInParagraph = ($prevTagEnd > $paragraphStart); - // 同段后续引用:从上一 后开始;段内首个引用:从段落开头到本标签前 if ($hasPriorCiteInParagraph) { - $localStart = $prevTagEnd; + $gapText = $this->buildCitationContextText($content, $prevTagEnd, $tagStart); + // 如 motivation [23] and external environment [24]:间距短,取整句而非仅 “and external environment” + if (mb_strlen($gapText) < self::CITE_GAP_SENTENCE_THRESHOLD) { + $sentenceStart = $this->findSentenceStart($content, $tagStart); + $localStart = $this->capContextStartBeforeTag( + $content, + $tagStart, + max($paragraphStart, $sentenceStart) + ); + } else { + // 如 … Yin et al. [13] on oncology nurses, but … Yang [14]:间距较长,取上一标签后至本标签前 + $localStart = $prevTagEnd; + } } else { - $sentenceStart = $this->findSentenceStart($content, $tagStart); - $localStart = $this->capContextStartBeforeTag( - $content, - $tagStart, - max($paragraphStart, $sentenceStart) - ); + $localStart = $this->capContextStartBeforeTag($content, $tagStart, $paragraphStart); } - // 默认:引用标签前的论述 + // 默认:引用标签前的论述(如 Yin et al. [13] → 含 “higher than … Yin et al.”) $localEnd = $tagStart; $originalText = $this->buildCitationContextText($content, $localStart, $localEnd); - // 同句多引(如 …[23] and external environment [24]):上一标签后仅几个词,回退到本句开头 - if ($hasPriorCiteInParagraph && mb_strlen($originalText) < 50) { - $sentenceStart = $this->findSentenceStart($content, $tagStart); - $localStart = max($paragraphStart, $sentenceStart); - $originalText = $this->buildCitationContextText($content, $localStart, $localEnd); - } - - // 仅段内首个引用且标签前极短时才用标签后文(避免 [24] 误截到 [25] 所在句) + // 标签前几乎无正文(如句末 … ICU nurses [14])→ 改用标签后至下一引用或句末 if (!$this->isMeaningfulCitationContext($originalText) - || (!$hasPriorCiteInParagraph && $this->shouldUseTrailingCitationContext($content, $localStart, $tagStart, $tagEnd)) + || $this->shouldUseTrailingCitationContext( + $content, + $localStart, + $tagStart, + $tagEnd, + $hasPriorCiteInParagraph + ) ) { $trailEnd = ($nextTagStart < $sentenceEnd) ? $nextTagStart : $sentenceEnd; $trailText = $this->buildCitationContextText($content, $tagEnd, $trailEnd); @@ -749,12 +757,23 @@ class ReferenceCheckService /** * 标签前仅有作者缩写等极短片段时,改用标签后上下文 + * + * @param bool $hasPriorCiteInParagraph 同段多引时,短片段常为并列成分,不应误取标签后下一句 */ - private function shouldUseTrailingCitationContext($content, $localStart, $tagStart, $tagEnd) - { + private function shouldUseTrailingCitationContext( + $content, + $localStart, + $tagStart, + $tagEnd, + $hasPriorCiteInParagraph = false + ) { $before = $this->buildCitationContextText($content, $localStart, $tagStart); if (!$this->isMeaningfulCitationContext($before)) { - return true; + return !$hasPriorCiteInParagraph; + } + + if ($hasPriorCiteInParagraph) { + return false; } return mb_strlen($before) < 25; @@ -823,7 +842,7 @@ class ReferenceCheckService $text = trim(strip_tags($text)); $text = preg_replace('/\s+/u', ' ', $text); $text = ltrim($text, "\xEF\xBB\xBF"); - $text = preg_replace('/^[\s.,。;、:!?]+/u', '', $text); + $text = preg_replace('/^[\s.!?。!?,,、;:]+/u', '', $text); return $text; }