diff --git a/application/api/job/ReferenceCheck.php b/application/api/job/ReferenceCheck.php index 1078b8ca..704d692d 100644 --- a/application/api/job/ReferenceCheck.php +++ b/application/api/job/ReferenceCheck.php @@ -61,7 +61,11 @@ class ReferenceCheck } try { - $contentA = trim((string)(isset($row['origin_text']) ? $row['origin_text'] : '')); + $mainInfo = Db::name('article_main')->where('am_id', $row['am_id'])->find(); + $contentA = trim($mainInfo['content']);//trim((string)(isset($row['origin_text']) ? $row['origin_text'] : '')); + if ($contentA === '' && !empty($row['content_a'])) { + $contentA = trim((string)$row['content_a']); + } $contentB = trim((string)(isset($row['refer_text']) ? $row['refer_text'] : '')); if ($contentB === '' && intval($row['p_refer_id']) > 0) { @@ -83,15 +87,10 @@ class ReferenceCheck $llm = new LLMService(); $llmResult = $llm->checkReference($contentA, $contentB); $isMatch = !empty($llmResult['is_match']); - $confidence = $llm->enforceReferenceCheckConfidence( - isset($llmResult['confidence']) ? $llmResult['confidence'] : 0, - $isMatch - ); - Db::name('article_reference_check_result')->where('id', $checkId)->update([ 'is_match' => $isMatch ? 1 : 0, - 'confidence' => $confidence, + 'confidence' => $llmResult['confidence'], 'reason' => $llmResult['reason'], 'status' => 1, 'error_msg' => '', @@ -106,7 +105,7 @@ class ReferenceCheck $job->delete(); $this->oQueueJob->log("任务执行成功 | 日志ID: {$sRedisKey}"); } catch (\Exception $e) { - var_dump($e->getMessage()); + $this->oQueueJob->log('ReferenceCheck error: ' . $e->getMessage()); if ($job->attempts() >= 3) { $this->markFailed($checkId, $e->getMessage()); $job->delete(); diff --git a/application/common/ReferenceCheckService.php b/application/common/ReferenceCheckService.php index f1903ca4..9aab409e 100644 --- a/application/common/ReferenceCheckService.php +++ b/application/common/ReferenceCheckService.php @@ -132,7 +132,7 @@ class ReferenceCheckService $this->setAmRefCheckStatus($amId, self::AM_STATUS_RUNNING); } public function checkOne(){ - $this->pushJob(intval(722), 0); + $this->pushJob(intval(724), 0); } public function enqueueByArticle($articleId){ if ($articleId <= 0) { @@ -196,7 +196,7 @@ class ReferenceCheckService 'created_at' => $now, 'updated_at' => $now, ]); - continue; + $this->pushJob(intval($checkId), $delay); $checkIds[] = $checkId; $queued++; @@ -205,7 +205,6 @@ class ReferenceCheckService } } } - foreach (array_keys($amIdsWithJobs) as $amId) { $this->setAmRefCheckStatus($amId, self::AM_STATUS_RUNNING); } @@ -680,9 +679,6 @@ class ReferenceCheckService return $result; } - /** 与上一引用间距低于此值(字符)时视为同句并列,从整句开头截取而非仅取两标签之间 */ - const CITE_GAP_SENTENCE_THRESHOLD = 60; - /** * 按引用位置截取局部上下文:优先取标签前叙述;同句多引时后续引用从上一标签后开始。 */ @@ -703,38 +699,25 @@ class ReferenceCheckService } $hasPriorCiteInParagraph = ($prevTagEnd > $paragraphStart); + $sentenceStart = $this->findSentenceStart($content, $tagStart); + + // 段内首个引用:整段到标签前;后续引用:取「本句」起点(可早于上一标签),避免只剩 “and external environment” 再误用标签后文本 if ($hasPriorCiteInParagraph) { - $gapText = $this->buildCitationContextText($content, $prevTagEnd, $tagStart); - // 如 motivation [23] and external environment [24]:间距短,取整句而非仅 “and external environment” - if (mb_strlen($gapText) < self::CITE_GAP_SENTENCE_THRESHOLD) { - $sentenceStart = $this->findSentenceStart($content, $tagStart); - $localStart = $this->capContextStartBeforeTag( - $content, - $tagStart, - max($paragraphStart, $sentenceStart) - ); - } else { - // 如 … Yin et al. [13] on oncology nurses, but … Yang [14]:间距较长,取上一标签后至本标签前 - $localStart = $prevTagEnd; - } + $localStart = max($paragraphStart, $sentenceStart); } else { $localStart = $this->capContextStartBeforeTag($content, $tagStart, $paragraphStart); } - // 默认:引用标签前的论述(如 Yin et al. [13] → 含 “higher than … Yin et al.”) + // 默认:引用标签前的论述 $localEnd = $tagStart; $originalText = $this->buildCitationContextText($content, $localStart, $localEnd); - // 标签前几乎无正文(如句末 … ICU nurses [14])→ 改用标签后至下一引用或句末 - if (!$this->isMeaningfulCitationContext($originalText) - || $this->shouldUseTrailingCitationContext( - $content, - $localStart, - $tagStart, - $tagEnd, - $hasPriorCiteInParagraph - ) - ) { + // 仅段内首个引用、且标签前极短(如句末 ICU nurses [14])时,才改用标签后片段;同段多引禁止标签后截取(会错取下一句) + $allowTrailing = !$hasPriorCiteInParagraph; + if ($allowTrailing && ( + !$this->isMeaningfulCitationContext($originalText) + || $this->shouldUseTrailingCitationContext($content, $localStart, $tagStart, $tagEnd) + )) { $trailEnd = ($nextTagStart < $sentenceEnd) ? $nextTagStart : $sentenceEnd; $trailText = $this->buildCitationContextText($content, $tagEnd, $trailEnd); if ($this->isMeaningfulCitationContext($trailText)) { @@ -760,23 +743,12 @@ class ReferenceCheckService /** * 标签前仅有作者缩写等极短片段时,改用标签后上下文 - * - * @param bool $hasPriorCiteInParagraph 同段多引时,短片段常为并列成分,不应误取标签后下一句 */ - private function shouldUseTrailingCitationContext( - $content, - $localStart, - $tagStart, - $tagEnd, - $hasPriorCiteInParagraph = false - ) { + private function shouldUseTrailingCitationContext($content, $localStart, $tagStart, $tagEnd) + { $before = $this->buildCitationContextText($content, $localStart, $tagStart); if (!$this->isMeaningfulCitationContext($before)) { - return !$hasPriorCiteInParagraph; - } - - if ($hasPriorCiteInParagraph) { - return false; + return true; } return mb_strlen($before) < 25; @@ -845,7 +817,6 @@ class ReferenceCheckService $text = trim(strip_tags($text)); $text = preg_replace('/\s+/u', ' ', $text); $text = ltrim($text, "\xEF\xBB\xBF"); - $text = preg_replace('/^[\s.!?。!?,,、;:]+/u', '', $text); return $text; } diff --git a/application/common/service/LLMService.php b/application/common/service/LLMService.php index 4ffe0cda..d8734596 100644 --- a/application/common/service/LLMService.php +++ b/application/common/service/LLMService.php @@ -20,7 +20,8 @@ class LLMService $this->url = trim((string)Env::get('promotion.promotion_llm_url', '')); $this->model = trim((string)Env::get('promotion.promotion_llm_model', '')); $this->apiKey = trim((string)Env::get('promotion.promotion_llm_api_key', '')); - $this->timeout = max(30, intval(Env::get('promotion.promotion_llm_timeout', 120))); + // 引用校对 system 提示词较长,请求常超过 30s,至少 120s + $this->timeout = max(120, intval(Env::get('promotion.promotion_llm_timeout', 120))); } /** @@ -34,9 +35,8 @@ class LLMService 'confidence' => 0.0, 'reason' => 'LLM not configured or request failed', ]; - \think\Log::info('llmUrl:'.$this->url); - var_dump("in URL====".$this->url); if ($this->url === '' || $this->model === '') { + \think\Log::warning('ReferenceCheck LLM: url or model not configured'); return $fallback; } @@ -73,11 +73,13 @@ class LLMService $content = $this->postChat($payload); if ($content === null) { + \think\Log::warning('ReferenceCheck LLM: postChat returned null'); return $fallback; } $parsed = $this->parseJson($content); if ($parsed === null) { + \think\Log::warning('ReferenceCheck LLM: parseJson failed, raw=' . mb_substr($content, 0, 500)); return $fallback; } @@ -86,7 +88,11 @@ class LLMService $this->normalizeConfidence(isset($parsed['confidence']) ? $parsed['confidence'] : 0), $isMatch ); - + \think\Log::info("confidence:".$confidence,[ + 'is_match' => $isMatch, + 'confidence' => $confidence, + 'reason' => $this->cleanReason((string)(isset($parsed['reason']) ? $parsed['reason'] : '')), + ]); return [ 'is_match' => $isMatch, 'confidence' => $confidence, @@ -763,13 +769,14 @@ PROMPT; $raw = curl_exec($ch); if ($raw === false) { + \think\Log::warning('ReferenceCheck LLM curl error: ' . curl_error($ch)); curl_close($ch); return null; } $httpCode = intval(curl_getinfo($ch, CURLINFO_HTTP_CODE)); - \think\Log::info('httpCode:'.$httpCode); curl_close($ch); if ($httpCode < 200 || $httpCode >= 300) { + \think\Log::warning('ReferenceCheck LLM http ' . $httpCode . ': ' . mb_substr((string)$raw, 0, 500)); return null; } @@ -783,8 +790,8 @@ PROMPT; if (isset($data['content'])) { return (string)$data['content']; } - }catch (Exception $exception){ - var_dump($exception->getMessage()); + } catch (Exception $exception) { + \think\Log::warning('ReferenceCheck LLM exception: ' . $exception->getMessage()); } return null;