Changes
This commit is contained in:
@@ -61,7 +61,11 @@ class ReferenceCheck
|
|||||||
}
|
}
|
||||||
|
|
||||||
try {
|
try {
|
||||||
$contentA = trim((string)(isset($row['origin_text']) ? $row['origin_text'] : ''));
|
$mainInfo = Db::name('article_main')->where('am_id', $row['am_id'])->find();
|
||||||
|
$contentA = trim($mainInfo['content']);//trim((string)(isset($row['origin_text']) ? $row['origin_text'] : ''));
|
||||||
|
if ($contentA === '' && !empty($row['content_a'])) {
|
||||||
|
$contentA = trim((string)$row['content_a']);
|
||||||
|
}
|
||||||
$contentB = trim((string)(isset($row['refer_text']) ? $row['refer_text'] : ''));
|
$contentB = trim((string)(isset($row['refer_text']) ? $row['refer_text'] : ''));
|
||||||
|
|
||||||
if ($contentB === '' && intval($row['p_refer_id']) > 0) {
|
if ($contentB === '' && intval($row['p_refer_id']) > 0) {
|
||||||
@@ -83,15 +87,10 @@ class ReferenceCheck
|
|||||||
$llm = new LLMService();
|
$llm = new LLMService();
|
||||||
$llmResult = $llm->checkReference($contentA, $contentB);
|
$llmResult = $llm->checkReference($contentA, $contentB);
|
||||||
$isMatch = !empty($llmResult['is_match']);
|
$isMatch = !empty($llmResult['is_match']);
|
||||||
$confidence = $llm->enforceReferenceCheckConfidence(
|
|
||||||
isset($llmResult['confidence']) ? $llmResult['confidence'] : 0,
|
|
||||||
$isMatch
|
|
||||||
);
|
|
||||||
|
|
||||||
|
|
||||||
Db::name('article_reference_check_result')->where('id', $checkId)->update([
|
Db::name('article_reference_check_result')->where('id', $checkId)->update([
|
||||||
'is_match' => $isMatch ? 1 : 0,
|
'is_match' => $isMatch ? 1 : 0,
|
||||||
'confidence' => $confidence,
|
'confidence' => $llmResult['confidence'],
|
||||||
'reason' => $llmResult['reason'],
|
'reason' => $llmResult['reason'],
|
||||||
'status' => 1,
|
'status' => 1,
|
||||||
'error_msg' => '',
|
'error_msg' => '',
|
||||||
@@ -106,7 +105,7 @@ class ReferenceCheck
|
|||||||
$job->delete();
|
$job->delete();
|
||||||
$this->oQueueJob->log("任务执行成功 | 日志ID: {$sRedisKey}");
|
$this->oQueueJob->log("任务执行成功 | 日志ID: {$sRedisKey}");
|
||||||
} catch (\Exception $e) {
|
} catch (\Exception $e) {
|
||||||
var_dump($e->getMessage());
|
$this->oQueueJob->log('ReferenceCheck error: ' . $e->getMessage());
|
||||||
if ($job->attempts() >= 3) {
|
if ($job->attempts() >= 3) {
|
||||||
$this->markFailed($checkId, $e->getMessage());
|
$this->markFailed($checkId, $e->getMessage());
|
||||||
$job->delete();
|
$job->delete();
|
||||||
|
|||||||
@@ -132,7 +132,7 @@ class ReferenceCheckService
|
|||||||
$this->setAmRefCheckStatus($amId, self::AM_STATUS_RUNNING);
|
$this->setAmRefCheckStatus($amId, self::AM_STATUS_RUNNING);
|
||||||
}
|
}
|
||||||
public function checkOne(){
|
public function checkOne(){
|
||||||
$this->pushJob(intval(722), 0);
|
$this->pushJob(intval(724), 0);
|
||||||
}
|
}
|
||||||
public function enqueueByArticle($articleId){
|
public function enqueueByArticle($articleId){
|
||||||
if ($articleId <= 0) {
|
if ($articleId <= 0) {
|
||||||
@@ -196,7 +196,7 @@ class ReferenceCheckService
|
|||||||
'created_at' => $now,
|
'created_at' => $now,
|
||||||
'updated_at' => $now,
|
'updated_at' => $now,
|
||||||
]);
|
]);
|
||||||
continue;
|
|
||||||
$this->pushJob(intval($checkId), $delay);
|
$this->pushJob(intval($checkId), $delay);
|
||||||
$checkIds[] = $checkId;
|
$checkIds[] = $checkId;
|
||||||
$queued++;
|
$queued++;
|
||||||
@@ -205,7 +205,6 @@ class ReferenceCheckService
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
foreach (array_keys($amIdsWithJobs) as $amId) {
|
foreach (array_keys($amIdsWithJobs) as $amId) {
|
||||||
$this->setAmRefCheckStatus($amId, self::AM_STATUS_RUNNING);
|
$this->setAmRefCheckStatus($amId, self::AM_STATUS_RUNNING);
|
||||||
}
|
}
|
||||||
@@ -680,9 +679,6 @@ class ReferenceCheckService
|
|||||||
return $result;
|
return $result;
|
||||||
}
|
}
|
||||||
|
|
||||||
/** 与上一引用间距低于此值(字符)时视为同句并列,从整句开头截取而非仅取两标签之间 */
|
|
||||||
const CITE_GAP_SENTENCE_THRESHOLD = 60;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* 按引用位置截取局部上下文:优先取标签前叙述;同句多引时后续引用从上一标签后开始。
|
* 按引用位置截取局部上下文:优先取标签前叙述;同句多引时后续引用从上一标签后开始。
|
||||||
*/
|
*/
|
||||||
@@ -703,38 +699,25 @@ class ReferenceCheckService
|
|||||||
}
|
}
|
||||||
|
|
||||||
$hasPriorCiteInParagraph = ($prevTagEnd > $paragraphStart);
|
$hasPriorCiteInParagraph = ($prevTagEnd > $paragraphStart);
|
||||||
|
$sentenceStart = $this->findSentenceStart($content, $tagStart);
|
||||||
|
|
||||||
|
// 段内首个引用:整段到标签前;后续引用:取「本句」起点(可早于上一标签),避免只剩 “and external environment” 再误用标签后文本
|
||||||
if ($hasPriorCiteInParagraph) {
|
if ($hasPriorCiteInParagraph) {
|
||||||
$gapText = $this->buildCitationContextText($content, $prevTagEnd, $tagStart);
|
$localStart = max($paragraphStart, $sentenceStart);
|
||||||
// 如 motivation [23] and external environment [24]:间距短,取整句而非仅 “and external environment”
|
|
||||||
if (mb_strlen($gapText) < self::CITE_GAP_SENTENCE_THRESHOLD) {
|
|
||||||
$sentenceStart = $this->findSentenceStart($content, $tagStart);
|
|
||||||
$localStart = $this->capContextStartBeforeTag(
|
|
||||||
$content,
|
|
||||||
$tagStart,
|
|
||||||
max($paragraphStart, $sentenceStart)
|
|
||||||
);
|
|
||||||
} else {
|
|
||||||
// 如 … Yin et al. [13] on oncology nurses, but … Yang [14]:间距较长,取上一标签后至本标签前
|
|
||||||
$localStart = $prevTagEnd;
|
|
||||||
}
|
|
||||||
} else {
|
} else {
|
||||||
$localStart = $this->capContextStartBeforeTag($content, $tagStart, $paragraphStart);
|
$localStart = $this->capContextStartBeforeTag($content, $tagStart, $paragraphStart);
|
||||||
}
|
}
|
||||||
|
|
||||||
// 默认:引用标签前的论述(如 Yin et al. [13] → 含 “higher than … Yin et al.”)
|
// 默认:引用标签前的论述
|
||||||
$localEnd = $tagStart;
|
$localEnd = $tagStart;
|
||||||
$originalText = $this->buildCitationContextText($content, $localStart, $localEnd);
|
$originalText = $this->buildCitationContextText($content, $localStart, $localEnd);
|
||||||
|
|
||||||
// 标签前几乎无正文(如句末 … ICU nurses [14])→ 改用标签后至下一引用或句末
|
// 仅段内首个引用、且标签前极短(如句末 ICU nurses [14])时,才改用标签后片段;同段多引禁止标签后截取(会错取下一句)
|
||||||
if (!$this->isMeaningfulCitationContext($originalText)
|
$allowTrailing = !$hasPriorCiteInParagraph;
|
||||||
|| $this->shouldUseTrailingCitationContext(
|
if ($allowTrailing && (
|
||||||
$content,
|
!$this->isMeaningfulCitationContext($originalText)
|
||||||
$localStart,
|
|| $this->shouldUseTrailingCitationContext($content, $localStart, $tagStart, $tagEnd)
|
||||||
$tagStart,
|
)) {
|
||||||
$tagEnd,
|
|
||||||
$hasPriorCiteInParagraph
|
|
||||||
)
|
|
||||||
) {
|
|
||||||
$trailEnd = ($nextTagStart < $sentenceEnd) ? $nextTagStart : $sentenceEnd;
|
$trailEnd = ($nextTagStart < $sentenceEnd) ? $nextTagStart : $sentenceEnd;
|
||||||
$trailText = $this->buildCitationContextText($content, $tagEnd, $trailEnd);
|
$trailText = $this->buildCitationContextText($content, $tagEnd, $trailEnd);
|
||||||
if ($this->isMeaningfulCitationContext($trailText)) {
|
if ($this->isMeaningfulCitationContext($trailText)) {
|
||||||
@@ -760,23 +743,12 @@ class ReferenceCheckService
|
|||||||
|
|
||||||
/**
|
/**
|
||||||
* 标签前仅有作者缩写等极短片段时,改用标签后上下文
|
* 标签前仅有作者缩写等极短片段时,改用标签后上下文
|
||||||
*
|
|
||||||
* @param bool $hasPriorCiteInParagraph 同段多引时,短片段常为并列成分,不应误取标签后下一句
|
|
||||||
*/
|
*/
|
||||||
private function shouldUseTrailingCitationContext(
|
private function shouldUseTrailingCitationContext($content, $localStart, $tagStart, $tagEnd)
|
||||||
$content,
|
{
|
||||||
$localStart,
|
|
||||||
$tagStart,
|
|
||||||
$tagEnd,
|
|
||||||
$hasPriorCiteInParagraph = false
|
|
||||||
) {
|
|
||||||
$before = $this->buildCitationContextText($content, $localStart, $tagStart);
|
$before = $this->buildCitationContextText($content, $localStart, $tagStart);
|
||||||
if (!$this->isMeaningfulCitationContext($before)) {
|
if (!$this->isMeaningfulCitationContext($before)) {
|
||||||
return !$hasPriorCiteInParagraph;
|
return true;
|
||||||
}
|
|
||||||
|
|
||||||
if ($hasPriorCiteInParagraph) {
|
|
||||||
return false;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return mb_strlen($before) < 25;
|
return mb_strlen($before) < 25;
|
||||||
@@ -845,7 +817,6 @@ class ReferenceCheckService
|
|||||||
$text = trim(strip_tags($text));
|
$text = trim(strip_tags($text));
|
||||||
$text = preg_replace('/\s+/u', ' ', $text);
|
$text = preg_replace('/\s+/u', ' ', $text);
|
||||||
$text = ltrim($text, "\xEF\xBB\xBF");
|
$text = ltrim($text, "\xEF\xBB\xBF");
|
||||||
$text = preg_replace('/^[\s.!?。!?,,、;:]+/u', '', $text);
|
|
||||||
|
|
||||||
return $text;
|
return $text;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -20,7 +20,8 @@ class LLMService
|
|||||||
$this->url = trim((string)Env::get('promotion.promotion_llm_url', ''));
|
$this->url = trim((string)Env::get('promotion.promotion_llm_url', ''));
|
||||||
$this->model = trim((string)Env::get('promotion.promotion_llm_model', ''));
|
$this->model = trim((string)Env::get('promotion.promotion_llm_model', ''));
|
||||||
$this->apiKey = trim((string)Env::get('promotion.promotion_llm_api_key', ''));
|
$this->apiKey = trim((string)Env::get('promotion.promotion_llm_api_key', ''));
|
||||||
$this->timeout = max(30, intval(Env::get('promotion.promotion_llm_timeout', 120)));
|
// 引用校对 system 提示词较长,请求常超过 30s,至少 120s
|
||||||
|
$this->timeout = max(120, intval(Env::get('promotion.promotion_llm_timeout', 120)));
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -34,9 +35,8 @@ class LLMService
|
|||||||
'confidence' => 0.0,
|
'confidence' => 0.0,
|
||||||
'reason' => 'LLM not configured or request failed',
|
'reason' => 'LLM not configured or request failed',
|
||||||
];
|
];
|
||||||
\think\Log::info('llmUrl:'.$this->url);
|
|
||||||
var_dump("in URL====".$this->url);
|
|
||||||
if ($this->url === '' || $this->model === '') {
|
if ($this->url === '' || $this->model === '') {
|
||||||
|
\think\Log::warning('ReferenceCheck LLM: url or model not configured');
|
||||||
return $fallback;
|
return $fallback;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -73,11 +73,13 @@ class LLMService
|
|||||||
|
|
||||||
$content = $this->postChat($payload);
|
$content = $this->postChat($payload);
|
||||||
if ($content === null) {
|
if ($content === null) {
|
||||||
|
\think\Log::warning('ReferenceCheck LLM: postChat returned null');
|
||||||
return $fallback;
|
return $fallback;
|
||||||
}
|
}
|
||||||
|
|
||||||
$parsed = $this->parseJson($content);
|
$parsed = $this->parseJson($content);
|
||||||
if ($parsed === null) {
|
if ($parsed === null) {
|
||||||
|
\think\Log::warning('ReferenceCheck LLM: parseJson failed, raw=' . mb_substr($content, 0, 500));
|
||||||
return $fallback;
|
return $fallback;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -86,7 +88,11 @@ class LLMService
|
|||||||
$this->normalizeConfidence(isset($parsed['confidence']) ? $parsed['confidence'] : 0),
|
$this->normalizeConfidence(isset($parsed['confidence']) ? $parsed['confidence'] : 0),
|
||||||
$isMatch
|
$isMatch
|
||||||
);
|
);
|
||||||
|
\think\Log::info("confidence:".$confidence,[
|
||||||
|
'is_match' => $isMatch,
|
||||||
|
'confidence' => $confidence,
|
||||||
|
'reason' => $this->cleanReason((string)(isset($parsed['reason']) ? $parsed['reason'] : '')),
|
||||||
|
]);
|
||||||
return [
|
return [
|
||||||
'is_match' => $isMatch,
|
'is_match' => $isMatch,
|
||||||
'confidence' => $confidence,
|
'confidence' => $confidence,
|
||||||
@@ -763,13 +769,14 @@ PROMPT;
|
|||||||
|
|
||||||
$raw = curl_exec($ch);
|
$raw = curl_exec($ch);
|
||||||
if ($raw === false) {
|
if ($raw === false) {
|
||||||
|
\think\Log::warning('ReferenceCheck LLM curl error: ' . curl_error($ch));
|
||||||
curl_close($ch);
|
curl_close($ch);
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
$httpCode = intval(curl_getinfo($ch, CURLINFO_HTTP_CODE));
|
$httpCode = intval(curl_getinfo($ch, CURLINFO_HTTP_CODE));
|
||||||
\think\Log::info('httpCode:'.$httpCode);
|
|
||||||
curl_close($ch);
|
curl_close($ch);
|
||||||
if ($httpCode < 200 || $httpCode >= 300) {
|
if ($httpCode < 200 || $httpCode >= 300) {
|
||||||
|
\think\Log::warning('ReferenceCheck LLM http ' . $httpCode . ': ' . mb_substr((string)$raw, 0, 500));
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -783,8 +790,8 @@ PROMPT;
|
|||||||
if (isset($data['content'])) {
|
if (isset($data['content'])) {
|
||||||
return (string)$data['content'];
|
return (string)$data['content'];
|
||||||
}
|
}
|
||||||
}catch (Exception $exception){
|
} catch (Exception $exception) {
|
||||||
var_dump($exception->getMessage());
|
\think\Log::warning('ReferenceCheck LLM exception: ' . $exception->getMessage());
|
||||||
}
|
}
|
||||||
|
|
||||||
return null;
|
return null;
|
||||||
|
|||||||
Reference in New Issue
Block a user