diff --git a/application/api/controller/References.php b/application/api/controller/References.php index fbc6b6be..2ca9eca1 100644 --- a/application/api/controller/References.php +++ b/application/api/controller/References.php @@ -1533,7 +1533,7 @@ class References extends Base * POST/GET: p_refer_id(必填) * p_article_id(可选) * - * 仅重跑 status=3(校对失败)的记录;不改动 refer_text,只重置结果字段后入 ReferenceCheck 队列。 + * 仅重跑 status=3(校对失败)的记录;不改动 refer_text,只重置结果字段后入 RabbitMQ 批次队列。 * 返回:p_refer_id、p_article_id、reset、queued、check_ids、queue */ public function referenceCheckRecheckFailedAI() diff --git a/application/api/job/ReferenceCheck.php b/application/api/job/ReferenceCheck.php deleted file mode 100644 index 3977e39e..00000000 --- a/application/api/job/ReferenceCheck.php +++ /dev/null @@ -1,114 +0,0 @@ -oQueueJob = new QueueJob(); - $this->QueueRedis = QueueRedis::getInstance(); - } - - public function fire(Job $job, $data) - { - $this->oQueueJob->init($job); - - $rawBody = empty($job->getRawBody()) ? '' : $job->getRawBody(); - $jobData = empty($rawBody) ? [] : json_decode($rawBody, true); - $jobId = empty($jobData['id']) ? 'unknown' : $jobData['id']; - - $sRedisKey = ''; - $sRedisValue = ''; - - $this->oQueueJob->log("-----------队列任务开始-----------"); - $this->oQueueJob->log("当前任务ID: {$jobId}, 尝试次数: {$job->attempts()}"); - - try { - $checkId = intval(isset($data['check_id']) ? $data['check_id'] : 0); - if ($checkId <= 0 && !empty($jobData['data']['check_id'])) { - $checkId = intval($jobData['data']['check_id']); - } - if ($checkId <= 0) { - $job->delete(); - return; - } - - $row = Db::name('article_reference_check_result')->where('id', $checkId)->find(); - if (empty($row)) { - $job->delete(); - return; - } - - if (intval($row['status']) === ReferenceCheckService::RECORD_COMPLETED) { - $job->delete(); - return; - } - - $sClassName = get_class($this); - $sRedisKey = "queue_job:{$sClassName}:{$checkId}"; - $sRedisValue = uniqid() . '_' . getmypid(); - - $svc = new ReferenceCheckService(); - $svc->clearReferenceCheckQueueLock($checkId); - - if (!$this->oQueueJob->acquireLock($sRedisKey, $sRedisValue, $job)) { - return; - } - - try { - $svc->runReferenceCheckOnce($checkId); - - $amId = intval(isset($row['am_id']) ? $row['am_id'] : 0); - if ($amId > 0) { - $svc->syncAmRefCheckStatus($amId); - } - $this->QueueRedis->finishJob($sRedisKey, 'completed', $this->completedExprie, $sRedisValue); - $job->delete(); - $this->oQueueJob->log("任务执行成功 | 日志ID: {$sRedisKey}"); - } catch (\Exception $e) { - $this->oQueueJob->log('ReferenceCheck error: ' . $e->getMessage()); - if ($job->attempts() >= 3) { - $this->markFailed($checkId, $e->getMessage()); - $job->delete(); - return; - } - $job->release(30); - } - } catch (\RuntimeException $e) { - $this->oQueueJob->handleRetryableException($e, $sRedisKey, $sRedisValue, $job); - } catch (\LogicException $e) { - $this->oQueueJob->handleNonRetryableException($e, $sRedisKey, $sRedisValue, $job); - } catch (\Exception $e) { - $this->oQueueJob->handleRetryableException($e, $sRedisKey, $sRedisValue, $job); - } finally { - $this->oQueueJob->finnal(); - } - } - - private function markFailed($checkId, $msg) - { - $row = Db::name('article_reference_check_result')->where('id', $checkId)->find(); - try { - (new ReferenceCheckService())->updateCheckResult($checkId, [ - 'status' => ReferenceCheckService::RECORD_FAILED, - 'error_msg' => $msg, - ]); - } catch (\Exception $e) { - \think\Log::error('ReferenceCheck markFailed: ' . $e->getMessage()); - } - $amId = empty($row) ? 0 : intval(isset($row['am_id']) ? $row['am_id'] : 0); - if ($amId > 0) { - (new ReferenceCheckService())->syncAmRefCheckStatus($amId); - } - } -} diff --git a/application/api/job/ReferenceCheckTwo.php b/application/api/job/ReferenceCheckTwo.php deleted file mode 100644 index 53cb0939..00000000 --- a/application/api/job/ReferenceCheckTwo.php +++ /dev/null @@ -1,162 +0,0 @@ -oQueueJob = new QueueJob(); - $this->QueueRedis = QueueRedis::getInstance(); - } - - public function fire(Job $job, $data) - { - $this->oQueueJob->init($job); - - $rawBody = empty($job->getRawBody()) ? '' : $job->getRawBody(); - $jobData = empty($rawBody) ? [] : json_decode($rawBody, true); - $jobId = empty($jobData['id']) ? 'unknown' : $jobData['id']; - - $sRedisKey = ''; - $sRedisValue = ''; - - $this->oQueueJob->log("-----------队列任务开始-----------"); - $this->oQueueJob->log("当前任务ID: {$jobId}, 尝试次数: {$job->attempts()}"); - - try { - $checkId = intval(isset($data['check_id']) ? $data['check_id'] : 0); - if ($checkId <= 0 && !empty($jobData['data']['check_id'])) { - $checkId = intval($jobData['data']['check_id']); - } - $sClassName = get_class($this); - $sRedisKey = "queue_job_two:{$sClassName}:{$checkId}"; - $sRedisValue = uniqid() . '_' . getmypid(); - - if (!$this->oQueueJob->acquireLock($sRedisKey, $sRedisValue, $job)) { - return; - } - - if ($checkId <= 0) { - $job->delete(); - return; - } - - $row = Db::name('article_reference_check_result')->where('id', $checkId)->find(); - if (empty($row)) { - $job->delete(); - return; - } - -// if (intval($row['status']) === ReferenceCheckService::RECORD_COMPLETED) { -// $job->delete(); -// return; -// } - - try { - $svc = new ReferenceCheckService(); - - $contentA = $svc->resolveMainContentForJob($row); - $referText = trim((string)(isset($row['refer_text']) ? $row['refer_text'] : '')); - $refer = null; - - if (intval($row['p_refer_id']) > 0) { - $refer = Db::name('production_article_refer') - ->where('p_refer_id', intval($row['p_refer_id'])) - ->where('state', 0) - ->find(); - } - - $payload = $svc->prepareRecheckPayload(is_array($refer) ? $refer : [], $referText); - $doiBlock = $payload['doi_block']; - - if ($contentA === '' || $referText === '') { - $this->markFailed($checkId, 'Missing article_main.content or refer_text'); - $job->delete(); - return; - } - $llm = new LLMService(); - $llmResult = $llm->checkReference($contentA, $referText, true, $doiBlock); - - $requestFailed = !empty($llmResult['request_failed']); - $canSupport = $svc->parseLlmCanSupport($llmResult); - $tag = $payload['has_abstract'] - ? ('[Crossref复核' . ($payload['doi_used'] !== '' ? ' ' . $payload['doi_used'] : '') . ']') - : '[Crossref复核-无摘要]'; - $reason = $tag . ' ' . (isset($llmResult['reason']) ? $llmResult['reason'] : ''); - - // LLM 通讯失败:写 status=RECORD_FAILED(3) 并抛异常触发队列重试 - if ($requestFailed) { - $svc->updateCheckResult($checkId, [ - 'confidence' => floatval($llmResult['confidence']), - 'reason' => $reason, - 'status' => ReferenceCheckService::RECORD_FAILED, - 'error_msg' => isset($llmResult['reason']) ? $llmResult['reason'] : 'LLM request failed', - ]); - throw new \RuntimeException(isset($llmResult['reason']) ? $llmResult['reason'] : 'LLM request failed'); - } - - $affected = $svc->updateCheckResult($checkId, [ - 'can_support' => $canSupport ? 1 : 0, - 'is_match' => $canSupport ? 1 : 0, - 'confidence' => floatval($llmResult['confidence']), - 'reason' => $reason, - 'status' => ReferenceCheckService::RECORD_COMPLETED, - 'error_msg' => '', - ]); - $this->oQueueJob->log("Crossref复核写入 id={$checkId} affected={$affected} can_support=" . ($canSupport ? 1 : 0) . " confidence=" . floatval($llmResult['confidence'])); - - $amId = intval(isset($row['am_id']) ? $row['am_id'] : 0); - if ($amId > 0) { - $svc->syncAmRefCheckStatus($amId); - } - $this->QueueRedis->finishJob($sRedisKey, 'completed', $this->completedExprie, $sRedisValue); - $job->delete(); - $this->oQueueJob->log("任务执行成功 | 日志ID: {$sRedisKey}"); - } catch (\Exception $e) { - $this->oQueueJob->log('ReferenceCheckTwo error: ' . $e->getMessage()); - if ($job->attempts() >= 3) { - $this->markFailed($checkId, $e->getMessage()); - $job->delete(); - return; - } - $job->release(30); - } - } catch (\RuntimeException $e) { - $this->oQueueJob->handleRetryableException($e, $sRedisKey, $sRedisValue, $job); - } catch (\LogicException $e) { - $this->oQueueJob->handleNonRetryableException($e, $sRedisKey, $sRedisValue, $job); - } catch (\Exception $e) { - $this->oQueueJob->handleRetryableException($e, $sRedisKey, $sRedisValue, $job); - } finally { - $this->oQueueJob->finnal(); - } - } - - private function markFailed($checkId, $msg) - { - $row = Db::name('article_reference_check_result')->where('id', $checkId)->find(); - try { - (new ReferenceCheckService())->updateCheckResult($checkId, [ - 'status' => ReferenceCheckService::RECORD_FAILED, - 'error_msg' => $msg, - ]); - } catch (\Exception $e) { - \think\Log::error('ReferenceCheckTwo markFailed: ' . $e->getMessage()); - } - $amId = empty($row) ? 0 : intval(isset($row['am_id']) ? $row['am_id'] : 0); - if ($amId > 0) { - (new ReferenceCheckService())->syncAmRefCheckStatus($amId); - } - } -} diff --git a/application/command.php b/application/command.php index 826bb2b2..43892e98 100644 --- a/application/command.php +++ b/application/command.php @@ -9,4 +9,6 @@ // | Author: yunwuxin <448901948@qq.com> // +---------------------------------------------------------------------- -return []; +return [ + 'app\\command\\ReferenceCheckMqConsume', +]; diff --git a/application/common/ReferenceCheckService.php b/application/common/ReferenceCheckService.php index 89ef6b8a..e551a482 100644 --- a/application/common/ReferenceCheckService.php +++ b/application/common/ReferenceCheckService.php @@ -4,16 +4,17 @@ namespace app\common; use think\Db; use think\Env; -use think\Queue; use app\common\service\LLMService; +use app\common\mq\ReferenceCheckMqPublisher; /** * 正文 <blue>[n]</blue> 引用与 t_production_article_refer(index+1=n)相关性校对。 - * LLM 配置与 PromotionLlmService 相同;单条任务走 ReferenceCheck 队列。 + * LLM 配置与 PromotionLlmService 相同;异步任务走 RabbitMQ(一篇一条消息)。 */ class ReferenceCheckService { - const QUEUE_NAME = 'ReferenceCheck'; + /** API 返回:异步传输方式(RabbitMQ 文章批次) */ + const TRANSPORT_RABBITMQ = 'rabbitmq'; /** t_article_main.type */ const MAIN_TYPE_TEXT = 0; @@ -29,6 +30,9 @@ class ReferenceCheckService /** @var bool|null t_article_main 是否已有 ref_check_status 列 */ private static $amRefCheckStatusColumnExists = null; + /** 单条任务最多重试次数(不含首次执行) */ + const QUEUE_MAX_RETRY = 1; + /** * 引用校对状态(生命周期顺序:0→1→2→3 = 待→进行→完成→失败) * @@ -56,6 +60,12 @@ class ReferenceCheckService const RECORD_COMPLETED = 2; // 校对完成 const RECORD_FAILED = 3; // 校对失败 + /** 队列执行状态(queue_status) */ + const QUEUE_PENDING = 0; // 已入队待执行 + const QUEUE_RUNNING = 1; // worker 正在执行 + const QUEUE_COMPLETED = 2; // 执行完成 + const QUEUE_FAILED = 3; // 最终失败(重试耗尽) + /** LLM 评分(confidence)通过阈值:>= 该值视为"通过" */ const PASS_CONFIDENCE_THRESHOLD = 0.65; @@ -69,6 +79,12 @@ class ReferenceCheckService const BLUE_TAG_REGEX = '/\[([\d,,\-\x{2013}\x{2014}\x{2212}\x{2010}\x{2011}\s]+)\]<\/blue>/u'; const BLUE_TAG_REGEX_BRACKET_OUTSIDE = '/\[([\d,,\-\x{2013}\x{2014}\x{2212}\x{2010}\x{2011}\s]+)<\/blue>\]/u'; + private $logFile; + + public function __construct() + { + $this->logFile = ROOT_PATH . 'runtime' . DS . 'plagiarism_task.log'; + } /** * 兼容无 ?? 的 PHP 版本 */ @@ -77,6 +93,27 @@ class ReferenceCheckService return isset($arr[$key]) ? $arr[$key] : $default; } + /** 新建/重置校对明细时的队列初始字段 */ + private function newCheckRecordFields(array $fields, $queueStatus = self::QUEUE_PENDING, $retryCount = 0) + { + $fields['queue_status'] = intval($queueStatus); + $fields['retry_count'] = max(0, intval($retryCount)); + return $fields; + } + + public function markQueueRuntime($checkId, $queueStatus, $retryCount = null) + { + $checkId = intval($checkId); + if ($checkId <= 0) { + return 0; + } + $fields = ['queue_status' => intval($queueStatus)]; + if ($retryCount !== null) { + $fields['retry_count'] = max(0, intval($retryCount)); + } + return Db::name('article_reference_check_result')->where('id', $checkId)->update($fields); + } + /** * 合并匹配两种 blue 引用排版,按在正文中的起始位置排序。 * @@ -128,7 +165,7 @@ class ReferenceCheckService } $now = date('Y-m-d H:i:s'); - $checkId = Db::name('article_reference_check_result')->insertGetId([ + $checkId = Db::name('article_reference_check_result')->insertGetId($this->newCheckRecordFields([ 'article_id' => intval($this->arrGet($extra, 'article_id', 0)), 'am_id' => intval($this->arrGet($extra, 'am_id', 0)), 'p_article_id' => intval($this->arrGet($extra, 'p_article_id', 0)), @@ -145,14 +182,14 @@ class ReferenceCheckService 'status' => 0, 'created_at' => $now, 'updated_at' => $now, - ]); + ])); $amId = intval($this->arrGet($extra, 'am_id', 0)); if ($amId > 0) { $this->setAmRefCheckStatus($amId, self::AM_STATUS_RUNNING); } - $this->pushJob(intval($checkId), intval($this->arrGet($extra, 'queue_delay', 0))); + $this->startArticleCheckQueue([intval($checkId)], intval($this->arrGet($extra, 'p_article_id', 0)), 'enqueue'); return ['check_id' => $checkId, 'queued' => 1]; } @@ -190,7 +227,8 @@ class ReferenceCheckService } $skipped = 0; - $delay = 0; + $pendingJobs = []; + $now = date('Y-m-d H:i:s'); foreach ($citations as $cite) { foreach ($cite['reference_numbers'] as $refNo) { $referIndex = $refNo - 1; @@ -201,9 +239,7 @@ class ReferenceCheckService $refer = $referMap[$referIndex]; $referText = $this->formatReferForLlm($refer); - $now = date('Y-m-d H:i:s'); - // [70-73] 展开为 reference_no=70,71,72,73 共 4 条记录 - $checkId = Db::name('article_reference_check_result')->insertGetId([ + $checkId = Db::name('article_reference_check_result')->insertGetId($this->newCheckRecordFields([ 'article_id' => $main['article_id'], 'p_article_id' => $pArticleId, 'am_id' => intval($main['am_id']), @@ -211,22 +247,27 @@ class ReferenceCheckService 'refer_index' => $refNo, 'origin_text' => $cite['original_text'], 'refer_text' => $referText, - 'p_refer_id' => $referMap[$referIndex]['p_refer_id'], + 'p_refer_id' => $referMap[$referIndex]['p_refer_id'], 'text_start' => $cite['text_start'], 'text_end' => $cite['text_end'], + 'status' => self::RECORD_PENDING, 'created_at' => $now, 'updated_at' => $now, - ]); - $this->pushJob(intval($checkId), $delay); - $checkIds[] = $checkId; - $delay += 1; + ])); + $pendingJobs[] = [ + 'check_id' => intval($checkId), + 'reference_no' => intval($refNo), + 'am_id' => intval($main['am_id']), + 'text_start' => intval($cite['text_start']), + ]; } } + $this->enqueueChecksSortedByReferenceNo($pendingJobs, $pArticleId, 'enqueue'); $this->setAmRefCheckStatus($amId, self::AM_STATUS_RUNNING); } /** - * 手工触发:对已完成且 confidence<=0.65 的记录入队 DOI 第二轮复核 + * 手工触发:对已完成且 confidence<=0.65 的记录同步执行 Crossref 二轮复核 */ public function enqueueSecondPassByArticle($articleId) { @@ -247,7 +288,7 @@ class ReferenceCheckService $delay2 = 0; foreach ($rows as $checkLog) { $rowId = $this->resolveCheckRowId($checkLog); - if ($this->maybeEnqueueSecondPass($rowId, floatval($checkLog['confidence']))) { + if ($this->runSecondPassIfNeeded($rowId, floatval($checkLog['confidence']))) { $checkIds2[] = $rowId; $delay2 += 1; } @@ -299,7 +340,7 @@ class ReferenceCheckService $referText = $this->formatReferForLlm($refer); // [70-73] 展开为 reference_no=70,71,72,73 共 4 条记录;先入队表,再按文献号正序校对 - $checkId = Db::name('article_reference_check_result')->insertGetId([ + $checkId = Db::name('article_reference_check_result')->insertGetId($this->newCheckRecordFields([ 'article_id' => $main['article_id'], 'p_article_id' => $pArticleId, 'am_id' => $amId, @@ -310,9 +351,10 @@ class ReferenceCheckService 'p_refer_id' => $referMap[$referIndex]['p_refer_id'], 'text_start' => $cite['text_start'], 'text_end' => $cite['text_end'], + 'status' => self::RECORD_PENDING, 'created_at' => $now, 'updated_at' => $now, - ]); + ])); $pendingJobs[] = [ 'check_id' => intval($checkId), @@ -325,8 +367,7 @@ class ReferenceCheckService } } } - - $checkIds = $this->pushJobsSortedByReferenceNo($pendingJobs); + $checkIds = $this->enqueueChecksSortedByReferenceNo($pendingJobs, $pArticleId, 'enqueue'); foreach (array_keys($amIdsWithJobs) as $amId) { $this->setAmRefCheckStatus($amId, self::AM_STATUS_RUNNING); } @@ -337,7 +378,7 @@ class ReferenceCheckService 'queued' => $queued, 'skipped' => $skipped, 'check_ids' => $checkIds, - 'queue' => self::QUEUE_NAME, + 'queue' => self::TRANSPORT_RABBITMQ, ]; } public function enqueueByArticle($articleId){ @@ -386,7 +427,7 @@ class ReferenceCheckService $referText = $this->formatReferForLlm($refer); // [70-73] 展开为 reference_no=70,71,72,73 共 4 条记录;先入队表,再按文献号正序校对 - $checkId = Db::name('article_reference_check_result')->insertGetId([ + $checkId = Db::name('article_reference_check_result')->insertGetId($this->newCheckRecordFields([ 'article_id' => $main['article_id'], 'p_article_id' => $pArticleId, 'am_id' => $amId, @@ -397,9 +438,10 @@ class ReferenceCheckService 'p_refer_id' => $referMap[$referIndex]['p_refer_id'], 'text_start' => $cite['text_start'], 'text_end' => $cite['text_end'], + 'status' => self::RECORD_PENDING, 'created_at' => $now, 'updated_at' => $now, - ]); + ])); $pendingJobs[] = [ 'check_id' => intval($checkId), @@ -413,7 +455,7 @@ class ReferenceCheckService } } - $checkIds = $this->pushJobsSortedByReferenceNo($pendingJobs); + $checkIds = $this->enqueueChecksSortedByReferenceNo($pendingJobs, $pArticleId, 'enqueue'); foreach (array_keys($amIdsWithJobs) as $amId) { $this->setAmRefCheckStatus($amId, self::AM_STATUS_RUNNING); } @@ -424,7 +466,7 @@ class ReferenceCheckService 'queued' => $queued, 'skipped' => $skipped, 'check_ids' => $checkIds, - 'queue' => self::QUEUE_NAME, + 'queue' => self::TRANSPORT_RABBITMQ, ]; } @@ -524,14 +566,6 @@ class ReferenceCheckService ->whereIn('state', [0, 2]) ->value('article_id')); - // 先清掉旧记录对应的队列 Redis 锁,避免在途 worker 写回数据 - $oldIds = Db::name('article_reference_check_result') - ->where('p_article_id', $pArticleId) - ->column('id'); - foreach ($oldIds as $oldId) { - $this->clearReferenceCheckQueueLock(intval($oldId)); - } - $deleted = Db::name('article_reference_check_result') ->where('p_article_id', $pArticleId) ->delete(); @@ -553,14 +587,6 @@ class ReferenceCheckService return 0; } - // 先清掉旧记录对应的队列 Redis 锁,否则同 check_id 在 TTL 内不会再次执行 - $oldIds = Db::name('article_reference_check_result') - ->where('article_id', $articleId) - ->column('id'); - foreach ($oldIds as $oldId) { - $this->clearReferenceCheckQueueLock(intval($oldId)); - } - $deleted = Db::name('article_reference_check_result')->where('article_id', $articleId)->delete(); if ($this->hasAmRefCheckStatusColumn()) { Db::name('article_main') @@ -1518,7 +1544,7 @@ class ReferenceCheckService * 编辑某条文献内容后,按 p_refer_id 异步重新校对该文献对应的全部 check 明细 * * 流程:刷新 refer_text/refer_index → 重置 status/is_match/confidence/reason - * → 设节级 ref_check_status=RUNNING → 投递到 ReferenceCheck 队列 + * → 设节级 ref_check_status=RUNNING → 投递 RabbitMQ 文章批次 * * 与 recheckByRefer 的差异:本方法**不**在请求内同步跑 LLM,仅入队,立即返回。 * 前端可调 getProgressByPArticleId 轮询进度。 @@ -1567,11 +1593,11 @@ class ReferenceCheckService 'reset' => 0, 'queued' => 0, 'check_ids' => [], - 'queue' => self::QUEUE_NAME, + 'queue' => self::TRANSPORT_RABBITMQ, ]; } - $resetFields = [ + $resetFields = $this->newCheckRecordFields([ 'refer_text' => $referText, 'refer_index' => $referenceNo, 'reference_no' => $referenceNo, @@ -1582,14 +1608,13 @@ class ReferenceCheckService 'reason' => '', 'error_msg' => '', 'updated_at' => $now, - ]; + ], self::QUEUE_PENDING, 0); $pendingJobs = []; $amIds = []; foreach ($rows as $row) { $checkId = $this->resolveCheckRowId($row); Db::name('article_reference_check_result')->where('id', $checkId)->update($resetFields); - $this->clearReferenceCheckQueueLock($checkId); $pendingJobs[] = [ 'check_id' => $checkId, 'reference_no' => $referenceNo, @@ -1606,7 +1631,7 @@ class ReferenceCheckService $this->setAmRefCheckStatus($amId, self::AM_STATUS_RUNNING); } - $checkIds = $this->pushJobsSortedByReferenceNo($pendingJobs); + $checkIds = $this->enqueueChecksSortedByReferenceNo($pendingJobs, $pArticleId, 'enqueue'); return [ 'p_refer_id' => $pReferId, @@ -1615,7 +1640,7 @@ class ReferenceCheckService 'reset' => count($rows), 'queued' => count($checkIds), 'check_ids' => $checkIds, - 'queue' => self::QUEUE_NAME, + 'queue' => self::TRANSPORT_RABBITMQ, ]; } @@ -1652,7 +1677,7 @@ class ReferenceCheckService 'reset' => 0, 'queued' => 0, 'check_ids' => [], - 'queue' => self::QUEUE_NAME, + 'queue' => self::TRANSPORT_RABBITMQ, ]; } @@ -1661,7 +1686,7 @@ class ReferenceCheckService } $now = date('Y-m-d H:i:s'); - $resetFields = [ + $resetFields = $this->newCheckRecordFields([ 'status' => self::RECORD_PENDING, 'is_match' => 0, 'can_support' => 0, @@ -1669,14 +1694,13 @@ class ReferenceCheckService 'reason' => '', 'error_msg' => '', 'updated_at' => $now, - ]; + ], self::QUEUE_PENDING, 0); $pendingJobs = []; $amIds = []; foreach ($rows as $row) { $checkId = $this->resolveCheckRowId($row); Db::name('article_reference_check_result')->where('id', $checkId)->update($resetFields); - $this->clearReferenceCheckQueueLock($checkId); $pendingJobs[] = [ 'check_id' => $checkId, 'reference_no' => intval($this->arrGet($row, 'reference_no', 0)), @@ -1693,7 +1717,7 @@ class ReferenceCheckService $this->setAmRefCheckStatus($amId, self::AM_STATUS_RUNNING); } - $checkIds = $this->pushJobsSortedByReferenceNo($pendingJobs); + $checkIds = $this->enqueueChecksSortedByReferenceNo($pendingJobs, $pArticleId, 'recheck_failed'); return [ 'p_refer_id' => $pReferId, @@ -1701,7 +1725,7 @@ class ReferenceCheckService 'reset' => count($rows), 'queued' => count($checkIds), 'check_ids' => $checkIds, - 'queue' => self::QUEUE_NAME, + 'queue' => self::TRANSPORT_RABBITMQ, ]; } @@ -1735,11 +1759,11 @@ class ReferenceCheckService 'reset' => 0, 'queued' => 0, 'check_ids' => [], - 'queue' => self::QUEUE_NAME, + 'queue' => self::TRANSPORT_RABBITMQ, ]; } - $resetFields = [ + $resetFields = $this->newCheckRecordFields([ 'refer_text' => $referText, 'p_refer_id' => $pReferId, 'p_article_id' => $pArticleId, @@ -1751,7 +1775,7 @@ class ReferenceCheckService 'reason' => '', 'error_msg' => '', 'updated_at' => $now, - ]; + ], self::QUEUE_PENDING, 0); $pendingJobs = []; $amIds = []; @@ -1790,7 +1814,6 @@ class ReferenceCheckService foreach ($pendingJobs as $job) { $checkId = intval($job['check_id']); $checkIds[] = $checkId; - $this->clearReferenceCheckQueueLock($checkId); try { $results[] = $this->runReferenceCheckOnce($checkId); } catch (\Exception $e) { @@ -1819,31 +1842,6 @@ class ReferenceCheckService ]; } - /** - * 清除队列 Redis 完成标记,避免重检任务被 acquireLock 静默丢弃 - */ - public function clearReferenceCheckQueueLock($checkId) - { - $checkId = intval($checkId); - if ($checkId <= 0) { - return; - } - try { - $keys = []; - foreach (['queue_job', 'queue_job_two'] as $prefix) { - $class = $prefix === 'queue_job_two' - ? 'app\\api\\job\\ReferenceCheckTwo' - : 'app\\api\\job\\ReferenceCheck'; - $base = $prefix . ':' . $class . ':' . $checkId; - $keys[] = $base; - $keys[] = $base . ':status'; - } - QueueRedis::getInstance()->deleteRedisKeys($keys); - } catch (\Exception $e) { - \think\Log::warning('clearReferenceCheckQueueLock id=' . $checkId . ' ' . $e->getMessage()); - } - } - /** * 执行一次引用 LLM 校对(同步,写回 article_reference_check_result) */ @@ -1884,8 +1882,7 @@ class ReferenceCheckService $confidence = floatval(isset($llmResult['confidence']) ? $llmResult['confidence'] : 0); $reason = isset($llmResult['reason']) ? $llmResult['reason'] : ''; - // LLM 通讯失败:写 status=RECORD_FAILED(3) + error_msg,抛异常让队列 worker 走 release(30) 重试; - // 重试 3 次后 ReferenceCheck::markFailed 会保持 status=3 收尾 + // LLM 通讯失败:写 status=RECORD_FAILED(3) + error_msg,抛异常由 MQ worker 重试 if ($requestFailed) { $this->updateCheckResult($checkId, [ 'confidence' => $confidence, @@ -1893,7 +1890,6 @@ class ReferenceCheckService 'status' => self::RECORD_FAILED, 'error_msg' => $reason, ]); - $this->clearReferenceCheckQueueLock($checkId); throw new \RuntimeException($reason !== '' ? $reason : 'LLM request failed'); } @@ -1906,8 +1902,9 @@ class ReferenceCheckService 'error_msg' => '', ]); - $this->clearReferenceCheckQueueLock($checkId); - $this->maybeEnqueueSecondPass($checkId, $confidence); + if ($confidence <= self::PASS_CONFIDENCE_THRESHOLD) { + $this->runSecondPassBlocking($checkId, $row, $contentA, $refer, $contentB); + } return [ 'check_id' => $checkId, @@ -1918,6 +1915,82 @@ class ReferenceCheckService ]; } + /** + * 低分结果的二轮 DOI 复核(同步阻塞执行;失败重试一次) + */ + public function runSecondPassBlocking($checkId, array $row, $contentA, $refer, $referText) + { + $checkId = intval($checkId); + if ($checkId <= 0) { + return false; + } + + $payload = $this->prepareRecheckPayload(is_array($refer) ? $refer : [], trim((string)$referText)); + if (empty($payload['has_abstract']) || trim((string)$payload['doi_block']) === '') { + return false; + } + + $lastError = ''; + for ($attempt = 0; $attempt < 2; $attempt++) { + try { + $llmResult = (new LLMService())->checkReference($contentA, trim((string)$referText), true, $payload['doi_block']); + $requestFailed = !empty($llmResult['request_failed']); + $canSupport = $this->parseLlmCanSupport($llmResult); + $confidence = floatval(isset($llmResult['confidence']) ? $llmResult['confidence'] : 0); + $tag = '[Crossref复核' . (trim((string)$payload['doi_used']) !== '' ? (' ' . trim((string)$payload['doi_used'])) : '') . ']'; + $reason = $tag . ' ' . (isset($llmResult['reason']) ? $llmResult['reason'] : ''); + + if ($requestFailed) { + $lastError = isset($llmResult['reason']) ? (string)$llmResult['reason'] : 'LLM request failed'; + if ($attempt < 1) { + continue; + } + $this->updateCheckResult($checkId, [ + 'confidence' => $confidence, + 'reason' => $reason, + 'status' => self::RECORD_FAILED, + 'error_msg' => $lastError, + ]); + $amId = intval(isset($row['am_id']) ? $row['am_id'] : 0); + if ($amId > 0) { + $this->syncAmRefCheckStatus($amId); + } + return false; + } + + $this->updateCheckResult($checkId, [ + 'can_support' => $canSupport ? 1 : 0, + 'is_match' => $canSupport ? 1 : 0, + 'confidence' => $confidence, + 'reason' => $reason, + 'status' => self::RECORD_COMPLETED, + 'error_msg' => '', + ]); + $amId = intval(isset($row['am_id']) ? $row['am_id'] : 0); + if ($amId > 0) { + $this->syncAmRefCheckStatus($amId); + } + return true; + } catch (\Exception $e) { + $lastError = $e->getMessage(); + if ($attempt < 1) { + continue; + } + $this->updateCheckResult($checkId, [ + 'status' => self::RECORD_FAILED, + 'error_msg' => $lastError, + ]); + $amId = intval(isset($row['am_id']) ? $row['am_id'] : 0); + if ($amId > 0) { + $this->syncAmRefCheckStatus($amId); + } + return false; + } + } + + return false; + } + /** * @return array{refer: array, p_article_id: int, p_refer_id: int, reference_no: int} */ @@ -2622,18 +2695,13 @@ class ReferenceCheckService } /** - * 第一轮 confidence<=0.65 且能抓到 DOI 真实内容时,延迟入队第二轮复核 - * - * 跳过条件(避免无意义重跑得到相同结果): - * - check_id 不合法 / 一次置信度高于阈值 - * - refer 行不存在 - * - refer_doi 为空或 Crossref 未返回摘要 + * 对已完成且低分的记录尝试同步 Crossref 二轮(供 enqueueSecondPassByArticle 等手工入口) */ - public function maybeEnqueueSecondPass($checkId, $confidence) + public function runSecondPassIfNeeded($checkId, $confidence) { $checkId = intval($checkId); $confidence = floatval($confidence); - if ($checkId <= 0 || $confidence > 0.65) { + if ($checkId <= 0 || $confidence > self::PASS_CONFIDENCE_THRESHOLD) { return false; } @@ -2658,9 +2726,13 @@ class ReferenceCheckService return false; } - $this->clearReferenceCheckQueueLock($checkId); - $this->pushJob2($checkId, 5); - return true; + $contentA = $this->resolveMainContentForJob($row); + $referText = trim((string)$this->arrGet($row, 'refer_text', '')); + if ($referText === '' && is_array($refer)) { + $referText = $this->formatReferForLlm($refer); + } + + return $this->runSecondPassBlocking($checkId, $row, $contentA, $refer, $referText); } /** @@ -3047,72 +3119,93 @@ class ReferenceCheckService } /** - * 已入库记录按文献编号正序入队(同号按 am_id、正文位置稳定排序) + * 批量记录已入库后创建文章批次并投递 RabbitMQ * - * @param array $rows 元素含 check_id、reference_no,可选 am_id、text_start + * @param array $rows 元素含 check_id + * @param int $pArticleId + * @param string $trigger enqueue|recheck_failed|manual + * @return int[] check_id 列表 */ - private function pushJobsSortedByReferenceNo(array $rows) + private function enqueueChecksSortedByReferenceNo(array $rows, $pArticleId = 0, $trigger = 'enqueue') { - if (empty($rows)) { + $checkIds = []; + foreach ($rows as $row) { + $checkId = intval($row['check_id']); + if ($checkId > 0) { + $checkIds[] = $checkId; + } + } + if (!empty($checkIds)) { + $this->startArticleCheckQueue($checkIds, intval($pArticleId), $trigger); + } + return $checkIds; + } + + /** + * 创建文章批次;队首批次立即发 MQ,其余批次等待前序完成 + * + * @param int[] $checkIds + * @param int $pArticleId + * @param string $trigger + * @return int[] + */ + public function startArticleCheckQueue(array $checkIds, $pArticleId = 0, $trigger = 'enqueue') + { + $checkIds = array_values(array_filter(array_map('intval', $checkIds))); + if (empty($checkIds)) { return []; } - usort($rows, function ($a, $b) { - if ($a['reference_no'] !== $b['reference_no']) { - return $a['reference_no'] - $b['reference_no']; - } - $amA = isset($a['am_id']) ? intval($a['am_id']) : 0; - $amB = isset($b['am_id']) ? intval($b['am_id']) : 0; - if ($amA !== $amB) { - return $amA - $amB; - } - $posA = isset($a['text_start']) ? intval($a['text_start']) : 0; - $posB = isset($b['text_start']) ? intval($b['text_start']) : 0; - return $posA - $posB; - }); + $pArticleId = intval($pArticleId); + if ($pArticleId <= 0) { + $firstRow = Db::name('article_reference_check_result')->where('id', $checkIds[0])->find(); + $pArticleId = empty($firstRow) ? 0 : intval($this->arrGet($firstRow, 'p_article_id', 0)); + } + if ($pArticleId <= 0) { + throw new \RuntimeException('p_article_id is required for reference check queue'); + } - $checkIds = []; - $delay = 0; - foreach ($rows as $row) { - $checkId = intval($row['check_id']); - $checkIds[] = $checkId; - $this->pushJob($checkId, $delay); - $delay++; + $now = date('Y-m-d H:i:s'); + $batchId = Db::name('article_reference_check_batch')->insertGetId([ + 'p_article_id' => $pArticleId, + 'batch_status' => 0, + 'total_count' => count($checkIds), + 'done_count' => 0, + 'failed_count' => 0, + 'trigger' => (string)$trigger, + 'created_at' => $now, + 'updated_at' => $now, + ]); + + $shouldPublish = !$this->hasEarlierWaitingBatch($batchId) && !$this->hasRunningReferenceCheckBatch(); + if ($shouldPublish) { + (new ReferenceCheckMqPublisher())->publishArticleStart($pArticleId, intval($batchId), $trigger); + $this->log('startArticleCheckQueue publish p_article_id=' . $pArticleId . ' batch_id=' . $batchId); + } else { + $this->log('startArticleCheckQueue queued batch_id=' . $batchId . ' p_article_id=' . $pArticleId); } return $checkIds; } - private function pushJob($checkId, $delaySeconds = 0) + private function hasRunningReferenceCheckBatch() { - $checkId = intval($checkId); - $this->clearReferenceCheckQueueLock($checkId); - $jobClass = 'app\api\job\ReferenceCheck@fire'; - $data = ['check_id' => $checkId]; - try { - if ($delaySeconds > 0) { - $jobId = Queue::later($delaySeconds, $jobClass, $data, self::QUEUE_NAME); - } else { - $jobId = Queue::push($jobClass, $data, self::QUEUE_NAME); - } - } catch (\Exception $e) { - \think\Log::error('ReferenceCheck pushJob failed check_id=' . $checkId . ' ' . $e->getMessage()); - throw $e; - } + return Db::name('article_reference_check_batch') + ->where('batch_status', 1) + ->count() > 0; } - private function pushJob2($checkId, $delaySeconds = 0) + + private function hasEarlierWaitingBatch($batchId) { - $jobClass = 'app\api\job\ReferenceCheckTwo@fire'; - $data = ['check_id' => $checkId]; - try { - if ($delaySeconds > 0) { - $jobId = Queue::later($delaySeconds, $jobClass, $data, self::QUEUE_NAME); - } else { - $jobId = Queue::push($jobClass, $data, self::QUEUE_NAME); - } - } catch (\Exception $e) { - \think\Log::error('ReferenceCheckTwo pushJob failed check_id=' . $checkId . ' ' . $e->getMessage()); - throw $e; - } + return Db::name('article_reference_check_batch') + ->where('batch_status', 0) + ->where('id', '<', intval($batchId)) + ->count() > 0; + } + + public function log($msg) + { + $line = date('Y-m-d H:i:s') . ' ' . $msg . PHP_EOL; + @file_put_contents($this->logFile, $line, FILE_APPEND); } } diff --git a/application/common/service/LLMService.php b/application/common/service/LLMService.php index 69f5e61c..fdd1fb42 100644 --- a/application/common/service/LLMService.php +++ b/application/common/service/LLMService.php @@ -33,7 +33,7 @@ class LLMService public function checkReference($contextText, $referText, $isAgain = false, $doiBlock = null) { // request_failed=true 表示"LLM 通讯/解析层面的失败"(可重试,区别于业务上的"未命中"); - // 上游 runReferenceCheckOnce 会据此把 DB.status 置为 2(失败) 并抛异常触发队列重试 + // 上游 runReferenceCheckOnce 会据此把 DB.status 置为 3(失败) 并抛异常触发 MQ worker 重试 $fallback = [ 'can_support' => false, 'is_match' => false,