文献校对功能 转rabbitMQ
This commit is contained in:
@@ -4,16 +4,17 @@ namespace app\common;
|
||||
|
||||
use think\Db;
|
||||
use think\Env;
|
||||
use think\Queue;
|
||||
use app\common\service\LLMService;
|
||||
use app\common\mq\ReferenceCheckMqPublisher;
|
||||
|
||||
/**
|
||||
* 正文 <blue>[n]</blue> 引用与 t_production_article_refer(index+1=n)相关性校对。
|
||||
* LLM 配置与 PromotionLlmService 相同;单条任务走 ReferenceCheck 队列。
|
||||
* LLM 配置与 PromotionLlmService 相同;异步任务走 RabbitMQ(一篇一条消息)。
|
||||
*/
|
||||
class ReferenceCheckService
|
||||
{
|
||||
const QUEUE_NAME = 'ReferenceCheck';
|
||||
/** API 返回:异步传输方式(RabbitMQ 文章批次) */
|
||||
const TRANSPORT_RABBITMQ = 'rabbitmq';
|
||||
|
||||
/** t_article_main.type */
|
||||
const MAIN_TYPE_TEXT = 0;
|
||||
@@ -29,6 +30,9 @@ class ReferenceCheckService
|
||||
/** @var bool|null t_article_main 是否已有 ref_check_status 列 */
|
||||
private static $amRefCheckStatusColumnExists = null;
|
||||
|
||||
/** 单条任务最多重试次数(不含首次执行) */
|
||||
const QUEUE_MAX_RETRY = 1;
|
||||
|
||||
/**
|
||||
* 引用校对状态(生命周期顺序:0→1→2→3 = 待→进行→完成→失败)
|
||||
*
|
||||
@@ -56,6 +60,12 @@ class ReferenceCheckService
|
||||
const RECORD_COMPLETED = 2; // 校对完成
|
||||
const RECORD_FAILED = 3; // 校对失败
|
||||
|
||||
/** 队列执行状态(queue_status) */
|
||||
const QUEUE_PENDING = 0; // 已入队待执行
|
||||
const QUEUE_RUNNING = 1; // worker 正在执行
|
||||
const QUEUE_COMPLETED = 2; // 执行完成
|
||||
const QUEUE_FAILED = 3; // 最终失败(重试耗尽)
|
||||
|
||||
/** LLM 评分(confidence)通过阈值:>= 该值视为"通过" */
|
||||
const PASS_CONFIDENCE_THRESHOLD = 0.65;
|
||||
|
||||
@@ -69,6 +79,12 @@ class ReferenceCheckService
|
||||
const BLUE_TAG_REGEX = '/<blue>\[([\d,,\-\x{2013}\x{2014}\x{2212}\x{2010}\x{2011}\s]+)\]<\/blue>/u';
|
||||
const BLUE_TAG_REGEX_BRACKET_OUTSIDE = '/\[<blue>([\d,,\-\x{2013}\x{2014}\x{2212}\x{2010}\x{2011}\s]+)<\/blue>\]/u';
|
||||
|
||||
private $logFile;
|
||||
|
||||
/**
 * Set up the destination used by log().
 *
 * The path is assembled from the framework constants ROOT_PATH and DS and
 * resolves to "<ROOT_PATH>runtime<DS>plagiarism_task.log".
 *
 * NOTE(review): the file name says "plagiarism" although this class handles
 * reference checks — presumably a shared log file; confirm before renaming.
 */
public function __construct()
{
    $runtimeDir = ROOT_PATH . 'runtime' . DS;
    $this->logFile = $runtimeDir . 'plagiarism_task.log';
}
|
||||
/**
|
||||
* 兼容无 ?? 的 PHP 版本
|
||||
*/
|
||||
@@ -77,6 +93,27 @@ class ReferenceCheckService
|
||||
return isset($arr[$key]) ? $arr[$key] : $default;
|
||||
}
|
||||
|
||||
/**
 * Add the initial queue bookkeeping columns to a check-record field set.
 *
 * Used both when inserting a new check row and when resetting an existing one.
 * Existing 'queue_status' / 'retry_count' entries in $fields are overwritten.
 *
 * @param array $fields      column => value pairs for the check row
 * @param int   $queueStatus value stored in queue_status (cast to int)
 * @param int   $retryCount  value stored in retry_count (cast to int, negatives become 0)
 * @return array $fields with queue_status and retry_count set
 */
private function newCheckRecordFields(array $fields, $queueStatus = self::QUEUE_PENDING, $retryCount = 0)
{
    $retries = intval($retryCount);
    if ($retries < 0) {
        // A retry counter can never be negative.
        $retries = 0;
    }

    $fields['queue_status'] = intval($queueStatus);
    $fields['retry_count'] = $retries;

    return $fields;
}
|
||||
|
||||
/**
 * Update the queue execution columns of one article_reference_check_result row.
 *
 * @param int      $checkId     row id; values <= 0 are ignored
 * @param int      $queueStatus new queue_status (cast to int)
 * @param int|null $retryCount  new retry_count (negatives become 0); null leaves the column untouched
 * @return mixed 0 when $checkId is not positive, otherwise whatever the Db update() call returns
 */
public function markQueueRuntime($checkId, $queueStatus, $retryCount = null)
{
    $rowId = intval($checkId);
    if ($rowId < 1) {
        return 0;
    }

    $changes = ['queue_status' => intval($queueStatus)];
    if (!is_null($retryCount)) {
        $changes['retry_count'] = max(0, intval($retryCount));
    }

    $query = Db::name('article_reference_check_result')->where('id', $rowId);

    return $query->update($changes);
}
|
||||
|
||||
/**
|
||||
* 合并匹配两种 blue 引用排版,按在正文中的起始位置排序。
|
||||
*
|
||||
@@ -128,7 +165,7 @@ class ReferenceCheckService
|
||||
}
|
||||
|
||||
$now = date('Y-m-d H:i:s');
|
||||
$checkId = Db::name('article_reference_check_result')->insertGetId([
|
||||
$checkId = Db::name('article_reference_check_result')->insertGetId($this->newCheckRecordFields([
|
||||
'article_id' => intval($this->arrGet($extra, 'article_id', 0)),
|
||||
'am_id' => intval($this->arrGet($extra, 'am_id', 0)),
|
||||
'p_article_id' => intval($this->arrGet($extra, 'p_article_id', 0)),
|
||||
@@ -145,14 +182,14 @@ class ReferenceCheckService
|
||||
'status' => 0,
|
||||
'created_at' => $now,
|
||||
'updated_at' => $now,
|
||||
]);
|
||||
]));
|
||||
|
||||
$amId = intval($this->arrGet($extra, 'am_id', 0));
|
||||
if ($amId > 0) {
|
||||
$this->setAmRefCheckStatus($amId, self::AM_STATUS_RUNNING);
|
||||
}
|
||||
|
||||
$this->pushJob(intval($checkId), intval($this->arrGet($extra, 'queue_delay', 0)));
|
||||
$this->startArticleCheckQueue([intval($checkId)], intval($this->arrGet($extra, 'p_article_id', 0)), 'enqueue');
|
||||
|
||||
return ['check_id' => $checkId, 'queued' => 1];
|
||||
}
|
||||
@@ -190,7 +227,8 @@ class ReferenceCheckService
|
||||
}
|
||||
|
||||
$skipped = 0;
|
||||
$delay = 0;
|
||||
$pendingJobs = [];
|
||||
$now = date('Y-m-d H:i:s');
|
||||
foreach ($citations as $cite) {
|
||||
foreach ($cite['reference_numbers'] as $refNo) {
|
||||
$referIndex = $refNo - 1;
|
||||
@@ -201,9 +239,7 @@ class ReferenceCheckService
|
||||
$refer = $referMap[$referIndex];
|
||||
$referText = $this->formatReferForLlm($refer);
|
||||
|
||||
$now = date('Y-m-d H:i:s');
|
||||
// [70-73] 展开为 reference_no=70,71,72,73 共 4 条记录
|
||||
$checkId = Db::name('article_reference_check_result')->insertGetId([
|
||||
$checkId = Db::name('article_reference_check_result')->insertGetId($this->newCheckRecordFields([
|
||||
'article_id' => $main['article_id'],
|
||||
'p_article_id' => $pArticleId,
|
||||
'am_id' => intval($main['am_id']),
|
||||
@@ -211,22 +247,27 @@ class ReferenceCheckService
|
||||
'refer_index' => $refNo,
|
||||
'origin_text' => $cite['original_text'],
|
||||
'refer_text' => $referText,
|
||||
'p_refer_id' => $referMap[$referIndex]['p_refer_id'],
|
||||
'p_refer_id' => $referMap[$referIndex]['p_refer_id'],
|
||||
'text_start' => $cite['text_start'],
|
||||
'text_end' => $cite['text_end'],
|
||||
'status' => self::RECORD_PENDING,
|
||||
'created_at' => $now,
|
||||
'updated_at' => $now,
|
||||
]);
|
||||
$this->pushJob(intval($checkId), $delay);
|
||||
$checkIds[] = $checkId;
|
||||
$delay += 1;
|
||||
]));
|
||||
$pendingJobs[] = [
|
||||
'check_id' => intval($checkId),
|
||||
'reference_no' => intval($refNo),
|
||||
'am_id' => intval($main['am_id']),
|
||||
'text_start' => intval($cite['text_start']),
|
||||
];
|
||||
}
|
||||
}
|
||||
|
||||
$this->enqueueChecksSortedByReferenceNo($pendingJobs, $pArticleId, 'enqueue');
|
||||
$this->setAmRefCheckStatus($amId, self::AM_STATUS_RUNNING);
|
||||
}
|
||||
/**
|
||||
* 手工触发:对已完成且 confidence<=0.65 的记录入队 DOI 第二轮复核
|
||||
* 手工触发:对已完成且 confidence<=0.65 的记录同步执行 Crossref 二轮复核
|
||||
*/
|
||||
public function enqueueSecondPassByArticle($articleId)
|
||||
{
|
||||
@@ -247,7 +288,7 @@ class ReferenceCheckService
|
||||
$delay2 = 0;
|
||||
foreach ($rows as $checkLog) {
|
||||
$rowId = $this->resolveCheckRowId($checkLog);
|
||||
if ($this->maybeEnqueueSecondPass($rowId, floatval($checkLog['confidence']))) {
|
||||
if ($this->runSecondPassIfNeeded($rowId, floatval($checkLog['confidence']))) {
|
||||
$checkIds2[] = $rowId;
|
||||
$delay2 += 1;
|
||||
}
|
||||
@@ -299,7 +340,7 @@ class ReferenceCheckService
|
||||
$referText = $this->formatReferForLlm($refer);
|
||||
|
||||
// [70-73] 展开为 reference_no=70,71,72,73 共 4 条记录;先入队表,再按文献号正序校对
|
||||
$checkId = Db::name('article_reference_check_result')->insertGetId([
|
||||
$checkId = Db::name('article_reference_check_result')->insertGetId($this->newCheckRecordFields([
|
||||
'article_id' => $main['article_id'],
|
||||
'p_article_id' => $pArticleId,
|
||||
'am_id' => $amId,
|
||||
@@ -310,9 +351,10 @@ class ReferenceCheckService
|
||||
'p_refer_id' => $referMap[$referIndex]['p_refer_id'],
|
||||
'text_start' => $cite['text_start'],
|
||||
'text_end' => $cite['text_end'],
|
||||
'status' => self::RECORD_PENDING,
|
||||
'created_at' => $now,
|
||||
'updated_at' => $now,
|
||||
]);
|
||||
]));
|
||||
|
||||
$pendingJobs[] = [
|
||||
'check_id' => intval($checkId),
|
||||
@@ -325,8 +367,7 @@ class ReferenceCheckService
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
$checkIds = $this->pushJobsSortedByReferenceNo($pendingJobs);
|
||||
$checkIds = $this->enqueueChecksSortedByReferenceNo($pendingJobs, $pArticleId, 'enqueue');
|
||||
foreach (array_keys($amIdsWithJobs) as $amId) {
|
||||
$this->setAmRefCheckStatus($amId, self::AM_STATUS_RUNNING);
|
||||
}
|
||||
@@ -337,7 +378,7 @@ class ReferenceCheckService
|
||||
'queued' => $queued,
|
||||
'skipped' => $skipped,
|
||||
'check_ids' => $checkIds,
|
||||
'queue' => self::QUEUE_NAME,
|
||||
'queue' => self::TRANSPORT_RABBITMQ,
|
||||
];
|
||||
}
|
||||
public function enqueueByArticle($articleId){
|
||||
@@ -386,7 +427,7 @@ class ReferenceCheckService
|
||||
$referText = $this->formatReferForLlm($refer);
|
||||
|
||||
// [70-73] 展开为 reference_no=70,71,72,73 共 4 条记录;先入队表,再按文献号正序校对
|
||||
$checkId = Db::name('article_reference_check_result')->insertGetId([
|
||||
$checkId = Db::name('article_reference_check_result')->insertGetId($this->newCheckRecordFields([
|
||||
'article_id' => $main['article_id'],
|
||||
'p_article_id' => $pArticleId,
|
||||
'am_id' => $amId,
|
||||
@@ -397,9 +438,10 @@ class ReferenceCheckService
|
||||
'p_refer_id' => $referMap[$referIndex]['p_refer_id'],
|
||||
'text_start' => $cite['text_start'],
|
||||
'text_end' => $cite['text_end'],
|
||||
'status' => self::RECORD_PENDING,
|
||||
'created_at' => $now,
|
||||
'updated_at' => $now,
|
||||
]);
|
||||
]));
|
||||
|
||||
$pendingJobs[] = [
|
||||
'check_id' => intval($checkId),
|
||||
@@ -413,7 +455,7 @@ class ReferenceCheckService
|
||||
}
|
||||
}
|
||||
|
||||
$checkIds = $this->pushJobsSortedByReferenceNo($pendingJobs);
|
||||
$checkIds = $this->enqueueChecksSortedByReferenceNo($pendingJobs, $pArticleId, 'enqueue');
|
||||
foreach (array_keys($amIdsWithJobs) as $amId) {
|
||||
$this->setAmRefCheckStatus($amId, self::AM_STATUS_RUNNING);
|
||||
}
|
||||
@@ -424,7 +466,7 @@ class ReferenceCheckService
|
||||
'queued' => $queued,
|
||||
'skipped' => $skipped,
|
||||
'check_ids' => $checkIds,
|
||||
'queue' => self::QUEUE_NAME,
|
||||
'queue' => self::TRANSPORT_RABBITMQ,
|
||||
];
|
||||
}
|
||||
|
||||
@@ -524,14 +566,6 @@ class ReferenceCheckService
|
||||
->whereIn('state', [0, 2])
|
||||
->value('article_id'));
|
||||
|
||||
// 先清掉旧记录对应的队列 Redis 锁,避免在途 worker 写回数据
|
||||
$oldIds = Db::name('article_reference_check_result')
|
||||
->where('p_article_id', $pArticleId)
|
||||
->column('id');
|
||||
foreach ($oldIds as $oldId) {
|
||||
$this->clearReferenceCheckQueueLock(intval($oldId));
|
||||
}
|
||||
|
||||
$deleted = Db::name('article_reference_check_result')
|
||||
->where('p_article_id', $pArticleId)
|
||||
->delete();
|
||||
@@ -553,14 +587,6 @@ class ReferenceCheckService
|
||||
return 0;
|
||||
}
|
||||
|
||||
// 先清掉旧记录对应的队列 Redis 锁,否则同 check_id 在 TTL 内不会再次执行
|
||||
$oldIds = Db::name('article_reference_check_result')
|
||||
->where('article_id', $articleId)
|
||||
->column('id');
|
||||
foreach ($oldIds as $oldId) {
|
||||
$this->clearReferenceCheckQueueLock(intval($oldId));
|
||||
}
|
||||
|
||||
$deleted = Db::name('article_reference_check_result')->where('article_id', $articleId)->delete();
|
||||
if ($this->hasAmRefCheckStatusColumn()) {
|
||||
Db::name('article_main')
|
||||
@@ -1518,7 +1544,7 @@ class ReferenceCheckService
|
||||
* 编辑某条文献内容后,按 p_refer_id 异步重新校对该文献对应的全部 check 明细
|
||||
*
|
||||
* 流程:刷新 refer_text/refer_index → 重置 status/is_match/confidence/reason
|
||||
* → 设节级 ref_check_status=RUNNING → 投递到 ReferenceCheck 队列
|
||||
* → 设节级 ref_check_status=RUNNING → 投递 RabbitMQ 文章批次
|
||||
*
|
||||
* 与 recheckByRefer 的差异:本方法**不**在请求内同步跑 LLM,仅入队,立即返回。
|
||||
* 前端可调 getProgressByPArticleId 轮询进度。
|
||||
@@ -1567,11 +1593,11 @@ class ReferenceCheckService
|
||||
'reset' => 0,
|
||||
'queued' => 0,
|
||||
'check_ids' => [],
|
||||
'queue' => self::QUEUE_NAME,
|
||||
'queue' => self::TRANSPORT_RABBITMQ,
|
||||
];
|
||||
}
|
||||
|
||||
$resetFields = [
|
||||
$resetFields = $this->newCheckRecordFields([
|
||||
'refer_text' => $referText,
|
||||
'refer_index' => $referenceNo,
|
||||
'reference_no' => $referenceNo,
|
||||
@@ -1582,14 +1608,13 @@ class ReferenceCheckService
|
||||
'reason' => '',
|
||||
'error_msg' => '',
|
||||
'updated_at' => $now,
|
||||
];
|
||||
], self::QUEUE_PENDING, 0);
|
||||
|
||||
$pendingJobs = [];
|
||||
$amIds = [];
|
||||
foreach ($rows as $row) {
|
||||
$checkId = $this->resolveCheckRowId($row);
|
||||
Db::name('article_reference_check_result')->where('id', $checkId)->update($resetFields);
|
||||
$this->clearReferenceCheckQueueLock($checkId);
|
||||
$pendingJobs[] = [
|
||||
'check_id' => $checkId,
|
||||
'reference_no' => $referenceNo,
|
||||
@@ -1606,7 +1631,7 @@ class ReferenceCheckService
|
||||
$this->setAmRefCheckStatus($amId, self::AM_STATUS_RUNNING);
|
||||
}
|
||||
|
||||
$checkIds = $this->pushJobsSortedByReferenceNo($pendingJobs);
|
||||
$checkIds = $this->enqueueChecksSortedByReferenceNo($pendingJobs, $pArticleId, 'enqueue');
|
||||
|
||||
return [
|
||||
'p_refer_id' => $pReferId,
|
||||
@@ -1615,7 +1640,7 @@ class ReferenceCheckService
|
||||
'reset' => count($rows),
|
||||
'queued' => count($checkIds),
|
||||
'check_ids' => $checkIds,
|
||||
'queue' => self::QUEUE_NAME,
|
||||
'queue' => self::TRANSPORT_RABBITMQ,
|
||||
];
|
||||
}
|
||||
|
||||
@@ -1652,7 +1677,7 @@ class ReferenceCheckService
|
||||
'reset' => 0,
|
||||
'queued' => 0,
|
||||
'check_ids' => [],
|
||||
'queue' => self::QUEUE_NAME,
|
||||
'queue' => self::TRANSPORT_RABBITMQ,
|
||||
];
|
||||
}
|
||||
|
||||
@@ -1661,7 +1686,7 @@ class ReferenceCheckService
|
||||
}
|
||||
|
||||
$now = date('Y-m-d H:i:s');
|
||||
$resetFields = [
|
||||
$resetFields = $this->newCheckRecordFields([
|
||||
'status' => self::RECORD_PENDING,
|
||||
'is_match' => 0,
|
||||
'can_support' => 0,
|
||||
@@ -1669,14 +1694,13 @@ class ReferenceCheckService
|
||||
'reason' => '',
|
||||
'error_msg' => '',
|
||||
'updated_at' => $now,
|
||||
];
|
||||
], self::QUEUE_PENDING, 0);
|
||||
|
||||
$pendingJobs = [];
|
||||
$amIds = [];
|
||||
foreach ($rows as $row) {
|
||||
$checkId = $this->resolveCheckRowId($row);
|
||||
Db::name('article_reference_check_result')->where('id', $checkId)->update($resetFields);
|
||||
$this->clearReferenceCheckQueueLock($checkId);
|
||||
$pendingJobs[] = [
|
||||
'check_id' => $checkId,
|
||||
'reference_no' => intval($this->arrGet($row, 'reference_no', 0)),
|
||||
@@ -1693,7 +1717,7 @@ class ReferenceCheckService
|
||||
$this->setAmRefCheckStatus($amId, self::AM_STATUS_RUNNING);
|
||||
}
|
||||
|
||||
$checkIds = $this->pushJobsSortedByReferenceNo($pendingJobs);
|
||||
$checkIds = $this->enqueueChecksSortedByReferenceNo($pendingJobs, $pArticleId, 'recheck_failed');
|
||||
|
||||
return [
|
||||
'p_refer_id' => $pReferId,
|
||||
@@ -1701,7 +1725,7 @@ class ReferenceCheckService
|
||||
'reset' => count($rows),
|
||||
'queued' => count($checkIds),
|
||||
'check_ids' => $checkIds,
|
||||
'queue' => self::QUEUE_NAME,
|
||||
'queue' => self::TRANSPORT_RABBITMQ,
|
||||
];
|
||||
}
|
||||
|
||||
@@ -1735,11 +1759,11 @@ class ReferenceCheckService
|
||||
'reset' => 0,
|
||||
'queued' => 0,
|
||||
'check_ids' => [],
|
||||
'queue' => self::QUEUE_NAME,
|
||||
'queue' => self::TRANSPORT_RABBITMQ,
|
||||
];
|
||||
}
|
||||
|
||||
$resetFields = [
|
||||
$resetFields = $this->newCheckRecordFields([
|
||||
'refer_text' => $referText,
|
||||
'p_refer_id' => $pReferId,
|
||||
'p_article_id' => $pArticleId,
|
||||
@@ -1751,7 +1775,7 @@ class ReferenceCheckService
|
||||
'reason' => '',
|
||||
'error_msg' => '',
|
||||
'updated_at' => $now,
|
||||
];
|
||||
], self::QUEUE_PENDING, 0);
|
||||
|
||||
$pendingJobs = [];
|
||||
$amIds = [];
|
||||
@@ -1790,7 +1814,6 @@ class ReferenceCheckService
|
||||
foreach ($pendingJobs as $job) {
|
||||
$checkId = intval($job['check_id']);
|
||||
$checkIds[] = $checkId;
|
||||
$this->clearReferenceCheckQueueLock($checkId);
|
||||
try {
|
||||
$results[] = $this->runReferenceCheckOnce($checkId);
|
||||
} catch (\Exception $e) {
|
||||
@@ -1819,31 +1842,6 @@ class ReferenceCheckService
|
||||
];
|
||||
}
|
||||
|
||||
/**
|
||||
* 清除队列 Redis 完成标记,避免重检任务被 acquireLock 静默丢弃
|
||||
*/
|
||||
public function clearReferenceCheckQueueLock($checkId)
|
||||
{
|
||||
$checkId = intval($checkId);
|
||||
if ($checkId <= 0) {
|
||||
return;
|
||||
}
|
||||
try {
|
||||
$keys = [];
|
||||
foreach (['queue_job', 'queue_job_two'] as $prefix) {
|
||||
$class = $prefix === 'queue_job_two'
|
||||
? 'app\\api\\job\\ReferenceCheckTwo'
|
||||
: 'app\\api\\job\\ReferenceCheck';
|
||||
$base = $prefix . ':' . $class . ':' . $checkId;
|
||||
$keys[] = $base;
|
||||
$keys[] = $base . ':status';
|
||||
}
|
||||
QueueRedis::getInstance()->deleteRedisKeys($keys);
|
||||
} catch (\Exception $e) {
|
||||
\think\Log::warning('clearReferenceCheckQueueLock id=' . $checkId . ' ' . $e->getMessage());
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* 执行一次引用 LLM 校对(同步,写回 article_reference_check_result)
|
||||
*/
|
||||
@@ -1884,8 +1882,7 @@ class ReferenceCheckService
|
||||
$confidence = floatval(isset($llmResult['confidence']) ? $llmResult['confidence'] : 0);
|
||||
$reason = isset($llmResult['reason']) ? $llmResult['reason'] : '';
|
||||
|
||||
// LLM 通讯失败:写 status=RECORD_FAILED(3) + error_msg,抛异常让队列 worker 走 release(30) 重试;
|
||||
// 重试 3 次后 ReferenceCheck::markFailed 会保持 status=3 收尾
|
||||
// LLM 通讯失败:写 status=RECORD_FAILED(3) + error_msg,抛异常由 MQ worker 重试
|
||||
if ($requestFailed) {
|
||||
$this->updateCheckResult($checkId, [
|
||||
'confidence' => $confidence,
|
||||
@@ -1893,7 +1890,6 @@ class ReferenceCheckService
|
||||
'status' => self::RECORD_FAILED,
|
||||
'error_msg' => $reason,
|
||||
]);
|
||||
$this->clearReferenceCheckQueueLock($checkId);
|
||||
throw new \RuntimeException($reason !== '' ? $reason : 'LLM request failed');
|
||||
}
|
||||
|
||||
@@ -1906,8 +1902,9 @@ class ReferenceCheckService
|
||||
'error_msg' => '',
|
||||
]);
|
||||
|
||||
$this->clearReferenceCheckQueueLock($checkId);
|
||||
$this->maybeEnqueueSecondPass($checkId, $confidence);
|
||||
if ($confidence <= self::PASS_CONFIDENCE_THRESHOLD) {
|
||||
$this->runSecondPassBlocking($checkId, $row, $contentA, $refer, $contentB);
|
||||
}
|
||||
|
||||
return [
|
||||
'check_id' => $checkId,
|
||||
@@ -1918,6 +1915,82 @@ class ReferenceCheckService
|
||||
];
|
||||
}
|
||||
|
||||
/**
 * Second-pass (Crossref / DOI) re-check of a low-scoring result, executed synchronously.
 *
 * A re-check payload is built with prepareRecheckPayload(). When the payload reports no
 * abstract, or its DOI block is empty, nothing is written and false is returned.
 * Otherwise LLMService::checkReference() is called in "again" mode with the DOI block,
 * at most twice: a failed request or an exception on the first attempt causes one retry.
 *
 * Write-back:
 *  - success: status RECORD_COMPLETED with can_support / is_match / confidence and a reason
 *    prefixed with the "[Crossref复核 <doi>]" tag;
 *  - failure on the last attempt: status RECORD_FAILED with error_msg set.
 * After either write-back, syncAmRefCheckStatus() is called when the row has a positive am_id.
 *
 * @param int    $checkId   id handed to updateCheckResult(); values <= 0 return false
 * @param array  $row       check row; only 'am_id' is read here
 * @param mixed  $contentA  context text forwarded to the LLM
 * @param mixed  $refer     reference row; anything that is not an array is replaced by []
 * @param mixed  $referText reference text, cast to string and trimmed before use
 * @return bool true only when a second-pass result was written as completed
 */
public function runSecondPassBlocking($checkId, array $row, $contentA, $refer, $referText)
{
    $checkId = intval($checkId);
    if ($checkId <= 0) {
        return false;
    }

    $cleanReferText = trim((string)$referText);
    $referRow = is_array($refer) ? $refer : [];

    $payload = $this->prepareRecheckPayload($referRow, $cleanReferText);
    $hasAbstract = !empty($payload['has_abstract']);
    if (!$hasAbstract || trim((string)$payload['doi_block']) === '') {
        // Without real Crossref content a second pass would only repeat the first one.
        return false;
    }

    $amId = isset($row['am_id']) ? intval($row['am_id']) : 0;

    $maxAttempts = 2;
    $attempt = 0;
    $lastError = '';
    while ($attempt < $maxAttempts) {
        $isLastAttempt = ($attempt >= $maxAttempts - 1);
        $attempt++;

        try {
            $llm = new LLMService();
            $llmResult = $llm->checkReference($contentA, $cleanReferText, true, $payload['doi_block']);

            $requestFailed = !empty($llmResult['request_failed']);
            $canSupport = $this->parseLlmCanSupport($llmResult);
            $confidence = floatval(isset($llmResult['confidence']) ? $llmResult['confidence'] : 0);

            $doiUsed = trim((string)$payload['doi_used']);
            $tag = '[Crossref复核' . ($doiUsed !== '' ? (' ' . $doiUsed) : '') . ']';
            $reason = $tag . ' ' . (isset($llmResult['reason']) ? $llmResult['reason'] : '');

            if ($requestFailed) {
                $lastError = isset($llmResult['reason']) ? (string)$llmResult['reason'] : 'LLM request failed';
                if (!$isLastAttempt) {
                    continue;
                }

                $this->updateCheckResult($checkId, [
                    'confidence' => $confidence,
                    'reason' => $reason,
                    'status' => self::RECORD_FAILED,
                    'error_msg' => $lastError,
                ]);
                if ($amId > 0) {
                    $this->syncAmRefCheckStatus($amId);
                }

                return false;
            }

            $supportFlag = $canSupport ? 1 : 0;
            $this->updateCheckResult($checkId, [
                'can_support' => $supportFlag,
                'is_match' => $supportFlag,
                'confidence' => $confidence,
                'reason' => $reason,
                'status' => self::RECORD_COMPLETED,
                'error_msg' => '',
            ]);
            if ($amId > 0) {
                $this->syncAmRefCheckStatus($amId);
            }

            return true;
        } catch (\Exception $e) {
            // Any exception raised above (LLM call or write-back) counts as a failed attempt.
            $lastError = $e->getMessage();
            if (!$isLastAttempt) {
                continue;
            }

            $this->updateCheckResult($checkId, [
                'status' => self::RECORD_FAILED,
                'error_msg' => $lastError,
            ]);
            if ($amId > 0) {
                $this->syncAmRefCheckStatus($amId);
            }

            return false;
        }
    }

    return false;
}
|
||||
|
||||
/**
|
||||
* @return array{refer: array, p_article_id: int, p_refer_id: int, reference_no: int}
|
||||
*/
|
||||
@@ -2622,18 +2695,13 @@ class ReferenceCheckService
|
||||
}
|
||||
|
||||
/**
|
||||
* 第一轮 confidence<=0.65 且能抓到 DOI 真实内容时,延迟入队第二轮复核
|
||||
*
|
||||
* 跳过条件(避免无意义重跑得到相同结果):
|
||||
* - check_id 不合法 / 一次置信度高于阈值
|
||||
* - refer 行不存在
|
||||
* - refer_doi 为空或 Crossref 未返回摘要
|
||||
* 对已完成且低分的记录尝试同步 Crossref 二轮(供 enqueueSecondPassByArticle 等手工入口)
|
||||
*/
|
||||
public function maybeEnqueueSecondPass($checkId, $confidence)
|
||||
public function runSecondPassIfNeeded($checkId, $confidence)
|
||||
{
|
||||
$checkId = intval($checkId);
|
||||
$confidence = floatval($confidence);
|
||||
if ($checkId <= 0 || $confidence > 0.65) {
|
||||
if ($checkId <= 0 || $confidence > self::PASS_CONFIDENCE_THRESHOLD) {
|
||||
return false;
|
||||
}
|
||||
|
||||
@@ -2658,9 +2726,13 @@ class ReferenceCheckService
|
||||
return false;
|
||||
}
|
||||
|
||||
$this->clearReferenceCheckQueueLock($checkId);
|
||||
$this->pushJob2($checkId, 5);
|
||||
return true;
|
||||
$contentA = $this->resolveMainContentForJob($row);
|
||||
$referText = trim((string)$this->arrGet($row, 'refer_text', ''));
|
||||
if ($referText === '' && is_array($refer)) {
|
||||
$referText = $this->formatReferForLlm($refer);
|
||||
}
|
||||
|
||||
return $this->runSecondPassBlocking($checkId, $row, $contentA, $refer, $referText);
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -3047,72 +3119,93 @@ class ReferenceCheckService
|
||||
}
|
||||
|
||||
/**
 * After check rows have been inserted, open one article batch for them and hand it
 * to startArticleCheckQueue().
 *
 * Despite the method name, no sorting is performed here: ids are collected in the
 * order the rows are given.
 *
 * @param array  $rows       each element must contain 'check_id'
 * @param int    $pArticleId production article id forwarded to startArticleCheckQueue()
 * @param string $trigger    enqueue|recheck_failed|manual
 * @return int[] the positive check ids found in $rows (empty when there are none)
 */
private function enqueueChecksSortedByReferenceNo(array $rows, $pArticleId = 0, $trigger = 'enqueue')
{
    $ids = [];
    foreach ($rows as $row) {
        $id = intval($row['check_id']);
        if ($id <= 0) {
            continue;
        }
        $ids[] = $id;
    }

    if (count($ids) > 0) {
        $this->startArticleCheckQueue($ids, intval($pArticleId), $trigger);
    }

    return $ids;
}
|
||||
|
||||
/**
 * Create an article batch row and, when it is first in line, publish it to RabbitMQ.
 *
 * A row is inserted into article_reference_check_batch with batch_status 0. The start
 * message is published only when no earlier batch is still waiting (batch_status 0 with a
 * smaller id) and no batch is currently running (batch_status 1); otherwise the batch
 * simply stays in the table.
 * NOTE(review): presumably the consumer starts the next waiting batch when one finishes —
 * that code is not visible here; confirm.
 *
 * When $pArticleId is not positive it is looked up from the first check row.
 *
 * @param int[]  $checkIds   check row ids; non-positive values are dropped
 * @param int    $pArticleId production article id (0 = resolve from the first check row)
 * @param string $trigger    stored in the batch row and passed to the publisher
 * @return int[] the positive check ids (empty when none were given)
 * @throws \RuntimeException when no p_article_id can be determined
 * @throws \Exception        rethrown unchanged when publishing fails (after logging)
 */
public function startArticleCheckQueue(array $checkIds, $pArticleId = 0, $trigger = 'enqueue')
{
    // Keep strictly positive ids only, matching the filter in enqueueChecksSortedByReferenceNo().
    $validIds = [];
    foreach ($checkIds as $rawId) {
        $id = intval($rawId);
        if ($id > 0) {
            $validIds[] = $id;
        }
    }
    if (empty($validIds)) {
        return [];
    }

    $pArticleId = intval($pArticleId);
    if ($pArticleId <= 0) {
        $firstRow = Db::name('article_reference_check_result')->where('id', $validIds[0])->find();
        $pArticleId = empty($firstRow) ? 0 : intval($this->arrGet($firstRow, 'p_article_id', 0));
    }
    if ($pArticleId <= 0) {
        throw new \RuntimeException('p_article_id is required for reference check queue');
    }

    $now = date('Y-m-d H:i:s');
    $batchId = intval(Db::name('article_reference_check_batch')->insertGetId([
        'p_article_id' => $pArticleId,
        'batch_status' => 0,
        'total_count' => count($validIds),
        'done_count' => 0,
        'failed_count' => 0,
        'trigger' => (string)$trigger,
        'created_at' => $now,
        'updated_at' => $now,
    ]));

    $shouldPublish = !$this->hasEarlierWaitingBatch($batchId) && !$this->hasRunningReferenceCheckBatch();
    if (!$shouldPublish) {
        $this->log('startArticleCheckQueue queued batch_id=' . $batchId . ' p_article_id=' . $pArticleId);
        return $validIds;
    }

    try {
        (new ReferenceCheckMqPublisher())->publishArticleStart($pArticleId, $batchId, $trigger);
    } catch (\Exception $e) {
        // The batch row already exists with batch_status 0 but no message was sent.
        // Record the batch id so it can be found, then let the caller see the failure.
        // NOTE(review): later batches will see this row via hasEarlierWaitingBatch() and
        // not publish; confirm whether the consumer recovers such batches.
        $this->log('startArticleCheckQueue publish failed p_article_id=' . $pArticleId
            . ' batch_id=' . $batchId . ' ' . $e->getMessage());
        throw $e;
    }
    $this->log('startArticleCheckQueue publish p_article_id=' . $pArticleId . ' batch_id=' . $batchId);

    return $validIds;
}
|
||||
|
||||
/**
 * Tell whether any row in article_reference_check_batch has batch_status 1 (running).
 *
 * @return bool true when at least one such batch exists
 */
private function hasRunningReferenceCheckBatch()
{
    $runningCount = Db::name('article_reference_check_batch')
        ->where('batch_status', 1)
        ->count();

    return $runningCount > 0;
}
|
||||
/**
 * Tell whether a batch with a smaller id than $batchId is still waiting (batch_status 0).
 *
 * @param int $batchId id of the batch being considered (cast to int)
 * @return bool true when at least one earlier waiting batch exists
 */
private function hasEarlierWaitingBatch($batchId)
{
    $currentId = intval($batchId);

    $waitingAhead = Db::name('article_reference_check_batch')
        ->where('batch_status', 0)
        ->where('id', '<', $currentId)
        ->count();

    return $waitingAhead > 0;
}
|
||||
|
||||
/**
 * Append one line, prefixed with the current "Y-m-d H:i:s" timestamp, to the log file
 * configured in the constructor.
 *
 * Logging is best-effort: write errors are suppressed and nothing is returned.
 *
 * @param string $msg text to record
 */
public function log($msg)
{
    $stamp = date('Y-m-d H:i:s');
    @file_put_contents($this->logFile, $stamp . ' ' . $msg . PHP_EOL, FILE_APPEND);
}
|
||||
}
|
||||
|
||||
@@ -33,7 +33,7 @@ class LLMService
|
||||
public function checkReference($contextText, $referText, $isAgain = false, $doiBlock = null)
|
||||
{
|
||||
// request_failed=true 表示"LLM 通讯/解析层面的失败"(可重试,区别于业务上的"未命中");
|
||||
// 上游 runReferenceCheckOnce 会据此把 DB.status 置为 2(失败) 并抛异常触发队列重试
|
||||
// 上游 runReferenceCheckOnce 会据此把 DB.status 置为 3(失败) 并抛异常触发 MQ worker 重试
|
||||
$fallback = [
|
||||
'can_support' => false,
|
||||
'is_match' => false,
|
||||
|
||||
Reference in New Issue
Block a user