Merge remote-tracking branch 'origin/master'
This commit is contained in:
303
application/api/controller/Author.php
Normal file
303
application/api/controller/Author.php
Normal file
@@ -0,0 +1,303 @@
|
||||
<?php
|
||||
/**
|
||||
* Created by PhpStorm.
|
||||
* User: Administrator
|
||||
* Date: 2026/6/2
|
||||
* Time: 15:04
|
||||
*/
|
||||
|
||||
namespace app\api\controller;
|
||||
|
||||
|
||||
class Author
{
    /**
     * API endpoint: look up an author's h-index through the Scopus free author lookup.
     *
     * Reads `name` (required), `affil` (optional) and `debug` (optional, 1 adds a
     * `debug` section to the response) from the GET parameters. When Scopus answers
     * with a login or captcha page, the OpenAlex fallback result is returned instead
     * (if one is available).
     *
     * @return mixed JSON response produced by the framework `json()` helper;
     *               `code` is 1 on success and 0 on failure.
     */
    public function get_hindex()
    {
        $name = trim((string) input('get.name'));
        $affil = trim((string) input('get.affil'));
        $debug = (int) input('get.debug', 0);

        // Validate before creating the cookie jar so a rejected request leaves no temp file behind.
        if (empty($name)) {
            return json(['code' => 0, 'msg' => '请输入作者姓名']);
        }

        $cookieFile = tempnam(sys_get_temp_dir(), 'scopus_cookie_');
        if ($cookieFile === false) {
            // No cookie jar available: httpRequest() skips cookie handling for an empty path.
            $cookieFile = '';
        }

        try {
            return json($this->queryScopusHIndex($name, $affil, $debug, $cookieFile));
        } finally {
            // Single cleanup point for every outcome, including exceptions.
            if ($cookieFile !== '') {
                @unlink($cookieFile);
            }
        }
    }

    /**
     * Run the Scopus lookup flow and build the response payload.
     *
     * @param string $name       Author name (non-empty).
     * @param string $affil      Affiliation filter, may be empty.
     * @param int    $debug      1 to attach debug information to the payload.
     * @param string $cookieFile Cookie jar path shared by both requests, or ''.
     * @return array Response payload for `json()`.
     */
    private function queryScopusHIndex($name, $affil, $debug, $cookieFile)
    {
        // 1) Fetch the free-lookup page to obtain the real submit URL and hidden fields.
        $lookupUrl = 'https://www.scopus.com/freelookup/form/author.uri?zone=TopNavBar&origin=NO%20ORIGIN%20DEFINED';
        $lookupRes = $this->httpRequest($lookupUrl, null, true, '', $cookieFile);
        if (!$lookupRes['ok']) {
            $ret = ['code' => 0, 'msg' => '访问 Scopus 失败:' . $lookupRes['msg']];
            return $this->withDebug($ret, $lookupRes, $debug);
        }

        $formInfo = $this->extractScopusLookupForm($lookupRes['body']);
        if (empty($formInfo['action'])) {
            $ret = ['code' => 0, 'msg' => 'Scopus 页面结构已变化,未找到查询表单'];
            return $this->withDebug($ret, $lookupRes, $debug);
        }

        // 2) Submit the query (author name + affiliation) together with the hidden fields.
        $postData = $formInfo['hidden_fields'];
        $postData['authLast'] = $name;
        $postData['affil'] = $affil;

        $searchRes = $this->httpRequest($formInfo['action'], $postData, true, $lookupUrl, $cookieFile);
        if (!$searchRes['ok']) {
            $ret = ['code' => 0, 'msg' => '查询 Scopus 失败:' . $searchRes['msg']];
            return $this->withDebug($ret, $searchRes, $debug);
        }

        $blockMsg = $this->detectScopusBlocking($searchRes['body']);
        if (!empty($blockMsg)) {
            $ret = ['code' => 0, 'msg' => $blockMsg];
            $fallback = $this->fallbackByOpenAlex($name, $affil);
            if ($fallback !== null) {
                $ret = array_merge($fallback, [
                    'msg' => $blockMsg . ',已自动降级 OpenAlex 结果'
                ]);
            }
            return $this->withDebug($ret, $searchRes, $debug);
        }

        // 3) Extract the h-index from the result page.
        $hIndex = $this->extractHIndexFromHtml($searchRes['body']);
        if ($hIndex === null) {
            $ret = [
                'code' => 0,
                'msg' => '未从 Scopus 结果页解析到 H 指数(可能需要人工登录或页面结构调整)'
            ];
            return $this->withDebug($ret, $searchRes, $debug);
        }

        $ret = [
            'code' => 1,
            'name' => $name,
            'affil' => $affil,
            'h_index_scopus' => $hIndex,
            'source' => 'scopus_freelookup',
        ];
        return $this->withDebug($ret, $searchRes, $debug);
    }

    /**
     * Append a `debug` entry describing $response to $ret when debug mode is on.
     *
     * @param array $ret      Response payload.
     * @param array $response Result array returned by httpRequest().
     * @param int   $debug    Debug flag from the request.
     * @return array
     */
    private function withDebug(array $ret, array $response, $debug)
    {
        if ($debug === 1) {
            $ret['debug'] = $this->buildDebugInfo($response['url'], $response['http_code'], $response['body']);
        }
        return $ret;
    }

    /**
     * Perform a GET (or POST when $postData is an array) request with cURL.
     *
     * @param string     $url
     * @param array|null $postData       Form fields; any array (even empty) switches to POST.
     * @param bool       $followLocation Whether redirects are followed (max 8).
     * @param string     $referer        Optional Referer header.
     * @param string     $cookieFile     Optional cookie jar path used for reading and writing.
     * @return array{ok:bool,msg:string,body:string,http_code:int,url:string}
     */
    private function httpRequest($url, $postData = null, $followLocation = true, $referer = '', $cookieFile = '')
    {
        $ch = curl_init();
        if ($ch === false) {
            return ['ok' => false, 'msg' => 'curl_init failed', 'body' => '', 'http_code' => 0, 'url' => (string) $url];
        }

        $options = [
            CURLOPT_URL => $url,
            CURLOPT_RETURNTRANSFER => true,
            // NOTE(review): TLS certificate/host verification is disabled here, which allows
            // man-in-the-middle tampering. Left unchanged because enabling it depends on the
            // deployment having a CA bundle configured — confirm and enable where possible.
            CURLOPT_SSL_VERIFYPEER => false,
            CURLOPT_SSL_VERIFYHOST => false,
            CURLOPT_FOLLOWLOCATION => $followLocation,
            CURLOPT_MAXREDIRS => 8,
            CURLOPT_TIMEOUT => 30,
            CURLOPT_CONNECTTIMEOUT => 15,
            CURLOPT_USERAGENT => 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36',
            // Empty string lets cURL advertise every encoding it supports and decode transparently.
            CURLOPT_ENCODING => '',
            CURLOPT_HTTPHEADER => [
                'Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
                'Accept-Language: zh-CN,zh;q=0.9,en;q=0.8',
            ],
        ];

        if (!empty($referer)) {
            $options[CURLOPT_REFERER] = $referer;
        }

        if (!empty($cookieFile)) {
            $options[CURLOPT_COOKIEJAR] = $cookieFile;
            $options[CURLOPT_COOKIEFILE] = $cookieFile;
        }

        if (is_array($postData)) {
            $options[CURLOPT_POST] = true;
            $options[CURLOPT_POSTFIELDS] = http_build_query($postData);
        }

        curl_setopt_array($ch, $options);
        $body = curl_exec($ch);
        $error = curl_error($ch);
        $httpCode = (int) curl_getinfo($ch, CURLINFO_HTTP_CODE);
        $finalUrl = (string) curl_getinfo($ch, CURLINFO_EFFECTIVE_URL);
        curl_close($ch);

        if ($error) {
            // cURL reports redirect exhaustion as "Maximum (N) redirects followed".
            if (strpos($error, 'Maximum (') !== false && strpos($error, 'redirects followed') !== false) {
                return [
                    'ok' => false,
                    'msg' => 'Scopus 跳转过多(可能触发登录/验证页面),请稍后重试或先在浏览器登录 Scopus',
                    'body' => '',
                    'http_code' => $httpCode,
                    'url' => $finalUrl
                ];
            }
            return ['ok' => false, 'msg' => $error, 'body' => '', 'http_code' => $httpCode, 'url' => $finalUrl];
        }

        if ($httpCode >= 400 || $httpCode === 0) {
            return ['ok' => false, 'msg' => 'HTTP ' . $httpCode, 'body' => (string) $body, 'http_code' => $httpCode, 'url' => $finalUrl];
        }

        return ['ok' => true, 'msg' => '', 'body' => (string) $body, 'http_code' => $httpCode, 'url' => $finalUrl];
    }

    /**
     * Detect a login wall or captcha in a Scopus response.
     *
     * @param string $html Response body.
     * @return string User-facing message when blocked, '' otherwise.
     */
    private function detectScopusBlocking($html)
    {
        if (empty($html)) {
            return '';
        }

        $text = strtolower(strip_tags($html));

        // "institutional sign in" contains "sign in", so one check covers both phrases.
        if (strpos($text, 'sign in') !== false) {
            return 'Scopus 返回登录页,当前环境未授权访问作者详情页面';
        }
        if (strpos($text, 'captcha') !== false || strpos($text, 'are you a robot') !== false) {
            return 'Scopus 触发了人机验证,当前接口无法自动通过';
        }

        return '';
    }

    /**
     * Build the diagnostic section returned when `debug=1`.
     *
     * @param string $finalUrl Effective URL after redirects.
     * @param int    $httpCode HTTP status code.
     * @param string $html     Response body.
     * @return array Final URL, status code, first 300 characters of the page text,
     *               and 0/1 flags for the "sign in" / "captcha" markers.
     */
    private function buildDebugInfo($finalUrl, $httpCode, $html)
    {
        $normalized = $this->toPlainText($html);

        return [
            'final_url' => (string) $finalUrl,
            'http_code' => (int) $httpCode,
            'page_snippet' => mb_substr($normalized, 0, 300, 'UTF-8'),
            'contains_signin' => stripos($normalized, 'sign in') !== false ? 1 : 0,
            'contains_captcha' => stripos($normalized, 'captcha') !== false ? 1 : 0,
        ];
    }

    /**
     * Convert HTML to single-spaced plain text that is guaranteed to be valid UTF-8.
     *
     * Invalid byte sequences are replaced first: with the `u` modifier preg_replace()
     * returns null on malformed UTF-8, which would otherwise discard the whole text.
     *
     * @param string $html
     * @return string
     */
    private function toPlainText($html)
    {
        $text = html_entity_decode(strip_tags((string) $html), ENT_QUOTES, 'UTF-8');
        if (!mb_check_encoding($text, 'UTF-8')) {
            $text = mb_convert_encoding($text, 'UTF-8', 'UTF-8');
        }

        $collapsed = preg_replace('/\s+/u', ' ', $text);

        return $collapsed === null ? $text : $collapsed;
    }

    /**
     * Locate the lookup form in the Scopus page and collect its hidden fields.
     *
     * The first form whose action mentions "author" is preferred; otherwise the first
     * form with an action attribute is used. Attribute values are entity-decoded
     * because they are raw HTML (e.g. `&amp;` inside query strings).
     *
     * @param string $html
     * @return array{action:string,hidden_fields:array<string,string>} `action` is '' when no form is found.
     */
    private function extractScopusLookupForm($html)
    {
        $ret = [
            'action' => '',
            'hidden_fields' => [],
        ];

        if (empty($html)) {
            return $ret;
        }

        $formPattern = '/<form[^>]*action=["\']([^"\']+)["\'][^>]*>.*?<\/form>/is';
        if (!preg_match_all($formPattern, $html, $forms, PREG_SET_ORDER)) {
            return $ret;
        }

        $formMatch = $forms[0];
        foreach ($forms as $candidate) {
            if (stripos($candidate[1], 'author') !== false) {
                $formMatch = $candidate;
                break;
            }
        }

        $action = html_entity_decode(trim($formMatch[1]), ENT_QUOTES, 'UTF-8');
        if (strpos($action, '//') === 0) {
            // Protocol-relative URL: keep its host instead of prefixing the Scopus origin.
            $action = 'https:' . $action;
        } elseif (!preg_match('/^https?:\/\//i', $action)) {
            $action = 'https://www.scopus.com' . (substr($action, 0, 1) === '/' ? '' : '/') . $action;
        }
        $ret['action'] = $action;

        if (preg_match_all('/<input[^>]*type=["\']hidden["\'][^>]*>/is', $formMatch[0], $inputs)) {
            foreach ($inputs[0] as $inputTag) {
                // The lookbehind keeps attributes such as data-name / data-value from matching.
                if (!preg_match('/(?<![\w\-])name=["\']([^"\']+)["\']/i', $inputTag, $nameMatch)) {
                    continue;
                }
                $fieldName = html_entity_decode(trim($nameMatch[1]), ENT_QUOTES, 'UTF-8');
                $fieldVal = '';
                if (preg_match('/(?<![\w\-])value=["\']([^"\']*)["\']/i', $inputTag, $valMatch)) {
                    $fieldVal = html_entity_decode($valMatch[1], ENT_QUOTES, 'UTF-8');
                }
                $ret['hidden_fields'][$fieldName] = $fieldVal;
            }
        }

        return $ret;
    }

    /**
     * Parse an h-index value out of a result page.
     *
     * Looks for a standalone 1-3 digit number within 20 non-digit characters after
     * (preferred) or before the text "h-index" / "h index" / "hindex". Digit-boundary
     * assertions prevent picking up a slice of a longer number (e.g. "1234").
     *
     * @param string $html
     * @return int|null The parsed value, or null when nothing matches.
     */
    private function extractHIndexFromHtml($html)
    {
        if (empty($html)) {
            return null;
        }

        $text = $this->toPlainText($html);

        $patterns = [
            '/h[\-\s]?index[^0-9]{0,20}([0-9]{1,3})(?![0-9])/iu',
            '/(?<![0-9])([0-9]{1,3})[^0-9]{0,20}h[\-\s]?index/iu',
        ];
        foreach ($patterns as $pattern) {
            if (preg_match($pattern, $text, $m)) {
                return (int) $m[1];
            }
        }

        return null;
    }

    /**
     * Fallback: fetch author metrics from the OpenAlex API.
     *
     * Picks the first search result with an institution name containing $affil
     * (case-insensitive); when $affil is empty or nothing matches, the first result is used.
     *
     * @param string $name  Author name used as the search term.
     * @param string $affil Affiliation filter, may be empty.
     * @return array|null Response payload (`code` => 1, `source` => 'openalex_fallback'),
     *                    or null when the request fails or returns no results.
     */
    private function fallbackByOpenAlex($name, $affil)
    {
        // OpenAlex pages with "per-page"; "limit" is not one of its query parameters.
        $url = 'https://api.openalex.org/authors?search=' . urlencode($name) . '&per-page=8';
        $res = $this->httpRequest($url, null, true);
        if (!$res['ok']) {
            return null;
        }

        $data = json_decode($res['body'], true);
        $list = (is_array($data) && isset($data['results']) && is_array($data['results']))
            ? array_values($data['results'])
            : [];
        if (empty($list)) {
            return null;
        }

        $targetAffil = trim((string) $affil);
        $match = $list[0];
        if ($targetAffil !== '') {
            foreach ($list as $item) {
                foreach ($this->openAlexInstitutionNames($item) as $instName) {
                    if (mb_stripos($instName, $targetAffil, 0, 'UTF-8') !== false) {
                        $match = $item;
                        break 2;
                    }
                }
            }
        }

        $matchedNames = $this->openAlexInstitutionNames($match);

        return [
            'code' => 1,
            'name' => $match['display_name'] ?? $name,
            'affil' => !empty($matchedNames) ? $matchedNames[0] : $affil,
            // NOTE(review): OpenAlex summary_stats does not appear to expose a Scopus h-index,
            // so this is expected to stay null; the key is kept for response-shape compatibility.
            'h_index_scopus' => $match['summary_stats']['h_index_scopus'] ?? null,
            'h_index_openalex' => $match['summary_stats']['h_index'] ?? null,
            'source' => 'openalex_fallback',
        ];
    }

    /**
     * Collect institution display names from one OpenAlex author record.
     *
     * Reads `affiliations[*].institution.display_name` (falling back to a flat
     * `affiliations[*].display_name`) and then `last_known_institutions[*].display_name`.
     *
     * @param mixed $author Decoded author record.
     * @return string[] Non-empty names in document order.
     */
    private function openAlexInstitutionNames($author)
    {
        $names = [];
        if (!is_array($author)) {
            return $names;
        }

        $affiliations = (isset($author['affiliations']) && is_array($author['affiliations']))
            ? $author['affiliations']
            : [];
        foreach ($affiliations as $entry) {
            $candidate = $entry['institution']['display_name'] ?? ($entry['display_name'] ?? '');
            if (is_string($candidate) && $candidate !== '') {
                $names[] = $candidate;
            }
        }

        $lastKnown = (isset($author['last_known_institutions']) && is_array($author['last_known_institutions']))
            ? $author['last_known_institutions']
            : [];
        foreach ($lastKnown as $entry) {
            $candidate = $entry['display_name'] ?? '';
            if (is_string($candidate) && $candidate !== '') {
                $names[] = $candidate;
            }
        }

        return $names;
    }
}
|
||||
@@ -1329,18 +1329,21 @@ class References extends Base
|
||||
return json_encode(array('status' => 3,'msg' => 'No articles found' ));
|
||||
}
|
||||
if($this->checkReferStatus($iPArticleId)==0){
|
||||
return jsonError('请修正完文献内容再进行校对。');
|
||||
return jsonError('Please correct the reference content before running the check.');
|
||||
}
|
||||
//已存在校对记录则禁止重复执行第一次校对,提示走重置接口
|
||||
$iExisting = Db::name('article_reference_check_result')
|
||||
->where('p_article_id', $iPArticleId)
|
||||
->count();
|
||||
if(intval($iExisting) > 0){
|
||||
return jsonError('该文章已存在校对记录,请使用"重置校对"接口重新校对。');
|
||||
return jsonError('This article already has a reference check record. Please use the "Reset Check" endpoint to run the check again.');
|
||||
}
|
||||
try {
|
||||
$svc = new ReferenceCheckService();
|
||||
$result = $svc->enqueueByPArticle($aProductionArticle);
|
||||
if (empty($result['check_ids'])) {
|
||||
return jsonError('No reference citations were found in the article.');
|
||||
}
|
||||
return jsonSuccess($result);
|
||||
} catch (\Exception $e) {
|
||||
return jsonError($e->getMessage());
|
||||
@@ -1368,7 +1371,7 @@ class References extends Base
|
||||
return json_encode(array('status' => 3,'msg' => 'No articles found' ));
|
||||
}
|
||||
if($this->checkReferStatus($iPArticleId)==0){
|
||||
return jsonError('请修正完文献内容再进行校对。');
|
||||
return jsonError('Please correct the reference content before running the check.');
|
||||
}
|
||||
$iArticleId = empty($aProductionArticle['article_id']) ? 0 : $aProductionArticle['article_id'];
|
||||
if(empty($iArticleId)){
|
||||
@@ -1533,7 +1536,7 @@ class References extends Base
|
||||
* POST/GET: p_refer_id(必填)
|
||||
* p_article_id(可选)
|
||||
*
|
||||
* 仅重跑 status=3(校对失败)的记录;不改动 refer_text,只重置结果字段后入 ReferenceCheck 队列。
|
||||
* 仅重跑 status=3(校对失败)的记录;不改动 refer_text,只重置结果字段后入 RabbitMQ 批次队列。
|
||||
* 返回:p_refer_id、p_article_id、reset、queued、check_ids、queue
|
||||
*/
|
||||
public function referenceCheckRecheckFailedAI()
|
||||
|
||||
@@ -1,114 +0,0 @@
|
||||
<?php
|
||||
namespace app\api\job;
|
||||
|
||||
use think\Db;
|
||||
use think\queue\Job;
|
||||
use app\common\QueueJob;
|
||||
use app\common\QueueRedis;
|
||||
use app\common\ReferenceCheckService;
|
||||
|
||||
class ReferenceCheck
{
    private $oQueueJob;
    private $QueueRedis;
    private $completedExprie = 3600;

    public function __construct()
    {
        $this->oQueueJob = new QueueJob();
        $this->QueueRedis = QueueRedis::getInstance();
    }

    /**
     * Queue entry point: run the reference check for one check-result row.
     *
     * The row id is taken from `$data['check_id']`, falling back to the raw job body.
     * Jobs without a usable id, without a matching row, or whose row is already
     * completed are deleted without further work. A failed run is released for retry
     * after 30 seconds until the third attempt, after which the row is marked failed.
     *
     * @param Job   $job  Queue job being processed.
     * @param mixed $data Job payload.
     * @return void
     */
    public function fire(Job $job, $data)
    {
        $this->oQueueJob->init($job);

        $rawBody = empty($job->getRawBody()) ? '' : $job->getRawBody();
        $envelope = [];
        if (!empty($rawBody)) {
            $envelope = json_decode($rawBody, true);
        }
        $jobId = 'unknown';
        if (!empty($envelope['id'])) {
            $jobId = $envelope['id'];
        }

        // Lock key/value stay empty until the job has passed validation.
        $lockKey = '';
        $lockToken = '';

        $this->oQueueJob->log("-----------队列任务开始-----------");
        $this->oQueueJob->log('当前任务ID: ' . $jobId . ', 尝试次数: ' . $job->attempts());

        try {
            $checkId = isset($data['check_id']) ? intval($data['check_id']) : intval(0);
            if ($checkId <= 0 && !empty($envelope['data']['check_id'])) {
                $checkId = intval($envelope['data']['check_id']);
            }
            if ($checkId <= 0) {
                $job->delete();
                return;
            }

            $record = Db::name('article_reference_check_result')->where('id', $checkId)->find();
            if (empty($record)) {
                $job->delete();
                return;
            }

            // Nothing to do for rows that already finished.
            if (intval($record['status']) === ReferenceCheckService::RECORD_COMPLETED) {
                $job->delete();
                return;
            }

            $lockKey = 'queue_job:' . get_class($this) . ':' . $checkId;
            $lockToken = uniqid() . '_' . getmypid();

            $service = new ReferenceCheckService();
            $service->clearReferenceCheckQueueLock($checkId);

            if (!$this->oQueueJob->acquireLock($lockKey, $lockToken, $job)) {
                return;
            }

            try {
                $service->runReferenceCheckOnce($checkId);

                $amId = isset($record['am_id']) ? intval($record['am_id']) : intval(0);
                if ($amId > 0) {
                    $service->syncAmRefCheckStatus($amId);
                }
                $this->QueueRedis->finishJob($lockKey, 'completed', $this->completedExprie, $lockToken);
                $job->delete();
                $this->oQueueJob->log('任务执行成功 | 日志ID: ' . $lockKey);
            } catch (\Exception $runError) {
                $this->oQueueJob->log('ReferenceCheck error: ' . $runError->getMessage());
                if ($job->attempts() < 3) {
                    // Retry later.
                    $job->release(30);
                } else {
                    // Attempts exhausted: record the failure and drop the job.
                    $this->markFailed($checkId, $runError->getMessage());
                    $job->delete();
                    return;
                }
            }
        } catch (\RuntimeException $e) {
            $this->oQueueJob->handleRetryableException($e, $lockKey, $lockToken, $job);
        } catch (\LogicException $e) {
            $this->oQueueJob->handleNonRetryableException($e, $lockKey, $lockToken, $job);
        } catch (\Exception $e) {
            $this->oQueueJob->handleRetryableException($e, $lockKey, $lockToken, $job);
        } finally {
            $this->oQueueJob->finnal();
        }
    }

    /**
     * Mark a check-result row as failed and re-sync the status of its article_main row.
     *
     * A failure while writing the row is logged and swallowed; the status sync is
     * still attempted afterwards.
     *
     * @param int    $checkId Row id in article_reference_check_result.
     * @param string $msg     Error message stored in `error_msg`.
     * @return void
     */
    private function markFailed($checkId, $msg)
    {
        // Read the row first so am_id is available for the sync step below.
        $record = Db::name('article_reference_check_result')->where('id', $checkId)->find();

        try {
            $failure = [
                'status' => ReferenceCheckService::RECORD_FAILED,
                'error_msg' => $msg,
            ];
            (new ReferenceCheckService())->updateCheckResult($checkId, $failure);
        } catch (\Exception $writeError) {
            \think\Log::error('ReferenceCheck markFailed: ' . $writeError->getMessage());
        }

        $amId = 0;
        if (!empty($record)) {
            $amId = isset($record['am_id']) ? intval($record['am_id']) : intval(0);
        }
        if ($amId > 0) {
            (new ReferenceCheckService())->syncAmRefCheckStatus($amId);
        }
    }
}
|
||||
@@ -1,162 +0,0 @@
|
||||
<?php
|
||||
namespace app\api\job;
|
||||
|
||||
use think\Db;
|
||||
use think\queue\Job;
|
||||
use app\common\QueueJob;
|
||||
use app\common\QueueRedis;
|
||||
use app\common\ReferenceCheckService;
|
||||
use app\common\service\LLMService;
|
||||
|
||||
class ReferenceCheckTwo
{
    private $oQueueJob;
    private $QueueRedis;
    private $completedExprie = 3600;

    public function __construct()
    {
        $this->oQueueJob = new QueueJob();
        $this->QueueRedis = QueueRedis::getInstance();
    }

    /**
     * Queue entry point: second-pass (Crossref-assisted) LLM review of one check-result row.
     *
     * The row id is taken from `$data['check_id']`, falling back to the raw job body.
     * Jobs without a usable id or without a matching row are deleted before any lock
     * is taken, so those early exits cannot leave a lock behind (same order as
     * ReferenceCheck::fire). Completed rows are not skipped here. A failed run is
     * released for retry after 30 seconds until the third attempt, after which the
     * row is marked failed.
     *
     * @param Job   $job  Queue job being processed.
     * @param mixed $data Job payload.
     * @return void
     */
    public function fire(Job $job, $data)
    {
        $this->oQueueJob->init($job);

        $rawBody = empty($job->getRawBody()) ? '' : $job->getRawBody();
        $jobData = empty($rawBody) ? [] : json_decode($rawBody, true);
        $jobId = empty($jobData['id']) ? 'unknown' : $jobData['id'];

        $sRedisKey = '';
        $sRedisValue = '';

        $this->oQueueJob->log("-----------队列任务开始-----------");
        $this->oQueueJob->log("当前任务ID: {$jobId}, 尝试次数: {$job->attempts()}");

        try {
            $checkId = intval(isset($data['check_id']) ? $data['check_id'] : 0);
            if ($checkId <= 0 && !empty($jobData['data']['check_id'])) {
                $checkId = intval($jobData['data']['check_id']);
            }
            if ($checkId <= 0) {
                $job->delete();
                return;
            }

            $row = Db::name('article_reference_check_result')->where('id', $checkId)->find();
            if (empty($row)) {
                $job->delete();
                return;
            }

            // Take the lock only once the job is known to be runnable.
            $sClassName = get_class($this);
            $sRedisKey = "queue_job_two:{$sClassName}:{$checkId}";
            $sRedisValue = uniqid() . '_' . getmypid();

            if (!$this->oQueueJob->acquireLock($sRedisKey, $sRedisValue, $job)) {
                return;
            }

            try {
                $svc = new ReferenceCheckService();

                $contentA = $svc->resolveMainContentForJob($row);
                $referText = trim((string)(isset($row['refer_text']) ? $row['refer_text'] : ''));
                $refer = null;

                if (intval($row['p_refer_id']) > 0) {
                    $refer = Db::name('production_article_refer')
                        ->where('p_refer_id', intval($row['p_refer_id']))
                        ->where('state', 0)
                        ->find();
                }

                $payload = $svc->prepareRecheckPayload(is_array($refer) ? $refer : [], $referText);
                $doiBlock = $payload['doi_block'];

                if ($contentA === '' || $referText === '') {
                    $this->markFailed($checkId, 'Missing article_main.content or refer_text');
                    $job->delete();
                    return;
                }

                $llm = new LLMService();
                $llmResult = $llm->checkReference($contentA, $referText, true, $doiBlock);

                $requestFailed = !empty($llmResult['request_failed']);
                $canSupport = $svc->parseLlmCanSupport($llmResult);
                // A result without a confidence value counts as 0.0 instead of raising a notice.
                $confidence = isset($llmResult['confidence']) ? floatval($llmResult['confidence']) : 0.0;
                $llmReason = isset($llmResult['reason']) ? $llmResult['reason'] : '';

                $tag = $payload['has_abstract']
                    ? ('[Crossref复核' . ($payload['doi_used'] !== '' ? ' ' . $payload['doi_used'] : '') . ']')
                    : '[Crossref复核-无摘要]';
                $reason = $tag . ' ' . $llmReason;

                // LLM communication failure: store RECORD_FAILED, then throw so the
                // retry handling in the catch block below takes over.
                if ($requestFailed) {
                    $failureMsg = isset($llmResult['reason']) ? $llmResult['reason'] : 'LLM request failed';
                    $svc->updateCheckResult($checkId, [
                        'confidence' => $confidence,
                        'reason' => $reason,
                        'status' => ReferenceCheckService::RECORD_FAILED,
                        'error_msg' => $failureMsg,
                    ]);
                    throw new \RuntimeException($failureMsg);
                }

                $affected = $svc->updateCheckResult($checkId, [
                    'can_support' => $canSupport ? 1 : 0,
                    'is_match' => $canSupport ? 1 : 0,
                    'confidence' => $confidence,
                    'reason' => $reason,
                    'status' => ReferenceCheckService::RECORD_COMPLETED,
                    'error_msg' => '',
                ]);
                $this->oQueueJob->log("Crossref复核写入 id={$checkId} affected={$affected} can_support=" . ($canSupport ? 1 : 0) . " confidence=" . $confidence);

                $amId = intval(isset($row['am_id']) ? $row['am_id'] : 0);
                if ($amId > 0) {
                    $svc->syncAmRefCheckStatus($amId);
                }
                $this->QueueRedis->finishJob($sRedisKey, 'completed', $this->completedExprie, $sRedisValue);
                $job->delete();
                $this->oQueueJob->log("任务执行成功 | 日志ID: {$sRedisKey}");
            } catch (\Exception $e) {
                $this->oQueueJob->log('ReferenceCheckTwo error: ' . $e->getMessage());
                if ($job->attempts() >= 3) {
                    // Attempts exhausted: record the failure and drop the job.
                    $this->markFailed($checkId, $e->getMessage());
                    $job->delete();
                    return;
                }
                $job->release(30);
            }
        } catch (\RuntimeException $e) {
            $this->oQueueJob->handleRetryableException($e, $sRedisKey, $sRedisValue, $job);
        } catch (\LogicException $e) {
            $this->oQueueJob->handleNonRetryableException($e, $sRedisKey, $sRedisValue, $job);
        } catch (\Exception $e) {
            $this->oQueueJob->handleRetryableException($e, $sRedisKey, $sRedisValue, $job);
        } finally {
            $this->oQueueJob->finnal();
        }
    }

    /**
     * Mark a check-result row as failed and re-sync the status of its article_main row.
     *
     * A failure while writing the row is logged and swallowed; the status sync is
     * still attempted afterwards.
     *
     * @param int    $checkId Row id in article_reference_check_result.
     * @param string $msg     Error message stored in `error_msg`.
     * @return void
     */
    private function markFailed($checkId, $msg)
    {
        // Read the row first so am_id is available for the sync step below.
        $row = Db::name('article_reference_check_result')->where('id', $checkId)->find();
        try {
            (new ReferenceCheckService())->updateCheckResult($checkId, [
                'status' => ReferenceCheckService::RECORD_FAILED,
                'error_msg' => $msg,
            ]);
        } catch (\Exception $e) {
            \think\Log::error('ReferenceCheckTwo markFailed: ' . $e->getMessage());
        }
        $amId = empty($row) ? 0 : intval(isset($row['am_id']) ? $row['am_id'] : 0);
        if ($amId > 0) {
            (new ReferenceCheckService())->syncAmRefCheckStatus($amId);
        }
    }
}
|
||||
@@ -9,4 +9,6 @@
|
||||
// | Author: yunwuxin <448901948@qq.com>
|
||||
// +----------------------------------------------------------------------
|
||||
|
||||
return [];
|
||||
return [
|
||||
'app\\command\\ReferenceCheckMqConsume',
|
||||
];
|
||||
|
||||
77
application/command/ReferenceCheckMqConsume.php
Normal file
77
application/command/ReferenceCheckMqConsume.php
Normal file
@@ -0,0 +1,77 @@
|
||||
<?php
|
||||
|
||||
namespace app\command;
|
||||
|
||||
use think\console\Command;
|
||||
use think\console\Input;
|
||||
use think\console\Output;
|
||||
use app\common\mq\RabbitMqConfig;
|
||||
use app\common\mq\ReferenceCheckArticleWorker;
|
||||
|
||||
class ReferenceCheckMqConsume extends Command
{
    /**
     * Register the console command name and description.
     */
    protected function configure()
    {
        $this->setName('reference_check:mq-consume')
            ->setDescription('Consume RabbitMQ reference check article queue');
    }

    /**
     * Declare the exchange, work queue and dead-letter queue, then consume messages
     * one at a time until the channel stops consuming.
     *
     * Each message body must be a JSON object/array; anything else is acknowledged
     * and dropped. A message whose handling throws is rejected without requeue, so
     * the broker routes it to the dead-letter queue configured on the work queue.
     *
     * @param Input  $input
     * @param Output $output
     * @return int 0 on normal shutdown, 1 when php-amqplib is not installed.
     */
    protected function execute(Input $input, Output $output)
    {
        if (!class_exists('\\PhpAmqpLib\\Connection\\AMQPStreamConnection')) {
            $output->writeln('<error>php-amqplib not installed. Run: php composer.phar require php-amqplib/php-amqplib:^2.12</error>');
            return 1;
        }

        $rc = RabbitMqConfig::referenceCheck();
        $exchange = isset($rc['exchange']) ? $rc['exchange'] : 'reference_check';
        $queue = isset($rc['queue']) ? $rc['queue'] : 'ref_check.article';
        $routeKey = isset($rc['route_key']) ? $rc['route_key'] : 'article.start';

        $conn = new \PhpAmqpLib\Connection\AMQPStreamConnection(
            RabbitMqConfig::get('host', '127.0.0.1'),
            intval(RabbitMqConfig::get('port', 5672)),
            RabbitMqConfig::get('user', 'guest'),
            RabbitMqConfig::get('password', 'guest'),
            RabbitMqConfig::get('vhost', '/')
        );
        $ch = $conn->channel();
        // Durable direct exchange (passive=false, durable=true, auto_delete=false).
        $ch->exchange_declare($exchange, 'direct', false, true, false);
        $dlq = isset($rc['dlq']) ? $rc['dlq'] : 'ref_check.article.dlq';
        $ch->queue_declare($dlq, false, true, false, false);
        // Rejected messages go through the default exchange ('') straight to the DLQ by name.
        $ch->queue_declare($queue, false, true, false, false, false, new \PhpAmqpLib\Wire\AMQPTable([
            'x-dead-letter-exchange' => '',
            'x-dead-letter-routing-key' => $dlq,
        ]));
        $ch->queue_bind($queue, $exchange, $routeKey);
        // Prefetch a single unacknowledged message per consumer.
        $ch->basic_qos(null, 1, null);

        $output->writeln('Waiting on queue: ' . $queue);

        $worker = new ReferenceCheckArticleWorker();

        // Shared failure path: log, report on the console, reject without requeue.
        $reject = function ($msg, $error) use ($output) {
            \think\Log::error('reference_check:mq-consume ' . $error->getMessage());
            $output->writeln('<error>' . $error->getMessage() . '</error>');
            $msg->nack(false, false);
        };

        $callback = function ($msg) use ($worker, $reject) {
            $payload = json_decode($msg->body, true);
            if (!is_array($payload)) {
                $msg->ack();
                return;
            }
            try {
                $worker->handleMessage($payload);
                $msg->ack();
            } catch (\Exception $e) {
                $reject($msg, $e);
            } catch (\Throwable $e) {
                // PHP 7+ engine errors (TypeError, Error, ...) are not \Exception instances.
                // Without this branch one poison message would crash the consumer while
                // still unacknowledged and be redelivered on every restart.
                $reject($msg, $e);
            }
        };

        $ch->basic_consume($queue, '', false, false, false, false, $callback);
        while ($ch->is_consuming()) {
            $ch->wait();
        }

        $ch->close();
        $conn->close();
        return 0;
    }
}
|
||||
@@ -4,16 +4,17 @@ namespace app\common;
|
||||
|
||||
use think\Db;
|
||||
use think\Env;
|
||||
use think\Queue;
|
||||
use app\common\service\LLMService;
|
||||
use app\common\mq\ReferenceCheckMqPublisher;
|
||||
|
||||
/**
|
||||
* 正文 <blue>[n]</blue> 引用与 t_production_article_refer(index+1=n)相关性校对。
|
||||
* LLM 配置与 PromotionLlmService 相同;单条任务走 ReferenceCheck 队列。
|
||||
* LLM 配置与 PromotionLlmService 相同;异步任务走 RabbitMQ(一篇一条消息)。
|
||||
*/
|
||||
class ReferenceCheckService
|
||||
{
|
||||
const QUEUE_NAME = 'ReferenceCheck';
|
||||
/** API 返回:异步传输方式(RabbitMQ 文章批次) */
|
||||
const TRANSPORT_RABBITMQ = 'rabbitmq';
|
||||
|
||||
/** t_article_main.type */
|
||||
const MAIN_TYPE_TEXT = 0;
|
||||
@@ -29,6 +30,9 @@ class ReferenceCheckService
|
||||
/** @var bool|null t_article_main 是否已有 ref_check_status 列 */
|
||||
private static $amRefCheckStatusColumnExists = null;
|
||||
|
||||
/** 单条任务最多重试次数(不含首次执行) */
|
||||
const QUEUE_MAX_RETRY = 1;
|
||||
|
||||
/**
|
||||
* 引用校对状态(生命周期顺序:0→1→2→3 = 待→进行→完成→失败)
|
||||
*
|
||||
@@ -56,6 +60,12 @@ class ReferenceCheckService
|
||||
const RECORD_COMPLETED = 2; // 校对完成
|
||||
const RECORD_FAILED = 3; // 校对失败
|
||||
|
||||
/** 队列执行状态(queue_status) */
|
||||
const QUEUE_PENDING = 0; // 已入队待执行
|
||||
const QUEUE_RUNNING = 1; // worker 正在执行
|
||||
const QUEUE_COMPLETED = 2; // 执行完成
|
||||
const QUEUE_FAILED = 3; // 最终失败(重试耗尽)
|
||||
|
||||
/** LLM 评分(confidence)通过阈值:>= 该值视为"通过" */
|
||||
const PASS_CONFIDENCE_THRESHOLD = 0.65;
|
||||
|
||||
@@ -69,6 +79,12 @@ class ReferenceCheckService
|
||||
const BLUE_TAG_REGEX = '/<blue>\[([\d,,\-\x{2013}\x{2014}\x{2212}\x{2010}\x{2011}\s]+)\]<\/blue>/u';
|
||||
const BLUE_TAG_REGEX_BRACKET_OUTSIDE = '/\[<blue>([\d,,\-\x{2013}\x{2014}\x{2212}\x{2010}\x{2011}\s]+)<\/blue>\]/u';
|
||||
|
||||
private $logFile;
|
||||
|
||||
public function __construct()
{
    // Log file location: <ROOT_PATH>runtime/plagiarism_task.log
    $runtimeDir = ROOT_PATH . 'runtime' . DS;
    $this->logFile = $runtimeDir . 'plagiarism_task.log';
}
|
||||
/**
|
||||
* 兼容无 ?? 的 PHP 版本
|
||||
*/
|
||||
@@ -77,6 +93,27 @@ class ReferenceCheckService
|
||||
return isset($arr[$key]) ? $arr[$key] : $default;
|
||||
}
|
||||
|
||||
/**
 * Add the initial queue bookkeeping columns to a check-result row that is being
 * created or reset.
 *
 * @param array $fields      Column values for the row.
 * @param int   $queueStatus Value for `queue_status` (defaults to QUEUE_PENDING).
 * @param int   $retryCount  Value for `retry_count`; negative values are clamped to 0.
 * @return array $fields with `queue_status` and `retry_count` set.
 */
private function newCheckRecordFields(array $fields, $queueStatus = self::QUEUE_PENDING, $retryCount = 0)
{
    // array_replace keeps existing keys in place and appends new ones, like direct assignment.
    return array_replace($fields, [
        'queue_status' => intval($queueStatus),
        'retry_count' => max(0, intval($retryCount)),
    ]);
}
|
||||
|
||||
/**
 * Update the queue bookkeeping columns of one check-result row.
 *
 * @param int      $checkId     Row id; values <= 0 are ignored.
 * @param int      $queueStatus New `queue_status` value.
 * @param int|null $retryCount  New `retry_count` (clamped to >= 0), or null to leave it untouched.
 * @return mixed 0 when $checkId is not positive, otherwise the result of the update query.
 */
public function markQueueRuntime($checkId, $queueStatus, $retryCount = null)
{
    $rowId = intval($checkId);
    if ($rowId <= 0) {
        return 0;
    }

    $changes = ['queue_status' => intval($queueStatus)];
    if ($retryCount !== null) {
        $changes['retry_count'] = max(0, intval($retryCount));
    }

    return Db::name('article_reference_check_result')
        ->where('id', $rowId)
        ->update($changes);
}
|
||||
|
||||
/**
|
||||
* 合并匹配两种 blue 引用排版,按在正文中的起始位置排序。
|
||||
*
|
||||
@@ -128,7 +165,7 @@ class ReferenceCheckService
|
||||
}
|
||||
|
||||
$now = date('Y-m-d H:i:s');
|
||||
$checkId = Db::name('article_reference_check_result')->insertGetId([
|
||||
$checkId = Db::name('article_reference_check_result')->insertGetId($this->newCheckRecordFields([
|
||||
'article_id' => intval($this->arrGet($extra, 'article_id', 0)),
|
||||
'am_id' => intval($this->arrGet($extra, 'am_id', 0)),
|
||||
'p_article_id' => intval($this->arrGet($extra, 'p_article_id', 0)),
|
||||
@@ -145,14 +182,14 @@ class ReferenceCheckService
|
||||
'status' => 0,
|
||||
'created_at' => $now,
|
||||
'updated_at' => $now,
|
||||
]);
|
||||
]));
|
||||
|
||||
$amId = intval($this->arrGet($extra, 'am_id', 0));
|
||||
if ($amId > 0) {
|
||||
$this->setAmRefCheckStatus($amId, self::AM_STATUS_RUNNING);
|
||||
}
|
||||
|
||||
$this->pushJob(intval($checkId), intval($this->arrGet($extra, 'queue_delay', 0)));
|
||||
$this->startArticleCheckQueue([intval($checkId)], intval($this->arrGet($extra, 'p_article_id', 0)), 'enqueue');
|
||||
|
||||
return ['check_id' => $checkId, 'queued' => 1];
|
||||
}
|
||||
@@ -190,7 +227,8 @@ class ReferenceCheckService
|
||||
}
|
||||
|
||||
$skipped = 0;
|
||||
$delay = 0;
|
||||
$pendingJobs = [];
|
||||
$now = date('Y-m-d H:i:s');
|
||||
foreach ($citations as $cite) {
|
||||
foreach ($cite['reference_numbers'] as $refNo) {
|
||||
$referIndex = $refNo - 1;
|
||||
@@ -201,9 +239,7 @@ class ReferenceCheckService
|
||||
$refer = $referMap[$referIndex];
|
||||
$referText = $this->formatReferForLlm($refer);
|
||||
|
||||
$now = date('Y-m-d H:i:s');
|
||||
// [70-73] 展开为 reference_no=70,71,72,73 共 4 条记录
|
||||
$checkId = Db::name('article_reference_check_result')->insertGetId([
|
||||
$checkId = Db::name('article_reference_check_result')->insertGetId($this->newCheckRecordFields([
|
||||
'article_id' => $main['article_id'],
|
||||
'p_article_id' => $pArticleId,
|
||||
'am_id' => intval($main['am_id']),
|
||||
@@ -211,22 +247,27 @@ class ReferenceCheckService
|
||||
'refer_index' => $refNo,
|
||||
'origin_text' => $cite['original_text'],
|
||||
'refer_text' => $referText,
|
||||
'p_refer_id' => $referMap[$referIndex]['p_refer_id'],
|
||||
'p_refer_id' => $referMap[$referIndex]['p_refer_id'],
|
||||
'text_start' => $cite['text_start'],
|
||||
'text_end' => $cite['text_end'],
|
||||
'status' => self::RECORD_PENDING,
|
||||
'created_at' => $now,
|
||||
'updated_at' => $now,
|
||||
]);
|
||||
$this->pushJob(intval($checkId), $delay);
|
||||
$checkIds[] = $checkId;
|
||||
$delay += 1;
|
||||
]));
|
||||
$pendingJobs[] = [
|
||||
'check_id' => intval($checkId),
|
||||
'reference_no' => intval($refNo),
|
||||
'am_id' => intval($main['am_id']),
|
||||
'text_start' => intval($cite['text_start']),
|
||||
];
|
||||
}
|
||||
}
|
||||
|
||||
$this->enqueueChecksSortedByReferenceNo($pendingJobs, $pArticleId, 'enqueue');
|
||||
$this->setAmRefCheckStatus($amId, self::AM_STATUS_RUNNING);
|
||||
}
|
||||
/**
|
||||
* 手工触发:对已完成且 confidence<=0.65 的记录入队 DOI 第二轮复核
|
||||
* 手工触发:对已完成且 confidence<=0.65 的记录同步执行 Crossref 二轮复核
|
||||
*/
|
||||
public function enqueueSecondPassByArticle($articleId)
|
||||
{
|
||||
@@ -247,7 +288,7 @@ class ReferenceCheckService
|
||||
$delay2 = 0;
|
||||
foreach ($rows as $checkLog) {
|
||||
$rowId = $this->resolveCheckRowId($checkLog);
|
||||
if ($this->maybeEnqueueSecondPass($rowId, floatval($checkLog['confidence']))) {
|
||||
if ($this->runSecondPassIfNeeded($rowId, floatval($checkLog['confidence']))) {
|
||||
$checkIds2[] = $rowId;
|
||||
$delay2 += 1;
|
||||
}
|
||||
@@ -299,7 +340,7 @@ class ReferenceCheckService
|
||||
$referText = $this->formatReferForLlm($refer);
|
||||
|
||||
// [70-73] 展开为 reference_no=70,71,72,73 共 4 条记录;先入队表,再按文献号正序校对
|
||||
$checkId = Db::name('article_reference_check_result')->insertGetId([
|
||||
$checkId = Db::name('article_reference_check_result')->insertGetId($this->newCheckRecordFields([
|
||||
'article_id' => $main['article_id'],
|
||||
'p_article_id' => $pArticleId,
|
||||
'am_id' => $amId,
|
||||
@@ -310,9 +351,10 @@ class ReferenceCheckService
|
||||
'p_refer_id' => $referMap[$referIndex]['p_refer_id'],
|
||||
'text_start' => $cite['text_start'],
|
||||
'text_end' => $cite['text_end'],
|
||||
'status' => self::RECORD_PENDING,
|
||||
'created_at' => $now,
|
||||
'updated_at' => $now,
|
||||
]);
|
||||
]));
|
||||
|
||||
$pendingJobs[] = [
|
||||
'check_id' => intval($checkId),
|
||||
@@ -325,8 +367,7 @@ class ReferenceCheckService
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
$checkIds = $this->pushJobsSortedByReferenceNo($pendingJobs);
|
||||
$checkIds = $this->enqueueChecksSortedByReferenceNo($pendingJobs, $pArticleId, 'enqueue');
|
||||
foreach (array_keys($amIdsWithJobs) as $amId) {
|
||||
$this->setAmRefCheckStatus($amId, self::AM_STATUS_RUNNING);
|
||||
}
|
||||
@@ -337,7 +378,7 @@ class ReferenceCheckService
|
||||
'queued' => $queued,
|
||||
'skipped' => $skipped,
|
||||
'check_ids' => $checkIds,
|
||||
'queue' => self::QUEUE_NAME,
|
||||
'queue' => self::TRANSPORT_RABBITMQ,
|
||||
];
|
||||
}
|
||||
public function enqueueByArticle($articleId){
|
||||
@@ -386,7 +427,7 @@ class ReferenceCheckService
|
||||
$referText = $this->formatReferForLlm($refer);
|
||||
|
||||
// [70-73] 展开为 reference_no=70,71,72,73 共 4 条记录;先入队表,再按文献号正序校对
|
||||
$checkId = Db::name('article_reference_check_result')->insertGetId([
|
||||
$checkId = Db::name('article_reference_check_result')->insertGetId($this->newCheckRecordFields([
|
||||
'article_id' => $main['article_id'],
|
||||
'p_article_id' => $pArticleId,
|
||||
'am_id' => $amId,
|
||||
@@ -397,9 +438,10 @@ class ReferenceCheckService
|
||||
'p_refer_id' => $referMap[$referIndex]['p_refer_id'],
|
||||
'text_start' => $cite['text_start'],
|
||||
'text_end' => $cite['text_end'],
|
||||
'status' => self::RECORD_PENDING,
|
||||
'created_at' => $now,
|
||||
'updated_at' => $now,
|
||||
]);
|
||||
]));
|
||||
|
||||
$pendingJobs[] = [
|
||||
'check_id' => intval($checkId),
|
||||
@@ -413,7 +455,7 @@ class ReferenceCheckService
|
||||
}
|
||||
}
|
||||
|
||||
$checkIds = $this->pushJobsSortedByReferenceNo($pendingJobs);
|
||||
$checkIds = $this->enqueueChecksSortedByReferenceNo($pendingJobs, $pArticleId, 'enqueue');
|
||||
foreach (array_keys($amIdsWithJobs) as $amId) {
|
||||
$this->setAmRefCheckStatus($amId, self::AM_STATUS_RUNNING);
|
||||
}
|
||||
@@ -424,7 +466,7 @@ class ReferenceCheckService
|
||||
'queued' => $queued,
|
||||
'skipped' => $skipped,
|
||||
'check_ids' => $checkIds,
|
||||
'queue' => self::QUEUE_NAME,
|
||||
'queue' => self::TRANSPORT_RABBITMQ,
|
||||
];
|
||||
}
|
||||
|
||||
@@ -524,14 +566,6 @@ class ReferenceCheckService
|
||||
->whereIn('state', [0, 2])
|
||||
->value('article_id'));
|
||||
|
||||
// 先清掉旧记录对应的队列 Redis 锁,避免在途 worker 写回数据
|
||||
$oldIds = Db::name('article_reference_check_result')
|
||||
->where('p_article_id', $pArticleId)
|
||||
->column('id');
|
||||
foreach ($oldIds as $oldId) {
|
||||
$this->clearReferenceCheckQueueLock(intval($oldId));
|
||||
}
|
||||
|
||||
$deleted = Db::name('article_reference_check_result')
|
||||
->where('p_article_id', $pArticleId)
|
||||
->delete();
|
||||
@@ -553,14 +587,6 @@ class ReferenceCheckService
|
||||
return 0;
|
||||
}
|
||||
|
||||
// 先清掉旧记录对应的队列 Redis 锁,否则同 check_id 在 TTL 内不会再次执行
|
||||
$oldIds = Db::name('article_reference_check_result')
|
||||
->where('article_id', $articleId)
|
||||
->column('id');
|
||||
foreach ($oldIds as $oldId) {
|
||||
$this->clearReferenceCheckQueueLock(intval($oldId));
|
||||
}
|
||||
|
||||
$deleted = Db::name('article_reference_check_result')->where('article_id', $articleId)->delete();
|
||||
if ($this->hasAmRefCheckStatusColumn()) {
|
||||
Db::name('article_main')
|
||||
@@ -1518,7 +1544,7 @@ class ReferenceCheckService
|
||||
* 编辑某条文献内容后,按 p_refer_id 异步重新校对该文献对应的全部 check 明细
|
||||
*
|
||||
* 流程:刷新 refer_text/refer_index → 重置 status/is_match/confidence/reason
|
||||
* → 设节级 ref_check_status=RUNNING → 投递到 ReferenceCheck 队列
|
||||
* → 设节级 ref_check_status=RUNNING → 投递 RabbitMQ 文章批次
|
||||
*
|
||||
* 与 recheckByRefer 的差异:本方法**不**在请求内同步跑 LLM,仅入队,立即返回。
|
||||
* 前端可调 getProgressByPArticleId 轮询进度。
|
||||
@@ -1567,11 +1593,11 @@ class ReferenceCheckService
|
||||
'reset' => 0,
|
||||
'queued' => 0,
|
||||
'check_ids' => [],
|
||||
'queue' => self::QUEUE_NAME,
|
||||
'queue' => self::TRANSPORT_RABBITMQ,
|
||||
];
|
||||
}
|
||||
|
||||
$resetFields = [
|
||||
$resetFields = $this->newCheckRecordFields([
|
||||
'refer_text' => $referText,
|
||||
'refer_index' => $referenceNo,
|
||||
'reference_no' => $referenceNo,
|
||||
@@ -1582,14 +1608,13 @@ class ReferenceCheckService
|
||||
'reason' => '',
|
||||
'error_msg' => '',
|
||||
'updated_at' => $now,
|
||||
];
|
||||
], self::QUEUE_PENDING, 0);
|
||||
|
||||
$pendingJobs = [];
|
||||
$amIds = [];
|
||||
foreach ($rows as $row) {
|
||||
$checkId = $this->resolveCheckRowId($row);
|
||||
Db::name('article_reference_check_result')->where('id', $checkId)->update($resetFields);
|
||||
$this->clearReferenceCheckQueueLock($checkId);
|
||||
$pendingJobs[] = [
|
||||
'check_id' => $checkId,
|
||||
'reference_no' => $referenceNo,
|
||||
@@ -1606,7 +1631,7 @@ class ReferenceCheckService
|
||||
$this->setAmRefCheckStatus($amId, self::AM_STATUS_RUNNING);
|
||||
}
|
||||
|
||||
$checkIds = $this->pushJobsSortedByReferenceNo($pendingJobs);
|
||||
$checkIds = $this->enqueueChecksSortedByReferenceNo($pendingJobs, $pArticleId, 'enqueue');
|
||||
|
||||
return [
|
||||
'p_refer_id' => $pReferId,
|
||||
@@ -1615,7 +1640,7 @@ class ReferenceCheckService
|
||||
'reset' => count($rows),
|
||||
'queued' => count($checkIds),
|
||||
'check_ids' => $checkIds,
|
||||
'queue' => self::QUEUE_NAME,
|
||||
'queue' => self::TRANSPORT_RABBITMQ,
|
||||
];
|
||||
}
|
||||
|
||||
@@ -1652,7 +1677,7 @@ class ReferenceCheckService
|
||||
'reset' => 0,
|
||||
'queued' => 0,
|
||||
'check_ids' => [],
|
||||
'queue' => self::QUEUE_NAME,
|
||||
'queue' => self::TRANSPORT_RABBITMQ,
|
||||
];
|
||||
}
|
||||
|
||||
@@ -1661,7 +1686,7 @@ class ReferenceCheckService
|
||||
}
|
||||
|
||||
$now = date('Y-m-d H:i:s');
|
||||
$resetFields = [
|
||||
$resetFields = $this->newCheckRecordFields([
|
||||
'status' => self::RECORD_PENDING,
|
||||
'is_match' => 0,
|
||||
'can_support' => 0,
|
||||
@@ -1669,14 +1694,13 @@ class ReferenceCheckService
|
||||
'reason' => '',
|
||||
'error_msg' => '',
|
||||
'updated_at' => $now,
|
||||
];
|
||||
], self::QUEUE_PENDING, 0);
|
||||
|
||||
$pendingJobs = [];
|
||||
$amIds = [];
|
||||
foreach ($rows as $row) {
|
||||
$checkId = $this->resolveCheckRowId($row);
|
||||
Db::name('article_reference_check_result')->where('id', $checkId)->update($resetFields);
|
||||
$this->clearReferenceCheckQueueLock($checkId);
|
||||
$pendingJobs[] = [
|
||||
'check_id' => $checkId,
|
||||
'reference_no' => intval($this->arrGet($row, 'reference_no', 0)),
|
||||
@@ -1693,7 +1717,7 @@ class ReferenceCheckService
|
||||
$this->setAmRefCheckStatus($amId, self::AM_STATUS_RUNNING);
|
||||
}
|
||||
|
||||
$checkIds = $this->pushJobsSortedByReferenceNo($pendingJobs);
|
||||
$checkIds = $this->enqueueChecksSortedByReferenceNo($pendingJobs, $pArticleId, 'recheck_failed');
|
||||
|
||||
return [
|
||||
'p_refer_id' => $pReferId,
|
||||
@@ -1701,7 +1725,7 @@ class ReferenceCheckService
|
||||
'reset' => count($rows),
|
||||
'queued' => count($checkIds),
|
||||
'check_ids' => $checkIds,
|
||||
'queue' => self::QUEUE_NAME,
|
||||
'queue' => self::TRANSPORT_RABBITMQ,
|
||||
];
|
||||
}
|
||||
|
||||
@@ -1735,11 +1759,11 @@ class ReferenceCheckService
|
||||
'reset' => 0,
|
||||
'queued' => 0,
|
||||
'check_ids' => [],
|
||||
'queue' => self::QUEUE_NAME,
|
||||
'queue' => self::TRANSPORT_RABBITMQ,
|
||||
];
|
||||
}
|
||||
|
||||
$resetFields = [
|
||||
$resetFields = $this->newCheckRecordFields([
|
||||
'refer_text' => $referText,
|
||||
'p_refer_id' => $pReferId,
|
||||
'p_article_id' => $pArticleId,
|
||||
@@ -1751,7 +1775,7 @@ class ReferenceCheckService
|
||||
'reason' => '',
|
||||
'error_msg' => '',
|
||||
'updated_at' => $now,
|
||||
];
|
||||
], self::QUEUE_PENDING, 0);
|
||||
|
||||
$pendingJobs = [];
|
||||
$amIds = [];
|
||||
@@ -1790,7 +1814,6 @@ class ReferenceCheckService
|
||||
foreach ($pendingJobs as $job) {
|
||||
$checkId = intval($job['check_id']);
|
||||
$checkIds[] = $checkId;
|
||||
$this->clearReferenceCheckQueueLock($checkId);
|
||||
try {
|
||||
$results[] = $this->runReferenceCheckOnce($checkId);
|
||||
} catch (\Exception $e) {
|
||||
@@ -1819,31 +1842,6 @@ class ReferenceCheckService
|
||||
];
|
||||
}
|
||||
|
||||
/**
|
||||
* 清除队列 Redis 完成标记,避免重检任务被 acquireLock 静默丢弃
|
||||
*/
|
||||
public function clearReferenceCheckQueueLock($checkId)
|
||||
{
|
||||
$checkId = intval($checkId);
|
||||
if ($checkId <= 0) {
|
||||
return;
|
||||
}
|
||||
try {
|
||||
$keys = [];
|
||||
foreach (['queue_job', 'queue_job_two'] as $prefix) {
|
||||
$class = $prefix === 'queue_job_two'
|
||||
? 'app\\api\\job\\ReferenceCheckTwo'
|
||||
: 'app\\api\\job\\ReferenceCheck';
|
||||
$base = $prefix . ':' . $class . ':' . $checkId;
|
||||
$keys[] = $base;
|
||||
$keys[] = $base . ':status';
|
||||
}
|
||||
QueueRedis::getInstance()->deleteRedisKeys($keys);
|
||||
} catch (\Exception $e) {
|
||||
\think\Log::warning('clearReferenceCheckQueueLock id=' . $checkId . ' ' . $e->getMessage());
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* 执行一次引用 LLM 校对(同步,写回 article_reference_check_result)
|
||||
*/
|
||||
@@ -1884,8 +1882,7 @@ class ReferenceCheckService
|
||||
$confidence = floatval(isset($llmResult['confidence']) ? $llmResult['confidence'] : 0);
|
||||
$reason = isset($llmResult['reason']) ? $llmResult['reason'] : '';
|
||||
|
||||
// LLM 通讯失败:写 status=RECORD_FAILED(3) + error_msg,抛异常让队列 worker 走 release(30) 重试;
|
||||
// 重试 3 次后 ReferenceCheck::markFailed 会保持 status=3 收尾
|
||||
// LLM 通讯失败:写 status=RECORD_FAILED(3) + error_msg,抛异常由 MQ worker 重试
|
||||
if ($requestFailed) {
|
||||
$this->updateCheckResult($checkId, [
|
||||
'confidence' => $confidence,
|
||||
@@ -1893,7 +1890,6 @@ class ReferenceCheckService
|
||||
'status' => self::RECORD_FAILED,
|
||||
'error_msg' => $reason,
|
||||
]);
|
||||
$this->clearReferenceCheckQueueLock($checkId);
|
||||
throw new \RuntimeException($reason !== '' ? $reason : 'LLM request failed');
|
||||
}
|
||||
|
||||
@@ -1906,8 +1902,9 @@ class ReferenceCheckService
|
||||
'error_msg' => '',
|
||||
]);
|
||||
|
||||
$this->clearReferenceCheckQueueLock($checkId);
|
||||
$this->maybeEnqueueSecondPass($checkId, $confidence);
|
||||
if ($confidence <= self::PASS_CONFIDENCE_THRESHOLD) {
|
||||
$this->runSecondPassBlocking($checkId, $row, $contentA, $refer, $contentB);
|
||||
}
|
||||
|
||||
return [
|
||||
'check_id' => $checkId,
|
||||
@@ -1918,6 +1915,82 @@ class ReferenceCheckService
|
||||
];
|
||||
}
|
||||
|
||||
/**
|
||||
* 低分结果的二轮 DOI 复核(同步阻塞执行;失败重试一次)
|
||||
*/
|
||||
public function runSecondPassBlocking($checkId, array $row, $contentA, $refer, $referText)
|
||||
{
|
||||
$checkId = intval($checkId);
|
||||
if ($checkId <= 0) {
|
||||
return false;
|
||||
}
|
||||
|
||||
$payload = $this->prepareRecheckPayload(is_array($refer) ? $refer : [], trim((string)$referText));
|
||||
if (empty($payload['has_abstract']) || trim((string)$payload['doi_block']) === '') {
|
||||
return false;
|
||||
}
|
||||
|
||||
$lastError = '';
|
||||
for ($attempt = 0; $attempt < 2; $attempt++) {
|
||||
try {
|
||||
$llmResult = (new LLMService())->checkReference($contentA, trim((string)$referText), true, $payload['doi_block']);
|
||||
$requestFailed = !empty($llmResult['request_failed']);
|
||||
$canSupport = $this->parseLlmCanSupport($llmResult);
|
||||
$confidence = floatval(isset($llmResult['confidence']) ? $llmResult['confidence'] : 0);
|
||||
$tag = '[Crossref复核' . (trim((string)$payload['doi_used']) !== '' ? (' ' . trim((string)$payload['doi_used'])) : '') . ']';
|
||||
$reason = $tag . ' ' . (isset($llmResult['reason']) ? $llmResult['reason'] : '');
|
||||
|
||||
if ($requestFailed) {
|
||||
$lastError = isset($llmResult['reason']) ? (string)$llmResult['reason'] : 'LLM request failed';
|
||||
if ($attempt < 1) {
|
||||
continue;
|
||||
}
|
||||
$this->updateCheckResult($checkId, [
|
||||
'confidence' => $confidence,
|
||||
'reason' => $reason,
|
||||
'status' => self::RECORD_FAILED,
|
||||
'error_msg' => $lastError,
|
||||
]);
|
||||
$amId = intval(isset($row['am_id']) ? $row['am_id'] : 0);
|
||||
if ($amId > 0) {
|
||||
$this->syncAmRefCheckStatus($amId);
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
$this->updateCheckResult($checkId, [
|
||||
'can_support' => $canSupport ? 1 : 0,
|
||||
'is_match' => $canSupport ? 1 : 0,
|
||||
'confidence' => $confidence,
|
||||
'reason' => $reason,
|
||||
'status' => self::RECORD_COMPLETED,
|
||||
'error_msg' => '',
|
||||
]);
|
||||
$amId = intval(isset($row['am_id']) ? $row['am_id'] : 0);
|
||||
if ($amId > 0) {
|
||||
$this->syncAmRefCheckStatus($amId);
|
||||
}
|
||||
return true;
|
||||
} catch (\Exception $e) {
|
||||
$lastError = $e->getMessage();
|
||||
if ($attempt < 1) {
|
||||
continue;
|
||||
}
|
||||
$this->updateCheckResult($checkId, [
|
||||
'status' => self::RECORD_FAILED,
|
||||
'error_msg' => $lastError,
|
||||
]);
|
||||
$amId = intval(isset($row['am_id']) ? $row['am_id'] : 0);
|
||||
if ($amId > 0) {
|
||||
$this->syncAmRefCheckStatus($amId);
|
||||
}
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return array{refer: array, p_article_id: int, p_refer_id: int, reference_no: int}
|
||||
*/
|
||||
@@ -2622,18 +2695,13 @@ class ReferenceCheckService
|
||||
}
|
||||
|
||||
/**
|
||||
* 第一轮 confidence<=0.65 且能抓到 DOI 真实内容时,延迟入队第二轮复核
|
||||
*
|
||||
* 跳过条件(避免无意义重跑得到相同结果):
|
||||
* - check_id 不合法 / 一次置信度高于阈值
|
||||
* - refer 行不存在
|
||||
* - refer_doi 为空或 Crossref 未返回摘要
|
||||
* 对已完成且低分的记录尝试同步 Crossref 二轮(供 enqueueSecondPassByArticle 等手工入口)
|
||||
*/
|
||||
public function maybeEnqueueSecondPass($checkId, $confidence)
|
||||
public function runSecondPassIfNeeded($checkId, $confidence)
|
||||
{
|
||||
$checkId = intval($checkId);
|
||||
$confidence = floatval($confidence);
|
||||
if ($checkId <= 0 || $confidence > 0.65) {
|
||||
if ($checkId <= 0 || $confidence > self::PASS_CONFIDENCE_THRESHOLD) {
|
||||
return false;
|
||||
}
|
||||
|
||||
@@ -2658,9 +2726,13 @@ class ReferenceCheckService
|
||||
return false;
|
||||
}
|
||||
|
||||
$this->clearReferenceCheckQueueLock($checkId);
|
||||
$this->pushJob2($checkId, 5);
|
||||
return true;
|
||||
$contentA = $this->resolveMainContentForJob($row);
|
||||
$referText = trim((string)$this->arrGet($row, 'refer_text', ''));
|
||||
if ($referText === '' && is_array($refer)) {
|
||||
$referText = $this->formatReferForLlm($refer);
|
||||
}
|
||||
|
||||
return $this->runSecondPassBlocking($checkId, $row, $contentA, $refer, $referText);
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -3047,72 +3119,93 @@ class ReferenceCheckService
|
||||
}
|
||||
|
||||
/**
|
||||
* 已入库记录按文献编号正序入队(同号按 am_id、正文位置稳定排序)
|
||||
* 批量记录已入库后创建文章批次并投递 RabbitMQ
|
||||
*
|
||||
* @param array $rows 元素含 check_id、reference_no,可选 am_id、text_start
|
||||
* @param array $rows 元素含 check_id
|
||||
* @param int $pArticleId
|
||||
* @param string $trigger enqueue|recheck_failed|manual
|
||||
* @return int[] check_id 列表
|
||||
*/
|
||||
private function pushJobsSortedByReferenceNo(array $rows)
|
||||
private function enqueueChecksSortedByReferenceNo(array $rows, $pArticleId = 0, $trigger = 'enqueue')
|
||||
{
|
||||
if (empty($rows)) {
|
||||
$checkIds = [];
|
||||
foreach ($rows as $row) {
|
||||
$checkId = intval($row['check_id']);
|
||||
if ($checkId > 0) {
|
||||
$checkIds[] = $checkId;
|
||||
}
|
||||
}
|
||||
if (!empty($checkIds)) {
|
||||
$this->startArticleCheckQueue($checkIds, intval($pArticleId), $trigger);
|
||||
}
|
||||
return $checkIds;
|
||||
}
|
||||
|
||||
/**
|
||||
* 创建文章批次;队首批次立即发 MQ,其余批次等待前序完成
|
||||
*
|
||||
* @param int[] $checkIds
|
||||
* @param int $pArticleId
|
||||
* @param string $trigger
|
||||
* @return int[]
|
||||
*/
|
||||
public function startArticleCheckQueue(array $checkIds, $pArticleId = 0, $trigger = 'enqueue')
|
||||
{
|
||||
$checkIds = array_values(array_filter(array_map('intval', $checkIds)));
|
||||
if (empty($checkIds)) {
|
||||
return [];
|
||||
}
|
||||
|
||||
usort($rows, function ($a, $b) {
|
||||
if ($a['reference_no'] !== $b['reference_no']) {
|
||||
return $a['reference_no'] - $b['reference_no'];
|
||||
}
|
||||
$amA = isset($a['am_id']) ? intval($a['am_id']) : 0;
|
||||
$amB = isset($b['am_id']) ? intval($b['am_id']) : 0;
|
||||
if ($amA !== $amB) {
|
||||
return $amA - $amB;
|
||||
}
|
||||
$posA = isset($a['text_start']) ? intval($a['text_start']) : 0;
|
||||
$posB = isset($b['text_start']) ? intval($b['text_start']) : 0;
|
||||
return $posA - $posB;
|
||||
});
|
||||
$pArticleId = intval($pArticleId);
|
||||
if ($pArticleId <= 0) {
|
||||
$firstRow = Db::name('article_reference_check_result')->where('id', $checkIds[0])->find();
|
||||
$pArticleId = empty($firstRow) ? 0 : intval($this->arrGet($firstRow, 'p_article_id', 0));
|
||||
}
|
||||
if ($pArticleId <= 0) {
|
||||
throw new \RuntimeException('p_article_id is required for reference check queue');
|
||||
}
|
||||
|
||||
$checkIds = [];
|
||||
$delay = 0;
|
||||
foreach ($rows as $row) {
|
||||
$checkId = intval($row['check_id']);
|
||||
$checkIds[] = $checkId;
|
||||
$this->pushJob($checkId, $delay);
|
||||
$delay++;
|
||||
$now = date('Y-m-d H:i:s');
|
||||
$batchId = Db::name('article_reference_check_batch')->insertGetId([
|
||||
'p_article_id' => $pArticleId,
|
||||
'batch_status' => 0,
|
||||
'total_count' => count($checkIds),
|
||||
'done_count' => 0,
|
||||
'failed_count' => 0,
|
||||
'trigger' => (string)$trigger,
|
||||
'created_at' => $now,
|
||||
'updated_at' => $now,
|
||||
]);
|
||||
|
||||
$shouldPublish = !$this->hasEarlierWaitingBatch($batchId) && !$this->hasRunningReferenceCheckBatch();
|
||||
if ($shouldPublish) {
|
||||
(new ReferenceCheckMqPublisher())->publishArticleStart($pArticleId, intval($batchId), $trigger);
|
||||
$this->log('startArticleCheckQueue publish p_article_id=' . $pArticleId . ' batch_id=' . $batchId);
|
||||
} else {
|
||||
$this->log('startArticleCheckQueue queued batch_id=' . $batchId . ' p_article_id=' . $pArticleId);
|
||||
}
|
||||
|
||||
return $checkIds;
|
||||
}
|
||||
|
||||
private function pushJob($checkId, $delaySeconds = 0)
|
||||
private function hasRunningReferenceCheckBatch()
|
||||
{
|
||||
$checkId = intval($checkId);
|
||||
$this->clearReferenceCheckQueueLock($checkId);
|
||||
$jobClass = 'app\api\job\ReferenceCheck@fire';
|
||||
$data = ['check_id' => $checkId];
|
||||
try {
|
||||
if ($delaySeconds > 0) {
|
||||
$jobId = Queue::later($delaySeconds, $jobClass, $data, self::QUEUE_NAME);
|
||||
} else {
|
||||
$jobId = Queue::push($jobClass, $data, self::QUEUE_NAME);
|
||||
}
|
||||
} catch (\Exception $e) {
|
||||
\think\Log::error('ReferenceCheck pushJob failed check_id=' . $checkId . ' ' . $e->getMessage());
|
||||
throw $e;
|
||||
}
|
||||
return Db::name('article_reference_check_batch')
|
||||
->where('batch_status', 1)
|
||||
->count() > 0;
|
||||
}
|
||||
private function pushJob2($checkId, $delaySeconds = 0)
|
||||
|
||||
private function hasEarlierWaitingBatch($batchId)
|
||||
{
|
||||
$jobClass = 'app\api\job\ReferenceCheckTwo@fire';
|
||||
$data = ['check_id' => $checkId];
|
||||
try {
|
||||
if ($delaySeconds > 0) {
|
||||
$jobId = Queue::later($delaySeconds, $jobClass, $data, self::QUEUE_NAME);
|
||||
} else {
|
||||
$jobId = Queue::push($jobClass, $data, self::QUEUE_NAME);
|
||||
}
|
||||
} catch (\Exception $e) {
|
||||
\think\Log::error('ReferenceCheckTwo pushJob failed check_id=' . $checkId . ' ' . $e->getMessage());
|
||||
throw $e;
|
||||
}
|
||||
return Db::name('article_reference_check_batch')
|
||||
->where('batch_status', 0)
|
||||
->where('id', '<', intval($batchId))
|
||||
->count() > 0;
|
||||
}
|
||||
|
||||
public function log($msg)
|
||||
{
|
||||
$line = date('Y-m-d H:i:s') . ' ' . $msg . PHP_EOL;
|
||||
@file_put_contents($this->logFile, $line, FILE_APPEND);
|
||||
}
|
||||
}
|
||||
|
||||
24
application/common/mq/RabbitMqConfig.php
Normal file
24
application/common/mq/RabbitMqConfig.php
Normal file
@@ -0,0 +1,24 @@
|
||||
<?php
|
||||
|
||||
namespace app\common\mq;
|
||||
|
||||
/**
 * Static accessor for the application's `rabbitmq` configuration group.
 */
class RabbitMqConfig
{
    /**
     * Return the whole `rabbitmq` config array, or one top-level entry of it.
     *
     * @param string|null $key     Entry name; null returns the full array.
     * @param mixed       $default Returned when the entry is not set (or is null).
     * @return mixed
     */
    public static function get($key = null, $default = null)
    {
        $settings = config('rabbitmq');
        // A missing or malformed config group is treated as an empty one.
        $settings = is_array($settings) ? $settings : [];

        if (null === $key) {
            return $settings;
        }
        if (isset($settings[$key])) {
            return $settings[$key];
        }

        return $default;
    }

    /**
     * Return the `reference_check` sub-section, always as an array.
     *
     * @return array
     */
    public static function referenceCheck()
    {
        $section = self::get('reference_check', []);
        if (!is_array($section)) {
            return [];
        }

        return $section;
    }
}
|
||||
200
application/common/mq/ReferenceCheckArticleWorker.php
Normal file
200
application/common/mq/ReferenceCheckArticleWorker.php
Normal file
@@ -0,0 +1,200 @@
|
||||
<?php
|
||||
|
||||
namespace app\common\mq;
|
||||
|
||||
use think\Db;
|
||||
use app\common\ReferenceCheckService;
|
||||
|
||||
/**
 * RabbitMQ consumer for reference checking.
 *
 * Articles are processed one batch at a time; inside an article the pending
 * rows are checked one by one in ascending reference_no order (the check
 * itself includes the synchronous second pass for low-scoring results).
 */
class ReferenceCheckArticleWorker
{
    const BATCH_WAITING = 0;
    const BATCH_RUNNING = 1;
    const BATCH_DONE = 2;
    const BATCH_PARTIAL_FAILED = 3;

    /** @var ReferenceCheckService */
    private $svc;

    public function __construct()
    {
        $this->svc = new ReferenceCheckService();
    }

    /**
     * Handle one "article start" message.
     *
     * Expects `p_article_id` and `batch_id` (both > 0) and an optional `trigger`.
     * If another batch is running the message is re-published and the call
     * returns after a short sleep. Otherwise every pending row of the article is
     * processed, the batch is finalized and the next waiting batch is published.
     *
     * @param array $payload Decoded message body.
     * @return void
     */
    public function handleMessage(array $payload)
    {
        $pArticleId = intval(isset($payload['p_article_id']) ? $payload['p_article_id'] : 0);
        $batchId = intval(isset($payload['batch_id']) ? $payload['batch_id'] : 0);
        if ($pArticleId <= 0 || $batchId <= 0) {
            $this->svc->log('ReferenceCheckArticleWorker invalid payload');
            return;
        }

        if (!$this->canStartArticleWork($batchId)) {
            $this->svc->log('ReferenceCheckArticleWorker defer batch_id=' . $batchId . ' other article running');
            (new ReferenceCheckMqPublisher())->publishArticleStart($pArticleId, $batchId, isset($payload['trigger']) ? $payload['trigger'] : 'enqueue');
            // Pause before returning so the re-published message is not retried in a tight loop.
            sleep(3);
            return;
        }

        if (!$this->claimBatch($batchId)) {
            $batch = $this->getBatch($batchId);
            // A missing batch or one already in a terminal state must not be run
            // again: doing so would overwrite its stored counts and status.
            // NOTE(review): a failed claim on a still-RUNNING batch is allowed to
            // continue, presumably to resume after a redelivery — confirm.
            if (empty($batch) || in_array(intval($batch['batch_status']), [self::BATCH_DONE, self::BATCH_PARTIAL_FAILED], true)) {
                return;
            }
        }

        $this->svc->log('ReferenceCheckArticleWorker start p_article_id=' . $pArticleId . ' batch_id=' . $batchId);

        $done = 0;
        $failed = 0;
        while (true) {
            $row = $this->fetchNextPendingRow($pArticleId);
            if (empty($row)) {
                break;
            }
            $checkId = $this->svc->resolveCheckRowId($row);
            if ($checkId <= 0) {
                // The row cannot be claimed without an id, so the next fetch would
                // return it again; stop instead of spinning forever and make the
                // batch end as partially failed.
                $this->svc->log('ReferenceCheckArticleWorker unresolvable row id p_article_id=' . $pArticleId . ' batch_id=' . $batchId);
                $failed++;
                break;
            }
            $result = $this->processOneRow($checkId, $row);
            if ($result === 'ok') {
                $done++;
            } elseif ($result === 'failed') {
                $failed++;
            }
        }

        $this->finalizeBatch($batchId, $done, $failed);
        $this->svc->log('ReferenceCheckArticleWorker done p_article_id=' . $pArticleId . ' batch_id=' . $batchId . ' done=' . $done . ' failed=' . $failed);

        $this->publishNextWaitingBatch();
    }

    /**
     * True when no batch other than $batchId is currently marked RUNNING.
     *
     * @param int $batchId
     * @return bool
     */
    private function canStartArticleWork($batchId)
    {
        $running = Db::name('article_reference_check_batch')
            ->where('batch_status', self::BATCH_RUNNING)
            ->where('id', '<>', intval($batchId))
            ->count();
        return intval($running) === 0;
    }

    /**
     * Move a WAITING or RUNNING batch to RUNNING.
     *
     * @param int $batchId
     * @return bool true when the update reported at least one affected row.
     */
    private function claimBatch($batchId)
    {
        $now = date('Y-m-d H:i:s');
        $affected = Db::name('article_reference_check_batch')
            ->where('id', intval($batchId))
            ->whereIn('batch_status', [self::BATCH_WAITING, self::BATCH_RUNNING])
            ->update([
                'batch_status' => self::BATCH_RUNNING,
                'updated_at' => $now,
            ]);
        return intval($affected) > 0;
    }

    /**
     * Load one batch row by id.
     *
     * @param int $batchId
     * @return array|null
     */
    private function getBatch($batchId)
    {
        return Db::name('article_reference_check_batch')->where('id', intval($batchId))->find();
    }

    /**
     * Fetch the next row of the article that is pending both in the queue and
     * as a record, ordered by reference_no, am_id, text_start, id.
     *
     * @param int $pArticleId
     * @return array|null
     */
    private function fetchNextPendingRow($pArticleId)
    {
        return Db::name('article_reference_check_result')
            ->where('p_article_id', intval($pArticleId))
            ->where('queue_status', ReferenceCheckService::QUEUE_PENDING)
            ->where('status', ReferenceCheckService::RECORD_PENDING)
            ->order('reference_no asc,am_id asc,text_start asc,id asc')
            ->find();
    }

    /**
     * Claim and check one row, retrying on exceptions up to QUEUE_MAX_RETRY.
     *
     * @param int   $checkId
     * @param array $row Row as fetched; `retry_count` and `am_id` are read.
     * @return string ok|failed|skip ("skip" when the row was no longer pending)
     */
    private function processOneRow($checkId, array $row)
    {
        // Conditional update acts as the claim: only a still-pending row is taken.
        $claimed = Db::name('article_reference_check_result')
            ->where('id', intval($checkId))
            ->where('queue_status', ReferenceCheckService::QUEUE_PENDING)
            ->update(['queue_status' => ReferenceCheckService::QUEUE_RUNNING]);
        if (intval($claimed) <= 0) {
            return 'skip';
        }

        $retryCount = intval(isset($row['retry_count']) ? $row['retry_count'] : 0);
        try {
            $this->svc->runReferenceCheckOnce($checkId);
            $amId = intval(isset($row['am_id']) ? $row['am_id'] : 0);
            if ($amId > 0) {
                $this->svc->syncAmRefCheckStatus($amId);
            }
            $this->svc->markQueueRuntime($checkId, ReferenceCheckService::QUEUE_COMPLETED, $retryCount);
            return 'ok';
        } catch (\Exception $e) {
            $this->svc->log('ReferenceCheckArticleWorker check_id=' . $checkId . ' err=' . $e->getMessage());
            if ($retryCount < ReferenceCheckService::QUEUE_MAX_RETRY) {
                // Put the row back to pending with a bumped counter and try again
                // immediately; recursion depth is bounded by QUEUE_MAX_RETRY.
                $this->svc->markQueueRuntime($checkId, ReferenceCheckService::QUEUE_PENDING, $retryCount + 1);
                return $this->processOneRow($checkId, array_merge($row, ['retry_count' => $retryCount + 1]));
            }
            try {
                $this->svc->updateCheckResult($checkId, [
                    'status' => ReferenceCheckService::RECORD_FAILED,
                    'error_msg' => $e->getMessage(),
                ]);
                $this->svc->markQueueRuntime($checkId, ReferenceCheckService::QUEUE_FAILED, $retryCount);
            } catch (\Exception $e2) {
                \think\Log::error('ReferenceCheckArticleWorker markFailed: ' . $e2->getMessage());
            }
            $amId = intval(isset($row['am_id']) ? $row['am_id'] : 0);
            if ($amId > 0) {
                $this->svc->syncAmRefCheckStatus($amId);
            }
            return 'failed';
        }
    }

    /**
     * Store the final counts and status of a batch.
     *
     * The batch becomes PARTIAL_FAILED when at least one row failed, DONE
     * otherwise. A log line is written when fewer rows were handled than the
     * batch's total_count.
     *
     * @param int $batchId
     * @param int $done
     * @param int $failed
     * @return void
     */
    private function finalizeBatch($batchId, $done, $failed)
    {
        $batch = $this->getBatch($batchId);
        if (empty($batch)) {
            return;
        }
        $total = intval($batch['total_count']);
        $status = self::BATCH_DONE;
        if ($failed > 0) {
            $status = self::BATCH_PARTIAL_FAILED;
        }
        Db::name('article_reference_check_batch')->where('id', intval($batchId))->update([
            'batch_status' => $status,
            'done_count' => intval($done),
            'failed_count' => intval($failed),
            'updated_at' => date('Y-m-d H:i:s'),
        ]);
        if ($total > 0 && ($done + $failed) < $total) {
            $this->svc->log('ReferenceCheckArticleWorker batch_id=' . $batchId . ' incomplete total=' . $total);
        }
    }

    /**
     * Publish the oldest WAITING batch (lowest id), if any.
     *
     * Publish errors are logged and swallowed so that a finished batch is not
     * reported as failed because the follow-up publish did not succeed.
     *
     * @return void
     */
    private function publishNextWaitingBatch()
    {
        $next = Db::name('article_reference_check_batch')
            ->where('batch_status', self::BATCH_WAITING)
            ->order('id asc')
            ->find();
        if (empty($next)) {
            return;
        }
        try {
            (new ReferenceCheckMqPublisher())->publishArticleStart(
                intval($next['p_article_id']),
                intval($next['id']),
                isset($next['trigger']) ? $next['trigger'] : 'enqueue'
            );
        } catch (\Exception $e) {
            $this->svc->log('publishNextWaitingBatch failed: ' . $e->getMessage());
            \think\Log::error('publishNextWaitingBatch: ' . $e->getMessage());
        }
    }
}
|
||||
73
application/common/mq/ReferenceCheckMqPublisher.php
Normal file
73
application/common/mq/ReferenceCheckMqPublisher.php
Normal file
@@ -0,0 +1,73 @@
|
||||
<?php
|
||||
|
||||
namespace app\common\mq;
|
||||
|
||||
/**
 * Publishes "start checking this article batch" messages to RabbitMQ.
 *
 * Every publish opens its own connection, declares the exchange, queue and
 * dead-letter queue, sends one persistent JSON message and closes the
 * connection again.
 */
class ReferenceCheckMqPublisher
{
    /**
     * Publish one article-start message.
     *
     * @param int    $pArticleId Article id; must be > 0.
     * @param int    $batchId    Batch id; must be > 0.
     * @param string $trigger    Free-form origin label carried in the message.
     * @return bool Always true; failures are reported through exceptions.
     * @throws \RuntimeException         When php-amqplib is missing or the
     *                                   payload cannot be JSON-encoded.
     * @throws \InvalidArgumentException When either id is not positive.
     */
    public function publishArticleStart($pArticleId, $batchId, $trigger = 'enqueue')
    {
        if (!class_exists('\\PhpAmqpLib\\Connection\\AMQPStreamConnection')) {
            throw new \RuntimeException('php-amqplib not installed. Run: php composer.phar require php-amqplib/php-amqplib:^2.12');
        }

        $pArticleId = intval($pArticleId);
        $batchId = intval($batchId);
        if ($pArticleId <= 0 || $batchId <= 0) {
            throw new \InvalidArgumentException('invalid p_article_id or batch_id');
        }

        $body = json_encode([
            'p_article_id' => $pArticleId,
            'batch_id' => $batchId,
            'trigger' => (string)$trigger,
            'ts' => time(),
        ], JSON_UNESCAPED_UNICODE);
        if ($body === false) {
            // json_encode fails e.g. on a trigger that is not valid UTF-8. Publishing
            // the resulting `false` would put an unusable message on the queue.
            throw new \RuntimeException('failed to encode reference check message: ' . json_last_error_msg());
        }

        $rc = RabbitMqConfig::referenceCheck();
        $exchange = isset($rc['exchange']) ? $rc['exchange'] : 'reference_check';
        $routeKey = isset($rc['route_key']) ? $rc['route_key'] : 'article.start';

        $conn = $this->connect();
        try {
            $ch = $conn->channel();
            $this->declareTopology($ch, $rc);
            $msg = new \PhpAmqpLib\Message\AMQPMessage($body, [
                'content_type' => 'application/json',
                'delivery_mode' => \PhpAmqpLib\Message\AMQPMessage::DELIVERY_MODE_PERSISTENT,
            ]);
            $ch->basic_publish($msg, $exchange, $routeKey);
            $ch->close();
        } finally {
            // Always release the connection, including when publishing throws.
            $conn->close();
        }

        return true;
    }

    /**
     * Open a connection using the `rabbitmq` config group, falling back to
     * 127.0.0.1:5672, guest/guest and vhost "/" for missing entries.
     *
     * @return \PhpAmqpLib\Connection\AMQPStreamConnection
     */
    private function connect()
    {
        return new \PhpAmqpLib\Connection\AMQPStreamConnection(
            RabbitMqConfig::get('host', '127.0.0.1'),
            intval(RabbitMqConfig::get('port', 5672)),
            RabbitMqConfig::get('user', 'guest'),
            RabbitMqConfig::get('password', 'guest'),
            RabbitMqConfig::get('vhost', '/')
        );
    }

    /**
     * Declare the direct exchange, the dead-letter queue and the work queue, and
     * bind the work queue to the exchange.
     *
     * The work queue is declared with dead-letter arguments that route rejected
     * messages through the default exchange ('') to the DLQ by its queue name.
     *
     * @param \PhpAmqpLib\Channel\AMQPChannel $ch
     * @param array                           $rc `reference_check` config section.
     * @return void
     */
    private function declareTopology($ch, array $rc)
    {
        $exchange = isset($rc['exchange']) ? $rc['exchange'] : 'reference_check';
        $queue = isset($rc['queue']) ? $rc['queue'] : 'ref_check.article';
        $dlq = isset($rc['dlq']) ? $rc['dlq'] : 'ref_check.article.dlq';
        $routeKey = isset($rc['route_key']) ? $rc['route_key'] : 'article.start';

        // Positional flags follow php-amqplib's declare signatures
        // (passive, durable, ...): both queues and the exchange are durable.
        $ch->exchange_declare($exchange, 'direct', false, true, false);
        $ch->queue_declare($dlq, false, true, false, false);
        $ch->queue_declare($queue, false, true, false, false, false, new \PhpAmqpLib\Wire\AMQPTable([
            'x-dead-letter-exchange' => '',
            'x-dead-letter-routing-key' => $dlq,
        ]));
        $ch->queue_bind($queue, $exchange, $routeKey);
    }
}
|
||||
@@ -33,7 +33,7 @@ class LLMService
|
||||
public function checkReference($contextText, $referText, $isAgain = false, $doiBlock = null)
|
||||
{
|
||||
// request_failed=true 表示"LLM 通讯/解析层面的失败"(可重试,区别于业务上的"未命中");
|
||||
// 上游 runReferenceCheckOnce 会据此把 DB.status 置为 2(失败) 并抛异常触发队列重试
|
||||
// 上游 runReferenceCheckOnce 会据此把 DB.status 置为 3(失败) 并抛异常触发 MQ worker 重试
|
||||
$fallback = [
|
||||
'can_support' => false,
|
||||
'is_match' => false,
|
||||
|
||||
16
application/extra/rabbitmq.php
Normal file
16
application/extra/rabbitmq.php
Normal file
@@ -0,0 +1,16 @@
|
||||
<?php
|
||||
|
||||
// RabbitMQ connection settings and the names used by the reference-check queue.
//
// Connection values may be overridden through environment variables so that
// real credentials do not have to be kept in version control; the literals are
// kept only as backward-compatible fallbacks.
// NOTE(review): the fallback password is committed in plain text — rotate it
// and supply the real one through PHP_RABBITMQ_PASSWORD.
// NOTE(review): the PHP_ prefix is chosen on the assumption that ThinkPHP 5
// exposes .env entries under it — confirm for this deployment.
$rabbitEnv = function ($name, $default) {
    $value = getenv($name);
    // Treat unset and empty variables alike so a blank entry cannot wipe a default.
    return ($value === false || $value === '') ? $default : $value;
};

return [
    'host'     => $rabbitEnv('PHP_RABBITMQ_HOST', '127.0.0.1'),
    'port'     => intval($rabbitEnv('PHP_RABBITMQ_PORT', 5672)),
    'user'     => $rabbitEnv('PHP_RABBITMQ_USER', 'admin'),
    'password' => $rabbitEnv('PHP_RABBITMQ_PASSWORD', '751019'),
    'vhost'    => $rabbitEnv('PHP_RABBITMQ_VHOST', '/'),

    // Topology for the article-level reference check consumer.
    'reference_check' => [
        'exchange'  => 'reference_check',
        'queue'     => 'ref_check.article',
        'dlq'       => 'ref_check.article.dlq',
        'route_key' => 'article.start',
    ],
];
|
||||
Reference in New Issue
Block a user