diff --git a/application/api/controller/Base.php b/application/api/controller/Base.php index 77e1da7b..3b2c4627 100644 --- a/application/api/controller/Base.php +++ b/application/api/controller/Base.php @@ -271,6 +271,14 @@ class Base extends Controller } $this->production_article_refer_obj->where('p_article_id', $refer_info['p_article_id'])->where('index', ">", $refer_info['index'])->where('state', 0)->setDec('index'); $this->production_article_refer_obj->where('p_refer_id', $p_refer_id)->update(['state' => 1]); + + // 文献集合已变更,原校对结果的 reference_no 已全部错位,整篇标记为未校对 + try { + (new \app\common\ReferenceCheckService()) + ->clearArticleChecksByPArticleId(intval($refer_info['p_article_id'])); + } catch (\Exception $e) { + \think\Log::error('delOneRefer clearArticleChecksByPArticleId p_refer_id=' . $p_refer_id . ' ' . $e->getMessage()); + } } diff --git a/application/api/controller/Preaccept.php b/application/api/controller/Preaccept.php index 9b4867c7..166af09f 100644 --- a/application/api/controller/Preaccept.php +++ b/application/api/controller/Preaccept.php @@ -7,6 +7,7 @@ use think\Env; use think\Queue; use think\Validate; use app\common\CrossrefService; +use app\common\ReferenceCheckService; class Preaccept extends Base { @@ -15,6 +16,26 @@ class Preaccept extends Base parent::__construct($request); } + /** + * 新增/修改导致文献集合改变后,清空整篇校对明细,使文章状态回到"未校对"。 + * 失败仅记日志,不阻塞主流程。 + */ + private function resetArticleChecksOnReferChange($pArticleId, $sourceTag = '') + { + $pArticleId = intval($pArticleId); + if ($pArticleId <= 0) { + return; + } + try { + (new ReferenceCheckService())->clearArticleChecksByPArticleId($pArticleId); + } catch (\Exception $e) { + \think\Log::error( + 'resetArticleChecksOnReferChange[' . $sourceTag . '] p_article_id=' + . $pArticleId . ' ' . 
$e->getMessage() + ); + } + } + /**获取文章参考文献列表 * @return \think\response\Json @@ -92,6 +113,7 @@ class Preaccept extends Base return jsonError($rule->getError()); } $this->production_article_refer_obj->where('p_article_id',$data['p_article_id'])->update(["state"=>1]); + $this->resetArticleChecksOnReferChange(intval($data['p_article_id']), 'discardRefersByParticleid'); return jsonSuccess([]); } @@ -142,6 +164,7 @@ class Preaccept extends Base } $adId= $this->production_article_refer_obj->insertGetId($insert); $this->production_article_refer_obj->where('p_article_id', $p_info['p_article_id'])->where("p_refer_id", "<>", $adId)->where("index", ">", $pre_refer['index'])->where('state', 0)->setInc('index'); + $this->resetArticleChecksOnReferChange(intval($p_info['p_article_id']), 'addRefer'); return jsonSuccess([]); @@ -198,6 +221,7 @@ class Preaccept extends Base } $adId= $this->production_article_refer_obj->insertGetId($insert); $this->production_article_refer_obj->where('p_article_id', $p_info['p_article_id'])->where("p_refer_id", "<>", $adId)->where("index", ">", $pre_refer['index'])->where('state', 0)->setInc('index'); + $this->resetArticleChecksOnReferChange(intval($p_info['p_article_id']), 'addReferByParticleid'); return jsonSuccess([]); } @@ -233,6 +257,7 @@ class Preaccept extends Base $insert['cs'] = 1; $adId = $this->production_article_refer_obj->insertGetId($insert); $this->production_article_refer_obj->where('p_article_id', $p_info['p_article_id'])->where("p_refer_id", "<>", $adId)->where("index", ">", $pre_refer['index'])->where('state', 0)->setInc('index'); + $this->resetArticleChecksOnReferChange(intval($p_info['p_article_id']), 'addReferNotdoi'); return jsonSuccess([]); } @@ -462,6 +487,17 @@ class Preaccept extends Base // } // $this->production_article_refer_obj->where('p_refer_id', $data['p_refer_id'])->update(['refer_doi' => $data['doi']]); // my_doiToFrag2($this->production_article_refer_obj->where('p_refer_id', $data['p_refer_id'])->find()); + + 
//文献内容更新成功后异步重检该文献对应的全部校对明细(失败不阻塞主流程) + try { + (new ReferenceCheckService())->enqueueRecheckByPReferId( + intval($data['p_refer_id']), + intval($old_refer_info['p_article_id']) + ); + } catch (\Exception $e) { + \think\Log::error('editRefer enqueueRecheckByPReferId p_refer_id=' . $data['p_refer_id'] . ' ' . $e->getMessage()); + } + return jsonSuccess([]); } @@ -1453,6 +1489,7 @@ class Preaccept extends Base return jsonError($rule->getError()); } $refer_info = $this->production_article_refer_obj->where('p_refer_id', $data['p_refer_id'])->find(); + $sibling_p_refer_id = 0; if ($data['act'] == "up") { $up_info = $this->production_article_refer_obj->where('p_article_id', $refer_info['p_article_id'])->where('index', $refer_info['index'] - 1)->where('state', 0)->find(); if (!$up_info) { @@ -1460,6 +1497,7 @@ class Preaccept extends Base } $this->production_article_refer_obj->where('p_refer_id', $up_info['p_refer_id'])->setInc("index"); $this->production_article_refer_obj->where('p_refer_id', $refer_info['p_refer_id'])->setDec("index"); + $sibling_p_refer_id = intval($up_info['p_refer_id']); } else { $down_info = $this->production_article_refer_obj->where('p_article_id', $refer_info['p_article_id'])->where('index', $refer_info['index'] + 1)->where('state', 0)->find(); if (!$down_info) { @@ -1467,7 +1505,19 @@ class Preaccept extends Base } $this->production_article_refer_obj->where('p_refer_id', $refer_info['p_refer_id'])->setInc("index"); $this->production_article_refer_obj->where('p_refer_id', $down_info['p_refer_id'])->setDec("index"); + $sibling_p_refer_id = intval($down_info['p_refer_id']); } + + // 仅同步本次交换的两条 p_refer_id 对应的校对明细 reference_no / refer_index + try { + (new ReferenceCheckService())->syncReferenceNoByPReferIds( + [intval($refer_info['p_refer_id']), $sibling_p_refer_id], + intval($refer_info['p_article_id']) + ); + } catch (\Exception $e) { + \think\Log::error('sortRefer syncReferenceNoByPReferIds: ' . 
$e->getMessage()); + } + return jsonSuccess([]); } diff --git a/application/api/controller/References.php b/application/api/controller/References.php index 3f9a32b2..ac1a2b77 100644 --- a/application/api/controller/References.php +++ b/application/api/controller/References.php @@ -1308,4 +1308,231 @@ class References extends Base } return json_encode(['status' => 8,'msg' => 'fail']); } + /** + * 参考文献第一次校对 + * @return \think\response\Json + */ + public function allReferenceCheckAI(){ + //获取参数 + $aParam = empty($aParam) ? $this->request->post() : $aParam; + + //必填值验证 + $iPArticleId = empty($aParam['p_article_id']) ? '' : $aParam['p_article_id']; + if(empty($iPArticleId)){ + return json_encode(array('status' => 2,'msg' => 'Please select an article' )); + } + //查询文章(p_article_id 与 article_id 都要带,下游服务方法两者都用) + $aWhere = ['p_article_id' => $iPArticleId,'state' => ['in',[0,2]]]; + $aProductionArticle = Db::name('production_article')->field('p_article_id,article_id')->where($aWhere)->find(); + if(empty($aProductionArticle)){ + return json_encode(array('status' => 3,'msg' => 'No articles found' )); + } + if($this->checkReferStatus($iPArticleId)==0){ + return jsonError('请修正完文献内容再进行校对。'); + } + //已存在校对记录则禁止重复执行第一次校对,提示走重置接口 + $iExisting = Db::name('article_reference_check_result') + ->where('p_article_id', $iPArticleId) + ->count(); + if(intval($iExisting) > 0){ + return jsonError('该文章已存在校对记录,请使用"重置校对"接口重新校对。'); + } + try { + $svc = new ReferenceCheckService(); + $result = $svc->enqueueByPArticle($aProductionArticle); + return jsonSuccess($result); + } catch (\Exception $e) { + return jsonError($e->getMessage()); + } + } + /** + * 文献校对重置:删除该文章已有的全部校对明细,并重新入队整篇校对 + * POST/GET: article_id(必填) + * @url /api/Article/referenceCheckReset + */ + public function referenceCheckResetAI() + { + //获取参数 + $aParam = empty($aParam) ? $this->request->post() : $aParam; + + //必填值验证 + $iPArticleId = empty($aParam['p_article_id']) ? 
'' : $aParam['p_article_id']; + if(empty($iPArticleId)){ + return json_encode(array('status' => 2,'msg' => 'Please select an article' )); + } + //查询文章(p_article_id 与 article_id 都要带,下游服务方法两者都用) + $aWhere = ['p_article_id' => $iPArticleId,'state' => ['in',[0,2]]]; + $aProductionArticle = Db::name('production_article')->field('p_article_id,article_id')->where($aWhere)->find(); + if(empty($aProductionArticle)){ + return json_encode(array('status' => 3,'msg' => 'No articles found' )); + } + if($this->checkReferStatus($iPArticleId)==0){ + return jsonError('请修正完文献内容再进行校对。'); + } + $iArticleId = empty($aProductionArticle['article_id']) ? 0 : $aProductionArticle['article_id']; + if(empty($iArticleId)){ + return json_encode(array('status' => 4,'msg' => 'Unbound article' )); + } + try { + $result = (new ReferenceCheckService())->resetAndRecheckByArticle($aProductionArticle); + return jsonSuccess($result); + } catch (\Exception $e) { + return jsonError($e->getMessage()); + } + } + + /** + * 清空某篇文章下的全部参考文献校对记录(不重新入队) + * + * 与 referenceCheckResetAI 的区别:reset 是「清空 + 重新校对」, + * 这里只做「清空」一步,校对状态回到未校对,等待用户手动再触发。 + * + * POST/GET: p_article_id(必填) + */ + public function referenceCheckClearAI() + { + $aParam = $this->request->post(); + if (empty($aParam)) { + $aParam = $this->request->param(); + } + + $iPArticleId = empty($aParam['p_article_id']) ? 
0 : intval($aParam['p_article_id']); + if ($iPArticleId <= 0) { + return json_encode(array('status' => 2, 'msg' => 'Please select an article')); + } + + // 校验文章存在(与其它校对接口口径一致:state in [0,2]) + $aProductionArticle = Db::name('production_article') + ->field('p_article_id,article_id') + ->where(['p_article_id' => $iPArticleId, 'state' => ['in', [0, 2]]]) + ->find(); + if (empty($aProductionArticle)) { + return json_encode(array('status' => 3, 'msg' => 'No articles found')); + } + + try { + $deleted = (new ReferenceCheckService())->clearArticleChecksByPArticleId($iPArticleId); + return jsonSuccess([ + 'p_article_id' => $iPArticleId, + 'deleted' => intval($deleted), + ]); + } catch (\Exception $e) { + return jsonError($e->getMessage()); + } + } + + /** + * 按 p_article_id 查整篇引用校对进度(按 reference_no 分组聚合) + * + * POST/GET: p_article_id(必填) + * + * 返回 list 中每项含:reference_no、p_refer_id、status(数值)、 + * total、pending、done、failed、pass、is_pass、last_updated_at、records + * + * status 数值含义: + * 0 = 待校验 1 = 校对中 2 = 校对完成 3 = 校对失败 + */ + public function referenceCheckProgressAI() + { + $aParam = $this->request->post(); + if (empty($aParam)) { + $aParam = $this->request->param(); + } + + $iPArticleId = empty($aParam['p_article_id']) ? 
0 : intval($aParam['p_article_id']); + if ($iPArticleId <= 0) { + return json_encode(array('status' => 2, 'msg' => 'Please select an article')); + } + try { + $result = (new ReferenceCheckService())->getProgressByPArticleId($iPArticleId); + return jsonSuccess($result); + } catch (\Exception $e) { + return jsonError($e->getMessage()); + } + } + + /** + * 按 p_article_id 查整篇文章引用校对总状态(用于前端按钮分流) + * + * POST/GET: p_article_id(必填) + * + * 计数维度是「参考文献」(按 reference_no 分组),不是单条校对明细行。 + * 例:50 条参考文献、底层 111 条校对明细时,total = 50。 + * + * 返回 status 数值含义(整篇): + * 0 = 未校对(一条记录都没有) + * 1 = 校对中(至少 1 条参考文献仍有未跑完的明细) + * 2 = 校对完成(所有参考文献全部明细已结束) + * + * 返回字段:p_article_id、status、total、pending、done、failed、progress_percent + * total —— 参考文献条数 + * pending —— 该条参考文献仍有未跑完明细的数量(含"部分跑完") + * done —— 该条参考文献所有明细都 status=1 的数量 + * failed —— 该条参考文献全部跑完且至少 1 条 status=2 的数量 + * pending + done + failed = total;progress_percent = (done+failed)/total + * + * 分组明细请走 referenceCheckProgressAI。 + */ + public function referenceCheckArticleStatusAI() + { + $aParam = $this->request->post(); + if (empty($aParam)) { + $aParam = $this->request->param(); + } + + $iPArticleId = empty($aParam['p_article_id']) ? 0 : intval($aParam['p_article_id']); + if ($iPArticleId <= 0) { + return json_encode(array('status' => 2, 'msg' => 'Please select an article')); + } + + try { + $result = (new ReferenceCheckService())->getArticleProgressStatusByPArticleId($iPArticleId); + return jsonSuccess($result); + } catch (\Exception $e) { + return jsonError($e->getMessage()); + } + } + + /** + * 按 p_refer_id 查单条参考文献的校对明细 + * + * POST/GET: p_refer_id(必填) + * + * 返回 list 中每项含:am_id、confidence、reason、is_match、is_pass + * 同时附带上下文:p_refer_id、p_article_id、reference_no、total + */ + public function referenceCheckDetailsAI() + { + $aParam = $this->request->post(); + if (empty($aParam)) { + $aParam = $this->request->param(); + } + + $iPReferId = empty($aParam['p_refer_id']) ? 
0 : intval($aParam['p_refer_id']); + if ($iPReferId <= 0) { + return json_encode(array('status' => 2, 'msg' => 'Please select a reference')); + } + + try { + $result = (new ReferenceCheckService())->getCheckDetailsByPReferId($iPReferId); + return jsonSuccess($result); + } catch (\Exception $e) { + return jsonError($e->getMessage()); + } + } + + /** + * Tell whether all active references (state = 0) of a production article carry a non-zero `cs` flag. + * + * The check entry points in this controller compare the result with 0 and refuse to start a check in that case. + * + * @param int $p_article_id production article id + * @return int 1 when at least one active reference exists and none has cs == 0; + * 0 when any active reference has cs == 0, or when the article has no active references + */ + public function checkReferStatus($p_article_id){ + $list = $this->production_article_refer_obj->where('p_article_id', $p_article_id)->where('state', 0)->select(); + /* Always answer with an int: a response object here would never compare equal to 0 in the callers. */ + if (!$list) { + return 0; + } + foreach ($list as $v) { + /* Loose comparison kept on purpose: the column may come back as a string. */ + if ($v['cs'] == 0) { + return 0; + } + } + return 1; + } } diff --git a/application/api/job/ReferenceCheck.php b/application/api/job/ReferenceCheck.php new file mode 100644 index 00000000..89c5c67d --- /dev/null +++ b/application/api/job/ReferenceCheck.php @@ -0,0 +1,114 @@ +oQueueJob = new QueueJob(); + $this->QueueRedis = QueueRedis::getInstance(); + } + + public function fire(Job $job, $data) + { + $this->oQueueJob->init($job); + + $rawBody = empty($job->getRawBody()) ? '' : $job->getRawBody(); + $jobData = empty($rawBody) ? [] : json_decode($rawBody, true); + $jobId = empty($jobData['id']) ? 'unknown' : $jobData['id']; + + $sRedisKey = ''; + $sRedisValue = ''; + + $this->oQueueJob->log("-----------队列任务开始-----------"); + $this->oQueueJob->log("当前任务ID: {$jobId}, 尝试次数: {$job->attempts()}"); + + try { + $checkId = intval(isset($data['check_id']) ? 
$data['check_id'] : 0); + if ($checkId <= 0 && !empty($jobData['data']['check_id'])) { + $checkId = intval($jobData['data']['check_id']); + } + if ($checkId <= 0) { + $job->delete(); + return; + } + + $row = Db::name('article_reference_check_result')->where('id', $checkId)->find(); + if (empty($row)) { + $job->delete(); + return; + } + + if (intval($row['status']) === 1) { + $job->delete(); + return; + } + + $sClassName = get_class($this); + $sRedisKey = "queue_job:{$sClassName}:{$checkId}"; + $sRedisValue = uniqid() . '_' . getmypid(); + + $svc = new ReferenceCheckService(); + $svc->clearReferenceCheckQueueLock($checkId); + + if (!$this->oQueueJob->acquireLock($sRedisKey, $sRedisValue, $job)) { + return; + } + + try { + $svc->runReferenceCheckOnce($checkId); + + $amId = intval(isset($row['am_id']) ? $row['am_id'] : 0); + if ($amId > 0) { + $svc->syncAmRefCheckStatus($amId); + } + $this->QueueRedis->finishJob($sRedisKey, 'completed', $this->completedExprie, $sRedisValue); + $job->delete(); + $this->oQueueJob->log("任务执行成功 | 日志ID: {$sRedisKey}"); + } catch (\Exception $e) { + $this->oQueueJob->log('ReferenceCheck error: ' . 
$e->getMessage()); + if ($job->attempts() >= 3) { + $this->markFailed($checkId, $e->getMessage()); + $job->delete(); + return; + } + $job->release(30); + } + } catch (\RuntimeException $e) { + $this->oQueueJob->handleRetryableException($e, $sRedisKey, $sRedisValue, $job); + } catch (\LogicException $e) { + $this->oQueueJob->handleNonRetryableException($e, $sRedisKey, $sRedisValue, $job); + } catch (\Exception $e) { + $this->oQueueJob->handleRetryableException($e, $sRedisKey, $sRedisValue, $job); + } finally { + $this->oQueueJob->finnal(); + } + } + + private function markFailed($checkId, $msg) + { + $row = Db::name('article_reference_check_result')->where('id', $checkId)->find(); + try { + (new ReferenceCheckService())->updateCheckResult($checkId, [ + 'status' => 2, + 'error_msg' => $msg, + ]); + } catch (\Exception $e) { + \think\Log::error('ReferenceCheck markFailed: ' . $e->getMessage()); + } + $amId = empty($row) ? 0 : intval(isset($row['am_id']) ? $row['am_id'] : 0); + if ($amId > 0) { + (new ReferenceCheckService())->syncAmRefCheckStatus($amId); + } + } +} diff --git a/application/api/job/ReferenceCheckTwo.php b/application/api/job/ReferenceCheckTwo.php new file mode 100644 index 00000000..564af204 --- /dev/null +++ b/application/api/job/ReferenceCheckTwo.php @@ -0,0 +1,162 @@ +oQueueJob = new QueueJob(); + $this->QueueRedis = QueueRedis::getInstance(); + } + + public function fire(Job $job, $data) + { + $this->oQueueJob->init($job); + + $rawBody = empty($job->getRawBody()) ? '' : $job->getRawBody(); + $jobData = empty($rawBody) ? [] : json_decode($rawBody, true); + $jobId = empty($jobData['id']) ? 'unknown' : $jobData['id']; + + $sRedisKey = ''; + $sRedisValue = ''; + + $this->oQueueJob->log("-----------队列任务开始-----------"); + $this->oQueueJob->log("当前任务ID: {$jobId}, 尝试次数: {$job->attempts()}"); + + try { + $checkId = intval(isset($data['check_id']) ? 
$data['check_id'] : 0); + if ($checkId <= 0 && !empty($jobData['data']['check_id'])) { + $checkId = intval($jobData['data']['check_id']); + } + /* Validate the job before taking the Redis lock (same order as ReferenceCheck::fire): a job dropped here must not exit while holding a lock, and invalid jobs must not all share the "...:0" key. */ + if ($checkId <= 0) { + $job->delete(); + return; + } + + $row = Db::name('article_reference_check_result')->where('id', $checkId)->find(); + if (empty($row)) { + $job->delete(); + return; + } + + $sClassName = get_class($this); + $sRedisKey = "queue_job_two:{$sClassName}:{$checkId}"; + $sRedisValue = uniqid() . '_' . getmypid(); + + if (!$this->oQueueJob->acquireLock($sRedisKey, $sRedisValue, $job)) { + return; + } + +// if (intval($row['status']) === 1) { +// $job->delete(); +// return; +// } + + try { + $svc = new ReferenceCheckService(); + + $contentA = $svc->resolveMainContentForJob($row); + $referText = trim((string)(isset($row['refer_text']) ? $row['refer_text'] : '')); + $refer = null; + + if (intval($row['p_refer_id']) > 0) { + $refer = Db::name('production_article_refer') + ->where('p_refer_id', intval($row['p_refer_id'])) + ->where('state', 0) + ->find(); + } + + $payload = $svc->prepareRecheckPayload(is_array($refer) ? $refer : [], $referText); + $doiBlock = $payload['doi_block']; + + if ($contentA === '' || $referText === '') { + $this->markFailed($checkId, 'Missing article_main.content or refer_text'); + $job->delete(); + return; + } + $llm = new LLMService(); + $llmResult = $llm->checkReference($contentA, $referText, true, $doiBlock); + + $requestFailed = !empty($llmResult['request_failed']); + $canSupport = $svc->parseLlmCanSupport($llmResult); + $tag = $payload['has_abstract'] + ? ('[Crossref复核' . ($payload['doi_used'] !== '' ? ' ' . $payload['doi_used'] : '') . ']') + : '[Crossref复核-无摘要]'; + $reason = $tag . ' ' . (isset($llmResult['reason']) ? 
$llmResult['reason'] : ''); + + // LLM 通讯失败:写 status=2 并抛异常触发队列重试 + if ($requestFailed) { + $svc->updateCheckResult($checkId, [ + 'confidence' => floatval($llmResult['confidence']), + 'reason' => $reason, + 'status' => 2, + 'error_msg' => isset($llmResult['reason']) ? $llmResult['reason'] : 'LLM request failed', + ]); + throw new \RuntimeException(isset($llmResult['reason']) ? $llmResult['reason'] : 'LLM request failed'); + } + + $affected = $svc->updateCheckResult($checkId, [ + 'can_support' => $canSupport ? 1 : 0, + 'is_match' => $canSupport ? 1 : 0, + 'confidence' => floatval($llmResult['confidence']), + 'reason' => $reason, + 'status' => 1, + 'error_msg' => '', + ]); + $this->oQueueJob->log("Crossref复核写入 id={$checkId} affected={$affected} can_support=" . ($canSupport ? 1 : 0) . " confidence=" . floatval($llmResult['confidence'])); + + $amId = intval(isset($row['am_id']) ? $row['am_id'] : 0); + if ($amId > 0) { + $svc->syncAmRefCheckStatus($amId); + } + $this->QueueRedis->finishJob($sRedisKey, 'completed', $this->completedExprie, $sRedisValue); + $job->delete(); + $this->oQueueJob->log("任务执行成功 | 日志ID: {$sRedisKey}"); + } catch (\Exception $e) { + $this->oQueueJob->log('ReferenceCheckTwo error: ' . 
$e->getMessage()); + if ($job->attempts() >= 3) { + $this->markFailed($checkId, $e->getMessage()); + $job->delete(); + return; + } + $job->release(30); + } + } catch (\RuntimeException $e) { + $this->oQueueJob->handleRetryableException($e, $sRedisKey, $sRedisValue, $job); + } catch (\LogicException $e) { + $this->oQueueJob->handleNonRetryableException($e, $sRedisKey, $sRedisValue, $job); + } catch (\Exception $e) { + $this->oQueueJob->handleRetryableException($e, $sRedisKey, $sRedisValue, $job); + } finally { + $this->oQueueJob->finnal(); + } + } + + private function markFailed($checkId, $msg) + { + $row = Db::name('article_reference_check_result')->where('id', $checkId)->find(); + try { + (new ReferenceCheckService())->updateCheckResult($checkId, [ + 'status' => 2, + 'error_msg' => $msg, + ]); + } catch (\Exception $e) { + \think\Log::error('ReferenceCheckTwo markFailed: ' . $e->getMessage()); + } + $amId = empty($row) ? 0 : intval(isset($row['am_id']) ? $row['am_id'] : 0); + if ($amId > 0) { + (new ReferenceCheckService())->syncAmRefCheckStatus($amId); + } + } +} diff --git a/application/common/QueueRedis.php b/application/common/QueueRedis.php index fb9fb5fb..4412d1ba 100644 --- a/application/common/QueueRedis.php +++ b/application/common/QueueRedis.php @@ -80,6 +80,25 @@ class QueueRedis return null; } } + + /** + * 删除一个或多个 Redis 键(用于重检前清除队列任务 completed 标记) + */ + public function deleteRedisKeys(array $keys) + { + $keys = array_values(array_filter($keys, function ($k) { + return $k !== null && $k !== ''; + })); + if (empty($keys)) { + return true; + } + try { + $this->connect()->del(...$keys); + return true; + } catch (\Exception $e) { + return false; + } + } // 安全释放锁(仅当值匹配时删除) public function releaseRedisLock($key, $value) diff --git a/application/common/ReferenceCheckService.php b/application/common/ReferenceCheckService.php new file mode 100644 index 00000000..77b44e9d --- /dev/null +++ b/application/common/ReferenceCheckService.php @@ -0,0 +1,2578 @@ += 
该值视为"通过" */ + const PASS_CONFIDENCE_THRESHOLD = 0.65; + + /** + * [...] 引用标签内允许的字符类(带 /u 修饰符使用)。 + * + * 除 ASCII 数字、半角逗号、半角连字符、空白外,还兼容常见排版变体: + * , U+FF0C 全角逗号 + * – U+2013 EN DASH + * — U+2014 EM DASH + * − U+2212 MINUS SIGN + * ‐ U+2010 HYPHEN + * ‑ U+2011 NON-BREAKING HYPHEN + * + * 若不支持变体连字符,会导致 [19–21] 这种区间引用整段被 preg 漏掉, + * 进而丢失对应的 reference_no 校对记录。 + */ + const BLUE_TAG_REGEX = '/\[([\d,,\-\x{2013}\x{2014}\x{2212}\x{2010}\x{2011}\s]+)\]<\/blue>/u'; + + /** + * 兼容无 ?? 的 PHP 版本 + */ + private function arrGet($arr, $key, $default = '') + { + return isset($arr[$key]) ? $arr[$key] : $default; + } + + /** + * 单条入队(可手工指定正文与文献文本) + */ + public function enqueue($contentA, $contentB, array $extra = []) + { + $contentA = trim($contentA); + if ($contentA === '') { + throw new \InvalidArgumentException('content_a is required'); + } + + $now = date('Y-m-d H:i:s'); + $checkId = Db::name('article_reference_check_result')->insertGetId([ + 'article_id' => intval($this->arrGet($extra, 'article_id', 0)), + 'am_id' => intval($this->arrGet($extra, 'am_id', 0)), + 'p_article_id' => intval($this->arrGet($extra, 'p_article_id', 0)), + 'p_refer_id' => intval($this->arrGet($extra, 'p_refer_id', 0)), + 'refer_index' => intval($this->arrGet($extra, 'refer_index', 0)), + 'reference_no' => intval($this->arrGet($extra, 'reference_no', 0)), + 'reference_raw' => (string)$this->arrGet($extra, 'reference_raw', ''), + 'cite_tag_start' => intval($this->arrGet($extra, 'cite_tag_start', 0)), + 'cite_tag_end' => intval($this->arrGet($extra, 'cite_tag_end', 0)), + 'text_start' => intval($this->arrGet($extra, 'text_start', 0)), + 'text_end' => intval($this->arrGet($extra, 'text_end', 0)), + 'content_a' => $contentA, + 'content_b' => trim($contentB), + 'status' => 0, + 'created_at' => $now, + 'updated_at' => $now, + ]); + + $amId = intval($this->arrGet($extra, 'am_id', 0)); + if ($amId > 0) { + $this->setAmRefCheckStatus($amId, self::AM_STATUS_RUNNING); + } + + $this->pushJob(intval($checkId), 
intval($this->arrGet($extra, 'queue_delay', 0))); + + return ['check_id' => $checkId, 'queued' => 1]; + } + public function enqueueByArticleMain($main){ + $amId = $main['am_id']; +// $main = Db::name('article_main') +// ->field('am_id,content,article_id') +// ->where('am_id', $amId) +// ->whereIn('state', [0, 2]) +// ->find(); + $citations = $this->extractReferences((string)$main['content']); +// return $citations; + if (empty($citations)) { + $this->setAmRefCheckStatus($amId, self::AM_STATUS_NONE); + return; + } + $prod = Db::name('production_article') + ->where('article_id', $main['article_id']) + ->where('state', 0) + ->find(); + if (empty($prod)) { + throw new \RuntimeException('production_article not found for article_id=' . $main['article_id']); + } + + $pArticleId = intval($prod['p_article_id']); + $referMap = $this->loadReferMapByPArticleId($pArticleId); + + if (empty($citations)) { + $this->setAmRefCheckStatus($amId, self::AM_STATUS_PASS); + return; + } + + $skipped = 0; + $delay = 0; + foreach ($citations as $cite) { + foreach ($cite['reference_numbers'] as $refNo) { + $referIndex = $refNo - 1; + if ($referIndex < 0 || !isset($referMap[$referIndex])) { + $skipped++; + continue; + } + $refer = $referMap[$referIndex]; + $referText = $this->formatReferForLlm($refer); + + $now = date('Y-m-d H:i:s'); + // [70-73] 展开为 reference_no=70,71,72,73 共 4 条记录 + $checkId = Db::name('article_reference_check_result')->insertGetId([ + 'article_id' => $main['article_id'], + 'p_article_id' => $pArticleId, + 'am_id' => intval($main['am_id']), + 'reference_no' => $refNo, + 'refer_index' => $refNo, + 'origin_text' => $cite['original_text'], + 'refer_text' => $referText, + 'p_refer_id' => $referMap[$referIndex]['p_refer_id'], + 'text_start' => $cite['text_start'], + 'text_end' => $cite['text_end'], + 'created_at' => $now, + 'updated_at' => $now, + ]); + $this->pushJob(intval($checkId), $delay); + $checkIds[] = $checkId; + $delay += 1; + } + } + + $this->setAmRefCheckStatus($amId, 
self::AM_STATUS_RUNNING); + } + /** + * Manual trigger: enqueue the second-round DOI recheck for finished rows (status = 1) + * of an article whose confidence is <= PASS_CONFIDENCE_THRESHOLD. + * + * @param int $articleId article id, must be positive + * @return array keys: article_id, check_ids2 (row ids for which maybeEnqueueSecondPass() returned true), queued (their count) + * @throws \InvalidArgumentException when $articleId is not positive + */ + public function enqueueSecondPassByArticle($articleId) + { + $articleId = intval($articleId); + if ($articleId <= 0) { + throw new \InvalidArgumentException('article_id is required'); + } + + /* NOTE(review): rand() + limit(2) picks at most two random rows per call; this looks like a sampling/debug limit, confirm it is intended. */ + $rows = Db::name('article_reference_check_result') + ->where('article_id', $articleId) + ->where('status', 1) + ->where('confidence', '<=', self::PASS_CONFIDENCE_THRESHOLD) + ->orderRaw('rand()') + ->limit(2) + ->select(); + + $checkIds2 = []; + foreach ($rows as $checkLog) { + $rowId = $this->resolveCheckRowId($checkLog); + if ($this->maybeEnqueueSecondPass($rowId, floatval($checkLog['confidence']))) { + $checkIds2[] = $rowId; + } + } + + return [ + 'article_id' => $articleId, + 'check_ids2' => $checkIds2, + 'queued' => count($checkIds2), + ]; + } + public function enqueueByPArticle($prod){ + if (empty($prod)) { + throw new \RuntimeException('production_article not found'); + } + $pArticleId = intval($prod['p_article_id']); + $articleId = intval($prod['article_id']); + $referMap = $this->loadReferMapByPArticleId($pArticleId); + + $mains = Db::name('article_main') + ->field('am_id,content,article_id') + ->where('article_id', $articleId) + ->whereIn('state', [0, 2]) + ->order('sort asc') + ->select(); + if (empty($mains)) { + throw new \RuntimeException('article_main is empty'); + } + $queued = 0; + $skipped = 0; + $pendingJobs = []; + $amIdsWithJobs = []; + $now = date('Y-m-d H:i:s'); + foreach ($mains as $main) { + $amId = intval($main['am_id']); + $citations = $this->extractReferences((string)$main['content']); + if (empty($citations)) { + $this->setAmRefCheckStatus($amId, self::AM_STATUS_NONE); + continue; + } + foreach ($citations as $cite) { + foreach ($cite['reference_numbers'] as $refNo) { + $referIndex = $refNo - 1; + if ($referIndex < 0 || !isset($referMap[$referIndex])) { + $skipped++; + continue; + } + $refer = $referMap[$referIndex]; + $referText = 
$this->formatReferForLlm($refer); + + // [70-73] 展开为 reference_no=70,71,72,73 共 4 条记录;先入队表,再按文献号正序校对 + $checkId = Db::name('article_reference_check_result')->insertGetId([ + 'article_id' => $main['article_id'], + 'p_article_id' => $pArticleId, + 'am_id' => $amId, + 'reference_no' => $refNo, + 'refer_index' => $refNo, + 'origin_text' => $cite['original_text'], + 'refer_text' => $referText, + 'p_refer_id' => $referMap[$referIndex]['p_refer_id'], + 'text_start' => $cite['text_start'], + 'text_end' => $cite['text_end'], + 'created_at' => $now, + 'updated_at' => $now, + ]); + + $pendingJobs[] = [ + 'check_id' => intval($checkId), + 'reference_no' => intval($refNo), + 'am_id' => $amId, + 'text_start' => intval($cite['text_start']), + ]; + $queued++; + $amIdsWithJobs[$amId] = true; + } + } + } + + $checkIds = $this->pushJobsSortedByReferenceNo($pendingJobs); + foreach (array_keys($amIdsWithJobs) as $amId) { + $this->setAmRefCheckStatus($amId, self::AM_STATUS_RUNNING); + } + + return [ + 'article_id' => $articleId, + 'p_article_id' => $pArticleId, + 'queued' => $queued, + 'skipped' => $skipped, + 'check_ids' => $checkIds, + 'queue' => self::QUEUE_NAME, + ]; + } + public function enqueueByArticle($articleId){ + if ($articleId <= 0) { + throw new \InvalidArgumentException('article_id is required'); + } + $prod = Db::name('production_article') + ->where('article_id', $articleId) + ->whereIn('state', [0, 2]) + ->find(); + if (empty($prod)) { + throw new \RuntimeException('production_article not found for article_id=' . 
$articleId); + } + $pArticleId = intval($prod['p_article_id']); + $referMap = $this->loadReferMapByPArticleId($pArticleId); + + $mains = Db::name('article_main') + ->field('am_id,content,article_id') + ->where('article_id', $articleId) + ->whereIn('state', [0, 2]) + ->order('sort asc') + ->select(); + if (empty($mains)) { + throw new \RuntimeException('article_main is empty'); + } + $queued = 0; + $skipped = 0; + $pendingJobs = []; + $amIdsWithJobs = []; + $now = date('Y-m-d H:i:s'); + foreach ($mains as $main) { + $amId = intval($main['am_id']); + $citations = $this->extractReferences((string)$main['content']); + if (empty($citations)) { + $this->setAmRefCheckStatus($amId, self::AM_STATUS_NONE); + continue; + } + foreach ($citations as $cite) { + foreach ($cite['reference_numbers'] as $refNo) { + $referIndex = $refNo - 1; + if ($referIndex < 0 || !isset($referMap[$referIndex])) { + $skipped++; + continue; + } + $refer = $referMap[$referIndex]; + $referText = $this->formatReferForLlm($refer); + + // [70-73] 展开为 reference_no=70,71,72,73 共 4 条记录;先入队表,再按文献号正序校对 + $checkId = Db::name('article_reference_check_result')->insertGetId([ + 'article_id' => $main['article_id'], + 'p_article_id' => $pArticleId, + 'am_id' => $amId, + 'reference_no' => $refNo, + 'refer_index' => $refNo, + 'origin_text' => $cite['original_text'], + 'refer_text' => $referText, + 'p_refer_id' => $referMap[$referIndex]['p_refer_id'], + 'text_start' => $cite['text_start'], + 'text_end' => $cite['text_end'], + 'created_at' => $now, + 'updated_at' => $now, + ]); + + $pendingJobs[] = [ + 'check_id' => intval($checkId), + 'reference_no' => intval($refNo), + 'am_id' => $amId, + 'text_start' => intval($cite['text_start']), + ]; + $queued++; + $amIdsWithJobs[$amId] = true; + } + } + } + + $checkIds = $this->pushJobsSortedByReferenceNo($pendingJobs); + foreach (array_keys($amIdsWithJobs) as $amId) { + $this->setAmRefCheckStatus($amId, self::AM_STATUS_RUNNING); + } + + return [ + 'article_id' => $articleId, + 
'p_article_id' => $pArticleId, + 'queued' => $queued, + 'skipped' => $skipped, + 'check_ids' => $checkIds, + 'queue' => self::QUEUE_NAME, + ]; + } + + /** + * 根据该节全部明细行汇总更新 t_article_main.ref_check_status + */ + public function syncAmRefCheckStatus($amId) + { + if ($amId <= 0) { + return self::AM_STATUS_NONE; + } + + $rows = Db::name('article_reference_check_result')->where('am_id', $amId)->select(); + if (empty($rows)) { + $this->setAmRefCheckStatus($amId, self::AM_STATUS_NONE); + return self::AM_STATUS_NONE; + } + + $pending = 0; + $hasFail = false; + $done = 0; + + foreach ($rows as $row) { + $st = intval($row['status']); + if ($st === 0) { + $pending++; + continue; + } + if ($st === 2 || ($st === 1 && intval($row['is_match']) === 0)) { + $hasFail = true; + } + if ($st === 1) { + $done++; + } + } + + if ($pending > 0) { + $status = self::AM_STATUS_RUNNING; + } elseif ($hasFail) { + $status = self::AM_STATUS_FAIL; + } elseif ($done === count($rows)) { + $status = self::AM_STATUS_PASS; + } else { + $status = self::AM_STATUS_FAIL; + } + + $this->setAmRefCheckStatus($amId, $status); + return $status; + } + + public function setAmRefCheckStatus($amId, $status) + { + if ($amId <= 0) { + return; + } + Db::name('article_main')->where('am_id', $amId)->update([ + 'ref_check_status' => $status, + ]); + } + + /** + * 按 p_article_id 清空整篇文章的引用校对明细 + 重置节级 ref_check_status。 + * + * 用于新增/删除文献后,旧的 reference_no 全部错位、原校对结果失效的场景: + * 物理删除后,整篇状态查询自然回到 ARTICLE_PROGRESS_NONE(未校对)。 + * + * @return int 被删除的明细条数 + */ + public function clearArticleChecksByPArticleId($pArticleId) + { + $pArticleId = intval($pArticleId); + if ($pArticleId <= 0) { + return 0; + } + + // 先反查 article_id(用于重置 article_main.ref_check_status 节级状态) + $articleId = intval(Db::name('production_article') + ->where('p_article_id', $pArticleId) + ->whereIn('state', [0, 2]) + ->value('article_id')); + + // 先清掉旧记录对应的队列 Redis 锁,避免在途 worker 写回数据 + $oldIds = Db::name('article_reference_check_result') + ->where('p_article_id', 
$pArticleId) + ->column('id'); + foreach ($oldIds as $oldId) { + $this->clearReferenceCheckQueueLock(intval($oldId)); + } + + $deleted = Db::name('article_reference_check_result') + ->where('p_article_id', $pArticleId) + ->delete(); + + if ($articleId > 0) { + Db::name('article_main') + ->where('article_id', $articleId) + ->whereIn('state', [0, 2]) + ->update(['ref_check_status' => self::AM_STATUS_NONE]); + } + + return intval($deleted); + } + + public function clearArticleChecks($articleId) + { + $articleId = intval($articleId); + if ($articleId <= 0) { + return 0; + } + + // 先清掉旧记录对应的队列 Redis 锁,否则同 check_id 在 TTL 内不会再次执行 + $oldIds = Db::name('article_reference_check_result') + ->where('article_id', $articleId) + ->column('id'); + foreach ($oldIds as $oldId) { + $this->clearReferenceCheckQueueLock(intval($oldId)); + } + + $deleted = Db::name('article_reference_check_result')->where('article_id', $articleId)->delete(); + Db::name('article_main') + ->where('article_id', $articleId) + ->whereIn('state', [0, 2]) + ->update(['ref_check_status' => self::AM_STATUS_NONE]); + + return intval($deleted); + } + + /** + * 文献列表局部挪动后,仅刷新指定 p_refer_id 对应的校对明细 reference_no / refer_index。 + * + * 读 production_article_refer 的最新 index 来算新序号(index + 1),避免外部传入过期值。 + * 仅更新受影响的两条左右记录,降低与并发挪动互相覆盖的风险。 + * + * @param int[] $pReferIds 受影响的 p_refer_id(一般为 2 个:被挪条目 + 其相邻条目) + * @param int $pArticleId 可选:附加 p_article_id 限定,进一步缩小行锁范围 + * @return array{p_refer_ids:int[], affected_rows:int, changes:array} + */ + public function syncReferenceNoByPReferIds(array $pReferIds, $pArticleId = 0) + { + $pReferIds = array_values(array_unique(array_filter(array_map('intval', $pReferIds)))); + $pArticleId = intval($pArticleId); + if (empty($pReferIds)) { + return [ + 'p_refer_ids' => [], + 'affected_rows' => 0, + 'changes' => [], + ]; + } + + $referQuery = Db::name('production_article_refer') + ->field('p_refer_id,p_article_id,index') + ->whereIn('p_refer_id', $pReferIds) + ->where('state', 0); + if 
($pArticleId > 0) { + $referQuery->where('p_article_id', $pArticleId); + } + $refers = $referQuery->select(); + if (empty($refers)) { + return [ + 'p_refer_ids' => $pReferIds, + 'affected_rows' => 0, + 'changes' => [], + ]; + } + + $now = date('Y-m-d H:i:s'); + $affected = 0; + $changes = []; + + foreach ($refers as $refer) { + $pReferId = intval($refer['p_refer_id']); + $newNo = intval($refer['index']) + 1; + + $updateQuery = Db::name('article_reference_check_result') + ->where('p_refer_id', $pReferId) + ->where('reference_no', '<>', $newNo); + if ($pArticleId > 0) { + $updateQuery->where('p_article_id', $pArticleId); + } + $rows = $updateQuery->update([ + 'reference_no' => $newNo, + 'refer_index' => $newNo, + 'updated_at' => $now, + ]); + + if ($rows > 0) { + $affected += intval($rows); + $changes[] = [ + 'p_refer_id' => $pReferId, + 'new_ref_no' => $newNo, + 'affected_rows' => intval($rows), + ]; + } + } + + return [ + 'p_refer_ids' => $pReferIds, + 'affected_rows' => $affected, + 'changes' => $changes, + ]; + } + + /** + * 重置整篇稿件的引用校对:删除旧明细 + 清理队列锁 + 全文重新入队校对 + * + * @return array + */ + /** + * 按 p_article_id 查整篇文章的引用校对总状态。 + * + * 统计维度是"参考文献"(按 reference_no 分组),不是单条校对明细行。 + * 例如 50 条参考文献、底层明细 111 条时,total 返回 50。 + * + * 返回 status 数值含义(整篇): + * 0 = ARTICLE_PROGRESS_NONE 一条校对记录都没有 + * 1 = ARTICLE_PROGRESS_RUNNING 至少 1 条参考文献仍有未跑完的明细 + * 2 = ARTICLE_PROGRESS_COMPLETED 所有参考文献的全部明细都已结束 + * + * 每条参考文献按其明细 status 分布落桶(互斥): + * pending —— 组内任一明细 status=0(含部分跑完的"校对中"也归此桶) + * done —— 组内全部明细 status=1 + * failed —— 组内全部明细已结束、至少 1 条 status=2 + * + * pending + done + failed = total;progress_percent = (done + failed) / total。 + * 分组明细请走 getProgressByPArticleId(控制器 referenceCheckProgressAI)。 + * + * @return array{p_article_id:int, status:int, total:int, pending:int, done:int, failed:int, progress_percent:float} + */ + public function getArticleProgressStatusByPArticleId($pArticleId) + { + $pArticleId = intval($pArticleId); + if ($pArticleId <= 0) { + throw new 
\InvalidArgumentException('p_article_id is required'); + } + + // 一条 SQL 按 reference_no 聚合,组内 status 分布一并算出来; + // 50 条参考文献 → 返回 50 行,PHP 走一次循环分桶即可 + $rows = Db::name('article_reference_check_result') + ->field('reference_no' + . ', SUM(CASE WHEN status = 0 THEN 1 ELSE 0 END) AS pending_cnt' + . ', SUM(CASE WHEN status = 2 THEN 1 ELSE 0 END) AS failed_cnt') + ->where('p_article_id', $pArticleId) + ->group('reference_no') + ->select(); + + if (empty($rows)) { + return [ + 'p_article_id' => $pArticleId, + 'status' => self::ARTICLE_PROGRESS_NONE, + 'total' => 0, + 'pending' => 0, + 'done' => 0, + 'failed' => 0, + 'progress_percent' => 0, + ]; + } + + $pending = 0; + $done = 0; + $failed = 0; + foreach ($rows as $row) { + $pendingCnt = intval($this->arrGet($row, 'pending_cnt', 0)); + $failedCnt = intval($this->arrGet($row, 'failed_cnt', 0)); + if ($pendingCnt > 0) { + $pending++; + } elseif ($failedCnt > 0) { + $failed++; + } else { + $done++; + } + } + + $total = count($rows); + $articleStatus = $pending > 0 + ? 
self::ARTICLE_PROGRESS_RUNNING + : self::ARTICLE_PROGRESS_COMPLETED; + $finished = $done + $failed; + $progressPercent = round($finished / $total * 100, 1); + + return [ + 'p_article_id' => $pArticleId, + 'status' => $articleStatus, + 'total' => $total, + 'pending' => $pending, + 'done' => $done, + 'failed' => $failed, + 'progress_percent' => $progressPercent, + ]; + } + + /** + * 按 p_article_id 查整篇引用校对进度,按 reference_no 分组聚合状态,并展开每条明细。 + * + * 单条 article_reference_check_result.status: + * 0 = 待校验 1 = 校对完成 2 = 校对失败 + * + * 分组(reference_no)状态(返回字段 status,数值类型): + * 0 = PROGRESS_PENDING 待校验 :分组内全部明细 status=0 + * 1 = PROGRESS_CHECKING 校对中 :分组内部分明细已结束、部分仍为 0 + * 2 = PROGRESS_COMPLETED 校对完成:分组内全部明细 status=1 + * 3 = PROGRESS_FAILED 校对失败:分组内全部明细已结束,且至少 1 条 status=2 + * + * 每个分组还会展开 records 子数组,给出该 reference_no 下每条 check 明细的: + * - status(同上 0/1/2) + * - confidence 评分 + * - is_pass(confidence >= PASS_CONFIDENCE_THRESHOLD 视为通过) + * + * @return array{p_article_id:int, total_groups:int, summary:array, list:array} + */ + public function getProgressByPArticleId($pArticleId) + { + $pArticleId = intval($pArticleId); + if ($pArticleId <= 0) { + throw new \InvalidArgumentException('p_article_id is required'); + } + + $rows = Db::name('article_reference_check_result') + ->field('id,p_refer_id,reference_no,am_id,status,confidence,is_match,reason,text_start,text_end,updated_at') + ->where('p_article_id', $pArticleId) + ->order('reference_no asc, id asc') + ->select(); + + // summary 用数值键,0/1/2/3 对应 PROGRESS_* 常量 + $summary = [ + self::PROGRESS_PENDING => 0, + self::PROGRESS_CHECKING => 0, + self::PROGRESS_COMPLETED => 0, + self::PROGRESS_FAILED => 0, + ]; + if (empty($rows)) { + return [ + 'p_article_id' => $pArticleId, + 'total_groups' => 0, + 'summary' => $summary, + 'list' => [], + ]; + } + + $groups = []; + foreach ($rows as $row) { + $refNo = intval($this->arrGet($row, 'reference_no', 0)); + $pReferId = intval($this->arrGet($row, 'p_refer_id', 0)); + if (!isset($groups[$refNo])) { 
+ $groups[$refNo] = [ + 'reference_no' => $refNo, + 'p_refer_id' => $pReferId, + 'total' => 0, + 'pending' => 0, + 'done' => 0, + 'failed' => 0, + 'pass' => 0, + 'last_updated_at' => '', + 'records' => [], + ]; + } + // 同一 reference_no 理论上只对应一个 p_refer_id;如果出现混淆,保留首次出现的非空 id + if ($groups[$refNo]['p_refer_id'] <= 0 && $pReferId > 0) { + $groups[$refNo]['p_refer_id'] = $pReferId; + } + + $groups[$refNo]['total']++; + $st = intval($this->arrGet($row, 'status', 0)); + if ($st === 0) { + $groups[$refNo]['pending']++; + } elseif ($st === 1) { + $groups[$refNo]['done']++; + } elseif ($st === 2) { + $groups[$refNo]['failed']++; + } + + $upd = (string)$this->arrGet($row, 'updated_at', ''); + if ($upd > $groups[$refNo]['last_updated_at']) { + $groups[$refNo]['last_updated_at'] = $upd; + } + + $confidence = floatval($this->arrGet($row, 'confidence', 0)); + $isPass = $confidence >= self::PASS_CONFIDENCE_THRESHOLD; + if ($isPass) { + $groups[$refNo]['pass']++; + } + + $groups[$refNo]['records'][] = [ + 'check_id' => intval($this->arrGet($row, 'id', 0)), + 'am_id' => intval($this->arrGet($row, 'am_id', 0)), + 'status' => $st, + 'confidence' => $confidence, + 'is_pass' => $isPass, + 'is_match' => intval($this->arrGet($row, 'is_match', 0)), + 'reason' => (string)$this->arrGet($row, 'reason', ''), + 'text_start' => intval($this->arrGet($row, 'text_start', 0)), + 'text_end' => intval($this->arrGet($row, 'text_end', 0)), + 'last_updated_at' => $upd, + ]; + } + + $list = []; + foreach ($groups as $g) { + $total = $g['total']; + $pending = $g['pending']; + $failed = $g['failed']; + $pass = $g['pass']; + + if ($pending === $total) { + $status = self::PROGRESS_PENDING; + } elseif ($pending === 0) { + $status = $failed > 0 ? 
self::PROGRESS_FAILED : self::PROGRESS_COMPLETED; + } else { + $status = self::PROGRESS_CHECKING; + } + + // 整体通过校验:分组已全部完成(无 pending、无 failed),且每条 confidence >= 0.65 + $g['is_pass'] = ( + $status === self::PROGRESS_COMPLETED + && $total > 0 + && $pass === $total + ); + + $summary[$status]++; + $g['status'] = $status; + $list[] = $g; + } + + usort($list, function ($a, $b) { + return $a['reference_no'] - $b['reference_no']; + }); + + return [ + 'p_article_id' => $pArticleId, + 'total_groups' => count($list), + 'summary' => $summary, + 'list' => $list, + ]; + } + + /** + * 按 p_refer_id 查这条参考文献的所有校对明细。 + * + * 每条 record 返回: + * - am_id 命中的 article_main 主键 + * - confidence 匹配置信度(0~1) + * - reason LLM 给出的判定理由 + * - is_match 是否匹配(来自 article_reference_check_result.is_match) + * - is_pass 是否通过校验(confidence >= PASS_CONFIDENCE_THRESHOLD) + * + * @param int $pReferId production_article_refer.p_refer_id + * @return array{p_refer_id:int, p_article_id:int, reference_no:int, total:int, list:array} + */ + public function getCheckDetailsByPReferId($pReferId) + { + $pReferId = intval($pReferId); + if ($pReferId <= 0) { + throw new \InvalidArgumentException('p_refer_id is required'); + } + + $rows = Db::name('article_reference_check_result') + ->field('id,p_article_id,reference_no,am_id,confidence,is_match,reason') + ->where('p_refer_id', $pReferId) + ->order('id asc') + ->select(); + + $list = []; + $pArticleId = 0; + $referenceNo = 0; + foreach ($rows as $row) { + // 取首条出现的 p_article_id / reference_no 作为该 refer 的上下文 + if ($pArticleId <= 0) { + $pArticleId = intval($this->arrGet($row, 'p_article_id', 0)); + } + if ($referenceNo <= 0) { + $referenceNo = intval($this->arrGet($row, 'reference_no', 0)); + } + + $confidence = floatval($this->arrGet($row, 'confidence', 0)); + $list[] = [ + 'am_id' => intval($this->arrGet($row, 'am_id', 0)), + 'confidence' => $confidence, + 'reason' => (string)$this->arrGet($row, 'reason', ''), + 'is_match' => intval($this->arrGet($row, 'is_match', 0)), + 
'is_pass' => $confidence >= self::PASS_CONFIDENCE_THRESHOLD, + ]; + } + + return [ + 'p_refer_id' => $pReferId, + 'p_article_id' => $pArticleId, + 'reference_no' => $referenceNo, + 'total' => count($list), + 'list' => $list, + ]; + } + + public function resetAndRecheckByArticle($aProductionArticle) + { + if (empty($aProductionArticle) || !is_array($aProductionArticle)) { + throw new \InvalidArgumentException('production_article is required'); + } + $pArticleId = intval($this->arrGet($aProductionArticle, 'p_article_id', 0)); + $articleId = intval($this->arrGet($aProductionArticle, 'article_id', 0)); + if ($pArticleId <= 0 || $articleId <= 0) { + throw new \InvalidArgumentException('production_article requires both p_article_id and article_id'); + } + + $existing = Db::name('article_reference_check_result') + ->where('p_article_id', $pArticleId) + ->count(); + if (intval($existing) <= 0) { + throw new \RuntimeException('no existing reference check records for p_article_id=' . $pArticleId); + } + + $cleared = $this->clearArticleChecks($articleId); + $enqueueResult = $this->enqueueByArticle($articleId); + + if (!is_array($enqueueResult)) { + $enqueueResult = []; + } + $enqueueResult['cleared'] = $cleared; + $enqueueResult['reset'] = 1; + return $enqueueResult; + } + + public static function amStatusLabel($status) + { + $map = [ + self::AM_STATUS_NONE => 'none', + self::AM_STATUS_PASS => 'pass', + self::AM_STATUS_FAIL => 'fail', + self::AM_STATUS_RUNNING => 'running', + ]; + return isset($map[$status]) ? 
$map[$status] : 'unknown'; + } + + /** + * 表主键为 id(对外 API 参数名仍叫 check_id) + */ + public function resolveCheckRowId($row) + { + if (!is_array($row)) { + return 0; + } + if (isset($row['id']) && intval($row['id']) > 0) { + return intval($row['id']); + } + if (isset($row['check_id']) && intval($row['check_id']) > 0) { + return intval($row['check_id']); + } + return 0; + } + + /** + * 解析 LLM 返回的 is_match(兼容 bool / 0|1 / "true"|"false" 字符串) + */ + public function parseLlmIsMatch($value) + { + if (is_bool($value)) { + return $value; + } + if (is_int($value) || is_float($value)) { + return intval($value) === 1; + } + $s = strtolower(trim((string)$value)); + return in_array($s, ['1', 'true', 'yes', 'match', 'matched'], true); + } + + /** + * 写入单条校对结果(统一截断 reason/error_msg,避免 varchar(512) 导致 UPDATE 失败) + * + * @throws \RuntimeException + */ + public function updateCheckResult($checkId, array $fields) + { + $checkId = intval($checkId); + if ($checkId <= 0) { + throw new \InvalidArgumentException('invalid check id'); + } + + if (isset($fields['reason'])) { + $fields['reason'] = mb_substr(trim((string)$fields['reason']), 0, 512); + } + if (isset($fields['error_msg'])) { + $fields['error_msg'] = mb_substr(trim((string)$fields['error_msg']), 0, 512); + } + $fields['updated_at'] = date('Y-m-d H:i:s'); + + $exists = Db::name('article_reference_check_result')->where('id', $checkId)->find(); + if (empty($exists)) { + throw new \RuntimeException('article_reference_check_result not found, id=' . $checkId); + } + + $affected = Db::name('article_reference_check_result')->where('id', $checkId)->update($fields); + if ($affected === false) { + throw new \RuntimeException('article_reference_check_result update failed, id=' . $checkId); + } + + \think\Log::info('updateCheckResult id=' . $checkId . ' affected=' . 
intval($affected)); + return intval($affected); + } + + public function getResult($checkId) + { + if ($checkId <= 0) { + return null; + } + $row = Db::name('article_reference_check_result')->where('id', $checkId)->find(); + return $row ?: null; + } + + public function listByArticle($articleId, $status = -1, $onlyMismatch = false) + { + $q = Db::name('article_reference_check_result')->where('article_id', $articleId); + if ($status >= 0) { + $q->where('status', $status); + } + if ($onlyMismatch) { + $q->where('status', 1)->where('is_match', 0); + } + return $q->order('am_id asc, cite_tag_start asc, reference_no asc')->select(); + } + + /** + * 稿件预览:在 content 上标记不合理引用序号与引用句 + * + * @return array{sections: array, issues: array, stats: array} + */ + public function buildArticlePreview($articleId, $amId = 0) + { + $q = Db::name('article_main') + ->field('am_id,content,sort,ref_check_status') + ->where('article_id', $articleId) + ->whereIn('state', [0, 2]); + if ($amId > 0) { + $q->where('am_id', $amId); + } + $mains = $q->order('sort asc')->select(); + + $rows = $this->listByArticle($articleId, 1); + $badByAm = $this->indexBadResults($rows); + + $sections = []; + $issues = []; + $stats = ['total' => 0, 'mismatch' => 0, 'match' => 0, 'pending' => 0]; + + foreach ($this->listByArticle($articleId, -1) as $r) { + $stats['total']++; + if (intval($r['status']) === 0) { + $stats['pending']++; + } elseif (intval($r['is_match']) === 1) { + $stats['match']++; + } else { + $stats['mismatch']++; + } + } + + foreach ($mains as $main) { + $id = intval($main['am_id']); + $content = (string)$main['content']; + $badIndex = isset($badByAm[$id]) ? 
$badByAm[$id] : array(); + $marked = $this->markContentForPreview($content, $id, $badIndex); + $amStatus = intval($this->arrGet($main, 'ref_check_status', 0)); + $sections[] = [ + 'am_id' => $id, + 'ref_check_status' => $amStatus, + 'ref_check_pass' => $amStatus === self::AM_STATUS_PASS, + 'ref_check_label' => self::amStatusLabel($amStatus), + 'content' => $content, + 'content_marked' => $marked['html'], + 'issue_count' => $marked['issue_count'], + ]; + foreach ($marked['issues'] as $issue) { + $issues[] = $issue; + } + } + + $articlePass = $this->resolveArticlePass($sections); + + return [ + 'article_id' => $articleId, + 'article_ref_check_pass' => $articlePass, + 'sections' => $sections, + 'issues' => $issues, + 'stats' => $stats, + ]; + } + + /** + * 全文是否通过:各节均为 pass,且无 running/fail(无引用节忽略) + */ + private function resolveArticlePass($sections) + { + $hasChecked = false; + foreach ($sections as $sec) { + $st = intval($this->arrGet($sec, 'ref_check_status', 0)); + if ($st === self::AM_STATUS_NONE) { + continue; + } + $hasChecked = true; + if ($st !== self::AM_STATUS_PASS) { + return false; + } + } + return $hasChecked ? true : null; + } + + /** + * @param array $rows status=1 的检测结果 + * @return array am_id => indexed bad map + */ + private function indexBadResults($rows) + { + $byAm = []; + foreach ($rows as $row) { + if (intval($row['status']) !== 1 || intval($row['is_match']) === 1) { + continue; + } + $amId = intval($row['am_id']); + $refNo = intval($row['reference_no']); + if ($amId <= 0 || $refNo <= 0) { + continue; + } + if (!isset($byAm[$amId])) { + $byAm[$amId] = ['by_raw' => [], 'contexts' => []]; + } + $rawKey = $this->normalizeRefRawKey((string)$this->arrGet($row, 'reference_raw', '')); + if ($rawKey !== '') { + $byAm[$amId]['by_raw'][$rawKey][$refNo] = $row; + } + + $ctxKey = intval($row['text_start']) . '_' . 
intval($row['text_end']); + if (!isset($byAm[$amId]['contexts'][$ctxKey])) { + $byAm[$amId]['contexts'][$ctxKey] = [ + 'text_start' => intval($row['text_start']), + 'text_end' => intval($row['text_end']), + 'check_ids' => [], + 'reasons' => [], + 'ref_nos' => [], + ]; + } + $byAm[$amId]['contexts'][$ctxKey]['check_ids'][] = $this->resolveCheckRowId($row); + $byAm[$amId]['contexts'][$ctxKey]['ref_nos'][] = $refNo; + $reason = trim((string)$this->arrGet($row, 'reason', '')); + if ($reason !== '') { + $byAm[$amId]['contexts'][$ctxKey]['reasons'][$refNo] = $reason; + } + } + return $byAm; + } + + private function normalizeRefRawKey($raw) + { + $raw = str_replace( + [',', '–', '—', '−', '‐', '‑', ' '], + [',', '-', '-', '-', '-', '-', ''], + trim($raw) + ); + return strtolower($raw); + } + + /** + * @param array $badIndex indexBadResults 中单 am 的结构 + */ + private function markContentForPreview($content, $amId, $badIndex) + { + $badByRaw = isset($badIndex['by_raw']) ? $badIndex['by_raw'] : array(); + $contexts = isset($badIndex['contexts']) ? $badIndex['contexts'] : array(); + $issues = array(); + $issueCount = 0; + + if ($content === '' || (empty($badByRaw) && empty($contexts))) { + return array('html' => $content, 'issues' => array(), 'issue_count' => 0); + } + + $html = $content; + + // 1) 先标记 blue 内各序号(在原文上操作,[70-73] 仅标不合理者如 70、71) + preg_match_all( + self::BLUE_TAG_REGEX, + $html, + $matches, + PREG_OFFSET_CAPTURE + ); + $citeDeltas = []; + if (!empty($matches[0])) { + $replacements = []; + foreach ($matches[0] as $idx => $match) { + $fullTag = $match[0]; + $tagStart = $match[1]; + $tagEnd = $tagStart + strlen($fullTag); + $inner = $matches[1][$idx][0]; + $rawKey = $this->normalizeRefRawKey($inner); + $badNums = isset($badByRaw[$rawKey]) ? 
$badByRaw[$rawKey] : array(); + + $innerMarked = preg_replace_callback( + '/\d+/', + function ($numMatch) use ($badNums, &$issues, &$issueCount, $amId, $inner) { + $num = intval($numMatch[0]); + if (!isset($badNums[$num])) { + return $numMatch[0]; + } + $row = $badNums[$num]; + $rowReason = isset($row['reason']) ? $row['reason'] : ''; + $issueCount++; + $issues[] = array( + 'am_id' => $amId, + 'check_id' => $this->resolveCheckRowId($row), + 'reference_no' => $num, + 'reference_raw' => $inner, + 'reason' => $rowReason, + 'confidence' => floatval(isset($row['confidence']) ? $row['confidence'] : 0), + ); + $title = htmlspecialchars( + '引用[' . $num . ']不合理: ' . $rowReason, + ENT_QUOTES, + 'UTF-8' + ); + return '' + . $numMatch[0] . ''; + }, + $inner + ); + + $tagClass = !empty($badNums) ? ' ref-cite-error' : ''; + $groupIds = !empty($badNums) + ? implode(',', array_map(function ($row) { + return (int) $this->resolveCheckRowId($row); + }, $badNums)) + : ''; + $newHtml = '[' . $innerMarked . ']'; + $replacements[] = [ + 'start' => $tagStart, + 'end' => $tagEnd, + 'html' => $newHtml, + 'delta' => strlen($newHtml) - ($tagEnd - $tagStart), + ]; + } + usort($replacements, function ($a, $b) { + return $b['start'] - $a['start']; + }); + foreach ($replacements as $rep) { + $html = substr($html, 0, $rep['start']) . $rep['html'] . 
substr($html, $rep['end']); + $citeDeltas[] = ['start' => $rep['start'], 'delta' => $rep['delta']]; + } + } + + $shiftByCite = function ($pos) use ($citeDeltas) { + $d = 0; + foreach ($citeDeltas as $cd) { + if ($cd['start'] < $pos) { + $d += $cd['delta']; + } + } + return $pos + $d; + }; + + // 2) 再标记引用句(从后往前) + if (!empty($contexts)) { + $spans = array_values($contexts); + usort($spans, function ($a, $b) { + return $b['text_start'] - $a['text_start']; + }); + foreach ($spans as $span) { + $start = $span['text_start']; + $end = $span['text_end']; + if ($start < 0 || $end <= $start) { + continue; + } + $s = $shiftByCite($start); + $e = $shiftByCite($end); + if ($e > strlen($html)) { + $e = strlen($html); + } + $checkIds = array_values(array_unique($span['check_ids'])); + $refNos = array_values(array_unique($span['ref_nos'])); + sort($refNos); + $reasonParts = []; + foreach ($refNos as $rn) { + if (!empty($span['reasons'][$rn])) { + $reasonParts[] = '[' . $rn . '] ' . $span['reasons'][$rn]; + } + } + $title = htmlspecialchars( + '引用句可能不合理: ' . implode('; ', $reasonParts), + ENT_QUOTES, + 'UTF-8' + ); + $open = ''; + $close = ''; + $html = substr($html, 0, $s) . $open . substr($html, $s, $e - $s) . $close . substr($html, $e); + } + } + + return ['html' => $html, 'issues' => $issues, 'issue_count' => $issueCount]; + } + + /** + * @return array refer_index => row + */ + public function loadReferMapByPArticleId($pArticleId) + { + $map = []; + if ($pArticleId <= 0) { + return $map; + } + $rows = Db::name('production_article_refer') + ->where('p_article_id', $pArticleId) + ->where('state', 0) + ->order('index asc') + ->select(); + foreach ($rows as $row) { + $map[intval($row['index'])] = $row; + } + return $map; + } + public function formatReferForLlm($refer) + { + $parts = []; + foreach (['title', 'author', 'joura', 'dateno', 'refer_doi', 'doilink'] as $f) { + $v = trim((string)$this->arrGet($refer, $f, '')); + if ($v !== '') { + $parts[] = ucfirst($f) . ': ' . 
$v; + } + } + $frag = trim((string)$this->arrGet($refer, 'refer_frag', '')); + $content = trim((string)$this->arrGet($refer, 'refer_content', '')); + if ($frag !== '') { + $parts[] = 'Reference: ' . $frag; + } elseif ($content !== '') { + $parts[] = 'Reference: ' . $content; + } + return implode("\n", $parts); + } + + /** + * 前端修改参考文献后重新校对:仅处理已有校对记录,刷新 refer_text、重置结果并入队;无记录直接返回 + * + * @param int $articleId + * @param int $pReferId t_production_article_refer.p_refer_id(优先) + * @param int $referenceNo 文献序号 index+1(无 p_refer_id 时用) + * @return array + */ + /** + * 编辑某条文献内容后,按 p_refer_id 异步重新校对该文献对应的全部 check 明细 + * + * 流程:刷新 refer_text/refer_index → 重置 status/is_match/confidence/reason + * → 设节级 ref_check_status=RUNNING → 投递到 ReferenceCheck 队列 + * + * 与 recheckByRefer 的差异:本方法**不**在请求内同步跑 LLM,仅入队,立即返回。 + * 前端可调 getProgressByPArticleId 轮询进度。 + * + * @param int $pReferId t_production_article_refer.p_refer_id(必填) + * @param int $pArticleId 可选:传入跳过 refer 表二次查表 + * @return array{p_refer_id:int, p_article_id:int, reference_no:int, reset:int, queued:int, check_ids:int[], queue:string} + */ + public function enqueueRecheckByPReferId($pReferId, $pArticleId = 0) + { + $pReferId = intval($pReferId); + if ($pReferId <= 0) { + throw new \InvalidArgumentException('p_refer_id is required'); + } + + $refer = Db::name('production_article_refer') + ->where('p_refer_id', $pReferId) + ->where('state', 0) + ->find(); + if (empty($refer)) { + throw new \RuntimeException('production_article_refer not found, p_refer_id=' . $pReferId); + } + + $pArticleId = intval($pArticleId); + if ($pArticleId <= 0) { + $pArticleId = intval($this->arrGet($refer, 'p_article_id', 0)); + } + if ($pArticleId <= 0) { + throw new \RuntimeException('p_article_id is missing for p_refer_id=' . 
$pReferId); + } + + $referenceNo = intval($this->arrGet($refer, 'index', 0)) + 1; + $referText = $this->formatReferForLlm($refer); + $now = date('Y-m-d H:i:s'); + + $rows = Db::name('article_reference_check_result') + ->where('p_article_id', $pArticleId) + ->where('p_refer_id', $pReferId) + ->select(); + + if (empty($rows)) { + return [ + 'p_refer_id' => $pReferId, + 'p_article_id' => $pArticleId, + 'reference_no' => $referenceNo, + 'reset' => 0, + 'queued' => 0, + 'check_ids' => [], + 'queue' => self::QUEUE_NAME, + ]; + } + + $resetFields = [ + 'refer_text' => $referText, + 'refer_index' => $referenceNo, + 'reference_no' => $referenceNo, + 'status' => 0, + 'is_match' => 0, + 'can_support' => 0, + 'confidence' => 0, + 'reason' => '', + 'error_msg' => '', + 'updated_at' => $now, + ]; + + $pendingJobs = []; + $amIds = []; + foreach ($rows as $row) { + $checkId = $this->resolveCheckRowId($row); + Db::name('article_reference_check_result')->where('id', $checkId)->update($resetFields); + // 旧的队列完成标记必须清掉,否则同 check_id 再次投递会被 acquireLock 静默丢弃 + $this->clearReferenceCheckQueueLock($checkId); + $pendingJobs[] = [ + 'check_id' => $checkId, + 'reference_no' => $referenceNo, + 'am_id' => intval($this->arrGet($row, 'am_id', 0)), + 'text_start' => intval($this->arrGet($row, 'text_start', 0)), + ]; + $amId = intval($this->arrGet($row, 'am_id', 0)); + if ($amId > 0) { + $amIds[$amId] = true; + } + } + + foreach (array_keys($amIds) as $amId) { + $this->setAmRefCheckStatus($amId, self::AM_STATUS_RUNNING); + } + + $checkIds = $this->pushJobsSortedByReferenceNo($pendingJobs); + + return [ + 'p_refer_id' => $pReferId, + 'p_article_id' => $pArticleId, + 'reference_no' => $referenceNo, + 'reset' => count($rows), + 'queued' => count($checkIds), + 'check_ids' => $checkIds, + 'queue' => self::QUEUE_NAME, + ]; + } + + public function recheckByRefer($articleId, $pReferId = 0, $referenceNo = 0) + { + $articleId = intval($articleId); + if ($articleId <= 0) { + throw new 
\InvalidArgumentException('article_id is required'); + } + + $ctx = $this->resolveReferForRecheck($articleId, intval($pReferId), intval($referenceNo)); + $refer = $ctx['refer']; + $pReferId = $ctx['p_refer_id']; + $referenceNo = $ctx['reference_no']; + $pArticleId = $ctx['p_article_id']; + $referText = $this->formatReferForLlm($refer); + $now = date('Y-m-d H:i:s'); + + $rows = Db::name('article_reference_check_result') + ->where('article_id', $articleId) + ->where(function ($query) use ($pReferId, $referenceNo) { + $query->where('p_refer_id', $pReferId)->whereOr('reference_no', $referenceNo); + }) + ->select(); + + if (empty($rows)) { + return [ + 'article_id' => $articleId, + 'p_refer_id' => $pReferId, + 'reference_no' => $referenceNo, + 'reset' => 0, + 'queued' => 0, + 'check_ids' => [], + 'queue' => self::QUEUE_NAME, + ]; + } + + $resetFields = [ + 'refer_text' => $referText, + 'p_refer_id' => $pReferId, + 'p_article_id' => $pArticleId, + 'refer_index' => $referenceNo, + 'status' => 0, + 'is_match' => 0, + 'can_support' => 0, + 'confidence' => 0, + 'reason' => '', + 'error_msg' => '', + 'updated_at' => $now, + ]; + + $pendingJobs = []; + $amIds = []; + foreach ($rows as $row) { + $checkId = $this->resolveCheckRowId($row); + Db::name('article_reference_check_result')->where('id', $checkId)->update($resetFields); + $pendingJobs[] = [ + 'check_id' => $checkId, + 'reference_no' => $referenceNo, + 'am_id' => intval($row['am_id']), + 'text_start' => intval(isset($row['text_start']) ? 
$row['text_start'] : 0), + ]; + $amId = intval($row['am_id']); + if ($amId > 0) { + $amIds[$amId] = true; + } + } + + foreach (array_keys($amIds) as $amId) { + $this->setAmRefCheckStatus($amId, self::AM_STATUS_RUNNING); + } + + usort($pendingJobs, function ($a, $b) { + if ($a['reference_no'] !== $b['reference_no']) { + return $a['reference_no'] - $b['reference_no']; + } + if ($a['am_id'] !== $b['am_id']) { + return $a['am_id'] - $b['am_id']; + } + return $a['text_start'] - $b['text_start']; + }); + + $checkIds = []; + $results = []; + $failed = []; + foreach ($pendingJobs as $job) { + $checkId = intval($job['check_id']); + $checkIds[] = $checkId; + $this->clearReferenceCheckQueueLock($checkId); + try { + $results[] = $this->runReferenceCheckOnce($checkId); + } catch (\Exception $e) { + $failed[] = [ + 'check_id' => $checkId, + 'error' => $e->getMessage(), + ]; + \think\Log::error('recheckByRefer check_id=' . $checkId . ' ' . $e->getMessage()); + } + } + + foreach (array_keys($amIds) as $amId) { + $this->syncAmRefCheckStatus($amId); + } + + return [ + 'article_id' => $articleId, + 'p_refer_id' => $pReferId, + 'reference_no' => $referenceNo, + 'reset' => count($rows), + 'checked' => count($results), + 'failed' => count($failed), + 'check_ids' => $checkIds, + 'results' => $results, + 'errors' => $failed, + ]; + } + + /** + * 清除队列 Redis 完成标记,避免重检任务被 acquireLock 静默丢弃 + */ + public function clearReferenceCheckQueueLock($checkId) + { + $checkId = intval($checkId); + if ($checkId <= 0) { + return; + } + try { + $keys = []; + foreach (['queue_job', 'queue_job_two'] as $prefix) { + $class = $prefix === 'queue_job_two' + ? 'app\\api\\job\\ReferenceCheckTwo' + : 'app\\api\\job\\ReferenceCheck'; + $base = $prefix . ':' . $class . ':' . $checkId; + $keys[] = $base; + $keys[] = $base . ':status'; + } + QueueRedis::getInstance()->deleteRedisKeys($keys); + } catch (\Exception $e) { + \think\Log::warning('clearReferenceCheckQueueLock id=' . $checkId . ' ' . 
$e->getMessage()); + } + } + + /** + * 执行一次引用 LLM 校对(同步,写回 article_reference_check_result) + */ + public function runReferenceCheckOnce($checkId) + { + $checkId = intval($checkId); + $row = Db::name('article_reference_check_result')->where('id', $checkId)->find(); + if (empty($row)) { + throw new \RuntimeException('article_reference_check_result not found, id=' . $checkId); + } + + $contentA = $this->resolveMainContentForJob($row); + $refer = null; + if (intval($row['p_refer_id']) > 0) { + $refer = Db::name('production_article_refer') + ->where('p_refer_id', intval($row['p_refer_id'])) + ->where('state', 0) + ->find(); + } + + if ($refer) { + $contentB = $this->formatReferForLlm($refer); + } else { + $contentB = trim((string)$this->arrGet($row, 'refer_text', '')); + } + + if ($contentA === '' || $contentB === '') { + $this->updateCheckResult($checkId, [ + 'status' => 2, + 'error_msg' => 'Missing article_main.content or refer_text', + ]); + throw new \RuntimeException('Missing article_main.content or refer_text'); + } + + $llmResult = (new LLMService())->checkReference($contentA, $contentB, false); + $requestFailed = !empty($llmResult['request_failed']); + $canSupport = $this->parseLlmCanSupport($llmResult); + $confidence = floatval(isset($llmResult['confidence']) ? $llmResult['confidence'] : 0); + $reason = isset($llmResult['reason']) ? $llmResult['reason'] : ''; + + // LLM 通讯失败:写 status=2(校对失败) + error_msg,抛异常让队列 worker 走 release(30) 重试; + // 重试 3 次后 ReferenceCheck::markFailed 会保持 status=2 收尾 + if ($requestFailed) { + $this->updateCheckResult($checkId, [ + 'confidence' => $confidence, + 'reason' => $reason, + 'status' => 2, + 'error_msg' => $reason, + ]); + $this->clearReferenceCheckQueueLock($checkId); + throw new \RuntimeException($reason !== '' ? $reason : 'LLM request failed'); + } + + $this->updateCheckResult($checkId, [ + 'can_support' => $canSupport ? 1 : 0, + 'is_match' => $canSupport ? 
1 : 0, + 'confidence' => $confidence, + 'reason' => $reason, + 'status' => 1, + 'error_msg' => '', + ]); + + $this->clearReferenceCheckQueueLock($checkId); + $this->maybeEnqueueSecondPass($checkId, $confidence); + + return [ + 'check_id' => $checkId, + 'can_support' => $canSupport ? 1 : 0, + 'is_match' => $canSupport ? 1 : 0, + 'confidence' => $confidence, + 'reason' => $reason, + ]; + } + + /** + * @return array{refer: array, p_article_id: int, p_refer_id: int, reference_no: int} + */ + private function resolveReferForRecheck($articleId, $pReferId, $referenceNo) + { + $prod = Db::name('production_article') + ->where('article_id', $articleId) + ->whereIn('state', [0, 2]) + ->find(); + if (empty($prod)) { + throw new \RuntimeException('production_article not found for article_id=' . $articleId); + } + + $pArticleId = intval($prod['p_article_id']); + $refer = null; + + if ($pReferId > 0) { + $refer = Db::name('production_article_refer') + ->where('p_refer_id', $pReferId) + ->where('p_article_id', $pArticleId) + ->where('state', 0) + ->find(); + } elseif ($referenceNo > 0) { + $referMap = $this->loadReferMapByPArticleId($pArticleId); + $referIndex = $referenceNo - 1; + if (isset($referMap[$referIndex])) { + $refer = $referMap[$referIndex]; + $pReferId = intval($refer['p_refer_id']); + } + } else { + throw new \InvalidArgumentException('p_refer_id or reference_no is required'); + } + + if (empty($refer)) { + throw new \RuntimeException('production_article_refer not found'); + } + + return [ + 'refer' => $refer, + 'p_article_id' => $pArticleId, + 'p_refer_id' => intval($refer['p_refer_id']), + 'reference_no' => intval($refer['index']) + 1, + ]; + } + + /** + * 仅使用 refer_doi 字段(二次 Crossref 摘要用) + */ + public function extractReferDoiOnly($refer) + { + if (!is_array($refer)) { + return ''; + } + $raw = trim((string)$this->arrGet($refer, 'refer_doi', '')); + if ($raw === '' || stripos($raw, 'not available') !== false) { + return ''; + } + $dois = 
$this->extractDoisFromString($raw);
        return empty($dois) ? '' : $dois[0];
    }

    /**
     * Fetch the Crossref block for the DOI held in refer_doi (second pass only).
     *
     * Returns an empty text with doi = '' when refer_doi yields no DOI, and an
     * empty text with the DOI filled in when extractCrossrefBlock() returns null.
     *
     * @param mixed $refer Refer row.
     * @return array{text:string, has_abstract:bool, doi:string}
     */
    public function fetchCrossrefAbstractByReferDoi($refer)
    {
        $doi = $this->extractReferDoiOnly($refer);
        if ($doi === '') {
            return ['text' => '', 'has_abstract' => false, 'doi' => ''];
        }

        $crossref = new CrossrefService([
            'mailto' => trim((string)Env::get('crossref_mailto', '')),
        ]);
        $block = $this->extractCrossrefBlock($doi, $crossref);
        if ($block === null) {
            return ['text' => '', 'has_abstract' => false, 'doi' => $doi];
        }

        return [
            'text' => $block['text'],
            'has_abstract' => !empty($block['has_abstract']),
            'doi' => $doi,
        ];
    }

    /**
     * Read the can_support verdict from an LLM result array.
     *
     * Uses the can_support key when it exists, otherwise falls back to is_match;
     * both are interpreted by parseLlmIsMatch(). A non-array result is false.
     *
     * @param mixed $llmResult
     * @return bool
     */
    public function parseLlmCanSupport($llmResult)
    {
        if (!is_array($llmResult)) {
            return false;
        }
        if (array_key_exists('can_support', $llmResult)) {
            return $this->parseLlmIsMatch($llmResult['can_support']);
        }
        return $this->parseLlmIsMatch(isset($llmResult['is_match']) ? $llmResult['is_match'] : false);
    }

    /**
     * First pass: load article_main.content (the whole section) for the row's
     * am_id and flatten it to plain text.
     *
     * Matches of BLUE_TAG_REGEX are replaced by "[$1]", remaining tags are
     * stripped, HTML entities decoded and whitespace runs collapsed. The result
     * is cut to $maxChars characters (never fewer than 500) with "..." appended.
     *
     * @param array $row      Check-result row; only am_id is read.
     * @param int   $maxChars Maximum length in characters.
     * @return string Plain text, or '' when am_id is not positive, the
     *                article_main row is missing, or its content is empty.
     */
    public function resolveMainContentForJob(array $row, $maxChars = 8000)
    {
        $amId = intval($this->arrGet($row, 'am_id', 0));
        if ($amId <= 0) {
            return '';
        }
        $main = Db::name('article_main')
            ->field('content')
            ->where('am_id', $amId)
            ->find();
        if (empty($main)) {
            return '';
        }

        $text = trim((string)$this->arrGet($main, 'content', ''));
        if ($text === '') {
            return '';
        }

        // Keep citation markers visible as "[n]" before the markup is removed.
        $text = preg_replace(self::BLUE_TAG_REGEX, '[$1]', $text);
        $text = strip_tags($text);
        $text = html_entity_decode($text, ENT_QUOTES | ENT_HTML5, 'UTF-8');
        $text = preg_replace('/\s+/u', ' ', $text);
        $text = trim($text);

        $maxChars = max(500, intval($maxChars));
        if (mb_strlen($text) > $maxChars) {
            $text = mb_substr($text, 0, $maxChars) . 
'...';
        }

        return $text;
    }

    /**
     * Local context around the citation (origin_text), for other callers.
     *
     * @param array $row Check-result row.
     * @return string Trimmed origin_text, or trimmed content_a when origin_text is empty.
     */
    public function resolveCitationContextForJob(array $row)
    {
        $text = trim((string)$this->arrGet($row, 'origin_text', ''));
        if ($text === '') {
            $text = trim((string)$this->arrGet($row, 'content_a', ''));
        }
        return $text;
    }

    /**
     * Extract one canonical DOI (10.xxxx/...) from a refer row.
     *
     * Priority: refer_content (the DOI in the original citation text is the
     * closest to the work actually cited) > refer_doi > doi > doilink.
     *
     * @param mixed $refer Refer row.
     * @return string The highest-priority DOI, or '' when none is found.
     */
    public function extractDoiFromRefer($refer)
    {
        $list = $this->extractAllDoiCandidatesFromRefer($refer);
        return empty($list) ? '' : $list[0];
    }

    /**
     * Return every DOI a refer row may correspond to, de-duplicated and ordered
     * by priority.
     *
     * Intended for the second-round DOI re-check: when the refer_doi metadata
     * disagrees with the DOI in the original citation text (data drift), the DOI
     * from the citation text is tried first.
     *
     * @param mixed $refer Refer row; anything that is not an array yields [].
     * @return string[]
     */
    public function extractAllDoiCandidatesFromRefer($refer)
    {
        if (!is_array($refer)) {
            return [];
        }
        // Field order here is the priority order of the result.
        $ordered = [
            (string)$this->arrGet($refer, 'refer_content', ''),
            (string)$this->arrGet($refer, 'refer_doi', ''),
            (string)$this->arrGet($refer, 'doi', ''),
            (string)$this->arrGet($refer, 'doilink', ''),
        ];

        $result = [];
        foreach ($ordered as $raw) {
            foreach ($this->extractDoisFromString($raw) as $doi) {
                if (!in_array($doi, $result, true)) {
                    $result[] = $doi;
                }
            }
        }
        return $result;
    }

    /**
     * Extract every DOI of the form 10.xxxx/yyy from arbitrary text.
     *
     * Text that is empty or contains "not available" (case-insensitive) yields [].
     *
     * @param mixed $text
     * @return string[] Unique DOIs in order of first appearance.
     */
    private function extractDoisFromString($text)
    {
        $text = trim((string)$text);
        if ($text === '' || stripos($text, 'not available') !== false) {
            return [];
        }

        $dois = [];

        // 1) DOIs written as doi.org/... links.
        if (preg_match_all('~doi\.org/([^\s?#"\'<>]+)~i', $text, $m)) {
            foreach ($m[1] as $cand) {
                $cand = $this->trimDoiTail(trim($cand));
                if ($this->isValidDoi($cand)) {
                    $dois[] = $cand;
                }
            }
        }

        // 2) Bare DOIs anywhere in the text.
        if (preg_match_all('~\b(10\.\d{3,9}/[^\s?#"\'<>]+)~i', $text, $m)) {
            foreach ($m[1] as $cand) {
                $cand = $this->trimDoiTail(trim($cand));
                if ($this->isValidDoi($cand)) {
                    $dois[] = $cand;
                }
            }
        }

        // 3) Fallback: nothing matched but the whole text starts with "10.".
        if ($dois === [] && 
strpos($text, '10.') === 0) {
            $cand = $this->trimDoiTail($text);
            if ($this->isValidDoi($cand)) {
                $dois[] = $cand;
            }
        }

        return array_values(array_unique($dois));
    }

    /**
     * Strip characters that commonly trail a DOI in running text: sentence
     * punctuation, closing brackets, quotes, backslashes and whitespace.
     *
     * @param string $doi
     * @return string
     */
    private function trimDoiTail($doi)
    {
        return rtrim($doi, ".,;:)]}>\"'\\ \t\n\r");
    }

    /**
     * Whether the string is shaped like a DOI: "10.", 3 to 9 digits, "/", then a
     * non-empty suffix without whitespace.
     *
     * @param mixed $doi
     * @return bool
     */
    private function isValidDoi($doi)
    {
        return (bool)preg_match('~^10\.\d{3,9}/[^\s]+$~i', (string)$doi);
    }

    /**
     * Fetch the literature content behind a DOI through PubMed / Crossref (the
     * local LLM cannot open web pages, so the content is fetched in advance).
     *
     * Behaviour:
     *  - tries every DOI candidate of the refer row
     *    (refer_content > refer_doi > doi > doilink);
     *  - the first candidate whose block has an abstract wins;
     *  - otherwise the first candidate that produced any block is used;
     *  - returns '' when no candidate produced a block (callers use that to skip
     *    the second-round re-check).
     *
     * @param mixed $refer Refer row.
     * @return string Text block for the LLM, or ''.
     */
    public function fetchDoiLiteratureBlock($refer)
    {
        $candidates = $this->extractAllDoiCandidatesFromRefer($refer);
        if (empty($candidates)) {
            return '';
        }

        $pubmed = new PubmedService([
            'email' => trim((string)Env::get('pubmed_email', '')),
            'tool' => trim((string)Env::get('pubmed_tool', 'tmrjournals')),
        ]);
        $crossref = new CrossrefService([
            'mailto' => trim((string)Env::get('crossref_mailto', '')),
        ]);

        $best = null;      // first block that carries an abstract
        $fallback = null;  // first block without an abstract

        foreach ($candidates as $doi) {
            $block = $this->buildDoiBlockFromSources($doi, $pubmed, $crossref);
            if ($block === null) {
                continue;
            }
            if (!empty($block['has_abstract'])) {
                $best = $block;
                break;
            }
            if ($fallback === null) {
                $fallback = $block;
            }
        }

        $chosen = $best ?: $fallback;
        if ($chosen === null) {
            return '';
        }
        return $chosen['text'];
    }

    /**
     * Fetch the real content for a single DOI.
     *
     * PubMed is asked first; when it returns a title or an abstract its record is
     * used, supplemented by the Crossref block if PubMed has no abstract but
     * Crossref does. Otherwise the Crossref block alone is returned.
     *
     * @return array|null ['text' => string, 'has_abstract' => bool], or null.
     */
    private function buildDoiBlockFromSources($doi, PubmedService $pubmed, CrossrefService $crossref)
    {
        $doi = trim((string)$doi);
        if ($doi === '') {
            return null;
        }

        $pub = $pubmed->fetchByDoi($doi);
        $pubAbstract = is_array($pub) ? 
trim((string)$this->arrGet($pub, 'abstract', '')) : '';

        // Use the PubMed record when it carries at least a title or an abstract.
        if (is_array($pub) && ($pubAbstract !== '' || trim((string)$this->arrGet($pub, 'title', '')) !== '')) {
            $lines = ['Source: PubMed (DOI ' . $doi . ')'];
            if (!empty($pub['title'])) {
                $lines[] = 'Actual Title: ' . trim((string)$pub['title']);
            }
            if (!empty($pub['journal'])) {
                $lines[] = 'Journal: ' . trim((string)$pub['journal']);
            }
            if (!empty($pub['year'])) {
                $lines[] = 'Year: ' . trim((string)$pub['year']);
            }
            if (!empty($pub['publication_types'])) {
                $lines[] = 'Publication Types: ' . implode('; ', (array)$pub['publication_types']);
            }
            if (!empty($pub['mesh_terms'])) {
                $lines[] = 'MeSH: ' . implode('; ', (array)$pub['mesh_terms']);
            }
            if ($pubAbstract !== '') {
                $lines[] = 'Abstract: ' . $this->truncate($pubAbstract, 3500);
            }

            // PubMed has no abstract: append the Crossref block if it has one.
            if ($pubAbstract === '') {
                $cr = $this->extractCrossrefBlock($doi, $crossref);
                if ($cr !== null && $cr['has_abstract']) {
                    $lines[] = "\n--- Crossref 补充 ---\n" . $cr['text'];
                    return ['text' => implode("\n", $lines), 'has_abstract' => true];
                }
            }

            return ['text' => implode("\n", $lines), 'has_abstract' => $pubAbstract !== ''];
        }

        // PubMed returned nothing usable: rely on Crossref alone.
        return $this->extractCrossrefBlock($doi, $crossref);
    }

    /**
     * Build a text block (title / journal / authors / abstract) from Crossref.
     * The abstract is passed through cleanCrossrefAbstract() because Crossref
     * abstracts are usually wrapped in JATS XML.
     *
     * NOTE(review): both fetchWork() and fetchWorkSummary() are called for the
     * same DOI; whether that means two HTTP requests depends on CrossrefService,
     * which is not visible here.
     *
     * @return array|null ['text' => string, 'has_abstract' => bool]; null when
     *                    fetchWork() does not return an array.
     */
    private function extractCrossrefBlock($doi, CrossrefService $crossref)
    {
        $msg = $crossref->fetchWork($doi);
        if (!is_array($msg)) {
            return null;
        }

        $summary = $crossref->fetchWorkSummary($doi);
        if (!is_array($summary)) {
            $summary = [];
        }

        $lines = ['Source: Crossref api.crossref.org/works/' . rawurlencode($doi)];
        // Prefer the raw work title; fall back to the summary title.
        $title = isset($msg['title'][0]) ? trim((string)$msg['title'][0]) : trim((string)$this->arrGet($summary, 'title', ''));
        if ($title !== '') {
            $lines[] = 'Actual Title: ' . $title;
        }
        if (!empty($summary['joura'])) {
            $lines[] = 'Journal: ' . 
trim((string)$summary['joura']);
        }
        if (!empty($summary['author_str'])) {
            $lines[] = 'Authors: ' . trim((string)$summary['author_str']);
        }
        if (!empty($summary['dateno'])) {
            $lines[] = 'Publication: ' . trim((string)$summary['dateno']);
        }
        if (!empty($summary['doilink'])) {
            $lines[] = 'DOI Link: ' . trim((string)$summary['doilink']);
        }
        if (!empty($summary['is_retracted'])) {
            $lines[] = 'Retraction: yes - ' . trim((string)$this->arrGet($summary, 'retract_reason', ''));
        }

        $abstract = $this->cleanCrossrefAbstract((string)$this->arrGet($msg, 'abstract', ''));
        $hasAbstract = $abstract !== '';
        if ($hasAbstract) {
            $lines[] = 'Abstract: ' . $this->truncate($abstract, 3500);
        } else {
            // No abstract: tell the LLM to judge from the metadata with care.
            $lines[] = 'Note: Crossref 未返回摘要,请结合标题/期刊/作者与正文谨慎判断。';
        }

        return ['text' => implode("\n", $lines), 'has_abstract' => $hasAbstract];
    }

    /**
     * Reduce a raw Crossref abstract to plain text: markup removed, runs of
     * spaces/tabs collapsed, line endings normalised to "\n", blank lines
     * removed, result trimmed.
     *
     * NOTE(review): the first four patterns below look truncated in this copy
     * of the patch (they read as if a leading "<tag" part was lost); verify
     * them against the repository before relying on this text.
     *
     * @param mixed $raw
     * @return string
     */
    private function cleanCrossrefAbstract($raw)
    {
        $raw = trim((string)$raw);
        if ($raw === '') {
            return '';
        }
        $raw = preg_replace('~]*>.*?~is', '', $raw);
        $raw = preg_replace('~]*>~i', "\n", $raw);
        $raw = preg_replace('~~i', '', $raw);
        $raw = preg_replace('~]+>~i', '', $raw);
        $raw = strip_tags($raw);
        $raw = preg_replace('/[ \t]+/u', ' ', $raw);
        $raw = preg_replace("/\r\n|\r/u", "\n", $raw);
        $raw = preg_replace("/\n{2,}/u", "\n", $raw);
        return trim($raw);
    }

    /**
     * Cut text to at most $max characters, appending "..." when it was cut.
     *
     * @param mixed $text
     * @param int   $max  Maximum length in characters (mb_strlen).
     * @return string
     */
    private function truncate($text, $max)
    {
        $text = (string)$text;
        if (mb_strlen($text) <= $max) {
            return $text;
        }
        return mb_substr($text, 0, $max) . '...';
    }

    /**
     * Prepare the data for the second (DOI) re-check: the bibliographic entry
     * plus the content actually fetched for its DOI.
     *
     * @param mixed  $refer     Refer row.
     * @param string $referText Pre-formatted entry; when blank the entry is
     *                          built with formatReferForLlm().
     * @return array{refer_text:string, doi_block:string, has_abstract:bool, doi_used:string}
     */
    public function prepareRecheckPayload($refer, $referText = '')
    {
        $base = trim($referText) !== '' ? 
trim($referText) : $this->formatReferForLlm($refer);
        $cr = $this->fetchCrossrefAbstractByReferDoi($refer);
        return [
            'refer_text' => $base,
            'doi_block' => $cr['text'],
            'has_abstract' => $cr['has_abstract'],
            'doi_used' => $cr['doi'],
        ];
    }

    /**
     * Legacy interface: join the payload into a single text block. Kept for
     * backward compatibility; callers are advised to use prepareRecheckPayload().
     *
     * @param mixed  $refer
     * @param string $referText
     * @return string The bibliographic entry followed either by the fetched
     *                Crossref block or by a notice that nothing could be fetched.
     */
    public function formatReferForDoiRecheck($refer, $referText = '')
    {
        $payload = $this->prepareRecheckPayload($refer, $referText);
        if ($payload['doi_block'] === '') {
            return $payload['refer_text']
                . "\n\n【DOI 文献真实内容】\n未能从 PubMed/Crossref 获取该 DOI 的摘要或元数据,请依据书目条目与正文谨慎判断。";
        }
        return $payload['refer_text']
            . "\n\n【Crossref 摘要(依据 Refer_doi 从 api.crossref.org/works 获取)】\n"
            . $payload['doi_block'];
    }

    /**
     * Enqueue the second-round re-check, with a delay, when the first round
     * ended with confidence <= 0.65 and real DOI content can be fetched.
     *
     * Skipped (to avoid a pointless re-run that would give the same result) when:
     *  - the check id is invalid or the first-round confidence is above the threshold;
     *  - the check row or its live refer row (state = 0) does not exist;
     *  - refer_doi holds no DOI or Crossref returned no abstract.
     *
     * @param int|string   $checkId
     * @param float|string $confidence First-round confidence.
     * @return bool True when a second-pass job was pushed.
     */
    public function maybeEnqueueSecondPass($checkId, $confidence)
    {
        $checkId = intval($checkId);
        $confidence = floatval($confidence);
        if ($checkId <= 0 || $confidence > 0.65) {
            return false;
        }

        $row = Db::name('article_reference_check_result')->where('id', $checkId)->find();
        if (empty($row)) {
            return false;
        }

        $refer = null;
        if (intval($row['p_refer_id']) > 0) {
            $refer = Db::name('production_article_refer')
                ->where('p_refer_id', intval($row['p_refer_id']))
                ->where('state', 0)
                ->find();
        }
        if (empty($refer) || $this->extractReferDoiOnly($refer) === '') {
            return false;
        }

        // Probe Crossref now so a job is only queued when there is an abstract.
        $cr = $this->fetchCrossrefAbstractByReferDoi($refer);
        if (empty($cr['has_abstract'])) {
            return false;
        }

        $this->clearReferenceCheckQueueLock($checkId);
        $this->pushJob2($checkId, 5);
        return true;
    }

    /**
     * Extract the citations marked up in article_main.content (the tags matched
     * by BLUE_TAG_REGEX) together with their local context.
     *
     * All offsets in the result are byte offsets into $content.
     *
     * @param string $content
     * @return array List of arrays with keys reference_raw, reference_numbers,
     *               original_text, reference_start, reference_end, text_start,
     *               text_end. Citations without a meaningful context or without
     *               any parsable number are left out.
     */
    public function extractReferences($content)
    {
        $result = [];
        preg_match_all(self::BLUE_TAG_REGEX, $content, $matches, PREG_OFFSET_CAPTURE);
        if (empty($matches[0])) {
            return [];
        }

        $tagSpans = 
[];
        // Byte span of every citation tag, used to find neighbouring tags.
        foreach ($matches[0] as $index => $match) {
            $tagSpans[] = [
                'start' => $match[1],
                'end' => $match[1] + strlen($match[0]),
                'index' => $index,
            ];
        }

        foreach ($matches[0] as $index => $match) {
            $fullTag = $match[0];
            $tagStart = $match[1];
            $tagEnd = $tagStart + strlen($fullTag);
            $rawRef = trim($matches[1][$index][0]);
            $referenceNumbers = $this->expandReferenceNumbers($rawRef);

            list($localStart, $localEnd, $originalText) = $this->extractLocalCitationContext(
                $content,
                $tagStart,
                $tagEnd,
                $tagSpans
            );

            if (!$this->isMeaningfulCitationContext($originalText) || empty($referenceNumbers)) {
                continue;
            }

            $result[] = [
                'reference_raw' => $rawRef,
                'reference_numbers' => $referenceNumbers,
                'original_text' => $originalText,
                'reference_start' => $tagStart,
                'reference_end' => $tagEnd,
                'text_start' => $localStart,
                'text_end' => $localEnd,
            ];
        }

        return $result;
    }

    /**
     * Cut the local context for one citation tag.
     *
     * The text before the tag is preferred. For the first citation of a
     * paragraph the context starts at the paragraph start (capped by
     * capContextStartBeforeTag()); for later citations in the same paragraph it
     * starts at the beginning of the current sentence.
     *
     * @param string $content  Full section content.
     * @param int    $tagStart Byte offset where the citation tag starts.
     * @param int    $tagEnd   Byte offset just past the citation tag.
     * @param array  $tagSpans Spans of all citation tags (keys start, end, index).
     * @return array [start offset, end offset, context text]
     */
    private function extractLocalCitationContext($content, $tagStart, $tagEnd, array $tagSpans)
    {
        $paragraphStart = $this->findParagraphStart($content, $tagStart);
        $sentenceEnd = $this->findSentenceEnd($content, $tagEnd, $tagEnd);

        // Nearest tag that ends before this one (within the paragraph) and
        // nearest tag that starts after it (within the sentence).
        $prevTagEnd = $paragraphStart;
        $nextTagStart = $sentenceEnd;
        foreach ($tagSpans as $span) {
            if ($span['end'] <= $tagStart && $span['end'] > $prevTagEnd) {
                $prevTagEnd = $span['end'];
            }
            if ($span['start'] > $tagEnd && $span['start'] < $nextTagStart) {
                $nextTagStart = $span['start'];
            }
        }

        $hasPriorCiteInParagraph = ($prevTagEnd > $paragraphStart);
        $sentenceStart = $this->findSentenceStart($content, $tagStart);

        // First citation in the paragraph: from the paragraph start up to the tag.
        // Later citations: from the start of the current sentence (which may lie
        // before the previous tag), so the context is not reduced to a stub such
        // as "and external environment" and then wrongly replaced by the text
        // after the tag.
        if ($hasPriorCiteInParagraph) {
            $localStart = max($paragraphStart, $sentenceStart);
        } else {
            $localStart = $this->capContextStartBeforeTag($content, $tagStart, $paragraphStart);
        }

        // Default: the statement in front of the citation tag.
        $localEnd = $tagStart;
$originalText = $this->buildCitationContextText($content, $localStart, $localEnd);

        // Only for the first citation of a paragraph, and only when the text in
        // front of the tag is very short (e.g. a sentence ending in
        // "ICU nurses [14]"), switch to the fragment after the tag. For later
        // citations in the same paragraph trailing text is never used, because it
        // would pick up the next sentence.
        $allowTrailing = !$hasPriorCiteInParagraph;
        if ($allowTrailing && (
            !$this->isMeaningfulCitationContext($originalText)
            || $this->shouldUseTrailingCitationContext($content, $localStart, $tagStart, $tagEnd)
        )) {
            // Stop at the next citation tag if it comes before the sentence end.
            $trailEnd = ($nextTagStart < $sentenceEnd) ? $nextTagStart : $sentenceEnd;
            $trailText = $this->buildCitationContextText($content, $tagEnd, $trailEnd);
            if ($this->isMeaningfulCitationContext($trailText)) {
                $localStart = $tagEnd;
                $localEnd = $trailEnd;
                $originalText = $trailText;
            }
        }

        // Still nothing usable: widen to the neighbouring sentences.
        if (!$this->isMeaningfulCitationContext($originalText)) {
            list($localStart, $localEnd) = $this->widenCitationContextBounds(
                $content,
                $tagStart,
                $tagEnd,
                $localStart,
                $localEnd
            );
            $originalText = $this->buildCitationContextText($content, $localStart, $localEnd);
        }

        return [$localStart, $localEnd, $originalText];
    }

    /**
     * Whether the context after the tag should be used because the text in front
     * of it is only a very short fragment (an author abbreviation, for example).
     *
     * @return bool True when the leading text is not meaningful or is shorter
     *              than 25 characters.
     */
    private function shouldUseTrailingCitationContext($content, $localStart, $tagStart, $tagEnd)
    {
        $before = $this->buildCitationContextText($content, $localStart, $tagStart);
        if (!$this->isMeaningfulCitationContext($before)) {
            return true;
        }

        return mb_strlen($before) < 25;
    }

    /**
     * Expand the text of a citation marker ("3", "1,4", "2-5", "1, 3-4") into
     * the list of reference numbers it denotes.
     *
     * The fullwidth comma and the dash/hyphen variants listed below are
     * normalised to "," and "-" first. Parts that are neither a number nor an
     * ascending "a-b" range are ignored, as are descending ranges.
     *
     * @param string $refStr       Raw marker text.
     * @param int    $maxRangeSpan Largest accepted (end - start) of a range.
     *                             Wider ranges are ignored like malformed parts,
     *                             so a typo such as "1-99999999" cannot make
     *                             range() allocate an enormous array.
     * @return int[] Unique numbers in order of first appearance.
     */
    public function expandReferenceNumbers($refStr, $maxRangeSpan = 1000)
    {
        $refStr = str_replace(
            [',', '–', '—', '−', '‐', '‑'],
            [',', '-', '-', '-', '-', '-'],
            trim($refStr)
        );
        $maxRangeSpan = max(0, intval($maxRangeSpan));
        $numbers = [];
        foreach (explode(',', $refStr) as $part) {
            $part = trim($part);
            if ($part === '') {
                continue;
            }
            if (preg_match('/^(\d+)\s*-\s*(\d+)$/', $part, $m)) {
                $start = intval($m[1]);
                $end = intval($m[2]);
                if ($start <= $end && ($end - $start) <= $maxRangeSpan) {
                    $numbers = array_merge($numbers, range($start, $end));
                }
            } elseif (ctype_digit($part)) {
                $numbers[] = intval($part);
            }
        }
        return array_values(array_unique($numbers));
    }

    /**
     * 返回 $bytePos 处 UTF-8 
码点占用的最后一字节之后的位置(下一字符起始)
     *
     * (English) Byte offset just past the UTF-8 code point that starts at
     * $bytePos, i.e. where the next character begins. For an out-of-range
     * $bytePos the value $bytePos + 1 clamped to [0, strlen($content)] is returned.
     */
    private function utf8CharEnd($content, $bytePos)
    {
        $len = strlen($content);
        if ($bytePos < 0 || $bytePos >= $len) {
            return max(0, min($len, $bytePos + 1));
        }
        $next = $bytePos + 1;
        // Skip UTF-8 continuation bytes (bit pattern 10xxxxxx).
        while ($next < $len && (ord($content[$next]) & 0xC0) === 0x80) {
            $next++;
        }

        return $next;
    }

    /**
     * Cut by byte offsets (consistent with strpos/strlen). mb_substr must not be
     * used here: it counts characters, so with a Chinese prefix the cut would
     * land inside the following English word.
     *
     * @param string $content
     * @param int    $start Byte offset of the first byte.
     * @param int    $end   Byte offset just past the last byte.
     * @return string '' when $end <= $start.
     */
    private function byteSubstr($content, $start, $end)
    {
        $length = max(0, $end - $start);
        if ($length === 0) {
            return '';
        }

        return (string)mb_strcut($content, $start, $length, 'UTF-8');
    }

    /**
     * Plain-text context between two byte offsets: citation tags removed, other
     * markup stripped, whitespace collapsed and a leading UTF-8 BOM dropped.
     *
     * @param string $content
     * @param int    $start Byte offset.
     * @param int    $end   Byte offset.
     * @return string
     */
    private function buildCitationContextText($content, $start, $end)
    {
        $text = $this->byteSubstr($content, $start, $end);
        $text = preg_replace(self::BLUE_TAG_REGEX, '', $text);
        $text = trim(strip_tags($text));
        $text = (string)preg_replace('/\s+/u', ' ', $text);

        // Remove a leading BOM as a whole 3-byte sequence. ltrim() with a
        // character list would strip the bytes EF, BB and BF individually and so
        // corrupt any leading character whose first byte is 0xEF (for example
        // fullwidth punctuation such as U+FF08), leaving invalid UTF-8 behind.
        while (strncmp($text, "\xEF\xBB\xBF", 3) === 0) {
            $text = (string)substr($text, 3);
        }

        return $text;
    }

    /**
     * Reject contexts that are empty, punctuation only, without any letter or
     * digit, or shorter than two characters (for example a lone "." left over
     * after the tag was removed).
     *
     * @param string $text
     * @return bool
     */
    private function isMeaningfulCitationContext($text)
    {
        $text = trim($text);
        if ($text === '') {
            return false;
        }
        if ($this->isOnlyPunctuationOrSpace($text)) {
            return false;
        }
        if (!preg_match('/[\p{L}\p{N}]/u', $text)) {
            return false;
        }

        return mb_strlen($text) >= 2;
    }

    /**
     * Whether the text consists solely of whitespace, punctuation and symbols.
     *
     * @param string $text
     * @return bool
     */
    private function isOnlyPunctuationOrSpace($text)
    {
        return preg_match('/^[\s\p{P}\p{S}]+$/u', $text) === 1;
    }

    /**
     * When the first sentence is too short, extend the bounds by one sentence in
     * each direction, limited to a span of 2000 bytes centred on the tag.
     *
     * @return array [start offset, end offset]
     */
    private function widenCitationContextBounds($content, $tagStart, $tagEnd, $start, $end)
    {
        $len = strlen($content);
        $maxSpan = 2000;

        if ($start > 0) {
            $prevStart = $this->findSentenceStart($content, max(0, $start - 1));
            if ($prevStart < $start) {
                $start = $prevStart;
            }
        }

        $nextEnd = $this->findSentenceEnd($content, $end, $tagEnd);
        if ($nextEnd > $end && $nextEnd <= $len) {
            $end = $nextEnd;
        }

        if ($end - $start > $maxSpan) {
            $half = (int)floor($maxSpan / 2);
            $mid = (int)floor(($tagStart + $tagEnd) / 2);
            $start = 
max(0, $mid - $half);
            $end = min($len, $start + $maxSpan);
        }

        return [$start, $end];
    }

    /**
     * Whether the delimiter at byte offset $pos can act as a sentence boundary.
     *
     * Every delimiter other than "." is accepted as is. A "." is rejected when
     * it sits between two digits (decimal point), when the 12 bytes before it
     * end in one of the abbreviations et al / e.g / i.e / vs / etc / fig / no,
     * or when the pattern applied to the following 24 bytes matches.
     *
     * NOTE(review): the pattern applied to $after looks truncated in this copy
     * of the patch (markup between the two \s* appears to be missing); verify it
     * against the repository.
     *
     * @return bool
     */
    private function isSentenceDelimiterAt($content, $pos, $delimiter)
    {
        $len = strlen($content);
        if ($delimiter !== '.' || $pos < 0 || $pos >= $len) {
            return true;
        }
        if ($pos > 0 && $pos + 1 < $len
            && ctype_digit($content[$pos - 1])
            && ctype_digit($content[$pos + 1])
        ) {
            return false;
        }

        $before = substr($content, max(0, $pos - 12), min(12, $pos));
        if (preg_match('/\b(et\s+al|e\.g|i\.e|vs|etc|fig|no)\s*\.?\s*$/i', $before)) {
            return false;
        }

        $after = substr($content, $pos + 1, 24);
        if (preg_match('/^\s*\s*\[/', $after)) {
            return false;
        }

        return true;
    }

    /**
     * Byte offset where the paragraph containing $tagStart begins, judged from
     * HTML markup and line breaks. Prevents an English multi-sentence paragraph
     * from being reduced to the sentence after its last full stop.
     *
     * The result is the largest end offset among the last match of each pattern
     * and the last "\n\n" / "\n" before the tag; 0 when none is found.
     *
     * NOTE(review): the first and third patterns look truncated in this copy of
     * the patch (tag names appear to be missing); verify them against the
     * repository.
     *
     * @return int
     */
    private function findParagraphStart($content, $tagStart)
    {
        $search = substr($content, 0, max(0, $tagStart));
        if ($search === '') {
            return 0;
        }

        $best = 0;

        if (preg_match_all('/]*>/i', $search, $m, PREG_OFFSET_CAPTURE)) {
            $last = end($m[0]);
            $best = max($best, $last[1] + strlen($last[0]));
        }
        if (preg_match_all('/<\/p>\s*/i', $search, $m, PREG_OFFSET_CAPTURE)) {
            $last = end($m[0]);
            $best = max($best, $last[1] + strlen($last[0]));
        }
        if (preg_match_all('/\s*/i', $search, $m, PREG_OFFSET_CAPTURE)) {
            $last = end($m[0]);
            $best = max($best, $last[1] + strlen($last[0]));
        }

        $pos = strrpos($search, "\n\n");
        if ($pos !== false) {
            $best = max($best, $pos + 2);
        }
        $pos = strrpos($search, "\n");
        if ($pos !== false) {
            $best = max($best, $pos + 1);
        }

        return $best;
    }

    /**
     * When the paragraph in front of the tag is longer than $maxBytes, start the
     * context at most $maxBytes before the tag instead, aligned to the first
     * sentence boundary inside that window when there is one. Keeps a single
     * LLM context from growing too large.
     *
     * @param string $content
     * @param int    $tagStart       Byte offset of the citation tag.
     * @param int    $paragraphStart Byte offset of the paragraph start.
     * @param int    $maxBytes       Maximum number of bytes kept before the tag.
     * @return int Byte offset at which the context should start.
     */
    private function capContextStartBeforeTag($content, $tagStart, $paragraphStart, $maxBytes = 2500)
    {
        if ($tagStart - $paragraphStart <= $maxBytes) {
            return $paragraphStart;
        }

        $start = $tagStart - $maxBytes;
        // Move forward to a UTF-8 character boundary. A slice that begins on a
        // continuation byte is invalid UTF-8, which makes the /u match below
        // fail and silently disables the sentence alignment for multi-byte text.
        while ($start < $tagStart && (ord($content[$start]) & 0xC0) === 0x80) {
            $start++;
        }
        $slice = substr($content, $start, $tagStart - $start);
        if (preg_match('/[.!?。!?]\s+/u', $slice, $m, 
PREG_OFFSET_CAPTURE)) {
            // Start right after the first sentence boundary inside the window.
            $rel = $m[0][1] + strlen($m[0][0]);
            return $start + $rel;
        }

        return max($paragraphStart, $start);
    }

    /**
     * Byte offset where the sentence containing $position starts: the position
     * just past the nearest preceding sentence delimiter, or 0 when none is found.
     *
     * NOTE(review): for each delimiter only its last occurrence before
     * $position is tested. If that occurrence is rejected by
     * isSentenceDelimiterAt() (for example the "." of "et al."), earlier
     * occurrences of the same delimiter are not considered.
     *
     * @param string $content
     * @param int    $position Byte offset to search backwards from.
     * @return int
     */
    private function findSentenceStart($content, $position)
    {
        $start = 0;
        foreach (['.', '。', '!', '?', "\n"] as $delimiter) {
            $pos = strrpos(substr($content, 0, $position), $delimiter);
            if ($pos !== false && $this->isSentenceDelimiterAt($content, $pos, $delimiter)) {
                // utf8CharEnd() steps over multi-byte delimiters as a whole.
                $start = max($start, $this->utf8CharEnd($content, $pos));
            }
        }
        return $start;
    }

    /**
     * Byte offset just past the end of the sentence, searching forward.
     *
     * A sentence end that is separated from the citation tag only by
     * punctuation or whitespace is skipped and the search continues, so an
     * isolated full stop directly after the tag does not end the context.
     *
     * NOTE(review): as in findSentenceStart(), only the first occurrence of each
     * delimiter from the current search position is tested per round.
     *
     * @param string $content
     * @param int    $searchFrom Byte offset from which the sentence end is searched.
     * @param int    $tagEnd     End offset of the citation tag; used to skip an
     *                           isolated full stop right after the tag. 0 disables this.
     * @return int strlen($content) when no sentence end is found.
     */
    private function findSentenceEnd($content, $searchFrom, $tagEnd = 0)
    {
        $length = strlen($content);
        $minPos = max(0, $searchFrom);

        while ($minPos < $length) {
            $endPositions = [];
            foreach (['.', '。', '!', '?', "\n"] as $delimiter) {
                $pos = strpos($content, $delimiter, $minPos);
                if ($pos !== false && $this->isSentenceDelimiterAt($content, $pos, $delimiter)) {
                    $endPositions[] = $this->utf8CharEnd($content, $pos);
                }
            }
            if (empty($endPositions)) {
                return $length;
            }

            $end = min($endPositions);
            if ($tagEnd <= 0 || $end <= $tagEnd) {
                return $end;
            }

            // Accept this end only if real text lies between the tag and it.
            $gap = substr($content, $tagEnd, $end - $tagEnd);
            $gapText = trim(strip_tags(preg_replace(self::BLUE_TAG_REGEX, '', $gap)));
            if ($gapText !== '' && !$this->isOnlyPunctuationOrSpace($gapText)) {
                return $end;
            }

            $minPos = $end;
        }

        return $length;
    }

    /**
     * Enqueue already-stored check records in ascending reference number order;
     * records with the same number are ordered by am_id and then by position in
     * the text.
     *
     * @param array $rows Elements with check_id and reference_no, optionally
     *                    am_id and text_start.
     * @return int[] The check ids in the order they were enqueued.
     */
    private function pushJobsSortedByReferenceNo(array $rows)
    {
        if (empty($rows)) {
            return [];
        }

        usort($rows, function ($a, $b) {
            if ($a['reference_no'] !== $b['reference_no']) {
                return $a['reference_no'] - $b['reference_no'];
            }
            $amA = isset($a['am_id']) ? intval($a['am_id']) : 0;
            $amB = isset($b['am_id']) ? 
intval($b['am_id']) : 0;
            if ($amA !== $amB) {
                return $amA - $amB;
            }
            $posA = isset($a['text_start']) ? intval($a['text_start']) : 0;
            $posB = isset($b['text_start']) ? intval($b['text_start']) : 0;
            return $posA - $posB;
        });

        $checkIds = [];
        $delay = 0;
        foreach ($rows as $row) {
            $checkId = intval($row['check_id']);
            $checkIds[] = $checkId;
            // Each job is delayed one second more than the previous one.
            $this->pushJob($checkId, $delay);
            $delay++;
        }

        return $checkIds;
    }

    /**
     * Push a first-pass job (ReferenceCheck@fire) for one check record, after
     * clearing that record's queue lock.
     *
     * @param int|string $checkId
     * @param int        $delaySeconds Delay before the job becomes available; 0 pushes immediately.
     * @throws \Exception Re-thrown after logging when the queue rejects the job.
     */
    private function pushJob($checkId, $delaySeconds = 0)
    {
        $checkId = intval($checkId);
        $this->clearReferenceCheckQueueLock($checkId);
        $jobClass = 'app\api\job\ReferenceCheck@fire';
        $data = ['check_id' => $checkId];
        try {
            // The id returned by the queue is not needed here.
            if ($delaySeconds > 0) {
                Queue::later($delaySeconds, $jobClass, $data, self::QUEUE_NAME);
            } else {
                Queue::push($jobClass, $data, self::QUEUE_NAME);
            }
        } catch (\Exception $e) {
            \think\Log::error('ReferenceCheck pushJob failed check_id=' . $checkId . ' ' . $e->getMessage());
            throw $e;
        }
    }

    /**
     * Push a second-pass job (ReferenceCheckTwo@fire) for one check record.
     * Unlike pushJob() this does not touch the queue lock.
     *
     * @param int|string $checkId
     * @param int        $delaySeconds Delay before the job becomes available; 0 pushes immediately.
     * @throws \Exception Re-thrown after logging when the queue rejects the job.
     */
    private function pushJob2($checkId, $delaySeconds = 0)
    {
        // Normalise the id the same way pushJob() does.
        $checkId = intval($checkId);
        $jobClass = 'app\api\job\ReferenceCheckTwo@fire';
        $data = ['check_id' => $checkId];
        try {
            if ($delaySeconds > 0) {
                Queue::later($delaySeconds, $jobClass, $data, self::QUEUE_NAME);
            } else {
                Queue::push($jobClass, $data, self::QUEUE_NAME);
            }
        } catch (\Exception $e) {
            \think\Log::error('ReferenceCheckTwo pushJob failed check_id=' . $checkId . ' ' . 
$e->getMessage()); + throw $e; + } + } +} diff --git a/application/common/service/LLMService.php b/application/common/service/LLMService.php new file mode 100644 index 00000000..69f5e61c --- /dev/null +++ b/application/common/service/LLMService.php @@ -0,0 +1,1271 @@ +url = trim((string)Env::get('promotion.promotion_llm_url', '')); + $this->model = trim((string)Env::get('promotion.promotion_llm_model', '')); + $this->apiKey = trim((string)Env::get('promotion.promotion_llm_api_key', '')); + // 引用校对 system 提示词较长,请求常超过 30s,至少 120s + $this->timeout = max(120, intval(Env::get('promotion.promotion_llm_timeout', 120))); + } + + /** + * @param string $contextText 正文引用处句子 + * @param string $referText 参考文献条目(或 refer 格式化文本) + * @param bool $isAgain 是否为 DOI 二次复核 + * @param string|null $doiBlock 可选:系统抓取到的 DOI 真实文献内容(仅二次复核使用) + */ + public function checkReference($contextText, $referText, $isAgain = false, $doiBlock = null) + { + // request_failed=true 表示"LLM 通讯/解析层面的失败"(可重试,区别于业务上的"未命中"); + // 上游 runReferenceCheckOnce 会据此把 DB.status 置为 2(失败) 并抛异常触发队列重试 + $fallback = [ + 'can_support' => false, + 'is_match' => false, + 'confidence' => 0.0, + 'reason' => 'LLM not configured or request failed', + 'request_failed' => true, + ]; + if ($this->url === '' || $this->model === '') { + \think\Log::warning('ReferenceCheck LLM: url or model not configured'); + return $fallback; + } + + $contextText = trim($contextText); + $referText = trim($referText); + $doiBlock = trim((string)$doiBlock); + if ($contextText === '' || $referText === '') { + // 空文本是入参问题,不是 LLM 故障,不需要重试 + return [ + 'can_support' => false, + 'is_match' => false, + 'confidence' => 0.0, + 'reason' => 'Empty citation context or reference text', + ]; + } + + $maxContextLen = 8000; + if (mb_strlen($contextText) > $maxContextLen) { + $contextText = mb_substr($contextText, 0, $maxContextLen); + } + if (mb_strlen($referText) > 4000) { + $referText = mb_substr($referText, 0, 4000); + } + if (mb_strlen($doiBlock) > 4000) { + $doiBlock 
= mb_substr($doiBlock, 0, 4000); + } + + if ($isAgain) { + $system = $this->buildReferenceCheckSecondPassPrompt(); + $user = $this->buildReferenceCheckSecondPassUserPrompt($contextText, $referText, $doiBlock); + } else { + $system = $this->buildReferenceCheckFirstPassPrompt(); + $user = $this->buildReferenceCheckFirstPassUserPrompt($contextText, $referText); + } + + \think\Log::info('ReferenceCheck system head: ' . mb_substr($system, 0, 200)); + \think\Log::info('ReferenceCheck user head: ' . mb_substr($user, 0, 600)); + $payload = [ + 'model' => $this->model, + 'temperature' => 0, + 'messages' => [ + ['role' => 'system', 'content' => $system], + ['role' => 'user', 'content' => $user], + ], + ]; + + $content = $this->postChat($payload); + if ($content === null) { + \think\Log::warning('ReferenceCheck LLM: postChat returned null'); + return $fallback; + } + + $parsed = $this->parseJson($content); + if ($parsed === null) { + \think\Log::warning('ReferenceCheck LLM: parseJson failed, raw=' . mb_substr($content, 0, 500)); + return $fallback; + } + + $canSupport = $this->parseCanSupportFromParsed($parsed); + $confidence = $this->snapReferenceCheckConfidence( + $this->normalizeConfidence(isset($parsed['confidence']) ? $parsed['confidence'] : 0), + $canSupport + ); + $reason = $this->cleanReason((string)(isset($parsed['reason']) ? $parsed['reason'] : '')); + \think\Log::info( + 'ReferenceCheck result: can_support=' . ($canSupport ? '1' : '0') + . ', confidence=' . $confidence + . ', reason=' . 
$reason + ); + return [ + 'can_support' => $canSupport, + 'is_match' => $canSupport, + 'confidence' => $confidence, + 'reason' => $reason, + ]; + } + + /** + * 解析 can_support;兼容 is_match 字段 + */ + private function parseCanSupportFromParsed(array $parsed) + { + if (array_key_exists('can_support', $parsed)) { + return $this->boolFromLlmValue($parsed['can_support']); + } + if (array_key_exists('is_match', $parsed)) { + return $this->boolFromLlmValue($parsed['is_match']); + } + return false; + } + + private function boolFromLlmValue($value) + { + if (is_bool($value)) { + return $value; + } + if (is_int($value) || is_float($value)) { + return intval($value) === 1; + } + $s = strtolower(trim((string)$value)); + return in_array($s, ['1', 'true', 'yes', 'support', 'supported'], true); + } + + /** 第一次校对:书目条目 vs 正文全文 */ + private function buildReferenceCheckFirstPassPrompt() + { + return <<<'PROMPT' +你是文献引用校对助手。判断【正文全文】与【参考文献书目】是否相关、能否用于支撑正文中的引用。 + +【核心原则:从宽判断,避免误杀】 +默认倾向 can_support=true。只要文献与正文不是「风马牛不相及」,即判为相关、能支撑。 +不要求变量一致、不要求结论逐条对应、不要求研究设计相同。 + +【仅当以下情况才判 can_support=false(与正文明显无关)】 +- 学科/主题完全无关(如正文讲深度学习聚类,文献是糖尿病步态检测)。 +- 明显张冠李戴(正文断言 A 疗法的效果,文献研究的是完全不同的 B 问题且无关联)。 +- 文献条目与正文讨论的对象/场景毫无交集,且无法作背景或理论引用。 + +【以下情况均应 can_support=true】 +- 同一大领域或相邻方向(如护理、心理、管理、医学、统计、AI 等相近子领域)。 +- 可作背景文献、综述性引用、理论或方法的一般性依据。 +- 表述略宽、略有概括、变量名不完全一致,但大方向说得通。 + +【confidence 固定档位(禁止其它小数)】 +can_support=true:0.65(有关联但较泛)/ 0.78 / 0.85 / 0.92 / 0.98(非常确定相关) +can_support=false:0.15(明确风马牛不相及)/ 0.25 / 0.35 / 0.45(仅当实在无法建立任何合理关联) + +【输出】仅一行 minified JSON,无 markdown: +{"can_support":true|false,"is_match":true|false,"confidence":0.15|0.25|0.35|0.45|0.65|0.78|0.85|0.92|0.98,"reason":"30-80字简体中文"} +is_match 必须与 can_support 相同。 +PROMPT; + } + + private function buildReferenceCheckFirstPassUserPrompt($contextText, $referText) + { + return "【正文全文 article_main.content】\n" . $contextText + . "\n\n【参考文献书目 refer_text】\n" . $referText + . 
"\n\n请从宽判断:文献与正文非风马牛不相即可判 can_support=true,只返回 JSON。"; + } + + /** 第二次校对:Crossref 摘要(Refer_doi) */ + private function buildReferenceCheckSecondPassPrompt() + { + return <<<'PROMPT' +你是文献引用二次校对助手。已根据 Refer_doi 从 Crossref(https://api.crossref.org/works/)获取摘要,请结合【正文全文】复核该文献是否相关。 + +【核心原则:与第一次相同,从宽判断】 +默认倾向 can_support=true。只要 Crossref 摘要(或书目)与正文不是风马牛不相及,即判相关、能支撑。 +以【Crossref 摘要】为准;摘要与书目冲突时以摘要为准。 + +【仅当以下情况才判 can_support=false】 +- 摘要显示的研究主题/对象/方法与正文讨论内容完全风马牛不相及。 +- 典型风马牛不相及、张冠李戴,且无法解释为背景或泛化引用。 + +【以下情况均应 can_support=true】 +- 摘要与正文属同领域或相近方向,能作背景、理论或方向性支撑。 +- 细节不完全一致,但不存在明显矛盾。 + +【无 Crossref 摘要时】 +结合 refer_text 从宽判断;非明显无关仍可 can_support=true,confidence 建议 0.65。 + +【confidence 固定档位(禁止其它小数)】 +can_support=true:0.65 / 0.78 / 0.85 / 0.92 / 0.98 +can_support=false:0.15 / 0.25 / 0.35 / 0.45 + +【输出】仅一行 minified JSON: +{"can_support":true|false,"is_match":true|false,"confidence":0.15|0.25|0.35|0.45|0.65|0.78|0.85|0.92|0.98,"reason":"30-80字简体中文"} +is_match 必须与 can_support 相同。 +PROMPT; + } + + private function buildReferenceCheckSecondPassUserPrompt($contextText, $referText, $doiBlock) + { + $doiBlock = trim((string)$doiBlock); + return "【正文全文 article_main.content】\n" . $contextText + . "\n\n【参考文献书目 refer_text】\n" . $referText + . "\n\n【Crossref 摘要】(Refer_doi → api.crossref.org/works/)\n" + . ($doiBlock !== '' ? $doiBlock : '(未获取到摘要,请结合 refer_text 从宽判断)') + . "\n\n文献与正文非风马牛不相即可判 can_support=true,只返回 JSON。"; + } + private function buildReferenceCheckSystemPrompt3() + { + return $this->buildReferenceCheckFirstPassPrompt(); + } + + /** + * 护理/医学期刊:正文引用句与参考文献条目的相关性校对 + */ + private function buildReferenceCheckSystemPrompt2() + { + return <<<'PROMPT' +你是一名护理与医学期刊的资深编辑,专门校对「正文引用句」与「对应参考文献条目」是否匹配。 +你只能依据用户提供的两段文本判断,不得假设已阅读全文,不得编造文献中未出现的信息。 + +## 校对目标 +判断:作者在该引用位置引用的观点/数据/结论/方法/定义,是否可由该条参考文献合理支撑(主题与论证层面是否对得上)。 + +## 评估步骤(按顺序,在心里完成即可) +1. 主题域:正文句子的核心主题(疾病、人群、干预、结局、理论、政策等)与文献题目/作者/期刊/年份/条目内容是否属于同一专业领域。 +2. 论点对齐:正文句子的关键断言,是否与该文献可能报告的内容方向一致(允许概括性引用,但不可张冠李戴)。 +3. 
错引排查:是否出现「仅同一大领域但具体对象不同」「人群/场景/指标明显不符」「把指南/综述/原始研究混用导致支撑关系不成立」等常见错引。 +4. 信息不足:若文献条目过简(仅作者+年份等),只能做粗判;若完全无法建立合理关联,按不匹配处理。 + +## is_match 判定(二选一,必须一致) +- true:主题明确相关,且引用句的核心信息与该文献可能内容高度吻合或可被其合理概括支撑。 +- false:主题无关、明显错引、具体论点对不上、或无法建立合理关联。边界不清时从严标 false(降低漏报错引风险)。 + +## confidence 评分(稳定性要求:只能使用下列 6 个固定值之一,禁止 0.72、0.8 等其它小数) +| 分值 | 含义 | 通常配合 is_match | +| 0.95 | 高度匹配:主题、对象、论点均清晰对应 | true | +| 0.85 | 较匹配:主题与论点一致,表述略宽但仍可接受 | true | +| 0.75 | 基本匹配:大方向对,有轻微不精确或概括过度 | true | +| 0.35 | 存疑:同领域但具体对不上,或信息不足,建议人工复核 | false | +| 0.25 | 较可能错引:主题或论点明显偏离 | false | +| 0.15 | 明确错引:主题无关或典型张冠李戴 | false | + +硬性规则(必须遵守,否则视为无效输出): +- is_match=true 时,confidence 只能是 0.75、0.85 或 0.95。 +- is_match=false 时,confidence 只能是 0.15、0.25 或 0.35。 +- 禁止输出 0.5、0.6、0.9 等未列出的 confidence 值。 + +## 评分稳定原则 +- 相同输入应得到相同结论;不要因措辞风格波动而改变档位。 +- 优先依据「主题 + 关键断言」而非个别泛化词(如「研究」「护理」「患者」)判匹配。 +- 一句多引时,只评价当前这一条文献与引用句的关系,勿与其它序号混淆。 + +## 输出格式(仅输出一行 minified JSON,无 markdown、无前后说明) +{"is_match":true|false,"confidence":0.15|0.25|0.35|0.75|0.85|0.95,"reason":"1-2句简体中文,说明匹配或不匹配的关键依据"} +PROMPT; + } + private function buildReferenceCheckAgaintSystemPrompt() + { + return <<<'PROMPT' +你是一名护理、医学与科研期刊的资深编辑,专门校对「正文引用句」与「对应参考文献」是否真实匹配。 + +你的职责是判断: + +作者在该引用位置引用的观点、数据、结论、方法、定义、理论或证据, + +是否能够被该参考文献 DOI 对应的真实文献内容合理支撑。 + +你必须执行: + +【第一轮:文献条目粗判】 ++ +【第二轮:DOI真实文献内容复核(最高优先级)】 + +最终结果以 DOI 页面实际文献内容为准。 + +不得仅凭标题、关键词或研究领域判定匹配。 + +==================== +【输入内容】 + +你将收到: + +1. 正文引用句(引用位置附近的一句话或一段话) + +2. 当前参考文献条目(仅当前编号) + +3. 文献元信息: +- Title +- Author +- Journal +- Year +- DOI +- DOI Link + +4. DOI 页面解析出的真实内容(最高优先级): +可能包括: + +- 实际标题 +- Abstract +- Keywords +- Objective +- Methods +- Participants +- Results +- Conclusion +- Study design +- Full metadata + +注意: + +DOI 页面内容优先级最高。 + +若 DOI 页面内容与参考文献条目存在冲突: + +必须以 DOI 页面真实显示内容为准。 + +==================== +【核心判断目标】 + +判断: + +正文中的核心论点、事实、数据、定义、护理措施、医学结论、研究发现、理论依据、政策依据、算法方法、统计方法、模型结构等, + +是否可由 DOI 对应的真实文献内容合理支撑。 + +你评估的是: + +“引用是否成立”。 + +不是: + +“正文是否正确”。 + +==================== +【硬性约束(必须遵守)】 + +1. 
只能依据提供的信息判断 + +- 不得假设看过全文。 +- 不得联网到未提供的新网页。 +- 不得根据常识补全文献内容。 +- 不得根据作者、期刊名、热点方向脑补研究结果。 +- 不得把“可能研究了”视为“能够支撑”。 + +2. DOI真实内容优先(最高优先级) + +必须优先依据: + +- DOI摘要 +- DOI方法 +- DOI研究对象 +- DOI结果 +- DOI结论 + +判断是否支撑正文。 + +禁止: + +仅因为标题相似或关键词重叠就判 true。 + +例如: + +正文: +“研究证实显著降低焦虑” + +DOI摘要未提焦虑改善结果: + +必须 false。 + +3. 严禁串号判断 + +- 仅允许依据当前引用句与当前参考文献。 +- 严禁利用其它参考文献编号或上下文推断当前文献。 + +4. 不得关键词硬匹配 + +禁止因为出现相同关键词就判匹配,例如: + +“护理”“患者”“治疗”“效果”“心理” +“机器学习”“深度学习”“模型”等。 + +必须重点判断: + +- 对象是否一致 +- 疾病/场景是否一致 +- 人群是否一致 +- 干预方式是否一致 +- 方法学是否一致 +- 关键结论是否一致 + +5. 医学与科研错引从严 + +若 DOI 内容出现以下情况: + +优先判 false: + +- 同领域但疾病不同 +- 人群不同(儿童 vs 老年) +- 场景不同(ICU vs 普通病房) +- 干预方式不同 +- 指标或结局不同 +- 指南、综述、Meta、原始研究混用 +- 文献无法支撑正文中的强结论 + +例如: + +正文: +“研究证实显著降低死亡率” + +DOI: +仅描述护理模式应用观察。 + +不得脑补效果成立。 + +应从严判 false。 + +6. 特定证据类型必须一致 + +正文明确声明: + +- “随机对照研究显示” +- “Meta分析表明” +- “系统综述指出” +- “指南推荐” +- “专家共识建议” + +若 DOI 内容显示证据类型不一致: + +应从严判 false。 + +7. 方法学引用必须严格一致(极重要) + +若正文明确引用: + +- 算法 +- 模型 +- 聚类方法 +- 分类方法 +- 深度学习架构 +- 统计方法 +- 数学技术 +- 数据处理方法 + +DOI 内容必须与该方法存在明确合理关联。 + +例如: + +不匹配: + +- fuzzy clustering ≠ deep learning +- random forest ≠ SVM +- CNN ≠ LSTM +- 聚类模型 ≠ 分类模型 +- 回归分析 ≠ 聚类分析 + +仅属于同一“大领域(AI/ML)” + +不能视为匹配。 + +若方法体系明显不同: + +优先判: + +false + confidence=0.15 + +8. DOI 内容中的核心变量必须一致(新增重点) + +若正文讨论: + +- 心理资本 +- 工作流 +- 组织支持 +- 焦虑 +- 压力 +- 满意度 +- 护理能力 +- 风险预测 + +必须检查 DOI 内容是否真正研究该变量及其关系。 + +例如: + +正文: +“心理资本影响工作流” + +DOI: +研究组织支持与工作流。 + +即使都属于护士心理研究: + +仍应 false。 + +9. 
信息不足从严 + +若: + +- DOI打不开 +- DOI无摘要 +- DOI内容不足 +- 无法建立明确关联 + +只有明确支撑时才判 true。 + +否则: + +false。 + +==================== +【评估步骤(按顺序在心里完成)】 + +第一步:DOI内容优先理解 +先判断 DOI 实际研究: + +- 谁(对象) +- 什么问题(主题) +- 怎么研究(方法) +- 得出什么(结果/结论) + +第二步:主题域一致性 + +检查正文与 DOI 文献是否属于同一: + +- 疾病 +- 患者群体 +- 护理问题 +- 医疗场景 +- 干预措施 +- 指标/结局 +- 理论模型 +- 算法/统计方法 + +第三步:关键断言对齐 + +判断正文核心断言是否真正被 DOI 内容支撑。 + +允许: + +- 合理概括 +- 轻度扩展 + +不允许: + +- 张冠李戴 +- 过度推断 +- 用相关性支撑因果性 +- 用弱证据支撑强结论 +- 方法体系不一致 + +第四步:错引排查 + +重点检查: + +- 疾病错 +- 人群错 +- 场景错 +- 方法错 +- 指标错 +- 研究类型错 +- 变量关系错 +- 算法体系错 + +==================== +【最终判定规则】 + +is_match(二选一) + +true: + +满足以下全部条件: + +- 主题明确相关 +- 核心对象基本一致 +- 方法或研究方向合理一致 +- DOI内容支持正文关键论点 +- 不存在明显错引风险 + +false: + +满足任一情况: + +- 主题无关 +- 对象不同 +- 疾病/场景不同 +- 方法体系明显不同 +- 核心变量关系不同 +- DOI内容无法支撑正文结论 +- 证据类型不一致 +- 无法建立明确合理关联 +- 信息不足无法确认 + +边界情况从严判 false。 + +==================== +【confidence 固定评分规则】 + +只能输出以下固定值之一: + +0.98 +0.92 +0.85 +0.78 +0.65 +0.45 +0.35 +0.25 +0.15 + +禁止输出其它数字。 + +硬规则: + +is_match=true: +只能: +0.65 / 0.78 / 0.85 / 0.92 / 0.98 + +is_match=false: +只能: +0.15 / 0.25 / 0.35 / 0.45 + +DOI内容与正文明显冲突: +优先: +0.15 + +==================== +【reason 输出要求】 + +- 使用简体中文 +- 长度30~80字 +- 仅说明: +1)DOI文献研究内容; +2)是否支撑正文核心论点。 + +禁止: + +“可能” +“应该” +“看起来” +“似乎” + +必须明确表达: +一致 / 不一致 / 无法支撑。 + +==================== +【输出格式(绝对严格)】 + +仅输出一行 minified JSON。 + +禁止: +- markdown +- 代码块 +- 换行 +- 解释说明 +- 前后文字 + +格式: + +{"is_match":true|false,"confidence":0.15|0.25|0.35|0.45|0.65|0.78|0.85|0.92|0.98,"reason":"简体中文原因"} +PROMPT; + } + private function buildReferenceCheckUserPrompt($contextText, $referText) + { + return "【正文引用句】(含该处引用所要支撑的观点,可能为中文或英文)\n" + . $contextText + . "\n\n【对应参考文献条目】(书目信息,可能不完整)\n" + . $referText + . 
"\n\n请按 system 中的步骤与评分表完成校对,只返回 JSON。"; + } + + /** + * 二次 DOI 复核 system prompt: + * - 强调输入中的"DOI 真实内容"已由系统抓取,模型不可自行联网 + * - 处理 metadata(标题/作者)与 refer_content/DOI 抓取内容不一致的情况 + * - confidence 档位与一次校对保持一致 + */ + private function buildReferenceCheckRecheckSystemPrompt() + { + return <<<'PROMPT' +你是一名护理、医学与科研期刊的资深编辑,正在执行【初稿 DOI 文献复核】。 + +一次粗判(仅依据书目条目)已经给出较低置信度(≤0.65)。 + +你的职责是: + +依据系统提供的【DOI 真实文献内容】重新判断: + +正文引用位置的观点、结论、方法、数据或理论, + +是否能够被 DOI 对应的真实文献“基本合理支撑”。 + +你的目标是: + +优先识别真正错引, + +同时避免误杀“合理但非完全一致”的引用。 + +注意: + +初稿校对允许: + +- 背景研究支撑 +- 理论依据支撑 +- 同方向研究支撑 +- 合理概括 +- 轻度表述扩展 + +不要求: + +正文与 DOI 摘要逐字对应。 + +==================== +【输入结构】 + +User 消息中会出现三个块: + +1.【正文引用句】 + +作者希望被该引用支撑的: + +观点、方法、数据、结论或理论。 + +2.【参考文献条目(书目)】 + +可能包含: + +- Title +- Author +- Journal +- Year +- DOI +- Reference + +注意: + +书目可能存在: + +- 错 DOI +- 错标题 +- 错作者 +- 元数据漂移 + +不能仅依据书目判断。 + +3.【DOI 真实文献内容(最高优先级)】 + +来源: + +Source: PubMed +或 +Source: Crossref + +可能包含: + +- 真正标题 +- Abstract +- Methods +- Results +- Conclusion +- MeSH +- Publication Type + +该内容已由系统抓取, + +视为: + +“真实文献内容”。 + +禁止联网。 +禁止自行打开 DOI。 +禁止猜测未提供字段。 + +==================== +【判断优先级(必须遵守)】 + +A. +DOI 内容最高优先级 + +若 DOI 内容存在: + +必须以其为准。 + +即使: + +书目 Title / Author 与 DOI 冲突, + +也以 DOI 内容为准。 + +==================== +B. +DOI 有摘要 + +优先依据: + +- 研究对象 +- 核心变量 +- 方法 +- 结果 +- 结论 + +判断是否支撑正文。 + +允许: + +- 合理概括 +- 背景研究支撑 +- 同方向研究支撑 +- 理论依据支撑 +- 轻度扩展 + +不要求: + +逐字一致。 + +==================== +C. +DOI 仅有标题,无摘要 + +仅当标题与正文存在: + +明确语义关联 + +才可判: + +true + 0.65 + +否则: + +优先: + +false + 0.45 + +(人工复核) + +不要轻易判: + +0.15。 + +==================== +D. +DOI 获取失败 + +若: + +- 无摘要 +- 无核心信息 +- 抓取失败 + +不能直接判 true。 + +也不要轻易判错引。 + +优先: + +false + 0.45 + +(信息不足,人工复核) + +==================== +【允许 true 的情况(重要)】 + +以下情况允许 true: + +1. +DOI 摘要直接支撑正文核心观点。 + +2. +DOI 文献属于: + +- 背景研究 +- 理论依据 +- 同方向研究 + +即使: + +对象、变量或场景存在轻微差异, + +但研究方向一致, + +仍可: + +0.65 / 0.78。 + +例如: + +正文: +工作流与职业发展相关。 + +DOI: +工作流与心理资本关系。 + +可作为背景研究支撑: + +true + 0.65。 + +3. 
+正文属于概括性表达, + +DOI 文献能支撑主要方向。 + +==================== +【优先 false 的情况】 + +以下情况优先 false: + +1. +主题明显无关。 + +2. +研究对象明显不同。 + +例如: + +- 儿童 vs 老年 +- ICU vs 普通病房 + +3. +疾病 / 场景明显不同。 + +4. +方法体系明显冲突 +(仅限明确方法引用)。 + +仅当正文明确讨论: + +- 算法 +- 模型 +- 聚类 +- 分类 +- 深度学习架构 +- 统计方法 +- 数据处理方法 + +时, + +要求方法一致。 + +例如: + +- fuzzy clustering ≠ deep learning +- CNN ≠ LSTM +- 聚类 ≠ 分类 +- random forest ≠ SVM + +此类: + +优先: + +false + 0.15。 + +注意: + +若正文只是: + +背景研究、 +相关工作、 +理论依据, + +不要因方法不同直接 false。 + +5. +正文强结论无法支撑。 + +正文出现: + +- 显著改善 +- 显著降低 +- 证实 +- 优于 +- 危险因素 +- 有效预测 +- 中介作用 + +但 DOI 摘要未提供对应结果: + +优先 false。 + +6. +正文明确: + +- RCT +- Meta分析 +- 系统综述 +- Guideline + +但 DOI 类型明显不一致。 + +==================== +【confidence 固定评分规则】 + +只能输出: + +0.98 +0.92 +0.85 +0.78 +0.65 +0.45 +0.35 +0.25 +0.15 + +禁止其它数字。 + +-------------------- +【true 档位】 + +0.98 +DOI 对象、方法、结论与正文高度一致。 + +0.92 +DOI 明确支撑正文关键论点。 + +0.85 +DOI 支撑核心观点, +存在轻微概括。 + +0.78 +研究方向一致, +能够合理支撑正文。 + +0.65 +边界匹配: + +可作为背景研究、 +理论依据、 +同方向研究支撑。 + +建议人工复核。 + +-------------------- +【false 档位】 + +0.45 +信息不足、 +无摘要、 +标题过泛、 +无法确认。 + +建议人工复核。 + +0.35 +同领域但对象、变量或结论偏差明显。 + +0.25 +主题相关但核心观点无法支撑。 + +0.15 +明确错引: + +- DOI 内容明显无关 +- 方法体系冲突 +- 张冠李戴 +- 强结论明显无法成立 + +==================== +【硬性规则】 + +is_match=true: + +只能: +0.65 / 0.78 / 0.85 / 0.92 / 0.98 + +is_match=false: + +只能: +0.15 / 0.25 / 0.35 / 0.45 + +==================== +【评分稳定原则】 + +- 相同输入得到相同结果。 +- 优先主题 + 核心论点。 +- 不因关键词重叠误判。 +- 一句多引仅评价当前文献。 +- 模糊情况优先人工复核。 +- 不轻易误杀合理引用。 + +==================== +【reason 输出要求】 + +简体中文。 + +30~80字。 + +必须说明: + +1)DOI 文献研究什么; + +2)是否支撑正文核心观点; + +3)支撑点或冲突点是什么。 + +禁止: + +“可能” +“应该” +“似乎” +“看起来” + +必须明确表达: + +一致 / 不一致 / 可支撑 / 无法支撑。 + +==================== +【输出格式(严格)】 + +仅输出一行 minified JSON。 + +禁止: + +- markdown +- 代码块 +- 换行 +- 解释说明 +- 前后文字 + +格式: + +{"is_match":true|false,"confidence":0.15|0.25|0.35|0.45|0.65|0.78|0.85|0.92|0.98,"reason":"简体中文原因"} +PROMPT; + } + + private function buildReferenceCheckRecheckUserPrompt($contextText, $referText, $doiBlock) + { + return 
$this->buildReferenceCheckSecondPassUserPrompt($contextText, $referText, $doiBlock);
    }

    /**
     * Returns the confidence tiers a verdict is allowed to carry.
     *
     * Kept in sync with the tiers spelled out in the system prompts
     * (see buildReferenceCheckSystemPrompt3 and the second-pass prompt).
     *
     * @param bool $isMatch Truthy for a "supports / matches" verdict.
     * @return float[] Ascending list of permitted confidence values.
     */
    private function getReferenceCheckConfidenceBands($isMatch)
    {
        return $isMatch
            ? [0.65, 0.78, 0.85, 0.92, 0.98]
            : [0.15, 0.25, 0.35, 0.45];
    }

    /**
     * Snaps a model-supplied confidence onto the nearest permitted tier
     * (for example 0.95 becomes 0.92 and 0.75 becomes 0.78 for a match).
     *
     * @param mixed $confidence Raw confidence; non-numeric input is treated as 0.0.
     * @param bool  $isMatch    Selects which tier list is used.
     * @return float One of the values from getReferenceCheckConfidenceBands().
     */
    private function snapReferenceCheckConfidence($confidence, $isMatch)
    {
        // Guard the arithmetic below: a non-numeric value would raise a
        // TypeError on PHP 8 when subtracted from a float.
        $value = is_numeric($confidence) ? (float)$confidence : 0.0;
        $bands = $this->getReferenceCheckConfidenceBands($isMatch);

        // A single nearest-neighbour pass also covers the exact-match case.
        // The strict "<" keeps the lower tier when two tiers are equally close.
        $nearest = $bands[0];
        $minDiff = abs($value - $nearest);
        foreach ($bands as $band) {
            $diff = abs($value - $band);
            if ($diff < $minDiff) {
                $minDiff = $diff;
                $nearest = $band;
            }
        }

        return $nearest;
    }

    /**
     * POSTs the chat payload as JSON to $this->url and returns the reply text.
     *
     * The reply text is read from `choices[0].message.content`, falling back to a
     * top-level `content` field. An `Authorization: Bearer` header is sent only
     * when $this->apiKey is non-empty. The connect timeout is capped at 15 seconds;
     * the overall timeout is $this->timeout.
     *
     * This is a best-effort call: every failure (encoding error, transport error,
     * non-2xx status, undecodable or unexpected response) is logged where useful
     * and reported as null rather than thrown.
     *
     * @param array $payload Request body to be JSON-encoded.
     * @return string|null Reply text, or null on any failure.
     */
    private function postChat(array $payload)
    {
        $ch = null;
        try {
            $body = json_encode($payload, JSON_UNESCAPED_UNICODE);
            if ($body === false) {
                // Typically invalid UTF-8 in the payload; do not POST an empty body.
                \think\Log::warning('ReferenceCheck LLM payload encode error: json error code ' . json_last_error());
                return null;
            }

            $ch = curl_init();
            if ($ch === false) {
                \think\Log::warning('ReferenceCheck LLM curl init failed');
                return null;
            }

            curl_setopt($ch, CURLOPT_URL, $this->url);
            curl_setopt($ch, CURLOPT_POST, true);
            curl_setopt($ch, CURLOPT_POSTFIELDS, $body);
            curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
            curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, min(15, $this->timeout));
            curl_setopt($ch, CURLOPT_TIMEOUT, $this->timeout);

            $headers = ['Content-Type: application/json'];
            if ($this->apiKey !== '') {
                $headers[] = 'Authorization: Bearer ' . $this->apiKey;
            }
            curl_setopt($ch, CURLOPT_HTTPHEADER, $headers);

            $raw = curl_exec($ch);
            // Read everything needed from the handle, then release it exactly once.
            $curlError = $raw === false ? curl_error($ch) : '';
            $httpCode = intval(curl_getinfo($ch, CURLINFO_HTTP_CODE));
            curl_close($ch);
            $ch = null;

            if ($raw === false) {
                \think\Log::warning('ReferenceCheck LLM curl error: ' . $curlError);
                return null;
            }
            if ($httpCode < 200 || $httpCode >= 300) {
                \think\Log::warning('ReferenceCheck LLM http ' . $httpCode . ': ' . mb_substr((string)$raw, 0, 500, 'UTF-8'));
                return null;
            }

            $data = json_decode($raw, true);
            if (!is_array($data)) {
                return null;
            }

            // Only scalar content is usable; casting an array would yield "Array".
            if (isset($data['choices'][0]['message']['content'])
                && is_scalar($data['choices'][0]['message']['content'])
            ) {
                return (string)$data['choices'][0]['message']['content'];
            }
            if (isset($data['content']) && is_scalar($data['content'])) {
                return (string)$data['content'];
            }
        } catch (\Exception $exception) {
            // Fully qualified so the catch works regardless of this file's
            // namespace or imports (an unqualified name resolves locally).
            \think\Log::warning('ReferenceCheck LLM exception: ' . $exception->getMessage());
            if ($ch) {
                curl_close($ch);
            }
        }

        return null;
    }

    /**
     * Decodes the model reply into an associative array.
     *
     * Markdown code fences are stripped first. If the remaining text is not a
     * JSON object/array by itself, the span from the first "{" to the last "}"
     * is tried instead.
     *
     * @param mixed $raw Reply text; anything that is not a string yields null.
     * @return array|null Decoded data, or null when nothing decodable is found.
     */
    private function parseJson($raw)
    {
        if (!is_string($raw)) {
            return null;
        }
        $raw = trim($raw);
        if ($raw === '') {
            return null;
        }

        $stripped = preg_replace('/^```[a-zA-Z]*\s*|```$/m', '', $raw);
        // preg_replace() returns null on a PCRE error; keep the unstripped text then.
        $raw = trim($stripped === null ? $raw : $stripped);

        $obj = json_decode($raw, true);
        if (is_array($obj)) {
            return $obj;
        }

        // Greedy on purpose: takes the outermost brace pair in the reply.
        if (preg_match('/\{.*\}/s', $raw, $m)) {
            $obj = json_decode($m[0], true);
            if (is_array($obj)) {
                return $obj;
            }
        }

        return null;
    }

    /**
     * Normalizes a confidence value into the range [0, 1], rounded to 4 decimals.
     *
     * Values greater than 1 and up to 100 are read as percentages and divided by
     * 100; everything else is clamped. Non-numeric or NaN input yields 0.0.
     *
     * @param mixed $value Raw confidence from the model.
     * @return float
     */
    private function normalizeConfidence($value)
    {
        if (!is_numeric($value)) {
            return 0.0;
        }
        $v = (float)$value;
        if (is_nan($v)) {
            return 0.0;
        }
        if ($v > 1.0 && $v <= 100.0) {
            $v = $v / 100.0;
        }

        return round(max(0.0, min(1.0, $v)), 4);
    }

    /**
     * Collapses whitespace in a reason string and limits it to 500 characters.
     *
     * @param mixed $text Raw reason; non-scalar input is treated as empty.
     * @return string Cleaned reason, or "No reason provided" when it is empty.
     */
    private function cleanReason($text)
    {
        $text = is_scalar($text) ? (string)$text : '';

        // The "u" modifier makes the match character-wise, so multibyte (e.g.
        // Chinese) text is never split and Unicode spaces are collapsed too.
        $collapsed = preg_replace('/\s+/u', ' ', $text);
        if ($collapsed === null) {
            // Input is not valid UTF-8: fall back to an explicit ASCII whitespace
            // class that cannot match any byte of a multibyte sequence.
            $collapsed = preg_replace('/[ \t\n\r\f\x0B]+/', ' ', $text);
        }
        $text = trim($collapsed === null ? $text : $collapsed);

        if (mb_strlen($text, 'UTF-8') > 500) {
            $text = mb_substr($text, 0, 500, 'UTF-8');
        }

        return $text !== '' ? $text : 'No reason provided';
    }
}