From 94b212fe7c6ec47113eeff7ab2125e0e1636d328 Mon Sep 17 00:00:00 2001
From: wyn <1074145239@qq.com>
Date: Wed, 27 May 2026 16:09:23 +0800
Subject: [PATCH] =?UTF-8?q?=E6=96=87=E7=8C=AE=E6=A0=A1=E5=AF=B9=E5=8A=9F?=
=?UTF-8?q?=E8=83=BD=E5=AE=8C=E5=96=84?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
application/api/controller/Preaccept.php | 12 +-
application/api/controller/References.php | 67 +-
application/common/ReferenceCheckService.php | 680 +++++++++++++++++--
application/database.php | 2 +-
4 files changed, 681 insertions(+), 80 deletions(-)
diff --git a/application/api/controller/Preaccept.php b/application/api/controller/Preaccept.php
index 166af09f..79794434 100644
--- a/application/api/controller/Preaccept.php
+++ b/application/api/controller/Preaccept.php
@@ -898,7 +898,17 @@ class Preaccept extends Base
return jsonSuccess($re);
}
-
+    /**
+     * Look up one article_main row by its am_id.
+     *
+     * POST: am_id (required).
+     * Whatever find() returns is handed to jsonSuccess() unchanged.
+     */
+    public function getArticleMainById()
+    {
+        $post = $this->request->post();
+
+        $validator = new Validate(["am_id" => "require"]);
+        if (!$validator->check($post)) {
+            return jsonError($validator->getError());
+        }
+
+        $row = $this->article_main_obj
+            ->where("am_id", $post['am_id'])
+            ->find();
+
+        return jsonSuccess($row);
+    }
public function changeH1(){
$data = $this->request->post();
diff --git a/application/api/controller/References.php b/application/api/controller/References.php
index 659c12b6..fbc6b6be 100644
--- a/application/api/controller/References.php
+++ b/application/api/controller/References.php
@@ -11,6 +11,7 @@ use think\Validate;
use think\Db;
use think\Env;
use think\Queue;
+use app\common\ReferenceCheckService;
/**
* @title 参考文献
* @description 相关方法汇总
@@ -1499,12 +1500,72 @@ class References extends Base
}
/**
- * 按 p_refer_id 查单条参考文献的校对明细
+ * 多篇文章并行校对时,查询指定文章前面还有几篇在排队
+ *
+ * POST/GET: p_article_id(必填)
+ *
+ * 例:当前 5 篇文章正在校对,该文排在第 3 → ahead=2, position=3, running_total=5。
+ * 返回:running_total、ahead、position、in_queue、status(整篇校对状态 0/1/2)
+ */
+    public function referenceCheckPendingCountAI()
+    {
+        // Prefer the POST body; fall back to the merged request parameters when it is empty.
+        $aParam = $this->request->post();
+        if (empty($aParam)) {
+            $aParam = $this->request->param();
+        }
+
+        $iPArticleId = 0;
+        if (!empty($aParam['p_article_id'])) {
+            $iPArticleId = intval($aParam['p_article_id']);
+        }
+        if ($iPArticleId <= 0) {
+            return json_encode(array('status' => 2, 'msg' => 'Please select an article'));
+        }
+
+        try {
+            $oService = new ReferenceCheckService();
+            $aPosition = $oService->getArticleCheckQueuePositionByPArticleId($iPArticleId);
+
+            return jsonSuccess($aPosition);
+        } catch (\Exception $e) {
+            // Service failures are reported to the client with the exception message.
+            return jsonError($e->getMessage());
+        }
+    }
+
+    /**
+     * Re-run (asynchronously) the failed check details that belong to one reference.
+     *
+     * POST/GET: p_refer_id (required)
+     *           p_article_id (optional)
+     *
+     * Only status=3 (check failed) rows are re-run; refer_text is left untouched and only the result fields are reset before the rows are pushed onto the ReferenceCheck queue.
+     * Returns the service summary: p_refer_id, p_article_id, reset, queued, check_ids, queue
+     */
+    public function referenceCheckRecheckFailedAI()
+    {
+        $aParam = $this->request->post();
+        if (empty($aParam)) {
+            $aParam = $this->request->param();
+        }
+
+        $iPReferId = empty($aParam['p_refer_id']) ? 0 : intval($aParam['p_refer_id']);
+        if ($iPReferId <= 0) {
+            return json_encode(array('status' => 2, 'msg' => 'Please select a reference'));
+        }
+
+        $iPArticleId = empty($aParam['p_article_id']) ? 0 : intval($aParam['p_article_id']);
+
+        try {
+            $result = (new ReferenceCheckService())->enqueueRecheckFailedByPReferId($iPReferId, $iPArticleId);
+            return jsonSuccess($result);
+        } catch (\Exception $e) {
+            return jsonError($e->getMessage());
+        }
+    }
+
+ /**
+ * 按 p_refer_id 查单条参考文献的校对明细与进度
*
* POST/GET: p_refer_id(必填)
*
- * 返回 list 中每项含:am_id、confidence、reason、is_match、is_pass
- * 同时附带上下文:p_refer_id、p_article_id、reference_no、total
+ * 分组进度:progress_status(0待/1中/2完成/3失败)、pending、done、failed、pass、
+ * is_pass、progress_percent、last_updated_at
+ * list 每项:check_id、am_id、status、confidence、reason、is_match、is_pass
*/
public function referenceCheckDetailsAI()
{
diff --git a/application/common/ReferenceCheckService.php b/application/common/ReferenceCheckService.php
index b1d3223f..89ef6b8a 100644
--- a/application/common/ReferenceCheckService.php
+++ b/application/common/ReferenceCheckService.php
@@ -15,12 +15,20 @@ class ReferenceCheckService
{
const QUEUE_NAME = 'ReferenceCheck';
- /** t_article_main.ref_check_status */
+ /** t_article_main.type */
+ const MAIN_TYPE_TEXT = 0;
+ const MAIN_TYPE_IMAGE = 1;
+ const MAIN_TYPE_TABLE = 2;
+
+ /** t_article_main.ref_check_status(需执行 sql/article_main_ref_check_status.sql) */
const AM_STATUS_NONE = 0;
const AM_STATUS_PASS = 1;
const AM_STATUS_FAIL = 2;
const AM_STATUS_RUNNING = 3;
+ /** @var bool|null t_article_main 是否已有 ref_check_status 列 */
+ private static $amRefCheckStatusColumnExists = null;
+
/**
* 引用校对状态(生命周期顺序:0→1→2→3 = 待→进行→完成→失败)
*
@@ -52,20 +60,14 @@ class ReferenceCheckService
const PASS_CONFIDENCE_THRESHOLD = 0.65;
/**
- * [...] 引用标签内允许的字符类(带 /u 修饰符使用)。
+ * Two layouts of the in-text citation tag are recognised (both patterns use /u):
+ * 1) <blue>[8, 9]</blue>, <blue>[13-15]</blue> -- square brackets inside the blue tag
+ * 2) [<blue>13-15</blue>] -- square brackets wrapping the blue tag
*
- * 除 ASCII 数字、半角逗号、半角连字符、空白外,还兼容常见排版变体:
- * , U+FF0C 全角逗号
- * – U+2013 EN DASH
- * — U+2014 EM DASH
- * − U+2212 MINUS SIGN
- * ‐ U+2010 HYPHEN
- * ‑ U+2011 NON-BREAKING HYPHEN
- *
- * 若不支持变体连字符,会导致 [19–21] 这种区间引用整段被 preg 漏掉,
- * 进而丢失对应的 reference_no 校对记录。
+ * Capture group 1 is always the number list (commas, range hyphens and their typographic variants are allowed).
*/
const BLUE_TAG_REGEX = '/<blue>\[([\d,,\-\x{2013}\x{2014}\x{2212}\x{2010}\x{2011}\s]+)\]<\/blue>/u';
+ const BLUE_TAG_REGEX_BRACKET_OUTSIDE = '/\[<blue>([\d,,\-\x{2013}\x{2014}\x{2212}\x{2010}\x{2011}\s]+)<\/blue>\]/u';
/**
* 兼容无 ?? 的 PHP 版本
@@ -75,6 +77,46 @@ class ReferenceCheckService
return isset($arr[$key]) ? $arr[$key] : $default;
}
+    /**
+     * Run both blue-citation patterns over the content and merge the hits,
+     * ordered by the byte offset at which each full match starts.
+     *
+     * @return array{0: array, 1: array} index 0 holds the full matches, index 1 the
+     *                                   capture-group-1 matches, each entry in
+     *                                   PREG_OFFSET_CAPTURE form
+     */
+    private function collectBlueTagMatches($content)
+    {
+        $hits = [];
+        $patterns = [self::BLUE_TAG_REGEX, self::BLUE_TAG_REGEX_BRACKET_OUTSIDE];
+        foreach ($patterns as $regex) {
+            $found = [];
+            $hitCount = preg_match_all($regex, $content, $found, PREG_OFFSET_CAPTURE);
+            if (!$hitCount) {
+                // No match (0) or a PCRE failure (false): nothing to collect for this pattern.
+                continue;
+            }
+            foreach ($found[0] as $k => $whole) {
+                $hits[] = ['full' => $whole, 'inner' => $found[1][$k]];
+            }
+        }
+
+        // Element [1] of an offset-capture entry is the byte offset of the match.
+        usort($hits, function ($left, $right) {
+            return $left['full'][1] - $right['full'][1];
+        });
+
+        $fullMatches = [];
+        $innerMatches = [];
+        foreach ($hits as $hit) {
+            $fullMatches[] = $hit['full'];
+            $innerMatches[] = $hit['inner'];
+        }
+
+        return [$fullMatches, $innerMatches];
+    }
+
+    /** Apply preg_replace for both blue-citation layouts, one pattern after the other. */
+    private function pregReplaceBlueTags($subject, $replacement)
+    {
+        foreach ([self::BLUE_TAG_REGEX, self::BLUE_TAG_REGEX_BRACKET_OUTSIDE] as $regex) {
+            $subject = preg_replace($regex, $replacement, $subject);
+        }
+
+        return $subject;
+    }
+
/**
* 单条入队(可手工指定正文与文献文本)
*/
@@ -115,14 +157,18 @@ class ReferenceCheckService
return ['check_id' => $checkId, 'queued' => 1];
}
public function enqueueByArticleMain($main){
- $amId = $main['am_id'];
-// $main = Db::name('article_main')
-// ->field('am_id,content,article_id')
-// ->where('am_id', $amId)
-// ->whereIn('state', [0, 2])
-// ->find();
- $citations = $this->extractReferences((string)$main['content']);
-// return $citations;
+ $amId = intval($this->arrGet($main, 'am_id', 0));
+ if ($amId > 0 && (!isset($main['type']) || (intval($main['type']) === self::MAIN_TYPE_TABLE && intval($this->arrGet($main, 'amt_id', 0)) <= 0))) {
+ $dbMain = Db::name('article_main')
+ ->field('am_id,content,article_id,type,amt_id')
+ ->where('am_id', $amId)
+ ->whereIn('state', [0, 2])
+ ->find();
+ if (!empty($dbMain)) {
+ $main = array_merge($dbMain, $main);
+ }
+ }
+ $citations = $this->extractReferencesForArticleMain($main);
if (empty($citations)) {
$this->setAmRefCheckStatus($amId, self::AM_STATUS_NONE);
return;
@@ -222,7 +268,7 @@ class ReferenceCheckService
$referMap = $this->loadReferMapByPArticleId($pArticleId);
$mains = Db::name('article_main')
- ->field('am_id,content,article_id')
+ ->field('am_id,content,article_id,type,amt_id')
->where('article_id', $articleId)
->whereIn('state', [0, 2])
->order('sort asc')
@@ -237,7 +283,7 @@ class ReferenceCheckService
$now = date('Y-m-d H:i:s');
foreach ($mains as $main) {
$amId = intval($main['am_id']);
- $citations = $this->extractReferences((string)$main['content']);
+ $citations = $this->extractReferencesForArticleMain($main);
if (empty($citations)) {
$this->setAmRefCheckStatus($amId, self::AM_STATUS_NONE);
continue;
@@ -309,7 +355,7 @@ class ReferenceCheckService
$referMap = $this->loadReferMapByPArticleId($pArticleId);
$mains = Db::name('article_main')
- ->field('am_id,content,article_id')
+ ->field('am_id,content,article_id,type,amt_id')
->where('article_id', $articleId)
->whereIn('state', [0, 2])
->order('sort asc')
@@ -324,7 +370,7 @@ class ReferenceCheckService
$now = date('Y-m-d H:i:s');
foreach ($mains as $main) {
$amId = intval($main['am_id']);
- $citations = $this->extractReferences((string)$main['content']);
+ $citations = $this->extractReferencesForArticleMain($main);
if (empty($citations)) {
$this->setAmRefCheckStatus($amId, self::AM_STATUS_NONE);
continue;
@@ -429,9 +475,27 @@ class ReferenceCheckService
return $status;
}
+    /**
+     * Whether t_article_main already has the ref_check_status column.
+     *
+     * Callers skip ref_check_status reads/writes while the migration has not been
+     * applied, which avoids "fields not exists" errors. A definite answer is cached
+     * in a static property; a failed lookup is deliberately NOT cached, so a
+     * transient database error cannot switch status updates off for every later call.
+     *
+     * @return bool
+     */
+    private function hasAmRefCheckStatusColumn()
+    {
+        if (self::$amRefCheckStatusColumnExists !== null) {
+            return self::$amRefCheckStatusColumnExists;
+        }
+        try {
+            $table = Db::name('article_main')->getTable();
+            $rows = Db::query('SHOW COLUMNS FROM `' . str_replace('`', '``', $table) . '` LIKE \'ref_check_status\'');
+        } catch (\Exception $e) {
+            // The answer is unknown, not "missing": report false for this call only and retry next time.
+            return false;
+        }
+        self::$amRefCheckStatusColumnExists = !empty($rows);
+
+        return self::$amRefCheckStatusColumnExists;
+    }
+
public function setAmRefCheckStatus($amId, $status)
{
- if ($amId <= 0) {
+ if ($amId <= 0 || !$this->hasAmRefCheckStatusColumn()) {
return;
}
Db::name('article_main')->where('am_id', $amId)->update([
@@ -472,7 +536,7 @@ class ReferenceCheckService
->where('p_article_id', $pArticleId)
->delete();
- if ($articleId > 0) {
+ if ($articleId > 0 && $this->hasAmRefCheckStatusColumn()) {
Db::name('article_main')
->where('article_id', $articleId)
->whereIn('state', [0, 2])
@@ -498,10 +562,12 @@ class ReferenceCheckService
}
$deleted = Db::name('article_reference_check_result')->where('article_id', $articleId)->delete();
- Db::name('article_main')
- ->where('article_id', $articleId)
- ->whereIn('state', [0, 2])
- ->update(['ref_check_status' => self::AM_STATUS_NONE]);
+ if ($this->hasAmRefCheckStatusColumn()) {
+ Db::name('article_main')
+ ->where('article_id', $articleId)
+ ->whereIn('state', [0, 2])
+ ->update(['ref_check_status' => self::AM_STATUS_NONE]);
+ }
return intval($deleted);
}
@@ -669,6 +735,68 @@ class ReferenceCheckService
];
}
+    /**
+     * When several articles are being checked at once, report how many of them are
+     * queued ahead of the given article.
+     *
+     * An article counts as "running" while it still has at least one detail row whose
+     * status is RECORD_PENDING. Articles are ordered by the smallest id among their
+     * pending rows, ascending.
+     *
+     * @return array{
+     *     p_article_id:int,
+     *     running_total:int,
+     *     ahead:int,
+     *     position:int,
+     *     in_queue:bool,
+     *     status:int
+     * }
+     */
+    public function getArticleCheckQueuePositionByPArticleId($pArticleId)
+    {
+        $pArticleId = intval($pArticleId);
+        if ($pArticleId <= 0) {
+            throw new \InvalidArgumentException('p_article_id is required');
+        }
+
+        $anchors = Db::name('article_reference_check_result')
+            ->field('p_article_id, MIN(id) AS queue_anchor')
+            ->where('status', self::RECORD_PENDING)
+            ->group('p_article_id')
+            ->order('queue_anchor', 'asc')
+            ->select();
+
+        // Article ids in queue order; rows without a usable id are ignored.
+        $queueOrder = [];
+        foreach ($anchors as $anchor) {
+            $queuedId = intval($this->arrGet($anchor, 'p_article_id', 0));
+            if ($queuedId > 0) {
+                $queueOrder[] = $queuedId;
+            }
+        }
+
+        // Zero-based slot of this article, or false when it has no pending rows.
+        $slot = array_search($pArticleId, $queueOrder, true);
+        $inQueue = ($slot !== false);
+
+        $articleStatus = $this->getArticleProgressStatusByPArticleId($pArticleId);
+
+        return [
+            'p_article_id'  => $pArticleId,
+            'running_total' => count($queueOrder),
+            'ahead'         => $inQueue ? $slot : 0,
+            'position'      => $inQueue ? $slot + 1 : 0,
+            'in_queue'      => $inQueue,
+            'status'        => intval($this->arrGet($articleStatus, 'status', self::ARTICLE_PROGRESS_NONE)),
+        ];
+    }
+
/**
* 按 p_article_id 查整篇引用校对进度,按 reference_no 分组聚合状态,并展开每条明细。
*
@@ -820,17 +948,16 @@ class ReferenceCheckService
}
/**
- * 按 p_refer_id 查这条参考文献的所有校对明细。
+ * 按 p_refer_id 查这条参考文献的校对明细与分组进度。
*
- * 每条 record 返回:
- * - am_id 命中的 article_main 主键
- * - confidence 匹配置信度(0~1)
- * - reason LLM 给出的判定理由
- * - is_match 是否匹配(来自 article_reference_check_result.is_match)
- * - is_pass 是否通过校验(confidence >= PASS_CONFIDENCE_THRESHOLD)
+ * 分组进度(与 referenceCheckProgressAI 单条 list 项口径一致):
+ * progress_status 0待校验 1校对中 2完成 3失败
+ * pending/done/failed/pass、is_pass、progress_percent
+ *
+ * list 每项:check_id、am_id、status、confidence、reason、is_match、is_pass
*
* @param int $pReferId production_article_refer.p_refer_id
- * @return array{p_refer_id:int, p_article_id:int, reference_no:int, total:int, list:array}
+ * @return array
*/
public function getCheckDetailsByPReferId($pReferId)
{
@@ -840,7 +967,7 @@ class ReferenceCheckService
}
$rows = Db::name('article_reference_check_result')
- ->field('id,p_article_id,reference_no,am_id,confidence,is_match,reason')
+ ->field('id,p_article_id,reference_no,am_id,status,confidence,is_match,reason,updated_at')
->where('p_refer_id', $pReferId)
->order('id asc')
->select();
@@ -848,8 +975,13 @@ class ReferenceCheckService
$list = [];
$pArticleId = 0;
$referenceNo = 0;
+ $pending = 0;
+ $done = 0;
+ $failed = 0;
+ $pass = 0;
+ $lastUpdatedAt = '';
+
foreach ($rows as $row) {
- // 取首条出现的 p_article_id / reference_no 作为该 refer 的上下文
if ($pArticleId <= 0) {
$pArticleId = intval($this->arrGet($row, 'p_article_id', 0));
}
@@ -857,22 +989,87 @@ class ReferenceCheckService
$referenceNo = intval($this->arrGet($row, 'reference_no', 0));
}
+ $st = intval($this->arrGet($row, 'status', 0));
+ if ($st === self::RECORD_PENDING) {
+ $pending++;
+ } elseif ($st === self::RECORD_COMPLETED) {
+ $done++;
+ } elseif ($st === self::RECORD_FAILED) {
+ $failed++;
+ }
+
+ $upd = (string)$this->arrGet($row, 'updated_at', '');
+ if ($upd > $lastUpdatedAt) {
+ $lastUpdatedAt = $upd;
+ }
+
$confidence = floatval($this->arrGet($row, 'confidence', 0));
+ $isPass = $confidence >= self::PASS_CONFIDENCE_THRESHOLD;
+ if ($isPass) {
+ $pass++;
+ }
+
$list[] = [
+ 'check_id' => intval($this->arrGet($row, 'id', 0)),
'am_id' => intval($this->arrGet($row, 'am_id', 0)),
+ 'status' => $st,
'confidence' => $confidence,
'reason' => (string)$this->arrGet($row, 'reason', ''),
'is_match' => intval($this->arrGet($row, 'is_match', 0)),
- 'is_pass' => $confidence >= self::PASS_CONFIDENCE_THRESHOLD,
+ 'is_pass' => $isPass,
];
}
+ if ($referenceNo <= 0) {
+ $refer = Db::name('production_article_refer')
+ ->where('p_refer_id', $pReferId)
+ ->where('state', 0)
+ ->find();
+ if (!empty($refer)) {
+ if ($pArticleId <= 0) {
+ $pArticleId = intval($this->arrGet($refer, 'p_article_id', 0));
+ }
+ $referenceNo = intval($this->arrGet($refer, 'index', 0)) + 1;
+ }
+ }
+
+ $total = count($list);
+ if ($total === 0) {
+ $progressStatus = self::PROGRESS_PENDING;
+ $progressPercent = 0;
+ $isPassGroup = false;
+ } elseif ($pending === $total) {
+ $progressStatus = self::PROGRESS_PENDING;
+ $progressPercent = 0;
+ $isPassGroup = false;
+ } elseif ($pending === 0) {
+ $progressStatus = $failed > 0 ? self::PROGRESS_FAILED : self::PROGRESS_COMPLETED;
+ $progressPercent = 100;
+ $isPassGroup = (
+ $progressStatus === self::PROGRESS_COMPLETED
+ && $pass === $total
+ );
+ } else {
+ $progressStatus = self::PROGRESS_CHECKING;
+ $finished = $done + $failed;
+ $progressPercent = round($finished / $total * 100, 1);
+ $isPassGroup = false;
+ }
+
return [
- 'p_refer_id' => $pReferId,
- 'p_article_id' => $pArticleId,
- 'reference_no' => $referenceNo,
- 'total' => count($list),
- 'list' => $list,
+ 'p_refer_id' => $pReferId,
+ 'p_article_id' => $pArticleId,
+ 'reference_no' => $referenceNo,
+ 'total' => $total,
+ 'pending' => $pending,
+ 'done' => $done,
+ 'failed' => $failed,
+ 'pass' => $pass,
+ 'progress_status' => $progressStatus,
+ 'progress_percent' => $progressPercent,
+ 'is_pass' => $isPassGroup,
+ 'last_updated_at' => $lastUpdatedAt,
+ 'list' => $list,
];
}
@@ -1010,8 +1207,12 @@ class ReferenceCheckService
*/
public function buildArticlePreview($articleId, $amId = 0)
{
+ $fields = 'am_id,content,sort,type,amt_id';
+ if ($this->hasAmRefCheckStatusColumn()) {
+ $fields .= ',ref_check_status';
+ }
$q = Db::name('article_main')
- ->field('am_id,content,sort,ref_check_status')
+ ->field($fields)
->where('article_id', $articleId)
->whereIn('state', [0, 2]);
if ($amId > 0) {
@@ -1039,7 +1240,7 @@ class ReferenceCheckService
foreach ($mains as $main) {
$id = intval($main['am_id']);
- $content = (string)$main['content'];
+ $content = $this->resolveArticleMainCheckContent($main);
$badIndex = isset($badByAm[$id]) ? $badByAm[$id] : array();
$marked = $this->markContentForPreview($content, $id, $badIndex);
$amStatus = intval($this->arrGet($main, 'ref_check_status', 0));
@@ -1158,12 +1359,7 @@ class ReferenceCheckService
$html = $content;
// 1) 先标记 blue 内各序号(在原文上操作,[70-73] 仅标不合理者如 70、71)
- preg_match_all(
- self::BLUE_TAG_REGEX,
- $html,
- $matches,
- PREG_OFFSET_CAPTURE
- );
+ $matches = $this->collectBlueTagMatches($html);
$citeDeltas = [];
if (!empty($matches[0])) {
$replacements = [];
@@ -1318,14 +1514,6 @@ class ReferenceCheckService
return implode("\n", $parts);
}
- /**
- * 前端修改参考文献后重新校对:仅处理已有校对记录,刷新 refer_text、重置结果并入队;无记录直接返回
- *
- * @param int $articleId
- * @param int $pReferId t_production_article_refer.p_refer_id(优先)
- * @param int $referenceNo 文献序号 index+1(无 p_refer_id 时用)
- * @return array
- */
/**
* 编辑某条文献内容后,按 p_refer_id 异步重新校对该文献对应的全部 check 明细
*
@@ -1387,7 +1575,7 @@ class ReferenceCheckService
'refer_text' => $referText,
'refer_index' => $referenceNo,
'reference_no' => $referenceNo,
- 'status' => 0,
+ 'status' => self::RECORD_PENDING,
'is_match' => 0,
'can_support' => 0,
'confidence' => 0,
@@ -1401,7 +1589,6 @@ class ReferenceCheckService
foreach ($rows as $row) {
$checkId = $this->resolveCheckRowId($row);
Db::name('article_reference_check_result')->where('id', $checkId)->update($resetFields);
- // 旧的队列完成标记必须清掉,否则同 check_id 再次投递会被 acquireLock 静默丢弃
$this->clearReferenceCheckQueueLock($checkId);
$pendingJobs[] = [
'check_id' => $checkId,
@@ -1432,6 +1619,92 @@ class ReferenceCheckService
];
}
+    /**
+     * Re-check the failed detail rows of one reference (status = RECORD_FAILED only),
+     * pushing them back onto the queue for asynchronous processing.
+     *
+     * refer_text / reference_no are not refreshed: the rows are reused as stored and
+     * only their result fields are reset before they are queued again.
+     *
+     * @param int $pReferId   t_production_article_refer.p_refer_id (required)
+     * @param int $pArticleId optional, narrows the rows to one article
+     * @return array{p_refer_id:int, p_article_id:int, reset:int, queued:int, check_ids:int[], queue:string}
+     * @throws \InvalidArgumentException when $pReferId is not a positive integer
+     */
+    public function enqueueRecheckFailedByPReferId($pReferId, $pArticleId = 0)
+    {
+        $pReferId = intval($pReferId);
+        $pArticleId = intval($pArticleId);
+        if ($pReferId <= 0) {
+            throw new \InvalidArgumentException('p_refer_id is required');
+        }
+
+        $query = Db::name('article_reference_check_result')
+            ->where('p_refer_id', $pReferId)
+            ->where('status', self::RECORD_FAILED);
+        if ($pArticleId > 0) {
+            $query->where('p_article_id', $pArticleId);
+        }
+        $failedRows = $query->select();
+
+        if (empty($failedRows)) {
+            // Nothing failed for this reference: report an all-zero summary.
+            return [
+                'p_refer_id'   => $pReferId,
+                'p_article_id' => $pArticleId,
+                'reset'        => 0,
+                'queued'       => 0,
+                'check_ids'    => [],
+                'queue'        => self::QUEUE_NAME,
+            ];
+        }
+
+        // Without an explicit article, take it from the first failed row.
+        if ($pArticleId <= 0) {
+            $pArticleId = intval($this->arrGet($failedRows[0], 'p_article_id', 0));
+        }
+
+        $resetFields = [
+            'status'      => self::RECORD_PENDING,
+            'is_match'    => 0,
+            'can_support' => 0,
+            'confidence'  => 0,
+            'reason'      => '',
+            'error_msg'   => '',
+            'updated_at'  => date('Y-m-d H:i:s'),
+        ];
+
+        $jobs = [];
+        $touchedAmIds = [];
+        foreach ($failedRows as $failedRow) {
+            $checkId = $this->resolveCheckRowId($failedRow);
+            $sectionId = intval($this->arrGet($failedRow, 'am_id', 0));
+
+            Db::name('article_reference_check_result')->where('id', $checkId)->update($resetFields);
+            $this->clearReferenceCheckQueueLock($checkId);
+
+            $jobs[] = [
+                'check_id'     => $checkId,
+                'reference_no' => intval($this->arrGet($failedRow, 'reference_no', 0)),
+                'am_id'        => $sectionId,
+                'text_start'   => intval($this->arrGet($failedRow, 'text_start', 0)),
+            ];
+            if ($sectionId > 0) {
+                // Keyed by am_id so each section is flagged only once below.
+                $touchedAmIds[$sectionId] = true;
+            }
+        }
+
+        foreach (array_keys($touchedAmIds) as $sectionId) {
+            $this->setAmRefCheckStatus($sectionId, self::AM_STATUS_RUNNING);
+        }
+
+        $queuedIds = $this->pushJobsSortedByReferenceNo($jobs);
+
+        return [
+            'p_refer_id'   => $pReferId,
+            'p_article_id' => $pArticleId,
+            'reset'        => count($failedRows),
+            'queued'       => count($queuedIds),
+            'check_ids'    => $queuedIds,
+            'queue'        => self::QUEUE_NAME,
+        ];
+    }
+
public function recheckByRefer($articleId, $pReferId = 0, $referenceNo = 0)
{
$articleId = intval($articleId);
@@ -1600,9 +1873,9 @@ class ReferenceCheckService
if ($contentA === '' || $contentB === '') {
$this->updateCheckResult($checkId, [
'status' => self::RECORD_FAILED,
- 'error_msg' => 'Missing article_main.content or refer_text',
+ 'error_msg' => 'Missing section content (text/table) or refer_text',
]);
- throw new \RuntimeException('Missing article_main.content or refer_text');
+ throw new \RuntimeException('Missing section content (text/table) or refer_text');
}
$llmResult = (new LLMService())->checkReference($contentA, $contentB, false);
@@ -1748,7 +2021,7 @@ class ReferenceCheckService
}
/**
- * 第一次校对:取 article_main.content(整节正文)
+ * 第一次校对:正文取 article_main.content;表格(type=2)取 article_main_table.table_data 等
*/
public function resolveMainContentForJob(array $row, $maxChars = 8000)
{
@@ -1757,23 +2030,280 @@ class ReferenceCheckService
return '';
}
$main = Db::name('article_main')
- ->field('content')
+ ->field('content,type,amt_id,article_id')
->where('am_id', $amId)
->find();
if (empty($main)) {
return '';
}
- $text = trim((string)$this->arrGet($main, 'content', ''));
- if ($text === '') {
+ $raw = trim($this->resolveArticleMainCheckContent($main));
+ if ($raw === '') {
return '';
}
- $text = preg_replace(self::BLUE_TAG_REGEX, '[$1]', $text);
+ return $this->normalizeCheckContentForLlm($raw, $maxChars);
+ }
+
+    /**
+     * Whether this article_main row is a table section: type = MAIN_TYPE_TABLE,
+     * a positive amt_id, or content that carries a <table tableId='…'/> placeholder.
+     *
+     * @return bool
+     */
+    private function isArticleMainTableSection(array $main)
+    {
+        if (intval($this->arrGet($main, 'type', self::MAIN_TYPE_TEXT)) === self::MAIN_TYPE_TABLE) {
+            return true;
+        }
+        if (intval($this->arrGet($main, 'amt_id', 0)) > 0) {
+            return true;
+        }
+        $content = (string)$this->arrGet($main, 'content', '');
+
+        return stripos($content, '<table') !== false;
+    }
+
+    /**
+     * Resolve the article_main_table key of a section: a positive amt_id wins,
+     * otherwise the tableId attribute is parsed out of the content placeholder.
+     *
+     * @return int the amt_id, or 0 when none can be determined
+     */
+    private function resolveArticleMainTableAmtId(array $main)
+    {
+        $amtId = intval($this->arrGet($main, 'amt_id', 0));
+        if ($amtId > 0) {
+            return $amtId;
+        }
+        $content = (string)$this->arrGet($main, 'content', '');
+        // Accepts tableId=12, tableId='12' and tableId="12".
+        if (preg_match('/tableId\s*=\s*[\'"]?(\d+)/i', $content, $m)) {
+            return intval($m[1]);
+        }
+
+        return 0;
+    }
+
+    /**
+     * Load the article_main_table row behind a table section.
+     *
+     * The amt_id comes from resolveArticleMainTableAmtId(); only rows whose state is
+     * 0 or 2 are considered, and the lookup is narrowed to the section's article_id
+     * when that id is positive.
+     *
+     * @return array|null the table_data/title/note row, or null when nothing is found
+     */
+    private function loadArticleMainTableRow(array $main)
+    {
+        $tableId = $this->resolveArticleMainTableAmtId($main);
+        if ($tableId <= 0) {
+            return null;
+        }
+
+        $query = Db::name('article_main_table')
+            ->where('amt_id', $tableId)
+            ->whereIn('state', [0, 2])
+            ->field('table_data,title,note');
+
+        $ownerArticleId = intval($this->arrGet($main, 'article_id', 0));
+        if ($ownerArticleId > 0) {
+            $query->where('article_id', $ownerArticleId);
+        }
+
+        $tableRow = $query->find();
+        if (empty($tableRow)) {
+            return null;
+        }
+
+        return $tableRow;
+    }
+
+    /**
+     * Extract citations for one section.
+     *
+     * Text sections are scanned through their content. Table sections are scanned
+     * from the linked article_main_table row: title and note first, then table_data
+     * row by row with the cells of a row joined together.
+     *
+     * @return array citation entries as produced by extractReferences()
+     */
+    public function extractReferencesForArticleMain(array $main)
+    {
+        if ($this->isArticleMainTableSection($main)) {
+            $tableRow = $this->loadArticleMainTableRow($main);
+            if (empty($tableRow)) {
+                return [];
+            }
+
+            $leadingChunks = [];
+            foreach (['title', 'note'] as $column) {
+                $chunk = trim((string)$this->arrGet($tableRow, $column, ''));
+                if ($chunk === '') {
+                    continue;
+                }
+                $leadingChunks[] = $chunk;
+            }
+
+            $tableJson = (string)$this->arrGet($tableRow, 'table_data', '');
+
+            return $this->extractReferencesFromTableDataJson($tableJson, $leadingChunks);
+        }
+
+        return $this->extractReferences((string)$this->arrGet($main, 'content', ''));
+    }
+
+    /**
+     * Extract citations from a table: $prefixChunks (title, note, ...) are scanned
+     * first, then table_data row by row.
+     *
+     * Every scanned piece is treated as one line of a larger text whose lines are
+     * separated by a single byte, so the four offsets of each citation are shifted
+     * by the byte length of everything scanned before it. When table_data cannot be
+     * decoded, the raw string is scanned as a single piece.
+     *
+     * @param string $tableDataJson raw table_data value
+     * @param array  $prefixChunks  text pieces scanned ahead of the table rows
+     * @return array citation entries with shifted offsets
+     */
+    public function extractReferencesFromTableDataJson($tableDataJson, array $prefixChunks = [])
+    {
+        // Moves the four position fields of every citation by $delta bytes.
+        $shift = function ($cites, $delta) {
+            $moved = [];
+            foreach ($cites as $cite) {
+                foreach (['text_start', 'text_end', 'reference_start', 'reference_end'] as $key) {
+                    $cite[$key] = intval($cite[$key]) + $delta;
+                }
+                $moved[] = $cite;
+            }
+
+            return $moved;
+        };
+
+        $collected = [];
+        $cursor = 0;
+
+        foreach ($prefixChunks as $piece) {
+            $piece = trim((string)$piece);
+            if ($piece === '') {
+                continue;
+            }
+            $collected = array_merge($collected, $shift($this->extractReferences($piece), $cursor));
+            $cursor += strlen($piece) + 1;
+        }
+
+        $tableDataJson = trim((string)$tableDataJson);
+        if ($tableDataJson === '') {
+            return $collected;
+        }
+
+        $tableRows = $this->decodeTableDataJsonToArray($tableDataJson);
+        if ($tableRows === null) {
+            // Not decodable: scan the raw string as one piece.
+            return array_merge($collected, $shift($this->extractReferences($tableDataJson), $cursor));
+        }
+
+        foreach ($tableRows as $tableRow) {
+            $rowLine = $this->buildTableRowCheckLine($tableRow);
+            if ($rowLine === '') {
+                continue;
+            }
+            $collected = array_merge($collected, $shift($this->extractReferences($rowLine), $cursor));
+            $cursor += strlen($rowLine) + 1;
+        }
+
+        return $collected;
+    }
+
+    /**
+     * Raw HTML used for queueing / the LLM: the content column for text sections;
+     * for table sections the title, the note and the row-flattened table_data,
+     * joined with newlines.
+     *
+     * @return string empty when a table section has no loadable table row
+     */
+    public function resolveArticleMainCheckContent(array $main)
+    {
+        if ($this->isArticleMainTableSection($main)) {
+            $tableRow = $this->loadArticleMainTableRow($main);
+            if (empty($tableRow)) {
+                return '';
+            }
+
+            $pieces = [];
+            foreach (['title', 'note'] as $column) {
+                $piece = trim((string)$this->arrGet($tableRow, $column, ''));
+                if ($piece === '') {
+                    continue;
+                }
+                $pieces[] = $piece;
+            }
+
+            $tableJson = (string)$this->arrGet($tableRow, 'table_data', '');
+            $flattened = $this->flattenTableDataJsonToCheckContent($tableJson);
+            if ($flattened !== '') {
+                $pieces[] = $flattened;
+            }
+
+            return implode("\n", $pieces);
+        }
+
+        return (string)$this->arrGet($main, 'content', '');
+    }
+
+    /**
+     * Render one table row as a single line: the trimmed, non-empty "text" value of
+     * every cell, joined with " | ".
+     *
+     * @return string empty when $row is not an array or no cell has text
+     */
+    private function buildTableRowCheckLine($row)
+    {
+        if (!is_array($row)) {
+            return '';
+        }
+
+        $cellTexts = [];
+        foreach ($row as $cell) {
+            if (is_array($cell)) {
+                $cellText = trim((string)$this->arrGet($cell, 'text', ''));
+                if ($cellText !== '') {
+                    $cellTexts[] = $cellText;
+                }
+            }
+        }
+
+        return implode(' | ', $cellTexts);
+    }
+
+    /**
+     * Flatten table_data into one line per row (for the LLM / preview).
+     * When the value cannot be decoded, the trimmed raw string is returned instead.
+     *
+     * @return string
+     */
+    private function flattenTableDataJsonToCheckContent($tableDataJson)
+    {
+        $tableDataJson = trim((string)$tableDataJson);
+        if ($tableDataJson === '') {
+            return '';
+        }
+
+        $tableRows = $this->decodeTableDataJsonToArray($tableDataJson);
+        if ($tableRows === null) {
+            return $tableDataJson;
+        }
+
+        $rowLines = [];
+        foreach ($tableRows as $tableRow) {
+            $rowLine = $this->buildTableRowCheckLine($tableRow);
+            if ($rowLine === '') {
+                continue;
+            }
+            $rowLines[] = $rowLine;
+        }
+
+        return implode("\n", $rowLines);
+    }
+
+    /**
+     * Decode a table_data value into an array.
+     *
+     * A leading UTF-8 BOM is removed first. A value that decodes to a string is
+     * decoded a second time, which covers JSON that was itself stored as a JSON string.
+     *
+     * @return array|null the decoded array, or null for empty or undecodable input
+     */
+    private function decodeTableDataJsonToArray($raw)
+    {
+        $raw = trim((string)$raw);
+        if ($raw === '') {
+            return null;
+        }
+
+        if (strncmp($raw, "\xEF\xBB\xBF", 3) === 0) {
+            $raw = substr($raw, 3);
+        }
+
+        $firstPass = json_decode($raw, true);
+        if (json_last_error() !== JSON_ERROR_NONE) {
+            return null;
+        }
+        if (is_array($firstPass)) {
+            return $firstPass;
+        }
+        if (!is_string($firstPass)) {
+            return null;
+        }
+
+        $secondPass = json_decode($firstPass, true);
+        if (json_last_error() === JSON_ERROR_NONE && is_array($secondPass)) {
+            return $secondPass;
+        }
+
+        return null;
+    }
+
+ private function normalizeCheckContentForLlm($raw, $maxChars = 8000)
+ {
+ $text = $this->pregReplaceBlueTags($raw, '[$1]');
$text = strip_tags($text);
$text = html_entity_decode($text, ENT_QUOTES | ENT_HTML5, 'UTF-8');
$text = preg_replace('/\s+/u', ' ', $text);
$text = trim($text);
+ if ($text === '') {
+ return '';
+ }
$maxChars = max(500, intval($maxChars));
if (mb_strlen($text) > $maxChars) {
@@ -2134,12 +2664,12 @@ class ReferenceCheckService
}
/**
- * 从 article_main.content 提取 blue 引用
+ * 从正文 HTML 或表格展平后的 HTML 提取 blue 引用
*/
public function extractReferences($content)
{
$result = [];
- preg_match_all(self::BLUE_TAG_REGEX, $content, $matches, PREG_OFFSET_CAPTURE);
+ $matches = $this->collectBlueTagMatches($content);
if (empty($matches[0])) {
return [];
}
@@ -2319,7 +2849,7 @@ class ReferenceCheckService
private function buildCitationContextText($content, $start, $end)
{
$text = $this->byteSubstr($content, $start, $end);
- $text = preg_replace(self::BLUE_TAG_REGEX, '', $text);
+ $text = $this->pregReplaceBlueTags($text, '');
$text = trim(strip_tags($text));
$text = preg_replace('/\s+/u', ' ', $text);
$text = ltrim($text, "\xEF\xBB\xBF");
@@ -2505,7 +3035,7 @@ class ReferenceCheckService
}
$gap = substr($content, $tagEnd, $end - $tagEnd);
- $gapText = trim(strip_tags(preg_replace(self::BLUE_TAG_REGEX, '', $gap)));
+ $gapText = trim(strip_tags($this->pregReplaceBlueTags($gap, '')));
if ($gapText !== '' && !$this->isOnlyPunctuationOrSpace($gapText)) {
return $end;
}
diff --git a/application/database.php b/application/database.php
index 0295739a..d8ef7790 100644
--- a/application/database.php
+++ b/application/database.php
@@ -17,7 +17,7 @@ return [
'hostname' => 'localhost',
// 'hostname' => 'ec2-13-229-30-239.ap-southeast-1.compute.amazonaws.com',
// 数据库名
- 'database' => 'tougao',
+ 'database' => 'tougao2',
// 用户名
// 'username' => 'tmradmin',
'username' => 'root',