文献校对功能完善
This commit is contained in:
@@ -898,7 +898,17 @@ class Preaccept extends Base
|
|||||||
|
|
||||||
return jsonSuccess($re);
|
return jsonSuccess($re);
|
||||||
}
|
}
|
||||||
|
/**
 * Return one article_main record looked up by its am_id.
 *
 * Reads the POST payload, requires that "am_id" is present, and responds
 * with whatever the model's find() yields for that id.
 * NOTE(review): a missing row is passed to jsonSuccess() as-is (presumably
 * null) - confirm the front end expects that.
 *
 * @return mixed jsonSuccess() payload on success, jsonError() with the
 *               validator message when am_id is absent
 */
public function getArticleMainById()
{
    $post = $this->request->post();
    $validator = new Validate(["am_id" => "require"]);

    if ($validator->check($post)) {
        $row = $this->article_main_obj->where("am_id", $post['am_id'])->find();

        return jsonSuccess($row);
    }

    return jsonError($validator->getError());
}
|
||||||
|
|
||||||
public function changeH1(){
|
public function changeH1(){
|
||||||
$data = $this->request->post();
|
$data = $this->request->post();
|
||||||
|
|||||||
@@ -11,6 +11,7 @@ use think\Validate;
|
|||||||
use think\Db;
|
use think\Db;
|
||||||
use think\Env;
|
use think\Env;
|
||||||
use think\Queue;
|
use think\Queue;
|
||||||
|
use app\common\ReferenceCheckService;
|
||||||
/**
|
/**
|
||||||
* @title 参考文献
|
* @title 参考文献
|
||||||
* @description 相关方法汇总
|
* @description 相关方法汇总
|
||||||
@@ -1499,12 +1500,72 @@ class References extends Base
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* 按 p_refer_id 查单条参考文献的校对明细
|
* 多篇文章并行校对时,查询指定文章前面还有几篇在排队
|
||||||
|
*
|
||||||
|
* POST/GET: p_article_id(必填)
|
||||||
|
*
|
||||||
|
* 例:当前 5 篇文章正在校对,该文排在第 3 → ahead=2, position=3, running_total=5。
|
||||||
|
* 返回:running_total、ahead、position、in_queue、status(整篇校对状态 0/1/2)
|
||||||
|
*/
|
||||||
|
public function referenceCheckPendingCountAI()
{
    // Prefer the POST body; when it is empty fall back to the merged
    // request parameters so the endpoint also answers GET calls.
    $input = $this->request->post();
    if (empty($input)) {
        $input = $this->request->param();
    }

    // p_article_id is mandatory and must be a positive integer.
    $articleId = 0;
    if (!empty($input['p_article_id'])) {
        $articleId = intval($input['p_article_id']);
    }
    if ($articleId <= 0) {
        return json_encode(array('status' => 2, 'msg' => 'Please select an article'));
    }

    // Any exception raised while computing the queue position is reported
    // to the client as a jsonError carrying the exception message.
    try {
        $service = new ReferenceCheckService();
        $position = $service->getArticleCheckQueuePositionByPArticleId($articleId);

        return jsonSuccess($position);
    } catch (\Exception $e) {
        return jsonError($e->getMessage());
    }
}
|
||||||
|
|
||||||
|
/**
 * Re-run (asynchronously) the failed check records of one reference.
 *
 * POST/GET parameters:
 *   p_refer_id   (required) reference whose failed records are re-queued
 *   p_article_id (optional) further restricts the records to one article
 *
 * The work is delegated to
 * ReferenceCheckService::enqueueRecheckFailedByPReferId(); the summary it
 * returns (p_refer_id, p_article_id, reset, queued, check_ids, queue) is
 * sent back to the caller.
 *
 * @return mixed jsonSuccess() with the service summary, a JSON string with
 *               status 2 when p_refer_id is missing, or jsonError() with the
 *               exception message when the service throws
 */
public function referenceCheckRecheckFailedAI()
{
    // Prefer the POST body; fall back to the merged request parameters.
    $aParam = $this->request->post();
    if (empty($aParam)) {
        $aParam = $this->request->param();
    }

    $iPReferId = empty($aParam['p_refer_id']) ? 0 : intval($aParam['p_refer_id']);
    if ($iPReferId <= 0) {
        return json_encode(array('status' => 2, 'msg' => 'Please select a reference'));
    }

    $iPArticleId = empty($aParam['p_article_id']) ? 0 : intval($aParam['p_article_id']);

    try {
        $result = (new ReferenceCheckService())->enqueueRecheckFailedByPReferId($iPReferId, $iPArticleId);

        // Return the summary instead of an empty array so the client can
        // see how many records were reset and queued.
        return jsonSuccess($result);
    } catch (\Exception $e) {
        return jsonError($e->getMessage());
    }
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 按 p_refer_id 查单条参考文献的校对明细与进度
|
||||||
*
|
*
|
||||||
* POST/GET: p_refer_id(必填)
|
* POST/GET: p_refer_id(必填)
|
||||||
*
|
*
|
||||||
* 返回 list 中每项含:am_id、confidence、reason、is_match、is_pass
|
* 分组进度:progress_status(0待/1中/2完成/3失败)、pending、done、failed、pass、
|
||||||
* 同时附带上下文:p_refer_id、p_article_id、reference_no、total
|
* is_pass、progress_percent、last_updated_at
|
||||||
|
* list 每项:check_id、am_id、status、confidence、reason、is_match、is_pass
|
||||||
*/
|
*/
|
||||||
public function referenceCheckDetailsAI()
|
public function referenceCheckDetailsAI()
|
||||||
{
|
{
|
||||||
|
|||||||
@@ -15,12 +15,20 @@ class ReferenceCheckService
|
|||||||
{
|
{
|
||||||
const QUEUE_NAME = 'ReferenceCheck';
|
const QUEUE_NAME = 'ReferenceCheck';
|
||||||
|
|
||||||
/** t_article_main.ref_check_status */
|
/** t_article_main.type */
|
||||||
|
const MAIN_TYPE_TEXT = 0;
|
||||||
|
const MAIN_TYPE_IMAGE = 1;
|
||||||
|
const MAIN_TYPE_TABLE = 2;
|
||||||
|
|
||||||
|
/** t_article_main.ref_check_status(需执行 sql/article_main_ref_check_status.sql) */
|
||||||
const AM_STATUS_NONE = 0;
|
const AM_STATUS_NONE = 0;
|
||||||
const AM_STATUS_PASS = 1;
|
const AM_STATUS_PASS = 1;
|
||||||
const AM_STATUS_FAIL = 2;
|
const AM_STATUS_FAIL = 2;
|
||||||
const AM_STATUS_RUNNING = 3;
|
const AM_STATUS_RUNNING = 3;
|
||||||
|
|
||||||
|
/** @var bool|null t_article_main 是否已有 ref_check_status 列 */
|
||||||
|
private static $amRefCheckStatusColumnExists = null;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* 引用校对状态(生命周期顺序:0→1→2→3 = 待→进行→完成→失败)
|
* 引用校对状态(生命周期顺序:0→1→2→3 = 待→进行→完成→失败)
|
||||||
*
|
*
|
||||||
@@ -52,20 +60,14 @@ class ReferenceCheckService
|
|||||||
const PASS_CONFIDENCE_THRESHOLD = 0.65;
|
const PASS_CONFIDENCE_THRESHOLD = 0.65;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* <blue>[...]</blue> 引用标签内允许的字符类(带 /u 修饰符使用)。
|
* 正文引用标签两种排版(带 /u):
|
||||||
|
* 1) <blue>[8, 9]</blue>、<blue>[13-15]</blue> —— 方括号在 blue 内
|
||||||
|
* 2) [<blue>13-15</blue>] —— 方括号包裹 blue
|
||||||
*
|
*
|
||||||
* 除 ASCII 数字、半角逗号、半角连字符、空白外,还兼容常见排版变体:
|
* 捕获组均为序号串(可含逗号、区间连字符及排版变体)。
|
||||||
* , U+FF0C 全角逗号
|
|
||||||
* – U+2013 EN DASH
|
|
||||||
* — U+2014 EM DASH
|
|
||||||
* − U+2212 MINUS SIGN
|
|
||||||
* ‐ U+2010 HYPHEN
|
|
||||||
* ‑ U+2011 NON-BREAKING HYPHEN
|
|
||||||
*
|
|
||||||
* 若不支持变体连字符,会导致 [19–21] 这种区间引用整段被 preg 漏掉,
|
|
||||||
* 进而丢失对应的 reference_no 校对记录。
|
|
||||||
*/
|
*/
|
||||||
const BLUE_TAG_REGEX = '/<blue>\[([\d,,\-\x{2013}\x{2014}\x{2212}\x{2010}\x{2011}\s]+)\]<\/blue>/u';
|
const BLUE_TAG_REGEX = '/<blue>\[([\d,,\-\x{2013}\x{2014}\x{2212}\x{2010}\x{2011}\s]+)\]<\/blue>/u';
|
||||||
|
const BLUE_TAG_REGEX_BRACKET_OUTSIDE = '/\[<blue>([\d,,\-\x{2013}\x{2014}\x{2212}\x{2010}\x{2011}\s]+)<\/blue>\]/u';
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* 兼容无 ?? 的 PHP 版本
|
* 兼容无 ?? 的 PHP 版本
|
||||||
@@ -75,6 +77,46 @@ class ReferenceCheckService
|
|||||||
return isset($arr[$key]) ? $arr[$key] : $default;
|
return isset($arr[$key]) ? $arr[$key] : $default;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Find citation tags in both supported layouts and return them ordered by
 * where they start in the text.
 *
 * Runs BLUE_TAG_REGEX and BLUE_TAG_REGEX_BRACKET_OUTSIDE with
 * PREG_OFFSET_CAPTURE, so every returned entry is a [text, offset] pair.
 *
 * @param string $content text to scan
 * @return array two parallel lists: index 0 holds the full matches, index 1
 *               holds capture group 1 (the number list inside the tag)
 */
private function collectBlueTagMatches($content)
{
    $patterns = [self::BLUE_TAG_REGEX, self::BLUE_TAG_REGEX_BRACKET_OUTSIDE];

    $hits = [];
    foreach ($patterns as $regex) {
        $found = [];
        $hitCount = preg_match_all($regex, $content, $found, PREG_OFFSET_CAPTURE);
        if ($hitCount) {
            // Pair every full match with its own capture group.
            foreach ($found[0] as $idx => $whole) {
                $hits[] = ['full' => $whole, 'inner' => $found[1][$idx]];
            }
        }
    }

    // Order by the start offset of the full match.
    usort($hits, function ($left, $right) {
        return $left['full'][1] - $right['full'][1];
    });

    $fullMatches = [];
    $innerMatches = [];
    foreach ($hits as $hit) {
        $fullMatches[] = $hit['full'];
        $innerMatches[] = $hit['inner'];
    }

    return [$fullMatches, $innerMatches];
}
|
||||||
|
|
||||||
|
/**
 * Apply preg_replace for both citation tag layouts, one after the other.
 *
 * preg_replace() returns NULL when PCRE fails (both patterns use the /u
 * modifier, so malformed UTF-8 input is one such case). When that happens
 * the text from before the failing pass is kept instead of letting the
 * whole subject collapse to NULL.
 *
 * @param string|array $subject     text to rewrite
 * @param string|array $replacement replacement passed to preg_replace
 * @return string|array the rewritten subject
 */
private function pregReplaceBlueTags($subject, $replacement)
{
    foreach ([self::BLUE_TAG_REGEX, self::BLUE_TAG_REGEX_BRACKET_OUTSIDE] as $pattern) {
        $replaced = preg_replace($pattern, $replacement, $subject);
        if ($replaced !== null) {
            $subject = $replaced;
        }
    }

    return $subject;
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* 单条入队(可手工指定正文与文献文本)
|
* 单条入队(可手工指定正文与文献文本)
|
||||||
*/
|
*/
|
||||||
@@ -115,14 +157,18 @@ class ReferenceCheckService
|
|||||||
return ['check_id' => $checkId, 'queued' => 1];
|
return ['check_id' => $checkId, 'queued' => 1];
|
||||||
}
|
}
|
||||||
public function enqueueByArticleMain($main){
|
public function enqueueByArticleMain($main){
|
||||||
$amId = $main['am_id'];
|
$amId = intval($this->arrGet($main, 'am_id', 0));
|
||||||
// $main = Db::name('article_main')
|
if ($amId > 0 && (!isset($main['type']) || (intval($main['type']) === self::MAIN_TYPE_TABLE && intval($this->arrGet($main, 'amt_id', 0)) <= 0))) {
|
||||||
// ->field('am_id,content,article_id')
|
$dbMain = Db::name('article_main')
|
||||||
// ->where('am_id', $amId)
|
->field('am_id,content,article_id,type,amt_id')
|
||||||
// ->whereIn('state', [0, 2])
|
->where('am_id', $amId)
|
||||||
// ->find();
|
->whereIn('state', [0, 2])
|
||||||
$citations = $this->extractReferences((string)$main['content']);
|
->find();
|
||||||
// return $citations;
|
if (!empty($dbMain)) {
|
||||||
|
$main = array_merge($dbMain, $main);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
$citations = $this->extractReferencesForArticleMain($main);
|
||||||
if (empty($citations)) {
|
if (empty($citations)) {
|
||||||
$this->setAmRefCheckStatus($amId, self::AM_STATUS_NONE);
|
$this->setAmRefCheckStatus($amId, self::AM_STATUS_NONE);
|
||||||
return;
|
return;
|
||||||
@@ -222,7 +268,7 @@ class ReferenceCheckService
|
|||||||
$referMap = $this->loadReferMapByPArticleId($pArticleId);
|
$referMap = $this->loadReferMapByPArticleId($pArticleId);
|
||||||
|
|
||||||
$mains = Db::name('article_main')
|
$mains = Db::name('article_main')
|
||||||
->field('am_id,content,article_id')
|
->field('am_id,content,article_id,type,amt_id')
|
||||||
->where('article_id', $articleId)
|
->where('article_id', $articleId)
|
||||||
->whereIn('state', [0, 2])
|
->whereIn('state', [0, 2])
|
||||||
->order('sort asc')
|
->order('sort asc')
|
||||||
@@ -237,7 +283,7 @@ class ReferenceCheckService
|
|||||||
$now = date('Y-m-d H:i:s');
|
$now = date('Y-m-d H:i:s');
|
||||||
foreach ($mains as $main) {
|
foreach ($mains as $main) {
|
||||||
$amId = intval($main['am_id']);
|
$amId = intval($main['am_id']);
|
||||||
$citations = $this->extractReferences((string)$main['content']);
|
$citations = $this->extractReferencesForArticleMain($main);
|
||||||
if (empty($citations)) {
|
if (empty($citations)) {
|
||||||
$this->setAmRefCheckStatus($amId, self::AM_STATUS_NONE);
|
$this->setAmRefCheckStatus($amId, self::AM_STATUS_NONE);
|
||||||
continue;
|
continue;
|
||||||
@@ -309,7 +355,7 @@ class ReferenceCheckService
|
|||||||
$referMap = $this->loadReferMapByPArticleId($pArticleId);
|
$referMap = $this->loadReferMapByPArticleId($pArticleId);
|
||||||
|
|
||||||
$mains = Db::name('article_main')
|
$mains = Db::name('article_main')
|
||||||
->field('am_id,content,article_id')
|
->field('am_id,content,article_id,type,amt_id')
|
||||||
->where('article_id', $articleId)
|
->where('article_id', $articleId)
|
||||||
->whereIn('state', [0, 2])
|
->whereIn('state', [0, 2])
|
||||||
->order('sort asc')
|
->order('sort asc')
|
||||||
@@ -324,7 +370,7 @@ class ReferenceCheckService
|
|||||||
$now = date('Y-m-d H:i:s');
|
$now = date('Y-m-d H:i:s');
|
||||||
foreach ($mains as $main) {
|
foreach ($mains as $main) {
|
||||||
$amId = intval($main['am_id']);
|
$amId = intval($main['am_id']);
|
||||||
$citations = $this->extractReferences((string)$main['content']);
|
$citations = $this->extractReferencesForArticleMain($main);
|
||||||
if (empty($citations)) {
|
if (empty($citations)) {
|
||||||
$this->setAmRefCheckStatus($amId, self::AM_STATUS_NONE);
|
$this->setAmRefCheckStatus($amId, self::AM_STATUS_NONE);
|
||||||
continue;
|
continue;
|
||||||
@@ -429,9 +475,27 @@ class ReferenceCheckService
|
|||||||
return $status;
|
return $status;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Tell whether the article_main table already has a ref_check_status column.
 *
 * Callers use this to skip reads and writes of the column on databases
 * where the migration has not been applied yet. A successful probe is
 * cached in a static property, so the SHOW COLUMNS query runs at most once
 * per process. A probe that throws is NOT cached: it answers false for this
 * call only, so one transient database error cannot disable status updates
 * for the rest of a long-running process.
 *
 * @return bool true when the column exists, false when it is missing or the
 *              probe failed
 */
private function hasAmRefCheckStatusColumn()
{
    if (self::$amRefCheckStatusColumnExists !== null) {
        return self::$amRefCheckStatusColumnExists;
    }

    try {
        $table = Db::name('article_main')->getTable();
        // Backticks in the table name are doubled before it is embedded.
        $rows = Db::query('SHOW COLUMNS FROM `' . str_replace('`', '``', $table) . '` LIKE \'ref_check_status\'');
    } catch (\Exception $e) {
        // Leave the cache unset so the next call probes again.
        return false;
    }

    self::$amRefCheckStatusColumnExists = !empty($rows);

    return self::$amRefCheckStatusColumnExists;
}
|
||||||
|
|
||||||
public function setAmRefCheckStatus($amId, $status)
|
public function setAmRefCheckStatus($amId, $status)
|
||||||
{
|
{
|
||||||
if ($amId <= 0) {
|
if ($amId <= 0 || !$this->hasAmRefCheckStatusColumn()) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
Db::name('article_main')->where('am_id', $amId)->update([
|
Db::name('article_main')->where('am_id', $amId)->update([
|
||||||
@@ -472,7 +536,7 @@ class ReferenceCheckService
|
|||||||
->where('p_article_id', $pArticleId)
|
->where('p_article_id', $pArticleId)
|
||||||
->delete();
|
->delete();
|
||||||
|
|
||||||
if ($articleId > 0) {
|
if ($articleId > 0 && $this->hasAmRefCheckStatusColumn()) {
|
||||||
Db::name('article_main')
|
Db::name('article_main')
|
||||||
->where('article_id', $articleId)
|
->where('article_id', $articleId)
|
||||||
->whereIn('state', [0, 2])
|
->whereIn('state', [0, 2])
|
||||||
@@ -498,10 +562,12 @@ class ReferenceCheckService
|
|||||||
}
|
}
|
||||||
|
|
||||||
$deleted = Db::name('article_reference_check_result')->where('article_id', $articleId)->delete();
|
$deleted = Db::name('article_reference_check_result')->where('article_id', $articleId)->delete();
|
||||||
Db::name('article_main')
|
if ($this->hasAmRefCheckStatusColumn()) {
|
||||||
->where('article_id', $articleId)
|
Db::name('article_main')
|
||||||
->whereIn('state', [0, 2])
|
->where('article_id', $articleId)
|
||||||
->update(['ref_check_status' => self::AM_STATUS_NONE]);
|
->whereIn('state', [0, 2])
|
||||||
|
->update(['ref_check_status' => self::AM_STATUS_NONE]);
|
||||||
|
}
|
||||||
|
|
||||||
return intval($deleted);
|
return intval($deleted);
|
||||||
}
|
}
|
||||||
@@ -669,6 +735,68 @@ class ReferenceCheckService
|
|||||||
];
|
];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Report where an article stands among the articles that still have
 * pending reference-check records.
 *
 * An article counts as "running" while at least one of its records has
 * status RECORD_PENDING. Articles are ordered by the smallest id among
 * their pending records, ascending.
 *
 * Returned keys:
 *   p_article_id  the article asked about
 *   running_total number of articles with pending records
 *   ahead         articles queued before this one (0 when not queued)
 *   position      1-based place in the queue (0 when not queued)
 *   in_queue      whether this article has pending records
 *   status        "status" entry of getArticleProgressStatusByPArticleId(),
 *                 defaulting to ARTICLE_PROGRESS_NONE
 *
 * @param int $pArticleId article id, must be positive
 * @return array
 * @throws \InvalidArgumentException when $pArticleId is not positive
 */
public function getArticleCheckQueuePositionByPArticleId($pArticleId)
{
    $pArticleId = intval($pArticleId);
    if ($pArticleId <= 0) {
        throw new \InvalidArgumentException('p_article_id is required');
    }

    // One row per article that still has pending records, earliest first.
    $anchors = Db::name('article_reference_check_result')
        ->field('p_article_id, MIN(id) AS queue_anchor')
        ->where('status', self::RECORD_PENDING)
        ->group('p_article_id')
        ->order('queue_anchor', 'asc')
        ->select();

    $queue = [];
    foreach ($anchors as $anchor) {
        $queuedId = intval($this->arrGet($anchor, 'p_article_id', 0));
        if ($queuedId <= 0) {
            continue;
        }
        $queue[] = $queuedId;
    }

    // Zero-based slot of the article in the queue, or false when absent.
    $slot = array_search($pArticleId, $queue, true);
    $inQueue = ($slot !== false);

    $progress = $this->getArticleProgressStatusByPArticleId($pArticleId);

    return [
        'p_article_id' => $pArticleId,
        'running_total' => count($queue),
        'ahead' => $inQueue ? $slot : 0,
        'position' => $inQueue ? $slot + 1 : 0,
        'in_queue' => $inQueue,
        'status' => intval($this->arrGet($progress, 'status', self::ARTICLE_PROGRESS_NONE)),
    ];
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* 按 p_article_id 查整篇引用校对进度,按 reference_no 分组聚合状态,并展开每条明细。
|
* 按 p_article_id 查整篇引用校对进度,按 reference_no 分组聚合状态,并展开每条明细。
|
||||||
*
|
*
|
||||||
@@ -820,17 +948,16 @@ class ReferenceCheckService
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* 按 p_refer_id 查这条参考文献的所有校对明细。
|
* 按 p_refer_id 查这条参考文献的校对明细与分组进度。
|
||||||
*
|
*
|
||||||
* 每条 record 返回:
|
* 分组进度(与 referenceCheckProgressAI 单条 list 项口径一致):
|
||||||
* - am_id 命中的 article_main 主键
|
* progress_status 0待校验 1校对中 2完成 3失败
|
||||||
* - confidence 匹配置信度(0~1)
|
* pending/done/failed/pass、is_pass、progress_percent
|
||||||
* - reason LLM 给出的判定理由
|
*
|
||||||
* - is_match 是否匹配(来自 article_reference_check_result.is_match)
|
* list 每项:check_id、am_id、status、confidence、reason、is_match、is_pass
|
||||||
* - is_pass 是否通过校验(confidence >= PASS_CONFIDENCE_THRESHOLD)
|
|
||||||
*
|
*
|
||||||
* @param int $pReferId production_article_refer.p_refer_id
|
* @param int $pReferId production_article_refer.p_refer_id
|
||||||
* @return array{p_refer_id:int, p_article_id:int, reference_no:int, total:int, list:array}
|
* @return array
|
||||||
*/
|
*/
|
||||||
public function getCheckDetailsByPReferId($pReferId)
|
public function getCheckDetailsByPReferId($pReferId)
|
||||||
{
|
{
|
||||||
@@ -840,7 +967,7 @@ class ReferenceCheckService
|
|||||||
}
|
}
|
||||||
|
|
||||||
$rows = Db::name('article_reference_check_result')
|
$rows = Db::name('article_reference_check_result')
|
||||||
->field('id,p_article_id,reference_no,am_id,confidence,is_match,reason')
|
->field('id,p_article_id,reference_no,am_id,status,confidence,is_match,reason,updated_at')
|
||||||
->where('p_refer_id', $pReferId)
|
->where('p_refer_id', $pReferId)
|
||||||
->order('id asc')
|
->order('id asc')
|
||||||
->select();
|
->select();
|
||||||
@@ -848,8 +975,13 @@ class ReferenceCheckService
|
|||||||
$list = [];
|
$list = [];
|
||||||
$pArticleId = 0;
|
$pArticleId = 0;
|
||||||
$referenceNo = 0;
|
$referenceNo = 0;
|
||||||
|
$pending = 0;
|
||||||
|
$done = 0;
|
||||||
|
$failed = 0;
|
||||||
|
$pass = 0;
|
||||||
|
$lastUpdatedAt = '';
|
||||||
|
|
||||||
foreach ($rows as $row) {
|
foreach ($rows as $row) {
|
||||||
// 取首条出现的 p_article_id / reference_no 作为该 refer 的上下文
|
|
||||||
if ($pArticleId <= 0) {
|
if ($pArticleId <= 0) {
|
||||||
$pArticleId = intval($this->arrGet($row, 'p_article_id', 0));
|
$pArticleId = intval($this->arrGet($row, 'p_article_id', 0));
|
||||||
}
|
}
|
||||||
@@ -857,22 +989,87 @@ class ReferenceCheckService
|
|||||||
$referenceNo = intval($this->arrGet($row, 'reference_no', 0));
|
$referenceNo = intval($this->arrGet($row, 'reference_no', 0));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
$st = intval($this->arrGet($row, 'status', 0));
|
||||||
|
if ($st === self::RECORD_PENDING) {
|
||||||
|
$pending++;
|
||||||
|
} elseif ($st === self::RECORD_COMPLETED) {
|
||||||
|
$done++;
|
||||||
|
} elseif ($st === self::RECORD_FAILED) {
|
||||||
|
$failed++;
|
||||||
|
}
|
||||||
|
|
||||||
|
$upd = (string)$this->arrGet($row, 'updated_at', '');
|
||||||
|
if ($upd > $lastUpdatedAt) {
|
||||||
|
$lastUpdatedAt = $upd;
|
||||||
|
}
|
||||||
|
|
||||||
$confidence = floatval($this->arrGet($row, 'confidence', 0));
|
$confidence = floatval($this->arrGet($row, 'confidence', 0));
|
||||||
|
$isPass = $confidence >= self::PASS_CONFIDENCE_THRESHOLD;
|
||||||
|
if ($isPass) {
|
||||||
|
$pass++;
|
||||||
|
}
|
||||||
|
|
||||||
$list[] = [
|
$list[] = [
|
||||||
|
'check_id' => intval($this->arrGet($row, 'id', 0)),
|
||||||
'am_id' => intval($this->arrGet($row, 'am_id', 0)),
|
'am_id' => intval($this->arrGet($row, 'am_id', 0)),
|
||||||
|
'status' => $st,
|
||||||
'confidence' => $confidence,
|
'confidence' => $confidence,
|
||||||
'reason' => (string)$this->arrGet($row, 'reason', ''),
|
'reason' => (string)$this->arrGet($row, 'reason', ''),
|
||||||
'is_match' => intval($this->arrGet($row, 'is_match', 0)),
|
'is_match' => intval($this->arrGet($row, 'is_match', 0)),
|
||||||
'is_pass' => $confidence >= self::PASS_CONFIDENCE_THRESHOLD,
|
'is_pass' => $isPass,
|
||||||
];
|
];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if ($referenceNo <= 0) {
|
||||||
|
$refer = Db::name('production_article_refer')
|
||||||
|
->where('p_refer_id', $pReferId)
|
||||||
|
->where('state', 0)
|
||||||
|
->find();
|
||||||
|
if (!empty($refer)) {
|
||||||
|
if ($pArticleId <= 0) {
|
||||||
|
$pArticleId = intval($this->arrGet($refer, 'p_article_id', 0));
|
||||||
|
}
|
||||||
|
$referenceNo = intval($this->arrGet($refer, 'index', 0)) + 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
$total = count($list);
|
||||||
|
if ($total === 0) {
|
||||||
|
$progressStatus = self::PROGRESS_PENDING;
|
||||||
|
$progressPercent = 0;
|
||||||
|
$isPassGroup = false;
|
||||||
|
} elseif ($pending === $total) {
|
||||||
|
$progressStatus = self::PROGRESS_PENDING;
|
||||||
|
$progressPercent = 0;
|
||||||
|
$isPassGroup = false;
|
||||||
|
} elseif ($pending === 0) {
|
||||||
|
$progressStatus = $failed > 0 ? self::PROGRESS_FAILED : self::PROGRESS_COMPLETED;
|
||||||
|
$progressPercent = 100;
|
||||||
|
$isPassGroup = (
|
||||||
|
$progressStatus === self::PROGRESS_COMPLETED
|
||||||
|
&& $pass === $total
|
||||||
|
);
|
||||||
|
} else {
|
||||||
|
$progressStatus = self::PROGRESS_CHECKING;
|
||||||
|
$finished = $done + $failed;
|
||||||
|
$progressPercent = round($finished / $total * 100, 1);
|
||||||
|
$isPassGroup = false;
|
||||||
|
}
|
||||||
|
|
||||||
return [
|
return [
|
||||||
'p_refer_id' => $pReferId,
|
'p_refer_id' => $pReferId,
|
||||||
'p_article_id' => $pArticleId,
|
'p_article_id' => $pArticleId,
|
||||||
'reference_no' => $referenceNo,
|
'reference_no' => $referenceNo,
|
||||||
'total' => count($list),
|
'total' => $total,
|
||||||
'list' => $list,
|
'pending' => $pending,
|
||||||
|
'done' => $done,
|
||||||
|
'failed' => $failed,
|
||||||
|
'pass' => $pass,
|
||||||
|
'progress_status' => $progressStatus,
|
||||||
|
'progress_percent' => $progressPercent,
|
||||||
|
'is_pass' => $isPassGroup,
|
||||||
|
'last_updated_at' => $lastUpdatedAt,
|
||||||
|
'list' => $list,
|
||||||
];
|
];
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1010,8 +1207,12 @@ class ReferenceCheckService
|
|||||||
*/
|
*/
|
||||||
public function buildArticlePreview($articleId, $amId = 0)
|
public function buildArticlePreview($articleId, $amId = 0)
|
||||||
{
|
{
|
||||||
|
$fields = 'am_id,content,sort,type,amt_id';
|
||||||
|
if ($this->hasAmRefCheckStatusColumn()) {
|
||||||
|
$fields .= ',ref_check_status';
|
||||||
|
}
|
||||||
$q = Db::name('article_main')
|
$q = Db::name('article_main')
|
||||||
->field('am_id,content,sort,ref_check_status')
|
->field($fields)
|
||||||
->where('article_id', $articleId)
|
->where('article_id', $articleId)
|
||||||
->whereIn('state', [0, 2]);
|
->whereIn('state', [0, 2]);
|
||||||
if ($amId > 0) {
|
if ($amId > 0) {
|
||||||
@@ -1039,7 +1240,7 @@ class ReferenceCheckService
|
|||||||
|
|
||||||
foreach ($mains as $main) {
|
foreach ($mains as $main) {
|
||||||
$id = intval($main['am_id']);
|
$id = intval($main['am_id']);
|
||||||
$content = (string)$main['content'];
|
$content = $this->resolveArticleMainCheckContent($main);
|
||||||
$badIndex = isset($badByAm[$id]) ? $badByAm[$id] : array();
|
$badIndex = isset($badByAm[$id]) ? $badByAm[$id] : array();
|
||||||
$marked = $this->markContentForPreview($content, $id, $badIndex);
|
$marked = $this->markContentForPreview($content, $id, $badIndex);
|
||||||
$amStatus = intval($this->arrGet($main, 'ref_check_status', 0));
|
$amStatus = intval($this->arrGet($main, 'ref_check_status', 0));
|
||||||
@@ -1158,12 +1359,7 @@ class ReferenceCheckService
|
|||||||
$html = $content;
|
$html = $content;
|
||||||
|
|
||||||
// 1) 先标记 blue 内各序号(在原文上操作,[70-73] 仅标不合理者如 70、71)
|
// 1) 先标记 blue 内各序号(在原文上操作,[70-73] 仅标不合理者如 70、71)
|
||||||
preg_match_all(
|
$matches = $this->collectBlueTagMatches($html);
|
||||||
self::BLUE_TAG_REGEX,
|
|
||||||
$html,
|
|
||||||
$matches,
|
|
||||||
PREG_OFFSET_CAPTURE
|
|
||||||
);
|
|
||||||
$citeDeltas = [];
|
$citeDeltas = [];
|
||||||
if (!empty($matches[0])) {
|
if (!empty($matches[0])) {
|
||||||
$replacements = [];
|
$replacements = [];
|
||||||
@@ -1318,14 +1514,6 @@ class ReferenceCheckService
|
|||||||
return implode("\n", $parts);
|
return implode("\n", $parts);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* 前端修改参考文献后重新校对:仅处理已有校对记录,刷新 refer_text、重置结果并入队;无记录直接返回
|
|
||||||
*
|
|
||||||
* @param int $articleId
|
|
||||||
* @param int $pReferId t_production_article_refer.p_refer_id(优先)
|
|
||||||
* @param int $referenceNo 文献序号 index+1(无 p_refer_id 时用)
|
|
||||||
* @return array
|
|
||||||
*/
|
|
||||||
/**
|
/**
|
||||||
* 编辑某条文献内容后,按 p_refer_id 异步重新校对该文献对应的全部 check 明细
|
* 编辑某条文献内容后,按 p_refer_id 异步重新校对该文献对应的全部 check 明细
|
||||||
*
|
*
|
||||||
@@ -1387,7 +1575,7 @@ class ReferenceCheckService
|
|||||||
'refer_text' => $referText,
|
'refer_text' => $referText,
|
||||||
'refer_index' => $referenceNo,
|
'refer_index' => $referenceNo,
|
||||||
'reference_no' => $referenceNo,
|
'reference_no' => $referenceNo,
|
||||||
'status' => 0,
|
'status' => self::RECORD_PENDING,
|
||||||
'is_match' => 0,
|
'is_match' => 0,
|
||||||
'can_support' => 0,
|
'can_support' => 0,
|
||||||
'confidence' => 0,
|
'confidence' => 0,
|
||||||
@@ -1401,7 +1589,6 @@ class ReferenceCheckService
|
|||||||
foreach ($rows as $row) {
|
foreach ($rows as $row) {
|
||||||
$checkId = $this->resolveCheckRowId($row);
|
$checkId = $this->resolveCheckRowId($row);
|
||||||
Db::name('article_reference_check_result')->where('id', $checkId)->update($resetFields);
|
Db::name('article_reference_check_result')->where('id', $checkId)->update($resetFields);
|
||||||
// 旧的队列完成标记必须清掉,否则同 check_id 再次投递会被 acquireLock 静默丢弃
|
|
||||||
$this->clearReferenceCheckQueueLock($checkId);
|
$this->clearReferenceCheckQueueLock($checkId);
|
||||||
$pendingJobs[] = [
|
$pendingJobs[] = [
|
||||||
'check_id' => $checkId,
|
'check_id' => $checkId,
|
||||||
@@ -1432,6 +1619,92 @@ class ReferenceCheckService
|
|||||||
];
|
];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Queue a fresh check for every failed record of one reference.
 *
 * Only records with status RECORD_FAILED are touched. Their result fields
 * are cleared and the status is set back to RECORD_PENDING; refer_text and
 * reference_no are left as stored. Each affected article_main section is
 * flagged AM_STATUS_RUNNING and the jobs are handed to
 * pushJobsSortedByReferenceNo().
 *
 * Returned keys: p_refer_id, p_article_id, reset (records reset),
 * queued (jobs pushed), check_ids, queue (queue name).
 *
 * @param int $pReferId   reference id, must be positive
 * @param int $pArticleId optional article filter; when omitted the value of
 *                        the first matching record is reported back
 * @return array
 * @throws \InvalidArgumentException when $pReferId is not positive
 */
public function enqueueRecheckFailedByPReferId($pReferId, $pArticleId = 0)
{
    $pReferId = intval($pReferId);
    if ($pReferId <= 0) {
        throw new \InvalidArgumentException('p_refer_id is required');
    }
    $pArticleId = intval($pArticleId);

    $query = Db::name('article_reference_check_result')
        ->where('p_refer_id', $pReferId)
        ->where('status', self::RECORD_FAILED);
    if ($pArticleId > 0) {
        $query->where('p_article_id', $pArticleId);
    }
    $failedRows = $query->select();

    $summary = [
        'p_refer_id' => $pReferId,
        'p_article_id' => $pArticleId,
        'reset' => 0,
        'queued' => 0,
        'check_ids' => [],
        'queue' => self::QUEUE_NAME,
    ];

    // Nothing failed for this reference: report an empty run.
    if (empty($failedRows)) {
        return $summary;
    }

    if ($pArticleId <= 0) {
        $summary['p_article_id'] = intval($this->arrGet($failedRows[0], 'p_article_id', 0));
    }

    $blankResult = [
        'status' => self::RECORD_PENDING,
        'is_match' => 0,
        'can_support' => 0,
        'confidence' => 0,
        'reason' => '',
        'error_msg' => '',
        'updated_at' => date('Y-m-d H:i:s'),
    ];

    $jobs = [];
    $touchedSections = [];
    foreach ($failedRows as $failedRow) {
        $checkId = $this->resolveCheckRowId($failedRow);
        Db::name('article_reference_check_result')->where('id', $checkId)->update($blankResult);
        // Clear the queue lock for this check id before it is pushed again.
        $this->clearReferenceCheckQueueLock($checkId);

        $sectionId = intval($this->arrGet($failedRow, 'am_id', 0));
        $jobs[] = [
            'check_id' => $checkId,
            'reference_no' => intval($this->arrGet($failedRow, 'reference_no', 0)),
            'am_id' => $sectionId,
            'text_start' => intval($this->arrGet($failedRow, 'text_start', 0)),
        ];
        if ($sectionId > 0) {
            $touchedSections[$sectionId] = true;
        }
    }

    // Flag each affected section once, however many records it has.
    foreach (array_keys($touchedSections) as $sectionId) {
        $this->setAmRefCheckStatus($sectionId, self::AM_STATUS_RUNNING);
    }

    $checkIds = $this->pushJobsSortedByReferenceNo($jobs);

    $summary['reset'] = count($failedRows);
    $summary['queued'] = count($checkIds);
    $summary['check_ids'] = $checkIds;

    return $summary;
}
|
||||||
|
|
||||||
public function recheckByRefer($articleId, $pReferId = 0, $referenceNo = 0)
|
public function recheckByRefer($articleId, $pReferId = 0, $referenceNo = 0)
|
||||||
{
|
{
|
||||||
$articleId = intval($articleId);
|
$articleId = intval($articleId);
|
||||||
@@ -1600,9 +1873,9 @@ class ReferenceCheckService
|
|||||||
if ($contentA === '' || $contentB === '') {
|
if ($contentA === '' || $contentB === '') {
|
||||||
$this->updateCheckResult($checkId, [
|
$this->updateCheckResult($checkId, [
|
||||||
'status' => self::RECORD_FAILED,
|
'status' => self::RECORD_FAILED,
|
||||||
'error_msg' => 'Missing article_main.content or refer_text',
|
'error_msg' => 'Missing section content (text/table) or refer_text',
|
||||||
]);
|
]);
|
||||||
throw new \RuntimeException('Missing article_main.content or refer_text');
|
throw new \RuntimeException('Missing section content (text/table) or refer_text');
|
||||||
}
|
}
|
||||||
|
|
||||||
$llmResult = (new LLMService())->checkReference($contentA, $contentB, false);
|
$llmResult = (new LLMService())->checkReference($contentA, $contentB, false);
|
||||||
@@ -1748,7 +2021,7 @@ class ReferenceCheckService
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* 第一次校对:取 article_main.content(整节正文)
|
* 第一次校对:正文取 article_main.content;表格(type=2)取 article_main_table.table_data 等
|
||||||
*/
|
*/
|
||||||
public function resolveMainContentForJob(array $row, $maxChars = 8000)
|
public function resolveMainContentForJob(array $row, $maxChars = 8000)
|
||||||
{
|
{
|
||||||
@@ -1757,23 +2030,280 @@ class ReferenceCheckService
|
|||||||
return '';
|
return '';
|
||||||
}
|
}
|
||||||
$main = Db::name('article_main')
|
$main = Db::name('article_main')
|
||||||
->field('content')
|
->field('content,type,amt_id,article_id')
|
||||||
->where('am_id', $amId)
|
->where('am_id', $amId)
|
||||||
->find();
|
->find();
|
||||||
if (empty($main)) {
|
if (empty($main)) {
|
||||||
return '';
|
return '';
|
||||||
}
|
}
|
||||||
|
|
||||||
$text = trim((string)$this->arrGet($main, 'content', ''));
|
$raw = trim($this->resolveArticleMainCheckContent($main));
|
||||||
if ($text === '') {
|
if ($raw === '') {
|
||||||
return '';
|
return '';
|
||||||
}
|
}
|
||||||
|
|
||||||
$text = preg_replace(self::BLUE_TAG_REGEX, '[$1]', $text);
|
return $this->normalizeCheckContentForLlm($raw, $maxChars);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Decide whether an article_main row is a table section.
 *
 * A row qualifies when its type equals MAIN_TYPE_TABLE, when it carries a
 * positive amt_id, or when its content contains a "<table" tag together
 * with a numeric tableId attribute.
 *
 * @param array $main article_main row (type, amt_id and content are read)
 * @return bool
 */
private function isArticleMainTableSection(array $main)
{
    $isTypedAsTable = intval($this->arrGet($main, 'type', self::MAIN_TYPE_TEXT)) === self::MAIN_TYPE_TABLE;
    if ($isTypedAsTable || intval($this->arrGet($main, 'amt_id', 0)) > 0) {
        return true;
    }

    // Last resort: look for a table placeholder inside the content itself.
    $body = (string)$this->arrGet($main, 'content', '');
    if (stripos($body, '<table') === false) {
        return false;
    }

    return (bool)preg_match('/tableId\s*=\s*[\'"]?\d+/i', $body);
}
|
||||||
|
|
||||||
|
/**
 * Resolve the amt_id of the table behind an article_main row.
 *
 * Prefers the row's own positive `amt_id`; otherwise falls back to the first
 * numeric `tableId` attribute found in `content`.
 *
 * @param array $main article_main row (reads `amt_id`, `content`)
 * @return int the resolved id, or 0 when none can be determined
 */
private function resolveArticleMainTableAmtId(array $main)
{
    $explicitId = intval($this->arrGet($main, 'amt_id', 0));
    if ($explicitId > 0) {
        return $explicitId;
    }

    $html = (string)$this->arrGet($main, 'content', '');
    $hit = [];

    return preg_match('/tableId\s*=\s*[\'"]?(\d+)/i', $html, $hit)
        ? intval($hit[1])
        : 0;
}
|
||||||
|
|
||||||
|
/**
 * Load the article_main_table record that backs a table section.
 *
 * Looks the record up by the amt_id resolved from $main, restricted to rows
 * whose `state` is 0 or 2 (NOTE(review): meaning of these state values is not
 * visible here — confirm), and additionally by `article_id` when $main carries
 * a positive one.
 *
 * @param array $main article_main row (reads `amt_id`, `content`, `article_id`)
 * @return array|null record with `table_data`, `title`, `note`; null when no id
 *                    can be resolved or no matching record exists
 */
private function loadArticleMainTableRow(array $main)
{
    $tableId = $this->resolveArticleMainTableAmtId($main);
    if ($tableId <= 0) {
        return null;
    }

    $query = Db::name('article_main_table')
        ->where('amt_id', $tableId)
        ->whereIn('state', [0, 2])
        ->field('table_data,title,note');

    // Scope to the owning article only when the caller supplied one.
    $ownerArticleId = intval($this->arrGet($main, 'article_id', 0));
    if ($ownerArticleId > 0) {
        $query->where('article_id', $ownerArticleId);
    }

    $record = $query->find();
    if (empty($record)) {
        return null;
    }

    return $record;
}
|
||||||
|
|
||||||
|
/**
 * Extract citations for one article_main section.
 *
 * Text sections are scanned directly from `content`. Table sections are
 * scanned row by row from the backing table record, with the table's `title`
 * and `note` scanned first, so that a cell containing only "[n]" still sits
 * next to the other cells of its row.
 *
 * @param array $main article_main row
 * @return array list of citations as produced by extractReferences(); empty
 *               when a table section has no backing record
 */
public function extractReferencesForArticleMain(array $main)
{
    if ($this->isArticleMainTableSection($main)) {
        $tableRecord = $this->loadArticleMainTableRow($main);
        if (empty($tableRecord)) {
            return [];
        }

        // Non-empty title/note are scanned ahead of the table rows.
        $leading = [];
        foreach (['title', 'note'] as $column) {
            $value = trim((string)$this->arrGet($tableRecord, $column, ''));
            if ($value === '') {
                continue;
            }
            $leading[] = $value;
        }

        $tableJson = (string)$this->arrGet($tableRecord, 'table_data', '');

        return $this->extractReferencesFromTableDataJson($tableJson, $leading);
    }

    return $this->extractReferences((string)$this->arrGet($main, 'content', ''));
}
|
||||||
|
|
||||||
|
/**
 * Extract citations from a table's `table_data`, row by row.
 *
 * Fragments are scanned in this order: each non-empty entry of $prefixChunks
 * (e.g. title / note), then each non-empty table row flattened by
 * buildTableRowCheckLine(). When `table_data` cannot be decoded as JSON the
 * whole trimmed string is scanned as a single fragment instead.
 *
 * The `text_start`, `text_end`, `reference_start` and `reference_end` fields of
 * every citation are shifted by the running offset of the fragment they were
 * found in. The offset advances by the fragment's byte length (strlen) plus 1
 * for the separator between fragments; resolveArticleMainCheckContent() and
 * flattenTableDataJsonToCheckContent() join the same fragments with "\n".
 *
 * @param mixed $tableDataJson raw `table_data` value (cast to string)
 * @param array $prefixChunks  text fragments scanned before the table rows
 * @return array list of citations as produced by extractReferences(), with shifted offsets
 */
public function extractReferencesFromTableDataJson($tableDataJson, array $prefixChunks = [])
{
    $result = [];
    $offset = 0;

    // Single place for the offset bookkeeping (previously copy-pasted per branch):
    // scan one fragment, shift its citations, then advance past fragment + separator.
    $scanFragment = function ($fragment) use (&$result, &$offset) {
        foreach ($this->extractReferences($fragment) as $cite) {
            foreach (['text_start', 'text_end', 'reference_start', 'reference_end'] as $key) {
                $cite[$key] = intval($cite[$key]) + $offset;
            }
            $result[] = $cite;
        }
        $offset += strlen($fragment) + 1;
    };

    foreach ($prefixChunks as $chunk) {
        $chunk = trim((string)$chunk);
        if ($chunk !== '') {
            $scanFragment($chunk);
        }
    }

    $tableDataJson = trim((string)$tableDataJson);
    if ($tableDataJson === '') {
        return $result;
    }

    $decoded = $this->decodeTableDataJsonToArray($tableDataJson);
    if ($decoded === null) {
        // Not valid JSON: treat the raw string as one fragment.
        $scanFragment($tableDataJson);

        return $result;
    }

    foreach ($decoded as $row) {
        $line = $this->buildTableRowCheckLine($row);
        if ($line !== '') {
            $scanFragment($line);
        }
    }

    return $result;
}
|
||||||
|
|
||||||
|
/**
 * Build the raw content of a section used for queueing / the LLM check.
 *
 * Text sections return `content` unchanged. Table sections return the
 * non-empty pieces among the table's trimmed `title`, trimmed `note`, and the
 * row-flattened `table_data`, joined with "\n".
 *
 * @param array $main article_main row
 * @return string raw content; '' when a table section has no backing record
 */
public function resolveArticleMainCheckContent(array $main)
{
    if ($this->isArticleMainTableSection($main)) {
        $tableRecord = $this->loadArticleMainTableRow($main);
        if (empty($tableRecord)) {
            return '';
        }

        $pieces = [];
        foreach (['title', 'note'] as $column) {
            $pieces[] = trim((string)$this->arrGet($tableRecord, $column, ''));
        }
        $pieces[] = $this->flattenTableDataJsonToCheckContent(
            (string)$this->arrGet($tableRecord, 'table_data', '')
        );

        // Drop empty pieces so no blank separator lines are produced.
        $nonEmpty = array_filter($pieces, function ($piece) {
            return $piece !== '';
        });

        return implode("\n", $nonEmpty);
    }

    return (string)$this->arrGet($main, 'content', '');
}
|
||||||
|
|
||||||
|
/**
 * Flatten one table row into a single line.
 *
 * Takes the trimmed `text` of every array-shaped cell, skips empty ones, and
 * joins the rest with " | " so all cells of a row stay on the same line.
 *
 * @param mixed $row one decoded table row; anything but an array yields ''
 * @return string the joined cell texts, or '' when nothing usable is found
 */
private function buildTableRowCheckLine($row)
{
    if (!is_array($row)) {
        return '';
    }

    // Non-array cells map to '' and are removed by the filter below.
    $texts = array_map(function ($cell) {
        return is_array($cell)
            ? trim((string)$this->arrGet($cell, 'text', ''))
            : '';
    }, $row);

    $nonEmpty = array_filter($texts, function ($text) {
        return $text !== '';
    });

    return implode(' | ', $nonEmpty);
}
|
||||||
|
|
||||||
|
/**
 * Flatten `table_data` into plain lines, one per non-empty table row.
 *
 * Rows are rendered with buildTableRowCheckLine() and joined with "\n".
 * When the value cannot be decoded as JSON the trimmed input is returned as is.
 *
 * @param mixed $tableDataJson raw `table_data` value (cast to string)
 * @return string flattened content, or '' for empty input
 */
private function flattenTableDataJsonToCheckContent($tableDataJson)
{
    $json = trim((string)$tableDataJson);
    if ($json === '') {
        return '';
    }

    $rows = $this->decodeTableDataJsonToArray($json);
    if ($rows === null) {
        // Undecodable: hand the raw string through unchanged.
        return $json;
    }

    $rendered = [];
    foreach ($rows as $row) {
        $line = $this->buildTableRowCheckLine($row);
        if ($line === '') {
            continue;
        }
        $rendered[] = $line;
    }

    return implode("\n", $rendered);
}
|
||||||
|
|
||||||
|
/**
 * Decode a `table_data` JSON string into an array.
 *
 * Strips a leading UTF-8 BOM, then decodes. If the first decode yields a
 * string (JSON that was encoded twice), that string is decoded once more.
 *
 * @param mixed $raw raw JSON value (cast to string)
 * @return array|null the decoded array; null for empty input, invalid JSON,
 *                    or JSON that does not decode to an array
 */
private function decodeTableDataJsonToArray($raw)
{
    $json = trim((string)$raw);
    if ($json === '') {
        return null;
    }

    // Drop the 3-byte UTF-8 BOM, which json_decode would reject.
    if (preg_match('/^\xEF\xBB\xBF/', $json)) {
        $json = substr($json, 3);
    }

    $firstPass = json_decode($json, true);
    if (json_last_error() !== JSON_ERROR_NONE) {
        return null;
    }
    if (is_array($firstPass)) {
        return $firstPass;
    }
    if (!is_string($firstPass)) {
        return null;
    }

    // Double-encoded payload: the outer JSON was a string holding JSON.
    $secondPass = json_decode($firstPass, true);

    return (json_last_error() === JSON_ERROR_NONE && is_array($secondPass))
        ? $secondPass
        : null;
}
|
||||||
|
|
||||||
|
private function normalizeCheckContentForLlm($raw, $maxChars = 8000)
|
||||||
|
{
|
||||||
|
$text = $this->pregReplaceBlueTags($raw, '[$1]');
|
||||||
$text = strip_tags($text);
|
$text = strip_tags($text);
|
||||||
$text = html_entity_decode($text, ENT_QUOTES | ENT_HTML5, 'UTF-8');
|
$text = html_entity_decode($text, ENT_QUOTES | ENT_HTML5, 'UTF-8');
|
||||||
$text = preg_replace('/\s+/u', ' ', $text);
|
$text = preg_replace('/\s+/u', ' ', $text);
|
||||||
$text = trim($text);
|
$text = trim($text);
|
||||||
|
if ($text === '') {
|
||||||
|
return '';
|
||||||
|
}
|
||||||
|
|
||||||
$maxChars = max(500, intval($maxChars));
|
$maxChars = max(500, intval($maxChars));
|
||||||
if (mb_strlen($text) > $maxChars) {
|
if (mb_strlen($text) > $maxChars) {
|
||||||
@@ -2134,12 +2664,12 @@ class ReferenceCheckService
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* 从 article_main.content 提取 blue 引用
|
* 从正文 HTML 或表格展平后的 HTML 提取 blue 引用
|
||||||
*/
|
*/
|
||||||
public function extractReferences($content)
|
public function extractReferences($content)
|
||||||
{
|
{
|
||||||
$result = [];
|
$result = [];
|
||||||
preg_match_all(self::BLUE_TAG_REGEX, $content, $matches, PREG_OFFSET_CAPTURE);
|
$matches = $this->collectBlueTagMatches($content);
|
||||||
if (empty($matches[0])) {
|
if (empty($matches[0])) {
|
||||||
return [];
|
return [];
|
||||||
}
|
}
|
||||||
@@ -2319,7 +2849,7 @@ class ReferenceCheckService
|
|||||||
private function buildCitationContextText($content, $start, $end)
|
private function buildCitationContextText($content, $start, $end)
|
||||||
{
|
{
|
||||||
$text = $this->byteSubstr($content, $start, $end);
|
$text = $this->byteSubstr($content, $start, $end);
|
||||||
$text = preg_replace(self::BLUE_TAG_REGEX, '', $text);
|
$text = $this->pregReplaceBlueTags($text, '');
|
||||||
$text = trim(strip_tags($text));
|
$text = trim(strip_tags($text));
|
||||||
$text = preg_replace('/\s+/u', ' ', $text);
|
$text = preg_replace('/\s+/u', ' ', $text);
|
||||||
$text = ltrim($text, "\xEF\xBB\xBF");
|
$text = ltrim($text, "\xEF\xBB\xBF");
|
||||||
@@ -2505,7 +3035,7 @@ class ReferenceCheckService
|
|||||||
}
|
}
|
||||||
|
|
||||||
$gap = substr($content, $tagEnd, $end - $tagEnd);
|
$gap = substr($content, $tagEnd, $end - $tagEnd);
|
||||||
$gapText = trim(strip_tags(preg_replace(self::BLUE_TAG_REGEX, '', $gap)));
|
$gapText = trim(strip_tags($this->pregReplaceBlueTags($gap, '')));
|
||||||
if ($gapText !== '' && !$this->isOnlyPunctuationOrSpace($gapText)) {
|
if ($gapText !== '' && !$this->isOnlyPunctuationOrSpace($gapText)) {
|
||||||
return $end;
|
return $end;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -17,7 +17,7 @@ return [
|
|||||||
'hostname' => 'localhost',
|
'hostname' => 'localhost',
|
||||||
// 'hostname' => 'ec2-13-229-30-239.ap-southeast-1.compute.amazonaws.com',
|
// 'hostname' => 'ec2-13-229-30-239.ap-southeast-1.compute.amazonaws.com',
|
||||||
// 数据库名
|
// 数据库名
|
||||||
'database' => 'tougao',
|
'database' => 'tougao2',
|
||||||
// 用户名
|
// 用户名
|
||||||
// 'username' => 'tmradmin',
|
// 'username' => 'tmradmin',
|
||||||
'username' => 'root',
|
'username' => 'root',
|
||||||
|
|||||||
Reference in New Issue
Block a user