参考文献校对升级

This commit is contained in:
wyn
2026-06-29 10:23:27 +08:00
parent edb3c1b27b
commit 6fdc4efb6f
11 changed files with 2680 additions and 380 deletions

View File

@@ -1178,6 +1178,107 @@ class Base extends Controller
return $ids;
}
/**
 * Expand the inner text of a bracketed citation (e.g. "1,2" or "3-5") into a
 * flat list of reference numbers.
 *
 * Full-width commas and common Unicode dash variants are first normalised to
 * ASCII "," and "-". Each comma-separated segment is then either an inclusive
 * range "a-b" (a reversed range such as "5-3" is read as "3-5") or a single
 * number parsed with intval(). Only positive numbers are returned. Input order
 * is kept and duplicates are not removed.
 *
 * @param string $referencePart Text found between "[" and "]".
 * @return int[]
 */
protected function expandCitationBracketNumbers(string $referencePart): array
{
    $referencePart = trim($referencePart);
    if ($referencePart === '') {
        return [];
    }

    // Separators are written as code-point escapes so they cannot be lost in an
    // editor/encoding round-trip: an empty search string makes str_replace()
    // a silent no-op, which would leave these characters un-normalised.
    $referencePart = str_replace("\u{FF0C}", ',', $referencePart); // full-width comma
    $referencePart = str_replace(
        [
            "\u{2010}", // hyphen
            "\u{2011}", // non-breaking hyphen
            "\u{2012}", // figure dash
            "\u{2013}", // en dash
            "\u{2014}", // em dash
            "\u{2015}", // horizontal bar
            "\u{2212}", // minus sign
            "\u{FF0D}", // full-width hyphen-minus
        ],
        '-',
        $referencePart
    );

    $segments = preg_split('/\s*,\s*/', $referencePart);
    if ($segments === false) {
        // PCRE failure: nothing can be parsed reliably.
        return [];
    }

    $out = [];
    foreach ($segments as $seg) {
        $seg = trim((string)$seg);
        if ($seg === '') {
            continue;
        }

        if (preg_match('/^(\d+)\s*-\s*(\d+)$/', $seg, $m)) {
            $first = intval($m[1]);
            $second = intval($m[2]);
            $from = min($first, $second);
            $to = max($first, $second);
            // Single numbers are only kept when > 0; apply the same rule to
            // ranges so "0-2" yields 1,2 instead of leaking a 0.
            // NOTE(review): the range size is not capped; a very wide range in
            // the text will produce an equally large array.
            for ($i = max(1, $from); $i <= $to; $i++) {
                $out[] = $i;
            }
            continue;
        }

        $n = intval($seg);
        if ($n > 0) {
            $out[] = $n;
        }
    }

    return $out;
}
/**
 * Collect the reference numbers cited inside a fragment of main text.
 *
 * Two citation forms are handled:
 *  - <mycite data-id="p_refer_id">: ids returned by extractMyciteIds() are
 *    looked up in production_article_refer (state = 0) for the given article
 *    and mapped to reference_no = index + 1. Skipped when $pArticleId <= 0.
 *  - [n] / <blue>[n]</blue>: the bracket content is normalised (full-width
 *    comma, Unicode dashes) and expanded via expandCitationBracketNumbers().
 *    Brackets containing anything other than digits, whitespace, "," and "-"
 *    are ignored.
 *
 * @param string $text       Main-text fragment (may contain markup).
 * @param int    $pArticleId Production article id used to resolve <mycite> ids.
 * @return int[] Unique reference numbers in ascending order.
 */
protected function extractCitationRefNosFromMainContent(string $text, int $pArticleId = 0): array
{
    if ($text === '') {
        return [];
    }

    $nos = [];

    // Form 1: <mycite> tags carrying p_refer_id values.
    $pReferIds = $this->extractMyciteIds($text);
    if (!empty($pReferIds) && $pArticleId > 0) {
        $refers = Db::name('production_article_refer')
            ->where('p_article_id', $pArticleId)
            ->whereIn('p_refer_id', $pReferIds)
            ->where('state', 0)
            ->field('p_refer_id,index')
            ->select();
        $idToNo = [];
        foreach ($refers as $row) {
            // The stored index is zero-based; reference numbers start at 1.
            $idToNo[intval($row['p_refer_id'])] = intval($row['index']) + 1;
        }
        foreach ($pReferIds as $pid) {
            if (isset($idToNo[$pid])) {
                $nos[] = $idToNo[$pid];
            }
        }
    }

    // Form 2: bracketed numbers, optionally wrapped in <blue>…</blue>.
    // Separators are written as code-point escapes: an empty search string
    // makes str_replace() a no-op, so "[1，2]" or "[3–5]" would otherwise fail
    // the digits-only gate below and be dropped.
    $dashVariants = [
        "\u{2010}", // hyphen
        "\u{2011}", // non-breaking hyphen
        "\u{2012}", // figure dash
        "\u{2013}", // en dash
        "\u{2014}", // em dash
        "\u{2015}", // horizontal bar
        "\u{2212}", // minus sign
        "\u{FF0D}", // full-width hyphen-minus
    ];
    if (preg_match_all('/(?:<\s*blue[^>]*>)?\[([^\]]+)\](?:<\/\s*blue\s*>)?/iu', $text, $m)) {
        foreach ($m[1] as $inner) {
            $innerNorm = str_replace("\u{FF0C}", ',', trim((string)$inner));
            $innerNorm = str_replace($dashVariants, '-', $innerNorm);
            if (!preg_match('/^[\d\s,\-]+$/u', $innerNorm)) {
                // Not a pure number list (e.g. "[Fig. 1]"): not a citation.
                continue;
            }
            foreach ($this->expandCitationBracketNumbers($innerNorm) as $n) {
                if ($n > 0) {
                    $nos[] = $n;
                }
            }
        }
    }

    $nos = array_values(array_unique($nos));
    sort($nos, SORT_NUMERIC);

    return $nos;
}
/**
* table_data二维数组 JSON [[{text,colspan,rowspan},...],...];支持双重 JSON 字符串编码。
*

View File

@@ -7,7 +7,7 @@ use think\Env;
use think\Queue;
use think\Validate;
use app\common\CrossrefService;
use app\common\ReferenceCheckService;
use app\common\ReferenceRelevanceCheckService;
class Preaccept extends Base
{
@@ -27,7 +27,7 @@ class Preaccept extends Base
return;
}
try {
(new ReferenceCheckService())->clearArticleChecksByPArticleId($pArticleId);
(new ReferenceRelevanceCheckService())->clearArticleChecksByPArticleId($pArticleId);
} catch (\Exception $e) {
\think\Log::error(
'resetArticleChecksOnReferChange[' . $sourceTag . '] p_article_id='
@@ -1220,6 +1220,14 @@ class Preaccept extends Base
$insert['ctime'] = time();
$this->article_main_log_obj->insert($insert);
// $articleId = intval($am_info['article_id']);
// $amId = intval($data['am_id']);
//
// // 本段引用集合变化(如 10,11 → 11,12时仅清空该 am_id 下的校对明细
// if ($this->hasMainCitationChange($old_content, $new_raw_content, $articleId)) {
// $this->clearMainChecksOnCitationChange($articleId, $amId);
// }
// 判断是否存在“引用删除”(新 content 相对旧 content 缺少 <mycite>
$hasCitationDeletion = $this->hasMyciteDeletion($old_content, $new_raw_content);
@@ -1245,6 +1253,39 @@ class Preaccept extends Base
//返回更新数据 20260119 end
}
/**
 * Best-effort removal of the existing citation-check detail rows that belong
 * to one main-text section (am_id) of an article.
 *
 * Does nothing when either id is not positive. An exception raised by the
 * service is logged and swallowed, so the caller is not interrupted.
 *
 * @param int $articleId Article id used to locate the rows.
 * @param int $amId      Main-text section id whose rows are cleared.
 */
private function clearMainChecksOnCitationChange(int $articleId, int $amId)
{
    if ($articleId <= 0 || $amId <= 0) {
        return;
    }
    try {
        // Fully qualified on purpose: the visible `use` list of this controller
        // imports ReferenceRelevanceCheckService, so the short name
        // ReferenceCheckService would resolve inside the controller namespace
        // and fail with a class-not-found \Error that the catch below
        // (\Exception only) would not intercept.
        (new \app\common\ReferenceCheckService())->clearChecksByAmId($articleId, $amId);
    } catch (\Exception $e) {
        \think\Log::error(
            'clearMainChecksOnCitationChange article_id=' . $articleId
            . ' am_id=' . $amId . ' ' . $e->getMessage()
        );
    }
}
/**
 * Tell whether the set of cited reference numbers differs between the old and
 * the new content of one main-text section (any addition, removal or change).
 *
 * Both contents are reduced to sorted, de-duplicated reference numbers by
 * extractCitationRefNosFromMainContent(), so the two citation markups
 * (<blue>[n]</blue> and <mycite data-id="p_refer_id">) compare equal when they
 * point at the same references.
 *
 * @param string $oldContent Content before the save.
 * @param string $newContent Content submitted by the save.
 * @param int    $articleId  Article id used to find the production article (state 0 or 2).
 * @return bool True when the two reference-number lists are not identical.
 */
private function hasMainCitationChange(string $oldContent, string $newContent, int $articleId): bool
{
    $productionQuery = Db::name('production_article')
        ->where('article_id', $articleId)
        ->whereIn('state', [0, 2]);
    $pArticleId = intval($productionQuery->value('p_article_id'));

    $before = $this->extractCitationRefNosFromMainContent($oldContent, $pArticleId);
    $after = $this->extractCitationRefNosFromMainContent($newContent, $pArticleId);

    return !($before === $after);
}
/**
* 是否发生 <mycite> 删除new 相对 old 少了任意引用 id
*/

View File

@@ -12,6 +12,8 @@ use think\Db;
use think\Env;
use think\Queue;
use app\common\ReferenceCheckService;
use app\common\ReferenceRelevanceCheckService;
use app\common\DbReconnectHelper;
/**
* @title 参考文献
* @description 相关方法汇总
@@ -1309,11 +1311,195 @@ class References extends Base
}
return json_encode(['status' => 8,'msg' => 'fail']);
}
// ============================================================
// Reference "topic relevance" check: standalone module (RabbitMQ chained consumption)
// Tables: t_article_reference_relevance_check_result / t_article_reference_relevance_check_batch
// Consumer: php think reference_relevance:mq-consume
// ============================================================
/**
 * Start the first relevance check run for all references of an article.
 *
 * POST: p_article_id (required).
 *
 * The request is rejected when the id is missing, when no production article
 * in state 0/2 is found, when checkReferStatus() reports 0, or when relevance
 * check rows already exist for the article (referenceRelevanceCheckResetAI is
 * the entry point for a rerun).
 *
 * NOTE(review): the original comment states that abstracts/content are read
 * from t_production_article_refer (abstract_text, refer_content_cleaned) and
 * are fetched and written back during the check when both are empty. That
 * logic lives in the service and is not visible here; confirm there.
 */
public function allReferenceCheckAI()
{
    $post = $this->request->post();
    $pArticleId = 0;
    if (!empty($post['p_article_id'])) {
        $pArticleId = intval($post['p_article_id']);
    }
    if ($pArticleId <= 0) {
        return jsonError('Please select an article');
    }

    $articleWhere = ['p_article_id' => $pArticleId, 'state' => ['in', [0, 2]]];
    $article = Db::name('production_article')
        ->field('p_article_id,article_id')
        ->where($articleWhere)
        ->find();
    if (empty($article)) {
        return jsonError('No articles found');
    }

    if ($this->checkReferStatus($pArticleId) == 0) {
        return jsonError('Please correct the reference content before running the check.');
    }

    // A first run is only allowed while no result rows exist yet.
    $rowCount = Db::name('article_reference_relevance_check_result')
        ->where('p_article_id', $pArticleId)
        ->count();
    if (intval($rowCount) > 0) {
        return jsonError('This article already has relevance check records. Use referenceRelevanceCheckResetAI to rerun.');
    }

    try {
        DbReconnectHelper::ensure();
        $service = new ReferenceRelevanceCheckService();
        $queued = $service->enqueueByPArticle($article);
        if (!empty($queued['check_ids'])) {
            return jsonSuccess($queued);
        }
        return jsonError('No reference citations were found in the article.');
    } catch (\Exception $e) {
        return jsonError($e->getMessage());
    }
}
/**
 * Report the progress of the relevance check for an article.
 *
 * POST: p_article_id (required).
 *
 * Returns whatever ReferenceRelevanceCheckService::getProgressByPArticleId()
 * produces, wrapped by jsonSuccess(); an exception becomes a jsonError().
 */
public function referenceRelevanceCheckProgressAI()
{
    $post = $this->request->post();
    $pArticleId = 0;
    if (!empty($post['p_article_id'])) {
        $pArticleId = intval($post['p_article_id']);
    }
    if ($pArticleId <= 0) {
        return jsonError('p_article_id is required');
    }

    try {
        $service = new ReferenceRelevanceCheckService();
        return jsonSuccess($service->getProgressByPArticleId($pArticleId));
    } catch (\Exception $e) {
        return jsonError($e->getMessage());
    }
}
/**
 * Return the overall relevance check status of an article (used by the
 * front end to decide which button flow to show).
 *
 * POST/GET: p_article_id (required). POST data is used when present,
 * otherwise the generic request parameters are read.
 *
 * NOTE(review): per the original comment the service returns
 * status 0 = not checked, 1 = in progress, 2 = finished, counted per
 * reference (grouped by reference_no) like referenceRelevanceCheckProgressAI.
 * That is decided inside the service and cannot be verified here.
 */
public function referenceRelevanceCheckArticleStatusAI()
{
    // Fall back to the merged request parameters when the POST body is empty.
    $params = $this->request->post() ?: $this->request->param();

    $pArticleId = 0;
    if (!empty($params['p_article_id'])) {
        $pArticleId = intval($params['p_article_id']);
    }
    if ($pArticleId <= 0) {
        return jsonError('p_article_id is required');
    }

    try {
        $service = new ReferenceRelevanceCheckService();
        return jsonSuccess($service->getArticleProgressStatusByPArticleId($pArticleId));
    } catch (\Exception $e) {
        return jsonError($e->getMessage());
    }
}
/**
 * Return the relevance check details recorded for one reference.
 *
 * POST: p_refer_id (required).
 *
 * The payload is the result of
 * ReferenceRelevanceCheckService::getDetailsByPReferId(); an exception is
 * reported through jsonError().
 */
public function referenceRelevanceCheckDetailsAI()
{
    $post = $this->request->post();
    $pReferId = 0;
    if (!empty($post['p_refer_id'])) {
        $pReferId = intval($post['p_refer_id']);
    }
    if ($pReferId <= 0) {
        return jsonError('p_refer_id is required');
    }

    try {
        $service = new ReferenceRelevanceCheckService();
        return jsonSuccess($service->getDetailsByPReferId($pReferId));
    } catch (\Exception $e) {
        return jsonError($e->getMessage());
    }
}
/**
 * Clear the relevance check records of an article and run the check again.
 *
 * POST: p_article_id (required).
 *
 * The production article must exist in state 0 or 2 and checkReferStatus()
 * must not report 0; the actual reset/re-queue is delegated to
 * ReferenceRelevanceCheckService::resetAndRecheckByArticle().
 */
public function referenceRelevanceCheckResetAI()
{
    $post = $this->request->post();
    $pArticleId = 0;
    if (!empty($post['p_article_id'])) {
        $pArticleId = intval($post['p_article_id']);
    }
    if ($pArticleId <= 0) {
        return jsonError('Please select an article');
    }

    $articleWhere = ['p_article_id' => $pArticleId, 'state' => ['in', [0, 2]]];
    $article = Db::name('production_article')
        ->field('p_article_id,article_id')
        ->where($articleWhere)
        ->find();
    if (empty($article)) {
        return jsonError('No articles found');
    }

    if ($this->checkReferStatus($pArticleId) == 0) {
        return jsonError('Please correct the reference content before running the check.');
    }

    try {
        $service = new ReferenceRelevanceCheckService();
        return jsonSuccess($service->resetAndRecheckByArticle($article));
    } catch (\Exception $e) {
        return jsonError($e->getMessage());
    }
}
/**
 * Clear the relevance check records of an article without starting a new run.
 *
 * POST: p_article_id (required).
 *
 * Responds with the article id and the integer value returned by
 * ReferenceRelevanceCheckService::clearByPArticleId() under "deleted".
 */
public function referenceRelevanceCheckClearAI()
{
    $post = $this->request->post();
    $pArticleId = 0;
    if (!empty($post['p_article_id'])) {
        $pArticleId = intval($post['p_article_id']);
    }
    if ($pArticleId <= 0) {
        return jsonError('p_article_id is required');
    }

    try {
        $service = new ReferenceRelevanceCheckService();
        $removed = $service->clearByPArticleId($pArticleId);
        $payload = [
            'p_article_id' => $pArticleId,
            'deleted' => intval($removed),
        ];
        return jsonSuccess($payload);
    } catch (\Exception $e) {
        return jsonError($e->getMessage());
    }
}
/**
 * Re-run the relevance check for records that are still pending.
 *
 * POST: p_article_id (required).
 *
 * NOTE(review): per the original comment only rows with status = 0 are
 * re-run, nothing is cleared, and no abstract fetching or content cleaning
 * takes place. That behaviour belongs to
 * ReferenceRelevanceCheckService::recheckPendingOnlyByArticle() and is not
 * visible here.
 */
public function referenceRelevanceCheckRecheckPendingAI()
{
    $post = $this->request->post();
    $pArticleId = 0;
    if (!empty($post['p_article_id'])) {
        $pArticleId = intval($post['p_article_id']);
    }
    if ($pArticleId <= 0) {
        return jsonError('p_article_id is required');
    }

    try {
        $service = new ReferenceRelevanceCheckService();
        return jsonSuccess($service->recheckPendingOnlyByArticle($pArticleId));
    } catch (\Exception $e) {
        return jsonError($e->getMessage());
    }
}
/**
* 参考文献第一次校对(支撑力度)
* @return \think\response\Json
*/
public function allReferenceCheckAI(){
public function allReferenceCheckAI2(){
//获取参数
$aParam = empty($aParam) ? $this->request->post() : $aParam;
@@ -1537,7 +1723,6 @@ class References extends Base
* p_article_id可选
*
* 仅重跑 status=3校对失败的记录不改动 refer_text只重置结果字段后入 RabbitMQ 批次队列。
* 返回p_refer_id、p_article_id、reset、queued、check_ids、queue
*/
public function referenceCheckRecheckFailedAI()
{
@@ -1561,6 +1746,36 @@ class References extends Base
}
}
/**
 * Re-run the failed checks of one reference and, through the service, the
 * other members of the same citation-tag group (e.g. [1,2]).
 *
 * POST/GET: p_refer_id (required), p_article_id (optional).
 * POST data is used when present, otherwise the generic request parameters.
 *
 * NOTE(review): the original comment lists the response fields as
 * p_refer_id, p_article_id, reset, queued, check_ids, queue and describes
 * the rerun as asynchronous; both are determined by
 * ReferenceCheckService::enqueueRecheckFailedByPReferIdWithGroup().
 */
public function referenceCheckRecheckFailedWithGroupAI()
{
    // Fall back to the merged request parameters when the POST body is empty.
    $params = $this->request->post() ?: $this->request->param();

    $pReferId = 0;
    if (!empty($params['p_refer_id'])) {
        $pReferId = intval($params['p_refer_id']);
    }
    if ($pReferId <= 0) {
        return json_encode(['status' => 2, 'msg' => 'Please select a reference']);
    }

    $pArticleId = 0;
    if (!empty($params['p_article_id'])) {
        $pArticleId = intval($params['p_article_id']);
    }

    try {
        $service = new ReferenceCheckService();
        return jsonSuccess($service->enqueueRecheckFailedByPReferIdWithGroup($pReferId, $pArticleId));
    } catch (\Exception $e) {
        return jsonError($e->getMessage());
    }
}
/**
* 按 p_refer_id 查单条参考文献的校对明细与进度
*
@@ -1590,6 +1805,47 @@ class References extends Base
}
}
/**
 * Re-scan the article for references that never appeared in the check
 * details and queue them for checking.
 *
 * POST/GET: p_article_id (required). POST data is used when present,
 * otherwise the generic request parameters.
 *
 * The production article must exist in state 0 or 2 and checkReferStatus()
 * must not report 0. The work itself is delegated to
 * ReferenceCheckService::enqueueNewlyMatchedByPArticle().
 *
 * NOTE(review): per the original comment the service works on the set
 * difference "production_article_refer (state = 0) minus the p_refer_id
 * values already present in article_reference_check_result", leaves existing
 * details untouched, requires that a first check has already been run, and
 * returns missing_p_refer_ids, matched_p_refer_ids,
 * still_unmatched_p_refer_ids, queued, new_reference_nos, check_ids, queue.
 * None of that is visible in this method.
 */
public function referenceCheckRematchNewAI()
{
    // Fall back to the merged request parameters when the POST body is empty.
    $params = $this->request->post() ?: $this->request->param();

    $pArticleId = 0;
    if (!empty($params['p_article_id'])) {
        $pArticleId = intval($params['p_article_id']);
    }
    if ($pArticleId <= 0) {
        return json_encode(['status' => 2, 'msg' => 'Please select an article']);
    }

    $article = Db::name('production_article')
        ->field('p_article_id,article_id')
        ->where(['p_article_id' => $pArticleId, 'state' => ['in', [0, 2]]])
        ->find();
    if (empty($article)) {
        return json_encode(['status' => 3, 'msg' => 'No articles found']);
    }

    if ($this->checkReferStatus($pArticleId) == 0) {
        return jsonError('Please correct the reference content before running the check.');
    }

    try {
        $service = new ReferenceCheckService();
        return jsonSuccess($service->enqueueNewlyMatchedByPArticle($article));
    } catch (\Exception $e) {
        return jsonError($e->getMessage());
    }
}
public function checkReferStatus($p_article_id){
$list = $this->production_article_refer_obj->where('p_article_id', $p_article_id)->where('state', 0)->select();
if (!$list) {
@@ -1604,4 +1860,6 @@ class References extends Base
}
return $frag;
}
}