文章引用文献校验
This commit is contained in:
@@ -10,6 +10,7 @@ use PhpOffice\PhpWord\IOFactory;
|
||||
use app\common\OpenAi;
|
||||
use app\common\CrossrefService;
|
||||
use app\common\PubmedService;
|
||||
use app\common\ReferenceCheckService;
|
||||
|
||||
/**
|
||||
* @title 文章接口
|
||||
@@ -6391,4 +6392,417 @@ class Article extends Base
|
||||
Db::commit();
|
||||
return json_encode(['status' => 1,'msg' => 'success']);
|
||||
}
|
||||
/**
 * Debug endpoint: preview the <blue>[n]</blue> citations extracted from an
 * article's article_main rows. Nothing is queued.
 *
 * POST: article_id (required), am_id (optional, restricts the preview to one row)
 *
 * @return mixed JSON response produced by jsonSuccess()/jsonError()
 */
public function citationReview()
{
    // Read the id from the request; a hard-coded debug id would make the
    // validation below unreachable and pin the endpoint to a single article.
    $articleId = intval($this->request->post('article_id', 0));
    if ($articleId <= 0) {
        return jsonError('article_id is required');
    }
    $amId = intval($this->request->post('am_id', 0));

    $query = Db::name('article_main')
        ->field('am_id,content')
        ->where('article_id', $articleId)
        // Same state filter the enqueue endpoint applies, so the preview
        // shows the rows that would actually be checked.
        ->whereIn('state', [0, 2]);
    if ($amId > 0) {
        $query->where('am_id', $amId);
    }
    $mains = $query->order('sort asc')->select();

    $svc = new ReferenceCheckService();
    $preview = [];
    foreach ($mains as $item) {
        $preview[] = [
            'am_id'     => $item['am_id'],
            'citations' => $svc->extractReferences((string)$item['content']),
        ];
    }

    return jsonSuccess(['article_id' => $articleId, 'sections' => $preview]);
}
|
||||
/**
 * Extract citation markers such as <blue>[57]</blue>, <blue>[74-79]</blue>
 * or <blue>[72, 45]</blue> together with the sentence that contains them.
 *
 * All positions in the result are BYTE offsets into $content, because they
 * come from preg_match_all(PREG_OFFSET_CAPTURE), strlen() and strpos().
 * The sentence is therefore sliced with substr(); slicing with a
 * character-based function would return the wrong text as soon as the
 * content contains multibyte characters.
 *
 * @param string $content Raw section content
 * @return array List of citations; each item has reference_raw,
 *               reference_numbers, original_text, reference_start,
 *               reference_end, text_start and text_end
 */
function extractReferences($content)
{
    $content = (string)$content;
    $tagPattern = '/<blue>\[([\d,\-\s]+)\]<\/blue>/';

    // preg_match_all() returns 0 when nothing matches and false on error.
    if (!preg_match_all($tagPattern, $content, $matches, PREG_OFFSET_CAPTURE)) {
        return [];
    }

    $result = [];
    foreach ($matches[0] as $index => $match) {
        // Full tag text and its byte span inside $content.
        $fullTag  = $match[0];
        $tagStart = $match[1];
        $tagEnd   = $tagStart + strlen($fullTag);

        // Raw reference string inside the brackets, then the expanded numbers.
        $rawRef = trim($matches[1][$index][0]);
        $referenceNumbers = $this->expandReferenceNumbers($rawRef);

        // Sentence boundaries around the tag (byte offsets).
        $sentenceStart = $this->findSentenceStart($content, $tagStart);
        $sentenceEnd   = $this->findSentenceEnd($content, $tagEnd);

        // Byte-based slice to match the byte offsets above.
        $originalText = (string)substr(
            $content,
            $sentenceStart,
            $sentenceEnd - $sentenceStart
        );

        // Strip every blue citation tag from the sentence.
        $originalText = trim(preg_replace($tagPattern, '', $originalText));

        $result[] = [
            'reference_raw'     => $rawRef,
            'reference_numbers' => $referenceNumbers,
            'original_text'     => $originalText,
            // Position of the blue tag within the whole content.
            'reference_start'   => $tagStart,
            'reference_end'     => $tagEnd,
            // Position of the surrounding sentence.
            'text_start'        => $sentenceStart,
            'text_end'          => $sentenceEnd,
        ];
    }

    return $result;
}
|
||||
|
||||
/**
 * Expand a citation string into the individual reference numbers.
 *
 *   11-15    => [11,12,13,14,15]
 *   72,45    => [72,45]
 *   74-79,81 => [74,75,76,77,78,79,81]
 *
 * Malformed parts are skipped: anything that is not "digits" or
 * "digits-digits" (for example "-5", "5-" or "1-2-3"), reversed ranges,
 * and ranges wider than the safety cap below.
 *
 * @param string $refStr Comma separated numbers and ranges
 * @return array Unique reference numbers in first-seen order
 */
function expandReferenceNumbers($refStr)
{
    // Guards against a typo such as "1-99999999" allocating a huge array.
    $maxRangeSpan = 1000;

    $numbers = [];
    foreach (explode(',', (string)$refStr) as $part) {
        $part = trim($part);
        if ($part === '') {
            continue;
        }

        if (preg_match('/^(\d+)\s*-\s*(\d+)$/', $part, $bounds)) {
            // Range: both endpoints must be present, so "-5" can no longer
            // be read as 0..5.
            $start = intval($bounds[1]);
            $end   = intval($bounds[2]);
            if ($start <= $end && ($end - $start) <= $maxRangeSpan) {
                $numbers = array_merge($numbers, range($start, $end));
            }
        } elseif (preg_match('/^\d+$/', $part)) {
            // Single number.
            $numbers[] = intval($part);
        }
    }

    return array_values(array_unique($numbers));
}
|
||||
|
||||
/**
 * Find the byte offset at which the sentence containing $position starts.
 *
 * Looks backwards from $position for the closest sentence delimiter and
 * returns the offset just after it, or 0 when no delimiter precedes it.
 *
 * @param string $content  Text to search
 * @param int    $position Byte offset inside $content
 * @return int Byte offset of the sentence start
 */
function findSentenceStart($content, $position)
{
    $delimiters = ['.', '。', '!', '?', "\n"];

    // Only the text before $position matters; slice it once.
    $head = (string)substr($content, 0, $position);

    $start = 0;
    foreach ($delimiters as $delimiter) {
        $pos = strrpos($head, $delimiter);
        if ($pos !== false) {
            // Skip the whole delimiter: "。" is three bytes in UTF-8, so
            // "+ 1" would land inside the character.
            $start = max($start, $pos + strlen($delimiter));
        }
    }

    return $start;
}
|
||||
|
||||
/**
 * Find the byte offset at which the sentence containing $position ends.
 *
 * Looks forward from $position for the nearest sentence delimiter and
 * returns the offset just after it, or the content length when there is
 * no delimiter.
 *
 * @param string $content  Text to search
 * @param int    $position Byte offset inside $content
 * @return int Byte offset one past the end of the sentence
 */
function findSentenceEnd($content, $position)
{
    $length = strlen($content);

    // An offset past the end makes strpos() warn (or throw on PHP 8).
    if ($position >= $length) {
        return $length;
    }

    $end = $length;
    foreach (['.', '。', '!', '?', "\n"] as $delimiter) {
        $pos = strpos($content, $delimiter, $position);
        if ($pos !== false) {
            // Include the whole delimiter: "。" is three bytes in UTF-8,
            // so "+ 1" would cut the character in half.
            $end = min($end, $pos + strlen($delimiter));
        }
    }

    return $end;
}
|
||||
|
||||
/**
 * Citation relevance: submit a single citing-text / reference pair to the queue.
 *
 * POST: content_a (required), content_b (optional), article_id,
 *       reference_no (n = index + 1), am_id
 *
 * When content_b is empty and both article_id and reference_no are given,
 * the reference text is looked up from the article's production record.
 *
 * @return mixed JSON response produced by jsonSuccess()/jsonError()
 */
public function referenceCheckEnqueue()
{
    $post = $this->request->post();

    $citingText    = trim((string)(isset($post['content_a']) ? $post['content_a'] : ''));
    $referenceText = trim((string)(isset($post['content_b']) ? $post['content_b'] : ''));
    $articleId     = intval(isset($post['article_id']) ? $post['article_id'] : 0);
    $refNo         = intval(isset($post['reference_no']) ? $post['reference_no'] : 0);

    if ($citingText === '') {
        return jsonError('content_a is required');
    }

    try {
        $service = new ReferenceCheckService();
        $meta = [
            'reference_no' => $refNo,
            'article_id'   => $articleId,
            'am_id'        => intval(isset($post['am_id']) ? $post['am_id'] : 0),
        ];

        // Only hit the database when the caller did not supply the reference text.
        $needsLookup = $referenceText === '' && $articleId > 0 && $refNo > 0;
        $production = null;
        if ($needsLookup) {
            $production = Db::name('production_article')
                ->where('article_id', $articleId)
                ->where('state', 0)
                ->find();
        }

        if ($production) {
            $pArticleId = intval($production['p_article_id']);
            $referMap = $service->loadReferMapByPArticleId($pArticleId);
            // reference_no is 1-based, the map is indexed from 0.
            $slot = $refNo - 1;
            if (isset($referMap[$slot])) {
                $referRow = $referMap[$slot];
                $referenceText = $service->formatReferForLlm($referRow);
                $meta['p_article_id'] = $pArticleId;
                $meta['p_refer_id'] = intval($referRow['p_refer_id']);
                $meta['refer_index'] = $slot;
            }
        }

        return jsonSuccess($service->enqueue($citingText, $referenceText, $meta));
    } catch (\Exception $e) {
        return jsonError($e->getMessage());
    }
}
|
||||
|
||||
/**
 * Queue reference checks section by section: every article_main row of the
 * article with state 0 or 2 is handed to the service, in sort order.
 *
 * POST: article_id
 *
 * @return mixed JSON response produced by jsonSuccess()/jsonError()
 */
public function referenceCheckEnqueueArticleMain()
{
    $data = $this->request->post();
    $articleId = intval(isset($data['article_id']) ? $data['article_id'] : 0);
    if ($articleId <= 0) {
        return jsonError('article_id is required');
    }

    // Report failures as JSON, like the other reference-check endpoints do.
    try {
        $mainsList = Db::name('article_main')
            ->field('am_id,content,article_id')
            ->where('article_id', $articleId)
            ->whereIn('state', [0, 2])
            ->order('sort asc')
            ->select();

        $svc = new ReferenceCheckService();
        $sectionCount = 0;
        foreach ($mainsList as $mainInfo) {
            $svc->enqueueByArticleMain($mainInfo);
            $sectionCount++;
        }

        // Always answer the caller; previously the method returned nothing.
        return jsonSuccess([
            'article_id'    => $articleId,
            'section_count' => $sectionCount,
        ]);
    } catch (\Exception $e) {
        return jsonError($e->getMessage());
    }
}
|
||||
/**
 * Bulk enqueue for one article: the service extracts the blue citations and
 * reference numbers from article_main.
 *
 * POST: article_id, clear_previous=1 (default: clear the article's previous
 *       detail rows and re-check)
 *
 * @return mixed JSON response produced by jsonSuccess()/jsonError()
 */
public function referenceCheckEnqueueArticle()
{
    $post = $this->request->post();

    $articleId = isset($post['article_id']) ? intval($post['article_id']) : 0;
    if ($articleId <= 0) {
        return jsonError('article_id is required');
    }

    try {
        $service = new ReferenceCheckService();

        // Clearing is the default; only an explicit value other than 1 disables it.
        $clearPrevious = true;
        if (isset($post['clear_previous'])) {
            $clearPrevious = intval($post['clear_previous']) === 1;
        }

        return jsonSuccess($service->enqueueByArticle($articleId, $clearPrevious));
    } catch (\Exception $e) {
        return jsonError($e->getMessage());
    }
}
|
||||
|
||||
/**
 * Fetch the result of a single citation relevance check.
 *
 * GET/POST: check_id
 *
 * @return mixed JSON response produced by jsonSuccess()/jsonError()
 */
public function referenceCheckResult()
{
    $checkId = intval($this->request->param('check_id', 0));

    if ($checkId > 0) {
        $service = new ReferenceCheckService();
        $record = $service->getResult($checkId);

        if ($record) {
            return jsonSuccess($this->formatReferenceCheckRow($record));
        }

        return jsonError('result not found');
    }

    return jsonError('check_id is required');
}
|
||||
|
||||
/**
 * Manuscript preview: content with unreasonable citations marked up
 * (reference number plus citing sentence).
 *
 * GET/POST: article_id, am_id (optional, preview a single section only)
 *
 * The response also carries a description of the CSS classes used by the
 * markup and a ready-to-use stylesheet string.
 *
 * @return mixed JSON response produced by jsonSuccess()/jsonError()
 */
public function referenceCheckPreview()
{
    $articleId = intval($this->request->param('article_id', 0));
    if ($articleId <= 0) {
        return jsonError('article_id is required');
    }
    $amId = intval($this->request->param('am_id', 0));

    // Static payload: class-name legend and matching CSS rules.
    $markupHint = [
        'ref_no' => '.ref-no-error — 不合理的文献序号(如 70-73 中单独的 70)',
        'ref_cite' => '.ref-cite-tag.ref-cite-error — 含不合理序号的 blue 引用块',
        'ref_context'=> '.ref-context-error — 不合理的引用句/上下文',
    ];
    $previewCss = implode('', [
        '.ref-no-error{color:#c00;font-weight:bold;border-bottom:2px wavy #c00}',
        '.ref-cite-tag.ref-cite-error{background:#ffecec}',
        '.ref-context-error{background:#fff3cd;outline:1px dashed #e6a700}',
    ]);

    try {
        $service = new ReferenceCheckService();
        $payload = $service->buildArticlePreview($articleId, $amId);
        $payload['markup_hint'] = $markupHint;
        $payload['preview_css'] = $previewCss;

        return jsonSuccess($payload);
    } catch (\Exception $e) {
        return jsonError($e->getMessage());
    }
}
|
||||
|
||||
/**
 * List the citation check results of an article. A range such as [70-73]
 * produces four rows whose reference_no values are 70, 71, 72 and 73.
 *
 * GET/POST: article_id, status (optional), only_mismatch=1 (unreasonable only)
 *
 * The response also contains the per-section check status read from
 * article_main.
 *
 * @return mixed JSON response produced by jsonSuccess()/jsonError()
 */
public function referenceCheckList()
{
    $articleId = intval($this->request->param('article_id', 0));
    if ($articleId <= 0) {
        return jsonError('article_id is required');
    }

    // -1 means "no status filter".
    $rawStatus = $this->request->param('status', '');
    $statusFilter = -1;
    if ($rawStatus !== '' && $rawStatus !== null) {
        $statusFilter = intval($rawStatus);
    }
    $onlyMismatch = intval($this->request->param('only_mismatch', 0)) === 1;

    $service = new ReferenceCheckService();
    $formatted = [];
    foreach ($service->listByArticle($articleId, $statusFilter, $onlyMismatch) as $resultRow) {
        $formatted[] = $this->formatReferenceCheckRow($resultRow);
    }

    $sectionRows = Db::name('article_main')
        ->field('am_id,ref_check_status,sort')
        ->where('article_id', $articleId)
        ->whereIn('state', [0, 2])
        ->order('sort asc')
        ->select();

    $sections = [];
    foreach ($sectionRows as $section) {
        $sectionStatus = isset($section['ref_check_status'])
            ? intval($section['ref_check_status'])
            : 0;

        $sections[] = [
            'am_id'            => intval($section['am_id']),
            'ref_check_status' => $sectionStatus,
            'ref_check_pass'   => $sectionStatus === ReferenceCheckService::AM_STATUS_PASS,
            'ref_check_label'  => ReferenceCheckService::amStatusLabel($sectionStatus),
        ];
    }

    return jsonSuccess([
        'article_id' => $articleId,
        'total'      => count($formatted),
        'list'       => $formatted,
        'sections'   => $sections,
    ]);
}
|
||||
|
||||
/**
 * Convert a check-result row into the structure returned by the API.
 *
 * Optional columns fall back to 0 or ''. check_id, status, is_match and
 * confidence are read directly from the row. The numeric status is mapped
 * to pending/done/failed (anything else becomes 'unknown'), and
 * cite_group_key is "<am_id>_<cite_tag_start>".
 *
 * @param array $row Result row
 * @return array
 */
private function formatReferenceCheckRow($row)
{
    // Small readers for the optional columns.
    $intOf = function ($key) use ($row) {
        return intval(isset($row[$key]) ? $row[$key] : 0);
    };
    $textOf = function ($key) use ($row) {
        return isset($row[$key]) ? $row[$key] : '';
    };

    $statusLabels = [0 => 'pending', 1 => 'done', 2 => 'failed'];

    $amId = $intOf('am_id');
    $citeStart = $intOf('cite_tag_start');
    $statusCode = intval($row['status']);

    $statusLabel = 'unknown';
    if (isset($statusLabels[$statusCode])) {
        $statusLabel = $statusLabels[$statusCode];
    }

    return [
        'check_id'       => intval($row['check_id']),
        'article_id'     => $intOf('article_id'),
        'am_id'          => $amId,
        'cite_group_key' => $amId . '_' . $citeStart,
        'p_refer_id'     => $intOf('p_refer_id'),
        'refer_index'    => $intOf('refer_index'),
        'reference_no'   => $intOf('reference_no'),
        'reference_raw'  => $textOf('reference_raw'),
        'cite_tag_start' => $citeStart,
        'cite_tag_end'   => $intOf('cite_tag_end'),
        'text_start'     => $intOf('text_start'),
        'text_end'       => $intOf('text_end'),
        'status'         => $statusLabel,
        'is_match'       => intval($row['is_match']),
        'is_reasonable'  => intval($row['is_match']) === 1,
        'confidence'     => floatval($row['confidence']),
        'reason'         => $textOf('reason'),
        'error_msg'      => $textOf('error_msg'),
        'content_a'      => $textOf('content_a'),
        'content_b'      => $textOf('content_b'),
        'updated_at'     => $textOf('updated_at'),
    ];
}
|
||||
|
||||
}
|
||||
|
||||
135
application/api/job/ReferenceCheck.php
Normal file
135
application/api/job/ReferenceCheck.php
Normal file
@@ -0,0 +1,135 @@
|
||||
<?php
|
||||
namespace app\api\job;
|
||||
|
||||
use think\Db;
|
||||
use think\queue\Job;
|
||||
use app\common\QueueJob;
|
||||
use app\common\QueueRedis;
|
||||
use app\common\ReferenceCheckService;
|
||||
use app\common\service\LLMService;
|
||||
|
||||
/**
 * Queue job that runs one citation relevance check.
 *
 * The job payload carries a check_id. The matching row of
 * article_reference_check_result supplies the citing text and the reference
 * text, LLMService::checkReference() compares them, and the verdict is
 * written back to the row. Afterwards the owning section's status is synced
 * through ReferenceCheckService::syncAmRefCheckStatus().
 */
class ReferenceCheck
{
    /** @var QueueJob Shared queue helper (logging, locking, exception handling) */
    private $oQueueJob;

    /** @var QueueRedis Redis helper used to mark a job as finished */
    private $QueueRedis;

    /** @var int Value passed to finishJob() as the expiry of the "completed" marker */
    private $completedExpire = 3600;

    public function __construct()
    {
        $this->oQueueJob = new QueueJob();
        $this->QueueRedis = QueueRedis::getInstance();
    }

    /**
     * Entry point called by the queue worker.
     *
     * @param Job   $job  Queue job handle
     * @param array $data Payload; check_id is the result row id
     * @return void
     */
    public function fire(Job $job, $data)
    {
        $this->oQueueJob->init($job);

        // Read the raw body once; fall back to an empty array when it is
        // missing or does not decode to an array.
        $rawBody = $job->getRawBody();
        $jobData = empty($rawBody) ? [] : json_decode($rawBody, true);
        if (!is_array($jobData)) {
            $jobData = [];
        }
        $jobId = empty($jobData['id']) ? 'unknown' : $jobData['id'];

        $sRedisKey = '';
        $sRedisValue = '';

        $this->oQueueJob->log("-----------队列任务开始-----------");
        $this->oQueueJob->log("当前任务ID: {$jobId}, 尝试次数: {$job->attempts()}");

        try {
            $checkId = intval(isset($data['check_id']) ? $data['check_id'] : 0);

            // Validate the payload BEFORE taking the lock: an invalid job
            // has nothing to protect, and a lock taken here was never
            // finished on this path.
            if ($checkId <= 0) {
                $this->oQueueJob->log("ReferenceCheck: invalid check_id, job dropped");
                $job->delete();
                return;
            }

            $sClassName = get_class($this);
            $sRedisKey = "queue_job:{$sClassName}:{$checkId}";
            $sRedisValue = uniqid() . '_' . getmypid();

            if (!$this->oQueueJob->acquireLock($sRedisKey, $sRedisValue, $job)) {
                return;
            }

            // NOTE(review): the early returns below, the "marked failed" path
            // and the release(30) retry path all leave without calling
            // finishJob(). Presumably the lock expires on its own; confirm
            // against QueueJob::acquireLock().
            $row = Db::name('article_reference_check_result')->where('id', $checkId)->find();
            if (empty($row)) {
                $job->delete();
                return;
            }

            // Status 1 means the check is already done.
            if (intval($row['status']) === 1) {
                $job->delete();
                return;
            }

            try {
                $contentA = trim((string)(isset($row['origin_text']) ? $row['origin_text'] : ''));
                $contentB = trim((string)(isset($row['refer_text']) ? $row['refer_text'] : ''));

                // No stored reference text: rebuild it from the production reference row.
                if ($contentB === '' && intval($row['p_refer_id']) > 0) {
                    $refer = Db::name('production_article_refer')
                        ->where('p_refer_id', intval($row['p_refer_id']))
                        ->where('status', 0)
                        ->find();
                    if ($refer) {
                        $contentB = (new ReferenceCheckService())->formatReferForLlm($refer);
                    }
                }

                // Nothing to compare: record the failure and drop the job.
                if ($contentA === '' || $contentB === '') {
                    $this->markFailed($checkId, 'Missing content_a or reference text');
                    $job->delete();
                    return;
                }

                $llm = new LLMService();
                $llmResult = $llm->checkReference($contentA, $contentB);

                Db::name('article_reference_check_result')->where('id', $checkId)->update([
                    'is_match'   => !empty($llmResult['is_match']) ? 1 : 0,
                    'confidence' => $llmResult['confidence'],
                    'reason'     => $llmResult['reason'],
                    'status'     => 1,
                    'error_msg'  => '',
                    'updated_at' => date('Y-m-d H:i:s'),
                ]);

                $amId = intval(isset($row['am_id']) ? $row['am_id'] : 0);
                if ($amId > 0) {
                    (new ReferenceCheckService())->syncAmRefCheckStatus($amId);
                }

                $this->QueueRedis->finishJob($sRedisKey, 'completed', $this->completedExpire, $sRedisValue);
                $job->delete();
                $this->oQueueJob->log("任务执行成功 | 日志ID: {$sRedisKey}");
            } catch (\Exception $e) {
                // Write to the job log instead of dumping to the worker's stdout.
                $this->oQueueJob->log(
                    "ReferenceCheck failed | check_id: {$checkId}, attempt: {$job->attempts()}, error: {$e->getMessage()}"
                );

                // Give up after three attempts; otherwise retry in 30 seconds.
                if ($job->attempts() >= 3) {
                    $this->markFailed($checkId, $e->getMessage());
                    $job->delete();
                    return;
                }
                $job->release(30);
            }
        } catch (\RuntimeException $e) {
            $this->oQueueJob->handleRetryableException($e, $sRedisKey, $sRedisValue, $job);
        } catch (\LogicException $e) {
            $this->oQueueJob->handleNonRetryableException($e, $sRedisKey, $sRedisValue, $job);
        } catch (\Exception $e) {
            $this->oQueueJob->handleRetryableException($e, $sRedisKey, $sRedisValue, $job);
        } finally {
            $this->oQueueJob->finnal();
        }
    }

    /**
     * Mark a check row as failed (status 2), store a truncated error message
     * and re-sync the status of the section the row belongs to.
     *
     * @param int    $checkId Result row id
     * @param string $msg     Error message; only the first 500 characters are stored
     * @return void
     */
    private function markFailed($checkId, $msg)
    {
        $row = Db::name('article_reference_check_result')->where('id', $checkId)->find();

        Db::name('article_reference_check_result')->where('id', $checkId)->update([
            'status'     => 2,
            'error_msg'  => mb_substr((string)$msg, 0, 500),
            'updated_at' => date('Y-m-d H:i:s'),
        ]);

        $amId = empty($row) ? 0 : intval(isset($row['am_id']) ? $row['am_id'] : 0);
        if ($amId > 0) {
            (new ReferenceCheckService())->syncAmRefCheckStatus($amId);
        }
    }
}
|
||||
Reference in New Issue
Block a user