参考文献校对升级
This commit is contained in:
@@ -1178,6 +1178,107 @@ class Base extends Controller
|
|||||||
return $ids;
|
return $ids;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Expand the inner text of a bracketed citation (e.g. "1,2" or "3-5") into reference numbers.
 *
 * Segments are separated by commas. A segment of the form "a-b" is expanded to every
 * integer of the inclusive range (the bounds are swapped when written in descending order).
 * Full-width commas and the Unicode dash variants are normalised to ASCII before parsing.
 * Input order is preserved and duplicates are kept; callers de-duplicate if they need to.
 *
 * @param string $referencePart Text found between "[" and "]".
 * @return int[] Positive reference numbers; empty when nothing usable is found.
 */
protected function expandCitationBracketNumbers(string $referencePart): array
{
    // The text comes from article content, so a single "a-b" segment must not be able
    // to allocate an arbitrarily large array (e.g. "[1-99999999]"). Oversized ranges are skipped.
    $maxRangeSize = 1000;

    // The first search entry is the FULL-WIDTH comma (U+FF0C); the rest are dash look-alikes.
    $normalized = str_replace(
        [',', '–', '—', '−', '‐', '‑'],
        [',', '-', '-', '-', '-', '-'],
        trim($referencePart)
    );
    if ($normalized === '') {
        return [];
    }

    $numbers = [];
    $segments = preg_split('/\s*,\s*/', $normalized) ?: [];
    foreach ($segments as $segment) {
        $segment = trim((string)$segment);
        if ($segment === '') {
            continue;
        }

        if (preg_match('/^(\d+)\s*-\s*(\d+)$/', $segment, $bounds)) {
            $first = intval($bounds[1]);
            $second = intval($bounds[2]);
            // Reference numbers start at 1, so a range such as "0-2" must not emit 0.
            $low = max(1, min($first, $second));
            $high = max($first, $second);
            if ($high - $low >= $maxRangeSize) {
                continue;
            }
            for ($no = $low; $no <= $high; $no++) {
                $numbers[] = $no;
            }
            continue;
        }

        $single = intval($segment);
        if ($single > 0) {
            $numbers[] = $single;
        }
    }

    return $numbers;
}
|
||||||
|
|
||||||
|
/**
 * Collect the reference numbers cited in a main-text fragment (reference_no = index + 1).
 *
 * Two citation forms are recognised:
 *  - ids returned by extractMyciteIds() (per the original note, <mycite data-id="p_refer_id"> tags),
 *    mapped to numbers through production_article_refer; only done when $pArticleId > 0;
 *  - bracketed numbers such as [3] or [1,2-4], optionally wrapped in <blue>...</blue>.
 *
 * @param string $text       Main-text fragment (may contain markup).
 * @param int    $pArticleId Production article id used to resolve mycite ids; 0 skips that lookup.
 * @return int[] Unique reference numbers in ascending order.
 */
protected function extractCitationRefNosFromMainContent(string $text, int $pArticleId = 0): array
{
    if ($text === '') {
        return [];
    }

    $nos = [];

    $pReferIds = $this->extractMyciteIds($text);
    if (!empty($pReferIds) && $pArticleId > 0) {
        $refers = Db::name('production_article_refer')
            ->where('p_article_id', $pArticleId)
            ->whereIn('p_refer_id', $pReferIds)
            ->where('state', 0)
            ->field('p_refer_id,index')
            ->select();

        // p_refer_id => reference number (stored index is zero-based).
        $idToNo = [];
        foreach ($refers as $row) {
            $idToNo[intval($row['p_refer_id'])] = intval($row['index']) + 1;
        }
        foreach ($pReferIds as $pid) {
            if (isset($idToNo[$pid])) {
                $nos[] = $idToNo[$pid];
            }
        }
    }

    if (preg_match_all('/(?:<\s*blue[^>]*>)?\[([^\]]+)\](?:<\/\s*blue\s*>)?/iu', $text, $m)) {
        foreach ($m[1] as $inner) {
            // Normalise the FULL-WIDTH comma (U+FF0C) and dash look-alikes first; otherwise
            // the digits-only filter below would discard brackets such as "[1,2]".
            $innerNorm = str_replace(
                [',', '–', '—', '−', '‐', '‑'],
                [',', '-', '-', '-', '-', '-'],
                trim((string)$inner)
            );
            // Skip brackets that are not purely numeric lists/ranges (e.g. "[Fig. 1]").
            if (!preg_match('/^[\d\s,\-]+$/u', $innerNorm)) {
                continue;
            }
            foreach ($this->expandCitationBracketNumbers($innerNorm) as $n) {
                if ($n > 0) {
                    $nos[] = $n;
                }
            }
        }
    }

    $nos = array_values(array_unique($nos));
    sort($nos, SORT_NUMERIC);

    return $nos;
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* table_data:二维数组 JSON [[{text,colspan,rowspan},...],...];支持双重 JSON 字符串编码。
|
* table_data:二维数组 JSON [[{text,colspan,rowspan},...],...];支持双重 JSON 字符串编码。
|
||||||
*
|
*
|
||||||
|
|||||||
@@ -7,7 +7,7 @@ use think\Env;
|
|||||||
use think\Queue;
|
use think\Queue;
|
||||||
use think\Validate;
|
use think\Validate;
|
||||||
use app\common\CrossrefService;
|
use app\common\CrossrefService;
|
||||||
use app\common\ReferenceCheckService;
|
use app\common\ReferenceRelevanceCheckService;
|
||||||
|
|
||||||
class Preaccept extends Base
|
class Preaccept extends Base
|
||||||
{
|
{
|
||||||
@@ -27,7 +27,7 @@ class Preaccept extends Base
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
try {
|
try {
|
||||||
(new ReferenceCheckService())->clearArticleChecksByPArticleId($pArticleId);
|
(new ReferenceRelevanceCheckService())->clearArticleChecksByPArticleId($pArticleId);
|
||||||
} catch (\Exception $e) {
|
} catch (\Exception $e) {
|
||||||
\think\Log::error(
|
\think\Log::error(
|
||||||
'resetArticleChecksOnReferChange[' . $sourceTag . '] p_article_id='
|
'resetArticleChecksOnReferChange[' . $sourceTag . '] p_article_id='
|
||||||
@@ -1220,6 +1220,14 @@ class Preaccept extends Base
|
|||||||
$insert['ctime'] = time();
|
$insert['ctime'] = time();
|
||||||
$this->article_main_log_obj->insert($insert);
|
$this->article_main_log_obj->insert($insert);
|
||||||
|
|
||||||
|
// $articleId = intval($am_info['article_id']);
|
||||||
|
// $amId = intval($data['am_id']);
|
||||||
|
//
|
||||||
|
// // 本段引用集合变化(如 10,11 → 11,12)时仅清空该 am_id 下的校对明细
|
||||||
|
// if ($this->hasMainCitationChange($old_content, $new_raw_content, $articleId)) {
|
||||||
|
// $this->clearMainChecksOnCitationChange($articleId, $amId);
|
||||||
|
// }
|
||||||
|
|
||||||
// 判断是否存在“引用删除”(新 content 相对旧 content 缺少 <mycite>)
|
// 判断是否存在“引用删除”(新 content 相对旧 content 缺少 <mycite>)
|
||||||
$hasCitationDeletion = $this->hasMyciteDeletion($old_content, $new_raw_content);
|
$hasCitationDeletion = $this->hasMyciteDeletion($old_content, $new_raw_content);
|
||||||
|
|
||||||
@@ -1245,6 +1253,39 @@ class Preaccept extends Base
|
|||||||
//返回更新数据 20260119 end
|
//返回更新数据 20260119 end
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * After one main-text section is saved, clear only the citation-check details
 * recorded for that am_id (located through article_id).
 *
 * Nothing happens when either id is not positive. A failure in the service is
 * logged and not rethrown.
 */
private function clearMainChecksOnCitationChange(int $articleId, int $amId)
{
    $idsAreValid = $articleId > 0 && $amId > 0;
    if (!$idsAreValid) {
        return;
    }

    try {
        $checkService = new ReferenceCheckService();
        $checkService->clearChecksByAmId($articleId, $amId);
    } catch (\Exception $e) {
        $message = 'clearMainChecksOnCitationChange article_id=' . $articleId
            . ' am_id=' . $amId . ' ' . $e->getMessage();
        \think\Log::error($message);
    }
}
|
||||||
|
|
||||||
|
/**
 * Tell whether the set of cited reference numbers differs between the old and
 * the new content of a main-text section (any addition, removal or change).
 *
 * Per the original note, the old content usually carries <blue>[n]</blue> markers
 * while the new content usually carries <mycite data-id="p_refer_id"> tags; both are
 * reduced to reference numbers before being compared.
 */
private function hasMainCitationChange(string $oldContent, string $newContent, int $articleId): bool
{
    // Resolve the production article (state 0 or 2) so mycite ids can be mapped to numbers.
    $productionQuery = Db::name('production_article')
        ->where('article_id', $articleId)
        ->whereIn('state', [0, 2]);
    $pArticleId = intval($productionQuery->value('p_article_id'));

    $before = $this->extractCitationRefNosFromMainContent($oldContent, $pArticleId);
    $after = $this->extractCitationRefNosFromMainContent($newContent, $pArticleId);

    if ($before === $after) {
        return false;
    }

    return true;
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* 是否发生 <mycite> 删除(new 相对 old 少了任意引用 id)
|
* 是否发生 <mycite> 删除(new 相对 old 少了任意引用 id)
|
||||||
*/
|
*/
|
||||||
|
|||||||
@@ -12,6 +12,8 @@ use think\Db;
|
|||||||
use think\Env;
|
use think\Env;
|
||||||
use think\Queue;
|
use think\Queue;
|
||||||
use app\common\ReferenceCheckService;
|
use app\common\ReferenceCheckService;
|
||||||
|
use app\common\ReferenceRelevanceCheckService;
|
||||||
|
use app\common\DbReconnectHelper;
|
||||||
/**
|
/**
|
||||||
* @title 参考文献
|
* @title 参考文献
|
||||||
* @description 相关方法汇总
|
* @description 相关方法汇总
|
||||||
@@ -1309,11 +1311,195 @@ class References extends Base
|
|||||||
}
|
}
|
||||||
return json_encode(['status' => 8,'msg' => 'fail']);
|
return json_encode(['status' => 8,'msg' => 'fail']);
|
||||||
}
|
}
|
||||||
|
// ============================================================
|
||||||
|
// 参考文献「主题相关性」校对(独立模块,RabbitMQ 链式消费)
|
||||||
|
// 表:t_article_reference_relevance_check_result / t_article_reference_relevance_check_batch
|
||||||
|
// 消费:php think reference_relevance:mq-consume
|
||||||
|
// ============================================================
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* 参考文献第一次校对
|
* 启动整篇参考文献相关性校对
|
||||||
|
* POST: p_article_id(必填)
|
||||||
|
*
|
||||||
|
* 文献摘要/内容优先读 t_production_article_refer.abstract_text、refer_content_cleaned;
|
||||||
|
* 二者都为空时在校对执行阶段抓取并回写 refer 表,校对时始终从 refer 表读取。
|
||||||
|
*/
|
||||||
|
public function allReferenceCheckAI()
{
    // Starts the relevance check for all references of one production article.
    // POST: p_article_id (required). Refuses to run when relevance records already
    // exist for the article; the error message points to referenceRelevanceCheckResetAI.
    $postData = $this->request->post();
    $pArticleId = intval($postData['p_article_id'] ?? 0);
    if ($pArticleId <= 0) {
        return jsonError('Please select an article');
    }

    $article = Db::name('production_article')
        ->field('p_article_id,article_id')
        ->where(['p_article_id' => $pArticleId, 'state' => ['in', [0, 2]]])
        ->find();
    if (empty($article)) {
        return jsonError('No articles found');
    }

    if ($this->checkReferStatus($pArticleId) == 0) {
        return jsonError('Please correct the reference content before running the check.');
    }

    $recordCount = Db::name('article_reference_relevance_check_result')
        ->where('p_article_id', $pArticleId)
        ->count();
    if (intval($recordCount) > 0) {
        return jsonError('This article already has relevance check records. Use referenceRelevanceCheckResetAI to rerun.');
    }

    try {
        DbReconnectHelper::ensure();
        $service = new ReferenceRelevanceCheckService();
        $enqueued = $service->enqueueByPArticle($article);

        // No check ids means no citation was found to enqueue.
        return empty($enqueued['check_ids'])
            ? jsonError('No reference citations were found in the article.')
            : jsonSuccess($enqueued);
    } catch (\Exception $e) {
        return jsonError($e->getMessage());
    }
}
|
||||||
|
|
||||||
|
/**
 * Report the relevance-check progress of one production article.
 *
 * POST: p_article_id (required)
 */
public function referenceRelevanceCheckProgressAI()
{
    $postData = $this->request->post();
    $pArticleId = intval($postData['p_article_id'] ?? 0);
    if ($pArticleId <= 0) {
        return jsonError('p_article_id is required');
    }

    try {
        $service = new ReferenceRelevanceCheckService();
        $progress = $service->getProgressByPArticleId($pArticleId);

        return jsonSuccess($progress);
    } catch (\Exception $e) {
        return jsonError($e->getMessage());
    }
}
|
||||||
|
|
||||||
|
/**
 * Return the overall relevance-check status of a whole article, looked up by
 * p_article_id (the original note says the front end uses it to route its buttons).
 *
 * POST/GET: p_article_id (required)
 *
 * Per the original note, the returned status is 0 = not checked, 1 = checking,
 * 2 = finished, counted per reference (grouped by reference_no) like
 * referenceRelevanceCheckProgressAI. That logic lives in the service and is not visible here.
 */
public function referenceRelevanceCheckArticleStatusAI()
{
    // Fall back to the merged request parameters when the POST body is empty.
    $input = $this->request->post() ?: $this->request->param();

    $pArticleId = intval($input['p_article_id'] ?? 0);
    if ($pArticleId <= 0) {
        return jsonError('p_article_id is required');
    }

    try {
        $service = new ReferenceRelevanceCheckService();
        $status = $service->getArticleProgressStatusByPArticleId($pArticleId);

        return jsonSuccess($status);
    } catch (\Exception $e) {
        return jsonError($e->getMessage());
    }
}
|
||||||
|
|
||||||
|
/**
 * Return the relevance-check details of a single reference.
 *
 * POST: p_refer_id (required)
 */
public function referenceRelevanceCheckDetailsAI()
{
    $postData = $this->request->post();
    $pReferId = intval($postData['p_refer_id'] ?? 0);
    if ($pReferId <= 0) {
        return jsonError('p_refer_id is required');
    }

    try {
        $service = new ReferenceRelevanceCheckService();
        $details = $service->getDetailsByPReferId($pReferId);

        return jsonSuccess($details);
    } catch (\Exception $e) {
        return jsonError($e->getMessage());
    }
}
|
||||||
|
|
||||||
|
/**
 * Clear the existing relevance-check records of an article and run the check again.
 *
 * POST: p_article_id (required)
 */
public function referenceRelevanceCheckResetAI()
{
    $postData = $this->request->post();
    $pArticleId = intval($postData['p_article_id'] ?? 0);
    if ($pArticleId <= 0) {
        return jsonError('Please select an article');
    }

    $article = Db::name('production_article')
        ->field('p_article_id,article_id')
        ->where(['p_article_id' => $pArticleId, 'state' => ['in', [0, 2]]])
        ->find();
    if (empty($article)) {
        return jsonError('No articles found');
    }

    if ($this->checkReferStatus($pArticleId) == 0) {
        return jsonError('Please correct the reference content before running the check.');
    }

    try {
        $service = new ReferenceRelevanceCheckService();
        $outcome = $service->resetAndRecheckByArticle($article);

        return jsonSuccess($outcome);
    } catch (\Exception $e) {
        return jsonError($e->getMessage());
    }
}
|
||||||
|
|
||||||
|
/**
 * Clear the relevance-check records of an article without re-running the check.
 *
 * POST: p_article_id (required)
 */
public function referenceRelevanceCheckClearAI()
{
    $postData = $this->request->post();
    $pArticleId = intval($postData['p_article_id'] ?? 0);
    if ($pArticleId <= 0) {
        return jsonError('p_article_id is required');
    }

    try {
        $service = new ReferenceRelevanceCheckService();
        $deletedCount = $service->clearByPArticleId($pArticleId);

        $response = [
            'p_article_id' => $pArticleId,
            'deleted' => intval($deletedCount),
        ];

        return jsonSuccess($response);
    } catch (\Exception $e) {
        return jsonError($e->getMessage());
    }
}
|
||||||
|
|
||||||
|
/**
 * Re-run only the relevance-check records that are still pending.
 *
 * Per the original note this targets status = 0 records and does not clear results,
 * fetch abstracts or re-clean reference content; that behaviour lives in the service.
 *
 * POST: p_article_id (required)
 */
public function referenceRelevanceCheckRecheckPendingAI()
{
    $postData = $this->request->post();
    $pArticleId = intval($postData['p_article_id'] ?? 0);
    if ($pArticleId <= 0) {
        return jsonError('p_article_id is required');
    }

    try {
        $service = new ReferenceRelevanceCheckService();
        $outcome = $service->recheckPendingOnlyByArticle($pArticleId);

        return jsonSuccess($outcome);
    } catch (\Exception $e) {
        return jsonError($e->getMessage());
    }
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 参考文献第一次校对(支撑力度)
|
||||||
* @return \think\response\Json
|
* @return \think\response\Json
|
||||||
*/
|
*/
|
||||||
public function allReferenceCheckAI(){
|
public function allReferenceCheckAI2(){
|
||||||
//获取参数
|
//获取参数
|
||||||
$aParam = empty($aParam) ? $this->request->post() : $aParam;
|
$aParam = empty($aParam) ? $this->request->post() : $aParam;
|
||||||
|
|
||||||
@@ -1537,7 +1723,6 @@ class References extends Base
|
|||||||
* p_article_id(可选)
|
* p_article_id(可选)
|
||||||
*
|
*
|
||||||
* 仅重跑 status=3(校对失败)的记录;不改动 refer_text,只重置结果字段后入 RabbitMQ 批次队列。
|
* 仅重跑 status=3(校对失败)的记录;不改动 refer_text,只重置结果字段后入 RabbitMQ 批次队列。
|
||||||
* 返回:p_refer_id、p_article_id、reset、queued、check_ids、queue
|
|
||||||
*/
|
*/
|
||||||
public function referenceCheckRecheckFailedAI()
|
public function referenceCheckRecheckFailedAI()
|
||||||
{
|
{
|
||||||
@@ -1561,6 +1746,36 @@ class References extends Base
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Re-run the failed checks of one reference and, per the original note, also re-run
 * every record that shares its citation group (e.g. "[1,2]"), asynchronously.
 *
 * POST/GET: p_refer_id (required)
 *           p_article_id (optional)
 *
 * Documented result keys: p_refer_id, p_article_id, reset, queued, check_ids, queue
 * (produced by the service; not visible here).
 */
public function referenceCheckRecheckFailedWithGroupAI()
{
    // Fall back to the merged request parameters when the POST body is empty.
    $input = $this->request->post() ?: $this->request->param();

    $pReferId = intval($input['p_refer_id'] ?? 0);
    if ($pReferId <= 0) {
        return json_encode(['status' => 2, 'msg' => 'Please select a reference']);
    }

    $pArticleId = intval($input['p_article_id'] ?? 0);

    try {
        $service = new ReferenceCheckService();
        $outcome = $service->enqueueRecheckFailedByPReferIdWithGroup($pReferId, $pArticleId);

        return jsonSuccess($outcome);
    } catch (\Exception $e) {
        return jsonError($e->getMessage());
    }
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* 按 p_refer_id 查单条参考文献的校对明细与进度
|
* 按 p_refer_id 查单条参考文献的校对明细与进度
|
||||||
*
|
*
|
||||||
@@ -1590,6 +1805,47 @@ class References extends Base
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Re-scan the article and enqueue checks for references that never appeared in the
 * check details.
 *
 * POST/GET: p_article_id (required)
 *
 * Per the original note, the missing set is production_article_refer (state = 0) minus
 * the p_refer_id values already present in article_reference_check_result. It is meant
 * for references missed by the first run, tables uploaded later, or citations added
 * afterwards; existing details are not reset and a first check must already have been run.
 * That logic lives in the service and is not visible here.
 *
 * Documented result keys: missing_p_refer_ids, matched_p_refer_ids,
 * still_unmatched_p_refer_ids, queued, new_reference_nos, check_ids, queue.
 */
public function referenceCheckRematchNewAI()
{
    // Fall back to the merged request parameters when the POST body is empty.
    $input = $this->request->post() ?: $this->request->param();

    $pArticleId = intval($input['p_article_id'] ?? 0);
    if ($pArticleId <= 0) {
        return json_encode(['status' => 2, 'msg' => 'Please select an article']);
    }

    $article = Db::name('production_article')
        ->field('p_article_id,article_id')
        ->where(['p_article_id' => $pArticleId, 'state' => ['in', [0, 2]]])
        ->find();
    if (empty($article)) {
        return json_encode(['status' => 3, 'msg' => 'No articles found']);
    }

    if ($this->checkReferStatus($pArticleId) == 0) {
        return jsonError('Please correct the reference content before running the check.');
    }

    try {
        $service = new ReferenceCheckService();
        $outcome = $service->enqueueNewlyMatchedByPArticle($article);

        return jsonSuccess($outcome);
    } catch (\Exception $e) {
        return jsonError($e->getMessage());
    }
}
|
||||||
|
|
||||||
public function checkReferStatus($p_article_id){
|
public function checkReferStatus($p_article_id){
|
||||||
$list = $this->production_article_refer_obj->where('p_article_id', $p_article_id)->where('state', 0)->select();
|
$list = $this->production_article_refer_obj->where('p_article_id', $p_article_id)->where('state', 0)->select();
|
||||||
if (!$list) {
|
if (!$list) {
|
||||||
@@ -1604,4 +1860,6 @@ class References extends Base
|
|||||||
}
|
}
|
||||||
return $frag;
|
return $frag;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -11,4 +11,5 @@
|
|||||||
|
|
||||||
return [
|
return [
|
||||||
'app\\command\\ReferenceCheckMqConsume',
|
'app\\command\\ReferenceCheckMqConsume',
|
||||||
|
'app\\command\\ReferenceRelevanceMqConsume',
|
||||||
];
|
];
|
||||||
|
|||||||
@@ -96,6 +96,68 @@ class PubmedService
|
|||||||
return $info;
|
return $info;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Search PubMed by bibliographic data: title, first author and publication year.
 *
 * The title is mandatory. The author clause uses only the text before the first
 * "," or ";". The year clause is added only for a four-digit 19xx/20xx value.
 *
 * @param mixed $title  Article title.
 * @param mixed $author Author string; only its first part is used.
 * @param mixed $year   Publication year.
 * @return array|null The fetched record with "pmid" and "doi" added, or null when the
 *                    title is empty, the search finds nothing, or the fetch fails.
 */
public function searchByBibliographic($title, $author = '', $year = ''): ?array
{
    $title = trim((string)$title);
    if ($title === '') {
        return null;
    }

    $clauses = [];
    $clauses[] = '(' . $this->quoteTerm($title) . '[Title])';

    $authorText = trim((string)$author);
    if ($authorText !== '') {
        $authorParts = preg_split('/[,;]/', $authorText);
        $firstAuthor = trim((string)($authorParts[0] ?? ''));
        if ($firstAuthor !== '') {
            $clauses[] = '(' . $this->quoteTerm($firstAuthor) . '[Author])';
        }
    }

    $yearText = trim((string)$year);
    // An empty string cannot match the four-digit pattern, so no separate emptiness test is needed.
    if (preg_match('/^(19|20)\d{2}$/', $yearText)) {
        $clauses[] = '(' . $yearText . '[pdat])';
    }

    $query = implode(' AND ', $clauses);
    $pmid = $this->esearch($query);
    if (!$pmid) {
        return null;
    }

    $record = $this->fetchByPmid($pmid);
    if (!$record) {
        return null;
    }

    $record['pmid'] = $pmid;
    $record['doi'] = $this->extractDoiFromPmidRecord($pmid);

    return $record;
}
|
||||||
|
|
||||||
|
/**
 * Prepare free text for use inside a PubMed search clause.
 *
 * Despite the name, this does not add quotes: it removes every double quote from
 * the text and trims the result.
 *
 * @param mixed $text Raw term (cast to string).
 * @return string Term without double quotes or surrounding whitespace.
 */
private function quoteTerm($text)
{
    // Strip the quotes first, then trim: trimming first would leave the inner padding
    // of an input such as '" foo "' in place once its quotes are removed.
    return trim(str_replace('"', '', (string)$text));
}
|
||||||
|
|
||||||
|
/**
 * Fetch the PubMed XML record of a PMID and return the article's own DOI.
 *
 * @param mixed $pmid PubMed identifier passed to efetch.
 * @return string The DOI, or '' when the request returns nothing or the record has no DOI.
 */
private function extractDoiFromPmidRecord($pmid)
{
    $url = $this->base . 'efetch.fcgi?' . http_build_query([
        'db' => 'pubmed',
        'id' => $pmid,
        'retmode' => 'xml',
        'tool' => $this->tool,
        'email' => $this->email,
    ]);

    $xml = $this->httpGet($url);
    if (!is_string($xml) || $xml === '') {
        return '';
    }

    // Cited works listed under <ReferenceList> carry their own <ArticleId IdType="doi">.
    // In the PubMed record layout the article's ArticleIdList precedes ReferenceList, so
    // everything from the first <ReferenceList> on is dropped; otherwise an article
    // without a DOI would be given the DOI of one of its references.
    $referenceListPos = stripos($xml, '<ReferenceList');
    if ($referenceListPos !== false) {
        $xml = substr($xml, 0, $referenceListPos);
    }

    if (preg_match('/<ArticleId IdType="doi">([^<]+)<\/ArticleId>/i', $xml, $m)) {
        // DOIs may contain characters that are entity-encoded in XML (e.g. "&lt;", "&amp;").
        return trim(html_entity_decode($m[1], ENT_QUOTES | ENT_XML1, 'UTF-8'));
    }

    return '';
}
|
||||||
|
|
||||||
// ----------------- Internals -----------------
|
// ----------------- Internals -----------------
|
||||||
|
|
||||||
private function esearch(string $term): ?string
|
private function esearch(string $term): ?string
|
||||||
|
|||||||
File diff suppressed because it is too large
Load Diff
@@ -21,4 +21,10 @@ class RabbitMqConfig
|
|||||||
$rc = self::get('reference_check', []);
|
$rc = self::get('reference_check', []);
|
||||||
return is_array($rc) ? $rc : [];
|
return is_array($rc) ? $rc : [];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public static function referenceRelevance()
{
    // Returns the "reference_relevance" configuration section, or an empty array
    // when the section is missing or is not an array.
    $section = self::get('reference_relevance', []);
    if (!is_array($section)) {
        return [];
    }

    return $section;
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -3,6 +3,7 @@
|
|||||||
namespace app\common\mq;
|
namespace app\common\mq;
|
||||||
|
|
||||||
use think\Db;
|
use think\Db;
|
||||||
|
use app\common\DbReconnectHelper;
|
||||||
use app\common\ReferenceCheckService;
|
use app\common\ReferenceCheckService;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -25,6 +26,7 @@ class ReferenceCheckArticleWorker
|
|||||||
|
|
||||||
public function handleMessage(array $payload)
|
public function handleMessage(array $payload)
|
||||||
{
|
{
|
||||||
|
DbReconnectHelper::ensure();
|
||||||
$pArticleId = intval(isset($payload['p_article_id']) ? $payload['p_article_id'] : 0);
|
$pArticleId = intval(isset($payload['p_article_id']) ? $payload['p_article_id'] : 0);
|
||||||
$batchId = intval(isset($payload['batch_id']) ? $payload['batch_id'] : 0);
|
$batchId = intval(isset($payload['batch_id']) ? $payload['batch_id'] : 0);
|
||||||
if ($pArticleId <= 0 || $batchId <= 0) {
|
if ($pArticleId <= 0 || $batchId <= 0) {
|
||||||
@@ -115,6 +117,7 @@ class ReferenceCheckArticleWorker
|
|||||||
*/
|
*/
|
||||||
private function processOneRow($checkId, array $row)
|
private function processOneRow($checkId, array $row)
|
||||||
{
|
{
|
||||||
|
DbReconnectHelper::ensure();
|
||||||
$claimed = Db::name('article_reference_check_result')
|
$claimed = Db::name('article_reference_check_result')
|
||||||
->where('id', intval($checkId))
|
->where('id', intval($checkId))
|
||||||
->where('queue_status', ReferenceCheckService::QUEUE_PENDING)
|
->where('queue_status', ReferenceCheckService::QUEUE_PENDING)
|
||||||
@@ -134,6 +137,7 @@ class ReferenceCheckArticleWorker
|
|||||||
return 'ok';
|
return 'ok';
|
||||||
} catch (\Exception $e) {
|
} catch (\Exception $e) {
|
||||||
$this->svc->log('ReferenceCheckArticleWorker check_id=' . $checkId . ' err=' . $e->getMessage());
|
$this->svc->log('ReferenceCheckArticleWorker check_id=' . $checkId . ' err=' . $e->getMessage());
|
||||||
|
DbReconnectHelper::ensure();
|
||||||
if ($retryCount < ReferenceCheckService::QUEUE_MAX_RETRY) {
|
if ($retryCount < ReferenceCheckService::QUEUE_MAX_RETRY) {
|
||||||
$this->svc->markQueueRuntime($checkId, ReferenceCheckService::QUEUE_PENDING, $retryCount + 1);
|
$this->svc->markQueueRuntime($checkId, ReferenceCheckService::QUEUE_PENDING, $retryCount + 1);
|
||||||
return $this->processOneRow($checkId, array_merge($row, ['retry_count' => $retryCount + 1]));
|
return $this->processOneRow($checkId, array_merge($row, ['retry_count' => $retryCount + 1]));
|
||||||
|
|||||||
@@ -28,18 +28,17 @@ class LLMService
|
|||||||
* @param string $contextText 正文引用处句子
|
* @param string $contextText 正文引用处句子
|
||||||
* @param string $referText 参考文献条目(或 refer 格式化文本)
|
* @param string $referText 参考文献条目(或 refer 格式化文本)
|
||||||
* @param bool $isAgain 是否为 DOI 二次复核
|
* @param bool $isAgain 是否为 DOI 二次复核
|
||||||
* @param string|null $doiBlock 可选:系统抓取到的 DOI 真实文献内容(仅二次复核使用)
|
* @param string|null $doiBlock 可选:系统抓取到的 DOI 真实文献内容(仅二次复核使用)
|
||||||
|
* @param string $citeGroupRefs 引用文献组,如 1,2 或 4,5,6
|
||||||
|
* @param string $localContext 本引用位置附近上下文(可选)
|
||||||
|
* @return array{results:array,request_failed?:bool}
|
||||||
*/
|
*/
|
||||||
public function checkReference($contextText, $referText, $isAgain = false, $doiBlock = null)
|
public function checkReference($contextText, $referText, $isAgain = false, $doiBlock = null, $citeGroupRefs = '', $localContext = '')
|
||||||
{
|
{
|
||||||
// request_failed=true 表示"LLM 通讯/解析层面的失败"(可重试,区别于业务上的"未命中");
|
|
||||||
// 上游 runReferenceCheckOnce 会据此把 DB.status 置为 3(失败) 并抛异常触发 MQ worker 重试
|
|
||||||
$fallback = [
|
$fallback = [
|
||||||
'can_support' => false,
|
'results' => [],
|
||||||
'is_match' => false,
|
|
||||||
'confidence' => 0.0,
|
|
||||||
'reason' => 'LLM not configured or request failed',
|
|
||||||
'request_failed' => true,
|
'request_failed' => true,
|
||||||
|
'reason' => 'LLM not configured or request failed',
|
||||||
];
|
];
|
||||||
if ($this->url === '' || $this->model === '') {
|
if ($this->url === '' || $this->model === '') {
|
||||||
\think\Log::warning('ReferenceCheck LLM: url or model not configured');
|
\think\Log::warning('ReferenceCheck LLM: url or model not configured');
|
||||||
@@ -47,15 +46,16 @@ class LLMService
|
|||||||
}
|
}
|
||||||
|
|
||||||
$contextText = trim($contextText);
|
$contextText = trim($contextText);
|
||||||
|
\think\Log::info('llm checkReference:' . $contextText);
|
||||||
$referText = trim($referText);
|
$referText = trim($referText);
|
||||||
|
\think\Log::info('llm referText:' . $referText);
|
||||||
$doiBlock = trim((string)$doiBlock);
|
$doiBlock = trim((string)$doiBlock);
|
||||||
|
$citeGroupRefs = trim((string)$citeGroupRefs);
|
||||||
|
$localContext = trim((string)$localContext);
|
||||||
if ($contextText === '' || $referText === '') {
|
if ($contextText === '' || $referText === '') {
|
||||||
// 空文本是入参问题,不是 LLM 故障,不需要重试
|
|
||||||
return [
|
return [
|
||||||
'can_support' => false,
|
'results' => [],
|
||||||
'is_match' => false,
|
'reason' => 'Empty citation context or reference text',
|
||||||
'confidence' => 0.0,
|
|
||||||
'reason' => 'Empty citation context or reference text',
|
|
||||||
];
|
];
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -63,27 +63,30 @@ class LLMService
|
|||||||
if (mb_strlen($contextText) > $maxContextLen) {
|
if (mb_strlen($contextText) > $maxContextLen) {
|
||||||
$contextText = mb_substr($contextText, 0, $maxContextLen);
|
$contextText = mb_substr($contextText, 0, $maxContextLen);
|
||||||
}
|
}
|
||||||
if (mb_strlen($referText) > 4000) {
|
if (mb_strlen($localContext) > 3000) {
|
||||||
$referText = mb_substr($referText, 0, 4000);
|
$localContext = mb_substr($localContext, 0, 3000);
|
||||||
}
|
}
|
||||||
if (mb_strlen($doiBlock) > 4000) {
|
if (mb_strlen($referText) > 6000) {
|
||||||
$doiBlock = mb_substr($doiBlock, 0, 4000);
|
$referText = mb_substr($referText, 0, 6000);
|
||||||
|
}
|
||||||
|
if (mb_strlen($doiBlock) > 8000) {
|
||||||
|
$doiBlock = mb_substr($doiBlock, 0, 8000);
|
||||||
}
|
}
|
||||||
|
|
||||||
if ($isAgain) {
|
if ($isAgain) {
|
||||||
$system = $this->buildReferenceCheckSecondPassPrompt();
|
$system = $this->buildReferenceCheckSecondPassPrompt();
|
||||||
$user = $this->buildReferenceCheckSecondPassUserPrompt($contextText, $referText, $doiBlock);
|
$user = $this->buildReferenceCheckSecondPassUserPrompt($contextText, $referText, $doiBlock, $citeGroupRefs, $localContext);
|
||||||
} else {
|
} else {
|
||||||
$system = $this->buildReferenceCheckFirstPassPrompt();
|
$system = $this->buildReferenceCheckFirstPassPrompt();
|
||||||
$user = $this->buildReferenceCheckFirstPassUserPrompt($contextText, $referText);
|
$user = $this->buildReferenceCheckFirstPassUserPrompt($contextText, $referText, $citeGroupRefs, $localContext, $doiBlock);
|
||||||
}
|
}
|
||||||
|
|
||||||
\think\Log::info('ReferenceCheck system head: ' . mb_substr($system, 0, 200));
|
// \think\Log::info('ReferenceCheck system head: ' . mb_substr($system, 0, 200));
|
||||||
\think\Log::info('ReferenceCheck user head: ' . mb_substr($user, 0, 600));
|
// \think\Log::info('ReferenceCheck user head: ' . mb_substr($user, 0, 600));
|
||||||
$payload = [
|
$payload = [
|
||||||
'model' => $this->model,
|
'model' => $this->model,
|
||||||
'temperature' => 0,
|
'temperature' => 0,
|
||||||
'messages' => [
|
'messages' => [
|
||||||
['role' => 'system', 'content' => $system],
|
['role' => 'system', 'content' => $system],
|
||||||
['role' => 'user', 'content' => $user],
|
['role' => 'user', 'content' => $user],
|
||||||
],
|
],
|
||||||
@@ -101,23 +104,14 @@ class LLMService
|
|||||||
return $fallback;
|
return $fallback;
|
||||||
}
|
}
|
||||||
|
|
||||||
$canSupport = $this->parseCanSupportFromParsed($parsed);
|
$results = $this->parseReferenceCheckResultsFromParsed($parsed, $citeGroupRefs, $localContext, $doiBlock);
|
||||||
$confidence = $this->snapReferenceCheckConfidence(
|
if (empty($results)) {
|
||||||
$this->normalizeConfidence(isset($parsed['confidence']) ? $parsed['confidence'] : 0),
|
\think\Log::warning('ReferenceCheck LLM: empty results array');
|
||||||
$canSupport
|
return $fallback;
|
||||||
);
|
}
|
||||||
$reason = $this->cleanReason((string)(isset($parsed['reason']) ? $parsed['reason'] : ''));
|
|
||||||
\think\Log::info(
|
\think\Log::info($results);
|
||||||
'ReferenceCheck result: can_support=' . ($canSupport ? '1' : '0')
|
return ['results' => $results];
|
||||||
. ', confidence=' . $confidence
|
|
||||||
. ', reason=' . $reason
|
|
||||||
);
|
|
||||||
return [
|
|
||||||
'can_support' => $canSupport,
|
|
||||||
'is_match' => $canSupport,
|
|
||||||
'confidence' => $confidence,
|
|
||||||
'reason' => $reason,
|
|
||||||
];
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -174,83 +168,541 @@ class LLMService
|
|||||||
$s = strtolower(trim((string)$value));
|
$s = strtolower(trim((string)$value));
|
||||||
return in_array($s, ['1', 'true', 'yes', 'support', 'supported'], true);
|
return in_array($s, ['1', 'true', 'yes', 'support', 'supported'], true);
|
||||||
}
|
}
|
||||||
|
private function bulidReferenceCheckFirstPassPrompt(){
|
||||||
|
return <<<'PROMPT'
|
||||||
|
你是一名护理、医学与科研期刊的资深文献编辑,专门校对「正文引用句」与「对应参考文献条目」是否匹配。
|
||||||
|
|
||||||
/** 第一次校对:书目条目 vs 正文全文 */
|
你的目标是严格识别错引、张冠李戴、方法不符、对象不符、结论不成立的问题。
|
||||||
|
|
||||||
|
宁可少判 true,也不要漏掉错引。
|
||||||
|
|
||||||
|
你只能依据用户提供的内容判断:
|
||||||
|
1. 正文引用句
|
||||||
|
2. 当前对应参考文献条目
|
||||||
|
|
||||||
|
禁止假设已阅读全文。
|
||||||
|
禁止联网。
|
||||||
|
禁止脑补文献内容。
|
||||||
|
禁止根据学科常识推断研究结果。
|
||||||
|
|
||||||
|
====================
|
||||||
|
【核心任务】
|
||||||
|
|
||||||
|
判断:
|
||||||
|
|
||||||
|
正文在该引用位置表达的核心观点、结论、方法、数据、定义、模型、研究发现、指南依据等,
|
||||||
|
|
||||||
|
是否能够被该条参考文献合理支撑。
|
||||||
|
|
||||||
|
你判断的是:
|
||||||
|
|
||||||
|
“引用是否成立”
|
||||||
|
|
||||||
|
不是:
|
||||||
|
|
||||||
|
“正文是否正确”。
|
||||||
|
|
||||||
|
====================
|
||||||
|
【总原则(最高优先级)】
|
||||||
|
|
||||||
|
采用严格审稿标准:
|
||||||
|
|
||||||
|
边界不清时,一律判 false。
|
||||||
|
|
||||||
|
宁可误杀(人工复核),不要漏掉错引。
|
||||||
|
|
||||||
|
同领域 ≠ 匹配。
|
||||||
|
|
||||||
|
同关键词 ≠ 匹配。
|
||||||
|
|
||||||
|
相关 ≠ 能支撑。
|
||||||
|
|
||||||
|
====================
|
||||||
|
【强制规则】
|
||||||
|
|
||||||
|
1. 严禁关键词硬匹配
|
||||||
|
|
||||||
|
不能因为出现:
|
||||||
|
患者、护理、治疗、研究、模型、算法、深度学习、机器学习、焦虑、效果
|
||||||
|
|
||||||
|
等泛化词汇就判定匹配。
|
||||||
|
|
||||||
|
必须看:
|
||||||
|
|
||||||
|
- 核心对象
|
||||||
|
- 研究问题
|
||||||
|
- 方法
|
||||||
|
- 场景
|
||||||
|
- 结局指标
|
||||||
|
- 核心论点
|
||||||
|
|
||||||
|
是否一致。
|
||||||
|
|
||||||
|
====================
|
||||||
|
2. 方法学必须严格一致(极重要)
|
||||||
|
|
||||||
|
若正文明确提到:
|
||||||
|
|
||||||
|
- 算法
|
||||||
|
- 模型
|
||||||
|
- 聚类方法
|
||||||
|
- 深度学习架构
|
||||||
|
- 统计方法
|
||||||
|
- 数学模型
|
||||||
|
- 评价指标
|
||||||
|
|
||||||
|
必须要求文献与其存在明确关联。
|
||||||
|
|
||||||
|
例如:
|
||||||
|
|
||||||
|
不匹配:
|
||||||
|
- fuzzy clustering ≠ deep learning
|
||||||
|
- CNN ≠ LSTM
|
||||||
|
- random forest ≠ SVM
|
||||||
|
- 聚类 ≠ 分类
|
||||||
|
- 特征选择 ≠ 分类预测
|
||||||
|
- 风险因素分析 ≠ 干预研究
|
||||||
|
|
||||||
|
仅属于同一“大领域(AI/ML)”
|
||||||
|
不能判定匹配。
|
||||||
|
|
||||||
|
若方法体系不同:
|
||||||
|
|
||||||
|
优先判 false + 0.10。
|
||||||
|
|
||||||
|
====================
|
||||||
|
3. 医学护理引用严格一致
|
||||||
|
|
||||||
|
若正文涉及:
|
||||||
|
|
||||||
|
- 疾病
|
||||||
|
- 人群
|
||||||
|
- 护理场景
|
||||||
|
- 干预措施
|
||||||
|
- 结局指标
|
||||||
|
|
||||||
|
必须基本一致。
|
||||||
|
|
||||||
|
例如:
|
||||||
|
|
||||||
|
不匹配:
|
||||||
|
- ICU ≠ 普通病房
|
||||||
|
- 老年人 ≠ 儿童
|
||||||
|
- 糖尿病 ≠ 高血压
|
||||||
|
- 心理护理 ≠ 运动干预
|
||||||
|
- 焦虑改善 ≠ 生存率提高
|
||||||
|
|
||||||
|
====================
|
||||||
|
4. 强结论必须强证据
|
||||||
|
|
||||||
|
正文若出现:
|
||||||
|
|
||||||
|
- 显著改善
|
||||||
|
- 明显降低
|
||||||
|
- 证实
|
||||||
|
- 优于
|
||||||
|
- 有效预测
|
||||||
|
- 危险因素
|
||||||
|
- 因果关系
|
||||||
|
|
||||||
|
文献必须能合理支撑该强结论。
|
||||||
|
|
||||||
|
仅“应用研究”“相关研究”“观察研究”
|
||||||
|
不能自动支持强结论。
|
||||||
|
|
||||||
|
否则 false。
|
||||||
|
|
||||||
|
====================
|
||||||
|
5. 特定证据类型必须一致
|
||||||
|
|
||||||
|
正文若明确写:
|
||||||
|
|
||||||
|
- RCT/randomized trial
|
||||||
|
- Meta-analysis
|
||||||
|
- Guideline
|
||||||
|
- Systematic review
|
||||||
|
- Expert consensus
|
||||||
|
|
||||||
|
而参考文献类型明显不符:
|
||||||
|
|
||||||
|
直接 false。
|
||||||
|
|
||||||
|
====================
|
||||||
|
6. 信息不足从严
|
||||||
|
|
||||||
|
若参考文献只有:
|
||||||
|
|
||||||
|
作者 + 年份
|
||||||
|
|
||||||
|
或信息过少,
|
||||||
|
|
||||||
|
无法建立明确关联:
|
||||||
|
|
||||||
|
false + 0.30
|
||||||
|
|
||||||
|
====================
|
||||||
|
【判定逻辑】
|
||||||
|
|
||||||
|
只有同时满足以下条件,才能 true:
|
||||||
|
|
||||||
|
1. 主题一致
|
||||||
|
2. 核心对象一致
|
||||||
|
3. 核心论点一致
|
||||||
|
4. 方法/研究方向一致
|
||||||
|
5. 无明显错引风险
|
||||||
|
|
||||||
|
任意一点明显不符:
|
||||||
|
|
||||||
|
false。
|
||||||
|
|
||||||
|
====================
|
||||||
|
【评分(只能四选一)】
|
||||||
|
|
||||||
|
只能输出:
|
||||||
|
|
||||||
|
0.90
|
||||||
|
0.75
|
||||||
|
0.30
|
||||||
|
0.10
|
||||||
|
|
||||||
|
禁止任何其他分数。
|
||||||
|
|
||||||
|
评分规则:
|
||||||
|
|
||||||
|
0.90
|
||||||
|
明确匹配:
|
||||||
|
主题、对象、方法、核心论点均明显一致。
|
||||||
|
|
||||||
|
0.75
|
||||||
|
基本匹配:
|
||||||
|
整体支撑成立,但存在轻微概括或小范围表述差异。
|
||||||
|
|
||||||
|
0.30
|
||||||
|
存疑:
|
||||||
|
同领域但支撑不足;
|
||||||
|
信息不足;
|
||||||
|
需人工复核。
|
||||||
|
|
||||||
|
0.10
|
||||||
|
明确错引:
|
||||||
|
主题、对象、方法或核心论点明显不符。
|
||||||
|
|
||||||
|
硬规则:
|
||||||
|
|
||||||
|
is_match=true
|
||||||
|
只能:
|
||||||
|
0.75 或 0.90
|
||||||
|
|
||||||
|
is_match=false
|
||||||
|
只能:
|
||||||
|
0.10 或 0.30
|
||||||
|
|
||||||
|
====================
|
||||||
|
【reason 要求】
|
||||||
|
|
||||||
|
仅说明:
|
||||||
|
|
||||||
|
1. 是否主题一致;
|
||||||
|
2. 核心论点/方法是否能支撑。
|
||||||
|
|
||||||
|
禁止模糊措辞:
|
||||||
|
“可能”
|
||||||
|
“看起来”
|
||||||
|
“应该”
|
||||||
|
“疑似”
|
||||||
|
|
||||||
|
长度:
|
||||||
|
|
||||||
|
20~60字。
|
||||||
|
|
||||||
|
====================
|
||||||
|
【输出要求】
|
||||||
|
|
||||||
|
仅输出一行 minified JSON。
|
||||||
|
|
||||||
|
禁止 markdown。
|
||||||
|
禁止解释。
|
||||||
|
禁止换行。
|
||||||
|
禁止任何额外内容。
|
||||||
|
|
||||||
|
格式:
|
||||||
|
|
||||||
|
{"is_match":true|false,"confidence":0.10|0.30|0.75|0.90,"reason":"简体中文说明"}
|
||||||
|
PROMPT;
|
||||||
|
|
||||||
|
}
|
||||||
|
/** 第一次校对:参考文献真实性与支撑力度 */
|
||||||
private function buildReferenceCheckFirstPassPrompt()
|
private function buildReferenceCheckFirstPassPrompt()
|
||||||
{
|
{
|
||||||
return <<<'PROMPT'
|
return $this->buildReferenceCheckSupportSystemPrompt(false);
|
||||||
你是文献引用校对助手。判断【正文全文】与【参考文献书目】是否相关、能否用于支撑正文中的引用。
|
|
||||||
|
|
||||||
【核心原则:从宽判断,避免误杀】
|
|
||||||
默认倾向 can_support=true。只要文献与正文不是「风马牛不相及」,即判为相关、能支撑。
|
|
||||||
不要求变量一致、不要求结论逐条对应、不要求研究设计相同。
|
|
||||||
|
|
||||||
【仅当以下情况才判 can_support=false(与正文明显无关)】
|
|
||||||
- 学科/主题完全无关(如正文讲深度学习聚类,文献是糖尿病步态检测)。
|
|
||||||
- 明显张冠李戴(正文断言 A 疗法的效果,文献研究的是完全不同的 B 问题且无关联)。
|
|
||||||
- 文献条目与正文讨论的对象/场景毫无交集,且无法作背景或理论引用。
|
|
||||||
|
|
||||||
【以下情况均应 can_support=true】
|
|
||||||
- 同一大领域或相邻方向(如护理、心理、管理、医学、统计、AI 等相近子领域)。
|
|
||||||
- 可作背景文献、综述性引用、理论或方法的一般性依据。
|
|
||||||
- 表述略宽、略有概括、变量名不完全一致,但大方向说得通。
|
|
||||||
|
|
||||||
【confidence 固定档位(禁止其它小数)】
|
|
||||||
can_support=true:0.65(有关联但较泛)/ 0.78 / 0.85 / 0.92 / 0.98(非常确定相关)
|
|
||||||
can_support=false:0.15(明确风马牛不相及)/ 0.25 / 0.35 / 0.45(仅当实在无法建立任何合理关联)
|
|
||||||
|
|
||||||
【输出】仅一行 minified JSON,无 markdown:
|
|
||||||
{"can_support":true|false,"is_match":true|false,"confidence":0.15|0.25|0.35|0.45|0.65|0.78|0.85|0.92|0.98,"reason":"30-80字简体中文"}
|
|
||||||
is_match 必须与 can_support 相同。
|
|
||||||
PROMPT;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private function buildReferenceCheckFirstPassUserPrompt($contextText, $referText)
|
private function buildReferenceCheckSupportSystemPrompt($isSecondPass = false)
|
||||||
{
|
{
|
||||||
return "【正文全文 article_main.content】\n" . $contextText
|
$prompt = <<<'PROMPT'
|
||||||
. "\n\n【参考文献书目 refer_text】\n" . $referText
|
你是一名护理、医学、生物医学与科研期刊的资深学术编辑,正在执行“参考文献真实性与支撑力度校对”。
|
||||||
. "\n\n请从宽判断:文献与正文非风马牛不相即可判 can_support=true,只返回 JSON。";
|
|
||||||
|
你的任务不是判断“主题是否相关”,而是判断:
|
||||||
|
【稿件正文中某段被引用内容】是否真的能被【对应编号的参考文献】直接或充分支撑。
|
||||||
|
|
||||||
|
你必须严格基于用户提供的材料作出判断,不得凭常识、不得脑补、不得假设参考文献中“可能写过但未提供”的内容。
|
||||||
|
|
||||||
|
==================================================
|
||||||
|
【一、任务目标】
|
||||||
|
你需要判断:
|
||||||
|
“正文引用位置的核心论点、结论、背景陈述、机制解释、疗效描述、数据表达或因果表述,
|
||||||
|
是否能被对应参考文献真实支持。”
|
||||||
|
|
||||||
|
这里的“支持”不是指“文献主题相关”或“研究领域接近”,而是指:
|
||||||
|
参考文献中确实包含足以支持正文该处表述的内容。
|
||||||
|
|
||||||
|
==================================================
|
||||||
|
【二、输出原则:结果必须直接对应数据库行】
|
||||||
|
|
||||||
|
你输出的结果将直接写入数据库表 t_article_reference_check_result。
|
||||||
|
|
||||||
|
因此:
|
||||||
|
## 输出必须是 results 数组,数组中的每一个对象对应数据库中的一行,也就是“一个引用位置中的一条参考文献结果”。
|
||||||
|
|
||||||
|
换句话说:
|
||||||
|
- 如果某个引用位置是 [3],则输出 1 条 result(reference_no=3)
|
||||||
|
- 如果某个引用位置是 [1,2],则输出 2 条 result:
|
||||||
|
- 一条对应 reference_no=1
|
||||||
|
- 一条对应 reference_no=2
|
||||||
|
|
||||||
|
每条 result 都必须给出该参考文献“单独”对正文引用句的支撑判断。
|
||||||
|
如果该引用位置是联合引用(citation group 中有多篇文献),则除了单条判断外,还必须给出该引用组整体的联合判断(combined_* 字段)。
|
||||||
|
|
||||||
|
==================================================
|
||||||
|
【三、最重要原则:只看“是否支撑正文核心断言”,不是看“主题是否沾边”】
|
||||||
|
|
||||||
|
以下情况不能判为强支撑:
|
||||||
|
1. 参考文献只和主题大致相关,但没有明确支持正文中的关键表述
|
||||||
|
2. 正文说的是“疗效提升/死亡率下降/全球高发/耐药/多通路机制”等明确论点,而文献只是在背景里泛泛提到疾病
|
||||||
|
3. 正文是多层复合句,文献只支撑其中一小部分
|
||||||
|
4. 正文有因果、比较、趋势、机制、疗效强度等强表述,而文献没有明确证据
|
||||||
|
5. 文献是基础机制研究,但正文引用它来支撑宏观流行病学、临床治疗现状或指南式结论
|
||||||
|
6. 文献可以“推测支持”但不是“直接/明确支持”
|
||||||
|
|
||||||
|
==================================================
|
||||||
|
【三b、多 claim 复合句 → 0.78 部分支撑(勿误降到 0.45)】
|
||||||
|
|
||||||
|
正文常为 2~4 个连续 claim 的复合句。须逐 claim 比对后综合给分:
|
||||||
|
|
||||||
|
- 若文献(含 DOI 摘要)能**明确支撑多数关键概念**(如遗传异质性/多基因改变、多 survival pathway 并存、耐药或治疗挑战),
|
||||||
|
但**未逐字写出**正文完整因果链(如「异质性→多通路→单靶点疗效下降」),
|
||||||
|
→ 应判 **partial_support**,confidence 通常 **0.78**(边界情况 0.65),**不得**仅因文献主标题聚焦某化合物/干预就降到 0.45。
|
||||||
|
|
||||||
|
- 0.45 仅用于:文献与 claim 方向明显不符、仅同病沾边、或几乎无可用证据。
|
||||||
|
|
||||||
|
**校准样例(单条 [4],须接近此逻辑):**
|
||||||
|
|
||||||
|
引用句:
|
||||||
|
Furthermore, the genomic heterogeneity of colorectal cancer (CRC) presents additional difficulties because tumors frequently make use of several survival pathways at once, which reduces the efficacy of single-target treatments [4].
|
||||||
|
|
||||||
|
文献4(Sheikhnia et al., thymoquinone CRC 机制综述):
|
||||||
|
- Claim1 遗传异质性/多基因改变:文献有 APC/KRAS/TP53、MSI/CIN 等 → 支撑较强
|
||||||
|
- Claim2 多 survival pathway:文献列举 PI3K/Akt、Wnt、STAT3、NF-κB 等多通路 → 支撑较强
|
||||||
|
- Claim3 单靶点疗效下降:文献有 drug resistance/治疗挑战,但未直述因果链 → 部分支撑
|
||||||
|
- **输出**:can_support=1, confidence=**0.78**, support_role=supplementary_support(**不是 0.45**)
|
||||||
|
|
||||||
|
用户消息中若提供【DOI 真实文献内容】,**必须结合摘要判断**,不得仅凭书目标题给分。
|
||||||
|
|
||||||
|
==================================================
|
||||||
|
【四、评分规则】
|
||||||
|
|
||||||
|
你必须使用以下 8 个固定分值之一:
|
||||||
|
0.98 / 0.92 / 0.85 / 0.78 / 0.65 / 0.45 / 0.25 / 0.15
|
||||||
|
|
||||||
|
判定含义:
|
||||||
|
- 0.98 / 0.92 / 0.85 => 强支撑(strong_support)
|
||||||
|
- 0.78 / 0.65 => 部分支撑(partial_support)
|
||||||
|
- 0.45 / 0.25 => 支撑不足(insufficient_support)
|
||||||
|
- 0.15 => 不支撑(not_support)
|
||||||
|
|
||||||
|
can_support 取值规则:
|
||||||
|
- 若该文献/联合引文整体可判为 strong_support 或 partial_support,则 can_support = 1
|
||||||
|
- 若判为 insufficient_support 或 not_support,则 can_support = 0
|
||||||
|
|
||||||
|
==================================================
|
||||||
|
【五、单条文献结果如何判断】
|
||||||
|
|
||||||
|
对于每一条参考文献,你必须判断它“单独”能否支撑该引用位置的正文内容,并输出:
|
||||||
|
- can_support
|
||||||
|
- confidence
|
||||||
|
- reason
|
||||||
|
- support_role
|
||||||
|
|
||||||
|
其中:
|
||||||
|
### support_role 只能取以下值之一
|
||||||
|
- primary_support:该文献本身就是主要证据来源,能支撑引用句核心内容
|
||||||
|
- supplementary_support:能支撑部分重要内容,但不是主要来源
|
||||||
|
- minimal_support:只提供少量背景或边缘支撑
|
||||||
|
- no_meaningful_support:几乎不能支撑该引用句
|
||||||
|
|
||||||
|
### reason 的写法要求
|
||||||
|
必须使用中文,明确写出:
|
||||||
|
1. 这篇文献具体支撑正文的哪一部分
|
||||||
|
2. 哪些部分没有支撑到
|
||||||
|
3. 是否存在文献类型与引用用途不匹配的问题
|
||||||
|
4. 为什么给这个分值,而不是更高或更低
|
||||||
|
|
||||||
|
==================================================
|
||||||
|
【六、联合引用的判断规则】
|
||||||
|
|
||||||
|
当同一个引用位置包含多篇参考文献时(例如 [1,2] / [4,5,6]),除了逐条给单条结果外,还要额外判断:
|
||||||
|
“这些文献合起来,是否足以支撑该引用位置的正文内容?”
|
||||||
|
|
||||||
|
联合结论输出到:
|
||||||
|
- combined_can_support
|
||||||
|
- combined_confidence
|
||||||
|
- combined_reason
|
||||||
|
|
||||||
|
规则:
|
||||||
|
1. 联合评分不是单条评分平均值
|
||||||
|
2. 如果其中一篇文献已强支撑,其他文献只是补充,则联合评分可接近主支撑文献
|
||||||
|
3. 如果多篇文献分别覆盖不同部分,合起来能较完整支撑正文,则联合评分可以高于某些单条评分
|
||||||
|
4. 但如果最关键的核心断言没有被任何文献明确支撑,则联合评分不能虚高
|
||||||
|
5. 如果多篇文献都只是零散相关,需要大量推断才能拼出正文结论,则联合评分通常不应过高
|
||||||
|
|
||||||
|
==================================================
|
||||||
|
【七、单引文的 combined_* 字段处理规则】
|
||||||
|
|
||||||
|
即使某个引用位置只有 1 条参考文献,也仍然必须输出 combined_* 字段。
|
||||||
|
此时:
|
||||||
|
- combined_can_support = can_support
|
||||||
|
- combined_confidence = confidence
|
||||||
|
- combined_reason = “该引用位置仅包含单条文献,联合结论等同于该文献的单条结论。” 或等价表述
|
||||||
|
|
||||||
|
这样可以保证输出结构统一,便于数据库写入。
|
||||||
|
|
||||||
|
==================================================
|
||||||
|
【八、输出 JSON 结构】
|
||||||
|
|
||||||
|
你必须输出合法 JSON,且只能输出以下结构:
|
||||||
|
|
||||||
|
{
|
||||||
|
"results": [
|
||||||
|
{
|
||||||
|
"reference_no": 1,
|
||||||
|
"cite_group_refs": "1,2",
|
||||||
|
"can_support": 0,
|
||||||
|
"confidence": 0.65,
|
||||||
|
"reason": "中文,单条文献结论",
|
||||||
|
"support_role": "supplementary_support",
|
||||||
|
"combined_can_support": 1,
|
||||||
|
"combined_confidence": 0.85,
|
||||||
|
"combined_reason": "中文,联合引用整体结论"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
|
||||||
|
==================================================
|
||||||
|
【九、字段约束】
|
||||||
|
|
||||||
|
### 1)results 中每个对象都必须包含以下字段:
|
||||||
|
- reference_no
|
||||||
|
- cite_group_refs
|
||||||
|
- can_support
|
||||||
|
- confidence
|
||||||
|
- reason
|
||||||
|
- support_role
|
||||||
|
- combined_can_support
|
||||||
|
- combined_confidence
|
||||||
|
- combined_reason
|
||||||
|
|
||||||
|
### 2)reference_no
|
||||||
|
必须对应当前引用位置中的某一条参考文献编号。
|
||||||
|
|
||||||
|
### 3)cite_group_refs
|
||||||
|
必须是该引用位置的完整引文组,格式如:
|
||||||
|
- "3"
|
||||||
|
- "1,2"
|
||||||
|
- "4,5,6"
|
||||||
|
|
||||||
|
### 4)同一引用位置若包含多条参考文献,则必须输出多条 result
|
||||||
|
例如 cite_group_refs = "1,2" 时,必须输出:
|
||||||
|
- 一条 reference_no=1
|
||||||
|
- 一条 reference_no=2
|
||||||
|
|
||||||
|
### 5)同一引用位置下的 combined_* 必须一致
|
||||||
|
例如同属 "1,2" 的两条 result,它们的:
|
||||||
|
- combined_can_support
|
||||||
|
- combined_confidence
|
||||||
|
- combined_reason
|
||||||
|
必须完全一致。
|
||||||
|
|
||||||
|
==================================================
|
||||||
|
【十、禁止事项】
|
||||||
|
你绝对不能:
|
||||||
|
- 杜撰文献中不存在的结论
|
||||||
|
- 把“主题相关”当作“内容支撑”
|
||||||
|
- 因为是同一疾病就默认支持
|
||||||
|
- 输出 JSON 以外的任何内容
|
||||||
|
|
||||||
|
现在开始,读取用户提供的引用位置正文、参考文献信息和文献内容,输出结果。
|
||||||
|
PROMPT;
|
||||||
|
|
||||||
|
if ($isSecondPass) {
|
||||||
|
$prompt .= <<<'PROMPT'
|
||||||
|
|
||||||
|
|
||||||
|
==================================================
|
||||||
|
【二次校对补充(DOI 真实文献内容)】
|
||||||
|
用户消息中会提供【DOI 真实文献内容(PubMed/Crossref)】。
|
||||||
|
必须以 DOI 真实内容为准复核支撑力度;书目信息与 DOI 冲突时以 DOI 为准。
|
||||||
|
仍须输出完整 results 数组,逐条给出单文献判断与联合判断。
|
||||||
|
PROMPT;
|
||||||
|
}
|
||||||
|
|
||||||
|
return $prompt;
|
||||||
}
|
}
|
||||||
|
|
||||||
/** 第二次校对:Crossref 摘要(Refer_doi) */
|
/**
 * Build the user message for the first review pass.
 *
 * Pure delegation: every argument is forwarded unchanged to
 * buildReferenceCheckSupportUserPrompt(), which assembles the sections.
 *
 * @param mixed $contextText   Manuscript text shown to the model.
 * @param mixed $referText     Bibliography entries for the cited references.
 * @param mixed $citeGroupRefs Citation group such as "3" or "1,2" (optional).
 * @param mixed $localContext  Text surrounding the citation position (optional).
 * @param mixed $doiBlock      Content resolved from the DOI (optional).
 * @return string
 */
private function buildReferenceCheckFirstPassUserPrompt($contextText, $referText, $citeGroupRefs = '', $localContext = '', $doiBlock = '')
{
    $userPrompt = $this->buildReferenceCheckSupportUserPrompt(
        $contextText,
        $referText,
        $citeGroupRefs,
        $localContext,
        $doiBlock
    );

    return $userPrompt;
}
|
||||||
|
|
||||||
|
/**
 * Assemble the user message shared by both review passes.
 *
 * Sections are emitted in a fixed order and joined by a blank line:
 * manuscript text, citation group (only when non-empty), local context
 * (only when non-empty), bibliography, DOI content (only when non-empty),
 * and a closing instruction line.
 *
 * @param mixed $contextText   Manuscript text; concatenated as-is (not trimmed).
 * @param mixed $referText     Bibliography text; concatenated as-is (not trimmed).
 * @param mixed $citeGroupRefs Citation group, e.g. "3" or "1,2"; trimmed.
 * @param mixed $localContext  Text near the citation position; trimmed.
 * @param mixed $doiBlock      Content resolved from the DOI; trimmed.
 * @return string
 */
private function buildReferenceCheckSupportUserPrompt($contextText, $referText, $citeGroupRefs, $localContext, $doiBlock)
{
    $groupRefs = trim((string)$citeGroupRefs);
    $nearbyText = trim((string)$localContext);
    $doiContent = trim((string)$doiBlock);

    $sections = [];
    $sections[] = "【正文节 t_article_main】\n" . $contextText;

    if ($groupRefs !== '') {
        // A comma in the group means several references share this citation position.
        if (strpos($groupRefs, ',') === false) {
            $modeLabel = '单独引用';
        } else {
            $modeLabel = '联合引用';
        }
        $sections[] = '【引用文献组 cite_group_refs】' . $groupRefs . '(' . $modeLabel . ')';
    }

    if ($nearbyText !== '') {
        $sections[] = "【本引用位置附近上下文】\n" . $nearbyText;
    }

    $sections[] = "【参考文献书目(按编号列出)】\n" . $referText;

    if ($doiContent !== '') {
        $sections[] = "【DOI 真实文献内容(PubMed/Crossref,一轮校对已提供)】\n" . $doiContent;
    }

    $sections[] = '请严格按 system 要求输出 results 数组 JSON,每条 result 对应一个 reference_no,并包含 combined_* 字段。';

    return implode("\n\n", $sections);
}
|
||||||
|
|
||||||
|
/** 第二次校对:DOI 真实文献内容复核 */
|
||||||
private function buildReferenceCheckSecondPassPrompt()
|
private function buildReferenceCheckSecondPassPrompt()
|
||||||
{
|
{
|
||||||
return <<<'PROMPT'
|
return $this->buildReferenceCheckSupportSystemPrompt(true);
|
||||||
你是文献引用二次校对助手。已根据 Refer_doi 从 Crossref(https://api.crossref.org/works/)获取摘要,请结合【正文全文】复核该文献是否相关。
|
|
||||||
|
|
||||||
【核心原则:与第一次相同,从宽判断】
|
|
||||||
默认倾向 can_support=true。只要 Crossref 摘要(或书目)与正文不是风马牛不相及,即判相关、能支撑。
|
|
||||||
以【Crossref 摘要】为准;摘要与书目冲突时以摘要为准。
|
|
||||||
|
|
||||||
【仅当以下情况才判 can_support=false】
|
|
||||||
- 摘要显示的研究主题/对象/方法与正文讨论内容完全风马牛不相及。
|
|
||||||
- 典型风马牛不相及、张冠李戴,且无法解释为背景或泛化引用。
|
|
||||||
|
|
||||||
【以下情况均应 can_support=true】
|
|
||||||
- 摘要与正文属同领域或相近方向,能作背景、理论或方向性支撑。
|
|
||||||
- 细节不完全一致,但不存在明显矛盾。
|
|
||||||
|
|
||||||
【无 Crossref 摘要时】
|
|
||||||
结合 refer_text 从宽判断;非明显无关仍可 can_support=true,confidence 建议 0.65。
|
|
||||||
|
|
||||||
【confidence 固定档位(禁止其它小数)】
|
|
||||||
can_support=true:0.65 / 0.78 / 0.85 / 0.92 / 0.98
|
|
||||||
can_support=false:0.15 / 0.25 / 0.35 / 0.45
|
|
||||||
|
|
||||||
【输出】仅一行 minified JSON:
|
|
||||||
{"can_support":true|false,"is_match":true|false,"confidence":0.15|0.25|0.35|0.45|0.65|0.78|0.85|0.92|0.98,"reason":"30-80字简体中文"}
|
|
||||||
is_match 必须与 can_support 相同。
|
|
||||||
PROMPT;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private function buildReferenceCheckSecondPassUserPrompt($contextText, $referText, $doiBlock)
|
private function buildReferenceCheckSecondPassUserPrompt($contextText, $referText, $doiBlock, $citeGroupRefs = '', $localContext = '')
|
||||||
{
|
{
|
||||||
$doiBlock = trim((string)$doiBlock);
|
return $this->buildReferenceCheckSupportUserPrompt(
|
||||||
return "【正文全文 article_main.content】\n" . $contextText
|
$contextText,
|
||||||
. "\n\n【参考文献书目 refer_text】\n" . $referText
|
$referText,
|
||||||
. "\n\n【Crossref 摘要】(Refer_doi → api.crossref.org/works/)\n"
|
$citeGroupRefs,
|
||||||
. ($doiBlock !== '' ? $doiBlock : '(未获取到摘要,请结合 refer_text 从宽判断)')
|
$localContext,
|
||||||
. "\n\n文献与正文非风马牛不相即可判 can_support=true,只返回 JSON。";
|
$doiBlock !== '' ? $doiBlock : '(未获取到 DOI 摘要或元数据,请结合书目条目从严判断)'
|
||||||
|
);
|
||||||
}
|
}
|
||||||
private function buildReferenceCheckSystemPrompt3()
|
private function buildReferenceCheckSystemPrompt3()
|
||||||
{
|
{
|
||||||
@@ -1169,13 +1621,174 @@ PROMPT;
|
|||||||
|
|
||||||
private function buildReferenceCheckRecheckUserPrompt($contextText, $referText, $doiBlock)
|
private function buildReferenceCheckRecheckUserPrompt($contextText, $referText, $doiBlock)
|
||||||
{
|
{
|
||||||
return $this->buildReferenceCheckSecondPassUserPrompt($contextText, $referText, $doiBlock);
|
return $this->buildReferenceCheckSecondPassUserPrompt($contextText, $referText, $doiBlock, '', '');
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* 与 buildReferenceCheckSystemPrompt3 一致的 confidence 档位
|
* @return array<int, array>
|
||||||
*/
|
*/
|
||||||
private function getReferenceCheckConfidenceBands($isMatch)
|
private function parseReferenceCheckResultsFromParsed(array $parsed, $defaultCiteGroupRefs = '', $localContext = '', $doiBlock = '')
{
    // Turns the decoded LLM reply into one normalised row per cited reference.
    // Accepts either {"results": [...]} or a single flat result object; rows
    // that are not arrays or lack a positive reference_no are dropped.
    $rows = [];
    if (isset($parsed['results']) && is_array($parsed['results'])) {
        $rows = $parsed['results'];
    } elseif (isset($parsed['reference_no']) || isset($parsed['confidence'])) {
        // Flat reply: treat the top-level object as the only result.
        $rows = [$parsed];
    }

    $normalized = [];
    foreach ($rows as $item) {
        if (!is_array($item)) {
            continue;
        }
        $refNo = intval(isset($item['reference_no']) ? $item['reference_no'] : 0);
        if ($refNo <= 0) {
            continue;
        }

        // Per-reference verdict: snap the score to a permitted band, derive
        // can_support from it, then let an explicit flag from the model win.
        $confidence = $this->snapReferenceCheckConfidenceValue(
            $this->normalizeConfidence(isset($item['confidence']) ? $item['confidence'] : 0)
        );
        $canSupport = $this->canSupportFromConfidence($confidence);
        if (array_key_exists('can_support', $item)) {
            $canSupport = $this->boolFromLlmValue($item['can_support']);
        } elseif (array_key_exists('is_match', $item)) {
            $canSupport = $this->boolFromLlmValue($item['is_match']);
        }

        $reason = $this->cleanReason((string)(isset($item['reason']) ? $item['reason'] : ''));
        $supportRole = $this->normalizeSupportRole(isset($item['support_role']) ? $item['support_role'] : '');
        list($confidence, $canSupport, $supportRole) = $this->applyMultiClaimPartialSupportFloor(
            $localContext,
            $doiBlock,
            $confidence,
            $canSupport,
            $supportRole,
            $reason
        );

        // Group verdict: falls back to the (possibly floored) single-reference
        // score when the model omitted combined_confidence.
        $combinedConfidence = $this->snapReferenceCheckConfidenceValue(
            $this->normalizeConfidence(isset($item['combined_confidence']) ? $item['combined_confidence'] : $confidence)
        );
        $combinedCanSupport = $this->canSupportFromConfidence($combinedConfidence);
        if (array_key_exists('combined_can_support', $item)) {
            $combinedCanSupport = $this->boolFromLlmValue($item['combined_can_support']);
        }

        // The model sometimes emits the group as a JSON array ([1,2]) instead of
        // the requested "1,2" string. A plain (string) cast would store the
        // literal "Array", so join scalar members into the canonical form first.
        $rawGroupRefs = isset($item['cite_group_refs']) ? $item['cite_group_refs'] : $defaultCiteGroupRefs;
        if (is_array($rawGroupRefs)) {
            $groupMembers = [];
            foreach ($rawGroupRefs as $member) {
                if (is_scalar($member) && trim((string)$member) !== '') {
                    $groupMembers[] = trim((string)$member);
                }
            }
            $rawGroupRefs = implode(',', $groupMembers);
        }
        $citeGroupRefs = trim((string)$rawGroupRefs);
        if ($citeGroupRefs === '' && $defaultCiteGroupRefs !== '') {
            $citeGroupRefs = trim((string)$defaultCiteGroupRefs);
        }

        $normalized[] = [
            'reference_no' => $refNo,
            'cite_group_refs' => $citeGroupRefs,
            'can_support' => $canSupport,
            // is_match mirrors can_support so both keys stay available to callers.
            'is_match' => $canSupport,
            'confidence' => $confidence,
            'reason' => $reason,
            'support_role' => $supportRole,
            'combined_can_support' => $combinedCanSupport,
            'combined_confidence' => $combinedConfidence,
            'combined_reason' => $this->cleanReason((string)(isset($item['combined_reason']) ? $item['combined_reason'] : '')),
        ];
    }

    return $normalized;
}
|
||||||
|
|
||||||
|
/**
 * Canonicalise a support_role label returned by the model.
 *
 * The value is cast to string, trimmed and lower-cased; anything outside
 * the four recognised roles collapses to 'no_meaningful_support'.
 *
 * @param mixed $role Raw label.
 * @return string One of the four recognised role names.
 */
private function normalizeSupportRole($role)
{
    $candidate = strtolower(trim((string)$role));

    $recognised = [
        'primary_support' => true,
        'supplementary_support' => true,
        'minimal_support' => true,
        'no_meaningful_support' => true,
    ];

    if (isset($recognised[$candidate])) {
        return $candidate;
    }

    return 'no_meaningful_support';
}
|
||||||
|
|
||||||
|
/**
 * Derive the can_support flag from a confidence score.
 *
 * A score counts as supporting when it reaches 0.65, with a 0.001
 * tolerance so a value sitting a hair below 0.65 still qualifies.
 *
 * @param mixed $confidence Score; converted with floatval().
 * @return bool
 */
private function canSupportFromConfidence($confidence)
{
    $threshold = 0.65 - 0.001;
    $score = floatval($confidence);

    return $score >= $threshold;
}
|
||||||
|
|
||||||
|
/**
 * Raise a low score to the 0.78 "partial support" band when a mechanism-style
 * claim (heterogeneity / multiple pathways / single-target limits / drug
 * resistance) is paired with pathway-level keywords in the reference material.
 *
 * The floor applies only when all of the following hold:
 *  - the incoming confidence is at most 0.45;
 *  - $localContext is non-empty and contains a mechanism keyword;
 *  - $doiBlock or $reason contains a pathway/resistance keyword.
 * Otherwise the three values are returned unchanged (confidence as float).
 *
 * NOTE(review): this is a plain keyword test; it cannot tell "covers pathway X"
 * from "does not cover pathway X" in $reason. Confirm that is acceptable.
 *
 * @param mixed  $localContext Text near the citation position (the claim).
 * @param mixed  $doiBlock     Content resolved from the DOI.
 * @param mixed  $confidence   Single-reference confidence.
 * @param bool   $canSupport   Current can_support flag.
 * @param string $supportRole  Current support_role.
 * @param mixed  $reason       Model-written reason, searched together with $doiBlock.
 * @return array [float $confidence, bool $canSupport, string $supportRole]
 */
private function applyMultiClaimPartialSupportFloor($localContext, $doiBlock, $confidence, $canSupport, $supportRole, $reason)
{
    $confidence = floatval($confidence);
    if ($confidence > 0.45) {
        return [$confidence, $canSupport, $supportRole];
    }

    $claimText = trim((string)$localContext);
    if ($claimText === '') {
        return [$confidence, $canSupport, $supportRole];
    }

    // \b only delimits the Latin keywords. Chinese text has no word boundary
    // between adjacent ideographs, so the CJK keywords are matched as plain
    // substrings; wrapped in \b they never matched inside a Chinese sentence.
    $claimIsMechanism = (bool)preg_match(
        '/\b(?:genomic heterogeneity|heterogeneity|survival pathway|pathways at once|single-target|multi.?pathway|genetic alteration|drug resistance)\b|异质性|生存通路|多.*通路|单靶点|耐药/ui',
        $claimText
    );
    if (!$claimIsMechanism) {
        return [$confidence, $canSupport, $supportRole];
    }

    // Trim after joining: the separator alone must not count as content.
    $corpus = trim(trim((string)$doiBlock) . ' ' . trim((string)$reason));
    if ($corpus === '') {
        return [$confidence, $canSupport, $supportRole];
    }

    $refHasPathwayEvidence = (bool)preg_match(
        '/\b(?:pathway|PI3K|Akt|mTOR|Wnt|STAT3|NF-κB|NF-kB|genetic alteration|MSI|CIN|drug resistance|signaling|multiple|APC|KRAS|TP53)\b|通路|耐药|信号/ui',
        $corpus
    );
    if (!$refHasPathwayEvidence) {
        return [$confidence, $canSupport, $supportRole];
    }

    // Both sides matched: lift to partial support and upgrade weak roles.
    $confidence = 0.78;
    $canSupport = true;
    if ($supportRole === 'no_meaningful_support' || $supportRole === 'minimal_support') {
        $supportRole = 'supplementary_support';
    }

    return [$confidence, $canSupport, $supportRole];
}
|
||||||
|
|
||||||
|
/**
 * The fixed confidence values a result may carry, in ascending order.
 *
 * @return float[]
 */
private function getReferenceCheckConfidenceBands()
{
    $bands = [
        0.15,
        0.25,
        0.45,
        0.65,
        0.78,
        0.85,
        0.92,
        0.98,
    ];

    return $bands;
}
|
||||||
|
|
||||||
|
/**
 * Snap a confidence score onto the nearest permitted band.
 *
 * A band within 0.001 of the score is returned immediately; otherwise the
 * band with the smallest absolute distance wins, the earlier band being
 * kept on an exact tie.
 *
 * @param mixed $confidence Numeric score to snap.
 * @return float One of the values from getReferenceCheckConfidenceBands().
 */
private function snapReferenceCheckConfidenceValue($confidence)
{
    $levels = $this->getReferenceCheckConfidenceBands();

    $closest = $levels[0];
    $closestGap = abs($confidence - $closest);

    foreach ($levels as $level) {
        $gap = abs($confidence - $level);
        if ($gap < 0.001) {
            // Already on a band, allowing for float noise.
            return $level;
        }
        if ($gap < $closestGap) {
            $closestGap = $gap;
            $closest = $level;
        }
    }

    return $closest;
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @deprecated 兼容旧逻辑
|
||||||
|
*/
|
||||||
|
private function getReferenceCheckConfidenceBandsLegacy($isMatch)
|
||||||
{
|
{
|
||||||
return $isMatch
|
return $isMatch
|
||||||
? [0.65, 0.78, 0.85, 0.92, 0.98]
|
? [0.65, 0.78, 0.85, 0.92, 0.98]
|
||||||
@@ -1183,22 +1796,24 @@ PROMPT;
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* 将模型输出的 confidence 吸附到合法档位(如 0.95 → 0.92,0.75 → 0.78)
|
* 将模型输出的 confidence 吸附到合法档位
|
||||||
*/
|
*/
|
||||||
private function snapReferenceCheckConfidence($confidence, $isMatch)
|
private function snapReferenceCheckConfidence($confidence, $isMatch)
|
||||||
{
|
{
|
||||||
$bands = $this->getReferenceCheckConfidenceBands($isMatch);
|
$snapped = $this->snapReferenceCheckConfidenceValue($confidence);
|
||||||
|
$bands = $this->getReferenceCheckConfidenceBandsLegacy($isMatch);
|
||||||
|
if (in_array($snapped, $bands, true)) {
|
||||||
|
return $snapped;
|
||||||
|
}
|
||||||
foreach ($bands as $band) {
|
foreach ($bands as $band) {
|
||||||
if (abs($confidence - $band) < 0.001) {
|
if (abs($snapped - $band) < 0.001) {
|
||||||
return $band;
|
return $band;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
$nearest = $bands[0];
|
$nearest = $bands[0];
|
||||||
$minDiff = abs($confidence - $nearest);
|
$minDiff = abs($snapped - $nearest);
|
||||||
foreach ($bands as $band) {
|
foreach ($bands as $band) {
|
||||||
$diff = abs($confidence - $band);
|
$diff = abs($snapped - $band);
|
||||||
if ($diff < $minDiff) {
|
if ($diff < $minDiff) {
|
||||||
$minDiff = $diff;
|
$minDiff = $diff;
|
||||||
$nearest = $band;
|
$nearest = $band;
|
||||||
|
|||||||
@@ -138,12 +138,18 @@ class ReferenceRelevanceLlmService
|
|||||||
- **「覆盖部分结局」不足以进入 0.78**:原句点名了多条通路 + 多个结局,文献仅命中其中 1~2 个结局(如仅凋亡/增殖),且**点名通路在本文结果中全部缺失(仅讨论转引)**或主语层级不对 → 单条 **限 0.45(weakly_related / minimal_relevance)**,不得给 0.65~0.78
|
- **「覆盖部分结局」不足以进入 0.78**:原句点名了多条通路 + 多个结局,文献仅命中其中 1~2 个结局(如仅凋亡/增殖),且**点名通路在本文结果中全部缺失(仅讨论转引)**或主语层级不对 → 单条 **限 0.45(weakly_related / minimal_relevance)**,不得给 0.65~0.78
|
||||||
- 仅同领域沾边 1–2 项、主语或机制层级不对 → **0.45**
|
- 仅同领域沾边 1–2 项、主语或机制层级不对 → **0.45**
|
||||||
- **进入 0.65~0.78 的前提**:主语对齐(X 单体)+ 本文自身结果命中原句点名通路/结局的多数项;几乎全部明确对应 → **0.85+**
|
- **进入 0.65~0.78 的前提**:主语对齐(X 单体)+ 本文自身结果命中原句点名通路/结局的多数项;几乎全部明确对应 → **0.85+**
|
||||||
|
11. **文献「主题粒度」必须匹配 claim「主题粒度」**:引用处为**疾病总论型 claim**(流行病学负担、标准/多模态治疗现状与局限、基因组异质性、单靶点治疗受限、亟需新策略等总体背景)时:
|
||||||
|
- 最适合的来源是**疾病总体综述 / 分子病理综述 / 精准肿瘤学 / 耐药综述**;此类文献正面、系统地为该总论 claim 提供依据 → 可 **0.85+**
|
||||||
|
- **单一药物 / 单一成分 / 单一通路的专题综述**(如「某化合物抗某癌:A review」),即使同病、同大方向,也只是专题视角、并非为该总论 claim 做系统总结 → 通常 **partially_related(0.72~0.78)**,**不得给 0.85+**
|
||||||
|
- **单基因 / 单通路的机制原始研究**对纯流行病学负担 claim → 仍按规则 3 给 **0.45**
|
||||||
|
- 判断要点:文献类型是否「为该总论 claim 本身做系统综述/总论」;仅同病同方向、或只支撑整段中某一两句(如「需要更安全的新策略」),不足以进入 highly_related
|
||||||
|
|
||||||
==================================================
|
==================================================
|
||||||
【一、必须先拆解 claim】
|
【一、必须先拆解 claim】
|
||||||
从【本引用位置附近上下文】中提炼最小主张单元(Claim A, Claim B…),**不要**把整句笼统归为「大概讲抗癌」。例如:
|
从【本引用位置附近上下文】中提炼最小主张单元(Claim A, Claim B…),**不要**把整句笼统归为「大概讲抗癌」。例如:
|
||||||
- **主语/研究对象**(化合物单体 vs 植物提取物 vs 其他物种;是否「X has been demonstrated」)
|
- **主语/研究对象**(化合物单体 vs 植物提取物 vs 其他物种;是否「X has been demonstrated」)
|
||||||
- **证据语气与层级**(demonstrated / mechanistically vs predict / suggest;本文结果 vs 讨论转引)
|
- **证据语气与层级**(demonstrated / mechanistically vs predict / suggest;本文结果 vs 讨论转引)
|
||||||
|
- **claim 主题粒度**:是否为疾病总论型(流行病学负担 / 治疗现状与局限 / 基因组异质性 / 单靶点受限 / 亟需新策略);若是,要求「总体综述 / 分子病理 / 精准肿瘤学 / 耐药综述」类来源,单一药物专题综述只算 partially_related
|
||||||
- 疾病流行病学(高发、死亡率)
|
- 疾病流行病学(高发、死亡率)
|
||||||
- **点名通路/分子机制**(PI3K/AKT、MAPK、NF-κB 等,须逐项)
|
- **点名通路/分子机制**(PI3K/AKT、MAPK、NF-κB 等,须逐项)
|
||||||
- **点名功能结局**(抑制增殖、凋亡、血管生成、炎症信号等,须逐项)
|
- **点名功能结局**(抑制增殖、凋亡、血管生成、炎症信号等,须逐项)
|
||||||
|
|||||||
@@ -13,4 +13,11 @@ return [
|
|||||||
'dlq' => 'ref_check.article.dlq',
|
'dlq' => 'ref_check.article.dlq',
|
||||||
'route_key' => 'article.start',
|
'route_key' => 'article.start',
|
||||||
],
|
],
|
||||||
|
|
||||||
|
'reference_relevance' => [
|
||||||
|
'exchange' => 'reference_relevance',
|
||||||
|
'queue' => 'ref_relevance.article',
|
||||||
|
'dlq' => 'ref_relevance.article.dlq',
|
||||||
|
'route_key' => 'article.start',
|
||||||
|
],
|
||||||
];
|
];
|
||||||
|
|||||||
Reference in New Issue
Block a user