已经完成一个文章校对了,但换个文章id就报错了,排查前备份
This commit is contained in:
@@ -3,6 +3,7 @@
|
||||
namespace app\common;
|
||||
|
||||
use think\Db;
|
||||
use think\Env;
|
||||
use think\Queue;
|
||||
|
||||
/**
|
||||
@@ -131,8 +132,39 @@ class ReferenceCheckService
|
||||
|
||||
$this->setAmRefCheckStatus($amId, self::AM_STATUS_RUNNING);
|
||||
}
|
||||
public function checkOne(){
|
||||
$this->pushJob(intval(724), 0);
|
||||
/**
 * Manually enqueue the second-pass DOI re-check for low-confidence results of one article.
 *
 * Reads rows of article_reference_check_result for the article with
 * status = 1 and confidence <= 0.65, and offers each one to
 * maybeEnqueueSecondPass(), which decides whether a job is actually queued.
 *
 * NOTE(review): the query takes at most 2 rows in random order
 * (orderRaw('rand()') + limit(2)), so this does NOT cover every matching
 * row. It looks like a debugging sample - confirm before relying on it.
 *
 * @param int|string $articleId Article id; normalised with intval().
 * @return array ['article_id' => int, 'check_ids2' => int[], 'queued' => int]
 * @throws \InvalidArgumentException When the article id is not a positive integer.
 */
public function enqueueSecondPassByArticle($articleId)
{
    $articleId = intval($articleId);
    if ($articleId <= 0) {
        throw new \InvalidArgumentException('article_id is required');
    }

    $rows = Db::name('article_reference_check_result')
        ->where('article_id', $articleId)
        ->where('status', 1)
        ->where('confidence', '<=', 0.65)
        ->orderRaw('rand()')
        ->limit(2)
        ->select();

    // Ids of the rows for which a second-pass job was really queued.
    $checkIds2 = [];
    foreach ($rows as $checkLog) {
        $rowId = $this->resolveCheckRowId($checkLog);
        if ($this->maybeEnqueueSecondPass($rowId, floatval($checkLog['confidence']))) {
            $checkIds2[] = $rowId;
        }
    }

    return [
        'article_id' => $articleId,
        'check_ids2' => $checkIds2,
        'queued' => count($checkIds2),
    ];
}
|
||||
public function enqueueByArticle($articleId){
|
||||
if ($articleId <= 0) {
|
||||
@@ -140,7 +172,7 @@ class ReferenceCheckService
|
||||
}
|
||||
$prod = Db::name('production_article')
|
||||
->where('article_id', $articleId)
|
||||
->where('state', 0)
|
||||
->where('state', [0, 2])
|
||||
->find();
|
||||
if (empty($prod)) {
|
||||
throw new \RuntimeException('production_article not found for article_id=' . $articleId);
|
||||
@@ -296,12 +328,78 @@ class ReferenceCheckService
|
||||
return isset($map[$status]) ? $map[$status] : 'unknown';
|
||||
}
|
||||
|
||||
/**
 * Resolve the numeric row id of a check-result row.
 *
 * The table's primary key is `id`; the public API parameter is still
 * called check_id, so `check_id` is accepted as a fallback key.
 *
 * @param mixed $row Result row; anything that is not an array yields 0.
 * @return int The first positive value found under `id`, then `check_id`; 0 otherwise.
 */
public function resolveCheckRowId($row)
{
    if (!is_array($row)) {
        return 0;
    }

    // Keys are probed in priority order; the first positive integer wins.
    foreach (['id', 'check_id'] as $column) {
        if (!isset($row[$column])) {
            continue;
        }
        $candidate = intval($row[$column]);
        if ($candidate > 0) {
            return $candidate;
        }
    }

    return 0;
}
|
||||
|
||||
/**
 * Normalise the `is_match` value returned by the LLM into a boolean.
 *
 * Accepted shapes: real booleans, the numbers 0/1, and the strings
 * "1", "true", "yes", "match", "matched" (case-insensitive, trimmed).
 *
 * @param mixed $value Raw value from the LLM response.
 * @return bool True only for the truthy shapes listed above.
 */
public function parseLlmIsMatch($value)
{
    $truthyWords = ['1', 'true', 'yes', 'match', 'matched'];

    switch (true) {
        case is_bool($value):
            return $value;

        case is_int($value):
        case is_float($value):
            // Floats are truncated first, so 1.9 counts as 1.
            return intval($value) === 1;

        default:
            $normalized = strtolower(trim((string)$value));
            return in_array($normalized, $truthyWords, true);
    }
}
|
||||
|
||||
/**
 * Persist the outcome of a single reference check.
 *
 * `reason` and `error_msg` are trimmed and cut to 512 characters before
 * writing, so an over-long value cannot make the UPDATE fail on the
 * varchar(512) columns. `updated_at` is always set to the current time.
 *
 * @param int|string $checkId Primary key (`id`) of the result row.
 * @param array      $fields  Column => value pairs to write.
 * @return int Number of affected rows as reported by the query builder.
 * @throws \InvalidArgumentException When the id is not positive.
 * @throws \RuntimeException When the row does not exist or the update reports failure.
 */
public function updateCheckResult($checkId, array $fields)
{
    $checkId = intval($checkId);
    if ($checkId <= 0) {
        throw new \InvalidArgumentException('invalid check id');
    }

    // Both free-text columns share the same 512-character limit.
    foreach (['reason', 'error_msg'] as $textColumn) {
        if (isset($fields[$textColumn])) {
            $fields[$textColumn] = mb_substr(trim((string)$fields[$textColumn]), 0, 512);
        }
    }
    $fields['updated_at'] = date('Y-m-d H:i:s');

    $table = 'article_reference_check_result';

    // Check existence first so a missing row is reported as an error
    // rather than as "0 rows affected".
    $current = Db::name($table)->where('id', $checkId)->find();
    if (empty($current)) {
        throw new \RuntimeException('article_reference_check_result not found, id=' . $checkId);
    }

    $affectedRows = Db::name($table)->where('id', $checkId)->update($fields);
    if ($affectedRows === false) {
        throw new \RuntimeException('article_reference_check_result update failed, id=' . $checkId);
    }
    $affectedRows = intval($affectedRows);

    \think\Log::info('updateCheckResult id=' . $checkId . ' affected=' . $affectedRows);

    return $affectedRows;
}
|
||||
|
||||
/**
 * Fetch one check-result row by its primary key.
 *
 * @param int|string $checkId Primary key (`id`) of article_reference_check_result.
 * @return array|null The row, or null when the id is not positive or nothing is found.
 */
public function getResult($checkId)
{
    if ($checkId <= 0) {
        return null;
    }

    // Single lookup on `id`. An earlier query on `check_id` used to run
    // first, but its result was overwritten immediately, so it only cost
    // a round trip (and fails outright if that column does not exist).
    $row = Db::name('article_reference_check_result')->where('id', $checkId)->find();

    return $row ?: null;
}
|
||||
|
||||
@@ -435,7 +533,7 @@ class ReferenceCheckService
|
||||
'ref_nos' => [],
|
||||
];
|
||||
}
|
||||
$byAm[$amId]['contexts'][$ctxKey]['check_ids'][] = intval($row['check_id']);
|
||||
$byAm[$amId]['contexts'][$ctxKey]['check_ids'][] = $this->resolveCheckRowId($row);
|
||||
$byAm[$amId]['contexts'][$ctxKey]['ref_nos'][] = $refNo;
|
||||
$reason = trim((string)$this->arrGet($row, 'reason', ''));
|
||||
if ($reason !== '') {
|
||||
@@ -501,7 +599,7 @@ class ReferenceCheckService
|
||||
$issueCount++;
|
||||
$issues[] = array(
|
||||
'am_id' => $amId,
|
||||
'check_id' => intval($row['check_id']),
|
||||
'check_id' => $this->resolveCheckRowId($row),
|
||||
'reference_no' => $num,
|
||||
'reference_raw' => $inner,
|
||||
'reason' => $rowReason,
|
||||
@@ -512,7 +610,7 @@ class ReferenceCheckService
|
||||
ENT_QUOTES,
|
||||
'UTF-8'
|
||||
);
|
||||
return '<span class="ref-no-error" data-check-id="' . intval($row['check_id'])
|
||||
return '<span class="ref-no-error" data-check-id="' . $this->resolveCheckRowId($row)
|
||||
. '" data-ref-no="' . $num . '" title="' . $title . '">'
|
||||
. $numMatch[0] . '</span>';
|
||||
},
|
||||
@@ -627,6 +725,448 @@ class ReferenceCheckService
|
||||
return implode("\n", $parts);
|
||||
}
|
||||
|
||||
/**
 * Extract a DOI from the `refer_doi` field only (used by the second-pass
 * Crossref abstract lookup).
 *
 * @param mixed $refer Reference row; anything that is not an array yields ''.
 * @return string The first DOI found in `refer_doi`, or '' when the field is
 *                empty, contains "not available", or holds no valid DOI.
 */
public function extractReferDoiOnly($refer)
{
    if (!is_array($refer)) {
        return '';
    }

    $fieldValue = trim((string)$this->arrGet($refer, 'refer_doi', ''));
    $unusable = ($fieldValue === '') || (stripos($fieldValue, 'not available') !== false);
    if ($unusable) {
        return '';
    }

    $found = $this->extractDoisFromString($fieldValue);
    if (empty($found)) {
        return '';
    }

    return $found[0];
}
|
||||
|
||||
/**
 * Look up the Crossref record for the DOI stored in `refer_doi`
 * (second-pass check only).
 *
 * @param mixed $refer Reference row.
 * @return array{text:string, has_abstract:bool, doi:string} `doi` is '' when
 *               the row has no usable refer_doi; `text` is '' when Crossref
 *               returned nothing for the DOI.
 */
public function fetchCrossrefAbstractByReferDoi($refer)
{
    // Start from the "nothing found" answer and fill it in as data arrives.
    $result = [
        'text' => '',
        'has_abstract' => false,
        'doi' => $this->extractReferDoiOnly($refer),
    ];
    if ($result['doi'] === '') {
        return $result;
    }

    $client = new CrossrefService([
        'mailto' => trim((string)Env::get('crossref_mailto', '')),
    ]);

    $block = $this->extractCrossrefBlock($result['doi'], $client);
    if ($block !== null) {
        $result['text'] = $block['text'];
        $result['has_abstract'] = !empty($block['has_abstract']);
    }

    return $result;
}
|
||||
|
||||
/**
 * Read the `can_support` verdict from an LLM result.
 *
 * If the `can_support` key is present (even with a null value) it is used;
 * otherwise `is_match` is used, and a missing `is_match` counts as false.
 *
 * @param mixed $llmResult Decoded LLM response; non-arrays yield false.
 * @return bool
 */
public function parseLlmCanSupport($llmResult)
{
    if (!is_array($llmResult)) {
        return false;
    }

    if (array_key_exists('can_support', $llmResult)) {
        $verdict = $llmResult['can_support'];
    } elseif (isset($llmResult['is_match'])) {
        $verdict = $llmResult['is_match'];
    } else {
        $verdict = false;
    }

    return $this->parseLlmIsMatch($verdict);
}
|
||||
|
||||
/**
 * First-pass check: load the whole section text from article_main.content
 * and flatten it to plain text.
 *
 * Steps: unwrap <blue>[n]</blue> citation markers to "[n]", strip tags,
 * decode HTML entities, collapse whitespace, then cut to $maxChars
 * characters (appending "..." when cut).
 *
 * @param array $row      Check row; only `am_id` is read.
 * @param int   $maxChars Maximum length of the result; values below 500 are raised to 500.
 * @return string Plain text, or '' when am_id is missing, the section is not
 *                found, or its content is empty.
 */
public function resolveMainContentForJob(array $row, $maxChars = 8000)
{
    $amId = intval($this->arrGet($row, 'am_id', 0));
    if ($amId <= 0) {
        return '';
    }
    $main = Db::name('article_main')
        ->field('content')
        ->where('am_id', $amId)
        ->find();
    if (empty($main)) {
        return '';
    }

    $text = trim((string)$this->arrGet($main, 'content', ''));
    if ($text === '') {
        return '';
    }

    // Keep the citation numbers but drop the <blue> wrapper before strip_tags.
    $text = preg_replace('/<blue>\[([\d,\-\s]+)\]<\/blue>/', '[$1]', $text);
    $text = strip_tags($text);
    $text = html_entity_decode($text, ENT_QUOTES | ENT_HTML5, 'UTF-8');

    // With the /u modifier preg_replace returns null on malformed UTF-8,
    // which would silently blank the whole section. Fall back to a
    // byte-wise collapse so damaged content is still checked.
    $collapsed = preg_replace('/\s+/u', ' ', $text);
    if ($collapsed === null) {
        $collapsed = preg_replace('/\s+/', ' ', $text);
    }
    $text = trim((string)$collapsed);

    $maxChars = max(500, intval($maxChars));
    if (mb_strlen($text) > $maxChars) {
        $text = mb_substr($text, 0, $maxChars) . '...';
    }

    return $text;
}
|
||||
|
||||
/**
 * Local context around a citation, for callers that do not need the
 * whole section.
 *
 * @param array $row Check row.
 * @return string Trimmed `origin_text`, or trimmed `content_a` when
 *                origin_text is empty; '' when both are empty.
 */
public function resolveCitationContextForJob(array $row)
{
    // Fields are tried in priority order; the first non-empty one wins.
    foreach (['origin_text', 'content_a'] as $column) {
        $context = trim((string)$this->arrGet($row, $column, ''));
        if ($context !== '') {
            return $context;
        }
    }

    return '';
}
|
||||
|
||||
/**
 * Extract the preferred DOI (10.xxxx/...) from a reference row.
 *
 * Priority: refer_content (the DOI in the original citation text is the
 * closest to what was actually cited) > refer_doi > doi > doilink.
 *
 * @param mixed $refer Reference row.
 * @return string The highest-priority DOI, or '' when none is found.
 */
public function extractDoiFromRefer($refer)
{
    $candidates = $this->extractAllDoiCandidatesFromRefer($refer);
    if (empty($candidates)) {
        return '';
    }

    return $candidates[0];
}
|
||||
|
||||
/**
 * Collect every DOI a reference row may point to, de-duplicated and in
 * priority order (refer_content, refer_doi, doi, doilink).
 *
 * Used by the second-pass DOI re-check: when the refer_doi metadata and
 * the DOI in the original citation text disagree (data drift), the DOI
 * from the citation text is tried first to fetch the real abstract.
 *
 * @param mixed $refer Reference row; anything that is not an array yields [].
 * @return string[]
 */
public function extractAllDoiCandidatesFromRefer($refer)
{
    if (!is_array($refer)) {
        return [];
    }

    $candidates = [];
    foreach (['refer_content', 'refer_doi', 'doi', 'doilink'] as $column) {
        $raw = (string)$this->arrGet($refer, $column, '');
        foreach ($this->extractDoisFromString($raw) as $doi) {
            // Keep the first occurrence so the priority order is preserved.
            if (in_array($doi, $candidates, true)) {
                continue;
            }
            $candidates[] = $doi;
        }
    }

    return $candidates;
}
|
||||
|
||||
/**
 * Pull every DOI of the form 10.xxxx/yyy out of free text.
 *
 * Two passes are made: DOIs inside doi.org URLs, then bare "10.nnnn/..."
 * tokens. Trailing punctuation is trimmed and each candidate is validated.
 * Text that is empty or contains "not available" yields no DOIs.
 *
 * @param mixed $text Text to scan; cast to string.
 * @return string[] Unique DOIs in order of first appearance.
 */
private function extractDoisFromString($text)
{
    $text = trim((string)$text);
    if ($text === '' || stripos($text, 'not available') !== false) {
        return [];
    }

    $patterns = [
        '~doi\.org/([^\s?#"\'<>]+)~i',
        '~\b(10\.\d{3,9}/[^\s?#"\'<>]+)~i',
    ];

    $dois = [];
    foreach ($patterns as $pattern) {
        if (!preg_match_all($pattern, $text, $hits)) {
            continue;
        }
        foreach ($hits[1] as $captured) {
            $candidate = $this->trimDoiTail(trim($captured));
            if ($this->isValidDoi($candidate)) {
                $dois[] = $candidate;
            }
        }
    }

    // Last resort: the whole text starts with "10." but neither pattern hit.
    if ($dois === [] && strpos($text, '10.') === 0) {
        $candidate = $this->trimDoiTail($text);
        if ($this->isValidDoi($candidate)) {
            $dois[] = $candidate;
        }
    }

    return array_values(array_unique($dois));
}
|
||||
|
||||
/**
 * Strip punctuation, closing brackets, quotes, backslashes and whitespace
 * from the end of a DOI candidate.
 *
 * @param string $doi Candidate as captured from text.
 * @return string The candidate without trailing junk characters.
 */
private function trimDoiTail($doi)
{
    $trailingJunk = ".,;:)]}>\"'\\ \t\n\r";

    return rtrim($doi, $trailingJunk);
}
|
||||
|
||||
/**
 * Check that a string has the shape "10.<3-9 digits>/<non-space suffix>".
 *
 * @param mixed $doi Candidate; cast to string.
 * @return bool
 */
private function isValidDoi($doi)
{
    $matched = preg_match('~^10\.\d{3,9}/[^\s]+$~i', (string)$doi);

    return $matched === 1;
}
|
||||
|
||||
/**
 * Fetch the real content behind a reference's DOI through PubMed and
 * Crossref (the local LLM cannot open web pages, so it must be fetched
 * beforehand).
 *
 * Behaviour:
 *  - every DOI candidate of the row is tried
 *    (refer_content > refer_doi > doi > doilink);
 *  - the first candidate that yields an abstract wins;
 *  - otherwise the first candidate that yielded any block is used;
 *  - '' is returned when nothing could be fetched, which the caller can
 *    use to skip the second pass.
 *
 * @param mixed $refer Reference row.
 * @return string Text block describing the cited work, or ''.
 */
public function fetchDoiLiteratureBlock($refer)
{
    $candidates = $this->extractAllDoiCandidatesFromRefer($refer);
    if (empty($candidates)) {
        return '';
    }

    $pubmed = new PubmedService([
        'email' => trim((string)Env::get('pubmed_email', '')),
        'tool' => trim((string)Env::get('pubmed_tool', 'tmrjournals')),
    ]);
    $crossref = new CrossrefService([
        'mailto' => trim((string)Env::get('crossref_mailto', '')),
    ]);

    // First block that came back without an abstract; used only if no
    // candidate provides one.
    $withoutAbstract = null;

    foreach ($candidates as $doi) {
        $block = $this->buildDoiBlockFromSources($doi, $pubmed, $crossref);
        if ($block === null) {
            continue;
        }
        if (!empty($block['has_abstract'])) {
            return $block['text'];
        }
        if ($withoutAbstract === null) {
            $withoutAbstract = $block;
        }
    }

    return $withoutAbstract === null ? '' : $withoutAbstract['text'];
}
|
||||
|
||||
/**
 * Fetch the real content for a single DOI.
 *
 * PubMed is asked first. If it returns a record with a title or an
 * abstract, a text block is built from it; when that record has no
 * abstract, a Crossref block is appended if Crossref does have one.
 * If PubMed returns nothing usable, the Crossref block alone is returned.
 *
 * @param mixed           $doi      DOI; cast to string and trimmed.
 * @param PubmedService   $pubmed   PubMed client.
 * @param CrossrefService $crossref Crossref client.
 * @return array|null ['text' => string, 'has_abstract' => bool], or null when
 *                    the DOI is empty or no source returned a record.
 */
private function buildDoiBlockFromSources($doi, PubmedService $pubmed, CrossrefService $crossref)
{
    $doi = trim((string)$doi);
    if ($doi === '') {
        return null;
    }

    $pub = $pubmed->fetchByDoi($doi);
    if (!is_array($pub)) {
        return $this->extractCrossrefBlock($doi, $crossref);
    }

    $pubAbstract = trim((string)$this->arrGet($pub, 'abstract', ''));
    if ($pubAbstract === '' && trim((string)$this->arrGet($pub, 'title', '')) === '') {
        // PubMed record carries neither title nor abstract: use Crossref only.
        return $this->extractCrossrefBlock($doi, $crossref);
    }

    $lines = ['Source: PubMed (DOI ' . $doi . ')'];

    $scalarLabels = [
        'title' => 'Actual Title: ',
        'journal' => 'Journal: ',
        'year' => 'Year: ',
    ];
    foreach ($scalarLabels as $key => $label) {
        if (!empty($pub[$key])) {
            $lines[] = $label . trim((string)$pub[$key]);
        }
    }

    $listLabels = [
        'publication_types' => 'Publication Types: ',
        'mesh_terms' => 'MeSH: ',
    ];
    foreach ($listLabels as $key => $label) {
        if (!empty($pub[$key])) {
            $lines[] = $label . implode('; ', (array)$pub[$key]);
        }
    }

    if ($pubAbstract !== '') {
        $lines[] = 'Abstract: ' . $this->truncate($pubAbstract, 3500);

        return ['text' => implode("\n", $lines), 'has_abstract' => true];
    }

    // PubMed had a title but no abstract: try to borrow one from Crossref.
    $cr = $this->extractCrossrefBlock($doi, $crossref);
    if ($cr !== null && $cr['has_abstract']) {
        $lines[] = "\n--- Crossref 补充 ---\n" . $cr['text'];

        return ['text' => implode("\n", $lines), 'has_abstract' => true];
    }

    return ['text' => implode("\n", $lines), 'has_abstract' => false];
}
|
||||
|
||||
/**
 * Build a text block (title / journal / authors / abstract) from Crossref.
 *
 * The abstract usually arrives wrapped in JATS XML and is cleaned before
 * use. When there is no abstract, a note telling the reader to judge
 * carefully is appended instead.
 *
 * @param string          $doi      DOI to look up.
 * @param CrossrefService $crossref Crossref client.
 * @return array|null ['text' => string, 'has_abstract' => bool], or null when
 *                    fetchWork() does not return an array.
 */
private function extractCrossrefBlock($doi, CrossrefService $crossref)
{
    $msg = $crossref->fetchWork($doi);
    if (!is_array($msg)) {
        return null;
    }

    $summary = $crossref->fetchWorkSummary($doi);
    if (!is_array($summary)) {
        $summary = [];
    }

    $lines = ['Source: Crossref api.crossref.org/works/' . rawurlencode($doi)];

    // Prefer the title from the raw work record; fall back to the summary.
    if (isset($msg['title'][0])) {
        $title = trim((string)$msg['title'][0]);
    } else {
        $title = trim((string)$this->arrGet($summary, 'title', ''));
    }
    if ($title !== '') {
        $lines[] = 'Actual Title: ' . $title;
    }

    $summaryLabels = [
        'joura' => 'Journal: ',
        'author_str' => 'Authors: ',
        'dateno' => 'Publication: ',
        'doilink' => 'DOI Link: ',
    ];
    foreach ($summaryLabels as $key => $label) {
        if (!empty($summary[$key])) {
            $lines[] = $label . trim((string)$summary[$key]);
        }
    }

    if (!empty($summary['is_retracted'])) {
        $lines[] = 'Retraction: yes - ' . trim((string)$this->arrGet($summary, 'retract_reason', ''));
    }

    $abstract = $this->cleanCrossrefAbstract((string)$this->arrGet($msg, 'abstract', ''));
    $hasAbstract = ($abstract !== '');
    $lines[] = $hasAbstract
        ? 'Abstract: ' . $this->truncate($abstract, 3500)
        : 'Note: Crossref 未返回摘要,请结合标题/期刊/作者与正文谨慎判断。';

    return ['text' => implode("\n", $lines), 'has_abstract' => $hasAbstract];
}
|
||||
|
||||
/**
 * Turn a Crossref abstract (usually JATS XML) into plain text.
 *
 * <jats:title> elements are dropped with their content, each <jats:p>
 * starts a new line, all remaining tags are removed, runs of spaces/tabs
 * become one space, and consecutive line breaks collapse to one.
 *
 * @param mixed $raw Raw abstract; cast to string.
 * @return string Cleaned abstract, or '' when the input is empty.
 */
private function cleanCrossrefAbstract($raw)
{
    $text = trim((string)$raw);
    if ($text === '') {
        return '';
    }

    // JATS-specific clean-up, applied in order before generic tag stripping.
    $jatsRules = [
        '~<jats:title[^>]*>.*?</jats:title>~is' => '',
        '~<jats:p[^>]*>~i' => "\n",
        '~</jats:p>~i' => '',
        '~</?jats:[^>]+>~i' => '',
    ];
    foreach ($jatsRules as $pattern => $replacement) {
        $text = preg_replace($pattern, $replacement, $text);
    }

    $text = strip_tags($text);

    // Whitespace normalisation, also order-sensitive.
    $whitespaceRules = [
        '/[ \t]+/u' => ' ',
        "/\r\n|\r/u" => "\n",
        "/\n{2,}/u" => "\n",
    ];
    foreach ($whitespaceRules as $pattern => $replacement) {
        $text = preg_replace($pattern, $replacement, $text);
    }

    return trim($text);
}
|
||||
|
||||
/**
 * Cut text to at most $max characters (multibyte-aware), appending "..."
 * when something was cut off.
 *
 * @param mixed $text Text; cast to string.
 * @param int   $max  Maximum number of characters kept.
 * @return string
 */
private function truncate($text, $max)
{
    $text = (string)$text;

    return mb_strlen($text) > $max
        ? mb_substr($text, 0, $max) . '...'
        : $text;
}
|
||||
|
||||
/**
 * Prepare the data for the second-pass DOI re-check: the bibliographic
 * entry plus the content actually fetched for its DOI.
 *
 * @param mixed  $refer     Reference row.
 * @param string $referText Pre-formatted entry; when blank, the row is
 *                          formatted with formatReferForLlm().
 * @return array{refer_text:string, doi_block:string, has_abstract:bool, doi_used:string}
 */
public function prepareRecheckPayload($refer, $referText = '')
{
    $entry = trim($referText);
    if ($entry === '') {
        $entry = $this->formatReferForLlm($refer);
    }

    $crossrefData = $this->fetchCrossrefAbstractByReferDoi($refer);

    $payload = [];
    $payload['refer_text'] = $entry;
    $payload['doi_block'] = $crossrefData['text'];
    $payload['has_abstract'] = $crossrefData['has_abstract'];
    $payload['doi_used'] = $crossrefData['doi'];

    return $payload;
}
|
||||
|
||||
/**
 * Legacy interface: returns the re-check material as one text block.
 * Kept for backward compatibility; new callers should use
 * prepareRecheckPayload().
 *
 * @param mixed  $refer     Reference row.
 * @param string $referText Optional pre-formatted bibliographic entry.
 * @return string The entry followed either by the fetched Crossref block
 *                or by a notice that nothing could be fetched.
 */
public function formatReferForDoiRecheck($refer, $referText = '')
{
    $payload = $this->prepareRecheckPayload($refer, $referText);

    if ($payload['doi_block'] === '') {
        $appendix = "\n\n【DOI 文献真实内容】\n未能从 PubMed/Crossref 获取该 DOI 的摘要或元数据,请依据书目条目与正文谨慎判断。";
    } else {
        $appendix = "\n\n【Crossref 摘要(依据 Refer_doi 从 api.crossref.org/works 获取)】\n"
            . $payload['doi_block'];
    }

    return $payload['refer_text'] . $appendix;
}
|
||||
|
||||
/**
 * Queue the second-pass re-check for one result when the first pass was
 * uncertain (confidence <= 0.65) and real DOI content can be fetched.
 *
 * Nothing is queued (to avoid a pointless re-run with the same outcome) when:
 *  - the check id is invalid or the confidence is above the threshold;
 *  - the result row or its reference row (state = 0) does not exist;
 *  - refer_doi is empty or Crossref returns no abstract.
 *
 * The job is pushed with a 5-second delay.
 *
 * @param int|string   $checkId    Primary key of the check-result row.
 * @param float|string $confidence First-pass confidence.
 * @return bool True when a job was queued.
 */
public function maybeEnqueueSecondPass($checkId, $confidence)
{
    $checkId = intval($checkId);
    $confidence = floatval($confidence);
    if ($checkId <= 0 || $confidence > 0.65) {
        return false;
    }

    $row = Db::name('article_reference_check_result')->where('id', $checkId)->find();
    if (empty($row)) {
        return false;
    }

    $referId = intval($row['p_refer_id']);
    if ($referId <= 0) {
        return false;
    }

    $refer = Db::name('production_article_refer')
        ->where('p_refer_id', $referId)
        ->where('state', 0)
        ->find();
    if (empty($refer) || $this->extractReferDoiOnly($refer) === '') {
        return false;
    }

    // Only worth a second pass if Crossref can supply an abstract to judge against.
    $crossrefData = $this->fetchCrossrefAbstractByReferDoi($refer);
    if (empty($crossrefData['has_abstract'])) {
        return false;
    }

    $this->pushJob2($checkId, 5);

    return true;
}
|
||||
|
||||
/**
|
||||
* 从 article_main.content 提取 blue 引用
|
||||
*/
|
||||
@@ -1021,10 +1561,24 @@ class ReferenceCheckService
|
||||
} else {
|
||||
$jobId = Queue::push($jobClass, $data, self::QUEUE_NAME);
|
||||
}
|
||||
var_dump("=====jobId:".$jobId);
|
||||
} catch (\Exception $e) {
|
||||
\think\Log::error('ReferenceCheck pushJob failed check_id=' . $checkId . ' ' . $e->getMessage());
|
||||
throw $e;
|
||||
}
|
||||
}
|
||||
/**
 * Push a second-pass job (ReferenceCheckTwo@fire) onto the queue.
 *
 * @param int $checkId      Primary key of the check-result row, passed to the job as `check_id`.
 * @param int $delaySeconds Delay before the job becomes available; 0 or less pushes immediately.
 * @return void
 * @throws \Exception Re-thrown after logging when the queue rejects the job.
 */
private function pushJob2($checkId, $delaySeconds = 0)
{
    $jobClass = 'app\api\job\ReferenceCheckTwo@fire';
    $data = ['check_id' => $checkId];
    try {
        // The id returned by the queue is not needed here, so it is not kept.
        if ($delaySeconds > 0) {
            Queue::later($delaySeconds, $jobClass, $data, self::QUEUE_NAME);
        } else {
            Queue::push($jobClass, $data, self::QUEUE_NAME);
        }
    } catch (\Exception $e) {
        \think\Log::error('ReferenceCheckTwo pushJob failed check_id=' . $checkId . ' ' . $e->getMessage());
        throw $e;
    }
}
|
||||
}
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user