This commit is contained in:
wangjinlei
2026-06-23 09:55:38 +08:00
parent 6b9d119b27
commit 978c81ea10
4 changed files with 93 additions and 7 deletions

View File

@@ -16,6 +16,10 @@ class CrossrefService
private $timeout = 15; // 请求超时(秒)
private $maxRetry = 2; // 单个DOI最大重试次数
private $crossrefUrl = "https://api.crossref.org/works/"; // 接口地址
private $pubmedAbbr = true; // CrossRef 无期刊缩写时,是否回退到 PubMed/NLM 规范缩写
/** @var PubmedService|null 懒加载 */
private $pubmedService = null;
public function __construct($config = [])
{
@@ -24,6 +28,7 @@ class CrossrefService
if (isset($config['timeout'])) $this->timeout = intval($config['timeout']);
if (isset($config['maxRetry'])) $this->maxRetry = intval($config['maxRetry']);
if (isset($config['crossrefUrl'])) $this->crossrefUrl = (string)$config['crossrefUrl'];
if (isset($config['pubmed_abbr'])) $this->pubmedAbbr = (bool)$config['pubmed_abbr'];
}
}
@@ -191,7 +196,15 @@ class CrossrefService
$title = $this->getTitle($msg);
$publisher = $this->getPublisher($msg);
$joura = !empty($publisher['title']) ? $publisher['title'] : ($publisher['short_title'] ?? '');
$validDoi = $this->filterValidDoi($doi);
// Journal abbreviation priority: CrossRef short-container-title → PubMed/NLM canonical abbreviation → CrossRef full title
$shortTitle = trim((string)($publisher['short_title'] ?? ''));
$fullTitle = trim((string)($publisher['title'] ?? ''));
$joura = $shortTitle;
if ($joura === '') {
$pubmedAbbr = $this->lookupPubmedJournalAbbr($validDoi);
$joura = $pubmedAbbr !== '' ? $pubmedAbbr : $fullTitle;
}
$authors = $this->getAuthors($msg);
$dateno = $this->getVolumeIssuePages($msg);
$retractInfo = $this->checkRetracted($msg);
@@ -280,6 +293,34 @@ class CrossrefService
];
}
/**
 * Best-effort fallback that asks PubmedService for a journal's canonical
 * abbreviation, for use when CrossRef itself supplies no short title.
 *
 * An empty string is returned when the fallback is switched off
 * ($this->pubmedAbbr is false), when the DOI is empty after trimming,
 * when the service answers with a non-string value, or when anything
 * throws. Every Throwable is swallowed on purpose so that this lookup
 * can never interrupt the main flow.
 *
 * The PubmedService instance is built on first use (with this service's
 * mailto address and timeout) and reused on later calls.
 *
 * @param string $doi Bare DOI, expected to be already normalised by the caller
 * @return string Trimmed abbreviation, or '' when none could be obtained
 */
private function lookupPubmedJournalAbbr($doi)
{
    $bareDoi = trim((string)$doi);
    if ($bareDoi === '' || !$this->pubmedAbbr) {
        return '';
    }

    $abbreviation = '';
    try {
        // Lazily create the PubMed client the first time it is needed.
        if (null === $this->pubmedService) {
            $pubmedOptions = [
                'email' => $this->mailto,
                'timeout' => $this->timeout,
            ];
            $this->pubmedService = new PubmedService($pubmedOptions);
        }

        $found = $this->pubmedService->journalAbbrByDoi($bareDoi);
        if (is_string($found)) {
            $abbreviation = trim($found);
        }
    } catch (\Throwable $ignored) {
        // Deliberately silent: the lookup is optional and must not break the caller.
        $abbreviation = '';
    }

    return $abbreviation;
}
/**
* 提取作者列表
*/

View File

@@ -60,7 +60,8 @@ class PubmedService
$pmid = trim($pmid);
if ($pmid === '') return null;
$cacheKey = 'pmid_' . $pmid;
// v2: parsed results now include journal_iso_abbr / journal_medline_ta; use a new key so stale cache entries are not hit
$cacheKey = 'pmid_v2_' . $pmid;
$cached = $this->cacheGet($cacheKey, 30 * 86400);
if (is_array($cached)) return $cached;
@@ -96,6 +97,22 @@ class PubmedService
return $info;
}
/**
 * Resolve a DOI to its journal abbreviation (e.g. "J Clin Oncol").
 *
 * The record returned by fetchByDoi() is inspected for
 * 'journal_iso_abbr' first and 'journal_medline_ta' second; the first
 * value that is non-empty after trimming wins.
 *
 * @param string $doi DOI handed to fetchByDoi()
 * @return string|null The trimmed abbreviation, or null when fetchByDoi()
 *                     yields no array or both fields are empty
 */
public function journalAbbrByDoi(string $doi): ?string
{
    $record = $this->fetchByDoi($doi);
    if (!is_array($record)) {
        return null;
    }

    // Ordered by preference: ISO abbreviation before the MedlineTA form.
    foreach (['journal_iso_abbr', 'journal_medline_ta'] as $field) {
        $candidate = trim((string)($record[$field] ?? ''));
        if ($candidate !== '') {
            return $candidate;
        }
    }

    return null;
}
// ----------------- Internals -----------------
private function esearch(string $term): ?string
@@ -162,6 +179,9 @@ class PubmedService
$pubTypes = array_values(array_unique($pubTypes));
$journal = $this->xpText($xp, '//PubmedArticle//Journal//Title');
// Canonical journal abbreviations: ISOAbbreviation (under Journal) and MedlineTA (under MedlineJournalInfo)
$journalIsoAbbr = $this->xpText($xp, '//PubmedArticle//Journal//ISOAbbreviation');
$journalMedlineTa = $this->xpText($xp, '//PubmedArticle//MedlineJournalInfo//MedlineTA');
$year = '';
$year = $this->xpText($xp, '//PubmedArticle//JournalIssue//PubDate//Year');
@@ -182,6 +202,8 @@ class PubmedService
'mesh_terms' => $mesh,
'publication_types' => $pubTypes,
'journal' => $journal,
'journal_iso_abbr' => $journalIsoAbbr,
'journal_medline_ta' => $journalMedlineTa,
'year' => $year,
];
}