自动推广

This commit is contained in:
wangjinlei
2026-04-03 11:45:45 +08:00
parent 22947a56a4
commit a802b2e923
11 changed files with 2240 additions and 36 deletions

View File

@@ -0,0 +1,237 @@
<?php
namespace app\common;
/**
 * PubMed helper built on the NCBI E-utilities HTTP API.
 *
 * Features:
 * - DOI -> PMID lookup (esearch.fcgi)
 * - PMID -> structured article data (efetch.fcgi): title, abstract,
 *   MeSH terms, publication types, year and journal
 *
 * Notes:
 * - Successful lookups are cached as JSON files under
 *   ROOT_PATH/runtime/pubmed_cache for 30 days, so the same DOI/PMID is not
 *   requested from NCBI again and again.
 * - Every failure (network error, HTTP error, unparsable response, no match)
 *   is reported to the caller as null; nothing is thrown.
 */
class PubmedService
{
    /** Lifetime of a cache entry in seconds (30 days). */
    private const CACHE_TTL = 30 * 86400;

    /** @var string E-utilities base URL, always ending with a slash. */
    private $base = 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/';

    /** @var int Total request timeout in seconds (never below 5). */
    private $timeout = 20;

    /** @var string Value sent as the E-utilities "tool" parameter. */
    private $tool = 'tmrjournals';

    /** @var string Value sent as the E-utilities "email" parameter; omitted when empty. */
    private $email = '';

    /** @var string Optional NCBI API key ("api_key" parameter); omitted when empty. */
    private $apiKey = '';

    /** @var bool Whether the TLS peer certificate is verified. */
    private $verifySsl = true;

    /**
     * @param array $config Optional overrides:
     *                      - base       (string) E-utilities base URL
     *                      - timeout    (int)    request timeout in seconds, minimum 5
     *                      - tool       (string) "tool" request parameter
     *                      - email      (string) "email" request parameter
     *                      - api_key    (string) NCBI API key
     *                      - verify_ssl (bool)   set to false only if the host has no usable CA bundle
     */
    public function __construct(array $config = [])
    {
        if (isset($config['base'])) {
            $this->base = rtrim((string)$config['base'], '/') . '/';
        }
        if (isset($config['timeout'])) {
            $this->timeout = max(5, intval($config['timeout']));
        }
        if (isset($config['tool'])) {
            $this->tool = (string)$config['tool'];
        }
        if (isset($config['email'])) {
            $this->email = (string)$config['email'];
        }
        if (isset($config['api_key'])) {
            $this->apiKey = trim((string)$config['api_key']);
        }
        if (isset($config['verify_ssl'])) {
            $this->verifySsl = (bool)$config['verify_ssl'];
        }
    }

    /**
     * Resolve a DOI to a PMID.
     *
     * Searches with the [DOI] field tag first and falls back to [AID] when
     * that yields nothing. Only successful lookups are cached.
     *
     * @param string $doi DOI to look up; surrounding whitespace is ignored.
     * @return string|null The PMID, or null when the DOI is empty or not found.
     */
    public function doiToPmid(string $doi): ?string
    {
        $doi = trim($doi);
        if ($doi === '') {
            return null;
        }

        // The key is a hash, so the DOI's own characters never reach the file system.
        $cacheKey = 'doi2pmid_' . sha1(strtolower($doi));
        $cached = $this->cacheGet($cacheKey, self::CACHE_TTL);
        if (is_string($cached) && $cached !== '') {
            return $cached;
        }

        $pmid = $this->esearch($doi . '[DOI]');
        if (!$pmid) {
            $pmid = $this->esearch($doi . '[AID]');
        }
        if (!$pmid) {
            return null;
        }

        $this->cacheSet($cacheKey, $pmid);
        return $pmid;
    }

    /**
     * Fetch structured article data for a PMID.
     *
     * @param string $pmid PubMed identifier; must consist of digits only.
     * @return array|null Array with keys title, abstract, mesh_terms,
     *                    publication_types, journal and year, or null when the
     *                    PMID is invalid, the request fails or the response
     *                    carries neither a title nor an abstract.
     */
    public function fetchByPmid(string $pmid): ?array
    {
        $pmid = trim($pmid);
        // The PMID becomes part of a cache file name and of the request, so
        // anything that is not purely numeric is rejected up front.
        if (preg_match('/^\d+$/D', $pmid) !== 1) {
            return null;
        }

        $cacheKey = 'pmid_' . $pmid;
        $cached = $this->cacheGet($cacheKey, self::CACHE_TTL);
        if (is_array($cached)) {
            return $cached;
        }

        $xml = $this->httpGet($this->buildUrl('efetch.fcgi', [
            'db' => 'pubmed',
            'id' => $pmid,
            'retmode' => 'xml',
        ]));
        if (trim($xml) === '') {
            return null;
        }

        $data = $this->parseEfetchXml($xml);
        if (!$data) {
            return null;
        }

        $this->cacheSet($cacheKey, $data);
        return $data;
    }

    /**
     * Fetch structured article data for a DOI (DOI -> PMID -> efetch).
     *
     * @param string $doi DOI to look up.
     * @return array|null Same shape as fetchByPmid() plus the keys "pmid" and
     *                    "doi" (the DOI exactly as passed in), or null when
     *                    either step fails.
     */
    public function fetchByDoi(string $doi): ?array
    {
        $pmid = $this->doiToPmid($doi);
        if (!$pmid) {
            return null;
        }

        $info = $this->fetchByPmid($pmid);
        if (!$info) {
            return null;
        }

        $info['pmid'] = $pmid;
        $info['doi'] = $doi;
        return $info;
    }

    // ----------------- Internals -----------------

    /**
     * Build an E-utilities URL, appending the identification parameters.
     *
     * @param string $endpoint Script name, e.g. "esearch.fcgi".
     * @param array  $params   Endpoint-specific query parameters.
     */
    private function buildUrl(string $endpoint, array $params): string
    {
        $params['tool'] = $this->tool;
        if ($this->email !== '') {
            $params['email'] = $this->email;
        }
        if ($this->apiKey !== '') {
            $params['api_key'] = $this->apiKey;
        }
        return $this->base . $endpoint . '?' . http_build_query($params);
    }

    /**
     * Run an esearch query against the pubmed database.
     *
     * @param string $term Search term including its field tag.
     * @return string|null First id of the result list, or null when there is none.
     */
    private function esearch(string $term): ?string
    {
        $body = $this->httpGet($this->buildUrl('esearch.fcgi', [
            'db' => 'pubmed',
            'retmode' => 'json',
            'retmax' => 1,
            'term' => $term,
        ]));
        if ($body === '') {
            return null;
        }

        $json = json_decode($body, true);
        if (!is_array($json)) {
            return null;
        }

        $ids = $json['esearchresult']['idlist'] ?? [];
        if (is_array($ids) && !empty($ids[0])) {
            return (string)$ids[0];
        }
        return null;
    }

    /**
     * Extract the article fields from an efetch XML document.
     *
     * @param string $xml Raw efetch response.
     * @return array|null Parsed fields, or null when the XML is not well-formed
     *                    or contains neither a title nor an abstract.
     */
    private function parseEfetchXml(string $xml): ?array
    {
        if (trim($xml) === '') {
            return null;
        }

        // Collect libxml errors silently while parsing, then put the global
        // setting back and empty the error buffer so nothing leaks into
        // unrelated code running in the same process.
        $previous = libxml_use_internal_errors(true);
        try {
            $doc = new \DOMDocument();
            // LIBXML_NONET: never fetch DTDs or entities over the network.
            if (!$doc->loadXML($xml, LIBXML_NONET)) {
                return null;
            }

            $xp = new \DOMXPath($doc);

            $title = $this->xpText($xp, '//PubmedArticle//ArticleTitle');

            // Structured abstracts come as several labelled sections; each
            // one is rendered as "LABEL: text" on its own line.
            $sections = [];
            $absNodes = $xp->query('//PubmedArticle//Abstract//AbstractText');
            if ($absNodes) {
                foreach ($absNodes as $node) {
                    $text = trim($node->textContent);
                    if ($text === '') {
                        continue;
                    }
                    $label = $node instanceof \DOMElement ? trim($node->getAttribute('Label')) : '';
                    $sections[] = $label !== '' ? ($label . ': ' . $text) : $text;
                }
            }
            $abstract = trim(implode("\n", $sections));

            $mesh = $this->xpList($xp, '//PubmedArticle//MeshHeadingList//MeshHeading//DescriptorName');
            $pubTypes = $this->xpList($xp, '//PubmedArticle//PublicationTypeList//PublicationType');
            $journal = $this->xpText($xp, '//PubmedArticle//Journal//Title');

            $year = $this->xpText($xp, '//PubmedArticle//JournalIssue//PubDate//Year');
            if ($year === '') {
                // No <Year>: take the first four-digit year out of the
                // free-text <MedlineDate> (e.g. "2019 Nov-Dec").
                $medlineDate = $this->xpText($xp, '//PubmedArticle//JournalIssue//PubDate//MedlineDate');
                if (preg_match('/(19\d{2}|20\d{2})/', $medlineDate, $m)) {
                    $year = $m[1];
                }
            }

            if ($title === '' && $abstract === '') {
                return null;
            }

            return [
                'title' => $title,
                'abstract' => $abstract,
                'mesh_terms' => $mesh,
                'publication_types' => $pubTypes,
                'journal' => $journal,
                'year' => $year,
            ];
        } finally {
            libxml_clear_errors();
            libxml_use_internal_errors($previous);
        }
    }

    /**
     * Trimmed text of the first node matching the XPath query, or '' when none matches.
     */
    private function xpText(\DOMXPath $xp, string $query): string
    {
        $nodes = $xp->query($query);
        if ($nodes && $nodes->length > 0) {
            return trim($nodes->item(0)->textContent);
        }
        return '';
    }

    /**
     * Trimmed, non-empty, de-duplicated texts of all nodes matching the query,
     * in document order.
     *
     * @return string[]
     */
    private function xpList(\DOMXPath $xp, string $query): array
    {
        $values = [];
        $nodes = $xp->query($query);
        if ($nodes) {
            foreach ($nodes as $node) {
                $text = trim($node->textContent);
                if ($text !== '') {
                    $values[] = $text;
                }
            }
        }
        return array_values(array_unique($values));
    }

    /**
     * Perform a GET request.
     *
     * @param string $url Absolute URL.
     * @return string Response body, or '' on a transport error or an HTTP
     *                status of 400 and above.
     */
    private function httpGet(string $url): string
    {
        $ch = curl_init();
        if ($ch === false) {
            return '';
        }

        curl_setopt_array($ch, [
            CURLOPT_URL => $url,
            CURLOPT_RETURNTRANSFER => true,
            CURLOPT_CONNECTTIMEOUT => min(10, $this->timeout),
            CURLOPT_TIMEOUT => $this->timeout,
            // Certificate verification stays on unless explicitly disabled via
            // the "verify_ssl" option; responses are cached for 30 days, so a
            // forged one would stick around.
            CURLOPT_SSL_VERIFYPEER => $this->verifySsl,
            CURLOPT_HTTPHEADER => [
                'User-Agent: TMRjournals-PubMed/1.0',
            ],
        ]);

        $body = curl_exec($ch);
        $status = (int)curl_getinfo($ch, CURLINFO_HTTP_CODE);
        curl_close($ch);

        // An error page (rate limit, server error) is not a usable payload.
        if (!is_string($body) || $status >= 400) {
            return '';
        }
        return $body;
    }

    /**
     * Directory holding the cache files.
     */
    private function cacheDir(): string
    {
        // ROOT_PATH may end with either separator depending on the platform.
        return rtrim(ROOT_PATH, '/\\') . '/runtime/pubmed_cache';
    }

    /**
     * Map a cache key to its file path.
     *
     * Keys made only of letters, digits, "_" and "-" are used verbatim; any
     * other key is replaced by a hash so it can never point outside the
     * cache directory.
     */
    private function cacheFile(string $key): string
    {
        if (preg_match('/^[A-Za-z0-9_\-]+$/D', $key) !== 1) {
            $key = 'k_' . sha1($key);
        }
        return $this->cacheDir() . '/' . $key . '.json';
    }

    /**
     * Read a cache entry.
     *
     * @param string $key        Cache key.
     * @param int    $ttlSeconds Maximum age of the entry in seconds.
     * @return mixed Decoded value, or null when the entry is missing, expired
     *               or unreadable.
     */
    private function cacheGet(string $key, int $ttlSeconds)
    {
        $file = $this->cacheFile($key);
        if (!is_file($file)) {
            return null;
        }

        $mtime = filemtime($file);
        if (!$mtime || (time() - $mtime) > $ttlSeconds) {
            return null;
        }

        // Best effort: the file may vanish between the checks and the read.
        $raw = @file_get_contents($file);
        if (!is_string($raw) || $raw === '') {
            return null;
        }
        return json_decode($raw, true);
    }

    /**
     * Write a cache entry. Failures are ignored on purpose: the cache is an
     * optimisation and must never break a lookup.
     *
     * @param string $key   Cache key.
     * @param mixed  $value JSON-encodable value.
     */
    private function cacheSet(string $key, $value): void
    {
        $dir = $this->cacheDir();
        // The second is_dir() covers another process creating the directory
        // between the check and mkdir().
        if (!is_dir($dir) && !@mkdir($dir, 0777, true) && !is_dir($dir)) {
            return;
        }

        $json = json_encode($value, JSON_UNESCAPED_UNICODE);
        if ($json === false) {
            return;
        }

        // LOCK_EX keeps concurrent writers from interleaving their output.
        @file_put_contents($this->cacheFile($key), $json, LOCK_EX);
    }
}