PMID
 * - PMID -> structured article info (title/abstract/mesh/publication_types/year/journal)
 *
 * Notes:
 * - Results are cached as JSON files under runtime/pubmed_cache by default,
 *   to avoid sending the same request to NCBI repeatedly.
 * - TLS certificates are verified by default; pass ['verify_ssl' => false]
 *   to the constructor only if the host has no usable CA bundle.
 */
class PubmedService
{
    /** Lifetime of a cache entry, in seconds (30 days). */
    private const CACHE_TTL = 30 * 86400;

    private $base = 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/';
    private $timeout = 20;
    private $tool = 'tmrjournals';
    private $email = '';
    private $verifySsl = true;

    /**
     * @param array $config Optional overrides:
     *                      - base       (string) E-utilities base URL; a trailing slash is enforced
     *                      - timeout    (int)    total request timeout in seconds, minimum 5
     *                      - tool       (string) value of the "tool" query parameter
     *                      - email      (string) value of the "email" query parameter
     *                      - verify_ssl (bool)   verify the peer's TLS certificate (default true)
     */
    public function __construct(array $config = [])
    {
        if (isset($config['base'])) {
            $this->base = rtrim((string)$config['base'], '/') . '/';
        }
        if (isset($config['timeout'])) {
            $this->timeout = max(5, intval($config['timeout']));
        }
        if (isset($config['tool'])) {
            $this->tool = (string)$config['tool'];
        }
        if (isset($config['email'])) {
            $this->email = (string)$config['email'];
        }
        if (isset($config['verify_ssl'])) {
            $this->verifySsl = (bool)$config['verify_ssl'];
        }
    }

    /**
     * DOI -> PMID (searches the [DOI] field first, then falls back to [AID]).
     *
     * Only successful lookups are cached; a miss is retried on the next call.
     *
     * @return string|null The PMID, or null when the DOI is empty or not found.
     */
    public function doiToPmid(string $doi): ?string
    {
        $doi = trim($doi);
        if ($doi === '') {
            return null;
        }

        // The DOI is hashed, so the cache file name never contains user input.
        $cacheKey = 'doi2pmid_' . sha1(strtolower($doi));
        $cached = $this->cacheGet($cacheKey, self::CACHE_TTL);
        if (is_string($cached) && $cached !== '') {
            return $cached;
        }

        $pmid = $this->esearch($doi . '[DOI]');
        if ($pmid === null) {
            $pmid = $this->esearch($doi . '[AID]');
        }
        if ($pmid === null) {
            return null;
        }

        $this->cacheSet($cacheKey, $pmid);
        return $pmid;
    }

    /**
     * PMID -> article info (title/abstract/mesh_terms/publication_types/journal/year).
     *
     * @param string $pmid A single PubMed ID consisting of digits only. Anything
     *                     else is rejected, because the value is used both in
     *                     the request and in the cache file name.
     *
     * @return array|null The parsed article, or null on invalid input, request
     *                    failure, or a response with neither title nor abstract.
     */
    public function fetchByPmid(string $pmid): ?array
    {
        $pmid = trim($pmid);
        if (preg_match('/^\d+$/D', $pmid) !== 1) {
            return null;
        }

        $cacheKey = 'pmid_' . $pmid;
        $cached = $this->cacheGet($cacheKey, self::CACHE_TTL);
        if (is_array($cached)) {
            return $cached;
        }

        $xml = $this->httpGet($this->buildUrl('efetch.fcgi', [
            'db' => 'pubmed',
            'id' => $pmid,
            'retmode' => 'xml',
        ]));
        if (trim($xml) === '') {
            return null;
        }

        $data = $this->parseEfetchXml($xml);
        if (!$data) {
            return null;
        }

        $this->cacheSet($cacheKey, $data);
        return $data;
    }

    /**
     * DOI -> PubMed info (including abstract/mesh), with "pmid" and "doi" added.
     *
     * @return array|null Null when the DOI cannot be resolved or fetched.
     */
    public function fetchByDoi(string $doi): ?array
    {
        $pmid = $this->doiToPmid($doi);
        if ($pmid === null) {
            return null;
        }

        $info = $this->fetchByPmid($pmid);
        if (!$info) {
            return null;
        }

        $info['pmid'] = $pmid;
        $info['doi'] = $doi;
        return $info;
    }

    // ----------------- Internals -----------------

    /**
     * Build an E-utilities URL, appending the tool/email parameters when set.
     */
    private function buildUrl(string $endpoint, array $params): string
    {
        if ($this->tool !== '') {
            $params['tool'] = $this->tool;
        }
        if ($this->email !== '') {
            $params['email'] = $this->email;
        }

        // The separator is passed explicitly so the URL does not depend on the
        // arg_separator.output ini setting.
        return $this->base . $endpoint . '?' . http_build_query($params, '', '&');
    }

    /**
     * Run an ESearch query against PubMed and return the first matching ID.
     */
    private function esearch(string $term): ?string
    {
        $body = $this->httpGet($this->buildUrl('esearch.fcgi', [
            'db' => 'pubmed',
            'retmode' => 'json',
            'retmax' => 1,
            'term' => $term,
        ]));

        $json = json_decode($body, true);
        if (!is_array($json)) {
            return null;
        }

        $ids = $json['esearchresult']['idlist'] ?? [];
        if (!is_array($ids) || empty($ids[0]) || !is_scalar($ids[0])) {
            return null;
        }
        return (string)$ids[0];
    }

    /**
     * Extract the article fields from an EFetch XML response.
     *
     * @return array|null Null when the XML is malformed or contains neither a
     *                    title nor an abstract.
     */
    private function parseEfetchXml(string $xml): ?array
    {
        // Collect libxml errors internally while parsing, then restore the
        // caller's setting so this method does not change global state.
        $previous = libxml_use_internal_errors(true);
        try {
            $doc = new \DOMDocument();
            if (!$doc->loadXML($xml, LIBXML_NONET)) {
                return null;
            }
            $xp = new \DOMXPath($doc);

            $title = $this->xpText($xp, '//PubmedArticle//ArticleTitle');

            // Structured abstracts have one AbstractText per section; keep the
            // section label as a prefix and join sections with newlines.
            $abstractParts = [];
            $absNodes = $xp->query('//PubmedArticle//Abstract//AbstractText');
            if ($absNodes) {
                foreach ($absNodes as $node) {
                    $text = trim($node->textContent);
                    if ($text === '') {
                        continue;
                    }
                    $label = $node instanceof \DOMElement ? trim($node->getAttribute('Label')) : '';
                    $abstractParts[] = $label !== '' ? ($label . ': ' . $text) : $text;
                }
            }
            $abstract = trim(implode("\n", $abstractParts));

            $mesh = $this->xpTextList($xp, '//PubmedArticle//MeshHeadingList//MeshHeading//DescriptorName');
            $pubTypes = $this->xpTextList($xp, '//PubmedArticle//PublicationTypeList//PublicationType');
            $journal = $this->xpText($xp, '//PubmedArticle//Journal//Title');

            // Prefer the explicit Year element; otherwise take the first
            // 19xx/20xx year found in the free-text MedlineDate.
            $year = $this->xpText($xp, '//PubmedArticle//JournalIssue//PubDate//Year');
            if ($year === '') {
                $medlineDate = $this->xpText($xp, '//PubmedArticle//JournalIssue//PubDate//MedlineDate');
                if (preg_match('/(19\\d{2}|20\\d{2})/', $medlineDate, $m)) {
                    $year = $m[1];
                }
            }

            if ($title === '' && $abstract === '') {
                return null;
            }

            return [
                'title' => $title,
                'abstract' => $abstract,
                'mesh_terms' => $mesh,
                'publication_types' => $pubTypes,
                'journal' => $journal,
                'year' => $year,
            ];
        } finally {
            libxml_clear_errors();
            libxml_use_internal_errors($previous);
        }
    }

    /**
     * Trimmed text of the first node matching $query, or '' when none matches.
     */
    private function xpText(\DOMXPath $xp, string $query): string
    {
        $nodes = $xp->query($query);
        if ($nodes && $nodes->length > 0) {
            return trim($nodes->item(0)->textContent);
        }
        return '';
    }

    /**
     * Trimmed, non-empty, de-duplicated text of every node matching $query,
     * in document order.
     */
    private function xpTextList(\DOMXPath $xp, string $query): array
    {
        $values = [];
        $nodes = $xp->query($query);
        if ($nodes) {
            foreach ($nodes as $node) {
                $text = trim($node->textContent);
                if ($text !== '') {
                    $values[] = $text;
                }
            }
        }
        return array_values(array_unique($values));
    }

    /**
     * Perform a GET request.
     *
     * @return string The response body, or '' on transport failure or when the
     *                server answers with an HTTP status of 400 or above.
     */
    private function httpGet(string $url): string
    {
        $ch = curl_init();
        if ($ch === false) {
            return '';
        }

        curl_setopt_array($ch, [
            CURLOPT_URL => $url,
            CURLOPT_RETURNTRANSFER => true,
            CURLOPT_TIMEOUT => $this->timeout,
            CURLOPT_CONNECTTIMEOUT => min(10, $this->timeout),
            CURLOPT_SSL_VERIFYPEER => $this->verifySsl,
            CURLOPT_HTTPHEADER => [
                'User-Agent: TMRjournals-PubMed/1.0',
            ],
        ]);

        $body = curl_exec($ch);
        $status = (int)curl_getinfo($ch, CURLINFO_HTTP_CODE);
        curl_close($ch);

        // An error page must not be parsed (or cached) as if it were a result.
        if (!is_string($body) || $status >= 400) {
            return '';
        }
        return $body;
    }

    /**
     * Directory holding the cache files, below the application's ROOT_PATH.
     */
    private function cacheDir(): string
    {
        return rtrim(ROOT_PATH, '/\\') . '/runtime/pubmed_cache';
    }

    /**
     * Path of the cache file for $key, or null when the key contains characters
     * that could point outside the cache directory.
     */
    private function cacheFile(string $key): ?string
    {
        if (preg_match('/^[A-Za-z0-9_\-]+$/D', $key) !== 1) {
            return null;
        }
        return $this->cacheDir() . '/' . $key . '.json';
    }

    /**
     * Read a cache entry.
     *
     * @return mixed The decoded value, or null when the entry is missing,
     *               older than $ttlSeconds, or unreadable.
     */
    private function cacheGet(string $key, int $ttlSeconds)
    {
        $file = $this->cacheFile($key);
        if ($file === null || !is_file($file)) {
            return null;
        }

        // The file may disappear between the checks; the cache is best-effort,
        // so such races are treated as a miss rather than reported.
        $mtime = @filemtime($file);
        if (!$mtime || (time() - $mtime) > $ttlSeconds) {
            return null;
        }

        $raw = @file_get_contents($file);
        if (!is_string($raw) || $raw === '') {
            return null;
        }
        return json_decode($raw, true);
    }

    /**
     * Write a cache entry. Failures are ignored: caching is best-effort and
     * must never break a lookup that already succeeded.
     */
    private function cacheSet(string $key, $value): void
    {
        $file = $this->cacheFile($key);
        if ($file === null) {
            return;
        }

        $json = json_encode($value, JSON_UNESCAPED_UNICODE);
        if ($json === false) {
            // Not encodable (e.g. invalid UTF-8); skip instead of writing an empty file.
            return;
        }

        $dir = dirname($file);
        if (!is_dir($dir) && !@mkdir($dir, 0777, true) && !is_dir($dir)) {
            return;
        }

        // LOCK_EX keeps concurrent writers from interleaving their output.
        @file_put_contents($file, $json, LOCK_EX);
    }
}