email = trim((string) Env::get('author_bg.email', ''));
        if ($this->email === '') {
            // NOTE(review): the fallback contact address is hard-coded here; consider moving it to configuration.
            $this->email = trim((string) Env::get('pubmed.email', 'yananwang898@gmail.com'));
        }
        $this->scopusApiKey = trim((string) Env::get('scopus.api_key', ''));
        if ($this->scopusApiKey === '') {
            $this->scopusApiKey = trim((string) config('scopus.api_key', ''));
        }
    }

    /**
     * Build the complete background-check report payload (JSON for a decoupled front end).
     *
     * Flow: normalise the input; if no ORCID iD was supplied, look one up by name
     * (returning early when nothing or more than one candidate is found); then query
     * ORCID, OpenAlex, PubMed, Scopus and the Retraction Watch cache and assemble the result.
     *
     * @param mixed $orcid       ORCID iD in any textual form (bare iD or URL); may be empty
     * @param mixed $lastName    family name; required when no ORCID iD is given
     * @param mixed $firstName   given name; optional
     * @param mixed $institution affiliation; optional, used for ranking/filtering only
     *
     * @return array{ok:bool,msg?:string,need_select?:bool,data?:array}
     */
    public function buildReport($orcid, $lastName, $firstName, $institution)
    {
        $orcidNorm   = $this->normalizeOrcid($orcid);
        $lastName    = trim((string) $lastName);
        $firstName   = trim((string) $firstName);
        $institution = trim((string) $institution);

        if ($orcidNorm === '' && $lastName === '' && $firstName === '') {
            return ['ok' => false, 'msg' => '请提供 ORCID 或姓名'];
        }

        // Echoed back in every early-exit response so the front end can re-populate the form.
        $submitted = [
            'last_name'   => $lastName,
            'first_name'  => $firstName,
            'institution' => $institution,
        ];

        $orcidSource = 'provided';
        if ($orcidNorm === '') {
            if ($lastName === '') {
                return [
                    'ok'   => false,
                    'msg'  => '未提供 ORCID 时,需填写作者姓氏',
                    'data' => [
                        'orcid_required' => true,
                        'submitted'      => $submitted,
                        'hint'           => '请填写 ORCID,或至少填写姓氏(机构选填,仅用于候选列表排序)',
                    ],
                ];
            }

            $search     = $this->searchOrcidCandidates($lastName, $firstName, $institution);
            $candidates = $search['candidates'] ?? [];

            if (empty($candidates)) {
                return [
                    'ok'   => false,
                    'msg'  => '未能按姓名检索到 ORCID,请手动填写',
                    'data' => [
                        'orcid_required'  => true,
                        'submitted'       => $submitted,
                        'hint'            => '已在 OpenAlex、ORCID 官网、Scopus 按姓名检索,未找到带 ORCID 的作者',
                        'lookup_attempts' => $search['attempts'] ?? [],
                    ],
                ];
            }

            if (count($candidates) > 1) {
                return [
                    'ok'          => false,
                    'need_select' => true,
                    'msg'         => '匹配到 ' . count($candidates) . ' 位作者,请选择',
                    'data'        => [
                        'candidates'      => $candidates,
                        'submitted'       => $submitted,
                        'lookup_attempts' => $search['attempts'] ?? [],
                    ],
                ];
            }

            // Exactly one candidate: adopt it automatically.
            $orcidNorm   = $candidates[0]['orcid'];
            $orcidSource = 'name_search';

            if ($firstName === '' && !empty($candidates[0]['display_name'])) {
                $parts = preg_split(
                    '/[\s,]+/u',
                    trim((string) $candidates[0]['display_name']),
                    -1,
                    PREG_SPLIT_NO_EMPTY
                ) ?: [];

                // The surname is known (it is mandatory in this branch), so treat every other
                // token as the given name. Blindly taking the LAST token as the surname breaks
                // family-first names and Scopus indexed names such as "Wang Y.".
                $given = [];
                foreach ($parts as $part) {
                    if (strcasecmp(trim($part, '.'), $lastName) !== 0) {
                        $given[] = $part;
                    }
                }

                if (!empty($given) && count($given) < count($parts)) {
                    $firstName = implode(' ', $given);
                } elseif (count($parts) > 1) {
                    // Surname not recognisable in the display name: keep the previous heuristic.
                    $lastName  = array_pop($parts);
                    $firstName = implode(' ', $parts);
                }
            }
        }

        $way           = $this->describeQueryWay($orcidSource);
        $authorDisplay = trim("$firstName $lastName");

        $orcidData = $this->orcidProfile($orcidNorm);
        if ($orcidData['name'] !== '') {
            $authorDisplay = $orcidData['name'];
        }

        $openalexAuthor = $this->resolveOpenAlexAuthor($orcidNorm, $firstName, $lastName, $institution);
        $metrics        = $this->openalexMetrics($openalexAuthor);
        $pubmed         = $this->pubmedSearch($lastName, $firstName, $institution, $orcidNorm, 50);
        $rw             = $this->searchRetractionsHybrid($orcidData['papers'], $firstName, $lastName, $institution, $authorDisplay);
        $scopusUrl      = $this->scopusDirectUrl($lastName, $firstName, $institution, $orcidNorm);
        $scopusApi      = $this->scopusApiSearch($orcidNorm, $lastName, $firstName, $institution);
        $dups           = $this->checkDuplicateTitles($this->papersForDupCheck($orcidData['papers'], $pubmed['papers']));
        $worksCount     = $this->resolveWorksCount($metrics, $orcidData, $pubmed, $scopusApi, $orcidNorm);
        $risk           = $this->riskLevel($rw, $metrics['h_index'], $worksCount);

        // Only the first ten papers of each source are listed; every listed paper gets a clickable link.
        $orcidPapers  = array_slice($orcidData['papers'], 0, 10);
        $pubmedPapers = array_slice($pubmed['papers'], 0, 10);

        foreach ($orcidPapers as &$p) {
            $p['open_url'] = $this->paperOpenUrl($p);
        }
        unset($p);

        foreach ($pubmedPapers as &$p) {
            $p['open_url'] = $this->paperOpenUrl($p);
        }
        unset($p);

        foreach ($dups as &$dg) {
            foreach ($dg['papers'] as &$dp) {
                $dp['open_url'] = $this->paperOpenUrl($dp);
            }
            unset($dp);
        }
        unset($dg);

        return [
            'ok'   => true,
            'data' => [
                'report_at' => date('Y-m-d H:i:s'),
                'query'     => [
                    'way'            => $way,
                    'orcid'          => $orcidNorm,
                    'orcid_source'   => $orcidSource,
                    'orcid_resolved' => $orcidSource !== 'provided',
                    'last_name'      => $lastName,
                    'first_name'     => $firstName,
                    'institution'    => $institution,
                ],
                'conclusion' => [
                    'risk_level' => $risk,
                    'notes'      => [
                        '有 ORCID 时优先以 ORCID + OpenAlex 为准,指标更稳定。',
                        '撤稿数据来自 Retraction Watch:有 DOI 作品按 DOI 精确比对;无 DOI 作品回退姓名/题目匹配(同名有风险,需人工核实)。',
                        '本报告不构成法律认定,重大决策请结合原始文献、单位证明及人工调查。',
                    ],
                ],
                'basic' => [
                    'display_name'          => $authorDisplay,
                    'orcid'                 => $orcidNorm,
                    'orcid_url'             => 'https://orcid.org/' . $orcidNorm,
                    'orcid_affiliations'    => $orcidData['affiliations'],
                    'openalex_institutions' => $metrics['institutions'],
                    'openalex_url'          => $metrics['openalex_url'],
                    'scopus_id'             => $metrics['scopus_id'],
                    'scopus_url'            => $metrics['scopus_url'],
                ],
                'scopus' => [
                    'search_url' => $scopusUrl,
                    'api'        => $scopusApi,
                ],
                'metrics' => [
                    'works_count'    => $worksCount,
                    'cited_by_count' => (int) $metrics['cited_by_count'],
                    'h_index'        => (int) $metrics['h_index'],
                    'i10_index'      => (int) $metrics['i10_index'],
                    'topics'         => $metrics['topics'],
                    'pubmed_total'   => (int) $pubmed['total'],
                    'pubmed_query'   => $pubmed['query'],
                    'pubmed_url'     => $pubmed['pubmed_url'],
                ],
                'retraction_watch' => $rw,
                'duplicates'       => $dups,
                'pubmed_papers'    => $pubmedPapers,
                'orcid_papers'     => [
                    'total'  => (int) $orcidData['papers_total'],
                    'papers' => $orcidPapers,
                ],
                'sources' => ['OpenAlex', 'ORCID', 'PubMed', 'Scopus', 'Retraction Watch'],
            ],
        ];
    }

    /**
     * Extract an ORCID iD from free text (bare iD or URL) and return it in canonical form.
     *
     * The checksum character is emitted as an uppercase "X", which is the canonical
     * representation of an ORCID iD.
     *
     * @param mixed $raw
     *
     * @return string canonical iD ("0000-0000-0000-000X") or '' when none is found
     */
    public function normalizeOrcid($raw)
    {
        $raw = trim((string) $raw);
        if ($raw === '') {
            return '';
        }
        if (preg_match('/(\d{4}-\d{4}-\d{4}-\d{3}[\dX])/i', $raw, $m)) {
            return strtoupper($m[1]);
        }

        return '';
    }

    /**
     * Look up ORCID candidates by name in OpenAlex, the ORCID registry and Scopus.
     *
     * The institution is used for ranking/flagging only and never narrows the search.
     *
     * @return array{candidates:array,attempts:array}
     */
    public function searchOrcidCandidates($lastName, $firstName, $institution)
    {
        $lastName    = trim((string) $lastName);
        $firstName   = trim((string) $firstName);
        $institution = trim((string) $institution);

        if ($lastName === '') {
            return ['candidates' => [], 'attempts' => []];
        }

        $attempts = [];
        $pool     = []; // keyed by normalised ORCID iD; merged across sources

        // 1) OpenAlex
        $openalexList = $this->openalexAuthorsByName($firstName, $lastName);
        $attempts[]   = ['source' => 'openalex', 'count' => count($openalexList)];
        foreach ($openalexList as $author) {
            $orcid = $this->extractOrcidFromOpenAlexAuthor($author);
            if ($orcid === '') {
                continue;
            }
            $displayName = $author['display_name'] ?? '';
            if (!$this->isAcceptableNameMatch($displayName, $firstName, $lastName)) {
                continue;
            }
            $affs = [];
            foreach ($author['last_known_institutions'] ?? [] as $ins) {
                $n = trim((string) ($ins['display_name'] ?? ''));
                if ($n !== '') {
                    $affs[] = $n;
                }
            }
            $this->addOrcidCandidate($pool, $orcid, $displayName, $affs, 'openalex', $institution);
        }

        // 2) ORCID registry (structured given/family names are available here)
        $orcidResults = $this->orcidRegistrySearch($lastName, $firstName);
        $attempts[]   = ['source' => 'orcid_registry', 'count' => count($orcidResults)];
        foreach ($orcidResults as $row) {
            $orcid = $this->normalizeOrcid($row['orcid-id'] ?? $row['orcid_id'] ?? '');
            if ($orcid === '') {
                continue;
            }
            $given       = trim((string) ($row['given-names'] ?? $row['given_names'] ?? ''));
            $family      = trim((string) ($row['family-names'] ?? $row['family_names'] ?? ''));
            $displayName = trim($given . ' ' . $family);
            if (!$this->isAcceptableNameMatch($displayName, $firstName, $lastName, $given, $family)) {
                continue;
            }
            $instNames = $row['institution-name'] ?? $row['institution_name'] ?? [];
            if (!is_array($instNames)) {
                $instNames = $instNames !== '' ? [$instNames] : [];
            }
            $this->addOrcidCandidate($pool, $orcid, $displayName, $instNames, 'orcid_registry', $institution);
        }

        // 3) Scopus (name-only search)
        $scopus     = $this->scopusApiSearch('', $lastName, $firstName, $institution, true);
        $attempts[] = ['source' => 'scopus', 'count' => count($scopus['entries'] ?? [])];
        foreach ($scopus['entries'] ?? [] as $entry) {
            $orcid = $this->normalizeOrcid($entry['orcid'] ?? '');
            if ($orcid === '') {
                continue;
            }
            $displayName = $entry['name'] ?? '';
            if (!$this->isAcceptableNameMatch($displayName, $firstName, $lastName)) {
                continue;
            }
            $affs = [];
            if (!empty($entry['affiliation'])) {
                $affs[] = $entry['affiliation'];
            }
            $this->addOrcidCandidate($pool, $orcid, $displayName, $affs, 'scopus', $institution);
        }

        return [
            'candidates' => $this->sortOrcidCandidates(array_values($pool), $firstName, $lastName),
            'attempts'   => $attempts,
        ];
    }

    /**
     * Score how well a candidate name matches the requested given + family name.
     *
     * When a first name is requested and the candidate's given name does not match,
     * the candidate is rejected with 0 (e.g. "Yanan" must not match "Yuxuan").
     * Structured $givenName/$familyName are preferred when supplied; otherwise the
     * free-text $displayName is tokenised.
     *
     * @return int 0 = rejected; higher is better
     */
    private function scoreCandidateNameMatch($displayName, $firstName, $lastName, $givenName = '', $familyName = '')
    {
        $firstName   = strtolower(trim((string) $firstName));
        $lastName    = strtolower(trim((string) $lastName));
        $givenName   = strtolower(trim((string) $givenName));
        $familyName  = strtolower(trim((string) $familyName));
        $displayName = trim((string) $displayName);

        // Structured path: family/given names are known separately.
        if ($familyName !== '' && $lastName !== '') {
            if (!$this->nameTokenMatches($familyName, $lastName)) {
                return 0;
            }
            $score = 60;
            if ($firstName === '') {
                return $score;
            }
            if ($givenName === '') {
                return 20;
            }
            if ($givenName === $firstName) {
                return $score + 120;
            }
            if ($this->nameTokenMatches($givenName, $firstName)) {
                return $score + 100;
            }

            return 0;
        }

        // Free-text path.
        if ($displayName === '' || $lastName === '') {
            return 0;
        }
        $nameLow = strtolower($displayName);
        if (!$this->nameContainsToken($nameLow, $lastName)) {
            return 0;
        }
        $score = 50;
        if ($firstName === '') {
            return $score;
        }

        $targetA = $firstName . ' ' . $lastName;
        $targetB = $lastName . ' ' . $firstName;
        if ($nameLow === $targetA || $nameLow === $targetB) {
            return $score + 120;
        }

        $tokens = preg_split('/[\s,]+/u', $nameLow);
        $tokens = array_values(array_filter($tokens, function ($t) {
            return $t !== '';
        }));

        $firstHit = false;
        foreach ($tokens as $token) {
            if ($this->nameTokenMatches($token, $lastName)) {
                continue; // surname token, not a given-name candidate
            }
            if ($this->nameTokenMatches($token, $firstName)) {
                $firstHit = true;
                $score   += 100;
                break;
            }
        }
        if (!$firstHit && $this->nameContainsToken($nameLow, $firstName)) {
            $firstHit = true;
            $score   += 80;
        }

        return $firstHit ? $score : 0;
    }

    /**
     * Whether a candidate name clears the acceptance threshold
     * (70 when a first name was requested, otherwise 40).
     *
     * @return bool
     */
    private function isAcceptableNameMatch($displayName, $firstName, $lastName, $givenName = '', $familyName = '')
    {
        $minScore = trim((string) $firstName) !== '' ? 70 : 40;

        return $this->scoreCandidateNameMatch($displayName, $firstName, $lastName, $givenName, $familyName) >= $minScore;
    }

    /**
     * Case-insensitive token comparison: equal, or one is a prefix of the other.
     *
     * @return bool
     */
    private function nameTokenMatches($token, $target)
    {
        $token  = strtolower(trim((string) $token));
        $target = strtolower(trim((string) $target));
        if ($token === '' || $target === '') {
            return false;
        }

        return $token === $target || strpos($token, $target) === 0 || strpos($target, $token) === 0;
    }

    /**
     * Whether $token occurs in $haystack as a whole word (case-insensitive).
     *
     * @return bool
     */
    private function nameContainsToken($haystack, $token)
    {
        $token = strtolower(trim((string) $token));
        if ($token === '') {
            return false;
        }

        return preg_match('/\b' . preg_quote($token, '/') . '\b/u', strtolower($haystack)) === 1;
    }

    /**
     * Insert or merge one candidate into the pool (keyed by normalised ORCID iD).
     *
     * The first non-empty name wins; affiliations and sources are accumulated without
     * duplicates; `institution_matched` is set as soon as any affiliation matches.
     *
     * @param array  $pool         candidate pool, modified in place
     * @param array  $affiliations affiliation names reported by the source
     * @param string $source       source key ('openalex', 'orcid_registry', 'scopus')
     */
    private function addOrcidCandidate(array &$pool, $orcid, $name, array $affiliations, $source, $institution)
    {
        $orcid = $this->normalizeOrcid($orcid);
        if ($orcid === '') {
            return;
        }

        if (!isset($pool[$orcid])) {
            $pool[$orcid] = [
                'orcid'               => $orcid,
                'display_name'        => '',
                'affiliations'        => [],
                'affiliations_text'   => '',
                'sources'             => [],
                'sources_text'        => '',
                'institution_matched' => false,
                'orcid_url'           => 'https://orcid.org/' . $orcid,
            ];
        }

        $name = trim((string) $name);
        if ($name !== '' && $pool[$orcid]['display_name'] === '') {
            $pool[$orcid]['display_name'] = $name;
        }

        foreach ($affiliations as $aff) {
            $aff = trim((string) $aff);
            if ($aff === '') {
                continue;
            }
            if (!in_array($aff, $pool[$orcid]['affiliations'], true)) {
                $pool[$orcid]['affiliations'][] = $aff;
            }
            if ($institution !== '' && $this->institutionMatches($aff, $institution)) {
                $pool[$orcid]['institution_matched'] = true;
            }
        }

        if (!in_array($source, $pool[$orcid]['sources'], true)) {
            $pool[$orcid]['sources'][] = $source;
        }
    }

    /**
     * Decorate candidates with display fields and sort them:
     * name score (desc), institution match (matched first), then display name (asc).
     *
     * @return array
     */
    private function sortOrcidCandidates(array $candidates, $firstName = '', $lastName = '')
    {
        $srcMap = [
            'openalex'       => 'OpenAlex',
            'orcid_registry' => 'ORCID',
            'scopus'         => 'Scopus',
        ];

        foreach ($candidates as &$item) {
            $item['name_match_score'] = $this->scoreCandidateNameMatch(
                $item['display_name'] ?? '',
                $firstName,
                $lastName
            );
            $item['name_matched']      = $item['name_match_score'] >= 70;
            $item['affiliations_text'] = implode(';', $item['affiliations'] ?? []);

            $labels = [];
            foreach ($item['sources'] ?? [] as $s) {
                $labels[] = $srcMap[$s] ?? $s;
            }
            $item['sources_text'] = implode(' / ', $labels);
        }
        unset($item);

        usort($candidates, function ($a, $b) {
            $nameCmp = ($b['name_match_score'] ?? 0) <=> ($a['name_match_score'] ?? 0);
            if ($nameCmp !== 0) {
                return $nameCmp;
            }
            if (($a['institution_matched'] ?? false) !== ($b['institution_matched'] ?? false)) {
                return ($b['institution_matched'] ?? false) <=> ($a['institution_matched'] ?? false);
            }

            return strcmp($a['display_name'] ?? '', $b['display_name'] ?? '');
        });

        return $candidates;
    }

    /**
     * Case-insensitive containment check in either direction between two institution names.
     *
     * @return bool
     */
    private function institutionMatches($candidateInst, $targetInstitution)
    {
        $instLow = strtolower(trim((string) $targetInstitution));
        $candLow = strtolower(trim((string) $candidateInst));
        if ($instLow === '' || $candLow === '') {
            return false;
        }

        return strpos($candLow, $instLow) !== false || strpos($instLow, $candLow) !== false;
    }

    /**
     * Human-readable label for the way the ORCID iD was obtained.
     *
     * @return string
     */
    private function describeQueryWay($orcidSource)
    {
        $map = [
            'provided'    => 'ORCID 精准查询',
            'name_search' => '姓名自动匹配 ORCID',
        ];

        return $map[$orcidSource] ?? 'ORCID 查询';
    }

    /**
     * Read the ORCID iD from an OpenAlex author record (`orcid` or `ids.orcid`).
     *
     * @return string normalised iD or ''
     */
    private function extractOrcidFromOpenAlexAuthor($author)
    {
        if (!is_array($author)) {
            return '';
        }
        $raw = $author['orcid'] ?? ($author['ids']['orcid'] ?? '');

        return $this->normalizeOrcid((string) $raw);
    }

    /**
     * Search OpenAlex authors by free-text name (up to 25 results).
     *
     * @return array list of OpenAlex author records; [] on any failure
     */
    private function openalexAuthorsByName($first, $last)
    {
        $q = trim("$first $last");
        if ($q === '' && $last !== '') {
            $q = $last;
        }
        if ($q === '') {
            return [];
        }

        $url  = 'https://api.openalex.org/authors?search=' . urlencode($q) . '&per_page=25';
        $json = $this->httpGet($url);
        if (!$json) {
            return [];
        }
        $data    = json_decode($json, true);
        $results = $data['results'] ?? [];

        // Callers count() and iterate the result, so never hand back a non-array payload.
        return is_array($results) ? $results : [];
    }

    /**
     * Search the public ORCID registry by family/given name (expanded search, up to 25 rows).
     *
     * @return array list of result rows; [] on any failure
     */
    private function orcidRegistrySearch($lastName, $firstName)
    {
        $parts = [];
        foreach (['family-name' => $lastName, 'given-names' => $firstName] as $field => $value) {
            // The value is user input spliced into a Lucene/Solr query: drop characters
            // that carry query syntax so a name cannot alter the query structure.
            $value = trim((string) preg_replace('/["\\\\:(){}\[\]^~*?!]+/u', ' ', (string) $value));
            if ($value !== '') {
                $parts[] = $field . ':' . $value;
            }
        }
        if (empty($parts)) {
            return [];
        }

        $url  = 'https://pub.orcid.org/v3.0/expanded-search/?q=' . urlencode(implode(' AND ', $parts)) . '&rows=25';
        $json = $this->httpGet($url, ['Accept: application/json']);
        if (!$json) {
            return [];
        }
        $data = json_decode($json, true);
        $rows = $data['expanded-result'] ?? $data['result'] ?? [];

        return is_array($rows) ? $rows : [];
    }

    /**
     * HTTP GET with a project User-Agent; uses cURL when available, else a stream context.
     *
     * @param string $url
     * @param array  $headers extra request header lines
     * @param int    $timeout seconds
     *
     * @return string|null response body, or null on failure (cURL path: also on non-2xx status)
     */
    private function httpGet($url, array $headers = [], $timeout = 25)
    {
        $headers[] = 'User-Agent: MedicalAuthorCheck/1.0 (mailto:' . $this->email . ')';

        if (function_exists('curl_init')) {
            $ch = curl_init($url);
            curl_setopt_array($ch, [
                CURLOPT_RETURNTRANSFER => true,
                CURLOPT_TIMEOUT        => $timeout,
                CURLOPT_FOLLOWLOCATION => true,
                CURLOPT_HTTPHEADER     => $headers,
                CURLOPT_SSL_VERIFYPEER => true,
            ]);
            $body = curl_exec($ch);
            $code = (int) curl_getinfo($ch, CURLINFO_HTTP_CODE);
            curl_close($ch);

            return ($body !== false && $code >= 200 && $code < 300) ? $body : null;
        }

        $ctx = stream_context_create([
            'http' => ['method' => 'GET', 'header' => implode("\r\n", $headers), 'timeout' => $timeout],
            'ssl'  => ['verify_peer' => true, 'verify_peer_name' => true],
        ]);
        $body = @file_get_contents($url, false, $ctx);

        return $body !== false ? $body : null;
    }

    /**
     * Fetch the OpenAlex author record linked to an ORCID iD.
     *
     * @return array|null first matching record, or null
     */
    private function openalexAuthorByOrcid($orcid)
    {
        $url  = 'https://api.openalex.org/authors?filter=orcid:' . urlencode('https://orcid.org/' . $orcid) . '&per_page=5';
        $json = $this->httpGet($url);
        if (!$json) {
            return null;
        }
        $data = json_decode($json, true);

        return $data['results'][0] ?? null;
    }

    /**
     * Resolve the OpenAlex author record for the person being checked.
     *
     * With a valid ORCID iD: filter by ORCID, then fall back to a name search restricted
     * to records carrying that iD. Without one: pick the best-scoring name match
     * (+50 for a matching institution; ties broken by larger works_count).
     *
     * @return array|null
     */
    private function resolveOpenAlexAuthor($orcid, $firstName, $lastName, $institution)
    {
        $orcidNorm = $this->normalizeOrcid($orcid);

        if ($orcidNorm !== '') {
            // Only hit the ORCID filter endpoint when there is a usable iD to filter on.
            $author = $this->openalexAuthorByOrcid($orcidNorm);
            if ($author) {
                return $author;
            }
            foreach ($this->openalexAuthorsByName($firstName, $lastName) as $candidate) {
                if ($this->extractOrcidFromOpenAlexAuthor($candidate) === $orcidNorm) {
                    return $candidate;
                }
            }

            return null;
        }

        if ($lastName === '' && $firstName === '') {
            return null;
        }

        $best      = null;
        $bestScore = -1;
        foreach ($this->openalexAuthorsByName($firstName, $lastName) as $candidate) {
            $displayName = $candidate['display_name'] ?? '';
            if (!$this->isAcceptableNameMatch($displayName, $firstName, $lastName)) {
                continue;
            }
            $score = $this->scoreCandidateNameMatch($displayName, $firstName, $lastName);
            if ($institution !== '') {
                foreach ($candidate['last_known_institutions'] ?? [] as $inst) {
                    if ($this->institutionMatches($inst['display_name'] ?? '', $institution)) {
                        $score += 50;
                        break;
                    }
                }
            }

            if ($score > $bestScore) {
                $bestScore = $score;
                $best      = $candidate;
                continue;
            }
            if ($score === $bestScore && $best !== null) {
                $candidateWorks = (int) ($candidate['works_count'] ?? 0);
                $bestWorks      = (int) ($best['works_count'] ?? 0);
                if ($candidateWorks > $bestWorks) {
                    $best = $candidate;
                }
            }
        }

        return $best;
    }

    /**
     * Pick the publication count to report: the maximum across OpenAlex, ORCID, PubMed
     * and (when attributable to this author) Scopus.
     *
     * @return int
     */
    private function resolveWorksCount($metrics, $orcidData, $pubmed, $scopusApi, $orcid)
    {
        $counts = [
            (int) ($metrics['works_count'] ?? 0),
            (int) ($orcidData['papers_total'] ?? 0),
            (int) ($pubmed['total'] ?? 0),
        ];

        $orcidNorm = $this->normalizeOrcid($orcid);
        $entries   = $scopusApi['entries'] ?? [];

        // With an ORCID iD, only trust the Scopus entry carrying the same iD.
        foreach ($entries as $entry) {
            if ($orcidNorm !== '' && $this->normalizeOrcid($entry['orcid'] ?? '') === $orcidNorm) {
                $counts[] = (int) ($entry['document_count'] ?? 0);
                break;
            }
        }
        // Without one, only an unambiguous single hit is used.
        if ($orcidNorm === '' && count($entries) === 1) {
            $counts[] = (int) ($entries[0]['document_count'] ?? 0);
        }

        return max($counts);
    }

    /**
     * Flatten an OpenAlex author record into the metric fields used by the report.
     *
     * @param array|null $author OpenAlex author record, or a falsy value when not found
     *
     * @return array fixed-shape metrics array; `found` tells whether a record was available
     */
    private function openalexMetrics($author)
    {
        if (!$author) {
            return [
                'found'          => false,
                'display_name'   => '',
                'openalex_id'    => '',
                'orcid'          => '',
                'works_count'    => 0,
                'cited_by_count' => 0,
                'h_index'        => 0,
                'i10_index'      => 0,
                'institutions'   => [],
                'topics'         => [],
                'openalex_url'   => '',
                'scopus_id'      => '',
                'scopus_url'     => '',
            ];
        }

        $stats    = $author['summary_stats'] ?? [];
        $scopusId = $this->extractScopusId($author['ids']['scopus'] ?? '');

        $insts = [];
        foreach ($author['last_known_institutions'] ?? [] as $i) {
            $insts[] = $i['display_name'] ?? '';
        }

        $topics = [];
        foreach (array_slice($author['topics'] ?? [], 0, 5) as $t) {
            $topics[] = ($t['display_name'] ?? '') . ' (' . ($t['count'] ?? 0) . '篇)';
        }

        $oid = $author['id'] ?? '';

        return [
            'found'          => true,
            'display_name'   => $author['display_name'] ?? '',
            'openalex_id'    => $oid,
            'orcid'          => preg_replace('#.*/#', '', $author['orcid'] ?? ''),
            'works_count'    => (int) ($author['works_count'] ?? 0),
            'cited_by_count' => (int) ($author['cited_by_count'] ?? 0),
            'h_index'        => (int) ($stats['h_index'] ?? 0),
            'i10_index'      => (int) ($stats['i10_index'] ?? 0),
            'institutions'   => $insts,
            'topics'         => $topics,
            'openalex_url'   => str_replace('https://openalex.org/', 'https://openalex.org/authors/', $oid),
            'scopus_id'      => $scopusId,
            'scopus_url'     => $scopusId ? $this->scopusAuthorUrl($scopusId) : '',
        ];
    }

    /**
     * Extract a numeric Scopus author id from a URL (`authorID=...`) or a bare id of 8+ digits.
     *
     * @return string id or ''
     */
    private function extractScopusId($raw)
    {
        if (!$raw) {
            return '';
        }
        if (preg_match('/authorID=(\d+)/i', (string) $raw, $m)) {
            return $m[1];
        }
        if (preg_match('/^(\d{8,})$/', (string) $raw, $m)) {
            return $m[1];
        }

        return '';
    }

    /**
     * Scopus author detail page URL for an author id.
     *
     * @return string
     */
    private function scopusAuthorUrl($authorId)
    {
        return 'https://www.scopus.com/authid/detail.uri?authorId=' . urlencode($authorId);
    }

    /**
     * Build a Scopus web author-search URL (by ORCID iD when available, else by name/affiliation).
     *
     * @return string|null null when neither an ORCID iD nor any name part is available
     */
    private function scopusDirectUrl($last, $first, $institution, $orcid)
    {
        $base   = 'https://www.scopus.com/results/authorNamesList.uri';
        $params = [
            'sort'                => 'count-f',
            'src'                 => 'al',
            'selectionPageSearch' => 'anl',
            'origin'              => 'searchauthorfreelookup',
            'activeFlag'          => 'true',
            'resultsPerPage'      => '20',
            'exactAuthorSearch'   => 'false',
        ];

        if ($orcid !== '') {
            $params['orcidId'] = $orcid;
            $params['s']       = 'AUTH--ORCID--ID(' . $orcid . ')';

            return $base . '?' . http_build_query($params, '', '&', PHP_QUERY_RFC3986);
        }

        if ($last === '' && $first === '') {
            return null;
        }

        $params['authorLastName']  = $last;
        $params['authorFirstName'] = $first;
        if ($institution !== '') {
            $params['affilname'] = $institution;
        }

        $s = [];
        if ($last !== '') {
            $s[] = 'AUTHLASTNAME(' . $last . ')';
        }
        if ($first !== '') {
            $s[] = 'AUTHFIRST(' . $first . ')';
        }
        if ($institution !== '') {
            $s[] = 'AFFIL(' . $institution . ')';
        }
        $params['s'] = implode(' AND ', $s);

        return $base . '?' . http_build_query($params, '', '&', PHP_QUERY_RFC3986);
    }

    /**
     * HTTP GET against the Elsevier API with the API key header (cURL only).
     *
     * @return string|null response body, or null when cURL is missing, the request fails, or the status is not 2xx
     */
    private function httpGetElsevier($url, $apiKey, $timeout = 25)
    {
        $headers = ['Accept: application/json', 'X-ELS-APIKey: ' . $apiKey];
        if (!function_exists('curl_init')) {
            return null;
        }

        $ch = curl_init($url);
        curl_setopt_array($ch, [
            CURLOPT_RETURNTRANSFER => true,
            CURLOPT_TIMEOUT        => $timeout,
            CURLOPT_FOLLOWLOCATION => true,
            CURLOPT_HTTPHEADER     => $headers,
            CURLOPT_SSL_VERIFYPEER => true,
        ]);
        $body = curl_exec($ch);
        $code = (int) curl_getinfo($ch, CURLINFO_HTTP_CODE);
        curl_close($ch);

        return ($body !== false && $code >= 200 && $code < 300) ? $body : null;
    }

    /**
     * Query the Scopus Author Search API.
     *
     * @param string $orcid       when non-empty the search is by ORCID iD only
     * @param string $last        family name
     * @param string $first       given name
     * @param string $institution affiliation filter (ignored when $nameOnly is true)
     * @param bool   $nameOnly    search by name only and request 25 instead of 10 results
     *
     * @return array{ok:bool,msg:string,entries:array}
     */
    private function scopusApiSearch($orcid, $last, $first, $institution, $nameOnly = false)
    {
        $apiKey = $this->scopusApiKey;
        if (trim($apiKey) === '') {
            return ['ok' => false, 'msg' => '未配置 Scopus API Key', 'entries' => []];
        }

        if ($orcid !== '') {
            $query = 'ORCID(' . $orcid . ')';
        } else {
            $parts = [];
            if ($last !== '') {
                $parts[] = 'AUTHLASTNAME(' . preg_replace('/[^\pL\pN\s\-]/u', '', $last) . ')';
            }
            if ($first !== '') {
                $parts[] = 'AUTHFIRST(' . preg_replace('/[^\pL\pN\s\-]/u', '', $first) . ')';
            }
            if (!$nameOnly && $institution !== '') {
                // Parentheses and quotes in the institution would break the AFFIL(...) query syntax.
                $affil = trim((string) preg_replace('/[()"]+/u', ' ', (string) $institution));
                if ($affil !== '') {
                    $parts[] = 'AFFIL(' . $affil . ')';
                }
            }
            if (empty($parts)) {
                return ['ok' => false, 'msg' => '缺少检索条件', 'entries' => []];
            }
            $query = implode(' AND ', $parts);
        }

        $count = $nameOnly ? 25 : 10;
        $url   = 'https://api.elsevier.com/content/search/author?query=' . urlencode($query) . '&count=' . $count;
        $json  = $this->httpGetElsevier($url, $apiKey);
        if (!$json) {
            return ['ok' => false, 'msg' => 'Scopus API 请求失败,请检查 Key 或网络', 'entries' => []];
        }

        $data    = json_decode($json, true);
        $entries = [];
        foreach ($data['search-results']['entry'] ?? [] as $e) {
            // Elsevier search APIs report an empty result set as a single entry carrying an
            // `error` key; it must not be counted as a (blank) author.
            if (!is_array($e) || isset($e['error'])) {
                continue;
            }

            $idRaw    = (string) ($e['dc:identifier'] ?? '');
            $authorId = '';
            if (preg_match('/AUTHOR_ID:(\d+)/', $idRaw, $m)) {
                $authorId = $m[1];
            }

            $name = '';
            if (!empty($e['preferred-name'])) {
                $pn   = $e['preferred-name'];
                $name = ($pn['ce:indexed-name'] ?? '') ?: trim(($pn['ce:given-name'] ?? '') . ' ' . ($pn['ce:surname'] ?? ''));
            }

            $aff = '';
            if (!empty($e['affiliation-current']['affiliation-name'])) {
                $aff = $e['affiliation-current']['affiliation-name'];
            } elseif (!empty($e['affiliation-current']['ip-doc']['afdispname'])) {
                $aff = $e['affiliation-current']['ip-doc']['afdispname'];
            }

            $entryOrcid = '';
            foreach (['orcid', 'ORCID'] as $orcidKey) {
                if (!empty($e[$orcidKey])) {
                    $entryOrcid = $this->normalizeOrcid((string) $e[$orcidKey]);
                    if ($entryOrcid !== '') {
                        break;
                    }
                }
            }

            $entries[] = [
                'author_id'      => $authorId,
                'name'           => $name,
                'affiliation'    => $aff,
                'orcid'          => $entryOrcid,
                'document_count' => (int) ($e['document-count'] ?? 0),
                'cited_by_count' => (int) ($e['cited-by-count'] ?? 0),
                'h_index'        => (int) ($e['h-index'] ?? 0),
                'url'            => $authorId ? $this->scopusAuthorUrl($authorId) : '',
            ];
        }

        return ['ok' => true, 'msg' => '共匹配 ' . count($entries) . ' 位作者', 'entries' => $entries];
    }

    /**
     * Load name, employment affiliations and works from the public ORCID API.
     *
     * @param string $orcid normalised ORCID iD
     *
     * @return array{name:string,affiliations:array,papers:array,papers_total:int}
     *               papers are sorted by year, newest first
     */
    private function orcidProfile($orcid)
    {
        $base    = "https://pub.orcid.org/v3.0/$orcid";
        $headers = ['Accept: application/json'];
        $person  = json_decode($this->httpGet("$base/person", $headers) ?: '{}', true);
        $works   = json_decode($this->httpGet("$base/works", $headers) ?: '{}', true);

        $name = '';
        if (!empty($person['name'])) {
            $g    = $person['name']['given-names']['value'] ?? '';
            $f    = $person['name']['family-name']['value'] ?? '';
            $name = trim("$g $f");
        }

        // The /person resource does not carry `activities-summary`, so reading employments
        // from it alone always produced an empty list. Keep that lookup for compatibility
        // and fall back to the dedicated /employments resource.
        $groups = $person['activities-summary']['employments']['affiliation-group'] ?? null;
        if (!is_array($groups)) {
            $employments = json_decode($this->httpGet("$base/employments", $headers) ?: '{}', true);
            $groups      = $employments['affiliation-group'] ?? [];
        }

        $affs = [];
        foreach (is_array($groups) ? $groups : [] as $group) {
            $summary = $group['summaries'][0]['employment-summary'] ?? [];
            $org     = $summary['organization']['name'] ?? '';
            if ($org && !in_array($org, $affs, true)) {
                $affs[] = $org;
            }
        }

        $papers = [];
        foreach ($works['group'] ?? [] as $grp) {
            $w    = $grp['work-summary'][0] ?? [];
            $doi  = '';
            $pmid = '';
            foreach ($w['external-ids']['external-id'] ?? [] as $ext) {
                $type = strtolower($ext['external-id-type'] ?? '');
                $val  = $ext['external-id-value'] ?? '';
                if ($type === 'doi' && $doi === '') {
                    $doi = $val;
                }
                if ($type === 'pmid' && $pmid === '') {
                    $pmid = $val;
                }
            }
            $papers[] = [
                'title'   => $w['title']['title']['value'] ?? '无标题',
                'year'    => $w['publication-date']['year']['value'] ?? '',
                'journal' => $w['journal-title']['value'] ?? '',
                'doi'     => $doi,
                'pmid'    => $pmid,
                'url'     => $w['url']['value'] ?? '',
            ];
        }

        usort($papers, function ($a, $b) {
            return (int) ($b['year'] ?? 0) <=> (int) ($a['year'] ?? 0);
        });

        return ['name' => $name, 'affiliations' => $affs, 'papers' => $papers, 'papers_total' => count($papers)];
    }

    /**
     * Run a PubMed ESearch query.
     *
     * @param string $term   PubMed query
     * @param int    $retmax number of ids to return (0 = count only)
     * @param string $sort   optional ESearch sort order
     *
     * @return array{count:int,ids:array,term:string}
     */
    private function pubmedEsearch($term, $retmax = 0, $sort = '')
    {
        $email = urlencode($this->email);
        $url   = 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?db=pubmed&term=' . urlencode($term)
            . '&retmode=json&retmax=' . $retmax . '&tool=MedicalAuthorCheck&email=' . $email;
        if ($sort !== '') {
            $url .= '&sort=' . urlencode($sort);
        }

        $data = json_decode($this->httpGet($url) ?: '{}', true);
        $res  = $data['esearchresult'] ?? [];

        return ['count' => (int) ($res['count'] ?? 0), 'ids' => $res['idlist'] ?? [], 'term' => $term];
    }

    /**
     * Fetch PubMed ESummary records for a list of PMIDs (in chunks of 20).
     *
     * A PMID missing from the response still yields a placeholder row titled "PMID <id>".
     *
     * @return array list of paper rows (pmid, title, year, journal, doi, url)
     */
    private function pubmedFetchSummaries(array $ids)
    {
        if (empty($ids)) {
            return [];
        }

        $email  = urlencode($this->email);
        $papers = [];
        foreach (array_chunk($ids, 20) as $chunk) {
            $sumUrl = 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esummary.fcgi?db=pubmed&id=' . implode(',', $chunk)
                . '&retmode=json&tool=MedicalAuthorCheck&email=' . $email;
            $sum = json_decode($this->httpGet($sumUrl) ?: '{}', true);

            foreach ($chunk as $pmid) {
                $r   = $sum['result'][$pmid] ?? [];
                $doi = '';
                foreach ($r['articleids'] ?? [] as $aid) {
                    if (strtolower($aid['idtype'] ?? '') === 'doi') {
                        $doi = $aid['value'] ?? '';
                        break;
                    }
                }
                $papers[] = [
                    'pmid'    => $pmid,
                    'title'   => $r['title'] ?? "PMID $pmid",
                    'year'    => substr($r['pubdate'] ?? '', 0, 4),
                    'journal' => $r['fulljournalname'] ?? $r['source'] ?? '',
                    'doi'     => $doi,
                    'url'     => '',
                ];
            }
        }

        return $papers;
    }

    /**
     * Build the ordered list of PubMed queries to try, most specific first.
     *
     * @return array de-duplicated list of query strings
     */
    private function pubmedBuildQueries($last, $first, $institution, $orcid)
    {
        $queries = [];
        if ($orcid !== '') {
            $queries[] = $orcid . '[ORCID]';
        }

        $firstParts = preg_split('/\s+/', trim($first));
        // First whitespace-separated token of the given name (not a single-letter initial).
        $firstInitial = $firstParts[0] ?? '';

        if ($last !== '') {
            if ($firstInitial !== '' && $institution !== '') {
                $queries[] = $last . '[Author] AND ' . $firstInitial . '[Author] AND ' . $institution . '[Affiliation]';
            }
            if ($first !== '' && $institution !== '') {
                $queries[] = $last . '[Author] AND "' . $first . '"[Author] AND ' . $institution . '[Affiliation]';
            }
            if ($firstInitial !== '') {
                $queries[] = $last . '[Author] AND ' . $firstInitial . '[Author]';
            }
            if ($first !== '') {
                $queries[] = $last . '[Author] AND "' . $first . '"[Author]';
            }
            if ($first === '') {
                $queries[] = $last . '[Author]';
            }
        }

        return array_values(array_unique($queries));
    }

    /**
     * Search PubMed for the author: the first query with any hit wins, then up to
     * $listMax of its papers are fetched (sorted by publication date when possible).
     *
     * @return array{total:int,papers:array,query:string,pubmed_url:string}
     */
    private function pubmedSearch($last, $first, $institution, $orcid = '', $listMax = 10)
    {
        $queries = $this->pubmedBuildQueries($last, $first, $institution, $orcid);

        $best = ['count' => 0, 'ids' => [], 'term' => ''];
        foreach ($queries as $term) {
            $r = $this->pubmedEsearch($term, 0);
            if ($r['count'] > 0) {
                $best = $r;
                break;
            }
        }

        $total    = $best['count'];
        $usedTerm = $best['term'];
        $papers   = [];
        if ($total > 0) {
            $recent = $this->pubmedEsearch($usedTerm, $listMax, 'pub date');
            $ids    = $recent['ids'];
            if (empty($ids)) {
                // Retry without the sort parameter when the sorted query returned no ids.
                $recent = $this->pubmedEsearch($usedTerm, $listMax);
                $ids    = $recent['ids'];
            }
            $papers = $this->pubmedFetchSummaries($ids);
        }

        // The web URL uses the bare ORCID iD rather than the "[ORCID]" field tag.
        $urlTerm = $usedTerm;
        if (preg_match('/^(.+)\[ORCID\]$/i', $usedTerm, $m)) {
            $urlTerm = $m[1];
        }

        return [
            'total'      => $total,
            'papers'     => $papers,
            'query'      => $usedTerm,
            'pubmed_url' => 'https://pubmed.ncbi.nlm.nih.gov/?term=' . urlencode($urlTerm),
        ];
    }

    /**
     * Best clickable link for a paper: its own http(s) URL, else the DOI resolver, else PubMed.
     *
     * @param array $p paper row with optional `url`, `doi`, `pmid`
     *
     * @return string URL or '' when nothing usable is available
     */
    public function paperOpenUrl(array $p)
    {
        // The URL comes from remote metadata and is rendered as a link, so only http(s) is
        // accepted: FILTER_VALIDATE_URL alone also passes schemes such as "javascript://".
        $url = (isset($p['url']) && is_string($p['url'])) ? trim($p['url']) : '';
        if ($url !== '' && filter_var($url, FILTER_VALIDATE_URL) && preg_match('#^https?://#i', $url)) {
            return $url;
        }

        if (!empty($p['doi'])) {
            // Strip a resolver/"doi:" PREFIX. ltrim() takes a character mask, not a prefix,
            // and mangled inputs such as "http://dx.doi.org/10.1000/x".
            $doi = preg_replace('#^(?:https?://(?:dx\.)?doi\.org/|doi:\s*)#i', '', trim((string) $p['doi']));
            if ((string) $doi !== '') {
                return 'https://doi.org/' . $doi;
            }
        }

        if (!empty($p['pmid'])) {
            return 'https://pubmed.ncbi.nlm.nih.gov/' . rawurlencode((string) $p['pmid']) . '/';
        }

        return '';
    }

    /**
     * Absolute path of the local Retraction Watch CSV cache (under `<root>/runtime`).
     *
     * @return string
     */
    private function rwCachePath()
    {
        $root = defined('ROOT_PATH') ? rtrim(ROOT_PATH, '/\\') : dirname(dirname(dirname(__DIR__)));

        return $root . DIRECTORY_SEPARATOR . 'runtime' . DIRECTORY_SEPARATOR . 'retraction_watch_cache.csv';
    }

    /**
     * Make sure a Retraction Watch CSV is available locally, refreshing it when the cached
     * copy is older than self::RW_CACHE_H (compared against a difference in seconds).
     *
     * @return bool true when a usable file exists; a failed download falls back to a stale copy
     */
    private function downloadRetractionWatch()
    {
        $path = $this->rwCachePath();
        $dir  = dirname($path);
        if (!is_dir($dir)) {
            @mkdir($dir, 0755, true);
        }

        if (is_file($path) && (time() - filemtime($path)) < self::RW_CACHE_H) {
            return true;
        }

        $csv = $this->httpGet(self::RW_CSV_URL, [], 60);
        if (!$csv || strlen($csv) < 1000) {
            // Download failed or looks truncated: keep using whatever is cached.
            return is_file($path);
        }

        return file_put_contents($path, $csv) !== false;
    }

    /**
     * Normalise a DOI for comparison: lowercase, no resolver URL or "doi:" prefix,
     * no surrounding slashes. Placeholder values ("unavailable", "na", "n/a") become ''.
     *
     * @return string
     */
    private function normalizeDoi($raw)
    {
        $raw = strtolower(trim((string) $raw));
        if ($raw === '' || in_array($raw, ['unavailable', 'na', 'n/a'], true)) {
            return '';
        }
        $raw = (string) preg_replace('#^(?:https?://(?:dx\.)?doi\.org/|doi:\s*)#i', '', $raw);

        return trim($raw, '/');
    }

    /**
     * Index papers by normalised DOI (papers without a DOI are left out; a later duplicate wins).
     *
     * @return array map of normalised DOI => paper row
     */
    private function collectPaperDois(array $papers)
    {
        $map = [];
        foreach ($papers as $p) {
            $doi = $this->normalizeDoi($p['doi'] ?? '');
            if ($doi !== '') {
                $map[$doi] = $p;
            }
        }

        return $map;
    }

    /**
     * Whether a retraction reason text mentions a misconduct-related keyword.
     *
     * @return bool
     */
    private function isMisconductReason($reason)
    {
        $r    = strtolower((string) $reason);
        $keys = [
            'misconduct', 'fabrication', 'falsification', 'plagiarism', 'fake peer', 'paper mill',
            'ethical violation', 'breach of policy', 'complaints about author', 'fraud', 'manipulation',
            '不端', '造假', '抄袭', '剽窃',
        ];
        foreach ($keys as $k) {
            if (strpos($r, $k) !== false) {
                return true;
            }
        }

        return false;
    }

    /**
     * Reduce a title to a comparison key: lowercase ASCII letters, digits and CJK ideographs only.
     *
     * The title is lowercased BEFORE filtering; filtering first deleted every uppercase
     * letter (e.g. "COVID-19 IN CHINA" collapsed to "19") and caused spurious matches.
     *
     * @return string
     */
    private function titleMatchKey($title)
    {
        $lower = strtolower(trim((string) $title));

        return (string) preg_replace('/[^a-z0-9\x{4e00}-\x{9fff}]+/u', '', $lower);
    }

    /**
     * Whether a Retraction Watch title and a paper title are equal or contain one another
     * after reduction with titleMatchKey().
     *
     * @return bool
     */
    private function rwTitleMatchesPaper($rwTitle, array $paper)
    {
        $k1 = $this->titleMatchKey($rwTitle);
        $k2 = $this->titleMatchKey($paper['title'] ?? '');
        if ($k1 === '' || $k2 === '') {
            return false;
        }

        return $k1 === $k2 || strpos($k1, $k2) !== false || strpos($k2, $k1) !== false;
    }

    /**
     * Collect lowercase name tokens used for author matching in Retraction Watch rows.
     *
     * Order matters: when a last name is given it is the first token, which
     * authorMatchesRw() treats as mandatory.
     *
     * @return array unique tokens (original keys preserved by array_unique)
     */
    private function nameTokens($first, $last, $displayName = '')
    {
        $tokens = [];
        $last   = strtolower(trim((string) $last));
        $first  = strtolower(trim((string) $first));

        if ($last) {
            $tokens[] = $last;
        }
        if ($first) {
            $tokens[] = $first;
            foreach (preg_split('/\s+/', $first) as $p) {
                if (strlen($p) > 1) {
                    $tokens[] = $p;
                }
            }
        }
        if ($displayName) {
            foreach (preg_split('/\s+/', strtolower($displayName)) as $p) {
                if (strlen($p) > 2) {
                    $tokens[] = $p;
                }
            }
        }

        return array_unique($tokens);
    }

    /**
     * Whether a Retraction Watch author field plausibly names the author.
     *
     * The first token must occur in the field; beyond that at least two tokens of three
     * or more bytes must occur (or one, when only a single token exists).
     * NOTE(review): tokens shorter than three bytes never count as hits, so a two-letter
     * surname such as "Li" contributes nothing beyond the mandatory-first-token check.
     *
     * @return bool
     */
    private function authorMatchesRw($authorField, array $tokens)
    {
        $field = strtolower((string) $authorField);
        if ($field === '') {
            return false;
        }
        if (!empty($tokens[0]) && strpos($field, $tokens[0]) === false) {
            return false;
        }

        $hits = 0;
        foreach ($tokens as $t) {
            if (strlen($t) >= 3 && strpos($field, $t) !== false) {
                $hits++;
            }
        }

        return $hits >= 2 || (count($tokens) === 1 && $hits >= 1);
    }

    /**
     * Pick a detail link for a Retraction Watch row: first http(s) entry of the URLS column,
     * else the original-paper DOI, else the retraction DOI, else a site search by title.
     *
     * @param array $row CSV row
     * @param array $col header name => column index (numeric fallbacks are used when missing)
     *
     * @return string
     */
    private function rwDetailUrl(array $row, array $col)
    {
        $urls = $row[$col['URLS'] ?? 7] ?? '';
        foreach (preg_split('/\s*;\s*/', (string) $urls) as $u) {
            $u = trim($u);
            if ($u !== '' && preg_match('#^https?://#i', $u)) {
                return $u;
            }
        }

        $origDoi = $this->normalizeDoi($row[$col['OriginalPaperDOI'] ?? 12] ?? '');
        if ($origDoi !== '') {
            return 'https://doi.org/' . $origDoi;
        }

        $retDoi = $this->normalizeDoi($row[$col['RetractionDOI'] ?? 9] ?? '');
        if ($retDoi !== '') {
            return 'https://doi.org/' . $retDoi;
        }

        $title = trim($row[$col['Title'] ?? 1] ?? '');
        if ($title !== '') {
            return 'https://retractionwatch.com/?s=' . rawurlencode($title);
        }

        return '';
    }

    /**
     * Convert a Retraction Watch CSV row into a report item.
     *
     * @param array      $row         CSV row
     * @param array      $col         header name => column index
     * @param string     $matchType   'doi', 'name' or 'name_loose'
     * @param array|null $authorPaper the author's own paper this row was linked to, if any
     *
     * @return array
     */
    private function buildRwItem(array $row, array $col, $matchType, $authorPaper = null)
    {
        $title   = $row[$col['Title'] ?? 1] ?? '';
        $reason  = $row[$col['Reason'] ?? 11] ?? '';
        $nature  = $row[$col['RetractionNature'] ?? 10] ?? '';
        $date    = $row[$col['RetractionDate'] ?? 5] ?? '';
        $authors = $row[$col['Author'] ?? 6] ?? '';
        $origDoi = $this->normalizeDoi($row[$col['OriginalPaperDOI'] ?? 12] ?? '');

        $misconduct = $this->isMisconductReason($reason);
        $detailUrl  = $this->rwDetailUrl($row, $col);
        if ($authorPaper && !$detailUrl) {
            $detailUrl = $this->paperOpenUrl($authorPaper);
        }

        $matchLabels = [
            'doi'        => 'DOI 精确匹配(高可信度)',
            'name'       => '姓名+题目匹配(参考,已关联 ORCID 无 DOI 作品)',
            'name_loose' => '姓名匹配(低可信度,存在同名误报风险)',
        ];

        return [
            'record_id'    => $row[$col['Record ID'] ?? 0] ?? '',
            'title'        => $title,
            'nature'       => $nature,
            'reason'       => $reason,
            'date'         => $date,
            'misconduct'   => $misconduct,
            'authors'      => $authors,
            'doi'          => $origDoi,
            'author_title' => $authorPaper['title'] ?? '',
            'author_year'  => $authorPaper['year'] ?? '',
            'url'          => $detailUrl,
            'match_type'   => $matchType,
            'match_label'  => $matchLabels[$matchType] ?? $matchType,
        ];
    }

    /**
     * Count items whose nature mentions "retraction" and items flagged as misconduct.
     *
     * @return array{misconduct_count:int,retraction_count:int}
     */
    private function countRwStats(array $items)
    {
        $mis = 0;
        $ret = 0;
        foreach ($items as $it) {
            if (stripos($it['nature'] ?? '', 'retraction') !== false) {
                $ret++;
            }
            if (!empty($it['misconduct'])) {
                $mis++;
            }
        }

        return ['misconduct_count' => $mis, 'retraction_count' => $ret];
    }

    /**
     * Match the author's papers against the Retraction Watch CSV.
     *
     * Pass 1: exact DOI match for every paper that has a DOI.
     * Pass 2 (only when some papers lack a DOI): author-name match, linked to a DOI-less
     * paper by title when possible ('name'), otherwise kept as a low-confidence
     * 'name_loose' hit, filtered by institution when one is given and only while the
     * total number of items is below 50.
     *
     * @return array result with `ok`, `msg`, `items` and the various counters
     */
    private function searchRetractionsHybrid(array $papers, $first, $last, $institution, $displayName = '')
    {
        $empty = [
            'ok'                     => false,
            'msg'                    => '',
            'items'                  => [],
            'misconduct_count'       => 0,
            'retraction_count'       => 0,
            'checked_doi_count'      => 0,
            'no_doi_count'           => count($papers),
            'doi_match_count'        => 0,
            'name_match_count'       => 0,
            'name_loose_match_count' => 0,
        ];

        if (!$this->downloadRetractionWatch()) {
            $empty['msg'] = '撤稿数据库暂不可用';

            return $empty;
        }

        $paperByDoi  = $this->collectPaperDois($papers);
        $noDoiPapers = [];
        foreach ($papers as $p) {
            if ($this->normalizeDoi($p['doi'] ?? '') === '') {
                $noDoiPapers[] = $p;
            }
        }
        $checkedCount = count($paperByDoi);
        $noDoiCount   = count($noDoiPapers);

        $fp = fopen($this->rwCachePath(), 'r');
        if (!$fp) {
            $empty['msg'] = '撤稿数据库读取失败';

            return $empty;
        }

        $header  = fgetcsv($fp);
        $col     = array_flip($header ?: []);
        $tokens  = $this->nameTokens($first, $last, $displayName);
        $instLow = strtolower((string) $institution);

        $items    = [];
        $seenKeys = [];
        // De-duplicate on record id + DOI + normalised title.
        $addItem = function (array $item) use (&$items, &$seenKeys) {
            $key = ($item['record_id'] ?? '') . '|' . ($item['doi'] ?? '') . '|' . $this->titleMatchKey($item['title'] ?? '');
            if (isset($seenKeys[$key])) {
                return;
            }
            $seenKeys[$key] = true;
            $items[]        = $item;
        };

        // Pass 1: exact DOI matches.
        while (($row = fgetcsv($fp)) !== false) {
            $origDoi = $this->normalizeDoi($row[$col['OriginalPaperDOI'] ?? 12] ?? '');
            if ($origDoi !== '' && isset($paperByDoi[$origDoi])) {
                $addItem($this->buildRwItem($row, $col, 'doi', $paperByDoi[$origDoi]));
            }
        }

        // Pass 2: name fallback.
        if ($noDoiCount > 0 && !empty($tokens)) {
            rewind($fp);
            fgetcsv($fp); // skip the header row again

            while (($row = fgetcsv($fp)) !== false) {
                $origDoi = $this->normalizeDoi($row[$col['OriginalPaperDOI'] ?? 12] ?? '');
                if ($origDoi !== '' && isset($paperByDoi[$origDoi])) {
                    continue; // already reported in pass 1
                }

                $authors = $row[$col['Author'] ?? 6] ?? '';
                if (!$this->authorMatchesRw($authors, $tokens)) {
                    continue;
                }

                $rwTitle     = $row[$col['Title'] ?? 1] ?? '';
                $linkedPaper = null;
                foreach ($noDoiPapers as $p) {
                    if ($this->rwTitleMatchesPaper($rwTitle, $p)) {
                        $linkedPaper = $p;
                        break;
                    }
                }
                if ($linkedPaper) {
                    $addItem($this->buildRwItem($row, $col, 'name', $linkedPaper));
                    continue;
                }

                if ($instLow !== '') {
                    $inst = strtolower($row[$col['Institution'] ?? 4] ?? '');
                    if ($inst !== '' && strpos($inst, $instLow) === false && strpos($instLow, $inst) === false) {
                        continue;
                    }
                }
                if (count($items) < 50) {
                    $addItem($this->buildRwItem($row, $col, 'name_loose', null));
                }
            }
        }
        fclose($fp);

        $doiMatch = $nameMatch = $nameLooseMatch = 0;
        foreach ($items as $it) {
            if ($it['match_type'] === 'doi') {
                $doiMatch++;
            } elseif ($it['match_type'] === 'name') {
                $nameMatch++;
            } else {
                $nameLooseMatch++;
            }
        }
        $stats = $this->countRwStats($items);

        return [
            'ok'                     => true,
            'msg'                    => 'DOI 比对 ' . $checkedCount . ' 篇,无 DOI 作品 ' . $noDoiCount . ' 篇已启用姓名回退',
            'items'                  => $items,
            'misconduct_count'       => $stats['misconduct_count'],
            'retraction_count'       => $stats['retraction_count'],
            'checked_doi_count'      => $checkedCount,
            'no_doi_count'           => $noDoiCount,
            'doi_match_count'        => $doiMatch,
            'name_match_count'       => $nameMatch,
            'name_loose_match_count' => $nameLooseMatch,
        ];
    }

    /**
     * Merge ORCID and PubMed paper lists, tagging each row with its `source`.
     *
     * @return array
     */
    private function papersForDupCheck(array $orcidPapers, array $pubmedPapers)
    {
        $all = [];
        foreach ($orcidPapers as $p) {
            $p['source'] = 'ORCID';
            $all[]       = $p;
        }
        foreach ($pubmedPapers as $p) {
            $p['source'] = 'PubMed';
            $all[]       = $p;
        }

        return $all;
    }

    /**
     * Group papers whose normalised titles are identical and return groups of two or more.
     *
     * Titles are normalised with titleMatchKey() so this check and the Retraction Watch
     * title matching agree; placeholder titles starting with "PMID" are ignored.
     *
     * @return array list of ['title' => string, 'papers' => array]
     */
    private function checkDuplicateTitles(array $papers)
    {
        $groups = [];
        foreach ($papers as $p) {
            $t = trim($p['title'] ?? '');
            if ($t === '' || strpos($t, 'PMID') === 0) {
                continue;
            }
            $key = $this->titleMatchKey($t);
            if ($key === '') {
                continue;
            }
            $groups[$key][] = $p;
        }

        $dups = [];
        foreach ($groups as $items) {
            if (count($items) >= 2) {
                $dups[] = ['title' => $items[0]['title'], 'papers' => $items];
            }
        }

        return $dups;
    }

    /**
     * Derive the headline risk label from retraction hits and output metrics.
     *
     * @param array $rw     result of searchRetractionsHybrid()
     * @param int   $hIndex author h-index
     * @param int   $works  resolved publication count
     *
     * @return string
     */
    private function riskLevel(array $rw, $hIndex, $works)
    {
        if (($rw['misconduct_count'] ?? 0) > 0) {
            return '高风险 — 存在学术不端相关撤稿记录,建议人工复核';
        }
        if (($rw['retraction_count'] ?? 0) > 0) {
            return '中风险 — 存在撤稿 / 关注声明,请核对是否与本人相关';
        }
        if ($works === 0) {
            return '待核实 — 未检索到论文,请核对 ORCID / 姓名拼写';
        }
        if ($hIndex >= 10 || $works >= 20) {
            return '低风险 — 学术产出指标正常';
        }

        return '一般 — 青年学者常见产出区间,建议结合研究方向综合判断';
    }
}