1384 lines
52 KiB
PHP
1384 lines
52 KiB
PHP
<?php
|
||
|
||
namespace app\common\service;
|
||
|
||
use think\Env;
|
||
|
||
/**
|
||
* 医学/护理期刊 — 青年编委/作者背景调查(ORCID 精准查询)
|
||
* 数据源:OpenAlex、ORCID、PubMed、Retraction Watch、Scopus
|
||
*/
|
||
class AuthorBackgroundService
|
||
{
|
||
const RW_CSV_URL = 'https://gitlab.com/crossref/retraction-watch-data/-/raw/main/retraction_watch.csv';
|
||
const RW_CACHE_H = 86400;
|
||
|
||
/** @var string PubMed / OpenAlex 联系邮箱 */
|
||
private $email = '';
|
||
|
||
/** @var string Elsevier Scopus API Key */
|
||
private $scopusApiKey = '';
|
||
|
||
/**
 * Load the contact e-mail and the Scopus API key from configuration.
 *
 * The e-mail is taken from the `author_bg.email` environment entry and falls
 * back to `pubmed.email` (which carries a built-in default). The Scopus key
 * is taken from the `scopus.api_key` environment entry and falls back to the
 * framework config entry of the same name.
 */
public function __construct()
{
    $contact = trim((string) Env::get('author_bg.email', ''));
    $this->email = $contact !== ''
        ? $contact
        : trim((string) Env::get('pubmed.email', 'yananwang898@gmail.com'));

    $key = trim((string) Env::get('scopus.api_key', ''));
    $this->scopusApiKey = $key !== ''
        ? $key
        : trim((string) config('scopus.api_key', ''));
}
|
||
|
||
/**
 * Build the complete background-check report as a JSON-ready array.
 *
 * If no ORCID iD can be extracted from $orcid, the author is looked up by
 * name through searchOrcidCandidates(). The report is then produced only
 * when exactly one candidate is found; zero candidates, several candidates,
 * or a missing family name each return an `ok => false` payload describing
 * what the caller must supply next.
 *
 * @param mixed $orcid       ORCID iD, or any string containing one
 * @param mixed $lastName    Family name
 * @param mixed $firstName   Given name(s)
 * @param mixed $institution Institution name
 *
 * @return array{ok:bool,msg?:string,need_select?:bool,data?:array}
 */
public function buildReport($orcid, $lastName, $firstName, $institution)
{
    $orcidNorm   = $this->normalizeOrcid($orcid);
    $lastName    = trim((string) $lastName);
    $firstName   = trim((string) $firstName);
    $institution = trim((string) $institution);

    if ($orcidNorm === '' && $lastName === '' && $firstName === '') {
        return ['ok' => false, 'msg' => '请提供 ORCID 或姓名'];
    }

    $orcidSource = 'provided';
    if ($orcidNorm === '') {
        // Echo of the caller's input, returned with every "cannot proceed" answer.
        $submitted = [
            'last_name'   => $lastName,
            'first_name'  => $firstName,
            'institution' => $institution,
        ];

        if ($lastName === '') {
            return [
                'ok'   => false,
                'msg'  => '未提供 ORCID 时,需填写作者姓氏',
                'data' => [
                    'orcid_required' => true,
                    'submitted'      => $submitted,
                    'hint'           => '请填写 ORCID,或至少填写姓氏(机构选填,仅用于候选列表排序)',
                ],
            ];
        }

        $search     = $this->searchOrcidCandidates($lastName, $firstName, $institution);
        $candidates = $search['candidates'] ?? [];

        if (empty($candidates)) {
            return [
                'ok'   => false,
                'msg'  => '未能按姓名检索到 ORCID,请手动填写',
                'data' => [
                    'orcid_required'  => true,
                    'submitted'       => $submitted,
                    'hint'            => '已在 OpenAlex、ORCID 官网、Scopus 按姓名检索,未找到带 ORCID 的作者',
                    'lookup_attempts' => $search['attempts'] ?? [],
                ],
            ];
        }

        $matchCount = count($candidates);
        if ($matchCount > 1) {
            return [
                'ok'          => false,
                'need_select' => true,
                'msg'         => '匹配到 ' . $matchCount . ' 位作者,请选择',
                'data'        => [
                    'candidates'      => $candidates,
                    'submitted'       => $submitted,
                    'lookup_attempts' => $search['attempts'] ?? [],
                ],
            ];
        }

        // Exactly one candidate: adopt its ORCID iD.
        $picked      = $candidates[0];
        $orcidNorm   = $picked['orcid'];
        $orcidSource = 'name_search';

        // No given name supplied: split the candidate's display name, treating
        // its last word as the family name and the rest as the given name(s).
        if ($firstName === '' && !empty($picked['display_name'])) {
            $words = preg_split('/\s+/u', trim($picked['display_name']));
            if (count($words) > 1) {
                $lastName  = array_pop($words);
                $firstName = implode(' ', $words);
            }
        }
    }

    $way       = $this->describeQueryWay($orcidSource);
    $orcidData = $this->orcidProfile($orcidNorm);

    // Prefer the name registered at ORCID over the submitted one.
    $authorDisplay = $orcidData['name'] !== ''
        ? $orcidData['name']
        : trim("$firstName $lastName");

    $openalexAuthor = $this->resolveOpenAlexAuthor($orcidNorm, $firstName, $lastName, $institution);
    $metrics        = $this->openalexMetrics($openalexAuthor);
    $pubmed         = $this->pubmedSearch($lastName, $firstName, $institution, $orcidNorm, 50);
    $rw             = $this->searchRetractionsHybrid($orcidData['papers'], $firstName, $lastName, $institution, $authorDisplay);
    $scopusUrl      = $this->scopusDirectUrl($lastName, $firstName, $institution, $orcidNorm);
    $scopusApi      = $this->scopusApiSearch($orcidNorm, $lastName, $firstName, $institution);
    $dups           = $this->checkDuplicateTitles($this->papersForDupCheck($orcidData['papers'], $pubmed['papers']));
    $worksCount     = $this->resolveWorksCount($metrics, $orcidData, $pubmed, $scopusApi, $orcidNorm);
    $risk           = $this->riskLevel($rw, $metrics['h_index'], $worksCount);

    // Only the first ten papers of each list are shown; each gets a link.
    $orcidPapers = array_slice($orcidData['papers'], 0, 10);
    foreach ($orcidPapers as $idx => $paper) {
        $orcidPapers[$idx]['open_url'] = $this->paperOpenUrl($paper);
    }
    $pubmedPapers = array_slice($pubmed['papers'], 0, 10);
    foreach ($pubmedPapers as $idx => $paper) {
        $pubmedPapers[$idx]['open_url'] = $this->paperOpenUrl($paper);
    }
    foreach ($dups as &$dupGroup) {
        foreach ($dupGroup['papers'] as &$dupPaper) {
            $dupPaper['open_url'] = $this->paperOpenUrl($dupPaper);
        }
        unset($dupPaper);
    }
    unset($dupGroup);

    $query = [
        'way'            => $way,
        'orcid'          => $orcidNorm,
        'orcid_source'   => $orcidSource,
        'orcid_resolved' => $orcidSource !== 'provided',
        'last_name'      => $lastName,
        'first_name'     => $firstName,
        'institution'    => $institution,
    ];

    $conclusion = [
        'risk_level' => $risk,
        'notes'      => [
            '有 ORCID 时优先以 ORCID + OpenAlex 为准,指标更稳定。',
            '撤稿数据来自 Retraction Watch:有 DOI 作品按 DOI 精确比对;无 DOI 作品回退姓名/题目匹配(同名有风险,需人工核实)。',
            '本报告不构成法律认定,重大决策请结合原始文献、单位证明及人工调查。',
        ],
    ];

    $basic = [
        'display_name'          => $authorDisplay,
        'orcid'                 => $orcidNorm,
        'orcid_url'             => 'https://orcid.org/' . $orcidNorm,
        'orcid_affiliations'    => $orcidData['affiliations'],
        'openalex_institutions' => $metrics['institutions'],
        'openalex_url'          => $metrics['openalex_url'],
        'scopus_id'             => $metrics['scopus_id'],
        'scopus_url'            => $metrics['scopus_url'],
    ];

    $metricsOut = [
        'works_count'    => $worksCount,
        'cited_by_count' => (int) $metrics['cited_by_count'],
        'h_index'        => (int) $metrics['h_index'],
        'i10_index'      => (int) $metrics['i10_index'],
        'topics'         => $metrics['topics'],
        'pubmed_total'   => (int) $pubmed['total'],
        'pubmed_query'   => $pubmed['query'],
        'pubmed_url'     => $pubmed['pubmed_url'],
    ];

    return [
        'ok'   => true,
        'data' => [
            'report_at'        => date('Y-m-d H:i:s'),
            'query'            => $query,
            'conclusion'       => $conclusion,
            'basic'            => $basic,
            'scopus'           => [
                'search_url' => $scopusUrl,
                'api'        => $scopusApi,
            ],
            'metrics'          => $metricsOut,
            'retraction_watch' => $rw,
            'duplicates'       => $dups,
            'pubmed_papers'    => $pubmedPapers,
            'orcid_papers'     => [
                'total'  => (int) $orcidData['papers_total'],
                'papers' => $orcidPapers,
            ],
            'sources'          => ['OpenAlex', 'ORCID', 'PubMed', 'Scopus', 'Retraction Watch'],
        ],
    ];
}
|
||
|
||
/**
 * Extract an ORCID iD from arbitrary input (bare iD, URL, surrounding text).
 *
 * @param mixed $raw Value that may contain an ORCID iD
 *
 * @return string The first `dddd-dddd-dddd-dddX` match, lower-cased, or ''
 *                when the input is empty or contains no such pattern
 */
public function normalizeOrcid($raw)
{
    $candidate = trim((string) $raw);
    $found = $candidate !== ''
        && preg_match('/(\d{4}-\d{4}-\d{4}-\d{3}[\dX])/i', $candidate, $hit) === 1;

    return $found ? strtolower($hit[1]) : '';
}
|
||
|
||
/**
 * Look up ORCID candidates for an author name in OpenAlex, the ORCID
 * registry and Scopus, in that order.
 *
 * The institution does not narrow any of the searches; it is only handed to
 * addOrcidCandidate() so a candidate can be flagged when one of its
 * affiliations matches.
 *
 * @param mixed $lastName    Family name; an empty value yields no candidates
 * @param mixed $firstName   Given name(s), may be empty
 * @param mixed $institution Institution used to flag candidates
 *
 * @return array{candidates:array,attempts:array} Sorted candidates plus one
 *         `{source, count}` record per data source queried
 */
public function searchOrcidCandidates($lastName, $firstName, $institution)
{
    $lastName    = trim((string) $lastName);
    $firstName   = trim((string) $firstName);
    $institution = trim((string) $institution);
    if ($lastName === '') {
        return ['candidates' => [], 'attempts' => []];
    }

    $pool     = [];
    $attempts = [];

    // Source 1: OpenAlex author search.
    $openalexAuthors = $this->openalexAuthorsByName($firstName, $lastName);
    $attempts[] = ['source' => 'openalex', 'count' => count($openalexAuthors)];
    foreach ($openalexAuthors as $author) {
        $id = $this->extractOrcidFromOpenAlexAuthor($author);
        if ($id === '') {
            continue;
        }
        $shownName = $author['display_name'] ?? '';
        if ($this->isAcceptableNameMatch($shownName, $firstName, $lastName)) {
            $institutions = [];
            foreach ($author['last_known_institutions'] ?? [] as $record) {
                $label = trim((string) ($record['display_name'] ?? ''));
                if ($label !== '') {
                    $institutions[] = $label;
                }
            }
            $this->addOrcidCandidate($pool, $id, $shownName, $institutions, 'openalex', $institution);
        }
    }

    // Source 2: ORCID registry expanded search (hyphen or underscore keys).
    $registryRows = $this->orcidRegistrySearch($lastName, $firstName);
    $attempts[] = ['source' => 'orcid_registry', 'count' => count($registryRows)];
    foreach ($registryRows as $row) {
        $id = $this->normalizeOrcid($row['orcid-id'] ?? $row['orcid_id'] ?? '');
        if ($id === '') {
            continue;
        }
        $given     = trim((string) ($row['given-names'] ?? $row['given_names'] ?? ''));
        $family    = trim((string) ($row['family-names'] ?? $row['family_names'] ?? ''));
        $shownName = trim($given . ' ' . $family);
        if ($this->isAcceptableNameMatch($shownName, $firstName, $lastName, $given, $family)) {
            $orgs = $row['institution-name'] ?? $row['institution_name'] ?? [];
            if (!is_array($orgs)) {
                $orgs = ($orgs === '') ? [] : [$orgs];
            }
            $this->addOrcidCandidate($pool, $id, $shownName, $orgs, 'orcid_registry', $institution);
        }
    }

    // Source 3: Scopus author search, by name only.
    $scopus        = $this->scopusApiSearch('', $lastName, $firstName, $institution, true);
    $scopusEntries = $scopus['entries'] ?? [];
    $attempts[] = ['source' => 'scopus', 'count' => count($scopusEntries)];
    foreach ($scopusEntries as $entry) {
        $id = $this->normalizeOrcid($entry['orcid'] ?? '');
        if ($id === '') {
            continue;
        }
        $shownName = $entry['name'] ?? '';
        if ($this->isAcceptableNameMatch($shownName, $firstName, $lastName)) {
            $orgs = empty($entry['affiliation']) ? [] : [$entry['affiliation']];
            $this->addOrcidCandidate($pool, $id, $shownName, $orgs, 'scopus', $institution);
        }
    }

    return [
        'candidates' => $this->sortOrcidCandidates(array_values($pool), $firstName, $lastName),
        'attempts'   => $attempts,
    ];
}
|
||
|
||
/**
 * Score how well a candidate's name matches the requested given/family name.
 *
 * Two modes:
 *  - Structured: when the candidate's family name and the requested family
 *    name are both non-empty, the separate name parts are compared.
 *  - Free text: otherwise the candidate's display name is searched.
 *
 * A score of 0 means "reject". When a given name was requested and the
 * candidate's given name does not match it, the result is 0 even if the
 * family name matches (e.g. Yanan vs. Yuxuan).
 *
 * @param mixed $displayName Candidate's full display name
 * @param mixed $firstName   Requested given name(s), may be empty
 * @param mixed $lastName    Requested family name
 * @param mixed $givenName   Candidate's given name, if known separately
 * @param mixed $familyName  Candidate's family name, if known separately
 *
 * @return int 0, 20, 50, 60, 130, 150, 160, 170 or 180
 */
private function scoreCandidateNameMatch($displayName, $firstName, $lastName, $givenName = '', $familyName = '')
{
    $wantFirst = strtolower(trim((string) $firstName));
    $wantLast  = strtolower(trim((string) $lastName));
    $given     = strtolower(trim((string) $givenName));
    $family    = strtolower(trim((string) $familyName));
    $shown     = trim((string) $displayName);

    // Structured comparison of separate name parts.
    if ($family !== '' && $wantLast !== '') {
        if (!$this->nameTokenMatches($family, $wantLast)) {
            return 0;
        }
        if ($wantFirst === '') {
            return 60;              // family name only was requested
        }
        if ($given === '') {
            return 20;              // candidate has no given name to verify
        }
        if ($given === $wantFirst) {
            return 180;             // 60 + 120: exact given name
        }
        return $this->nameTokenMatches($given, $wantFirst) ? 160 : 0; // 60 + 100: prefix match
    }

    // Free-text comparison against the display name.
    if ($shown === '' || $wantLast === '') {
        return 0;
    }
    $shownLow = strtolower($shown);
    if (!$this->nameContainsToken($shownLow, $wantLast)) {
        return 0;
    }
    if ($wantFirst === '') {
        return 50;
    }
    if (in_array($shownLow, [$wantFirst . ' ' . $wantLast, $wantLast . ' ' . $wantFirst], true)) {
        return 170;                 // 50 + 120: exact "first last" / "last first"
    }

    $pieces = array_values(array_filter(
        preg_split('/[\s,]+/u', $shownLow),
        function ($piece) {
            return $piece !== '';
        }
    ));
    foreach ($pieces as $piece) {
        // Words that already account for the family name cannot also serve
        // as the given name.
        if ($this->nameTokenMatches($piece, $wantLast)) {
            continue;
        }
        if ($this->nameTokenMatches($piece, $wantFirst)) {
            return 150;             // 50 + 100: a word prefix-matches the given name
        }
    }

    // 50 + 80: the given name occurs as a whole word somewhere in the name.
    return $this->nameContainsToken($shownLow, $wantFirst) ? 130 : 0;
}
|
||
|
||
/**
 * Decide whether a candidate's name is close enough to the requested name.
 *
 * The score from scoreCandidateNameMatch() must reach 70 when a given name
 * was requested, and 40 when only a family name was requested.
 *
 * @param mixed $displayName Candidate's full display name
 * @param mixed $firstName   Requested given name(s), may be empty
 * @param mixed $lastName    Requested family name
 * @param mixed $givenName   Candidate's given name, if known separately
 * @param mixed $familyName  Candidate's family name, if known separately
 *
 * @return bool
 */
private function isAcceptableNameMatch($displayName, $firstName, $lastName, $givenName = '', $familyName = '')
{
    $hasFirstName = trim((string) $firstName) !== '';
    $threshold    = $hasFirstName ? 70 : 40;
    $score        = $this->scoreCandidateNameMatch($displayName, $firstName, $lastName, $givenName, $familyName);

    return $score >= $threshold;
}
|
||
|
||
/**
 * Case-insensitive prefix comparison of two name words.
 *
 * Two non-empty words match when they are equal or when either one is a
 * prefix of the other (so "y" matches "yanan" and vice versa).
 *
 * @param mixed $token  First word
 * @param mixed $target Second word
 *
 * @return bool False when either word is empty after trimming
 */
private function nameTokenMatches($token, $target)
{
    $left  = strtolower(trim((string) $token));
    $right = strtolower(trim((string) $target));
    if ($left === '' || $right === '') {
        return false;
    }

    // Equal, or one a prefix of the other, means the two agree on the whole
    // length of the shorter word.
    $shared = min(strlen($left), strlen($right));

    return strncmp($left, $right, $shared) === 0;
}
|
||
|
||
/**
 * Check whether a name contains the given word, bounded by word boundaries.
 *
 * Both sides are lower-cased before matching.
 *
 * @param string $haystack Full name to search in
 * @param mixed  $token    Word to look for
 *
 * @return bool False when the word is empty after trimming
 */
private function nameContainsToken($haystack, $token)
{
    $needle = strtolower(trim((string) $token));
    if ($needle === '') {
        return false;
    }
    $pattern = '/\b' . preg_quote($needle, '/') . '\b/u';

    return preg_match($pattern, strtolower($haystack)) === 1;
}
|
||
|
||
/**
 * Merge one search hit into the candidate pool, keyed by normalised ORCID iD.
 *
 * The first non-empty name seen for an iD is kept as its display name.
 * Affiliations and sources are accumulated without duplicates. A candidate
 * is flagged `institution_matched` as soon as any of its affiliations
 * matches the requested institution.
 *
 * @param array  $pool         Candidate pool, modified in place
 * @param mixed  $orcid        ORCID iD of the hit; hits without a valid iD are ignored
 * @param mixed  $name         Display name reported by the source
 * @param array  $affiliations Affiliation names reported by the source
 * @param string $source       Source key stored in the candidate's `sources` list
 * @param string $institution  Requested institution, '' to skip the flag check
 *
 * @return void
 */
private function addOrcidCandidate(array &$pool, $orcid, $name, array $affiliations, $source, $institution)
{
    $key = $this->normalizeOrcid($orcid);
    if ($key === '') {
        return;
    }

    if (!isset($pool[$key])) {
        $pool[$key] = [
            'orcid'               => $key,
            'display_name'        => '',
            'affiliations'        => [],
            'affiliations_text'   => '',
            'sources'             => [],
            'sources_text'        => '',
            'institution_matched' => false,
            'orcid_url'           => 'https://orcid.org/' . $key,
        ];
    }

    $label = trim((string) $name);
    if ($pool[$key]['display_name'] === '' && $label !== '') {
        $pool[$key]['display_name'] = $label;
    }

    foreach ($affiliations as $rawAffiliation) {
        $affiliation = trim((string) $rawAffiliation);
        if ($affiliation === '') {
            continue;
        }
        $alreadyListed = in_array($affiliation, $pool[$key]['affiliations'], true);
        if (!$alreadyListed) {
            $pool[$key]['affiliations'][] = $affiliation;
        }
        if ($institution !== '' && $this->institutionMatches($affiliation, $institution)) {
            $pool[$key]['institution_matched'] = true;
        }
    }

    $sourceKnown = in_array($source, $pool[$key]['sources'], true);
    if (!$sourceKnown) {
        $pool[$key]['sources'][] = $source;
    }
}
|
||
|
||
/**
 * Decorate candidates with display fields and sort them for presentation.
 *
 * Each candidate gains `name_match_score`, `name_matched` (score >= 70),
 * `affiliations_text` and `sources_text`. Sort order: higher name score
 * first, then institution-matched candidates, then display name ascending.
 *
 * @param array  $candidates Candidate rows as built by addOrcidCandidate()
 * @param string $firstName  Requested given name(s)
 * @param string $lastName   Requested family name
 *
 * @return array The decorated, sorted candidates
 */
private function sortOrcidCandidates(array $candidates, $firstName = '', $lastName = '')
{
    // Human-readable labels for the internal source keys.
    $sourceLabels = [
        'openalex'       => 'OpenAlex',
        'orcid_registry' => 'ORCID',
        'scopus'         => 'Scopus',
    ];

    foreach ($candidates as $idx => $row) {
        $score = $this->scoreCandidateNameMatch($row['display_name'] ?? '', $firstName, $lastName);
        $row['name_match_score']  = $score;
        $row['name_matched']      = $score >= 70;
        $row['affiliations_text'] = implode(';', $row['affiliations'] ?? []);

        $labels = [];
        foreach ($row['sources'] ?? [] as $sourceKey) {
            $labels[] = $sourceLabels[$sourceKey] ?? $sourceKey;
        }
        $row['sources_text'] = implode(' / ', $labels);

        $candidates[$idx] = $row;
    }

    usort($candidates, function ($a, $b) {
        $byScore = ($b['name_match_score'] ?? 0) <=> ($a['name_match_score'] ?? 0);
        if ($byScore !== 0) {
            return $byScore;
        }

        $instA = $a['institution_matched'] ?? false;
        $instB = $b['institution_matched'] ?? false;
        if ($instA !== $instB) {
            return $instB <=> $instA;
        }

        return strcmp($a['display_name'] ?? '', $b['display_name'] ?? '');
    });

    return $candidates;
}
|
||
|
||
/**
 * Loose, case-insensitive institution comparison.
 *
 * Two non-empty names match when either one contains the other as a
 * substring.
 *
 * @param mixed $candidateInst     Institution reported for the candidate
 * @param mixed $targetInstitution Institution requested by the caller
 *
 * @return bool False when either name is empty after trimming
 */
private function institutionMatches($candidateInst, $targetInstitution)
{
    $wanted = strtolower(trim((string) $targetInstitution));
    $seen   = strtolower(trim((string) $candidateInst));
    if ($wanted === '' || $seen === '') {
        return false;
    }
    if (strpos($seen, $wanted) !== false) {
        return true;
    }

    return strpos($wanted, $seen) !== false;
}
|
||
|
||
/**
 * Translate the internal ORCID-source key into the label shown in reports.
 *
 * @param string $orcidSource 'provided' or 'name_search'
 *
 * @return string The label for the key, or a generic label for unknown keys
 */
private function describeQueryWay($orcidSource)
{
    $labels = [
        'provided'    => 'ORCID 精准查询',
        'name_search' => '姓名自动匹配 ORCID',
    ];

    return isset($labels[$orcidSource]) ? $labels[$orcidSource] : 'ORCID 查询';
}
|
||
|
||
/**
 * Pull the normalised ORCID iD out of an OpenAlex author record.
 *
 * Reads the top-level `orcid` field first and falls back to `ids.orcid`.
 *
 * @param mixed $author Decoded OpenAlex author record
 *
 * @return string Normalised ORCID iD, or '' when absent or not an array
 */
private function extractOrcidFromOpenAlexAuthor($author)
{
    if (!is_array($author)) {
        return '';
    }
    if (isset($author['orcid'])) {
        $raw = $author['orcid'];
    } else {
        $raw = $author['ids']['orcid'] ?? '';
    }

    return $this->normalizeOrcid((string) $raw);
}
|
||
|
||
/**
 * Search OpenAlex authors by free-text name.
 *
 * @param mixed $first Given name(s), may be empty
 * @param mixed $last  Family name, may be empty
 *
 * @return array List of decoded OpenAlex author records (at most 25); empty
 *               when both names are blank, the request fails, or the
 *               response carries no usable `results` list
 */
private function openalexAuthorsByName($first, $last)
{
    // Trim each part so whitespace-only input can never reach the API as a
    // blank search. (The previous "fall back to $last" branch could only
    // fire for a whitespace-only family name, and then sent exactly that.)
    $q = trim(trim((string) $first) . ' ' . trim((string) $last));
    if ($q === '') {
        return [];
    }

    $url  = 'https://api.openalex.org/authors?search=' . urlencode($q) . '&per_page=25';
    $json = $this->httpGet($url);
    if (!$json) {
        return [];
    }

    $data    = json_decode($json, true);
    $results = is_array($data) ? ($data['results'] ?? []) : [];

    // Callers iterate the return value, so never hand back a non-array.
    return is_array($results) ? $results : [];
}
|
||
|
||
/**
 * Search the public ORCID registry (expanded search) by name.
 *
 * Each supplied name becomes a quoted phrase clause on its own field
 * (`family-name`, `given-names`); the clauses are joined with AND.
 *
 * @param mixed $lastName  Family name, may be empty
 * @param mixed $firstName Given name(s), may be empty
 *
 * @return array List of result rows (at most 25); empty when both names are
 *               blank, the request fails, or the response has no result list
 */
private function orcidRegistrySearch($lastName, $firstName)
{
    $clauses = [];
    foreach (['family-name' => $lastName, 'given-names' => $firstName] as $field => $value) {
        $value = trim((string) $value);
        if ($value === '') {
            continue;
        }
        // Quote the value as a phrase and escape embedded quotes/backslashes.
        // Unquoted, a multi-word name such as "Van Der Berg" binds only its
        // first word to the field, and query metacharacters in the input can
        // break the whole expression.
        $clauses[] = $field . ':"' . addcslashes($value, '"\\') . '"';
    }
    if (empty($clauses)) {
        return [];
    }

    $url = 'https://pub.orcid.org/v3.0/expanded-search/?q='
        . urlencode(implode(' AND ', $clauses)) . '&rows=25';
    $json = $this->httpGet($url, ['Accept: application/json']);
    if (!$json) {
        return [];
    }

    $data = json_decode($json, true);
    $rows = is_array($data) ? ($data['expanded-result'] ?? $data['result'] ?? []) : [];

    // Callers iterate the return value, so never hand back a non-array.
    return is_array($rows) ? $rows : [];
}
|
||
|
||
/**
 * Perform an HTTP GET and return the response body.
 *
 * A User-Agent header carrying the configured contact e-mail is always
 * appended. cURL is used when available (a body is returned only for a 2xx
 * status); otherwise the request goes through file_get_contents() with a
 * stream context. TLS peer verification is enabled on both paths.
 *
 * @param string $url     Absolute URL to fetch
 * @param array  $headers Extra request header lines
 * @param int    $timeout Timeout in seconds
 *
 * @return string|null Response body, or null on failure
 */
private function httpGet($url, array $headers = [], $timeout = 25)
{
    $headers[] = 'User-Agent: MedicalAuthorCheck/1.0 (mailto:' . $this->email . ')';

    // Fallback path for installations without the cURL extension.
    if (!function_exists('curl_init')) {
        $context = stream_context_create([
            'http' => ['method' => 'GET', 'header' => implode("\r\n", $headers), 'timeout' => $timeout],
            'ssl'  => ['verify_peer' => true, 'verify_peer_name' => true],
        ]);
        $raw = @file_get_contents($url, false, $context);

        return $raw !== false ? $raw : null;
    }

    $handle = curl_init($url);
    curl_setopt_array($handle, [
        CURLOPT_RETURNTRANSFER => true,
        CURLOPT_TIMEOUT        => $timeout,
        CURLOPT_FOLLOWLOCATION => true,
        CURLOPT_HTTPHEADER     => $headers,
        CURLOPT_SSL_VERIFYPEER => true,
    ]);
    $raw    = curl_exec($handle);
    $status = (int) curl_getinfo($handle, CURLINFO_HTTP_CODE);
    curl_close($handle);

    if ($raw === false) {
        return null;
    }

    return ($status >= 200 && $status < 300) ? $raw : null;
}
|
||
|
||
/**
 * Fetch the OpenAlex author record linked to an ORCID iD.
 *
 * @param string $orcid Bare ORCID iD (the https://orcid.org/ prefix is added here)
 *
 * @return array|null First matching author record, or null when the request
 *                    fails or nothing matches
 */
private function openalexAuthorByOrcid($orcid)
{
    $filter = urlencode('https://orcid.org/' . $orcid);
    $json   = $this->httpGet('https://api.openalex.org/authors?filter=orcid:' . $filter . '&per_page=5');
    if (!$json) {
        return null;
    }
    $data = json_decode($json, true);

    return isset($data['results'][0]) ? $data['results'][0] : null;
}
|
||
|
||
/**
 * Find the OpenAlex author record for the person being checked.
 *
 * Resolution order:
 *  1. Direct lookup by ORCID iD.
 *  2. If a valid ORCID iD is known, a name search accepted only when a
 *     result carries that same iD (otherwise null).
 *  3. Without an ORCID iD, the best-scoring acceptable name match; an
 *     institution match adds 50 points and ties go to the record with more
 *     works.
 *
 * @param string $orcid       ORCID iD, may be empty
 * @param string $firstName   Given name(s)
 * @param string $lastName    Family name
 * @param string $institution Institution used as a tie-breaking bonus
 *
 * @return array|null The chosen OpenAlex author record, or null
 */
private function resolveOpenAlexAuthor($orcid, $firstName, $lastName, $institution)
{
    $direct = $this->openalexAuthorByOrcid($orcid);
    if ($direct) {
        return $direct;
    }

    $wantedOrcid = $this->normalizeOrcid($orcid);
    if ($wantedOrcid !== '') {
        // With a known iD, only a record carrying that exact iD is trusted.
        foreach ($this->openalexAuthorsByName($firstName, $lastName) as $record) {
            if ($this->extractOrcidFromOpenAlexAuthor($record) === $wantedOrcid) {
                return $record;
            }
        }
        return null;
    }

    if ($lastName === '' && $firstName === '') {
        return null;
    }

    $winner      = null;
    $winnerScore = -1;
    foreach ($this->openalexAuthorsByName($firstName, $lastName) as $record) {
        $shownName = $record['display_name'] ?? '';
        if (!$this->isAcceptableNameMatch($shownName, $firstName, $lastName)) {
            continue;
        }

        $points = $this->scoreCandidateNameMatch($shownName, $firstName, $lastName);
        if ($institution !== '') {
            foreach ($record['last_known_institutions'] ?? [] as $known) {
                if ($this->institutionMatches($known['display_name'] ?? '', $institution)) {
                    $points += 50;
                    break;
                }
            }
        }

        $isBetter = $points > $winnerScore;
        if (!$isBetter && $points === $winnerScore && $winner !== null) {
            // Equal score: prefer the record with more works.
            $isBetter = (int) ($record['works_count'] ?? 0) > (int) ($winner['works_count'] ?? 0);
        }
        if ($isBetter) {
            $winnerScore = $points;
            $winner      = $record;
        }
    }

    return $winner;
}
|
||
|
||
/**
 * Pick the publication count to report: the largest figure across sources.
 *
 * OpenAlex, ORCID and PubMed always contribute. Scopus contributes the
 * document count of the entry whose ORCID iD matches, or — when no ORCID iD
 * is known — of the only entry returned, if there is exactly one.
 *
 * @param array  $metrics   Result of openalexMetrics()
 * @param array  $orcidData Result of orcidProfile()
 * @param array  $pubmed    Result of pubmedSearch()
 * @param array  $scopusApi Result of scopusApiSearch()
 * @param string $orcid     ORCID iD, may be empty
 *
 * @return int
 */
private function resolveWorksCount($metrics, $orcidData, $pubmed, $scopusApi, $orcid)
{
    $counts = [
        (int) ($metrics['works_count'] ?? 0),
        (int) ($orcidData['papers_total'] ?? 0),
        (int) ($pubmed['total'] ?? 0),
    ];

    $entries = $scopusApi['entries'] ?? [];
    $wanted  = $this->normalizeOrcid($orcid);

    if ($wanted === '') {
        // Without an iD a Scopus figure is only trusted when unambiguous.
        if (count($entries) === 1) {
            $counts[] = (int) ($entries[0]['document_count'] ?? 0);
        }
    } else {
        foreach ($entries as $entry) {
            if ($this->normalizeOrcid($entry['orcid'] ?? '') === $wanted) {
                $counts[] = (int) ($entry['document_count'] ?? 0);
                break;
            }
        }
    }

    return max($counts);
}
|
||
|
||
/**
 * Flatten an OpenAlex author record into the metric fields used by the report.
 *
 * @param array|null $author Decoded OpenAlex author record, or a falsy value
 *
 * @return array Keys: found, display_name, openalex_id, orcid, works_count,
 *               cited_by_count, h_index, i10_index, institutions, topics
 *               (first five, formatted "name (N篇)"), openalex_url, scopus_id,
 *               scopus_url. With no author, `found` is false and every other
 *               field is empty or zero.
 */
private function openalexMetrics($author)
{
    if (!$author) {
        return [
            'found'          => false,
            'display_name'   => '',
            'openalex_id'    => '',
            'orcid'          => '',
            'works_count'    => 0,
            'cited_by_count' => 0,
            'h_index'        => 0,
            'i10_index'      => 0,
            'institutions'   => [],
            'topics'         => [],
            'openalex_url'   => '',
            'scopus_id'      => '',
            'scopus_url'     => '',
        ];
    }

    $summary  = $author['summary_stats'] ?? [];
    $scopusId = $this->extractScopusId($author['ids']['scopus'] ?? '');

    $institutionNames = [];
    foreach ($author['last_known_institutions'] ?? [] as $institution) {
        $institutionNames[] = $institution['display_name'] ?? '';
    }

    $topicLabels = [];
    foreach (array_slice($author['topics'] ?? [], 0, 5) as $topic) {
        $topicLabels[] = ($topic['display_name'] ?? '') . ' (' . ($topic['count'] ?? 0) . '篇)';
    }

    $openalexId = $author['id'] ?? '';
    $scopusUrl  = '';
    if ($scopusId) {
        $scopusUrl = $this->scopusAuthorUrl($scopusId);
    }

    return [
        'found'          => true,
        'display_name'   => $author['display_name'] ?? '',
        'openalex_id'    => $openalexId,
        // Keep only the part after the last slash of the ORCID URL.
        'orcid'          => preg_replace('#.*/#', '', $author['orcid'] ?? ''),
        'works_count'    => (int) ($author['works_count'] ?? 0),
        'cited_by_count' => (int) ($author['cited_by_count'] ?? 0),
        'h_index'        => (int) ($summary['h_index'] ?? 0),
        'i10_index'      => (int) ($summary['i10_index'] ?? 0),
        'institutions'   => $institutionNames,
        'topics'         => $topicLabels,
        'openalex_url'   => str_replace('https://openalex.org/', 'https://openalex.org/authors/', $openalexId),
        'scopus_id'      => $scopusId,
        'scopus_url'     => $scopusUrl,
    ];
}
|
||
|
||
/**
 * Extract a Scopus author id from an `authorID=` URL or a bare numeric id.
 *
 * @param mixed $raw Scopus URL or id
 *
 * @return string The digits after `authorID=`, or the whole value when it
 *                consists of at least eight digits; '' otherwise
 */
private function extractScopusId($raw)
{
    if (!$raw) {
        return '';
    }
    $text = (string) $raw;
    foreach (['/authorID=(\d+)/i', '/^(\d{8,})$/'] as $pattern) {
        if (preg_match($pattern, $text, $hit)) {
            return $hit[1];
        }
    }

    return '';
}
|
||
|
||
/**
 * Build the public Scopus author-detail URL for an author id.
 *
 * @param string $authorId Scopus author id
 *
 * @return string
 */
private function scopusAuthorUrl($authorId)
{
    $encodedId = urlencode($authorId);

    return "https://www.scopus.com/authid/detail.uri?authorId={$encodedId}";
}
|
||
|
||
/**
 * Build a Scopus author-search URL that a person can open in a browser.
 *
 * With an ORCID iD the search is by iD alone; otherwise it is by name, with
 * the institution added when given.
 *
 * @param string $last        Family name
 * @param string $first       Given name(s)
 * @param string $institution Institution, may be empty
 * @param string $orcid       ORCID iD, may be empty
 *
 * @return string|null The URL, or null when neither an ORCID iD nor any name is given
 */
private function scopusDirectUrl($last, $first, $institution, $orcid)
{
    $endpoint = 'https://www.scopus.com/results/authorNamesList.uri';
    $common   = [
        'sort'                => 'count-f',
        'src'                 => 'al',
        'selectionPageSearch' => 'anl',
        'origin'              => 'searchauthorfreelookup',
        'activeFlag'          => 'true',
        'resultsPerPage'      => '20',
        'exactAuthorSearch'   => 'false',
    ];

    if ($orcid !== '') {
        $byOrcid = $common + [
            'orcidId' => $orcid,
            's'       => 'AUTH--ORCID--ID(' . $orcid . ')',
        ];
        return $endpoint . '?' . http_build_query($byOrcid, '', '&', PHP_QUERY_RFC3986);
    }

    if ($last === '' && $first === '') {
        return null;
    }

    $byName = $common;
    $byName['authorLastName']  = $last;
    $byName['authorFirstName'] = $first;
    if ($institution !== '') {
        $byName['affilname'] = $institution;
    }

    // One search-expression clause per non-empty field, joined with AND.
    $clauses = [];
    foreach (['AUTHLASTNAME' => $last, 'AUTHFIRST' => $first, 'AFFIL' => $institution] as $operator => $value) {
        if ($value !== '') {
            $clauses[] = $operator . '(' . $value . ')';
        }
    }
    $byName['s'] = implode(' AND ', $clauses);

    return $endpoint . '?' . http_build_query($byName, '', '&', PHP_QUERY_RFC3986);
}
|
||
|
||
/**
 * Perform an authenticated GET against an Elsevier API.
 *
 * The API key is sent in the `X-ELS-APIKey` header and JSON is requested.
 * Unlike httpGet() there is no stream fallback: without the cURL extension
 * the call returns null.
 *
 * @param string $url     Absolute URL to fetch
 * @param string $apiKey  Elsevier API key
 * @param int    $timeout Timeout in seconds
 *
 * @return string|null Response body for a 2xx status, otherwise null
 */
private function httpGetElsevier($url, $apiKey, $timeout = 25)
{
    if (!function_exists('curl_init')) {
        return null;
    }

    $handle = curl_init($url);
    curl_setopt_array($handle, [
        CURLOPT_RETURNTRANSFER => true,
        CURLOPT_TIMEOUT        => $timeout,
        CURLOPT_FOLLOWLOCATION => true,
        CURLOPT_HTTPHEADER     => ['Accept: application/json', 'X-ELS-APIKey: ' . $apiKey],
        CURLOPT_SSL_VERIFYPEER => true,
    ]);
    $raw    = curl_exec($handle);
    $status = (int) curl_getinfo($handle, CURLINFO_HTTP_CODE);
    curl_close($handle);

    if ($raw === false) {
        return null;
    }

    return ($status >= 200 && $status < 300) ? $raw : null;
}
|
||
|
||
/**
 * Query the Scopus Author Search API by ORCID iD or by name.
 *
 * With an ORCID iD the query is `ORCID(<id>)`. Otherwise it is built from
 * the family name, given name and (unless $nameOnly) the institution, joined
 * with AND.
 *
 * @param string $orcid       ORCID iD; when non-empty the names are ignored
 * @param string $last        Family name
 * @param string $first       Given name(s)
 * @param string $institution Institution; ignored when $nameOnly is true
 * @param bool   $nameOnly    Search by name only and request 25 rows instead of 10
 *
 * @return array{ok:bool,msg:string,entries:array} Each entry has author_id,
 *         name, affiliation, orcid, document_count, cited_by_count, h_index, url
 */
private function scopusApiSearch($orcid, $last, $first, $institution, $nameOnly = false)
{
    $apiKey = $this->scopusApiKey;
    if (trim($apiKey) === '') {
        return ['ok' => false, 'msg' => '未配置 Scopus API Key', 'entries' => []];
    }

    if ($orcid !== '') {
        $query = 'ORCID(' . $orcid . ')';
    } else {
        // Names keep only letters, digits, whitespace and hyphens.
        $cleanName = function ($value) {
            return trim((string) preg_replace('/[^\pL\pN\s\-]/u', '', (string) $value));
        };

        $parts = [];
        // A name that sanitises to nothing is skipped rather than sent as an
        // empty operator such as AUTHLASTNAME().
        $lastClean = $cleanName($last);
        if ($lastClean !== '') {
            $parts[] = 'AUTHLASTNAME(' . $lastClean . ')';
        }
        $firstClean = $cleanName($first);
        if ($firstClean !== '') {
            $parts[] = 'AUTHFIRST(' . $firstClean . ')';
        }
        if (!$nameOnly) {
            // Brackets and quotes in an institution name (e.g. "University
            // (Campus)") would unbalance the query expression.
            $affil = trim((string) preg_replace('/[()\[\]{}"]+/', ' ', (string) $institution));
            if ($affil !== '') {
                $parts[] = 'AFFIL(' . $affil . ')';
            }
        }
        if (empty($parts)) {
            return ['ok' => false, 'msg' => '缺少检索条件', 'entries' => []];
        }
        $query = implode(' AND ', $parts);
    }

    $count = $nameOnly ? 25 : 10;
    $url   = 'https://api.elsevier.com/content/search/author?query=' . urlencode($query) . '&count=' . $count;
    $json  = $this->httpGetElsevier($url, $apiKey);
    if (!$json) {
        return ['ok' => false, 'msg' => 'Scopus API 请求失败,请检查 Key 或网络', 'entries' => []];
    }

    $data = json_decode($json, true);
    $rows = is_array($data) ? ($data['search-results']['entry'] ?? []) : [];
    if (!is_array($rows)) {
        $rows = [];
    }

    $entries = [];
    foreach ($rows as $e) {
        if (!is_array($e)) {
            continue;
        }
        // An empty result set comes back as a single placeholder entry that
        // carries only an "error" message; it is not an author.
        if (isset($e['error'])) {
            continue;
        }

        $idRaw    = $e['dc:identifier'] ?? '';
        $authorId = '';
        if (is_string($idRaw) && preg_match('/AUTHOR_ID:(\d+)/', $idRaw, $m)) {
            $authorId = $m[1];
        }

        $name = '';
        if (!empty($e['preferred-name'])) {
            $pn   = $e['preferred-name'];
            $name = ($pn['ce:indexed-name'] ?? '')
                ?: trim(($pn['ce:given-name'] ?? '') . ' ' . ($pn['ce:surname'] ?? ''));
        }

        $aff = '';
        if (!empty($e['affiliation-current']['affiliation-name'])) {
            $aff = $e['affiliation-current']['affiliation-name'];
        } elseif (!empty($e['affiliation-current']['ip-doc']['afdispname'])) {
            $aff = $e['affiliation-current']['ip-doc']['afdispname'];
        }

        $entryOrcid = '';
        foreach (['orcid', 'ORCID'] as $orcidKey) {
            if (!empty($e[$orcidKey])) {
                $entryOrcid = $this->normalizeOrcid((string) $e[$orcidKey]);
                if ($entryOrcid !== '') {
                    break;
                }
            }
        }

        $entries[] = [
            'author_id'      => $authorId,
            'name'           => $name,
            'affiliation'    => $aff,
            'orcid'          => $entryOrcid,
            'document_count' => (int) ($e['document-count'] ?? 0),
            'cited_by_count' => (int) ($e['cited-by-count'] ?? 0),
            'h_index'        => (int) ($e['h-index'] ?? 0),
            'url'            => $authorId ? $this->scopusAuthorUrl($authorId) : '',
        ];
    }

    return ['ok' => true, 'msg' => '共匹配 ' . count($entries) . ' 位作者', 'entries' => $entries];
}
|
||
|
||
/**
 * Load name, employers and works for an ORCID iD from the public ORCID API.
 *
 * Failed or undecodable responses are treated as empty, so the method always
 * returns the full structure.
 *
 * @param string $orcid Bare ORCID iD
 *
 * @return array{name:string,affiliations:array,papers:array,papers_total:int}
 *         `papers` is sorted by year, newest first; each paper has title,
 *         year, journal, doi, pmid and url.
 */
private function orcidProfile($orcid)
{
    $base    = "https://pub.orcid.org/v3.0/$orcid";
    $headers = ['Accept: application/json'];

    // Fetch one sub-resource and always hand back an array.
    $fetch = function ($suffix) use ($base, $headers) {
        $decoded = json_decode($this->httpGet($base . $suffix, $headers) ?: '{}', true);
        return is_array($decoded) ? $decoded : [];
    };

    $person = $fetch('/person');
    $works  = $fetch('/works');

    $name = '';
    if (!empty($person['name'])) {
        $g    = $person['name']['given-names']['value'] ?? '';
        $f    = $person['name']['family-name']['value'] ?? '';
        $name = trim("$g $f");
    }

    // The /person resource does not carry an activities summary, so reading
    // employments from it always came back empty. Use the embedded summary
    // if it is ever present, otherwise load the /employments resource.
    $groups = $person['activities-summary']['employments']['affiliation-group'] ?? null;
    if (!is_array($groups)) {
        $employments = $fetch('/employments');
        $groups      = $employments['affiliation-group'] ?? [];
        if (!is_array($groups)) {
            $groups = [];
        }
    }

    $affs = [];
    foreach ($groups as $group) {
        $summary = $group['summaries'][0]['employment-summary'] ?? [];
        $org     = $summary['organization']['name'] ?? '';
        if ($org) {
            $affs[] = $org;
        }
    }

    $papers = [];
    foreach ($works['group'] ?? [] as $grp) {
        // Each group bundles versions of one work; the first summary is used.
        $w    = $grp['work-summary'][0] ?? [];
        $doi  = '';
        $pmid = '';
        foreach ($w['external-ids']['external-id'] ?? [] as $ext) {
            $type = strtolower($ext['external-id-type'] ?? '');
            $val  = $ext['external-id-value'] ?? '';
            if ($type === 'doi' && $doi === '') {
                $doi = $val;
            }
            if ($type === 'pmid' && $pmid === '') {
                $pmid = $val;
            }
        }
        $papers[] = [
            'title'   => $w['title']['title']['value'] ?? '无标题',
            'year'    => $w['publication-date']['year']['value'] ?? '',
            'journal' => $w['journal-title']['value'] ?? '',
            'doi'     => $doi,
            'pmid'    => $pmid,
            'url'     => $w['url']['value'] ?? '',
        ];
    }

    usort($papers, function ($a, $b) {
        return (int) ($b['year'] ?? 0) <=> (int) ($a['year'] ?? 0);
    });

    return ['name' => $name, 'affiliations' => $affs, 'papers' => $papers, 'papers_total' => count($papers)];
}
|
||
|
||
/**
 * Run a PubMed ESearch query (NCBI E-utilities, JSON mode).
 *
 * @param string $term   PubMed search term
 * @param int    $retmax Maximum number of PMIDs to return (0 = count only)
 * @param string $sort   Optional sort order, '' for the API default
 *
 * @return array{count:int,ids:array,term:string} A failed request yields
 *         count 0 and no ids
 */
private function pubmedEsearch($term, $retmax = 0, $sort = '')
{
    $url = 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi'
        . '?db=pubmed&term=' . urlencode($term)
        . '&retmode=json&retmax=' . $retmax
        . '&tool=MedicalAuthorCheck&email=' . urlencode($this->email);
    if ($sort !== '') {
        $url .= '&sort=' . urlencode($sort);
    }

    $payload = json_decode($this->httpGet($url) ?: '{}', true);
    $result  = $payload['esearchresult'] ?? [];

    return [
        'count' => (int) ($result['count'] ?? 0),
        'ids'   => $result['idlist'] ?? [],
        'term'  => $term,
    ];
}
|
||
|
||
/**
 * Fetch PubMed article summaries (ESummary) for a list of PMIDs.
 *
 * PMIDs are requested in batches of 20. A paper row is produced for every
 * PMID, with placeholder values when its summary is missing.
 *
 * @param array $ids PMIDs
 *
 * @return array List of papers with pmid, title, year, journal, doi, url
 */
private function pubmedFetchSummaries(array $ids)
{
    if (empty($ids)) {
        return [];
    }

    $contact = urlencode($this->email);
    $papers  = [];
    foreach (array_chunk($ids, 20) as $batch) {
        $url = 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esummary.fcgi?db=pubmed&id='
            . implode(',', $batch) . '&retmode=json&tool=MedicalAuthorCheck&email=' . $contact;
        $summaries = json_decode($this->httpGet($url) ?: '{}', true);

        foreach ($batch as $pmid) {
            $record = $summaries['result'][$pmid] ?? [];

            // Use the first article id of type "doi", if any.
            $doi = '';
            foreach ($record['articleids'] ?? [] as $articleId) {
                if (strtolower($articleId['idtype'] ?? '') === 'doi') {
                    $doi = $articleId['value'] ?? '';
                    break;
                }
            }

            $papers[] = [
                'pmid'    => $pmid,
                'title'   => $record['title'] ?? "PMID $pmid",
                // The publication date starts with the four-digit year.
                'year'    => substr($record['pubdate'] ?? '', 0, 4),
                'journal' => $record['fulljournalname'] ?? $record['source'] ?? '',
                'doi'     => $doi,
                'url'     => '',
            ];
        }
    }

    return $papers;
}
|
||
|
||
/**
 * Build the list of PubMed search terms to try, most specific first.
 *
 * Order: ORCID iD; family name + first word of the given name + affiliation;
 * family name + quoted full given name + affiliation; the same two without
 * affiliation; family name alone (only when no given name was supplied).
 * Duplicate terms are removed.
 *
 * @param string $last        Family name
 * @param string $first       Given name(s)
 * @param string $institution Institution, may be empty
 * @param string $orcid       ORCID iD, may be empty
 *
 * @return array List of search terms, possibly empty
 */
private function pubmedBuildQueries($last, $first, $institution, $orcid)
{
    $terms = [];
    if ($orcid !== '') {
        $terms[] = $orcid . '[ORCID]';
    }

    // First whitespace-separated word of the given name.
    $givenWords = preg_split('/\s+/', trim($first));
    $firstWord  = $givenWords[0] ?? '';

    if ($last !== '') {
        $author      = $last . '[Author]';
        $withWord    = $author . ' AND ' . $firstWord . '[Author]';
        $withPhrase  = $author . ' AND "' . $first . '"[Author]';
        $affiliation = ' AND ' . $institution . '[Affiliation]';

        $hasWord        = $firstWord !== '';
        $hasGiven       = $first !== '';
        $hasInstitution = $institution !== '';

        if ($hasWord && $hasInstitution) {
            $terms[] = $withWord . $affiliation;
        }
        if ($hasGiven && $hasInstitution) {
            $terms[] = $withPhrase . $affiliation;
        }
        if ($hasWord) {
            $terms[] = $withWord;
        }
        if ($hasGiven) {
            $terms[] = $withPhrase;
        } else {
            $terms[] = $author;
        }
    }

    return array_values(array_unique($terms));
}
|
||
|
||
/**
 * Search PubMed for an author and return the hit count plus a short paper list.
 *
 * The candidate terms from pubmedBuildQueries() are probed in order with a
 * zero-result esearch; the first term with a non-zero count wins. For that term
 * up to $listMax IDs are requested sorted by 'pub date' (retried without a sort
 * when that returns no IDs) and expanded via pubmedFetchSummaries().
 *
 * @param string $last        author last name
 * @param string $first       author first name(s)
 * @param string $institution affiliation text, may be empty
 * @param string $orcid       ORCID iD, may be empty
 * @param int    $listMax     maximum number of papers to list
 * @return array keys: total, papers, query, pubmed_url
 */
private function pubmedSearch($last, $first, $institution, $orcid = '', $listMax = 10)
{
    $winner = ['count' => 0, 'ids' => [], 'term' => ''];
    foreach ($this->pubmedBuildQueries($last, $first, $institution, $orcid) as $candidate) {
        $probe = $this->pubmedEsearch($candidate, 0);
        if ($probe['count'] > 0) {
            $winner = $probe;
            break;
        }
    }

    $total = $winner['count'];
    $term = $winner['term'];

    $papers = [];
    if ($total > 0) {
        $ids = $this->pubmedEsearch($term, $listMax, 'pub date')['ids'];
        if (empty($ids)) {
            $ids = $this->pubmedEsearch($term, $listMax)['ids'];
        }
        $papers = $this->pubmedFetchSummaries($ids);
    }

    // For the browser link an ORCID term is reduced to the bare identifier.
    $linkTerm = preg_match('/^(.+)\[ORCID\]$/i', $term, $parts) ? $parts[1] : $term;

    return [
        'total' => $total,
        'papers' => $papers,
        'query' => $term,
        'pubmed_url' => 'https://pubmed.ncbi.nlm.nih.gov/?term=' . urlencode($linkTerm),
    ];
}
|
||
|
||
/**
 * Pick the best link for opening a paper: explicit URL, then DOI, then PubMed.
 *
 * @param array $p paper row; reads the optional keys url, doi and pmid
 * @return string absolute URL, or '' when none of the keys yields one
 */
public function paperOpenUrl(array $p)
{
    if (!empty($p['url']) && filter_var($p['url'], FILTER_VALIDATE_URL)) {
        return $p['url'];
    }

    if (!empty($p['doi'])) {
        // Strip a resolver or "doi:" prefix as a PREFIX. ltrim() takes a character
        // mask, so ltrim($doi, 'https://doi.org/') mangled values such as
        // "http://dx.doi.org/10.1/x" into "x.doi.org/10.1/x".
        $doi = preg_replace(
            '#^(?:https?://(?:dx\.)?doi\.org/|doi:\s*)#i',
            '',
            trim((string) $p['doi'])
        );
        $doi = ltrim((string) $doi, '/');
        if ($doi !== '') {
            return 'https://doi.org/' . $doi;
        }
    }

    if (!empty($p['pmid'])) {
        return 'https://pubmed.ncbi.nlm.nih.gov/' . $p['pmid'] . '/';
    }

    return '';
}
|
||
|
||
/**
 * Absolute path of the locally cached Retraction Watch CSV.
 *
 * The file lives in the "runtime" directory under ROOT_PATH when that constant
 * is defined, otherwise under the directory three levels above this file.
 *
 * @return string
 */
private function rwCachePath()
{
    if (defined('ROOT_PATH')) {
        $base = rtrim(ROOT_PATH, '/\\');
    } else {
        $base = dirname(__DIR__, 3);
    }

    return implode(DIRECTORY_SEPARATOR, [$base, 'runtime', 'retraction_watch_cache.csv']);
}
|
||
|
||
/**
 * Make sure a local copy of the Retraction Watch CSV is available.
 *
 * A cached file younger than RW_CACHE_H seconds is reused. Otherwise the CSV is
 * downloaded and swapped in through a temp file + rename, so a failed or
 * partial write never replaces a usable cache with a truncated one, and readers
 * do not see a half-written file.
 *
 * @return bool true when a cache file (fresh or stale) exists afterwards
 */
private function downloadRetractionWatch()
{
    $path = $this->rwCachePath();
    $dir = dirname($path);
    // The second is_dir() covers another request creating the directory concurrently.
    if (!is_dir($dir) && !@mkdir($dir, 0755, true) && !is_dir($dir)) {
        return false;
    }

    // filemtime() results are cached per request; drop them before judging freshness.
    clearstatcache(true, $path);
    if (is_file($path) && (time() - filemtime($path)) < self::RW_CACHE_H) {
        return true;
    }

    // Third argument is presumably a timeout in seconds — confirm against httpGet().
    $csv = $this->httpGet(self::RW_CSV_URL, [], 60);
    if (!$csv || strlen($csv) < 1000) {
        // Download failed or is implausibly small: fall back to a stale cache if there is one.
        return is_file($path);
    }

    $tmp = $path . '.' . uniqid('tmp', true);
    $written = @file_put_contents($tmp, $csv, LOCK_EX);
    if ($written === strlen($csv) && @rename($tmp, $path)) {
        return true;
    }

    // Write or swap failed: discard the temp file and keep whatever cache was there.
    if (is_file($tmp)) {
        @unlink($tmp);
    }
    clearstatcache(true, $path);

    return is_file($path);
}
|
||
|
||
/**
 * Reduce a DOI-like value to a bare, lower-case DOI.
 *
 * Placeholder values ("unavailable", "na", "n/a") and empty input yield ''.
 * A leading http(s)://doi.org/ or http(s)://dx.doi.org/ prefix is removed and
 * surrounding slashes are trimmed.
 *
 * @param mixed $raw DOI, DOI URL or placeholder
 * @return string normalized DOI, or ''
 */
private function normalizeDoi($raw)
{
    $doi = strtolower(trim((string) $raw));

    $placeholders = ['unavailable' => true, 'na' => true, 'n/a' => true];
    if ($doi === '' || isset($placeholders[$doi])) {
        return '';
    }

    $bare = preg_replace('#^https?://(dx\.)?doi\.org/#i', '', $doi);

    return trim($bare, '/');
}
|
||
|
||
/**
 * Index papers by their normalized DOI.
 *
 * Papers without a usable DOI are left out; when several papers share a DOI
 * the last one wins.
 *
 * @param array $papers paper rows with an optional 'doi' key
 * @return array map of normalized DOI => paper row
 */
private function collectPaperDois(array $papers)
{
    $byDoi = [];
    foreach ($papers as $paper) {
        $key = $this->normalizeDoi($paper['doi'] ?? '');
        if ($key === '') {
            continue;
        }
        $byDoi[$key] = $paper;
    }

    return $byDoi;
}
|
||
|
||
/**
 * Tell whether a retraction reason text mentions a misconduct-related keyword.
 *
 * Matching is a case-insensitive substring test against a fixed English and
 * Chinese keyword list.
 *
 * @param mixed $reason reason text from a retraction record
 * @return bool
 */
private function isMisconductReason($reason)
{
    $haystack = strtolower((string) $reason);
    $markers = [
        'misconduct', 'fabrication', 'falsification', 'plagiarism',
        'fake peer', 'paper mill', 'ethical violation', 'breach of policy',
        'complaints about author', 'fraud', 'manipulation',
        '不端', '造假', '抄袭', '剽窃',
    ];

    $found = false;
    for ($i = 0, $n = count($markers); $i < $n && !$found; $i++) {
        $found = strpos($haystack, $markers[$i]) !== false;
    }

    return $found;
}
|
||
|
||
/**
 * Build a comparison key for a title: lower-case ASCII letters, digits and
 * CJK ideographs (U+4E00–U+9FFF) only, everything else removed.
 *
 * @param mixed $title raw title text
 * @return string comparison key; '' for empty input or text the regex cannot process
 */
private function titleMatchKey($title)
{
    // A-Z must survive the strip so strtolower() can fold it afterwards. With
    // only a-z in the class, upper-case letters were deleted outright
    // ("Hello World" became "elloorld").
    $kept = preg_replace('/[^a-zA-Z0-9\x{4e00}-\x{9fff}]+/u', '', trim((string) $title));

    // preg_replace() returns null on failure (e.g. invalid UTF-8 with the /u flag).
    return $kept === null ? '' : strtolower($kept);
}
|
||
|
||
/**
 * Compare a Retraction Watch title with a paper's title.
 *
 * Both titles are reduced with titleMatchKey(); they match when the keys are
 * equal or one key contains the other. An empty key on either side never matches.
 *
 * @param mixed $rwTitle title from the retraction record
 * @param array $paper   paper row with an optional 'title' key
 * @return bool
 */
private function rwTitleMatchesPaper($rwTitle, array $paper)
{
    $rwKey = $this->titleMatchKey($rwTitle);
    $paperKey = $this->titleMatchKey($paper['title'] ?? '');

    if ($rwKey === '' || $paperKey === '') {
        return false;
    }
    if ($rwKey === $paperKey) {
        return true;
    }

    return strpos($rwKey, $paperKey) !== false
        || strpos($paperKey, $rwKey) !== false;
}
|
||
|
||
/**
 * Collect lower-case name tokens used for author matching.
 *
 * Order: last name, full first name, each first-name word longer than one
 * byte, then each display-name word longer than two bytes. Duplicates are
 * dropped while the first occurrence keeps its position (and array key).
 *
 * @param mixed  $first       first name(s)
 * @param mixed  $last        last name
 * @param string $displayName optional full display name
 * @return array tokens, last name first when one was given
 */
private function nameTokens($first, $last, $displayName = '')
{
    $family = strtolower(trim((string) $last));
    $given = strtolower(trim((string) $first));

    $collected = [];
    if ($family) {
        $collected[] = $family;
    }

    if ($given) {
        $collected[] = $given;
        $givenWords = array_filter(preg_split('/\s+/', $given), function ($word) {
            return strlen($word) > 1;
        });
        $collected = array_merge($collected, array_values($givenWords));
    }

    if ($displayName) {
        $displayWords = array_filter(preg_split('/\s+/', strtolower($displayName)), function ($word) {
            return strlen($word) > 2;
        });
        $collected = array_merge($collected, array_values($displayWords));
    }

    return array_unique($collected);
}
|
||
|
||
/**
 * Decide whether a Retraction Watch author field plausibly names the author.
 *
 * The first token (when non-empty) must occur in the field. Beyond that, at
 * least two tokens of three or more bytes must occur in it, or one such token
 * when it is the only token. Tokens shorter than three bytes never count as hits.
 *
 * @param mixed $authorField author column of a retraction record
 * @param array $tokens      lower-case name tokens, e.g. from nameTokens()
 * @return bool
 */
private function authorMatchesRw($authorField, array $tokens)
{
    $haystack = strtolower((string) $authorField);
    if ($haystack === '') {
        return false;
    }

    if (!empty($tokens[0]) && strpos($haystack, $tokens[0]) === false) {
        return false;
    }

    $matched = array_filter($tokens, function ($token) use ($haystack) {
        return strlen($token) >= 3 && strpos($haystack, $token) !== false;
    });
    $hits = count($matched);

    if ($hits >= 2) {
        return true;
    }

    return $hits >= 1 && count($tokens) === 1;
}
|
||
|
||
/**
 * Choose a detail link for a Retraction Watch CSV row.
 *
 * Preference order: first http(s) URL in the semicolon-separated URLS column,
 * the original paper DOI, the retraction notice DOI, then a retractionwatch.com
 * title search. Columns are located by header name, falling back to a fixed index.
 *
 * @param array $row CSV row
 * @param array $col map of header name => column index
 * @return string URL, or '' when nothing usable is present
 */
private function rwDetailUrl(array $row, array $col)
{
    $cell = function ($header, $fallbackIndex) use ($row, $col) {
        return $row[$col[$header] ?? $fallbackIndex] ?? '';
    };

    foreach (preg_split('/\s*;\s*/', (string) $cell('URLS', 7)) as $candidate) {
        $candidate = trim($candidate);
        if (preg_match('#^https?://#i', $candidate)) {
            return $candidate;
        }
    }

    $doiColumns = ['OriginalPaperDOI' => 12, 'RetractionDOI' => 9];
    foreach ($doiColumns as $header => $fallbackIndex) {
        $doi = $this->normalizeDoi($cell($header, $fallbackIndex));
        if ($doi !== '') {
            return 'https://doi.org/' . $doi;
        }
    }

    $title = trim($cell('Title', 1));

    return $title === '' ? '' : 'https://retractionwatch.com/?s=' . rawurlencode($title);
}
|
||
|
||
/**
 * Turn a Retraction Watch CSV row into a report item.
 *
 * Columns are located by header name, falling back to a fixed index. When the
 * row itself yields no link, the link of the associated author paper is used.
 *
 * @param array      $row         CSV row
 * @param array      $col         map of header name => column index
 * @param string     $matchType   'doi', 'name' or 'name_loose'
 * @param array|null $authorPaper the author's own paper this record was matched to, if any
 * @return array report item (record_id, title, nature, reason, date, misconduct, authors,
 *               doi, author_title, author_year, url, match_type, match_label)
 */
private function buildRwItem(array $row, array $col, $matchType, $authorPaper = null)
{
    $cell = function ($header, $fallbackIndex) use ($row, $col) {
        return $row[$col[$header] ?? $fallbackIndex] ?? '';
    };

    $reason = $cell('Reason', 11);

    $link = $this->rwDetailUrl($row, $col);
    if (!$link && $authorPaper) {
        $link = $this->paperOpenUrl($authorPaper);
    }

    $labels = [
        'doi' => 'DOI 精确匹配(高可信度)',
        'name' => '姓名+题目匹配(参考,已关联 ORCID 无 DOI 作品)',
        'name_loose' => '姓名匹配(低可信度,存在同名误报风险)',
    ];

    return [
        'record_id' => $cell('Record ID', 0),
        'title' => $cell('Title', 1),
        'nature' => $cell('RetractionNature', 10),
        'reason' => $reason,
        'date' => $cell('RetractionDate', 5),
        'misconduct' => $this->isMisconductReason($reason),
        'authors' => $cell('Author', 6),
        'doi' => $this->normalizeDoi($cell('OriginalPaperDOI', 12)),
        'author_title' => $authorPaper['title'] ?? '',
        'author_year' => $authorPaper['year'] ?? '',
        'url' => $link,
        'match_type' => $matchType,
        // Unknown match types are shown verbatim.
        'match_label' => $labels[$matchType] ?? $matchType,
    ];
}
|
||
|
||
/**
 * Count retraction and misconduct items in a list of report items.
 *
 * An item counts as a retraction when its 'nature' contains "retraction"
 * (case-insensitive) and as misconduct when its 'misconduct' flag is truthy.
 *
 * @param array $items report items as produced by buildRwItem()
 * @return array keys: misconduct_count, retraction_count
 */
private function countRwStats(array $items)
{
    $misconduct = 0;
    $retractions = 0;

    foreach ($items as $entry) {
        $retractions += (stripos($entry['nature'] ?? '', 'retraction') !== false) ? 1 : 0;
        $misconduct += empty($entry['misconduct']) ? 0 : 1;
    }

    return ['misconduct_count' => $misconduct, 'retraction_count' => $retractions];
}
|
||
|
||
/**
 * Screen an author's papers against the cached Retraction Watch CSV.
 *
 * Pass 1 reports every CSV row whose original-paper DOI equals the DOI of one
 * of the papers ('doi' match). Pass 2 runs only when some papers lack a DOI and
 * name tokens exist: rows whose author field matches the tokens are reported as
 * 'name' when their title also matches a DOI-less paper, otherwise as
 * 'name_loose' — the latter only if the institution (when given) is compatible
 * and fewer than 50 items have been collected so far. Items are de-duplicated
 * on record id + DOI + title key.
 *
 * @param array  $papers      the author's papers (rows with optional doi/title keys)
 * @param mixed  $first       author first name(s)
 * @param mixed  $last        author last name
 * @param mixed  $institution affiliation used to filter loose name matches, may be empty
 * @param string $displayName optional full display name
 * @return array keys: ok, msg, items, misconduct_count, retraction_count, checked_doi_count,
 *               no_doi_count, doi_match_count, name_match_count, name_loose_match_count
 */
private function searchRetractionsHybrid(array $papers, $first, $last, $institution, $displayName = '')
{
    $failure = [
        'ok' => false, 'msg' => '', 'items' => [],
        'misconduct_count' => 0, 'retraction_count' => 0,
        'checked_doi_count' => 0, 'no_doi_count' => count($papers),
        'doi_match_count' => 0, 'name_match_count' => 0, 'name_loose_match_count' => 0,
    ];
    if (!$this->downloadRetractionWatch()) {
        $failure['msg'] = '撤稿数据库暂不可用';
        return $failure;
    }

    // Split the papers into a DOI-indexed map and the DOI-less remainder.
    $paperByDoi = $this->collectPaperDois($papers);
    $withoutDoi = array_values(array_filter($papers, function ($paper) {
        return $this->normalizeDoi($paper['doi'] ?? '') === '';
    }));
    $checkedCount = count($paperByDoi);
    $noDoiCount = count($withoutDoi);

    $handle = fopen($this->rwCachePath(), 'r');
    if (!$handle) {
        $failure['msg'] = '撤稿数据库读取失败';
        return $failure;
    }

    $col = array_flip(fgetcsv($handle) ?: []);
    $tokens = $this->nameTokens($first, $last, $displayName);
    $wantedInst = strtolower((string) $institution);

    // Reads a column by header name, falling back to a fixed index.
    $cell = function (array $row, $header, $fallbackIndex) use ($col) {
        return $row[$col[$header] ?? $fallbackIndex] ?? '';
    };

    $items = [];
    $seen = [];
    $collect = function (array $item) use (&$items, &$seen) {
        $fingerprint = ($item['record_id'] ?? '') . '|' . ($item['doi'] ?? '') . '|'
            . $this->titleMatchKey($item['title'] ?? '');
        if (!isset($seen[$fingerprint])) {
            $seen[$fingerprint] = true;
            $items[] = $item;
        }
    };

    // Pass 1: exact DOI matches.
    while (($row = fgetcsv($handle)) !== false) {
        $rowDoi = $this->normalizeDoi($cell($row, 'OriginalPaperDOI', 12));
        if ($rowDoi !== '' && isset($paperByDoi[$rowDoi])) {
            $collect($this->buildRwItem($row, $col, 'doi', $paperByDoi[$rowDoi]));
        }
    }

    // Back to the first data row for the name-based pass.
    rewind($handle);
    fgetcsv($handle);

    if ($noDoiCount > 0 && !empty($tokens)) {
        // Pass 2: name-based fallback for works without a DOI.
        while (($row = fgetcsv($handle)) !== false) {
            $rowDoi = $this->normalizeDoi($cell($row, 'OriginalPaperDOI', 12));
            if ($rowDoi !== '' && isset($paperByDoi[$rowDoi])) {
                continue; // already handled in pass 1
            }
            if (!$this->authorMatchesRw($cell($row, 'Author', 6), $tokens)) {
                continue;
            }

            $rowTitle = $cell($row, 'Title', 1);
            $linked = null;
            foreach ($withoutDoi as $candidate) {
                if ($this->rwTitleMatchesPaper($rowTitle, $candidate)) {
                    $linked = $candidate;
                    break;
                }
            }
            if ($linked) {
                $collect($this->buildRwItem($row, $col, 'name', $linked));
                continue;
            }

            // Loose match: drop rows whose institution is known and unrelated.
            if ($wantedInst !== '') {
                $rowInst = strtolower($cell($row, 'Institution', 4));
                $compatible = $rowInst === ''
                    || strpos($rowInst, $wantedInst) !== false
                    || strpos($wantedInst, $rowInst) !== false;
                if (!$compatible) {
                    continue;
                }
            }
            if (count($items) < 50) {
                $collect($this->buildRwItem($row, $col, 'name_loose', null));
            }
        }
    }
    fclose($handle);

    // Anything that is neither a 'doi' nor a 'name' match counts as loose.
    $perType = array_count_values(array_column($items, 'match_type'));
    $doiMatches = $perType['doi'] ?? 0;
    $nameMatches = $perType['name'] ?? 0;
    $looseMatches = count($items) - $doiMatches - $nameMatches;

    $stats = $this->countRwStats($items);

    return [
        'ok' => true,
        'msg' => 'DOI 比对 ' . $checkedCount . ' 篇,无 DOI 作品 ' . $noDoiCount . ' 篇已启用姓名回退',
        'items' => $items,
        'misconduct_count' => $stats['misconduct_count'],
        'retraction_count' => $stats['retraction_count'],
        'checked_doi_count' => $checkedCount,
        'no_doi_count' => $noDoiCount,
        'doi_match_count' => $doiMatches,
        'name_match_count' => $nameMatches,
        'name_loose_match_count' => $looseMatches,
    ];
}
|
||
|
||
/**
 * Merge ORCID and PubMed papers into one list, tagging each row with its origin.
 *
 * ORCID rows come first. Each row gets (or has overwritten) a 'source' key set
 * to 'ORCID' or 'PubMed'.
 *
 * @param array $orcidPapers  paper rows from ORCID
 * @param array $pubmedPapers paper rows from PubMed
 * @return array combined, sequentially indexed list
 */
private function papersForDupCheck(array $orcidPapers, array $pubmedPapers)
{
    $tagWith = function ($label) {
        return function ($paper) use ($label) {
            $paper['source'] = $label;
            return $paper;
        };
    };

    return array_merge(
        array_map($tagWith('ORCID'), array_values($orcidPapers)),
        array_map($tagWith('PubMed'), array_values($pubmedPapers))
    );
}
|
||
|
||
/**
 * Group papers whose titles are identical after normalization.
 *
 * Titles are compared case-insensitively on ASCII letters, digits and CJK
 * ideographs (U+4E00–U+9FFF) only. Empty titles and placeholder titles
 * starting with "PMID" are ignored.
 *
 * @param array $papers paper rows with an optional 'title' key
 * @return array list of ['title' => first title of the group, 'papers' => rows]
 *               for every group with two or more papers
 */
private function checkDuplicateTitles(array $papers)
{
    $groups = [];
    foreach ($papers as $paper) {
        $title = trim((string) ($paper['title'] ?? ''));
        if ($title === '' || strpos($title, 'PMID') === 0) {
            continue;
        }

        // A-Z must survive the strip so strtolower() can fold it afterwards. With
        // only a-z in the class, upper-case letters were deleted outright, so
        // "Cat Study" and "Bat Study" collided while "Deep Learning" and
        // "deep learning" did not group.
        $key = strtolower((string) preg_replace('/[^a-zA-Z0-9\x{4e00}-\x{9fff}]+/u', '', $title));
        if ($key === '') {
            continue;
        }
        $groups[$key][] = $paper;
    }

    $duplicates = [];
    foreach ($groups as $members) {
        if (count($members) >= 2) {
            $duplicates[] = ['title' => $members[0]['title'], 'papers' => $members];
        }
    }

    return $duplicates;
}
|
||
|
||
/**
 * Derive the overall risk verdict text for the report.
 *
 * Checked in order: misconduct-related retractions, any retraction, no works
 * found ($works strictly equal to integer 0), strong output (h-index >= 10 or
 * at least 20 works), otherwise the default verdict.
 *
 * @param array $rw     retraction screening result; reads misconduct_count and retraction_count
 * @param mixed $hIndex author h-index
 * @param mixed $works  number of works found
 * @return string verdict text
 */
private function riskLevel(array $rw, $hIndex, $works)
{
    $misconductHits = $rw['misconduct_count'] ?? 0;
    $retractionHits = $rw['retraction_count'] ?? 0;

    if ($misconductHits > 0) {
        $verdict = '高风险 — 存在学术不端相关撤稿记录,建议人工复核';
    } elseif ($retractionHits > 0) {
        $verdict = '中风险 — 存在撤稿 / 关注声明,请核对是否与本人相关';
    } elseif ($works === 0) {
        $verdict = '待核实 — 未检索到论文,请核对 ORCID / 姓名拼写';
    } elseif ($hIndex >= 10 || $works >= 20) {
        $verdict = '低风险 — 学术产出指标正常';
    } else {
        $verdict = '一般 — 青年学者常见产出区间,建议结合研究方向综合判断';
    }

    return $verdict;
}
|
||
}
|