mailto;
        // NOTE(review): only the transport-level 'success' flag and the JSON shape are
        // checked below; the HTTP status code is not inspected in the visible part of
        // this method — confirm whether non-200 JSON error bodies should be rejected.
        $url = $this->openAlexBase . $path . '?' . http_build_query($query);
        $result = $this->httpGet($url, [
            'Accept: application/json',
            'User-Agent: TMRJournals-BackgroundCheck/1.0 (mailto:' . $this->mailto . ')',
        ]);
        if (!$result['success']) {
            return $result;
        }
        $data = json_decode($result['body'], true);
        if (!is_array($data)) {
            return ['success' => false, 'error' => 'OpenAlex返回数据格式异常'];
        }
        return ['success' => true, 'data' => $data];
    }

    /**
     * Resolve one OpenAlex author record from whichever identifier is supplied.
     *
     * Lookup priority: `openalex_id`, then `orcid`, then a `name` search (optionally
     * narrowed by `affiliation`) that returns the most-cited match.
     *
     * @param array $params Keys read: openalex_id, orcid, name, affiliation.
     * @return array ['success' => true, 'data' => array] on a hit,
     *               ['success' => false, 'error' => string] otherwise.
     */
    public function resolveAuthor($params)
    {
        if (!empty($params['openalex_id'])) {
            // Accept either a bare ID or the full https://openalex.org/... URL.
            $id = preg_replace('/^https?:\/\/openalex\.org\//', '', $params['openalex_id']);
            $res = $this->openAlexGet('/authors/' . urlencode($id));
            if (!$res['success']) {
                return ['success' => false, 'error' => $res['error']];
            }
            return ['success' => true, 'data' => $res['data']];
        }

        if (!empty($params['orcid'])) {
            $orcid = $this->cleanOrcid($params['orcid']);
            $res = $this->openAlexGet('/authors/https://orcid.org/' . $orcid);
            if (!$res['success']) {
                return ['success' => false, 'error' => '未在 OpenAlex 找到该 ORCID 对应学者'];
            }
            return ['success' => true, 'data' => $res['data']];
        }

        if (empty($params['name'])) {
            return ['success' => false, 'error' => '请提供 openalex_id、orcid 或 name'];
        }

        // The filter parameter is a comma-joined list of conditions (with "|" used
        // between alternative values), so those characters inside a user-supplied
        // value would split it into bogus conditions. Replace them with spaces.
        $filters = [];
        $nameTerm = trim(preg_replace('/[,|]+/', ' ', (string)$params['name']));
        if ($nameTerm !== '') {
            $filters[] = 'display_name.search:' . $nameTerm;
        }
        if (!empty($params['affiliation'])) {
            $affiliationTerm = trim(preg_replace('/[,|]+/', ' ', (string)$params['affiliation']));
            if ($affiliationTerm !== '') {
                // NOTE(review): confirm that this attribute is a supported OpenAlex
                // author filter; it is kept exactly as the original code sent it.
                $filters[] = 'last_known_institutions.display_name.search:' . $affiliationTerm;
            }
        }

        $query = [
            'search'   => $params['name'],
            'sort'     => 'cited_by_count:desc',
            'per-page' => 1,
        ];
        if (!empty($filters)) {
            $query['filter'] = implode(',', $filters);
        }

        $res = $this->openAlexGet('/authors', $query);
        if (!$res['success']) {
            return ['success' => false, 'error' => $res['error']];
        }
        $results = $res['data']['results'] ?? [];
        if (empty($results)) {
            return ['success' => false, 'error' => '未找到匹配学者,请补充 affiliation 或使用 orcid'];
        }
        return ['success' => true, 'data' => $results[0]];
    }

    /**
     * Fetch works by the given author that OpenAlex flags as retracted.
     *
     * At most 25 works are requested, so 'count' is the number of rows returned
     * by this single page, not necessarily the author's total.
     *
     * @param string $openAlexId Author ID placed into the authorships.author.id filter.
     * @return array ['count' => int, 'list' => array, 'source' => 'openalex'] on success,
     *               ['count' => 0, 'list' => [], 'error' => string] on failure.
     */
    public function fetchRetractedWorksOpenAlex($openAlexId)
    {
        $res = $this->openAlexGet('/works', [
            'filter'   => 'authorships.author.id:' . $openAlexId . ',is_retracted:true',
            'sort'     => 'publication_date:desc',
            'per-page' => 25,
        ]);
        if (!$res['success']) {
            return ['count' => 0, 'list' => [], 'error' => $res['error']];
        }

        $works = [];
        foreach ($res['data']['results'] ?? [] as $work) {
            $works[] = $this->formatOpenAlexWork($work);
        }
        return ['count' => count($works), 'list' => $works, 'source' => 'openalex'];
    }

    /**
     * Fetch the author's most recent works, newest first.
     *
     * @param string $openAlexId Author ID placed into the authorships.author.id filter.
     * @param int    $limit      Number of works to request; values below 1 are raised to 1.
     * @return array List of formatted works, each with an extra 'is_retracted' flag;
     *               an empty list when the request fails.
     */
    public function fetchRecentWorks($openAlexId, $limit = 5)
    {
        $res = $this->openAlexGet('/works', [
            'filter'   => 'authorships.author.id:' . $openAlexId,
            'sort'     => 'publication_date:desc',
            // A zero or negative page size is never a meaningful request.
            'per-page' => max(intval($limit), 1),
        ]);
        if (!$res['success']) {
            return [];
        }

        $works = [];
        foreach ($res['data']['results'] ?? [] as $work) {
            $entry = $this->formatOpenAlexWork($work);
            $entry['is_retracted'] = !empty($work['is_retracted']);
            $works[] = $entry;
        }
        return $works;
    }

    /**
     * Search authors in bulk by field / keyword (OpenAlex).
     *
     * The keyword is first mapped to a topic via resolveTopicId(); when a topic is
     * found it is added as a filter, and the keyword is always sent as the search term.
     *
     * @param string $keyword Field or keyword to search for.
     * @param array  $options Keys read: min_h_index (default 5, inclusive minimum;
     *                        0 or less disables the filter), limit (default 10,
     *                        clamped to 1..30), page (default 1, minimum 1).
     * @return array ['success' => true, 'data' => [keyword, topic_id, page, limit, total, list]]
     *               or ['success' => false, 'error' => string].
     */
    public function searchAuthorsByField($keyword, $options = [])
    {
        $minHIndex = intval($options['min_h_index'] ?? 5);
        $limit = min(max(intval($options['limit'] ?? 10), 1), 30);
        $page = max(intval($options['page'] ?? 1), 1);

        $topicId = $this->resolveTopicId($keyword);

        $filters = [];
        if ($topicId !== '') {
            $filters[] = 'topics.id:' . $topicId;
        }
        if ($minHIndex > 0) {
            // The filter is a strict "greater than", so subtract one to make the
            // configured minimum inclusive (min_h_index = 5 must match h-index 5).
            $filters[] = 'summary_stats.h_index:>' . ($minHIndex - 1);
        }

        $query = [
            'sort'     => 'cited_by_count:desc',
            'per-page' => $limit,
            'page'     => $page,
        ];
        if (!empty($filters)) {
            $query['filter'] = implode(',', $filters);
        }
        $query['search'] = $keyword;

        $res = $this->openAlexGet('/authors', $query);
        if (!$res['success']) {
            return ['success' => false, 'error' => $res['error']];
        }

        $authors = [];
        foreach ($res['data']['results'] ?? [] as $author) {
            $authors[] = $this->formatAuthorBrief($author);
        }

        return [
            'success' => true,
            'data' => [
                'keyword'  => $keyword,
                'topic_id' => $topicId,
                'page'     => $page,
                'limit'    => $limit,
                // Prefer the server-reported total; fall back to the page size.
                'total'    => $res['data']['meta']['count'] ?? count($authors),
                'list'     => $authors,
            ],
        ];
    }

    /**
     * Map a keyword to the ID of the OpenAlex topic with the most works.
     *
     * @param string $keyword Search term sent to the /topics endpoint.
     * @return string Topic ID with the openalex.org URL prefix removed, or '' when
     *                the request fails or returns no results.
     */
    private function resolveTopicId($keyword)
    {
        $res = $this->openAlexGet('/topics', [
            'search'   => $keyword,
            'sort'     => 'works_count:desc',
            'per-page' => 1,
        ]);
        if (!$res['success']) {
            return '';
        }
        $results = $res['data']['results'] ?? [];
        if (empty($results)) {
            return '';
        }
        return $this->extractOpenAlexId($results[0]['id'] ?? '');
    }

    // ===================== CrossRef =====================

    /**
     * Normalise a DOI by removing resolver-URL and "doi:" prefixes.
     *
     * Both https://doi.org/ and the legacy https://dx.doi.org/ resolver forms are
     * stripped, case-insensitively.
     *
     * @param string $doi Raw DOI, DOI URL, or "doi:"-prefixed value.
     * @return string The trimmed bare DOI ('' for empty input).
     */
    public function cleanDoi($doi)
    {
        $doi = trim((string)$doi);
        $doi = preg_replace('/^https?:\/\/(?:dx\.)?doi\.org\//i', '', $doi);
        $doi = preg_replace('/^doi:\s*/i', '', $doi);
        return trim($doi);
    }

    /**
     * Fetch a single work record from the CrossRef REST API.
     *
     * @param string $doi DOI in any form accepted by cleanDoi().
     * @return array ['success' => true, 'message' => array] with the CrossRef
     *               "message" object, or ['success' => false, 'error' => string].
     */
    public function fetchCrossRefWork($doi)
    {
        $doi = $this->cleanDoi($doi);
        if ($doi === '') {
            return ['success' => false, 'error' => 'DOI为空'];
        }

        $url = $this->crossRefBase . '/works/' . urlencode($doi);
        $result = $this->httpGet($url, [
            'Accept: application/json',
            'User-Agent: TMRJournals-BackgroundCheck/1.0 (mailto:' . $this->mailto . ')',
        ]);
        if (!$result['success']) {
            return ['success' => false, 'error' => $result['error']];
        }
        if ($result['http_code'] == 404) {
            return ['success' => false, 'error' => 'DOI在CrossRef中未找到'];
        }
        if ($result['http_code'] != 200) {
            return ['success' => false, 'error' => 'CrossRef返回 HTTP ' . $result['http_code']];
        }

        $data = json_decode($result['body'], true);
        if (!isset($data['message'])) {
            return ['success' => false, 'error' => 'CrossRef返回数据格式异常'];
        }
        return ['success' => true, 'message' => $data['message']];
    }

    /**
     * Reduce a CrossRef work message to the fields used for retraction reporting.
     *
     * @param string $doi     DOI of the work (normalised with cleanDoi()).
     * @param array  $message CrossRef work "message" object.
     * @return array Keys: doi, title, is_retracted, retraction_detail, journal,
     *               publisher, published_date, authors, url.
     */
    public function parseCrossRefRetractionDetail($doi, $message)
    {
        $retraction = $this->detectCrossRefRetraction($message);
        $cleanDoi = $this->cleanDoi($doi);

        // The print date stays the first choice; the other date fields are only
        // consulted when it is missing or unparseable, so online-only records
        // still get a date.
        $publishedDate = '';
        foreach (['published-print', 'published-online', 'published', 'issued'] as $dateField) {
            if (!isset($message[$dateField])) {
                continue;
            }
            $publishedDate = $this->parseDateParts($message[$dateField]);
            if ($publishedDate !== '') {
                break;
            }
        }

        return [
            'doi'               => $cleanDoi,
            'title'             => isset($message['title'][0]) ? $message['title'][0] : '',
            'is_retracted'      => $retraction['is_retracted'],
            'retraction_detail' => $retraction['retraction_detail'],
            'journal'           => isset($message['container-title'][0]) ? $message['container-title'][0] : '',
            'publisher'         => $message['publisher'] ?? '',
            'published_date'    => $publishedDate,
            'authors'           => $this->parseCrossRefAuthors($message['author'] ?? []),
            'url'               => $message['URL'] ?? ('https://doi.org/' . $cleanDoi),
        ];
    }

    /**
     * Attach CrossRef details to each retraction record under the 'crossref' key.
     *
     * Records without a DOI are marked as failed and no request is made for them.
     * Execution pauses for 200 ms after every CrossRef request.
     *
     * @param array $retractionList Records that may carry a 'doi' key.
     * @return array The same records, each with an added 'crossref' entry.
     */
    public function enrichRetractionsWithCrossRef($retractionList)
    {
        $enriched = [];
        foreach ($retractionList as $item) {
            $doi = $this->cleanDoi($item['doi'] ?? '');
            if ($doi === '') {
                $item['crossref'] = ['success' => false, 'error' => '无DOI'];
                $enriched[] = $item;
                continue;
            }

            $res = $this->fetchCrossRefWork($doi);
            if ($res['success']) {
                $item['crossref'] = [
                    'success' => true,
                    'data'    => $this->parseCrossRefRetractionDetail($doi, $res['message']),
                ];
            } else {
                $item['crossref'] = ['success' => false, 'error' => $res['error']];
            }
            $enriched[] = $item;

            usleep(200000);
        }
        return $enriched;
    }

    /**
     * Inspect a CrossRef work message for retraction signals.
     *
     * Signals checked: 'updated-by' / 'update-to' entries whose type or label
     * contains "retract", a work type or subtype containing "retract", and any
     * relation key containing "retract".
     *
     * @param array $message CrossRef work "message" object.
     * @return array ['is_retracted' => bool, 'retraction_detail' => array] where the
     *               detail holds 'sources', 'retraction_notices', 'record_ids' and,
     *               when applicable, 'is_retraction_notice' and 'relation'.
     */
    private function detectCrossRefRetraction($message)
    {
        $isRetracted = false;
        $retractionDetail = [
            'sources'            => [],
            'retraction_notices' => [],
            'record_ids'         => [],
        ];

        foreach (['updated-by', 'update-to'] as $field) {
            if (!isset($message[$field]) || !is_array($message[$field])) {
                continue;
            }
            foreach ($message[$field] as $update) {
                // Skip malformed entries instead of indexing into a scalar.
                if (!is_array($update)) {
                    continue;
                }
                $updateType = strtolower($update['type'] ?? '');
                $updateLabel = strtolower($update['label'] ?? '');
                $mentionsRetraction = strpos($updateType, 'retract') !== false
                    || strpos($updateLabel, 'retract') !== false;
                if (!$mentionsRetraction) {
                    continue;
                }

                $isRetracted = true;
                $source = $update['source'] ?? 'publisher';
                $retractionDetail['sources'][] = $source;

                $notice = [
                    'type'       => $update['type'] ?? '',
                    'label'      => $update['label'] ?? '',
                    'source'     => $source,
                    'notice_doi' => $update['DOI'] ?? '',
                    'date'       => isset($update['updated']) ? $this->parseDateParts($update['updated']) : '',
                    'record_id'  => $update['record-id'] ?? '',
                ];
                $retractionDetail['retraction_notices'][] = $notice;
                if (!empty($notice['record_id'])) {
                    $retractionDetail['record_ids'][] = $notice['record_id'];
                }
            }
        }

        $type = strtolower($message['type'] ?? '');
        $subtype = strtolower($message['subtype'] ?? '');
        if (strpos($type, 'retract') !== false || strpos($subtype, 'retract') !== false) {
            $isRetracted = true;
            $retractionDetail['is_retraction_notice'] = true;
        }

        if (isset($message['relation']) && is_array($message['relation'])) {
            foreach ($message['relation'] as $relType => $relations) {
                if (strpos(strtolower($relType), 'retract') !== false) {
                    $isRetracted = true;
                    // Only the first matching relation type is recorded.
                    $retractionDetail['relation'] = [$relType => $relations];
                    break;
                }
            }
        }

        $retractionDetail['sources'] = array_values(array_unique($retractionDetail['sources']));
        $retractionDetail['record_ids'] = array_values(array_unique($retractionDetail['record_ids']));

        return ['is_retracted' => $isRetracted, 'retraction_detail' => $retractionDetail];
    }

    // ===================== Retraction Watch (via CrossRef) =====================

    /**
     * Look up retraction records through CrossRef by author name, flagging those
     * that appear to originate from Retraction Watch.
     *
     * An empty name is rejected up front: without an author term the query would
     * match retractions by anyone and attribute them to the person being checked.
     *
     * @param string $authorName Author name sent as CrossRef's query.author parameter.
     * @return array ['count' => int, 'list' => array, 'source' => 'retraction_watch']
     *               on success, ['count' => 0, 'list' => [], 'error' => string] on failure.
     */
    public function fetchRetractionWatchByAuthor($authorName)
    {
        $authorName = trim((string)$authorName);
        if ($authorName === '') {
            return ['count' => 0, 'list' => [], 'error' => '作者姓名为空'];
        }

        $url = $this->crossRefBase . '/works?' . http_build_query([
            'query.author' => $authorName,
            'filter'       => 'update-type:retraction',
            'rows'         => 25,
            'mailto'       => $this->mailto,
        ]);
        $result = $this->httpGet($url, [
            'Accept: application/json',
            'User-Agent: TMRJournals-BackgroundCheck/1.0 (mailto:' . $this->mailto . ')',
        ]);
        if (!$result['success']) {
            return ['count' => 0, 'list' => [], 'error' => $result['error']];
        }
        if ($result['http_code'] != 200) {
            return ['count' => 0, 'list' => [], 'error' => 'CrossRef返回 HTTP ' . $result['http_code']];
        }

        $data = json_decode($result['body'], true);
        $items = $data['message']['items'] ?? [];
        if (!is_array($items)) {
            $items = [];
        }

        $list = [];
        foreach ($items as $message) {
            $parsed = $this->parseCrossRefRetractionDetail($message['DOI'] ?? '', $message);
            if (!$parsed['is_retracted']) {
                continue;
            }

            // A record counts as Retraction Watch-sourced when any update source
            // names it, or when any record ID was attached to a notice.
            $rwSources = array_filter($parsed['retraction_detail']['sources'] ?? [], function ($s) {
                return stripos($s, 'retraction-watch') !== false
                    || stripos($s, 'retraction_watch') !== false;
            });

            $list[] = [
                'title'                 => $parsed['title'],
                'doi'                   => $parsed['doi'],
                'journal'               => $parsed['journal'],
                'publisher'             => $parsed['publisher'],
                'published_date'        => $parsed['published_date'],
                'is_retracted'          => true,
                'retraction_detail'     => $parsed['retraction_detail'],
                'from_retraction_watch' => !empty($rwSources) || !empty($parsed['retraction_detail']['record_ids']),
                'source'                => 'retraction_watch',
            ];
        }

        return [
            'count'  => count($list),
            'list'   => $list,
            'source' => 'retraction_watch',
        ];
    }

    /**
     * Merge OpenAlex and Retraction Watch retraction records, de-duplicated by DOI.
     *
     * Records without a DOI are keyed by a hash of their content, so they only
     * collapse when identical. The first record seen for a key supplies the
     * descriptive fields; later ones contribute their source name, the
     * Retraction Watch flag and, if still missing, the retraction detail.
     *
     * @param array $openAlexRetractions Result shaped like fetchRetractedWorksOpenAlex().
     * @param array $rwRetractions       Result shaped like fetchRetractionWatchByAuthor().
     * @param bool  $withCrossRefDetail  When true, each merged row is enriched via CrossRef.
     * @return array Keys: count, openalex_count, rw_count, rw_only_count, list.
     */
    public function mergeRetractionRecords($openAlexRetractions, $rwRetractions, $withCrossRefDetail = false)
    {
        $doiMap = [];

        foreach ([$openAlexRetractions, $rwRetractions] as $sourceData) {
            foreach ($sourceData['list'] ?? [] as $item) {
                $doi = $this->cleanDoi($item['doi'] ?? '');
                $key = $doi !== '' ? strtolower($doi) : md5(json_encode($item));

                if (!isset($doiMap[$key])) {
                    $doiMap[$key] = [
                        'title'                 => $item['title'] ?? '',
                        'doi'                   => $doi,
                        'journal'               => $item['journal'] ?? '',
                        'publication_date'      => $item['publication_date'] ?? ($item['published_date'] ?? ''),
                        'sources'               => [],
                        'retraction_detail'     => $item['retraction_detail'] ?? [],
                        'from_retraction_watch' => !empty($item['from_retraction_watch']),
                    ];
                }

                $src = $item['source'] ?? 'unknown';
                if (!in_array($src, $doiMap[$key]['sources'], true)) {
                    $doiMap[$key]['sources'][] = $src;
                }
                if (!empty($item['from_retraction_watch'])) {
                    $doiMap[$key]['from_retraction_watch'] = true;
                }
                if (!empty($item['retraction_detail']) && empty($doiMap[$key]['retraction_detail'])) {
                    $doiMap[$key]['retraction_detail'] = $item['retraction_detail'];
                }
            }
        }

        $merged = array_values($doiMap);
        if ($withCrossRefDetail) {
            $merged = $this->enrichRetractionsWithCrossRef($merged);
        }

        // Rows flagged as Retraction Watch that were reported by at most one source.
        $rwOnlyCount = 0;
        foreach ($merged as $row) {
            if (!empty($row['from_retraction_watch']) && count($row['sources'] ?? []) <= 1) {
                $rwOnlyCount++;
            }
        }

        return [
            'count'          => count($merged),
            'openalex_count' => intval($openAlexRetractions['count'] ?? 0),
            'rw_count'       => intval($rwRetractions['count'] ?? 0),
            'rw_only_count'  => $rwOnlyCount,
            'list'           => $merged,
        ];
    }

    // ===================== Formatting =====================

    /**
     * Build a compact summary of an OpenAlex author record.
     *
     * @param array $author OpenAlex author object.
     * @return array Keys: openalex_id, name, orcid, works_count, cited_by_count,
     *               h_index, institutions (name + country), openalex_url.
     */
    public function formatAuthorBrief($author)
    {
        $institutions = [];
        foreach ($author['last_known_institutions'] ?? [] as $inst) {
            $institutions[] = [
                'name'    => $inst['display_name'] ?? '',
                'country' => $inst['country_code'] ?? '',
            ];
        }

        return [
            'openalex_id'    => $this->extractOpenAlexId($author['id'] ?? ''),
            'name'           => $author['display_name'] ?? '',
            'orcid'          => $this->extractOrcid($author['orcid'] ?? ''),
            'works_count'    => intval($author['works_count'] ?? 0),
            'cited_by_count' => intval($author['cited_by_count'] ?? 0),
            'h_index'        => intval($author['summary_stats']['h_index'] ?? 0),
            'institutions'   => $institutions,
            'openalex_url'   => $author['id'] ?? '',
        ];
    }

    /**
     * Extract headline bibliometric indicators from an OpenAlex author record.
     *
     * @param array $author OpenAlex author object.
     * @return array Keys: works_count, cited_by_count, h_index, i10_index,
     *               two_year_mean_cited (rounded to 2 decimals), level_label.
     */
    public function parseAuthorMetrics($author)
    {
        $stats = $author['summary_stats'] ?? [];

        return [
            'works_count'         => intval($author['works_count'] ?? 0),
            'cited_by_count'      => intval($author['cited_by_count'] ?? 0),
            'h_index'             => intval($stats['h_index'] ?? 0),
            'i10_index'           => intval($stats['i10_index'] ?? 0),
            'two_year_mean_cited' => round(floatval($stats['2yr_mean_citedness'] ?? 0), 2),
            'level_label'         => $this->getAcademicLevelLabel($stats),
        ];
    }

    /**
     * List up to eight research topics for an author.
     *
     * 'x_concepts' is read first; 'topics' is used only when it yields nothing.
     * Entries without a display name are skipped.
     *
     * @param array $author OpenAlex author object.
     * @return array List of ['name' => string, 'score' => float rounded to 3 decimals].
     */
    public function parseResearchTopics($author)
    {
        $topics = [];
        foreach (['x_concepts', 'topics'] as $field) {
            foreach ($author[$field] ?? [] as $entry) {
                if (empty($entry['display_name'])) {
                    continue;
                }
                $topics[] = [
                    'name'  => $entry['display_name'],
                    'score' => round(floatval($entry['score'] ?? 0), 3),
                ];
            }
            // Stop at the first field that produced at least one topic.
            if (!empty($topics)) {
                break;
            }
        }
        return array_slice($topics, 0, 8);
    }

    /**
     * Derive a risk level and score from retraction counts and publication volume.
     *
     * Base level: 0 retractions is low (score 10), 1 is medium (50), 2 or more is
     * high (80 plus 5 per retraction, bonus capped at 20). Retraction Watch-only
     * records lift "low" to "medium"; a retraction rate of 5% or more lifts
     * "medium" to "high".
     *
     * @param array $metrics     Author metrics; 'works_count' is read.
     * @param array $retractions Merged retractions; 'count' and 'rw_only_count' are read.
     * @return array Keys: level, level_label, score (max 100), retraction_count,
     *               retraction_rate (percentage string), rw_only_count, reasons.
     */
    public function assessRisk($metrics, $retractions)
    {
        $retractionCount = intval($retractions['count'] ?? 0);
        $rwOnlyCount = intval($retractions['rw_only_count'] ?? 0);
        $reasons = [];

        if ($retractionCount === 0) {
            $level = 'low';
            $score = 10;
            $reasons[] = 'OpenAlex 与 Retraction Watch 均未发现撤稿记录';
        } elseif ($retractionCount === 1) {
            $level = 'medium';
            $score = 50;
            $reasons[] = '发现 1 篇撤稿论文,建议人工核实撤稿原因';
        } else {
            $level = 'high';
            $score = 80 + min($retractionCount * 5, 20);
            $reasons[] = '发现 ' . $retractionCount . ' 篇撤稿论文,存在较高学术风险';
        }

        if ($rwOnlyCount > 0) {
            $reasons[] = 'Retraction Watch 额外发现 ' . $rwOnlyCount . ' 条 OpenAlex 未收录的撤稿记录';
            if ($level === 'low') {
                $level = 'medium';
                $score = max($score, 45);
            }
        }

        // Treat a missing or zero works count as 1 to avoid division by zero.
        $worksCount = max(intval($metrics['works_count'] ?? 0), 1);
        $retractionRate = round($retractionCount / $worksCount * 100, 2);
        if ($retractionCount > 0 && $retractionRate >= 5) {
            $reasons[] = '撤稿率 ' . $retractionRate . '%,比例偏高';
            if ($level === 'medium') {
                $level = 'high';
                $score = max($score, 70);
            }
        }

        return [
            'level'            => $level,
            'level_label'      => $this->getRiskLevelLabel($level),
            'score'            => min($score, 100),
            'retraction_count' => $retractionCount,
            'retraction_rate'  => $retractionRate . '%',
            'rw_only_count'    => $rwOnlyCount,
            'reasons'          => $reasons,
        ];
    }

    // ===================== Internal helpers =====================

    /**
     * Convert an OpenAlex work object into the flat record used by this class.
     *
     * @param array $work OpenAlex work object.
     * @return array Keys: title, doi, publication_date, journal, cited_by_count,
     *               openalex_url, source ('openalex').
     */
    private function formatOpenAlexWork($work)
    {
        return [
            'title'            => $work['display_name'] ?? '',
            'doi'              => $this->extractDoi($work),
            'publication_date' => $work['publication_date'] ?? '',
            'journal'          => $work['primary_location']['source']['display_name'] ?? '',
            'cited_by_count'   => intval($work['cited_by_count'] ?? 0),
            'openalex_url'     => $work['id'] ?? '',
            'source'           => 'openalex',
        ];
    }

    /**
     * Normalise a CrossRef author list.
     *
     * @param mixed $authorList CrossRef 'author' array; anything empty or
     *                          non-array yields an empty list.
     * @return array List of ['given', 'family', 'name', 'orcid']; 'name' falls back
     *               to "given family" when no explicit name is present.
     */
    private function parseCrossRefAuthors($authorList)
    {
        if (empty($authorList) || !is_array($authorList)) {
            return [];
        }

        $authors = [];
        foreach ($authorList as $a) {
            $given = $a['given'] ?? '';
            $family = $a['family'] ?? '';
            $authors[] = [
                'given'  => $given,
                'family' => $family,
                'name'   => isset($a['name']) ? $a['name'] : trim($given . ' ' . $family),
                'orcid'  => $a['ORCID'] ?? '',
            ];
        }
        return $authors;
    }

    /**
     * Format a CrossRef date object as YYYY-MM-DD, YYYY-MM or YYYY.
     *
     * @param mixed $dateObj Structure holding the parts under ['date-parts'][0].
     * @return string The most precise date available, or '' when no parts exist.
     */
    private function parseDateParts($dateObj)
    {
        if (!isset($dateObj['date-parts'][0])) {
            return '';
        }
        $parts = $dateObj['date-parts'][0];

        $y = isset($parts[0]) ? $parts[0] : '';
        $m = isset($parts[1]) ? sprintf('%02d', $parts[1]) : '';
        $d = isset($parts[2]) ? sprintf('%02d', $parts[2]) : '';

        if ($y && $m && $d) {
            return "{$y}-{$m}-{$d}";
        }
        if ($y && $m) {
            return "{$y}-{$m}";
        }
        return (string)$y;
    }

    /**
     * Map an h-index to a descriptive seniority label.
     *
     * @param array $stats Summary stats; 'h_index' is read.
     * @return string Label for the highest threshold reached (50, 30, 15, 5, above 0),
     *                or a "not enough data" label when the h-index is 0.
     */
    private function getAcademicLevelLabel($stats)
    {
        $h = intval($stats['h_index'] ?? 0);

        if ($h >= 50) {
            return '国际顶尖学者';
        }
        if ($h >= 30) {
            return '资深专家';
        }
        if ($h >= 15) {
            return '活跃研究者';
        }
        if ($h >= 5) {
            return '青年学者';
        }
        if ($h > 0) {
            return '初入领域';
        }
        return '暂无足够公开数据';
    }

    /**
     * Translate a risk level code into its display label.
     *
     * @param string $level One of 'low', 'medium', 'high'.
     * @return string The matching label, or the "unknown" label for any other value.
     */
    private function getRiskLevelLabel($level)
    {
        $labels = ['low' => '低风险', 'medium' => '中风险', 'high' => '高风险'];
        return $labels[$level] ?? '未知';
    }

    /**
     * Strip the https://openalex.org/ prefix from an OpenAlex identifier.
     *
     * @param string $id Full OpenAlex URL or bare ID.
     * @return string The bare ID.
     */
    public function extractOpenAlexId($id)
    {
        // Cast so a null ID does not reach preg_replace.
        return preg_replace('/^https?:\/\/openalex\.org\//', '', (string)$id);
    }

    /**
     * Strip the https://orcid.org/ prefix from an ORCID value.
     *
     * @param string $orcid Full ORCID URL or bare ORCID iD.
     * @return string The bare ORCID iD, or '' for empty input.
     */
    public function extractOrcid($orcid)
    {
        $orcid = (string)$orcid;
        if ($orcid === '') {
            return '';
        }
        return preg_replace('/^https?:\/\/orcid\.org\//', '', $orcid);
    }

    /**
     * Trim an ORCID value and remove the https://orcid.org/ prefix.
     *
     * @param string $orcid User-supplied ORCID URL or iD.
     * @return string The trimmed bare ORCID iD.
     */
    public function cleanOrcid($orcid)
    {
        $orcid = trim($orcid);
        $orcid = preg_replace('/^https?:\/\/orcid\.org\//', '', $orcid);
        return trim($orcid);
    }

    /**
     * Read the DOI of an OpenAlex work without its https://doi.org/ prefix.
     *
     * @param array $work OpenAlex work object; 'doi' is read.
     * @return string The bare DOI, or '' when the work has none.
     */
    private function extractDoi($work)
    {
        $doi = $work['doi'] ?? '';
        return preg_replace('/^https?:\/\/doi\.org\//', '', $doi);
    }

    /**
     * Perform an HTTP GET request with cURL.
     *
     * TLS certificate and host verification are enabled so API responses cannot
     * be forged in transit. On hosts without a usable CA bundle, configure
     * `curl.cainfo` in php.ini rather than disabling verification here.
     *
     * @param string $url     Absolute request URL.
     * @param array  $headers Raw header lines ("Name: value").
     * @return array ['success' => true, 'body' => string, 'http_code' => int] when
     *               the transfer completes (whatever the HTTP status), or
     *               ['success' => false, 'error' => string] on a transport failure.
     */
    private function httpGet($url, $headers = [])
    {
        $ch = curl_init();
        if ($ch === false) {
            return ['success' => false, 'error' => 'HTTP请求失败: ' . 'curl_init failed'];
        }

        curl_setopt($ch, CURLOPT_URL, $url);
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
        curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, true);
        curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, 2);
        curl_setopt($ch, CURLOPT_TIMEOUT, 30);
        curl_setopt($ch, CURLOPT_HTTPHEADER, $headers);

        $body = curl_exec($ch);
        if ($body === false || curl_errno($ch)) {
            $error = curl_error($ch);
            curl_close($ch);
            return ['success' => false, 'error' => 'HTTP请求失败: ' . $error];
        }

        $httpCode = curl_getinfo($ch, CURLINFO_HTTP_CODE);
        curl_close($ch);

        return ['success' => true, 'body' => $body, 'http_code' => $httpCode];
    }
}