This commit is contained in:
wyn
2026-06-05 11:11:16 +08:00
parent d3ae05f851
commit 66c3b86bd8
8 changed files with 2478 additions and 271 deletions

View File

@@ -1,303 +1,394 @@
<?php
/**
* Created by PhpStorm.
* User: Administrator
* Date: 2026/6/2
* Time: 15:04
*/
namespace app\api\controller;
use app\common\service\AuthorBackgroundService;
use think\Controller;
class Author
/**
* 作者背调 API(前后端分离,返回 JSON)
*
* 主接口:background_report / due_diligence(ORCID 必填)
* Scopus 相关接口委托 AuthorInfo 实现
*/
class Author extends Controller
{
/** @var AuthorBackgroundService Service this controller calls to normalize ORCIDs, search ORCID candidates and build reports. */
private $bgService;
/** @var AuthorInfo Collaborator that the Scopus-related endpoints and background_check() delegate to. */
private $authorInfo;
/**
 * Forwards the request to the parent controller constructor, then creates the
 * two collaborators used by the endpoints of this controller.
 *
 * @param \think\Request|null $request Passed through to the parent constructor unchanged.
 */
public function __construct(\think\Request $request = null)
{
parent::__construct($request);
$this->bgService = new AuthorBackgroundService();
$this->authorInfo = new AuthorInfo();
}
/**
 * HTML entry point for the author background check.
 *
 * Flow:
 *  1. Nothing submitted (no ORCID, no name, no institution) -> empty search form.
 *  2. ORCID supplied -> render the report page directly.
 *  3. No ORCID and no last name -> re-render the form with an error message.
 *  4. Otherwise search ORCID candidates by name, passing the institution along
 *     as an optional hint:
 *       - no candidate          -> "ORCID required" page,
 *       - several candidates    -> selection list,
 *       - exactly one candidate -> redirect to the report URL for its ORCID.
 *
 * @return mixed Rendered view or redirect response.
 */
public function index()
{
    @set_time_limit(120);
    $formAction = $this->resolveFormAction();
    $params = $this->resolveBackgroundParams();
    $orcidNorm = $this->bgService->normalizeOrcid($params['orcid']);

    // Nothing was submitted at all: show the blank form.
    if ($orcidNorm === ''
        && $params['last_name'] === ''
        && $params['first_name'] === ''
        && $params['institution'] === ''
    ) {
        $this->assign('form_action', $formAction);
        return $this->fetch('author/index');
    }

    // 1. ORCID present -> go straight to the report page.
    if ($orcidNorm !== '') {
        return $this->renderReportPage($params, $formAction);
    }

    // 2. Without an ORCID the last name is mandatory; the institution is optional.
    if ($params['last_name'] === '') {
        $this->assign([
            'form_action' => $formAction,
            'error_msg' => '未填 ORCID 时,请填写姓氏',
            'last_name' => $params['last_name'],
            'first_name' => $params['first_name'],
            'institution' => $params['institution'],
        ]);
        return $this->fetch('author/index');
    }

    // 3. Search ORCID candidates by name.
    $search = $this->bgService->searchOrcidCandidates(
        $params['last_name'],
        $params['first_name'],
        $params['institution']
    );
    $candidates = $search['candidates'] ?? [];
    $notFoundHint = '已在 OpenAlex、ORCID 官网、Scopus 按姓名检索,未找到带 ORCID 的作者';

    if (empty($candidates)) {
        return $this->renderOrcidRequiredPage($params, $formAction, $notFoundHint);
    }
    if (count($candidates) > 1) {
        $this->assignCandidateListView($candidates, $params, $formAction);
        return $this->fetch('author/select_orcid');
    }

    // Exactly one candidate. Read it with reset() instead of index 0 so a list
    // that is not zero-indexed still works, and refuse to redirect when it has
    // no usable ORCID: the redirect target is this same action, which would
    // otherwise run the identical name search again.
    $only = reset($candidates);
    $onlyOrcid = is_array($only) ? trim((string) ($only['orcid'] ?? '')) : '';
    if ($onlyOrcid === '' || $this->bgService->normalizeOrcid($onlyOrcid) === '') {
        return $this->renderOrcidRequiredPage($params, $formAction, $notFoundHint);
    }

    return $this->redirect($this->buildReportEntryUrl($formAction, $params, $onlyOrcid));
}
/**
 * JSON endpoint that builds the author background report.
 *
 * Request parameters are read through resolveBackgroundParams() and handed to
 * the background service as ORCID, last name, first name and institution.
 *
 * Response envelope:
 *   code 1 - success, `data` is the report payload
 *   code 2 - the service asks the caller to pick a candidate (`need_select`)
 *   code 0 - any other failure
 *
 * @return mixed JSON response produced by json().
 */
public function background_report()
{
    @set_time_limit(120);
    $query = $this->resolveBackgroundParams();
    $outcome = $this->bgService->buildReport(
        $query['orcid'],
        $query['last_name'],
        $query['first_name'],
        $query['institution']
    );

    // Success path first; everything below is the failure envelope.
    if (!empty($outcome['ok'])) {
        return json([
            'code' => 1,
            'msg' => 'success',
            'data' => $outcome['data'],
        ]);
    }

    $failure = [
        'code' => empty($outcome['need_select']) ? 0 : 2,
        'msg' => $outcome['msg'] ?? '查询失败',
        'data' => $outcome['data'] ?? null,
    ];

    return json($failure);
}
/** camelCase alias: calls background_report() and returns its result unchanged. */
public function backgroundReport()
{
return $this->background_report();
}
/** Same as background_report() (kept for route compatibility). */
public function due_diligence()
{
return $this->background_report();
}
/** camelCase alias: calls due_diligence() and returns its result unchanged. */
public function dueDiligence()
{
return $this->due_diligence();
}
/**
 * Integrity scan, described by the original author as OpenAlex + Crossref
 * based and not requiring an ORCID.
 *
 * This method itself only delegates to AuthorInfo::background_check() and
 * returns whatever that call returns.
 */
public function background_check()
{
return $this->authorInfo->background_check();
}
/** camelCase alias: calls background_check() and returns its result unchanged. */
public function backgroundCheck()
{
return $this->background_check();
}
/**
 * Looks up an author's h-index through the Scopus free-lookup form.
 *
 * Reads `name`, `affil` and `debug` from the GET parameters. Every exit path
 * returns a JSON response: `code` 0 with a `msg` on failure, `code` 1 with
 * `name`, `affil`, `h_index_scopus` and `source` on success. When `debug` is 1
 * a `debug` entry built by buildDebugInfo() is attached to the response.
 * If the result page is detected as blocked, the OpenAlex fallback result
 * (when available) replaces the failure payload.
 *
 * NOTE(review): the body below ends with two consecutive return statements;
 * the second one (delegation to AuthorInfo) can never execute as written.
 */
public function get_hindex()
{
$name = trim(input('get.name'));
$affil = trim(input('get.affil'));
$debug = (int) input('get.debug', 0);
// NOTE(review): the temp cookie file is created before the name check below,
// and the empty-name return does not unlink it.
$cookieFile = tempnam(sys_get_temp_dir(), 'scopus_cookie_');
if (empty($name)) {
return json(['code' => 0, 'msg' => '请输入作者姓名']);
}
// 1) Fetch the freelookup page to obtain the real submit URL and the hidden fields.
$lookupUrl = 'https://www.scopus.com/freelookup/form/author.uri?zone=TopNavBar&origin=NO%20ORIGIN%20DEFINED';
$lookupRes = $this->httpRequest($lookupUrl, null, true, '', $cookieFile);
if (!$lookupRes['ok']) {
@unlink($cookieFile);
$ret = ['code' => 0, 'msg' => '访问 Scopus 失败:' . $lookupRes['msg']];
if ($debug === 1) {
$ret['debug'] = $this->buildDebugInfo($lookupRes['url'], $lookupRes['http_code'], $lookupRes['body']);
}
return json($ret);
}
$formInfo = $this->extractScopusLookupForm($lookupRes['body']);
if (empty($formInfo['action'])) {
@unlink($cookieFile);
$ret = ['code' => 0, 'msg' => 'Scopus 页面结构已变化,未找到查询表单'];
if ($debug === 1) {
$ret['debug'] = $this->buildDebugInfo($lookupRes['url'], $lookupRes['http_code'], $lookupRes['body']);
}
return json($ret);
}
// 2) Build the query (name + affiliation) on top of the hidden fields and submit it.
$postData = $formInfo['hidden_fields'];
$postData['authLast'] = $name;
$postData['affil'] = $affil;
$searchRes = $this->httpRequest($formInfo['action'], $postData, true, $lookupUrl, $cookieFile);
if (!$searchRes['ok']) {
@unlink($cookieFile);
$ret = ['code' => 0, 'msg' => '查询 Scopus 失败:' . $searchRes['msg']];
if ($debug === 1) {
$ret['debug'] = $this->buildDebugInfo($searchRes['url'], $searchRes['http_code'], $searchRes['body']);
}
return json($ret);
}
$blockMsg = $this->detectScopusBlocking($searchRes['body']);
if (!empty($blockMsg)) {
@unlink($cookieFile);
$ret = ['code' => 0, 'msg' => $blockMsg];
// Blocked by Scopus: try OpenAlex and, if it answers, return that payload
// with a message that mentions the downgrade.
$fallback = $this->fallbackByOpenAlex($name, $affil);
if ($fallback !== null) {
$ret = array_merge($fallback, [
'msg' => $blockMsg . ',已自动降级 OpenAlex 结果'
]);
}
if ($debug === 1) {
$ret['debug'] = $this->buildDebugInfo($searchRes['url'], $searchRes['http_code'], $searchRes['body']);
}
return json($ret);
}
// 3) Extract the h-index from the result page (numbers near the "h-index" keyword are preferred).
$hIndex = $this->extractHIndexFromHtml($searchRes['body']);
if ($hIndex === null) {
@unlink($cookieFile);
$ret = [
'code' => 0,
'msg' => '未从 Scopus 结果页解析到 H 指数(可能需要人工登录或页面结构调整)'
];
if ($debug === 1) {
$ret['debug'] = $this->buildDebugInfo($searchRes['url'], $searchRes['http_code'], $searchRes['body']);
}
return json($ret);
}
@unlink($cookieFile);
$ret = [
'code' => 1,
'name' => $name,
'affil' => $affil,
'h_index_scopus' => $hIndex,
'source' => 'scopus_freelookup',
];
if ($debug === 1) {
$ret['debug'] = $this->buildDebugInfo($searchRes['url'], $searchRes['http_code'], $searchRes['body']);
}
return json($ret);
// NOTE(review): unreachable - the return directly above always exits first.
return $this->authorInfo->get_hindex();
}
private function httpRequest($url, $postData = null, $followLocation = true, $referer = '', $cookieFile = '')
public function getHindex()
{
$ch = curl_init();
$options = [
CURLOPT_URL => $url,
CURLOPT_RETURNTRANSFER => true,
CURLOPT_SSL_VERIFYPEER => false,
CURLOPT_SSL_VERIFYHOST => false,
CURLOPT_FOLLOWLOCATION => $followLocation,
CURLOPT_MAXREDIRS => 8,
CURLOPT_TIMEOUT => 30,
CURLOPT_CONNECTTIMEOUT => 15,
CURLOPT_USERAGENT => 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36',
CURLOPT_ENCODING => '',
CURLOPT_HTTPHEADER => [
'Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
'Accept-Language: zh-CN,zh;q=0.9,en;q=0.8',
],
];
if (!empty($referer)) {
$options[CURLOPT_REFERER] = $referer;
}
if (!empty($cookieFile)) {
$options[CURLOPT_COOKIEJAR] = $cookieFile;
$options[CURLOPT_COOKIEFILE] = $cookieFile;
}
if (is_array($postData)) {
$options[CURLOPT_POST] = true;
$options[CURLOPT_POSTFIELDS] = http_build_query($postData);
}
curl_setopt_array($ch, $options);
$body = curl_exec($ch);
$error = curl_error($ch);
$httpCode = (int) curl_getinfo($ch, CURLINFO_HTTP_CODE);
$finalUrl = (string) curl_getinfo($ch, CURLINFO_EFFECTIVE_URL);
curl_close($ch);
if ($error) {
if (strpos($error, 'Maximum (') !== false && strpos($error, 'redirects followed') !== false) {
return [
'ok' => false,
'msg' => 'Scopus 跳转过多(可能触发登录/验证页面),请稍后重试或先在浏览器登录 Scopus',
'body' => '',
'http_code' => $httpCode,
'url' => $finalUrl
];
}
return ['ok' => false, 'msg' => $error, 'body' => '', 'http_code' => $httpCode, 'url' => $finalUrl];
}
if ($httpCode >= 400 || $httpCode === 0) {
return ['ok' => false, 'msg' => 'HTTP ' . $httpCode, 'body' => (string) $body, 'http_code' => $httpCode, 'url' => $finalUrl];
}
return ['ok' => true, 'msg' => '', 'body' => (string) $body, 'http_code' => $httpCode, 'url' => $finalUrl];
return $this->get_hindex();
}
private function detectScopusBlocking($html)
public function get_scopus_id()
{
if (empty($html)) {
return $this->authorInfo->get_scopus_id();
}
/** camelCase alias: calls get_scopus_id() and returns its result unchanged. */
public function getScopusId()
{
return $this->get_scopus_id();
}
/** Delegates to AuthorInfo::check_scopus_cookie() and returns its result. */
public function check_scopus_cookie()
{
return $this->authorInfo->check_scopus_cookie();
}
/** camelCase alias: calls check_scopus_cookie() and returns its result unchanged. */
public function checkScopusCookie()
{
return $this->check_scopus_cookie();
}
/** Delegates to AuthorInfo::save_scopus_cookie() and returns its result. */
public function save_scopus_cookie()
{
return $this->authorInfo->save_scopus_cookie();
}
/** camelCase alias: calls save_scopus_cookie() and returns its result unchanged. */
public function saveScopusCookie()
{
return $this->save_scopus_cookie();
}
/** Delegates to AuthorInfo::login_scopus() and returns its result. */
public function login_scopus()
{
return $this->authorInfo->login_scopus();
}
/** camelCase alias: calls login_scopus() and returns its result unchanged. */
public function loginScopus()
{
return $this->login_scopus();
}
/**
 * Forwards to AuthorInfo::check_elsevier_api() when that method exists;
 * otherwise answers with a JSON "not implemented" payload (code 0).
 *
 * @return mixed Result of the delegate, or the JSON fallback response.
 */
public function check_elsevier_api()
{
    // Guard first: the delegate may not provide this endpoint.
    if (!method_exists($this->authorInfo, 'check_elsevier_api')) {
        return json(['code' => 0, 'msg' => 'check_elsevier_api not implemented']);
    }

    return $this->authorInfo->check_elsevier_api();
}
/** camelCase alias: calls check_elsevier_api() and returns its result unchanged. */
public function checkElsevierApi()
{
return $this->check_elsevier_api();
}
/**
* 解析背调查询参数(兼容多种命名)
*/
private function resolveBackgroundParams()
{
$pick = function (...$keys) {
foreach ($keys as $k) {
$v = trim((string) input('param.' . $k, ''));
if ($v === '') {
$v = trim((string) input('post.' . $k, ''));
}
if ($v === '') {
$v = trim((string) input('get.' . $k, ''));
}
if ($v !== '') {
return $v;
}
}
return '';
}
$text = strtolower(strip_tags($html));
if (strpos($text, 'sign in') !== false || strpos($text, 'institutional sign in') !== false) {
return 'Scopus 返回登录页,当前环境未授权访问作者详情页面';
}
if (strpos($text, 'captcha') !== false || strpos($text, 'are you a robot') !== false) {
return 'Scopus 触发了人机验证,当前接口无法自动通过';
}
return '';
}
private function buildDebugInfo($finalUrl, $httpCode, $html)
{
$normalized = html_entity_decode(strip_tags((string) $html), ENT_QUOTES, 'UTF-8');
$normalized = preg_replace('/\s+/u', ' ', $normalized);
$snippet = mb_substr($normalized, 0, 300, 'UTF-8');
};
return [
'final_url' => (string) $finalUrl,
'http_code' => (int) $httpCode,
'page_snippet' => $snippet,
'contains_signin' => stripos($normalized, 'sign in') !== false ? 1 : 0,
'contains_captcha' => stripos($normalized, 'captcha') !== false ? 1 : 0,
'orcid' => $pick('orcid', 'orcid_id'),
'last_name' => $pick('lastName', 'last_name', 'lastname', 'surname'),
'first_name' => $pick('firstName', 'first_name', 'firstname', 'given_name'),
'institution' => $pick('institution', 'affiliation', 'affil', 'org'),
];
}
private function extractScopusLookupForm($html)
private function resolveFormAction()
{
$ret = [
'action' => '',
'hidden_fields' => [],
];
if (empty($html)) {
return $ret;
}
// 优先定位包含 author 的 form减少解析误匹配。
if (preg_match('/<form[^>]*action=["\']([^"\']+)["\'][^>]*>.*?<\/form>/is', $html, $formMatch)) {
$action = trim($formMatch[1]);
if (!preg_match('/^https?:\/\//i', $action)) {
$action = 'https://www.scopus.com' . (substr($action, 0, 1) === '/' ? '' : '/') . $action;
}
$ret['action'] = $action;
if (preg_match_all('/<input[^>]*type=["\']hidden["\'][^>]*>/is', $formMatch[0], $inputs)) {
foreach ($inputs[0] as $inputTag) {
if (preg_match('/name=["\']([^"\']+)["\']/i', $inputTag, $nameMatch)) {
$fieldName = trim($nameMatch[1]);
$fieldVal = '';
if (preg_match('/value=["\']([^"\']*)["\']/i', $inputTag, $valMatch)) {
$fieldVal = $valMatch[1];
}
$ret['hidden_fields'][$fieldName] = $fieldVal;
}
}
}
}
return $ret;
return rtrim($this->request->root(), '/') . '/api/author/index';
}
private function extractHIndexFromHtml($html)
private function renderReportPage(array $params, $formAction)
{
if (empty($html)) {
return null;
}
$result = $this->bgService->buildReport(
$params['orcid'],
$params['last_name'],
$params['first_name'],
$params['institution']
);
$text = html_entity_decode(strip_tags($html), ENT_QUOTES, 'UTF-8');
$text = preg_replace('/\s+/u', ' ', $text);
$patterns = [
'/h[\-\s]?index[^0-9]{0,20}([0-9]{1,3})/iu',
'/([0-9]{1,3})[^0-9]{0,20}h[\-\s]?index/iu',
];
foreach ($patterns as $pattern) {
if (preg_match($pattern, $text, $m)) {
return (int) $m[1];
if (empty($result['ok'])) {
$data = $result['data'] ?? [];
if (!empty($result['need_select'])) {
$this->assignCandidateListView($data['candidates'] ?? [], $params, $formAction);
return $this->fetch('author/select_orcid');
}
if (!empty($data['orcid_required'])) {
return $this->renderOrcidRequiredPage($params, $formAction, $data['hint'] ?? '');
}
$this->assign([
'form_action' => $formAction,
'error_msg' => $result['msg'] ?? '查询失败',
]);
return $this->fetch('author/index');
}
return null;
$this->assignReportView($result['data'], $formAction);
return $this->fetch('author/report');
}
private function fallbackByOpenAlex($name, $affil)
private function renderOrcidRequiredPage(array $params, $formAction, $hint = '')
{
$search = urlencode($name);
$url = "https://api.openalex.org/authors?search={$search}&limit=8";
$res = $this->httpRequest($url, null, true);
if (!$res['ok']) {
return null;
}
$data = json_decode($res['body'], true);
$list = $data['results'] ?? [];
if (empty($list)) {
return null;
}
$targetAffil = strtolower((string) $affil);
$match = null;
foreach ($list as $item) {
if (empty($targetAffil)) {
$match = $item;
break;
}
$insts = $item['affiliations'] ?? [];
foreach ($insts as $inst) {
$instName = strtolower($inst['display_name'] ?? '');
if ($instName !== '' && strpos($instName, $targetAffil) !== false) {
$match = $item;
break 2;
}
}
}
if ($match === null) {
$match = $list[0];
}
return [
'code' => 1,
'name' => $match['display_name'] ?? $name,
'affil' => !empty($match['affiliations'][0]['display_name']) ? $match['affiliations'][0]['display_name'] : $affil,
'h_index_scopus' => $match['summary_stats']['h_index_scopus'] ?? null,
'h_index_openalex' => $match['summary_stats']['h_index'] ?? null,
'source' => 'openalex_fallback',
];
$this->assign([
'form_action' => $formAction,
'submitted_name' => trim($params['first_name'] . ' ' . $params['last_name']),
'submitted_institution' => $params['institution'],
'last_name' => $params['last_name'],
'first_name' => $params['first_name'],
'institution' => $params['institution'],
'hint' => $hint,
]);
return $this->fetch('author/orcid_required');
}
}
/**
 * Builds the URL that opens the report entry page for a given ORCID.
 *
 * The query string carries `orcid`, `lastName`, `firstName` and `institution`,
 * in that order; any value that is empty after trimming is left out. Values
 * are encoded per RFC 3986 (spaces become %20).
 *
 * @param string $formAction Base URL; the query string is appended after a '?'.
 * @param array  $params     May contain 'last_name', 'first_name', 'institution'.
 * @param mixed  $orcid      ORCID to put in the `orcid` parameter.
 * @return string
 */
private function buildReportEntryUrl($formAction, array $params, $orcid)
{
    $fields = [
        'orcid' => $orcid,
        'lastName' => $params['last_name'] ?? '',
        'firstName' => $params['first_name'] ?? '',
        'institution' => $params['institution'] ?? '',
    ];

    // Keep only the fields that still have content once trimmed.
    $query = [];
    foreach ($fields as $key => $value) {
        if (trim((string) $value) !== '') {
            $query[$key] = $value;
        }
    }

    $queryString = http_build_query($query, '', '&', PHP_QUERY_RFC3986);

    return $formAction . '?' . $queryString;
}
/**
 * Prepares the ORCID candidate list for the selection view and assigns it,
 * together with the submitted search values, to the template.
 *
 * Each candidate is extended with:
 *   - report_url:    link built by buildReportEntryUrl() for the candidate's ORCID
 *   - matched_class: 'match' when `institution_matched` is truthy, else ''
 *   - avatar_letter: upper-cased first character of `display_name`, or '?'
 *
 * @param array  $candidates Candidate rows; original keys are preserved.
 * @param array  $params     Submitted search values (last_name, first_name, institution).
 * @param string $formAction Form target URL.
 */
private function assignCandidateListView(array $candidates, array $params, $formAction)
{
    $rows = [];
    foreach ($candidates as $key => $candidate) {
        $displayName = trim((string) ($candidate['display_name'] ?? ''));
        $isMatch = !empty($candidate['institution_matched']);

        $candidate['report_url'] = $this->buildReportEntryUrl(
            $formAction,
            $params,
            $candidate['orcid'] ?? ''
        );
        $candidate['matched_class'] = $isMatch ? 'match' : '';
        if ($displayName !== '') {
            $candidate['avatar_letter'] = mb_strtoupper(mb_substr($displayName, 0, 1));
        } else {
            $candidate['avatar_letter'] = '?';
        }

        $rows[$key] = $candidate;
    }

    $firstName = $params['first_name'] ?? '';
    $lastName = $params['last_name'] ?? '';
    $institution = $params['institution'] ?? '';

    $this->assign([
        'form_action' => $formAction,
        'candidates' => $rows,
        'candidate_count' => count($rows),
        'submitted_name' => trim($firstName . ' ' . $lastName),
        'submitted_institution' => $institution,
        'last_name' => $lastName,
        'first_name' => $firstName,
        'institution' => $institution,
    ]);
}
/**
 * Derives the display-only fields of a background report and assigns the
 * report plus those fields to the template.
 *
 * Derived data:
 *   - every duplicate group gets `paper_count`, and every paper in it gets a
 *     `source_class` ('orcid' or 'pubmed', defaulting to 'orcid');
 *   - every Retraction Watch item gets `display_title` (max 120 chars),
 *     `reason_short` (max 200 chars) and `link_url` (the item's own URL, or a
 *     retractionwatch.com search for its title when no URL is present);
 *   - `risk_class` is chosen from the risk-level label in the conclusion.
 *
 * @param array  $report     Report payload; missing sections are treated as empty.
 * @param string $formAction Form target URL.
 */
private function assignReportView(array $report, $formAction)
{
    $dupPaperCount = 0;
    $duplicates = $report['duplicates'] ?? [];
    foreach ($duplicates as $idx => $group) {
        // A group without a usable 'papers' list counts as having no papers,
        // instead of raising an undefined-index / invalid-foreach warning.
        $papers = $group['papers'] ?? [];
        if (!is_array($papers)) {
            $papers = [];
        }

        $duplicates[$idx]['paper_count'] = count($papers);
        $dupPaperCount += count($papers);

        foreach ($papers as $pi => $paper) {
            $src = strtolower((string) ($paper['source'] ?? 'orcid'));
            $papers[$pi]['source_class'] = in_array($src, ['orcid', 'pubmed'], true) ? $src : 'orcid';
        }
        $duplicates[$idx]['papers'] = $papers;
    }
    $report['duplicates'] = $duplicates;

    $rw = $report['retraction_watch'] ?? [];
    $items = $rw['items'] ?? [];
    foreach ($items as $idx => $it) {
        // Prefer the author-specific title; fall back to the plain title.
        $title = !empty($it['author_title']) ? $it['author_title'] : ($it['title'] ?? '');
        $items[$idx]['display_title'] = mb_substr((string) $title, 0, 120);
        $items[$idx]['reason_short'] = mb_substr((string) ($it['reason'] ?? ''), 0, 200);

        $linkUrl = trim((string) ($it['url'] ?? ''));
        if ($linkUrl === '') {
            $linkUrl = 'https://retractionwatch.com/?s=' . rawurlencode((string) ($it['title'] ?? ''));
        }
        $items[$idx]['link_url'] = $linkUrl;
    }
    $report['retraction_watch']['items'] = $items;

    // Map the risk-level label to a CSS class; the first matching label wins.
    $riskLevel = (string) ($report['conclusion']['risk_level'] ?? '');
    $riskClass = 'risk-default';
    if (strpos($riskLevel, '高风险') !== false) {
        $riskClass = 'risk-high';
    } elseif (strpos($riskLevel, '中风险') !== false) {
        $riskClass = 'risk-mid';
    } elseif (strpos($riskLevel, '低风险') !== false) {
        $riskClass = 'risk-low';
    }

    $this->assign([
        'form_action' => $formAction,
        'report' => $report,
        'risk_class' => $riskClass,
        // NOTE(review): the three *_text values are joined with an empty
        // separator, so entries run together - confirm whether a delimiter
        // was intended here.
        'orcid_affiliations_text' => implode('', $report['basic']['orcid_affiliations'] ?? []),
        'openalex_institutions_text' => implode('', $report['basic']['openalex_institutions'] ?? []),
        'topics_text' => implode('', $report['metrics']['topics'] ?? []),
        'rw_match_total' => (int) ($rw['doi_match_count'] ?? 0)
            + (int) ($rw['name_match_count'] ?? 0)
            + (int) ($rw['name_loose_match_count'] ?? 0),
        'dup_group_count' => count($duplicates),
        'dup_paper_count' => $dupPaperCount,
        // The PubMed list count is capped at 10.
        'pubmed_list_count' => min(10, count($report['pubmed_papers'] ?? [])),
        // The ORCID section is numbered seven when PubMed results exist, six otherwise.
        'orcid_section_num' => (($report['metrics']['pubmed_total'] ?? 0) > 0) ? '七' : '六',
    ]);
}
}