From 66c3b86bd8dfd5b10c1c035490184e4d5e2b48d8 Mon Sep 17 00:00:00 2001 From: wyn <1074145239@qq.com> Date: Fri, 5 Jun 2026 11:11:16 +0800 Subject: [PATCH 1/5] =?UTF-8?q?=E8=83=8C=E8=B0=83?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- application/api/controller/Author.php | 633 ++++---- application/api/view/author/_styles.html | 315 ++++ application/api/view/author/index.html | 52 + .../api/view/author/orcid_required.html | 55 + application/api/view/author/report.html | 249 +++ application/api/view/author/select_orcid.html | 48 + .../service/AuthorBackgroundService.php | 1378 +++++++++++++++++ application/route.php | 19 + 8 files changed, 2478 insertions(+), 271 deletions(-) create mode 100644 application/api/view/author/_styles.html create mode 100644 application/api/view/author/index.html create mode 100644 application/api/view/author/orcid_required.html create mode 100644 application/api/view/author/report.html create mode 100644 application/api/view/author/select_orcid.html create mode 100644 application/common/service/AuthorBackgroundService.php diff --git a/application/api/controller/Author.php b/application/api/controller/Author.php index 2ea83479..5809357e 100644 --- a/application/api/controller/Author.php +++ b/application/api/controller/Author.php @@ -1,303 +1,394 @@ bgService = new AuthorBackgroundService(); + $this->authorInfo = new AuthorInfo(); + } + + /** + * 作者背调 HTML 页面入口 + * + * 1. 传了 ORCID → 直接生成报告 + * 2. 未传 ORCID + 姓氏(机构选填)→ 仅按姓名搜 ORCID;1 条直接报告,多条显示选择列表 + */ + public function index() + { + @set_time_limit(120); + + $formAction = $this->resolveFormAction(); + $params = $this->resolveBackgroundParams(); + $orcidNorm = $this->bgService->normalizeOrcid($params['orcid']); + + if ($orcidNorm === '' + && $params['last_name'] === '' + && $params['first_name'] === '' + && $params['institution'] === '' + ) { + $this->assign('form_action', $formAction); + return $this->fetch('author/index'); + } + + // 1. 有 ORCID → 直接报告页 + if ($orcidNorm !== '') { + return $this->renderReportPage($params, $formAction); + } + + // 2. 无 ORCID → 姓氏必填,机构选填 + if ($params['last_name'] === '') { + $this->assign([ + 'form_action' => $formAction, + 'error_msg' => '未填 ORCID 时,请填写姓氏', + 'last_name' => $params['last_name'], + 'first_name' => $params['first_name'], + 'institution' => $params['institution'], + ]); + return $this->fetch('author/index'); + } + + // 3. 仅按姓名搜 ORCID(机构只做排序校验) + $search = $this->bgService->searchOrcidCandidates( + $params['last_name'], + $params['first_name'], + $params['institution'] + ); + $candidates = $search['candidates'] ?? []; + + if (empty($candidates)) { + return $this->renderOrcidRequiredPage($params, $formAction, '已在 OpenAlex、ORCID 官网、Scopus 按姓名检索,未找到带 ORCID 的作者'); + } + + if (count($candidates) > 1) { + $this->assignCandidateListView($candidates, $params, $formAction); + return $this->fetch('author/select_orcid'); + } + + return $this->redirect($this->buildReportEntryUrl($formAction, $params, $candidates[0]['orcid'])); + } + + /** + * 医学期刊作者背景调查报告(ORCID 必填) + * + * POST/GET 参数: + * orcid / orcid_id ORCID(必填) + * lastName / last_name 姓(选填,用于 PubMed 辅助检索与报告展示) + * firstName / first_name 名(选填) + * institution / affiliation 机构(选填) + */ + public function background_report() + { + @set_time_limit(120); + + $params = $this->resolveBackgroundParams(); + $result = $this->bgService->buildReport( + $params['orcid'], + $params['last_name'], + $params['first_name'], + $params['institution'] + ); + + if (empty($result['ok'])) { + $code = !empty($result['need_select']) ? 2 : 0; + return json([ + 'code' => $code, + 'msg' => $result['msg'] ?? '查询失败', + 'data' => $result['data'] ?? null, + ]); + } + + return json([ + 'code' => 1, + 'msg' => 'success', + 'data' => $result['data'], + ]); + } + + /** camelCase 别名 */ + public function backgroundReport() + { + return $this->background_report(); + } + + /** 与 background_report 相同(路由兼容) */ + public function due_diligence() + { + return $this->background_report(); + } + + public function dueDiligence() + { + return $this->due_diligence(); + } + + /** + * OpenAlex + Crossref 诚信扫描(不依赖 ORCID 必填) + */ + public function background_check() + { + return $this->authorInfo->background_check(); + } + + public function backgroundCheck() + { + return $this->background_check(); + } + public function get_hindex() { - $name = trim(input('get.name')); - $affil = trim(input('get.affil')); - $debug = (int) input('get.debug', 0); - $cookieFile = tempnam(sys_get_temp_dir(), 'scopus_cookie_'); - - if (empty($name)) { - return json(['code' => 0, 'msg' => '请输入作者姓名']); - } - - // 1) 获取 freelookup 页面,用于拿到真实提交地址和隐藏字段。 - $lookupUrl = 'https://www.scopus.com/freelookup/form/author.uri?zone=TopNavBar&origin=NO%20ORIGIN%20DEFINED'; - $lookupRes = $this->httpRequest($lookupUrl, null, true, '', $cookieFile); - if (!$lookupRes['ok']) { - @unlink($cookieFile); - $ret = ['code' => 0, 'msg' => '访问 Scopus 失败:' . $lookupRes['msg']]; - if ($debug === 1) { - $ret['debug'] = $this->buildDebugInfo($lookupRes['url'], $lookupRes['http_code'], $lookupRes['body']); - } - return json($ret); - } - - $formInfo = $this->extractScopusLookupForm($lookupRes['body']); - if (empty($formInfo['action'])) { - @unlink($cookieFile); - $ret = ['code' => 0, 'msg' => 'Scopus 页面结构已变化,未找到查询表单']; - if ($debug === 1) { - $ret['debug'] = $this->buildDebugInfo($lookupRes['url'], $lookupRes['http_code'], $lookupRes['body']); - } - return json($ret); - } - - // 2) 组装查询参数(姓名 + 机构),并携带隐藏字段提交。 - $postData = $formInfo['hidden_fields']; - $postData['authLast'] = $name; - $postData['affil'] = $affil; - - $searchRes = $this->httpRequest($formInfo['action'], $postData, true, $lookupUrl, $cookieFile); - if (!$searchRes['ok']) { - @unlink($cookieFile); - $ret = ['code' => 0, 'msg' => '查询 Scopus 失败:' . $searchRes['msg']]; - if ($debug === 1) { - $ret['debug'] = $this->buildDebugInfo($searchRes['url'], $searchRes['http_code'], $searchRes['body']); - } - return json($ret); - } - - $blockMsg = $this->detectScopusBlocking($searchRes['body']); - if (!empty($blockMsg)) { - @unlink($cookieFile); - $ret = ['code' => 0, 'msg' => $blockMsg]; - $fallback = $this->fallbackByOpenAlex($name, $affil); - if ($fallback !== null) { - $ret = array_merge($fallback, [ - 'msg' => $blockMsg . ',已自动降级 OpenAlex 结果' - ]); - } - if ($debug === 1) { - $ret['debug'] = $this->buildDebugInfo($searchRes['url'], $searchRes['http_code'], $searchRes['body']); - } - return json($ret); - } - - // 3) 从返回页提取 h-index(优先匹配“h-index”关键词附近数字)。 - $hIndex = $this->extractHIndexFromHtml($searchRes['body']); - if ($hIndex === null) { - @unlink($cookieFile); - $ret = [ - 'code' => 0, - 'msg' => '未从 Scopus 结果页解析到 H 指数(可能需要人工登录或页面结构调整)' - ]; - if ($debug === 1) { - $ret['debug'] = $this->buildDebugInfo($searchRes['url'], $searchRes['http_code'], $searchRes['body']); - } - return json($ret); - } - - @unlink($cookieFile); - - $ret = [ - 'code' => 1, - 'name' => $name, - 'affil' => $affil, - 'h_index_scopus' => $hIndex, - 'source' => 'scopus_freelookup', - ]; - if ($debug === 1) { - $ret['debug'] = $this->buildDebugInfo($searchRes['url'], $searchRes['http_code'], $searchRes['body']); - } - return json($ret); + return $this->authorInfo->get_hindex(); } - private function httpRequest($url, $postData = null, $followLocation = true, $referer = '', $cookieFile = '') + public function getHindex() { - $ch = curl_init(); - $options = [ - CURLOPT_URL => $url, - CURLOPT_RETURNTRANSFER => true, - CURLOPT_SSL_VERIFYPEER => false, - CURLOPT_SSL_VERIFYHOST => false, - CURLOPT_FOLLOWLOCATION => $followLocation, - CURLOPT_MAXREDIRS => 8, - CURLOPT_TIMEOUT => 30, - CURLOPT_CONNECTTIMEOUT => 15, - CURLOPT_USERAGENT => 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36', - CURLOPT_ENCODING => '', - CURLOPT_HTTPHEADER => [ - 'Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', - 'Accept-Language: zh-CN,zh;q=0.9,en;q=0.8', - ], - ]; - - if (!empty($referer)) { - $options[CURLOPT_REFERER] = $referer; - } - - if (!empty($cookieFile)) { - $options[CURLOPT_COOKIEJAR] = $cookieFile; - $options[CURLOPT_COOKIEFILE] = $cookieFile; - } - - if (is_array($postData)) { - $options[CURLOPT_POST] = true; - $options[CURLOPT_POSTFIELDS] = http_build_query($postData); - } - - curl_setopt_array($ch, $options); - $body = curl_exec($ch); - $error = curl_error($ch); - $httpCode = (int) curl_getinfo($ch, CURLINFO_HTTP_CODE); - $finalUrl = (string) curl_getinfo($ch, CURLINFO_EFFECTIVE_URL); - curl_close($ch); - - if ($error) { - if (strpos($error, 'Maximum (') !== false && strpos($error, 'redirects followed') !== false) { - return [ - 'ok' => false, - 'msg' => 'Scopus 跳转过多(可能触发登录/验证页面),请稍后重试或先在浏览器登录 Scopus', - 'body' => '', - 'http_code' => $httpCode, - 'url' => $finalUrl - ]; - } - return ['ok' => false, 'msg' => $error, 'body' => '', 'http_code' => $httpCode, 'url' => $finalUrl]; - } - - if ($httpCode >= 400 || $httpCode === 0) { - return ['ok' => false, 'msg' => 'HTTP ' . $httpCode, 'body' => (string) $body, 'http_code' => $httpCode, 'url' => $finalUrl]; - } - - return ['ok' => true, 'msg' => '', 'body' => (string) $body, 'http_code' => $httpCode, 'url' => $finalUrl]; + return $this->get_hindex(); } - private function detectScopusBlocking($html) + public function get_scopus_id() { - if (empty($html)) { + return $this->authorInfo->get_scopus_id(); + } + + public function getScopusId() + { + return $this->get_scopus_id(); + } + + public function check_scopus_cookie() + { + return $this->authorInfo->check_scopus_cookie(); + } + + public function checkScopusCookie() + { + return $this->check_scopus_cookie(); + } + + public function save_scopus_cookie() + { + return $this->authorInfo->save_scopus_cookie(); + } + + public function saveScopusCookie() + { + return $this->save_scopus_cookie(); + } + + public function login_scopus() + { + return $this->authorInfo->login_scopus(); + } + + public function loginScopus() + { + return $this->login_scopus(); + } + + public function check_elsevier_api() + { + if (method_exists($this->authorInfo, 'check_elsevier_api')) { + return $this->authorInfo->check_elsevier_api(); + } + return json(['code' => 0, 'msg' => 'check_elsevier_api not implemented']); + } + + public function checkElsevierApi() + { + return $this->check_elsevier_api(); + } + + /** + * 解析背调查询参数(兼容多种命名) + */ + private function resolveBackgroundParams() + { + $pick = function (...$keys) { + foreach ($keys as $k) { + $v = trim((string) input('param.' . $k, '')); + if ($v === '') { + $v = trim((string) input('post.' . $k, '')); + } + if ($v === '') { + $v = trim((string) input('get.' . $k, '')); + } + if ($v !== '') { + return $v; + } + } return ''; - } - - $text = strtolower(strip_tags($html)); - if (strpos($text, 'sign in') !== false || strpos($text, 'institutional sign in') !== false) { - return 'Scopus 返回登录页,当前环境未授权访问作者详情页面'; - } - if (strpos($text, 'captcha') !== false || strpos($text, 'are you a robot') !== false) { - return 'Scopus 触发了人机验证,当前接口无法自动通过'; - } - - return ''; - } - - private function buildDebugInfo($finalUrl, $httpCode, $html) - { - $normalized = html_entity_decode(strip_tags((string) $html), ENT_QUOTES, 'UTF-8'); - $normalized = preg_replace('/\s+/u', ' ', $normalized); - $snippet = mb_substr($normalized, 0, 300, 'UTF-8'); + }; return [ - 'final_url' => (string) $finalUrl, - 'http_code' => (int) $httpCode, - 'page_snippet' => $snippet, - 'contains_signin' => stripos($normalized, 'sign in') !== false ? 1 : 0, - 'contains_captcha' => stripos($normalized, 'captcha') !== false ? 1 : 0, + 'orcid' => $pick('orcid', 'orcid_id'), + 'last_name' => $pick('lastName', 'last_name', 'lastname', 'surname'), + 'first_name' => $pick('firstName', 'first_name', 'firstname', 'given_name'), + 'institution' => $pick('institution', 'affiliation', 'affil', 'org'), ]; } - private function extractScopusLookupForm($html) + private function resolveFormAction() { - $ret = [ - 'action' => '', - 'hidden_fields' => [], - ]; - - if (empty($html)) { - return $ret; - } - - // 优先定位包含 author 的 form,减少解析误匹配。 - if (preg_match('/