diff --git a/.env b/.env index b90922aa..fdc27593 100644 --- a/.env +++ b/.env @@ -85,6 +85,9 @@ citation_chat_model = qwen2.5:7b citation_chat_api_key = citation_chat_timeout = 120 +[scopus] +api_key = 54f915fc96e86b94ae8722ce7fdd69c7 + [emailtemplete] pre = ' diff --git a/application/api/controller/Author.php b/application/api/controller/Author.php index 2ea83479..99b22daa 100644 --- a/application/api/controller/Author.php +++ b/application/api/controller/Author.php @@ -1,303 +1,302 @@ 0, 'msg' => '请输入作者姓名']); - } - - // 1) 获取 freelookup 页面,用于拿到真实提交地址和隐藏字段。 - $lookupUrl = 'https://www.scopus.com/freelookup/form/author.uri?zone=TopNavBar&origin=NO%20ORIGIN%20DEFINED'; - $lookupRes = $this->httpRequest($lookupUrl, null, true, '', $cookieFile); - if (!$lookupRes['ok']) { - @unlink($cookieFile); - $ret = ['code' => 0, 'msg' => '访问 Scopus 失败:' . $lookupRes['msg']]; - if ($debug === 1) { - $ret['debug'] = $this->buildDebugInfo($lookupRes['url'], $lookupRes['http_code'], $lookupRes['body']); - } - return json($ret); - } - - $formInfo = $this->extractScopusLookupForm($lookupRes['body']); - if (empty($formInfo['action'])) { - @unlink($cookieFile); - $ret = ['code' => 0, 'msg' => 'Scopus 页面结构已变化,未找到查询表单']; - if ($debug === 1) { - $ret['debug'] = $this->buildDebugInfo($lookupRes['url'], $lookupRes['http_code'], $lookupRes['body']); - } - return json($ret); - } - - // 2) 组装查询参数(姓名 + 机构),并携带隐藏字段提交。 - $postData = $formInfo['hidden_fields']; - $postData['authLast'] = $name; - $postData['affil'] = $affil; - - $searchRes = $this->httpRequest($formInfo['action'], $postData, true, $lookupUrl, $cookieFile); - if (!$searchRes['ok']) { - @unlink($cookieFile); - $ret = ['code' => 0, 'msg' => '查询 Scopus 失败:' . $searchRes['msg']]; - if ($debug === 1) { - $ret['debug'] = $this->buildDebugInfo($searchRes['url'], $searchRes['http_code'], $searchRes['body']); - } - return json($ret); - } - - $blockMsg = $this->detectScopusBlocking($searchRes['body']); - if (!empty($blockMsg)) { - @unlink($cookieFile); - $ret = ['code' => 0, 'msg' => $blockMsg]; - $fallback = $this->fallbackByOpenAlex($name, $affil); - if ($fallback !== null) { - $ret = array_merge($fallback, [ - 'msg' => $blockMsg . ',已自动降级 OpenAlex 结果' - ]); - } - if ($debug === 1) { - $ret['debug'] = $this->buildDebugInfo($searchRes['url'], $searchRes['http_code'], $searchRes['body']); - } - return json($ret); - } - - // 3) 从返回页提取 h-index(优先匹配“h-index”关键词附近数字)。 - $hIndex = $this->extractHIndexFromHtml($searchRes['body']); - if ($hIndex === null) { - @unlink($cookieFile); - $ret = [ - 'code' => 0, - 'msg' => '未从 Scopus 结果页解析到 H 指数(可能需要人工登录或页面结构调整)' - ]; - if ($debug === 1) { - $ret['debug'] = $this->buildDebugInfo($searchRes['url'], $searchRes['http_code'], $searchRes['body']); - } - return json($ret); - } - - @unlink($cookieFile); - - $ret = [ - 'code' => 1, - 'name' => $name, - 'affil' => $affil, - 'h_index_scopus' => $hIndex, - 'source' => 'scopus_freelookup', - ]; - if ($debug === 1) { - $ret['debug'] = $this->buildDebugInfo($searchRes['url'], $searchRes['http_code'], $searchRes['body']); - } - return json($ret); + parent::__construct($request); + $this->bgService = new AuthorBackgroundService(); } - private function httpRequest($url, $postData = null, $followLocation = true, $referer = '', $cookieFile = '') + /** + * 作者背调 HTML 页面入口 + * + * 1. 传了 ORCID → 直接生成报告 + * 2. 未传 ORCID + 姓氏(机构选填)→ 仅按姓名搜 ORCID;1 条直接报告,多条显示选择列表 + */ + public function index() { - $ch = curl_init(); - $options = [ - CURLOPT_URL => $url, - CURLOPT_RETURNTRANSFER => true, - CURLOPT_SSL_VERIFYPEER => false, - CURLOPT_SSL_VERIFYHOST => false, - CURLOPT_FOLLOWLOCATION => $followLocation, - CURLOPT_MAXREDIRS => 8, - CURLOPT_TIMEOUT => 30, - CURLOPT_CONNECTTIMEOUT => 15, - CURLOPT_USERAGENT => 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36', - CURLOPT_ENCODING => '', - CURLOPT_HTTPHEADER => [ - 'Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', - 'Accept-Language: zh-CN,zh;q=0.9,en;q=0.8', - ], - ]; + @set_time_limit(120); - if (!empty($referer)) { - $options[CURLOPT_REFERER] = $referer; + $formAction = $this->resolveFormAction(); + $params = $this->resolveBackgroundParams(); + $orcidNorm = $this->bgService->normalizeOrcid($params['orcid']); + + if ($orcidNorm === '' + && $params['last_name'] === '' + && $params['first_name'] === '' + && $params['institution'] === '' + ) { + $this->assign('form_action', $formAction); + return $this->fetch('author/index'); } - if (!empty($cookieFile)) { - $options[CURLOPT_COOKIEJAR] = $cookieFile; - $options[CURLOPT_COOKIEFILE] = $cookieFile; + // 1. 有 ORCID → 直接报告页 + if ($orcidNorm !== '') { + return $this->renderReportPage($params, $formAction); } - if (is_array($postData)) { - $options[CURLOPT_POST] = true; - $options[CURLOPT_POSTFIELDS] = http_build_query($postData); + // 2. 无 ORCID → 姓氏必填,机构选填 + if ($params['last_name'] === '') { + $this->assign([ + 'form_action' => $formAction, + 'error_msg' => '未填 ORCID 时,请填写姓氏', + 'last_name' => $params['last_name'], + 'first_name' => $params['first_name'], + 'institution' => $params['institution'], + ]); + return $this->fetch('author/index'); } - curl_setopt_array($ch, $options); - $body = curl_exec($ch); - $error = curl_error($ch); - $httpCode = (int) curl_getinfo($ch, CURLINFO_HTTP_CODE); - $finalUrl = (string) curl_getinfo($ch, CURLINFO_EFFECTIVE_URL); - curl_close($ch); + // 3. 仅按姓名搜 ORCID(机构只做排序校验) + $search = $this->bgService->searchOrcidCandidates( + $params['last_name'], + $params['first_name'], + $params['institution'] + ); + $candidates = $search['candidates'] ?? []; - if ($error) { - if (strpos($error, 'Maximum (') !== false && strpos($error, 'redirects followed') !== false) { - return [ - 'ok' => false, - 'msg' => 'Scopus 跳转过多(可能触发登录/验证页面),请稍后重试或先在浏览器登录 Scopus', - 'body' => '', - 'http_code' => $httpCode, - 'url' => $finalUrl - ]; - } - return ['ok' => false, 'msg' => $error, 'body' => '', 'http_code' => $httpCode, 'url' => $finalUrl]; + if (empty($candidates)) { + return $this->renderOrcidRequiredPage($params, $formAction, '已在 OpenAlex、ORCID 官网、Scopus 按姓名检索,未找到带 ORCID 的作者'); } - if ($httpCode >= 400 || $httpCode === 0) { - return ['ok' => false, 'msg' => 'HTTP ' . $httpCode, 'body' => (string) $body, 'http_code' => $httpCode, 'url' => $finalUrl]; + if (count($candidates) > 1) { + $this->assignCandidateListView($candidates, $params, $formAction); + return $this->fetch('author/select_orcid'); } - return ['ok' => true, 'msg' => '', 'body' => (string) $body, 'http_code' => $httpCode, 'url' => $finalUrl]; + return $this->redirect($this->buildReportEntryUrl($formAction, $params, $candidates[0]['orcid'])); } - private function detectScopusBlocking($html) + /** + * 医学期刊作者背景调查报告(ORCID 必填) + * + * POST/GET 参数: + * orcid / orcid_id ORCID(必填) + * lastName / last_name 姓(选填,用于 PubMed 辅助检索与报告展示) + * firstName / first_name 名(选填) + * institution / affiliation 机构(选填) + */ + public function background_report() { - if (empty($html)) { + @set_time_limit(120); + + $params = $this->resolveBackgroundParams(); + $result = $this->bgService->buildReport( + $params['orcid'], + $params['last_name'], + $params['first_name'], + $params['institution'] + ); + + if (empty($result['ok'])) { + $code = !empty($result['need_select']) ? 2 : 0; + return json([ + 'code' => $code, + 'msg' => $result['msg'] ?? '查询失败', + 'data' => $result['data'] ?? null, + ]); + } + + return json([ + 'code' => 1, + 'msg' => 'success', + 'data' => $result['data'], + ]); + } + + /** 与 background_report 相同(路由兼容) */ + public function due_diligence() + { + return $this->background_report(); + } + + /** + * 解析背调查询参数(兼容多种命名) + */ + private function resolveBackgroundParams() + { + $pick = function (...$keys) { + foreach ($keys as $k) { + $v = trim((string) input('param.' . $k, '')); + if ($v === '') { + $v = trim((string) input('post.' . $k, '')); + } + if ($v === '') { + $v = trim((string) input('get.' . $k, '')); + } + if ($v !== '') { + return $v; + } + } return ''; - } - - $text = strtolower(strip_tags($html)); - if (strpos($text, 'sign in') !== false || strpos($text, 'institutional sign in') !== false) { - return 'Scopus 返回登录页,当前环境未授权访问作者详情页面'; - } - if (strpos($text, 'captcha') !== false || strpos($text, 'are you a robot') !== false) { - return 'Scopus 触发了人机验证,当前接口无法自动通过'; - } - - return ''; - } - - private function buildDebugInfo($finalUrl, $httpCode, $html) - { - $normalized = html_entity_decode(strip_tags((string) $html), ENT_QUOTES, 'UTF-8'); - $normalized = preg_replace('/\s+/u', ' ', $normalized); - $snippet = mb_substr($normalized, 0, 300, 'UTF-8'); + }; return [ - 'final_url' => (string) $finalUrl, - 'http_code' => (int) $httpCode, - 'page_snippet' => $snippet, - 'contains_signin' => stripos($normalized, 'sign in') !== false ? 1 : 0, - 'contains_captcha' => stripos($normalized, 'captcha') !== false ? 1 : 0, + 'orcid' => $pick('orcid', 'orcid_id'), + 'last_name' => $pick('lastName', 'last_name', 'lastname', 'surname'), + 'first_name' => $pick('firstName', 'first_name', 'firstname', 'given_name'), + 'institution' => $pick('institution', 'affiliation', 'affil', 'org'), ]; } - private function extractScopusLookupForm($html) + private function resolveFormAction() { - $ret = [ - 'action' => '', - 'hidden_fields' => [], - ]; - - if (empty($html)) { - return $ret; - } - - // 优先定位包含 author 的 form,减少解析误匹配。 - if (preg_match('/