0, 'msg' => '请输入作者姓名']); } // 1) 获取 freelookup 页面,用于拿到真实提交地址和隐藏字段。 $lookupUrl = 'https://www.scopus.com/freelookup/form/author.uri?zone=TopNavBar&origin=NO%20ORIGIN%20DEFINED'; $lookupRes = $this->httpRequest($lookupUrl, null, true, '', $cookieFile); if (!$lookupRes['ok']) { @unlink($cookieFile); $ret = ['code' => 0, 'msg' => '访问 Scopus 失败:' . $lookupRes['msg']]; if ($debug === 1) { $ret['debug'] = $this->buildDebugInfo($lookupRes['url'], $lookupRes['http_code'], $lookupRes['body']); } return json($ret); } $formInfo = $this->extractScopusLookupForm($lookupRes['body']); if (empty($formInfo['action'])) { @unlink($cookieFile); $ret = ['code' => 0, 'msg' => 'Scopus 页面结构已变化,未找到查询表单']; if ($debug === 1) { $ret['debug'] = $this->buildDebugInfo($lookupRes['url'], $lookupRes['http_code'], $lookupRes['body']); } return json($ret); } // 2) 组装查询参数(姓名 + 机构),并携带隐藏字段提交。 $postData = $formInfo['hidden_fields']; $postData['authLast'] = $name; $postData['affil'] = $affil; $searchRes = $this->httpRequest($formInfo['action'], $postData, true, $lookupUrl, $cookieFile); if (!$searchRes['ok']) { @unlink($cookieFile); $ret = ['code' => 0, 'msg' => '查询 Scopus 失败:' . $searchRes['msg']]; if ($debug === 1) { $ret['debug'] = $this->buildDebugInfo($searchRes['url'], $searchRes['http_code'], $searchRes['body']); } return json($ret); } $blockMsg = $this->detectScopusBlocking($searchRes['body']); if (!empty($blockMsg)) { @unlink($cookieFile); $ret = ['code' => 0, 'msg' => $blockMsg]; $fallback = $this->fallbackByOpenAlex($name, $affil); if ($fallback !== null) { $ret = array_merge($fallback, [ 'msg' => $blockMsg . 
',已自动降级 OpenAlex 结果'
                ]);
            }
            if ($debug === 1) {
                $ret['debug'] = $this->buildDebugInfo($searchRes['url'], $searchRes['http_code'], $searchRes['body']);
            }
            return json($ret);
        }

        // 3) Extract the h-index from the result page (numbers near the "h-index" keyword are preferred).
        $hIndex = $this->extractHIndexFromHtml($searchRes['body']);
        if ($hIndex === null) {
            @unlink($cookieFile);
            $ret = [
                'code' => 0,
                'msg' => '未从 Scopus 结果页解析到 H 指数(可能需要人工登录或页面结构调整)'
            ];
            if ($debug === 1) {
                $ret['debug'] = $this->buildDebugInfo($searchRes['url'], $searchRes['http_code'], $searchRes['body']);
            }
            return json($ret);
        }

        @unlink($cookieFile);
        $ret = [
            'code' => 1,
            'name' => $name,
            'affil' => $affil,
            'h_index_scopus' => $hIndex,
            'source' => 'scopus_freelookup',
        ];
        if ($debug === 1) {
            $ret['debug'] = $this->buildDebugInfo($searchRes['url'], $searchRes['http_code'], $searchRes['body']);
        }
        return json($ret);
    }

    /**
     * Perform one cURL request: a GET by default, a POST when $postData is an array.
     *
     * @param string     $url            Target URL.
     * @param array|null $postData       When an array, it is URL-encoded with http_build_query()
     *                                   and sent as the POST body; any other value leaves the
     *                                   request as a GET.
     * @param bool       $followLocation Whether redirects are followed (at most 8).
     * @param string     $referer        Referer header value; omitted when empty.
     * @param string     $cookieFile     Cookie-jar path used for both reading and writing
     *                                   cookies; omitted when empty.
     *
     * @return array{ok: bool, msg: string, body: string, http_code: int, url: string}
     *         `ok` is false on a transport error, on HTTP status >= 400 and on status 0.
     *         `body` is always '' for transport errors; `url` is the effective URL after
     *         redirects.
     */
    private function httpRequest($url, $postData = null, $followLocation = true, $referer = '', $cookieFile = '')
    {
        $ch = curl_init();
        if ($ch === false) {
            // Without a handle every later curl_* call would fail; report it in the usual shape.
            return ['ok' => false, 'msg' => 'cURL 初始化失败', 'body' => '', 'http_code' => 0, 'url' => (string) $url];
        }

        $options = [
            CURLOPT_URL => $url,
            CURLOPT_RETURNTRANSFER => true,
            // NOTE(review): TLS peer/host verification is switched off, so the connection can be
            // intercepted. Left unchanged here because enabling it needs a CA bundle on the host;
            // turn it on once that is confirmed.
            CURLOPT_SSL_VERIFYPEER => false,
            CURLOPT_SSL_VERIFYHOST => false,
            CURLOPT_FOLLOWLOCATION => $followLocation,
            CURLOPT_MAXREDIRS => 8,
            CURLOPT_TIMEOUT => 30,
            CURLOPT_CONNECTTIMEOUT => 15,
            CURLOPT_USERAGENT => 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36',
            // Empty string: advertise every content encoding this cURL build can decode.
            CURLOPT_ENCODING => '',
            CURLOPT_HTTPHEADER => [
                'Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
                'Accept-Language: zh-CN,zh;q=0.9,en;q=0.8',
            ],
        ];
        if (!empty($referer)) {
            $options[CURLOPT_REFERER] = $referer;
        }
        if (!empty($cookieFile)) {
            $options[CURLOPT_COOKIEJAR] = $cookieFile;
            $options[CURLOPT_COOKIEFILE] = $cookieFile;
        }
        if (is_array($postData)) {
            $options[CURLOPT_POST] = true;
            $options[CURLOPT_POSTFIELDS] = http_build_query($postData);
        }
        curl_setopt_array($ch, $options);

        $body = curl_exec($ch);
        $errno = curl_errno($ch);
        $error = curl_error($ch);
        $httpCode = (int) curl_getinfo($ch, CURLINFO_HTTP_CODE);
        $finalUrl = (string) curl_getinfo($ch, CURLINFO_EFFECTIVE_URL);
        curl_close($ch);

        if ($body === false || $errno !== 0 || $error !== '') {
            // Prefer the numeric error code; the message-text match is kept only as a fallback
            // because the wording belongs to libcurl and is not a stable contract.
            $tooManyRedirects = $errno === CURLE_TOO_MANY_REDIRECTS
                || (strpos($error, 'Maximum (') !== false && strpos($error, 'redirects followed') !== false);

            if ($tooManyRedirects) {
                $msg = 'Scopus 跳转过多(可能触发登录/验证页面),请稍后重试或先在浏览器登录 Scopus';
            } elseif ($error !== '') {
                $msg = $error;
            } else {
                $msg = 'cURL error ' . $errno;
            }

            return ['ok' => false, 'msg' => $msg, 'body' => '', 'http_code' => $httpCode, 'url' => $finalUrl];
        }

        if ($httpCode >= 400 || $httpCode === 0) {
            return ['ok' => false, 'msg' => 'HTTP ' . $httpCode, 'body' => (string) $body, 'http_code' => $httpCode, 'url' => $finalUrl];
        }

        return ['ok' => true, 'msg' => '', 'body' => (string) $body, 'http_code' => $httpCode, 'url' => $finalUrl];
    }

    /**
     * Look for sign-in or CAPTCHA wording in a Scopus response page.
     *
     * Matching is case-insensitive and runs on the page text with HTML tags stripped.
     *
     * @param string $html Raw response body.
     *
     * @return string A user-facing message when such wording is found, '' otherwise
     *                (including when $html is empty).
     */
    private function detectScopusBlocking($html)
    {
        if (empty($html)) {
            return '';
        }

        $text = strip_tags($html);

        // "institutional sign in" always contains "sign in", so a single check covers both.
        if (stripos($text, 'sign in') !== false) {
            return 'Scopus 返回登录页,当前环境未授权访问作者详情页面';
        }

        if (stripos($text, 'captcha') !== false || stripos($text, 'are you a robot') !== false) {
            return 'Scopus 触发了人机验证,当前接口无法自动通过';
        }

        return '';
    }

    /**
     * Build the diagnostic payload attached to responses in debug mode.
     *
     * @param string $finalUrl Effective URL of the request.
     * @param int    $httpCode HTTP status code of the response.
     * @param string $html     Raw response body.
     *
     * @return array{final_url: string, http_code: int, page_snippet: string, contains_signin: int, contains_captcha: int}
     *         `page_snippet` is the first 300 characters of the tag-stripped, entity-decoded,
     *         whitespace-collapsed page text; the two flags are 1 when that text contains
     *         "sign in" / "captcha" (case-insensitive), else 0.
     */
    private function buildDebugInfo($finalUrl, $httpCode, $html)
    {
        $normalized = html_entity_decode(strip_tags((string) $html), ENT_QUOTES, 'UTF-8');

        // preg_replace() with the /u modifier returns null for malformed UTF-8, which would
        // blank the snippet and both flags. Replace invalid byte sequences first.
        if (!mb_check_encoding($normalized, 'UTF-8')) {
            $normalized = mb_convert_encoding($normalized, 'UTF-8', 'UTF-8');
        }

        $collapsed = preg_replace('/\s+/u', ' ', $normalized);
        if ($collapsed !== null) {
            $normalized = $collapsed;
        }

        $snippet = mb_substr($normalized, 0, 300, 'UTF-8');

        return [
            'final_url' => (string) $finalUrl,
            'http_code' => (int) $httpCode,
            'page_snippet' => $snippet,
            'contains_signin' => stripos($normalized, 'sign in') !== false ? 1 : 0,
            'contains_captcha' => stripos($normalized, 'captcha') !== false ? 1 : 0,
        ];
    }

    private function extractScopusLookupForm($html)
    {
        $ret = [
            'action' => '',
            'hidden_fields' => [],
        ];
        if (empty($html)) {
            return $ret;
        }

        // Look for the form that mentions "author" first, to reduce false matches while parsing.
        if (preg_match('/