diff --git a/application/api/controller/ExpertFinder.php b/application/api/controller/ExpertFinder.php index fb37254..2078b9a 100644 --- a/application/api/controller/ExpertFinder.php +++ b/application/api/controller/ExpertFinder.php @@ -53,13 +53,58 @@ class ExpertFinder extends Base return jsonSuccess($result); } + /** + * 测试单个专家的国家解析(同步执行,立刻返回结果) + */ + public function cityTest(){ + $data = $this->request->post(); + $rule = new Validate([ + "expert_id"=>"require", + "aff"=>"require" + ]); + if(!$rule->check($data)){ + return jsonError($rule->getError()); + } + $service = new ExpertFinderService(); + $service->fillExpertCountry($data['expert_id'], $data['aff']); + $expert = Db::name('expert')->where('expert_id', intval($data['expert_id']))->find(); + return jsonSuccess([ + 'country_id' => isset($expert['country_id']) ? $expert['country_id'] : null, + 'country' => isset($expert['country']) ? $expert['country'] : null, + ]); + } + + /** + * 启动国家解析:找到第一个缺 country 的专家推入队列, + * 队列处理完后会自动链式找下一个,直到全部处理完。 + * 只需调一次即可。 + */ + public function batchFillCountry(){ + $service = new ExpertFinderService(); + $started = $service->enqueueNextCountryFill(0); + + $pending = Db::name('expert') + ->where('affiliation', '<>', '') + ->where(function ($q) { + $q->where('country_id', 0) + ->whereOr('country_id', 'null') + ->whereOr('country', ''); + }) + ->where('state', '<>', 5) + ->count(); + + return jsonSuccess([ + 'started' => $started, + 'pending' => $pending, + ]); + } + /** * Get experts from local database */ public function getList() { $field = trim($this->request->param('field', '')); - $majorId = intval($this->request->param('major_id', 0)); $state = $this->request->param('state', '-1'); $keyword = trim($this->request->param('keyword', '')); $noRecent = intval($this->request->param('no_recent', 0)); @@ -69,16 +114,13 @@ class ExpertFinder extends Base $minExperts = max(0, intval($this->request->param('min_experts', 50))); $query = Db::name('expert')->alias('e'); - $needJoin = ($field !== '' || $majorId > 0); + $needJoin = ($field !== ''); if ($needJoin) { $query->join('t_expert_field ef', 'ef.expert_id = e.expert_id AND ef.state = 0', 'inner'); if ($field !== '') { $query->where('ef.field', 'like', '%' . $field . '%'); } - if ($majorId > 0) { - $query->where('ef.major_id', $majorId); - } $query->group('e.expert_id'); } @@ -108,6 +150,7 @@ class ExpertFinder extends Base $item['fields'] = Db::name('expert_field') ->where('expert_id', $item['expert_id']) ->where('state', 0) + ->group('field') ->column('field'); } diff --git a/application/api/controller/ExpertManage.php b/application/api/controller/ExpertManage.php index 55cd51f..854c9da 100644 --- a/application/api/controller/ExpertManage.php +++ b/application/api/controller/ExpertManage.php @@ -261,7 +261,6 @@ class ExpertManage extends Base { $data = $this->request->post(); $expertId = intval(isset($data['expert_id']) ? $data['expert_id'] : 0); - $majorId = intval(isset($data['major_id']) ? $data['major_id'] : 0); $field = trim(isset($data['field']) ? $data['field'] : ''); if (!$expertId || $field === '') { @@ -279,7 +278,6 @@ class ExpertManage extends Base $id = Db::name('expert_field')->insertGetId([ 'expert_id' => $expertId, - 'major_id' => $majorId, 'field' => $field, 'state' => 0, ]); @@ -297,7 +295,16 @@ class ExpertManage extends Base return jsonError('expert_field_id is required'); } - Db::name('expert_field')->where('expert_field_id', $efId)->update(['state' => 1]); + // 若 t_expert_field 同时存“领域行 + 论文行”,删除领域时应同时软删该领域下所有论文行 + $row = Db::name('expert_field')->where('expert_field_id', $efId)->find(); + if ($row) { + Db::name('expert_field') + ->where('expert_id', intval($row['expert_id'])) + ->where('field', (string)$row['field']) + ->update(['state' => 1]); + } else { + Db::name('expert_field')->where('expert_field_id', $efId)->update(['state' => 1]); + } return jsonSuccess([]); } @@ -459,7 +466,7 @@ class ExpertManage extends Base /** * 批量保存专家领域 * @param int $expertId - * @param array $fields [{"major_id":1,"field":"xxx"}, ...] + * @param array $fields [{"field":"xxx"}, ...] */ private function saveExpertFields($expertId, $fields) { @@ -471,7 +478,6 @@ class ExpertManage extends Base } foreach ($fields as $f) { - $majorId = intval(isset($f['major_id']) ? $f['major_id'] : 0); $fieldName = trim(isset($f['field']) ? $f['field'] : ''); if ($fieldName === '') continue; @@ -484,7 +490,6 @@ class ExpertManage extends Base Db::name('expert_field')->insert([ 'expert_id' => $expertId, - 'major_id' => $majorId, 'field' => $fieldName, 'state' => 0, ]); diff --git a/application/api/job/PromotionPrepare.php b/application/api/job/PromotionPrepare.php new file mode 100644 index 0000000..6fc4424 --- /dev/null +++ b/application/api/job/PromotionPrepare.php @@ -0,0 +1,37 @@ +delete(); + return; + } + + try { + $result = $service->prepareTask($taskId); + $service->log('[PromotionPrepare] task=' . $taskId + . ' prepared=' . $result['prepared'] + . ' failed=' . $result['failed']); + } catch (\Exception $e) { + $service->log('[PromotionPrepare] task=' . $taskId . ' exception=' . $e->getMessage()); + } + + $job->delete(); + } +} diff --git a/application/common/CountryResolverService.php b/application/common/CountryResolverService.php new file mode 100644 index 0000000..5b83280 --- /dev/null +++ b/application/common/CountryResolverService.php @@ -0,0 +1,108 @@ + 'US', 'en_name' => 'United States'],任一可为空。 + */ +class CountryResolverService +{ + private $chatUrl = ''; + private $chatModel = ''; + private $apiKey = ''; + private $timeout = 60; + + public function __construct(array $config = []) + { + if (isset($config['chat_url'])) $this->chatUrl = (string)$config['chat_url']; + if (isset($config['chat_model'])) $this->chatModel = (string)$config['chat_model']; + if (isset($config['api_key'])) $this->apiKey = (string)$config['api_key']; + if (isset($config['timeout'])) $this->timeout = max(5, intval($config['timeout'])); + } + + public function resolve(string $affiliation): array + { + $affiliation = trim($affiliation); + if ($affiliation === '' || $this->chatUrl === '' || $this->chatModel === '') { + return []; + } + + $messages = [ + [ + 'role' => 'system', + 'content' => 'You extract the country from an academic affiliation string. Reply ONLY with minified JSON.', + ], + [ + 'role' => 'user', + 'content' => "Affiliation:\n" . $affiliation . "\n\nReturn JSON with keys:\n- code: ISO 3166-1 alpha-3 (preferred)\n- en_name: English country name (optional)\nIf uncertain, return {\"code\":\"\",\"en_name\":\"\"}.", + ], + ]; + + $payload = [ + 'model' => $this->chatModel, + 'temperature' => 0, + 'messages' => $messages, + ]; + + $ch = curl_init(); + curl_setopt($ch, CURLOPT_URL, $this->chatUrl); + curl_setopt($ch, CURLOPT_POST, true); + curl_setopt($ch, CURLOPT_POSTFIELDS, json_encode($payload, JSON_UNESCAPED_UNICODE)); + curl_setopt($ch, CURLOPT_RETURNTRANSFER, true); + curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, min(10, $this->timeout)); + curl_setopt($ch, CURLOPT_TIMEOUT, $this->timeout); + + $headers = [ + 'Content-Type: application/json', + ]; + if ($this->apiKey !== '') { + $headers[] = 'Authorization: Bearer ' . $this->apiKey; + } + curl_setopt($ch, CURLOPT_HTTPHEADER, $headers); + + $raw = curl_exec($ch); + if ($raw === false) { + curl_close($ch); + return []; + } + $httpCode = intval(curl_getinfo($ch, CURLINFO_HTTP_CODE)); + curl_close($ch); + if ($httpCode < 200 || $httpCode >= 300) { + return []; + } + + $data = json_decode($raw, true); + if (!is_array($data)) return []; + + // 兼容 OpenAI chat/completions 结构:choices[0].message.content + $content = ''; + if (isset($data['choices'][0]['message']['content'])) { + $content = (string)$data['choices'][0]['message']['content']; + } elseif (isset($data['content'])) { + $content = (string)$data['content']; + } + + $content = trim($content); + if ($content === '') return []; + + // 尝试提取 JSON(允许模型包裹 ```json) + if (preg_match('/\{.*\}/s', $content, $m)) { + $content = $m[0]; + } + + $obj = json_decode($content, true); + if (!is_array($obj)) return []; + + $code = isset($obj['code']) ? strtoupper(trim((string)$obj['code'])) : ''; + $enName = isset($obj['en_name']) ? trim((string)$obj['en_name']) : ''; + + // 防止模型乱回长段文本 + if (strlen($code) > 8) $code = ''; + if (strlen($enName) > 128) $enName = mb_substr($enName, 0, 128); + + return ['code' => $code, 'en_name' => $enName]; + } +} + diff --git a/application/common/ExpertFinderService.php b/application/common/ExpertFinderService.php index b11e975..1728685 100644 --- a/application/common/ExpertFinderService.php +++ b/application/common/ExpertFinderService.php @@ -3,7 +3,9 @@ namespace app\common; use think\Db; +use think\Queue; use GuzzleHttp\Client; +use think\Env; class ExpertFinderService { @@ -74,64 +76,105 @@ class ExpertFinderService $fieldEnrich = 0; foreach ($experts as $expert) { - - $email = strtolower(trim($expert['email'])); if (empty($email)) { continue; } - $exists = Db::name('expert')->where('email', $email)->find(); + $exists = Db::name('expert')->where('email', $email)->find(); + $expertId = null; if ($exists) { $existing++; - $fieldEnrich += $this->enrichExpertField($exists['expert_id'], $field); - continue; - } - - $insert = [ - 'name' => mb_substr($expert['name'], 0, 255), - 'email' => mb_substr($email, 0, 128), - 'affiliation' => mb_substr($expert['affiliation'], 0, 128), - 'source' => mb_substr($source, 0, 128), - 'ctime' => time(), - 'ltime' => 0, - 'state' => 0, - ]; - - try { - $expertId = Db::name('expert')->insertGetId($insert); - $this->enrichExpertField($expertId, $field); - if(isset($expert['papers'])&&is_array($expert['papers'])){ - $this->savePaper($expertId, $expert['papers']); + $expertId = intval($exists['expert_id']); + } else { + try { + $expertId = Db::name('expert')->insertGetId([ + 'name' => mb_substr($expert['name'], 0, 255), + 'email' => mb_substr($email, 0, 128), + 'affiliation' => mb_substr($expert['affiliation'], 0, 128), + 'source' => mb_substr($source, 0, 128), + 'ctime' => time(), + 'ltime' => 0, + 'state' => 0, + ]); + $inserted++; + } catch (\Exception $e) { + $existing++; + continue; } - $inserted++; - } catch (\Exception $e) { - $existing++; } + + $papers = (isset($expert['papers']) && is_array($expert['papers'])) ? $expert['papers'] : []; + $fieldEnrich += $this->saveFieldWithPapers($expertId, $field, $source, $papers); } return ['inserted' => $inserted, 'existing' => $existing, 'field_enriched' => $fieldEnrich]; } - private function savePaper($expertId, $papers) + /** + * 保存领域与论文的关联。 + * 有论文时:每篇论文一行(expert_id + field + source + paper_article_id 去重)。 + * 无论文时:只存一条领域行(expert_id + field 去重)。 + */ + private function saveFieldWithPapers($expertId, $field, $source, $papers) { - foreach ($papers as $paper){ - $check = Db::name('expert_paper')->where("expert_id",$expertId)->where('paper_article_id',$paper['article_id'])->find(); - if($check){ - continue; - } - $insert = [ - 'expert_id' => $expertId, - 'paper_title' => isset($paper['title'])?mb_substr($paper['title'], 0, 255):"", - 'paper_article_id' => $paper['article_id'] ?? 0, - 'paper_journal' => isset($paper['journal'])?mb_substr($paper['journal'], 0, 128):"", - 'ctime' => time(), - ]; - Db::name('expert_paper')->insert($insert); - } - } + $field = trim($field); + if (empty($field)) return 0; + $added = 0; + + if (empty($papers)) { + $exists = Db::name('expert_field') + ->where('expert_id', $expertId) + ->where('field', $field) + ->where('state', 0) + ->find(); + if (!$exists) { + Db::name('expert_field')->insert([ + 'expert_id' => $expertId, + 'source' => mb_substr((string)$source, 0, 64), + 'field' => mb_substr($field, 0, 128), + 'paper_title' => '', + 'paper_article_id' => '', + 'paper_journal' => '', + 'state' => 0, + ]); + $added = 1; + } + } else { + foreach ($papers as $paper) { + $articleId = isset($paper['article_id']) ? (string)$paper['article_id'] : ''; + if ($articleId === '' || $articleId === '0') { + continue; + } + + $check = Db::name('expert_field') + ->where('expert_id', $expertId) + ->where('field', $field) + ->where('source', $source) + ->where('paper_article_id', $articleId) + ->where('state', 0) + ->find(); + if ($check) { + continue; + } + + Db::name('expert_field')->insert([ + 'expert_id' => $expertId, + 'source' => mb_substr((string)$source, 0, 64), + 'paper_title' => isset($paper['title']) ? mb_substr((string)$paper['title'], 0, 255) : '', + 'paper_article_id' => mb_substr($articleId, 0, 64), + 'paper_journal' => isset($paper['journal']) ? mb_substr((string)$paper['journal'], 0, 255) : '', + 'field' => mb_substr($field, 0, 128), + 'state' => 0, + ]); + $added++; + } + } + + return $added; + } public function getFetchLog($field, $source) { @@ -536,25 +579,94 @@ class ExpertFinderService ]; } - // ==================== DB Helpers ==================== + // ==================== Country Resolution ==================== - private function enrichExpertField($expertId, $field) + /** + * 启动国家解析链:找到下一个缺国家的专家推入队列。 + * 队列 Job 处理完一个后会再调此方法,自动找下一个,直到全部处理完。 + * 控制器只需调一次即可。 + * + * @param int $delay 延迟秒数(防止打满模型,默认1秒) + * @return bool 是否成功推入了一条 + */ + public function enqueueNextCountryFill($delay = 1) { - $field = trim($field); - if (empty($field)) return 0; - - $exists = Db::name('expert_field') - ->where('expert_id', $expertId) - ->where('field', $field) - ->where('state', 0) + $row = Db::name('expert') + ->where('affiliation', '<>', '') + ->where(function ($q) { + $q->where('country_id', 0) + ->whereOr('country_id', 'null') + ->whereOr('country', ''); + }) + ->where('state', '<>', 5) + ->field('expert_id, affiliation') + ->order('expert_id asc') ->find(); - if ($exists) return 0; - Db::name('expert_field')->insert([ - 'expert_id' => $expertId, - 'field' => mb_substr($field, 0, 128), - 'state' => 0, + + if (!$row) { + $this->log('[CountryFill] no more pending experts'); + return false; + } + + $data = [ + 'expert_id' => intval($row['expert_id']), + 'affiliation' => trim((string)$row['affiliation']), + ]; + + if ($delay > 0) { + Queue::later($delay, 'app\api\job\FillExpertCountry@fire', $data, 'FetchExperts'); + } else { + Queue::push('app\api\job\FillExpertCountry@fire', $data, 'FetchExperts'); + } + + return true; + } + + /** + * 对单个专家执行国家解析(同步),由队列 Job FillExpertCountry 调用,也可直接调用测试。 + */ + public function fillExpertCountry($expertId, $affiliation) + { + $affiliation = trim((string)$affiliation); + if ($affiliation === '') return; + + $resolver = new CountryResolverService([ + 'chat_url' => trim((string)Env::get('expert_country_chat_url', Env::get('citation_chat_url', 'http://chat.taimed.cn/v1/chat/completions'))), + 'chat_model' => trim((string)Env::get('expert_country_chat_model', Env::get('citation_chat_model', 'gpt-4.1'))), + 'api_key' => trim((string)Env::get('expert_country_chat_api_key', Env::get('citation_chat_api_key', ''))), + 'timeout' => max(20, intval(Env::get('expert_country_chat_timeout', 60))), ]); - return 1; + + $result = $resolver->resolve($affiliation); + if (empty($result)) return; + + $countryId = 0; + $enName = ''; + + if (!empty($result['code'])) { + $row = Db::name('country')->where('code', strtoupper(trim((string)$result['code'])))->find(); + if ($row) { + $countryId = intval($row['country_id']); + $enName = (string)$row['en_name']; + } + } + + if ($countryId === 0 && !empty($result['en_name'])) { + $row = Db::name('country') + ->whereRaw("LOWER(en_name) = ?", [strtolower(trim((string)$result['en_name']))]) + ->find(); + if ($row) { + $countryId = intval($row['country_id']); + $enName = (string)$row['en_name']; + } + } + + if ($countryId > 0 && $enName !== '') { + Db::name('expert')->where('expert_id', intval($expertId))->update([ + 'country_id' => $countryId, + 'country' => $enName, + ]); + } } // ==================== Text Helpers ==================== diff --git a/application/common/ProductionArticleRefer.php b/application/common/ProductionArticleRefer.php new file mode 100644 index 0000000..706ec8e --- /dev/null +++ b/application/common/ProductionArticleRefer.php @@ -0,0 +1,303 @@ +~`|^]+/i'; + + // 错误码与错误信息映射(标准化错误处理) + private const ERROR_CODES = [ + 'EMPTY_STRING' => 'Input string is empty (preprocessed))', + 'NO_MATCH' => 'No valid DOI detected', + 'INVALID_AFTER_CLEAN' => 'No effective DOI after cleaning', + 'FORCE_EXTRACT_FAILED' => 'Forced extraction still has no valid DOI', + 'EXTRACTION_EXCEPTION' => 'Exception occurred during DOI extraction process', + ]; + + /** + * 获取未处理的参考文献 + * + * @return void + */ + public function top($aParam = []) { + + //文章ID + $iArticleId = empty($aParam['article_id']) ? '' : $aParam['article_id']; + if(empty($iArticleId)){ + return json_encode(array('status' => 2,'msg' => 'Please select an article'.json_encode($aParam) )); + } + // 获取生产文章ID + $iPArticleId = empty($aParam['p_article_id']) ? 0 : $aParam['p_article_id']; + if(empty($iPArticleId)) { + return json_encode(array('status' => 2,'msg' => 'Please select an production article'.json_encode($aParam) )); + } + + //查询未处理过的数据 + $aWhere = ['p_article_id' => $iPArticleId,'article_id' => $iArticleId,'state' => 0,'refer_doi' => ['<>',''],'is_deal' => 2]; + $aResult = Db::name('production_article_refer')->field('article_id,p_article_id,p_refer_id,refer_doi')->where($aWhere)->select(); + if(empty($aResult)){ + return json_encode(array('status' => 2,'msg' => 'The reference data to be processed is empty'.json_encode($aParam))); + } + + //数据处理 + foreach ($aResult as $key => $value) { + if(empty($value['refer_doi'])){ + continue; + } + //调用获取参考文献详情队列 + \think\Queue::push('app\api\job\ArticleReferDetailQueue@fire', $value, 'ArticleReferDetailQueue'); + } + return json_encode(['status' => 1,'msg' => 'Add to reference processing queue']); + } + /** + * 处理参考文献 + * + * @return void + */ + public function get($aParam = []) { + // 获取生产文章ID + $iPReferId = empty($aParam['p_refer_id']) ? 0 : $aParam['p_refer_id']; + if(empty($iPReferId)) { + return json_encode(array('status' => 2,'msg' => 'Please select a reference'.json_encode($aParam) )); + } + // 获取生产文章ID + $iPArticleId = empty($aParam['p_article_id']) ? 0 : $aParam['p_article_id']; + if(empty($iPArticleId)) { + return json_encode(array('status' => 2,'msg' => 'Please select an production article'.json_encode($aParam) )); + } + //查询未处理过的数据 + $aWhere = ['p_refer_id' => $iPReferId,'p_article_id' => $iPArticleId,'state' => 0]; + $aRefer = Db::name('production_article_refer')->field('refer_doi,refer_content')->where($aWhere)->find(); + if(empty($aRefer)){ + return json_encode(array('status' => 2,'msg' => 'No reference records found'.json_encode($aParam))); + } + if(empty($aRefer['refer_doi'])){ + return json_encode(['status' => 4,'msg' => 'Reference DOI is empty'.json_encode($aParam)]); + } + + //数据处理 + $doi = str_replace('/', '%2F', $aRefer['refer_doi']); + $url = "https://citation.doi.org/format?doi=$doi&style=cancer-translational-medicine&lang=en-US"; + $res = myGet($url); + $frag = trim(substr($res, strpos($res, '.') + 1)); + if(empty($frag)){ + $aUpdate = ['refer_frag' => $aRefer['refer_content'],'refer_type' => 'other','is_deal' => 1,'update_time' => time()]; + $aWhere = ['p_refer_id' => $iPReferId]; + $result = Db::name('production_article_refer')->where($aWhere)->limit(1)->update($aUpdate); + //写入通过AI获取参考文献详情队列 + // \think\Queue::push('app\api\job\AiCheckReferByDoi@fire',$aParam,'AiCheckReferByDoi'); + return json_encode(array('status' => 2,'msg' => 'The data obtained from the interface is empty'.$url)); + } + + //整理数据入库 + $update = []; + if (mb_substr_count($frag, '.') != 3){ + $f = $frag . " Available at: " . PHP_EOL . "https://doi.org/" . $aRefer['refer_doi']; + $update['refer_type'] = "other"; + $update['refer_frag'] = $f; + $update['cs'] = 1; + //写入通过AI获取参考文献详情队列 + // \think\Queue::push('app\api\job\AiCheckReferByDoi@fire',$aParam,'AiCheckReferByDoi'); + } + if (mb_substr_count($frag, '.') == 3){ + $res = explode('.', $frag); + $update['author'] = prgeAuthor($res[0]); + $update['title'] = trim($res[1]); + $bj = bekjournal($res[2]); + $joura = formateJournal(trim($bj[0])); + $update['joura'] = $joura; + $is_js = 0; + if ($joura == trim($bj[0])) { + } + $update['refer_type'] = "journal"; + $update['is_ja'] = $joura == trim($bj[0]) ? 0 : 1; + $update['dateno'] = str_replace(' ', '', str_replace('-', '–', trim($bj[1]))); + //新增处理 期卷页码 20251127 start + if(!empty($update['dateno'])){ + $sStr = $update['dateno']; + $aStr = explode(':', $sStr); + if(!empty($aStr[1])){ + $parts = explode('–', $aStr[1]); + if(count($parts) == 2){ + $prefix = empty($parts[0]) ? 0 : intval($parts[0]); + $suffix = empty($parts[1]) ? 0 : intval($parts[1]); + if($prefix > $suffix){ + $prefixLen = strlen($prefix); + $suffixLen = strlen($suffix); + $missingLen = $prefixLen - $suffixLen; + if ($missingLen > 0) { + $fillPart = substr($prefix, 0, $missingLen); + $newSuffix = $fillPart . $suffix; + $update['dateno'] = $aStr[0].':'.$prefix.'–'.$newSuffix; + } + } + } + } + // if(empty($aStr[1])){ + // //写入通过AI获取参考文献详情队列 + // \think\Queue::push('app\api\job\AiCheckReferByDoi@fire',$aParam,'AiCheckReferByDoi'); + // } + } + //新增处理 期卷页码 20251127 end + $update['doilink'] = strpos($aRefer['refer_doi'],"http")===false?"https://doi.org/" . $aRefer['refer_doi']:$aRefer['refer_doi']; + $update['cs'] = 1; + } + //数据库更新 + if(empty($update)){ + return json_encode(array('status' => 3,'msg' => 'Update data to empty'.$url.'====='.$frag)); + } + $aWhere = ['p_refer_id' => $iPReferId]; + $update += ['is_deal' => 1,'update_time' => time()]; + $result = Db::name('production_article_refer')->where($aWhere)->limit(1)->update($update); + if($result === false){ + return json_encode(array('status' => 3,'msg' => 'Update failed'.json_encode($update))); + } + return json_encode(['status' => 1,'msg' => 'Update successful']); + } + + // /** + // * 实例方法:提取单个DOI(核心逻辑,生产级优化) + // * @param string $str 待检测字符串 + // * @param bool $standardize 是否标准化DOI(转小写) + // * @param bool $forceExtract 是否强制提取(忽略微小格式瑕疵) + // * @return array 提取结果(含错误码、错误信息、DOI) + // */ + // // public function extractDoiFromString(string $str, bool $standardize = true, bool $forceExtract = false): array + // // { + // // // 初始化标准化结果 + // // $result = [ + // // 'has_doi' => false, + // // 'doi' => null, + // // 'error_code' => null, + // // 'error_msg' => null, + // // ]; + + // // try { + // // // 严格类型校验(防止非字符串参数传入) + // // if (!is_string($str)) { + // // throw new InvalidArgumentException('输入参数必须为字符串类型', 1001); + // // } + // // // 字符串预处理(生产级:全角转半角、URL解码、HTML标签移除等) + // // $processedStr = $this->preprocessString($str); + // // if (trim($processedStr) === '') { + // // $result['error_code'] = 'EMPTY_STRING'; + // // $result['error_msg'] = self::ERROR_CODES['EMPTY_STRING']; + // // return $result; + // // } + + // // // 性能优化:用preg_match仅匹配首个DOI,替代preg_match_all + // // // 优化后的带前缀版正则 + // // $pattern = '/(?:doi[:\s]*|DOI[:\s]*)?\b10\.\d+(?:\.\d+)*\/[a-zA-Z0-9._\-!()%\/:;@$&+=?#[\]<>~`|^'"{},\\\\]+(?![\w?#])/i"; + // // if (!preg_match($pattern, $processedStr, $match)) { + // // $result['error_code'] = 'NO_MATCH'; + // // $result['error_msg'] = self::ERROR_CODES['NO_MATCH']; + // // return $result; + // // } + + // // // 清洗并验证首个DOI + // // $cleanDoi = $this->cleanAndValidateDoi($match[0], $standardize, $forceExtract); + // // if ($cleanDoi !== null) { + // // $result['has_doi'] = true; + // // $result['doi'] = $cleanDoi; + // // } else { + // // // 根据是否强制提取设置错误信息 + // // $errorKey = $forceExtract ? 'FORCE_EXTRACT_FAILED' : 'INVALID_AFTER_CLEAN'; + // // $result['error_code'] = $errorKey; + // // $result['error_msg'] = self::ERROR_CODES[$errorKey]; + // // } + + // // } catch (InvalidArgumentException $e) { + // // // 业务异常:标准化错误码和信息 + // // $result['error_code'] = 'INVALID_PARAM'; + // // $result['error_msg'] = '参数错误:' . $e->getMessage(); + // // } catch (Exception $e) { + // // // 系统异常:隐藏敏感信息,记录通用错误 + // // $result['error_code'] = 'EXTRACTION_EXCEPTION'; + // // $result['error_msg'] = self::ERROR_CODES['EXTRACTION_EXCEPTION'] . ':' . $e->getMessage(); + // // } + + // // return $result; + // // } + + // // /** + // // * 字符串预处理(生产级:覆盖所有编码/格式干扰场景) + // // * @param string $str 原始字符串 + // // * @return string 预处理后的纯净字符串 + // // */ + // // private function preprocessString(string $str): string + // // { + // // // 1. 全角转半角(解决中文全角字符干扰,如10.1007/s11042-020-10103-4) + // // $str = $this->fullWidthToHalfWidth($str); + // // // 2. 移除所有HTML标签(解决网页文本中DOI被