~`|^]+/i';

    // Map of error codes to error messages (standardized error handling).
    private const ERROR_CODES = [
        'EMPTY_STRING' => 'Input string is empty (preprocessed)',
        'NO_MATCH' => 'No valid DOI detected',
        'INVALID_AFTER_CLEAN' => 'No effective DOI after cleaning',
        'FORCE_EXTRACT_FAILED' => 'Forced extraction still has no valid DOI',
        'EXTRACTION_EXCEPTION' => 'Exception occurred during DOI extraction process',
    ];

    /**
     * Queue the unprocessed references of an article for detail lookup.
     *
     * Selects the rows of `production_article_refer` that match the given
     * article / production article with state = 0, is_deal = 2 and a non-empty
     * refer_doi, and pushes each row onto the `ArticleReferDetailQueue` queue.
     *
     * @param array $aParam Reads the keys `article_id` and `p_article_id`.
     * @return string JSON-encoded `{status, msg}`: status 1 when the rows were
     *                queued, status 2 when a parameter is missing or there is
     *                nothing to process.
     */
    public function top($aParam = [])
    {
        // Article ID
        $iArticleId = empty($aParam['article_id']) ? '' : $aParam['article_id'];
        if (empty($iArticleId)) {
            return json_encode(['status' => 2, 'msg' => 'Please select an article' . json_encode($aParam)]);
        }

        // Production article ID
        $iPArticleId = empty($aParam['p_article_id']) ? 0 : $aParam['p_article_id'];
        if (empty($iPArticleId)) {
            return json_encode(['status' => 2, 'msg' => 'Please select an production article' . json_encode($aParam)]);
        }

        // Fetch the references that have not been processed yet.
        $aWhere = [
            'p_article_id' => $iPArticleId,
            'article_id' => $iArticleId,
            'state' => 0,
            'refer_doi' => ['<>', ''],
            'is_deal' => 2,
        ];
        $aResult = Db::name('production_article_refer')
            ->field('article_id,p_article_id,p_refer_id,refer_doi')
            ->where($aWhere)
            ->select();
        if (empty($aResult)) {
            return json_encode(['status' => 2, 'msg' => 'The reference data to be processed is empty' . json_encode($aParam)]);
        }

        foreach ($aResult as $value) {
            if (empty($value['refer_doi'])) {
                continue;
            }
            // Hand the row to the reference-detail queue job.
            \think\Queue::push('app\api\job\ArticleReferDetailQueue@fire', $value, 'ArticleReferDetailQueue');
        }

        return json_encode(['status' => 1, 'msg' => 'Add to reference processing queue']);
    }

    /**
     * Resolve one reference through the DOI citation service and store the result.
     *
     * Loads the reference row, requests a formatted citation for its DOI, and
     * updates the row:
     *  - empty response: the original `refer_content` is stored as `refer_frag`
     *    with type "other";
     *  - a citation with exactly three periods is split into author / title /
     *    journal-and-issue and stored as type "journal";
     *  - anything else is stored as type "other" with an "Available at" DOI link.
     *
     * @param array $aParam Reads the keys `p_refer_id` and `p_article_id`.
     * @return string JSON-encoded `{status, msg}`: 1 = updated, 2 = missing
     *                parameter / row not found / empty service response,
     *                3 = database update failed, 4 = the row has no DOI.
     */
    public function get($aParam = [])
    {
        // Reference ID
        $iPReferId = empty($aParam['p_refer_id']) ? 0 : $aParam['p_refer_id'];
        if (empty($iPReferId)) {
            return json_encode(['status' => 2, 'msg' => 'Please select a reference' . json_encode($aParam)]);
        }

        // Production article ID
        $iPArticleId = empty($aParam['p_article_id']) ? 0 : $aParam['p_article_id'];
        if (empty($iPArticleId)) {
            return json_encode(['status' => 2, 'msg' => 'Please select an production article' . json_encode($aParam)]);
        }

        // Load the reference row.
        $aWhere = ['p_refer_id' => $iPReferId, 'p_article_id' => $iPArticleId, 'state' => 0];
        $aRefer = Db::name('production_article_refer')->field('refer_doi,refer_content')->where($aWhere)->find();
        if (empty($aRefer)) {
            return json_encode(['status' => 2, 'msg' => 'No reference records found' . json_encode($aParam)]);
        }
        if (empty($aRefer['refer_doi'])) {
            return json_encode(['status' => 4, 'msg' => 'Reference DOI is empty' . json_encode($aParam)]);
        }

        $sDoi = (string) $aRefer['refer_doi'];
        $sDoiLink = $this->buildDoiLink($sDoi);

        // Percent-encode the whole DOI, not just "/": characters such as "#", "&",
        // "?" or "+" would otherwise corrupt the query string. Decoding first keeps
        // a DOI that was stored already percent-encoded from being double-encoded.
        $url = 'https://citation.doi.org/format?doi=' . rawurlencode(rawurldecode($sDoi))
            . '&style=cancer-translational-medicine&lang=en-US';
        $res = myGet($url);

        // Drop everything up to and including the first period (presumably the
        // citation's leading list number -- confirm against the service output).
        // When there is no period, keep the text whole instead of losing its
        // first character to `false + 1`.
        $sBody = is_string($res) ? $res : '';
        $iDotPos = strpos($sBody, '.');
        $frag = trim($iDotPos === false ? $sBody : substr($sBody, $iDotPos + 1));

        if (empty($frag)) {
            // Nothing usable came back: fall back to the raw reference text.
            $aUpdate = [
                'refer_frag' => $aRefer['refer_content'],
                'refer_type' => 'other',
                'is_deal' => 1,
                'update_time' => time(),
            ];
            Db::name('production_article_refer')->where(['p_refer_id' => $iPReferId])->limit(1)->update($aUpdate);
            // Queue for AI-based reference lookup (currently disabled):
            // \think\Queue::push('app\api\job\AiCheckReferByDoi@fire',$aParam,'AiCheckReferByDoi');
            return json_encode(['status' => 2, 'msg' => 'The data obtained from the interface is empty' . $url]);
        }

        // Build the column values to store.
        $update = [];
        if (mb_substr_count($frag, '.') == 3) {
            // Exactly three periods: treated as "author. title. journal-and-issue."
            $aParts = explode('.', $frag);
            $update['author'] = prgeAuthor($aParts[0]);
            $update['title'] = trim($aParts[1]);

            $bj = bekjournal($aParts[2]);
            $sJournalRaw = trim($bj[0]);
            $joura = formateJournal($sJournalRaw);
            $update['joura'] = $joura;
            $update['refer_type'] = 'journal';
            // is_ja is 1 when formateJournal() changed the journal name, else 0.
            $update['is_ja'] = $joura == $sJournalRaw ? 0 : 1;

            // Normalise hyphens to en dashes, strip spaces, then expand an
            // abbreviated end page (added 2025-11-27).
            $sDateNo = str_replace(' ', '', str_replace('-', '–', trim($bj[1])));
            $update['dateno'] = $this->expandAbbreviatedPageRange($sDateNo);

            $update['doilink'] = $sDoiLink;
            $update['cs'] = 1;
        } else {
            // Unrecognised shape: keep the citation text and append the DOI link.
            $update['refer_type'] = 'other';
            $update['refer_frag'] = $frag . ' Available at: ' . PHP_EOL . $sDoiLink;
            $update['cs'] = 1;
            // Queue for AI-based reference lookup (currently disabled):
            // \think\Queue::push('app\api\job\AiCheckReferByDoi@fire',$aParam,'AiCheckReferByDoi');
        }

        // Persist the result.
        $update += ['is_deal' => 1, 'update_time' => time()];
        $result = Db::name('production_article_refer')->where(['p_refer_id' => $iPReferId])->limit(1)->update($update);
        if ($result === false) {
            return json_encode(['status' => 3, 'msg' => 'Update failed' . json_encode($update)]);
        }

        return json_encode(['status' => 1, 'msg' => 'Update successful']);
    }

    /**
     * Return a resolvable link for a stored DOI value.
     *
     * A value that already contains "http" is returned unchanged; otherwise
     * "https://doi.org/" is prepended.
     *
     * @param string $sDoi Stored DOI or DOI URL.
     * @return string
     */
    private function buildDoiLink($sDoi)
    {
        $sDoi = (string) $sDoi;

        return strpos($sDoi, 'http') === false ? 'https://doi.org/' . $sDoi : $sDoi;
    }

    /**
     * Expand an abbreviated end page in a "<prefix>:<first>–<last>" string.
     *
     * Example: "12(3):123–45" becomes "12(3):123–145". The input is returned
     * unchanged unless it has exactly one ":" and exactly one en dash after it,
     * both pages consist only of digits, the end page is shorter than the start
     * page, and the expanded end page is not smaller than the start page.
     *
     * @param string $sDateNo Volume/issue/page string, already using en dashes.
     * @return string
     */
    private function expandAbbreviatedPageRange($sDateNo)
    {
        $sDateNo = (string) $sDateNo;

        $aSegments = explode(':', $sDateNo);
        if (count($aSegments) !== 2) {
            return $sDateNo;
        }

        $aPages = explode('–', $aSegments[1]);
        if (count($aPages) !== 2) {
            return $sDateNo;
        }

        list($sFirst, $sLast) = $aPages;
        // Only purely numeric pages can be expanded safely ("e123" or an empty
        // end page must be left alone).
        if (!ctype_digit($sFirst) || !ctype_digit($sLast)) {
            return $sDateNo;
        }

        $iMissing = strlen($sFirst) - strlen($sLast);
        if ($iMissing <= 0) {
            return $sDateNo;
        }

        // Borrow the leading digits of the start page, e.g. "123" + "45" -> "145".
        $sExpanded = substr($sFirst, 0, $iMissing) . $sLast;
        if ((int) $sExpanded < (int) $sFirst) {
            // Would yield a descending range; keep the original text instead.
            return $sDateNo;
        }

        return $aSegments[0] . ':' . $sFirst . '–' . $sExpanded;
    }

    // NOTE: disabled DOI-extraction implementation. It is kept as-is apart from
    // line breaks, and it continues beyond this point.
    // /**
    //  * 实例方法:提取单个DOI(核心逻辑,生产级优化)
    //  * @param string $str 待检测字符串
    //  * @param bool $standardize 是否标准化DOI(转小写)
    //  * @param bool $forceExtract 是否强制提取(忽略微小格式瑕疵)
    //  * @return array 提取结果(含错误码、错误信息、DOI)
    //  */
    // // public function extractDoiFromString(string $str, bool $standardize = true, bool $forceExtract = false): array
    // // {
    // // // 初始化标准化结果
    // // $result = [
    // // 'has_doi' => false,
    // // 'doi' => null,
    // // 'error_code' => null,
    // // 'error_msg' => null,
    // // ];
    // // try {
    // // // 严格类型校验(防止非字符串参数传入)
    // // if (!is_string($str)) {
    // // throw new InvalidArgumentException('输入参数必须为字符串类型', 1001);
    // // }
    // // // 字符串预处理(生产级:全角转半角、URL解码、HTML标签移除等)
    // // $processedStr = $this->preprocessString($str);
    // // if (trim($processedStr) === '') {
    // // $result['error_code'] = 'EMPTY_STRING';
    // // $result['error_msg'] = self::ERROR_CODES['EMPTY_STRING'];
    // // return $result;
    // // }
    // // // 性能优化:用preg_match仅匹配首个DOI,替代preg_match_all
    // // // 优化后的带前缀版正则
    // // $pattern = '/(?:doi[:\s]*|DOI[:\s]*)?\b10\.\d+(?:\.\d+)*\/[a-zA-Z0-9._\-!()%\/:;@$&+=?#[\]<>~`|^'"{},\\\\]+(?![\w?#])/i";
    // // if (!preg_match($pattern, $processedStr, $match)) {
    // // $result['error_code'] = 'NO_MATCH';
    // // $result['error_msg'] = self::ERROR_CODES['NO_MATCH'];
    // // return $result;
    // // }
    // // // 清洗并验证首个DOI
    // // $cleanDoi = $this->cleanAndValidateDoi($match[0], $standardize, $forceExtract);
    // // if ($cleanDoi !== null) {
    // // $result['has_doi']
= true; // // $result['doi'] = $cleanDoi; // // } else { // // // 根据是否强制提取设置错误信息 // // $errorKey = $forceExtract ? 'FORCE_EXTRACT_FAILED' : 'INVALID_AFTER_CLEAN'; // // $result['error_code'] = $errorKey; // // $result['error_msg'] = self::ERROR_CODES[$errorKey]; // // } // // } catch (InvalidArgumentException $e) { // // // 业务异常:标准化错误码和信息 // // $result['error_code'] = 'INVALID_PARAM'; // // $result['error_msg'] = '参数错误:' . $e->getMessage(); // // } catch (Exception $e) { // // // 系统异常:隐藏敏感信息,记录通用错误 // // $result['error_code'] = 'EXTRACTION_EXCEPTION'; // // $result['error_msg'] = self::ERROR_CODES['EXTRACTION_EXCEPTION'] . ':' . $e->getMessage(); // // } // // return $result; // // } // // /** // // * 字符串预处理(生产级:覆盖所有编码/格式干扰场景) // // * @param string $str 原始字符串 // // * @return string 预处理后的纯净字符串 // // */ // // private function preprocessString(string $str): string // // { // // // 1. 全角转半角(解决中文全角字符干扰,如10.1007/s11042-020-10103-4) // // $str = $this->fullWidthToHalfWidth($str); // // // 2. 移除所有HTML标签(解决网页文本中DOI被