diff --git a/application/api/controller/Crossrefdoi.php b/application/api/controller/Crossrefdoi.php
new file mode 100644
index 0000000..8c17c6e
--- /dev/null
+++ b/application/api/controller/Crossrefdoi.php
@@ -0,0 +1,310 @@
+request->post() : $aParam;
+
+        $iPReferId = empty($aParam['p_refer_id']) ? 0 : $aParam['p_refer_id'];
+        if(empty($iPReferId)){
+            return json_encode(['status' => 2,'msg' => 'Please select the reference to be queried']);
+        }
+        $aWhere = ['p_refer_id' => $iPReferId,'state' => ['in', [0,2]]];
+        $aRefer = Db::name('production_article_refer')->field('title,joura,author,refer_doi,doilink,cs')->where($aWhere)->find();
+        if(empty($aRefer['refer_doi'])){
+            return json_encode(['status' => 3,'msg' => 'The doi of the reference is empty']);
+        }
+
+        $finalResult = [];
+        $sDoi = empty($aRefer['refer_doi']) ? '' : $aRefer['refer_doi'];
+        $sCheckDoi = $this->filterValidDoi($sDoi); // reject malformed DOIs
+        if(empty($sCheckDoi)){
+            return json_encode(['status' => 4,'msg' => 'Doi does not comply with the rules']);
+        }
+
+        // Query the single DOI (with retries).
+        // NOTE(review): when the lookup fails the failure is only stored in $result, which is
+        // overwritten by the update() call below, and execution continues. Because the DOI
+        // link falls back to https://doi.org/<doi>, a row with an empty doilink can still be
+        // updated and flagged cs = 1 after a failed lookup - confirm whether an early return
+        // is wanted here.
+        $aDoiInfo = $this->fetchSingleDoiWithRetry($sCheckDoi);
+        if (!$aDoiInfo) {
+            $result['status'] = 'fail';
+            $result['fail_reason'] = "请求失败(重试{$this->maxRetry}次后仍失败)";
+        }
+        // Extract the core fields.
+        $sTitle = $this->getTitle($aDoiInfo);
+        // Journal information.
+        $aPublisher = $this->getPublisher($aDoiInfo);
+        $sJoura = empty($aPublisher['title']) ? $aPublisher['short_title'] : $aPublisher['title'];
+        // Author information.
+        $aAuthor = $this->getAuthors($aDoiInfo);
+        $sAuthor = empty($aAuthor) ? '' : implode(',', $aAuthor);
+        $sDateno = $this->getVolumeIssuePages($aDoiInfo);
+
+        // Detect the retraction status (1 = retracted, 2 = not retracted).
+        $aRetractInfo = $this->checkRetracted($aDoiInfo);
+        $bIsRetracted = 2;
+        if($aRetractInfo['is_retracted'] == true){
+            $bIsRetracted = 1;
+        }
+        $sRetractReason = empty($aRetractInfo['reason']) ? '' : $aRetractInfo['reason'];
+        // Resolve the DOI link, falling back to the doi.org resolver URL.
+        $sDolink = $this->getDolink($aDoiInfo);
+        $sDolink = empty($sDolink) ? 'https://doi.org/' . $sCheckDoi : $sDolink;
+
+        // Assemble the update: title/joura/author/doilink are only filled when currently empty.
+        $aUpdate = [];
+        if(!empty($sTitle) && empty($aRefer['title'])){
+            $aUpdate['title'] = $sTitle;
+        }
+        if(!empty($sJoura) && empty($aRefer['joura'])){
+            $aUpdate['joura'] = $sJoura;
+        }
+        if(!empty($sAuthor) && empty($aRefer['author'])){
+            $aUpdate['author'] = $sAuthor;
+        }
+        if(!empty($sDateno)){
+            $aUpdate['dateno'] = $sDateno;
+        }
+        if($bIsRetracted == 1){
+            $aUpdate['is_retracted'] = 1;
+        }
+        if(!empty($sDolink) && empty($aRefer['doilink'])){
+            $aUpdate['doilink'] = $sDolink;
+        }
+        if(empty($aUpdate)){
+            return json_encode(['status' => 5,'msg' => 'No update information available']);
+        }
+        $aUpdate['update_time'] = time();
+        $aUpdate['cs'] = 1;
+        $aWhere = ['p_refer_id' => $iPReferId];
+        $result = Db::name('production_article_refer')->where($aWhere)->limit(1)->update($aUpdate);
+        if($result === false){
+            return json_encode(['status' => 6,'msg' => 'Update failed-Cs']);
+        }
+        return json_encode(['status' => 1,'msg' => 'Update successful']);
+    }
+
+    /**
+     * Validate a DOI string.
+     *
+     * Only the bare form "10.<at least four digits>/<non-empty suffix>" is accepted;
+     * anything else (including an empty value) is rejected.
+     *
+     * @param string $doi Raw DOI value.
+     * @return string The trimmed DOI when it matches the pattern, otherwise ''.
+     */
+    private function filterValidDoi($doi = ''){
+        $doi = trim((string)$doi);
+        if ($doi === '') {
+            return '';
+        }
+        return preg_match('/^10\.\d{4,}\/.+/', $doi) ? $doi : '';
+    }
+
+    /**
+     * Fetch the metadata record of a single DOI, retrying on transient failures.
+     *
+     * At most $this->maxRetry attempts are made. A 429 answer waits 5 seconds before the
+     * next attempt, any other retryable failure waits 1 second; no wait happens after the
+     * last attempt. A 4xx answer other than 429 is treated as final and is not retried.
+     *
+     * @param string $doi DOI to look up (already validated by the caller).
+     * @return array|null The decoded "message" element when the service answers HTTP 200
+     *                    with status "ok"; null in every other case.
+     */
+    private function fetchSingleDoiWithRetry($doi){
+        // The URL is identical for every attempt, so build it once.
+        $url = $this->crossrefUrl . rawurlencode($doi) . "?mailto=" . rawurlencode($this->mailto);
+
+        for ($attempt = 1; $attempt <= $this->maxRetry; $attempt++) {
+            $ch = curl_init();
+            curl_setopt($ch, CURLOPT_URL, $url);
+            curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
+            curl_setopt($ch, CURLOPT_TIMEOUT, $this->timeout);
+            // The response is written to the database by the caller, so the peer must be
+            // authenticated; certificate verification stays enabled.
+            curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, true);
+            curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, 2);
+            curl_setopt($ch, CURLOPT_HTTPHEADER, [
+                "User-Agent: DOI-Fetcher/1.0 (mailto:{$this->mailto})"
+            ]);
+            $response = curl_exec($ch);
+            $httpCode = (int)curl_getinfo($ch, CURLINFO_HTTP_CODE);
+            curl_close($ch);
+
+            if ($httpCode === 200) {
+                // Guard against a non-JSON or unexpectedly shaped body instead of
+                // indexing into null.
+                $data = is_string($response) ? json_decode($response, true) : null;
+                if (is_array($data) && isset($data['status'], $data['message']) && $data['status'] == 'ok') {
+                    return $data['message'];
+                }
+                return null;
+            }
+
+            // A client error such as 404 will not change on a retry; give up immediately
+            // rather than sleeping through the remaining attempts.
+            if ($httpCode >= 400 && $httpCode < 500 && $httpCode !== 429) {
+                return null;
+            }
+
+            // Back off before the next attempt only; 429 (rate limited) waits longer.
+            if ($attempt < $this->maxRetry) {
+                sleep($httpCode === 429 ? 5 : 1);
+            }
+        }
+        return null;
+    }
+
+    /**
+     * Extract the title.
+     *
+     * @param array|null $aDoiInfo Metadata record (null after a failed lookup).
+     * @return string First entry of "title", or '' when absent.
+     */
+    private function getTitle($aDoiInfo = []){
+        return $aDoiInfo['title'][0] ?? '';
+    }
+
+    /**
+     * Extract journal / publisher information.
+     *
+     * @param array|null $aDoiInfo Metadata record (null after a failed lookup).
+     * @return array Keys: title (first "container-title"), short_title (first
+     *               "short-container-title"), ISSN (list) and publisher; missing
+     *               values default to '' ([] for ISSN).
+     */
+    private function getPublisher($aDoiInfo = []){
+        return [
+            'title'      => $aDoiInfo['container-title'][0] ?? '',
+            'short_title'=> $aDoiInfo['short-container-title'][0] ?? '',
+            'ISSN'       => $aDoiInfo['ISSN'] ?? [],
+            'publisher'  => $aDoiInfo['publisher'] ?? '',
+        ];
+    }
+
+    /**
+     * Extract the author names as "given family".
+     *
+     * @param array|null $aDoiInfo Metadata record (null after a failed lookup).
+     * @return array List of non-empty display names, in source order.
+     */
+    private function getAuthors($aDoiInfo = []){
+        $authors = [];
+        if (empty($aDoiInfo['author']) || !is_array($aDoiInfo['author'])) {
+            return $authors;
+        }
+        foreach ($aDoiInfo['author'] as $author) {
+            if (!is_array($author)) {
+                continue;
+            }
+            $given  = $author['given'] ?? '';
+            $family = $author['family'] ?? '';
+            // trim() avoids a dangling space when only one of the two parts is present.
+            $name = trim($given . ' ' . $family);
+            if ($name === '' && !empty($author['name'])) {
+                // presumably organisational authors carry "name" instead of given/family - TODO confirm
+                $name = trim($author['name']);
+            }
+            if ($name !== '') {
+                $authors[] = $name;
+            }
+        }
+        return $authors;
+    }
+
+    /**
+     * Extract the publication year.
+     *
+     * @param array|null $aDoiInfo Metadata record (null after a failed lookup).
+     * @return string First value of issued.date-parts[0] as a string, or '' when absent.
+     */
+    private function getPublishYear($aDoiInfo = []){
+        if (!empty($aDoiInfo['issued']['date-parts'][0][0])) {
+            return (string)$aDoiInfo['issued']['date-parts'][0][0];
+        }
+        return '';
+    }
+
+    /**
+     * Build the "year;volume(issue):pages" string, e.g. 2024;10(2):100-120.
+     *
+     * Missing parts are left out together with their separator: the volume part is
+     * introduced by ';' and the page part by ':' only when something precedes them.
+     * An issue is only emitted together with a volume.
+     *
+     * @param array|null $aDoiInfo Metadata record (null after a failed lookup).
+     * @return string The assembled string, '' when no part is available.
+     */
+    private function getVolumeIssuePages($aDoiInfo = []){
+        $year = $this->getPublishYear($aDoiInfo);
+
+        // Volume(issue)
+        $volume = $aDoiInfo['volume'] ?? '';
+        $issue  = $aDoiInfo['issue'] ?? '';
+        $volume = is_scalar($volume) ? trim((string)$volume) : '';
+        $issue  = is_scalar($issue) ? trim((string)$issue) : '';
+        $volumeIssue = '';
+        if ($volume !== '') {
+            $volumeIssue = $volume . ($issue !== '' ? "({$issue})" : '');
+        }
+
+        // First page - last page. "page" may be a structured start/end pair or a plain
+        // string; the ?? lookups are safe for both shapes.
+        $pageStart = $aDoiInfo['page']['start'] ?? ($aDoiInfo['first-page'] ?? '');
+        $pageEnd   = $aDoiInfo['page']['end'] ?? ($aDoiInfo['last-page'] ?? '');
+        if (is_scalar($pageStart) && (string)$pageStart !== '') {
+            $hasEnd = is_scalar($pageEnd) && (string)$pageEnd !== '';
+            $pages  = $pageStart . ($hasEnd ? "-{$pageEnd}" : '');
+        } else {
+            // Only use the raw "page" value when it is a plain string/number; an array
+            // here would otherwise be stringified as "Array".
+            $rawPage = $aDoiInfo['page'] ?? '';
+            $pages   = is_scalar($rawPage) ? trim((string)$rawPage) : '';
+        }
+
+        $dateno = $year;
+        if ($volumeIssue !== '') {
+            $dateno .= ($dateno === '' ? '' : ';') . $volumeIssue;
+        }
+        if ($pages !== '') {
+            $dateno .= ($dateno === '' ? '' : ':') . $pages;
+        }
+        return $dateno;
+    }
+
+    /**
+     * Decide whether the record is flagged as retracted.
+     *
+     * Five signals are checked in order: type/subtype, "update-type", "relation",
+     * "update-to" and retraction keywords in the title. The reason of the last matching
+     * signal among the first three is reported; signals four and five only supply the
+     * reason when none of the earlier ones matched.
+     *
+     * NOTE(review): "correction" types and the "corrects" relation are also reported as
+     * retracted, as in the original implementation - confirm this is intended.
+     *
+     * @param array|null $aDoiInfo Metadata record (null after a failed lookup).
+     * @return array ['is_retracted' => bool, 'reason' => string]
+     */
+    private function checkRetracted($aDoiInfo = []){
+        $isRetracted = false;
+        $reason = "未撤稿";
+
+        // 1. The record itself is typed as a retraction / correction notice.
+        $sType = strtolower($aDoiInfo['type'] ?? '');
+        $sSubtype = strtolower($aDoiInfo['subtype'] ?? '');
+        if (in_array($sType, ['retraction', 'correction'], true)) {
+            $isRetracted = true;
+            $reason = "文章类型为{$sType}(撤稿/更正声明)";
+        }
+        if (in_array($sSubtype, ['retraction', 'correction'], true)) {
+            $isRetracted = true;
+            $reason = "文章类型为{$sSubtype}(撤稿/更正声明)";
+        }
+
+        // 2. "update-type" contains retraction. The cast lets a plain string value be
+        //    tested without in_array() failing on a non-array haystack.
+        if (isset($aDoiInfo['update-type']) && in_array('retraction', (array)$aDoiInfo['update-type'], true)) {
+            $isRetracted = true;
+            $reason = "官方标记为撤稿(update-type: retraction)";
+        }
+
+        // 3. Relation to a retracted / corrected article.
+        if (!empty($aDoiInfo['relation']) && is_array($aDoiInfo['relation'])) {
+            foreach ($aDoiInfo['relation'] as $relType => $relItems) {
+                if (in_array($relType, ['is-retraction-of', 'corrects'], true)) {
+                    $isRetracted = true;
+                    $relatedDoi = $relItems[0]['id'] ?? '未知';
+                    $reason = "关联撤稿文章{$relatedDoi}(关系:{$relType})";
+                    break;
+                }
+            }
+        }
+
+        // 4. "update-to" entries whose type or label mentions a retraction.
+        if (isset($aDoiInfo['update-to']) && is_array($aDoiInfo['update-to'])) {
+            foreach ($aDoiInfo['update-to'] as $update) {
+                if (!is_array($update)) {
+                    continue;
+                }
+                $updateType = strtolower($update['type'] ?? '');
+                $updateLabel = strtolower($update['label'] ?? '');
+                if (strpos($updateType, 'retract') !== false || strpos($updateLabel, 'retract') !== false) {
+                    // Report a reason instead of leaving the "not retracted" default in
+                    // place next to is_retracted = true.
+                    if (!$isRetracted) {
+                        $sNoticeDoi = $update['DOI'] ?? '未知';
+                        $reason = "update-to标记为撤稿(DOI: {$sNoticeDoi})";
+                    }
+                    $isRetracted = true;
+                    break;
+                }
+            }
+        }
+
+        // 5. Retraction keywords in the title.
+        $aKeywords = ['retraction', 'retracted', 'withdrawal', 'withdrawn'];
+        foreach ((array)($aDoiInfo['title'] ?? []) as $value) {
+            if (!is_string($value)) {
+                continue;
+            }
+            $sTitleLower = strtolower($value);
+            foreach ($aKeywords as $sKeyword) {
+                if (strpos($sTitleLower, $sKeyword) !== false) {
+                    if (!$isRetracted) {
+                        $reason = "标题包含撤稿关键词:{$value}";
+                    }
+                    $isRetracted = true;
+                    break 2;
+                }
+            }
+        }
+
+        return [
+            'is_retracted' => $isRetracted,
+            'reason' => $reason
+        ];
+    }
+
+    /**
+     * Extract the DOI link.
+     *
+     * @param array|null $aDoiInfo Metadata record (null after a failed lookup).
+     * @return string The record's "URL" value, or '' when absent.
+     */
+    private function getDolink($aDoiInfo = []){
+        return $aDoiInfo['URL'] ?? '';
+    }
+}
\ No newline at end of file