Files
tougao/application/api/controller/Crossrefdoi.php
2026-03-17 11:04:11 +08:00

310 lines
11 KiB
PHP
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
<?php
namespace app\api\controller;
use app\api\controller\Base;
use think\Db;
class Crossrefdoi extends Base{
/**
 * Forward the optional request object to the Base controller constructor.
 *
 * @param \think\Request|null $request Request instance; passed to the parent unchanged.
 */
public function __construct(\think\Request $request = null)
{
    parent::__construct($request);
}
// Configuration
private $mailto; // Contact e-mail sent with each request (raises request priority). NOTE(review): never assigned in the visible code - confirm where it is set
private $timeout = 15; // Request timeout, in seconds
private $maxRetry = 2; // Maximum number of retries for a single DOI
private $crossrefUrl = "https://api.crossref.org/works/"; // API endpoint; the URL-encoded DOI is appended to it
/**
 * Look up one stored reference on Crossref by its DOI and back-fill missing fields.
 *
 * Reads `p_refer_id` from $aParam (or from the POST body when $aParam is empty),
 * loads the matching `production_article_refer` row whose state is 0 or 2,
 * queries Crossref for the row's DOI and writes the retrieved metadata back.
 * Title, journal, author and DOI link are only written when the stored value is
 * empty; `dateno` is written whenever Crossref supplies it, and `is_retracted`
 * is set to 1 when a retraction is detected. A successful update also sets
 * `cs` to 1 and refreshes `update_time`.
 *
 * @param array $aParam Optional parameters; only `p_refer_id` is used.
 * @return string JSON object with `status` and `msg`:
 *                1 = updated, 2 = missing p_refer_id, 3 = reference has no DOI,
 *                4 = DOI malformed, 5 = nothing to update, 6 = database update failed,
 *                7 = Crossref lookup failed.
 */
public function get($aParam = []){
    // Explicit arguments win; otherwise fall back to the POST payload.
    $aParam = empty($aParam) ? $this->request->post() : $aParam;
    $iPReferId = empty($aParam['p_refer_id']) ? 0 : $aParam['p_refer_id'];
    if(empty($iPReferId)){
        return json_encode(['status' => 2,'msg' => 'Please select the reference to be queried']);
    }
    $aWhere = ['p_refer_id' => $iPReferId,'state' => ['in', [0,2]]];
    $aRefer = Db::name('production_article_refer')->field('title,joura,author,refer_doi,doilink,cs')->where($aWhere)->find();
    if(empty($aRefer['refer_doi'])){
        return json_encode(['status' => 3,'msg' => 'The doi of the reference is empty']);
    }
    // Reject values that do not look like a DOI before spending a network call on them.
    $sCheckDoi = $this->filterValidDoi($aRefer['refer_doi']);
    if(empty($sCheckDoi)){
        return json_encode(['status' => 4,'msg' => 'Doi does not comply with the rules']);
    }
    // Query Crossref (with retry). Stop here on failure: continuing would store the
    // fallback doi.org link and flag the row as checked (cs = 1) although nothing
    // was actually verified.
    $aDoiInfo = $this->fetchSingleDoiWithRetry($sCheckDoi);
    if (empty($aDoiInfo)) {
        return json_encode(['status' => 7,'msg' => 'Failed to retrieve DOI information from Crossref']);
    }
    // Extract the core fields.
    $sTitle = $this->getTitle($aDoiInfo);
    // Journal: prefer the full container title, fall back to the abbreviated one.
    $aPublisher = $this->getPublisher($aDoiInfo);
    $sJoura = empty($aPublisher['title']) ? $aPublisher['short_title'] : $aPublisher['title'];
    // Authors as a comma-separated string.
    $aAuthor = $this->getAuthors($aDoiInfo);
    $sAuthor = empty($aAuthor) ? '' : implode(',', $aAuthor);
    $sDateno = $this->getVolumeIssuePages($aDoiInfo);
    // Retraction status.
    $aRetractInfo = $this->checkRetracted($aDoiInfo);
    // DOI link: use the URL reported by Crossref, otherwise build the resolver link.
    $sDolink = $this->getDolink($aDoiInfo);
    $sDolink = empty($sDolink) ? 'https://doi.org/' . $sCheckDoi : $sDolink;
    // Assemble the update; existing title/journal/author/link values are never overwritten.
    $aUpdate = [];
    if(!empty($sTitle) && empty($aRefer['title'])){
        $aUpdate['title'] = $sTitle;
    }
    if(!empty($sJoura) && empty($aRefer['joura'])){
        $aUpdate['joura'] = $sJoura;
    }
    if(!empty($sAuthor) && empty($aRefer['author'])){
        $aUpdate['author'] = $sAuthor;
    }
    if(!empty($sDateno)){
        $aUpdate['dateno'] = $sDateno;
    }
    if(!empty($aRetractInfo['is_retracted'])){
        $aUpdate['is_retracted'] = 1;
    }
    if(!empty($sDolink) && empty($aRefer['doilink'])){
        $aUpdate['doilink'] = $sDolink;
    }
    if(empty($aUpdate)){
        return json_encode(['status' => 5,'msg' => 'No update information available']);
    }
    $aUpdate['update_time'] = time();
    $aUpdate['cs'] = 1;
    $aWhere = ['p_refer_id' => $iPReferId];
    $result = Db::name('production_article_refer')->where($aWhere)->limit(1)->update($aUpdate);
    if($result === false){
        return json_encode(['status' => 6,'msg' => 'Update failed-Cs']);
    }
    return json_encode(['status' => 1,'msg' => 'Update successful']);
}
/**
 * Normalise a DOI string and validate its shape.
 *
 * Surrounding whitespace and a leading resolver/label prefix
 * (`https://doi.org/`, `http://dx.doi.org/`, `doi:`) are removed; the remainder
 * must then look like `10.<at least 4 digits>/<suffix>`.
 *
 * @param string $doi Raw DOI value.
 * @return string The bare DOI, or '' when the value is not a well-formed DOI.
 */
private function filterValidDoi($doi = ''){
    $doi = trim((string)$doi);
    if ($doi === '') return '';
    // DOIs are often stored as a resolver URL or with a "doi:" label; strip that
    // so the bare identifier can be validated and sent to the API.
    $doi = (string)preg_replace('~^(?:https?://(?:dx\.)?doi\.org/|doi:\s*)~i', '', $doi);
    // "10." + at least four digits + "/" + a non-empty suffix.
    if (preg_match('/^10\.\d{4,}\/.+/', $doi)) {
        return $doi;
    }
    return '';
}
/**
 * Fetch the Crossref work record for one DOI, retrying transient failures.
 *
 * Makes at most $this->maxRetry attempts. After HTTP 429 (rate limit) it waits
 * 5 seconds before the next attempt, after any other failure 1 second. HTTP 404
 * is treated as final because repeating the request cannot change the answer.
 *
 * @param string $doi Bare DOI to look up.
 * @return array|null The `message` element of the Crossref response, or null when
 *                    the request failed or the response was not a valid "ok" payload.
 */
private function fetchSingleDoiWithRetry($doi){
    // The request is identical for every attempt, so build it once.
    $url = $this->crossrefUrl . rawurlencode($doi);
    $userAgent = 'DOI-Fetcher/1.0';
    // Only advertise a contact address when one is configured; otherwise an empty
    // "mailto=" / "mailto:" would be sent.
    if (!empty($this->mailto)) {
        $url .= '?mailto=' . rawurlencode($this->mailto);
        $userAgent .= " (mailto:{$this->mailto})";
    }
    for ($attempt = 1; $attempt <= $this->maxRetry; $attempt++) {
        $ch = curl_init();
        curl_setopt($ch, CURLOPT_URL, $url);
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
        curl_setopt($ch, CURLOPT_TIMEOUT, $this->timeout);
        // Verify the server certificate: the response is written to the database,
        // so it must not be accepted from an unauthenticated peer.
        curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, true);
        curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, 2);
        curl_setopt($ch, CURLOPT_HTTPHEADER, [
            "User-Agent: {$userAgent}"
        ]);
        $response = curl_exec($ch);
        $httpCode = (int)curl_getinfo($ch, CURLINFO_HTTP_CODE);
        curl_close($ch);
        // Success: hand back the payload only when it has the expected shape.
        if ($httpCode === 200) {
            $data = json_decode((string)$response, true);
            if (is_array($data) && isset($data['status'], $data['message']) && $data['status'] == 'ok') {
                return $data['message'];
            }
            return null;
        }
        // Unknown DOI: retrying will not help.
        if ($httpCode === 404) {
            return null;
        }
        // Back off before the next attempt (longer when rate-limited); do not
        // sleep after the final attempt.
        if ($attempt < $this->maxRetry) {
            sleep($httpCode === 429 ? 5 : 1);
        }
    }
    return null;
}
/**
 * Return the first entry of the record's `title` list.
 *
 * @param array $aDoiInfo Work record returned by the DOI lookup.
 * @return string First element of `title`, or '' when it is not set.
 */
private function getTitle($aDoiInfo = []){
    if (isset($aDoiInfo['title'][0])) {
        return $aDoiInfo['title'][0];
    }
    return '';
}
/**
 * Collect journal and publisher details from a work record.
 *
 * @param array $aDoiInfo Work record returned by the DOI lookup.
 * @return array Keys: `title` (first `container-title`), `short_title`
 *               (first `short-container-title`), `ISSN` and `publisher`;
 *               missing values default to '' ([] for `ISSN`).
 */
private function getPublisher($aDoiInfo = []){
    $sFullTitle = $aDoiInfo['container-title'][0] ?? '';
    $sShortTitle = $aDoiInfo['short-container-title'][0] ?? '';
    $aIssn = isset($aDoiInfo['ISSN']) ? $aDoiInfo['ISSN'] : [];
    $sPublisher = isset($aDoiInfo['publisher']) ? $aDoiInfo['publisher'] : '';
    return [
        'title' => $sFullTitle,
        'short_title' => $sShortTitle,
        'ISSN' => $aIssn,
        'publisher' => $sPublisher,
    ];
}
/**
 * Build the list of author display names from a work record.
 *
 * Personal authors are rendered as "given family" (either part may be missing).
 * Entries that carry neither part fall back to their `name` field, which
 * Crossref uses for organisational authors. Entries with no usable name are
 * skipped.
 *
 * @param array $aDoiInfo Work record returned by the DOI lookup.
 * @return array List of author name strings, in record order.
 */
private function getAuthors($aDoiInfo = []){
    $authors = [];
    if (empty($aDoiInfo['author']) || !is_array($aDoiInfo['author'])) {
        return $authors;
    }
    foreach ($aDoiInfo['author'] as $author) {
        $given = trim((string)($author['given'] ?? ''));
        $family = trim((string)($author['family'] ?? ''));
        // trim() removes the stray separator when only one of the two parts exists.
        $name = trim($given . ' ' . $family);
        if ($name === '') {
            // Organisational author: no given/family, only a single name.
            $name = trim((string)($author['name'] ?? ''));
        }
        if ($name !== '') {
            $authors[] = $name;
        }
    }
    return $authors;
}
/**
 * Return the publication year taken from `issued.date-parts[0][0]`.
 *
 * @param array $aDoiInfo Work record returned by the DOI lookup.
 * @return string The year as a string, or '' when it is missing or empty.
 */
private function getPublishYear($aDoiInfo = []){
    $year = $aDoiInfo['issued']['date-parts'][0][0] ?? null;
    return empty($year) ? '' : (string)$year;
}
/**
 * Build the citation locator "year;volume(issue):pages", e.g. 2024;10(2):100-120.
 *
 * Every component is optional. The year is separated from the rest by ';',
 * volume(issue) and pages are separated by ':'. The issue is only emitted
 * together with a volume.
 *
 * @param array $aDoiInfo Work record returned by the DOI lookup.
 * @return string The formatted locator, or '' when no component is available.
 */
private function getVolumeIssuePages($aDoiInfo = []){
    $year = $this->getPublishYear($aDoiInfo);
    $locator = [];
    // volume(issue)
    $volume = $aDoiInfo['volume'] ?? '';
    $issue = $aDoiInfo['issue'] ?? '';
    if ($volume) {
        $locator[] = $volume . ($issue ? "({$issue})" : '');
    }
    // Pages: accept a structured start/end pair, first-page/last-page fields,
    // or a ready-made page string such as "100-120".
    $pageStart = $aDoiInfo['page']['start'] ?? ($aDoiInfo['first-page'] ?? '');
    $pageEnd = $aDoiInfo['page']['end'] ?? ($aDoiInfo['last-page'] ?? '');
    if ($pageStart) {
        $pages = $pageStart . ($pageEnd ? "-{$pageEnd}" : '');
    } else {
        $pages = $aDoiInfo['page'] ?? '';
        // A structured page entry without a start cannot be rendered; passing
        // the array on to implode() would yield the literal "Array".
        if (!is_scalar($pages)) {
            $pages = '';
        }
    }
    if ($pages) {
        $locator[] = $pages;
    }
    $sLocator = implode(':', $locator);
    if ($year === '' || $sLocator === '') {
        return $year . $sLocator;
    }
    return $year . ';' . $sLocator;
}
/**
 * Decide whether a work record describes, or is marked as, a retraction.
 *
 * Five signals are checked in order; each one that fires sets the flag and
 * overwrites the reason, so the reason reflects the last signal that matched:
 *   1. `type` / `subtype` is "retraction" or "correction"
 *   2. `update-type` contains "retraction"
 *   3. `relation` has an "is-retraction-of" or "corrects" entry
 *   4. an `update-to` entry whose type or label contains "retract"
 *   5. a title containing retraction / retracted / withdrawal / withdrawn
 *
 * @param array $aDoiInfo Work record returned by the DOI lookup.
 * @return array ['is_retracted' => bool, 'reason' => string]
 */
private function checkRetracted($aDoiInfo = []){
    $isRetracted = false;
    $reason = "未撤稿";
    // 1. The record itself is a retraction/correction notice (type or subtype).
    $aNoticeTypes = ['retraction', 'correction'];
    $sType = strtolower($aDoiInfo['type'] ?? '');
    $sSubtype = strtolower($aDoiInfo['subtype'] ?? '');
    if (in_array($sType, $aNoticeTypes, true)) {
        $isRetracted = true;
        $reason = "文章类型为{$sType}(撤稿/更正声明)";
    }
    if (in_array($sSubtype, $aNoticeTypes, true)) {
        $isRetracted = true;
        $reason = "文章类型为{$sSubtype}(撤稿/更正声明)";
    }
    // 2. update-type contains "retraction". The cast accepts a single string as
    //    well as a list; in_array() on a non-array is a TypeError on PHP 8.
    $aUpdateTypes = isset($aDoiInfo['update-type']) ? (array)$aDoiInfo['update-type'] : [];
    if (in_array('retraction', $aUpdateTypes, true)) {
        $isRetracted = true;
        $reason = "官方标记为撤稿update-type: retraction";
    }
    // 3. Relation to a retracted/corrected work. Strict comparison keeps integer
    //    keys from loosely matching the relation names on PHP 7.
    if (!empty($aDoiInfo['relation']) && is_array($aDoiInfo['relation'])) {
        foreach ($aDoiInfo['relation'] as $relType => $relItems) {
            if (in_array($relType, ['is-retraction-of', 'corrects'], true)) {
                $isRetracted = true;
                $relatedDoi = $relItems[0]['id'] ?? '未知';
                $reason = "关联撤稿文章{$relatedDoi}(关系:{$relType})";
                break;
            }
        }
    }
    // 4. update-to entries whose type or label mentions a retraction.
    if (isset($aDoiInfo['update-to']) && is_array($aDoiInfo['update-to'])) {
        foreach ($aDoiInfo['update-to'] as $update) {
            $updateType = strtolower($update['type'] ?? '');
            $updateLabel = strtolower($update['label'] ?? '');
            if (strpos($updateType, 'retract') !== false || strpos($updateLabel, 'retract') !== false) {
                $isRetracted = true;
                $noticeDoi = $update['DOI'] ?? '未知';
                // Report why the flag was set instead of leaving the "not retracted" default.
                $reason = "update-to标记为撤稿(DOI:{$noticeDoi})";
                break;
            }
        }
    }
    // 5. Retraction keywords in any title.
    $aTitles = isset($aDoiInfo['title']) ? (array)$aDoiInfo['title'] : [];
    foreach ($aTitles as $value) {
        if (!is_string($value)) {
            continue;
        }
        $sTitleLower = strtolower($value);
        if (strpos($sTitleLower, 'retraction') !== false || strpos($sTitleLower, 'retracted') !== false
            || strpos($sTitleLower, 'withdrawal') !== false || strpos($sTitleLower, 'withdrawn') !== false) {
            $isRetracted = true;
            $reason = "标题包含撤稿关键词:{$value}";
            break;
        }
    }
    return [
        'is_retracted' => $isRetracted,
        'reason' => $reason
    ];
}
/**
 * Return the record's `URL` field (the DOI link).
 *
 * @param array $aDoiInfo Work record returned by the DOI lookup.
 * @return string Value of `URL`, or '' when it is not set.
 */
private function getDolink($aDoiInfo = []){
    return isset($aDoiInfo['URL']) ? $aDoiInfo['URL'] : '';
}
}