Compare commits

...

7 Commits

Author SHA1 Message Date
wangjinlei
a802b2e923 自动推广 2026-04-03 11:45:45 +08:00
wangjinlei
22947a56a4 自动推广 2026-03-27 17:57:25 +08:00
wangjinlei
20a68ddc8a 自动推广 2026-03-23 09:57:45 +08:00
wangjinlei
d1e0f43992 Merge remote-tracking branch 'origin/master' 2026-03-18 14:37:29 +08:00
wangjinlei
e3ec1b0ca1 自动推广 2026-03-18 14:37:23 +08:00
chengxl
e7bb34e11d 抓取是否撤稿 2026-03-17 11:04:11 +08:00
wangjinlei
65ba338a7d 自动推广 2026-03-13 17:19:34 +08:00
19 changed files with 6172 additions and 767 deletions

View File

@@ -8,6 +8,8 @@ use think\Queue;
use think\Validate; use think\Validate;
use PhpOffice\PhpWord\IOFactory; use PhpOffice\PhpWord\IOFactory;
use app\common\OpenAi; use app\common\OpenAi;
use app\common\CrossrefService;
use app\common\PubmedService;
/** /**
* @title 文章接口 * @title 文章接口
@@ -17,9 +19,17 @@ class Article extends Base
{ {
/**
* @var CrossrefService
*/
private $crossService;
private $pubmedService;
public function __construct(\think\Request $request = null) public function __construct(\think\Request $request = null)
{ {
parent::__construct($request); parent::__construct($request);
$this->crossService = new CrossrefService();
$this->pubmedService = new PubmedService();
} }
@@ -169,16 +179,6 @@ class Article extends Base
return jsonSuccess($re); return jsonSuccess($re);
} }
public function myttt()
{
$res = $this->addProductionEx("3689");
echo "<pre>";
var_dump($res);
echo "</pre>";
die;
}
/**获取预接收内容状态 /**获取预接收内容状态
* @return void * @return void
*/ */
@@ -1053,6 +1053,36 @@ class Article extends Base
} }
} }
/**
 * Debug endpoint: runs the Crossref citation check for a single article.
 *
 * Reads `article_id` from the POST body, loads the article's main-body rows (state = 0)
 * and the references (state = 0, ordered by index) of its production record, and hands
 * both to CrossrefService::qcArticleCitations().
 *
 * @return mixed jsonError() when validation fails or the article has no production record,
 *               otherwise jsonSuccess(['res' => <service result>])
 */
public function testCheckArticleCitation()
{
    $data = $this->request->post();
    $rule = new Validate([
        "article_id"=>"require"
    ]);
    if(!$rule->check($data)){
        return jsonError($rule->getError());
    }
    // Without a production record there is no p_article_id to look references up by;
    // stop here instead of querying with a null key.
    $production = $this->production_article_obj->where("article_id",$data['article_id'])->find();
    if(empty($production)){
        return jsonError("No production record found for this article");
    }
    $mains = $this->article_main_obj->where("article_id",$data['article_id'])->where("state",0)->select();
    $refers = $this->production_article_refer_obj->where("p_article_id",$production['p_article_id'])->where("state",0)->order("index asc")->select();
    $res = $this->crossService->qcArticleCitations($mains,$refers);
    return jsonSuccess(['res'=>$res]);
}
/**
 * Debug endpoint: looks a DOI up through PubmedService::fetchByDoi().
 *
 * Requires `doi` in the POST body.
 *
 * @return mixed jsonError() with the validator message when `doi` is missing,
 *               otherwise jsonSuccess(['res' => <service result>])
 */
public function testCheckArticlePubmed()
{
    $aPost = $this->request->post();
    $oValidator = new Validate(["doi"=>"require"]);
    if(!$oValidator->check($aPost)){
        return jsonError($oValidator->getError());
    }

    return jsonSuccess([
        'res' => $this->pubmedService->fetchByDoi($aPost['doi']),
    ]);
}
/** /**
* @title 发送留言板消息 * @title 发送留言板消息
* @description 发送留言板消息 * @description 发送留言板消息
@@ -3940,7 +3970,14 @@ class Article extends Base
public function ffff(){ public function ffff(){
$data = $this->request->post(); $data = $this->request->post();
$this->ai_scor($data['article_id']); $rule = new Validate([
"doi"=>"require"
]);
if(!$rule->check($data)){
return jsonError($rule->getError());
}
$res = $this->crossService->fetchWorkSummary($data['doi']);
return jsonSuccess($res);
} }

View File

@@ -6,6 +6,7 @@ use app\api\controller\Base;
use think\Db; use think\Db;
use PhpOffice\PhpWord\IOFactory; use PhpOffice\PhpWord\IOFactory;
use think\Exception; use think\Exception;
use think\Validate;
use \app\common\ArticleParserService; use \app\common\ArticleParserService;
/** /**
* @title 自动投稿控制器 * @title 自动投稿控制器
@@ -102,6 +103,23 @@ class Contribute extends Base
return $result; return $result;
} }
/**
 * Debug endpoint: extracts the reference list from an article's latest manuscript file.
 *
 * Reads `article_id` from the POST body, picks the newest article_file row of the
 * manuscript type, resolves its stored URL to a path under ROOT_PATH/public and passes
 * that path to ArticleParserService::getReferencesFromWord().
 *
 * @return mixed jsonError() on validation failure or when no manuscript file exists,
 *               otherwise jsonSuccess(<parser result>)
 */
public function myTestArticle(){
    $data = $this->request->post();
    $rule = new Validate([
        'article_id'=>"require"
    ]);
    if(!$rule->check($data)){
        return jsonError($rule->getError());
    }
    // NOTE(review): "manuscirpt" looks misspelled but is kept byte-for-byte because it has to
    // match whatever value is stored in type_name — confirm against the data.
    $files = $this->article_file_obj->where("article_id",$data['article_id'])->where("type_name","manuscirpt")->order("file_id desc")->limit(1)->select();
    if(empty($files[0]['file_url'])){
        return jsonError("No manuscript file found for this article");
    }
    // Strip leading slashes and an optional leading "public/" directory. The previous
    // ltrim($url, 'public') treated 'public' as a character set and also ate the first
    // letters of paths such as "contribute/…" or "upload/…".
    $sRelativePath = preg_replace('#^/*(?:public/+)?#', '', $files[0]['file_url']);
    $sFileUrl = rtrim(ROOT_PATH,'/').'/public/'.$sRelativePath;
    $res = ArticleParserService::getReferencesFromWord($sFileUrl);
    return jsonSuccess($res);
}
/** /**
* 组装数据插入相关数据表 * 组装数据插入相关数据表
* @param array $aParam * @param array $aParam

View File

@@ -0,0 +1,326 @@
<?php
namespace app\api\controller;
use app\api\controller\Base;
use think\Db;
use think\Validate;
/**
 * Controller that completes reference records (production_article_refer) with metadata
 * fetched from the Crossref REST API and flags references that look retracted.
 */
class Crossrefdoi extends Base{

    // Contact e-mail passed to Crossref. It is never assigned in this class, so it is
    // only added to the request when it is non-empty.
    private $mailto;
    // Per-request timeout in seconds.
    private $timeout = 15;
    // Maximum number of HTTP attempts for one DOI.
    private $maxRetry = 2;
    // Base URL of the Crossref "works" endpoint; the DOI is appended to it.
    private $crossrefUrl = "https://api.crossref.org/works/";

    public function __construct(\think\Request $request = null) {
        parent::__construct($request);
    }

    /**
     * Looks up one reference's DOI on Crossref and fills in missing fields of the record.
     *
     * title, joura, author and doilink are only written when the stored value is empty;
     * dateno is written whenever Crossref yields one; is_retracted is set to 1 when the
     * work looks retracted. Every successful update also sets cs = 1 and update_time.
     *
     * @param array $aParam Optional parameters; falls back to the POST body when empty.
     *                      Must contain `p_refer_id`.
     * @return string JSON string with `status` and `msg`:
     *                1 updated, 2 missing p_refer_id, 3 reference has no DOI, 4 DOI malformed,
     *                5 nothing to update, 6 database update failed, 7 Crossref lookup failed.
     */
    public function get($aParam = []){
        $aParam = empty($aParam) ? $this->request->post() : $aParam;
        $iPReferId = empty($aParam['p_refer_id']) ? 0 : $aParam['p_refer_id'];
        if(empty($iPReferId)){
            return json_encode(['status' => 2,'msg' => 'Please select the reference to be queried']);
        }

        $aWhere = ['p_refer_id' => $iPReferId,'state' => ['in', [0,2]]];
        $aRefer = Db::name('production_article_refer')->field('title,joura,author,refer_doi,doilink,cs')->where($aWhere)->find();
        if(empty($aRefer['refer_doi'])){
            return json_encode(['status' => 3,'msg' => 'The doi of the reference is empty']);
        }

        $sCheckDoi = $this->filterValidDoi($aRefer['refer_doi']);
        if(empty($sCheckDoi)){
            return json_encode(['status' => 4,'msg' => 'Doi does not comply with the rules']);
        }

        // A failed lookup must stop here: continuing with empty metadata would still write
        // the fallback doilink and mark the reference as checked (cs = 1).
        $aDoiInfo = $this->fetchSingleDoiWithRetry($sCheckDoi);
        if (empty($aDoiInfo)) {
            return json_encode(['status' => 7,'msg' => 'Failed to fetch DOI metadata from Crossref']);
        }

        // Extract the fields of interest from the Crossref record.
        $sTitle = $this->getTitle($aDoiInfo);
        $aPublisher = $this->getPublisher($aDoiInfo);
        $sJoura = empty($aPublisher['title']) ? $aPublisher['short_title'] : $aPublisher['title'];
        $aAuthor = $this->getAuthors($aDoiInfo);
        $sAuthor = empty($aAuthor) ? '' : implode(',', $aAuthor);
        $sDateno = $this->getVolumeIssuePages($aDoiInfo);
        $aRetractInfo = $this->checkRetracted($aDoiInfo);
        $sDolink = $this->getDolink($aDoiInfo);
        $sDolink = empty($sDolink) ? 'https://doi.org/' . $sCheckDoi : $sDolink;

        // Build the update set.
        $aUpdate = [];
        if(!empty($sTitle) && empty($aRefer['title'])){
            $aUpdate['title'] = $sTitle;
        }
        if(!empty($sJoura) && empty($aRefer['joura'])){
            $aUpdate['joura'] = $sJoura;
        }
        if(!empty($sAuthor) && empty($aRefer['author'])){
            $aUpdate['author'] = $sAuthor;
        }
        if(!empty($sDateno)){
            $aUpdate['dateno'] = $sDateno;
        }
        if(!empty($aRetractInfo['is_retracted'])){
            $aUpdate['is_retracted'] = 1;
        }
        if(!empty($sDolink) && empty($aRefer['doilink'])){
            $aUpdate['doilink'] = $sDolink;
        }
        if(empty($aUpdate)){
            return json_encode(['status' => 5,'msg' => 'No update information available']);
        }

        $aUpdate['update_time'] = time();
        $aUpdate['cs'] = 1;
        $aWhere = ['p_refer_id' => $iPReferId];
        $result = Db::name('production_article_refer')->where($aWhere)->limit(1)->update($aUpdate);
        if($result === false){
            return json_encode(['status' => 6,'msg' => 'Update failed-Cs']);
        }
        return json_encode(['status' => 1,'msg' => 'Update successful']);
    }

    /**
     * Returns the raw Crossref record for the DOI given in the POST body (`doi`).
     *
     * @return mixed jsonError() when `doi` is missing or malformed, otherwise
     *               jsonSuccess(<Crossref "message" array, or null when the lookup failed>)
     */
    public function getOneDoi(){
        $data = $this->request->post();
        $rule = new Validate([
            "doi"=>"require"
        ]);
        if(!$rule->check($data)){
            return jsonError($rule->getError());
        }
        // An invalid DOI is filtered to ''; requesting "/works/" with nothing appended
        // would hit the list endpoint and return unrelated works.
        $sCheckDoi = $this->filterValidDoi($data['doi']);
        if(empty($sCheckDoi)){
            return jsonError('Doi does not comply with the rules');
        }
        return jsonSuccess($this->fetchSingleDoiWithRetry($sCheckDoi));
    }

    /**
     * Normalises a DOI and checks that it has the form "10.<4+ digits>/<suffix>".
     *
     * A leading "https://doi.org/", "http://dx.doi.org/" or "doi:" prefix is removed first.
     *
     * @param string $doi Raw DOI text.
     * @return string The bare DOI, or '' when it does not match.
     */
    private function filterValidDoi($doi = ''){
        $doi = trim((string)$doi);
        if ($doi === '') return '';
        $doi = preg_replace('#^(?:https?://(?:dx\.)?doi\.org/|doi:\s*)#i', '', $doi);
        if (preg_match('/^10\.\d{4,}\/.+/', $doi)) {
            return $doi;
        }
        return '';
    }

    /**
     * Fetches one DOI from Crossref, trying at most $maxRetry times.
     *
     * HTTP 200 and 404 end the loop immediately; any other outcome is retried after a
     * pause (5 s for 429, 1 s otherwise).
     *
     * @param string $doi Bare DOI.
     * @return array|null The "message" part of the Crossref response, or null on failure.
     */
    private function fetchSingleDoiWithRetry($doi){
        $doi = trim((string)$doi);
        if ($doi === '') {
            return null;
        }

        $url = $this->crossrefUrl . rawurlencode($doi);
        $userAgent = 'DOI-Fetcher/1.0';
        if (!empty($this->mailto)) {
            $url .= '?mailto=' . rawurlencode($this->mailto);
            $userAgent .= " (mailto:{$this->mailto})";
        }

        for ($attempt = 1; $attempt <= $this->maxRetry; $attempt++) {
            $ch = curl_init();
            curl_setopt($ch, CURLOPT_URL, $url);
            curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
            curl_setopt($ch, CURLOPT_TIMEOUT, $this->timeout);
            // NOTE(review): certificate verification is disabled here; this is kept as-is
            // but should be enabled once the host has a CA bundle configured.
            curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false);
            curl_setopt($ch, CURLOPT_HTTPHEADER, [
                "User-Agent: {$userAgent}"
            ]);
            $response = curl_exec($ch);
            $httpCode = curl_getinfo($ch, CURLINFO_HTTP_CODE);
            curl_close($ch);

            if ($httpCode == 200) {
                $data = json_decode($response, true);
                if (is_array($data) && isset($data['status'], $data['message']) && $data['status'] == 'ok') {
                    return $data['message'];
                }
                return null;
            }
            // Unknown DOI: retrying cannot change the answer.
            if ($httpCode == 404) {
                return null;
            }
            // Pause before the next attempt only; there is nothing to wait for after the last one.
            if ($attempt < $this->maxRetry) {
                sleep($httpCode == 429 ? 5 : 1);
            }
        }
        return null;
    }

    /**
     * Returns the first title of the record, or ''.
     */
    private function getTitle($aDoiInfo = []){
        return $aDoiInfo['title'][0] ?? '';
    }

    /**
     * Collects journal/publisher fields of the record.
     *
     * @return array Keys: title (first container-title), short_title (first
     *               short-container-title), ISSN (array), publisher (string).
     */
    private function getPublisher($aDoiInfo = []){
        return [
            'title' => $aDoiInfo['container-title'][0] ?? '',
            'short_title' => $aDoiInfo['short-container-title'][0] ?? '',
            'ISSN' => $aDoiInfo['ISSN'] ?? [],
            'publisher' => $aDoiInfo['publisher'] ?? '',
        ];
    }

    /**
     * Builds the author name list ("Given Family") of the record.
     *
     * Entries without given/family parts fall back to their `name` field when present.
     *
     * @return string[] Non-empty author names in record order.
     */
    private function getAuthors($aDoiInfo = []){
        $authors = [];
        if (empty($aDoiInfo['author']) || !is_array($aDoiInfo['author'])) {
            return $authors;
        }
        foreach ($aDoiInfo['author'] as $author) {
            $name = trim(($author['given'] ?? '') . ' ' . ($author['family'] ?? ''));
            if ($name === '') {
                $name = trim($author['name'] ?? '');
            }
            if ($name !== '') {
                $authors[] = $name;
            }
        }
        return $authors;
    }

    /**
     * Returns the issue year (first element of issued.date-parts) as a string, or ''.
     */
    private function getPublishYear($aDoiInfo = []){
        if (!empty($aDoiInfo['issued']['date-parts'][0][0])) {
            return (string)$aDoiInfo['issued']['date-parts'][0][0];
        }
        return '';
    }

    /**
     * Formats year, volume(issue) and pages as "2024;10(2):100-120".
     *
     * Missing parts are left out together with their separator.
     *
     * @return string Formatted string, '' when nothing is available.
     */
    private function getVolumeIssuePages($aDoiInfo = []){
        $year = $this->getPublishYear($aDoiInfo);

        $volume = $aDoiInfo['volume'] ?? '';
        $issue = $aDoiInfo['issue'] ?? '';
        $volumeIssue = $volume ? $volume . ($issue ? "({$issue})" : '') : '';

        // `page` may be a structured start/end pair or a plain string such as "100-120".
        $page = $aDoiInfo['page'] ?? '';
        $pageStart = (is_array($page) ? ($page['start'] ?? null) : null) ?? ($aDoiInfo['first-page'] ?? '');
        $pageEnd = (is_array($page) ? ($page['end'] ?? null) : null) ?? ($aDoiInfo['last-page'] ?? '');
        if ($pageStart) {
            $pages = $pageStart . ($pageEnd ? "-{$pageEnd}" : '');
        } else {
            $pages = is_string($page) ? $page : '';
        }

        $sResult = $year;
        if ($volumeIssue !== '') {
            $sResult .= ($sResult !== '' ? ';' : '') . $volumeIssue;
        }
        if ($pages !== '') {
            $sResult .= ($sResult !== '' ? ':' : '') . $pages;
        }
        return $sResult;
    }

    /**
     * Decides whether the record looks retracted (or is a retraction/correction notice).
     *
     * Checks, in order: type/subtype, update-type, relation keys, update-to / updated-by
     * entries mentioning "retract", and retraction keywords in the title. A later match
     * overwrites the reason of an earlier one.
     *
     * @return array ['is_retracted' => bool, 'reason' => string]
     */
    private function checkRetracted($aDoiInfo = []){
        $isRetracted = false;
        $reason = "未撤稿";

        // 1. The work itself is typed as a retraction/correction notice.
        $sType = strtolower($aDoiInfo['type'] ?? '');
        $sSubtype = strtolower($aDoiInfo['subtype'] ?? '');
        if (in_array($sType, ['retraction', 'correction'], true)) {
            $isRetracted = true;
            $reason = "文章类型为{$sType}(撤稿/更正声明)";
        }
        if (in_array($sSubtype, ['retraction', 'correction'], true)) {
            $isRetracted = true;
            $reason = "文章类型为{$sSubtype}(撤稿/更正声明)";
        }

        // 2. update-type contains "retraction" (cast so a scalar value is handled too).
        if (isset($aDoiInfo['update-type']) && in_array('retraction', (array)$aDoiInfo['update-type'], true)) {
            $isRetracted = true;
            $reason = "官方标记为撤稿update-type: retraction";
        }

        // 3. Relation keys that link to a retracted/corrected work.
        if (!empty($aDoiInfo['relation']) && is_array($aDoiInfo['relation'])) {
            foreach ($aDoiInfo['relation'] as $relType => $relItems) {
                if (in_array($relType, ['is-retraction-of', 'corrects'], true)) {
                    $isRetracted = true;
                    $relatedDoi = $relItems[0]['id'] ?? '未知';
                    $reason = "关联撤稿文章{$relatedDoi}(关系:{$relType})";
                    break;
                }
            }
        }

        // 4. Update entries: `update-to` as before, plus `updated-by`, which carries the
        //    retraction notice on the retracted work itself.
        foreach (['update-to', 'updated-by'] as $sUpdateKey) {
            if (empty($aDoiInfo[$sUpdateKey]) || !is_array($aDoiInfo[$sUpdateKey])) {
                continue;
            }
            foreach ($aDoiInfo[$sUpdateKey] as $update) {
                $updateType = strtolower($update['type'] ?? '');
                $updateLabel = strtolower($update['label'] ?? '');
                if (strpos($updateType, 'retract') !== false || strpos($updateLabel, 'retract') !== false) {
                    $isRetracted = true;
                    $sNoticeDoi = $update['DOI'] ?? '未知';
                    $reason = "{$sUpdateKey}标记为撤稿(DOI: {$sNoticeDoi})";
                    break 2;
                }
            }
        }

        // 5. Retraction keywords in any title.
        $aTitles = $aDoiInfo['title'] ?? [];
        foreach ((array)$aTitles as $value) {
            if (!is_string($value)) {
                continue;
            }
            $sTitleLower = strtolower($value);
            if (strpos($sTitleLower, 'retraction') !== false || strpos($sTitleLower, 'retracted') !== false
                || strpos($sTitleLower, 'withdrawal') !== false || strpos($sTitleLower, 'withdrawn') !== false) {
                $isRetracted = true;
                $reason = "标题包含撤稿关键词:{$value}";
                break;
            }
        }

        return [
            'is_retracted' => $isRetracted,
            'reason' => $reason
        ];
    }

    /**
     * Returns the record's URL field (the DOI link), or ''.
     */
    private function getDolink($aDoiInfo = []){
        return $aDoiInfo['URL'] ?? '';
    }
}

File diff suppressed because it is too large Load Diff

View File

@@ -4,31 +4,21 @@ namespace app\api\controller;
use think\Cache; use think\Cache;
use think\Db; use think\Db;
use GuzzleHttp\Client; use think\Validate;
use app\common\ExpertFinderService;
class ExpertFinder extends Base class ExpertFinder extends Base
{ {
private $httpClient; private $service;
private $ncbiBaseUrl = 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/';
public function __construct(\think\Request $request = null) public function __construct(\think\Request $request = null)
{ {
parent::__construct($request); parent::__construct($request);
$this->httpClient = new Client([ $this->service = new ExpertFinderService();
'timeout' => 180,
'connect_timeout' => 15,
'verify' => false,
]);
} }
/** /**
* Main search endpoint * Main search endpoint
* Params:
* keyword - search term (e.g. "biomedical engineering")
* page - page number, default 1
* per_page - articles per page, default 100, max 100
* min_year - earliest publication year, default current-3
* source - "pubmed" (fast, email from affiliation) or "pmc" (slower, structured email)
*/ */
public function search() public function search()
{ {
@@ -49,16 +39,12 @@ class ExpertFinder extends Base
} }
try { try {
if ($source === 'pmc') { $result = $this->service->searchExperts($keyword, $perPage, $minYear, $page, $source);
$result = $this->searchViaPMC($keyword, $perPage, $minYear, $page);
} else {
$result = $this->searchViaPubMed($keyword, $perPage, $minYear, $page);
}
} catch (\Exception $e) { } catch (\Exception $e) {
return jsonError('Search failed: ' . $e->getMessage()); return jsonError('Search failed: ' . $e->getMessage());
} }
$saveResult = $this->saveExperts($result['experts'], $keyword, $source); $saveResult = $this->service->saveExperts($result['experts'], $keyword, $source);
$result['saved_new'] = $saveResult['inserted']; $result['saved_new'] = $saveResult['inserted'];
$result['saved_exist'] = $saveResult['existing']; $result['saved_exist'] = $saveResult['existing'];
@@ -69,16 +55,6 @@ class ExpertFinder extends Base
/** /**
* Get experts from local database * Get experts from local database
* Params:
* field - filter by field keyword (searches t_expert_field)
* major_id - filter by major_id (searches t_expert_field)
* state - filter by state (0-5), -1 for all
* keyword - search name/email/affiliation
* no_recent - if 1, exclude experts promoted within last N days (default 30)
* recent_days - days threshold for no_recent filter, default 30
* page - page number, default 1
* per_page - items per page, default 20
* min_experts - auto-fetch threshold, default 50
*/ */
public function getList() public function getList()
{ {
@@ -171,9 +147,6 @@ class ExpertFinder extends Base
/** /**
* Update expert state * Update expert state
* Params:
* expert_id - single id or comma-separated ids
* state - new state (0-5)
*/ */
public function updateState() public function updateState()
{ {
@@ -195,9 +168,6 @@ class ExpertFinder extends Base
/** /**
* Delete expert (soft: set state=5 blacklist, or hard delete) * Delete expert (soft: set state=5 blacklist, or hard delete)
* Params:
* expert_id - single id or comma-separated ids
* hard - 1 for hard delete, default 0 (blacklist)
*/ */
public function deleteExpert() public function deleteExpert()
{ {
@@ -221,7 +191,6 @@ class ExpertFinder extends Base
/** /**
* Export search results to Excel * Export search results to Excel
* Same params as search(), exports current page results
*/ */
public function export() public function export()
{ {
@@ -240,11 +209,7 @@ class ExpertFinder extends Base
if (!$cached) { if (!$cached) {
try { try {
if ($source === 'pmc') { $cached = $this->service->searchExperts($keyword, $perPage, $minYear, $page, $source);
$cached = $this->searchViaPMC($keyword, $perPage, $minYear, $page);
} else {
$cached = $this->searchViaPubMed($keyword, $perPage, $minYear, $page);
}
Cache::set($cacheKey, $cached, 3600); Cache::set($cacheKey, $cached, 3600);
} catch (\Exception $e) { } catch (\Exception $e) {
return jsonError('Search failed: ' . $e->getMessage()); return jsonError('Search failed: ' . $e->getMessage());
@@ -276,16 +241,66 @@ class ExpertFinder extends Base
// ==================== Cron / Auto Fetch ==================== // ==================== Cron / Auto Fetch ====================
/**
 * Daily cron entry point: queues one FetchExperts job per row of the expert_fetch
 * table with state = 0.
 *
 * Request parameters: per_page (default 200, minimum 10), source (default "pubmed",
 * used when a row has no source of its own), min_year (default current year - 3).
 * Rows with an empty field are ignored; rows whose fetch log shows a last_time at or
 * after today's midnight are counted as skipped. Jobs are delayed by 10 seconds each,
 * in queueing order.
 *
 * @return mixed jsonSuccess() with queued/skipped counters and per-job details
 */
public function dailyFetchAll()
{
    $perPage = max(10, intval($this->request->param('per_page', 200)));
    $defaultSource = $this->request->param('source', 'pubmed');
    $minYear = intval($this->request->param('min_year', date('Y') - 3));

    $targets = Db::name('expert_fetch')
        ->where('state', 0)
        ->select();
    if (empty($targets)) {
        return jsonSuccess(['msg' => 'No active fetch fields found', 'queued' => 0]);
    }

    $midnight = strtotime(date('Y-m-d'));
    $skipped = 0;
    // One entry per queued job, so its size doubles as the queued counter.
    $details = [];

    foreach ($targets as $row) {
        $field = trim($row['field']);
        $rowSource = trim($row['source'] ?: $defaultSource);
        if ($field === '') {
            continue;
        }

        $log = $this->service->getFetchLog($field, $rowSource);
        if ($log['last_time'] >= $midnight) {
            $skipped++;
            continue;
        }

        $delay = count($details) * 10;
        $payload = [
            'field' => $field,
            'source' => $rowSource,
            'per_page' => $perPage,
            'min_year' => $minYear,
        ];
        \think\Queue::later($delay, 'app\api\job\FetchExperts@fire', $payload, 'FetchExperts');

        $details[] = [
            'expert_fetch_id' => $row['expert_fetch_id'],
            'field' => $field,
            'source' => $rowSource,
            'delay_s' => $delay,
        ];
    }

    return jsonSuccess([
        'queued' => count($details),
        'skipped' => $skipped,
        'details' => $details,
    ]);
}
/** /**
* Cron job: daily fetch experts for given keywords * Cron job: daily fetch experts for given keywords
* Params:
* keywords - comma-separated keywords (e.g. "biomedical engineering,tissue engineering")
* source - pubmed or pmc, default pubmed
* per_page - articles per page, default 100
* min_year - default current-3
*
* Uses cache to remember which page was last fetched per keyword,
* so each cron run fetches the next page automatically.
*/ */
public function cronFetch() public function cronFetch()
{ {
@@ -303,39 +318,14 @@ class ExpertFinder extends Base
$report = []; $report = [];
foreach ($keywords as $kw) { foreach ($keywords as $kw) {
if (empty($kw)) { if (empty($kw)) continue;
continue;
}
$fetchLog = $this->getFetchLog($kw, $source);
$page = $fetchLog['last_page'] + 1;
try { try {
if ($source === 'pmc') { $result = $this->service->doFetchForField($kw, $source, $perPage, $minYear);
$result = $this->searchViaPMC($kw, $perPage, $minYear, $page); $report[] = $result;
} else {
$result = $this->searchViaPubMed($kw, $perPage, $minYear, $page);
}
$saveResult = $this->saveExperts($result['experts'], $kw, $source);
$nextPage = $result['has_more'] ? $page : 0;
$totalPages = isset($result['total_pages']) ? $result['total_pages'] : 0;
$this->updateFetchLog($kw, $source, $nextPage, $totalPages);
$report[] = [
'keyword' => $kw,
'page' => $page,
'experts_found' => $result['total'],
'saved_new' => $saveResult['inserted'],
'saved_exist' => $saveResult['existing'],
'field_enriched' => $saveResult['field_enriched'],
'has_more' => $result['has_more'],
];
} catch (\Exception $e) { } catch (\Exception $e) {
$report[] = [ $report[] = [
'keyword' => $kw, 'keyword' => $kw,
'page' => $page,
'error' => $e->getMessage(), 'error' => $e->getMessage(),
]; ];
} }
@@ -357,7 +347,7 @@ class ExpertFinder extends Base
} }
Cache::set($lockKey, 1, 300); Cache::set($lockKey, 1, 300);
\think\Queue::push('app\api\job\FetchExperts', [ \think\Queue::push('app\api\job\FetchExperts@fire', [
'field' => $field, 'field' => $field,
'source' => 'pubmed', 'source' => 'pubmed',
'per_page' => 100, 'per_page' => 100,
@@ -365,459 +355,17 @@ class ExpertFinder extends Base
], 'FetchExperts'); ], 'FetchExperts');
} }
/** public function fetchOneField()
* Internal method: run a fetch for a single keyword (used by both cron and queue job)
*/
public function doFetchForField($field, $source = 'pubmed', $perPage = 100, $minYear = null)
{ {
if ($minYear === null) { $data = $this->request->post();
$minYear = date('Y') - 3; $rule = new Validate([
} "field" => "require"
$fetchLog = $this->getFetchLog($field, $source);
$page = $fetchLog['last_page'] + 1;
if ($source === 'pmc') {
$result = $this->searchViaPMC($field, $perPage, $minYear, $page);
} else {
$result = $this->searchViaPubMed($field, $perPage, $minYear, $page);
}
$saveResult = $this->saveExperts($result['experts'], $field, $source);
$nextPage = $result['has_more'] ? $page : 0;
$totalPages = isset($result['total_pages']) ? $result['total_pages'] : 0;
$this->updateFetchLog($field, $source, $nextPage, $totalPages);
return [
'keyword' => $field,
'page' => $page,
'experts_found' => $result['total'],
'saved_new' => $saveResult['inserted'],
'saved_exist' => $saveResult['existing'],
'field_enriched' => $saveResult['field_enriched'],
'has_more' => $result['has_more'],
];
}
// ==================== PubMed Search ====================
private function searchViaPubMed($keyword, $perPage, $minYear, $page = 1)
{
set_time_limit(600);
$searchResult = $this->esearch('pubmed', $keyword, $perPage, $minYear, $page);
$ids = $searchResult['ids'];
$totalArticles = $searchResult['total'];
if (empty($ids)) {
return $this->buildPagedResult([], 0, 0, $totalArticles, $page, $perPage, 'pubmed');
}
$allAuthors = [];
$batches = array_chunk($ids, 50);
foreach ($batches as $batch) {
$xml = $this->efetchWithRetry('pubmed', $batch);
if ($xml) {
$authors = $this->parsePubMedXml($xml);
$allAuthors = array_merge($allAuthors, $authors);
}
usleep(400000);
}
$experts = $this->aggregateExperts($allAuthors);
return $this->buildPagedResult($experts, count($experts), count($ids), $totalArticles, $page, $perPage, 'pubmed');
}
// ==================== PMC Search ====================
private function searchViaPMC($keyword, $perPage, $minYear, $page = 1)
{
set_time_limit(600);
$searchResult = $this->esearch('pmc', $keyword, $perPage, $minYear, $page);
$ids = $searchResult['ids'];
$totalArticles = $searchResult['total'];
if (empty($ids)) {
return $this->buildPagedResult([], 0, 0, $totalArticles, $page, $perPage, 'pmc');
}
$allAuthors = [];
$batches = array_chunk($ids, 5);
foreach ($batches as $batch) {
$xml = $this->efetchWithRetry('pmc', $batch);
if ($xml) {
$authors = $this->parsePMCXml($xml);
$allAuthors = array_merge($allAuthors, $authors);
}
usleep(500000);
}
$experts = $this->aggregateExperts($allAuthors);
return $this->buildPagedResult($experts, count($experts), count($ids), $totalArticles, $page, $perPage, 'pmc');
}
// ==================== NCBI API Calls ====================
private function esearch($db, $keyword, $perPage, $minYear, $page = 1)
{
$term = $keyword . ' AND ' . $minYear . ':' . date('Y') . '[pdat]';
$retstart = ($page - 1) * $perPage;
$response = $this->httpClient->get($this->ncbiBaseUrl . 'esearch.fcgi', [
'query' => [
'db' => $db,
'term' => $term,
'retstart' => $retstart,
'retmax' => $perPage,
'retmode' => 'json',
'sort' => 'relevance',
],
]); ]);
if (!$rule->check($data)) {
$data = json_decode($response->getBody()->getContents(), true); return jsonError($rule->getError());
$ids = $data['esearchresult']['idlist'] ?? [];
$total = intval($data['esearchresult']['count'] ?? 0);
return ['ids' => $ids, 'total' => $total];
} }
$res = $this->service->doFetchForField($data['field'], "pubmed", 100, date('Y') - 3);
private function efetch($db, $ids) return jsonSuccess($res);
{
$response = $this->httpClient->post($this->ncbiBaseUrl . 'efetch.fcgi', [
'form_params' => [
'db' => $db,
'id' => implode(',', $ids),
'retmode' => 'xml',
],
]);
return $response->getBody()->getContents();
}
private function efetchWithRetry($db, $ids, $maxRetries = 3)
{
for ($attempt = 1; $attempt <= $maxRetries; $attempt++) {
try {
return $this->efetch($db, $ids);
} catch (\Exception $e) {
if ($attempt === $maxRetries) {
if (count($ids) > 1) {
$half = ceil(count($ids) / 2);
$firstHalf = array_slice($ids, 0, $half);
$secondHalf = array_slice($ids, $half);
$xml1 = $this->efetchWithRetry($db, $firstHalf, 2);
$xml2 = $this->efetchWithRetry($db, $secondHalf, 2);
return $this->mergeXml($xml1, $xml2);
}
return null;
}
sleep($attempt * 2);
}
}
return null;
}
private function mergeXml($xml1, $xml2)
{
if (empty($xml1)) return $xml2;
if (empty($xml2)) return $xml1;
return $xml1 . "\n" . $xml2;
}
// ==================== PubMed XML Parsing ====================
private function parsePubMedXml($xmlString)
{
$results = [];
libxml_use_internal_errors(true);
$xml = simplexml_load_string($xmlString);
if ($xml === false) {
return $results;
}
foreach ($xml->PubmedArticle as $article) {
$citation = $article->MedlineCitation;
$articleData = $citation->Article;
$title = $this->xmlNodeToString($articleData->ArticleTitle);
$pmid = (string) $citation->PMID;
$journal = '';
if (isset($articleData->Journal->Title)) {
$journal = (string) $articleData->Journal->Title;
}
if (!isset($articleData->AuthorList->Author)) {
continue;
}
foreach ($articleData->AuthorList->Author as $author) {
$lastName = (string) ($author->LastName ?? '');
$foreName = (string) ($author->ForeName ?? '');
$fullName = trim($foreName . ' ' . $lastName);
if (empty($fullName)) {
continue;
}
$email = '';
$affiliation = '';
if (isset($author->AffiliationInfo)) {
foreach ($author->AffiliationInfo as $affInfo) {
$affText = (string) $affInfo->Affiliation;
if (empty($affiliation)) {
$affiliation = $affText;
}
if (empty($email)) {
$email = $this->extractEmailFromText($affText);
}
}
}
if (empty($email)) {
continue;
}
$results[] = [
'name' => $fullName,
'email' => strtolower($email),
'affiliation' => $this->cleanAffiliation($affiliation),
'article_title' => $title,
'article_id' => $pmid,
'journal' => $journal,
];
}
}
return $results;
}
// ==================== PMC XML Parsing ====================
private function parsePMCXml($xmlString)
{
$results = [];
libxml_use_internal_errors(true);
$xml = simplexml_load_string($xmlString);
if ($xml === false) {
return $results;
}
$articles = $xml->article ?? $xml->children();
foreach ($articles as $article) {
if ($article->getName() !== 'article') {
continue;
}
$front = $article->front;
if (!$front) {
continue;
}
$articleMeta = $front->{'article-meta'};
if (!$articleMeta) {
continue;
}
$title = $this->xmlNodeToString($articleMeta->{'title-group'}->{'article-title'} ?? null);
$pmcId = '';
if (isset($articleMeta->{'article-id'})) {
foreach ($articleMeta->{'article-id'} as $idNode) {
if ((string) $idNode['pub-id-type'] === 'pmc') {
$pmcId = (string) $idNode;
}
}
}
$journal = '';
if (isset($front->{'journal-meta'}->{'journal-title'})) {
$journal = (string) $front->{'journal-meta'}->{'journal-title'};
} elseif (isset($front->{'journal-meta'}->{'journal-title-group'}->{'journal-title'})) {
$journal = (string) $front->{'journal-meta'}->{'journal-title-group'}->{'journal-title'};
}
$correspEmails = [];
if (isset($articleMeta->{'author-notes'})) {
$this->extractEmailsFromNode($articleMeta->{'author-notes'}, $correspEmails);
}
$affiliationMap = [];
if (isset($articleMeta->{'contrib-group'})) {
foreach ($articleMeta->{'contrib-group'}->children() as $child) {
if ($child->getName() === 'aff') {
$affId = (string) ($child['id'] ?? '');
$affText = $this->xmlNodeToString($child);
if ($affId) {
$affiliationMap[$affId] = $affText;
}
}
}
}
if (isset($front->{'article-meta'}->{'aff'})) {
foreach ($front->{'article-meta'}->{'aff'} as $aff) {
$affId = (string) ($aff['id'] ?? '');
$affText = $this->xmlNodeToString($aff);
if ($affId) {
$affiliationMap[$affId] = $affText;
}
}
}
if (!isset($articleMeta->{'contrib-group'})) {
continue;
}
foreach ($articleMeta->{'contrib-group'}->contrib as $contrib) {
$contribType = (string) ($contrib['contrib-type'] ?? '');
if ($contribType !== 'author') {
continue;
}
$nameNode = $contrib->name;
if (!$nameNode) {
continue;
}
$surname = (string) ($nameNode->surname ?? '');
$givenNames = (string) ($nameNode->{'given-names'} ?? '');
$fullName = trim($givenNames . ' ' . $surname);
if (empty($fullName)) {
continue;
}
$email = '';
if (isset($contrib->email)) {
$email = strtolower(trim((string) $contrib->email));
}
$affiliation = '';
if (isset($contrib->xref)) {
foreach ($contrib->xref as $xref) {
if ((string) $xref['ref-type'] === 'aff') {
$rid = (string) $xref['rid'];
if (isset($affiliationMap[$rid])) {
$affiliation = $affiliationMap[$rid];
break;
}
}
}
}
if (empty($affiliation) && isset($contrib->aff)) {
$affiliation = $this->xmlNodeToString($contrib->aff);
}
$isCorresponding = false;
if (isset($contrib->xref)) {
foreach ($contrib->xref as $xref) {
if ((string) $xref['ref-type'] === 'corresp') {
$isCorresponding = true;
}
}
}
if ((string) ($contrib['corresp'] ?? '') === 'yes') {
$isCorresponding = true;
}
if (empty($email) && $isCorresponding && !empty($correspEmails)) {
$email = $correspEmails[0];
}
if (empty($email)) {
$extracted = $this->extractEmailFromText($affiliation);
if ($extracted) {
$email = $extracted;
}
}
if (empty($email)) {
continue;
}
$results[] = [
'name' => $fullName,
'email' => strtolower($email),
'affiliation' => $this->cleanAffiliation($affiliation),
'article_title' => $title,
'article_id' => $pmcId,
'journal' => $journal,
];
}
}
return $results;
}
// ==================== Pagination ====================
private function buildPagedResult($experts, $expertCount, $articlesScanned, $totalArticles, $page, $perPage, $source)
{
$totalPages = $totalArticles > 0 ? ceil($totalArticles / $perPage) : 0;
return [
'experts' => $experts,
'total' => $expertCount,
'articles_scanned' => $articlesScanned,
'total_articles' => $totalArticles,
'page' => $page,
'per_page' => $perPage,
'total_pages' => $totalPages,
'has_more' => $page < $totalPages,
'source' => $source,
];
}
// ==================== Aggregation ====================
private function aggregateExperts($authorRecords)
{
$map = [];
foreach ($authorRecords as $record) {
$key = strtolower(trim($record['email']));
if (empty($key)) {
continue;
}
if (!isset($map[$key])) {
$map[$key] = [
'name' => $record['name'],
'email' => $record['email'],
'affiliation' => $record['affiliation'],
'paper_count' => 0,
'papers' => [],
];
}
$map[$key]['paper_count']++;
if (count($map[$key]['papers']) < 10) {
$map[$key]['papers'][] = [
'title' => $record['article_title'],
'article_id' => $record['article_id'],
'journal' => $record['journal'],
];
}
if (empty($map[$key]['affiliation']) && !empty($record['affiliation'])) {
$map[$key]['affiliation'] = $record['affiliation'];
}
}
$experts = array_values($map);
usort($experts, function ($a, $b) {
return $b['paper_count'] - $a['paper_count'];
});
return $experts;
} }
// ==================== Excel Export ==================== // ==================== Excel Export ====================
@@ -880,188 +428,4 @@ class ExpertFinder extends Base
'count' => count($experts), 'count' => count($experts),
]); ]);
} }
// ==================== Database Storage ====================
private function saveExperts($experts, $field, $source)
{
$inserted = 0;
$existing = 0;
$fieldEnrich = 0;
foreach ($experts as $expert) {
$email = strtolower(trim($expert['email']));
if (empty($email)) {
continue;
}
$exists = Db::name('expert')->where('email', $email)->find();
if ($exists) {
$existing++;
$fieldEnrich += $this->enrichExpertField($exists['expert_id'], $field);
continue;
}
$insert = [
'name' => mb_substr($expert['name'], 0, 255),
'email' => mb_substr($email, 0, 128),
'affiliation' => mb_substr($expert['affiliation'], 0, 128),
'source' => mb_substr($source, 0, 128),
'ctime' => time(),
'ltime' => 0,
'state' => 0,
];
try {
$expertId = Db::name('expert')->insertGetId($insert);
$this->enrichExpertField($expertId, $field);
$inserted++;
} catch (\Exception $e) {
$existing++;
}
}
return ['inserted' => $inserted, 'existing' => $existing, 'field_enriched' => $fieldEnrich];
}
// ==================== Fetch Log (t_expert_fetch) ====================
/**
 * Load the expert_fetch progress row for a field/source pair.
 *
 * @param string $field  field keyword
 * @param string $source source name
 * @return array the stored row, or zeroed last_page / total_pages / last_time
 *               when no row exists
 */
private function getFetchLog($field, $source)
{
    $row = Db::name('expert_fetch')
        ->where('field', $field)
        ->where('source', $source)
        ->find();
    return $row ? $row : ['last_page' => 0, 'total_pages' => 0, 'last_time' => 0];
}
/**
 * Record fetch progress for a field/source pair, creating the row if needed.
 *
 * @param string $field      field keyword
 * @param string $source     source name
 * @param int    $lastPage   last page that was fetched
 * @param int    $totalPages total number of pages reported
 * @return void
 */
private function updateFetchLog($field, $source, $lastPage, $totalPages)
{
    $current = Db::name('expert_fetch')
        ->where('field', $field)
        ->where('source', $source)
        ->find();
    $progress = [
        'last_page' => $lastPage,
        'total_pages' => $totalPages,
        'last_time' => time(),
    ];
    if (!$current) {
        // First time this pair is seen: store the (truncated) identifying columns too.
        $identity = [
            'field' => mb_substr($field, 0, 128),
            'source' => mb_substr($source, 0, 128),
        ];
        Db::name('expert_fetch')->insert($identity + $progress);
        return;
    }
    Db::name('expert_fetch')
        ->where('expert_fetch_id', $current['expert_fetch_id'])
        ->update($progress);
}
/**
 * Attach a field keyword to an expert unless an active link already exists.
 *
 * @param int    $expertId expert primary key
 * @param string $field    field keyword (trimmed before use)
 * @return int 1 when a new expert_field row was inserted, 0 otherwise
 */
private function enrichExpertField($expertId, $field)
{
    $label = trim($field);
    if (empty($label)) {
        return 0;
    }
    $alreadyLinked = Db::name('expert_field')
        ->where('expert_id', $expertId)
        ->where('field', $label)
        ->where('state', 0)
        ->find();
    if (!$alreadyLinked) {
        Db::name('expert_field')->insert([
            'expert_id' => $expertId,
            'major_id' => 0,
            'field' => mb_substr($label, 0, 128),
            'state' => 0,
        ]);
        return 1;
    }
    return 0;
}
// ==================== Helper Methods ====================
/**
 * Pull the first e-mail address out of a free-text string.
 *
 * Patterns are tried in order: an "Electronic address:" label, an
 * "E-mail:" / "Email:" label, then any bare address. The match is stripped
 * of leading/trailing dots and lower-cased.
 *
 * @param string $text text to scan
 * @return string the address, or '' when the text is empty or nothing matches
 */
private function extractEmailFromText($text)
{
    if (empty($text)) {
        return '';
    }
    $patterns = [
        '/[Ee]lectronic address:\s*([^\s;,]+@[^\s;,]+)/',
        '/[Ee]-?mail:\s*([^\s;,]+@[^\s;,]+)/',
        '/\b([a-zA-Z0-9._%+\-]+@[a-zA-Z0-9.\-]+\.[a-zA-Z]{2,})\b/',
    ];
    foreach ($patterns as $pattern) {
        if (preg_match($pattern, $text, $hit)) {
            return strtolower(trim($hit[1], '.'));
        }
    }
    return '';
}
/**
 * Recursively collect e-mail addresses from an XML node into $emails.
 *
 * Child elements named "email" contribute their text directly; every child
 * is then visited recursively, and finally the node's own string value is
 * scanned for bare addresses. Addresses are lower-cased and appended only
 * when not already present.
 *
 * @param object|null $node   node exposing children() / getName() and a
 *                            string cast; null is ignored
 * @param array       $emails accumulator, modified in place
 * @return void
 */
private function extractEmailsFromNode($node, &$emails)
{
    if ($node === null) {
        return;
    }
    foreach ($node->children() as $childNode) {
        if ($childNode->getName() === 'email') {
            $candidate = strtolower(trim((string) $childNode));
            if (!empty($candidate) && !in_array($candidate, $emails)) {
                $emails[] = $candidate;
            }
        }
        $this->extractEmailsFromNode($childNode, $emails);
    }
    $found = [];
    $ownText = (string) $node;
    if (!preg_match_all('/\b([a-zA-Z0-9._%+\-]+@[a-zA-Z0-9.\-]+\.[a-zA-Z]{2,})\b/', $ownText, $found)) {
        return;
    }
    foreach ($found[1] as $raw) {
        $address = strtolower(trim($raw, '.'));
        if (!in_array($address, $emails)) {
            $emails[] = $address;
        }
    }
}
/**
 * Strip e-mail addresses (labelled or bare) from an affiliation string and
 * trim surrounding whitespace and ".", ",", ";" characters.
 *
 * @param string $text raw affiliation text
 * @return string cleaned affiliation
 */
private function cleanAffiliation($text)
{
    // preg_replace applies an array of patterns one after another, in order.
    $stripped = preg_replace(
        [
            '/\s*[Ee]lectronic address:\s*[^\s;,]+@[^\s;,]+/',
            '/\s*[Ee]-?mail:\s*[^\s;,]+@[^\s;,]+/',
            '/\s*\b[a-zA-Z0-9._%+\-]+@[a-zA-Z0-9.\-]+\.[a-zA-Z]{2,}\b/',
        ],
        '',
        $text
    );
    return trim($stripped, " \t\n\r\0\x0B.,;");
}
/**
 * Flatten an XML node to plain text: serialise it, drop all tags, decode
 * entities and collapse runs of whitespace to single spaces.
 *
 * @param object|null $node node exposing asXML(); null yields ''
 * @return string trimmed plain-text content
 */
private function xmlNodeToString($node)
{
    if ($node === null) {
        return '';
    }
    $plain = html_entity_decode(strip_tags($node->asXML()), ENT_QUOTES | ENT_XML1, 'UTF-8');
    $collapsed = preg_replace('/\s+/', ' ', $plain);
    return trim($collapsed);
}
} }

View File

@@ -0,0 +1,651 @@
<?php
namespace app\api\controller;
use think\Db;
class ExpertManage extends Base
{
/**
 * Display label for each numeric value of the expert state column:
 * 0 = pending contact, 1 = e-mail sent, 2 = replied, 3 = manuscript submitted,
 * 4 = bounced / invalid address, 5 = blacklisted (unsubscribed).
 * Used to build the state_text / "State" output of this controller.
 */
private $stateMap = [
0 => '待联系',
1 => '已发邮件',
2 => '已回复',
3 => '已投稿',
4 => '退信/无效',
5 => '黑名单(退订)',
];
/**
 * Forwards the request to the Base controller constructor; no extra setup.
 *
 * @param \think\Request|null $request current request, passed through unchanged
 */
public function __construct(\think\Request $request = null)
{
parent::__construct($request);
}
/**
 * List experts with optional filters and pagination.
 *
 * Request parameters:
 *   keyword   - substring matched against name, e-mail or affiliation
 *   field     - substring matched against the expert's active field keywords
 *   state     - state filter (0-5); "-1", "" or absent disables it
 *   source    - exact match on the source column
 *   pageIndex - 1-based page number (default 1)
 *   pageSize  - rows per page (default 20)
 *
 * No major_id filter is applied by this method.
 *
 * @return mixed jsonSuccess() payload with list, total, pageIndex, pageSize, totalPages
 */
public function getList()
{
    $data = $this->request->param();
    $keyword = trim(isset($data['keyword']) ? $data['keyword'] : '');
    $field = trim(isset($data['field']) ? $data['field'] : '');
    $state = isset($data['state']) ? $data['state'] : '-1';
    $source = trim(isset($data['source']) ? $data['source'] : '');
    $page = max(1, intval(isset($data['pageIndex']) ? $data['pageIndex'] : 1));
    $pageSize = max(1, intval(isset($data['pageSize']) ? $data['pageSize'] : 20));
    $needJoin = ($field !== '');

    // The list query and the count query must carry exactly the same filters,
    // so both are built through one closure instead of duplicated statements.
    $applyFilters = function ($q) use ($needJoin, $field, $state, $keyword, $source) {
        if ($needJoin) {
            $q->join('t_expert_field ef', 'ef.expert_id = e.expert_id AND ef.state = 0', 'inner');
            $q->where('ef.field', 'like', '%' . $field . '%');
            // One expert may match several field rows; group to return it once.
            $q->group('e.expert_id');
        }
        if ($state !== '-1' && $state !== '') {
            $q->where('e.state', intval($state));
        }
        if ($keyword !== '') {
            $q->where('e.name|e.email|e.affiliation', 'like', '%' . $keyword . '%');
        }
        if ($source !== '') {
            $q->where('e.source', $source);
        }
        return $q;
    };
    $query = $applyFilters(Db::name('expert')->alias('e'));
    $countQuery = $applyFilters(Db::name('expert')->alias('e'));

    // With GROUP BY a plain count() would count per group, so count the grouped ids instead.
    $total = $needJoin ? count($countQuery->column('e.expert_id')) : $countQuery->count();

    $list = $query
        ->field('e.*')
        ->order('e.ctime desc')
        ->page($page, $pageSize)
        ->select();

    // Load the active field rows for the whole page in one query (instead of
    // one query per expert) and bucket them by expert_id.
    $fieldsByExpert = [];
    if (!empty($list)) {
        $fieldRows = Db::name('expert_field')
            ->where('expert_id', 'in', array_column($list, 'expert_id'))
            ->where('state', 0)
            ->select();
        foreach ($fieldRows as $fieldRow) {
            $fieldsByExpert[$fieldRow['expert_id']][] = $fieldRow;
        }
    }

    // Index-based loop: avoids leaving a dangling by-reference loop variable.
    foreach ($list as $idx => $item) {
        $list[$idx]['fields'] = isset($fieldsByExpert[$item['expert_id']]) ? $fieldsByExpert[$item['expert_id']] : [];
        $list[$idx]['state_text'] = isset($this->stateMap[$item['state']]) ? $this->stateMap[$item['state']] : '未知';
        $list[$idx]['ctime_text'] = $item['ctime'] ? date('Y-m-d H:i:s', $item['ctime']) : '';
        $list[$idx]['ltime_text'] = $item['ltime'] ? date('Y-m-d H:i:s', $item['ltime']) : '';
    }

    return jsonSuccess([
        'list' => $list,
        'total' => $total,
        'pageIndex' => $page,
        'pageSize' => $pageSize,
        'totalPages' => $total > 0 ? ceil($total / $pageSize) : 0,
    ]);
}
/**
 * Return one expert together with all of its active field rows.
 *
 * Request parameter: expert_id (required).
 *
 * @return mixed jsonError() when the id is missing or unknown, otherwise
 *               jsonSuccess() with the expert row plus fields, state_text,
 *               ctime_text and ltime_text
 */
public function getDetail()
{
    $id = intval($this->request->param('expert_id', 0));
    if ($id === 0) {
        return jsonError('expert_id is required');
    }

    $row = Db::name('expert')->where('expert_id', $id)->find();
    if (!$row) {
        return jsonError('专家不存在');
    }

    $activeFields = Db::name('expert_field')
        ->where('expert_id', $id)
        ->where('state', 0)
        ->select();
    $row['fields'] = $activeFields;

    if (isset($this->stateMap[$row['state']])) {
        $row['state_text'] = $this->stateMap[$row['state']];
    } else {
        $row['state_text'] = '未知';
    }
    $row['ctime_text'] = $row['ctime'] ? date('Y-m-d H:i:s', $row['ctime']) : '';
    $row['ltime_text'] = $row['ltime'] ? date('Y-m-d H:i:s', $row['ltime']) : '';

    return jsonSuccess($row);
}
/**
 * Create an expert from POST data.
 *
 * POST fields: name and email (both required), affiliation, source
 * (defaults to "manual"), fields (optional list handed to saveExpertFields()).
 * Fails when name/email is blank or the e-mail is already stored.
 *
 * @return mixed jsonError() on validation failure, else jsonSuccess(['expert_id' => id])
 */
public function addExpert()
{
    $post = $this->request->post();
    // Read a POST value, trimmed, falling back to $default when absent.
    $read = function ($key, $default) use ($post) {
        return trim(isset($post[$key]) ? $post[$key] : $default);
    };

    $name = $read('name', '');
    $email = $read('email', '');
    if ($name === '' || $email === '') {
        return jsonError('name和email不能为空');
    }

    $duplicate = Db::name('expert')->where('email', $email)->find();
    if ($duplicate) {
        return jsonError('该邮箱已存在expert_id=' . $duplicate['expert_id']);
    }

    $newId = Db::name('expert')->insertGetId([
        'name' => $name,
        'email' => $email,
        'affiliation' => $read('affiliation', ''),
        'source' => $read('source', 'manual'),
        'ctime' => time(),
        'ltime' => 0,
        'state' => 0,
    ]);

    if (!empty($post['fields'])) {
        $this->saveExpertFields($newId, $post['fields']);
    }
    return jsonSuccess(['expert_id' => $newId]);
}
/**
 * Update an existing expert from POST data.
 *
 * POST fields: expert_id (required); name, email, affiliation, source, state
 * (each applied only when present); fields (when present, all active field
 * rows are soft-deleted and the supplied list is saved in their place).
 *
 * The same rules as addExpert() are enforced on edit: name and email may not
 * be blanked, and the e-mail may not collide with another expert's.
 *
 * @return mixed jsonError() on validation failure, else jsonSuccess(['expert_id' => id])
 */
public function editExpert()
{
    $data = $this->request->post();
    $expertId = intval(isset($data['expert_id']) ? $data['expert_id'] : 0);
    if (!$expertId) {
        return jsonError('expert_id is required');
    }
    $expert = Db::name('expert')->where('expert_id', $expertId)->find();
    if (!$expert) {
        return jsonError('专家不存在');
    }

    $update = [];
    if (isset($data['name'])) {
        $name = trim($data['name']);
        if ($name === '') {
            return jsonError('name和email不能为空');
        }
        $update['name'] = $name;
    }
    if (isset($data['email'])) {
        $email = trim($data['email']);
        if ($email === '') {
            return jsonError('name和email不能为空');
        }
        // Reject an address that already belongs to a different expert.
        $owner = Db::name('expert')
            ->where('email', $email)
            ->where('expert_id', '<>', $expertId)
            ->find();
        if ($owner) {
            return jsonError('该邮箱已存在expert_id=' . $owner['expert_id']);
        }
        $update['email'] = $email;
    }
    if (isset($data['affiliation'])) {
        $update['affiliation'] = trim($data['affiliation']);
    }
    if (isset($data['source'])) {
        $update['source'] = trim($data['source']);
    }
    if (isset($data['state'])) {
        $update['state'] = intval($data['state']);
    }
    if (!empty($update)) {
        Db::name('expert')->where('expert_id', $expertId)->update($update);
    }

    if (isset($data['fields'])) {
        // Replace semantics: retire every active link, then re-add the submitted ones.
        Db::name('expert_field')->where('expert_id', $expertId)->where('state', 0)->update(['state' => 1]);
        if (!empty($data['fields'])) {
            $this->saveExpertFields($expertId, $data['fields']);
        }
    }
    return jsonSuccess(['expert_id' => $expertId]);
}
/**
 * Set the state of several experts at once.
 *
 * POST fields:
 *   expert_ids - comma-separated id list, e.g. "1,2,3"
 *   state      - target state, 0-5
 *
 * @return mixed jsonError() on invalid input, else jsonSuccess(['updated' => row count])
 */
public function updateState()
{
    $post = $this->request->post();
    $rawIds = isset($post['expert_ids']) ? $post['expert_ids'] : '';
    $target = intval(isset($post['state']) ? $post['state'] : -1);

    if (empty($rawIds)) {
        return jsonError('expert_ids is required');
    }
    if (!($target >= 0 && $target <= 5)) {
        return jsonError('state取值范围0-5');
    }

    $idList = [];
    foreach (explode(',', $rawIds) as $piece) {
        $idList[] = intval($piece);
    }
    $affected = Db::name('expert')->where('expert_id', 'in', $idList)->update(['state' => $target]);
    return jsonSuccess(['updated' => $affected]);
}
/**
 * Delete experts. By default this is a soft delete that sets state 5
 * (blacklist); with hard=1 the expert rows and their expert_field rows are
 * physically removed.
 *
 * POST fields:
 *   expert_ids - comma-separated id list
 *   hard       - pass 1 for physical deletion
 *
 * @return mixed jsonError() when expert_ids is missing, else
 *               jsonSuccess(['affected' => number of expert rows changed])
 * @throws \Exception re-thrown after rollback when a hard delete fails
 */
public function deleteExpert()
{
    $data = $this->request->post();
    $expertIds = isset($data['expert_ids']) ? $data['expert_ids'] : '';
    $hard = intval(isset($data['hard']) ? $data['hard'] : 0);
    if (empty($expertIds)) {
        return jsonError('expert_ids is required');
    }
    $ids = array_map('intval', explode(',', $expertIds));

    if (!$hard) {
        $count = Db::name('expert')->where('expert_id', 'in', $ids)->update(['state' => 5]);
        return jsonSuccess(['affected' => $count]);
    }

    // The two deletes belong together: run them in one transaction so a
    // failure cannot leave experts stripped of their fields but still present.
    Db::startTrans();
    try {
        Db::name('expert_field')->where('expert_id', 'in', $ids)->delete();
        $count = Db::name('expert')->where('expert_id', 'in', $ids)->delete();
        Db::commit();
    } catch (\Exception $e) {
        Db::rollback();
        throw $e;
    }
    return jsonSuccess(['affected' => $count]);
}
/**
 * Attach a field keyword to an expert.
 *
 * POST fields: expert_id and field (both required), major_id (optional, default 0).
 * Fails when the expert already has an active row with the same field text.
 *
 * @return mixed jsonError() on failure, else jsonSuccess(['expert_field_id' => id])
 */
public function addField()
{
    $post = $this->request->post();
    $expertId = intval(isset($post['expert_id']) ? $post['expert_id'] : 0);
    $majorId = intval(isset($post['major_id']) ? $post['major_id'] : 0);
    $label = trim(isset($post['field']) ? $post['field'] : '');

    if ($expertId === 0 || $label === '') {
        return jsonError('expert_id和field不能为空');
    }

    $duplicate = Db::name('expert_field')
        ->where('expert_id', $expertId)
        ->where('field', $label)
        ->where('state', 0)
        ->find();
    if ($duplicate) {
        return jsonError('该领域已存在');
    }

    $row = [
        'expert_id' => $expertId,
        'major_id' => $majorId,
        'field' => $label,
        'state' => 0,
    ];
    $newId = Db::name('expert_field')->insertGetId($row);
    return jsonSuccess(['expert_field_id' => $newId]);
}
/**
 * Soft-delete one expert_field row by setting its state to 1.
 *
 * Request parameter: expert_field_id (required).
 *
 * @return mixed jsonError() when the id is missing, else jsonSuccess([])
 */
public function removeField()
{
    $fieldRowId = intval($this->request->param('expert_field_id', 0));
    if ($fieldRowId === 0) {
        return jsonError('expert_field_id is required');
    }
    Db::name('expert_field')
        ->where('expert_field_id', $fieldRowId)
        ->update(['state' => 1]);
    return jsonSuccess([]);
}
/**
 * Return the distinct field keywords of all active expert_field rows
 * (intended for a filter drop-down).
 *
 * @return mixed jsonSuccess() with the list of field strings
 */
public function getFieldOptions()
{
    $activeFields = Db::name('expert_field')->where('state', 0);
    $options = $activeFields->group('field')->column('field');
    return jsonSuccess($options);
}
/**
 * Return the distinct non-empty source values of the expert table
 * (intended for a filter drop-down).
 *
 * @return mixed jsonSuccess() with the list of source strings
 */
public function getSourceOptions()
{
    $withSource = Db::name('expert')->where('source', '<>', '');
    $options = $withSource->group('source')->column('source');
    return jsonSuccess($options);
}
/**
 * Export the experts matching the given filters to an .xlsx file under
 * public/exports and return its URL.
 *
 * Request parameters (all optional):
 *   field   - substring matched against the expert's active field keywords;
 *             when empty every expert is eligible and the file is labelled "all"
 *   state   - state filter; "-1", "" or absent disables it
 *   keyword - substring matched against name, e-mail or affiliation
 *   source  - exact match on the source column
 *
 * No major_id filter is applied by this method.
 *
 * Free-text columns (name, e-mail, affiliation, source, fields) are written
 * as explicit string cells so that a value beginning with "=" is stored as
 * text rather than being interpreted as a spreadsheet formula.
 *
 * @return mixed jsonError() when nothing matches, else jsonSuccess() with
 *               file_url, file_name and count
 */
public function exportExcel()
{
    $data = $this->request->param();
    $field = trim(isset($data['field']) ? $data['field'] : '');
    $state = isset($data['state']) ? $data['state'] : '-1';
    $keyword = trim(isset($data['keyword']) ? $data['keyword'] : '');
    $source = trim(isset($data['source']) ? $data['source'] : '');

    $query = Db::name('expert')->alias('e');
    if ($field !== '') {
        $query->join('t_expert_field ef', 'ef.expert_id = e.expert_id AND ef.state = 0', 'inner');
        $query->where('ef.field', 'like', '%' . $field . '%');
        // One expert may match several field rows; group to export it once.
        $query->group('e.expert_id');
    }
    if ($state !== '-1' && $state !== '') {
        $query->where('e.state', intval($state));
    }
    if ($keyword !== '') {
        $query->where('e.name|e.email|e.affiliation', 'like', '%' . $keyword . '%');
    }
    if ($source !== '') {
        $query->where('e.source', $source);
    }
    $list = $query->field('e.*')->order('e.ctime desc')->select();
    if (empty($list)) {
        return jsonError('没有符合条件的数据可导出');
    }

    // Fetch every active field row for the exported experts in one query.
    $expertIds = array_column($list, 'expert_id');
    $allFields = Db::name('expert_field')
        ->where('expert_id', 'in', $expertIds)
        ->where('state', 0)
        ->select();
    $fieldMap = [];
    foreach ($allFields as $f) {
        $fieldMap[$f['expert_id']][] = $f['field'];
    }

    vendor("PHPExcel.PHPExcel");
    $objPHPExcel = new \PHPExcel();
    $sheet = $objPHPExcel->getActiveSheet();
    $sheet->setTitle('Expert List');

    $headers = [
        'A' => '#',
        'B' => 'Name',
        'C' => 'Email',
        'D' => 'Affiliation',
        'E' => 'Source',
        'F' => 'Fields',
        'G' => 'State',
        'H' => 'Add Time',
        'I' => 'Last Promotion',
    ];
    foreach ($headers as $col => $header) {
        $sheet->setCellValue($col . '1', $header);
    }
    $headerStyle = [
        'font' => ['bold' => true, 'color' => ['rgb' => 'FFFFFF']],
        'fill' => ['type' => \PHPExcel_Style_Fill::FILL_SOLID, 'startcolor' => ['rgb' => '4472C4']],
        'alignment' => ['horizontal' => \PHPExcel_Style_Alignment::HORIZONTAL_CENTER],
    ];
    $sheet->getStyle('A1:I1')->applyFromArray($headerStyle);

    foreach ($list as $i => $item) {
        $row = $i + 2; // row 1 holds the headers
        $fields = isset($fieldMap[$item['expert_id']]) ? implode(', ', $fieldMap[$item['expert_id']]) : '';
        $stateText = isset($this->stateMap[$item['state']]) ? $this->stateMap[$item['state']] : '未知';

        $sheet->setCellValue('A' . $row, $i + 1);
        // Externally sourced text: force string cells (no formula evaluation).
        $textCells = [
            'B' => $item['name'],
            'C' => $item['email'],
            'D' => $item['affiliation'],
            'E' => $item['source'],
            'F' => $fields,
        ];
        foreach ($textCells as $col => $text) {
            $sheet->setCellValueExplicit($col . $row, (string) $text, \PHPExcel_Cell_DataType::TYPE_STRING);
        }
        $sheet->setCellValue('G' . $row, $stateText);
        $sheet->setCellValue('H' . $row, $item['ctime'] ? date('Y-m-d H:i:s', $item['ctime']) : '');
        $sheet->setCellValue('I' . $row, $item['ltime'] ? date('Y-m-d H:i:s', $item['ltime']) : '');
    }

    $widths = ['A' => 6, 'B' => 25, 'C' => 35, 'D' => 40, 'E' => 15, 'F' => 50, 'G' => 15, 'H' => 20, 'I' => 20];
    foreach ($widths as $col => $width) {
        $sheet->getColumnDimension($col)->setWidth($width);
    }

    // Keep only ASCII letters/digits, "_" and CJK characters in the file label.
    $label = $field !== '' ? preg_replace('/[^a-zA-Z0-9_\x{4e00}-\x{9fa5}]/u', '_', $field) : 'all';
    $filename = 'expert_' . $label . '_' . date('Ymd_His') . '.xlsx';
    $dir = ROOT_PATH . 'public' . DS . 'exports';
    if (!is_dir($dir)) {
        mkdir($dir, 0777, true);
    }
    $filepath = $dir . DS . $filename;
    $writer = \PHPExcel_IOFactory::createWriter($objPHPExcel, 'Excel2007');
    $writer->save($filepath);

    return jsonSuccess([
        'file_url' => '/exports/' . $filename,
        'file_name' => $filename,
        'count' => count($list),
    ]);
}
/**
 * Insert several field links for one expert, skipping blanks and links that
 * are already active.
 *
 * @param int          $expertId expert primary key
 * @param array|string $fields   list like [{"major_id":1,"field":"xxx"}, ...];
 *                               a JSON string is decoded first, anything that
 *                               is not an array afterwards is ignored
 * @return void
 */
private function saveExpertFields($expertId, $fields)
{
    $entries = is_string($fields) ? json_decode($fields, true) : $fields;
    if (!is_array($entries)) {
        return;
    }
    foreach ($entries as $entry) {
        $majorId = intval(isset($entry['major_id']) ? $entry['major_id'] : 0);
        $label = trim(isset($entry['field']) ? $entry['field'] : '');
        if ($label === '') {
            continue;
        }
        $active = Db::name('expert_field')
            ->where('expert_id', $expertId)
            ->where('field', $label)
            ->where('state', 0)
            ->find();
        if (!$active) {
            Db::name('expert_field')->insert([
                'expert_id' => $expertId,
                'major_id' => $majorId,
                'field' => $label,
                'state' => 0,
            ]);
        }
    }
}
// ==================== Expert Fetch Field Management ====================
/**
 * Paginated list of fetch fields (expert_fetch rows) with usage counters.
 *
 * Request parameters: state ("-1", "" or absent = no filter), keyword
 * (substring of field), pageIndex (default 1), pageSize (default 20).
 *
 * Each row is extended with last_time_text, ctime_text, journal_count
 * (active journal_promotion_field rows for the fetch id) and expert_count
 * (distinct state-0 experts having an active field containing the keyword).
 *
 * @return mixed jsonSuccess() with list, total, pageIndex, pageSize, totalPages
 */
public function getFetchList()
{
    $params = $this->request->param();
    $state = isset($params['state']) ? $params['state'] : '-1';
    $keyword = trim(isset($params['keyword']) ? $params['keyword'] : '');
    $page = max(1, intval(isset($params['pageIndex']) ? $params['pageIndex'] : 1));
    $pageSize = max(1, intval(isset($params['pageSize']) ? $params['pageSize'] : 20));

    // Same filters for the page query and the count query.
    $filtered = function () use ($state, $keyword) {
        $q = Db::name('expert_fetch');
        if ($state !== '-1' && $state !== '') {
            $q->where('state', intval($state));
        }
        if ($keyword !== '') {
            $q->where('field', 'like', '%' . $keyword . '%');
        }
        return $q;
    };
    $query = $filtered();
    $countQuery = $filtered();

    $total = $countQuery->count();
    $list = $query->order('expert_fetch_id desc')->page($page, $pageSize)->select();

    foreach ($list as $idx => $row) {
        $list[$idx]['last_time_text'] = $row['last_time'] ? date('Y-m-d H:i:s', $row['last_time']) : '';
        $list[$idx]['ctime_text'] = $row['ctime'] ? date('Y-m-d H:i:s', $row['ctime']) : '';
        $needle = trim($row['field']);
        $list[$idx]['journal_count'] = Db::name('journal_promotion_field')
            ->where('expert_fetch_id', $row['expert_fetch_id'])
            ->where('state', 0)
            ->count();
        // Same keyword matching as promotion recipient selection: count distinct usable experts.
        $list[$idx]['expert_count'] = Db::name('expert_field')->alias('ef')
            ->join('t_expert e', 'e.expert_id = ef.expert_id', 'inner')
            ->where('ef.state', 0)
            ->where('e.state', 0)
            ->where('ef.field', 'like', '%' . $needle . '%')
            ->count('distinct ef.expert_id');
    }

    return jsonSuccess([
        'list' => $list,
        'total' => $total,
        'pageIndex' => $page,
        'pageSize' => $pageSize,
        'totalPages' => $total > 0 ? ceil($total / $pageSize) : 0,
    ]);
}
/**
 * Register a new fetch field.
 *
 * POST fields: field (required), source (optional, default "pubmed").
 * When a row with the same field/source exists and its state is 1 it is
 * reactivated; when it exists with another state the call fails.
 *
 * @return mixed jsonError() on failure, else jsonSuccess() with expert_fetch_id
 */
public function addFetchField()
{
    $post = $this->request->post();
    $field = trim(isset($post['field']) ? $post['field'] : '');
    $source = trim(isset($post['source']) ? $post['source'] : 'pubmed');
    if ($field === '') {
        return jsonError('field不能为空');
    }

    $current = Db::name('expert_fetch')
        ->where('field', $field)
        ->where('source', $source)
        ->find();

    if (!$current) {
        $newId = Db::name('expert_fetch')->insertGetId([
            'field' => mb_substr($field, 0, 128),
            'source' => mb_substr($source, 0, 128),
            'last_page' => 0,
            'total_pages' => 0,
            'last_time' => 0,
            'state' => 0,
            'ctime' => time(),
        ]);
        return jsonSuccess(['expert_fetch_id' => $newId]);
    }

    if ($current['state'] != 1) {
        return jsonError('该领域已存在 (expert_fetch_id=' . $current['expert_fetch_id'] . ')');
    }

    // Previously disabled entry: switch it back on instead of inserting a duplicate.
    Db::name('expert_fetch')
        ->where('expert_fetch_id', $current['expert_fetch_id'])
        ->update(['state' => 0]);
    return jsonSuccess(['expert_fetch_id' => $current['expert_fetch_id'], 'msg' => 'reactivated']);
}
/**
 * Edit a fetch field.
 *
 * POST fields: expert_fetch_id (required); field, source, state (each
 * applied only when present; field/source are trimmed and cut to 128 chars).
 *
 * @return mixed jsonError() when the id is missing or unknown, else
 *               jsonSuccess(['expert_fetch_id' => id])
 */
public function editFetchField()
{
    $post = $this->request->post();
    $fetchId = intval(isset($post['expert_fetch_id']) ? $post['expert_fetch_id'] : 0);
    if ($fetchId === 0) {
        return jsonError('expert_fetch_id is required');
    }
    if (!Db::name('expert_fetch')->where('expert_fetch_id', $fetchId)->find()) {
        return jsonError('记录不存在');
    }

    $changes = [];
    foreach (['field', 'source'] as $textColumn) {
        if (isset($post[$textColumn])) {
            $changes[$textColumn] = mb_substr(trim($post[$textColumn]), 0, 128);
        }
    }
    if (isset($post['state'])) {
        $changes['state'] = intval($post['state']);
    }

    if (!empty($changes)) {
        Db::name('expert_fetch')->where('expert_fetch_id', $fetchId)->update($changes);
    }
    return jsonSuccess(['expert_fetch_id' => $fetchId]);
}
/**
 * Disable or delete a fetch field. By default the row is soft-deleted
 * (state = 1); with hard=1 the row and its journal_promotion_field rows are
 * physically removed.
 *
 * POST fields: expert_fetch_id (required), hard (pass 1 for physical deletion).
 *
 * @return mixed jsonError() when the id is missing, else jsonSuccess([])
 * @throws \Exception re-thrown after rollback when a hard delete fails
 */
public function deleteFetchField()
{
    $data = $this->request->post();
    $id = intval(isset($data['expert_fetch_id']) ? $data['expert_fetch_id'] : 0);
    $hard = intval(isset($data['hard']) ? $data['hard'] : 0);
    if (!$id) {
        return jsonError('expert_fetch_id is required');
    }

    if (!$hard) {
        Db::name('expert_fetch')->where('expert_fetch_id', $id)->update(['state' => 1]);
        return jsonSuccess([]);
    }

    // Both deletes must succeed or fail together, otherwise a fetch field
    // could survive with its journal links already gone.
    Db::startTrans();
    try {
        Db::name('journal_promotion_field')->where('expert_fetch_id', $id)->delete();
        Db::name('expert_fetch')->where('expert_fetch_id', $id)->delete();
        Db::commit();
    } catch (\Exception $e) {
        Db::rollback();
        throw $e;
    }
    return jsonSuccess([]);
}
}

View File

@@ -334,6 +334,14 @@ class Journal extends Base {
$aJournalUpdate['wechat_app_secret'] = $update['wechat_app_secret']; $aJournalUpdate['wechat_app_secret'] = $update['wechat_app_secret'];
} }
if(isset($data['editor_name'])&&$data['editor_name']!=''){
$update['editor_name'] = $data['editor_name'];
}
if(isset($data['databases'])&&$data['databases']!=''){
$update['databases'] = $data['databases'];
}
if(!empty($aJournalUpdate)){ if(!empty($aJournalUpdate)){
$aJournalUpdate['issn'] = $journal_info['issn']; $aJournalUpdate['issn'] = $journal_info['issn'];
$sUrl = $this->sJournalUrl."wechat/Article/updateJournal"; $sUrl = $this->sJournalUrl."wechat/Article/updateJournal";

View File

@@ -0,0 +1,261 @@
<?php
namespace app\api\controller;
use think\Db;
use think\Validate;
class MailTemplate extends Base
{
/**
 * Create a mail template, or update one when template_id is supplied.
 *
 * POST fields: journal_id, scene, language, title, subject (all required);
 * body_html, body_text, variables_json, version (default 1), is_active
 * (default 1); template_id (optional - selects the update path).
 *
 * @return mixed jsonError() on validation failure or unknown template,
 *               else jsonSuccess(['template_id' => id])
 */
public function saveTemplate()
{
    $input = $this->request->post();
    $validator = new Validate([
        'journal_id' => 'require|number',
        'scene' => 'require',
        'language' => 'require',
        'title' => 'require',
        'subject' => 'require',
    ]);
    if (!$validator->check($input)) {
        return jsonError($validator->getError());
    }

    $row = [
        'journal_id' => intval($input['journal_id']),
        'scene' => trim($input['scene']),
        'language' => trim($input['language']),
        'title' => trim($input['title']),
        'subject' => trim($input['subject']),
        'body_html' => $input['body_html'] ?? '',
        'body_text' => $input['body_text'] ?? '',
        'variables_json' => $input['variables_json'] ?? '',
        'version' => intval($input['version'] ?? 1),
        'is_active' => intval($input['is_active'] ?? 1),
        'utime' => time(),
    ];

    $templateId = intval($input['template_id'] ?? 0);
    if ($templateId === 0) {
        // Insert path: stamp creation time and mark the row as live.
        $row['ctime'] = time();
        $row['state'] = 0;
        $createdId = Db::name('mail_template')->insertGetId($row);
        return jsonSuccess(['template_id' => $createdId]);
    }

    $current = Db::name('mail_template')
        ->where('template_id', $templateId)
        ->where('state', 0)
        ->find();
    if (!$current) {
        return jsonError('Template not found');
    }
    Db::name('mail_template')->where('template_id', $templateId)->update($row);
    return jsonSuccess(['template_id' => $templateId]);
}
/**
 * Soft-delete a template by setting state = 1 and refreshing utime.
 *
 * Request parameter: template_id (required).
 *
 * @return mixed jsonError() when the id is missing, else jsonSuccess([])
 */
public function deleteTemplate()
{
    $id = intval($this->request->param('template_id', 0));
    if ($id === 0) {
        return jsonError('template_id is required');
    }
    $tombstone = ['state' => 1, 'utime' => time()];
    Db::name('mail_template')->where('template_id', $id)->update($tombstone);
    return jsonSuccess([]);
}
/**
 * Fetch one live (state = 0) template.
 *
 * Request parameter: template_id (required).
 *
 * @return mixed jsonError() when the id is missing or unknown, else
 *               jsonSuccess(['template' => row])
 */
public function getTemplate()
{
    $id = intval($this->request->param('template_id', 0));
    if ($id === 0) {
        return jsonError('template_id is required');
    }
    $row = Db::name('mail_template')
        ->where('template_id', $id)
        ->where('state', 0)
        ->find();
    return $row ? jsonSuccess(['template' => $row]) : jsonError('Template not found');
}
/**
 * List the live templates of one journal.
 *
 * Request parameters: journal_id (required); scene, language, is_active
 * (optional exact-match filters, ignored when empty).
 *
 * @return mixed jsonError() when journal_id is missing, else jsonSuccess(['list' => rows])
 */
public function listTemplates()
{
    $journalId = intval($this->request->param('journal_id', 0));
    if ($journalId === 0) {
        return jsonError('journal_id is required');
    }

    $conditions = ['journal_id' => $journalId, 'state' => 0];
    $textFilters = [
        'scene' => trim($this->request->param('scene', '')),
        'language' => trim($this->request->param('language', '')),
    ];
    foreach ($textFilters as $column => $value) {
        if ($value !== '') {
            $conditions[$column] = $value;
        }
    }
    $activeFlag = $this->request->param('is_active', '');
    if ($activeFlag !== '') {
        $conditions['is_active'] = intval($activeFlag);
    }

    $rows = Db::name('mail_template')
        ->where($conditions)
        ->order('is_active desc, utime desc, template_id desc')
        ->select();
    return jsonSuccess(['list' => $rows]);
}
/**
 * List every live (state = 0) template across all journals, active ones
 * first, then most recently updated.
 *
 * @return mixed jsonSuccess(['list' => rows])
 */
public function listTemplatesAll()
{
    $live = Db::name('mail_template')->where('state', 0);
    $rows = $live->order('is_active desc, utime desc, template_id desc')->select();
    return jsonSuccess(['list' => $rows]);
}
/**
 * Create a global mail style, or update one when style_id is supplied.
 *
 * POST fields: name (required), description, header_html, footer_html,
 * style_id (optional - selects the update path). The stored row always has
 * state = 0.
 *
 * @return mixed jsonError() on validation failure or unknown style,
 *               else jsonSuccess(['style_id' => id])
 */
public function saveStyle()
{
    $input = $this->request->post();
    $validator = new Validate(['name' => 'require']);
    if (!$validator->check($input)) {
        return jsonError($validator->getError());
    }

    $row = [
        'name' => trim($input['name']),
        'description' => trim($input['description'] ?? ''),
        'header_html' => $input['header_html'] ?? '',
        'footer_html' => $input['footer_html'] ?? '',
        'state' => 0,
    ];

    $styleId = intval($input['style_id'] ?? 0);
    if ($styleId === 0) {
        $row['ctime'] = time();
        $createdId = Db::name('mail_style')->insertGetId($row);
        return jsonSuccess(['style_id' => $createdId]);
    }

    $current = Db::name('mail_style')
        ->where('style_id', $styleId)
        ->where('state', 0)
        ->find();
    if (!$current) {
        return jsonError('Style not found');
    }
    Db::name('mail_style')->where('style_id', $styleId)->update($row);
    return jsonSuccess(['style_id' => $styleId]);
}
/**
 * Soft-delete a global mail style by setting state = 1.
 *
 * Request parameter: style_id (required).
 *
 * @return mixed jsonError() when the id is missing, else jsonSuccess([])
 */
public function deleteStyle()
{
    $id = intval($this->request->param('style_id', 0));
    if ($id === 0) {
        return jsonError('style_id is required');
    }
    Db::name('mail_style')
        ->where('style_id', $id)
        ->update(['state' => 1]);
    return jsonSuccess([]);
}
/**
 * Fetch one live (state = 0) mail style.
 *
 * Request parameter: style_id (required).
 *
 * @return mixed jsonError() when the id is missing or unknown, else
 *               jsonSuccess(['style' => row])
 */
public function getStyle()
{
    $id = intval($this->request->param('style_id', 0));
    if ($id === 0) {
        return jsonError('style_id is required');
    }
    $row = Db::name('mail_style')
        ->where('style_id', $id)
        ->where('state', 0)
        ->find();
    return $row ? jsonSuccess(['style' => $row]) : jsonError('Style not found');
}
/**
 * List all live (state = 0) mail styles, newest id first.
 * Styles are no longer split by scene / language, so no filters are applied.
 *
 * @return mixed jsonSuccess(['list' => rows])
 */
public function listStyles()
{
    $rows = Db::name('mail_style')
        ->where(['state' => 0])
        ->order('style_id desc')
        ->select();
    return jsonSuccess(['list' => $rows]);
}
/**
 * Render a preview of a template's subject and HTML body (a convenience
 * endpoint; the original author notes it is of limited use).
 *
 * Request parameters: template_id (required), vars (JSON object string of
 * placeholder values; ignored unless it decodes to an array).
 *
 * @return mixed jsonError() when the id is missing or unknown, else
 *               jsonSuccess(['subject' => ..., 'body' => ...])
 */
public function preview()
{
    $templateId = intval($this->request->param('template_id', 0));
    $rawVars = $this->request->param('vars', '');
    if ($templateId === 0) {
        return jsonError('template_id is required');
    }

    $template = Db::name('mail_template')
        ->where('template_id', $templateId)
        ->where('state', 0)
        ->find();
    if (!$template) {
        return jsonError('Template not found');
    }

    $decoded = $rawVars ? json_decode($rawVars, true) : null;
    $values = is_array($decoded) ? $decoded : [];

    // No style is enforced here; the caller may wrap the result in a style itself.
    return jsonSuccess([
        'subject' => $this->render($template['subject'], $values),
        'body' => $this->render($template['body_html'], $values),
    ]);
}
/**
 * Substitute placeholders in a template string.
 *
 * Both "{{key}}" and the legacy "{key}" forms are replaced. Substitution is
 * done in a single pass (strtr), so text inserted for one placeholder is
 * never scanned again - a value that itself contains "{other}" stays
 * literal. Values that cannot be represented as a string (arrays, objects
 * without __toString) are skipped and their placeholders left untouched.
 *
 * @param mixed $tpl  template text; anything but a non-empty string yields ''
 * @param mixed $vars map of placeholder name => value; a non-array or empty
 *                    value returns the template unchanged
 * @return string rendered text
 */
private function render($tpl, $vars)
{
    // Compare against '' explicitly: empty() would also discard the template "0".
    if (!is_string($tpl) || $tpl === '') {
        return '';
    }
    if (!is_array($vars) || empty($vars)) {
        return $tpl;
    }

    $pairs = [];
    foreach ($vars as $k => $v) {
        $key = trim((string) $k);
        if ($key === '') {
            continue;
        }
        if (is_array($v) || (is_object($v) && !method_exists($v, '__toString'))) {
            continue;
        }
        $text = (string) $v;
        $pairs['{{' . $key . '}}'] = $text;
        // backward compatible placeholders
        $pairs['{' . $key . '}'] = $text;
    }
    if (empty($pairs)) {
        return $tpl;
    }
    // strtr matches the longest key first and never re-scans replaced text.
    return strtr($tpl, $pairs);
}
}

View File

@@ -843,14 +843,14 @@ class Preaccept extends Base
} }
$am_info = $this->article_main_obj->where("am_id",$data['am_id'])->find(); $am_info = $this->article_main_obj->where("am_id",$data['am_id'])->find();
//上一行,空行 //上一行,空行
$p_list = $this->article_main_obj->where("article_id",$am_info['article_id'])->where("sort","<",$am_info['sort'])->whereIn("state",[0,2])->order("sort desc")->limit(1)->select(); // $p_list = $this->article_main_obj->where("article_id",$am_info['article_id'])->where("sort","<",$am_info['sort'])->whereIn("state",[0,2])->order("sort desc")->limit(1)->select();
if($p_list&&($p_list[0]['type']>0||$p_list[0]['content']!="")){ // if($p_list&&($p_list[0]['type']>0||$p_list[0]['content']!="")){
$this->addBRow($am_info['article_id'],$p_list[0]['am_id']); // $this->addBRow($am_info['article_id'],$p_list[0]['am_id']);
} // }
$n_list = $this->article_main_obj->where("article_id",$am_info['article_id'])->where("sort",">",$am_info['sort'])->whereIn("state",[0,2])->order("sort asc")->limit(1)->select(); // $n_list = $this->article_main_obj->where("article_id",$am_info['article_id'])->where("sort",">",$am_info['sort'])->whereIn("state",[0,2])->order("sort asc")->limit(1)->select();
if($n_list[0]['type']>0||$n_list[0]['content']!=""){ // if($n_list[0]['type']>0||$n_list[0]['content']!=""){
$this->addBRow($am_info['article_id'],$data['am_id']); // $this->addBRow($am_info['article_id'],$data['am_id']);
} // }
$this->article_main_obj->where("am_id",$data['am_id'])->update(["is_h1"=>1,"is_h2"=>0,"is_h3"=>0]); $this->article_main_obj->where("am_id",$data['am_id'])->update(["is_h1"=>1,"is_h2"=>0,"is_h3"=>0]);
// return jsonSuccess([]); // return jsonSuccess([]);
//返回数据 20260119 start //返回数据 20260119 start
@@ -872,10 +872,10 @@ class Preaccept extends Base
} }
$am_info = $this->article_main_obj->where("am_id",$data['am_id'])->find(); $am_info = $this->article_main_obj->where("am_id",$data['am_id'])->find();
//上一行,空行 //上一行,空行
$p_list = $this->article_main_obj->where("article_id",$am_info['article_id'])->where("sort","<",$am_info['sort'])->whereIn("state",[0,2])->order("sort desc")->limit(1)->select(); // $p_list = $this->article_main_obj->where("article_id",$am_info['article_id'])->where("sort","<",$am_info['sort'])->whereIn("state",[0,2])->order("sort desc")->limit(1)->select();
if($p_list&&($p_list[0]['type']>0||$p_list[0]['content']!="")){ // if($p_list&&($p_list[0]['type']>0||$p_list[0]['content']!="")){
$this->addBRow($am_info['article_id'],$p_list[0]['am_id']); // $this->addBRow($am_info['article_id'],$p_list[0]['am_id']);
} // }
$this->article_main_obj->where("am_id",$data['am_id'])->update(["is_h1"=>0,"is_h2"=>1,"is_h3"=>0]); $this->article_main_obj->where("am_id",$data['am_id'])->update(["is_h1"=>0,"is_h2"=>1,"is_h3"=>0]);
// return jsonSuccess([]); // return jsonSuccess([]);
//返回数据 20260119 start //返回数据 20260119 start

View File

@@ -0,0 +1,695 @@
<?php
namespace app\api\controller;
use app\api\controller\Base;
use app\common\CitationRelevanceService;
use app\common\CrossrefService;
use app\common\PubmedService;
use think\Db;
use think\Env;
/**
* @title 参考文献
* @description 相关方法汇总
*/
class References extends Base
{
/**
 * Forwards the request to the Base controller constructor; no extra setup.
 *
 * @param \think\Request|null $request current request, passed through unchanged
 */
public function __construct(\think\Request $request = null) {
parent::__construct($request);
}
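// OpenAI API token. NOTE(review): this is a hard-coded secret committed to source control - revoke/rotate it and load the value from environment or config instead.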
private $sApiKey = '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';
//OPENAI URL
private $sApiUrl = 'https://api.openai.com/v1/chat/completions';
/**
 * Load one reference and build its formatted citation text.
 *
 * The result row is the production_article_refer record plus a deal_content
 * key. deal_content defaults to refer_frag; for a "journal" reference that
 * has a doilink, and for any "book" reference, it is rebuilt as
 * "authors.title.joura.dateno.Available at:\n<link>" where at most three
 * authors are kept (followed by ", et al") and the link is the normalised
 * DOI URL (journal) or the ISBN (book).
 *
 * @param array $aParam optional parameters; when empty the POST body is used.
 *                      p_refer_id (primary key of the reference) is required.
 * @return string JSON: status 1 with data on success; 2 missing id,
 *                4 reference not found, 3 article missing, or the
 *                getArticle() result when its status is not 1
 */
public function get($aParam = []){
    // Resolve parameters
    $aParam = empty($aParam) ? $this->request->post() : $aParam;

    // Required value check
    $iPReferId = empty($aParam['p_refer_id']) ? '' : $aParam['p_refer_id'];
    if(empty($iPReferId)){
        return json_encode(['status' => 2,'msg' => 'Please select the reference to be queried']);
    }
    $aRefer = Db::name('production_article_refer')
        ->where(['p_refer_id' => $iPReferId,'state' => 0])
        ->find();
    if(empty($aRefer)){
        return json_encode(['status' => 4,'msg' => 'Reference is empty']);
    }

    // Look up the owning article
    $aParam['p_article_id'] = $aRefer['p_article_id'];
    $aResult = $this->getArticle($aParam);
    $iStatus = empty($aResult['status']) ? 0 : $aResult['status'];
    if($iStatus != 1){
        return json_encode($aResult);
    }
    $aArticle = empty($aResult['data']) ? [] : $aResult['data'];
    if(empty($aArticle)){
        return json_encode(['status' => 3,'msg' => 'The article does not exist']);
    }

    // Column value with surrounding whitespace and dots removed ('' when empty).
    $fnPart = function ($sKey) use ($aRefer) {
        return empty($aRefer[$sKey]) ? '' : trim(trim($aRefer[$sKey]),'.');
    };
    // Keep at most the first three comma-separated authors.
    $fnAuthors = function ($sAuthors) {
        if(empty($sAuthors)){
            return $sAuthors;
        }
        $aNames = explode(',', $sAuthors);
        if(count($aNames) > 3){
            return implode(',', array_slice($aNames, 0,3)).', et al';
        }
        return implode(',', $aNames);
    };
    // Turn a bare DOI into an https://doi.org/ URL and upgrade http://doi.org/ links.
    $fnDoiUrl = function ($sKey) use ($aRefer) {
        $sUrl = empty($aRefer[$sKey]) ? '' : trim($aRefer[$sKey]);
        if(!empty($sUrl)){
            $sUrl = strpos($sUrl ,"http")===false ? "https://doi.org/".$sUrl : $sUrl;
            $sUrl = str_replace('http://doi.org/', 'https://doi.org/', $sUrl);
        }
        return $sUrl;
    };
    // Layout: authors.title.journal abbreviation.year/volume/pages.Available at: <link>
    $fnCompose = function ($sLink) use ($fnPart, $fnAuthors) {
        $sAuthor = $fnAuthors($fnPart('author'));
        $sTitle = $fnPart('title');
        $sJoura = $fnPart('joura');
        $sDateno = $fnPart('dateno');
        return $sAuthor.'.'.$sTitle.'.'.$sJoura.'.'.$sDateno.".Available at:\n".$sLink;
    };

    $sData = $aRefer['refer_frag'];
    if($aRefer['refer_type'] == 'journal' && !empty($aRefer['doilink'])){
        $sDoilink = $fnDoiUrl('doilink');
        $sReferDoi = $fnDoiUrl('refer_doi');
        // refer_doi is only a fallback for an empty doilink
        $sData = $fnCompose(empty($sDoilink) ? $sReferDoi : $sDoilink);
    }
    if($aRefer['refer_type'] == 'book'){
        // Books carry their ISBN in the link position
        $sData = $fnCompose(empty($aRefer['isbn']) ? '' : trim($aRefer['isbn']));
    }

    $aRefer['deal_content'] = $sData;
    return json_encode(['status' => 1,'msg' => 'success','data' => $aRefer]);
}
/**
 * Check whether a reference is relevant to the sentence that cites it.
 *
 * Steps: load the reference row, resolve its article, read the article body
 * rows from `article_main`, extract the text around the in-text marker
 * [index + 1], then hand the context and the reference row to
 * CitationRelevanceService::checkOne().
 *
 * Environment keys read here: citation_chat_api_key (required),
 * citation_chat_url, citation_chat_model, citation_chat_timeout,
 * citation_embedding_dim, pubmed_email, pubmed_tool, crossref_mailto.
 *
 * @param array $aParam Optional parameters; the POST body is used when empty.
 *                      Must contain `p_refer_id`.
 * @return mixed Result of jsonError()/jsonSuccess(), or a JSON string when
 *               getArticle() reports a failure.
 */
public function checkCitationRelevance($aParam = [])
{
    $aParam = empty($aParam) ? $this->request->post() : $aParam;

    $iReferId = isset($aParam['p_refer_id']) ? intval($aParam['p_refer_id']) : 0;
    if ($iReferId === 0) {
        return jsonError('p_refer_id is required');
    }

    $aReferRow = Db::name('production_article_refer')
        ->where('p_refer_id', $iReferId)
        ->where('state', 0)
        ->find();
    if (empty($aReferRow)) {
        return jsonError('Reference not found');
    }

    $aLookup = $this->getArticle(['p_article_id' => $aReferRow['p_article_id']]);
    $iLookupStatus = empty($aLookup['status']) ? 0 : $aLookup['status'];
    if ($iLookupStatus != 1) {
        return json_encode($aLookup);
    }
    $aArticleData = empty($aLookup['data']) ? [] : $aLookup['data'];
    if (empty($aArticleData['article_id'])) {
        return jsonError('Article not found');
    }

    $aBodyRows = Db::name('article_main')
        ->where('article_id', intval($aArticleData['article_id']))
        ->whereIn('state', [0, 2])
        ->order('sort asc')
        ->select();
    if (empty($aBodyRows)) {
        return jsonError('article_main is empty');
    }

    // The stored index is zero-based; the in-text marker is one-based.
    $iMark = intval($aReferRow['index']) + 1;
    $sContext = $this->extractCitationContextFromMains($aBodyRows, $iMark);
    if ($sContext === '') {
        return jsonError('Citation context not found in article_main for mark [' . $iMark . ']');
    }

    $sChatKey = trim((string)Env::get('citation_chat_api_key', ''));
    if ($sChatKey === '') {
        return jsonError('Please set env citation_chat_api_key for embedding via chat');
    }

    $aServiceConfig = [
        'chat_url' => trim((string)Env::get('citation_chat_url', 'http://chat.taimed.cn/v1/chat/completions')),
        'chat_model' => trim((string)Env::get('citation_chat_model', 'DeepSeek-Coder-V2-Instruct')),
        'timeout' => max(60, intval(Env::get('citation_chat_timeout', 180))),
        'embedding_dim' => max(32, intval(Env::get('citation_embedding_dim', 256))),
        'embedding_headers' => [
            'Authorization: Bearer ' . $sChatKey,
        ],
    ];
    $oPubmed = new PubmedService([
        'email' => trim((string)Env::get('pubmed_email', '')),
        'tool' => trim((string)Env::get('pubmed_tool', 'tmrjournals')),
    ]);
    $oCrossref = new CrossrefService([
        'mailto' => trim((string)Env::get('crossref_mailto', '')),
    ]);
    $oChecker = new CitationRelevanceService($oPubmed, $oCrossref, $aServiceConfig);
    $aVerdict = $oChecker->checkOne($sContext, $aReferRow, []);

    $aResponse = [
        'p_refer_id' => $iReferId,
        'citation_mark' => $iMark,
        'refer_index' => intval($aReferRow['index']),
        'context' => $sContext,
    ];
    // Copy the verdict fields, substituting a neutral default for anything missing.
    $aVerdictDefaults = [
        'problem_flag' => '',
        'problem_reason' => '',
        'relevance_flag' => '',
        'relevance_score' => 0,
        'reason' => '',
        'pubmed' => [],
    ];
    foreach ($aVerdictDefaults as $sField => $mDefault) {
        $aResponse[$sField] = $aVerdict[$sField] ?? $mDefault;
    }
    return jsonSuccess($aResponse);
}
/**
 * Build the article body from `article_main` rows and return the sentence
 * that cites the given marker, together with one sentence before and after.
 *
 * Each row's `content` is stripped of <blue> tags and other HTML, entity
 * decoded and whitespace-collapsed before the rows are joined and split into
 * sentences. The first sentence that cites the marker wins.
 *
 * A sentence cites the marker when it contains a bracket group that names it,
 * either directly ("[7]"), inside a list ("[3, 7]") or inside a range
 * ("[5-9]", ASCII hyphen or a Unicode dash U+2010..U+2015). The original
 * implementation matched only the exact "[n]" form, so references cited
 * solely inside a list or range were reported as not found.
 *
 * @param array $mains        Rows that carry a `content` field.
 * @param int   $citationMark One-based citation number to look for.
 * @return string Context text, or '' when the marker is not cited anywhere.
 */
private function extractCitationContextFromMains(array $mains, int $citationMark): string
{
    if ($citationMark <= 0) {
        return '';
    }

    $chunks = [];
    foreach ($mains as $row) {
        $text = isset($row['content']) ? (string)$row['content'] : '';
        if ($text === '') {
            continue;
        }
        $text = preg_replace('/<\s*\/?\s*blue[^>]*>/i', '', $text);
        $text = strip_tags($text);
        $text = html_entity_decode($text, ENT_QUOTES | ENT_HTML5, 'UTF-8');
        $text = preg_replace('/\s+/u', ' ', trim($text));
        if ($text !== '') {
            $chunks[] = $text;
        }
    }
    $fullText = implode("\n", $chunks);
    if ($fullText === '') {
        return '';
    }

    // True when any bracket group in the sentence covers $citationMark.
    $citesMark = static function ($sentence) use ($citationMark) {
        $groupPattern = '/\[\s*([0-9]+(?:\s*[-,\x{2010}-\x{2015}]\s*[0-9]+)*)\s*\]/u';
        if (!preg_match_all($groupPattern, (string)$sentence, $groups)) {
            return false;
        }
        foreach ($groups[1] as $group) {
            foreach (explode(',', $group) as $token) {
                $bounds = preg_split('/\s*[-\x{2010}-\x{2015}]\s*/u', trim($token));
                // A token is either a single number or "low-high"; anything else is ignored.
                if (!is_array($bounds) || count($bounds) > 2) {
                    continue;
                }
                $low = intval($bounds[0]);
                $high = intval($bounds[count($bounds) - 1]);
                if ($low <= $citationMark && $citationMark <= $high) {
                    return true;
                }
            }
        }
        return false;
    };

    $sentences = $this->splitEnglishSentences($fullText);
    $lastIndex = count($sentences) - 1;
    foreach ($sentences as $si => $sent) {
        if (!$citesMark($sent)) {
            continue;
        }
        $start = max(0, $si - 1);
        $end = min($lastIndex, $si + 1);
        $ctx = implode(' ', array_slice($sentences, $start, $end - $start + 1));
        return trim(preg_replace('/\s+/u', ' ', $ctx));
    }
    return '';
}
/**
 * Split text into sentences at whitespace that follows '.', '?' or '!'.
 *
 * Whitespace runs are collapsed to a single space first; pieces are trimmed
 * and empty pieces are dropped.
 *
 * @param string $text Plain text.
 * @return array List of non-empty sentence strings.
 */
private function splitEnglishSentences(string $text): array
{
    $text = trim($text);
    if ($text === '') {
        return [];
    }

    $collapsed = preg_replace('/\s+/u', ' ', $text);
    $pieces = preg_split('/(?<=[\.\?\!])\s+/', $collapsed);
    if (!is_array($pieces)) {
        return [];
    }

    $sentences = [];
    foreach (array_map(function ($piece) { return trim((string)$piece); }, $pieces) as $candidate) {
        if ($candidate === '') {
            continue;
        }
        $sentences[] = $candidate;
    }
    return $sentences;
}
/**
 * Replace the text of one reference and store the fields parsed from it.
 *
 * The submitted `content` is parsed by dealContent(); the parsed fields are
 * written to `production_article_refer` together with `refer_content`,
 * `is_change` = 1 and a fresh `update_time`.
 *
 * @param array $aParam Optional parameters; the POST body is used when empty.
 *                      Requires `p_refer_id` and a string `content`.
 * @return string JSON string with `status` (1 = success) and `msg`.
 */
public function modify($aParam = []){
    $aParam = empty($aParam) ? $this->request->post() : $aParam;

    // Required inputs.
    $iPReferId = empty($aParam['p_refer_id']) ? '' : $aParam['p_refer_id'];
    if (empty($iPReferId)) {
        return json_encode(['status' => 2,'msg' => 'Please select the reference to be queried']);
    }
    $sContent = empty($aParam['content']) ? '' : $aParam['content'];
    if (empty($sContent)) {
        return json_encode(['status' => 2,'msg' => 'Please enter the modification content']);
    }
    if (!is_string($sContent)) {
        return json_encode(['status' => 2,'msg' => 'The content format is incorrect']);
    }

    // The reference must exist and be active.
    $aRowFilter = ['p_refer_id' => $iPReferId,'state' => 0];
    $aRefer = Db::name('production_article_refer')->where($aRowFilter)->find();
    if (empty($aRefer)) {
        return json_encode(['status' => 4,'msg' => 'Reference is empty']);
    }

    // The owning article must be in an editable state.
    $aParam['p_article_id'] = $aRefer['p_article_id'];
    $aLookup = $this->getArticle($aParam);
    $iLookupStatus = empty($aLookup['status']) ? 0 : $aLookup['status'];
    if ($iLookupStatus != 1) {
        return json_encode($aLookup);
    }
    if (empty($aLookup['data'])) {
        return json_encode(['status' => 3,'msg' => 'The article does not exist']);
    }

    // Parse the submitted text into individual columns.
    $aParsed = json_decode($this->dealContent(['content' => $sContent]), true);
    if (empty($aParsed['data'])) {
        return json_encode(['status' => 5,'msg' => 'The content format is incorrect']);
    }
    $aChanges = $aParsed['data'];
    $aChanges['refer_content'] = $sContent;
    $aChanges['is_change'] = 1;
    $aChanges['update_time'] = time();

    $mAffected = Db::name('production_article_refer')->where($aRowFilter)->limit(1)->update($aChanges);
    if ($mAffected === false) {
        return json_encode(['status' => 6,'msg' => 'Update failed']);
    }
    return json_encode(['status' => 1,'msg' => 'success']);
}
/**
 * Parse a free-text reference into individual columns.
 *
 * The text is split on '.', with question marks treated as sentence ends.
 * The first four pieces map to author, title, joura and dateno; whatever
 * follows is treated as the DOI / link part.
 *
 * Changes from the previous version:
 *  - the second str_replace needle was an empty string, which str_replace
 *    ignores. It presumably was a full-width character lost in transit, so
 *    the full-width question mark (U+FF1F) and full-width full stop (U+FF0E)
 *    are now normalised explicitly — TODO confirm against the original intent;
 *  - the URL pattern no longer carries the redundant `http?:` alternative,
 *    which also matched the malformed scheme "htt://";
 *  - trailing sentence punctuation ('.', ',', ';') is removed from the link
 *    so that "…/10.1000/abc." does not produce a DOI ending in a dot;
 *  - the emptiness check runs after the link has been cleaned, so input that
 *    contains only separators is rejected instead of yielding a bare
 *    "https://doi.org/".
 *
 * @param array $aParam Optional parameters; the POST body is used when empty.
 *                      Requires a string `content`.
 * @return string JSON string with `status` (1 = success), `msg` and, on
 *                success, `data` holding the parsed fields (empty when the
 *                text contains no separator at all).
 */
public function dealContent($aParam = []){
    $aParam = empty($aParam) ? $this->request->post() : $aParam;

    $sContent = empty($aParam['content']) ? '' : $aParam['content'];
    if (empty($sContent)) {
        return json_encode(['status' => 2,'msg' => 'Please enter the modification content']);
    }
    if (!is_string($sContent)) {
        return json_encode(['status' => 2,'msg' => 'The content format is incorrect']);
    }

    // Treat question marks (ASCII and full-width) and the full-width full stop as separators.
    $sContent = str_replace(['?', "\u{FF1F}", "\u{FF0E}"], '.', $sContent);
    $aContent = explode('.', $sContent);
    $aUpdate = [];
    if (count($aContent) <= 1) {
        return json_encode(['status' => 1,'msg' => 'success','data' => $aUpdate]);
    }

    // Leading pieces: author, title, journal abbreviation, year/volume/pages.
    $aField = [0 => 'author',1 => 'title', 2 => 'joura',3 => 'dateno'];
    foreach (array_slice($aContent, 0, 4) as $key => $value) {
        if (empty($value)) {
            continue;
        }
        $aUpdate[$aField[$key]] = trim(trim($value), '.');
    }

    // Everything after the fourth piece is the link part; re-join what explode() split.
    $aTail = array_slice($aContent, 4);
    $sDoi = empty($aTail) ? '' : implode('.', $aTail);

    // Prefer explicit http(s) URLs when the tail contains any.
    if (preg_match_all('/https?:\/\/[^\s<>"]+/i', $sDoi, $aUrlMatches) && !empty($aUrlMatches[0])) {
        $aUrls = array_map(function ($sUrl) {
            return rtrim($sUrl, '.,;');
        }, $aUrlMatches[0]);
        $sDoi = implode(',', array_unique($aUrls));
    }

    $sDoi = rtrim(trim(trim($sDoi), ':'), '.,;');
    if (empty($sDoi)) {
        return json_encode(['status' => 4,'msg' => 'Reference DOI is empty']);
    }

    $sDoi = strpos($sDoi, "http") === false ? "https://doi.org/" . $sDoi : $sDoi;
    $sDoi = str_replace('http://doi.org/', 'https://doi.org/', $sDoi);
    $aUpdate['doilink'] = $sDoi;

    // Extract the bare DOI when the link contains one and rebuild the canonical link from it.
    $doiPattern = '/\b10\.\d+(?:\.\d+)*\/[^\s?#&=]+/i';
    if (preg_match($doiPattern, $sDoi, $aDoiMatch)) {
        $aUpdate['doi'] = $aDoiMatch[0];
        $aUpdate['doilink'] = 'https://doi.org/' . $aUpdate['doi'];
    } else {
        $aUpdate['doi'] = $sDoi;
    }

    // Stored author strings end with a period.
    if (!empty($aUpdate['author'])) {
        $aUpdate['author'] = trim(trim($aUpdate['author'])) . '.';
    }
    return json_encode(['status' => 1,'msg' => 'success','data' => $aUpdate]);
}
/**
 * Resolve a production article id to its article row.
 *
 * Looks up `production_article` (state 0 or 2) for the article id, then
 * requires the `article` row to be in state 5 or 6.
 *
 * @param array $aParam Optional parameters; the POST body is used when empty.
 *                      Requires `p_article_id`.
 * @return array `status` (1 = success), `msg` and, on success, `data` with
 *               `article_id` and `p_article_id`.
 */
private function getArticle($aParam = []){
    $aParam = empty($aParam) ? $this->request->post() : $aParam;

    $iPArticleId = empty($aParam['p_article_id']) ? 0 : $aParam['p_article_id'];
    if (empty($iPArticleId)) {
        return ['status' => 2,'msg' => 'Please select the article to query'];
    }

    // Production record -> article id.
    $aProduction = Db::name('production_article')
        ->field('article_id')
        ->where(['p_article_id' => $iPArticleId,'state' => ['in',[0,2]]])
        ->find();
    if (empty($aProduction['article_id'])) {
        return ['status' => 2,'msg' => 'No articles found'];
    }

    // The article itself must be in one of the accepted states.
    $aFound = Db::name('article')
        ->field('article_id')
        ->where(['article_id' => $aProduction['article_id'],'state' => ['in',[5,6]]])
        ->find();
    if (empty($aFound)) {
        return ['status' => 3,'msg' => 'The article does not exist or has not entered the editorial reference status'];
    }

    $aFound['p_article_id'] = $iPArticleId;
    return ['status' => 1,'msg' => 'success','data' => $aFound];
}
/**
 * Queue AI lookups for the references of an article that have no DOI link yet.
 *
 * Selects active references of the production article whose `doilink` is
 * empty and pushes each one that has a usable `refer_doi` onto the
 * `AiCheckReferByDoi` queue.
 *
 * Fix: `isbn` is now part of the selected columns. It was tested in the skip
 * rule for books but never loaded, so that rule could never fire and books
 * that already had an ISBN were queued again.
 *
 * @param array $aParam Optional parameters; the POST body is used when empty.
 *                      Requires `p_article_id`.
 * @return string JSON string with `status` (1 = queued) and `msg`.
 */
public function checkByAi($aParam = []){
    $aParam = empty($aParam) ? $this->request->post() : $aParam;

    // Resolve and validate the article.
    $aArticle = $this->getArticle($aParam);
    $iStatus = empty($aArticle['status']) ? 0 : $aArticle['status'];
    if ($iStatus != 1) {
        return json_encode($aArticle);
    }
    $aArticle = empty($aArticle['data']) ? [] : $aArticle['data'];
    if (empty($aArticle)) {
        return json_encode(['status' => 3,'msg' => 'The article does not exist']);
    }

    // References that still lack a DOI link.
    $aWhere = ['p_article_id' => $aArticle['p_article_id'],'state' => 0,'doilink' => ''];
    $aRefer = Db::name('production_article_refer')
        ->field('p_refer_id,p_article_id,refer_type,refer_content,doilink,refer_doi,isbn')
        ->where($aWhere)
        ->select();
    if (empty($aRefer)) {
        return json_encode(['status' => 4,'msg' => 'No reference information found']);
    }

    foreach ($aRefer as $value) {
        // Nothing to look up without a DOI.
        if (empty($value['refer_doi']) || $value['refer_doi'] == 'Not Available') {
            continue;
        }
        // Already resolved entries are left alone. The journal rule is redundant
        // with the `doilink = ''` filter above and is kept as a safeguard.
        if ($value['refer_type'] == 'journal' && !empty($value['doilink'])) {
            continue;
        }
        if ($value['refer_type'] == 'book' && !empty($value['isbn'])) {
            continue;
        }
        // Hand the row to the background job that fetches the reference details.
        \think\Queue::push('app\api\job\AiCheckReferByDoi@fire',$value,'AiCheckReferByDoi');
    }
    return json_encode(['status' => 1,'msg' => 'Successfully joined the AI inspection DOI queue']);
}
/**
 * Ask the AI service for the details of one reference (by DOI) and store them.
 *
 * The raw AI response is logged to `production_article_refer_ai`; when the AI
 * reports a hit, the returned fields are written to `production_article_refer`
 * and copied onto the log row inside one transaction.
 *
 * Fixes in this version:
 *  - `isbn` is selected, so the "book already has an ISBN" guard can fire;
 *  - the transaction is rolled back when the update fails or throws
 *    (previously it was left open on the failure path);
 *  - `$aUpdate` is initialised, and a non-array `author` value no longer
 *    breaks implode().
 *
 * @param array $aParam Optional parameters; the POST body is used when empty.
 *                      Requires `p_refer_id`.
 * @return string JSON string with `status` (1 = success) and `msg`.
 * @throws \Exception Re-thrown after rollback when a database call fails.
 */
public function getCheckByAiResult($aParam = []){
    $aParam = empty($aParam) ? $this->request->post() : $aParam;

    $iPReferId = empty($aParam['p_refer_id']) ? '' : $aParam['p_refer_id'];
    if (empty($iPReferId)) {
        return json_encode(['status' => 2,'msg' => 'Please select the reference to be queried']);
    }

    // Load the reference.
    $aWhere = ['p_refer_id' => $iPReferId,'state' => 0];
    $aRefer = Db::name('production_article_refer')
        ->field('p_refer_id,p_article_id,refer_type,refer_content,doilink,refer_doi,state,dateno,isbn')
        ->where($aWhere)
        ->find();
    if (empty($aRefer)) {
        return json_encode(['status' => 4,'msg' => 'Reference is empty'.json_encode($aParam)]);
    }
    if (empty($aRefer['refer_doi'])) {
        return json_encode(['status' => 4,'msg' => 'Reference DOI is empty'.json_encode($aParam)]);
    }

    // Skip entries that are already complete.
    if ($aRefer['refer_type'] == 'journal' && !empty($aRefer['doilink'])) {
        // A `dateno` that contains ':' is taken as already parsed.
        $aDateno = empty($aRefer['dateno']) ? [] : explode(':', $aRefer['dateno']);
        if (count($aDateno) > 1) {
            return json_encode(['status' => 4,'msg' => 'No need to parse again-journal'.json_encode($aParam)]);
        }
    }
    if ($aRefer['refer_type'] == 'book' && !empty($aRefer['isbn'])) {
        return json_encode(['status' => 4,'msg' => 'No need to parse again-book'.json_encode($aParam)]);
    }

    // Validate the owning article.
    $aParam['p_article_id'] = $aRefer['p_article_id'];
    $aArticle = $this->getArticle($aParam);
    $iStatus = empty($aArticle['status']) ? 0 : $aArticle['status'];
    if ($iStatus != 1) {
        return json_encode($aArticle);
    }
    $aArticle = empty($aArticle['data']) ? [] : $aArticle['data'];
    if (empty($aArticle)) {
        return json_encode(['status' => 3,'msg' => 'The article does not exist']);
    }

    // Query the AI service.
    $aResult = $this->curlOpenAIByDoi(['doi' => $aRefer['refer_doi']]);
    $iStatus = empty($aResult['status']) ? 0 : $aResult['status'];
    $sMsg = empty($aResult['msg']) ? 'The DOI number AI did not find any relevant information' : $aResult['msg'];
    if ($iStatus != 1) {
        return json_encode(['status' => 4,'msg' => $sMsg]);
    }
    $aData = empty($aResult['data']) ? [] : $aResult['data'];
    if (empty($aData)) {
        return json_encode(['status' => 5,'msg' => 'AI obtains empty data']);
    }

    // Keep the raw response for auditing.
    $aLog = [];
    $aLog['content'] = json_encode($aResult);
    $aLog['update_time'] = time();
    $aLog['p_refer_id'] = $iPReferId;
    $iLogId = Db::name('production_article_refer_ai')->insertGetId($aLog);

    $iIsAiCheck = empty($aData['is_ai_check']) ? 2 : $aData['is_ai_check'];
    if ($iIsAiCheck != 1) {
        // The AI reported that it found nothing.
        return json_encode(['status' => 6,'msg' => 'AI did not find any information'.json_encode($aParam)]);
    }

    // Collect the columns to write.
    $aUpdate = [];
    foreach (['author','title','joura','dateno','doilink'] as $value) {
        if (empty($aData[$value])) {
            continue;
        }
        if ($value == 'author') {
            // The prompt asks for an array of names; tolerate a plain string as well.
            $aUpdate['author'] = is_array($aData['author'])
                ? implode(',', $aData['author'])
                : (string)$aData['author'];
        } else {
            $aUpdate[$value] = $aData[$value];
        }
    }
    if (empty($aUpdate)) {
        return json_encode(['status' => 6,'msg' => 'Update data to empty'.json_encode($aData)]);
    }
    if ($aRefer['refer_type'] == 'other') {
        $aUpdate['refer_type'] = 'journal';
    }
    if ($aRefer['refer_type'] == 'book' && !empty($aUpdate['doilink'])) {
        // NOTE(review): this stores the link in `refer_type`, which looks
        // unintended (`isbn` seems the likely target). Left unchanged because
        // the intended column cannot be confirmed from this file.
        $aUpdate['refer_type'] = $aUpdate['doilink'];
        unset($aUpdate['doilink']);
    }

    // The log row receives the parsed fields, minus `refer_type`.
    $aLog = $aUpdate;
    unset($aLog['refer_type']);

    $aUpdate['is_change'] = 1;
    $aUpdate['is_ai_check'] = 1;
    $aUpdate['cs'] = 1;
    $aUpdate['update_time'] = time();

    Db::startTrans();
    try {
        $aWhere = ['p_refer_id' => $iPReferId,'state' => 0];
        $result = Db::name('production_article_refer')->where($aWhere)->limit(1)->update($aUpdate);
        if ($result === false) {
            Db::rollback();
            return json_encode(['status' => 6,'msg' => 'Update failed']);
        }
        if (!empty($iLogId)) {
            Db::name('production_article_refer_ai')->where(['id' => $iLogId])->limit(1)->update($aLog);
        }
        Db::commit();
    } catch (\Exception $e) {
        // Never leave the connection inside an open transaction.
        Db::rollback();
        throw $e;
    }
    return json_encode(['status' => 1,'msg' => 'success']);
}
/**
 * Ask the chat-completions endpoint for the AMA citation of a DOI.
 *
 * Sends a fixed system prompt plus the DOI to \app\common\OpenAi::curlOpenAI()
 * and returns that call's JSON response decoded into an array.
 *
 * @param array $aParam `doi` (required) and optionally `model` (default 'gpt-4.1').
 * @return array|null Decoded response, or a `status` = 2 array when no DOI is given.
 */
private function curlOpenAIByDoi($aParam = []){
    $sDoi = empty($aParam['doi']) ? '' : $aParam['doi'];
    if (empty($sDoi)) {
        return ['status' => 2,'msg' => 'Reference doi is empty'];
    }

    // System prompt: describes the task and the JSON shape expected back.
    $sSysMessagePrompt = '请完成以下任务:
1. 根据提供的DOI号查询该文献的AMA引用格式
2. 按照以下规则调整AMA引用格式
- 第三个作者名字后添加 et al.
- DOI前加上"Available at: "
- DOI信息格式调整为"https://doi.org/+真实DOI"替换真实DOI为文献实际DOI.
3. 严格按照以下JSON结构返回结果仅返回JSON数据不要额外文字,包含字段doilinkurl格式、title标题、author作者数组、joura出版社名称、dateno年;卷(期):起始页-终止页),is_ai_check(默认1)
4. 若未查询到信息字段is_ai_check为2,相关字段为null。';

    $aRequest = [
        'model' => empty($aParam['model']) ? 'gpt-4.1' : $aParam['model'],
        'url' => $this->sApiUrl,
        'temperature' => 0,
        'messages' => [
            ['role' => 'system', 'content' => $sSysMessagePrompt],
            // User prompt: the DOI to look up.
            ['role' => 'user', 'content' => '我提供的DOI是:'.$sDoi],
        ],
        'api_key' => $this->sApiKey,
    ];

    $oClient = new \app\common\OpenAi;
    return json_decode($oClient->curlOpenAI($aRequest), true);
}
/**
 * Notify the journal's responsible editor that the author has finished
 * revising the references of an article.
 *
 * Fixes in this version:
 *  - the two "not in production" / "no references" failures returned raw
 *    arrays while every other path returns a JSON string; they are now JSON
 *    encoded as well;
 *  - `$aParam` is an optional parameter (default []), matching the sibling
 *    methods; previously it was read without ever being declared;
 *  - unused locals were removed.
 *
 * @param array $aParam Optional parameters; the POST body is used when empty.
 *                      Requires `article_id`.
 * @return string JSON string with `status` (1 = mail sent) and `msg`.
 */
public function finishSendEmail($aParam = []){
    $aParam = empty($aParam) ? $this->request->post() : $aParam;

    $iArticleId = empty($aParam['article_id']) ? '' : $aParam['article_id'];
    if (empty($iArticleId)) {
        return json_encode(array('status' => 2,'msg' => 'Please select an article'));
    }

    // The article must be in state 5 or 6.
    $aWhere = ['article_id' => $iArticleId,'state' => ['in',[5,6]]];
    $aArticle = Db::name('article')->field('article_id,journal_id,accept_sn')->where($aWhere)->find();
    if (empty($aArticle)) {
        return json_encode(['status' => 3,'msg' => 'The article does not exist or has not entered the editorial reference status']);
    }

    // It must have an active production record...
    $aWhere = ['article_id' => $iArticleId,'state' => 0];
    $aProductionArticle = Db::name('production_article')->field('p_article_id')->where($aWhere)->find();
    if (empty($aProductionArticle)) {
        return json_encode(['status' => 2,'msg' => 'The article has not entered the production stage']);
    }

    // ...and at least one active reference.
    $aWhere = ['p_article_id' => $aProductionArticle['p_article_id'],'state' => 0];
    $aRefer = Db::name('production_article_refer')->field('article_id')->where($aWhere)->find();
    if (empty($aRefer)) {
        return json_encode(['status' => 2,'msg' => 'No reference information found, please be patient and wait for the editor to upload']);
    }

    // Journal details.
    if (empty($aArticle['journal_id'])) {
        return json_encode(array('status' => 4,'msg' => 'The article is not associated with a journal' ));
    }
    $aWhere = ['state' => 0,'journal_id' => $aArticle['journal_id']];
    $aJournal = Db::name('journal')->where($aWhere)->find();
    if (empty($aJournal)) {
        return json_encode(array('status' => 5,'msg' => 'No journal information found' ));
    }

    // Responsible editor and their e-mail address.
    $iUserId = empty($aJournal['editor_id']) ? '' : $aJournal['editor_id'];
    if (empty($iUserId)) {
        return json_encode(array('status' => 6,'msg' => 'The journal to which the article belongs has not designated a responsible editor' ));
    }
    $aWhere = ['user_id' => $iUserId,'state' => 0,'email' => ['<>','']];
    $aUser = Db::name('user')->field('user_id,email,realname,account')->where($aWhere)->find();
    if (empty($aUser)) {
        return json_encode(['status' => 7,'msg' => "Edit email as empty"]);
    }

    // Mail template.
    $aEmailConfig = [
        'email_subject' => '{journal_title}-{accept_sn}',
        'email_content' => '
Dear Editor,<br><br>
The authors have revised the formats of all references, please check.<br>
Sn:{accept_sn}<br><br>
Sincerely,<br>Editorial Office<br>
<a href="https://www.tmrjournals.com/draw_up.html?issn={journal_issn}">Subscribe to this journal</a><br>{journal_title}<br>
Email: {journal_email}<br>
Website: {website}'
    ];
    // Placeholder values.
    $aSearch = [
        '{accept_sn}' => empty($aArticle['accept_sn']) ? '' : $aArticle['accept_sn'],
        '{journal_title}' => empty($aJournal['title']) ? '' : $aJournal['title'],
        '{journal_issn}' => empty($aJournal['issn']) ? '' : $aJournal['issn'],
        '{journal_email}' => empty($aJournal['email']) ? '' : $aJournal['email'],
        '{website}' => empty($aJournal['website']) ? '' : $aJournal['website'],
    ];

    $email = $aUser['email'];
    $title = str_replace(array_keys($aSearch), array_values($aSearch), $aEmailConfig['email_subject']);
    $content = str_replace(array_keys($aSearch), array_values($aSearch), $aEmailConfig['email_content']);

    // Wrap the body in the shared header/footer; the footer carries the recipient address.
    $pre = \think\Env::get('emailtemplete.pre');
    $net = \think\Env::get('emailtemplete.net');
    $net1 = str_replace("{{email}}", trim($email), $net);
    $content = $pre.$content.$net1;

    // Send from the journal's own mailbox.
    $memail = empty($aJournal['email']) ? '' : $aJournal['email'];
    $mpassword = empty($aJournal['epassword']) ? '' : $aJournal['epassword'];
    $from_name = empty($aJournal['title']) ? '' : $aJournal['title'];
    $aResult = sendEmail($email,$title,$from_name,$content,$memail,$mpassword);

    // NOTE(review): a missing/empty status is treated as success here, as before —
    // confirm this matches what sendEmail() returns on failure.
    $iStatus = empty($aResult['status']) ? 1 : $aResult['status'];
    if ($iStatus == 1) {
        return json_encode(['status' => 1,'msg' => 'success']);
    }
    return json_encode(['status' => 8,'msg' => 'fail']);
}
}

View File

@@ -3,24 +3,26 @@
namespace app\api\job; namespace app\api\job;
use think\queue\Job; use think\queue\Job;
use think\Log; use app\common\ExpertFinderService;
/**
* 专家抓取队列任务
* 注意:此任务推送到队列名 "FetchExperts",必须单独启动 worker 才会执行:
* php think queue:listen --queue FetchExperts
* 若只运行 queue:listen 不指定队列,默认只消费 "mail",本任务不会被执行。
*/
class FetchExperts class FetchExperts
{ {
public function fire(Job $job, $data) public function fire(Job $job, $data)
{ {
try { $field = isset($data['field']) ? $data['field'] : '';
$finder = new \app\api\controller\ExpertFinder(); $service = new ExpertFinderService();
$result = $finder->doFetchForField( $service->doFetchForField(
$data['field'], $field,
$data['source'] ?? 'pubmed', isset($data['source']) ? $data['source'] : 'pubmed',
$data['per_page'] ?? 100, isset($data['per_page']) ? intval($data['per_page']) : 100,
$data['min_year'] ?? null isset($data['min_year']) ? $data['min_year'] : null
); );
Log::info('FetchExperts completed: ' . json_encode($result));
} catch (\Exception $e) {
Log::error('FetchExperts failed: ' . $e->getMessage());
}
$job->delete(); $job->delete();
} }

View File

@@ -0,0 +1,35 @@
<?php
namespace app\api\job;
use think\queue\Job;
use app\common\PromotionService;
/**
 * Queue job that sends the next e-mail of a promotion task.
 */
class PromotionSend
{
    /**
     * Process one queue message.
     *
     * Expects `$data['task_id']`. A message without a usable task id is
     * simply removed. Otherwise PromotionService::processNextEmail() is called
     * for the task and the job is deleted afterwards.
     *
     * Result logging and exception handling are currently switched off: an
     * exception thrown by processNextEmail() propagates to the queue worker
     * and the job is not deleted by this method.
     *
     * @param Job   $job  Queue job handle.
     * @param mixed $data Job payload.
     * @return void
     */
    public function fire(Job $job, $data)
    {
        $taskId = isset($data['task_id']) ? intval($data['task_id']) : 0;
        $service = new PromotionService();

        if ($taskId !== 0) {
            $service->processNextEmail($taskId);
        }

        $job->delete();
    }
}

View File

@@ -1151,6 +1151,123 @@ class ArticleParserService
] ]
]; ];
} }
/**
* 提取 Word 文档中的参考文献列表(仅返回数组,不做入库)
* @return array 每条为一个参考文献的纯文本字符串
*/
public static function getReferencesFromWord($filePath): array
{
$othis = new self($filePath) ;
if (empty($othis->sections)) {
return [];
}
$lines = [];
foreach ($othis->sections as $section) {
foreach ($section->getElements() as $element) {
$text = $othis->getTextFromElement($element);
$text = trim((string)$text);
if ($text === '') continue;
$lines[] = $text;
}
}
if (empty($lines)) {
return [];
}
// 识别参考文献段落起点(允许同一行包含域代码或第一条内容)
$startIdx = -1;
$startRemainder = ''; // 标题行后可能跟着第一条参考文献内容
foreach ($lines as $i => $line) {
$t = trim($line);
if ($t === '') continue;
// 行首命中即可(避免 “References { ADDIN... }” / “References 1. ...” 漏判)
if (preg_match('/^\s*(references|reference|bibliography|参考文献|文献)\b\s*[:]?\s*/iu', $t, $m)) {
$startIdx = $i;
$remainder = preg_replace('/^\s*(references|reference|bibliography|参考文献|文献)\b\s*[:]?\s*/iu', '', $t);
$remainder = trim($remainder);
// 过滤 EndNote 域代码(允许其出现在标题行后)
if ($remainder !== '' && !preg_match('/^\{\s*ADDIN\s+EN\.REFLIST\s*\}$/i', $remainder)) {
$startRemainder = $remainder;
}
break;
}
}
if ($startIdx < 0) {
return [];
}
// 收集参考文献区域内容,遇到常见结尾段落标题则停止
$stopKeywords = [
'acknowledgements', 'acknowledgments', 'funding', 'appendix', 'supplementary',
'conflict of interest', 'competing interests', 'author contributions',
'致谢', '基金', '附录', '补充材料', '利益冲突', '作者贡献',
];
// startRemainder 已在起点识别时处理
$raw = [];
if ($startRemainder !== '') {
$raw[] = $startRemainder;
}
for ($i = $startIdx + 1; $i < count($lines); $i++) {
$line = trim($lines[$i]);
if ($line === '') continue;
// 跳过 EndNote / Word 域代码
if (preg_match('/^\{\s*ADDIN\s+EN\.REFLIST\s*\}$/i', $line)) {
continue;
}
$lineLower = strtolower($line);
foreach ($stopKeywords as $sk) {
$skLower = strtolower($sk);
if ($lineLower === $skLower || $lineLower === $skLower . ':' || $lineLower === $skLower . '') {
$i = count($lines); // break outer
continue 2;
}
}
$raw[] = $line;
}
if (empty($raw)) {
return [];
}
// 合并多行:以 “数字.” / “[数字]” / “数字]” 等作为新条目起始
$refs = [];
$current = '';
foreach ($raw as $line) {
$isNew = false;
if (preg_match('/^\s*(\[\d+\]|\d+\s*[\.\)]|\d+\s*\])\s*/u', $line)) {
$isNew = true;
}
if ($isNew) {
if (trim($current) !== '') {
$refs[] = trim(preg_replace('/\s+/u', ' ', $current));
}
$current = $line;
} else {
// 续行拼接
if ($current === '') {
$current = $line;
} else {
$current .= ' ' . $line;
}
}
}
if (trim($current) !== '') {
$refs[] = trim(preg_replace('/\s+/u', ' ', $current));
}
return $refs;
}
/** /**
* 核心解码方法 * 核心解码方法
* @param string $str 待解码字符串 * @param string $str 待解码字符串

View File

@@ -0,0 +1,331 @@
<?php
namespace app\common;
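/**
 * Citation relevance checking service (PubMed + embedding).
 *
 * Dependencies:
 * - PubmedService: fetches title / abstract / MeSH terms / publication types by DOI.
 * - CrossrefService: detects retractions and corrections (supplementary).
 *
 * Embedding:
 * - Uses the in-house large-model embedding endpoint (no per-token cost, but slow).
 * - embedding_url / headers / timeout are supplied through the constructor config.
 * - Results are kept in a built-in file cache to avoid repeated embedding work.
 */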
class CitationRelevanceService
{
private $pubmed;
private $crossref;
private $embeddingUrl = '';
private $embeddingHeaders = [];
private $timeout = 120;
private $chatUrl = '';
private $chatModel = '';
private $embeddingDim = 256;
private $chatMaxTokens = 1200;
/**
 * Wire up the lookup services and apply optional configuration.
 *
 * The nullable parameters are now declared explicitly (`?Type $x = null`).
 * The implicit form is deprecated as of PHP 8.4 and both forms accept the
 * same arguments.
 *
 * Recognised $config keys (all optional):
 *  - embedding_url      string  dedicated embeddings endpoint
 *  - embedding_headers  array   extra HTTP header lines
 *  - timeout            int     request timeout in seconds (at least 10)
 *  - chat_url           string  chat-completions endpoint used as fallback
 *  - chat_model         string  model name for the chat fallback
 *  - embedding_dim      int     vector length requested from chat (at least 32)
 *  - chat_max_tokens    int     max_tokens for the chat fallback (at least 256)
 *
 * @param PubmedService|null   $pubmed   PubMed client; a default one is created when null.
 * @param CrossrefService|null $crossref Crossref client; a default one is created when null.
 * @param array                $config   See above.
 */
public function __construct(?PubmedService $pubmed = null, ?CrossrefService $crossref = null, array $config = [])
{
    $this->pubmed = $pubmed ?: new PubmedService();
    $this->crossref = $crossref ?: new CrossrefService();

    if (isset($config['embedding_url'])) {
        $this->embeddingUrl = (string)$config['embedding_url'];
    }
    if (isset($config['embedding_headers']) && is_array($config['embedding_headers'])) {
        $this->embeddingHeaders = $config['embedding_headers'];
    }
    if (isset($config['timeout'])) {
        $this->timeout = max(10, intval($config['timeout']));
    }
    if (isset($config['chat_url'])) {
        $this->chatUrl = (string)$config['chat_url'];
    }
    if (isset($config['chat_model'])) {
        $this->chatModel = (string)$config['chat_model'];
    }
    if (isset($config['embedding_dim'])) {
        $this->embeddingDim = max(32, intval($config['embedding_dim']));
    }
    if (isset($config['chat_max_tokens'])) {
        $this->chatMaxTokens = max(256, intval($config['chat_max_tokens']));
    }
}
/**
 * Check one citation for relevance and for retraction/correction notices.
 *
 * Flow:
 *  1. optionally ask CrossrefService::qcCitation() for the problem flag;
 *  2. fetch the PubMed record for the reference DOI and build a text from
 *     its title, abstract and MeSH terms;
 *  3. when that text exists, compare its embedding with the embedding of the
 *     citing context (cosine similarity) and derive the relevance flag;
 *  4. when no PubMed text exists, fall back to the Crossref evidence result.
 *
 * Fix: when either embedding could not be produced, the similarity used to
 * default to 0.0 and the citation was reported as `suspicious_unrelated`
 * with reason `embedding(context,pubmed_text)`. That case is now reported as
 * `unsure` with reason `embedding_unavailable`, which the previous code
 * spelled out but could never reach.
 *
 * @param string $contextText Text around the in-text citation.
 * @param array  $referRow    Row of production_article_refer (refer_doi, doilink, title, ...).
 * @param array  $options     Optional settings:
 *                            - sentence_is_background (bool) soften the verdict for background citations
 *                            - sim_related (float) threshold for "related", default 0.75
 *                            - sim_unsure (float) threshold for "unsure", default 0.60
 *                            - check_retraction (bool) run the retraction check, default true
 * @return array problem_flag, problem_reason, relevance_flag, relevance_score,
 *               reason and pubmed on the embedding path; on the fallback path the
 *               Crossref result with problem_flag/problem_reason/reason overridden.
 */
public function checkOne(string $contextText, array $referRow, array $options = []): array
{
    $contextText = trim($contextText);
    $simRelated = isset($options['sim_related']) ? (float)$options['sim_related'] : 0.75;
    $simUnsure = isset($options['sim_unsure']) ? (float)$options['sim_unsure'] : 0.60;
    $checkRetraction = isset($options['check_retraction']) ? (bool)$options['check_retraction'] : true;
    $isBackground = !empty($options['sentence_is_background']);

    // 1) Retraction / correction status from Crossref.
    $problemFlag = 'unknown';
    $problemReason = '';
    if ($checkRetraction) {
        $qc = $this->crossref->qcCitation($contextText, $referRow, ['check_retraction' => true]);
        $problemFlag = $qc['problem_flag'] ?? 'unknown';
        $problemReason = $qc['problem_reason'] ?? '';
    }

    // 2) PubMed record: title + abstract + MeSH terms give the semantic text.
    $doi = $this->extractDoiFromRefer($referRow);
    $pub = $doi ? $this->pubmed->fetchByDoi($doi) : null;
    $pubText = '';
    $pubTypes = [];
    if ($pub) {
        $pubTypes = $pub['publication_types'] ?? [];
        $mesh = $pub['mesh_terms'] ?? [];
        $pubText = trim(
            ($pub['title'] ?? '') . "\n" .
            ($pub['abstract'] ?? '') . "\n" .
            (!empty($mesh) ? ('MeSH: ' . implode('; ', $mesh)) : '')
        );
    }

    // 4) No PubMed text: fall back to the (more conservative) Crossref evidence result.
    if ($pubText === '') {
        $fallback = $this->crossref->qcCitation($contextText, $referRow, ['check_retraction' => false]);
        $fallback['problem_flag'] = $problemFlag;
        $fallback['problem_reason'] = $problemReason;
        $fallback['reason'] = 'fallback_crossref_evidence; ' . ($fallback['reason'] ?? '');
        return $fallback;
    }

    // 3) Embedding similarity between the citing context and the PubMed text.
    $v1 = $this->embedCached($contextText);
    $v2 = $this->embedCached($pubText);
    $hasEmbedding = !empty($v1) && !empty($v2);
    $sim = $hasEmbedding ? $this->cosine($v1, $v2) : 0.0;

    if (!$hasEmbedding) {
        // Without vectors there is no evidence either way.
        $relevanceFlag = 'unsure';
    } elseif ($sim >= $simRelated) {
        $relevanceFlag = 'related';
    } elseif ($sim >= $simUnsure) {
        $relevanceFlag = 'unsure';
    } else {
        $relevanceFlag = $isBackground ? 'unsure_background' : 'suspicious_unrelated';
    }

    // PubMed publication types can also reveal a retraction/correction.
    if ($checkRetraction && $problemFlag !== 'retracted_or_corrected' && !empty($pubTypes)) {
        $ptLower = strtolower(implode(' | ', $pubTypes));
        if (strpos($ptLower, 'retracted publication') !== false
            || strpos($ptLower, 'retraction of publication') !== false
            || strpos($ptLower, 'published erratum') !== false
        ) {
            $problemFlag = 'retracted_or_corrected';
            $problemReason = 'PubMed publication type indicates retraction/correction';
        }
    }

    return [
        'problem_flag' => $problemFlag,
        'problem_reason' => $problemReason,
        'relevance_flag' => $relevanceFlag,
        'relevance_score' => round($sim, 4),
        'reason' => $hasEmbedding ? 'embedding(context,pubmed_text)' : 'embedding_unavailable',
        'pubmed' => [
            'pmid' => $pub['pmid'] ?? '',
            'year' => $pub['year'] ?? '',
            'journal' => $pub['journal'] ?? '',
            'publication_types' => $pubTypes,
        ],
    ];
}
// ---------------- embedding ----------------
/**
 * Return the embedding of a text, using the cache when possible.
 *
 * The cache key is 'emb_' plus the SHA-1 of the trimmed text; cacheGet() is
 * called with a 90-day age argument. A freshly computed vector is stored
 * before it is returned.
 *
 * @param string $text Text to embed.
 * @return array|null Vector, or null for empty text or when embedding fails.
 */
private function embedCached(string $text): ?array
{
    $text = trim($text);
    if ($text === '') {
        return null;
    }

    $cacheKey = 'emb_' . sha1($text);
    $hit = $this->cacheGet($cacheKey, 90 * 86400);
    if (is_array($hit) && !empty($hit)) {
        return $hit;
    }

    $fresh = $this->embed($text);
    if (!is_array($fresh) || empty($fresh)) {
        return null;
    }
    $this->cacheSet($cacheKey, $fresh);
    return $fresh;
}
/**
 * Obtain an embedding vector for a text.
 *
 * When an embeddings endpoint is configured it is used exclusively. Accepted
 * response shapes are {data:[{embedding:[...]}]}, {embedding:[...]} and a
 * bare numeric array. Otherwise the chat-completions endpoint is asked to
 * emit a JSON object with an `embedding` array of exactly `embeddingDim`
 * numbers; a reply wrapped in a ``` code fence is unwrapped first.
 *
 * NOTE(review): TLS peer verification is disabled for both requests.
 *
 * @param string $text Text to embed.
 * @return array|null Vector passed through normalizeVector(), or null when
 *                    the request fails or the response has an unexpected shape.
 */
private function embed(string $text): ?array
{
    // Shared JSON POST helper; returns the raw curl_exec() result.
    $postJson = function ($url, $body) {
        $handle = curl_init();
        curl_setopt_array($handle, [
            CURLOPT_URL => $url,
            CURLOPT_RETURNTRANSFER => true,
            CURLOPT_TIMEOUT => $this->timeout,
            CURLOPT_SSL_VERIFYPEER => false,
            CURLOPT_POST => true,
            CURLOPT_POSTFIELDS => $body,
            CURLOPT_HTTPHEADER => array_merge(['Content-Type: application/json'], $this->embeddingHeaders),
        ]);
        $raw = curl_exec($handle);
        curl_close($handle);
        return $raw;
    };

    // 1) A dedicated embeddings endpoint takes priority.
    if ($this->embeddingUrl !== '') {
        $raw = $postJson($this->embeddingUrl, json_encode(['text' => $text], JSON_UNESCAPED_UNICODE));
        if (!is_string($raw) || trim($raw) === '') {
            return null;
        }
        $reply = json_decode($raw, true);
        if (!is_array($reply)) {
            return null;
        }
        if (isset($reply['data'][0]['embedding']) && is_array($reply['data'][0]['embedding'])) {
            return $this->normalizeVector($reply['data'][0]['embedding']);
        }
        if (isset($reply['embedding']) && is_array($reply['embedding'])) {
            return $this->normalizeVector($reply['embedding']);
        }
        if (isset($reply[0]) && (is_float($reply[0]) || is_int($reply[0]))) {
            return $this->normalizeVector($reply);
        }
        return null;
    }

    // 2) Fallback: have the chat model produce a fixed-length vector.
    if ($this->chatUrl === '' || $this->chatModel === '') {
        return null;
    }
    $sys = "You are an embedding generator. Output ONLY valid JSON in this exact shape: {\"embedding\":[...]}.\n"
        . "Rules:\n"
        . "- embedding must be an array of exactly {$this->embeddingDim} floats\n"
        . "- each float must be between -1 and 1\n"
        . "- do not include any other keys or any extra text\n";
    $chatBody = json_encode([
        'model' => $this->chatModel,
        'temperature' => 0,
        'max_tokens' => $this->chatMaxTokens,
        'messages' => [
            ['role' => 'system', 'content' => $sys],
            ['role' => 'user', 'content' => $text],
        ],
    ], JSON_UNESCAPED_UNICODE);

    $raw = $postJson($this->chatUrl, $chatBody);
    if (!is_string($raw) || trim($raw) === '') {
        return null;
    }

    $reply = json_decode($raw, true);
    $answer = (is_array($reply) && isset($reply['choices'][0]['message']['content']))
        ? (string)$reply['choices'][0]['message']['content']
        : '';
    $answer = trim($answer);
    if ($answer === '') {
        return null;
    }
    // The model may wrap its JSON in a ```json ... ``` fence.
    if (preg_match('/```(?:json)?\\s*([\\s\\S]*?)\\s*```/i', $answer, $fence)) {
        $answer = trim($fence[1]);
    }

    $parsed = json_decode($answer, true);
    if (!is_array($parsed) || !isset($parsed['embedding']) || !is_array($parsed['embedding'])) {
        return null;
    }
    // A vector of the wrong length cannot be compared with cached ones.
    if (count($parsed['embedding']) !== $this->embeddingDim) {
        return null;
    }
    return $this->normalizeVector($parsed['embedding']);
}
/**
 * Cosine similarity of two vectors, computed over their first
 * min(count($a), count($b)) integer positions.
 *
 * @param array $a First vector (indexed 0..n-1).
 * @param array $b Second vector (indexed 0..n-1).
 * @return float Similarity, or 0.0 when either input is empty or has zero magnitude.
 */
private function cosine(array $a, array $b): float
{
    $len = min(count($a), count($b));
    if ($len <= 0) {
        return 0.0;
    }

    $dotProduct = 0.0;
    $sqA = 0.0;
    $sqB = 0.0;
    $pos = 0;
    while ($pos < $len) {
        $va = (float)$a[$pos];
        $vb = (float)$b[$pos];
        $dotProduct += $va * $vb;
        $sqA += $va * $va;
        $sqB += $vb * $vb;
        $pos++;
    }

    if ($sqA <= 0.0 || $sqB <= 0.0) {
        return 0.0;
    }
    return $dotProduct / (sqrt($sqA) * sqrt($sqB));
}
/**
 * Cast every component to float and scale the vector to unit length.
 *
 * Keys of the input are discarded; the result is a 0-based list. A vector whose
 * squared sum is not positive is returned unscaled (as floats).
 *
 * @param array $v Input components.
 * @return array List of floats.
 */
private function normalizeVector(array $v): array
{
    $floats = [];
    $squared = 0.0;
    foreach ($v as $component) {
        $value = (float)$component;
        $floats[] = $value;
        $squared += $value * $value;
    }

    if ($squared <= 0.0) {
        return $floats;
    }

    $length = sqrt($squared);
    foreach ($floats as $idx => $value) {
        $floats[$idx] = $value / $length;
    }
    return $floats;
}
/**
 * Pull a DOI string out of a reference row.
 *
 * Preference order:
 *  1. a non-empty `refer_doi` (returned trimmed, as-is);
 *  2. the part after "doi.org/" in `doilink`, excluding any query string or fragment;
 *  3. the trimmed `doilink` itself.
 * No DOI format validation happens here.
 *
 * @param array $referRow Reference row; keys read: refer_doi, doilink.
 * @return string The DOI candidate, or '' when both fields are blank.
 */
private function extractDoiFromRefer(array $referRow): string
{
    $doi = trim((string)($referRow['refer_doi'] ?? ''));
    if ($doi !== '') {
        return $doi;
    }

    $doilink = trim((string)($referRow['doilink'] ?? ''));
    if ($doilink === '') {
        return '';
    }

    // "~" is the delimiter on purpose: the character class contains "#", which PHP
    // would treat as the closing delimiter of a "#...#" pattern and reject the regex.
    if (preg_match('~doi\.org/([^?#]+)~i', $doilink, $m) === 1) {
        return trim((string)$m[1]);
    }
    return $doilink;
}
// ---------------- cache ----------------
/**
 * Directory that holds the embedding cache files: <ROOT_PATH>/runtime/embed_cache.
 *
 * @return string Path without a trailing slash.
 */
private function cacheDir(): string
{
    $root = rtrim(ROOT_PATH, '/');
    return $root . '/runtime/embed_cache';
}
/**
 * Read a cached value from "<cacheDir>/<key>.json".
 *
 * @param string $key        Cache key (used verbatim as the file name stem).
 * @param int    $ttlSeconds Maximum accepted age of the file, judged by its mtime.
 * @return mixed|null The JSON-decoded content (arrays for objects), or null when the
 *                    file is missing, too old, or not decodable.
 */
private function cacheGet(string $key, int $ttlSeconds)
{
    $path = $this->cacheDir() . '/' . $key . '.json';
    if (!is_file($path)) {
        return null;
    }

    $modifiedAt = filemtime($path);
    $expired = !$modifiedAt || (time() - $modifiedAt) > $ttlSeconds;
    if ($expired) {
        return null;
    }

    return json_decode((string)@file_get_contents($path), true);
}
/**
 * Write a value as JSON to "<cacheDir>/<key>.json", creating the directory if needed.
 *
 * Best effort: directory-creation and write failures are suppressed and not reported.
 *
 * @param string $key   Cache key (used verbatim as the file name stem).
 * @param mixed  $value JSON-encodable value.
 * @return void
 */
private function cacheSet(string $key, $value): void
{
    $directory = $this->cacheDir();
    if (!is_dir($directory)) {
        @mkdir($directory, 0777, true);
    }

    $encoded = json_encode($value, JSON_UNESCAPED_UNICODE);
    @file_put_contents($directory . '/' . $key . '.json', $encoded);
}
}

View File

@@ -0,0 +1,765 @@
<?php
namespace app\common;
/**
* Crossref API 工具类
*
* 说明:
* - 仿照 application/api/controller/Crossrefdoi.php 的实现风格抽成 Service
* - 仅做「请求 + 解析」;不包含任何数据库读写
*/
class CrossrefService
{
// Configuration
private $mailto = '';   // Contact e-mail sent with each request (original note: raises request priority)
private $timeout = 15;  // Request timeout in seconds
private $maxRetry = 2;  // Maximum number of attempts per DOI
private $crossrefUrl = "https://api.crossref.org/works/"; // Endpoint; the URL-encoded DOI is appended

/**
 * @param array $config Optional overrides; recognised keys: mailto, timeout, maxRetry,
 *                      crossrefUrl. A non-array argument is ignored.
 */
public function __construct($config = [])
{
    if (!is_array($config)) {
        return;
    }

    if (isset($config['mailto'])) {
        $this->mailto = (string)$config['mailto'];
    }
    if (isset($config['timeout'])) {
        $this->timeout = (int)$config['timeout'];
    }
    if (isset($config['maxRetry'])) {
        $this->maxRetry = (int)$config['maxRetry'];
    }
    if (isset($config['crossrefUrl'])) {
        $this->crossrefUrl = (string)$config['crossrefUrl'];
    }
}
/**
 * Set the contact e-mail used for Crossref requests.
 *
 * @param mixed $mailto Value cast to string.
 * @return $this For chaining.
 */
public function setMailto($mailto)
{
    $this->mailto = strval($mailto);

    return $this;
}
/**
 * Convert an in-text citation number to a reference index: [n] in the body maps to
 * production_article_refer.index = n - 1 (the index is 0-based).
 *
 * @param int $citationMark Citation number from the text, e.g. 13 for "[13]".
 * @return int 0-based reference index, e.g. 12; never negative.
 */
public function referIndexFromCitationMark(int $citationMark): int
{
    $zeroBased = $citationMark - 1;

    return $zeroBased > 0 ? $zeroBased : 0;
}
/**
 * Inverse conversion: 0-based production_article_refer.index to the in-text
 * citation number [n].
 *
 * @param int $referIndex 0-based reference index, e.g. 12.
 * @return int Citation number, e.g. 13; never negative.
 */
public function citationMarkFromReferIndex(int $referIndex): int
{
    $mark = $referIndex + 1;

    return $mark < 0 ? 0 : $mark;
}
/**
 * Batch citation QC without any database access.
 *
 * Takes the article's section contents and its reference rows, finds every single-number
 * marker "[n]" in the text, builds a sentence-window context around it, maps it to the
 * reference whose index is n - 1 and runs qcCitation() on the pair. Markers with no
 * matching reference row are skipped. Only the "[n]" form is recognised; grouped forms
 * such as "[1,2]" or "[3-5]" are not matched by the pattern used here.
 *
 * A definitive retraction verdict ("ok" / "retracted_or_corrected") is looked up once per
 * reference and reused for later occurrences of the same marker, so a reference cited many
 * times does not trigger repeated Crossref requests. "unknown" verdicts are not reused.
 *
 * @param array $articleMainContents Content fragments in reading order; each element is a
 *                                   string or an array with a `content` key.
 * @param array $referRows           Reference rows; `index` is required, and title/author/
 *                                   joura/dateno/refer_doi/doilink/p_refer_id are read when present.
 * @param array $options             Passed through to qcCitation(); additionally:
 *                                   - sentence_window (int, default 1): sentences kept on
 *                                     each side of the citing sentence.
 * @return array List of entries with keys citation_mark, refer_index, context, ref_meta, qc.
 */
public function qcArticleCitations(array $articleMainContents, array $referRows, array $options = []): array
{
    $window = isset($options['sentence_window']) ? max(0, intval($options['sentence_window'])) : 1;
    $checkRetraction = isset($options['check_retraction']) ? (bool)$options['check_retraction'] : true;

    // 1) Flatten the article into plain text, keeping the [n] markers.
    $chunks = [];
    foreach ($articleMainContents as $row) {
        $text = is_array($row) ? (string)($row['content'] ?? '') : (string)$row;
        if ($text === '') {
            continue;
        }
        $text = (string)preg_replace('/<\s*\/?\s*blue[^>]*>/i', '', $text);
        $text = strip_tags($text);
        $text = trim(html_entity_decode($text, ENT_QUOTES | ENT_HTML5, 'UTF-8'));

        // The /u variant returns null on invalid UTF-8; fall back to byte mode
        // instead of silently losing the whole fragment.
        $collapsed = preg_replace('/\s+/u', ' ', $text);
        if ($collapsed === null) {
            $collapsed = (string)preg_replace('/\s+/', ' ', $text);
        }
        if ($collapsed !== '') {
            $chunks[] = $collapsed;
        }
    }
    $fullText = implode("\n", $chunks);
    if ($fullText === '') {
        return [];
    }

    // 2) refer_index => reference row.
    $referMap = [];
    foreach ($referRows as $r) {
        if (is_array($r) && isset($r['index'])) {
            $referMap[intval($r['index'])] = $r;
        }
    }

    // 3) Sentence split.
    $sentences = $this->splitEnglishSentences($fullText);
    if (empty($sentences)) {
        return [];
    }
    $lastSentence = count($sentences) - 1;

    // Options used when the retraction verdict is already known for a reference.
    $noLookupOptions = $options;
    $noLookupOptions['check_retraction'] = false;
    $verdicts = []; // refer_index => ['problem_flag' => ..., 'problem_reason' => ...]

    // 4) Walk the sentences and evaluate each distinct [n] found in them.
    $results = [];
    foreach ($sentences as $si => $sent) {
        if (!preg_match_all('/\[(\d+)\]/', $sent, $m)) {
            continue;
        }

        // The context depends only on the sentence position; build it lazily, once.
        $ctx = null;
        foreach (array_unique(array_map('intval', $m[1])) as $citationMark) {
            if ($citationMark <= 0) {
                continue;
            }
            $referIndex = $this->referIndexFromCitationMark($citationMark);
            if (!isset($referMap[$referIndex])) {
                continue;
            }

            if ($ctx === null) {
                $start = max(0, $si - $window);
                $end = min($lastSentence, $si + $window);
                $ctx = implode(' ', array_slice($sentences, $start, $end - $start + 1));
                $ctx = trim((string)preg_replace('/\s+/u', ' ', $ctx));
            }

            $refMeta = $referMap[$referIndex];
            if ($checkRetraction && isset($verdicts[$referIndex])) {
                $qc = $this->qcCitation($ctx, $refMeta, $noLookupOptions);
                $qc['problem_flag'] = $verdicts[$referIndex]['problem_flag'];
                $qc['problem_reason'] = $verdicts[$referIndex]['problem_reason'];
            } else {
                $qc = $this->qcCitation($ctx, $refMeta, $options);
                if ($checkRetraction && ($qc['problem_flag'] ?? 'unknown') !== 'unknown') {
                    $verdicts[$referIndex] = [
                        'problem_flag' => $qc['problem_flag'],
                        'problem_reason' => $qc['problem_reason'] ?? '',
                    ];
                }
            }

            $results[] = [
                'citation_mark' => $citationMark, // number n taken from "[n]"
                'refer_index' => $referIndex,     // production_article_refer.index
                'context' => $ctx,
                'ref_meta' => [
                    'p_refer_id' => $refMeta['p_refer_id'] ?? 0,
                    'title' => $refMeta['title'] ?? '',
                    'author' => $refMeta['author'] ?? '',
                    'joura' => $refMeta['joura'] ?? '',
                    'dateno' => $refMeta['dateno'] ?? '',
                    'refer_doi' => $refMeta['refer_doi'] ?? '',
                    'doilink' => $refMeta['doilink'] ?? '',
                    'index' => $refMeta['index'] ?? $referIndex,
                ],
                'qc' => $qc,
            ];
        }
    }
    return $results;
}
/**
 * Keep only DOIs of the form "10.<4+ digits>/<suffix>".
 *
 * @param string $doi Candidate DOI; cast to string and trimmed.
 * @return string The trimmed DOI when it matches, otherwise ''.
 */
public function filterValidDoi($doi = '')
{
    $candidate = trim((string)$doi);
    $looksValid = $candidate !== '' && preg_match('/^10\.\d{4,}\/.+/', $candidate);

    return $looksValid ? $candidate : '';
}
/**
 * Fetch the Crossref `message` object for a DOI, with retries.
 *
 * @param string $doi DOI to look up; validated with filterValidDoi() first.
 * @return array|null The message, or null when the DOI is invalid or the lookup fails.
 */
public function fetchWork($doi)
{
    $valid = $this->filterValidDoi($doi);

    return $valid === '' ? null : $this->fetchSingleDoiWithRetry($valid);
}
/**
 * Fetch a work and return its commonly used fields: title, journal, authors,
 * year/volume/issue/pages, retraction status and URL.
 *
 * @param string $doi DOI to look up.
 * @return array|null Summary with keys doi, title, joura, publisher, authors, author_str,
 *                    dateno, is_retracted (0|1), retract_reason, doilink, raw; null when
 *                    fetchWork() returns nothing.
 */
public function fetchWorkSummary($doi)
{
    $msg = $this->fetchWork($doi);
    if (!$msg) {
        return null;
    }

    $cleanDoi = $this->filterValidDoi($doi);
    $publisher = $this->getPublisher($msg);
    $authors = $this->getAuthors($msg);
    $retraction = $this->checkRetracted($msg);

    // Journal name: full container title, else the short one.
    if (!empty($publisher['title'])) {
        $journal = $publisher['title'];
    } else {
        $journal = $publisher['short_title'] ?? '';
    }

    // Fall back to the canonical doi.org URL when Crossref provides none.
    $link = $this->getDolink($msg);
    if (empty($link)) {
        $link = 'https://doi.org/' . $cleanDoi;
    }

    return [
        'doi' => $cleanDoi,
        'title' => $this->getTitle($msg),
        'joura' => $journal,
        'publisher' => $publisher,
        'authors' => $authors,
        'author_str' => empty($authors) ? '' : implode(',', $authors),
        'dateno' => $this->getVolumeIssuePages($msg),
        'is_retracted' => !empty($retraction['is_retracted']) ? 1 : 0,
        'retract_reason' => $retraction['reason'] ?? '',
        'doilink' => $link,
        'raw' => $msg,
    ];
}
/**
 * Query Crossref for a single DOI, retrying transient failures.
 *
 * Up to $this->maxRetry attempts are made. Only transient outcomes are retried:
 * transport failure (HTTP code 0), 408, 429 and 5xx. Any other non-200 status
 * (e.g. 404 for an unknown DOI) returns null immediately. The wait between attempts
 * is 5 s after a 429 and 1 s otherwise; there is no wait after the last attempt.
 *
 * @param string $doi Already-validated DOI.
 * @return array|null The `message` member of a response whose `status` is "ok", else null.
 */
private function fetchSingleDoiWithRetry($doi)
{
    // URL and User-Agent do not change between attempts.
    $url = $this->crossrefUrl . rawurlencode($doi);
    $userAgent = 'DOI-Fetcher/1.0';
    if (!empty($this->mailto)) {
        $url .= "?mailto=" . rawurlencode($this->mailto);
        $userAgent .= " (mailto:{$this->mailto})";
    }

    for ($attempt = 1; $attempt <= $this->maxRetry; $attempt++) {
        $ch = curl_init();
        curl_setopt($ch, CURLOPT_URL, $url);
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
        curl_setopt($ch, CURLOPT_TIMEOUT, $this->timeout);
        // NOTE(review): peer verification is disabled, as in the original code; consider
        // enabling it once a CA bundle is confirmed to be available on the servers.
        curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false);
        curl_setopt($ch, CURLOPT_HTTPHEADER, ["User-Agent: {$userAgent}"]);
        $response = curl_exec($ch);
        $httpCode = (int)curl_getinfo($ch, CURLINFO_HTTP_CODE);
        curl_close($ch);

        if ($httpCode === 200) {
            $data = json_decode((string)$response, true);
            return (isset($data['status']) && $data['status'] == 'ok') ? ($data['message'] ?? null) : null;
        }

        $transient = $httpCode === 0 || $httpCode === 408 || $httpCode === 429 || $httpCode >= 500;
        if (!$transient) {
            // Permanent answer; another attempt cannot change it.
            return null;
        }

        if ($attempt < $this->maxRetry) {
            sleep($httpCode === 429 ? 5 : 1);
        }
    }
    return null;
}
/**
 * First entry of the work's `title` list.
 *
 * @param array $aDoiInfo Crossref message.
 * @return string Title, or '' when absent.
 */
public function getTitle($aDoiInfo = [])
{
    if (isset($aDoiInfo['title'][0])) {
        return $aDoiInfo['title'][0];
    }
    return '';
}
/**
 * Journal / publisher fields of a work.
 *
 * @param array $aDoiInfo Crossref message.
 * @return array Keys: title (first container-title), short_title (first
 *               short-container-title), ISSN (list), publisher (string).
 */
public function getPublisher($aDoiInfo = [])
{
    $info = [];
    $info['title'] = $aDoiInfo['container-title'][0] ?? '';
    $info['short_title'] = $aDoiInfo['short-container-title'][0] ?? '';
    $info['ISSN'] = isset($aDoiInfo['ISSN']) ? $aDoiInfo['ISSN'] : [];
    $info['publisher'] = isset($aDoiInfo['publisher']) ? $aDoiInfo['publisher'] : '';

    return $info;
}
/**
 * Build the list of author display names of a work.
 *
 * Each entry becomes "given family" (either part may be missing; no stray spaces are
 * left behind). Entries that carry neither part but have a `name` member (as used for
 * organisational / group authors) contribute that name. Entries with no usable name
 * are skipped.
 *
 * @param array $aDoiInfo Crossref message; reads `author`.
 * @return array List of non-empty name strings, in source order.
 */
public function getAuthors($aDoiInfo = [])
{
    $authors = [];
    if (empty($aDoiInfo['author']) || !is_array($aDoiInfo['author'])) {
        return $authors;
    }

    foreach ($aDoiInfo['author'] as $author) {
        if (!is_array($author)) {
            continue;
        }
        $given = trim((string)($author['given'] ?? ''));
        $family = trim((string)($author['family'] ?? ''));

        // Outer trim removes the separator when one of the two parts is missing.
        $name = trim($given . ' ' . $family);
        if ($name === '') {
            $name = trim((string)($author['name'] ?? ''));
        }
        if ($name !== '') {
            $authors[] = $name;
        }
    }
    return $authors;
}
/**
 * Publication year taken from issued.date-parts[0][0].
 *
 * @param array $aDoiInfo Crossref message.
 * @return string Year as a string, or '' when missing/empty.
 */
public function getPublishYear($aDoiInfo = [])
{
    return empty($aDoiInfo['issued']['date-parts'][0][0])
        ? ''
        : (string)$aDoiInfo['issued']['date-parts'][0][0];
}
/**
 * Compose "year:volume(issue):firstPage-lastPage", e.g. "2024:10(2):100-120".
 * Parts that are missing are left out together with their separator.
 *
 * @param array $aDoiInfo Crossref message; reads issued, volume, issue, page,
 *                        first-page and last-page.
 * @return string The composed string ('' when nothing is available).
 */
public function getVolumeIssuePages($aDoiInfo = [])
{
    $segments = [];

    $year = $this->getPublishYear($aDoiInfo);
    if ($year) {
        $segments[] = $year;
    }

    $volume = $aDoiInfo['volume'] ?? '';
    if ($volume) {
        $issue = $aDoiInfo['issue'] ?? '';
        $suffix = $issue ? "({$issue})" : '';
        $segments[] = $volume . $suffix;
    }

    // Structured start/end pages win; otherwise use the raw `page` value.
    $firstPage = $aDoiInfo['page']['start'] ?? ($aDoiInfo['first-page'] ?? '');
    $lastPage = $aDoiInfo['page']['end'] ?? ($aDoiInfo['last-page'] ?? '');
    if ($firstPage) {
        $pages = $firstPage . ($lastPage ? "-{$lastPage}" : '');
    } else {
        $pages = $aDoiInfo['page'] ?? '';
    }
    if ($pages) {
        $segments[] = $pages;
    }

    return implode(':', $segments);
}
/**
 * Decide whether a Crossref work looks like a retraction / correction.
 * (Original note: same logic as Crossrefdoi.php.)
 *
 * Several independent rules are evaluated in sequence. Any hit sets the flag, and
 * each hit overwrites `reason`, so the reported reason is that of the LAST rule
 * that matched. Note that "correction" types and the "corrects" relation are
 * treated the same as retractions.
 *
 * @param array $aDoiInfo Crossref message.
 * @return array ['is_retracted' => bool, 'reason' => string]
 */
public function checkRetracted($aDoiInfo = [])
{
$isRetracted = false;
$reason = "未撤稿";
// Rule 1: work type or subtype is "retraction" / "correction".
$sType = strtolower($aDoiInfo['type'] ?? '');
$sSubtype = strtolower($aDoiInfo['subtype'] ?? '');
if ($sType && in_array($sType, ['retraction', 'correction'])) {
$isRetracted = true;
$reason = "文章类型为{$sType}(撤稿/更正声明)";
}
if ($sSubtype && in_array($sSubtype, ['retraction', 'correction'])) {
$isRetracted = true;
$reason = "文章类型为{$sSubtype}(撤稿/更正声明)";
}
// Rule 2: `update-type` is an array containing "retraction".
if (isset($aDoiInfo['update-type']) && is_array($aDoiInfo['update-type']) && in_array('retraction', $aDoiInfo['update-type'])) {
$isRetracted = true;
$reason = "官方标记为撤稿update-type: retraction";
}
// Rule 3: a relation of type "is-retraction-of" or "corrects"; only the first such relation is reported.
if (isset($aDoiInfo['relation']) && !empty($aDoiInfo['relation'])) {
foreach ($aDoiInfo['relation'] as $relType => $relItems) {
if (in_array($relType, ['is-retraction-of', 'corrects'])) {
$isRetracted = true;
$relatedDoi = $relItems[0]['id'] ?? '未知';
$reason = "关联撤稿文章{$relatedDoi}(关系:{$relType}";
break;
}
}
}
// Rule 4: an `update-to` entry whose type or label contains "retract".
if (isset($aDoiInfo['update-to']) && is_array($aDoiInfo['update-to'])) {
foreach ($aDoiInfo['update-to'] as $update) {
$updateType = strtolower($update['type'] ?? '');
$updateLabel = strtolower($update['label'] ?? '');
if (strpos($updateType, 'retract') !== false || strpos($updateLabel, 'retract') !== false) {
$isRetracted = true;
$reason = "update-to 标记撤稿({$updateType}/{$updateLabel}";
break;
}
}
}
// Rule 5: keyword heuristic on the title(s): retraction / retracted / withdrawal / withdrawn.
$aTitles = $aDoiInfo['title'] ?? [];
foreach ($aTitles as $value) {
$sTitleLower = strtolower($value);
if (strpos($sTitleLower, 'retraction') !== false || strpos($sTitleLower, 'retracted') !== false
|| strpos($sTitleLower, 'withdrawal') !== false || strpos($sTitleLower, 'withdrawn') !== false) {
$isRetracted = true;
$reason = "标题包含撤稿关键词";
break;
}
}
return [
'is_retracted' => $isRetracted,
'reason' => $reason,
];
}
/**
 * URL of the work as reported by Crossref (`URL` member).
 *
 * @param array $aDoiInfo Crossref message.
 * @return string The URL, or '' when absent.
 */
public function getDolink($aDoiInfo = [])
{
    if (isset($aDoiInfo['URL'])) {
        return $aDoiInfo['URL'];
    }
    return '';
}
/**
 * Format the first Crossref `date-parts` entry as "Y", "Y-MM" or "Y-MM-DD".
 *
 * @param array|null $dateObj Date object holding `date-parts`.
 * @return string Formatted date, or '' when there are no parts.
 */
public function parseDateParts($dateObj)
{
    $parts = $dateObj['date-parts'][0] ?? [];
    if (empty($parts)) {
        return '';
    }

    $formatted = (string)($parts[0] ?? '');
    // Month (position 1) and day (position 2) are zero-padded to two digits.
    foreach ([1, 2] as $position) {
        $piece = $parts[$position] ?? '';
        if ($piece !== '') {
            $formatted .= '-' . str_pad((string)$piece, 2, '0', STR_PAD_LEFT);
        }
    }
    return $formatted;
}
/**
 * Quality-check a single citation on two axes:
 *  (1) whether the cited work is flagged as retracted/corrected (via Crossref), and
 *  (2) whether the citing context looks related to the cited work (lexical evidence).
 *
 * Designed for references without abstract/keywords: only title, author, journal and
 * year are compared with the tokens of the context. Without a usable DOI the problem
 * flag stays "unknown".
 *
 * Relevance score = 0.60 * title-token overlap + 0.20 * author hit + 0.15 * year hit
 * + 0.05 * journal-token overlap, rounded to 4 decimals.
 *  - "related":              score >= 0.35 and an author or year hit
 *  - "unsure":               otherwise, score >= 0.25
 *  - "unsure_background":    lower score, but a background phrase occurs in the context
 *  - "suspicious_unrelated": everything else
 *
 * @param string $contextText Text around the citation (English; ideally the citing
 *                            sentence plus a few neighbours).
 * @param array  $refMeta     Cited reference; keys read: refer_doi, doilink, title,
 *                            author, joura, dateno.
 * @param array  $options     - check_retraction (bool, default true): query Crossref.
 *                            - background_phrases (array): replaces the built-in list.
 * @return array Keys: problem_flag ('ok'|'retracted_or_corrected'|'unknown'),
 *               problem_reason, relevance_flag, relevance_score (float), reason.
 */
public function qcCitation(string $contextText, array $refMeta, array $options = []): array
{
    $contextText = trim($contextText);
    $lowerContext = strtolower($contextText);

    // ---- 1) Retraction / correction (hard rule, drives problem_flag) ----
    $doi = $this->extractDoiFromMeta(
        (string)($refMeta['refer_doi'] ?? ''),
        (string)($refMeta['doilink'] ?? '')
    );
    $wantsLookup = isset($options['check_retraction']) ? (bool)$options['check_retraction'] : true;

    $problemFlag = 'unknown';
    if (!$wantsLookup) {
        $problemReason = 'Skip retraction check';
    } elseif (empty($doi)) {
        $problemReason = 'DOI is empty';
    } else {
        $summary = $this->fetchWorkSummary($doi);
        if (!$summary || !isset($summary['is_retracted'])) {
            $problemReason = 'Crossref fetch failed or returned unexpected data';
        } elseif ((int)$summary['is_retracted'] === 1) {
            $problemFlag = 'retracted_or_corrected';
            $problemReason = !empty($summary['retract_reason'])
                ? $summary['retract_reason']
                : 'Crossref indicates retraction/correction';
        } else {
            $problemFlag = 'ok';
            $problemReason = 'Crossref indicates not retracted/corrected';
        }
    }

    // ---- 2) Relevance (soft rules + evidence hits) ----
    if (isset($options['background_phrases'])) {
        $phrases = (array)$options['background_phrases'];
    } else {
        $phrases = [
            'several studies',
            'many studies',
            'the literature',
            'the existing literature',
            'has been reported',
            'have been reported',
            'it has been shown',
            'previous studies',
            'the study suggests',
            'the literature suggests',
            'in the literature',
        ];
    }
    $isBackground = false;
    foreach ($phrases as $phrase) {
        $needle = strtolower(trim((string)$phrase));
        if ($needle === '' || $needle === '0') {
            continue;
        }
        if (strpos($lowerContext, $needle) !== false) {
            $isBackground = true;
            break;
        }
    }

    $evidence = $this->buildEvidenceTokens([
        'title' => (string)($refMeta['title'] ?? ''),
        'author' => (string)($refMeta['author'] ?? ''),
        'journal' => (string)($refMeta['joura'] ?? ''),
        'year' => (string)($refMeta['dateno'] ?? ''),
    ]);
    $contextTokens = $this->tokenize($contextText);

    $titleTokens = $evidence['titleTokens'] ?? [];
    $authorTokens = $evidence['authorTokens'] ?? [];
    $journalTokens = $evidence['journalTokens'] ?? [];
    $yearToken = $evidence['yearToken'] ?? '';

    // Share of the reference's title tokens that also occur in the context.
    $titleOverlap = 0.0;
    if (!empty($titleTokens)) {
        $titleOverlap = count(array_intersect($titleTokens, $contextTokens)) / max(1, count($titleTokens));
    }

    // Any single author token present in the context counts as a hit.
    $authorHit = 0.0;
    foreach ($authorTokens as $authorToken) {
        if ($authorToken !== '' && in_array($authorToken, $contextTokens, true)) {
            $authorHit = 1.0;
            break;
        }
    }

    $journalOverlap = 0.0;
    if (!empty($journalTokens)) {
        $journalOverlap = count(array_intersect($journalTokens, $contextTokens)) / max(1, count($journalTokens));
    }

    $yearHit = 0.0;
    if (!empty($yearToken) && strpos($lowerContext, (string)$yearToken) !== false) {
        $yearHit = 1.0;
    }

    // Weighted score: higher means more likely related.
    $score = round(
        0.60 * $titleOverlap + 0.20 * $authorHit + 0.15 * $yearHit + 0.05 * $journalOverlap,
        4
    );

    if ($score >= 0.35 && ($authorHit > 0.0 || $yearHit > 0.0)) {
        $relevanceFlag = 'related';
        $headline = 'title_keyword_overlap_high=' . $titleOverlap;
    } elseif ($score >= 0.25) {
        $relevanceFlag = 'unsure';
        $headline = 'evidence_score_mid=' . $score;
    } elseif ($isBackground) {
        $relevanceFlag = 'unsure_background';
        $headline = 'background_phrases_detected';
    } else {
        $relevanceFlag = 'suspicious_unrelated';
        $headline = 'evidence_score_low=' . $score;
    }

    $reason = implode('; ', [
        $headline,
        'titleOverlap=' . $titleOverlap,
        'authorHit=' . $authorHit,
        'yearHit=' . $yearHit,
        'journalOverlap=' . $journalOverlap,
    ]);

    return [
        'problem_flag' => $problemFlag,
        'problem_reason' => $problemReason,
        'relevance_flag' => $relevanceFlag,
        'relevance_score' => (float)$score,
        'reason' => $reason,
    ];
}
/**
 * Extract a valid DOI from a reference's `refer_doi` / `doilink` fields.
 *
 * Each field is tried in that order. A field yields a DOI when it is itself a valid
 * DOI, or when it is a URL containing "doi.org/<doi>" (query string and fragment are
 * ignored). If `refer_doi` holds something unusable, `doilink` is still consulted.
 *
 * @param string $referDoi Value of refer_doi.
 * @param string $doilink  Value of doilink (typically https://doi.org/10.xxxx/xxxx).
 * @return string A DOI accepted by filterValidDoi(), or '' when none is found.
 */
private function extractDoiFromMeta(string $referDoi, string $doilink): string
{
    foreach ([$referDoi, $doilink] as $raw) {
        $candidate = trim($raw);
        if ($candidate === '') {
            continue;
        }

        // The field may already be a bare DOI.
        $direct = $this->filterValidDoi($candidate);
        if ($direct !== '') {
            return $direct;
        }

        // "~" is the delimiter on purpose: the character class contains "#", which PHP
        // would treat as the closing delimiter of a "#...#" pattern and reject the regex.
        if (preg_match('~doi\.org/([^?#]+)~i', $candidate, $m) === 1) {
            $fromUrl = $this->filterValidDoi(trim((string)$m[1]));
            if ($fromUrl !== '') {
                return $fromUrl;
            }
        }
    }
    return '';
}
/**
 * Turn a reference's title/author/journal/year strings into evidence tokens for
 * the coarse relevance check.
 *
 * Title and journal tokens are de-duplicated, stripped of stop words and limited to
 * tokens of at least 4 characters. Author tokens come from extractAuthorTokens() minus
 * stop words. The year token is the first 19xx/20xx number found in `year`.
 *
 * @param array $src Keys read: title, author, journal, year.
 * @return array Keys: titleTokens, authorTokens, journalTokens (lists), yearToken (string).
 */
private function buildEvidenceTokens(array $src): array
{
    $stop = [
        'the','a','an','and','or','of','in','on','for','with','to','from','by','at','as','is','are',
        'was','were','be','been','being','that','this','these','those','which','who','whom','it','its',
        'we','our','us','they','their','them','i','you','your','he','she','his','her',
        'study','studies','report','reported','reports','model','models','analysis','analyses','method','methods',
        'results','result','using','used','show','shown','demonstrated','demonstrate',
    ];

    // Shared rule for title and journal tokens.
    $isInformative = function ($token) use ($stop) {
        return !in_array($token, $stop, true) && mb_strlen($token) >= 4;
    };

    $rawTitle = array_unique($this->tokenize((string)($src['title'] ?? '')));
    $titleTokens = array_values(array_filter($rawTitle, $isInformative));

    $authorTokens = [];
    foreach ($this->extractAuthorTokens((string)($src['author'] ?? '')) as $token) {
        $token = trim($token);
        if (empty($token) || in_array($token, $stop, true)) {
            continue;
        }
        if (!in_array($token, $authorTokens, true)) {
            $authorTokens[] = $token;
        }
    }

    $rawJournal = array_unique($this->tokenize((string)($src['journal'] ?? '')));
    $journalTokens = array_values(array_filter($rawJournal, $isInformative));

    $yearToken = '';
    if (preg_match('/(19\d{2}|20\d{2})/', (string)($src['year'] ?? ''), $found)) {
        $yearToken = (string)$found[1];
    }

    return [
        'titleTokens' => $titleTokens,
        'authorTokens' => $authorTokens,
        'journalTokens' => $journalTokens,
        'yearToken' => $yearToken,
    ];
}
/**
 * Extract lower-cased surname tokens from an author string (simplified heuristic).
 *
 * The string is split on ",", ";" and " and ". For each part the last word is taken
 * as the surname ("John Smith" -> "smith"). When that last word is only initials
 * (fewer than 4 letters once dots and hyphens are removed, e.g. "Smith J" or
 * "Smith J.R."), the first word is used instead if it is long enough. Tokens shorter
 * than 4 characters are dropped and duplicates removed.
 *
 * @param string $authorStr Raw author list.
 * @return array List of surname tokens.
 */
private function extractAuthorTokens(string $authorStr): array
{
    $authorStr = trim($authorStr);
    if ($authorStr === '') {
        return [];
    }

    // Keep letters, dots and hyphens only; lower-case the result.
    $clean = function ($word) {
        return strtolower((string)preg_replace('/[^A-Za-z\.\-]/', '', (string)$word));
    };
    // Number of characters left once dots and hyphens are removed.
    $letterCount = function ($token) {
        return strlen(str_replace(['.', '-'], '', $token));
    };

    $tokens = [];
    foreach (preg_split('/[,;]| and /i', $authorStr) as $part) {
        $part = trim((string)$part);
        if ($part === '') {
            continue;
        }
        $words = preg_split('/\s+/', $part);
        if (empty($words)) {
            continue;
        }

        $cand = $clean(end($words));
        if ($letterCount($cand) < 4) {
            // Last word looks like initials: "Surname Initials" order, so try the first word.
            $leading = $clean(reset($words));
            if ($letterCount($leading) >= 4) {
                $cand = $leading;
            }
        }
        if ($cand !== '') {
            $tokens[] = $cand;
        }
    }

    // Drop tokens that are too short to be distinctive.
    return array_values(array_filter(array_unique($tokens), function ($t) {
        return mb_strlen($t) >= 4;
    }));
}
/**
 * Lightweight tokenizer for English text: lower-cases the input, splits on every run
 * of characters other than a-z / 0-9, keeps tokens of at least 3 characters and
 * removes duplicates (first occurrence order is kept).
 *
 * @param string $text Input text.
 * @return array List of unique tokens.
 */
private function tokenize(string $text): array
{
    $lowered = strtolower(trim($text));
    if ($lowered === '') {
        return [];
    }

    $kept = [];
    foreach (preg_split('/[^a-z0-9]+/i', $lowered, -1, PREG_SPLIT_NO_EMPTY) as $word) {
        // Very short tokens carry little information.
        if (mb_strlen($word) >= 3) {
            $kept[] = $word;
        }
    }
    return array_values(array_unique($kept));
}
/**
 * Lightweight English sentence splitter: breaks after ".", "?" or "!" when followed
 * by whitespace. Citation markers such as [n] stay inside their sentence.
 *
 * @param string $text Input text.
 * @return array List of non-empty, trimmed sentences.
 */
private function splitEnglishSentences(string $text): array
{
    $text = trim($text);
    if ($text === '') {
        return [];
    }

    // Collapse all whitespace (including newlines) so line breaks do not affect splitting.
    $flat = preg_replace('/\s+/u', ' ', $text);

    // Split at whitespace that directly follows sentence-ending punctuation.
    $pieces = preg_split('/(?<=[\.\?\!])\s+/', $flat);

    $trimmed = array_map(function ($piece) {
        return trim((string)$piece);
    }, $pieces);

    return array_values(array_filter($trimmed, function ($sentence) {
        return $sentence !== '';
    }));
}
}

View File

@@ -0,0 +1,620 @@
<?php
namespace app\common;
use think\Db;
use GuzzleHttp\Client;
class ExpertFinderService
{
// Guzzle client used for the NCBI requests.
private $httpClient;
// Base URL of the NCBI E-utilities; script names such as "esearch.fcgi" are appended.
private $ncbiBaseUrl = 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/';
// Path of this service's log file.
private $logFile;

/**
 * Create the HTTP client (180 s total timeout, 15 s connect timeout, TLS verification
 * disabled) and set the log file to <ROOT_PATH>runtime/expert_finder.log.
 */
public function __construct()
{
    $clientOptions = [
        'timeout' => 180,
        'connect_timeout' => 15,
        'verify' => false,
    ];
    $this->httpClient = new Client($clientOptions);

    $this->logFile = ROOT_PATH . 'runtime' . DS . 'expert_finder.log';
}
/**
 * Fetch the next page of experts for a research field and store them.
 *
 * The page to fetch is the stored `last_page` + 1 for this field/source. The stored
 * `last_page` advances to the fetched page only when the search result reports
 * `has_more`; otherwise it is left as it was.
 *
 * @param string   $field   Search keyword / research field.
 * @param string   $source  'pmc' selects PMC; any other value selects PubMed.
 * @param int      $perPage Articles requested per page.
 * @param int|null $minYear Earliest publication year; defaults to the current year minus 3.
 * @return array Summary with keys keyword, page, experts_found, saved_new, saved_exist,
 *               list, field_enriched, has_more. When the search result has no `total`,
 *               only ['has_more' => 'no'] is returned.
 *               NOTE(review): that early return uses the string "no", which is truthy in
 *               PHP — confirm callers compare against it explicitly.
 */
public function doFetchForField($field, $source = 'pubmed', $perPage = 100, $minYear = null)
{
    if ($minYear === null) {
        $minYear = date('Y') - 3;
    }

    $progress = $this->getFetchLog($field, $source);
    $page = $progress['last_page'] + 1;

    $result = ($source === 'pmc')
        ? $this->searchViaPMC($field, $perPage, $minYear, $page)
        : $this->searchViaPubMed($field, $perPage, $minYear, $page);

    if (!isset($result['total'])) {
        return [
            "has_more"=>"no"
        ];
    }

    $stored = $this->saveExperts($result['experts'], $field, $source);

    $pageToRecord = $result['has_more'] ? $page : $progress['last_page'];
    $this->updateFetchLog(
        $field,
        $source,
        $pageToRecord,
        $result['total_pages'] ?? $progress['total_pages']
    );

    return [
        'keyword' => $field,
        'page' => $page,
        'experts_found' => $result['total'],
        'saved_new' => $stored['inserted'],
        'saved_exist' => $stored['existing'],
        'list' => $result['experts'],
        'field_enriched' => $stored['field_enriched'],
        'has_more' => $result['has_more'],
    ];
}
/**
 * Search one page of experts in the chosen source.
 *
 * @param string $keyword Search keyword.
 * @param int    $perPage Articles per page.
 * @param int    $minYear Earliest publication year.
 * @param int    $page    1-based page number.
 * @param string $source  'pmc' selects PMC; any other value selects PubMed.
 * @return array Result of the source-specific search.
 */
public function searchExperts($keyword, $perPage, $minYear, $page, $source)
{
    return $source === 'pmc'
        ? $this->searchViaPMC($keyword, $perPage, $minYear, $page)
        : $this->searchViaPubMed($keyword, $perPage, $minYear, $page);
}
/**
 * Persist a list of experts into the `expert` table.
 *
 * Experts are identified by lower-cased, trimmed e-mail; entries without one are
 * skipped. For an e-mail that already exists only the field association is enriched.
 * New experts are inserted, linked to the field, and their `papers` (when present)
 * are saved. Any exception while inserting is counted as "existing".
 *
 * @param array  $experts Expert entries; keys read: email, name, affiliation, papers.
 * @param string $field   Research field to associate.
 * @param string $source  Origin label stored with new experts.
 * @return array ['inserted' => int, 'existing' => int, 'field_enriched' => int]
 */
public function saveExperts($experts, $field, $source)
{
    $counts = ['inserted' => 0, 'existing' => 0, 'field_enriched' => 0];

    foreach ($experts as $expert) {
        $email = strtolower(trim($expert['email']));
        if (empty($email)) {
            continue;
        }

        $known = Db::name('expert')->where('email', $email)->find();
        if ($known) {
            $counts['existing']++;
            $counts['field_enriched'] += $this->enrichExpertField($known['expert_id'], $field);
            continue;
        }

        // Values are cut with mb_substr() to the lengths used below.
        $row = [
            'name' => mb_substr($expert['name'], 0, 255),
            'email' => mb_substr($email, 0, 128),
            'affiliation' => mb_substr($expert['affiliation'], 0, 128),
            'source' => mb_substr($source, 0, 128),
            'ctime' => time(),
            'ltime' => 0,
            'state' => 0,
        ];

        try {
            $newId = Db::name('expert')->insertGetId($row);
            $this->enrichExpertField($newId, $field);
            if (isset($expert['papers']) && is_array($expert['papers'])) {
                $this->savePaper($newId, $expert['papers']);
            }
            $counts['inserted']++;
        } catch (\Exception $e) {
            $counts['existing']++;
        }
    }

    return $counts;
}
/**
 * Store an expert's papers in `expert_paper`, skipping papers already recorded for
 * that expert (matched on paper_article_id).
 *
 * @param int   $expertId Expert primary key.
 * @param array $papers   Paper entries; keys read: article_id, title, journal.
 * @return void
 */
private function savePaper($expertId, $papers)
{
    foreach ($papers as $paper) {
        // Resolve the id once so the duplicate check and the insert use the same value
        // (and a missing key does not raise an undefined-index notice).
        $articleId = $paper['article_id'] ?? 0;

        $already = Db::name('expert_paper')
            ->where("expert_id", $expertId)
            ->where('paper_article_id', $articleId)
            ->find();
        if ($already) {
            continue;
        }

        Db::name('expert_paper')->insert([
            'expert_id' => $expertId,
            'paper_title' => isset($paper['title']) ? mb_substr($paper['title'], 0, 255) : "",
            'paper_article_id' => $articleId,
            'paper_journal' => isset($paper['journal']) ? mb_substr($paper['journal'], 0, 128) : "",
            'ctime' => time(),
        ]);
    }
}
/**
 * Load the fetch-progress row for a field/source pair from `expert_fetch`.
 *
 * @param string $field  Research field.
 * @param string $source Source label.
 * @return array The stored row, or zeroed defaults (last_page, total_pages, last_time)
 *               when none exists.
 */
public function getFetchLog($field, $source)
{
    $row = Db::name('expert_fetch')
        ->where('field', $field)
        ->where('source', $source)
        ->find();

    return $row ?: ['last_page' => 0, 'total_pages' => 0, 'last_time' => 0];
}
/**
 * Create or update the fetch-progress row for a field/source pair.
 *
 * @param string $field      Research field.
 * @param string $source     Source label.
 * @param int    $lastPage   Last page considered fetched.
 * @param int    $totalPages Total number of pages known.
 * @return void
 */
public function updateFetchLog($field, $source, $lastPage, $totalPages)
{
    $current = Db::name('expert_fetch')
        ->where('field', $field)
        ->where('source', $source)
        ->find();

    $progress = [
        'last_page' => $lastPage,
        'total_pages' => $totalPages,
        'last_time' => time(),
    ];

    if (!$current) {
        // First fetch for this pair: identifying columns first, then the progress columns.
        $identity = [
            'field' => mb_substr($field, 0, 128),
            'source' => mb_substr($source, 0, 128),
        ];
        Db::name('expert_fetch')->insert($identity + $progress);
        return;
    }

    Db::name('expert_fetch')
        ->where('expert_fetch_id', $current['expert_fetch_id'])
        ->update($progress);
}
// ==================== PubMed Search ====================
/**
 * Search PubMed for a keyword and collect the authors of the matching articles.
 *
 * Article ids come from esearch(); records are fetched in batches of 50 with a 0.4 s
 * pause after each batch, parsed into author entries and aggregated into experts.
 * Raises the script time limit to 600 s.
 *
 * @param string $keyword Search keyword.
 * @param int    $perPage Articles per page.
 * @param int    $minYear Earliest publication year.
 * @param int    $page    1-based page number.
 * @return array Whatever buildPagedResult() produces for this page.
 */
private function searchViaPubMed($keyword, $perPage, $minYear, $page = 1)
{
    set_time_limit(600);

    $found = $this->esearch('pubmed', $keyword, $perPage, $minYear, $page);
    $ids = $found['ids'];
    $totalArticles = $found['total'];
    if (empty($ids)) {
        return $this->buildPagedResult([], 0, 0, $totalArticles, $page, $perPage, 'pubmed');
    }

    $allAuthors = [];
    foreach (array_chunk($ids, 50) as $batch) {
        $xml = $this->efetchWithRetry('pubmed', $batch);
        if ($xml) {
            $allAuthors = array_merge($allAuthors, $this->parsePubMedXml($xml));
        }
        usleep(400000);
    }

    $experts = $this->aggregateExperts($allAuthors);
    $expertCount = count($experts);
    $articleCount = count($ids);

    return $this->buildPagedResult($experts, $expertCount, $articleCount, $totalArticles, $page, $perPage, 'pubmed');
}
// ==================== PMC Search ====================
/**
 * Search PMC for a keyword and collect the authors of the matching articles.
 *
 * Article ids come from esearch(); records are fetched in batches of 5 with a 0.5 s
 * pause after each batch, parsed into author entries and aggregated into experts.
 * Raises the script time limit to 600 s.
 *
 * @param string $keyword Search keyword.
 * @param int    $perPage Articles per page.
 * @param int    $minYear Earliest publication year.
 * @param int    $page    1-based page number.
 * @return array Whatever buildPagedResult() produces for this page.
 */
private function searchViaPMC($keyword, $perPage, $minYear, $page = 1)
{
    set_time_limit(600);

    $found = $this->esearch('pmc', $keyword, $perPage, $minYear, $page);
    $ids = $found['ids'];
    $totalArticles = $found['total'];
    if (empty($ids)) {
        return $this->buildPagedResult([], 0, 0, $totalArticles, $page, $perPage, 'pmc');
    }

    $allAuthors = [];
    foreach (array_chunk($ids, 5) as $batch) {
        $xml = $this->efetchWithRetry('pmc', $batch);
        if ($xml) {
            $allAuthors = array_merge($allAuthors, $this->parsePMCXml($xml));
        }
        usleep(500000);
    }

    $experts = $this->aggregateExperts($allAuthors);
    $expertCount = count($experts);
    $articleCount = count($ids);

    return $this->buildPagedResult($experts, $expertCount, $articleCount, $totalArticles, $page, $perPage, 'pmc');
}
// ==================== NCBI API ====================
/**
 * Run an NCBI ESearch query and return the matching ids for one page.
 *
 * The search term is "<keyword> AND <minYear>:<current year>[pdat]", sorted by
 * relevance; paging is done with retstart = (page - 1) * perPage.
 *
 * @param string $db      NCBI database name (the callers in view pass 'pubmed' or 'pmc').
 * @param string $keyword Search keyword.
 * @param int    $perPage Ids per page (retmax).
 * @param int    $minYear Earliest publication year.
 * @param int    $page    1-based page number.
 * @return array ['ids' => list of ids, 'total' => total hit count]
 */
private function esearch($db, $keyword, $perPage, $minYear, $page = 1)
{
    $query = [
        'db' => $db,
        'term' => $keyword . ' AND ' . $minYear . ':' . date('Y') . '[pdat]',
        'retstart' => ($page - 1) * $perPage,
        'retmax' => $perPage,
        'retmode' => 'json',
        'sort' => 'relevance',
    ];

    $response = $this->httpClient->get($this->ncbiBaseUrl . 'esearch.fcgi', ['query' => $query]);
    $decoded = json_decode($response->getBody()->getContents(), true);

    return [
        'ids' => $decoded['esearchresult']['idlist'] ?? [],
        'total' => intval($decoded['esearchresult']['count'] ?? 0),
    ];
}
/**
 * Download the XML records for a set of ids via NCBI EFetch (form POST).
 *
 * @param string $db  NCBI database name.
 * @param array  $ids Record ids; sent comma-separated.
 * @return string Raw response body.
 */
private function efetch($db, $ids)
{
    $form = [
        'db' => $db,
        'id' => implode(',', $ids),
        'retmode' => 'xml',
    ];
    $response = $this->httpClient->post($this->ncbiBaseUrl . 'efetch.fcgi', ['form_params' => $form]);

    return $response->getBody()->getContents();
}
/**
 * efetch() with retries and a divide-and-retry fallback.
 *
 * After a failed attempt the method sleeps attempt * 2 seconds and tries again. When
 * the last attempt fails and more than one id was requested, the ids are split in two
 * halves, each half is fetched with 2 attempts, and the results are combined with
 * mergeXml(). A single id that keeps failing yields null.
 *
 * @param string $db         NCBI database name.
 * @param array  $ids        Record ids.
 * @param int    $maxRetries Maximum number of attempts for this id set.
 * @return string|null XML text, or null when nothing could be fetched.
 */
private function efetchWithRetry($db, $ids, $maxRetries = 3)
{
    $attempt = 0;
    while (++$attempt <= $maxRetries) {
        try {
            return $this->efetch($db, $ids);
        } catch (\Exception $e) {
            if ($attempt !== $maxRetries) {
                // Back off a little longer after every failure.
                sleep($attempt * 2);
                continue;
            }
            if (count($ids) <= 1) {
                return null;
            }

            // Out of attempts: a smaller request may still succeed.
            $half = ceil(count($ids) / 2);
            return $this->mergeXml(
                $this->efetchWithRetry($db, array_slice($ids, 0, $half), 2),
                $this->efetchWithRetry($db, array_slice($ids, $half), 2)
            );
        }
    }
    return null;
}
/**
 * Combine two XML result documents into one well-formed document.
 *
 * The children of the second document's root element are appended to the root of the
 * first, so the result still has a single root and can be parsed in one pass. When
 * only one input is parseable XML, that input is returned. When neither is, the two
 * strings are joined with a newline.
 *
 * @param string|null $xml1 First XML document (may be empty).
 * @param string|null $xml2 Second XML document (may be empty).
 * @return string|null Merged XML; the other argument when one is empty.
 */
private function mergeXml($xml1, $xml2)
{
    if (empty($xml1)) return $xml2;
    if (empty($xml2)) return $xml1;

    // Keep parse errors out of the output and restore the caller's libxml setting afterwards.
    $previous = libxml_use_internal_errors(true);

    $first = new \DOMDocument();
    $second = new \DOMDocument();
    $firstOk = $first->loadXML($xml1) && $first->documentElement !== null;
    $secondOk = $second->loadXML($xml2) && $second->documentElement !== null;

    if ($firstOk && $secondOk) {
        // importNode() copies, so the node list being iterated is not modified.
        foreach ($second->documentElement->childNodes as $child) {
            $first->documentElement->appendChild($first->importNode($child, true));
        }
        $merged = $first->saveXML();
        if ($merged === false) {
            $merged = $xml1;
        }
    } elseif ($firstOk) {
        $merged = $xml1;
    } elseif ($secondOk) {
        $merged = $xml2;
    } else {
        $merged = $xml1 . "\n" . $xml2;
    }

    libxml_clear_errors();
    libxml_use_internal_errors($previous);

    return $merged;
}
// ==================== PubMed XML Parsing ====================
/**
 * Parse a PubMed EFetch XML document into author entries.
 *
 * For every PubmedArticle with an author list, each author who has a name and for
 * whom an e-mail address can be extracted from an affiliation text yields one entry.
 * The first non-empty affiliation text of the author is the one reported.
 *
 * @param string $xmlString XML text.
 * @return array List of entries with keys name, email (lower-cased), affiliation,
 *               article_title, article_id (PMID), journal. Empty when the XML cannot
 *               be parsed.
 */
private function parsePubMedXml($xmlString)
{
    libxml_use_internal_errors(true);
    $document = simplexml_load_string($xmlString);
    if ($document === false) {
        return [];
    }

    $rows = [];
    foreach ($document->PubmedArticle as $record) {
        $medline = $record->MedlineCitation;
        $articleNode = $medline->Article;

        $title = $this->xmlNodeToString($articleNode->ArticleTitle);
        $pmid = (string) $medline->PMID;
        $journal = isset($articleNode->Journal->Title) ? (string) $articleNode->Journal->Title : '';

        if (!isset($articleNode->AuthorList->Author)) {
            continue;
        }

        foreach ($articleNode->AuthorList->Author as $author) {
            $lastName = (string) ($author->LastName ?? '');
            $foreName = (string) ($author->ForeName ?? '');
            $fullName = trim($foreName . ' ' . $lastName);
            if (empty($fullName)) {
                continue;
            }

            // First affiliation text wins; e-mail is taken from the first text that holds one.
            $email = '';
            $affiliation = '';
            if (isset($author->AffiliationInfo)) {
                foreach ($author->AffiliationInfo as $affInfo) {
                    $affText = (string) $affInfo->Affiliation;
                    if (empty($affiliation)) {
                        $affiliation = $affText;
                    }
                    if (empty($email)) {
                        $email = $this->extractEmailFromText($affText);
                    }
                }
            }
            if (empty($email)) {
                continue;
            }

            $rows[] = [
                'name' => $fullName,
                'email' => strtolower($email),
                'affiliation' => $this->cleanAffiliation($affiliation),
                'article_title' => $title,
                'article_id' => $pmid,
                'journal' => $journal,
            ];
        }
    }
    return $rows;
}
// ==================== PMC XML Parsing ====================
/**
 * Parse a PMC efetch XML document into per-author contact records.
 *
 * For every <article> element this collects the title, PMC id, journal
 * title, emails found under <author-notes> and an id => text map of <aff>
 * elements, then walks the author contributors. An author is emitted only
 * when an email can be determined, tried in this order: the contributor's
 * own <email>, the first author-notes email (corresponding authors only),
 * then an address embedded in the affiliation text.
 *
 * @param string $xmlString Raw XML text.
 * @return array List of arrays with keys name, email, affiliation,
 *               article_title, article_id and journal; empty when the XML
 *               cannot be parsed.
 */
private function parsePMCXml($xmlString)
{
$results = [];
libxml_use_internal_errors(true);
$xml = simplexml_load_string($xmlString);
if ($xml === false) {
return $results;
}
// Prefer direct <article> children; otherwise iterate all children and filter by name below.
$articles = $xml->article ?? $xml->children();
foreach ($articles as $article) {
if ($article->getName() !== 'article') continue;
$front = $article->front;
if (!$front) continue;
$articleMeta = $front->{'article-meta'};
if (!$articleMeta) continue;
$title = $this->xmlNodeToString($articleMeta->{'title-group'}->{'article-title'} ?? null);
// The PMC id is the <article-id> whose pub-id-type is "pmc" (last one wins).
$pmcId = '';
if (isset($articleMeta->{'article-id'})) {
foreach ($articleMeta->{'article-id'} as $idNode) {
if ((string) $idNode['pub-id-type'] === 'pmc') {
$pmcId = (string) $idNode;
}
}
}
// The journal title may sit directly under <journal-meta> or inside <journal-title-group>.
$journal = '';
if (isset($front->{'journal-meta'}->{'journal-title'})) {
$journal = (string) $front->{'journal-meta'}->{'journal-title'};
} elseif (isset($front->{'journal-meta'}->{'journal-title-group'}->{'journal-title'})) {
$journal = (string) $front->{'journal-meta'}->{'journal-title-group'}->{'journal-title'};
}
// Emails listed in <author-notes>, used as a fallback for corresponding authors.
$correspEmails = [];
if (isset($articleMeta->{'author-notes'})) {
$this->extractEmailsFromNode($articleMeta->{'author-notes'}, $correspEmails);
}
// Build the aff id => affiliation text map from <aff> inside <contrib-group> ...
$affiliationMap = [];
if (isset($articleMeta->{'contrib-group'})) {
foreach ($articleMeta->{'contrib-group'}->children() as $child) {
if ($child->getName() === 'aff') {
$affId = (string) ($child['id'] ?? '');
$affText = $this->xmlNodeToString($child);
if ($affId) $affiliationMap[$affId] = $affText;
}
}
}
// ... and from <aff> directly under <article-meta> (these overwrite entries with the same id).
if (isset($front->{'article-meta'}->{'aff'})) {
foreach ($front->{'article-meta'}->{'aff'} as $aff) {
$affId = (string) ($aff['id'] ?? '');
$affText = $this->xmlNodeToString($aff);
if ($affId) $affiliationMap[$affId] = $affText;
}
}
if (!isset($articleMeta->{'contrib-group'})) continue;
foreach ($articleMeta->{'contrib-group'}->contrib as $contrib) {
// Only contributors typed as authors with a <name> element are considered.
if ((string) ($contrib['contrib-type'] ?? '') !== 'author') continue;
$nameNode = $contrib->name;
if (!$nameNode) continue;
$surname = (string) ($nameNode->surname ?? '');
$givenNames = (string) ($nameNode->{'given-names'} ?? '');
$fullName = trim($givenNames . ' ' . $surname);
if (empty($fullName)) continue;
$email = '';
if (isset($contrib->email)) {
$email = strtolower(trim((string) $contrib->email));
}
// Resolve the affiliation through the first aff xref that is present in the map.
$affiliation = '';
if (isset($contrib->xref)) {
foreach ($contrib->xref as $xref) {
if ((string) $xref['ref-type'] === 'aff') {
$rid = (string) $xref['rid'];
if (isset($affiliationMap[$rid])) {
$affiliation = $affiliationMap[$rid];
break;
}
}
}
}
// Fall back to an <aff> nested inside the contributor itself.
if (empty($affiliation) && isset($contrib->aff)) {
$affiliation = $this->xmlNodeToString($contrib->aff);
}
// An author is "corresponding" when it has a corresp xref or corresp="yes".
$isCorresponding = false;
if (isset($contrib->xref)) {
foreach ($contrib->xref as $xref) {
if ((string) $xref['ref-type'] === 'corresp') $isCorresponding = true;
}
}
if ((string) ($contrib['corresp'] ?? '') === 'yes') $isCorresponding = true;
// NOTE(review): every corresponding author without an own email receives the FIRST
// author-notes email, even when several corresponding authors exist — confirm this is intended.
if (empty($email) && $isCorresponding && !empty($correspEmails)) {
$email = $correspEmails[0];
}
// Last resort: an address embedded in the affiliation text.
if (empty($email)) {
$extracted = $this->extractEmailFromText($affiliation);
if ($extracted) $email = $extracted;
}
if (empty($email)) continue;
$results[] = [
'name' => $fullName,
'email' => strtolower($email),
'affiliation' => $this->cleanAffiliation($affiliation),
'article_title' => $title,
'article_id' => $pmcId,
'journal' => $journal,
];
}
}
return $results;
}
// ==================== Aggregation / Pagination ====================
/**
 * Collapse per-article author records into one entry per email address.
 *
 * Records are grouped by the lower-cased, trimmed email. The first record
 * seen supplies name/email/affiliation; a later record only fills in a
 * still-empty affiliation. At most 10 papers are listed per expert while
 * paper_count keeps counting. The result is sorted by paper_count, highest first.
 *
 * @param array $authorRecords Records with keys name, email, affiliation,
 *                             article_title, article_id and journal.
 * @return array List of experts with keys name, email, affiliation,
 *               paper_count and papers.
 */
private function aggregateExperts($authorRecords)
{
    $byEmail = [];
    foreach ($authorRecords as $row) {
        $emailKey = strtolower(trim($row['email']));
        if (empty($emailKey)) {
            continue;
        }

        if (!isset($byEmail[$emailKey])) {
            $byEmail[$emailKey] = [
                'name' => $row['name'],
                'email' => $row['email'],
                'affiliation' => $row['affiliation'],
                'paper_count' => 0,
                'papers' => [],
            ];
        }

        $entry = &$byEmail[$emailKey];
        $entry['paper_count']++;
        if (count($entry['papers']) < 10) {
            $entry['papers'][] = [
                'title' => $row['article_title'],
                'article_id' => $row['article_id'],
                'journal' => $row['journal'],
            ];
        }
        if (empty($entry['affiliation']) && !empty($row['affiliation'])) {
            $entry['affiliation'] = $row['affiliation'];
        }
        unset($entry); // drop the reference before the next iteration
    }

    $ranked = array_values($byEmail);
    usort($ranked, function ($left, $right) {
        return $right['paper_count'] <=> $left['paper_count'];
    });
    return $ranked;
}
/**
 * Assemble the paginated response envelope for an expert search.
 *
 * total_pages is derived from the number of ARTICLES (not experts) and the
 * page size. A non-positive page size yields 0 pages instead of a
 * division-by-zero error, and the page count is returned as an integer
 * rather than the float produced by ceil().
 *
 * @param array  $experts         Experts on the current page.
 * @param int    $expertCount     Value reported as "total".
 * @param int    $articlesScanned Number of articles inspected for this page.
 * @param int    $totalArticles   Total number of matching articles.
 * @param int    $page            Current page number.
 * @param int    $perPage         Articles per page.
 * @param string $source          Data source label.
 * @return array Response envelope.
 */
private function buildPagedResult($experts, $expertCount, $articlesScanned, $totalArticles, $page, $perPage, $source)
{
    $totalPages = ($totalArticles > 0 && $perPage > 0)
        ? (int) ceil($totalArticles / $perPage)
        : 0;
    return [
        'experts' => $experts,
        'total' => $expertCount,
        'articles_scanned' => $articlesScanned,
        'total_articles' => $totalArticles,
        'page' => $page,
        'per_page' => $perPage,
        'total_pages' => $totalPages,
        'has_more' => $page < $totalPages,
        'source' => $source,
    ];
}
// ==================== DB Helpers ====================
/**
 * Attach a research field to an expert unless an active row already exists.
 *
 * The field is cut to 128 characters BEFORE the duplicate lookup so the
 * lookup compares against exactly what gets stored. Previously the lookup
 * used the untruncated text, so a field longer than 128 characters never
 * matched its stored (truncated) row and was inserted again on every call.
 *
 * @param int    $expertId Expert primary key.
 * @param string $field    Field/keyword text.
 * @return int 1 when a row was inserted, 0 when the field is empty or already present.
 */
private function enrichExpertField($expertId, $field)
{
    $field = mb_substr(trim($field), 0, 128);
    if (empty($field)) return 0;

    $exists = Db::name('expert_field')
        ->where('expert_id', $expertId)
        ->where('field', $field)
        ->where('state', 0)
        ->find();
    if ($exists) return 0;

    Db::name('expert_field')->insert([
        'expert_id' => $expertId,
        'field' => $field,
        'state' => 0,
    ]);
    return 1;
}
// ==================== Text Helpers ====================
/**
 * Pull the first email address out of free text.
 *
 * Patterns are tried from most to least specific: an "Electronic address:"
 * label, an "E-mail:"/"Email:" label, then any bare address. The match is
 * stripped of surrounding dots and lower-cased.
 *
 * @param string $text Text to scan.
 * @return string The email address, or '' when none is found.
 */
private function extractEmailFromText($text)
{
    if (empty($text)) {
        return '';
    }

    $patterns = [
        '/[Ee]lectronic address:\s*([^\s;,]+@[^\s;,]+)/',
        '/[Ee]-?mail:\s*([^\s;,]+@[^\s;,]+)/',
        '/\b([a-zA-Z0-9._%+\-]+@[a-zA-Z0-9.\-]+\.[a-zA-Z]{2,})\b/',
    ];
    foreach ($patterns as $pattern) {
        if (preg_match($pattern, $text, $hit)) {
            return strtolower(trim($hit[1], '.'));
        }
    }
    return '';
}
/**
 * Recursively collect unique, lower-cased email addresses from an XML subtree.
 *
 * Addresses are taken from <email> elements and from any address-looking
 * text directly inside each visited node. Children are processed before
 * the node's own text, which determines the order of $emails.
 *
 * @param \SimpleXMLElement|null $node   Subtree root; null is ignored.
 * @param array                  $emails Accumulator, extended in place.
 * @return void
 */
private function extractEmailsFromNode($node, &$emails)
{
    if ($node === null) {
        return;
    }

    foreach ($node->children() as $element) {
        if ($element->getName() === 'email') {
            $candidate = strtolower(trim((string) $element));
            if (!empty($candidate) && !in_array($candidate, $emails)) {
                $emails[] = $candidate;
            }
        }
        $this->extractEmailsFromNode($element, $emails);
    }

    $ownText = (string) $node;
    if (!preg_match_all('/\b([a-zA-Z0-9._%+\-]+@[a-zA-Z0-9.\-]+\.[a-zA-Z]{2,})\b/', $ownText, $found)) {
        return;
    }
    foreach ($found[1] as $raw) {
        $candidate = strtolower(trim($raw, '.'));
        if (!in_array($candidate, $emails)) {
            $emails[] = $candidate;
        }
    }
}
/**
 * Strip email addresses (labelled or bare) from an affiliation string and
 * trim leftover whitespace and . , ; punctuation from both ends.
 *
 * @param string $text Raw affiliation text.
 * @return string Cleaned affiliation.
 */
private function cleanAffiliation($text)
{
    // Applied in order: labelled forms first so the label is removed together with the address.
    $emailPatterns = [
        '/\s*[Ee]lectronic address:\s*[^\s;,]+@[^\s;,]+/',
        '/\s*[Ee]-?mail:\s*[^\s;,]+@[^\s;,]+/',
        '/\s*\b[a-zA-Z0-9._%+\-]+@[a-zA-Z0-9.\-]+\.[a-zA-Z]{2,}\b/',
    ];
    $withoutEmails = preg_replace($emailPatterns, '', $text);
    return trim($withoutEmails, " \t\n\r\0\x0B.,;");
}
/**
 * Flatten an XML node to plain text: markup removed, entities decoded,
 * runs of whitespace collapsed to a single space, ends trimmed.
 *
 * @param \SimpleXMLElement|null $node Node to flatten.
 * @return string Plain text, '' for null.
 */
private function xmlNodeToString($node)
{
    if ($node === null) {
        return '';
    }
    $plain = html_entity_decode(strip_tags($node->asXML()), ENT_QUOTES | ENT_XML1, 'UTF-8');
    $collapsed = preg_replace('/\s+/', ' ', $plain);
    return trim($collapsed);
}
// ==================== Logging ====================
/**
 * Append a timestamped line to this service's log file.
 * Write failures are deliberately suppressed (best-effort logging).
 *
 * @param string $msg Message text.
 * @return void
 */
public function log($msg)
{
    $stamp = date('Y-m-d H:i:s');
    @file_put_contents($this->logFile, $stamp . ' ' . $msg . PHP_EOL, FILE_APPEND);
}
}

View File

@@ -0,0 +1,506 @@
<?php
namespace app\common;
use think\Db;
use think\Cache;
use think\Queue;
use PHPMailer\PHPMailer\PHPMailer;
class PromotionService
{
private $logFile;
/**
 * Point the service log at runtime/promotion_task.log under the project root.
 */
public function __construct()
{
    $fileName = 'promotion_task.log';
    $this->logFile = ROOT_PATH . 'runtime' . DS . $fileName;
}
/**
 * Process the next email in a promotion task (called by queue job).
 *
 * Each call handles at most one pending promotion_email_log row and, unless
 * the task is finished or stopped, re-enqueues itself for the next row.
 *
 * Checks, in order:
 *  1. the task exists and its state is 1, otherwise stop;
 *  2. the current hour lies within [send_start_hour, send_end_hour), otherwise retry in 300s;
 *  3. the bounce rate is below max_bounce_rate, otherwise set task state 2 and stop;
 *  4. a pending log row (state 0) exists, otherwise set task state 3 and stop;
 *  5. the expert exists and is not in state 4 or 5, otherwise fail the row and continue;
 *  6. an SMTP account with remaining quota exists, otherwise retry in 600s.
 *
 * @param int $taskId promotion_task primary key.
 * @return array Always has 'done' (bool); the other keys depend on the branch taken
 *               ('reason', 'retry_in', 'sent', 'email', 'next_in', ...).
 */
public function processNextEmail($taskId)
{
$task = Db::name('promotion_task')->where('task_id', $taskId)->find();
if (!$task) {
return ['done' => true, 'reason' => 'task_not_found'];
}
// Only tasks in state 1 are processed.
if ($task['state'] != 1) {
return ['done' => true, 'reason' => 'task_not_running', 'state' => $task['state']];
}
// Respect the task's daily sending window (hours in server-local time); re-check in 5 minutes.
$currentHour = intval(date('G'));
if ($currentHour < $task['send_start_hour'] || $currentHour >= $task['send_end_hour']) {
$this->enqueueNextEmail($taskId, 300);
return ['done' => false, 'reason' => 'outside_send_window', 'retry_in' => 300];
}
// Auto-pause (state 2) once bounce_count / sent_count reaches the configured percentage.
if ($task['sent_count'] > 0 && $task['max_bounce_rate'] > 0) {
$bounceRate = ($task['bounce_count'] / $task['sent_count']) * 100;
if ($bounceRate >= $task['max_bounce_rate']) {
Db::name('promotion_task')->where('task_id', $taskId)->update([
'state' => 2,
'utime' => time(),
]);
$this->log("Task {$taskId} auto-paused: bounce rate {$bounceRate}% >= {$task['max_bounce_rate']}%");
return ['done' => true, 'reason' => 'auto_paused_bounce_rate', 'bounce_rate' => $bounceRate];
}
}
// Oldest pending log row; when none is left the task is marked state 3.
$logEntry = Db::name('promotion_email_log')
->where('task_id', $taskId)
->where('state', 0)
->order('log_id asc')
->find();
if (!$logEntry) {
Db::name('promotion_task')->where('task_id', $taskId)->update([
'state' => 3,
'utime' => time(),
]);
return ['done' => true, 'reason' => 'all_emails_processed'];
}
// Skip recipients whose expert row is missing or in state 4/5: fail the row and move on quickly.
$expert = Db::name('expert')->where('expert_id', $logEntry['expert_id'])->find();
if (!$expert || $expert['state'] == 4 || $expert['state'] == 5) {
Db::name('promotion_email_log')->where('log_id', $logEntry['log_id'])->update([
'state' => 2,
'error_msg' => 'Expert invalid or deleted (state=' . (isset($expert['state']) ? $expert['state'] : 'null') . ')',
'send_time' => time(),
]);
Db::name('promotion_task')->where('task_id', $taskId)->setInc('fail_count');
Db::name('promotion_task')->where('task_id', $taskId)->update(['utime' => time()]);
$this->enqueueNextEmail($taskId, 2);
return ['done' => false, 'skipped' => $logEntry['email_to'], 'reason' => 'expert_invalid'];
}
// No SMTP account with quota left: leave the row pending and retry in 10 minutes.
$account = $this->pickSmtpAccountForTask($task);
if (!$account) {
$this->enqueueNextEmail($taskId, 600);
return ['done' => false, 'reason' => 'no_smtp_quota', 'retry_in' => 600];
}
// Prefer the pre-generated content; render on the spot when there is none
$subject = '';
$body = '';
$hasPrepared = !empty($logEntry['subject_prepared']) && !empty($logEntry['body_prepared']);
if ($hasPrepared) {
$subject = $logEntry['subject_prepared'];
$body = $logEntry['body_prepared'];
} else {
$journal = Db::name('journal')->where('journal_id', $task['journal_id'])->find();
$expertVars = $this->buildExpertVars($expert);
$journalVars = $this->buildJournalVars($journal);
// Expert variables win over journal variables on key collisions.
$vars = array_merge($journalVars, $expertVars);
$rendered = $this->renderFromTemplate(
$task['template_id'],
$task['journal_id'],
json_encode($vars, JSON_UNESCAPED_UNICODE),
$task['style_id']
);
if ($rendered['code'] !== 0) {
Db::name('promotion_email_log')->where('log_id', $logEntry['log_id'])->update([
'state' => 2,
'error_msg' => 'Template render failed: ' . $rendered['msg'],
'send_time' => time(),
]);
Db::name('promotion_task')->where('task_id', $taskId)->setInc('fail_count');
Db::name('promotion_task')->where('task_id', $taskId)->update(['utime' => time()]);
$this->enqueueNextEmail($taskId, 2);
return ['done' => false, 'failed' => $logEntry['email_to'], 'reason' => 'template_error'];
}
$subject = $rendered['data']['subject'];
$body = $rendered['data']['body'];
}
$result = $this->doSendEmail($account, $logEntry['email_to'], $subject, $body);
$now = time();
if ($result['status'] === 1) {
// Success: log row -> state 1, bump the account's daily counter, mark the expert, count the send.
Db::name('promotion_email_log')->where('log_id', $logEntry['log_id'])->update([
'j_email_id' => $account['j_email_id'],
'subject' => mb_substr($subject, 0, 512),
'state' => 1,
'send_time' => $now,
]);
Db::name('journal_email')->where('j_email_id', $account['j_email_id'])->setInc('today_sent');
Db::name('expert')->where('expert_id', $expert['expert_id'])->update(['state' => 1, 'ltime' => $now]);
Db::name('promotion_task')->where('task_id', $taskId)->setInc('sent_count');
} else {
// Failure: log row -> state 2 with the error text; the account counter is not touched.
Db::name('promotion_email_log')->where('log_id', $logEntry['log_id'])->update([
'j_email_id' => $account['j_email_id'],
'subject' => mb_substr($subject, 0, 512),
'state' => 2,
'error_msg' => mb_substr($result['data'], 0, 512),
'send_time' => $now,
]);
Db::name('promotion_task')->where('task_id', $taskId)->setInc('fail_count');
}
Db::name('promotion_task')->where('task_id', $taskId)->update(['utime' => $now]);
// Random delay between min_interval (at least 5) and max_interval (at least min_interval).
// NOTE(review): when both intervals are below 5 the lower bound exceeds the upper bound — confirm rand() handling.
$delay = rand(max(5, $task['min_interval']), max($task['min_interval'], $task['max_interval']));
$this->enqueueNextEmail($taskId, $delay);
return [
'done' => false,
'sent' => $result['status'] === 1,
'email' => $logEntry['email_to'],
'next_in' => $delay,
];
}
// ==================== Prepare & trigger (prepare today, send tomorrow) ====================
/**
 * Pre-render subject/body for every pending email of a task and store them
 * on the log rows; afterwards the task is moved to state 5.
 *
 * Only tasks in state 0 are accepted. A log row whose expert is missing or
 * whose template fails to render is marked state 2 with an error message.
 *
 * @param int $taskId
 * @return array ['prepared' => int, 'failed' => int, 'error' => string|null]
 */
public function prepareTask($taskId)
{
    $task = Db::name('promotion_task')->where('task_id', $taskId)->find();
    if (!$task) {
        return ['prepared' => 0, 'failed' => 0, 'error' => 'task_not_found'];
    }
    if ($task['state'] != 0) {
        return ['prepared' => 0, 'failed' => 0, 'error' => 'task_state_not_draft'];
    }

    $journal = Db::name('journal')->where('journal_id', $task['journal_id'])->find();
    $pendingLogs = Db::name('promotion_email_log')
        ->where('task_id', $taskId)
        ->where('state', 0)
        ->where('prepared_at', 0)
        ->order('log_id asc')
        ->select();
    $now = time();
    $journalVars = $this->buildJournalVars($journal);

    // Marks one log row as failed with the given reason.
    $markFailed = function ($logId, $reason) use ($now) {
        Db::name('promotion_email_log')->where('log_id', $logId)->update([
            'state' => 2,
            'error_msg' => $reason,
            'send_time' => $now,
        ]);
    };

    $prepared = 0;
    $failed = 0;
    foreach ($pendingLogs as $row) {
        $expert = Db::name('expert')->where('expert_id', $row['expert_id'])->find();
        if (!$expert) {
            $markFailed($row['log_id'], 'Expert not found');
            $failed++;
            continue;
        }

        $vars = array_merge($journalVars, $this->buildExpertVars($expert));
        $rendered = $this->renderFromTemplate(
            $task['template_id'],
            $task['journal_id'],
            json_encode($vars, JSON_UNESCAPED_UNICODE),
            $task['style_id']
        );
        if ($rendered['code'] !== 0) {
            $markFailed($row['log_id'], 'Prepare failed: ' . $rendered['msg']);
            $failed++;
            continue;
        }

        Db::name('promotion_email_log')->where('log_id', $row['log_id'])->update([
            'subject_prepared' => mb_substr($rendered['data']['subject'], 0, 512),
            'body_prepared' => $rendered['data']['body'],
            'prepared_at' => $now,
        ]);
        $prepared++;
    }

    Db::name('promotion_task')->where('task_id', $taskId)->update([
        'state' => 5,
        'utime' => $now,
    ]);
    $this->log("prepareTask task_id={$taskId} prepared={$prepared} failed={$failed}");
    return ['prepared' => $prepared, 'failed' => $failed, 'error' => null];
}
/**
 * Pre-render the emails of every state-0 task scheduled for the given date
 * (intended for a scheduled job, e.g. run at 22:00 to prepare tomorrow's tasks).
 *
 * @param string $date Y-m-d, e.g. 2026-03-12
 * @return array ['tasks' => int, 'prepared' => int, 'failed' => int, 'details' => []]
 */
public function prepareTasksForDate($date)
{
    $draftTasks = Db::name('promotion_task')
        ->where('send_date', $date)
        ->where('state', 0)
        ->select();

    $summary = [
        'tasks' => count($draftTasks),
        'prepared' => 0,
        'failed' => 0,
        'details' => [],
    ];
    foreach ($draftTasks as $draft) {
        $outcome = $this->prepareTask($draft['task_id']);
        $summary['prepared'] += $outcome['prepared'];
        $summary['failed'] += $outcome['failed'];
        $summary['details'][] = [
            'task_id' => $draft['task_id'],
            'task_name' => $draft['task_name'],
            'prepared' => $outcome['prepared'],
            'failed' => $outcome['failed'],
            'error' => $outcome['error'],
        ];
    }

    $this->log("prepareTasksForDate date={$date} tasks=" . $summary['tasks'] . " prepared={$summary['prepared']} failed={$summary['failed']}");
    return $summary;
}
/**
 * Start sending for the prepared tasks of the given date (intended for a
 * scheduled job, e.g. run at 8:00 to trigger today's tasks).
 *
 * Tasks with send_date = $date that are still in state 0 (e.g. created
 * after the nightly preparation) are prepared first; then every task in
 * state 5 is switched to state 1 and its first email is queued.
 *
 * @param string $date Y-m-d
 * @return array ['prepared' => int, 'started' => int, 'task_ids' => []]
 *               'prepared' is the number of catch-up tasks attempted.
 */
public function startTasksForDate($date)
{
    // Catch-up preparation for tasks of this date that were never prepared.
    $unprepared = Db::name('promotion_task')
        ->where('send_date', $date)
        ->where('state', 0)
        ->select();
    foreach ($unprepared as $draft) {
        $this->prepareTask($draft['task_id']);
    }

    $readyTasks = Db::name('promotion_task')
        ->where('send_date', $date)
        ->where('state', 5)
        ->select();
    $startedIds = [];
    foreach ($readyTasks as $ready) {
        Db::name('promotion_task')->where('task_id', $ready['task_id'])->update([
            'state' => 1,
            'utime' => time(),
        ]);
        $this->enqueueNextEmail($ready['task_id'], 0);
        $startedIds[] = $ready['task_id'];
    }
    $started = count($startedIds);

    $this->log("startTasksForDate date={$date} started={$started} task_ids=" . implode(',', $startedIds));
    return [
        'prepared' => count($unprepared),
        'started' => $started,
        'task_ids' => $startedIds,
    ];
}
// ==================== Queue ====================
/**
 * Queue the job that processes the next email of a task on the
 * "promotion" queue, immediately or after a delay.
 *
 * @param int $taskId Task to continue.
 * @param int $delay  Delay in seconds; values <= 0 push immediately.
 * @return void
 */
public function enqueueNextEmail($taskId, $delay = 0)
{
    $handler = 'app\api\job\PromotionSend@fire';
    $payload = ['task_id' => $taskId];
    if ($delay > 0) {
        Queue::later($delay, $handler, $payload, 'promotion');
        return;
    }
    Queue::push($handler, $payload, 'promotion');
}
// ==================== SMTP ====================
/**
 * Choose the SMTP account with the most remaining daily quota for a task.
 *
 * Candidates are the journal's active (state 0) accounts, optionally
 * restricted to the comma-separated ids in the task's smtp_ids.
 *
 * @param array $task promotion_task row.
 * @return array|null Account row, or null when no account has quota left.
 */
public function pickSmtpAccountForTask($task)
{
    $allowedIds = $task['smtp_ids'] ? array_map('intval', explode(',', $task['smtp_ids'])) : [];

    $query = Db::name('journal_email')
        ->where('journal_id', $task['journal_id'])
        ->where('state', 0);
    if (!empty($allowedIds)) {
        $query->where('j_email_id', 'in', $allowedIds);
    }
    $candidates = $query->select();
    if (empty($candidates)) {
        return null;
    }

    $chosen = null;
    $mostLeft = -1;
    foreach ($candidates as $candidate) {
        // May zero today_sent (in the DB and on this local copy) on the first use of the day.
        $this->resetDailyCountIfNeeded($candidate);
        $left = $candidate['daily_limit'] - $candidate['today_sent'];
        if ($left <= 0 || $left <= $mostLeft) {
            continue;
        }
        $chosen = $candidate;
        $mostLeft = $left;
    }
    return $chosen;
}
/**
 * Reset an account's today_sent counter once per calendar day.
 *
 * The date of the last reset is remembered in the cache; when it differs
 * from today (or is missing) the counter is zeroed in the database and on
 * the passed-in row.
 * NOTE(review): a cache flush during the day would also trigger a reset — confirm acceptable.
 *
 * @param array $account journal_email row, updated in place.
 * @return void
 */
public function resetDailyCountIfNeeded(&$account)
{
    $today = date('Y-m-d');
    $marker = 'smtp_reset_' . $account['j_email_id'];
    if (Cache::get($marker) === $today) {
        return;
    }

    Db::name('journal_email')
        ->where('j_email_id', $account['j_email_id'])
        ->update(['today_sent' => 0]);
    $account['today_sent'] = 0;
    Cache::set($marker, $today, 86400);
}
/**
 * Send one HTML email through the given SMTP account using PHPMailer.
 *
 * @param array  $account     journal_email row (smtp_host, smtp_port, smtp_user,
 *                            smtp_password, smtp_encryption, smtp_from_name).
 * @param string $toEmail     Recipient address.
 * @param string $subject     Subject line.
 * @param string $htmlContent HTML body; a tag-stripped copy is used as the plain-text alternative.
 * @return array ['status' => 1, 'data' => 'success'] on success,
 *               ['status' => 0, 'data' => <error message>] on failure.
 */
public function doSendEmail($account, $toEmail, $subject, $htmlContent)
{
    try {
        $mailer = new PHPMailer(true);
        $mailer->isSMTP();
        $mailer->SMTPDebug = 0;
        $mailer->CharSet = 'UTF-8';
        $mailer->Host = $account['smtp_host'];
        $mailer->Port = intval($account['smtp_port']);
        $mailer->SMTPAuth = true;
        $mailer->Username = $account['smtp_user'];
        $mailer->Password = $account['smtp_password'];

        $encryption = $account['smtp_encryption'];
        if (in_array($encryption, ['ssl', 'tls'], true)) {
            $mailer->SMTPSecure = $encryption;
        } else {
            // No encryption configured: also stop PHPMailer from upgrading to TLS on its own.
            $mailer->SMTPSecure = false;
            $mailer->SMTPAutoTLS = false;
        }

        $sender = $account['smtp_user'];
        $displayName = empty($account['smtp_from_name']) ? $sender : $account['smtp_from_name'];
        $mailer->setFrom($sender, $displayName);
        $mailer->addReplyTo($sender, $displayName);
        $mailer->addAddress($toEmail);

        $mailer->isHTML(true);
        $mailer->Subject = $subject;
        $mailer->Body = $htmlContent;
        $mailer->AltBody = strip_tags($htmlContent);
        $mailer->send();
        return ['status' => 1, 'data' => 'success'];
    } catch (\Exception $error) {
        return ['status' => 0, 'data' => $error->getMessage()];
    }
}
// ==================== Template Rendering ====================
/**
 * Render a journal's mail template (and optional style wrapper) with variables.
 *
 * @param int    $templateId mail_template id; must belong to $journalId and be active (state 0).
 * @param int    $journalId  Journal id.
 * @param string $varsJson   JSON object of placeholder => value; anything else is ignored.
 * @param int    $styleId    Optional mail_style id whose header/footer wrap the body.
 * @return array ['code' => 0, 'msg' => 'success', 'data' => ['subject', 'body']]
 *               or ['code' => 1, 'msg' => 'Template not found'].
 */
public function renderFromTemplate($templateId, $journalId, $varsJson, $styleId = 0)
{
    $template = Db::name('mail_template')
        ->where('template_id', $templateId)
        ->where('journal_id', $journalId)
        ->where('state', 0)
        ->find();
    if (!$template) {
        return ['code' => 1, 'msg' => 'Template not found'];
    }

    $vars = [];
    if ($varsJson) {
        $decoded = json_decode($varsJson, true);
        if (is_array($decoded)) {
            $vars = $decoded;
        }
    }

    $subject = $this->renderVars($template['subject'], $vars);
    $content = $this->renderVars($template['body_html'], $vars);

    $style = $styleId
        ? Db::name('mail_style')->where('style_id', $styleId)->where('state', 0)->find()
        : null;
    if ($style) {
        $header = $style['header_html'] ? $this->renderVars($style['header_html'], $vars) : '';
        $footer = $style['footer_html'] ? $this->renderVars($style['footer_html'], $vars) : '';
        $content = $header . $content . $footer;
    }

    return ['code' => 0, 'msg' => 'success', 'data' => ['subject' => $subject, 'body' => $content]];
}
/**
 * Build the template placeholders that describe an expert.
 * The title is always "Ph.D"; missing columns become empty strings.
 *
 * @param array|null $expert expert row.
 * @return array Placeholder name => value.
 */
public function buildExpertVars($expert)
{
    $vars = ['expert_title' => "Ph.D"];
    $columns = [
        'expert_name' => 'name',
        'expert_email' => 'email',
        'expert_affiliation' => 'affiliation',
        'expert_field' => 'field',
    ];
    foreach ($columns as $placeholder => $column) {
        $vars[$placeholder] = $expert[$column] ?? '';
    }
    return $vars;
}
/**
 * Build the template placeholders that describe a journal.
 *
 * eic_name comes from the journal's active board_to_journal row of type 0.
 * special_support_deadline is always 30 days from now. Every journal
 * column, including editor_name, defaults to '' when absent so a missing
 * column does not raise an undefined-index warning.
 *
 * @param array|null $journal journal row; a falsy value yields [].
 * @return array Placeholder name => value.
 */
public function buildJournalVars($journal)
{
    if (!$journal) return [];

    $zb = Db::name("board_to_journal")
        ->where("journal_id", $journal['journal_id'])
        ->where("state", 0)
        ->where('type', 0)
        ->find();

    return [
        'journal_name' => $journal['title'] ?? '',
        'journal_abbr' => $journal['jabbr'] ?? '',
        'journal_url' => $journal['website'] ?? '',
        'journal_email' => $journal['email'] ?? '',
        'indexing_databases' => $journal['databases'] ?? '',
        'submission_url' => "https://submission.tmrjournals.com/",
        'eic_name' => $zb['realname'] ?? '',
        'editor_name' => $journal['editor_name'] ?? '',
        'special_support_deadline' => date("Y-m-d", strtotime("+30 days")),
    ];
}
/**
 * Substitute {{key}} and {key} placeholders in a template string.
 *
 * Replacement is done in a single pass with strtr(): the longest
 * placeholder wins at each position and substituted values are never
 * scanned again. The previous str_replace() with arrays applied the pairs
 * one after another, so a value that itself contained "{other_key}"
 * (e.g. an expert name taken from external data) was expanded a second time.
 *
 * @param string $tpl  Template text; non-strings and '' yield ''.
 * @param array  $vars Placeholder name => value; keys are trimmed, empty keys ignored.
 * @return string Rendered text.
 */
public function renderVars($tpl, $vars)
{
    if (!is_string($tpl) || $tpl === '') return '';
    if (!is_array($vars) || empty($vars)) return $tpl;

    $replace = [];
    foreach ($vars as $k => $v) {
        $key = trim((string)$k);
        if ($key === '') continue;
        $replace['{{' . $key . '}}'] = (string)$v;
        $replace['{' . $key . '}'] = (string)$v;
    }
    if (empty($replace)) return $tpl;

    return strtr($tpl, $replace);
}
// ==================== Logging ====================
/**
 * Append a timestamped line to the promotion task log.
 * Write failures are deliberately suppressed (best-effort logging).
 *
 * @param string $msg Message text.
 * @return void
 */
public function log($msg)
{
    $stamp = date('Y-m-d H:i:s');
    @file_put_contents($this->logFile, $stamp . ' ' . $msg . PHP_EOL, FILE_APPEND);
}
}

View File

@@ -0,0 +1,237 @@
<?php
namespace app\common;
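/**
 * PubMed helper class (NCBI E-utilities).
 *
 * Features:
 * - DOI -> PMID
 * - PMID -> structured article info (title/abstract/mesh/publication_types/year/journal)
 *
 * Notes:
 * - Results are cached as files under runtime/ by default to avoid repeated requests to NCBI.
 */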
class PubmedService
{
private $base = 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/';
private $timeout = 20;
private $tool = 'tmrjournals';
private $email = '';
/**
 * @param array $config Optional overrides: 'base' (API base URL, normalised to
 *                      end with one slash), 'timeout' (seconds, minimum 5),
 *                      'tool' and 'email' (sent to NCBI as request parameters).
 */
public function __construct(array $config = [])
{
    if (isset($config['base'])) {
        $this->base = rtrim((string)$config['base'], '/') . '/';
    }
    if (isset($config['timeout'])) {
        $this->timeout = max(5, intval($config['timeout']));
    }
    foreach (['tool', 'email'] as $option) {
        if (isset($config[$option])) {
            $this->{$option} = (string)$config[$option];
        }
    }
}
/**
 * DOI -> PMID. Searches with the [DOI] tag first and falls back to [AID]
 * when nothing is found. Hits are cached for 30 days.
 *
 * @param string $doi DOI to resolve.
 * @return string|null PMID, or null when the DOI is blank or not found.
 */
public function doiToPmid(string $doi): ?string
{
    $doi = trim($doi);
    if ($doi === '') {
        return null;
    }

    $cacheKey = 'doi2pmid_' . sha1(strtolower($doi));
    $hit = $this->cacheGet($cacheKey, 30 * 86400);
    if (is_string($hit) && $hit !== '') {
        return $hit;
    }

    $pmid = $this->esearch($doi . '[DOI]') ?: $this->esearch($doi . '[AID]');
    if (!$pmid) {
        return null;
    }
    $this->cacheSet($cacheKey, $pmid);
    return $pmid;
}
/**
 * PMID -> article info (title/abstract/mesh/publication_types/year/journal).
 *
 * The PMID must consist of digits only. It becomes part of the cache file
 * name, so rejecting anything else prevents path traversal (e.g. "../x")
 * through this public method. Successful results are cached for 30 days.
 *
 * @param string $pmid PubMed identifier.
 * @return array|null Parsed article info, or null when the PMID is invalid,
 *                    the request fails, or the response cannot be parsed.
 */
public function fetchByPmid(string $pmid): ?array
{
    $pmid = trim($pmid);
    if ($pmid === '' || !preg_match('/^\d+\z/', $pmid)) return null;

    $cacheKey = 'pmid_' . $pmid;
    $cached = $this->cacheGet($cacheKey, 30 * 86400);
    if (is_array($cached)) return $cached;

    $url = $this->base . 'efetch.fcgi?' . http_build_query([
        'db' => 'pubmed',
        'id' => $pmid,
        'retmode' => 'xml',
        'tool' => $this->tool,
        'email' => $this->email,
    ]);
    $xml = $this->httpGet($url);
    if (!is_string($xml) || trim($xml) === '') return null;

    $data = $this->parseEfetchXml($xml);
    if (!$data) return null;

    $this->cacheSet($cacheKey, $data);
    return $data;
}
/**
 * DOI -> PubMed info (including abstract/mesh), with 'pmid' and 'doi'
 * added to the result.
 *
 * @param string $doi DOI to look up.
 * @return array|null Article info, or null when the DOI cannot be resolved or fetched.
 */
public function fetchByDoi(string $doi): ?array
{
    $pmid = $this->doiToPmid($doi);
    $article = $pmid ? $this->fetchByPmid($pmid) : null;
    if (!$article) {
        return null;
    }
    $article['pmid'] = $pmid;
    $article['doi'] = $doi;
    return $article;
}
// ----------------- Internals -----------------
/**
 * Run an esearch query against the pubmed database and return the first id.
 *
 * @param string $term Search term, e.g. "10.1000/x[DOI]".
 * @return string|null First id of the result list, or null when there is none.
 */
private function esearch(string $term): ?string
{
    $params = [
        'db' => 'pubmed',
        'retmode' => 'json',
        'retmax' => 1,
        'term' => $term,
        'tool' => $this->tool,
        'email' => $this->email,
    ];
    $response = $this->httpGet($this->base . 'esearch.fcgi?' . http_build_query($params));
    $payload = json_decode((string)$response, true);

    $idList = $payload['esearchresult']['idlist'] ?? [];
    return empty($idList[0]) ? null : (string)$idList[0];
}
/**
 * Parse a PubMed efetch XML response into a structured array.
 *
 * Abstract sections are joined with newlines and prefixed with their Label
 * attribute when present. MeSH descriptors and publication types are
 * de-duplicated. The year comes from PubDate/Year, falling back to the
 * first 19xx/20xx number found in PubDate/MedlineDate.
 *
 * @param string $xml Raw efetch XML.
 * @return array|null Keys title, abstract, mesh_terms, publication_types,
 *                    journal and year; null when the XML is invalid or has
 *                    neither a title nor an abstract.
 */
private function parseEfetchXml(string $xml): ?array
{
    libxml_use_internal_errors(true);
    $doc = new \DOMDocument();
    if (!$doc->loadXML($xml)) {
        return null;
    }
    $xp = new \DOMXPath($doc);

    // Unique, non-empty, trimmed text of every node matching the query.
    $uniqueTexts = function (string $query) use ($xp): array {
        $texts = [];
        $nodes = $xp->query($query);
        if ($nodes) {
            foreach ($nodes as $node) {
                $value = trim($node->textContent);
                if ($value !== '') {
                    $texts[] = $value;
                }
            }
        }
        return array_values(array_unique($texts));
    };

    $title = $this->xpText($xp, '//PubmedArticle//ArticleTitle');

    $sections = [];
    $sectionNodes = $xp->query('//PubmedArticle//Abstract//AbstractText');
    if ($sectionNodes) {
        foreach ($sectionNodes as $section) {
            $labelAttr = $section->attributes ? $section->attributes->getNamedItem('Label') : null;
            $label = $labelAttr ? trim($labelAttr->nodeValue) : '';
            $body = trim($section->textContent);
            if ($body === '') {
                continue;
            }
            $sections[] = $label ? ($label . ': ' . $body) : $body;
        }
    }
    $abstract = trim(implode("\n", $sections));

    $mesh = $uniqueTexts('//PubmedArticle//MeshHeadingList//MeshHeading//DescriptorName');
    $pubTypes = $uniqueTexts('//PubmedArticle//PublicationTypeList//PublicationType');
    $journal = $this->xpText($xp, '//PubmedArticle//Journal//Title');

    $year = $this->xpText($xp, '//PubmedArticle//JournalIssue//PubDate//Year');
    if ($year === '') {
        $medlineDate = $this->xpText($xp, '//PubmedArticle//JournalIssue//PubDate//MedlineDate');
        if (preg_match('/(19\\d{2}|20\\d{2})/', $medlineDate, $found)) {
            $year = $found[1];
        }
    }

    if ($title === '' && $abstract === '') {
        return null;
    }
    return [
        'title' => $title,
        'abstract' => $abstract,
        'mesh_terms' => $mesh,
        'publication_types' => $pubTypes,
        'journal' => $journal,
        'year' => $year,
    ];
}
/**
 * Trimmed text content of the first node matching an XPath query.
 *
 * @param \DOMXPath $xp    XPath evaluator.
 * @param string    $query XPath expression.
 * @return string Text of the first match, or '' when nothing matches.
 */
private function xpText(\DOMXPath $xp, string $query): string
{
    $matches = $xp->query($query);
    if (!$matches || $matches->length <= 0) {
        return '';
    }
    return trim($matches->item(0)->textContent);
}
/**
 * Perform an HTTP GET and return the response body.
 *
 * TLS certificate and host verification are enabled: responses are cached
 * for 30 days, so accepting an unverified peer would let a man-in-the-middle
 * poison the cache. Responses with an HTTP status >= 400 are treated as
 * failures instead of being handed to the parsers as if they were data.
 *
 * @param string $url Fully built request URL.
 * @return string Response body, or '' on any transport or HTTP error.
 */
private function httpGet(string $url): string
{
    $ch = curl_init();
    if ($ch === false) {
        return '';
    }
    curl_setopt($ch, CURLOPT_URL, $url);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
    curl_setopt($ch, CURLOPT_TIMEOUT, $this->timeout);
    curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, true);
    curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, 2);
    curl_setopt($ch, CURLOPT_HTTPHEADER, [
        'User-Agent: TMRjournals-PubMed/1.0'
    ]);
    $res = curl_exec($ch);
    $status = (int) curl_getinfo($ch, CURLINFO_HTTP_CODE);
    curl_close($ch);

    if (!is_string($res) || $status >= 400) {
        return '';
    }
    return $res;
}
/**
 * Directory that holds the cache files: <ROOT_PATH>/runtime/pubmed_cache.
 *
 * @return string Directory path without a trailing slash.
 */
private function cacheDir(): string
{
    $root = rtrim(ROOT_PATH, '/');
    return $root . '/runtime/pubmed_cache';
}
/**
 * Read a cached value if its file exists and is not older than the TTL.
 *
 * @param string $key        Cache key (used as the file name).
 * @param int    $ttlSeconds Maximum age in seconds, based on file mtime.
 * @return mixed Decoded JSON value, or null when missing, expired or unreadable.
 */
private function cacheGet(string $key, int $ttlSeconds)
{
    $path = $this->cacheDir() . '/' . $key . '.json';
    if (!is_file($path)) {
        return null;
    }

    $modified = filemtime($path);
    $expired = !$modified || (time() - $modified) > $ttlSeconds;
    if ($expired) {
        return null;
    }

    return json_decode((string)@file_get_contents($path), true);
}
/**
 * Store a value as JSON in the cache directory, creating the directory on
 * first use. Failures are suppressed: the cache is best-effort.
 *
 * @param string $key   Cache key (used as the file name).
 * @param mixed  $value JSON-encodable value.
 * @return void
 */
private function cacheSet(string $key, $value): void
{
    $directory = $this->cacheDir();
    if (!is_dir($directory)) {
        @mkdir($directory, 0777, true);
    }
    $payload = json_encode($value, JSON_UNESCAPED_UNICODE);
    @file_put_contents($directory . '/' . $key . '.json', $payload);
}
}

View File

@@ -172,8 +172,11 @@ return [
// 日志保存目录 // 日志保存目录
'path' => LOG_PATH, 'path' => LOG_PATH,
// 日志记录级别 // 日志记录级别
'level' => ['log'], 'level' => ['error', 'warning', 'info', 'notice', 'debug'],
"max_files"=>3, 'file_size' => 1024 * 1024 * 2, // 2MB
'max_files'=>30,
'apart_day' => true,
'format' => '[%s][%s] %s',
], ],
// +---------------------------------------------------------------------- // +----------------------------------------------------------------------