Compare commits

..

5 Commits

Author SHA1 Message Date
wangjinlei
20a68ddc8a 自动推广 2026-03-23 09:57:45 +08:00
wangjinlei
d1e0f43992 Merge remote-tracking branch 'origin/master' 2026-03-18 14:37:29 +08:00
wangjinlei
e3ec1b0ca1 自动推广 2026-03-18 14:37:23 +08:00
chengxl
e7bb34e11d 抓取是否撤稿 2026-03-17 11:04:11 +08:00
wangjinlei
65ba338a7d 自动推广 2026-03-13 17:19:34 +08:00
11 changed files with 3462 additions and 741 deletions

View File

@@ -0,0 +1,310 @@
<?php
namespace app\api\controller;
use app\api\controller\Base;
use think\Db;
class Crossrefdoi extends Base{
/**
 * Build the controller; all initialisation is delegated to the parent constructor.
 *
 * @param \think\Request|null $request Request object handed straight to the parent.
 */
public function __construct(\think\Request $request = null) {
parent::__construct($request);
}
// Configuration
private $mailto; // Contact e-mail sent along with each request (original note: raises request priority); NOTE(review): never assigned in the code visible here
private $timeout = 15; // Request timeout in seconds
private $maxRetry = 2; // Upper bound for the per-DOI request loop (it runs while the attempt counter is below this value)
private $crossrefUrl = "https://api.crossref.org/works/"; // Endpoint the DOI is appended to
/**
 * Look up the DOI of one reference on Crossref and back-fill the reference row.
 *
 * Reads `p_refer_id` from $aParam (or from the POST body when $aParam is empty),
 * loads the matching `production_article_refer` row (state 0 or 2), queries
 * Crossref for its `refer_doi` and updates the row:
 *   - title, joura, author and doilink are written only when the stored value is empty;
 *   - dateno is written whenever Crossref supplies one;
 *   - is_retracted is set to 1 when the record is detected as retracted;
 *   - every update also sets cs = 1 and refreshes update_time.
 *
 * @param array $aParam Optional parameters; must contain `p_refer_id`.
 * @return string JSON-encoded {status, msg}. Status: 1 updated, 2 missing p_refer_id,
 *                3 reference has no DOI, 4 malformed DOI, 5 nothing to update,
 *                6 database update failed, 7 Crossref lookup failed.
 */
public function get($aParam = []){
    // Fall back to the POST body when no parameters were passed in directly
    $aParam = empty($aParam) ? $this->request->post() : $aParam;
    $iPReferId = empty($aParam['p_refer_id']) ? 0 : $aParam['p_refer_id'];
    if(empty($iPReferId)){
        return json_encode(['status' => 2,'msg' => 'Please select the reference to be queried']);
    }

    $aWhere = ['p_refer_id' => $iPReferId,'state' => ['in', [0,2]]];
    $aRefer = Db::name('production_article_refer')->field('title,joura,author,refer_doi,doilink,cs')->where($aWhere)->find();
    // An empty lookup result has no refer_doi either, so it is reported through this branch too
    if(empty($aRefer['refer_doi'])){
        return json_encode(['status' => 3,'msg' => 'The doi of the reference is empty']);
    }

    // Reject anything that does not look like a DOI before calling the remote API
    $sCheckDoi = $this->filterValidDoi($aRefer['refer_doi']);
    if(empty($sCheckDoi)){
        return json_encode(['status' => 4,'msg' => 'Doi does not comply with the rules']);
    }

    $aDoiInfo = $this->fetchSingleDoiWithRetry($sCheckDoi);
    if (empty($aDoiInfo)) {
        // Without a Crossref record there is nothing trustworthy to store; stopping here
        // keeps the row from being stamped as checked (cs = 1) after a failed lookup.
        return json_encode(['status' => 7,'msg' => 'Crossref request failed']);
    }

    // Extract the fields of interest from the Crossref record
    $sTitle = $this->getTitle($aDoiInfo);
    $aPublisher = $this->getPublisher($aDoiInfo);
    $sJoura = empty($aPublisher['title']) ? $aPublisher['short_title'] : $aPublisher['title'];
    $aAuthor = $this->getAuthors($aDoiInfo);
    $sAuthor = empty($aAuthor) ? '' : implode(',', $aAuthor);
    $sDateno = $this->getVolumeIssuePages($aDoiInfo);
    $aRetractInfo = $this->checkRetracted($aDoiInfo);
    $sDolink = $this->getDolink($aDoiInfo);
    $sDolink = empty($sDolink) ? 'https://doi.org/' . $sCheckDoi : $sDolink;

    // These columns are only filled in when the stored value is blank
    $aUpdate = [];
    $aFillIfBlank = [
        'title'   => $sTitle,
        'joura'   => $sJoura,
        'author'  => $sAuthor,
        'doilink' => $sDolink,
    ];
    foreach ($aFillIfBlank as $sColumn => $sValue) {
        if(!empty($sValue) && empty($aRefer[$sColumn])){
            $aUpdate[$sColumn] = $sValue;
        }
    }
    if(!empty($sDateno)){
        $aUpdate['dateno'] = $sDateno;
    }
    if(!empty($aRetractInfo['is_retracted'])){
        $aUpdate['is_retracted'] = 1;
    }
    if(empty($aUpdate)){
        return json_encode(['status' => 5,'msg' => 'No update information available']);
    }

    $aUpdate['update_time'] = time();
    $aUpdate['cs'] = 1;
    $aWhere = ['p_refer_id' => $iPReferId];
    $result = Db::name('production_article_refer')->where($aWhere)->limit(1)->update($aUpdate);
    if($result === false){
        return json_encode(['status' => 6,'msg' => 'Update failed-Cs']);
    }
    return json_encode(['status' => 1,'msg' => 'Update successful']);
}
/**
 * Validate a DOI string and return it trimmed, or '' when it is not acceptable.
 *
 * A value is accepted when, after trimming, it starts with "10.", followed by at
 * least four digits, a slash and at least one further character.
 *
 * @param string $doi Raw DOI text.
 * @return string The trimmed DOI, or an empty string when it is empty or malformed.
 */
private function filterValidDoi($doi = ''){
    $candidate = trim($doi);
    $looksLikeDoi = !empty($candidate) && preg_match('/^10\.\d{4,}\/.+/', $candidate);
    return $looksLikeDoi ? $candidate : '';
}
/**
 * Fetch the Crossref work record for one DOI, retrying transient failures.
 *
 * Up to $this->maxRetry requests are made. HTTP 429 waits 5 seconds before the
 * next attempt, any other failure waits 1 second; no wait happens after the final
 * attempt. HTTP 404 is returned immediately because repeating the request cannot
 * change the outcome. A 200 response whose body is not a Crossref "ok" envelope
 * yields null without retrying.
 *
 * @param string $doi DOI to look up (already validated by the caller).
 * @return array|null The `message` element of the Crossref response, or null on failure.
 */
private function fetchSingleDoiWithRetry($doi){
    // The URL and User-Agent do not change between attempts, so build them once.
    // The contact address is only included when one is actually configured.
    $url = $this->crossrefUrl . rawurlencode($doi);
    $userAgent = 'User-Agent: DOI-Fetcher/1.0';
    if (!empty($this->mailto)) {
        $url .= '?mailto=' . rawurlencode($this->mailto);
        $userAgent .= " (mailto:{$this->mailto})";
    }

    for ($attempt = 1; $attempt <= $this->maxRetry; $attempt++) {
        $ch = curl_init();
        curl_setopt($ch, CURLOPT_URL, $url);
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
        curl_setopt($ch, CURLOPT_TIMEOUT, $this->timeout);
        // Keep certificate and host verification on: the response is written to the database
        curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, true);
        curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, 2);
        curl_setopt($ch, CURLOPT_HTTPHEADER, [$userAgent]);
        $response = curl_exec($ch);
        $httpCode = curl_getinfo($ch, CURLINFO_HTTP_CODE);
        curl_close($ch);

        if ($httpCode == 200) {
            $data = is_string($response) ? json_decode($response, true) : null;
            // Guard against an undecodable body or an envelope without the expected keys
            if (is_array($data) && isset($data['status'], $data['message']) && $data['status'] == 'ok') {
                return $data['message'];
            }
            return null;
        }

        if ($httpCode == 404) {
            // The DOI is unknown to Crossref; retrying is pointless
            return null;
        }

        if ($attempt < $this->maxRetry) {
            // Rate-limited requests back off longer than ordinary failures
            sleep($httpCode == 429 ? 5 : 1);
        }
    }
    return null;
}
/**
 * Return the first title of a Crossref work record.
 *
 * @param array $aDoiInfo Crossref work record.
 * @return string First entry of `title`, or '' when there is none.
 */
private function getTitle($aDoiInfo = []){
    if (isset($aDoiInfo['title'][0])) {
        return $aDoiInfo['title'][0];
    }
    return '';
}
/**
 * Collect journal and publisher information from a Crossref work record.
 *
 * @param array $aDoiInfo Crossref work record.
 * @return array Keys: `title` (first container-title), `short_title` (first
 *               short-container-title), `ISSN` (array) and `publisher`; missing
 *               values default to '' (or [] for ISSN).
 */
private function getPublisher($aDoiInfo = []){
    $fullTitle = $aDoiInfo['container-title'][0] ?? '';
    $abbreviatedTitle = $aDoiInfo['short-container-title'][0] ?? '';
    $issnList = isset($aDoiInfo['ISSN']) ? $aDoiInfo['ISSN'] : [];
    $publisherName = isset($aDoiInfo['publisher']) ? $aDoiInfo['publisher'] : '';

    return [
        'title' => $fullTitle,
        'short_title' => $abbreviatedTitle,
        'ISSN' => $issnList,
        'publisher' => $publisherName,
    ];
}
/**
 * Build the list of author display names from a Crossref work record.
 *
 * Each author is rendered as "given family". When only one of the two parts is
 * present it is used on its own (without stray whitespace), and authors that
 * carry only a `name` entry are kept under that name instead of being dropped.
 *
 * @param array $aDoiInfo Crossref work record.
 * @return array List of non-empty author names in record order.
 */
private function getAuthors($aDoiInfo = []){
    if (empty($aDoiInfo['author']) || !is_array($aDoiInfo['author'])) {
        return [];
    }

    $authors = [];
    foreach ($aDoiInfo['author'] as $author) {
        if (!is_array($author)) {
            continue;
        }
        $given = trim((string)($author['given'] ?? ''));
        $family = trim((string)($author['family'] ?? ''));
        // trim() removes the separator when one of the two parts is missing
        $name = trim($given . ' ' . $family);
        if ($name === '') {
            // Fall back to the single `name` entry when no personal name parts exist
            $name = trim((string)($author['name'] ?? ''));
        }
        if ($name !== '') {
            $authors[] = $name;
        }
    }
    return $authors;
}
/**
 * Return the publication year of a Crossref work record.
 *
 * @param array $aDoiInfo Crossref work record.
 * @return string First element of the first `issued.date-parts` entry as a string,
 *                or '' when it is missing or empty.
 */
private function getPublishYear($aDoiInfo = []){
    $year = $aDoiInfo['issued']['date-parts'][0][0] ?? null;
    return empty($year) ? '' : (string)$year;
}
/**
 * Build the citation locator "year;volume(issue):pages", e.g. "2024;10(2):100-120".
 *
 * Parts that are missing are left out together with their separator: the year and
 * the volume part are joined with ';', and the page range is appended with ':'.
 * Pages come from `page.start`/`page.end` (or `first-page`/`last-page`) when
 * available, otherwise from the plain `page` value.
 *
 * @param array $aDoiInfo Crossref work record.
 * @return string The assembled locator, or '' when no part is available.
 */
private function getVolumeIssuePages($aDoiInfo = []){
    $year = $this->getPublishYear($aDoiInfo);

    // volume(issue); the issue is only shown together with a volume
    $volume = $aDoiInfo['volume'] ?? '';
    $issue = $aDoiInfo['issue'] ?? '';
    $volumeIssue = $volume ? $volume . ($issue ? "({$issue})" : '') : '';

    // start-end pages; `page` may be a structured array or a ready-made string
    $page = $aDoiInfo['page'] ?? '';
    $pageStart = (is_array($page) && isset($page['start'])) ? $page['start'] : ($aDoiInfo['first-page'] ?? '');
    $pageEnd = (is_array($page) && isset($page['end'])) ? $page['end'] : ($aDoiInfo['last-page'] ?? '');
    if ($pageStart) {
        $pages = $pageStart . ($pageEnd ? "-{$pageEnd}" : '');
    } else {
        // Only a scalar can be used verbatim; an array here would render as "Array"
        $pages = is_scalar($page) ? (string)$page : '';
    }

    $locator = $year;
    if ($volumeIssue !== '') {
        $locator .= ($locator === '' ? '' : ';') . $volumeIssue;
    }
    if ($pages) {
        $locator .= ($locator === '' ? '' : ':') . $pages;
    }
    return $locator;
}
/**
 * Decide whether a Crossref work record describes retracted content.
 *
 * Five checks run in order; each one that matches sets the flag and overwrites
 * the reason, so the reason reflects the last check that matched:
 *   1. `type` / `subtype` equals "retraction" or "correction";
 *   2. `update-type` contains "retraction";
 *   3. `relation` has an "is-retraction-of" or "corrects" entry;
 *   4. an `update-to` entry whose type or label contains "retract";
 *   5. a title containing retraction / retracted / withdrawal / withdrawn.
 *
 * NOTE(review): checks 1 and 3 also flag corrections, and check 5 matches the
 * keywords anywhere in the title — confirm both are intended.
 *
 * @param array $aDoiInfo Crossref work record.
 * @return array ['is_retracted' => bool, 'reason' => string]
 */
private function checkRetracted($aDoiInfo = []){
    $isRetracted = false;
    $reason = "未撤稿";
    $aNoticeTypes = ['retraction', 'correction'];

    // 1. The record itself is typed as a retraction/correction notice
    $sType = strtolower($aDoiInfo['type'] ?? '');
    $sSubtype = strtolower($aDoiInfo['subtype'] ?? '');
    if (in_array($sType, $aNoticeTypes, true)) {
        $isRetracted = true;
        $reason = "文章类型为{$sType}(撤稿/更正声明)";
    }
    if (in_array($sSubtype, $aNoticeTypes, true)) {
        $isRetracted = true;
        $reason = "文章类型为{$sSubtype}(撤稿/更正声明)";
    }

    // 2. update-type lists a retraction; the cast accepts a single string as well as a list
    $aUpdateTypes = isset($aDoiInfo['update-type']) ? (array)$aDoiInfo['update-type'] : [];
    if (in_array('retraction', $aUpdateTypes, true)) {
        $isRetracted = true;
        $reason = "官方标记为撤稿update-type: retraction";
    }

    // 3. Relation to another work
    if (!empty($aDoiInfo['relation']) && is_array($aDoiInfo['relation'])) {
        foreach ($aDoiInfo['relation'] as $relType => $relItems) {
            if (in_array($relType, ['is-retraction-of', 'corrects'], true)) {
                $isRetracted = true;
                $relatedDoi = (is_array($relItems) && isset($relItems[0]['id'])) ? $relItems[0]['id'] : '未知';
                $reason = "关联撤稿文章{$relatedDoi}(关系:{$relType})";
                break;
            }
        }
    }

    // 4. update-to entries mentioning a retraction
    if (isset($aDoiInfo['update-to']) && is_array($aDoiInfo['update-to'])) {
        foreach ($aDoiInfo['update-to'] as $update) {
            if (!is_array($update)) {
                continue;
            }
            $updateType = strtolower($update['type'] ?? '');
            $updateLabel = strtolower($update['label'] ?? '');
            if (strpos($updateType, 'retract') !== false || strpos($updateLabel, 'retract') !== false) {
                $isRetracted = true;
                $noticeDoi = $update['DOI'] ?? '未知';
                $reason = "update-to标记为撤稿(撤稿声明:{$noticeDoi})";
                break;
            }
        }
    }

    // 5. Keywords in the title
    $aKeywords = ['retraction', 'retracted', 'withdrawal', 'withdrawn'];
    $aTitles = $aDoiInfo['title'] ?? [];
    foreach ((array)$aTitles as $value) {
        if (!is_string($value)) {
            continue;
        }
        $sTitleLower = strtolower($value);
        foreach ($aKeywords as $sKeyword) {
            if (strpos($sTitleLower, $sKeyword) !== false) {
                $isRetracted = true;
                $reason = "标题包含撤稿关键词:{$sKeyword}";
                break 2;
            }
        }
    }

    return [
        'is_retracted' => $isRetracted,
        'reason' => $reason
    ];
}
/**
 * Return the landing URL of a Crossref work record.
 *
 * @param array $aDoiInfo Crossref work record.
 * @return string Value of `URL`, or '' when it is absent.
 */
private function getDolink($aDoiInfo = []){
    return isset($aDoiInfo['URL']) ? $aDoiInfo['URL'] : '';
}
}

File diff suppressed because it is too large Load Diff

View File

@@ -4,31 +4,21 @@ namespace app\api\controller;
use think\Cache;
use think\Db;
use GuzzleHttp\Client;
use think\Validate;
use app\common\ExpertFinderService;
class ExpertFinder extends Base
{
private $httpClient;
private $ncbiBaseUrl = 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/';
private $service;
public function __construct(\think\Request $request = null)
{
parent::__construct($request);
$this->httpClient = new Client([
'timeout' => 180,
'connect_timeout' => 15,
'verify' => false,
]);
$this->service = new ExpertFinderService();
}
/**
* Main search endpoint
* Params:
* keyword - search term (e.g. "biomedical engineering")
* page - page number, default 1
* per_page - articles per page, default 100, max 100
* min_year - earliest publication year, default current-3
* source - "pubmed" (fast, email from affiliation) or "pmc" (slower, structured email)
*/
public function search()
{
@@ -49,16 +39,12 @@ class ExpertFinder extends Base
}
try {
if ($source === 'pmc') {
$result = $this->searchViaPMC($keyword, $perPage, $minYear, $page);
} else {
$result = $this->searchViaPubMed($keyword, $perPage, $minYear, $page);
}
$result = $this->service->searchExperts($keyword, $perPage, $minYear, $page, $source);
} catch (\Exception $e) {
return jsonError('Search failed: ' . $e->getMessage());
}
$saveResult = $this->saveExperts($result['experts'], $keyword, $source);
$saveResult = $this->service->saveExperts($result['experts'], $keyword, $source);
$result['saved_new'] = $saveResult['inserted'];
$result['saved_exist'] = $saveResult['existing'];
@@ -69,16 +55,6 @@ class ExpertFinder extends Base
/**
* Get experts from local database
* Params:
* field - filter by field keyword (searches t_expert_field)
* major_id - filter by major_id (searches t_expert_field)
* state - filter by state (0-5), -1 for all
* keyword - search name/email/affiliation
* no_recent - if 1, exclude experts promoted within last N days (default 30)
* recent_days - days threshold for no_recent filter, default 30
* page - page number, default 1
* per_page - items per page, default 20
* min_experts - auto-fetch threshold, default 50
*/
public function getList()
{
@@ -171,9 +147,6 @@ class ExpertFinder extends Base
/**
* Update expert state
* Params:
* expert_id - single id or comma-separated ids
* state - new state (0-5)
*/
public function updateState()
{
@@ -195,9 +168,6 @@ class ExpertFinder extends Base
/**
* Delete expert (soft: set state=5 blacklist, or hard delete)
* Params:
* expert_id - single id or comma-separated ids
* hard - 1 for hard delete, default 0 (blacklist)
*/
public function deleteExpert()
{
@@ -221,7 +191,6 @@ class ExpertFinder extends Base
/**
* Export search results to Excel
* Same params as search(), exports current page results
*/
public function export()
{
@@ -240,11 +209,7 @@ class ExpertFinder extends Base
if (!$cached) {
try {
if ($source === 'pmc') {
$cached = $this->searchViaPMC($keyword, $perPage, $minYear, $page);
} else {
$cached = $this->searchViaPubMed($keyword, $perPage, $minYear, $page);
}
$cached = $this->service->searchExperts($keyword, $perPage, $minYear, $page, $source);
Cache::set($cacheKey, $cached, 3600);
} catch (\Exception $e) {
return jsonError('Search failed: ' . $e->getMessage());
@@ -276,16 +241,82 @@ class ExpertFinder extends Base
// ==================== Cron / Auto Fetch ====================
/**
 * Daily cron entry point: queue one expert-fetch job per journal keyword.
 *
 * Request parameters: journal_id (restrict to one journal; otherwise all journals
 * with state 0), per_page (default 200, minimum 10), source (default "pubmed"),
 * min_year (default current year - 3).
 *
 * For every journal with an ISSN, the titles of its linked majors are used as
 * keywords. A keyword whose fetch log shows a run since midnight today is counted
 * as skipped; every other keyword is queued with a delay of 10 seconds per job
 * already queued in this call.
 *
 * @return mixed jsonSuccess() payload with `queued`, `skipped` and `details`.
 */
public function dailyFetchAll()
{
    $request = $this->request;
    $journalId = intval($request->param('journal_id', 0));
    $perPage = max(10, intval($request->param('per_page', 200)));
    $source = $request->param('source', 'pubmed');
    $minYear = intval($request->param('min_year', date('Y') - 3));

    // One specific journal when requested, otherwise every journal with state 0
    $journalQuery = Db::name('journal')->field("journal_id,issn,title");
    $journals = $journalId
        ? $journalQuery->where('journal_id', $journalId)->select()
        : $journalQuery->where('state', 0)->select();
    if (empty($journals)) {
        return jsonSuccess(['msg' => 'No active journals found', 'queued' => 0]);
    }

    $midnight = strtotime(date('Y-m-d'));
    $queuedCount = 0;
    $skippedCount = 0;
    $queuedJobs = [];

    foreach ($journals as $journalRow) {
        $issn = trim($journalRow['issn'] ?? '');
        if (empty($issn)) {
            continue;
        }

        $majorTitles = Db::name('major_to_journal')
            ->alias('mtj')
            ->join('t_major m', 'm.major_id = mtj.major_id', 'left')
            ->where('mtj.journal_issn', $issn)
            ->where('mtj.mtj_state', 0)
            ->where("m.pid", "<>", 0)
            ->where('m.major_state', 0)
            ->column('m.major_title');
        $majorTitles = array_unique(array_filter($majorTitles));
        if (empty($majorTitles)) {
            continue;
        }

        foreach ($majorTitles as $majorTitle) {
            $keyword = trim($majorTitle);
            if (empty($keyword)) {
                continue;
            }

            // Keywords already fetched since midnight are not queued again
            $fetchLog = $this->service->getFetchLog($keyword, $source);
            if ($fetchLog['last_time'] >= $midnight) {
                $skippedCount++;
                continue;
            }

            // Stagger the jobs: each one starts 10 seconds after the previous one
            $delaySeconds = $queuedCount * 10;
            $payload = [
                'field' => $keyword,
                'source' => $source,
                'per_page' => $perPage,
                'min_year' => $minYear,
                'journal_id' => $journalRow['journal_id'],
            ];
            \think\Queue::later($delaySeconds, 'app\api\job\FetchExperts@fire', $payload, 'FetchExperts');

            $queuedJobs[] = [
                'journal' => $journalRow['title'] ?? $journalRow['journal_id'],
                'keyword' => $keyword,
                'delay_s' => $delaySeconds,
            ];
            $queuedCount++;
        }
    }

    return jsonSuccess([
        'queued' => $queuedCount,
        'skipped' => $skippedCount,
        'details' => $queuedJobs,
    ]);
}
/**
* Cron job: daily fetch experts for given keywords
* Params:
* keywords - comma-separated keywords (e.g. "biomedical engineering,tissue engineering")
* source - pubmed or pmc, default pubmed
* per_page - articles per page, default 100
* min_year - default current-3
*
* Uses cache to remember which page was last fetched per keyword,
* so each cron run fetches the next page automatically.
*/
public function cronFetch()
{
@@ -303,39 +334,14 @@ class ExpertFinder extends Base
$report = [];
foreach ($keywords as $kw) {
if (empty($kw)) {
continue;
}
$fetchLog = $this->getFetchLog($kw, $source);
$page = $fetchLog['last_page'] + 1;
if (empty($kw)) continue;
try {
if ($source === 'pmc') {
$result = $this->searchViaPMC($kw, $perPage, $minYear, $page);
} else {
$result = $this->searchViaPubMed($kw, $perPage, $minYear, $page);
}
$saveResult = $this->saveExperts($result['experts'], $kw, $source);
$nextPage = $result['has_more'] ? $page : 0;
$totalPages = isset($result['total_pages']) ? $result['total_pages'] : 0;
$this->updateFetchLog($kw, $source, $nextPage, $totalPages);
$report[] = [
'keyword' => $kw,
'page' => $page,
'experts_found' => $result['total'],
'saved_new' => $saveResult['inserted'],
'saved_exist' => $saveResult['existing'],
'field_enriched' => $saveResult['field_enriched'],
'has_more' => $result['has_more'],
];
$result = $this->service->doFetchForField($kw, $source, $perPage, $minYear);
$report[] = $result;
} catch (\Exception $e) {
$report[] = [
'keyword' => $kw,
'page' => $page,
'error' => $e->getMessage(),
];
}
@@ -357,7 +363,7 @@ class ExpertFinder extends Base
}
Cache::set($lockKey, 1, 300);
\think\Queue::push('app\api\job\FetchExperts', [
\think\Queue::push('app\api\job\FetchExperts@fire', [
'field' => $field,
'source' => 'pubmed',
'per_page' => 100,
@@ -365,459 +371,17 @@ class ExpertFinder extends Base
], 'FetchExperts');
}
/**
* Internal method: run a fetch for a single keyword (used by both cron and queue job)
*/
public function doFetchForField($field, $source = 'pubmed', $perPage = 100, $minYear = null)
public function mytest()
{
if ($minYear === null) {
$minYear = date('Y') - 3;
}
$fetchLog = $this->getFetchLog($field, $source);
$page = $fetchLog['last_page'] + 1;
if ($source === 'pmc') {
$result = $this->searchViaPMC($field, $perPage, $minYear, $page);
} else {
$result = $this->searchViaPubMed($field, $perPage, $minYear, $page);
}
$saveResult = $this->saveExperts($result['experts'], $field, $source);
$nextPage = $result['has_more'] ? $page : 0;
$totalPages = isset($result['total_pages']) ? $result['total_pages'] : 0;
$this->updateFetchLog($field, $source, $nextPage, $totalPages);
return [
'keyword' => $field,
'page' => $page,
'experts_found' => $result['total'],
'saved_new' => $saveResult['inserted'],
'saved_exist' => $saveResult['existing'],
'field_enriched' => $saveResult['field_enriched'],
'has_more' => $result['has_more'],
];
}
// ==================== PubMed Search ====================
/**
 * Search PubMed for a keyword and aggregate the authors of the matching articles.
 *
 * Article ids are fetched in batches of 50 with a 0.4 s pause after each batch;
 * batches that return no XML are ignored.
 *
 * @param string $keyword Search term.
 * @param int    $perPage Number of articles requested per page.
 * @param int    $minYear Earliest publication year.
 * @param int    $page    Page number.
 * @return array Paged result produced by buildPagedResult().
 */
private function searchViaPubMed($keyword, $perPage, $minYear, $page = 1)
{
    set_time_limit(600);

    $found = $this->esearch('pubmed', $keyword, $perPage, $minYear, $page);
    $articleIds = $found['ids'];
    $articleTotal = $found['total'];
    if (empty($articleIds)) {
        return $this->buildPagedResult([], 0, 0, $articleTotal, $page, $perPage, 'pubmed');
    }

    $authorRecords = [];
    foreach (array_chunk($articleIds, 50) as $idBatch) {
        $xml = $this->efetchWithRetry('pubmed', $idBatch);
        if ($xml) {
            $authorRecords = array_merge($authorRecords, $this->parsePubMedXml($xml));
        }
        // Pause between batches
        usleep(400000);
    }

    $experts = $this->aggregateExperts($authorRecords);
    return $this->buildPagedResult($experts, count($experts), count($articleIds), $articleTotal, $page, $perPage, 'pubmed');
}
// ==================== PMC Search ====================
/**
 * Search PMC for a keyword and aggregate the authors of the matching articles.
 *
 * Article ids are fetched in batches of 5 with a 0.5 s pause after each batch;
 * batches that return no XML are ignored.
 *
 * @param string $keyword Search term.
 * @param int    $perPage Number of articles requested per page.
 * @param int    $minYear Earliest publication year.
 * @param int    $page    Page number.
 * @return array Paged result produced by buildPagedResult().
 */
private function searchViaPMC($keyword, $perPage, $minYear, $page = 1)
{
    set_time_limit(600);

    $found = $this->esearch('pmc', $keyword, $perPage, $minYear, $page);
    $articleIds = $found['ids'];
    $articleTotal = $found['total'];
    if (empty($articleIds)) {
        return $this->buildPagedResult([], 0, 0, $articleTotal, $page, $perPage, 'pmc');
    }

    $authorRecords = [];
    foreach (array_chunk($articleIds, 5) as $idBatch) {
        $xml = $this->efetchWithRetry('pmc', $idBatch);
        if ($xml) {
            $authorRecords = array_merge($authorRecords, $this->parsePMCXml($xml));
        }
        // Pause between batches
        usleep(500000);
    }

    $experts = $this->aggregateExperts($authorRecords);
    return $this->buildPagedResult($experts, count($experts), count($articleIds), $articleTotal, $page, $perPage, 'pmc');
}
// ==================== NCBI API Calls ====================
private function esearch($db, $keyword, $perPage, $minYear, $page = 1)
{
$term = $keyword . ' AND ' . $minYear . ':' . date('Y') . '[pdat]';
$retstart = ($page - 1) * $perPage;
$response = $this->httpClient->get($this->ncbiBaseUrl . 'esearch.fcgi', [
'query' => [
'db' => $db,
'term' => $term,
'retstart' => $retstart,
'retmax' => $perPage,
'retmode' => 'json',
'sort' => 'relevance',
],
$data = $this->request->post();
$rule = new Validate([
"field" => "require"
]);
$data = json_decode($response->getBody()->getContents(), true);
$ids = $data['esearchresult']['idlist'] ?? [];
$total = intval($data['esearchresult']['count'] ?? 0);
return ['ids' => $ids, 'total' => $total];
}
/**
 * POST an efetch request for the given ids and return the raw XML response body.
 *
 * @param string $db  Database name sent as the `db` form field.
 * @param array  $ids Record ids; sent comma-separated as the `id` form field.
 * @return string Response body.
 */
private function efetch($db, $ids)
{
    $endpoint = $this->ncbiBaseUrl . 'efetch.fcgi';
    $formFields = [
        'db' => $db,
        'id' => implode(',', $ids),
        'retmode' => 'xml',
    ];
    $response = $this->httpClient->post($endpoint, ['form_params' => $formFields]);

    return $response->getBody()->getContents();
}
private function efetchWithRetry($db, $ids, $maxRetries = 3)
{
for ($attempt = 1; $attempt <= $maxRetries; $attempt++) {
try {
return $this->efetch($db, $ids);
} catch (\Exception $e) {
if ($attempt === $maxRetries) {
if (count($ids) > 1) {
$half = ceil(count($ids) / 2);
$firstHalf = array_slice($ids, 0, $half);
$secondHalf = array_slice($ids, $half);
$xml1 = $this->efetchWithRetry($db, $firstHalf, 2);
$xml2 = $this->efetchWithRetry($db, $secondHalf, 2);
return $this->mergeXml($xml1, $xml2);
}
return null;
}
sleep($attempt * 2);
}
if (!$rule->check($data)) {
return jsonError($rule->getError());
}
return null;
}
/**
 * Merge two XML documents into one well-formed document.
 *
 * The children of the second document's root element are appended to the root of
 * the first one, so the result can still be parsed as a single document. Plain
 * concatenation would yield two root elements, which XML parsers reject.
 *
 * When only one input is well-formed that input is returned on its own; when
 * neither is, the inputs are concatenated as a last resort.
 *
 * @param string|null $xml1 First document (may be empty).
 * @param string|null $xml2 Second document (may be empty).
 * @return string|null The merged document, or the single non-empty input.
 */
private function mergeXml($xml1, $xml2)
{
    if (empty($xml1)) return $xml2;
    if (empty($xml2)) return $xml1;

    // Collect parse errors silently and restore the caller's setting afterwards
    $previousSetting = libxml_use_internal_errors(true);
    $first = new \DOMDocument();
    $second = new \DOMDocument();
    $firstOk = $first->loadXML($xml1) && $first->documentElement !== null;
    $secondOk = $second->loadXML($xml2) && $second->documentElement !== null;
    libxml_clear_errors();
    libxml_use_internal_errors($previousSetting);

    if (!$firstOk && !$secondOk) {
        return $xml1 . "\n" . $xml2;
    }
    if (!$firstOk) return $xml2;
    if (!$secondOk) return $xml1;

    // importNode() copies each node into the target document before it is attached
    foreach ($second->documentElement->childNodes as $child) {
        $first->documentElement->appendChild($first->importNode($child, true));
    }
    return $first->saveXML();
}
// ==================== PubMed XML Parsing ====================
/**
 * Turn a PubMed efetch XML document into a flat list of author records.
 *
 * One record is produced per author for whom an e-mail address can be extracted
 * from the affiliation text; authors without a name or without an e-mail are
 * skipped, as are articles without an author list. Unparseable XML yields [].
 *
 * @param string $xmlString Raw XML.
 * @return array List of records with keys name, email, affiliation,
 *               article_title, article_id and journal.
 */
private function parsePubMedXml($xmlString)
{
    libxml_use_internal_errors(true);
    $document = simplexml_load_string($xmlString);
    if ($document === false) {
        return [];
    }

    $records = [];
    foreach ($document->PubmedArticle as $pubmedArticle) {
        $medline = $pubmedArticle->MedlineCitation;
        $articleNode = $medline->Article;

        $articleTitle = $this->xmlNodeToString($articleNode->ArticleTitle);
        $pmid = (string) $medline->PMID;
        $journalTitle = isset($articleNode->Journal->Title) ? (string) $articleNode->Journal->Title : '';

        if (!isset($articleNode->AuthorList->Author)) {
            continue;
        }

        foreach ($articleNode->AuthorList->Author as $authorNode) {
            $surname = (string) ($authorNode->LastName ?? '');
            $givenName = (string) ($authorNode->ForeName ?? '');
            $displayName = trim($givenName . ' ' . $surname);
            if (empty($displayName)) {
                continue;
            }

            // Keep the first non-empty affiliation and the first e-mail found in any of them
            $firstAffiliation = '';
            $foundEmail = '';
            if (isset($authorNode->AffiliationInfo)) {
                foreach ($authorNode->AffiliationInfo as $affiliationInfo) {
                    $affiliationText = (string) $affiliationInfo->Affiliation;
                    if (empty($firstAffiliation)) {
                        $firstAffiliation = $affiliationText;
                    }
                    if (empty($foundEmail)) {
                        $foundEmail = $this->extractEmailFromText($affiliationText);
                    }
                }
            }
            if (empty($foundEmail)) {
                continue;
            }

            $records[] = [
                'name' => $displayName,
                'email' => strtolower($foundEmail),
                'affiliation' => $this->cleanAffiliation($firstAffiliation),
                'article_title' => $articleTitle,
                'article_id' => $pmid,
                'journal' => $journalTitle,
            ];
        }
    }

    return $records;
}
// ==================== PMC XML Parsing ====================
/**
 * Turn a PMC efetch XML document into a flat list of author records.
 *
 * For every <article>, each contributor of type "author" with a name yields one
 * record, provided an e-mail address can be determined. The e-mail is taken, in
 * this order, from the contributor's own <email>, from the first address found in
 * <author-notes> (corresponding authors only), or from the affiliation text.
 * Unparseable XML yields [].
 *
 * @param string $xmlString Raw XML.
 * @return array List of records with keys name, email, affiliation,
 *               article_title, article_id and journal.
 */
private function parsePMCXml($xmlString)
{
$results = [];
// Collect libxml errors internally instead of emitting warnings
libxml_use_internal_errors(true);
$xml = simplexml_load_string($xmlString);
if ($xml === false) {
return $results;
}
// Prefer direct <article> children; otherwise walk all children (non-articles are filtered below)
$articles = $xml->article ?? $xml->children();
foreach ($articles as $article) {
if ($article->getName() !== 'article') {
continue;
}
$front = $article->front;
if (!$front) {
continue;
}
$articleMeta = $front->{'article-meta'};
if (!$articleMeta) {
continue;
}
$title = $this->xmlNodeToString($articleMeta->{'title-group'}->{'article-title'} ?? null);
// Article id: the <article-id> whose pub-id-type is "pmc" (the last one wins if several match)
$pmcId = '';
if (isset($articleMeta->{'article-id'})) {
foreach ($articleMeta->{'article-id'} as $idNode) {
if ((string) $idNode['pub-id-type'] === 'pmc') {
$pmcId = (string) $idNode;
}
}
}
// Journal title may sit directly under journal-meta or inside journal-title-group
$journal = '';
if (isset($front->{'journal-meta'}->{'journal-title'})) {
$journal = (string) $front->{'journal-meta'}->{'journal-title'};
} elseif (isset($front->{'journal-meta'}->{'journal-title-group'}->{'journal-title'})) {
$journal = (string) $front->{'journal-meta'}->{'journal-title-group'}->{'journal-title'};
}
// E-mail addresses found in the author notes; used below for corresponding authors
$correspEmails = [];
if (isset($articleMeta->{'author-notes'})) {
$this->extractEmailsFromNode($articleMeta->{'author-notes'}, $correspEmails);
}
// Map of affiliation id => affiliation text, filled from <aff> inside the first contrib-group ...
$affiliationMap = [];
if (isset($articleMeta->{'contrib-group'})) {
foreach ($articleMeta->{'contrib-group'}->children() as $child) {
if ($child->getName() === 'aff') {
$affId = (string) ($child['id'] ?? '');
$affText = $this->xmlNodeToString($child);
if ($affId) {
$affiliationMap[$affId] = $affText;
}
}
}
}
// ... and from <aff> directly under article-meta (these overwrite entries with the same id)
if (isset($front->{'article-meta'}->{'aff'})) {
foreach ($front->{'article-meta'}->{'aff'} as $aff) {
$affId = (string) ($aff['id'] ?? '');
$affText = $this->xmlNodeToString($aff);
if ($affId) {
$affiliationMap[$affId] = $affText;
}
}
}
if (!isset($articleMeta->{'contrib-group'})) {
continue;
}
foreach ($articleMeta->{'contrib-group'}->contrib as $contrib) {
// Only contributors explicitly typed as authors are considered
$contribType = (string) ($contrib['contrib-type'] ?? '');
if ($contribType !== 'author') {
continue;
}
$nameNode = $contrib->name;
if (!$nameNode) {
continue;
}
$surname = (string) ($nameNode->surname ?? '');
$givenNames = (string) ($nameNode->{'given-names'} ?? '');
$fullName = trim($givenNames . ' ' . $surname);
if (empty($fullName)) {
continue;
}
// 1st choice for the e-mail: the contributor's own <email> element
$email = '';
if (isset($contrib->email)) {
$email = strtolower(trim((string) $contrib->email));
}
// Affiliation: first <xref ref-type="aff"> whose rid is in the map, else an inline <aff>
$affiliation = '';
if (isset($contrib->xref)) {
foreach ($contrib->xref as $xref) {
if ((string) $xref['ref-type'] === 'aff') {
$rid = (string) $xref['rid'];
if (isset($affiliationMap[$rid])) {
$affiliation = $affiliationMap[$rid];
break;
}
}
}
}
if (empty($affiliation) && isset($contrib->aff)) {
$affiliation = $this->xmlNodeToString($contrib->aff);
}
// Corresponding author: flagged by an <xref ref-type="corresp"> or by corresp="yes"
$isCorresponding = false;
if (isset($contrib->xref)) {
foreach ($contrib->xref as $xref) {
if ((string) $xref['ref-type'] === 'corresp') {
$isCorresponding = true;
}
}
}
if ((string) ($contrib['corresp'] ?? '') === 'yes') {
$isCorresponding = true;
}
// 2nd choice: first address from the author notes, for corresponding authors only
if (empty($email) && $isCorresponding && !empty($correspEmails)) {
$email = $correspEmails[0];
}
// 3rd choice: an address embedded in the affiliation text
if (empty($email)) {
$extracted = $this->extractEmailFromText($affiliation);
if ($extracted) {
$email = $extracted;
}
}
// Authors without any e-mail address are not recorded
if (empty($email)) {
continue;
}
$results[] = [
'name' => $fullName,
'email' => strtolower($email),
'affiliation' => $this->cleanAffiliation($affiliation),
'article_title' => $title,
'article_id' => $pmcId,
'journal' => $journal,
];
}
}
return $results;
}
// ==================== Pagination ====================
/**
 * Assemble the response envelope for one page of search results.
 *
 * `total_pages` is an integer (the float returned by ceil() is cast), and is 0
 * when there are no articles or when $perPage is not positive, which also avoids
 * a division by zero.
 *
 * @param array  $experts         Aggregated experts of this page.
 * @param int    $expertCount     Number of experts in $experts.
 * @param int    $articlesScanned Number of articles examined for this page.
 * @param int    $totalArticles   Total number of matching articles.
 * @param int    $page            Current page number.
 * @param int    $perPage         Articles per page.
 * @param string $source          Source identifier echoed back in the result.
 * @return array Result with keys experts, total, articles_scanned, total_articles,
 *               page, per_page, total_pages, has_more and source.
 */
private function buildPagedResult($experts, $expertCount, $articlesScanned, $totalArticles, $page, $perPage, $source)
{
    $totalPages = ($totalArticles > 0 && $perPage > 0)
        ? (int) ceil($totalArticles / $perPage)
        : 0;

    return [
        'experts' => $experts,
        'total' => $expertCount,
        'articles_scanned' => $articlesScanned,
        'total_articles' => $totalArticles,
        'page' => $page,
        'per_page' => $perPage,
        'total_pages' => $totalPages,
        'has_more' => $page < $totalPages,
        'source' => $source,
    ];
}
// ==================== Aggregation ====================
private function aggregateExperts($authorRecords)
{
$map = [];
foreach ($authorRecords as $record) {
$key = strtolower(trim($record['email']));
if (empty($key)) {
continue;
}
if (!isset($map[$key])) {
$map[$key] = [
'name' => $record['name'],
'email' => $record['email'],
'affiliation' => $record['affiliation'],
'paper_count' => 0,
'papers' => [],
];
}
$map[$key]['paper_count']++;
if (count($map[$key]['papers']) < 10) {
$map[$key]['papers'][] = [
'title' => $record['article_title'],
'article_id' => $record['article_id'],
'journal' => $record['journal'],
];
}
if (empty($map[$key]['affiliation']) && !empty($record['affiliation'])) {
$map[$key]['affiliation'] = $record['affiliation'];
}
}
$experts = array_values($map);
usort($experts, function ($a, $b) {
return $b['paper_count'] - $a['paper_count'];
});
return $experts;
$res = $this->service->doFetchForField($data['field'], "pubmed", 100, date('Y') - 3);
return jsonSuccess($res);
}
// ==================== Excel Export ====================
@@ -880,188 +444,4 @@ class ExpertFinder extends Base
'count' => count($experts),
]);
}
// ==================== Database Storage ====================
/**
 * Store experts in the `expert` table and attach the search field to each of them.
 *
 * Experts are matched by lower-cased, trimmed e-mail. Known experts are counted
 * as existing and only get the field attached; unknown ones are inserted. An
 * insert that throws is counted as existing as well.
 *
 * @param array  $experts Experts with keys name, email and affiliation.
 * @param string $field   Field keyword to attach to every expert.
 * @param string $source  Source identifier stored with new experts.
 * @return array ['inserted' => int, 'existing' => int, 'field_enriched' => int]
 */
private function saveExperts($experts, $field, $source)
{
    $counts = ['inserted' => 0, 'existing' => 0, 'field_enriched' => 0];

    foreach ($experts as $candidate) {
        $email = strtolower(trim($candidate['email']));
        if (empty($email)) {
            continue;
        }

        $known = Db::name('expert')->where('email', $email)->find();
        if ($known) {
            $counts['existing']++;
            $counts['field_enriched'] += $this->enrichExpertField($known['expert_id'], $field);
            continue;
        }

        // Values are cut to the lengths used by this method before insertion
        $row = [
            'name' => mb_substr($candidate['name'], 0, 255),
            'email' => mb_substr($email, 0, 128),
            'affiliation' => mb_substr($candidate['affiliation'], 0, 128),
            'source' => mb_substr($source, 0, 128),
            'ctime' => time(),
            'ltime' => 0,
            'state' => 0,
        ];
        try {
            $newId = Db::name('expert')->insertGetId($row);
            $this->enrichExpertField($newId, $field);
            $counts['inserted']++;
        } catch (\Exception $e) {
            // A failed insert is reported as an already-existing expert
            $counts['existing']++;
        }
    }

    return $counts;
}
// ==================== Fetch Log (t_expert_fetch) ====================
/**
 * Load the fetch progress stored for a field/source pair.
 *
 * The lookup keys are cut to 128 characters, the same length that is applied
 * when a progress row is written, so that longer keywords still find their row.
 *
 * @param string $field  Field keyword.
 * @param string $source Source identifier.
 * @return array The stored row, or zeroed defaults (last_page, total_pages,
 *               last_time) when no row exists.
 */
private function getFetchLog($field, $source)
{
    $log = Db::name('expert_fetch')
        ->where('field', mb_substr($field, 0, 128))
        ->where('source', mb_substr($source, 0, 128))
        ->find();
    if (!$log) {
        return ['last_page' => 0, 'total_pages' => 0, 'last_time' => 0];
    }
    return $log;
}
/**
 * Record the fetch progress for a field/source pair, updating the existing row
 * or inserting a new one.
 *
 * Field and source are cut to 128 characters before both the lookup and the
 * insert; looking up the uncut value would never match a stored (cut) row and
 * would add a duplicate row on every call for long keywords.
 *
 * @param string $field      Field keyword.
 * @param string $source     Source identifier.
 * @param int    $lastPage   Last page that was fetched.
 * @param int    $totalPages Total number of pages reported by the search.
 * @return void
 */
private function updateFetchLog($field, $source, $lastPage, $totalPages)
{
    $storedField = mb_substr($field, 0, 128);
    $storedSource = mb_substr($source, 0, 128);
    $progress = [
        'last_page' => $lastPage,
        'total_pages' => $totalPages,
        'last_time' => time(),
    ];

    $exists = Db::name('expert_fetch')
        ->where('field', $storedField)
        ->where('source', $storedSource)
        ->find();
    if ($exists) {
        Db::name('expert_fetch')
            ->where('expert_fetch_id', $exists['expert_fetch_id'])
            ->update($progress);
        return;
    }

    Db::name('expert_fetch')->insert(
        ['field' => $storedField, 'source' => $storedSource] + $progress
    );
}
/**
 * Attach a field keyword to an expert unless an active link already exists.
 *
 * The keyword is trimmed and cut to 128 characters before both the duplicate
 * check and the insert, so the check compares against the value that is stored.
 *
 * @param int    $expertId Expert id.
 * @param string $field    Field keyword.
 * @return int 1 when a new link was inserted, 0 when the keyword is empty or the
 *             link already exists.
 */
private function enrichExpertField($expertId, $field)
{
    $field = trim($field);
    if (empty($field)) {
        return 0;
    }
    $field = mb_substr($field, 0, 128);

    $exists = Db::name('expert_field')
        ->where('expert_id', $expertId)
        ->where('field', $field)
        ->where('state', 0)
        ->find();
    if ($exists) {
        return 0;
    }

    Db::name('expert_field')->insert([
        'expert_id' => $expertId,
        'major_id' => 0,
        'field' => $field,
        'state' => 0,
    ]);
    return 1;
}
// ==================== Helper Methods ====================
/**
 * Pull the first e-mail address out of a piece of free text.
 *
 * Three patterns are tried in order: an address introduced by
 * "Electronic address:", one introduced by "E-mail:" / "Email:", and finally any
 * bare address. The match is stripped of surrounding dots and lower-cased.
 *
 * @param string $text Text to scan.
 * @return string The address, or '' when the text is empty or contains none.
 */
private function extractEmailFromText($text)
{
    if (empty($text)) {
        return '';
    }

    $patterns = [
        '/[Ee]lectronic address:\s*([^\s;,]+@[^\s;,]+)/',
        '/[Ee]-?mail:\s*([^\s;,]+@[^\s;,]+)/',
        '/\b([a-zA-Z0-9._%+\-]+@[a-zA-Z0-9.\-]+\.[a-zA-Z]{2,})\b/',
    ];
    foreach ($patterns as $pattern) {
        if (preg_match($pattern, $text, $hit)) {
            return strtolower(trim($hit[1], '.'));
        }
    }

    return '';
}
/**
 * Collect every e-mail address found in an XML node and its descendants.
 *
 * Addresses come from <email> elements and from addresses embedded in the node's
 * own text. They are lower-cased and appended to $emails without duplicates.
 *
 * @param \SimpleXMLElement|null $node   Node to scan; null is ignored.
 * @param array                  $emails Accumulator, extended in place.
 * @return void
 */
private function extractEmailsFromNode($node, &$emails)
{
    if ($node === null) {
        return;
    }

    foreach ($node->children() as $childNode) {
        if ($childNode->getName() === 'email') {
            $address = strtolower(trim((string) $childNode));
            $isNew = !empty($address) && !in_array($address, $emails);
            if ($isNew) {
                $emails[] = $address;
            }
        }
        // Descend regardless of the element name
        $this->extractEmailsFromNode($childNode, $emails);
    }

    // Addresses written as plain text inside this node
    $ownText = (string) $node;
    $addressPattern = '/\b([a-zA-Z0-9._%+\-]+@[a-zA-Z0-9.\-]+\.[a-zA-Z]{2,})\b/';
    if (preg_match_all($addressPattern, $ownText, $found)) {
        foreach ($found[1] as $rawAddress) {
            $address = strtolower(trim($rawAddress, '.'));
            if (!in_array($address, $emails)) {
                $emails[] = $address;
            }
        }
    }
}
/**
 * Remove e-mail addresses (and their "Electronic address:" / "E-mail:" labels)
 * from an affiliation string and strip surrounding whitespace, dots, commas and
 * semicolons.
 *
 * @param string $text Affiliation text.
 * @return string Cleaned affiliation.
 */
private function cleanAffiliation($text)
{
    // Applied in this order: labelled addresses first, then any remaining bare address
    $emailPatterns = [
        '/\s*[Ee]lectronic address:\s*[^\s;,]+@[^\s;,]+/',
        '/\s*[Ee]-?mail:\s*[^\s;,]+@[^\s;,]+/',
        '/\s*\b[a-zA-Z0-9._%+\-]+@[a-zA-Z0-9.\-]+\.[a-zA-Z]{2,}\b/',
    ];
    $withoutEmails = preg_replace($emailPatterns, '', $text);

    return trim($withoutEmails, " \t\n\r\0\x0B.,;");
}
/**
 * Render an XML node as plain text: markup removed, entities decoded and runs of
 * whitespace collapsed to single spaces.
 *
 * @param \SimpleXMLElement|null $node Node to render; null yields ''.
 * @return string Plain-text content.
 */
private function xmlNodeToString($node)
{
    if ($node === null) {
        return '';
    }

    $plain = html_entity_decode(strip_tags($node->asXML()), ENT_QUOTES | ENT_XML1, 'UTF-8');

    return trim(preg_replace('/\s+/', ' ', $plain));
}
}

View File

@@ -0,0 +1,493 @@
<?php
namespace app\api\controller;
use think\Db;
class ExpertManage extends Base
{
// Display labels for the expert `state` column, keyed by state code:
// 0 = to be contacted, 1 = e-mail sent, 2 = replied, 3 = submitted a manuscript,
// 4 = bounced / invalid address, 5 = blacklisted (unsubscribed)
private $stateMap = [
0 => '待联系',
1 => '已发邮件',
2 => '已回复',
3 => '已投稿',
4 => '退信/无效',
5 => '黑名单(退订)',
];
/**
 * Build the controller; all initialisation is delegated to the parent constructor.
 *
 * @param \think\Request|null $request Request object handed straight to the parent.
 */
public function __construct(\think\Request $request = null)
{
parent::__construct($request);
}
/**
 * List experts with optional filters and pagination.
 *
 * Request parameters:
 *   keyword   - LIKE match against name, e-mail and affiliation
 *   field     - LIKE match against the expert's field keywords
 *   major_id  - exact match on the major id of the expert's fields
 *   state     - state filter (0-5); -1, empty or omitted disables the filter
 *   source    - exact match on the source column
 *   pageIndex - page number (default 1)
 *   pageSize  - rows per page (default 20)
 *
 * Every returned row is extended with its active fields (`fields`) and with
 * `state_text`, `ctime_text` and `ltime_text` for display.
 *
 * @return mixed jsonSuccess() payload with list, total, pageIndex, pageSize and totalPages.
 */
public function getList()
{
    $data = $this->request->param();
    $keyword = trim(isset($data['keyword']) ? $data['keyword'] : '');
    $field = trim(isset($data['field']) ? $data['field'] : '');
    $majorId = intval(isset($data['major_id']) ? $data['major_id'] : 0);
    // Normalise to a string so that an integer -1 is recognised as "no filter" too
    $state = (isset($data['state']) && is_scalar($data['state'])) ? trim((string) $data['state']) : '-1';
    $source = trim(isset($data['source']) ? $data['source'] : '');
    $page = max(1, intval(isset($data['pageIndex']) ? $data['pageIndex'] : 1));
    $pageSize = max(1, intval(isset($data['pageSize']) ? $data['pageSize'] : 20));

    $query = Db::name('expert')->alias('e');

    // The field table is only joined when a field-level filter is requested
    $needJoin = ($field !== '' || $majorId > 0);
    if ($needJoin) {
        $query->join('t_expert_field ef', 'ef.expert_id = e.expert_id AND ef.state = 0', 'inner');
        if ($field !== '') {
            $query->where('ef.field', 'like', '%' . $field . '%');
        }
        if ($majorId > 0) {
            $query->where('ef.major_id', $majorId);
        }
        // An expert can match through several field rows; keep one row per expert
        $query->group('e.expert_id');
    }
    if ($state !== '-1' && $state !== '') {
        $query->where('e.state', intval($state));
    }
    if ($keyword !== '') {
        $query->where('e.name|e.email|e.affiliation', 'like', '%' . $keyword . '%');
    }
    if ($source !== '') {
        $query->where('e.source', $source);
    }

    // Count on a copy so the paging clauses below do not affect the total
    $countQuery = clone $query;
    $total = $countQuery->distinct('e.expert_id')->count();

    $list = $query
        ->field('e.*')
        ->order('e.ctime desc')
        ->page($page, $pageSize)
        ->select();

    foreach ($list as &$item) {
        $item['fields'] = Db::name('expert_field')
            ->where('expert_id', $item['expert_id'])
            ->where('state', 0)
            ->select();
        $item['state_text'] = isset($this->stateMap[$item['state']]) ? $this->stateMap[$item['state']] : '未知';
        $item['ctime_text'] = $item['ctime'] ? date('Y-m-d H:i:s', $item['ctime']) : '';
        $item['ltime_text'] = $item['ltime'] ? date('Y-m-d H:i:s', $item['ltime']) : '';
    }
    // Drop the reference so later use of $item cannot overwrite the last row
    unset($item);

    return jsonSuccess([
        'list' => $list,
        'total' => $total,
        'pageIndex' => $page,
        'pageSize' => $pageSize,
        'totalPages' => $total > 0 ? ceil($total / $pageSize) : 0,
    ]);
}
/**
 * Return one expert together with all of its active fields.
 *
 * Request parameter: expert_id.
 */
public function getDetail()
{
    $expertId = intval($this->request->param('expert_id', 0));
    if ($expertId === 0) {
        return jsonError('expert_id is required');
    }

    $expert = Db::name('expert')->where('expert_id', $expertId)->find();
    if (!$expert) {
        return jsonError('专家不存在');
    }

    $activeFields = Db::name('expert_field')
        ->where('expert_id', $expertId)
        ->where('state', 0)
        ->select();

    // Falsy timestamps (0 / null) render as an empty string.
    $formatTime = function ($timestamp) {
        return $timestamp ? date('Y-m-d H:i:s', $timestamp) : '';
    };

    $stateCode = $expert['state'];
    $stateText = '未知';
    if (isset($this->stateMap[$stateCode])) {
        $stateText = $this->stateMap[$stateCode];
    }

    $expert['fields']     = $activeFields;
    $expert['state_text'] = $stateText;
    $expert['ctime_text'] = $formatTime($expert['ctime']);
    $expert['ltime_text'] = $formatTime($expert['ltime']);

    return jsonSuccess($expert);
}
/**
 * Create an expert.
 *
 * POST fields: name (required), email (required), affiliation, source,
 * fields (optional list of {"major_id":..,"field":..} or its JSON string).
 *
 * The e-mail is lower-cased before the duplicate check and before storage,
 * and must be syntactically valid. A missing or blank source is stored as
 * "manual".
 */
public function addExpert()
{
    $data  = $this->request->post();
    $name  = trim(isset($data['name']) ? $data['name'] : '');
    // Normalise case so "A@x.org" and "a@x.org" cannot both be registered.
    $email = strtolower(trim(isset($data['email']) ? $data['email'] : ''));
    if ($name === '' || $email === '') {
        return jsonError('name和email不能为空');
    }
    if (filter_var($email, FILTER_VALIDATE_EMAIL) === false) {
        return jsonError('email format is invalid');
    }

    $exists = Db::name('expert')->where('email', $email)->find();
    if ($exists) {
        return jsonError('该邮箱已存在expert_id=' . $exists['expert_id']);
    }

    // A blank source would otherwise be stored as '' and drop out of the
    // source filter options.
    $source = trim(isset($data['source']) ? $data['source'] : '');
    if ($source === '') {
        $source = 'manual';
    }

    $insert = [
        'name'        => $name,
        'email'       => $email,
        'affiliation' => trim(isset($data['affiliation']) ? $data['affiliation'] : ''),
        'source'      => $source,
        'ctime'       => time(),
        'ltime'       => 0,
        'state'       => 0,
    ];
    $expertId = Db::name('expert')->insertGetId($insert);

    if (!empty($data['fields'])) {
        $this->saveExpertFields($expertId, $data['fields']);
    }

    return jsonSuccess(['expert_id' => $expertId]);
}
/**
 * Edit an expert.
 *
 * POST fields: expert_id (required); name, email, affiliation, source,
 * state and fields are applied only when present. When "fields" is present
 * the expert's active fields are soft-deleted and replaced by the supplied
 * list.
 *
 * All validation runs before anything is written: name and email may not
 * be blanked, the e-mail may not belong to another expert, and state must
 * be within 0-5.
 */
public function editExpert()
{
    $data     = $this->request->post();
    $expertId = intval(isset($data['expert_id']) ? $data['expert_id'] : 0);
    if (!$expertId) {
        return jsonError('expert_id is required');
    }

    $expert = Db::name('expert')->where('expert_id', $expertId)->find();
    if (!$expert) {
        return jsonError('专家不存在');
    }

    $update = [];
    if (isset($data['name'])) {
        $name = trim($data['name']);
        if ($name === '') {
            return jsonError('name cannot be empty');
        }
        $update['name'] = $name;
    }
    if (isset($data['email'])) {
        $email = strtolower(trim($data['email']));
        if ($email === '') {
            return jsonError('email cannot be empty');
        }
        // The e-mail identifies an expert; refuse to collide with another row.
        $duplicate = Db::name('expert')
            ->where('email', $email)
            ->where('expert_id', '<>', $expertId)
            ->find();
        if ($duplicate) {
            return jsonError('该邮箱已存在expert_id=' . $duplicate['expert_id']);
        }
        $update['email'] = $email;
    }
    if (isset($data['affiliation'])) {
        $update['affiliation'] = trim($data['affiliation']);
    }
    if (isset($data['source'])) {
        $update['source'] = trim($data['source']);
    }
    if (isset($data['state'])) {
        $state = intval($data['state']);
        if ($state < 0 || $state > 5) {
            return jsonError('state取值范围0-5');
        }
        $update['state'] = $state;
    }

    if (!empty($update)) {
        Db::name('expert')->where('expert_id', $expertId)->update($update);
    }

    if (isset($data['fields'])) {
        // Replace semantics: retire the current active fields, then re-add.
        Db::name('expert_field')->where('expert_id', $expertId)->where('state', 0)->update(['state' => 1]);
        if (!empty($data['fields'])) {
            $this->saveExpertFields($expertId, $data['fields']);
        }
    }

    return jsonSuccess(['expert_id' => $expertId]);
}
/**
 * Set the state of several experts at once.
 *
 * POST fields:
 *   expert_ids - comma separated id list "1,2,3" (an array is also accepted)
 *   state      - target state, an integer 0-5
 *
 * Non-numeric state values are rejected rather than being coerced to 0,
 * and ids that are not positive integers are dropped.
 */
public function updateState()
{
    $data     = $this->request->post();
    $rawIds   = isset($data['expert_ids']) ? $data['expert_ids'] : '';
    $rawState = isset($data['state']) ? $data['state'] : null;

    if (empty($rawIds)) {
        return jsonError('expert_ids is required');
    }

    // intval('abc') is 0, which is a valid state; require digits first.
    $stateText = is_scalar($rawState) ? trim((string) $rawState) : '';
    if ($stateText === '' || !ctype_digit($stateText)) {
        return jsonError('state取值范围0-5');
    }
    $state = intval($stateText);
    if ($state < 0 || $state > 5) {
        return jsonError('state取值范围0-5');
    }

    $idParts = is_array($rawIds) ? $rawIds : explode(',', (string) $rawIds);
    $ids     = array_values(array_unique(array_filter(
        array_map('intval', $idParts),
        function ($id) {
            return $id > 0;
        }
    )));
    if (empty($ids)) {
        return jsonError('expert_ids is required');
    }

    $count = Db::name('expert')->where('expert_id', 'in', $ids)->update(['state' => $state]);

    return jsonSuccess(['updated' => $count]);
}
/**
 * Delete experts. The default is a soft delete that sets state 5.
 *
 * POST fields:
 *   expert_ids - comma separated id list (an array is also accepted)
 *   hard       - pass 1 to physically delete the experts and their fields
 *
 * Ids that are not positive integers are dropped. A hard delete removes the
 * field rows and the expert rows inside one transaction.
 */
public function deleteExpert()
{
    $data   = $this->request->post();
    $rawIds = isset($data['expert_ids']) ? $data['expert_ids'] : '';
    $hard   = intval(isset($data['hard']) ? $data['hard'] : 0);
    if (empty($rawIds)) {
        return jsonError('expert_ids is required');
    }

    $idParts = is_array($rawIds) ? $rawIds : explode(',', (string) $rawIds);
    $ids     = array_values(array_unique(array_filter(
        array_map('intval', $idParts),
        function ($id) {
            return $id > 0;
        }
    )));
    if (empty($ids)) {
        return jsonError('expert_ids is required');
    }

    if ($hard) {
        // Keep the two deletes atomic so a failure cannot leave experts
        // without their field rows removed, or the reverse.
        Db::startTrans();
        try {
            Db::name('expert_field')->where('expert_id', 'in', $ids)->delete();
            $count = Db::name('expert')->where('expert_id', 'in', $ids)->delete();
            Db::commit();
        } catch (\Exception $e) {
            Db::rollback();
            throw $e;
        }
    } else {
        $count = Db::name('expert')->where('expert_id', 'in', $ids)->update(['state' => 5]);
    }

    return jsonSuccess(['affected' => $count]);
}
/**
 * Attach one field (research area keyword) to an expert.
 *
 * POST fields: expert_id (required), field (required), major_id (optional).
 * Fails when the expert already has an active row with the same field text.
 */
public function addField()
{
    $post = $this->request->post();

    $expertId  = isset($post['expert_id']) ? $post['expert_id'] : 0;
    $majorId   = isset($post['major_id']) ? $post['major_id'] : 0;
    $fieldText = isset($post['field']) ? $post['field'] : '';

    $expertId  = intval($expertId);
    $majorId   = intval($majorId);
    $fieldText = trim($fieldText);

    if ($fieldText === '' || !$expertId) {
        return jsonError('expert_id和field不能为空');
    }

    $duplicate = Db::name('expert_field')
        ->where('expert_id', $expertId)
        ->where('field', $fieldText)
        ->where('state', 0)
        ->find();
    if ($duplicate) {
        return jsonError('该领域已存在');
    }

    $row = [
        'expert_id' => $expertId,
        'major_id'  => $majorId,
        'field'     => $fieldText,
        'state'     => 0,
    ];
    $newId = Db::name('expert_field')->insertGetId($row);

    return jsonSuccess(['expert_field_id' => $newId]);
}
/**
 * Soft-delete one expert field row by setting its state to 1.
 *
 * Request parameter: expert_field_id.
 */
public function removeField()
{
    $fieldRowId = intval($this->request->param('expert_field_id', 0));
    if ($fieldRowId === 0) {
        return jsonError('expert_field_id is required');
    }

    $retired = ['state' => 1];
    Db::name('expert_field')
        ->where('expert_field_id', $fieldRowId)
        ->update($retired);

    return jsonSuccess([]);
}
/**
 * Return the distinct field values of all active expert_field rows, for
 * use in a filter drop-down.
 */
public function getFieldOptions()
{
    $activeFields = Db::name('expert_field')->where('state', 0);
    $options      = $activeFields->group('field')->column('field');

    return jsonSuccess($options);
}
/**
 * Return the distinct non-empty source values of all experts, for use in
 * a filter drop-down.
 */
public function getSourceOptions()
{
    $withSource = Db::name('expert')->where('source', '<>', '');
    $options    = $withSource->group('source')->column('source');

    return jsonSuccess($options);
}
/**
 * Export the experts matching the filters to an .xlsx file under
 * public/exports and return its URL.
 *
 * Request parameters:
 *   field    - field keyword (LIKE match); also used in the file name
 *   major_id - major (discipline) id, optional
 *   state    - state code; '-1', '' or absent disables the filter
 *   keyword  - LIKE match against name / email / affiliation
 *   source   - exact match on source
 *
 * Free-text columns are written as explicit strings so that a value
 * beginning with "=" (expert data is partly scraped from external sources)
 * is never stored as a spreadsheet formula.
 */
public function exportExcel()
{
    $data    = $this->request->param();
    $field   = trim(isset($data['field']) ? $data['field'] : '');
    $majorId = intval(isset($data['major_id']) ? $data['major_id'] : 0);
    $state   = isset($data['state']) ? $data['state'] : '-1';
    $keyword = trim(isset($data['keyword']) ? $data['keyword'] : '');
    $source  = trim(isset($data['source']) ? $data['source'] : '');

    $query = Db::name('expert')->alias('e');
    if ($field !== '' || $majorId > 0) {
        $query->join('t_expert_field ef', 'ef.expert_id = e.expert_id AND ef.state = 0', 'inner');
        if ($field !== '') {
            $query->where('ef.field', 'like', '%' . $field . '%');
        }
        if ($majorId > 0) {
            $query->where('ef.major_id', $majorId);
        }
        $query->group('e.expert_id');
    }
    if ($state !== '-1' && $state !== '') {
        $query->where('e.state', intval($state));
    }
    if ($keyword !== '') {
        $query->where('e.name|e.email|e.affiliation', 'like', '%' . $keyword . '%');
    }
    if ($source !== '') {
        $query->where('e.source', $source);
    }

    $list = $query->field('e.*')->order('e.ctime desc')->select();
    if (empty($list)) {
        return jsonError('没有符合条件的数据可导出');
    }

    // One query for the fields of every exported expert.
    $expertIds = array_column($list, 'expert_id');
    $allFields = Db::name('expert_field')
        ->where('expert_id', 'in', $expertIds)
        ->where('state', 0)
        ->select();
    $fieldMap = [];
    foreach ($allFields as $f) {
        $fieldMap[$f['expert_id']][] = $f['field'];
    }

    vendor("PHPExcel.PHPExcel");
    $objPHPExcel = new \PHPExcel();
    $sheet       = $objPHPExcel->getActiveSheet();
    $sheet->setTitle('Expert List');

    $headers = [
        'A' => '#',
        'B' => 'Name',
        'C' => 'Email',
        'D' => 'Affiliation',
        'E' => 'Source',
        'F' => 'Fields',
        'G' => 'State',
        'H' => 'Add Time',
        'I' => 'Last Promotion',
    ];
    foreach ($headers as $col => $header) {
        $sheet->setCellValue($col . '1', $header);
    }
    $headerStyle = [
        'font'      => ['bold' => true, 'color' => ['rgb' => 'FFFFFF']],
        'fill'      => ['type' => \PHPExcel_Style_Fill::FILL_SOLID, 'startcolor' => ['rgb' => '4472C4']],
        'alignment' => ['horizontal' => \PHPExcel_Style_Alignment::HORIZONTAL_CENTER],
    ];
    $sheet->getStyle('A1:I1')->applyFromArray($headerStyle);

    $asString = \PHPExcel_Cell_DataType::TYPE_STRING;
    foreach ($list as $i => $item) {
        $row       = $i + 2; // row 1 holds the headers
        $fields    = isset($fieldMap[$item['expert_id']]) ? implode(', ', $fieldMap[$item['expert_id']]) : '';
        $stateText = isset($this->stateMap[$item['state']]) ? $this->stateMap[$item['state']] : '未知';

        $sheet->setCellValue('A' . $row, $i + 1);
        $sheet->setCellValueExplicit('B' . $row, (string) $item['name'], $asString);
        $sheet->setCellValueExplicit('C' . $row, $item['email'], $asString);
        $sheet->setCellValueExplicit('D' . $row, (string) $item['affiliation'], $asString);
        $sheet->setCellValueExplicit('E' . $row, (string) $item['source'], $asString);
        $sheet->setCellValueExplicit('F' . $row, $fields, $asString);
        $sheet->setCellValue('G' . $row, $stateText);
        $sheet->setCellValue('H' . $row, $item['ctime'] ? date('Y-m-d H:i:s', $item['ctime']) : '');
        $sheet->setCellValue('I' . $row, $item['ltime'] ? date('Y-m-d H:i:s', $item['ltime']) : '');
    }

    $widths = [
        'A' => 6, 'B' => 25, 'C' => 35, 'D' => 40, 'E' => 15,
        'F' => 50, 'G' => 15, 'H' => 20, 'I' => 20,
    ];
    foreach ($widths as $col => $width) {
        $sheet->getColumnDimension($col)->setWidth($width);
    }

    // Only letters, digits, underscore and CJK characters survive in the
    // file name; everything else becomes "_".
    $label    = $field !== '' ? preg_replace('/[^a-zA-Z0-9_\x{4e00}-\x{9fa5}]/u', '_', $field) : 'all';
    $filename = 'expert_' . $label . '_' . date('Ymd_His') . '.xlsx';
    $dir      = ROOT_PATH . 'public' . DS . 'exports';
    // Re-check is_dir() after mkdir() to tolerate a concurrent creation.
    if (!is_dir($dir) && !mkdir($dir, 0777, true) && !is_dir($dir)) {
        return jsonError('Failed to create export directory');
    }
    $filepath = $dir . DS . $filename;

    $writer = \PHPExcel_IOFactory::createWriter($objPHPExcel, 'Excel2007');
    $writer->save($filepath);

    return jsonSuccess([
        'file_url'  => '/exports/' . $filename,
        'file_name' => $filename,
        'count'     => count($list),
    ]);
}
/**
 * Insert the given fields for an expert, skipping blanks and any field the
 * expert already has as an active row.
 *
 * @param int          $expertId
 * @param array|string $fields   [{"major_id":1,"field":"xxx"}, ...] or the
 *                               same structure as a JSON string; any other
 *                               shape is ignored.
 */
private function saveExpertFields($expertId, $fields)
{
    $entries = is_string($fields) ? json_decode($fields, true) : $fields;
    if (!is_array($entries)) {
        return;
    }

    foreach ($entries as $entry) {
        $majorId = intval(isset($entry['major_id']) ? $entry['major_id'] : 0);
        $label   = trim(isset($entry['field']) ? $entry['field'] : '');
        if ($label === '') {
            continue;
        }

        $alreadyActive = Db::name('expert_field')
            ->where('expert_id', $expertId)
            ->where('field', $label)
            ->where('state', 0)
            ->find();

        if (!$alreadyActive) {
            Db::name('expert_field')->insert([
                'expert_id' => $expertId,
                'major_id'  => $majorId,
                'field'     => $label,
                'state'     => 0,
            ]);
        }
    }
}
}

View File

@@ -334,6 +334,14 @@ class Journal extends Base {
$aJournalUpdate['wechat_app_secret'] = $update['wechat_app_secret'];
}
if(isset($data['editor_name'])&&$data['editor_name']!=''){
$update['editor_name'] = $data['editor_name'];
}
if(isset($data['databases'])&&$data['databases']!=''){
$update['databases'] = $data['databases'];
}
if(!empty($aJournalUpdate)){
$aJournalUpdate['issn'] = $journal_info['issn'];
$sUrl = $this->sJournalUrl."wechat/Article/updateJournal";

View File

@@ -0,0 +1,261 @@
<?php
namespace app\api\controller;
use think\Db;
use think\Validate;
class MailTemplate extends Base
{
/**
 * Create or update a mail template (V2).
 *
 * POST fields:
 *   template_id (optional; when set the existing template is updated)
 *   journal_id, scene, language, title, subject (required)
 *   body_html, body_text, variables_json, version, is_active (optional)
 *
 * On create, omitted optional fields get their defaults ('' for the text
 * fields, 1 for version and is_active). On update, omitted optional fields
 * keep their stored values instead of being reset to those defaults.
 */
public function saveTemplate()
{
    $data = $this->request->post();
    $rule = new Validate([
        'journal_id' => 'require|number',
        'scene'      => 'require',
        'language'   => 'require',
        'title'      => 'require',
        'subject'    => 'require',
    ]);
    if (!$rule->check($data)) {
        return jsonError($rule->getError());
    }

    $templateId = intval($data['template_id'] ?? 0);
    $isUpdate   = $templateId !== 0;

    $payload = [
        'journal_id' => intval($data['journal_id']),
        'scene'      => trim($data['scene']),
        'language'   => trim($data['language']),
        'title'      => trim($data['title']),
        'subject'    => trim($data['subject']),
        'utime'      => time(),
    ];

    // Optional fields: take the posted value when present, fall back to the
    // default only when creating.
    foreach (['body_html' => '', 'body_text' => '', 'variables_json' => ''] as $key => $default) {
        if (isset($data[$key])) {
            $payload[$key] = $data[$key];
        } elseif (!$isUpdate) {
            $payload[$key] = $default;
        }
    }
    foreach (['version' => 1, 'is_active' => 1] as $key => $default) {
        if (isset($data[$key])) {
            $payload[$key] = intval($data[$key]);
        } elseif (!$isUpdate) {
            $payload[$key] = $default;
        }
    }

    if ($isUpdate) {
        $exists = Db::name('mail_template')->where('template_id', $templateId)->where('state', 0)->find();
        if (!$exists) {
            return jsonError('Template not found');
        }
        Db::name('mail_template')->where('template_id', $templateId)->update($payload);
        return jsonSuccess(['template_id' => $templateId]);
    }

    $payload['ctime'] = time();
    $payload['state'] = 0;
    $newId = Db::name('mail_template')->insertGetId($payload);

    return jsonSuccess(['template_id' => $newId]);
}
/**
 * Soft-delete a template by setting its state to 1 and refreshing utime.
 *
 * Request parameter: template_id.
 */
public function deleteTemplate()
{
    $templateId = intval($this->request->param('template_id', 0));
    if ($templateId === 0) {
        return jsonError('template_id is required');
    }

    $changes = [
        'state' => 1,
        'utime' => time(),
    ];
    Db::name('mail_template')
        ->where('template_id', $templateId)
        ->update($changes);

    return jsonSuccess([]);
}
/**
 * Return one non-deleted template.
 *
 * Request parameter: template_id.
 */
public function getTemplate()
{
    $templateId = intval($this->request->param('template_id', 0));
    if ($templateId === 0) {
        return jsonError('template_id is required');
    }

    $template = Db::name('mail_template')
        ->where('template_id', $templateId)
        ->where('state', 0)
        ->find();

    return $template
        ? jsonSuccess(['template' => $template])
        : jsonError('Template not found');
}
/**
 * List the non-deleted templates of one journal.
 *
 * Request parameters: journal_id (required); scene, language and is_active
 * are optional filters applied only when non-empty.
 */
public function listTemplates()
{
    $request   = $this->request;
    $journalId = intval($request->param('journal_id', 0));
    if ($journalId === 0) {
        return jsonError('journal_id is required');
    }

    $scene    = trim($request->param('scene', ''));
    $language = trim($request->param('language', ''));
    $isActive = $request->param('is_active', '');

    $conditions = ['journal_id' => $journalId, 'state' => 0];
    if ($scene !== '') {
        $conditions['scene'] = $scene;
    }
    if ($language !== '') {
        $conditions['language'] = $language;
    }
    if ($isActive !== '') {
        $conditions['is_active'] = intval($isActive);
    }

    $templates = Db::name('mail_template')
        ->where($conditions)
        ->order('is_active desc, utime desc, template_id desc')
        ->select();

    return jsonSuccess(['list' => $templates]);
}
/**
 * List every non-deleted template across all journals, active ones first,
 * then most recently updated.
 */
public function listTemplatesAll()
{
    $templates = Db::name('mail_template')->where('state', 0);
    $sorted    = $templates->order('is_active desc, utime desc, template_id desc')->select();

    return jsonSuccess(['list' => $sorted]);
}
/**
 * Create or update a global mail style.
 *
 * Style table columns used here:
 *   style_id, name, description, header_html, footer_html, state, ctime
 *
 * POST fields: style_id (optional, selects update), name (required),
 * description, header_html, footer_html (optional). On update, omitted
 * optional fields keep their stored values instead of being blanked.
 */
public function saveStyle()
{
    $data = $this->request->post();
    $rule = new Validate([
        'name' => 'require',
    ]);
    if (!$rule->check($data)) {
        return jsonError($rule->getError());
    }

    $styleId  = intval($data['style_id'] ?? 0);
    $isUpdate = $styleId !== 0;

    $payload = [
        'name'  => trim($data['name']),
        'state' => 0,
    ];
    if (isset($data['description'])) {
        $payload['description'] = trim($data['description']);
    } elseif (!$isUpdate) {
        $payload['description'] = '';
    }
    // HTML fragments are stored as posted, without trimming.
    foreach (['header_html', 'footer_html'] as $key) {
        if (isset($data[$key])) {
            $payload[$key] = $data[$key];
        } elseif (!$isUpdate) {
            $payload[$key] = '';
        }
    }

    if ($isUpdate) {
        $exists = Db::name('mail_style')->where('style_id', $styleId)->where('state', 0)->find();
        if (!$exists) {
            return jsonError('Style not found');
        }
        Db::name('mail_style')->where('style_id', $styleId)->update($payload);
        return jsonSuccess(['style_id' => $styleId]);
    }

    $payload['ctime'] = time();
    $newId = Db::name('mail_style')->insertGetId($payload);

    return jsonSuccess(['style_id' => $newId]);
}
/**
 * Soft-delete a global mail style by setting its state to 1.
 *
 * Request parameter: style_id.
 */
public function deleteStyle()
{
    $styleId = intval($this->request->param('style_id', 0));
    if ($styleId === 0) {
        return jsonError('style_id is required');
    }

    $retired = ['state' => 1];
    Db::name('mail_style')
        ->where('style_id', $styleId)
        ->update($retired);

    return jsonSuccess([]);
}
/**
 * Return one non-deleted mail style.
 *
 * Request parameter: style_id.
 */
public function getStyle()
{
    $styleId = intval($this->request->param('style_id', 0));
    if ($styleId === 0) {
        return jsonError('style_id is required');
    }

    $record = Db::name('mail_style')
        ->where('style_id', $styleId)
        ->where('state', 0)
        ->find();

    return $record
        ? jsonSuccess(['style' => $record])
        : jsonError('Style not found');
}
/**
 * List all non-deleted mail styles, newest style_id first.
 *
 * Styles are no longer separated by scene or language, so no filters are
 * applied beyond state = 0.
 */
public function listStyles()
{
    $styles = Db::name('mail_style')
        ->where(['state' => 0])
        ->order('style_id desc')
        ->select();

    return jsonSuccess(['list' => $styles]);
}
/**
 * Render a preview of a template's subject and HTML body.
 *
 * Request parameters: template_id (required), vars (JSON object string of
 * placeholder values; ignored unless it decodes to an array).
 * No mail style is applied; the caller can combine the result with a style.
 */
public function preview()
{
    $templateId = intval($this->request->param('template_id', 0));
    $rawVars    = $this->request->param('vars', '');
    if ($templateId === 0) {
        return jsonError('template_id is required');
    }

    $template = Db::name('mail_template')
        ->where('template_id', $templateId)
        ->where('state', 0)
        ->find();
    if (!$template) {
        return jsonError('Template not found');
    }

    $values = [];
    if ($rawVars) {
        $parsed = json_decode($rawVars, true);
        $values = is_array($parsed) ? $parsed : $values;
    }

    $rendered = [
        'subject' => $this->render($template['subject'], $values),
        'body'    => $this->render($template['body_html'], $values),
    ];

    return jsonSuccess($rendered);
}
/**
 * Substitute placeholders in a template string.
 *
 * Each variable "key" replaces both "{{key}}" and the legacy "{key}" form.
 * Replacement is done in a single pass, so placeholder-looking text inside
 * an inserted value is left as is and never substituted again.
 *
 * @param mixed $tpl  Template text; a non-string or '' yields ''.
 * @param mixed $vars Map of placeholder name => value. Blank keys and array
 *                    values are skipped; other values are cast to string.
 * @return string Rendered text.
 */
private function render($tpl, $vars)
{
    // Compare with '' rather than empty() so the template "0" is kept.
    if (!is_string($tpl) || $tpl === '') {
        return '';
    }
    if (!is_array($vars) || empty($vars)) {
        return $tpl;
    }

    $replace = [];
    foreach ($vars as $k => $v) {
        $key = trim((string) $k);
        if ($key === '' || is_array($v)) {
            continue;
        }
        $value = (string) $v;
        $replace['{{' . $key . '}}'] = $value;
        // backward compatible placeholders
        $replace['{' . $key . '}'] = $value;
    }
    if (empty($replace)) {
        return $tpl;
    }

    // strtr() matches the longest key first and never rescans replaced text.
    return strtr($tpl, $replace);
}
}

View File

@@ -3,24 +3,48 @@
namespace app\api\job;
use think\queue\Job;
use think\Log;
use app\common\ExpertFinderService;
/**
* 专家抓取队列任务
* 注意:此任务推送到队列名 "FetchExperts",必须单独启动 worker 才会执行:
* php think queue:listen --queue FetchExperts
* 若只运行 queue:listen 不指定队列,默认只消费 "mail",本任务不会被执行。
*/
class FetchExperts
{
/**
 * Queue entry point: run one expert-fetch pass for $data['field'] and then
 * delete the job.
 *
 * Keys read from $data: field, source (default 'pubmed'), per_page
 * (default 100), min_year (default null).
 *
 * NOTE(review): as shown here this span interleaves two versions of the
 * method - an older one calling the ExpertFinder controller with Log
 * calls, and a newer one calling ExpertFinderService with its logging and
 * retry handling commented out. Confirm against the real file which lines
 * are live before relying on this description.
 */
public function fire(Job $job, $data)
{
try {
$finder = new \app\api\controller\ExpertFinder();
$result = $finder->doFetchForField(
$data['field'],
$data['source'] ?? 'pubmed',
$data['per_page'] ?? 100,
$data['min_year'] ?? null
$field = isset($data['field']) ? $data['field'] : '';
// $attempts = $job->attempts();
//
$service = new ExpertFinderService();
// $service->log('[FetchExperts] start field=' . $field . ' attempts=' . $attempts);
//
// try {
$result = $service->doFetchForField(
$field,
isset($data['source']) ? $data['source'] : 'pubmed',
isset($data['per_page']) ? intval($data['per_page']) : 100,
isset($data['min_year']) ? $data['min_year'] : null
);
Log::info('FetchExperts completed: ' . json_encode($result));
} catch (\Exception $e) {
Log::error('FetchExperts failed: ' . $e->getMessage());
}
// $service->log('[FetchExperts] completed field=' . $field . ' result=' . json_encode($result));
// } catch (\Throwable $e) {
// $service->log(
// '[FetchExperts] failed field=' . $field .
// ' msg=' . $e->getMessage() .
// ' file=' . $e->getFile() .
// ' line=' . $e->getLine()
// );
//
// if ($attempts >= 3) {
// $job->delete();
// return;
// }
//
// $job->release(60);
// return;
// }
$job->delete();
}

View File

@@ -0,0 +1,35 @@
<?php
namespace app\api\job;
use think\queue\Job;
use app\common\PromotionService;
/**
 * Queue job that sends the next promotion e-mail of a task.
 */
class PromotionSend
{
    /**
     * Process one e-mail for $data['task_id'] and then delete the job.
     *
     * A missing or zero task_id deletes the job without doing any work.
     * Exceptions from the service are not caught here, so in that case the
     * job is not deleted by this method.
     *
     * @param Job   $job  Queue job handle.
     * @param array $data Job payload; reads the 'task_id' key.
     */
    public function fire(Job $job, $data)
    {
        $taskId  = intval(isset($data['task_id']) ? $data['task_id'] : 0);
        $service = new PromotionService();

        if ($taskId) {
            $service->processNextEmail($taskId);
        }

        $job->delete();
    }
}

View File

@@ -0,0 +1,593 @@
<?php
namespace app\common;
use think\Db;
use GuzzleHttp\Client;
class ExpertFinderService
{
// Guzzle client used for every NCBI request.
private $httpClient;
// Base URL of the NCBI E-utilities endpoints (esearch.fcgi / efetch.fcgi).
private $ncbiBaseUrl = 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/';
// Path of this service's log file under runtime/.
private $logFile;
/**
 * Build the HTTP client (180 s request timeout, 15 s connect timeout) and
 * set the log file path.
 *
 * NOTE(review): 'verify' => false turns off TLS certificate verification
 * for all requests made through this client. Confirm this is intentional;
 * enabling verification with a CA bundle is the safer setting.
 */
public function __construct()
{
$this->httpClient = new Client([
'timeout' => 180,
'connect_timeout' => 15,
'verify' => false,
]);
$this->logFile = ROOT_PATH . 'runtime' . DS . 'expert_finder.log';
}
/**
 * Fetch the next page of experts for a field, store them and record
 * progress in the fetch log.
 *
 * The page fetched is the logged last_page + 1. After the run the log
 * stores that page number while more pages remain, or 0 when the last page
 * was reached, so the next run starts again from page 1.
 *
 * @param string   $field   Search keyword / field name.
 * @param string   $source  'pmc' selects PMC; anything else uses PubMed.
 * @param int      $perPage Articles requested per page.
 * @param int|null $minYear Earliest publication year; null means the
 *                          current year minus 3.
 * @return array Summary: keyword, page, experts_found, saved_new,
 *               saved_exist, field_enriched, has_more.
 */
public function doFetchForField($field, $source = 'pubmed', $perPage = 100, $minYear = null)
{
    $fromYear = ($minYear === null) ? date('Y') - 3 : $minYear;

    $progress = $this->getFetchLog($field, $source);
    $page     = $progress['last_page'] + 1;

    // searchExperts() performs the same pmc / pubmed dispatch.
    $found = $this->searchExperts($field, $perPage, $fromYear, $page, $source);
    $saved = $this->saveExperts($found['experts'], $field, $source);

    $lastPageToStore = $found['has_more'] ? $page : 0;
    $totalPages      = 0;
    if (isset($found['total_pages'])) {
        $totalPages = $found['total_pages'];
    }
    $this->updateFetchLog($field, $source, $lastPageToStore, $totalPages);

    return [
        'keyword'        => $field,
        'page'           => $page,
        'experts_found'  => $found['total'],
        'saved_new'      => $saved['inserted'],
        'saved_exist'    => $saved['existing'],
        'field_enriched' => $saved['field_enriched'],
        'has_more'       => $found['has_more'],
    ];
}
/**
 * Search one page of experts in the chosen source.
 *
 * @param string $keyword Search term.
 * @param int    $perPage Articles per page.
 * @param int    $minYear Earliest publication year.
 * @param int    $page    1-based page number.
 * @param string $source  'pmc' selects PMC; any other value uses PubMed.
 * @return array Paged result as produced by the source-specific search.
 */
public function searchExperts($keyword, $perPage, $minYear, $page, $source)
{
    $usePmc = ($source === 'pmc');

    return $usePmc
        ? $this->searchViaPMC($keyword, $perPage, $minYear, $page)
        : $this->searchViaPubMed($keyword, $perPage, $minYear, $page);
}
/**
 * Store a list of experts, keyed by lower-cased e-mail.
 *
 * Known e-mails are counted as existing and only get the field attached;
 * unknown ones are inserted and then get the field attached. An exception
 * during insert or field attachment counts the expert as existing.
 *
 * @param array  $experts Rows with 'name', 'email' and 'affiliation'.
 * @param string $field   Field keyword to attach to every expert.
 * @param string $source  Source label stored on new experts.
 * @return array ['inserted' => int, 'existing' => int, 'field_enriched' => int]
 */
public function saveExperts($experts, $field, $source)
{
    $stats = ['inserted' => 0, 'existing' => 0, 'field_enriched' => 0];

    foreach ($experts as $candidate) {
        $email = strtolower(trim($candidate['email']));
        if (empty($email)) {
            continue;
        }

        $known = Db::name('expert')->where('email', $email)->find();
        if ($known) {
            $stats['existing']++;
            $stats['field_enriched'] += $this->enrichExpertField($known['expert_id'], $field);
            continue;
        }

        // Values are cut to the lengths used for storage.
        $row = [
            'name'        => mb_substr($candidate['name'], 0, 255),
            'email'       => mb_substr($email, 0, 128),
            'affiliation' => mb_substr($candidate['affiliation'], 0, 128),
            'source'      => mb_substr($source, 0, 128),
            'ctime'       => time(),
            'ltime'       => 0,
            'state'       => 0,
        ];

        try {
            $newExpertId = Db::name('expert')->insertGetId($row);
            $this->enrichExpertField($newExpertId, $field);
            $stats['inserted']++;
        } catch (\Exception $e) {
            $stats['existing']++;
        }
    }

    return $stats;
}
/**
 * Read the fetch progress row for a field / source pair.
 *
 * The lookup uses field and source cut to 128 characters, the same length
 * applied when progress rows are inserted, so values longer than that
 * still find their row.
 *
 * @param string $field
 * @param string $source
 * @return array The expert_fetch row, or zeroed last_page / total_pages /
 *               last_time when no row exists.
 */
public function getFetchLog($field, $source)
{
    $log = Db::name('expert_fetch')
        ->where('field', mb_substr($field, 0, 128))
        ->where('source', mb_substr($source, 0, 128))
        ->find();
    if (!$log) {
        return ['last_page' => 0, 'total_pages' => 0, 'last_time' => 0];
    }

    return $log;
}
/**
 * Insert or update the fetch progress row for a field / source pair.
 *
 * Field and source are cut to 128 characters before the lookup as well as
 * before the insert. Otherwise a longer value would never match its own
 * stored row and every call would insert another one.
 *
 * @param string $field
 * @param string $source
 * @param int    $lastPage   Page number to store as last_page.
 * @param int    $totalPages Total page count reported by the search.
 * @return void
 */
public function updateFetchLog($field, $source, $lastPage, $totalPages)
{
    $storedField  = mb_substr($field, 0, 128);
    $storedSource = mb_substr($source, 0, 128);

    $exists = Db::name('expert_fetch')
        ->where('field', $storedField)
        ->where('source', $storedSource)
        ->find();

    if ($exists) {
        Db::name('expert_fetch')
            ->where('expert_fetch_id', $exists['expert_fetch_id'])
            ->update([
                'last_page'   => $lastPage,
                'total_pages' => $totalPages,
                'last_time'   => time(),
            ]);
        return;
    }

    Db::name('expert_fetch')->insert([
        'field'       => $storedField,
        'source'      => $storedSource,
        'last_page'   => $lastPage,
        'total_pages' => $totalPages,
        'last_time'   => time(),
    ]);
}
// ==================== PubMed Search ====================
/**
 * Search PubMed for one page of articles and extract their authors.
 *
 * Article ids are fetched in batches of 50 with a 0.4 s pause after each
 * batch; batches that yield no XML are skipped.
 *
 * @param string $keyword Search term.
 * @param int    $perPage Articles per page.
 * @param int    $minYear Earliest publication year.
 * @param int    $page    1-based page number.
 * @return array Paged result built by buildPagedResult().
 */
private function searchViaPubMed($keyword, $perPage, $minYear, $page = 1)
{
    set_time_limit(600);

    $search       = $this->esearch('pubmed', $keyword, $perPage, $minYear, $page);
    $articleIds   = $search['ids'];
    $articleTotal = $search['total'];

    if (empty($articleIds)) {
        return $this->buildPagedResult([], 0, 0, $articleTotal, $page, $perPage, 'pubmed');
    }

    $authorRecords = [];
    foreach (array_chunk($articleIds, 50) as $idBatch) {
        $batchXml = $this->efetchWithRetry('pubmed', $idBatch);
        if ($batchXml) {
            foreach ($this->parsePubMedXml($batchXml) as $record) {
                $authorRecords[] = $record;
            }
        }
        usleep(400000);
    }

    $experts = $this->aggregateExperts($authorRecords);

    return $this->buildPagedResult(
        $experts,
        count($experts),
        count($articleIds),
        $articleTotal,
        $page,
        $perPage,
        'pubmed'
    );
}
// ==================== PMC Search ====================
/**
 * Search PMC for one page of articles and extract their authors.
 *
 * Article ids are fetched in batches of 5 with a 0.5 s pause after each
 * batch; batches that yield no XML are skipped.
 *
 * @param string $keyword Search term.
 * @param int    $perPage Articles per page.
 * @param int    $minYear Earliest publication year.
 * @param int    $page    1-based page number.
 * @return array Paged result built by buildPagedResult().
 */
private function searchViaPMC($keyword, $perPage, $minYear, $page = 1)
{
    set_time_limit(600);

    $search       = $this->esearch('pmc', $keyword, $perPage, $minYear, $page);
    $articleIds   = $search['ids'];
    $articleTotal = $search['total'];

    if (empty($articleIds)) {
        return $this->buildPagedResult([], 0, 0, $articleTotal, $page, $perPage, 'pmc');
    }

    $authorRecords = [];
    foreach (array_chunk($articleIds, 5) as $idBatch) {
        $batchXml = $this->efetchWithRetry('pmc', $idBatch);
        if ($batchXml) {
            foreach ($this->parsePMCXml($batchXml) as $record) {
                $authorRecords[] = $record;
            }
        }
        usleep(500000);
    }

    $experts = $this->aggregateExperts($authorRecords);

    return $this->buildPagedResult(
        $experts,
        count($experts),
        count($articleIds),
        $articleTotal,
        $page,
        $perPage,
        'pmc'
    );
}
// ==================== NCBI API ====================
/**
 * Call esearch.fcgi and return the matching article ids for one page.
 *
 * The term sent is "<keyword> AND <minYear>:<current year>[pdat]", sorted
 * by relevance.
 *
 * @param string $db      NCBI database name ('pubmed' or 'pmc').
 * @param string $keyword Search term.
 * @param int    $perPage Page size (retmax).
 * @param int    $minYear Earliest publication year.
 * @param int    $page    1-based page number.
 * @return array ['ids' => string[], 'total' => int]
 */
private function esearch($db, $keyword, $perPage, $minYear, $page = 1)
{
    $dateRange = $minYear . ':' . date('Y') . '[pdat]';
    $params    = [
        'db'       => $db,
        'term'     => $keyword . ' AND ' . $dateRange,
        'retstart' => ($page - 1) * $perPage,
        'retmax'   => $perPage,
        'retmode'  => 'json',
        'sort'     => 'relevance',
    ];

    $response = $this->httpClient->get($this->ncbiBaseUrl . 'esearch.fcgi', ['query' => $params]);
    $payload  = json_decode($response->getBody()->getContents(), true);

    return [
        'ids'   => $payload['esearchresult']['idlist'] ?? [],
        'total' => intval($payload['esearchresult']['count'] ?? 0),
    ];
}
/**
 * POST to efetch.fcgi and return the raw XML for the given article ids.
 *
 * @param string $db  NCBI database name.
 * @param array  $ids Article ids, sent comma separated.
 * @return string Response body.
 */
private function efetch($db, $ids)
{
    $form = [
        'db'      => $db,
        'id'      => implode(',', $ids),
        'retmode' => 'xml',
    ];
    $endpoint = $this->ncbiBaseUrl . 'efetch.fcgi';

    return $this->httpClient
        ->post($endpoint, ['form_params' => $form])
        ->getBody()
        ->getContents();
}
/**
 * Fetch article XML, retrying on failure.
 *
 * A failed attempt before the last one sleeps attempt * 2 seconds and
 * retries. When the last attempt fails, a batch of more than one id is
 * split in half, each half is fetched with 2 attempts and the results are
 * merged; a single id yields null.
 *
 * @param string $db         NCBI database name.
 * @param array  $ids        Article ids.
 * @param int    $maxRetries Number of attempts for this batch.
 * @return string|null XML text, or null when nothing could be fetched.
 */
private function efetchWithRetry($db, $ids, $maxRetries = 3)
{
    $attempt = 0;
    while (++$attempt <= $maxRetries) {
        try {
            return $this->efetch($db, $ids);
        } catch (\Exception $e) {
            if ($attempt !== $maxRetries) {
                sleep($attempt * 2);
                continue;
            }
            if (count($ids) <= 1) {
                return null;
            }

            // Out of attempts: halve the batch and try each part separately.
            $half        = ceil(count($ids) / 2);
            $leftIds     = array_slice($ids, 0, $half);
            $rightIds    = array_slice($ids, $half);
            $leftResult  = $this->efetchWithRetry($db, $leftIds, 2);
            $rightResult = $this->efetchWithRetry($db, $rightIds, 2);

            return $this->mergeXml($leftResult, $rightResult);
        }
    }

    return null;
}
/**
 * Merge two XML documents into one well-formed document.
 *
 * The children of the second document's root element are appended to the
 * first document's root. Plain concatenation would produce two root
 * elements, which an XML parser rejects as a whole.
 *
 * @param string|null $xml1 First document; empty means "use $xml2".
 * @param string|null $xml2 Second document; empty means "use $xml1".
 * @return string|null Merged XML. If only one input parses, that input is
 *                     returned unchanged.
 */
private function mergeXml($xml1, $xml2)
{
    if (empty($xml1)) {
        return $xml2;
    }
    if (empty($xml2)) {
        return $xml1;
    }

    // Collect parse errors internally instead of emitting warnings, then
    // restore the caller's setting.
    $previous = libxml_use_internal_errors(true);
    $first    = new \DOMDocument();
    $second   = new \DOMDocument();
    $firstOk  = $first->loadXML($xml1) && $first->documentElement !== null;
    $secondOk = $second->loadXML($xml2) && $second->documentElement !== null;
    libxml_clear_errors();
    libxml_use_internal_errors($previous);

    if (!$secondOk) {
        return $xml1;
    }
    if (!$firstOk) {
        return $xml2;
    }

    // importNode() copies, so the list being iterated is not modified.
    foreach ($second->documentElement->childNodes as $child) {
        $first->documentElement->appendChild($first->importNode($child, true));
    }

    return $first->saveXML();
}
// ==================== PubMed XML Parsing ====================
/**
 * Extract author records from a PubMed efetch XML document.
 *
 * One record is produced per author who has a name and an e-mail address
 * found in any of that author's affiliation strings. The first
 * affiliation string is used as the affiliation.
 *
 * @param string $xmlString Raw efetch XML.
 * @return array List of ['name','email','affiliation','article_title',
 *               'article_id','journal']; empty when the XML does not parse.
 */
private function parsePubMedXml($xmlString)
{
    $results = [];
    libxml_use_internal_errors(true);
    $xml = simplexml_load_string($xmlString);
    // Internal errors accumulate in a buffer until cleared; release them so
    // a long-running queue worker does not keep growing it.
    libxml_clear_errors();
    if ($xml === false) {
        return $results;
    }

    foreach ($xml->PubmedArticle as $article) {
        $citation    = $article->MedlineCitation;
        $articleData = $citation->Article;
        $title       = $this->xmlNodeToString($articleData->ArticleTitle);
        $pmid        = (string) $citation->PMID;
        $journal     = '';
        if (isset($articleData->Journal->Title)) {
            $journal = (string) $articleData->Journal->Title;
        }
        if (!isset($articleData->AuthorList->Author)) {
            continue;
        }

        foreach ($articleData->AuthorList->Author as $author) {
            $lastName = (string) ($author->LastName ?? '');
            $foreName = (string) ($author->ForeName ?? '');
            $fullName = trim($foreName . ' ' . $lastName);
            if (empty($fullName)) {
                continue;
            }

            $email       = '';
            $affiliation = '';
            if (isset($author->AffiliationInfo)) {
                foreach ($author->AffiliationInfo as $affInfo) {
                    $affText = (string) $affInfo->Affiliation;
                    if (empty($affiliation)) {
                        $affiliation = $affText;
                    }
                    if (empty($email)) {
                        $email = $this->extractEmailFromText($affText);
                    }
                }
            }
            // Authors without a reachable address are not recorded.
            if (empty($email)) {
                continue;
            }

            $results[] = [
                'name'          => $fullName,
                'email'         => strtolower($email),
                'affiliation'   => $this->cleanAffiliation($affiliation),
                'article_title' => $title,
                'article_id'    => $pmid,
                'journal'       => $journal,
            ];
        }
    }

    return $results;
}
// ==================== PMC XML Parsing ====================
/**
 * Extract author records from a PMC efetch XML document.
 *
 * For each contributor of type "author" in an article's contrib-group the
 * e-mail is taken, in order, from the contributor's own <email>, from the
 * author-notes addresses when the contributor is marked as corresponding,
 * or from the affiliation text. Contributors without an e-mail are skipped.
 *
 * @param string $xmlString Raw efetch XML.
 * @return array List of ['name','email','affiliation','article_title',
 *               'article_id','journal']; empty when the XML does not parse.
 */
private function parsePMCXml($xmlString)
{
    $results = [];
    libxml_use_internal_errors(true);
    $xml = simplexml_load_string($xmlString);
    // Internal errors accumulate in a buffer until cleared; release them so
    // a long-running queue worker does not keep growing it.
    libxml_clear_errors();
    if ($xml === false) {
        return $results;
    }

    $articles = $xml->article ?? $xml->children();
    foreach ($articles as $article) {
        if ($article->getName() !== 'article') {
            continue;
        }
        $front = $article->front;
        if (!$front) {
            continue;
        }
        $articleMeta = $front->{'article-meta'};
        if (!$articleMeta) {
            continue;
        }

        $title = $this->xmlNodeToString($articleMeta->{'title-group'}->{'article-title'} ?? null);

        $pmcId = '';
        if (isset($articleMeta->{'article-id'})) {
            foreach ($articleMeta->{'article-id'} as $idNode) {
                if ((string) $idNode['pub-id-type'] === 'pmc') {
                    $pmcId = (string) $idNode;
                }
            }
        }

        // The journal title sits either directly under journal-meta or
        // inside journal-title-group.
        $journal = '';
        if (isset($front->{'journal-meta'}->{'journal-title'})) {
            $journal = (string) $front->{'journal-meta'}->{'journal-title'};
        } elseif (isset($front->{'journal-meta'}->{'journal-title-group'}->{'journal-title'})) {
            $journal = (string) $front->{'journal-meta'}->{'journal-title-group'}->{'journal-title'};
        }

        // Addresses listed in the author notes, used for corresponding authors.
        $correspEmails = [];
        if (isset($articleMeta->{'author-notes'})) {
            $this->extractEmailsFromNode($articleMeta->{'author-notes'}, $correspEmails);
        }

        // aff id => affiliation text, from inside contrib-group and from
        // article-meta itself.
        $affiliationMap = [];
        if (isset($articleMeta->{'contrib-group'})) {
            foreach ($articleMeta->{'contrib-group'}->children() as $child) {
                if ($child->getName() === 'aff') {
                    $affId   = (string) ($child['id'] ?? '');
                    $affText = $this->xmlNodeToString($child);
                    if ($affId) {
                        $affiliationMap[$affId] = $affText;
                    }
                }
            }
        }
        if (isset($front->{'article-meta'}->{'aff'})) {
            foreach ($front->{'article-meta'}->{'aff'} as $aff) {
                $affId   = (string) ($aff['id'] ?? '');
                $affText = $this->xmlNodeToString($aff);
                if ($affId) {
                    $affiliationMap[$affId] = $affText;
                }
            }
        }

        if (!isset($articleMeta->{'contrib-group'})) {
            continue;
        }
        foreach ($articleMeta->{'contrib-group'}->contrib as $contrib) {
            if ((string) ($contrib['contrib-type'] ?? '') !== 'author') {
                continue;
            }
            $nameNode = $contrib->name;
            if (!$nameNode) {
                continue;
            }
            $surname    = (string) ($nameNode->surname ?? '');
            $givenNames = (string) ($nameNode->{'given-names'} ?? '');
            $fullName   = trim($givenNames . ' ' . $surname);
            if (empty($fullName)) {
                continue;
            }

            $email = '';
            if (isset($contrib->email)) {
                $email = strtolower(trim((string) $contrib->email));
            }

            // First xref of type "aff" that resolves in the map wins.
            $affiliation = '';
            if (isset($contrib->xref)) {
                foreach ($contrib->xref as $xref) {
                    if ((string) $xref['ref-type'] === 'aff') {
                        $rid = (string) $xref['rid'];
                        if (isset($affiliationMap[$rid])) {
                            $affiliation = $affiliationMap[$rid];
                            break;
                        }
                    }
                }
            }
            if (empty($affiliation) && isset($contrib->aff)) {
                $affiliation = $this->xmlNodeToString($contrib->aff);
            }

            $isCorresponding = false;
            if (isset($contrib->xref)) {
                foreach ($contrib->xref as $xref) {
                    if ((string) $xref['ref-type'] === 'corresp') {
                        $isCorresponding = true;
                    }
                }
            }
            if ((string) ($contrib['corresp'] ?? '') === 'yes') {
                $isCorresponding = true;
            }

            if (empty($email) && $isCorresponding && !empty($correspEmails)) {
                $email = $correspEmails[0];
            }
            if (empty($email)) {
                $extracted = $this->extractEmailFromText($affiliation);
                if ($extracted) {
                    $email = $extracted;
                }
            }
            if (empty($email)) {
                continue;
            }

            $results[] = [
                'name'          => $fullName,
                'email'         => strtolower($email),
                'affiliation'   => $this->cleanAffiliation($affiliation),
                'article_title' => $title,
                'article_id'    => $pmcId,
                'journal'       => $journal,
            ];
        }
    }

    return $results;
}
// ==================== Aggregation / Pagination ====================
/**
 * Collapse per-paper author records into one entry per e-mail address.
 *
 * Records are grouped by the lower-cased, trimmed e-mail. A group keeps the
 * name / email / affiliation of the first record seen, counts every record in
 * `paper_count`, keeps at most 10 papers, and back-fills an empty affiliation
 * from a later record that has one. The list is ordered by `paper_count`,
 * highest first.
 *
 * @param array $authorRecords rows with name, email, affiliation, article_title, article_id, journal
 * @return array list of expert entries (name, email, affiliation, paper_count, papers)
 */
private function aggregateExperts($authorRecords)
{
    $grouped = [];
    foreach ($authorRecords as $row) {
        $emailKey = strtolower(trim($row['email']));
        if (empty($emailKey)) {
            continue;
        }
        if (!isset($grouped[$emailKey])) {
            $grouped[$emailKey] = [
                'name' => $row['name'],
                'email' => $row['email'],
                'affiliation' => $row['affiliation'],
                'paper_count' => 0,
                'papers' => [],
            ];
        }
        $entry = &$grouped[$emailKey];
        $entry['paper_count'] += 1;
        // Cap the stored paper list; the counter above still reflects every record.
        if (count($entry['papers']) < 10) {
            $entry['papers'][] = [
                'title' => $row['article_title'],
                'article_id' => $row['article_id'],
                'journal' => $row['journal'],
            ];
        }
        if (empty($entry['affiliation']) && !empty($row['affiliation'])) {
            $entry['affiliation'] = $row['affiliation'];
        }
        unset($entry); // drop the reference before the next iteration
    }
    $ranked = array_values($grouped);
    usort($ranked, function ($left, $right) {
        return $right['paper_count'] <=> $left['paper_count'];
    });
    return $ranked;
}
/**
 * Assemble the paginated response envelope for an expert search.
 *
 * Pagination is computed over articles, not experts: `total_pages` is
 * ceil($totalArticles / $perPage). A non-positive $perPage yields 0 pages
 * instead of dividing by zero.
 *
 * @param array  $experts         expert entries for the current page
 * @param int    $expertCount     value reported as `total`
 * @param int    $articlesScanned number of articles scanned for this page
 * @param int    $totalArticles   total number of matching articles
 * @param int    $page            current page number
 * @param int    $perPage         articles per page
 * @param string $source          value reported as `source`
 * @return array experts, total, articles_scanned, total_articles, page, per_page, total_pages, has_more, source
 */
private function buildPagedResult($experts, $expertCount, $articlesScanned, $totalArticles, $page, $perPage, $source)
{
    // Guard the divisor and cast ceil()'s float result so total_pages is an integer.
    $totalPages = ($totalArticles > 0 && $perPage > 0)
        ? (int) ceil($totalArticles / $perPage)
        : 0;
    return [
        'experts' => $experts,
        'total' => $expertCount,
        'articles_scanned' => $articlesScanned,
        'total_articles' => $totalArticles,
        'page' => $page,
        'per_page' => $perPage,
        'total_pages' => $totalPages,
        'has_more' => $page < $totalPages,
        'source' => $source,
    ];
}
// ==================== DB Helpers ====================
/**
 * Attach a research field to an expert unless an active identical row exists.
 *
 * @param int    $expertId expert_field.expert_id
 * @param string $field    free-text field name; surrounding whitespace is trimmed
 * @return int 1 when a row was inserted, 0 when the field was empty or already present
 */
private function enrichExpertField($expertId, $field)
{
    $field = trim($field);
    if (empty($field)) {
        return 0;
    }
    // The insert below stores only the first 128 characters, so the duplicate
    // check must compare against that same truncated value. Comparing the
    // untruncated text would never match a long field that was stored earlier
    // and would insert a duplicate row on every call.
    $storedField = mb_substr($field, 0, 128);
    $exists = Db::name('expert_field')
        ->where('expert_id', $expertId)
        ->where('field', $storedField)
        ->where('state', 0)
        ->find();
    if ($exists) {
        return 0;
    }
    // Link to a known major when the title matches exactly; 0 means "no match".
    $major = Db::name("major")->where("major_title", $field)->where("state", 0)->find();
    $major_id = $major ? $major['major_id'] : 0;
    Db::name('expert_field')->insert([
        'expert_id' => $expertId,
        'major_id' => $major_id,
        'field' => $storedField,
        'state' => 0,
    ]);
    return 1;
}
// ==================== Text Helpers ====================
/**
 * Pull the first e-mail address out of free text.
 *
 * Patterns are tried in order: an "Electronic address:" label, an
 * "E-mail:" / "Email:" label, then any bare address. The match is
 * lower-cased and stripped of leading/trailing dots.
 *
 * @param string $text text to search
 * @return string the address, or '' when none is found
 */
private function extractEmailFromText($text)
{
    if (empty($text)) {
        return '';
    }
    $patterns = [
        '/[Ee]lectronic address:\s*([^\s;,]+@[^\s;,]+)/',
        '/[Ee]-?mail:\s*([^\s;,]+@[^\s;,]+)/',
        '/\b([a-zA-Z0-9._%+\-]+@[a-zA-Z0-9.\-]+\.[a-zA-Z]{2,})\b/',
    ];
    foreach ($patterns as $pattern) {
        $hit = [];
        if (preg_match($pattern, $text, $hit)) {
            return strtolower(trim($hit[1], '.'));
        }
    }
    return '';
}
/**
 * Recursively collect unique, lower-cased e-mail addresses from an XML node.
 *
 * For every child: an <email> element contributes its text, then the child is
 * searched recursively. Finally the node's own text is scanned with a regex
 * for bare addresses.
 *
 * @param \SimpleXMLElement|null $node   node to scan; null is a no-op
 * @param array                  $emails accumulator, appended to in place
 * @return void
 */
private function extractEmailsFromNode($node, &$emails)
{
    if ($node === null) {
        return;
    }
    foreach ($node->children() as $childNode) {
        if ($childNode->getName() === 'email') {
            $candidate = strtolower(trim((string) $childNode));
            $alreadyKnown = in_array($candidate, $emails);
            if (!empty($candidate) && !$alreadyKnown) {
                $emails[] = $candidate;
            }
        }
        $this->extractEmailsFromNode($childNode, $emails);
    }
    $ownText = (string) $node;
    $found = [];
    if (!preg_match_all('/\b([a-zA-Z0-9._%+\-]+@[a-zA-Z0-9.\-]+\.[a-zA-Z]{2,})\b/', $ownText, $found)) {
        return;
    }
    foreach ($found[1] as $address) {
        $address = strtolower(trim($address, '.'));
        if (!in_array($address, $emails)) {
            $emails[] = $address;
        }
    }
}
/**
 * Strip e-mail fragments from an affiliation string.
 *
 * Removes "Electronic address: …", "E-mail: …" and bare addresses (each with
 * its preceding whitespace), then trims whitespace, dots, commas and
 * semicolons from both ends.
 *
 * @param string $text raw affiliation text
 * @return string cleaned affiliation
 */
private function cleanAffiliation($text)
{
    $emailPatterns = [
        '/\s*[Ee]lectronic address:\s*[^\s;,]+@[^\s;,]+/',
        '/\s*[Ee]-?mail:\s*[^\s;,]+@[^\s;,]+/',
        '/\s*\b[a-zA-Z0-9._%+\-]+@[a-zA-Z0-9.\-]+\.[a-zA-Z]{2,}\b/',
    ];
    foreach ($emailPatterns as $pattern) {
        $text = preg_replace($pattern, '', $text);
    }
    return trim($text, " \t\n\r\0\x0B.,;");
}
/**
 * Flatten an XML node to plain text.
 *
 * Serialises the node, strips all tags, decodes XML entities and collapses
 * every whitespace run to a single space.
 *
 * @param \SimpleXMLElement|null $node node to flatten
 * @return string trimmed text, or '' for null
 */
private function xmlNodeToString($node)
{
    if ($node === null) {
        return '';
    }
    $plain = html_entity_decode(strip_tags($node->asXML()), ENT_QUOTES | ENT_XML1, 'UTF-8');
    $collapsed = preg_replace('/\s+/', ' ', $plain);
    return trim($collapsed);
}
// ==================== Logging ====================
/**
 * Append a timestamped line to the file at $this->logFile.
 *
 * Write errors are suppressed, so logging never interrupts the caller.
 *
 * @param string $msg message text
 * @return void
 */
public function log($msg)
{
    $stamp = date('Y-m-d H:i:s');
    @file_put_contents($this->logFile, $stamp . ' ' . $msg . PHP_EOL, FILE_APPEND);
}
}

View File

@@ -0,0 +1,506 @@
<?php
namespace app\common;
use think\Db;
use think\Cache;
use think\Queue;
use PHPMailer\PHPMailer\PHPMailer;
class PromotionService
{
private $logFile;
/**
 * Point the service log at runtime/promotion_task.log under ROOT_PATH.
 */
public function __construct()
{
    $runtimeDir = ROOT_PATH . 'runtime';
    $this->logFile = $runtimeDir . DS . 'promotion_task.log';
}
/**
 * Process the next pending e-mail of a promotion task (called by the queue job).
 *
 * Each call handles at most one promotion_email_log row and, unless the task
 * has ended, re-enqueues itself through enqueueNextEmail():
 *  - task missing, or task state is not 1: stop;
 *  - current hour outside [send_start_hour, send_end_hour): retry in 300s;
 *  - bounce rate >= max_bounce_rate: task set to state 2 (auto-paused), stop;
 *  - no log row left in state 0: task set to state 3, stop;
 *  - expert missing or in state 4/5: log row marked failed (state 2), continue in 2s;
 *  - no SMTP account with remaining quota: retry in 600s;
 *  - otherwise send the prepared (or freshly rendered) message, record the
 *    outcome and schedule the next e-mail after a random delay.
 *
 * @param int $taskId promotion_task.task_id
 * @return array 'done' => bool plus outcome-specific keys
 *               (reason, state, retry_in, bounce_rate, skipped, failed, sent, email, next_in)
 */
public function processNextEmail($taskId)
{
    $task = Db::name('promotion_task')->where('task_id', $taskId)->find();
    if (!$task) {
        return ['done' => true, 'reason' => 'task_not_found'];
    }
    if ($task['state'] != 1) {
        return ['done' => true, 'reason' => 'task_not_running', 'state' => $task['state']];
    }

    // Respect the task's daily sending window; poll again in five minutes.
    $currentHour = intval(date('G'));
    if ($currentHour < $task['send_start_hour'] || $currentHour >= $task['send_end_hour']) {
        $this->enqueueNextEmail($taskId, 300);
        return ['done' => false, 'reason' => 'outside_send_window', 'retry_in' => 300];
    }

    // Auto-pause once the bounce rate reaches the configured ceiling.
    if ($task['sent_count'] > 0 && $task['max_bounce_rate'] > 0) {
        $bounceRate = ($task['bounce_count'] / $task['sent_count']) * 100;
        if ($bounceRate >= $task['max_bounce_rate']) {
            Db::name('promotion_task')->where('task_id', $taskId)->update([
                'state' => 2,
                'utime' => time(),
            ]);
            $this->log("Task {$taskId} auto-paused: bounce rate {$bounceRate}% >= {$task['max_bounce_rate']}%");
            return ['done' => true, 'reason' => 'auto_paused_bounce_rate', 'bounce_rate' => $bounceRate];
        }
    }

    // Oldest pending log row first; none left means the task is finished.
    $logEntry = Db::name('promotion_email_log')
        ->where('task_id', $taskId)
        ->where('state', 0)
        ->order('log_id asc')
        ->find();
    if (!$logEntry) {
        Db::name('promotion_task')->where('task_id', $taskId)->update([
            'state' => 3,
            'utime' => time(),
        ]);
        return ['done' => true, 'reason' => 'all_emails_processed'];
    }

    $expert = Db::name('expert')->where('expert_id', $logEntry['expert_id'])->find();
    if (!$expert || $expert['state'] == 4 || $expert['state'] == 5) {
        Db::name('promotion_email_log')->where('log_id', $logEntry['log_id'])->update([
            'state' => 2,
            'error_msg' => 'Expert invalid or deleted (state=' . (isset($expert['state']) ? $expert['state'] : 'null') . ')',
            'send_time' => time(),
        ]);
        Db::name('promotion_task')->where('task_id', $taskId)->setInc('fail_count');
        Db::name('promotion_task')->where('task_id', $taskId)->update(['utime' => time()]);
        $this->enqueueNextEmail($taskId, 2);
        return ['done' => false, 'skipped' => $logEntry['email_to'], 'reason' => 'expert_invalid'];
    }

    $account = $this->pickSmtpAccountForTask($task);
    if (!$account) {
        $this->enqueueNextEmail($taskId, 600);
        return ['done' => false, 'reason' => 'no_smtp_quota', 'retry_in' => 600];
    }

    // Prefer the pre-generated content; render on the fly when there is none.
    $subject = '';
    $body = '';
    $hasPrepared = !empty($logEntry['subject_prepared']) && !empty($logEntry['body_prepared']);
    if ($hasPrepared) {
        $subject = $logEntry['subject_prepared'];
        $body = $logEntry['body_prepared'];
    } else {
        $journal = Db::name('journal')->where('journal_id', $task['journal_id'])->find();
        $expertVars = $this->buildExpertVars($expert);
        $journalVars = $this->buildJournalVars($journal);
        $vars = array_merge($journalVars, $expertVars);
        $rendered = $this->renderFromTemplate(
            $task['template_id'],
            $task['journal_id'],
            json_encode($vars, JSON_UNESCAPED_UNICODE),
            $task['style_id']
        );
        if ($rendered['code'] !== 0) {
            Db::name('promotion_email_log')->where('log_id', $logEntry['log_id'])->update([
                'state' => 2,
                'error_msg' => 'Template render failed: ' . $rendered['msg'],
                'send_time' => time(),
            ]);
            Db::name('promotion_task')->where('task_id', $taskId)->setInc('fail_count');
            Db::name('promotion_task')->where('task_id', $taskId)->update(['utime' => time()]);
            $this->enqueueNextEmail($taskId, 2);
            return ['done' => false, 'failed' => $logEntry['email_to'], 'reason' => 'template_error'];
        }
        $subject = $rendered['data']['subject'];
        $body = $rendered['data']['body'];
    }

    $result = $this->doSendEmail($account, $logEntry['email_to'], $subject, $body);
    $now = time();
    if ($result['status'] === 1) {
        Db::name('promotion_email_log')->where('log_id', $logEntry['log_id'])->update([
            'j_email_id' => $account['j_email_id'],
            'subject' => mb_substr($subject, 0, 512),
            'state' => 1,
            'send_time' => $now,
        ]);
        Db::name('journal_email')->where('j_email_id', $account['j_email_id'])->setInc('today_sent');
        Db::name('expert')->where('expert_id', $expert['expert_id'])->update(['state' => 1, 'ltime' => $now]);
        Db::name('promotion_task')->where('task_id', $taskId)->setInc('sent_count');
    } else {
        Db::name('promotion_email_log')->where('log_id', $logEntry['log_id'])->update([
            'j_email_id' => $account['j_email_id'],
            'subject' => mb_substr($subject, 0, 512),
            'state' => 2,
            'error_msg' => mb_substr($result['data'], 0, 512),
            'send_time' => $now,
        ]);
        Db::name('promotion_task')->where('task_id', $taskId)->setInc('fail_count');
    }
    Db::name('promotion_task')->where('task_id', $taskId)->update(['utime' => $now]);

    // Random pause before the next e-mail, never shorter than 5 seconds.
    // The upper bound is clamped to the lower bound: rand() accepts max < min
    // and would then return values below the 5-second floor.
    $minDelay = max(5, $task['min_interval']);
    $maxDelay = max($minDelay, $task['max_interval']);
    $delay = rand($minDelay, $maxDelay);
    $this->enqueueNextEmail($taskId, $delay);
    return [
        'done' => false,
        'sent' => $result['status'] === 1,
        'email' => $logEntry['email_to'],
        'next_in' => $delay,
    ];
}
// ==================== 准备与触发(今日准备、明日发送) ====================
/**
 * Pre-generate subject/body for every pending e-mail of a task and store them
 * on the log rows; afterwards the task is moved to state 5 (prepared).
 *
 * Only tasks in state 0 are accepted. Log rows whose expert is missing or
 * whose template fails to render are marked failed (state 2) and counted in
 * the task's fail_count, matching how processNextEmail() records failures.
 *
 * @param int $taskId
 * @return array ['prepared' => int, 'failed' => int, 'error' => string|null]
 */
public function prepareTask($taskId)
{
    $task = Db::name('promotion_task')->where('task_id', $taskId)->find();
    if (!$task) {
        return ['prepared' => 0, 'failed' => 0, 'error' => 'task_not_found'];
    }
    if ($task['state'] != 0) {
        return ['prepared' => 0, 'failed' => 0, 'error' => 'task_state_not_draft'];
    }
    $journal = Db::name('journal')->where('journal_id', $task['journal_id'])->find();
    // Pending rows that have not been prepared yet, oldest first.
    $logs = Db::name('promotion_email_log')
        ->where('task_id', $taskId)
        ->where('state', 0)
        ->where('prepared_at', 0)
        ->order('log_id asc')
        ->select();
    $prepared = 0;
    $failed = 0;
    $now = time();
    // Journal variables are identical for every recipient; build them once.
    $journalVars = $this->buildJournalVars($journal);
    foreach ($logs as $log) {
        $expert = Db::name('expert')->where('expert_id', $log['expert_id'])->find();
        if (!$expert) {
            Db::name('promotion_email_log')->where('log_id', $log['log_id'])->update([
                'state' => 2,
                'error_msg' => 'Expert not found',
                'send_time' => $now,
            ]);
            $failed++;
            continue;
        }
        $expertVars = $this->buildExpertVars($expert);
        $vars = array_merge($journalVars, $expertVars);
        $rendered = $this->renderFromTemplate(
            $task['template_id'],
            $task['journal_id'],
            json_encode($vars, JSON_UNESCAPED_UNICODE),
            $task['style_id']
        );
        if ($rendered['code'] !== 0) {
            Db::name('promotion_email_log')->where('log_id', $log['log_id'])->update([
                'state' => 2,
                'error_msg' => 'Prepare failed: ' . $rendered['msg'],
                'send_time' => $now,
            ]);
            $failed++;
            continue;
        }
        Db::name('promotion_email_log')->where('log_id', $log['log_id'])->update([
            'subject_prepared' => mb_substr($rendered['data']['subject'], 0, 512),
            'body_prepared' => $rendered['data']['body'],
            'prepared_at' => $now,
        ]);
        $prepared++;
    }
    // Rows failed here are never picked up by processNextEmail() (it only
    // reads state 0), so their count must be added to the task now.
    if ($failed > 0) {
        Db::name('promotion_task')->where('task_id', $taskId)->setInc('fail_count', $failed);
    }
    Db::name('promotion_task')->where('task_id', $taskId)->update([
        'state' => 5,
        'utime' => $now,
    ]);
    $this->log("prepareTask task_id={$taskId} prepared={$prepared} failed={$failed}");
    return ['prepared' => $prepared, 'failed' => $failed, 'error' => null];
}
/**
 * Pre-generate e-mails for every draft task scheduled on a given date
 * (intended for a scheduled job, e.g. every day at 22:00 for tomorrow's tasks).
 *
 * @param string $date Y-m-d, e.g. 2026-03-12
 * @return array ['tasks' => int, 'prepared' => int, 'failed' => int, 'details' => []]
 */
public function prepareTasksForDate($date)
{
    $draftTasks = Db::name('promotion_task')
        ->where('send_date', $date)
        ->where('state', 0)
        ->select();
    $taskTotal = count($draftTasks);
    $totalPrepared = 0;
    $totalFailed = 0;
    $perTask = [];
    foreach ($draftTasks as $draft) {
        $outcome = $this->prepareTask($draft['task_id']);
        $totalPrepared += $outcome['prepared'];
        $totalFailed += $outcome['failed'];
        $perTask[] = [
            'task_id' => $draft['task_id'],
            'task_name' => $draft['task_name'],
            'prepared' => $outcome['prepared'],
            'failed' => $outcome['failed'],
            'error' => $outcome['error'],
        ];
    }
    $this->log("prepareTasksForDate date={$date} tasks=" . $taskTotal . " prepared={$totalPrepared} failed={$totalFailed}");
    return [
        'tasks' => $taskTotal,
        'prepared' => $totalPrepared,
        'failed' => $totalFailed,
        'details' => $perTask,
    ];
}
/**
 * Start sending for the prepared tasks of a given date (intended for a
 * scheduled job, e.g. every day at 8:00 for today's tasks).
 *
 * Tasks with send_date = $date that are still in state 0 are prepared first;
 * then every task in state 5 is switched to state 1 and its first e-mail is
 * enqueued.
 *
 * @param string $date Y-m-d
 * @return array ['prepared' => int, 'started' => int, 'task_ids' => []]
 */
public function startTasksForDate($date)
{
    // Catch-up preparation: tasks dated today that were never prepared
    // (for example, created after the 22:00 preparation run).
    $catchUpTasks = Db::name('promotion_task')
        ->where('send_date', $date)
        ->where('state', 0)
        ->select();
    foreach ($catchUpTasks as $draft) {
        $this->prepareTask($draft['task_id']);
    }

    $readyTasks = Db::name('promotion_task')
        ->where('send_date', $date)
        ->where('state', 5)
        ->select();
    $taskIds = [];
    foreach ($readyTasks as $ready) {
        $readyId = $ready['task_id'];
        Db::name('promotion_task')->where('task_id', $readyId)->update([
            'state' => 1,
            'utime' => time(),
        ]);
        $this->enqueueNextEmail($readyId, 0);
        $taskIds[] = $readyId;
    }
    $started = count($taskIds);
    $this->log("startTasksForDate date={$date} started={$started} task_ids=" . implode(',', $taskIds));
    return [
        'prepared' => count($catchUpTasks),
        'started' => $started,
        'task_ids' => $taskIds,
    ];
}
// ==================== Queue ====================
/**
 * Put the next send job for a task on the 'promotion' queue.
 *
 * @param int $taskId task whose next e-mail should be processed
 * @param int $delay  seconds to wait; a value that is not > 0 pushes immediately
 * @return void
 */
public function enqueueNextEmail($taskId, $delay = 0)
{
    $handler = 'app\api\job\PromotionSend@fire';
    $payload = ['task_id' => $taskId];
    if (!($delay > 0)) {
        Queue::push($handler, $payload, 'promotion');
        return;
    }
    Queue::later($delay, $handler, $payload, 'promotion');
}
// ==================== SMTP ====================
/**
 * Choose the SMTP account with the most remaining daily quota for a task.
 *
 * Candidates are the journal's journal_email rows in state 0, optionally
 * restricted to the comma-separated ids in the task's smtp_ids. Each
 * candidate's daily counter is reset if needed before its quota is compared.
 *
 * @param array $task promotion_task row (journal_id, smtp_ids)
 * @return array|null the chosen journal_email row, or null when none has quota left
 */
public function pickSmtpAccountForTask($task)
{
    $allowedIds = [];
    if ($task['smtp_ids']) {
        $allowedIds = array_map('intval', explode(',', $task['smtp_ids']));
    }
    $query = Db::name('journal_email')
        ->where('journal_id', $task['journal_id'])
        ->where('state', 0);
    if (!empty($allowedIds)) {
        $query->where('j_email_id', 'in', $allowedIds);
    }
    $accounts = $query->select();
    if (empty($accounts)) {
        return null;
    }
    $chosen = null;
    $mostLeft = -1;
    foreach ($accounts as $candidate) {
        $this->resetDailyCountIfNeeded($candidate);
        $left = $candidate['daily_limit'] - $candidate['today_sent'];
        if ($left <= 0 || $left <= $mostLeft) {
            continue;
        }
        $chosen = $candidate;
        $mostLeft = $left;
    }
    return $chosen;
}
/**
 * Zero an account's today_sent counter the first time it is seen each day.
 *
 * The last reset date is remembered in the cache under
 * 'smtp_reset_<j_email_id>'. When the cached date is not today, the counter
 * is reset in the database and on the passed row.
 * NOTE(review): a missing cache entry also triggers a reset, so a cache flush
 * during the day would presumably clear the counter early — confirm.
 *
 * @param array $account journal_email row, updated in place
 * @return void
 */
public function resetDailyCountIfNeeded(&$account)
{
    $today = date('Y-m-d');
    $resetKey = 'smtp_reset_' . $account['j_email_id'];
    if (Cache::get($resetKey) === $today) {
        return;
    }
    Db::name('journal_email')
        ->where('j_email_id', $account['j_email_id'])
        ->update(['today_sent' => 0]);
    $account['today_sent'] = 0;
    Cache::set($resetKey, $today, 86400);
}
/**
 * Send one HTML e-mail through the given SMTP account using PHPMailer.
 *
 * The account's smtp_user is used as both sender and reply-to address. Any
 * exception is caught and reported in the return value instead of propagating.
 *
 * @param array  $account     journal_email row (smtp_host, smtp_port, smtp_user, smtp_password, smtp_encryption, smtp_from_name)
 * @param string $toEmail     recipient address
 * @param string $subject     message subject
 * @param string $htmlContent HTML body; a tag-stripped copy becomes the plain-text alternative
 * @return array ['status' => 1, 'data' => 'success'] or ['status' => 0, 'data' => error message]
 */
public function doSendEmail($account, $toEmail, $subject, $htmlContent)
{
    try {
        $mailer = new PHPMailer(true);
        $mailer->isSMTP();
        $mailer->SMTPDebug = 0;
        $mailer->CharSet = 'UTF-8';
        $mailer->Host = $account['smtp_host'];
        $mailer->Port = intval($account['smtp_port']);
        $mailer->SMTPAuth = true;
        $mailer->Username = $account['smtp_user'];
        $mailer->Password = $account['smtp_password'];

        $encryption = $account['smtp_encryption'];
        if ($encryption === 'ssl' || $encryption === 'tls') {
            $mailer->SMTPSecure = $encryption;
        } else {
            // Anything else means a plain connection with opportunistic TLS disabled.
            $mailer->SMTPSecure = false;
            $mailer->SMTPAutoTLS = false;
        }

        $senderName = $account['smtp_user'];
        if (!empty($account['smtp_from_name'])) {
            $senderName = $account['smtp_from_name'];
        }
        $mailer->setFrom($account['smtp_user'], $senderName);
        $mailer->addReplyTo($account['smtp_user'], $senderName);
        $mailer->addAddress($toEmail);

        $mailer->isHTML(true);
        $mailer->Subject = $subject;
        $mailer->Body = $htmlContent;
        $mailer->AltBody = strip_tags($htmlContent);
        $mailer->send();
    } catch (\Exception $e) {
        return ['status' => 0, 'data' => $e->getMessage()];
    }
    return ['status' => 1, 'data' => 'success'];
}
// ==================== Template Rendering ====================
/**
 * Render a journal's mail template, optionally wrapped in a mail style.
 *
 * @param int    $templateId mail_template.template_id (must belong to $journalId and be in state 0)
 * @param int    $journalId  owning journal
 * @param string $varsJson   JSON object of placeholder values; anything that does not decode to an array is ignored
 * @param int    $styleId    mail_style.style_id; 0 or an unknown/inactive style means no header/footer
 * @return array ['code' => 0, 'msg' => 'success', 'data' => ['subject' => ..., 'body' => ...]]
 *               or ['code' => 1, 'msg' => 'Template not found']
 */
public function renderFromTemplate($templateId, $journalId, $varsJson, $styleId = 0)
{
    $template = Db::name('mail_template')->where('template_id', $templateId)->where('journal_id', $journalId)->where('state', 0)->find();
    if (!$template) {
        return ['code' => 1, 'msg' => 'Template not found'];
    }

    $vars = [];
    if ($varsJson) {
        $decoded = json_decode($varsJson, true);
        if (is_array($decoded)) {
            $vars = $decoded;
        }
    }

    $subject = $this->renderVars($template['subject'], $vars);
    $finalBody = $this->renderVars($template['body_html'], $vars);

    $style = null;
    if ($styleId) {
        $style = Db::name('mail_style')->where('style_id', $styleId)->where('state', 0)->find();
    }
    if ($style) {
        // Header and footer go through the same placeholder substitution.
        $header = '';
        if ($style['header_html']) {
            $header = $this->renderVars($style['header_html'], $vars);
        }
        $footer = '';
        if ($style['footer_html']) {
            $footer = $this->renderVars($style['footer_html'], $vars);
        }
        $finalBody = $header . $finalBody . $footer;
    }

    return ['code' => 0, 'msg' => 'success', 'data' => ['subject' => $subject, 'body' => $finalBody]];
}
/**
 * Build the expert-specific template placeholders.
 *
 * `expert_title` is always "Ph.D"; the remaining values are copied from the
 * expert row, defaulting to '' when a column is absent or null.
 *
 * @param array|null $expert expert row
 * @return array expert_title, expert_name, expert_email, expert_affiliation, expert_field
 */
public function buildExpertVars($expert)
{
    $vars = ['expert_title' => "Ph.D"];
    $columnFor = [
        'expert_name' => 'name',
        'expert_email' => 'email',
        'expert_affiliation' => 'affiliation',
        'expert_field' => 'field',
    ];
    foreach ($columnFor as $placeholder => $column) {
        $vars[$placeholder] = $expert[$column] ?? '';
    }
    return $vars;
}
/**
 * Build the journal-specific template placeholders.
 *
 * `eic_name` is the realname of the journal's board_to_journal row with
 * type 0 and state 0 (presumably the editor-in-chief — confirm).
 * `special_support_deadline` is the date 30 days from now. Missing columns
 * default to ''.
 *
 * @param array|null $journal journal row; a falsy value yields []
 * @return array placeholder name => value
 */
public function buildJournalVars($journal)
{
    if (!$journal) {
        return [];
    }
    $zb = Db::name("board_to_journal")
        ->where("journal_id", $journal['journal_id'])
        ->where("state", 0)
        ->where('type', 0)
        ->find();
    return [
        'journal_name' => $journal['title'] ?? '',
        'journal_abbr' => $journal['jabbr'] ?? '',
        'journal_url' => $journal['website'] ?? '',
        'journal_email' => $journal['email'] ?? '',
        'indexing_databases' => $journal['databases'] ?? '',
        'submission_url' => "https://submission.tmrjournals.com/",
        'eic_name' => $zb['realname'] ?? '',
        // Defaulted like its siblings so a missing column does not raise an undefined-index notice.
        'editor_name' => $journal['editor_name'] ?? '',
        'special_support_deadline' => date("Y-m-d", strtotime("+30 days")),
    ];
}
/**
 * Substitute placeholders in a template string.
 *
 * Every variable name (trimmed) is replaced in both `{{name}}` and `{name}`
 * form by its string value. Replacement uses str_replace() with arrays, so
 * pairs are applied one after another in variable order.
 *
 * @param string $tpl  template text; a non-string or '' yields ''
 * @param array  $vars placeholder name => value; a non-array or empty value returns $tpl unchanged
 * @return string rendered text
 */
public function renderVars($tpl, $vars)
{
    if (!is_string($tpl) || $tpl === '') {
        return '';
    }
    if (!is_array($vars) || empty($vars)) {
        return $tpl;
    }
    $pairs = [];
    foreach ($vars as $name => $value) {
        $placeholder = trim((string)$name);
        if ($placeholder !== '') {
            $pairs['{{' . $placeholder . '}}'] = (string)$value;
            $pairs['{' . $placeholder . '}'] = (string)$value;
        }
    }
    return str_replace(array_keys($pairs), array_values($pairs), $tpl);
}
// ==================== Logging ====================
/**
 * Append a timestamped line to the promotion task log file.
 *
 * Write errors are suppressed, so logging never interrupts the caller.
 *
 * @param string $msg message text
 * @return void
 */
public function log($msg)
{
    $stamp = date('Y-m-d H:i:s');
    @file_put_contents($this->logFile, $stamp . ' ' . $msg . PHP_EOL, FILE_APPEND);
}
}

View File

@@ -172,8 +172,11 @@ return [
// 日志保存目录
'path' => LOG_PATH,
// 日志记录级别
'level' => ['log'],
"max_files"=>3,
'level' => ['error', 'warning', 'info', 'notice', 'debug'],
'file_size' => 1024 * 1024 * 2, // 2MB
'max_files'=>30,
'apart_day' => true,
'format' => '[%s][%s] %s',
],
// +----------------------------------------------------------------------