agent功能

This commit is contained in:
wangjinlei
2026-03-06 18:02:38 +08:00
parent d31279e684
commit 1915dc2e1e
4 changed files with 1012 additions and 79 deletions

View File

@@ -88,7 +88,8 @@ class Base extends Controller
protected $major_to_article_obj = "";
protected $paystation_obj = "";
protected $exchange_rate_obj = "";
protected $expert_obj = "";
protected $journal_email_obj = "";
public function __construct(\think\Request $request = null)
{
@@ -171,6 +172,8 @@ class Base extends Controller
$this->major_to_article_obj = Db::name("major_to_article");
$this->paystation_obj = Db::name("paystation");
$this->exchange_rate_obj = Db::name("exchange_rate");
$this->expert_obj = Db::name("expert");
$this->journal_email_obj = Db::name("journal_email");
}

View File

@@ -0,0 +1,448 @@
<?php
namespace app\api\controller;
use think\Db;
use think\Env;
use PHPMailer;
class EmailClient extends Base
{
// ==================== SMTP Account Management ====================
/**
 * List the SMTP accounts configured for a journal.
 *
 * Request params: journal_id (required).
 *
 * Every returned row has smtp_password replaced by a fixed mask and gains a
 * computed remaining_today value (daily_limit - today_sent, never below 0).
 */
public function getAccounts()
{
    $journalId = intval($this->request->param('journal_id', 0));
    if (!$journalId) {
        return jsonError('journal_id is required');
    }

    $list = Db::name('journal_email')
        ->where('journal_id', $journalId)
        ->order('state asc, j_email_id asc')
        ->select();

    // Write through the index instead of iterating by reference: a by-reference
    // foreach leaves the loop variable aliased to the last row afterwards.
    foreach ($list as $idx => $row) {
        $list[$idx]['smtp_password']   = '******';
        $list[$idx]['remaining_today'] = max(0, $row['daily_limit'] - $row['today_sent']);
    }

    return jsonSuccess($list);
}
/**
 * Add an SMTP account to a journal.
 *
 * POST params: journal_id, smtp_host, smtp_user, smtp_password (all required);
 * smtp_port (default 465), smtp_encryption ('ssl' or 'tls', default 'ssl'),
 * smtp_from_name (default ''), daily_limit (default 100).
 *
 * A journal may hold at most 4 accounts with state = 0.
 */
public function addAccount()
{
    $data = $this->request->post();
    if (empty($data['journal_id']) || empty($data['smtp_host']) || empty($data['smtp_user']) || empty($data['smtp_password'])) {
        return jsonError('journal_id, smtp_host, smtp_user, smtp_password are required');
    }

    $journalId = intval($data['journal_id']);

    $count = Db::name('journal_email')
        ->where('journal_id', $journalId)
        ->where('state', 0)
        ->count();
    if ($count >= 4) {
        return jsonError('Each journal allows at most 4 SMTP accounts');
    }

    // Resolve the encryption mode once. The previous expression validated the
    // defaulted value but then read $data['smtp_encryption'] directly, which is
    // an undefined index whenever the client omits the field.
    $encryption = $data['smtp_encryption'] ?? 'ssl';
    if (!in_array($encryption, ['ssl', 'tls'], true)) {
        $encryption = 'ssl';
    }

    // NOTE(review): smtp_password is persisted exactly as submitted (trimmed);
    // consider encrypting it at rest.
    $insert = [
        'journal_id'      => $journalId,
        'smtp_host'       => trim($data['smtp_host']),
        'smtp_port'       => intval($data['smtp_port'] ?? 465),
        'smtp_user'       => trim($data['smtp_user']),
        'smtp_password'   => trim($data['smtp_password']),
        'smtp_encryption' => $encryption,
        'smtp_from_name'  => trim($data['smtp_from_name'] ?? ''),
        'daily_limit'     => intval($data['daily_limit'] ?? 100),
        'today_sent'      => 0,
        'state'           => 0,
    ];

    $id = Db::name('journal_email')->insertGetId($insert);

    return jsonSuccess(['j_email_id' => $id]);
}
/**
 * Update selected columns of an SMTP account.
 *
 * POST params: j_email_id (required) plus any of smtp_host, smtp_port,
 * smtp_user, smtp_password, smtp_encryption, smtp_from_name, daily_limit, state.
 * Only fields present in the request are written.
 *
 * smtp_password is ignored when it is empty or equals the '******' mask, so a
 * form that echoes the masked value back does not destroy the stored password.
 */
public function updateAccount()
{
    $data = $this->request->post();
    $id   = intval($data['j_email_id'] ?? 0);
    if (!$id) {
        return jsonError('j_email_id is required');
    }

    $account = Db::name('journal_email')->where('j_email_id', $id)->find();
    if (!$account) {
        return jsonError('Account not found');
    }

    $update = [];
    $fields = ['smtp_host', 'smtp_port', 'smtp_user', 'smtp_password', 'smtp_encryption', 'smtp_from_name', 'daily_limit', 'state'];
    foreach ($fields as $field) {
        if (!isset($data[$field])) {
            continue;
        }
        // Skip the display mask and blank passwords: neither is a real credential.
        if ($field === 'smtp_password' && ($data[$field] === '' || $data[$field] === '******')) {
            continue;
        }
        $update[$field] = $data[$field];
    }

    // Normalise the numeric columns.
    foreach (['smtp_port', 'daily_limit', 'state'] as $numericField) {
        if (isset($update[$numericField])) {
            $update[$numericField] = intval($update[$numericField]);
        }
    }

    // Anything other than 'ssl' / 'tls' falls back to 'ssl'.
    if (isset($update['smtp_encryption']) && !in_array($update['smtp_encryption'], ['ssl', 'tls'], true)) {
        $update['smtp_encryption'] = 'ssl';
    }

    if (empty($update)) {
        return jsonError('No fields to update');
    }

    Db::name('journal_email')->where('j_email_id', $id)->update($update);

    return jsonSuccess(['j_email_id' => $id]);
}
/**
 * Remove an SMTP account row.
 *
 * Request params: j_email_id (required).
 * The response echoes the id that was requested for deletion.
 */
public function deleteAccount()
{
    $accountId = intval($this->request->param('j_email_id', 0));

    if ($accountId === 0) {
        return jsonError('j_email_id is required');
    }

    Db::name('journal_email')
        ->where('j_email_id', $accountId)
        ->delete();

    return jsonSuccess(['deleted' => $accountId]);
}
/**
 * Send a fixed test message through one SMTP account.
 *
 * Request params: j_email_id (required), test_email (required recipient).
 * Does not touch the account's today_sent counter.
 */
public function testAccount()
{
    $request   = $this->request;
    $accountId = intval($request->param('j_email_id', 0));
    $recipient = trim($request->param('test_email', ''));

    if (!$accountId) {
        return jsonError('j_email_id is required');
    }
    if (empty($recipient)) {
        return jsonError('test_email is required');
    }

    $smtpAccount = Db::name('journal_email')->where('j_email_id', $accountId)->find();
    if (!$smtpAccount) {
        return jsonError('Account not found');
    }

    $outcome = $this->doSendEmail(
        $smtpAccount,
        $recipient,
        'SMTP Test',
        '<p>This is a test email from your journal system.</p>'
    );

    if ($outcome['status'] !== 1) {
        return jsonError('Send failed: ' . $outcome['data']);
    }

    return jsonSuccess(['msg' => 'Test email sent successfully']);
}
// ==================== Email Sending ====================
/**
 * Send one email through a journal's SMTP account.
 *
 * Request params: journal_id, to_email, subject, content (all required);
 * j_email_id (optional). When j_email_id is given the account must belong to
 * the journal and have state = 0; otherwise an account is picked automatically.
 * If the chosen account has used up its daily limit, another one is picked.
 * On success the sending account's today_sent counter is incremented.
 */
public function sendOne()
{
    $req       = $this->request;
    $journalId = intval($req->param('journal_id', 0));
    $accountId = intval($req->param('j_email_id', 0));
    $toEmail   = trim($req->param('to_email', ''));
    $subject   = trim($req->param('subject', ''));
    $content   = $req->param('content', '');

    $incomplete = !$journalId || empty($toEmail) || empty($subject) || empty($content);
    if ($incomplete) {
        return jsonError('journal_id, to_email, subject, content are required');
    }

    // Explicit account if requested, otherwise let the picker decide.
    $account = $accountId
        ? Db::name('journal_email')
            ->where('j_email_id', $accountId)
            ->where('journal_id', $journalId)
            ->where('state', 0)
            ->find()
        : $this->pickSmtpAccount($journalId);

    if (!$account) {
        return jsonError('No available SMTP account (all disabled or daily limit reached)');
    }

    $this->resetDailyCountIfNeeded($account);

    // Quota exhausted on the chosen account: fall back to any account with room.
    if ($account['today_sent'] >= $account['daily_limit']) {
        $account = $this->pickSmtpAccount($journalId);
        if (!$account) {
            return jsonError('All SMTP accounts have reached daily limit');
        }
    }

    $result = $this->doSendEmail($account, $toEmail, $subject, $content);
    if ($result['status'] !== 1) {
        return jsonError('Send failed: ' . $result['data']);
    }

    Db::name('journal_email')->where('j_email_id', $account['j_email_id'])->setInc('today_sent');

    return jsonSuccess([
        'msg'        => 'Email sent',
        'from'       => $account['smtp_user'],
        'to'         => $toEmail,
        'j_email_id' => $account['j_email_id'],
    ]);
}
/**
 * Send a personalised email to several experts.
 *
 * Request params: journal_id, expert_ids (comma separated), subject, content
 * (all required). Subject and content may contain the placeholders {name},
 * {email}, {affiliation} and {field}.
 *
 * Only experts with state = 0 are contacted. After a successful send the
 * expert's state is set to 1 and the sending account's today_sent counter is
 * incremented. Processing stops early once no SMTP account has quota left; the
 * remaining experts are reported as skipped.
 *
 * A random 30-60 second pause is inserted between consecutive sends.
 */
public function sendBatch()
{
    $journalId = intval($this->request->param('journal_id', 0));
    $expertIds = trim($this->request->param('expert_ids', ''));
    $subject   = trim($this->request->param('subject', ''));
    $content   = $this->request->param('content', '');
    if (!$journalId || empty($expertIds) || empty($subject) || empty($content)) {
        return jsonError('journal_id, expert_ids, subject, content are required');
    }

    $ids     = array_map('intval', explode(',', $expertIds));
    $experts = Db::name('expert')->where('expert_id', 'in', $ids)->where('state', 0)->select();
    if (empty($experts)) {
        return jsonError('No valid experts found (state must be 0)');
    }

    // The loop deliberately sleeps between sends, so the default execution
    // time limit must not cut the batch short.
    set_time_limit(0);

    $total     = count($experts);
    $processed = 0;
    $sent      = 0;
    $failed    = 0;
    $skipped   = 0;
    $errors    = [];

    foreach ($experts as $expert) {
        $account = $this->pickSmtpAccount($journalId);
        if (!$account) {
            $skipped += ($total - $sent - $failed);
            $errors[] = 'All SMTP accounts reached daily limit, stopped at #' . ($sent + $failed + 1);
            break;
        }

        $personalContent = $this->replaceVariables($content, $expert);
        $personalSubject = $this->replaceVariables($subject, $expert);

        $result = $this->doSendEmail($account, $expert['email'], $personalSubject, $personalContent);
        if ($result['status'] === 1) {
            Db::name('journal_email')->where('j_email_id', $account['j_email_id'])->setInc('today_sent');
            Db::name('expert')->where('expert_id', $expert['expert_id'])->update(['state' => 1]);
            $sent++;
        } else {
            $failed++;
            $errors[] = $expert['email'] . ': ' . $result['data'];
        }

        // Throttle between sends only; pausing after the last recipient would
        // just delay the response for no benefit.
        $processed++;
        if ($processed < $total) {
            sleep(rand(30, 60));
        }
    }

    return jsonSuccess([
        'sent'    => $sent,
        'failed'  => $failed,
        'skipped' => $skipped,
        'errors'  => $errors,
    ]);
}
/**
 * Report today's sending quota usage for a journal.
 *
 * Request params: journal_id (required).
 * Considers only accounts with state = 0 and returns per-account figures plus
 * totals for limit, sent and remaining.
 */
public function getStats()
{
    $journalId = intval($this->request->param('journal_id', 0));
    if ($journalId === 0) {
        return jsonError('journal_id is required');
    }

    $activeAccounts = Db::name('journal_email')
        ->where('journal_id', $journalId)
        ->where('state', 0)
        ->select();

    $limitSum = 0;
    $sentSum  = 0;
    $perAccount = [];

    foreach ($activeAccounts as $row) {
        // May zero the counter (in DB and in $row) when the day has rolled over.
        $this->resetDailyCountIfNeeded($row);

        $limit = $row['daily_limit'];
        $used  = $row['today_sent'];

        $limitSum += $limit;
        $sentSum  += $used;

        $perAccount[] = [
            'j_email_id'  => $row['j_email_id'],
            'smtp_user'   => $row['smtp_user'],
            'daily_limit' => $limit,
            'today_sent'  => $used,
            'remaining'   => max(0, $limit - $used),
        ];
    }

    return jsonSuccess([
        'total_daily_limit' => $limitSum,
        'total_today_sent'  => $sentSum,
        'total_remaining'   => max(0, $limitSum - $sentSum),
        'accounts'          => $perAccount,
    ]);
}
/**
 * Reset the today_sent counter (intended for cron or manual use).
 *
 * Request params: journal_id (optional). With a journal_id only that journal's
 * accounts are reset; without it every account is reset.
 * Returns the value reported by the update call as reset_count.
 */
public function resetDailyCount()
{
    $journalId = intval($this->request->param('journal_id', 0));

    $query = Db::name('journal_email');
    if ($journalId) {
        $query->where('journal_id', $journalId);
    } else {
        // The ThinkPHP query builder refuses update() without any condition
        // ("miss update condition"), so express "all rows" explicitly.
        $query->where('j_email_id', '>', 0);
    }

    $count = $query->update(['today_sent' => 0]);

    return jsonSuccess(['reset_count' => $count]);
}
// ==================== Internal Methods ====================
/**
 * Choose the SMTP account a journal should send from next.
 *
 * Looks at the journal's accounts with state = 0 and returns the one with the
 * largest remaining quota (daily_limit - today_sent). On a tie the first row
 * returned by the query wins. Returns null when no account has quota left.
 */
private function pickSmtpAccount($journalId)
{
    $candidates = Db::name('journal_email')
        ->where('journal_id', $journalId)
        ->where('state', 0)
        ->select();

    if (empty($candidates)) {
        return null;
    }

    $chosen   = null;
    $topQuota = 0; // a candidate must have strictly more than this to be chosen

    foreach ($candidates as $candidate) {
        $this->resetDailyCountIfNeeded($candidate);

        $quota = $candidate['daily_limit'] - $candidate['today_sent'];
        if ($quota > $topQuota) {
            $topQuota = $quota;
            $chosen   = $candidate;
        }
    }

    return $chosen;
}
/**
 * Zero an account's today_sent counter on the first use of a new day.
 *
 * The date of the last reset is kept in the cache under
 * "smtp_reset_<j_email_id>". When the cached value differs from today's date
 * the counter is cleared in the database and in the passed-in array (taken by
 * reference), and the cache entry is refreshed for 86400 seconds.
 *
 * NOTE(review): the marker lives only in the cache, so a missing entry (expiry,
 * eviction, cache clear) is treated as "not yet reset today" and zeroes the
 * counter again - confirm this is acceptable for quota enforcement.
 */
private function resetDailyCountIfNeeded(&$account)
{
    $today     = date('Y-m-d');
    $markerKey = 'smtp_reset_' . $account['j_email_id'];

    if (\think\Cache::get($markerKey) === $today) {
        return;
    }

    Db::name('journal_email')
        ->where('j_email_id', $account['j_email_id'])
        ->update(['today_sent' => 0]);

    $account['today_sent'] = 0;

    \think\Cache::set($markerKey, $today, 86400);
}
/**
 * Deliver one HTML email with PHPMailer using the given SMTP account row.
 *
 * $account supplies smtp_host, smtp_port, smtp_user, smtp_password,
 * smtp_encryption and smtp_from_name. The account's smtp_user is used as both
 * From and Reply-To address; the display name falls back to smtp_user when
 * smtp_from_name is empty. A plain-text alternative is derived with strip_tags.
 *
 * Returns ['status' => 1, 'data' => 'success'] on success, or
 * ['status' => 0, 'data' => <exception message>] when anything throws.
 */
private function doSendEmail($account, $toEmail, $subject, $htmlContent)
{
    try {
        $mailer = new PHPMailer(true);
        $mailer->isSMTP();
        $mailer->SMTPDebug = 0;
        $mailer->CharSet   = 'UTF-8';

        // Connection and credentials.
        $mailer->Host     = $account['smtp_host'];
        $mailer->Port     = intval($account['smtp_port']);
        $mailer->SMTPAuth = true;
        $mailer->Username = $account['smtp_user'];
        $mailer->Password = $account['smtp_password'];

        // 'ssl' and 'tls' are passed straight through; anything else means an
        // unencrypted connection with opportunistic TLS switched off.
        $encryption = $account['smtp_encryption'];
        if ($encryption === 'ssl' || $encryption === 'tls') {
            $mailer->SMTPSecure = $encryption;
        } else {
            $mailer->SMTPSecure  = false;
            $mailer->SMTPAutoTLS = false;
        }

        $sender      = $account['smtp_user'];
        $displayName = empty($account['smtp_from_name']) ? $sender : $account['smtp_from_name'];

        $mailer->setFrom($sender, $displayName);
        $mailer->addReplyTo($sender, $displayName);
        $mailer->addAddress($toEmail);

        $mailer->isHTML(true);
        $mailer->Subject = $subject;
        $mailer->Body    = $htmlContent;
        $mailer->AltBody = strip_tags($htmlContent);

        $mailer->send();

        return ['status' => 1, 'data' => 'success'];
    } catch (\Exception $e) {
        return ['status' => 0, 'data' => $e->getMessage()];
    }
}
/**
 * Fill the {name}, {email}, {affiliation} and {field} placeholders of a
 * template from an expert row. Keys missing from the row become ''.
 */
private function replaceVariables($template, $expert)
{
    $placeholders = ['{name}', '{email}', '{affiliation}', '{field}'];

    $values = [
        $expert['name'] ?? '',
        $expert['email'] ?? '',
        $expert['affiliation'] ?? '',
        $expert['field'] ?? '',
    ];

    return str_replace($placeholders, $values, $template);
}
}

View File

@@ -3,6 +3,7 @@
namespace app\api\controller;
use think\Cache;
use think\Db;
use GuzzleHttp\Client;
class ExpertFinder extends Base
@@ -14,8 +15,9 @@ class ExpertFinder extends Base
{
parent::__construct($request);
$this->httpClient = new Client([
'timeout' => 60,
'verify' => false,
'timeout' => 180,
'connect_timeout' => 15,
'verify' => false,
]);
}
@@ -23,24 +25,24 @@ class ExpertFinder extends Base
* Main search endpoint
* Params:
* keyword - search term (e.g. "biomedical engineering")
* max_results - max articles to scan, default 200, max 1000
* page - page number, default 1
* per_page - articles per page, default 100, max 100
* min_year - earliest publication year, default current-3
* source - "pubmed" (fast, email from affiliation) or "pmc" (slower, structured email)
*/
public function search()
{
$keyword = trim($this->request->param('keyword', ''));
$maxResults = intval($this->request->param('max_results', 200));
$minYear = intval($this->request->param('min_year', date('Y') - 3));
$source = $this->request->param('source', 'pubmed');
$keyword = trim($this->request->param('keyword', ''));
$page = max(1, intval($this->request->param('page', 1)));
$perPage = max(10, min(intval($this->request->param('per_page', 100)), 100));
$minYear = intval($this->request->param('min_year', date('Y') - 3));
$source = $this->request->param('source', 'pubmed');
if (empty($keyword)) {
return jsonError('keyword is required');
}
$maxResults = max(10, min($maxResults, 1000));
$cacheKey = 'expert_finder_' . md5($keyword . $maxResults . $minYear . $source);
$cacheKey = 'expert_finder_' . md5($keyword . $page . $perPage . $minYear . $source);
$cached = Cache::get($cacheKey);
if ($cached) {
return jsonSuccess($cached);
@@ -48,45 +50,200 @@ class ExpertFinder extends Base
try {
if ($source === 'pmc') {
$result = $this->searchViaPMC($keyword, $maxResults, $minYear);
$result = $this->searchViaPMC($keyword, $perPage, $minYear, $page);
} else {
$result = $this->searchViaPubMed($keyword, $maxResults, $minYear);
$result = $this->searchViaPubMed($keyword, $perPage, $minYear, $page);
}
} catch (\Exception $e) {
return jsonError('Search failed: ' . $e->getMessage());
}
$saveResult = $this->saveExperts($result['experts'], $keyword, $source);
$result['saved_new'] = $saveResult['inserted'];
$result['saved_exist'] = $saveResult['existing'];
Cache::set($cacheKey, $result, 3600);
return jsonSuccess($result);
}
/**
 * Get experts from the local database.
 *
 * Params:
 *   field       - filter by field keyword (matched against t_expert_field.field)
 *   major_id    - filter by major_id (matched against t_expert_field.major_id)
 *   state       - filter by state, '-1' or '' for all
 *   keyword     - LIKE search over name / email / affiliation
 *   no_recent   - if 1, keep only experts whose ltime is 0 or older than recent_days
 *   recent_days - day threshold for no_recent, default 30
 *   page        - page number, default 1
 *   per_page    - items per page, default 20, max 100
 *   min_experts - when a field filter yields fewer rows than this, a background
 *                 fetch is triggered; default 50, 0 disables
 *
 * Each returned expert carries a 'fields' list with its active
 * (state = 0) expert_field values.
 */
public function getList()
{
    $field      = trim($this->request->param('field', ''));
    $majorId    = intval($this->request->param('major_id', 0));
    $state      = $this->request->param('state', '-1');
    $keyword    = trim($this->request->param('keyword', ''));
    $noRecent   = intval($this->request->param('no_recent', 0));
    $recentDays = max(1, intval($this->request->param('recent_days', 30)));
    $page       = max(1, intval($this->request->param('page', 1)));
    $perPage    = max(1, min(intval($this->request->param('per_page', 20)), 100));
    $minExperts = max(0, intval($this->request->param('min_experts', 50)));

    $query    = Db::name('expert')->alias('e');
    $needJoin = ($field !== '' || $majorId > 0);
    if ($needJoin) {
        $query->join('t_expert_field ef', 'ef.expert_id = e.expert_id AND ef.state = 0', 'inner');
        if ($field !== '') {
            $query->where('ef.field', 'like', '%' . $field . '%');
        }
        if ($majorId > 0) {
            $query->where('ef.major_id', $majorId);
        }
        // One expert can match several field rows; collapse to one row each.
        $query->group('e.expert_id');
    }
    if ($state !== '-1' && $state !== '') {
        $query->where('e.state', intval($state));
    }
    if ($keyword !== '') {
        $query->where('e.name|e.email|e.affiliation', 'like', '%' . $keyword . '%');
    }
    if ($noRecent) {
        $cutoff = time() - ($recentDays * 86400);
        $query->where(function ($q) use ($cutoff) {
            $q->where('e.ltime', 0)->whereOr('e.ltime', '<', $cutoff);
        });
    }

    // NOTE(review): when the GROUP BY above is active, verify that count()
    // returns the number of groups on the installed framework version.
    $countQuery = clone $query;
    $total      = $countQuery->count('distinct e.expert_id');

    $list = $query
        ->field('e.*')
        ->order('e.ctime desc')
        ->page($page, $perPage)
        ->select();

    // Load the active fields for the whole page in one query instead of one
    // query per expert, and attach them by index (no by-reference loop).
    $fieldsByExpert = [];
    $expertIds      = array_column($list, 'expert_id');
    if (!empty($expertIds)) {
        $fieldRows = Db::name('expert_field')
            ->where('expert_id', 'in', $expertIds)
            ->where('state', 0)
            ->field(['expert_id', 'field'])
            ->select();
        foreach ($fieldRows as $fieldRow) {
            $fieldsByExpert[$fieldRow['expert_id']][] = $fieldRow['field'];
        }
    }
    foreach ($list as $idx => $row) {
        $list[$idx]['fields'] = $fieldsByExpert[$row['expert_id']] ?? [];
    }

    $fetching = false;
    if ($field !== '' && $total < $minExperts && $minExperts > 0) {
        $this->triggerBackgroundFetch($field);
        $fetching = true;
    }

    return jsonSuccess([
        'list'        => $list,
        'total'       => $total,
        'page'        => $page,
        'per_page'    => $perPage,
        'total_pages' => $total > 0 ? ceil($total / $perPage) : 0,
        'fetching'    => $fetching,
    ]);
}
/**
 * Return the active (state = 0) expert_field rows of one expert.
 *
 * Request params: expert_id (required).
 */
public function getExpertFields()
{
    $id = intval($this->request->param('expert_id', 0));

    if ($id === 0) {
        return jsonError('expert_id is required');
    }

    $rows = Db::name('expert_field')
        ->where('expert_id', $id)
        ->where('state', 0)
        ->select();

    return jsonSuccess($rows);
}
/**
 * Set the state of one or more experts.
 *
 * Params:
 *   expert_id - a single id or a comma-separated list of ids
 *   state     - new state, must be within 0-5
 */
public function updateState()
{
    $rawIds   = $this->request->param('expert_id', '');
    $newState = intval($this->request->param('state', 0));

    if (empty($rawIds)) {
        return jsonError('expert_id is required');
    }

    $inRange = $newState >= 0 && $newState <= 5;
    if (!$inRange) {
        return jsonError('state must be 0-5');
    }

    $idList  = array_map('intval', explode(',', $rawIds));
    $updated = Db::name('expert')
        ->where('expert_id', 'in', $idList)
        ->update(['state' => $newState]);

    return jsonSuccess(['updated' => $updated]);
}
/**
 * Remove experts, either softly or permanently.
 *
 * Params:
 *   expert_id - a single id or a comma-separated list of ids
 *   hard      - 1 deletes the rows; anything else (default 0) sets state = 5
 */
public function deleteExpert()
{
    $rawIds     = $this->request->param('expert_id', '');
    $hardDelete = intval($this->request->param('hard', 0));

    if (empty($rawIds)) {
        return jsonError('expert_id is required');
    }

    $idList = array_map('intval', explode(',', $rawIds));
    $target = Db::name('expert')->where('expert_id', 'in', $idList);

    $affected = $hardDelete
        ? $target->delete()
        : $target->update(['state' => 5]);

    return jsonSuccess(['affected' => $affected]);
}
/**
* Export search results to Excel
* Same params as search()
* Same params as search(), exports current page results
*/
public function export()
{
$keyword = trim($this->request->param('keyword', ''));
$maxResults = intval($this->request->param('max_results', 200));
$minYear = intval($this->request->param('min_year', date('Y') - 3));
$source = $this->request->param('source', 'pubmed');
$keyword = trim($this->request->param('keyword', ''));
$page = max(1, intval($this->request->param('page', 1)));
$perPage = max(10, min(intval($this->request->param('per_page', 100)), 100));
$minYear = intval($this->request->param('min_year', date('Y') - 3));
$source = $this->request->param('source', 'pubmed');
if (empty($keyword)) {
return jsonError('keyword is required');
}
$maxResults = max(10, min($maxResults, 1000));
$cacheKey = 'expert_finder_' . md5($keyword . $maxResults . $minYear . $source);
$cacheKey = 'expert_finder_' . md5($keyword . $page . $perPage . $minYear . $source);
$cached = Cache::get($cacheKey);
if (!$cached) {
try {
if ($source === 'pmc') {
$cached = $this->searchViaPMC($keyword, $maxResults, $minYear);
$cached = $this->searchViaPMC($keyword, $perPage, $minYear, $page);
} else {
$cached = $this->searchViaPubMed($keyword, $maxResults, $minYear);
$cached = $this->searchViaPubMed($keyword, $perPage, $minYear, $page);
}
Cache::set($cacheKey, $cached, 3600);
} catch (\Exception $e) {
@@ -98,7 +255,7 @@ class ExpertFinder extends Base
return jsonError('No experts found to export');
}
return $this->generateExcel($cached['experts'], $keyword);
return $this->generateExcel($cached['experts'], $keyword, $page);
}
/**
@@ -117,81 +274,215 @@ class ExpertFinder extends Base
return jsonSuccess(['msg' => 'Cache cleared']);
}
// ==================== PubMed Search ====================
// ==================== Cron / Auto Fetch ====================
private function searchViaPubMed($keyword, $maxResults, $minYear)
/**
* Cron job: daily fetch experts for given keywords
* Params:
* keywords - comma-separated keywords (e.g. "biomedical engineering,tissue engineering")
* source - pubmed or pmc, default pubmed
* per_page - articles per page, default 100
* min_year - default current-3
*
* Uses cache to remember which page was last fetched per keyword,
* so each cron run fetches the next page automatically.
*/
public function cronFetch()
{
$ids = $this->esearch('pubmed', $keyword, $maxResults, $minYear);
if (empty($ids)) {
return ['experts' => [], 'total' => 0, 'articles_scanned' => 0, 'source' => 'pubmed'];
$keywordsStr = trim($this->request->param('keywords', ''));
$source = $this->request->param('source', 'pubmed');
$perPage = max(10, min(intval($this->request->param('per_page', 100)), 100));
$minYear = intval($this->request->param('min_year', date('Y') - 3));
if (empty($keywordsStr)) {
return jsonError('keywords is required');
}
$allAuthors = [];
$batches = array_chunk($ids, 200);
foreach ($batches as $batch) {
$xml = $this->efetch('pubmed', $batch);
$authors = $this->parsePubMedXml($xml);
$allAuthors = array_merge($allAuthors, $authors);
usleep(340000);
set_time_limit(0);
$keywords = array_map('trim', explode(',', $keywordsStr));
$report = [];
foreach ($keywords as $kw) {
if (empty($kw)) {
continue;
}
$fetchLog = $this->getFetchLog($kw, $source);
$page = $fetchLog['last_page'] + 1;
try {
if ($source === 'pmc') {
$result = $this->searchViaPMC($kw, $perPage, $minYear, $page);
} else {
$result = $this->searchViaPubMed($kw, $perPage, $minYear, $page);
}
$saveResult = $this->saveExperts($result['experts'], $kw, $source);
$nextPage = $result['has_more'] ? $page : 0;
$totalPages = isset($result['total_pages']) ? $result['total_pages'] : 0;
$this->updateFetchLog($kw, $source, $nextPage, $totalPages);
$report[] = [
'keyword' => $kw,
'page' => $page,
'experts_found' => $result['total'],
'saved_new' => $saveResult['inserted'],
'saved_exist' => $saveResult['existing'],
'field_enriched' => $saveResult['field_enriched'],
'has_more' => $result['has_more'],
];
} catch (\Exception $e) {
$report[] = [
'keyword' => $kw,
'page' => $page,
'error' => $e->getMessage(),
];
}
sleep(2);
}
$experts = $this->aggregateExperts($allAuthors);
return jsonSuccess(['report' => $report]);
}
/**
 * Queue a background expert fetch for a field, at most once per 300 seconds.
 *
 * A cache entry keyed by the md5 of the field acts as the throttle: while it
 * exists the call is a no-op. Otherwise the entry is set and an
 * app\api\job\FetchExperts job is pushed onto the 'FetchExperts' queue.
 */
private function triggerBackgroundFetch($field)
{
    $throttleKey = 'fetch_lock_' . md5($field);

    if (!Cache::get($throttleKey)) {
        Cache::set($throttleKey, 1, 300);

        $payload = [
            'field'    => $field,
            'source'   => 'pubmed',
            'per_page' => 100,
            'min_year' => date('Y') - 3,
        ];

        \think\Queue::push('app\api\job\FetchExperts', $payload, 'FetchExperts');
    }
}
/**
* Internal method: run a fetch for a single keyword (used by both cron and queue job)
*/
public function doFetchForField($field, $source = 'pubmed', $perPage = 100, $minYear = null)
{
if ($minYear === null) {
$minYear = date('Y') - 3;
}
$fetchLog = $this->getFetchLog($field, $source);
$page = $fetchLog['last_page'] + 1;
if ($source === 'pmc') {
$result = $this->searchViaPMC($field, $perPage, $minYear, $page);
} else {
$result = $this->searchViaPubMed($field, $perPage, $minYear, $page);
}
$saveResult = $this->saveExperts($result['experts'], $field, $source);
$nextPage = $result['has_more'] ? $page : 0;
$totalPages = isset($result['total_pages']) ? $result['total_pages'] : 0;
$this->updateFetchLog($field, $source, $nextPage, $totalPages);
return [
'experts' => $experts,
'total' => count($experts),
'articles_scanned' => count($ids),
'source' => 'pubmed',
'keyword' => $field,
'page' => $page,
'experts_found' => $result['total'],
'saved_new' => $saveResult['inserted'],
'saved_exist' => $saveResult['existing'],
'field_enriched' => $saveResult['field_enriched'],
'has_more' => $result['has_more'],
];
}
// ==================== PMC Search ====================
// ==================== PubMed Search ====================
private function searchViaPMC($keyword, $maxResults, $minYear)
private function searchViaPubMed($keyword, $perPage, $minYear, $page = 1)
{
$ids = $this->esearch('pmc', $keyword, $maxResults, $minYear);
set_time_limit(600);
$searchResult = $this->esearch('pubmed', $keyword, $perPage, $minYear, $page);
$ids = $searchResult['ids'];
$totalArticles = $searchResult['total'];
if (empty($ids)) {
return ['experts' => [], 'total' => 0, 'articles_scanned' => 0, 'source' => 'pmc'];
return $this->buildPagedResult([], 0, 0, $totalArticles, $page, $perPage, 'pubmed');
}
$allAuthors = [];
$batches = array_chunk($ids, 20);
$batches = array_chunk($ids, 50);
foreach ($batches as $batch) {
$xml = $this->efetch('pmc', $batch);
$authors = $this->parsePMCXml($xml);
$allAuthors = array_merge($allAuthors, $authors);
$xml = $this->efetchWithRetry('pubmed', $batch);
if ($xml) {
$authors = $this->parsePubMedXml($xml);
$allAuthors = array_merge($allAuthors, $authors);
}
usleep(400000);
}
$experts = $this->aggregateExperts($allAuthors);
return [
'experts' => $experts,
'total' => count($experts),
'articles_scanned' => count($ids),
'source' => 'pmc',
];
return $this->buildPagedResult($experts, count($experts), count($ids), $totalArticles, $page, $perPage, 'pubmed');
}
// ==================== PMC Search ====================
/**
 * Run one page of a PMC search and aggregate the authors found.
 *
 * Article ids come from esearch(); their XML is fetched in chunks of 5 with
 * retry, parsed with parsePMCXml() and aggregated into experts. Chunks whose
 * fetch yields nothing are skipped. A 0.5 s pause follows every chunk.
 *
 * Returns the structure produced by buildPagedResult() with source 'pmc'.
 */
private function searchViaPMC($keyword, $perPage, $minYear, $page = 1)
{
    set_time_limit(600);

    $found        = $this->esearch('pmc', $keyword, $perPage, $minYear, $page);
    $articleIds   = $found['ids'];
    $articleTotal = $found['total'];

    if (empty($articleIds)) {
        return $this->buildPagedResult([], 0, 0, $articleTotal, $page, $perPage, 'pmc');
    }

    $authorRecords = [];
    foreach (array_chunk($articleIds, 5) as $chunk) {
        $xml = $this->efetchWithRetry('pmc', $chunk);
        if ($xml) {
            $authorRecords = array_merge($authorRecords, $this->parsePMCXml($xml));
        }
        usleep(500000);
    }

    $experts = $this->aggregateExperts($authorRecords);

    return $this->buildPagedResult(
        $experts,
        count($experts),
        count($articleIds),
        $articleTotal,
        $page,
        $perPage,
        'pmc'
    );
}
// ==================== NCBI API Calls ====================
private function esearch($db, $keyword, $maxResults, $minYear)
private function esearch($db, $keyword, $perPage, $minYear, $page = 1)
{
$term = $keyword . ' AND ' . $minYear . ':' . date('Y') . '[pdat]';
$term = $keyword . ' AND ' . $minYear . ':' . date('Y') . '[pdat]';
$retstart = ($page - 1) * $perPage;
$response = $this->httpClient->get($this->ncbiBaseUrl . 'esearch.fcgi', [
'query' => [
'db' => $db,
'term' => $term,
'retmax' => $maxResults,
'retmode' => 'json',
'sort' => 'relevance',
'db' => $db,
'term' => $term,
'retstart' => $retstart,
'retmax' => $perPage,
'retmode' => 'json',
'sort' => 'relevance',
],
]);
$data = json_decode($response->getBody()->getContents(), true);
$data = json_decode($response->getBody()->getContents(), true);
$ids = $data['esearchresult']['idlist'] ?? [];
$total = intval($data['esearchresult']['count'] ?? 0);
return $data['esearchresult']['idlist'] ?? [];
return ['ids' => $ids, 'total' => $total];
}
private function efetch($db, $ids)
@@ -207,6 +498,36 @@ class ExpertFinder extends Base
return $response->getBody()->getContents();
}
/**
 * Call efetch() with retries and, as a last resort, by splitting the id list.
 *
 * Up to $maxRetries attempts are made, sleeping 2 s x attempt number after each
 * failure except the last. When the last attempt fails and more than one id
 * was requested, the list is halved and each half is fetched with 2 retries;
 * the two results are combined with mergeXml(). A single id that keeps failing
 * yields null.
 */
private function efetchWithRetry($db, $ids, $maxRetries = 3)
{
    $attempt = 1;
    while ($attempt <= $maxRetries) {
        try {
            return $this->efetch($db, $ids);
        } catch (\Exception $e) {
            if ($attempt !== $maxRetries) {
                // Linear back-off before the next attempt.
                sleep($attempt * 2);
                $attempt++;
                continue;
            }

            if (count($ids) <= 1) {
                return null;
            }

            // Out of retries: try the two halves separately.
            $splitAt = ceil(count($ids) / 2);
            $left    = $this->efetchWithRetry($db, array_slice($ids, 0, $splitAt), 2);
            $right   = $this->efetchWithRetry($db, array_slice($ids, $splitAt), 2);

            return $this->mergeXml($left, $right);
        }
    }

    return null;
}
/**
 * Combine two XML responses into one document.
 *
 * If either input is empty the other is returned unchanged. Otherwise the
 * children of the second document's root element are appended to the first
 * document's root, so the result is a single well-formed document. Plain
 * concatenation (the previous behaviour) produces two root elements, which
 * XML parsers reject; it is kept only as a fallback when the DOM extension is
 * unavailable or either input cannot be parsed.
 */
private function mergeXml($xml1, $xml2)
{
    if (empty($xml1)) return $xml2;
    if (empty($xml2)) return $xml1;

    if (class_exists('DOMDocument')) {
        // Collect parse errors silently; restore the caller's setting afterwards.
        $previous = libxml_use_internal_errors(true);
        try {
            $target = new \DOMDocument();
            $source = new \DOMDocument();
            if ($target->loadXML($xml1, LIBXML_NONET)
                && $source->loadXML($xml2, LIBXML_NONET)
                && $target->documentElement !== null
                && $source->documentElement !== null
            ) {
                // importNode() copies, so the source child list is not mutated
                // while it is being iterated.
                foreach ($source->documentElement->childNodes as $child) {
                    $target->documentElement->appendChild($target->importNode($child, true));
                }
                $merged = $target->saveXML();
                if ($merged !== false) {
                    return $merged;
                }
            }
        } finally {
            libxml_clear_errors();
            libxml_use_internal_errors($previous);
        }
    }

    return $xml1 . "\n" . $xml2;
}
// ==================== PubMed XML Parsing ====================
private function parsePubMedXml($xmlString)
@@ -434,6 +755,25 @@ class ExpertFinder extends Base
return $results;
}
// ==================== Pagination ====================
/**
 * Assemble the standard paged search result array.
 *
 * total_pages is ceil(totalArticles / perPage), or 0 when there are no
 * articles; has_more is true while the current page is below total_pages.
 */
private function buildPagedResult($experts, $expertCount, $articlesScanned, $totalArticles, $page, $perPage, $source)
{
    $pageCount = 0;
    if ($totalArticles > 0) {
        $pageCount = ceil($totalArticles / $perPage);
    }

    $result = [];
    $result['experts']          = $experts;
    $result['total']            = $expertCount;
    $result['articles_scanned'] = $articlesScanned;
    $result['total_articles']   = $totalArticles;
    $result['page']             = $page;
    $result['per_page']         = $perPage;
    $result['total_pages']      = $pageCount;
    $result['has_more']         = $page < $pageCount;
    $result['source']           = $source;

    return $result;
}
// ==================== Aggregation ====================
private function aggregateExperts($authorRecords)
@@ -482,21 +822,23 @@ class ExpertFinder extends Base
// ==================== Excel Export ====================
private function generateExcel($experts, $keyword)
private function generateExcel($experts, $keyword, $page = 1)
{
$spreadsheet = new \PhpOffice\PhpSpreadsheet\Spreadsheet();
$sheet = $spreadsheet->getActiveSheet();
vendor("PHPExcel.PHPExcel");
$objPHPExcel = new \PHPExcel();
$sheet = $objPHPExcel->getActiveSheet();
$sheet->setTitle('Experts');
$headers = ['#', 'Name', 'Email', 'Affiliation', 'Paper Count', 'Representative Papers'];
$headers = ['A' => '#', 'B' => 'Name', 'C' => 'Email', 'D' => 'Affiliation', 'E' => 'Paper Count', 'F' => 'Representative Papers'];
foreach ($headers as $col => $header) {
$sheet->setCellValueByColumnAndRow($col + 1, 1, $header);
$sheet->setCellValue($col . '1', $header);
}
$headerStyle = [
'font' => ['bold' => true, 'color' => ['rgb' => 'FFFFFF']],
'fill' => ['fillType' => \PhpOffice\PhpSpreadsheet\Style\Fill::FILL_SOLID, 'startColor' => ['rgb' => '4472C4']],
'alignment' => ['horizontal' => \PhpOffice\PhpSpreadsheet\Style\Alignment::HORIZONTAL_CENTER],
'fill' => ['type' => \PHPExcel_Style_Fill::FILL_SOLID, 'startcolor' => ['rgb' => '4472C4']],
'alignment' => ['horizontal' => \PHPExcel_Style_Alignment::HORIZONTAL_CENTER],
];
$sheet->getStyle('A1:F1')->applyFromArray($headerStyle);
@@ -506,12 +848,12 @@ class ExpertFinder extends Base
return $p['title'];
}, $expert['papers']);
$sheet->setCellValueByColumnAndRow(1, $row, $i + 1);
$sheet->setCellValueByColumnAndRow(2, $row, $expert['name']);
$sheet->setCellValueByColumnAndRow(3, $row, $expert['email']);
$sheet->setCellValueByColumnAndRow(4, $row, $expert['affiliation']);
$sheet->setCellValueByColumnAndRow(5, $row, $expert['paper_count']);
$sheet->setCellValueByColumnAndRow(6, $row, implode("\n", $paperTitles));
$sheet->setCellValue('A' . $row, $i + 1);
$sheet->setCellValue('B' . $row, $expert['name']);
$sheet->setCellValue('C' . $row, $expert['email']);
$sheet->setCellValue('D' . $row, $expert['affiliation']);
$sheet->setCellValue('E' . $row, $expert['paper_count']);
$sheet->setCellValue('F' . $row, implode("\n", $paperTitles));
}
$sheet->getColumnDimension('A')->setWidth(6);
@@ -521,7 +863,7 @@ class ExpertFinder extends Base
$sheet->getColumnDimension('E')->setWidth(12);
$sheet->getColumnDimension('F')->setWidth(60);
$filename = 'experts_' . preg_replace('/[^a-zA-Z0-9]/', '_', $keyword) . '_' . date('Ymd_His') . '.xlsx';
$filename = 'experts_' . preg_replace('/[^a-zA-Z0-9]/', '_', $keyword) . '_p' . $page . '_' . date('Ymd_His') . '.xlsx';
$filepath = ROOT_PATH . 'public' . DS . 'exports' . DS . $filename;
$dir = ROOT_PATH . 'public' . DS . 'exports';
@@ -529,7 +871,7 @@ class ExpertFinder extends Base
mkdir($dir, 0777, true);
}
$writer = new \PhpOffice\PhpSpreadsheet\Writer\Xlsx($spreadsheet);
$writer = \PHPExcel_IOFactory::createWriter($objPHPExcel, 'Excel2007');
$writer->save($filepath);
return jsonSuccess([
@@ -539,6 +881,119 @@ class ExpertFinder extends Base
]);
}
// ==================== Database Storage ====================
/**
 * Persist search results into the expert table, keyed by email.
 *
 * Experts without an email are ignored. For an email that already exists the
 * search field is linked to the existing expert; otherwise a new expert row is
 * inserted (state 0, ltime 0) and linked to the field. An insert that throws
 * is counted as "existing".
 *
 * Returns ['inserted' => int, 'existing' => int, 'field_enriched' => int],
 * where field_enriched counts new field links added to already-known experts.
 */
private function saveExperts($experts, $field, $source)
{
    $newCount      = 0;
    $knownCount    = 0;
    $enrichedCount = 0;

    foreach ($experts as $candidate) {
        $email = strtolower(trim($candidate['email']));
        if (empty($email)) {
            continue;
        }

        $known = Db::name('expert')->where('email', $email)->find();

        if ($known) {
            $knownCount++;
            $enrichedCount += $this->enrichExpertField($known['expert_id'], $field);
        } else {
            $row = [
                'name'        => mb_substr($candidate['name'], 0, 255),
                'email'       => mb_substr($email, 0, 128),
                'affiliation' => mb_substr($candidate['affiliation'], 0, 128),
                'source'      => mb_substr($source, 0, 128),
                'ctime'       => time(),
                'ltime'       => 0,
                'state'       => 0,
            ];

            try {
                $newId = Db::name('expert')->insertGetId($row);
                $this->enrichExpertField($newId, $field);
                $newCount++;
            } catch (\Exception $e) {
                $knownCount++;
            }
        }
    }

    return [
        'inserted'       => $newCount,
        'existing'       => $knownCount,
        'field_enriched' => $enrichedCount,
    ];
}
// ==================== Fetch Log (t_expert_fetch) ====================
/**
 * Load the expert_fetch progress row for a field/source pair.
 *
 * When no row exists a zeroed placeholder (last_page, total_pages, last_time)
 * is returned instead.
 */
private function getFetchLog($field, $source)
{
    $row = Db::name('expert_fetch')
        ->where('field', $field)
        ->where('source', $source)
        ->find();

    return $row ? $row : ['last_page' => 0, 'total_pages' => 0, 'last_time' => 0];
}
/**
 * Record fetch progress for a field/source pair in expert_fetch.
 *
 * Updates the existing row when one is found, otherwise inserts a new row
 * (field and source truncated to 128 characters). last_time is set to now.
 */
private function updateFetchLog($field, $source, $lastPage, $totalPages)
{
    $progress = [
        'last_page'   => $lastPage,
        'total_pages' => $totalPages,
        'last_time'   => time(),
    ];

    $current = Db::name('expert_fetch')
        ->where('field', $field)
        ->where('source', $source)
        ->find();

    if (!$current) {
        $identity = [
            'field'  => mb_substr($field, 0, 128),
            'source' => mb_substr($source, 0, 128),
        ];
        Db::name('expert_fetch')->insert($identity + $progress);
        return;
    }

    Db::name('expert_fetch')
        ->where('expert_fetch_id', $current['expert_fetch_id'])
        ->update($progress);
}
/**
 * Link a field to an expert unless an active link already exists.
 *
 * Returns 1 when a new expert_field row (major_id 0, state 0) was inserted,
 * 0 when the field is blank or an active (state = 0) row for it is present.
 */
private function enrichExpertField($expertId, $field)
{
    $field = trim($field);
    if (empty($field)) {
        return 0;
    }

    $alreadyLinked = Db::name('expert_field')
        ->where('expert_id', $expertId)
        ->where('field', $field)
        ->where('state', 0)
        ->find();

    if (!$alreadyLinked) {
        Db::name('expert_field')->insert([
            'expert_id' => $expertId,
            'major_id'  => 0,
            'field'     => mb_substr($field, 0, 128),
            'state'     => 0,
        ]);
        return 1;
    }

    return 0;
}
// ==================== Helper Methods ====================
private function extractEmailFromText($text)