diff --git a/application/api/controller/Base.php b/application/api/controller/Base.php index b6373b3..ce739ee 100644 --- a/application/api/controller/Base.php +++ b/application/api/controller/Base.php @@ -88,7 +88,8 @@ class Base extends Controller protected $major_to_article_obj = ""; protected $paystation_obj = ""; protected $exchange_rate_obj = ""; - + protected $expert_obj = ""; + protected $journal_email_obj = ""; public function __construct(\think\Request $request = null) { @@ -171,6 +172,8 @@ class Base extends Controller $this->major_to_article_obj = Db::name("major_to_article"); $this->paystation_obj = Db::name("paystation"); $this->exchange_rate_obj = Db::name("exchange_rate"); + $this->expert_obj = Db::name("expert"); + $this->journal_email_obj = Db::name("journal_email"); } diff --git a/application/api/controller/EmailClient.php b/application/api/controller/EmailClient.php new file mode 100644 index 0000000..5bb0a3c --- /dev/null +++ b/application/api/controller/EmailClient.php @@ -0,0 +1,448 @@ +request->param('journal_id', 0)); + if (!$journalId) { + return jsonError('journal_id is required'); + } + + $list = Db::name('journal_email') + ->where('journal_id', $journalId) + ->order('state asc, j_email_id asc') + ->select(); + + foreach ($list as &$item) { + $item['smtp_password'] = '******'; + $item['remaining_today'] = max(0, $item['daily_limit'] - $item['today_sent']); + } + + return jsonSuccess($list); + } + + /** + * Add SMTP account + */ + public function addAccount() + { + $data = $this->request->post(); + + if (empty($data['journal_id']) || empty($data['smtp_host']) || empty($data['smtp_user']) || empty($data['smtp_password'])) { + return jsonError('journal_id, smtp_host, smtp_user, smtp_password are required'); + } + + $count = Db::name('journal_email') + ->where('journal_id', intval($data['journal_id'])) + ->where('state', 0) + ->count(); + if ($count >= 4) { + return jsonError('Each journal allows at most 4 SMTP accounts'); + } + + $insert = [ 
+ 'journal_id' => intval($data['journal_id']), + 'smtp_host' => trim($data['smtp_host']), + 'smtp_port' => intval($data['smtp_port'] ?? 465), + 'smtp_user' => trim($data['smtp_user']), + 'smtp_password' => trim($data['smtp_password']), + 'smtp_encryption' => in_array($data['smtp_encryption'] ?? 'ssl', ['ssl', 'tls']) ? $data['smtp_encryption'] : 'ssl', + 'smtp_from_name' => trim($data['smtp_from_name'] ?? ''), + 'daily_limit' => intval($data['daily_limit'] ?? 100), + 'today_sent' => 0, + 'state' => 0, + ]; + + $id = Db::name('journal_email')->insertGetId($insert); + + return jsonSuccess(['j_email_id' => $id]); + } + + /** + * Update SMTP account + */ + public function updateAccount() + { + $data = $this->request->post(); + $id = intval($data['j_email_id'] ?? 0); + + if (!$id) { + return jsonError('j_email_id is required'); + } + + $account = Db::name('journal_email')->where('j_email_id', $id)->find(); + if (!$account) { + return jsonError('Account not found'); + } + + $update = []; + $fields = ['smtp_host', 'smtp_port', 'smtp_user', 'smtp_password', 'smtp_encryption', 'smtp_from_name', 'daily_limit', 'state']; + foreach ($fields as $field) { + if (isset($data[$field])) { + $update[$field] = $data[$field]; + } + } + + if (isset($update['smtp_port'])) { + $update['smtp_port'] = intval($update['smtp_port']); + } + if (isset($update['daily_limit'])) { + $update['daily_limit'] = intval($update['daily_limit']); + } + if (isset($update['state'])) { + $update['state'] = intval($update['state']); + } + if (isset($update['smtp_encryption']) && !in_array($update['smtp_encryption'], ['ssl', 'tls'])) { + $update['smtp_encryption'] = 'ssl'; + } + + if (empty($update)) { + return jsonError('No fields to update'); + } + + Db::name('journal_email')->where('j_email_id', $id)->update($update); + + return jsonSuccess(['j_email_id' => $id]); + } + + /** + * Delete SMTP account + */ + public function deleteAccount() + { + $id = intval($this->request->param('j_email_id', 0)); + if (!$id) 
{ + return jsonError('j_email_id is required'); + } + + Db::name('journal_email')->where('j_email_id', $id)->delete(); + + return jsonSuccess(['deleted' => $id]); + } + + /** + * Test SMTP connection + */ + public function testAccount() + { + $id = intval($this->request->param('j_email_id', 0)); + $testEmail = trim($this->request->param('test_email', '')); + + if (!$id) { + return jsonError('j_email_id is required'); + } + if (empty($testEmail)) { + return jsonError('test_email is required'); + } + + $account = Db::name('journal_email')->where('j_email_id', $id)->find(); + if (!$account) { + return jsonError('Account not found'); + } + + $result = $this->doSendEmail($account, $testEmail, 'SMTP Test', '

This is a test email from your journal system.

'); + + if ($result['status'] === 1) { + return jsonSuccess(['msg' => 'Test email sent successfully']); + } else { + return jsonError('Send failed: ' . $result['data']); + } + } + + // ==================== Email Sending ==================== + + /** + * Send a single email using a specific journal's SMTP + * Params: journal_id, to_email, subject, content, j_email_id(optional, auto-select if empty) + */ + public function sendOne() + { + $journalId = intval($this->request->param('journal_id', 0)); + $toEmail = trim($this->request->param('to_email', '')); + $subject = trim($this->request->param('subject', '')); + $content = $this->request->param('content', ''); + $accountId = intval($this->request->param('j_email_id', 0)); + + if (!$journalId || empty($toEmail) || empty($subject) || empty($content)) { + return jsonError('journal_id, to_email, subject, content are required'); + } + + if ($accountId) { + $account = Db::name('journal_email') + ->where('j_email_id', $accountId) + ->where('journal_id', $journalId) + ->where('state', 0) + ->find(); + } else { + $account = $this->pickSmtpAccount($journalId); + } + + if (!$account) { + return jsonError('No available SMTP account (all disabled or daily limit reached)'); + } + + $this->resetDailyCountIfNeeded($account); + + if ($account['today_sent'] >= $account['daily_limit']) { + $account = $this->pickSmtpAccount($journalId); + if (!$account) { + return jsonError('All SMTP accounts have reached daily limit'); + } + } + + $result = $this->doSendEmail($account, $toEmail, $subject, $content); + + if ($result['status'] === 1) { + Db::name('journal_email')->where('j_email_id', $account['j_email_id'])->setInc('today_sent'); + return jsonSuccess([ + 'msg' => 'Email sent', + 'from' => $account['smtp_user'], + 'to' => $toEmail, + 'j_email_id' => $account['j_email_id'], + ]); + } else { + return jsonError('Send failed: ' . 
$result['data']); + } + } + + /** + * Send batch emails to multiple experts + * Params: journal_id, expert_ids (comma separated), subject, content + * Content supports variables: {name}, {email}, {affiliation} + */ + public function sendBatch() + { + $journalId = intval($this->request->param('journal_id', 0)); + $expertIds = trim($this->request->param('expert_ids', '')); + $subject = trim($this->request->param('subject', '')); + $content = $this->request->param('content', ''); + + if (!$journalId || empty($expertIds) || empty($subject) || empty($content)) { + return jsonError('journal_id, expert_ids, subject, content are required'); + } + + $ids = array_map('intval', explode(',', $expertIds)); + $experts = Db::name('expert')->where('expert_id', 'in', $ids)->where('state', 0)->select(); + + if (empty($experts)) { + return jsonError('No valid experts found (state must be 0)'); + } + + $sent = 0; + $failed = 0; + $skipped = 0; + $errors = []; + + foreach ($experts as $expert) { + $account = $this->pickSmtpAccount($journalId); + if (!$account) { + $skipped += (count($experts) - $sent - $failed); + $errors[] = 'All SMTP accounts reached daily limit, stopped at #' . ($sent + $failed + 1); + break; + } + + $personalContent = $this->replaceVariables($content, $expert); + $personalSubject = $this->replaceVariables($subject, $expert); + + $result = $this->doSendEmail($account, $expert['email'], $personalSubject, $personalContent); + + if ($result['status'] === 1) { + Db::name('journal_email')->where('j_email_id', $account['j_email_id'])->setInc('today_sent'); + Db::name('expert')->where('expert_id', $expert['expert_id'])->update(['state' => 1]); + $sent++; + } else { + $failed++; + $errors[] = $expert['email'] . ': ' . 
$result['data']; + } + + $delay = rand(30, 60); + sleep($delay); + } + + return jsonSuccess([ + 'sent' => $sent, + 'failed' => $failed, + 'skipped' => $skipped, + 'errors' => $errors, + ]); + } + + /** + * Get sending stats for a journal + */ + public function getStats() + { + $journalId = intval($this->request->param('journal_id', 0)); + if (!$journalId) { + return jsonError('journal_id is required'); + } + + $accounts = Db::name('journal_email') + ->where('journal_id', $journalId) + ->where('state', 0) + ->select(); + + $totalLimit = 0; + $totalSent = 0; + $details = []; + + foreach ($accounts as $acc) { + $this->resetDailyCountIfNeeded($acc); + $totalLimit += $acc['daily_limit']; + $totalSent += $acc['today_sent']; + $details[] = [ + 'j_email_id' => $acc['j_email_id'], + 'smtp_user' => $acc['smtp_user'], + 'daily_limit' => $acc['daily_limit'], + 'today_sent' => $acc['today_sent'], + 'remaining' => max(0, $acc['daily_limit'] - $acc['today_sent']), + ]; + } + + return jsonSuccess([ + 'total_daily_limit' => $totalLimit, + 'total_today_sent' => $totalSent, + 'total_remaining' => max(0, $totalLimit - $totalSent), + 'accounts' => $details, + ]); + } + + /** + * Reset daily sent count (called by cron or manually) + */ + public function resetDailyCount() + { + $journalId = intval($this->request->param('journal_id', 0)); + + if ($journalId) { + $count = Db::name('journal_email') + ->where('journal_id', $journalId) + ->update(['today_sent' => 0]); + } else { + $count = Db::name('journal_email')->update(['today_sent' => 0]); + } + + return jsonSuccess(['reset_count' => $count]); + } + + // ==================== Internal Methods ==================== + + /** + * Pick the best available SMTP account for a journal + * Strategy: choose the account with the most remaining quota + */ + private function pickSmtpAccount($journalId) + { + $accounts = Db::name('journal_email') + ->where('journal_id', $journalId) + ->where('state', 0) + ->select(); + + if (empty($accounts)) { + return 
null; + } + + $best = null; + $bestRemaining = -1; + + foreach ($accounts as $acc) { + $this->resetDailyCountIfNeeded($acc); + $remaining = $acc['daily_limit'] - $acc['today_sent']; + if ($remaining > 0 && $remaining > $bestRemaining) { + $best = $acc; + $bestRemaining = $remaining; + } + } + + return $best; + } + + /** + * Reset today_sent if the date has changed + */ + private function resetDailyCountIfNeeded(&$account) + { + $todayDate = date('Y-m-d'); + $cacheKey = 'smtp_reset_' . $account['j_email_id']; + $lastReset = \think\Cache::get($cacheKey); + + if ($lastReset !== $todayDate) { + Db::name('journal_email') + ->where('j_email_id', $account['j_email_id']) + ->update(['today_sent' => 0]); + $account['today_sent'] = 0; + \think\Cache::set($cacheKey, $todayDate, 86400); + } + } + + /** + * Actually send an email using PHPMailer with given SMTP config + */ + private function doSendEmail($account, $toEmail, $subject, $htmlContent) + { + try { + $mail = new PHPMailer(true); + $mail->isSMTP(); + $mail->SMTPDebug = 0; + $mail->CharSet = 'UTF-8'; + $mail->Host = $account['smtp_host']; + $mail->Port = intval($account['smtp_port']); + $mail->SMTPAuth = true; + $mail->Username = $account['smtp_user']; + $mail->Password = $account['smtp_password']; + + if ($account['smtp_encryption'] === 'ssl') { + $mail->SMTPSecure = 'ssl'; + } elseif ($account['smtp_encryption'] === 'tls') { + $mail->SMTPSecure = 'tls'; + } else { + $mail->SMTPSecure = false; + $mail->SMTPAutoTLS = false; + } + + $fromName = !empty($account['smtp_from_name']) ? 
$account['smtp_from_name'] : $account['smtp_user']; + $mail->setFrom($account['smtp_user'], $fromName); + $mail->addReplyTo($account['smtp_user'], $fromName); + $mail->addAddress($toEmail); + + $mail->isHTML(true); + $mail->Subject = $subject; + $mail->Body = $htmlContent; + $mail->AltBody = strip_tags($htmlContent); + + $mail->send(); + + return ['status' => 1, 'data' => 'success']; + } catch (\Exception $e) { + return ['status' => 0, 'data' => $e->getMessage()]; + } + } + + /** + * Replace template variables with expert data + */ + private function replaceVariables($template, $expert) + { + $vars = [ + '{name}' => $expert['name'] ?? '', + '{email}' => $expert['email'] ?? '', + '{affiliation}' => $expert['affiliation'] ?? '', + '{field}' => $expert['field'] ?? '', + ]; + + return str_replace(array_keys($vars), array_values($vars), $template); + } +} diff --git a/application/api/controller/ExpertFinder.php b/application/api/controller/ExpertFinder.php index 498a87d..9c1d2d2 100644 --- a/application/api/controller/ExpertFinder.php +++ b/application/api/controller/ExpertFinder.php @@ -3,6 +3,7 @@ namespace app\api\controller; use think\Cache; +use think\Db; use GuzzleHttp\Client; class ExpertFinder extends Base @@ -14,8 +15,9 @@ class ExpertFinder extends Base { parent::__construct($request); $this->httpClient = new Client([ - 'timeout' => 60, - 'verify' => false, + 'timeout' => 180, + 'connect_timeout' => 15, + 'verify' => false, ]); } @@ -23,24 +25,24 @@ class ExpertFinder extends Base * Main search endpoint * Params: * keyword - search term (e.g. 
"biomedical engineering") - * max_results - max articles to scan, default 200, max 1000 + * page - page number, default 1 + * per_page - articles per page, default 100, max 100 * min_year - earliest publication year, default current-3 * source - "pubmed" (fast, email from affiliation) or "pmc" (slower, structured email) */ public function search() { - $keyword = trim($this->request->param('keyword', '')); - $maxResults = intval($this->request->param('max_results', 200)); - $minYear = intval($this->request->param('min_year', date('Y') - 3)); - $source = $this->request->param('source', 'pubmed'); + $keyword = trim($this->request->param('keyword', '')); + $page = max(1, intval($this->request->param('page', 1))); + $perPage = max(10, min(intval($this->request->param('per_page', 100)), 100)); + $minYear = intval($this->request->param('min_year', date('Y') - 3)); + $source = $this->request->param('source', 'pubmed'); if (empty($keyword)) { return jsonError('keyword is required'); } - $maxResults = max(10, min($maxResults, 1000)); - - $cacheKey = 'expert_finder_' . md5($keyword . $maxResults . $minYear . $source); + $cacheKey = 'expert_finder_' . md5($keyword . $page . $perPage . $minYear . $source); $cached = Cache::get($cacheKey); if ($cached) { return jsonSuccess($cached); @@ -48,45 +50,200 @@ class ExpertFinder extends Base try { if ($source === 'pmc') { - $result = $this->searchViaPMC($keyword, $maxResults, $minYear); + $result = $this->searchViaPMC($keyword, $perPage, $minYear, $page); } else { - $result = $this->searchViaPubMed($keyword, $maxResults, $minYear); + $result = $this->searchViaPubMed($keyword, $perPage, $minYear, $page); } } catch (\Exception $e) { return jsonError('Search failed: ' . 
$e->getMessage()); } + $saveResult = $this->saveExperts($result['experts'], $keyword, $source); + $result['saved_new'] = $saveResult['inserted']; + $result['saved_exist'] = $saveResult['existing']; + Cache::set($cacheKey, $result, 3600); return jsonSuccess($result); } + /** + * Get experts from local database + * Params: + * field - filter by field keyword (searches t_expert_field) + * major_id - filter by major_id (searches t_expert_field) + * state - filter by state (0-5), -1 for all + * keyword - search name/email/affiliation + * no_recent - if 1, exclude experts promoted within last N days (default 30) + * recent_days - days threshold for no_recent filter, default 30 + * page - page number, default 1 + * per_page - items per page, default 20 + * min_experts - auto-fetch threshold, default 50 + */ + public function getList() + { + $field = trim($this->request->param('field', '')); + $majorId = intval($this->request->param('major_id', 0)); + $state = $this->request->param('state', '-1'); + $keyword = trim($this->request->param('keyword', '')); + $noRecent = intval($this->request->param('no_recent', 0)); + $recentDays = max(1, intval($this->request->param('recent_days', 30))); + $page = max(1, intval($this->request->param('page', 1))); + $perPage = max(1, min(intval($this->request->param('per_page', 20)), 100)); + $minExperts = max(0, intval($this->request->param('min_experts', 50))); + + $query = Db::name('expert')->alias('e'); + $needJoin = ($field !== '' || $majorId > 0); + + if ($needJoin) { + $query->join('t_expert_field ef', 'ef.expert_id = e.expert_id AND ef.state = 0', 'inner'); + if ($field !== '') { + $query->where('ef.field', 'like', '%' . $field . '%'); + } + if ($majorId > 0) { + $query->where('ef.major_id', $majorId); + } + $query->group('e.expert_id'); + } + + if ($state !== '-1' && $state !== '') { + $query->where('e.state', intval($state)); + } + if ($keyword !== '') { + $query->where('e.name|e.email|e.affiliation', 'like', '%' . $keyword . 
'%'); + } + if ($noRecent) { + $cutoff = time() - ($recentDays * 86400); + $query->where(function ($q) use ($cutoff) { + $q->where('e.ltime', 0)->whereOr('e.ltime', '<', $cutoff); + }); + } + + $countQuery = clone $query; + $total = $countQuery->count('distinct e.expert_id'); + + $list = $query + ->field('e.*') + ->order('e.ctime desc') + ->page($page, $perPage) + ->select(); + + foreach ($list as &$item) { + $item['fields'] = Db::name('expert_field') + ->where('expert_id', $item['expert_id']) + ->where('state', 0) + ->column('field'); + } + + $fetching = false; + if ($field !== '' && $total < $minExperts && $minExperts > 0) { + $this->triggerBackgroundFetch($field); + $fetching = true; + } + + return jsonSuccess([ + 'list' => $list, + 'total' => $total, + 'page' => $page, + 'per_page' => $perPage, + 'total_pages' => $total > 0 ? ceil($total / $perPage) : 0, + 'fetching' => $fetching, + ]); + } + + /** + * Get all fields associated with an expert + */ + public function getExpertFields() + { + $expertId = intval($this->request->param('expert_id', 0)); + if (!$expertId) { + return jsonError('expert_id is required'); + } + + $fields = Db::name('expert_field') + ->where('expert_id', $expertId) + ->where('state', 0) + ->select(); + + return jsonSuccess($fields); + } + + /** + * Update expert state + * Params: + * expert_id - single id or comma-separated ids + * state - new state (0-5) + */ + public function updateState() + { + $expertId = $this->request->param('expert_id', ''); + $state = intval($this->request->param('state', 0)); + + if (empty($expertId)) { + return jsonError('expert_id is required'); + } + if ($state < 0 || $state > 5) { + return jsonError('state must be 0-5'); + } + + $ids = array_map('intval', explode(',', $expertId)); + $count = Db::name('expert')->where('expert_id', 'in', $ids)->update(['state' => $state]); + + return jsonSuccess(['updated' => $count]); + } + + /** + * Delete expert (soft: set state=5 blacklist, or hard delete) + * Params: + * 
expert_id - single id or comma-separated ids + * hard - 1 for hard delete, default 0 (blacklist) + */ + public function deleteExpert() + { + $expertId = $this->request->param('expert_id', ''); + $hard = intval($this->request->param('hard', 0)); + + if (empty($expertId)) { + return jsonError('expert_id is required'); + } + + $ids = array_map('intval', explode(',', $expertId)); + + if ($hard) { + $count = Db::name('expert')->where('expert_id', 'in', $ids)->delete(); + } else { + $count = Db::name('expert')->where('expert_id', 'in', $ids)->update(['state' => 5]); + } + + return jsonSuccess(['affected' => $count]); + } + /** * Export search results to Excel - * Same params as search() + * Same params as search(), exports current page results */ public function export() { - $keyword = trim($this->request->param('keyword', '')); - $maxResults = intval($this->request->param('max_results', 200)); - $minYear = intval($this->request->param('min_year', date('Y') - 3)); - $source = $this->request->param('source', 'pubmed'); + $keyword = trim($this->request->param('keyword', '')); + $page = max(1, intval($this->request->param('page', 1))); + $perPage = max(10, min(intval($this->request->param('per_page', 100)), 100)); + $minYear = intval($this->request->param('min_year', date('Y') - 3)); + $source = $this->request->param('source', 'pubmed'); if (empty($keyword)) { return jsonError('keyword is required'); } - $maxResults = max(10, min($maxResults, 1000)); - - $cacheKey = 'expert_finder_' . md5($keyword . $maxResults . $minYear . $source); + $cacheKey = 'expert_finder_' . md5($keyword . $page . $perPage . $minYear . 
$source); $cached = Cache::get($cacheKey); if (!$cached) { try { if ($source === 'pmc') { - $cached = $this->searchViaPMC($keyword, $maxResults, $minYear); + $cached = $this->searchViaPMC($keyword, $perPage, $minYear, $page); } else { - $cached = $this->searchViaPubMed($keyword, $maxResults, $minYear); + $cached = $this->searchViaPubMed($keyword, $perPage, $minYear, $page); } Cache::set($cacheKey, $cached, 3600); } catch (\Exception $e) { @@ -98,7 +255,7 @@ class ExpertFinder extends Base return jsonError('No experts found to export'); } - return $this->generateExcel($cached['experts'], $keyword); + return $this->generateExcel($cached['experts'], $keyword, $page); } /** @@ -117,81 +274,215 @@ class ExpertFinder extends Base return jsonSuccess(['msg' => 'Cache cleared']); } - // ==================== PubMed Search ==================== + // ==================== Cron / Auto Fetch ==================== - private function searchViaPubMed($keyword, $maxResults, $minYear) + /** + * Cron job: daily fetch experts for given keywords + * Params: + * keywords - comma-separated keywords (e.g. "biomedical engineering,tissue engineering") + * source - pubmed or pmc, default pubmed + * per_page - articles per page, default 100 + * min_year - default current-3 + * + * Uses cache to remember which page was last fetched per keyword, + * so each cron run fetches the next page automatically. 
+ */ + public function cronFetch() { - $ids = $this->esearch('pubmed', $keyword, $maxResults, $minYear); - if (empty($ids)) { - return ['experts' => [], 'total' => 0, 'articles_scanned' => 0, 'source' => 'pubmed']; + $keywordsStr = trim($this->request->param('keywords', '')); + $source = $this->request->param('source', 'pubmed'); + $perPage = max(10, min(intval($this->request->param('per_page', 100)), 100)); + $minYear = intval($this->request->param('min_year', date('Y') - 3)); + + if (empty($keywordsStr)) { + return jsonError('keywords is required'); } - $allAuthors = []; - $batches = array_chunk($ids, 200); - foreach ($batches as $batch) { - $xml = $this->efetch('pubmed', $batch); - $authors = $this->parsePubMedXml($xml); - $allAuthors = array_merge($allAuthors, $authors); - usleep(340000); + set_time_limit(0); + $keywords = array_map('trim', explode(',', $keywordsStr)); + $report = []; + + foreach ($keywords as $kw) { + if (empty($kw)) { + continue; + } + + $fetchLog = $this->getFetchLog($kw, $source); + $page = $fetchLog['last_page'] + 1; + + try { + if ($source === 'pmc') { + $result = $this->searchViaPMC($kw, $perPage, $minYear, $page); + } else { + $result = $this->searchViaPubMed($kw, $perPage, $minYear, $page); + } + + $saveResult = $this->saveExperts($result['experts'], $kw, $source); + + $nextPage = $result['has_more'] ? $page : 0; + $totalPages = isset($result['total_pages']) ? 
$result['total_pages'] : 0; + $this->updateFetchLog($kw, $source, $nextPage, $totalPages); + + $report[] = [ + 'keyword' => $kw, + 'page' => $page, + 'experts_found' => $result['total'], + 'saved_new' => $saveResult['inserted'], + 'saved_exist' => $saveResult['existing'], + 'field_enriched' => $saveResult['field_enriched'], + 'has_more' => $result['has_more'], + ]; + } catch (\Exception $e) { + $report[] = [ + 'keyword' => $kw, + 'page' => $page, + 'error' => $e->getMessage(), + ]; + } + + sleep(2); } - $experts = $this->aggregateExperts($allAuthors); + return jsonSuccess(['report' => $report]); + } + + /** + * Trigger a background fetch for a specific field via queue + */ + private function triggerBackgroundFetch($field) + { + $lockKey = 'fetch_lock_' . md5($field); + if (Cache::get($lockKey)) { + return; + } + Cache::set($lockKey, 1, 300); + + \think\Queue::push('app\api\job\FetchExperts', [ + 'field' => $field, + 'source' => 'pubmed', + 'per_page' => 100, + 'min_year' => date('Y') - 3, + ], 'FetchExperts'); + } + + /** + * Internal method: run a fetch for a single keyword (used by both cron and queue job) + */ + public function doFetchForField($field, $source = 'pubmed', $perPage = 100, $minYear = null) + { + if ($minYear === null) { + $minYear = date('Y') - 3; + } + + $fetchLog = $this->getFetchLog($field, $source); + $page = $fetchLog['last_page'] + 1; + + if ($source === 'pmc') { + $result = $this->searchViaPMC($field, $perPage, $minYear, $page); + } else { + $result = $this->searchViaPubMed($field, $perPage, $minYear, $page); + } + + $saveResult = $this->saveExperts($result['experts'], $field, $source); + + $nextPage = $result['has_more'] ? $page : 0; + $totalPages = isset($result['total_pages']) ? 
$result['total_pages'] : 0; + $this->updateFetchLog($field, $source, $nextPage, $totalPages); return [ - 'experts' => $experts, - 'total' => count($experts), - 'articles_scanned' => count($ids), - 'source' => 'pubmed', + 'keyword' => $field, + 'page' => $page, + 'experts_found' => $result['total'], + 'saved_new' => $saveResult['inserted'], + 'saved_exist' => $saveResult['existing'], + 'field_enriched' => $saveResult['field_enriched'], + 'has_more' => $result['has_more'], ]; } - // ==================== PMC Search ==================== + // ==================== PubMed Search ==================== - private function searchViaPMC($keyword, $maxResults, $minYear) + private function searchViaPubMed($keyword, $perPage, $minYear, $page = 1) { - $ids = $this->esearch('pmc', $keyword, $maxResults, $minYear); + set_time_limit(600); + + $searchResult = $this->esearch('pubmed', $keyword, $perPage, $minYear, $page); + $ids = $searchResult['ids']; + $totalArticles = $searchResult['total']; + if (empty($ids)) { - return ['experts' => [], 'total' => 0, 'articles_scanned' => 0, 'source' => 'pmc']; + return $this->buildPagedResult([], 0, 0, $totalArticles, $page, $perPage, 'pubmed'); } $allAuthors = []; - $batches = array_chunk($ids, 20); + $batches = array_chunk($ids, 50); foreach ($batches as $batch) { - $xml = $this->efetch('pmc', $batch); - $authors = $this->parsePMCXml($xml); - $allAuthors = array_merge($allAuthors, $authors); + $xml = $this->efetchWithRetry('pubmed', $batch); + if ($xml) { + $authors = $this->parsePubMedXml($xml); + $allAuthors = array_merge($allAuthors, $authors); + } usleep(400000); } $experts = $this->aggregateExperts($allAuthors); - return [ - 'experts' => $experts, - 'total' => count($experts), - 'articles_scanned' => count($ids), - 'source' => 'pmc', - ]; + return $this->buildPagedResult($experts, count($experts), count($ids), $totalArticles, $page, $perPage, 'pubmed'); + } + + // ==================== PMC Search ==================== + + private function 
searchViaPMC($keyword, $perPage, $minYear, $page = 1) + { + set_time_limit(600); + + $searchResult = $this->esearch('pmc', $keyword, $perPage, $minYear, $page); + $ids = $searchResult['ids']; + $totalArticles = $searchResult['total']; + + if (empty($ids)) { + return $this->buildPagedResult([], 0, 0, $totalArticles, $page, $perPage, 'pmc'); + } + + $allAuthors = []; + $batches = array_chunk($ids, 5); + foreach ($batches as $batch) { + $xml = $this->efetchWithRetry('pmc', $batch); + if ($xml) { + $authors = $this->parsePMCXml($xml); + $allAuthors = array_merge($allAuthors, $authors); + } + usleep(500000); + } + + $experts = $this->aggregateExperts($allAuthors); + + return $this->buildPagedResult($experts, count($experts), count($ids), $totalArticles, $page, $perPage, 'pmc'); } // ==================== NCBI API Calls ==================== - private function esearch($db, $keyword, $maxResults, $minYear) + private function esearch($db, $keyword, $perPage, $minYear, $page = 1) { - $term = $keyword . ' AND ' . $minYear . ':' . date('Y') . '[pdat]'; + $term = $keyword . ' AND ' . $minYear . ':' . date('Y') . '[pdat]'; + $retstart = ($page - 1) * $perPage; $response = $this->httpClient->get($this->ncbiBaseUrl . 'esearch.fcgi', [ 'query' => [ - 'db' => $db, - 'term' => $term, - 'retmax' => $maxResults, - 'retmode' => 'json', - 'sort' => 'relevance', + 'db' => $db, + 'term' => $term, + 'retstart' => $retstart, + 'retmax' => $perPage, + 'retmode' => 'json', + 'sort' => 'relevance', ], ]); - $data = json_decode($response->getBody()->getContents(), true); + $data = json_decode($response->getBody()->getContents(), true); + $ids = $data['esearchresult']['idlist'] ?? []; + $total = intval($data['esearchresult']['count'] ?? 0); - return $data['esearchresult']['idlist'] ?? 
[]; + return ['ids' => $ids, 'total' => $total]; } private function efetch($db, $ids) @@ -207,6 +498,36 @@ class ExpertFinder extends Base return $response->getBody()->getContents(); } + private function efetchWithRetry($db, $ids, $maxRetries = 3) + { + for ($attempt = 1; $attempt <= $maxRetries; $attempt++) { + try { + return $this->efetch($db, $ids); + } catch (\Exception $e) { + if ($attempt === $maxRetries) { + if (count($ids) > 1) { + $half = ceil(count($ids) / 2); + $firstHalf = array_slice($ids, 0, $half); + $secondHalf = array_slice($ids, $half); + $xml1 = $this->efetchWithRetry($db, $firstHalf, 2); + $xml2 = $this->efetchWithRetry($db, $secondHalf, 2); + return $this->mergeXml($xml1, $xml2); + } + return null; + } + sleep($attempt * 2); + } + } + return null; + } + + private function mergeXml($xml1, $xml2) + { + if (empty($xml1)) return $xml2; + if (empty($xml2)) return $xml1; + return $xml1 . "\n" . $xml2; + } + // ==================== PubMed XML Parsing ==================== private function parsePubMedXml($xmlString) @@ -434,6 +755,25 @@ class ExpertFinder extends Base return $results; } + // ==================== Pagination ==================== + + private function buildPagedResult($experts, $expertCount, $articlesScanned, $totalArticles, $page, $perPage, $source) + { + $totalPages = $totalArticles > 0 ? 
ceil($totalArticles / $perPage) : 0; + + return [ + 'experts' => $experts, + 'total' => $expertCount, + 'articles_scanned' => $articlesScanned, + 'total_articles' => $totalArticles, + 'page' => $page, + 'per_page' => $perPage, + 'total_pages' => $totalPages, + 'has_more' => $page < $totalPages, + 'source' => $source, + ]; + } + // ==================== Aggregation ==================== private function aggregateExperts($authorRecords) @@ -482,21 +822,23 @@ class ExpertFinder extends Base // ==================== Excel Export ==================== - private function generateExcel($experts, $keyword) + private function generateExcel($experts, $keyword, $page = 1) { - $spreadsheet = new \PhpOffice\PhpSpreadsheet\Spreadsheet(); - $sheet = $spreadsheet->getActiveSheet(); + vendor("PHPExcel.PHPExcel"); + + $objPHPExcel = new \PHPExcel(); + $sheet = $objPHPExcel->getActiveSheet(); $sheet->setTitle('Experts'); - $headers = ['#', 'Name', 'Email', 'Affiliation', 'Paper Count', 'Representative Papers']; + $headers = ['A' => '#', 'B' => 'Name', 'C' => 'Email', 'D' => 'Affiliation', 'E' => 'Paper Count', 'F' => 'Representative Papers']; foreach ($headers as $col => $header) { - $sheet->setCellValueByColumnAndRow($col + 1, 1, $header); + $sheet->setCellValue($col . 
'1', $header); } $headerStyle = [ 'font' => ['bold' => true, 'color' => ['rgb' => 'FFFFFF']], - 'fill' => ['fillType' => \PhpOffice\PhpSpreadsheet\Style\Fill::FILL_SOLID, 'startColor' => ['rgb' => '4472C4']], - 'alignment' => ['horizontal' => \PhpOffice\PhpSpreadsheet\Style\Alignment::HORIZONTAL_CENTER], + 'fill' => ['type' => \PHPExcel_Style_Fill::FILL_SOLID, 'startcolor' => ['rgb' => '4472C4']], + 'alignment' => ['horizontal' => \PHPExcel_Style_Alignment::HORIZONTAL_CENTER], ]; $sheet->getStyle('A1:F1')->applyFromArray($headerStyle); @@ -506,12 +848,12 @@ class ExpertFinder extends Base return $p['title']; }, $expert['papers']); - $sheet->setCellValueByColumnAndRow(1, $row, $i + 1); - $sheet->setCellValueByColumnAndRow(2, $row, $expert['name']); - $sheet->setCellValueByColumnAndRow(3, $row, $expert['email']); - $sheet->setCellValueByColumnAndRow(4, $row, $expert['affiliation']); - $sheet->setCellValueByColumnAndRow(5, $row, $expert['paper_count']); - $sheet->setCellValueByColumnAndRow(6, $row, implode("\n", $paperTitles)); + $sheet->setCellValue('A' . $row, $i + 1); + $sheet->setCellValue('B' . $row, $expert['name']); + $sheet->setCellValue('C' . $row, $expert['email']); + $sheet->setCellValue('D' . $row, $expert['affiliation']); + $sheet->setCellValue('E' . $row, $expert['paper_count']); + $sheet->setCellValue('F' . $row, implode("\n", $paperTitles)); } $sheet->getColumnDimension('A')->setWidth(6); @@ -521,7 +863,7 @@ class ExpertFinder extends Base $sheet->getColumnDimension('E')->setWidth(12); $sheet->getColumnDimension('F')->setWidth(60); - $filename = 'experts_' . preg_replace('/[^a-zA-Z0-9]/', '_', $keyword) . '_' . date('Ymd_His') . '.xlsx'; + $filename = 'experts_' . preg_replace('/[^a-zA-Z0-9]/', '_', $keyword) . '_p' . $page . '_' . date('Ymd_His') . '.xlsx'; $filepath = ROOT_PATH . 'public' . DS . 'exports' . DS . $filename; $dir = ROOT_PATH . 'public' . DS . 
// ==================== Database Storage ====================

/**
 * Store a batch of experts in the `expert` table, skipping addresses that are already present.
 *
 * Every expert is keyed by its normalised e-mail (trimmed, lower-cased, cut to 128 characters).
 * Known experts only get the search field linked via enrichExpertField(); unknown experts are
 * inserted and then linked to the field.
 *
 * @param array  $experts Rows with the keys `name`, `email`, `affiliation` (missing keys count as '')
 * @param string $field   Research field / keyword the experts were found for
 * @param string $source  Name of the data source, stored truncated to 128 characters
 *
 * @return array `inserted` = new rows, `existing` = rows already stored (or whose insert failed),
 *               `field_enriched` = field links added to already-known experts
 */
private function saveExperts($experts, $field, $source)
{
    $inserted = 0;
    $existing = 0;
    $fieldEnrich = 0;

    foreach ($experts as $expert) {
        // Normalise AND truncate before the lookup: the stored value is cut to 128 characters,
        // so looking up the untruncated address would never match an over-long e-mail.
        $email = mb_substr(strtolower(trim((string) ($expert['email'] ?? ''))), 0, 128);
        if (empty($email)) {
            continue;
        }

        $exists = Db::name('expert')->where('email', $email)->find();

        if ($exists) {
            $existing++;
            $fieldEnrich += $this->enrichExpertField($exists['expert_id'], $field);
            continue;
        }

        $insert = [
            'name'        => mb_substr((string) ($expert['name'] ?? ''), 0, 255),
            'email'       => $email,
            'affiliation' => mb_substr((string) ($expert['affiliation'] ?? ''), 0, 128),
            'source'      => mb_substr((string) $source, 0, 128),
            'ctime'       => time(),
            'ltime'       => 0,
            'state'       => 0,
        ];

        try {
            $expertId = Db::name('expert')->insertGetId($insert);
        } catch (\Exception $e) {
            // The find-then-insert above is not atomic, so a failed insert is counted as
            // "already stored". NOTE(review): presumably a unique-key collision on email
            // from a concurrent run - confirm that such an index exists.
            $existing++;
            continue;
        }

        // Count the row as soon as it is written; a failure while linking the field below
        // must not re-label a stored expert as "existing".
        $inserted++;

        try {
            $this->enrichExpertField($expertId, $field);
        } catch (\Exception $e) {
            // Best effort: the expert row is stored, only the field link is missing.
        }
    }

    return ['inserted' => $inserted, 'existing' => $existing, 'field_enriched' => $fieldEnrich];
}

// ==================== Fetch Log (t_expert_fetch) ====================

/**
 * Read the fetch progress recorded for a field/source pair.
 *
 * Both values are cut to 128 characters before the lookup, mirroring the truncation applied
 * when the row is written by updateFetchLog().
 *
 * @param string $field  Research field / keyword
 * @param string $source Name of the data source
 *
 * @return array The stored `expert_fetch` row, or zeroed `last_page`, `total_pages` and
 *               `last_time` when nothing has been recorded yet
 */
private function getFetchLog($field, $source)
{
    $log = Db::name('expert_fetch')
        ->where('field', mb_substr((string) $field, 0, 128))
        ->where('source', mb_substr((string) $source, 0, 128))
        ->find();

    if (!$log) {
        return ['last_page' => 0, 'total_pages' => 0, 'last_time' => 0];
    }

    return $log;
}

/**
 * Record the fetch progress for a field/source pair, creating the log row on first use.
 *
 * @param string $field      Research field / keyword
 * @param string $source     Name of the data source
 * @param int    $lastPage   Last page that has been fetched
 * @param int    $totalPages Total number of pages reported for the query
 *
 * @return void
 */
private function updateFetchLog($field, $source, $lastPage, $totalPages)
{
    $progress = [
        'last_page'   => $lastPage,
        'total_pages' => $totalPages,
        'last_time'   => time(),
    ];

    // Reuse the reader so lookup and insert share one key normalisation. The defaults it
    // returns for an unknown pair carry no primary key, which is how "not found" is detected.
    $log = $this->getFetchLog($field, $source);

    if (isset($log['expert_fetch_id'])) {
        Db::name('expert_fetch')
            ->where('expert_fetch_id', $log['expert_fetch_id'])
            ->update($progress);

        return;
    }

    Db::name('expert_fetch')->insert([
        'field'       => mb_substr((string) $field, 0, 128),
        'source'      => mb_substr((string) $source, 0, 128),
        'last_page'   => $lastPage,
        'total_pages' => $totalPages,
        'last_time'   => time(),
    ]);
}

/**
 * Link an expert to a research field unless an active link (state = 0) already exists.
 *
 * @param int    $expertId Primary key of the expert row
 * @param string $field    Research field; trimmed and cut to 128 characters
 *
 * @return int 1 when a new `expert_field` row was inserted, 0 otherwise
 */
private function enrichExpertField($expertId, $field)
{
    // Truncate before the duplicate check: the stored value is cut to 128 characters, so
    // comparing against the full string would insert a new row on every call.
    $field = mb_substr(trim((string) $field), 0, 128);
    if (empty($field)) {
        return 0;
    }

    $exists = Db::name('expert_field')
        ->where('expert_id', $expertId)
        ->where('field', $field)
        ->where('state', 0)
        ->find();

    if ($exists) {
        return 0;
    }

    Db::name('expert_field')->insert([
        'expert_id' => $expertId,
        'major_id'  => 0,
        'field'     => $field,
        'state'     => 0,
    ]);

    return 1;
}