agent功能
This commit is contained in:
@@ -3,6 +3,7 @@
|
||||
namespace app\api\controller;
|
||||
|
||||
use think\Cache;
|
||||
use think\Db;
|
||||
use GuzzleHttp\Client;
|
||||
|
||||
class ExpertFinder extends Base
|
||||
@@ -14,8 +15,9 @@ class ExpertFinder extends Base
|
||||
{
|
||||
parent::__construct($request);
|
||||
$this->httpClient = new Client([
|
||||
'timeout' => 60,
|
||||
'verify' => false,
|
||||
'timeout' => 180,
|
||||
'connect_timeout' => 15,
|
||||
'verify' => false,
|
||||
]);
|
||||
}
|
||||
|
||||
@@ -23,24 +25,24 @@ class ExpertFinder extends Base
|
||||
* Main search endpoint
|
||||
* Params:
|
||||
* keyword - search term (e.g. "biomedical engineering")
|
||||
* max_results - max articles to scan, default 200, max 1000
|
||||
* page - page number, default 1
|
||||
* per_page - articles per page, default 100, max 100
|
||||
* min_year - earliest publication year, default current-3
|
||||
* source - "pubmed" (fast, email from affiliation) or "pmc" (slower, structured email)
|
||||
*/
|
||||
public function search()
|
||||
{
|
||||
$keyword = trim($this->request->param('keyword', ''));
|
||||
$maxResults = intval($this->request->param('max_results', 200));
|
||||
$minYear = intval($this->request->param('min_year', date('Y') - 3));
|
||||
$source = $this->request->param('source', 'pubmed');
|
||||
$keyword = trim($this->request->param('keyword', ''));
|
||||
$page = max(1, intval($this->request->param('page', 1)));
|
||||
$perPage = max(10, min(intval($this->request->param('per_page', 100)), 100));
|
||||
$minYear = intval($this->request->param('min_year', date('Y') - 3));
|
||||
$source = $this->request->param('source', 'pubmed');
|
||||
|
||||
if (empty($keyword)) {
|
||||
return jsonError('keyword is required');
|
||||
}
|
||||
|
||||
$maxResults = max(10, min($maxResults, 1000));
|
||||
|
||||
$cacheKey = 'expert_finder_' . md5($keyword . $maxResults . $minYear . $source);
|
||||
$cacheKey = 'expert_finder_' . md5($keyword . $page . $perPage . $minYear . $source);
|
||||
$cached = Cache::get($cacheKey);
|
||||
if ($cached) {
|
||||
return jsonSuccess($cached);
|
||||
@@ -48,45 +50,200 @@ class ExpertFinder extends Base
|
||||
|
||||
try {
|
||||
if ($source === 'pmc') {
|
||||
$result = $this->searchViaPMC($keyword, $maxResults, $minYear);
|
||||
$result = $this->searchViaPMC($keyword, $perPage, $minYear, $page);
|
||||
} else {
|
||||
$result = $this->searchViaPubMed($keyword, $maxResults, $minYear);
|
||||
$result = $this->searchViaPubMed($keyword, $perPage, $minYear, $page);
|
||||
}
|
||||
} catch (\Exception $e) {
|
||||
return jsonError('Search failed: ' . $e->getMessage());
|
||||
}
|
||||
|
||||
$saveResult = $this->saveExperts($result['experts'], $keyword, $source);
|
||||
$result['saved_new'] = $saveResult['inserted'];
|
||||
$result['saved_exist'] = $saveResult['existing'];
|
||||
|
||||
Cache::set($cacheKey, $result, 3600);
|
||||
|
||||
return jsonSuccess($result);
|
||||
}
|
||||
|
||||
/**
|
||||
* Get experts from local database
|
||||
* Params:
|
||||
* field - filter by field keyword (searches t_expert_field)
|
||||
* major_id - filter by major_id (searches t_expert_field)
|
||||
* state - filter by state (0-5), -1 for all
|
||||
* keyword - search name/email/affiliation
|
||||
* no_recent - if 1, exclude experts promoted within last N days (default 30)
|
||||
* recent_days - days threshold for no_recent filter, default 30
|
||||
* page - page number, default 1
|
||||
* per_page - items per page, default 20
|
||||
* min_experts - auto-fetch threshold, default 50
|
||||
*/
|
||||
public function getList()
{
    // ---- Request parameters (clamped to sane ranges) ----
    $field      = trim($this->request->param('field', ''));
    $majorId    = intval($this->request->param('major_id', 0));
    $state      = $this->request->param('state', '-1');
    $keyword    = trim($this->request->param('keyword', ''));
    $noRecent   = intval($this->request->param('no_recent', 0));
    $recentDays = max(1, intval($this->request->param('recent_days', 30)));
    $page       = max(1, intval($this->request->param('page', 1)));
    $perPage    = max(1, min(intval($this->request->param('per_page', 20)), 100));
    $minExperts = max(0, intval($this->request->param('min_experts', 50)));

    $query = Db::name('expert')->alias('e');

    // The field table is only joined when a field/major filter is requested.
    // Grouping by expert keeps one row per expert when several fields match.
    if ($field !== '' || $majorId > 0) {
        $query->join('t_expert_field ef', 'ef.expert_id = e.expert_id AND ef.state = 0', 'inner');
        if ($field !== '') {
            $query->where('ef.field', 'like', '%' . $field . '%');
        }
        if ($majorId > 0) {
            $query->where('ef.major_id', $majorId);
        }
        $query->group('e.expert_id');
    }

    // '-1' and '' both mean "any state".
    if ($state !== '-1' && $state !== '') {
        $query->where('e.state', intval($state));
    }
    if ($keyword !== '') {
        $query->where('e.name|e.email|e.affiliation', 'like', '%' . $keyword . '%');
    }
    if ($noRecent) {
        // Keep experts whose ltime is 0 or older than the cutoff.
        $cutoff = time() - ($recentDays * 86400);
        $query->where(function ($q) use ($cutoff) {
            $q->where('e.ltime', 0)->whereOr('e.ltime', '<', $cutoff);
        });
    }

    // Count on a clone so the paging options below do not affect the total.
    $countQuery = clone $query;
    $total      = (int) $countQuery->count('distinct e.expert_id');

    $list = $query
        ->field('e.*')
        ->order('e.ctime desc')
        ->page($page, $perPage)
        ->select();

    // Load the active fields of every listed expert with ONE query instead of
    // one query per row, then attach them by expert id.
    $expertIds = [];
    foreach ($list as $row) {
        $expertIds[] = $row['expert_id'];
    }

    $fieldsByExpert = [];
    if (!empty($expertIds)) {
        $fieldRows = Db::name('expert_field')
            ->where('expert_id', 'in', $expertIds)
            ->where('state', 0)
            ->field('expert_id, field')
            ->select();
        foreach ($fieldRows as $fieldRow) {
            $fieldsByExpert[$fieldRow['expert_id']][] = $fieldRow['field'];
        }
    }

    // Index-based assignment avoids leaving a dangling by-reference loop variable.
    foreach ($list as $idx => $row) {
        $list[$idx]['fields'] = isset($fieldsByExpert[$row['expert_id']])
            ? $fieldsByExpert[$row['expert_id']]
            : [];
    }

    // Too few local experts for the requested field: ask for a background fetch.
    $fetching = false;
    if ($field !== '' && $total < $minExperts && $minExperts > 0) {
        $this->triggerBackgroundFetch($field);
        $fetching = true;
    }

    return jsonSuccess([
        'list'        => $list,
        'total'       => $total,
        'page'        => $page,
        'per_page'    => $perPage,
        // ceil() returns a float; cast so the payload carries an integer page count.
        'total_pages' => $total > 0 ? (int) ceil($total / $perPage) : 0,
        'fetching'    => $fetching,
    ]);
}
|
||||
|
||||
/**
|
||||
* Get all fields associated with an expert
|
||||
*/
|
||||
public function getExpertFields()
{
    // A missing or non-numeric expert_id collapses to 0 and is rejected.
    $id = intval($this->request->param('expert_id', 0));
    if ($id === 0) {
        return jsonError('expert_id is required');
    }

    // Only rows with state = 0 are returned.
    $rows = Db::name('expert_field')
        ->where('expert_id', $id)
        ->where('state', 0)
        ->select();

    return jsonSuccess($rows);
}
|
||||
|
||||
/**
|
||||
* Update expert state
|
||||
* Params:
|
||||
* expert_id - single id or comma-separated ids
|
||||
* state - new state (0-5)
|
||||
*/
|
||||
public function updateState()
{
    $expertId = $this->request->param('expert_id', '');
    $state    = intval($this->request->param('state', 0));

    if (empty($expertId)) {
        return jsonError('expert_id is required');
    }
    if ($state < 0 || $state > 5) {
        return jsonError('state must be 0-5');
    }

    // Accept either "1,2,3" or an array (expert_id[]=1&expert_id[]=2).
    $rawIds = is_array($expertId) ? $expertId : explode(',', (string) $expertId);

    // Non-numeric tokens become 0 under intval(); drop them and any duplicates
    // so only real, positive ids reach the IN clause.
    $ids = [];
    foreach ($rawIds as $rawId) {
        $id = intval($rawId);
        if ($id > 0) {
            $ids[$id] = $id;
        }
    }
    $ids = array_values($ids);

    if (empty($ids)) {
        return jsonError('expert_id must contain at least one valid id');
    }

    $count = Db::name('expert')->where('expert_id', 'in', $ids)->update(['state' => $state]);

    return jsonSuccess(['updated' => $count]);
}
|
||||
|
||||
/**
|
||||
* Delete expert (soft: set state=5 blacklist, or hard delete)
|
||||
* Params:
|
||||
* expert_id - single id or comma-separated ids
|
||||
* hard - 1 for hard delete, default 0 (blacklist)
|
||||
*/
|
||||
public function deleteExpert()
{
    $expertId = $this->request->param('expert_id', '');
    $hard     = intval($this->request->param('hard', 0));

    if (empty($expertId)) {
        return jsonError('expert_id is required');
    }

    // Accept either "1,2,3" or an array (expert_id[]=1&expert_id[]=2).
    $rawIds = is_array($expertId) ? $expertId : explode(',', (string) $expertId);

    // This path is destructive, so never pass junk ids through: tokens that
    // intval() turns into 0 (or negatives) are dropped, duplicates collapsed.
    $ids = [];
    foreach ($rawIds as $rawId) {
        $id = intval($rawId);
        if ($id > 0) {
            $ids[$id] = $id;
        }
    }
    $ids = array_values($ids);

    if (empty($ids)) {
        return jsonError('expert_id must contain at least one valid id');
    }

    if ($hard) {
        // NOTE(review): rows in expert_field that reference these experts are
        // not removed here — confirm whether that is intended.
        $count = Db::name('expert')->where('expert_id', 'in', $ids)->delete();
    } else {
        // Soft delete: state 5 is the blacklist state per the endpoint docs.
        $count = Db::name('expert')->where('expert_id', 'in', $ids)->update(['state' => 5]);
    }

    return jsonSuccess(['affected' => $count]);
}
|
||||
|
||||
/**
|
||||
* Export search results to Excel
|
||||
* Same params as search()
|
||||
* Same params as search(), exports current page results
|
||||
*/
|
||||
public function export()
|
||||
{
|
||||
$keyword = trim($this->request->param('keyword', ''));
|
||||
$maxResults = intval($this->request->param('max_results', 200));
|
||||
$minYear = intval($this->request->param('min_year', date('Y') - 3));
|
||||
$source = $this->request->param('source', 'pubmed');
|
||||
$keyword = trim($this->request->param('keyword', ''));
|
||||
$page = max(1, intval($this->request->param('page', 1)));
|
||||
$perPage = max(10, min(intval($this->request->param('per_page', 100)), 100));
|
||||
$minYear = intval($this->request->param('min_year', date('Y') - 3));
|
||||
$source = $this->request->param('source', 'pubmed');
|
||||
|
||||
if (empty($keyword)) {
|
||||
return jsonError('keyword is required');
|
||||
}
|
||||
|
||||
$maxResults = max(10, min($maxResults, 1000));
|
||||
|
||||
$cacheKey = 'expert_finder_' . md5($keyword . $maxResults . $minYear . $source);
|
||||
$cacheKey = 'expert_finder_' . md5($keyword . $page . $perPage . $minYear . $source);
|
||||
$cached = Cache::get($cacheKey);
|
||||
|
||||
if (!$cached) {
|
||||
try {
|
||||
if ($source === 'pmc') {
|
||||
$cached = $this->searchViaPMC($keyword, $maxResults, $minYear);
|
||||
$cached = $this->searchViaPMC($keyword, $perPage, $minYear, $page);
|
||||
} else {
|
||||
$cached = $this->searchViaPubMed($keyword, $maxResults, $minYear);
|
||||
$cached = $this->searchViaPubMed($keyword, $perPage, $minYear, $page);
|
||||
}
|
||||
Cache::set($cacheKey, $cached, 3600);
|
||||
} catch (\Exception $e) {
|
||||
@@ -98,7 +255,7 @@ class ExpertFinder extends Base
|
||||
return jsonError('No experts found to export');
|
||||
}
|
||||
|
||||
return $this->generateExcel($cached['experts'], $keyword);
|
||||
return $this->generateExcel($cached['experts'], $keyword, $page);
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -117,81 +274,215 @@ class ExpertFinder extends Base
|
||||
return jsonSuccess(['msg' => 'Cache cleared']);
|
||||
}
|
||||
|
||||
// ==================== PubMed Search ====================
|
||||
// ==================== Cron / Auto Fetch ====================
|
||||
|
||||
/**
 * Cron job: fetch the next page of experts for each given keyword.
 *
 * Params:
 *   keywords  - comma-separated keywords (e.g. "biomedical engineering,tissue engineering")
 *   source    - "pmc" selects the PMC search; any other value uses PubMed (default pubmed)
 *   per_page  - articles per page, default 100 (clamped to 10..100)
 *   min_year  - earliest publication year, default current year - 3
 *
 * The page last fetched per keyword/source is read via getFetchLog() and
 * written back via updateFetchLog(), so each run continues with the next page.
 * When a search reports no further pages, the stored page is reset to 0.
 */
public function cronFetch()
{
    $keywordsStr = trim($this->request->param('keywords', ''));
    $source      = $this->request->param('source', 'pubmed');
    $perPage     = max(10, min(intval($this->request->param('per_page', 100)), 100));
    $minYear     = intval($this->request->param('min_year', date('Y') - 3));

    if (empty($keywordsStr)) {
        return jsonError('keywords is required');
    }

    // Remove the script time limit for this request.
    set_time_limit(0);

    // Trim, drop blanks and de-duplicate so a keyword is fetched once per run.
    $keywords = [];
    foreach (explode(',', $keywordsStr) as $rawKeyword) {
        $rawKeyword = trim($rawKeyword);
        if (!empty($rawKeyword)) {
            $keywords[$rawKeyword] = $rawKeyword;
        }
    }
    $keywords = array_values($keywords);

    $report = [];
    $lastIx = count($keywords) - 1;

    foreach ($keywords as $ix => $kw) {
        $fetchLog = $this->getFetchLog($kw, $source);
        $page     = $fetchLog['last_page'] + 1;

        try {
            if ($source === 'pmc') {
                $result = $this->searchViaPMC($kw, $perPage, $minYear, $page);
            } else {
                $result = $this->searchViaPubMed($kw, $perPage, $minYear, $page);
            }

            $saveResult = $this->saveExperts($result['experts'], $kw, $source);

            // Store the page just fetched, or 0 to start over once the result
            // set is exhausted.
            $lastPage   = $result['has_more'] ? $page : 0;
            $totalPages = isset($result['total_pages']) ? $result['total_pages'] : 0;
            $this->updateFetchLog($kw, $source, $lastPage, $totalPages);

            $report[] = [
                'keyword'        => $kw,
                'page'           => $page,
                'experts_found'  => $result['total'],
                'saved_new'      => $saveResult['inserted'],
                'saved_exist'    => $saveResult['existing'],
                'field_enriched' => $saveResult['field_enriched'],
                'has_more'       => $result['has_more'],
            ];
        } catch (\Exception $e) {
            // One failing keyword must not abort the remaining ones.
            $report[] = [
                'keyword' => $kw,
                'page'    => $page,
                'error'   => $e->getMessage(),
            ];
        }

        // Pause between keywords; nothing follows the last one, so skip it there.
        if ($ix < $lastIx) {
            sleep(2);
        }
    }

    return jsonSuccess(['report' => $report]);
}
|
||||
|
||||
/**
|
||||
* Trigger a background fetch for a specific field via queue
|
||||
*/
|
||||
private function triggerBackgroundFetch($field)
{
    // A cache flag, kept for 300 seconds, marks a field as already requested.
    $flagKey = 'fetch_lock_' . md5($field);

    if (!Cache::get($flagKey)) {
        Cache::set($flagKey, 1, 300);

        $jobData = [
            'field'    => $field,
            'source'   => 'pubmed',
            'per_page' => 100,
            'min_year' => date('Y') - 3,
        ];

        \think\Queue::push('app\api\job\FetchExperts', $jobData, 'FetchExperts');
    }
}
|
||||
|
||||
/**
|
||||
* Internal method: run a fetch for a single keyword (used by both cron and queue job)
|
||||
*/
|
||||
public function doFetchForField($field, $source = 'pubmed', $perPage = 100, $minYear = null)
{
    if ($minYear === null) {
        $minYear = date('Y') - 3;
    }

    // Continue with the page after the one recorded for this field/source.
    $fetchLog = $this->getFetchLog($field, $source);
    $page     = $fetchLog['last_page'] + 1;

    if ($source === 'pmc') {
        $result = $this->searchViaPMC($field, $perPage, $minYear, $page);
    } else {
        $result = $this->searchViaPubMed($field, $perPage, $minYear, $page);
    }

    $saveResult = $this->saveExperts($result['experts'], $field, $source);

    // Record the page just fetched, or 0 to start over when no pages remain.
    $lastPage   = $result['has_more'] ? $page : 0;
    $totalPages = isset($result['total_pages']) ? $result['total_pages'] : 0;
    $this->updateFetchLog($field, $source, $lastPage, $totalPages);

    // Summary of this run (same shape as one cronFetch report entry).
    return [
        'keyword'        => $field,
        'page'           => $page,
        'experts_found'  => $result['total'],
        'saved_new'      => $saveResult['inserted'],
        'saved_exist'    => $saveResult['existing'],
        'field_enriched' => $saveResult['field_enriched'],
        'has_more'       => $result['has_more'],
    ];
}
|
||||
|
||||
// ==================== PMC Search ====================
|
||||
// ==================== PubMed Search ====================
|
||||
|
||||
/**
 * Search PubMed for one page of articles and aggregate their authors.
 *
 * @param string $keyword  search term
 * @param int    $perPage  number of articles to request for this page
 * @param int    $minYear  earliest publication year
 * @param int    $page     1-based page number
 * @return array paged result as produced by buildPagedResult()
 */
private function searchViaPubMed($keyword, $perPage, $minYear, $page = 1)
{
    // Allow up to 10 minutes for the fetch loop below.
    set_time_limit(600);

    $searchResult  = $this->esearch('pubmed', $keyword, $perPage, $minYear, $page);
    $ids           = $searchResult['ids'];
    $totalArticles = $searchResult['total'];

    if (empty($ids)) {
        return $this->buildPagedResult([], 0, 0, $totalArticles, $page, $perPage, 'pubmed');
    }

    // Fetch article records 50 ids at a time; a batch that still fails after
    // retries yields null and is skipped rather than aborting the page.
    $allAuthors = [];
    foreach (array_chunk($ids, 50) as $batch) {
        $xml = $this->efetchWithRetry('pubmed', $batch);
        if ($xml) {
            $allAuthors = array_merge($allAuthors, $this->parsePubMedXml($xml));
        }
        // 0.4 s pause between batches.
        usleep(400000);
    }

    $experts = $this->aggregateExperts($allAuthors);

    return $this->buildPagedResult($experts, count($experts), count($ids), $totalArticles, $page, $perPage, 'pubmed');
}
|
||||
|
||||
// ==================== PMC Search ====================
|
||||
|
||||
/**
 * Search PMC for one page of articles and aggregate their authors.
 *
 * @param string $keyword  search term
 * @param int    $perPage  number of articles to request for this page
 * @param int    $minYear  earliest publication year
 * @param int    $page     1-based page number
 * @return array paged result as produced by buildPagedResult()
 */
private function searchViaPMC($keyword, $perPage, $minYear, $page = 1)
{
    set_time_limit(600);

    $found        = $this->esearch('pmc', $keyword, $perPage, $minYear, $page);
    $articleIds   = $found['ids'];
    $articleTotal = $found['total'];

    // Nothing on this page: return an empty paged result.
    if (empty($articleIds)) {
        return $this->buildPagedResult([], 0, 0, $articleTotal, $page, $perPage, 'pmc');
    }

    // Records are fetched 5 ids per request, with a 0.5 s pause after each.
    $authorRecords = [];
    foreach (array_chunk($articleIds, 5) as $chunk) {
        $payload = $this->efetchWithRetry('pmc', $chunk);
        if ($payload) {
            $parsed        = $this->parsePMCXml($payload);
            $authorRecords = array_merge($authorRecords, $parsed);
        }
        usleep(500000);
    }

    $experts = $this->aggregateExperts($authorRecords);

    return $this->buildPagedResult(
        $experts,
        count($experts),
        count($articleIds),
        $articleTotal,
        $page,
        $perPage,
        'pmc'
    );
}
|
||||
|
||||
// ==================== NCBI API Calls ====================
|
||||
|
||||
/**
 * Run an NCBI ESearch query and return one page of record ids.
 *
 * @param string $db       E-utilities database name (callers pass 'pubmed' or 'pmc')
 * @param string $keyword  search term
 * @param int    $perPage  page size, sent as retmax
 * @param int    $minYear  earliest publication year for the [pdat] range
 * @param int    $page     1-based page number, converted to retstart
 * @return array ['ids' => list of id strings, 'total' => total match count]
 */
private function esearch($db, $keyword, $perPage, $minYear, $page = 1)
{
    // Restrict to publication dates from $minYear up to the current year.
    $term = $keyword . ' AND ' . $minYear . ':' . date('Y') . '[pdat]';

    // Offset of the first record of the page; never negative for page < 1.
    $retstart = max(0, ($page - 1) * $perPage);

    $response = $this->httpClient->get($this->ncbiBaseUrl . 'esearch.fcgi', [
        'query' => [
            'db'       => $db,
            'term'     => $term,
            'retstart' => $retstart,
            'retmax'   => $perPage,
            'retmode'  => 'json',
            'sort'     => 'relevance',
        ],
    ]);

    // A body that is not valid JSON decodes to null; the ?? fallbacks then
    // yield an empty id list and a total of 0.
    $data  = json_decode($response->getBody()->getContents(), true);
    $ids   = $data['esearchresult']['idlist'] ?? [];
    $total = intval($data['esearchresult']['count'] ?? 0);

    return ['ids' => $ids, 'total' => $total];
}
|
||||
|
||||
private function efetch($db, $ids)
|
||||
@@ -207,6 +498,36 @@ class ExpertFinder extends Base
|
||||
return $response->getBody()->getContents();
|
||||
}
|
||||
|
||||
/**
 * Call efetch() with retries; if every attempt fails, split the id list in
 * two and fetch each half separately.
 *
 * @param string $db          database name passed through to efetch()
 * @param array  $ids         record ids to fetch
 * @param int    $maxRetries  attempts before splitting (halves are retried twice)
 * @return string|null fetched XML, or null when nothing could be fetched
 */
private function efetchWithRetry($db, $ids, $maxRetries = 3)
{
    $attempt = 0;

    while (++$attempt <= $maxRetries) {
        try {
            return $this->efetch($db, $ids);
        } catch (\Exception $e) {
            if ($attempt !== $maxRetries) {
                // Back off 2 s, 4 s, ... before the next attempt.
                sleep($attempt * 2);
                continue;
            }
        }

        // Only reached when the final attempt failed.
        if (count($ids) <= 1) {
            return null;
        }

        // Halve the batch so a single failing id cannot sink the whole request.
        $half  = ceil(count($ids) / 2);
        $left  = array_slice($ids, 0, $half);
        $right = array_slice($ids, $half);

        $leftXml  = $this->efetchWithRetry($db, $left, 2);
        $rightXml = $this->efetchWithRetry($db, $right, 2);

        return $this->mergeXml($leftXml, $rightXml);
    }

    return null;
}
|
||||
|
||||
/**
 * Combine two XML payloads into one.
 *
 * Plain concatenation of two complete XML documents gives text with two
 * prologs and two root elements, which is not well-formed XML. When both
 * inputs parse and share the same root element name, the children of the
 * second root are appended to the first root and a single document is
 * returned. If either input cannot be parsed, or the DOM extension is
 * unavailable, the original concatenation is used as a fallback.
 *
 * NOTE(review): the XML parsers that consume this value are not visible
 * here — confirm they accept a single merged document.
 *
 * @param string|null $xml1
 * @param string|null $xml2
 * @return string|null the other argument when one is empty, otherwise the merged XML
 */
private function mergeXml($xml1, $xml2)
{
    if (empty($xml1)) {
        return $xml2;
    }
    if (empty($xml2)) {
        return $xml1;
    }

    if (class_exists('DOMDocument')) {
        // No network access for DTDs, and parse problems are collected
        // internally instead of being emitted as PHP warnings.
        $flags    = LIBXML_NONET | LIBXML_NOERROR | LIBXML_NOWARNING;
        $previous = libxml_use_internal_errors(true);

        $first  = new \DOMDocument();
        $second = new \DOMDocument();
        $merged = false;

        if ($first->loadXML($xml1, $flags)
            && $second->loadXML($xml2, $flags)
            && $first->documentElement !== null
            && $second->documentElement !== null
            && $first->documentElement->nodeName === $second->documentElement->nodeName
        ) {
            // importNode() copies, so $second is not modified while iterating.
            foreach ($second->documentElement->childNodes as $child) {
                $first->documentElement->appendChild($first->importNode($child, true));
            }
            $merged = $first->saveXML();
        }

        libxml_clear_errors();
        libxml_use_internal_errors($previous);

        if ($merged !== false) {
            return $merged;
        }
    }

    // Fallback: previous behaviour.
    return $xml1 . "\n" . $xml2;
}
|
||||
|
||||
// ==================== PubMed XML Parsing ====================
|
||||
|
||||
private function parsePubMedXml($xmlString)
|
||||
@@ -434,6 +755,25 @@ class ExpertFinder extends Base
|
||||
return $results;
|
||||
}
|
||||
|
||||
// ==================== Pagination ====================
|
||||
|
||||
/**
 * Assemble the result array shared by the paged search methods.
 *
 * @param array  $experts          aggregated expert records for this page
 * @param int    $expertCount      number of experts in $experts
 * @param int    $articlesScanned  number of article ids processed for this page
 * @param int    $totalArticles    total number of matching articles
 * @param int    $page             1-based page number
 * @param int    $perPage          page size
 * @param string $source           source label stored in the result
 * @return array result with paging metadata; total_pages is an integer
 */
private function buildPagedResult($experts, $expertCount, $articlesScanned, $totalArticles, $page, $perPage, $source)
{
    // A page size below 1 would divide by zero; treat it as 1 for the page
    // count only (the reported per_page stays as given).
    $divisor = max(1, (int) $perPage);

    // ceil() returns a float; cast so total_pages is an integer.
    $totalPages = $totalArticles > 0 ? (int) ceil($totalArticles / $divisor) : 0;

    return [
        'experts'          => $experts,
        'total'            => $expertCount,
        'articles_scanned' => $articlesScanned,
        'total_articles'   => $totalArticles,
        'page'             => $page,
        'per_page'         => $perPage,
        'total_pages'      => $totalPages,
        'has_more'         => $page < $totalPages,
        'source'           => $source,
    ];
}
|
||||
|
||||
// ==================== Aggregation ====================
|
||||
|
||||
private function aggregateExperts($authorRecords)
|
||||
@@ -482,21 +822,23 @@ class ExpertFinder extends Base
|
||||
|
||||
// ==================== Excel Export ====================
|
||||
|
||||
private function generateExcel($experts, $keyword)
|
||||
private function generateExcel($experts, $keyword, $page = 1)
|
||||
{
|
||||
$spreadsheet = new \PhpOffice\PhpSpreadsheet\Spreadsheet();
|
||||
$sheet = $spreadsheet->getActiveSheet();
|
||||
vendor("PHPExcel.PHPExcel");
|
||||
|
||||
$objPHPExcel = new \PHPExcel();
|
||||
$sheet = $objPHPExcel->getActiveSheet();
|
||||
$sheet->setTitle('Experts');
|
||||
|
||||
$headers = ['#', 'Name', 'Email', 'Affiliation', 'Paper Count', 'Representative Papers'];
|
||||
$headers = ['A' => '#', 'B' => 'Name', 'C' => 'Email', 'D' => 'Affiliation', 'E' => 'Paper Count', 'F' => 'Representative Papers'];
|
||||
foreach ($headers as $col => $header) {
|
||||
$sheet->setCellValueByColumnAndRow($col + 1, 1, $header);
|
||||
$sheet->setCellValue($col . '1', $header);
|
||||
}
|
||||
|
||||
$headerStyle = [
|
||||
'font' => ['bold' => true, 'color' => ['rgb' => 'FFFFFF']],
|
||||
'fill' => ['fillType' => \PhpOffice\PhpSpreadsheet\Style\Fill::FILL_SOLID, 'startColor' => ['rgb' => '4472C4']],
|
||||
'alignment' => ['horizontal' => \PhpOffice\PhpSpreadsheet\Style\Alignment::HORIZONTAL_CENTER],
|
||||
'fill' => ['type' => \PHPExcel_Style_Fill::FILL_SOLID, 'startcolor' => ['rgb' => '4472C4']],
|
||||
'alignment' => ['horizontal' => \PHPExcel_Style_Alignment::HORIZONTAL_CENTER],
|
||||
];
|
||||
$sheet->getStyle('A1:F1')->applyFromArray($headerStyle);
|
||||
|
||||
@@ -506,12 +848,12 @@ class ExpertFinder extends Base
|
||||
return $p['title'];
|
||||
}, $expert['papers']);
|
||||
|
||||
$sheet->setCellValueByColumnAndRow(1, $row, $i + 1);
|
||||
$sheet->setCellValueByColumnAndRow(2, $row, $expert['name']);
|
||||
$sheet->setCellValueByColumnAndRow(3, $row, $expert['email']);
|
||||
$sheet->setCellValueByColumnAndRow(4, $row, $expert['affiliation']);
|
||||
$sheet->setCellValueByColumnAndRow(5, $row, $expert['paper_count']);
|
||||
$sheet->setCellValueByColumnAndRow(6, $row, implode("\n", $paperTitles));
|
||||
$sheet->setCellValue('A' . $row, $i + 1);
|
||||
$sheet->setCellValue('B' . $row, $expert['name']);
|
||||
$sheet->setCellValue('C' . $row, $expert['email']);
|
||||
$sheet->setCellValue('D' . $row, $expert['affiliation']);
|
||||
$sheet->setCellValue('E' . $row, $expert['paper_count']);
|
||||
$sheet->setCellValue('F' . $row, implode("\n", $paperTitles));
|
||||
}
|
||||
|
||||
$sheet->getColumnDimension('A')->setWidth(6);
|
||||
@@ -521,7 +863,7 @@ class ExpertFinder extends Base
|
||||
$sheet->getColumnDimension('E')->setWidth(12);
|
||||
$sheet->getColumnDimension('F')->setWidth(60);
|
||||
|
||||
$filename = 'experts_' . preg_replace('/[^a-zA-Z0-9]/', '_', $keyword) . '_' . date('Ymd_His') . '.xlsx';
|
||||
$filename = 'experts_' . preg_replace('/[^a-zA-Z0-9]/', '_', $keyword) . '_p' . $page . '_' . date('Ymd_His') . '.xlsx';
|
||||
$filepath = ROOT_PATH . 'public' . DS . 'exports' . DS . $filename;
|
||||
|
||||
$dir = ROOT_PATH . 'public' . DS . 'exports';
|
||||
@@ -529,7 +871,7 @@ class ExpertFinder extends Base
|
||||
mkdir($dir, 0777, true);
|
||||
}
|
||||
|
||||
$writer = new \PhpOffice\PhpSpreadsheet\Writer\Xlsx($spreadsheet);
|
||||
$writer = \PHPExcel_IOFactory::createWriter($objPHPExcel, 'Excel2007');
|
||||
$writer->save($filepath);
|
||||
|
||||
return jsonSuccess([
|
||||
@@ -539,6 +881,119 @@ class ExpertFinder extends Base
|
||||
]);
|
||||
}
|
||||
|
||||
// ==================== Database Storage ====================
|
||||
|
||||
/**
 * Store experts that are not yet in the expert table and link every
 * processed expert to $field.
 *
 * Experts without an email are skipped. Emails are matched lower-cased,
 * trimmed and cut to 128 characters, the same form in which they are stored.
 *
 * @param array  $experts  expert records; the keys email, name and affiliation are read
 * @param string $field    field keyword to associate with each expert
 * @param string $source   source label stored on new rows
 * @return array ['inserted' => int, 'existing' => int, 'field_enriched' => int]
 */
private function saveExperts($experts, $field, $source)
{
    $inserted    = 0;
    $existing    = 0;
    $fieldEnrich = 0;

    foreach ($experts as $expert) {
        // Normalise BEFORE the lookup so the lookup key equals the stored value.
        $email = mb_substr(strtolower(trim((string) ($expert['email'] ?? ''))), 0, 128);
        if (empty($email)) {
            continue;
        }

        $exists = Db::name('expert')->where('email', $email)->find();

        if ($exists) {
            $existing++;
            $fieldEnrich += $this->enrichExpertField($exists['expert_id'], $field);
            continue;
        }

        $insert = [
            'name'        => mb_substr((string) ($expert['name'] ?? ''), 0, 255),
            'email'       => $email,
            'affiliation' => mb_substr((string) ($expert['affiliation'] ?? ''), 0, 128),
            'source'      => mb_substr($source, 0, 128),
            'ctime'       => time(),
            'ltime'       => 0,
            'state'       => 0,
        ];

        try {
            $expertId = Db::name('expert')->insertGetId($insert);
        } catch (\Exception $e) {
            // Best effort, as before: a failed insert is counted as existing.
            // NOTE(review): presumably a duplicate-key failure — confirm.
            $existing++;
            continue;
        }

        // The row is stored; count it even if linking the field fails below.
        $inserted++;

        try {
            $this->enrichExpertField($expertId, $field);
        } catch (\Exception $e) {
            // Field linking is best effort and must not abort the batch.
        }
    }

    return ['inserted' => $inserted, 'existing' => $existing, 'field_enriched' => $fieldEnrich];
}
|
||||
|
||||
// ==================== Fetch Log (t_expert_fetch) ====================
|
||||
|
||||
/**
 * Read the fetch-progress row for a field/source pair.
 *
 * updateFetchLog() stores both values cut to 128 characters, so the lookup
 * applies the same cut; otherwise an over-long keyword would never match its
 * own row.
 *
 * @param string $field
 * @param string $source
 * @return array the stored row, or zeroed defaults when none exists
 */
private function getFetchLog($field, $source)
{
    $log = Db::name('expert_fetch')
        ->where('field', mb_substr($field, 0, 128))
        ->where('source', mb_substr($source, 0, 128))
        ->find();

    if (!$log) {
        // No progress recorded yet: callers will start at page 1.
        return ['last_page' => 0, 'total_pages' => 0, 'last_time' => 0];
    }

    return $log;
}
|
||||
|
||||
/**
 * Insert or update the fetch-progress row for a field/source pair.
 *
 * Both values are cut to 128 characters before the lookup AND the insert, so
 * an over-long keyword updates its existing row instead of adding a new row
 * on every call.
 *
 * @param string $field
 * @param string $source
 * @param int    $lastPage    page number to record as last fetched
 * @param int    $totalPages  total page count reported by the search
 * @return void
 */
private function updateFetchLog($field, $source, $lastPage, $totalPages)
{
    $fieldKey  = mb_substr($field, 0, 128);
    $sourceKey = mb_substr($source, 0, 128);

    $progress = [
        'last_page'   => $lastPage,
        'total_pages' => $totalPages,
        'last_time'   => time(),
    ];

    $exists = Db::name('expert_fetch')
        ->where('field', $fieldKey)
        ->where('source', $sourceKey)
        ->find();

    if ($exists) {
        Db::name('expert_fetch')
            ->where('expert_fetch_id', $exists['expert_fetch_id'])
            ->update($progress);
    } else {
        Db::name('expert_fetch')->insert([
            'field'       => $fieldKey,
            'source'      => $sourceKey,
            'last_page'   => $progress['last_page'],
            'total_pages' => $progress['total_pages'],
            'last_time'   => $progress['last_time'],
        ]);
    }
}
|
||||
|
||||
/**
 * Link an expert to a field keyword unless an active link already exists.
 *
 * The keyword is trimmed and cut to 128 characters before the existence
 * check, matching the value that is inserted, so an over-long keyword cannot
 * produce duplicate rows.
 *
 * @param int    $expertId
 * @param string $field
 * @return int 1 when a row was inserted, 0 otherwise
 */
private function enrichExpertField($expertId, $field)
{
    $field = mb_substr(trim($field), 0, 128);
    if (empty($field)) {
        return 0;
    }

    // Only rows with state = 0 count as an existing link.
    $exists = Db::name('expert_field')
        ->where('expert_id', $expertId)
        ->where('field', $field)
        ->where('state', 0)
        ->find();

    if ($exists) {
        return 0;
    }

    Db::name('expert_field')->insert([
        'expert_id' => $expertId,
        'major_id'  => 0,
        'field'     => $field,
        'state'     => 0,
    ]);

    return 1;
}
|
||||
|
||||
// ==================== Helper Methods ====================
|
||||
|
||||
private function extractEmailFromText($text)
|
||||
|
||||
Reference in New Issue
Block a user