强制提交
This commit is contained in:
@@ -16,6 +16,10 @@ class CrossrefService
|
||||
private $timeout = 15; // 请求超时(秒)
|
||||
private $maxRetry = 2; // 单个DOI最大重试次数
|
||||
private $crossrefUrl = "https://api.crossref.org/works/"; // 接口地址
|
||||
private $pubmedAbbr = true; // CrossRef 无期刊缩写时,是否回退到 PubMed/NLM 规范缩写
|
||||
|
||||
/** @var PubmedService|null 懒加载 */
|
||||
private $pubmedService = null;
|
||||
|
||||
public function __construct($config = [])
|
||||
{
|
||||
@@ -24,6 +28,7 @@ class CrossrefService
|
||||
if (isset($config['timeout'])) $this->timeout = intval($config['timeout']);
|
||||
if (isset($config['maxRetry'])) $this->maxRetry = intval($config['maxRetry']);
|
||||
if (isset($config['crossrefUrl'])) $this->crossrefUrl = (string)$config['crossrefUrl'];
|
||||
if (isset($config['pubmed_abbr'])) $this->pubmedAbbr = (bool)$config['pubmed_abbr'];
|
||||
}
|
||||
}
|
||||
|
||||
@@ -191,7 +196,15 @@ class CrossrefService
|
||||
|
||||
$title = $this->getTitle($msg);
|
||||
$publisher = $this->getPublisher($msg);
|
||||
$joura = !empty($publisher['title']) ? $publisher['title'] : ($publisher['short_title'] ?? '');
|
||||
$validDoi = $this->filterValidDoi($doi);
|
||||
// 期刊缩写优先级:CrossRef short-container-title → PubMed/NLM 规范缩写 → CrossRef 全称
|
||||
$shortTitle = trim((string)($publisher['short_title'] ?? ''));
|
||||
$fullTitle = trim((string)($publisher['title'] ?? ''));
|
||||
$joura = $shortTitle;
|
||||
if ($joura === '') {
|
||||
$pubmedAbbr = $this->lookupPubmedJournalAbbr($validDoi);
|
||||
$joura = $pubmedAbbr !== '' ? $pubmedAbbr : $fullTitle;
|
||||
}
|
||||
$authors = $this->getAuthors($msg);
|
||||
$dateno = $this->getVolumeIssuePages($msg);
|
||||
$retractInfo = $this->checkRetracted($msg);
|
||||
@@ -280,6 +293,34 @@ class CrossrefService
|
||||
];
|
||||
}
|
||||
|
||||
/**
 * Look up the canonical journal abbreviation through PubMed/NLM.
 *
 * Used as a fallback when CrossRef supplies no short container title.
 * Every failure is swallowed and reported as an empty string so that the
 * main CrossRef flow is never interrupted by this best-effort lookup.
 *
 * @param string $doi Bare, already-normalised DOI
 * @return string The abbreviation, or '' when disabled, not found, or on any error
 */
private function lookupPubmedJournalAbbr($doi)
{
    $bareDoi = trim((string)$doi);

    // Nothing to do when there is no DOI or the fallback is switched off.
    if ($bareDoi === '' || !$this->pubmedAbbr) {
        return '';
    }

    try {
        // The PubMed client is created lazily on first use and then reused.
        // NOTE(review): $this->mailto is declared outside the visible chunk — confirm it exists.
        if ($this->pubmedService === null) {
            $this->pubmedService = new PubmedService([
                'email'   => $this->mailto,
                'timeout' => $this->timeout,
            ]);
        }
        $found = $this->pubmedService->journalAbbrByDoi($bareDoi);
    } catch (\Throwable $e) {
        // Deliberate best-effort: a failed lookup must not break the caller.
        return '';
    }

    return is_string($found) ? trim($found) : '';
}
|
||||
|
||||
/**
|
||||
* 提取作者列表
|
||||
*/
|
||||
@@ -300,6 +341,73 @@ class CrossrefService
|
||||
return $authors;
|
||||
}
|
||||
|
||||
/**
 * Build the author string used in a citation: full family name followed by
 * the initials of the given name. When there are more than $maxAuthors
 * authors, only the first $maxAuthors are kept and ", et al" is appended.
 *
 * Example: "Smith JA, Jones B, Lee C, et al"
 *
 * @param array $aDoiInfo   Crossref message (its 'author' list is read)
 * @param int   $maxAuthors Maximum number of authors to show (values below 1 are treated as 1)
 * @return string Formatted author list, or '' when no usable author is present
 */
public function getAuthorsCitation($aDoiInfo = [], $maxAuthors = 3)
{
    $names = [];
    $entries = !empty($aDoiInfo['author']) ? $aDoiInfo['author'] : [];

    foreach ($entries as $entry) {
        $surname = trim((string)($entry['family'] ?? ''));
        $forename = trim((string)($entry['given'] ?? ''));

        if ($surname !== '' || $forename !== '') {
            $abbr = $this->givenToInitials($forename);
            $formatted = $abbr === '' ? $surname : trim($surname . ' ' . $abbr);
        } else {
            // No personal-name structure (e.g. an organisation): use the 'name' field as-is.
            $formatted = trim((string)($entry['name'] ?? ''));
        }

        if ($formatted !== '') {
            $names[] = $formatted;
        }
    }

    if (!$names) {
        return '';
    }

    $limit = max(1, (int)$maxAuthors);
    $shown = implode(', ', array_slice($names, 0, $limit));

    return count($names) > $limit ? $shown . ', et al' : $shown;
}
|
||||
|
||||
/**
 * Convert a given name into its initials: the first character of every
 * component (split on whitespace, hyphens and dots), upper-cased and joined.
 *
 * Examples: "John A." -> "JA", "Mary-Jane" -> "MJ"
 *
 * @param string $given Given-name part of an author
 * @return string Concatenated initials, or '' for empty or undecodable input
 */
private function givenToInitials($given)
{
    $given = trim((string)$given);
    if ($given === '') {
        return '';
    }

    $parts = preg_split('/[\s\-\.]+/u', $given, -1, PREG_SPLIT_NO_EMPTY);
    if ($parts === false) {
        // With the /u modifier preg_split fails on malformed UTF-8; treat as "no initials"
        // instead of iterating over false.
        return '';
    }

    $initials = '';
    foreach ($parts as $part) {
        // Encoding is pinned to UTF-8 to match the /u regex, independent of mbstring's ini default.
        $initials .= mb_strtoupper(mb_substr($part, 0, 1, 'UTF-8'), 'UTF-8');
    }

    return $initials;
}
|
||||
|
||||
/**
|
||||
* 提取发表年份
|
||||
*/
|
||||
|
||||
@@ -2,6 +2,7 @@
|
||||
|
||||
namespace app\common;
|
||||
|
||||
use app\common\service\LocalModelService;
|
||||
use think\Db;
|
||||
use think\Env;
|
||||
use think\Exception;
|
||||
@@ -27,9 +28,17 @@ class ExpertFieldAiService
|
||||
|
||||
private $logFile;
|
||||
|
||||
/** @var bool|null */
|
||||
private static $schemaReady = null;
|
||||
|
||||
/**
 * Set the log file path under runtime/ and try to patch the expert table schema.
 * A schema failure is logged and ignored so the service can still be constructed.
 */
public function __construct()
{
    $this->logFile = sprintf('%sruntime%sexpert_field_ai.log', ROOT_PATH, DS);

    try {
        $this->ensureSchema();
    } catch (\Throwable $schemaError) {
        $this->log('[ExpertFieldAi] ensureSchema fail: ' . $schemaError->getMessage());
    }
}
|
||||
|
||||
// ===================== 链式队列 =====================
|
||||
@@ -366,10 +375,16 @@ class ExpertFieldAiService
|
||||
$papers = array_slice($papers, 0, $maxPapers);
|
||||
$searchKeywords = array_values(array_unique(array_filter($searchKeywords)));
|
||||
|
||||
$countryName = '';
|
||||
$countryId = intval($expert['country_id'] ?? 0);
|
||||
if ($countryId > 0) {
|
||||
$countryName = (string)Db::name('country')->where('country_id', $countryId)->value('title');
|
||||
// t_expert.country 已存国家英文名,无需再查 country 表
|
||||
$countryName = trim((string)($expert['country'] ?? ''));
|
||||
if ($countryName === '') {
|
||||
$countryId = intval($expert['country_id'] ?? 0);
|
||||
if ($countryId > 0) {
|
||||
$row = Db::name('country')->where('country_id', $countryId)->find();
|
||||
if ($row) {
|
||||
$countryName = (string)($row['en_name'] ?? ($row['zh_name'] ?? ''));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return [
|
||||
@@ -453,69 +468,27 @@ class ExpertFieldAiService
|
||||
|
||||
private function summarizeWithLlm(array $context)
|
||||
{
|
||||
$url = $this->resolveLlmChatUrl();
|
||||
$model = $this->resolveLlmModel();
|
||||
$apiKey = trim((string)Env::get(
|
||||
'expert_field_ai.chat_api_key',
|
||||
Env::get('user_field_ai.chat_api_key', Env::get('expert_country_chat_api_key', Env::get('citation_chat_api_key', '')))
|
||||
));
|
||||
|
||||
if ($url === '' || $model === '') {
|
||||
throw new Exception('LLM not configured (set base.model_url / expert_field_ai.chat_model)');
|
||||
}
|
||||
|
||||
$payloadJson = json_encode($context, JSON_UNESCAPED_UNICODE | JSON_UNESCAPED_SLASHES);
|
||||
$messages = [
|
||||
[
|
||||
'role' => 'system',
|
||||
'content' => '你是学术领域分类助手。根据专家的单位、论文标题与 PubMed 检索上下文,用简体中文总结该专家最主要的研究领域。'
|
||||
. '注意:search_keywords 只是检索词,不可直接当作领域结论,应结合 paper 标题与 affiliation 判断。'
|
||||
. '要求:精确、简洁,1~3 个中文领域词或短短语,用顿号分隔;不要解释、不要英文。'
|
||||
. '只输出 JSON:{"field_ai":"..."}。',
|
||||
],
|
||||
[
|
||||
'role' => 'user',
|
||||
'content' => "请根据以下 JSON 资料总结该专家的主要研究领域:\n" . $payloadJson,
|
||||
],
|
||||
];
|
||||
$systemPrompt = '你是学术领域分类助手。根据专家的单位、论文标题与 PubMed 检索上下文,用简体中文总结该专家最主要的研究领域。'
|
||||
. '注意:search_keywords 只是检索词,不可直接当作领域结论,应结合 paper 标题与 affiliation 判断。'
|
||||
. '要求:精确、简洁,1~3 个中文领域词或短短语,用顿号分隔;不要解释、不要英文。'
|
||||
. '只输出 JSON:{"field_ai":"..."}。';
|
||||
$userPrompt = "请根据以下 JSON 资料总结该专家的主要研究领域:\n" . $payloadJson;
|
||||
|
||||
$body = [
|
||||
'model' => $model,
|
||||
'temperature' => 0.2,
|
||||
'messages' => $messages,
|
||||
];
|
||||
// 按上下文长度动态选模型(小: base.model_url1 / 大: base.model_url)
|
||||
$svc = new LocalModelService();
|
||||
$res = $svc->chat([
|
||||
['role' => 'system', 'content' => $systemPrompt],
|
||||
['role' => 'user', 'content' => $userPrompt],
|
||||
], ['temperature' => 0.2]);
|
||||
|
||||
$ch = curl_init();
|
||||
curl_setopt_array($ch, [
|
||||
CURLOPT_URL => $url,
|
||||
CURLOPT_POST => true,
|
||||
CURLOPT_POSTFIELDS => json_encode($body, JSON_UNESCAPED_UNICODE),
|
||||
CURLOPT_RETURNTRANSFER => true,
|
||||
CURLOPT_CONNECTTIMEOUT => 15,
|
||||
CURLOPT_TIMEOUT => max(30, (int)Env::get('expert_field_ai.timeout', Env::get('user_field_ai.timeout', 90))),
|
||||
CURLOPT_HTTPHEADER => array_filter([
|
||||
'Content-Type: application/json',
|
||||
$apiKey !== '' ? 'Authorization: Bearer ' . $apiKey : null,
|
||||
]),
|
||||
]);
|
||||
$raw = curl_exec($ch);
|
||||
$code = (int)curl_getinfo($ch, CURLINFO_HTTP_CODE);
|
||||
$err = curl_error($ch);
|
||||
curl_close($ch);
|
||||
|
||||
if ($raw === false) {
|
||||
throw new Exception('LLM curl error: ' . $err);
|
||||
}
|
||||
if ($code < 200 || $code >= 300) {
|
||||
throw new Exception('LLM HTTP ' . $code . ': ' . mb_substr((string)$raw, 0, 400));
|
||||
if (empty($res['ok'])) {
|
||||
throw new Exception('LLM error: ' . (string)($res['error'] ?? 'unknown'));
|
||||
}
|
||||
|
||||
$data = json_decode($raw, true);
|
||||
$content = '';
|
||||
if (is_array($data) && isset($data['choices'][0]['message']['content'])) {
|
||||
$content = trim((string)$data['choices'][0]['message']['content']);
|
||||
}
|
||||
$this->log('[ExpertFieldAi] llm tier=' . ($res['tier'] ?? '') . ' ctx_len=' . ($res['context_len'] ?? 0) . ' url=' . ($res['url'] ?? ''));
|
||||
|
||||
$content = trim((string)($res['content'] ?? ''));
|
||||
$fieldAi = $this->parseFieldAiFromContent($content);
|
||||
if ($fieldAi === '' && $content !== '') {
|
||||
$fieldAi = $this->cleanFieldAiText($content);
|
||||
@@ -523,44 +496,6 @@ class ExpertFieldAiService
|
||||
return $fieldAi;
|
||||
}
|
||||
|
||||
/**
 * Resolve the chat-completions endpoint URL.
 *
 * Only base.model_url1 is consulted. A value that already contains
 * "chat/completions" is returned unchanged; otherwise the value is treated
 * as a root address and "/v1/chat/completions" is appended.
 *
 * @return string Endpoint URL, or '' when nothing is configured
 */
private function resolveLlmChatUrl()
{
    $configured = array_filter(
        array_map(
            function ($value) {
                return trim((string)$value);
            },
            [Env::get('base.model_url1', '')]
        ),
        function ($value) {
            return $value !== '';
        }
    );

    if (empty($configured)) {
        return '';
    }

    // The first non-empty candidate wins.
    $endpoint = reset($configured);

    return stripos($endpoint, 'chat/completions') === false
        ? rtrim($endpoint, '/') . '/v1/chat/completions'
        : $endpoint;
}
|
||||
|
||||
/**
 * Resolve the model name to send to the LLM endpoint.
 *
 * Configuration keys are tried in priority order, followed by the hard-coded
 * default 'gpt-4.1'. Empty values and the placeholder 'your-model-name'
 * (case-insensitive) are skipped.
 *
 * @return string The first usable model name
 */
private function resolveLlmModel()
{
    $keys = [
        'expert_field_ai.chat_model',
        'user_field_ai.chat_model',
        'base.model',
        'expert_country_chat_model',
    ];

    $names = array_map(
        function ($key) {
            return Env::get($key, '');
        },
        $keys
    );
    $names[] = 'gpt-4.1';

    foreach ($names as $raw) {
        $name = trim((string)$raw);
        if ($name === '' || strtolower($name) === 'your-model-name') {
            continue;
        }
        return $name;
    }

    return '';
}
|
||||
|
||||
private function parseFieldAiFromContent($content)
|
||||
{
|
||||
$content = trim((string)$content);
|
||||
@@ -637,18 +572,73 @@ class ExpertFieldAiService
|
||||
|
||||
/**
 * Persist the AI field summary and its status for one expert.
 *
 * field_ai_source is written only when the column exists. It was previously
 * also set unconditionally in the array literal, which made the hasColumn()
 * guard pointless and would break the UPDATE on a table without that column.
 *
 * @param int    $expertId Expert primary key
 * @param string $fieldAi  Summarised research field (truncated to 512 characters)
 * @param int    $status   Processing status code
 * @param string $source   Origin of the value (truncated to 32 characters)
 * @param string $note     Optional log note; nothing is logged when it is ''
 */
private function updateFieldAi($expertId, $fieldAi, $status, $source, $note)
{
    $this->ensureSchema();

    $data = [
        'field_ai' => mb_substr(trim((string)$fieldAi), 0, 512),
        'field_ai_status' => intval($status),
        'field_ai_utime' => time(),
    ];
    if ($this->hasColumn('field_ai_source')) {
        $data['field_ai_source'] = mb_substr(trim((string)$source), 0, 32);
    }

    Db::name('expert')->where('expert_id', intval($expertId))->update($data);

    if ($note !== '') {
        $this->log('[ExpertFieldAi] expert_id=' . $expertId . ' status=' . $status . ' note=' . $note);
    }
}
|
||||
|
||||
/**
 * Add any missing field_ai* columns to the expert table (safe to call repeatedly).
 *
 * Existing columns are read with SHOW COLUMNS, and all missing ones are added
 * in a single ALTER TABLE. A static flag skips the work on later calls
 * once it has completed successfully.
 */
public function ensureSchema()
{
    if (self::$schemaReady === true) {
        return;
    }

    $table = config('database.prefix') . 'expert';

    $present = [];
    foreach (Db::query('SHOW COLUMNS FROM `' . $table . '`') as $col) {
        $present[$col['Field']] = true;
    }

    // Column name => ADD COLUMN clause. Order matters: each clause is placed AFTER the previous column.
    $wanted = [
        'field_ai'        => "ADD COLUMN `field_ai` VARCHAR(512) NOT NULL DEFAULT '' COMMENT 'AI总结的主要研究领域(中文)' AFTER `affiliation`",
        'field_ai_status' => "ADD COLUMN `field_ai_status` TINYINT NOT NULL DEFAULT 0 COMMENT '0待处理 1已生成 2资料不足 3失败 4无user待AI' AFTER `field_ai`",
        'field_ai_utime'  => "ADD COLUMN `field_ai_utime` INT NOT NULL DEFAULT 0 COMMENT 'field_ai更新时间' AFTER `field_ai_status`",
        'field_ai_source' => "ADD COLUMN `field_ai_source` VARCHAR(32) NOT NULL DEFAULT '' COMMENT '来源: user_link / ai' AFTER `field_ai_utime`",
    ];

    $alters = array_values(array_diff_key($wanted, $present));

    if ($alters !== []) {
        Db::execute('ALTER TABLE `' . $table . '` ' . implode(', ', $alters));
        $this->log('[ExpertFieldAi] schema patched: ' . implode('; ', $alters));
    }

    self::$schemaReady = true;
}
|
||||
|
||||
/**
 * Tell whether the expert table has a column with exactly the given name.
 *
 * The lookup uses an exact, bound comparison on `Field`. The previous
 * `LIKE '<name>'` form treated every "_" in the name as a single-character
 * wildcard (so "field_ai_source" could match other columns) and built the
 * SQL by string concatenation.
 *
 * @param string $column Column name to look for
 * @return bool True when the column exists
 */
private function hasColumn($column)
{
    $this->ensureSchema();

    $table = config('database.prefix') . 'expert';
    $columns = Db::query(
        'SHOW COLUMNS FROM `' . $table . '` WHERE `Field` = ?',
        [(string)$column]
    );

    return !empty($columns);
}
|
||||
|
||||
public function statusLabel($status)
|
||||
{
|
||||
$map = [
|
||||
|
||||
@@ -13,6 +13,9 @@ class ExpertFinderService
|
||||
private $ncbiBaseUrl = 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/';
|
||||
private $logFile;
|
||||
|
||||
/** @var bool|null */
|
||||
private static $schemaReady = null;
|
||||
|
||||
public function __construct()
|
||||
{
|
||||
$this->httpClient = new Client([
|
||||
@@ -21,6 +24,54 @@ class ExpertFinderService
|
||||
'verify' => false,
|
||||
]);
|
||||
$this->logFile = ROOT_PATH . 'runtime' . DS . 'expert_finder.log';
|
||||
|
||||
try {
|
||||
$this->ensureSchema();
|
||||
} catch (\Throwable $e) {
|
||||
$this->log('[ExpertFinder] ensureSchema fail: ' . $e->getMessage());
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* 历史遗留数据迁移用:旧版每天按页抓取时使用的 per_page。
|
||||
* 用于把旧的 last_page 换算成新的 last_offset(last_offset = last_page × 此值)。
|
||||
*/
|
||||
const MIGRATE_LEGACY_PER_PAGE = 10;
|
||||
|
||||
/**
 * Add the missing last_offset column to expert_fetch and back-fill legacy
 * progress (safe to call repeatedly).
 *
 * last_offset is the cumulative fetch offset (how many articles have been
 * scanned). It is decoupled from per_page, so changing per_page no longer
 * shifts the paging position.
 *
 * NOTE(review): the back-fill UPDATE is issued whenever the static flag is not
 * yet set, not only right after the column is added — confirm that is intended.
 */
public function ensureSchema()
{
    if (self::$schemaReady === true) {
        return;
    }

    $table = config('database.prefix') . 'expert_fetch';

    $present = [];
    foreach (Db::query("SHOW COLUMNS FROM `{$table}`") as $col) {
        $present[$col['Field']] = true;
    }

    if (!array_key_exists('last_offset', $present)) {
        Db::execute("ALTER TABLE `{$table}` ADD COLUMN `last_offset` INT NOT NULL DEFAULT 0 COMMENT '累计抓取偏移量(与per_page解耦)' AFTER `last_page`");
        $this->log('[ExpertFinder] schema patched: add last_offset');
    }

    // Migration: convert the old last_page into last_offset using the legacy per_page.
    // Only rows not yet migrated match (last_offset = 0 and last_page > 0).
    $legacyPerPage = intval(self::MIGRATE_LEGACY_PER_PAGE);
    $migrated = Db::execute(
        "UPDATE `{$table}` SET `last_offset` = `last_page` * {$legacyPerPage} WHERE `last_offset` = 0 AND `last_page` > 0"
    );
    if ($migrated > 0) {
        $this->log('[ExpertFinder] migrated last_offset from last_page for ' . $migrated . ' rows (×' . self::MIGRATE_LEGACY_PER_PAGE . ')');
    }

    self::$schemaReady = true;
}
|
||||
|
||||
public function doFetchForField($field, $source = 'pubmed', $perPage = 100, $minYear = null)
|
||||
@@ -30,12 +81,13 @@ class ExpertFinderService
|
||||
}
|
||||
|
||||
$fetchLog = $this->getFetchLog($field, $source);
|
||||
$page = $fetchLog['last_page'] + 1;
|
||||
// 基于累计偏移量(offset)的游标:改 per_page 也不会错位
|
||||
$offset = intval($fetchLog['last_offset'] ?? 0);
|
||||
|
||||
if ($source === 'pmc') {
|
||||
$result = $this->searchViaPMC($field, $perPage, $minYear, $page);
|
||||
$result = $this->searchViaPMC($field, $perPage, $minYear, $offset);
|
||||
} else {
|
||||
$result = $this->searchViaPubMed($field, $perPage, $minYear, $page);
|
||||
$result = $this->searchViaPubMed($field, $perPage, $minYear, $offset);
|
||||
}
|
||||
|
||||
if(!isset($result['total'])){
|
||||
@@ -45,13 +97,15 @@ class ExpertFinderService
|
||||
}
|
||||
$saveResult = $this->saveExperts($result['experts'], $field, $source);
|
||||
|
||||
$nextPage = $result['has_more'] ? $page : $fetchLog['last_page'];
|
||||
$totalPages = $result['total_pages'] ?? $fetchLog['total_pages'];
|
||||
$this->updateFetchLog($field, $source, $nextPage, $totalPages);
|
||||
// 抓到下一篇则前移一个窗口;抓完则保持当前 offset
|
||||
$nextOffset = $result['has_more'] ? ($offset + $perPage) : $offset;
|
||||
$totalPages = $result['total_pages'] ?? ($fetchLog['total_pages'] ?? 0);
|
||||
$this->updateFetchLog($field, $source, $nextOffset, $totalPages, $perPage);
|
||||
|
||||
return [
|
||||
'keyword' => $field,
|
||||
'page' => $page,
|
||||
'page' => $result['page'] ?? 1,
|
||||
'offset' => $offset,
|
||||
'experts_found' => $result['total'],
|
||||
'saved_new' => $saveResult['inserted'],
|
||||
'saved_exist' => $saveResult['existing'],
|
||||
@@ -63,10 +117,12 @@ class ExpertFinderService
|
||||
|
||||
/**
 * Interactive page-based search.
 *
 * The 1-based page number is converted into a record offset and handed to
 * the offset-based search helpers. The stale page-based return statements
 * that preceded the offset-based ones have been removed: they passed a page
 * number where an offset is expected and made the correct calls unreachable.
 *
 * @param string   $keyword Search term
 * @param int      $perPage Window size
 * @param int|null $minYear Earliest publication year
 * @param int      $page    1-based page number
 * @param string   $source  'pmc' selects PMC; anything else selects PubMed
 * @return array Paged result produced by the selected search helper
 */
public function searchExperts($keyword, $perPage, $minYear, $page, $source)
{
    // Page 1 (or anything lower) maps to offset 0.
    $retstart = max(0, (intval($page) - 1) * intval($perPage));

    if ($source === 'pmc') {
        return $this->searchViaPMC($keyword, $perPage, $minYear, $retstart);
    }

    return $this->searchViaPubMed($keyword, $perPage, $minYear, $retstart);
}
|
||||
|
||||
public function saveExperts($experts, $field, $source)
|
||||
@@ -184,14 +240,25 @@ class ExpertFinderService
|
||||
->find();
|
||||
|
||||
if (!$log) {
|
||||
return ['last_page' => 0, 'total_pages' => 0, 'last_time' => 0];
|
||||
return ['last_page' => 0, 'last_offset' => 0, 'total_pages' => 0, 'last_time' => 0];
|
||||
}
|
||||
|
||||
return $log;
|
||||
}
|
||||
|
||||
public function updateFetchLog($field, $source, $lastPage, $totalPages)
|
||||
/**
|
||||
* 回写抓取进度。
|
||||
* @param int $lastOffset 累计偏移量(权威游标)
|
||||
* @param int $totalPages 总页数(仅展示)
|
||||
* @param int $perPage 本次窗口大小,用于换算展示用 last_page
|
||||
*/
|
||||
public function updateFetchLog($field, $source, $lastOffset, $totalPages, $perPage = 0)
|
||||
{
|
||||
$lastOffset = max(0, intval($lastOffset));
|
||||
$perPage = intval($perPage);
|
||||
// last_page 仅作展示:由偏移量换算(per_page 未知时退化为偏移量本身)
|
||||
$lastPage = $perPage > 0 ? intval(floor($lastOffset / $perPage)) : $lastOffset;
|
||||
|
||||
$exists = Db::name('expert_fetch')
|
||||
->where('field', $field)
|
||||
->where('source', $source)
|
||||
@@ -201,6 +268,7 @@ class ExpertFinderService
|
||||
Db::name('expert_fetch')
|
||||
->where('expert_fetch_id', $exists['expert_fetch_id'])
|
||||
->update([
|
||||
'last_offset' => $lastOffset,
|
||||
'last_page' => $lastPage,
|
||||
'total_pages' => $totalPages,
|
||||
'last_time' => time(),
|
||||
@@ -209,6 +277,7 @@ class ExpertFinderService
|
||||
Db::name('expert_fetch')->insert([
|
||||
'field' => mb_substr($field, 0, 128),
|
||||
'source' => mb_substr($source, 0, 128),
|
||||
'last_offset' => $lastOffset,
|
||||
'last_page' => $lastPage,
|
||||
'total_pages' => $totalPages,
|
||||
'last_time' => time(),
|
||||
@@ -218,16 +287,16 @@ class ExpertFinderService
|
||||
|
||||
// ==================== PubMed Search ====================
|
||||
|
||||
private function searchViaPubMed($keyword, $perPage, $minYear, $page = 1)
|
||||
private function searchViaPubMed($keyword, $perPage, $minYear, $retstart = 0)
|
||||
{
|
||||
set_time_limit(600);
|
||||
|
||||
$searchResult = $this->esearch('pubmed', $keyword, $perPage, $minYear, $page);
|
||||
$searchResult = $this->esearch('pubmed', $keyword, $perPage, $minYear, $retstart);
|
||||
$ids = $searchResult['ids'];
|
||||
$totalArticles = $searchResult['total'];
|
||||
|
||||
if (empty($ids)) {
|
||||
return $this->buildPagedResult([], 0, 0, $totalArticles, $page, $perPage, 'pubmed');
|
||||
return $this->buildPagedResult([], 0, 0, $totalArticles, $retstart, $perPage, 'pubmed');
|
||||
}
|
||||
|
||||
$allAuthors = [];
|
||||
@@ -243,21 +312,21 @@ class ExpertFinderService
|
||||
|
||||
$experts = $this->aggregateExperts($allAuthors);
|
||||
|
||||
return $this->buildPagedResult($experts, count($experts), count($ids), $totalArticles, $page, $perPage, 'pubmed');
|
||||
return $this->buildPagedResult($experts, count($experts), count($ids), $totalArticles, $retstart, $perPage, 'pubmed');
|
||||
}
|
||||
|
||||
// ==================== PMC Search ====================
|
||||
|
||||
private function searchViaPMC($keyword, $perPage, $minYear, $page = 1)
|
||||
private function searchViaPMC($keyword, $perPage, $minYear, $retstart = 0)
|
||||
{
|
||||
set_time_limit(600);
|
||||
|
||||
$searchResult = $this->esearch('pmc', $keyword, $perPage, $minYear, $page);
|
||||
$searchResult = $this->esearch('pmc', $keyword, $perPage, $minYear, $retstart);
|
||||
$ids = $searchResult['ids'];
|
||||
$totalArticles = $searchResult['total'];
|
||||
|
||||
if (empty($ids)) {
|
||||
return $this->buildPagedResult([], 0, 0, $totalArticles, $page, $perPage, 'pmc');
|
||||
return $this->buildPagedResult([], 0, 0, $totalArticles, $retstart, $perPage, 'pmc');
|
||||
}
|
||||
|
||||
$allAuthors = [];
|
||||
@@ -273,15 +342,15 @@ class ExpertFinderService
|
||||
|
||||
$experts = $this->aggregateExperts($allAuthors);
|
||||
|
||||
return $this->buildPagedResult($experts, count($experts), count($ids), $totalArticles, $page, $perPage, 'pmc');
|
||||
return $this->buildPagedResult($experts, count($experts), count($ids), $totalArticles, $retstart, $perPage, 'pmc');
|
||||
}
|
||||
|
||||
// ==================== NCBI API ====================
|
||||
|
||||
private function esearch($db, $keyword, $perPage, $minYear, $page = 1)
|
||||
private function esearch($db, $keyword, $perPage, $minYear, $retstart = 0)
|
||||
{
|
||||
$term = $keyword . ' AND ' . $minYear . ':' . date('Y') . '[pdat]';
|
||||
$retstart = ($page - 1) * $perPage;
|
||||
$retstart = max(0, intval($retstart));
|
||||
|
||||
$response = $this->httpClient->get($this->ncbiBaseUrl . 'esearch.fcgi', [
|
||||
'query' => [
|
||||
@@ -563,18 +632,23 @@ class ExpertFinderService
|
||||
return $experts;
|
||||
}
|
||||
|
||||
/**
 * Assemble the result structure for one offset-based search window.
 *
 * Fixes over the previous text: the duplicated (page-based) signature line
 * and the duplicated 'has_more' key are gone, and total_pages is an int
 * instead of the float returned by ceil().
 *
 * @param array  $experts         Aggregated experts found in this window
 * @param int    $expertCount     Number of experts in $experts
 * @param int    $articlesScanned Number of articles scanned in this window
 * @param int    $totalArticles   Total number of articles matching the query
 * @param int    $retstart        Offset of this window (negative values become 0)
 * @param int    $perPage         Window size (values below 1 become 1)
 * @param string $source          Data source label
 * @return array Result with page, offset, per_page, total_pages and has_more
 */
private function buildPagedResult($experts, $expertCount, $articlesScanned, $totalArticles, $retstart, $perPage, $source)
{
    $perPage = max(1, intval($perPage));
    $retstart = max(0, intval($retstart));

    $totalPages = $totalArticles > 0 ? (int)ceil($totalArticles / $perPage) : 0;
    // Display-only 1-based page number derived from the offset.
    $page = intdiv($retstart, $perPage) + 1;

    return [
        'experts' => $experts,
        'total' => $expertCount,
        'articles_scanned' => $articlesScanned,
        'total_articles' => $totalArticles,
        'page' => $page,
        'offset' => $retstart,
        'per_page' => $perPage,
        'total_pages' => $totalPages,
        // Offset-driven: there is more only if the next window still starts inside the result set.
        'has_more' => ($retstart + $perPage) < $totalArticles,
        'source' => $source,
    ];
}
|
||||
|
||||
@@ -2,6 +2,7 @@
|
||||
namespace app\common;
|
||||
use think\Db;
|
||||
use think\Env;
|
||||
use app\common\CrossrefService;
|
||||
class ProductionArticleRefer
|
||||
{
|
||||
|
||||
@@ -78,6 +79,41 @@ class ProductionArticleRefer
|
||||
return json_encode(['status' => 4,'msg' => 'Reference DOI is empty'.json_encode($aParam)]);
|
||||
}
|
||||
|
||||
|
||||
//开始用crossref接口的方式处理数据
|
||||
$doiNorm = preg_replace('#^https?://(dx\.)?doi\.org/#i', '', $aRefer['refer_doi']);
|
||||
$doiNorm = trim($doiNorm, " \t\n\r\0\x0B/");
|
||||
|
||||
$svc = new CrossrefService([
|
||||
'mailto' => trim((string)Env::get('crossref_mailto', '')),
|
||||
]);
|
||||
$summary = $svc->fetchWorkSummary($doiNorm);
|
||||
if ($summary !== null && !empty($summary['doi'])) {
|
||||
$update_a = [];
|
||||
$title = trim((string)($summary['title'] ?? ''));
|
||||
$jouraRaw = trim((string)($summary['joura'] ?? ''));
|
||||
// 姓全写 + 名首字母,超过 3 个作者取前 3 个 + et al
|
||||
$authorCitation = $svc->getAuthorsCitation($summary['raw'] ?? [], 3);
|
||||
$dateno = trim((string)($summary['dateno'] ?? ''));
|
||||
$doilink = trim((string)($summary['doilink'] ?? ''));
|
||||
$update_a['title'] = $title;
|
||||
$update_a['author'] = $authorCitation !== '' ? $authorCitation . '.' : '';
|
||||
$update_a['joura'] = $jouraRaw;
|
||||
$update_a['dateno'] = $dateno;
|
||||
$update_a['refer_type'] = "journal";
|
||||
$update_a['is_ja'] = 1;
|
||||
$update_a['doilink'] = $doilink;
|
||||
$update_a['cs'] = 1;
|
||||
$update_a['update_time'] = time();
|
||||
$update_a['is_deal'] = 1;
|
||||
Db::name('production_article_refer')->where(['p_refer_id' => $iPReferId])->limit(1)->update($update_a);
|
||||
return json_encode(['status' => 1,'msg' => 'Update successful']);
|
||||
}
|
||||
|
||||
//结束---用crossref接口的方式处理数据
|
||||
|
||||
|
||||
|
||||
//数据处理
|
||||
$doi = str_replace('/', '%2F', $aRefer['refer_doi']);
|
||||
$url = "https://citation.doi.org/format?doi=$doi&style=cancer-translational-medicine&lang=en-US";
|
||||
|
||||
@@ -253,9 +253,13 @@ class PromotionService
|
||||
'send_time' => $now,
|
||||
]);
|
||||
Db::name('journal_email')->where('j_email_id', $account['j_email_id'])->setInc('today_sent');
|
||||
// 仅外部 expert 库回写最近一次推广时间;内部 user 用 promotion_email_log.send_time 计频次
|
||||
// 仅外部 expert 库回写最近一次推广时间与累计推广次数;内部 user 用 promotion_email_log.send_time 计频次
|
||||
if ($audienceKind === 'expert' && intval($expert['expert_id']) > 0) {
|
||||
Db::name('expert')->where('expert_id', $expert['expert_id'])->update(['state' => 1, 'ltime' => $now]);
|
||||
Db::name('expert')->where('expert_id', $expert['expert_id'])->update([
|
||||
'state' => 1,
|
||||
'ltime' => $now,
|
||||
'times' => Db::raw('times+1'),
|
||||
]);
|
||||
}
|
||||
Db::name('promotion_task')->where('task_id', $taskId)->setInc('sent_count');
|
||||
} else {
|
||||
|
||||
@@ -60,7 +60,8 @@ class PubmedService
|
||||
$pmid = trim($pmid);
|
||||
if ($pmid === '') return null;
|
||||
|
||||
$cacheKey = 'pmid_' . $pmid;
|
||||
// v2:解析结果新增 journal_iso_abbr / journal_medline_ta,换 key 避免命中旧缓存
|
||||
$cacheKey = 'pmid_v2_' . $pmid;
|
||||
$cached = $this->cacheGet($cacheKey, 30 * 86400);
|
||||
if (is_array($cached)) return $cached;
|
||||
|
||||
@@ -96,6 +97,22 @@ class PubmedService
|
||||
return $info;
|
||||
}
|
||||
|
||||
/**
 * DOI -> canonical journal abbreviation (NLM/ISO form, e.g. "J Clin Oncol").
 *
 * ISOAbbreviation is preferred, with MedlineTA as the fallback.
 *
 * @param string $doi DOI to look up
 * @return string|null The abbreviation, or null when the record or both fields are missing
 */
public function journalAbbrByDoi(string $doi): ?string
{
    $record = $this->fetchByDoi($doi);
    if (!is_array($record)) {
        return null;
    }

    // Fields in priority order; the first non-blank one wins.
    foreach (['journal_iso_abbr', 'journal_medline_ta'] as $field) {
        $candidate = trim((string)($record[$field] ?? ''));
        if ($candidate !== '') {
            return $candidate;
        }
    }

    return null;
}
|
||||
|
||||
/**
|
||||
* 按书目信息检索 PubMed(标题 + 第一作者 + 年份)
|
||||
*/
|
||||
@@ -224,6 +241,9 @@ class PubmedService
|
||||
$pubTypes = array_values(array_unique($pubTypes));
|
||||
|
||||
$journal = $this->xpText($xp, '//PubmedArticle//Journal//Title');
|
||||
// 期刊规范缩写:ISOAbbreviation(Journal 下)与 MedlineTA(MedlineJournalInfo 下)
|
||||
$journalIsoAbbr = $this->xpText($xp, '//PubmedArticle//Journal//ISOAbbreviation');
|
||||
$journalMedlineTa = $this->xpText($xp, '//PubmedArticle//MedlineJournalInfo//MedlineTA');
|
||||
|
||||
$year = '';
|
||||
$year = $this->xpText($xp, '//PubmedArticle//JournalIssue//PubDate//Year');
|
||||
@@ -244,6 +264,8 @@ class PubmedService
|
||||
'mesh_terms' => $mesh,
|
||||
'publication_types' => $pubTypes,
|
||||
'journal' => $journal,
|
||||
'journal_iso_abbr' => $journalIsoAbbr,
|
||||
'journal_medline_ta' => $journalMedlineTa,
|
||||
'year' => $year,
|
||||
];
|
||||
}
|
||||
|
||||
219
application/common/service/LocalModelService.php
Normal file
219
application/common/service/LocalModelService.php
Normal file
@@ -0,0 +1,219 @@
|
||||
<?php
|
||||
|
||||
namespace app\common\service;
|
||||
|
||||
use think\Env;
|
||||
|
||||
/**
 * Local model service: picks a model endpoint automatically by context length.
 *
 * - Short context -> small model endpoint, configured as base.model_url1
 * - Long context  -> large model endpoint, configured as base.model_url
 *
 * Selection rule: total content length (in characters) <= threshold uses the
 * small model; above the threshold uses the large model. Both endpoints use
 * the same model name (base.model).
 *
 * Usage:
 *   $svc = new LocalModelService();
 *   $res = $svc->chat([
 *       ['role' => 'system', 'content' => '...'],
 *       ['role' => 'user',   'content' => '...'],
 *   ]);
 *   // $res['ok'], $res['content'], $res['tier'] (small|large), $res['context_len']
 *
 *   // When only the text is needed:
 *   $text = $svc->complete($systemPrompt, $userPrompt);
 */
class LocalModelService
{
    /** Context-length threshold in characters: <= uses the small model, > uses the large model. */
    const CONTEXT_THRESHOLD = 1000;

    /** Total request timeout in seconds. */
    const TIMEOUT = 120;

    /** Small-model endpoint (short context). */
    private $smallUrl;

    /** Large-model endpoint (long context). */
    private $largeUrl;

    /** Model name (identical for both endpoints). */
    private $model;

    /** Context-length threshold in characters. */
    private $threshold;

    /**
     * Read endpoints and model name from the environment configuration:
     * small -> base.model_url1, large -> base.model_url, model -> base.model.
     */
    public function __construct()
    {
        $this->smallUrl = $this->normalizeChatUrl((string)Env::get('base.model_url1', ''));
        $this->largeUrl = $this->normalizeChatUrl((string)Env::get('base.model_url', ''));
        $this->model = trim((string)Env::get('base.model', ''));
        $this->threshold = self::CONTEXT_THRESHOLD;
    }

    /**
     * Run one chat completion, choosing the model by context length.
     *
     * @param array $messages OpenAI-style messages
     * @param array $options  Optional settings:
     *                        - temperature (float, default 0.2)
     *                        - max_tokens  (int, sent only when > 0)
     *                        - force_tier  ('small'|'large') skip the length-based choice
     *                        - extra       (array) merged into the request body (may override its keys)
     * @return array{ok:bool, content:string, tier:string, model:string, url:string, context_len:int, error:string}
     */
    public function chat(array $messages, array $options = [])
    {
        $contextLen = $this->measureMessages($messages);

        $forced = $options['force_tier'] ?? null;
        $tier = in_array($forced, ['small', 'large'], true)
            ? $forced
            : $this->pickTier($contextLen);

        $endpoint = $this->resolveEndpoint($tier);

        $result = [
            'ok' => false,
            'content' => '',
            'tier' => $tier,
            'model' => $endpoint['model'],
            'url' => $endpoint['url'],
            'context_len' => $contextLen,
            'error' => '',
        ];

        if ($endpoint['url'] === '' || $endpoint['model'] === '') {
            $result['error'] = $tier . ' 模型未配置(检查 .env [base] model_url / model_url1 / model)';
            return $result;
        }

        $payload = [
            'model' => $endpoint['model'],
            'temperature' => isset($options['temperature']) ? (float)$options['temperature'] : 0.2,
            'messages' => $messages,
        ];
        if (isset($options['max_tokens']) && intval($options['max_tokens']) > 0) {
            $payload['max_tokens'] = intval($options['max_tokens']);
        }
        if (isset($options['extra']) && is_array($options['extra'])) {
            $payload = array_merge($payload, $options['extra']);
        }

        $err = '';
        $content = $this->postChat($endpoint['url'], $payload, $err);
        if ($content === null) {
            $result['error'] = $err !== '' ? $err : 'LLM 请求失败';
            return $result;
        }

        $result['ok'] = true;
        $result['content'] = $content;
        return $result;
    }

    /**
     * Convenience wrapper: send a system and a user prompt, return plain text.
     *
     * @param string $systemPrompt System prompt; omitted from the request when blank
     * @param string $userPrompt   User prompt
     * @param array  $options      Same options as chat()
     * @return string Model output, or '' on failure
     */
    public function complete($systemPrompt, $userPrompt, array $options = [])
    {
        $messages = [];
        if (trim((string)$systemPrompt) !== '') {
            $messages[] = ['role' => 'system', 'content' => (string)$systemPrompt];
        }
        $messages[] = ['role' => 'user', 'content' => (string)$userPrompt];

        $res = $this->chat($messages, $options);
        return $res['ok'] ? $res['content'] : '';
    }

    /**
     * Choose the tier for a given context length.
     *
     * @param int $contextLen Context length in characters
     * @return string 'large' when above the threshold, otherwise 'small'
     */
    public function pickTier($contextLen)
    {
        return $contextLen > $this->threshold ? 'large' : 'small';
    }

    /**
     * Measure the context length: the sum of character counts of all string
     * 'content' fields. Messages without a string content contribute nothing.
     *
     * @param array $messages OpenAI-style messages
     * @return int Total length in characters
     */
    public function measureMessages(array $messages)
    {
        $len = 0;
        foreach ($messages as $m) {
            if (isset($m['content']) && is_string($m['content'])) {
                $len += mb_strlen($m['content']);
            }
        }
        return $len;
    }

    /**
     * Return the endpoint configuration for a tier (the model name is shared).
     *
     * @param string $tier 'large' selects the large endpoint; anything else the small one
     * @return array{url:string, model:string}
     */
    private function resolveEndpoint($tier)
    {
        $url = $tier === 'large' ? $this->largeUrl : $this->smallUrl;
        return ['url' => $url, 'model' => $this->model];
    }

    /**
     * POST the payload as JSON and extract the assistant text.
     *
     * @param string $url     Chat-completions endpoint
     * @param array  $payload Request body
     * @param string $err     Receives a short error description on failure
     * @return string|null Assistant content, or null on any failure (see $err)
     */
    private function postChat($url, array $payload, &$err = '')
    {
        $err = '';

        // json_encode returns false (e.g. on malformed UTF-8 in the context); do not post an empty body.
        $body = json_encode($payload, JSON_UNESCAPED_UNICODE);
        if ($body === false) {
            $err = 'json encode error: ' . json_last_error_msg();
            return null;
        }

        $ch = curl_init();
        if ($ch === false) {
            $err = 'curl init failed';
            return null;
        }

        curl_setopt($ch, CURLOPT_URL, $url);
        curl_setopt($ch, CURLOPT_POST, true);
        curl_setopt($ch, CURLOPT_POSTFIELDS, $body);
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
        // NOTE(review): TLS peer verification is disabled. Kept as-is because the endpoints
        // may use self-signed certificates — confirm, and enable verification if they do not.
        curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false);
        curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 15);
        curl_setopt($ch, CURLOPT_TIMEOUT, self::TIMEOUT);
        curl_setopt($ch, CURLOPT_HTTPHEADER, ['Content-Type: application/json']);

        $raw = curl_exec($ch);
        if ($raw === false) {
            $err = 'curl error: ' . curl_error($ch);
            curl_close($ch);
            return null;
        }
        $httpCode = intval(curl_getinfo($ch, CURLINFO_HTTP_CODE));
        curl_close($ch);

        if ($httpCode < 200 || $httpCode >= 300) {
            $err = 'http ' . $httpCode . ': ' . mb_substr((string)$raw, 0, 300);
            return null;
        }

        $data = json_decode($raw, true);
        if (!is_array($data)) {
            $err = 'invalid json response';
            return null;
        }

        // OpenAI-style response first, then a flat {"content": ...} fallback.
        $content = $data['choices'][0]['message']['content'] ?? ($data['content'] ?? null);
        if ($content !== null && is_scalar($content)) {
            return (string)$content;
        }

        // Missing or non-scalar content (e.g. an array) is reported instead of being cast to "Array".
        $err = 'no content in response: ' . mb_substr((string)$raw, 0, 300);
        return null;
    }

    /**
     * Turn a root address into a chat-completions URL by appending
     * /v1/chat/completions; a URL that already contains "chat/completions"
     * is returned unchanged.
     *
     * @param string $url Configured address
     * @return string Normalised endpoint, or '' when the input is blank
     */
    private function normalizeChatUrl($url)
    {
        $url = trim((string)$url);
        if ($url === '') {
            return '';
        }
        if (stripos($url, 'chat/completions') !== false) {
            return $url;
        }
        return rtrim($url, '/') . '/v1/chat/completions';
    }
}
|
||||
Reference in New Issue
Block a user