From 738ffa847fc518eb13d81fe129c400a5413be69b Mon Sep 17 00:00:00 2001 From: wangjinlei <751475802@qq.com> Date: Mon, 8 Jun 2026 17:18:55 +0800 Subject: [PATCH] =?UTF-8?q?=E6=80=BB=E7=BB=93expert=E9=A2=86=E5=9F=9F?= =?UTF-8?q?=E7=9A=84=E5=8A=9F=E8=83=BD?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- application/common/ExpertFieldAiService.php | 190 +++++++-------- .../common/service/LocalModelService.php | 219 ++++++++++++++++++ sql/add_field_ai_source_to_expert.sql | 3 + sql/patch_expert_field_ai_columns.php | 46 ++++ 4 files changed, 358 insertions(+), 100 deletions(-) create mode 100644 application/common/service/LocalModelService.php create mode 100644 sql/add_field_ai_source_to_expert.sql create mode 100644 sql/patch_expert_field_ai_columns.php diff --git a/application/common/ExpertFieldAiService.php b/application/common/ExpertFieldAiService.php index 55cbc6ab..34637a6b 100644 --- a/application/common/ExpertFieldAiService.php +++ b/application/common/ExpertFieldAiService.php @@ -2,6 +2,7 @@ namespace app\common; +use app\common\service\LocalModelService; use think\Db; use think\Env; use think\Exception; @@ -27,9 +28,17 @@ class ExpertFieldAiService private $logFile; + /** @var bool|null */ + private static $schemaReady = null; + public function __construct() { $this->logFile = ROOT_PATH . 'runtime' . DS . 'expert_field_ai.log'; + try { + $this->ensureSchema(); + } catch (\Throwable $e) { + $this->log('[ExpertFieldAi] ensureSchema fail: ' . $e->getMessage()); + } } // ===================== 链式队列 ===================== @@ -366,10 +375,16 @@ class ExpertFieldAiService $papers = array_slice($papers, 0, $maxPapers); $searchKeywords = array_values(array_unique(array_filter($searchKeywords))); - $countryName = ''; - $countryId = intval($expert['country_id'] ?? 0); - if ($countryId > 0) { - $countryName = (string)Db::name('country')->where('country_id', $countryId)->value('title'); + // t_expert.country 已存国家英文名,无需再查 country 表 + $countryName = trim((string)($expert['country'] ?? '')); + if ($countryName === '') { + $countryId = intval($expert['country_id'] ?? 0); + if ($countryId > 0) { + $row = Db::name('country')->where('country_id', $countryId)->find(); + if ($row) { + $countryName = (string)($row['en_name'] ?? ($row['zh_name'] ?? '')); + } + } } return [ @@ -453,69 +468,27 @@ class ExpertFieldAiService private function summarizeWithLlm(array $context) { - $url = $this->resolveLlmChatUrl(); - $model = $this->resolveLlmModel(); - $apiKey = trim((string)Env::get( - 'expert_field_ai.chat_api_key', - Env::get('user_field_ai.chat_api_key', Env::get('expert_country_chat_api_key', Env::get('citation_chat_api_key', ''))) - )); - - if ($url === '' || $model === '') { - throw new Exception('LLM not configured (set base.model_url / expert_field_ai.chat_model)'); - } - $payloadJson = json_encode($context, JSON_UNESCAPED_UNICODE | JSON_UNESCAPED_SLASHES); - $messages = [ - [ - 'role' => 'system', - 'content' => '你是学术领域分类助手。根据专家的单位、论文标题与 PubMed 检索上下文,用简体中文总结该专家最主要的研究领域。' - . '注意:search_keywords 只是检索词,不可直接当作领域结论,应结合 paper 标题与 affiliation 判断。' - . '要求:精确、简洁,1~3 个中文领域词或短短语,用顿号分隔;不要解释、不要英文。' - . '只输出 JSON:{"field_ai":"..."}。', - ], - [ - 'role' => 'user', - 'content' => "请根据以下 JSON 资料总结该专家的主要研究领域:\n" . $payloadJson, - ], - ]; + $systemPrompt = '你是学术领域分类助手。根据专家的单位、论文标题与 PubMed 检索上下文,用简体中文总结该专家最主要的研究领域。' + . '注意:search_keywords 只是检索词,不可直接当作领域结论,应结合 paper 标题与 affiliation 判断。' + . '要求:精确、简洁,1~3 个中文领域词或短短语,用顿号分隔;不要解释、不要英文。' + . '只输出 JSON:{"field_ai":"..."}。'; + $userPrompt = "请根据以下 JSON 资料总结该专家的主要研究领域:\n" . $payloadJson; - $body = [ - 'model' => $model, - 'temperature' => 0.2, - 'messages' => $messages, - ]; + // 按上下文长度动态选模型(小: base.model_url1 / 大: base.model_url) + $svc = new LocalModelService(); + $res = $svc->chat([ + ['role' => 'system', 'content' => $systemPrompt], + ['role' => 'user', 'content' => $userPrompt], + ], ['temperature' => 0.2]); - $ch = curl_init(); - curl_setopt_array($ch, [ - CURLOPT_URL => $url, - CURLOPT_POST => true, - CURLOPT_POSTFIELDS => json_encode($body, JSON_UNESCAPED_UNICODE), - CURLOPT_RETURNTRANSFER => true, - CURLOPT_CONNECTTIMEOUT => 15, - CURLOPT_TIMEOUT => max(30, (int)Env::get('expert_field_ai.timeout', Env::get('user_field_ai.timeout', 90))), - CURLOPT_HTTPHEADER => array_filter([ - 'Content-Type: application/json', - $apiKey !== '' ? 'Authorization: Bearer ' . $apiKey : null, - ]), - ]); - $raw = curl_exec($ch); - $code = (int)curl_getinfo($ch, CURLINFO_HTTP_CODE); - $err = curl_error($ch); - curl_close($ch); - - if ($raw === false) { - throw new Exception('LLM curl error: ' . $err); - } - if ($code < 200 || $code >= 300) { - throw new Exception('LLM HTTP ' . $code . ': ' . mb_substr((string)$raw, 0, 400)); + if (empty($res['ok'])) { + throw new Exception('LLM error: ' . (string)($res['error'] ?? 'unknown')); } - $data = json_decode($raw, true); - $content = ''; - if (is_array($data) && isset($data['choices'][0]['message']['content'])) { - $content = trim((string)$data['choices'][0]['message']['content']); - } + $this->log('[ExpertFieldAi] llm tier=' . ($res['tier'] ?? '') . ' ctx_len=' . ($res['context_len'] ?? 0) . ' url=' . ($res['url'] ?? '')); + $content = trim((string)($res['content'] ?? '')); $fieldAi = $this->parseFieldAiFromContent($content); if ($fieldAi === '' && $content !== '') { $fieldAi = $this->cleanFieldAiText($content); @@ -523,44 +496,6 @@ class ExpertFieldAiService return $fieldAi; } - private function resolveLlmChatUrl() - { - $candidates = [ -// Env::get('expert_field_ai.chat_url', ''), -// Env::get('user_field_ai.chat_url', ''), - Env::get('base.model_url1', ''), - ]; - foreach ($candidates as $u) { - $u = trim((string)$u); - if ($u === '') { - continue; - } - if (stripos($u, 'chat/completions') !== false) { - return $u; - } - return rtrim($u, '/') . '/v1/chat/completions'; - } - return ''; - } - - private function resolveLlmModel() - { - $candidates = [ - Env::get('expert_field_ai.chat_model', ''), - Env::get('user_field_ai.chat_model', ''), - Env::get('base.model', ''), - Env::get('expert_country_chat_model', ''), - 'gpt-4.1', - ]; - foreach ($candidates as $m) { - $m = trim((string)$m); - if ($m !== '' && strtolower($m) !== 'your-model-name') { - return $m; - } - } - return ''; - } - private function parseFieldAiFromContent($content) { $content = trim((string)$content); @@ -637,18 +572,73 @@ class ExpertFieldAiService private function updateFieldAi($expertId, $fieldAi, $status, $source, $note) { + $this->ensureSchema(); + $data = [ 'field_ai' => mb_substr(trim((string)$fieldAi), 0, 512), 'field_ai_status' => intval($status), 'field_ai_utime' => time(), - 'field_ai_source' => mb_substr(trim((string)$source), 0, 32), ]; + if ($this->hasColumn('field_ai_source')) { + $data['field_ai_source'] = mb_substr(trim((string)$source), 0, 32); + } + Db::name('expert')->where('expert_id', intval($expertId))->update($data); if ($note !== '') { $this->log('[ExpertFieldAi] expert_id=' . $expertId . ' status=' . $status . ' note=' . $note); } } + /** + * 自动补全 t_expert 上缺失的 field_ai 字段(可重复执行)。 + */ + public function ensureSchema() + { + if (self::$schemaReady === true) { + return; + } + + $table = config('database.prefix') . 'expert'; + $columns = Db::query('SHOW COLUMNS FROM `' . $table . '`'); + $existing = []; + foreach ($columns as $col) { + $existing[$col['Field']] = true; + } + + $alters = []; + if (!isset($existing['field_ai'])) { + $alters[] = "ADD COLUMN `field_ai` VARCHAR(512) NOT NULL DEFAULT '' COMMENT 'AI总结的主要研究领域(中文)' AFTER `affiliation`"; + $existing['field_ai'] = true; + } + if (!isset($existing['field_ai_status'])) { + $alters[] = "ADD COLUMN `field_ai_status` TINYINT NOT NULL DEFAULT 0 COMMENT '0待处理 1已生成 2资料不足 3失败 4无user待AI' AFTER `field_ai`"; + $existing['field_ai_status'] = true; + } + if (!isset($existing['field_ai_utime'])) { + $alters[] = "ADD COLUMN `field_ai_utime` INT NOT NULL DEFAULT 0 COMMENT 'field_ai更新时间' AFTER `field_ai_status`"; + $existing['field_ai_utime'] = true; + } + if (!isset($existing['field_ai_source'])) { + $alters[] = "ADD COLUMN `field_ai_source` VARCHAR(32) NOT NULL DEFAULT '' COMMENT '来源: user_link / ai' AFTER `field_ai_utime`"; + $existing['field_ai_source'] = true; + } + + if (!empty($alters)) { + Db::execute('ALTER TABLE `' . $table . '` ' . implode(', ', $alters)); + $this->log('[ExpertFieldAi] schema patched: ' . implode('; ', $alters)); + } + + self::$schemaReady = true; + } + + private function hasColumn($column) + { + $this->ensureSchema(); + $table = config('database.prefix') . 'expert'; + $columns = Db::query('SHOW COLUMNS FROM `' . $table . '` LIKE \'' . addslashes($column) . '\''); + return !empty($columns); + } + public function statusLabel($status) { $map = [ diff --git a/application/common/service/LocalModelService.php b/application/common/service/LocalModelService.php new file mode 100644 index 00000000..955bd98c --- /dev/null +++ b/application/common/service/LocalModelService.php @@ -0,0 +1,219 @@ + 小模型(显存为大模型一半),对应 base.model_url1 + * - 长上下文 -> 大模型,对应 base.model_url + * + * 选择规则:上下文字符数 <= 阈值 用小模型;超过阈值 用大模型。 + * 两个端点模型名相同(base.model)。 + * + * 用法: + * $svc = new LocalModelService(); + * $res = $svc->chat([ + * ['role' => 'system', 'content' => '...'], + * ['role' => 'user', 'content' => '...'], + * ]); + * // $res['ok'], $res['content'], $res['tier'](small|large), $res['context_len'] + * + * // 只要文本结果: + * $text = $svc->complete($systemPrompt, $userPrompt); + */ +class LocalModelService +{ + /** 上下文长度阈值(字符数):<= 用小模型,> 用大模型 */ + const CONTEXT_THRESHOLD = 3000; + + /** 请求超时(秒) */ + const TIMEOUT = 120; + + /** 小模型端点(短上下文,显存一半) */ + private $smallUrl; + + /** 大模型端点(长上下文) */ + private $largeUrl; + + /** 模型名(两端点相同) */ + private $model; + + /** 上下文长度阈值(字符数) */ + private $threshold; + + public function __construct() + { + // 小模型 -> base.model_url1,大模型 -> base.model_url,模型名同为 base.model + $this->smallUrl = $this->normalizeChatUrl((string)Env::get('base.model_url1', '')); + $this->largeUrl = $this->normalizeChatUrl((string)Env::get('base.model_url', '')); + $this->model = trim((string)Env::get('base.model', '')); + $this->threshold = self::CONTEXT_THRESHOLD; + } + + /** + * 发起一次对话,按上下文长度自动选模型。 + * + * @param array $messages OpenAI 格式 messages + * @param array $options 可选: + * - temperature (float, 默认 0.2) + * - max_tokens (int, 可选) + * - force_tier ('small'|'large') 强制指定模型,跳过长度判断 + * - extra (array) 透传到请求体的额外字段 + * @return array{ok:bool, content:string, tier:string, model:string, url:string, context_len:int, error:string} + */ + public function chat(array $messages, array $options = []) + { + $contextLen = $this->measureMessages($messages); + + $tier = isset($options['force_tier']) && in_array($options['force_tier'], ['small', 'large'], true) + ? $options['force_tier'] + : $this->pickTier($contextLen); + + $endpoint = $this->resolveEndpoint($tier); + + $result = [ + 'ok' => false, + 'content' => '', + 'tier' => $tier, + 'model' => $endpoint['model'], + 'url' => $endpoint['url'], + 'context_len' => $contextLen, + 'error' => '', + ]; + + if ($endpoint['url'] === '' || $endpoint['model'] === '') { + $result['error'] = $tier . ' 模型未配置(检查 .env [base] model_url / model_url1 / model)'; + return $result; + } + + $payload = [ + 'model' => $endpoint['model'], + 'temperature' => isset($options['temperature']) ? (float)$options['temperature'] : 0.2, + 'messages' => $messages, + ]; + if (isset($options['max_tokens']) && intval($options['max_tokens']) > 0) { + $payload['max_tokens'] = intval($options['max_tokens']); + } + if (isset($options['extra']) && is_array($options['extra'])) { + $payload = array_merge($payload, $options['extra']); + } + + $content = $this->postChat($endpoint['url'], $payload, $err); + if ($content === null) { + $result['error'] = $err !== '' ? $err : 'LLM 请求失败'; + return $result; + } + + $result['ok'] = true; + $result['content'] = $content; + return $result; + } + + /** + * 便捷方法:传 system + user,返回纯文本内容(失败返回空字符串)。 + */ + public function complete($systemPrompt, $userPrompt, array $options = []) + { + $messages = []; + if (trim((string)$systemPrompt) !== '') { + $messages[] = ['role' => 'system', 'content' => (string)$systemPrompt]; + } + $messages[] = ['role' => 'user', 'content' => (string)$userPrompt]; + + $res = $this->chat($messages, $options); + return $res['ok'] ? $res['content'] : ''; + } + + /** + * 根据上下文长度选择 tier。 + */ + public function pickTier($contextLen) + { + return $contextLen > $this->threshold ? 'large' : 'small'; + } + + /** + * 统计 messages 的上下文长度(所有 content 字符数之和)。 + */ + public function measureMessages(array $messages) + { + $len = 0; + foreach ($messages as $m) { + if (isset($m['content']) && is_string($m['content'])) { + $len += mb_strlen($m['content']); + } + } + return $len; + } + + /** + * 返回某 tier 的端点配置(模型名两端点相同)。 + */ + private function resolveEndpoint($tier) + { + $url = $tier === 'large' ? $this->largeUrl : $this->smallUrl; + return ['url' => $url, 'model' => $this->model]; + } + + private function postChat($url, array $payload, &$err = '') + { + $err = ''; + $ch = curl_init(); + curl_setopt($ch, CURLOPT_URL, $url); + curl_setopt($ch, CURLOPT_POST, true); + curl_setopt($ch, CURLOPT_POSTFIELDS, json_encode($payload, JSON_UNESCAPED_UNICODE)); + curl_setopt($ch, CURLOPT_RETURNTRANSFER, true); + curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false); + curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 15); + curl_setopt($ch, CURLOPT_TIMEOUT, self::TIMEOUT); + + $headers = ['Content-Type: application/json']; + curl_setopt($ch, CURLOPT_HTTPHEADER, $headers); + + $raw = curl_exec($ch); + if ($raw === false) { + $err = 'curl error: ' . curl_error($ch); + curl_close($ch); + return null; + } + $httpCode = intval(curl_getinfo($ch, CURLINFO_HTTP_CODE)); + curl_close($ch); + + if ($httpCode < 200 || $httpCode >= 300) { + $err = 'http ' . $httpCode . ': ' . mb_substr((string)$raw, 0, 300); + return null; + } + + $data = json_decode($raw, true); + if (!is_array($data)) { + $err = 'invalid json response'; + return null; + } + if (isset($data['choices'][0]['message']['content'])) { + return (string)$data['choices'][0]['message']['content']; + } + if (isset($data['content'])) { + return (string)$data['content']; + } + $err = 'no content in response: ' . mb_substr((string)$raw, 0, 300); + return null; + } + + /** + * 根地址自动补 /v1/chat/completions。 + */ + private function normalizeChatUrl($url) + { + $url = trim((string)$url); + if ($url === '') { + return ''; + } + if (stripos($url, 'chat/completions') !== false) { + return $url; + } + return rtrim($url, '/') . '/v1/chat/completions'; + } +} diff --git a/sql/add_field_ai_source_to_expert.sql b/sql/add_field_ai_source_to_expert.sql new file mode 100644 index 00000000..511beb5c --- /dev/null +++ b/sql/add_field_ai_source_to_expert.sql @@ -0,0 +1,3 @@ +-- 若已执行过 add_field_ai_to_expert.sql 但缺少 field_ai_source,单独补这一列 +ALTER TABLE `t_expert` + ADD COLUMN `field_ai_source` VARCHAR(32) NOT NULL DEFAULT '' COMMENT '来源: user_link / ai' AFTER `field_ai_utime`; diff --git a/sql/patch_expert_field_ai_columns.php b/sql/patch_expert_field_ai_columns.php new file mode 100644 index 00000000..d13d596a --- /dev/null +++ b/sql/patch_expert_field_ai_columns.php @@ -0,0 +1,46 @@ + PDO::ERRMODE_EXCEPTION, +]); + +$table = $config['prefix'] . 'expert'; +$cols = $pdo->query("SHOW COLUMNS FROM `{$table}`")->fetchAll(PDO::FETCH_COLUMN, 0); +$colSet = array_flip($cols); + +$alters = []; +if (!isset($colSet['field_ai'])) { + $alters[] = "ADD COLUMN `field_ai` VARCHAR(512) NOT NULL DEFAULT '' COMMENT 'AI总结的主要研究领域(中文)' AFTER `affiliation`"; +} +if (!isset($colSet['field_ai_status'])) { + $after = isset($colSet['field_ai']) || !empty($alters) ? 'field_ai' : 'affiliation'; + $alters[] = "ADD COLUMN `field_ai_status` TINYINT NOT NULL DEFAULT 0 COMMENT '0待处理 1已生成 2资料不足 3失败 4无user待AI' AFTER `{$after}`"; +} +if (!isset($colSet['field_ai_utime'])) { + $alters[] = "ADD COLUMN `field_ai_utime` INT NOT NULL DEFAULT 0 COMMENT 'field_ai更新时间' AFTER `field_ai_status`"; +} +if (!isset($colSet['field_ai_source'])) { + $alters[] = "ADD COLUMN `field_ai_source` VARCHAR(32) NOT NULL DEFAULT '' COMMENT '来源: user_link / ai' AFTER `field_ai_utime`"; +} + +if (empty($alters)) { + echo "OK: all field_ai columns exist on {$table}\n"; + exit(0); +} + +$sql = "ALTER TABLE `{$table}` " . implode(', ', $alters); +echo "Running: {$sql}\n"; +$pdo->exec($sql); +echo "Done.\n";