总结expert领域的功能

This commit is contained in:
wangjinlei
2026-06-08 17:18:55 +08:00
parent 9cfa2fccc3
commit 738ffa847f
4 changed files with 358 additions and 100 deletions

View File

@@ -2,6 +2,7 @@
namespace app\common;
use app\common\service\LocalModelService;
use think\Db;
use think\Env;
use think\Exception;
@@ -27,9 +28,17 @@ class ExpertFieldAiService
private $logFile;
/** @var bool|null */
private static $schemaReady = null;
/**
 * Set up the log file location and make a best-effort schema check.
 *
 * A failing schema check is only logged here so that constructing the
 * service never throws because of it.
 */
public function __construct()
{
    $this->logFile = ROOT_PATH . implode(DS, ['runtime', 'expert_field_ai.log']);

    try {
        $this->ensureSchema();
    } catch (\Throwable $schemaError) {
        $message = '[ExpertFieldAi] ensureSchema fail: ' . $schemaError->getMessage();
        $this->log($message);
    }
}
// ===================== 链式队列 =====================
@@ -366,10 +375,16 @@ class ExpertFieldAiService
$papers = array_slice($papers, 0, $maxPapers);
$searchKeywords = array_values(array_unique(array_filter($searchKeywords)));
$countryName = '';
$countryId = intval($expert['country_id'] ?? 0);
if ($countryId > 0) {
$countryName = (string)Db::name('country')->where('country_id', $countryId)->value('title');
// t_expert.country 已存国家英文名,无需再查 country 表
$countryName = trim((string)($expert['country'] ?? ''));
if ($countryName === '') {
$countryId = intval($expert['country_id'] ?? 0);
if ($countryId > 0) {
$row = Db::name('country')->where('country_id', $countryId)->find();
if ($row) {
$countryName = (string)($row['en_name'] ?? ($row['zh_name'] ?? ''));
}
}
}
return [
@@ -453,69 +468,27 @@ class ExpertFieldAiService
private function summarizeWithLlm(array $context)
{
$url = $this->resolveLlmChatUrl();
$model = $this->resolveLlmModel();
$apiKey = trim((string)Env::get(
'expert_field_ai.chat_api_key',
Env::get('user_field_ai.chat_api_key', Env::get('expert_country_chat_api_key', Env::get('citation_chat_api_key', '')))
));
if ($url === '' || $model === '') {
throw new Exception('LLM not configured (set base.model_url / expert_field_ai.chat_model)');
}
$payloadJson = json_encode($context, JSON_UNESCAPED_UNICODE | JSON_UNESCAPED_SLASHES);
$messages = [
[
'role' => 'system',
'content' => '你是学术领域分类助手。根据专家的单位、论文标题与 PubMed 检索上下文,用简体中文总结该专家最主要的研究领域。'
. '注意search_keywords 只是检索词,不可直接当作领域结论,应结合 paper 标题与 affiliation 判断。'
. '要求精确、简洁13 个中文领域词或短短语,用顿号分隔;不要解释、不要英文。'
. '只输出 JSON{"field_ai":"..."}。',
],
[
'role' => 'user',
'content' => "请根据以下 JSON 资料总结该专家的主要研究领域:\n" . $payloadJson,
],
];
$systemPrompt = '你是学术领域分类助手。根据专家的单位、论文标题与 PubMed 检索上下文,用简体中文总结该专家最主要的研究领域。'
. '注意search_keywords 只是检索词,不可直接当作领域结论,应结合 paper 标题与 affiliation 判断。'
. '要求精确、简洁13 个中文领域词或短短语,用顿号分隔;不要解释、不要英文。'
. '只输出 JSON{"field_ai":"..."}。';
$userPrompt = "请根据以下 JSON 资料总结该专家的主要研究领域:\n" . $payloadJson;
$body = [
'model' => $model,
'temperature' => 0.2,
'messages' => $messages,
];
// 按上下文长度动态选模型(小: base.model_url1 / 大: base.model_url
$svc = new LocalModelService();
$res = $svc->chat([
['role' => 'system', 'content' => $systemPrompt],
['role' => 'user', 'content' => $userPrompt],
], ['temperature' => 0.2]);
$ch = curl_init();
curl_setopt_array($ch, [
CURLOPT_URL => $url,
CURLOPT_POST => true,
CURLOPT_POSTFIELDS => json_encode($body, JSON_UNESCAPED_UNICODE),
CURLOPT_RETURNTRANSFER => true,
CURLOPT_CONNECTTIMEOUT => 15,
CURLOPT_TIMEOUT => max(30, (int)Env::get('expert_field_ai.timeout', Env::get('user_field_ai.timeout', 90))),
CURLOPT_HTTPHEADER => array_filter([
'Content-Type: application/json',
$apiKey !== '' ? 'Authorization: Bearer ' . $apiKey : null,
]),
]);
$raw = curl_exec($ch);
$code = (int)curl_getinfo($ch, CURLINFO_HTTP_CODE);
$err = curl_error($ch);
curl_close($ch);
if ($raw === false) {
throw new Exception('LLM curl error: ' . $err);
}
if ($code < 200 || $code >= 300) {
throw new Exception('LLM HTTP ' . $code . ': ' . mb_substr((string)$raw, 0, 400));
if (empty($res['ok'])) {
throw new Exception('LLM error: ' . (string)($res['error'] ?? 'unknown'));
}
$data = json_decode($raw, true);
$content = '';
if (is_array($data) && isset($data['choices'][0]['message']['content'])) {
$content = trim((string)$data['choices'][0]['message']['content']);
}
$this->log('[ExpertFieldAi] llm tier=' . ($res['tier'] ?? '') . ' ctx_len=' . ($res['context_len'] ?? 0) . ' url=' . ($res['url'] ?? ''));
$content = trim((string)($res['content'] ?? ''));
$fieldAi = $this->parseFieldAiFromContent($content);
if ($fieldAi === '' && $content !== '') {
$fieldAi = $this->cleanFieldAiText($content);
@@ -523,44 +496,6 @@ class ExpertFieldAiService
return $fieldAi;
}
/**
 * Resolve the chat-completions URL from the environment.
 *
 * Only base.model_url1 is consulted; the expert_field_ai.chat_url and
 * user_field_ai.chat_url overrides are currently disabled.
 *
 * @return string full chat-completions URL, or '' when nothing is configured
 */
private function resolveLlmChatUrl()
{
    $endpoint = trim((string)Env::get('base.model_url1', ''));
    if ($endpoint === '') {
        return '';
    }

    // A value that already names the chat endpoint is used verbatim;
    // anything else is treated as a root address and gets the path appended.
    $alreadyComplete = stripos($endpoint, 'chat/completions') !== false;

    return $alreadyComplete
        ? $endpoint
        : rtrim($endpoint, '/') . '/v1/chat/completions';
}
/**
 * Resolve the model name to send with chat requests.
 *
 * Environment keys are tried in priority order; empty values and the
 * 'your-model-name' placeholder are skipped. When none of them yields a
 * usable name the built-in default is returned.
 *
 * @return string
 */
private function resolveLlmModel()
{
    $envKeys = [
        'expert_field_ai.chat_model',
        'user_field_ai.chat_model',
        'base.model',
        'expert_country_chat_model',
    ];

    foreach ($envKeys as $envKey) {
        $name = trim((string)Env::get($envKey, ''));
        if ($name === '' || strtolower($name) === 'your-model-name') {
            continue;
        }
        return $name;
    }

    return 'gpt-4.1';
}
private function parseFieldAiFromContent($content)
{
$content = trim((string)$content);
@@ -637,18 +572,73 @@ class ExpertFieldAiService
/**
 * Write an AI field summary and its bookkeeping columns to one expert row.
 *
 * @param mixed $expertId cast with intval() and matched against expert_id
 * @param mixed $fieldAi  summary text; trimmed and cut to 512 characters
 * @param mixed $status   stored as an integer in field_ai_status
 * @param mixed $source   origin tag; trimmed and cut to 32 characters
 * @param mixed $note     when not identical to '', a log line with the
 *                        expert id, status and note is written
 */
private function updateFieldAi($expertId, $fieldAi, $status, $source, $note)
{
// Make sure the field_ai* columns exist before writing to them.
$this->ensureSchema();
$data = [
'field_ai' => mb_substr(trim((string)$fieldAi), 0, 512),
'field_ai_status' => intval($status),
// Update time is recorded as a Unix timestamp.
'field_ai_utime' => time(),
'field_ai_source' => mb_substr(trim((string)$source), 0, 32),
];
// NOTE(review): field_ai_source is already assigned unconditionally in
// $data above, so this guarded assignment writes the same value again —
// confirm which of the two is intended.
if ($this->hasColumn('field_ai_source')) {
$data['field_ai_source'] = mb_substr(trim((string)$source), 0, 32);
}
Db::name('expert')->where('expert_id', intval($expertId))->update($data);
if ($note !== '') {
$this->log('[ExpertFieldAi] expert_id=' . $expertId . ' status=' . $status . ' note=' . $note);
}
}
/**
 * Add whichever field_ai* columns are missing from the expert table.
 *
 * Safe to call repeatedly: once a call has completed, later calls in the
 * same process return immediately. All missing columns are added with a
 * single ALTER TABLE statement, which is also written to the log.
 */
public function ensureSchema()
{
    if (self::$schemaReady === true) {
        return;
    }

    $table = config('database.prefix') . 'expert';

    // Column name => ADD COLUMN clause, in the order the clauses are applied.
    $definitions = [
        'field_ai'        => "ADD COLUMN `field_ai` VARCHAR(512) NOT NULL DEFAULT '' COMMENT 'AI总结的主要研究领域(中文)' AFTER `affiliation`",
        'field_ai_status' => "ADD COLUMN `field_ai_status` TINYINT NOT NULL DEFAULT 0 COMMENT '0待处理 1已生成 2资料不足 3失败 4无user待AI' AFTER `field_ai`",
        'field_ai_utime'  => "ADD COLUMN `field_ai_utime` INT NOT NULL DEFAULT 0 COMMENT 'field_ai更新时间' AFTER `field_ai_status`",
        'field_ai_source' => "ADD COLUMN `field_ai_source` VARCHAR(32) NOT NULL DEFAULT '' COMMENT '来源: user_link / ai' AFTER `field_ai_utime`",
    ];

    $rows = Db::query('SHOW COLUMNS FROM `' . $table . '`');
    $existing = array_flip(array_column($rows, 'Field'));

    $alters = [];
    foreach ($definitions as $name => $clause) {
        if (!isset($existing[$name])) {
            $alters[] = $clause;
        }
    }

    if (!empty($alters)) {
        Db::execute('ALTER TABLE `' . $table . '` ' . implode(', ', $alters));
        $this->log('[ExpertFieldAi] schema patched: ' . implode('; ', $alters));
    }

    self::$schemaReady = true;
}
/**
 * Tell whether the expert table has a column with exactly the given name.
 *
 * The column list is fetched without a LIKE filter and compared in PHP.
 * A LIKE pattern would treat "_" and "%" in the name as wildcards (so
 * 'field_ai_source' would also match e.g. 'field_aiXsource'), and building
 * the pattern meant splicing the argument into the SQL text.
 *
 * @param string $column column name to look for
 * @return bool true when a column of that name exists (case-insensitive)
 */
private function hasColumn($column)
{
    $this->ensureSchema();

    $column = (string)$column;
    if ($column === '') {
        return false;
    }

    $table = config('database.prefix') . 'expert';
    $rows = Db::query('SHOW COLUMNS FROM `' . $table . '`');

    foreach ($rows as $row) {
        // MySQL column names are case-insensitive, hence strcasecmp.
        if (isset($row['Field']) && strcasecmp((string)$row['Field'], $column) === 0) {
            return true;
        }
    }

    return false;
}
public function statusLabel($status)
{
$map = [

View File

@@ -0,0 +1,219 @@
<?php
namespace app\common\service;
use think\Env;
/**
 * Local model service: picks a chat endpoint by context length.
 *
 * - Short context -> small model (half the VRAM of the large one), base.model_url1
 * - Long context  -> large model, base.model_url
 *
 * Selection rule: a total content length (in characters) at or below the
 * threshold uses the small endpoint; anything longer uses the large one.
 * Both endpoints are called with the same model name (base.model).
 *
 * Usage:
 *   $svc = new LocalModelService();
 *   $res = $svc->chat([
 *       ['role' => 'system', 'content' => '...'],
 *       ['role' => 'user', 'content' => '...'],
 *   ]);
 *   // $res['ok'], $res['content'], $res['tier'] (small|large), $res['context_len']
 *
 *   // When only the text is needed:
 *   $text = $svc->complete($systemPrompt, $userPrompt);
 */
class LocalModelService
{
    /** Context-length threshold in characters: <= small model, > large model. */
    const CONTEXT_THRESHOLD = 3000;

    /** Default request timeout in seconds. */
    const TIMEOUT = 120;

    /** Small-model endpoint (short context). */
    private $smallUrl;

    /** Large-model endpoint (long context). */
    private $largeUrl;

    /** Model name, shared by both endpoints. */
    private $model;

    /** Context-length threshold in characters. */
    private $threshold;

    /**
     * Read both endpoints and the model name from the environment.
     */
    public function __construct()
    {
        // small -> base.model_url1, large -> base.model_url, model name -> base.model
        $this->smallUrl = $this->normalizeChatUrl((string)Env::get('base.model_url1', ''));
        $this->largeUrl = $this->normalizeChatUrl((string)Env::get('base.model_url', ''));
        $this->model = trim((string)Env::get('base.model', ''));
        $this->threshold = self::CONTEXT_THRESHOLD;
    }

    /**
     * Run one chat completion, choosing the endpoint by context length.
     *
     * Failures never throw; they are reported through 'ok' => false and a
     * non-empty 'error'.
     *
     * @param array $messages OpenAI-style messages
     * @param array $options  optional settings:
     *                        - temperature (float, default 0.2)
     *                        - max_tokens  (int, sent only when > 0)
     *                        - force_tier  ('small'|'large') skip the length check
     *                        - timeout     (int seconds, default self::TIMEOUT)
     *                        - extra       (array) merged into the request body last
     * @return array{ok:bool, content:string, tier:string, model:string, url:string, context_len:int, error:string}
     */
    public function chat(array $messages, array $options = [])
    {
        $contextLen = $this->measureMessages($messages);

        $forcedTier = isset($options['force_tier']) ? $options['force_tier'] : null;
        $tier = in_array($forcedTier, ['small', 'large'], true)
            ? $forcedTier
            : $this->pickTier($contextLen);

        $endpoint = $this->resolveEndpoint($tier);
        $result = [
            'ok' => false,
            'content' => '',
            'tier' => $tier,
            'model' => $endpoint['model'],
            'url' => $endpoint['url'],
            'context_len' => $contextLen,
            'error' => '',
        ];

        if ($endpoint['url'] === '' || $endpoint['model'] === '') {
            $result['error'] = $tier . ' 模型未配置(检查 .env [base] model_url / model_url1 / model)';
            return $result;
        }

        $payload = [
            'model' => $endpoint['model'],
            'temperature' => isset($options['temperature']) ? (float)$options['temperature'] : 0.2,
            'messages' => $messages,
        ];
        if (isset($options['max_tokens']) && intval($options['max_tokens']) > 0) {
            $payload['max_tokens'] = intval($options['max_tokens']);
        }
        if (isset($options['extra']) && is_array($options['extra'])) {
            // Merged last on purpose: extra keys may override the defaults above.
            $payload = array_merge($payload, $options['extra']);
        }

        $timeout = isset($options['timeout']) && intval($options['timeout']) > 0
            ? intval($options['timeout'])
            : self::TIMEOUT;

        $response = $this->postChat($endpoint['url'], $payload, $timeout);
        if ($response['content'] === null) {
            $result['error'] = $response['error'] !== '' ? $response['error'] : 'LLM 请求失败';
            return $result;
        }

        $result['ok'] = true;
        $result['content'] = $response['content'];
        return $result;
    }

    /**
     * Convenience wrapper: send a system + user prompt and return only the text.
     *
     * @param string $systemPrompt omitted from the request when blank
     * @param string $userPrompt
     * @param array  $options      same options as chat()
     * @return string the reply text, or '' on any failure
     */
    public function complete($systemPrompt, $userPrompt, array $options = [])
    {
        $messages = [];
        if (trim((string)$systemPrompt) !== '') {
            $messages[] = ['role' => 'system', 'content' => (string)$systemPrompt];
        }
        $messages[] = ['role' => 'user', 'content' => (string)$userPrompt];

        $res = $this->chat($messages, $options);
        return $res['ok'] ? $res['content'] : '';
    }

    /**
     * Choose the tier for a given context length.
     *
     * @param int $contextLen
     * @return string 'large' when the length exceeds the threshold, else 'small'
     */
    public function pickTier($contextLen)
    {
        return $contextLen > $this->threshold ? 'large' : 'small';
    }

    /**
     * Measure the context length: the summed character count of every
     * string 'content' entry. Non-string contents are not counted.
     *
     * @param array $messages
     * @return int
     */
    public function measureMessages(array $messages)
    {
        $total = 0;
        foreach ($messages as $message) {
            if (isset($message['content']) && is_string($message['content'])) {
                $total += mb_strlen($message['content']);
            }
        }
        return $total;
    }

    /**
     * Return the endpoint settings for a tier (the model name is shared).
     *
     * @param string $tier
     * @return array{url:string, model:string}
     */
    private function resolveEndpoint($tier)
    {
        $url = $tier === 'large' ? $this->largeUrl : $this->smallUrl;
        return ['url' => $url, 'model' => $this->model];
    }

    /**
     * POST the payload to a chat-completions URL and extract the reply text.
     *
     * @param string $url
     * @param array  $payload request body, sent as JSON
     * @param int    $timeout total request timeout in seconds
     * @return array{content:?string, error:string} content is null on failure
     */
    private function postChat($url, array $payload, $timeout)
    {
        // Encode before opening a connection: malformed UTF-8 in a message makes
        // json_encode() return false, which would otherwise go out as an empty body.
        $body = json_encode($payload, JSON_UNESCAPED_UNICODE);
        if ($body === false) {
            return $this->failure('payload json encode failed: ' . json_last_error_msg());
        }

        $ch = curl_init();
        if ($ch === false) {
            return $this->failure('curl init failed');
        }

        curl_setopt_array($ch, [
            CURLOPT_URL => $url,
            CURLOPT_POST => true,
            CURLOPT_POSTFIELDS => $body,
            CURLOPT_RETURNTRANSFER => true,
            // NOTE(review): peer verification is switched off, which only matters
            // for https endpoints. Confirm every configured endpoint is a trusted
            // local host; otherwise this should be enabled.
            CURLOPT_SSL_VERIFYPEER => false,
            CURLOPT_CONNECTTIMEOUT => 15,
            CURLOPT_TIMEOUT => $timeout,
            CURLOPT_HTTPHEADER => ['Content-Type: application/json'],
        ]);

        $raw = curl_exec($ch);
        $curlError = $raw === false ? curl_error($ch) : '';
        $httpCode = intval(curl_getinfo($ch, CURLINFO_HTTP_CODE));
        curl_close($ch);

        if ($raw === false) {
            return $this->failure('curl error: ' . $curlError);
        }
        if ($httpCode < 200 || $httpCode >= 300) {
            return $this->failure('http ' . $httpCode . ': ' . mb_substr((string)$raw, 0, 300));
        }

        $data = json_decode($raw, true);
        if (!is_array($data)) {
            return $this->failure('invalid json response');
        }

        $content = $this->extractContent($data);
        if ($content === null) {
            return $this->failure('no content in response: ' . mb_substr((string)$raw, 0, 300));
        }

        return ['content' => $content, 'error' => ''];
    }

    /**
     * Pull the reply text out of a decoded response.
     *
     * Looks at choices[0].message.content first, then at a top-level
     * 'content'. Only scalar values are accepted, so a structured (array)
     * content is reported as missing rather than cast to the string "Array".
     *
     * @param array $data decoded response body
     * @return string|null
     */
    private function extractContent(array $data)
    {
        if (isset($data['choices'][0]['message']['content'])
            && is_scalar($data['choices'][0]['message']['content'])
        ) {
            return (string)$data['choices'][0]['message']['content'];
        }
        if (isset($data['content']) && is_scalar($data['content'])) {
            return (string)$data['content'];
        }
        return null;
    }

    /**
     * Build a failed postChat() result.
     *
     * @param string $message
     * @return array{content:null, error:string}
     */
    private function failure($message)
    {
        return ['content' => null, 'error' => $message];
    }

    /**
     * Append /v1/chat/completions to a root address.
     *
     * @param string $url
     * @return string '' for a blank input; the input unchanged when it already
     *                contains "chat/completions" (case-insensitive)
     */
    private function normalizeChatUrl($url)
    {
        $url = trim((string)$url);
        if ($url === '') {
            return '';
        }
        if (stripos($url, 'chat/completions') !== false) {
            return $url;
        }
        return rtrim($url, '/') . '/v1/chat/completions';
    }
}