small model (half the VRAM of the large model), served by base.model_url1
 *   - long context  -> large model, served by base.model_url
 *
 * Selection rule: context length in characters <= threshold uses the small
 * model; anything above the threshold uses the large model.
 * Both endpoints use the same model name (base.model).
 *
 * Usage:
 *   $svc = new LocalModelService();
 *   $res = $svc->chat([
 *       ['role' => 'system', 'content' => '...'],
 *       ['role' => 'user', 'content' => '...'],
 *   ]);
 *   // $res['ok'], $res['content'], $res['tier'] (small|large), $res['context_len']
 *
 *   // When only the text result is needed:
 *   $text = $svc->complete($systemPrompt, $userPrompt);
 */
class LocalModelService
{
    /** Context-length threshold in characters: <= small model, > large model. */
    const CONTEXT_THRESHOLD = 3000;

    /** Total request timeout in seconds. */
    const TIMEOUT = 120;

    /** Chat-completions URL of the small model (short contexts). */
    private $smallUrl;

    /** Chat-completions URL of the large model (long contexts). */
    private $largeUrl;

    /** Model name sent in the request body (identical for both endpoints). */
    private $model;

    /** Context-length threshold in characters used by pickTier(). */
    private $threshold;

    /**
     * Reads the endpoint configuration from Env:
     * base.model_url1 -> small model, base.model_url -> large model,
     * base.model -> model name shared by both.
     */
    public function __construct()
    {
        $this->smallUrl = $this->normalizeChatUrl((string)Env::get('base.model_url1', ''));
        $this->largeUrl = $this->normalizeChatUrl((string)Env::get('base.model_url', ''));
        $this->model = trim((string)Env::get('base.model', ''));
        $this->threshold = self::CONTEXT_THRESHOLD;
    }

    /**
     * Runs one chat completion, choosing the endpoint from the context length.
     *
     * @param array $messages OpenAI-style messages
     * @param array $options  Optional settings:
     *                        - temperature (float, default 0.2)
     *                        - max_tokens  (int, only sent when > 0)
     *                        - force_tier  ('small'|'large') skips the length-based choice
     *                        - extra       (array) merged into the request body last,
     *                                      so its keys override the ones built here
     * @return array{ok:bool, content:string, tier:string, model:string, url:string, context_len:int, error:string}
     *         On failure `ok` is false, `content` is empty and `error` holds the reason.
     */
    public function chat(array $messages, array $options = [])
    {
        $contextLen = $this->measureMessages($messages);

        $forced = isset($options['force_tier']) ? $options['force_tier'] : null;
        $tier = in_array($forced, ['small', 'large'], true)
            ? $forced
            : $this->pickTier($contextLen);

        $endpoint = $this->resolveEndpoint($tier);

        $result = [
            'ok' => false,
            'content' => '',
            'tier' => $tier,
            'model' => $endpoint['model'],
            'url' => $endpoint['url'],
            'context_len' => $contextLen,
            'error' => '',
        ];

        if ($endpoint['url'] === '' || $endpoint['model'] === '') {
            $result['error'] = $tier . ' 模型未配置(检查 .env [base] model_url / model_url1 / model)';
            return $result;
        }

        $payload = [
            'model' => $endpoint['model'],
            'temperature' => isset($options['temperature']) ? (float)$options['temperature'] : 0.2,
            'messages' => $messages,
        ];
        if (isset($options['max_tokens']) && intval($options['max_tokens']) > 0) {
            $payload['max_tokens'] = intval($options['max_tokens']);
        }
        if (isset($options['extra']) && is_array($options['extra'])) {
            $payload = array_merge($payload, $options['extra']);
        }

        $content = $this->postChat($endpoint['url'], $payload, $err);
        if ($content === null) {
            $result['error'] = $err !== '' ? $err : 'LLM 请求失败';
            return $result;
        }

        $result['ok'] = true;
        $result['content'] = $content;
        return $result;
    }

    /**
     * Convenience wrapper: sends an optional system prompt plus a user prompt
     * and returns only the reply text.
     *
     * @param mixed $systemPrompt Cast to string; omitted when blank after trim
     * @param mixed $userPrompt   Cast to string
     * @param array $options      Same options as chat()
     * @return string Reply text, or an empty string when the call failed
     */
    public function complete($systemPrompt, $userPrompt, array $options = [])
    {
        $messages = [];
        if (trim((string)$systemPrompt) !== '') {
            $messages[] = ['role' => 'system', 'content' => (string)$systemPrompt];
        }
        $messages[] = ['role' => 'user', 'content' => (string)$userPrompt];

        $res = $this->chat($messages, $options);
        return $res['ok'] ? $res['content'] : '';
    }

    /**
     * Chooses the tier for a given context length.
     *
     * @param int $contextLen Context length in characters
     * @return string 'large' when the length exceeds the threshold, otherwise 'small'
     */
    public function pickTier($contextLen)
    {
        return $contextLen > $this->threshold ? 'large' : 'small';
    }

    /**
     * Measures the context length of a message list: the sum of the character
     * counts of every message's content.
     *
     * String content is counted directly. Content given as a list of parts
     * contributes the length of its text parts, so multi-part prompts are not
     * measured as empty and mis-routed to the small model.
     *
     * @param array $messages OpenAI-style messages
     * @return int Total number of characters
     */
    public function measureMessages(array $messages)
    {
        $len = 0;
        foreach ($messages as $m) {
            if (isset($m['content'])) {
                $len += $this->measureContent($m['content']);
            }
        }
        return $len;
    }

    /**
     * Character count of a single message's content.
     *
     * @param mixed $content A string, or a list whose items are strings or
     *                       arrays carrying a string under the 'text' key
     * @return int Number of characters; 0 for any other shape
     */
    private function measureContent($content)
    {
        if (is_string($content)) {
            return mb_strlen($content);
        }
        if (!is_array($content)) {
            return 0;
        }

        $len = 0;
        foreach ($content as $part) {
            if (is_string($part)) {
                $len += mb_strlen($part);
            } elseif (is_array($part) && isset($part['text']) && is_string($part['text'])) {
                // Parts without a string 'text' (e.g. image parts) add nothing.
                $len += mb_strlen($part['text']);
            }
        }
        return $len;
    }

    /**
     * Returns the endpoint configuration for a tier.
     *
     * @param string $tier 'large' selects the large endpoint; any other value the small one
     * @return array{url:string, model:string}
     */
    private function resolveEndpoint($tier)
    {
        $url = $tier === 'large' ? $this->largeUrl : $this->smallUrl;
        return ['url' => $url, 'model' => $this->model];
    }

    /**
     * POSTs a JSON payload to a chat-completions URL and extracts the reply text.
     *
     * @param string $url     Full endpoint URL
     * @param array  $payload Request body, JSON-encoded before sending
     * @param string $err     Receives the failure reason; reset to '' on entry
     * @return string|null Reply text, or null on any failure (see $err)
     */
    private function postChat($url, array $payload, &$err = '')
    {
        $err = '';

        // json_encode() returns false on failure (e.g. invalid UTF-8 in a prompt).
        // Fail here instead of posting an empty body and masking the real cause.
        $body = json_encode($payload, JSON_UNESCAPED_UNICODE);
        if ($body === false) {
            $err = 'json encode error: ' . json_last_error_msg();
            return null;
        }

        $ch = curl_init();
        if ($ch === false) {
            $err = 'curl error: curl_init failed';
            return null;
        }

        curl_setopt($ch, CURLOPT_URL, $url);
        curl_setopt($ch, CURLOPT_POST, true);
        curl_setopt($ch, CURLOPT_POSTFIELDS, $body);
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
        // NOTE(review): peer certificate verification is disabled, which leaves
        // https endpoints open to interception. Kept as-is because existing
        // deployments may rely on it — confirm it is only used with trusted endpoints.
        curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false);
        curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 15);
        curl_setopt($ch, CURLOPT_TIMEOUT, self::TIMEOUT);
        curl_setopt($ch, CURLOPT_HTTPHEADER, ['Content-Type: application/json']);

        $raw = curl_exec($ch);
        if ($raw === false) {
            $err = 'curl error: ' . curl_error($ch);
            curl_close($ch);
            return null;
        }
        $httpCode = intval(curl_getinfo($ch, CURLINFO_HTTP_CODE));
        curl_close($ch);

        if ($httpCode < 200 || $httpCode >= 300) {
            // Only the first 300 characters of the body are kept in the message.
            $err = 'http ' . $httpCode . ': ' . mb_substr((string)$raw, 0, 300);
            return null;
        }

        $data = json_decode($raw, true);
        if (!is_array($data)) {
            $err = 'invalid json response';
            return null;
        }

        // Preferred shape: choices[0].message.content.
        if (isset($data['choices'][0]['message']['content'])) {
            return (string)$data['choices'][0]['message']['content'];
        }
        // Fallback shape: a top-level "content" field.
        if (isset($data['content'])) {
            return (string)$data['content'];
        }

        $err = 'no content in response: ' . mb_substr((string)$raw, 0, 300);
        return null;
    }

    /**
     * Turns a configured address into a full chat-completions URL.
     *
     * - blank input stays blank (meaning "not configured");
     * - a URL already containing "chat/completions" is returned unchanged;
     * - a base ending in "/v1" gets "/chat/completions" appended, so the
     *   version segment is not duplicated;
     * - any other base gets "/v1/chat/completions" appended.
     *
     * @param mixed $url Configured address, cast to string and trimmed
     * @return string Normalized URL, or '' when nothing was configured
     */
    private function normalizeChatUrl($url)
    {
        $url = trim((string)$url);
        if ($url === '') {
            return '';
        }
        if (stripos($url, 'chat/completions') !== false) {
            return $url;
        }

        $base = rtrim($url, '/');
        if (strtolower(substr($base, -3)) === '/v1') {
            return $base . '/chat/completions';
        }
        return $base . '/v1/chat/completions';
    }
}