logFile = ROOT_PATH . 'runtime' . DS . 'user_field_ai.log'; }

    /**
     * Start the chained processing, scanning from the first user with user_id > 0.
     *
     * @param bool   $force true to recompute users whose field was already generated
     * @param int    $delay seconds to delay the first job; a value <= 0 pushes immediately
     * @param string $queue queue name; '' falls back to self::QUEUE_NAME
     * @return bool whether a first job was enqueued
     */
    public function startChain($force = false, $delay = 1, $queue = '')
    {
        return $this->enqueueNextFieldAi($delay, $queue, 0, $force);
    }

    /**
     * Chain step: find the next pending user with user_id > $afterUserId and enqueue a job for it.
     *
     * @param int    $delay       seconds to delay the job; a value <= 0 pushes immediately
     * @param string $queue       queue name; '' falls back to self::QUEUE_NAME
     * @param int    $afterUserId scan cursor (exclusive lower bound on user_id)
     * @param bool   $force       true to also pick users that were already processed
     * @return bool true when a job was enqueued, false when no candidate is left
     */
    public function enqueueNextFieldAi($delay = 1, $queue = '', $afterUserId = 0, $force = false)
    {
        if ($queue === '') {
            $queue = self::QUEUE_NAME;
        }
        $afterUserId = intval($afterUserId);

        $userId = $this->findNextPendingUserId($afterUserId, $force);
        if ($userId <= 0) {
            $this->log('[FieldAi] chain finished after user_id=' . $afterUserId . ' force=' . ($force ? '1' : '0'));
            return false;
        }

        $data = [
            'user_id' => $userId,
            'queue' => $queue,
            'force' => $force ? 1 : 0,
        ];
        $jobClass = 'app\\api\\job\\UserFieldAiFill@fire';

        if ($delay > 0) {
            Queue::later($delay, $jobClass, $data, $queue);
        } else {
            Queue::push($jobClass, $data, $queue);
        }

        $this->log('[FieldAi] enqueued user_id=' . $userId . ' queue=' . $queue);
        return true;
    }

    /**
     * Process a single user (called from the queue job or synchronously for debugging).
     *
     * Outcomes written through updateFieldAi():
     *  - insufficient data: empty field_ai + STATUS_INSUFFICIENT
     *  - success:           generated field_ai + STATUS_DONE
     *  - any Throwable:     empty field_ai + STATUS_FAILED (the error is logged and returned)
     *
     * @param int  $userId
     * @param bool $force  true to recompute even when a non-empty result with STATUS_DONE exists
     * @return array{ok:bool, skipped?:bool, insufficient?:bool, field_ai?:string, error?:string}
     */
    public function processUser($userId, $force = false)
    {
        $userId = intval($userId);
        if ($userId <= 0) {
            return ['ok' => false, 'error' => 'invalid user_id'];
        }

        $this->ensureReviewerInfoRow($userId);
        $uri = Db::name('user_reviewer_info')->where('reviewer_id', $userId)->find();
        if (!$uri) {
            return ['ok' => false, 'error' => 'reviewer_info missing'];
        }

        // Already summarized and not forced: return the stored value untouched.
        if (!$force
            && intval($uri['field_ai_status']) === self::STATUS_DONE
            && trim((string)$uri['field_ai']) !== ''
        ) {
            return ['ok' => true, 'skipped' => true, 'field_ai' => (string)$uri['field_ai']];
        }

        if (!$this->isEligible($userId, $uri)) {
            $this->updateFieldAi($userId, '', self::STATUS_INSUFFICIENT, 'insufficient profile/articles');
            return ['ok' => true, 'insufficient' => true];
        }

        try {
            $context = $this->buildContext($userId, $uri);
            $fieldAi = $this->summarizeWithLlm($context);
            if ($fieldAi === '') {
                throw new Exception('LLM returned empty field');
            }
            $this->updateFieldAi($userId, $fieldAi, self::STATUS_DONE, '');
            return ['ok' => true, 'field_ai' => $fieldAi];
        } catch (\Throwable $e) {
            // NOTE(review): a failed (forced) recompute overwrites any previous field_ai with ''.
            $this->updateFieldAi($userId, '', self::STATUS_FAILED, mb_substr($e->getMessage(), 0, 500));
            $this->log('[FieldAi] user_id=' . $userId . ' fail: ' . $e->getMessage());
            return ['ok' => false, 'error' => $e->getMessage()];
        }
    }

    /**
     * Whether the user can be summarized: has submitted articles, or has a
     * reasonably complete reviewer profile.
     *
     * @param int        $userId
     * @param array|null $uri    user_reviewer_info row; loaded from the database when null
     * @return bool
     */
    public function isEligible($userId, $uri = null)
    {
        if ($this->hasSubmittedArticles($userId)) {
            return true;
        }
        if ($uri === null) {
            $uri = Db::name('user_reviewer_info')->where('reviewer_id', $userId)->find();
        }
        return $this->isReviewerProfileComplete($uri);
    }

    /**
     * Whether the user has at least one article row with a non-empty title.
     *
     * @param int $userId
     * @return bool
     */
    public function hasSubmittedArticles($userId)
    {
        $n = Db::name('article')
            ->where('user_id', intval($userId))
            ->where('title', '<>', '')
            ->count();
        return $n > 0;
    }

    /**
     * A reviewer profile counts as "reasonably complete" when the number of
     * filled signals reaches the configured threshold
     * (user_field_ai.min_profile_fields, default 4, never below 3).
     *
     * Signals: seven free-text columns, the legacy `major` column, and the
     * presence of at least one major_to_user link with state = 0.
     *
     * @param array|null|false $uri user_reviewer_info row
     * @return bool
     */
    public function isReviewerProfileComplete($uri)
    {
        if (!$uri || !is_array($uri)) {
            return false;
        }

        $minFilled = max(3, (int) Env::get('user_field_ai.min_profile_fields', 4));
        $keys = ['field', 'company', 'country', 'technical', 'introduction', 'department', 'website'];

        $filled = 0;
        foreach ($keys as $k) {
            if (!empty($uri[$k]) && trim((string)$uri[$k]) !== '') {
                $filled++;
            }
        }

        // Legacy `major` column: a literal "0" is treated as "not set".
        if (!empty($uri['major'])) {
            $major = trim((string)$uri['major']);
            if ($major !== '' && $major !== '0') {
                $filled++;
            }
        }

        // Guard against rows passed in without reviewer_id instead of
        // dereferencing a missing key.
        $reviewerId = isset($uri['reviewer_id']) ? intval($uri['reviewer_id']) : 0;
        if ($reviewerId > 0) {
            $majorCount = Db::name('major_to_user')
                ->where('user_id', $reviewerId)
                ->where('state', 0)
                ->count();
            if ($majorCount > 0) {
                $filled++;
            }
        }

        return $filled >= $minFilled;
    }

    /**
     * Scan users in ascending user_id order (in batches) and return the first
     * one after $afterUserId that is eligible for summarization.
     *
     * Side effects: creates a missing user_reviewer_info row for every scanned
     * user and, when not forced, marks ineligible users STATUS_INSUFFICIENT so
     * later scans skip them.
     *
     * @param int  $afterUserId exclusive lower bound on user_id
     * @param bool $force       true to consider every user regardless of status
     * @return int next user id, or 0 when the scan reached the end
     */
    private function findNextPendingUserId($afterUserId, $force)
    {
        $batch = max(20, (int) Env::get('user_field_ai.scan_batch', 80));
        $cursor = intval($afterUserId);

        while (true) {
            $query = Db::name('user')->alias('u')
                ->leftJoin('t_user_reviewer_info uri', 'uri.reviewer_id = u.user_id')
                ->where('u.user_id', '>', $cursor);

            if (!$force) {
                $query->where(function ($q) {
                    // The last condition uses the two-argument 'null' form; presumably IS NULL,
                    // i.e. users without a reviewer_info row (confirm for the framework version in use).
                    $q->where('uri.field_ai_status', self::STATUS_PENDING)
                        ->whereOr('uri.field_ai_status', self::STATUS_FAILED)
                        ->whereOr('uri.reviewer_info_id', 'null');
                });
            }

            $ids = $query->order('u.user_id asc')->limit($batch)->column('u.user_id');
            if (empty($ids)) {
                return 0;
            }

            foreach ($ids as $uid) {
                $uid = intval($uid);
                $cursor = $uid; // advance the cursor so the next batch starts after this user

                $this->ensureReviewerInfoRow($uid);
                $row = Db::name('user_reviewer_info')->where('reviewer_id', $uid)->find();

                // The row can still be missing (e.g. the insert failed); never index into a
                // non-array. A null status matches none of the integer status constants.
                $status = is_array($row) ? intval($row['field_ai_status']) : null;

                if (!$force && ($status === self::STATUS_DONE || $status === self::STATUS_INSUFFICIENT)) {
                    continue;
                }
                if ($this->isEligible($uid, $row)) {
                    return $uid;
                }
                if (!$force) {
                    $this->updateFieldAi($uid, '', self::STATUS_INSUFFICIENT, 'auto skip: insufficient data');
                }
            }
        }
    }

    /**
     * Collect everything sent to the LLM for one user: basic identity, reviewer
     * profile fields, major titles and the most recent titled articles.
     *
     * NOTE(review): realname and email are included in the payload sent to the
     * chat endpoint — confirm this is acceptable for the configured provider.
     *
     * @param int   $userId
     * @param array $uri    user_reviewer_info row
     * @return array{user:array, profile:array, articles:array}
     */
    private function buildContext($userId, array $uri)
    {
        $user = Db::name('user')->where('user_id', $userId)->field('user_id,realname,email,account')->find();
        $majorTitles = $this->resolveMajorTitles($userId, $uri);

        // Number of articles is configurable but clamped to 1..10.
        $maxArticles = max(1, min(10, (int) Env::get('user_field_ai.max_articles', 5)));
        $articles = Db::name('article')
            ->where('user_id', $userId)
            ->where('title', '<>', '')
            ->order('article_id desc')
            ->limit($maxArticles)
            // `abstrart` is the column name as queried here (presumably the schema's spelling).
            ->field('article_id,title,keywords,abstrart,journal_id,ctime')
            ->select();

        $journalNames = [];
        if (!empty($articles)) {
            $jids = array_unique(array_filter(array_column($articles, 'journal_id')));
            if (!empty($jids)) {
                $journalNames = Db::name('journal')->where('journal_id', 'in', $jids)->column('title', 'journal_id');
            }
        }

        $articleBlocks = [];
        foreach ($articles as $a) {
            $jid = intval($a['journal_id']);
            $articleBlocks[] = [
                'title' => (string) $a['title'],
                'journal' => isset($journalNames[$jid]) ? (string) $journalNames[$jid] : '',
                'keywords' => (string) ($a['keywords'] ?? ''),
                // Abstracts are capped at 800 characters.
                'abstract' => mb_substr(trim((string) ($a['abstrart'] ?? '')), 0, 800),
            ];
        }

        return [
            'user' => [
                'realname' => $user ? (string) $user['realname'] : '',
                'email' => $user ? (string) $user['email'] : '',
            ],
            'profile' => [
                'field' => trim((string) ($uri['field'] ?? '')),
                'technical' => trim((string) ($uri['technical'] ?? '')),
                'company' => trim((string) ($uri['company'] ?? '')),
                'department' => trim((string) ($uri['department'] ?? '')),
                'country' => trim((string) ($uri['country'] ?? '')),
                'introduction' => mb_substr(trim((string) ($uri['introduction'] ?? '')), 0, 1200),
                'website' => trim((string) ($uri['website'] ?? '')),
                'majors' => $majorTitles,
            ],
            'articles' => $articleBlocks,
        ];
    }

    /**
     * Resolve the user's major titles: first through major_to_user links
     * (state = 0), then, when that yields nothing, through the legacy
     * comma-separated `major` column of the reviewer row.
     *
     * @param int   $userId
     * @param array $uri    user_reviewer_info row
     * @return string[] unique, trimmed, non-empty titles
     */
    private function resolveMajorTitles($userId, array $uri)
    {
        $titles = [];

        $ids = Db::name('major_to_user')->where('user_id', $userId)->where('state', 0)->column('major_id');
        if (!empty($ids)) {
            $titles = Db::name('reviewer_major')->where('major_id', 'in', $ids)->where('state', 0)->column('title');
        }

        if (empty($titles) && !empty($uri['major'])) {
            $legacy = array_filter(array_map('intval', explode(',', (string) $uri['major'])));
            if (!empty($legacy)) {
                $titles = Db::name('reviewer_major')->where('major_id', 'in', $legacy)->column('title');
            }
        }

        // Cast before trimming: a NULL title from the database must not reach trim().
        $trimmed = array_map(static function ($title) {
            return trim((string) $title);
        }, $titles);

        return array_values(array_unique(array_filter($trimmed)));
    }

    /**
     * Ask the configured chat-completions endpoint for the user's main research field.
     *
     * Configuration is read from user_field_ai.chat_url / chat_model / chat_api_key,
     * falling back to the expert_country_* and then citation_* settings.
     *
     * @param array $context payload produced by buildContext()
     * @return string cleaned field text; '' when no usable answer was found
     * @throws Exception when the endpoint is not configured, the request cannot be
     *                   encoded, curl fails, or the HTTP status is not 2xx
     */
    private function summarizeWithLlm(array $context)
    {
        $url = trim((string) Env::get('user_field_ai.chat_url', Env::get('expert_country_chat_url', Env::get('citation_chat_url', ''))));
        $model = trim((string) Env::get('user_field_ai.chat_model', Env::get('expert_country_chat_model', Env::get('citation_chat_model', 'gpt-4.1'))));
        $apiKey = trim((string) Env::get('user_field_ai.chat_api_key', Env::get('expert_country_chat_api_key', Env::get('citation_chat_api_key', ''))));

        if ($url === '' || $model === '') {
            throw new Exception('user_field_ai chat not configured (chat_url / chat_model)');
        }

        // Profile text and abstracts are user-supplied and may contain invalid UTF-8;
        // substitute bad bytes where the flag exists (PHP >= 7.2) instead of failing the encode.
        $utf8Flag = defined('JSON_INVALID_UTF8_SUBSTITUTE') ? JSON_INVALID_UTF8_SUBSTITUTE : 0;

        $payloadJson = json_encode($context, JSON_UNESCAPED_UNICODE | JSON_UNESCAPED_SLASHES | $utf8Flag);
        if ($payloadJson === false) {
            throw new Exception('LLM payload json_encode failed: ' . json_last_error_msg());
        }

        $messages = [
            [
                'role' => 'system',
                'content' => '你是学术领域分类助手。根据用户的投稿与个人资料,用简体中文给出该用户最主要的研究领域总结。'
                    . '要求:精确、简洁,1~3 个中文领域词或短短语,用顿号分隔;不要解释、不要英文、不要 JSON 以外的多余文字。'
                    . '只输出 JSON:{"field_ai":"..."}。',
            ],
            [
                'role' => 'user',
                'content' => "请根据以下 JSON 资料总结该用户的主要研究领域:\n" . $payloadJson,
            ],
        ];

        $body = [
            'model' => $model,
            'temperature' => 0.2,
            'messages' => $messages,
        ];
        $bodyJson = json_encode($body, JSON_UNESCAPED_UNICODE | $utf8Flag);
        if ($bodyJson === false) {
            throw new Exception('LLM request json_encode failed: ' . json_last_error_msg());
        }

        $ch = curl_init();
        curl_setopt_array($ch, [
            CURLOPT_URL => $url,
            CURLOPT_POST => true,
            CURLOPT_POSTFIELDS => $bodyJson,
            CURLOPT_RETURNTRANSFER => true,
            CURLOPT_CONNECTTIMEOUT => 15,
            // Total timeout is configurable but never below 30 seconds.
            CURLOPT_TIMEOUT => max(30, (int) Env::get('user_field_ai.timeout', 90)),
            CURLOPT_HTTPHEADER => array_values(array_filter([
                'Content-Type: application/json',
                $apiKey !== '' ? 'Authorization: Bearer ' . $apiKey : null,
            ])),
        ]);
        $raw = curl_exec($ch);
        $code = (int) curl_getinfo($ch, CURLINFO_HTTP_CODE);
        $err = curl_error($ch);
        curl_close($ch);

        if ($raw === false) {
            throw new Exception('LLM curl error: ' . $err);
        }
        if ($code < 200 || $code >= 300) {
            throw new Exception('LLM HTTP ' . $code . ': ' . mb_substr((string) $raw, 0, 400));
        }

        // Prefer the chat-completions message content; otherwise fall back to the raw body.
        $data = json_decode($raw, true);
        if (is_array($data)
            && isset($data['choices'][0]['message']['content'])
            && is_string($data['choices'][0]['message']['content'])
        ) {
            $content = trim($data['choices'][0]['message']['content']);
        } else {
            $content = trim((string) $raw);
        }

        $fieldAi = $this->parseFieldAiFromContent($content);

        // Plain-text fallback for models that ignore the JSON instruction. Text containing
        // a JSON object (an error payload, or {"field_ai":""}) is rejected here; accepting
        // it would store raw JSON as the field. The caller treats '' as a failure.
        if ($fieldAi === '' && $content !== '' && strpos($content, '{') === false) {
            $fieldAi = $this->cleanFieldAiText($content);
        }

        return $fieldAi;
    }

    /**
     * Extract `field_ai` from the model output, tolerating Markdown code fences
     * and text surrounding the JSON object.
     *
     * @param mixed $content raw model output
     * @return string cleaned field text, or '' when no non-empty field_ai is present
     */
    private function parseFieldAiFromContent($content)
    {
        $content = trim((string) $content);
        if ($content === '') {
            return '';
        }

        // Strip ```lang fences; keep the original text if the regex engine reports an error.
        $unfenced = preg_replace('/^```[a-zA-Z]*\s*|```$/m', '', $content);
        if (is_string($unfenced)) {
            $content = $unfenced;
        }

        // Try the outermost {...} span first, then the whole text.
        $candidates = [];
        if (preg_match('/\{.*\}/s', $content, $m)) {
            $candidates[] = $m[0];
        }
        $candidates[] = $content;

        foreach ($candidates as $json) {
            $obj = json_decode($json, true);
            if (!is_array($obj) || empty($obj['field_ai'])) {
                continue;
            }

            $value = $obj['field_ai'];
            if (is_array($value)) {
                // The model answered with a list: join its scalar items the way the prompt
                // asks, instead of casting the array to the literal string "Array".
                $value = implode('、', array_map('strval', array_filter($value, 'is_scalar')));
            }

            return $this->cleanFieldAiText((string) $value);
        }

        return '';
    }

    /**
     * Normalize a field string: strip surrounding quotes, remove all
     * whitespace, and cap the length at 200 characters.
     *
     * @param mixed $text
     * @return string always a string, including on invalid UTF-8 input
     */
    private function cleanFieldAiText($text)
    {
        $text = trim((string) $text);
        $text = trim($text, "\"' \t\n\r");

        $stripped = preg_replace('/\s+/u', '', $text);
        if ($stripped === null) {
            // The /u modifier makes preg_replace return null on invalid UTF-8; fall back to a
            // byte-wise strip so this method never returns null.
            $stripped = (string) preg_replace('/\s+/', '', $text);
        }
        $text = $stripped;

        if (mb_strlen($text) > 200) {
            $text = mb_substr($text, 0, 200);
        }
        return $text;
    }

    /**
     * Make sure a user_reviewer_info row exists for the user, inserting a
     * minimal one (reviewer_id, ctime, state = 0) when it does not.
     *
     * NOTE(review): check-then-insert is not atomic; concurrent workers could
     * insert twice unless reviewer_id has a unique index — confirm.
     *
     * @param int $userId
     * @return void
     */
    public function ensureReviewerInfoRow($userId)
    {
        $exists = Db::name('user_reviewer_info')->where('reviewer_id', $userId)->find();
        if ($exists) {
            return;
        }
        Db::name('user_reviewer_info')->insert([
            'reviewer_id' => $userId,
            'ctime' => time(),
            'state' => 0,
        ]);
    }

    /**
     * Persist the generated field, its status and the update time.
     *
     * @param int    $userId
     * @param string $fieldAi stored trimmed and capped at 512 characters
     * @param int    $status  one of the STATUS_* constants
     * @param string $note    written to the log only (not stored); '' to skip logging
     * @return void
     */
    private function updateFieldAi($userId, $fieldAi, $status, $note)
    {
        $data = [
            'field_ai' => mb_substr(trim((string) $fieldAi), 0, 512),
            'field_ai_status' => intval($status),
            'field_ai_utime' => time(),
        ];
        Db::name('user_reviewer_info')->where('reviewer_id', $userId)->update($data);

        if ($note !== '') {
            $this->log('[FieldAi] user_id=' . $userId . ' status=' . $status . ' note=' . $note);
        }
    }

    /**
     * Append a timestamped line to the log file. Logging is best-effort:
     * write failures are deliberately ignored so they cannot break a job.
     *
     * @param string $msg
     * @return void
     */
    public function log($msg)
    {
        $line = date('Y-m-d H:i:s') . ' ' . $msg . PHP_EOL;
        // LOCK_EX keeps lines from concurrent queue workers from interleaving.
        @file_put_contents($this->logFile, $line, FILE_APPEND | LOCK_EX);
    }
}