Files
tougao/application/common/UserFieldAiService.php
2026-06-05 11:01:16 +08:00

478 lines
17 KiB
PHP
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
<?php
namespace app\common;
use think\Db;
use think\Env;
use think\Exception;
use think\Queue;
/**
* Summarises a user's main research field (in Chinese) with a large language
* model, based on the user's submissions and reviewer profile, and stores the
* result in user_reviewer_info.field_ai.
*
* Queue chain: UserFieldAiFill job -> process one user -> enqueueNextFieldAi -> next user.
*/
class UserFieldAiService
{
// Queue name used when a caller passes an empty queue name.
const QUEUE_NAME = 'UserFieldAi';
// Values written to user_reviewer_info.field_ai_status:
// PENDING - selected by the chain scan as still to be processed.
const STATUS_PENDING = 0;
// DONE - written after a summary was generated and stored.
const STATUS_DONE = 1;
// INSUFFICIENT - written when the user has neither submissions nor a complete enough profile.
const STATUS_INSUFFICIENT = 2;
// FAILED - written when processing threw; the chain scan selects these rows again.
const STATUS_FAILED = 3;
// Path of the log file; set in the constructor and appended to by log().
private $logFile;
/**
 * Resolve the path of the service's log file:
 * ROOT_PATH followed by "runtime", the directory separator and "user_field_ai.log".
 */
public function __construct()
{
    $segments = ['runtime', 'user_field_ai.log'];
    $this->logFile = ROOT_PATH . implode(DS, $segments);
}
/**
 * Start the chained processing from the beginning of the user table
 * (the first candidate with user_id greater than 0).
 *
 * @param bool   $force when true, users that already have a result are recomputed
 * @param int    $delay delay forwarded to enqueueNextFieldAi()
 * @param string $queue queue name forwarded to enqueueNextFieldAi(); '' selects the default
 * @return bool whether a first job was pushed
 */
public function startChain($force = false, $delay = 1, $queue = '')
{
    $startAfterUserId = 0;

    return $this->enqueueNextFieldAi($delay, $queue, $startAfterUserId, $force);
}
/**
 * Chain step: find the next user with user_id greater than $afterUserId that
 * still needs processing and push a UserFieldAiFill job for that user.
 *
 * @param int    $delay       when greater than 0 the job is scheduled with Queue::later(), otherwise pushed immediately
 * @param string $queue       queue name; '' falls back to self::QUEUE_NAME
 * @param int    $afterUserId cursor: only users with a larger user_id are considered
 * @param bool   $force       forwarded to the scan; also stored in the job payload as 1/0
 * @return bool true when a job was enqueued, false when no further user was found
 */
public function enqueueNextFieldAi($delay = 1, $queue = '', $afterUserId = 0, $force = false)
{
    $queueName = ($queue === '') ? self::QUEUE_NAME : $queue;
    $cursor = intval($afterUserId);

    $nextUserId = $this->findNextPendingUserId($cursor, $force);
    if ($nextUserId <= 0) {
        // Nothing left behind the cursor: the chain ends here.
        $this->log('[FieldAi] chain finished after user_id=' . $cursor . ' force=' . ($force ? '1' : '0'));
        return false;
    }

    $payload = [
        'user_id' => $nextUserId,
        'queue' => $queueName,
        'force' => $force ? 1 : 0,
    ];
    $handler = 'app\\api\\job\\UserFieldAiFill@fire';

    if ($delay > 0) {
        Queue::later($delay, $handler, $payload, $queueName);
    } else {
        Queue::push($handler, $payload, $queueName);
    }

    $this->log('[FieldAi] enqueued user_id=' . $nextUserId . ' queue=' . $queueName);
    return true;
}
/**
 * Process a single user (called from the queue job or synchronously for debugging).
 *
 * Steps: make sure a user_reviewer_info row exists, skip users that already
 * have a result (unless $force), mark users without enough data as
 * INSUFFICIENT, otherwise build the context, ask the LLM and store the result.
 *
 * On failure the status is set to FAILED but a previously stored field_ai is
 * kept, so a transient LLM/network error during a forced recompute does not
 * erase a good value.
 *
 * @param int  $userId user id; values <= 0 are rejected
 * @param bool $force  recompute even when a result already exists
 * @return array{ok:bool, skipped?:bool, insufficient?:bool, field_ai?:string, error?:string}
 */
public function processUser($userId, $force = false)
{
    $userId = intval($userId);
    if ($userId <= 0) {
        return ['ok' => false, 'error' => 'invalid user_id'];
    }

    $this->ensureReviewerInfoRow($userId);
    $uri = Db::name('user_reviewer_info')->where('reviewer_id', $userId)->find();
    if (!$uri) {
        return ['ok' => false, 'error' => 'reviewer_info missing'];
    }

    $previousFieldAi = trim((string) $uri['field_ai']);
    if (!$force && intval($uri['field_ai_status']) === self::STATUS_DONE && $previousFieldAi !== '') {
        return ['ok' => true, 'skipped' => true, 'field_ai' => (string) $uri['field_ai']];
    }

    if (!$this->isEligible($userId, $uri)) {
        $this->updateFieldAi($userId, '', self::STATUS_INSUFFICIENT, 'insufficient profile/articles');
        return ['ok' => true, 'insufficient' => true];
    }

    try {
        $context = $this->buildContext($userId, $uri);
        $fieldAi = $this->summarizeWithLlm($context);
        if ($fieldAi === '') {
            throw new Exception('LLM returned empty field');
        }
        $this->updateFieldAi($userId, $fieldAi, self::STATUS_DONE, '');
        $this->syncLinkedExperts($userId);
        return ['ok' => true, 'field_ai' => $fieldAi];
    } catch (\Throwable $e) {
        $message = $e->getMessage();
        try {
            // Keep the previously stored value instead of blanking it on failure.
            $this->updateFieldAi($userId, $previousFieldAi, self::STATUS_FAILED, mb_substr($message, 0, 500));
        } catch (\Throwable $writeError) {
            // Recording the failure must not mask the original error or break
            // the documented array return.
            $this->log('[FieldAi] user_id=' . $userId . ' status write fail: ' . $writeError->getMessage());
        }
        $this->log('[FieldAi] user_id=' . $userId . ' fail: ' . $message);
        return ['ok' => false, 'error' => $message];
    }
}
/**
 * Decide whether a user can be summarised: the user either has submitted
 * articles or has a sufficiently complete reviewer profile.
 *
 * @param int        $userId user id
 * @param array|null $uri    user_reviewer_info row; loaded by reviewer_id when null
 * @return bool
 */
public function isEligible($userId, $uri = null)
{
    if (!$this->hasSubmittedArticles($userId)) {
        $profile = $uri;
        if ($profile === null) {
            $profile = Db::name('user_reviewer_info')->where('reviewer_id', $userId)->find();
        }
        return $this->isReviewerProfileComplete($profile);
    }

    return true;
}
/**
 * Whether the user has at least one article row with a non-empty title.
 *
 * @param int $userId user id
 * @return bool
 */
public function hasSubmittedArticles($userId)
{
    $titled = Db::name('article')
        ->where('user_id', intval($userId))
        ->where('title', '<>', '');

    return $titled->count() > 0;
}
/**
 * A reviewer profile counts as "complete enough" when the number of filled
 * fields reaches a threshold (env user_field_ai.min_profile_fields, default 4,
 * never below 3).
 *
 * Counted: each non-empty text field (field, company, country, technical,
 * introduction, department, website), a non-empty/non-zero legacy "major"
 * value, and the presence of at least one major_to_user row with state 0.
 *
 * @param array|mixed $uri user_reviewer_info row; anything that is not a non-empty array yields false
 * @return bool
 */
public function isReviewerProfileComplete($uri)
{
    if (!$uri || !is_array($uri)) {
        return false;
    }

    $minFilled = max(3, (int) Env::get('user_field_ai.min_profile_fields', 4));
    $keys = ['field', 'company', 'country', 'technical', 'introduction', 'department', 'website'];

    $filled = 0;
    foreach ($keys as $k) {
        if (!empty($uri[$k]) && trim((string) $uri[$k]) !== '') {
            $filled++;
        }
    }
    if (!empty($uri['major']) && trim((string) $uri['major']) !== '' && trim((string) $uri['major']) !== '0') {
        $filled++;
    }

    // Threshold already met: the extra database lookup cannot change the answer.
    if ($filled >= $minFilled) {
        return true;
    }

    // Rows passed in by external callers may lack reviewer_id; do not query for user 0.
    $reviewerId = isset($uri['reviewer_id']) ? intval($uri['reviewer_id']) : 0;
    if ($reviewerId > 0) {
        $majorCount = Db::name('major_to_user')->where('user_id', $reviewerId)->where('state', 0)->count();
        if ($majorCount > 0) {
            $filled++;
        }
    }

    return $filled >= $minFilled;
}
/**
 * Scan users in ascending user_id order, starting after $afterUserId, and
 * return the first one that should be processed.
 *
 * Users are read in batches (env user_field_ai.scan_batch, default 80, at
 * least 20). Without $force only users whose reviewer-info row is missing or
 * whose status is PENDING/FAILED are scanned, and scanned users that turn out
 * not to be eligible are marked INSUFFICIENT along the way.
 *
 * @param int  $afterUserId cursor; only larger user ids are considered
 * @param bool $force       consider every user regardless of status
 * @return int the user id to process, or 0 when no user is left
 */
private function findNextPendingUserId($afterUserId, $force)
{
    $pageSize = max(20, (int) Env::get('user_field_ai.scan_batch', 80));
    $lastSeenId = intval($afterUserId);

    for (;;) {
        $scan = Db::name('user')->alias('u')
            ->join('t_user_reviewer_info uri', 'uri.reviewer_id = u.user_id', "left")
            ->where('u.user_id', '>', $lastSeenId);
        if (!$force) {
            $scan->where(function ($q) {
                $q->where('uri.field_ai_status', self::STATUS_PENDING)
                    ->whereOr('uri.field_ai_status', self::STATUS_FAILED)
                    ->whereOr('uri.reviewer_info_id', 'null');
            });
        }

        $candidateIds = $scan->order('u.user_id asc')->limit($pageSize)->column('u.user_id');
        if (empty($candidateIds)) {
            return 0;
        }

        foreach ($candidateIds as $candidateId) {
            $candidateId = intval($candidateId);
            // Advance the cursor so the next batch starts behind this user.
            $lastSeenId = $candidateId;

            $this->ensureReviewerInfoRow($candidateId);
            $row = Db::name('user_reviewer_info')->where('reviewer_id', $candidateId)->find();

            if (!$force) {
                $status = intval($row['field_ai_status']);
                if ($status === self::STATUS_DONE || $status === self::STATUS_INSUFFICIENT) {
                    continue;
                }
            }

            if ($this->isEligible($candidateId, $row)) {
                return $candidateId;
            }

            if (!$force) {
                $this->updateFieldAi($candidateId, '', self::STATUS_INSUFFICIENT, 'auto skip: insufficient data');
            }
        }
    }
}
/**
 * Assemble the data sent to the LLM for one user: basic user info, reviewer
 * profile fields and the most recent titled articles.
 *
 * The number of articles comes from env user_field_ai.max_articles (default 5,
 * clamped to 1..10). Abstracts are cut to 800 characters and the profile
 * introduction to 1200 characters.
 *
 * NOTE(review): realname and email are part of the payload that is later sent
 * to the LLM endpoint - confirm this is acceptable for the deployment.
 *
 * @param int   $userId user id
 * @param array $uri    user_reviewer_info row of that user
 * @return array with keys "user", "profile" and "articles"
 */
private function buildContext($userId, array $uri)
{
    $account = Db::name('user')->where('user_id', $userId)->field('user_id,realname,email,account')->find();
    $majors = $this->resolveMajorTitles($userId, $uri);

    $articleLimit = max(1, min(10, (int) Env::get('user_field_ai.max_articles', 5)));
    $recentArticles = Db::name('article')
        ->where('user_id', $userId)
        ->where('title', '<>', '')
        ->order('article_id desc')
        ->limit($articleLimit)
        ->field('article_id,title,keywords,abstrart,journal_id,ctime')
        ->select();

    // Look up the journal titles of all referenced journals in one query.
    $journalTitleById = [];
    if (!empty($recentArticles)) {
        $journalIds = array_unique(array_filter(array_column($recentArticles, 'journal_id')));
        if (!empty($journalIds)) {
            $journalTitleById = Db::name('journal')->where('journal_id', 'in', $journalIds)->column('title', 'journal_id');
        }
    }

    $articleSummaries = [];
    foreach ($recentArticles as $article) {
        $journalId = intval($article['journal_id']);
        $journalTitle = '';
        if (isset($journalTitleById[$journalId])) {
            $journalTitle = (string) $journalTitleById[$journalId];
        }
        $articleSummaries[] = [
            'title' => (string) $article['title'],
            'journal' => $journalTitle,
            'keywords' => (string) ($article['keywords'] ?? ''),
            // "abstrart" is the column name used by the article query above.
            'abstract' => mb_substr(trim((string) ($article['abstrart'] ?? '')), 0, 800),
        ];
    }

    // Trimmed string value of a profile column, '' when the column is absent.
    $profileText = function ($key) use ($uri) {
        return trim((string) ($uri[$key] ?? ''));
    };

    $userInfo = ['realname' => '', 'email' => ''];
    if ($account) {
        $userInfo['realname'] = (string) $account['realname'];
        $userInfo['email'] = (string) $account['email'];
    }

    return [
        'user' => $userInfo,
        'profile' => [
            'field' => $profileText('field'),
            'technical' => $profileText('technical'),
            'company' => $profileText('company'),
            'department' => $profileText('department'),
            'country' => $profileText('country'),
            'introduction' => mb_substr($profileText('introduction'), 0, 1200),
            'website' => $profileText('website'),
            'majors' => $majors,
        ],
        'articles' => $articleSummaries,
    ];
}
/**
 * Collect the titles of the user's majors.
 *
 * Preferred source: major_to_user rows with state 0, resolved against
 * reviewer_major rows with state 0. When that yields nothing, the
 * comma-separated id list in $uri['major'] is resolved instead.
 *
 * @param int   $userId user id
 * @param array $uri    user_reviewer_info row
 * @return array list of unique, trimmed, non-empty titles
 */
private function resolveMajorTitles($userId, array $uri)
{
    $names = [];

    $linkedIds = Db::name('major_to_user')->where('user_id', $userId)->where('state', 0)->column('major_id');
    if (!empty($linkedIds)) {
        $names = Db::name('reviewer_major')->where('major_id', 'in', $linkedIds)->where('state', 0)->column('title');
    }

    if (empty($names) && !empty($uri['major'])) {
        $rawIds = explode(',', (string) $uri['major']);
        $legacyIds = array_filter(array_map('intval', $rawIds));
        if (!empty($legacyIds)) {
            $names = Db::name('reviewer_major')->where('major_id', 'in', $legacyIds)->column('title');
        }
    }

    $trimmed = array_map('trim', $names);
    $nonEmpty = array_filter($trimmed);

    return array_values(array_unique($nonEmpty));
}
/**
 * Resolve the full URL of the OpenAI-compatible chat/completions endpoint.
 *
 * base.model_url is often just the site root (e.g. http://chat.taimed.cn);
 * POSTing to it directly returns 404, so every candidate is normalised.
 *
 * Only base.model_url is consulted. Other keys (user_field_ai.chat_url,
 * promotion.promotion_llm_url, expert_country_chat_url, citation_chat_url)
 * were listed here as candidates but are disabled.
 *
 * @return string the endpoint URL, or '' when nothing is configured
 */
private function resolveLlmChatUrl()
{
    $sources = [
        Env::get('base.model_url', ''),
    ];

    foreach ($sources as $configured) {
        $candidate = trim((string) $configured);
        if ($candidate !== '') {
            $endpoint = $this->normalizeChatCompletionsUrl($candidate);
            if ($endpoint !== '') {
                return $endpoint;
            }
        }
    }

    return '';
}
/**
 * Turn a configured base URL into a full chat/completions endpoint URL.
 *
 * - '' (after trimming) stays ''.
 * - A URL that already contains "chat/completions" is returned as is.
 * - A plain URL whose path already ends in a version segment (".../v1")
 *   gets only "/chat/completions" appended, so ".../v1" does not become
 *   ".../v1/v1/chat/completions".
 * - Anything else gets "/v1/chat/completions" appended.
 *
 * @param string $url configured URL
 * @return string normalised endpoint URL or ''
 */
private function normalizeChatCompletionsUrl($url)
{
    $url = trim((string) $url);
    if ($url === '') {
        return '';
    }
    if (stripos($url, 'chat/completions') !== false) {
        return $url;
    }

    $base = rtrim($url, '/');

    // Only inspect the path of plain URLs; with a query string or fragment the
    // version check would look at the wrong part, so those keep the old behaviour.
    if (strpbrk($base, '?#') === false) {
        $path = (string) parse_url($base, PHP_URL_PATH);
        if (preg_match('#/v\d+$#i', $path) === 1) {
            return $base . '/chat/completions';
        }
    }

    return $base . '/v1/chat/completions';
}
/**
 * Pick the model name to send to the LLM endpoint.
 *
 * The env keys are tried in order; the first value that is non-empty and not
 * the placeholder "your-model-name" (case-insensitive) wins. When none
 * qualifies, the built-in default "gpt-4.1" is used.
 *
 * @return string model name
 */
private function resolveLlmModel()
{
    $envKeys = [
        'user_field_ai.chat_model',
        'base.model',
        'promotion.promotion_llm_model',
        'expert_country_chat_model',
        'citation_chat_model',
    ];

    foreach ($envKeys as $envKey) {
        $name = trim((string) Env::get($envKey, ''));
        $isPlaceholder = strtolower($name) === 'your-model-name';
        if ($name !== '' && !$isPlaceholder) {
            return $name;
        }
    }

    return 'gpt-4.1';
}
/**
 * Send the user context to the chat/completions endpoint and extract the
 * summarised field text from the answer.
 *
 * The model is asked to reply with JSON {"field_ai":"..."}. When the model's
 * message is not JSON, its cleaned plain text is used instead. A 2xx response
 * that is not a chat-completions payload is accepted only when the body itself
 * contains a field_ai JSON object; otherwise it is treated as an error rather
 * than stored as a field value.
 *
 * @param array $context data produced by buildContext()
 * @return string the cleaned field text ('' when the model answered with nothing usable)
 * @throws Exception when the endpoint is not configured, the request cannot be
 *                   encoded, the transfer fails, the HTTP status is not 2xx or
 *                   the response has an unexpected shape
 */
private function summarizeWithLlm(array $context)
{
    $url = $this->resolveLlmChatUrl();
    $model = $this->resolveLlmModel();
    $apiKey = trim((string) Env::get('user_field_ai.chat_api_key', Env::get('expert_country_chat_api_key', Env::get('citation_chat_api_key', ''))));
    if ($url === '' || $model === '') {
        throw new Exception('user_field_ai chat not configured (set base.model_url)');
    }

    // Partial output keeps the request usable when a database text contains
    // bytes that cannot be encoded (they are replaced instead of failing).
    $payloadJson = json_encode($context, JSON_UNESCAPED_UNICODE | JSON_UNESCAPED_SLASHES | JSON_PARTIAL_OUTPUT_ON_ERROR);
    if ($payloadJson === false) {
        throw new Exception('LLM request encode error: ' . json_last_error_msg());
    }

    $messages = [
        [
            'role' => 'system',
            'content' => '你是学术领域分类助手。根据用户的投稿与个人资料,用简体中文给出该用户最主要的研究领域总结。'
                // Range restored: the text read "13 个", which contradicts the request for brevity.
                . '要求精确、简洁:1-3 个中文领域词或短短语,用顿号分隔;不要解释、不要英文、不要 JSON 以外的多余文字。'
                . '只输出 JSON{"field_ai":"..."}。',
        ],
        [
            'role' => 'user',
            'content' => "请根据以下 JSON 资料总结该用户的主要研究领域:\n" . $payloadJson,
        ],
    ];
    $body = [
        'model' => $model,
        'temperature' => 0.2,
        'messages' => $messages,
    ];
    $bodyJson = json_encode($body, JSON_UNESCAPED_UNICODE);
    if ($bodyJson === false) {
        throw new Exception('LLM request encode error: ' . json_last_error_msg());
    }

    $ch = curl_init();
    if ($ch === false) {
        throw new Exception('LLM curl error: curl_init failed');
    }
    curl_setopt_array($ch, [
        CURLOPT_URL => $url,
        CURLOPT_POST => true,
        CURLOPT_POSTFIELDS => $bodyJson,
        CURLOPT_RETURNTRANSFER => true,
        CURLOPT_CONNECTTIMEOUT => 15,
        CURLOPT_TIMEOUT => max(30, (int) Env::get('user_field_ai.timeout', 90)),
        // array_filter drops the Authorization entry when no API key is configured.
        CURLOPT_HTTPHEADER => array_filter([
            'Content-Type: application/json',
            $apiKey !== '' ? 'Authorization: Bearer ' . $apiKey : null,
        ]),
    ]);
    $raw = curl_exec($ch);
    $code = (int) curl_getinfo($ch, CURLINFO_HTTP_CODE);
    $err = curl_error($ch);
    curl_close($ch);

    if ($raw === false) {
        throw new Exception('LLM curl error: ' . $err);
    }
    if ($code < 200 || $code >= 300) {
        $hint = ($code === 404 && stripos($url, 'chat/completions') === false)
            ? ' (chat_url may be missing /v1/chat/completions)'
            : '';
        throw new Exception('LLM HTTP ' . $code . $hint . ': ' . mb_substr((string) $raw, 0, 400));
    }

    $data = json_decode($raw, true);
    if (is_array($data)
        && isset($data['choices'][0]['message']['content'])
        && is_string($data['choices'][0]['message']['content'])
    ) {
        $content = trim($data['choices'][0]['message']['content']);
        $fieldAi = $this->parseFieldAiFromContent($content);
        if ($fieldAi === '' && $content !== '') {
            // The model ignored the JSON instruction: use its plain text.
            $fieldAi = $this->cleanFieldAiText($content);
        }
        return (string) $fieldAi;
    }

    // Not a chat-completions payload (HTML page, error object, ...): never
    // store the raw body as a field; accept it only if it carries field_ai JSON.
    $fieldAi = (string) $this->parseFieldAiFromContent((string) $raw);
    if ($fieldAi === '') {
        throw new Exception('LLM unexpected response: ' . mb_substr(trim((string) $raw), 0, 400));
    }
    return $fieldAi;
}
/**
 * Extract the "field_ai" value from a model answer.
 *
 * Markdown code fences are stripped first. Then the span from the first "{"
 * to the last "}" is tried as JSON, followed by the whole content. The first
 * decoded object with a non-empty field_ai wins.
 *
 * A field_ai given as a JSON array (e.g. ["A","B"]) is joined with "、"
 * instead of being cast to the literal string "Array".
 *
 * @param string $content model answer text
 * @return string cleaned field text, or '' when no field_ai could be extracted
 */
private function parseFieldAiFromContent($content)
{
    $content = trim((string) $content);
    if ($content === '') {
        return '';
    }

    $unfenced = preg_replace('/^```[a-zA-Z]*\s*|```$/m', '', $content);
    if (is_string($unfenced)) {
        // preg_replace returns null on a PCRE error; keep the original text then.
        $content = $unfenced;
    }

    $candidates = [];
    if (preg_match('/\{.*\}/s', $content, $m)) {
        $candidates[] = $m[0];
    }
    $candidates[] = $content;

    foreach ($candidates as $json) {
        $obj = json_decode($json, true);
        if (!is_array($obj) || empty($obj['field_ai'])) {
            continue;
        }

        $value = $obj['field_ai'];
        if (is_array($value)) {
            $parts = [];
            foreach ($value as $part) {
                if (is_scalar($part) && trim((string) $part) !== '') {
                    $parts[] = trim((string) $part);
                }
            }
            $value = implode('、', $parts);
        }

        return $this->cleanFieldAiText((string) $value);
    }

    return '';
}
/**
 * Normalise a field text: trim it, strip surrounding quotes, remove all
 * whitespace and cap the length at 200 characters.
 *
 * Always returns a string. When the text is not valid UTF-8 the Unicode-aware
 * whitespace removal fails, so a byte-wise removal of ASCII whitespace is used
 * instead of returning null.
 *
 * @param string $text raw text
 * @return string cleaned text
 */
private function cleanFieldAiText($text)
{
    $text = trim((string) $text);
    $text = trim($text, "\"' \t\n\r");

    $compact = preg_replace('/\s+/u', '', $text);
    if ($compact === null) {
        // Explicit ASCII list: in byte mode broader classes could match bytes
        // that belong to multi-byte characters.
        $compact = preg_replace('/[ \t\n\r\f\x0B]+/', '', $text);
    }
    $text = (string) $compact;

    if (mb_strlen($text) > 200) {
        $text = mb_substr($text, 0, 200);
    }
    return $text;
}
/**
 * Make sure a user_reviewer_info row exists for the user; when none is found
 * a row with only reviewer_id and state 0 is inserted.
 *
 * @param int $userId user id stored as reviewer_id
 * @return void
 */
public function ensureReviewerInfoRow($userId)
{
    $existing = Db::name('user_reviewer_info')->where('reviewer_id', $userId)->find();
    if (!$existing) {
        $blankRow = [
            'reviewer_id' => $userId,
            'state' => 0,
        ];
        Db::name('user_reviewer_info')->insert($blankRow);
    }
}
/**
 * Store the field text, status and update time on the user's
 * user_reviewer_info row, and log the note when one is given.
 *
 * @param int    $userId  user id (matched against reviewer_id)
 * @param string $fieldAi field text; trimmed and cut to 512 characters
 * @param int    $status  one of the STATUS_* constants
 * @param string $note    log-only remark; '' writes no log line
 * @return void
 */
private function updateFieldAi($userId, $fieldAi, $status, $note)
{
    $storedValue = mb_substr(trim((string) $fieldAi), 0, 512);

    Db::name('user_reviewer_info')->where('reviewer_id', $userId)->update([
        'field_ai' => $storedValue,
        'field_ai_status' => intval($status),
        'field_ai_utime' => time(),
    ]);

    if ($note === '') {
        return;
    }
    $this->log('[FieldAi] user_id=' . $userId . ' status=' . $status . ' note=' . $note);
}
/**
 * Append one timestamped line to the service log file.
 * Write errors are suppressed: logging is best effort.
 *
 * @param string $msg message text
 * @return void
 */
public function log($msg)
{
    $timestamp = date('Y-m-d H:i:s');
    $entry = $timestamp . ' ' . $msg . PHP_EOL;

    @file_put_contents($this->logFile, $entry, FILE_APPEND);
}
/**
 * After the user's field_ai was updated, pass the change on to the linked
 * expert records through ExpertFieldAiService::syncExpertsByUserId().
 * (The original note describes the link as "same e-mail expert, scheme C".)
 *
 * Failures are logged and swallowed so that they do not fail the user update.
 *
 * @param int $userId user id
 * @return void
 */
private function syncLinkedExperts($userId)
{
    try {
        $expertService = new ExpertFieldAiService();
        $expertService->syncExpertsByUserId(intval($userId), true);
    } catch (\Throwable $syncError) {
        $this->log('[FieldAi] sync expert fail user_id=' . $userId . ' ' . $syncError->getMessage());
    }
}
}