464 lines
17 KiB
PHP
464 lines
17 KiB
PHP
<?php
|
||
|
||
namespace app\common;
|
||
|
||
use think\Db;
|
||
use think\Env;
|
||
use think\Exception;
|
||
use think\Queue;
|
||
|
||
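/**
 * Uses a large language model to summarize each user's primary research field
 * (in Chinese) from their submission records / reviewer profile, and writes the
 * result to field_ai.
 * Queue chain: UserFieldAiFill → process one user → enqueueNextFieldAi → next user.
 */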
|
||
class UserFieldAiService
|
||
{
|
||
// Queue name used by enqueueNextFieldAi() when the caller passes an empty queue string.
const QUEUE_NAME = 'UserFieldAi';
|
||
|
||
// field_ai_status values. PENDING rows (and FAILED rows) are selected by the pending-user scan.
const STATUS_PENDING = 0;
|
||
// Written after a non-empty summary has been stored in field_ai.
const STATUS_DONE = 1;
|
||
// Written when the user has neither titled articles nor a sufficiently filled profile.
const STATUS_INSUFFICIENT = 2;
|
||
// Written when building the context or calling the LLM threw.
const STATUS_FAILED = 3;
|
||
|
||
// Path of the log file appended to by log(); assigned in the constructor.
private $logFile;
|
||
|
||
/**
 * Point the service log at runtime/user_field_ai.log below ROOT_PATH.
 */
public function __construct()
{
    $runtimeDir = ROOT_PATH . 'runtime' . DS;
    $this->logFile = $runtimeDir . 'user_field_ai.log';
}
|
||
|
||
/**
 * Start the chained processing: look for the first candidate with user_id > 0
 * and enqueue a job for it.
 *
 * @param bool   $force true to also recompute users whose field was already generated
 * @param int    $delay delay in seconds for the first job; 0 or less pushes it immediately
 * @param string $queue queue name; an empty string selects self::QUEUE_NAME
 * @return bool whether a first job was pushed
 */
public function startChain($force = false, $delay = 1, $queue = '')
{
    $startAfterUserId = 0;

    return $this->enqueueNextFieldAi($delay, $queue, $startAfterUserId, $force);
}
|
||
|
||
/**
 * Chain step: find the next candidate with user_id > $afterUserId and enqueue a
 * UserFieldAiFill job for it.
 *
 * @param int    $delay       delay in seconds; 0 or less pushes the job immediately
 * @param string $queue       queue name; an empty string selects self::QUEUE_NAME
 * @param int    $afterUserId only users with a greater user_id are considered
 * @param bool   $force       forwarded to the scan and carried in the job payload
 * @return bool true when a job was enqueued, false when no candidate is left
 */
public function enqueueNextFieldAi($delay = 1, $queue = '', $afterUserId = 0, $force = false)
{
    $queue = ($queue === '') ? self::QUEUE_NAME : $queue;
    $afterUserId = intval($afterUserId);

    $nextUserId = $this->findNextPendingUserId($afterUserId, $force);
    if ($nextUserId <= 0) {
        // Nothing left to process: the chain ends here.
        $this->log('[FieldAi] chain finished after user_id=' . $afterUserId . ' force=' . ($force ? '1' : '0'));

        return false;
    }

    $handler = 'app\\api\\job\\UserFieldAiFill@fire';
    $payload = [
        'user_id' => $nextUserId,
        'queue' => $queue,
        'force' => $force ? 1 : 0,
    ];

    if ($delay > 0) {
        Queue::later($delay, $handler, $payload, $queue);
    } else {
        Queue::push($handler, $payload, $queue);
    }

    $this->log('[FieldAi] enqueued user_id=' . $nextUserId . ' queue=' . $queue);

    return true;
}
|
||
|
||
/**
 * Process a single user (called from the queue job or synchronously for debugging).
 *
 * Outcomes: an already-summarized user is skipped unless $force is set; a user
 * without enough data is marked STATUS_INSUFFICIENT; otherwise the LLM summary is
 * stored with STATUS_DONE, or STATUS_FAILED is recorded when anything throws.
 *
 * @param int  $userId
 * @param bool $force true to recompute even when a summary is already stored
 * @return array{ok:bool, skipped?:bool, insufficient?:bool, field_ai?:string, error?:string}
 */
public function processUser($userId, $force = false)
{
    $userId = intval($userId);
    if ($userId <= 0) {
        return ['ok' => false, 'error' => 'invalid user_id'];
    }

    $this->ensureReviewerInfoRow($userId);
    $profileRow = Db::name('user_reviewer_info')->where('reviewer_id', $userId)->find();
    if (!$profileRow) {
        return ['ok' => false, 'error' => 'reviewer_info missing'];
    }

    if (!$force) {
        $alreadyDone = intval($profileRow['field_ai_status']) === self::STATUS_DONE;
        if ($alreadyDone && trim((string)$profileRow['field_ai']) !== '') {
            return ['ok' => true, 'skipped' => true, 'field_ai' => (string)$profileRow['field_ai']];
        }
    }

    if (!$this->isEligible($userId, $profileRow)) {
        $this->updateFieldAi($userId, '', self::STATUS_INSUFFICIENT, 'insufficient profile/articles');

        return ['ok' => true, 'insufficient' => true];
    }

    try {
        $summary = $this->summarizeWithLlm($this->buildContext($userId, $profileRow));
        if ($summary === '') {
            throw new Exception('LLM returned empty field');
        }
        $this->updateFieldAi($userId, $summary, self::STATUS_DONE, '');

        return ['ok' => true, 'field_ai' => $summary];
    } catch (\Throwable $e) {
        $reason = $e->getMessage();
        // The failure clears field_ai and records STATUS_FAILED; the note is capped at 500 chars.
        $this->updateFieldAi($userId, '', self::STATUS_FAILED, mb_substr($reason, 0, 500));
        $this->log('[FieldAi] user_id=' . $userId . ' fail: ' . $reason);

        return ['ok' => false, 'error' => $reason];
    }
}
|
||
|
||
/**
 * Whether a user can be summarized: they have at least one titled article, or
 * their reviewer profile is filled in well enough.
 *
 * @param int        $userId
 * @param array|null $uri user_reviewer_info row; loaded by reviewer_id when null
 * @return bool
 */
public function isEligible($userId, $uri = null)
{
    if ($this->hasSubmittedArticles($userId)) {
        return true;
    }

    $profileRow = ($uri === null)
        ? Db::name('user_reviewer_info')->where('reviewer_id', $userId)->find()
        : $uri;

    return $this->isReviewerProfileComplete($profileRow);
}
|
||
|
||
/**
 * Whether the user owns at least one article row with a non-empty title.
 *
 * @param int $userId
 * @return bool
 */
public function hasSubmittedArticles($userId)
{
    $titledArticles = Db::name('article')
        ->where('user_id', intval($userId))
        ->where('title', '<>', '');

    return $titledArticles->count() > 0;
}
|
||
|
||
/**
 * A reviewer profile counts as "complete enough" once the number of filled
 * fields reaches the configured threshold (user_field_ai.min_profile_fields,
 * default 4, never below 3).
 *
 * Counted: the seven text columns below, the legacy `major` column (unless it is
 * '0'), and one extra point when the user has active rows in major_to_user.
 * The major_to_user lookup is skipped when it cannot change the outcome, and
 * when the row carries no usable reviewer_id.
 *
 * @param array|null $uri user_reviewer_info row
 * @return bool
 */
public function isReviewerProfileComplete($uri)
{
    if (!$uri || !is_array($uri)) {
        return false;
    }

    $minFilled = max(3, (int) Env::get('user_field_ai.min_profile_fields', 4));
    $filled = 0;

    foreach (['field', 'company', 'country', 'technical', 'introduction', 'department', 'website'] as $key) {
        if (!empty($uri[$key]) && trim((string) $uri[$key]) !== '') {
            $filled++;
        }
    }

    // Legacy comma-separated major column; '0' is treated as "not set".
    $legacyMajor = isset($uri['major']) ? trim((string) $uri['major']) : '';
    if ($legacyMajor !== '' && $legacyMajor !== '0') {
        $filled++;
    }

    // The relation lookup can add at most one point, so only query when that
    // single point decides the result.
    if ($filled >= $minFilled) {
        return true;
    }
    if ($filled + 1 < $minFilled) {
        return false;
    }

    $reviewerId = isset($uri['reviewer_id']) ? intval($uri['reviewer_id']) : 0;
    if ($reviewerId <= 0) {
        return false;
    }

    $majorCount = Db::name('major_to_user')->where('user_id', $reviewerId)->where('state', 0)->count();

    return $majorCount > 0;
}
|
||
|
||
/**
 * Scan users in ascending user_id order, in batches, and return the first one
 * after $afterUserId that is eligible for summarization.
 *
 * Without $force only users whose reviewer row is pending, failed or missing
 * are scanned, and ineligible ones are marked STATUS_INSUFFICIENT on the way.
 * With $force every user is scanned and nothing is marked.
 *
 * @param int  $afterUserId
 * @param bool $force
 * @return int the next user_id, or 0 when the scan is exhausted
 */
private function findNextPendingUserId($afterUserId, $force)
{
    $batchSize = max(20, (int) Env::get('user_field_ai.scan_batch', 80));
    $lastSeenId = intval($afterUserId);

    for (;;) {
        $scan = Db::name('user')->alias('u')
            ->join('t_user_reviewer_info uri', 'uri.reviewer_id = u.user_id', "left")
            ->where('u.user_id', '>', $lastSeenId);
        if (!$force) {
            $scan->where(function ($q) {
                $q->where('uri.field_ai_status', self::STATUS_PENDING)
                    ->whereOr('uri.field_ai_status', self::STATUS_FAILED)
                    ->whereOr('uri.reviewer_info_id', 'null');
            });
        }
        $candidateIds = $scan->order('u.user_id asc')->limit($batchSize)->column('u.user_id');

        if (empty($candidateIds)) {
            return 0;
        }

        foreach ($candidateIds as $candidateId) {
            $candidateId = intval($candidateId);
            // Advance the cursor first so the next batch starts after this user.
            $lastSeenId = $candidateId;

            $this->ensureReviewerInfoRow($candidateId);
            $row = Db::name('user_reviewer_info')->where('reviewer_id', $candidateId)->find();

            if (!$force) {
                $status = intval($row['field_ai_status']);
                if ($status === self::STATUS_DONE || $status === self::STATUS_INSUFFICIENT) {
                    continue;
                }
            }

            if ($this->isEligible($candidateId, $row)) {
                return $candidateId;
            }

            if (!$force) {
                $this->updateFieldAi($candidateId, '', self::STATUS_INSUFFICIENT, 'auto skip: insufficient data');
            }
        }
    }
}
|
||
|
||
/**
 * Assemble the data handed to the LLM for one user: name/email, trimmed
 * reviewer-profile fields, resolved major titles and the most recent titled
 * articles (title, journal title, keywords, truncated abstract).
 *
 * NOTE(review): realname and email are included in the payload that is later
 * sent to the chat endpoint — confirm this is acceptable for that endpoint.
 *
 * @param int   $userId
 * @param array $uri user_reviewer_info row
 * @return array{user:array, profile:array, articles:array}
 */
private function buildContext($userId, array $uri)
{
    $userRow = Db::name('user')->where('user_id', $userId)->field('user_id,realname,email,account')->find();
    $majorTitles = $this->resolveMajorTitles($userId, $uri);

    // Article cap is configurable but clamped to the range 1..10.
    $articleLimit = max(1, min(10, (int) Env::get('user_field_ai.max_articles', 5)));
    $recentArticles = Db::name('article')
        ->where('user_id', $userId)
        ->where('title', '<>', '')
        ->order('article_id desc')
        ->limit($articleLimit)
        ->field('article_id,title,keywords,abstrart,journal_id,ctime')
        ->select();

    // journal_id => journal title, loaded once for all selected articles.
    $journalTitleById = [];
    if (!empty($recentArticles)) {
        $journalIds = array_unique(array_filter(array_column($recentArticles, 'journal_id')));
        if (!empty($journalIds)) {
            $journalTitleById = Db::name('journal')->where('journal_id', 'in', $journalIds)->column('title', 'journal_id');
        }
    }

    $articleBlocks = [];
    foreach ($recentArticles as $article) {
        $journalId = intval($article['journal_id']);
        // 'abstrart' is the column name as selected above.
        $abstract = trim((string) ($article['abstrart'] ?? ''));
        $articleBlocks[] = [
            'title' => (string) $article['title'],
            'journal' => (string) ($journalTitleById[$journalId] ?? ''),
            'keywords' => (string) ($article['keywords'] ?? ''),
            'abstract' => mb_substr($abstract, 0, 800),
        ];
    }

    // Profile fields keep this key order; only the introduction is length-capped.
    $profile = [];
    foreach (['field', 'technical', 'company', 'department', 'country', 'introduction', 'website'] as $key) {
        $value = trim((string) ($uri[$key] ?? ''));
        $profile[$key] = ($key === 'introduction') ? mb_substr($value, 0, 1200) : $value;
    }
    $profile['majors'] = $majorTitles;

    return [
        'user' => [
            'realname' => $userRow ? (string) $userRow['realname'] : '',
            'email' => $userRow ? (string) $userRow['email'] : '',
        ],
        'profile' => $profile,
        'articles' => $articleBlocks,
    ];
}
|
||
|
||
/**
 * Resolve the user's major titles: first through active major_to_user links,
 * and if that yields nothing, through the legacy comma-separated `major` column.
 *
 * @param int   $userId
 * @param array $uri user_reviewer_info row
 * @return array list of unique, trimmed, non-empty titles
 */
private function resolveMajorTitles($userId, array $uri)
{
    $linkedIds = Db::name('major_to_user')->where('user_id', $userId)->where('state', 0)->column('major_id');
    $titles = empty($linkedIds)
        ? []
        : Db::name('reviewer_major')->where('major_id', 'in', $linkedIds)->where('state', 0)->column('title');

    if (empty($titles) && !empty($uri['major'])) {
        // Legacy format: comma-separated major ids; zero/invalid ids are dropped.
        $legacyIds = array_filter(array_map('intval', explode(',', (string) $uri['major'])));
        if (!empty($legacyIds)) {
            $titles = Db::name('reviewer_major')->where('major_id', 'in', $legacyIds)->column('title');
        }
    }

    $trimmed = array_map('trim', $titles);

    return array_values(array_unique(array_filter($trimmed)));
}
|
||
|
||
/**
 * Resolve the full OpenAI-compatible chat/completions URL.
 *
 * base.model_url is often just the site root (e.g. http://chat.taimed.cn), and
 * POSTing to it directly returns 404, so the value is normalized first.
 * Only base.model_url is consulted at present; the other candidate keys
 * (user_field_ai.chat_url, promotion.promotion_llm_url, expert_country_chat_url,
 * citation_chat_url) are disabled.
 *
 * @return string the endpoint URL, or '' when nothing is configured
 */
private function resolveLlmChatUrl()
{
    $sources = [
        Env::get('base.model_url', ''),
    ];

    foreach ($sources as $source) {
        $source = trim((string) $source);
        if ($source === '') {
            continue;
        }
        $resolved = $this->normalizeChatCompletionsUrl($source);
        if ($resolved !== '') {
            return $resolved;
        }
    }

    return '';
}
|
||
|
||
/**
 * Turn a configured base URL into a full chat/completions endpoint.
 *
 * - '' stays ''.
 * - A URL that already contains "chat/completions" is returned unchanged.
 * - A URL whose path already ends in a version segment (…/v1, …/v4) only gets
 *   "/chat/completions" appended, so "https://host/v1" does not become
 *   "https://host/v1/v1/chat/completions".
 * - Anything else gets "/v1/chat/completions" appended.
 *
 * @param string $url
 * @return string
 */
private function normalizeChatCompletionsUrl($url)
{
    $url = trim((string) $url);
    if ($url === '') {
        return '';
    }
    if (stripos($url, 'chat/completions') !== false) {
        return $url;
    }

    $base = rtrim($url, '/');

    // Inspect the path only, so a host that merely looks like a version ("http://v1") is not matched.
    $path = (string) parse_url($base, PHP_URL_PATH);
    if (preg_match('~/v\d+$~i', $path) === 1) {
        return $base . '/chat/completions';
    }

    return $base . '/v1/chat/completions';
}
|
||
|
||
/**
 * Pick the chat model name: the first configured env value, in priority order,
 * that is non-empty and not the "your-model-name" placeholder; otherwise the
 * built-in default "gpt-4.1".
 *
 * @return string
 */
private function resolveLlmModel()
{
    $envKeys = [
        'user_field_ai.chat_model',
        'base.model',
        'promotion.promotion_llm_model',
        'expert_country_chat_model',
        'citation_chat_model',
    ];

    foreach ($envKeys as $envKey) {
        $modelName = trim((string) Env::get($envKey, ''));
        if ($modelName === '' || strtolower($modelName) === 'your-model-name') {
            continue;
        }

        return $modelName;
    }

    return 'gpt-4.1';
}
|
||
|
||
/**
 * Ask the chat endpoint for the user's main research field and return it as a
 * cleaned string ('' when the model answered with nothing usable).
 *
 * The answer is expected as JSON {"field_ai":"..."} inside
 * choices[0].message.content. A bare {"field_ai":...} body and a plain-text
 * body are also accepted. A JSON body that carries neither (for example an
 * error envelope returned with a 2xx status) is rejected instead of being
 * stored as the field.
 *
 * @param array $context data built by buildContext()
 * @return string
 * @throws Exception when the endpoint is not configured, the request cannot be
 *                   encoded or sent, the HTTP status is not 2xx, or the JSON
 *                   response has no usable content
 */
private function summarizeWithLlm(array $context)
{
    $url = $this->resolveLlmChatUrl();
    $model = $this->resolveLlmModel();
    $apiKey = trim((string) Env::get('user_field_ai.chat_api_key', Env::get('expert_country_chat_api_key', Env::get('citation_chat_api_key', ''))));
    if ($url === '' || $model === '') {
        // The URL is resolved from base.model_url only, so that is the key to name here.
        throw new Exception('user_field_ai chat not configured (set base.model_url)');
    }

    // json_encode() returns false on e.g. invalid UTF-8 in DB text; fail loudly
    // rather than send an empty prompt.
    $payloadJson = json_encode($context, JSON_UNESCAPED_UNICODE | JSON_UNESCAPED_SLASHES);
    if ($payloadJson === false) {
        throw new Exception('LLM payload encode failed: ' . json_last_error_msg());
    }

    $messages = [
        [
            'role' => 'system',
            'content' => '你是学术领域分类助手。根据用户的投稿与个人资料,用简体中文给出该用户最主要的研究领域总结。'
                . '要求:精确、简洁,1~3 个中文领域词或短短语,用顿号分隔;不要解释、不要英文、不要 JSON 以外的多余文字。'
                . '只输出 JSON:{"field_ai":"..."}。',
        ],
        [
            'role' => 'user',
            'content' => "请根据以下 JSON 资料总结该用户的主要研究领域:\n" . $payloadJson,
        ],
    ];

    $requestJson = json_encode([
        'model' => $model,
        'temperature' => 0.2,
        'messages' => $messages,
    ], JSON_UNESCAPED_UNICODE);
    if ($requestJson === false) {
        throw new Exception('LLM request encode failed: ' . json_last_error_msg());
    }

    $ch = curl_init();
    if ($ch === false) {
        throw new Exception('LLM curl error: curl_init failed');
    }
    curl_setopt_array($ch, [
        CURLOPT_URL => $url,
        CURLOPT_POST => true,
        CURLOPT_POSTFIELDS => $requestJson,
        CURLOPT_RETURNTRANSFER => true,
        CURLOPT_CONNECTTIMEOUT => 15,
        // Total timeout is configurable but never below 30 seconds.
        CURLOPT_TIMEOUT => max(30, (int) Env::get('user_field_ai.timeout', 90)),
        // array_filter drops the Authorization entry when no API key is set.
        CURLOPT_HTTPHEADER => array_filter([
            'Content-Type: application/json',
            $apiKey !== '' ? 'Authorization: Bearer ' . $apiKey : null,
        ]),
    ]);
    $raw = curl_exec($ch);
    $code = (int) curl_getinfo($ch, CURLINFO_HTTP_CODE);
    $err = curl_error($ch);
    curl_close($ch);

    if ($raw === false) {
        throw new Exception('LLM curl error: ' . $err);
    }
    if ($code < 200 || $code >= 300) {
        throw new Exception('LLM HTTP ' . $code . ': ' . mb_substr((string) $raw, 0, 400));
    }

    $data = json_decode($raw, true);
    $isJsonBody = is_array($data);
    $fromChoices = $isJsonBody
        && isset($data['choices'][0]['message']['content'])
        && is_scalar($data['choices'][0]['message']['content']);

    $content = $fromChoices
        ? trim((string) $data['choices'][0]['message']['content'])
        : trim((string) $raw);

    $fieldAi = (string) $this->parseFieldAiFromContent($content);
    if ($fieldAi !== '' || $content === '') {
        return $fieldAi;
    }

    // Nothing parseable. Plain text (from the model, or a non-JSON body) is
    // accepted as the answer, but a JSON body without choices/field_ai is not.
    if (!$fromChoices && $isJsonBody) {
        throw new Exception('LLM response has no usable content: ' . mb_substr($content, 0, 400));
    }

    return (string) $this->cleanFieldAiText($content);
}
|
||
|
||
/**
 * Extract the "field_ai" value from the model's answer.
 *
 * Markdown code fences are stripped, then the outermost {...} span and finally
 * the whole text are tried as JSON. A string value is cleaned and returned; a
 * list value (e.g. ["A","B"]) is joined with "、" instead of being cast to the
 * literal "Array".
 *
 * @param string $content
 * @return string the cleaned field text, or '' when none could be extracted
 */
private function parseFieldAiFromContent($content)
{
    $content = trim((string) $content);
    if ($content === '') {
        return '';
    }

    // preg_replace() returns null on a PCRE error; keep the unstripped text then.
    $unfenced = preg_replace('/^```[a-zA-Z]*\s*|```$/m', '', $content);
    if (is_string($unfenced)) {
        $content = $unfenced;
    }

    $candidates = [];
    if (preg_match('/\{.*\}/s', $content, $m)) {
        $candidates[] = $m[0];
    }
    $candidates[] = $content;

    foreach ($candidates as $json) {
        $obj = json_decode($json, true);
        if (!is_array($obj) || empty($obj['field_ai'])) {
            continue;
        }

        $value = $obj['field_ai'];
        if (is_array($value)) {
            $parts = [];
            foreach ($value as $part) {
                if (is_scalar($part) && trim((string) $part) !== '') {
                    $parts[] = trim((string) $part);
                }
            }
            $value = implode('、', $parts);
        }

        $clean = (string) $this->cleanFieldAiText((string) $value);
        if ($clean !== '') {
            return $clean;
        }
    }

    return '';
}
|
||
|
||
/**
 * Normalize a field string: strip surrounding quotes/whitespace, remove all
 * inner whitespace and cap the length at 200 characters.
 *
 * Always returns a string. With the /u flag preg_replace() yields null on
 * invalid UTF-8, so in that case invalid bytes are substituted first and the
 * replacement is retried.
 *
 * @param string $text
 * @return string
 */
private function cleanFieldAiText($text)
{
    $text = trim((string) $text);
    $text = trim($text, "\"' \t\n\r");

    $compact = preg_replace('/\s+/u', '', $text);
    if ($compact === null) {
        // UTF-8 → UTF-8 conversion replaces malformed byte sequences.
        $text = (string) mb_convert_encoding($text, 'UTF-8', 'UTF-8');
        $compact = preg_replace('/\s+/u', '', $text);
        if ($compact === null) {
            $compact = '';
        }
    }

    if (mb_strlen($compact) > 200) {
        $compact = mb_substr($compact, 0, 200);
    }

    return $compact;
}
|
||
|
||
/**
 * Make sure a user_reviewer_info row exists for the user; when none is found,
 * insert one with state = 0.
 *
 * @param int $userId
 * @return void
 */
public function ensureReviewerInfoRow($userId)
{
    $table = 'user_reviewer_info';

    if (!Db::name($table)->where('reviewer_id', $userId)->find()) {
        Db::name($table)->insert([
            'reviewer_id' => $userId,
            'state' => 0,
        ]);
    }
}
|
||
|
||
/**
 * Store the summary, status and update time on the user's reviewer row.
 * The note is not persisted; when non-empty it is only written to the log.
 *
 * @param int    $userId
 * @param string $fieldAi summary text; trimmed and capped at 512 characters
 * @param int    $status  one of the STATUS_* constants
 * @param string $note    optional reason for the log
 * @return void
 */
private function updateFieldAi($userId, $fieldAi, $status, $note)
{
    Db::name('user_reviewer_info')
        ->where('reviewer_id', $userId)
        ->update([
            'field_ai' => mb_substr(trim((string) $fieldAi), 0, 512),
            'field_ai_status' => intval($status),
            'field_ai_utime' => time(),
        ]);

    if ($note === '') {
        return;
    }

    $this->log('[FieldAi] user_id=' . $userId . ' status=' . $status . ' note=' . $note);
}
|
||
|
||
/**
 * Append one timestamped line to the service log file. Write errors are
 * suppressed, so logging is best-effort.
 *
 * @param string $msg
 * @return void
 */
public function log($msg)
{
    $entry = sprintf('%s %s%s', date('Y-m-d H:i:s'), $msg, PHP_EOL);
    @file_put_contents($this->logFile, $entry, FILE_APPEND);
}
|
||
}
|