Files
tougao/application/common/ExpertFieldAiService.php
2026-06-08 17:18:55 +08:00

660 lines
24 KiB
PHP
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
<?php
namespace app\common;
use app\common\service\LocalModelService;
use think\Db;
use think\Env;
use think\Exception;
use think\Queue;
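/**
 * Expert research-field summary (plan C).
 * 1. First try to reuse user.field_ai by matching the expert's email to a user (only a small number match).
 * 2. Main flow: let the AI summarize field_ai from the expert's papers / affiliation / search terms.
 */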
class ExpertFieldAiService
{
// Queue name used by enqueueNext() when the caller passes an empty queue name.
const QUEUE_NAME = 'ExpertFieldAi';
// Values written to expert.field_ai_status.
// 0: not processed yet (also the column default created by ensureSchema()).
const STATUS_PENDING = 0;
// 1: field_ai was produced, either copied from a linked user or summarized by the AI.
const STATUS_DONE = 1;
// 2: isEligible() found neither an affiliation nor any paper title / search keyword.
const STATUS_INSUFFICIENT = 2;
// 3: the AI summarization step threw; findNextPendingExpertId() picks these up again.
const STATUS_FAILED = 3;
// 4: written by linkFromUser() when no usable user record matched; still eligible for the AI step.
const STATUS_NO_USER_LINK = 4;
// Values written to expert.field_ai_source.
const SOURCE_USER_LINK = 'user_link';
const SOURCE_AI = 'ai';
// Absolute path of the log file; assigned in the constructor.
private $logFile;
/** @var bool|null Set to true once ensureSchema() has completed in this process; null before that. */
private static $schemaReady = null;
/**
 * Sets the log file location and makes a best-effort attempt to patch the expert table schema.
 *
 * A schema failure is written to the log instead of being rethrown, so construction itself
 * does not fail because of it.
 */
public function __construct()
{
    $this->logFile = ROOT_PATH . implode(DS, ['runtime', 'expert_field_ai.log']);

    try {
        $this->ensureSchema();
    } catch (\Throwable $schemaError) {
        $this->log('[ExpertFieldAi] ensureSchema fail: ' . $schemaError->getMessage());
    }
}
// ===================== 链式队列 =====================
/**
 * Starts the chained processing (user link + AI); this is the main entry point.
 *
 * Delegates to enqueueNext() with a starting cursor of 0, so the search for the next
 * expert begins from the lowest expert_id.
 *
 * @param bool   $force Forwarded unchanged to enqueueNext().
 * @param int    $delay Forwarded unchanged to enqueueNext().
 * @param string $queue Forwarded unchanged to enqueueNext().
 * @return bool The result of enqueueNext().
 */
public function startChain($force = false, $delay = 1, $queue = '')
{
return $this->enqueueNext($delay, $queue, 0, $force);
}
/**
 * Alias of startChain(), kept for compatibility with the old method name.
 *
 * @deprecated Use startChain() instead.
 */
public function startLinkChain($force = false, $delay = 1, $queue = '')
{
return $this->startChain($force, $delay, $queue);
}
/**
 * Looks up the next expert to process after $afterExpertId and queues a fill job for it.
 *
 * @param int    $delay         When greater than 0 the job is scheduled with Queue::later(); otherwise Queue::push().
 * @param string $queue         Queue name; an empty string selects self::QUEUE_NAME.
 * @param int    $afterExpertId Cursor: only experts with a larger expert_id are considered.
 * @param bool   $force         Passed to findNextPendingExpertId() and stored in the job payload as 1/0.
 * @return bool True when a job was queued, false when no further expert was found.
 */
public function enqueueNext($delay = 1, $queue = '', $afterExpertId = 0, $force = false)
{
    $targetQueue = ($queue === '') ? self::QUEUE_NAME : $queue;
    $cursor = intval($afterExpertId);

    $nextId = $this->findNextPendingExpertId($cursor, $force);
    if ($nextId <= 0) {
        $this->log('[ExpertFieldAi] chain finished after expert_id=' . $cursor);
        return false;
    }

    $handler = 'app\\api\\job\\ExpertFieldAiFill@fire';
    $payload = [
        'expert_id' => $nextId,
        'queue' => $targetQueue,
        'force' => $force ? 1 : 0,
    ];

    if ($delay > 0) {
        Queue::later($delay, $handler, $payload, $targetQueue);
    } else {
        Queue::push($handler, $payload, $targetQueue);
    }

    $this->log('[ExpertFieldAi] enqueued expert_id=' . $nextId . ' queue=' . $targetQueue);
    return true;
}
/**
 * Alias of enqueueNext() that forwards all arguments unchanged.
 *
 * @deprecated Use enqueueNext() instead.
 */
public function enqueueNextLink($delay = 1, $queue = '', $afterExpertId = 0, $force = false)
{
return $this->enqueueNext($delay, $queue, $afterExpertId, $force);
}
// ===================== 主流程 =====================
/**
 * Processes a single expert: first try to reuse a matching user's field_ai; if that does not
 * link, fall back to an AI summary.
 *
 * @param int|string $expertId Expert primary key; values <= 0 are rejected.
 * @param bool       $force    When false, an expert already DONE with a non-empty field_ai is returned as skipped.
 * @return array Always contains 'ok'. Depending on the path it also carries 'skipped', 'method'
 *               ('user_link' or 'ai'), 'insufficient', 'field_ai', 'source' or 'error'.
 */
public function processExpert($expertId, $force = false)
{
    $expertId = intval($expertId);
    if ($expertId <= 0) {
        return ['ok' => false, 'error' => 'invalid expert_id'];
    }

    $row = Db::name('expert')->where('expert_id', $expertId)->find();
    if (!$row) {
        return ['ok' => false, 'error' => 'expert not found'];
    }

    $alreadyDone = !$force
        && intval($row['field_ai_status']) === self::STATUS_DONE
        && trim((string)$row['field_ai']) !== '';
    if ($alreadyDone) {
        return [
            'ok' => true,
            'skipped' => true,
            'field_ai' => (string)$row['field_ai'],
            'source' => (string)($row['field_ai_source'] ?? ''),
        ];
    }

    // Step 1: reuse the field_ai of a user with the same email, when one exists.
    $link = $this->tryLinkFromUser($expertId, $row, $force);
    if (!empty($link['linked'])) {
        return array_merge(['ok' => true, 'method' => 'user_link'], $link);
    }

    // Step 2: without any usable material the expert is marked as insufficient.
    if (!$this->isEligible($expertId, $row)) {
        $this->updateFieldAi($expertId, '', self::STATUS_INSUFFICIENT, '', 'insufficient papers/affiliation');
        return ['ok' => true, 'insufficient' => true, 'method' => 'ai'];
    }

    // Step 3: AI summary. Any throwable here marks the expert as FAILED.
    try {
        $summary = $this->summarizeWithLlm($this->buildContext($expertId, $row));
        if ($summary === '') {
            throw new Exception('LLM returned empty field_ai');
        }
        $this->updateFieldAi($expertId, $summary, self::STATUS_DONE, self::SOURCE_AI, 'ai summarized');

        return [
            'ok' => true,
            'method' => 'ai',
            'field_ai' => $summary,
            'source' => self::SOURCE_AI,
        ];
    } catch (\Throwable $failure) {
        $reason = $failure->getMessage();
        $this->updateFieldAi($expertId, '', self::STATUS_FAILED, '', mb_substr($reason, 0, 500));
        $this->log('[ExpertFieldAi] expert_id=' . $expertId . ' ai fail: ' . $reason);

        return ['ok' => false, 'method' => 'ai', 'error' => $reason];
    }
}
/**
 * Runs processExpert() for each id and tallies the outcomes.
 *
 * Ids that are <= 0 after intval() are ignored and not counted in 'total'.
 *
 * @param array $expertIds Expert ids to process.
 * @param bool  $force     Forwarded to processExpert().
 * @return array Counters 'total', 'linked', 'ai', 'skipped', 'insufficient', 'failed', plus
 *               'details' (one processExpert() result per processed id, with 'expert_id' merged in).
 */
public function batchProcess(array $expertIds, $force = false)
{
    $summary = ['total' => 0, 'linked' => 0, 'ai' => 0, 'skipped' => 0, 'insufficient' => 0, 'failed' => 0];
    $details = [];

    foreach ($expertIds as $rawId) {
        $id = intval($rawId);
        if ($id <= 0) {
            continue;
        }

        $outcome = $this->processExpert($id, $force);
        $summary['total']++;

        $method = isset($outcome['method']) ? $outcome['method'] : null;
        // The first matching category wins, in this exact order.
        if (empty($outcome['ok'])) {
            $bucket = 'failed';
        } elseif (!empty($outcome['skipped'])) {
            $bucket = 'skipped';
        } elseif (!empty($outcome['linked']) || $method === 'user_link') {
            $bucket = 'linked';
        } elseif (!empty($outcome['insufficient'])) {
            $bucket = 'insufficient';
        } elseif ($method === 'ai') {
            $bucket = 'ai';
        } else {
            $bucket = null;
        }
        if ($bucket !== null) {
            $summary[$bucket]++;
        }

        $details[] = array_merge(['expert_id' => $id], $outcome);
    }

    $summary['details'] = $details;
    return $summary;
}
// ===================== 关联 user辅助 =====================
/**
 * Performs only the user link step (never calls the AI); intended for debugging.
 *
 * When the expert was neither linked nor skipped, its status is set to STATUS_NO_USER_LINK
 * with an empty field_ai.
 *
 * @param int|string $expertId Expert primary key.
 * @param bool       $force    Forwarded to tryLinkFromUser().
 * @return array ['ok' => false, 'error' => ...] when the expert does not exist; otherwise
 *               'ok' => true merged with the tryLinkFromUser() result.
 */
public function linkFromUser($expertId, $force = false)
{
    $expertId = intval($expertId);
    $row = Db::name('expert')->where('expert_id', $expertId)->find();
    if (!$row) {
        return ['ok' => false, 'error' => 'expert not found'];
    }

    $outcome = $this->tryLinkFromUser($expertId, $row, $force);

    $wasLinked = !empty($outcome['linked']);
    $wasSkipped = !empty($outcome['skipped']);
    if (!$wasLinked && !$wasSkipped) {
        $this->updateFieldAi($expertId, '', self::STATUS_NO_USER_LINK, '', 'link only: no user match');
    }

    return array_merge(['ok' => true], $outcome);
}
/**
 * Runs linkFromUser() for each id and counts the outcomes.
 *
 * Ids that are <= 0 after intval() are ignored.
 *
 * @param array $expertIds Expert ids to link.
 * @param bool  $force     Forwarded to linkFromUser().
 * @return array 'total' (number of processed ids), 'linked', 'skipped', 'no_link', 'failed' and
 *               'details' (one linkFromUser() result per processed id, with 'expert_id' merged in).
 */
public function batchLinkFromUser(array $expertIds, $force = false)
{
    $tally = ['linked' => 0, 'skipped' => 0, 'no_link' => 0, 'failed' => 0];
    $details = [];

    foreach ($expertIds as $rawId) {
        $id = intval($rawId);
        if ($id <= 0) {
            continue;
        }

        $outcome = $this->linkFromUser($id, $force);

        // The first matching category wins; anything else counts as "no link".
        if (empty($outcome['ok'])) {
            $bucket = 'failed';
        } elseif (!empty($outcome['skipped'])) {
            $bucket = 'skipped';
        } elseif (!empty($outcome['linked'])) {
            $bucket = 'linked';
        } else {
            $bucket = 'no_link';
        }
        $tally[$bucket]++;

        $details[] = array_merge(['expert_id' => $id], $outcome);
    }

    return [
        'total' => count($details),
        'linked' => $tally['linked'],
        'skipped' => $tally['skipped'],
        'no_link' => $tally['no_link'],
        'failed' => $tally['failed'],
        'details' => $details,
    ];
}
/**
 * Tries to copy field_ai from the user whose email matches the expert's email.
 *
 * The copy happens only when an active (state = 0) user and an active user_reviewer_info row
 * exist and that row has a non-empty field_ai with status UserFieldAiService::STATUS_DONE.
 *
 * @param int|string $expertId Expert primary key.
 * @param array|null $expert   Expert row; loaded from the database when null.
 * @param bool       $force    When false, an expert already DONE with non-empty field_ai is reported as skipped.
 * @return array 'linked' is always present; 'reason', 'skipped', 'field_ai', 'user_id' and 'source'
 *               are added depending on the path taken.
 */
private function tryLinkFromUser($expertId, $expert = null, $force = false)
{
    if ($expert === null) {
        $expert = Db::name('expert')->where('expert_id', intval($expertId))->find();
    }
    if (!$expert) {
        return ['linked' => false, 'reason' => 'expert not found'];
    }

    $alreadyDone = !$force
        && intval($expert['field_ai_status']) === self::STATUS_DONE
        && trim((string)$expert['field_ai']) !== '';
    if ($alreadyDone) {
        return [
            'linked' => false,
            'skipped' => true,
            'field_ai' => (string)$expert['field_ai'],
            'source' => (string)($expert['field_ai_source'] ?? ''),
        ];
    }

    $email = strtolower(trim((string)($expert['email'] ?? '')));
    if ($email === '') {
        return ['linked' => false, 'reason' => 'empty email'];
    }

    $user = Db::name('user')
        ->where('email', $email)
        ->where('state', 0)
        ->field('user_id,email,realname')
        ->find();
    if (!$user) {
        return ['linked' => false, 'reason' => 'user not found'];
    }
    $userId = intval($user['user_id']);

    $reviewerInfo = Db::name('user_reviewer_info')
        ->where('reviewer_id', $userId)
        ->where('state', 0)
        ->find();

    $userFieldAi = $reviewerInfo ? trim((string)($reviewerInfo['field_ai'] ?? '')) : '';
    $usable = $userFieldAi !== ''
        && intval($reviewerInfo['field_ai_status'] ?? 0) === UserFieldAiService::STATUS_DONE;
    if (!$usable) {
        return ['linked' => false, 'user_id' => $userId, 'reason' => 'user has no field_ai'];
    }

    $this->updateFieldAi(
        intval($expertId),
        $userFieldAi,
        self::STATUS_DONE,
        self::SOURCE_USER_LINK,
        'linked from user_id=' . $user['user_id']
    );

    return [
        'linked' => true,
        'field_ai' => $userFieldAi,
        'user_id' => $userId,
        'source' => self::SOURCE_USER_LINK,
    ];
}
/**
 * Runs the user link step for every expert that shares the given user's email.
 *
 * Only active users (state = 0) with a non-empty email are accepted, and experts with
 * state 5 are excluded from the lookup.
 *
 * @param int|string $userId User primary key.
 * @param bool       $force  Forwarded to batchLinkFromUser().
 * @return array ['ok' => false, 'error' => ...] when the user is unusable; a 'synced' => 0 result when
 *               no expert shares the email; otherwise 'ok' => true merged with the batchLinkFromUser() result.
 */
public function syncExpertsByUserId($userId, $force = false)
{
    $user = Db::name('user')
        ->where('user_id', intval($userId))
        ->where('state', 0)
        ->field('user_id,email')
        ->find();

    $email = $user ? strtolower(trim((string)$user['email'])) : '';
    if ($email === '') {
        return ['ok' => false, 'error' => 'user not found'];
    }

    $matchingIds = Db::name('expert')
        ->where('email', $email)
        ->where('state', '<>', 5)
        ->column('expert_id');
    if (empty($matchingIds)) {
        return ['ok' => true, 'synced' => 0, 'msg' => 'no expert with same email'];
    }

    $batch = $this->batchLinkFromUser($matchingIds, $force);
    return array_merge(['ok' => true], $batch);
}
// ===================== AI 上下文 =====================
/**
 * Tells whether there is enough material to ask the AI for a summary.
 *
 * An expert qualifies with a non-empty affiliation, or with at least one active (state = 0)
 * expert_field row that has a non-empty paper_title or field value.
 *
 * @param int|string $expertId Expert primary key.
 * @param array|null $expert   Expert row; loaded from the database when null.
 * @return bool
 */
public function isEligible($expertId, $expert = null)
{
    $profile = $expert ?? Db::name('expert')->where('expert_id', intval($expertId))->find();
    if (!$profile) {
        return false;
    }

    $affiliation = trim((string)($profile['affiliation'] ?? ''));
    if ($affiliation !== '') {
        return true;
    }

    $rows = Db::name('expert_field')
        ->where('expert_id', intval($expertId))
        ->where('state', 0)
        ->field('field,paper_title,paper_journal')
        ->select();

    foreach ($rows as $entry) {
        $hasTitle = trim((string)($entry['paper_title'] ?? '')) !== '';
        $hasKeyword = trim((string)($entry['field'] ?? '')) !== '';
        if ($hasTitle || $hasKeyword) {
            return true;
        }
    }

    return false;
}
/**
 * Assembles the data sent to the LLM for one expert.
 *
 * Active (state = 0) expert_field rows are read newest first. Their 'field' values become the
 * de-duplicated search keyword list, and rows with a paper title become the paper list
 * (de-duplicated on title + paper_article_id, capped by the expert_field_ai.max_papers setting,
 * which is clamped to 1..15 and defaults to 8).
 *
 * @param int|string $expertId Expert primary key.
 * @param array|null $expert   Expert row; loaded from the database when null.
 * @return array Keys: 'expert' (name, email, affiliation, country, source), 'search_keywords',
 *               'papers' (title, journal, source, keyword) and 'note'.
 */
public function buildContext($expertId, $expert = null)
{
    if ($expert === null) {
        $expert = Db::name('expert')->where('expert_id', intval($expertId))->find();
    }

    $fieldRows = Db::name('expert_field')
        ->where('expert_id', intval($expertId))
        ->where('state', 0)
        ->order('expert_field_id desc')
        ->select();

    $searchKeywords = [];
    $papers = [];
    $seenPaper = [];
    foreach ($fieldRows as $row) {
        $kw = trim((string)($row['field'] ?? ''));
        if ($kw !== '') {
            $searchKeywords[] = $kw;
        }

        $title = trim((string)($row['paper_title'] ?? ''));
        if ($title === '') {
            continue;
        }

        // The same paper can appear under several keywords; keep its first occurrence only.
        $paperKey = md5($title . '|' . ($row['paper_article_id'] ?? ''));
        if (isset($seenPaper[$paperKey])) {
            continue;
        }
        $seenPaper[$paperKey] = true;

        $papers[] = [
            'title' => mb_substr($title, 0, 300),
            'journal' => mb_substr(trim((string)($row['paper_journal'] ?? '')), 0, 120),
            'source' => trim((string)($row['source'] ?? '')),
            'keyword' => $kw,
        ];
    }

    $maxPapers = max(1, min(15, (int)Env::get('expert_field_ai.max_papers', 8)));
    $papers = array_slice($papers, 0, $maxPapers);
    $searchKeywords = array_values(array_unique(array_filter($searchKeywords)));

    // expert.country normally already holds the English country name; the country table is
    // consulted only when it is empty.
    $countryName = trim((string)($expert['country'] ?? ''));
    if ($countryName === '') {
        $countryId = intval($expert['country_id'] ?? 0);
        if ($countryId > 0) {
            $countryRow = Db::name('country')->where('country_id', $countryId)->find();
            if ($countryRow) {
                // `??` only falls through on null, so an empty-string en_name used to block the
                // zh_name fallback. Fall back whenever en_name is blank.
                $countryName = trim((string)($countryRow['en_name'] ?? ''));
                if ($countryName === '') {
                    $countryName = trim((string)($countryRow['zh_name'] ?? ''));
                }
            }
        }
    }

    return [
        'expert' => [
            'name' => trim((string)($expert['name'] ?? '')),
            'email' => trim((string)($expert['email'] ?? '')),
            'affiliation' => trim((string)($expert['affiliation'] ?? '')),
            'country' => $countryName,
            'source' => trim((string)($expert['source'] ?? '')),
        ],
        'search_keywords' => $searchKeywords,
        'papers' => $papers,
        'note' => 'search_keywords 是 PubMed 检索词,不代表本人领域;请以论文标题与单位为准。',
    ];
}
// ===================== 预览 / 统计 =====================
/**
 * Read-only preview of what processing would work with for one expert.
 *
 * Nothing is written: it reports the current field_ai values, the result of previewLink(),
 * the eligibility flag and, when eligible, the context that would be sent to the AI.
 *
 * @param int|string $expertId Expert primary key.
 * @return array ['ok' => false, 'error' => ...] when the expert does not exist; otherwise the preview data.
 */
public function preview($expertId)
{
    $expertId = intval($expertId);
    $row = Db::name('expert')->where('expert_id', $expertId)->find();
    if (!$row) {
        return ['ok' => false, 'error' => 'expert not found'];
    }

    $link = $this->previewLink($expertId);
    $eligible = $this->isEligible($expertId, $row);

    // The context is only built when the AI step could actually run.
    $context = null;
    if ($eligible) {
        $context = $this->buildContext($expertId, $row);
    }

    return [
        'ok' => true,
        'expert_id' => $expertId,
        'expert_field_ai' => (string)($row['field_ai'] ?? ''),
        'expert_field_ai_status' => intval($row['field_ai_status'] ?? 0),
        'can_link_user' => !empty($link['can_link']),
        'link_preview' => $link,
        'eligible_for_ai' => $eligible,
        'context_preview' => $context,
    ];
}
/**
 * Read-only check of whether an expert could be linked to a user's field_ai.
 *
 * Looks up the active (state = 0) user with the expert's lower-cased email and that user's
 * active user_reviewer_info row. Nothing is written.
 *
 * @param int|string $expertId Expert primary key.
 * @return array ['ok' => false, 'error' => ...] when the expert does not exist; otherwise the matched
 *               user data and 'can_link' (true only with a non-empty user field_ai in DONE status).
 */
public function previewLink($expertId)
{
    $expertId = intval($expertId);
    $row = Db::name('expert')->where('expert_id', $expertId)->find();
    if (!$row) {
        return ['ok' => false, 'error' => 'expert not found'];
    }

    $email = strtolower(trim((string)($row['email'] ?? '')));

    $user = null;
    $reviewerInfo = null;
    if ($email !== '') {
        $user = Db::name('user')
            ->where('email', $email)
            ->where('state', 0)
            ->field('user_id,email,realname')
            ->find();
        if ($user) {
            $reviewerInfo = Db::name('user_reviewer_info')
                ->where('reviewer_id', intval($user['user_id']))
                ->where('state', 0)
                ->find();
        }
    }

    $canLink = false;
    if ($user && $reviewerInfo) {
        $hasText = trim((string)($reviewerInfo['field_ai'] ?? '')) !== '';
        $canLink = $hasText
            && intval($reviewerInfo['field_ai_status']) === UserFieldAiService::STATUS_DONE;
    }

    return [
        'ok' => true,
        'expert_id' => $expertId,
        'expert_email' => $email,
        'matched_user_id' => $user ? intval($user['user_id']) : 0,
        'matched_user_name' => $user ? (string)$user['realname'] : '',
        'user_field_ai' => $reviewerInfo ? (string)($reviewerInfo['field_ai'] ?? '') : '',
        'user_field_ai_status' => $reviewerInfo ? intval($reviewerInfo['field_ai_status']) : 0,
        'can_link' => $canLink,
    ];
}
// ===================== LLM =====================
/**
 * Asks the LLM for the expert's main research fields and returns the cleaned answer.
 *
 * The context is sent as JSON in the user message. The reply is expected to be
 * {"field_ai":"..."}; when no such JSON can be parsed, the raw reply text is cleaned and used.
 *
 * @param array $context Result of buildContext().
 * @return string Cleaned field_ai text; may be empty when the model replied with nothing usable.
 * @throws Exception When the context cannot be JSON-encoded or the model call reports an error.
 */
private function summarizeWithLlm(array $context)
{
    // JSON_PARTIAL_OUTPUT_ON_ERROR keeps the payload usable when a single value (for example a
    // paper title with invalid UTF-8) cannot be encoded. Without it json_encode() returns false
    // and the model would be prompted with no data at all.
    $payloadJson = json_encode(
        $context,
        JSON_UNESCAPED_UNICODE | JSON_UNESCAPED_SLASHES | JSON_PARTIAL_OUTPUT_ON_ERROR
    );
    if (!is_string($payloadJson) || $payloadJson === '') {
        throw new Exception('context json_encode failed: ' . json_last_error_msg());
    }

    // The prompt asks for 1~3 terms; the separators around "注意", "1~3" and "JSON" must be present,
    // otherwise the instruction reads as "13 terms".
    $systemPrompt = '你是学术领域分类助手。根据专家的单位、论文标题与 PubMed 检索上下文,用简体中文总结该专家最主要的研究领域。'
        . '注意:search_keywords 只是检索词,不可直接当作领域结论,应结合 paper 标题与 affiliation 判断。'
        . '要求精确、简洁,1~3 个中文领域词或短短语,用顿号分隔;不要解释、不要英文。'
        . '只输出 JSON:{"field_ai":"..."}。';
    $userPrompt = "请根据以下 JSON 资料总结该专家的主要研究领域:\n" . $payloadJson;

    // Per the original note, LocalModelService chooses the model by context length
    // (small: base.model_url1 / large: base.model_url) — not verifiable from this file.
    $svc = new LocalModelService();
    $res = $svc->chat([
        ['role' => 'system', 'content' => $systemPrompt],
        ['role' => 'user', 'content' => $userPrompt],
    ], ['temperature' => 0.2]);

    if (empty($res['ok'])) {
        throw new Exception('LLM error: ' . (string)($res['error'] ?? 'unknown'));
    }
    $this->log('[ExpertFieldAi] llm tier=' . ($res['tier'] ?? '') . ' ctx_len=' . ($res['context_len'] ?? 0) . ' url=' . ($res['url'] ?? ''));

    $content = trim((string)($res['content'] ?? ''));
    $fieldAi = $this->parseFieldAiFromContent($content);
    if ($fieldAi === '' && $content !== '') {
        // No parsable JSON: fall back to the cleaned raw reply.
        $fieldAi = $this->cleanFieldAiText($content);
    }

    return $fieldAi;
}
/**
 * Extracts the "field_ai" value from an LLM reply.
 *
 * Markdown code fences are stripped first. The widest {...} span is then tried as JSON,
 * followed by the whole reply. A "field_ai" given as a JSON array is joined with "、" instead
 * of being cast to the literal string "Array".
 *
 * @param mixed $content Raw model reply.
 * @return string Cleaned field_ai, or '' when no non-empty "field_ai" could be decoded.
 */
private function parseFieldAiFromContent($content)
{
    $content = trim((string)$content);
    if ($content === '') {
        return '';
    }

    // preg_replace() returns null on a PCRE error; keep the unstripped text in that case.
    $unfenced = preg_replace('/^```[a-zA-Z]*\s*|```$/m', '', $content);
    if (is_string($unfenced)) {
        $content = $unfenced;
    }

    $candidates = [];
    if (preg_match('/\{.*\}/s', $content, $m)) {
        $candidates[] = $m[0];
    }
    $candidates[] = $content;

    foreach ($candidates as $json) {
        $obj = json_decode($json, true);
        if (!is_array($obj) || empty($obj['field_ai'])) {
            continue;
        }

        $value = $obj['field_ai'];
        if (is_array($value)) {
            // The model sometimes answers with a list of terms; keep the non-empty scalar items.
            $terms = [];
            foreach ($value as $term) {
                if (is_scalar($term) && trim((string)$term) !== '') {
                    $terms[] = trim((string)$term);
                }
            }
            $value = implode('、', $terms);
        }

        return $this->cleanFieldAiText((string)$value);
    }

    return '';
}
/**
 * Normalizes a field_ai candidate: trims it, strips surrounding quotes, removes all whitespace
 * and caps the length at 200 characters.
 *
 * Always returns a string. preg_replace() with the /u modifier returns null for input that is
 * not valid UTF-8; that null used to be returned to the caller, slip past its `=== ''` check
 * and be saved as an empty field_ai with DONE status.
 *
 * @param mixed $text Raw text.
 * @return string
 */
private function cleanFieldAiText($text)
{
    $text = trim((string)$text);
    $text = trim($text, "\"' \t\n\r");

    $compact = preg_replace('/\s+/u', '', $text);
    if ($compact === null) {
        // Invalid UTF-8: replace the broken byte sequences, then retry.
        $text = (string)mb_convert_encoding($text, 'UTF-8', 'UTF-8');
        $compact = preg_replace('/\s+/u', '', $text);
    }
    $text = is_string($compact) ? $compact : '';

    if (mb_strlen($text) > 200) {
        $text = mb_substr($text, 0, 200);
    }

    return $text;
}
// ===================== 内部工具 =====================
/**
 * Returns the smallest expert_id greater than $afterExpertId that still needs processing.
 *
 * Experts with state 5 are never considered. Without $force only rows whose field_ai_status is
 * PENDING, FAILED or NO_USER_LINK are candidates, and each candidate is re-read so a row that
 * has meanwhile become DONE with a non-empty field_ai is passed over. With $force the first
 * candidate is returned as is.
 *
 * @param int|string $afterExpertId Cursor; the search starts strictly above this id.
 * @param bool       $force         Disables the status filter and the re-check.
 * @return int The next expert_id, or 0 when none is left.
 */
private function findNextPendingExpertId($afterExpertId, $force)
{
    $pageSize = 50;
    $lastId = intval($afterExpertId);

    for (;;) {
        $page = Db::name('expert')
            ->where('expert_id', '>', $lastId)
            ->where('state', '<>', 5);
        if (!$force) {
            $page->where(function ($sub) {
                $sub->where('field_ai_status', self::STATUS_PENDING)
                    ->whereOr('field_ai_status', self::STATUS_FAILED)
                    ->whereOr('field_ai_status', self::STATUS_NO_USER_LINK);
            });
        }

        $candidateIds = $page->order('expert_id asc')->limit($pageSize)->column('expert_id');
        if (empty($candidateIds)) {
            return 0;
        }

        foreach ($candidateIds as $rawId) {
            $candidateId = intval($rawId);
            // Advance the cursor so the next page starts after every id already examined.
            $lastId = $candidateId;

            if ($force) {
                return $candidateId;
            }

            $fresh = Db::name('expert')
                ->where('expert_id', $candidateId)
                ->field('field_ai,field_ai_status')
                ->find();
            $isDone = $fresh
                && intval($fresh['field_ai_status']) === self::STATUS_DONE
                && trim((string)$fresh['field_ai']) !== '';
            if (!$isDone) {
                return $candidateId;
            }
        }
    }
}
/**
 * Writes field_ai, its status and the update time to the expert row, and logs the note.
 *
 * field_ai is trimmed and cut to 512 characters. field_ai_source (trimmed, cut to 32
 * characters) is written only when that column exists.
 *
 * @param int|string $expertId Expert primary key.
 * @param mixed      $fieldAi  Field text to store.
 * @param int        $status   One of the STATUS_* constants.
 * @param mixed      $source   One of the SOURCE_* constants, or ''.
 * @param string     $note     Text for the log only; nothing is logged when it is ''.
 */
private function updateFieldAi($expertId, $fieldAi, $status, $source, $note)
{
    $this->ensureSchema();

    $changes = [
        'field_ai' => mb_substr(trim((string)$fieldAi), 0, 512),
        'field_ai_status' => intval($status),
        'field_ai_utime' => time(),
    ];
    if ($this->hasColumn('field_ai_source')) {
        $changes['field_ai_source'] = mb_substr(trim((string)$source), 0, 32);
    }

    Db::name('expert')
        ->where('expert_id', intval($expertId))
        ->update($changes);

    if ($note === '') {
        return;
    }
    $this->log('[ExpertFieldAi] expert_id=' . $expertId . ' status=' . $status . ' note=' . $note);
}
/**
 * Adds any missing field_ai* columns to the expert table; safe to call repeatedly.
 *
 * The table's columns are read with SHOW COLUMNS, and all missing columns are added in a single
 * ALTER TABLE. After one successful run the static flag turns later calls in the same process
 * into a no-op.
 */
public function ensureSchema()
{
    if (self::$schemaReady === true) {
        return;
    }

    $table = config('database.prefix') . 'expert';

    $present = [];
    foreach (Db::query('SHOW COLUMNS FROM `' . $table . '`') as $columnInfo) {
        $present[$columnInfo['Field']] = true;
    }

    // Order matters: each column is positioned AFTER the previous one.
    $definitions = [
        'field_ai' => "ADD COLUMN `field_ai` VARCHAR(512) NOT NULL DEFAULT '' COMMENT 'AI总结的主要研究领域(中文)' AFTER `affiliation`",
        'field_ai_status' => "ADD COLUMN `field_ai_status` TINYINT NOT NULL DEFAULT 0 COMMENT '0待处理 1已生成 2资料不足 3失败 4无user待AI' AFTER `field_ai`",
        'field_ai_utime' => "ADD COLUMN `field_ai_utime` INT NOT NULL DEFAULT 0 COMMENT 'field_ai更新时间' AFTER `field_ai_status`",
        'field_ai_source' => "ADD COLUMN `field_ai_source` VARCHAR(32) NOT NULL DEFAULT '' COMMENT '来源: user_link / ai' AFTER `field_ai_utime`",
    ];

    $alters = [];
    foreach ($definitions as $columnName => $clause) {
        if (!isset($present[$columnName])) {
            $alters[] = $clause;
        }
    }

    if (!empty($alters)) {
        Db::execute('ALTER TABLE `' . $table . '` ' . implode(', ', $alters));
        $this->log('[ExpertFieldAi] schema patched: ' . implode('; ', $alters));
    }

    self::$schemaReady = true;
}
/**
 * Checks whether the expert table has a column matching the given name.
 *
 * Runs ensureSchema() first, then a SHOW COLUMNS ... LIKE query with the name escaped by addslashes().
 *
 * @param string $column Column name.
 * @return bool True when the query returns at least one row.
 */
private function hasColumn($column)
{
    $this->ensureSchema();

    $tableName = config('database.prefix') . 'expert';
    $quotedPattern = "'" . addslashes($column) . "'";
    $sql = 'SHOW COLUMNS FROM `' . $tableName . '` LIKE ' . $quotedPattern;

    return !empty(Db::query($sql));
}
/**
 * Maps a field_ai_status value to its English label.
 *
 * @param int|string $status One of the STATUS_* values.
 * @return string 'pending', 'done', 'insufficient', 'failed' or 'no_user_link'; 'unknown' for anything else.
 */
public function statusLabel($status)
{
    $labels = [
        self::STATUS_PENDING => 'pending',
        self::STATUS_DONE => 'done',
        self::STATUS_INSUFFICIENT => 'insufficient',
        self::STATUS_FAILED => 'failed',
        self::STATUS_NO_USER_LINK => 'no_user_link',
    ];

    return $labels[$status] ?? 'unknown';
}
/**
 * Appends one timestamped line to the service log file.
 *
 * Logging is best effort: write errors are suppressed.
 *
 * @param string $msg Message text.
 */
public function log($msg)
{
    $stamp = date('Y-m-d H:i:s');
    @file_put_contents($this->logFile, $stamp . ' ' . $msg . PHP_EOL, FILE_APPEND);
}
}