Files
tougao/application/common/PromotionLlmService.php
2026-04-24 14:50:16 +08:00

325 lines
12 KiB
PHP
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
<?php
namespace app\common;
use think\Env;
/**
 * Promotion e-mail LLM service.
 *
 * Uses the expert's representative work (paper title), the expert's research
 * fields and the journal information (name / scope / fields ...) to generate,
 * in a single LLM call, the two personalised paragraphs of an invitation mail:
 *   - description     personalised appreciation + invitation paragraph
 *                     (template variables {{ai_content_analysis}} / {{llm_description}})
 *   - advised_topics  "we are particularly interested in X / Y / Z" paragraph
 *                     (template variable {{ai_advised_topics}})
 *
 * One call returns both paragraphs as JSON, so latency is not doubled by two
 * requests; each paragraph falls back to its own default text independently.
 *
 * Configuration ([promotion] section of .env):
 *   PROMOTION_LLM_URL               chat/completions endpoint URL
 *   PROMOTION_LLM_MODEL             model name
 *   PROMOTION_LLM_TIMEOUT           timeout in seconds, default 30 (minimum 5)
 *   PROMOTION_LLM_API_KEY           optional bearer token sent as Authorization header
 *   PROMOTION_LLM_FALLBACK          fallback text for description
 *   PROMOTION_LLM_ADVISED_FALLBACK  fallback text for advised_topics
 *
 * Status codes:
 *   1 = generated by the LLM
 *   2 = LLM call / parsing failed, fallback text used
 *   3 = preconditions not met (no representative work, no field overlap ...),
 *       fallback text used
 */
class PromotionLlmService
{
    /** @var string chat/completions endpoint; '' when not configured */
    private $url;
    /** @var string model name; '' when not configured */
    private $model;
    /** @var int total request timeout in seconds (>= 5) */
    private $timeout;
    /** @var string bearer token; '' disables the Authorization header */
    private $apiKey;
    /** @var string fallback text for the description paragraph */
    private $fallback;
    /** @var string fallback text for the advised_topics paragraph */
    private $advisedFallback;

    /**
     * Reads all settings from the [promotion] section of the environment.
     */
    public function __construct()
    {
        $this->url = trim((string)Env::get('promotion.promotion_llm_url', ''));
        $this->model = trim((string)Env::get('promotion.promotion_llm_model', ''));
        // Never allow a timeout below 5 seconds, whatever the configuration says.
        $this->timeout = max(5, intval(Env::get('promotion.promotion_llm_timeout', 30)));
        $this->apiKey = trim((string)Env::get('promotion.promotion_llm_api_key', ''));
        $this->fallback = trim((string)Env::get(
            'promotion.promotion_llm_fallback',
            'Your recent work aligns closely with the scope of our journal, and we would be honored to consider a contribution from you.'
        ));
        $this->advisedFallback = trim((string)Env::get(
            'promotion.promotion_llm_advised_fallback',
            'We are especially interested in the research directions that align with your expertise, and warmly welcome your future submissions in these areas.'
        ));
    }

    /**
     * Generates both e-mail paragraphs with a single LLM call.
     *
     * A paragraph is only taken from the LLM when its own preconditions are
     * met (paper title + journal title for description; an overlap list, or
     * both field lists, for advised_topics). Otherwise the fallback text is
     * returned with status 3, even if the model produced something for it:
     * without the input the model could only invent content.
     *
     * @param array $expert        expert row (name / representative_work_title / fields ...)
     * @param array $journal       journal row (title / aims / databases ...)
     * @param array $overlapFields pre-computed expert & journal field intersection (may be empty)
     * @param array $journalFields target fields of the journal (or task / factory)
     * @param array $expertFields  research fields of the expert
     * @return array [
     *     'description'           => string,
     *     'description_status'    => 1|2|3,
     *     'advised_topics'        => string,
     *     'advised_topics_status' => 1|2|3,
     * ]
     */
    public function generateEmailContent(
        array $expert,
        array $journal,
        array $overlapFields = [],
        array $journalFields = [],
        array $expertFields = []
    ): array {
        $paperTitle = trim($this->toText($expert['representative_work_title'] ?? ''));
        $expertName = trim($this->toText($expert['name'] ?? ''));
        $journalName = trim($this->toText($journal['title'] ?? ''));

        $overlapList = $this->cleanList($overlapFields);
        $journalList = $this->cleanList($journalFields);
        $expertList = $this->cleanList($expertFields);

        $hasDescInput = ($paperTitle !== '' && $journalName !== '');
        $hasAdvisedInput = (!empty($overlapList) || (!empty($journalList) && !empty($expertList)));

        // Neither paragraph has enough input: skip the LLM entirely.
        if (!$hasDescInput && !$hasAdvisedInput) {
            return $this->allFallback(3, 3);
        }

        // Status reported for a paragraph whenever its fallback text is used.
        $descFailStatus = $hasDescInput ? 2 : 3;
        $advisedFailStatus = $hasAdvisedInput ? 2 : 3;

        // LLM not configured.
        if ($this->url === '' || $this->model === '') {
            return $this->allFallback($descFailStatus, $advisedFailStatus);
        }

        $payload = [
            'model' => $this->model,
            'temperature' => 0.4,
            'messages' => [
                ['role' => 'system', 'content' => $this->buildSystemPrompt()],
                ['role' => 'user', 'content' => $this->buildUserPrompt(
                    $expertName,
                    trim($this->toText($expert['fields'] ?? ($expert['field'] ?? ''))),
                    $paperTitle,
                    $journalName,
                    trim($this->toText($journal['aims'] ?? '')),
                    trim($this->toText($journal['databases'] ?? '')),
                    $overlapList,
                    $journalList,
                    $expertList
                )],
            ],
        ];

        $content = $this->postChat($payload);
        $parsed = ($content === null) ? null : $this->parseJson($content);
        if ($parsed === null) {
            return $this->allFallback($descFailStatus, $advisedFailStatus);
        }

        // Non-scalar values (e.g. the model returned a nested object) count as empty.
        $desc = $hasDescInput
            ? $this->cleanContent($this->toText($parsed['description'] ?? ''))
            : '';
        $advised = $hasAdvisedInput
            ? $this->cleanContent($this->toText($parsed['advised_topics'] ?? ''))
            : '';

        $descStatus = 1;
        if ($desc === '') {
            $desc = $this->fallback;
            $descStatus = $descFailStatus;
        }
        $advisedStatus = 1;
        if ($advised === '') {
            $advised = $this->advisedFallback;
            $advisedStatus = $advisedFailStatus;
        }

        return [
            'description' => $desc,
            'description_status' => $descStatus,
            'advised_topics' => $advised,
            'advised_topics_status' => $advisedStatus,
        ];
    }

    /**
     * Backward-compatible single-paragraph API: runs generateEmailContent()
     * and returns only the description part.
     *
     * @param array $expert  expert row
     * @param array $journal journal row
     * @return array ['status' => 1|2|3, 'text' => string]
     */
    public function generateDescription(array $expert, array $journal): array
    {
        $r = $this->generateEmailContent($expert, $journal);
        return [
            'status' => intval($r['description_status']),
            'text' => (string)$r['description'],
        ];
    }

    /**
     * Returns the fixed system prompt describing the required JSON output.
     */
    private function buildSystemPrompt(): string
    {
        return 'You are an academic editorial assistant. '
            . 'You will receive context about an author, their recent paper, and a target journal, '
            . 'and you must produce TWO English paragraphs for an invitation email. '
            . 'Output STRICT MINIFIED JSON ONLY with exactly these keys:'
            . '{"description":"...","advised_topics":"..."} '
            . 'Rules for "description": 2-3 sentences, <=50 words, warm and professional; '
            . '(a) briefly appreciate the author\'s recent paper, '
            . '(b) explain why it fits the journal\'s scope, '
            . '(c) gently invite a future submission. '
            . 'Rules for "advised_topics": 1-2 sentences, <=40 words; '
            . 'emphasize that the journal is particularly interested in the research directions '
            . 'where the journal\'s focus and the author\'s work overlap; '
            . 'mention the overlapping topics explicitly (use the provided overlap list when non-empty, '
            . 'otherwise choose the best semantic overlap between journal focus and author fields); '
            . 'end by inviting contributions leaning toward those directions. '
            . 'No greetings, no signatures, no placeholders, no markdown, no code fences. '
            . 'If a section genuinely cannot be produced, return an empty string for that key.';
    }

    /**
     * Builds the user message: one "label: value" line per available fact.
     *
     * @param string $expertName  author name ('' => "(unknown)")
     * @param string $expertField raw research-field text of the author (optional)
     * @param string $paperTitle  representative paper title ('' => "(none)")
     * @param string $journalName journal title ('' => "(unknown)")
     * @param string $journalAims journal aims & scope, truncated to 500 characters
     * @param string $journalDbs  journal indexing databases, truncated to 200 characters
     * @param array  $overlapList cleaned overlap topics
     * @param array  $journalList cleaned journal focus fields
     * @param array  $expertList  cleaned author fields
     */
    private function buildUserPrompt(
        string $expertName,
        string $expertField,
        string $paperTitle,
        string $journalName,
        string $journalAims,
        string $journalDbs,
        array $overlapList,
        array $journalList,
        array $expertList
    ): string {
        $lines = [];
        $lines[] = 'Author name: ' . ($expertName !== '' ? $expertName : '(unknown)');
        if ($expertField !== '') {
            $lines[] = 'Author research field (raw): ' . $expertField;
        }
        if (!empty($expertList)) {
            $lines[] = 'Author research fields (list): ' . implode(', ', $expertList);
        }
        $lines[] = 'Recent paper title: ' . ($paperTitle !== '' ? $paperTitle : '(none)');
        $lines[] = 'Target journal: ' . ($journalName !== '' ? $journalName : '(unknown)');
        if (!empty($journalList)) {
            $lines[] = 'Journal focus fields: ' . implode(', ', $journalList);
        }
        if (!empty($overlapList)) {
            $lines[] = 'Overlap topics (exact match): ' . implode(', ', $overlapList);
        } else {
            $lines[] = 'Overlap topics (exact match): (none, infer semantically from the two field lists above)';
        }
        if ($journalAims !== '') {
            $lines[] = 'Journal aims & scope: ' . mb_substr($journalAims, 0, 500);
        }
        if ($journalDbs !== '') {
            $lines[] = 'Journal indexing: ' . mb_substr($journalDbs, 0, 200);
        }
        $lines[] = 'Return only minified JSON {"description":"...","advised_topics":"..."}.';

        return implode("\n", $lines);
    }

    /**
     * Calls the chat/completions endpoint.
     *
     * @param array $payload request body (model, temperature, messages)
     * @return string|null the message content, or null on any failure
     *                     (encoding error, transport error, non-2xx status,
     *                     non-JSON body, missing or non-scalar content)
     */
    private function postChat(array $payload)
    {
        $flags = JSON_UNESCAPED_UNICODE;
        // Malformed UTF-8 in a title / scope would make json_encode() fail;
        // substitute the bad bytes where the runtime supports it (PHP >= 7.2).
        if (defined('JSON_INVALID_UTF8_SUBSTITUTE')) {
            $flags |= JSON_INVALID_UTF8_SUBSTITUTE;
        }
        $body = json_encode($payload, $flags);
        if ($body === false) {
            // Sending `false` as POSTFIELDS would post an empty body.
            return null;
        }

        $ch = curl_init();
        if ($ch === false) {
            return null;
        }

        $headers = ['Content-Type: application/json'];
        if ($this->apiKey !== '') {
            $headers[] = 'Authorization: Bearer ' . $this->apiKey;
        }

        curl_setopt($ch, CURLOPT_URL, $this->url);
        curl_setopt($ch, CURLOPT_POST, true);
        curl_setopt($ch, CURLOPT_POSTFIELDS, $body);
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
        // The connect phase gets at most 10 seconds of the overall budget.
        curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, min(10, $this->timeout));
        curl_setopt($ch, CURLOPT_TIMEOUT, $this->timeout);
        curl_setopt($ch, CURLOPT_HTTPHEADER, $headers);

        $raw = curl_exec($ch);
        $httpCode = intval(curl_getinfo($ch, CURLINFO_HTTP_CODE));
        curl_close($ch);

        if ($raw === false || $httpCode < 200 || $httpCode >= 300) {
            return null;
        }

        $data = json_decode($raw, true);
        if (!is_array($data)) {
            return null;
        }

        // OpenAI-style response first, then a flat {"content": "..."} shape.
        // Non-scalar content (e.g. an array of parts) is not usable as text.
        if (isset($data['choices'][0]['message']['content'])
            && is_scalar($data['choices'][0]['message']['content'])) {
            return (string)$data['choices'][0]['message']['content'];
        }
        if (isset($data['content']) && is_scalar($data['content'])) {
            return (string)$data['content'];
        }
        return null;
    }

    /**
     * Extracts a JSON object / array from the raw model output.
     *
     * @param string $raw raw message content returned by the model
     * @return array|null decoded data, or null when nothing decodable is found
     */
    private function parseJson(string $raw)
    {
        $raw = $this->stripCodeFences($raw);
        if ($raw === '') {
            return null;
        }

        // Try the whole text first.
        $obj = json_decode($raw, true);
        if (is_array($obj)) {
            return $obj;
        }

        // Otherwise decode the span from the first "{" to the last "}"
        // (greedy match), which tolerates prose around the object.
        if (preg_match('/\{.*\}/s', $raw, $m)) {
            $obj = json_decode($m[0], true);
            if (is_array($obj)) {
                return $obj;
            }
        }
        return null;
    }

    /**
     * Cleans one LLM paragraph: strips markdown fences, surrounding quotes and
     * redundant whitespace, and truncates to 800 characters.
     *
     * @param string $text raw paragraph text
     * @return string cleaned single-line text ('' when nothing is left)
     */
    private function cleanContent(string $text): string
    {
        $text = $this->stripCodeFences($text);
        $text = trim($text, "\"' \t\n\r\0\x0B");

        // Collapse whitespace in UTF-8 mode so multi-byte characters are never
        // split; preg_replace() returns null on invalid UTF-8, in which case
        // the byte-wise pattern is used instead.
        $collapsed = preg_replace('/\s+/u', ' ', $text);
        if ($collapsed === null) {
            $collapsed = preg_replace('/\s+/', ' ', $text);
        }
        if ($collapsed !== null) {
            $text = $collapsed;
        }

        if (mb_strlen($text) > 800) {
            $text = mb_substr($text, 0, 800);
        }
        return trim($text);
    }

    /**
     * Trims the text and removes markdown code-fence markers (``` or ```lang
     * at a line start, ``` at a line end).
     */
    private function stripCodeFences(string $text): string
    {
        $text = trim($text);
        $stripped = preg_replace('/^```[a-zA-Z]*\s*|```$/m', '', $text);
        // preg_replace() returns null on a PCRE error; keep the input then.
        return trim($stripped === null ? $text : $stripped);
    }

    /**
     * Normalises a field list: trims entries, drops empty and non-text
     * entries, and removes case-insensitive duplicates while keeping the
     * order of first appearance.
     *
     * @param array $list raw field names
     * @return array list of unique, non-empty strings
     */
    private function cleanList(array $list): array
    {
        $out = [];
        $seen = [];
        foreach ($list as $item) {
            $v = trim($this->toText($item));
            if ($v === '') {
                continue;
            }
            // Multibyte-aware lowercasing so "Éco" and "éco" are one entry.
            $key = mb_strtolower($v);
            if (isset($seen[$key])) {
                continue;
            }
            $seen[$key] = true;
            $out[] = $v;
        }
        return $out;
    }

    /**
     * Converts a value to text without ever producing the literal "Array":
     * scalars and stringable objects are cast, everything else becomes ''.
     *
     * @param mixed $value
     */
    private function toText($value): string
    {
        if (is_scalar($value)) {
            return (string)$value;
        }
        if (is_object($value) && method_exists($value, '__toString')) {
            return (string)$value;
        }
        return '';
    }

    /**
     * Builds a result where both paragraphs use their fallback text.
     *
     * @param int $descStatus    status code for description (2 or 3)
     * @param int $advisedStatus status code for advised_topics (2 or 3)
     */
    private function allFallback(int $descStatus, int $advisedStatus): array
    {
        return [
            'description' => $this->fallback,
            'description_status' => $descStatus,
            'advised_topics' => $this->advisedFallback,
            'advised_topics_status' => $advisedStatus,
        ];
    }

    /**
     * Returns the configured fallback text for the description paragraph.
     */
    public function getFallback(): string
    {
        return $this->fallback;
    }

    /**
     * Returns the configured fallback text for the advised_topics paragraph.
     */
    public function getAdvisedFallback(): string
    {
        return $this->advisedFallback;
    }
}