自动查重

This commit is contained in:
wangjinlei
2026-05-20 11:58:10 +08:00
parent 53e6ddbd9e
commit cfa3f791f4
11 changed files with 938 additions and 58 deletions

View File

@@ -2817,7 +2817,28 @@ class EmailClient extends Base
break;
case 1: // 主编(预留,本期不实现)
break;
case 4: // Authors: users whose e-mail matches an author of an article in this journal.
    // Assign the builder to $query exactly like the sibling cases do. The
    // original built this chain and discarded it, so nothing downstream
    // could ever see the author filter.
    $query = Db::name("article_author")->alias('aa')
        ->join('t_user u', 'u.email = aa.email', 'inner')
        ->join("t_article a", "a.article_id = aa.article_id", "left")
        ->join('t_user_reviewer_info uri', 'uri.reviewer_id = u.user_id', 'left')
        ->where('a.journal_id', $journalId)
        ->where('u.email', '<>', '')
        ->where('u.unsubscribed', 0);
    break;
case 6: //获取往期的青年编委2025年以前的,中国人
$now = strtotime('2025-01-01');
$query = Db::name('user_to_yboard')->alias('y')
->join('t_user u', 'u.user_id = y.user_id', 'inner')
->join('t_user_reviewer_info uri', 'uri.reviewer_id = u.user_id', 'left')
->where('y.journal_id', $journalId)
->where('y.state', 0)
->where('y.start_date', '<=', $now)
->where('uri.country', 'China')
->where('u.email', '<>', '')
->where('u.unsubscribed', 0);
break;//
default:
return [];
}

View File

@@ -12,7 +12,7 @@ use think\Validate;
* 论文查重Turnitin / Crossref Similarity Check控制器。
*
* 触发方式:纯手工(编辑后台点"查重"按钮)。
* 报告策略:在线 viewer URL 临时签名 + PDF 永久落盘 runtime/plagiarism/
* 报告策略:PDF 在 poll 完成时落盘;在线 viewer URL 通过 getReportUrl 按需生成(临时签名)
*
* 主要接口:
* POST submit 触发查重
@@ -37,12 +37,14 @@ class Plagiarism extends Base
* article_id 必填
* file_url 选填;不传则按 article_id 在 t_article_file 找 manuscirpt
* editor_id 选填;触发人 user_id前端拿不到也可以传 0
* check_type 选填full默认全文| body_only正文| both各提交一条
*/
public function submit()
{
$articleId = intval($this->request->param('article_id', 0));
$fileUrl = trim($this->request->param('file_url', ''));
$editorId = intval($this->request->param('editor_id', 0));
$checkType = trim($this->request->param('check_type', 'full'));
if ($articleId <= 0) {
return jsonError('article_id required');
@@ -53,8 +55,12 @@ class Plagiarism extends Base
$localPath = $fileUrl !== ''
? $svc->resolveFileUrlToLocal($fileUrl)
: $svc->locateArticleManuscript($articleId);
$checkId = $svc->submit($articleId, $localPath, $editorId, 'manual');
return jsonSuccess(['check_id' => $checkId]);
if (strtolower($checkType) === 'both') {
$ids = $svc->submitBoth($articleId, $localPath, $editorId, 'manual');
return jsonSuccess($ids);
}
$checkId = $svc->submit($articleId, $localPath, $editorId, 'manual', $checkType);
return jsonSuccess(['check_id' => $checkId, 'check_type' => strtolower($checkType) ?: 'full']);
} catch (\Throwable $e) {
return jsonError($e->getMessage());
}
@@ -257,10 +263,14 @@ class Plagiarism extends Base
'similarity_score' => floatval($r['similarity_score']),
'tii_report_status' => (string)$r['tii_report_status'],
'has_pdf' => !empty($r['pdf_local_path']),
'local_pdf_url' => $r['pdf_local_path'],
'has_viewer_url' => !empty($r['view_only_url']) && intval($r['view_only_url_expire']) > time(),
'attempts' => intval($r['attempts']),
'error_msg' => (string)$r['error_msg'],
'source_file_name' => (string)$r['source_file_name'],
'check_type' => (string)($r['check_type'] ?? 'full'),
'check_type_label' => $this->checkTypeLabel($r['check_type'] ?? 'full'),
'derived_file_path'=> (string)($r['derived_file_path'] ?? ''),
'trigger_source' => (string)$r['trigger_source'],
'triggered_by' => intval($r['triggered_by']),
'ctime' => intval($r['ctime']),
@@ -268,6 +278,15 @@ class Plagiarism extends Base
];
}
/**
 * Map a stored check_type value to its display label.
 *
 * The value is trimmed and lower-cased first; 'body_only' and 'body' yield
 * the body-only label, everything else yields the full-text label.
 *
 * @param mixed $checkType
 * @return string
 */
private function checkTypeLabel($checkType)
{
    $normalized = strtolower(trim((string) $checkType));
    $isBodyOnly = in_array($normalized, ['body_only', 'body'], true);

    return $isBodyOnly ? '正文查重' : '全文查重';
}
private function stateLabel($state)
{
$map = [

View File

@@ -0,0 +1,92 @@
<?php
namespace app\api\controller;
use think\Db;
use think\Validate;
use app\common\UserFieldAiService;
/**
* 用户主领域 AI 总结(写入 t_user_reviewer_info.field_ai
*
* POST startChain 启动链式队列(扫描全部符合条件的用户)
* POST processOne 同步处理单个 user_id调试
* GET preview 预览某用户是否 eligible 及上下文摘要
*/
class UserFieldAi extends Base
{
    /**
     * Start the chained queue run.
     *
     * Reads `force` (1 = recompute) and `delay` (clamped to >= 0) from the
     * request and reports whether a first job was enqueued.
     * Worker: php think queue:work --queue UserFieldAi
     */
    public function startChain()
    {
        $request = $this->request;
        $force = intval($request->param('force', 0)) === 1;
        $delay = max(0, intval($request->param('delay', 1)));

        $service = new UserFieldAiService();
        $enqueued = $service->startChain($force, $delay);

        $message = 'no pending users';
        if ($enqueued) {
            $message = 'chain enqueued';
        }

        return jsonSuccess([
            'started' => $enqueued,
            'queue' => UserFieldAiService::QUEUE_NAME,
            'force' => $force,
            'msg' => $message,
        ]);
    }

    /**
     * Process a single user synchronously, bypassing the queue.
     */
    public function processOne()
    {
        $userId = intval($this->request->param('user_id', 0));
        $force = intval($this->request->param('force', 0)) === 1;
        if ($userId <= 0) {
            return jsonError('user_id required');
        }

        $outcome = (new UserFieldAiService())->processUser($userId, $force);
        if (!empty($outcome['ok'])) {
            return jsonSuccess($outcome);
        }

        return jsonError(isset($outcome['error']) ? $outcome['error'] : 'failed');
    }

    /**
     * Preview eligibility and the current field_ai state of one user.
     *
     * Note: this calls ensureReviewerInfoRow(), so it may insert an empty
     * reviewer-info row for the user as a side effect.
     */
    public function preview()
    {
        $userId = intval($this->request->param('user_id', 0));
        if ($userId <= 0) {
            return jsonError('user_id required');
        }

        $service = new UserFieldAiService();
        $service->ensureReviewerInfoRow($userId);
        $row = Db::name('user_reviewer_info')->where('reviewer_id', $userId)->find();

        return jsonSuccess([
            'user_id' => $userId,
            'has_articles' => $service->hasSubmittedArticles($userId),
            'profile_complete' => $service->isReviewerProfileComplete($row),
            'eligible' => $service->isEligible($userId, $row),
            'field_ai' => $row ? (string) $row['field_ai'] : '',
            'field_ai_status' => $row ? intval($row['field_ai_status']) : 0,
            'field_ai_utime' => $row ? intval($row['field_ai_utime']) : 0,
            'field_ai_status_text' => $this->statusLabel($row ? intval($row['field_ai_status']) : 0),
        ]);
    }

    /**
     * Translate a field_ai_status code into a short text label.
     *
     * @param int $status
     * @return string 'unknown' for any code that is not one of the service constants
     */
    private function statusLabel($status)
    {
        $labels = [
            UserFieldAiService::STATUS_PENDING => 'pending',
            UserFieldAiService::STATUS_DONE => 'done',
            UserFieldAiService::STATUS_INSUFFICIENT => 'insufficient',
            UserFieldAiService::STATUS_FAILED => 'failed',
        ];

        if (isset($labels[$status])) {
            return $labels[$status];
        }

        return 'unknown';
    }
}

View File

@@ -23,16 +23,16 @@ class PlagiarismPoll
public function fire(Job $job, $data)
{
// $checkId = isset($data['check_id']) ? intval($data['check_id']) : 0;
// $attempt = isset($data['attempt']) ? intval($data['attempt']) : 1;
//
// if ($checkId <= 0) {
// $job->delete();
// return;
// }
$checkId = isset($data['check_id']) ? intval($data['check_id']) : 0;
$attempt = isset($data['attempt']) ? intval($data['attempt']) : 1;
if ($checkId <= 0) {
$job->delete();
return;
}
$svc = new PlagiarismService();
$svc->log("PlagiarismPoll job is running");
// $svc->runPollStatus($checkId, $attempt);
$svc->runPollStatus($checkId, $attempt);
$job->delete();
}
}

View File

@@ -0,0 +1,35 @@
<?php
namespace app\api\job;
use think\queue\Job;
use app\common\UserFieldAiService;
/**
 * Chained job: generate field_ai for one user, then enqueue the next user.
 *
 * data:
 * - user_id  user handled by this job
 * - queue    queue name (defaults to UserFieldAiService::QUEUE_NAME)
 * - force    non-empty = force recomputation
 * - delay    seconds before the next job (defaults to 1 when absent)
 *
 * Worker: php think queue:work --queue UserFieldAi
 */
class UserFieldAiFill
{
    /**
     * Process the payload's user (when user_id > 0), delete this job, then
     * ask the service to enqueue the next pending user after user_id.
     */
    public function fire(Job $job, $data)
    {
        $userId = intval($data['user_id'] ?? 0);
        $queue = isset($data['queue']) ? (string) $data['queue'] : UserFieldAiService::QUEUE_NAME;
        $force = !empty($data['force']);
        $delay = max(0, (int) ($data['delay'] ?? 1));

        $service = new UserFieldAiService();
        if ($userId > 0) {
            $service->processUser($userId, $force);
        }

        // The job is removed before the successor is queued.
        $job->delete();
        $service->enqueueNextFieldAi($delay, $queue, $userId, $force);
    }
}

View File

@@ -1153,12 +1153,12 @@ class ArticleParserService
}
/**
* 提取 Word 文档中的参考文献列表(仅返回数组,不做入库
* @return array 每条为一个参考文献的纯文本字符串
* 按段落提取 Word 全文行(供正文裁切、参考文献识别等复用
* @return array<int,string>
*/
public static function getReferencesFromWord($filePath): array
public static function collectParagraphLines($filePath): array
{
$othis = new self($filePath) ;
$othis = new self($filePath);
if (empty($othis->sections)) {
return [];
}
@@ -1166,13 +1166,26 @@ class ArticleParserService
$lines = [];
foreach ($othis->sections as $section) {
foreach ($section->getElements() as $element) {
$text = $othis->getTextFromElement($element);
$text = trim((string)$text);
if ($text === '') continue;
$lines[] = $text;
$text = trim((string) $othis->getTextFromElement($element));
if ($text === '') {
continue;
}
if (!mb_check_encoding($text, 'UTF-8')) {
$text = mb_convert_encoding($text, 'UTF-8', 'GBK');
}
$lines[] = preg_replace('/\s+/u', ' ', $text);
}
}
return $lines;
}
/**
* 提取 Word 文档中的参考文献列表(仅返回数组,不做入库)
* @return array 每条为一个参考文献的纯文本字符串
*/
public static function getReferencesFromWord($filePath): array
{
$lines = self::collectParagraphLines($filePath);
if (empty($lines)) {
return [];
}

View File

@@ -0,0 +1,242 @@
<?php
namespace app\common;
use PhpOffice\PhpWord\IOFactory;
use PhpOffice\PhpWord\PhpWord;
use think\Exception;
/**
* 从投稿 Word 稿件生成「仅正文」版本:去掉文前题名/作者/单位等,去掉参考文献及之后内容。
*/
class ManuscriptBodyExtractor
{
const BODY_SUBDIR = 'public/plagiarism/body_only';
/** @var array<int,string> */
private $lines = [];
/**
 * Produce a "body only" DOCX from a manuscript: everything before the
 * detected body start and everything from the detected references/stop
 * heading onwards is dropped.
 *
 * @param string $sourcePath path of a readable .docx manuscript
 * @param int    $articleId  used only in the output file name
 * @return array{path:string, rel_path:string, line_count:int, ref_start:int, body_start:int, warnings:array}
 * @throws Exception when the file is unreadable, not DOCX, yields no text,
 *                   has no locatable body, or the body has fewer than 3 paragraphs
 */
public function buildBodyOnlyDocx($sourcePath, $articleId = 0)
{
    $sourcePath = trim((string) $sourcePath);
    $usable = is_file($sourcePath) && is_readable($sourcePath);
    if (!$usable) {
        throw new Exception('Manuscript not readable: ' . $sourcePath);
    }

    $extension = strtolower(pathinfo($sourcePath, PATHINFO_EXTENSION));
    if ($extension !== 'docx') {
        throw new Exception('body_only check requires DOCX manuscript, got: ' . $extension);
    }

    $this->lines = ArticleParserService::collectParagraphLines($sourcePath);
    if (empty($this->lines)) {
        throw new Exception('No text extracted from manuscript');
    }

    $refStart = $this->findReferenceStartIndex();
    $bodyStart = $this->findBodyStartIndex();

    $warnings = [];
    if ($refStart < 0) {
        // No references heading: keep everything up to the end of the document.
        $warnings[] = 'references_heading_not_found; using document end';
        $refStart = count($this->lines);
    }
    if ($bodyStart >= $refStart) {
        throw new Exception('Could not locate main body (front matter may include entire document)');
    }

    $slice = array_slice($this->lines, $bodyStart, $refStart - $bodyStart);
    $bodyLines = $this->normalizeBodyLines($slice);
    $paragraphCount = count($bodyLines);
    if ($paragraphCount < 3) {
        throw new Exception('Body text too short after extraction (' . $paragraphCount . ' paragraphs)');
    }

    $relPath = $this->writeBodyDocx($bodyLines, $articleId);

    // Rebuild the absolute path from the project root and the relative path.
    $rootDir = rtrim(ROOT_PATH ?: dirname(dirname(__DIR__)), '/\\');
    $nativeRel = str_replace(['/', '\\'], DIRECTORY_SEPARATOR, $relPath);

    return [
        'path' => $rootDir . DIRECTORY_SEPARATOR . $nativeRel,
        'rel_path' => $relPath,
        'line_count' => count($bodyLines),
        'ref_start' => $refStart,
        'body_start' => $bodyStart,
        'warnings' => $warnings,
    ];
}
/**
 * Find the index of the first line where the body should end.
 *
 * A line that starts with a references heading word ends the body wherever
 * it occurs. A line that consists solely of a back-matter keyword
 * (optionally followed by an ASCII or full-width colon) ends the body only
 * when it lies past the first 40% of the document.
 *
 * @return int line index, or -1 when nothing matched
 */
private function findReferenceStartIndex()
{
    // All keywords are already lower-case, so only the line needs folding.
    $stopKeywords = [
        'acknowledgements', 'acknowledgments', 'funding', 'appendix', 'supplementary',
        'conflict of interest', 'competing interests', 'author contributions',
        '致谢', '基金', '附录', '补充材料', '利益冲突', '作者贡献',
    ];
    // Back-matter keywords are ignored in the first 40% of the document.
    $minStopIndex = count($this->lines) * 0.4;

    foreach ($this->lines as $i => $line) {
        $t = trim($line);
        if ($t === '') {
            continue;
        }
        if (preg_match('/^\s*(references|reference|bibliography|参考文献|文献)\b\s*[:]?\s*/iu', $t)) {
            return $i;
        }
        if ($i <= $minStopIndex) {
            continue;
        }

        // mb_strtolower keeps multi-byte text intact; byte-wise strtolower
        // can be locale-dependent on older PHP versions.
        $lower = mb_strtolower($t, 'UTF-8');
        // Strip one trailing colon (ASCII or full-width) so "致谢:" and
        // "Funding:" both compare equal to the bare keyword.
        $bare = preg_replace('/\s*[::]$/u', '', $lower);
        if ($bare === null) {
            $bare = $lower;
        }
        if (in_array($bare, $stopKeywords, true)) {
            return $i;
        }
    }

    return -1;
}
/**
 * Locate the first line of the main body.
 *
 * Preference order: the first introduction-style heading anywhere in the
 * document; otherwise the position computed after the first "Keywords"
 * line; otherwise the front-matter fallback.
 *
 * @return int
 */
private function findBodyStartIndex()
{
    $total = count($this->lines);
    $firstKeywords = -1;

    for ($idx = 0; $idx < $total; $idx++) {
        $text = trim($this->lines[$idx]);
        if ($text === '') {
            continue;
        }
        // An introduction heading always wins, so the scan can stop at the first one.
        if ($this->isIntroductionHeading($text)) {
            return $idx;
        }
        if ($firstKeywords < 0 && preg_match('/^\s*keywords?\b\s*[:]?/iu', $text)) {
            $firstKeywords = $idx;
        }
    }

    if ($firstKeywords >= 0) {
        $candidate = $this->indexAfterKeywordsBlock($firstKeywords);
        if ($candidate < $total) {
            return $candidate;
        }
    }

    return $this->indexAfterFrontMatterFallback();
}
/**
 * Tell whether a line starts with an introduction/background/methods
 * heading (English or Chinese, optionally numbered "1.").
 *
 * @param string $t trimmed line text
 * @return bool
 */
private function isIntroductionHeading($t)
{
    $patterns = [
        '/^\s*(introduction|background|materials and methods|materials & methods|methods and materials)\b\s*[:]?/iu',
        '/^\s*(引言|前言|背景|材料与方法|资料与方法|研究方法)\b\s*[:]?/iu',
        '/^\s*1[\.\s、]+(introduction|引言|前言)\b/iu',
    ];

    foreach ($patterns as $pattern) {
        if (preg_match($pattern, $t)) {
            return true;
        }
    }

    return false;
}
/**
 * Starting after the keywords line, return the first line that looks like
 * body text: an introduction heading, or a line of at least 30 characters
 * that is neither an abstract line nor affiliation-like.
 *
 * @param int $kwIdx index of the keywords line
 * @return int matching index, or min($kwIdx + 1, last index) when none matched
 */
private function indexAfterKeywordsBlock($kwIdx)
{
    $total = count($this->lines);
    $cursor = $kwIdx + 1;

    while ($cursor < $total) {
        $text = trim($this->lines[$cursor]);
        if ($text !== '') {
            if ($this->isIntroductionHeading($text)) {
                return $cursor;
            }
            $isAbstract = preg_match('/^\s*abstract\b/iu', $text);
            if (!$isAbstract && mb_strlen($text) >= 30 && !$this->looksLikeAffiliationLine($text)) {
                return $cursor;
            }
        }
        $cursor++;
    }

    return min($kwIdx + 1, $total - 1);
}
/**
 * Last-resort body start: look for an introduction heading within the
 * first min(20, 15% of lines) lines; if none, skip a fixed 8 lines
 * (capped at the last index).
 *
 * @return int
 */
private function indexAfterFrontMatterFallback()
{
    $total = count($this->lines);
    $scanLimit = min(20, (int) floor($total * 0.15), $total);

    foreach (array_slice($this->lines, 0, $scanLimit) as $idx => $raw) {
        $text = trim($raw);
        if ($text !== '' && $this->isIntroductionHeading($text)) {
            return $idx;
        }
    }

    return min(8, max(0, $total - 1));
}
/**
 * Heuristic: does the line look like front matter (e-mail, correspondence,
 * institution words, a leading number, or an abstract/keywords heading)?
 *
 * @param string $t trimmed line text
 * @return bool
 */
private function looksLikeAffiliationLine($t)
{
    $hasAffiliationMarker = preg_match('/@|mailto:|correspond|univ|university|hospital|institute|department|^\d+[\s,]/iu', $t);

    return $hasAffiliationMarker
        || preg_match('/^\s*abstract\b/iu', $t)
        || preg_match('/^\s*keywords?\b/iu', $t);
}
/**
 * Trim every line and drop blanks and EndNote reference-list field codes.
 *
 * @param array<int,string> $bodyLines
 * @return array<int,string> re-indexed list of the remaining lines
 */
private function normalizeBodyLines(array $bodyLines)
{
    $kept = [];
    foreach (array_map('trim', $bodyLines) as $text) {
        if ($text === '') {
            continue;
        }
        $isFieldCode = preg_match('/^\{\s*ADDIN\s+EN\.REFLIST\s*\}$/i', $text);
        if (!$isFieldCode) {
            $kept[] = $text;
        }
    }

    return $kept;
}
/**
 * Write the body paragraphs into a new DOCX under BODY_SUBDIR.
 *
 * @param array<int,string> $bodyLines one paragraph per entry
 * @param int               $articleId used in the file name
 * @return string path of the written file relative to the project root
 * @throws Exception when the output directory cannot be created or the
 *                   written file is missing or implausibly small
 */
private function writeBodyDocx(array $bodyLines, $articleId)
{
    $rootDir = rtrim(ROOT_PATH ?: dirname(dirname(__DIR__)), '/\\');
    $dir = $rootDir . DIRECTORY_SEPARATOR . self::BODY_SUBDIR;

    // Fail loudly when the directory cannot be created instead of letting
    // the writer fail later with an unrelated message. The warning is
    // suppressed and is_dir() re-checked so that a concurrent request
    // creating the same directory is not treated as an error.
    if (!is_dir($dir) && !@mkdir($dir, 0755, true) && !is_dir($dir)) {
        throw new Exception('Cannot create body-only output directory: ' . $dir);
    }

    $name = sprintf('body_article_%d_%s.docx', intval($articleId), date('Ymd_His'));
    $absPath = $dir . DIRECTORY_SEPARATOR . $name;

    $phpWord = new PhpWord();
    $section = $phpWord->addSection();
    foreach ($bodyLines as $line) {
        $section->addText($line);
    }

    $writer = IOFactory::createWriter($phpWord, 'Word2007');
    $writer->save($absPath);

    // Sanity check: the file must exist and be at least 200 bytes.
    if (!is_file($absPath) || filesize($absPath) < 200) {
        throw new Exception('Failed to write body-only docx');
    }

    return self::BODY_SUBDIR . '/' . $name;
}
}

View File

@@ -16,7 +16,7 @@ use think\Exception;
* PlagiarismRun → 创建 submission + 上传文件 → 入队 PlagiarismWaitIngest
* PlagiarismWaitIngest → 单次 GET submission 状态;就绪则入队 PlagiarismTriggerSimilarity否则延迟再入队
* PlagiarismTriggerSimilarity → PUT similarity → state=2比对中入队 PlagiarismPoll
* PlagiarismPoll → 轮询 similarity完成后下载 PDF → state=3完成
* PlagiarismPoll → 轮询 similarity完成后下载 PDF → state=3完成;在线 viewer URL 按需 getReportUrl 调用 refreshViewerUrlFor
* 任意环节抛异常 → state=4失败写 error_msg
*
* Worker请用 `queue:work` 消费队列 **plagiarism**(整条链与轮询均在此队列;若此前单独监听 PlagiarismRun / PlagiarismPoll需改为 plagiarism
@@ -31,6 +31,9 @@ class PlagiarismService
/** Run / WaitIngest / TriggerSimilarity / Poll 共用队列名 */
const QUEUE_CHAIN = 'plagiarism';
const CHECK_TYPE_FULL = 'full';
const CHECK_TYPE_BODY = 'body_only';
const JOB_WAIT_INGEST = 'app\\api\\job\\PlagiarismWaitIngest';
const JOB_TRIGGER_SIM = 'app\\api\\job\\PlagiarismTriggerSimilarity';
const JOB_POLL = 'app\\api\\job\\PlagiarismPoll';
@@ -61,38 +64,83 @@ class PlagiarismService
* @param string $filePath 本地可读的 PDF/DOCX 绝对路径
* @param int $triggeredBy 触发人 user_id手工触发时编辑后台的 user_id
* @param string $source 'manual' / 'auto_xxx'
* @param string $checkType full | body_only
* @return int check_id
*/
public function submit($articleId, $filePath, $triggeredBy = 0, $source = 'manual')
public function submit($articleId, $filePath, $triggeredBy = 0, $source = 'manual', $checkType = self::CHECK_TYPE_FULL)
{
if (!is_file($filePath) || !is_readable($filePath)) {
throw new Exception("File not readable: {$filePath}");
}
$checkType = $this->normalizeCheckType($checkType);
$uploadPath = $filePath;
$derivedRel = '';
$sourceName = basename($filePath);
if ($checkType === self::CHECK_TYPE_BODY) {
$built = (new ManuscriptBodyExtractor())->buildBodyOnlyDocx($filePath, $articleId);
$uploadPath = $built['path'];
$derivedRel = (string) $built['rel_path'];
$sourceName = basename($uploadPath);
if (!empty($built['warnings'])) {
$this->log('body_only warnings check article=' . $articleId . ' ' . implode('; ', $built['warnings']));
}
}
$journalId = (int) Db::name('article')
->where('article_id', $articleId)
->value('journal_id');
$this->log("plagiarism submit is running");
$this->log("plagiarism submit type={$checkType} article={$articleId}");
$now = time();
$checkId = Db::name('plagiarism_check')->insertGetId([
$row = [
'article_id' => $articleId,
'journal_id' => $journalId,
'triggered_by' => $triggeredBy,
'trigger_source' => $source,
'state' => 1, // 上传中
'source_file_name' => basename($filePath),
'source_file_size' => filesize($filePath) ?: 0,
'check_type' => $checkType,
'state' => 1,
'source_file_name' => $sourceName,
'source_file_size' => filesize($uploadPath) ?: 0,
'ctime' => $now,
'utime' => $now,
]);
];
if ($derivedRel !== '') {
$row['derived_file_path'] = $derivedRel;
}
$checkId = Db::name('plagiarism_check')->insertGetId($row);
Queue::push(
'app\\api\\job\\PlagiarismRun',
['check_id' => $checkId, 'file_path' => $filePath],
['check_id' => $checkId, 'file_path' => $uploadPath],
self::QUEUE_CHAIN
);
return (int)$checkId;
return (int) $checkId;
}
/**
 * Submit two checks for the same manuscript: full text and body only.
 *
 * The body-only check is submitted first. submit() runs the body
 * extraction before it inserts a row or queues a job, and extraction is the
 * step that throws for non-DOCX or unparseable manuscripts. Doing it first
 * means such a failure leaves nothing behind, instead of an already-queued
 * full check that the caller is never told about.
 *
 * @param int    $articleId
 * @param string $filePath    local readable manuscript path
 * @param int    $triggeredBy user_id of the person triggering the check
 * @param string $source      trigger source label
 * @return array{full:int, body_only:int} check ids keyed by check type
 */
public function submitBoth($articleId, $filePath, $triggeredBy = 0, $source = 'manual')
{
    $bodyId = $this->submit($articleId, $filePath, $triggeredBy, $source, self::CHECK_TYPE_BODY);
    $fullId = $this->submit($articleId, $filePath, $triggeredBy, $source, self::CHECK_TYPE_FULL);

    return [
        'full' => $fullId,
        'body_only' => $bodyId,
    ];
}
/**
 * Canonicalise a user-supplied check type.
 *
 * Empty input and 'full' map to CHECK_TYPE_FULL; 'body_only', 'body' and
 * 'bodyonly' map to CHECK_TYPE_BODY (case-insensitive, trimmed).
 *
 * @param mixed $checkType
 * @return string one of the CHECK_TYPE_* constants
 * @throws Exception for any other value
 */
private function normalizeCheckType($checkType)
{
    $key = strtolower(trim((string) $checkType));

    if (in_array($key, ['', self::CHECK_TYPE_FULL, 'full'], true)) {
        return self::CHECK_TYPE_FULL;
    }
    if (in_array($key, [self::CHECK_TYPE_BODY, 'body', 'bodyonly'], true)) {
        return self::CHECK_TYPE_BODY;
    }

    throw new Exception('invalid check_type, use full or body_only');
}
/**
@@ -252,7 +300,7 @@ class PlagiarismService
return;
}
// try {
try {
$tii = new TurnitinService();
$statusResp = $tii->getSimilarityStatus($check['tii_submission_id']);
$status = isset($statusResp['status']) ? strtoupper($statusResp['status']) : '';
@@ -267,17 +315,13 @@ class PlagiarismService
$score = isset($statusResp['overall_match_percentage'])
? floatval($statusResp['overall_match_percentage']) : 0;
// 下载 PDF + 取在线查看 URL
$localPdf = $this->downloadAndStorePdf($tii, $check['tii_submission_id'], $checkId);
$viewerInfo = $this->refreshViewerUrl($tii, $check['tii_submission_id']);
$this->updateCheck($checkId, [
'state' => 3,
'similarity_score' => $score,
'pdf_local_path' => $localPdf,
'view_only_url' => $viewerInfo['url'],
'view_only_url_expire' => $viewerInfo['expire'],
'error_msg' => '',
'state' => 3,
'similarity_score' => $score,
'pdf_local_path' => $localPdf,
'error_msg' => '',
]);
return;
}
@@ -288,7 +332,6 @@ class PlagiarismService
return;
}
// PROCESSING 或其它中间态:继续轮询
if ($attempt >= self::MAX_POLL_ATTEMPTS) {
$this->markFailed($checkId, '[poll] timeout after ' . $attempt . ' attempts');
return;
@@ -299,28 +342,27 @@ class PlagiarismService
['check_id' => $checkId, 'attempt' => $attempt + 1],
self::QUEUE_CHAIN
);
// } catch (\Throwable $e) {
// // 网络抖动不要直接 fail给一定容错次数
// if ($attempt < self::MAX_POLL_ATTEMPTS) {
// Queue::later(
// self::POLL_INTERVAL,
// self::JOB_POLL,
// ['check_id' => $checkId, 'attempt' => $attempt + 1],
// self::QUEUE_CHAIN
// );
// $this->updateCheck($checkId, [
// 'attempts' => $attempt,
// 'error_msg' => '[poll] transient: ' . $e->getMessage(),
// ]);
// return;
// }
// $this->markFailed($checkId, '[poll] exhausted: ' . $e->getMessage());
// throw $e;
// }
} catch (\Throwable $e) {
if ($attempt < self::MAX_POLL_ATTEMPTS) {
Queue::later(
self::POLL_INTERVAL,
self::JOB_POLL,
['check_id' => $checkId, 'attempt' => $attempt + 1],
self::QUEUE_CHAIN
);
$this->updateCheck($checkId, [
'attempts' => $attempt,
'error_msg' => '[poll] transient: ' . $e->getMessage(),
]);
return;
}
$this->markFailed($checkId, '[poll] exhausted: ' . $e->getMessage());
throw $e;
}
}
/**
* 重新生成在线查看 URL已有的过期了用
* 按需获取/刷新 Turnitin 在线报告 URL与 poll 解耦,避免 viewer-url 失败拖死查重完成)。
*
* @return array{url:string, expire:int, local_pdf:string}
*/
@@ -345,6 +387,9 @@ class PlagiarismService
// ---------- 内部 ----------
/**
* 调用 Turnitin POST viewer-url仅由 refreshViewerUrlFor / getReportUrl 触发。
*/
private function refreshViewerUrl($tii, $submissionId)
{
$resp = $tii->getViewerUrl($submissionId);

View File

@@ -0,0 +1,404 @@
<?php
namespace app\common;
use think\Db;
use think\Env;
use think\Exception;
use think\Queue;
/**
* 根据投稿记录 / 审稿人资料,用大模型总结用户主领域(中文)写入 field_ai。
* 队列链UserFieldAiFill → 处理一条 → enqueueNextFieldAi → 下一条。
*/
class UserFieldAiService
{
const QUEUE_NAME = 'UserFieldAi';
const STATUS_PENDING = 0;
const STATUS_DONE = 1;
const STATUS_INSUFFICIENT = 2;
const STATUS_FAILED = 3;
private $logFile;
/**
 * Point the service log at runtime/user_field_ai.log under ROOT_PATH.
 */
public function __construct()
{
    $runtimeDir = ROOT_PATH . 'runtime';
    $this->logFile = $runtimeDir . DS . 'user_field_ai.log';
}
/**
 * Start the chain from the beginning, i.e. look for the first pending user
 * with user_id greater than 0.
 *
 * @param bool   $force true to recompute users that already have a result
 * @param int    $delay seconds before the first job runs
 * @param string $queue queue name; empty string selects the default queue
 * @return bool whether a first job was enqueued
 */
public function startChain($force = false, $delay = 1, $queue = '')
{
    $afterUserId = 0;

    return $this->enqueueNextFieldAi($delay, $queue, $afterUserId, $force);
}
/**
 * Chain step: find the next pending user with user_id > $afterUserId and
 * enqueue a UserFieldAiFill job for them.
 *
 * @param int    $delay       seconds before the job runs; 0 pushes immediately
 * @param string $queue       queue name; empty string selects QUEUE_NAME
 * @param int    $afterUserId cursor, only users with a greater id are considered
 * @param bool   $force       true to include users that already have a result
 * @return bool true when a job was enqueued, false when the chain is finished
 */
public function enqueueNextFieldAi($delay = 1, $queue = '', $afterUserId = 0, $force = false)
{
    if ($queue === '') {
        $queue = self::QUEUE_NAME;
    }
    $delay = max(0, intval($delay));
    $afterUserId = intval($afterUserId);

    $userId = $this->findNextPendingUserId($afterUserId, $force);
    if ($userId <= 0) {
        $this->log('[FieldAi] chain finished after user_id=' . $afterUserId . ' force=' . ($force ? '1' : '0'));
        return false;
    }

    $data = [
        'user_id' => $userId,
        'queue' => $queue,
        'force' => $force ? 1 : 0,
        // The job reads $data['delay'] to schedule its successor. Without
        // this key every link after the first silently fell back to 1 second.
        'delay' => $delay,
    ];
    $jobClass = 'app\\api\\job\\UserFieldAiFill@fire';
    if ($delay > 0) {
        Queue::later($delay, $jobClass, $data, $queue);
    } else {
        Queue::push($jobClass, $data, $queue);
    }

    $this->log('[FieldAi] enqueued user_id=' . $userId . ' queue=' . $queue);
    return true;
}
/**
 * Generate and store field_ai for one user (used by the queue job and by
 * the synchronous debug endpoint).
 *
 * Outcomes: skipped (already done and not forced), insufficient (user not
 * eligible; status is stored), success (field stored with STATUS_DONE), or
 * failure (STATUS_FAILED stored with an empty field, error logged).
 *
 * @param int  $userId
 * @param bool $force  true to recompute even when a result exists
 * @return array{ok:bool, skipped?:bool, insufficient?:bool, field_ai?:string, error?:string}
 */
public function processUser($userId, $force = false)
{
    $userId = intval($userId);
    if ($userId <= 0) {
        return ['ok' => false, 'error' => 'invalid user_id'];
    }

    $this->ensureReviewerInfoRow($userId);
    $profile = Db::name('user_reviewer_info')->where('reviewer_id', $userId)->find();
    if (!$profile) {
        return ['ok' => false, 'error' => 'reviewer_info missing'];
    }

    if (!$force) {
        $isDone = intval($profile['field_ai_status']) === self::STATUS_DONE;
        if ($isDone && trim((string)$profile['field_ai']) !== '') {
            return ['ok' => true, 'skipped' => true, 'field_ai' => (string)$profile['field_ai']];
        }
    }

    if (!$this->isEligible($userId, $profile)) {
        $this->updateFieldAi($userId, '', self::STATUS_INSUFFICIENT, 'insufficient profile/articles');
        return ['ok' => true, 'insufficient' => true];
    }

    try {
        $summary = $this->summarizeWithLlm($this->buildContext($userId, $profile));
        if ($summary === '') {
            throw new Exception('LLM returned empty field');
        }
        $this->updateFieldAi($userId, $summary, self::STATUS_DONE, '');
        return ['ok' => true, 'field_ai' => $summary];
    } catch (\Throwable $error) {
        $reason = $error->getMessage();
        $this->updateFieldAi($userId, '', self::STATUS_FAILED, mb_substr($reason, 0, 500));
        $this->log('[FieldAi] user_id=' . $userId . ' fail: ' . $reason);
        return ['ok' => false, 'error' => $reason];
    }
}
/**
 * A user can be summarised when they have submitted at least one article
 * or their reviewer profile is sufficiently filled in.
 *
 * @param int        $userId
 * @param array|null $uri reviewer-info row; loaded from the database when null
 * @return bool
 */
public function isEligible($userId, $uri = null)
{
    // Short-circuit: the profile is only loaded/inspected when no article exists.
    return $this->hasSubmittedArticles($userId)
        || $this->isReviewerProfileComplete(
            $uri ?? Db::name('user_reviewer_info')->where('reviewer_id', $userId)->find()
        );
}
/**
 * Whether the user owns at least one article row with a non-empty title.
 *
 * @param int $userId
 * @return bool
 */
public function hasSubmittedArticles($userId)
{
    $titledArticles = Db::name('article')
        ->where('user_id', intval($userId))
        ->where('title', '<>', '');

    return $titledArticles->count() > 0;
}
/**
 * A reviewer profile counts as "fairly complete" when enough of its fields
 * are filled. The threshold comes from env user_field_ai.min_profile_fields
 * (default 4) and is never lower than 3.
 *
 * Counted: seven text columns, the legacy `major` column (unless it is
 * "0"), and the presence of any active major_to_user row.
 *
 * @param array|null|mixed $uri reviewer-info row
 * @return bool false for a missing or non-array row
 */
public function isReviewerProfileComplete($uri)
{
    if (!$uri || !is_array($uri)) {
        return false;
    }

    $threshold = max(3, (int) Env::get('user_field_ai.min_profile_fields', 4));
    $score = 0;

    foreach (['field', 'company', 'country', 'technical', 'introduction', 'department', 'website'] as $column) {
        if (empty($uri[$column])) {
            continue;
        }
        if (trim((string)$uri[$column]) !== '') {
            $score++;
        }
    }

    if (!empty($uri['major'])) {
        $legacyMajor = trim((string)$uri['major']);
        if ($legacyMajor !== '' && $legacyMajor !== '0') {
            $score++;
        }
    }

    $activeMajors = Db::name('major_to_user')
        ->where('user_id', intval($uri['reviewer_id']))
        ->where('state', 0)
        ->count();
    if ($activeMajors > 0) {
        $score++;
    }

    return $score >= $threshold;
}
/**
 * Scan users in ascending id order, starting after $afterUserId, and return
 * the first one that should be processed.
 *
 * Users are read in batches (env user_field_ai.scan_batch, default 80,
 * minimum 20). Without $force only users whose status is pending/failed or
 * who have no reviewer-info row are candidates; candidates that turn out
 * not to be eligible are marked STATUS_INSUFFICIENT so they are not
 * rescanned.
 *
 * @param int  $afterUserId cursor; only larger user ids are considered
 * @param bool $force       true to consider every user regardless of status
 * @return int user id, or 0 when no further user qualifies
 */
private function findNextPendingUserId($afterUserId, $force)
{
    $batch = max(20, (int) Env::get('user_field_ai.scan_batch', 80));
    $cursor = intval($afterUserId);

    while (true) {
        // Use join(..., 'left') like the other queries in this code base
        // rather than the leftJoin() shortcut.
        // NOTE(review): leftJoin() appears to be unavailable on ThinkPHP 5.0's
        // query builder — confirm against the framework version in use.
        $query = Db::name('user')->alias('u')
            ->join('t_user_reviewer_info uri', 'uri.reviewer_id = u.user_id', 'left')
            ->where('u.user_id', '>', $cursor);
        if (!$force) {
            $query->where(function ($q) {
                $q->where('uri.field_ai_status', self::STATUS_PENDING)
                    ->whereOr('uri.field_ai_status', self::STATUS_FAILED)
                    ->whereOr('uri.reviewer_info_id', 'null');
            });
        }

        $ids = $query->order('u.user_id asc')->limit($batch)->column('u.user_id');
        if (empty($ids)) {
            return 0;
        }

        foreach ($ids as $uid) {
            $uid = intval($uid);
            $cursor = $uid;

            $this->ensureReviewerInfoRow($uid);
            $uri = Db::name('user_reviewer_info')->where('reviewer_id', $uid)->find();
            // Treat a row that still cannot be read as pending instead of
            // indexing into null.
            $status = $uri ? intval($uri['field_ai_status']) : self::STATUS_PENDING;

            if (!$force && ($status === self::STATUS_DONE || $status === self::STATUS_INSUFFICIENT)) {
                continue;
            }
            if ($this->isEligible($uid, $uri)) {
                return $uid;
            }
            if (!$force) {
                $this->updateFieldAi($uid, '', self::STATUS_INSUFFICIENT, 'auto skip: insufficient data');
            }
        }
    }
}
/**
 * Assemble the data sent to the LLM: basic user identity, reviewer profile
 * fields, resolved major titles, and the user's most recent titled articles
 * (count from env user_field_ai.max_articles, clamped to 1..10, default 5).
 * Abstracts are cut to 800 characters and the introduction to 1200.
 *
 * @param int   $userId
 * @param array $uri reviewer-info row
 * @return array{user:array, profile:array, articles:array}
 */
private function buildContext($userId, array $uri)
{
    $account = Db::name('user')->where('user_id', $userId)->field('user_id,realname,email,account')->find();
    $majors = $this->resolveMajorTitles($userId, $uri);

    $articleLimit = max(1, min(10, (int) Env::get('user_field_ai.max_articles', 5)));
    $rows = Db::name('article')
        ->where('user_id', $userId)
        ->where('title', '<>', '')
        ->order('article_id desc')
        ->limit($articleLimit)
        ->field('article_id,title,keywords,abstrart,journal_id,ctime')
        ->select();

    // Resolve journal titles in one query, keyed by journal_id.
    $journalTitles = [];
    if (!empty($rows)) {
        $journalIds = array_unique(array_filter(array_column($rows, 'journal_id')));
        if (!empty($journalIds)) {
            $journalTitles = Db::name('journal')->where('journal_id', 'in', $journalIds)->column('title', 'journal_id');
        }
    }

    $articleSummaries = [];
    foreach ($rows as $row) {
        $journalId = intval($row['journal_id']);
        $journalTitle = '';
        if (isset($journalTitles[$journalId])) {
            $journalTitle = (string) $journalTitles[$journalId];
        }
        $articleSummaries[] = [
            'title' => (string) $row['title'],
            'journal' => $journalTitle,
            'keywords' => (string) ($row['keywords'] ?? ''),
            'abstract' => mb_substr(trim((string) ($row['abstrart'] ?? '')), 0, 800),
        ];
    }

    // Trimmed string value of a profile column, '' when absent.
    $profileText = function ($column) use ($uri) {
        return trim((string) ($uri[$column] ?? ''));
    };

    return [
        'user' => [
            'realname' => $account ? (string) $account['realname'] : '',
            'email' => $account ? (string) $account['email'] : '',
        ],
        'profile' => [
            'field' => $profileText('field'),
            'technical' => $profileText('technical'),
            'company' => $profileText('company'),
            'department' => $profileText('department'),
            'country' => $profileText('country'),
            'introduction' => mb_substr($profileText('introduction'), 0, 1200),
            'website' => $profileText('website'),
            'majors' => $majors,
        ],
        'articles' => $articleSummaries,
    ];
}
/**
 * Resolve the titles of the user's majors.
 *
 * Active major_to_user links are used first; when they yield no titles the
 * comma-separated ids in the legacy `major` column are looked up instead.
 *
 * @param int   $userId
 * @param array $uri reviewer-info row
 * @return array<int,string> trimmed, non-empty, de-duplicated titles
 */
private function resolveMajorTitles($userId, array $uri)
{
    $found = [];

    $linkedIds = Db::name('major_to_user')->where('user_id', $userId)->where('state', 0)->column('major_id');
    if (!empty($linkedIds)) {
        $found = Db::name('reviewer_major')->where('major_id', 'in', $linkedIds)->where('state', 0)->column('title');
    }

    if (empty($found) && !empty($uri['major'])) {
        $legacyIds = array_filter(array_map('intval', explode(',', (string) $uri['major'])));
        if (!empty($legacyIds)) {
            $found = Db::name('reviewer_major')->where('major_id', 'in', $legacyIds)->column('title');
        }
    }

    $cleaned = array_filter(array_map('trim', $found));

    return array_values(array_unique($cleaned));
}
/**
 * Ask the configured chat-completions endpoint for the user's main research
 * field and return the cleaned result.
 *
 * Configuration is read from env user_field_ai.chat_url / chat_model /
 * chat_api_key, falling back to the expert_country_* and citation_* keys.
 *
 * @param array $context data produced by buildContext()
 * @return string cleaned field text; '' when the model gave nothing usable
 * @throws Exception on missing configuration, encoding failure, transport
 *                   error, non-2xx status, or a JSON reply without
 *                   choices[0].message.content
 */
private function summarizeWithLlm(array $context)
{
    $url = trim((string) Env::get('user_field_ai.chat_url', Env::get('expert_country_chat_url', Env::get('citation_chat_url', ''))));
    $model = trim((string) Env::get('user_field_ai.chat_model', Env::get('expert_country_chat_model', Env::get('citation_chat_model', 'gpt-4.1'))));
    $apiKey = trim((string) Env::get('user_field_ai.chat_api_key', Env::get('expert_country_chat_api_key', Env::get('citation_chat_api_key', ''))));
    if ($url === '' || $model === '') {
        throw new Exception('user_field_ai chat not configured (chat_url / chat_model)');
    }

    // json_encode returns false on e.g. invalid UTF-8 coming from the
    // database; do not send a bogus "false" payload in that case.
    $payloadJson = json_encode($context, JSON_UNESCAPED_UNICODE | JSON_UNESCAPED_SLASHES);
    if ($payloadJson === false) {
        throw new Exception('LLM payload encode failed: ' . json_last_error_msg());
    }

    $messages = [
        [
            'role' => 'system',
            // The term count previously read "13", which contradicts the
            // "concise" requirement; it is stated as the range 1-3 here.
            'content' => '你是学术领域分类助手。根据用户的投稿与个人资料,用简体中文给出该用户最主要的研究领域总结。'
                . '要求精确、简洁,1-3 个中文领域词或短短语,用顿号分隔;不要解释、不要英文、不要 JSON 以外的多余文字。'
                . '只输出 JSON{"field_ai":"..."}。',
        ],
        [
            'role' => 'user',
            'content' => "请根据以下 JSON 资料总结该用户的主要研究领域:\n" . $payloadJson,
        ],
    ];
    $body = [
        'model' => $model,
        'temperature' => 0.2,
        'messages' => $messages,
    ];
    $requestJson = json_encode($body, JSON_UNESCAPED_UNICODE);
    if ($requestJson === false) {
        throw new Exception('LLM request encode failed: ' . json_last_error_msg());
    }

    $ch = curl_init();
    curl_setopt_array($ch, [
        CURLOPT_URL => $url,
        CURLOPT_POST => true,
        CURLOPT_POSTFIELDS => $requestJson,
        CURLOPT_RETURNTRANSFER => true,
        CURLOPT_CONNECTTIMEOUT => 15,
        CURLOPT_TIMEOUT => max(30, (int) Env::get('user_field_ai.timeout', 90)),
        // array_filter drops the Authorization entry when no key is set.
        CURLOPT_HTTPHEADER => array_values(array_filter([
            'Content-Type: application/json',
            $apiKey !== '' ? 'Authorization: Bearer ' . $apiKey : null,
        ])),
    ]);
    $raw = curl_exec($ch);
    $code = (int) curl_getinfo($ch, CURLINFO_HTTP_CODE);
    $err = curl_error($ch);
    curl_close($ch);

    if ($raw === false) {
        throw new Exception('LLM curl error: ' . $err);
    }
    if ($code < 200 || $code >= 300) {
        throw new Exception('LLM HTTP ' . $code . ': ' . mb_substr((string) $raw, 0, 400));
    }

    $data = json_decode($raw, true);
    if (is_array($data)) {
        // A JSON reply without the expected message (for example an error
        // object returned with HTTP 200) must not be stored as field text.
        if (!isset($data['choices'][0]['message']['content'])) {
            throw new Exception('LLM response missing choices[0].message.content: ' . mb_substr((string) $raw, 0, 400));
        }
        $content = trim((string) $data['choices'][0]['message']['content']);
    } else {
        // Non-JSON body: treat it as the plain-text answer.
        $content = trim((string) $raw);
    }

    $fieldAi = $this->parseFieldAiFromContent($content);
    // Plain-text fallback only for content that is not JSON-like; otherwise
    // an unparseable object or {"field_ai":""} would itself be saved as the
    // research field.
    if ($fieldAi === '' && $content !== '' && strpbrk($content, '{}') === false) {
        $fieldAi = $this->cleanFieldAiText($content);
    }
    return $fieldAi;
}
/**
 * Extract the `field_ai` value from model output.
 *
 * Markdown code fences are stripped, then the outermost {...} span (if any)
 * and finally the whole content are tried as JSON, in that order.
 *
 * @param mixed $content
 * @return string cleaned field text, or '' when no non-empty field_ai was found
 */
private function parseFieldAiFromContent($content)
{
    $content = trim((string) $content);
    if ($content === '') {
        return '';
    }

    $content = preg_replace('/^```[a-zA-Z]*\s*|```$/m', '', $content);

    $candidates = [];
    if (preg_match('/\{.*\}/s', $content, $m)) {
        $candidates[] = $m[0];
    }
    $candidates[] = $content;

    foreach ($candidates as $json) {
        $decoded = json_decode($json, true);
        if (is_array($decoded) && !empty($decoded['field_ai'])) {
            return $this->cleanFieldAiText((string) $decoded['field_ai']);
        }
    }

    return '';
}
/**
 * Normalise field text: strip surrounding quotes/whitespace, remove all
 * whitespace inside, and cap the length at 200 characters.
 *
 * @param mixed $text
 * @return string
 */
private function cleanFieldAiText($text)
{
    $cleaned = trim(trim((string) $text), "\"' \t\n\r");
    $cleaned = preg_replace('/\s+/u', '', $cleaned);

    return mb_strlen($cleaned) > 200 ? mb_substr($cleaned, 0, 200) : $cleaned;
}
/**
 * Make sure a user_reviewer_info row exists for the user, inserting a
 * minimal one (reviewer_id, ctime, state = 0) when none is found.
 *
 * @param int $userId
 * @return void
 */
public function ensureReviewerInfoRow($userId)
{
    $existing = Db::name('user_reviewer_info')->where('reviewer_id', $userId)->find();
    if (!$existing) {
        Db::name('user_reviewer_info')->insert([
            'reviewer_id' => $userId,
            'ctime' => time(),
            'state' => 0,
        ]);
    }
}
/**
 * Store the field text (trimmed, at most 512 characters), status and update
 * time on the user's reviewer-info row; log the note when it is non-empty.
 *
 * @param int    $userId
 * @param string $fieldAi
 * @param int    $status one of the STATUS_* constants
 * @param string $note   log-only remark, not persisted
 * @return void
 */
private function updateFieldAi($userId, $fieldAi, $status, $note)
{
    Db::name('user_reviewer_info')->where('reviewer_id', $userId)->update([
        'field_ai' => mb_substr(trim((string) $fieldAi), 0, 512),
        'field_ai_status' => intval($status),
        'field_ai_utime' => time(),
    ]);

    if ($note === '') {
        return;
    }
    $this->log('[FieldAi] user_id=' . $userId . ' status=' . $status . ' note=' . $note);
}
/**
 * Append a timestamped line to the service log file. Write errors are
 * suppressed (best effort).
 *
 * @param string $msg
 * @return void
 */
public function log($msg)
{
    $stamp = date('Y-m-d H:i:s');
    @file_put_contents($this->logFile, $stamp . ' ' . $msg . PHP_EOL, FILE_APPEND);
}
}