Merge branch 'master' of https://git.nuttyreading.com/zm/tougao
This commit is contained in:
22
.env
22
.env
@@ -19,6 +19,16 @@ client_id = 616562
|
|||||||
client_secret = CfMDrllyqBTFKrUkO2XaE7OmWTYqP3yd
|
client_secret = CfMDrllyqBTFKrUkO2XaE7OmWTYqP3yd
|
||||||
hmac = 8aU8WnITYhwaGTXH
|
hmac = 8aU8WnITYhwaGTXH
|
||||||
|
|
||||||
|
[base]
|
||||||
|
model_url=http://chat.taimed.cn
|
||||||
|
model_url1=http://125.39.141.154:10002/v1/chat/completions
|
||||||
|
model=DeepSeek-Coder-V2-Instruct
|
||||||
|
|
||||||
|
[user_field_ai]
|
||||||
|
; 留空则依次用 promotion PROMOTION_LLM_URL、citation 等;仅写根地址时会自动补 /v1/chat/completions
|
||||||
|
;chat_url=http://chat.taimed.cn/v1/chat/completions
|
||||||
|
;chat_model=DeepSeek-Coder-V2-Instruct
|
||||||
|
|
||||||
[promotion]
|
[promotion]
|
||||||
PROMOTION_LLM_URL=http://chat.taimed.cn/v1/chat/completions
|
PROMOTION_LLM_URL=http://chat.taimed.cn/v1/chat/completions
|
||||||
PROMOTION_LLM_MODEL=DeepSeek-Coder-V2-Instruct
|
PROMOTION_LLM_MODEL=DeepSeek-Coder-V2-Instruct
|
||||||
@@ -33,6 +43,18 @@ UNSUBSCRIBE_BASE_URL=https://submission.tmrjournals.com/api/Unsubscribe/index
|
|||||||
[yboard]
|
[yboard]
|
||||||
APPLY_URL="https://submission.tmrjournals.com/youthBoardRegister"
|
APPLY_URL="https://submission.tmrjournals.com/youthBoardRegister"
|
||||||
|
|
||||||
|
[turnitin]
|
||||||
|
viewer_permission_set=ADMINISTRATOR
|
||||||
|
viewer_locale=en-US
|
||||||
|
; viewer-url 必填 viewer_user_id;默认用查重记录的 triggered_by → editor_{id},也可写死:
|
||||||
|
;viewer_user_id=editor_1
|
||||||
|
; 与 Crossref 网页手动查重对齐:三项默认 0(不排除引用/参考文献/引文)。若只要正文相似度可改为 1
|
||||||
|
exclude_quotes=0
|
||||||
|
exclude_bibliography=0
|
||||||
|
exclude_citations=0
|
||||||
|
; 在线报告默认视图:all_sources=按来源库分类(与 Crossref 手动后台一致);match_overview=匹配总览(文中编号易都显示为 1)
|
||||||
|
viewer_default_mode=all_sources
|
||||||
|
|
||||||
[plagiarism]
|
[plagiarism]
|
||||||
static_root="/home/wwwroot/api.tmrjournals.com/public"
|
static_root="/home/wwwroot/api.tmrjournals.com/public"
|
||||||
|
|
||||||
|
|||||||
@@ -598,6 +598,10 @@ class Article extends Base
|
|||||||
$article_res['is_draft'] = 1;
|
$article_res['is_draft'] = 1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
//新增是否存在生产实例 20260204 start
|
||||||
|
$article_res['has_produce'] = $this->production_article_obj->where('article_id', $data['articleId'])->where('state', 0)->find()?1:0;
|
||||||
|
|
||||||
|
|
||||||
//新增是否是草稿删除 20260204 end
|
//新增是否是草稿删除 20260204 end
|
||||||
return json(['article' => $article_res, 'msg' => $article_msg, 'authors' => $author_res, 'suggest' => $suggest, 'transfer' => $transfer_res, 'transinfo' => $transfer_info, "major" => $major,'suggest_final' => $aFinal]);
|
return json(['article' => $article_res, 'msg' => $article_msg, 'authors' => $author_res, 'suggest' => $suggest, 'transfer' => $transfer_res, 'transinfo' => $transfer_info, "major" => $major,'suggest_final' => $aFinal]);
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -567,6 +567,8 @@ class Board extends Base {
|
|||||||
return jsonError($rule->getError());
|
return jsonError($rule->getError());
|
||||||
}
|
}
|
||||||
$check = $this->board_to_journal_obj->where('user_id',$data['user_id'])->where('state',0)->find();
|
$check = $this->board_to_journal_obj->where('user_id',$data['user_id'])->where('state',0)->find();
|
||||||
|
$journal_info = $this->journal_obj->where('journal_id',$data['journal_id'])->find();
|
||||||
|
$user_info = $this->user_obj->where('user_id',$data['user_id'])->find();
|
||||||
if($check){
|
if($check){
|
||||||
return jsonError("According to TMR Publishing Group Policy, scientists are not allowed to serve on the editorial board of more than one journal at the same time.");
|
return jsonError("According to TMR Publishing Group Policy, scientists are not allowed to serve on the editorial board of more than one journal at the same time.");
|
||||||
}
|
}
|
||||||
@@ -578,6 +580,18 @@ class Board extends Base {
|
|||||||
$insert['board_group_id'] = $data['board_group_id'];
|
$insert['board_group_id'] = $data['board_group_id'];
|
||||||
$insert['research_areas'] = trim($data['research_areas']);
|
$insert['research_areas'] = trim($data['research_areas']);
|
||||||
$this->board_to_journal_obj->insert($insert);
|
$this->board_to_journal_obj->insert($insert);
|
||||||
|
|
||||||
|
$reviewer_journal = $this->reviewer_to_journal_obj->where("reviewer_id",$user_info['user_id'])->where("journal_id",$journal_info['journal_id'])->find();
|
||||||
|
if(!$reviewer_journal){
|
||||||
|
$insert_reviewer['reviewer_id'] = $user_info['user_id'];
|
||||||
|
$insert_reviewer['journal_id'] = $journal_info['journal_id'];
|
||||||
|
$insert_reviewer['account'] = $user_info['account'];
|
||||||
|
$insert_reviewer['journal_title'] = $journal_info['title'];
|
||||||
|
$insert_reviewer['ctime'] = time();
|
||||||
|
$this->reviewer_to_journal_obj->insert($insert_reviewer);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
return jsonSuccess([]);
|
return jsonSuccess([]);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -115,7 +115,6 @@ class Email extends Base
|
|||||||
public function pushEmailOnTemplate()
|
public function pushEmailOnTemplate()
|
||||||
{
|
{
|
||||||
|
|
||||||
die();
|
|
||||||
|
|
||||||
$data = $this->request->post();
|
$data = $this->request->post();
|
||||||
$rule = new Validate([
|
$rule = new Validate([
|
||||||
|
|||||||
@@ -1406,8 +1406,8 @@ class EmailClient extends Base
|
|||||||
return jsonError('Factory is disabled');
|
return jsonError('Factory is disabled');
|
||||||
}
|
}
|
||||||
$expertType = intval($factory['expert_type']);
|
$expertType = intval($factory['expert_type']);
|
||||||
if (!in_array($expertType, [2, 3, 5], true)) {
|
if (!in_array($expertType, [2, 3, 4, 5, 6], true)) {
|
||||||
return jsonError('Only expert_type=2(Editorial Board), 3(Young Editorial Board) or 5(Expert pool) is supported currently');
|
return jsonError('Unsupported expert_type; supported: 2=编委, 3=青年编委, 4=作者, 5=专家库, 6=往期青年编委');
|
||||||
}
|
}
|
||||||
|
|
||||||
$journalId = intval($factory['journal_id']);
|
$journalId = intval($factory['journal_id']);
|
||||||
@@ -2260,7 +2260,7 @@ class EmailClient extends Base
|
|||||||
* 每日自动生成推广任务(由 Linux crontab 调用)
|
* 每日自动生成推广任务(由 Linux crontab 调用)
|
||||||
*
|
*
|
||||||
* 逻辑:
|
* 逻辑:
|
||||||
* 1. 查询所有 state=0 的任务工厂(支持 expert_type=2 编委 / =5 expert 库;其他类型预留)
|
* 1. 查询所有 state=0 的任务工厂(支持 expert_type=2/3/4/5/6)
|
||||||
* 2. JOIN journal 确认期刊有效(state=0, start_promotion=1)
|
* 2. JOIN journal 确认期刊有效(state=0, start_promotion=1)
|
||||||
* 3. 按 factory_id + send_date 检查去重
|
* 3. 按 factory_id + send_date 检查去重
|
||||||
* 4. template/style: 工厂 > 0 用工厂的,否则用期刊默认
|
* 4. template/style: 工厂 > 0 用工厂的,否则用期刊默认
|
||||||
@@ -2282,7 +2282,7 @@ class EmailClient extends Base
|
|||||||
->alias('f')
|
->alias('f')
|
||||||
->join('t_journal j', 'j.journal_id = f.journal_id', 'inner')
|
->join('t_journal j', 'j.journal_id = f.journal_id', 'inner')
|
||||||
->where('f.state', 0)
|
->where('f.state', 0)
|
||||||
->where('f.expert_type', 'in', [2, 3, 5])
|
->where('f.expert_type', 'in', [2, 3, 4, 5, 6])
|
||||||
->where('j.state', 0)
|
->where('j.state', 0)
|
||||||
->where('f.start_promotion', 1)
|
->where('f.start_promotion', 1)
|
||||||
->field('f.*, j.title as journal_title, j.default_template_id, j.default_style_id')
|
->field('f.*, j.title as journal_title, j.default_template_id, j.default_style_id')
|
||||||
@@ -2684,6 +2684,7 @@ class EmailClient extends Base
|
|||||||
3 => 'Young Editorial Board',
|
3 => 'Young Editorial Board',
|
||||||
4 => 'Author',
|
4 => 'Author',
|
||||||
5 => 'Expert Pool',
|
5 => 'Expert Pool',
|
||||||
|
6 => 'Past Young Editorial Board',
|
||||||
];
|
];
|
||||||
return isset($map[intval($t)]) ? $map[intval($t)] : 'Unknown';
|
return isset($map[intval($t)]) ? $map[intval($t)] : 'Unknown';
|
||||||
}
|
}
|
||||||
@@ -2692,8 +2693,10 @@ class EmailClient extends Base
|
|||||||
* 根据 expert_type 分发选人逻辑
|
* 根据 expert_type 分发选人逻辑
|
||||||
*
|
*
|
||||||
* - expert_type = 5:从 t_expert 库选人(按领域 / 国家 / 频次)
|
* - expert_type = 5:从 t_expert 库选人(按领域 / 国家 / 频次)
|
||||||
* - expert_type ∈ {1,2,3,4}:从系统内部表选人(主编/编委/青年编委/作者),fields 与国家筛选忽略;
|
* 频次:e.ltime(成功发送后回写)+ t_promotion_email_log 中「待发送 state=0 的入队时间 ctime」
|
||||||
* 频次按 t_promotion_email_log 中相同 expert_type 维度的最近发送时间扣除
|
* (避免「今日生成任务明日发送」时 ltime 未变导致连续两天选到同一拨人)
|
||||||
|
* - expert_type ∈ {1,2,3,4,6}:从系统内部表选人(主编/编委/青年编委/作者/往期青年编委),fields 与国家筛选忽略;
|
||||||
|
* 频次按 t_promotion_email_log:已发/退信用 send_time;待发送队列用 ctime(同上)
|
||||||
*
|
*
|
||||||
* 返回行 shape 已对齐:
|
* 返回行 shape 已对齐:
|
||||||
* - type=5 行包含 e.* 全部字段(含 expert_id、country_id、ltime 等)
|
* - type=5 行包含 e.* 全部字段(含 expert_id、country_id、ltime 等)
|
||||||
@@ -2741,9 +2744,25 @@ class EmailClient extends Base
|
|||||||
|
|
||||||
if ($noRepeatDays > 0) {
|
if ($noRepeatDays > 0) {
|
||||||
$cutoff = time() - ($noRepeatDays * 86400);
|
$cutoff = time() - ($noRepeatDays * 86400);
|
||||||
|
// ltime:成功发出后回写;与 log 中 state=1 在「已送达」上部分重叠,但保留 ltime 可走索引、且退信 state=3 未必回写 ltime。
|
||||||
$query->where(function ($q) use ($cutoff) {
|
$query->where(function ($q) use ($cutoff) {
|
||||||
$q->where('e.ltime', 0)->whereOr('e.ltime', '<', $cutoff);
|
$q->where('e.ltime', 0)->whereOr('e.ltime', '<', $cutoff);
|
||||||
});
|
});
|
||||||
|
// 一条 NOT EXISTS:待发(state=0 按 ctime) 或 已发/退信(按 send_time),避免两段相同 join 的重复感
|
||||||
|
$query->where(function ($q) use ($cutoff) {
|
||||||
|
$q->table('t_promotion_email_log')->alias('pl')
|
||||||
|
->join('t_promotion_task pt', 'pt.task_id = pl.task_id', 'inner')
|
||||||
|
->where('pt.expert_type', 5)
|
||||||
|
->where('pl.expert_id', '>', 0)
|
||||||
|
->whereRaw('pl.expert_id = e.expert_id')
|
||||||
|
->where(function ($w) use ($cutoff) {
|
||||||
|
$w->where(function ($a) use ($cutoff) {
|
||||||
|
$a->where('pl.state', 0)->where('pl.ctime', '>', $cutoff);
|
||||||
|
})->whereOr(function ($b) use ($cutoff) {
|
||||||
|
$b->where('pl.state', 'in', [1, 3])->where('pl.send_time', '>', $cutoff);
|
||||||
|
});
|
||||||
|
});
|
||||||
|
}, 'not exists');
|
||||||
}
|
}
|
||||||
|
|
||||||
$countryIds = $this->resolveCountryIds($targetPartitions, $targetCountryIds);
|
$countryIds = $this->resolveCountryIds($targetPartitions, $targetCountryIds);
|
||||||
@@ -2762,9 +2781,9 @@ class EmailClient extends Base
|
|||||||
* 系统内部受众选人(编委 / 主编 / 青年编委 / 作者)
|
* 系统内部受众选人(编委 / 主编 / 青年编委 / 作者)
|
||||||
* 仅按 期刊 + 频次 过滤;领域 / 国家无关
|
* 仅按 期刊 + 频次 过滤;领域 / 国家无关
|
||||||
*
|
*
|
||||||
* 频次:扣除「同 expert_type 维度下,no_repeat_days 内已经发出 (state=1) 或退信 (state=3) 的人」
|
* 频次:扣除「同 expert_type 下,no_repeat_days 内 (1) 已发出或退信,或 (2) 仍在队列待发送(state=0,按 ctime)」的人
|
||||||
*
|
*
|
||||||
* @param int $expertType 1=主编 2=编委 3=青年编委 4=作者
|
* @param int $expertType 1=主编 2=编委 3=青年编委 4=作者 6=往期青年编委
|
||||||
* @param int $journalId
|
* @param int $journalId
|
||||||
* @param int $noRepeatDays
|
* @param int $noRepeatDays
|
||||||
* @param int $limit
|
* @param int $limit
|
||||||
@@ -2799,23 +2818,52 @@ class EmailClient extends Base
|
|||||||
break;
|
break;
|
||||||
|
|
||||||
case 1: // 主编(预留,本期不实现)
|
case 1: // 主编(预留,本期不实现)
|
||||||
case 4: // 作者(预留)
|
return [];
|
||||||
|
|
||||||
|
case 4: // 作者:该刊投稿作者(按邮箱关联 t_user)
|
||||||
|
$query = Db::name('article_author')->alias('aa')
|
||||||
|
->join('t_user u', 'u.email = aa.email', 'inner')
|
||||||
|
->join('t_article a', 'a.article_id = aa.article_id', 'inner')
|
||||||
|
->join('t_user_reviewer_info uri', 'uri.reviewer_id = u.user_id', 'left')
|
||||||
|
->where('a.journal_id', $journalId)
|
||||||
|
->where('u.email', '<>', '')
|
||||||
|
->where('u.unsubscribed', 0);
|
||||||
|
break;
|
||||||
|
case 6: //获取往期的青年编委2025年以前的,中国人
|
||||||
|
$now = strtotime('2025-01-01');
|
||||||
|
$query = Db::name('user_to_yboard')->alias('y')
|
||||||
|
->join('t_user u', 'u.user_id = y.user_id', 'inner')
|
||||||
|
->join('t_user_reviewer_info uri', 'uri.reviewer_id = u.user_id', 'left')
|
||||||
|
->where('y.journal_id', $journalId)
|
||||||
|
->where('y.state', 0)
|
||||||
|
->where('y.start_date', '<=', $now)
|
||||||
|
->where('uri.country', 'China')
|
||||||
|
->where('u.email', '<>', '')
|
||||||
|
->where('u.unsubscribed', 0);
|
||||||
|
break;//
|
||||||
default:
|
default:
|
||||||
return [];
|
return [];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (!isset($query)) {
|
||||||
|
return [];
|
||||||
|
}
|
||||||
|
|
||||||
if ($noRepeatDays > 0) {
|
if ($noRepeatDays > 0) {
|
||||||
$cutoff = intval(time() - ($noRepeatDays * 86400));
|
$cutoff = intval(time() - ($noRepeatDays * 86400));
|
||||||
$expertTypeSafe = intval($expertType);
|
$expertTypeSafe = intval($expertType);
|
||||||
// 关联子查询:相对于 NOT IN,避免把全部已发 user_id 拉到 PHP 再拼回 SQL;
|
|
||||||
// 配合 t_promotion_email_log(user_id, send_time) 复合索引做半连接探针,常量时间。
|
|
||||||
$query->where(function ($q) use ($expertTypeSafe, $cutoff) {
|
$query->where(function ($q) use ($expertTypeSafe, $cutoff) {
|
||||||
$q->table('t_promotion_email_log')->alias('l')
|
$q->table('t_promotion_email_log')->alias('l')
|
||||||
->join('t_promotion_task t', 't.task_id = l.task_id', 'inner')
|
->join('t_promotion_task t', 't.task_id = l.task_id', 'inner')
|
||||||
->where('t.expert_type', $expertTypeSafe)
|
->where('t.expert_type', $expertTypeSafe)
|
||||||
->where('l.state', 'in', [1, 3])
|
->whereRaw('l.user_id = u.user_id')
|
||||||
->where('l.send_time', '>', $cutoff)
|
->where(function ($w) use ($cutoff) {
|
||||||
->whereRaw('l.user_id = u.user_id');
|
$w->where(function ($a) use ($cutoff) {
|
||||||
|
$a->where('l.state', 0)->where('l.ctime', '>', $cutoff);
|
||||||
|
})->whereOr(function ($b) use ($cutoff) {
|
||||||
|
$b->where('l.state', 'in', [1, 3])->where('l.send_time', '>', $cutoff);
|
||||||
|
});
|
||||||
|
});
|
||||||
}, 'not exists');
|
}, 'not exists');
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -2,15 +2,17 @@
|
|||||||
|
|
||||||
namespace app\api\controller;
|
namespace app\api\controller;
|
||||||
|
|
||||||
|
use app\common\TurnitinService;
|
||||||
use think\Db;
|
use think\Db;
|
||||||
use think\Response;
|
use think\Response;
|
||||||
use app\common\PlagiarismService;
|
use app\common\PlagiarismService;
|
||||||
|
use think\Validate;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* 论文查重(Turnitin / Crossref Similarity Check)控制器。
|
* 论文查重(Turnitin / Crossref Similarity Check)控制器。
|
||||||
*
|
*
|
||||||
* 触发方式:纯手工(编辑后台点"查重"按钮)。
|
* 触发方式:纯手工(编辑后台点"查重"按钮)。
|
||||||
* 报告策略:在线 viewer URL 临时签名 + PDF 永久落盘 runtime/plagiarism/。
|
* 报告策略:PDF 在 poll 完成时落盘;在线 viewer URL 通过 getReportUrl 按需生成(临时签名)。
|
||||||
*
|
*
|
||||||
* 主要接口:
|
* 主要接口:
|
||||||
* POST submit 触发查重
|
* POST submit 触发查重
|
||||||
@@ -35,12 +37,14 @@ class Plagiarism extends Base
|
|||||||
* article_id 必填
|
* article_id 必填
|
||||||
* file_url 选填;不传则按 article_id 在 t_article_file 找 manuscirpt
|
* file_url 选填;不传则按 article_id 在 t_article_file 找 manuscirpt
|
||||||
* editor_id 选填;触发人 user_id(前端拿不到也可以传 0)
|
* editor_id 选填;触发人 user_id(前端拿不到也可以传 0)
|
||||||
|
* check_type 选填;full(默认全文)| body_only(正文)| both(各提交一条)
|
||||||
*/
|
*/
|
||||||
public function submit()
|
public function submit()
|
||||||
{
|
{
|
||||||
$articleId = intval($this->request->param('article_id', 0));
|
$articleId = intval($this->request->param('article_id', 0));
|
||||||
$fileUrl = trim($this->request->param('file_url', ''));
|
$fileUrl = trim($this->request->param('file_url', ''));
|
||||||
$editorId = intval($this->request->param('editor_id', 0));
|
$editorId = intval($this->request->param('editor_id', 0));
|
||||||
|
$checkType = trim($this->request->param('check_type', 'full'));
|
||||||
|
|
||||||
if ($articleId <= 0) {
|
if ($articleId <= 0) {
|
||||||
return jsonError('article_id required');
|
return jsonError('article_id required');
|
||||||
@@ -51,21 +55,79 @@ class Plagiarism extends Base
|
|||||||
$localPath = $fileUrl !== ''
|
$localPath = $fileUrl !== ''
|
||||||
? $svc->resolveFileUrlToLocal($fileUrl)
|
? $svc->resolveFileUrlToLocal($fileUrl)
|
||||||
: $svc->locateArticleManuscript($articleId);
|
: $svc->locateArticleManuscript($articleId);
|
||||||
echo $localPath;
|
if (strtolower($checkType) === 'both') {
|
||||||
$checkId = $svc->submit($articleId, $localPath, $editorId, 'manual');
|
$ids = $svc->submitBoth($articleId, $localPath, $editorId, 'manual');
|
||||||
return jsonSuccess(['check_id' => $checkId]);
|
return jsonSuccess($ids);
|
||||||
|
}
|
||||||
|
$checkId = $svc->submit($articleId, $localPath, $editorId, 'manual', $checkType);
|
||||||
|
return jsonSuccess(['check_id' => $checkId, 'check_type' => strtolower($checkType) ?: 'full']);
|
||||||
} catch (\Throwable $e) {
|
} catch (\Throwable $e) {
|
||||||
return jsonError($e->getMessage());
|
return jsonError($e->getMessage());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
public function testccone(){
|
/**
|
||||||
|
* 调试:与线上一致走队列链(upload → wait ingest → trigger → poll),需 worker 消费 plagiarism 队列。
|
||||||
|
*/
|
||||||
|
public function testccone()
|
||||||
|
{
|
||||||
$svc = new PlagiarismService();
|
$svc = new PlagiarismService();
|
||||||
$checkId = 9;
|
$checkId = 9;
|
||||||
$filePath = "/home/wwwroot/api.tmrjournals.com/public/manuscirpt/20260509/6832a56e8ace38fe99df390ab5221deb.docx";
|
$filePath = '/home/wwwroot/api.tmrjournals.com/public/manuscirpt/20260509/6832a56e8ace38fe99df390ab5221deb.docx';
|
||||||
$svc->runUploadAndTrigger($checkId,$filePath);
|
$svc->runUploadOnly($checkId, $filePath);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Returns whatever TurnitinService::parseSubmissionIngestState() reports for the posted id.
 *
 * NOTE(review): looks like a manual debugging endpoint (going by the test* name) — confirm.
 *
 * POST parameters:
 *   id  required; handed to the service unchanged
 *       (presumably a Turnitin submission id — verify against the service)
 */
public function testcconegetstatus()
{
    $payload = $this->request->post();

    $validator = new Validate([
        'id' => 'require',
    ]);
    if (!$validator->check($payload)) {
        return jsonError($validator->getError());
    }

    $turnitin = new TurnitinService();
    $ingestState = $turnitin->parseSubmissionIngestState($payload['id']);

    return jsonSuccess($ingestState);
}
|
||||||
|
|
||||||
|
/**
 * Runs PlagiarismService::runIngestPollStep() for the posted checkId and returns its result.
 *
 * NOTE(review): looks like a manual debugging endpoint (going by the test* name) — confirm.
 *
 * POST parameters:
 *   checkId  required; handed to the service unchanged
 */
public function testcconewait()
{
    $payload = $this->request->post();

    $validator = new Validate([
        'checkId' => 'require',
    ]);
    if (!$validator->check($payload)) {
        return jsonError($validator->getError());
    }

    $service = new PlagiarismService();
    $stepResult = $service->runIngestPollStep($payload['checkId']);

    return jsonSuccess($stepResult);
}
|
||||||
|
|
||||||
|
/**
 * Runs PlagiarismService::runTriggerSimilarityOnly() for the posted checkId and returns its result.
 *
 * NOTE(review): looks like a manual debugging endpoint (going by the test* name) — confirm.
 *
 * POST parameters:
 *   checkId  required; handed to the service unchanged
 */
public function testcconesimilar()
{
    $payload = $this->request->post();

    $validator = new Validate([
        'checkId' => 'require',
    ]);
    if (!$validator->check($payload)) {
        return jsonError($validator->getError());
    }

    $service = new PlagiarismService();
    $triggerResult = $service->runTriggerSimilarityOnly($payload['checkId']);

    return jsonSuccess($triggerResult);
}
|
||||||
|
|
||||||
|
public function testcconelast(){
|
||||||
|
$data = $this->request->post();
|
||||||
|
$rule = new Validate([
|
||||||
|
"checkId"=>"require"
|
||||||
|
]);
|
||||||
|
if(!$rule->check($data)){
|
||||||
|
return jsonError($rule->getError());
|
||||||
|
}
|
||||||
|
$svc = new PlagiarismService();
|
||||||
|
$re = $svc->runPollStatus($data['checkId']);
|
||||||
|
return jsonSuccess($re);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -89,7 +151,15 @@ class Plagiarism extends Base
|
|||||||
if (!$row) {
|
if (!$row) {
|
||||||
return jsonError('not found');
|
return jsonError('not found');
|
||||||
}
|
}
|
||||||
return jsonSuccess($this->formatRow($row));
|
$out = $this->formatRow($row);
|
||||||
|
if (!empty($row['raw_response'])) {
|
||||||
|
$raw = json_decode($row['raw_response'], true);
|
||||||
|
if (is_array($raw)) {
|
||||||
|
$out['similarity_meta'] = \app\common\TurnitinService::parseSimilarityReportMeta($raw);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
$out['report_view_hint'] = 'PDF 多为 Match Overview 汇总样式;按来源库(Internet/Publication/Crossref)分类请用 getReportUrl 打开在线报告并切到 All Sources';
|
||||||
|
return jsonSuccess($out);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -113,11 +183,18 @@ class Plagiarism extends Base
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* 取在线查看 URL;过期则自动刷新
|
* 取在线查看 URL(Turnitin 一次性会话链接,关闭报告页后勿复用旧 URL)
|
||||||
|
*
|
||||||
|
* 入参:
|
||||||
|
* check_id 必填
|
||||||
|
* editor_id 选填,当前打开报告的编辑 user_id(与 viewer_user_id 对应,避免 session 认证失败)
|
||||||
|
* reuse 选填,1=在未过期时复用库内缓存;默认 0,每次调用重新向 Turnitin 申请
|
||||||
*/
|
*/
|
||||||
public function getReportUrl()
|
public function getReportUrl()
|
||||||
{
|
{
|
||||||
$checkId = intval($this->request->param('check_id', 0));
|
$checkId = intval($this->request->param('check_id', 0));
|
||||||
|
$editorId = intval($this->request->param('editor_id', 0));
|
||||||
|
$reuse = intval($this->request->param('reuse', 0)) === 1;
|
||||||
if ($checkId <= 0) {
|
if ($checkId <= 0) {
|
||||||
return jsonError('check_id required');
|
return jsonError('check_id required');
|
||||||
}
|
}
|
||||||
@@ -129,22 +206,48 @@ class Plagiarism extends Base
|
|||||||
if ($row['state'] != 3) {
|
if ($row['state'] != 3) {
|
||||||
return jsonError('check not completed yet, state=' . $row['state']);
|
return jsonError('check not completed yet, state=' . $row['state']);
|
||||||
}
|
}
|
||||||
$needRefresh = empty($row['view_only_url'])
|
$viewerContext = [];
|
||||||
|
if ($editorId > 0) {
|
||||||
|
$viewerContext['editor_id'] = $editorId;
|
||||||
|
}
|
||||||
|
$needRefresh = !$reuse
|
||||||
|
|| empty($row['view_only_url'])
|
||||||
|| intval($row['view_only_url_expire']) < time() + 60;
|
|| intval($row['view_only_url_expire']) < time() + 60;
|
||||||
|
|
||||||
|
$usageHint = '每次打开请先调用本接口获取新链接;勿收藏或再次打开旧链接。请在新标签页打开,并允许 Turnitin 域名 Cookie。';
|
||||||
|
|
||||||
if ($needRefresh) {
|
if ($needRefresh) {
|
||||||
$svc = new PlagiarismService();
|
$svc = new PlagiarismService();
|
||||||
$info = $svc->refreshViewerUrlFor($checkId);
|
$info = $svc->refreshViewerUrlFor($checkId, $viewerContext);
|
||||||
|
if ($info['url'] === '') {
|
||||||
|
return jsonError('Turnitin returned empty viewer_url');
|
||||||
|
}
|
||||||
return jsonSuccess([
|
return jsonSuccess([
|
||||||
'view_only_url' => $info['url'],
|
'view_only_url' => $info['url'],
|
||||||
'expire' => $info['expire'],
|
'expire' => $info['expire'],
|
||||||
|
'has_pdf' => !empty($info['local_pdf']),
|
||||||
|
'viewer_user_id' => $info['viewer_user_id'],
|
||||||
|
'refreshed' => true,
|
||||||
|
'usage_hint' => $usageHint,
|
||||||
]);
|
]);
|
||||||
}
|
}
|
||||||
return jsonSuccess([
|
return jsonSuccess([
|
||||||
'view_only_url' => $row['view_only_url'],
|
'view_only_url' => $row['view_only_url'],
|
||||||
'expire' => intval($row['view_only_url_expire']),
|
'expire' => intval($row['view_only_url_expire']),
|
||||||
|
'has_pdf' => !empty($row['pdf_local_path']),
|
||||||
|
'refreshed' => false,
|
||||||
|
'usage_hint' => $usageHint,
|
||||||
]);
|
]);
|
||||||
} catch (\Throwable $e) {
|
} catch (\Throwable $e) {
|
||||||
|
if (!empty($row['pdf_local_path'])) {
|
||||||
|
return jsonSuccess([
|
||||||
|
'view_only_url' => '',
|
||||||
|
'expire' => 0,
|
||||||
|
'has_pdf' => true,
|
||||||
|
'viewer_error' => $e->getMessage(),
|
||||||
|
'hint' => '在线报告暂不可用,请使用 downloadReport 下载 PDF',
|
||||||
|
]);
|
||||||
|
}
|
||||||
return jsonError($e->getMessage());
|
return jsonError($e->getMessage());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -201,10 +304,14 @@ class Plagiarism extends Base
|
|||||||
'similarity_score' => floatval($r['similarity_score']),
|
'similarity_score' => floatval($r['similarity_score']),
|
||||||
'tii_report_status' => (string)$r['tii_report_status'],
|
'tii_report_status' => (string)$r['tii_report_status'],
|
||||||
'has_pdf' => !empty($r['pdf_local_path']),
|
'has_pdf' => !empty($r['pdf_local_path']),
|
||||||
|
'local_pdf_url' => $r['pdf_local_path'],
|
||||||
'has_viewer_url' => !empty($r['view_only_url']) && intval($r['view_only_url_expire']) > time(),
|
'has_viewer_url' => !empty($r['view_only_url']) && intval($r['view_only_url_expire']) > time(),
|
||||||
'attempts' => intval($r['attempts']),
|
'attempts' => intval($r['attempts']),
|
||||||
'error_msg' => (string)$r['error_msg'],
|
'error_msg' => (string)$r['error_msg'],
|
||||||
'source_file_name' => (string)$r['source_file_name'],
|
'source_file_name' => (string)$r['source_file_name'],
|
||||||
|
'check_type' => (string)($r['check_type'] ?? 'full'),
|
||||||
|
'check_type_label' => $this->checkTypeLabel($r['check_type'] ?? 'full'),
|
||||||
|
'derived_file_path'=> (string)($r['derived_file_path'] ?? ''),
|
||||||
'trigger_source' => (string)$r['trigger_source'],
|
'trigger_source' => (string)$r['trigger_source'],
|
||||||
'triggered_by' => intval($r['triggered_by']),
|
'triggered_by' => intval($r['triggered_by']),
|
||||||
'ctime' => intval($r['ctime']),
|
'ctime' => intval($r['ctime']),
|
||||||
@@ -212,6 +319,15 @@ class Plagiarism extends Base
|
|||||||
];
|
];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Translate a check_type value into its display label.
 *
 * The value is cast to string, trimmed and lower-cased before comparison.
 * "body_only" and "body" yield the body-only label; every other value
 * (including an empty one) yields the full-text label.
 *
 * @param mixed $checkType raw check_type value
 * @return string display label
 */
private function checkTypeLabel($checkType)
{
    $normalized = strtolower(trim((string) $checkType));
    $isBodyOnly = in_array($normalized, ['body_only', 'body'], true);

    return $isBodyOnly ? '正文查重' : '全文查重';
}
|
||||||
|
|
||||||
private function stateLabel($state)
|
private function stateLabel($state)
|
||||||
{
|
{
|
||||||
$map = [
|
$map = [
|
||||||
|
|||||||
@@ -1030,6 +1030,7 @@ class References extends Base
|
|||||||
* AI检测
|
* AI检测
|
||||||
*/
|
*/
|
||||||
public function checkByAi($aParam = []){
|
public function checkByAi($aParam = []){
|
||||||
|
return jsonError("service is stop!");
|
||||||
//获取参数
|
//获取参数
|
||||||
$aParam = empty($aParam) ? $this->request->post() : $aParam;
|
$aParam = empty($aParam) ? $this->request->post() : $aParam;
|
||||||
|
|
||||||
|
|||||||
@@ -2299,14 +2299,14 @@ class Reviewer extends Base
|
|||||||
->count();
|
->count();
|
||||||
|
|
||||||
if(empty($count)){
|
if(empty($count)){
|
||||||
return jsonSuccess(['reviewers' => [],'count' => 0]);
|
return jsonSuccess(['reviewers' => [],'count' => 0,"sql"=>$this->reviewer_to_journal_obj->getLastSql()]);
|
||||||
}
|
}
|
||||||
|
|
||||||
//获取数据
|
//获取数据
|
||||||
$list = $this->reviewer_to_journal_obj
|
$list = $this->reviewer_to_journal_obj
|
||||||
->join("t_user", "t_user.user_id = t_reviewer_to_journal.reviewer_id", "left")
|
->join("t_user", "t_user.user_id = t_reviewer_to_journal.reviewer_id", "left")
|
||||||
->join("t_user_reviewer_info", "t_user_reviewer_info.reviewer_id = t_reviewer_to_journal.reviewer_id", "left")
|
->join("t_user_reviewer_info", "t_user_reviewer_info.reviewer_id = t_reviewer_to_journal.reviewer_id", "left")
|
||||||
->field('t_user.account,t_user.email,t_user.realname,t_user_reviewer_info.company,t_user_reviewer_info.field,t_user.user_id,t_user.rs_num')
|
->field('t_user.account,t_user.email,t_user.realname,t_user_reviewer_info.company,t_user_reviewer_info.field,t_user_reviewer_info.last_invite_time,t_user.user_id,t_user.rs_num')
|
||||||
->where($where)->where(function($query) use ($iTeenDaysLater) {
|
->where($where)->where(function($query) use ($iTeenDaysLater) {
|
||||||
$query->where('t_user_reviewer_info.last_invite_time', '<', $iTeenDaysLater)
|
$query->where('t_user_reviewer_info.last_invite_time', '<', $iTeenDaysLater)
|
||||||
->whereOr('t_user_reviewer_info.last_invite_time', '=', 0);
|
->whereOr('t_user_reviewer_info.last_invite_time', '=', 0);
|
||||||
|
|||||||
@@ -214,6 +214,21 @@ class User extends Base
|
|||||||
$insert['ctime'] = time();
|
$insert['ctime'] = time();
|
||||||
$this->user_to_yboard_obj->insert($insert);
|
$this->user_to_yboard_obj->insert($insert);
|
||||||
|
|
||||||
|
//将此人添加到审稿人中
|
||||||
|
$reviewer_journal = $this->reviewer_to_journal_obj->where("reviewer_id",$user_info['user_id'])->where("journal_id",$journal_info['journal_id'])->find();
|
||||||
|
if($reviewer_journal){
|
||||||
|
$this->reviewer_to_journal_obj->where("rtj_id",$reviewer_journal['rtj_id'])->update(['is_yboard'=>1]);
|
||||||
|
}else{
|
||||||
|
$insert_reviewer['reviewer_id'] = $user_info['user_id'];
|
||||||
|
$insert_reviewer['journal_id'] = $journal_info['journal_id'];
|
||||||
|
$insert_reviewer['account'] = $user_info['account'];
|
||||||
|
$insert_reviewer['journal_title'] = $journal_info['title'];
|
||||||
|
$insert_reviewer['is_yboard'] = 1;
|
||||||
|
$insert_reviewer['ctime'] = time();
|
||||||
|
$this->reviewer_to_journal_obj->insert($insert_reviewer);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
//发送通知邮件给用户
|
//发送通知邮件给用户
|
||||||
$tt = 'Dear Dr. ' . ($user_info['realname'] == '' ? $user_info['account'] : $user_info['realname']) . ',<br><br>';
|
$tt = 'Dear Dr. ' . ($user_info['realname'] == '' ? $user_info['account'] : $user_info['realname']) . ',<br><br>';
|
||||||
$tt .= "Thanks for your support to the journal ".$journal_info['title'].", Please note that your account of ".$journal_info['title']." has been created. The login credentials in the system is as below:<br><br>";
|
$tt .= "Thanks for your support to the journal ".$journal_info['title'].", Please note that your account of ".$journal_info['title']." has been created. The login credentials in the system is as below:<br><br>";
|
||||||
|
|||||||
92
application/api/controller/UserFieldAi.php
Normal file
92
application/api/controller/UserFieldAi.php
Normal file
@@ -0,0 +1,92 @@
|
|||||||
|
<?php
|
||||||
|
|
||||||
|
namespace app\api\controller;
|
||||||
|
|
||||||
|
use think\Db;
|
||||||
|
use think\Validate;
|
||||||
|
use app\common\UserFieldAiService;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 用户主领域 AI 总结(写入 t_user_reviewer_info.field_ai)。
|
||||||
|
*
|
||||||
|
* POST startChain 启动链式队列(扫描全部符合条件的用户)
|
||||||
|
* POST processOne 同步处理单个 user_id(调试)
|
||||||
|
* GET preview 预览某用户是否 eligible 及上下文摘要
|
||||||
|
*/
|
||||||
|
class UserFieldAi extends Base
|
||||||
|
{
|
||||||
|
/**
|
||||||
|
* 启动链式处理。需 worker: php think queue:work --queue UserFieldAi
|
||||||
|
*/
|
||||||
|
public function startChain()
|
||||||
|
{
|
||||||
|
$force = intval($this->request->param('force', 0)) === 1;
|
||||||
|
$delay = max(0, intval($this->request->param('delay', 1)));
|
||||||
|
|
||||||
|
$svc = new UserFieldAiService();
|
||||||
|
$started = $svc->startChain($force, $delay);
|
||||||
|
|
||||||
|
return jsonSuccess([
|
||||||
|
'started' => $started,
|
||||||
|
'queue' => UserFieldAiService::QUEUE_NAME,
|
||||||
|
'force' => $force,
|
||||||
|
'msg' => $started ? 'chain enqueued' : 'no pending users',
|
||||||
|
]);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 同步处理单个用户(不调队列)。
|
||||||
|
*/
|
||||||
|
public function processOne()
|
||||||
|
{
|
||||||
|
$userId = intval($this->request->param('user_id', 0));
|
||||||
|
$force = intval($this->request->param('force', 0)) === 1;
|
||||||
|
if ($userId <= 0) {
|
||||||
|
return jsonError('user_id required');
|
||||||
|
}
|
||||||
|
|
||||||
|
$svc = new UserFieldAiService();
|
||||||
|
$result = $svc->processUser($userId, $force);
|
||||||
|
if (empty($result['ok'])) {
|
||||||
|
return jsonError(isset($result['error']) ? $result['error'] : 'failed');
|
||||||
|
}
|
||||||
|
return jsonSuccess($result);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 预览:是否满足条件、当前 field_ai 状态。
|
||||||
|
*/
|
||||||
|
public function preview()
|
||||||
|
{
|
||||||
|
$userId = intval($this->request->param('user_id', 0));
|
||||||
|
if ($userId <= 0) {
|
||||||
|
return jsonError('user_id required');
|
||||||
|
}
|
||||||
|
|
||||||
|
$svc = new UserFieldAiService();
|
||||||
|
$svc->ensureReviewerInfoRow($userId);
|
||||||
|
$uri = Db::name('user_reviewer_info')->where('reviewer_id', $userId)->find();
|
||||||
|
|
||||||
|
return jsonSuccess([
|
||||||
|
'user_id' => $userId,
|
||||||
|
'has_articles' => $svc->hasSubmittedArticles($userId),
|
||||||
|
'profile_complete' => $svc->isReviewerProfileComplete($uri),
|
||||||
|
'eligible' => $svc->isEligible($userId, $uri),
|
||||||
|
'field_ai' => $uri ? (string) $uri['field_ai'] : '',
|
||||||
|
'field_ai_status' => $uri ? intval($uri['field_ai_status']) : 0,
|
||||||
|
'field_ai_utime' => $uri ? intval($uri['field_ai_utime']) : 0,
|
||||||
|
'field_ai_status_text' => $this->statusLabel($uri ? intval($uri['field_ai_status']) : 0),
|
||||||
|
]);
|
||||||
|
}
|
||||||
|
|
||||||
|
private function statusLabel($status)
{
    // Human-readable label for each known field_ai status code.
    $labels = [
        UserFieldAiService::STATUS_PENDING      => 'pending',
        UserFieldAiService::STATUS_DONE         => 'done',
        UserFieldAiService::STATUS_INSUFFICIENT => 'insufficient',
        UserFieldAiService::STATUS_FAILED       => 'failed',
    ];

    if (!isset($labels[$status])) {
        return 'unknown';
    }
    return $labels[$status];
}
|
||||||
|
}
|
||||||
85
application/api/job/AiCheckReferByDoi.php
Normal file
85
application/api/job/AiCheckReferByDoi.php
Normal file
@@ -0,0 +1,85 @@
|
|||||||
|
<?php
|
||||||
|
namespace app\api\job;
|
||||||
|
use think\queue\Job;
|
||||||
|
use app\common\QueueJob;
|
||||||
|
use app\common\QueueRedis;
|
||||||
|
use think\Db;
|
||||||
|
class AiCheckReferByDoi
{
    /** @var QueueJob Helper used for job lifecycle logging, locking and retry handling. */
    private $oQueueJob;

    /** @var QueueRedis Redis wrapper used to record the "completed" marker. */
    private $QueueRedis;

    /** @var int Expiry handed to finishJob() for the completed marker (presumably seconds; confirm in QueueRedis). */
    private $completedExprie = 3600;

    public function __construct()
    {
        $this->oQueueJob = new QueueJob;
        $this->QueueRedis = QueueRedis::getInstance();
    }

    /**
     * Queue entry point: runs the AI reference check for one reference of one production article.
     *
     * The job is deleted without doing any work when either `p_article_id` or
     * `p_refer_id` is missing/empty in the payload.
     *
     * @param Job   $job  Queue job being executed.
     * @param array $data Payload; reads `p_article_id` and `p_refer_id`, and is passed through
     *                    unchanged to References::getCheckByAiResult().
     * @return void
     */
    public function fire(Job $job, $data)
    {
        // Let the helper run its start-of-job bookkeeping.
        $this->oQueueJob->init($job);

        // Pull the job id out of the raw queue payload, for logging only.
        $rawBody = empty($job->getRawBody()) ? '' : $job->getRawBody();
        $jobData = empty($rawBody) ? [] : json_decode($rawBody, true);
        $jobId = empty($jobData['id']) ? 'unknown' : $jobData['id'];

        $this->oQueueJob->log("-----------队列任务开始-----------");
        $this->oQueueJob->log("当前任务ID: {$jobId}, 尝试次数: {$job->attempts()}");

        // Production article id is mandatory.
        $iPArticleId = empty($data['p_article_id']) ? 0 : $data['p_article_id'];
        if (empty($iPArticleId)) {
            $this->oQueueJob->log("无效的p_article_id,删除任务");
            $job->delete();
            return;
        }

        // Reference id is mandatory too. This guard used to re-test $iPArticleId,
        // which made it dead code and let jobs without a reference id through.
        $iPReferId = empty($data['p_refer_id']) ? 0 : $data['p_refer_id'];
        if (empty($iPReferId)) {
            $this->oQueueJob->log("无效的p_refer_id,删除任务");
            $job->delete();
            return;
        }

        try {
            // One lock per (class, article, reference); the value identifies this attempt.
            $sClassName = get_class($this);
            $sRedisKey = "queue_job:{$sClassName}:{$iPArticleId}:{$iPReferId}";
            $sRedisValue = uniqid() . '_' . getmypid();
            if (!$this->oQueueJob->acquireLock($sRedisKey, $sRedisValue, $job)) {
                // Lock not obtained; per the original author's note the helper has already dealt with the job.
                return;
            }

            // Run the actual check.
            $oProductionArticleRefer = new \app\api\controller\References;
            $response = $oProductionArticleRefer->getCheckByAiResult($data);

            // An empty response is treated as a retryable failure.
            if (empty($response)) {
                throw new \RuntimeException("OpenAI API返回空结果");
            }

            // The response must be valid JSON.
            $aResult = json_decode($response, true);
            if (json_last_error() !== JSON_ERROR_NONE) {
                throw new \RuntimeException("解析OpenAI响应失败: " . json_last_error_msg() . " | 原始响应: {$response}");
            }
            $sMsg = empty($aResult['msg']) ? 'success' : $aResult['msg'];

            // Record completion, then remove the job from the queue.
            $this->QueueRedis->finishJob($sRedisKey, 'completed', $this->completedExprie, $sRedisValue);
            $job->delete();
            $this->oQueueJob->log("任务执行成功 | 日志ID: {$sRedisKey} | 执行日志:{$sMsg}");

        } catch (\RuntimeException $e) {
            $this->oQueueJob->handleRetryableException($e, $sRedisKey, $sRedisValue, $job);
        } catch (\LogicException $e) {
            $this->oQueueJob->handleNonRetryableException($e, $sRedisKey, $sRedisValue, $job);
        } catch (\Exception $e) {
            $this->oQueueJob->handleRetryableException($e, $sRedisKey, $sRedisValue, $job);
        } finally {
            // Always let the helper run its end-of-job step.
            $this->oQueueJob->finnal();
        }
    }
}
|
||||||
92
application/api/job/ArticleReferDetailQueue.php
Normal file
92
application/api/job/ArticleReferDetailQueue.php
Normal file
@@ -0,0 +1,92 @@
|
|||||||
|
<?php
|
||||||
|
namespace app\api\job;
|
||||||
|
use think\queue\Job;
|
||||||
|
use app\common\QueueJob;
|
||||||
|
use app\common\QueueRedis;
|
||||||
|
use app\common\ProductionArticleRefer;
|
||||||
|
use think\Db;
|
||||||
|
class ArticleReferDetailQueue
{
    /** @var QueueJob Helper used for job lifecycle logging, locking and retry handling. */
    private $oQueueJob;

    /** @var QueueRedis Redis wrapper used to record the "completed" marker. */
    private $QueueRedis;

    /** @var int Expiry handed to finishJob() for the completed marker. */
    private $completedExprie = 3600;

    public function __construct()
    {
        $this->oQueueJob = new QueueJob;
        $this->QueueRedis = QueueRedis::getInstance();
    }

    /**
     * Queue entry point: processes one reference (`p_refer_id`) of one production
     * article (`p_article_id`) through ProductionArticleRefer::get().
     *
     * @param Job   $job  Queue job being executed.
     * @param array $data Payload; both ids must be non-empty or the job is dropped.
     * @return void
     */
    public function fire(Job $job, $data)
    {
        $this->oQueueJob->init($job);

        // The raw payload is only consulted for the job id shown in the log.
        $payload = $job->getRawBody();
        $decoded = empty($payload) ? [] : json_decode($payload, true);
        $jobId = empty($decoded['id']) ? 'unknown' : $decoded['id'];

        $this->oQueueJob->log("-----------队列任务开始-----------");
        $this->oQueueJob->log("当前任务ID: {$jobId}, 尝试次数: {$job->attempts()}");

        // Both identifiers are required; a missing one drops the job.
        if (empty($data['p_article_id'])) {
            $this->discard($job, "无效的p_article_id,删除任务");
            return;
        }
        $pArticleId = $data['p_article_id'];

        if (empty($data['p_refer_id'])) {
            $this->discard($job, "无效的p_refer_id,删除任务");
            return;
        }
        $pReferId = $data['p_refer_id'];

        try {
            // Lock is keyed by class + article + reference; the value identifies this attempt.
            $className = get_class($this);
            $sRedisKey = "queue_job:{$className}:{$pArticleId}:{$pReferId}";
            $sRedisValue = uniqid() . '_' . getmypid();

            $locked = $this->oQueueJob->acquireLock($sRedisKey, $sRedisValue, $job);
            if (!$locked) {
                return;
            }

            $worker = new ProductionArticleRefer;
            $response = $worker->get($data);
            if (empty($response)) {
                throw new \RuntimeException("返回空结果");
            }

            $parsed = json_decode($response, true);
            if (json_last_error() !== JSON_ERROR_NONE) {
                throw new \RuntimeException("解析响应失败: " . json_last_error_msg() . " | 原始响应: {$response}");
            }

            $resultMsg = 'success';
            if (!empty($parsed['msg'])) {
                $resultMsg = $parsed['msg'];
            }

            // Mark completion, remove the job, then log.
            $this->QueueRedis->finishJob($sRedisKey, 'completed', $this->completedExprie, $sRedisValue);
            $job->delete();
            $this->oQueueJob->log("任务执行成功 | 日志ID: {$sRedisKey} | 执行日志:{$resultMsg}");
        } catch (\LogicException $e) {
            // Logic errors are not retried.
            $this->oQueueJob->handleNonRetryableException($e, $sRedisKey, $sRedisValue, $job);
        } catch (\Exception $e) {
            // Everything else, runtime errors included, goes to the retry handler.
            $this->oQueueJob->handleRetryableException($e, $sRedisKey, $sRedisValue, $job);
        } finally {
            $this->oQueueJob->finnal();
        }
    }

    /**
     * Log the reason and delete a job whose payload is unusable.
     */
    private function discard(Job $job, $reason)
    {
        $this->oQueueJob->log($reason);
        $job->delete();
    }
}
|
||||||
85
application/api/job/ArticleReferQueue.php
Normal file
85
application/api/job/ArticleReferQueue.php
Normal file
@@ -0,0 +1,85 @@
|
|||||||
|
<?php
|
||||||
|
namespace app\api\job;
|
||||||
|
use think\queue\Job;
|
||||||
|
use app\common\QueueJob;
|
||||||
|
use app\common\QueueRedis;
|
||||||
|
use app\common\ProductionArticleRefer;
|
||||||
|
use think\Db;
|
||||||
|
class ArticleReferQueue
{
    /** @var QueueJob Helper used for job lifecycle logging, locking and retry handling. */
    private $oQueueJob;

    /** @var QueueRedis Redis wrapper used to record the "completed" marker. */
    private $QueueRedis;

    /** @var int Expiry handed to finishJob() for the completed marker. */
    private $completedExprie = 180;

    public function __construct()
    {
        $this->oQueueJob = new QueueJob;
        $this->QueueRedis = QueueRedis::getInstance();
    }

    /**
     * Queue entry point: runs ProductionArticleRefer::top() for one article /
     * production-article pair.
     *
     * @param Job   $job  Queue job being executed.
     * @param array $data Payload; `article_id` and `p_article_id` must both be non-empty.
     * @return void
     */
    public function fire(Job $job, $data)
    {
        $helper = $this->oQueueJob;
        $helper->init($job);

        // Job id comes from the raw queue payload and is used for logging only.
        $body = $job->getRawBody();
        $meta = empty($body) ? [] : json_decode($body, true);
        $jobId = empty($meta['id']) ? 'unknown' : $meta['id'];

        $helper->log("-----------队列任务开始-----------");
        $helper->log("当前任务ID: {$jobId}, 尝试次数: {$job->attempts()}");

        // Validate the two required identifiers in order; the first missing one drops the job.
        $articleId = empty($data['article_id']) ? 0 : $data['article_id'];
        if (!$articleId) {
            $helper->log("无效的article_id,删除任务");
            $job->delete();
            return;
        }

        $pArticleId = empty($data['p_article_id']) ? 0 : $data['p_article_id'];
        if (!$pArticleId) {
            $helper->log("无效的p_article_id,删除任务");
            $job->delete();
            return;
        }

        try {
            // Lock key: class name + article id + production article id.
            $owner = get_class($this);
            $sRedisKey = "queue_job:{$owner}:{$articleId}:{$pArticleId}";
            $sRedisValue = uniqid() . '_' . getmypid();
            if ($this->oQueueJob->acquireLock($sRedisKey, $sRedisValue, $job) == false) {
                return;
            }

            $refer = new ProductionArticleRefer;
            $response = $refer->top($data);
            if (empty($response)) {
                throw new \RuntimeException("OpenAI API返回空结果");
            }

            $decoded = json_decode($response, true);
            if (json_last_error() !== JSON_ERROR_NONE) {
                throw new \RuntimeException("解析OpenAI响应失败: " . json_last_error_msg() . " | 原始响应: {$response}");
            }
            $note = empty($decoded['msg']) ? 'success' : $decoded['msg'];

            // Record completion first, then delete the job and log the outcome.
            $this->QueueRedis->finishJob($sRedisKey, 'completed', $this->completedExprie, $sRedisValue);
            $job->delete();
            $this->oQueueJob->log("任务执行成功 | 日志ID: {$sRedisKey} | 执行日志:{$note}");
        } catch (\LogicException $e) {
            // Logic errors are not retried.
            $this->oQueueJob->handleNonRetryableException($e, $sRedisKey, $sRedisValue, $job);
        } catch (\Exception $e) {
            // Runtime and all remaining exceptions are retried.
            $this->oQueueJob->handleRetryableException($e, $sRedisKey, $sRedisValue, $job);
        } finally {
            $this->oQueueJob->finnal();
        }
    }
}
|
||||||
@@ -31,6 +31,7 @@ class PlagiarismPoll
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
$svc = new PlagiarismService();
|
$svc = new PlagiarismService();
|
||||||
|
$svc->log("PlagiarismPoll job is running");
|
||||||
$svc->runPollStatus($checkId, $attempt);
|
$svc->runPollStatus($checkId, $attempt);
|
||||||
$job->delete();
|
$job->delete();
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -6,9 +6,9 @@ use think\queue\Job;
|
|||||||
use app\common\PlagiarismService;
|
use app\common\PlagiarismService;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* 队列任务:上传论文到 Turnitin + 触发 similarity 检测。
|
* 队列任务:创建 Turnitin submission 并上传原稿;ingest 轮询与触发 similarity 由后续 Job 完成。
|
||||||
*
|
*
|
||||||
* 完成后会自动入队 PlagiarismPoll 进行后续轮询。
|
* 链:PlagiarismRun → PlagiarismWaitIngest → PlagiarismTriggerSimilarity → PlagiarismPoll
|
||||||
*
|
*
|
||||||
* data:
|
* data:
|
||||||
* - check_id t_plagiarism_check.check_id
|
* - check_id t_plagiarism_check.check_id
|
||||||
@@ -29,8 +29,12 @@ class PlagiarismRun
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
$svc = new PlagiarismService();
|
$svc = new PlagiarismService();
|
||||||
$svc->log("PlagiarismRun job act!!");
|
$svc->log('PlagiarismRun job is running');
|
||||||
$svc->runUploadAndTrigger($checkId, $filePath);
|
try {
|
||||||
|
$svc->runUploadOnly($checkId, $filePath);
|
||||||
|
} catch (\Throwable $e) {
|
||||||
|
$svc->markFailed($checkId, '[upload] ' . $e->getMessage());
|
||||||
|
}
|
||||||
$job->delete();
|
$job->delete();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
34
application/api/job/PlagiarismTriggerSimilarity.php
Normal file
34
application/api/job/PlagiarismTriggerSimilarity.php
Normal file
@@ -0,0 +1,34 @@
|
|||||||
|
<?php
|
||||||
|
|
||||||
|
namespace app\api\job;
|
||||||
|
|
||||||
|
use think\queue\Job;
|
||||||
|
use app\common\PlagiarismService;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 队列任务:在 ingest 就绪后调用 PUT /similarity,并入队 PlagiarismPoll。
|
||||||
|
*
|
||||||
|
* data:
|
||||||
|
* - check_id t_plagiarism_check.check_id
|
||||||
|
* - ingest_attempt 来自 PlagiarismWaitIngest 的 attempt(409 时用于继续轮询 ingest)
|
||||||
|
*/
|
||||||
|
class PlagiarismTriggerSimilarity
{
    /**
     * Run the "trigger similarity" step for one plagiarism check, then delete the job.
     *
     * Any throwable raised by the service is recorded through markFailed() with a
     * "[similarity]" prefix; the job is deleted in every case and never retried here.
     *
     * @param Job   $job
     * @param array $data Reads `check_id` and `ingest_attempt` (defaults to 1).
     * @return void
     */
    public function fire(Job $job, $data)
    {
        $checkId = (int) ($data['check_id'] ?? 0);
        $ingestAttempt = (int) ($data['ingest_attempt'] ?? 1);

        // Only a positive check id leads to any work.
        if ($checkId > 0) {
            $service = new PlagiarismService();
            $service->log("PlagiarismTriggerSimilarity job is running");
            try {
                $service->runTriggerSimilarityOnly($checkId, $ingestAttempt);
            } catch (\Throwable $error) {
                $service->markFailed($checkId, '[similarity] ' . $error->getMessage());
            }
        }

        $job->delete();
    }
}
|
||||||
34
application/api/job/PlagiarismWaitIngest.php
Normal file
34
application/api/job/PlagiarismWaitIngest.php
Normal file
@@ -0,0 +1,34 @@
|
|||||||
|
<?php
|
||||||
|
|
||||||
|
namespace app\api\job;
|
||||||
|
|
||||||
|
use think\queue\Job;
|
||||||
|
use app\common\PlagiarismService;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 队列任务:单次查询 Turnitin submission 是否解析完成(ingest),未完成则延迟再次入队。
|
||||||
|
*
|
||||||
|
* data:
|
||||||
|
* - check_id t_plagiarism_check.check_id
|
||||||
|
* - attempt 从 1 递增
|
||||||
|
*/
|
||||||
|
class PlagiarismWaitIngest
{
    /**
     * Run a single ingest-poll step for one plagiarism check, then delete the job.
     *
     * Any throwable raised by the service is recorded through markFailed() with an
     * "[ingest]" prefix; the job is deleted in every case.
     *
     * @param Job   $job
     * @param array $data Reads `check_id` and `attempt` (defaults to 1).
     * @return void
     */
    public function fire(Job $job, $data)
    {
        $checkId = (int) ($data['check_id'] ?? 0);
        $attempt = (int) ($data['attempt'] ?? 1);

        // Only a positive check id leads to any work.
        if ($checkId > 0) {
            $service = new PlagiarismService();
            $service->log("PlagiarismWaitIngest job is running");
            try {
                $service->runIngestPollStep($checkId, $attempt);
            } catch (\Throwable $error) {
                $service->markFailed($checkId, '[ingest] ' . $error->getMessage());
            }
        }

        $job->delete();
    }
}
|
||||||
101
application/api/job/ReminderEmailToReviewer.php
Normal file
101
application/api/job/ReminderEmailToReviewer.php
Normal file
@@ -0,0 +1,101 @@
|
|||||||
|
<?php
|
||||||
|
namespace app\api\job;
|
||||||
|
|
||||||
|
use think\queue\Job;
|
||||||
|
use app\common\QueueJob;
|
||||||
|
use app\common\QueueRedis;
|
||||||
|
use app\api\controller\Cronreview;
|
||||||
|
class ReminderEmailToReviewer
{
    // Queue job that sends a review reminder e-mail to a reviewer.

    /** @var QueueJob Helper used for job lifecycle logging, locking and retry handling. */
    private $oQueueJob;

    /** @var QueueRedis Redis wrapper used to record the "completed" marker. */
    private $QueueRedis;

    /** @var int Expiry handed to finishJob() for the completed marker. */
    private $completedExprie = 3600;

    public function __construct()
    {
        $this->oQueueJob = new QueueJob;
        $this->QueueRedis = QueueRedis::getInstance();
    }

    /**
     * Queue entry point: validates the payload, takes a per-reminder lock and
     * delegates to Cronreview::reminder().
     *
     * @param Job   $job  Queue job being executed.
     * @param array $data Payload; `article_id`, `art_rev_id`, `reviewer_id` and
     *                    `email_type` must all be non-empty or the job is deleted.
     * @return void
     */
    public function fire(Job $job, $data)
    {
        // Let the helper run its start-of-job bookkeeping.
        $this->oQueueJob->init($job);

        // Pull the job id out of the raw queue payload, for logging only.
        $rawBody = empty($job->getRawBody()) ? '' : $job->getRawBody();
        $jobData = empty($rawBody) ? [] : json_decode($rawBody, true);
        $jobId = empty($jobData['id']) ? 'unknown' : $jobData['id'];

        $this->oQueueJob->log("-----------队列任务开始-----------");
        $this->oQueueJob->log("当前任务ID: {$jobId}, 尝试次数: {$job->attempts()}");

        // Defined up front so the catch handlers never read undefined variables
        // if something throws before the lock key has been built.
        $sRedisKey = null;
        $sRedisValue = null;

        try {
            // Article id.
            $iArticleId = empty($data['article_id']) ? 0 : $data['article_id'];
            // Primary key of the review record.
            $art_rev_id = empty($data['art_rev_id']) ? 0 : $data['art_rev_id'];
            // Reviewer id.
            $reviewer_id = empty($data['reviewer_id']) ? 0 : $data['reviewer_id'];
            // E-mail type.
            $email_type = empty($data['email_type']) ? 0 : $data['email_type'];

            if (empty($iArticleId)) {
                $this->oQueueJob->log("无效的article_id,删除任务");
                $job->delete();
                return;
            }
            if (empty($art_rev_id)) {
                $this->oQueueJob->log("无效的art_rev_id,删除任务");
                $job->delete();
                return;
            }
            if (empty($reviewer_id)) {
                $this->oQueueJob->log("无效的reviewer_id,删除任务");
                $job->delete();
                return;
            }
            if (empty($email_type)) {
                $this->oQueueJob->log("无效的email_type,删除任务");
                $job->delete();
                return;
            }

            // One lock per (article, reviewer, review record, e-mail type).
            $sClassName = get_class($this);
            $sRedisKey = "queue_job:{$sClassName}:{$iArticleId}:{$reviewer_id}:{$art_rev_id}:{$email_type}";
            $sRedisValue = uniqid() . '_' . getmypid();
            if (!$this->oQueueJob->acquireLock($sRedisKey, $sRedisValue, $job)) {
                // Lock not obtained; per the original author's note the helper has already dealt with the job.
                return;
            }

            // Core work: delegate to the reminder routine.
            $oCronreview = new Cronreview;
            $response = $oCronreview->reminder($data);

            // An empty response is treated as a retryable failure.
            if (empty($response)) {
                throw new \RuntimeException("OpenAI API返回空结果");
            }

            // The response must be valid JSON.
            $aResult = json_decode($response, true);
            if (json_last_error() !== JSON_ERROR_NONE) {
                throw new \RuntimeException("解析OpenAI响应失败: " . json_last_error_msg() . " | 原始响应: {$response}");
            }
            $sMsg = empty($aResult['msg']) ? 'success' : $aResult['msg'];

            // Record completion, then remove the job from the queue.
            $this->QueueRedis->finishJob($sRedisKey, 'completed', $this->completedExprie, $sRedisValue);
            $job->delete();
            $this->oQueueJob->log("任务执行成功 | 日志ID: {$sRedisKey} | 执行日志:{$sMsg}");

        // The exception classes must be fully qualified: inside this namespace an
        // unqualified `RuntimeException` resolves to app\api\job\RuntimeException,
        // which does not exist, so the handlers below would never match.
        } catch (\RuntimeException $e) {
            $this->oQueueJob->handleRetryableException($e, $sRedisKey, $sRedisValue, $job);
        } catch (\LogicException $e) {
            $this->oQueueJob->handleNonRetryableException($e, $sRedisKey, $sRedisValue, $job);
        } catch (\Exception $e) {
            $this->oQueueJob->handleRetryableException($e, $sRedisKey, $sRedisValue, $job);
        } finally {
            // Always let the helper run its end-of-job step.
            $this->oQueueJob->finnal();
        }
    }
}
|
||||||
35
application/api/job/UserFieldAiFill.php
Normal file
35
application/api/job/UserFieldAiFill.php
Normal file
@@ -0,0 +1,35 @@
|
|||||||
|
<?php
|
||||||
|
|
||||||
|
namespace app\api\job;
|
||||||
|
|
||||||
|
use think\queue\Job;
|
||||||
|
use app\common\UserFieldAiService;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 链式任务:为单个用户生成 field_ai,完成后自动入队下一位用户。
|
||||||
|
*
|
||||||
|
* data:
|
||||||
|
* - user_id 当前处理的用户
|
||||||
|
* - queue 队列名(默认 UserFieldAi)
|
||||||
|
* - force 1=强制重算
|
||||||
|
*
|
||||||
|
* Worker: php think queue:work --queue UserFieldAi
|
||||||
|
*/
|
||||||
|
class UserFieldAiFill
{
    /**
     * Process one user's field_ai and delete the job.
     *
     * NOTE(review): the call that would enqueue the next user is commented out
     * below, so as written this job does not chain; `$queue` and `$delay` are
     * computed but only that disabled call would consume them.
     *
     * @param Job   $job
     * @param array $data Reads `user_id`, `queue`, `force` and `delay`.
     * @return void
     */
    public function fire(Job $job, $data)
    {
        $userId = 0;
        if (isset($data['user_id'])) {
            $userId = intval($data['user_id']);
        }

        $queue = UserFieldAiService::QUEUE_NAME;
        if (isset($data['queue'])) {
            $queue = (string) $data['queue'];
        }

        $force = !empty($data['force']);

        $svc = new UserFieldAiService();
        // A non-positive user id means there is nothing to process.
        if ($userId >= 1) {
            $svc->processUser($userId, $force);
        }
        $job->delete();

        // Delay is clamped to be non-negative and defaults to 1 when absent.
        $rawDelay = isset($data['delay']) ? $data['delay'] : 1;
        $delay = max(0, (int) $rawDelay);
        // $svc->enqueueNextFieldAi($delay, $queue, $userId, $force);
    }
}
|
||||||
@@ -15,7 +15,7 @@ class mail {
|
|||||||
|
|
||||||
|
|
||||||
public function tgpu(Job $job, $data){
|
public function tgpu(Job $job, $data){
|
||||||
// my_tg_pushmail($data);
|
my_tg_pushmail($data);
|
||||||
$job->delete();
|
$job->delete();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -915,7 +915,7 @@ function prgeAuthor($author)
|
|||||||
|
|
||||||
function my_tg_pushmail($data)
|
function my_tg_pushmail($data)
|
||||||
{
|
{
|
||||||
// $res = sendEmail($data['email'], $data['title'], $data['title'], $data['content'], $data['tmail'], $data['tpassword'], $data['attachmentFile']);
|
sendEmail($data['email'], $data['title'], $data['title'], $data['content'], $data['tmail'], $data['tpassword'], $data['attachmentFile']);
|
||||||
// if (isset($res['status'])) {
|
// if (isset($res['status'])) {
|
||||||
// $log_obj = Db::name('email_log');
|
// $log_obj = Db::name('email_log');
|
||||||
// $insert['article_id'] = $data['article_id'];
|
// $insert['article_id'] = $data['article_id'];
|
||||||
|
|||||||
@@ -1153,12 +1153,12 @@ class ArticleParserService
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* 提取 Word 文档中的参考文献列表(仅返回数组,不做入库)
|
* 按段落提取 Word 全文行(供正文裁切、参考文献识别等复用)
|
||||||
* @return array 每条为一个参考文献的纯文本字符串
|
* @return array<int,string>
|
||||||
*/
|
*/
|
||||||
public static function getReferencesFromWord($filePath): array
|
public static function collectParagraphLines($filePath): array
|
||||||
{
|
{
|
||||||
$othis = new self($filePath) ;
|
$othis = new self($filePath);
|
||||||
if (empty($othis->sections)) {
|
if (empty($othis->sections)) {
|
||||||
return [];
|
return [];
|
||||||
}
|
}
|
||||||
@@ -1166,13 +1166,26 @@ class ArticleParserService
|
|||||||
$lines = [];
|
$lines = [];
|
||||||
foreach ($othis->sections as $section) {
|
foreach ($othis->sections as $section) {
|
||||||
foreach ($section->getElements() as $element) {
|
foreach ($section->getElements() as $element) {
|
||||||
$text = $othis->getTextFromElement($element);
|
$text = trim((string) $othis->getTextFromElement($element));
|
||||||
$text = trim((string)$text);
|
if ($text === '') {
|
||||||
if ($text === '') continue;
|
continue;
|
||||||
$lines[] = $text;
|
}
|
||||||
|
if (!mb_check_encoding($text, 'UTF-8')) {
|
||||||
|
$text = mb_convert_encoding($text, 'UTF-8', 'GBK');
|
||||||
|
}
|
||||||
|
$lines[] = preg_replace('/\s+/u', ' ', $text);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
return $lines;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 提取 Word 文档中的参考文献列表(仅返回数组,不做入库)
|
||||||
|
* @return array 每条为一个参考文献的纯文本字符串
|
||||||
|
*/
|
||||||
|
public static function getReferencesFromWord($filePath): array
|
||||||
|
{
|
||||||
|
$lines = self::collectParagraphLines($filePath);
|
||||||
if (empty($lines)) {
|
if (empty($lines)) {
|
||||||
return [];
|
return [];
|
||||||
}
|
}
|
||||||
|
|||||||
356
application/common/ManuscriptBodyExtractor.php
Normal file
356
application/common/ManuscriptBodyExtractor.php
Normal file
@@ -0,0 +1,356 @@
|
|||||||
|
<?php
|
||||||
|
|
||||||
|
namespace app\common;
|
||||||
|
|
||||||
|
use DOMDocument;
|
||||||
|
use DOMElement;
|
||||||
|
use DOMXPath;
|
||||||
|
use think\Exception;
|
||||||
|
use ZipArchive;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 从投稿 Word 生成「仅正文」docx:在 document.xml 上按块裁切,保留表格/图片/样式;
|
||||||
|
* 边界识别仅用可见文本(w:t),不读取域指令(Zotero/EndNote 的 JSON)。
|
||||||
|
*/
|
||||||
|
class ManuscriptBodyExtractor
|
||||||
|
{
|
||||||
|
const BODY_SUBDIR = 'public/plagiarism/body_only';
|
||||||
|
|
||||||
|
const W_NS = 'http://schemas.openxmlformats.org/wordprocessingml/2006/main';
|
||||||
|
|
||||||
|
/** @var DOMDocument */
|
||||||
|
private $dom;
|
||||||
|
|
||||||
|
/** @var DOMElement */
|
||||||
|
private $bodyNode;
|
||||||
|
|
||||||
|
/** @var array<int,DOMElement> */
|
||||||
|
private $blocks = [];
|
||||||
|
|
||||||
|
/** @var array<int,string> */
|
||||||
|
private $blockTexts = [];
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @return array{path:string, rel_path:string, line_count:int, ref_start:int, body_start:int, warnings:array}
|
||||||
|
*/
|
||||||
|
public function buildBodyOnlyDocx($sourcePath, $articleId = 0)
{
    // Validate the input: it must be an existing, readable .docx file.
    $sourcePath = trim((string) $sourcePath);
    $usable = is_file($sourcePath) && is_readable($sourcePath);
    if (!$usable) {
        throw new Exception('Manuscript not readable: ' . $sourcePath);
    }
    $extension = strtolower(pathinfo($sourcePath, PATHINFO_EXTENSION));
    if ('docx' !== $extension) {
        throw new Exception('body_only check requires DOCX manuscript, got: ' . $extension);
    }

    // Parse the top-level blocks of the document body and their visible text.
    $this->loadDocumentBlocks($sourcePath);
    if (empty($this->blocks)) {
        throw new Exception('No content blocks in manuscript');
    }

    // Locate the slice boundaries: [bodyStart, refStart).
    $refStart = $this->findReferenceStartIndex();
    $bodyStart = $this->findBodyStartIndex();
    $warnings = [];
    if ($refStart < 0) {
        // No references heading was found: keep everything up to the end of the document.
        $warnings[] = 'references_heading_not_found; using document end';
        $refStart = count($this->blocks);
    }
    if ($bodyStart >= $refStart) {
        throw new Exception('Could not locate main body (front matter may include entire document)');
    }

    // Count the non-empty blocks that fall inside the slice.
    $nonEmpty = 0;
    $cursor = $bodyStart;
    while ($cursor < $refStart) {
        if (trim($this->blockTexts[$cursor]) !== '') {
            $nonEmpty++;
        }
        $cursor++;
    }
    if ($nonEmpty < 3) {
        throw new Exception('Body content too short after extraction (' . $nonEmpty . ' non-empty blocks)');
    }

    // Write the sliced copy, then turn its project-relative path into an absolute one.
    $relPath = $this->sliceDocxToNewFile($sourcePath, $articleId, $bodyStart, $refStart);
    $projectRoot = rtrim(ROOT_PATH ?: dirname(dirname(__DIR__)), '/\\');
    $nativeRel = str_replace(['/', '\\'], DIRECTORY_SEPARATOR, $relPath);

    return [
        'path'       => $projectRoot . DIRECTORY_SEPARATOR . $nativeRel,
        'rel_path'   => $relPath,
        'line_count' => $nonEmpty,
        'ref_start'  => $refStart,
        'body_start' => $bodyStart,
        'warnings'   => $warnings,
    ];
}
|
||||||
|
|
||||||
|
/**
 * Read word/document.xml from the DOCX and index the direct element children of
 * w:body (every one except w:sectPr) together with their visible text.
 *
 * Fills $this->dom, $this->bodyNode, $this->blocks and $this->blockTexts.
 *
 * @param string $sourcePath Path to the DOCX file.
 * @throws Exception When the archive or its document.xml cannot be read or parsed.
 */
private function loadDocumentBlocks($sourcePath)
{
    $archive = new ZipArchive();
    if ($archive->open($sourcePath) !== true) {
        throw new Exception('Cannot open docx: ' . $sourcePath);
    }
    $documentXml = $archive->getFromName('word/document.xml');
    $archive->close();

    if ($documentXml === false || $documentXml === '') {
        throw new Exception('word/document.xml missing in docx');
    }

    $document = new DOMDocument();
    $document->preserveWhiteSpace = false;
    $document->formatOutput = false;
    // Parser warnings are suppressed; failure is reported through the exception below.
    if (@$document->loadXML($documentXml) === false) {
        $this->dom = $document;
        throw new Exception('Invalid word/document.xml');
    }
    $this->dom = $document;

    $finder = new DOMXPath($document);
    $finder->registerNamespace('w', self::W_NS);
    $bodyElement = $finder->query('//w:body')->item(0);
    if (!($bodyElement instanceof DOMElement)) {
        throw new Exception('w:body not found');
    }

    $this->bodyNode = $bodyElement;
    $this->blocks = [];
    $this->blockTexts = [];

    foreach ($bodyElement->childNodes as $node) {
        // Keep element nodes only, and leave the section properties out of the block list.
        $isBlock = $node->nodeType === XML_ELEMENT_NODE && $node->localName !== 'sectPr';
        if ($isBlock) {
            $this->blocks[] = $node;
            $this->blockTexts[] = $this->extractVisibleTextFromBlock($node);
        }
    }
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 仅拼接 w:t 可见文本,忽略 w:instrText 等域指令(避免 Zotero JSON 参与裁切判断)。
|
||||||
|
*/
|
||||||
|
private function extractVisibleTextFromBlock(DOMElement $block)
{
    // Query only w:t descendants, so field instructions (w:instrText) never contribute.
    $finder = new DOMXPath($block->ownerDocument);
    $finder->registerNamespace('w', self::W_NS);
    $textNodes = $finder->query('.//w:t', $block);

    if (!$textNodes || $textNodes->length === 0) {
        return '';
    }

    // Concatenate the runs as-is, then collapse each whitespace run to one space.
    $joined = '';
    foreach ($textNodes as $textNode) {
        $joined .= $textNode->textContent;
    }
    $collapsed = preg_replace('/\s+/u', ' ', $joined);

    return trim((string) $collapsed);
}
|
||||||
|
|
||||||
|
/**
 * Copy the source DOCX into the body-only directory and remove from the copy's
 * word/document.xml every top-level body block outside [$bodyStart, $refStart).
 * w:sectPr children of w:body are always kept.
 *
 * The copied file is deleted again on any failure, and failures reported by
 * ZipArchive when writing or closing are turned into exceptions so that an
 * unmodified copy of the full manuscript is never returned as the result.
 *
 * @param string $sourcePath Source DOCX path.
 * @param int    $articleId  Used only to build the output file name.
 * @param int    $bodyStart  Index of the first block to keep.
 * @param int    $refStart   Index of the first block to drop after the body.
 * @return string Path of the new file relative to the project root (BODY_SUBDIR/name).
 * @throws Exception On any I/O, parse or consistency failure.
 */
private function sliceDocxToNewFile($sourcePath, $articleId, $bodyStart, $refStart)
{
    $rootDir = rtrim(ROOT_PATH ?: dirname(dirname(__DIR__)), '/\\');
    $dir = $rootDir . DIRECTORY_SEPARATOR . self::BODY_SUBDIR;
    // The second is_dir() covers the case where another process created the directory meanwhile.
    if (!is_dir($dir) && !@mkdir($dir, 0755, true) && !is_dir($dir)) {
        throw new Exception('Cannot create output directory: ' . $dir);
    }

    $name = sprintf('body_article_%d_%s.docx', intval($articleId), date('Ymd_His'));
    $absPath = $dir . DIRECTORY_SEPARATOR . $name;

    if (!copy($sourcePath, $absPath)) {
        throw new Exception('Failed to copy source docx');
    }

    $n = count($this->blocks);
    $zip = new ZipArchive();
    $zipOpen = false;   // true while $zip holds an open archive that still needs closing
    $completed = false; // true only once the sliced file has been written and verified

    try {
        if ($zip->open($absPath) !== true) {
            throw new Exception('Cannot open output docx');
        }
        $zipOpen = true;

        $xml = $zip->getFromName('word/document.xml');
        if ($xml === false) {
            throw new Exception('document.xml missing in output docx');
        }

        $outDom = new DOMDocument();
        $outDom->preserveWhiteSpace = false;
        $outDom->formatOutput = false;
        if (@$outDom->loadXML($xml) === false) {
            throw new Exception('Invalid document.xml in output docx');
        }

        $xpath = new DOMXPath($outDom);
        $xpath->registerNamespace('w', self::W_NS);
        $body = $xpath->query('//w:body')->item(0);
        if (!$body instanceof DOMElement) {
            throw new Exception('w:body not found in output docx');
        }

        // Snapshot the children first: removing nodes while iterating the live list would skip entries.
        $children = [];
        foreach ($body->childNodes as $child) {
            if ($child->nodeType === XML_ELEMENT_NODE) {
                $children[] = $child;
            }
        }

        $blockIdx = 0;
        foreach ($children as $child) {
            if (!($child instanceof DOMElement) || $child->localName === 'sectPr') {
                continue;
            }
            if (($blockIdx < $bodyStart || $blockIdx >= $refStart) && $child->parentNode) {
                $child->parentNode->removeChild($child);
            }
            $blockIdx++;
        }

        // The copy must expose the same number of blocks that were indexed earlier,
        // otherwise the indices would not refer to the same blocks.
        if ($blockIdx !== $n) {
            throw new Exception('Document block count mismatch during slice');
        }

        if (!$zip->addFromString('word/document.xml', $outDom->saveXML())) {
            throw new Exception('Failed to write body-only docx');
        }
        // close() is what actually writes the archive; a false return means the
        // file on disk may still be the unsliced copy.
        $zipOpen = false;
        if (!$zip->close()) {
            throw new Exception('Failed to write body-only docx');
        }

        if (!is_file($absPath) || filesize($absPath) < 200) {
            throw new Exception('Failed to write body-only docx');
        }
        $completed = true;
    } finally {
        if ($zipOpen) {
            $zip->close();
        }
        if (!$completed) {
            // Do not leave a partial or unsliced copy behind.
            @unlink($absPath);
        }
    }

    return self::BODY_SUBDIR . '/' . $name;
}
|
||||||
|
|
||||||
|
/**
 * Find the index of the first block that begins the back matter.
 *
 * Blocks in $this->blockTexts are scanned in order; blank blocks are ignored.
 *  - A block whose text starts with a references/bibliography keyword is
 *    returned immediately.
 *  - A block whose entire text equals one of the stop keywords, optionally
 *    followed by an ASCII ":" or a full-width ":" colon, is returned only
 *    when its index lies past the first 40% of all blocks.
 *
 * @return int Index into $this->blockTexts, or -1 when no block matches.
 */
private function findReferenceStartIndex()
{
    $stopKeywords = [
        'acknowledgements', 'acknowledgments', 'funding', 'appendix', 'supplementary',
        'conflict of interest', 'competing interests', 'author contributions',
        '致谢', '基金', '附录', '补充材料', '利益冲突', '作者贡献',
    ];

    // Build the set of accepted stop headings once instead of lowercasing every
    // keyword for every block. Each keyword is accepted bare, with an ASCII
    // colon, or with a full-width colon (U+FF1A), which is what headings typed
    // with a Chinese input method end with.
    $stopHeadings = [];
    foreach ($stopKeywords as $keyword) {
        $keywordLower = mb_strtolower($keyword, 'UTF-8');
        $stopHeadings[$keywordLower] = true;
        $stopHeadings[$keywordLower . ':'] = true;
        $stopHeadings[$keywordLower . "\u{FF1A}"] = true;
    }

    // Stop keywords only count when they appear after this position.
    $minStopIndex = count($this->blockTexts) * 0.4;

    foreach ($this->blockTexts as $i => $line) {
        $t = trim($line);
        if ($t === '') {
            continue;
        }
        if (preg_match('/^\s*(references|reference|bibliography|参考文献|文献)\b\s*[::]?\s*/iu', $t)) {
            return $i;
        }
        if ($i > $minStopIndex && isset($stopHeadings[mb_strtolower($t, 'UTF-8')])) {
            return $i;
        }
    }

    return -1;
}
|
||||||
|
|
||||||
|
/**
 * Decide which block index the manuscript body starts at.
 *
 * Preference order:
 *  1. the first block recognised by isIntroductionHeading();
 *  2. otherwise the index computed by indexAfterKeywordsBlock() for the first
 *     block starting with "keyword"/"keywords", provided it is inside the list;
 *  3. otherwise the result of indexAfterFrontMatterFallback().
 *
 * @return int Index into $this->blockTexts.
 */
private function findBodyStartIndex()
{
    $total = count($this->blockTexts);
    $firstIntro = null;
    $firstKeywords = null;

    for ($pos = 0; $pos < $total; $pos++) {
        $text = trim($this->blockTexts[$pos]);
        if ($text === '') {
            continue;
        }
        // Only the first hit of each kind is remembered.
        if ($firstIntro === null && $this->isIntroductionHeading($text)) {
            $firstIntro = $pos;
        }
        if ($firstKeywords === null && preg_match('/^\s*keywords?\b\s*[::]?/iu', $text)) {
            $firstKeywords = $pos;
        }
    }

    if ($firstIntro !== null) {
        return $firstIntro;
    }

    if ($firstKeywords !== null) {
        $candidate = $this->indexAfterKeywordsBlock($firstKeywords);
        if ($candidate < $total) {
            return $candidate;
        }
    }

    return $this->indexAfterFrontMatterFallback();
}
|
||||||
|
|
||||||
|
/**
 * Tell whether a block's text begins like an introduction-type heading.
 *
 * Three patterns are tried in order: English section names, Chinese section
 * names, and a heading numbered "1" followed by an introduction keyword.
 *
 * @param string $t Block text.
 * @return bool True when any of the patterns matches.
 */
private function isIntroductionHeading($t)
{
    $headingPatterns = [
        '/^\s*(introduction|background|materials and methods|materials & methods|methods and materials)\b\s*[::]?/iu',
        '/^\s*(引言|前言|背景|材料与方法|资料与方法|研究方法)\b\s*[::]?/iu',
        '/^\s*1[\.\s、]+(introduction|引言|前言)\b/iu',
    ];

    foreach ($headingPatterns as $pattern) {
        if (preg_match($pattern, $t)) {
            return true;
        }
    }

    return false;
}
|
||||||
|
|
||||||
|
/**
 * Starting after the keywords block, find where the body most likely begins.
 *
 * Blank blocks are skipped. The first block that is an introduction heading
 * wins; otherwise the first block that does not start with "abstract", is at
 * least 30 characters long and is not judged an affiliation line wins.
 *
 * @param int $kwIdx Index of the keywords block.
 * @return int Matching index, or min($kwIdx + 1, last index) when none matches.
 */
private function indexAfterKeywordsBlock($kwIdx)
{
    $total = count($this->blockTexts);

    for ($pos = $kwIdx + 1; $pos < $total; $pos++) {
        $text = trim($this->blockTexts[$pos]);
        if ($text === '') {
            continue;
        }

        if ($this->isIntroductionHeading($text)) {
            return $pos;
        }

        $startsWithAbstract = preg_match('/^\s*abstract\b/iu', $text);
        if (!$startsWithAbstract
            && mb_strlen($text) >= 30
            && !$this->looksLikeAffiliationLine($text)
        ) {
            return $pos;
        }
    }

    return min($kwIdx + 1, $total - 1);
}
|
||||||
|
|
||||||
|
/**
 * Last-resort guess for the body start when no other marker was usable.
 *
 * Looks for an introduction heading within the leading blocks (at most 20, and
 * at most 15% of all blocks). If none is found, returns index 8 capped to the
 * last valid index (0 for an empty list).
 *
 * @return int Index into $this->blockTexts.
 */
private function indexAfterFrontMatterFallback()
{
    $total = count($this->blockTexts);
    $scanLimit = min(min(20, (int) floor($total * 0.15)), $total);

    $pos = 0;
    while ($pos < $scanLimit) {
        $text = trim($this->blockTexts[$pos]);
        if ($text !== '' && $this->isIntroductionHeading($text)) {
            return $pos;
        }
        $pos++;
    }

    $lastIndex = max(0, $total - 1);

    return min(8, $lastIndex);
}
|
||||||
|
|
||||||
|
/**
 * Heuristically decide whether a block is front matter rather than body text.
 *
 * Returns true when the text contains an e-mail/affiliation marker ("@",
 * "mailto:", "correspond", "univ", "hospital", "institute", "department"),
 * starts with a number followed by whitespace or a comma (ASCII "," or
 * full-width ","), or starts with "abstract" / "keyword(s)".
 *
 * @param string $t Block text.
 * @return bool
 */
private function looksLikeAffiliationLine($t)
{
    // "univ" already covers "university". \x{FF0C} is the full-width comma, so
    // numbered affiliation lines typed with Chinese punctuation are recognised
    // as well as those using the ASCII comma.
    if (preg_match('/@|mailto:|correspond|univ|hospital|institute|department|^\d+[\s,\x{FF0C}]/iu', $t)) {
        return true;
    }

    if (preg_match('/^\s*abstract\b/iu', $t) || preg_match('/^\s*keywords?\b/iu', $t)) {
        return true;
    }

    return false;
}
|
||||||
|
}
|
||||||
@@ -12,10 +12,14 @@ use think\Exception;
|
|||||||
* 并维护 t_plagiarism_check 状态机。
|
* 并维护 t_plagiarism_check 状态机。
|
||||||
*
|
*
|
||||||
* 状态流:
|
* 状态流:
|
||||||
* submit() → state=1(上传中),入队 PlagiarismRun
|
* submit() → state=1(上传中),入队 PlagiarismRun
|
||||||
* PlagiarismRun.fire → 上传 + 触发 similarity → state=2(比对中),入队 PlagiarismPoll
|
* PlagiarismRun → 创建 submission + 上传文件 → 入队 PlagiarismWaitIngest
|
||||||
* PlagiarismPoll.fire → 轮询 status,完成后下载 PDF → state=3(完成)
|
* PlagiarismWaitIngest → 单次 GET submission 状态;就绪则入队 PlagiarismTriggerSimilarity,否则延迟再入队
|
||||||
* 任意环节抛异常 → state=4(失败),写 error_msg
|
* PlagiarismTriggerSimilarity → PUT similarity → state=2(比对中),入队 PlagiarismPoll
|
||||||
|
* PlagiarismPoll → 轮询 similarity,完成后下载 PDF → state=3(完成);在线 viewer URL 按需 getReportUrl 调用 refreshViewerUrlFor
|
||||||
|
* 任意环节抛异常 → state=4(失败),写 error_msg
|
||||||
|
*
|
||||||
|
* Worker:请用 `queue:work` 消费队列 **plagiarism**(整条链与轮询均在此队列;若此前单独监听 PlagiarismRun / PlagiarismPoll,需改为 plagiarism)。
|
||||||
*/
|
*/
|
||||||
class PlagiarismService
|
class PlagiarismService
|
||||||
{
|
{
|
||||||
@@ -24,6 +28,16 @@ class PlagiarismService
|
|||||||
*/
|
*/
|
||||||
const REPORT_DIR = 'public/plagiarism';
|
const REPORT_DIR = 'public/plagiarism';
|
||||||
|
|
||||||
|
/** Run / WaitIngest / TriggerSimilarity / Poll 共用队列名 */
|
||||||
|
const QUEUE_CHAIN = 'plagiarism';
|
||||||
|
|
||||||
|
const CHECK_TYPE_FULL = 'full';
|
||||||
|
const CHECK_TYPE_BODY = 'body_only';
|
||||||
|
|
||||||
|
const JOB_WAIT_INGEST = 'app\\api\\job\\PlagiarismWaitIngest';
|
||||||
|
const JOB_TRIGGER_SIM = 'app\\api\\job\\PlagiarismTriggerSimilarity';
|
||||||
|
const JOB_POLL = 'app\\api\\job\\PlagiarismPoll';
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* 轮询间隔(秒)。Turnitin 一般 1-5 分钟出结果,30 秒一次比较合适
|
* 轮询间隔(秒)。Turnitin 一般 1-5 分钟出结果,30 秒一次比较合适
|
||||||
*/
|
*/
|
||||||
@@ -50,104 +64,229 @@ class PlagiarismService
|
|||||||
* @param string $filePath 本地可读的 PDF/DOCX 绝对路径
|
* @param string $filePath 本地可读的 PDF/DOCX 绝对路径
|
||||||
* @param int $triggeredBy 触发人 user_id(手工触发时编辑后台的 user_id)
|
* @param int $triggeredBy 触发人 user_id(手工触发时编辑后台的 user_id)
|
||||||
* @param string $source 'manual' / 'auto_xxx'
|
* @param string $source 'manual' / 'auto_xxx'
|
||||||
|
* @param string $checkType full | body_only
|
||||||
* @return int check_id
|
* @return int check_id
|
||||||
*/
|
*/
|
||||||
public function submit($articleId, $filePath, $triggeredBy = 0, $source = 'manual')
|
public function submit($articleId, $filePath, $triggeredBy = 0, $source = 'manual', $checkType = self::CHECK_TYPE_FULL)
|
||||||
{
|
{
|
||||||
if (!is_file($filePath) || !is_readable($filePath)) {
|
if (!is_file($filePath) || !is_readable($filePath)) {
|
||||||
throw new Exception("File not readable: {$filePath}");
|
throw new Exception("File not readable: {$filePath}");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
$checkType = $this->normalizeCheckType($checkType);
|
||||||
|
$uploadPath = $filePath;
|
||||||
|
$derivedRel = '';
|
||||||
|
$sourceName = basename($filePath);
|
||||||
|
|
||||||
|
if ($checkType === self::CHECK_TYPE_BODY) {
|
||||||
|
$built = (new ManuscriptBodyExtractor())->buildBodyOnlyDocx($filePath, $articleId);
|
||||||
|
$uploadPath = $built['path'];
|
||||||
|
$derivedRel = (string) $built['rel_path'];
|
||||||
|
$sourceName = basename($uploadPath);
|
||||||
|
if (!empty($built['warnings'])) {
|
||||||
|
$this->log('body_only warnings check article=' . $articleId . ' ' . implode('; ', $built['warnings']));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
$journalId = (int) Db::name('article')
|
$journalId = (int) Db::name('article')
|
||||||
->where('article_id', $articleId)
|
->where('article_id', $articleId)
|
||||||
->value('journal_id');
|
->value('journal_id');
|
||||||
|
|
||||||
|
$this->log("plagiarism submit type={$checkType} article={$articleId}");
|
||||||
$now = time();
|
$now = time();
|
||||||
$checkId = Db::name('plagiarism_check')->insertGetId([
|
$row = [
|
||||||
'article_id' => $articleId,
|
'article_id' => $articleId,
|
||||||
'journal_id' => $journalId,
|
'journal_id' => $journalId,
|
||||||
'triggered_by' => $triggeredBy,
|
'triggered_by' => $triggeredBy,
|
||||||
'trigger_source' => $source,
|
'trigger_source' => $source,
|
||||||
'state' => 1, // 上传中
|
'check_type' => $checkType,
|
||||||
'source_file_name' => basename($filePath),
|
'state' => 1,
|
||||||
'source_file_size' => filesize($filePath) ?: 0,
|
'source_file_name' => $sourceName,
|
||||||
|
'source_file_size' => filesize($uploadPath) ?: 0,
|
||||||
'ctime' => $now,
|
'ctime' => $now,
|
||||||
'utime' => $now,
|
'utime' => $now,
|
||||||
]);
|
];
|
||||||
$this->log("submit service act");
|
if ($derivedRel !== '') {
|
||||||
// 入队执行:上传 + 触发 similarity
|
$row['derived_file_path'] = $derivedRel;
|
||||||
|
}
|
||||||
|
$checkId = Db::name('plagiarism_check')->insertGetId($row);
|
||||||
Queue::push(
|
Queue::push(
|
||||||
'app\\api\\job\\PlagiarismRun',
|
'app\\api\\job\\PlagiarismRun',
|
||||||
['check_id' => $checkId, 'file_path' => $filePath],
|
['check_id' => $checkId, 'file_path' => $uploadPath],
|
||||||
'PlagiarismRun'
|
self::QUEUE_CHAIN
|
||||||
);
|
);
|
||||||
|
|
||||||
return (int)$checkId;
|
return (int) $checkId;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Job 调用:上传文件到 Turnitin 并触发 similarity,然后入队 PlagiarismPoll
|
* 同时提交全文 + 正文两次查重
|
||||||
|
* @return array{full:int, body_only:int}
|
||||||
|
*/
|
||||||
|
public function submitBoth($articleId, $filePath, $triggeredBy = 0, $source = 'manual')
{
    // The full-text check is submitted first, then the body-only check.
    $fullCheckId = $this->submit($articleId, $filePath, $triggeredBy, $source, self::CHECK_TYPE_FULL);
    $bodyCheckId = $this->submit($articleId, $filePath, $triggeredBy, $source, self::CHECK_TYPE_BODY);

    return [
        'full' => $fullCheckId,
        'body_only' => $bodyCheckId,
    ];
}
|
||||||
|
|
||||||
|
/**
 * Map a caller-supplied check type onto one of the CHECK_TYPE_* constants.
 *
 * The value is cast to string, trimmed and lowercased before comparison.
 * An empty value is treated as the full check; "body" and "bodyonly" are
 * accepted as aliases of the body-only check.
 *
 * @param mixed $checkType
 * @return string self::CHECK_TYPE_FULL or self::CHECK_TYPE_BODY
 * @throws Exception When the value is none of the accepted spellings.
 */
private function normalizeCheckType($checkType)
{
    $normalized = strtolower(trim((string) $checkType));

    if (in_array($normalized, ['', self::CHECK_TYPE_FULL, 'full'], true)) {
        return self::CHECK_TYPE_FULL;
    }

    if (in_array($normalized, [self::CHECK_TYPE_BODY, 'body', 'bodyonly'], true)) {
        return self::CHECK_TYPE_BODY;
    }

    throw new Exception('invalid check_type, use full or body_only');
}
|
||||||
|
|
||||||
|
/**
 * Job entry point: create the Turnitin submission and upload the file only.
 *
 * After the upload a delayed PlagiarismWaitIngest job is queued; that job
 * polls the ingest state and later hands over to PlagiarismTriggerSimilarity.
 *
 * @param int    $checkId  Primary key of the plagiarism_check row.
 * @param string $filePath Local path of the file to upload.
 * @throws Exception When createSubmission returns no id.
 */
public function runUploadOnly($checkId, $filePath)
{
    $check = $this->mustGetCheck($checkId);
    $this->log('runUploadOnly start check_id=' . $checkId);
    $tii = new TurnitinService();

    // Use the article title, falling back to a placeholder built from the id.
    $title = (string) Db::name('article')
        ->where('article_id', $check['article_id'])
        ->value('title');
    if ($title === '') {
        $title = 'Article #' . $check['article_id'];
    }

    $created = $tii->createSubmission([
        'title' => mb_substr($title, 0, 250),
        'owner' => 'editor_' . $check['triggered_by'],
        'submitter' => 'editor_' . $check['triggered_by'],
        'metadata' => [
            'article_id' => (string) $check['article_id'],
            'check_id' => (string) $check['check_id'],
        ],
    ]);

    $submissionId = $created['id'] ?? '';
    if ($submissionId === '') {
        throw new Exception('Turnitin createSubmission returned empty id: ' . json_encode($created));
    }

    // Persist the submission id before uploading so later steps can find it.
    $this->updateCheck($checkId, [
        'tii_submission_id' => $submissionId,
        'raw_response' => json_encode($created, JSON_UNESCAPED_UNICODE),
    ]);

    $tii->uploadFile($submissionId, $filePath, basename($filePath));

    Queue::later(
        $this->ingestChainFirstDelaySec(),
        self::JOB_WAIT_INGEST,
        ['check_id' => $checkId, 'attempt' => 1],
        self::QUEUE_CHAIN
    );
}
|
||||||
|
|
||||||
|
/**
 * Perform a single ingest-state check (called by PlagiarismWaitIngest).
 *
 * This method never sleeps or loops; when the submission is not ready yet it
 * re-queues PlagiarismWaitIngest with attempt + 1 after the poll interval.
 *
 * @param int $checkId Primary key of the plagiarism_check row.
 * @param int $attempt 1-based number of this ingest check.
 */
public function runIngestPollStep($checkId, $attempt = 1)
{
    $check = $this->mustGetCheck($checkId);
    if (empty($check['tii_submission_id'])) {
        $this->markFailed($checkId, '[ingest] tii_submission_id empty');
        return;
    }
    $this->log("runIngestPollStep is running");

    $maxAttempts = $this->ingestChainMaxAttempts();
    $interval = $this->ingestChainPollIntervalSec();
    $tii = new TurnitinService();

    // Schedules the next ingest check; shared by the error and not-ready paths.
    $requeue = function () use ($checkId, $attempt, $interval) {
        Queue::later($interval, self::JOB_WAIT_INGEST, ['check_id' => $checkId, 'attempt' => $attempt + 1], self::QUEUE_CHAIN);
    };
    $attemptsExhausted = $attempt >= $maxAttempts;

    try {
        $parsed = $tii->parseSubmissionIngestState($check['tii_submission_id']);
    } catch (\Throwable $e) {
        // A failed request is retried until the attempt budget is used up.
        if ($attemptsExhausted) {
            $this->markFailed($checkId, '[ingest] request failed after ' . $attempt . ' tries: ' . $e->getMessage());
        } else {
            $requeue();
        }
        return;
    }

    if (!empty($parsed['failed'])) {
        $this->markFailed($checkId, '[ingest] submission failed status=' . $parsed['status'] . ' ' . $parsed['snippet']);
        return;
    }

    if (!empty($parsed['ready'])) {
        Queue::push(self::JOB_TRIGGER_SIM, ['check_id' => $checkId, 'ingest_attempt' => $attempt], self::QUEUE_CHAIN);
        return;
    }

    if ($attemptsExhausted) {
        $this->markFailed($checkId, '[ingest] timeout last_status=' . ($parsed['status'] !== '' ? $parsed['status'] : '(empty)'));
        return;
    }

    $requeue();
}
|
||||||
|
|
||||||
|
/**
 * Trigger the similarity report once ingest is ready, then queue PlagiarismPoll.
 *
 * If Turnitin still answers with a 409 / "not completed yet" style error, the
 * check is put back on PlagiarismWaitIngest instead of throwing, so it is not
 * marked as failed prematurely. Any other error is rethrown.
 *
 * @param int $checkId       Primary key of the plagiarism_check row.
 * @param int $ingestAttempt Attempt number passed on by WaitIngest; used to
 *                           continue the ingest polling count on a 409.
 * @throws \Throwable Errors from triggerSimilarity other than the 409 case.
 */
public function runTriggerSimilarityOnly($checkId, $ingestAttempt = 1)
{
    $check = $this->mustGetCheck($checkId);
    if (empty($check['tii_submission_id'])) {
        $this->markFailed($checkId, '[similarity] tii_submission_id empty');
        return;
    }

    $this->log("runTriggerSimilarityOnly is running");
    $tii = new TurnitinService();
    $sid = $check['tii_submission_id'];

    try {
        $simResp = $tii->triggerSimilarity($sid);
    } catch (\Throwable $e) {
        $msg = $e->getMessage();
        $mentionsConflict = stripos($msg, '409') !== false || stripos($msg, 'CONFLICT') !== false;
        $mentionsPending = stripos($msg, 'not been completed') !== false || stripos($msg, 'completed yet') !== false;

        // Only the "ingest not finished" conflict is handled here.
        if (!($mentionsConflict && $mentionsPending)) {
            throw $e;
        }

        $maxAttempts = $this->ingestChainMaxAttempts();
        $next = $ingestAttempt + 1;
        if ($next > $maxAttempts) {
            $this->markFailed($checkId, '[similarity] still not ready after ingest attempts: ' . $msg);
            return;
        }

        $delay = max($this->ingestChainPollIntervalSec(), 20);
        Queue::later($delay, self::JOB_WAIT_INGEST, ['check_id' => $checkId, 'attempt' => $next], self::QUEUE_CHAIN);
        return;
    }

    // State 2 together with report status PROCESSING marks the comparison as running.
    $this->updateCheck($checkId, [
        'state' => 2,
        'tii_report_status' => 'PROCESSING',
        'raw_response' => json_encode($simResp, JSON_UNESCAPED_UNICODE),
    ]);

    Queue::later(
        self::POLL_INTERVAL,
        self::JOB_POLL,
        ['check_id' => $checkId, 'attempt' => 1],
        self::QUEUE_CHAIN
    );
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @deprecated 与 runUploadOnly 等价;长耗时 ingest 已拆到队列 PlagiarismWaitIngest,勿在本方法内同步 wait。
|
||||||
*/
|
*/
|
||||||
public function runUploadAndTrigger($checkId, $filePath)
|
public function runUploadAndTrigger($checkId, $filePath)
|
||||||
{
|
{
|
||||||
$check = $this->mustGetCheck($checkId);
|
$this->runUploadOnly($checkId, $filePath);
|
||||||
$this->log("runUploadAndTrigger is act0");
|
|
||||||
try {
|
|
||||||
$tii = new TurnitinService();
|
|
||||||
|
|
||||||
// 1. 创建 submission
|
|
||||||
$articleTitle = (string) Db::name('article')
|
|
||||||
->where('article_id', $check['article_id'])
|
|
||||||
->value('title');
|
|
||||||
if ($articleTitle === '') {
|
|
||||||
$articleTitle = 'Article #' . $check['article_id'];
|
|
||||||
}
|
|
||||||
$this->log("runUploadAndTrigger is act1");
|
|
||||||
$createResp = $tii->createSubmission([
|
|
||||||
'title' => mb_substr($articleTitle, 0, 250),
|
|
||||||
'owner' => 'editor_' . $check['triggered_by'],
|
|
||||||
'submitter' => 'editor_' . $check['triggered_by'],
|
|
||||||
'metadata' => [
|
|
||||||
'article_id' => (string)$check['article_id'],
|
|
||||||
'check_id' => (string)$check['check_id'],
|
|
||||||
],
|
|
||||||
]);
|
|
||||||
$submissionId = isset($createResp['id']) ? $createResp['id'] : '';
|
|
||||||
if ($submissionId === '') {
|
|
||||||
throw new Exception('Turnitin createSubmission returned empty id: ' . json_encode($createResp));
|
|
||||||
}
|
|
||||||
|
|
||||||
$this->updateCheck($checkId, [
|
|
||||||
'tii_submission_id' => $submissionId,
|
|
||||||
'raw_response' => json_encode($createResp, JSON_UNESCAPED_UNICODE),
|
|
||||||
]);
|
|
||||||
$this->log("runUploadAndTrigger is act2");
|
|
||||||
// 2. 上传文件
|
|
||||||
$tii->uploadFile($submissionId, $filePath, basename($filePath));
|
|
||||||
|
|
||||||
// 3. 触发 similarity
|
|
||||||
$simResp = $tii->triggerSimilarity($submissionId);
|
|
||||||
|
|
||||||
$this->updateCheck($checkId, [
|
|
||||||
'state' => 2, // 比对中
|
|
||||||
'tii_report_status' => 'PROCESSING',
|
|
||||||
'raw_response' => json_encode($simResp, JSON_UNESCAPED_UNICODE),
|
|
||||||
]);
|
|
||||||
|
|
||||||
$this->log("runUploadAndTrigger is act3");
|
|
||||||
|
|
||||||
// 4. 排队首次轮询(晚一点开始,让 Turnitin 先处理)
|
|
||||||
Queue::later(
|
|
||||||
self::POLL_INTERVAL,
|
|
||||||
'app\\api\\job\\PlagiarismPoll',
|
|
||||||
['check_id' => $checkId, 'attempt' => 1],
|
|
||||||
'PlagiarismPoll'
|
|
||||||
);
|
|
||||||
} catch (\Throwable $e) {
|
|
||||||
$this->markFailed($checkId, '[upload] ' . $e->getMessage());
|
|
||||||
throw $e;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -173,20 +312,25 @@ class PlagiarismService
|
|||||||
]);
|
]);
|
||||||
|
|
||||||
if ($status === 'COMPLETE') {
|
if ($status === 'COMPLETE') {
|
||||||
$score = isset($statusResp['overall_match_percentage'])
|
$score = TurnitinService::extractOverallMatchPercentage($statusResp);
|
||||||
? floatval($statusResp['overall_match_percentage']) : 0;
|
if ($score <= 0 && isset($statusResp['overall_match_percentage'])) {
|
||||||
|
$score = floatval($statusResp['overall_match_percentage']);
|
||||||
|
}
|
||||||
|
$this->log('poll complete check_id=' . $checkId . ' score=' . $score
|
||||||
|
. ' check_type=' . ($check['check_type'] ?? 'full'));
|
||||||
|
|
||||||
// 下载 PDF + 取在线查看 URL
|
|
||||||
$localPdf = $this->downloadAndStorePdf($tii, $check['tii_submission_id'], $checkId);
|
$localPdf = $this->downloadAndStorePdf($tii, $check['tii_submission_id'], $checkId);
|
||||||
$viewerInfo = $this->refreshViewerUrl($tii, $check['tii_submission_id']);
|
|
||||||
|
$meta = TurnitinService::parseSimilarityReportMeta($statusResp);
|
||||||
|
if ($meta['score'] > 0) {
|
||||||
|
$score = $meta['score'];
|
||||||
|
}
|
||||||
|
|
||||||
$this->updateCheck($checkId, [
|
$this->updateCheck($checkId, [
|
||||||
'state' => 3,
|
'state' => 3,
|
||||||
'similarity_score' => $score,
|
'similarity_score' => $score,
|
||||||
'pdf_local_path' => $localPdf,
|
'pdf_local_path' => $localPdf,
|
||||||
'view_only_url' => $viewerInfo['url'],
|
'error_msg' => '',
|
||||||
'view_only_url_expire' => $viewerInfo['expire'],
|
|
||||||
'error_msg' => '',
|
|
||||||
]);
|
]);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
@@ -197,25 +341,23 @@ class PlagiarismService
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
// PROCESSING 或其它中间态:继续轮询
|
|
||||||
if ($attempt >= self::MAX_POLL_ATTEMPTS) {
|
if ($attempt >= self::MAX_POLL_ATTEMPTS) {
|
||||||
$this->markFailed($checkId, '[poll] timeout after ' . $attempt . ' attempts');
|
$this->markFailed($checkId, '[poll] timeout after ' . $attempt . ' attempts');
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
Queue::later(
|
Queue::later(
|
||||||
self::POLL_INTERVAL,
|
self::POLL_INTERVAL,
|
||||||
'app\\api\\job\\PlagiarismPoll',
|
self::JOB_POLL,
|
||||||
['check_id' => $checkId, 'attempt' => $attempt + 1],
|
['check_id' => $checkId, 'attempt' => $attempt + 1],
|
||||||
'plagiarism'
|
self::QUEUE_CHAIN
|
||||||
);
|
);
|
||||||
} catch (\Throwable $e) {
|
} catch (\Throwable $e) {
|
||||||
// 网络抖动不要直接 fail,给一定容错次数
|
|
||||||
if ($attempt < self::MAX_POLL_ATTEMPTS) {
|
if ($attempt < self::MAX_POLL_ATTEMPTS) {
|
||||||
Queue::later(
|
Queue::later(
|
||||||
self::POLL_INTERVAL,
|
self::POLL_INTERVAL,
|
||||||
'app\\api\\job\\PlagiarismPoll',
|
self::JOB_POLL,
|
||||||
['check_id' => $checkId, 'attempt' => $attempt + 1],
|
['check_id' => $checkId, 'attempt' => $attempt + 1],
|
||||||
'plagiarism'
|
self::QUEUE_CHAIN
|
||||||
);
|
);
|
||||||
$this->updateCheck($checkId, [
|
$this->updateCheck($checkId, [
|
||||||
'attempts' => $attempt,
|
'attempts' => $attempt,
|
||||||
@@ -229,42 +371,71 @@ class PlagiarismService
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* 重新生成在线查看 URL(已有的过期了用)
|
* 按需获取/刷新 Turnitin 在线报告 URL(与 poll 解耦,避免 viewer-url 失败拖死查重完成)。
|
||||||
*
|
*
|
||||||
* @return array{url:string, expire:int, local_pdf:string}
|
* @param array $viewerContext editor_id=当前打开报告的编辑 user_id;viewer_user_id 可显式指定
|
||||||
|
* @return array{url:string, expire:int, local_pdf:string, viewer_user_id:string}
|
||||||
*/
|
*/
|
||||||
public function refreshViewerUrlFor($checkId)
|
public function refreshViewerUrlFor($checkId, array $viewerContext = [])
|
||||||
{
|
{
|
||||||
$check = $this->mustGetCheck($checkId);
|
$check = $this->mustGetCheck($checkId);
|
||||||
if (empty($check['tii_submission_id'])) {
|
if (empty($check['tii_submission_id'])) {
|
||||||
throw new Exception('check has no tii_submission_id');
|
throw new Exception('check has no tii_submission_id');
|
||||||
}
|
}
|
||||||
$tii = new TurnitinService();
|
$tii = new TurnitinService();
|
||||||
$info = $this->refreshViewerUrl($tii, $check['tii_submission_id']);
|
$info = $this->refreshViewerUrl($tii, $check['tii_submission_id'], $check, $viewerContext);
|
||||||
$this->updateCheck($checkId, [
|
$this->updateCheck($checkId, [
|
||||||
'view_only_url' => $info['url'],
|
'view_only_url' => $info['url'],
|
||||||
'view_only_url_expire' => $info['expire'],
|
'view_only_url_expire' => $info['expire'],
|
||||||
]);
|
]);
|
||||||
return [
|
return [
|
||||||
'url' => $info['url'],
|
'url' => $info['url'],
|
||||||
'expire' => $info['expire'],
|
'expire' => $info['expire'],
|
||||||
'local_pdf' => $check['pdf_local_path'],
|
'local_pdf' => $check['pdf_local_path'],
|
||||||
|
'viewer_user_id' => $info['viewer_user_id'],
|
||||||
];
|
];
|
||||||
}
|
}
|
||||||
|
|
||||||
// ---------- 内部 ----------
|
// ---------- 内部 ----------
|
||||||
|
|
||||||
private function refreshViewerUrl($tii, $submissionId)
|
/**
|
||||||
|
* 调用 Turnitin POST viewer-url;仅由 refreshViewerUrlFor / getReportUrl 触发。
|
||||||
|
*/
|
||||||
|
private function refreshViewerUrl($tii, $submissionId, array $check = [], array $viewerContext = [])
|
||||||
{
|
{
|
||||||
$resp = $tii->getViewerUrl($submissionId);
|
$viewerOpts = $viewerContext;
|
||||||
|
if (!isset($viewerOpts['editor_id']) && !empty($check['triggered_by'])) {
|
||||||
|
$viewerOpts['triggered_by'] = intval($check['triggered_by']);
|
||||||
|
}
|
||||||
|
$viewerUserId = $tii->resolveViewerUserId($viewerOpts);
|
||||||
|
$resp = $tii->getViewerUrl($submissionId, $viewerOpts);
|
||||||
$url = '';
|
$url = '';
|
||||||
if (isset($resp['viewer_url'])) {
|
if (isset($resp['viewer_url'])) {
|
||||||
$url = (string)$resp['viewer_url'];
|
$url = (string) $resp['viewer_url'];
|
||||||
} elseif (isset($resp['url'])) {
|
} elseif (isset($resp['url'])) {
|
||||||
$url = (string)$resp['url'];
|
$url = (string) $resp['url'];
|
||||||
|
} elseif (isset($resp['launch_url'])) {
|
||||||
|
$url = (string) $resp['launch_url'];
|
||||||
}
|
}
|
||||||
// 默认 2 小时过期,保守起见
|
if ($url === '') {
|
||||||
return ['url' => $url, 'expire' => time() + 7200];
|
throw new Exception('viewer-url response has no url: ' . json_encode($resp, JSON_UNESCAPED_UNICODE));
|
||||||
|
}
|
||||||
|
$expire = time() + 7200;
|
||||||
|
foreach (['viewer_url_expires', 'expires_at', 'expiration_time', 'expire_time'] as $k) {
|
||||||
|
if (empty($resp[$k])) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
$ts = is_numeric($resp[$k]) ? intval($resp[$k]) : strtotime((string) $resp[$k]);
|
||||||
|
if ($ts > time()) {
|
||||||
|
$expire = $ts;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return [
|
||||||
|
'url' => $url,
|
||||||
|
'expire' => $expire,
|
||||||
|
'viewer_user_id' => $viewerUserId,
|
||||||
|
];
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -328,15 +499,30 @@ class PlagiarismService
|
|||||||
Db::name('plagiarism_check')->where('check_id', $checkId)->update($data);
|
Db::name('plagiarism_check')->where('check_id', $checkId)->update($data);
|
||||||
}
|
}
|
||||||
|
|
||||||
private function markFailed($checkId, $errMsg)
|
public function markFailed($checkId, $errMsg)
|
||||||
{
|
{
|
||||||
$this->log("markFailed act");
|
$this->log('markFailed check_id=' . $checkId);
|
||||||
$this->updateCheck($checkId, [
|
$this->updateCheck($checkId, [
|
||||||
'state' => 4,
|
'state' => 4,
|
||||||
'error_msg' => mb_substr($errMsg, 0, 1000),
|
'error_msg' => mb_substr($errMsg, 0, 1000),
|
||||||
]);
|
]);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Delay in seconds before the first ingest check after an upload.
 *
 * Read from env key turnitin.ingest_chain_first_delay (fallback 10) and
 * never lower than 3.
 *
 * @return int
 */
private function ingestChainFirstDelaySec()
{
    $configured = (int) Env::get('turnitin.ingest_chain_first_delay', 10);

    return $configured < 3 ? 3 : $configured;
}
|
||||||
|
|
||||||
|
/**
 * Delay in seconds between two consecutive ingest checks.
 *
 * Read from env key turnitin.ingest_chain_poll_interval (fallback 15) and
 * never lower than 60.
 *
 * NOTE(review): the 60-second floor is larger than the fallback of 15, so the
 * fallback and any configured value below 60 all take effect as 60 — confirm
 * whether the floor or the fallback is the intended value.
 *
 * @return int
 */
private function ingestChainPollIntervalSec()
{
    $configured = (int) Env::get('turnitin.ingest_chain_poll_interval', 15);

    return $configured < 60 ? 60 : $configured;
}
|
||||||
|
|
||||||
|
/**
 * Maximum number of ingest checks before a check is given up on.
 *
 * Read from env key turnitin.ingest_chain_max_attempts (fallback 80) and
 * never lower than 10.
 *
 * @return int
 */
private function ingestChainMaxAttempts()
{
    $configured = (int) Env::get('turnitin.ingest_chain_max_attempts', 80);

    return $configured < 10 ? 10 : $configured;
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* 从 t_article_file 找到投稿主稿(manuscirpt)的本地绝对路径。
|
* 从 t_article_file 找到投稿主稿(manuscirpt)的本地绝对路径。
|
||||||
* file_url 在系统里可能是 URL 或相对路径,调用方负责保证可读。
|
* file_url 在系统里可能是 URL 或相对路径,调用方负责保证可读。
|
||||||
|
|||||||
@@ -18,6 +18,14 @@ use think\Exception;
|
|||||||
* API_KEY 生成的 Bearer token
|
* API_KEY 生成的 Bearer token
|
||||||
* INTEGRATION_NAME Scope Name(创建 integration 时填的名字)
|
* INTEGRATION_NAME Scope Name(创建 integration 时填的名字)
|
||||||
* INTEGRATION_VERSION 自定义版本号,便于审计 e.g. 1.0.0
|
* INTEGRATION_VERSION 自定义版本号,便于审计 e.g. 1.0.0
|
||||||
|
* SUBMISSION_INGEST_MAX_WAIT 上传后轮询 submission 就绪的最长秒数,默认 600(仅 waitAfterUploadForSimilarity 同步用)
|
||||||
|
* SUBMISSION_INGEST_POLL_INTERVAL 同步轮询间隔秒数,默认 3
|
||||||
|
* INGEST_CHAIN_FIRST_DELAY 上传后首次 ingest 检查延迟秒数,默认 10(队列链)
|
||||||
|
 * INGEST_CHAIN_POLL_INTERVAL ingest 链每步间隔秒数,默认 15(注意:PlagiarismService::ingestChainPollIntervalSec() 强制下限 60 秒,小于 60 的配置值及该默认值实际按 60 生效)
|
||||||
|
* INGEST_CHAIN_MAX_ATTEMPTS ingest 链最大步数,默认 80
|
||||||
|
* EXCLUDE_QUOTES / EXCLUDE_BIBLIOGRAPHY / EXCLUDE_CITATIONS 0|1,默认 0(与 Crossref 网页手动查重更接近)
|
||||||
|
* VIEWER_DEFAULT_MODE match_overview | all_sources(默认 all_sources,便于按来源库分类查看)
|
||||||
|
* ADD_TO_INDEX 0|1,默认 1
|
||||||
*
|
*
|
||||||
* API 文档:https://developers.turnitin.com/docs/tca
|
* API 文档:https://developers.turnitin.com/docs/tca
|
||||||
*
|
*
|
||||||
@@ -36,8 +44,8 @@ class TurnitinService
|
|||||||
|
|
||||||
public function __construct()
|
public function __construct()
|
||||||
{
|
{
|
||||||
$this->baseUrl = rtrim(trim((string)Env::get('turnitin.base_url', '')), '/');
|
$this->baseUrl = rtrim(trim((string)Env::get('turnitin.base_url', 'https://crossref-20794.turnitin.com/api/v1')), '/');
|
||||||
$this->apiKey = trim((string)Env::get('turnitin.api_key', ''));
|
$this->apiKey = trim((string)Env::get('turnitin.api_key', 'c6315e8291a4433dae09ad5efdb8a89c'));
|
||||||
$this->integrationName = trim((string)Env::get('turnitin.integration_name', 'tmr'));
|
$this->integrationName = trim((string)Env::get('turnitin.integration_name', 'tmr'));
|
||||||
$this->integrationVersion = trim((string)Env::get('turnitin.integration_version', '1.0.0'));
|
$this->integrationVersion = trim((string)Env::get('turnitin.integration_version', '1.0.0'));
|
||||||
|
|
||||||
@@ -80,11 +88,13 @@ class TurnitinService
|
|||||||
|
|
||||||
/**
|
/**
|
||||||
* 上传文件到 submission
|
* 上传文件到 submission
|
||||||
* PUT /submissions/{id}/original/{filename}
|
*
|
||||||
|
* TCA 文档路径为 PUT /submissions/{id}/original(文件名仅通过 Content-Disposition 传递,
|
||||||
|
* 不要再拼在 URL 末尾;否则网关会 404,错误里常见 path 形如 //v1/submissions/.../original/xxx.docx)。
|
||||||
*
|
*
|
||||||
* @param string $submissionId
|
* @param string $submissionId
|
||||||
* @param string $filePath 本地 PDF/DOCX 路径
|
* @param string $filePath 本地 PDF/DOCX 路径
|
||||||
* @param string $filename 传给 Turnitin 的文件名(用于报告显示)
|
* @param string $filename 传给 Turnitin 的展示文件名(默认取 basename)
|
||||||
* @return array
|
* @return array
|
||||||
*/
|
*/
|
||||||
public function uploadFile($submissionId, $filePath, $filename = '')
|
public function uploadFile($submissionId, $filePath, $filename = '')
|
||||||
@@ -95,15 +105,20 @@ class TurnitinService
|
|||||||
if ($filename === '') {
|
if ($filename === '') {
|
||||||
$filename = basename($filePath);
|
$filename = basename($filePath);
|
||||||
}
|
}
|
||||||
|
// Content-Disposition 里避免未转义的双引号
|
||||||
|
$safeName = str_replace(['"', "\r", "\n"], '', $filename);
|
||||||
|
if ($safeName === '') {
|
||||||
|
$safeName = 'document.bin';
|
||||||
|
}
|
||||||
$body = file_get_contents($filePath);
|
$body = file_get_contents($filePath);
|
||||||
|
|
||||||
return $this->request(
|
return $this->request(
|
||||||
'PUT',
|
'PUT',
|
||||||
'/submissions/' . urlencode($submissionId) . '/original/' . rawurlencode($filename),
|
'/submissions/' . rawurlencode($submissionId) . '/original',
|
||||||
$body,
|
$body,
|
||||||
[
|
[
|
||||||
'Content-Type' => 'binary/octet-stream',
|
'Content-Type' => 'application/octet-stream',
|
||||||
'Content-Disposition' => 'inline; filename="' . $filename . '"',
|
'Content-Disposition' => 'attachment; filename="' . $safeName . '"',
|
||||||
]
|
]
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
@@ -114,37 +129,340 @@ class TurnitinService
|
|||||||
*
|
*
|
||||||
* @param string $submissionId
|
* @param string $submissionId
|
||||||
* @param array $opts
|
* @param array $opts
|
||||||
* - generation_settings.search_repositories 默认 ['INTERNET','PUBLICATION','CROSSREF','CROSSREF_POSTED_CONTENT','SUBMITTED_WORK']
|
* - generation_settings.search_repositories 默认 ['INTERNET','PUBLICATION',...]
|
||||||
* - generation_settings.submission_auto_excludes bool
|
* - generation_settings.submission_auto_excludes **字符串数组**(如 [] 或具体仓库键),不可传 boolean(否则会 400)
|
||||||
* - view_settings.exclude_quotes / exclude_bibliography / exclude_citations / exclude_abstract / exclude_methods bool
|
* - generation_settings.auto_exclude_self_matching_scope 可选,如 'GROUP_CONTEXT'
|
||||||
|
* - view_settings.exclude_* 布尔排除项(与 TCA 文档一致)
|
||||||
* - indexing_settings.add_to_index bool 是否把本文加进 SUBMITTED_WORK 索引(一般 true)
|
* - indexing_settings.add_to_index bool 是否把本文加进 SUBMITTED_WORK 索引(一般 true)
|
||||||
* @return array
|
* @return array
|
||||||
*/
|
*/
|
||||||
public function triggerSimilarity($submissionId, $opts = [])
{
    // Shallow merge: a top-level section supplied in $opts replaces the default section wholesale.
    $payload = array_merge($this->defaultSimilarityPayload(), $opts);
    $endpoint = '/submissions/' . rawurlencode($submissionId) . '/similarity';

    return $this->request('PUT', $endpoint, $payload);
}
|
||||||
|
|
||||||
|
/**
 * Default request payload shared by PUT /similarity and the PDF export.
 *
 * Earlier versions hard-coded exclude_*=true, which made the overall similarity lower
 * than a manual check in the Crossref web UI (unrelated to source numbering/types).
 *
 * @return array Payload with generation_settings, view_settings and indexing_settings.
 */
public function defaultSimilarityPayload()
{
    $generationSettings = [
        'search_repositories' => ['INTERNET', 'PUBLICATION', 'CROSSREF', 'CROSSREF_POSTED_CONTENT', 'SUBMITTED_WORK'],
        'submission_auto_excludes' => [],
    ];

    // The self-matching scope is only sent when the configured value is non-blank.
    $selfMatchScope = trim((string) Env::get('turnitin.auto_exclude_self_matching_scope', 'GROUP_CONTEXT'));
    if ($selfMatchScope !== '') {
        $generationSettings['auto_exclude_self_matching_scope'] = $selfMatchScope;
    }

    $viewSettings = $this->defaultViewSettings();
    $indexingSettings = [
        'add_to_index' => $this->envBool('turnitin.add_to_index', true),
    ];

    return [
        'generation_settings' => $generationSettings,
        'view_settings' => $viewSettings,
        'indexing_settings' => $indexingSettings,
    ];
}
|
||||||
|
|
||||||
|
/**
 * Build the default view_settings block from the turnitin.exclude_* env flags.
 *
 * Each flag defaults to false when it is not configured.
 *
 * @return array<string,bool>
 */
public function defaultViewSettings()
{
    $settings = [];
    foreach (['exclude_quotes', 'exclude_bibliography', 'exclude_citations'] as $flag) {
        $settings[$flag] = $this->envBool('turnitin.' . $flag, false);
    }

    return $settings;
}
|
||||||
|
|
||||||
|
/**
 * Extract the overall similarity score (0-100) from a GET /similarity response.
 *
 * Candidate keys are read, in order, from the top level of the response, from a nested
 * "message" array (when present) and from "message.similarity". The first non-negative
 * numeric candidate wins.
 *
 * Values strictly between 0 and 1 are treated as fractions and scaled by 100.
 * A value of exactly 1 is kept as 1 (one percent): the previous heuristic scaled every
 * value in (0, 1], so a report scoring 1% was returned as 100.
 *
 * @param array $statusResp Decoded similarity status response.
 * @return float Score rounded to two decimals; 0.0 when no usable value is found.
 */
public static function extractOverallMatchPercentage(array $statusResp)
{
    $candidates = [];

    // Collect numeric values only; null, '' and non-numeric values are ignored.
    $push = function ($value) use (&$candidates) {
        if ($value === null || $value === '') {
            return;
        }
        if (is_numeric($value)) {
            $candidates[] = floatval($value);
        }
    };

    $push($statusResp['overall_match_percentage'] ?? null);
    $push($statusResp['overall_match'] ?? null);
    $push($statusResp['similarity_percentage'] ?? null);

    // Some responses wrap the payload in "message"; otherwise re-read the top level.
    $msg = $statusResp;
    if (isset($statusResp['message']) && is_array($statusResp['message'])) {
        $msg = $statusResp['message'];
    }
    $push($msg['overall_match_percentage'] ?? null);
    $push($msg['overall_match'] ?? null);

    if (isset($msg['similarity']) && is_array($msg['similarity'])) {
        $push($msg['similarity']['overall_match_percentage'] ?? null);
        $push($msg['similarity']['overall_match'] ?? null);
    }

    foreach ($candidates as $value) {
        if ($value < 0) {
            continue;
        }
        if ($value > 0 && $value < 1.0) {
            // Fractional form (e.g. 0.37) -> percentage.
            return round($value * 100, 2);
        }

        return round($value, 2);
    }

    return 0.0;
}
|
||||||
|
|
||||||
|
/**
 * Build a per-source summary from a GET /similarity response for list display.
 * The full detail stays in the Turnitin online report.
 *
 * Source nodes are gathered from the whole response and again from a nested "message"
 * array; entries with the same repository, percentage and word count are emitted once.
 *
 * @return array{score:float,sources:array<int,array<string,mixed>>}
 */
public static function parseSimilarityReportMeta(array $statusResp)
{
    $percentageKeys = ['percentage', 'match_percentage', 'overall_match_percentage', 'similarity_percentage'];
    $repositoryKeys = ['repository', 'repository_name', 'collection', 'source_type', 'type', 'database', 'category'];

    $score = self::extractOverallMatchPercentage($statusResp);

    $nodes = [];
    self::collectSimilaritySourceNodes($statusResp, $nodes, 0);
    if (isset($statusResp['message']) && is_array($statusResp['message'])) {
        self::collectSimilaritySourceNodes($statusResp['message'], $nodes, 0);
    }

    $sources = [];
    $emitted = [];
    foreach ($nodes as $node) {
        if (!is_array($node)) {
            continue;
        }

        // First numeric percentage-like key wins.
        $percentage = null;
        foreach ($percentageKeys as $key) {
            if (isset($node[$key]) && is_numeric($node[$key])) {
                $percentage = floatval($node[$key]);
                break;
            }
        }

        // First non-empty repository-like key wins, normalised to upper case.
        $repository = '';
        foreach ($repositoryKeys as $key) {
            if (!empty($node[$key])) {
                $repository = strtoupper(trim((string) $node[$key]));
                break;
            }
        }

        if (isset($node['matched_word_count'])) {
            $wordCount = intval($node['matched_word_count']);
        } elseif (isset($node['word_count'])) {
            $wordCount = intval($node['word_count']);
        } else {
            $wordCount = 0;
        }

        $fingerprint = $repository . '|' . ($percentage === null ? '' : $percentage) . '|' . $wordCount;
        if (isset($emitted[$fingerprint])) {
            continue;
        }
        $emitted[$fingerprint] = true;

        // Only fields that carry a value are included in the entry.
        $entry = [];
        if ($repository !== '') {
            $entry['repository'] = $repository;
        }
        if ($percentage !== null) {
            $entry['match_percentage'] = $percentage;
        }
        if ($wordCount > 0) {
            $entry['matched_word_count'] = $wordCount;
        }
        $sources[] = $entry;
    }

    return [
        'score' => $score,
        'sources' => $sources,
    ];
}
|
||||||
|
|
||||||
|
/**
 * Recursively collect every array node that carries a repository-like key.
 *
 * Recursion stops once $depth exceeds 8. A child that is a list of arrays is expanded
 * item by item; any other array child is visited as a single node.
 *
 * @param array<string,mixed> $node  Node to inspect (non-arrays are ignored).
 * @param array<int,mixed>    $out   Accumulator, appended to by reference.
 * @param int                 $depth Current recursion depth.
 */
private static function collectSimilaritySourceNodes($node, array &$out, $depth)
{
    if (!is_array($node) || $depth > 8) {
        return;
    }

    foreach (['repository', 'repository_name', 'collection', 'source_type'] as $marker) {
        if (!empty($node[$marker])) {
            $out[] = $node;
            break;
        }
    }

    foreach ($node as $child) {
        if (!is_array($child)) {
            continue;
        }
        $isListOfArrays = isset($child[0]) && is_array($child[0]);
        $targets = $isListOfArrays ? $child : [$child];
        foreach ($targets as $target) {
            self::collectSimilaritySourceNodes($target, $out, $depth + 1);
        }
    }
}
|
||||||
|
|
||||||
|
/**
 * Default view configuration for the online Similarity Report
 * (aligned with the "view by source" layout of the Crossref back office).
 *
 * The default mode comes from turnitin.viewer_default_mode; anything other than
 * match_overview / all_sources falls back to all_sources.
 *
 * @return array{default_mode:string,modes:array<string,bool>}
 */
public function defaultViewerSimilarityBlock()
{
    $allowedModes = ['match_overview', 'all_sources'];
    $configured = strtolower(trim((string) Env::get('turnitin.viewer_default_mode', 'all_sources')));
    $defaultMode = in_array($configured, $allowedModes, true) ? $configured : 'all_sources';

    return [
        'default_mode' => $defaultMode,
        'modes' => [
            'match_overview' => true,
            'all_sources' => true,
        ],
    ];
}
|
||||||
|
|
||||||
|
/**
 * Read an env value as a boolean.
 *
 * Real booleans are returned as-is; otherwise the value is trimmed, lower-cased and
 * considered true only when it is one of "1", "true", "yes" or "on".
 *
 * @param string $name    Env key.
 * @param bool   $default Value used when the key is not set.
 * @return bool
 */
private function envBool($name, $default = false)
{
    $raw = Env::get($name, $default ? '1' : '0');
    if (is_bool($raw)) {
        return $raw;
    }

    $normalized = strtolower(trim((string) $raw));

    return $normalized === '1'
        || $normalized === 'true'
        || $normalized === 'yes'
        || $normalized === 'on';
}
|
||||||
|
|
||||||
|
/**
 * Fetch submission details (used after upload to poll whether parsing has finished).
 * GET /submissions/{id}
 *
 * @return array Decoded JSON (commonly status=ok with id/status inside "message").
 */
public function getSubmission($submissionId)
{
    $endpoint = '/submissions/' . rawurlencode($submissionId);

    return $this->request('GET', $endpoint);
}
|
||||||
|
|
||||||
|
/**
 * Do a single GET /submissions/{id} and decide whether PUT /similarity may be called.
 * Does not sleep, so a queue chain can poll step by step.
 *
 * @return array{ready:bool, failed:bool, status:string, snippet:string, message:array}
 */
public function parseSubmissionIngestState($submissionId)
{
    $readyStates = [
        'COMPLETE', 'COMPLETED', 'PROCESSED', 'READY', 'SUCCEEDED',
        'COMPLETE_PROCESSING',
    ];
    $failedStates = ['ERROR', 'FAILED', 'CANCELLED', 'CANCELED', 'DELETED'];

    $payload = self::unwrapSubmissionPayload($this->getSubmission($submissionId));
    $status = strtoupper(trim((string) self::pickSubmissionStatus($payload)));
    $hasStatus = $status !== '';

    return [
        'ready' => $hasStatus && in_array($status, $readyStates, true),
        'failed' => $hasStatus && in_array($status, $failedStates, true),
        'status' => $status,
        // First 400 characters of the payload, for log and error messages.
        'snippet' => mb_substr(json_encode($payload, JSON_UNESCAPED_UNICODE), 0, 400),
        'message' => $payload,
    ];
}
|
||||||
|
|
||||||
|
/**
 * Block until Turnitin has finished parsing an uploaded submission.
 * Synchronous variant for CLI/debugging only; production should use the
 * PlagiarismWaitIngest queue chain.
 *
 * @param string $submissionId
 * @param int    $maxWaitSec  Maximum wait in seconds, default 600 (values below 30 are raised to 30).
 * @param int    $intervalSec Polling interval in seconds, default 3 (minimum 1).
 * @throws Exception On timeout or when the submission reaches a failed state.
 */
public function waitAfterUploadForSimilarity($submissionId, $maxWaitSec = 600, $intervalSec = 3)
{
    $stopAt = time() + max(30, (int)$maxWaitSec);
    $pause = max(1, (int)$intervalSec);
    $state = ['status' => '', 'snippet' => ''];

    while (time() < $stopAt) {
        $state = $this->parseSubmissionIngestState($submissionId);

        if (!empty($state['ready'])) {
            return;
        }
        if (!empty($state['failed'])) {
            throw new Exception('Turnitin submission failed, status=' . $state['status'] . ' body=' . $state['snippet']);
        }

        sleep($pause);
    }

    throw new Exception(
        'Timeout waiting for Turnitin submission ingest (last status=' . ($state['status'] ?: '(empty)') . ') snippet=' . $state['snippet']
    );
}
|
||||||
|
|
||||||
|
/**
 * Return the inner "message" array of a decoded response when there is one.
 *
 * @param mixed $decoded Decoded response.
 * @return array The "message" array, the response itself, or [] for non-array input.
 */
private static function unwrapSubmissionPayload($decoded)
{
    if (!is_array($decoded)) {
        return [];
    }

    $inner = isset($decoded['message']) ? $decoded['message'] : null;

    return is_array($inner) ? $inner : $decoded;
}
|
||||||
|
|
||||||
|
/**
 * Pick the first non-empty status-like field from a submission payload.
 *
 * The payload itself is searched first, then its nested "submission" array if present.
 *
 * @param array $msg Unwrapped submission payload.
 * @return string Status value, or '' when none is found.
 */
private static function pickSubmissionStatus(array $msg)
{
    $statusKeys = ['status', 'workflow_status', 'submission_status', 'processing_status', 'paper_status'];

    $nested = $msg['submission'] ?? null;
    $scopes = is_array($nested) ? [$msg, $nested] : [$msg];

    foreach ($scopes as $scope) {
        foreach ($statusKeys as $key) {
            if (!empty($scope[$key])) {
                return (string)$scope[$key];
            }
        }
    }

    return '';
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* 查询 similarity 状态
|
* 查询 similarity 状态
|
||||||
* GET /submissions/{id}/similarity
|
* GET /submissions/{id}/similarity
|
||||||
@@ -156,7 +474,7 @@ class TurnitinService
|
|||||||
{
|
{
|
||||||
return $this->request(
|
return $this->request(
|
||||||
'GET',
|
'GET',
|
||||||
'/submissions/' . urlencode($submissionId) . '/similarity'
|
'/submissions/' . rawurlencode($submissionId) . '/similarity'
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -166,25 +484,134 @@ class TurnitinService
|
|||||||
*
|
*
|
||||||
* 返回 viewer_url(数小时有效)
|
* 返回 viewer_url(数小时有效)
|
||||||
*
|
*
|
||||||
* @param array $viewer 可选 viewer 设置 e.g. ['viewer_default_permission_set' => 'INSTRUCTOR']
|
* TCA 要求 default_mode 为小写(如 match_overview);save_changes 等 LTI 字段会导致 400。
|
||||||
|
* Crossref 通道常用 ADMINISTRATOR/USER,非 INSTRUCTOR。可在 .env 配置:
|
||||||
|
* turnitin.viewer_permission_set=ADMINISTRATOR
|
||||||
|
*
|
||||||
|
* @param array $viewer 可选:viewer_user_id、triggered_by(映射为 editor_{id})、或完整请求体覆盖
|
||||||
*/
|
*/
|
||||||
public function getViewerUrl($submissionId, $viewer = [])
{
    $submissionId = trim((string) $submissionId);
    if ($submissionId === '') {
        throw new Exception('submissionId required for viewer-url');
    }

    // A non-empty status other than COMPLETE means the report cannot be opened yet.
    $similarity = $this->getSimilarityStatus($submissionId);
    $reportState = strtoupper(trim((string) ($similarity['status'] ?? '')));
    if (!($reportState === '' || $reportState === 'COMPLETE')) {
        throw new Exception('similarity report not ready for viewer-url, status=' . $reportState);
    }

    $endpoint = '/submissions/' . rawurlencode($submissionId) . '/viewer-url';
    $failure = null;

    // Try each candidate body in priority order; only an HTTP 400 moves on to the next one.
    foreach ($this->buildViewerUrlBodies($viewer) as $candidateBody) {
        try {
            return $this->request('POST', $endpoint, $candidateBody);
        } catch (Exception $e) {
            $failure = $e;
            $isBadRequest = strpos($e->getMessage(), 'HTTP 400') !== false;
            if (!$isBadRequest) {
                throw $e;
            }
        }
    }

    if ($failure) {
        throw $failure;
    }
    throw new Exception('viewer-url failed');
}
|
||||||
|
|
||||||
|
/**
 * Produce candidate viewer-url request bodies in priority order
 * (the caller tries the next one when the previous one fails with a 400).
 *
 * When the overrides already name a viewer_default_permission_set they are used as the
 * single body, with viewer_user_id filled in if missing. Otherwise three bodies are
 * generated per permission set: view settings only, view settings plus the default
 * mode block, and a minimal body without a "similarity" section.
 *
 * @return array<int,array>
 */
private function buildViewerUrlBodies(array $viewerOverrides)
{
    if (isset($viewerOverrides['viewer_default_permission_set'])) {
        $custom = $viewerOverrides;
        if (empty($custom['viewer_user_id'])) {
            $custom['viewer_user_id'] = $this->resolveViewerUserId($viewerOverrides);
        }

        return [$custom];
    }

    $locale = trim((string) Env::get('turnitin.viewer_locale', 'en-US')) ?: 'en-US';

    // turnitin.viewer_permission_set may hold a comma-separated list.
    $configuredSets = trim((string) Env::get('turnitin.viewer_permission_set', ''));
    if ($configuredSets !== '') {
        $permissionSets = array_map('trim', explode(',', $configuredSets));
    } else {
        $permissionSets = $this->defaultViewerPermissionSets();
    }

    $viewerUserId = $this->resolveViewerUserId($viewerOverrides);
    $viewSettings = ['save_changes' => $this->envBool('turnitin.viewer_save_changes', false)];
    $similarityWithModes = array_merge($this->defaultViewerSimilarityBlock(), [
        'view_settings' => $viewSettings,
    ]);

    $bodies = [];
    foreach ($permissionSets as $permissionSet) {
        if ($permissionSet === '') {
            continue;
        }

        // TCA authentication requires viewer_user_id (omitting it used to yield 400 Bad request).
        $common = [
            'viewer_user_id' => $viewerUserId,
            'locale' => $locale,
            'viewer_default_permission_set' => $permissionSet,
        ];

        $bodies[] = $common + ['similarity' => ['view_settings' => $viewSettings]];
        $bodies[] = $common + ['similarity' => $similarityWithModes];
        $bodies[] = $common;
    }

    return $bodies;
}
|
||||||
|
|
||||||
|
/**
 * Resolve the mandatory viewer_user_id for viewer-url requests. It uses the same
 * editor_{user_id} namespace as the owner/submitter of createSubmission.
 *
 * Precedence: explicit viewer_user_id, editor_id, triggered_by, the
 * turnitin.viewer_user_id env value, then "{integrationName}_viewer" ("tmr_viewer"
 * when the integration name is blank).
 *
 * @param array $opts Optional keys: viewer_user_id, editor_id, triggered_by.
 * @return string
 */
public function resolveViewerUserId(array $opts = [])
{
    if (!empty($opts['viewer_user_id'])) {
        return trim((string) $opts['viewer_user_id']);
    }

    // The person opening the report (current editor) must match the user the viewer-url
    // was requested for, otherwise session authentication tends to fail.
    foreach (['editor_id', 'triggered_by'] as $idKey) {
        $numericId = isset($opts[$idKey]) ? intval($opts[$idKey]) : 0;
        if ($numericId > 0) {
            return 'editor_' . $numericId;
        }
    }

    $configured = trim((string) Env::get('turnitin.viewer_user_id', ''));
    if ($configured !== '') {
        return $configured;
    }

    $integration = trim((string) $this->integrationName);
    if ($integration === '') {
        $integration = 'tmr';
    }

    return $integration . '_viewer';
}
|
||||||
|
|
||||||
|
/**
 * Permission sets to try when none is configured. Crossref Similarity Check usually
 * does not use INSTRUCTOR, so commonly available roles are tried first there.
 *
 * @return array<int,string>
 */
private function defaultViewerPermissionSets()
{
    $isCrossref = stripos($this->baseUrl, 'crossref') !== false;

    return $isCrossref
        ? ['ADMINISTRATOR', 'USER', 'EDITOR', 'INSTRUCTOR']
        : ['INSTRUCTOR', 'ADMINISTRATOR', 'USER'];
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -196,12 +623,13 @@ class TurnitinService
|
|||||||
public function requestPdfReport($submissionId, $opts = [])
{
    // Locale follows the viewer locale setting; view settings use the same defaults as PUT /similarity.
    $defaults = [
        'locale' => trim((string) Env::get('turnitin.viewer_locale', 'en-US')) ?: 'en-US',
        'view_settings' => $this->defaultViewSettings(),
    ];
    $payload = array_merge($defaults, $opts);
    $endpoint = '/submissions/' . rawurlencode($submissionId) . '/similarity/pdf';

    return $this->request('POST', $endpoint, $payload);
}
|
||||||
|
|||||||
463
application/common/UserFieldAiService.php
Normal file
463
application/common/UserFieldAiService.php
Normal file
@@ -0,0 +1,463 @@
|
|||||||
|
<?php
|
||||||
|
|
||||||
|
namespace app\common;
|
||||||
|
|
||||||
|
use think\Db;
|
||||||
|
use think\Env;
|
||||||
|
use think\Exception;
|
||||||
|
use think\Queue;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 根据投稿记录 / 审稿人资料,用大模型总结用户主领域(中文)写入 field_ai。
|
||||||
|
* 队列链:UserFieldAiFill → 处理一条 → enqueueNextFieldAi → 下一条。
|
||||||
|
*/
|
||||||
|
class UserFieldAiService
|
||||||
|
{
|
||||||
|
const QUEUE_NAME = 'UserFieldAi';
|
||||||
|
|
||||||
|
const STATUS_PENDING = 0;
|
||||||
|
const STATUS_DONE = 1;
|
||||||
|
const STATUS_INSUFFICIENT = 2;
|
||||||
|
const STATUS_FAILED = 3;
|
||||||
|
|
||||||
|
private $logFile;
|
||||||
|
|
||||||
|
/**
 * Point the service log at runtime/user_field_ai.log under ROOT_PATH.
 */
public function __construct()
{
    $runtimeDir = ROOT_PATH . 'runtime';
    $this->logFile = $runtimeDir . DS . 'user_field_ai.log';
}
|
||||||
|
|
||||||
|
/**
 * Start the chained processing: look for the first pending user after user_id=0.
 *
 * @param bool   $force True to recompute users that already have a result.
 * @param int    $delay Delay passed through to the enqueue step.
 * @param string $queue Queue name ('' selects the default queue).
 * @return bool Whether a first job was pushed.
 */
public function startChain($force = false, $delay = 1, $queue = '')
{
    $startAfterUserId = 0;

    return $this->enqueueNextFieldAi($delay, $queue, $startAfterUserId, $force);
}
|
||||||
|
|
||||||
|
/**
 * Chain step: find the next pending user with user_id > $afterUserId and enqueue a job.
 *
 * @param int    $delay       Seconds to delay the job; 0 or less pushes it immediately.
 * @param string $queue       Queue name ('' falls back to self::QUEUE_NAME).
 * @param int    $afterUserId Cursor: only users with a larger id are considered.
 * @param bool   $force       True to include users that already have a result.
 * @return bool True when a job was enqueued, false when the chain has finished.
 */
public function enqueueNextFieldAi($delay = 1, $queue = '', $afterUserId = 0, $force = false)
{
    $queueName = $queue === '' ? self::QUEUE_NAME : $queue;
    $cursor = intval($afterUserId);

    $nextUserId = $this->findNextPendingUserId($cursor, $force);
    if ($nextUserId <= 0) {
        $this->log('[FieldAi] chain finished after user_id=' . $cursor . ' force=' . ($force ? '1' : '0'));
        return false;
    }

    $handler = 'app\\api\\job\\UserFieldAiFill@fire';
    $payload = [
        'user_id' => $nextUserId,
        'queue' => $queueName,
        'force' => $force ? 1 : 0,
    ];

    if ($delay > 0) {
        Queue::later($delay, $handler, $payload, $queueName);
    } else {
        Queue::push($handler, $payload, $queueName);
    }

    $this->log('[FieldAi] enqueued user_id=' . $nextUserId . ' queue=' . $queueName);

    return true;
}
|
||||||
|
|
||||||
|
/**
 * Process a single user (called from the queue job or synchronously for debugging).
 *
 * Skips users that already have a stored result unless $force is set, marks users with
 * too little data as insufficient, and otherwise stores the LLM summary. Any failure
 * while summarising is recorded with STATUS_FAILED and returned as an error.
 *
 * @return array{ok:bool, skipped?:bool, insufficient?:bool, field_ai?:string, error?:string}
 */
public function processUser($userId, $force = false)
{
    $uid = intval($userId);
    if ($uid <= 0) {
        return ['ok' => false, 'error' => 'invalid user_id'];
    }

    $this->ensureReviewerInfoRow($uid);
    $profile = Db::name('user_reviewer_info')->where('reviewer_id', $uid)->find();
    if (!$profile) {
        return ['ok' => false, 'error' => 'reviewer_info missing'];
    }

    if (!$force
        && intval($profile['field_ai_status']) === self::STATUS_DONE
        && trim((string)$profile['field_ai']) !== ''
    ) {
        return ['ok' => true, 'skipped' => true, 'field_ai' => (string)$profile['field_ai']];
    }

    if (!$this->isEligible($uid, $profile)) {
        $this->updateFieldAi($uid, '', self::STATUS_INSUFFICIENT, 'insufficient profile/articles');
        return ['ok' => true, 'insufficient' => true];
    }

    try {
        $summary = $this->summarizeWithLlm($this->buildContext($uid, $profile));
        if ($summary === '') {
            throw new Exception('LLM returned empty field');
        }
        $this->updateFieldAi($uid, $summary, self::STATUS_DONE, '');

        return ['ok' => true, 'field_ai' => $summary];
    } catch (\Throwable $e) {
        $reason = $e->getMessage();
        // The stored error text is capped at 500 characters.
        $this->updateFieldAi($uid, '', self::STATUS_FAILED, mb_substr($reason, 0, 500));
        $this->log('[FieldAi] user_id=' . $uid . ' fail: ' . $reason);

        return ['ok' => false, 'error' => $reason];
    }
}
|
||||||
|
|
||||||
|
/**
 * Whether a user can be summarised: has submitted articles, or has a reasonably
 * complete reviewer profile.
 *
 * @param int        $userId
 * @param array|null $uri Reviewer-info row; loaded from the database when null.
 * @return bool
 */
public function isEligible($userId, $uri = null)
{
    if ($this->hasSubmittedArticles($userId)) {
        return true;
    }

    $profile = $uri !== null
        ? $uri
        : Db::name('user_reviewer_info')->where('reviewer_id', $userId)->find();

    return $this->isReviewerProfileComplete($profile);
}
|
||||||
|
|
||||||
|
/**
 * Whether the user has at least one article with a non-empty title.
 *
 * @param int $userId
 * @return bool
 */
public function hasSubmittedArticles($userId)
{
    $titledArticles = Db::name('article')
        ->where('user_id', intval($userId))
        ->where('title', '<>', '');

    return $titledArticles->count() > 0;
}
|
||||||
|
|
||||||
|
/**
 * A reviewer profile counts as "reasonably complete" when enough of its fields are filled.
 *
 * The threshold is user_field_ai.min_profile_fields (default 4, never below 3). Counted
 * items: seven text fields, the legacy "major" column (when not blank or "0"), and the
 * presence of at least one active major_to_user row.
 *
 * @param array|mixed $uri Reviewer-info row.
 * @return bool
 */
public function isReviewerProfileComplete($uri)
{
    if (!$uri || !is_array($uri)) {
        return false;
    }

    $required = max(3, (int) Env::get('user_field_ai.min_profile_fields', 4));

    $hasText = function ($key) use ($uri) {
        return !empty($uri[$key]) && trim((string)$uri[$key]) !== '';
    };

    $filledCount = 0;
    foreach (['field', 'company', 'country', 'technical', 'introduction', 'department', 'website'] as $key) {
        if ($hasText($key)) {
            $filledCount++;
        }
    }

    if ($hasText('major') && trim((string)$uri['major']) !== '0') {
        $filledCount++;
    }

    $activeMajors = Db::name('major_to_user')
        ->where('user_id', intval($uri['reviewer_id']))
        ->where('state', 0)
        ->count();
    if ($activeMajors > 0) {
        $filledCount++;
    }

    return $filledCount >= $required;
}
|
||||||
|
|
||||||
|
/**
 * Scan users in ascending id order, in batches, and return the next one to process.
 *
 * Batch size is user_field_ai.scan_batch (default 80, never below 20). Without $force
 * only users whose reviewer-info status is pending or failed are selected, plus users
 * matched by the reviewer_info_id 'null' condition (presumably users with no
 * reviewer-info row yet; confirm against the ThinkPHP null-condition semantics).
 * Ineligible users are marked STATUS_INSUFFICIENT so later scans skip them.
 *
 * @param int  $afterUserId Only users with a larger id are considered.
 * @param bool $force       True to include users that already have a result.
 * @return int Next user id, or 0 when no candidate is left.
 */
private function findNextPendingUserId($afterUserId, $force)
{
    $batch = max(20, (int) Env::get('user_field_ai.scan_batch', 80));
    $cursor = intval($afterUserId);

    while (true) {
        $query = Db::name('user')->alias('u')
            ->join('t_user_reviewer_info uri', 'uri.reviewer_id = u.user_id', "left")
            ->where('u.user_id', '>', $cursor);
        if (!$force) {
            $query->where(function ($q) {
                $q->where('uri.field_ai_status', self::STATUS_PENDING)
                    ->whereOr('uri.field_ai_status', self::STATUS_FAILED)
                    ->whereOr('uri.reviewer_info_id', 'null');
            });
        }
        $ids = $query->order('u.user_id asc')->limit($batch)->column('u.user_id');

        if (empty($ids)) {
            return 0;
        }

        foreach ($ids as $uid) {
            $uid = intval($uid);
            // Advance the cursor first so the next batch always starts after this user.
            $cursor = $uid;

            $this->ensureReviewerInfoRow($uid);
            $uri = Db::name('user_reviewer_info')->where('reviewer_id', $uid)->find();
            if (!$uri) {
                // Same guard as processUser(): without a row there is no status to read,
                // and indexing a null result would raise a warning.
                $this->log('[FieldAi] user_id=' . $uid . ' skipped: reviewer_info missing');
                continue;
            }

            $status = intval($uri['field_ai_status']);
            if (!$force && ($status === self::STATUS_DONE || $status === self::STATUS_INSUFFICIENT)) {
                continue;
            }

            if ($this->isEligible($uid, $uri)) {
                return $uid;
            }

            if (!$force) {
                $this->updateFieldAi($uid, '', self::STATUS_INSUFFICIENT, 'auto skip: insufficient data');
            }
        }
    }
}
|
||||||
|
|
||||||
|
/**
 * Assemble the data sent to the LLM: basic user info, reviewer profile and recent articles.
 *
 * The newest articles with a non-empty title are used, up to user_field_ai.max_articles
 * (default 5, clamped to 1..10). Abstracts are cut to 800 characters and the profile
 * introduction to 1200.
 *
 * @param int   $userId
 * @param array $uri Reviewer-info row.
 * @return array{user:array, profile:array, articles:array}
 */
private function buildContext($userId, array $uri)
{
    $user = Db::name('user')->where('user_id', $userId)->field('user_id,realname,email,account')->find();
    $majorTitles = $this->resolveMajorTitles($userId, $uri);

    $articleLimit = max(1, min(10, (int) Env::get('user_field_ai.max_articles', 5)));
    $articles = Db::name('article')
        ->where('user_id', $userId)
        ->where('title', '<>', '')
        ->order('article_id desc')
        ->limit($articleLimit)
        ->field('article_id,title,keywords,abstrart,journal_id,ctime')
        ->select();

    // Journal titles keyed by journal_id, loaded only for journals the articles reference.
    $journalTitles = [];
    if (!empty($articles)) {
        $journalIds = array_unique(array_filter(array_column($articles, 'journal_id')));
        if (!empty($journalIds)) {
            $journalTitles = Db::name('journal')->where('journal_id', 'in', $journalIds)->column('title', 'journal_id');
        }
    }

    $articleBlocks = [];
    foreach ($articles as $article) {
        $journalId = intval($article['journal_id']);
        $journalTitle = isset($journalTitles[$journalId]) ? (string) $journalTitles[$journalId] : '';
        // "abstrart" is the column name as selected above.
        $abstract = trim((string) ($article['abstrart'] ?? ''));

        $articleBlocks[] = [
            'title' => (string) $article['title'],
            'journal' => $journalTitle,
            'keywords' => (string) ($article['keywords'] ?? ''),
            'abstract' => mb_substr($abstract, 0, 800),
        ];
    }

    $profileText = function ($key) use ($uri) {
        return trim((string) ($uri[$key] ?? ''));
    };

    return [
        'user' => [
            'realname' => $user ? (string) $user['realname'] : '',
            'email' => $user ? (string) $user['email'] : '',
        ],
        'profile' => [
            'field' => $profileText('field'),
            'technical' => $profileText('technical'),
            'company' => $profileText('company'),
            'department' => $profileText('department'),
            'country' => $profileText('country'),
            'introduction' => mb_substr($profileText('introduction'), 0, 1200),
            'website' => $profileText('website'),
            'majors' => $majorTitles,
        ],
        'articles' => $articleBlocks,
    ];
}
|
||||||
|
|
||||||
|
/**
 * Resolve the titles of a user's majors.
 *
 * Active major_to_user links are used first; when they yield no titles, the
 * comma-separated ids in the reviewer-info "major" column are used instead.
 *
 * @param int   $userId
 * @param array $uri Reviewer-info row.
 * @return array<int,string> Trimmed, non-empty, de-duplicated titles.
 */
private function resolveMajorTitles($userId, array $uri)
{
    $titles = [];

    $linkedIds = Db::name('major_to_user')->where('user_id', $userId)->where('state', 0)->column('major_id');
    if (!empty($linkedIds)) {
        $titles = Db::name('reviewer_major')->where('major_id', 'in', $linkedIds)->where('state', 0)->column('title');
    }

    if (empty($titles) && !empty($uri['major'])) {
        $legacyIds = array_filter(array_map('intval', explode(',', (string) $uri['major'])));
        if (!empty($legacyIds)) {
            $titles = Db::name('reviewer_major')->where('major_id', 'in', $legacyIds)->column('title');
        }
    }

    $cleaned = array_filter(array_map('trim', $titles));

    return array_values(array_unique($cleaned));
}
|
||||||
|
|
||||||
|
/**
 * Resolve the full URL of the OpenAI-compatible chat/completions endpoint.
 *
 * base.model_url is often just the site root (e.g. http://chat.taimed.cn); POSTing to
 * it directly returns 404, so every candidate is passed through
 * normalizeChatCompletionsUrl().
 *
 * user_field_ai.chat_url is consulted first, matching resolveLlmModel(), which prefers
 * user_field_ai.chat_model. Before this change the dedicated key was never read. When
 * it is unset the result is the same as before: base.model_url. The original left the
 * promotion / expert_country / citation URL fallbacks disabled, and that is preserved.
 *
 * @return string Normalised URL, or '' when nothing is configured.
 */
private function resolveLlmChatUrl()
{
    $candidates = [
        Env::get('user_field_ai.chat_url', ''),
        Env::get('base.model_url', ''),
    ];

    foreach ($candidates as $candidate) {
        $candidate = trim((string) $candidate);
        if ($candidate === '') {
            continue;
        }
        $normalized = $this->normalizeChatCompletionsUrl($candidate);
        if ($normalized !== '') {
            return $normalized;
        }
    }

    return '';
}
|
||||||
|
|
||||||
|
/**
 * Turn a configured LLM address into a full chat/completions URL.
 *
 * - A URL that already contains "chat/completions" is returned unchanged.
 * - A base ending in "/v1" gets "/chat/completions" appended. Previously this produced
 *   ".../v1/v1/chat/completions".
 * - Anything else is treated as a site root and gets "/v1/chat/completions" appended.
 *
 * @param string $url Configured address.
 * @return string Full endpoint URL, or '' for blank input.
 */
private function normalizeChatCompletionsUrl($url)
{
    $url = trim((string) $url);
    if ($url === '') {
        return '';
    }
    if (stripos($url, 'chat/completions') !== false) {
        return $url;
    }

    $base = rtrim($url, '/');
    if (preg_match('~/v1$~i', $base) === 1) {
        return $base . '/chat/completions';
    }

    return $base . '/v1/chat/completions';
}
|
||||||
|
|
||||||
|
/**
 * Pick the LLM model name: the first configured value that is neither blank nor the
 * placeholder "your-model-name" (case-insensitive), ending with the literal "gpt-4.1".
 *
 * @return string
 */
private function resolveLlmModel()
{
    $configured = [
        Env::get('user_field_ai.chat_model', ''),
        Env::get('base.model', ''),
        Env::get('promotion.promotion_llm_model', ''),
        Env::get('expert_country_chat_model', ''),
        Env::get('citation_chat_model', ''),
        'gpt-4.1',
    ];

    foreach ($configured as $candidate) {
        $name = trim((string) $candidate);
        if ($name === '' || strtolower($name) === 'your-model-name') {
            continue;
        }

        return $name;
    }

    return '';
}
|
||||||
|
|
||||||
|
/**
 * Ask the configured OpenAI-compatible chat endpoint to summarise a user's
 * main research field.
 *
 * The context array is JSON-encoded and embedded in the user message; the
 * model is instructed to answer with {"field_ai":"..."}.
 *
 * @param array $context Submission/profile data describing the user.
 * @return string Cleaned field summary; '' when the reply was empty.
 * @throws Exception When the endpoint is not configured, the request cannot
 *                   be JSON-encoded, cURL fails, the HTTP status is not 2xx,
 *                   or a JSON response carries no completion text.
 */
private function summarizeWithLlm(array $context)
{
    $url = $this->resolveLlmChatUrl();
    $model = $this->resolveLlmModel();
    $apiKey = trim((string) Env::get('user_field_ai.chat_api_key', Env::get('expert_country_chat_api_key', Env::get('citation_chat_api_key', ''))));
    if ($url === '' || $model === '') {
        throw new Exception('user_field_ai chat not configured (set user_field_ai.chat_url or promotion PROMOTION_LLM_URL / base.model_url)');
    }

    // json_encode() returns false on e.g. invalid UTF-8; without this check
    // the model would be queried with an empty payload and invent an answer.
    $payloadJson = json_encode($context, JSON_UNESCAPED_UNICODE | JSON_UNESCAPED_SLASHES);
    if ($payloadJson === false) {
        throw new Exception('LLM context json_encode failed: ' . json_last_error_msg());
    }

    $messages = [
        [
            'role' => 'system',
            'content' => '你是学术领域分类助手。根据用户的投稿与个人资料,用简体中文给出该用户最主要的研究领域总结。'
                . '要求:精确、简洁,1~3 个中文领域词或短短语,用顿号分隔;不要解释、不要英文、不要 JSON 以外的多余文字。'
                . '只输出 JSON:{"field_ai":"..."}。',
        ],
        [
            'role' => 'user',
            'content' => "请根据以下 JSON 资料总结该用户的主要研究领域:\n" . $payloadJson,
        ],
    ];

    $body = [
        'model' => $model,
        'temperature' => 0.2,
        'messages' => $messages,
    ];
    $requestJson = json_encode($body, JSON_UNESCAPED_UNICODE);
    if ($requestJson === false) {
        throw new Exception('LLM request json_encode failed: ' . json_last_error_msg());
    }

    // The Authorization header is only sent when an API key is configured.
    $headers = ['Content-Type: application/json'];
    if ($apiKey !== '') {
        $headers[] = 'Authorization: Bearer ' . $apiKey;
    }

    $ch = curl_init();
    curl_setopt_array($ch, [
        CURLOPT_URL => $url,
        CURLOPT_POST => true,
        CURLOPT_POSTFIELDS => $requestJson,
        CURLOPT_RETURNTRANSFER => true,
        CURLOPT_CONNECTTIMEOUT => 15,
        // Total timeout is configurable but never below 30 seconds.
        CURLOPT_TIMEOUT => max(30, (int) Env::get('user_field_ai.timeout', 90)),
        CURLOPT_HTTPHEADER => $headers,
    ]);
    $raw = curl_exec($ch);
    $code = (int) curl_getinfo($ch, CURLINFO_HTTP_CODE);
    $err = curl_error($ch);
    curl_close($ch);

    if ($raw === false) {
        throw new Exception('LLM curl error: ' . $err);
    }
    if ($code < 200 || $code >= 300) {
        $hint = ($code === 404 && stripos($url, 'chat/completions') === false)
            ? ' (chat_url may be missing /v1/chat/completions)'
            : '';
        throw new Exception('LLM HTTP ' . $code . $hint . ': ' . mb_substr((string) $raw, 0, 400));
    }

    $data = json_decode($raw, true);

    // Normal OpenAI-style response: use the assistant message text.
    if (is_array($data) && isset($data['choices'][0]['message']['content'])) {
        $content = trim((string) $data['choices'][0]['message']['content']);
        $fieldAi = $this->parseFieldAiFromContent($content);
        if ($fieldAi === '' && $content !== '') {
            // The model ignored the JSON instruction; fall back to its plain text.
            $fieldAi = $this->cleanFieldAiText($content);
        }

        return $fieldAi;
    }

    // Non-standard response: the body itself may still carry {"field_ai": ...}.
    $content = trim((string) $raw);
    $fieldAi = $this->parseFieldAiFromContent($content);
    if ($fieldAi !== '') {
        return $fieldAi;
    }

    // A JSON body with neither a completion nor field_ai (e.g. an error
    // envelope returned with HTTP 200) must not be stored as a research field.
    if (is_array($data)) {
        throw new Exception('LLM response has no completion content: ' . mb_substr($content, 0, 400));
    }

    // Plain-text body: keep the best-effort behaviour of using it as the answer.
    return $content !== '' ? $this->cleanFieldAiText($content) : '';
}
|
||||||
|
|
||||||
|
/**
 * Extract the "field_ai" value from a model reply.
 *
 * Markdown code fences are stripped first. The first "{...}" span found in
 * the text is tried as JSON, then the whole text. A "field_ai" value given as
 * a JSON array is joined with "、" instead of being cast to the literal
 * string "Array".
 *
 * @param mixed $content Raw reply text (cast to string).
 * @return string Cleaned field text, or '' when no non-empty field_ai is found.
 */
private function parseFieldAiFromContent($content)
{
    $content = trim((string) $content);
    if ($content === '') {
        return '';
    }

    // Drop ```json ... ``` fences the model may wrap around its answer.
    $unfenced = preg_replace('/^```[a-zA-Z]*\s*|```$/m', '', $content);
    if (is_string($unfenced)) {
        $content = $unfenced;
    }

    // Candidate JSON documents, in priority order.
    $candidates = [];
    if (preg_match('/\{.*\}/s', $content, $m)) {
        $candidates[] = $m[0];
    }
    $candidates[] = $content;

    foreach ($candidates as $json) {
        $obj = json_decode($json, true);
        if (!is_array($obj) || empty($obj['field_ai'])) {
            continue;
        }

        $value = $obj['field_ai'];
        if (is_array($value)) {
            // Some models answer with a list of fields; keep the scalar items.
            $parts = [];
            foreach ($value as $item) {
                if (!is_scalar($item)) {
                    continue;
                }
                $item = trim((string) $item);
                if ($item !== '') {
                    $parts[] = $item;
                }
            }
            $value = implode('、', $parts);
        }

        return $this->cleanFieldAiText((string) $value);
    }

    return '';
}
|
||||||
|
|
||||||
|
/**
 * Normalise a field summary for storage.
 *
 * Surrounding whitespace and quote characters are stripped, all remaining
 * whitespace is removed, and the result is capped at 200 characters.
 *
 * @param mixed $text Raw text (cast to string).
 * @return string Normalised text.
 */
private function cleanFieldAiText($text)
{
    $unquoted = trim(trim((string) $text), "\"' \t\n\r");
    $compact = preg_replace('/\s+/u', '', $unquoted);

    return mb_strlen($compact) > 200 ? mb_substr($compact, 0, 200) : $compact;
}
|
||||||
|
|
||||||
|
/**
 * Make sure a user_reviewer_info row exists for the given user.
 *
 * When no row with this reviewer_id is found, one is inserted with state 0.
 *
 * @param mixed $userId Value matched against / stored in reviewer_id.
 * @return void
 */
public function ensureReviewerInfoRow($userId)
{
    $row = Db::name('user_reviewer_info')->where('reviewer_id', $userId)->find();

    if (!$row) {
        Db::name('user_reviewer_info')->insert([
            'reviewer_id' => $userId,
            'state' => 0,
        ]);
    }
}
|
||||||
|
|
||||||
|
/**
 * Persist the AI field summary and its status on the user's reviewer-info row.
 *
 * @param mixed $userId  Value matched against reviewer_id.
 * @param mixed $fieldAi Summary text; trimmed and truncated to 512 characters.
 * @param mixed $status  Status code, stored as an integer. NOTE(review): the
 *                       accompanying migration comment lists 0 pending,
 *                       1 generated, 2 skipped, 3 failed - confirm.
 * @param mixed $note    Optional log note; nothing is logged when it is ''.
 * @return void
 */
private function updateFieldAi($userId, $fieldAi, $status, $note)
{
    $summary = mb_substr(trim((string) $fieldAi), 0, 512);

    Db::name('user_reviewer_info')
        ->where('reviewer_id', $userId)
        ->update([
            'field_ai' => $summary,
            'field_ai_status' => intval($status),
            'field_ai_utime' => time(),
        ]);

    if ($note === '') {
        return;
    }

    $this->log('[FieldAi] user_id=' . $userId . ' status=' . $status . ' note=' . $note);
}
|
||||||
|
|
||||||
|
/**
 * Append a timestamped line to the log file at $this->logFile.
 *
 * Write failures are deliberately suppressed so that logging cannot
 * interrupt the caller.
 *
 * @param string $msg Message to record.
 * @return void
 */
public function log($msg)
{
    $timestamp = date('Y-m-d H:i:s');

    @file_put_contents($this->logFile, $timestamp . ' ' . $msg . PHP_EOL, FILE_APPEND);
}
|
||||||
|
}
|
||||||
5
sql/add_field_ai_to_user_reviewer_info.sql
Normal file
5
sql/add_field_ai_to_user_reviewer_info.sql
Normal file
@@ -0,0 +1,5 @@
|
|||||||
|
-- AI-generated summary (in Chinese) of the user's main research field,
-- written by a chained queue job.
-- Adds three columns to t_user_reviewer_info: the summary text, its
-- processing status, and the time of the last update.
|
||||||
|
ALTER TABLE `t_user_reviewer_info`
|
||||||
|
ADD COLUMN `field_ai` VARCHAR(512) NOT NULL DEFAULT '' COMMENT 'AI总结的主要研究领域(中文)' AFTER `field`,
|
||||||
|
ADD COLUMN `field_ai_status` TINYINT NOT NULL DEFAULT 0 COMMENT '0待处理 1已生成 2资料不足跳过 3失败' AFTER `field_ai`,
|
||||||
|
ADD COLUMN `field_ai_utime` INT NOT NULL DEFAULT 0 COMMENT 'field_ai 更新时间' AFTER `field_ai_status`;
|
||||||
4
sql/add_plagiarism_check_type.sql
Normal file
4
sql/add_plagiarism_check_type.sql
Normal file
@@ -0,0 +1,4 @@
|
|||||||
|
-- Plagiarism check type: "full" (whole manuscript) or "body_only" (title,
-- authors and references are cut before upload).
-- Adds the check type and the relative path of the derived manuscript file
-- to t_plagiarism_check.
|
||||||
|
ALTER TABLE `t_plagiarism_check`
|
||||||
|
ADD COLUMN `check_type` VARCHAR(16) NOT NULL DEFAULT 'full' COMMENT 'full=全文 body_only=仅正文' AFTER `trigger_source`,
|
||||||
|
ADD COLUMN `derived_file_path` VARCHAR(255) NOT NULL DEFAULT '' COMMENT 'body_only 时生成的临时稿件相对路径' AFTER `source_file_size`;
|
||||||
Reference in New Issue
Block a user