305 lines
13 KiB
PHP
305 lines
13 KiB
PHP
<?php
|
||
|
||
namespace app\api\controller;
|
||
|
||
use app\api\controller\Base;
|
||
use think\Db;
|
||
use app\common\OpenAi;
|
||
use app\common\Article;
|
||
/**
|
||
* @title AI审核文章
|
||
* @description 对接OPENAI接口
|
||
*/
|
||
class Aireview extends Base
|
||
{
|
||
|
||
/**
 * @title AI article review
 * @description Validates the request, collects the material needed for the requested
 *              question field(s), builds the prompt and forwards it to the OpenAi helper.
 * @param article_id     Article ID (required)
 * @param question_level Question level, defaults to 'A'
 * @param queue_fields   AI question field(s) to process, comma separated, defaults to 'journal_scope'
 * @param api_model      Model name passed to the API, defaults to 'DeepSeek-V3'
 * @return mixed JSON string on early exit (status 2: no article_id, 3: article not found,
 *               1: review already finished, 4: journal / domain / content missing,
 *               5: reference section or prompt missing); otherwise the value returned by
 *               OpenAi::articleReviewDeal() or OpenAi::articleReviewDealChunk().
 */
public function review($aParam = []){

    // Use the explicit argument when given, otherwise the POST payload.
    $aParam = empty($aParam) ? $this->request->post() : $aParam;
    $iArticleId = empty($aParam['article_id']) ? '' : $aParam['article_id'];
    if(empty($iArticleId)){
        return json_encode(['status' => 2,'msg' => 'Please select an article']);
    }

    // Question level.
    $sQuestionLevel = empty($aParam['question_level']) ? 'A' : $aParam['question_level'];
    // AI question field(s) to process.
    $sQuestionFields = empty($aParam['queue_fields']) ? 'journal_scope' : $aParam['queue_fields'];
    $aQuestionFields = explode(',', $sQuestionFields);

    // Load the article together with any existing AI review.
    $aResult = json_decode($this->get(['article_id' => $iArticleId]),true);

    // Article data.
    $aArticle = empty($aResult['article_data']) ? [] : $aResult['article_data'];
    if(empty($aArticle)){
        return json_encode(['status' => 3,'msg' => 'No articles requiring review were found']);
    }

    // Existing AI review: nothing to do when it is already finished.
    $aAiReview = empty($aResult['data']) ? [] : $aResult['data'];
    if(!empty($aAiReview['is_finish']) && $aAiReview['is_finish'] == 1){
        return json_encode(['status' => 1,'msg' => 'AI has been reviewed','data' => $aAiReview]);
    }

    // Shared helper functions.
    $oHelperFunction = new \app\common\HelperFunction;

    // Collect the ISSNs of the journals whose scope is needed.
    $aJournalIssn = [];
    $sIssn = $sScope = '';
    if(in_array('journal_scope', $aQuestionFields)){
        $aJournalWhere = ['journal_id' => $aArticle['journal_id'],'state' => 0];
        $aJournal = Db::table('t_journal')->field('title as targeted_journals,scope as journal_scope,issn')->where($aJournalWhere)->find();
        if(empty($aJournal)){
            return json_encode(['status' => 4,'msg' => 'This article is not associated with a journal']);
        }
        // Array union: keys already present on the article are kept.
        $aArticle += $aJournal;
        $sIssn = empty($aJournal['issn']) ? '' : $aJournal['issn'];
        $sScope = empty($aJournal['journal_scope']) ? '' : $aJournal['journal_scope'];
        $aJournalIssn = array_merge($aJournalIssn,[$sIssn]);
    }
    if(in_array('other_journal', $aQuestionFields)){
        $aOtherWhere = ['journal_id' => ['<>',$aArticle['journal_id']],'state' => 0];
        $aJournalOther = Db::table('t_journal')->field('title as targeted_journals,scope as journal_scope,issn')->where($aOtherWhere)->select();
        if(!empty($aJournalOther)){
            $aJournalIssn = array_merge($aJournalIssn,array_column($aJournalOther, 'issn'));
        }
    }

    // Resolve the journal scope texts for the collected ISSNs.
    if(!empty($aJournalIssn)){
        $oArticle = new Article;
        $aJournalPaperArt = json_decode($oArticle->getJournalPaperArt(['issn' => $aJournalIssn]),true);
        $aJournalPaperArt = empty($aJournalPaperArt['data']) ? [] : $aJournalPaperArt['data'];
        // Prefer the looked-up text for the article's own journal, else the scope column.
        $aArticle['journal_scope'] = empty($aJournalPaperArt[$sIssn]) ? $sScope : $oHelperFunction->filterAllTags($aJournalPaperArt[$sIssn]);
        if(!empty($aJournalPaperArt[$sIssn])){
            unset($aJournalPaperArt[$sIssn]);
        }
        // Whatever remains belongs to the other journals.
        $aArticle['other_journal'] = empty($aJournalPaperArt) ? '' : json_encode($aJournalPaperArt,JSON_UNESCAPED_UNICODE);
    }

    // Research domains of the article (hotspot question only).
    if($sQuestionFields == 'hotspot'){
        $oArticle = new Article;
        // NOTE(review): unlike the other Article calls here, this result is not
        // json_decoded - confirm getArticleField() returns an array.
        $aArticleField = $oArticle->getArticleField(['article_id' => $iArticleId]);
        $aArticleField = empty($aArticleField['data']) ? [] : $aArticleField['data'];
        if(empty($aArticleField)){
            return json_encode(['status' => 4,'msg' => 'Domain where article not found:'.$sQuestionFields]);
        }
        $aArticleFieldData = [];
        foreach ($aArticleField as $value) {
            $aArticleFieldData[] = getMajorStr($value['major_id']);
        }
        $aArticle['hotspot'] = json_encode($aArticleFieldData,JSON_UNESCAPED_UNICODE);
    }

    // Prompt builder / API client.
    $oOpenAi = new OpenAi;

    // Every other question field needs the article text itself.
    $aChunkContent = [];
    if(!in_array($sQuestionFields, ['journal_scope','other_journal','hotspot','ai_ethics','ai_registration'])){
        $oArticle = new Article;
        if($aArticle['state'] > 4 ){
            // Body paragraphs. The filter is built explicitly: previously the conditions were
            // appended to whatever $aWhere last held, which is the journal filter when
            // queue_fields is a comma-separated list containing a journal field.
            $aMainWhere = [
                'article_id' => $iArticleId,
                'type'       => 0,
                'content'    => ['<>',''],
                'state'      => 0,
            ];
            $aArticleMain = Db::table('t_article_main')->where($aMainWhere)->column('content');

            // References stored with the production article, if there is one.
            $aProductionWhere = ['state' => ['in',[0,2]],'article_id' => $iArticleId];
            $aProductionArticle = Db::name('production_article')->field('p_article_id')->where($aProductionWhere)->find();
            if(!empty($aProductionArticle)){
                $aReferWhere = ['state' => 0,'p_article_id' => $aProductionArticle['p_article_id']];
                $aRefer = Db::name('production_article_refer')->field('refer_content')->where($aReferWhere)->column('refer_content');
                $aArticleMain = empty($aRefer) ? $aArticleMain : array_merge($aRefer,$aArticleMain);
            }
        }else{
            // Otherwise take the text from the article file.
            $aFile = json_decode($oArticle->getFileContent(['article_id' => $iArticleId]),true);
            $aFile = empty($aFile['data']) ? [] : $aFile['data'];
            $aArticleMain = empty($aFile['mains']) ? [] : $aFile['mains'];
        }
        $sContent = empty($aArticleMain) ? '' : implode("\n", array_unique($aArticleMain));
        if(empty($sContent)){
            return json_encode(['status' => 4,'msg' => 'No article content found:'.$sQuestionFields]);
        }

        // Filter the text, then separate the body from the reference list.
        $sContent = $oHelperFunction->filterAllTags($sContent);
        $aDealContent = $this->dealContent($sContent);
        $sBefore = empty($aDealContent['before']) ? '' : $aDealContent['before'];
        $sReference = empty($aDealContent['after']) ? '' : $aDealContent['after'];

        if(in_array($sQuestionFields, ['attribute'])){
            // 'attribute' is evaluated on the complete text.
            $aChunkContent = $oHelperFunction->splitContent($sContent);
        }
        if($sQuestionFields == 'reference'){
            // 'reference' only needs the reference list.
            if(empty($sReference)){
                return json_encode(['status' => 5,'msg' => 'Reference not matched successfully']);
            }
            $aChunkContent = [$sReference];
        }
        if(!in_array($sQuestionFields, ['attribute','reference'])){
            // All remaining fields use the text before the reference list.
            $aChunkContent = $oHelperFunction->splitContent($sBefore);
        }

        $aArticle['reference'] = $sReference;
        // A single chunk is passed as a string, several chunks as an array.
        $aArticle['content'] = empty($aChunkContent[0]) ? '' : $aChunkContent[0];
        if(count($aChunkContent) > 1){
            $aArticle['content'] = $aChunkContent;
        }
    }

    // Build the prompt.
    $aArticle['queue_fields'] = $sQuestionFields;
    $aArticle['question_level'] = $sQuestionLevel;
    $bChunked = count($aChunkContent) > 1;
    if($bChunked){
        $aMessage = $oOpenAi->buildReviewPromptChunk($aArticle);
    }else{
        $aMessage = $oOpenAi->buildReviewPrompt($aArticle);
    }
    if(empty($aMessage)){
        return json_encode(['status' => 5,'msg' => 'AI Q&A content not obtained']);
    }

    // Call the API through the OpenAi helper.
    $aRequest = [
        'messages'       => $aMessage,
        'model'          => empty($aParam['api_model']) ? 'DeepSeek-V3' : $aParam['api_model'],
        'article_id'     => empty($aArticle['article_id']) ? 0 : $aArticle['article_id'],
        'queue_fields'   => $sQuestionFields,
        'question_level' => $sQuestionLevel,
        'journal_id'     => empty($aArticle['journal_id']) ? 0 : $aArticle['journal_id']
    ];
    if($bChunked){
        return $oOpenAi->articleReviewDealChunk($aRequest);
    }
    return $oOpenAi->articleReviewDeal($aRequest);
}
|
||
/**
 * @title Split article content at the reference section
 * @description Searches for the last reference-section heading (References, Bibliography,
 *              Works Cited, Citations, Literature Cited, Reference List) and splits the
 *              text at the start of that match. For content longer than 5000 characters
 *              only the last 40% is searched. Newlines are removed from both parts and
 *              the parts are trimmed.
 * @param sContent Article content (expected to be UTF-8)
 * @param regex    Optional custom PCRE pattern; the built-in heading pattern is used when null
 * @return array ['before' => text preceding the match, 'after' => text from the match onwards];
 *               'after' is '' when nothing matches.
 */
private function dealContent($sContent = '',$regex = null){
    if (empty($sContent)) {
        return ['before' => '', 'after' => ''];
    }

    // 1. Restrict the search to the tail of long documents (measured in characters).
    $iCharLength = mb_strlen($sContent, 'UTF-8');
    $iSearchStart = $iCharLength > 5000 ? (int)($iCharLength * 0.6) : 0;

    // 2. Head is never searched; tail is the search area.
    $sHead = '';
    $sTail = $sContent;
    if ($iSearchStart > 0) {
        $sHead = mb_substr($sContent, 0, $iSearchStart, 'UTF-8');
        $sTail = mb_substr($sContent, $iSearchStart, null, 'UTF-8');
    }

    // 3. Default pattern: tolerant of misspellings and of headings glued to other text.
    if ($regex === null) {
        $aKeywords = [
            'r?reference[s]?', // allows one extra leading r (e.g. "Rreferences")
            'bibliograph(?:y|ies)',
            'works? cited',
            'citation[s]?',
            'literature cited',
            'reference list'
        ];
        // The pattern contains a literal en dash and \p{P}, so it needs UTF-8 mode to treat
        // them as characters instead of bytes. UTF-8 mode rejects invalid subjects, so it
        // is only enabled when the text is valid UTF-8.
        $sModifiers = mb_check_encoding($sTail, 'UTF-8') ? 'iu' : 'i';
        $regex = sprintf(
            '/(?:^|\s|[(\[{<"\']|[\p{P}])?\s*\#*[*_]*\s*(%s)\s*[*_]*\s*[:\-–=.]?\s*(?:$|\s|[)\]}>"\'.|[\d{[])*/%s',
            implode('|', $aKeywords),
            $sModifiers
        );
    }

    // 4. Walk forward through all matches and remember the last one.
    //    preg_match() offsets are BYTE offsets, so all bookkeeping here uses strlen()/substr().
    $iLastBytePos = -1;
    $iOffset = 0;
    $iTailBytes = strlen($sTail);
    while ($iOffset < $iTailBytes && preg_match($regex, $sTail, $aMatch, PREG_OFFSET_CAPTURE, $iOffset)) {
        $iLastBytePos = $aMatch[0][1];
        // Always advance by at least one byte so an empty match (possible with a
        // caller-supplied pattern) cannot loop forever.
        $iOffset = $iLastBytePos + max(1, strlen($aMatch[0][0]));
    }

    $fnClean = static function($sText) {
        return trim(str_replace("\n", '', (string)$sText));
    };

    if ($iLastBytePos >= 0) {
        return [
            'before' => $fnClean($sHead . substr($sTail, 0, $iLastBytePos)),
            'after'  => $fnClean(substr($sTail, $iLastBytePos))
        ];
    }

    // No heading found: everything is body text.
    return [
        'before' => $fnClean($sContent),
        'after'  => ''
    ];
}
|
||
|
||
/**
 * @title Query the AI review content of an article
 * @param article_id Article ID (required)
 * @return string JSON: status 2 when article_id is missing, status 3 when the article
 *                lookup yields no data, otherwise status 1 with the review in 'data'
 *                and the article in 'article_data'.
 */
public function get($aParam = []){

    // Fall back to the POST payload when no parameters are passed in.
    if(empty($aParam)){
        $aParam = $this->request->post();
    }
    if(empty($aParam['article_id'])){
        return json_encode(['status' => 2,'msg' => 'Please select an article']);
    }

    // Look up the article; an empty 'data' entry means there is nothing to review.
    $oArticleService = new Article;
    $aArticleResponse = json_decode($oArticleService->get($aParam),true);
    if(empty($aArticleResponse['data'])){
        return json_encode(['status' => 3,'msg' => 'No articles requiring review were found']);
    }
    $aArticleData = $aArticleResponse['data'];

    // Look up the stored AI review for the same parameters.
    $oReviewService = new \app\common\Aireview;
    $aReviewResponse = $oReviewService->get($aParam);
    $aReviewData = [];
    if(!empty($aReviewResponse['data'])){
        $aReviewData = $aReviewResponse['data'];
    }

    return json_encode([
        'status'       => 1,
        'msg'          => 'Successfully obtained article review content',
        'data'         => $aReviewData,
        'article_data' => $aArticleData
    ]);
}
|
||
}
|