Files
tougao/application/api/controller/Aireview.php
2025-08-22 16:24:59 +08:00

305 lines
13 KiB
PHP
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
<?php
namespace app\api\controller;
use app\api\controller\Base;
use think\Db;
use app\common\OpenAi;
use app\common\Article;
/**
 * @title AI article review
 * @description Builds review prompts for an article and forwards them to the OpenAI-compatible client
 */
class Aireview extends Base
{
    /**
     * @title AI article review
     * @param article_id     Article ID (required)
     * @param question_level Question level, defaults to "A"
     * @param queue_fields   Comma-separated AI question fields to process, defaults to "journal_scope"
     * @param api_model      Model name passed to the AI client, defaults to "DeepSeek-V3"
     *
     * Returns a JSON string with `status`/`msg` on every early exit; otherwise returns
     * whatever OpenAi::articleReviewDeal() / articleReviewDealChunk() returns.
     */
    public function review($aParam = [])
    {
        // Use the explicit argument when given; otherwise fall back to the POST body.
        $aParam = empty($aParam) ? $this->request->post() : $aParam;
        $iArticleId = empty($aParam['article_id']) ? '' : $aParam['article_id'];
        if (empty($iArticleId)) {
            return json_encode(array('status' => 2, 'msg' => 'Please select an article'));
        }

        // Question level.
        $sQuestionLevel = empty($aParam['question_level']) ? 'A' : $aParam['question_level'];
        // AI question fields to process (raw string and split list are both used below).
        $sQuestionFields = empty($aParam['queue_fields']) ? 'journal_scope' : $aParam['queue_fields'];
        $aQuestionFields = explode(',', $sQuestionFields);

        // Load the article together with any existing AI review record.
        $aResult = json_decode($this->get(['article_id' => $iArticleId]), true);

        // Article data.
        $aArticle = empty($aResult['article_data']) ? [] : $aResult['article_data'];
        if (empty($aArticle)) {
            return json_encode(array('status' => 3, 'msg' => 'No articles requiring review were found'));
        }

        // Existing AI review data: nothing to do when it is already finished.
        $aAiReview = empty($aResult['data']) ? [] : $aResult['data'];
        if (!empty($aAiReview['is_finish']) && $aAiReview['is_finish'] == 1) {
            return json_encode(array('status' => 1, 'msg' => 'AI has been reviewed', 'data' => $aAiReview));
        }

        // Shared helper functions.
        $oHelperFunction = new \app\common\HelperFunction;

        // Collect the ISSNs of the journals whose scope text is needed.
        $aJournalIssn = [];
        $sIssn = $sScope = '';
        if (in_array('journal_scope', $aQuestionFields)) {
            $aJournalWhere = ['journal_id' => $aArticle['journal_id'], 'state' => 0];
            $aJournal = Db::table('t_journal')
                ->field('title as targeted_journals,scope as journal_scope,issn')
                ->where($aJournalWhere)
                ->find();
            if (empty($aJournal)) {
                return json_encode(array('status' => 4, 'msg' => 'This article is not associated with a journal'));
            }
            // Array union: keys already present in the article row are kept.
            $aArticle += $aJournal;
            $sIssn = empty($aJournal['issn']) ? '' : $aJournal['issn'];
            $sScope = empty($aJournal['journal_scope']) ? '' : $aJournal['journal_scope'];
            $aJournalIssn = array_merge($aJournalIssn, [$sIssn]);
        }
        if (in_array('other_journal', $aQuestionFields)) {
            $aOtherWhere = ['journal_id' => ['<>', $aArticle['journal_id']], 'state' => 0];
            $aJournalOther = Db::table('t_journal')
                ->field('title as targeted_journals,scope as journal_scope,issn')
                ->where($aOtherWhere)
                ->select();
            if (!empty($aJournalOther)) {
                $aJournalIssn = array_merge($aJournalIssn, array_column($aJournalOther, 'issn'));
            }
        }

        // Resolve journal scope text for the collected ISSNs.
        if (!empty($aJournalIssn)) {
            $oArticle = new Article;
            $aJournalPaperArt = json_decode($oArticle->getJournalPaperArt(['issn' => $aJournalIssn]), true);
            $aJournalPaperArt = empty($aJournalPaperArt['data']) ? [] : $aJournalPaperArt['data'];
            // Prefer the fetched text for the article's own journal; fall back to the DB scope column.
            $aArticle['journal_scope'] = empty($aJournalPaperArt[$sIssn])
                ? $sScope
                : $oHelperFunction->filterAllTags($aJournalPaperArt[$sIssn]);
            if (!empty($aJournalPaperArt[$sIssn])) {
                unset($aJournalPaperArt[$sIssn]);
            }
            // Whatever remains belongs to the other journals.
            $aArticle['other_journal'] = empty($aJournalPaperArt)
                ? ''
                : json_encode($aJournalPaperArt, JSON_UNESCAPED_UNICODE);
        }

        // Research fields of the article (only when "hotspot" is the sole requested field).
        if ($sQuestionFields == 'hotspot') {
            $oArticle = new Article;
            $aArticleField = $oArticle->getArticleField(['article_id' => $iArticleId]);
            $aArticleField = empty($aArticleField['data']) ? [] : $aArticleField['data'];
            if (empty($aArticleField)) {
                return json_encode(array('status' => 4, 'msg' => 'Domain where article not found:' . $sQuestionFields));
            }
            $aArticleFieldData = [];
            foreach ($aArticleField as $value) {
                $aArticleFieldData[] = getMajorStr($value['major_id']);
            }
            $aArticle['hotspot'] = json_encode($aArticleFieldData, JSON_UNESCAPED_UNICODE);
        }

        // Client used to build prompts and call the AI endpoint.
        $oOpenAi = new OpenAi;

        // Every field outside this list needs the article body text.
        $aChunkContent = [];
        if (!in_array($sQuestionFields, ['journal_scope', 'other_journal', 'hotspot', 'ai_ethics', 'ai_registration'])) {
            $oArticle = new Article;
            if ($aArticle['state'] > 4) {
                // Body text stored in the database.
                // The filter is built explicitly here: reusing the earlier $aWhere would
                // query by journal_id (and drop article_id) whenever a journal lookup ran above.
                $aMainWhere = [
                    'article_id' => $iArticleId,
                    'type'       => 0,
                    'content'    => ['<>', ''],
                    'state'      => 0,
                ];
                $aArticleMain = Db::table('t_article_main')->where($aMainWhere)->column('content');

                // References stored with the production article, if there is one.
                $aProductionWhere = ['state' => ['in', [0, 2]], 'article_id' => $iArticleId];
                $aProductionArticle = Db::name('production_article')
                    ->field('p_article_id')
                    ->where($aProductionWhere)
                    ->find();
                if (!empty($aProductionArticle)) {
                    $aReferWhere = ['state' => 0, 'p_article_id' => $aProductionArticle['p_article_id']];
                    $aRefer = Db::name('production_article_refer')
                        ->field('refer_content')
                        ->where($aReferWhere)
                        ->column('refer_content');
                    $aArticleMain = empty($aRefer) ? $aArticleMain : array_merge($aRefer, $aArticleMain);
                }
            } else {
                // Body text extracted from the uploaded file.
                $aFile = json_decode($oArticle->getFileContent(['article_id' => $iArticleId]), true);
                $aFile = empty($aFile['data']) ? [] : $aFile['data'];
                $aArticleMain = empty($aFile['mains']) ? [] : $aFile['mains'];
            }

            $sContent = empty($aArticleMain) ? '' : implode("\n", array_unique($aArticleMain));
            if (empty($sContent)) {
                return json_encode(array('status' => 4, 'msg' => 'No article content found:' . $sQuestionFields));
            }

            // Strip markup, then separate the body from the reference section.
            $sContent = $oHelperFunction->filterAllTags($sContent);
            $aDealContent = $this->dealContent($sContent);
            $sBefore = empty($aDealContent['before']) ? '' : $aDealContent['before'];
            $sReference = empty($aDealContent['after']) ? '' : $aDealContent['after'];

            if (in_array($sQuestionFields, ['attribute'])) {
                // "attribute" is chunked from the full text, references included.
                $aChunkContent = $oHelperFunction->splitContent($sContent);
            }
            if ($sQuestionFields == 'reference') {
                // "reference" is sent as a single chunk holding only the reference section.
                if (empty($sReference)) {
                    return json_encode(array('status' => 5, 'msg' => 'Reference not matched successfully'));
                }
                $aChunkContent = [$sReference];
            }
            if (!in_array($sQuestionFields, ['attribute', 'reference'])) {
                // Everything else is chunked from the text before the references.
                $aChunkContent = $oHelperFunction->splitContent($sBefore);
            }

            $aArticle['reference'] = $sReference;
            // A single chunk is passed as a string, several chunks as the whole list.
            $aArticle['content'] = empty($aChunkContent[0]) ? '' : $aChunkContent[0];
            if (count($aChunkContent) > 1) {
                $aArticle['content'] = $aChunkContent;
            }
        }

        // Build the prompt messages.
        $aArticle['queue_fields'] = $sQuestionFields;
        $aArticle['question_level'] = $sQuestionLevel;
        $bChunked = count($aChunkContent) > 1;
        $aMessage = $bChunked
            ? $oOpenAi->buildReviewPromptChunk($aArticle)
            : $oOpenAi->buildReviewPrompt($aArticle);
        if (empty($aMessage)) {
            return json_encode(['status' => 5, 'msg' => 'AI Q&A content not obtained']);
        }

        // Call the AI endpoint.
        $aRequest = [
            'messages'       => $aMessage,
            'model'          => empty($aParam['api_model']) ? 'DeepSeek-V3' : $aParam['api_model'],
            'article_id'     => empty($aArticle['article_id']) ? 0 : $aArticle['article_id'],
            'queue_fields'   => $sQuestionFields,
            'question_level' => $sQuestionLevel,
            'journal_id'     => empty($aArticle['journal_id']) ? 0 : $aArticle['journal_id']
        ];
        if ($bChunked) {
            return $oOpenAi->articleReviewDealChunk($aRequest);
        }
        return $oOpenAi->articleReviewDeal($aRequest);
    }

    /**
     * @title Split article text into body and reference section
     * @param sContent Article text
     * @param regex    Optional PCRE pattern marking the start of the reference section;
     *                 when null a built-in pattern for common headings is used
     *
     * Returns ['before' => text preceding the last heading match, 'after' => text from that
     * match onward]. Newlines are removed and both parts are trimmed. When nothing matches,
     * 'before' is the whole text and 'after' is ''.
     */
    private function dealContent($sContent = '', $regex = null)
    {
        if (empty($sContent)) {
            return ['before' => '', 'after' => ''];
        }

        // For long texts only the last 40% (by characters) is searched for the heading.
        $contentLength = mb_strlen($sContent, 'UTF-8');
        $searchStart = $contentLength > 5000 ? (int)($contentLength * 0.6) : 0;
        $sHead = $searchStart > 0 ? mb_substr($sContent, 0, $searchStart, 'UTF-8') : '';
        $searchContent = $searchStart > 0
            ? mb_substr($sContent, $searchStart, null, 'UTF-8')
            : $sContent;

        $fallbackRegex = null;
        if ($regex === null) {
            $keywords = [
                'r?reference[s]?', // tolerate a doubled leading "r", e.g. "Rreferences"
                'bibliograph(?:y|ies)',
                'works? cited',
                'citation[s]?',
                'literature cited',
                'reference list'
            ];
            // The heading may be glued to the preceding text ("article.References") and may be
            // followed directly by a colon, bracket or digit ("References:[1]").
            $patternBody = sprintf(
                '(?:^|\s|[(\[{<"\']|[\p{P}])?\s*\#*[*_]*\s*(%s)\s*[*_]*\s*[:\-=.]?\s*(?:$|\s|[)\]}>"\'.|[\d{[])*',
                implode('|', $keywords)
            );
            // "u" keeps \p{P} from matching a lone byte inside a multibyte character.
            $regex = '/' . $patternBody . '/iu';
            // Byte-mode variant, used only if the subject is not valid UTF-8.
            $fallbackRegex = '/' . $patternBody . '/i';
        }

        // Find every non-overlapping match; the last one is taken as the reference heading.
        $aMatches = [];
        $iFound = preg_match_all($regex, $searchContent, $aMatches, PREG_OFFSET_CAPTURE);
        if ($iFound === false && $fallbackRegex !== null) {
            $aMatches = [];
            $iFound = preg_match_all($fallbackRegex, $searchContent, $aMatches, PREG_OFFSET_CAPTURE);
        }

        $process = static function ($str) {
            return trim(str_replace("\n", '', $str));
        };

        if (!empty($iFound) && !empty($aMatches[0])) {
            $aLast = end($aMatches[0]);
            // PREG_OFFSET_CAPTURE reports BYTE offsets, so the searched tail is cut with
            // substr() rather than feeding the offset to mb_substr().
            $iBytePos = $aLast[1];
            return [
                'before' => $process($sHead . (string)substr($searchContent, 0, $iBytePos)),
                'after'  => $process((string)substr($searchContent, $iBytePos))
            ];
        }

        // No heading found: everything is body text.
        return [
            'before' => $process($sContent),
            'after'  => ''
        ];
    }

    /**
     * @title Query the AI review content of an article
     * @param article_id Article ID (required)
     *
     * Returns a JSON string. On success `status` is 1, `data` holds the AI review record
     * (empty array when there is none) and `article_data` holds the article.
     */
    public function get($aParam = [])
    {
        // Use the explicit argument when given; otherwise fall back to the POST body.
        $aParam = empty($aParam) ? $this->request->post() : $aParam;
        if (empty($aParam['article_id'])) {
            return json_encode(array('status' => 2, 'msg' => 'Please select an article'));
        }

        // Load the article.
        $oArticle = new Article;
        $aArticle = json_decode($oArticle->get($aParam), true);
        $aArticle = empty($aArticle['data']) ? [] : $aArticle['data'];
        if (empty($aArticle)) {
            return json_encode(array('status' => 3, 'msg' => 'No articles requiring review were found'));
        }

        // Load the AI review record for the article.
        $oAireview = new \app\common\Aireview;
        $aAireview = $oAireview->get($aParam);
        $aAireview = empty($aAireview['data']) ? [] : $aAireview['data'];

        return json_encode(array(
            'status'       => 1,
            'msg'          => 'Successfully obtained article review content',
            'data'         => $aAireview,
            'article_data' => $aArticle
        ));
    }
}