From 2e02979eee8ba0fcd7ad2743656372df7d967ba6 Mon Sep 17 00:00:00 2001 From: chengxl Date: Thu, 21 Aug 2025 10:34:42 +0800 Subject: [PATCH] =?UTF-8?q?=E8=B0=83=E6=95=B4?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- application/api/controller/Aireview.php | 74 +++++++++++++++++++------ 1 file changed, 58 insertions(+), 16 deletions(-) diff --git a/application/api/controller/Aireview.php b/application/api/controller/Aireview.php index 537697b..cd500eb 100644 --- a/application/api/controller/Aireview.php +++ b/application/api/controller/Aireview.php @@ -129,7 +129,7 @@ class Aireview extends Base $aFile = empty($aFile['data']) ? [] : $aFile['data']; $aArticleMain = empty($aFile['mains']) ? [] : $aFile['mains']; } - $sContent = empty($aArticleMain) ? '' : implode("", array_unique($aArticleMain)); + $sContent = empty($aArticleMain) ? '' : implode("\n", array_unique($aArticleMain)); if(empty($sContent)){ return json_encode(array('status' => 4,'msg' => 'No article content found:'.$sQuestionFields)); } @@ -140,6 +140,7 @@ class Aireview extends Base $sContent = $oHelperFunction->filterAllTags($sContent); //将文章内容拆分参考文献 $aDealContent = $this->dealContent($sContent); + $sBefore= empty($aDealContent['before']) ? '' : $aDealContent['before']; $sReference = empty($aDealContent['after']) ? '' : $aDealContent['after']; if(in_array($sQuestionFields, ['attribute'])){//科学性和创新性 @@ -191,26 +192,67 @@ class Aireview extends Base * @title 将文章内容拆分参考文献 * @param sContent 文章内容 */ - private function dealContent($sContent = ''){ + private function dealContent($sContent = '',$regex = null){ if(empty($sContent)){ - return []; + return ['before' => '', 'after' => '']; } - //拆分字符串 - $lastPos = strrpos($sContent, 'Reference'); - if ($lastPos === false) { - $lastPos = strrpos($sContent, 'REFERENCE'); - if($lastPos === false){ - // 未找到 "Reference",返回原字符串和空 - return ['before' => $sContent, 'after' => '']; - } + // 1. 限制匹配范围(末尾30%) + $contentLength = strlen($sContent); + $searchStart = $contentLength > 5000 ? (int)($contentLength * 0.7) : 0; + $searchContent = substr($sContent, $searchStart); + + // 2. 正则模式优化 + if ($regex === null) { + $keywords = [ + 'references?', 'bibliograph(?:y|ies)', + 'works? cited', 'citation(?:s)?' + ]; + $pattern = sprintf( + '/(?:^|\s)\s*#*\s*(%s)\s*[:\-–]?\s*(?:$|\s)/i', + implode('|', $keywords) + ); + $regex = $pattern; } + + // 3. 匹配并处理结果 + if (preg_match_all($regex, $searchContent, $matches, PREG_OFFSET_CAPTURE)) { + $lastMatch = end($matches[0]); + $refPosition = $searchStart + $lastMatch[1]; + + // 4. 合并字符串处理 + $before = substr($sContent, 0, $refPosition); + $after = substr($sContent, $refPosition); + + // 一次性处理空白和换行 + $process = function($str) { + return str_replace("\n", '', trim($str)); + }; + return [ + 'before' => $process($before), + 'after' => $process($after) + ]; + } + + // 未匹配时处理 + return [ + 'before' => str_replace("\n", '', trim($sContent)), + 'after' => '' + ]; + // $lastPos = strrpos($sContent, 'Reference'); + // if ($lastPos === false) { + // $lastPos = strrpos($sContent, 'REFERENCE'); + // if($lastPos === false){ + // // 未找到 "Reference",返回原字符串和空 + // return ['before' => $sContent, 'after' => '']; + // } + // } - // 拆分:关键词之前的内容 - $before = substr($sContent, 0, $lastPos); - // 关键词及之后的内容(包含关键词本身) - $after = substr($sContent, $lastPos); - return ['before' => $before,'after' => $after]; + // // 拆分:关键词之前的内容 + // $before = substr($sContent, 0, $lastPos); + // // 关键词及之后的内容(包含关键词本身) + // $after = substr($sContent, $lastPos); + // return ['before' => $before,'after' => $after]; } /**