job修改
This commit is contained in:
@@ -140,7 +140,6 @@ class Aireview extends Base
|
|||||||
$sContent = $oHelperFunction->filterAllTags($sContent);
|
$sContent = $oHelperFunction->filterAllTags($sContent);
|
||||||
//将文章内容拆分参考文献
|
//将文章内容拆分参考文献
|
||||||
$aDealContent = $this->dealContent($sContent);
|
$aDealContent = $this->dealContent($sContent);
|
||||||
|
|
||||||
$sBefore= empty($aDealContent['before']) ? '' : $aDealContent['before'];
|
$sBefore= empty($aDealContent['before']) ? '' : $aDealContent['before'];
|
||||||
$sReference = empty($aDealContent['after']) ? '' : $aDealContent['after'];
|
$sReference = empty($aDealContent['after']) ? '' : $aDealContent['after'];
|
||||||
if(in_array($sQuestionFields, ['attribute'])){//科学性和创新性
|
if(in_array($sQuestionFields, ['attribute'])){//科学性和创新性
|
||||||
@@ -193,50 +192,72 @@ class Aireview extends Base
|
|||||||
* @param sContent 文章内容
|
* @param sContent 文章内容
|
||||||
*/
|
*/
|
||||||
private function dealContent($sContent = '',$regex = null){
|
private function dealContent($sContent = '',$regex = null){
|
||||||
if(empty($sContent)){
|
if (empty($sContent)) {
|
||||||
return ['before' => '', 'after' => ''];
|
return ['before' => '', 'after' => ''];
|
||||||
}
|
}
|
||||||
|
|
||||||
// 1. 限制匹配范围(末尾30%)
|
// 1. 优化字符串长度计算
|
||||||
$contentLength = strlen($sContent);
|
$contentLength = mb_strlen($sContent);
|
||||||
$searchStart = $contentLength > 5000 ? (int)($contentLength * 0.7) : 0;
|
$searchStart = $contentLength > 5000 ? (int)($contentLength * 0.6) : 0;
|
||||||
$searchContent = substr($sContent, $searchStart);
|
|
||||||
|
|
||||||
// 2. 正则模式优化
|
// 2. 截取搜索区域
|
||||||
|
$searchContent = $searchStart > 0
|
||||||
|
? mb_substr($sContent, $searchStart, null, 'UTF-8')
|
||||||
|
: $sContent;
|
||||||
|
|
||||||
|
// 3. 正则模式优化 - 重点处理拼写错误和特殊格式
|
||||||
if ($regex === null) {
|
if ($regex === null) {
|
||||||
|
// 关键词列表增加容错处理
|
||||||
$keywords = [
|
$keywords = [
|
||||||
'references?', 'bibliograph(?:y|ies)',
|
'r?reference[s]?', // 允许关键词前多一个r(处理Rreferences这类拼写错误)
|
||||||
'works? cited', 'citation(?:s)?'
|
'bibliograph(?:y|ies)',
|
||||||
|
'works? cited',
|
||||||
|
'citation[s]?',
|
||||||
|
'literature cited',
|
||||||
|
'reference list'
|
||||||
];
|
];
|
||||||
|
|
||||||
|
// 正则模式优化:
|
||||||
|
// 1. 允许关键词与前面内容直接连接(解决"article.Rreferences"这种没有空格的情况)
|
||||||
|
// 2. 增强对冒号和方括号的支持(匹配"References:[1]"这种格式)
|
||||||
$pattern = sprintf(
|
$pattern = sprintf(
|
||||||
'/(?:^|\s)\s*#*\s*(%s)\s*[:\-–]?\s*(?:$|\s)/i',
|
'/(?:^|\s|[(\[{<"\']|[\p{P}])?\s*\#*[*_]*\s*(%s)\s*[*_]*\s*[:\-–=.]?\s*(?:$|\s|[)\]}>"\'.|[\d{[])*/i',
|
||||||
implode('|', $keywords)
|
implode('|', $keywords)
|
||||||
);
|
);
|
||||||
$regex = $pattern;
|
$regex = $pattern;
|
||||||
}
|
}
|
||||||
|
|
||||||
// 3. 匹配并处理结果
|
// 4. 高效匹配 - 从字符串末尾开始搜索最后一个匹配项
|
||||||
if (preg_match_all($regex, $searchContent, $matches, PREG_OFFSET_CAPTURE)) {
|
// 使用带偏移量的循环匹配找到最后一个符合条件的关键词
|
||||||
$lastMatch = end($matches[0]);
|
$lastPos = 0;
|
||||||
$refPosition = $searchStart + $lastMatch[1];
|
$matchFound = false;
|
||||||
|
$offset = 0;
|
||||||
|
$searchLen = mb_strlen($searchContent);
|
||||||
|
|
||||||
// 4. 合并字符串处理
|
// 从字符串末尾向前搜索,提高长文本效率
|
||||||
$before = substr($sContent, 0, $refPosition);
|
while ($offset < $searchLen && preg_match($regex, $searchContent, $match, PREG_OFFSET_CAPTURE, $offset)) {
|
||||||
$after = substr($sContent, $refPosition);
|
$lastPos = $match[0][1];
|
||||||
|
$matchFound = true;
|
||||||
|
// 移动偏移量继续查找下一个匹配
|
||||||
|
$offset = $lastPos + mb_strlen($match[0][0]);
|
||||||
|
}
|
||||||
|
|
||||||
|
if ($matchFound) {
|
||||||
|
$refPosition = $searchStart + $lastPos;
|
||||||
|
|
||||||
// 一次性处理空白和换行
|
$process = static function($str) {
|
||||||
$process = function($str) {
|
return trim(str_replace("\n", '', $str));
|
||||||
return str_replace("\n", '', trim($str));
|
|
||||||
};
|
};
|
||||||
|
|
||||||
return [
|
return [
|
||||||
'before' => $process($before),
|
'before' => $process(mb_substr($sContent, 0, $refPosition, 'UTF-8')),
|
||||||
'after' => $process($after)
|
'after' => $process(mb_substr($sContent, $refPosition, null, 'UTF-8'))
|
||||||
];
|
];
|
||||||
}
|
}
|
||||||
|
|
||||||
// 未匹配时处理
|
// 未匹配时处理
|
||||||
return [
|
return [
|
||||||
'before' => str_replace("\n", '', trim($sContent)),
|
'before' => trim(str_replace("\n", '', $sContent)),
|
||||||
'after' => ''
|
'after' => ''
|
||||||
];
|
];
|
||||||
// $lastPos = strrpos($sContent, 'Reference');
|
// $lastPos = strrpos($sContent, 'Reference');
|
||||||
|
|||||||
Reference in New Issue
Block a user