diff --git a/application/common/FigureTagProcessor.php b/application/common/FigureTagProcessor.php
index 7956f88..ee82f1a 100644
--- a/application/common/FigureTagProcessor.php
+++ b/application/common/FigureTagProcessor.php
@@ -1,5 +1,6 @@
2, 'data' => ''];
}
+
//超长文本保护
if (strlen($html) > self::MAX_HTML_LENGTH) {
return ['status' => 4, 'data' => $html];
}
+
//编码处理
if (!mb_check_encoding($html, 'UTF-8')) {
$html = mb_convert_encoding($html, 'UTF-8', 'GBK,GB2312,ASCII,ISO-8859-1');
}
+
//初始化映射数组(过滤非数字键值)
$this->initImageMap($aImageMain);
//原始内容
$originalHtml = $html;
$hasReplace = false;
+
try {
//只要包含数字+字母/数字后缀,直接返回原内容(核心修复)
if ($this->hasFigureSuffix($html)) {
return ['status' => 4, 'data' => $html];
}
+
//合并拆分标签的Figure+数字
$html = $this->preprocessSplitTags($html);
- //替换
+
+ //替换(核心修复:适配样式标签+后缀标点场景)
$html = $this->replaceFigureInHtml($html, $hasReplace);
+
//清理冗余样式/标签
if ($hasReplace) {
$html = $this->cleanRedundantStyles($html);
@@ -69,6 +78,7 @@ class FigureTagProcessor{
'data' => $html
];
}
+
/**
* 全局检测是否包含Figure数字+字母/数字后缀
* 覆盖所有拆分/嵌套/无标签场景,无论是否有空白
@@ -86,8 +96,11 @@ class FigureTagProcessor{
// 正则3:嵌套标签场景(4B / 4123)
$pattern3 = "/figure\s*(?:<(?:{$styleTagsPattern})[^>]*>)\s*\d+[a-zA-Z0-9]\s*(?:<\/(?:{$styleTagsPattern})>)/iu";
- return preg_match($pattern1, $html) || preg_match($pattern2, $html) || preg_match($pattern3, $html);
+
+ // 加@抑制正则警告
+ return @preg_match($pattern1, $html) || @preg_match($pattern2, $html) || @preg_match($pattern3, $html);
}
+
/**
* 初始化Figure数字映射数组
* @param array $aImageMain 原始映射数组
@@ -106,6 +119,7 @@ class FigureTagProcessor{
}
$this->aImageMain = $imageMap;
}
+
/**
* 合并所有拆分标签的Figure+数字(含空白样式标签)
* @param string $html 待处理HTML
@@ -116,19 +130,22 @@ class FigureTagProcessor{
// 正则1:匹配基础拆分标签的Figure+数字
$pattern = "/(figure)\s*(?:<\/(?:{$styleTagsPattern})>)\s*(?:<(?:{$styleTagsPattern})[^>]*>)\s*(\d+)/iu";
- $html = preg_replace_callback($pattern, function($matches) {
+ $html = @preg_replace_callback($pattern, function($matches) {
return $matches[1] . ' ' . $matches[2];
}, $html);
// 正则2:匹配多轮拆分标签的Figure+数字(含空白)
$pattern2 = "/(figure)(?:<\/(?:{$styleTagsPattern})>)\s*(?:<(?:{$styleTagsPattern})[^>]*>)\s*(\s*)\s*(?:<\/(?:{$styleTagsPattern})>)\s*(?:<(?:{$styleTagsPattern})[^>]*>)\s*(\d+)/iu";
- $html = preg_replace_callback($pattern2, function($matches) {
+ $html = @preg_replace_callback($pattern2, function($matches) {
return $matches[1] . $matches[2] . $matches[3];
}, $html);
+
return $html;
}
+
/**
* 核心替换逻辑:将纯数字Figure替换为myfigure标签
+ * 修复:适配样式标签包裹 + 后缀标点场景(如 Figure 2.)
* @param string $html 待处理HTML
* @param bool $hasReplace 是否发生替换(引用传递)
* @return string
@@ -138,13 +155,14 @@ class FigureTagProcessor{
$styleTagsRegex = "(?:<(?:{$styleTagsPattern})[^>]*>)*\s*";
$styleTagsCloseRegex = "\s*(?:<\/(?:{$styleTagsPattern})>)*";
- // 正则1:匹配括号内的纯数字Figure(如 (Figure 2)、(Figure 3))
- // $pattern1 = "/\(\s*{$styleTagsRegex}figure\s*(\d+)\s*([\.,:]{0,1}){$styleTagsCloseRegex}\s*(?![a-zA-Z0-9])/iu";
- $pattern1 = "/\(\s*{$styleTagsRegex}figure\s*(\d+)\s*([\.,:]{0,1}){$styleTagsCloseRegex}\s*(?![a-zA-Z0-9])/iuD";
- $html = preg_replace_callback($pattern1, function($matches) use (&$hasReplace) {
+ // 正则1:匹配括号内的纯数字Figure(如 (Figure 2)、(Figure 3)、(Figure 3).)
+ $pattern1 = "/\(\s*{$styleTagsRegex}figure\s*(\d+){$styleTagsCloseRegex}\s*([\.,:]{0,1})\s*\)\s*([\.,:]{0,1})/iuD";
+ $html = @preg_replace_callback($pattern1, function($matches) use (&$hasReplace) {
$num = $matches[1];
$numInt = (int)$num;
- $suffix = $matches[2] ?? '';
+ $suffix1 = $matches[2] ?? '';
+ $suffix2 = $matches[3] ?? '';
+ $suffix = $suffix1 . $suffix2;
// 过滤条件:非数字、无映射、已处理过的标签
if (!ctype_digit($num) || !isset($this->aImageMain[$numInt]) ||
@@ -161,10 +179,9 @@ class FigureTagProcessor{
return $target;
}, $html);
- // 正则2:匹配无括号的纯数字Figure(如 Figure 2、Figure 3:)
- // $pattern2 = "/{$styleTagsRegex}figure\s*(\d+)\s*([\.,:]{0,1}){$styleTagsCloseRegex}(?![a-zA-Z0-9])/iu";
- $pattern2 = "/{$styleTagsRegex}figure\s*(\d+)\s*([\.,:]{0,1}){$styleTagsCloseRegex}\s*(?![a-zA-Z0-9])/iuD";
- $html = preg_replace_callback($pattern2, function($matches) use (&$hasReplace) {
+ // 正则2:匹配无括号的纯数字Figure(核心修复:适配 Figure 2. 场景)
+ $pattern2 = "/{$styleTagsRegex}figure\s*(\d+){$styleTagsCloseRegex}\s*([\.,:]{0,1})(?![a-zA-Z0-9])/iuD";
+ $html = @preg_replace_callback($pattern2, function($matches) use (&$hasReplace) {
$num = $matches[1];
$numInt = (int)$num;
$suffix = $matches[2] ?? '';
@@ -186,6 +203,7 @@ class FigureTagProcessor{
return $html;
}
+
/**
* 检测当前匹配内容是否已包含myfigure标签(避免重复替换)
* @param string $content 匹配的文本片段
@@ -195,8 +213,9 @@ class FigureTagProcessor{
private function isMatchPositionHasMyFigureTag($content, $figureText){
+ // Quote the figure text so it is matched literally inside the pattern.
$escapedText = preg_quote($figureText, '/');
+ // A processed tag (with any attributes) whose content is exactly the figure text, allowing surrounding whitespace.
$pattern = '/<' . self::PROCESSED_TAG . '[^>]*>\s*' . $escapedText . '\s*<\/' . self::PROCESSED_TAG . '>/is';
- return (bool)preg_match($pattern, $content);
+ // preg_match() yields 1 on a match, 0 on no match and false on failure; only 1 means "already wrapped".
+ // No "@" here: a failing pattern should stay visible instead of being read as "not wrapped".
+ return preg_match($pattern, $content) === 1;
}
+
/**
* 清理myfigure标签周围的冗余样式标签
* @param string $html 待处理HTML
@@ -205,14 +224,15 @@ class FigureTagProcessor{
private function cleanRedundantStyles($html){
foreach (self::STYLE_TAGS as $tag) {
+ // An attribute-less style tag pair whose content is one processed tag plus at most one trailing ".", "," or ":".
$pattern = '/<' . $tag . '>\s*<'.self::PROCESSED_TAG.'([^>]*?)>(.*?)<\/'.self::PROCESSED_TAG.'>([\.,:]{0,1})\s*<\/' . $tag . '>/is';
- $html = preg_replace($pattern, '<'.self::PROCESSED_TAG.'$1>$2'.self::PROCESSED_TAG.'>$3', $html);
+ // Unwrap it: keep the processed tag (attributes $1, content $2, properly closed) and the punctuation $3.
+ // preg_replace() returns null on a PCRE failure, so fall back to the current text instead of losing it.
+ $html = preg_replace($pattern, '<'.self::PROCESSED_TAG.'$1>$2</'.self::PROCESSED_TAG.'>$3', $html) ?? $html;
}
- // 清理无匹配的闭合样式标签
- $html = preg_replace('/<\/('.implode('|', self::STYLE_TAGS).')>(?![^<]*<\1>)/is', '', $html);
+ // Drop a closing style tag unless the next tag after it is an attribute-less opening tag of the same name.
+ // NOTE(review): this also matches closing tags that do have an earlier opener - confirm that is intended.
+ $html = preg_replace('/<\/('.implode('|', self::STYLE_TAGS).')>(?![^<]*<\1>)/is', '', $html) ?? $html;
return $html;
}
+
/**
* 清理myfigure标签周围的冗余标点
* @param string $html 待处理HTML
@@ -220,17 +240,43 @@ class FigureTagProcessor{
*/
private function cleanRedundantPunctuation($html){
- // 修复括号+标点的冗余格式
- $html = preg_replace('/<'.self::PROCESSED_TAG.' data-id="(\d+)">\(Figure \d+\)<\/'.self::PROCESSED_TAG.'>\)\./i',
- '<'.self::PROCESSED_TAG.' data-id="$1">(Figure $1)'.self::PROCESSED_TAG.'>.', $html);
- $html = preg_replace('/<\/'.self::PROCESSED_TAG.'>\)\.([\.,:]{0,1})/', ''.self::PROCESSED_TAG.'>)$1', $html);
- $html = preg_replace('/<\/'.self::PROCESSED_TAG.'>\.\)([\.,:]{0,1})/', ''.self::PROCESSED_TAG.'>)$1', $html);
+ // Every step uses "?? $html": preg_replace() returns null on a PCRE failure and must not wipe the text.
+ // A tag holding "(Figure N)" followed by a stray ")." keeps only the ".".
+ // The matched figure number ($2) is re-emitted, so the visible label never depends on the data-id ($1).
+ $html = preg_replace('/<'.self::PROCESSED_TAG.' data-id="(\d+)">\(Figure (\d+)\)<\/'.self::PROCESSED_TAG.'>\)\./i',
+ '<'.self::PROCESSED_TAG.' data-id="$1">(Figure $2)</'.self::PROCESSED_TAG.'>.', $html) ?? $html;
+ // Closing tag followed by ")." : remove the "." and keep one optional following ".", "," or ":".
+ $html = preg_replace('/<\/'.self::PROCESSED_TAG.'>\)\.([\.,:]{0,1})/', '</'.self::PROCESSED_TAG.'>)$1', $html) ?? $html;
+ // Closing tag followed by ".)" : remove the "." and keep one optional following ".", "," or ":".
+ $html = preg_replace('/<\/'.self::PROCESSED_TAG.'>\.\)([\.,:]{0,1})/', '</'.self::PROCESSED_TAG.'>)$1', $html) ?? $html;
- // 清理重复标点
- $html = preg_replace('/<\/'.self::PROCESSED_TAG.'>([\.,:]){2,}/', ''.self::PROCESSED_TAG.'>$1', $html);
+ // A run of two or more ".", "," or ":" after the closing tag is reduced to the last character of the run.
+ $html = preg_replace('/<\/'.self::PROCESSED_TAG.'>([\.,:]){2,}/', '</'.self::PROCESSED_TAG.'>$1', $html) ?? $html;
- // 修复括号内的标签冗余
- $html = preg_replace('/<'.self::PROCESSED_TAG.' data-id="(\d+)">\((Figure \d+)\s*<\/'.self::PROCESSED_TAG.'>([\.,:]{0,1})/i',
- '<'.self::PROCESSED_TAG.' data-id="$1">($2)'.self::PROCESSED_TAG.'>$3', $html);
+ // A tag whose content opens a parenthesis but never closes it: add the ")" inside the tag.
+ $html = preg_replace('/<'.self::PROCESSED_TAG.' data-id="(\d+)">\((Figure \d+)\s*<\/'.self::PROCESSED_TAG.'>([\.,:]{0,1})/i',
+ '<'.self::PROCESSED_TAG.' data-id="$1">($2)</'.self::PROCESSED_TAG.'>$3', $html) ?? $html;
+
+ // Finally collapse repeated parentheses that directly surround a processed tag.
+ $html = $this->cleanExtraParentheses($html);
+
+ return $html;
+ }
+
+ /**
+ * Collapse repeated parentheses that directly surround a processed figure tag.
+ * A run of "(" immediately before the tag becomes a single "(", and a run of ")"
+ * immediately after it becomes a single ")"; a side without parentheses stays empty.
+ * Parentheses elsewhere in the text are not touched.
+ * @param string $html text to clean
+ * @return string cleaned text; the input is returned unchanged if the regex engine fails
+ */
+ private function cleanExtraParentheses($html){
+ // Group 1 / group 3: last "(" / ")" of the adjacent run (empty or absent when there is none); group 2: the whole tag.
+ $pattern = '/(\()*(<'.self::PROCESSED_TAG.'[^>]*>.*?<\/'.self::PROCESSED_TAG.'>)(\))*/is';
+
+ // preg_replace_callback() returns null on a PCRE failure; "?? $html" keeps the text in that case.
+ $html = preg_replace_callback($pattern, static function($matches) {
+ $left = ($matches[1] ?? '') !== '' ? '(' : '';
+ // Trailing groups that did not participate are omitted from $matches, hence the default.
+ $right = ($matches[3] ?? '') !== '' ? ')' : '';
+
+ return $left . $matches[2] . $right;
+ }, $html) ?? $html;
return $html;
}
@@ -242,11 +288,11 @@ class FigureTagProcessor{
private function cleanUnclosedTags($html){
foreach (self::STYLE_TAGS as $tag) {
// 清理myfigure标签后的冗余闭合标签
- $html = preg_replace('/(<\/'.self::PROCESSED_TAG.'>)\s*<\/' . $tag . '>/i', '$1', $html);
+ $html = @preg_replace('/(<\/'.self::PROCESSED_TAG.'>)\s*<\/' . $tag . '>/i', '$1', $html);
// 定位所有该标签的开闭标签位置
- preg_match_all("/<{$tag}\b[^>]*>/i", $html, $openMatches, PREG_OFFSET_CAPTURE);
- preg_match_all("/<\/{$tag}>/i", $html, $closeMatches, PREG_OFFSET_CAPTURE);
+ @preg_match_all("/<{$tag}\b[^>]*>/i", $html, $openMatches, PREG_OFFSET_CAPTURE);
+ @preg_match_all("/<\/{$tag}>/i", $html, $closeMatches, PREG_OFFSET_CAPTURE);
$allTags = [];
// 收集开标签
@@ -308,6 +354,7 @@ class FigureTagProcessor{
return $html;
}
+
/**
* 优化文本格式(清理多余空格)
* @param string $html 待处理HTML
@@ -315,14 +362,15 @@ class FigureTagProcessor{
*/
private function optimizeFormat($html){
- // 清理连续空格
- $html = preg_replace('/\s{2,}/', ' ', trim($html));
- // 标签后紧跟字母/数字时加空格
- $html = preg_replace('/<\/'.self::PROCESSED_TAG.'>([A-Za-z0-9])/is', ''.self::PROCESSED_TAG.'> $1', $html);
- // 字母/数字紧跟标签前时加空格
- $html = preg_replace('/([a-zA-Z0-9])<'.self::PROCESSED_TAG.'/is', '$1 <'.self::PROCESSED_TAG.'', $html);
+ // Every step uses "?? $html": preg_replace() returns null on a PCRE failure and must not wipe the text.
+ // Trim both ends, then collapse each run of two or more whitespace characters into one space.
+ $html = trim($html);
+ $html = preg_replace('/\s{2,}/', ' ', $html) ?? $html;
+ // Insert a space when a letter or digit directly follows the closing processed tag.
+ $html = preg_replace('/<\/'.self::PROCESSED_TAG.'>([A-Za-z0-9])/is', '</'.self::PROCESSED_TAG.'> $1', $html) ?? $html;
+ // Insert a space when a letter or digit directly precedes the opening processed tag.
+ $html = preg_replace('/([a-zA-Z0-9])<'.self::PROCESSED_TAG.'/is', '$1 <'.self::PROCESSED_TAG, $html) ?? $html;
return $html;
}
+
/**
* 清理嵌套的myfigure标签(避免重复嵌套)
* @param string $html 待处理HTML
@@ -330,7 +378,7 @@ class FigureTagProcessor{
*/
private function cleanDuplicateNestedTags($html){
+ // An outer processed tag that contains, apart from whitespace, one inner processed tag.
$pattern = '/<'.self::PROCESSED_TAG.'[^>]*>\s*<'.self::PROCESSED_TAG.'([^>]*)>(.*?)<\/'.self::PROCESSED_TAG.'>\s*<\/'.self::PROCESSED_TAG.'>/is';
- $html = preg_replace($pattern, '<'.self::PROCESSED_TAG.'$1>$2'.self::PROCESSED_TAG.'>', $html);
+ // Keep only the inner tag (attributes $1, content $2), properly closed.
+ // preg_replace() returns null on a PCRE failure, so fall back to the input instead of losing it.
+ $html = preg_replace($pattern, '<'.self::PROCESSED_TAG.'$1>$2</'.self::PROCESSED_TAG.'>', $html) ?? $html;
return $html;
}