Merge remote-tracking branch 'origin/master'

2026-01-20 14:18:10 +08:00
parent 94f5208195 f77f722994
commit cd5bde87dc
2 changed files with 445 additions and 269 deletions
--- a/application/common/FigureTagProcessor.php
+++ b/application/common/FigureTagProcessor.php
@@ -1,52 +1,63 @@
 <?php
 namespace app\common;
-class FigureTagProcessor {
+/**
-    // 可配置的样式标签列表（解耦）
+ * 功能：精准匹配并替换Figure相关格式为myfigure标签
-    const STYLE_TAGS = ['i', 'b', 'font', 'strong', 'em','blue'];
+ * 支持格式：figure 数字、(figure 数字)、figure 数字:/figure 数字.（含嵌套/拆分标签）
-    // 最大处理字符串长度（避免内存溢出）
+ * 跳过已被myfigure包裹的Figure（含后缀）
-    const MAX_HTML_LENGTH = 100000;
+ * 跳过Figure 数字+字母/数字后缀（含拆分标签场景，无论是否有空白）
-
+ * 正常处理Figure 数字+空白/样式标签场景
 */
 class FigureTagProcessor{
    //支持的样式标签列表
    private const STYLE_TAGS = ['i', 'b', 'font', 'strong', 'em','blue'];
    //HTML文本最大处理长度（防止内存溢出）
    private const MAX_HTML_LENGTH = 100000;
    //替换后的目标标签名
    private const PROCESSED_TAG = 'myfigure';
    //Figure数字与对应ID的映射数组
    private $aImageMain = [];
    /**
-     * 处理Figure文本，替换为myfigure标签并清理冗余内容
+     * 处理Figure标签替换的主方法
     * @param string $html 待处理的HTML文本
-     * @return array ['status'=>状态码, 'data'=>处理后文本]
+     * @param array $aImageMain Figure数字=>ID的映射数组
-     *         status: 2-空输入, 4-无匹配, 5-处理异常, 1-处理成功
+     * @return array ['status' => 状态码, 'data' => 处理后文本]
     *         status说明：2-空文本, 4-无匹配/已处理, 1-处理成功, 5-处理异常
     */
-    public function dealFigureStr($html = '') {
+    public function dealFigureStr($html = '', $aImageMain = []){
-        //验证
+        //空文本校验
-        if (!is_string($html) || trim($html) === '') {
+        $html = trim($html);
        if ($html === '' || !is_string($html)) {
            return ['status' => 2, 'data' => ''];
        }
-        //超大字符串拦截
+        //超长文本保护
        if (strlen($html) > self::MAX_HTML_LENGTH) {
            return ['status' => 4, 'data' => $html];
        }
-
+        //编码处理
        if (!mb_check_encoding($html, 'UTF-8')) {
            $html = mb_convert_encoding($html, 'UTF-8', 'GBK,GB2312,ASCII,ISO-8859-1');
        }
        //初始化映射数组（过滤非数字键值）
        $this->initImageMap($aImageMain);
        //原始内容
        $originalHtml = $html;
        $hasReplace = false;
        try {
-            //合并嵌套样式标签
+            //只要包含数字+字母/数字后缀，直接返回原内容（核心修复）
-            $mergedHtml = $this->mergeFragmentStyleTags($html);
+            if ($this->hasFigureSuffix($html)) {
-            //提取纯文本（用于匹配Figure）
+                return ['status' => 4, 'data' => $html];
            $plainText = preg_replace('/<[^>]+>/', ' ', $mergedHtml);
            $plainText = preg_replace('/\s+/', ' ', trim($plainText));
            //提取所有匹配的Figure数字
            $allMatches = $this->extractAllFigureMatches($plainText);
            if (empty($allMatches)) {
                return ['status' => 4, 'data' => $originalHtml];
            }
-
+            //合并拆分标签的Figure+数字
-            //替换为myfigure标签
+            $html = $this->preprocessSplitTags($html);
-            $html = $this->replaceFigureWithTag($html, $allMatches, $hasReplace);
+            //替换
-
+            $html = $this->replaceFigureInHtml($html, $hasReplace);
-            //清理冗余内容（仅替换成功后执行）
+            //清理冗余样式/标签
            if ($hasReplace) {
                $html = $this->cleanRedundantStyles($html);
                $html = $this->cleanRedundantPunctuation($html);
                $html = $this->cleanUnclosedTags($html);
                $html = $this->optimizeFormat($html);
                $html = $this->cleanDuplicateNestedTags($html);
            }
        } catch (\Throwable $e) {
@@ -55,153 +66,190 @@ class FigureTagProcessor {
        return [
            'status' => $hasReplace ? 1 : 4,
-            'data' => $hasReplace ? $html : $originalHtml
+            'data' => $html
        ];
    }
    /**
-     * 合并嵌套的样式标签
+     * 全局检测是否包含Figure数字+字母/数字后缀
-     * @param string $html
+     * 覆盖所有拆分/嵌套/无标签场景，无论是否有空白
-     * @return string
+     * @param string $html 待检测HTML
     * @return bool
     */
-    private function mergeFragmentStyleTags($html) {
+    private function hasFigureSuffix($html){
-        foreach (self::STYLE_TAGS as $tag) {
+        $styleTagsPattern = implode('|', self::STYLE_TAGS);
-            $pattern = '/(?:<' . $tag . '>)\s*([^<]+?)\s*<\/' . $tag . '>(?:\s*<' . $tag . '>)\s*([^<]+?)\s*<\/' . $tag . '>/is';
+        
-            while (@preg_match($pattern, $html)) { // 抑制正则警告
+        // 正则1：无标签场景（Figure 4B/4123）
-                $html = preg_replace_callback($pattern, function($matches) {
+        $pattern1 = "/figure\s*\d+[a-zA-Z0-9]/iu";
-                    return trim($matches[1]) . ' ' . trim($matches[2]);
+        
-                }, $html);
+        // 正则2：拆分标签场景（<b>4</b><b>B</b> / <b>4</b> <b>B</b> / <b>4</b>&nbsp;<b>B</b>）
        $pattern2 = "/figure\s*(?:<\/(?:{$styleTagsPattern})>)\s*[\s|&nbsp;]*\s*(?:<(?:{$styleTagsPattern})[^>]*>)\s*(\d+)\s*(?:<\/(?:{$styleTagsPattern})>)\s*[\s|&nbsp;]*\s*(?:<(?:{$styleTagsPattern})[^>]*>)\s*([a-zA-Z0-9])/iu";
        // 正则3：嵌套标签场景（<b>4B</b> / <i>4123</i>）
        $pattern3 = "/figure\s*(?:<(?:{$styleTagsPattern})[^>]*>)\s*\d+[a-zA-Z0-9]\s*(?:<\/(?:{$styleTagsPattern})>)/iu";
        return preg_match($pattern1, $html) || preg_match($pattern2, $html) || preg_match($pattern3, $html);
    }
    /**
     * Rebuild the Figure-number => ID lookup stored in $this->aImageMain.
     *
     * A non-array argument is treated as an empty map. An entry is kept only
     * when both its key and its value consist solely of decimal digits after a
     * string cast; kept entries are stored with integer key and integer value.
     * The property is overwritten in one assignment once the scan is complete.
     *
     * @param array $aImageMain Raw mapping supplied by the caller
     * @return void
     */
    private function initImageMap($aImageMain){
        $candidates = is_array($aImageMain) ? $aImageMain : [];
        $validated = [];
        foreach ($candidates as $figureNo => $imageId) {
            // Check the key first: the value is only cast to string when the key passed.
            if (!ctype_digit((string)$figureNo)) {
                continue;
            }
            if (!ctype_digit((string)$imageId)) {
                continue;
            }
            $validated[(int)$figureNo] = (int)$imageId;
        }
        $this->aImageMain = $validated;
    }
    /**
     * 合并所有拆分标签的Figure+数字（含空白样式标签）
     * @param string $html 待处理HTML
     * @return string
     */
    private function preprocessSplitTags($html){
        $styleTagsPattern = implode('|', self::STYLE_TAGS);
-        // 清理括号内的冗余标点/标签
+        // 正则1：匹配基础拆分标签的Figure+数字
-        $html = preg_replace('/(\(.*?\d+)(?:\s*<[^>]+>)*\s*\.*\s*(?:<[^>]+>)*(\s*.*?\))/is', '$1$2', $html);
+        $pattern = "/(figure)\s*(?:<\/(?:{$styleTagsPattern})>)\s*(?:<(?:{$styleTagsPattern})[^>]*>)\s*(\d+)/iu";
-        $html = preg_replace('/\(\s+/', '(', $html);
+        $html = preg_replace_callback($pattern, function($matches) {
-        $html = preg_replace('/\s+\)/', ')', $html);
+            return $matches[1] . ' ' . $matches[2];
        }, $html);
        // 正则2：匹配多轮拆分标签的Figure+数字（含空白）
        $pattern2 = "/(figure)(?:<\/(?:{$styleTagsPattern})>)\s*(?:<(?:{$styleTagsPattern})[^>]*>)\s*(\s*)\s*(?:<\/(?:{$styleTagsPattern})>)\s*(?:<(?:{$styleTagsPattern})[^>]*>)\s*(\d+)/iu";
        $html = preg_replace_callback($pattern2, function($matches) {
            return $matches[1] . $matches[2] . $matches[3];
        }, $html);
        return $html;
    }
    /**
-     * 从纯文本中提取所有Figure数字（兼容括号/标点/空格）
+     * 核心替换逻辑：将纯数字Figure替换为myfigure标签
-     * @param string $plainText
+     * @param string $html 待处理HTML
-     * @return array
+     * @param bool $hasReplace 是否发生替换（引用传递）
     */
    private function extractAllFigureMatches($plainText) {
        // Collects the distinct "Figure N" references found in $plainText.
        // The bracketed form "(Figure N ...)" is scanned first, then the bare form;
        // a number already recorded by an earlier hit is skipped. The result is keyed
        // by the figure number and sorted by key in descending order.
        $rules = [
            // [regex, value stored under 'hasOuterBracket']
            ['/\(Figure\s*(\d+)\b(?!\p{L}|\s+\p{L})(?:\s*[\.,;:]*\s*)\)\s*([\.,:]{0,1})/iu', true],
            ['/Figure\s*(\d+)\b(?!\p{L}|\s+\p{L})(?:\s*[\.,;:]*\s*)\s*([\.,:]{0,1})/iu', false],
        ];
        $seenNumbers = [];
        $found = [];
        foreach ($rules as $rule) {
            $hits = [];
            // A failed or empty scan (PCRE warnings silenced by @) simply moves on to the next rule.
            if (!@preg_match_all($rule[0], $plainText, $hits, PREG_SET_ORDER)) {
                continue;
            }
            foreach ($hits as $hit) {
                $figureNo = $hit[1];
                // Loose in_array comparison is intentional here: it mirrors the existing de-duplication.
                $usable = ctype_digit($figureNo) && !in_array($figureNo, $seenNumbers);
                if (!$usable) {
                    continue;
                }
                $seenNumbers[] = $figureNo;
                $found[$figureNo] = [
                    'hasOuterBracket' => $rule[1],
                    // Group 2 is absent from the hit when it did not participate in the match.
                    'validPunct' => $hit[2] ?? '',
                    'content' => "Figure {$figureNo}"
                ];
            }
        }
        krsort($found);
        return $found;
    }
    /**
     * 将匹配的Figure替换为myfigure标签（优化标签格式）
     * @param string $html
     * @param array $allMatches
     * @param bool $hasReplace
     * @return string
     */
-    private function replaceFigureWithTag($html, $allMatches, &$hasReplace) {
+    private function replaceFigureInHtml($html, &$hasReplace){
-        foreach ($allMatches as $num => $info) {
+        $styleTagsPattern = implode('|', self::STYLE_TAGS);
-            $innerContent = $info['hasOuterBracket'] 
+        $styleTagsRegex = "(?:<(?:{$styleTagsPattern})[^>]*>)*\s*";
-                ? "({$info['content']})" 
+        $styleTagsCloseRegex = "\s*(?:<\/(?:{$styleTagsPattern})>)*";
                : $info['content'];
-            //<myfigure data-id="1">Figure 1</myfigure>
+        // 正则1：匹配括号内的纯数字Figure（如 (Figure 2)、(<b>Figure 3</b>)）
-            $targetTag = "<myfigure data-id=\"{$num}\">{$innerContent}</myfigure>";
+        // $pattern1 = "/\(\s*{$styleTagsRegex}figure\s*(\d+)\s*([\.,:]{0,1}){$styleTagsCloseRegex}\s*(?![a-zA-Z0-9])/iu";
-            if (!empty($info['validPunct']) && !$info['hasOuterBracket']) {
+        $pattern1 = "/\(\s*{$styleTagsRegex}figure\s*(\d+)\s*([\.,:]{0,1}){$styleTagsCloseRegex}\s*(?![a-zA-Z0-9])/iuD";
-                $targetTag .= $info['validPunct'];
+        $html = preg_replace_callback($pattern1, function($matches) use (&$hasReplace) {
            $num = $matches[1];
            $numInt = (int)$num;
            $suffix = $matches[2] ?? '';
            // 过滤条件：非数字、无映射、已处理过的标签
            if (!ctype_digit($num) || !isset($this->aImageMain[$numInt]) || 
                $this->isMatchPositionHasMyFigureTag($matches[0], "Figure {$num}")) {
                return $matches[0];
            }
-            $patternSuffix = '(?!\p{L}|\s+\p{L})';
+            // 执行替换
-            $pattern = $info['hasOuterBracket']
+            $primaryId = $this->aImageMain[$numInt];
-                ? '/\(\s*(?:<[^>]+>|\s)*Figure(?:<[^>]+>|\s)*' . $num . '\b' . $patternSuffix . '(?:\s*[\.,;:]*\s*|\s*<[^>]+>)*\s*\)/iu'
+            $baseTag = "<" . self::PROCESSED_TAG . " data-id=\"{$primaryId}\">Figure {$num}</" . self::PROCESSED_TAG . ">";
-                : '/\s*(?:<[^>]+>|\s)*Figure(?:<[^>]+>|\s)*' . $num . '\b' . $patternSuffix . '(?:\s*[\.,;:]*\s*|\s*<[^>]+>)*\s*([\.,:]{0,1})/iu';
+            $target = "({$baseTag}{$suffix})";
-            //执行替换（最多替换1次，避免重复）
+            $hasReplace = true;
-            $html = @preg_replace($pattern, $targetTag, $html, 1, $count);
+            return $target;
-            if ($count > 0) {
+        }, $html);
-                $hasReplace = true;
+
-                error_log("[FigureTagProcessor] 替换成功 - ID:{$num} 括号:".($info['hasOuterBracket']?'是':'否'));
+        // 正则2：匹配无括号的纯数字Figure（如 Figure 2、<i>Figure 3</i>:）
        // $pattern2 = "/{$styleTagsRegex}figure\s*(\d+)\s*([\.,:]{0,1}){$styleTagsCloseRegex}(?![a-zA-Z0-9])/iu";
        $pattern2 = "/{$styleTagsRegex}figure\s*(\d+)\s*([\.,:]{0,1}){$styleTagsCloseRegex}\s*(?![a-zA-Z0-9])/iuD";
        $html = preg_replace_callback($pattern2, function($matches) use (&$hasReplace) {
            $num = $matches[1];
            $numInt = (int)$num;
            $suffix = $matches[2] ?? '';
            // 过滤条件：非数字、无映射、已处理过的标签
            if (!ctype_digit($num) || !isset($this->aImageMain[$numInt]) || 
                $this->isMatchPositionHasMyFigureTag($matches[0], "Figure {$num}")) {
                return $matches[0];
            }
-        }
+
            // 执行替换
            $primaryId = $this->aImageMain[$numInt];
            $baseTag = "<" . self::PROCESSED_TAG . " data-id=\"{$primaryId}\">Figure {$num}</" . self::PROCESSED_TAG . ">";
            $target = "{$baseTag}{$suffix}";
            $hasReplace = true;
            return $target;
        }, $html);
        return $html;
    }
    /**
-     * 清理myfigure标签周围的冗余样式标签（适配新标签格式）
+     * 检测当前匹配内容是否已包含myfigure标签（避免重复替换）
-     * @param string $html
+     * @param string $content 匹配的文本片段
     * @param string $figureText 待检测的Figure文本（如 Figure 2）
     * @return bool
     */
    private function isMatchPositionHasMyFigureTag($content, $figureText){
        // Reports whether $content already contains $figureText wrapped in the processed
        // tag (any attributes, optional surrounding whitespace), matched case-insensitively.
        $tag = self::PROCESSED_TAG;
        $wrappedFigure = sprintf(
            '/<%1$s[^>]*>\s*%2$s\s*<\/%1$s>/is',
            $tag,
            preg_quote($figureText, '/')
        );
        // preg_match yields 1 on a hit; 0 and false (error) both count as "not wrapped".
        return preg_match($wrappedFigure, $content) === 1;
    }
    /**
     * 清理myfigure标签周围的冗余样式标签
     * @param string $html 待处理HTML
     * @return string
     */
-    private function cleanRedundantStyles($html) {
+    private function cleanRedundantStyles($html){
        foreach (self::STYLE_TAGS as $tag) {
-            $pattern = '/<' . $tag . '>\s*<myfigure([^>]*)>(.*?)<\/myfigure>([\.,:]{0,1})\s*<\/' . $tag . '>/is';
+            $pattern = '/<' . $tag . '>\s*<'.self::PROCESSED_TAG.'([^>]*?)>(.*?)<\/'.self::PROCESSED_TAG.'>([\.,:]{0,1})\s*<\/' . $tag . '>/is';
-            $html = @preg_replace($pattern, '<myfigure$1>$2</myfigure>$3', $html);
+            $html = preg_replace($pattern, '<'.self::PROCESSED_TAG.'$1>$2</'.self::PROCESSED_TAG.'>$3', $html);
        }
-        //清理闭标签
+        
        // 清理无匹配的闭合样式标签
        $html = preg_replace('/<\/('.implode('|', self::STYLE_TAGS).')>(?![^<]*<\1>)/is', '', $html);
        return $html;
    }
    /**
-     * 清理myfigure标签后的冗余标点（适配新标签格式）
+     * 清理myfigure标签周围的冗余标点
-     * @param string $html
+     * @param string $html 待处理HTML
     * @return string
     */
-    private function cleanRedundantPunctuation($html) {
+    private function cleanRedundantPunctuation($html){
-        $html = preg_replace('/<myfigure data-id="(\d+)">\(Figure \d+\)<\/myfigure>\)\./i', '<myfigure data-id="$1">(Figure $1)</myfigure>.', $html);
+        // 修复括号+标点的冗余格式
-        $html = preg_replace('/<\/myfigure>\)\.([\.,:]{0,1})/', '</myfigure>)$1', $html);
+        $html = preg_replace('/<'.self::PROCESSED_TAG.' data-id="(\d+)">\(Figure \d+\)<\/'.self::PROCESSED_TAG.'>\)\./i', 
-        $html = preg_replace('/<\/myfigure>\.\)([\.,:]{0,1})/', '</myfigure>)$1', $html);
+            '<'.self::PROCESSED_TAG.' data-id="$1">(Figure $1)</'.self::PROCESSED_TAG.'>.', $html);
-        $html = preg_replace('/<\/myfigure>([\.,:]){2,}/', '</myfigure>$1', $html);
+        $html = preg_replace('/<\/'.self::PROCESSED_TAG.'>\)\.([\.,:]{0,1})/', '</'.self::PROCESSED_TAG.'>)$1', $html);
-        $html = preg_replace('/<myfigure data-id="(\d+)">\((Figure \d+)\s*<\/myfigure>([\.,:]{0,1})/i', 
+        $html = preg_replace('/<\/'.self::PROCESSED_TAG.'>\.\)([\.,:]{0,1})/', '</'.self::PROCESSED_TAG.'>)$1', $html);
-            '<myfigure data-id="$1">($2)</myfigure>$3', $html);
+        
        // 清理重复标点
        $html = preg_replace('/<\/'.self::PROCESSED_TAG.'>([\.,:]){2,}/', '</'.self::PROCESSED_TAG.'>$1', $html);
        // 修复括号内的标签冗余
        $html = preg_replace('/<'.self::PROCESSED_TAG.' data-id="(\d+)">\((Figure \d+)\s*<\/'.self::PROCESSED_TAG.'>([\.,:]{0,1})/i', 
            '<'.self::PROCESSED_TAG.' data-id="$1">($2)</'.self::PROCESSED_TAG.'>$3', $html);
        return $html;
    }
    /**
-     * 清理孤立的样式标签
+     * 清理未闭合的样式标签
-     * @param string $html
+     * @param string $html 待处理HTML
     * @return string
     */
-    private function cleanUnclosedTags($html) {
+    private function cleanUnclosedTags($html){
        foreach (self::STYLE_TAGS as $tag) {
-            $html = @preg_replace('/(<\/myfigure>)\s*<\/' . $tag . '>/i', '$1', $html);
+            // 清理myfigure标签后的冗余闭合标签
-        }
+            $html = preg_replace('/(<\/'.self::PROCESSED_TAG.'>)\s*<\/' . $tag . '>/i', '$1', $html);
-        foreach (self::STYLE_TAGS as $tag) {
+
-            @preg_match_all("/<{$tag}\b[^>]*>/i", $html, $openMatches, PREG_OFFSET_CAPTURE);
+            // 定位所有该标签的开闭标签位置
-            @preg_match_all("/<\/{$tag}>/i", $html, $closeMatches, PREG_OFFSET_CAPTURE);
+            preg_match_all("/<{$tag}\b[^>]*>/i", $html, $openMatches, PREG_OFFSET_CAPTURE);
            preg_match_all("/<\/{$tag}>/i", $html, $closeMatches, PREG_OFFSET_CAPTURE);
            $allTags = [];
            // 收集开标签
            foreach ($openMatches[0] as $m) {
                $allTags[] = [
                    'offset' => $m[1],
@@ -210,6 +258,7 @@ class FigureTagProcessor {
                    'length' => strlen($m[0])
                ];
            }
            // 收集闭标签
            foreach ($closeMatches[0] as $m) {
                $allTags[] = [
                    'offset' => $m[1],
@@ -218,57 +267,71 @@ class FigureTagProcessor {
                    'length' => strlen($m[0])
                ];
            }
            // 按位置排序
            usort($allTags, function($a, $b) {
                return $a['offset'] - $b['offset'];
            });
            // 栈结构匹配开闭标签
            $tagStack = [];
            $removeOffsets = [];
            foreach ($allTags as $t) {
-                if ($t['type'] == 'open') {
+                if ($t['type'] === 'open') {
                    array_push($tagStack, $t);
                } else {
                    if (!empty($tagStack)) {
                        array_pop($tagStack);
                    } else {
-                        $removeOffsets[] = [
+                        // 无匹配开标签的闭标签，标记删除
-                            'pos' => $t['offset'],
+                        $removeOffsets[] = $t;
                            'len' => $t['length'],
                            'content' => $t['content']
                        ];
                    }
                }
            }
            // 无匹配闭标签的开标签，标记删除
            foreach ($tagStack as $t) {
-                $removeOffsets[] = [
+                $removeOffsets[] = $t;
                    'pos' => $t['offset'],
                    'len' => $t['length'],
                    'content' => $t['content']
                ];
            }
-            // 倒序删除，避免偏移错乱
+            // 按偏移量倒序删除（避免影响后续偏移）
            usort($removeOffsets, function($a, $b) {
-                return $b['pos'] - $a['pos'];
+                return $b['offset'] - $a['offset'];
            });
            foreach ($removeOffsets as $item) {
-                if ($item['pos'] >= 0 && $item['pos'] < strlen($html)) {
+                if ($item['offset'] >= 0 && $item['offset'] < strlen($html)) {
-                    $html = substr_replace($html, '', $item['pos'], $item['len']);
+                    $html = substr_replace($html, '', $item['offset'], $item['length']);
                }
            }
        }
        return $html;
    }
    /**
-     * 优化文本格式（合并多余空格，规范myfigure标签前后空格）
+     * 优化文本格式（清理多余空格）
-     * @param string $html
+     * @param string $html 待处理HTML
     * @return string
     */
-    private function optimizeFormat($html) {
+    private function optimizeFormat($html){
        // 清理连续空格
        $html = preg_replace('/\s{2,}/', ' ', trim($html));
-        $html = preg_replace('/<\/myfigure>([A-Za-z0-9])/is', '</myfigure> $1', $html);
+        // 标签后紧跟字母/数字时加空格
-        $html = preg_replace('/([a-zA-Z0-9])<myfigure/is', '$1 <myfigure', $html);
+        $html = preg_replace('/<\/'.self::PROCESSED_TAG.'>([A-Za-z0-9])/is', '</'.self::PROCESSED_TAG.'> $1', $html);
        // 字母/数字紧跟标签前时加空格
        $html = preg_replace('/([a-zA-Z0-9])<'.self::PROCESSED_TAG.'/is', '$1 <'.self::PROCESSED_TAG.'', $html);
        return $html;
    }
    /**
     * Collapse a processed tag that directly wraps another processed tag.
     *
     * Where an outer tag holds nothing but optional whitespace and one inner
     * tag, the pair is replaced by a single tag carrying the inner tag's
     * attributes and content.
     *
     * @param string $html HTML to clean
     * @return string Cleaned HTML (the raw preg_replace() result)
     */
    private function cleanDuplicateNestedTags($html){
        $tag = self::PROCESSED_TAG;
        $open = '<' . $tag;
        $close = '<\/' . $tag . '>';
        // outer-open, whitespace, inner-open(attrs), content, inner-close, whitespace, outer-close
        $nested = '/' . $open . '[^>]*>\s*' . $open . '([^>]*)>(.*?)' . $close . '\s*' . $close . '/is';
        $flattened = '<' . $tag . '$1>$2</' . $tag . '>';
        return preg_replace($nested, $flattened, $html);
    }
 }
--- a/application/common/TableTagProcessor.php
+++ b/application/common/TableTagProcessor.php
@@ -2,59 +2,71 @@
 namespace app\common;
 /**
 * Table标签处理器（生产环境终极版）
 * 功能：精准匹配并替换Table相关格式为mytable标签
- * 支持格式：table 数字、(table 数字)、table 数字:/table 数字.（含嵌套标签）
+ * 支持格式：table 数字、(table 数字)、table 数字:/table 数字.（含嵌套/拆分标签）
- * 特性：支持任意嵌套标签/括号、不处理数字+字母、仅跳过已被mytable包裹的Table
+ * 跳过已被mytable包裹的table（含后缀）
 * 跳过table 数字+字母/数字后缀（含拆分标签场景，无论是否有空白）
 * 正常处理table 数字+空白/样式标签场景
 */
-class TableTagProcessor {
+class TableTagProcessor{
-    // 可配置的样式标签列表
+    // 支持的样式标签列表
-    const STYLE_TAGS = ['i', 'b', 'font', 'strong', 'em'];
+    private const STYLE_TAGS = ['i', 'b', 'font', 'strong', 'em','blue'];
-    // 最大处理字符串长度
+    // HTML文本最大处理长度（防止内存溢出）
-    const MAX_HTML_LENGTH = 100000;
+    private const MAX_HTML_LENGTH = 100000;
-    // 目标替换标签
+    // 替换后的目标标签名
-    const PROCESSED_TAG = 'mytable';
+    private const PROCESSED_TAG = 'mytable';
-    // 数据库表格ID映射
+    // Table数字与对应ID的映射数组
    private $aTableMain = [];
    /**
-     * 处理Table文本，替换为mytable标签并清理冗余内容
+     * 处理Table标签替换的主方法
     * @param string $html 待处理的HTML文本
-     * @param array $aTableMain Table数字→主键ID的映射数组（如 [1=>1001, 2=>1002]）
+     * @param array $aTableMain Table数字=>ID的映射数组（可选，默认1=>1~10=>10）
-     * @return array ['status'=>状态码, 'data'=>处理后文本]
+     * @return array ['status' => 状态码, 'data' => 处理后文本]
-     *         status: 2-空输入, 4-无匹配/已处理, 5-处理异常, 1-处理成功
+     *         status说明：2-空文本, 4-无匹配/已处理, 1-处理成功, 5-处理异常
     */
-    public function dealTableStr($html = '', $aTableMain = []) {
+    public function dealTableStr($html = '', $aTableMain = []){
-        //验证
+        // 初始化默认映射数组（仅当入参为空时使用）
-        if (!is_string($html) || trim($html) === '') {
+        $defaultTableMap = [1=>1,2=>2,3=>3,4=>4,5=>5,6=>6,7=>7,8=>8,9=>9,10=>10];
        // 优先使用入参，入参为空则用默认值
        $tableMap = !empty($aTableMain) ? $aTableMain : $defaultTableMap;
        // 空文本校验
        $html = trim($html);
        if ($html === '' || !is_string($html)) {
            return ['status' => 2, 'data' => ''];
        }
-        //超大字符串拦截（防止内存溢出）
+
        // 超长文本保护
        if (strlen($html) > self::MAX_HTML_LENGTH) {
            return ['status' => 4, 'data' => $html];
        }
-        //初始化主键映射数组
+        // 编码处理（统一转为UTF-8，避免中文乱码）
-        if(!empty($aTableMain)){
+        if (!mb_check_encoding($html, 'UTF-8')) {
-            $aTableMainNew = [];
+            $html = mb_convert_encoding($html, 'UTF-8', 'GBK,GB2312,ASCII,ISO-8859-1');
            foreach ($aTableMain as $key => $value) {
                if (!ctype_digit((string)$key) || !ctype_digit((string)$value)) {
                    continue;
                }
                $keyInt = (int)$key;
                $aTableMainNew[$keyInt + 1] = $value;
            }
            $this->aTableMain = $aTableMainNew;
        }
        // 初始化映射数组（过滤非数字键值）
        $this->initTableMap($tableMap);
        // 原始内容（异常时返回）
        $originalHtml = $html;
        $hasReplace = false;
        try {
-            //原始HTML中匹配所有符合规则的Table
+            // 只要包含数字+字母/数字后缀，直接返回原内容
            if ($this->hasTableSuffix($html)) {
                return ['status' => 4, 'data' => $html];
            }
            // 合并拆分标签的Table+数字
            $html = $this->preprocessSplitTags($html);
            // 核心替换逻辑
            $html = $this->replaceTableInHtml($html, $hasReplace);
-            // 清理冗余内容
+            // 清理冗余样式/标签（仅当发生替换时执行）
            if ($hasReplace) {
                $html = $this->cleanRedundantStyles($html);
                $html = $this->cleanRedundantPunctuation($html);
@@ -74,50 +86,120 @@ class TableTagProcessor {
    }
    /**
-     * 核心方法：直接在HTML中匹配并替换Table
+     * 全局检测是否包含Table数字+字母/数字后缀
     * 覆盖所有拆分/嵌套/无标签场景，无论是否有空白
     * @param string $html 待检测HTML
     * @return bool
     */
    private function hasTableSuffix($html){
        // Returns true when $html contains "table <number>" immediately followed by a
        // letter/digit suffix, in any of three layouts:
        //   1. plain text                      (Table 4B)
        //   2. number and suffix in adjacent style tags, optionally separated by
        //      whitespace or &nbsp;            (<b>4</b><b>B</b>, <b>4</b>&nbsp;<b>B</b>)
        //   3. number+suffix inside one tag    (<b>4B</b>)
        $tags = implode('|', self::STYLE_TAGS);
        $openTag = "(?:<(?:{$tags})[^>]*>)";
        $closeTag = "(?:<\/(?:{$tags})>)";
        // Fix: the previous "[\s|&nbsp;]*" was a character class, so it matched the single
        // characters | & n b s p ; instead of the &nbsp; entity. An alternation group
        // matches runs of whitespace and/or literal "&nbsp;" only.
        $gap = "(?:\s|&nbsp;)*";

        // NOTE(review): \d+ can backtrack, so pattern 1 also matches any number with two or
        // more digits (e.g. "table 10"). The original comment lists "4123" as an intended
        // case, so this is kept as-is - confirm that skipping multi-digit numbers is desired.
        $patterns = [
            "/table\s*\d+[a-zA-Z0-9]/iu",
            "/table\s*{$closeTag}{$gap}{$openTag}\s*\d+\s*{$closeTag}{$gap}{$openTag}\s*[a-zA-Z0-9]/iu",
            "/table\s*{$openTag}\s*\d+[a-zA-Z0-9]\s*{$closeTag}/iu",
        ];
        foreach ($patterns as $pattern) {
            // preg_match returns 1 on a hit; 0 and false (PCRE error) are both treated as "no suffix".
            if (preg_match($pattern, $html) === 1) {
                return true;
            }
        }
        return false;
    }
    /**
     * Rebuild the Table-number => ID lookup stored in $this->aTableMain.
     *
     * A non-array argument is treated as an empty map. Only entries whose key
     * and value both consist solely of decimal digits (after a string cast)
     * survive; they are stored as integer => integer. The property is replaced
     * in a single assignment after the whole input has been scanned.
     *
     * @param array $aTableMain Raw mapping supplied by the caller
     * @return void
     */
    private function initTableMap($aTableMain = []){
        $source = is_array($aTableMain) ? $aTableMain : [];
        $cleanMap = [];
        foreach ($source as $tableNo => $tableId) {
            // Key is validated before the value, so a bad key never triggers the value cast.
            $accepted = ctype_digit((string)$tableNo) && ctype_digit((string)$tableId);
            if (!$accepted) {
                continue;
            }
            $cleanMap[(int)$tableNo] = (int)$tableId;
        }
        $this->aTableMain = $cleanMap;
    }
    /**
     * 合并所有拆分标签的Table+数字（含空白样式标签）
     * @param string $html 待处理HTML
     * @return string
     */
-    private function replaceTableInHtml($html, &$hasReplace) {
+    private function preprocessSplitTags($html){
        $styleTagsPattern = implode('|', self::STYLE_TAGS);
-        $styleTagsRegex = "(?:<(?:{$styleTagsPattern})[^>]*>)*\s*"; // 匹配任意嵌套样式标签
+        
        // 正则1：匹配基础拆分标签的Table+数字
        $pattern = "/(table)\s*(?:<\/(?:{$styleTagsPattern})>)\s*(?:<(?:{$styleTagsPattern})[^>]*>)\s*(\d+)/iu";
        $html = @preg_replace_callback($pattern, function($matches) {
            return $matches[1] . ' ' . $matches[2];
        }, $html);
        // 正则2：匹配多轮拆分标签的Table+数字（含空白）
        $pattern2 = "/(table)(?:<\/(?:{$styleTagsPattern})>)\s*(?:<(?:{$styleTagsPattern})[^>]*>)\s*(\s*)\s*(?:<\/(?:{$styleTagsPattern})>)\s*(?:<(?:{$styleTagsPattern})[^>]*>)\s*(\d+)/iu";
        $html = @preg_replace_callback($pattern2, function($matches) {
            return $matches[1] . $matches[2] . $matches[3];
        }, $html);
        return $html;
    }
    /**
     * 核心替换逻辑：将纯数字Table替换为mytable标签
     * @param string $html 待处理HTML
     * @param bool $hasReplace 是否发生替换（引用传递）
     * @return string
     */
    private function replaceTableInHtml($html, &$hasReplace){
        $styleTagsPattern = implode('|', self::STYLE_TAGS);
        $styleTagsRegex = "(?:<(?:{$styleTagsPattern})[^>]*>)*\s*";
        $styleTagsCloseRegex = "\s*(?:<\/(?:{$styleTagsPattern})>)*";
-        // 规则1：匹配带括号的Table（如 (Table 82)、(<b>Table 1.</b>)）
+        // 正则1：匹配括号内的纯数字Table（如 (Table 2)、(<b>Table 3</b>)）
-        $pattern1 = "/\(\s*{$styleTagsRegex}table\s*(\d+)\s*([\.,:]{0,1}){$styleTagsCloseRegex}\s*\)/iu";
+        $pattern1 = "/\(\s*{$styleTagsRegex}table\s*(\d+)\s*([\.,:]{0,1}){$styleTagsCloseRegex}\s*(?![a-zA-Z0-9])/iuD";
-        $html = preg_replace_callback($pattern1, function($matches) use (&$hasReplace) {
+        $html = @preg_replace_callback($pattern1, function($matches) use (&$hasReplace) {
            $num = $matches[1];
-            $numInt = intval($num);
+            $numInt = (int)$num;
            $suffix = $matches[2] ?? '';
-            // 校验：纯数字 + 有映射ID + 未被mytable包裹（避免重复替换）
+            // 过滤条件：非数字、无映射、已处理过的标签
            if (!ctype_digit($num) || !isset($this->aTableMain[$numInt]) || 
                $this->isMatchPositionHasMyTableTag($matches[0], "Table {$num}")) {
                return $matches[0];
            }
            // 执行替换
            $primaryId = $this->aTableMain[$numInt];
-            $baseTag = "<".self::PROCESSED_TAG." data-id=\"{$primaryId}\">Table {$num}</".self::PROCESSED_TAG.">";
+            $baseTag = "<" . self::PROCESSED_TAG . " data-id=\"{$primaryId}\">Table {$num}</" . self::PROCESSED_TAG . ">";
            $target = "({$baseTag}{$suffix})";
            $hasReplace = true;
            return $target;
        }, $html);
-        // 规则2：匹配无括号的Table（如 Table 1、<b>Table 2:</b>、<i>Table 3.</i>）
+        // 正则2：匹配无括号的纯数字Table（如 Table 2、<i>Table 3</i>:）
-        $pattern2 = "/{$styleTagsRegex}table\s*(\d+)\s*([\.,:]{0,1}){$styleTagsCloseRegex}(?![a-zA-Z])/iu";
+        $pattern2 = "/{$styleTagsRegex}table\s*(\d+)\s*([\.,:]{0,1}){$styleTagsCloseRegex}\s*(?![a-zA-Z0-9])/iuD";
-        $html = preg_replace_callback($pattern2, function($matches) use (&$hasReplace) {
+        $html = @preg_replace_callback($pattern2, function($matches) use (&$hasReplace) {
            $num = $matches[1];
-            $numInt = intval($num);
+            $numInt = (int)$num;
            $suffix = $matches[2] ?? '';
-            // 校验：纯数字 + 有映射ID + 未被mytable包裹 + 不是数字+字母组合
+            // 过滤条件：非数字、无映射、已处理过的标签
            if (!ctype_digit($num) || !isset($this->aTableMain[$numInt]) || 
                $this->isMatchPositionHasMyTableTag($matches[0], "Table {$num}")) {
                return $matches[0];
            }
            // 执行替换
            $primaryId = $this->aTableMain[$numInt];
-            $baseTag = "<".self::PROCESSED_TAG." data-id=\"{$primaryId}\">Table {$num}</".self::PROCESSED_TAG.">";
+            $baseTag = "<" . self::PROCESSED_TAG . " data-id=\"{$primaryId}\">Table {$num}</" . self::PROCESSED_TAG . ">";
            $target = "{$baseTag}{$suffix}";
            $hasReplace = true;
@@ -127,126 +209,157 @@ class TableTagProcessor {
        return $html;
    }
    /**
     * Report whether the given fragment already contains the Table label wrapped
     * in the processed tag, so callers can skip it instead of wrapping it again.
     *
     * NOTE(review): this chunk shows a second declaration of
     * isMatchPositionHasMyTableTag further down in the same class. PHP refuses
     * to compile a class that declares a method twice, so confirm that only one
     * declaration survives in the real file.
     *
     * @param string $content   Text fragment to inspect
     * @param string $tableText Table label to look for (e.g. "Table 2")
     * @return bool True when the wrapped label is present, false otherwise
     *              (including when the regex engine reports an error)
     */
    private function isMatchPositionHasMyTableTag($content, $tableText){
        $tagName = self::PROCESSED_TAG;
        // Quote the label so regex metacharacters in it are matched literally.
        $label = preg_quote($tableText, '/');
        $wrappedLabelRegex = '/<' . $tagName . '[^>]*>\s*' . $label . '\s*<\/' . $tagName . '>/is';
        $matchCount = @preg_match($wrappedLabelRegex, $content);
        return (bool)$matchCount;
    }
    /**
     * Remove style tags that directly wrap a processed tag, then drop stray
     * closing style tags.
     *
     * Every regex step is best-effort: preg_replace() returns null when the
     * regex engine fails, and in that case the text from before the step is
     * kept instead of being replaced by null.
     *
     * @param string $html HTML to clean
     * @return string Cleaned HTML
     */
    private function cleanRedundantStyles($html){
        $tagName = self::PROCESSED_TAG;
        foreach (self::STYLE_TAGS as $tag) {
            // <tag><TAG ...>text</TAG>[.,:]</tag>  =>  <TAG ...>text</TAG>[.,:]
            // Only an attribute-less opening style tag (exactly "<tag>") is matched.
            $pattern = '/<' . $tag . '>\s*<' . $tagName . '([^>]*?)>(.*?)<\/' . $tagName . '>([\.,:]{0,1})\s*<\/' . $tag . '>/is';
            $replacement = '<' . $tagName . '$1>$2</' . $tagName . '>$3';
            // "@" keeps the original best-effort behaviour; "?? $html" stops a failed
            // call from overwriting the document with null.
            $html = @preg_replace($pattern, $replacement, $html) ?? $html;
        }

        // Remove a closing style tag unless the next tag after it is an attribute-less
        // opening tag of the same name.
        // NOTE(review): as written this also removes closing tags that DO have a matching
        // opening tag before them (e.g. the "</b>" in "<b>x</b> y") - confirm this is intended.
        $strayClosePattern = '/<\/(' . implode('|', self::STYLE_TAGS) . ')>(?![^<]*<\1>)/is';
        $html = @preg_replace($strayClosePattern, '', $html) ?? $html;
        return $html;
    }
    /**
     * Tidy redundant parentheses and punctuation around processed tags.
     *
     * Every regex step is best-effort: when preg_replace() returns null (regex
     * engine error) the text from before the step is kept.
     *
     * @param string $html HTML to clean
     * @return string Cleaned HTML
     */
    private function cleanRedundantPunctuation($html){
        $tagName = self::PROCESSED_TAG;

        // <TAG data-id="N">(Table M)</TAG>).  =>  <TAG data-id="N">(Table M)</TAG>.
        // The label is captured as $2 and reused; the previous replacement wrote
        // "(Table $1)", which put the data-id where the table number belongs.
        $html = @preg_replace(
            '/<' . $tagName . ' data-id="(\d+)">\((Table \d+)\)<\/' . $tagName . '>\)\./i',
            '<' . $tagName . ' data-id="$1">($2)</' . $tagName . '>.',
            $html
        ) ?? $html;

        // </TAG>).X  =>  </TAG>)X   (X is an optional . , or :)
        $html = @preg_replace('/<\/' . $tagName . '>\)\.([\.,:]{0,1})/', '</' . $tagName . '>)$1', $html) ?? $html;

        // </TAG>.)X  =>  </TAG>)X
        $html = @preg_replace('/<\/' . $tagName . '>\.\)([\.,:]{0,1})/', '</' . $tagName . '>)$1', $html) ?? $html;

        // Collapse a run of two or more . , : after the closing tag; the repeated
        // capture group holds the last character of the run, so that one is kept.
        $html = @preg_replace('/<\/' . $tagName . '>([\.,:]){2,}/', '</' . $tagName . '>$1', $html) ?? $html;

        // <TAG data-id="N">(Table M</TAG>X  =>  <TAG data-id="N">(Table M)</TAG>X
        // Closes a parenthesis that was opened inside the tag but never closed.
        $html = @preg_replace(
            '/<' . $tagName . ' data-id="(\d+)">\((Table \d+)\s*<\/' . $tagName . '>([\.,:]{0,1})/i',
            '<' . $tagName . ' data-id="$1">($2)</' . $tagName . '>$3',
            $html
        ) ?? $html;

        return $html;
    }
    /**
     * Remove style tags that have no partner: closing tags without a preceding
     * opening tag and opening tags that are never closed.
     *
     * Regex steps are best-effort: a failed preg_replace() keeps the previous
     * text, and a failed tag scan skips that tag instead of acting on
     * incomplete match data.
     *
     * @param string $html HTML to clean
     * @return string Cleaned HTML
     */
    private function cleanUnclosedTags($html){
        // Drop a closing style tag that sits directly after a closing processed tag.
        foreach (self::STYLE_TAGS as $tag) {
            $html = @preg_replace('/(<\/' . self::PROCESSED_TAG . '>)\s*<\/' . $tag . '>/i', '$1', $html) ?? $html;
        }

        foreach (self::STYLE_TAGS as $tag) {
            // Locate every opening and closing occurrence of this tag with its byte offset.
            $openMatches = [];
            $closeMatches = [];
            $openScan = @preg_match_all('/<' . $tag . '\b[^>]*>/i', $html, $openMatches, PREG_OFFSET_CAPTURE);
            $closeScan = @preg_match_all('/<\/' . $tag . '>/i', $html, $closeMatches, PREG_OFFSET_CAPTURE);
            if ($openScan === false || $closeScan === false) {
                // Without both scans the pairing below would be wrong; leave this tag untouched.
                continue;
            }

            $occurrences = [];
            foreach ($openMatches[0] as [$text, $offset]) {
                $occurrences[] = ['offset' => $offset, 'length' => strlen($text), 'type' => 'open'];
            }
            foreach ($closeMatches[0] as [$text, $offset]) {
                $occurrences[] = ['offset' => $offset, 'length' => strlen($text), 'type' => 'close'];
            }

            // Walk the tags in document order.
            usort($occurrences, static function ($a, $b) {
                return $a['offset'] <=> $b['offset'];
            });

            // Pair each closing tag with the most recent unpaired opening tag.
            $pendingOpens = [];
            $toRemove = [];
            foreach ($occurrences as $occurrence) {
                if ($occurrence['type'] === 'open') {
                    $pendingOpens[] = $occurrence;
                    continue;
                }
                if (!empty($pendingOpens)) {
                    array_pop($pendingOpens);
                } else {
                    // Closing tag with no opening tag before it.
                    $toRemove[] = $occurrence;
                }
            }
            // Opening tags still pending were never closed.
            foreach ($pendingOpens as $occurrence) {
                $toRemove[] = $occurrence;
            }

            // Delete from the end of the string backwards so earlier offsets stay valid.
            usort($toRemove, static function ($a, $b) {
                return $b['offset'] <=> $a['offset'];
            });
            foreach ($toRemove as $occurrence) {
                $html = substr_replace($html, '', $occurrence['offset'], $occurrence['length']);
            }
        }
        return $html;
    }
    /**
     * Normalise whitespace in the text and around processed tags.
     *
     * Every regex step is best-effort: when preg_replace() returns null (regex
     * engine error) the text from before the step is kept.
     *
     * @param string $html HTML to format
     * @return string Formatted HTML
     */
    private function optimizeFormat($html){
        $tagName = self::PROCESSED_TAG;

        // Trim the ends, then collapse each run of two or more whitespace characters
        // (line breaks included) into a single space.
        $html = trim($html);
        $html = @preg_replace('/\s{2,}/', ' ', $html) ?? $html;

        // Insert a space when a letter or digit directly follows the closing tag.
        $html = @preg_replace('/<\/' . $tagName . '>([A-Za-z0-9])/is', '</' . $tagName . '> $1', $html) ?? $html;

        // Insert a space when a letter or digit directly precedes the opening tag.
        $html = @preg_replace('/([a-zA-Z0-9])<' . $tagName . '/is', '$1 <' . $tagName, $html) ?? $html;

        return $html;
    }
    /**
     * Collapse a processed tag that is nested directly inside another processed
     * tag, keeping the inner tag's attributes and content.
     *
     * The replacement is repeated until nothing changes, so nesting deeper than
     * two levels is flattened as well. Each pass shortens the string, so the
     * loop terminates. If the regex engine reports an error, the text from
     * before that pass is returned.
     *
     * @param string $html HTML to clean
     * @return string HTML without directly nested processed tags
     */
    private function cleanDuplicateNestedTags($html){
        $tagName = self::PROCESSED_TAG;
        // <TAG a><TAG b>text</TAG></TAG>  =>  <TAG b>text</TAG>
        $pattern = '/<' . $tagName . '[^>]*>\s*<' . $tagName . '([^>]*)>(.*?)<\/' . $tagName . '>\s*<\/' . $tagName . '>/is';
        $replacement = '<' . $tagName . '$1>$2</' . $tagName . '>';

        do {
            $replacedCount = 0;
            $result = @preg_replace($pattern, $replacement, $html, -1, $replacedCount);
            if ($result === null) {
                // Regex engine error: keep what we have rather than returning null.
                break;
            }
            $html = $result;
        } while ($replacedCount > 0);

        return $html;
    }
    /**
     * Tell whether the Table label appears inside the content already wrapped
     * in the processed tag.
     *
     * NOTE(review): this chunk shows another declaration of
     * isMatchPositionHasMyTableTag earlier in the same class. PHP refuses to
     * compile a class that declares a method twice, so confirm that only one
     * declaration survives in the real file.
     *
     * @param string $content   Content to check
     * @param string $tableText Table label (e.g. "Table 1")
     * @return bool True only when the regex finds the wrapped label
     */
    private function isMatchPositionHasMyTableTag($content, $tableText) {
        $openingTag = '<' . self::PROCESSED_TAG . '[^>]*>';
        $closingTag = '<\/' . self::PROCESSED_TAG . '>';
        // Quote the label so it is matched literally inside the pattern.
        $quotedLabel = preg_quote($tableText, '/');
        $found = @preg_match('/' . $openingTag . '\s*' . $quotedLabel . '\s*' . $closingTag . '/is', $content);
        return $found === 1;
    }
 }