From 4d0cec198fa78f00462c08fd0544e8abba830582 Mon Sep 17 00:00:00 2001
From: chengxl <chengxl@example.com>
Date: Wed, 21 Jan 2026 13:13:19 +0800
Subject: [PATCH] =?UTF-8?q?latex=20=E4=BB=A3=E7=A0=81=E8=B0=83=E6=95=B4?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 application/common/FigureTagProcessor.php | 104 ++++++++++++++++------
 1 file changed, 76 insertions(+), 28 deletions(-)
diff --git a/application/common/FigureTagProcessor.php b/application/common/FigureTagProcessor.php
index 7956f88..ee82f1a 100644
--- a/application/common/FigureTagProcessor.php
+++ b/application/common/FigureTagProcessor.php
@@ -1,5 +1,6 @@
 <?php
 namespace app\common;
+
 /**
  * 功能：精准匹配并替换Figure相关格式为myfigure标签
  * 支持格式：figure 数字、(figure 数字)、figure 数字:/figure 数字.（含嵌套/拆分标签）
@@ -16,6 +17,7 @@ class FigureTagProcessor{
     private const PROCESSED_TAG = 'myfigure';
     //Figure数字与对应ID的映射数组
     private $aImageMain = [];
+
     /**
      * 处理Figure标签替换的主方法
      * @param string $html 待处理的HTML文本
@@ -29,28 +31,35 @@ class FigureTagProcessor{
         if ($html === '' || !is_string($html)) {
             return ['status' => 2, 'data' => ''];
         }
+
         //超长文本保护
         if (strlen($html) > self::MAX_HTML_LENGTH) {
             return ['status' => 4, 'data' => $html];
         }
+
         //编码处理
         if (!mb_check_encoding($html, 'UTF-8')) {
             $html = mb_convert_encoding($html, 'UTF-8', 'GBK,GB2312,ASCII,ISO-8859-1');
         }
+
         //初始化映射数组（过滤非数字键值）
         $this->initImageMap($aImageMain);
         //原始内容
         $originalHtml = $html;
         $hasReplace = false;
+
         try {
             //只要包含数字+字母/数字后缀，直接返回原内容（核心修复）
             if ($this->hasFigureSuffix($html)) {
                 return ['status' => 4, 'data' => $html];
             }
+
             //合并拆分标签的Figure+数字
             $html = $this->preprocessSplitTags($html);
-            //替换
+
+            //替换（核心修复：适配样式标签+后缀标点场景）
             $html = $this->replaceFigureInHtml($html, $hasReplace);
+
             //清理冗余样式/标签
             if ($hasReplace) {
                 $html = $this->cleanRedundantStyles($html);
@@ -69,6 +78,7 @@ class FigureTagProcessor{
             'data' => $html
         ];
     }
+
     /**
      * 全局检测是否包含Figure数字+字母/数字后缀
      * 覆盖所有拆分/嵌套/无标签场景，无论是否有空白
@@ -86,8 +96,11 @@ class FigureTagProcessor{
         
         // 正则3：嵌套标签场景（<b>4B</b> / <i>4123</i>）
         $pattern3 = "/figure\s*(?:<(?:{$styleTagsPattern})[^>]*>)\s*\d+[a-zA-Z0-9]\s*(?:<\/(?:{$styleTagsPattern})>)/iu";
-        return preg_match($pattern1, $html) || preg_match($pattern2, $html) || preg_match($pattern3, $html);
+        
+        // 加@抑制正则警告
+        return @preg_match($pattern1, $html) || @preg_match($pattern2, $html) || @preg_match($pattern3, $html);
     }
+
     /**
      * 初始化Figure数字映射数组
      * @param array $aImageMain 原始映射数组
@@ -106,6 +119,7 @@ class FigureTagProcessor{
         }
         $this->aImageMain = $imageMap;
     }
+
     /**
      * 合并所有拆分标签的Figure+数字（含空白样式标签）
      * @param string $html 待处理HTML
@@ -116,19 +130,22 @@ class FigureTagProcessor{
         
         // 正则1：匹配基础拆分标签的Figure+数字
         $pattern = "/(figure)\s*(?:<\/(?:{$styleTagsPattern})>)\s*(?:<(?:{$styleTagsPattern})[^>]*>)\s*(\d+)/iu";
-        $html = preg_replace_callback($pattern, function($matches) {
+        $html = @preg_replace_callback($pattern, function($matches) {
             return $matches[1] . ' ' . $matches[2];
         }, $html);
         
         // 正则2：匹配多轮拆分标签的Figure+数字（含空白）
         $pattern2 = "/(figure)(?:<\/(?:{$styleTagsPattern})>)\s*(?:<(?:{$styleTagsPattern})[^>]*>)\s*(\s*)\s*(?:<\/(?:{$styleTagsPattern})>)\s*(?:<(?:{$styleTagsPattern})[^>]*>)\s*(\d+)/iu";
-        $html = preg_replace_callback($pattern2, function($matches) {
+        $html = @preg_replace_callback($pattern2, function($matches) {
             return $matches[1] . $matches[2] . $matches[3];
         }, $html);
+        
         return $html;
     }
+
     /**
      * 核心替换逻辑：将纯数字Figure替换为myfigure标签
+     * 修复：适配样式标签包裹 + 后缀标点场景（如 <b>Figure 2</b>.）
      * @param string $html 待处理HTML
      * @param bool $hasReplace 是否发生替换（引用传递）
      * @return string
@@ -138,13 +155,14 @@ class FigureTagProcessor{
         $styleTagsRegex = "(?:<(?:{$styleTagsPattern})[^>]*>)*\s*";
         $styleTagsCloseRegex = "\s*(?:<\/(?:{$styleTagsPattern})>)*";
 
-        // 正则1：匹配括号内的纯数字Figure（如 (Figure 2)、(<b>Figure 3</b>)）
-        // $pattern1 = "/\(\s*{$styleTagsRegex}figure\s*(\d+)\s*([\.,:]{0,1}){$styleTagsCloseRegex}\s*(?![a-zA-Z0-9])/iu";
-        $pattern1 = "/\(\s*{$styleTagsRegex}figure\s*(\d+)\s*([\.,:]{0,1}){$styleTagsCloseRegex}\s*(?![a-zA-Z0-9])/iuD";
-        $html = preg_replace_callback($pattern1, function($matches) use (&$hasReplace) {
+        // 正则1：匹配括号内的纯数字Figure（如 (Figure 2)、(<b>Figure 3</b>)、(<b>Figure 3</b>).）
+        $pattern1 = "/\(\s*{$styleTagsRegex}figure\s*(\d+){$styleTagsCloseRegex}\s*([\.,:]{0,1})\s*\)\s*([\.,:]{0,1})/iuD";
+        $html = @preg_replace_callback($pattern1, function($matches) use (&$hasReplace) {
             $num = $matches[1];
             $numInt = (int)$num;
-            $suffix = $matches[2] ?? '';
+            $suffix1 = $matches[2] ?? '';
+            $suffix2 = $matches[3] ?? '';
+            $suffix = $suffix1 . $suffix2;
 
             // 过滤条件：非数字、无映射、已处理过的标签
             if (!ctype_digit($num) || !isset($this->aImageMain[$numInt]) || 
@@ -161,10 +179,9 @@ class FigureTagProcessor{
             return $target;
         }, $html);
 
-        // 正则2：匹配无括号的纯数字Figure（如 Figure 2、<i>Figure 3</i>:）
-        // $pattern2 = "/{$styleTagsRegex}figure\s*(\d+)\s*([\.,:]{0,1}){$styleTagsCloseRegex}(?![a-zA-Z0-9])/iu";
-        $pattern2 = "/{$styleTagsRegex}figure\s*(\d+)\s*([\.,:]{0,1}){$styleTagsCloseRegex}\s*(?![a-zA-Z0-9])/iuD";
-        $html = preg_replace_callback($pattern2, function($matches) use (&$hasReplace) {
+        // 正则2：匹配无括号的纯数字Figure（核心修复：适配 <b>Figure 2</b>. 场景）
+        $pattern2 = "/{$styleTagsRegex}figure\s*(\d+){$styleTagsCloseRegex}\s*([\.,:]{0,1})(?![a-zA-Z0-9])/iuD";
+        $html = @preg_replace_callback($pattern2, function($matches) use (&$hasReplace) {
             $num = $matches[1];
             $numInt = (int)$num;
             $suffix = $matches[2] ?? '';
@@ -186,6 +203,7 @@ class FigureTagProcessor{
 
         return $html;
     }
+
     /**
      * 检测当前匹配内容是否已包含myfigure标签（避免重复替换）
      * @param string $content 匹配的文本片段
@@ -195,8 +213,9 @@ class FigureTagProcessor{
     private function isMatchPositionHasMyFigureTag($content, $figureText){
         $escapedText = preg_quote($figureText, '/');
         $pattern = '/<' . self::PROCESSED_TAG . '[^>]*>\s*' . $escapedText . '\s*<\/' . self::PROCESSED_TAG . '>/is';
-        return (bool)preg_match($pattern, $content);
+        return (bool)@preg_match($pattern, $content);
     }
+
     /**
      * 清理myfigure标签周围的冗余样式标签
      * @param string $html 待处理HTML
@@ -205,14 +224,15 @@ class FigureTagProcessor{
     private function cleanRedundantStyles($html){
         foreach (self::STYLE_TAGS as $tag) {
             $pattern = '/<' . $tag . '>\s*<'.self::PROCESSED_TAG.'([^>]*?)>(.*?)<\/'.self::PROCESSED_TAG.'>([\.,:]{0,1})\s*<\/' . $tag . '>/is';
-            $html = preg_replace($pattern, '<'.self::PROCESSED_TAG.'$1>$2</'.self::PROCESSED_TAG.'>$3', $html);
+            $html = @preg_replace($pattern, '<'.self::PROCESSED_TAG.'$1>$2</'.self::PROCESSED_TAG.'>$3', $html) ?? $html; // preg_replace() returns null on a PCRE runtime error; keep the current text instead of losing it
         }
         
         // 清理无匹配的闭合样式标签
-        $html = preg_replace('/<\/('.implode('|', self::STYLE_TAGS).')>(?![^<]*<\1>)/is', '', $html);
+        $html = @preg_replace('/<\/('.implode('|', self::STYLE_TAGS).')>(?![^<]*<\1>)/is', '', $html) ?? $html; // same null fallback: "@" hides warnings but not the null result
         
         return $html;
     }
+
     /**
      * 清理myfigure标签周围的冗余标点
      * @param string $html 待处理HTML
@@ -220,17 +240,43 @@ class FigureTagProcessor{
      */
     private function cleanRedundantPunctuation($html){
         // 修复括号+标点的冗余格式
-        $html = preg_replace('/<'.self::PROCESSED_TAG.' data-id="(\d+)">\(Figure \d+\)<\/'.self::PROCESSED_TAG.'>\)\./i', 
-            '<'.self::PROCESSED_TAG.' data-id="$1">(Figure $1)</'.self::PROCESSED_TAG.'>.', $html);
+        $html = @preg_replace('/<'.self::PROCESSED_TAG.' data-id="(\d+)">\(Figure \d+\)<\/'.self::PROCESSED_TAG.'>\)\./i', 
+            '<'.self::PROCESSED_TAG.' data-id="$1">(Figure $1)</'.self::PROCESSED_TAG.'>.', $html) ?? $html; // null fallback on PCRE error. NOTE(review): "(Figure $1)" reuses the data-id capture as the figure number - confirm the two are always equal
-        $html = preg_replace('/<\/'.self::PROCESSED_TAG.'>\)\.([\.,:]{0,1})/', '</'.self::PROCESSED_TAG.'>)$1', $html);
-        $html = preg_replace('/<\/'.self::PROCESSED_TAG.'>\.\)([\.,:]{0,1})/', '</'.self::PROCESSED_TAG.'>)$1', $html);
+        $html = @preg_replace('/<\/'.self::PROCESSED_TAG.'>\)\.([\.,:]{0,1})/', '</'.self::PROCESSED_TAG.'>)$1', $html) ?? $html; // preg_replace() returns null on a PCRE runtime error; keep the current text
+        $html = @preg_replace('/<\/'.self::PROCESSED_TAG.'>\.\)([\.,:]{0,1})/', '</'.self::PROCESSED_TAG.'>)$1', $html) ?? $html;
         
         // 清理重复标点
-        $html = preg_replace('/<\/'.self::PROCESSED_TAG.'>([\.,:]){2,}/', '</'.self::PROCESSED_TAG.'>$1', $html);
+        $html = @preg_replace('/<\/'.self::PROCESSED_TAG.'>([\.,:]){2,}/', '</'.self::PROCESSED_TAG.'>$1', $html) ?? $html;
         
         // 修复括号内的标签冗余
-        $html = preg_replace('/<'.self::PROCESSED_TAG.' data-id="(\d+)">\((Figure \d+)\s*<\/'.self::PROCESSED_TAG.'>([\.,:]{0,1})/i', 
-            '<'.self::PROCESSED_TAG.' data-id="$1">($2)</'.self::PROCESSED_TAG.'>$3', $html);
+        $html = @preg_replace('/<'.self::PROCESSED_TAG.' data-id="(\d+)">\((Figure \d+)\s*<\/'.self::PROCESSED_TAG.'>([\.,:]{0,1})/i', 
+            '<'.self::PROCESSED_TAG.' data-id="$1">($2)</'.self::PROCESSED_TAG.'>$3', $html) ?? $html;
+
+        $html = $this->cleanExtraParentheses($html);
+        
+        return $html;
+    }
+
+    /**
+     * Collapse repeated parentheses that directly wrap a myfigure element, e.g. "((<myfigure ...>...</myfigure>))" becomes "(<myfigure ...>...</myfigure>)".
+     * @param string $html HTML text to clean
+     * @return string Cleaned HTML; the input is returned unchanged if the regex engine reports an error
+     */
+    private function cleanExtraParentheses($html){
+        // Any run of "(" right before the element and any run of ")" right after it; each side is reduced independently.
+        $pattern = '/(\()*(<'.self::PROCESSED_TAG.'[^>]*>.*?<\/'.self::PROCESSED_TAG.'>)(\))*/is';
+        // preg_replace_callback() returns null on a PCRE runtime error; "?? $html" below keeps the input instead of discarding it.
+        $html = @preg_replace_callback($pattern, static function($matches) {
+            // Group 3 is missing from $matches when no ")" follows the element, hence the defaults.
+            $leftParen = $matches[1] ?? '';
+            $rightParen = $matches[3] ?? '';
+            
+            // Keep at most one parenthesis on each side, however many were present.
+            $newLeft = $leftParen !== '' ? '(' : '';
+            $newRight = $rightParen !== '' ? ')' : '';
+            
+            return $newLeft . $matches[2] . $newRight;
+        }, $html) ?? $html;
         
         return $html;
     }
@@ -242,11 +288,11 @@ class FigureTagProcessor{
     private function cleanUnclosedTags($html){
         foreach (self::STYLE_TAGS as $tag) {
             // 清理myfigure标签后的冗余闭合标签
-            $html = preg_replace('/(<\/'.self::PROCESSED_TAG.'>)\s*<\/' . $tag . '>/i', '$1', $html);
+            $html = @preg_replace('/(<\/'.self::PROCESSED_TAG.'>)\s*<\/' . $tag . '>/i', '$1', $html);
 
             // 定位所有该标签的开闭标签位置
-            preg_match_all("/<{$tag}\b[^>]*>/i", $html, $openMatches, PREG_OFFSET_CAPTURE);
-            preg_match_all("/<\/{$tag}>/i", $html, $closeMatches, PREG_OFFSET_CAPTURE);
+            @preg_match_all("/<{$tag}\b[^>]*>/i", $html, $openMatches, PREG_OFFSET_CAPTURE);
+            @preg_match_all("/<\/{$tag}>/i", $html, $closeMatches, PREG_OFFSET_CAPTURE);
 
             $allTags = [];
             // 收集开标签
@@ -308,6 +354,7 @@ class FigureTagProcessor{
 
         return $html;
     }
+
     /**
      * 优化文本格式（清理多余空格）
      * @param string $html 待处理HTML
@@ -315,14 +362,15 @@ class FigureTagProcessor{
      */
     private function optimizeFormat($html){
         // 清理连续空格
-        $html = preg_replace('/\s{2,}/', ' ', trim($html));
+        $html = @preg_replace('/\s{2,}/', ' ', trim($html)) ?? $html; // preg_replace() returns null on a PCRE runtime error; keep the current text instead of losing it
         // 标签后紧跟字母/数字时加空格
-        $html = preg_replace('/<\/'.self::PROCESSED_TAG.'>([A-Za-z0-9])/is', '</'.self::PROCESSED_TAG.'> $1', $html);
+        $html = @preg_replace('/<\/'.self::PROCESSED_TAG.'>([A-Za-z0-9])/is', '</'.self::PROCESSED_TAG.'> $1', $html) ?? $html;
         // 字母/数字紧跟标签前时加空格
-        $html = preg_replace('/([a-zA-Z0-9])<'.self::PROCESSED_TAG.'/is', '$1 <'.self::PROCESSED_TAG.'', $html);
+        $html = @preg_replace('/([a-zA-Z0-9])<'.self::PROCESSED_TAG.'/is', '$1 <'.self::PROCESSED_TAG.'', $html) ?? $html;
         
         return $html;
     }
+
     /**
      * 清理嵌套的myfigure标签（避免重复嵌套）
      * @param string $html 待处理HTML
@@ -330,7 +378,7 @@ class FigureTagProcessor{
      */
     private function cleanDuplicateNestedTags($html){
         $pattern = '/<'.self::PROCESSED_TAG.'[^>]*>\s*<'.self::PROCESSED_TAG.'([^>]*)>(.*?)<\/'.self::PROCESSED_TAG.'>\s*<\/'.self::PROCESSED_TAG.'>/is';
-        $html = preg_replace($pattern, '<'.self::PROCESSED_TAG.'$1>$2</'.self::PROCESSED_TAG.'>', $html);
+        $html = @preg_replace($pattern, '<'.self::PROCESSED_TAG.'$1>$2</'.self::PROCESSED_TAG.'>', $html) ?? $html; // preg_replace() returns null on a PCRE runtime error; keep the current text instead of losing it
         
         return $html;
     }