latex 代码调整

2026-01-21 13:13:19 +08:00
parent 06308bc319
commit 4d0cec198f
1 changed files with 76 additions and 28 deletions
--- a/application/common/FigureTagProcessor.php
+++ b/application/common/FigureTagProcessor.php
@@ -1,5 +1,6 @@
 <?php
 namespace app\common;
+
 /**
 * 功能：精准匹配并替换Figure相关格式为myfigure标签
 * 支持格式：figure 数字、(figure 数字)、figure 数字:/figure 数字.（含嵌套/拆分标签）
@@ -16,6 +17,7 @@ class FigureTagProcessor{
    private const PROCESSED_TAG = 'myfigure';
    //Figure数字与对应ID的映射数组
    private $aImageMain = [];
+
    /**
     * 处理Figure标签替换的主方法
     * @param string $html 待处理的HTML文本
@@ -29,28 +31,35 @@ class FigureTagProcessor{
        if ($html === '' || !is_string($html)) {
            return ['status' => 2, 'data' => ''];
        }
+
        //超长文本保护
        if (strlen($html) > self::MAX_HTML_LENGTH) {
            return ['status' => 4, 'data' => $html];
        }
+
        //编码处理
        if (!mb_check_encoding($html, 'UTF-8')) {
            $html = mb_convert_encoding($html, 'UTF-8', 'GBK,GB2312,ASCII,ISO-8859-1');
        }
+
        //初始化映射数组（过滤非数字键值）
        $this->initImageMap($aImageMain);
        //原始内容
        $originalHtml = $html;
        $hasReplace = false;
+
        try {
            //只要包含数字+字母/数字后缀，直接返回原内容（核心修复）
            if ($this->hasFigureSuffix($html)) {
                return ['status' => 4, 'data' => $html];
            }
+
            //合并拆分标签的Figure+数字
            $html = $this->preprocessSplitTags($html);
-            //替换
+
+            //替换（核心修复：适配样式标签+后缀标点场景）
            $html = $this->replaceFigureInHtml($html, $hasReplace);
+
            //清理冗余样式/标签
            if ($hasReplace) {
                $html = $this->cleanRedundantStyles($html);
@@ -69,6 +78,7 @@ class FigureTagProcessor{
            'data' => $html
        ];
    }
+
    /**
     * 全局检测是否包含Figure数字+字母/数字后缀
     * 覆盖所有拆分/嵌套/无标签场景，无论是否有空白
@@ -86,8 +96,11 @@ class FigureTagProcessor{
        
        // 正则3：嵌套标签场景（<b>4B</b> / <i>4123</i>）
        $pattern3 = "/figure\s*(?:<(?:{$styleTagsPattern})[^>]*>)\s*\d+[a-zA-Z0-9]\s*(?:<\/(?:{$styleTagsPattern})>)/iu";
-        return preg_match($pattern1, $html) || preg_match($pattern2, $html) || preg_match($pattern3, $html);
+        
+        // 加@抑制正则警告
+        return @preg_match($pattern1, $html) || @preg_match($pattern2, $html) || @preg_match($pattern3, $html);
    }
+
    /**
     * 初始化Figure数字映射数组
     * @param array $aImageMain 原始映射数组
@@ -106,6 +119,7 @@ class FigureTagProcessor{
        }
        $this->aImageMain = $imageMap;
    }
+
    /**
     * 合并所有拆分标签的Figure+数字（含空白样式标签）
     * @param string $html 待处理HTML
@@ -116,19 +130,22 @@ class FigureTagProcessor{
        
        // 正则1：匹配基础拆分标签的Figure+数字
        $pattern = "/(figure)\s*(?:<\/(?:{$styleTagsPattern})>)\s*(?:<(?:{$styleTagsPattern})[^>]*>)\s*(\d+)/iu";
-        $html = preg_replace_callback($pattern, function($matches) {
+        $html = @preg_replace_callback($pattern, function($matches) {
            return $matches[1] . ' ' . $matches[2];
        }, $html);
        
        // 正则2：匹配多轮拆分标签的Figure+数字（含空白）
        $pattern2 = "/(figure)(?:<\/(?:{$styleTagsPattern})>)\s*(?:<(?:{$styleTagsPattern})[^>]*>)\s*(\s*)\s*(?:<\/(?:{$styleTagsPattern})>)\s*(?:<(?:{$styleTagsPattern})[^>]*>)\s*(\d+)/iu";
-        $html = preg_replace_callback($pattern2, function($matches) {
+        $html = @preg_replace_callback($pattern2, function($matches) {
            return $matches[1] . $matches[2] . $matches[3];
        }, $html);
+        
        return $html;
    }
+
    /**
     * 核心替换逻辑：将纯数字Figure替换为myfigure标签
+     * 修复：适配样式标签包裹 + 后缀标点场景（如 <b>Figure 2</b>.）
     * @param string $html 待处理HTML
     * @param bool $hasReplace 是否发生替换（引用传递）
     * @return string
@@ -138,13 +155,14 @@ class FigureTagProcessor{
        $styleTagsRegex = "(?:<(?:{$styleTagsPattern})[^>]*>)*\s*";
        $styleTagsCloseRegex = "\s*(?:<\/(?:{$styleTagsPattern})>)*";

-        // 正则1：匹配括号内的纯数字Figure（如 (Figure 2)、(<b>Figure 3</b>)）
-        // $pattern1 = "/\(\s*{$styleTagsRegex}figure\s*(\d+)\s*([\.,:]{0,1}){$styleTagsCloseRegex}\s*(?![a-zA-Z0-9])/iu";
-        $pattern1 = "/\(\s*{$styleTagsRegex}figure\s*(\d+)\s*([\.,:]{0,1}){$styleTagsCloseRegex}\s*(?![a-zA-Z0-9])/iuD";
-        $html = preg_replace_callback($pattern1, function($matches) use (&$hasReplace) {
+        // 正则1：匹配括号内的纯数字Figure（如 (Figure 2)、(<b>Figure 3</b>)、(<b>Figure 3</b>).）
+        $pattern1 = "/\(\s*{$styleTagsRegex}figure\s*(\d+){$styleTagsCloseRegex}\s*([\.,:]{0,1})\s*\)\s*([\.,:]{0,1})/iuD";
+        $html = @preg_replace_callback($pattern1, function($matches) use (&$hasReplace) {
            $num = $matches[1];
            $numInt = (int)$num;
-            $suffix = $matches[2] ?? '';
+            $suffix1 = $matches[2] ?? '';
+            $suffix2 = $matches[3] ?? '';
+            $suffix = $suffix1 . $suffix2;

            // 过滤条件：非数字、无映射、已处理过的标签
            if (!ctype_digit($num) || !isset($this->aImageMain[$numInt]) || 
@@ -161,10 +179,9 @@ class FigureTagProcessor{
            return $target;
        }, $html);

-        // 正则2：匹配无括号的纯数字Figure（如 Figure 2、<i>Figure 3</i>:）
-        // $pattern2 = "/{$styleTagsRegex}figure\s*(\d+)\s*([\.,:]{0,1}){$styleTagsCloseRegex}(?![a-zA-Z0-9])/iu";
-        $pattern2 = "/{$styleTagsRegex}figure\s*(\d+)\s*([\.,:]{0,1}){$styleTagsCloseRegex}\s*(?![a-zA-Z0-9])/iuD";
-        $html = preg_replace_callback($pattern2, function($matches) use (&$hasReplace) {
+        // 正则2：匹配无括号的纯数字Figure（核心修复：适配 <b>Figure 2</b>. 场景）
+        $pattern2 = "/{$styleTagsRegex}figure\s*(\d+){$styleTagsCloseRegex}\s*([\.,:]{0,1})(?![a-zA-Z0-9])/iuD";
+        $html = @preg_replace_callback($pattern2, function($matches) use (&$hasReplace) {
            $num = $matches[1];
            $numInt = (int)$num;
            $suffix = $matches[2] ?? '';
@@ -186,6 +203,7 @@ class FigureTagProcessor{

        return $html;
    }
+
    /**
     * 检测当前匹配内容是否已包含myfigure标签（避免重复替换）
     * @param string $content 匹配的文本片段
@@ -195,8 +213,9 @@ class FigureTagProcessor{
    private function isMatchPositionHasMyFigureTag($content, $figureText){
        $escapedText = preg_quote($figureText, '/');
        $pattern = '/<' . self::PROCESSED_TAG . '[^>]*>\s*' . $escapedText . '\s*<\/' . self::PROCESSED_TAG . '>/is';
-        return (bool)preg_match($pattern, $content);
+        return (bool)@preg_match($pattern, $content);
    }
+
    /**
     * 清理myfigure标签周围的冗余样式标签
     * @param string $html 待处理HTML
@@ -205,14 +224,15 @@ class FigureTagProcessor{
    private function cleanRedundantStyles($html){
        foreach (self::STYLE_TAGS as $tag) {
            $pattern = '/<' . $tag . '>\s*<'.self::PROCESSED_TAG.'([^>]*?)>(.*?)<\/'.self::PROCESSED_TAG.'>([\.,:]{0,1})\s*<\/' . $tag . '>/is';
-            $html = preg_replace($pattern, '<'.self::PROCESSED_TAG.'$1>$2</'.self::PROCESSED_TAG.'>$3', $html);
+            $html = @preg_replace($pattern, '<'.self::PROCESSED_TAG.'$1>$2</'.self::PROCESSED_TAG.'>$3', $html) ?? $html; // preg_replace() returns null on PCRE failure: keep the input
        }
        
        // 清理无匹配的闭合样式标签
-        $html = preg_replace('/<\/('.implode('|', self::STYLE_TAGS).')>(?![^<]*<\1>)/is', '', $html);
+        $html = @preg_replace('/<\/('.implode('|', self::STYLE_TAGS).')>(?![^<]*<\1>)/is', '', $html) ?? $html; // same null fallback as above
        
        return $html;
    }
+
    /**
     * 清理myfigure标签周围的冗余标点
     * @param string $html 待处理HTML
@@ -220,17 +240,43 @@ class FigureTagProcessor{
     */
    private function cleanRedundantPunctuation($html){
        // 修复括号+标点的冗余格式
-        $html = preg_replace('/<'.self::PROCESSED_TAG.' data-id="(\d+)">\(Figure \d+\)<\/'.self::PROCESSED_TAG.'>\)\./i', 
+        $html = @preg_replace('/<'.self::PROCESSED_TAG.' data-id="(\d+)">\(Figure \d+\)<\/'.self::PROCESSED_TAG.'>\)\./i', 
            '<'.self::PROCESSED_TAG.' data-id="$1">(Figure $1)</'.self::PROCESSED_TAG.'>.', $html);
-        $html = preg_replace('/<\/'.self::PROCESSED_TAG.'>\)\.([\.,:]{0,1})/', '</'.self::PROCESSED_TAG.'>)$1', $html);
-        $html = preg_replace('/<\/'.self::PROCESSED_TAG.'>\.\)([\.,:]{0,1})/', '</'.self::PROCESSED_TAG.'>)$1', $html);
+        $html = @preg_replace('/<\/'.self::PROCESSED_TAG.'>\)\.([\.,:]{0,1})/', '</'.self::PROCESSED_TAG.'>)$1', $html);
+        $html = @preg_replace('/<\/'.self::PROCESSED_TAG.'>\.\)([\.,:]{0,1})/', '</'.self::PROCESSED_TAG.'>)$1', $html);
        
        // 清理重复标点
-        $html = preg_replace('/<\/'.self::PROCESSED_TAG.'>([\.,:]){2,}/', '</'.self::PROCESSED_TAG.'>$1', $html);
+        $html = @preg_replace('/<\/'.self::PROCESSED_TAG.'>([\.,:]){2,}/', '</'.self::PROCESSED_TAG.'>$1', $html);
        
        // 修复括号内的标签冗余
-        $html = preg_replace('/<'.self::PROCESSED_TAG.' data-id="(\d+)">\((Figure \d+)\s*<\/'.self::PROCESSED_TAG.'>([\.,:]{0,1})/i', 
+        $html = @preg_replace('/<'.self::PROCESSED_TAG.' data-id="(\d+)">\((Figure \d+)\s*<\/'.self::PROCESSED_TAG.'>([\.,:]{0,1})/i', 
            '<'.self::PROCESSED_TAG.' data-id="$1">($2)</'.self::PROCESSED_TAG.'>$3', $html);
+
+        $html = $this->cleanExtraParentheses($html);
+        
+        return $html;
+    }
+
+    /**
+     * Collapse each run of parentheses directly around a myfigure element to a single one.
+     * Left and right sides are handled independently; if the regex pass fails the input is kept.
+     * @param string $html text to process
+     * @return string the text with at most one "(" before and one ")" after every myfigure element
+     */
+    private function cleanExtraParentheses($html){
+        // Optional run of "(" + one complete myfigure element + optional run of ")"
+        $pattern = '/(\()*(<'.self::PROCESSED_TAG.'[^>]*>.*?<\/'.self::PROCESSED_TAG.'>)(\))*/is';
+        $html = @preg_replace_callback($pattern, function($matches) {
+            $tagContent = $matches[2];
+            $leftParen = $matches[1] ?? '';
+            $rightParen = $matches[3] ?? '';
+
+            // Keep at most one parenthesis per side, no matter how many were present
+            $newLeft = $leftParen ? '(' : '';
+            $newRight = $rightParen ? ')' : '';
+
+            return $newLeft . $tagContent . $newRight;
+        }, $html) ?? $html; // preg_replace_callback() returns null on PCRE failure: keep the input
        
        return $html;
    }
@@ -242,11 +288,11 @@ class FigureTagProcessor{
    private function cleanUnclosedTags($html){
        foreach (self::STYLE_TAGS as $tag) {
            // 清理myfigure标签后的冗余闭合标签
-            $html = preg_replace('/(<\/'.self::PROCESSED_TAG.'>)\s*<\/' . $tag . '>/i', '$1', $html);
+            $html = @preg_replace('/(<\/'.self::PROCESSED_TAG.'>)\s*<\/' . $tag . '>/i', '$1', $html);

            // 定位所有该标签的开闭标签位置
-            preg_match_all("/<{$tag}\b[^>]*>/i", $html, $openMatches, PREG_OFFSET_CAPTURE);
-            preg_match_all("/<\/{$tag}>/i", $html, $closeMatches, PREG_OFFSET_CAPTURE);
+            @preg_match_all("/<{$tag}\b[^>]*>/i", $html, $openMatches, PREG_OFFSET_CAPTURE);
+            @preg_match_all("/<\/{$tag}>/i", $html, $closeMatches, PREG_OFFSET_CAPTURE);

            $allTags = [];
            // 收集开标签
@@ -308,6 +354,7 @@ class FigureTagProcessor{

        return $html;
    }
+
    /**
     * 优化文本格式（清理多余空格）
     * @param string $html 待处理HTML
@@ -315,14 +362,15 @@ class FigureTagProcessor{
     */
    private function optimizeFormat($html){
        // 清理连续空格
-        $html = preg_replace('/\s{2,}/', ' ', trim($html));
+        $html = @preg_replace('/\s{2,}/', ' ', trim($html)) ?? trim($html); // null on PCRE failure: fall back to the trimmed input
        // 标签后紧跟字母/数字时加空格
-        $html = preg_replace('/<\/'.self::PROCESSED_TAG.'>([A-Za-z0-9])/is', '</'.self::PROCESSED_TAG.'> $1', $html);
+        $html = @preg_replace('/<\/'.self::PROCESSED_TAG.'>([A-Za-z0-9])/is', '</'.self::PROCESSED_TAG.'> $1', $html) ?? $html; // null on PCRE failure: keep the input
        // 字母/数字紧跟标签前时加空格
-        $html = preg_replace('/([a-zA-Z0-9])<'.self::PROCESSED_TAG.'/is', '$1 <'.self::PROCESSED_TAG.'', $html);
+        $html = @preg_replace('/([a-zA-Z0-9])<'.self::PROCESSED_TAG.'/is', '$1 <'.self::PROCESSED_TAG.'', $html) ?? $html; // null on PCRE failure: keep the input
        
        return $html;
    }
+
    /**
     * 清理嵌套的myfigure标签（避免重复嵌套）
     * @param string $html 待处理HTML
@@ -330,7 +378,7 @@ class FigureTagProcessor{
     */
    private function cleanDuplicateNestedTags($html){
        $pattern = '/<'.self::PROCESSED_TAG.'[^>]*>\s*<'.self::PROCESSED_TAG.'([^>]*)>(.*?)<\/'.self::PROCESSED_TAG.'>\s*<\/'.self::PROCESSED_TAG.'>/is';
-        $html = preg_replace($pattern, '<'.self::PROCESSED_TAG.'$1>$2</'.self::PROCESSED_TAG.'>', $html);
+        $html = @preg_replace($pattern, '<'.self::PROCESSED_TAG.'$1>$2</'.self::PROCESSED_TAG.'>', $html) ?? $html; // preg_replace() returns null on PCRE failure: keep the input
        
        return $html;
    }