diff --git a/application/common/TableTagProcessor.php b/application/common/TableTagProcessor.php index 9ae92ec..557b0d5 100644 --- a/application/common/TableTagProcessor.php +++ b/application/common/TableTagProcessor.php @@ -63,7 +63,7 @@ class TableTagProcessor{ // 合并拆分标签的Table+数字 $html = $this->preprocessSplitTags($html); - // 核心替换逻辑 + // 核心替换逻辑(修复后) $html = $this->replaceTableInHtml($html, $hasReplace); // 清理冗余样式/标签(仅当发生替换时执行) @@ -153,6 +153,7 @@ class TableTagProcessor{ /** * 核心替换逻辑:将纯数字Table替换为mytable标签 + * 修复:调整标点匹配位置,适配 Table 2. 场景 * @param string $html 待处理HTML * @param bool $hasReplace 是否发生替换(引用传递) * @return string @@ -162,12 +163,15 @@ class TableTagProcessor{ $styleTagsRegex = "(?:<(?:{$styleTagsPattern})[^>]*>)*\s*"; $styleTagsCloseRegex = "\s*(?:<\/(?:{$styleTagsPattern})>)*"; - // 正则1:匹配括号内的纯数字Table(如 (Table 2)、(Table 3)) - $pattern1 = "/\(\s*{$styleTagsRegex}table\s*(\d+)\s*([\.,:]{0,1}){$styleTagsCloseRegex}\s*(?![a-zA-Z0-9])/iuD"; + // 正则1:匹配括号内的纯数字Table(如 (Table 2)、(Table 3)、(Table 3).) + // 修复:将标点匹配移到样式标签闭合后 + $pattern1 = "/\(\s*{$styleTagsRegex}table\s*(\d+){$styleTagsCloseRegex}\s*([\.,:]{0,1})\s*\)\s*([\.,:]{0,1})/iuD"; $html = @preg_replace_callback($pattern1, function($matches) use (&$hasReplace) { $num = $matches[1]; $numInt = (int)$num; - $suffix = $matches[2] ?? ''; + $suffix1 = $matches[2] ?? ''; + $suffix2 = $matches[3] ?? ''; + $suffix = $suffix1 . $suffix2; // 过滤条件:非数字、无映射、已处理过的标签 if (!ctype_digit($num) || !isset($this->aTableMain[$numInt]) || @@ -175,17 +179,18 @@ class TableTagProcessor{ return $matches[0]; } - // 执行替换 + // 执行替换(清理冗余括号,避免生成((...))) $primaryId = $this->aTableMain[$numInt]; - $baseTag = "<" . self::PROCESSED_TAG . " data-id=\"{$primaryId}\">Table {$num}"; - $target = "({$baseTag}{$suffix})"; + $baseTagClean = "<" . self::PROCESSED_TAG . 
" data-id=\"{$primaryId}\">Table {$num}"; + $target = "({$baseTagClean}{$suffix})"; $hasReplace = true; return $target; }, $html); - // 正则2:匹配无括号的纯数字Table(如 Table 2、Table 3:) - $pattern2 = "/{$styleTagsRegex}table\s*(\d+)\s*([\.,:]{0,1}){$styleTagsCloseRegex}\s*(?![a-zA-Z0-9])/iuD"; + // 正则2:匹配无括号的纯数字Table(核心修复:适配 Table 2. 场景) + // 修复:将标点匹配移到样式标签闭合后 + $pattern2 = "/{$styleTagsRegex}table\s*(\d+){$styleTagsCloseRegex}\s*([\.,:]{0,1})(?![a-zA-Z0-9])/iuD"; $html = @preg_replace_callback($pattern2, function($matches) use (&$hasReplace) { $num = $matches[1]; $numInt = (int)$num; @@ -239,7 +244,7 @@ class TableTagProcessor{ } /** - * 清理mytable标签周围的冗余标点 + * 清理mytable标签周围的冗余标点(新增冗余括号清理) * @param string $html 待处理HTML * @return string */ @@ -250,16 +255,45 @@ class TableTagProcessor{ $html = @preg_replace('/<\/'.self::PROCESSED_TAG.'>\)\.([\.,:]{0,1})/', ')$1', $html); $html = @preg_replace('/<\/'.self::PROCESSED_TAG.'>\.\)([\.,:]{0,1})/', ')$1', $html); + // 新增:清理mytable标签后的冗余括号(避免))问题) + $html = @preg_replace('/<\/'.self::PROCESSED_TAG.'>\)\)+/', ')', $html); + $html = @preg_replace('/\(\(<'.self::PROCESSED_TAG.'/', '(<'.self::PROCESSED_TAG.'', $html); + // 清理重复标点 $html = @preg_replace('/<\/'.self::PROCESSED_TAG.'>([\.,:]){2,}/', '$1', $html); // 修复括号内的标签冗余 $html = @preg_replace('/<'.self::PROCESSED_TAG.' data-id="(\d+)">\((Table \d+)\s*<\/'.self::PROCESSED_TAG.'>([\.,:]{0,1})/i', '<'.self::PROCESSED_TAG.' data-id="$1">($2)$3', $html); + + $html = $this->cleanExtraParentheses($html); return $html; } + /** + * 清理文本中多余的成对括号(仅处理mytable标签相关的括号) + * @param string $html 待处理文本 + * @return string + */ + private function cleanExtraParentheses($html){ + // 匹配mytable标签周围的括号区域 + $pattern = '/(\()*(<'.self::PROCESSED_TAG.'[^>]*>.*?<\/'.self::PROCESSED_TAG.'>)(\))*/is'; + + $html = @preg_replace_callback($pattern, function($matches) { + $tagContent = $matches[2]; + $leftParen = $matches[1] ?? ''; + $rightParen = $matches[3] ?? 
''; + + // 只保留1个左括号和1个右括号(无论原始有多少) + $newLeft = $leftParen ? '(' : ''; + $newRight = $rightParen ? ')' : ''; + + return $newLeft . $tagContent . $newRight; + }, $html); + + return $html; + } /** * 清理未闭合的样式标签 * @param string $html 待处理HTML