latex 代码调整
This commit is contained in:
@@ -1,5 +1,6 @@
|
||||
<?php
|
||||
namespace app\common;
|
||||
|
||||
/**
|
||||
* 功能:精准匹配并替换Figure相关格式为myfigure标签
|
||||
* 支持格式:figure 数字、(figure 数字)、figure 数字:/figure 数字.(含嵌套/拆分标签)
|
||||
@@ -16,6 +17,7 @@ class FigureTagProcessor{
|
||||
private const PROCESSED_TAG = 'myfigure';
|
||||
//Figure数字与对应ID的映射数组
|
||||
private $aImageMain = [];
|
||||
|
||||
/**
|
||||
* 处理Figure标签替换的主方法
|
||||
* @param string $html 待处理的HTML文本
|
||||
@@ -29,28 +31,35 @@ class FigureTagProcessor{
|
||||
if ($html === '' || !is_string($html)) {
|
||||
return ['status' => 2, 'data' => ''];
|
||||
}
|
||||
|
||||
//超长文本保护
|
||||
if (strlen($html) > self::MAX_HTML_LENGTH) {
|
||||
return ['status' => 4, 'data' => $html];
|
||||
}
|
||||
|
||||
//编码处理
|
||||
if (!mb_check_encoding($html, 'UTF-8')) {
|
||||
$html = mb_convert_encoding($html, 'UTF-8', 'GBK,GB2312,ASCII,ISO-8859-1');
|
||||
}
|
||||
|
||||
//初始化映射数组(过滤非数字键值)
|
||||
$this->initImageMap($aImageMain);
|
||||
//原始内容
|
||||
$originalHtml = $html;
|
||||
$hasReplace = false;
|
||||
|
||||
try {
|
||||
//只要包含数字+字母/数字后缀,直接返回原内容(核心修复)
|
||||
if ($this->hasFigureSuffix($html)) {
|
||||
return ['status' => 4, 'data' => $html];
|
||||
}
|
||||
|
||||
//合并拆分标签的Figure+数字
|
||||
$html = $this->preprocessSplitTags($html);
|
||||
//替换
|
||||
|
||||
//替换(核心修复:适配样式标签+后缀标点场景)
|
||||
$html = $this->replaceFigureInHtml($html, $hasReplace);
|
||||
|
||||
//清理冗余样式/标签
|
||||
if ($hasReplace) {
|
||||
$html = $this->cleanRedundantStyles($html);
|
||||
@@ -69,6 +78,7 @@ class FigureTagProcessor{
|
||||
'data' => $html
|
||||
];
|
||||
}
|
||||
|
||||
/**
|
||||
* 全局检测是否包含Figure数字+字母/数字后缀
|
||||
* 覆盖所有拆分/嵌套/无标签场景,无论是否有空白
|
||||
@@ -86,8 +96,11 @@ class FigureTagProcessor{
|
||||
|
||||
// 正则3:嵌套标签场景(<b>4B</b> / <i>4123</i>)
|
||||
$pattern3 = "/figure\s*(?:<(?:{$styleTagsPattern})[^>]*>)\s*\d+[a-zA-Z0-9]\s*(?:<\/(?:{$styleTagsPattern})>)/iu";
|
||||
return preg_match($pattern1, $html) || preg_match($pattern2, $html) || preg_match($pattern3, $html);
|
||||
|
||||
// 加@抑制正则警告
|
||||
return @preg_match($pattern1, $html) || @preg_match($pattern2, $html) || @preg_match($pattern3, $html);
|
||||
}
|
||||
|
||||
/**
|
||||
* 初始化Figure数字映射数组
|
||||
* @param array $aImageMain 原始映射数组
|
||||
@@ -106,6 +119,7 @@ class FigureTagProcessor{
|
||||
}
|
||||
$this->aImageMain = $imageMap;
|
||||
}
|
||||
|
||||
/**
|
||||
* 合并所有拆分标签的Figure+数字(含空白样式标签)
|
||||
* @param string $html 待处理HTML
|
||||
@@ -116,19 +130,22 @@ class FigureTagProcessor{
|
||||
|
||||
// 正则1:匹配基础拆分标签的Figure+数字
|
||||
$pattern = "/(figure)\s*(?:<\/(?:{$styleTagsPattern})>)\s*(?:<(?:{$styleTagsPattern})[^>]*>)\s*(\d+)/iu";
|
||||
$html = preg_replace_callback($pattern, function($matches) {
|
||||
$html = @preg_replace_callback($pattern, function($matches) {
|
||||
return $matches[1] . ' ' . $matches[2];
|
||||
}, $html);
|
||||
|
||||
// 正则2:匹配多轮拆分标签的Figure+数字(含空白)
|
||||
$pattern2 = "/(figure)(?:<\/(?:{$styleTagsPattern})>)\s*(?:<(?:{$styleTagsPattern})[^>]*>)\s*(\s*)\s*(?:<\/(?:{$styleTagsPattern})>)\s*(?:<(?:{$styleTagsPattern})[^>]*>)\s*(\d+)/iu";
|
||||
$html = preg_replace_callback($pattern2, function($matches) {
|
||||
$html = @preg_replace_callback($pattern2, function($matches) {
|
||||
return $matches[1] . $matches[2] . $matches[3];
|
||||
}, $html);
|
||||
|
||||
return $html;
|
||||
}
|
||||
|
||||
/**
|
||||
* 核心替换逻辑:将纯数字Figure替换为myfigure标签
|
||||
* 修复:适配样式标签包裹 + 后缀标点场景(如 <b>Figure 2</b>.)
|
||||
* @param string $html 待处理HTML
|
||||
* @param bool $hasReplace 是否发生替换(引用传递)
|
||||
* @return string
|
||||
@@ -138,13 +155,14 @@ class FigureTagProcessor{
|
||||
$styleTagsRegex = "(?:<(?:{$styleTagsPattern})[^>]*>)*\s*";
|
||||
$styleTagsCloseRegex = "\s*(?:<\/(?:{$styleTagsPattern})>)*";
|
||||
|
||||
// 正则1:匹配括号内的纯数字Figure(如 (Figure 2)、(<b>Figure 3</b>))
|
||||
// $pattern1 = "/\(\s*{$styleTagsRegex}figure\s*(\d+)\s*([\.,:]{0,1}){$styleTagsCloseRegex}\s*(?![a-zA-Z0-9])/iu";
|
||||
$pattern1 = "/\(\s*{$styleTagsRegex}figure\s*(\d+)\s*([\.,:]{0,1}){$styleTagsCloseRegex}\s*(?![a-zA-Z0-9])/iuD";
|
||||
$html = preg_replace_callback($pattern1, function($matches) use (&$hasReplace) {
|
||||
// 正则1:匹配括号内的纯数字Figure(如 (Figure 2)、(<b>Figure 3</b>)、(<b>Figure 3</b>).)
|
||||
$pattern1 = "/\(\s*{$styleTagsRegex}figure\s*(\d+){$styleTagsCloseRegex}\s*([\.,:]{0,1})\s*\)\s*([\.,:]{0,1})/iuD";
|
||||
$html = @preg_replace_callback($pattern1, function($matches) use (&$hasReplace) {
|
||||
$num = $matches[1];
|
||||
$numInt = (int)$num;
|
||||
$suffix = $matches[2] ?? '';
|
||||
$suffix1 = $matches[2] ?? '';
|
||||
$suffix2 = $matches[3] ?? '';
|
||||
$suffix = $suffix1 . $suffix2;
|
||||
|
||||
// 过滤条件:非数字、无映射、已处理过的标签
|
||||
if (!ctype_digit($num) || !isset($this->aImageMain[$numInt]) ||
|
||||
@@ -161,10 +179,9 @@ class FigureTagProcessor{
|
||||
return $target;
|
||||
}, $html);
|
||||
|
||||
// 正则2:匹配无括号的纯数字Figure(如 Figure 2、<i>Figure 3</i>:)
|
||||
// $pattern2 = "/{$styleTagsRegex}figure\s*(\d+)\s*([\.,:]{0,1}){$styleTagsCloseRegex}(?![a-zA-Z0-9])/iu";
|
||||
$pattern2 = "/{$styleTagsRegex}figure\s*(\d+)\s*([\.,:]{0,1}){$styleTagsCloseRegex}\s*(?![a-zA-Z0-9])/iuD";
|
||||
$html = preg_replace_callback($pattern2, function($matches) use (&$hasReplace) {
|
||||
// 正则2:匹配无括号的纯数字Figure(核心修复:适配 <b>Figure 2</b>. 场景)
|
||||
$pattern2 = "/{$styleTagsRegex}figure\s*(\d+){$styleTagsCloseRegex}\s*([\.,:]{0,1})(?![a-zA-Z0-9])/iuD";
|
||||
$html = @preg_replace_callback($pattern2, function($matches) use (&$hasReplace) {
|
||||
$num = $matches[1];
|
||||
$numInt = (int)$num;
|
||||
$suffix = $matches[2] ?? '';
|
||||
@@ -186,6 +203,7 @@ class FigureTagProcessor{
|
||||
|
||||
return $html;
|
||||
}
|
||||
|
||||
/**
|
||||
* 检测当前匹配内容是否已包含myfigure标签(避免重复替换)
|
||||
* @param string $content 匹配的文本片段
|
||||
@@ -195,8 +213,9 @@ class FigureTagProcessor{
|
||||
private function isMatchPositionHasMyFigureTag($content, $figureText){
    // Reports whether $content already contains $figureText wrapped in the processed tag,
    // so that the caller can avoid wrapping the same reference a second time.

    // Quote the figure text so characters such as "(", ")" and "." are matched literally.
    $escapedText = preg_quote($figureText, '/');

    // Opening processed tag (any attributes), optional whitespace, the literal text,
    // optional whitespace, closing processed tag. Case-insensitive; "." also spans newlines.
    $pattern = '/<' . self::PROCESSED_TAG . '[^>]*>\s*' . $escapedText . '\s*<\/' . self::PROCESSED_TAG . '>/is';

    // preg_match() yields 1 on a match, 0 on no match and false on failure;
    // only an actual match counts as "already wrapped".
    return @preg_match($pattern, $content) === 1;
}
|
||||
|
||||
/**
|
||||
* 清理myfigure标签周围的冗余样式标签
|
||||
* @param string $html 待处理HTML
|
||||
@@ -205,14 +224,15 @@ class FigureTagProcessor{
|
||||
private function cleanRedundantStyles($html){
    // Removes style tags that directly wrap a processed figure tag, then strips
    // closing style tags selected by the look-ahead rule below.
    // Any step whose regex fails at runtime is skipped instead of discarding the text.

    foreach (self::STYLE_TAGS as $tag) {
        // Matches an attribute-less <tag> that wraps exactly one processed tag plus at most
        // one trailing ".", "," or ":"; the wrapper is dropped and the punctuation kept.
        $pattern = '/<' . $tag . '>\s*<'.self::PROCESSED_TAG.'([^>]*?)>(.*?)<\/'.self::PROCESSED_TAG.'>([\.,:]{0,1})\s*<\/' . $tag . '>/is';
        $unwrapped = @preg_replace($pattern, '<'.self::PROCESSED_TAG.'$1>$2</'.self::PROCESSED_TAG.'>$3', $html);

        // preg_replace() returns null on a PCRE error; keep the last good value in that case.
        if ($unwrapped !== null) {
            $html = $unwrapped;
        }
    }

    // Drops a closing style tag unless the text after it (up to the next "<") is followed
    // by an attribute-less opening tag of the same name.
    // NOTE(review): this also matches the closing tag of an ordinary balanced pair such as
    // "<b>x</b> y" - confirm that is intended.
    $pruned = @preg_replace('/<\/('.implode('|', self::STYLE_TAGS).')>(?![^<]*<\1>)/is', '', $html);

    return $pruned !== null ? $pruned : $html;
}
|
||||
|
||||
/**
|
||||
* 清理myfigure标签周围的冗余标点
|
||||
* @param string $html 待处理HTML
|
||||
@@ -220,17 +240,43 @@ class FigureTagProcessor{
|
||||
*/
|
||||
private function cleanRedundantPunctuation($html){
    // Normalises stray punctuation and parentheses left around processed figure tags.
    // Rules run in the listed order; a rule whose regex fails at runtime is skipped so the
    // text is never replaced by null.

    $tag = self::PROCESSED_TAG;

    $rules = [
        // "<tag data-id="N">(Figure M)</tag>)." -> "<tag data-id="N">(Figure M)</tag>."
        // The figure number M is captured separately so the visible label is preserved
        // rather than being overwritten with the data-id value.
        [
            '/<'.$tag.' data-id="(\d+)">\(Figure (\d+)\)<\/'.$tag.'>\)\./i',
            '<'.$tag.' data-id="$1">(Figure $2)</'.$tag.'>.',
        ],
        // "</tag>)." followed by optional punctuation -> "</tag>)" plus that punctuation.
        [
            '/<\/'.$tag.'>\)\.([\.,:]{0,1})/',
            '</'.$tag.'>)$1',
        ],
        // "</tag>.)" followed by optional punctuation -> "</tag>)" plus that punctuation.
        [
            '/<\/'.$tag.'>\.\)([\.,:]{0,1})/',
            '</'.$tag.'>)$1',
        ],
        // A run of two or more ".", "," or ":" after the closing tag collapses to the last one.
        [
            '/<\/'.$tag.'>([\.,:]){2,}/',
            '</'.$tag.'>$1',
        ],
        // "(Figure M" left without its ")" inside the tag gets the closing parenthesis added.
        [
            '/<'.$tag.' data-id="(\d+)">\((Figure \d+)\s*<\/'.$tag.'>([\.,:]{0,1})/i',
            '<'.$tag.' data-id="$1">($2)</'.$tag.'>$3',
        ],
    ];

    foreach ($rules as $rule) {
        $result = @preg_replace($rule[0], $rule[1], $html);
        // preg_replace() returns null on a PCRE error; keep the previous text then.
        if ($result !== null) {
            $html = $result;
        }
    }

    // Finally collapse repeated parentheses around processed tags.
    return $this->cleanExtraParentheses($html);
}
|
||||
|
||||
/**
 * Collapse repeated parentheses directly around processed figure tags.
 *
 * Any run of "(" immediately before a processed tag becomes a single "(", and any run of
 * ")" immediately after it becomes a single ")". Sides that have no parenthesis are left
 * without one. Text that is not adjacent to a processed tag is not touched.
 *
 * @param string $html Text to clean
 * @return string Cleaned text, or the unchanged input when the regex engine reports an error
 */
private function cleanExtraParentheses($html){
    // Group 1: run of "(" (possibly empty); group 2: the processed tag with its content;
    // group 3: run of ")" (possibly empty). "." also spans newlines because of the "s" flag.
    $pattern = '/(\(*)(<'.self::PROCESSED_TAG.'[^>]*>.*?<\/'.self::PROCESSED_TAG.'>)(\)*)/is';

    $cleaned = @preg_replace_callback($pattern, static function ($matches) {
        // Keep at most one parenthesis on each side, however many were present.
        $left = $matches[1] !== '' ? '(' : '';
        $right = $matches[3] !== '' ? ')' : '';

        return $left . $matches[2] . $right;
    }, $html);

    // preg_replace_callback() returns null on a PCRE error; fall back to the input
    // rather than handing null to the caller.
    return $cleaned !== null ? $cleaned : $html;
}
|
||||
@@ -242,11 +288,11 @@ class FigureTagProcessor{
|
||||
private function cleanUnclosedTags($html){
|
||||
foreach (self::STYLE_TAGS as $tag) {
|
||||
// 清理myfigure标签后的冗余闭合标签
|
||||
$html = preg_replace('/(<\/'.self::PROCESSED_TAG.'>)\s*<\/' . $tag . '>/i', '$1', $html);
|
||||
$html = @preg_replace('/(<\/'.self::PROCESSED_TAG.'>)\s*<\/' . $tag . '>/i', '$1', $html);
|
||||
|
||||
// 定位所有该标签的开闭标签位置
|
||||
preg_match_all("/<{$tag}\b[^>]*>/i", $html, $openMatches, PREG_OFFSET_CAPTURE);
|
||||
preg_match_all("/<\/{$tag}>/i", $html, $closeMatches, PREG_OFFSET_CAPTURE);
|
||||
@preg_match_all("/<{$tag}\b[^>]*>/i", $html, $openMatches, PREG_OFFSET_CAPTURE);
|
||||
@preg_match_all("/<\/{$tag}>/i", $html, $closeMatches, PREG_OFFSET_CAPTURE);
|
||||
|
||||
$allTags = [];
|
||||
// 收集开标签
|
||||
@@ -308,6 +354,7 @@ class FigureTagProcessor{
|
||||
|
||||
return $html;
|
||||
}
|
||||
|
||||
/**
|
||||
* 优化文本格式(清理多余空格)
|
||||
* @param string $html 待处理HTML
|
||||
@@ -315,14 +362,15 @@ class FigureTagProcessor{
|
||||
*/
|
||||
private function optimizeFormat($html){
    // Tidies spacing: trims the text, collapses whitespace runs, and makes sure a processed
    // tag is separated by a space from an ASCII letter or digit on either side.
    // A step whose regex fails at runtime is skipped so the text is never replaced by null.

    $steps = [
        // Two or more consecutive whitespace characters become one space.
        ['/\s{2,}/', ' '],
        // Closing processed tag directly followed by a letter/digit: insert a space after it.
        ['/<\/'.self::PROCESSED_TAG.'>([A-Za-z0-9])/is', '</'.self::PROCESSED_TAG.'> $1'],
        // Letter/digit directly followed by an opening processed tag: insert a space before it.
        ['/([a-zA-Z0-9])<'.self::PROCESSED_TAG.'/is', '$1 <'.self::PROCESSED_TAG.''],
    ];

    $html = trim($html);

    foreach ($steps as $step) {
        $result = @preg_replace($step[0], $step[1], $html);
        // preg_replace() returns null on a PCRE error; keep the previous text then.
        if ($result !== null) {
            $html = $result;
        }
    }

    return $html;
}
|
||||
|
||||
/**
|
||||
* 清理嵌套的myfigure标签(避免重复嵌套)
|
||||
* @param string $html 待处理HTML
|
||||
@@ -330,7 +378,7 @@ class FigureTagProcessor{
|
||||
*/
|
||||
private function cleanDuplicateNestedTags($html){
    // Flattens a processed tag that directly wraps another processed tag (only whitespace
    // between the two opening tags and between the two closing tags): the inner tag's
    // attributes and content are kept and the outer wrapper is removed.
    $pattern = '/<'.self::PROCESSED_TAG.'[^>]*>\s*<'.self::PROCESSED_TAG.'([^>]*)>(.*?)<\/'.self::PROCESSED_TAG.'>\s*<\/'.self::PROCESSED_TAG.'>/is';

    $flattened = @preg_replace($pattern, '<'.self::PROCESSED_TAG.'$1>$2</'.self::PROCESSED_TAG.'>', $html);

    // preg_replace() returns null on a PCRE error; return the input unchanged in that case.
    return $flattened !== null ? $flattened : $html;
}
|
||||
|
||||
Reference in New Issue
Block a user