diff --git a/application/common/FigureTagProcessor.php b/application/common/FigureTagProcessor.php
index ed6a5c8..97d53d4 100644
--- a/application/common/FigureTagProcessor.php
+++ b/application/common/FigureTagProcessor.php
@@ -13,11 +13,11 @@ class FigureTagProcessor {
* status: 2-空输入, 4-无匹配, 5-处理异常, 1-处理成功
*/
public function dealFigureStr($html = '') {
- // 1. 基础输入校验
+ // Validate input: a non-string or blank value is rejected with status 2
if (!is_string($html) || trim($html) === '') {
return ['status' => 2, 'data' => ''];
}
- // 2. 超大字符串拦截
+ //超大字符串拦截
if (strlen($html) > self::MAX_HTML_LENGTH) {
return ['status' => 4, 'data' => $html];
}
@@ -26,22 +26,22 @@ class FigureTagProcessor {
$hasReplace = false;
try {
- // 3. 合并嵌套样式标签
+ //合并嵌套样式标签
$mergedHtml = $this->mergeFragmentStyleTags($html);
- // 4. 提取纯文本(用于匹配Figure)
+ //提取纯文本(用于匹配Figure)
$plainText = preg_replace('/<[^>]+>/', ' ', $mergedHtml);
$plainText = preg_replace('/\s+/', ' ', trim($plainText));
- // 5. 提取所有匹配的Figure数字
+ //提取所有匹配的Figure数字
$allMatches = $this->extractAllFigureMatches($plainText);
if (empty($allMatches)) {
return ['status' => 4, 'data' => $originalHtml];
}
- // 6. 替换为myfigure标签
+ //替换为myfigure标签
$html = $this->replaceFigureWithTag($html, $allMatches, $hasReplace);
- // 7. 清理冗余内容(仅替换成功后执行)
+ //清理冗余内容(仅替换成功后执行)
if ($hasReplace) {
$html = $this->cleanRedundantStyles($html);
$html = $this->cleanRedundantPunctuation($html);
@@ -50,17 +50,6 @@ class FigureTagProcessor {
}
} catch (\Throwable $e) {
- // 8. 异常处理(记录详细日志)
- $errorMsg = sprintf(
- '[%s] FigureTagProcessor-dealFigureStr 异常:%s | 文件:%s | 行:%d | 入参MD5:%s | 正则错误:%s',
- date('Y-m-d H:i:s'),
- $e->getMessage(),
- $e->getFile(),
- $e->getLine(),
- md5($originalHtml),
- preg_last_error() ? preg_last_error_msg() : '无'
- );
- error_log($errorMsg);
return ['status' => 5, 'data' => $originalHtml];
}
@@ -71,7 +60,7 @@ class FigureTagProcessor {
}
/**
- * 合并嵌套的样式标签(如aaabbb → aaa bbb)
+ * 合并嵌套的样式标签
* @param string $html
* @return string
*/
@@ -148,8 +137,7 @@ class FigureTagProcessor {
? "({$info['content']})"
: $info['content'];
- // 核心修改:规范myfigure标签格式(去掉属性值空格、加双引号)
- // 最终生成:Figure 1
+ //Figure 1
$targetTag = "{$innerContent}";
if (!empty($info['validPunct']) && !$info['hasOuterBracket']) {
$targetTag .= $info['validPunct'];
@@ -160,7 +148,7 @@ class FigureTagProcessor {
? '/\(\s*(?:<[^>]+>|\s)*Figure(?:<[^>]+>|\s)*' . $num . '\b' . $patternSuffix . '(?:\s*[\.,;:]*\s*|\s*<[^>]+>)*\s*\)/iu'
: '/\s*(?:<[^>]+>|\s)*Figure(?:<[^>]+>|\s)*' . $num . '\b' . $patternSuffix . '(?:\s*[\.,;:]*\s*|\s*<[^>]+>)*\s*([\.,:]{0,1})/iu';
- // 执行替换(最多替换1次,避免重复)
+ //执行替换(最多替换1次,避免重复)
$html = @preg_replace($pattern, $targetTag, $html, 1, $count);
if ($count > 0) {
$hasReplace = true;
@@ -177,11 +165,10 @@ class FigureTagProcessor {
*/
private function cleanRedundantStyles($html) {
foreach (self::STYLE_TAGS as $tag) {
- // 修改正则:适配 data-id="数字" 的格式
$pattern = '/<' . $tag . '>\s*]*)>(.*?)<\/myfigure>([\.,:]{0,1})\s*<\/' . $tag . '>/is';
$html = @preg_replace($pattern, '$2$3', $html);
}
- // 清理孤立的样式闭标签
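+ // Remove style closing tags, except those followed (with no other tag in between) by an opening tag of the same name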
$html = preg_replace('/<\/('.implode('|', self::STYLE_TAGS).')>(?![^<]*<\1>)/is', '', $html);
return $html;
}
@@ -192,29 +179,24 @@ class FigureTagProcessor {
* @return string
*/
private function cleanRedundantPunctuation($html) {
- // 修改正则:将 data-id = (\d+) 改为 data-id="(\d+)",适配新格式
$html = preg_replace('/\(Figure \d+\)<\/myfigure>\)\./i', '(Figure $1).', $html);
$html = preg_replace('/<\/myfigure>\)\.([\.,:]{0,1})/', ')$1', $html);
$html = preg_replace('/<\/myfigure>\.\)([\.,:]{0,1})/', ')$1', $html);
$html = preg_replace('/<\/myfigure>([\.,:]){2,}/', '$1', $html);
- // 同步修改此处正则的属性格式
$html = preg_replace('/\((Figure \d+)\s*<\/myfigure>([\.,:]{0,1})/i',
'($2)$3', $html);
return $html;
}
/**
- * 清理孤立的样式标签(优先暴力清理myfigure后标签,再用栈算法兜底)
+ * 清理孤立的样式标签
* @param string $html
* @return string
*/
private function cleanUnclosedTags($html) {
- // 第一步:暴力清理myfigure后孤立的样式闭标签
foreach (self::STYLE_TAGS as $tag) {
$html = @preg_replace('/(<\/myfigure>)\s*<\/' . $tag . '>/i', '$1', $html);
}
-
- // 第二步:栈算法清理其他孤立标签
foreach (self::STYLE_TAGS as $tag) {
@preg_match_all("/<{$tag}\b[^>]*>/i", $html, $openMatches, PREG_OFFSET_CAPTURE);
@preg_match_all("/<\/{$tag}>/i", $html, $closeMatches, PREG_OFFSET_CAPTURE);
diff --git a/application/common/TableTagProcessor.php b/application/common/TableTagProcessor.php
index 65e19ca..3a7e50f 100644
--- a/application/common/TableTagProcessor.php
+++ b/application/common/TableTagProcessor.php
@@ -25,17 +25,16 @@ class TableTagProcessor {
* status: 2-空输入, 4-无匹配/已处理, 5-处理异常, 1-处理成功
*/
public function dealTableStr($html = '', $aTableMain = []) {
- // 1. 基础输入校验
+ // Validate input: a non-string or blank value is rejected with status 2
if (!is_string($html) || trim($html) === '') {
return ['status' => 2, 'data' => ''];
}
- // 2. 超大字符串拦截(防止内存溢出)
+ //超大字符串拦截(防止内存溢出)
if (strlen($html) > self::MAX_HTML_LENGTH) {
- $this->logWarning('处理文本超出最大长度限制', ['length' => strlen($html)]);
return ['status' => 4, 'data' => $html];
}
- // 初始化主键映射数组(过滤非数字键/值,保证数据合法性)
+ //初始化主键映射数组
if(!empty($aTableMain)){
$aTableMainNew = [];
foreach ($aTableMain as $key => $value) {
@@ -52,10 +51,10 @@ class TableTagProcessor {
$hasReplace = false;
try {
- // 核心:直接在原始HTML中匹配所有符合规则的Table(含嵌套标签)
+ //原始HTML中匹配所有符合规则的Table
$html = $this->replaceTableInHtml($html, $hasReplace);
- // 清理冗余内容(仅替换成功后执行,保证输出整洁)
+ // 清理冗余内容
if ($hasReplace) {
$html = $this->cleanRedundantStyles($html);
$html = $this->cleanRedundantPunctuation($html);
@@ -65,19 +64,6 @@ class TableTagProcessor {
}
} catch (\Throwable $e) {
- // 异常兜底:捕获所有异常,记录详细日志,返回原始文本避免业务中断
- $pregError = preg_last_error();
- $pregErrorMsg = $this->getPregErrorMsg($pregError);
- $errorMsg = sprintf(
- '[%s] TableTagProcessor-dealTableStr 异常:%s | 文件:%s | 行:%d | 入参MD5:%s | 正则错误:%s',
- date('Y-m-d H:i:s'),
- $e->getMessage(),
- $e->getFile(),
- $e->getLine(),
- md5($originalHtml),
- $pregErrorMsg
- );
- $this->logError($errorMsg);
return ['status' => 5, 'data' => $originalHtml];
}
@@ -88,9 +74,7 @@ class TableTagProcessor {
}
/**
- * 核心方法:直接在HTML中匹配并替换Table(支持嵌套标签)
- * @param string $html
- * @param bool $hasReplace 引用传递:标记是否有替换
+ * 核心方法:直接在HTML中匹配并替换Table
* @return string
*/
private function replaceTableInHtml($html, &$hasReplace) {
@@ -112,12 +96,10 @@ class TableTagProcessor {
}
$primaryId = $this->aTableMain[$numInt];
- // 核心修改:规范mytable标签格式(属性值加双引号、去掉两侧空格)
$baseTag = "<".self::PROCESSED_TAG." data-id=\"{$primaryId}\">Table {$num}".self::PROCESSED_TAG.">";
$target = "({$baseTag}{$suffix})";
$hasReplace = true;
- $this->logInfo("替换带括号Table成功", ['num' => $num, 'primary_id' => $primaryId]);
return $target;
}, $html);
@@ -135,12 +117,10 @@ class TableTagProcessor {
}
$primaryId = $this->aTableMain[$numInt];
- // 核心修改:规范mytable标签格式(属性值加双引号、去掉两侧空格)
$baseTag = "<".self::PROCESSED_TAG." data-id=\"{$primaryId}\">Table {$num}".self::PROCESSED_TAG.">";
$target = "{$baseTag}{$suffix}";
$hasReplace = true;
- $this->logInfo("替换无括号Table成功", ['num' => $num, 'primary_id' => $primaryId]);
return $target;
}, $html);
@@ -168,7 +148,6 @@ class TableTagProcessor {
* @return string
*/
private function cleanRedundantPunctuation($html) {
- // 核心修改:适配新的mytable标签格式(data-id="数字")
$html = preg_replace('/<'.self::PROCESSED_TAG.' data-id="(\d+)">\(Table \d+\)<\/'.self::PROCESSED_TAG.'>\)\./i', '<'.self::PROCESSED_TAG.' data-id="$1">(Table $1)'.self::PROCESSED_TAG.'>.', $html);
$html = preg_replace('/<\/'.self::PROCESSED_TAG.'>\)\.([\.,:]{0,1})/', ''.self::PROCESSED_TAG.'>)$1', $html);
$html = preg_replace('/<\/'.self::PROCESSED_TAG.'>\.\)([\.,:]{0,1})/', ''.self::PROCESSED_TAG.'>)$1', $html);
@@ -270,47 +249,4 @@ class TableTagProcessor {
return @preg_match($pattern, $content) === 1;
}
- /**
- * 获取正则错误信息(便于调试)
- * @param int $pregError 正则错误码
- * @return string
- */
- private function getPregErrorMsg($pregError) {
- $errorCodes = [
- PREG_INTERNAL_ERROR => '内部错误',
- PREG_BACKTRACK_LIMIT_ERROR => '回溯限制超出',
- PREG_RECURSION_LIMIT_ERROR => '递归限制超出',
- PREG_BAD_UTF8_ERROR => '无效UTF-8字符',
- PREG_BAD_UTF8_OFFSET_ERROR => 'UTF-8偏移量无效',
- PREG_JIT_STACKLIMIT_ERROR => 'JIT栈限制超出'
- ];
- return isset($errorCodes[$pregError]) ? $errorCodes[$pregError] : "未知错误({$pregError})";
- }
-
- /**
- * 记录错误日志(生产环境可对接日志系统)
- * @param string $msg
- * @param array $context
- */
- private function logError($msg, $context = []) {
- error_log(json_encode(['level' => 'error', 'msg' => $msg, 'context' => $context, 'time' => date('Y-m-d H:i:s')]));
- }
-
- /**
- * 记录警告日志
- * @param string $msg
- * @param array $context
- */
- private function logWarning($msg, $context = []) {
- error_log(json_encode(['level' => 'warning', 'msg' => $msg, 'context' => $context, 'time' => date('Y-m-d H:i:s')]));
- }
-
- /**
- * 记录信息日志
- * @param string $msg
- * @param array $context
- */
- private function logInfo($msg, $context = []) {
- error_log(json_encode(['level' => 'info', 'msg' => $msg, 'context' => $context, 'time' => date('Y-m-d H:i:s')]));
- }
}
\ No newline at end of file