From b217ab03fd29aa089c5aed68f669295f20af4e4d Mon Sep 17 00:00:00 2001 From: chengxl Date: Sun, 18 Jan 2026 17:13:58 +0800 Subject: [PATCH] =?UTF-8?q?=E5=A4=84=E7=90=86=E6=AD=A3=E6=96=87=E5=86=85?= =?UTF-8?q?=E5=AE=B9=E8=A1=A8=E6=A0=BC/=E5=9B=BE=E7=89=87=E7=9B=B8?= =?UTF-8?q?=E5=85=B3=E8=81=94?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- application/common/FigureTagProcessor.php | 42 ++++--------- application/common/TableTagProcessor.php | 76 ++--------------------- 2 files changed, 18 insertions(+), 100 deletions(-) diff --git a/application/common/FigureTagProcessor.php b/application/common/FigureTagProcessor.php index ed6a5c8..97d53d4 100644 --- a/application/common/FigureTagProcessor.php +++ b/application/common/FigureTagProcessor.php @@ -13,11 +13,11 @@ class FigureTagProcessor { * status: 2-空输入, 4-无匹配, 5-处理异常, 1-处理成功 */ public function dealFigureStr($html = '') { - // 1. 基础输入校验 + //验证 if (!is_string($html) || trim($html) === '') { return ['status' => 2, 'data' => '']; } - // 2. 超大字符串拦截 + //超大字符串拦截 if (strlen($html) > self::MAX_HTML_LENGTH) { return ['status' => 4, 'data' => $html]; } @@ -26,22 +26,22 @@ class FigureTagProcessor { $hasReplace = false; try { - // 3. 合并嵌套样式标签 + //合并嵌套样式标签 $mergedHtml = $this->mergeFragmentStyleTags($html); - // 4. 提取纯文本(用于匹配Figure) + //提取纯文本(用于匹配Figure) $plainText = preg_replace('/<[^>]+>/', ' ', $mergedHtml); $plainText = preg_replace('/\s+/', ' ', trim($plainText)); - // 5. 提取所有匹配的Figure数字 + //提取所有匹配的Figure数字 $allMatches = $this->extractAllFigureMatches($plainText); if (empty($allMatches)) { return ['status' => 4, 'data' => $originalHtml]; } - // 6. 替换为myfigure标签 + //替换为myfigure标签 $html = $this->replaceFigureWithTag($html, $allMatches, $hasReplace); - // 7. 清理冗余内容(仅替换成功后执行) + //清理冗余内容(仅替换成功后执行) if ($hasReplace) { $html = $this->cleanRedundantStyles($html); $html = $this->cleanRedundantPunctuation($html); @@ -50,17 +50,6 @@ class FigureTagProcessor { } } catch (\Throwable $e) { - // 8. 异常处理(记录详细日志) - $errorMsg = sprintf( - '[%s] FigureTagProcessor-dealFigureStr 异常:%s | 文件:%s | 行:%d | 入参MD5:%s | 正则错误:%s', - date('Y-m-d H:i:s'), - $e->getMessage(), - $e->getFile(), - $e->getLine(), - md5($originalHtml), - preg_last_error() ? preg_last_error_msg() : '无' - ); - error_log($errorMsg); return ['status' => 5, 'data' => $originalHtml]; } @@ -71,7 +60,7 @@ class FigureTagProcessor { } /** - * 合并嵌套的样式标签(如aaabbb → aaa bbb) + * 合并嵌套的样式标签 * @param string $html * @return string */ @@ -148,8 +137,7 @@ class FigureTagProcessor { ? "({$info['content']})" : $info['content']; - // 核心修改:规范myfigure标签格式(去掉属性值空格、加双引号) - // 最终生成:Figure 1 + //Figure 1 $targetTag = "{$innerContent}"; if (!empty($info['validPunct']) && !$info['hasOuterBracket']) { $targetTag .= $info['validPunct']; @@ -160,7 +148,7 @@ class FigureTagProcessor { ? '/\(\s*(?:<[^>]+>|\s)*Figure(?:<[^>]+>|\s)*' . $num . '\b' . $patternSuffix . '(?:\s*[\.,;:]*\s*|\s*<[^>]+>)*\s*\)/iu' : '/\s*(?:<[^>]+>|\s)*Figure(?:<[^>]+>|\s)*' . $num . '\b' . $patternSuffix . '(?:\s*[\.,;:]*\s*|\s*<[^>]+>)*\s*([\.,:]{0,1})/iu'; - // 执行替换(最多替换1次,避免重复) + //执行替换(最多替换1次,避免重复) $html = @preg_replace($pattern, $targetTag, $html, 1, $count); if ($count > 0) { $hasReplace = true; @@ -177,11 +165,10 @@ class FigureTagProcessor { */ private function cleanRedundantStyles($html) { foreach (self::STYLE_TAGS as $tag) { - // 修改正则:适配 data-id="数字" 的格式 $pattern = '/<' . $tag . '>\s*]*)>(.*?)<\/myfigure>([\.,:]{0,1})\s*<\/' . $tag . '>/is'; $html = @preg_replace($pattern, '$2$3', $html); } - // 清理孤立的样式闭标签 + //清理闭标签 $html = preg_replace('/<\/('.implode('|', self::STYLE_TAGS).')>(?![^<]*<\1>)/is', '', $html); return $html; } @@ -192,29 +179,24 @@ class FigureTagProcessor { * @return string */ private function cleanRedundantPunctuation($html) { - // 修改正则:将 data-id = (\d+) 改为 data-id="(\d+)",适配新格式 $html = preg_replace('/\(Figure \d+\)<\/myfigure>\)\./i', '(Figure $1).', $html); $html = preg_replace('/<\/myfigure>\)\.([\.,:]{0,1})/', ')$1', $html); $html = preg_replace('/<\/myfigure>\.\)([\.,:]{0,1})/', ')$1', $html); $html = preg_replace('/<\/myfigure>([\.,:]){2,}/', '$1', $html); - // 同步修改此处正则的属性格式 $html = preg_replace('/\((Figure \d+)\s*<\/myfigure>([\.,:]{0,1})/i', '($2)$3', $html); return $html; } /** - * 清理孤立的样式标签(优先暴力清理myfigure后标签,再用栈算法兜底) + * 清理孤立的样式标签 * @param string $html * @return string */ private function cleanUnclosedTags($html) { - // 第一步:暴力清理myfigure后孤立的样式闭标签 foreach (self::STYLE_TAGS as $tag) { $html = @preg_replace('/(<\/myfigure>)\s*<\/' . $tag . '>/i', '$1', $html); } - - // 第二步:栈算法清理其他孤立标签 foreach (self::STYLE_TAGS as $tag) { @preg_match_all("/<{$tag}\b[^>]*>/i", $html, $openMatches, PREG_OFFSET_CAPTURE); @preg_match_all("/<\/{$tag}>/i", $html, $closeMatches, PREG_OFFSET_CAPTURE); diff --git a/application/common/TableTagProcessor.php b/application/common/TableTagProcessor.php index 65e19ca..3a7e50f 100644 --- a/application/common/TableTagProcessor.php +++ b/application/common/TableTagProcessor.php @@ -25,17 +25,16 @@ class TableTagProcessor { * status: 2-空输入, 4-无匹配/已处理, 5-处理异常, 1-处理成功 */ public function dealTableStr($html = '', $aTableMain = []) { - // 1. 基础输入校验 + //验证 if (!is_string($html) || trim($html) === '') { return ['status' => 2, 'data' => '']; } - // 2. 超大字符串拦截(防止内存溢出) + //超大字符串拦截(防止内存溢出) if (strlen($html) > self::MAX_HTML_LENGTH) { - $this->logWarning('处理文本超出最大长度限制', ['length' => strlen($html)]); return ['status' => 4, 'data' => $html]; } - // 初始化主键映射数组(过滤非数字键/值,保证数据合法性) + //初始化主键映射数组 if(!empty($aTableMain)){ $aTableMainNew = []; foreach ($aTableMain as $key => $value) { @@ -52,10 +51,10 @@ class TableTagProcessor { $hasReplace = false; try { - // 核心:直接在原始HTML中匹配所有符合规则的Table(含嵌套标签) + //原始HTML中匹配所有符合规则的Table $html = $this->replaceTableInHtml($html, $hasReplace); - // 清理冗余内容(仅替换成功后执行,保证输出整洁) + // 清理冗余内容 if ($hasReplace) { $html = $this->cleanRedundantStyles($html); $html = $this->cleanRedundantPunctuation($html); @@ -65,19 +64,6 @@ class TableTagProcessor { } } catch (\Throwable $e) { - // 异常兜底:捕获所有异常,记录详细日志,返回原始文本避免业务中断 - $pregError = preg_last_error(); - $pregErrorMsg = $this->getPregErrorMsg($pregError); - $errorMsg = sprintf( - '[%s] TableTagProcessor-dealTableStr 异常:%s | 文件:%s | 行:%d | 入参MD5:%s | 正则错误:%s', - date('Y-m-d H:i:s'), - $e->getMessage(), - $e->getFile(), - $e->getLine(), - md5($originalHtml), - $pregErrorMsg - ); - $this->logError($errorMsg); return ['status' => 5, 'data' => $originalHtml]; } @@ -88,9 +74,7 @@ class TableTagProcessor { } /** - * 核心方法:直接在HTML中匹配并替换Table(支持嵌套标签) - * @param string $html - * @param bool $hasReplace 引用传递:标记是否有替换 + * 核心方法:直接在HTML中匹配并替换Table * @return string */ private function replaceTableInHtml($html, &$hasReplace) { @@ -112,12 +96,10 @@ class TableTagProcessor { } $primaryId = $this->aTableMain[$numInt]; - // 核心修改:规范mytable标签格式(属性值加双引号、去掉两侧空格) $baseTag = "<".self::PROCESSED_TAG." data-id=\"{$primaryId}\">Table {$num}"; $target = "({$baseTag}{$suffix})"; $hasReplace = true; - $this->logInfo("替换带括号Table成功", ['num' => $num, 'primary_id' => $primaryId]); return $target; }, $html); @@ -135,12 +117,10 @@ class TableTagProcessor { } $primaryId = $this->aTableMain[$numInt]; - // 核心修改:规范mytable标签格式(属性值加双引号、去掉两侧空格) $baseTag = "<".self::PROCESSED_TAG." data-id=\"{$primaryId}\">Table {$num}"; $target = "{$baseTag}{$suffix}"; $hasReplace = true; - $this->logInfo("替换无括号Table成功", ['num' => $num, 'primary_id' => $primaryId]); return $target; }, $html); @@ -168,7 +148,6 @@ class TableTagProcessor { * @return string */ private function cleanRedundantPunctuation($html) { - // 核心修改:适配新的mytable标签格式(data-id="数字") $html = preg_replace('/<'.self::PROCESSED_TAG.' data-id="(\d+)">\(Table \d+\)<\/'.self::PROCESSED_TAG.'>\)\./i', '<'.self::PROCESSED_TAG.' data-id="$1">(Table $1).', $html); $html = preg_replace('/<\/'.self::PROCESSED_TAG.'>\)\.([\.,:]{0,1})/', ')$1', $html); $html = preg_replace('/<\/'.self::PROCESSED_TAG.'>\.\)([\.,:]{0,1})/', ')$1', $html); @@ -270,47 +249,4 @@ class TableTagProcessor { return @preg_match($pattern, $content) === 1; } - /** - * 获取正则错误信息(便于调试) - * @param int $pregError 正则错误码 - * @return string - */ - private function getPregErrorMsg($pregError) { - $errorCodes = [ - PREG_INTERNAL_ERROR => '内部错误', - PREG_BACKTRACK_LIMIT_ERROR => '回溯限制超出', - PREG_RECURSION_LIMIT_ERROR => '递归限制超出', - PREG_BAD_UTF8_ERROR => '无效UTF-8字符', - PREG_BAD_UTF8_OFFSET_ERROR => 'UTF-8偏移量无效', - PREG_JIT_STACKLIMIT_ERROR => 'JIT栈限制超出' - ]; - return isset($errorCodes[$pregError]) ? $errorCodes[$pregError] : "未知错误({$pregError})"; - } - - /** - * 记录错误日志(生产环境可对接日志系统) - * @param string $msg - * @param array $context - */ - private function logError($msg, $context = []) { - error_log(json_encode(['level' => 'error', 'msg' => $msg, 'context' => $context, 'time' => date('Y-m-d H:i:s')])); - } - - /** - * 记录警告日志 - * @param string $msg - * @param array $context - */ - private function logWarning($msg, $context = []) { - error_log(json_encode(['level' => 'warning', 'msg' => $msg, 'context' => $context, 'time' => date('Y-m-d H:i:s')])); - } - - /** - * 记录信息日志 - * @param string $msg - * @param array $context - */ - private function logInfo($msg, $context = []) { - error_log(json_encode(['level' => 'info', 'msg' => $msg, 'context' => $context, 'time' => date('Y-m-d H:i:s')])); - } } \ No newline at end of file