From 2e5c0bb22301d7308d15fd4c7c0bcf72fd282652 Mon Sep 17 00:00:00 2001 From: chengxl Date: Wed, 14 Jan 2026 09:58:10 +0800 Subject: [PATCH 1/5] =?UTF-8?q?=E8=BF=87=E6=9C=9F=E7=94=B3=E8=AF=B7?= =?UTF-8?q?=E6=9C=89=E9=99=90=E5=8F=91=E9=80=81=E5=88=B0=E6=9C=9F=E5=88=8A?= =?UTF-8?q?=E9=82=AE=E7=AE=B1?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- application/api/controller/Workbench.php | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/application/api/controller/Workbench.php b/application/api/controller/Workbench.php index 0d8bf45..5ccd66c 100644 --- a/application/api/controller/Workbench.php +++ b/application/api/controller/Workbench.php @@ -659,9 +659,6 @@ class Workbench extends Base return json_encode(['status' => 7,'msg' => 'The article is not in the review status']); } - //查询期刊信息 - $aWhere = ['journal_id' => $aArticle['article_id'],'state' => 0]; - $aJournal = Db::name('journal')->field('title as journal_name,website')->find(); //查询期刊信息 if(empty($aArticle['journal_id'])){ return json_encode(array('status' => 8,'msg' => 'The article is not associated with a journal' )); @@ -721,7 +718,8 @@ class Workbench extends Base //发邮件 //邮箱 - $email = empty($aUser[$iUserId]['email']) ? '' : $aUser[$iUserId]['email']; + // $email = empty($aUser[$iUserId]['email']) ? '' : $aUser[$iUserId]['email']; + $email = empty($aJournal['email']) ? '' : $aJournal['email']; if(empty($email)){ return json_encode(['status' => 8,'msg' => 'Edit email as empty']); } @@ -793,9 +791,6 @@ class Workbench extends Base return json_encode(['status' => 7,'msg' => 'The article is not in the review status']); } - //查询期刊信息 - $aWhere = ['journal_id' => $aArticle['article_id'],'state' => 0]; - $aJournal = Db::name('journal')->field('title as journal_name,website')->find(); //查询期刊信息 if(empty($aArticle['journal_id'])){ return json_encode(array('status' => 8,'msg' => 'The article is not associated with a journal' )); @@ -807,10 +802,10 @@ class Workbench extends Base } //判断编辑的操作权限 - $iEditorId = empty($aJournal['editor_id']) ? 0 : $aJournal['editor_id']; - if($iEditorId != $iUserId){ - return json_encode(array('status' => 10,'msg' => 'This article is not authorized for operation under the journal you are responsible for' )); - } + // $iEditorId = empty($aJournal['editor_id']) ? 0 : $aJournal['editor_id']; + // if($iEditorId != $iUserId){ + // return json_encode(array('status' => 10,'msg' => 'This article is not authorized for operation under the journal you are responsible for' )); + // } //更新文章状态为邀请 $aWhere = ['art_rev_id' => $iArtRevId,'state' => 4]; From 42978e1756d56aad1a99bdbb0806921c6fba2f63 Mon Sep 17 00:00:00 2001 From: chengxl Date: Sun, 18 Jan 2026 17:02:45 +0800 Subject: [PATCH 2/5] =?UTF-8?q?=E6=8E=92=E7=89=88=E7=9B=B8=E5=85=B3?= =?UTF-8?q?=E6=AD=A3=E6=96=87=E5=86=85=E5=AE=B9=E8=B0=83=E6=95=B4?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- application/api/controller/Articlemain.php | 358 +++++++++++++++++++++ 1 file changed, 358 insertions(+) create mode 100644 application/api/controller/Articlemain.php diff --git a/application/api/controller/Articlemain.php b/application/api/controller/Articlemain.php new file mode 100644 index 0000000..f826868 --- /dev/null +++ b/application/api/controller/Articlemain.php @@ -0,0 +1,358 @@ +request->post(); + + //文章ID + $iArticleId = empty($aParam['article_id']) ? 0 : $aParam['article_id']; + if(empty($iArticleId)){ + return json_encode(array('status' => 2,'msg' => 'Please select an article' )); + } + //上传文件域 + $file = $this->request->file('file_name'); + if (empty($file)) { + return json_encode(array('status' => 2,'msg' => 'No uploaded file was obtained')); + } + //查询文章信息 + $aWhere = ['article_id' => $iArticleId]; + $aArticle = Db::name('article')->field('article_id')->where($aWhere)->find(); + if(empty($aArticle)){ + return json_encode(array('status' => 3,'msg' => 'The article does not exist' )); + } + //验证文件(类型/大小) + $validate = [ + 'size' => 1024000, // 限制1M以内 + // 'ext' => 'jpg,jpeg,png,gif' // 允许的后缀 + ]; + $fileValid = $file->validate($validate); + if (!$fileValid) { + $errorMsg = $file->getError(); + return json_encode(['status' => 3, 'msg' => 'Format/size validation failed:' . $errorMsg]); + } + + //图片名后缀 + $ext = $file->getExtension(); + if (empty($ext)) { + // 获取文件真实MIME类型(复制粘贴的图片也能识别) + $mime = $file->getMime(); + // MIME类型与后缀的映射表(覆盖常见图片类型) + $mimeExtMap = [ + // 基础图片格式 + 'image/jpeg' => 'jpg', + 'image/jpg' => 'jpg', // 兼容简写 + 'image/png' => 'png', + 'image/gif' => 'gif', + 'image/bmp' => 'bmp', + 'image/x-bmp' => 'bmp', // 兼容Windows bmp + // 扩展图片格式 + 'image/webp' => 'webp', // 网页常用 + 'image/tiff' => 'tiff', + 'image/x-tiff' => 'tiff', + 'image/svg+xml' => 'svg', // 矢量图 + 'image/heic' => 'heic', // 苹果图片格式 + 'image/heif' => 'heif', + 'image/avif' => 'avif', // 高效压缩图 + // 特殊图片格式 + 'image/x-icon' => 'ico', // 图标 + 'image/vnd.microsoft.icon' => 'ico', + ]; + $ext = $mimeExtMap[$mime] ?? ''; + } + $ext = empty($ext) ? 'png' : $ext; + //组装新的图片名 + $sFileName = md5(uniqid(mt_rand(), true)) . '.' . $ext; + + //保存文件 + $sImagePath = rtrim(ROOT_PATH,'/') . '/public' . DS . 'articleTableImage' . DS . $iArticleId; + if (!is_dir($sImagePath)) { + mkdir($sImagePath, 0777, true); + } + $sImageName = $fileValid->move($sImagePath,$sFileName); + if (!$sImageName) { + return json_encode(['status' => 4, 'msg' => $fileValid->getError()]); + } + + //返回图片路径 + return json_encode(['status' => 1, 'msg' => 'Upload successful','data' => $iArticleId .DS.$sFileName]); + } + + /** + * 获取正文内容里删除图片 + */ + public function removeMainImage(){ + + //获取参数 + $aParam = $this->request->post(); + + //文章ID + $iArticleId = empty($aParam['article_id']) ? 0 : $aParam['article_id']; + if(empty($iArticleId)){ + return json_encode(array('status' => 2,'msg' => 'Please select an article' )); + } + //图片ID + $iAmiId = empty($aParam['ami_id']) ? 0 : $aParam['ami_id']; + if(empty($iAmiId)){ + return json_encode(array('status' => 2,'msg' => 'Please select the image ID to query')); + } + //查询文章信息 + $aWhere = ['article_id' => $iArticleId]; + $aArticle = Db::name('article')->field('article_id')->where($aWhere)->find(); + if(empty($aArticle)){ + return json_encode(array('status' => 3,'msg' => 'The article does not exist' )); + } + //查询图片信息 + $aWhere = ['ami_id' => $iAmiId,'state' => 0,'article_id' => $iArticleId]; + $aMainImage = Db::name('article_main_image')->field('ami_id')->where($aWhere)->find(); + if(empty($aMainImage)){ + return json_encode(array('status' => 4,'msg' => 'Article image information does not exist' )); + } + //获取正文内容 + $aWhere = ['article_id' => $iArticleId,'type' => ['in',[0,1]],'state' => 0,'is_h1' => ['<>',1],'is_h2' => ['<>',1],'is_h3' => ['<>',1]]; + $aArticleMain = Db::name('article_main')->field('am_id,content,type,ami_id')->where($aWhere)->order('sort asc')->select(); + + $iBindImage = 2; + if(!empty($aArticleMain)){//验证正文内容是否绑定该图片 + //数据处理 + foreach ($aArticleMain as $key => $value) { + $sContent = empty($value['content']) ? '' : trim($value['content']); + if(empty($sContent)){ + continue; + } + if(!empty($iAmiId)){ + if($value['type'] == 1 && $value['ami_id'] == $iAmiId){ + $iBindImage = 1; + break; + } + if($value['type'] == 0){ + $result = $this->hasProcessedTagWithId($value['content'],$iAmiId); + if($result == 1){ + $iBindImage = 1; + break; + } + } + } + } + } + if($iBindImage == 1){ + return json_encode(array('status' => 5,'msg' => 'The main content has been bound to this image and cannot be deleted' )); + } + //删除图片 + $aWhere = ['ami_id' => $iAmiId]; + $aUpdate = ['state' => 1]; + $result = Db::name('article_main_image')->where($aWhere)->limit(1)->update($aUpdate); + if($result === false){ + return json_encode(array('status' => 6,'msg' => 'Image deletion failed' )); + } + //返回数据 + return json_encode(array('status' => 1,'msg' => 'Image deleted successfully')); + } + /** + * 获取正文内容里删除表格 + */ + public function removeMainTable(){ + + //获取参数 + $aParam = $this->request->post(); + + //文章ID + $iArticleId = empty($aParam['article_id']) ? 0 : $aParam['article_id']; + if(empty($iArticleId)){ + return json_encode(array('status' => 2,'msg' => 'Please select an article' )); + } + //表格ID + $iAmtId = empty($aParam['amt_id']) ? 0 : $aParam['amt_id']; + if(empty($iAmtId)){ + return json_encode(array('status' => 2,'msg' => 'Please select the table ID to query')); + } + //查询文章信息 + $aWhere = ['article_id' => $iArticleId]; + $aArticle = Db::name('article')->field('article_id')->where($aWhere)->find(); + if(empty($aArticle)){ + return json_encode(array('status' => 3,'msg' => 'The article does not exist' )); + } + //查询表格信息 + $aWhere = ['amt_id' => $iAmtId,'state' => 0,'article_id' => $iArticleId]; + $aMainTable = Db::name('article_main_table')->field('amt_id')->where($aWhere)->find(); + if(empty($aMainTable)){ + return json_encode(array('status' => 4,'msg' => 'Article table information does not exist' )); + } + + //获取正文内容 + $aWhere = ['article_id' => $iArticleId,'type' => ['in',[0,2]],'state' => 0,'is_h1' => ['<>',1],'is_h2' => ['<>',1],'is_h3' => ['<>',1]]; + $aArticleMain = Db::name('article_main')->field('am_id,content,type,amt_id')->where($aWhere)->order('sort asc')->select(); + //数据处理-验证正文是否绑定表格 + $iBindTable = 2; + if(!empty($aArticleMain)){ + foreach ($aArticleMain as $key => $value) { + $sContent = empty($value['content']) ? '' : trim($value['content']); + if(empty($sContent)){ + continue; + } + if($value['type'] == 2 && $value['amt_id'] == $iAmtId){ + $iBindTable = 1; + break; + } + if($value['type'] == 0){ + $result = $this->hasProcessedTagWithId($value['content'],$iAmtId,'mytable'); + if($result == 1){ + $iBindTable = 1; + break; + } + } + } + } + if($iBindTable == 1){ + return json_encode(array('status' => 5,'msg' => 'The main content is already bound to this table and cannot be deleted' )); + } + //删除表格 + $aWhere = ['amt_id' => $iAmtId]; + $aUpdate = ['state' => 1]; + $result = Db::name('article_main_table')->where($aWhere)->limit(1)->update($aUpdate); + if($result === false){ + return json_encode(array('status' => 6,'msg' => 'Table deletion failed' )); + } + //返回数据 + return json_encode(array('status' => 1,'msg' => 'Table deleted successfully')); + } + /** + * 验证是否存在 + */ + public function hasProcessedTagWithId($content = '', $primaryId = 0, $sLable = 'myfigure') { + if(empty($content) || empty($primaryId)){ + return 2; + } + $escapedTagName = preg_quote($sLable, '/'); + $escapedId = preg_quote($primaryId, '/'); + + // 优化后的正则表达式 + $pattern = "/<{$escapedTagName}\s+data-id\s*=\s*['\"]{$escapedId}['\"]\s*>(.*?)<\/{$escapedTagName}>/i"; + + // 执行匹配 + if (!preg_match($pattern, $content, $matches)) { + return 3; + } + return 1; + } + /** + * 获取文章关联的图片 + */ + public function getArticleMainImage(){ + //获取参数 + $aParam = $this->request->post(); + + //文章ID + $iArticleId = empty($aParam['article_id']) ? 0 : $aParam['article_id']; + if(empty($iArticleId)){ + return json_encode(array('status' => 2,'msg' => 'Please select an article' )); + } + //查询文章信息 + $aWhere = ['article_id' => $iArticleId]; + $aArticle = Db::name('article')->field('article_id')->where($aWhere)->find(); + if(empty($aArticle)){ + return json_encode(array('status' => 3,'msg' => 'The article does not exist' )); + } + //查询图片信息 + $aWhere = ['state' => 0,'article_id' => $iArticleId]; + $aMainImage = Db::name('article_main_image')->field('ami_id,title')->where($aWhere)->select(); + return json_encode(array('status' => 1,'msg' => 'success','data' => $aMainImage)); + } + /** + * 获取文章关联的表格 + */ + public function getArticleMainTable(){ + //获取参数 + $aParam = $this->request->post(); + + //文章ID + $iArticleId = empty($aParam['article_id']) ? 0 : $aParam['article_id']; + if(empty($iArticleId)){ + return json_encode(array('status' => 2,'msg' => 'Please select an article' )); + } + //查询文章信息 + $aWhere = ['article_id' => $iArticleId]; + $aArticle = Db::name('article')->field('article_id')->where($aWhere)->find(); + if(empty($aArticle)){ + return json_encode(array('status' => 3,'msg' => 'The article does not exist' )); + } + //查询表格信息 + $aWhere = ['state' => 0,'article_id' => $iArticleId]; + $aMainTable = Db::name('article_main_table')->field('amt_id,title')->where($aWhere)->order('amt_id asc')->select(); + return json_encode(array('status' => 1,'msg' => 'success','data' => $aMainTable)); + } + /** + * 获取文章关联图片的详细信息 + */ + public function getMainImageInfo(){ + //获取参数 + $aParam = $this->request->post(); + + //文章ID + $iArticleId = empty($aParam['article_id']) ? 0 : $aParam['article_id']; + if(empty($iArticleId)){ + return json_encode(array('status' => 2,'msg' => 'Please select an article' )); + } + //图片ID + $iAmiId = empty($aParam['ami_id']) ? 0 : $aParam['ami_id']; + if(empty($iAmiId)){ + return json_encode(array('status' => 2,'msg' => 'Please select the image ID to query')); + } + //查询文章信息 + $aWhere = ['article_id' => $iArticleId]; + $aArticle = Db::name('article')->field('article_id')->where($aWhere)->find(); + if(empty($aArticle)){ + return json_encode(array('status' => 3,'msg' => 'The article does not exist' )); + } + //查询图片信息 + $aWhere = ['state' => 0,'article_id' => $iArticleId,'ami_id' => $iAmiId]; + $aMainImage = Db::name('article_main_image')->field('ami_id,title,url,note')->where($aWhere)->find(); + return json_encode(array('status' => 1,'msg' => 'success','data' => $aMainImage)); + } + /** + * 获取文章关联表格的详细信息 + */ + public function getMainTableInfo(){ + //获取参数 + $aParam = $this->request->post(); + + //文章ID + $iArticleId = empty($aParam['article_id']) ? 0 : $aParam['article_id']; + if(empty($iArticleId)){ + return json_encode(array('status' => 2,'msg' => 'Please select an article' )); + } + //表格ID + $iAmtId = empty($aParam['amt_id']) ? 0 : $aParam['amt_id']; + if(empty($iAmtId)){ + return json_encode(array('status' => 2,'msg' => 'Please select the table ID to query')); + } + //查询文章信息 + $aWhere = ['article_id' => $iArticleId]; + $aArticle = Db::name('article')->field('article_id')->where($aWhere)->find(); + if(empty($aArticle)){ + return json_encode(array('status' => 3,'msg' => 'The article does not exist' )); + } + //查询表格信息 + $aWhere = ['state' => 0,'article_id' => $iArticleId,'amt_id' => $iAmtId]; + $aMainTable = Db::name('article_main_table')->field('amt_id,type,table_data,url,title,note')->where($aWhere)->find(); + return json_encode(array('status' => 1,'msg' => 'success','data' => $aMainTable)); + } +} From bf0208b32a0092cc1a968323a705c85b9a3a6f25 Mon Sep 17 00:00:00 2001 From: chengxl Date: Sun, 18 Jan 2026 17:06:16 +0800 Subject: [PATCH 3/5] =?UTF-8?q?=E5=A4=84=E7=90=86=E6=AD=A3=E6=96=87?= =?UTF-8?q?=E5=86=85=E5=AE=B9=E8=A1=A8=E6=A0=BC/=E5=9B=BE=E7=89=87?= =?UTF-8?q?=E7=9B=B8=E5=85=B3=E8=81=94?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- application/api/controller/Production.php | 68 +++++++++++++++++++++++ 1 file changed, 68 insertions(+) diff --git a/application/api/controller/Production.php b/application/api/controller/Production.php index 6d5d5b2..4f3b171 100644 --- a/application/api/controller/Production.php +++ b/application/api/controller/Production.php @@ -3803,4 +3803,72 @@ class Production extends Base $sContent .= $sReferences; return ['status' => 1,'msg' => 'success','data' => ['references' => $sContent,'references_list' => $aReferencesLists]]; } + /** + * 生成初稿-处理正文内容表格/图片相关联 + * @param p_article_id 生产环境文章信息 + */ + public function dealMainFigureOrTable(){ + + //获取参数 + $aParam = empty($aParam) ? $this->request->post() : $aParam; + + //必填值验证 + $iArticleId = empty($aParam['article_id']) ? '' : $aParam['article_id']; + if(empty($iArticleId)){ + return json_encode(array('status' => 2,'msg' => 'Please select an article' )); + } + //查询内容 + $aWhere = ['article_id' => $iArticleId,'type' => 0,'state' => 0,'is_h1' => ['<>',1],'is_h2' => ['<>',1],'is_h3' => ['<>',1]]; + $aTextMain = Db::name('article_main')->field('am_id,content')->where($aWhere)->order('sort asc')->select(); + if(empty($aTextMain)){ + return json_encode(array('status' => 3,'msg' => 'The content is empty' )); + } + //查询图片信息 + $aWhere = ['article_id' => $iArticleId,'type' => 1,'state' => 0]; + $aImageMain = Db::name('article_main')->where($aWhere)->order('sort asc')->column('ami_id'); + //查询图片信息 + $aWhere = ['article_id' => $iArticleId,'type' => 2,'state' => 0]; + $aTableMain = Db::name('article_main')->where($aWhere)->order('sort asc')->column('amt_id'); + + //数据处理 + $aUpdate = []; + $oFigureTagProcessor = new \app\common\FigureTagProcessor; + $oTableTagProcessor = new \app\common\TableTagProcessor; + foreach ($aTextMain as $key => $value) { + $sContent = empty($value['content']) ? '' : trim($value['content']); + if(empty($sContent)){ + continue; + } + //处理图片 + $aFigureContent = $oFigureTagProcessor->dealFigureStr($sContent,$aImageMain); + $iStatus = empty($aFigureContent['status']) ? 0 : $aFigureContent['status']; + $sData = empty($aFigureContent['data']) ? '' : $aFigureContent['data']; + // var_dump($aFigureContent); + if($iStatus != 1){ + $sData = $sContent; + }else{ + $aUpdate[$value['am_id']] = ['am_id' => $value['am_id'],'content' => $sData]; + } + //处理表格 + $aTableContent = $oTableTagProcessor->dealTableStr($sData,$aTableMain); + $iStatus = empty($aTableContent['status']) ? 0 : $aTableContent['status']; + $sData = empty($aTableContent['data']) ? $sContent : $aTableContent['data']; + if($iStatus != 1){ + continue; + } + $aUpdate[$value['am_id']] = ['am_id' => $value['am_id'],'content' => $sData]; + } + //批量更新入库 + if(empty($aUpdate)){ + return json_encode(array('status' => 3,'msg' => 'No table or image data to be processed was found' )); + } + + $oArticleMain = new \app\common\ArticleMain(); + $result = $oArticleMain->saveAll($aUpdate); + if ($result === false) { + return json_encode(array('status' => 4,'msg' => 'Operation failed'.json_encode($aUpdate))); + } + + return json_encode(array('status' => 1,'msg' => 'success')); + } } From 4704b61448d6603c91cee058ced40bd309980a15 Mon Sep 17 00:00:00 2001 From: chengxl Date: Sun, 18 Jan 2026 17:07:57 +0800 Subject: [PATCH 4/5] =?UTF-8?q?=E5=A4=84=E7=90=86=E6=AD=A3=E6=96=87?= =?UTF-8?q?=E5=86=85=E5=AE=B9=E8=A1=A8=E6=A0=BC/=E5=9B=BE=E7=89=87?= =?UTF-8?q?=E7=9B=B8=E5=85=B3=E8=81=94?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- application/common/ArticleMain.php | 13 + application/common/FigureTagProcessor.php | 292 ++++++++++++++++++++ application/common/TableTagProcessor.php | 316 ++++++++++++++++++++++ 3 files changed, 621 insertions(+) create mode 100644 application/common/ArticleMain.php create mode 100644 application/common/FigureTagProcessor.php create mode 100644 application/common/TableTagProcessor.php diff --git a/application/common/ArticleMain.php b/application/common/ArticleMain.php new file mode 100644 index 0000000..6d52449 --- /dev/null +++ b/application/common/ArticleMain.php @@ -0,0 +1,13 @@ +状态码, 'data'=>处理后文本] + * status: 2-空输入, 4-无匹配, 5-处理异常, 1-处理成功 + */ + public function dealFigureStr($html = '') { + // 1. 基础输入校验 + if (!is_string($html) || trim($html) === '') { + return ['status' => 2, 'data' => '']; + } + // 2. 超大字符串拦截 + if (strlen($html) > self::MAX_HTML_LENGTH) { + return ['status' => 4, 'data' => $html]; + } + + $originalHtml = $html; + $hasReplace = false; + + try { + // 3. 合并嵌套样式标签 + $mergedHtml = $this->mergeFragmentStyleTags($html); + // 4. 提取纯文本(用于匹配Figure) + $plainText = preg_replace('/<[^>]+>/', ' ', $mergedHtml); + $plainText = preg_replace('/\s+/', ' ', trim($plainText)); + + // 5. 提取所有匹配的Figure数字 + $allMatches = $this->extractAllFigureMatches($plainText); + if (empty($allMatches)) { + return ['status' => 4, 'data' => $originalHtml]; + } + + // 6. 替换为myfigure标签 + $html = $this->replaceFigureWithTag($html, $allMatches, $hasReplace); + + // 7. 清理冗余内容(仅替换成功后执行) + if ($hasReplace) { + $html = $this->cleanRedundantStyles($html); + $html = $this->cleanRedundantPunctuation($html); + $html = $this->cleanUnclosedTags($html); + $html = $this->optimizeFormat($html); + } + + } catch (\Throwable $e) { + // 8. 异常处理(记录详细日志) + $errorMsg = sprintf( + '[%s] FigureTagProcessor-dealFigureStr 异常:%s | 文件:%s | 行:%d | 入参MD5:%s | 正则错误:%s', + date('Y-m-d H:i:s'), + $e->getMessage(), + $e->getFile(), + $e->getLine(), + md5($originalHtml), + preg_last_error() ? preg_last_error_msg() : '无' + ); + error_log($errorMsg); + return ['status' => 5, 'data' => $originalHtml]; + } + + return [ + 'status' => $hasReplace ? 1 : 4, + 'data' => $hasReplace ? $html : $originalHtml + ]; + } + + /** + * 合并嵌套的样式标签(如aaabbb → aaa bbb) + * @param string $html + * @return string + */ + private function mergeFragmentStyleTags($html) { + foreach (self::STYLE_TAGS as $tag) { + $pattern = '/(?:<' . $tag . '>)\s*([^<]+?)\s*<\/' . $tag . '>(?:\s*<' . $tag . '>)\s*([^<]+?)\s*<\/' . $tag . '>/is'; + while (@preg_match($pattern, $html)) { // 抑制正则警告 + $html = preg_replace_callback($pattern, function($matches) { + return trim($matches[1]) . ' ' . trim($matches[2]); + }, $html); + } + } + + // 清理括号内的冗余标点/标签 + $html = preg_replace('/(\(.*?\d+)(?:\s*<[^>]+>)*\s*\.*\s*(?:<[^>]+>)*(\s*.*?\))/is', '$1$2', $html); + $html = preg_replace('/\(\s+/', '(', $html); + $html = preg_replace('/\s+\)/', ')', $html); + return $html; + } + + /** + * 从纯文本中提取所有Figure数字(兼容括号/标点/空格) + * @param string $plainText + * @return array + */ + private function extractAllFigureMatches($plainText) { + $allMatches = []; + $processedNums = []; + + // 匹配带括号的Figure(如 (Figure 1.)) + $pattern1 = '/\(Figure\s*(\d+)\b(?!\p{L}|\s+\p{L})(?:\s*[\.,;:]*\s*)\)\s*([\.,:]{0,1})/iu'; + if (@preg_match_all($pattern1, $plainText, $matchesFull, PREG_SET_ORDER)) { + foreach ($matchesFull as $match) { + $num = $match[1]; + if (!ctype_digit($num) || in_array($num, $processedNums)) continue; + $processedNums[] = $num; + $allMatches[$num] = [ + 'hasOuterBracket' => true, + 'validPunct' => $match[2] ?? '', + 'content' => "Figure {$num}" + ]; + } + } + + // 匹配无括号的Figure(如 Figure 1.) + $pattern2 = '/Figure\s*(\d+)\b(?!\p{L}|\s+\p{L})(?:\s*[\.,;:]*\s*)\s*([\.,:]{0,1})/iu'; + if (@preg_match_all($pattern2, $plainText, $matchesOther, PREG_SET_ORDER)) { + foreach ($matchesOther as $match) { + $num = $match[1]; + if (!ctype_digit($num) || in_array($num, $processedNums)) continue; + $processedNums[] = $num; + $allMatches[$num] = [ + 'hasOuterBracket' => false, + 'validPunct' => $match[2] ?? '', + 'content' => "Figure {$num}" + ]; + } + } + + krsort($allMatches); + return $allMatches; + } + + /** + * 将匹配的Figure替换为myfigure标签(优化标签格式) + * @param string $html + * @param array $allMatches + * @param bool $hasReplace + * @return string + */ + private function replaceFigureWithTag($html, $allMatches, &$hasReplace) { + foreach ($allMatches as $num => $info) { + $innerContent = $info['hasOuterBracket'] + ? "({$info['content']})" + : $info['content']; + + // 核心修改:规范myfigure标签格式(去掉属性值空格、加双引号) + // 最终生成:Figure 1 + $targetTag = "{$innerContent}"; + if (!empty($info['validPunct']) && !$info['hasOuterBracket']) { + $targetTag .= $info['validPunct']; + } + + $patternSuffix = '(?!\p{L}|\s+\p{L})'; + $pattern = $info['hasOuterBracket'] + ? '/\(\s*(?:<[^>]+>|\s)*Figure(?:<[^>]+>|\s)*' . $num . '\b' . $patternSuffix . '(?:\s*[\.,;:]*\s*|\s*<[^>]+>)*\s*\)/iu' + : '/\s*(?:<[^>]+>|\s)*Figure(?:<[^>]+>|\s)*' . $num . '\b' . $patternSuffix . '(?:\s*[\.,;:]*\s*|\s*<[^>]+>)*\s*([\.,:]{0,1})/iu'; + + // 执行替换(最多替换1次,避免重复) + $html = @preg_replace($pattern, $targetTag, $html, 1, $count); + if ($count > 0) { + $hasReplace = true; + error_log("[FigureTagProcessor] 替换成功 - ID:{$num} 括号:".($info['hasOuterBracket']?'是':'否')); + } + } + return $html; + } + + /** + * 清理myfigure标签周围的冗余样式标签(适配新标签格式) + * @param string $html + * @return string + */ + private function cleanRedundantStyles($html) { + foreach (self::STYLE_TAGS as $tag) { + // 修改正则:适配 data-id="数字" 的格式 + $pattern = '/<' . $tag . '>\s*]*)>(.*?)<\/myfigure>([\.,:]{0,1})\s*<\/' . $tag . '>/is'; + $html = @preg_replace($pattern, '$2$3', $html); + } + // 清理孤立的样式闭标签 + $html = preg_replace('/<\/('.implode('|', self::STYLE_TAGS).')>(?![^<]*<\1>)/is', '', $html); + return $html; + } + + /** + * 清理myfigure标签后的冗余标点(适配新标签格式) + * @param string $html + * @return string + */ + private function cleanRedundantPunctuation($html) { + // 修改正则:将 data-id = (\d+) 改为 data-id="(\d+)",适配新格式 + $html = preg_replace('/\(Figure \d+\)<\/myfigure>\)\./i', '(Figure $1).', $html); + $html = preg_replace('/<\/myfigure>\)\.([\.,:]{0,1})/', ')$1', $html); + $html = preg_replace('/<\/myfigure>\.\)([\.,:]{0,1})/', ')$1', $html); + $html = preg_replace('/<\/myfigure>([\.,:]){2,}/', '$1', $html); + // 同步修改此处正则的属性格式 + $html = preg_replace('/\((Figure \d+)\s*<\/myfigure>([\.,:]{0,1})/i', + '($2)$3', $html); + return $html; + } + + /** + * 清理孤立的样式标签(优先暴力清理myfigure后标签,再用栈算法兜底) + * @param string $html + * @return string + */ + private function cleanUnclosedTags($html) { + // 第一步:暴力清理myfigure后孤立的样式闭标签 + foreach (self::STYLE_TAGS as $tag) { + $html = @preg_replace('/(<\/myfigure>)\s*<\/' . $tag . '>/i', '$1', $html); + } + + // 第二步:栈算法清理其他孤立标签 + foreach (self::STYLE_TAGS as $tag) { + @preg_match_all("/<{$tag}\b[^>]*>/i", $html, $openMatches, PREG_OFFSET_CAPTURE); + @preg_match_all("/<\/{$tag}>/i", $html, $closeMatches, PREG_OFFSET_CAPTURE); + + $allTags = []; + foreach ($openMatches[0] as $m) { + $allTags[] = [ + 'offset' => $m[1], + 'type' => 'open', + 'content' => $m[0], + 'length' => strlen($m[0]) + ]; + } + foreach ($closeMatches[0] as $m) { + $allTags[] = [ + 'offset' => $m[1], + 'type' => 'close', + 'content' => $m[0], + 'length' => strlen($m[0]) + ]; + } + usort($allTags, function($a, $b) { + return $a['offset'] - $b['offset']; + }); + + $tagStack = []; + $removeOffsets = []; + foreach ($allTags as $t) { + if ($t['type'] == 'open') { + array_push($tagStack, $t); + } else { + if (!empty($tagStack)) { + array_pop($tagStack); + } else { + $removeOffsets[] = [ + 'pos' => $t['offset'], + 'len' => $t['length'], + 'content' => $t['content'] + ]; + } + } + } + foreach ($tagStack as $t) { + $removeOffsets[] = [ + 'pos' => $t['offset'], + 'len' => $t['length'], + 'content' => $t['content'] + ]; + } + + // 倒序删除,避免偏移错乱 + usort($removeOffsets, function($a, $b) { + return $b['pos'] - $a['pos']; + }); + foreach ($removeOffsets as $item) { + if ($item['pos'] >= 0 && $item['pos'] < strlen($html)) { + $html = substr_replace($html, '', $item['pos'], $item['len']); + } + } + } + return $html; + } + + /** + * 优化文本格式(合并多余空格,规范myfigure标签前后空格) + * @param string $html + * @return string + */ + private function optimizeFormat($html) { + $html = preg_replace('/\s{2,}/', ' ', trim($html)); + $html = preg_replace('/<\/myfigure>([A-Za-z0-9])/is', ' $1', $html); + $html = preg_replace('/([a-zA-Z0-9])1001, 2=>1002]) + * @return array ['status'=>状态码, 'data'=>处理后文本] + * status: 2-空输入, 4-无匹配/已处理, 5-处理异常, 1-处理成功 + */ + public function dealTableStr($html = '', $aTableMain = []) { + // 1. 基础输入校验 + if (!is_string($html) || trim($html) === '') { + return ['status' => 2, 'data' => '']; + } + // 2. 超大字符串拦截(防止内存溢出) + if (strlen($html) > self::MAX_HTML_LENGTH) { + $this->logWarning('处理文本超出最大长度限制', ['length' => strlen($html)]); + return ['status' => 4, 'data' => $html]; + } + + // 初始化主键映射数组(过滤非数字键/值,保证数据合法性) + if(!empty($aTableMain)){ + $aTableMainNew = []; + foreach ($aTableMain as $key => $value) { + if (!ctype_digit((string)$key) || !ctype_digit((string)$value)) { + continue; + } + $keyInt = (int)$key; + $aTableMainNew[$keyInt + 1] = $value; + } + $this->aTableMain = $aTableMainNew; + } + + $originalHtml = $html; + $hasReplace = false; + + try { + // 核心:直接在原始HTML中匹配所有符合规则的Table(含嵌套标签) + $html = $this->replaceTableInHtml($html, $hasReplace); + + // 清理冗余内容(仅替换成功后执行,保证输出整洁) + if ($hasReplace) { + $html = $this->cleanRedundantStyles($html); + $html = $this->cleanRedundantPunctuation($html); + $html = $this->cleanUnclosedTags($html); + $html = $this->optimizeFormat($html); + $html = $this->cleanDuplicateNestedTags($html); + } + + } catch (\Throwable $e) { + // 异常兜底:捕获所有异常,记录详细日志,返回原始文本避免业务中断 + $pregError = preg_last_error(); + $pregErrorMsg = $this->getPregErrorMsg($pregError); + $errorMsg = sprintf( + '[%s] TableTagProcessor-dealTableStr 异常:%s | 文件:%s | 行:%d | 入参MD5:%s | 正则错误:%s', + date('Y-m-d H:i:s'), + $e->getMessage(), + $e->getFile(), + $e->getLine(), + md5($originalHtml), + $pregErrorMsg + ); + $this->logError($errorMsg); + return ['status' => 5, 'data' => $originalHtml]; + } + + return [ + 'status' => $hasReplace ? 1 : 4, + 'data' => $html + ]; + } + + /** + * 核心方法:直接在HTML中匹配并替换Table(支持嵌套标签) + * @param string $html + * @param bool $hasReplace 引用传递:标记是否有替换 + * @return string + */ + private function replaceTableInHtml($html, &$hasReplace) { + $styleTagsPattern = implode('|', self::STYLE_TAGS); + $styleTagsRegex = "(?:<(?:{$styleTagsPattern})[^>]*>)*\s*"; // 匹配任意嵌套样式标签 + $styleTagsCloseRegex = "\s*(?:<\/(?:{$styleTagsPattern})>)*"; + + // 规则1:匹配带括号的Table(如 (Table 82)、(Table 1.)) + $pattern1 = "/\(\s*{$styleTagsRegex}table\s*(\d+)\s*([\.,:]{0,1}){$styleTagsCloseRegex}\s*\)/iu"; + $html = preg_replace_callback($pattern1, function($matches) use (&$hasReplace) { + $num = $matches[1]; + $numInt = intval($num); + $suffix = $matches[2] ?? ''; + + // 校验:纯数字 + 有映射ID + 未被mytable包裹(避免重复替换) + if (!ctype_digit($num) || !isset($this->aTableMain[$numInt]) || + $this->isMatchPositionHasMyTableTag($matches[0], "Table {$num}")) { + return $matches[0]; + } + + $primaryId = $this->aTableMain[$numInt]; + // 核心修改:规范mytable标签格式(属性值加双引号、去掉两侧空格) + $baseTag = "<".self::PROCESSED_TAG." data-id=\"{$primaryId}\">Table {$num}"; + $target = "({$baseTag}{$suffix})"; + + $hasReplace = true; + $this->logInfo("替换带括号Table成功", ['num' => $num, 'primary_id' => $primaryId]); + return $target; + }, $html); + + // 规则2:匹配无括号的Table(如 Table 1、Table 2:Table 3.) + $pattern2 = "/{$styleTagsRegex}table\s*(\d+)\s*([\.,:]{0,1}){$styleTagsCloseRegex}(?![a-zA-Z])/iu"; + $html = preg_replace_callback($pattern2, function($matches) use (&$hasReplace) { + $num = $matches[1]; + $numInt = intval($num); + $suffix = $matches[2] ?? ''; + + // 校验:纯数字 + 有映射ID + 未被mytable包裹 + 不是数字+字母组合 + if (!ctype_digit($num) || !isset($this->aTableMain[$numInt]) || + $this->isMatchPositionHasMyTableTag($matches[0], "Table {$num}")) { + return $matches[0]; + } + + $primaryId = $this->aTableMain[$numInt]; + // 核心修改:规范mytable标签格式(属性值加双引号、去掉两侧空格) + $baseTag = "<".self::PROCESSED_TAG." data-id=\"{$primaryId}\">Table {$num}"; + $target = "{$baseTag}{$suffix}"; + + $hasReplace = true; + $this->logInfo("替换无括号Table成功", ['num' => $num, 'primary_id' => $primaryId]); + return $target; + }, $html); + + return $html; + } + + /** + * 清理mytable标签周围的冗余样式标签 + * @param string $html + * @return string + */ + private function cleanRedundantStyles($html) { + foreach (self::STYLE_TAGS as $tag) { + $pattern = '/<' . $tag . '>\s*<'.self::PROCESSED_TAG.'([^>]*?)>(.*?)<\/'.self::PROCESSED_TAG.'>([\.,:]{0,1})\s*<\/' . $tag . '>/is'; + $html = @preg_replace($pattern, '<'.self::PROCESSED_TAG.'$1>$2$3', $html); + } + // 清理孤立的样式闭标签(避免标签残留) + $html = preg_replace('/<\/('.implode('|', self::STYLE_TAGS).')>(?![^<]*<\1>)/is', '', $html); + return $html; + } + + /** + * 清理mytable标签后的冗余标点(保证格式整洁) + * @param string $html + * @return string + */ + private function cleanRedundantPunctuation($html) { + // 核心修改:适配新的mytable标签格式(data-id="数字") + $html = preg_replace('/<'.self::PROCESSED_TAG.' data-id="(\d+)">\(Table \d+\)<\/'.self::PROCESSED_TAG.'>\)\./i', '<'.self::PROCESSED_TAG.' data-id="$1">(Table $1).', $html); + $html = preg_replace('/<\/'.self::PROCESSED_TAG.'>\)\.([\.,:]{0,1})/', ')$1', $html); + $html = preg_replace('/<\/'.self::PROCESSED_TAG.'>\.\)([\.,:]{0,1})/', ')$1', $html); + $html = preg_replace('/<\/'.self::PROCESSED_TAG.'>([\.,:]){2,}/', '$1', $html); + $html = preg_replace('/<'.self::PROCESSED_TAG.' data-id="(\d+)">\((Table \d+)\s*<\/'.self::PROCESSED_TAG.'>([\.,:]{0,1})/i', + '<'.self::PROCESSED_TAG.' data-id="$1">($2)$3', $html); + return $html; + } + + /** + * 清理孤立的样式标签(栈算法兜底,避免标签不闭合) + * @param string $html + * @return string + */ + private function cleanUnclosedTags($html) { + // 清理mytable后孤立的样式闭标签 + foreach (self::STYLE_TAGS as $tag) { + $html = @preg_replace('/(<\/'.self::PROCESSED_TAG.'>)\s*<\/' . $tag . '>/i', '$1', $html); + } + + // 栈算法清理其他孤立标签 + foreach (self::STYLE_TAGS as $tag) { + @preg_match_all("/<{$tag}\b[^>]*>/i", $html, $openMatches, PREG_OFFSET_CAPTURE); + @preg_match_all("/<\/{$tag}>/i", $html, $closeMatches, PREG_OFFSET_CAPTURE); + + $allTags = []; + foreach ($openMatches[0] as $m) { + $allTags[] = ['offset' => $m[1], 'type' => 'open', 'content' => $m[0], 'length' => strlen($m[0])]; + } + foreach ($closeMatches[0] as $m) { + $allTags[] = ['offset' => $m[1], 'type' => 'close', 'content' => $m[0], 'length' => strlen($m[0])]; + } + usort($allTags, function($a, $b) { + return $a['offset'] - $b['offset']; + }); + + $tagStack = []; + $removeOffsets = []; + foreach ($allTags as $t) { + if ($t['type'] == 'open') { + array_push($tagStack, $t); + } else { + if (!empty($tagStack)) { + array_pop($tagStack); + } else { + $removeOffsets[] = $t; + } + } + } + foreach ($tagStack as $t) { + $removeOffsets[] = $t; + } + + // 倒序删除,避免偏移错乱 + usort($removeOffsets, function($a, $b) { + return $b['offset'] - $a['offset']; + }); + foreach ($removeOffsets as $item) { + if ($item['offset'] >= 0 && $item['offset'] < strlen($html)) { + $html = substr_replace($html, '', $item['offset'], $item['length']); + } + } + } + return $html; + } + + /** + * 优化文本格式(合并多余空格,规范标签前后空格) + * @param string $html + * @return string + */ + private function optimizeFormat($html) { + $html = preg_replace('/\s{2,}/', ' ', trim($html)); + $html = preg_replace('/<\/'.self::PROCESSED_TAG.'>([A-Za-z0-9])/is', ' $1', $html); + $html = preg_replace('/([a-zA-Z0-9])<'.self::PROCESSED_TAG.'/is', '$1 <'.self::PROCESSED_TAG.'', $html); + return $html; + } + + /** + * 清理重复嵌套的mytable标签(兜底方案) + * @param string $html + * @return string + */ + private function cleanDuplicateNestedTags($html) { + $pattern = '/<'.self::PROCESSED_TAG.'[^>]*>\s*<'.self::PROCESSED_TAG.'([^>]*)>(.*?)<\/'.self::PROCESSED_TAG.'>\s*<\/'.self::PROCESSED_TAG.'>/is'; + $html = preg_replace($pattern, '<'.self::PROCESSED_TAG.'$1>$2', $html); + return $html; + } + + /** + * 判断指定Table内容是否被mytable标签包裹 + * @param string $content 待检查内容 + * @param string $tableText Table文本(如 "Table 1") + * @return bool + */ + private function isMatchPositionHasMyTableTag($content, $tableText) { + $escapedText = preg_quote($tableText, '/'); + $pattern = '/<' . self::PROCESSED_TAG . '[^>]*>\s*' . $escapedText . '\s*<\/' . self::PROCESSED_TAG . '>/is'; + return @preg_match($pattern, $content) === 1; + } + + /** + * 获取正则错误信息(便于调试) + * @param int $pregError 正则错误码 + * @return string + */ + private function getPregErrorMsg($pregError) { + $errorCodes = [ + PREG_INTERNAL_ERROR => '内部错误', + PREG_BACKTRACK_LIMIT_ERROR => '回溯限制超出', + PREG_RECURSION_LIMIT_ERROR => '递归限制超出', + PREG_BAD_UTF8_ERROR => '无效UTF-8字符', + PREG_BAD_UTF8_OFFSET_ERROR => 'UTF-8偏移量无效', + PREG_JIT_STACKLIMIT_ERROR => 'JIT栈限制超出' + ]; + return isset($errorCodes[$pregError]) ? $errorCodes[$pregError] : "未知错误({$pregError})"; + } + + /** + * 记录错误日志(生产环境可对接日志系统) + * @param string $msg + * @param array $context + */ + private function logError($msg, $context = []) { + error_log(json_encode(['level' => 'error', 'msg' => $msg, 'context' => $context, 'time' => date('Y-m-d H:i:s')])); + } + + /** + * 记录警告日志 + * @param string $msg + * @param array $context + */ + private function logWarning($msg, $context = []) { + error_log(json_encode(['level' => 'warning', 'msg' => $msg, 'context' => $context, 'time' => date('Y-m-d H:i:s')])); + } + + /** + * 记录信息日志 + * @param string $msg + * @param array $context + */ + private function logInfo($msg, $context = []) { + error_log(json_encode(['level' => 'info', 'msg' => $msg, 'context' => $context, 'time' => date('Y-m-d H:i:s')])); + } +} \ No newline at end of file From b217ab03fd29aa089c5aed68f669295f20af4e4d Mon Sep 17 00:00:00 2001 From: chengxl Date: Sun, 18 Jan 2026 17:13:58 +0800 Subject: [PATCH 5/5] =?UTF-8?q?=E5=A4=84=E7=90=86=E6=AD=A3=E6=96=87?= =?UTF-8?q?=E5=86=85=E5=AE=B9=E8=A1=A8=E6=A0=BC/=E5=9B=BE=E7=89=87?= =?UTF-8?q?=E7=9B=B8=E5=85=B3=E8=81=94?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- application/common/FigureTagProcessor.php | 42 ++++--------- application/common/TableTagProcessor.php | 76 ++--------------------- 2 files changed, 18 insertions(+), 100 deletions(-) diff --git a/application/common/FigureTagProcessor.php b/application/common/FigureTagProcessor.php index ed6a5c8..97d53d4 100644 --- a/application/common/FigureTagProcessor.php +++ b/application/common/FigureTagProcessor.php @@ -13,11 +13,11 @@ class FigureTagProcessor { * status: 2-空输入, 4-无匹配, 5-处理异常, 1-处理成功 */ public function dealFigureStr($html = '') { - // 1. 基础输入校验 + //验证 if (!is_string($html) || trim($html) === '') { return ['status' => 2, 'data' => '']; } - // 2. 超大字符串拦截 + //超大字符串拦截 if (strlen($html) > self::MAX_HTML_LENGTH) { return ['status' => 4, 'data' => $html]; } @@ -26,22 +26,22 @@ class FigureTagProcessor { $hasReplace = false; try { - // 3. 合并嵌套样式标签 + //合并嵌套样式标签 $mergedHtml = $this->mergeFragmentStyleTags($html); - // 4. 提取纯文本(用于匹配Figure) + //提取纯文本(用于匹配Figure) $plainText = preg_replace('/<[^>]+>/', ' ', $mergedHtml); $plainText = preg_replace('/\s+/', ' ', trim($plainText)); - // 5. 提取所有匹配的Figure数字 + //提取所有匹配的Figure数字 $allMatches = $this->extractAllFigureMatches($plainText); if (empty($allMatches)) { return ['status' => 4, 'data' => $originalHtml]; } - // 6. 替换为myfigure标签 + //替换为myfigure标签 $html = $this->replaceFigureWithTag($html, $allMatches, $hasReplace); - // 7. 清理冗余内容(仅替换成功后执行) + //清理冗余内容(仅替换成功后执行) if ($hasReplace) { $html = $this->cleanRedundantStyles($html); $html = $this->cleanRedundantPunctuation($html); @@ -50,17 +50,6 @@ class FigureTagProcessor { } } catch (\Throwable $e) { - // 8. 异常处理(记录详细日志) - $errorMsg = sprintf( - '[%s] FigureTagProcessor-dealFigureStr 异常:%s | 文件:%s | 行:%d | 入参MD5:%s | 正则错误:%s', - date('Y-m-d H:i:s'), - $e->getMessage(), - $e->getFile(), - $e->getLine(), - md5($originalHtml), - preg_last_error() ? preg_last_error_msg() : '无' - ); - error_log($errorMsg); return ['status' => 5, 'data' => $originalHtml]; } @@ -71,7 +60,7 @@ class FigureTagProcessor { } /** - * 合并嵌套的样式标签(如aaabbb → aaa bbb) + * 合并嵌套的样式标签 * @param string $html * @return string */ @@ -148,8 +137,7 @@ class FigureTagProcessor { ? "({$info['content']})" : $info['content']; - // 核心修改:规范myfigure标签格式(去掉属性值空格、加双引号) - // 最终生成:Figure 1 + //Figure 1 $targetTag = "{$innerContent}"; if (!empty($info['validPunct']) && !$info['hasOuterBracket']) { $targetTag .= $info['validPunct']; @@ -160,7 +148,7 @@ class FigureTagProcessor { ? '/\(\s*(?:<[^>]+>|\s)*Figure(?:<[^>]+>|\s)*' . $num . '\b' . $patternSuffix . '(?:\s*[\.,;:]*\s*|\s*<[^>]+>)*\s*\)/iu' : '/\s*(?:<[^>]+>|\s)*Figure(?:<[^>]+>|\s)*' . $num . '\b' . $patternSuffix . '(?:\s*[\.,;:]*\s*|\s*<[^>]+>)*\s*([\.,:]{0,1})/iu'; - // 执行替换(最多替换1次,避免重复) + //执行替换(最多替换1次,避免重复) $html = @preg_replace($pattern, $targetTag, $html, 1, $count); if ($count > 0) { $hasReplace = true; @@ -177,11 +165,10 @@ class FigureTagProcessor { */ private function cleanRedundantStyles($html) { foreach (self::STYLE_TAGS as $tag) { - // 修改正则:适配 data-id="数字" 的格式 $pattern = '/<' . $tag . '>\s*]*)>(.*?)<\/myfigure>([\.,:]{0,1})\s*<\/' . $tag . '>/is'; $html = @preg_replace($pattern, '$2$3', $html); } - // 清理孤立的样式闭标签 + //清理闭标签 $html = preg_replace('/<\/('.implode('|', self::STYLE_TAGS).')>(?![^<]*<\1>)/is', '', $html); return $html; } @@ -192,29 +179,24 @@ class FigureTagProcessor { * @return string */ private function cleanRedundantPunctuation($html) { - // 修改正则:将 data-id = (\d+) 改为 data-id="(\d+)",适配新格式 $html = preg_replace('/\(Figure \d+\)<\/myfigure>\)\./i', '(Figure $1).', $html); $html = preg_replace('/<\/myfigure>\)\.([\.,:]{0,1})/', ')$1', $html); $html = preg_replace('/<\/myfigure>\.\)([\.,:]{0,1})/', ')$1', $html); $html = preg_replace('/<\/myfigure>([\.,:]){2,}/', '$1', $html); - // 同步修改此处正则的属性格式 $html = preg_replace('/\((Figure \d+)\s*<\/myfigure>([\.,:]{0,1})/i', '($2)$3', $html); return $html; } /** - * 清理孤立的样式标签(优先暴力清理myfigure后标签,再用栈算法兜底) + * 清理孤立的样式标签 * @param string $html * @return string */ private function cleanUnclosedTags($html) { - // 第一步:暴力清理myfigure后孤立的样式闭标签 foreach (self::STYLE_TAGS as $tag) { $html = @preg_replace('/(<\/myfigure>)\s*<\/' . $tag . '>/i', '$1', $html); } - - // 第二步:栈算法清理其他孤立标签 foreach (self::STYLE_TAGS as $tag) { @preg_match_all("/<{$tag}\b[^>]*>/i", $html, $openMatches, PREG_OFFSET_CAPTURE); @preg_match_all("/<\/{$tag}>/i", $html, $closeMatches, PREG_OFFSET_CAPTURE); diff --git a/application/common/TableTagProcessor.php b/application/common/TableTagProcessor.php index 65e19ca..3a7e50f 100644 --- a/application/common/TableTagProcessor.php +++ b/application/common/TableTagProcessor.php @@ -25,17 +25,16 @@ class TableTagProcessor { * status: 2-空输入, 4-无匹配/已处理, 5-处理异常, 1-处理成功 */ public function dealTableStr($html = '', $aTableMain = []) { - // 1. 基础输入校验 + //验证 if (!is_string($html) || trim($html) === '') { return ['status' => 2, 'data' => '']; } - // 2. 超大字符串拦截(防止内存溢出) + //超大字符串拦截(防止内存溢出) if (strlen($html) > self::MAX_HTML_LENGTH) { - $this->logWarning('处理文本超出最大长度限制', ['length' => strlen($html)]); return ['status' => 4, 'data' => $html]; } - // 初始化主键映射数组(过滤非数字键/值,保证数据合法性) + //初始化主键映射数组 if(!empty($aTableMain)){ $aTableMainNew = []; foreach ($aTableMain as $key => $value) { @@ -52,10 +51,10 @@ class TableTagProcessor { $hasReplace = false; try { - // 核心:直接在原始HTML中匹配所有符合规则的Table(含嵌套标签) + //原始HTML中匹配所有符合规则的Table $html = $this->replaceTableInHtml($html, $hasReplace); - // 清理冗余内容(仅替换成功后执行,保证输出整洁) + // 清理冗余内容 if ($hasReplace) { $html = $this->cleanRedundantStyles($html); $html = $this->cleanRedundantPunctuation($html); @@ -65,19 +64,6 @@ class TableTagProcessor { } } catch (\Throwable $e) { - // 异常兜底:捕获所有异常,记录详细日志,返回原始文本避免业务中断 - $pregError = preg_last_error(); - $pregErrorMsg = $this->getPregErrorMsg($pregError); - $errorMsg = sprintf( - '[%s] TableTagProcessor-dealTableStr 异常:%s | 文件:%s | 行:%d | 入参MD5:%s | 正则错误:%s', - date('Y-m-d H:i:s'), - $e->getMessage(), - $e->getFile(), - $e->getLine(), - md5($originalHtml), - $pregErrorMsg - ); - $this->logError($errorMsg); return ['status' => 5, 'data' => $originalHtml]; } @@ -88,9 +74,7 @@ class TableTagProcessor { } /** - * 核心方法:直接在HTML中匹配并替换Table(支持嵌套标签) - * @param string $html - * @param bool $hasReplace 引用传递:标记是否有替换 + * 核心方法:直接在HTML中匹配并替换Table * @return string */ private function replaceTableInHtml($html, &$hasReplace) { @@ -112,12 +96,10 @@ class TableTagProcessor { } $primaryId = $this->aTableMain[$numInt]; - // 核心修改:规范mytable标签格式(属性值加双引号、去掉两侧空格) $baseTag = "<".self::PROCESSED_TAG." data-id=\"{$primaryId}\">Table {$num}"; $target = "({$baseTag}{$suffix})"; $hasReplace = true; - $this->logInfo("替换带括号Table成功", ['num' => $num, 'primary_id' => $primaryId]); return $target; }, $html); @@ -135,12 +117,10 @@ class TableTagProcessor { } $primaryId = $this->aTableMain[$numInt]; - // 核心修改:规范mytable标签格式(属性值加双引号、去掉两侧空格) $baseTag = "<".self::PROCESSED_TAG." data-id=\"{$primaryId}\">Table {$num}"; $target = "{$baseTag}{$suffix}"; $hasReplace = true; - $this->logInfo("替换无括号Table成功", ['num' => $num, 'primary_id' => $primaryId]); return $target; }, $html); @@ -168,7 +148,6 @@ class TableTagProcessor { * @return string */ private function cleanRedundantPunctuation($html) { - // 核心修改:适配新的mytable标签格式(data-id="数字") $html = preg_replace('/<'.self::PROCESSED_TAG.' data-id="(\d+)">\(Table \d+\)<\/'.self::PROCESSED_TAG.'>\)\./i', '<'.self::PROCESSED_TAG.' data-id="$1">(Table $1).', $html); $html = preg_replace('/<\/'.self::PROCESSED_TAG.'>\)\.([\.,:]{0,1})/', ')$1', $html); $html = preg_replace('/<\/'.self::PROCESSED_TAG.'>\.\)([\.,:]{0,1})/', ')$1', $html); @@ -270,47 +249,4 @@ class TableTagProcessor { return @preg_match($pattern, $content) === 1; } - /** - * 获取正则错误信息(便于调试) - * @param int $pregError 正则错误码 - * @return string - */ - private function getPregErrorMsg($pregError) { - $errorCodes = [ - PREG_INTERNAL_ERROR => '内部错误', - PREG_BACKTRACK_LIMIT_ERROR => '回溯限制超出', - PREG_RECURSION_LIMIT_ERROR => '递归限制超出', - PREG_BAD_UTF8_ERROR => '无效UTF-8字符', - PREG_BAD_UTF8_OFFSET_ERROR => 'UTF-8偏移量无效', - PREG_JIT_STACKLIMIT_ERROR => 'JIT栈限制超出' - ]; - return isset($errorCodes[$pregError]) ? $errorCodes[$pregError] : "未知错误({$pregError})"; - } - - /** - * 记录错误日志(生产环境可对接日志系统) - * @param string $msg - * @param array $context - */ - private function logError($msg, $context = []) { - error_log(json_encode(['level' => 'error', 'msg' => $msg, 'context' => $context, 'time' => date('Y-m-d H:i:s')])); - } - - /** - * 记录警告日志 - * @param string $msg - * @param array $context - */ - private function logWarning($msg, $context = []) { - error_log(json_encode(['level' => 'warning', 'msg' => $msg, 'context' => $context, 'time' => date('Y-m-d H:i:s')])); - } - - /** - * 记录信息日志 - * @param string $msg - * @param array $context - */ - private function logInfo($msg, $context = []) { - error_log(json_encode(['level' => 'info', 'msg' => $msg, 'context' => $context, 'time' => date('Y-m-d H:i:s')])); - } } \ No newline at end of file