Merge remote-tracking branch 'origin/master'

This commit is contained in:
wangjinlei
2026-01-18 17:23:31 +08:00
6 changed files with 971 additions and 11 deletions

View File

@@ -0,0 +1,358 @@
<?php
namespace app\api\controller;
use app\api\controller\Base;
use think\Db;
/**
* @title 文章正文内容处理
*/
class Articlemain extends Base
{
/**
 * Create the controller and hand the request object straight to the Base constructor.
 *
 * @param \think\Request|null $request Request instance, passed through unchanged.
 */
public function __construct(\think\Request $request = null)
{
    parent::__construct($request);
}
/**
 * Upload an image that belongs to a table of an article.
 *
 * Reads `article_id` from the POST body and the upload from the `file_name`
 * field, stores the file under public/articleTableImage/<article_id>/ with a
 * random name, and answers with a JSON string.
 *
 * Status codes in the response: 1 = stored, 2 = missing article ID or file,
 * 3 = unknown article or validation failure, 4 = the file could not be stored.
 *
 * @return string JSON with `status`, `msg` and, on success, `data`
 *                ("<article_id><DS><file name>").
 */
public function uploadTableImage()
{
    $aParam = $this->request->post();

    // The ID becomes a directory name below, so force it to a plain positive
    // integer: no "../", no separators, no array payloads.
    $iArticleId = (empty($aParam['article_id']) || !is_scalar($aParam['article_id']))
        ? 0
        : (int)$aParam['article_id'];
    if ($iArticleId <= 0) {
        return json_encode(array('status' => 2,'msg' => 'Please select an article' ));
    }

    // Uploaded file field
    $file = $this->request->file('file_name');
    if (empty($file)) {
        return json_encode(array('status' => 2,'msg' => 'No uploaded file was obtained'));
    }

    // The article must exist
    $aWhere = ['article_id' => $iArticleId];
    $aArticle = Db::name('article')->field('article_id')->where($aWhere)->find();
    if (empty($aArticle)) {
        return json_encode(array('status' => 3,'msg' => 'The article does not exist' ));
    }

    // Size rule (bytes). No 'ext' rule here: pasted images may arrive without
    // a file name extension, so the extension is derived and checked below.
    // NOTE(review): in ThinkPHP 5.0 validate() appears to only register the
    // rules and the actual check happens inside move() - confirm.
    $validate = [
        'size' => 1024000,
    ];
    $fileValid = $file->validate($validate);
    if (!$fileValid) {
        $errorMsg = $file->getError();
        return json_encode(['status' => 3, 'msg' => 'Format/size validation failed:' . $errorMsg]);
    }

    // MIME type => extension for the image formats that may be stored
    $mimeExtMap = [
        'image/jpeg' => 'jpg',
        'image/jpg' => 'jpg',
        'image/png' => 'png',
        'image/gif' => 'gif',
        'image/bmp' => 'bmp',
        'image/x-bmp' => 'bmp',
        'image/webp' => 'webp',
        'image/tiff' => 'tiff',
        'image/x-tiff' => 'tiff',
        'image/svg+xml' => 'svg',
        'image/heic' => 'heic',
        'image/heif' => 'heif',
        'image/avif' => 'avif',
        'image/x-icon' => 'ico',
        'image/vnd.microsoft.icon' => 'ico',
    ];
    $allowedExt = array_unique(array_merge(array_values($mimeExtMap), ['jpeg']));

    // Only keep an extension that is a known image extension; anything else
    // (".php", ".tmp", empty) is replaced by the one implied by the MIME type,
    // so a file can never be stored in the public tree under an executable name.
    $ext = strtolower((string)$file->getExtension());
    if (!in_array($ext, $allowedExt, true)) {
        $mime = $file->getMime();
        $ext = (is_string($mime) && isset($mimeExtMap[$mime])) ? $mimeExtMap[$mime] : '';
    }
    $ext = ($ext === '') ? 'png' : $ext;

    // Random target name
    $sFileName = md5(uniqid(mt_rand(), true)) . '.' . $ext;

    // Target directory, created on demand
    $sImagePath = rtrim(ROOT_PATH,'/') . '/public' . DS . 'articleTableImage' . DS . $iArticleId;
    if (!is_dir($sImagePath) && !mkdir($sImagePath, 0777, true) && !is_dir($sImagePath)) {
        return json_encode(['status' => 4, 'msg' => 'Failed to create the upload directory']);
    }

    $sImageName = $fileValid->move($sImagePath,$sFileName);
    if (!$sImageName) {
        return json_encode(['status' => 4, 'msg' => $fileValid->getError()]);
    }

    // Relative path of the stored image
    return json_encode(['status' => 1, 'msg' => 'Upload successful','data' => $iArticleId .DS.$sFileName]);
}
/**
 * Soft-delete an image of an article, unless the article body still uses it.
 *
 * POST parameters: `article_id`, `ami_id`.
 * An image counts as "bound" when a body row of type 1 carries its ami_id, or
 * when a text row (type 0) contains a <myfigure data-id="ami_id"> tag.
 *
 * Status codes: 1 = deleted, 2 = missing parameter, 3 = unknown article,
 * 4 = unknown image, 5 = image still bound, 6 = update failed.
 *
 * @return string JSON with `status` and `msg`.
 */
public function removeMainImage()
{
    $aParam = $this->request->post();

    // Article ID (non-scalar input is treated as missing)
    $iArticleId = (empty($aParam['article_id']) || !is_scalar($aParam['article_id'])) ? 0 : $aParam['article_id'];
    if (empty($iArticleId)) {
        return json_encode(array('status' => 2,'msg' => 'Please select an article' ));
    }
    // Image ID
    $iAmiId = (empty($aParam['ami_id']) || !is_scalar($aParam['ami_id'])) ? 0 : $aParam['ami_id'];
    if (empty($iAmiId)) {
        return json_encode(array('status' => 2,'msg' => 'Please select the image ID to query'));
    }

    // The article must exist
    $aWhere = ['article_id' => $iArticleId];
    $aArticle = Db::name('article')->field('article_id')->where($aWhere)->find();
    if (empty($aArticle)) {
        return json_encode(array('status' => 3,'msg' => 'The article does not exist' ));
    }

    // The image must exist, be active, and belong to the article
    $aWhere = ['ami_id' => $iAmiId,'state' => 0,'article_id' => $iArticleId];
    $aMainImage = Db::name('article_main_image')->field('ami_id')->where($aWhere)->find();
    if (empty($aMainImage)) {
        return json_encode(array('status' => 4,'msg' => 'Article image information does not exist' ));
    }

    // Body rows that may reference the image: text rows (0) and image rows (1), headings excluded
    $aWhere = ['article_id' => $iArticleId,'type' => ['in',[0,1]],'state' => 0,'is_h1' => ['<>',1],'is_h2' => ['<>',1],'is_h3' => ['<>',1]];
    $aArticleMain = Db::name('article_main')->field('am_id,content,type,ami_id')->where($aWhere)->order('sort asc')->select();

    $iBindImage = 2;
    if (!empty($aArticleMain)) {
        foreach ($aArticleMain as $value) {
            // Image rows are bound through ami_id, independent of their content,
            // so this test must come before the empty-content skip.
            if ($value['type'] == 1 && $value['ami_id'] == $iAmiId) {
                $iBindImage = 1;
                break;
            }
            $sContent = empty($value['content']) ? '' : trim($value['content']);
            if (empty($sContent)) {
                continue;
            }
            // Text rows are bound through an embedded <myfigure data-id="..."> tag
            if ($value['type'] == 0 && $this->hasProcessedTagWithId($value['content'], $iAmiId) == 1) {
                $iBindImage = 1;
                break;
            }
        }
    }
    if ($iBindImage == 1) {
        return json_encode(array('status' => 5,'msg' => 'The main content has been bound to this image and cannot be deleted' ));
    }

    // Soft delete; scoped to the article as well so the update can never touch another article's row
    $aWhere = ['ami_id' => $iAmiId,'article_id' => $iArticleId];
    $aUpdate = ['state' => 1];
    $result = Db::name('article_main_image')->where($aWhere)->limit(1)->update($aUpdate);
    if ($result === false) {
        return json_encode(array('status' => 6,'msg' => 'Image deletion failed' ));
    }
    return json_encode(array('status' => 1,'msg' => 'Image deleted successfully'));
}
/**
 * Soft-delete a table of an article, unless the article body still uses it.
 *
 * POST parameters: `article_id`, `amt_id`.
 * A table counts as "bound" when a body row of type 2 carries its amt_id, or
 * when a text row (type 0) contains a <mytable data-id="amt_id"> tag.
 *
 * Status codes: 1 = deleted, 2 = missing parameter, 3 = unknown article,
 * 4 = unknown table, 5 = table still bound, 6 = update failed.
 *
 * @return string JSON with `status` and `msg`.
 */
public function removeMainTable()
{
    $aParam = $this->request->post();

    // Article ID (non-scalar input is treated as missing)
    $iArticleId = (empty($aParam['article_id']) || !is_scalar($aParam['article_id'])) ? 0 : $aParam['article_id'];
    if (empty($iArticleId)) {
        return json_encode(array('status' => 2,'msg' => 'Please select an article' ));
    }
    // Table ID
    $iAmtId = (empty($aParam['amt_id']) || !is_scalar($aParam['amt_id'])) ? 0 : $aParam['amt_id'];
    if (empty($iAmtId)) {
        return json_encode(array('status' => 2,'msg' => 'Please select the table ID to query'));
    }

    // The article must exist
    $aWhere = ['article_id' => $iArticleId];
    $aArticle = Db::name('article')->field('article_id')->where($aWhere)->find();
    if (empty($aArticle)) {
        return json_encode(array('status' => 3,'msg' => 'The article does not exist' ));
    }

    // The table must exist, be active, and belong to the article
    $aWhere = ['amt_id' => $iAmtId,'state' => 0,'article_id' => $iArticleId];
    $aMainTable = Db::name('article_main_table')->field('amt_id')->where($aWhere)->find();
    if (empty($aMainTable)) {
        return json_encode(array('status' => 4,'msg' => 'Article table information does not exist' ));
    }

    // Body rows that may reference the table: text rows (0) and table rows (2), headings excluded
    $aWhere = ['article_id' => $iArticleId,'type' => ['in',[0,2]],'state' => 0,'is_h1' => ['<>',1],'is_h2' => ['<>',1],'is_h3' => ['<>',1]];
    $aArticleMain = Db::name('article_main')->field('am_id,content,type,amt_id')->where($aWhere)->order('sort asc')->select();

    $iBindTable = 2;
    if (!empty($aArticleMain)) {
        foreach ($aArticleMain as $value) {
            // Table rows are bound through amt_id, independent of their content,
            // so this test must come before the empty-content skip.
            if ($value['type'] == 2 && $value['amt_id'] == $iAmtId) {
                $iBindTable = 1;
                break;
            }
            $sContent = empty($value['content']) ? '' : trim($value['content']);
            if (empty($sContent)) {
                continue;
            }
            // Text rows are bound through an embedded <mytable data-id="..."> tag
            if ($value['type'] == 0 && $this->hasProcessedTagWithId($value['content'], $iAmtId, 'mytable') == 1) {
                $iBindTable = 1;
                break;
            }
        }
    }
    if ($iBindTable == 1) {
        return json_encode(array('status' => 5,'msg' => 'The main content is already bound to this table and cannot be deleted' ));
    }

    // Soft delete; scoped to the article as well so the update can never touch another article's row
    $aWhere = ['amt_id' => $iAmtId,'article_id' => $iArticleId];
    $aUpdate = ['state' => 1];
    $result = Db::name('article_main_table')->where($aWhere)->limit(1)->update($aUpdate);
    if ($result === false) {
        return json_encode(array('status' => 6,'msg' => 'Table deletion failed' ));
    }
    return json_encode(array('status' => 1,'msg' => 'Table deleted successfully'));
}
/**
 * Check whether $content contains <TAG data-id="ID">...</TAG> for the given tag name and ID.
 *
 * The opening tag must carry data-id as its only attribute (single or double
 * quotes). The inner text may span several lines.
 *
 * NOTE(review): this helper is public on a controller class; if the framework
 * exposes public controller methods as routes, consider moving it - confirm.
 *
 * @param string     $content   HTML fragment to search.
 * @param int|string $primaryId ID expected in the data-id attribute.
 * @param string     $sLable    Tag name, 'myfigure' by default.
 * @return int 1 = tag found, 2 = empty/invalid arguments, 3 = tag not found.
 */
public function hasProcessedTagWithId($content = '', $primaryId = 0, $sLable = 'myfigure')
{
    if (empty($content) || empty($primaryId)) {
        return 2;
    }
    // Arrays/objects cannot be turned into a pattern; treat them like missing arguments.
    if (!is_string($content) || !is_scalar($primaryId) || !is_string($sLable)) {
        return 2;
    }
    $escapedTagName = preg_quote($sLable, '/');
    $escapedId = preg_quote((string)$primaryId, '/');
    // "s": let "." cross line breaks, otherwise a tag whose text contains a
    // newline would be reported as absent.
    $pattern = "/<{$escapedTagName}\s+data-id\s*=\s*['\"]{$escapedId}['\"]\s*>(.*?)<\/{$escapedTagName}>/is";
    return preg_match($pattern, $content) === 1 ? 1 : 3;
}
/**
 * List the active images of an article.
 *
 * POST parameter: `article_id`.
 * Status codes: 1 = success, 2 = missing article ID, 3 = unknown article.
 *
 * @return string JSON with `status`, `msg` and, on success, `data` (rows with ami_id and title).
 */
public function getArticleMainImage()
{
    $post = $this->request->post();

    $articleId = $post['article_id'] ?? 0;
    if (empty($articleId)) {
        return json_encode(['status' => 2, 'msg' => 'Please select an article']);
    }

    // Make sure the article exists before listing anything
    $article = Db::name('article')
        ->field('article_id')
        ->where(['article_id' => $articleId])
        ->find();
    if (empty($article)) {
        return json_encode(['status' => 3, 'msg' => 'The article does not exist']);
    }

    // Active (state = 0) images of that article
    $images = Db::name('article_main_image')
        ->field('ami_id,title')
        ->where(['state' => 0, 'article_id' => $articleId])
        ->select();

    return json_encode(['status' => 1, 'msg' => 'success', 'data' => $images]);
}
/**
 * List the active tables of an article, ordered by amt_id.
 *
 * POST parameter: `article_id`.
 * Status codes: 1 = success, 2 = missing article ID, 3 = unknown article.
 *
 * @return string JSON with `status`, `msg` and, on success, `data` (rows with amt_id and title).
 */
public function getArticleMainTable()
{
    $post = $this->request->post();

    $articleId = $post['article_id'] ?? 0;
    if (empty($articleId)) {
        return json_encode(['status' => 2, 'msg' => 'Please select an article']);
    }

    // Make sure the article exists before listing anything
    $article = Db::name('article')
        ->field('article_id')
        ->where(['article_id' => $articleId])
        ->find();
    if (empty($article)) {
        return json_encode(['status' => 3, 'msg' => 'The article does not exist']);
    }

    // Active (state = 0) tables of that article
    $tables = Db::name('article_main_table')
        ->field('amt_id,title')
        ->where(['state' => 0, 'article_id' => $articleId])
        ->order('amt_id asc')
        ->select();

    return json_encode(['status' => 1, 'msg' => 'success', 'data' => $tables]);
}
/**
 * Fetch the details of one active image of an article.
 *
 * POST parameters: `article_id`, `ami_id`.
 * Status codes: 1 = success, 2 = missing parameter, 3 = unknown article.
 * `data` holds whatever the image lookup returned, including an empty
 * result when no matching image exists.
 *
 * @return string JSON with `status`, `msg` and, on success, `data` (ami_id, title, url, note).
 */
public function getMainImageInfo()
{
    $post = $this->request->post();

    $articleId = $post['article_id'] ?? 0;
    if (empty($articleId)) {
        return json_encode(['status' => 2, 'msg' => 'Please select an article']);
    }

    $imageId = $post['ami_id'] ?? 0;
    if (empty($imageId)) {
        return json_encode(['status' => 2, 'msg' => 'Please select the image ID to query']);
    }

    // Make sure the article exists
    $article = Db::name('article')
        ->field('article_id')
        ->where(['article_id' => $articleId])
        ->find();
    if (empty($article)) {
        return json_encode(['status' => 3, 'msg' => 'The article does not exist']);
    }

    // Active image with that ID inside that article
    $image = Db::name('article_main_image')
        ->field('ami_id,title,url,note')
        ->where(['state' => 0, 'article_id' => $articleId, 'ami_id' => $imageId])
        ->find();

    return json_encode(['status' => 1, 'msg' => 'success', 'data' => $image]);
}
/**
 * Fetch the details of one active table of an article.
 *
 * POST parameters: `article_id`, `amt_id`.
 * Status codes: 1 = success, 2 = missing parameter, 3 = unknown article.
 * `data` holds whatever the table lookup returned, including an empty
 * result when no matching table exists.
 *
 * @return string JSON with `status`, `msg` and, on success, `data`
 *                (amt_id, type, table_data, url, title, note).
 */
public function getMainTableInfo()
{
    $post = $this->request->post();

    $articleId = $post['article_id'] ?? 0;
    if (empty($articleId)) {
        return json_encode(['status' => 2, 'msg' => 'Please select an article']);
    }

    $tableId = $post['amt_id'] ?? 0;
    if (empty($tableId)) {
        return json_encode(['status' => 2, 'msg' => 'Please select the table ID to query']);
    }

    // Make sure the article exists
    $article = Db::name('article')
        ->field('article_id')
        ->where(['article_id' => $articleId])
        ->find();
    if (empty($article)) {
        return json_encode(['status' => 3, 'msg' => 'The article does not exist']);
    }

    // Active table with that ID inside that article
    $table = Db::name('article_main_table')
        ->field('amt_id,type,table_data,url,title,note')
        ->where(['state' => 0, 'article_id' => $articleId, 'amt_id' => $tableId])
        ->find();

    return json_encode(['status' => 1, 'msg' => 'success', 'data' => $table]);
}
}

View File

@@ -4196,4 +4196,72 @@ class Production extends Base
$sContent .= $sReferences;
return ['status' => 1,'msg' => 'success','data' => ['references' => $sContent,'references_list' => $aReferencesLists]];
}
/**
 * Draft generation: link figure/table mentions in the article body to their records.
 *
 * Reads `article_id` from the POST body, runs every non-heading text row of
 * the article through FigureTagProcessor and then TableTagProcessor, and
 * saves the rows whose content was rewritten.
 *
 * Status codes: 1 = success, 2 = missing article ID, 3 = no body text or
 * nothing to rewrite, 4 = saving failed.
 *
 * @return string JSON with `status` and `msg`.
 */
public function dealMainFigureOrTable()
{
    // Request parameters (the method takes no arguments, so POST is the only source)
    $aParam = $this->request->post();

    $iArticleId = empty($aParam['article_id']) ? '' : $aParam['article_id'];
    if (empty($iArticleId)) {
        return json_encode(array('status' => 2,'msg' => 'Please select an article' ));
    }

    // Text rows of the body, headings excluded
    $aWhere = ['article_id' => $iArticleId,'type' => 0,'state' => 0,'is_h1' => ['<>',1],'is_h2' => ['<>',1],'is_h3' => ['<>',1]];
    $aTextMain = Db::name('article_main')->field('am_id,content')->where($aWhere)->order('sort asc')->select();
    if (empty($aTextMain)) {
        return json_encode(array('status' => 3,'msg' => 'The content is empty' ));
    }

    // Image rows of the body, in display order
    $aWhere = ['article_id' => $iArticleId,'type' => 1,'state' => 0];
    $aImageMain = Db::name('article_main')->where($aWhere)->order('sort asc')->column('ami_id');

    // Table rows of the body, in display order
    $aWhere = ['article_id' => $iArticleId,'type' => 2,'state' => 0];
    $aTableMain = Db::name('article_main')->where($aWhere)->order('sort asc')->column('amt_id');

    $aUpdate = [];
    $oFigureTagProcessor = new \app\common\FigureTagProcessor;
    $oTableTagProcessor = new \app\common\TableTagProcessor;
    foreach ($aTextMain as $value) {
        $sContent = empty($value['content']) ? '' : trim($value['content']);
        if (empty($sContent)) {
            continue;
        }

        // Figures first; on anything but status 1 continue with the untouched text
        $aFigureContent = $oFigureTagProcessor->dealFigureStr($sContent,$aImageMain);
        $iStatus = empty($aFigureContent['status']) ? 0 : $aFigureContent['status'];
        $sData = empty($aFigureContent['data']) ? '' : $aFigureContent['data'];
        if ($iStatus != 1) {
            $sData = $sContent;
        } else {
            $aUpdate[$value['am_id']] = ['am_id' => $value['am_id'],'content' => $sData];
        }

        // Tables second, working on the figure-processed text so both rewrites end up in one row
        $aTableContent = $oTableTagProcessor->dealTableStr($sData,$aTableMain);
        $iStatus = empty($aTableContent['status']) ? 0 : $aTableContent['status'];
        $sData = empty($aTableContent['data']) ? $sContent : $aTableContent['data'];
        if ($iStatus != 1) {
            continue;
        }
        $aUpdate[$value['am_id']] = ['am_id' => $value['am_id'],'content' => $sData];
    }

    // Batch save
    if (empty($aUpdate)) {
        return json_encode(array('status' => 3,'msg' => 'No table or image data to be processed was found' ));
    }
    $oArticleMain = new \app\common\ArticleMain();
    $result = $oArticleMain->saveAll($aUpdate);
    if ($result === false) {
        return json_encode(array('status' => 4,'msg' => 'Operation failed'.json_encode($aUpdate)));
    }
    return json_encode(array('status' => 1,'msg' => 'success'));
}
}

View File

@@ -659,9 +659,6 @@ class Workbench extends Base
return json_encode(['status' => 7,'msg' => 'The article is not in the review status']);
}
//查询期刊信息
$aWhere = ['journal_id' => $aArticle['article_id'],'state' => 0];
$aJournal = Db::name('journal')->field('title as journal_name,website')->find();
//查询期刊信息
if(empty($aArticle['journal_id'])){
return json_encode(array('status' => 8,'msg' => 'The article is not associated with a journal' ));
@@ -721,7 +718,8 @@ class Workbench extends Base
//发邮件
//邮箱
$email = empty($aUser[$iUserId]['email']) ? '' : $aUser[$iUserId]['email'];
// $email = empty($aUser[$iUserId]['email']) ? '' : $aUser[$iUserId]['email'];
$email = empty($aJournal['email']) ? '' : $aJournal['email'];
if(empty($email)){
return json_encode(['status' => 8,'msg' => 'Edit email as empty']);
}
@@ -793,9 +791,6 @@ class Workbench extends Base
return json_encode(['status' => 7,'msg' => 'The article is not in the review status']);
}
//查询期刊信息
$aWhere = ['journal_id' => $aArticle['article_id'],'state' => 0];
$aJournal = Db::name('journal')->field('title as journal_name,website')->find();
//查询期刊信息
if(empty($aArticle['journal_id'])){
return json_encode(array('status' => 8,'msg' => 'The article is not associated with a journal' ));
@@ -807,10 +802,10 @@ class Workbench extends Base
}
//判断编辑的操作权限
$iEditorId = empty($aJournal['editor_id']) ? 0 : $aJournal['editor_id'];
if($iEditorId != $iUserId){
return json_encode(array('status' => 10,'msg' => 'This article is not authorized for operation under the journal you are responsible for' ));
}
// $iEditorId = empty($aJournal['editor_id']) ? 0 : $aJournal['editor_id'];
// if($iEditorId != $iUserId){
// return json_encode(array('status' => 10,'msg' => 'This article is not authorized for operation under the journal you are responsible for' ));
// }
//更新文章状态为邀请
$aWhere = ['art_rev_id' => $iArtRevId,'state' => 4];

View File

@@ -0,0 +1,13 @@
<?php
namespace app\common;
use think\Model;
/**
 * Model bound to the t_article_main table.
 */
class ArticleMain extends Model
{
    // Table name, set explicitly because it differs from the model name.
    protected $table = 't_article_main';

    // Columns intended to be mass-assignable.
    // NOTE(review): `$fillable` is the Eloquent property name; confirm that
    // think\Model reads it (ThinkPHP's own allow-list property is `$field`).
    protected $fillable = [
        'am_id',
        'article_id',
        'type',
        'content',
        'ami_id',
        'amt_id',
        'is_h1',
        'is_h2',
        'is_h3',
        'sort',
        'remark',
        'ctime',
        'state',
        'is_proofread',
        'proofread_time',
    ];
}

View File

@@ -0,0 +1,274 @@
<?php
namespace app\common;
class FigureTagProcessor {
// 可配置的样式标签列表(解耦)
const STYLE_TAGS = ['i', 'b', 'font', 'strong', 'em','blue'];
// 最大处理字符串长度(避免内存溢出)
const MAX_HTML_LENGTH = 100000;
/**
 * Replace "Figure N" mentions in an HTML fragment with <myfigure> tags and tidy up around them.
 *
 * @param string     $html       HTML fragment to process.
 * @param array|null $aImageMain Optional ordered list of image primary keys;
 *                               element 0 belongs to Figure 1, element 1 to
 *                               Figure 2, and so on. When an array is given,
 *                               data-id carries the primary key and mentions
 *                               without a mapped key are left untouched.
 *                               When omitted (null), data-id carries the
 *                               figure number itself.
 * @return array ['status' => int, 'data' => string]
 *               status 1 = replaced, 2 = empty input, 4 = nothing replaced
 *               (or input longer than MAX_HTML_LENGTH), 5 = processing error.
 */
public function dealFigureStr($html = '', $aImageMain = null)
{
    if (!is_string($html) || trim($html) === '') {
        return ['status' => 2, 'data' => ''];
    }
    // Refuse very large inputs
    if (strlen($html) > self::MAX_HTML_LENGTH) {
        return ['status' => 4, 'data' => $html];
    }

    // Figure number => primary key (same 1-based convention TableTagProcessor uses)
    $aIdMap = null;
    if (is_array($aImageMain)) {
        $aIdMap = [];
        foreach ($aImageMain as $key => $value) {
            if (!is_scalar($value) || !ctype_digit((string)$key) || !ctype_digit((string)$value)) {
                continue;
            }
            $aIdMap[(int)$key + 1] = $value;
        }
    }

    $originalHtml = $html;
    $hasReplace = false;
    try {
        // Matching is done on a plain-text view of the fragment
        $mergedHtml = $this->mergeFragmentStyleTags($html);
        $plainText = (string)preg_replace('/<[^>]+>/', ' ', (string)$mergedHtml);
        $plainText = (string)preg_replace('/\s+/', ' ', trim($plainText));

        $allMatches = $this->extractAllFigureMatches($plainText);
        if (empty($allMatches)) {
            return ['status' => 4, 'data' => $originalHtml];
        }

        $html = $this->replaceFigureWithTag($html, $allMatches, $hasReplace, $aIdMap);

        // Clean-up only makes sense after at least one replacement
        if ($hasReplace) {
            $html = $this->cleanRedundantStyles($html);
            $html = $this->cleanRedundantPunctuation($html);
            $html = $this->cleanUnclosedTags($html);
            $html = $this->optimizeFormat($html);
        }
    } catch (\Throwable $e) {
        return ['status' => 5, 'data' => $originalHtml];
    }

    // A failed regex step leaves null behind; never hand that to the caller
    if (!is_string($html)) {
        return ['status' => 5, 'data' => $originalHtml];
    }
    return [
        'status' => $hasReplace ? 1 : 4,
        'data' => $hasReplace ? $html : $originalHtml
    ];
}
/**
 * Merge adjacent identical style tags and strip noise inside parentheses.
 *
 * The result is only used to build the plain-text view for matching.
 *
 * @param string $html
 * @return string
 */
private function mergeFragmentStyleTags($html) {
    foreach (self::STYLE_TAGS as $tag) {
        // <tag>a</tag><tag>b</tag>  =>  "a b"
        $pattern = '/(?:<' . $tag . '>)\s*([^<]+?)\s*<\/' . $tag . '>(?:\s*<' . $tag . '>)\s*([^<]+?)\s*<\/' . $tag . '>/is';
        while (@preg_match($pattern, $html)) {
            $html = preg_replace_callback($pattern, function($matches) {
                return trim($matches[1]) . ' ' . trim($matches[2]);
            }, $html);
        }
    }
    // Drop tags and dots that sit between a number and the rest of a parenthesised group
    $html = preg_replace('/(\(.*?\d+)(?:\s*<[^>]+>)*\s*\.*\s*(?:<[^>]+>)*(\s*.*?\))/is', '$1$2', $html);
    $html = preg_replace('/\(\s+/', '(', $html);
    $html = preg_replace('/\s+\)/', ')', $html);
    return $html;
}
/**
 * Collect the figure numbers mentioned in the plain text.
 *
 * A mention only counts when the number is not followed by a letter (directly
 * or after whitespace). Each number is recorded once; the parenthesised form
 * wins over the bare form.
 *
 * @param string $plainText
 * @return array Keyed by figure number, sorted descending; each entry has
 *               'hasOuterBracket', 'validPunct' and 'content'.
 */
private function extractAllFigureMatches($plainText) {
    $allMatches = [];
    $processedNums = [];
    // Parenthesised form, e.g. "(Figure 1.)"
    $pattern1 = '/\(Figure\s*(\d+)\b(?!\p{L}|\s+\p{L})(?:\s*[\.,;:]*\s*)\)\s*([\.,:]{0,1})/iu';
    if (@preg_match_all($pattern1, $plainText, $matchesFull, PREG_SET_ORDER)) {
        foreach ($matchesFull as $match) {
            $num = $match[1];
            if (!ctype_digit($num) || in_array($num, $processedNums)) continue;
            $processedNums[] = $num;
            $allMatches[$num] = [
                'hasOuterBracket' => true,
                'validPunct' => $match[2] ?? '',
                'content' => "Figure {$num}"
            ];
        }
    }
    // Bare form, e.g. "Figure 1."
    $pattern2 = '/Figure\s*(\d+)\b(?!\p{L}|\s+\p{L})(?:\s*[\.,;:]*\s*)\s*([\.,:]{0,1})/iu';
    if (@preg_match_all($pattern2, $plainText, $matchesOther, PREG_SET_ORDER)) {
        foreach ($matchesOther as $match) {
            $num = $match[1];
            if (!ctype_digit($num) || in_array($num, $processedNums)) continue;
            $processedNums[] = $num;
            $allMatches[$num] = [
                'hasOuterBracket' => false,
                'validPunct' => $match[2] ?? '',
                'content' => "Figure {$num}"
            ];
        }
    }
    krsort($allMatches);
    return $allMatches;
}
/**
 * Wrap the first occurrence of every collected figure mention in a <myfigure> tag.
 *
 * @param string     $html
 * @param array      $allMatches Output of extractAllFigureMatches().
 * @param bool       $hasReplace Set to true (by reference) once anything was replaced.
 * @param array|null $aIdMap     Figure number => primary key. null keeps the
 *                               figure number as data-id; with an array,
 *                               numbers without an entry are skipped.
 * @return string
 */
private function replaceFigureWithTag($html, $allMatches, &$hasReplace, $aIdMap = null) {
    foreach ($allMatches as $num => $info) {
        // Resolve the value stored in data-id
        if ($aIdMap === null) {
            $dataId = $num;
        } elseif (isset($aIdMap[(int)$num])) {
            $dataId = $aIdMap[(int)$num];
        } else {
            continue;
        }

        $innerContent = $info['hasOuterBracket']
            ? "({$info['content']})"
            : $info['content'];
        // e.g. <myfigure data-id="1">Figure 1</myfigure>
        $targetTag = "<myfigure data-id=\"{$dataId}\">{$innerContent}</myfigure>";
        if (!empty($info['validPunct']) && !$info['hasOuterBracket']) {
            $targetTag .= $info['validPunct'];
        }
        $patternSuffix = '(?!\p{L}|\s+\p{L})';
        $pattern = $info['hasOuterBracket']
            ? '/\(\s*(?:<[^>]+>|\s)*Figure(?:<[^>]+>|\s)*' . $num . '\b' . $patternSuffix . '(?:\s*[\.,;:]*\s*|\s*<[^>]+>)*\s*\)/iu'
            : '/\s*(?:<[^>]+>|\s)*Figure(?:<[^>]+>|\s)*' . $num . '\b' . $patternSuffix . '(?:\s*[\.,;:]*\s*|\s*<[^>]+>)*\s*([\.,:]{0,1})/iu';
        // At most one replacement per figure number
        $count = 0;
        $replaced = @preg_replace($pattern, $targetTag, $html, 1, $count);
        if ($replaced === null) {
            // Regex failure (e.g. invalid UTF-8): keep the text as it was
            continue;
        }
        $html = $replaced;
        if ($count > 0) {
            $hasReplace = true;
            error_log("[FigureTagProcessor] 替换成功 - ID:{$num} 括号:".($info['hasOuterBracket']?'是':'否'));
        }
    }
    return $html;
}
/**
 * Unwrap <myfigure> tags that ended up as the sole content of a style tag.
 *
 * <b><myfigure ...>Figure 1</myfigure>.</b> becomes <myfigure ...>Figure 1</myfigure>.
 *
 * This method deliberately does not delete closing style tags on its own: a
 * blanket removal would strip the closing half of every balanced <b>/<i> pair
 * in the fragment. Truly orphaned tags are handled by cleanUnclosedTags(),
 * which pairs openers and closers before removing anything.
 *
 * @param string $html
 * @return string
 */
private function cleanRedundantStyles($html)
{
    foreach (self::STYLE_TAGS as $tag) {
        $pattern = '/<' . $tag . '>\s*<myfigure([^>]*)>(.*?)<\/myfigure>([\.,:]{0,1})\s*<\/' . $tag . '>/is';
        $unwrapped = @preg_replace($pattern, '<myfigure$1>$2</myfigure>$3', $html);
        if ($unwrapped !== null) {
            $html = $unwrapped;
        }
    }
    return $html;
}
/**
 * Remove stray brackets and repeated punctuation directly after a <myfigure> tag.
 *
 * @param string $html
 * @return string
 */
private function cleanRedundantPunctuation($html)
{
    // "(Figure N)</myfigure>)."  =>  "(Figure N)</myfigure>."
    // The visible number is taken from the text itself, never from data-id:
    // data-id may hold a record ID that differs from the figure number.
    $html = preg_replace('/<myfigure data-id="(\d+)">\(Figure (\d+)\)<\/myfigure>\)\./i', '<myfigure data-id="$1">(Figure $2)</myfigure>.', $html);
    // "</myfigure>)."  =>  "</myfigure>)"
    $html = preg_replace('/<\/myfigure>\)\.([\.,:]{0,1})/', '</myfigure>)$1', $html);
    // "</myfigure>.)"  =>  "</myfigure>)"
    $html = preg_replace('/<\/myfigure>\.\)([\.,:]{0,1})/', '</myfigure>)$1', $html);
    // Runs of punctuation collapse to the last mark
    $html = preg_replace('/<\/myfigure>([\.,:]){2,}/', '</myfigure>$1', $html);
    // "(Figure N</myfigure>"  =>  "(Figure N)</myfigure>" (close the bracket inside the tag)
    $html = preg_replace('/<myfigure data-id="(\d+)">\((Figure \d+)\s*<\/myfigure>([\.,:]{0,1})/i',
        '<myfigure data-id="$1">($2)</myfigure>$3', $html);
    return $html;
}
/**
 * Remove style tags that have lost their partner.
 *
 * Step 1 drops a closing style tag that directly follows </myfigure>.
 * Step 2 walks the openers and closers of each style tag in document order,
 * pairs every closer with the most recent unpaired opener, and deletes
 * whatever stays unpaired.
 *
 * @param string $html
 * @return string
 */
private function cleanUnclosedTags($html)
{
    // Step 1: "</myfigure></b>" => "</myfigure>"
    foreach (self::STYLE_TAGS as $styleTag) {
        $html = @preg_replace('/(<\/myfigure>)\s*<\/' . $styleTag . '>/i', '$1', $html);
    }

    // Step 2: pair up what is left, one tag name at a time
    foreach (self::STYLE_TAGS as $styleTag) {
        @preg_match_all("/<{$styleTag}\b[^>]*>/i", $html, $opening, PREG_OFFSET_CAPTURE);
        @preg_match_all("/<\/{$styleTag}>/i", $html, $closing, PREG_OFFSET_CAPTURE);

        // One event per tag occurrence
        $events = [];
        foreach ($opening[0] as $hit) {
            $events[] = ['offset' => $hit[1], 'type' => 'open', 'content' => $hit[0], 'length' => strlen($hit[0])];
        }
        foreach ($closing[0] as $hit) {
            $events[] = ['offset' => $hit[1], 'type' => 'close', 'content' => $hit[0], 'length' => strlen($hit[0])];
        }
        usort($events, function ($left, $right) {
            return $left['offset'] <=> $right['offset'];
        });

        // Pair closers with the latest pending opener
        $pendingOpeners = [];
        $orphans = [];
        foreach ($events as $event) {
            if ($event['type'] == 'open') {
                $pendingOpeners[] = $event;
                continue;
            }
            if (!empty($pendingOpeners)) {
                array_pop($pendingOpeners);
                continue;
            }
            // Closer without any opener before it
            $orphans[] = ['pos' => $event['offset'], 'len' => $event['length'], 'content' => $event['content']];
        }
        // Openers that never got a closer
        foreach ($pendingOpeners as $event) {
            $orphans[] = ['pos' => $event['offset'], 'len' => $event['length'], 'content' => $event['content']];
        }

        // Delete back to front so earlier offsets stay valid
        usort($orphans, function ($left, $right) {
            return $right['pos'] <=> $left['pos'];
        });
        foreach ($orphans as $orphan) {
            $inRange = $orphan['pos'] >= 0 && $orphan['pos'] < strlen($html);
            if ($inRange) {
                $html = substr_replace($html, '', $orphan['pos'], $orphan['len']);
            }
        }
    }
    return $html;
}
/**
 * Normalise whitespace: trim, collapse whitespace runs, and keep one space
 * between a <myfigure> tag and an adjacent letter or digit.
 *
 * @param string $html
 * @return string
 */
private function optimizeFormat($html)
{
    // Trim, then squeeze runs of two or more whitespace characters into one space
    $collapsed = preg_replace('/\s{2,}/', ' ', trim($html));
    // "</myfigure>x" => "</myfigure> x"
    $spacedAfter = preg_replace('/<\/myfigure>([A-Za-z0-9])/is', '</myfigure> $1', $collapsed);
    // "x<myfigure" => "x <myfigure"
    return preg_replace('/([a-zA-Z0-9])<myfigure/is', '$1 <myfigure', $spacedAfter);
}
}

View File

@@ -0,0 +1,252 @@
<?php
namespace app\common;
/**
* Table标签处理器生产环境终极版
* 功能精准匹配并替换Table相关格式为mytable标签
* 支持格式table 数字、(table 数字)、table 数字:/table 数字.(含嵌套标签)
* 特性:支持任意嵌套标签/括号、不处理数字+字母、仅跳过已被mytable包裹的Table
*/
class TableTagProcessor {
// 可配置的样式标签列表
const STYLE_TAGS = ['i', 'b', 'font', 'strong', 'em'];
// 最大处理字符串长度
const MAX_HTML_LENGTH = 100000;
// 目标替换标签
const PROCESSED_TAG = 'mytable';
// 数据库表格ID映射
private $aTableMain = [];
/**
 * Replace "Table N" mentions in an HTML fragment with <mytable> tags and tidy up around them.
 *
 * @param string $html       HTML fragment to process.
 * @param array  $aTableMain Ordered list of table primary keys; element 0
 *                           belongs to Table 1, element 1 to Table 2, and so
 *                           on. Mentions without a mapped key are left alone.
 * @return array ['status' => int, 'data' => string]
 *               status 1 = replaced, 2 = empty input, 4 = nothing replaced
 *               (or input longer than MAX_HTML_LENGTH), 5 = processing error.
 */
public function dealTableStr($html = '', $aTableMain = [])
{
    if (!is_string($html) || trim($html) === '') {
        return ['status' => 2, 'data' => ''];
    }
    // Refuse very large inputs
    if (strlen($html) > self::MAX_HTML_LENGTH) {
        return ['status' => 4, 'data' => $html];
    }

    // Rebuild the number => primary key map on every call, so IDs from a
    // previous call (possibly another article) can never leak into this one.
    $aTableMainNew = [];
    if (is_array($aTableMain)) {
        foreach ($aTableMain as $key => $value) {
            if (!is_scalar($value) || !ctype_digit((string)$key) || !ctype_digit((string)$value)) {
                continue;
            }
            // List index 0 corresponds to "Table 1"
            $aTableMainNew[(int)$key + 1] = $value;
        }
    }
    $this->aTableMain = $aTableMainNew;

    $originalHtml = $html;
    $hasReplace = false;
    try {
        $html = $this->replaceTableInHtml($html, $hasReplace);
        // Clean-up only makes sense after at least one replacement
        if ($hasReplace) {
            $html = $this->cleanRedundantStyles($html);
            $html = $this->cleanRedundantPunctuation($html);
            $html = $this->cleanUnclosedTags($html);
            $html = $this->optimizeFormat($html);
            $html = $this->cleanDuplicateNestedTags($html);
        }
    } catch (\Throwable $e) {
        return ['status' => 5, 'data' => $originalHtml];
    }

    // A failed regex step leaves null behind; never hand that to the caller
    if (!is_string($html)) {
        return ['status' => 5, 'data' => $originalHtml];
    }
    return [
        'status' => $hasReplace ? 1 : 4,
        'data' => $hasReplace ? $html : $originalHtml
    ];
}
/**
 * Find "Table N" mentions in the HTML and wrap them in <mytable data-id="..."> tags.
 *
 * Two passes:
 *  1. parenthesised mentions, e.g. "(Table 82)" or "(<b>Table 1.</b>)";
 *  2. bare mentions, e.g. "Table 1", "<b>Table 2:</b>", "<i>Table 3.</i>".
 *
 * A mention is only replaced when its number has an entry in the number =>
 * primary key map. In pass 2 the word "table" must not be the tail of a
 * longer word ("stable 2"), the number must not be followed by a letter
 * ("Table 12a"), and text already inside a <mytable> element is skipped.
 *
 * @param string $html
 * @param bool   $hasReplace Set to true (by reference) once anything was replaced.
 * @return string
 */
private function replaceTableInHtml($html, &$hasReplace)
{
    $processedTag = self::PROCESSED_TAG;
    $styleTagsPattern = implode('|', self::STYLE_TAGS);
    // Optional style tags around the mention. \b keeps "<i" from matching
    // "<img ...>" and "<b" from matching "<br>", which would otherwise be
    // swallowed by the replacement.
    $styleTagsRegex = "(?:<(?:{$styleTagsPattern})\b[^>]*>)*\s*";
    $styleTagsCloseRegex = "\s*(?:<\/(?:{$styleTagsPattern})>)*";

    // Shared replacement logic; $bracketed decides whether the result is re-wrapped in parentheses
    $buildReplacement = function (array $matches, $bracketed) use (&$hasReplace, $processedTag) {
        $num = isset($matches[1]) ? $matches[1] : '';
        $numInt = intval($num);
        $suffix = isset($matches[2]) ? $matches[2] : '';
        // Needs: pure digits, a mapped primary key, and not already wrapped
        if (!ctype_digit($num) || !isset($this->aTableMain[$numInt]) ||
            $this->isMatchPositionHasMyTableTag($matches[0], "Table {$num}")) {
            return $matches[0];
        }
        $primaryId = $this->aTableMain[$numInt];
        $baseTag = "<{$processedTag} data-id=\"{$primaryId}\">Table {$num}</{$processedTag}>";
        $hasReplace = true;
        return $bracketed ? "({$baseTag}{$suffix})" : "{$baseTag}{$suffix}";
    };

    // Pass 1: parenthesised mentions
    $pattern1 = "/\(\s*{$styleTagsRegex}table\s*(\d+)\s*([\.,:]{0,1}){$styleTagsCloseRegex}\s*\)/iu";
    $result = preg_replace_callback($pattern1, function ($matches) use ($buildReplacement) {
        return $buildReplacement($matches, true);
    }, $html);
    if ($result !== null) {
        $html = $result;
    }

    // Pass 2: bare mentions.
    // - First alternative: an existing <mytable>...</mytable> element is
    //   consumed and discarded via (*SKIP)(*FAIL), so its text is never
    //   wrapped a second time (this includes what pass 1 just produced).
    // - (?<![A-Za-z0-9]) stops matches inside longer words.
    // - \d++ is possessive, so "12a" cannot be re-read as "1".
    $pattern2 = "/<{$processedTag}\b[^>]*>.*?<\/{$processedTag}>(*SKIP)(*FAIL)"
        . "|{$styleTagsRegex}(?<![A-Za-z0-9])table\s*(\d++)(?![A-Za-z])\s*([\.,:]{0,1}){$styleTagsCloseRegex}/isu";
    $result = preg_replace_callback($pattern2, function ($matches) use ($buildReplacement) {
        return $buildReplacement($matches, false);
    }, $html);
    if ($result !== null) {
        $html = $result;
    }

    return $html;
}
/**
 * Unwrap <mytable> tags that ended up as the sole content of a style tag.
 *
 * <b><mytable ...>Table 1</mytable>.</b> becomes <mytable ...>Table 1</mytable>.
 *
 * This method deliberately does not delete closing style tags on its own: a
 * blanket removal would strip the closing half of every balanced <b>/<i> pair
 * in the fragment. Truly orphaned tags are handled by cleanUnclosedTags(),
 * which pairs openers and closers before removing anything.
 *
 * @param string $html
 * @return string
 */
private function cleanRedundantStyles($html)
{
    foreach (self::STYLE_TAGS as $tag) {
        $pattern = '/<' . $tag . '>\s*<'.self::PROCESSED_TAG.'([^>]*?)>(.*?)<\/'.self::PROCESSED_TAG.'>([\.,:]{0,1})\s*<\/' . $tag . '>/is';
        $unwrapped = @preg_replace($pattern, '<'.self::PROCESSED_TAG.'$1>$2</'.self::PROCESSED_TAG.'>$3', $html);
        if ($unwrapped !== null) {
            $html = $unwrapped;
        }
    }
    return $html;
}
/**
 * Remove stray brackets and repeated punctuation directly after a <mytable> tag.
 *
 * @param string $html
 * @return string
 */
private function cleanRedundantPunctuation($html)
{
    $tag = self::PROCESSED_TAG;
    // "(Table N)</mytable>)."  =>  "(Table N)</mytable>."
    // The visible number is taken from the text itself, never from data-id:
    // data-id holds the table's primary key, not its number.
    $html = preg_replace('/<'.$tag.' data-id="(\d+)">\(Table (\d+)\)<\/'.$tag.'>\)\./i', '<'.$tag.' data-id="$1">(Table $2)</'.$tag.'>.', $html);
    // "</mytable>)."  =>  "</mytable>)"
    $html = preg_replace('/<\/'.$tag.'>\)\.([\.,:]{0,1})/', '</'.$tag.'>)$1', $html);
    // "</mytable>.)"  =>  "</mytable>)"
    $html = preg_replace('/<\/'.$tag.'>\.\)([\.,:]{0,1})/', '</'.$tag.'>)$1', $html);
    // Runs of punctuation collapse to the last mark
    $html = preg_replace('/<\/'.$tag.'>([\.,:]){2,}/', '</'.$tag.'>$1', $html);
    // "(Table N</mytable>"  =>  "(Table N)</mytable>" (close the bracket inside the tag)
    $html = preg_replace('/<'.$tag.' data-id="(\d+)">\((Table \d+)\s*<\/'.$tag.'>([\.,:]{0,1})/i',
        '<'.$tag.' data-id="$1">($2)</'.$tag.'>$3', $html);
    return $html;
}
/**
 * Remove style tags that have lost their partner.
 *
 * Step 1 drops a closing style tag that directly follows the closing
 * processed tag. Step 2 walks the openers and closers of each style tag in
 * document order, pairs every closer with the most recent unpaired opener,
 * and deletes whatever stays unpaired.
 *
 * @param string $html
 * @return string
 */
private function cleanUnclosedTags($html)
{
    // Step 1: closing style tag right after the closing processed tag
    foreach (self::STYLE_TAGS as $styleTag) {
        $html = @preg_replace('/(<\/'.self::PROCESSED_TAG.'>)\s*<\/' . $styleTag . '>/i', '$1', $html);
    }

    // Step 2: pair up what is left, one tag name at a time
    foreach (self::STYLE_TAGS as $styleTag) {
        @preg_match_all("/<{$styleTag}\b[^>]*>/i", $html, $opening, PREG_OFFSET_CAPTURE);
        @preg_match_all("/<\/{$styleTag}>/i", $html, $closing, PREG_OFFSET_CAPTURE);

        // One event per tag occurrence
        $events = [];
        foreach ($opening[0] as $hit) {
            $events[] = ['offset' => $hit[1], 'type' => 'open', 'content' => $hit[0], 'length' => strlen($hit[0])];
        }
        foreach ($closing[0] as $hit) {
            $events[] = ['offset' => $hit[1], 'type' => 'close', 'content' => $hit[0], 'length' => strlen($hit[0])];
        }
        usort($events, function ($left, $right) {
            return $left['offset'] <=> $right['offset'];
        });

        // Pair closers with the latest pending opener
        $pendingOpeners = [];
        $orphans = [];
        foreach ($events as $event) {
            if ($event['type'] == 'open') {
                $pendingOpeners[] = $event;
                continue;
            }
            if (!empty($pendingOpeners)) {
                array_pop($pendingOpeners);
                continue;
            }
            // Closer without any opener before it
            $orphans[] = $event;
        }
        // Openers that never got a closer
        foreach ($pendingOpeners as $event) {
            $orphans[] = $event;
        }

        // Delete back to front so earlier offsets stay valid
        usort($orphans, function ($left, $right) {
            return $right['offset'] <=> $left['offset'];
        });
        foreach ($orphans as $orphan) {
            $inRange = $orphan['offset'] >= 0 && $orphan['offset'] < strlen($html);
            if ($inRange) {
                $html = substr_replace($html, '', $orphan['offset'], $orphan['length']);
            }
        }
    }
    return $html;
}
/**
 * Normalise whitespace: trim, collapse whitespace runs, and keep one space
 * between a processed tag and an adjacent letter or digit.
 *
 * @param string $html
 * @return string
 */
private function optimizeFormat($html)
{
    $tag = self::PROCESSED_TAG;
    // Trim, then squeeze runs of two or more whitespace characters into one space
    $collapsed = preg_replace('/\s{2,}/', ' ', trim($html));
    // Closing tag glued to a following letter/digit gets a space
    $spacedAfter = preg_replace('/<\/' . $tag . '>([A-Za-z0-9])/is', '</' . $tag . '> $1', $collapsed);
    // Letter/digit glued to a following opening tag gets a space
    return preg_replace('/([a-zA-Z0-9])<' . $tag . '/is', '$1 <' . $tag, $spacedAfter);
}
/**
 * Collapse a processed tag that directly wraps another processed tag,
 * keeping the inner one (fallback clean-up).
 *
 * @param string $html
 * @return string
 */
private function cleanDuplicateNestedTags($html)
{
    $tag = self::PROCESSED_TAG;
    // outer-open, inner-open (attributes captured), content, inner-close, outer-close
    $nested = '/<' . $tag . '[^>]*>\s*<' . $tag . '([^>]*)>(.*?)<\/' . $tag . '>\s*<\/' . $tag . '>/is';
    $flattened = '<' . $tag . '$1>$2</' . $tag . '>';
    return preg_replace($nested, $flattened, $html);
}
/**
 * Tell whether $content contains $tableText as the entire content of a processed tag.
 *
 * @param string $content   Text to inspect.
 * @param string $tableText Mention text, e.g. "Table 1".
 * @return bool True only when the pattern matched; false on no match or regex error.
 */
private function isMatchPositionHasMyTableTag($content, $tableText)
{
    $tag = self::PROCESSED_TAG;
    $needle = preg_quote($tableText, '/');
    $wrapped = '/<' . $tag . '[^>]*>\s*' . $needle . '\s*<\/' . $tag . '>/is';
    $hits = @preg_match($wrapped, $content);
    return $hits === 1;
}
}