队列调整
This commit is contained in:
@@ -82,15 +82,15 @@ class ProofReadService
|
||||
if ($converted === false) {
|
||||
$posStart = 0;
|
||||
$posEnd = min(20, strlen($originalContent));
|
||||
$errors[] = $this->createError(
|
||||
'编码转换失败',
|
||||
'保留原始编码内容',
|
||||
"从[{$originalEncoding}]转换为UTF-8失败,保留原始内容",
|
||||
$originalContent,
|
||||
$corrected,
|
||||
$posStart,
|
||||
$posEnd
|
||||
);
|
||||
// $errors[] = $this->createError(
|
||||
// '编码转换失败',
|
||||
// '保留原始编码内容',
|
||||
// "从[{$originalEncoding}]转换为UTF-8失败,保留原始内容",
|
||||
// $originalContent,
|
||||
// $corrected,
|
||||
// $posStart,
|
||||
// $posEnd
|
||||
// );
|
||||
}
|
||||
}
|
||||
|
||||
@@ -213,7 +213,8 @@ class ProofReadService
|
||||
$originalContent,
|
||||
$currentCorrected,
|
||||
$posStart,
|
||||
$posEnd
|
||||
$posEnd,
|
||||
empty($rule['error_type']) ? '' : $rule['error_type']
|
||||
);
|
||||
$processedHashes[$hash] = true;
|
||||
$corrected = $currentCorrected;
|
||||
@@ -264,25 +265,30 @@ class ProofReadService
|
||||
'pattern' => '~(\[\s*[-]?\d+\s*)\x{2014}\s*(\d+\s*\])~u', // 匹配长划线(—)
|
||||
'replacement' => '$1-$2', // 替换为短划线(-)
|
||||
'verbatim_texts' => '带括号数字范围使用长划线(—)不规范',
|
||||
'explanation' => '带括号的数字范围应使用短划线(-),如 [1-5]'
|
||||
'explanation' => '带括号的数字范围应使用短划线[-]',
|
||||
'error_type' => 'en-dash'
|
||||
],
|
||||
[
|
||||
'pattern' => '~(\[\s*[-]?\d+\s*)-\s*(\d+\s*\])~u', // 匹配连接符(-)及可能的空格
|
||||
'replacement' => '$1-$2', // 统一为无空格短划线(-)
|
||||
'verbatim_texts' => '带括号数字范围使用连接符(-)格式不规范',
|
||||
'explanation' => '带括号的数字范围应使用短划线(-)且前后无空格,如 [2-5]'
|
||||
'explanation' => '带括号的数字范围应使用短划线[-]且前后无空格',
|
||||
'error_type' => 'en-dash'
|
||||
|
||||
],
|
||||
[
|
||||
'pattern' => '~(\[\s*[-]?\d+)\s+-\s*(\d+\s*\])~u', // 短划线前多余空格
|
||||
'replacement' => '$1-$2', // 移除前导空格
|
||||
'verbatim_texts' => '数字范围短划线前有多余空格',
|
||||
'explanation' => '带括号数字范围的短划线(-)前不应留空格,如 [3-5]'
|
||||
'explanation' => '带括号数字范围的短划线[-]前不应留空格',
|
||||
'error_type' => 'en-dash'
|
||||
],
|
||||
[
|
||||
'pattern' => '~(\[\s*[-]?\d+)\s*-\s+(\d+\s*\])~u', // 短划线后多余空格
|
||||
'replacement' => '$1-$2', // 移除后导空格
|
||||
'verbatim_texts' => '数字范围短划线后有多余空格',
|
||||
'explanation' => '带括号数字范围的短划线(-)后不应留空格,如 [4-5]'
|
||||
'explanation' => '带括号数字范围的短划线[-]后不应留空格',
|
||||
'error_type' => 'en-dash'
|
||||
],
|
||||
|
||||
// ====================== 2. 无括号数字范围规则(次高优先级,避免与减号运算规则冲突) ======================
|
||||
@@ -290,13 +296,15 @@ class ProofReadService
|
||||
'pattern' => '~(\b\d+)\s*—\s*(\d+\b)~u', // 匹配长划线(—)
|
||||
'replacement' => '$1-$2', // 替换为短划线(-)
|
||||
'verbatim_texts' => '无括号数字范围使用长划线(—)不规范',
|
||||
'explanation' => '无括号的数字范围应使用短划线(-),如 5-6'
|
||||
'explanation' => '无括号的数字范围应使用短划线[-]',
|
||||
'error_type' => 'bracket_en-dash'
|
||||
],
|
||||
[
|
||||
'pattern' => '~(\b\d+)\s*-\s*(\d+\b)~u', // 匹配连接符(-)及可能的空格
|
||||
'replacement' => '$1-$2', // 统一为无空格短划线(-)
|
||||
'verbatim_texts' => '无括号数字范围使用连接符(-)格式不规范',
|
||||
'explanation' => '无括号的数字范围应使用短划线(-)且前后无空格,如 5-7'
|
||||
'explanation' => '无括号的数字范围应使用短划线[-]且前后无空格',
|
||||
'error_type' => 'bracket_en-dash'
|
||||
],
|
||||
|
||||
// ====================== 3. 运算符空格规则(按「复合→独立」顺序,避免冲突) ======================
|
||||
@@ -304,7 +312,8 @@ class ProofReadService
|
||||
'pattern' => '~(\S)\s*([<>!]=|===|!==)\s*(\S)~u', // 复合运算符(>=、<=、==、!=、===、!==)
|
||||
'replacement' => '$1 $2 $3',
|
||||
'verbatim_texts' => '复合运算符前后空格不规范',
|
||||
'explanation' => '复合运算符(>=、<=、==、!=、===、!==)前后应各留一个空格,如 x >= 5'
|
||||
'explanation' => '复合运算符[>=、<=、==、!=、===、!==]前后应各留一个空格',
|
||||
'error_type' => 'composite_operator'
|
||||
],
|
||||
[
|
||||
'pattern' => '~(?<!=|<|>|\*|\+|-|/)(\S+?)\s*=\s*(\S+?)(?!=|<|>|\*|\+|-|/)~u',
|
||||
@@ -314,25 +323,29 @@ class ProofReadService
|
||||
// 前后否定断言:排除与其他运算符(如+=、*=)的冲突
|
||||
'replacement' => '$1 = $2', // 正确拼接“前内容 + 规范等号 + 后内容”
|
||||
'verbatim_texts' => '等号前后空格不规范',
|
||||
'explanation' => '独立等号(=)前后应各留一个空格,如 a = 3' // 移除无效的$1/$2
|
||||
'explanation' => '独立等号[=]前后应各留一个空格',
|
||||
'error_type' => 'equal'
|
||||
],
|
||||
[
|
||||
'pattern' => '~(\d+)\s*\+\s*(\d+)~u', // 加法运算符(+)
|
||||
'replacement' => '$1 + $2',
|
||||
'verbatim_texts' => '加法运算符前后空格不规范',
|
||||
'explanation' => '加法运算符(+)前后应各留一个空格,如 2 + 3'
|
||||
'explanation' => '加法运算符[+]前后应各留一个空格',
|
||||
'error_type' => 'plus'
|
||||
],
|
||||
[
|
||||
'pattern' => '~(\d+)\s*\*\s*(\d+)~u', // 乘法运算符(*)
|
||||
'replacement' => '$1 * $2',
|
||||
'verbatim_texts' => '乘法运算符前后空格不规范',
|
||||
'explanation' => '乘法运算符(*)前后应各留一个空格,如 3 * 4'
|
||||
'explanation' => '乘法运算符[*]前后应各留一个空格',
|
||||
'error_type' => 'ride'
|
||||
],
|
||||
[
|
||||
'pattern' => '~(\d+)\s*/\s*(\d+)~u', // 除法运算符(/)
|
||||
'replacement' => '$1 / $2',
|
||||
'verbatim_texts' => '除法运算符前后空格不规范',
|
||||
'explanation' => '除法运算符(/)前后应各留一个空格,如 8 / 2'
|
||||
'explanation' => '除法运算符[/]前后应各留一个空格',
|
||||
'error_type' => 'except'
|
||||
],
|
||||
[
|
||||
'pattern' => '~
|
||||
@@ -347,7 +360,8 @@ class ProofReadService
|
||||
~ux', // 减法运算符(-,仅处理纯数字减法,排除文献引用等场景)
|
||||
'replacement' => '$1 - $2',
|
||||
'verbatim_texts' => '减法运算符前后空格不规范',
|
||||
'explanation' => '减法运算符(-)前后应各留一个空格(非数字范围场景),如 5 - 3'
|
||||
'explanation' => '减法运算符[-]前后应各留一个空格(非数字范围场景)',
|
||||
'error_type' => 'reduce'
|
||||
],
|
||||
|
||||
// ====================== 4. 特殊符号规则(低优先级,避免干扰核心格式) ======================
|
||||
@@ -355,31 +369,38 @@ class ProofReadService
|
||||
'pattern' => '~(\d+)\s+%~u', // 数字与百分号
|
||||
'replacement' => '$1%',
|
||||
'verbatim_texts' => '数字与百分号之间有多余空格',
|
||||
'explanation' => '数字与百分号(%)之间不应留空格,如 50%'
|
||||
'explanation' => '数字与百分号[%]之间有多余空格',
|
||||
'error_type' => 'number_percentage'
|
||||
],
|
||||
[
|
||||
'pattern' => '~(\(\s*\d+)\s+×\s+(\d+\s*\))~u', // 先匹配「(数字 × 数字)」场景(带括号)
|
||||
'replacement' => '$1×$2', // 修正为「(数字×数字)」,如 (40×33)
|
||||
'verbatim_texts' => '带括号的乘号表示倍数时前后有多余空格',
|
||||
'explanation' => '带括号的乘号(×)表示倍数关系时前后不应留空格,如 (3×5)'
|
||||
'explanation' => '带括号的乘号[×]表示倍数关系时前后有多余空格',
|
||||
'error_type' => 'multiple'
|
||||
|
||||
],
|
||||
[
|
||||
'pattern' => '~(\d+)\s+×\s+(\d+)~u', // 再匹配「数字 × 数字」场景(无括号)
|
||||
'replacement' => '$1×$2',
|
||||
'verbatim_texts' => '乘号表示倍数时前后有多余空格',
|
||||
'explanation' => '乘号(×)表示倍数关系时前后不应留空格,如 3×5'
|
||||
'explanation' => '乘号[×]表示倍数关系时前后不应留空格',
|
||||
'error_type' => 'multiple'
|
||||
],
|
||||
[
|
||||
'pattern' => '~(\d+)\s*\*\s*(\d+)~u', // 星号(*)转乘号(×)
|
||||
'replacement' => '$1 × $2',
|
||||
'verbatim_texts' => '使用星号(*)作为乘法运算符不规范',
|
||||
'explanation' => '乘法运算应使用标准乘号(×)替代星号(*),并前后留空格,如 3 × 5'
|
||||
// 'explanation' => '乘法运算应使用标准乘号(×)替代星号(*),并前后留空格,如 3 × 5'
|
||||
'explanation' => '乘法运算应使用标准乘号[×]替代星号[*]',
|
||||
'error_type' => 'ride'
|
||||
],
|
||||
[
|
||||
'pattern' => '~(\d+)\s+:\s+(\d+)~u', // 比值符号(:)
|
||||
'replacement' => '$1:$2',
|
||||
'verbatim_texts' => '比值符号前后有多余空格',
|
||||
'explanation' => '比值符号(:)前后不应留空格,如 1:2'
|
||||
'explanation' => '比值符号[:]前后有多余空格',
|
||||
'error_type' => 'biliel'
|
||||
]
|
||||
];
|
||||
}
|
||||
@@ -452,7 +473,7 @@ class ProofReadService
|
||||
if (preg_match($decimalZeroPattern, $number, $numMatch)) {
|
||||
$integerPart = $numMatch[1];
|
||||
$corrected = $integerPart;
|
||||
|
||||
$errorType = 'invalid_zero';
|
||||
if (!isset($replacements[$number])) {
|
||||
$replacements[$number] = $corrected;
|
||||
|
||||
@@ -465,11 +486,12 @@ class ProofReadService
|
||||
$errors[] = $this->createError(
|
||||
$number,
|
||||
$corrected,
|
||||
"删除小数点后无效零(原始:{$number} → 修正:{$corrected})",
|
||||
"删除小数点后无效零",
|
||||
$originalContent,
|
||||
$currentCorrected,
|
||||
$posStart,
|
||||
$posEnd
|
||||
$posEnd,
|
||||
$errorType
|
||||
);
|
||||
}
|
||||
}
|
||||
@@ -489,15 +511,15 @@ class ProofReadService
|
||||
);
|
||||
|
||||
if (@preg_match($thousandPattern, '') === false) {
|
||||
$errors[] = $this->createError(
|
||||
'千分位正则错误',
|
||||
'跳过千分位处理',
|
||||
"千分位正则错误: {$thousandPattern}",
|
||||
$originalContent,
|
||||
$correctedContent,
|
||||
-1,
|
||||
-1
|
||||
);
|
||||
// $errors[] = $this->createError(
|
||||
// '千分位正则错误',
|
||||
// '跳过千分位处理',
|
||||
// "千分位正则错误: {$thousandPattern}",
|
||||
// $originalContent,
|
||||
// $correctedContent,
|
||||
// -1,
|
||||
// -1
|
||||
// );
|
||||
} else {
|
||||
$correctedContent = preg_replace_callback(
|
||||
$thousandPattern,
|
||||
@@ -516,6 +538,7 @@ class ProofReadService
|
||||
$searchOffset = ($posEnd !== -1) ? $posEnd : $searchOffset + strlen($original);
|
||||
|
||||
$currentCorrected = strtr($originalContent, $replacements);
|
||||
$errorType = 'thousandth_separator';
|
||||
$errors[] = $this->createError(
|
||||
$original,
|
||||
$formatted,
|
||||
@@ -523,7 +546,9 @@ class ProofReadService
|
||||
$originalContent,
|
||||
$currentCorrected,
|
||||
$posStart,
|
||||
$posEnd
|
||||
$posEnd,
|
||||
$errorType
|
||||
|
||||
);
|
||||
return $formatted;
|
||||
},
|
||||
@@ -547,15 +572,15 @@ class ProofReadService
|
||||
$searchOffset = ($posEnd !== -1) ? $posEnd : $searchOffset + strlen($marker);
|
||||
|
||||
$correctedContent = str_replace($marker, $original, $correctedContent);
|
||||
$errors[] = $this->createError(
|
||||
"残留URL/DOI占位符: {$marker}",
|
||||
"已恢复为原始内容",
|
||||
"URL/DOI恢复不完全,已强制恢复",
|
||||
$originalContent,
|
||||
$correctedContent,
|
||||
$posStart,
|
||||
$posEnd
|
||||
);
|
||||
// $errors[] = $this->createError(
|
||||
// "残留URL/DOI占位符: {$marker}",
|
||||
// "已恢复为原始内容",
|
||||
// "URL/DOI恢复不完全,已强制恢复",
|
||||
// $originalContent,
|
||||
// $correctedContent,
|
||||
// $posStart,
|
||||
// $posEnd
|
||||
// );
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -655,15 +680,17 @@ class ProofReadService
|
||||
|
||||
// 错误信息去重(基于原始内容+修正内容哈希)
|
||||
$errorHash = md5($original . $fixed);
|
||||
$errorType = empty( $unit['full']) ? '' : $unit['full'];
|
||||
if (!isset($errors[$errorHash])) {
|
||||
$errors[$errorHash] = $this->createError(
|
||||
$original,
|
||||
$fixed,
|
||||
"{$unit['description']}单位格式不规范:{$errorReason},正确格式为'数字{$unit['abbr']}'",
|
||||
"{$unit['description']}单位格式不规范:{$errorReason},正确格式为[数字{$unit['abbr']}]",
|
||||
$originalContent,
|
||||
strtr($originalContent, $replaceMap + [$original => $fixed]),
|
||||
$posStart,
|
||||
$posEnd
|
||||
$posEnd,
|
||||
$errorType
|
||||
);
|
||||
}
|
||||
|
||||
@@ -726,7 +753,7 @@ class ProofReadService
|
||||
|
||||
// 标准毫升单位格式(L大写)
|
||||
$fixedFull = "{$prefix}mL";
|
||||
|
||||
$errorType = 'mL';
|
||||
// 仅处理与标准格式不一致的场景
|
||||
if ($originalFull !== $fixedFull) {
|
||||
// 计算错误内容在原始文本中的位置
|
||||
@@ -740,11 +767,12 @@ class ProofReadService
|
||||
$errors[$errorHash] = $this->createError(
|
||||
$originalFull,
|
||||
$fixedFull,
|
||||
'毫升单位格式不规范,标准写法为"mL"',
|
||||
'毫升单位格式不规范,标准写法为[mL]',
|
||||
$originalContent,
|
||||
strtr($originalContent, $replaceMap + [$originalFull => $fixedFull]),
|
||||
$posStart,
|
||||
$posEnd
|
||||
$posEnd,
|
||||
$errorType
|
||||
);
|
||||
}
|
||||
|
||||
@@ -815,15 +843,17 @@ class ProofReadService
|
||||
|
||||
// 错误去重(哈希机制)
|
||||
$errorHash = md5($original . $fixed);
|
||||
$errorType = 'P';
|
||||
if (!isset($errors[$errorHash])) {
|
||||
$errors[$errorHash] = $this->createError(
|
||||
$original,
|
||||
$fixed,
|
||||
'显著性P值格式不规范,P/p应使用斜体',
|
||||
'显著性P值格式不规范,P/p应使用斜体',
|
||||
$originalContent,
|
||||
strtr($originalContent, $replaceMap + [$original => $fixed]),
|
||||
$posStart,
|
||||
$posEnd
|
||||
$posEnd,
|
||||
$errorType
|
||||
);
|
||||
}
|
||||
|
||||
@@ -912,15 +942,18 @@ class ProofReadService
|
||||
|
||||
// 错误信息去重(基于单个错误片段的原始值+修正值哈希,避免重复记录)
|
||||
$errorHash = md5($originalFull . $fixedFull);
|
||||
$errorType = 'No.';
|
||||
if (!isset($errors[$errorHash])) {
|
||||
$errors[$errorHash] = $this->createError(
|
||||
$originalFull, // verbatim_texts:具体错误片段
|
||||
$fixedFull, // revised_content:错误片段的修正结果
|
||||
'No. 格式不规范:' . implode(',', $errorReasons) . ',正确格式为「No. 数字」', // explanation:错误说明
|
||||
'No. 格式不规范,正确格式为「No. 数字」', // explanation:错误说明
|
||||
// 'No. 格式不规范:' . implode(',', $errorReasons) . ',正确格式为「No. 数字」', // explanation:错误说明
|
||||
$originalContent, // original:完整原始内容(整个输入文本)
|
||||
strtr($originalContent, $replaceMap), // corrected:完整修正内容(基于当前替换映射生成)
|
||||
$posStart, // position_start:错误起始位置
|
||||
$posEnd // position_end:错误结束位置
|
||||
$posEnd, // position_end:错误结束位置
|
||||
$errorType //错误类型
|
||||
);
|
||||
}
|
||||
}
|
||||
@@ -981,19 +1014,19 @@ class ProofReadService
|
||||
switch (strtolower($abbrBase)) {
|
||||
case 'fig':
|
||||
$fullName = 'Figure';
|
||||
$errorDesc = '图表标题使用缩写"Fig",应改为全称"Figure"';
|
||||
$errorDesc = '图表标题使用缩写"Fig",正确:"Figure"';
|
||||
break;
|
||||
case 'figs':
|
||||
$fullName = 'Figures';
|
||||
$errorDesc = '图表标题复数使用缩写"Figs",应改为全称"Figures"';
|
||||
$errorDesc = '图表标题复数使用缩写"Figs",正确:"Figures"';
|
||||
break;
|
||||
case 'tab':
|
||||
$fullName = 'Table';
|
||||
$errorDesc = '表格标题使用缩写"Tab",应改为全称"Table"';
|
||||
$errorDesc = '表格标题使用缩写"Tab",正确:"Table"';
|
||||
break;
|
||||
case 'tabs':
|
||||
$fullName = 'Tables';
|
||||
$errorDesc = '表格标题复数使用缩写"Tabs",应改为全称"Tables"';
|
||||
$errorDesc = '表格标题复数使用缩写"Tabs",正确:"Tables"';
|
||||
break;
|
||||
default:
|
||||
$fullName = '';
|
||||
@@ -1026,7 +1059,8 @@ class ProofReadService
|
||||
$originalContent,
|
||||
$currentCorrected,
|
||||
$posStart,
|
||||
$posEnd
|
||||
$posEnd,
|
||||
$fullName
|
||||
);
|
||||
}
|
||||
|
||||
@@ -1094,15 +1128,16 @@ class ProofReadService
|
||||
/**
 * Build one standardized error record for the proofreading result list.
 *
 * All parameters are optional, so calls written against the earlier
 * seven-argument form keep working; those calls yield an empty
 * 'error_type'.
 *
 * @param string $verbatim       The offending text fragment as it was found.
 * @param string $revised        The corrected replacement for that fragment.
 * @param string $explanation    Human-readable description of the problem.
 * @param string $original       The complete original input text.
 * @param string $corrected      The complete text after corrections were applied.
 * @param int    $position_start Start position of the fragment; defaults to -1 when not supplied.
 * @param int    $position_end   End position of the fragment; defaults to -1 when not supplied.
 * @param string $error_type     Category tag for the error; defaults to an empty string.
 *
 * @return array Associative array with the keys 'verbatim_texts',
 *               'revised_content', 'explanation', 'original', 'corrected',
 *               'position_start', 'position_end' and 'error_type'.
 */
private function createError(
    $verbatim = '',
    $revised = '',
    $explanation = '',
    $original = '',
    $corrected = '',
    $position_start = -1,
    $position_end = -1,
    $error_type = ''
) {
    return [
        'verbatim_texts'  => $verbatim,
        'revised_content' => $revised,
        'explanation'     => $explanation,
        'original'        => $original,
        'corrected'       => $corrected,
        'position_start'  => $position_start,
        'position_end'    => $position_end,
        'error_type'      => $error_type,
    ];
}
|
||||
/**
|
||||
|
||||
Reference in New Issue
Block a user