1435 lines
68 KiB
PHP
1435 lines
68 KiB
PHP
<?php
|
||
namespace app\common;
|
||
|
||
class ProofReadService
|
||
{
|
||
private $errors = [];
|
||
|
||
/**
 * Main entry point: run every proofreading pass over the text.
 *
 * The passes run in a fixed order and each one receives the output of the
 * previous pass. The result is built from $this->errors, which this method
 * resets before the first pass runs.
 *
 * @param mixed $content Text to proofread.
 * @return array Empty array when $this->errors is empty after all passes;
 *               otherwise an array with the keys 'proof_before' (the input),
 *               'proof_after' (the output of the last pass) and 'errors'.
 */
public function proofread($content)
{
    $this->errors = [];
    // NOTE(review): no declaration of $excludedFormats is visible in this part
    // of the class; if it is not declared elsewhere this creates a dynamic
    // property (deprecated since PHP 8.2) — confirm.
    $this->excludedFormats = [];

    // Pass order matters: every pass works on the previous pass's output.
    // The DOI reachability pass (checkDoi) is intentionally not in this list.
    $passes = [
        'checkTimeUnitAbbreviations', // time-unit abbreviations
        'checkTextFormat',            // dashes and operators
        'checkNumberFormat',          // number formatting
        'checkNoFormatUniformity',    // uniform "No. 123456" notation
        'checkMlUnit',                // millilitre unit
        'checkPSignificance',         // italic P for significance
        'checkFigureTableTitle',      // "Figure 1" / "Table 1" instead of "Fig. 1" / "Tab 1"
    ];

    $correctedContent = $content;
    foreach ($passes as $pass) {
        $correctedContent = $this->{$pass}($correctedContent);
    }

    return empty($this->errors)
        ? []
        : [
            'proof_before' => $content,
            'proof_after' => $correctedContent,
            'errors' => $this->errors
        ];
}
|
||
|
||
/**
 * Proofread dashes, operators and number/unit spacing.
 *
 * Steps:
 *  1. <math>/<wmath> elements and URL/DOI strings are swapped for opaque
 *     markers so that no rule can rewrite them.
 *  2. Every rule returned by getTextCoreRules() whose pattern compiles is
 *     matched against the masked text. Each distinct matched string that the
 *     rule actually changes is replaced and reported once via createError().
 *  3. The markers are swapped back and the collected errors are passed to
 *     handleErrors().
 *
 * Reported positions are byte offsets into the text as it was passed in.
 *
 * @param mixed $content Text to check. Non-strings and blank strings are
 *                       returned untouched.
 * @return mixed The corrected text, or $content itself when nothing could be
 *               processed.
 */
private function checkTextFormat($content) {
    $errors = [];

    if (!is_string($content) || trim($content) === '') {
        $this->handleErrors($errors);
        return $content;
    }

    $originalContent = $content;
    $corrected = $content;

    // Work on a UTF-8 copy because every rule pattern uses the /u modifier.
    // NOTE(review): positions are still computed against the unconverted
    // $originalContent, so they can be off for non-UTF-8 input — confirm
    // whether such input ever reaches this method.
    $originalEncoding = mb_detect_encoding($content, ['UTF-8', 'GBK', 'GB2312', 'ISO-8859-1'], true);
    if ($originalEncoding !== false) {
        $converted = @mb_convert_encoding($content, 'UTF-8', $originalEncoding);
        if ($converted !== false) {
            $corrected = $converted;
        }
    }

    $excludeMarkers = []; // marker => original text, for <math>/<wmath> and URL/DOI
    $stamp = time();

    // Replace only the first occurrence of $needle. Done byte-wise instead of
    // through a per-match regex: same result on valid UTF-8, but it cannot
    // return null and it does not compile one pattern per match.
    $maskFirst = static function ($haystack, $needle, $marker) {
        $at = strpos($haystack, $needle);
        return $at === false
            ? $haystack
            : substr_replace($haystack, $marker, $at, strlen($needle));
    };

    // --- Mask <wmath>/<math> elements --------------------------------------
    $mathTagRegex = '~<(wmath|math)[^>]*?>.*?</\1>~is';
    if (@preg_match($mathTagRegex, '') !== false
        && preg_match_all($mathTagRegex, $corrected, $found, PREG_SET_ORDER)
    ) {
        // Longest first, so an element contained in a longer match is not
        // masked before its container.
        usort($found, static function ($a, $b) {
            return strlen($b[0]) - strlen($a[0]);
        });
        foreach ($found as $index => $hit) {
            $marker = "___EXCLUDE_{$hit[1]}_{$stamp}_{$index}___";
            $excludeMarkers[$marker] = $hit[0];
            $corrected = $maskFirst($corrected, $hit[0], $marker);
        }
    }

    // --- Mask URL / DOI strings --------------------------------------------
    $urlDoiRegex = '~(
        https?://[^\s/]{1,100} # 协议(http/https) + 域名(非空白/字符)
        (?:/+[A-Za-z0-9\.\-]+(?:-[A-Za-z0-9\.\-]+)*)* # 多级路径(支持.html后接/1/23等格式)
        (?:\?[A-Za-z0-9_\-=&%\+\.\~]+)? # 可选查询参数(如?J_num=8&page=1)
        (?:\#[A-Za-z0-9_\-]+)? # 可选锚点(如#section)
        |
        \b[a-zA-Z0-9\.\-]+\.[a-zA-Z]{2,} # 无协议域名(如example.com)
        (?:/+[A-Za-z0-9\.\-]+(?:-[A-Za-z0-9\.\-]+)*)* # 无协议多级路径
        (?:\?[A-Za-z0-9_\-=&%\+\.\~]+)? # 无协议查询参数
        (?:\#[A-Za-z0-9_\-]+)? # 无协议锚点
        (?=$|[\s\.,;!]) # 结束边界(空白或标点)
        |
        doi:\s{0,10}\d+\.\d+/[A-Za-z0-9-+×:]+(?:-[A-Za-z0-9-+×:]+)* # DOI格式
    )~iux';

    if (@preg_match($urlDoiRegex, '') !== false
        && preg_match_all($urlDoiRegex, $corrected, $found, PREG_SET_ORDER)
    ) {
        // Longest first, so a short URL contained in a longer one does not
        // get masked inside it.
        usort($found, static function ($a, $b) {
            return strlen($b[1]) - strlen($a[1]);
        });
        foreach ($found as $index => $hit) {
            $marker = "___EXCLUDE_URL_{$stamp}_{$index}___";
            $excludeMarkers[$marker] = $hit[1];
            $corrected = $maskFirst($corrected, $hit[1], $marker);
        }
    }

    // --- Apply the core rules ----------------------------------------------
    $processedHashes = [];      // md5 of matched strings that were already fixed
    $searchOffsetForCore = 0;   // where the fallback position search starts

    foreach ($this->getTextCoreRules() as $rule) {
        // A pattern PCRE cannot compile is skipped silently.
        // NOTE(review): this also hides rules that never compile at all;
        // consider logging preg_last_error_msg() during development.
        if (@preg_match($rule['pattern'], '') === false) {
            continue;
        }

        $matchCount = preg_match_all(
            $rule['pattern'],
            $corrected,
            $matches,
            PREG_SET_ORDER | PREG_OFFSET_CAPTURE
        );
        // preg_match_all() yields false on a runtime error and 0 on no match.
        if (!$matchCount) {
            continue;
        }

        foreach ($matches as $match) {
            $original = $match[0][0];
            $originalLen = strlen($original);
            $hash = md5($original);

            if (isset($processedHashes[$hash])) {
                continue;
            }

            // Translate the offset in the masked text into an offset in the
            // original text by expanding the markers in the prefix.
            $prefixInOriginal = strtr(substr($corrected, 0, $match[0][1]), $excludeMarkers);
            $posStart = strlen($prefixInOriginal);
            $posEnd = $posStart + $originalLen;

            // Offsets go stale once $corrected has been edited inside this
            // loop, so verify the computed position and, if it is wrong,
            // search forward from the end of the last reported fix.
            if (substr($originalContent, $posStart, $originalLen) !== $original) {
                $localPattern = '~' . preg_quote($original, '~') . '~u';
                if (preg_match($localPattern, $originalContent, $localMatch, PREG_OFFSET_CAPTURE, $searchOffsetForCore)) {
                    $posStart = $localMatch[0][1];
                    $posEnd = $posStart + $originalLen;
                } else {
                    continue;
                }
            }

            $fixed = is_callable($rule['replacement'])
                ? call_user_func($rule['replacement'], $match)
                : preg_replace($rule['pattern'], $rule['replacement'], $original);

            if ($fixed === null || $fixed === $original) {
                continue;
            }

            $searchOffsetForCore = $posEnd;
            // NOTE(review): this rewrites every occurrence of the matched
            // string, including ones the rule's look-arounds would reject.
            $currentCorrected = str_replace($original, $fixed, $corrected);
            $errors[] = $this->createError(
                $original,
                $fixed,
                $rule['explanation'],
                $originalContent,
                $currentCorrected,
                $posStart,
                $posEnd,
                $rule['error_type'] ?? ''
            );
            $processedHashes[$hash] = true;
            $corrected = $currentCorrected;
        }
    }

    // --- Restore the masked fragments --------------------------------------
    if (!empty($excludeMarkers)) {
        $corrected = strtr($corrected, $excludeMarkers);
        // Force-restore anything that still looks like a marker.
        if (preg_match_all('~___EXCLUDE_(wmath|math|URL)_\d+_\d+___~', $corrected, $remaining)) {
            foreach ($remaining[0] as $marker) {
                $corrected = str_replace($marker, $excludeMarkers[$marker] ?? '未知内容', $corrected);
            }
        }
    }

    $this->handleErrors($errors);
    return is_string($corrected) ? $corrected : $content;
}
|
||
/**
 * Build the ordered rule list used for dash / operator / spacing checks.
 *
 * Each rule is an array with the keys 'pattern' (PCRE), 'replacement'
 * (preg_replace replacement string), 'verbatim_texts', 'explanation' and
 * 'error_type'. The list order is the order in which rules are applied.
 *
 * NOTE(review): several patterns below use look-behind branches containing
 * unbounded quantifiers (\s*, \s+, [a-z]+). PCRE requires look-behind
 * branches to have a bounded length, so those patterns are expected to fail
 * compilation; a caller that guards with @preg_match(...) === false skips
 * them silently. They are marked individually. Making them compile would
 * change output, so that needs a deliberate decision plus tests.
 *
 * @return array List of rule arrays.
 */
private function getTextCoreRules()
{
    // Assemble one rule with the keys in their canonical order.
    $rule = static function ($pattern, $replacement, $verbatim, $explanation, $errorType) {
        return [
            'pattern' => $pattern,
            'replacement' => $replacement,
            'verbatim_texts' => $verbatim,
            'explanation' => $explanation,
            'error_type' => $errorType
        ];
    };

    // 1. "Leave as is" pattern: No.-style numbers, bracketed fractions, year
    //    ranges, ranges with units, project numbers, reference entries.
    //    The replacement is '$0', so a match is never altered.
    //    NOTE(review): look-behind uses from\s+, :\s* and \.\s* — expected not
    //    to compile.
    $keepAsIs = '~
        # 【首优先级】No.编号专属排除(如No.: 2023YJZX-LN03/13、NO: KHYJ-2023-05、no. 123-ABC/45)
        # 支持变体:No.大小写、冒号可带/不带点、冒号前后空格、编号含-/_/数字/字母
        \b(?:No|NO|no)\.?:?\s* # 前缀:No./NO./no.(冒号可选,点可选,后接任意空格)
        [A-Za-z0-9\-\/_]+ # 编号主体:支持字母、数字、-、/、_(覆盖2023YJZX-LN03/13)
        (?:-[A-Za-z0-9\-\/_]+)* # 编号后缀:支持多段连接(如2023YJZX-LN03/13-001)
        \b # 单词边界:避免编号后接多余字符(如No.: 2023abc)
        |
        # 括号内分数及百分比组合(如(45/45)、(15.6%, 7/45))
        \(\s*(?:\d+(?:\.\d+)?%?\s*,?\s*)?\d+(?:\.\d+)?\s*/\s*\d+(?:\.\d+)?\s*\)
        |
        # 独立年份范围(如1849-1850、2023 - 2025)
        (?<!\d|[\+\-\*\/=<>]|from\s+|:\s*|\[[MZACDP]\]\.\s*)\d{4}\s*-\s*\d{4}
        (?!\d|[\+\-\*\/=<>]|[,\.:;!]|\+\d+\.)
        |
        # from+年份范围(如from 1849-1850)
        \bfrom\s+\d{4}\s*-\s*\d{4}\b
        |
        # 带单位的数字范围/倍数(如50-200 nm、10×5 cm)
        \b\d+\s*[-×]\s*\d+\s*[a-zA-Z%]
        |
        # 无No.前缀的项目编号(如2023YJZX-LN03/13、KHYJ-2023-05-01)
        [A-Za-z]+-?\d+[-/]\d+[-/]\d*
        |
        # 参考文献格式(期刊/专著等)
        \d{4},\s*\d{1,3}\(\d{1,2}\):\s*\d+-\d+(?:\+\d+)*\.|[^\n]+\[[MZACDP]\]\.\s*[^\n]+,\s*\d{4}:\s*\d+-\d+\.
    ~ux';

    // Spaced hyphen between two 1-3 digit numbers, outside brackets.
    // NOTE(review): look-behind uses :\s* — expected not to compile.
    $plainRangeHyphen = '~
        (?<!\d{4}|:\s*|(?:No|NO|no)\.?:\s*) # 排除No.编号前缀
        \b(\d{1,3})\s*-\s*(\d{1,3})\b
        (?!\d{4}|\+\d+\.)
    ~ux';

    // Compound comparison operators.
    // NOTE(review): look-behind uses [a-z]+ — expected not to compile.
    $compositeOperator = '~(?<!</[a-z]+>)\s*(\S)\s*([<>!]=|===|!==)\s*(\S)(?!<[a-z]+>)~u';

    // Stand-alone equals sign (all look-behind branches are one character).
    $equalSign = '~
        (?<!=|<|>|\*|\+|-|/)
        (\S+?)\s*=\s*(\S+?)
        (?!=|<|>|\*|\+|-|/)
    ~ux';

    // Arithmetic between two plain numbers.
    // NOTE(review): all four look-behinds contain \s* or \s+ — expected not to
    // compile. In the division pattern the "\#" is additionally an escaped,
    // i.e. literal, hash in extended mode, so the text after it is pattern
    // text rather than a comment.
    $multiplication = '~
        (?<!\D|×\s*|(?:No|NO|no)\.?:\s*) # 排除No.编号前缀
        (\d+(?:\.\d+)?)
        \s*(\*)
        \s*(\d+(?:\.\d+)?)
        (?!\D|\s*\])
    ~ux';
    $division = '~
        (?<!\D|\(\s*|(?:No|NO|no)\.?:\s*) \# 排除No.编号前缀
        (\d+(?:\.\d+)?)
        \s*(\/)
        \s*(\d+(?:\.\d+)?)
        (?!\D|\s*\))
    ~ux';
    $addition = '~
        (?<!\D|-\s*|(?:No|NO|no)\.?:\s*) # 排除No.编号前缀
        (\d+(?:\.\d+)?)
        \s*(\+)
        \s*(\d+(?:\.\d+)?)
        (?!\D|\s*\.)
    ~ux';
    $subtraction = '~
        (?<!\D|from\s+|:\s*|(?:No|NO|no)\.?:\s*) # 排除No.编号前缀
        (\d+(?:\.\d+)?)
        \s*(-)
        \s*(\d+(?:\.\d+)?)
        (?!\D|\s*\,)
    ~ux';

    return [
        // 1. Highest priority: formats that must be left untouched.
        $rule(
            $keepAsIs,
            '$0',
            'No.编号及非运算场景无需处理',
            'No.系列编号(如No.: 2023YJZX-LN03/13)、括号内分数、年份范围、带单位数字范围、项目编号、参考文献等非运算场景的符号不做处理',
            'exclude'
        ),

        // 2. Number ranges.
        // NOTE(review): the bracketed rules carry 'en-dash' and the unbracketed
        // ones 'bracket_en-dash'; the tags look swapped — confirm with whatever
        // consumes error_type.
        $rule(
            '~(\[\s*[-]?\d+\s*)\x{2014}\s*(\d+\s*\])~u',
            '$1-$2',
            '带括号数字范围长划线不规范',
            '带括号的数字范围应使用短划线[-]',
            'en-dash'
        ),
        $rule(
            '~(\[\s*[-]?\d+)\s*-\s*(\d+\s*\])~u',
            '$1-$2',
            '带括号数字范围短划线空格不规范',
            '带括号数字范围的短划线[-]前后不应留空格',
            'en-dash'
        ),
        $rule(
            '~(\b\d+)\s*—\s*(\d+\b)~u',
            '$1-$2',
            '无括号数字范围长划线不规范',
            '无括号的数字范围应使用短划线[-]',
            'bracket_en-dash'
        ),
        $rule(
            $plainRangeHyphen,
            '$1-$2',
            '无括号数字范围短划线空格不规范',
            '无括号数字范围的短划线[-]前后不应留空格',
            'bracket_en-dash'
        ),

        // 3. Operators.
        $rule(
            $compositeOperator,
            '$1 $2 $3',
            '复合运算符前后空格不规范',
            '复合运算符[>=、<=、==、!=、===、!==]前后应各留一个空格',
            'composite_operator'
        ),
        $rule(
            $equalSign,
            '$1 = $2',
            '等号前后空格不规范',
            '独立等号[=]前后应各留一个空格',
            'equal'
        ),
        $rule(
            $multiplication,
            '$1 × $3',
            '乘法运算符格式不规范',
            '乘法运算应使用标准乘号[×],前后各留一个空格',
            'ride'
        ),
        $rule(
            $division,
            '$1 $2 $3',
            '除法运算符前后空格不规范',
            '除法运算符[/]前后应各留一个空格(纯数字运算场景)',
            'except'
        ),
        $rule(
            $addition,
            '$1 $2 $3',
            '加法运算符前后空格不规范',
            '加法运算符[+]前后应各留一个空格(纯数字运算场景)',
            'plus'
        ),
        $rule(
            $subtraction,
            '$1 $2 $3',
            '减法运算符前后空格不规范',
            '减法运算符[-]前后应各留一个空格(纯数字运算场景)',
            'reduce'
        ),

        // 4. Lowest priority: special symbols.
        $rule(
            '~(\d+)\s+%~u',
            '$1%',
            '数字与百分号空格不规范',
            '数字与百分号[%]之间不应留空格',
            'number_percentage'
        ),
        $rule(
            '~(\d+)\s+×\s+(\d+)~u',
            '$1×$2',
            '倍数乘号空格不规范',
            '乘号[×]表示倍数时前后不应留空格',
            'multiple'
        ),
        $rule(
            '~(\d+)\s+:\s+(\d+)~u',
            '$1:$2',
            '比值符号空格不规范',
            '比值符号[:]前后不应留空格',
            'biliel'
        )
    ];
}
|
||
/**
 * Number formatting: strip useless decimal zeros and add thousands
 * separators to long integers.
 *
 * Steps:
 *  1. Eight protection passes swap fragments that must not be touched
 *     (bracketed numbers, postal codes/phone numbers, software versions,
 *     special decimals, years/dates, decimals with a unit, short-prefix
 *     numbers, URLs/DOIs) for placeholders.
 *  2. Trailing zeros ("1.50" -> "1.5") and all-zero fractions ("5.00" -> "5")
 *     are removed, then integers of 7+ digits get thousands separators.
 *     Each distinct number is reported once via createError().
 *  3. Placeholders are restored in REVERSE creation order. A later pass can
 *     capture text that already contains an earlier placeholder (e.g. a URL
 *     containing a protected year); restoring in creation order would leave
 *     that inner placeholder behind for the final cleanup to delete.
 *
 * Reported positions are byte offsets into the text as it was passed in;
 * when a number cannot be located they are false / -1.
 *
 * @param mixed $content Text to check. Non-strings and blank strings are
 *                       returned untouched.
 * @return mixed The corrected text, or $content itself on failure.
 */
private function checkNumberFormat($content) {
    $errors = [];

    if (!is_string($content) || trim($content) === '') {
        $this->handleErrors($errors);
        return $content;
    }

    $originalContent = $content;
    $correctedContent = $content;
    $replacements = [];      // literal number => fixed number, for reporting
    $placeholderMaps = [];   // one [placeholder => original] map per pass, in creation order
    $placeholderSeq = 0;
    $runToken = uniqid();    // one token per call; the counter keeps placeholders unique

    // Replace every match of $pattern with a unique placeholder and remember
    // the mapping. A pattern that does not compile, or a pass that fails at
    // runtime, leaves the text unchanged.
    $protect = static function ($pattern, $label) use (&$correctedContent, &$placeholderMaps, &$placeholderSeq, $runToken) {
        if (@preg_match($pattern, '') === false) {
            return;
        }
        $map = [];
        $masked = preg_replace_callback(
            $pattern,
            static function ($m) use (&$map, &$placeholderSeq, $label, $runToken) {
                $placeholder = '___' . $label . '_' . $runToken . '_' . (++$placeholderSeq) . '___';
                $map[$placeholder] = $m[0];
                return $placeholder;
            },
            $correctedContent
        );
        if (is_string($masked) && $map !== []) {
            $correctedContent = $masked;
            $placeholderMaps[] = $map;
        }
    };

    // Find $needle in the original text, preferring the first occurrence at or
    // after $cursor and falling back to a search from the start. Returns
    // [start, end]; [false, -1] when the needle does not occur at all.
    $locate = static function ($needle, &$cursor) use ($originalContent) {
        $start = strpos($originalContent, $needle, $cursor);
        if ($start === false && $cursor > 0) {
            $start = strpos($originalContent, $needle);
        }
        if ($start === false) {
            return [false, -1];
        }
        $cursor = $start + strlen($needle);
        return [$start, $cursor];
    };

    // --- 1. Protection passes (order matters) -------------------------------

    // Bracketed numbers / ranges such as (960-1279).
    $protect('~
        \(\d+[-\d]*\d+\) # 仅匹配带括号的数字/数字范围
    ~ux', 'BRACKETED_NUM');

    // "<place name> <4-6 digits>" postal codes and area-code phone numbers.
    $protect('~
        \b(?:[A-Za-z]+(?:\s+[A-Za-z]+)*|[\x{4e00}-\x{9fa5}]+)\s+\d{4,6}\b # 强制空格(如Jiangsu 223300、北京 100000)
        |\b0\d{2,3}\d{7}\b # 兼容区号+固定电话(02588888888、01012345678)
    ~uix', 'POSTAL_CODE');

    // Software versions: "<name> 1.2.3".
    $protect('~
        \b(?:[A-Za-z]+(?:\s+[A-Za-z]+)*|[\x{4e00}-\x{9fa5}]+(?:\s+[\x{4e00}-\x{9fa5}]+)*)\s+\d+\.\d+(?:\.\d+)*\b
    ~uix', 'SOFTWARE_VERSION');

    // Special decimals.
    $protect('~
        a=\s*[\d+\.\d+[A-Za-z]+\d*\-+]+
        |\b\d+\.\d+[A-Za-z]+\d*\b
        |\b\d+\.\d+[-+]\d+\.\d+[A-Za-z]+\d*\b
        |\b\d+\.\d+[-+]\d+\.\d+\b
    ~ux', 'SPECIAL_DECIMAL');

    // Years / year-month values (2023, 202309, 2023-0021, ...).
    $protect('~
        \b(?:20\d{2}|20\d{2}(0[1-9]|1[0-2])|20\d{2}-00\d{2})\b(?!\s*[A-Za-z]|\.)
    ~ux', 'DATE_PROTECT');

    // Decimals glued to a unit (1.20mL, 0.50mg) keep their trailing zeros.
    $protect('~
        \b(?:\d+\.\d+[A-Za-z]+|\d+\.[A-Za-z]+)\b(?!\s*[0-9.])
    ~ux', 'DECIMAL_ALPHA');

    // Short alphabetic prefix followed by a number.
    $protect('~
        (?:^|\s|\()
        (?:(?!No\.|NO\.|PO|SO|SN|BN|REF|ORD|ID|PID)[A-Za-z]{1,3}(?:s?\.?))
        \s*
        (?:[A-Za-z]+\d+|\d+[A-Za-z]+|[A-Za-z]+\d+[A-Za-z]+|\d{1,3}(?:,\d{3})*|\d+)
        (?:$|\s|\)|\,|\.)
    ~ux', 'UNIVERSAL_PREFIX');

    // URLs / DOIs, so digits inside links never get separators.
    $protect(
        '#([^\w]|^)(https?://[^<>\s]+|doi:\s{0,10}\d{1,10}\.\d{1,10}/[A-Za-z0-9]{1,30})([^\w]|$)#i',
        'URL_DOI'
    );

    // --- 2a. Useless decimal zeros ------------------------------------------

    // Apply $pattern in context (so its look-ahead is honoured for every
    // occurrence) and report each distinct number once. Group 1 of the pattern
    // is the value to keep.
    $stripZeros = function ($pattern, $message) use (&$correctedContent, &$replacements, &$errors, $originalContent, $locate) {
        $cursor = 0;
        $result = preg_replace_callback(
            $pattern,
            function ($m) use (&$replacements, &$errors, &$cursor, $originalContent, $locate, $message) {
                $number = $m[0];
                if (!isset($replacements[$number])) {
                    $replacements[$number] = $m[1];
                    list($posStart, $posEnd) = $locate($number, $cursor);
                    // The "after" preview is a plain literal substitution on
                    // the original text, as before.
                    $errors[] = $this->createError(
                        $number, $m[1], $message,
                        $originalContent, strtr($originalContent, $replacements), $posStart, $posEnd, 'invalid_zero'
                    );
                }
                return $m[1];
            },
            $correctedContent
        );
        if (is_string($result)) {
            $correctedContent = $result;
        }
    };

    $stripZeros(
        '~(-?\d+\.\d*[1-9])0+(?!\d|e|E|___DATE_PROTECT_|___DECIMAL_ALPHA_|___UNIVERSAL_PREFIX_|No\.|PO|SO|___SPECIAL_DECIMAL_|___SOFTWARE_VERSION_|___POSTAL_CODE_|___BRACKETED_NUM_|\-|\+|[A-Za-z])~ix',
        "删除普通小数后末尾无效零"
    );
    $stripZeros(
        '~(-?\d+)\.0+(?!\d|e|E|___DATE_PROTECT_|___DECIMAL_ALPHA_|___UNIVERSAL_PREFIX_|No\.|PO|SO|___SPECIAL_DECIMAL_|___SOFTWARE_VERSION_|___POSTAL_CODE_|___BRACKETED_NUM_|\-|\+|[A-Za-z])~ix',
        "删除普通小数后全量无效零"
    );

    // --- 2b. Thousands separators -------------------------------------------
    $excludePatterns = implode('|', [
        'https?://[^<>\s]+|doi:\s{0,10}\d{1,10}\.\d{1,10}/[A-Za-z0-9]{1,30}',
        '20\d{2}(?:0[1-9]|1[0-2])?(?:0[1-9]|[12]\d|3[01])?|20\d{2}-00\d{2}',
        '\d+\.\d+[A-Za-z]+|\d+\.[A-Za-z]+',
        '\(\d+[-\d]*\d+\)',
        '(?:[A-Za-z]+(?:\s+[A-Za-z]+)*|[\x{4e00}-\x{9fa5}]+)\s+\d{4,6}\b|0\d{2,3}\d{7}\b',
        '(?:[A-Za-z]+(?:\s+[A-Za-z]+)*|[\x{4e00}-\x{9fa5}]+(?:\s+[\x{4e00}-\x{9fa5}]+)*)\s+\d+\.\d+(?:\.\d+)*',
        '[A-Za-z]{1,3}s?\.?\s*(?:[A-Za-z]+\d+|\d+[A-Za-z]+|\d{1,3}(?:,\d{3})*|\d+)',
        'No\.?\s*\d+|PO\s*\d+|SO\s*\d+|SN\s*\d+',
        'a=\s*[\d+\.\d+[A-Za-z]+\d*\-+]+',
        '___DATE_PROTECT_.*?___|___DECIMAL_ALPHA_.*?___|___UNIVERSAL_PREFIX_.*?___|___URL_DOI_.*?___|___SPECIAL_DECIMAL_.*?___|___SOFTWARE_VERSION_.*?___|___POSTAL_CODE_.*?___|___BRACKETED_NUM_.*?___'
    ]);
    // Only plain integers of 7 or more digits are grouped, so 6-digit batch
    // numbers are left alone.
    $thousandPattern = sprintf(
        '#(?<!\.)\b(?!(?:%s))\d{7,}\b(?!\.)#ixu',
        str_replace('#', '\#', $excludePatterns)
    );

    if (@preg_match($thousandPattern, '') !== false) {
        $cursor = 0;
        $grouped = preg_replace_callback(
            $thousandPattern,
            function ($m) use (&$replacements, &$errors, &$cursor, $originalContent, $locate) {
                $digits = $m[0];
                // Anything containing a date-like or phone-like run is left alone.
                if (preg_match('~20\d{2}(0[1-9]|1[0-2])?(0[1-9]|[12]\d|3[01])?|20\d{2}-00\d{2}|(?:[A-Za-z]+|[\x{4e00}-\x{9fa5}]+)\s*\d{4,6}\b|0\d{2,3}\d{7}\b~u', $digits)) {
                    return $digits;
                }
                // A repeated number is formatted like its first occurrence but
                // reported only once.
                if (isset($replacements[$digits])) {
                    return $replacements[$digits];
                }
                // NOTE(review): number_format() works on floats, so integers
                // beyond about 15 digits may lose precision — confirm such
                // input cannot occur.
                $formatted = number_format((float) $digits);
                $replacements[$digits] = $formatted;
                list($posStart, $posEnd) = $locate($digits, $cursor);
                $errors[] = $this->createError(
                    $digits, $formatted, "四位及以上的数字需要每三位加一个逗号",
                    $originalContent, strtr($originalContent, $replacements), $posStart, $posEnd, 'thousandth_separator'
                );
                return $formatted;
            },
            $correctedContent
        );
        if (is_string($grouped)) {
            $correctedContent = $grouped;
        }
    }

    // --- 3. Restore protected fragments, newest pass first -------------------
    foreach (array_reverse($placeholderMaps) as $map) {
        $correctedContent = strtr($correctedContent, $map);
    }

    // Safety net: drop anything that still looks like a placeholder.
    $correctedContent = preg_replace('~___(BRACKETED_NUM|POSTAL_CODE|SOFTWARE_VERSION|SPECIAL_DECIMAL|DATE_PROTECT|DECIMAL_ALPHA|UNIVERSAL_PREFIX|URL_DOI)_.*?___~', '', $correctedContent);

    $this->handleErrors($errors);
    return is_string($correctedContent) ? $correctedContent : $content;
}
|
||
/**
 * Normalise "No. 123456"-style identifiers.
 *
 * Two rules rewrite text:
 *  - No./NO/no prefixes become "No. <body>";
 *  - business prefixes (PO, SO, SN, BN, REF, ORD, ID, PID; case-insensitive)
 *    become "<PREFIX upper-cased> <body>".
 * A third rule matches bare numeric / alphanumeric identifiers but its
 * normaliser returns the text unchanged, so it never reports anything.
 *
 * Matches that look like a postal code, an area-code phone number, a date, a
 * URL or a DOI are skipped. Each distinct original string is reported once
 * via createError() and replaced everywhere in the text.
 *
 * @param mixed $content Text to check. Non-strings and blank strings are
 *                       returned untouched.
 * @return mixed The corrected text.
 */
private function checkNoFormatUniformity($content) {
    $errors = [];
    if (!is_string($content) || trim($content) === '') {
        $this->handleErrors($errors);
        return $content;
    }

    $corrected = $content;
    $originalContent = $content;
    $replaceMap = [];
    $searchOffset = 0;

    // Shapes that must never be treated as identifiers.
    $postalCodePattern = '~(?:[A-Za-z]+(?:\s+[A-Za-z]+)*|[\x{4e00}-\x{9fa5}]+)\s+\d{4,6}\b~u'; // postal code
    $areaCodePattern = '~0\d{2,3}\d{7}\b~u'; // area code + phone number
    $datePattern = '~20\d{2}(0[1-9]|1[0-2])(0[1-9]|[12]\d|3[01])?|20\d{2}-00\d{2}~';
    $urlPattern = '~https?://[^<>\s]+~i'; // e.g. https://test.com/10.1101/2024.11.10
    $doiPattern = '~doi:\s{0,10}\d{1,10}\.\d{1,10}/[A-Za-z0-9\-_]+~i'; // e.g. doi:11.1/1-1-1-1-9_2

    $isExcluded = static function ($text) use ($postalCodePattern, $areaCodePattern, $datePattern, $urlPattern, $doiPattern) {
        return preg_match($postalCodePattern, $text)
            || preg_match($areaCodePattern, $text)
            || preg_match($datePattern, $text)
            || preg_match($urlPattern, $text)
            || preg_match($doiPattern, $text);
    };

    // 'bodyGroup' names the capture group that holds the identifier body. The
    // business-prefix pattern captures the prefix in group 1 and the body in
    // group 2; taking group 1 for both used to turn "PO123" into "PO PO".
    $batchNumberRules = [
        [
            'name' => 'No.前缀批号',
            'pattern' => '~
                \b
                (?:[Nn][Oo]\.|[Nn][Oo]|NO\.|NO)
                \s*
                (\d+[A-Za-z0-9\-_]*)
                \b
                (?!\s*[年月日]|20\d{2}(?:0[1-9]|1[0-2])?|\.\d+|20\d{2}-00\d{2}
                |https?://[^<>\s]+ # 排除URL
                |doi:\s{0,10}\d{1,10}\.\d{1,10}/[A-Za-z0-9\-_]+) # 排除DOI
            ~ux',
            'standardPrefix' => 'No.',
            'bodyGroup' => 1,
            'spaceAfterPrefix' => true,
            'description' => '带No.前缀的编号(如No. 123、NO.45-A)'
        ],
        [
            'name' => '业务前缀批号',
            'pattern' => '~
                \b
                (PO|SO|SN|BN|REF|ORD|ID|PID)
                \s*
                (\d+[A-Za-z0-9\-_]*)
                \b
                (?!\s*[年月日]|20\d{2}(?:0[1-9]|1[0-2])?|\.\d+|20\d{2}-00\d{2}
                |https?://[^<>\s]+ # 排除URL
                |doi:\s{0,10}\d{1,10}\.\d{1,10}/[A-Za-z0-9\-_]+) # 排除DOI
            ~iux',
            'standardPrefix' => function($match) {
                return strtoupper($match[1]);
            },
            'bodyGroup' => 2,
            'spaceAfterPrefix' => true,
            'description' => '带业务前缀的编号'
        ],
        [
            'name' => '混合批号',
            'pattern' => '~
                \b
                (?:
                \d{6,}(?!20\d{2}(0[1-9]|1[0-2])(0[1-9]|[12]\d|3[01])?|20\d{2}-00\d{2}
                |(?<=[A-Za-z\s])\d{4,6}\b # 排除邮编
                |0\d{2,3}\d{7}\b # 排除区号
                |https?://[^<>\s]+ # 排除URL
                |doi:\s{0,10}\d{1,10}\.\d{1,10}/[A-Za-z0-9\-_]+) # 排除DOI
                |[A-Za-z]{2,}\d{4,}
                |[A-Za-z0-9]{8,}(?!20\d{2}(0[1-9]|1[0-2])(0[1-9]|[12]\d|3[01])?|20\d{2}-00\d{2}
                |https?://[^<>\s]+ # 排除URL
                |doi:\s{0,10}\d{1,10}\.\d{1,10}/[A-Za-z0-9\-_]+) # 排除DOI
                )
                \b
                (?!\s*[年月日])
                (?!\.\d+)
                (?!\d+\.\d+[A-Za-z]+)
                (?!(?:^|\s|\()(?:[A-Za-z]{1,3}(?:s?\.?))\s*)
            ~ux',
            // NOTE(review): this normaliser has always returned its input
            // unchanged, so the rule never produces a correction. Kept as the
            // hook for a future normalisation — confirm it is still wanted.
            'standardize' => function($original) {
                return $original;
            },
            'description' => '纯数字/字母混合编号'
        ]
    ];

    foreach ($batchNumberRules as $rule) {
        if (@preg_match($rule['pattern'], '') === false) {
            continue;
        }
        if (!preg_match_all($rule['pattern'], $corrected, $matches, PREG_SET_ORDER)) {
            continue;
        }

        foreach ($matches as $match) {
            $originalFull = $match[0];

            if ($isExcluded($originalFull)) {
                continue;
            }

            $fixedFull = $originalFull;
            if (isset($rule['standardPrefix'])) {
                // Use the groups of the match we already have; re-matching the
                // isolated text is unnecessary.
                $standardPrefix = $rule['standardPrefix'] instanceof \Closure
                    ? $rule['standardPrefix']($match)
                    : $rule['standardPrefix'];
                $space = $rule['spaceAfterPrefix'] ? ' ' : '';
                $fixedFull = $standardPrefix . $space . $match[$rule['bodyGroup']];
            } elseif (isset($rule['standardize']) && is_callable($rule['standardize'])) {
                $fixedFull = $rule['standardize']($originalFull);
            }

            if ($originalFull === $fixedFull || isset($replaceMap[$originalFull])) {
                continue;
            }

            $replaceMap[$originalFull] = $fixedFull;

            // Prefer the next occurrence after the previous finding. Fall back
            // to a search from the start, because a later rule may match text
            // located before an earlier rule's finding.
            $posStart = strpos($originalContent, $originalFull, $searchOffset);
            if ($posStart === false && $searchOffset > 0) {
                $posStart = strpos($originalContent, $originalFull);
            }
            $posEnd = $posStart !== false ? $posStart + strlen($originalFull) : -1;
            $searchOffset = $posEnd !== -1 ? $posEnd : $searchOffset;

            $errorHash = md5($originalFull . $fixedFull);
            $errors[$errorHash] = $this->createError(
                $originalFull, $fixedFull,
                "{$rule['description']}格式不规范,标准格式为「{$fixedFull}」",
                $originalContent, strtr($originalContent, $replaceMap),
                $posStart, $posEnd, $rule['name']
            );
        }
    }

    if (!empty($replaceMap)) {
        $corrected = strtr($corrected, $replaceMap);
    }
    $this->handleErrors($errors);
    return $corrected;
}
|
||
/**
 * Time-unit abbreviation check: rewrites quantities such as "5 Hours",
 * "3 分钟" or "2.5 S" to "<number><abbr>" with no space (5h, 3min, 2.5s).
 *
 * A quantity is skipped when the number is directly preceded by an ASCII
 * letter, digit, "_", "/", "-", "(", ":" (or full-width "(" / ":"), or when
 * the unit is directly followed by an ASCII letter, digit, "_", "/", "-",
 * ")", "," or "." (or full-width ")" / ",").
 *
 * Each distinct (original, fixed) pair is reported once via createError()
 * and handed to handleErrors(). Reported positions are byte offsets of the
 * first occurrence in $content.
 *
 * @param mixed $content Text to check.
 * @return mixed Corrected text; non-string or blank input is returned unchanged.
 */
private function checkTimeUnitAbbreviations($content) {
    if (!is_string($content) || trim($content) === '') {
        return $content;
    }

    $timeUnits = [
        ['full' => 'hour',   'abbr' => 'h',   'description' => '小时', 'cn_full' => '小时'],
        ['full' => 'minute', 'abbr' => 'min', 'description' => '分钟', 'cn_full' => '分钟'],
        ['full' => 'second', 'abbr' => 's',   'description' => '秒',   'cn_full' => '秒'],
    ];

    // Lookbehinds must be fixed-length in PCRE, so the left-hand exclusions are
    // expressed as single-character classes. The second lookbehind stops a match
    // from starting on the fractional part of a number whose integer part was
    // rejected (e.g. the "5" in "A2.5 h").
    $notBefore = '(?<![A-Za-z0-9_\/\-(:\x{FF08}\x{FF1A}])(?<![0-9]\.)';
    $notAfter  = '(?![A-Za-z0-9_\/\-),.\x{FF09}\x{FF0C}])';

    // Byte offset => pending fix, collected against the untouched input so that
    // every offset refers to the original text.
    $fixes = [];

    foreach ($timeUnits as $unit) {
        // Full word first so "hours" is not cut short by the bare abbreviation.
        $unitAlternatives = preg_quote($unit['full'], '~') . 's?|'
            . preg_quote($unit['abbr'], '~') . '|'
            . preg_quote($unit['cn_full'], '~');
        $pattern = '~' . $notBefore
            . '([0-9]+(?:\.[0-9]+)?)\s*(' . $unitAlternatives . ')'
            . $notAfter . '~iu';

        $found = preg_match_all($pattern, $content, $matches, PREG_SET_ORDER | PREG_OFFSET_CAPTURE);
        if ($found === false || $found === 0) {
            // false: PCRE runtime failure (e.g. input is not valid UTF-8).
            continue;
        }

        foreach ($matches as $match) {
            $original = $match[0][0];
            $start    = $match[0][1];
            $number   = $match[1][0];
            $unitText = $match[2][0];
            $fixed    = $number . strtolower($unit['abbr']);

            if ($original === $fixed) {
                continue;
            }

            if (stripos($unitText, $unit['full']) !== false || strpos($unitText, $unit['cn_full']) !== false) {
                $errorReason = "应使用缩写'{$unit['abbr']}'(不使用全称'{$unitText}')";
            } elseif (strlen($original) !== strlen($number) + strlen($unitText)) {
                // Any whitespace between number and unit, not only an ASCII space.
                $errorReason = "数字与单位间不应有空格";
            } else {
                $errorReason = "单位缩写应小写'{$unit['abbr']}'(不使用'{$unitText}')";
            }

            $fixes[$start] = [
                'original'    => $original,
                'fixed'       => $fixed,
                'explanation' => "{$unit['description']}格式不规范:{$errorReason},标准格式为[数字{$unit['abbr']}](如3h、2.5min)",
                'type'        => $unit['full'],
            ];
        }
    }

    if (empty($fixes)) {
        return $content;
    }

    // Apply the fixes left to right at their exact offsets. Unlike a global
    // strtr(), this never touches an occurrence the pattern itself rejected.
    ksort($fixes);
    $errors    = [];
    $corrected = '';
    $cursor    = 0;

    foreach ($fixes as $start => $fix) {
        if ($start < $cursor) {
            continue; // defensive: never apply overlapping fixes
        }
        $end        = $start + strlen($fix['original']);
        $corrected .= substr($content, $cursor, $start - $cursor) . $fix['fixed'];
        $cursor     = $end;

        $errorHash = md5($fix['original'] . $fix['fixed']);
        if (!isset($errors[$errorHash])) {
            $errors[$errorHash] = $this->createError(
                $fix['original'],
                $fix['fixed'],
                $fix['explanation'],
                $content,
                $corrected . substr($content, $cursor), // preview: all fixes up to this one
                $start,
                $end,
                $fix['type']
            );
        }
    }
    $corrected .= substr($content, $cursor);

    $this->handleErrors($errors);

    return $corrected;
}
|
||
|
||
|
||
/**
 * Millilitre unit check: rewrites "ml" / "ML" / "Ml" (optionally preceded by
 * a number, e.g. "5ml", "3.0 ML") to the standard spelling "mL".
 *
 * A token is skipped when it is directly adjacent to another letter
 * (so "html" or "MLabc" are untouched), and a bare upper-case "ML" that
 * follows a word and whitespace is skipped as well, because in reference
 * lists that shape is an author's initials ("Yeh ML").
 *
 * Each distinct (original, fixed) pair is reported once via createError()
 * and handed to handleErrors(). Reported positions are byte offsets of the
 * first occurrence in $content.
 *
 * @param mixed $content Text to check.
 * @return mixed Corrected text; non-string or blank input is returned unchanged.
 */
private function checkMlUnit($content) {
    if (!is_string($content) || trim($content) === '') {
        return $content;
    }

    // Group 1: optional numeric prefix including trailing whitespace.
    // Group 2: the unit as written. Kept on one line: a multi-line pattern
    // needs the "x" modifier and must not contain the delimiter in comments.
    $mlPattern = '/(?<!\p{L})([0-9]+(?:\.[0-9]+)?\s*)?(ml)(?!\p{L})/iu';

    $found = preg_match_all($mlPattern, $content, $allMatches, PREG_SET_ORDER | PREG_OFFSET_CAPTURE);
    if ($found === false || $found === 0) {
        return $content;
    }

    $errors    = [];
    $corrected = '';
    $cursor    = 0;

    foreach ($allMatches as $matchItem) {
        $originalFull = $matchItem[0][0];
        $start        = $matchItem[0][1];
        $prefix       = isset($matchItem[1][0]) ? $matchItem[1][0] : '';
        $unitText     = $matchItem[2][0];
        $fixedFull    = "{$prefix}mL";

        if ($originalFull === $fixedFull) {
            continue;
        }

        if ($prefix === '' && $unitText === 'ML' && $start > 0) {
            // Inspect a short window before the match; advance its start past any
            // UTF-8 continuation bytes so the window is itself valid UTF-8.
            $windowStart = max(0, $start - 32);
            while ($windowStart < $start && (ord($content[$windowStart]) & 0xC0) === 0x80) {
                $windowStart++;
            }
            $before = substr($content, $windowStart, $start - $windowStart);
            if (preg_match('/\p{L}\s+$/u', $before) === 1) {
                continue; // "Surname ML": initials, not a unit
            }
        }

        // Replace at the exact matched offset instead of a global strtr().
        $end        = $start + strlen($originalFull);
        $corrected .= substr($content, $cursor, $start - $cursor) . $fixedFull;
        $cursor     = $end;

        $errorHash = md5($originalFull . $fixedFull);
        if (!isset($errors[$errorHash])) {
            $errors[$errorHash] = $this->createError(
                $originalFull,
                $fixedFull,
                '毫升单位格式不规范,标准写法为[mL]',
                $content,
                $corrected . substr($content, $cursor), // preview: all fixes up to this one
                $start,
                $end,
                'mL'
            );
        }
    }
    $corrected .= substr($content, $cursor);

    $this->handleErrors($errors);

    return $corrected;
}
|
||
|
||
|
||
/**
 * Significance P-value check: wraps the P/p of a P-value expression in
 * <i>…</i>, e.g. "P=0.05" becomes "<i>P</i>=0.05".
 *
 * An expression is P or p, a comparison operator (=, <, >, ≤, ≥, their
 * full-width forms, or the entities &lt; &gt; &le; &ge;) with optional
 * surrounding whitespace, then a number (integer, decimal, leading-dot
 * decimal, optional exponent). Requiring the operator keeps identifiers such
 * as "p53" or "P450" from being italicised. A P directly preceded by a
 * letter, digit, "_", "<" or "/" is skipped, so HTML tags such as <p> and
 * </p> are never touched.
 *
 * Each distinct (original, fixed) pair is reported once via createError()
 * and handed to handleErrors(). Reported positions are byte offsets of the
 * first occurrence in $content.
 *
 * @param mixed $content Text to check.
 * @return mixed Corrected text; non-string or blank input is returned unchanged.
 */
private function checkPSignificance($content) {
    if (!is_string($content) || trim($content) === '') {
        return $content;
    }

    $pValuePattern = '/(?<![\p{L}\p{N}_<\/])([Pp])'
        . '(\s*(?:&(?:lt|gt|le|ge);|[=<>\x{2264}\x{2265}\x{2A7D}\x{2A7E}\x{FF1C}\x{FF1D}\x{FF1E}])\s*)'
        . '((?:[0-9]+(?:\.[0-9]+)?|\.[0-9]+)(?:[eE][+-]?[0-9]+)?)'
        . '(?![\p{L}\p{N}_])/u';

    $found = preg_match_all($pValuePattern, $content, $allMatches, PREG_SET_ORDER | PREG_OFFSET_CAPTURE);
    if ($found === false || $found === 0) {
        return $content;
    }

    $errors    = [];
    $corrected = '';
    $cursor    = 0;

    foreach ($allMatches as $matchItem) {
        $original  = $matchItem[0][0];
        $start     = $matchItem[0][1];
        $pChar     = $matchItem[1][0];
        $separator = $matchItem[2][0];
        $number    = $matchItem[3][0];

        // Only the letter is italicised; operator and number are kept verbatim.
        $fixed = "<i>{$pChar}</i>{$separator}{$number}";

        // Replace at the exact matched offset instead of a global strtr().
        $end        = $start + strlen($original);
        $corrected .= substr($content, $cursor, $start - $cursor) . $fixed;
        $cursor     = $end;

        $errorHash = md5($original . $fixed);
        if (!isset($errors[$errorHash])) {
            $errors[$errorHash] = $this->createError(
                $original,
                $fixed,
                '显著性P值格式不规范,P/p应使用斜体',
                $content,
                $corrected . substr($content, $cursor), // preview: all fixes up to this one
                $start,
                $end,
                'P'
            );
        }
    }
    $corrected .= substr($content, $cursor);

    $this->handleErrors($errors);

    return $corrected;
}
|
||
|
||
/**
 * Figure/table title check: abbreviated references ("Fig. 1", "Figs 2-3",
 * "Tab 1", "Tabs. 4–5") are rewritten to the full word followed by a single
 * space and the number ("Figure 1", "Figures 2-3", "Table 1", "Tables 4–5").
 *
 * The abbreviation is matched case-insensitively, may carry a trailing dot,
 * and may be separated from the number by any whitespace (including a
 * no-break space) or none. The number may be a range joined by "-", an
 * en dash or an em dash. Matches glued to other word characters are skipped.
 *
 * Each distinct (original, fixed) pair is reported once via createError()
 * and handed to handleErrors(). Reported positions are byte offsets of the
 * first occurrence in $content.
 *
 * @param mixed $content Text to check.
 * @return mixed Corrected text; non-string or blank input is returned unchanged.
 */
private function checkFigureTableTitle($content) {
    if (!is_string($content) || trim($content) === '') {
        return $content;
    }

    // Lower-cased abbreviation => [full word, explanation shown to the user].
    $fullNames = [
        'fig'  => ['Figure',  '图表标题使用缩写"Fig",正确:"Figure"'],
        'figs' => ['Figures', '图表标题复数使用缩写"Figs",正确:"Figures"'],
        'tab'  => ['Table',   '表格标题使用缩写"Tab",正确:"Table"'],
        'tabs' => ['Tables',  '表格标题复数使用缩写"Tabs",正确:"Tables"'],
    ];

    // PCRE has no \uXXXX escape; code points are written as \x{XXXX}.
    $titlePattern = '/(?<!\w)(Figs?|Tabs?)\.?[\s\x{00A0}]*([0-9]+(?:[-\x{2013}\x{2014}][0-9]+)?)(?!\w)/iu';

    $found = preg_match_all($titlePattern, $content, $allMatches, PREG_SET_ORDER | PREG_OFFSET_CAPTURE);
    if ($found === false || $found === 0) {
        return $content;
    }

    $errors    = [];
    $corrected = '';
    $cursor    = 0;

    foreach ($allMatches as $matchItem) {
        $originalFull = $matchItem[0][0];
        $start        = $matchItem[0][1];
        $abbrKey      = strtolower($matchItem[1][0]);
        $number       = $matchItem[2][0];

        if (!isset($fullNames[$abbrKey])) {
            continue;
        }
        list($fullName, $errorDesc) = $fullNames[$abbrKey];

        $fixed = "{$fullName} {$number}";
        if ($originalFull === $fixed) {
            continue;
        }

        // Replace at the exact matched offset instead of a global strtr().
        $end        = $start + strlen($originalFull);
        $corrected .= substr($content, $cursor, $start - $cursor) . $fixed;
        $cursor     = $end;

        $errorHash = md5($originalFull . $fixed);
        if (!isset($errors[$errorHash])) {
            $errors[$errorHash] = $this->createError(
                $originalFull,
                $fixed,
                $errorDesc,
                $content,
                $corrected . substr($content, $cursor), // preview: all fixes up to this one
                $start,
                $end,
                $fullName
            );
        }
    }
    $corrected .= substr($content, $cursor);

    $this->handleErrors($errors);

    return $corrected;
}
|
||
|
||
|
||
|
||
/**
 * Appends one error record to $this->errors.
 *
 * Missing fields are filled with empty-string defaults so every stored
 * record has the same set of keys, including 'error_type' as produced by
 * createError(). Empty or non-array input is ignored.
 *
 * @param array $error Error record; keys as produced by createError().
 * @return void
 */
private function addError($error = []) {
    if (empty($error) || !is_array($error)) {
        return;
    }

    $defaults = [
        'verbatim_texts'  => '',
        'revised_content' => '',
        'explanation'     => '',
        'original'        => '',
        'corrected'       => '',
        'position_start'  => '',
        'position_end'    => '',
        'error_type'      => '',
    ];

    // Values supplied by the caller win over the defaults.
    $this->errors[] = array_merge($defaults, $error);
}
|
||
|
||
/**
 * De-duplicates a batch of error records and stores the survivors.
 *
 * Two records count as the same when their verbatim text, revised text and
 * start/end positions are all equal; the first one seen is kept. Survivors
 * are passed to addError() in their original order.
 *
 * @param array $errors Error records as produced by createError().
 * @return void
 */
private function handleErrors($errors) {
    if (!$errors) {
        return;
    }

    $firstByFingerprint = [];
    foreach ($errors as $candidate) {
        $fingerprint = md5(
            $candidate['verbatim_texts']
            . $candidate['revised_content']
            . $candidate['position_start']
            . $candidate['position_end']
        );
        // Array union keeps the existing entry, i.e. the first record wins.
        $firstByFingerprint += [$fingerprint => $candidate];
    }

    foreach ($firstByFingerprint as $record) {
        $this->addError($record);
    }
}
|
||
|
||
/**
 * Builds one error record in the standard shape used by this class.
 *
 * @param mixed $verbatim       Offending text as found.
 * @param mixed $revised        Suggested replacement.
 * @param mixed $explanation    Human-readable reason.
 * @param mixed $original       Content before correction.
 * @param mixed $corrected      Content after correction.
 * @param mixed $position_start Start position of the offending text (-1 when unknown).
 * @param mixed $position_end   End position of the offending text (-1 when unknown).
 * @param mixed $error_type     Category label for the error.
 * @return array Record with keys verbatim_texts, revised_content, explanation,
 *               original, corrected, position_start, position_end, error_type.
 */
private function createError($verbatim='', $revised='', $explanation='',$original='',$corrected='', $position_start=-1, $position_end=-1,$error_type='') {
    $fieldNames = [
        'verbatim_texts',
        'revised_content',
        'explanation',
        'original',
        'corrected',
        'position_start',
        'position_end',
        'error_type',
    ];
    $fieldValues = [
        $verbatim,
        $revised,
        $explanation,
        $original,
        $corrected,
        $position_start,
        $position_end,
        $error_type,
    ];

    return array_combine($fieldNames, $fieldValues);
}
|
||
/**
 * DOI link check: finds every "doi:<prefix>/<suffix>" in the text and asks
 * testDoiAccessibility() whether it resolves.
 *
 * The text itself is never modified. One status record is produced per
 * distinct DOI (reachable or not), or a single record when no DOI is found;
 * these records go through handleErrors() like real errors. A trailing "."
 * picked up from the end of a sentence is not treated as part of the DOI.
 *
 * @param mixed $content Text to scan.
 * @return mixed The content, unchanged.
 */
private function checkDoi($content) {
    if (!is_string($content) || trim($content) === '') {
        return $content;
    }

    $errors      = [];
    $checkedDois = []; // DOIs already tested, so each one is requested once

    // "doi" (any case), a colon, optional whitespace, then <digits>.<digits>/<suffix>;
    // the whole match must not be glued to other word characters.
    $doiPattern = '/(?<!\w)(doi):\s*(\d+\.\d+\/[A-Za-z0-9\/\.\-\_]+)(?!\w)/iu';

    try {
        $matchCount = preg_match_all($doiPattern, $content, $allMatches, PREG_SET_ORDER);

        if ($matchCount > 0) {
            foreach ($allMatches as $matchItem) {
                // The suffix class allows ".", so a sentence-final period is
                // captured with the DOI; strip it before building the link.
                $doiCore = rtrim($matchItem[2], '.');
                $fullDoi = strtolower($matchItem[1]) . ':' . $doiCore;

                if (isset($checkedDois[$fullDoi])) {
                    continue;
                }
                $checkedDois[$fullDoi] = true;

                if ($this->testDoiAccessibility($doiCore)) {
                    $errorDesc = "DOI「{$fullDoi}」格式规范,且链接可正常访问";
                } else {
                    $errorDesc = "DOI「{$fullDoi}」格式规范,但链接无法访问(可能无效或网络问题)";
                }

                // Status record only: the DOI text is never rewritten.
                $errors[] = $this->createError(
                    $fullDoi,
                    $fullDoi,
                    $errorDesc,
                    $content,
                    $content
                );
            }
        } else {
            $errors[] = $this->createError(
                '未匹配到DOI',
                '无修正',
                '文本中未发现符合标准格式的DOI(如doi:10.1017/abc、DOI: 10.1038/nature12345)',
                $content,
                $content
            );
        }
    } catch (\Throwable $e) {
        // Fully qualified: a bare "Exception" would resolve inside this file's
        // namespace and never match anything thrown.
        $errors[] = $this->createError(
            'DOI校验全局异常',
            '已回滚原始内容',
            "DOI校验出错:{$e->getMessage()}(行号:{$e->getLine()}),已恢复原始输入",
            $content,
            $content
        );
    }

    $this->handleErrors($errors);

    return $content;
}
|
||
|
||
/**
 * Checks whether a DOI resolves through https://doi.org/.
 *
 * Sends one HTTP request (following redirects, 15 s timeout) and treats a
 * final status code of 200-399 as reachable. Any transport failure, an
 * empty DOI, or a missing cURL extension yields false.
 *
 * @param string $doiCore DOI without the "doi:" prefix, e.g. "10.1017/abc".
 * @return bool True when the DOI resolves, false otherwise.
 */
private function testDoiAccessibility($doiCore) {
    $doiCore = trim($doiCore);
    if ($doiCore === '' || !function_exists('curl_init')) {
        return false;
    }

    // Percent-encode each path segment but keep the "/" separators.
    $doiUrl = 'https://doi.org/' . implode('/', array_map('rawurlencode', explode('/', $doiCore)));

    $ch = curl_init();
    if ($ch === false) {
        return false;
    }

    curl_setopt_array($ch, [
        CURLOPT_URL            => $doiUrl,
        CURLOPT_RETURNTRANSFER => true,  // keep the response out of the output
        CURLOPT_HEADER         => true,
        CURLOPT_TIMEOUT        => 15,
        CURLOPT_FOLLOWLOCATION => true,  // DOIs redirect to the publisher page
        CURLOPT_MAXREDIRS      => 10,    // bound redirect chains
        // NOTE(review): certificate checks are switched off, as in the original
        // code. Only the status code is used, but enable verification wherever
        // a CA bundle is available.
        CURLOPT_SSL_VERIFYPEER => false,
        CURLOPT_SSL_VERIFYHOST => false,
    ]);

    curl_exec($ch);
    $httpCode = (int) curl_getinfo($ch, CURLINFO_HTTP_CODE); // 0 when the request failed
    curl_close($ch);

    return $httpCode >= 200 && $httpCode < 400;
}
|
||
}
|
||
?>
|