From c15b784cf8651f1b1a56cc390264e618061af160 Mon Sep 17 00:00:00 2001 From: chengxl Date: Fri, 28 Nov 2025 16:35:11 +0800 Subject: [PATCH] =?UTF-8?q?=E6=B5=8B=E8=AF=95=E9=97=AE=E9=A2=98=E4=BF=AE?= =?UTF-8?q?=E6=94=B9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- application/common/ArticleParserService.php | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/application/common/ArticleParserService.php b/application/common/ArticleParserService.php index fd3dfc16..526ac52a 100644 --- a/application/common/ArticleParserService.php +++ b/application/common/ArticleParserService.php @@ -244,7 +244,7 @@ class ArticleParserService foreach ($section->getElements() as $element) { $text = $this->getTextFromElement($element); $length = mb_strlen(trim($text)); - if ($length > $maxLength && $length > 10) { // 标题通常较长 + if ($length > $maxLength && $length > 3) { // 标题通常较长 $title = trim($text); $maxLength = $length; break 2; // 取第一个最长段落作为标题 @@ -610,7 +610,8 @@ class ArticleParserService ]; } if(empty($aCorresponding)){ - $pattern = '/Corresponding Authors|Correspondence to: (.*?)(?=$|;)/s'; + $pattern = '/Corresponding Authors|Correspondence to|Correspondence: (.*?)(?=$|;)/s'; + $corrText = trim($corrText,'*'); preg_match($pattern, $corrText, $match); if (!empty($match[1])) { $corrContent = $match[1]; @@ -625,6 +626,16 @@ class ArticleParserService ]; } } + if(empty($authors[1])){ + $authorPattern = '/([A-Za-z0-9\s]+?),\s*([\w@\.\-]+)(?=\.?)/'; + preg_match_all($authorPattern, $corrContent, $authors); + for ($i = 0; $i < count($authors[1]); $i++) { + $aCorresponding[] = [ + 'name' => empty($authors[1][$i]) ? '' : trim($authors[1][$i]), + 'email' => empty($authors[2][$i]) ? '' : trim($authors[2][$i]) + ]; + } + } } } return $aCorresponding;