diff --git a/application/common/ArticleParserService.php b/application/common/ArticleParserService.php index fd3dfc16..526ac52a 100644 --- a/application/common/ArticleParserService.php +++ b/application/common/ArticleParserService.php @@ -244,7 +244,7 @@ class ArticleParserService foreach ($section->getElements() as $element) { $text = $this->getTextFromElement($element); $length = mb_strlen(trim($text)); - if ($length > $maxLength && $length > 10) { // 标题通常较长 + if ($length > $maxLength && $length > 3) { // 标题通常较长 $title = trim($text); $maxLength = $length; break 2; // 取第一个最长段落作为标题 @@ -610,7 +610,8 @@ class ArticleParserService ]; } if(empty($aCorresponding)){ - $pattern = '/Corresponding Authors|Correspondence to: (.*?)(?=$|;)/s'; + $pattern = '/Corresponding Authors|Correspondence to|Correspondence: (.*?)(?=$|;)/s'; + $corrText = trim($corrText,'*'); preg_match($pattern, $corrText, $match); if (!empty($match[1])) { $corrContent = $match[1]; @@ -625,6 +626,16 @@ class ArticleParserService ]; } } + if(empty($authors[1])){ + $authorPattern = '/([A-Za-z0-9\s]+?),\s*([\w@\.\-]+)(?=\.?)/'; + preg_match_all($authorPattern, $corrContent, $authors); + for ($i = 0; $i < count($authors[1]); $i++) { + $aCorresponding[] = [ + 'name' => empty($authors[1][$i]) ? '' : trim($authors[1][$i]), + 'email' => empty($authors[2][$i]) ? '' : trim($authors[2][$i]) + ]; + } + } } } return $aCorresponding;