Merge remote-tracking branch 'origin/master'
This commit is contained in:
File diff suppressed because it is too large
Load Diff
359
application/api/controller/Contribute.php
Normal file
359
application/api/controller/Contribute.php
Normal file
@@ -0,0 +1,359 @@
|
||||
<?php
|
||||
|
||||
namespace app\api\controller;
|
||||
|
||||
use app\api\controller\Base;
|
||||
use think\Db;
|
||||
use PhpOffice\PhpWord\IOFactory;
|
||||
use think\Exception;
|
||||
use \app\common\ArticleParserService;
|
||||
/**
|
||||
* @title 自动投稿控制器
|
||||
* @description 所有的测试方法汇总
|
||||
*/
|
||||
class Contribute extends Base
|
||||
{
|
||||
|
||||
private $phpWord; // PhpWord实例
|
||||
private $sections; // 文档段落集合
|
||||
/**
 * Create the controller; all initialisation is delegated to the Base controller.
 *
 * @param \think\Request|null $request Current request, forwarded unchanged to the parent.
 */
public function __construct(\think\Request $request = null)
{
    parent::__construct($request);
}
|
||||
|
||||
/**
 * Automatic submission: parse an uploaded manuscript and persist the extracted data.
 *
 * Keys read from $aParam (the POST body is used when $aParam is empty):
 *  - file_url   (required) path of the uploaded file, relative to the public directory
 *                          (a leading "/" and a leading "public/" prefix are tolerated)
 *  - user_id    (required) id of the submitting author
 *  - article_id (optional) existing article to refresh from the newly uploaded file
 * All remaining keys are forwarded to _addData() together with the parsed data.
 *
 * @param array $aParam
 * @return string JSON-encoded result; "status" 1 means success, any other value is an error
 */
public function contribute($aParam = [])
{
    // Fall back to the POST body when the method is not called with explicit parameters.
    $aParam = empty($aParam) ? $this->request->post() : $aParam;

    // Required: uploaded file.
    $sOriginalFileUrl = empty($aParam['file_url']) ? '' : $aParam['file_url'];
    if (empty($sOriginalFileUrl)) {
        return json_encode(['status' => 2, 'msg' => 'Please upload the submission file']);
    }

    // Required: submitting user.
    $iUserId = empty($aParam['user_id']) ? '' : $aParam['user_id'];
    if (empty($iUserId)) {
        return json_encode(['status' => 2, 'msg' => 'Please select author']);
    }

    // Normalise the path relative to the public directory.
    // NOTE: ltrim()/trim() take a *character list*, so the "public" prefix must be
    // removed with a real prefix test; ltrim($path, 'public') would also eat the
    // leading letters of e.g. "uploads/...".
    $sRelativePath = ltrim($sOriginalFileUrl, '/');
    if (strpos($sRelativePath, 'public/') === 0) {
        $sRelativePath = (string) substr($sRelativePath, strlen('public/'));
    }

    // The path comes from the request: refuse parent-directory segments so it cannot
    // point outside the public directory.
    $aSegments = explode('/', str_replace('\\', '/', $sRelativePath));
    if (in_array('..', $aSegments, true)) {
        return json_encode(['status' => 3, 'msg' => 'The uploaded file does not exist']);
    }

    $sFileUrl = rtrim(ROOT_PATH, '/') . '/public/' . $sRelativePath;
    if (!file_exists($sFileUrl)) {
        return json_encode(['status' => 3, 'msg' => 'The uploaded file does not exist']);
    }
    if (!is_readable($sFileUrl)) {
        return json_encode(['status' => 4, 'msg' => 'The uploaded file is unreadable']);
    }

    // Optional: existing article whose content is being replaced.
    $iArticleId = empty($aParam['article_id']) ? 0 : $aParam['article_id'];
    $aWhere = [];
    if (!empty($iArticleId)) {
        $aArticle = Db::name('article')
            ->field('state,user_id,manuscirpt_url')
            ->where(['article_id' => $iArticleId])
            ->find();
        if (empty($aArticle)) {
            return json_encode(['status' => 7, 'msg' => 'The article does not exist']);
        }
        // Only articles in state -1 or 3 may be edited.
        if ($aArticle['state'] != -1 && $aArticle['state'] != 3) {
            return json_encode(['status' => 8, 'msg' => 'Article cannot be edited']);
        }
        if ($aArticle['user_id'] != $iUserId) {
            return json_encode(['status' => 9, 'msg' => 'Not modified by the author themselves']);
        }
        // Same file as the one already stored: nothing to re-parse. Both the raw and
        // the normalised form are compared so previously stored values still match.
        if ($aArticle['manuscirpt_url'] == trim($sOriginalFileUrl, '/')
            || $aArticle['manuscirpt_url'] == $sRelativePath) {
            return json_encode(['status' => 9, 'msg' => 'The content of the article has not changed']);
        }
        // Exclude the article itself from the duplicate-title lookup below.
        $aWhere['article_id'] = ['<>', $iArticleId];
    }

    // Parse the document.
    $aDealData = json_decode(ArticleParserService::uploadAndParse($sFileUrl), true);
    $iStatus = empty($aDealData['status']) ? 0 : $aDealData['status'];
    $sMsg = empty($aDealData['msg']) ? 'fail' : $aDealData['msg'];
    if ($iStatus != 1) {
        return json_encode(['status' => 5, 'msg' => $sMsg]);
    }
    $aData = empty($aDealData['data']) ? [] : $aDealData['data'];
    if (empty($aData['title'])) {
        return json_encode(['status' => 6, 'msg' => 'The article title is empty']);
    }

    // Reject a title that already exists on another article whose state is not 3.
    $aWhere += ['title' => trim($aData['title']), 'state' => ['<>', 3]];
    $aDuplicate = Db::name('article')->field('article_id')->where($aWhere)->find();
    if (!empty($aDuplicate)) {
        // "status" is what every other response of this endpoint uses; the original
        // "code" key is kept so existing clients that read it keep working.
        return json_encode(['status' => 7, 'code' => 7, 'msg' => 'Warning: you are re-submitting the article!']);
    }

    // Persist: parsed values take precedence, request parameters fill in the rest.
    $aData += $aParam;
    return $this->_addData($aData);
}
|
||||
|
||||
/**
 * Assemble the parsed submission and write it to the article tables.
 *
 * Tables touched: article, article_organ, article_author, article_author_organ.
 * All writes run inside one transaction, which is rolled back on every failure path.
 *
 * Keys read from $aParam: article_id, title, keywords, abstrart, fund, journal_id,
 * type, file_url, user_id, company (list of institution names), author (list of
 * author rows carrying "name" and "company_id"), corresponding (list of rows carrying
 * "name" and "email").
 *
 * @param array $aParam Parsed document data merged with the request parameters
 * @return string JSON-encoded result; "status" 1 means success
 * @throws \Exception Any database exception is re-thrown after the rollback
 */
private function _addData($aParam = [])
{
    if (empty($aParam)) {
        return json_encode(['status' => 2, 'msg' => 'Data is empty']);
    }

    // Existing article (update mode) or none (insert mode).
    $iUpdateArticleId = empty($aParam['article_id']) ? 0 : $aParam['article_id'];
    $aArticle = [];
    if (!empty($iUpdateArticleId)) {
        $aArticle = Db::name('article')
            ->field('title,keywords,abstrart,fund,journal_id')
            ->where(['article_id' => $iUpdateArticleId])
            ->find();
    }

    // Row for the article table. Text fields already filled on the stored article
    // are never overwritten.
    $aInsert = [];
    if (empty($aArticle['title']) && !empty($aParam['title'])) {
        $aInsert['title'] = $aParam['title'];
    }
    if (empty($aArticle['keywords']) && !empty($aParam['keywords'])) {
        $aInsert['keywords'] = is_array($aParam['keywords'])
            ? implode(',', $aParam['keywords'])
            : $aParam['keywords'];
    }
    if (empty($aArticle['abstrart']) && !empty($aParam['abstrart'])) {
        $aInsert['abstrart'] = $aParam['abstrart'];
    }
    // Fund is gated on the fund value itself (it used to be gated on the abstract).
    if (empty($aArticle['fund']) && !empty($aParam['fund'])) {
        $aInsert['fund'] = $aParam['fund'];
    }

    // Journal and its editor.
    $iJournalId = empty($aParam['journal_id']) ? 0 : $aParam['journal_id'];
    if (!empty($iJournalId)) {
        $aInsert['journal_id'] = $iJournalId;
        $aJournal = Db::name('journal')
            ->field('editor_id')
            ->where(['journal_id' => $iJournalId, 'state' => 0])
            ->find();
        $aInsert['editor_id'] = empty($aJournal['editor_id']) ? 0 : $aJournal['editor_id'];
    }

    // Article type.
    if (!empty($aParam['type'])) {
        $aInsert['type'] = $aParam['type'];
    }

    // Manuscript path, stored relative to the public directory. trim() takes a
    // character list, so the "public/" prefix is removed with a prefix test instead
    // of trim($path, 'public/'), which also stripped letters from both ends.
    if (!empty($aParam['file_url'])) {
        $sManuscriptUrl = trim($aParam['file_url'], '/');
        if (strpos($sManuscriptUrl, 'public/') === 0) {
            $sManuscriptUrl = (string) substr($sManuscriptUrl, strlen('public/'));
        }
        $aInsert['manuscirpt_url'] = $sManuscriptUrl;
    }

    // Submitting user.
    if (!empty($aParam['user_id'])) {
        $aInsert['user_id'] = $aParam['user_id'];
    }

    Db::startTrans();
    try {
        $iArticleId = 0;

        if (empty($aArticle) && !empty($aInsert)) {
            // Insert a new draft article with its default flags.
            $aInsert['ctime'] = time();
            $aInsert['state'] = -1;
            $aInsert['is_use_ai'] = 3;
            $aInsert['is_figure_copyright'] = 3;
            $aInsert['is_transfer'] = 3;
            $aInsert['is_become_reviewer'] = 3;
            $iArticleId = Db::name('article')->insertGetId($aInsert);
            if (empty($iArticleId)) {
                Db::rollback();
                return json_encode(['status' => 3, 'msg' => 'Article insertion failed']);
            }
            // The serial number embeds the new id, so it is written after the insert.
            $sAcceptSn = $this->getArticleAcceptSn(['article_id' => $iArticleId]);
            if (!empty($sAcceptSn)) {
                Db::name('article')
                    ->where(['article_id' => $iArticleId])
                    ->limit(1)
                    ->update(['accept_sn' => $sAcceptSn]);
            }
        } elseif (!empty($aArticle) && !empty($aInsert)) {
            // Update the existing article (only while it is in state -1 or 3).
            $aWhere = ['article_id' => $iUpdateArticleId, 'state' => ['in', [-1, 3]]];
            $mUpdated = Db::name('article')->where($aWhere)->limit(1)->update($aInsert);
            if ($mUpdated === false) {
                Db::rollback();
                return json_encode(['status' => 3, 'msg' => 'Article update failed']);
            }
            // Logically delete the current authors; they are re-created below.
            $aWhere['state'] = 0;
            $mAuthorUpdated = Db::name('article_author')->where($aWhere)->update(['state' => 1]);
            if ($mAuthorUpdated === false) {
                Db::rollback();
                return json_encode(['status' => 4, 'msg' => 'Article author update failed']);
            }
        }

        $iArticleId = empty($iArticleId) ? $iUpdateArticleId : $iArticleId;

        // ---- Institutions ------------------------------------------------------
        $aCompany = empty($aParam['company']) ? [] : $aParam['company'];
        $aCompanyInsert = [];
        $aSort = [];        // parser index => sort value assigned to a newly added institution
        $aCompanyData = []; // sort => organ_id of the article's active institutions
        if (!empty($iArticleId) && !empty($aCompany)) {
            $aActiveOrganWhere = ['article_id' => $iArticleId, 'state' => 0];
            $aOrgan = Db::name('article_organ')
                ->field('organ_id,organ_name,sort')
                ->where($aActiveOrganWhere)
                ->select();
            $aOrganName = empty($aOrgan) ? [] : array_column($aOrgan, 'organ_name');
            $iMaxSort = empty($aOrgan) ? 0 : max(array_column($aOrgan, 'sort'));

            foreach ($aCompany as $key => $value) {
                if (empty($value)) {
                    continue;
                }
                // Drop the "*" / "#" markers and surrounding whitespace around the name.
                $sName = trim(trim(trim($value, '*'), '#'));
                if (in_array($sName, $aOrganName)) {
                    continue;
                }
                if (empty($iMaxSort)) {
                    // No institutions stored yet: keep the parser's own index as sort.
                    $iSort = $key;
                } else {
                    // Append after the institutions that already exist.
                    $iMaxSort++;
                    $iSort = $iMaxSort;
                }
                $aSort[$key] = $iSort;
                $aCompanyInsert[] = [
                    'article_id'  => $iArticleId,
                    'organ_name'  => $sName,
                    'create_time' => time(),
                    'sort'        => $iSort,
                ];
            }

            if (!empty($aCompanyInsert)) {
                $mCompanyResult = Db::name('article_organ')->insertAll($aCompanyInsert);
                if (empty($mCompanyResult)) {
                    Db::rollback();
                    return json_encode(['status' => 3, 'msg' => 'Article institution insertion failed']);
                }
            }

            // Load the map even when nothing new was inserted, so authors can still be
            // linked to institutions that already existed (it used to be undefined then).
            // Expected shape: sort => organ_id — TODO confirm against Query::column().
            $aCompanyData = Db::name('article_organ')->where($aActiveOrganWhere)->column('sort,organ_id');
            $aCompanyData = empty($aCompanyData) ? [] : $aCompanyData;
        }

        // ---- Authors -----------------------------------------------------------
        $aAuthorData = [];
        $aAuthorOrgn = []; // pending links; "art_aut_id" temporarily holds the author name
        if (!empty($aParam['author'])) {
            // Corresponding authors indexed by name, used to pick up the e-mail address.
            $aCorresponding = empty($aParam['corresponding'])
                ? []
                : array_column($aParam['corresponding'], null, 'name');

            $aAuthorList = Db::name('article_author')
                ->field('art_aut_id,firstname')
                ->where(['article_id' => $iArticleId, 'state' => 0])
                ->select();
            $aAuthorNameList = empty($aAuthorList) ? [] : array_column($aAuthorList, 'firstname');

            foreach ($aParam['author'] as $key => $value) {
                if (empty($iArticleId)) {
                    break;
                }
                $sRawName = empty($value['name']) ? '' : $value['name'];
                $sFirstName = trim($sRawName);

                // Collect the author's institution links. The trimmed name is used as
                // the placeholder because it is matched against the stored firstname.
                if (!empty($value['company_id'])) {
                    foreach ($value['company_id'] as $v) {
                        $iNewSort = empty($aSort[$v]) ? $v : $aSort[$v];
                        $iOrgnId = empty($aCompanyData[$iNewSort]) ? 0 : $aCompanyData[$iNewSort];
                        if (empty($iOrgnId)) {
                            continue;
                        }
                        $aAuthorOrgn[] = [
                            'article_id' => $iArticleId,
                            'organ_id'   => $iOrgnId,
                            'art_aut_id' => $sFirstName,
                        ];
                    }
                }

                // Already stored for this article: do not insert a second time.
                // (Compared against the name list; the row list never matched.)
                if ($sFirstName !== '' && in_array($sFirstName, $aAuthorNameList)) {
                    continue;
                }

                $value['firstname'] = $sFirstName;
                $value['email'] = empty($aCorresponding[$sRawName]['email'])
                    ? ''
                    : $aCorresponding[$sRawName]['email'];
                $value['article_id'] = $iArticleId;
                unset($value['name'], $value['company_id']);
                $aAuthorData[$key] = $value;
            }
        }

        if (!empty($aAuthorData)) {
            $mAuthorResult = Db::name('article_author')->insertAll($aAuthorData);
            if (empty($mAuthorResult)) {
                Db::rollback();
                return json_encode(['status' => 3, 'msg' => 'Adding article author failed']);
            }
        }

        // ---- Author <-> institution links ----------------------------------------
        $aAuthorOrgnInsert = [];
        if (!empty($aAuthorOrgn)) {
            $aActiveWhere = ['article_id' => $iArticleId, 'state' => 0];

            // Links that already exist, grouped by author id.
            $aAuthorOrganList = Db::name('article_author_organ')
                ->field('art_aut_id,organ_id')
                ->where($aActiveWhere)
                ->select();
            $aAuthorOrganIdList = [];
            if (!empty($aAuthorOrganList)) {
                foreach ($aAuthorOrganList as $aLink) {
                    $aAuthorOrganIdList[$aLink['art_aut_id']][] = $aLink['organ_id'];
                }
            }

            // firstname => art_aut_id for the article's active authors. Always loaded
            // here; it used to be available only when new authors had been inserted.
            $aAuthorResult = Db::name('article_author')->where($aActiveWhere)->select();
            $aAuthorName = empty($aAuthorResult)
                ? []
                : array_column($aAuthorResult, 'art_aut_id', 'firstname');

            foreach ($aAuthorOrgn as $aLink) {
                if (empty($aLink['art_aut_id'])) {
                    continue;
                }
                $iAuthorId = empty($aAuthorName[$aLink['art_aut_id']]) ? 0 : $aAuthorName[$aLink['art_aut_id']];
                if (empty($iAuthorId)) {
                    continue;
                }
                if (!empty($aAuthorOrganIdList[$iAuthorId])
                    && in_array($aLink['organ_id'], $aAuthorOrganIdList[$iAuthorId])) {
                    continue;
                }
                // Replace the name placeholder with the real author id.
                $aLink['art_aut_id'] = $iAuthorId;
                $aAuthorOrgnInsert[] = $aLink;
            }
        }

        if (!empty($aAuthorOrgnInsert)) {
            $mLinkResult = Db::name('article_author_organ')->insertAll($aAuthorOrgnInsert);
            if (empty($mLinkResult)) {
                Db::rollback();
                return json_encode(['status' => 3, 'msg' => 'Adding article author orgn failed']);
            }
        }

        Db::commit();
    } catch (\Exception $e) {
        // Never leave the transaction open; the exception itself is not swallowed.
        Db::rollback();
        throw $e;
    }

    $aInsert['article_id'] = $iArticleId;
    return json_encode(['status' => 1, 'msg' => 'Successfully added article', 'article' => $aInsert]);
}
|
||||
|
||||
/**
 * Build the accept_sn serial number of an article.
 *
 * Layout: flag + four-digit current year + article id left-padded with zeros to
 * six characters + a random number between 100 and 999.
 *
 * @param array  $aParam Must carry "article_id"
 * @param string $sFlag  Prefix of the serial number
 * @return string The serial number, or '' when no article id is given
 */
private function getArticleAcceptSn($aParam = [],$sFlag = 'Draft')
{
    if (empty($aParam['article_id'])) {
        return '';
    }

    $sYear = date('Y');
    $sPaddedId = str_pad($aParam['article_id'], 6, '0', STR_PAD_LEFT);
    $iSuffix = rand(100, 999);

    return $sFlag . $sYear . $sPaddedId . $iSuffix;
}
|
||||
}
|
||||
@@ -1054,12 +1054,12 @@ class Finalreview extends Base
|
||||
}
|
||||
//查询文章审稿记录
|
||||
$aWhere = ['article_id' => $iArticleId,'state' => ['between',[1,3]]];
|
||||
$aArticleReviewer = Db::name('article_reviewer')->field('art_rev_id,state,ctime')->where($aWhere)->select();
|
||||
$aArticleReviewer = Db::name('article_reviewer')->field('art_rev_id,state,ctime,reviewer_id')->where($aWhere)->select();
|
||||
if(!empty($aArticleReviewer)){
|
||||
$aArtRevId = array_column($aArticleReviewer, 'art_rev_id');
|
||||
$aWhere = ['art_rev_id' => ['in',$aArtRevId],'state' => 0];
|
||||
//查询初审问卷
|
||||
$aQuestion = Db::name('article_reviewer_question')->field('art_rev_id,ctime,score,rated')->where($aWhere)->order('ctime asc')->select();
|
||||
$aQuestion = Db::name('article_reviewer_question')->field('art_rev_id,ctime,score,rated,recommend')->where($aWhere)->order('ctime asc')->select();
|
||||
$aQuestion = empty($aQuestion) ? [] : array_column($aQuestion, null,'art_rev_id');
|
||||
|
||||
//查询复审
|
||||
@@ -1071,12 +1071,20 @@ class Finalreview extends Base
|
||||
$aReviewerRepeatLists[$value['art_rev_id']][] = $value;
|
||||
}
|
||||
}
|
||||
|
||||
//查询作者信息
|
||||
$aUserId = array_unique(array_column($aArticleReviewer, 'reviewer_id'));
|
||||
$aWhere = ['user_id' => ['in',$aUserId],'state' => 0];
|
||||
$aUser = Db::name('user')->where($aWhere)->column('user_id,realname');
|
||||
|
||||
foreach ($aArticleReviewer as $key => $value) {
|
||||
$aQuestionData = empty($aQuestion[$value['art_rev_id']]) ? [] : $aQuestion[$value['art_rev_id']];
|
||||
$value['ctime'] = empty($aQuestionData['ctime']) ? $value['ctime'] : $aQuestionData['ctime'];
|
||||
$value['score'] = empty($aQuestionData['score']) ? 0 : $aQuestionData['score'];
|
||||
$value['repeat'] = empty($aReviewerRepeatLists[$value['art_rev_id']]) ? [] : $aReviewerRepeatLists[$value['art_rev_id']];
|
||||
$value['rated'] = empty($aQuestionData['rated']) ? 0 : $aQuestionData['rated'];
|
||||
$value['realname'] = empty($aUser[$value['reviewer_id']]) ? '' : $aUser[$value['reviewer_id']];
|
||||
$value['recommend'] = empty($aQuestionData['recommend']) ? 0 : $aQuestionData['recommend'];
|
||||
$aArticleReviewer[$key] = $value;
|
||||
}
|
||||
}
|
||||
|
||||
218
application/api/controller/Organ.php
Normal file
218
application/api/controller/Organ.php
Normal file
@@ -0,0 +1,218 @@
|
||||
<?php
|
||||
|
||||
namespace app\api\controller;
|
||||
use app\api\controller\Base;
|
||||
use think\Db;
|
||||
/**
|
||||
* @title 文章机构管理
|
||||
*/
|
||||
class Organ extends Base
|
||||
{
|
||||
|
||||
/**
 * Create the controller; all initialisation is delegated to the Base controller.
 *
 * @param \think\Request|null $request Current request, forwarded unchanged to the parent.
 */
public function __construct(\think\Request $request = null)
{
    parent::__construct($request);
}
|
||||
|
||||
/**
 * List the active institutions of an article.
 *
 * POST parameters:
 *  - article_id (required) article whose institutions are listed
 *  - user_id    (required) must be the user stored on the article
 * The article must be in state -1 or 3.
 *
 * @return string JSON-encoded result; on success "data" holds the institution rows
 */
public function lists()
{
    $aParam = $this->request->post();

    // Required: article.
    $iArticleId = empty($aParam['article_id']) ? 0 : $aParam['article_id'];
    if (empty($iArticleId)) {
        return json_encode(['status' => 2, 'msg' => 'Please select the article']);
    }

    // Required: user.
    $iUserId = empty($aParam['user_id']) ? 0 : $aParam['user_id'];
    if (empty($iUserId)) {
        return json_encode(['status' => 2, 'msg' => 'Please select author']);
    }

    // The article must belong to the user and be in state -1 or 3.
    $aWhere = ['article_id' => $iArticleId, 'user_id' => $iUserId, 'state' => ['in', [-1, 3]]];
    $aArticle = Db::name('article')->field('article_id')->where($aWhere)->find();
    if (empty($aArticle)) {
        return json_encode(['status' => 2, 'msg' => 'The article does not exist']);
    }

    // Active institutions of the article.
    $aOrgan = Db::name('article_organ')
        ->field('organ_id,article_id,organ_name,sort')
        ->where(['article_id' => $iArticleId, 'state' => 0])
        ->select();

    return json_encode(['status' => 1, 'data' => $aOrgan]);
}
|
||||
/**
 * Add an institution to an article.
 *
 * POST parameters:
 *  - article_id (required) article the institution belongs to
 *  - user_id    (required) must be the user stored on the article
 *  - organ_name (required) institution name; surrounding whitespace is ignored
 * If an active institution with the same name already exists on the article, that
 * row is returned instead of inserting a duplicate.
 *
 * @return string JSON-encoded result; on success "data" holds the institution row
 */
public function add()
{
    $aParam = $this->request->post();

    // Required: article.
    $iArticleId = empty($aParam['article_id']) ? 0 : $aParam['article_id'];
    if (empty($iArticleId)) {
        return json_encode(['status' => 2, 'msg' => 'Please select the article']);
    }

    // Required: user.
    $iUserId = empty($aParam['user_id']) ? '' : $aParam['user_id'];
    if (empty($iUserId)) {
        return json_encode(['status' => 2, 'msg' => 'Please select author']);
    }

    // Required: institution name. It is trimmed once, up front, so the duplicate
    // lookup and the stored value always agree and a blank name is rejected.
    $sOrganName = empty($aParam['organ_name']) ? '' : trim($aParam['organ_name']);
    if ($sOrganName === '') {
        return json_encode(['status' => 2, 'msg' => 'Please enter the name of the institution']);
    }

    // The article must belong to the user and be in state -1 or 3.
    $aWhere = ['article_id' => $iArticleId, 'user_id' => $iUserId, 'state' => ['in', [-1, 3]]];
    $aArticle = Db::name('article')->field('article_id')->where($aWhere)->find();
    if (empty($aArticle)) {
        return json_encode(['status' => 2, 'msg' => 'The article does not exist']);
    }

    // Same name already attached to the article: return the existing row.
    $aExisting = Db::name('article_organ')
        ->field('organ_id,article_id,organ_name,sort')
        ->where(['article_id' => $iArticleId, 'state' => 0, 'organ_name' => $sOrganName])
        ->find();
    if (!empty($aExisting)) {
        return json_encode(['status' => 1, 'msg' => 'Article organization already exists', 'data' => $aExisting]);
    }

    // Append after the highest sort value currently in use.
    $aMaxSort = Db::name('article_organ')
        ->field('max(sort) as sort')
        ->where(['article_id' => $iArticleId, 'state' => 0])
        ->find();
    $iSort = (empty($aMaxSort['sort']) ? 0 : $aMaxSort['sort']) + 1;

    $aInsert = [
        'article_id'  => $iArticleId,
        'organ_name'  => $sOrganName,
        'create_time' => time(),
        'sort'        => $iSort,
    ];
    $iId = Db::name('article_organ')->insertGetId($aInsert);
    if (empty($iId)) {
        return json_encode(['status' => 3, 'msg' => 'Failed to add article organization']);
    }

    $aInsert['organ_id'] = $iId;
    return json_encode(['status' => 1, 'msg' => 'success', 'data' => $aInsert]);
}
|
||||
/**
 * Rename an institution of an article.
 *
 * POST parameters:
 *  - article_id (required) article the institution belongs to
 *  - user_id    (required) must be the user stored on the article
 *  - organ_id   (required) institution to rename
 *  - organ_name (required) new name; surrounding whitespace is ignored
 * When another active institution of the article already has that name, nothing is
 * changed and that row's id is returned with status 1.
 *
 * @return string JSON-encoded result
 */
public function modify()
{
    $aParam = $this->request->post();

    // Required: article.
    $iArticleId = empty($aParam['article_id']) ? 0 : $aParam['article_id'];
    if (empty($iArticleId)) {
        return json_encode(['status' => 2, 'msg' => 'Please select the article']);
    }

    // Required: user.
    $iUserId = empty($aParam['user_id']) ? '' : $aParam['user_id'];
    if (empty($iUserId)) {
        return json_encode(['status' => 2, 'msg' => 'Please select author']);
    }

    // Required: new name. Trimmed once so the duplicate lookup and the stored value
    // always agree and a blank name is rejected.
    $sOrganName = empty($aParam['organ_name']) ? '' : trim($aParam['organ_name']);
    if ($sOrganName === '') {
        return json_encode(['status' => 2, 'msg' => 'Please enter the name of the institution']);
    }

    // Required: institution.
    $iOrganId = empty($aParam['organ_id']) ? 0 : $aParam['organ_id'];
    if (empty($iOrganId)) {
        return json_encode(['status' => 2, 'msg' => 'Please select the institution to modify']);
    }

    // The article must belong to the user and be in state -1 or 3.
    $aWhere = ['article_id' => $iArticleId, 'user_id' => $iUserId, 'state' => ['in', [-1, 3]]];
    $aArticle = Db::name('article')->field('article_id')->where($aWhere)->find();
    if (empty($aArticle)) {
        return json_encode(['status' => 2, 'msg' => 'The article does not exist']);
    }

    // The institution must be an active institution of that article.
    $aOrgan = Db::name('article_organ')
        ->field('organ_id,article_id,organ_name,sort')
        ->where(['article_id' => $iArticleId, 'state' => 0, 'organ_id' => $iOrganId])
        ->find();
    if (empty($aOrgan)) {
        return json_encode(['status' => 3, 'msg' => 'Article organization does not exist', 'data' => $aOrgan]);
    }

    // Another institution of the article already carries the requested name.
    $aWhere = [
        'article_id' => $iArticleId,
        'state'      => 0,
        'organ_id'   => ['<>', $iOrganId],
        'organ_name' => $sOrganName,
    ];
    $aSameName = Db::name('article_organ')->field('organ_id')->where($aWhere)->find();
    if (!empty($aSameName)) {
        return json_encode(['status' => 1, 'msg' => 'Article organization already exists', 'data' => $aSameName]);
    }

    // Rename.
    $aUpdate = ['organ_name' => $sOrganName, 'update_time' => time()];
    $mResult = Db::name('article_organ')->where(['organ_id' => $iOrganId])->limit(1)->update($aUpdate);
    if ($mResult === false) {
        return json_encode(['status' => 3, 'msg' => 'Failed to update article organization']);
    }

    return json_encode(['status' => 1, 'msg' => 'success']);
}
|
||||
/**
 * Logically delete an institution of an article together with its author links.
 *
 * POST parameters:
 *  - article_id (required) article the institution belongs to
 *  - user_id    (required) must be the user stored on the article
 *  - organ_id   (required) institution to remove
 * Both updates (article_organ and article_author_organ, state set to 1) run inside
 * one transaction, which is rolled back on every failure path.
 *
 * @return string JSON-encoded result
 * @throws \Exception Any database exception is re-thrown after the rollback
 */
public function remove()
{
    $aParam = $this->request->post();

    // Required: article.
    $iArticleId = empty($aParam['article_id']) ? 0 : $aParam['article_id'];
    if (empty($iArticleId)) {
        return json_encode(['status' => 2, 'msg' => 'Please select the article']);
    }

    // Required: user.
    $iUserId = empty($aParam['user_id']) ? '' : $aParam['user_id'];
    if (empty($iUserId)) {
        return json_encode(['status' => 2, 'msg' => 'Please select author']);
    }

    // Required: institution.
    $iOrganId = empty($aParam['organ_id']) ? 0 : $aParam['organ_id'];
    if (empty($iOrganId)) {
        return json_encode(['status' => 2, 'msg' => 'Please select the institution to modify']);
    }

    // The article must belong to the user and be in state -1 or 3.
    $aWhere = ['article_id' => $iArticleId, 'user_id' => $iUserId, 'state' => ['in', [-1, 3]]];
    $aArticle = Db::name('article')->field('article_id')->where($aWhere)->find();
    if (empty($aArticle)) {
        return json_encode(['status' => 2, 'msg' => 'The article does not exist']);
    }

    // The institution must be an active institution of that article.
    $aOrgan = Db::name('article_organ')
        ->field('organ_id,article_id,organ_name,sort')
        ->where(['article_id' => $iArticleId, 'state' => 0, 'organ_id' => $iOrganId])
        ->find();
    if (empty($aOrgan)) {
        return json_encode(['status' => 3, 'msg' => 'Article organization does not exist', 'data' => $aOrgan]);
    }

    // Active author links that point at this institution.
    $aId = Db::name('article_author_organ')
        ->where(['organ_id' => $iOrganId, 'article_id' => $iArticleId, 'state' => 0])
        ->column('id');

    Db::startTrans();
    try {
        // Mark the institution as deleted.
        $aUpdate = ['state' => 1, 'update_time' => time()];
        $mResult = Db::name('article_organ')->where(['organ_id' => $iOrganId])->limit(1)->update($aUpdate);
        if ($mResult === false) {
            Db::rollback();
            return json_encode(['status' => 3, 'msg' => 'Failed to update article organization']);
        }

        // Mark the author links as deleted.
        if (!empty($aId)) {
            $aUpdate = ['state' => 1, 'update_time' => time()];
            $mLinkResult = Db::name('article_author_organ')
                ->where(['id' => ['in', $aId], 'state' => 0])
                ->limit(count($aId))
                ->update($aUpdate);
            if ($mLinkResult === false) {
                Db::rollback();
                return json_encode(['status' => 3, 'msg' => 'Failed to unbind the relationship between the institution and the author']);
            }
        }

        Db::commit();
    } catch (\Exception $e) {
        // Never leave the transaction open; the exception itself is not swallowed.
        Db::rollback();
        throw $e;
    }

    return json_encode(['status' => 1, 'msg' => 'success']);
}
|
||||
}
|
||||
@@ -617,7 +617,7 @@ class Reviewer extends Base
|
||||
$data = $this->request->post();
|
||||
|
||||
//查询实例数据
|
||||
$res = $this->article_reviewer_obj->field('t_journal.*,t_article_reviewer.*,t_article.abstrart,t_article.title article_title,t_article.type atype,t_article.state astate,t_article.accept_sn accept_sn,t_user.account account')
|
||||
$res = $this->article_reviewer_obj->field('t_journal.*,t_article_reviewer.*,t_article.abstrart,t_article.title article_title,t_article.type atype,t_article.state astate,t_article.accept_sn accept_sn,t_user.account account,t_user.email as user_email')
|
||||
->join('t_article', 't_article.article_id = t_article_reviewer.article_id', 'LEFT')
|
||||
->join('t_user', 't_user.user_id = t_article_reviewer.reviewer_id', 'LEFT')
|
||||
->join('t_journal', 't_journal.journal_id = t_article.journal_id', 'left')
|
||||
|
||||
@@ -198,6 +198,8 @@ class Ucenter extends Base{
|
||||
//发送邮件给编辑
|
||||
$vv = "Dear editor,<br/><br/>";
|
||||
$vv .= "Please check the new application for Young Scientist Board Member for ".$journal_info['title']." Journal.";
|
||||
$sUserEmail = empty($user_info['email']) ? '' : $user_info['email'];
|
||||
$vv .= "<br/>please check applied email:".$sUserEmail;
|
||||
$mai['email'] = $editor_info['email'];
|
||||
$mai['title'] = $journal_info['title'];
|
||||
$mai['content'] = $vv;
|
||||
|
||||
@@ -1083,7 +1083,7 @@ class User extends Base
|
||||
->join("t_journal",'t_journal.journal_id = t_apply_yboard.journal_id','left')
|
||||
->join("t_user","t_user.user_id = t_apply_yboard.user_id",'left')
|
||||
->join("t_user_reviewer_info","t_user_reviewer_info.reviewer_id = t_user.user_id",'left')
|
||||
->where('t_apply_yboard.journal_id','in',$ids)->where('t_apply_yboard.state',0)->select();
|
||||
->where('t_apply_yboard.journal_id','in',$ids)->where('t_apply_yboard.state',0)->order('t_apply_yboard.ctime desc')->select();
|
||||
foreach($list as $k => $v){//补充简历信息
|
||||
$cac = $this->user_cv_obj->where('user_id',$v['user_id'])->where('state',0)->select();
|
||||
$list[$k]['cvs'] = $cac;
|
||||
|
||||
@@ -407,7 +407,7 @@ class Web extends Base
|
||||
{
|
||||
$ca_board = [];
|
||||
$boards = $this->board_to_journal_obj
|
||||
->field("t_board_to_journal.*,t_board_group.group_name,t_user.account,t_user.email,t_user.realname,t_user.icon,t_user.google_index,t_user.google_time,t_user.wos_index,t_user.wos_time,t_user_reviewer_info.*")
|
||||
->field("t_board_to_journal.btj_id,t_board_to_journal.user_id,t_board_to_journal.board_group_id,t_board_to_journal.journal_id,t_board_to_journal.type,t_board_to_journal.research_areas,t_board_to_journal.realname as board_realname,t_board_to_journal.email as board_email,t_board_to_journal.website as board_website,t_board_to_journal.affiliation as board_affiliation,t_board_to_journal.icon as board_icon,t_board_to_journal.technical as board_technical,t_board_to_journal.remark,t_board_to_journal.state,t_board_group.group_name,t_user.account,t_user.email,t_user.realname,t_user.icon,t_user.google_index,t_user.google_time,t_user.wos_index,t_user.wos_time,t_user_reviewer_info.*")
|
||||
->join("t_board_group", "t_board_group.board_group_id = t_board_to_journal.board_group_id", "left")
|
||||
->join("t_user", "t_user.user_id = t_board_to_journal.user_id", "left")
|
||||
->join("t_user_reviewer_info", "t_user_reviewer_info.reviewer_id=t_board_to_journal.user_id", "left")
|
||||
@@ -415,7 +415,6 @@ class Web extends Base
|
||||
->where('t_board_to_journal.state', 0)
|
||||
->order("t_board_group.board_group_id asc,t_user.realname")
|
||||
->select();
|
||||
|
||||
foreach ($boards as $val) {
|
||||
if ($aar) {
|
||||
$article1 = $this->article_obj->where('user_id', $val['user_id'])->where('journal_id', $val['journal_id'])->where('state', 5)->where('ctime', ">", strtotime("-1 year"))->select();
|
||||
@@ -431,6 +430,15 @@ class Web extends Base
|
||||
$val['reviewes'] = $reviewes;
|
||||
}
|
||||
|
||||
//编委数据隔离加默认数据 chengxiaoling 20251022 start
|
||||
$val['realname'] = empty($val['board_realname']) ? $val['realname'] : $val['board_realname'];
|
||||
$val['email'] = empty($val['board_email']) ? $val['email'] : $val['board_email'];
|
||||
$val['technical'] = empty($val['board_technical']) ? $val['technical'] : $val['board_technical'];
|
||||
$val['website'] = empty($val['board_website']) ? $val['website'] : $val['board_website'];
|
||||
$val['icon'] = empty($val['board_icon']) ? $val['icon'] : $val['board_icon'];
|
||||
$val['company'] = empty($val['board_affiliation']) ? $val['company'] : $val['board_affiliation'];
|
||||
//编委数据隔离加默认数据 chengxiaoling 20251022 end
|
||||
|
||||
if ($val['type'] == 0) {//主编
|
||||
$ca_board['main'][] = $val;
|
||||
} elseif ($val['type'] == 1) {//副主编
|
||||
|
||||
@@ -46,7 +46,7 @@ class Aireview
|
||||
$iLogId = empty($aAiReview['id']) ? 0 : $aAiReview['id'];
|
||||
//新增
|
||||
if(empty($iLogId)){
|
||||
$aInsert['create_time'] = date('Y-m-d H:i:s');
|
||||
$aInsert['create_time'] = time();
|
||||
$aInsert['content'] = $iArticleId;
|
||||
$iLogId = Db::name('article_ai_review')->insertGetId($aInsert);
|
||||
if(empty($iLogId)){
|
||||
@@ -57,7 +57,7 @@ class Aireview
|
||||
}
|
||||
if(!empty($iLogId)){
|
||||
$aWhere = ['id' => $iLogId];
|
||||
$aInsert['update_time'] = date('Y-m-d H:i:s');
|
||||
$aInsert['update_time'] = time();
|
||||
if(!Db::name('article_ai_review')->where($aWhere)->limit(1)->update($aInsert)){
|
||||
$aResult = ['status' => 5,'msg' => 'Failed to add AI audit content'];
|
||||
}
|
||||
@@ -81,6 +81,21 @@ class Aireview
|
||||
//查询文章审核内容
|
||||
$aWhere = ['article_id' => $aParam['article_id']];
|
||||
$aAiReview = Db::name('article_ai_review')->where($aWhere)->find();
|
||||
if(!empty($aAiReview)){
|
||||
$sCreateTime = empty($aAiReview['create_time']) ? '' : $aAiReview['create_time'];
|
||||
if(!empty($sCreateTime)){
|
||||
$aAiReview['create_time'] = strtotime($sCreateTime) !== false ? $sCreateTime : date('Y-m-d H:i:s',$sCreateTime);
|
||||
}else{
|
||||
$aAiReview['create_time'] = '';
|
||||
}
|
||||
|
||||
$sUpdateTime = empty($aAiReview['update_time']) ? '' : $aAiReview['update_time'];
|
||||
if(!empty($sUpdateTime)){
|
||||
$aAiReview['update_time'] = strtotime($sUpdateTime) !== false ? $sUpdateTime : date('Y-m-d H:i:s',$sUpdateTime);
|
||||
}else{
|
||||
$aAiReview['update_time'] = '';
|
||||
}
|
||||
}
|
||||
|
||||
return ['status' => 1,'msg' => 'Successfully obtained article review content','data' => $aAiReview];
|
||||
}
|
||||
|
||||
764
application/common/ArticleParserService.php
Normal file
764
application/common/ArticleParserService.php
Normal file
@@ -0,0 +1,764 @@
|
||||
<?php
|
||||
namespace app\common;
|
||||
use PhpOffice\PhpWord\IOFactory;
|
||||
use think\Exception;
|
||||
use ZipArchive;
|
||||
use RecursiveIteratorIterator;
|
||||
use RecursiveDirectoryIterator;
|
||||
use PhpOffice\PhpWord\Settings;
|
||||
use PhpOffice\PhpWord\Element\TextRun;
|
||||
use DOMDocument;
|
||||
use DOMXPath;
|
||||
// use BadMethodCallException;
|
||||
class ArticleParserService
|
||||
{
|
||||
private $phpWord;
|
||||
private $sections;
|
||||
|
||||
/**
 * Load a Word document and cache its sections for the extraction methods.
 *
 * @param string $filePath Path of the document to parse.
 *
 * @throws Exception When the file does not exist or cannot be parsed.
 */
public function __construct($filePath = '')
{
    if (!file_exists($filePath)) {
        throw new Exception("文档不存在:{$filePath}");
    }

    try {
        // Turn "read data only" off so the reader keeps the full section structure.
        $reader = IOFactory::createReader();
        $reader->setReadDataOnly(false);
        Settings::setCompatibility(false);
        Settings::setOutputEscapingEnabled(true); // avoid XML escaping conflicts

        // Parse the file once; the previous code loaded it twice and threw
        // the first result away.
        $this->phpWord = $reader->load($filePath);
        $this->sections = $this->phpWord->getSections();
    } catch (\Exception $e) {
        // A constructor's return value is discarded, so returning a response
        // here silently produced an instance without sections. Fail loudly
        // instead and keep the original error as the previous exception.
        throw new Exception('Failed to load document: ' . $e->getMessage(), 0, $e);
    }
}
|
||||
|
||||
// 上传并解析文档的入口方法
|
||||
/**
 * Entry point: validate the file, parse it and collect all extracted metadata.
 *
 * @param string $sFileUrl Path of the uploaded document on disk.
 *
 * @return string JSON-encoded array with `status`, `msg` and, on success
 *                (status 1), `data` holding title, author, report, company,
 *                corresponding and the abstract/keywords/fund values.
 */
public static function uploadAndParse($sFileUrl){
    // Required-value check
    if(empty($sFileUrl)){
        return json_encode(['status' => 2,'msg' => 'Please upload the submission file']);
    }

    // The file must exist and be readable
    if (!file_exists($sFileUrl)) {
        return json_encode(['status' => 3, 'msg' => 'The uploaded file does not exist']);
    }
    if (!is_readable($sFileUrl)) {
        return json_encode(['status' => 4, 'msg' => 'The uploaded file is unreadable']);
    }

    // Parse the document. Every other failure of this method is reported as a
    // JSON string, so a constructor exception is converted to the same shape.
    try {
        $oDealFile = new self($sFileUrl);
    } catch (\Exception $e) {
        return json_encode(['status' => 6, 'msg' => 'File parsing failed: ' . $e->getMessage()]);
    }
    // Without sections there is nothing to iterate over in the extractors.
    if (empty($oDealFile->sections)) {
        return json_encode(['status' => 6, 'msg' => 'File parsing failed']);
    }

    // Title
    $sTitle = $oDealFile->getTitle();
    if(empty($sTitle)){
        return json_encode(['status' => 5, 'msg' => 'Article title retrieval failed']);
    }

    // Authors
    $aParam = ['title' => $sTitle];
    $aAuthor = $oDealFile->getAuthors($aParam);
    $aParam['author'] = empty($aAuthor['author']) ? [] : $aAuthor['author']; // all authors
    $aParam['report'] = empty($aAuthor['report']) ? [] : $aAuthor['report']; // corresponding authors

    // Affiliations
    $aParam['company'] = $oDealFile->getCompany($aParam);

    // Corresponding-author contact details
    $aParam['corresponding'] = $oDealFile->getCorrespondingAuthors($aParam);

    // Abstract, keywords and funding; `+=` never overwrites the keys set above.
    $aContent = $oDealFile->extractFromWord();
    $aParam += empty($aContent['data']) ? [] : $aContent['data'];

    return json_encode(['status' => 1,'msg' => 'success','data' => $aParam]);
}
|
||||
|
||||
// 提取文章标题
|
||||
/**
 * Return the article title.
 *
 * The title is taken to be the first element, in document order, whose
 * trimmed text is longer than 10 characters.
 *
 * @return string The trimmed title, or '' when no element qualifies.
 */
private function getTitle(){
    foreach ($this->sections as $section) {
        foreach ($section->getElements() as $element) {
            $candidate = trim($this->getTextFromElement($element));
            if (mb_strlen($candidate) > 10) {
                return $candidate;
            }
        }
    }

    return '';
}
|
||||
// 提取作者
|
||||
// private function getAuthors($aParam = []) {
|
||||
// $title = empty($aParam['title']) ? $this->getTitle() : $aParam['title'];
|
||||
// $sAuthorContent = $this->getNextParagraphAfterText($title);
|
||||
// if (empty($sAuthorContent)) {
|
||||
// return ['author' => [], 'report' => []];
|
||||
// }
|
||||
|
||||
// //编码修复
|
||||
// $possibleEncodings = [
|
||||
// 'Windows-1252', 'UTF-8', 'GBK', 'GB2312',
|
||||
// 'Latin-1', 'ISO-8859-1', 'CP1252'
|
||||
// ];
|
||||
// $encodedContent = @mb_convert_encoding($sAuthorContent, 'UTF-8', implode(',', $possibleEncodings));
|
||||
// $sAuthorContent = $encodedContent ?: $sAuthorContent;
|
||||
|
||||
// //清理不可见字符
|
||||
// $sAuthorContent = preg_replace('/[\x00-\x1F\x7F\x{200B}-\x{200F}]/u', '', $sAuthorContent);
|
||||
|
||||
// //修复特殊符号乱码
|
||||
// $symbolMap = [
|
||||
// '†' => '†', 'â ' => '†', 'â' => '†', '?†' => '†',
|
||||
// ':' => ':', ',' => ',', '—' => '-',
|
||||
// '啊' => '' // 针对性移除异常字符“啊”(若为固定乱码)
|
||||
// ];
|
||||
// $sAuthorContent = strtr($sAuthorContent, $symbolMap);
|
||||
|
||||
// //格式标准化
|
||||
// $sAuthorContent = str_replace([',', ';', ';', '、'], ',', $sAuthorContent); // 统一分隔符
|
||||
// $sAuthorContent = preg_replace('/\s+and\s+/i', ', ', $sAuthorContent); // and转逗号
|
||||
// $sAuthorContent = preg_replace('/\s+/', ' ', $sAuthorContent); // 合并多余空格
|
||||
// $sAuthorContent = trim($sAuthorContent);
|
||||
|
||||
// // 处理作者
|
||||
// $content = mb_convert_encoding($sAuthorContent, 'UTF-8', 'auto'); // 确保编码正确
|
||||
// $content = str_replace("\xC2\xA0", ' ', $content); // 替换非-breaking空格为普通空格
|
||||
// $content = preg_replace('/(\d+)\s*([*#†])/', '$1$2', $content); // 合并"1 *"为"1*"、"1 #"为"1#"
|
||||
// $content = preg_replace('/,†/', ',†', $content); // 保留"1,†"格式(防止被拆分)
|
||||
// //标记上标内的逗号+空格(多编号)
|
||||
// $tempStr = preg_replace('/(\d+)\s*,\s*(\d+)/', '$1<SEP>$2', $content);
|
||||
// // 原有步骤2:正则匹配(扩展上标符号支持,保持原有逻辑)
|
||||
// $pattern = '/
|
||||
// ([A-Za-z\s\.\-]+?) # 姓名(支持缩写、空格)
|
||||
// \s* # 姓名与上标间空格
|
||||
// ( # 上标组(扩展符号支持)
|
||||
// \d+ # 起始数字
|
||||
// (?:[†#*,]|<SEP>\d+)* # 允许:†#*符号、逗号、<SEP>+数字(兼容1,†、1,*等)
|
||||
// )
|
||||
// \s*,? # 作者间逗号(可选)
|
||||
// (?=\s|$) # 确保后面是空格或结尾
|
||||
// /ux';
|
||||
|
||||
// preg_match_all($pattern, $tempStr, $matches);
|
||||
// $authorList = [];
|
||||
// if(!empty($matches[1])){
|
||||
// foreach ($matches[1] as $i => $name) {
|
||||
// $name = trim($name);
|
||||
// $superscript = trim($matches[2][$i]);
|
||||
// $superscript = str_replace('<SEP>', ',', $superscript); // 恢复多编号逗号
|
||||
// $superscript = preg_replace('/,$/', '', $superscript); // 清理末尾逗号
|
||||
// // 修复符号与数字间的空格(如原始"1 *"被误处理为"1*"的情况,保持原样)
|
||||
// $superscript = preg_replace('/(\d)([*#†])/', '$1$2', $superscript);
|
||||
// if (!empty($name)) {
|
||||
// $authorList[] = [
|
||||
// 'name' => $name,
|
||||
// 'superscript' => $superscript
|
||||
// ];
|
||||
// }
|
||||
// }
|
||||
// }else {
|
||||
// // 按“两个或多个连续空格”拆分(姓名之间的分隔)
|
||||
// $authorList = array_filter(
|
||||
// array_map('trim',
|
||||
// preg_split('/(,\p{Z}*|\p{Z}{2,})/u', $sAuthorContent)
|
||||
// )
|
||||
// );
|
||||
// }
|
||||
|
||||
|
||||
// // //处理作者
|
||||
// // $authorList = [];
|
||||
// // // 新正则:匹配“姓名+上标”整体,允许上标含逗号(如1,†)
|
||||
// // // 逻辑:姓名以字母/中文开头,上标以数字开头、以符号/数字结尾
|
||||
// // // if (preg_match_all('/([A-Za-z\x{4e00}-\x{9fa5}][A-Za-z\s·\-\'\x{4e00}-\x{9fa5}]*)\s*([\d,†#*]+)/u', $sAuthorContent, $matches)) {
|
||||
// // if(preg_match_all('/([A-Za-z\x{4e00}-\x{9fa5}][A-Za-z\s·\-\'\x{4e00}-\x{9fa5}]*)\s*(\d[\d,†#\s*]*)/u', $sAuthorContent, $matches)){
|
||||
// // for ($i = 0; $i < count($matches[1]); $i++) {
|
||||
// // $authorList[] = trim($matches[1][$i] . $matches[2][$i]);
|
||||
// // }
|
||||
// // } else {
|
||||
// // // 按“两个或多个连续空格”拆分(姓名之间的分隔)
|
||||
// // $authorList = array_filter(
|
||||
// // array_map('trim',
|
||||
// // preg_split('/(,\p{Z}*|\p{Z}{2,})/u', $sAuthorContent)
|
||||
// // )
|
||||
// // );
|
||||
// // }
|
||||
// $aAuthorData = [];
|
||||
// $aReport = [];
|
||||
// $namePattern = '/
|
||||
// (?:[A-Za-z\s·\-\']+| # 英文姓名(支持空格、连字符)
|
||||
// [\x{4e00}-\x{9fa5}]+| # 中文姓名
|
||||
// [\x{1800}-\x{18AF}]+| # 蒙古文姓名
|
||||
// [A-Z]\.) # 单字母缩写(如 J.)
|
||||
// /ux';
|
||||
// var_dump($authorList);exit;
|
||||
// foreach ($authorList as $authorStr) {
|
||||
// if (empty($authorStr)) continue;
|
||||
// var_dump($authorList);exit;
|
||||
// //分离姓名与上标(支持上标含逗号,如1,†)
|
||||
// $superscript = '';
|
||||
// // 新正则:匹配以数字开头、含逗号/符号的完整上标(如1,†、2*#)
|
||||
// $authorStr = trim(trim($authorStr,','),' ');
|
||||
// // if (preg_match('/([\d,†#*]+)$/u', $authorStr, $supMatch)) {
|
||||
// // if(preg_match('/\s*([\d,†#* ]+)$/u', $authorStr, $supMatch)){
|
||||
// // if (preg_match('/.*?\s*([\d,†#* ]+)$/u', $authorStr, $supMatch)) {
|
||||
// // if (preg_match('/.*?\s*([\d,\x{2020}#* ]+?)\s*$/u', $authorStr, $supMatch)) {
|
||||
// // if (preg_match('/^(.+?)\D*?(\d[\d,#*†,\s]*)$/u', $authorStr, $supMatch)) {
|
||||
// // $superscript = $supMatch[1];
|
||||
// // // 移除上标,保留纯姓名(避免残留符号)
|
||||
// // $nameStr = trim(preg_replace('/' . preg_quote($superscript, '/') . '$/', '', $authorStr));
|
||||
// // } else {
|
||||
// // $nameStr = $authorStr;
|
||||
// // }
|
||||
// $pattern = '/^(.+?)\s*(\d[\d,#*†\s]*?)\s*$/u';
|
||||
// if (preg_match($pattern, $authorStr, $supMatch)) {
|
||||
// $nameStr = empty($supMatch[1]) ? '' : trim($supMatch[1]); // 姓名部分:"Liguo Zhang"
|
||||
// $superscript = empty($supMatch[2]) ? $nameStr : $nameStr.trim($supMatch[2]); // 上标部分:"1
|
||||
// // echo "姓名: $nameStr, 上标: $superscript\n";
|
||||
// } else {
|
||||
// $nameStr = $authorStr;
|
||||
// }
|
||||
// //验证姓名合法性(过滤无效内容)
|
||||
// if (!preg_match($namePattern, $nameStr)) {
|
||||
// continue;
|
||||
// }
|
||||
// //解析上标信息(正确识别1,†中的机构编号和符号)
|
||||
// $companyId = '';
|
||||
// $isSuper = 0;
|
||||
// $isReport = 0;
|
||||
// if (!empty($superscript)) {
|
||||
// // 提取机构编号(忽略上标中的逗号,如1,† → 提取1)
|
||||
// if (preg_match('/(\d+)/', $superscript, $numMatch)) {
|
||||
// $companyId = $numMatch[1];
|
||||
// }
|
||||
// // 识别特殊符号(#为超级作者,*†为通讯作者)
|
||||
// $isSuper = strpos($superscript, '#') !== false ? 1 : 0;
|
||||
// $isReport = (strpos($superscript, '*') !== false || strpos($superscript, '†') !== false) ? 1 : 0;
|
||||
// }
|
||||
// if (preg_match("/^([A-Za-z\s'\.-]+)/u", $nameStr, $match)) {
|
||||
// $nameStr = trim($match[1]);
|
||||
// }
|
||||
// $aAuthorData[] = [
|
||||
// 'name' => $nameStr,
|
||||
// 'company_id' => $companyId,
|
||||
// 'is_super' => $isSuper,
|
||||
// 'is_report' => $isReport
|
||||
// ];
|
||||
// if ($isReport) {
|
||||
// $aReport[] = $nameStr;
|
||||
// }
|
||||
// }
|
||||
// var_dump($aAuthorData);exit;
|
||||
// return ['author' => $aAuthorData,'report' => array_unique($aReport)];
|
||||
// }
|
||||
/**
 * Parse the author line, i.e. the first non-empty paragraph after the title.
 *
 * @param array $aParam Optional `title`; when absent the title is detected.
 *
 * @return array `author`: list of ['name', 'company_id' (list of affiliation
 *               numbers as strings), 'is_super' (1 when marked `#`),
 *               'is_report' (1 when marked `*` or `†`)];
 *               `report`: unique list of corresponding-author names.
 */
private function getAuthors($aParam = []) {
    $title = empty($aParam['title']) ? $this->getTitle() : $aParam['title'];
    $sAuthorContent = $this->getNextParagraphAfterText($title);
    if (empty($sAuthorContent)) {
        return ['author' => [], 'report' => []];
    }

    // Encoding repair: only convert text that is NOT already valid UTF-8.
    // Forcing a conversion from a guessed legacy encoding would re-encode
    // correct multi-byte characters into mojibake.
    if (!mb_check_encoding($sAuthorContent, 'UTF-8')) {
        $sourceEncoding = mb_detect_encoding($sAuthorContent, ['GBK', 'Windows-1252'], true) ?: 'Windows-1252';
        $converted = mb_convert_encoding($sAuthorContent, 'UTF-8', $sourceEncoding);
        $sAuthorContent = $converted ?: $sAuthorContent;
    }

    // Strip invisible characters
    $sAuthorContent = preg_replace('/[\x00-\x1F\x7F\x{200B}-\x{200F}]/u', '', $sAuthorContent);

    // Repair known garbled symbols
    $symbolMap = [
        '†' => '†', 'â ' => '†', 'â' => '†', '?†' => '†',
        ':' => ':', ',' => ',', '—' => '-',
        '啊' => '' // remove the stray character (treated as fixed mojibake)
    ];
    $sAuthorContent = strtr($sAuthorContent, $symbolMap);

    // Normalise the format
    $sAuthorContent = str_replace([',', ';', ';', '、'], ',', $sAuthorContent); // unify separators
    $sAuthorContent = preg_replace('/\s+and\s+/i', ', ', $sAuthorContent); // "and" -> comma
    $sAuthorContent = preg_replace('/\s+/', ' ', $sAuthorContent); // collapse whitespace
    $sAuthorContent = trim($sAuthorContent);

    // Prepare the string used for "name + superscript" matching. The text is
    // UTF-8 at this point, so no further conversion is needed.
    $content = str_replace("\xC2\xA0", ' ', $sAuthorContent); // non-breaking space -> space
    // `u` flag: the character class contains the multi-byte dagger.
    $content = preg_replace('/(\d+)\s*([*#†])/u', '$1$2', $content); // "1 *" -> "1*"
    $content = preg_replace('/,†/', ',†', $content); // keep the "1,†" form together
    // Mark commas that sit between two affiliation numbers so they are not
    // mistaken for author separators.
    $tempStr = preg_replace('/(\d+)\s*,\s*(\d+)/', '$1<SEP>$2', $content);

    $pattern = '/
        ([A-Za-z\s\.\-]+?)       # 姓名(支持缩写、空格)
        \s*                       # 姓名与上标间空格
        (                         # 上标组(扩展符号支持)
        \d+                       # 起始数字
        (?:[†#*,]|<SEP>\d+)*      # 允许:†#*符号、逗号、<SEP>+数字(兼容1,†、1,*等)
        )
        \s*,?                     # 作者间逗号(可选)
        (?=\s|$)                  # 确保后面是空格或结尾
    /ux';

    preg_match_all($pattern, $tempStr, $matches);
    $authorList = [];
    if(!empty($matches[1])){
        foreach ($matches[1] as $i => $name) {
            $name = trim($name);
            $superscript = trim($matches[2][$i]);
            $superscript = str_replace('<SEP>', ',', $superscript); // restore commas between numbers
            $superscript = preg_replace('/,$/', '', $superscript);  // drop a trailing comma
            if (!empty($name)) {
                $authorList[] = [
                    'name' => $name,
                    'superscript' => $superscript
                ];
            }
        }
    }else {
        // Fallback: split on commas or on runs of two or more separator characters.
        $pieces = preg_split('/(,\p{Z}*|\p{Z}{2,})/u', $sAuthorContent) ?: [];
        $authorList = array_filter(array_map('trim', $pieces));
    }

    $aAuthorData = [];
    $aReport = [];
    foreach ($authorList as $authorEntry){
        if (empty($authorEntry)) continue;

        // Regex path yields arrays; the fallback path yields plain strings, in
        // which case the whole string serves as both name and marker source.
        if (is_array($authorEntry)) {
            $nameStr = $authorEntry['name'];
            $superscript = $authorEntry['superscript'];
        } else {
            $nameStr = $authorEntry;
            $superscript = $authorEntry;
        }

        // Affiliation numbers; reset for every author so that a previous
        // author's numbers can never leak into this one.
        $companyIds = [];
        if (preg_match_all('/\d+/', $superscript, $numMatch)) {
            $companyIds = $numMatch[0];
        }
        // Markers: `#` flags is_super, `*` or `†` flags a corresponding author.
        $isSuper = strpos($superscript, '#') !== false ? 1 : 0;
        $isReport = (strpos($superscript, '*') !== false || strpos($superscript, '†') !== false) ? 1 : 0;

        // Keep only the leading Latin-letter part of the name when there is one.
        if (preg_match("/^([A-Za-z\s'\.-]+)/u", $nameStr, $match)) {
            $nameStr = trim($match[1]);
        }

        $aAuthorData[] = [
            'name' => $nameStr,
            'company_id' => $companyIds,
            'is_super' => $isSuper,
            'is_report' => $isReport
        ];
        if ($isReport) {
            $aReport[] = $nameStr;
        }
    }

    // array_values(): array_unique() leaves key gaps, which would turn the
    // list into an object when JSON-encoded.
    return ['author' => $aAuthorData,'report' => array_values(array_unique($aReport))];
}
|
||||
|
||||
// 获取机构
|
||||
/**
 * Extract the numbered affiliation list that follows the author line.
 *
 * @param array $aParam Optional `title` and `authors` (raw author-line text);
 *                      missing values are looked up in the document.
 *
 * @return array Map of affiliation number (string) => affiliation text.
 */
private function getCompany($aParam = []){
    // Title
    $title = empty($aParam['title']) ? $this->getTitle() : $aParam['title'];
    // Author line below the title
    $sAuthorContent = empty($aParam['authors']) ? $this->getNextParagraphAfterText($title) : $aParam['authors'];
    // Everything after the author line, up to the abstract/keywords
    $sCompany = $this->getContentAfterText($sAuthorContent);
    if(empty($sCompany)){
        return [];
    }

    // Encoding repair: only convert text that is NOT already valid UTF-8.
    // Forcing a conversion from a guessed legacy encoding would re-encode
    // correct multi-byte characters into mojibake.
    if (!mb_check_encoding($sCompany, 'UTF-8')) {
        $sourceEncoding = mb_detect_encoding($sCompany, ['GBK', 'Windows-1252'], true) ?: 'Windows-1252';
        $converted = mb_convert_encoding($sCompany, 'UTF-8', $sourceEncoding);
        $sCompany = $converted ?: $sCompany;
    }

    // Split into lines; only lines of the form "<number><text>" are affiliations.
    $sCompany = str_replace(["\r\n", "\r"], "\n", $sCompany);
    $aCompany = [];
    foreach (explode("\n", $sCompany) as $line) {
        // The pattern itself requires leading digits, so no separate
        // "starts with a digit" filter is needed.
        if (!preg_match('/^(\d+)\s*(.+)$/', trim($line), $match)) {
            continue;
        }
        if(empty($match[1]) || empty($match[2])){
            continue;
        }
        // Strip surrounding dots (e.g. "1. Dept ...") and leading spaces.
        $aCompany[$match[1]] = ltrim(trim(ltrim($match[2]),'.'),' ');
    }
    return $aCompany;
}
|
||||
|
||||
// 提取通讯作者(含E-mail、地址、电话)
|
||||
/**
 * Extract contact details (e-mail, postal address, telephone) of the
 * corresponding authors.
 *
 * @param array $aParam `report` (list of corresponding-author names, required)
 *                      and optionally `title`.
 *
 * @return array List of ['name', 'email', 'postal_address', 'tel']; empty when
 *               no corresponding author was supplied or none could be matched.
 */
private function getCorrespondingAuthors($aParam = []){
    $aCorrespondingAuthor = empty($aParam['report']) ? [] : $aParam['report'];
    if(empty($aCorrespondingAuthor)){
        return [];
    }

    // Locate title -> author line -> affiliation paragraph
    $title = empty($aParam['title']) ? $this->getTitle() : $aParam['title'];
    $sAuthorContent = $this->getNextParagraphAfterText($title);
    $sCompany = $this->getNextParagraphAfterText($sAuthorContent); // raw text of the affiliation paragraph
    if (empty($sCompany)) {
        // Fallback: join the parsed affiliation names
        $aCompany = $this->getCompany($aParam);
        $sCompany = implode(' ', array_values($aCompany));
    }

    // Everything that follows the affiliations
    $corrText = $this->getContentAfterText($sCompany);

    // Encoding repair: only convert text that is NOT already valid UTF-8.
    // Forcing a conversion from a guessed legacy encoding would re-encode
    // correct multi-byte characters into mojibake.
    if (!mb_check_encoding($corrText, 'UTF-8')) {
        $sourceEncoding = mb_detect_encoding($corrText, ['GBK', 'Windows-1252'], true) ?: 'Windows-1252';
        $converted = mb_convert_encoding($corrText, 'UTF-8', $sourceEncoding);
        $corrText = $converted ?: $corrText;
    }

    // Clean up the text
    $corrText = str_replace([':', '@'], [':', '@'], $corrText);
    $corrText = preg_replace('/\s+/', ' ', $corrText); // unify whitespace
    $corrText = str_replace(' ', ' ', $corrText); // remove extra spaces

    // One block per "*" marker
    $corrBlocks = preg_split('/\s*\*\s*/', $corrText);
    $corrBlocks = array_filter(array_map('trim', $corrBlocks));

    $aCorresponding = [];
    foreach ($corrBlocks as $block) {
        // Skip blocks that mention none of the corresponding authors
        $sName = $this->matchCorrespondingName($block, $aCorrespondingAuthor);
        if (empty($sName)) {
            continue;
        }
        preg_match('/(E[\s-]*mail|邮箱)[\s:]*([^\s]+@[^\s]+)/i', $block, $email);
        preg_match('/(Postal[\s-]*address|地址)[\s:]*([^,;]+)/i', $block, $address);
        preg_match('/(Tel|电话)[\s:]*([^\s]+)/i', $block, $tel);

        $aCorresponding[] = [
            'name' => $sName,
            'email' => isset($email[2]) ? trim($email[2]) : '',
            'postal_address' => isset($address[2]) ? trim($address[2]) : '',
            'tel' => isset($tel[2]) ? trim($tel[2]) : ''
        ];
    }
    return $aCorresponding;
}
|
||||
|
||||
//匹配通讯作者姓名
|
||||
/**
 * Find which corresponding author a text block refers to.
 *
 * A name matches when it occurs in the block case-insensitively, either as
 * given or with its space-separated parts in reverse order
 * ("Liguo Zhang" also matches "Zhang Liguo").
 *
 * @param string $block     Text block to search.
 * @param array  $corrNames Candidate author names.
 *
 * @return string The first matching name exactly as supplied, or ''.
 */
private function matchCorrespondingName($block, $corrNames)
{
    foreach ($corrNames as $name) {
        // An empty needle would match any block (or raise a warning on older
        // PHP versions), so it can never identify an author.
        if (trim((string) $name) === '') {
            continue;
        }
        // mb_stripos() folds case for non-ASCII letters as well; byte-wise
        // strtolower() does not.
        if (mb_stripos($block, $name, 0, 'UTF-8') !== false) {
            return $name;
        }
        $nameParts = explode(' ', $name);
        if (count($nameParts) >= 2) {
            $reversedName = implode(' ', array_reverse($nameParts));
            if (mb_stripos($block, $reversedName, 0, 'UTF-8') !== false) {
                return $name;
            }
        }
    }
    return '';
}
|
||||
|
||||
// 获取目标文本的下一个段落
|
||||
/**
 * Return the first non-empty element text that follows the element
 * containing $targetText (case-insensitive substring match).
 *
 * @param string $targetText Text that identifies the anchor element.
 *
 * @return string Text of the following non-empty element, or '' when the
 *                target is empty, is not found, or nothing follows it.
 */
private function getNextParagraphAfterText($targetText){
    // An empty needle cannot identify an anchor: depending on the PHP version
    // stripos() either raises a warning or matches the very first element.
    $targetText = (string) $targetText;
    if ($targetText === '') {
        return '';
    }

    $found = false;
    foreach ($this->sections as $section) {
        foreach ($section->getElements() as $element) {
            $text = $this->getTextFromElement($element);
            if(empty($text)){
                continue;
            }
            if ($found) {
                return $text;
            }
            if (stripos($text, $targetText) !== false) {
                $found = true;
            }
        }
    }
    return '';
}
|
||||
|
||||
// 获取目标文本后的所有内容
|
||||
/**
 * Collect the element texts that follow the element resembling $targetText.
 *
 * The anchor is matched fuzzily: both texts are lower-cased and reduced to
 * ASCII letters and digits, and an element counts as the anchor when
 * similar_text() covers more than half of the reduced target. Collection
 * stops at the first element containing a stop keyword or after 200 lines.
 *
 * @param string $targetText Text identifying the anchor element.
 *
 * @return string Collected texts joined with "\n"; '' when nothing was collected.
 */
private function getContentAfterText($targetText){
    $stopKeywords = ['关键词', 'Key words', '摘要', 'Abstract'];
    $maxLines = 200;

    // The reduced target does not change while scanning, so build it once.
    $needle = preg_replace('/[^a-zA-Z0-9]/', '', strtolower($targetText));
    $needleLength = strlen($needle);

    $collected = [];
    $anchorSeen = false;
    $elementIndex = 0;
    foreach ($this->sections as $section) {
        foreach ($section->getElements() as $element) {
            $elementIndex++;
            if (count($collected) >= $maxLines) {
                break 2;
            }

            $line = trim($this->getTextFromElement($element, $elementIndex));
            if (empty($line)) {
                continue;
            }

            if (!$anchorSeen) {
                // Compare on letters and digits only
                $haystack = preg_replace('/[^a-zA-Z0-9]/', '', strtolower($line));
                // More than 50% of the target has to be matched
                if ($needleLength > 0 && similar_text($haystack, $needle) / $needleLength > 0.5) {
                    $anchorSeen = true;
                }
                continue;
            }

            // A stop keyword ends the whole scan
            foreach ($stopKeywords as $keyword) {
                if (stripos($line, $keyword) !== false) {
                    break 3;
                }
            }

            $collected[] = $line;
        }
    }

    return implode("\n", $collected);
}
|
||||
|
||||
// 统一提取元素文本
|
||||
/**
 * Recursively extract the plain text of a document element.
 *
 * PreserveText, Table and Cell elements return their concatenated text as is;
 * for every other element the text of children, Text, Link, Field and Note
 * parts is concatenated, control characters are replaced by spaces and runs
 * of whitespace are collapsed.
 *
 * @param object $element    Document element to read.
 * @param int    $lineNumber Not used by this method; accepted for callers that pass it.
 *
 * @return string Extracted text ('' when the element carries none).
 */
private function getTextFromElement($element,$lineNumber = 0){
    $text = '';

    // PreserveText: field text such as HYPERLINK instructions
    if ($element instanceof \PhpOffice\PhpWord\Element\PreserveText) {
        // getText() returns the stored parts; no reflection needed.
        foreach ((array) $element->getText() as $part) {
            if (strpos($part, 'HYPERLINK') !== false) {
                // Decode HTML entities, then pull the address out of "mailto:"
                $decoded = html_entity_decode($part);
                if (preg_match('/mailto:([a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,})/i', $decoded, $match)) {
                    $text .= $match[1] . ' ';
                }
            } else {
                // Plain text is appended unchanged
                $text .= $part;
            }
        }
        return $text;
    }

    // Tables and cells (an e-mail address may sit inside a table)
    if ($element instanceof \PhpOffice\PhpWord\Element\Table) {
        foreach ($element->getRows() as $row) {
            foreach ($row->getCells() as $cell) {
                $text .= $this->getTextFromElement($cell);
            }
        }
        return $text;
    }
    if ($element instanceof \PhpOffice\PhpWord\Element\Cell) {
        foreach ($element->getElements() as $child) {
            $text .= $this->getTextFromElement($child);
        }
        return $text;
    }

    // Containers: recurse into every child element
    if (method_exists($element, 'getElements')) {
        foreach ($element->getElements() as $child) {
            $text .= $this->getTextFromElement($child);
        }
    }

    // Text runs (including formatted text)
    if ($element instanceof \PhpOffice\PhpWord\Element\Text) {
        $text .= $element->getText();
    }

    // Hyperlinks: the target comes first because it may be an e-mail address
    if ($element instanceof \PhpOffice\PhpWord\Element\Link) {
        $target = $element->getTarget();
        if (strpos($target, 'mailto:') === 0) {
            $text .= str_replace('mailto:', '', $target) . ' '; // strip the mailto: prefix
        }
        $text .= $element->getText() . ' ';
    }

    // Fields and notes (may hold a hidden e-mail address). Guarded with
    // method_exists() so an element class without getContent() is skipped
    // instead of causing a fatal error.
    if (($element instanceof \PhpOffice\PhpWord\Element\Field
            || $element instanceof \PhpOffice\PhpWord\Element\Note)
        && method_exists($element, 'getContent')) {
        $text .= $element->getContent() . ' ';
    }

    // Replace control characters (C0, DEL and C1) with spaces. The `u` flag is
    // essential: without it \x7F-\x9F matches raw bytes, and bytes 0x80-0x9F
    // are continuation bytes of ordinary UTF-8 characters, which the old
    // pattern overwrote and thereby corrupted.
    $cleaned = preg_replace('/[\x{00}-\x{1F}\x{7F}-\x{9F}]/u', ' ', $text);
    if ($cleaned === null) {
        // Not valid UTF-8: fall back to removing ASCII control bytes only.
        $cleaned = preg_replace('/[\x00-\x1F\x7F]/', ' ', $text);
    }
    // Collapse runs of whitespace (tabs/newlines were already turned into spaces)
    $text = preg_replace('/\s+/', ' ', $cleaned);

    return $text;
}
|
||||
|
||||
/**
 * Extract the abstract, the keywords and the funding statement from the document.
 *
 * @return array ['status' => 1, 'msg' => ..., 'data' => ['abstrart' => string,
 *               'keywords' => list of strings, 'fund' => string]]. The key
 *               `abstrart` is misspelled but kept as is, since the result is
 *               merged into the parser output under that name.
 */
public function extractFromWord() {
    $sContent = '';
    $sFundContent = '';
    foreach ($this->sections as $section) {
        foreach ($section->getElements() as $element) {
            $textContent = $this->getTextFromElement($element);
            if(empty($textContent)){
                continue;
            }

            // Encoding repair: only convert text that is NOT already valid
            // UTF-8. Forcing a conversion from a guessed legacy encoding would
            // re-encode correct multi-byte characters into mojibake.
            $normalized = $textContent;
            if (!mb_check_encoding($normalized, 'UTF-8')) {
                $sourceEncoding = mb_detect_encoding($normalized, ['GBK', 'Windows-1252'], true) ?: 'Windows-1252';
                $converted = mb_convert_encoding($normalized, 'UTF-8', $sourceEncoding);
                $normalized = $converted ?: $normalized;
            }
            $sContent .= $normalized;

            // Mark the end of the paragraph that holds the keyword list
            if(stripos($textContent, 'Keywords:') !== false){
                $sContent .= "Keywords-End-Flag";
            }

            // Funding statement: the text that follows the first funding phrase
            if(empty($sFundContent)){
                $aFund = $this->getMatchedFundPhrases($sContent);
                if(!empty($aFund[0])){
                    $position = stripos($sContent, $aFund[0]);
                    $sFundContent = substr($sContent, $position);
                    $sFundContent = trim(str_ireplace($aFund[0], '', $sFundContent));
                    if (preg_match('/^(.*?)Peer review/', $sFundContent, $matches)) {
                        $sFundContent = $matches[1]; // keep only the part before "Peer review"
                    }
                }
            }
            $sContent .= "\n";
        }
    }

    // Basic clean-up: collapse every whitespace run into a single space
    $textContent = preg_replace('/(\S)\s+/', '$1 ', $sContent);
    $textContent = trim($textContent);

    // Abstract: everything between "Abstract" and "Keywords:" (or the end)
    $abstract = '';
    $abstractPattern = '/Abstract\s*([\s\S]*?)(?=Keywords:|$)/i';
    if (preg_match($abstractPattern, $textContent, $abstractMatches)) {
        $abstract = trim($abstractMatches[1]);
        $abstract = preg_replace('/\n+/', ' ', $abstract);
    }

    // Keywords: text between "Keywords:" and the end flag. The `i` flag keeps
    // this in line with the case-insensitive stripos() test that inserts the
    // flag; without it "KEYWORDS:" got a flag but was never extracted.
    $keywords = [];
    $keywordPattern = '/Keywords:\s*(.*?)\s*Keywords-End-Flag/is';
    if (preg_match($keywordPattern, $textContent, $keywordMatches)) {
        $keywordStr = trim($keywordMatches[1]);

        // Tidy the list: no line breaks, no trailing punctuation
        $keywordStr = preg_replace('/\n+/', ' ', $keywordStr);
        $keywordStr = rtrim($keywordStr, ';,. ');
        $keywordStr = trim($keywordStr);

        // Split and drop empty entries; array_values() closes the key gaps
        // left by array_filter() so the list stays a JSON array.
        $keywords = preg_split('/[,;]\s*/', $keywordStr);
        $keywords = array_values(array_filter(array_map('trim', $keywords), function($item) {
            return !empty($item) && !ctype_space($item);
        }));
    }

    return [
        'status' => 1,
        'msg' => '提取成功',
        'data' => [
            'abstrart' => $abstract,
            'keywords' => $keywords,
            'fund' => $sFundContent
        ]
    ];
}
|
||||
/**
 * List the funding phrases that occur in a text.
 *
 * Matching is case-insensitive; each phrase is reported once, in its
 * canonical spelling, in order of first appearance.
 *
 * @param string $content Text to scan.
 *
 * @return array List of canonical phrases found (empty when there are none).
 */
private function getMatchedFundPhrases($content = '') {
    if (empty($content)) {
        return [];
    }

    // Canonical spellings of the funding phrases
    $fundPhrases = [
        'Supported by', 'Funded by', 'Sponsored by', 'Supported in part by',
        'Funding was provided by', 'Funded in part by'
    ];

    // Lower-cased phrase => canonical phrase, plus the escaped alternatives
    $canonicalByKey = [];
    $alternatives = [];
    foreach ($fundPhrases as $phrase) {
        $canonicalByKey[strtolower($phrase)] = $phrase;
        $alternatives[] = preg_quote($phrase, '#');
    }

    // One capturing group so that the matched phrase itself is returned
    preg_match_all('#('.implode('|', $alternatives).')#i', $content, $hits);

    // Using the canonical phrase as array key removes duplicates while
    // keeping the order of first appearance.
    $found = [];
    foreach ($hits[1] as $hit) {
        $key = strtolower($hit);
        if (isset($canonicalByKey[$key])) {
            $found[$canonicalByKey[$key]] = true;
        }
    }

    return array_keys($found);
}
|
||||
//日志打印
|
||||
/**
 * Debug logging hook. Currently a no-op: the echo statement is disabled.
 *
 * @param string $msg Message that would be logged.
 */
private function log($msg)
{
    // echo date('[Y-m-d H:i:s] ') . $msg . "\n";
}
|
||||
}
|
||||
13
application/common/ReviewerToJournal.php
Normal file
13
application/common/ReviewerToJournal.php
Normal file
@@ -0,0 +1,13 @@
|
||||
<?php
|
||||
namespace app\common;
|
||||
|
||||
use think\Model;
|
||||
|
||||
/**
 * Model bound to the `t_reviewer_to_journal` table.
 */
class ReviewerToJournal extends Model
{
    /**
     * Table name, set explicitly because it differs from the model name.
     */
    protected $table = 't_reviewer_to_journal';

    /**
     * Columns intended to be mass-assignable.
     *
     * NOTE(review): `$fillable` is the Eloquent naming; confirm that the
     * think\Model base class in use actually reads this property.
     */
    protected $fillable = [
        'reviewer_id',
        'journal_id',
        'account',
        'grade',
        'is_yboard',
        'ctime',
        'state',
    ];
}
|
||||
13
application/common/UserReviewerInfo.php
Normal file
13
application/common/UserReviewerInfo.php
Normal file
@@ -0,0 +1,13 @@
|
||||
<?php
|
||||
namespace app\common;
|
||||
|
||||
use think\Model;
|
||||
|
||||
/**
 * Model bound to the `t_user_reviewer_info` table.
 */
class UserReviewerInfo extends Model
{
    /**
     * Table name, set explicitly because it differs from the model name.
     */
    protected $table = 't_user_reviewer_info';

    /**
     * Columns intended to be mass-assignable.
     *
     * NOTE(review): `$fillable` is the Eloquent naming; confirm that the
     * think\Model base class in use actually reads this property.
     */
    protected $fillable = [
        'reviewer_info_id',
        'reviewer_id',
        'major',
        'country',
        'test_from',
    ];
}
|
||||
Reference in New Issue
Block a user