修改自动推广的相关任务

This commit is contained in:
wangjinlei
2026-05-13 12:26:28 +08:00
parent c36eba77b1
commit fa878334cd
7 changed files with 289 additions and 29 deletions

3
.env
View File

@@ -33,6 +33,9 @@ UNSUBSCRIBE_BASE_URL=https://submission.tmrjournals.com/api/Unsubscribe/index
[yboard] [yboard]
APPLY_URL="https://submission.tmrjournals.com/youthBoardRegister" APPLY_URL="https://submission.tmrjournals.com/youthBoardRegister"
[plagiarism]
static_root="/home/wwwroot/api.tmrjournals.com/public"
[journal] [journal]
;官网服务器地址 ;官网服务器地址
base_url = http://journalapi.tmrjournals.com/public/index.php base_url = http://journalapi.tmrjournals.com/public/index.php

View File

@@ -51,7 +51,7 @@ class Plagiarism extends Base
$localPath = $fileUrl !== '' $localPath = $fileUrl !== ''
? $svc->resolveFileUrlToLocal($fileUrl) ? $svc->resolveFileUrlToLocal($fileUrl)
: $svc->locateArticleManuscript($articleId); : $svc->locateArticleManuscript($articleId);
echo $localPath;
$checkId = $svc->submit($articleId, $localPath, $editorId, 'manual'); $checkId = $svc->submit($articleId, $localPath, $editorId, 'manual');
return jsonSuccess(['check_id' => $checkId]); return jsonSuccess(['check_id' => $checkId]);
} catch (\Throwable $e) { } catch (\Throwable $e) {
@@ -59,6 +59,15 @@ class Plagiarism extends Base
} }
} }
/**
 * Ad-hoc hook that runs the plagiarism "upload and trigger" step for one
 * fixed check record and one fixed manuscript file.
 *
 * NOTE(review): the check id and the absolute file path are hard-coded and
 * nothing is returned; this looks like a leftover debugging entry point.
 * Confirm whether it should remain reachable before release.
 */
public function testccone()
{
    $service = new PlagiarismService();
    $service->runUploadAndTrigger(
        9,
        "/home/wwwroot/api.tmrjournals.com/public/manuscirpt/20260509/6832a56e8ace38fe99df390ab5221deb.docx"
    );
}
/** /**
* 重试 = 提交一次新查重(保留历史) * 重试 = 提交一次新查重(保留历史)
*/ */

View File

@@ -6,6 +6,7 @@ use think\Db;
use think\Env; use think\Env;
use think\Queue; use think\Queue;
use think\Validate; use think\Validate;
use app\common\CrossrefService;
class Preaccept extends Base class Preaccept extends Base
{ {
@@ -708,36 +709,66 @@ class Preaccept extends Base
} }
/**
* 通过 DOI 获取文献元数据(Crossref REST API)
*
* POST 参数:
* doi 必填,可为纯 DOI(10.xxxx/...)或 https://doi.org/10.xxxx/...
*
* 返回 data.formate 与旧版字段兼容: author, title, joura, dateno, doilink
* 另附 data.crossref: 原始摘要字段(不含 raw message,避免体积过大)
*/
public function searchDoi() public function searchDoi()
{ {
$data = $this->request->post(); $data = $this->request->post();
$rule = new Validate([ $rule = new Validate([
"doi" => "require" 'doi' => 'require',
]); ]);
if (!$rule->check($data)) { if (!$rule->check($data)) {
return jsonError($rule->getError()); return jsonError($rule->getError());
} }
$doi = str_replace('/', '%2F', $data['doi']);
// $url = "https://citation.crosscite.org/format?doi=$doi&style=cancer-translational-medicine&lang=en-US"; $doiInput = trim((string)$data['doi']);
$url = "https://citation.doi.org/format?doi=$doi&style=cancer-translational-medicine&lang=en-US"; if ($doiInput === '') {
$res = myGet($url); return jsonError('doi empty');
$frag = trim(substr($res, strpos($res, '.') + 1));
if ($frag == "") {
return jsonError("not find");
} }
if (mb_substr_count($frag, '.') != 3) { // 去掉 URL 前缀,得到裸 DOI
return jsonError("formate fail"); $doiNorm = preg_replace('#^https?://(dx\.)?doi\.org/#i', '', $doiInput);
$doiNorm = trim($doiNorm, " \t\n\r\0\x0B/");
$svc = new CrossrefService([
'mailto' => trim((string)Env::get('crossref_mailto', '')),
]);
$summary = $svc->fetchWorkSummary($doiNorm);
if ($summary === null || empty($summary['doi'])) {
return jsonError('DOI not found or invalid (Crossref)');
} }
$res = explode('.', $frag);
$f['author'] = prgeAuthor($res[0]); $title = trim((string)($summary['title'] ?? ''));
$f['title'] = trim($res[1]); $jouraRaw = trim((string)($summary['joura'] ?? ''));
$bj = bekjournal($res[2]); $authorStr = trim((string)($summary['author_str'] ?? ''));
$joura = formateJournal(trim($bj[0])); $dateno = trim((string)($summary['dateno'] ?? ''));
$f['joura'] = $joura; $doilink = trim((string)($summary['doilink'] ?? ''));
$f['dateno'] = str_replace(' ', '', str_replace('-', '', trim($bj[1]))); if ($doilink === '') {
$f['doilink'] = strpos($data['doi'], "http") === false ? "http://doi.org/" . $data['doi'] : $data['doi']; $doilink = 'https://doi.org/' . $summary['doi'];
$re['formate'] = $f; }
return jsonSuccess($re);
$f = [
'author' => $authorStr !== '' ? prgeAuthor($authorStr) : '',
'title' => $title,
'joura' => $jouraRaw !== '' ? formateJournal($jouraRaw) : '',
'dateno' => str_replace(' ', '', str_replace('-', '', $dateno)),
'doilink' => $doilink,
];
$crossrefOut = $summary;
unset($crossrefOut['raw']);
return jsonSuccess([
'formate' => $f,
'crossref' => $crossrefOut,
'doi' => $summary['doi'],
]);
} }

View File

@@ -10,6 +10,7 @@ use think\Db;
use think\Queue; use think\Queue;
use think\Validate; use think\Validate;
use think\log; use think\log;
use app\common\ArticleSymbolNormalizer;
/** /**
* @title 公共管理相关 * @title 公共管理相关
@@ -1380,6 +1381,10 @@ class Production extends Base
return $html; return $html;
} }
/**
 * Smoke-test hook: invokes ArticleSymbolNormalizer::normalize() with an empty
 * string and discards the result.
 *
 * NOTE(review): produces no response; presumably a leftover debugging entry
 * point. Confirm whether it should stay.
 */
public function testsym()
{
    ArticleSymbolNormalizer::normalize('');
}
public function doTypeSettingNew() public function doTypeSettingNew()
{ {
@@ -1399,7 +1404,7 @@ class Production extends Base
$editor_info = $this->user_obj->where('user_id', $journal_info['editor_id'])->find(); $editor_info = $this->user_obj->where('user_id', $journal_info['editor_id'])->find();
$typesetInfo = []; $typesetInfo = [];
$typesetInfo['info_title'] = $p_info['title']; $typesetInfo['info_title'] = ArticleSymbolNormalizer::normalize($p_info['title']);
$typesetInfo['info_type'] = $p_info['type']; $typesetInfo['info_type'] = $p_info['type'];
$typesetInfo['doi'] = $p_info['doi']; $typesetInfo['doi'] = $p_info['doi'];
$typesetInfo['topic'] = ''; $typesetInfo['topic'] = '';

View File

@@ -29,6 +29,7 @@ class PlagiarismRun
return; return;
} }
$svc = new PlagiarismService(); $svc = new PlagiarismService();
$svc->log("PlagiarismRun job act!!");
$svc->runUploadAndTrigger($checkId, $filePath); $svc->runUploadAndTrigger($checkId, $filePath);
$job->delete(); $job->delete();
} }

View File

@@ -0,0 +1,194 @@
<?php
namespace app\common;
/**
 * Symbol-level proofreading for journal article content. Only punctuation,
 * whitespace and full-width/half-width forms are adjusted; no words are added
 * or removed.
 *
 * Design notes:
 * - The default rules are conservative and each one can be switched off via $options.
 * - Use normalize() for plain text and normalizeHtml() for HTML (only the text
 *   between tags is processed, so URLs inside attributes are left untouched).
 * - Abstracts are often stored escaped (&gt; &lt; &amp; ...); normalizeAbstract()
 *   decodes the entities first and then applies the symbol rules.
 * - English journals normally contain no Chinese text: pass english_journal=true
 *   (or call normalizeEnglishAbstract()) to turn off the Han-only rules.
 *
 * All non-ASCII punctuation is written as code-point escapes (\x{...} inside
 * patterns, "\u{...}" in replacement strings) so the rules do not depend on
 * how this file is encoded or transported.
 */
class ArticleSymbolNormalizer
{
    /**
     * @var string PCRE character-class fragment for Han ideographs:
     *             U+4E00-U+9FFF plus U+3400-U+4DBF. Must be used with the /u modifier.
     */
    private static $han = '\x{4E00}-\x{9FFF}\x{3400}-\x{4DBF}';

    /**
     * Symbol proofreading for plain text.
     *
     * @param string $text    Input text; cast to string, and '' is returned for empty input.
     * @param array  $options Optional boolean switches:
     *   - line_endings         (default true)  CRLF / CR -> LF
     *   - fullwidth_space      (default true)  U+3000 ideographic space -> ASCII space
     *   - collapse_spaces      (default true)  runs of 2+ spaces/tabs (not newlines) -> one space
     *   - remove_zwsp          (default true)  strip U+200B-U+200D, U+FEFF and U+00AD
     *   - comma_cjk            (default true)  ASCII "," between two Han characters -> U+FF0C
     *   - comma_latin          (default true)  U+FF0C between two ASCII letters/digits -> ","
     *   - period_cjk           (default true)  U+FF0E directly after a Han character -> U+3002
     *   - bracket_latin        (default false) U+FF08 ... U+FF09 -> "(" ... ")" only when the
     *                                          content is ASCII letters, digits, whitespace or . , ; : - + / =
     *   - decode_html_entities (default false) decode &gt; &lt; &amp; &quot; &#39; and numeric
     *                                          entities before the other rules run
     *   - english_journal      (default false) turns comma_cjk and period_cjk off unless the
     *                                          caller passed those keys explicitly
     *
     * @return string The normalized text. A rule whose regex fails (for example on
     *                invalid UTF-8 input) is skipped instead of wiping the text.
     */
    public static function normalize($text, array $options = [])
    {
        $text = (string)$text;
        if ($text === '') {
            return '';
        }

        $o = array_merge([
            'line_endings'         => true,
            'fullwidth_space'      => true,
            'collapse_spaces'      => true,
            'remove_zwsp'          => true,
            'comma_cjk'            => true,
            'comma_latin'          => true,
            'period_cjk'           => true,
            'bracket_latin'        => false,
            'decode_html_entities' => false,
            'english_journal'      => false,
        ], $options);

        // English journals: disable the Han-only rules, but respect an explicit caller choice.
        if (!empty($o['english_journal'])) {
            foreach (['comma_cjk', 'period_cjk'] as $hanOnlyRule) {
                if (!array_key_exists($hanOnlyRule, $options)) {
                    $o[$hanOnlyRule] = false;
                }
            }
        }

        if (!empty($o['decode_html_entities'])) {
            $text = html_entity_decode($text, ENT_QUOTES | ENT_HTML5, 'UTF-8');
        }
        if (!empty($o['line_endings'])) {
            $text = str_replace(["\r\n", "\r"], "\n", $text);
        }
        if (!empty($o['fullwidth_space'])) {
            $text = str_replace("\u{3000}", ' ', $text);
        }
        if (!empty($o['remove_zwsp'])) {
            // Zero-width space / non-joiner / joiner, BOM and soft hyphen.
            $text = self::replaceOrKeep('/[\x{200B}-\x{200D}\x{FEFF}\x{00AD}]/u', '', $text);
        }
        if (!empty($o['collapse_spaces'])) {
            $text = self::replaceOrKeep('/[ \t]{2,}/u', ' ', $text);
        }

        $han = self::$han;
        if (!empty($o['comma_cjk'])) {
            // Han , Han  ->  Han U+FF0C Han
            $text = self::replaceOrKeep(
                '/(?<=[' . $han . ']),(?=[' . $han . '])/u',
                "\u{FF0C}",
                $text
            );
        }
        if (!empty($o['comma_latin'])) {
            // alnum U+FF0C alnum  ->  alnum , alnum
            // The comma must be part of the pattern: lookarounds alone would match
            // the empty gap between any two alphanumerics.
            $text = self::replaceOrKeep(
                '/(?<=[0-9A-Za-z])\x{FF0C}(?=[0-9A-Za-z])/u',
                ',',
                $text
            );
        }
        if (!empty($o['period_cjk'])) {
            // Full-width full stop U+FF0E right after a Han character -> ideographic full stop U+3002.
            $text = self::replaceOrKeep(
                '/(?<=[' . $han . '])\x{FF0E}/u',
                "\u{3002}",
                $text
            );
        }
        if (!empty($o['bracket_latin'])) {
            // "#" is the delimiter because "/" is one of the allowed characters inside the brackets.
            $text = self::replaceOrKeep(
                '#\x{FF08}([0-9A-Za-z\s.,;:\-+/=]+)\x{FF09}#u',
                '($1)',
                $text
            );
        }

        return $text;
    }

    /**
     * Symbol proofreading for an HTML fragment: only text outside tags is changed;
     * tag names and attribute values are copied through untouched.
     *
     * The input is split on `<...>`; text segments go through normalize(), tag
     * segments are kept verbatim. Because each text segment is normalized on its
     * own, rules that look at neighbouring characters do not see across a tag.
     * Malformed HTML, or an attribute value containing an unescaped `<` or `>`,
     * can be split incorrectly; extract plain text first for complex input.
     *
     * NOTE(review): with decode_html_entities enabled, &lt; / &gt; in text
     * segments become literal angle brackets in the output; confirm callers
     * re-escape if the result is rendered as HTML.
     *
     * @param string $html
     * @param array  $options Same switches as normalize().
     * @return string
     */
    public static function normalizeHtml($html, array $options = [])
    {
        $html = (string)$html;
        if ($html === '') {
            return '';
        }

        $segments = preg_split('/(<[^>]*>)/u', $html, -1, PREG_SPLIT_DELIM_CAPTURE);
        if ($segments === false) {
            // The split failed (e.g. invalid UTF-8): fall back to treating everything as text.
            return self::normalize($html, $options);
        }

        $result = '';
        foreach ($segments as $index => $segment) {
            if ($segment === '') {
                continue;
            }
            // With PREG_SPLIT_DELIM_CAPTURE the odd indexes hold the captured tags.
            $isTag = ($index % 2 === 1) && $segment[0] === '<';
            $result .= $isTag ? $segment : self::normalize($segment, $options);
        }

        return $result;
    }

    /**
     * Abstract helper: decode HTML entities (&gt; -> > ...) and then apply the
     * same symbol rules as normalize().
     *
     * Intended for abstracts stored in htmlspecialchars form. If the abstract
     * contains real HTML tags whose structure must be kept, use
     * normalizeAbstractHtml() instead.
     *
     * @param string $abstract
     * @param array  $options Same switches as normalize(); decode_html_entities
     *                        defaults to true here and can be overridden with false.
     * @return string
     */
    public static function normalizeAbstract($abstract, array $options = [])
    {
        return self::normalize($abstract, array_merge(['decode_html_entities' => true], $options));
    }

    /**
     * Abstract containing HTML tags: entity decoding and symbol rules are applied
     * to the text outside tags only; tags and attributes are left as they are.
     *
     * @param string $html
     * @param array  $options Same switches as normalize(); decode_html_entities defaults to true.
     * @return string
     */
    public static function normalizeAbstractHtml($html, array $options = [])
    {
        return self::normalizeHtml($html, array_merge(['decode_html_entities' => true], $options));
    }

    /**
     * English-journal abstract: entity decoding plus symbol rules, with the
     * Han-only punctuation rules off by default.
     *
     * @param string $abstract
     * @param array  $options Same switches as normalize(); english_journal defaults to true.
     * @return string
     */
    public static function normalizeEnglishAbstract($abstract, array $options = [])
    {
        return self::normalizeAbstract($abstract, array_merge(['english_journal' => true], $options));
    }

    /**
     * English-journal abstract containing HTML: text outside tags gets entity
     * decoding plus symbol rules, with the Han-only rules off by default.
     *
     * @param string $html
     * @param array  $options Same switches as normalize(); english_journal defaults to true.
     * @return string
     */
    public static function normalizeEnglishAbstractHtml($html, array $options = [])
    {
        return self::normalizeAbstractHtml($html, array_merge(['english_journal' => true], $options));
    }

    /**
     * Run preg_replace() and return the subject unchanged when the engine reports
     * an error (preg_replace() returns null then), so one failing rule cannot
     * erase the text being proofread.
     *
     * @param string $pattern
     * @param string $replacement
     * @param string $subject
     * @return string
     */
    private static function replaceOrKeep($pattern, $replacement, $subject)
    {
        $replaced = preg_replace($pattern, $replacement, $subject);
        return $replaced === null ? $subject : $replaced;
    }
}

View File

@@ -34,6 +34,13 @@ class PlagiarismService
*/ */
const MAX_POLL_ATTEMPTS = 60; const MAX_POLL_ATTEMPTS = 60;
private $logFile;
/**
 * Set the log file path to runtime/plagiarism_task.log under ROOT_PATH.
 */
public function __construct()
{
    $logName = 'plagiarism_task.log';
    $this->logFile = ROOT_PATH . 'runtime' . DS . $logName;
}
// ---------- 顶层入口 ---------- // ---------- 顶层入口 ----------
/** /**
@@ -55,6 +62,7 @@ class PlagiarismService
->where('article_id', $articleId) ->where('article_id', $articleId)
->value('journal_id'); ->value('journal_id');
$now = time(); $now = time();
$checkId = Db::name('plagiarism_check')->insertGetId([ $checkId = Db::name('plagiarism_check')->insertGetId([
'article_id' => $articleId, 'article_id' => $articleId,
@@ -67,12 +75,12 @@ class PlagiarismService
'ctime' => $now, 'ctime' => $now,
'utime' => $now, 'utime' => $now,
]); ]);
$this->log("submit service act");
// 入队执行:上传 + 触发 similarity // 入队执行:上传 + 触发 similarity
Queue::push( Queue::push(
'app\\api\\job\\PlagiarismRun', 'app\\api\\job\\PlagiarismRun',
['check_id' => $checkId, 'file_path' => $filePath], ['check_id' => $checkId, 'file_path' => $filePath],
'plagiarism' 'PlagiarismRun'
); );
return (int)$checkId; return (int)$checkId;
@@ -84,7 +92,7 @@ class PlagiarismService
public function runUploadAndTrigger($checkId, $filePath) public function runUploadAndTrigger($checkId, $filePath)
{ {
$check = $this->mustGetCheck($checkId); $check = $this->mustGetCheck($checkId);
$this->log("runUploadAndTrigger is act0");
try { try {
$tii = new TurnitinService(); $tii = new TurnitinService();
@@ -95,7 +103,7 @@ class PlagiarismService
if ($articleTitle === '') { if ($articleTitle === '') {
$articleTitle = 'Article #' . $check['article_id']; $articleTitle = 'Article #' . $check['article_id'];
} }
$this->log("runUploadAndTrigger is act1");
$createResp = $tii->createSubmission([ $createResp = $tii->createSubmission([
'title' => mb_substr($articleTitle, 0, 250), 'title' => mb_substr($articleTitle, 0, 250),
'owner' => 'editor_' . $check['triggered_by'], 'owner' => 'editor_' . $check['triggered_by'],
@@ -114,7 +122,7 @@ class PlagiarismService
'tii_submission_id' => $submissionId, 'tii_submission_id' => $submissionId,
'raw_response' => json_encode($createResp, JSON_UNESCAPED_UNICODE), 'raw_response' => json_encode($createResp, JSON_UNESCAPED_UNICODE),
]); ]);
$this->log("runUploadAndTrigger is act2");
// 2. 上传文件 // 2. 上传文件
$tii->uploadFile($submissionId, $filePath, basename($filePath)); $tii->uploadFile($submissionId, $filePath, basename($filePath));
@@ -127,12 +135,14 @@ class PlagiarismService
'raw_response' => json_encode($simResp, JSON_UNESCAPED_UNICODE), 'raw_response' => json_encode($simResp, JSON_UNESCAPED_UNICODE),
]); ]);
$this->log("runUploadAndTrigger is act3");
// 4. 排队首次轮询(晚一点开始,让 Turnitin 先处理) // 4. 排队首次轮询(晚一点开始,让 Turnitin 先处理)
Queue::later( Queue::later(
self::POLL_INTERVAL, self::POLL_INTERVAL,
'app\\api\\job\\PlagiarismPoll', 'app\\api\\job\\PlagiarismPoll',
['check_id' => $checkId, 'attempt' => 1], ['check_id' => $checkId, 'attempt' => 1],
'plagiarism' 'PlagiarismPoll'
); );
} catch (\Throwable $e) { } catch (\Throwable $e) {
$this->markFailed($checkId, '[upload] ' . $e->getMessage()); $this->markFailed($checkId, '[upload] ' . $e->getMessage());
@@ -320,6 +330,7 @@ class PlagiarismService
private function markFailed($checkId, $errMsg) private function markFailed($checkId, $errMsg)
{ {
$this->log("markFailed act");
$this->updateCheck($checkId, [ $this->updateCheck($checkId, [
'state' => 4, 'state' => 4,
'error_msg' => mb_substr($errMsg, 0, 1000), 'error_msg' => mb_substr($errMsg, 0, 1000),
@@ -337,7 +348,7 @@ class PlagiarismService
$row = Db::name('article_file') $row = Db::name('article_file')
->where('article_id', $articleId) ->where('article_id', $articleId)
->where('type_name', 'manuscirpt') // 历史拼写 ->where('type_name', 'manuscirpt') // 历史拼写
->order('article_file_id desc') ->order('file_id desc')
->find(); ->find();
if (!$row || empty($row['file_url'])) { if (!$row || empty($row['file_url'])) {
throw new Exception("article #{$articleId} has no manuscirpt file"); throw new Exception("article #{$articleId} has no manuscirpt file");
@@ -420,4 +431,10 @@ class PlagiarismService
{ {
return Db::name('plagiarism_check')->where('check_id', $checkId)->find(); return Db::name('plagiarism_check')->where('check_id', $checkId)->find();
} }
/**
 * Append one timestamped line to the plagiarism task log file.
 *
 * Logging is best-effort: write failures are deliberately suppressed so that
 * a logging problem never interrupts the plagiarism workflow.
 *
 * @param string $msg Message text; written after a "Y-m-d H:i:s" timestamp.
 * @return void
 */
public function log($msg)
{
    $line = date('Y-m-d H:i:s') . ' ' . $msg . PHP_EOL;
    // This log is written both from submit() and from the queue job, so take an
    // exclusive lock while appending to keep concurrent lines from interleaving.
    @file_put_contents($this->logFile, $line, FILE_APPEND | LOCK_EX);
}
} }