From ab7f08b06cf4301a34c071000c834dfe4105216c Mon Sep 17 00:00:00 2001
From: wangjinlei <751475802@qq.com>
Date: Mon, 25 May 2026 13:20:00 +0800
Subject: [PATCH] =?UTF-8?q?=E4=BD=9C=E8=80=85=E7=9A=84ai=E6=80=BB=E7=BB=93?=
 =?UTF-8?q?field?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .env                                      |   6 +
 application/api/controller/Article.php    |   4 +
 application/api/controller/Email.php      |   1 -
 application/api/controller/Plagiarism.php |  10 +-
 application/api/job/mail.php              |   2 +-
 application/common.php                    |   2 +-
 application/common/PlagiarismService.php  |  13 +-
 application/common/TurnitinService.php    | 272 +++++++++++++++++++---
 8 files changed, 279 insertions(+), 31 deletions(-)

diff --git a/.env b/.env
index 62277e29..258571d4 100644
--- a/.env
+++ b/.env
@@ -46,6 +46,12 @@ APPLY_URL="https://submission.tmrjournals.com/youthBoardRegister"
 [turnitin]
 viewer_permission_set=ADMINISTRATOR
 viewer_locale=en-US
+; Aligned with manual checks in the Crossref web UI: all three default to 0 (quotes/bibliography/citations are NOT excluded). Set to 1 to measure body-text similarity only.
+exclude_quotes=0
+exclude_bibliography=0
+exclude_citations=0
+; Default view of the online report: all_sources = grouped by source repository (same as the Crossref manual back office); match_overview = match overview (in-text source numbers tend to all show as 1).
+viewer_default_mode=all_sources
 
 [plagiarism]
 static_root="/home/wwwroot/api.tmrjournals.com/public"
diff --git a/application/api/controller/Article.php b/application/api/controller/Article.php
index e47a0473..39572f85 100644
--- a/application/api/controller/Article.php
+++ b/application/api/controller/Article.php
@@ -598,6 +598,10 @@ class Article extends Base
                 $article_res['is_draft'] = 1;
             }
         }
+        // Added 2026-02-04: flag whether a production instance exists (start)
+        $article_res['has_produce'] = $this->production_article_obj->where('article_id', $data['articleId'])->where('state', 0)->find() ? 1 : 0;
+
+        // Added 2026-02-04: flag whether a production instance exists (end)
         return json(['article' => $article_res, 'msg' => $article_msg, 'authors' => $author_res, 'suggest' => $suggest, 'transfer' => $transfer_res, 'transinfo' => $transfer_info, "major" => $major,'suggest_final' => $aFinal]);
     }
 
diff --git a/application/api/controller/Email.php b/application/api/controller/Email.php
index c1f08261..6b5a2d6a 100644
--- a/application/api/controller/Email.php
+++ b/application/api/controller/Email.php
@@ -115,7 +115,6 @@ class Email extends Base
 
     public function pushEmailOnTemplate()
     {
-        die();
         $data = $this->request->post();
 
         $rule = new Validate([
diff --git a/application/api/controller/Plagiarism.php b/application/api/controller/Plagiarism.php
index c76965bd..9400c92b 100644
--- a/application/api/controller/Plagiarism.php
+++ b/application/api/controller/Plagiarism.php
@@ -151,7 +151,15 @@ class Plagiarism extends Base
         if (!$row) {
             return jsonError('not found');
         }
-        return jsonSuccess($this->formatRow($row));
+        $out = $this->formatRow($row);
+        if (!empty($row['raw_response'])) {
+            $raw = json_decode($row['raw_response'], true);
+            if (is_array($raw)) {
+                $out['similarity_meta'] = \app\common\TurnitinService::parseSimilarityReportMeta($raw);
+            }
+        }
+        $out['report_view_hint'] = 'PDF 多为 Match Overview 汇总样式;按来源库(Internet/Publication/Crossref)分类请用 getReportUrl 打开在线报告并切到 All Sources';
+        return jsonSuccess($out);
     }
 
     /**
diff --git a/application/api/job/mail.php b/application/api/job/mail.php
index bbb23ccf..39f96c23 100644
--- a/application/api/job/mail.php
+++ b/application/api/job/mail.php
@@ -15,7 +15,7 @@ class mail {
 
 
     public function tgpu(Job $job, $data){
-//        my_tg_pushmail($data);
+        my_tg_pushmail($data);
         $job->delete();
     }
 
diff --git a/application/common.php b/application/common.php
index f4fb3041..31dd9c06 100644
--- a/application/common.php
+++ b/application/common.php
@@ -915,7 +915,7 @@ function prgeAuthor($author)
 
 function my_tg_pushmail($data)
 {
-//    $res = sendEmail($data['email'], $data['title'], $data['title'], $data['content'], $data['tmail'], $data['tpassword'], $data['attachmentFile']);
+    sendEmail($data['email'], $data['title'], $data['title'], $data['content'], $data['tmail'], $data['tpassword'], $data['attachmentFile']);
 //    if (isset($res['status'])) {
 //        $log_obj = Db::name('email_log');
 //        $insert['article_id'] = $data['article_id'];
diff --git a/application/common/PlagiarismService.php b/application/common/PlagiarismService.php
index 4f22290d..0bdcb060 100644
--- a/application/common/PlagiarismService.php
+++ b/application/common/PlagiarismService.php
@@ -312,11 +312,20 @@ class PlagiarismService
         ]);
 
         if ($status === 'COMPLETE') {
-            $score = isset($statusResp['overall_match_percentage'])
-                ? floatval($statusResp['overall_match_percentage']) : 0;
+            $score = TurnitinService::extractOverallMatchPercentage($statusResp);
+            if ($score <= 0 && isset($statusResp['overall_match_percentage'])) {
+                $score = floatval($statusResp['overall_match_percentage']);
+            }
+            $this->log('poll complete check_id=' . $checkId . ' score=' . $score
+                . ' check_type=' . ($check['check_type'] ?? 'full'));
 
             $localPdf = $this->downloadAndStorePdf($tii, $check['tii_submission_id'], $checkId);
 
+            $meta = TurnitinService::parseSimilarityReportMeta($statusResp);
+            if ($meta['score'] > 0) {
+                $score = $meta['score'];
+            }
+
             $this->updateCheck($checkId, [
                 'state' => 3,
                 'similarity_score' => $score,
diff --git a/application/common/TurnitinService.php b/application/common/TurnitinService.php
index 3604442f..cf2be2dd 100644
--- a/application/common/TurnitinService.php
+++ b/application/common/TurnitinService.php
@@ -23,6 +23,9 @@ use think\Exception;
  * INGEST_CHAIN_FIRST_DELAY 上传后首次 ingest 检查延迟秒数,默认 10(队列链)
  * INGEST_CHAIN_POLL_INTERVAL ingest 链每步间隔秒数,默认 15
  * INGEST_CHAIN_MAX_ATTEMPTS ingest 链最大步数,默认 80
+ * EXCLUDE_QUOTES / EXCLUDE_BIBLIOGRAPHY / EXCLUDE_CITATIONS 0|1, default 0 (closer to manual checks in the Crossref web UI)
+ * VIEWER_DEFAULT_MODE match_overview | all_sources (default all_sources, for viewing results grouped by source repository)
+ * ADD_TO_INDEX 0|1, default 1
  *
  * API 文档:https://developers.turnitin.com/docs/tca
  *
@@ -135,22 +138,7 @@ class TurnitinService
      */
     public function triggerSimilarity($submissionId, $opts = [])
     {
-        $body = array_merge([
-            'generation_settings' => [
-                'search_repositories' => ['INTERNET', 'PUBLICATION', 'CROSSREF', 'CROSSREF_POSTED_CONTENT', 'SUBMITTED_WORK'],
-                // 服务端类型为 List,传 true 会 400:Cannot deserialize ... from Boolean
-                'submission_auto_excludes' => [],
-                'auto_exclude_self_matching_scope' => 'GROUP_CONTEXT',
-            ],
-            'view_settings' => [
-                'exclude_quotes' => true,
-                'exclude_bibliography' => true,
-                'exclude_citations' => true,
-            ],
-            'indexing_settings' => [
-                'add_to_index' => true,
-            ],
-        ], $opts);
+        $body = array_merge($this->defaultSimilarityPayload(), $opts);
 
         return $this->request(
             'PUT',
@@ -159,6 +147,245 @@ class TurnitinService
             $body
         );
     }
+    /**
+     * Build the default request body for PUT /similarity.
+     *
+     * Its view_settings are the same ones sent with the PDF export request.
+     * Previously exclude_* was hard-coded to true, which made the overall
+     * similarity lower than a manual check in the Crossref web UI (this is
+     * unrelated to the numbering/type of the matched sources).
+     *
+     * @return array
+     */
+    public function defaultSimilarityPayload()
+    {
+        $scope = trim((string) Env::get('turnitin.auto_exclude_self_matching_scope', 'GROUP_CONTEXT'));
+
+        $generation = [
+            'search_repositories' => ['INTERNET', 'PUBLICATION', 'CROSSREF', 'CROSSREF_POSTED_CONTENT', 'SUBMITTED_WORK'],
+            // The server expects a List here; sending true fails with HTTP 400 (Cannot deserialize ... from Boolean).
+            'submission_auto_excludes' => [],
+        ];
+        // An explicitly empty env value leaves the scope out of the request.
+        if ($scope !== '') {
+            $generation['auto_exclude_self_matching_scope'] = $scope;
+        }
+
+        return [
+            'generation_settings' => $generation,
+            'view_settings' => $this->defaultViewSettings(),
+            'indexing_settings' => [
+                'add_to_index' => $this->envBool('turnitin.add_to_index', true),
+            ],
+        ];
+    }
+
+    /**
+     * Exclusion flags (quotes / bibliography / citations) read from the
+     * turnitin.exclude_* env keys; each one defaults to false.
+     *
+     * @return array
+     */
+    public function defaultViewSettings()
+    {
+        return [
+            'exclude_quotes' => $this->envBool('turnitin.exclude_quotes', false),
+            'exclude_bibliography' => $this->envBool('turnitin.exclude_bibliography', false),
+            'exclude_citations' => $this->envBool('turnitin.exclude_citations', false),
+        ];
+    }
+
+    /**
+     * Extract the overall similarity (0-100) from a GET /similarity response.
+     *
+     * Looks at the top level, at a nested `message` array and at a
+     * `similarity` sub-array, and returns the first positive value found.
+     * A value strictly between 0 and 1 is treated as a ratio and scaled to a
+     * percentage; an exact 1 stays 1% (scaling it would report 100%).
+     *
+     * @param array $statusResp decoded response body
+     * @return float percentage rounded to 2 decimals, 0.0 when none is found
+     */
+    public static function extractOverallMatchPercentage(array $statusResp)
+    {
+        $candidates = [];
+
+        $push = function ($v) use (&$candidates) {
+            if ($v === null || $v === '') {
+                return;
+            }
+            if (is_numeric($v)) {
+                $candidates[] = floatval($v);
+            }
+        };
+
+        $push($statusResp['overall_match_percentage'] ?? null);
+        $push($statusResp['overall_match'] ?? null);
+        $push($statusResp['similarity_percentage'] ?? null);
+
+        $msg = $statusResp;
+        if (isset($statusResp['message']) && is_array($statusResp['message'])) {
+            $msg = $statusResp['message'];
+        }
+        $push($msg['overall_match_percentage'] ?? null);
+        $push($msg['overall_match'] ?? null);
+        if (isset($msg['similarity']) && is_array($msg['similarity'])) {
+            $sim = $msg['similarity'];
+            $push($sim['overall_match_percentage'] ?? null);
+            $push($sim['overall_match'] ?? null);
+        }
+
+        foreach ($candidates as $n) {
+            // A zero/negative candidate must not hide a positive one found later.
+            if ($n <= 0) {
+                continue;
+            }
+            if ($n < 1.0) {
+                return round($n * 100, 2);
+            }
+            // Clamp so a malformed response can never report more than 100%.
+            return round(min($n, 100.0), 2);
+        }
+
+        return 0.0;
+    }
+
+    /**
+     * Best-effort per-source summary extracted from a GET /similarity response
+     * (for list display; full details remain in the Turnitin online report).
+     *
+     * @param array $statusResp decoded response body
+     * @return array{score:float,sources:array<int,array<string,mixed>>}
+     */
+    public static function parseSimilarityReportMeta(array $statusResp)
+    {
+        $meta = [
+            'score' => self::extractOverallMatchPercentage($statusResp),
+            'sources' => [],
+        ];
+
+        $candidates = [];
+        self::collectSimilaritySourceNodes($statusResp, $candidates, 0);
+        if (isset($statusResp['message']) && is_array($statusResp['message'])) {
+            self::collectSimilaritySourceNodes($statusResp['message'], $candidates, 0);
+        }
+
+        $seen = [];
+        foreach ($candidates as $node) {
+            if (!is_array($node)) {
+                continue;
+            }
+            $pct = null;
+            foreach (['percentage', 'match_percentage', 'overall_match_percentage', 'similarity_percentage'] as $k) {
+                if (isset($node[$k]) && is_numeric($node[$k])) {
+                    $pct = floatval($node[$k]);
+                    break;
+                }
+            }
+            $repo = '';
+            foreach (['repository', 'repository_name', 'collection', 'source_type', 'type', 'database', 'category'] as $k) {
+                if (!empty($node[$k])) {
+                    $repo = strtoupper(trim((string) $node[$k]));
+                    break;
+                }
+            }
+            $words = isset($node['matched_word_count']) ? intval($node['matched_word_count'])
+                : (isset($node['word_count']) ? intval($node['word_count']) : 0);
+            $key = $repo . '|' . ($pct !== null ? $pct : '') . '|' . $words;
+            if (isset($seen[$key])) {
+                continue;
+            }
+            $seen[$key] = true;
+            $meta['sources'][] = array_filter([
+                'repository' => $repo,
+                'match_percentage' => $pct,
+                'matched_word_count' => $words > 0 ? $words : null,
+            ], function ($v) {
+                return $v !== null && $v !== '';
+            });
+        }
+
+        return $meta;
+    }
+
+    /**
+     * Recursively collect every array node that carries a repository-like key.
+     *
+     * @param mixed $node  current node; non-arrays are ignored
+     * @param array $out   accumulator, appended to by reference
+     * @param int   $depth current recursion depth; traversal stops beyond 8
+     */
+    private static function collectSimilaritySourceNodes($node, array &$out, $depth)
+    {
+        if ($depth > 8 || !is_array($node)) {
+            return;
+        }
+        $hasRepo = false;
+        foreach (['repository', 'repository_name', 'collection', 'source_type'] as $k) {
+            if (!empty($node[$k])) {
+                $hasRepo = true;
+                break;
+            }
+        }
+        if ($hasRepo) {
+            $out[] = $node;
+        }
+        foreach ($node as $v) {
+            if (is_array($v)) {
+                if (isset($v[0]) && is_array($v[0])) {
+                    foreach ($v as $item) {
+                        self::collectSimilaritySourceNodes($item, $out, $depth + 1);
+                    }
+                } else {
+                    self::collectSimilaritySourceNodes($v, $out, $depth + 1);
+                }
+            }
+        }
+    }
+
+    /**
+     * Default `similarity` block for the online Similarity Report viewer
+     * (aligned with the "view by source" mode of the Crossref back office).
+     *
+     * @return array
+     */
+    public function defaultViewerSimilarityBlock()
+    {
+        $mode = strtolower(trim((string) Env::get('turnitin.viewer_default_mode', 'all_sources')));
+        if (!in_array($mode, ['match_overview', 'all_sources'], true)) {
+            $mode = 'all_sources';
+        }
+
+        return [
+            'default_mode' => $mode,
+            'modes' => [
+                'match_overview' => true,
+                'all_sources' => true,
+            ],
+        ];
+    }
+
+    /**
+     * Read an env value as a boolean; "1", "true", "yes" and "on"
+     * (case-insensitive, trimmed) count as true, anything else as false.
+     *
+     * @param string $name    env key, e.g. turnitin.add_to_index
+     * @param bool   $default fallback handed to Env::get as '1' or '0'
+     * @return bool
+     */
+    private function envBool($name, $default = false)
+    {
+        $v = Env::get($name, $default ? '1' : '0');
+        if ($v === true) {
+            return true;
+        }
+        if ($v === false) {
+            return false;
+        }
+        $v = strtolower(trim((string) $v));
+        return in_array($v, ['1', 'true', 'yes', 'on'], true);
+    }
+
     /**
      * 查询 submission 详情(上传后用于轮询是否解析完成)。
      * GET /submissions/{id}
@@ -346,13 +573,7 @@ class TurnitinService
         $bodies[] = [
             'viewer_default_permission_set' => $perm,
             'locale' => $locale,
-            'similarity' => [
-                'default_mode' => 'match_overview',
-                'modes' => [
-                    'match_overview' => true,
-                    'all_sources' => true,
-                ],
-            ],
+            'similarity' => $this->defaultViewerSimilarityBlock(),
         ];
         // 最简请求体(部分 Crossref 租户只接受 permission + locale)
         $bodies[] = [
@@ -386,12 +607,13 @@ class TurnitinService
     public function requestPdfReport($submissionId, $opts = [])
     {
         $body = array_merge([
-            'locale' => 'en-US',
+            'locale' => trim((string) Env::get('turnitin.viewer_locale', 'en-US')) ?: 'en-US',
+            'view_settings' => $this->defaultViewSettings(),
         ], $opts);
 
         return $this->request(
             'POST',
-            '/submissions/' . urlencode($submissionId) . '/similarity/pdf',
+            '/submissions/' . rawurlencode($submissionId) . '/similarity/pdf',
             $body
         );
     }