* X-Turnitin-Integration-Name / X-Turnitin-Integration-Version are sent with every request for auditing
 *
 * .env configuration ([turnitin] section):
 *   BASE_URL                         e.g. https://crossref-12345.turnitin.com/api/v1 (no trailing slash)
 *   API_KEY                          the generated Bearer token (required; there is no built-in default)
 *   INTEGRATION_NAME                 Scope Name (the name entered when the integration was created)
 *   INTEGRATION_VERSION              custom version number, useful for auditing, e.g. 1.0.0
 *   SUBMISSION_INGEST_MAX_WAIT       max seconds to poll for submission readiness after upload, default 600
 *                                    (only used by the synchronous waitAfterUploadForSimilarity)
 *   SUBMISSION_INGEST_POLL_INTERVAL  synchronous polling interval in seconds, default 3
 *   INGEST_CHAIN_FIRST_DELAY         delay in seconds before the first ingest check after upload, default 10 (queue chain)
 *   INGEST_CHAIN_POLL_INTERVAL       seconds between ingest-chain steps, default 15
 *   INGEST_CHAIN_MAX_ATTEMPTS        maximum number of ingest-chain steps, default 80
 *   EXCLUDE_QUOTES / EXCLUDE_BIBLIOGRAPHY / EXCLUDE_CITATIONS
 *                                    0|1, default 0 (closer to a manual check in the Crossref web UI)
 *   VIEWER_DEFAULT_MODE              match_overview | all_sources (default all_sources, convenient for
 *                                    browsing matches grouped by source repository)
 *   ADD_TO_INDEX                     0|1, default 1
 *
 * API documentation: https://developers.turnitin.com/docs/tca
 *
 * Notes:
 *  - All methods return the raw decoded array; HTTP errors throw Exception
 *  - No business logic lives here (that belongs in PlagiarismService)
 *  - The token is not cached (Bearer auth needs no login; every request carries it)
 */
class TurnitinService
{
    private $baseUrl;
    private $apiKey;
    private $integrationName;
    private $integrationVersion;
    private $timeout = 60;

    /**
     * Reads the connection settings from the [turnitin] section of .env.
     *
     * The API key deliberately has no fallback value: credentials must never be
     * committed to source, and an empty default keeps the "not configured"
     * guard below reachable.
     *
     * @throws Exception when BASE_URL or API_KEY resolves to an empty string
     */
    public function __construct()
    {
        $this->baseUrl = rtrim(trim((string)Env::get('turnitin.base_url', 'https://crossref-20794.turnitin.com/api/v1')), '/');
        $this->apiKey = trim((string)Env::get('turnitin.api_key', ''));
        $this->integrationName = trim((string)Env::get('turnitin.integration_name', 'tmr'));
        $this->integrationVersion = trim((string)Env::get('turnitin.integration_version', '1.0.0'));

        if ($this->baseUrl === '' || $this->apiKey === '') {
            throw new Exception('Turnitin not configured: missing BASE_URL or API_KEY in .env [turnitin] section');
        }
    }

    // ==================== Public API ====================

    /**
     * Health check / fetch the account capabilities.
     * GET /features-enabled
     *
     * @return mixed decoded response
     * @throws Exception on transport or HTTP errors
     */
    public function featuresEnabled()
    {
        return $this->request('GET', '/features-enabled');
    }

    /**
     * Create a submission (a file can only be uploaded once its id is known).
     * POST /submissions
     *
     * @param array $meta Required fields:
     *   - title      paper title
     *   - owner      submission owner identifier (custom string, e.g. the user_id of the submission system)
     *   - submitter  submitter identifier (same convention as owner)
     *   - eula       (optional) ['version' => '...', 'language' => 'en-US', 'accepted_timestamp' => ISO8601];
     *                may be omitted when features-enabled reports require_eula=false
     *   Optional fields:
     *   - extract_text_only  bool
     *   - metadata           array of custom key/values for later tracing
     *
     * @return array contains id (submission UUID), status, owner, ...
     * @throws Exception on transport or HTTP errors
     */
    public function createSubmission($meta)
    {
        return $this->request('POST', '/submissions', $meta);
    }

    /**
     * Upload a file to a submission.
     *
     * The TCA path is PUT /submissions/{id}/original. The file name is passed only
     * through Content-Disposition and must NOT be appended to the URL; otherwise the
     * gateway answers 404 (the error typically shows a path such as
     * //v1/submissions/.../original/xxx.docx).
     *
     * @param string $submissionId
     * @param string $filePath local PDF/DOCX path
     * @param string $filename display name sent to Turnitin (defaults to basename of $filePath)
     * @return array
     * @throws Exception when the file is missing/unreadable, cannot be read, or the request fails
     */
    public function uploadFile($submissionId, $filePath, $filename = '')
    {
        if (!is_file($filePath) || !is_readable($filePath)) {
            throw new Exception("File not found or not readable: {$filePath}");
        }
        if ($filename === '') {
            $filename = basename($filePath);
        }

        // Strip double quotes and CR/LF so the name cannot break out of the
        // quoted Content-Disposition value or inject extra headers.
        $safeName = str_replace(['"', "\r", "\n"], '', $filename);
        if ($safeName === '') {
            $safeName = 'document.bin';
        }

        $body = file_get_contents($filePath);
        if ($body === false) {
            // Without this guard a failed read would be sent as an empty upload.
            throw new Exception("Failed to read file: {$filePath}");
        }

        return $this->request(
            'PUT',
            '/submissions/' . rawurlencode($submissionId) . '/original',
            $body,
            [
                'Content-Type' => 'application/octet-stream',
                'Content-Disposition' => 'attachment; filename="' . $safeName . '"',
            ]
        );
    }

    /**
     * Trigger the similarity comparison.
     * PUT /submissions/{id}/similarity
     *
     * Note: $opts is merged over the defaults with a shallow array_merge, so a
     * top-level key supplied by the caller (e.g. 'generation_settings') replaces
     * the whole default section rather than being merged field by field.
     *
     * @param string $submissionId
     * @param array $opts
     *   - generation_settings.search_repositories  defaults to ['INTERNET','PUBLICATION',...]
     *   - generation_settings.submission_auto_excludes  **array of strings** (e.g. [] or concrete
     *     repository keys); must not be a boolean (that yields HTTP 400)
     *   - generation_settings.auto_exclude_self_matching_scope  optional, e.g. 'GROUP_CONTEXT'
     *   - view_settings.exclude_*  boolean exclusions (as in the TCA documentation)
     *   - indexing_settings.add_to_index  bool, whether to add this paper to the SUBMITTED_WORK index (usually true)
     * @return array
     * @throws Exception on transport or HTTP errors
     */
    public function triggerSimilarity($submissionId, $opts = [])
    {
        $body = array_merge($this->defaultSimilarityPayload(), $opts);

        return $this->request(
            'PUT',
            '/submissions/' . rawurlencode($submissionId) . '/similarity',
            $body
        );
    }

    /**
     * Default parameters shared by PUT /similarity and the PDF export.
     *
     * Previously exclude_* was fixed to true, which made the overall similarity lower
     * than a manual check in the Crossref web UI (unrelated to the matched source
     * numbering/types).
     *
     * @return array payload with generation_settings, view_settings and indexing_settings
     */
    public function defaultSimilarityPayload()
    {
        $scope = trim((string) Env::get('turnitin.auto_exclude_self_matching_scope', 'GROUP_CONTEXT'));

        $generation = [
            'search_repositories' => ['INTERNET', 'PUBLICATION', 'CROSSREF', 'CROSSREF_POSTED_CONTENT', 'SUBMITTED_WORK'],
            'submission_auto_excludes' => [],
        ];
        // An empty scope means "do not send the field at all".
        if ($scope !== '') {
            $generation['auto_exclude_self_matching_scope'] = $scope;
        }

        return [
            'generation_settings' => $generation,
            'view_settings' => $this->defaultViewSettings(),
            'indexing_settings' => [
                'add_to_index' => $this->envBool('turnitin.add_to_index', true),
            ],
        ];
    }

    /**
     * View-level exclusion flags read from .env (each defaults to false).
     *
     * @return array{exclude_quotes:bool,exclude_bibliography:bool,exclude_citations:bool}
     */
    public function defaultViewSettings()
    {
        return [
            'exclude_quotes' => $this->envBool('turnitin.exclude_quotes', false),
            'exclude_bibliography' => $this->envBool('turnitin.exclude_bibliography', false),
            'exclude_citations' => $this->envBool('turnitin.exclude_citations', false),
        ];
    }

    /**
     * Parse the overall similarity (0-100) from a GET /similarity response.
     *
     * Tolerates overall_match_percentage being nested under "message" and values
     * expressed as a 0-1 fraction. The first non-negative numeric candidate wins;
     * 0.0 is returned when none is found.
     *
     * @param array $statusResp decoded GET /similarity response
     * @return float percentage rounded to 2 decimals, capped at 100
     */
    public static function extractOverallMatchPercentage(array $statusResp)
    {
        $candidates = [];
        $push = function ($v) use (&$candidates) {
            if ($v === null || $v === '') {
                return;
            }
            if (is_numeric($v)) {
                $candidates[] = floatval($v);
            }
        };

        $push($statusResp['overall_match_percentage'] ?? null);
        $push($statusResp['overall_match'] ?? null);
        $push($statusResp['similarity_percentage'] ?? null);

        // Fall back to the top level when there is no "message" wrapper.
        $msg = $statusResp;
        if (isset($statusResp['message']) && is_array($statusResp['message'])) {
            $msg = $statusResp['message'];
        }
        $push($msg['overall_match_percentage'] ?? null);
        $push($msg['overall_match'] ?? null);

        if (isset($msg['similarity']) && is_array($msg['similarity'])) {
            $sim = $msg['similarity'];
            $push($sim['overall_match_percentage'] ?? null);
            $push($sim['overall_match'] ?? null);
        }

        foreach ($candidates as $n) {
            if ($n < 0) {
                continue;
            }
            // TCA's overall_match_percentage is a 0-100 integer, so "1" means 1%.
            // Only a value strictly between 0 and 1 (a real fraction such as
            // 0.12 = 12%) is multiplied by 100, so that the integer 1 (1%) is
            // never mistaken for 100%.
            if ($n > 0 && $n < 1.0) {
                return round(min($n * 100, 100), 2);
            }

            return round(min($n, 100), 2);
        }

        return 0.0;
    }

    /**
     * Extract a best-effort "per source" summary from a GET /similarity response
     * (for list display; the full detail remains in the Turnitin online report).
     *
     * Nodes are de-duplicated on the tuple (repository, percentage, word count).
     *
     * @param array $statusResp decoded GET /similarity response
     * @return array{score:float,sources:array<int,array<string,mixed>>}
     */
    public static function parseSimilarityReportMeta(array $statusResp)
    {
        $meta = [
            'score' => self::extractOverallMatchPercentage($statusResp),
            'sources' => [],
        ];

        $candidates = [];
        self::collectSimilaritySourceNodes($statusResp, $candidates, 0);
        if (isset($statusResp['message']) && is_array($statusResp['message'])) {
            self::collectSimilaritySourceNodes($statusResp['message'], $candidates, 0);
        }

        $seen = [];
        foreach ($candidates as $node) {
            if (!is_array($node)) {
                continue;
            }

            $pct = null;
            foreach (['percentage', 'match_percentage', 'overall_match_percentage', 'similarity_percentage'] as $k) {
                if (isset($node[$k]) && is_numeric($node[$k])) {
                    $pct = floatval($node[$k]);
                    break;
                }
            }

            $repo = '';
            foreach (['repository', 'repository_name', 'collection', 'source_type', 'type', 'database', 'category'] as $k) {
                if (!empty($node[$k])) {
                    $repo = strtoupper(trim((string) $node[$k]));
                    break;
                }
            }

            $words = isset($node['matched_word_count'])
                ? intval($node['matched_word_count'])
                : (isset($node['word_count']) ? intval($node['word_count']) : 0);

            $key = $repo . '|' . ($pct !== null ? $pct : '') . '|' . $words;
            if (isset($seen[$key])) {
                continue;
            }
            $seen[$key] = true;

            // Drop fields that carry no information (null / empty string).
            $meta['sources'][] = array_filter([
                'repository' => $repo,
                'match_percentage' => $pct,
                'matched_word_count' => $words > 0 ? $words : null,
            ], function ($v) {
                return $v !== null && $v !== '';
            });
        }

        return $meta;
    }

    /**
     * Recursively collect every array node that carries a repository-like key.
     *
     * @param mixed $node  current node; non-arrays are ignored
     * @param array $out   accumulator, appended to by reference
     * @param int   $depth current recursion depth; traversal stops beyond 8
     */
    private static function collectSimilaritySourceNodes($node, array &$out, $depth)
    {
        if ($depth > 8 || !is_array($node)) {
            return;
        }

        $hasRepo = false;
        foreach (['repository', 'repository_name', 'collection', 'source_type'] as $k) {
            if (!empty($node[$k])) {
                $hasRepo = true;
                break;
            }
        }
        if ($hasRepo) {
            $out[] = $node;
        }

        foreach ($node as $v) {
            if (is_array($v)) {
                if (isset($v[0]) && is_array($v[0])) {
                    // List of nodes: visit each element.
                    foreach ($v as $item) {
                        self::collectSimilaritySourceNodes($item, $out, $depth + 1);
                    }
                } else {
                    self::collectSimilaritySourceNodes($v, $out, $depth + 1);
                }
            }
        }
    }

    /**
     * Default view of the online Similarity Report (aligned with the
     * "view by source" layout of the Crossref back office).
     *
     * @return array{default_mode:string,modes:array{match_overview:bool,all_sources:bool}}
     */
    public function defaultViewerSimilarityBlock()
    {
        $mode = strtolower(trim((string) Env::get('turnitin.viewer_default_mode', 'all_sources')));
        if (!in_array($mode, ['match_overview', 'all_sources'], true)) {
            $mode = 'all_sources';
        }

        return [
            'default_mode' => $mode,
            'modes' => [
                'match_overview' => true,
                'all_sources' => true,
            ],
        ];
    }

    /**
     * Read a boolean flag from .env; accepts real booleans or the strings
     * 1/true/yes/on (case-insensitive). Anything else is false.
     *
     * @param string $name
     * @param bool $default
     * @return bool
     */
    private function envBool($name, $default = false)
    {
        $v = Env::get($name, $default ? '1' : '0');
        if ($v === true) {
            return true;
        }
        if ($v === false) {
            return false;
        }
        $v = strtolower(trim((string) $v));

        return in_array($v, ['1', 'true', 'yes', 'on'], true);
    }

    /**
     * Fetch the submission details (polled after upload to see whether parsing finished).
     * GET /submissions/{id}
     *
     * @param string $submissionId
     * @return array decoded JSON (commonly status=ok with id/status inside "message")
     * @throws Exception on transport or HTTP errors
     */
    public function getSubmission($submissionId)
    {
        return $this->request('GET', '/submissions/' . rawurlencode($submissionId));
    }

    /**
     * Perform a single GET /submissions/{id} and decide whether PUT /similarity may
     * be called. Does not sleep, so the queue chain can poll step by step.
     *
     * @param string $submissionId
     * @return array{ready:bool, failed:bool, status:string, snippet:string, message:array}
     * @throws Exception on transport or HTTP errors
     */
    public function parseSubmissionIngestState($submissionId)
    {
        $raw = $this->getSubmission($submissionId);
        $msg = self::unwrapSubmissionPayload($raw);
        $st = strtoupper(trim((string) self::pickSubmissionStatus($msg)));
        // Short excerpt of the payload for logs / error messages.
        $snippet = mb_substr(json_encode($msg, JSON_UNESCAPED_UNICODE), 0, 400);

        $ready = [
            'COMPLETE', 'COMPLETED', 'PROCESSED', 'READY', 'SUCCEEDED', 'COMPLETE_PROCESSING',
        ];
        $failed = ['ERROR', 'FAILED', 'CANCELLED', 'CANCELED', 'DELETED'];

        $readyFlag = $st !== '' && in_array($st, $ready, true);
        $failedFlag = $st !== '' && in_array($st, $failed, true);

        return [
            'ready' => $readyFlag,
            'failed' => $failedFlag,
            'status' => $st,
            'snippet' => $snippet,
            'message' => $msg,
        ];
    }

    /**
     * Block until Turnitin has finished asynchronously parsing the uploaded text
     * (synchronous version for CLI/debugging only; in production use the queue
     * chain PlagiarismWaitIngest).
     *
     * @param string $submissionId
     * @param int $maxWaitSec maximum seconds to wait, default 600 (10 minutes); values below 30 are raised to 30
     * @param int $intervalSec polling interval in seconds, default 3; values below 1 are raised to 1
     * @throws Exception on timeout or when the submission reaches a failed terminal state
     */
    public function waitAfterUploadForSimilarity($submissionId, $maxWaitSec = 600, $intervalSec = 3)
    {
        $deadline = time() + max(30, (int)$maxWaitSec);
        $intervalSec = max(1, (int)$intervalSec);
        $lastStatus = '';
        $lastSnippet = '';

        while (time() < $deadline) {
            $parsed = $this->parseSubmissionIngestState($submissionId);
            $lastStatus = $parsed['status'];
            $lastSnippet = $parsed['snippet'];

            if (!empty($parsed['ready'])) {
                return;
            }
            if (!empty($parsed['failed'])) {
                throw new Exception('Turnitin submission failed, status=' . $lastStatus . ' body=' . $lastSnippet);
            }
            sleep($intervalSec);
        }

        throw new Exception(
            'Timeout waiting for Turnitin submission ingest (last status=' . ($lastStatus ?: '(empty)') . ') snippet=' . $lastSnippet
        );
    }

    /**
     * Return the inner "message" array when present, otherwise the payload itself.
     *
     * @param mixed $decoded
     * @return array empty array when $decoded is not an array
     */
    private static function unwrapSubmissionPayload($decoded)
    {
        if (!is_array($decoded)) {
            return [];
        }
        if (isset($decoded['message']) && is_array($decoded['message'])) {
            return $decoded['message'];
        }

        return $decoded;
    }

    /**
     * Pick the first non-empty status-like field, looking at the payload itself
     * and then at its nested "submission" object.
     *
     * @param array $msg
     * @return string status text, or '' when none is found
     */
    private static function pickSubmissionStatus(array $msg)
    {
        $candidates = [$msg];
        if (isset($msg['submission']) && is_array($msg['submission'])) {
            $candidates[] = $msg['submission'];
        }

        foreach ($candidates as $m) {
            foreach (['status', 'workflow_status', 'submission_status', 'processing_status', 'paper_status'] as $k) {
                if (!empty($m[$k])) {
                    return (string)$m[$k];
                }
            }
        }

        return '';
    }

    /**
     * Query the similarity status.
     * GET /submissions/{id}/similarity
     *
     * Returns status: PROCESSING / COMPLETE / ERROR.
     * On COMPLETE it also returns overall_match_percentage / time_requested / time_generated.
     *
     * @param string $submissionId
     * @return mixed decoded response
     * @throws Exception on transport or HTTP errors
     */
    public function getSimilarityStatus($submissionId)
    {
        return $this->request(
            'GET',
            '/submissions/' . rawurlencode($submissionId) . '/similarity'
        );
    }

    /**
     * Obtain a temporary URL for viewing the report online.
     * POST /submissions/{id}/viewer-url
     *
     * Returns viewer_url (valid for a few hours).
     *
     * TCA requires default_mode to be lowercase (e.g. match_overview); LTI fields such as
     * save_changes can cause HTTP 400. The Crossref channel commonly uses ADMINISTRATOR/USER
     * rather than INSTRUCTOR. Configurable in .env:
     *   turnitin.viewer_permission_set=ADMINISTRATOR
     *
     * Each candidate request body is tried in order; only an HTTP 400 moves on to the
     * next candidate, any other failure is rethrown immediately.
     *
     * @param string $submissionId
     * @param array $viewer optional: viewer_user_id, triggered_by (mapped to editor_{id}),
     *                      or a complete request body override
     * @return mixed decoded response
     * @throws Exception when the id is empty, the report is not COMPLETE, or every attempt fails
     */
    public function getViewerUrl($submissionId, $viewer = [])
    {
        $submissionId = trim((string) $submissionId);
        if ($submissionId === '') {
            throw new Exception('submissionId required for viewer-url');
        }

        $statusResp = $this->getSimilarityStatus($submissionId);
        $st = strtoupper(trim((string) ($statusResp['status'] ?? '')));
        if ($st !== '' && $st !== 'COMPLETE') {
            throw new Exception('similarity report not ready for viewer-url, status=' . $st);
        }

        $path = '/submissions/' . rawurlencode($submissionId) . '/viewer-url';
        $lastError = null;

        foreach ($this->buildViewerUrlBodies($viewer) as $body) {
            try {
                return $this->request('POST', $path, $body);
            } catch (Exception $e) {
                $lastError = $e;
                if (strpos($e->getMessage(), 'HTTP 400') === false) {
                    throw $e;
                }
            }
        }

        throw $lastError ?: new Exception('viewer-url failed');
    }

    /**
     * Build several candidate request bodies in priority order (a later one is tried
     * only when the previous one failed with HTTP 400).
     *
     * When the caller supplies viewer_default_permission_set, the override is used
     * as the single body (viewer_user_id is filled in if missing).
     *
     * @param array $viewerOverrides
     * @return array<int,array<string,mixed>>
     */
    private function buildViewerUrlBodies(array $viewerOverrides)
    {
        if (!empty($viewerOverrides) && isset($viewerOverrides['viewer_default_permission_set'])) {
            $body = $viewerOverrides;
            if (empty($body['viewer_user_id'])) {
                $body['viewer_user_id'] = $this->resolveViewerUserId($viewerOverrides);
            }

            return [$body];
        }

        $locale = trim((string) Env::get('turnitin.viewer_locale', 'en-US')) ?: 'en-US';
        $configured = trim((string) Env::get('turnitin.viewer_permission_set', ''));
        $permissionSets = $configured !== ''
            ? array_map('trim', explode(',', $configured))
            : $this->defaultViewerPermissionSets();

        $viewerUserId = $this->resolveViewerUserId($viewerOverrides);
        $saveChanges = $this->envBool('turnitin.viewer_save_changes', false);
        $simModes = $this->defaultViewerSimilarityBlock();

        $bodies = [];
        foreach ($permissionSets as $perm) {
            if ($perm === '') {
                continue;
            }
            // TCA authentication requires viewer_user_id (its absence used to cause 400 Bad request).
            $bodies[] = [
                'viewer_user_id' => $viewerUserId,
                'locale' => $locale,
                'viewer_default_permission_set' => $perm,
                'similarity' => [
                    'view_settings' => ['save_changes' => $saveChanges],
                ],
            ];
            $bodies[] = [
                'viewer_user_id' => $viewerUserId,
                'locale' => $locale,
                'viewer_default_permission_set' => $perm,
                'similarity' => array_merge($simModes, [
                    'view_settings' => ['save_changes' => $saveChanges],
                ]),
            ];
            $bodies[] = [
                'viewer_user_id' => $viewerUserId,
                'locale' => $locale,
                'viewer_default_permission_set' => $perm,
            ];
        }

        return $bodies;
    }

    /**
     * Resolve the viewer_user_id required by viewer-url. It shares the namespace of the
     * owner/submitter used in createSubmission (editor_{user_id}).
     *
     * Precedence: explicit viewer_user_id, editor_id, triggered_by,
     * turnitin.viewer_user_id from .env, then "{integrationName}_viewer".
     *
     * @param array $opts
     * @return string
     */
    public function resolveViewerUserId(array $opts = [])
    {
        if (!empty($opts['viewer_user_id'])) {
            return trim((string) $opts['viewer_user_id']);
        }

        // The person opening the report (the current editor) must match the one used when
        // requesting viewer-url, otherwise session authentication tends to fail.
        $editorId = isset($opts['editor_id']) ? intval($opts['editor_id']) : 0;
        if ($editorId > 0) {
            return 'editor_' . $editorId;
        }

        $triggeredBy = isset($opts['triggered_by']) ? intval($opts['triggered_by']) : 0;
        if ($triggeredBy > 0) {
            return 'editor_' . $triggeredBy;
        }

        $custom = trim((string) Env::get('turnitin.viewer_user_id', ''));
        if ($custom !== '') {
            return $custom;
        }

        $name = trim((string) $this->integrationName);

        return ($name !== '' ? $name : 'tmr') . '_viewer';
    }

    /**
     * Crossref Similarity Check usually does not use INSTRUCTOR; roles are tried in
     * the order that is most commonly accepted.
     *
     * @return array<int,string>
     */
    private function defaultViewerPermissionSets()
    {
        if (stripos($this->baseUrl, 'crossref') !== false) {
            return ['ADMINISTRATOR', 'USER', 'EDITOR', 'INSTRUCTOR'];
        }

        return ['INSTRUCTOR', 'ADMINISTRATOR', 'USER'];
    }

    /**
     * Request generation of the PDF report (asynchronous; the status is polled separately).
     * POST /submissions/{id}/similarity/pdf
     *
     * Returns id (the PDF report id).
     *
     * @param string $submissionId
     * @param array $opts shallow-merged over the default locale / view_settings
     * @return mixed decoded response
     * @throws Exception on transport or HTTP errors
     */
    public function requestPdfReport($submissionId, $opts = [])
    {
        $body = array_merge([
            'locale' => trim((string) Env::get('turnitin.viewer_locale', 'en-US')) ?: 'en-US',
            'view_settings' => $this->defaultViewSettings(),
        ], $opts);

        return $this->request(
            'POST',
            '/submissions/' . rawurlencode($submissionId) . '/similarity/pdf',
            $body
        );
    }

    /**
     * Query the PDF report status.
     * GET /submissions/{id}/similarity/pdf/{pdf_id}/status
     *
     * status: PENDING / SUCCESS / FAILED
     *
     * @param string $submissionId
     * @param string $pdfId
     * @return mixed decoded response
     * @throws Exception on transport or HTTP errors
     */
    public function getPdfReportStatus($submissionId, $pdfId)
    {
        // rawurlencode (RFC 3986) is the correct encoder for path segments;
        // urlencode would turn a space into "+".
        return $this->request(
            'GET',
            '/submissions/' . rawurlencode($submissionId) . '/similarity/pdf/' . rawurlencode($pdfId) . '/status'
        );
    }

    /**
     * Download the PDF report content (only callable once status=SUCCESS).
     * GET /submissions/{id}/similarity/pdf/{pdf_id}
     *
     * @param string $submissionId
     * @param string $pdfId
     * @return string raw PDF bytes; the caller is responsible for writing them to disk
     * @throws Exception on transport or HTTP errors
     */
    public function downloadPdfReport($submissionId, $pdfId)
    {
        return $this->request(
            'GET',
            '/submissions/' . rawurlencode($submissionId) . '/similarity/pdf/' . rawurlencode($pdfId),
            null,
            [],
            true // raw response (no json_decode)
        );
    }

    // ==================== Internal HTTP layer ====================

    /**
     * Single entry point for all HTTP calls.
     *
     * @param string $method GET/POST/PUT/DELETE
     * @param string $path relative path starting with "/", appended to baseUrl
     * @param mixed $body array => JSON-encoded; string => sent as the raw body; null => no body
     * @param array $extraHeaders additional headers as name => value
     * @param bool $rawResponse true = return the raw string; false = json_decode
     * @return mixed decoded array, [] for an empty body, or the raw string when the
     *               response is not valid JSON (or $rawResponse is true)
     * @throws Exception on an unencodable body, a cURL failure, or a non-2xx HTTP status
     */
    private function request($method, $path, $body = null, $extraHeaders = [], $rawResponse = false)
    {
        $url = $this->baseUrl . $path;

        $headers = [
            'Authorization: Bearer ' . $this->apiKey,
            'X-Turnitin-Integration-Name: ' . $this->integrationName,
            'X-Turnitin-Integration-Version: ' . $this->integrationVersion,
        ];

        $payload = null;
        if ($body !== null) {
            if (is_array($body)) {
                $payload = json_encode($body, JSON_UNESCAPED_UNICODE | JSON_UNESCAPED_SLASHES);
                if ($payload === false) {
                    // e.g. invalid UTF-8 in a title; fail loudly instead of sending an empty body.
                    throw new Exception('Turnitin request body is not JSON-encodable: ' . json_last_error_msg());
                }
                $headers[] = 'Content-Type: application/json';
            } else {
                $payload = $body;
                if (!isset($extraHeaders['Content-Type'])) {
                    $headers[] = 'Content-Type: application/octet-stream';
                }
            }
        }

        foreach ($extraHeaders as $k => $v) {
            $headers[] = $k . ': ' . $v;
        }

        $ch = curl_init();
        curl_setopt_array($ch, [
            CURLOPT_URL => $url,
            CURLOPT_CUSTOMREQUEST => strtoupper($method),
            CURLOPT_RETURNTRANSFER => true,
            CURLOPT_HTTPHEADER => $headers,
            CURLOPT_TIMEOUT => $this->timeout,
            CURLOPT_CONNECTTIMEOUT => 15,
            CURLOPT_SSL_VERIFYPEER => true,
            CURLOPT_SSL_VERIFYHOST => 2,
        ]);
        if ($payload !== null) {
            curl_setopt($ch, CURLOPT_POSTFIELDS, $payload);
        }

        $resp = curl_exec($ch);
        $httpCode = curl_getinfo($ch, CURLINFO_HTTP_CODE);
        $err = curl_error($ch);
        curl_close($ch);

        if ($resp === false) {
            throw new Exception("Turnitin curl error: {$err} (url={$url})");
        }

        if ($httpCode < 200 || $httpCode >= 300) {
            // Include the first 1k of the response body to ease troubleshooting.
            $excerpt = mb_substr((string)$resp, 0, 1000);
            throw new Exception("Turnitin HTTP {$httpCode} {$method} {$path}: {$excerpt}");
        }

        if ($rawResponse) {
            return $resp;
        }

        // Some responses may be 204 No Content.
        if ($resp === '' || $resp === null) {
            return [];
        }

        $data = json_decode($resp, true);
        if (json_last_error() !== JSON_ERROR_NONE) {
            // Not JSON: hand the original text back unchanged.
            return $resp;
        }

        return $data;
    }
}