*   X-Turnitin-Integration-Name / X-Turnitin-Integration-Version are sent for auditing
 *
 * .env configuration ([turnitin] section):
 *   BASE_URL                         e.g. https://crossref-12345.turnitin.com/api/v1 (no trailing slash)
 *   API_KEY                          the generated Bearer token (required; there is no built-in fallback)
 *   INTEGRATION_NAME                 Scope Name (the name entered when the integration was created)
 *   INTEGRATION_VERSION              custom version number, useful for auditing, e.g. 1.0.0
 *   SUBMISSION_INGEST_MAX_WAIT       max seconds to poll for submission readiness after upload, default 600
 *                                    (only for the synchronous waitAfterUploadForSimilarity)
 *   SUBMISSION_INGEST_POLL_INTERVAL  synchronous polling interval in seconds, default 3
 *   INGEST_CHAIN_FIRST_DELAY         delay in seconds before the first ingest check after upload, default 10 (queue chain)
 *   INGEST_CHAIN_POLL_INTERVAL       seconds between ingest-chain steps, default 15
 *   INGEST_CHAIN_MAX_ATTEMPTS        maximum number of ingest-chain steps, default 80
 *
 * API documentation: https://developers.turnitin.com/docs/tca
 *
 * Notes:
 *   - Methods return the decoded JSON array (downloadPdfReport returns the raw body); HTTP errors throw Exception
 *   - No business logic lives here (that belongs to PlagiarismService)
 *   - The token is not cached (Bearer auth needs no login; it is sent with every request)
 */
class TurnitinService
{
    private $baseUrl;
    private $apiKey;
    private $integrationName;
    private $integrationVersion;
    private $timeout = 60;

    /**
     * Reads the [turnitin] configuration and validates that it is usable.
     *
     * @throws Exception when BASE_URL or API_KEY resolves to an empty string
     */
    public function __construct()
    {
        $this->baseUrl = rtrim(trim((string)Env::get('turnitin.base_url', 'https://crossref-20794.turnitin.com/api/v1')), '/');
        // Deliberately no fallback value: a Bearer token must never be committed to source,
        // and an empty default is what makes the "not configured" check below reachable.
        $this->apiKey = trim((string)Env::get('turnitin.api_key', ''));
        $this->integrationName = trim((string)Env::get('turnitin.integration_name', 'tmr'));
        $this->integrationVersion = trim((string)Env::get('turnitin.integration_version', '1.0.0'));

        if ($this->baseUrl === '' || $this->apiKey === '') {
            throw new Exception('Turnitin not configured: missing BASE_URL or API_KEY in .env [turnitin] section');
        }
    }

    // ==================== Public API ====================

    /**
     * Health check / fetch the account capabilities.
     * GET /features-enabled
     */
    public function featuresEnabled()
    {
        return $this->request('GET', '/features-enabled');
    }

    /**
     * Create a submission (its id is required before a file can be uploaded).
     * POST /submissions
     *
     * @param array $meta Required fields:
     *   - title       paper title
     *   - owner       submission owner identifier (custom string, e.g. the user_id of the submission system)
     *   - submitter   submitter identifier (same as above)
     *   - eula        (optional) ['version' => '...', 'language' => 'en-US', 'accepted_timestamp' => ISO8601]
     *                 may be omitted when features-enabled reports require_eula=false
     *   Optional fields:
     *   - extract_text_only  bool
     *   - metadata           array of custom key/values for later tracing
     *
     * @return array contains id (submission UUID), status, owner, ...
     */
    public function createSubmission($meta)
    {
        return $this->request('POST', '/submissions', $meta);
    }

    /**
     * Upload a file to a submission.
     *
     * The TCA documented path is PUT /submissions/{id}/original (the file name is passed only via
     * Content-Disposition; do not append it to the URL, otherwise the gateway answers 404 and the
     * error typically shows a path like //v1/submissions/.../original/xxx.docx).
     *
     * @param string $submissionId
     * @param string $filePath  local PDF/DOCX path
     * @param string $filename  display file name sent to Turnitin (defaults to the basename)
     * @return array
     * @throws Exception when the file is missing, unreadable, or cannot be read into memory
     */
    public function uploadFile($submissionId, $filePath, $filename = '')
    {
        if (!is_file($filePath) || !is_readable($filePath)) {
            throw new Exception("File not found or not readable: {$filePath}");
        }
        if ($filename === '') {
            $filename = basename($filePath);
        }
        // Strip double quotes and CR/LF so the name cannot break out of the Content-Disposition header.
        $safeName = str_replace(['"', "\r", "\n"], '', $filename);
        if ($safeName === '') {
            $safeName = 'document.bin';
        }

        $body = file_get_contents($filePath);
        if ($body === false) {
            // Without this check `false` would be sent as the request body, i.e. an empty upload.
            throw new Exception("Failed to read file: {$filePath}");
        }

        return $this->request(
            'PUT',
            '/submissions/' . rawurlencode($submissionId) . '/original',
            $body,
            [
                'Content-Type' => 'application/octet-stream',
                'Content-Disposition' => 'attachment; filename="' . $safeName . '"',
            ]
        );
    }

    /**
     * Trigger the similarity comparison.
     * PUT /submissions/{id}/similarity
     *
     * NOTE: $opts is merged with array_merge(), which is shallow. Passing a top-level key such as
     * 'generation_settings' replaces that whole section, so include every nested key you need.
     *
     * @param string $submissionId
     * @param array $opts
     *   - generation_settings.search_repositories             default ['INTERNET','PUBLICATION',...]
     *   - generation_settings.submission_auto_excludes        array of strings (e.g. [] or concrete repository
     *                                                         keys); must not be a boolean (results in a 400)
     *   - generation_settings.auto_exclude_self_matching_scope optional, e.g. 'GROUP_CONTEXT'
     *   - view_settings.exclude_*                              boolean exclusions (as in the TCA docs)
     *   - indexing_settings.add_to_index                       bool, whether to add this paper to the
     *                                                         SUBMITTED_WORK index (usually true)
     * @return array
     */
    public function triggerSimilarity($submissionId, $opts = [])
    {
        $body = array_merge([
            'generation_settings' => [
                'search_repositories' => ['INTERNET', 'PUBLICATION', 'CROSSREF', 'CROSSREF_POSTED_CONTENT', 'SUBMITTED_WORK'],
                // The server-side type is a List; sending true yields 400: Cannot deserialize ... from Boolean
                'submission_auto_excludes' => [],
                'auto_exclude_self_matching_scope' => 'GROUP_CONTEXT',
            ],
            'view_settings' => [
                'exclude_quotes' => true,
                'exclude_bibliography' => true,
                'exclude_citations' => true,
            ],
            'indexing_settings' => [
                'add_to_index' => true,
            ],
        ], $opts);

        return $this->request(
            'PUT',
            '/submissions/' . rawurlencode($submissionId) . '/similarity',
            $body
        );
    }

    /**
     * Fetch the submission details (polled after upload to see whether parsing has finished).
     * GET /submissions/{id}
     *
     * @return array decoded JSON (commonly status=ok plus a message containing id/status)
     */
    public function getSubmission($submissionId)
    {
        return $this->request('GET', '/submissions/' . rawurlencode($submissionId));
    }

    /**
     * Performs a single GET /submissions/{id} and decides whether PUT /similarity may be called.
     * Never sleeps, so a queue chain can poll step by step.
     *
     * @return array{ready:bool, failed:bool, status:string, snippet:string, message:array}
     */
    public function parseSubmissionIngestState($submissionId)
    {
        $raw = $this->getSubmission($submissionId);
        $msg = self::unwrapSubmissionPayload($raw);
        $st = strtoupper(trim((string) self::pickSubmissionStatus($msg)));
        // json_encode() returns false on failure; cast so the snippet is always a string.
        $snippet = mb_substr((string) json_encode($msg, JSON_UNESCAPED_UNICODE), 0, 400);

        $ready = [
            'COMPLETE', 'COMPLETED', 'PROCESSED', 'READY', 'SUCCEEDED',
            'COMPLETE_PROCESSING',
        ];
        $failed = ['ERROR', 'FAILED', 'CANCELLED', 'CANCELED', 'DELETED'];

        $readyFlag = $st !== '' && in_array($st, $ready, true);
        $failedFlag = $st !== '' && in_array($st, $failed, true);

        return [
            'ready' => $readyFlag,
            'failed' => $failedFlag,
            'status' => $st,
            'snippet' => $snippet,
            'message' => $msg,
        ];
    }

    /**
     * Blocks until Turnitin has finished parsing the uploaded file (synchronous variant, intended for
     * CLI/debugging only; production should use the PlagiarismWaitIngest queue chain).
     *
     * @param string $submissionId
     * @param int $maxWaitSec   maximum seconds to wait, default 600 (10 minutes); values below 30 are raised to 30
     * @param int $intervalSec  polling interval in seconds, default 3; values below 1 are raised to 1
     * @throws Exception on timeout or when the submission reaches a failed terminal state
     */
    public function waitAfterUploadForSimilarity($submissionId, $maxWaitSec = 600, $intervalSec = 3)
    {
        $deadline = time() + max(30, (int)$maxWaitSec);
        $intervalSec = max(1, (int)$intervalSec);
        $lastStatus = '';
        $lastSnippet = '';

        while (time() < $deadline) {
            $parsed = $this->parseSubmissionIngestState($submissionId);
            $lastStatus = $parsed['status'];
            $lastSnippet = $parsed['snippet'];

            if (!empty($parsed['ready'])) {
                return;
            }
            if (!empty($parsed['failed'])) {
                throw new Exception('Turnitin submission failed, status=' . $lastStatus . ' body=' . $lastSnippet);
            }

            // Never sleep past the deadline: a long interval must not stretch the total wait.
            $remaining = $deadline - time();
            if ($remaining <= 0) {
                break;
            }
            sleep(min($intervalSec, $remaining));
        }

        throw new Exception(
            'Timeout waiting for Turnitin submission ingest (last status=' . ($lastStatus ?: '(empty)') . ') snippet=' . $lastSnippet
        );
    }

    /**
     * Returns the inner 'message' array when the decoded response wraps its payload in one,
     * otherwise the decoded response itself; non-array input yields an empty array.
     *
     * @param mixed $decoded
     * @return array
     */
    private static function unwrapSubmissionPayload($decoded)
    {
        if (!is_array($decoded)) {
            return [];
        }
        if (isset($decoded['message']) && is_array($decoded['message'])) {
            return $decoded['message'];
        }
        return $decoded;
    }

    /**
     * Picks the first non-empty status-like field, looking at the payload itself and then at a
     * nested 'submission' array when present.
     *
     * @param array $msg
     * @return string the status as found, or '' when none of the known keys carries a value
     */
    private static function pickSubmissionStatus(array $msg)
    {
        $candidates = [$msg];
        if (isset($msg['submission']) && is_array($msg['submission'])) {
            $candidates[] = $msg['submission'];
        }
        foreach ($candidates as $m) {
            foreach (['status', 'workflow_status', 'submission_status', 'processing_status', 'paper_status'] as $k) {
                // Skip non-scalars: casting an array to string would produce the bogus status "Array".
                if (!empty($m[$k]) && is_scalar($m[$k])) {
                    return (string)$m[$k];
                }
            }
        }
        return '';
    }

    /**
     * Query the similarity status.
     * GET /submissions/{id}/similarity
     *
     * Returns status: PROCESSING / COMPLETE / ERROR
     * When COMPLETE it also returns overall_match_percentage / time_requested / time_generated
     */
    public function getSimilarityStatus($submissionId)
    {
        return $this->request(
            'GET',
            '/submissions/' . rawurlencode($submissionId) . '/similarity'
        );
    }

    /**
     * Obtain a temporary URL for viewing the report online.
     * POST /submissions/{id}/viewer-url
     *
     * Returns viewer_url (valid for a few hours).
     *
     * NOTE: $viewer is merged with array_merge(), which is shallow. Passing 'similarity' replaces the
     * whole default 'similarity' section.
     *
     * @param array $viewer optional viewer settings, e.g. ['viewer_default_permission_set' => 'INSTRUCTOR']
     */
    public function getViewerUrl($submissionId, $viewer = [])
    {
        $body = array_merge([
            'viewer_default_permission_set' => 'INSTRUCTOR',
            'similarity' => [
                'default_mode' => 'MATCH_OVERVIEW',
                'view_settings' => ['save_changes' => true],
                'modes' => ['match_overview' => true, 'all_sources' => true],
            ],
            'locale' => 'en-US',
        ], $viewer);

        // rawurlencode (RFC 3986) is the correct encoding for a path segment and matches the other endpoints.
        return $this->request(
            'POST',
            '/submissions/' . rawurlencode($submissionId) . '/viewer-url',
            $body
        );
    }

    /**
     * Trigger generation of the PDF report (asynchronous; its state is polled separately).
     * POST /submissions/{id}/similarity/pdf
     *
     * Returns id (the PDF report id).
     */
    public function requestPdfReport($submissionId, $opts = [])
    {
        $body = array_merge([
            'locale' => 'en-US',
        ], $opts);

        return $this->request(
            'POST',
            '/submissions/' . rawurlencode($submissionId) . '/similarity/pdf',
            $body
        );
    }

    /**
     * Query the PDF report status.
     * GET /submissions/{id}/similarity/pdf/{pdf_id}/status
     *
     * status: PENDING / SUCCESS / FAILED
     */
    public function getPdfReportStatus($submissionId, $pdfId)
    {
        return $this->request(
            'GET',
            '/submissions/' . rawurlencode($submissionId) . '/similarity/pdf/' . rawurlencode($pdfId) . '/status'
        );
    }

    /**
     * Download the PDF report content (only callable once status=SUCCESS).
     * GET /submissions/{id}/similarity/pdf/{pdf_id}
     *
     * Returns the raw PDF binary string; the caller is responsible for writing it to disk.
     */
    public function downloadPdfReport($submissionId, $pdfId)
    {
        return $this->request(
            'GET',
            '/submissions/' . rawurlencode($submissionId) . '/similarity/pdf/' . rawurlencode($pdfId),
            null,
            [],
            true // raw response (no json_decode)
        );
    }

    // ==================== Internal HTTP layer ====================

    /**
     * Single entry point for every HTTP call.
     *
     * @param string $method        GET/POST/PUT/DELETE
     * @param string $path          relative path starting with '/', appended to baseUrl
     * @param mixed  $body          array => JSON encoded; string => sent as the raw body; null => no body
     * @param array  $extraHeaders  additional headers as name => value
     * @param bool   $rawResponse   true = return the raw string; false = json_decode
     * @return mixed decoded array, [] for an empty body, or the raw string (raw mode / non-JSON response)
     * @throws Exception on encoding failure, cURL failure, or a non-2xx HTTP status
     */
    private function request($method, $path, $body = null, $extraHeaders = [], $rawResponse = false)
    {
        $url = $this->baseUrl . $path;

        $headers = [
            'Authorization: Bearer ' . $this->apiKey,
            'X-Turnitin-Integration-Name: ' . $this->integrationName,
            'X-Turnitin-Integration-Version: ' . $this->integrationVersion,
        ];

        $payload = null;
        if ($body !== null) {
            if (is_array($body)) {
                $payload = json_encode($body, JSON_UNESCAPED_UNICODE | JSON_UNESCAPED_SLASHES);
                if ($payload === false) {
                    // e.g. invalid UTF-8 in a title; sending `false` would silently post an empty body.
                    throw new Exception("Turnitin request body is not JSON-encodable: " . json_last_error_msg() . " ({$method} {$path})");
                }
                $headers[] = 'Content-Type: application/json';
            } else {
                $payload = $body;
                if (!isset($extraHeaders['Content-Type'])) {
                    $headers[] = 'Content-Type: application/octet-stream';
                }
            }
        }

        foreach ($extraHeaders as $k => $v) {
            $headers[] = $k . ': ' . $v;
        }

        $ch = curl_init();
        if ($ch === false) {
            throw new Exception("Turnitin curl error: unable to initialise cURL (url={$url})");
        }
        curl_setopt_array($ch, [
            CURLOPT_URL => $url,
            CURLOPT_CUSTOMREQUEST => strtoupper($method),
            CURLOPT_RETURNTRANSFER => true,
            CURLOPT_HTTPHEADER => $headers,
            CURLOPT_TIMEOUT => $this->timeout,
            CURLOPT_CONNECTTIMEOUT => 15,
            CURLOPT_SSL_VERIFYPEER => true,
            CURLOPT_SSL_VERIFYHOST => 2,
        ]);
        if ($payload !== null) {
            curl_setopt($ch, CURLOPT_POSTFIELDS, $payload);
        }

        $resp = curl_exec($ch);
        $httpCode = curl_getinfo($ch, CURLINFO_HTTP_CODE);
        $err = curl_error($ch);
        curl_close($ch);

        if ($resp === false) {
            throw new Exception("Turnitin curl error: {$err} (url={$url})");
        }

        if ($httpCode < 200 || $httpCode >= 300) {
            // Include the first 1k of the response body to make troubleshooting easier.
            $excerpt = mb_substr((string)$resp, 0, 1000);
            throw new Exception("Turnitin HTTP {$httpCode} {$method} {$path}: {$excerpt}");
        }

        if ($rawResponse) {
            return $resp;
        }

        // Some responses may be 204 No Content.
        if ($resp === '' || $resp === null) {
            return [];
        }

        $data = json_decode($resp, true);
        if (json_last_error() !== JSON_ERROR_NONE) {
            // Not JSON: hand the original text back unchanged.
            return $resp;
        }
        return $data;
    }
}