作者的ai总结field
This commit is contained in:
@@ -312,11 +312,20 @@ class PlagiarismService
|
||||
]);
|
||||
|
||||
if ($status === 'COMPLETE') {
|
||||
$score = isset($statusResp['overall_match_percentage'])
|
||||
? floatval($statusResp['overall_match_percentage']) : 0;
|
||||
$score = TurnitinService::extractOverallMatchPercentage($statusResp);
|
||||
if ($score <= 0 && isset($statusResp['overall_match_percentage'])) {
|
||||
$score = floatval($statusResp['overall_match_percentage']);
|
||||
}
|
||||
$this->log('poll complete check_id=' . $checkId . ' score=' . $score
|
||||
. ' check_type=' . ($check['check_type'] ?? 'full'));
|
||||
|
||||
$localPdf = $this->downloadAndStorePdf($tii, $check['tii_submission_id'], $checkId);
|
||||
|
||||
$meta = TurnitinService::parseSimilarityReportMeta($statusResp);
|
||||
if ($meta['score'] > 0) {
|
||||
$score = $meta['score'];
|
||||
}
|
||||
|
||||
$this->updateCheck($checkId, [
|
||||
'state' => 3,
|
||||
'similarity_score' => $score,
|
||||
|
||||
@@ -23,6 +23,9 @@ use think\Exception;
|
||||
* INGEST_CHAIN_FIRST_DELAY 上传后首次 ingest 检查延迟秒数,默认 10(队列链)
|
||||
* INGEST_CHAIN_POLL_INTERVAL ingest 链每步间隔秒数,默认 15
|
||||
* INGEST_CHAIN_MAX_ATTEMPTS ingest 链最大步数,默认 80
|
||||
* EXCLUDE_QUOTES / EXCLUDE_BIBLIOGRAPHY / EXCLUDE_CITATIONS 0|1,默认 0(与 Crossref 网页手动查重更接近)
|
||||
* VIEWER_DEFAULT_MODE match_overview | all_sources(默认 all_sources,便于按来源库分类查看)
|
||||
* ADD_TO_INDEX 0|1,默认 1
|
||||
*
|
||||
* API 文档:https://developers.turnitin.com/docs/tca
|
||||
*
|
||||
@@ -135,22 +138,7 @@ class TurnitinService
|
||||
*/
|
||||
public function triggerSimilarity($submissionId, $opts = [])
|
||||
{
|
||||
$body = array_merge([
|
||||
'generation_settings' => [
|
||||
'search_repositories' => ['INTERNET', 'PUBLICATION', 'CROSSREF', 'CROSSREF_POSTED_CONTENT', 'SUBMITTED_WORK'],
|
||||
// 服务端类型为 List<String>,传 true 会 400:Cannot deserialize ... from Boolean
|
||||
'submission_auto_excludes' => [],
|
||||
'auto_exclude_self_matching_scope' => 'GROUP_CONTEXT',
|
||||
],
|
||||
'view_settings' => [
|
||||
'exclude_quotes' => true,
|
||||
'exclude_bibliography' => true,
|
||||
'exclude_citations' => true,
|
||||
],
|
||||
'indexing_settings' => [
|
||||
'add_to_index' => true,
|
||||
],
|
||||
], $opts);
|
||||
$body = array_merge($this->defaultSimilarityPayload(), $opts);
|
||||
|
||||
return $this->request(
|
||||
'PUT',
|
||||
@@ -159,6 +147,211 @@ class TurnitinService
|
||||
);
|
||||
}
|
||||
|
||||
/**
 * Build the default request payload shared by PUT /similarity and the PDF export.
 *
 * The exclude_* view flags used to be hard-coded to true, which made the overall
 * similarity lower than a manual check on the Crossref web UI (this is unrelated
 * to the numbering/type of matched sources). They now come from
 * defaultViewSettings().
 *
 * @return array<string,mixed> Payload with generation_settings, view_settings
 *                             and indexing_settings.
 */
public function defaultSimilarityPayload()
{
    $selfMatchScope = trim((string) Env::get('turnitin.auto_exclude_self_matching_scope', 'GROUP_CONTEXT'));

    $generationSettings = [
        'search_repositories' => ['INTERNET', 'PUBLICATION', 'CROSSREF', 'CROSSREF_POSTED_CONTENT', 'SUBMITTED_WORK'],
        'submission_auto_excludes' => [],
    ];

    // A blank configured scope means the key is left out of the request entirely.
    if ($selfMatchScope !== '') {
        $generationSettings['auto_exclude_self_matching_scope'] = $selfMatchScope;
    }

    $viewSettings = $this->defaultViewSettings();
    $indexingSettings = [
        'add_to_index' => $this->envBool('turnitin.add_to_index', true),
    ];

    return [
        'generation_settings' => $generationSettings,
        'view_settings' => $viewSettings,
        'indexing_settings' => $indexingSettings,
    ];
}
|
||||
|
||||
/**
 * Default view_settings block: the three exclude_* flags, each read from
 * configuration through envBool() with a fallback of false.
 *
 * @return array<string,bool>
 */
public function defaultViewSettings()
{
    // Payload key => configuration name it is read from.
    $flagSources = [
        'exclude_quotes' => 'turnitin.exclude_quotes',
        'exclude_bibliography' => 'turnitin.exclude_bibliography',
        'exclude_citations' => 'turnitin.exclude_citations',
    ];

    $settings = [];
    foreach ($flagSources as $payloadKey => $configName) {
        $settings[$payloadKey] = $this->envBool($configName, false);
    }

    return $settings;
}
|
||||
|
||||
/**
 * Parse the overall similarity (0–100) from a GET /similarity response.
 *
 * Candidate values are examined in a fixed order and the first usable one wins:
 *   1. top level: overall_match_percentage, overall_match, similarity_percentage
 *   2. the "message" array (or the top level again when there is no such array):
 *      overall_match_percentage, overall_match
 *   3. the "similarity" array inside that: overall_match_percentage, overall_match
 *
 * Non-numeric values and negative numbers are ignored. A strict fraction
 * (0 < n < 1) is treated as a 0–1 ratio and multiplied by 100. A value of
 * exactly 1 is read as 1 %; it used to be scaled to 100, which turned a 1 %
 * report into a 100 % one.
 *
 * @param array<string,mixed> $statusResp Decoded similarity status response.
 * @return float Percentage rounded to two decimals; 0.0 when nothing usable is found.
 */
public static function extractOverallMatchPercentage(array $statusResp)
{
    $message = (isset($statusResp['message']) && is_array($statusResp['message']))
        ? $statusResp['message']
        : $statusResp;
    $similarity = (isset($message['similarity']) && is_array($message['similarity']))
        ? $message['similarity']
        : [];

    $rawValues = [
        $statusResp['overall_match_percentage'] ?? null,
        $statusResp['overall_match'] ?? null,
        $statusResp['similarity_percentage'] ?? null,
        $message['overall_match_percentage'] ?? null,
        $message['overall_match'] ?? null,
        $similarity['overall_match_percentage'] ?? null,
        $similarity['overall_match'] ?? null,
    ];

    foreach ($rawValues as $raw) {
        // null, '' and any non-numeric value are not candidates.
        if (!is_numeric($raw)) {
            continue;
        }
        $value = floatval($raw);

        // Written as a negated >= so that negatives and NaN are both skipped.
        if (!($value >= 0)) {
            continue;
        }

        // Only a strict fraction is a 0–1 ratio; exactly 1 stays 1 %.
        if ($value > 0 && $value < 1.0) {
            return round($value * 100, 2);
        }

        return round($value, 2);
    }

    return 0.0;
}
|
||||
|
||||
/**
 * Extract a best-effort per-source summary from a GET /similarity response
 * (intended for list display; the full detail stays in the Turnitin online report).
 *
 * Source nodes are gathered by collectSimilaritySourceNodes() from the whole
 * response and, when present, once more from its "message" array. Each node is
 * reduced to up to three fields:
 *   - repository:         first non-empty scalar among the repository-like keys,
 *                         trimmed and upper-cased
 *   - match_percentage:   first numeric value among the percentage-like keys
 *   - matched_word_count: matched_word_count or word_count, kept only when > 0
 * Entries with the same repository / percentage / word-count combination are
 * emitted once.
 *
 * Repository values that are not scalar (for example a nested array under
 * "collection") are skipped rather than cast to string, which would raise an
 * "Array to string conversion" error and produce the label "ARRAY".
 *
 * @param array<string,mixed> $statusResp Decoded similarity status response.
 * @return array{score:float,sources:array<int,array<string,mixed>>}
 */
public static function parseSimilarityReportMeta(array $statusResp)
{
    $meta = [
        'score' => self::extractOverallMatchPercentage($statusResp),
        'sources' => [],
    ];

    $nodes = [];
    self::collectSimilaritySourceNodes($statusResp, $nodes, 0);
    if (isset($statusResp['message']) && is_array($statusResp['message'])) {
        // Second pass restarts the depth counter at "message"; nodes found by
        // both passes are de-duplicated below.
        self::collectSimilaritySourceNodes($statusResp['message'], $nodes, 0);
    }

    $percentKeys = ['percentage', 'match_percentage', 'overall_match_percentage', 'similarity_percentage'];
    $repoKeys = ['repository', 'repository_name', 'collection', 'source_type', 'type', 'database', 'category'];

    $seen = [];
    foreach ($nodes as $node) {
        if (!is_array($node)) {
            continue;
        }

        $pct = null;
        foreach ($percentKeys as $k) {
            if (isset($node[$k]) && is_numeric($node[$k])) {
                $pct = floatval($node[$k]);
                break;
            }
        }

        $repo = '';
        foreach ($repoKeys as $k) {
            // Only scalar values can be turned into a label safely.
            if (!empty($node[$k]) && is_scalar($node[$k])) {
                $repo = strtoupper(trim((string) $node[$k]));
                break;
            }
        }

        $words = isset($node['matched_word_count']) ? intval($node['matched_word_count'])
            : (isset($node['word_count']) ? intval($node['word_count']) : 0);

        $entry = array_filter([
            'repository' => $repo,
            'match_percentage' => $pct,
            'matched_word_count' => $words > 0 ? $words : null,
        ], function ($v) {
            return $v !== null && $v !== '';
        });

        // Nothing usable could be read from this node.
        if ($entry === []) {
            continue;
        }

        $key = $repo . '|' . ($pct !== null ? $pct : '') . '|' . $words;
        if (isset($seen[$key])) {
            continue;
        }
        $seen[$key] = true;

        $meta['sources'][] = $entry;
    }

    return $meta;
}
|
||||
|
||||
/**
 * Recursively walk a decoded response and append every array that looks like
 * a "source" node to $out.
 *
 * An array counts as a source node when at least one of the keys repository,
 * repository_name, collection or source_type holds a non-empty value.
 * Traversal stops for non-array values and once $depth exceeds 8. A child
 * whose element 0 is itself an array is treated as a list: its items are
 * visited directly, so the list wrapper does not use up a depth level.
 *
 * @param mixed            $node  Current value being inspected.
 * @param array<int,mixed> $out   Accumulator; matching nodes are appended.
 * @param int              $depth Current recursion depth (callers start at 0).
 * @return void
 */
private static function collectSimilaritySourceNodes($node, array &$out, $depth)
{
    if (!is_array($node) || $depth > 8) {
        return;
    }

    foreach (['repository', 'repository_name', 'collection', 'source_type'] as $marker) {
        if (!empty($node[$marker])) {
            $out[] = $node;
            break;
        }
    }

    $nextDepth = $depth + 1;
    foreach ($node as $child) {
        if (!is_array($child)) {
            continue;
        }

        $isListOfArrays = isset($child[0]) && is_array($child[0]);
        $targets = $isListOfArrays ? $child : [$child];

        foreach ($targets as $target) {
            self::collectSimilaritySourceNodes($target, $out, $nextDepth);
        }
    }
}
|
||||
|
||||
/**
 * Default "similarity" block for the online Similarity Report viewer
 * (meant to line up with the "view by source" layout of the Crossref back office).
 *
 * The default mode is read from turnitin.viewer_default_mode; anything other
 * than match_overview or all_sources falls back to all_sources. Both modes are
 * always enabled.
 *
 * @return array{default_mode:string,modes:array<string,bool>}
 */
public function defaultViewerSimilarityBlock()
{
    $allowedModes = ['match_overview', 'all_sources'];

    $configured = strtolower(trim((string) Env::get('turnitin.viewer_default_mode', 'all_sources')));
    $defaultMode = in_array($configured, $allowedModes, true) ? $configured : 'all_sources';

    $enabledModes = [
        'match_overview' => true,
        'all_sources' => true,
    ];

    return [
        'default_mode' => $defaultMode,
        'modes' => $enabledModes,
    ];
}
|
||||
|
||||
/**
 * Read a configuration value and interpret it as a boolean.
 *
 * A real boolean from Env::get() is returned unchanged. Any other value is
 * cast to string, trimmed and lower-cased; it is true only when it equals
 * one of "1", "true", "yes" or "on".
 *
 * @param string $name    Configuration name passed to Env::get().
 * @param bool   $default Fallback, handed to Env::get() as '1' or '0'.
 * @return bool
 */
private function envBool($name, $default = false)
{
    $raw = Env::get($name, $default ? '1' : '0');

    if (is_bool($raw)) {
        return $raw;
    }

    $normalized = strtolower(trim((string) $raw));

    return in_array($normalized, ['1', 'true', 'yes', 'on'], true);
}
|
||||
|
||||
/**
|
||||
* 查询 submission 详情(上传后用于轮询是否解析完成)。
|
||||
* GET /submissions/{id}
|
||||
@@ -346,13 +539,7 @@ class TurnitinService
|
||||
$bodies[] = [
|
||||
'viewer_default_permission_set' => $perm,
|
||||
'locale' => $locale,
|
||||
'similarity' => [
|
||||
'default_mode' => 'match_overview',
|
||||
'modes' => [
|
||||
'match_overview' => true,
|
||||
'all_sources' => true,
|
||||
],
|
||||
],
|
||||
'similarity' => $this->defaultViewerSimilarityBlock(),
|
||||
];
|
||||
// 最简请求体(部分 Crossref 租户只接受 permission + locale)
|
||||
$bodies[] = [
|
||||
@@ -386,12 +573,13 @@ class TurnitinService
|
||||
public function requestPdfReport($submissionId, $opts = [])
|
||||
{
|
||||
$body = array_merge([
|
||||
'locale' => 'en-US',
|
||||
'locale' => trim((string) Env::get('turnitin.viewer_locale', 'en-US')) ?: 'en-US',
|
||||
'view_settings' => $this->defaultViewSettings(),
|
||||
], $opts);
|
||||
|
||||
return $this->request(
|
||||
'POST',
|
||||
'/submissions/' . urlencode($submissionId) . '/similarity/pdf',
|
||||
'/submissions/' . rawurlencode($submissionId) . '/similarity/pdf',
|
||||
$body
|
||||
);
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user