作者的ai总结field

This commit is contained in:
wangjinlei
2026-05-25 13:20:00 +08:00
parent a4e645309e
commit ab7f08b06c
8 changed files with 245 additions and 31 deletions

View File

@@ -312,11 +312,20 @@ class PlagiarismService
]);
if ($status === 'COMPLETE') {
$score = isset($statusResp['overall_match_percentage'])
? floatval($statusResp['overall_match_percentage']) : 0;
$score = TurnitinService::extractOverallMatchPercentage($statusResp);
if ($score <= 0 && isset($statusResp['overall_match_percentage'])) {
$score = floatval($statusResp['overall_match_percentage']);
}
$this->log('poll complete check_id=' . $checkId . ' score=' . $score
. ' check_type=' . ($check['check_type'] ?? 'full'));
$localPdf = $this->downloadAndStorePdf($tii, $check['tii_submission_id'], $checkId);
$meta = TurnitinService::parseSimilarityReportMeta($statusResp);
if ($meta['score'] > 0) {
$score = $meta['score'];
}
$this->updateCheck($checkId, [
'state' => 3,
'similarity_score' => $score,

View File

@@ -23,6 +23,9 @@ use think\Exception;
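* INGEST_CHAIN_FIRST_DELAY    delay in seconds before the first ingest check after upload, default 10 (queue chain)
* INGEST_CHAIN_POLL_INTERVAL  interval in seconds between steps of the ingest chain, default 15
* INGEST_CHAIN_MAX_ATTEMPTS   maximum number of steps in the ingest chain, default 80
* EXCLUDE_QUOTES / EXCLUDE_BIBLIOGRAPHY / EXCLUDE_CITATIONS  0|1, default 0 (closer to a manual similarity check on the Crossref web UI)
* VIEWER_DEFAULT_MODE         match_overview | all_sources, default all_sources (easier to browse matches grouped by source repository)
* ADD_TO_INDEX                0|1, default 1
*
* API documentation: https://developers.turnitin.com/docs/tca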
*
@@ -135,22 +138,7 @@ class TurnitinService
*/
public function triggerSimilarity($submissionId, $opts = [])
{
$body = array_merge([
'generation_settings' => [
'search_repositories' => ['INTERNET', 'PUBLICATION', 'CROSSREF', 'CROSSREF_POSTED_CONTENT', 'SUBMITTED_WORK'],
// 服务端类型为 List<String>,传 true 会 400Cannot deserialize ... from Boolean
'submission_auto_excludes' => [],
'auto_exclude_self_matching_scope' => 'GROUP_CONTEXT',
],
'view_settings' => [
'exclude_quotes' => true,
'exclude_bibliography' => true,
'exclude_citations' => true,
],
'indexing_settings' => [
'add_to_index' => true,
],
], $opts);
$body = array_merge($this->defaultSimilarityPayload(), $opts);
return $this->request(
'PUT',
@@ -159,6 +147,211 @@ class TurnitinService
);
}
/**
 * Default parameters shared by PUT /similarity and the PDF export.
 *
 * Earlier revisions hard-coded every exclude_* flag to true, which made the
 * overall similarity lower than a manual check on the Crossref web UI (this
 * is unrelated to the numbering or type of the matched sources). The flags
 * are now read from configuration via defaultViewSettings().
 *
 * @return array<string,mixed> payload with generation_settings, view_settings and indexing_settings
 */
public function defaultSimilarityPayload()
{
    $generationSettings = [
        'search_repositories' => ['INTERNET', 'PUBLICATION', 'CROSSREF', 'CROSSREF_POSTED_CONTENT', 'SUBMITTED_WORK'],
        // Must stay a list: the server-side type is List<String>, and sending
        // a boolean is rejected with HTTP 400 (cannot deserialize from Boolean).
        'submission_auto_excludes' => [],
    ];

    // An empty configured scope means "do not send the key at all".
    $selfMatchScope = trim((string) Env::get('turnitin.auto_exclude_self_matching_scope', 'GROUP_CONTEXT'));
    if ($selfMatchScope !== '') {
        $generationSettings['auto_exclude_self_matching_scope'] = $selfMatchScope;
    }

    $indexingSettings = [
        'add_to_index' => $this->envBool('turnitin.add_to_index', true),
    ];

    return [
        'generation_settings' => $generationSettings,
        'view_settings' => $this->defaultViewSettings(),
        'indexing_settings' => $indexingSettings,
    ];
}
/**
 * Build the view_settings block from configuration.
 *
 * Each exclude_* flag is read through envBool() and defaults to false when
 * the corresponding turnitin.* setting is not configured.
 *
 * @return array<string,bool> map of exclude_quotes / exclude_bibliography / exclude_citations
 */
public function defaultViewSettings()
{
    $excludeQuotes = $this->envBool('turnitin.exclude_quotes', false);
    $excludeBibliography = $this->envBool('turnitin.exclude_bibliography', false);
    $excludeCitations = $this->envBool('turnitin.exclude_citations', false);

    return [
        'exclude_quotes' => $excludeQuotes,
        'exclude_bibliography' => $excludeBibliography,
        'exclude_citations' => $excludeCitations,
    ];
}
/**
 * Extract the overall similarity (0-100) from a GET /similarity response.
 *
 * Candidate fields are inspected in a fixed order: the top-level
 * overall_match_percentage, overall_match and similarity_percentage, then
 * overall_match_percentage / overall_match inside the nested "message" array
 * (or the top level again when there is no such array), and finally the same
 * two keys inside that container's "similarity" array. The first numeric,
 * non-negative candidate wins.
 *
 * A value strictly between 0 and 1 is treated as a fraction and multiplied by
 * 100. A value of exactly 1 is kept as 1 %: percentages are normally reported
 * as whole numbers, and scaling it would turn a 1 % match into 100 %.
 *
 * @param array<string,mixed> $statusResp decoded response body
 * @return float similarity percentage rounded to two decimals, 0.0 when no usable value exists
 */
public static function extractOverallMatchPercentage(array $statusResp)
{
    $container = (isset($statusResp['message']) && is_array($statusResp['message']))
        ? $statusResp['message']
        : $statusResp;

    $rawValues = [
        $statusResp['overall_match_percentage'] ?? null,
        $statusResp['overall_match'] ?? null,
        $statusResp['similarity_percentage'] ?? null,
        $container['overall_match_percentage'] ?? null,
        $container['overall_match'] ?? null,
    ];
    if (isset($container['similarity']) && is_array($container['similarity'])) {
        $rawValues[] = $container['similarity']['overall_match_percentage'] ?? null;
        $rawValues[] = $container['similarity']['overall_match'] ?? null;
    }

    foreach ($rawValues as $raw) {
        if ($raw === null || $raw === '' || !is_numeric($raw)) {
            continue;
        }
        $n = floatval($raw);
        // Written as a negated ">=" so that NAN is rejected together with negatives.
        if (!($n >= 0)) {
            continue;
        }
        // Fractional form (0 < n < 1) is converted to a percentage; 1 and above
        // are already percentages and must not be scaled.
        if ($n > 0 && $n < 1.0) {
            return round($n * 100, 2);
        }
        return round($n, 2);
    }

    return 0.0;
}
/**
 * Extract a best-effort per-source summary from a GET /similarity response
 * (intended for list display; the full detail stays in the online Turnitin report).
 *
 * Source nodes are gathered by collectSimilaritySourceNodes() from the whole
 * response and, when present, from its "message" array. Because the second
 * walk revisits nodes already found by the first, entries are de-duplicated
 * on the tuple (repository label, percentage, word count).
 *
 * Only scalar values are accepted as a repository label. Casting an array to
 * string would raise "Array to string conversion" and produce the meaningless
 * label "ARRAY", so non-scalar values are passed over in favour of the next
 * candidate key. Nodes that yield no label, percentage or word count are
 * dropped rather than emitted as empty entries.
 *
 * @param array<string,mixed> $statusResp decoded response body
 * @return array{score:float,sources:array<int,array<string,mixed>>}
 */
public static function parseSimilarityReportMeta(array $statusResp)
{
    $meta = [
        'score' => self::extractOverallMatchPercentage($statusResp),
        'sources' => [],
    ];

    $candidates = [];
    self::collectSimilaritySourceNodes($statusResp, $candidates, 0);
    if (isset($statusResp['message']) && is_array($statusResp['message'])) {
        self::collectSimilaritySourceNodes($statusResp['message'], $candidates, 0);
    }

    $seen = [];
    foreach ($candidates as $node) {
        if (!is_array($node)) {
            continue;
        }

        // First numeric value among the known percentage keys.
        $pct = null;
        foreach (['percentage', 'match_percentage', 'overall_match_percentage', 'similarity_percentage'] as $k) {
            if (isset($node[$k]) && is_numeric($node[$k])) {
                $pct = floatval($node[$k]);
                break;
            }
        }

        // First non-empty scalar among the known repository-like keys.
        $repo = '';
        foreach (['repository', 'repository_name', 'collection', 'source_type', 'type', 'database', 'category'] as $k) {
            if (!empty($node[$k]) && is_scalar($node[$k])) {
                $repo = strtoupper(trim((string) $node[$k]));
                break;
            }
        }

        $words = isset($node['matched_word_count']) ? intval($node['matched_word_count'])
            : (isset($node['word_count']) ? intval($node['word_count']) : 0);

        $key = $repo . '|' . ($pct !== null ? $pct : '') . '|' . $words;
        if (isset($seen[$key])) {
            continue;
        }
        $seen[$key] = true;

        $entry = array_filter([
            'repository' => $repo,
            'match_percentage' => $pct,
            'matched_word_count' => $words > 0 ? $words : null,
        ], function ($v) {
            return $v !== null && $v !== '';
        });
        if ($entry === []) {
            // Nothing displayable was found on this node.
            continue;
        }
        $meta['sources'][] = $entry;
    }

    return $meta;
}
/**
 * Recursively collect every array node that looks like a similarity source.
 *
 * A node qualifies when at least one of repository, repository_name,
 * collection or source_type holds a non-empty value. Children are then
 * visited: a child whose element 0 is itself an array is treated as a list
 * and each of its items is visited, otherwise the child itself is visited.
 * Recursion stops once $depth exceeds 8.
 *
 * @param array<string,mixed> $node  node to inspect (non-arrays are ignored)
 * @param array<int,mixed>    $out   accumulator, appended to by reference
 * @param int                 $depth current recursion depth, 0 at the root
 */
private static function collectSimilaritySourceNodes($node, array &$out, $depth)
{
    if (!is_array($node) || $depth > 8) {
        return;
    }

    foreach (['repository', 'repository_name', 'collection', 'source_type'] as $repoKey) {
        if (!empty($node[$repoKey])) {
            $out[] = $node;
            break;
        }
    }

    $nextDepth = $depth + 1;
    foreach ($node as $child) {
        if (!is_array($child)) {
            continue;
        }
        // A list of arrays is unwrapped so each item is visited one level down;
        // any other array is visited as a single node.
        $isListOfArrays = isset($child[0]) && is_array($child[0]);
        $targets = $isListOfArrays ? $child : [$child];
        foreach ($targets as $target) {
            self::collectSimilaritySourceNodes($target, $out, $nextDepth);
        }
    }
}
/**
 * Default view of the online Similarity Report (aligned with the
 * "view by source" layout of the Crossref back office).
 *
 * The default mode is read from turnitin.viewer_default_mode. Any value other
 * than match_overview or all_sources falls back to all_sources. Both modes
 * are always enabled in the returned block.
 *
 * @return array<string,mixed> block with default_mode and modes
 */
public function defaultViewerSimilarityBlock()
{
    $allowedModes = ['match_overview', 'all_sources'];
    $configured = strtolower(trim((string) Env::get('turnitin.viewer_default_mode', 'all_sources')));
    $defaultMode = in_array($configured, $allowedModes, true) ? $configured : 'all_sources';

    return [
        'default_mode' => $defaultMode,
        'modes' => [
            'match_overview' => true,
            'all_sources' => true,
        ],
    ];
}
/**
 * Read a configuration value and interpret it as a boolean.
 *
 * Real booleans are returned unchanged. Any other value is cast to string,
 * trimmed and lower-cased, and counts as true only when it is one of
 * "1", "true", "yes" or "on".
 *
 * @param string $name    configuration key passed to Env::get()
 * @param bool   $default value used when the key is not configured
 * @return bool
 */
private function envBool($name, $default = false)
{
    $raw = Env::get($name, $default ? '1' : '0');
    if (is_bool($raw)) {
        return $raw;
    }

    $normalized = strtolower(trim((string) $raw));

    return in_array($normalized, ['1', 'true', 'yes', 'on'], true);
}
/**
* 查询 submission 详情(上传后用于轮询是否解析完成)。
* GET /submissions/{id}
@@ -346,13 +539,7 @@ class TurnitinService
$bodies[] = [
'viewer_default_permission_set' => $perm,
'locale' => $locale,
'similarity' => [
'default_mode' => 'match_overview',
'modes' => [
'match_overview' => true,
'all_sources' => true,
],
],
'similarity' => $this->defaultViewerSimilarityBlock(),
];
// 最简请求体(部分 Crossref 租户只接受 permission + locale
$bodies[] = [
@@ -386,12 +573,13 @@ class TurnitinService
public function requestPdfReport($submissionId, $opts = [])
{
$body = array_merge([
'locale' => 'en-US',
'locale' => trim((string) Env::get('turnitin.viewer_locale', 'en-US')) ?: 'en-US',
'view_settings' => $this->defaultViewSettings(),
], $opts);
return $this->request(
'POST',
'/submissions/' . urlencode($submissionId) . '/similarity/pdf',
'/submissions/' . rawurlencode($submissionId) . '/similarity/pdf',
$body
);
}