insertGetId([
            'article_id' => intval($this->arrGet($extra, 'article_id', 0)),
            'am_id' => intval($this->arrGet($extra, 'am_id', 0)),
            'p_article_id' => intval($this->arrGet($extra, 'p_article_id', 0)),
            'p_refer_id' => intval($this->arrGet($extra, 'p_refer_id', 0)),
            'refer_index' => intval($this->arrGet($extra, 'refer_index', 0)),
            'reference_no' => intval($this->arrGet($extra, 'reference_no', 0)),
            'reference_raw' => (string)$this->arrGet($extra, 'reference_raw', ''),
            'cite_tag_start' => intval($this->arrGet($extra, 'cite_tag_start', 0)),
            'cite_tag_end' => intval($this->arrGet($extra, 'cite_tag_end', 0)),
            'text_start' => intval($this->arrGet($extra, 'text_start', 0)),
            'text_end' => intval($this->arrGet($extra, 'text_end', 0)),
            'content_a' => $contentA,
            'content_b' => trim($contentB),
            'status' => 0,
            'created_at' => $now,
            'updated_at' => $now,
        ]);

        $amId = intval($this->arrGet($extra, 'am_id', 0));
        if ($amId > 0) {
            $this->setAmRefCheckStatus($amId, self::AM_STATUS_RUNNING);
        }

        $this->pushJob(intval($checkId), intval($this->arrGet($extra, 'queue_delay', 0)));

        return ['check_id' => $checkId, 'queued' => 1];
    }

    /**
     * Queue reference checks for one article_main section.
     *
     * Extracts the citations from the section content, resolves each cited
     * number against the references of the article's active
     * production_article row, inserts one result row per resolvable number
     * and pushes one queue job per row.
     *
     * The section is marked NONE when its content has no citation, and
     * RUNNING only when at least one job was actually queued, so a section
     * whose citations all point at unknown references is not left "running"
     * with nothing to finish it.
     *
     * @param array $main Row providing am_id, article_id and content
     * @return void
     * @throws \RuntimeException when no production_article row with state 0 exists
     */
    public function enqueueByArticleMain($main)
    {
        $amId = intval($main['am_id']);

        $citations = $this->extractReferences((string)$main['content']);
        if (empty($citations)) {
            $this->setAmRefCheckStatus($amId, self::AM_STATUS_NONE);
            return;
        }

        $prod = Db::name('production_article')
            ->where('article_id', $main['article_id'])
            ->where('state', 0)
            ->find();
        if (empty($prod)) {
            throw new \RuntimeException('production_article not found for article_id=' . $main['article_id']);
        }

        $pArticleId = intval($prod['p_article_id']);
        $referMap = $this->loadReferMapByPArticleId($pArticleId);

        $outcome = $this->queueCitationChecks($citations, $main['article_id'], $amId, $pArticleId, $referMap, 0);
        if (!empty($outcome['check_ids'])) {
            $this->setAmRefCheckStatus($amId, self::AM_STATUS_RUNNING);
        }
    }

    /**
     * Queue reference checks for every section (state 0 or 2) of an article.
     *
     * Sections without citations are marked NONE; sections for which at
     * least one job was queued are marked RUNNING after all jobs are pushed.
     *
     * @param int $articleId
     * @return array article_id, p_article_id, queued, skipped, check_ids, queue
     * @throws \InvalidArgumentException when $articleId is not positive
     * @throws \RuntimeException when the production_article row or the sections are missing
     */
    public function enqueueByArticle($articleId)
    {
        if ($articleId <= 0) {
            throw new \InvalidArgumentException('article_id is required');
        }

        $prod = Db::name('production_article')
            ->where('article_id', $articleId)
            ->where('state', 0)
            ->find();
        if (empty($prod)) {
            throw new \RuntimeException('production_article not found for article_id=' . $articleId);
        }

        $pArticleId = intval($prod['p_article_id']);
        $referMap = $this->loadReferMapByPArticleId($pArticleId);

        $mains = Db::name('article_main')
            ->field('am_id,content,article_id')
            ->where('article_id', $articleId)
            ->whereIn('state', [0, 2])
            ->order('sort asc')
            ->select();
        if (empty($mains)) {
            throw new \RuntimeException('article_main is empty');
        }

        $skipped = 0;
        $checkIds = [];
        $amIdsWithJobs = [];

        foreach ($mains as $main) {
            $amId = intval($main['am_id']);
            $citations = $this->extractReferences((string)$main['content']);
            if (empty($citations)) {
                $this->setAmRefCheckStatus($amId, self::AM_STATUS_NONE);
                continue;
            }

            // The delay keeps growing across sections: job k of the whole
            // article is delayed by k seconds, k being the number of jobs
            // queued so far.
            $outcome = $this->queueCitationChecks(
                $citations,
                $main['article_id'],
                $amId,
                $pArticleId,
                $referMap,
                count($checkIds)
            );
            $skipped += $outcome['skipped'];
            if (!empty($outcome['check_ids'])) {
                $checkIds = array_merge($checkIds, $outcome['check_ids']);
                $amIdsWithJobs[$amId] = true;
            }
        }

        foreach (array_keys($amIdsWithJobs) as $amId) {
            $this->setAmRefCheckStatus($amId, self::AM_STATUS_RUNNING);
        }

        return [
            'article_id' => $articleId,
            'p_article_id' => $pArticleId,
            'queued' => count($checkIds),
            'skipped' => $skipped,
            'check_ids' => $checkIds,
            'queue' => self::QUEUE_NAME,
        ];
    }

    /**
     * Insert one result row and push one job per cited reference number.
     *
     * A range citation such as [70-73] arrives already expanded, so it
     * produces four rows (reference_no = 70, 71, 72, 73). Numbers that have
     * no entry in $referMap (looked up at index number - 1) are counted as
     * skipped.
     *
     * @param array $citations  Items as returned by extractReferences()
     * @param mixed $articleId  Value stored in the article_id column
     * @param int   $amId       Section id stored in the am_id column
     * @param int   $pArticleId Value stored in the p_article_id column
     * @param array $referMap   Map as returned by loadReferMapByPArticleId()
     * @param int   $startDelay Delay in seconds of the first job; each further job adds one second
     * @return array{check_ids: array, skipped: int}
     */
    private function queueCitationChecks(array $citations, $articleId, $amId, $pArticleId, array $referMap, $startDelay)
    {
        $checkIds = [];
        $skipped = 0;
        $delay = intval($startDelay);

        foreach ($citations as $cite) {
            foreach ($cite['reference_numbers'] as $refNo) {
                $referIndex = $refNo - 1;
                if ($referIndex < 0 || !isset($referMap[$referIndex])) {
                    $skipped++;
                    continue;
                }

                $refer = $referMap[$referIndex];
                $now = date('Y-m-d H:i:s');

                $checkId = Db::name('article_reference_check_result')->insertGetId([
                    'article_id' => $articleId,
                    'p_article_id' => $pArticleId,
                    'am_id' => $amId,
                    'reference_no' => $refNo,
                    // NOTE(review): refer_index receives the 1-based citation
                    // number although the lookup above uses $refNo - 1;
                    // kept as found, confirm the intended value.
                    'refer_index' => $refNo,
                    'origin_text' => $cite['original_text'],
                    'refer_text' => $this->formatReferForLlm($refer),
                    'p_refer_id' => $refer['p_refer_id'],
                    'text_start' => $cite['text_start'],
                    'text_end' => $cite['text_end'],
                    'created_at' => $now,
                    'updated_at' => $now,
                ]);

                $this->pushJob(intval($checkId), $delay);
                $checkIds[] = $checkId;
                $delay++;
            }
        }

        return ['check_ids' => $checkIds, 'skipped' => $skipped];
    }

    /**
     * Aggregate all result rows of a section and store the summary in
     * article_main.ref_check_status.
     *
     * No rows -> NONE; any row with status 0 -> RUNNING; otherwise any row
     * with status 2, or status 1 with is_match 0 -> FAIL; all rows with
     * status 1 -> PASS; anything else -> FAIL.
     *
     * @param int $amId
     * @return mixed The status that was computed (one of the AM_STATUS_* constants)
     */
    public function syncAmRefCheckStatus($amId)
    {
        if ($amId <= 0) {
            return self::AM_STATUS_NONE;
        }

        $rows = Db::name('article_reference_check_result')->where('am_id', $amId)->select();
        if (empty($rows)) {
            $this->setAmRefCheckStatus($amId, self::AM_STATUS_NONE);
            return self::AM_STATUS_NONE;
        }

        $pending = 0;
        $hasFail = false;
        $done = 0;
        foreach ($rows as $row) {
            $st = intval($row['status']);
            if ($st === 0) {
                $pending++;
                continue;
            }
            if ($st === 2 || ($st === 1 && intval($row['is_match']) === 0)) {
                $hasFail = true;
            }
            if ($st === 1) {
                $done++;
            }
        }

        if ($pending > 0) {
            $status = self::AM_STATUS_RUNNING;
        } elseif ($hasFail) {
            $status = self::AM_STATUS_FAIL;
        } elseif ($done === count($rows)) {
            $status = self::AM_STATUS_PASS;
        } else {
            $status = self::AM_STATUS_FAIL;
        }

        $this->setAmRefCheckStatus($amId, $status);

        return $status;
    }

    /**
     * Write ref_check_status for one section; does nothing for a non-positive id.
     *
     * @param int   $amId
     * @param mixed $status One of the AM_STATUS_* constants
     * @return void
     */
    public function setAmRefCheckStatus($amId, $status)
    {
        if ($amId <= 0) {
            return;
        }

        Db::name('article_main')->where('am_id', $amId)->update([
            'ref_check_status' => $status,
        ]);
    }

    /**
     * Delete every check result of an article and reset its sections
     * (state 0 or 2) to NONE.
     *
     * @param int $articleId
     * @return void
     */
    public function clearArticleChecks($articleId)
    {
        Db::name('article_reference_check_result')->where('article_id', $articleId)->delete();

        Db::name('article_main')
            ->where('article_id', $articleId)
            ->whereIn('state', [0, 2])
            ->update(['ref_check_status' => self::AM_STATUS_NONE]);
    }

    /**
     * Map an AM_STATUS_* value to its label, or 'unknown'.
     *
     * @param mixed $status
     * @return string
     */
    public static function amStatusLabel($status)
    {
        $map = [
            self::AM_STATUS_NONE => 'none',
            self::AM_STATUS_PASS => 'pass',
            self::AM_STATUS_FAIL => 'fail',
            self::AM_STATUS_RUNNING => 'running',
        ];

        return isset($map[$status]) ? $map[$status] : 'unknown';
    }

    /**
     * Fetch one check result row.
     *
     * @param int $checkId
     * @return array|null The row, or null when the id is not positive or nothing is found
     */
    public function getResult($checkId)
    {
        if ($checkId <= 0) {
            return null;
        }

        $row = Db::name('article_reference_check_result')->where('check_id', $checkId)->find();

        return $row ?: null;
    }

    /**
     * List the check results of an article, ordered by section, tag position
     * and reference number.
     *
     * @param int  $articleId
     * @param int  $status       Restrict to this status when >= 0
     * @param bool $onlyMismatch Restrict to status 1 rows with is_match 0
     * @return mixed Result of the query builder's select()
     */
    public function listByArticle($articleId, $status = -1, $onlyMismatch = false)
    {
        $q = Db::name('article_reference_check_result')->where('article_id', $articleId);
        if ($status >= 0) {
            $q->where('status', $status);
        }
        if ($onlyMismatch) {
            $q->where('status', 1)->where('is_match', 0);
        }

        return $q->order('am_id asc, cite_tag_start asc, reference_no asc')->select();
    }

    /**
     * Article preview: mark unreasonable citation numbers and their citing
     * sentences inside each section's content.
     *
     * @param int $articleId
     * @param int $amId Limit the sections to this one when > 0 (statistics stay article-wide)
     * @return array{article_id: mixed, article_ref_check_pass: mixed, sections: array, issues: array, stats: array}
     */
    public function buildArticlePreview($articleId, $amId = 0)
    {
        $q = Db::name('article_main')
            ->field('am_id,content,sort,ref_check_status')
            ->where('article_id', $articleId)
            ->whereIn('state', [0, 2]);
        if ($amId > 0) {
            $q->where('am_id', $amId);
        }
        $mains = $q->order('sort asc')->select();

        // One query serves both purposes: indexBadResults() itself ignores
        // every row that is not a finished (status 1) mismatch.
        $allRows = $this->listByArticle($articleId, -1);
        $badByAm = $this->indexBadResults($allRows);

        $stats = ['total' => 0, 'mismatch' => 0, 'match' => 0, 'pending' => 0];
        foreach ($allRows as $r) {
            $stats['total']++;
            if (intval($r['status']) === 0) {
                $stats['pending']++;
            } elseif (intval($r['is_match']) === 1) {
                $stats['match']++;
            } else {
                $stats['mismatch']++;
            }
        }

        $sections = [];
        $issues = [];
        foreach ($mains as $main) {
            $id = intval($main['am_id']);
            $content = (string)$main['content'];
            $badIndex = isset($badByAm[$id]) ? $badByAm[$id] : array();
            $marked = $this->markContentForPreview($content, $id, $badIndex);
            $amStatus = intval($this->arrGet($main, 'ref_check_status', 0));

            $sections[] = [
                'am_id' => $id,
                'ref_check_status' => $amStatus,
                'ref_check_pass' => $amStatus === self::AM_STATUS_PASS,
                'ref_check_label' => self::amStatusLabel($amStatus),
                'content' => $content,
                'content_marked' => $marked['html'],
                'issue_count' => $marked['issue_count'],
            ];
            foreach ($marked['issues'] as $issue) {
                $issues[] = $issue;
            }
        }

        $articlePass = $this->resolveArticlePass($sections);

        return [
            'article_id' => $articleId,
            'article_ref_check_pass' => $articlePass,
            'sections' => $sections,
            'issues' => $issues,
            'stats' => $stats,
        ];
    }

    /**
     * Whether the whole article passes: every checked section is PASS and
     * none is running or failed. Sections with status NONE are ignored.
     *
     * @param array $sections Section entries carrying ref_check_status
     * @return bool|null null when no section has been checked at all
     */
    private function resolveArticlePass($sections)
    {
        $hasChecked = false;
        foreach ($sections as $sec) {
            $st = intval($this->arrGet($sec, 'ref_check_status', 0));
            if ($st === self::AM_STATUS_NONE) {
                continue;
            }
            $hasChecked = true;
            if ($st !== self::AM_STATUS_PASS) {
                return false;
            }
        }

        return $hasChecked ? true : null;
    }

    /**
     * Index finished mismatches per section.
     *
     * Rows that are not status 1, or that have is_match 1, are ignored, so
     * unfiltered result rows may be passed in.
     *
     * @param array $rows Check result rows
     * @return array am_id => ['by_raw' => [rawKey => [refNo => row]], 'contexts' => [ctxKey => span]]
     */
    private function indexBadResults($rows)
    {
        $byAm = [];
        foreach ($rows as $row) {
            if (intval($row['status']) !== 1 || intval($row['is_match']) === 1) {
                continue;
            }
            $amId = intval($row['am_id']);
            $refNo = intval($row['reference_no']);
            if ($amId <= 0 || $refNo <= 0) {
                continue;
            }
            if (!isset($byAm[$amId])) {
                $byAm[$amId] = ['by_raw' => [], 'contexts' => []];
            }

            $rawKey = $this->normalizeRefRawKey((string)$this->arrGet($row, 'reference_raw', ''));
            if ($rawKey !== '') {
                $byAm[$amId]['by_raw'][$rawKey][$refNo] = $row;
            }

            // Rows sharing the same text span are grouped into one context.
            $ctxKey = intval($row['text_start']) . '_' . intval($row['text_end']);
            if (!isset($byAm[$amId]['contexts'][$ctxKey])) {
                $byAm[$amId]['contexts'][$ctxKey] = [
                    'text_start' => intval($row['text_start']),
                    'text_end' => intval($row['text_end']),
                    'check_ids' => [],
                    'reasons' => [],
                    'ref_nos' => [],
                ];
            }
            $byAm[$amId]['contexts'][$ctxKey]['check_ids'][] = intval($row['check_id']);
            $byAm[$amId]['contexts'][$ctxKey]['ref_nos'][] = $refNo;

            $reason = trim((string)$this->arrGet($row, 'reason', ''));
            if ($reason !== '') {
                $byAm[$amId]['contexts'][$ctxKey]['reasons'][$refNo] = $reason;
            }
        }

        return $byAm;
    }

    /**
     * Normalise the raw text of a citation tag so it can be used as a lookup
     * key: dash variants become '-', spaces are removed, result is lowercased.
     *
     * @param string $raw
     * @return string
     */
    private function normalizeRefRawKey($raw)
    {
        $raw = str_replace(
            [',', '–', '—', '−', '‐', '‑', ' '],
            [',', '-', '-', '-', '-', '-', ''],
            trim($raw)
        );

        return strtolower($raw);
    }

    /**
     * Mark bad citation numbers and their citing sentences in one section.
     *
     * NOTE(review): the string literals that build the markup (the callback
     * return value, $newHtml, $open, $close) and the tag regex look as if
     * their HTML was stripped from this copy of the file; $title, $tagClass,
     * $groupIds and $checkIds are computed but unused as a result. They are
     * reproduced unchanged - confirm against the original source.
     *
     * @param string $content  Section content
     * @param int    $amId     Section id, copied into every issue
     * @param array  $badIndex Per-section structure produced by indexBadResults()
     * @return array{html: string, issues: array, issue_count: int}
     */
    private function markContentForPreview($content, $amId, $badIndex)
    {
        $badByRaw = isset($badIndex['by_raw']) ? $badIndex['by_raw'] : array();
        $contexts = isset($badIndex['contexts']) ? $badIndex['contexts'] : array();
        $issues = array();
        $issueCount = 0;

        if ($content === '' || (empty($badByRaw) && empty($contexts))) {
            return array('html' => $content, 'issues' => array(), 'issue_count' => 0);
        }

        $html = $content;

        // 1) First mark the individual numbers inside each citation tag,
        //    working on the original text; for [70-73] only the numbers that
        //    were judged unreasonable (e.g. 70, 71) are marked.
        preg_match_all(
            '/\[([\d,\-\s]+)\]<\/blue>/',
            $html,
            $matches,
            PREG_OFFSET_CAPTURE
        );

        $citeDeltas = [];
        if (!empty($matches[0])) {
            $replacements = [];
            foreach ($matches[0] as $idx => $match) {
                $fullTag = $match[0];
                $tagStart = $match[1];
                $tagEnd = $tagStart + strlen($fullTag);
                $inner = $matches[1][$idx][0];
                $rawKey = $this->normalizeRefRawKey($inner);
                $badNums = isset($badByRaw[$rawKey]) ? $badByRaw[$rawKey] : array();

                $innerMarked = preg_replace_callback(
                    '/\d+/',
                    function ($numMatch) use ($badNums, &$issues, &$issueCount, $amId, $inner) {
                        $num = intval($numMatch[0]);
                        if (!isset($badNums[$num])) {
                            return $numMatch[0];
                        }
                        $row = $badNums[$num];
                        $rowReason = isset($row['reason']) ? $row['reason'] : '';
                        $issueCount++;
                        $issues[] = array(
                            'am_id' => $amId,
                            'check_id' => intval($row['check_id']),
                            'reference_no' => $num,
                            'reference_raw' => $inner,
                            'reason' => $rowReason,
                            'confidence' => floatval(isset($row['confidence']) ? $row['confidence'] : 0),
                        );
                        $title = htmlspecialchars(
                            '引用[' . $num . ']不合理: ' . $rowReason,
                            ENT_QUOTES,
                            'UTF-8'
                        );
                        return '' . $numMatch[0] . '';
                    },
                    $inner
                );

                $tagClass = !empty($badNums) ? ' ref-cite-error' : '';
                $groupIds = !empty($badNums)
                    ? implode(',', array_map('intval', array_column($badNums, 'check_id')))
                    : '';
                $newHtml = '[' . $innerMarked . ']';
                $replacements[] = [
                    'start' => $tagStart,
                    'end' => $tagEnd,
                    'html' => $newHtml,
                    'delta' => strlen($newHtml) - ($tagEnd - $tagStart),
                ];
            }

            // Apply from the last tag to the first so earlier offsets stay valid.
            usort($replacements, function ($a, $b) {
                return $b['start'] - $a['start'];
            });
            foreach ($replacements as $rep) {
                $html = substr($html, 0, $rep['start']) . $rep['html'] . substr($html, $rep['end']);
                $citeDeltas[] = ['start' => $rep['start'], 'delta' => $rep['delta']];
            }
        }

        // Translate an offset in the original content into an offset in $html
        // by adding the length change of every tag that starts before it.
        $shiftByCite = function ($pos) use ($citeDeltas) {
            $d = 0;
            foreach ($citeDeltas as $cd) {
                if ($cd['start'] < $pos) {
                    $d += $cd['delta'];
                }
            }
            return $pos + $d;
        };

        // 2) Then mark the citing sentences, from the last span to the first.
        if (!empty($contexts)) {
            $spans = array_values($contexts);
            usort($spans, function ($a, $b) {
                return $b['text_start'] - $a['text_start'];
            });
            foreach ($spans as $span) {
                $start = $span['text_start'];
                $end = $span['text_end'];
                if ($start < 0 || $end <= $start) {
                    continue;
                }
                $s = $shiftByCite($start);
                $e = $shiftByCite($end);
                if ($e > strlen($html)) {
                    $e = strlen($html);
                }
                $checkIds = array_values(array_unique($span['check_ids']));
                $refNos = array_values(array_unique($span['ref_nos']));
                sort($refNos);
                $reasonParts = [];
                foreach ($refNos as $rn) {
                    if (!empty($span['reasons'][$rn])) {
                        $reasonParts[] = '[' . $rn . '] ' . $span['reasons'][$rn];
                    }
                }
                $title = htmlspecialchars(
                    '引用句可能不合理: ' . implode('; ', $reasonParts),
                    ENT_QUOTES,
                    'UTF-8'
                );
                $open = '';
                $close = '';
                $html = substr($html, 0, $s) . $open . substr($html, $s, $e - $s) . $close . substr($html, $e);
            }
        }

        return ['html' => $html, 'issues' => $issues, 'issue_count' => $issueCount];
    }

    /**
     * Load the active (state 0) references of a production article.
     *
     * @param int $pArticleId
     * @return array Value of the row's "index" column => row; empty for a non-positive id
     */
    public function loadReferMapByPArticleId($pArticleId)
    {
        $map = [];
        if ($pArticleId <= 0) {
            return $map;
        }

        $rows = Db::name('production_article_refer')
            ->where('p_article_id', $pArticleId)
            ->where('state', 0)
            ->order('index asc')
            ->select();
        foreach ($rows as $row) {
            $map[intval($row['index'])] = $row;
        }

        return $map;
    }

    /**
     * Render a reference row as "Field: value" lines, skipping empty fields;
     * refer_content, when present, is appended as "Reference: ...".
     *
     * @param array $refer
     * @return string
     */
    public function formatReferForLlm($refer)
    {
        $parts = [];
        foreach (['title', 'author', 'joura', 'dateno', 'refer_doi', 'doilink'] as $f) {
            $v = trim((string)$this->arrGet($refer, $f, ''));
            if ($v !== '') {
                $parts[] = ucfirst($f) . ': ' . $v;
            }
        }

        $content = trim((string)$this->arrGet($refer, 'refer_content', ''));
        if ($content !== '') {
            $parts[] = 'Reference: ' . $content;
        }

        return implode("\n", $parts);
    }

    /**
     * Extract the citation tags from article_main.content.
     *
     * Tags whose number list expands to nothing, or whose surrounding context
     * is not meaningful, are dropped. All offsets are byte offsets.
     *
     * NOTE(review): the tag pattern only shows a closing </blue>; the opening
     * part may have been stripped from this copy - confirm.
     *
     * @param string $content
     * @return array List of items with reference_raw, reference_numbers,
     *               original_text, reference_start, reference_end, text_start, text_end
     */
    public function extractReferences($content)
    {
        $result = [];
        preg_match_all('/\[([\d,\-\s]+)\]<\/blue>/', $content, $matches, PREG_OFFSET_CAPTURE);
        if (empty($matches[0])) {
            return [];
        }

        $tagSpans = [];
        foreach ($matches[0] as $index => $match) {
            $tagSpans[] = [
                'start' => $match[1],
                'end' => $match[1] + strlen($match[0]),
                'index' => $index,
            ];
        }

        foreach ($matches[0] as $index => $match) {
            $fullTag = $match[0];
            $tagStart = $match[1];
            $tagEnd = $tagStart + strlen($fullTag);
            $rawRef = trim($matches[1][$index][0]);
            $referenceNumbers = $this->expandReferenceNumbers($rawRef);

            list($localStart, $localEnd, $originalText) = $this->extractLocalCitationContext(
                $content,
                $tagStart,
                $tagEnd,
                $tagSpans
            );

            if (!$this->isMeaningfulCitationContext($originalText) || empty($referenceNumbers)) {
                continue;
            }

            $result[] = [
                'reference_raw' => $rawRef,
                'reference_numbers' => $referenceNumbers,
                'original_text' => $originalText,
                'reference_start' => $tagStart,
                'reference_end' => $tagEnd,
                'text_start' => $localStart,
                'text_end' => $localEnd,
            ];
        }

        return $result;
    }

    /**
     * Cut the local context of one citation: the narrative before the tag is
     * preferred; a later citation in the same paragraph starts after the
     * previous tag.
     *
     * @param string $content
     * @param int    $tagStart Byte offset where the tag starts
     * @param int    $tagEnd   Byte offset just after the tag
     * @param array  $tagSpans start/end byte offsets of every tag in $content
     * @return array [localStart, localEnd, originalText]
     */
    private function extractLocalCitationContext($content, $tagStart, $tagEnd, array $tagSpans)
    {
        $paragraphStart = $this->findParagraphStart($content, $tagStart);
        $sentenceEnd = $this->findSentenceEnd($content, $tagEnd, $tagEnd);

        $prevTagEnd = $paragraphStart;
        $nextTagStart = $sentenceEnd;
        foreach ($tagSpans as $span) {
            if ($span['end'] <= $tagStart && $span['end'] > $prevTagEnd) {
                $prevTagEnd = $span['end'];
            }
            if ($span['start'] > $tagEnd && $span['start'] < $nextTagStart) {
                $nextTagStart = $span['start'];
            }
        }

        $hasPriorCiteInParagraph = ($prevTagEnd > $paragraphStart);

        // Later citation in the paragraph: start after the previous tag.
        // First citation in the paragraph: from the paragraph start up to this
        // tag (not only the last sentence).
        if ($hasPriorCiteInParagraph) {
            $localStart = $prevTagEnd;
        } else {
            $localStart = $this->capContextStartBeforeTag($content, $tagStart, $paragraphStart);
        }

        // Default: the statement in front of the tag
        // (e.g. "... higher than ... Yin et al. [13]").
        $localEnd = $tagStart;
        $originalText = $this->buildCitationContextText($content, $localStart, $localEnd);

        // Almost no text before the tag (e.g. a sentence ending in
        // "... ICU nurses [14]"): use the text after the tag, up to the next
        // citation or the end of the sentence.
        if (!$this->isMeaningfulCitationContext($originalText)
            || $this->shouldUseTrailingCitationContext($content, $localStart, $tagStart, $tagEnd)
        ) {
            $trailEnd = ($nextTagStart < $sentenceEnd) ? $nextTagStart : $sentenceEnd;
            $trailText = $this->buildCitationContextText($content, $tagEnd, $trailEnd);
            if ($this->isMeaningfulCitationContext($trailText)) {
                $localStart = $tagEnd;
                $localEnd = $trailEnd;
                $originalText = $trailText;
            }
        }

        if (!$this->isMeaningfulCitationContext($originalText)) {
            list($localStart, $localEnd) = $this->widenCitationContextBounds(
                $content,
                $tagStart,
                $tagEnd,
                $localStart,
                $localEnd
            );
            $originalText = $this->buildCitationContextText($content, $localStart, $localEnd);
        }

        return [$localStart, $localEnd, $originalText];
    }

    /**
     * Whether the text before the tag is too thin (e.g. only an author
     * abbreviation) so that the text after the tag should be used instead.
     *
     * @param string $content
     * @param int    $localStart
     * @param int    $tagStart
     * @param int    $tagEnd Unused here
     * @return bool
     */
    private function shouldUseTrailingCitationContext($content, $localStart, $tagStart, $tagEnd)
    {
        $before = $this->buildCitationContextText($content, $localStart, $tagStart);
        if (!$this->isMeaningfulCitationContext($before)) {
            return true;
        }

        return mb_strlen($before) < 25;
    }

    /**
     * Expand a citation number list such as "3, 5-7" into [3, 5, 6, 7].
     *
     * Dash variants are accepted as range separators. Descending ranges and
     * ranges spanning more than $maxRangeSize numbers are ignored; the latter
     * keeps a malformed tag like [1-99999999] from exhausting memory.
     *
     * @param string $refStr
     * @param int    $maxRangeSize Largest number of entries a single range may produce
     * @return array Unique integers in order of first appearance
     */
    public function expandReferenceNumbers($refStr, $maxRangeSize = 1000)
    {
        $refStr = str_replace(
            [',', '–', '—', '−', '‐', '‑'],
            [',', '-', '-', '-', '-', '-'],
            trim($refStr)
        );

        $numbers = [];
        foreach (explode(',', $refStr) as $part) {
            $part = trim($part);
            if ($part === '') {
                continue;
            }
            if (preg_match('/^(\d+)\s*-\s*(\d+)$/', $part, $m)) {
                $start = intval($m[1]);
                $end = intval($m[2]);
                if ($start <= $end && ($end - $start) < $maxRangeSize) {
                    $numbers = array_merge($numbers, range($start, $end));
                }
            } elseif (ctype_digit($part)) {
                $numbers[] = intval($part);
            }
        }

        return array_values(array_unique($numbers));
    }

    /**
     * Byte position just after the UTF-8 code point that starts at $bytePos
     * (i.e. where the next character begins).
     *
     * @param string $content
     * @param int    $bytePos
     * @return int
     */
    private function utf8CharEnd($content, $bytePos)
    {
        $len = strlen($content);
        if ($bytePos < 0 || $bytePos >= $len) {
            return max(0, min($len, $bytePos + 1));
        }

        $next = $bytePos + 1;
        // Skip continuation bytes (10xxxxxx).
        while ($next < $len && (ord($content[$next]) & 0xC0) === 0x80) {
            $next++;
        }

        return $next;
    }

    /**
     * Slice by byte offsets (consistent with strpos/strlen). mb_substr must
     * not be used here: with a multi-byte prefix it would cut into the wrong
     * place.
     *
     * @param string $content
     * @param int    $start
     * @param int    $end
     * @return string
     */
    private function byteSubstr($content, $start, $end)
    {
        $length = max(0, $end - $start);
        if ($length === 0) {
            return '';
        }

        return (string)mb_strcut($content, $start, $length, 'UTF-8');
    }

    /**
     * Plain-text context between two byte offsets: citation tags and HTML
     * tags removed, whitespace collapsed, leading byte-order mark dropped.
     *
     * @param string $content
     * @param int    $start
     * @param int    $end
     * @return string
     */
    private function buildCitationContextText($content, $start, $end)
    {
        $text = $this->byteSubstr($content, $start, $end);
        $text = preg_replace('/\[[\d,\-\s]+\]<\/blue>/', '', $text);
        $text = trim(strip_tags($text));
        $text = (string)preg_replace('/\s+/u', ' ', $text);

        // Remove only a complete BOM. ltrim() with "\xEF\xBB\xBF" treats the
        // argument as a set of bytes and would also eat the lead byte 0xEF of
        // ordinary characters such as full-width punctuation.
        while (strncmp($text, "\xEF\xBB\xBF", 3) === 0) {
            $text = (string)substr($text, 3);
        }

        return $text;
    }

    /**
     * Reject contexts that are empty, punctuation only, shorter than two
     * characters, or without any letter or digit (e.g. a lone "." left after
     * removing the tag).
     *
     * @param string $text
     * @return bool
     */
    private function isMeaningfulCitationContext($text)
    {
        $text = trim($text);
        if ($text === '') {
            return false;
        }
        if ($this->isOnlyPunctuationOrSpace($text)) {
            return false;
        }
        if (!preg_match('/[\p{L}\p{N}]/u', $text)) {
            return false;
        }

        return mb_strlen($text) >= 2;
    }

    /**
     * @param string $text
     * @return bool True when $text consists solely of whitespace, punctuation and symbols
     */
    private function isOnlyPunctuationOrSpace($text)
    {
        return preg_match('/^[\s\p{P}\p{S}]+$/u', $text) === 1;
    }

    /**
     * Widen a too-short context by one sentence in each direction, capped at
     * 2000 bytes centred on the tag.
     *
     * @param string $content
     * @param int    $tagStart
     * @param int    $tagEnd
     * @param int    $start
     * @param int    $end
     * @return array [start, end]
     */
    private function widenCitationContextBounds($content, $tagStart, $tagEnd, $start, $end)
    {
        $len = strlen($content);
        $maxSpan = 2000;

        if ($start > 0) {
            $prevStart = $this->findSentenceStart($content, max(0, $start - 1));
            if ($prevStart < $start) {
                $start = $prevStart;
            }
        }

        $nextEnd = $this->findSentenceEnd($content, $end, $tagEnd);
        if ($nextEnd > $end && $nextEnd <= $len) {
            $end = $nextEnd;
        }

        if ($end - $start > $maxSpan) {
            $half = (int)floor($maxSpan / 2);
            $mid = (int)floor(($tagStart + $tagEnd) / 2);
            $start = max(0, $mid - $half);
            $end = min($len, $start + $maxSpan);
        }

        return [$start, $end];
    }

    /**
     * Whether the delimiter at $pos really ends a sentence. Only '.' is ever
     * rejected: decimal points, abbreviations such as "et al." and a period
     * directly followed by a citation do not count.
     *
     * NOTE(review): the last pattern ('/^\s*\s*\[/') looks like it lost a tag
     * between the two \s* - confirm against the original source.
     *
     * @param string $content
     * @param int    $pos
     * @param string $delimiter
     * @return bool
     */
    private function isSentenceDelimiterAt($content, $pos, $delimiter)
    {
        $len = strlen($content);
        if ($delimiter !== '.' || $pos < 0 || $pos >= $len) {
            return true;
        }

        // Decimal point: digit on both sides.
        if ($pos > 0 && $pos + 1 < $len
            && ctype_digit($content[$pos - 1])
            && ctype_digit($content[$pos + 1])
        ) {
            return false;
        }

        $before = substr($content, max(0, $pos - 12), min(12, $pos));
        if (preg_match('/\b(et\s+al|e\.g|i\.e|vs|etc|fig|no)\s*\.?\s*$/i', $before)) {
            return false;
        }

        $after = substr($content, $pos + 1, 24);
        if (preg_match('/^\s*\s*\[/', $after)) {
            return false;
        }

        return true;
    }

    /**
     * Start of the paragraph containing $tagStart (HTML boundaries or line
     * breaks), so that a multi-sentence paragraph is not reduced to the text
     * after its last period.
     *
     * NOTE(review): the first and third patterns ('/]*>/i' and '/\s*\/i')
     * look truncated in this copy of the file - confirm against the original.
     *
     * @param string $content
     * @param int    $tagStart
     * @return int Byte offset
     */
    private function findParagraphStart($content, $tagStart)
    {
        $search = substr($content, 0, max(0, $tagStart));
        if ($search === '') {
            return 0;
        }

        $best = 0;
        if (preg_match_all('/]*>/i', $search, $m, PREG_OFFSET_CAPTURE)) {
            $last = end($m[0]);
            $best = max($best, $last[1] + strlen($last[0]));
        }
        if (preg_match_all('/<\/p>\s*/i', $search, $m, PREG_OFFSET_CAPTURE)) {
            $last = end($m[0]);
            $best = max($best, $last[1] + strlen($last[0]));
        }
        if (preg_match_all('/\s*/i', $search, $m, PREG_OFFSET_CAPTURE)) {
            $last = end($m[0]);
            $best = max($best, $last[1] + strlen($last[0]));
        }

        $pos = strrpos($search, "\n\n");
        if ($pos !== false) {
            $best = max($best, $pos + 2);
        }
        $pos = strrpos($search, "\n");
        if ($pos !== false) {
            $best = max($best, $pos + 1);
        }

        return $best;
    }

    /**
     * Limit how far back the context may reach when the paragraph is very
     * long, so a single LLM request does not get an oversized context.
     *
     * When the paragraph part before the tag exceeds $maxBytes, the window is
     * the last $maxBytes bytes before the tag, moved forward to the first
     * sentence boundary inside it so the context starts on a whole sentence.
     *
     * @param string $content
     * @param int    $tagStart
     * @param int    $paragraphStart
     * @param int    $maxBytes
     * @return int Byte offset where the context should start
     */
    private function capContextStartBeforeTag($content, $tagStart, $paragraphStart, $maxBytes = 2500)
    {
        if ($tagStart - $paragraphStart <= $maxBytes) {
            return $paragraphStart;
        }

        $start = $tagStart - $maxBytes;
        // Do not begin inside a multi-byte character: that would make the /u
        // match below fail and hand back an offset in the middle of a char.
        while ($start < $tagStart && (ord($content[$start]) & 0xC0) === 0x80) {
            $start++;
        }

        $slice = substr($content, $start, $tagStart - $start);
        // With preg_match + PREG_OFFSET_CAPTURE, $m[0] is [matched text, offset].
        if (preg_match('/[.!?。!?]\s+/u', $slice, $m, PREG_OFFSET_CAPTURE) === 1) {
            return $start + $m[0][1] + strlen($m[0][0]);
        }

        return max($paragraphStart, $start);
    }

    /**
     * Start of the sentence containing $position: the position right after
     * the closest real sentence delimiter before it, or 0.
     *
     * @param string $content
     * @param int    $position Byte offset; only text before it is searched
     * @return int
     */
    private function findSentenceStart($content, $position)
    {
        $start = 0;
        foreach (['.', '。', '!', '?', "\n"] as $delimiter) {
            $pos = $this->lastSentenceDelimiterBefore($content, $delimiter, $position);
            if ($pos !== false) {
                $start = max($start, $this->utf8CharEnd($content, $pos));
            }
        }

        return $start;
    }

    /**
     * Last occurrence of $delimiter before $position that really ends a
     * sentence, skipping rejected ones such as decimal points or "et al.".
     *
     * @param string $content
     * @param string $delimiter
     * @param int    $position
     * @return int|false
     */
    private function lastSentenceDelimiterBefore($content, $delimiter, $position)
    {
        $limit = $position;
        while ($limit > 0) {
            $pos = strrpos(substr($content, 0, $limit), $delimiter);
            if ($pos === false) {
                return false;
            }
            if ($this->isSentenceDelimiterAt($content, $pos, $delimiter)) {
                return $pos;
            }
            $limit = $pos;
        }

        return false;
    }

    /**
     * First occurrence of $delimiter at or after $from that really ends a
     * sentence, skipping rejected ones such as decimal points or "et al.".
     *
     * @param string $content
     * @param string $delimiter
     * @param int    $from Byte offset, must not exceed the content length
     * @return int|false
     */
    private function firstSentenceDelimiterFrom($content, $delimiter, $from)
    {
        $pos = strpos($content, $delimiter, $from);
        while ($pos !== false && !$this->isSentenceDelimiterAt($content, $pos, $delimiter)) {
            $pos = strpos($content, $delimiter, $pos + 1);
        }

        return $pos;
    }

    /**
     * End of the sentence, searching from $searchFrom.
     *
     * @param string $content
     * @param int    $searchFrom Byte offset from which the sentence end is searched
     * @param int    $tagEnd     End of the citation tag; a sentence end separated from
     *                           the tag only by punctuation/whitespace (an isolated
     *                           period right after the tag) is skipped
     * @return int Byte offset just after the delimiter, or the content length
     */
    private function findSentenceEnd($content, $searchFrom, $tagEnd = 0)
    {
        $length = strlen($content);
        $minPos = max(0, $searchFrom);

        while ($minPos < $length) {
            $end = null;
            foreach (['.', '。', '!', '?', "\n"] as $delimiter) {
                $pos = $this->firstSentenceDelimiterFrom($content, $delimiter, $minPos);
                if ($pos === false) {
                    continue;
                }
                $candidate = $this->utf8CharEnd($content, $pos);
                if ($end === null || $candidate < $end) {
                    $end = $candidate;
                }
            }
            if ($end === null) {
                return $length;
            }

            if ($tagEnd <= 0 || $end <= $tagEnd) {
                return $end;
            }

            $gap = substr($content, $tagEnd, $end - $tagEnd);
            $gapText = trim(strip_tags(preg_replace('/\[[\d,\-\s]+\]<\/blue>/', '', $gap)));
            if ($gapText !== '' && !$this->isOnlyPunctuationOrSpace($gapText)) {
                return $end;
            }

            // Nothing but punctuation between the tag and this delimiter:
            // keep looking for the real end of the sentence.
            $minPos = $end;
        }

        return $length;
    }

    /**
     * Push one reference-check job, delayed when $delaySeconds > 0.
     *
     * @param int $checkId
     * @param int $delaySeconds
     * @return void
     * @throws \Exception Re-thrown after logging when the queue rejects the job
     */
    private function pushJob($checkId, $delaySeconds = 0)
    {
        $jobClass = 'app\api\job\ReferenceCheck@fire';
        $data = ['check_id' => $checkId];

        try {
            if ($delaySeconds > 0) {
                Queue::later($delaySeconds, $jobClass, $data, self::QUEUE_NAME);
            } else {
                Queue::push($jobClass, $data, self::QUEUE_NAME);
            }
        } catch (\Exception $e) {
            \think\Log::error('ReferenceCheck pushJob failed check_id=' . $checkId . ' ' . $e->getMessage());
            throw $e;
        }
    }
}