\s*\[/', $after)) {
return false;
}
return true;
}
/**
* 段落起始(HTML / 换行),避免英文多句段落只取到最后一个句号后的一句
*/
private function findParagraphStart($content, $tagStart)
{
$search = substr($content, 0, max(0, $tagStart));
if ($search === '') {
return 0;
}
$best = 0;
if (preg_match_all('/]*>/i', $search, $m, PREG_OFFSET_CAPTURE)) {
$last = end($m[0]);
$best = max($best, $last[1] + strlen($last[0]));
}
if (preg_match_all('/<\/p>\s*/i', $search, $m, PREG_OFFSET_CAPTURE)) {
$last = end($m[0]);
$best = max($best, $last[1] + strlen($last[0]));
}
if (preg_match_all('/
\s*/i', $search, $m, PREG_OFFSET_CAPTURE)) {
$last = end($m[0]);
$best = max($best, $last[1] + strlen($last[0]));
}
$pos = strrpos($search, "\n\n");
if ($pos !== false) {
$best = max($best, $pos + 2);
}
$pos = strrpos($search, "\n");
if ($pos !== false) {
$best = max($best, $pos + 1);
}
return $best;
}
/**
* 段落过长时从引用处向前截取上限,避免单次 LLM 上下文过大
*/
private function capContextStartBeforeTag($content, $tagStart, $paragraphStart, $maxBytes = 2500)
{
if ($tagStart - $paragraphStart <= $maxBytes) {
return $paragraphStart;
}
$start = $tagStart - $maxBytes;
$slice = substr($content, $start, $tagStart - $start);
if (preg_match('/[.!?。!?]\s+/u', $slice, $m, PREG_OFFSET_CAPTURE)) {
$rel = $m[0][1] + strlen($m[0][0]);
return $start + $rel;
}
return max($paragraphStart, $start);
}
private function findSentenceStart($content, $position)
{
$start = 0;
foreach (['.', '。', '!', '?', "\n"] as $delimiter) {
$pos = strrpos(substr($content, 0, $position), $delimiter);
if ($pos !== false && $this->isSentenceDelimiterAt($content, $pos, $delimiter)) {
$start = max($start, $this->utf8CharEnd($content, $pos));
}
}
return $start;
}
/**
* @param int $searchFrom 从该字节位置起查找句末
* @param int $tagEnd 引用标签结束位置;用于跳过
后紧跟的孤立句号
*/
private function findSentenceEnd($content, $searchFrom, $tagEnd = 0)
{
$length = strlen($content);
$minPos = max(0, $searchFrom);
while ($minPos < $length) {
$endPositions = [];
foreach (['.', '。', '!', '?', "\n"] as $delimiter) {
$pos = strpos($content, $delimiter, $minPos);
if ($pos !== false && $this->isSentenceDelimiterAt($content, $pos, $delimiter)) {
$endPositions[] = $this->utf8CharEnd($content, $pos);
}
}
if (empty($endPositions)) {
return $length;
}
$end = min($endPositions);
if ($tagEnd <= 0 || $end <= $tagEnd) {
return $end;
}
$gap = substr($content, $tagEnd, $end - $tagEnd);
$gapText = trim(strip_tags(preg_replace(self::BLUE_TAG_REGEX, '', $gap)));
if ($gapText !== '' && !$this->isOnlyPunctuationOrSpace($gapText)) {
return $end;
}
$minPos = $end;
}
return $length;
}
/**
* 已入库记录按文献编号正序入队(同号按 am_id、正文位置稳定排序)
*
* @param array $rows 元素含 check_id、reference_no,可选 am_id、text_start
*/
private function pushJobsSortedByReferenceNo(array $rows)
{
if (empty($rows)) {
return [];
}
usort($rows, function ($a, $b) {
if ($a['reference_no'] !== $b['reference_no']) {
return $a['reference_no'] - $b['reference_no'];
}
$amA = isset($a['am_id']) ? intval($a['am_id']) : 0;
$amB = isset($b['am_id']) ? intval($b['am_id']) : 0;
if ($amA !== $amB) {
return $amA - $amB;
}
$posA = isset($a['text_start']) ? intval($a['text_start']) : 0;
$posB = isset($b['text_start']) ? intval($b['text_start']) : 0;
return $posA - $posB;
});
$checkIds = [];
$delay = 0;
foreach ($rows as $row) {
$checkId = intval($row['check_id']);
$checkIds[] = $checkId;
$this->pushJob($checkId, $delay);
$delay++;
}
return $checkIds;
}
private function pushJob($checkId, $delaySeconds = 0)
{
$checkId = intval($checkId);
$this->clearReferenceCheckQueueLock($checkId);
$jobClass = 'app\api\job\ReferenceCheck@fire';
$data = ['check_id' => $checkId];
try {
if ($delaySeconds > 0) {
$jobId = Queue::later($delaySeconds, $jobClass, $data, self::QUEUE_NAME);
} else {
$jobId = Queue::push($jobClass, $data, self::QUEUE_NAME);
}
} catch (\Exception $e) {
\think\Log::error('ReferenceCheck pushJob failed check_id=' . $checkId . ' ' . $e->getMessage());
throw $e;
}
}
private function pushJob2($checkId, $delaySeconds = 0)
{
$jobClass = 'app\api\job\ReferenceCheckTwo@fire';
$data = ['check_id' => $checkId];
try {
if ($delaySeconds > 0) {
$jobId = Queue::later($delaySeconds, $jobClass, $data, self::QUEUE_NAME);
} else {
$jobId = Queue::push($jobClass, $data, self::QUEUE_NAME);
}
} catch (\Exception $e) {
\think\Log::error('ReferenceCheckTwo pushJob failed check_id=' . $checkId . ' ' . $e->getMessage());
throw $e;
}
}
}