Files
tougao/application/common/ReferenceCheckService.php
2026-05-21 11:30:46 +08:00

856 lines
30 KiB
PHP
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
<?php
namespace app\common;
use think\Db;
use think\Queue;
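/**
 * Checks whether each in-text <blue>[n]</blue> citation is relevant to reference n,
 * i.e. the t_production_article_refer row whose index + 1 = n.
 * The LLM configuration is the same as PromotionLlmService; each single check runs as a job on the ReferenceCheck queue.
 */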
class ReferenceCheckService
{
const QUEUE_NAME = 'ReferenceCheck';
/** t_article_main.ref_check_status */
const AM_STATUS_NONE = 0;
const AM_STATUS_PASS = 1;
const AM_STATUS_FAIL = 2;
const AM_STATUS_RUNNING = 3;
/**
 * Array lookup with a fallback, for PHP versions that lack the ?? operator.
 *
 * @param array $arr     Array to read from.
 * @param mixed $key     Key to look up.
 * @param mixed $default Returned when the key is absent or its value is null.
 * @return mixed
 */
private function arrGet($arr, $key, $default = '')
{
    if (!isset($arr[$key])) {
        return $default;
    }

    return $arr[$key];
}
/**
 * Queue a single check whose body text and reference text are supplied by the caller.
 *
 * @param string $contentA Body text (required; trimmed before use).
 * @param string $contentB Reference text (trimmed before use).
 * @param array  $extra    Optional values copied into the result row (article_id, am_id,
 *                         p_article_id, p_refer_id, refer_index, reference_no, reference_raw,
 *                         cite_tag_start, cite_tag_end, text_start, text_end) plus queue_delay.
 * @return array ['check_id' => id of the inserted row, 'queued' => 1]
 * @throws \InvalidArgumentException when $contentA is empty after trimming
 */
public function enqueue($contentA, $contentB, array $extra = [])
{
    $contentA = trim($contentA);
    if ($contentA === '') {
        throw new \InvalidArgumentException('content_a is required');
    }

    $timestamp = date('Y-m-d H:i:s');

    // Build the row in the same column order: integer fields, the raw tag text, then offsets.
    $record = array();
    $leadingIntFields = array('article_id', 'am_id', 'p_article_id', 'p_refer_id', 'refer_index', 'reference_no');
    foreach ($leadingIntFields as $field) {
        $record[$field] = intval($this->arrGet($extra, $field, 0));
    }
    $record['reference_raw'] = (string)$this->arrGet($extra, 'reference_raw', '');
    $offsetFields = array('cite_tag_start', 'cite_tag_end', 'text_start', 'text_end');
    foreach ($offsetFields as $field) {
        $record[$field] = intval($this->arrGet($extra, $field, 0));
    }
    $record['content_a'] = $contentA;
    $record['content_b'] = trim($contentB);
    $record['status'] = 0;
    $record['created_at'] = $timestamp;
    $record['updated_at'] = $timestamp;

    $checkId = Db::name('article_reference_check_result')->insertGetId($record);

    // The owning section (if any) is flagged as running before the job is pushed.
    $sectionId = $record['am_id'];
    if ($sectionId > 0) {
        $this->setAmRefCheckStatus($sectionId, self::AM_STATUS_RUNNING);
    }

    $delaySeconds = intval($this->arrGet($extra, 'queue_delay', 0));
    $this->pushJob(intval($checkId), $delaySeconds);

    return array('check_id' => $checkId, 'queued' => 1);
}
/**
 * Queue one reference check per (citation, reference number) pair found in a single
 * article_main row.
 *
 * Sections without any extractable citation are set to AM_STATUS_NONE. Reference numbers
 * that have no matching row in the refer map are skipped. The section is only flagged as
 * RUNNING when at least one job was actually queued; otherwise its status is re-derived
 * from whatever detail rows already exist, so it cannot get stuck in RUNNING with no job
 * left to resolve it.
 *
 * @param array $main article_main row; am_id, article_id and content are read.
 * @return void
 * @throws \RuntimeException when no production_article row with state = 0 exists for the article
 */
public function enqueueByArticleMain($main)
{
    $amId = $main['am_id'];

    $citations = $this->extractReferences((string)$main['content']);
    if (empty($citations)) {
        $this->setAmRefCheckStatus($amId, self::AM_STATUS_NONE);
        return;
    }

    $prod = Db::name('production_article')
        ->where('article_id', $main['article_id'])
        ->where('state', 0)
        ->find();
    if (empty($prod)) {
        throw new \RuntimeException('production_article not found for article_id=' . $main['article_id']);
    }
    $pArticleId = intval($prod['p_article_id']);
    $referMap = $this->loadReferMapByPArticleId($pArticleId);

    $queued = 0;
    $delay = 0;
    foreach ($citations as $cite) {
        // A tag such as [70-73] arrives expanded to 70,71,72,73 and yields one row per number.
        foreach ($cite['reference_numbers'] as $refNo) {
            $referIndex = $refNo - 1;
            if ($referIndex < 0 || !isset($referMap[$referIndex])) {
                continue;
            }
            $refer = $referMap[$referIndex];
            $referText = $this->formatReferForLlm($refer);
            $now = date('Y-m-d H:i:s');
            // NOTE(review): refer_index stores the 1-based $refNo, not the 0-based $referIndex
            // used for the map lookup; reference_raw / cite_tag_start / cite_tag_end are not
            // stored here although the preview code reads them. Confirm both are intended.
            $checkId = Db::name('article_reference_check_result')->insertGetId([
                'article_id' => $main['article_id'],
                'p_article_id' => $pArticleId,
                'am_id' => intval($main['am_id']),
                'reference_no' => $refNo,
                'refer_index' => $refNo,
                'origin_text' => $cite['original_text'],
                'refer_text' => $referText,
                'p_refer_id' => $refer['p_refer_id'],
                'text_start' => $cite['text_start'],
                'text_end' => $cite['text_end'],
                'created_at' => $now,
                'updated_at' => $now,
            ]);
            // Each job is delayed one second more than the previous one.
            $this->pushJob(intval($checkId), $delay);
            $queued++;
            $delay += 1;
        }
    }

    if ($queued > 0) {
        $this->setAmRefCheckStatus($amId, self::AM_STATUS_RUNNING);
        return;
    }

    // Every citation was skipped: no job will ever report back, so do not mark RUNNING.
    $this->syncAmRefCheckStatus($amId);
}
/**
 * Queue reference checks for every section (article_main row with state 0 or 2) of an article.
 *
 * Sections without citations are set to AM_STATUS_NONE; sections for which at least one job
 * was queued are set to AM_STATUS_RUNNING after all jobs have been pushed.
 *
 * @param int $articleId
 * @return array article_id, p_article_id, queued, skipped, check_ids, queue
 * @throws \InvalidArgumentException when $articleId is not positive
 * @throws \RuntimeException when the production_article row or the sections are missing
 */
public function enqueueByArticle($articleId)
{
    if ($articleId <= 0) {
        throw new \InvalidArgumentException('article_id is required');
    }

    $production = Db::name('production_article')
        ->where('article_id', $articleId)
        ->where('state', 0)
        ->find();
    if (empty($production)) {
        throw new \RuntimeException('production_article not found for article_id=' . $articleId);
    }
    $pArticleId = intval($production['p_article_id']);
    $referMap = $this->loadReferMapByPArticleId($pArticleId);

    $sections = Db::name('article_main')
        ->field('am_id,content,article_id')
        ->where('article_id', $articleId)
        ->whereIn('state', [0, 2])
        ->order('sort asc')
        ->select();
    if (empty($sections)) {
        throw new \RuntimeException('article_main is empty');
    }

    $checkIds = array();
    $skippedCount = 0;
    $delaySeconds = 0;
    $sectionsWithJobs = array();

    foreach ($sections as $section) {
        $amId = intval($section['am_id']);
        $citations = $this->extractReferences((string)$section['content']);
        if (empty($citations)) {
            $this->setAmRefCheckStatus($amId, self::AM_STATUS_NONE);
            continue;
        }

        foreach ($citations as $citation) {
            // A tag such as [70-73] arrives expanded to 70,71,72,73 and yields one row per number.
            foreach ($citation['reference_numbers'] as $refNo) {
                $referIndex = $refNo - 1;
                $known = $referIndex >= 0 && isset($referMap[$referIndex]);
                if (!$known) {
                    $skippedCount++;
                    continue;
                }

                $refer = $referMap[$referIndex];
                $referText = $this->formatReferForLlm($refer);
                $timestamp = date('Y-m-d H:i:s');
                $checkId = Db::name('article_reference_check_result')->insertGetId(array(
                    'article_id' => $section['article_id'],
                    'p_article_id' => $pArticleId,
                    'am_id' => $amId,
                    'reference_no' => $refNo,
                    'refer_index' => $refNo,
                    'origin_text' => $citation['original_text'],
                    'refer_text' => $referText,
                    'p_refer_id' => $refer['p_refer_id'],
                    'text_start' => $citation['text_start'],
                    'text_end' => $citation['text_end'],
                    'created_at' => $timestamp,
                    'updated_at' => $timestamp,
                ));

                // Each job is delayed one second more than the previous one.
                $this->pushJob(intval($checkId), $delaySeconds);
                $delaySeconds += 1;
                $checkIds[] = $checkId;
                $sectionsWithJobs[$amId] = true;
            }
        }
    }

    foreach ($sectionsWithJobs as $runningAmId => $unused) {
        $this->setAmRefCheckStatus($runningAmId, self::AM_STATUS_RUNNING);
    }

    return array(
        'article_id' => $articleId,
        'p_article_id' => $pArticleId,
        'queued' => count($checkIds),
        'skipped' => $skippedCount,
        'check_ids' => $checkIds,
        'queue' => self::QUEUE_NAME,
    );
}
/**
 * Recompute t_article_main.ref_check_status for one section from all of its detail rows.
 *
 * Any row with status 0 keeps the section RUNNING. Otherwise the section FAILs when a row
 * has status 2, a status-1 row has is_match = 0, or some row has a status other than 1;
 * it PASSes only when every row has status 1 and none of them is a mismatch.
 *
 * @param int $amId
 * @return int The AM_STATUS_* value that was determined.
 */
public function syncAmRefCheckStatus($amId)
{
    if ($amId <= 0) {
        return self::AM_STATUS_NONE;
    }

    $details = Db::name('article_reference_check_result')->where('am_id', $amId)->select();
    if (empty($details)) {
        $this->setAmRefCheckStatus($amId, self::AM_STATUS_NONE);
        return self::AM_STATUS_NONE;
    }

    $waiting = 0;
    $finished = 0;
    $failed = false;
    foreach ($details as $detail) {
        switch (intval($detail['status'])) {
            case 0:
                $waiting++;
                break;
            case 1:
                $finished++;
                if (intval($detail['is_match']) === 0) {
                    $failed = true;
                }
                break;
            case 2:
                $failed = true;
                break;
        }
    }

    if ($waiting > 0) {
        $result = self::AM_STATUS_RUNNING;
    } elseif ($failed || $finished !== count($details)) {
        $result = self::AM_STATUS_FAIL;
    } else {
        $result = self::AM_STATUS_PASS;
    }

    $this->setAmRefCheckStatus($amId, $result);

    return $result;
}
/**
 * Write ref_check_status for one article_main row; ids that are not positive are ignored.
 *
 * @param int $amId
 * @param int $status One of the AM_STATUS_* constants.
 * @return void
 */
public function setAmRefCheckStatus($amId, $status)
{
    $invalidId = $amId <= 0;
    if ($invalidId) {
        return;
    }

    $section = Db::name('article_main')->where('am_id', $amId);
    $section->update(array('ref_check_status' => $status));
}
/**
 * Delete all check result rows of an article and reset ref_check_status to AM_STATUS_NONE
 * on its article_main rows whose state is 0 or 2.
 *
 * @param int $articleId
 * @return void
 */
public function clearArticleChecks($articleId)
{
    $results = Db::name('article_reference_check_result')->where('article_id', $articleId);
    $results->delete();

    $reset = array('ref_check_status' => self::AM_STATUS_NONE);
    Db::name('article_main')
        ->where('article_id', $articleId)
        ->whereIn('state', [0, 2])
        ->update($reset);
}
/**
 * Translate an AM_STATUS_* value into its textual label.
 *
 * @param int $status
 * @return string 'none', 'pass', 'fail', 'running', or 'unknown' for any other value.
 */
public static function amStatusLabel($status)
{
    $labels = array(
        self::AM_STATUS_NONE => 'none',
        self::AM_STATUS_PASS => 'pass',
        self::AM_STATUS_FAIL => 'fail',
        self::AM_STATUS_RUNNING => 'running',
    );

    if (!isset($labels[$status])) {
        return 'unknown';
    }

    return $labels[$status];
}
/**
 * Fetch one check result row by its check_id.
 *
 * @param int $checkId
 * @return array|null The row, or null when the id is not positive or nothing was found.
 */
public function getResult($checkId)
{
    if ($checkId <= 0) {
        return null;
    }

    $found = Db::name('article_reference_check_result')->where('check_id', $checkId)->find();
    if (!$found) {
        return null;
    }

    return $found;
}
/**
 * List the check result rows of an article, ordered by am_id, cite_tag_start and reference_no.
 *
 * @param int  $articleId
 * @param int  $status       Only rows with this status are returned; a negative value disables the filter.
 * @param bool $onlyMismatch When true, additionally restrict to status = 1 and is_match = 0.
 * @return mixed Result of the query builder's select().
 */
public function listByArticle($articleId, $status = -1, $onlyMismatch = false)
{
    $query = Db::name('article_reference_check_result')->where('article_id', $articleId);

    $filterByStatus = $status >= 0;
    if ($filterByStatus) {
        $query->where('status', $status);
    }

    if ($onlyMismatch) {
        $query->where('status', 1);
        $query->where('is_match', 0);
    }

    return $query->order('am_id asc, cite_tag_start asc, reference_no asc')->select();
}
/**
 * Manuscript preview: marks mismatching reference numbers and their citing sentences
 * inside each section's content.
 *
 * All result rows of the article are loaded once; the same snapshot feeds both the
 * statistics and the mismatch index (indexBadResults() itself keeps only status = 1 rows
 * that are not a match), so the two can never disagree.
 *
 * @param int $articleId
 * @param int $amId When positive, only that section is rendered. The statistics are still
 *                  computed over every result row of the article.
 * @return array{article_id: int, article_ref_check_pass: bool|null, sections: array, issues: array, stats: array}
 */
public function buildArticlePreview($articleId, $amId = 0)
{
    $q = Db::name('article_main')
        ->field('am_id,content,sort,ref_check_status')
        ->where('article_id', $articleId)
        ->whereIn('state', [0, 2]);
    if ($amId > 0) {
        $q->where('am_id', $amId);
    }
    $mains = $q->order('sort asc')->select();

    $allRows = $this->listByArticle($articleId, -1);
    $badByAm = $this->indexBadResults($allRows);

    $stats = ['total' => 0, 'mismatch' => 0, 'match' => 0, 'pending' => 0];
    foreach ($allRows as $r) {
        $stats['total']++;
        if (intval($r['status']) === 0) {
            $stats['pending']++;
        } elseif (intval($r['is_match']) === 1) {
            $stats['match']++;
        } else {
            // Every non-pending row that is not a match lands here, whatever its status.
            $stats['mismatch']++;
        }
    }

    $sections = [];
    $issues = [];
    foreach ($mains as $main) {
        $id = intval($main['am_id']);
        $content = (string)$main['content'];
        $badIndex = isset($badByAm[$id]) ? $badByAm[$id] : array();
        $marked = $this->markContentForPreview($content, $id, $badIndex);
        $amStatus = intval($this->arrGet($main, 'ref_check_status', 0));
        $sections[] = [
            'am_id' => $id,
            'ref_check_status' => $amStatus,
            'ref_check_pass' => $amStatus === self::AM_STATUS_PASS,
            'ref_check_label' => self::amStatusLabel($amStatus),
            'content' => $content,
            'content_marked' => $marked['html'],
            'issue_count' => $marked['issue_count'],
        ];
        foreach ($marked['issues'] as $issue) {
            $issues[] = $issue;
        }
    }

    return [
        'article_id' => $articleId,
        'article_ref_check_pass' => $this->resolveArticlePass($sections),
        'sections' => $sections,
        'issues' => $issues,
        'stats' => $stats,
    ];
}
/**
 * Decide whether the whole article passes: every checked section must be PASS.
 * Sections with status NONE (no citations) are ignored.
 *
 * @param array $sections Section entries carrying ref_check_status.
 * @return bool|null false as soon as a section is neither NONE nor PASS, true when at least
 *                   one section was checked and all of them passed, null when none was checked.
 */
private function resolveArticlePass($sections)
{
    $passedSections = 0;
    foreach ($sections as $section) {
        $state = intval($this->arrGet($section, 'ref_check_status', 0));
        if ($state === self::AM_STATUS_PASS) {
            $passedSections++;
            continue;
        }
        if ($state !== self::AM_STATUS_NONE) {
            return false;
        }
    }

    if ($passedSections > 0) {
        return true;
    }

    return null;
}
/**
 * Group mismatching results by section.
 *
 * Only rows with status = 1 whose is_match is not 1 are kept; rows without a positive
 * am_id or reference_no are dropped. Per section two lookups are built:
 *  - by_raw:   normalized reference_raw => reference_no => row (only when reference_raw is not empty)
 *  - contexts: "text_start_text_end" => span info with check_ids, ref_nos and reasons per reference_no
 *
 * @param array $rows Check result rows.
 * @return array<int, array> am_id => ['by_raw' => ..., 'contexts' => ...]
 */
private function indexBadResults($rows)
{
    $index = array();

    foreach ($rows as $row) {
        $isMismatch = intval($row['status']) === 1 && intval($row['is_match']) !== 1;
        if (!$isMismatch) {
            continue;
        }

        $amId = intval($row['am_id']);
        $refNo = intval($row['reference_no']);
        if ($amId <= 0 || $refNo <= 0) {
            continue;
        }

        if (!isset($index[$amId])) {
            $index[$amId] = array('by_raw' => array(), 'contexts' => array());
        }

        $rawKey = $this->normalizeRefRawKey((string)$this->arrGet($row, 'reference_raw', ''));
        if ($rawKey !== '') {
            $index[$amId]['by_raw'][$rawKey][$refNo] = $row;
        }

        $spanStart = intval($row['text_start']);
        $spanEnd = intval($row['text_end']);
        $spanKey = $spanStart . '_' . $spanEnd;
        if (!isset($index[$amId]['contexts'][$spanKey])) {
            $index[$amId]['contexts'][$spanKey] = array(
                'text_start' => $spanStart,
                'text_end' => $spanEnd,
                'check_ids' => array(),
                'reasons' => array(),
                'ref_nos' => array(),
            );
        }

        $index[$amId]['contexts'][$spanKey]['check_ids'][] = intval($row['check_id']);
        $index[$amId]['contexts'][$spanKey]['ref_nos'][] = $refNo;

        $reason = trim((string)$this->arrGet($row, 'reason', ''));
        if ($reason !== '') {
            $index[$amId]['contexts'][$spanKey]['reasons'][$refNo] = $reason;
        }
    }

    return $index;
}
/**
 * Turn the raw text of a citation tag into a lookup key: the input is trimmed, the
 * search strings below are replaced by ',' / '-' (spaces are removed), and the result
 * is lower-cased.
 *
 * NOTE(review): several entries of the search list show up as empty strings here; they
 * are presumably non-ASCII comma/dash variants that were lost in transit — confirm
 * against the file in its original encoding before editing this list.
 *
 * @param string $raw
 * @return string
 */
private function normalizeRefRawKey($raw)
{
$raw = str_replace(
['', '', '—', '', '', '', ' '],
[',', '-', '-', '-', '-', '-', ''],
trim($raw)
);
return strtolower($raw);
}
/**
 * Insert preview markup into one section's content: mismatching reference numbers inside
 * <blue>[...]</blue> tags are wrapped in <span class="ref-no-error">, and the citing
 * sentences are wrapped in <span class="ref-context-error">.
 *
 * @param string $content  Section content as stored.
 * @param int    $amId     Section id, copied into each issue entry.
 * @param array  $badIndex The per-section structure built by indexBadResults (by_raw / contexts).
 * @return array ['html' => marked content, 'issues' => list of issue entries, 'issue_count' => int]
 */
private function markContentForPreview($content, $amId, $badIndex)
{
$badByRaw = isset($badIndex['by_raw']) ? $badIndex['by_raw'] : array();
$contexts = isset($badIndex['contexts']) ? $badIndex['contexts'] : array();
$issues = array();
$issueCount = 0;
// Nothing to mark: return the content untouched.
if ($content === '' || (empty($badByRaw) && empty($contexts))) {
return array('html' => $content, 'issues' => array(), 'issue_count' => 0);
}
$html = $content;
// 1) First mark the individual numbers inside each blue tag. Matching runs on the
//    original content, so the captured offsets are byte offsets into $content.
//    Only digit runs literally present in the tag text can be wrapped (for a range
//    such as [70-73] that means its two endpoints).
preg_match_all(
'/<blue>\[([\d,\-\s]+)\]<\/blue>/',
$html,
$matches,
PREG_OFFSET_CAPTURE
);
// Start offset and length change of every rewritten tag, used to shift sentence offsets later.
$citeDeltas = [];
if (!empty($matches[0])) {
$replacements = [];
foreach ($matches[0] as $idx => $match) {
$fullTag = $match[0];
$tagStart = $match[1];
$tagEnd = $tagStart + strlen($fullTag);
$inner = $matches[1][$idx][0];
// Mismatching rows are looked up by the normalized tag text.
$rawKey = $this->normalizeRefRawKey($inner);
$badNums = isset($badByRaw[$rawKey]) ? $badByRaw[$rawKey] : array();
$innerMarked = preg_replace_callback(
'/\d+/',
function ($numMatch) use ($badNums, &$issues, &$issueCount, $amId, $inner) {
$num = intval($numMatch[0]);
// Numbers without a mismatching row are left as they are.
if (!isset($badNums[$num])) {
return $numMatch[0];
}
$row = $badNums[$num];
$rowReason = isset($row['reason']) ? $row['reason'] : '';
$issueCount++;
$issues[] = array(
'am_id' => $amId,
'check_id' => intval($row['check_id']),
'reference_no' => $num,
'reference_raw' => $inner,
'reason' => $rowReason,
'confidence' => floatval(isset($row['confidence']) ? $row['confidence'] : 0),
);
// The reason goes into a title attribute, so it is HTML-escaped.
$title = htmlspecialchars(
'引用[' . $num . ']不合理: ' . $rowReason,
ENT_QUOTES,
'UTF-8'
);
return '<span class="ref-no-error" data-check-id="' . intval($row['check_id'])
. '" data-ref-no="' . $num . '" title="' . $title . '">'
. $numMatch[0] . '</span>';
},
$inner
);
// Every matched tag is rewritten; the error class and check ids are only added when it has mismatches.
$tagClass = !empty($badNums) ? ' ref-cite-error' : '';
$groupIds = !empty($badNums)
? implode(',', array_map('intval', array_column($badNums, 'check_id')))
: '';
$newHtml = '<blue class="ref-cite-tag' . $tagClass . '" data-ref-raw="' . htmlspecialchars($inner, ENT_QUOTES, 'UTF-8')
. '" data-check-ids="' . $groupIds . '">[' . $innerMarked . ']</blue>';
$replacements[] = [
'start' => $tagStart,
'end' => $tagEnd,
'html' => $newHtml,
'delta' => strlen($newHtml) - ($tagEnd - $tagStart),
];
}
// Apply from the last tag to the first so the offsets of earlier tags stay valid.
usort($replacements, function ($a, $b) {
return $b['start'] - $a['start'];
});
foreach ($replacements as $rep) {
$html = substr($html, 0, $rep['start']) . $rep['html'] . substr($html, $rep['end']);
$citeDeltas[] = ['start' => $rep['start'], 'delta' => $rep['delta']];
}
}
// Maps an offset in the original content to the rewritten html by adding the length
// change of every tag that starts before that offset.
$shiftByCite = function ($pos) use ($citeDeltas) {
$d = 0;
foreach ($citeDeltas as $cd) {
if ($cd['start'] < $pos) {
$d += $cd['delta'];
}
}
return $pos + $d;
};
// 2) Then mark the citing sentences, working from the end of the content backwards.
if (!empty($contexts)) {
$spans = array_values($contexts);
usort($spans, function ($a, $b) {
return $b['text_start'] - $a['text_start'];
});
// NOTE(review): offsets are only shifted for the rewritten tags, not for the span
// markup inserted by earlier iterations of this loop — this looks correct only
// while context spans do not overlap; confirm that they never do.
foreach ($spans as $span) {
$start = $span['text_start'];
$end = $span['text_end'];
if ($start < 0 || $end <= $start) {
continue;
}
$s = $shiftByCite($start);
$e = $shiftByCite($end);
// Clamp the end to the current html length.
if ($e > strlen($html)) {
$e = strlen($html);
}
$checkIds = array_values(array_unique($span['check_ids']));
$refNos = array_values(array_unique($span['ref_nos']));
sort($refNos);
// Collect "[n] reason" for every reference number that has a reason.
$reasonParts = [];
foreach ($refNos as $rn) {
if (!empty($span['reasons'][$rn])) {
$reasonParts[] = '[' . $rn . '] ' . $span['reasons'][$rn];
}
}
$title = htmlspecialchars(
'引用句可能不合理: ' . implode('; ', $reasonParts),
ENT_QUOTES,
'UTF-8'
);
$open = '<span class="ref-context-error" data-check-ids="' . implode(',', $checkIds)
. '" data-ref-nos="' . implode(',', $refNos) . '" title="' . $title . '">';
$close = '</span>';
$html = substr($html, 0, $s) . $open . substr($html, $s, $e - $s) . $close . substr($html, $e);
}
}
return ['html' => $html, 'issues' => $issues, 'issue_count' => $issueCount];
}
/**
 * Load the production_article_refer rows (state = 0) of a production article, keyed by
 * their integer `index` column.
 *
 * @param int $pArticleId
 * @return array<int, array> index => row; empty when $pArticleId is not positive.
 */
public function loadReferMapByPArticleId($pArticleId)
{
    if ($pArticleId <= 0) {
        return array();
    }

    $referRows = Db::name('production_article_refer')
        ->where('p_article_id', $pArticleId)
        ->where('state', 0)
        ->order('index asc')
        ->select();

    $byIndex = array();
    foreach ($referRows as $referRow) {
        $position = intval($referRow['index']);
        $byIndex[$position] = $referRow;
    }

    return $byIndex;
}
/**
 * Render a reference row as "Label: value" lines for the LLM prompt.
 *
 * The fields title, author, joura, dateno, refer_doi and doilink are emitted in that order,
 * labelled with their ucfirst() name, followed by refer_content labelled "Reference".
 * Fields that are empty after trimming are omitted.
 *
 * @param array $refer
 * @return string Lines joined with "\n".
 */
public function formatReferForLlm($refer)
{
    $lines = array();

    $fields = array('title', 'author', 'joura', 'dateno', 'refer_doi', 'doilink');
    foreach ($fields as $field) {
        $value = trim((string)$this->arrGet($refer, $field, ''));
        if ($value === '') {
            continue;
        }
        $lines[] = ucfirst($field) . ': ' . $value;
    }

    $body = trim((string)$this->arrGet($refer, 'refer_content', ''));
    if ($body !== '') {
        $lines[] = 'Reference: ' . $body;
    }

    return implode("\n", $lines);
}
/**
 * Extract the <blue>[...]</blue> citations from an article_main content string.
 *
 * For every tag the reference numbers are expanded and the surrounding sentence is taken as
 * context; when that context is not meaningful the bounds are widened once. Tags that still
 * have no meaningful context, or that expand to no number, are dropped.
 *
 * @param string $content
 * @return array List of entries with reference_raw, reference_numbers, original_text,
 *               reference_start, reference_end, text_start and text_end (byte offsets).
 */
public function extractReferences($content)
{
    preg_match_all('/<blue>\[([\d,\-\s]+)\]<\/blue>/', $content, $hits, PREG_OFFSET_CAPTURE);
    if (empty($hits[0])) {
        return array();
    }

    $citations = array();
    foreach ($hits[0] as $i => $tagHit) {
        $tagText = $tagHit[0];
        $tagStart = $tagHit[1];
        $tagEnd = $tagStart + strlen($tagText);

        $rawRef = trim($hits[1][$i][0]);
        $numbers = $this->expandReferenceNumbers($rawRef);

        $from = $this->findSentenceStart($content, $tagStart);
        $to = $this->findSentenceEnd($content, $tagEnd, $tagEnd);
        $context = $this->buildCitationContextText($content, $from, $to);

        if (!$this->isMeaningfulCitationContext($context)) {
            // The sentence alone says too little: take in the neighbouring sentences once.
            $widened = $this->widenCitationContextBounds($content, $tagStart, $tagEnd, $from, $to);
            $from = $widened[0];
            $to = $widened[1];
            $context = $this->buildCitationContextText($content, $from, $to);
        }

        $usable = $this->isMeaningfulCitationContext($context) && !empty($numbers);
        if (!$usable) {
            continue;
        }

        $citations[] = array(
            'reference_raw' => $rawRef,
            'reference_numbers' => $numbers,
            'original_text' => $context,
            'reference_start' => $tagStart,
            'reference_end' => $tagEnd,
            'text_start' => $from,
            'text_end' => $to,
        );
    }

    return $citations;
}
/**
 * Expand the text inside a citation tag into a list of reference numbers.
 *
 * The string is split on ','; a part of the form "a-b" with a <= b contributes every
 * integer from a to b, a part made only of digits contributes that integer, and any
 * other part (including a range with a > b) is ignored. Duplicates are removed.
 *
 * NOTE(review): several entries of the search list show up as empty strings here; they
 * are presumably non-ASCII comma/dash variants that were lost in transit — confirm
 * against the file in its original encoding before editing this list.
 *
 * @param string $refStr e.g. "1,3-5"
 * @return int[] e.g. [1, 3, 4, 5]
 */
public function expandReferenceNumbers($refStr)
{
// Normalize separator variants to ',' and '-' before splitting.
$refStr = str_replace(
['', '', '—', '', '', ''],
[',', '-', '-', '-', '-', '-'],
trim($refStr)
);
$numbers = [];
foreach (explode(',', $refStr) as $part) {
$part = trim($part);
if ($part === '') {
continue;
}
// "a-b" range (spaces around the dash allowed); reversed ranges contribute nothing.
if (preg_match('/^(\d+)\s*-\s*(\d+)$/', $part, $m)) {
$start = intval($m[1]);
$end = intval($m[2]);
if ($start <= $end) {
$numbers = array_merge($numbers, range($start, $end));
}
} elseif (ctype_digit($part)) {
$numbers[] = intval($part);
}
}
// Re-index after removing duplicates so the result is a plain list.
return array_values(array_unique($numbers));
}
/**
 * Cut the citation context out of the content and reduce it to plain text.
 *
 * $start and $end are BYTE offsets (they come from PREG_OFFSET_CAPTURE, strpos/strrpos and
 * strlen), so the slice is taken with substr(); a character-based mb_substr() would return
 * the wrong text as soon as the content contains multi-byte characters. Because a bound may
 * fall inside a multi-byte character, broken UTF-8 fragments at either edge are dropped
 * before the unicode-aware whitespace collapse runs.
 *
 * @param string $content
 * @param int    $start Byte offset where the context starts.
 * @param int    $end   Byte offset where the context ends (exclusive).
 * @return string Text without citation tags and HTML, whitespace collapsed to single spaces.
 */
private function buildCitationContextText($content, $start, $end)
{
    $text = (string)substr($content, $start, max(0, $end - $start));

    // Leading continuation bytes are the tail of a character cut by $start.
    $text = preg_replace('/^[\x80-\xBF]+/', '', $text);
    // A trailing lead byte with too few continuation bytes is a character cut by $end.
    $text = preg_replace(
        '/(?:[\xC0-\xDF]|[\xE0-\xEF][\x80-\xBF]?|[\xF0-\xF7][\x80-\xBF]{0,2})\z/',
        '',
        $text
    );

    $text = preg_replace('/<blue>\[[\d,\-\s]+\]<\/blue>/', '', $text);
    $text = trim(strip_tags($text));

    // With the u modifier preg_replace() returns null on invalid UTF-8; keep the text then.
    $collapsed = preg_replace('/\s+/u', ' ', $text);

    return $collapsed === null ? $text : $collapsed;
}
/**
 * Reject contexts that are empty, consist only of punctuation/symbols/whitespace, contain
 * no letter or digit, or are shorter than two characters (e.g. a lone "." left over after
 * the tags were removed).
 *
 * @param string $text
 * @return bool
 */
private function isMeaningfulCitationContext($text)
{
    $candidate = trim($text);

    if ($candidate === '' || $this->isOnlyPunctuationOrSpace($candidate)) {
        return false;
    }

    $hasLetterOrDigit = preg_match('/[\p{L}\p{N}]/u', $candidate);
    if (!$hasLetterOrDigit) {
        return false;
    }

    return mb_strlen($candidate) >= 2;
}
/**
 * Tell whether a non-empty text consists solely of whitespace, punctuation and symbols.
 *
 * @param string $text
 * @return bool
 */
private function isOnlyPunctuationOrSpace($text)
{
    $matched = preg_match('/^[\s\p{P}\p{S}]+$/u', $text);

    return $matched === 1;
}
/**
 * Widen a too-short context by one sentence in each direction, capped at 2000 bytes.
 *
 * When the widened span exceeds the cap it is replaced by a window of at most 2000 bytes
 * starting 1000 bytes before the midpoint of the citation tag.
 *
 * @param string $content
 * @param int    $tagStart Byte offset where the citation tag starts.
 * @param int    $tagEnd   Byte offset where the citation tag ends.
 * @param int    $start    Current context start.
 * @param int    $end      Current context end.
 * @return array [start, end]
 */
private function widenCitationContextBounds($content, $tagStart, $tagEnd, $start, $end)
{
    $contentLength = strlen($content);
    $spanLimit = 2000;

    // Backwards: look for the start of the sentence that precedes the current one.
    if ($start > 0) {
        $earlierStart = $this->findSentenceStart($content, max(0, $start - 1));
        if ($earlierStart < $start) {
            $start = $earlierStart;
        }
    }

    // Forwards: take in the sentence that follows the current end.
    $laterEnd = $this->findSentenceEnd($content, $end, $tagEnd);
    $canExtend = $laterEnd > $end && $laterEnd <= $contentLength;
    if ($canExtend) {
        $end = $laterEnd;
    }

    if ($end - $start > $spanLimit) {
        $halfWindow = (int)floor($spanLimit / 2);
        $tagMiddle = (int)floor(($tagStart + $tagEnd) / 2);
        $start = max(0, $tagMiddle - $halfWindow);
        $end = min($contentLength, $start + $spanLimit);
    }

    return array($start, $end);
}
/**
 * Decide whether the delimiter found at $pos really ends a sentence.
 *
 * Only '.' is ever rejected, and only when it sits between two digits (the decimal point
 * in 0.95 or 3.14). Every other delimiter, and any out-of-range position, counts as a
 * sentence boundary.
 *
 * @param string $content
 * @param int    $pos       Byte offset of the delimiter.
 * @param string $delimiter
 * @return bool
 */
private function isSentenceDelimiterAt($content, $pos, $delimiter)
{
    if ($delimiter !== '.') {
        return true;
    }

    $length = strlen($content);
    if ($pos < 0 || $pos >= $length) {
        return true;
    }

    $hasBothNeighbours = $pos > 0 && $pos + 1 < $length;
    if (!$hasBothNeighbours) {
        return true;
    }

    $betweenDigits = ctype_digit($content[$pos - 1]) && ctype_digit($content[$pos + 1]);

    return !$betweenDigits;
}
/**
 * Find the byte offset at which the sentence containing $position starts.
 *
 * For each delimiter the last real occurrence before $position is located; the sentence
 * starts right after the latest of them, or at 0 when there is none.
 *
 * Two details matter here:
 *  - the start is placed after the WHOLE delimiter (strlen($delimiter) bytes), so a
 *    multi-byte delimiter such as '。' is not cut in the middle;
 *  - when the nearest '.' turns out to be a decimal point the search continues further
 *    back instead of giving up on '.' altogether.
 *
 * @param string $content
 * @param int    $position Byte offset to search backwards from (exclusive).
 * @return int
 */
private function findSentenceStart($content, $position)
{
    $prefix = (string)substr($content, 0, $position);
    $start = 0;

    foreach (array('.', '。', '!', '?', "\n") as $delimiter) {
        $haystack = $prefix;
        while ($haystack !== '') {
            $pos = strrpos($haystack, $delimiter);
            if ($pos === false) {
                break;
            }
            if ($this->isSentenceDelimiterAt($content, $pos, $delimiter)) {
                $start = max($start, $pos + strlen($delimiter));
                break;
            }
            // Not a real boundary (decimal point): keep looking before it.
            $haystack = (string)substr($haystack, 0, $pos);
        }
    }

    return $start;
}
/**
 * Find the byte offset just past the end of the sentence that follows $searchFrom.
 *
 * The nearest real delimiter at or after the search position ends the sentence. When
 * $tagEnd is given and everything between the tag and that delimiter is only punctuation
 * or whitespace (an isolated period right after </blue>), the search continues with the
 * next delimiter.
 *
 * Two details matter here:
 *  - the end is placed after the WHOLE delimiter (strlen($delimiter) bytes), so a
 *    multi-byte delimiter such as '。' is not cut in the middle;
 *  - a '.' that is a decimal point is skipped and the next '.' is considered, instead of
 *    '.' being ignored for the rest of the content.
 *
 * @param string $content
 * @param int    $searchFrom Byte offset from which the sentence end is searched.
 * @param int    $tagEnd     End offset of the citation tag; 0 disables the isolated-period skip.
 * @return int Offset just past the delimiter, or the content length when none is found.
 */
private function findSentenceEnd($content, $searchFrom, $tagEnd = 0)
{
    $length = strlen($content);
    $minPos = max(0, $searchFrom);
    $delimiters = array('.', '。', '!', '?', "\n");

    while ($minPos < $length) {
        $end = null;
        foreach ($delimiters as $delimiter) {
            $from = $minPos;
            while ($from < $length) {
                $pos = strpos($content, $delimiter, $from);
                if ($pos === false) {
                    break;
                }
                if ($this->isSentenceDelimiterAt($content, $pos, $delimiter)) {
                    $candidate = $pos + strlen($delimiter);
                    if ($end === null || $candidate < $end) {
                        $end = $candidate;
                    }
                    break;
                }
                // Decimal point: look for the next occurrence of this delimiter.
                $from = $pos + 1;
            }
        }

        if ($end === null) {
            return $length;
        }
        if ($tagEnd <= 0 || $end <= $tagEnd) {
            return $end;
        }

        // Does anything besides punctuation stand between the tag and this delimiter?
        $gap = substr($content, $tagEnd, $end - $tagEnd);
        $gapText = trim(strip_tags(preg_replace('/<blue>\[[\d,\-\s]+\]<\/blue>/', '', $gap)));
        if ($gapText !== '' && !$this->isOnlyPunctuationOrSpace($gapText)) {
            return $end;
        }

        $minPos = $end;
    }

    return $length;
}
/**
 * Push one check onto the ReferenceCheck queue, optionally delayed.
 *
 * The outcome is written to the application log rather than dumped to the output:
 * a var_dump() here would end up in HTTP responses and worker stdout.
 *
 * @param int $checkId      Id of the article_reference_check_result row to process.
 * @param int $delaySeconds Delay before the job becomes available; 0 or less pushes immediately.
 * @return void
 * @throws \Exception Re-thrown after logging when the queue rejects the job.
 */
private function pushJob($checkId, $delaySeconds = 0)
{
    $jobClass = 'app\api\job\ReferenceCheck@fire';
    $data = ['check_id' => $checkId];

    try {
        if ($delaySeconds > 0) {
            $jobId = Queue::later($delaySeconds, $jobClass, $data, self::QUEUE_NAME);
        } else {
            $jobId = Queue::push($jobClass, $data, self::QUEUE_NAME);
        }
    } catch (\Exception $e) {
        \think\Log::error('ReferenceCheck pushJob failed check_id=' . $checkId . ' ' . $e->getMessage());
        throw $e;
    }

    \think\Log::info('ReferenceCheck pushJob queued check_id=' . $checkId . ' job_id=' . $jobId);
}
}