tougao/application/common/ReferenceCheckService.php

<?php

namespace app\common;

use think\Db;
use think\Env;
use think\Queue;

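/**
 * Relevance checking between in-text <blue>[n]</blue> citations and the
 * t_production_article_refer rows they point to (row index + 1 = n).
 * LLM configuration is the same as PromotionLlmService; each individual check
 * runs as a job on the ReferenceCheck queue.
 */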
class ReferenceCheckService
{
    const QUEUE_NAME = 'ReferenceCheck';

    /** t_article_main.ref_check_status */
    const AM_STATUS_NONE = 0;
    const AM_STATUS_PASS = 1;
    const AM_STATUS_FAIL = 2;
    const AM_STATUS_RUNNING = 3;

    /**
     * Null-coalescing lookup for PHP versions that do not have the `??` operator.
     *
     * @param mixed      $arr     Container to read from.
     * @param int|string $key     Offset to look up.
     * @param mixed      $default Returned when the offset is missing or holds null.
     * @return mixed
     */
    private function arrGet($arr, $key, $default = '')
    {
        if (isset($arr[$key])) {
            return $arr[$key];
        }

        return $default;
    }

    /**
     * Queue a single check from manually supplied citing text and reference text.
     *
     * Inserts one row into article_reference_check_result with status 0, marks the
     * owning article_main section as RUNNING when $extra['am_id'] is positive, and
     * pushes a queue job for the new row.
     *
     * @param string $contentA Citing text; must be non-empty after trimming.
     * @param string $contentB Reference text; stored trimmed.
     * @param array  $extra    Optional values: article_id, am_id, p_article_id, p_refer_id,
     *                         refer_index, reference_no, reference_raw, cite_tag_start,
     *                         cite_tag_end, text_start, text_end, queue_delay.
     * @return array ['check_id' => id of the inserted row, 'queued' => 1]
     * @throws \InvalidArgumentException When $contentA is empty after trimming.
     */
    public function enqueue($contentA, $contentB, array $extra = [])
    {
        $citingText = trim($contentA);
        if ($citingText === '') {
            throw new \InvalidArgumentException('content_a is required');
        }

        $sectionId = intval($this->arrGet($extra, 'am_id', 0));
        $queueDelay = intval($this->arrGet($extra, 'queue_delay', 0));
        $timestamp = date('Y-m-d H:i:s');

        $record = [
            'article_id'      => intval($this->arrGet($extra, 'article_id', 0)),
            'am_id'           => $sectionId,
            'p_article_id'    => intval($this->arrGet($extra, 'p_article_id', 0)),
            'p_refer_id'      => intval($this->arrGet($extra, 'p_refer_id', 0)),
            'refer_index'     => intval($this->arrGet($extra, 'refer_index', 0)),
            'reference_no'    => intval($this->arrGet($extra, 'reference_no', 0)),
            'reference_raw'   => (string)$this->arrGet($extra, 'reference_raw', ''),
            'cite_tag_start'  => intval($this->arrGet($extra, 'cite_tag_start', 0)),
            'cite_tag_end'    => intval($this->arrGet($extra, 'cite_tag_end', 0)),
            'text_start'      => intval($this->arrGet($extra, 'text_start', 0)),
            'text_end'        => intval($this->arrGet($extra, 'text_end', 0)),
            'content_a'       => $citingText,
            'content_b'       => trim($contentB),
            'status'          => 0,
            'created_at'      => $timestamp,
            'updated_at'      => $timestamp,
        ];
        $checkId = Db::name('article_reference_check_result')->insertGetId($record);

        // The section is flagged before the job is pushed, same order as before.
        if ($sectionId > 0) {
            $this->setAmRefCheckStatus($sectionId, self::AM_STATUS_RUNNING);
        }

        $this->pushJob(intval($checkId), $queueDelay);

        return ['check_id' => $checkId, 'queued' => 1];
    }
    /**
     * Queue reference checks for every citation found in one article_main row.
     *
     * Citations are extracted from $main['content']; a multi-number citation is
     * expanded into one check row (and one queue job) per reference number.
     * Reference numbers with no matching production_article_refer row are skipped.
     *
     * Section status handling:
     * - no citations in the content   -> AM_STATUS_NONE
     * - at least one job was queued   -> AM_STATUS_RUNNING
     * - citations exist but all were skipped -> status is left untouched, because no
     *   job would ever move the section out of RUNNING (same rule as enqueueByArticle).
     *
     * @param array $main article_main row; reads am_id, article_id and content.
     * @return void
     * @throws \RuntimeException When no production_article with state 0 exists for the article.
     */
    public function enqueueByArticleMain($main)
    {
        $amId = intval($main['am_id']);

        $citations = $this->extractReferences((string)$main['content']);
        if (empty($citations)) {
            $this->setAmRefCheckStatus($amId, self::AM_STATUS_NONE);
            return;
        }

        $prod = Db::name('production_article')
            ->where('article_id', $main['article_id'])
            ->where('state', 0)
            ->find();
        if (empty($prod)) {
            throw new \RuntimeException('production_article not found for article_id=' . $main['article_id']);
        }

        $pArticleId = intval($prod['p_article_id']);
        $referMap = $this->loadReferMapByPArticleId($pArticleId);

        $queued = 0;
        // Passed to pushJob and increased by one per job so the jobs are staggered.
        $delay = 0;
        foreach ($citations as $cite) {
            foreach ($cite['reference_numbers'] as $refNo) {
                // Citation number n refers to the refer row whose index is n - 1.
                $referIndex = $refNo - 1;
                if ($referIndex < 0 || !isset($referMap[$referIndex])) {
                    continue;
                }
                $refer = $referMap[$referIndex];
                $referText = $this->formatReferForLlm($refer);

                $now = date('Y-m-d H:i:s');
                // A range such as [70-73] becomes four rows: reference_no = 70, 71, 72, 73.
                // NOTE(review): refer_index stores the citation number, not $referIndex
                // (n - 1); kept as-is because readers of this column are not visible here.
                $checkId = Db::name('article_reference_check_result')->insertGetId([
                    'article_id'      => $main['article_id'],
                    'p_article_id'    => $pArticleId,
                    'am_id'           => $amId,
                    'reference_no'    => $refNo,
                    'refer_index'     => $refNo,
                    'origin_text'     => $cite['original_text'],
                    'refer_text'      => $referText,
                    'p_refer_id'      => $refer['p_refer_id'],
                    'text_start'      => $cite['text_start'],
                    'text_end'        => $cite['text_end'],
                    'created_at'      => $now,
                    'updated_at'      => $now,
                ]);
                $this->pushJob(intval($checkId), $delay);
                $queued++;
                $delay += 1;
            }
        }

        if ($queued > 0) {
            $this->setAmRefCheckStatus($amId, self::AM_STATUS_RUNNING);
        }
    }
    /**
     * Manual trigger: queue the DOI-based second-pass review for finished checks
     * of an article whose confidence is at most 0.65.
     *
     * NOTE(review): the query orders randomly and limits to 2 rows, so each call
     * samples at most two qualifying rows rather than all of them — confirm this
     * is intended and not a leftover from testing.
     *
     * @param int|string $articleId Article id; converted with intval().
     * @return array ['article_id' => int, 'check_ids2' => ids accepted for the second pass,
     *               'queued' => number of accepted ids]
     * @throws \InvalidArgumentException When the article id is not positive.
     */
    public function enqueueSecondPassByArticle($articleId)
    {
        $articleId = intval($articleId);
        if ($articleId <= 0) {
            throw new \InvalidArgumentException('article_id is required');
        }

        $rows = Db::name('article_reference_check_result')
            ->where('article_id', $articleId)
            ->where('status', 1)
            ->where('confidence', '<=', 0.65)
            ->orderRaw('rand()')
            ->limit(2)
            ->select();

        $checkIds2 = [];
        foreach ($rows as $checkLog) {
            $rowId = $this->resolveCheckRowId($checkLog);
            // maybeEnqueueSecondPass decides whether the row is actually queued.
            if ($this->maybeEnqueueSecondPass($rowId, floatval($checkLog['confidence']))) {
                $checkIds2[] = $rowId;
            }
        }

        return [
            'article_id'  => $articleId,
            'check_ids2'  => $checkIds2,
            'queued'      => count($checkIds2),
        ];
    }
    /**
     * Queue reference checks for every citation in every section of an article.
     *
     * Loads the production_article row (state 0 or 2) and its refer rows, then walks
     * the article_main sections (state 0 or 2, ordered by sort). Sections without
     * citations are set to AM_STATUS_NONE; sections for which at least one job was
     * queued are set to AM_STATUS_RUNNING once all jobs have been pushed.
     *
     * @param int|string $articleId Article id; converted with intval().
     * @return array ['article_id', 'p_article_id', 'queued', 'skipped', 'check_ids', 'queue']
     * @throws \InvalidArgumentException When the article id is not positive.
     * @throws \RuntimeException When the production article or the sections are missing.
     */
    public function enqueueByArticle($articleId)
    {
        $articleId = intval($articleId);
        if ($articleId <= 0) {
            throw new \InvalidArgumentException('article_id is required');
        }
        // whereIn is used for the state list, matching the other state filters in
        // this class; a plain where() with an array value is not an IN condition.
        $prod = Db::name('production_article')
            ->where('article_id', $articleId)
            ->whereIn('state', [0, 2])
            ->find();
        if (empty($prod)) {
            throw new \RuntimeException('production_article not found for article_id=' . $articleId);
        }
        $pArticleId = intval($prod['p_article_id']);
        $referMap = $this->loadReferMapByPArticleId($pArticleId);

        $mains = Db::name('article_main')
            ->field('am_id,content,article_id')
            ->where('article_id', $articleId)
            ->whereIn('state', [0, 2])
            ->order('sort asc')
            ->select();
        if (empty($mains)) {
            throw new \RuntimeException('article_main is empty');
        }
        $queued = 0;
        $skipped = 0;
        $checkIds = [];
        // Passed to pushJob and increased by one per job so the jobs are staggered.
        $delay = 0;
        $amIdsWithJobs = [];

        foreach ($mains as $main) {
            $amId = intval($main['am_id']);
            $citations = $this->extractReferences((string)$main['content']);
            if (empty($citations)) {
                $this->setAmRefCheckStatus($amId, self::AM_STATUS_NONE);
                continue;
            }
            foreach ($citations as $cite) {
                foreach ($cite['reference_numbers'] as $refNo) {
                    // Citation number n refers to the refer row whose index is n - 1.
                    $referIndex = $refNo - 1;
                    if ($referIndex < 0 || !isset($referMap[$referIndex])) {
                        $skipped++;
                        continue;
                    }
                    $refer = $referMap[$referIndex];
                    $referText = $this->formatReferForLlm($refer);

                    $now = date('Y-m-d H:i:s');
                    // A range such as [70-73] becomes four rows: reference_no = 70, 71, 72, 73.
                    // NOTE(review): refer_index stores the citation number, not $referIndex
                    // (n - 1); kept as-is because readers of this column are not visible here.
                    $checkId = Db::name('article_reference_check_result')->insertGetId([
                        'article_id'      => $main['article_id'],
                        'p_article_id'    => $pArticleId,
                        'am_id'           => $amId,
                        'reference_no'    => $refNo,
                        'refer_index'     => $refNo,
                        'origin_text'     => $cite['original_text'],
                        'refer_text'      => $referText,
                        'p_refer_id'      => $refer['p_refer_id'],
                        'text_start'      => $cite['text_start'],
                        'text_end'        => $cite['text_end'],
                        'created_at'      => $now,
                        'updated_at'      => $now,
                    ]);

                    $this->pushJob(intval($checkId), $delay);
                    $checkIds[] = $checkId;
                    $queued++;
                    $delay += 1;
                    $amIdsWithJobs[$amId] = true;
                }
            }
        }
        // Only sections that actually have a pending job are marked RUNNING.
        foreach (array_keys($amIdsWithJobs) as $amId) {
            $this->setAmRefCheckStatus($amId, self::AM_STATUS_RUNNING);
        }

        return [
            'article_id'   => $articleId,
            'p_article_id' => $pArticleId,
            'queued'       => $queued,
            'skipped'      => $skipped,
            'check_ids'    => $checkIds,
            'queue'        => self::QUEUE_NAME,
        ];
    }

    /**
     * Recompute t_article_main.ref_check_status for one section from all of its
     * detail rows, store it, and return it.
     *
     * Rules, in order: no rows -> NONE; any row with status 0 -> RUNNING;
     * any row with status 2, or status 1 with is_match 0 -> FAIL;
     * every row has status 1 -> PASS; anything else -> FAIL.
     *
     * @param int $amId Section id; non-positive values return NONE without touching the database.
     * @return int One of the AM_STATUS_* constants.
     */
    public function syncAmRefCheckStatus($amId)
    {
        if ($amId <= 0) {
            return self::AM_STATUS_NONE;
        }

        $rows = Db::name('article_reference_check_result')->where('am_id', $amId)->select();
        if (empty($rows)) {
            $this->setAmRefCheckStatus($amId, self::AM_STATUS_NONE);
            return self::AM_STATUS_NONE;
        }

        $pendingCount = 0;
        $finishedCount = 0;
        $failed = false;

        foreach ($rows as $row) {
            $rowStatus = intval($row['status']);
            if ($rowStatus === 0) {
                $pendingCount++;
                continue;
            }
            if ($rowStatus === 1) {
                $finishedCount++;
                if (intval($row['is_match']) === 0) {
                    $failed = true;
                }
            } elseif ($rowStatus === 2) {
                $failed = true;
            }
        }

        if ($pendingCount > 0) {
            $status = self::AM_STATUS_RUNNING;
        } elseif (!$failed && $finishedCount === count($rows)) {
            $status = self::AM_STATUS_PASS;
        } else {
            $status = self::AM_STATUS_FAIL;
        }

        $this->setAmRefCheckStatus($amId, $status);

        return $status;
    }

    /**
     * Write ref_check_status on the article_main row with the given am_id.
     * Does nothing when $amId is not positive.
     *
     * @param int $amId   Section id.
     * @param int $status Value to store (callers pass AM_STATUS_* constants).
     * @return void
     */
    public function setAmRefCheckStatus($amId, $status)
    {
        if ($amId > 0) {
            $changes = ['ref_check_status' => $status];
            Db::name('article_main')->where('am_id', $amId)->update($changes);
        }
    }

    /**
     * Delete every check row of an article and reset ref_check_status to NONE on
     * its article_main rows whose state is 0 or 2.
     *
     * @param int $articleId Article id.
     * @return void
     */
    public function clearArticleChecks($articleId)
    {
        $checkRows = Db::name('article_reference_check_result')->where('article_id', $articleId);
        $checkRows->delete();

        $reset = ['ref_check_status' => self::AM_STATUS_NONE];
        Db::name('article_main')
            ->where('article_id', $articleId)
            ->whereIn('state', [0, 2])
            ->update($reset);
    }

    /**
     * Map an AM_STATUS_* value to its label; unrecognised values give 'unknown'.
     *
     * @param int $status Section status.
     * @return string 'none', 'pass', 'fail', 'running' or 'unknown'.
     */
    public static function amStatusLabel($status)
    {
        $labels = [];
        $labels[self::AM_STATUS_NONE] = 'none';
        $labels[self::AM_STATUS_PASS] = 'pass';
        $labels[self::AM_STATUS_FAIL] = 'fail';
        $labels[self::AM_STATUS_RUNNING] = 'running';

        if (!isset($labels[$status])) {
            return 'unknown';
        }

        return $labels[$status];
    }

    /**
     * Read the row id from a check row. The table's primary key is `id`; the
     * external API still calls the parameter `check_id`, so that key is accepted
     * as a fallback.
     *
     * @param mixed $row Check row; anything that is not an array yields 0.
     * @return int First positive value of `id`, then `check_id`; 0 when neither qualifies.
     */
    public function resolveCheckRowId($row)
    {
        if (!is_array($row)) {
            return 0;
        }

        foreach (['id', 'check_id'] as $column) {
            if (!isset($row[$column])) {
                continue;
            }
            $candidate = intval($row[$column]);
            if ($candidate > 0) {
                return $candidate;
            }
        }

        return 0;
    }

    /**
     * Interpret the is_match value returned by the LLM.
     *
     * Booleans are returned unchanged; ints and floats are true only when their
     * integer part equals 1; everything else is cast to string, trimmed,
     * lower-cased and compared against a fixed list of affirmative words.
     *
     * @param mixed $value Raw value from the LLM response.
     * @return bool
     */
    public function parseLlmIsMatch($value)
    {
        if (is_bool($value)) {
            return $value;
        }

        $isNumber = is_int($value) || is_float($value);
        if ($isNumber) {
            return intval($value) === 1;
        }

        $affirmative = ['1', 'true', 'yes', 'match', 'matched'];
        $normalized = strtolower(trim((string)$value));

        return in_array($normalized, $affirmative, true);
    }

    /**
     * Update one check row. `reason` and `error_msg` are trimmed and cut to 512
     * characters first so an over-long value cannot make the UPDATE fail on the
     * varchar(512) columns; `updated_at` is always set to the current time.
     *
     * @param int|string $checkId Row id; converted with intval().
     * @param array      $fields  Column => value pairs to write.
     * @return int Number of affected rows as reported by the update call.
     * @throws \InvalidArgumentException When the id is not positive.
     * @throws \RuntimeException When the row does not exist or the update reports failure.
     */
    public function updateCheckResult($checkId, array $fields)
    {
        $checkId = intval($checkId);
        if ($checkId <= 0) {
            throw new \InvalidArgumentException('invalid check id');
        }

        foreach (['reason', 'error_msg'] as $textColumn) {
            if (isset($fields[$textColumn])) {
                $fields[$textColumn] = mb_substr(trim((string)$fields[$textColumn]), 0, 512);
            }
        }
        $fields['updated_at'] = date('Y-m-d H:i:s');

        $current = Db::name('article_reference_check_result')->where('id', $checkId)->find();
        if (empty($current)) {
            throw new \RuntimeException('article_reference_check_result not found, id=' . $checkId);
        }

        $affected = Db::name('article_reference_check_result')->where('id', $checkId)->update($fields);
        if ($affected === false) {
            throw new \RuntimeException('article_reference_check_result update failed, id=' . $checkId);
        }

        $affectedRows = intval($affected);
        \think\Log::info('updateCheckResult id=' . $checkId . ' affected=' . $affectedRows);

        return $affectedRows;
    }

    /**
     * Fetch one check row by id.
     *
     * @param int $checkId Row id.
     * @return array|null The row, or null when the id is not positive or nothing was found.
     */
    public function getResult($checkId)
    {
        if ($checkId <= 0) {
            return null;
        }

        $row = Db::name('article_reference_check_result')->where('id', $checkId)->find();
        if (!$row) {
            return null;
        }

        return $row;
    }

    /**
     * List the check rows of an article ordered by am_id, cite_tag_start and
     * reference_no.
     *
     * @param int  $articleId    Article id.
     * @param int  $status       Filter on the status column when >= 0; negative means no filter.
     * @param bool $onlyMismatch When true, additionally require status = 1 and is_match = 0.
     * @return mixed Result of the query builder's select() call.
     */
    public function listByArticle($articleId, $status = -1, $onlyMismatch = false)
    {
        $query = Db::name('article_reference_check_result')->where('article_id', $articleId);

        if ($status >= 0) {
            $query = $query->where('status', $status);
        }
        if ($onlyMismatch) {
            $query = $query->where('status', 1)->where('is_match', 0);
        }

        $query = $query->order('am_id asc, cite_tag_start asc, reference_no asc');

        return $query->select();
    }

    /**
     * Build the manuscript preview: each section's content with mismatching
     * citation numbers and citing sentences marked, plus a flat issue list and
     * counters.
     *
     * The check rows are loaded once; the same result feeds both the counters and
     * the bad-result index (indexBadResults ignores rows whose status is not 1).
     * Counters always cover the whole article, even when $amId narrows the sections.
     *
     * @param int $articleId Article id.
     * @param int $amId      When positive, only this section is rendered.
     * @return array ['article_id', 'article_ref_check_pass' => bool|null,
     *               'sections' => array, 'issues' => array,
     *               'stats' => ['total', 'mismatch', 'match', 'pending']]
     */
    public function buildArticlePreview($articleId, $amId = 0)
    {
        $q = Db::name('article_main')
            ->field('am_id,content,sort,ref_check_status')
            ->where('article_id', $articleId)
            ->whereIn('state', [0, 2]);
        if ($amId > 0) {
            $q->where('am_id', $amId);
        }
        $mains = $q->order('sort asc')->select();

        $allRows = $this->listByArticle($articleId, -1);
        $badByAm = $this->indexBadResults($allRows);

        $sections = [];
        $issues = [];
        $stats = ['total' => 0, 'mismatch' => 0, 'match' => 0, 'pending' => 0];

        foreach ($allRows as $r) {
            $stats['total']++;
            if (intval($r['status']) === 0) {
                $stats['pending']++;
            } elseif (intval($r['is_match']) === 1) {
                $stats['match']++;
            } else {
                // Every non-pending row that is not a match lands here, whatever its status.
                $stats['mismatch']++;
            }
        }

        foreach ($mains as $main) {
            $id = intval($main['am_id']);
            $content = (string)$main['content'];
            $badIndex = isset($badByAm[$id]) ? $badByAm[$id] : array();
            $marked = $this->markContentForPreview($content, $id, $badIndex);
            $amStatus = intval($this->arrGet($main, 'ref_check_status', 0));
            $sections[] = [
                'am_id'              => $id,
                'ref_check_status'   => $amStatus,
                'ref_check_pass'     => $amStatus === self::AM_STATUS_PASS,
                'ref_check_label'    => self::amStatusLabel($amStatus),
                'content'            => $content,
                'content_marked'     => $marked['html'],
                'issue_count'        => $marked['issue_count'],
            ];
            foreach ($marked['issues'] as $issue) {
                $issues[] = $issue;
            }
        }

        $articlePass = $this->resolveArticlePass($sections);

        return [
            'article_id'          => $articleId,
            'article_ref_check_pass' => $articlePass,
            'sections'            => $sections,
            'issues'              => $issues,
            'stats'               => $stats,
        ];
    }

    /**
     * Decide whether the whole article passes: every section that has a status
     * other than NONE must be PASS. Sections without citations (NONE) are ignored.
     *
     * @param array $sections Section entries carrying ref_check_status.
     * @return bool|null true when at least one section was checked and all checked
     *                   sections passed, false as soon as one is running or failed,
     *                   null when no section was checked at all.
     */
    private function resolveArticlePass($sections)
    {
        $passedSections = 0;

        foreach ($sections as $section) {
            $status = intval($this->arrGet($section, 'ref_check_status', 0));
            if ($status === self::AM_STATUS_PASS) {
                $passedSections++;
                continue;
            }
            if ($status !== self::AM_STATUS_NONE) {
                return false;
            }
        }

        if ($passedSections > 0) {
            return true;
        }

        return null;
    }

    /**
     * Group the mismatching check rows by section for the preview renderer.
     *
     * Only rows with status 1 whose is_match is not 1 are kept; rows without a
     * positive am_id or reference_no are dropped. For each section two lookups are
     * built:
     * - by_raw:   normalised reference_raw => reference_no => row
     *             (only for rows that carry a non-empty reference_raw)
     * - contexts: "text_start_text_end" => span info with the check ids, reference
     *             numbers and per-number reasons collected for that span
     *
     * @param array $rows Check rows.
     * @return array<int, array> am_id => ['by_raw' => array, 'contexts' => array]
     */
    private function indexBadResults($rows)
    {
        $index = [];

        foreach ($rows as $row) {
            $finished = intval($row['status']) === 1;
            if (!$finished || intval($row['is_match']) === 1) {
                continue;
            }

            $sectionId = intval($row['am_id']);
            $number = intval($row['reference_no']);
            if ($sectionId <= 0 || $number <= 0) {
                continue;
            }

            if (!isset($index[$sectionId])) {
                $index[$sectionId] = ['by_raw' => [], 'contexts' => []];
            }

            $tagKey = $this->normalizeRefRawKey((string)$this->arrGet($row, 'reference_raw', ''));
            if ($tagKey !== '') {
                $index[$sectionId]['by_raw'][$tagKey][$number] = $row;
            }

            $from = intval($row['text_start']);
            $to = intval($row['text_end']);
            $spanKey = $from . '_' . $to;
            if (!isset($index[$sectionId]['contexts'][$spanKey])) {
                $index[$sectionId]['contexts'][$spanKey] = [
                    'text_start' => $from,
                    'text_end'   => $to,
                    'check_ids'  => [],
                    'reasons'    => [],
                    'ref_nos'    => [],
                ];
            }

            $index[$sectionId]['contexts'][$spanKey]['check_ids'][] = $this->resolveCheckRowId($row);
            $index[$sectionId]['contexts'][$spanKey]['ref_nos'][] = $number;

            $reason = trim((string)$this->arrGet($row, 'reason', ''));
            if ($reason !== '') {
                $index[$sectionId]['contexts'][$spanKey]['reasons'][$number] = $reason;
            }
        }

        return $index;
    }

    /**
     * Normalise the inner text of a citation tag so that differently typed
     * variants compare equal: the input is trimmed, a full-width comma becomes an
     * ASCII comma, several Unicode dash variants become an ASCII hyphen, the last
     * character in the search list is removed, and the result is lower-cased.
     *
     * NOTE(review): the last search character looks like a plain space but could be
     * a non-breaking space — confirm the exact character in the file.
     *
     * @param string $raw Inner text of a citation tag.
     * @return string Normalised lookup key.
     */
    private function normalizeRefRawKey($raw)
    {
        $raw = str_replace(
            ['，', '–', '—', '−', '‐', '‑', ' '],
            [',', '-', '-', '-', '-', '-', ''],
            trim($raw)
        );
        return strtolower($raw);
    }

    /**
     * Mark the mismatching citations of one section for the preview.
     *
     * Step 1 rewrites every <blue>[...]</blue> tag: the tag gets preview classes and
     * data attributes, and each number inside it that has a mismatching check row is
     * wrapped in a span. Step 2 wraps each recorded citing-sentence span
     * (text_start..text_end) in a span, after shifting the recorded offsets by the
     * length changes that step 1 introduced in front of them.
     *
     * NOTE(review): text_start/text_end are used as byte offsets into the original
     * content (substr/strlen); confirm that this is how they are produced.
     * NOTE(review): number marking relies on rows carrying reference_raw; rows
     * without it only take part in step 2.
     *
     * @param string $content  Section content.
     * @param int    $amId     Section id, copied into each issue entry.
     * @param array  $badIndex Per-section structure from indexBadResults
     *                         ('by_raw' and 'contexts').
     * @return array ['html' => marked content, 'issues' => issue entries for marked
     *               numbers, 'issue_count' => number of marked numbers]
     */
    private function markContentForPreview($content, $amId, $badIndex)
    {
        $badByRaw = isset($badIndex['by_raw']) ? $badIndex['by_raw'] : array();
        $contexts = isset($badIndex['contexts']) ? $badIndex['contexts'] : array();
        $issues = array();
        $issueCount = 0;

        if ($content === '' || (empty($badByRaw) && empty($contexts))) {
            return array('html' => $content, 'issues' => array(), 'issue_count' => 0);
        }

        $html = $content;

        // Step 1: mark the individual numbers inside each blue tag. Offsets are taken
        // from the original text; in a range such as [70-73] only the numbers that
        // have a mismatching row (for example 70 and 71) are wrapped.
        preg_match_all(
            '/<blue>\[([\d,\-\s]+)\]<\/blue>/',
            $html,
            $matches,
            PREG_OFFSET_CAPTURE
        );
        $citeDeltas = [];
        if (!empty($matches[0])) {
            $replacements = [];
            foreach ($matches[0] as $idx => $match) {
                $fullTag = $match[0];
                $tagStart = $match[1];
                $tagEnd = $tagStart + strlen($fullTag);
                $inner = $matches[1][$idx][0];
                $rawKey = $this->normalizeRefRawKey($inner);
                $badNums = isset($badByRaw[$rawKey]) ? $badByRaw[$rawKey] : array();

                $innerMarked = preg_replace_callback(
                    '/\d+/',
                    function ($numMatch) use ($badNums, &$issues, &$issueCount, $amId, $inner) {
                        $num = intval($numMatch[0]);
                        if (!isset($badNums[$num])) {
                            return $numMatch[0];
                        }
                        $row = $badNums[$num];
                        $rowReason = isset($row['reason']) ? $row['reason'] : '';
                        $issueCount++;
                        $issues[] = array(
                            'am_id'         => $amId,
                            'check_id'      => $this->resolveCheckRowId($row),
                            'reference_no'  => $num,
                            'reference_raw' => $inner,
                            'reason'        => $rowReason,
                            'confidence'    => floatval(isset($row['confidence']) ? $row['confidence'] : 0),
                        );
                        $title = htmlspecialchars(
                            '引用[' . $num . ']不合理: ' . $rowReason,
                            ENT_QUOTES,
                            'UTF-8'
                        );
                        return '<span class="ref-no-error" data-check-id="' . $this->resolveCheckRowId($row)
                            . '" data-ref-no="' . $num . '" title="' . $title . '">'
                            . $numMatch[0] . '</span>';
                    },
                    $inner
                );

                $tagClass = !empty($badNums) ? ' ref-cite-error' : '';
                // Row ids are resolved through resolveCheckRowId (primary key `id`,
                // `check_id` as fallback) so the attribute is filled for rows that
                // only carry `id`.
                $groupIds = !empty($badNums)
                    ? implode(',', array_map([$this, 'resolveCheckRowId'], $badNums))
                    : '';
                $newHtml = '<blue class="ref-cite-tag' . $tagClass . '" data-ref-raw="' . htmlspecialchars($inner, ENT_QUOTES, 'UTF-8')
                    . '" data-check-ids="' . $groupIds . '">[' . $innerMarked . ']</blue>';
                $replacements[] = [
                    'start' => $tagStart,
                    'end'   => $tagEnd,
                    'html'  => $newHtml,
                    'delta' => strlen($newHtml) - ($tagEnd - $tagStart),
                ];
            }
            // Apply from the last tag to the first so earlier offsets stay valid.
            usort($replacements, function ($a, $b) {
                return $b['start'] - $a['start'];
            });
            foreach ($replacements as $rep) {
                $html = substr($html, 0, $rep['start']) . $rep['html'] . substr($html, $rep['end']);
                $citeDeltas[] = ['start' => $rep['start'], 'delta' => $rep['delta']];
            }
        }

        // Translate an offset in the original content into an offset in $html by
        // adding the growth of every rewritten tag that starts before it.
        $shiftByCite = function ($pos) use ($citeDeltas) {
            $d = 0;
            foreach ($citeDeltas as $cd) {
                if ($cd['start'] < $pos) {
                    $d += $cd['delta'];
                }
            }
            return $pos + $d;
        };

        // Step 2: wrap the citing sentences, working from the back to the front.
        if (!empty($contexts)) {
            $spans = array_values($contexts);
            usort($spans, function ($a, $b) {
                return $b['text_start'] - $a['text_start'];
            });
            foreach ($spans as $span) {
                $start = $span['text_start'];
                $end = $span['text_end'];
                if ($start < 0 || $end <= $start) {
                    continue;
                }
                $s = $shiftByCite($start);
                $e = $shiftByCite($end);
                if ($e > strlen($html)) {
                    $e = strlen($html);
                }
                // A span that starts at or beyond its (clamped) end has nothing to
                // wrap; skip it instead of appending an empty wrapper.
                if ($s >= $e) {
                    continue;
                }
                $checkIds = array_values(array_unique($span['check_ids']));
                $refNos = array_values(array_unique($span['ref_nos']));
                sort($refNos);
                $reasonParts = [];
                foreach ($refNos as $rn) {
                    if (!empty($span['reasons'][$rn])) {
                        $reasonParts[] = '[' . $rn . '] ' . $span['reasons'][$rn];
                    }
                }
                $title = htmlspecialchars(
                    '引用句可能不合理: ' . implode('; ', $reasonParts),
                    ENT_QUOTES,
                    'UTF-8'
                );
                $open = '<span class="ref-context-error" data-check-ids="' . implode(',', $checkIds)
                    . '" data-ref-nos="' . implode(',', $refNos) . '" title="' . $title . '">';
                $close = '</span>';
                $html = substr($html, 0, $s) . $open . substr($html, $s, $e - $s) . $close . substr($html, $e);
            }
        }

        return ['html' => $html, 'issues' => $issues, 'issue_count' => $issueCount];
    }

    /**
     * Load the production_article_refer rows (state 0) of a production article,
     * keyed by the integer value of their `index` column.
     *
     * @param int $pArticleId Production article id.
     * @return array<int, array> index => row; empty when the id is not positive.
     */
    public function loadReferMapByPArticleId($pArticleId)
    {
        if ($pArticleId <= 0) {
            return [];
        }

        $referRows = Db::name('production_article_refer')
            ->where('p_article_id', $pArticleId)
            ->where('state', 0)
            ->order('index asc')
            ->select();

        $byIndex = [];
        foreach ($referRows as $referRow) {
            $position = intval($referRow['index']);
            $byIndex[$position] = $referRow;
        }

        return $byIndex;
    }
    /**
     * Render a refer row as plain text for the LLM: one "Label: value" line per
     * non-empty metadata field, followed by the raw reference text when present.
     *
     * @param array $refer production_article_refer row.
     * @return string Lines joined with "\n"; empty when no field has a value.
     */
    public function formatReferForLlm($refer)
    {
        $lines = [];

        $metadataFields = ['title', 'author', 'joura', 'dateno', 'refer_doi', 'doilink'];
        foreach ($metadataFields as $field) {
            $value = trim((string)$this->arrGet($refer, $field, ''));
            if ($value === '') {
                continue;
            }
            $lines[] = ucfirst($field) . ': ' . $value;
        }

        $rawReference = trim((string)$this->arrGet($refer, 'refer_content', ''));
        if ($rawReference !== '') {
            $lines[] = 'Reference: ' . $rawReference;
        }

        return implode("\n", $lines);
    }

    /**
     * Extract a DOI from the refer_doi field only (used for the second-pass
     * Crossref abstract lookup).
     *
     * @param mixed $refer Refer row; anything that is not an array yields ''.
     * @return string First DOI found in refer_doi, or '' when the field is empty,
     *                contains "not available", or holds no DOI.
     */
    public function extractReferDoiOnly($refer)
    {
        if (!is_array($refer)) {
            return '';
        }

        $field = trim((string)$this->arrGet($refer, 'refer_doi', ''));
        $unusable = $field === '' || stripos($field, 'not available') !== false;
        if ($unusable) {
            return '';
        }

        $found = $this->extractDoisFromString($field);
        if (empty($found)) {
            return '';
        }

        return $found[0];
    }

    /**
     * Look up the abstract for a refer row's refer_doi through Crossref
     * (second-pass check only).
     *
     * @param mixed $refer Refer row.
     * @return array ['text' => string, 'has_abstract' => bool, 'doi' => string];
     *               text is '' and has_abstract false when there is no DOI or the
     *               lookup yields nothing.
     */
    public function fetchCrossrefAbstractByReferDoi($refer)
    {
        $result = ['text' => '', 'has_abstract' => false, 'doi' => ''];

        $doi = $this->extractReferDoiOnly($refer);
        if ($doi === '') {
            return $result;
        }
        $result['doi'] = $doi;

        $mailto = trim((string)Env::get('crossref_mailto', ''));
        $crossref = new CrossrefService(['mailto' => $mailto]);

        $block = $this->extractCrossrefBlock($doi, $crossref);
        if ($block !== null) {
            $result['text'] = $block['text'];
            $result['has_abstract'] = !empty($block['has_abstract']);
        }

        return $result;
    }

    /**
     * Interpret the can_support flag of an LLM response. When the key is absent,
     * is_match is used instead (treated as false when that is missing too).
     *
     * @param mixed $llmResult Decoded LLM response; anything that is not an array yields false.
     * @return bool
     */
    public function parseLlmCanSupport($llmResult)
    {
        if (!is_array($llmResult)) {
            return false;
        }

        if (array_key_exists('can_support', $llmResult)) {
            $flag = $llmResult['can_support'];
        } elseif (isset($llmResult['is_match'])) {
            $flag = $llmResult['is_match'];
        } else {
            $flag = false;
        }

        return $this->parseLlmIsMatch($flag);
    }

    /**
     * First-pass input: the whole section text (article_main.content) as plain text.
     *
     * Blue citation tags are reduced to "[n]", remaining tags are stripped, HTML
     * entities are decoded and whitespace runs are collapsed to a single space.
     * Text longer than the limit is cut and suffixed with "...".
     *
     * @param array $row      Check row; am_id is read from it.
     * @param int   $maxChars Character limit; values below 500 are raised to 500.
     * @return string Plain text, or '' when the section is missing or empty.
     */
    public function resolveMainContentForJob(array $row, $maxChars = 8000)
    {
        $sectionId = intval($this->arrGet($row, 'am_id', 0));
        if ($sectionId <= 0) {
            return '';
        }

        $section = Db::name('article_main')
            ->field('content')
            ->where('am_id', $sectionId)
            ->find();
        if (empty($section)) {
            return '';
        }

        $plain = trim((string)$this->arrGet($section, 'content', ''));
        if ($plain === '') {
            return '';
        }

        $plain = preg_replace('/<blue>\[([\d,\-\s]+)\]<\/blue>/', '[$1]', $plain);
        $plain = html_entity_decode(strip_tags($plain), ENT_QUOTES | ENT_HTML5, 'UTF-8');
        $plain = trim(preg_replace('/\s+/u', ' ', $plain));

        $limit = max(500, intval($maxChars));
        if (mb_strlen($plain) <= $limit) {
            return $plain;
        }

        return mb_substr($plain, 0, $limit) . '...';
    }

    /**
     * Local context around the citation: the row's origin_text, or content_a when
     * origin_text is empty after trimming.
     *
     * @param array $row Check row.
     * @return string Trimmed context text; may be ''.
     */
    public function resolveCitationContextForJob(array $row)
    {
        $context = trim((string)$this->arrGet($row, 'origin_text', ''));
        if ($context !== '') {
            return $context;
        }

        return trim((string)$this->arrGet($row, 'content_a', ''));
    }

    /**
     * Extract the preferred DOI (10.xxxx/...) from a refer row.
     *
     * Field priority: refer_content (the DOI in the raw citation text is closest to
     * the work actually cited) > refer_doi > doi > doilink.
     *
     * @param mixed $refer Refer row.
     * @return string Highest-priority DOI, or '' when none is found.
     */
    public function extractDoiFromRefer($refer)
    {
        $candidates = $this->extractAllDoiCandidatesFromRefer($refer);
        if (empty($candidates)) {
            return '';
        }

        return $candidates[0];
    }

    /**
     * Collect every DOI that may belong to a refer row, de-duplicated and in
     * priority order (refer_content, refer_doi, doi, doilink).
     *
     * Used by the second-pass DOI review: when the refer_doi metadata disagrees
     * with the DOI in the raw citation text, the DOI from the raw text is tried
     * first when fetching the real abstract.
     *
     * @param mixed $refer Refer row; anything that is not an array yields [].
     * @return string[]
     */
    public function extractAllDoiCandidatesFromRefer($refer)
    {
        if (!is_array($refer)) {
            return [];
        }

        $candidates = [];
        foreach (['refer_content', 'refer_doi', 'doi', 'doilink'] as $field) {
            $raw = (string)$this->arrGet($refer, $field, '');
            foreach ($this->extractDoisFromString($raw) as $doi) {
                if (in_array($doi, $candidates, true)) {
                    continue;
                }
                $candidates[] = $doi;
            }
        }

        return $candidates;
    }

    /**
     * Extract every DOI of the form 10.xxxx/yyy from arbitrary text.
     *
     * Two patterns are applied in order: DOIs following "doi.org/", then bare
     * "10.<registrant>/<suffix>" occurrences. If neither yields anything and the
     * text itself starts with "10.", the whole text is tried as one DOI. Text that
     * is empty or contains "not available" yields nothing.
     *
     * @param mixed $text Source text; cast to string.
     * @return string[] Unique DOIs in order of first appearance.
     */
    private function extractDoisFromString($text)
    {
        $text = trim((string)$text);
        if ($text === '' || stripos($text, 'not available') !== false) {
            return [];
        }

        $found = [];

        $patterns = [
            '~doi\.org/([^\s?#"\'<>]+)~i',
            '~\b(10\.\d{3,9}/[^\s?#"\'<>]+)~i',
        ];
        foreach ($patterns as $pattern) {
            if (!preg_match_all($pattern, $text, $hits)) {
                continue;
            }
            foreach ($hits[1] as $candidate) {
                $candidate = $this->trimDoiTail(trim($candidate));
                if ($this->isValidDoi($candidate)) {
                    $found[] = $candidate;
                }
            }
        }

        // Last resort: treat the entire text as the DOI.
        if (empty($found) && strpos($text, '10.') === 0) {
            $whole = $this->trimDoiTail($text);
            if ($this->isValidDoi($whole)) {
                $found[] = $whole;
            }
        }

        return array_values(array_unique($found));
    }

    /**
     * Strip trailing punctuation, closing brackets, quotes, backslashes and
     * whitespace that commonly follow a DOI inside running text.
     *
     * @param string $doi
     * @return string
     */
    private function trimDoiTail($doi)
    {
        $tailChars = ".,;:)]}>\"'\\ \t\n\r";

        return rtrim($doi, $tailChars);
    }

    /**
     * Whether the value is exactly "10." + 3-9 digits + "/" + a non-empty
     * suffix without whitespace.
     *
     * @param mixed $doi
     * @return bool
     */
    private function isValidDoi($doi)
    {
        $candidate = (string)$doi;

        return preg_match('~^10\.\d{3,9}/[^\s]+$~i', $candidate) === 1;
    }

    /**
     * Fetch the literature content behind a refer row's DOI via PubMed / Crossref
     * (the local LLM cannot open web pages, so the content is fetched up front).
     *
     * Behaviour:
     * - every DOI candidate of the refer row is tried in priority order
     *   (refer_content > refer_doi > doi > doilink);
     * - the first candidate that yields an abstract wins immediately;
     * - otherwise the first candidate that yielded any block at all is used;
     * - when nothing could be fetched an empty string is returned (callers use
     *   this to skip the second-pass recheck).
     *
     * @param mixed $refer refer row
     * @return string text block for the LLM, or '' when nothing was fetched
     */
    public function fetchDoiLiteratureBlock($refer)
    {
        $candidates = $this->extractAllDoiCandidatesFromRefer($refer);
        if (empty($candidates)) {
            return '';
        }

        $pubmed = new PubmedService([
            'email' => trim((string)Env::get('pubmed_email', '')),
            'tool'  => trim((string)Env::get('pubmed_tool', 'tmrjournals')),
        ]);
        $crossref = new CrossrefService([
            'mailto' => trim((string)Env::get('crossref_mailto', '')),
        ]);

        // First block without an abstract, kept in case no candidate has one.
        $metadataOnly = null;

        foreach ($candidates as $doi) {
            $block = $this->buildDoiBlockFromSources($doi, $pubmed, $crossref);
            if ($block === null) {
                continue;
            }
            if (!empty($block['has_abstract'])) {
                return $block['text'];
            }
            if ($metadataOnly === null) {
                $metadataOnly = $block;
            }
        }

        return $metadataOnly === null ? '' : $metadataOnly['text'];
    }

    /**
     * Fetch the real content for a single DOI.
     *
     * PubMed is queried first. When it returns a title or an abstract, a PubMed
     * block is built; if that block lacks an abstract, Crossref is asked for one
     * and appended as a supplement. When PubMed returns nothing usable, the
     * Crossref block is returned as-is.
     *
     * @return array|null ['text' => string, 'has_abstract' => bool], or null
     */
    private function buildDoiBlockFromSources($doi, PubmedService $pubmed, CrossrefService $crossref)
    {
        $doi = trim((string)$doi);
        if ($doi === '') {
            return null;
        }

        $pub = $pubmed->fetchByDoi($doi);
        $isRecord = is_array($pub);
        $pubAbstract = $isRecord ? trim((string)$this->arrGet($pub, 'abstract', '')) : '';
        $pubTitle = $isRecord ? trim((string)$this->arrGet($pub, 'title', '')) : '';

        // PubMed gave neither abstract nor title: rely on Crossref alone.
        if (!$isRecord || ($pubAbstract === '' && $pubTitle === '')) {
            return $this->extractCrossrefBlock($doi, $crossref);
        }

        $lines = ['Source: PubMed (DOI ' . $doi . ')'];

        $scalarFields = [
            'title'   => 'Actual Title: ',
            'journal' => 'Journal: ',
            'year'    => 'Year: ',
        ];
        foreach ($scalarFields as $key => $label) {
            if (!empty($pub[$key])) {
                $lines[] = $label . trim((string)$pub[$key]);
            }
        }

        $listFields = [
            'publication_types' => 'Publication Types: ',
            'mesh_terms'        => 'MeSH: ',
        ];
        foreach ($listFields as $key => $label) {
            if (!empty($pub[$key])) {
                $lines[] = $label . implode('; ', (array)$pub[$key]);
            }
        }

        if ($pubAbstract !== '') {
            $lines[] = 'Abstract: ' . $this->truncate($pubAbstract, 3500);

            return ['text' => implode("\n", $lines), 'has_abstract' => true];
        }

        // PubMed has metadata but no abstract: try to borrow one from Crossref.
        $cr = $this->extractCrossrefBlock($doi, $crossref);
        if ($cr !== null && $cr['has_abstract']) {
            $lines[] = "\n--- Crossref 补充 ---\n" . $cr['text'];

            return ['text' => implode("\n", $lines), 'has_abstract' => true];
        }

        return ['text' => implode("\n", $lines), 'has_abstract' => false];
    }

    /**
     * Build a text block (title / journal / authors / abstract) from Crossref.
     * The abstract is passed through cleanCrossrefAbstract() because it usually
     * arrives wrapped in JATS XML.
     *
     * @return array|null ['text' => string, 'has_abstract' => bool]; null when
     *                    fetchWork() does not return an array
     */
    private function extractCrossrefBlock($doi, CrossrefService $crossref)
    {
        $work = $crossref->fetchWork($doi);
        if (!is_array($work)) {
            return null;
        }

        $summary = $crossref->fetchWorkSummary($doi);
        if (!is_array($summary)) {
            $summary = [];
        }

        $lines = ['Source: Crossref api.crossref.org/works/' . rawurlencode($doi)];

        // Prefer the raw work's first title entry, fall back to the summary title.
        if (isset($work['title'][0])) {
            $title = trim((string)$work['title'][0]);
        } else {
            $title = trim((string)$this->arrGet($summary, 'title', ''));
        }
        if ($title !== '') {
            $lines[] = 'Actual Title: ' . $title;
        }

        $labelled = [
            'joura'      => 'Journal: ',
            'author_str' => 'Authors: ',
            'dateno'     => 'Publication: ',
            'doilink'    => 'DOI Link: ',
        ];
        foreach ($labelled as $key => $label) {
            if (!empty($summary[$key])) {
                $lines[] = $label . trim((string)$summary[$key]);
            }
        }

        if (!empty($summary['is_retracted'])) {
            $lines[] = 'Retraction: yes - ' . trim((string)$this->arrGet($summary, 'retract_reason', ''));
        }

        $abstract = $this->cleanCrossrefAbstract((string)$this->arrGet($work, 'abstract', ''));
        $hasAbstract = ($abstract !== '');
        $lines[] = $hasAbstract
            ? 'Abstract: ' . $this->truncate($abstract, 3500)
            : 'Note: Crossref 未返回摘要，请结合标题/期刊/作者与正文谨慎判断。';

        return ['text' => implode("\n", $lines), 'has_abstract' => $hasAbstract];
    }

    /**
     * Turn a Crossref abstract (usually JATS XML) into plain text.
     *
     * Removes <jats:title> elements with their content, turns each <jats:p>
     * opening tag into a line break, strips every remaining tag, then collapses
     * runs of spaces/tabs and blank lines.
     *
     * The whitespace patterns only contain ASCII, so they run in byte mode: with
     * the /u modifier preg_replace() returns null for malformed UTF-8, which
     * would silently discard the whole abstract. A step that still fails is
     * skipped rather than wiping the text.
     *
     * @param mixed $raw
     * @return string cleaned abstract, '' when the input is empty
     */
    private function cleanCrossrefAbstract($raw)
    {
        $raw = trim((string)$raw);
        if ($raw === '') {
            return '';
        }

        $tagSteps = [
            ['~<jats:title[^>]*>.*?</jats:title>~is', ''],
            ['~<jats:p[^>]*>~i', "\n"],
            ['~</jats:p>~i', ''],
            ['~</?jats:[^>]+>~i', ''],
        ];
        $raw = $this->applyAbstractCleanupSteps($raw, $tagSteps);

        $raw = strip_tags($raw);

        $whitespaceSteps = [
            ['/[ \t]+/', ' '],
            ["/\r\n|\r/", "\n"],
            ["/\n{2,}/", "\n"],
        ];
        $raw = $this->applyAbstractCleanupSteps($raw, $whitespaceSteps);

        return trim($raw);
    }

    /**
     * Apply [pattern, replacement] pairs in order; a step whose preg_replace()
     * fails (returns null) leaves the text unchanged.
     *
     * @param string $text
     * @param array  $steps list of [pattern, replacement]
     * @return string
     */
    private function applyAbstractCleanupSteps($text, array $steps)
    {
        foreach ($steps as $step) {
            $next = preg_replace($step[0], $step[1], $text);
            if ($next !== null) {
                $text = $next;
            }
        }

        return $text;
    }

    /**
     * Limit text to $max characters (multibyte-aware); longer text is cut and
     * suffixed with "...".
     *
     * @param mixed $text
     * @param int   $max
     * @return string
     */
    private function truncate($text, $max)
    {
        $text = (string)$text;

        return mb_strlen($text) > $max
            ? mb_substr($text, 0, $max) . '...'
            : $text;
    }

    /**
     * Prepare the data for the second-pass DOI recheck: the bibliographic text
     * plus whatever fetchCrossrefAbstractByReferDoi() returns for the refer row.
     *
     * When $referText is blank, the bibliographic text is produced by
     * formatReferForLlm().
     *
     * @return array{refer_text:string, doi_block:string, has_abstract:bool, doi_used:string}
     */
    public function prepareRecheckPayload($refer, $referText = '')
    {
        $bibliography = trim($referText);
        if ($bibliography === '') {
            $bibliography = $this->formatReferForLlm($refer);
        }

        $fetched = $this->fetchCrossrefAbstractByReferDoi($refer);

        return [
            'refer_text'   => $bibliography,
            'doi_block'    => $fetched['text'],
            'has_abstract' => $fetched['has_abstract'],
            'doi_used'     => $fetched['doi'],
        ];
    }

    /**
     * Legacy interface: concatenates the recheck payload into one text block.
     * Kept for backward compatibility; callers are encouraged to use
     * prepareRecheckPayload() instead.
     *
     * @return string bibliographic text followed either by the fetched DOI block
     *                or by a notice that nothing could be fetched
     */
    public function formatReferForDoiRecheck($refer, $referText = '')
    {
        $payload = $this->prepareRecheckPayload($refer, $referText);
        $bibliography = $payload['refer_text'];
        $doiBlock = $payload['doi_block'];

        if ($doiBlock !== '') {
            return $bibliography
                . "\n\n【Crossref 摘要（依据 Refer_doi 从 api.crossref.org/works 获取）】\n"
                . $doiBlock;
        }

        return $bibliography
            . "\n\n【DOI 文献真实内容】\n未能从 PubMed/Crossref 获取该 DOI 的摘要或元数据，请依据书目条目与正文谨慎判断。";
    }

    /**
     * Queue the second-pass recheck (5 second delay) when the first pass ended
     * with confidence <= 0.65 and an abstract can be fetched for the refer row.
     *
     * Skipped (returns false) to avoid a pointless rerun when:
     * - check_id is invalid or the first-pass confidence is above the threshold;
     * - the check row or its refer row (state = 0) does not exist;
     * - extractReferDoiOnly() yields no DOI, or the fetch reports no abstract.
     *
     * @return bool true when a second-pass job was pushed
     */
    public function maybeEnqueueSecondPass($checkId, $confidence)
    {
        $checkId = intval($checkId);
        $confidence = floatval($confidence);
        if ($checkId <= 0) {
            return false;
        }
        if ($confidence > 0.65) {
            return false;
        }

        $row = Db::name('article_reference_check_result')->where('id', $checkId)->find();
        if (empty($row)) {
            return false;
        }

        $referId = intval($row['p_refer_id']);
        if ($referId <= 0) {
            return false;
        }

        $refer = Db::name('production_article_refer')
            ->where('p_refer_id', $referId)
            ->where('state', 0)
            ->find();
        if (empty($refer)) {
            return false;
        }
        if ($this->extractReferDoiOnly($refer) === '') {
            return false;
        }

        $fetched = $this->fetchCrossrefAbstractByReferDoi($refer);
        if (empty($fetched['has_abstract'])) {
            return false;
        }

        $this->pushJob2($checkId, 5);

        return true;
    }

    /**
     * Extract <blue>[...]</blue> citation tags from article content together
     * with the local text each citation refers to.
     *
     * Tags whose context is not meaningful, or whose bracket content expands to
     * no reference number, are dropped.
     *
     * @param string $content
     * @return array list of ['reference_raw', 'reference_numbers', 'original_text',
     *               'reference_start', 'reference_end', 'text_start', 'text_end']
     *               (positions are byte offsets into $content)
     */
    public function extractReferences($content)
    {
        if (!preg_match_all('/<blue>\[([\d,\-\s]+)\]<\/blue>/', $content, $matches, PREG_OFFSET_CAPTURE)) {
            return [];
        }

        // Byte span of every tag; also handed to the context extractor so it
        // can see neighbouring citations.
        $tagSpans = [];
        foreach ($matches[0] as $index => $hit) {
            list($tagText, $offset) = $hit;
            $tagSpans[] = [
                'start' => $offset,
                'end'   => $offset + strlen($tagText),
                'index' => $index,
            ];
        }

        $result = [];
        foreach ($tagSpans as $span) {
            $rawRef = trim($matches[1][$span['index']][0]);
            $referenceNumbers = $this->expandReferenceNumbers($rawRef);

            list($localStart, $localEnd, $originalText) = $this->extractLocalCitationContext(
                $content,
                $span['start'],
                $span['end'],
                $tagSpans
            );

            if (empty($referenceNumbers) || !$this->isMeaningfulCitationContext($originalText)) {
                continue;
            }

            $result[] = [
                'reference_raw'       => $rawRef,
                'reference_numbers'   => $referenceNumbers,
                'original_text'       => $originalText,
                'reference_start'     => $span['start'],
                'reference_end'       => $span['end'],
                'text_start'          => $localStart,
                'text_end'            => $localEnd,
            ];
        }

        return $result;
    }

    /**
     * Cut out the local context for one citation tag.
     *
     * The narrative before the tag is preferred. When an earlier citation tag
     * exists in the same paragraph, the context starts at the beginning of the
     * current sentence instead of the paragraph start.
     *
     * @param string $content  full article content
     * @param int    $tagStart byte offset where the citation tag starts
     * @param int    $tagEnd   byte offset just past the citation tag
     * @param array  $tagSpans spans of all citation tags, each with 'start' and 'end'
     * @return array [localStart, localEnd, originalText]
     */
    private function extractLocalCitationContext($content, $tagStart, $tagEnd, array $tagSpans)
    {
        $paragraphStart = $this->findParagraphStart($content, $tagStart);
        $sentenceEnd = $this->findSentenceEnd($content, $tagEnd, $tagEnd);

        // Locate the nearest preceding tag end inside the paragraph and the
        // nearest following tag start before the sentence end.
        $prevTagEnd = $paragraphStart;
        $nextTagStart = $sentenceEnd;
        foreach ($tagSpans as $span) {
            if ($span['end'] <= $tagStart && $span['end'] > $prevTagEnd) {
                $prevTagEnd = $span['end'];
            }
            if ($span['start'] > $tagEnd && $span['start'] < $nextTagStart) {
                $nextTagStart = $span['start'];
            }
        }

        $hasPriorCiteInParagraph = ($prevTagEnd > $paragraphStart);
        $sentenceStart = $this->findSentenceStart($content, $tagStart);

        // First citation in the paragraph: whole paragraph up to the tag.
        // Later citations: start at the beginning of the current sentence (which
        // may lie before the previous tag), so the context is not reduced to a
        // fragment such as "and external environment" followed by a wrong
        // fallback to the text after the tag.
        if ($hasPriorCiteInParagraph) {
            $localStart = max($paragraphStart, $sentenceStart);
        } else {
            $localStart = $this->capContextStartBeforeTag($content, $tagStart, $paragraphStart);
        }

        // Default: the narrative that precedes the citation tag.
        $localEnd = $tagStart;
        $originalText = $this->buildCitationContextText($content, $localStart, $localEnd);

        // Only for the first citation in a paragraph, and only when the text
        // before the tag is very short (e.g. a sentence ending "ICU nurses [14]"),
        // switch to the fragment after the tag. With several citations in one
        // paragraph the trailing text is never used (it would pick up the next
        // sentence by mistake).
        $allowTrailing = !$hasPriorCiteInParagraph;
        if ($allowTrailing && (
            !$this->isMeaningfulCitationContext($originalText)
            || $this->shouldUseTrailingCitationContext($content, $localStart, $tagStart, $tagEnd)
        )) {
            $trailEnd = ($nextTagStart < $sentenceEnd) ? $nextTagStart : $sentenceEnd;
            $trailText = $this->buildCitationContextText($content, $tagEnd, $trailEnd);
            if ($this->isMeaningfulCitationContext($trailText)) {
                $localStart = $tagEnd;
                $localEnd = $trailEnd;
                $originalText = $trailText;
            }
        }

        // Still nothing meaningful: widen the bounds and rebuild the text.
        if (!$this->isMeaningfulCitationContext($originalText)) {
            list($localStart, $localEnd) = $this->widenCitationContextBounds(
                $content,
                $tagStart,
                $tagEnd,
                $localStart,
                $localEnd
            );
            $originalText = $this->buildCitationContextText($content, $localStart, $localEnd);
        }

        return [$localStart, $localEnd, $originalText];
    }

    /**
     * Decide whether the text after the tag should be used instead of the text
     * before it: true when the leading text is not meaningful or is shorter
     * than 25 characters (e.g. only an author abbreviation).
     *
     * @return bool
     */
    private function shouldUseTrailingCitationContext($content, $localStart, $tagStart, $tagEnd)
    {
        $leading = $this->buildCitationContextText($content, $localStart, $tagStart);

        return !$this->isMeaningfulCitationContext($leading) || mb_strlen($leading) < 25;
    }

    /**
     * Expand the inside of a citation bracket ("1, 3-5") into a list of
     * reference numbers ([1, 3, 4, 5]).
     *
     * The full-width comma and several Unicode dash variants are normalised to
     * "," and "-" first. Descending ranges and non-numeric parts are ignored.
     *
     * @param string $refStr
     * @return int[] unique numbers in order of first appearance
     */
    public function expandReferenceNumbers($refStr)
    {
        $normalized = strtr(trim($refStr), [
            '，' => ',',
            '–' => '-',
            '—' => '-',
            '−' => '-',
            '‐' => '-',
            '‑' => '-',
        ]);

        $numbers = [];
        foreach (explode(',', $normalized) as $piece) {
            $piece = trim($piece);
            if ($piece === '') {
                continue;
            }

            if (preg_match('/^(\d+)\s*-\s*(\d+)$/', $piece, $bounds)) {
                $from = intval($bounds[1]);
                $to = intval($bounds[2]);
                for ($n = $from; $n <= $to; $n++) {
                    $numbers[] = $n;
                }
                continue;
            }

            if (ctype_digit($piece)) {
                $numbers[] = intval($piece);
            }
        }

        return array_values(array_unique($numbers));
    }

    /**
     * Return the byte position just past the UTF-8 code point that starts at
     * $bytePos (i.e. where the next character begins). Out-of-range positions
     * are clamped to [0, strlen($content)] after adding one.
     *
     * @param string $content
     * @param int    $bytePos
     * @return int
     */
    private function utf8CharEnd($content, $bytePos)
    {
        $len = strlen($content);
        if ($bytePos < 0 || $bytePos >= $len) {
            return max(0, min($len, $bytePos + 1));
        }

        // Skip continuation bytes (10xxxxxx) following the lead byte.
        for ($next = $bytePos + 1; $next < $len; $next++) {
            if ((ord($content[$next]) & 0xC0) !== 0x80) {
                break;
            }
        }

        return $next;
    }

    /**
     * Slice by byte offsets (consistent with strpos/strlen) using mb_strcut.
     * mb_substr must not be used here: it counts characters, so a Chinese
     * prefix would shift the window and cut off the head of an English word.
     *
     * @param string $content
     * @param int    $start byte offset (inclusive)
     * @param int    $end   byte offset (exclusive)
     * @return string '' when $end <= $start
     */
    private function byteSubstr($content, $start, $end)
    {
        if ($end <= $start) {
            return '';
        }

        return (string)mb_strcut($content, $start, $end - $start, 'UTF-8');
    }

    /**
     * Build the plain-text context for the byte range [$start, $end) of
     * $content: citation tags and all other markup are removed, whitespace is
     * collapsed to single spaces and a leading UTF-8 BOM is dropped.
     *
     * @param string $content
     * @param int    $start byte offset (inclusive)
     * @param int    $end   byte offset (exclusive)
     * @return string
     */
    private function buildCitationContextText($content, $start, $end)
    {
        $text = $this->byteSubstr($content, $start, $end);
        $text = preg_replace('/<blue>\[[\d,\-\s]+\]<\/blue>/', '', $text);
        $text = trim(strip_tags((string)$text));

        // With /u preg_replace() returns null on malformed UTF-8; fall back to
        // byte mode instead of losing the whole context.
        $collapsed = preg_replace('/\s+/u', ' ', $text);
        if ($collapsed === null) {
            $collapsed = preg_replace('/\s+/', ' ', $text);
        }
        $text = (string)$collapsed;

        // Remove a leading BOM as an exact 3-byte prefix. ltrim() with these
        // bytes is a character mask and would also strip the 0xEF lead byte of
        // full-width characters, corrupting the text.
        $bom = "\xEF\xBB\xBF";
        $hadBom = false;
        while (strncmp($text, $bom, 3) === 0) {
            $text = (string)substr($text, 3);
            $hadBom = true;
        }
        if ($hadBom) {
            // Whitespace that was hidden behind the BOM was not reached by trim().
            $text = ltrim($text);
        }

        return $text;
    }

    /**
     * Reject contexts that are empty, consist only of punctuation/whitespace,
     * contain no letter or digit, or are shorter than two characters (e.g. only
     * "." is left after the tag was removed).
     *
     * @param string $text
     * @return bool
     */
    private function isMeaningfulCitationContext($text)
    {
        $text = trim($text);
        if ($text === '' || $this->isOnlyPunctuationOrSpace($text)) {
            return false;
        }

        $hasLetterOrDigit = preg_match('/[\p{L}\p{N}]/u', $text);

        return $hasLetterOrDigit && mb_strlen($text) >= 2;
    }

    /**
     * Whether the (non-empty) text consists solely of whitespace, punctuation
     * and symbol characters.
     *
     * @param string $text
     * @return bool
     */
    private function isOnlyPunctuationOrSpace($text)
    {
        $matched = preg_match('/^[\s\p{P}\p{S}]+$/u', $text);

        return $matched === 1;
    }

    /**
     * Widen a too-short context by one sentence backwards and one forwards.
     * If the widened span exceeds 2000 bytes it is replaced by a 2000-byte
     * window centred on the citation tag.
     *
     * @return array [start, end] byte offsets
     */
    private function widenCitationContextBounds($content, $tagStart, $tagEnd, $start, $end)
    {
        $contentLength = strlen($content);
        $spanLimit = 2000;

        if ($start > 0) {
            $start = min($start, $this->findSentenceStart($content, max(0, $start - 1)));
        }

        $candidateEnd = $this->findSentenceEnd($content, $end, $tagEnd);
        if ($candidateEnd > $end && $candidateEnd <= $contentLength) {
            $end = $candidateEnd;
        }

        if ($end - $start <= $spanLimit) {
            return [$start, $end];
        }

        $center = (int)floor(($tagStart + $tagEnd) / 2);
        $start = max(0, $center - (int)floor($spanLimit / 2));
        $end = min($contentLength, $start + $spanLimit);

        return [$start, $end];
    }

    /**
     * Whether the delimiter at byte $pos really ends a sentence.
     *
     * Only "." is ever rejected: a decimal point between two digits, a period
     * after an abbreviation (et al, e.g, i.e, vs, etc, fig, no), or a period
     * immediately followed by a <blue>[ citation tag. Every other delimiter,
     * and any out-of-range position, counts as a sentence boundary.
     *
     * @param string $content
     * @param int    $pos
     * @param string $delimiter
     * @return bool
     */
    private function isSentenceDelimiterAt($content, $pos, $delimiter)
    {
        if ($delimiter !== '.') {
            return true;
        }

        $len = strlen($content);
        if ($pos < 0 || $pos >= $len) {
            return true;
        }

        $isDecimalPoint = $pos > 0
            && $pos + 1 < $len
            && ctype_digit($content[$pos - 1])
            && ctype_digit($content[$pos + 1]);
        if ($isDecimalPoint) {
            return false;
        }

        // Up to 12 bytes before the period are inspected for an abbreviation.
        $lookbehind = substr($content, max(0, $pos - 12), min(12, $pos));
        if (preg_match('/\b(et\s+al|e\.g|i\.e|vs|etc|fig|no)\s*\.?\s*$/i', $lookbehind)) {
            return false;
        }

        // A period directly in front of a citation tag belongs to the sentence
        // the tag is attached to.
        $lookahead = substr($content, $pos + 1, 24);

        return !preg_match('/^\s*<blue>\s*\[/', $lookahead);
    }

    /**
     * Find where the paragraph containing $tagStart begins: the position right
     * after the last <p ...>, </p>, <br> or line break before the tag (0 when
     * none is found). Working per paragraph avoids reducing a multi-sentence
     * English paragraph to the text after its last period.
     *
     * @param string $content
     * @param int    $tagStart byte offset of the citation tag
     * @return int byte offset
     */
    private function findParagraphStart($content, $tagStart)
    {
        $head = substr($content, 0, max(0, $tagStart));
        if ($head === '') {
            return 0;
        }

        $best = 0;

        $boundaryPatterns = ['/<p[^>]*>/i', '/<\/p>\s*/i', '/<br\s*\/?>\s*/i'];
        foreach ($boundaryPatterns as $pattern) {
            if (preg_match_all($pattern, $head, $hits, PREG_OFFSET_CAPTURE)) {
                $lastHit = $hits[0][count($hits[0]) - 1];
                $best = max($best, $lastHit[1] + strlen($lastHit[0]));
            }
        }

        foreach (["\n\n", "\n"] as $lineBreak) {
            $pos = strrpos($head, $lineBreak);
            if ($pos !== false) {
                $best = max($best, $pos + strlen($lineBreak));
            }
        }

        return $best;
    }

    /**
     * Cap how far back the context before a citation tag may reach, so a very
     * long paragraph does not produce an oversized LLM context.
     *
     * When the paragraph start lies within $maxBytes of the tag it is returned
     * unchanged. Otherwise the window starts $maxBytes before the tag, is moved
     * forward to a UTF-8 character boundary, and then to the first sentence
     * boundary inside the window so the context does not begin mid-sentence.
     *
     * @param string $content
     * @param int    $tagStart       byte offset of the citation tag
     * @param int    $paragraphStart byte offset of the paragraph start
     * @param int    $maxBytes       maximum context length in bytes
     * @return int byte offset at which the context should start
     */
    private function capContextStartBeforeTag($content, $tagStart, $paragraphStart, $maxBytes = 2500)
    {
        if ($tagStart - $paragraphStart <= $maxBytes) {
            return $paragraphStart;
        }

        $start = $tagStart - $maxBytes;

        // Do not start inside a multi-byte character: the /u match below fails
        // on a slice that begins with a continuation byte.
        while ($start < $tagStart
            && isset($content[$start])
            && (ord($content[$start]) & 0xC0) === 0x80
        ) {
            $start++;
        }

        $slice = substr($content, $start, $tagStart - $start);

        // preg_match() with PREG_OFFSET_CAPTURE stores the whole match as
        // [text, byteOffset] in $m[0].
        if (preg_match('/[.!?。！？]\s+/u', $slice, $m, PREG_OFFSET_CAPTURE)) {
            $aligned = $start + $m[0][1] + strlen($m[0][0]);
            // Ignore a boundary that sits right in front of the tag; using it
            // would leave no context at all.
            if ($aligned < $tagStart) {
                return $aligned;
            }
        }

        return max($paragraphStart, $start);
    }

    /**
     * Find the byte offset at which the sentence containing $position starts:
     * the position just past the nearest preceding sentence delimiter
     * (".", "。", "!", "?" or a line break), or 0 when there is none.
     *
     * Occurrences rejected by isSentenceDelimiterAt() (decimal points,
     * abbreviations, a period directly before a citation tag) are skipped and
     * the search continues further left, instead of giving up on that
     * delimiter after its last occurrence.
     *
     * @param string $content
     * @param int    $position byte offset to search backwards from
     * @return int byte offset
     */
    private function findSentenceStart($content, $position)
    {
        $prefix = (string)substr($content, 0, $position);
        $start = 0;

        foreach (['.', '。', '!', '?', "\n"] as $delimiter) {
            $haystack = $prefix;
            while ($haystack !== '') {
                $pos = strrpos($haystack, $delimiter);
                if ($pos === false) {
                    break;
                }
                // Anything at or left of this occurrence cannot beat the best
                // boundary found so far.
                if ($pos + strlen($delimiter) <= $start) {
                    break;
                }
                if ($this->isSentenceDelimiterAt($content, $pos, $delimiter)) {
                    $start = max($start, $this->utf8CharEnd($content, $pos));
                    break;
                }
                // Not a real sentence boundary: look at earlier occurrences.
                $haystack = (string)substr($haystack, 0, $pos);
            }
        }

        return $start;
    }

    /**
     * Find the byte offset just past the end of the sentence, searching forward
     * from $searchFrom. Returns strlen($content) when no delimiter is found.
     *
     * For each delimiter the first occurrence accepted by
     * isSentenceDelimiterAt() is used; rejected occurrences (decimal points,
     * abbreviations, a period directly before a citation tag) are skipped
     * rather than dropping that delimiter altogether.
     *
     * @param string $content
     * @param int    $searchFrom byte position from which to look for a sentence end
     * @param int    $tagEnd     end of the citation tag; used to skip an isolated
     *                           period that directly follows </blue>
     * @return int byte offset
     */
    private function findSentenceEnd($content, $searchFrom, $tagEnd = 0)
    {
        $length = strlen($content);
        $minPos = max(0, $searchFrom);

        while ($minPos < $length) {
            $endPositions = [];
            foreach (['.', '。', '!', '?', "\n"] as $delimiter) {
                $from = $minPos;
                while (($pos = strpos($content, $delimiter, $from)) !== false) {
                    if ($this->isSentenceDelimiterAt($content, $pos, $delimiter)) {
                        $endPositions[] = $this->utf8CharEnd($content, $pos);
                        break;
                    }
                    // Not a real sentence boundary: try the next occurrence.
                    $from = $pos + 1;
                }
            }
            if (empty($endPositions)) {
                return $length;
            }

            $end = min($endPositions);
            if ($tagEnd <= 0 || $end <= $tagEnd) {
                return $end;
            }

            // If only tags/punctuation lie between the citation tag and this
            // delimiter, the delimiter closes the sentence the tag belongs to;
            // keep searching for the end of the following text.
            $gap = substr($content, $tagEnd, $end - $tagEnd);
            $gapText = trim(strip_tags(preg_replace('/<blue>\[[\d,\-\s]+\]<\/blue>/', '', $gap)));
            if ($gapText !== '' && !$this->isOnlyPunctuationOrSpace($gapText)) {
                return $end;
            }

            $minPos = $end;
        }

        return $length;
    }

    /**
     * Push a first-pass ReferenceCheck job for the given check row onto the
     * queue, delayed by $delaySeconds when that is positive. Queue failures are
     * logged and rethrown.
     *
     * @param int $checkId
     * @param int $delaySeconds
     * @throws \Exception when the queue call fails
     */
    private function pushJob($checkId, $delaySeconds = 0)
    {
        $handler = 'app\api\job\ReferenceCheck@fire';
        $payload = ['check_id' => $checkId];

        try {
            if ($delaySeconds > 0) {
                Queue::later($delaySeconds, $handler, $payload, self::QUEUE_NAME);
                return;
            }
            Queue::push($handler, $payload, self::QUEUE_NAME);
        } catch (\Exception $e) {
            \think\Log::error('ReferenceCheck pushJob failed check_id=' . $checkId . ' ' . $e->getMessage());
            throw $e;
        }
    }
    /**
     * Push a second-pass ReferenceCheckTwo job for the given check row onto the
     * queue, delayed by $delaySeconds when that is positive. Queue failures are
     * logged and rethrown.
     *
     * @param int $checkId
     * @param int $delaySeconds
     * @throws \Exception when the queue call fails
     */
    private function pushJob2($checkId, $delaySeconds = 0)
    {
        $handler = 'app\api\job\ReferenceCheckTwo@fire';
        $payload = ['check_id' => $checkId];

        try {
            if ($delaySeconds > 0) {
                Queue::later($delaySeconds, $handler, $payload, self::QUEUE_NAME);
                return;
            }
            Queue::push($handler, $payload, self::QUEUE_NAME);
        } catch (\Exception $e) {
            \think\Log::error('ReferenceCheckTwo pushJob failed check_id=' . $checkId . ' ' . $e->getMessage());
            throw $e;
        }
    }
}