tougao/application/api/controller/Agent.php

<?php

namespace app\api\controller;

use think\Db;
use think\Cache;
use think\Env;

class Agent extends Base
{

    /**
     * Builds the controller by delegating straight to the parent constructor.
     *
     * @param \think\Request|null $request Request instance, passed through unchanged.
     */
    public function __construct(\think\Request $request = null)
    {
        parent::__construct($request);
    }

    /**
     * Returns every leaf major together with its full ancestor path, for AI matching.
     *
     * Only rows with major_state = 0 and major_type = 0 are loaded. A major counts
     * as a leaf when no loaded row references it through its pid. The resulting
     * list is cached under "agent_major_tree" for 3600 seconds.
     *
     * @return array List of ['major_id' => ..., 'major_title' => ..., 'full_path' => ...].
     */
    private function getMajorTree()
    {
        $cacheKey = 'agent_major_tree';
        $cached = Cache::get($cacheKey);
        if ($cached) {
            return $cached;
        }

        $allMajors = $this->major_obj
            ->where('major_state', 0)
            ->where('major_type', 0)
            ->select();

        // One pass: index rows by id and remember every id that is used as a
        // parent, so leaf detection below is a hash lookup instead of a nested scan.
        $majorMap = [];
        $parentIds = [];
        foreach ($allMajors as $m) {
            $majorMap[$m['major_id']] = $m;
            $parentIds[(string)$m['pid']] = true;
        }

        $result = [];
        foreach ($allMajors as $m) {
            if (isset($parentIds[(string)$m['major_id']])) {
                // Some row names this major as its parent, so it is not a leaf.
                continue;
            }
            $result[] = [
                'major_id'    => $m['major_id'],
                'major_title' => $m['major_title'],
                'full_path'   => $this->buildMajorPath($m['major_id'], $majorMap),
            ];
        }

        Cache::set($cacheKey, $result, 3600);
        return $result;
    }

    /**
     * Builds the " > "-separated title path from the topmost known ancestor down to a major.
     *
     * The walk stops at a row whose pid is 0 or 1, or whose parent is not present
     * in $majorMap. It also stops when a pid points back to a row already on the
     * path, so corrupt (cyclic) data cannot cause endless recursion.
     *
     * @param mixed $majorId  Id of the major whose path is wanted.
     * @param array $majorMap Rows indexed by major_id (taken by reference; not modified).
     * @return string The path, or '' when $majorId is not in $majorMap.
     */
    private function buildMajorPath($majorId, &$majorMap)
    {
        if (!isset($majorMap[$majorId])) {
            return '';
        }

        $titles = [];
        $visited = [];
        $currentId = $majorId;

        while (true) {
            $node = $majorMap[$currentId];
            $visited[(string)$currentId] = true;
            // Ancestors are discovered bottom-up, so each title goes in front.
            array_unshift($titles, $node['major_title']);

            $parentId = $node['pid'];
            if ($parentId == 0 || $parentId == 1 || !isset($majorMap[$parentId])) {
                break;
            }
            if (isset($visited[(string)$parentId])) {
                // Cycle in the pid chain: stop instead of looping forever.
                break;
            }
            $currentId = $parentId;
        }

        return implode(' > ', $titles);
    }

    /**
     * Renders the major list as prompt text, one "ID:<id> - <full path>" line per entry.
     *
     * @param array $majorTree Entries carrying 'major_id' and 'full_path'.
     * @return string Newline-separated lines; '' for an empty list.
     */
    private function buildMajorListPrompt($majorTree)
    {
        $prompt = '';
        $glue = '';
        foreach ($majorTree as $entry) {
            $prompt .= $glue . 'ID:' . $entry['major_id'] . ' - ' . $entry['full_path'];
            // Every line after the first is preceded by a newline.
            $glue = "\n";
        }
        return $prompt;
    }

    /**
     * Asks the chat-completion endpoint to map a free-text field description to major ids.
     *
     * The system prompt embeds the major list and instructs the model to answer
     * with a bare JSON array of ids. Any transport error, unexpected response
     * shape or unparsable answer yields an empty array. The ids are not checked
     * against the database here.
     *
     * @param string $field           Free-text research field description.
     * @param string $majorListPrompt Major list text, one entry per line.
     * @return int[] Unique, positive ids in the order the model returned them, re-indexed from 0.
     */
    private function matchFieldToMajor($field, $majorListPrompt)
    {
        $systemPrompt = "你是一位医学领域分类专家。用户会提供一段研究领域的描述文本，你需要从给定的标准领域列表中找出最匹配的1-3个领域。\n"
            . "请严格按照JSON数组格式返回匹配结果，只返回major_id数组，如 [12,34,56]。\n"
            . "如果没有合适的匹配，返回空数组 []。\n"
            . "不要返回任何其他内容，只返回JSON数组。\n\n"
            . "标准领域列表：\n" . $majorListPrompt;

        $userPrompt = "请为以下研究领域描述匹配最合适的标准领域ID：\n" . $field;

        $messages = [
            ['role' => 'system', 'content' => $systemPrompt],
            ['role' => 'user', 'content' => $userPrompt],
        ];

        $apiKey = Env::get("gpt.api_key1", Env::get("gpt.api_key", ""));
        $url = 'http://chat.taimed.cn/v1/chat/completions';

        $data = [
            'model'       => 'gpt-4.1',
            'messages'    => $messages,
            'temperature' => 0.1,
            'max_tokens'  => 200,
        ];

        // json_encode() returns false e.g. for invalid UTF-8 in $field; do not
        // post a bogus body in that case.
        $payload = json_encode($data);
        if ($payload === false) {
            return [];
        }

        $ch = curl_init();
        if ($ch === false) {
            return [];
        }
        curl_setopt($ch, CURLOPT_URL, $url);
        curl_setopt($ch, CURLOPT_HTTPHEADER, [
            'Content-Type: application/json',
            'Authorization: Bearer ' . $apiKey,
        ]);
        curl_setopt($ch, CURLOPT_POST, true);
        curl_setopt($ch, CURLOPT_POSTFIELDS, $payload);
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
        // NOTE(review): the endpoint is plain http, so the bearer token travels
        // unencrypted and this TLS option has no effect; revisit if the URL
        // moves to https.
        curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false);
        curl_setopt($ch, CURLOPT_TIMEOUT, 60);

        $result = curl_exec($ch);
        $failed = curl_errno($ch) !== 0;
        curl_close($ch);
        if ($failed) {
            return [];
        }

        $res = json_decode($result, true);
        if (!isset($res['choices'][0]['message']['content'])) {
            return [];
        }

        $content = trim($res['choices'][0]['message']['content']);
        // Pull the first JSON-looking array of digits out of the answer.
        if (preg_match('/\[[\d,\s]*\]/', $content, $matches)) {
            $ids = json_decode($matches[0], true);
            if (is_array($ids)) {
                $ids = array_map('intval', $ids);
                // Drop zero/negative ids and repeats so callers never insert
                // the same major twice.
                $ids = array_filter($ids, function ($id) {
                    return $id > 0;
                });
                return array_values(array_unique($ids));
            }
        }

        return [];
    }

    /**
     * Inserts major/user links for the ids the user does not already have.
     *
     * Existing links are those rows of the major-to-user table with the given
     * user_id and state = 0. Input ids are de-duplicated first, so a repeated
     * id is inserted at most once.
     *
     * @param int   $userId   User the majors are attached to.
     * @param array $majorIds Candidate major ids.
     * @return int Number of rows inserted.
     */
    private function saveMajorToUser($userId, $majorIds)
    {
        // array_diff() keeps duplicates of the first array, so without this
        // [5, 5] would be inserted twice.
        $majorIds = array_values(array_unique(array_map('intval', (array)$majorIds)));
        if (empty($majorIds)) {
            return 0;
        }

        $existing = $this->major_to_user_obj
            ->where('user_id', $userId)
            ->where('state', 0)
            ->column('major_id');

        $toInsert = array_diff($majorIds, $existing);
        $now = time();
        foreach ($toInsert as $majorId) {
            $this->major_to_user_obj->insert([
                'user_id'  => $userId,
                'major_id' => $majorId,
                'ctime'    => $now,
            ]);
        }

        return count($toInsert);
    }

    /**
     * Previews the field-to-major match for a single user.
     *
     * Matches the submitted field text against the standard major list via the
     * AI endpoint, merges the valid matches with the majors the user already
     * has, and returns the merged major rows. Nothing is written to the database.
     *
     * Request parameters:
     *  - user_id (int)    reviewer id; a reviewer-info row with state = 0 must exist.
     *  - field   (string) free-text research field description; must not be blank.
     *
     * @return mixed Whatever jsonSuccess()/jsonError() produce.
     */
    public function processOneUser()
    {
        $data = $this->request->param();
        if (!isset($data['user_id']) || $data['user_id'] == '') {
            return jsonError('user_id不能为空');
        }

        // Trim before validating so whitespace-only input is rejected here
        // instead of being sent to the AI endpoint.
        $field = (isset($data['field']) && is_scalar($data['field'])) ? trim((string)$data['field']) : '';
        if ($field === '') {
            return jsonError('field不能为空');
        }

        $userId = intval($data['user_id']);
        $reviewerInfo = $this->user_reviewer_info_obj
            ->where('reviewer_id', $userId)
            ->where('state', 0)
            ->find();

        if (!$reviewerInfo) {
            return jsonError('未找到该用户的reviewer信息');
        }

        $majorTree = $this->getMajorTree();
        if (empty($majorTree)) {
            return jsonError('未获取到标准领域数据');
        }

        $majorListPrompt = $this->buildMajorListPrompt($majorTree);
        $matchedIds = $this->matchFieldToMajor($field, $majorListPrompt);

        if (empty($matchedIds)) {
            return jsonSuccess([
                'user_id'     => $userId,
                'field'       => $field,
                'matched_ids' => [],
                'inserted'    => 0,
                'msg'         => 'AI未匹配到合适的领域',
            ]);
        }

        // Keep only ids that exist as active majors.
        $validMajors = $this->major_obj
            ->where('major_id', 'in', $matchedIds)
            ->where('major_state', 0)
            ->column('major_id');
        // Majors the user is already linked to.
        $existing = $this->major_to_user_obj
            ->where('user_id', $userId)
            ->where('state', 0)
            ->column('major_id');
        $unionArray = array_unique(array_merge($validMajors, $existing));

        // Skip the query when there is nothing to look up (avoids an empty IN list).
        $ms = [];
        if (!empty($unionArray)) {
            $ms = $this->major_obj
                ->where('major_id', 'in', $unionArray)
                ->where('major_state', 0)
                ->select();
        }

        // Decorate each row with the values of the project helpers
        // getMajorShu()/getMajorStr() (defined outside this file).
        foreach ($ms as $k => $major) {
            $ms[$k]['shu'] = getMajorShu($major['major_id']);
            $ms[$k]['str'] = getMajorStr($major['major_id']);
        }

        return jsonSuccess([
            'user_id' => $userId,
            'field'   => $field,
            'majors'  => $ms,
        ]);
    }

    /**
     * Batch job: AI-matches the field text of reviewers and stores the resulting majors.
     *
     * Selects reviewer-info rows with state = 0 and a non-empty field, optionally
     * excluding users who already have a major link with state = 0, then matches
     * and saves each one in turn.
     *
     * Request parameters:
     *  - limit          (int) users per call; default 10, clamped to 1..50.
     *  - skip_has_major (int) non-zero skips users that already have major links; default 1.
     *
     * NOTE(review): users whose match comes back empty get no link, so with
     * skip_has_major enabled they are selected again on the next call.
     *
     * @return mixed Whatever jsonSuccess()/jsonError() produce.
     */
    public function batchProcess()
    {
        $data = $this->request->param();
        $limit = isset($data['limit']) ? intval($data['limit']) : 10;
        $skipHasMajor = isset($data['skip_has_major']) ? intval($data['skip_has_major']) : 1;

        // Clamp both ends: a zero or negative value would otherwise reach LIMIT.
        $limit = max(1, min(50, $limit));

        $query = $this->user_reviewer_info_obj
            ->alias('ri')
            ->field('ri.reviewer_id, ri.field')
            ->where('ri.state', 0)
            ->where('ri.field', '<>', '');

        if ($skipHasMajor) {
            // NOTE(review): a closure is the documented way to express an
            // IN / NOT IN sub-query; a raw SQL string is handled by the builder
            // as a comma-separated value list rather than as a sub-select.
            $query = $query->where('ri.reviewer_id', 'not in', function ($sub) {
                $sub->name('major_to_user')->where('state', 0)->field('user_id');
            });
        }

        $users = $query->limit($limit)->select();

        if (empty($users)) {
            return jsonSuccess([
                'processed' => 0,
                'msg'       => '没有需要处理的用户',
            ]);
        }

        $majorTree = $this->getMajorTree();
        if (empty($majorTree)) {
            return jsonError('未获取到标准领域数据');
        }
        $majorListPrompt = $this->buildMajorListPrompt($majorTree);

        $validMajorIds = $this->major_obj->where('major_state', 0)->column('major_id');

        $results = [];
        $successCount = 0;
        $failCount = 0;

        foreach ($users as $user) {
            $field = trim($user['field']);
            if ($field == '') {
                continue;
            }

            $matchedIds = $this->matchFieldToMajor($field, $majorListPrompt);
            // Discard ids the model invented or that are no longer active.
            $matchedIds = array_values(array_intersect($matchedIds, $validMajorIds));

            $inserted = 0;
            if (!empty($matchedIds)) {
                $inserted = $this->saveMajorToUser($user['reviewer_id'], $matchedIds);
                $successCount++;
            } else {
                $failCount++;
            }

            $results[] = [
                'user_id'     => $user['reviewer_id'],
                'field'       => mb_substr($field, 0, 100),
                'matched_ids' => $matchedIds,
                'inserted'    => $inserted,
            ];
        }

        return jsonSuccess([
            'processed'     => count($results),
            'success_count' => $successCount,
            'fail_count'    => $failCount,
            'details'       => $results,
        ]);
    }

    /**
     * Debug endpoint: dumps the leaf-major list together with its size.
     *
     * @return mixed Result of jsonSuccess() with 'total' and 'list'.
     */
    public function getMajorList()
    {
        $leaves = $this->getMajorTree();

        $payload = [];
        $payload['total'] = count($leaves);
        $payload['list'] = $leaves;

        return jsonSuccess($payload);
    }

    /**
     * Previews the major spreadsheet at public/system/t_major.xlsx.
     *
     * Despite its name this method writes nothing: it reads the header row and
     * returns the headers, the number of data rows and the first 20 data rows
     * keyed by header.
     *
     * @return mixed Whatever jsonSuccess()/jsonError() produce.
     */
    public function importMajorFromExcel()
    {
        $file = ROOT_PATH . 'public' . DS . 'system' . DS . 't_major.xlsx';
        if (!file_exists($file)) {
            return jsonError('Excel文件不存在: public/system/t_major.xlsx');
        }

        // A corrupt or unreadable workbook makes the loader throw; report it
        // as a normal API error instead of an uncaught exception.
        try {
            $spreadsheet = \PhpOffice\PhpSpreadsheet\IOFactory::load($file);
            $sheet = $spreadsheet->getActiveSheet();
        } catch (\Exception $e) {
            return jsonError('Excel文件读取失败: ' . $e->getMessage());
        }

        $highestRow = (int)$sheet->getHighestRow();
        $colCount = \PhpOffice\PhpSpreadsheet\Cell\Coordinate::columnIndexFromString($sheet->getHighestColumn());

        $headers = [];
        for ($col = 1; $col <= $colCount; $col++) {
            $label = trim((string)$sheet->getCellByColumnAndRow($col, 1)->getValue());
            // Blank header cells would all collapse onto the same array key and
            // silently drop data, so give them a positional name.
            $headers[$col] = $label !== '' ? $label : 'col_' . $col;
        }

        // Row 1 is the header; only the rows that end up in the preview are read.
        $total = max(0, $highestRow - 1);
        $lastPreviewRow = min($highestRow, 21);

        $preview = [];
        for ($row = 2; $row <= $lastPreviewRow; $row++) {
            $rowData = [];
            for ($col = 1; $col <= $colCount; $col++) {
                $rowData[$headers[$col]] = $sheet->getCellByColumnAndRow($col, $row)->getValue();
            }
            $preview[] = $rowData;
        }

        return jsonSuccess([
            'headers' => array_values($headers),
            'total'   => $total,
            'preview' => $preview,
        ]);
    }

    // ========== CrossRef DOI lookup & retraction detection ==========

    /**
     * Normalises a DOI by stripping resolver-URL and "doi:" prefixes.
     *
     * Handles http/https, doi.org and dx.doi.org, in any letter case.
     *
     * @param mixed $doi Raw DOI text.
     * @return string Bare DOI; '' when the input is not scalar or nothing remains.
     */
    private function cleanDoi($doi)
    {
        if (!is_scalar($doi)) {
            return '';
        }
        $doi = trim((string)$doi);
        // Resolver prefix, e.g. "https://doi.org/" or the legacy "http://dx.doi.org/".
        $doi = preg_replace('#^https?://(?:dx\.)?doi\.org/#i', '', $doi);
        // Label prefix, e.g. "doi: 10.1000/xyz".
        $doi = preg_replace('/^doi:\s*/i', '', (string)$doi);
        return trim((string)$doi);
    }

    /**
     * Fetches the raw "message" object for a DOI from the CrossRef works API.
     *
     * @param string $doi Bare DOI (already cleaned).
     * @return array ['success' => true, 'message' => array] on success,
     *               ['success' => false, 'error' => string] otherwise.
     */
    private function fetchCrossRefData($doi)
    {
        // rawurlencode() is the path-segment encoder; urlencode() would turn a
        // space into "+", which is a literal plus sign inside a URL path.
        $url = 'https://api.crossref.org/works/' . rawurlencode($doi);

        $ch = curl_init();
        if ($ch === false) {
            return ['success' => false, 'error' => 'CURL错误: ' . 'curl_init failed'];
        }
        curl_setopt($ch, CURLOPT_URL, $url);
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
        // NOTE(review): certificate verification is disabled here; enable it
        // once the server's CA bundle is confirmed to be configured.
        curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false);
        curl_setopt($ch, CURLOPT_TIMEOUT, 30);
        curl_setopt($ch, CURLOPT_HTTPHEADER, [
            'User-Agent: TMRJournals/1.0 (mailto:publisher@tmrjournals.com)',
            'Accept: application/json',
        ]);

        $result = curl_exec($ch);

        if (curl_errno($ch)) {
            $error = curl_error($ch);
            curl_close($ch);
            return ['success' => false, 'error' => 'CURL错误: ' . $error];
        }
        $httpCode = curl_getinfo($ch, CURLINFO_HTTP_CODE);
        curl_close($ch);

        if ($httpCode == 404) {
            return ['success' => false, 'error' => 'DOI在CrossRef中未找到'];
        }
        if ($httpCode != 200) {
            return ['success' => false, 'error' => 'CrossRef返回HTTP ' . $httpCode];
        }

        $data = json_decode($result, true);
        if (!isset($data['message'])) {
            return ['success' => false, 'error' => 'CrossRef返回数据格式异常'];
        }

        return ['success' => true, 'message' => $data['message']];
    }

    /**
     * Turns a CrossRef date object into a "Y", "Y-MM" or "Y-MM-DD" string.
     *
     * Reads the first entry of 'date-parts'; month and day are zero-padded to
     * two digits. A day without a month is ignored.
     *
     * @param mixed $dateObj CrossRef date object holding a 'date-parts' list.
     * @return string Formatted date, or '' when no first date-parts entry exists.
     */
    private function parseDateParts($dateObj)
    {
        if (!isset($dateObj['date-parts'][0])) {
            return '';
        }

        $segments = $dateObj['date-parts'][0];
        $year = $segments[0] ?? '';
        $month = isset($segments[1]) ? sprintf('%02d', $segments[1]) : '';
        $day = isset($segments[2]) ? sprintf('%02d', $segments[2]) : '';

        // Without both a year and a month only the year (possibly empty) is usable.
        if (!$year || !$month) {
            return (string)$year;
        }

        return $day ? "{$year}-{$month}-{$day}" : "{$year}-{$month}";
    }

    /**
     * Flattens a CrossRef author list into plain arrays.
     *
     * Each output entry carries given, family, name, ORCID, sequence and a list
     * of affiliation names. 'name' is taken from the source when present and
     * otherwise assembled as "<given> <family>".
     *
     * @param mixed $authorList CrossRef 'author' value.
     * @return array List of author arrays; [] for empty or non-array input.
     */
    private function parseAuthors($authorList)
    {
        if (!is_array($authorList) || empty($authorList)) {
            return [];
        }

        $parsed = [];
        foreach ($authorList as $entry) {
            $family = $entry['family'] ?? '';

            if (isset($entry['name'])) {
                $displayName = $entry['name'];
            } else {
                $prefix = isset($entry['given']) ? $entry['given'] . ' ' : '';
                $displayName = $prefix . $family;
            }

            $affiliations = [];
            if (isset($entry['affiliation']) && is_array($entry['affiliation'])) {
                foreach ($entry['affiliation'] as $organisation) {
                    $affiliations[] = $organisation['name'] ?? '';
                }
            }

            $parsed[] = [
                'given'       => $entry['given'] ?? '',
                'family'      => $family,
                'name'        => $displayName,
                'ORCID'       => $entry['ORCID'] ?? '',
                'sequence'    => $entry['sequence'] ?? '',
                'affiliation' => $affiliations,
            ];
        }

        return $parsed;
    }

    /**
     * Heuristically decides whether a CrossRef work is retracted (or is a retraction notice).
     *
     * Signals checked, any of which sets the flag:
     *  1. an entry in 'update-to' or 'updated-by' whose type or label contains "retract";
     *  2. the work's own type/subtype containing "retract";
     *  3. a 'relation' key containing "retract";
     *  4. a title containing retraction/retracted/withdrawal/withdrawn.
     *
     * NOTE(review): as far as the CrossRef REST API format goes, 'update-to' is
     * carried by the notice and 'updated-by' by the work that was updated —
     * confirm against the current API documentation.
     * NOTE(review): the title keywords also match unrelated uses such as
     * "alcohol withdrawal" or "clot retraction".
     *
     * @param array $message CrossRef message object.
     * @return array ['is_retracted' => bool, 'retraction_detail' => array].
     */
    private function detectRetraction($message)
    {
        $isRetracted = false;
        $retractionDetail = [];

        // 1. Update relations, in both directions.
        $updateFields = [
            'update-to'  => 'retraction_notice',
            'updated-by' => 'retracted_by',
        ];
        foreach ($updateFields as $fieldName => $detailKey) {
            if (!isset($message[$fieldName]) || !is_array($message[$fieldName])) {
                continue;
            }
            foreach ($message[$fieldName] as $update) {
                $updateType = strtolower($update['type'] ?? '');
                $updateLabel = strtolower($update['label'] ?? '');
                if (strpos($updateType, 'retract') !== false || strpos($updateLabel, 'retract') !== false) {
                    $isRetracted = true;
                    $retractionDetail[$detailKey] = [
                        'type'  => $update['type'] ?? '',
                        'label' => $update['label'] ?? '',
                        'DOI'   => $update['DOI'] ?? '',
                        'date'  => isset($update['updated']) ? $this->parseDateParts($update['updated']) : '',
                    ];
                    // The first matching entry per field is enough.
                    break;
                }
            }
        }

        // 2. The work's own type/subtype.
        $type = strtolower($message['type'] ?? '');
        $subtype = strtolower($message['subtype'] ?? '');
        if (strpos($type, 'retract') !== false || strpos($subtype, 'retract') !== false) {
            $isRetracted = true;
            $retractionDetail['is_retraction_notice'] = true;
        }

        // 3. Relation keys.
        if (isset($message['relation']) && is_array($message['relation'])) {
            foreach ($message['relation'] as $relType => $relations) {
                if (strpos(strtolower($relType), 'retract') !== false) {
                    $isRetracted = true;
                    $retractionDetail['relation'] = [$relType => $relations];
                    break;
                }
            }
        }

        // 4. Title keywords. Tolerate a scalar title and skip non-string entries.
        $titles = $message['title'] ?? [];
        if (!is_array($titles)) {
            $titles = [$titles];
        }
        foreach ($titles as $title) {
            if (!is_string($title)) {
                continue;
            }
            $lower = strtolower($title);
            if (strpos($lower, 'retraction') !== false || strpos($lower, 'retracted') !== false
                || strpos($lower, 'withdrawal') !== false || strpos($lower, 'withdrawn') !== false) {
                $isRetracted = true;
                $retractionDetail['title_keyword'] = $title;
                break;
            }
        }

        return ['is_retracted' => $isRetracted, 'retraction_detail' => $retractionDetail];
    }

    /**
     * Converts a CrossRef message object into the flat structure returned by this API.
     *
     * Missing source fields fall back to '' (text), [] (lists) or 0 (counters).
     * The result also carries the retraction verdict from detectRetraction().
     *
     * @param string $doi     DOI the message was fetched for.
     * @param array  $message CrossRef message object.
     * @return array Structured metadata.
     */
    private function parseCrossRefMessage($doi, $message)
    {
        // First element of a list-valued field, or '' when there is none.
        $head = function ($key) use ($message) {
            return isset($message[$key][0]) ? $message[$key][0] : '';
        };
        // Formatted date stored under $key of $source, or '' when absent.
        $dateAt = function ($source, $key) {
            return isset($source[$key]) ? $this->parseDateParts($source[$key]) : '';
        };
        // A field's value when it is an array, otherwise an empty list.
        $rowsOf = function ($key) use ($message) {
            return (isset($message[$key]) && is_array($message[$key])) ? $message[$key] : [];
        };

        // Authors
        $authors = $this->parseAuthors($message['author'] ?? []);

        // Dates
        $dateFields = [
            'published_print'  => 'published-print',
            'published_online' => 'published-online',
            'published'        => 'published',
            'created'          => 'created',
            'deposited'        => 'deposited',
            'indexed'          => 'indexed',
        ];
        $dates = [];
        foreach ($dateFields as $outKey => $sourceKey) {
            $dates[$outKey] = $dateAt($message, $sourceKey);
        }

        // Year: first non-empty year among the publication-like dates.
        $year = '';
        foreach (['published-print', 'published-online', 'published', 'created'] as $candidate) {
            if (!empty($message[$candidate]['date-parts'][0][0])) {
                $year = (string)$message[$candidate]['date-parts'][0][0];
                break;
            }
        }

        // Funding
        $funders = [];
        foreach ($rowsOf('funder') as $funder) {
            $funders[] = [
                'name'  => $funder['name'] ?? '',
                'DOI'   => $funder['DOI'] ?? '',
                'award' => $funder['award'] ?? [],
            ];
        }

        // Licences
        $licenses = [];
        foreach ($rowsOf('license') as $licence) {
            $licenses[] = [
                'URL'        => $licence['URL'] ?? '',
                'start_date' => $dateAt($licence, 'start'),
            ];
        }

        // Retraction verdict
        $retraction = $this->detectRetraction($message);

        // All update relations (errata, corrections, retraction notices, ...)
        $updates = [];
        foreach ($rowsOf('update-to') as $update) {
            $updates[] = [
                'type'  => $update['type'] ?? '',
                'label' => $update['label'] ?? '',
                'DOI'   => $update['DOI'] ?? '',
                'date'  => $dateAt($update, 'updated'),
            ];
        }

        return [
            // Basics
            'doi'                    => $doi,
            'url'                    => $message['URL'] ?? ('https://doi.org/' . $doi),
            'type'                   => $message['type'] ?? '',
            'title'                  => $head('title'),
            'authors'                => $authors,
            'author_string'          => implode(', ', array_column($authors, 'name')),
            // Journal / source
            'journal'                => [
                'title'      => $head('container-title'),
                'short_title'=> $head('short-container-title'),
                'ISSN'       => $message['ISSN'] ?? [],
                'publisher'  => $message['publisher'] ?? '',
            ],
            // Volume / issue / pages
            'volume'                 => $message['volume'] ?? '',
            'issue'                  => $message['issue'] ?? '',
            'page'                   => $message['page'] ?? '',
            'article_number'         => $message['article-number'] ?? '',
            'dates'                  => $dates,
            'year'                   => $year,
            'abstract'               => $message['abstract'] ?? '',
            'subject'                => $message['subject'] ?? [],
            // Citation counters
            'references_count'       => $message['references-count'] ?? 0,
            'is_referenced_by_count' => $message['is-referenced-by-count'] ?? 0,
            'funders'                => $funders,
            'licenses'               => $licenses,
            'is_retracted'           => $retraction['is_retracted'],
            'retraction_detail'      => $retraction['retraction_detail'],
            'updates'                => $updates,
            'relation'               => $message['relation'] ?? [],
            'score'                  => $message['score'] ?? 0,
        ];
    }

    /**
     * Looks up a single DOI on CrossRef and returns its structured metadata.
     *
     * Request parameters:
     *  - doi (string) article DOI; resolver-URL and "doi:" prefixes are accepted.
     *
     * @return mixed Whatever jsonSuccess()/jsonError() produce.
     */
    public function queryDoi()
    {
        $data = $this->request->param();
        if (!isset($data['doi']) || !is_scalar($data['doi']) || trim((string)$data['doi']) == '') {
            return jsonError('doi不能为空');
        }

        $doi = $this->cleanDoi($data['doi']);
        // Input such as "doi:" or "https://doi.org/" is empty once cleaned;
        // requesting "/works/" with no id would not address a single work.
        if ($doi === '') {
            return jsonError('doi不能为空');
        }

        $res = $this->fetchCrossRefData($doi);
        if (!$res['success']) {
            return jsonError($res['error']);
        }

        return jsonSuccess($this->parseCrossRefMessage($doi, $res['message']));
    }

    /**
     * Looks up several DOIs on CrossRef and returns the metadata for each.
     *
     * Request parameters:
     *  - dois (string|array) comma-separated DOI list, or an array of DOIs; at most 50.
     *
     * Every submitted DOI produces one entry in 'list', in input order. Entries
     * that failed carry 'success' => false and an 'error' message.
     *
     * @return mixed Whatever jsonSuccess()/jsonError() produce.
     */
    public function batchQueryDois()
    {
        $data = $this->request->param();
        $rawInput = isset($data['dois']) ? $data['dois'] : '';

        if (is_array($rawInput)) {
            $parts = $rawInput;
        } else {
            if (!is_scalar($rawInput) || trim((string)$rawInput) == '') {
                return jsonError('dois不能为空');
            }
            $parts = explode(',', (string)$rawInput);
        }

        $doiList = [];
        foreach ($parts as $part) {
            if (!is_scalar($part)) {
                continue;
            }
            $part = trim((string)$part);
            if ($part !== '') {
                $doiList[] = $part;
            }
        }

        if (empty($doiList)) {
            return jsonError('未提供有效的DOI');
        }
        if (count($doiList) > 50) {
            return jsonError('单次最多查询50个DOI');
        }

        $results = [];
        $retractedCount = 0;
        $requestsMade = 0;

        foreach ($doiList as $rawDoi) {
            $doi = $this->cleanDoi($rawDoi);
            if ($doi === '') {
                // Nothing left after stripping prefixes: report it without
                // spending an API call.
                $results[] = ['doi' => $rawDoi, 'success' => false, 'error' => 'DOI格式无效'];
                continue;
            }

            // Pause between consecutive requests only, not after the last one.
            if ($requestsMade > 0) {
                usleep(200000);
            }
            $requestsMade++;

            $res = $this->fetchCrossRefData($doi);
            if (!$res['success']) {
                $results[] = ['doi' => $doi, 'success' => false, 'error' => $res['error']];
                continue;
            }

            $parsed = $this->parseCrossRefMessage($doi, $res['message']);
            $parsed['success'] = true;
            if ($parsed['is_retracted']) {
                $retractedCount++;
            }
            $results[] = $parsed;
        }

        return jsonSuccess([
            'total'           => count($results),
            'retracted_count' => $retractedCount,
            'list'            => $results,
        ]);
    }

    /**
     * Checks every DOI-bearing reference of a production article against CrossRef.
     *
     * Each checked reference is returned with its CrossRef metadata (including
     * the retraction flag) or with the lookup error.
     *
     * Request parameters:
     *  - p_article_id (int) production article id.
     *
     * @return mixed Result of jsonSuccess() with p_article_id, total_checked,
     *               retracted_count, error_count and list; jsonError() on bad input.
     */
    public function checkArticleReferences()
    {
        $data = $this->request->param();
        if (!isset($data['p_article_id']) || $data['p_article_id'] == '') {
            return jsonError('p_article_id不能为空');
        }

        $pArticleId = intval($data['p_article_id']);
        $refers = $this->production_article_refer_obj
            ->where('p_article_id', $pArticleId)
            ->where('state', 0)
            ->where('refer_doi', '<>', '')
            ->select();

        if (empty($refers)) {
            // Same keys as the populated response, so clients can read
            // error_count unconditionally.
            return jsonSuccess([
                'p_article_id'    => $pArticleId,
                'total_checked'   => 0,
                'retracted_count' => 0,
                'error_count'     => 0,
                'list'            => [],
            ]);
        }

        $list = [];
        $retractedCount = 0;
        $errorCount = 0;

        foreach ($refers as $refer) {
            $doi = $this->cleanDoi($refer['refer_doi']);
            if ($doi == '') {
                continue;
            }

            // Pause between consecutive requests only, not after the last one.
            if (!empty($list)) {
                usleep(200000);
            }

            $item = [
                'p_refer_id'    => $refer['p_refer_id'],
                'index'         => $refer['index'],
                'refer_doi'     => $doi,
                'refer_content' => $refer['refer_content'] ?? '',
            ];

            $res = $this->fetchCrossRefData($doi);
            if (!$res['success']) {
                $item['crossref_success'] = false;
                $item['crossref_error']   = $res['error'];
                $errorCount++;
            } else {
                $parsed = $this->parseCrossRefMessage($doi, $res['message']);
                $item['crossref_success'] = true;
                $item['crossref']         = $parsed;
                if ($parsed['is_retracted']) {
                    $retractedCount++;
                }
            }

            $list[] = $item;
        }

        return jsonSuccess([
            'p_article_id'    => $pArticleId,
            'total_checked'   => count($list),
            'retracted_count' => $retractedCount,
            'error_count'     => $errorCount,
            'list'            => $list,
        ]);
    }

    /**
     * Checks the references of an article, addressed by article_id, against CrossRef.
     *
     * Resolves the production article (state = 0) for the given article_id and
     * then looks up every DOI-bearing reference, returning CrossRef metadata and
     * the retraction flag, or the lookup error, per reference.
     *
     * Request parameters:
     *  - article_id (int) article id.
     *
     * @return mixed Result of jsonSuccess() with article_id, p_article_id,
     *               total_checked, retracted_count, error_count and list;
     *               jsonError() on bad input or when no production row exists.
     */
    public function checkReferencesByArticleId()
    {
        $data = $this->request->param();
        if (!isset($data['article_id']) || $data['article_id'] == '') {
            return jsonError('article_id不能为空');
        }

        $articleId = intval($data['article_id']);
        $pInfo = $this->production_article_obj
            ->where('article_id', $articleId)
            ->where('state', 0)
            ->find();

        if (!$pInfo) {
            return jsonError('未找到该文章的生产信息');
        }

        $refers = $this->production_article_refer_obj
            ->where('p_article_id', $pInfo['p_article_id'])
            ->where('state', 0)
            ->where('refer_doi', '<>', '')
            ->select();

        if (empty($refers)) {
            return jsonSuccess([
                'article_id'      => $articleId,
                'p_article_id'    => $pInfo['p_article_id'],
                'total_checked'   => 0,
                'retracted_count' => 0,
                'error_count'     => 0,
                'list'            => [],
            ]);
        }

        $list = [];
        $retractedCount = 0;
        // Failed lookups are counted so callers can tell "no retraction found"
        // apart from "could not be checked".
        $errorCount = 0;

        foreach ($refers as $refer) {
            $doi = $this->cleanDoi($refer['refer_doi']);
            if ($doi == '') {
                continue;
            }

            // Pause between consecutive requests only, not after the last one.
            if (!empty($list)) {
                usleep(200000);
            }

            $item = [
                'p_refer_id'    => $refer['p_refer_id'],
                'index'         => $refer['index'],
                'refer_doi'     => $doi,
                'refer_content' => $refer['refer_content'] ?? '',
            ];

            $res = $this->fetchCrossRefData($doi);
            if (!$res['success']) {
                $item['crossref_success'] = false;
                $item['crossref_error']   = $res['error'];
                $errorCount++;
            } else {
                $parsed = $this->parseCrossRefMessage($doi, $res['message']);
                $item['crossref_success'] = true;
                $item['crossref']         = $parsed;
                if ($parsed['is_retracted']) {
                    $retractedCount++;
                }
            }

            $list[] = $item;
        }

        return jsonSuccess([
            'article_id'      => $articleId,
            'p_article_id'    => $pInfo['p_article_id'],
            'total_checked'   => count($list),
            'retracted_count' => $retractedCount,
            'error_count'     => $errorCount,
            'list'            => $list,
        ]);
    }

    // ========== Scraping corresponding-author emails from DOI landing pages ==========

    /**
     * Follows the doi.org redirect chain for a DOI and downloads the landing page HTML.
     *
     * @param string $doi Bare DOI.
     * @return array ['success' => true, 'html' => string, 'final_url' => string] on HTTP 200,
     *               ['success' => false, 'error' => string] otherwise.
     */
    private function fetchPageByDoi($doi)
    {
        // Percent-encode each path segment but keep the "/" separators. DOIs
        // may contain characters such as "<", ">", "#" or spaces that are not
        // valid in a URL. Decoding first avoids double-encoding a DOI that was
        // stored already escaped.
        $segments = explode('/', (string)$doi);
        foreach ($segments as $i => $segment) {
            $segments[$i] = rawurlencode(rawurldecode($segment));
        }
        $url = 'https://doi.org/' . implode('/', $segments);

        $ch = curl_init();
        if ($ch === false) {
            return ['success' => false, 'error' => 'CURL错误: ' . 'curl_init failed'];
        }
        curl_setopt($ch, CURLOPT_URL, $url);
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
        curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
        curl_setopt($ch, CURLOPT_MAXREDIRS, 10);
        // Redirect targets are chosen by remote servers; never let them switch
        // to a non-web protocol.
        curl_setopt($ch, CURLOPT_PROTOCOLS, CURLPROTO_HTTP | CURLPROTO_HTTPS);
        curl_setopt($ch, CURLOPT_REDIR_PROTOCOLS, CURLPROTO_HTTP | CURLPROTO_HTTPS);
        // NOTE(review): certificate verification is disabled here; enable it
        // once the server's CA bundle is confirmed to be configured.
        curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false);
        curl_setopt($ch, CURLOPT_TIMEOUT, 30);
        curl_setopt($ch, CURLOPT_USERAGENT, 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36');
        curl_setopt($ch, CURLOPT_HTTPHEADER, [
            'Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
            'Accept-Language: en-US,en;q=0.5',
        ]);

        $html = curl_exec($ch);

        if (curl_errno($ch)) {
            $error = curl_error($ch);
            curl_close($ch);
            return ['success' => false, 'error' => 'CURL错误: ' . $error];
        }
        // URL and status of the last hop after redirects.
        $finalUrl = curl_getinfo($ch, CURLINFO_EFFECTIVE_URL);
        $httpCode = curl_getinfo($ch, CURLINFO_HTTP_CODE);
        curl_close($ch);

        if ($httpCode != 200) {
            return ['success' => false, 'error' => 'HTTP ' . $httpCode];
        }

        return ['success' => true, 'html' => $html, 'final_url' => $finalUrl];
    }

    /**
     * Maps the host of a landing-page URL to a publisher key.
     *
     * A host matches a known domain when it equals the domain or is a
     * sub-domain of it; a bare substring match would e.g. classify "group.com"
     * as "oup.com".
     *
     * @param string $url Final URL of the landing page.
     * @return string Publisher key such as 'springer' or 'elsevier'; 'unknown' when nothing matches.
     */
    private function detectPublisher($url)
    {
        $host = strtolower(parse_url($url, PHP_URL_HOST) ?: '');
        if ($host === '') {
            return 'unknown';
        }

        $map = [
            'mdpi.com'           => 'mdpi',
            'springer.com'       => 'springer',
            'springerlink.com'   => 'springer',
            'nature.com'         => 'springer',
            'biomedcentral.com'  => 'springer',
            'sciencedirect.com'  => 'elsevier',
            'elsevier.com'       => 'elsevier',
            'wiley.com'          => 'wiley',
            'frontiersin.org'    => 'frontiers',
            'tandfonline.com'    => 'taylor_francis',
            'sagepub.com'        => 'sage',
            'oup.com'            => 'oxford',
            'plos.org'           => 'plos',
            'hindawi.com'        => 'hindawi',
            'cell.com'           => 'cell',
            'jci.org'            => 'jci',
            'asm.org'            => 'asm',
            'iucr.org'           => 'iucr',
            'rsc.org'            => 'rsc',
            'acs.org'            => 'acs',
        ];

        foreach ($map as $domain => $publisher) {
            $suffix = '.' . $domain;
            if ($host === $domain || substr($host, -strlen($suffix)) === $suffix) {
                return $publisher;
            }
        }

        // "onlinelibrary.wiley" under any top-level domain, matched on label
        // boundaries (the .com form is already covered by wiley.com above).
        if (preg_match('/(?:^|\.)onlinelibrary\.wiley\./', $host)) {
            return 'wiley';
        }

        return 'unknown';
    }

    /**
     * Collects the e-mail addresses found in an HTML document, minus system/publisher ones.
     *
     * An address is dropped when it contains one of the system keywords (role
     * mailboxes, publisher domains) or when its domain part has an asset file
     * extension as a label, which is how names like "logo@2x.png" get picked up
     * by the address pattern.
     *
     * @param string $html Page source.
     * @return string[] Distinct addresses in order of first appearance.
     */
    private function extractEmails($html)
    {
        $all = [];
        if (preg_match_all('/[a-zA-Z0-9._%+\-]+@[a-zA-Z0-9.\-]+\.[a-zA-Z]{2,}/', $html, $m)) {
            $all = array_unique($m[0]);
        }

        $systemKeywords = [
            'noreply', 'no-reply', 'support', 'info@', 'admin@', 'webmaster',
            'editor@', 'editorial@', 'help@', 'contact@', 'privacy@', 'service@',
            'marketing@', 'feedback@', 'copyright@', 'permissions@',
            'mdpi.com', 'springer.com', 'elsevier.com', 'wiley.com',
            'frontiersin.org', 'nature.com', 'oup.com', 'sagepub.com',
            'tandfonline.com', 'plos.org', 'hindawi.com', 'biomedcentral.com',
            'crossref.org', 'doi.org', 'example.com', 'sentry.io',
            'acs.org', 'rsc.org',
        ];

        $filtered = [];
        foreach ($all as $email) {
            $lower = strtolower($email);

            $skip = false;
            foreach ($systemKeywords as $kw) {
                if (strpos($lower, $kw) !== false) {
                    $skip = true;
                    break;
                }
            }

            if (!$skip) {
                // Look for the extension only as a whole label of the domain
                // part. A plain substring test would also reject real addresses
                // such as "a.jsmith@uni.edu" or "x@cs.jsu.edu".
                $domainPart = substr($lower, strrpos($lower, '@') + 1);
                if (preg_match('/\.(?:png|jpe?g|gif|svg|webp|css|js)(?:\.|$)/', $domainPart)) {
                    $skip = true;
                }
            }

            if (!$skip) {
                $filtered[] = $email;
            }
        }

        return $filtered;
    }

    /**
     * Parse an MDPI article page for corresponding authors.
     *
     * Corresponding authors are recognised by a trailing "*" after the name;
     * their addresses are taken from mailto: links. Names and addresses are
     * paired purely by position (i-th starred name with i-th mailto address),
     * which is a heuristic.
     *
     * @param string $html Raw page markup.
     * @return array ['corresponding_authors' => [['name' => string, 'email' => string], ...],
     *                'all_emails' => string[]]
     */
    private function parseMdpiEmail($html)
    {
        $result = ['corresponding_authors' => [], 'all_emails' => []];
        $result['all_emails'] = $this->extractEmails($html);

        // Trim, drop blanks and de-duplicate so that positions stay aligned with
        // the (also de-duplicated) mailto list below.
        $normalizeNames = function ($names) {
            return array_values(array_unique(array_filter(array_map('trim', $names), 'strlen')));
        };

        // Preferred: a <span> holding the name, immediately followed by "*".
        $corrNames = [];
        if (preg_match_all('/<span[^>]*>\s*([^<]+?)\s*<\/span>\s*\*/', $html, $m)) {
            $corrNames = $normalizeNames($m[1]);
        }
        // Fallback: capitalised multi-word text followed by "*".
        if (empty($corrNames) && preg_match_all('/([A-Z][a-z]+(?:\s+[A-Z][a-z]+)+)\s*\*/', $html, $m)) {
            $corrNames = $normalizeNames($m[1]);
        }

        // mailto: links, excluding any address that mentions "mdpi".
        $mailtoEmails = [];
        if (preg_match_all('/href=["\']mailto:([a-zA-Z0-9._%+\-]+@[a-zA-Z0-9.\-]+\.[a-zA-Z]{2,})["\']/', $html, $m)) {
            $mailtoEmails = array_values(array_unique($m[1]));
        }
        $mailtoEmails = array_values(array_filter($mailtoEmails, function ($e) {
            return stripos($e, 'mdpi') === false;
        }));

        foreach ($corrNames as $i => $name) {
            $entry = ['name' => $name, 'email' => ''];
            if (isset($mailtoEmails[$i])) {
                $entry['email'] = $mailtoEmails[$i];
            }
            $result['corresponding_authors'][] = $entry;
        }

        // No starred names at all: report the mailto addresses without names.
        if (empty($result['corresponding_authors']) && !empty($mailtoEmails)) {
            foreach ($mailtoEmails as $email) {
                $result['corresponding_authors'][] = ['name' => '', 'email' => $email];
            }
        }

        return $result;
    }

    /**
     * Parse a Springer / Nature / BMC article page for corresponding authors.
     *
     * Strategies, tried in order until one yields an address:
     *  1. anchors carrying data-test="author-letter" with a mailto: href;
     *  2. the first mailto: anchor shortly after a "Correspondence" /
     *     "Corresponding author" label;
     *  3. a plain-text address within 300 non-tag characters of "Correspond...".
     * When an address was found, a name following "Correspondence to" /
     * "Corresponding author" is attached to the first entry.
     *
     * @param string $html Raw page markup.
     * @return array ['corresponding_authors' => [['name' => string, 'email' => string], ...],
     *                'all_emails' => string[]]
     */
    private function parseSpringerEmail($html)
    {
        $result = ['corresponding_authors' => [], 'all_emails' => []];
        $result['all_emails'] = $this->extractEmails($html);

        $emails = [];

        // Strategy 1. The capture stops at "?" so a "?subject=..." mailto query
        // does not end up inside the address.
        if (preg_match_all('/data-test=["\']author-letter["\'][^>]*href=["\']mailto:([^"\'?]+)/', $html, $m)) {
            $emails = $m[1];
        }

        // Strategy 2. The anchor is matched explicitly as "<a ... href=": a pattern
        // that only skips complete <...> tags can never land on an href attribute,
        // because the href lives inside a tag.
        if (empty($emails)
            && preg_match('/[Cc]orrespond(?:ence|ing\s+author).{0,500}?<a\b[^>]*href=["\']mailto:([^"\'?]+)/s', $html, $m)) {
            $emails[] = $m[1];
        }

        // Strategy 3. Plain-text address close to the label.
        if (empty($emails)
            && preg_match_all('/[Cc]orrespond[a-z]*[^<]{0,300}?([a-zA-Z0-9._%+\-]+@[a-zA-Z0-9.\-]+\.[a-zA-Z]{2,})/', $html, $m)) {
            foreach ($m[1] as $email) {
                $clean = $this->extractEmails($email);
                if (!empty($clean)) {
                    $emails[] = $clean[0];
                } elseif (filter_var($email, FILTER_VALIDATE_EMAIL)) {
                    // Rejected by the system-mailbox filter but still syntactically valid.
                    $emails[] = $email;
                }
            }
        }

        // Trim, drop blanks and de-duplicate case-insensitively (first spelling wins).
        $seen = [];
        foreach ($emails as $email) {
            $email = trim($email);
            $key = strtolower($email);
            if ($email === '' || isset($seen[$key])) {
                continue;
            }
            $seen[$key] = true;
            $result['corresponding_authors'][] = ['name' => '', 'email' => $email];
        }

        // Try to attach a name to the first entry.
        if (!empty($result['corresponding_authors'])) {
            if (preg_match('/[Cc]orrespond[a-z]*\s+(?:to|author)[:\s]*([A-Z][a-zA-Z\s.\-]+?)(?:\.|<|,)/', $html, $m)) {
                $name = trim($m[1]);
                if (strlen($name) > 2 && strlen($name) < 80) {
                    $result['corresponding_authors'][0]['name'] = $name;
                }
            }
        }

        return $result;
    }

    /**
     * Parse a Frontiers article page for corresponding authors.
     *
     * Primary source is the "*Correspondence: Name, address" block; when that
     * yields nothing, every mailto: link whose address does not mention
     * "frontiersin" is reported, using the link text as the name.
     *
     * @param string $html Raw page markup.
     * @return array ['corresponding_authors' => [['name' => string, 'email' => string], ...],
     *                'all_emails' => string[]]
     */
    private function parseFrontiersEmail($html)
    {
        $authors = [];

        // Each block runs from "*Correspondence:" up to the next <br, </p, </div or newline.
        $segments = [];
        if (preg_match_all('/\*\s*[Cc]orrespondence:\s*(.*?)(?:<br|<\/p|<\/div|\n)/s', $html, $found)) {
            $segments = $found[1];
        }

        foreach ($segments as $segment) {
            // Keep anchors so addresses that only occur in href attributes survive.
            $withAnchors = strip_tags($segment, '<a>');
            if (!preg_match_all('/([a-zA-Z0-9._%+\-]+@[a-zA-Z0-9.\-]+\.[a-zA-Z]{2,})/', $withAnchors, $hit)) {
                continue;
            }

            // Name = first comma/semicolon separated piece that has no "@"
            // and contains an upper-case letter.
            $who = '';
            foreach (preg_split('/[,;]/', strip_tags($withAnchors)) as $piece) {
                $piece = trim($piece);
                if (strpos($piece, '@') === false && preg_match('/[A-Z]/', $piece)) {
                    $who = $piece;
                    break;
                }
            }

            foreach ($hit[1] as $address) {
                $authors[] = ['name' => $who, 'email' => $address];
            }
        }

        // Fallback: all mailto links that are not Frontiers' own addresses.
        if (empty($authors)
            && preg_match_all('/href=["\']mailto:([^"\']+)["\'][^>]*>([^<]*)</i', $html, $links, PREG_SET_ORDER)) {
            foreach ($links as $link) {
                if (stripos($link[1], 'frontiersin') !== false) {
                    continue;
                }
                $caption = trim(strip_tags($link[2]));
                $authors[] = ['name' => $caption ? $caption : '', 'email' => $link[1]];
            }
        }

        return [
            'corresponding_authors' => $authors,
            'all_emails'            => $this->extractEmails($html),
        ];
    }

    /**
     * Parse a Wiley article page for the corresponding author.
     *
     * Looks for a mailto: link after a "Correspond..." label (up to a closing
     * </p), else for a plain-text address within 500 non-tag characters of the
     * label. At most one entry is returned; a name found directly after the
     * label (text up to the first comma) is attached to it.
     *
     * @param string $html Raw page markup.
     * @return array ['corresponding_authors' => [['name' => string, 'email' => string], ...],
     *                'all_emails' => string[]]
     */
    private function parseWileyEmail($html)
    {
        $allEmails = $this->extractEmails($html);
        $authors = [];

        $mailtoPattern = '/[Cc]orrespond[a-z]*[:\s].*?href=["\']mailto:([^"\']+)["\'].*?<\/p/s';
        $textPattern = '/[Cc]orrespond[a-z]*[^<]{0,500}?([a-zA-Z0-9._%+\-]+@[a-zA-Z0-9.\-]+\.[a-zA-Z]{2,})/';

        if (preg_match($mailtoPattern, $html, $hit)) {
            $authors[] = ['name' => '', 'email' => $hit[1]];
        } elseif (preg_match($textPattern, $html, $hit)) {
            $authors[] = ['name' => '', 'email' => $hit[1]];
        }

        // Only look for a name once an address is known.
        if ($authors
            && preg_match('/[Cc]orrespond[a-z]*[:\s]*([A-Z][a-zA-Z.\-\s]{2,40}?),/', $html, $hit)) {
            $authors[0]['name'] = trim($hit[1]);
        }

        return [
            'corresponding_authors' => $authors,
            'all_emails'            => $allEmails,
        ];
    }

    /**
     * Parse an Elsevier / ScienceDirect article page for corresponding authors.
     *
     * Strategies:
     *  1. a mailto: link following an element whose class contains "corresponding";
     *  2. text of elements with a data-* attribute starting with "corresponding"
     *     (gives names only);
     *  3. a plain-text address within 300 non-tag characters of "Correspond...".
     * Strategy 3 runs whenever no address is known yet, i.e. also after
     * strategy 2 produced names without addresses.
     *
     * @param string $html Raw page markup.
     * @return array ['corresponding_authors' => [['name' => string, 'email' => string], ...],
     *                'all_emails' => string[]]
     */
    private function parseElsevierEmail($html)
    {
        $result = ['corresponding_authors' => [], 'all_emails' => []];
        $result['all_emails'] = $this->extractEmails($html);

        // Strategy 1: class="...corresponding..." element followed by a mailto link.
        if (preg_match_all('/class="[^"]*corresponding[^"]*"[^>]*>.*?href=["\']mailto:([^"\']+)["\']/si', $html, $m)) {
            foreach ($m[1] as $email) {
                $result['corresponding_authors'][] = ['name' => '', 'email' => $email];
            }
        }
        $hasEmail = !empty($result['corresponding_authors']);

        // Strategy 2: names from data-*="corresponding..." elements.
        if (!$hasEmail) {
            if (preg_match_all('/data-[a-z\-]*=["\']corresponding[^"\']*["\'][^>]*>([^<]+)/i', $html, $m)) {
                foreach ($m[1] as $name) {
                    $name = trim(strip_tags($name));
                    if ($name !== '') {
                        $result['corresponding_authors'][] = ['name' => $name, 'email' => ''];
                    }
                }
            }
        }

        // Strategy 3: must also run when only names are known, otherwise a
        // name-only result hides an address that is present on the page.
        if (!$hasEmail) {
            if (preg_match('/[Cc]orrespond[a-z]*[^<]{0,300}?([a-zA-Z0-9._%+\-]+@[a-zA-Z0-9.\-]+\.[a-zA-Z]{2,})/', $html, $m)) {
                if (count($result['corresponding_authors']) === 1) {
                    // Exactly one known name: assume the address belongs to it.
                    $result['corresponding_authors'][0]['email'] = $m[1];
                } else {
                    // None or several names: pairing would be a guess, so the
                    // address is reported as its own entry.
                    $result['corresponding_authors'][] = ['name' => '', 'email' => $m[1]];
                }
            }
        }

        return $result;
    }

    /**
     * Parse a Taylor & Francis article page for the corresponding author.
     *
     * Looks at the first <span> whose class contains "corresp"; failing that,
     * at the text following a literal "CONTACT" label. At most one entry is
     * returned. The name is whatever text precedes the address.
     *
     * @param string $html Raw page markup.
     * @return array ['corresponding_authors' => [['name' => string, 'email' => string], ...],
     *                'all_emails' => string[]]
     */
    private function parseTaylorFrancisEmail($html)
    {
        $result = ['corresponding_authors' => [], 'all_emails' => []];
        $result['all_emails'] = $this->extractEmails($html);

        // <span class="...corresp..."> ... </span>
        if (preg_match('/class="[^"]*corresp[^"]*"[^>]*>(.*?)<\/span>/si', $html, $m)) {
            $block = $m[1];
            if (preg_match('/([a-zA-Z0-9._%+\-]+@[a-zA-Z0-9.\-]+\.[a-zA-Z]{2,})/', $block, $em)) {
                // Cut everything from the address onwards. The "s" modifier lets
                // ".*" cross newlines; without it, lines following the address in
                // a multi-line block would remain part of the name.
                $name = trim(strip_tags(preg_replace('/[a-zA-Z0-9._%+\-]+@.*/s', '', $block)));
                $result['corresponding_authors'][] = ['name' => $name, 'email' => $em[1]];
            }
        }

        // "CONTACT <name> <address>" section.
        if (empty($result['corresponding_authors'])) {
            if (preg_match('/CONTACT\s+(.*?)([a-zA-Z0-9._%+\-]+@[a-zA-Z0-9.\-]+\.[a-zA-Z]{2,})/s', $html, $m)) {
                $name = trim(strip_tags($m[1]));
                $result['corresponding_authors'][] = ['name' => $name, 'email' => $m[2]];
            }
        }

        return $result;
    }

    /**
     * Parse a PLOS article page for corresponding authors.
     *
     * Recognises the "* E-mail: address" (or "* Email: address") notation and
     * returns one nameless entry per occurrence.
     *
     * @param string $html Raw page markup.
     * @return array ['corresponding_authors' => [['name' => string, 'email' => string], ...],
     *                'all_emails' => string[]]
     */
    private function parsePlosEmail($html)
    {
        $authors = [];

        $marker = '/\*\s*E-?mail:\s*([a-zA-Z0-9._%+\-]+@[a-zA-Z0-9.\-]+\.[a-zA-Z]{2,})/';
        if (preg_match_all($marker, $html, $hits)) {
            $authors = array_map(function ($address) {
                return ['name' => '', 'email' => $address];
            }, $hits[1]);
        }

        return [
            'corresponding_authors' => $authors,
            'all_emails'            => $this->extractEmails($html),
        ];
    }

    /**
     * Publisher-independent fallback parser for corresponding-author addresses.
     *
     * Strategy 1 tries three patterns in order and stops at the first one that
     * yields a valid address:
     *   a) a mailto: target shortly after "Correspondence" / "Corresponding author",
     *   b) a plain-text address within 500 non-tag characters of "Correspond...",
     *   c) the "* E-mail: address" / "* Correspondence: address" notation.
     * Strategy 2 (only if strategy 1 found nothing) returns every mailto: link
     * that does not look like a system mailbox.
     * Entries never carry a name.
     *
     * @param string $html Raw page markup.
     * @return array ['corresponding_authors' => [['name' => '', 'email' => string], ...],
     *                'all_emails' => string[]]
     */
    private function parseGenericEmail($html)
    {
        $result = ['corresponding_authors' => [], 'all_emails' => []];
        $result['all_emails'] = $this->extractEmails($html);

        // In every pattern the address is the LAST capture group.
        $corrPatterns = [
            // The optional "<a ... href=" prefix is what allows this pattern to
            // match inside an anchor tag; skipping only complete <...> tags can
            // never reach an href attribute. "?" ends the capture so a mailto
            // query ("?subject=...") is not taken as part of the address.
            '/[Cc]orrespond(?:ing\s+author|ence)[:\s]*(?:<[^>]*>)*\s*(?:<[^>]*>)*\s*([^<]*?)\s*(?:<[^>]*>)*\s*(?:<a\b[^>]*?href=["\'])?mailto:([^"\'>\s?]+)/s',
            '/[Cc]orrespond[a-z]*[^<]{0,500}?([a-zA-Z0-9._%+\-]+@[a-zA-Z0-9.\-]+\.[a-zA-Z]{2,})/',
            '/\*\s*(?:E-?mail|Correspondence)[:\s]*([a-zA-Z0-9._%+\-]+@[a-zA-Z0-9.\-]+\.[a-zA-Z]{2,})/',
        ];

        foreach ($corrPatterns as $pattern) {
            if (preg_match_all($pattern, $html, $m)) {
                $seen = [];
                foreach (end($m) as $val) {
                    if (isset($seen[$val]) || !filter_var($val, FILTER_VALIDATE_EMAIL)) {
                        continue;
                    }
                    $seen[$val] = true;
                    $result['corresponding_authors'][] = ['name' => '', 'email' => $val];
                }
            }
            if (!empty($result['corresponding_authors'])) {
                break;
            }
        }

        // Strategy 2: all mailto links minus obvious system mailboxes.
        if (empty($result['corresponding_authors'])) {
            $mailtoEmails = [];
            if (preg_match_all('/href=["\']mailto:([a-zA-Z0-9._%+\-]+@[a-zA-Z0-9.\-]+\.[a-zA-Z]{2,})["\']/', $html, $m)) {
                $mailtoEmails = array_unique($m[1]);
            }

            $filtered = array_values(array_filter($mailtoEmails, function ($email) {
                $lower = strtolower($email);
                $skip = ['noreply', 'support', 'info@', 'admin', 'editor', 'help@', 'contact@',
                          'privacy', 'service', 'marketing', 'copyright', 'permission'];
                foreach ($skip as $kw) {
                    if (strpos($lower, $kw) !== false) {
                        return false;
                    }
                }
                return true;
            }));

            foreach ($filtered as $email) {
                $result['corresponding_authors'][] = ['name' => '', 'email' => $email];
            }
        }

        return $result;
    }

    /**
     * Fetch the publisher page behind a DOI and extract corresponding authors.
     *
     * The DOI is cleaned, the page is fetched, the publisher is detected from
     * the final URL and the matching parser is run. If a publisher-specific
     * parser finds no corresponding author, the generic parser is used instead.
     *
     * @param string $doi DOI as supplied by the caller.
     * @return array On failure: ['success' => false, 'error' => string] (plus
     *               'doi' when the page fetch failed). On success: 'success',
     *               'doi', 'url', 'publisher', 'corresponding_authors' and
     *               'all_emails'.
     */
    private function scrapeCorrespondingAuthor($doi)
    {
        $doi = $this->cleanDoi($doi);
        if ($doi == '') {
            return ['success' => false, 'error' => 'DOI为空'];
        }

        $page = $this->fetchPageByDoi($doi);
        if (!$page['success']) {
            return ['success' => false, 'doi' => $doi, 'error' => $page['error']];
        }

        $finalUrl = $page['final_url'];
        $html = $page['html'];
        $publisher = $this->detectPublisher($finalUrl);

        // Tracks whether the generic parser has already run, so that it is not
        // executed a second time on the same HTML in the fallback below.
        $usedGeneric = false;

        switch ($publisher) {
            case 'mdpi':
                $parsed = $this->parseMdpiEmail($html);
                break;
            case 'springer':
                $parsed = $this->parseSpringerEmail($html);
                break;
            case 'frontiers':
                $parsed = $this->parseFrontiersEmail($html);
                break;
            case 'wiley':
                $parsed = $this->parseWileyEmail($html);
                break;
            case 'elsevier':
                $parsed = $this->parseElsevierEmail($html);
                break;
            case 'taylor_francis':
                $parsed = $this->parseTaylorFrancisEmail($html);
                break;
            case 'plos':
                $parsed = $this->parsePlosEmail($html);
                break;
            default:
                $parsed = $this->parseGenericEmail($html);
                $usedGeneric = true;
                break;
        }

        // Publisher-specific parser came back empty: fall back to the generic one.
        if (!$usedGeneric && empty($parsed['corresponding_authors'])) {
            $generic = $this->parseGenericEmail($html);
            $parsed['corresponding_authors'] = $generic['corresponding_authors'];
        }

        return [
            'success'                => true,
            'doi'                    => $doi,
            'url'                    => $finalUrl,
            'publisher'              => $publisher,
            'corresponding_authors'  => $parsed['corresponding_authors'],
            'all_emails'             => $parsed['all_emails'],
        ];
    }

    /**
     * Look up the corresponding author e-mail for a single DOI.
     *
     * Request parameter:
     *   doi (string) article DOI; required, must be a non-blank string.
     *
     * @return mixed jsonError(...) when the parameter is missing/invalid or the
     *               lookup failed, otherwise jsonSuccess(...) with the lookup result.
     */
    public function getAuthorEmail()
    {
        $data = $this->request->param();

        // is_string() guards trim() against array input such as "doi[]=...".
        if (!isset($data['doi']) || !is_string($data['doi']) || trim($data['doi']) === '') {
            return jsonError('doi不能为空');
        }

        $result = $this->scrapeCorrespondingAuthor($data['doi']);
        if (!$result['success']) {
            return jsonError($result['error']);
        }

        return jsonSuccess($result);
    }

    /**
     * Look up corresponding author e-mails for several DOIs in one request.
     *
     * Request parameter:
     *   dois  comma-separated DOI string, or an array of DOI strings; at most
     *         20 entries after blank entries have been removed.
     *
     * Response payload: 'total', 'success_count' (pages fetched and parsed),
     * 'email_found_count' (results with at least one non-empty e-mail) and
     * 'list' (one result per DOI, in request order).
     *
     * @return mixed jsonError(...) on invalid input, otherwise jsonSuccess(...).
     */
    public function batchGetAuthorEmails()
    {
        $data = $this->request->param();
        $raw = isset($data['dois']) ? $data['dois'] : '';

        if (is_string($raw) && trim($raw) !== '') {
            $candidates = explode(',', $raw);
        } elseif (is_array($raw) && !empty($raw)) {
            // Array form ("dois[]=a&dois[]=b"); non-string elements are ignored.
            $candidates = array_filter($raw, 'is_string');
        } else {
            return jsonError('dois不能为空');
        }

        // array_values() re-indexes so the loop below can detect the last item.
        $doiList = array_values(array_filter(array_map('trim', $candidates)));
        if (empty($doiList)) {
            return jsonError('未提供有效的DOI');
        }
        if (count($doiList) > 20) {
            return jsonError('单次最多查询20个DOI');
        }

        $results = [];
        $successCount = 0;
        $emailFoundCount = 0;
        $lastIndex = count($doiList) - 1;

        foreach ($doiList as $index => $rawDoi) {
            $result = $this->scrapeCorrespondingAuthor($rawDoi);
            if ($result['success']) {
                $successCount++;
                // Parsers may return name-only entries, so look for a real address.
                foreach ($result['corresponding_authors'] as $author) {
                    if (!empty($author['email'])) {
                        $emailFoundCount++;
                        break;
                    }
                }
            }
            $results[] = $result;

            // Pause 0.5 s between lookups; nothing follows the last one.
            if ($index < $lastIndex) {
                usleep(500000);
            }
        }

        return jsonSuccess([
            'total'             => count($results),
            'success_count'     => $successCount,
            'email_found_count' => $emailFoundCount,
            'list'              => $results,
        ]);
    }

    /**
     * Report how far reviewer "field" values are covered by major assignments.
     *
     * Response payload:
     *   total_reviewers     reviewer-info rows with state = 0
     *   has_field           of those, rows with a non-empty field
     *   has_major_to_user   distinct user_id values in major_to_user with state = 0
     *   has_field_no_major  rows with state = 0 and a non-empty field whose
     *                       reviewer_id has no major_to_user row with state = 0
     *
     * @return mixed jsonSuccess(...) carrying the four counters.
     */
    public function statistics()
    {
        $totalReviewers = $this->user_reviewer_info_obj
            ->where('state', 0)
            ->count();

        $hasField = $this->user_reviewer_info_obj
            ->where('state', 0)
            ->where('field', '<>', '')
            ->count();

        $hasMajorToUser = Db::name('major_to_user')
            ->where('state', 0)
            ->group('user_id')
            ->count();

        // The NOT IN subquery is passed as a closure: a plain SQL string operand
        // is treated by the query builder as a comma-separated list of values,
        // not as a subquery.
        $hasFieldNoMajor = $this->user_reviewer_info_obj
            ->alias('ri')
            ->where('ri.state', 0)
            ->where('ri.field', '<>', '')
            ->where('ri.reviewer_id', 'not in', function ($query) {
                $query->name('major_to_user')->where('state', 0)->field('user_id');
            })
            ->count();

        return jsonSuccess([
            'total_reviewers'       => $totalReviewers,
            'has_field'             => $hasField,
            'has_major_to_user'     => $hasMajorToUser,
            'has_field_no_major'    => $hasFieldNoMajor,
        ]);
    }
}