From 717c662d817c6275a8c802ce74528180de718dbd Mon Sep 17 00:00:00 2001
From: wangjinlei <751475802@qq.com>
Date: Fri, 10 Apr 2026 15:57:19 +0800
Subject: [PATCH] 1

---
Review notes (git am ignores everything between the "---" line and the diff):

* This patch text was received with its line breaks and indentation collapsed.
  The hunks below are re-flowed assuming four-space indentation, and blank
  context lines are inferred from the hunk line counts. The blob ids on the
  "index" lines still describe the original submission. Regenerate the patch
  from a working tree before applying it.
* Base: markUnusedReferencesForArticle() and extractMyciteIds() are protected
  instead of public, the two is_used updates run in one transaction, and a
  failure is logged instead of being silently dropped.
* References: rewriteTableDataJsonCitationsToMycite() no longer returns the
  false of a failed json_encode() and returns its input when nothing changed;
  applyBlueCitationsToMycite() survives a PCRE error; a dry run no longer
  refreshes is_used; the fullwidth comma is spelled "\u{FF0C}".
* Not addressed: the Preaccept hunks leave the docblocks of the two removed
  methods behind; their opening lines are outside the hunk context.

 application/api/controller/Base.php       | 190 ++++++++++++++++
 application/api/controller/Preaccept.php  |  79 +------
 application/api/controller/Production.php |   1 +
 application/api/controller/References.php | 278 +++++++++++++++++----
 4 files changed, 424 insertions(+), 124 deletions(-)

diff --git a/application/api/controller/Base.php b/application/api/controller/Base.php
index ce739ee..77e1da7 100644
--- a/application/api/controller/Base.php
+++ b/application/api/controller/Base.php
@@ -1047,6 +1047,196 @@ class Base extends Controller
         return $this->production_article_obj->insertGetId($insert);
     }
 
+    /**
+     * Recompute production_article_refer.is_used for one article.
+     *
+     * Collects every p_refer_id cited through a mycite tag in article_main,
+     * article_main_table and article_main_image, resets is_used to 0 on all active
+     * references of the article, then sets is_used back to 1 on the cited ones.
+     *
+     * Kept protected: the visible callers are controllers extending Base, and
+     * ThinkPHP can dispatch public controller methods as request actions.
+     *
+     * @param int $articleId article_id used by article_main and production_article
+     * @return void
+     */
+    protected function markUnusedReferencesForArticle(int $articleId)
+    {
+        if ($articleId <= 0) return;
+
+        $production = Db::name('production_article')
+            ->where('article_id', $articleId)
+            ->where('state', 0)
+            ->field('p_article_id')
+            ->find();
+        $pArticleId = intval($production['p_article_id'] ?? 0);
+        if ($pArticleId <= 0) return;
+
+        // 1) Collect the p_refer_id values in use (same scope as convertArticleMainCitationsToMycite: body + tables + images)
+        $usedIds = [];
+
+        // t_article_main: type=0 etc. are body rows; for type=2 the citations live in article_main_table and content may be empty, scanning it anyway costs nothing extra
+        $mains = Db::name('article_main')
+            ->where('article_id', $articleId)
+            ->whereIn('state', [0, 2])
+            ->field('content')
+            ->select();
+        foreach ($mains as $row) {
+            $usedIds = array_merge($usedIds, $this->extractMyciteIds((string)($row['content'] ?? '')));
+        }
+
+        // t_article_main_table: table data behind type=2 rows (url included)
+        $tables = Db::name('article_main_table')
+            ->where('article_id', $articleId)
+            ->where('state', 0)
+            ->field('table_data,html_data,title,note,url')
+            ->select();
+        foreach ($tables as $row) {
+            $usedIds = array_merge($usedIds, $this->extractMyciteIdsFromTableDataJson((string)($row['table_data'] ?? '')));
+            $usedIds = array_merge($usedIds, $this->extractMyciteIds((string)($row['html_data'] ?? '')));
+            $usedIds = array_merge($usedIds, $this->extractMyciteIds((string)($row['title'] ?? '')));
+            $usedIds = array_merge($usedIds, $this->extractMyciteIds((string)($row['note'] ?? '')));
+            $usedIds = array_merge($usedIds, $this->extractMyciteIds((string)($row['url'] ?? '')));
+        }
+
+        // t_article_main_image: captions etc. behind type=1 rows
+        $images = Db::name('article_main_image')
+            ->where('article_id', $articleId)
+            ->where('state', 0)
+            ->field('title,note,url')
+            ->select();
+        foreach ($images as $row) {
+            $usedIds = array_merge($usedIds, $this->extractMyciteIds((string)($row['title'] ?? '')));
+            $usedIds = array_merge($usedIds, $this->extractMyciteIds((string)($row['note'] ?? '')));
+            $usedIds = array_merge($usedIds, $this->extractMyciteIds((string)($row['url'] ?? '')));
+        }
+
+        $usedIds = array_values(array_unique($usedIds));
+
+        // 2) Flag: reset everything to 0, then set the cited references to 1.
+        //    Both updates share one transaction so a failure between them cannot leave every reference marked unused.
+        Db::startTrans();
+        try {
+            Db::name('production_article_refer')
+                ->where('p_article_id', $pArticleId)
+                ->where('state', 0)
+                ->update(['is_used' => 0, 'update_time' => time()]);
+
+            if (!empty($usedIds)) {
+                Db::name('production_article_refer')
+                    ->where('p_article_id', $pArticleId)
+                    ->where('state', 0)
+                    ->whereIn('p_refer_id', $usedIds)
+                    ->update(['is_used' => 1, 'update_time' => time()]);
+            }
+            Db::commit();
+        } catch (\Exception $e) {
+            Db::rollback();
+            // Best-effort helper: the caller's flow must not break, but the failure is recorded
+            error_log('markUnusedReferencesForArticle failed for article ' . $articleId . ': ' . $e->getMessage());
+        }
+    }
+
+    /**
+     * Extract the reference ids carried by mycite tags in a text/HTML fragment.
+     *
+     * A data-id attribute may list several comma-separated ids; only positive integers are kept.
+     *
+     * @param string $text fragment to scan
+     * @return int[] unique ids, sorted ascending
+     */
+    protected function extractMyciteIds(string $text): array
+    {
+        if ($text === '') return [];
+        $ids = [];
+        if (preg_match_all('/<\s*mycite\b[^>]*\bdata-id\s*=\s*(["\'])(.*?)\1[^>]*>/iu', $text, $m)) {
+            foreach ($m[2] as $raw) {
+                $raw = trim((string)$raw);
+                if ($raw === '') continue;
+                $parts = preg_split('/\s*,\s*/', $raw);
+                foreach ($parts as $p) {
+                    $p = trim((string)$p);
+                    if ($p === '') continue;
+                    $v = intval($p);
+                    if ($v > 0) $ids[] = $v;
+                }
+            }
+        }
+        $ids = array_values(array_unique($ids));
+        sort($ids);
+        return $ids;
+    }
+
+    /**
+     * Decode a table_data JSON string into an array.
+     *
+     * table_data is a JSON two-dimensional array [[{text,colspan,rowspan},...],...];
+     * a doubly JSON-encoded string is also accepted. A leading UTF-8 BOM is stripped.
+     *
+     * @param string $raw raw table_data value
+     * @return array|null decoded array, or null for empty input or anything that does not decode to an array
+     */
+    protected function decodeTableDataJsonToArray(string $raw): ?array
+    {
+        $raw = trim($raw);
+        if ($raw === '') {
+            return null;
+        }
+        if (preg_match('/^\xEF\xBB\xBF/', $raw)) {
+            $raw = substr($raw, 3);
+        }
+        $decoded = json_decode($raw, true);
+        if (json_last_error() !== JSON_ERROR_NONE) {
+            return null;
+        }
+        if (is_array($decoded)) {
+            return $decoded;
+        }
+        if (is_string($decoded)) {
+            $decoded2 = json_decode($decoded, true);
+            if (json_last_error() === JSON_ERROR_NONE && is_array($decoded2)) {
+                return $decoded2;
+            }
+        }
+        return null;
+    }
+
+    /**
+     * Collect the mycite reference ids stored in a table_data value.
+     *
+     * Valid JSON is walked recursively and every string value is scanned; anything else is scanned as one raw string.
+     *
+     * @param string $tableDataJson raw table_data value
+     * @return int[]
+     */
+    protected function extractMyciteIdsFromTableDataJson(string $tableDataJson): array
+    {
+        $tableDataJson = trim($tableDataJson);
+        if ($tableDataJson === '') {
+            return [];
+        }
+        $decoded = $this->decodeTableDataJsonToArray($tableDataJson);
+        if ($decoded === null) {
+            return $this->extractMyciteIds($tableDataJson);
+        }
+        $ids = [];
+        $walk = function ($node) use (&$walk, &$ids) {
+            if (is_string($node)) {
+                $ids = array_merge($ids, $this->extractMyciteIds($node));
+                return;
+            }
+            if (is_array($node)) {
+                foreach ($node as $v) {
+                    $walk($v);
+                }
+            }
+        };
+        $walk($decoded);
+        $ids = array_values(array_unique($ids));
+        sort($ids);
+        return $ids;
+    }
+
 
 
 }
diff --git a/application/api/controller/Preaccept.php b/application/api/controller/Preaccept.php
index 816cbcd..78fe124 100644
--- a/application/api/controller/Preaccept.php
+++ b/application/api/controller/Preaccept.php
@@ -1156,7 +1156,7 @@ class Preaccept extends Base
 
         // 若检测到引用删除,则进行全文扫描并标记未被引用条目的 is_used=0(含 table 内容)
         if ($hasCitationDeletion) {
-//            $this->markUnusedReferencesForArticle(intval($am_info['article_id']));
+            $this->markUnusedReferencesForArticle(intval($am_info['article_id']));
         }
 //        return jsonSuccess([]);
         //返回更新数据 20260119 start
@@ -1198,27 +1198,7 @@ class Preaccept extends Base
      *
      * @return int[]
      */
-    private function extractMyciteIds(string $text): array
-    {
-        if ($text === '') return [];
-        $ids = [];
-        if (preg_match_all('/<\s*mycite\b[^>]*\bdata-id\s*=\s*(["\'])(.*?)\1[^>]*>/iu', $text, $m)) {
-            foreach ($m[2] as $raw) {
-                $raw = trim((string)$raw);
-                if ($raw === '') continue;
-                $parts = preg_split('/\s*,\s*/', $raw);
-                foreach ($parts as $p) {
-                    $p = trim((string)$p);
-                    if ($p === '') continue;
-                    $v = intval($p);
-                    if ($v > 0) $ids[] = $v;
-                }
-            }
-        }
-        $ids = array_values(array_unique($ids));
-        sort($ids);
-        return $ids;
-    }
+
 
     /**
      * 全文扫描(正文 + table),将 production_article_refer 未被引用的条目标记 is_used=0
@@ -1226,62 +1206,7 @@ class Preaccept extends Base
      *
      * 注意:依赖 production_article_refer 表存在 is_used 字段(int/tinyint)
      */
-    private function markUnusedReferencesForArticle(int $articleId)
-    {
-        if ($articleId <= 0) return;
 
-        $production = Db::name('production_article')
-            ->where('article_id', $articleId)
-            ->where('state', 0)
-            ->field('p_article_id')
-            ->find();
-        $pArticleId = intval($production['p_article_id'] ?? 0);
-        if ($pArticleId <= 0) return;
-
-        // 1) 收集已使用的 p_refer_id
-        $usedIds = [];
-
-        $mains = Db::name('article_main')
-            ->where('article_id', $articleId)
-            ->whereIn('state', [0, 2])
-            ->field('content')
-            ->select();
-        foreach ($mains as $row) {
-            $usedIds = array_merge($usedIds, $this->extractMyciteIds((string)($row['content'] ?? '')));
-        }
-
-        $tables = Db::name('article_main_table')
-            ->where('article_id', $articleId)
-            ->where('state', 0)
-            ->field('table_data,html_data,title,note')
-            ->select();
-        foreach ($tables as $row) {
-            $usedIds = array_merge($usedIds, $this->extractMyciteIds((string)($row['table_data'] ?? '')));
-            $usedIds = array_merge($usedIds, $this->extractMyciteIds((string)($row['html_data'] ?? '')));
-            $usedIds = array_merge($usedIds, $this->extractMyciteIds((string)($row['title'] ?? '')));
-            $usedIds = array_merge($usedIds, $this->extractMyciteIds((string)($row['note'] ?? '')));
-        }
-
-        $usedIds = array_values(array_unique($usedIds));
-
-        // 2) 标记:先全置 0,再把用到的置 1
-        try {
-            Db::name('production_article_refer')
-                ->where('p_article_id', $pArticleId)
-                ->where('state', 0)
-                ->update(['is_used' => 0, 'update_time' => time()]);
-
-            if (!empty($usedIds)) {
-                Db::name('production_article_refer')
-                    ->where('p_article_id', $pArticleId)
-                    ->where('state', 0)
-                    ->whereIn('p_refer_id', $usedIds)
-                    ->update(['is_used' => 1, 'update_time' => time()]);
-            }
-        } catch (\Exception $e) {
-            // 工具方法:不影响主流程,忽略异常(可按需改为记录日志)
-        }
-    }
 
     public function getArticleMainsRecycle(){
         $data = $this->request->post();
diff --git a/application/api/controller/Production.php b/application/api/controller/Production.php
index 114c1c4..fe29df2 100644
--- a/application/api/controller/Production.php
+++ b/application/api/controller/Production.php
@@ -1920,6 +1920,7 @@ class Production extends Base
                 $cache_insert['p_article_id'] = $p_info['p_article_id'];
                 $cache_insert['refer_content'] = $v;
                 $cache_insert['index'] = $k;
+                $cache_insert['old_index'] = $k;
                 $cache_insert['ctime'] = time();
                 $this->production_article_refer_obj->insert($cache_insert);
             }
diff --git a/application/api/controller/References.php b/application/api/controller/References.php
index c528456..47ae232 100644
--- a/application/api/controller/References.php
+++ b/application/api/controller/References.php
@@ -417,6 +417,12 @@ class References extends Base
         if ($referencePart === '') {
             return [];
         }
+        // Same as Base::blueIntegerChange: accept the fullwidth comma and the en dash, em dash, minus and hyphen variants
+        $referencePart = str_replace(
+            ["\u{FF0C}", '–', '—', '−', '‐', '‑'],
+            [',', '-', '-', '-', '-', '-'],
+            $referencePart
+        );
         $out = [];
         $segments = preg_split('/\s*,\s*/', $referencePart);
         foreach ($segments as $seg) {
@@ -424,6 +430,7 @@ class References extends Base
             if ($seg === '') {
                 continue;
             }
+            $seg = str_replace(['–', '—', '−', '‐', '‑'], '-', $seg);
             if (preg_match('/^(\d+)\s*-\s*(\d+)$/', $seg, $m)) {
                 $a = intval($m[1]);
                 $b = intval($m[2]);
@@ -443,16 +450,15 @@ class References extends Base
     }
 
     /**
-     * 将正文 HTML 中的 [n](及 [1,2]、[2-4] 等)替换为
-     * 找不到对应参考文献时保留原 […],避免丢内容。
-     *
-     * @param string $content article_main.content 等 HTML 片段
-     * @param int $pArticleId t_production_article_refer.p_article_id
+     * Replace numeric bracket citations in a text/HTML fragment with mycite tags.
+     *
+     * @param string $content fragment to rewrite
+     * @param array  $map     map built by buildCitationNumberToPReferIdMap()
+     * @return string rewritten fragment; $content itself when $map or $content is empty
      */
-    public function rewriteMainContentCitationsToMycite(string $content, int $pArticleId)
+    private function applyBlueCitationsToMycite(string $content, array $map): string
     {
-        $map = $this->buildCitationNumberToPReferIdMap($pArticleId);
-        if ($map === []) {
+        if ($map === [] || $content === '') {
             return $content;
         }
-        return preg_replace_callback(
+        $rewritten = preg_replace_callback(
@@ -462,9 +468,14 @@ class References extends Base
                 if ($inner === '') {
                     return $matches[0];
                 }
-                // 仅处理数字引用,避免误伤 [Fig 1] 等
-                $innerNorm = str_replace([',', '–', '—'], [',', '-', '-'], $inner);
-                if (!preg_match('/^[\d\s,\-]+$/', $innerNorm)) {
+                // Only numeric citations are handled, so that [Fig 1] and the like are left alone
+                $innerNorm = str_replace(
+                    ["\u{FF0C}", '–', '—', '−', '‐', '‑'],
+                    [',', '-', '-', '-', '-', '-'],
+                    $inner
+                );
+                $innerNorm = preg_replace('/\s+/u', ' ', trim($innerNorm));
+                if (!preg_match('/^[\d\s,\-]+$/u', $innerNorm)) {
                     return $matches[0];
                 }
 
@@ -479,7 +490,7 @@ class References extends Base
                         continue;
                     }
                     if (empty($map[$n])) {
-                        // 有任意一个序号无法映射到 p_refer_id,则保持原始片段不变,避免丢引用信息
+                        // If any number cannot be mapped to a p_refer_id, keep the original fragment so no citation is lost
                         return $matches[0];
                     }
                     $ids[] = (string)intval($map[$n]);
@@ -493,6 +504,82 @@ class References extends Base
         );
+        // preg_replace_callback() returns null on a PCRE error; keep the fragment unchanged in that case
+        return $rewritten ?? $content;
     }
 
+    /**
+     * Rewrite the citations inside a table_data JSON value.
+     *
+     * Every string of the decoded JSON is rewritten recursively; a value that is not valid JSON is rewritten as one raw string.
+     * html_data is not handled here (the business code maintains it separately).
+     * The trimmed input is returned when nothing was rewritten or re-encoding fails.
+     *
+     * @param string $tableDataJson raw table_data value
+     * @param int    $pArticleId    t_production_article_refer.p_article_id
+     * @return string
+     */
+    private function rewriteTableDataJsonCitationsToMycite(string $tableDataJson, int $pArticleId): string
+    {
+        $tableDataJson = trim((string)$tableDataJson);
+        if ($tableDataJson === '') {
+            return '';
+        }
+        $map = $this->buildCitationNumberToPReferIdMap($pArticleId);
+        if ($map === []) {
+            return $tableDataJson;
+        }
+        $decoded = $this->decodeTableDataJsonToArray($tableDataJson);
+        if ($decoded === null) {
+            return $this->applyBlueCitationsToMycite($tableDataJson, $map);
+        }
+        $walked = $this->rewriteBlueCitationsInJsonNode($decoded, $map);
+        if ($walked === $decoded) {
+            // Nothing was rewritten: skip re-encoding so callers comparing old and new see no formatting-only change
+            return $tableDataJson;
+        }
+        $encoded = json_encode($walked, JSON_UNESCAPED_UNICODE | JSON_UNESCAPED_SLASHES);
+        // json_encode() returns false on failure; never hand that to a caller that writes the result back
+        return $encoded === false ? $tableDataJson : $encoded;
+    }
+
+    /**
+     * Apply applyBlueCitationsToMycite() to every string inside a decoded JSON value.
+     *
+     * @param mixed $node string, array or any other decoded JSON value
+     * @param array $map  map built by buildCitationNumberToPReferIdMap()
+     * @return mixed same structure as $node with its strings rewritten
+     */
+    private function rewriteBlueCitationsInJsonNode($node, array $map)
+    {
+        if (is_string($node)) {
+            return $this->applyBlueCitationsToMycite($node, $map);
+        }
+        if (is_array($node)) {
+            $out = [];
+            foreach ($node as $k => $v) {
+                $out[$k] = $this->rewriteBlueCitationsInJsonNode($v, $map);
+            }
+            return $out;
+        }
+        return $node;
+    }
+
+    /**
+     * Replace [n] citations (also [1,2], [2-4], ...) in body HTML with mycite tags.
+     * A citation whose reference cannot be found keeps its original [...] text so no content is lost.
+     *
+     * @param string $content    HTML fragment such as article_main.content
+     * @param int    $pArticleId t_production_article_refer.p_article_id
+     * @return string
+     */
+    public function rewriteMainContentCitationsToMycite(string $content, int $pArticleId)
+    {
+        $map = $this->buildCitationNumberToPReferIdMap($pArticleId);
+        if ($map === []) {
+            return $content;
+        }
+        return $this->applyBlueCitationsToMycite($content, $map);
+    }
+
     /**
      * 接口:将 content 中的 blue 引用替换为 mycite(需传 p_article_id)
      */
@@ -512,40 +599,51 @@ class References extends Base
             return jsonError('production_article_id not found');
         }
         $pArticleId = $p_info['p_article_id'];
+
+        if (intval($main_info['type'] ?? 0) === 2) {
+            $amtId = intval($main_info['amt_id'] ?? 0);
+            if ($amtId <= 0) {
+                return jsonError('amt_id is empty for table main row');
+            }
+            $tbl = Db::name('article_main_table')
+                ->where('amt_id', $amtId)
+                ->where('article_id', $data['article_id'])
+                ->where('state', 0)
+                ->find();
+            if (empty($tbl)) {
+                return jsonError('article_main_table not found');
+            }
+            $out = [];
+            $td = (string)($tbl['table_data'] ?? '');
+            $out['table_data'] = $td === '' ? '' : $this->rewriteTableDataJsonCitationsToMycite($td, $pArticleId);
+            foreach (['title', 'note'] as $f) {
+                $raw = (string)($tbl[$f] ?? '');
+                $out[$f] = $raw === '' ? '' : $this->rewriteMainContentCitationsToMycite($raw, $pArticleId);
+            }
+            return jsonSuccess([
+                'target' => 'article_main_table',
+                'amt_id' => $amtId,
+                'fields' => $out,
+            ]);
+        }
+
         $content = $main_info['content'];
         $out = $this->rewriteMainContentCitationsToMycite($content, $pArticleId);
         return jsonSuccess(['content' => $out]);
     }
 
     /**
-     * 批量处理并回写 t_article_main.content:
-     * 将正文中的 [n] / [1,2] / [2-4] 改写为
+     * Batch rewrite with write-back:
+     * - type=0: t_article_main.content
+     * - type=2: t_article_main_table (table_data is rewritten recursively as JSON; title/note as plain text; html_data is not modified)
      *
-     * 参数:
-     * - p_article_id (必填):production 侧文章ID
-     * - type (可选):默认 0(仅文本 main),传空则处理所有 type
-     * - dry_run (可选):1=只预览不落库
+     * Parameters:
+     * - article_id (required)
+     * - type (optional): when omitted, body and table rows are processed (type in 0,2); a specific number restricts processing to that type
+     * - dry_run (optional): 1 = preview only, nothing is written to the database
      */
     public function convertArticleMainCitationsToMycite()
     {
-//        $aParam = empty($aParam) ? $this->request->post() : $aParam;
-//        $pArticleId = intval($aParam['p_article_id'] ?? 0);
-//        if ($pArticleId <= 0) {
-//            return jsonError('p_article_id is required');
-//        }
-//
-//        // 通过 production_article -> article_id,确保是当前系统存在的文章
-//        $aArticle = $this->getArticle(['p_article_id' => $pArticleId]);
-//        $iStatus = empty($aArticle['status']) ? 0 : $aArticle['status'];
-//        if ($iStatus != 1) {
-//            return json_encode($aArticle);
-//        }
-//        $aArticle = empty($aArticle['data']) ? [] : $aArticle['data'];
-//        $articleId = intval($aArticle['article_id'] ?? 0);
-//        if ($articleId <= 0) {
-//            return jsonError('Article not found');
-//        }
-
         $aParam = $this->request->post();
         $rule = new Validate([
             "article_id"=>"require"
@@ -554,18 +652,24 @@ class References extends Base
             return jsonError($rule->getError());
         }
         $dryRun = intval($aParam['dry_run'] ?? 0) === 1;
-        $type = $aParam['type'] ?? 0;
         $p_info = $this->production_article_obj->where('article_id', $aParam['article_id'])->where('state', 0)->find();
+        if (empty($p_info)) {
+            return jsonError('production_article not found');
+        }
         $pArticleId = $p_info['p_article_id'];
+
         $query = Db::name('article_main')
             ->where('article_id', $aParam['article_id'])
             ->whereIn('state', [0, 2])
             ->order('sort asc');
-        if ($type !== '' && $type !== null) {
-            $query->where('type', intval($type));
+        if (isset($aParam['type']) && $aParam['type'] !== '' && $aParam['type'] !== null) {
+            $query->where('type', intval($aParam['type']));
+        } else {
+            // By default process both body paragraphs and table placeholder rows (the former default type=0 skipped type=2)
+            $query->whereIn('type', [0, 2]);
         }
 
-        $mains = $query->field('am_id,content,type,sort')->select();
+        $mains = $query->field('am_id,content,type,sort,amt_id')->select();
         if (empty($mains)) {
             return jsonError('article_main is empty');
         }
@@ -577,6 +681,80 @@ class References extends Base
         try {
             foreach ($mains as $row) {
                 $amId = intval($row['am_id']);
+                $mainType = intval($row['type'] ?? 0);
+
+                if ($mainType === 2) {
+                    $amtId = intval($row['amt_id'] ?? 0);
+                    if ($amtId <= 0) {
+                        continue;
+                    }
+                    $tbl = Db::name('article_main_table')
+                        ->where('amt_id', $amtId)
+                        ->where('article_id', $aParam['article_id'])
+                        ->where('state', 0)
+                        ->find();
+                    if (empty($tbl)) {
+                        continue;
+                    }
+
+                    $updateTbl = [];
+                    $fieldPreview = [];
+
+                    $oldTd = (string)($tbl['table_data'] ?? '');
+                    if ($oldTd !== '') {
+                        $newTd = $this->rewriteTableDataJsonCitationsToMycite($oldTd, $pArticleId);
+                        if ($newTd !== $oldTd) {
+                            $updateTbl['table_data'] = $newTd;
+                            if (count($fieldPreview) < 4) {
+                                $fieldPreview['table_data'] = [
+                                    'before' => $oldTd,
+                                    'after' => $newTd,
+                                ];
+                            }
+                        }
+                    }
+                    foreach (['title', 'note'] as $f) {
+                        $old = (string)($tbl[$f] ?? '');
+                        if ($old === '') {
+                            continue;
+                        }
+                        $new = $this->rewriteMainContentCitationsToMycite($old, $pArticleId);
+                        if ($new !== $old) {
+                            $updateTbl[$f] = $new;
+                            if (count($fieldPreview) < 4) {
+                                $fieldPreview[$f] = [
+                                    'before' => $old,
+                                    'after' => $new,
+                                ];
+                            }
+                        }
+                    }
+                    if ($updateTbl === []) {
+                        continue;
+                    }
+
+                    $changed++;
+                    if (count($preview) < 3) {
+                        $preview[] = [
+                            'am_id' => $amId,
+                            'amt_id' => $amtId,
+                            'type' => 2,
+                            'sort' => intval($row['sort'] ?? 0),
+                            'target' => 'article_main_table',
+                            'fields' => $fieldPreview,
+                        ];
+                    }
+
+                    if (!$dryRun) {
+                        Db::name('article_main_table')
+                            ->where('amt_id', $amtId)
+                            ->limit(1)
+                            ->update($updateTbl);
+                    }
+                    continue;
+                }
+
+                // Body rows and the like: write back to article_main.content
                 $old = (string)($row['content'] ?? '');
                 if ($old === '') {
                     continue;
@@ -589,11 +767,12 @@ class References extends Base
                 $changed++;
                 if (count($preview) < 3) {
                     $preview[] = [
-                        'am_id' => $amId,
-                        'type' => intval($row['type'] ?? 0),
-                        'sort' => intval($row['sort'] ?? 0),
-                        'before'=> $old,
-                        'after' => $new,
+                        'am_id' => $amId,
+                        'type' => $mainType,
+                        'sort' => intval($row['sort'] ?? 0),
+                        'target' => 'article_main',
+                        'before' => $old,
+                        'after' => $new,
                     ];
                 }
 
@@ -602,7 +781,7 @@ class References extends Base
                         ->where('am_id', $amId)
                         ->limit(1)
                         ->update([
-                            'content' => $new
+                            'content' => $new,
                         ]);
                 }
             }
@@ -617,6 +796,11 @@ class References extends Base
             return jsonError('convert failed: ' . $e->getMessage());
         }
 
+        // A dry run must not write anything, so is_used is only refreshed after a real conversion
+        if (!$dryRun) {
+            $this->markUnusedReferencesForArticle(intval($aParam['article_id']));
+        }
+
         return jsonSuccess([
             'article_id' => $aParam['article_id'],
             'p_article_id' => $pArticleId,