This commit is contained in:
wangjinlei
2026-04-10 15:57:19 +08:00
parent b0f0d6461f
commit 717c662d81
4 changed files with 363 additions and 121 deletions

View File

@@ -1047,6 +1047,162 @@ class Base extends Controller
return $this->production_article_obj->insertGetId($insert);
}
/**
 * Recompute the is_used flag of the active production references of an article.
 *
 * Scans article_main.content, the text fields of article_main_table and of
 * article_main_image for <mycite data-id="..."> tags, then sets is_used=0 on
 * every active production_article_refer row of the article and is_used=1 on the
 * rows whose p_refer_id was found.
 *
 * Best-effort helper: database errors raised by the two updates are logged and
 * never propagated to the caller.
 *
 * @param int $articleId article_id of the article to scan; values <= 0 are ignored
 * @return void
 */
public function markUnusedReferencesForArticle(int $articleId)
{
    if ($articleId <= 0) {
        return;
    }

    $production = Db::name('production_article')
        ->where('article_id', $articleId)
        ->where('state', 0)
        ->field('p_article_id')
        ->find();
    $pArticleId = intval($production['p_article_id'] ?? 0);
    if ($pArticleId <= 0) {
        return;
    }

    // 1) Collect the p_refer_id values in use (same scope as
    //    convertArticleMainCitationsToMycite: body text + tables + images).
    //    Each extraction result is stored as one batch and merged once at the end.
    $idBatches = [];

    // article_main: body rows; for table rows (type=2) the citations live in
    // article_main_table and content may be empty, scanning it anyway is harmless.
    $mains = Db::name('article_main')
        ->where('article_id', $articleId)
        ->whereIn('state', [0, 2])
        ->field('content')
        ->select();
    foreach ($mains as $row) {
        $idBatches[] = $this->extractMyciteIds((string)($row['content'] ?? ''));
    }

    // article_main_table: table payload (JSON) plus its plain text fields, including url.
    $tables = Db::name('article_main_table')
        ->where('article_id', $articleId)
        ->where('state', 0)
        ->field('table_data,html_data,title,note,url')
        ->select();
    foreach ($tables as $row) {
        $idBatches[] = $this->extractMyciteIdsFromTableDataJson((string)($row['table_data'] ?? ''));
        foreach (['html_data', 'title', 'note', 'url'] as $field) {
            $idBatches[] = $this->extractMyciteIds((string)($row[$field] ?? ''));
        }
    }

    // article_main_image: figure captions and related text fields.
    $images = Db::name('article_main_image')
        ->where('article_id', $articleId)
        ->where('state', 0)
        ->field('title,note,url')
        ->select();
    foreach ($images as $row) {
        foreach (['title', 'note', 'url'] as $field) {
            $idBatches[] = $this->extractMyciteIds((string)($row[$field] ?? ''));
        }
    }

    $usedIds = $idBatches === []
        ? []
        : array_values(array_unique(array_merge(...$idBatches)));

    // 2) Flag: reset everything to 0, then set the used ones back to 1.
    // NOTE(review): the two updates are not wrapped in a transaction here; if the
    // second one fails, used references stay at is_used=0 until the next run.
    $now = time();
    try {
        Db::name('production_article_refer')
            ->where('p_article_id', $pArticleId)
            ->where('state', 0)
            ->update(['is_used' => 0, 'update_time' => $now]);
        if (!empty($usedIds)) {
            Db::name('production_article_refer')
                ->where('p_article_id', $pArticleId)
                ->where('state', 0)
                ->whereIn('p_refer_id', $usedIds)
                ->update(['is_used' => 1, 'update_time' => $now]);
        }
    } catch (\Exception $e) {
        // Must not break the main flow, but leave a trace instead of failing silently.
        error_log(sprintf(
            'markUnusedReferencesForArticle failed (article_id=%d, p_article_id=%d): %s',
            $articleId,
            $pArticleId,
            $e->getMessage()
        ));
    }
}
/**
 * Extract the reference ids listed in the data-id attribute of <mycite> tags.
 *
 * A data-id may hold a single id or a comma separated list ("3, 7,9").
 * Only values that intval() turns into a positive integer are kept.
 *
 * @param string $text HTML / text fragment to scan
 * @return int[] unique ids, sorted ascending
 */
public function extractMyciteIds(string $text): array
{
    if ($text === '') {
        return [];
    }

    $pattern = '/<\s*mycite\b[^>]*\bdata-id\s*=\s*(["\'])(.*?)\1[^>]*>/i';

    $found = preg_match_all($pattern . 'u', $text, $m);
    if ($found === false) {
        // With /u the match fails outright when the subject is not valid UTF-8.
        // Retry byte-wise so that a stray broken byte does not hide every citation
        // (which would make callers treat all references as unused).
        $found = preg_match_all($pattern, $text, $m);
    }
    if (!$found) {
        return [];
    }

    $ids = [];
    foreach ($m[2] as $raw) {
        $raw = trim((string)$raw);
        if ($raw === '') {
            continue;
        }
        foreach (preg_split('/\s*,\s*/', $raw) as $piece) {
            $id = intval(trim((string)$piece));
            if ($id > 0) {
                $ids[] = $id;
            }
        }
    }

    $ids = array_values(array_unique($ids));
    sort($ids);
    return $ids;
}
/**
 * Decode a table_data payload into a PHP array.
 *
 * The payload is expected to be a JSON two-dimensional array such as
 * [[{text,colspan,rowspan},...],...]. A leading UTF-8 BOM is dropped, and a
 * payload that was JSON-encoded twice (a JSON string containing JSON) is
 * decoded a second time.
 *
 * @param string $raw raw column value
 * @return array|null decoded array, or null when the value is empty, is not
 *                    valid JSON, or does not decode to an array
 */
protected function decodeTableDataJsonToArray(string $raw): ?array
{
    $payload = trim($raw);
    if ($payload === '') {
        return null;
    }

    // Drop a UTF-8 byte order mark if the payload starts with one.
    if (strncmp($payload, "\xEF\xBB\xBF", 3) === 0) {
        $payload = substr($payload, 3);
    }

    $firstPass = json_decode($payload, true);
    if (json_last_error() !== JSON_ERROR_NONE) {
        return null;
    }
    if (is_array($firstPass)) {
        return $firstPass;
    }
    if (!is_string($firstPass)) {
        return null;
    }

    // Double-encoded payload: the first pass produced a JSON string.
    $secondPass = json_decode($firstPass, true);
    $isUsable = json_last_error() === JSON_ERROR_NONE && is_array($secondPass);

    return $isUsable ? $secondPass : null;
}
/**
 * Collect the <mycite> ids found in a table_data value.
 *
 * When the value decodes as JSON, every string leaf of the decoded structure is
 * scanned; when it does not, the raw value is scanned as one string.
 *
 * @param string $tableDataJson raw table_data column value
 * @return int[] unique ids, sorted ascending
 */
protected function extractMyciteIdsFromTableDataJson(string $tableDataJson): array
{
    $tableDataJson = trim($tableDataJson);
    if ($tableDataJson === '') {
        return [];
    }

    $tree = $this->decodeTableDataJsonToArray($tableDataJson);
    if ($tree === null) {
        // Not usable as JSON: treat the whole value as plain text.
        return $this->extractMyciteIds($tableDataJson);
    }

    // Visit every leaf of the decoded structure; only string leaves can hold tags.
    $batches = [];
    array_walk_recursive($tree, function ($leaf) use (&$batches) {
        if (is_string($leaf)) {
            $batches[] = $this->extractMyciteIds($leaf);
        }
    });

    $collected = $batches === [] ? [] : array_merge(...$batches);
    $collected = array_values(array_unique($collected));
    sort($collected);

    return $collected;
}
}

View File

@@ -1156,7 +1156,7 @@ class Preaccept extends Base
// 若检测到引用删除,则进行全文扫描并标记未被引用条目的 is_used=0含 table 内容)
if ($hasCitationDeletion) {
// $this->markUnusedReferencesForArticle(intval($am_info['article_id']));
$this->markUnusedReferencesForArticle(intval($am_info['article_id']));
}
// return jsonSuccess([]);
//返回更新数据 20260119 start
@@ -1198,27 +1198,7 @@ class Preaccept extends Base
*
* @return int[]
*/
private function extractMyciteIds(string $text): array
{
    // Returns the unique, ascending list of positive integer ids found in the
    // data-id attribute (single id or comma separated list) of <mycite> tags.
    if ($text === '') {
        return [];
    }

    $pattern = '/<\s*mycite\b[^>]*\bdata-id\s*=\s*(["\'])(.*?)\1[^>]*>/i';

    $found = preg_match_all($pattern . 'u', $text, $m);
    if ($found === false) {
        // With /u the match fails outright when the subject is not valid UTF-8.
        // Retry byte-wise so that a stray broken byte does not hide every citation.
        $found = preg_match_all($pattern, $text, $m);
    }
    if (!$found) {
        return [];
    }

    $ids = [];
    foreach ($m[2] as $raw) {
        $raw = trim((string)$raw);
        if ($raw === '') {
            continue;
        }
        foreach (preg_split('/\s*,\s*/', $raw) as $piece) {
            $id = intval(trim((string)$piece));
            if ($id > 0) {
                $ids[] = $id;
            }
        }
    }

    $ids = array_values(array_unique($ids));
    sort($ids);
    return $ids;
}
/**
* 全文扫描(正文 + table将 production_article_refer 未被引用的条目标记 is_used=0
@@ -1226,62 +1206,7 @@ class Preaccept extends Base
*
* 注意:依赖 production_article_refer 表存在 is_used 字段int/tinyint
*/
private function markUnusedReferencesForArticle(int $articleId)
{
    // Best-effort helper: scans body text and table fields for <mycite> ids and
    // refreshes production_article_refer.is_used accordingly. Database errors in
    // the update step are logged and never propagated.
    if ($articleId <= 0) {
        return;
    }

    $production = Db::name('production_article')
        ->where('article_id', $articleId)
        ->where('state', 0)
        ->field('p_article_id')
        ->find();
    $pArticleId = intval($production['p_article_id'] ?? 0);
    if ($pArticleId <= 0) {
        return;
    }

    // 1) Collect the p_refer_id values in use; each extraction result is one
    //    batch, all batches are merged once at the end.
    $idBatches = [];

    $mains = Db::name('article_main')
        ->where('article_id', $articleId)
        ->whereIn('state', [0, 2])
        ->field('content')
        ->select();
    foreach ($mains as $row) {
        $idBatches[] = $this->extractMyciteIds((string)($row['content'] ?? ''));
    }

    $tables = Db::name('article_main_table')
        ->where('article_id', $articleId)
        ->where('state', 0)
        ->field('table_data,html_data,title,note')
        ->select();
    foreach ($tables as $row) {
        $tableJson = (string)($row['table_data'] ?? '');
        $idBatches[] = $this->extractMyciteIds($tableJson);

        // Inside JSON text double quotes are escaped (data-id=\"5\"), which the
        // tag pattern cannot match; also scan the decoded string values.
        $decoded = json_decode($tableJson, true);
        if (is_array($decoded)) {
            array_walk_recursive($decoded, function ($leaf) use (&$idBatches) {
                if (is_string($leaf)) {
                    $idBatches[] = $this->extractMyciteIds($leaf);
                }
            });
        }

        foreach (['html_data', 'title', 'note'] as $field) {
            $idBatches[] = $this->extractMyciteIds((string)($row[$field] ?? ''));
        }
    }

    $usedIds = $idBatches === []
        ? []
        : array_values(array_unique(array_merge(...$idBatches)));

    // 2) Flag: reset everything to 0, then set the used ones back to 1.
    $now = time();
    try {
        Db::name('production_article_refer')
            ->where('p_article_id', $pArticleId)
            ->where('state', 0)
            ->update(['is_used' => 0, 'update_time' => $now]);
        if (!empty($usedIds)) {
            Db::name('production_article_refer')
                ->where('p_article_id', $pArticleId)
                ->where('state', 0)
                ->whereIn('p_refer_id', $usedIds)
                ->update(['is_used' => 1, 'update_time' => $now]);
        }
    } catch (\Exception $e) {
        // Must not break the main flow, but leave a trace instead of failing silently.
        error_log(sprintf(
            'markUnusedReferencesForArticle failed (article_id=%d, p_article_id=%d): %s',
            $articleId,
            $pArticleId,
            $e->getMessage()
        ));
    }
}
public function getArticleMainsRecycle(){
$data = $this->request->post();

View File

@@ -1920,6 +1920,7 @@ class Production extends Base
$cache_insert['p_article_id'] = $p_info['p_article_id'];
$cache_insert['refer_content'] = $v;
$cache_insert['index'] = $k;
$cache_insert['old_index'] = $k;
$cache_insert['ctime'] = time();
$this->production_article_refer_obj->insert($cache_insert);
}

View File

@@ -417,6 +417,12 @@ class References extends Base
if ($referencePart === '') {
return [];
}
// 与 Base::blueIntegerChange 一致:范围可用 ASCII -、en dash 、em dash —、minus
$referencePart = str_replace(
['', '', '—', '', '', ''],
[',', '-', '-', '-', '-', '-'],
$referencePart
);
$out = [];
$segments = preg_split('/\s*,\s*/', $referencePart);
foreach ($segments as $seg) {
@@ -424,6 +430,7 @@ class References extends Base
if ($seg === '') {
continue;
}
$seg = str_replace(['', '—', '', '', ''], '-', $seg);
if (preg_match('/^(\d+)\s*-\s*(\d+)$/', $seg, $m)) {
$a = intval($m[1]);
$b = intval($m[2]);
@@ -443,16 +450,11 @@ class References extends Base
}
/**
* 将正文 HTML 中的 <blue>[n]</blue>(及 [1,2]、[2-4] 等)替换为 <mycite data-id="p_refer_id"></mycite>
* 找不到对应参考文献时保留原 <blue>[…]</blue>,避免丢内容。
*
* @param string $content article_main.content 等 HTML 片段
* @param int $pArticleId t_production_article_refer.p_article_id
* 纯文本/HTML 片段:<blue>[n]</blue> → <mycite>(需传入已构建的 map
*/
public function rewriteMainContentCitationsToMycite(string $content, int $pArticleId)
private function applyBlueCitationsToMycite(string $content, array $map): string
{
$map = $this->buildCitationNumberToPReferIdMap($pArticleId);
if ($map === []) {
if ($map === [] || $content === '') {
return $content;
}
return preg_replace_callback(
@@ -462,9 +464,13 @@ class References extends Base
if ($inner === '') {
return $matches[0];
}
// 仅处理数字引用,避免误伤 [Fig 1] 等
$innerNorm = str_replace(['', '', '—'], [',', '-', '-'], $inner);
if (!preg_match('/^[\d\s,\-]+$/', $innerNorm)) {
$innerNorm = str_replace(
['', '', '—', '', '', ''],
[',', '-', '-', '-', '-', '-'],
$inner
);
$innerNorm = preg_replace('/\s+/u', ' ', trim($innerNorm));
if (!preg_match('/^[\d\s,\-]+$/u', $innerNorm)) {
return $matches[0];
}
@@ -479,7 +485,6 @@ class References extends Base
continue;
}
if (empty($map[$n])) {
// 有任意一个序号无法映射到 p_refer_id则保持原始片段不变避免丢引用信息
return $matches[0];
}
$ids[] = (string)intval($map[$n]);
@@ -493,6 +498,67 @@ class References extends Base
);
}
/**
 * Rewrite <blue>[n]</blue> citations inside a table_data value to <mycite> tags.
 *
 * table_data holds JSON: every string value of the decoded structure is
 * rewritten. A value that does not decode as JSON is rewritten as one plain
 * string. html_data is not handled here.
 *
 * The (trimmed) input is returned untouched when there is no citation map,
 * when no string value changed, or when the rewritten structure cannot be
 * encoded again. Callers that compare the result with the stored value
 * therefore only see a difference when a citation was actually replaced.
 *
 * NOTE(review): a payload that was JSON-encoded twice is written back encoded
 * once when a citation changes - confirm that consumers accept both forms.
 *
 * @param string $tableDataJson raw table_data column value
 * @param int    $pArticleId    production article id used to build the citation map
 * @return string rewritten JSON (or plain text), or the trimmed input
 */
private function rewriteTableDataJsonCitationsToMycite(string $tableDataJson, int $pArticleId): string
{
    $tableDataJson = trim((string)$tableDataJson);
    if ($tableDataJson === '') {
        return '';
    }

    $map = $this->buildCitationNumberToPReferIdMap($pArticleId);
    if ($map === []) {
        return $tableDataJson;
    }

    $decoded = $this->decodeTableDataJsonToArray($tableDataJson);
    if ($decoded === null) {
        // Not usable as JSON: fall back to a plain string replacement.
        return $this->applyBlueCitationsToMycite($tableDataJson, $map);
    }

    $walked = $this->rewriteBlueCitationsInJsonNode($decoded, $map);
    if ($walked === $decoded) {
        // Nothing was replaced: re-encoding would only change the formatting and
        // make the value look modified.
        return $tableDataJson;
    }

    $encoded = json_encode($walked, JSON_UNESCAPED_UNICODE | JSON_UNESCAPED_SLASHES);
    if ($encoded === false) {
        // Never hand back an empty/false payload that a caller could persist.
        return $tableDataJson;
    }

    return $encoded;
}
/**
 * Apply the blue-citation rewrite to every string found in a decoded JSON node.
 *
 * Strings are rewritten, arrays are processed element by element with their
 * keys kept, and any other value is returned as is.
 *
 * @param mixed $node decoded JSON value (string, array or other scalar/null)
 * @param array $map  citation map handed through to applyBlueCitationsToMycite
 * @return mixed the node with its string values rewritten
 */
private function rewriteBlueCitationsInJsonNode($node, array $map)
{
    if (is_array($node)) {
        // array_map with a single array keeps the original keys.
        return array_map(function ($child) use ($map) {
            return $this->rewriteBlueCitationsInJsonNode($child, $map);
        }, $node);
    }

    return is_string($node)
        ? $this->applyBlueCitationsToMycite($node, $map)
        : $node;
}
/**
 * Replace <blue>[n]</blue> citations (also lists such as [1,2] and ranges such
 * as [2-4]) in an HTML fragment with <mycite data-id="p_refer_id"></mycite>.
 * The fragment is returned unchanged when no citation map can be built for the
 * production article.
 *
 * @param string $content    HTML fragment, e.g. article_main.content
 * @param int    $pArticleId p_article_id used to look up the references
 */
public function rewriteMainContentCitationsToMycite(string $content, int $pArticleId)
{
    $citationMap = $this->buildCitationNumberToPReferIdMap($pArticleId);

    return $citationMap === []
        ? $content
        : $this->applyBlueCitationsToMycite($content, $citationMap);
}
/**
* 接口:将 content 中的 blue 引用替换为 mycite需传 p_article_id
*/
@@ -512,40 +578,51 @@ class References extends Base
return jsonError('production_article_id not found');
}
$pArticleId = $p_info['p_article_id'];
if (intval($main_info['type'] ?? 0) === 2) {
$amtId = intval($main_info['amt_id'] ?? 0);
if ($amtId <= 0) {
return jsonError('amt_id is empty for table main row');
}
$tbl = Db::name('article_main_table')
->where('amt_id', $amtId)
->where('article_id', $data['article_id'])
->where('state', 0)
->find();
if (empty($tbl)) {
return jsonError('article_main_table not found');
}
$out = [];
$td = (string)($tbl['table_data'] ?? '');
$out['table_data'] = $td === '' ? '' : $this->rewriteTableDataJsonCitationsToMycite($td, $pArticleId);
foreach (['title', 'note'] as $f) {
$raw = (string)($tbl[$f] ?? '');
$out[$f] = $raw === '' ? '' : $this->rewriteMainContentCitationsToMycite($raw, $pArticleId);
}
return jsonSuccess([
'target' => 'article_main_table',
'amt_id' => $amtId,
'fields' => $out,
]);
}
$content = $main_info['content'];
$out = $this->rewriteMainContentCitationsToMycite($content, $pArticleId);
return jsonSuccess(['content' => $out]);
}
/**
* 批量处理并回写 t_article_main.content
* 将正文中的 <blue>[n]</blue> / [1,2] / [2-4] 改写为 <mycite data-id="..."></mycite>
* 批量处理并回写:
* - type=0t_article_main.content
* - type=2t_article_main_tabletable_data 为 JSON 递归替换title/note 为纯文本;不修改 html_data
*
* 参数:
* - p_article_id (必填)production 侧文章ID
* - type (可选)默认 0仅文本 main传空则处理所有 type
* - article_id (必填)
* - type (可选)不传则处理正文+表格type in 0,2传具体数字则只处理该 type
* - dry_run (可选)1=只预览不落库
*/
public function convertArticleMainCitationsToMycite()
{
// $aParam = empty($aParam) ? $this->request->post() : $aParam;
// $pArticleId = intval($aParam['p_article_id'] ?? 0);
// if ($pArticleId <= 0) {
// return jsonError('p_article_id is required');
// }
//
// // 通过 production_article -> article_id确保是当前系统存在的文章
// $aArticle = $this->getArticle(['p_article_id' => $pArticleId]);
// $iStatus = empty($aArticle['status']) ? 0 : $aArticle['status'];
// if ($iStatus != 1) {
// return json_encode($aArticle);
// }
// $aArticle = empty($aArticle['data']) ? [] : $aArticle['data'];
// $articleId = intval($aArticle['article_id'] ?? 0);
// if ($articleId <= 0) {
// return jsonError('Article not found');
// }
$aParam = $this->request->post();
$rule = new Validate([
"article_id"=>"require"
@@ -554,18 +631,24 @@ class References extends Base
return jsonError($rule->getError());
}
$dryRun = intval($aParam['dry_run'] ?? 0) === 1;
$type = $aParam['type'] ?? 0;
$p_info = $this->production_article_obj->where('article_id', $aParam['article_id'])->where('state', 0)->find();
if (empty($p_info)) {
return jsonError('production_article not found');
}
$pArticleId = $p_info['p_article_id'];
$query = Db::name('article_main')
->where('article_id', $aParam['article_id'])
->whereIn('state', [0, 2])
->order('sort asc');
if ($type !== '' && $type !== null) {
$query->where('type', intval($type));
if (isset($aParam['type']) && $aParam['type'] !== '' && $aParam['type'] !== null) {
$query->where('type', intval($aParam['type']));
} else {
// 默认同时处理正文段落与表格占位行(避免原先默认 type=0 漏掉 type=2
$query->whereIn('type', [0, 2]);
}
$mains = $query->field('am_id,content,type,sort')->select();
$mains = $query->field('am_id,content,type,sort,amt_id')->select();
if (empty($mains)) {
return jsonError('article_main is empty');
}
@@ -577,6 +660,80 @@ class References extends Base
try {
foreach ($mains as $row) {
$amId = intval($row['am_id']);
$mainType = intval($row['type'] ?? 0);
if ($mainType === 2) {
$amtId = intval($row['amt_id'] ?? 0);
if ($amtId <= 0) {
continue;
}
$tbl = Db::name('article_main_table')
->where('amt_id', $amtId)
->where('article_id', $aParam['article_id'])
->where('state', 0)
->find();
if (empty($tbl)) {
continue;
}
$updateTbl = [];
$fieldPreview = [];
$oldTd = (string)($tbl['table_data'] ?? '');
if ($oldTd !== '') {
$newTd = $this->rewriteTableDataJsonCitationsToMycite($oldTd, $pArticleId);
if ($newTd !== $oldTd) {
$updateTbl['table_data'] = $newTd;
if (count($fieldPreview) < 4) {
$fieldPreview['table_data'] = [
'before' => $oldTd,
'after' => $newTd,
];
}
}
}
foreach (['title', 'note'] as $f) {
$old = (string)($tbl[$f] ?? '');
if ($old === '') {
continue;
}
$new = $this->rewriteMainContentCitationsToMycite($old, $pArticleId);
if ($new !== $old) {
$updateTbl[$f] = $new;
if (count($fieldPreview) < 4) {
$fieldPreview[$f] = [
'before' => $old,
'after' => $new,
];
}
}
}
if ($updateTbl === []) {
continue;
}
$changed++;
if (count($preview) < 3) {
$preview[] = [
'am_id' => $amId,
'amt_id' => $amtId,
'type' => 2,
'sort' => intval($row['sort'] ?? 0),
'target' => 'article_main_table',
'fields' => $fieldPreview,
];
}
if (!$dryRun) {
Db::name('article_main_table')
->where('amt_id', $amtId)
->limit(1)
->update($updateTbl);
}
continue;
}
// 正文等:写回 article_main.content
$old = (string)($row['content'] ?? '');
if ($old === '') {
continue;
@@ -589,11 +746,12 @@ class References extends Base
$changed++;
if (count($preview) < 3) {
$preview[] = [
'am_id' => $amId,
'type' => intval($row['type'] ?? 0),
'sort' => intval($row['sort'] ?? 0),
'before'=> $old,
'after' => $new,
'am_id' => $amId,
'type' => $mainType,
'sort' => intval($row['sort'] ?? 0),
'target' => 'article_main',
'before' => $old,
'after' => $new,
];
}
@@ -602,7 +760,7 @@ class References extends Base
->where('am_id', $amId)
->limit(1)
->update([
'content' => $new
'content' => $new,
]);
}
}
@@ -617,6 +775,8 @@ class References extends Base
return jsonError('convert failed: ' . $e->getMessage());
}
$this->markUnusedReferencesForArticle(intval($aParam['article_id']));
return jsonSuccess([
'article_id' => $aParam['article_id'],
'p_article_id' => $pArticleId,