This commit is contained in:
wangjinlei
2026-04-10 15:57:19 +08:00
parent b0f0d6461f
commit 717c662d81
4 changed files with 363 additions and 121 deletions

View File

@@ -1047,6 +1047,162 @@ class Base extends Controller
return $this->production_article_obj->insertGetId($insert);
}
/**
 * Refresh the is_used flag of the active reference rows belonging to an article.
 *
 * Looks up the production_article row (state = 0) for the article, scans the article's
 * body rows, table rows and image rows for <mycite data-id="..."> tags, then sets
 * is_used = 0 on every production_article_refer row (state = 0) of that production record
 * and is_used = 1 on the rows whose p_refer_id was cited.
 *
 * This is a best-effort helper: a database failure while writing the flags is logged and
 * swallowed so the caller's flow is never interrupted.
 *
 * @param int $articleId Article id; values <= 0 are ignored.
 * @return void
 */
public function markUnusedReferencesForArticle(int $articleId)
{
    if ($articleId <= 0) return;

    $production = Db::name('production_article')
        ->where('article_id', $articleId)
        ->where('state', 0)
        ->field('p_article_id')
        ->find();
    // find() may yield nothing; the ?? fallback turns that into 0 and we bail out below.
    $pArticleId = intval($production['p_article_id'] ?? 0);
    if ($pArticleId <= 0) return;

    // 1) Collect the cited p_refer_id values (same coverage as
    //    convertArticleMainCitationsToMycite: body text + tables + images).
    //    Each extraction result is kept as its own list and merged once at the end.
    $idLists = [];

    // article_main: body rows. Rows whose citations live in article_main_table may have
    // an empty content column; scanning them anyway costs nothing.
    $mains = Db::name('article_main')
        ->where('article_id', $articleId)
        ->whereIn('state', [0, 2])
        ->field('content')
        ->select();
    foreach ($mains as $row) {
        $idLists[] = $this->extractMyciteIds((string)($row['content'] ?? ''));
    }

    // article_main_table: table payload (JSON) plus the plain text/HTML columns, url included.
    $tables = Db::name('article_main_table')
        ->where('article_id', $articleId)
        ->where('state', 0)
        ->field('table_data,html_data,title,note,url')
        ->select();
    foreach ($tables as $row) {
        $idLists[] = $this->extractMyciteIdsFromTableDataJson((string)($row['table_data'] ?? ''));
        foreach (['html_data', 'title', 'note', 'url'] as $column) {
            $idLists[] = $this->extractMyciteIds((string)($row[$column] ?? ''));
        }
    }

    // article_main_image: figure captions and notes.
    $images = Db::name('article_main_image')
        ->where('article_id', $articleId)
        ->where('state', 0)
        ->field('title,note,url')
        ->select();
    foreach ($images as $row) {
        foreach (['title', 'note', 'url'] as $column) {
            $idLists[] = $this->extractMyciteIds((string)($row[$column] ?? ''));
        }
    }

    $usedIds = $idLists ? array_values(array_unique(array_merge(...$idLists))) : [];

    // 2) Flag the rows: reset everything to 0 first, then set the cited ones to 1.
    // NOTE(review): the two updates are not wrapped in a transaction; if the second one
    // fails, all references stay flagged as unused until the next successful run.
    $now = time();
    try {
        Db::name('production_article_refer')
            ->where('p_article_id', $pArticleId)
            ->where('state', 0)
            ->update(['is_used' => 0, 'update_time' => $now]);
        if (!empty($usedIds)) {
            Db::name('production_article_refer')
                ->where('p_article_id', $pArticleId)
                ->where('state', 0)
                ->whereIn('p_refer_id', $usedIds)
                ->update(['is_used' => 1, 'update_time' => $now]);
        }
    } catch (\Exception $e) {
        // Utility method: must not break the main flow, but leave a trace for diagnosis
        // instead of discarding the failure silently.
        error_log(sprintf(
            'markUnusedReferencesForArticle failed (article_id=%d, p_article_id=%d): %s',
            $articleId,
            $pArticleId,
            $e->getMessage()
        ));
    }
}
/**
 * Extract the reference ids cited by <mycite data-id="..."> tags in a text fragment.
 *
 * The data-id attribute may hold a single id or a comma-separated list. Every piece is
 * converted with intval() and only strictly positive values are kept.
 *
 * @param string $text HTML/text fragment to scan.
 * @return int[] Unique positive ids in ascending order; empty when nothing is cited.
 */
public function extractMyciteIds(string $text): array
{
    if ($text === '') return [];

    $pattern = '/<\s*mycite\b[^>]*\bdata-id\s*=\s*(["\'])(.*?)\1[^>]*>/i';
    $m = [];
    $found = preg_match_all($pattern . 'u', $text, $m);
    if ($found === false && preg_last_error() === PREG_BAD_UTF8_ERROR) {
        // With /u, PCRE refuses any subject that is not valid UTF-8 and reports failure
        // for the whole text. The tag syntax itself is pure ASCII, so retry in byte mode
        // rather than pretending the fragment cites nothing.
        $found = preg_match_all($pattern, $text, $m);
    }
    if (!$found) return [];

    $ids = [];
    foreach ($m[2] as $raw) {
        $raw = trim((string)$raw);
        if ($raw === '') continue;
        // data-id="3, 5,8" -> individual pieces
        $parts = preg_split('/\s*,\s*/', $raw);
        foreach ($parts as $p) {
            $p = trim((string)$p);
            if ($p === '') continue;
            $v = intval($p);
            if ($v > 0) $ids[] = $v;
        }
    }

    $ids = array_values(array_unique($ids));
    sort($ids);
    return $ids;
}
/**
 * Decode a table_data value into a PHP array.
 *
 * table_data is expected to be JSON for a two-dimensional array such as
 * [[{text,colspan,rowspan},...],...]. A value that was JSON-encoded twice (a JSON string
 * whose content is itself JSON) is decoded a second time.
 *
 * @param string $raw Raw column value; surrounding whitespace and a leading UTF-8 BOM are dropped.
 * @return array|null The decoded array, or null when the value is empty, is not valid JSON,
 *                    or does not decode to an array.
 */
protected function decodeTableDataJsonToArray(string $raw): ?array
{
    $payload = trim($raw);
    if ($payload === '') {
        return null;
    }

    // Strip a leading UTF-8 byte order mark (3 bytes) before decoding.
    $hasBom = preg_match('/^\xEF\xBB\xBF/', $payload);
    if ($hasBom) {
        $payload = substr($payload, 3);
    }

    $firstPass = json_decode($payload, true);
    if (json_last_error() !== JSON_ERROR_NONE) {
        return null;
    }
    if (is_array($firstPass)) {
        return $firstPass;
    }
    if (!is_string($firstPass)) {
        // Valid JSON scalar (number, bool, null): not table data.
        return null;
    }

    // The first pass produced a string: try it as doubly encoded JSON.
    $secondPass = json_decode($firstPass, true);
    $usable = json_last_error() === JSON_ERROR_NONE && is_array($secondPass);

    return $usable ? $secondPass : null;
}
/**
 * Collect the reference ids cited inside a table_data value.
 *
 * When the value decodes to an array, every string found at any nesting depth is scanned
 * for <mycite> tags. When it cannot be decoded to an array, the raw value is scanned as
 * one plain string instead.
 *
 * @param string $tableDataJson Raw table_data column value.
 * @return int[] Unique ids in ascending order.
 */
protected function extractMyciteIdsFromTableDataJson(string $tableDataJson): array
{
    $source = trim($tableDataJson);
    if ($source === '') {
        return [];
    }

    $tree = $this->decodeTableDataJsonToArray($source);
    if ($tree === null) {
        // Not decodable to an array: fall back to scanning the whole text.
        return $this->extractMyciteIds($source);
    }

    // Visit every leaf of the decoded structure; only string leaves can carry tags.
    $collected = [];
    array_walk_recursive($tree, function ($leaf) use (&$collected) {
        if (!is_string($leaf)) {
            return;
        }
        foreach ($this->extractMyciteIds($leaf) as $id) {
            $collected[] = $id;
        }
    });

    $collected = array_values(array_unique($collected));
    sort($collected);

    return $collected;
}
}