This commit is contained in:
2025-10-27 14:48:16 +08:00
parent db00f22155
commit 5d4339905e
6 changed files with 738 additions and 544 deletions

View File

@@ -52,7 +52,7 @@ const capitalizeFirstLetter = function (text) {
//px
function emuToPixels(emu) {
// 将 EMU 转换为厘米,并进一步转换为像素
const emuToPixels = emu * 96 / 914400;
const emuToPixels = emu * 96 / 914400;
// return parseFloat((emu * 96 / 914400).toFixed(2)); // ✅
// 四舍五入并保留两位小数
return (Math.round(emuToPixels * 100) / 100).toFixed(0);
@@ -114,99 +114,7 @@ export default {
handleFileUpload(event, callback) {
const file = event.target.files[0];
if (!file || file.type !== 'application/vnd.openxmlformats-officedocument.wordprocessingml.document') {
alert('Please upload a valid Word file !');
return;
}
const reader = new FileReader();
reader.onload = (e) => {
const arrayBuffer = e.target.result;
const zip = new JSZip();
zip.loadAsync(arrayBuffer).then(async (zip) => {
const relsXml = await zip.files['word/_rels/document.xml.rels'].async('string');
const docXml = await zip.files['word/document.xml'].async('string');
const parser = new DOMParser();
const relDoc = parser.parseFromString(relsXml, "text/xml");
const docDom = parser.parseFromString(docXml, "text/xml");
const rels = {};
Array.from(relDoc.getElementsByTagName('Relationship')).forEach((rel) => {
const id = rel.getAttribute('Id');
const target = rel.getAttribute('Target');
rels[id] = target;
});
const imageInfoMap = {};
const blips = docDom.getElementsByTagName('a:blip');
Array.from(blips).forEach((blip) => {
const embedId = blip.getAttribute('r:embed');
const extent = findExtentElement(blip);
if (embedId && extent) {
const cx = extent.getAttribute('cx');
const cy = extent.getAttribute('cy');
if (cx && cy) {
const width = emuToPixels(cx);
const height = emuToPixels(cy);
imageInfoMap[embedId] = { width, height };
}
}
});
mammoth.convertToHtml({ arrayBuffer }, {
convertImage: mammoth.images.inline(async function (image) {
console.log('image at line 163:', image)
const contentType = image.contentType.toLowerCase();
const allowedTypes = ['image/jpeg', 'image/jpg', 'image/png'];
if (!allowedTypes.includes(contentType)) {
return { src: '' };
}
const embedId = image.relationshipId || image.refId || '';
const imageBuffer = await image.read("base64");
const base64Src = `data:${contentType};base64,${imageBuffer}`;
let width = '', height = '';
if (embedId && imageInfoMap[embedId]) {
width = imageInfoMap[embedId].width;
height = imageInfoMap[embedId].height;
}
return {
src: base64Src,
alt: '',
width,
height,
refId: embedId,
'content-type': contentType
};
})
}).then((result) => {
let html = result.value;
// 提取合法表格
const tableContent = html.match(/<table[\s\S]*?<\/table>/g);
const validTables = tableContent
? tableContent.filter(table => /<td[\s\S]*?>/.test(table))
: [];
callback(validTables);
}).catch(err => {
console.error('mammoth 转换失败:', err);
});
}).catch(err => {
console.error("Zip 读取失败:", err);
});
};
reader.readAsArrayBuffer(file);
},
extractLatexFromMathJax() {
@@ -350,7 +258,6 @@ export default {
// 创建临时 DOM 容器
const tempDiv = document.createElement("div");
tempDiv.innerHTML = pastedHtml; // 插入粘贴的 HTML 内容
// 获取表格
const tables = tempDiv.querySelectorAll("table");
if (tables.length === 0) {
@@ -358,31 +265,23 @@ export default {
callback([]);
return;
}
const allTables = []; // 存储所有表格的二维数组
for (const table of tables) {
const rows = table.querySelectorAll("tr");
const tableArray = []; // 当前表格的二维数组
// 存储合并单元格信息
const mergeMap = {};
rows.forEach((row, rowIndex) => {
const cells = row.querySelectorAll("td, th");
const rowArray = [];
let colIndex = 0;
cells.forEach((cell) => {
// 跳过被合并的单元格
while (mergeMap[`${rowIndex},${colIndex}`]) {
colIndex++;
}
// 获取单元格内容,如果为空则设置为默认值
let cellText = cell.innerText.trim() || "&nbsp;"; // 处理空值
// 处理样式
if (cell.style.fontWeight === "bold") {
cellText = `<b>${cellText}</b>`;
@@ -400,7 +299,6 @@ export default {
// 检查合并单元格属性
const colspan = parseInt(cell.getAttribute("colspan") || "1", 10);
const rowspan = parseInt(cell.getAttribute("rowspan") || "1", 10);
// 保存当前单元格信息
rowArray[colIndex] = {
text: cellText,
@@ -417,17 +315,12 @@ export default {
}
}
}
colIndex++; // 移动到下一列
});
tableArray.push(rowArray); // 添加当前行到表格数组
});
allTables.push(tableArray); // 添加当前表格到所有表格数组
}
callback(allTables); // 返回处理后的数组
} catch (error) {
console.error("解析粘贴内容失败:", error);
@@ -439,18 +332,14 @@ export default {
try {
const Zip = new JSZip();
const zip = await Zip.loadAsync(file);
const documentFile = zip.file("word/document.xml");
if (!documentFile) {
console.error("❌ 找不到 word/document.xml无法解析 Word 文件");
return;
}
const documentXml = await documentFile.async("string");
const parser = new DOMParser();
const documentDoc = parser.parseFromString(documentXml, "application/xml");
const numberingFile = zip.file("word/numbering.xml");
let numberingMap = {};
if (numberingFile) {
@@ -460,45 +349,34 @@ export default {
} else {
console.warn("⚠️ word/numbering.xml 不存在,跳过编号解析");
}
const tables = documentDoc.getElementsByTagNameNS(namespace, "tbl");
const allTables = [];
if (!tables || tables.length === 0) {
console.warn("未找到表格内容,请检查 XML 结构");
return [];
}
for (const table of tables) {
const rows = table.getElementsByTagNameNS(namespace, "tr");
const tableArray = [];
let rowSpanMap = [];
for (let rowIndex = 0; rowIndex < rows.length; rowIndex++) {
const row = rows[rowIndex];
const cells = row.getElementsByTagNameNS(namespace, "tc");
const rowArray = [];
if (!rowSpanMap[rowIndex]) {
rowSpanMap[rowIndex] = [];
}
let cellIndex = 0;
for (let i = 0; i < cells.length; i++) {
while (rowSpanMap[rowIndex][cellIndex]) {
rowArray.push(null);
cellIndex++;
}
const cell = cells[i];
let cellText = "";
const paragraphs = cell.getElementsByTagName("w:p");
const gridSpan = cell.getElementsByTagNameNS(namespace, "gridSpan")[0];
const vMerge = cell.getElementsByTagNameNS(namespace, "vMerge")[0];
var colspan = gridSpan ? parseInt(gridSpan.getAttribute("w:val"), 10) : 1;
var rowspan = 1;
if (vMerge) {
@@ -506,49 +384,39 @@ export default {
rowspan = 1; // 初始化 rowspan
let nextRowIdx = rowIndex + 1;
let maxRowspan = rows.length - rowIndex; // 确保 rowspan 不会超过剩余行数
while (nextRowIdx < rows.length) {
const nextRowCells = rows[nextRowIdx].getElementsByTagNameNS(namespace, "tc");
// console.log(`🔍 检查下一行单元格 at row ${nextRowIdx}, col ${cellIndex}:`, nextRowCells);
if (nextRowCells.length > cellIndex) {
const nextCell = nextRowCells[cellIndex];
if (!nextCell) {
// console.warn(`⚠️ nextCell 未定义 at row ${nextRowIdx}, col ${cellIndex}`);
break;
}
const nextVMerge = nextCell.getElementsByTagNameNS(namespace, "vMerge")[0];
// console.log(`🔍 检查 nextVMerge at row ${nextRowIdx}, col ${cellIndex}:`, nextVMerge);
// **如果 nextVMerge 为空,则不应继续增长 rowspan**
if (!nextVMerge) {
// console.log(`⚠️ nextVMerge 为空 at row ${nextRowIdx}, col ${cellIndex} - 停止扩展`);
break;
}
// **解析 nextVMerge 的值**
const vMergeVal = nextVMerge.getAttribute("w:val");
if (!vMergeVal || vMergeVal === "continue") {
if (rowspan < maxRowspan) { // 限制 rowspan 最大值
rowspan++;
// console.log(`✅ rowspan 扩展到: ${rowspan} (row: ${nextRowIdx}, col: ${cellIndex})`);
nextRowIdx++;
} else {
// console.log(`⛔ 最大 rowspan 限制 ${rowspan},在 row ${nextRowIdx} 停止`);
break;
}
} else if (vMergeVal === "restart") {
// console.log(`⛔ 停止 rowspan 扩展 at row ${nextRowIdx}, 因为 w:val="restart"`);
break;
} else {
// console.log(`⚠️ 未知 w:val="${vMergeVal}" at row ${nextRowIdx},停止合并`);
break;
}
} else {
// console.warn(`⚠️ Row ${nextRowIdx} 没有足够的列 cellIndex ${cellIndex}`);
break;
}
}
@@ -556,8 +424,6 @@ export default {
continue;
}
}
const currentLevelNumbers = {};
for (const paragraph of paragraphs) {
let listPrefix = "";
@@ -571,7 +437,6 @@ export default {
listPrefix = this.getListNumber(numId, ilvl, numberingMap, currentLevelNumbers);
}
}
let paragraphText = listPrefix ? `${listPrefix} ` : "";
const runs = paragraph.getElementsByTagName("w:r");
@@ -580,7 +445,6 @@ export default {
const drawings = run.getElementsByTagName("w:drawing");
for (let d = 0; d < drawings.length; d++) {
const drawing = drawings[d];
// 使用命名空间提取 a:blip
const blips = drawing.getElementsByTagNameNS("http://schemas.openxmlformats.org/drawingml/2006/main", "blip");
for (let b = 0; b < blips.length; b++) {
@@ -592,24 +456,16 @@ export default {
}
}
}
const texts = run.getElementsByTagName("w:t");
for (const text of texts) {
textContent += text.textContent;
}
const rPr = run.getElementsByTagName("w:rPr")[0];
let formattedText = textContent;
if (rPr) {
const bold = rPr.getElementsByTagName("w:b").length > 0;
const italic = rPr.getElementsByTagName("w:i").length > 0;
const vertAlignElement = rPr.getElementsByTagName("w:vertAlign")[0];
if (bold) {
formattedText = `<b>${formattedText}</b>`;
}
@@ -625,42 +481,32 @@ export default {
}
}
}
formattedText = replaceNegativeSign(formattedText);
formattedText = capitalizeFirstLetter(formattedText);
const regex = /\[(\d+(?:\d+)?(?:, ?\d+(?:\d+)?)*)\]/g;
formattedText = formattedText.replace(/<blue>/g, '').replace(/<\/blue>/g, '');
if (regex.test(formattedText)) {
formattedText = formattedText.replace(regex, function (match) {
const content = match.slice(1, match.length - 1);
if (/^\d+$/.test(content) || /, ?/.test(content) || //.test(content)) {
return `<blue>${match}</blue>`;
}
return match;
});
}
paragraphText += formattedText;
}
const breaks = paragraph.getElementsByTagName("w:br");
for (const br of breaks) {
paragraphText += "<br/>";
}
cellText += paragraphText;
}
rowArray.push({
text: cellText,
colspan: colspan,
rowspan: rowspan
});
if (rowspan > 1) {
for (let j = 1; j < rowspan; j++) {
if (!rowSpanMap[rowIndex + j]) {
@@ -669,25 +515,102 @@ export default {
rowSpanMap[rowIndex + j][cellIndex] = true;
}
}
cellIndex++;
}
tableArray.push(rowArray.filter(item => item !== null));
}
allTables.push(tableArray);
}
callback(allTables);
} catch (error) {
console.error("解析 Word 文件失败:", error);
callback([]);
}
},
handleFileUpload(event, callback) {
const file = event.target.files[0];
if (!file || file.type !== 'application/vnd.openxmlformats-officedocument.wordprocessingml.document') {
alert('Please upload a valid Word file !');
return;
}
const reader = new FileReader();
reader.onload = (e) => {
const arrayBuffer = e.target.result;
const zip = new JSZip();
zip.loadAsync(arrayBuffer).then(async (zip) => {
const relsXml = await zip.files['word/_rels/document.xml.rels'].async('string');
const docXml = await zip.files['word/document.xml'].async('string');
const parser = new DOMParser();
const relDoc = parser.parseFromString(relsXml, "text/xml");
const docDom = parser.parseFromString(docXml, "text/xml");
const rels = {};
Array.from(relDoc.getElementsByTagName('Relationship')).forEach((rel) => {
const id = rel.getAttribute('Id');
const target = rel.getAttribute('Target');
rels[id] = target;
});
const imageInfoMap = {};
const blips = docDom.getElementsByTagName('a:blip');
Array.from(blips).forEach((blip) => {
const embedId = blip.getAttribute('r:embed');
const extent = findExtentElement(blip);
if (embedId && extent) {
const cx = extent.getAttribute('cx');
const cy = extent.getAttribute('cy');
if (cx && cy) {
const width = emuToPixels(cx);
const height = emuToPixels(cy);
imageInfoMap[embedId] = { width, height };
}
}
});
mammoth.convertToHtml({ arrayBuffer }, {
convertImage: mammoth.images.inline(async function (image) {
console.log('image at line 163:', image)
const contentType = image.contentType.toLowerCase();
const allowedTypes = ['image/jpeg', 'image/jpg', 'image/png'];
if (!allowedTypes.includes(contentType)) {
return { src: '' };
}
const embedId = image.relationshipId || image.refId || '';
const imageBuffer = await image.read("base64");
const base64Src = `data:${contentType};base64,${imageBuffer}`;
let width = '', height = '';
if (embedId && imageInfoMap[embedId]) {
width = imageInfoMap[embedId].width;
height = imageInfoMap[embedId].height;
}
return {
src: base64Src,
alt: '',
width,
height,
refId: embedId,
'content-type': contentType
};
})
}).then((result) => {
let html = result.value;
// 提取合法表格
const tableContent = html.match(/<table[\s\S]*?<\/table>/g);
const validTables = tableContent
? tableContent.filter(table => /<td[\s\S]*?>/.test(table))
: [];
callback(validTables);
}).catch(err => {
console.error('mammoth 转换失败:', err);
});
}).catch(err => {
console.error("Zip 读取失败:", err);
});
};
reader.readAsArrayBuffer(file);
},
// async extractWordTablesToArrays(file, callback) {
// const namespace = "http://schemas.openxmlformats.org/wordprocessingml/2006/main";