tijiao
This commit is contained in:
@@ -52,7 +52,7 @@ const capitalizeFirstLetter = function (text) {
|
||||
//px
|
||||
function emuToPixels(emu) {
|
||||
// 将 EMU 转换为厘米,并进一步转换为像素
|
||||
const emuToPixels = emu * 96 / 914400;
|
||||
const emuToPixels = emu * 96 / 914400;
|
||||
// return parseFloat((emu * 96 / 914400).toFixed(2)); // ✅
|
||||
// 四舍五入并保留两位小数
|
||||
return (Math.round(emuToPixels * 100) / 100).toFixed(0);
|
||||
@@ -114,99 +114,7 @@ export default {
|
||||
|
||||
|
||||
|
||||
handleFileUpload(event, callback) {
|
||||
const file = event.target.files[0];
|
||||
if (!file || file.type !== 'application/vnd.openxmlformats-officedocument.wordprocessingml.document') {
|
||||
alert('Please upload a valid Word file !');
|
||||
return;
|
||||
}
|
||||
|
||||
const reader = new FileReader();
|
||||
|
||||
reader.onload = (e) => {
|
||||
const arrayBuffer = e.target.result;
|
||||
const zip = new JSZip();
|
||||
|
||||
zip.loadAsync(arrayBuffer).then(async (zip) => {
|
||||
const relsXml = await zip.files['word/_rels/document.xml.rels'].async('string');
|
||||
const docXml = await zip.files['word/document.xml'].async('string');
|
||||
|
||||
const parser = new DOMParser();
|
||||
const relDoc = parser.parseFromString(relsXml, "text/xml");
|
||||
const docDom = parser.parseFromString(docXml, "text/xml");
|
||||
|
||||
const rels = {};
|
||||
Array.from(relDoc.getElementsByTagName('Relationship')).forEach((rel) => {
|
||||
const id = rel.getAttribute('Id');
|
||||
const target = rel.getAttribute('Target');
|
||||
rels[id] = target;
|
||||
});
|
||||
|
||||
const imageInfoMap = {};
|
||||
const blips = docDom.getElementsByTagName('a:blip');
|
||||
|
||||
Array.from(blips).forEach((blip) => {
|
||||
const embedId = blip.getAttribute('r:embed');
|
||||
const extent = findExtentElement(blip);
|
||||
if (embedId && extent) {
|
||||
const cx = extent.getAttribute('cx');
|
||||
const cy = extent.getAttribute('cy');
|
||||
if (cx && cy) {
|
||||
const width = emuToPixels(cx);
|
||||
const height = emuToPixels(cy);
|
||||
imageInfoMap[embedId] = { width, height };
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
mammoth.convertToHtml({ arrayBuffer }, {
|
||||
convertImage: mammoth.images.inline(async function (image) {
|
||||
console.log('image at line 163:', image)
|
||||
const contentType = image.contentType.toLowerCase();
|
||||
const allowedTypes = ['image/jpeg', 'image/jpg', 'image/png'];
|
||||
if (!allowedTypes.includes(contentType)) {
|
||||
return { src: '' };
|
||||
}
|
||||
|
||||
const embedId = image.relationshipId || image.refId || '';
|
||||
const imageBuffer = await image.read("base64");
|
||||
const base64Src = `data:${contentType};base64,${imageBuffer}`;
|
||||
|
||||
let width = '', height = '';
|
||||
if (embedId && imageInfoMap[embedId]) {
|
||||
width = imageInfoMap[embedId].width;
|
||||
height = imageInfoMap[embedId].height;
|
||||
}
|
||||
|
||||
return {
|
||||
src: base64Src,
|
||||
alt: '',
|
||||
width,
|
||||
height,
|
||||
refId: embedId,
|
||||
'content-type': contentType
|
||||
};
|
||||
})
|
||||
}).then((result) => {
|
||||
let html = result.value;
|
||||
|
||||
// 提取合法表格
|
||||
const tableContent = html.match(/<table[\s\S]*?<\/table>/g);
|
||||
const validTables = tableContent
|
||||
? tableContent.filter(table => /<td[\s\S]*?>/.test(table))
|
||||
: [];
|
||||
|
||||
callback(validTables);
|
||||
}).catch(err => {
|
||||
console.error('mammoth 转换失败:', err);
|
||||
});
|
||||
}).catch(err => {
|
||||
console.error("Zip 读取失败:", err);
|
||||
});
|
||||
};
|
||||
|
||||
reader.readAsArrayBuffer(file);
|
||||
},
|
||||
|
||||
|
||||
|
||||
extractLatexFromMathJax() {
|
||||
@@ -350,7 +258,6 @@ export default {
|
||||
// 创建临时 DOM 容器
|
||||
const tempDiv = document.createElement("div");
|
||||
tempDiv.innerHTML = pastedHtml; // 插入粘贴的 HTML 内容
|
||||
|
||||
// 获取表格
|
||||
const tables = tempDiv.querySelectorAll("table");
|
||||
if (tables.length === 0) {
|
||||
@@ -358,31 +265,23 @@ export default {
|
||||
callback([]);
|
||||
return;
|
||||
}
|
||||
|
||||
const allTables = []; // 存储所有表格的二维数组
|
||||
|
||||
for (const table of tables) {
|
||||
const rows = table.querySelectorAll("tr");
|
||||
const tableArray = []; // 当前表格的二维数组
|
||||
|
||||
// 存储合并单元格信息
|
||||
const mergeMap = {};
|
||||
|
||||
rows.forEach((row, rowIndex) => {
|
||||
const cells = row.querySelectorAll("td, th");
|
||||
const rowArray = [];
|
||||
|
||||
let colIndex = 0;
|
||||
|
||||
cells.forEach((cell) => {
|
||||
// 跳过被合并的单元格
|
||||
while (mergeMap[`${rowIndex},${colIndex}`]) {
|
||||
colIndex++;
|
||||
}
|
||||
|
||||
// 获取单元格内容,如果为空则设置为默认值
|
||||
let cellText = cell.innerText.trim() || " "; // 处理空值
|
||||
|
||||
// 处理样式
|
||||
if (cell.style.fontWeight === "bold") {
|
||||
cellText = `<b>${cellText}</b>`;
|
||||
@@ -400,7 +299,6 @@ export default {
|
||||
// 检查合并单元格属性
|
||||
const colspan = parseInt(cell.getAttribute("colspan") || "1", 10);
|
||||
const rowspan = parseInt(cell.getAttribute("rowspan") || "1", 10);
|
||||
|
||||
// 保存当前单元格信息
|
||||
rowArray[colIndex] = {
|
||||
text: cellText,
|
||||
@@ -417,17 +315,12 @@ export default {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
colIndex++; // 移动到下一列
|
||||
});
|
||||
|
||||
tableArray.push(rowArray); // 添加当前行到表格数组
|
||||
});
|
||||
|
||||
allTables.push(tableArray); // 添加当前表格到所有表格数组
|
||||
}
|
||||
|
||||
|
||||
callback(allTables); // 返回处理后的数组
|
||||
} catch (error) {
|
||||
console.error("解析粘贴内容失败:", error);
|
||||
@@ -439,18 +332,14 @@ export default {
|
||||
try {
|
||||
const Zip = new JSZip();
|
||||
const zip = await Zip.loadAsync(file);
|
||||
|
||||
|
||||
const documentFile = zip.file("word/document.xml");
|
||||
if (!documentFile) {
|
||||
console.error("❌ 找不到 word/document.xml,无法解析 Word 文件");
|
||||
return;
|
||||
}
|
||||
|
||||
const documentXml = await documentFile.async("string");
|
||||
const parser = new DOMParser();
|
||||
const documentDoc = parser.parseFromString(documentXml, "application/xml");
|
||||
|
||||
const numberingFile = zip.file("word/numbering.xml");
|
||||
let numberingMap = {};
|
||||
if (numberingFile) {
|
||||
@@ -460,45 +349,34 @@ export default {
|
||||
} else {
|
||||
console.warn("⚠️ word/numbering.xml 不存在,跳过编号解析");
|
||||
}
|
||||
|
||||
const tables = documentDoc.getElementsByTagNameNS(namespace, "tbl");
|
||||
const allTables = [];
|
||||
|
||||
if (!tables || tables.length === 0) {
|
||||
console.warn("未找到表格内容,请检查 XML 结构");
|
||||
return [];
|
||||
}
|
||||
|
||||
for (const table of tables) {
|
||||
const rows = table.getElementsByTagNameNS(namespace, "tr");
|
||||
const tableArray = [];
|
||||
|
||||
let rowSpanMap = [];
|
||||
|
||||
for (let rowIndex = 0; rowIndex < rows.length; rowIndex++) {
|
||||
const row = rows[rowIndex];
|
||||
const cells = row.getElementsByTagNameNS(namespace, "tc");
|
||||
const rowArray = [];
|
||||
|
||||
if (!rowSpanMap[rowIndex]) {
|
||||
rowSpanMap[rowIndex] = [];
|
||||
}
|
||||
|
||||
let cellIndex = 0;
|
||||
|
||||
for (let i = 0; i < cells.length; i++) {
|
||||
while (rowSpanMap[rowIndex][cellIndex]) {
|
||||
rowArray.push(null);
|
||||
cellIndex++;
|
||||
}
|
||||
|
||||
const cell = cells[i];
|
||||
let cellText = "";
|
||||
const paragraphs = cell.getElementsByTagName("w:p");
|
||||
|
||||
const gridSpan = cell.getElementsByTagNameNS(namespace, "gridSpan")[0];
|
||||
const vMerge = cell.getElementsByTagNameNS(namespace, "vMerge")[0];
|
||||
|
||||
var colspan = gridSpan ? parseInt(gridSpan.getAttribute("w:val"), 10) : 1;
|
||||
var rowspan = 1;
|
||||
if (vMerge) {
|
||||
@@ -506,49 +384,39 @@ export default {
|
||||
rowspan = 1; // 初始化 rowspan
|
||||
let nextRowIdx = rowIndex + 1;
|
||||
let maxRowspan = rows.length - rowIndex; // 确保 rowspan 不会超过剩余行数
|
||||
|
||||
while (nextRowIdx < rows.length) {
|
||||
const nextRowCells = rows[nextRowIdx].getElementsByTagNameNS(namespace, "tc");
|
||||
// console.log(`🔍 检查下一行单元格 at row ${nextRowIdx}, col ${cellIndex}:`, nextRowCells);
|
||||
|
||||
if (nextRowCells.length > cellIndex) {
|
||||
const nextCell = nextRowCells[cellIndex];
|
||||
|
||||
if (!nextCell) {
|
||||
// console.warn(`⚠️ nextCell 未定义 at row ${nextRowIdx}, col ${cellIndex}`);
|
||||
break;
|
||||
}
|
||||
|
||||
const nextVMerge = nextCell.getElementsByTagNameNS(namespace, "vMerge")[0];
|
||||
// console.log(`🔍 检查 nextVMerge at row ${nextRowIdx}, col ${cellIndex}:`, nextVMerge);
|
||||
|
||||
// **如果 nextVMerge 为空,则不应继续增长 rowspan**
|
||||
if (!nextVMerge) {
|
||||
// console.log(`⚠️ nextVMerge 为空 at row ${nextRowIdx}, col ${cellIndex} - 停止扩展`);
|
||||
break;
|
||||
}
|
||||
|
||||
// **解析 nextVMerge 的值**
|
||||
const vMergeVal = nextVMerge.getAttribute("w:val");
|
||||
|
||||
if (!vMergeVal || vMergeVal === "continue") {
|
||||
if (rowspan < maxRowspan) { // 限制 rowspan 最大值
|
||||
rowspan++;
|
||||
// console.log(`✅ rowspan 扩展到: ${rowspan} (row: ${nextRowIdx}, col: ${cellIndex})`);
|
||||
|
||||
nextRowIdx++;
|
||||
} else {
|
||||
// console.log(`⛔ 最大 rowspan 限制 ${rowspan},在 row ${nextRowIdx} 停止`);
|
||||
|
||||
break;
|
||||
}
|
||||
} else if (vMergeVal === "restart") {
|
||||
// console.log(`⛔ 停止 rowspan 扩展 at row ${nextRowIdx}, 因为 w:val="restart"`);
|
||||
break;
|
||||
} else {
|
||||
// console.log(`⚠️ 未知 w:val="${vMergeVal}" at row ${nextRowIdx},停止合并`);
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
// console.warn(`⚠️ Row ${nextRowIdx} 没有足够的列 cellIndex ${cellIndex}`);
|
||||
break;
|
||||
}
|
||||
}
|
||||
@@ -556,8 +424,6 @@ export default {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
const currentLevelNumbers = {};
|
||||
for (const paragraph of paragraphs) {
|
||||
let listPrefix = "";
|
||||
@@ -571,7 +437,6 @@ export default {
|
||||
listPrefix = this.getListNumber(numId, ilvl, numberingMap, currentLevelNumbers);
|
||||
}
|
||||
}
|
||||
|
||||
let paragraphText = listPrefix ? `${listPrefix} ` : "";
|
||||
|
||||
const runs = paragraph.getElementsByTagName("w:r");
|
||||
@@ -580,7 +445,6 @@ export default {
|
||||
const drawings = run.getElementsByTagName("w:drawing");
|
||||
for (let d = 0; d < drawings.length; d++) {
|
||||
const drawing = drawings[d];
|
||||
|
||||
// 使用命名空间提取 a:blip
|
||||
const blips = drawing.getElementsByTagNameNS("http://schemas.openxmlformats.org/drawingml/2006/main", "blip");
|
||||
for (let b = 0; b < blips.length; b++) {
|
||||
@@ -592,24 +456,16 @@ export default {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
const texts = run.getElementsByTagName("w:t");
|
||||
for (const text of texts) {
|
||||
textContent += text.textContent;
|
||||
}
|
||||
|
||||
const rPr = run.getElementsByTagName("w:rPr")[0];
|
||||
let formattedText = textContent;
|
||||
|
||||
if (rPr) {
|
||||
const bold = rPr.getElementsByTagName("w:b").length > 0;
|
||||
const italic = rPr.getElementsByTagName("w:i").length > 0;
|
||||
const vertAlignElement = rPr.getElementsByTagName("w:vertAlign")[0];
|
||||
|
||||
if (bold) {
|
||||
formattedText = `<b>${formattedText}</b>`;
|
||||
}
|
||||
@@ -625,42 +481,32 @@ export default {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
formattedText = replaceNegativeSign(formattedText);
|
||||
formattedText = capitalizeFirstLetter(formattedText);
|
||||
|
||||
const regex = /\[(\d+(?:–\d+)?(?:, ?\d+(?:–\d+)?)*)\]/g;
|
||||
formattedText = formattedText.replace(/<blue>/g, '').replace(/<\/blue>/g, '');
|
||||
|
||||
if (regex.test(formattedText)) {
|
||||
formattedText = formattedText.replace(regex, function (match) {
|
||||
const content = match.slice(1, match.length - 1);
|
||||
|
||||
if (/^\d+$/.test(content) || /, ?/.test(content) || /–/.test(content)) {
|
||||
return `<blue>${match}</blue>`;
|
||||
}
|
||||
return match;
|
||||
});
|
||||
}
|
||||
|
||||
|
||||
paragraphText += formattedText;
|
||||
}
|
||||
|
||||
const breaks = paragraph.getElementsByTagName("w:br");
|
||||
for (const br of breaks) {
|
||||
paragraphText += "<br/>";
|
||||
}
|
||||
|
||||
cellText += paragraphText;
|
||||
}
|
||||
|
||||
rowArray.push({
|
||||
text: cellText,
|
||||
colspan: colspan,
|
||||
rowspan: rowspan
|
||||
});
|
||||
|
||||
if (rowspan > 1) {
|
||||
for (let j = 1; j < rowspan; j++) {
|
||||
if (!rowSpanMap[rowIndex + j]) {
|
||||
@@ -669,25 +515,102 @@ export default {
|
||||
rowSpanMap[rowIndex + j][cellIndex] = true;
|
||||
}
|
||||
}
|
||||
|
||||
cellIndex++;
|
||||
}
|
||||
|
||||
tableArray.push(rowArray.filter(item => item !== null));
|
||||
|
||||
}
|
||||
|
||||
allTables.push(tableArray);
|
||||
}
|
||||
|
||||
callback(allTables);
|
||||
|
||||
} catch (error) {
|
||||
console.error("解析 Word 文件失败:", error);
|
||||
callback([]);
|
||||
}
|
||||
},
|
||||
handleFileUpload(event, callback) {
|
||||
const file = event.target.files[0];
|
||||
if (!file || file.type !== 'application/vnd.openxmlformats-officedocument.wordprocessingml.document') {
|
||||
alert('Please upload a valid Word file !');
|
||||
return;
|
||||
}
|
||||
const reader = new FileReader();
|
||||
reader.onload = (e) => {
|
||||
const arrayBuffer = e.target.result;
|
||||
const zip = new JSZip();
|
||||
zip.loadAsync(arrayBuffer).then(async (zip) => {
|
||||
const relsXml = await zip.files['word/_rels/document.xml.rels'].async('string');
|
||||
const docXml = await zip.files['word/document.xml'].async('string');
|
||||
const parser = new DOMParser();
|
||||
const relDoc = parser.parseFromString(relsXml, "text/xml");
|
||||
const docDom = parser.parseFromString(docXml, "text/xml");
|
||||
const rels = {};
|
||||
Array.from(relDoc.getElementsByTagName('Relationship')).forEach((rel) => {
|
||||
const id = rel.getAttribute('Id');
|
||||
const target = rel.getAttribute('Target');
|
||||
rels[id] = target;
|
||||
});
|
||||
|
||||
const imageInfoMap = {};
|
||||
const blips = docDom.getElementsByTagName('a:blip');
|
||||
|
||||
Array.from(blips).forEach((blip) => {
|
||||
const embedId = blip.getAttribute('r:embed');
|
||||
const extent = findExtentElement(blip);
|
||||
if (embedId && extent) {
|
||||
const cx = extent.getAttribute('cx');
|
||||
const cy = extent.getAttribute('cy');
|
||||
if (cx && cy) {
|
||||
const width = emuToPixels(cx);
|
||||
const height = emuToPixels(cy);
|
||||
imageInfoMap[embedId] = { width, height };
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
mammoth.convertToHtml({ arrayBuffer }, {
|
||||
convertImage: mammoth.images.inline(async function (image) {
|
||||
console.log('image at line 163:', image)
|
||||
const contentType = image.contentType.toLowerCase();
|
||||
const allowedTypes = ['image/jpeg', 'image/jpg', 'image/png'];
|
||||
if (!allowedTypes.includes(contentType)) {
|
||||
return { src: '' };
|
||||
}
|
||||
const embedId = image.relationshipId || image.refId || '';
|
||||
const imageBuffer = await image.read("base64");
|
||||
const base64Src = `data:${contentType};base64,${imageBuffer}`;
|
||||
let width = '', height = '';
|
||||
if (embedId && imageInfoMap[embedId]) {
|
||||
width = imageInfoMap[embedId].width;
|
||||
height = imageInfoMap[embedId].height;
|
||||
}
|
||||
return {
|
||||
src: base64Src,
|
||||
alt: '',
|
||||
width,
|
||||
height,
|
||||
refId: embedId,
|
||||
'content-type': contentType
|
||||
};
|
||||
})
|
||||
}).then((result) => {
|
||||
let html = result.value;
|
||||
// 提取合法表格
|
||||
const tableContent = html.match(/<table[\s\S]*?<\/table>/g);
|
||||
const validTables = tableContent
|
||||
? tableContent.filter(table => /<td[\s\S]*?>/.test(table))
|
||||
: [];
|
||||
|
||||
callback(validTables);
|
||||
}).catch(err => {
|
||||
console.error('mammoth 转换失败:', err);
|
||||
});
|
||||
}).catch(err => {
|
||||
console.error("Zip 读取失败:", err);
|
||||
});
|
||||
};
|
||||
|
||||
reader.readAsArrayBuffer(file);
|
||||
},
|
||||
// async extractWordTablesToArrays(file, callback) {
|
||||
|
||||
// const namespace = "http://schemas.openxmlformats.org/wordprocessingml/2006/main";
|
||||
|
||||
Reference in New Issue
Block a user