tijiao

2025-05-20 09:55:12 +08:00
parent 4e8e8e796f
commit c60d06194c
22 changed files with 8978 additions and 321 deletions
--- a/src/common/js/commonJS.js
+++ b/src/common/js/commonJS.js
@@ -1,24 +1,191 @@
 import Vue from 'vue';
 import katex from 'katex';
 import JSZip from 'jszip';
+import mammoth from "mammoth";
 import Common from '@/components/common/common'
 import Tiff from 'tiff.js';
 var mediaUrl = Common.mediaUrl + 'articleImage/';
 // var mediaUrl1 = 'https://submission.tmrjournals.com/public/articleImage/';

 const fs = require('fs');
-// 替换负号的方法
+// 替换数字前面的负号的方法
+// const replaceNegativeSign = function (text) {
+//     return text.replace(/[-−]/g, "–");
+
+// };
 const replaceNegativeSign = function (text) {
-    return text.replace(/^-(?=\d)/, "−");
+    // Replace every hyphen-minus (-) or minus sign (−) that directly precedes a
+    // digit with an EN DASH (–). This covers standalone values (`-22`, `−22`)
+    // as well as bracketed ranges (`[13-22]`, `[13−22]`).
+    //
+    // This function is also called on innerHTML strings, so complete HTML tags
+    // are matched first and returned untouched. Otherwise a minus inside an
+    // attribute value (for example data-latex="x^{-1}") would be rewritten and
+    // the stored formula corrupted. Quoted attribute values are consumed as a
+    // unit so that a `>` inside them does not end the tag early.
+    return text.replace(
+        /<\/?[a-zA-Z](?:"[^"]*"|'[^']*'|[^>"'])*>|[-−](?=\d)/g,
+        (match) => (match.charAt(0) === '<' ? match : '–')
+    );
 };

+
 // 首字母大写的方法
+// const capitalizeFirstLetter = function (text) {
+//     return text.replace(/^\s*([a-zA-Z])/, function (match, firstLetter) {
+//         return firstLetter.toUpperCase();
+//     });
+// };
 const capitalizeFirstLetter = function (text) {
-    return text.replace(/^\s*([a-zA-Z])/, function (match, firstLetter) {
-        return firstLetter.toUpperCase();
-    });
+    // const words = text.split(' '); // split the string into an array of words
+    // if (words.length > 0 && words[0].charAt(0).match(/[a-zA-Z]/)) {
+    //     // if the first character is a letter, capitalize the first letter of the first word
+    //     words[0] = words[0].charAt(0).toUpperCase() + words[0].slice(1); 
+    // }
+    return text; // capitalization is disabled (see the commented-out code above): the input is returned unchanged
 };
+
+
+//cm
+// function emuToPixels(emu) {
+//     const emuToCm = emu / 914400 * 2.54;
+//     // 四舍五入并保留两位小数
+//     return (Math.round(emuToCm * 100) / 100).toFixed(2);
+
+// }
+//px
+/**
+ * Convert a length in EMU (English Metric Units, as used by the cx/cy
+ * attributes of <wp:extent>) to CSS pixels at 96 DPI.
+ *
+ * 914400 EMU equal one inch and one inch equals 96 px, so no centimetre
+ * factor (2.54) is involved in this conversion.
+ *
+ * @param {number|string} emu - length in EMU; numeric strings are accepted.
+ * @returns {string} the pixel value rounded to two decimals, e.g. "96.00".
+ */
+function emuToPixels(emu) {
+    const EMU_PER_INCH = 914400;
+    const PIXELS_PER_INCH = 96;
+    const pixels = Number(emu) / EMU_PER_INCH * PIXELS_PER_INCH;
+    // Round first, then format, so the result always carries two decimals.
+    return (Math.round(pixels * 100) / 100).toFixed(2);
+}
+
 export default {
+    /**
+     * Check whether an image source can be loaded by the browser.
+     *
+     * @param {string} base64 - value assigned to the `src` of a temporary Image.
+     * @returns {Promise<boolean>} resolves to true on the load event and to
+     *   false on the error event.
+     */
+    isImageValid(base64) {
+        return new Promise((resolve) => {
+            const probe = new Image();
+            probe.src = base64;
+            probe.onload = () => {
+                resolve(true);
+            };
+            probe.onerror = () => {
+                resolve(false);
+            };
+        });
+    },
+
+
+
+    /**
+     * Read an uploaded .docx file and hand its tables (as HTML strings) to `callback`.
+     *
+     * The file is converted to HTML with mammoth. Image sizes are read from the
+     * <wp:extent> elements of word/document.xml and written onto the <img> tags
+     * as width/height attributes; then every <table> that contains a <td> is
+     * collected.
+     *
+     * @param {Event} event - change event of a file input; only `event.target.files[0]` is read.
+     * @param {function(string[]): void} callback - receives the table HTML strings,
+     *   or `[]` when the document contains no usable table. It is not called when
+     *   the file is rejected (alert) or when unzipping/conversion fails (logged).
+     */
+    handleFileUpload(event, callback) {
+        const file = event.target.files[0];
+        if (!file || file.type !== 'application/vnd.openxmlformats-officedocument.wordprocessingml.document') {
+            alert('请上传一个有效的 Word 文件');
+            return;
+        }
+
+        // Sizes of all <wp:extent> elements of word/document.xml in document order.
+        // An entry is `null` when cx/cy is missing so that indexes stay aligned.
+        const readImageSizes = async (zip) => {
+            const documentXml = zip.files['word/document.xml'];
+            if (!documentXml) {
+                return [];
+            }
+            const xmlString = await documentXml.async("string");
+            const xmlDoc = new DOMParser().parseFromString(xmlString, "text/xml");
+            return Array.from(xmlDoc.getElementsByTagName('wp:extent')).map((extent) => {
+                const widthEmu = extent.getAttribute('cx');
+                const heightEmu = extent.getAttribute('cy');
+                if (!widthEmu || !heightEmu) {
+                    return null;
+                }
+                return { width: emuToPixels(widthEmu), height: emuToPixels(heightEmu) };
+            });
+        };
+
+        // Add width/height to the n-th <img> of the whole document using the n-th size.
+        // The counter runs over the complete HTML, not per table, so images in later
+        // tables are not paired with the sizes of earlier images.
+        // NOTE(review): assumes mammoth emits one <img> per <wp:extent>, in the same
+        // order - confirm for documents that contain charts or shapes.
+        const applyImageSizes = (html, sizes) => {
+            let imgIndex = 0;
+            return html.replace(/<img\b/g, (imgOpen) => {
+                const size = sizes[imgIndex];
+                imgIndex += 1;
+                return size ? `<img width="${size.width}" height="${size.height}"` : imgOpen;
+            });
+        };
+
+        const reader = new FileReader();
+
+        reader.onload = async (e) => {
+            const arrayBuffer = e.target.result;
+
+            let zip;
+            try {
+                zip = await new JSZip().loadAsync(arrayBuffer);
+            } catch (err) {
+                console.error("Error loading zip file:", err);
+                return;
+            }
+
+            // Image sizes are an enhancement only: if they cannot be read, the
+            // tables are still delivered, just without width/height attributes.
+            let imageSizes = [];
+            try {
+                // Awaited on purpose: the sizes must be complete before the HTML is patched.
+                imageSizes = await readImageSizes(zip);
+            } catch (err) {
+                console.warn('Could not read image sizes from word/document.xml:', err);
+            }
+
+            try {
+                const result = await mammoth.convertToHtml({ arrayBuffer });
+                const html = applyImageSizes(result.value, imageSizes);
+
+                const tableContent = html.match(/<table[\s\S]*?<\/table>/g);
+                if (!tableContent) {
+                    console.log("没有找到表格内容。");
+                    // Always answer the caller, otherwise it waits forever.
+                    callback([]);
+                    return;
+                }
+
+                // Keep only tables that actually contain cells.
+                const validTables = tableContent.filter((table) => /<td[\s\S]*?>/.test(table));
+                callback(validTables);
+            } catch (err) {
+                console.error('Error parsing Word file:', err);
+            }
+        };
+
+        reader.readAsArrayBuffer(file);
+    },
+
    extractLatexFromMathJax() {
        // 获取所有 MathJax 渲染的公式容器
        const mathContainers = document.querySelectorAll('mjx-container');
@@ -120,7 +287,7 @@ export default {

        console.log('htmlContent at line 94:', htmlContent)

-        str = this.transformHtmlString(htmlContent)
+        str = this.transformHtmlString(htmlContent, 'table')

        // 创建一个临时的 DOM 元素来解析 HTML
        const div = document.createElement('div');
@@ -399,6 +566,26 @@ export default {
                            const runs = paragraph.getElementsByTagName("w:r");
                            for (const run of runs) {
                                let textContent = "";
+                                const drawings = run.getElementsByTagName("w:drawing");
+                                for (let d = 0; d < drawings.length; d++) {
+                                    const drawing = drawings[d];
+
+                                    // 使用命名空间提取 a:blip
+                                    const blips = drawing.getElementsByTagNameNS("http://schemas.openxmlformats.org/drawingml/2006/main", "blip");
+                                    for (let b = 0; b < blips.length; b++) {
+                                        const blip = blips[b];
+                                        const embedId = blip.getAttribute("r:embed");
+                                        if (embedId) {
+                                            textContent += `<img data-embed="${embedId}"/>`;
+                                            console.log("✅ 图片 embedId:", embedId);
+                                        }
+                                    }
+                                }
+
+
+
+
+                                
                                const texts = run.getElementsByTagName("w:t");
                                for (const text of texts) {
                                    textContent += text.textContent;
@@ -709,7 +896,7 @@ export default {
    //     }

    // },
-    transformHtmlString(inputHtml) {
+    transformHtmlString(inputHtml, type) {

        // inputHtml = inputHtml.replace(/(<[^>]+) style="[^"]*"/g, '$1'); // 移除style属性
        // inputHtml = inputHtml.replace(/(<[^>]+) class="[^"]*"/g, '$1'); // 移除class属性
@@ -722,6 +909,9 @@ export default {
                if (attrName === "data-latex") {
                    return attrMatch;
                }
+                if (type == 'table' && tag == 'img' && (attrName === "src" || attrName === "width" || attrName === "height")) {
+                    return attrMatch;
+                }
                return ''; // 删除其他属性
            });

@@ -729,8 +919,15 @@ export default {
            return `<${tag}${updatedAttributes}>`;
        });
        // 2. 删除所有不需要的标签 (除 `strong`, `em`, `sub`, `sup`, `b`, `i` 外的所有标签)
-        inputHtml = inputHtml.replace(/<(?!\/?(strong|em|sub|sup|b|i|blue|wmath))[^>]+>/g, ''); // 删除不需要的标签
+        if (type == 'table') {
+            inputHtml = inputHtml.replace(/<(?!\/?(strong|em|sub|sup|b|i|blue|wmath|img))[^>]+>/g, ''); // 删除不需要的标签
+        } else {
+            inputHtml = inputHtml.replace(/<(?!\/?(strong|em|sub|sup|b|i|blue|wmath))[^>]+>/g, ''); // 删除不需要的标签
+        }

+
+        // 3. 删除所有 `<br>` 标签
+        inputHtml = inputHtml.replace(/<br\s*\/?>/g, ''); // 删除 <br> 标签
        // 3. 如果有 `<strong>` 和 `<em>` 标签，去掉内部样式并保留内容
        inputHtml = inputHtml.replace(/<span[^>]*>/g, '').replace(/<\/span>/g, '');  // 去除span标签
        inputHtml = inputHtml.replace(/<strong>/g, '<b>').replace(/<\/strong>/g, '</b>'); // 将 `strong` 替换成 `b`
@@ -753,6 +950,8 @@ export default {
        // 2️⃣ 将 <p> 内容转换为数组，并处理内容
        let parsedData = Array.from(paragraphs).map(p => {
            let text = p.innerHTML.trim(); // 获取内容，去除两端空格
+            text = replaceNegativeSign(text);
+            console.log('text at line 756:', text)
            text = this.transformHtmlString(text)
            // 3️⃣ **正确移除 <o:p>（Word 复制的无效标签）**
            text = text.replace(/<\/?o:p[^>]*>/g, "");
@@ -788,6 +987,7 @@ export default {


    async parseTableToArray(tableString, callback) {
+        console.log('tableString at line 845:', tableString)
        const parser = new DOMParser();
        const doc = parser.parseFromString(tableString, 'text/html');
        const rows = doc.querySelectorAll('table tr');  // 获取所有的行（<tr>）
@@ -1412,8 +1612,9 @@ export default {
            const cells = table.querySelectorAll('td');
            cells.forEach((td) => {
                if (/^-?\d+(\.\d+)?$/.test(td.textContent.trim())) {
-                    this.replaceNegativeSign(td);
+                    this.replaceNegativeSign(td);  // 替换负号和减号为 EN DASH
                }
+
                // 检查当前 td 是否包含上下标
                if (!this.containsSupOrSub(td)) {
                    // 递归处理单元格内的所有子节点
@@ -1567,7 +1768,7 @@ export default {
    replaceNegativeSign(node) {
        if (node.nodeType === Node.TEXT_NODE) {
            // 如果是文本节点，替换负号
-            node.nodeValue = node.nodeValue.replace(/^-(?=\d)/, '−');
+            node.nodeValue = node.nodeValue.replace(/^-(?=\d)/, '–');
        } else if (node.nodeType === Node.ELEMENT_NODE) {
            this.applyToChildNodes(node, (child) => this.replaceNegativeSign(child));
        }
@@ -1576,7 +1777,9 @@ export default {
    capitalizeFirstLetter(node) {
        if (node.nodeType === Node.TEXT_NODE) {
            // 如果是文本节点，只处理第一个非空字符
-            node.nodeValue = node.nodeValue.replace(/^\s*([a-zA-Z])/, (match, firstLetter) => firstLetter.toUpperCase());
+            // node.nodeValue = node.nodeValue.replace(/^\s*([a-zA-Z])/, (match, firstLetter) => firstLetter.toUpperCase());
+            node.nodeValue = node.nodeValue;
+
        } else if (node.nodeType === Node.ELEMENT_NODE) {
            this.applyToChildNodes(node, (child) => this.capitalizeFirstLetter(child));
        }
@@ -2046,10 +2249,10 @@ export default {
            }
        });
        ed.ui.registry.addButton('Line', {
-            text: '−', // 按钮文本
+            text: '–', // 按钮文本
            onAction: function () {
-                // 插入 `−` 符号到当前光标位置
-                ed.insertContent('−');
+                // 插入 `–` 符号到当前光标位置
+                ed.insertContent('–');
            }
        });