word.ts
3.0 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
import { UPLOAD_FILE_MAX_LINE } from "@/app/constant";
import { Document, Packer, Paragraph } from "docx";
import { saveAs } from "file-saver";
import * as mammoth from "mammoth";
import { removeDeepThink } from "../deepThink";
/**
 * Exports a piece of message content as a Word download.
 *
 * The content is first passed through `removeDeepThink` (imported from
 * "../deepThink"). If the result looks like a complete HTML document it is
 * handed to `exportHtmlToWord`; otherwise it is handed to
 * `exportMarkdownToWord`.
 *
 * @param content - Raw text to export.
 */
export function exportWord(content: string) {
  const exportable = removeDeepThink(content);

  // Lightweight heuristic: the text must contain the opening and closing
  // <html> tags plus <head> and <body>. Tags are matched case-insensitively
  // and may carry attributes (e.g. `<body class="page">`), so that ordinary
  // real-world documents are not misrouted to the Markdown exporter.
  const isHtmlDocument = (text: string): boolean =>
    /<html[\s>]/i.test(text) &&
    /<\/html\s*>/i.test(text) &&
    /<head[\s>]/i.test(text) &&
    /<body[\s>]/i.test(text);

  if (isHtmlDocument(exportable)) {
    exportHtmlToWord(exportable);
  } else {
    exportMarkdownToWord(exportable);
  }
}
/**
 * Downloads an HTML document as a Word file.
 *
 * The markup itself (after removing an optional surrounding ```html fence)
 * is wrapped in a Blob with MIME type "application/msword" and offered to the
 * browser as a download through a temporary anchor element. The file name is
 * taken from the first <h1> in the document, falling back to "document.docx".
 *
 * @param content - HTML text, optionally wrapped in a ```html … ``` fence.
 */
export function exportHtmlToWord(content: string) {
  const openingFence = "```html";
  const closingFence = "```";

  // Remove the Markdown code fence by its real length, then trim, so that
  // neither "\r\n" line endings nor a fence without a preceding newline
  // leave stray characters behind or cut into the markup.
  let cleanedContent = content.trim();
  if (cleanedContent.startsWith(openingFence)) {
    cleanedContent = cleanedContent.slice(openingFence.length);
  }
  if (cleanedContent.endsWith(closingFence)) {
    cleanedContent = cleanedContent.slice(
      0,
      cleanedContent.length - closingFence.length,
    );
  }
  cleanedContent = cleanedContent.trim();

  // Parse the HTML so the first <h1> can be used as the file name.
  const parser = new DOMParser();
  const doc = parser.parseFromString(cleanedContent, "text/html");

  let fileName = "document.docx"; // default file name
  const h1Element = doc.querySelector("h1");
  if (h1Element && h1Element.textContent) {
    // Drop control characters and characters that are illegal in file names
    // (including both path separators).
    const title = h1Element.textContent
      .trim()
      .replace(/[\u0000-\u001f\\/?*:"<>|]/g, "");
    // Keep the default when nothing usable is left, instead of ".docx".
    if (title !== "") {
      fileName = title + ".docx";
    }
  }

  const blob = new Blob([cleanedContent], { type: "application/msword" });
  const url = URL.createObjectURL(blob);
  const a = document.createElement("a");
  a.href = url;
  a.download = fileName;

  // Trigger the download by clicking the temporary link.
  document.body.appendChild(a);
  a.click();

  // Remove the temporary link element again.
  document.body.removeChild(a);

  // Release the Blob URL.
  URL.revokeObjectURL(url);
}
/**
 * Removes simple Markdown markers from a single line of text.
 *
 * A leading run of `#` (heading) and a leading run of `*` (list bullet or
 * opening emphasis) are removed together with the whitespace that follows
 * them. Any other asterisks are removed on their own, so the spacing between
 * words is preserved ("**bold** text" becomes "bold text").
 */
function stripMarkdownMarkers(line: string): string {
  return line
    .replace(/^#+\s*/, "")
    .replace(/^\s*\*+\s*/, "")
    .replace(/\*+/g, "")
    .trim();
}

/**
 * Downloads Markdown-like text as "demo.docx".
 *
 * The content is passed through `removeDeepThink`, split into lines, and each
 * line becomes one plain-text paragraph after its Markdown markers are
 * stripped. Empty lines become empty paragraphs. The document is packed to a
 * Blob and saved with `saveAs`; packing failures are logged to the console.
 *
 * @param content - Text to export, one paragraph per line.
 */
function exportMarkdownToWord(content: string) {
  const text = removeDeepThink(content);

  // One paragraph per input line (both "\n" and "\r\n" line endings).
  const paragraphs: Paragraph[] = text
    .split(/\r?\n/)
    .map((line) => new Paragraph(stripMarkdownMarkers(line)));

  // Build the Word document with a single section.
  const doc = new Document({
    sections: [
      {
        children: paragraphs,
      },
    ],
  });

  // Convert to a Blob and trigger the download.
  Packer.toBlob(doc)
    .then((blob) => {
      saveAs(blob, "demo.docx");
    })
    .catch((error) => {
      console.error("导出 Word 失败:", error);
    });
}
/**
 * Extracts the raw text of an uploaded Word file.
 *
 * The file is read into an ArrayBuffer and handed to
 * `mammoth.extractRawText`; the resulting `value` is cut with
 * `slice(0, UPLOAD_FILE_MAX_LINE)` before being returned.
 * NOTE(review): the constant's name suggests a line limit, but it is applied
 * here as a slice length on the extracted value — confirm which is intended.
 *
 * @param file - The Word file selected by the user.
 * @returns The extracted text, truncated to the configured limit.
 * @throws Re-throws any error from reading or extraction after logging it.
 */
export async function getWordData(file: File) {
  try {
    const buffer = await file.arrayBuffer();
    const extraction = await mammoth.extractRawText({ arrayBuffer: buffer });
    return extraction.value.slice(0, UPLOAD_FILE_MAX_LINE);
  } catch (err) {
    // Log for diagnosis, then let the caller decide how to react.
    console.error("Error extracting Word content:", err);
    throw err;
  }
}