// word.ts (3.0 KB)

// Repository viewer links: raw file · blame · history · permalink

import { UPLOAD_FILE_MAX_LINE } from "@/app/constant";
import { Document, Packer, Paragraph } from "docx";
import { saveAs } from "file-saver";
import * as mammoth from "mammoth";
import { removeDeepThink } from "../deepThink";

/**
 * Exports `content` as a Word download, choosing the exporter by content type.
 *
 * The text is first passed through `removeDeepThink`. If the result looks like
 * a complete HTML document it is handed to `exportHtmlToWord`; anything else
 * is treated as Markdown/plain text and handed to `exportMarkdownToWord`.
 *
 * @param content - Text to export (HTML document or Markdown).
 */
export function exportWord(content: string) {
  const cleaned = removeDeepThink(content);

  // Lightweight sniffing: the text counts as HTML only when it has an opening
  // and closing <html> tag plus <head> and <body>. Every opening-tag pattern
  // accepts attributes (`<body class="x">`) and any letter case, so documents
  // with attributed or upper-case tags are not mistaken for Markdown.
  const isHTML = (text: string): boolean =>
    /<html[\s>]/i.test(text) &&
    /<\/html\s*>/i.test(text) &&
    /<head[\s>]/i.test(text) &&
    /<body[\s>]/i.test(text);

  if (isHTML(cleaned)) {
    exportHtmlToWord(cleaned);
  } else {
    exportMarkdownToWord(cleaned);
  }
}

/**
 * Downloads an HTML string as a Word file through a temporary anchor element.
 *
 * A surrounding Markdown code fence (```html ... ```) is stripped first. The
 * download is named after the text of the first <h1> in the markup, falling
 * back to "document.docx" when there is no heading or the heading contains
 * nothing usable as a file name.
 *
 * Browser-only: uses DOMParser, Blob, URL.createObjectURL and document.
 *
 * @param content - HTML markup, optionally wrapped in a ```html fence.
 */
export function exportHtmlToWord(content: string) {
  const FENCE_OPEN = "```html";
  const FENCE_CLOSE = "```";
  const DEFAULT_FILE_NAME = "document.docx";

  // Remove the fence markers by their real length, then drop at most one
  // adjacent line break. This never eats a character of the payload when the
  // fence is not followed/preceded by a newline.
  let cleanedContent = content;
  if (cleanedContent.startsWith(FENCE_OPEN)) {
    cleanedContent = cleanedContent
      .slice(FENCE_OPEN.length)
      .replace(/^\r?\n/, "");
  }
  if (cleanedContent.endsWith(FENCE_CLOSE)) {
    cleanedContent = cleanedContent
      .slice(0, -FENCE_CLOSE.length)
      .replace(/\r?\n$/, "");
  }

  // Parse the markup only to read the first <h1>; the exported payload is the
  // cleaned string itself, not a re-serialised DOM.
  const parser = new DOMParser();
  const doc = parser.parseFromString(cleanedContent, "text/html");

  let fileName = DEFAULT_FILE_NAME;
  const h1Element = doc.querySelector("h1");
  if (h1Element && h1Element.textContent) {
    // Strip control characters and characters that are illegal in file names
    // (including both path separators), then trim what is left.
    const title = h1Element.textContent
      .replace(/[\u0000-\u001f\\/?*:"<>|]/g, "")
      .trim();
    // A heading made only of illegal characters/whitespace keeps the default
    // name instead of producing a bare ".docx".
    if (title !== "") {
      fileName = title + ".docx";
    }
  }

  // NOTE(review): the payload is HTML labelled as application/msword while the
  // file name ends in ".docx" — confirm Word opens it; ".doc" may be the
  // extension that actually matches this MIME type.
  const blob = new Blob([cleanedContent], { type: "application/msword" });
  const url = URL.createObjectURL(blob);

  // Trigger the download with a temporary <a download> element.
  const a = document.createElement("a");
  a.href = url;
  a.download = fileName;
  document.body.appendChild(a);
  a.click();
  document.body.removeChild(a);

  // Release the Blob URL once the click has been dispatched.
  URL.revokeObjectURL(url);
}

/**
 * Converts Markdown-like text to a plain-text Word document and saves it as
 * "demo.docx".
 *
 * The content is passed through `removeDeepThink`, split on line breaks, and
 * each line becomes one paragraph after its Markdown markers are stripped.
 * Blank lines are kept as empty paragraphs. Packing or saving failures are
 * logged to the console rather than thrown.
 *
 * @param content - Markdown/plain text to export.
 */
function exportMarkdownToWord(content: string) {
  // Leading heading hashes ("# ", "## ", ...).
  const headingMarker = /^#+\s*/;
  // A leading "**", or any "*" anywhere in the line, each with the whitespace
  // that follows it.
  const asteriskMarker = /^\*\*\s*|\*\s*/g;

  const toPlainText = (rawLine: string): string =>
    rawLine.replace(headingMarker, "").replace(asteriskMarker, "").trim();

  // One paragraph per source line, in order; an empty line maps to an empty
  // paragraph.
  const children: Paragraph[] = removeDeepThink(content)
    .split(/\r?\n/)
    .map((rawLine) => new Paragraph(toPlainText(rawLine)));

  // Single-section document holding all paragraphs.
  const wordDocument = new Document({ sections: [{ children }] });

  // Serialise to a Blob and hand it to file-saver for download.
  Packer.toBlob(wordDocument)
    .then((packed) => {
      saveAs(packed, "demo.docx");
    })
    .catch((error) => {
      console.error("导出 Word 失败：", error);
    });
}

/**
 * Extracts the raw text of an uploaded Word file using mammoth.
 *
 * @param file - The Word file to read.
 * @returns The extracted text, cut to its first `UPLOAD_FILE_MAX_LINE`
 *   characters.
 * @throws Rethrows any error raised while reading or extracting, after
 *   logging it to the console.
 */
export async function getWordData(file: File) {
  try {
    const buffer = await file.arrayBuffer();
    const extraction = await mammoth.extractRawText({ arrayBuffer: buffer });
    const text = extraction.value;
    // NOTE(review): the limit is applied with String.prototype.slice, i.e. as
    // a character count — confirm that matches the intent of a constant named
    // UPLOAD_FILE_MAX_LINE.
    return text.slice(0, UPLOAD_FILE_MAX_LINE);
  } catch (error) {
    console.error("Error extracting Word content:", error);
    throw error;
  }
}