You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

74 lines
2.0 KiB

11 months ago
  1. const { v4 } = require("uuid");
  2. const fs = require("fs");
  3. const { mboxParser } = require("mbox-parser");
  4. const {
  5. createdDate,
  6. trashFile,
  7. writeToServerDocuments,
  8. } = require("../../utils/files");
  9. const { tokenizeString } = require("../../utils/tokenizer");
  10. const { default: slugify } = require("slugify");
  /**
   * Convert an uploaded mbox mailbox into one server document per message.
   *
   * The file is parsed with `mboxParser`; every message that carries a
   * non-empty plain-text body is written out through `writeToServerDocuments`.
   * Messages without a text body are skipped. The source file is handed to
   * `trashFile` both when nothing could be parsed and after a successful run.
   *
   * @param {Object} params
   * @param {string} [params.fullFilePath=""] - Path of the mbox file to read.
   * @param {string} [params.filename=""] - Name used in logs, fallback titles
   *   and the stored document names.
   * @returns {Promise<{success: boolean, reason: (string|null), documents: Array}>}
   *   `success: false` with a reason and no documents when no mail items were
   *   parsed; otherwise `success: true` with whatever `writeToServerDocuments`
   *   returned for each stored message.
   */
  async function asMbox({ fullFilePath = "", filename = "" }) {
    console.log(`-- Working ${filename} --`);

    const stream = fs.createReadStream(fullFilePath);
    let mails = [];
    try {
      mails = await mboxParser(stream);
    } catch (error) {
      // A parse failure is treated the same as an empty mailbox below.
      console.error(`Could not parse mail items`, error);
    } finally {
      // Release the file handle even when parsing bailed out early.
      stream.destroy();
    }

    if (!mails?.length) {
      console.error(`Resulting mail items was empty for ${filename}.`);
      trashFile(fullFilePath);
      return {
        success: false,
        reason: `No mail items found in ${filename}.`,
        documents: [],
      };
    }

    const documents = [];
    // 1-based counter over the messages that are actually stored.
    let item = 1;
    for (const mail of mails) {
      const content = mail?.text;
      if (typeof content !== "string" || !content) continue;

      console.log(
        `-- Working on message "${mail.subject || "Unknown subject"}" --`
      );

      // Strip every dot from the subject (not just the first one) so the only
      // extension-like suffix left in the title is the ".mbox" appended here.
      const title = mail.subject
        ? `${slugify(String(mail.subject).replace(/\./g, ""))}.mbox`
        : `msg_${item}-${filename}`;

      const data = {
        id: v4(),
        url: `file://${fullFilePath}`,
        title,
        docAuthor: mail.from?.text,
        description: "No description found.",
        docSource: "Mbox message file uploaded by the user.",
        chunkSource: "",
        published: createdDate(fullFilePath),
        wordCount: content.split(" ").length,
        pageContent: content,
        token_count_estimate: tokenizeString(content),
      };

      // Use the same index as the fallback title; advance only afterwards.
      const document = writeToServerDocuments(
        data,
        `${slugify(filename)}-${data.id}-msg-${item}`
      );
      documents.push(document);
      item++;
    }

    trashFile(fullFilePath);
    console.log(
      `[SUCCESS]: ${filename} messages converted & ready for embedding.\n`
    );
    return { success: true, reason: null, documents };
  }
  65. module.exports = asMbox;