You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

208 lines
5.7 KiB

11 months ago
11 months ago
11 months ago
11 months ago
11 months ago
11 months ago
  // Load environment variables. In development the NODE_ENV-specific file
  // (`.env.development`) is used; otherwise dotenv is called with no options.
  if (process.env.NODE_ENV === "development") {
    require("dotenv").config({ path: `.env.${process.env.NODE_ENV}` });
  } else {
    require("dotenv").config();
  }

  // Invoke the function exported by ./utils/logger before the remaining
  // modules are required.
  require("./utils/logger")();
  5. const express = require("express");
  6. const bodyParser = require("body-parser");
  7. const cors = require("cors");
  8. const path = require("path");
  9. const { ACCEPTED_MIMES } = require("./utils/constants");
  10. const { reqBody } = require("./utils/http");
  11. const { processSingleFile } = require("./processSingleFile");
  12. const { processLink, getLinkText } = require("./processLink");
  13. const { wipeCollectorStorage } = require("./utils/files");
  14. const extensions = require("./extensions");
  15. const { processRawText } = require("./processRawText");
  16. const { verifyPayloadIntegrity } = require("./middleware/verifyIntegrity");
  const app = express();

  // Size limit shared by every body parser registered below.
  const FILE_LIMIT = "3GB";

  app.use(cors({ origin: true }));

  // Body parsers are registered one by one, in the same order as before:
  // plain text, then JSON, then URL-encoded payloads.
  app.use(bodyParser.text({ limit: FILE_LIMIT }));
  app.use(bodyParser.json({ limit: FILE_LIMIT }));
  app.use(bodyParser.urlencoded({ limit: FILE_LIMIT, extended: true }));
  28. // app.post(
  29. // "/process",
  30. // [verifyPayloadIntegrity],
  31. // async function (request, response) {
  32. // const { filename, options = {} } = reqBody(request);
  33. // try {
  34. // const targetFilename = path
  35. // .normalize(filename)
  36. // .replace(/^(\.\.(\/|\\|$))+/, "");
  37. // const {
  38. // success,
  39. // reason,
  40. // documents = [],
  41. // } = await processSingleFile(targetFilename, options);
  42. // response
  43. // .status(200)
  44. // .json({ filename: targetFilename, success, reason, documents });
  45. // } catch (e) {
  46. // console.error(e);
  47. // response.status(200).json({
  48. // filename: filename,
  49. // success: false,
  50. // reason: "A processing error occurred.",
  51. // documents: [],
  52. // });
  53. // }
  54. // return;
  55. // }
  56. // );
  const fs = require("fs").promises; // Promise-based fs API so the handler can await file operations.

  /**
   * POST /process
   *
   * Processes a single file located inside the local `./hotdir` directory and
   * returns the processing result together with the raw file content encoded
   * as Base64.
   *
   * Request body (read through `reqBody`):
   *   - filename {string} Name of the file, relative to `./hotdir`.
   *   - options  {object} Optional; forwarded untouched to `processSingleFile`.
   *
   * Responses:
   *   - 200 { filename, success, reason, documents, fileContent }
   *   - 400 { success: false, error } when the filename is missing, points
   *         outside `./hotdir`, or does not refer to a regular file.
   *   - 500 { filename, success: false, reason, documents: [] } on any other error.
   */
  app.post(
    "/process",
    [verifyPayloadIntegrity],
    async function (request, response) {
      const { filename, options = {} } = reqBody(request);
      console.log("文件名:", filename);

      // A missing or non-string filename would make path.join throw and be
      // reported as a 500; it is a client error, so answer with 400 instead.
      if (typeof filename !== "string" || filename.length === 0) {
        return response.status(400).json({
          success: false,
          error: "A non-empty filename is required.",
        });
      }

      try {
        const inputPath = path.resolve("./hotdir");
        const sourceFile = path.join(inputPath, filename); // Build the path of the requested file.
        console.log("源文件路径:", sourceFile);

        // Security: the filename comes from the request. path.join normalises
        // ".." segments, so without this guard a name such as
        // "../../etc/passwd" would let the caller read any file on the host.
        const relativePath = path.relative(inputPath, sourceFile);
        const escapesHotdir =
          relativePath === ".." ||
          relativePath.startsWith(`..${path.sep}`) ||
          path.isAbsolute(relativePath); // e.g. a different drive on Windows
        if (escapesHotdir) {
          return response.status(400).json({
            success: false,
            error: "The filename must refer to a file inside the hotdir directory.",
          });
        }

        // Make sure the path refers to a regular file.
        const stats = await fs.stat(sourceFile);
        if (!stats.isFile()) {
          return response.status(400).json({
            success: false,
            error: "提供的路径不是文件",
          });
        }

        // Read the raw bytes (as a Buffer) before handing the file over for
        // processing, then encode them as a Base64 string for the response.
        const fileContent = await fs.readFile(sourceFile);
        const fileContentBase64 = fileContent.toString("base64");

        // Process the file and return the result.
        const { success, reason, documents = [] } = await processSingleFile(sourceFile, options);
        response.status(200).json({
          filename: sourceFile,
          success,
          reason,
          documents,
          fileContent: fileContentBase64, // Raw file content as a Base64 string.
        });
      } catch (e) {
        console.error(e);
        if (e.code === "EISDIR") {
          // The path turned out to be a directory when it was read.
          response.status(400).json({
            success: false,
            error: "提供的路径是目录,不是文件",
          });
        } else {
          response.status(500).json({
            filename: filename,
            success: false,
            reason: "A processing error occurred.",
            documents: [],
          });
        }
      }
    }
  );
  /**
   * POST /process-link
   *
   * Passes the `link` from the request body to `processLink` and reports the
   * outcome. The HTTP status is always 200; failures are signalled through
   * `success: false` in the JSON payload.
   */
  app.post("/process-link", [verifyPayloadIntegrity], async (request, response) => {
    const { link } = reqBody(request);

    try {
      const outcome = await processLink(link);
      const { success, reason, documents = [] } = outcome;
      response.status(200).json({ url: link, success, reason, documents });
    } catch (error) {
      console.error(error);
      const failure = {
        url: link,
        success: false,
        reason: "A processing error occurred.",
        documents: [],
      };
      response.status(200).json(failure);
    }
  });
  /**
   * POST /util/get-link
   *
   * Calls `getLinkText` with the `link` and `captureAs` values from the
   * request body (`captureAs` defaults to "text") and returns the result.
   * The HTTP status is always 200; on error the payload carries
   * `success: false` and `content: null`.
   */
  app.post("/util/get-link", [verifyPayloadIntegrity], async (request, response) => {
    const { link, captureAs = "text" } = reqBody(request);

    try {
      const outcome = await getLinkText(link, captureAs);
      const { success, content = null } = outcome;
      response.status(200).json({ url: link, success, content });
    } catch (error) {
      console.error(error);
      const failure = { url: link, success: false, content: null };
      response.status(200).json(failure);
    }
  });
  /**
   * POST /process-raw-text
   *
   * Passes `textContent` and `metadata` from the request body to
   * `processRawText` and reports the outcome, using `metadata.title` as the
   * filename. The HTTP status is always 200; on error the payload carries
   * `success: false` and the filename falls back to "Unknown-doc.txt" when no
   * title is available.
   */
  app.post("/process-raw-text", [verifyPayloadIntegrity], async (request, response) => {
    const { textContent, metadata } = reqBody(request);

    try {
      const outcome = await processRawText(textContent, metadata);
      const { success, reason, documents = [] } = outcome;
      response
        .status(200)
        .json({ filename: metadata.title, success, reason, documents });
    } catch (error) {
      console.error(error);
      const failure = {
        filename: metadata?.title || "Unknown-doc.txt",
        success: false,
        reason: "A processing error occurred.",
        documents: [],
      };
      response.status(200).json(failure);
    }
  });
  // Hand the app to the extensions module (presumably it registers additional
  // routes — confirm against ./extensions). It must run before the catch-all
  // route below so that anything it registers is matched first.
  extensions(app);

  // GET /accepts — returns the ACCEPTED_MIMES constant as JSON.
  app.get("/accepts", (_request, response) => {
    response.status(200).json(ACCEPTED_MIMES);
  });

  // Catch-all: any other method/path is answered with a bare 200.
  app.all("*", (_request, response) => {
    response.sendStatus(200);
  });
  // Start the HTTP server on port 8888. Once it is listening, the collector
  // storage is wiped and a startup message is logged.
  app
    .listen(8888, async () => {
      await wipeCollectorStorage();
      console.log(`Document processor app listening on port 8888`);
    })
    .on("error", function (error) {
      // Previously the error object was discarded, so a failed bind (for
      // example the port already being in use) left no trace in the logs.
      console.error("Document processor server error:", error);

      // Re-raise SIGUSR2 / SIGINT against this process when they arrive.
      // NOTE(review): this looks like the usual dev-reloader shutdown pattern —
      // confirm the intent before changing it.
      process.once("SIGUSR2", function () {
        process.kill(process.pid, "SIGUSR2");
      });
      process.on("SIGINT", function () {
        process.kill(process.pid, "SIGINT");
      });
    });