You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

215 lines
6.0 KiB

11 months ago
11 months ago
11 months ago
11 months ago
11 months ago
11 months ago
  // Load environment variables before anything else reads process.env.
  // In development the mode-specific file (.env.development) is used;
  // in every other mode dotenv falls back to its default lookup.
  if (process.env.NODE_ENV === "development") {
    require("dotenv").config({ path: `.env.${process.env.NODE_ENV}` });
  } else {
    require("dotenv").config();
  }
  4. require("./utils/logger")();
  5. const express = require("express");
  6. const bodyParser = require("body-parser");
  7. const cors = require("cors");
  8. const path = require("path");
  9. const { ACCEPTED_MIMES } = require("./utils/constants");
  10. const { reqBody } = require("./utils/http");
  11. const { processSingleFile } = require("./processSingleFile");
  12. const { processLink, getLinkText } = require("./processLink");
  13. const { wipeCollectorStorage } = require("./utils/files");
  14. const extensions = require("./extensions");
  15. const { processRawText } = require("./processRawText");
  16. const { verifyPayloadIntegrity } = require("./middleware/verifyIntegrity");
  // Upper bound applied to every body parser registered below.
  const FILE_LIMIT = "3GB";
  const app = express();

  // Global middleware: CORS first, then the text / JSON / urlencoded body
  // parsers, all sharing the same size limit.
  app
    .use(cors({ origin: true }))
    .use(
      bodyParser.text({ limit: FILE_LIMIT }),
      bodyParser.json({ limit: FILE_LIMIT }),
      bodyParser.urlencoded({ extended: true, limit: FILE_LIMIT })
    );
  28. // app.post(
  29. // "/process",
  30. // [verifyPayloadIntegrity],
  31. // async function (request, response) {
  32. // const { filename, options = {} } = reqBody(request);
  33. // try {
  34. // const targetFilename = path
  35. // .normalize(filename)
  36. // .replace(/^(\.\.(\/|\\|$))+/, "");
  37. // const {
  38. // success,
  39. // reason,
  40. // documents = [],
  41. // } = await processSingleFile(targetFilename, options);
  42. // response
  43. // .status(200)
  44. // .json({ filename: targetFilename, success, reason, documents });
  45. // } catch (e) {
  46. // console.error(e);
  47. // response.status(200).json({
  48. // filename: filename,
  49. // success: false,
  50. // reason: "A processing error occurred.",
  51. // documents: [],
  52. // });
  53. // }
  54. // return;
  55. // }
  56. // );
  57. const fs = require("fs").promises; // 使用 fs.promises 支持异步操作
  58. // const path = require("path");
  /**
   * POST /process
   *
   * Reads a file from the hotdir, runs it through processSingleFile and
   * responds with the processing result plus the raw file content encoded
   * as Base64.
   *
   * Request body (via reqBody): { filename: string, options?: object }
   *
   * Responses:
   *   200 - { filename, success, reason, documents, fileContent }
   *   400 - { success: false, error } when the path is not a regular file
   *   500 - { filename, success: false, reason, documents: [] } on any
   *         other failure (missing file, processing error, ...)
   */
  app.post(
    "/process",
    [verifyPayloadIntegrity],
    async function (request, response) {
      const { filename, options = {} } = reqBody(request);
      console.log("文件名:", filename);
      try {
        // Strip leading "../" (or "..\") segments so the name cannot climb
        // out of the hotdir.
        const targetFilename = path
          .normalize(filename)
          .replace(/^(\.\.(\/|\\|$))+/, "");

        const inputPath =
          process.env.NODE_ENV === "development"
            ? path.resolve("../server/storage/hotdir")
            : path.resolve("/app/server/storage/hotdir");
        console.log("输入路径:(((((((((((((((((((((((((((((((((((((((((:", inputPath,filename);

        // Build the on-disk path from the SANITIZED name. Joining the raw
        // `filename` here would bypass the traversal guard above and let a
        // caller read (and receive as Base64) arbitrary files on the host.
        const sourceFile = path.join(inputPath, targetFilename);
        console.log("源文件路径:", sourceFile);

        // Make sure the path points at a regular file.
        const stats = await fs.stat(sourceFile);
        if (!stats.isFile()) {
          return response.status(400).json({
            success: false,
            error: "提供的路径不是文件",
          });
        }

        // Read the file as a Buffer and encode it as Base64 for the response.
        // NOTE(review): the read happens before processing — presumably
        // because processSingleFile may move or remove the source; confirm.
        const fileContent = await fs.readFile(sourceFile);
        const fileContentBase64 = fileContent.toString("base64");

        // Process the file and return the result.
        const {
          success,
          reason,
          documents = [],
        } = await processSingleFile(targetFilename, options);
        response.status(200).json({
          filename: sourceFile,
          success,
          reason,
          documents,
          fileContent: fileContentBase64, // raw file content as Base64
        });
      } catch (e) {
        console.error(e);
        if (e.code === "EISDIR") {
          // The path resolved to a directory rather than a file.
          response.status(400).json({
            success: false,
            error: "提供的路径是目录,不是文件",
          });
        } else {
          response.status(500).json({
            filename: filename,
            success: false,
            reason: "A processing error occurred.",
            documents: [],
          });
        }
      }
    }
  );
  /**
   * POST /process-link
   *
   * Passes the `link` from the request body to processLink and relays its
   * result. Always answers with HTTP 200; failures are signalled through
   * `success: false` in the JSON body.
   */
  app.post(
    "/process-link",
    [verifyPayloadIntegrity],
    async (req, res) => {
      const { link: url } = reqBody(req);
      try {
        const {
          success: ok,
          reason: why,
          documents: docs = [],
        } = await processLink(url);
        res
          .status(200)
          .json({ url, success: ok, reason: why, documents: docs });
      } catch (err) {
        console.error(err);
        const failure = {
          url,
          success: false,
          reason: "A processing error occurred.",
          documents: [],
        };
        res.status(200).json(failure);
      }
    }
  );
  /**
   * POST /util/get-link
   *
   * Fetches the content of `link` through getLinkText, using the capture
   * mode given by `captureAs` (defaults to "text"). Always answers with
   * HTTP 200; on failure `success` is false and `content` is null.
   */
  app.post(
    "/util/get-link",
    [verifyPayloadIntegrity],
    async (req, res) => {
      const { link: url, captureAs: mode = "text" } = reqBody(req);
      try {
        const { success: ok, content: body = null } = await getLinkText(
          url,
          mode
        );
        res.status(200).json({ url, success: ok, content: body });
      } catch (err) {
        console.error(err);
        const failure = { url, success: false, content: null };
        res.status(200).json(failure);
      }
    }
  );
  /**
   * POST /process-raw-text
   *
   * Hands `textContent` and `metadata` from the request body to
   * processRawText and relays the result. Always answers with HTTP 200;
   * failures are signalled through `success: false` in the JSON body.
   */
  app.post(
    "/process-raw-text",
    [verifyPayloadIntegrity],
    async function (request, response) {
      const { textContent, metadata } = reqBody(request);
      try {
        const {
          success,
          reason,
          documents = [],
        } = await processRawText(textContent, metadata);
        // `metadata` is optional in the payload; guard the access so a
        // successful run is not turned into a reported failure by a
        // TypeError here (the catch branch below already guards it).
        response
          .status(200)
          .json({ filename: metadata?.title, success, reason, documents });
      } catch (e) {
        console.error(e);
        response.status(200).json({
          filename: metadata?.title || "Unknown-doc.txt",
          success: false,
          reason: "A processing error occurred.",
          documents: [],
        });
      }
      return;
    }
  );
  // Let the ./extensions module attach its own handlers to the app.
  extensions(app);

  // GET /accepts - responds with the ACCEPTED_MIMES constant as JSON.
  app.get("/accepts", (_req, res) => {
    res.status(200).json(ACCEPTED_MIMES);
  });

  // Catch-all: any other method/path is answered with a bare 200.
  app.all("*", (_req, res) => {
    res.sendStatus(200);
  });
  // Start the HTTP server on port 8888. Once listening, collector storage
  // is wiped via wipeCollectorStorage() before the ready message is logged.
  app
    .listen(8888, async () => {
      await wipeCollectorStorage();
      console.log(`Document processor app listening on port 8888`);
    })
    .on("error", function (error) {
      // Surface the failure instead of swallowing it silently.
      console.error("Document processor server error:", error);

      // Re-raise termination signals against this process. `once` is
      // required for both: a persistent listener would catch the signal it
      // just re-sent and re-trigger itself indefinitely instead of letting
      // the default handler terminate the process.
      process.once("SIGUSR2", function () {
        process.kill(process.pid, "SIGUSR2");
      });
      process.once("SIGINT", function () {
        process.kill(process.pid, "SIGINT");
      });
    });