|
|
// Load environment variables via dotenv before the rest of the app starts.
// In development the file `.env.<NODE_ENV>` is used; in every other case
// dotenv is invoked with its defaults.
if (process.env.NODE_ENV === "development") {
  require("dotenv").config({ path: `.env.${process.env.NODE_ENV}` });
} else {
  require("dotenv").config();
}
// Invoke the logger setup exported by ./utils/logger before loading anything else.
require("./utils/logger")();

// Third-party and Node core modules.
const express = require("express");
const bodyParser = require("body-parser");
const cors = require("cors");
const path = require("path");

// Project-local helpers, processors and middleware.
const { ACCEPTED_MIMES } = require("./utils/constants");
const { reqBody } = require("./utils/http");
const { processSingleFile } = require("./processSingleFile");
const { processLink, getLinkText } = require("./processLink");
const { wipeCollectorStorage } = require("./utils/files");
const extensions = require("./extensions");
const { processRawText } = require("./processRawText");
const { verifyPayloadIntegrity } = require("./middleware/verifyIntegrity");

// The Express application every route below is attached to.
const app = express();

// Request body size limit handed to each body-parser middleware.
const FILE_LIMIT = "3GB";
// Enable CORS with `origin: true`.
app.use(cors({ origin: true }));

// Body parsers, each capped at FILE_LIMIT. Registration order is
// text -> JSON -> urlencoded.
app.use(bodyParser.text({ limit: FILE_LIMIT }));
app.use(bodyParser.json({ limit: FILE_LIMIT }));
app.use(bodyParser.urlencoded({ limit: FILE_LIMIT, extended: true }));
// app.post(
// "/process",
// [verifyPayloadIntegrity],
// async function (request, response) {
// const { filename, options = {} } = reqBody(request);
// try {
// const targetFilename = path
// .normalize(filename)
// .replace(/^(\.\.(\/|\\|$))+/, "");
// const {
// success,
// reason,
// documents = [],
// } = await processSingleFile(targetFilename, options);
// response
// .status(200)
// .json({ filename: targetFilename, success, reason, documents });
// } catch (e) {
// console.error(e);
// response.status(200).json({
// filename: filename,
// success: false,
// reason: "A processing error occurred.",
// documents: [],
// });
// }
// return;
// }
// );
const fs = require("fs").promises; // Use the promise-based fs API so file operations can be awaited
// const path = require("path");
/**
 * POST /process
 *
 * Reads a file from the `./hotdir` directory (resolved against the current
 * working directory), runs it through `processSingleFile`, and responds with
 * the processing result plus the file's raw bytes encoded as Base64.
 *
 * Request body (via `reqBody`): `{ filename, options = {} }`.
 *
 * Responses:
 *   200 - `{ filename, success, reason, documents, fileContent }`
 *   400 - `{ success: false, error }` when the path is not a regular file
 *   500 - `{ filename, success: false, reason, documents: [] }` on any other error
 */
app.post(
  "/process",
  [verifyPayloadIntegrity],
  async function (request, response) {
    const { filename, options = {} } = reqBody(request);
    console.log("文件名:", filename);

    try {
      // Normalize the requested name and strip any leading "../" (or "..\")
      // segments so it can only address entries below the hot directory.
      const targetFilename = path
        .normalize(filename)
        .replace(/^(\.\.(\/|\\|$))+/, "");
      const inputPath = path.resolve("./hotdir");

      // Build the on-disk path from the SANITIZED name, never the raw request
      // value. Joining the raw `filename` would let "../" sequences escape the
      // hot directory and would also read a different file than the one that
      // is handed to processSingleFile below.
      const sourceFile = path.join(inputPath, targetFilename);
      console.log("源文件路径:", sourceFile);

      // Reject anything that is not a regular file (e.g. a directory).
      const stats = await fs.stat(sourceFile);
      if (!stats.isFile()) {
        return response.status(400).json({
          success: false,
          error: "提供的路径不是文件",
        });
      }

      // Read the raw bytes before processing and encode them as Base64.
      // NOTE(review): presumably processSingleFile may move or remove the
      // source file, which is why the read happens first - confirm.
      const fileContent = await fs.readFile(sourceFile);
      const fileContentBase64 = fileContent.toString("base64");

      const {
        success,
        reason,
        documents = [],
      } = await processSingleFile(targetFilename, options);

      response.status(200).json({
        filename: sourceFile,
        success,
        reason,
        documents,
        fileContent: fileContentBase64, // raw file bytes, Base64-encoded
      });
    } catch (e) {
      console.error(e);
      if (e.code === "EISDIR") {
        // The path turned out to be a directory while being read.
        response.status(400).json({
          success: false,
          error: "提供的路径是目录,不是文件",
        });
      } else {
        response.status(500).json({
          filename: filename,
          success: false,
          reason: "A processing error occurred.",
          documents: [],
        });
      }
    }
  }
);
// POST /process-link - pass the submitted link to processLink and relay its
// outcome. The HTTP status is always 200; a failure is reported through
// `success: false` in the JSON body.
app.post("/process-link", [verifyPayloadIntegrity], async (req, res) => {
  const { link } = reqBody(req);

  try {
    const outcome = await processLink(link);
    const { success, reason, documents = [] } = outcome;
    res.status(200).json({ url: link, success, reason, documents });
  } catch (err) {
    console.error(err);
    res.status(200).json({
      url: link,
      success: false,
      reason: "A processing error occurred.",
      documents: [],
    });
  }
});
// POST /util/get-link - fetch the content behind a link via getLinkText.
// `captureAs` defaults to "text" when the request body omits it. The HTTP
// status is always 200; a failure is reported as `success: false` with
// `content: null`.
app.post("/util/get-link", [verifyPayloadIntegrity], async (req, res) => {
  const { link, captureAs = "text" } = reqBody(req);

  try {
    const outcome = await getLinkText(link, captureAs);
    const { success, content = null } = outcome;
    res.status(200).json({ url: link, success, content });
  } catch (err) {
    console.error(err);
    res.status(200).json({
      url: link,
      success: false,
      content: null,
    });
  }
});
// POST /process-raw-text - run caller-supplied text and metadata through
// processRawText. The HTTP status is always 200; a failure is reported through
// `success: false`, and the reported filename falls back to "Unknown-doc.txt"
// when no metadata title is available.
app.post("/process-raw-text", [verifyPayloadIntegrity], async (req, res) => {
  const { textContent, metadata } = reqBody(req);

  try {
    const outcome = await processRawText(textContent, metadata);
    const { success, reason, documents = [] } = outcome;
    res
      .status(200)
      .json({ filename: metadata.title, success, reason, documents });
  } catch (err) {
    console.error(err);
    res.status(200).json({
      filename: metadata?.title || "Unknown-doc.txt",
      success: false,
      reason: "A processing error occurred.",
      documents: [],
    });
  }
});
// Hand the Express app to the function exported by ./extensions.
// NOTE(review): presumably this registers additional routes - confirm in ./extensions.
extensions(app);
// GET /accepts - expose the ACCEPTED_MIMES constant as JSON.
app.get("/accepts", (_req, res) => {
  res.status(200).json(ACCEPTED_MIMES);
});
// Fallback for any method/path not answered by an earlier route: reply with
// status 200.
app.all("*", (_req, res) => {
  res.sendStatus(200);
});
// Start the HTTP server on port 8888. Once it is listening, collector storage
// is wiped and a startup message is logged.
app
  .listen(8888, async () => {
    await wipeCollectorStorage();
    console.log(`Document processor app listening on port 8888`);
  })
  .on("error", function (error) {
    // Surface the failure (for example, the port already being in use)
    // instead of discarding the error object silently.
    console.error("Document processor server error:", error);

    // Re-send the received signal to this process.
    // NOTE(review): SIGUSR2 is presumably here for dev-tool restarts - confirm.
    process.once("SIGUSR2", function () {
      process.kill(process.pid, "SIGUSR2");
    });
    process.on("SIGINT", function () {
      process.kill(process.pid, "SIGINT");
    });
  });
|