You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 
 

211 lines
5.8 KiB

// Load environment variables through dotenv. In development the file is
// `.env.<NODE_ENV>`; in every other case dotenv falls back to its default.
if (process.env.NODE_ENV === "development") {
  require("dotenv").config({ path: `.env.${process.env.NODE_ENV}` });
} else {
  require("dotenv").config();
}
require("./utils/logger")();
const express = require("express");
const bodyParser = require("body-parser");
const cors = require("cors");
const path = require("path");
const { ACCEPTED_MIMES } = require("./utils/constants");
const { reqBody } = require("./utils/http");
const { processSingleFile } = require("./processSingleFile");
const { processLink, getLinkText } = require("./processLink");
const { wipeCollectorStorage } = require("./utils/files");
const extensions = require("./extensions");
const { processRawText } = require("./processRawText");
const { verifyPayloadIntegrity } = require("./middleware/verifyIntegrity");
// Express application instance shared by every route below.
const app = express();

// Upper bound applied to each body parser registered here.
const FILE_LIMIT = "3GB";

// CORS: reflect the request origin.
app.use(cors({ origin: true }));

// Body parsers, registered in the same order as before: text, JSON, urlencoded.
app.use(bodyParser.text({ limit: FILE_LIMIT }));
app.use(bodyParser.json({ limit: FILE_LIMIT }));
app.use(
  bodyParser.urlencoded({
    extended: true,
    limit: FILE_LIMIT,
  })
);
// app.post(
// "/process",
// [verifyPayloadIntegrity],
// async function (request, response) {
// const { filename, options = {} } = reqBody(request);
// try {
// const targetFilename = path
// .normalize(filename)
// .replace(/^(\.\.(\/|\\|$))+/, "");
// const {
// success,
// reason,
// documents = [],
// } = await processSingleFile(targetFilename, options);
// response
// .status(200)
// .json({ filename: targetFilename, success, reason, documents });
// } catch (e) {
// console.error(e);
// response.status(200).json({
// filename: filename,
// success: false,
// reason: "A processing error occurred.",
// documents: [],
// });
// }
// return;
// }
// );
const fs = require("fs").promises; // Use fs.promises so file operations can be awaited
// const path = require("path");
/**
 * POST /process
 *
 * Reads the file named by `filename` from the local `./hotdir` directory,
 * runs it through `processSingleFile`, and responds with the processing
 * result plus the raw file content encoded as Base64.
 *
 * Request body (via `reqBody`):
 *   - filename {string} name of the file, relative to `./hotdir`
 *   - options  {object} forwarded untouched to `processSingleFile` (default `{}`)
 *
 * Responses:
 *   - 200 `{ filename, success, reason, documents, fileContent }`
 *   - 400 `{ success: false, error }` when the filename is missing, resolves
 *         outside `./hotdir`, or does not point at a regular file
 *   - 500 `{ filename, success: false, reason, documents: [] }` on any other error
 */
app.post(
  "/process",
  [verifyPayloadIntegrity],
  async function (request, response) {
    const { filename, options = {} } = reqBody(request);
    console.log("文件名:", filename);

    // A missing or non-string filename would otherwise throw inside
    // path.normalize and surface as an opaque 500.
    if (typeof filename !== "string" || filename.length === 0) {
      return response.status(400).json({
        success: false,
        error: "A non-empty filename is required.",
      });
    }

    try {
      // Strip leading "../" segments so the name cannot climb out of hotdir.
      const targetFilename = path
        .normalize(filename)
        .replace(/^(\.\.(\/|\\|$))+/, "");
      const inputPath = path.resolve("./hotdir");

      // Build the path from the SANITIZED name. Joining the raw `filename`
      // allowed "../" traversal to read (and return) arbitrary files.
      const sourceFile = path.join(inputPath, targetFilename);
      console.log("源文件路径:", sourceFile);

      // Defense in depth: refuse anything that still resolves outside hotdir.
      const relativeToHotdir = path.relative(inputPath, sourceFile);
      const escapesHotdir =
        relativeToHotdir === ".." ||
        relativeToHotdir.startsWith(`..${path.sep}`) ||
        path.isAbsolute(relativeToHotdir);
      if (escapesHotdir) {
        return response.status(400).json({
          success: false,
          error: "The filename must resolve inside the hotdir directory.",
        });
      }

      // Make sure the path points at a regular file.
      const stats = await fs.stat(sourceFile);
      if (!stats.isFile()) {
        return response.status(400).json({
          success: false,
          error: "提供的路径不是文件",
        });
      }

      // Read the raw bytes before processing (same order as the original)
      // and encode them as Base64 for the JSON response.
      const fileContent = await fs.readFile(sourceFile);
      const fileContentBase64 = fileContent.toString("base64");

      // Process the file and return the result together with its content.
      const {
        success,
        reason,
        documents = [],
      } = await processSingleFile(targetFilename, options);
      response.status(200).json({
        filename: sourceFile,
        success,
        reason,
        documents,
        fileContent: fileContentBase64,
      });
    } catch (e) {
      console.error(e);
      if (e.code === "EISDIR") {
        // The path turned out to be a directory when it was read.
        response.status(400).json({
          success: false,
          error: "提供的路径是目录,不是文件",
        });
      } else {
        response.status(500).json({
          filename: filename,
          success: false,
          reason: "A processing error occurred.",
          documents: [],
        });
      }
    }
  }
);
// POST /process-link
// Passes the submitted `link` to processLink and relays its result.
// Always answers with HTTP 200; a failure is reported via `success: false`.
app.post("/process-link", [verifyPayloadIntegrity], async (req, res) => {
  const { link } = reqBody(req);

  try {
    const outcome = await processLink(link);
    const { success, reason, documents = [] } = outcome;
    res.status(200).json({ url: link, success, reason, documents });
  } catch (err) {
    console.error(err);
    const failure = {
      url: link,
      success: false,
      reason: "A processing error occurred.",
      documents: [],
    };
    res.status(200).json(failure);
  }
});
// POST /util/get-link
// Calls getLinkText with the submitted `link` and `captureAs` (default "text")
// and returns the resulting content. Always answers with HTTP 200; on error
// the body carries `success: false` and `content: null`.
app.post("/util/get-link", [verifyPayloadIntegrity], async (req, res) => {
  const { link, captureAs = "text" } = reqBody(req);

  try {
    const outcome = await getLinkText(link, captureAs);
    const { success, content = null } = outcome;
    res.status(200).json({ url: link, success, content });
  } catch (err) {
    console.error(err);
    const failure = { url: link, success: false, content: null };
    res.status(200).json(failure);
  }
});
// POST /process-raw-text
// Passes `textContent` and `metadata` from the request body to processRawText
// and relays its result, labelled with `metadata.title`. Always answers with
// HTTP 200; on error the body carries `success: false` and the filename falls
// back to "Unknown-doc.txt" when no title is available.
app.post("/process-raw-text", [verifyPayloadIntegrity], async (req, res) => {
  const { textContent, metadata } = reqBody(req);

  try {
    const outcome = await processRawText(textContent, metadata);
    const { success, reason, documents = [] } = outcome;
    const body = { filename: metadata.title, success, reason, documents };
    res.status(200).json(body);
  } catch (err) {
    console.error(err);
    const failure = {
      filename: metadata?.title || "Unknown-doc.txt",
      success: false,
      reason: "A processing error occurred.",
      documents: [],
    };
    res.status(200).json(failure);
  }
});
// Hand the app to the extensions module (presumably it registers extra routes).
extensions(app);

// GET /accepts — replies with the ACCEPTED_MIMES constant as JSON.
app.get("/accepts", (_req, res) => {
  res.status(200).json(ACCEPTED_MIMES);
});

// Catch-all for any other method/path: respond with a bare 200.
app.all("*", (_req, res) => {
  res.sendStatus(200);
});
// Start the HTTP server on port 8888. Once listening, wipe the collector
// storage and log a startup message. If the server emits "error", install
// signal handlers that re-send SIGUSR2 (once) and SIGINT to this process.
app
  .listen(8888, async () => {
    await wipeCollectorStorage();
    console.log("Document processor app listening on port 8888");
  })
  .on("error", () => {
    process.once("SIGUSR2", () => {
      process.kill(process.pid, "SIGUSR2");
    });
    process.on("SIGINT", () => {
      process.kill(process.pid, "SIGINT");
    });
  });