const { v4: uuidv4 } = require("uuid");
const { DocumentManager } = require("../DocumentManager");
const { WorkspaceChats } = require("../../models/workspaceChats");
const { getVectorDbClass, getLLMProvider } = require("../helpers");
const { writeResponseChunk } = require("../helpers/chat/responses");
const { chatPrompt, sourceIdentifier } = require("./index");
const { PassThrough } = require("stream");

async function chatSync({
  workspace,
  systemPrompt = null,
  history = [],
  prompt = null,
  temperature = null,
}) {
  const uuid = uuidv4();
  const chatMode = workspace?.chatMode ?? "chat";
  const LLMConnector = getLLMProvider({
    provider: workspace?.chatProvider,
    model: workspace?.chatModel,
  });
  const VectorDb = getVectorDbClass();
  const hasVectorizedSpace = await VectorDb.hasNamespace(workspace.slug);
  const embeddingsCount = await VectorDb.namespaceCount(workspace.slug);

  // User is trying to query-mode chat a workspace that has no data in it - so
  // we should exit early as no information can be found under these conditions.
  if ((!hasVectorizedSpace || embeddingsCount === 0) && chatMode === "query") {
    const textResponse =
      workspace?.queryRefusalResponse ??
      "There is no relevant information in this workspace to answer your query.";
    await WorkspaceChats.new({
      workspaceId: workspace.id,
      prompt: String(prompt),
      response: {
        text: textResponse,
        sources: [],
        type: chatMode,
      },
      include: false,
    });

    return formatJSON(
      {
        id: uuid,
        type: "textResponse",
        sources: [],
        close: true,
        error: null,
        textResponse,
      },
      { model: workspace.slug, finish_reason: "abort" }
    );
  }

  // If we are here we know that we are in a workspace that is:
  // 1. Chatting in "chat" mode and may or may _not_ have embeddings
  // 2. Chatting in "query" mode and has at least 1 embedding
  let contextTexts = [];
  let sources = [];
  let pinnedDocIdentifiers = [];

  await new DocumentManager({
    workspace,
    maxTokens: LLMConnector.promptWindowLimit(),
  })
    .pinnedDocs()
    .then((pinnedDocs) => {
      pinnedDocs.forEach((doc) => {
        const { pageContent, ...metadata } = doc;
        pinnedDocIdentifiers.push(sourceIdentifier(doc));
        contextTexts.push(doc.pageContent);
        sources.push({
          text:
            pageContent.slice(0, 1_000) +
            "...continued on in source document...",
          ...metadata,
        });
      });
    });

  const vectorSearchResults =
    embeddingsCount !== 0
      ? await VectorDb.performSimilaritySearch({
          namespace: workspace.slug,
          input: prompt,
          LLMConnector,
          similarityThreshold: workspace?.similarityThreshold,
          topN: workspace?.topN,
          filterIdentifiers: pinnedDocIdentifiers,
          rerank: workspace?.vectorSearchMode === "rerank",
        })
      : {
          contextTexts: [],
          sources: [],
          message: null,
        };

  // If the similarity search was run and failed, abort early.
  if (!!vectorSearchResults.message) {
    return formatJSON(
      {
        id: uuid,
        type: "abort",
        textResponse: null,
        sources: [],
        close: true,
        error: vectorSearchResults.message,
      },
      { model: workspace.slug, finish_reason: "abort" }
    );
  }

  // For OpenAI Compatible chats, we cannot do backfilling, so we simply aggregate results here.
  contextTexts = [...contextTexts, ...vectorSearchResults.contextTexts];
  sources = [...sources, ...vectorSearchResults.sources];

  // If in query mode and no context chunks were found from search, backfill, or pins,
  // exit early rather than letting the LLM hallucinate a response from general knowledge.
  if (chatMode === "query" && contextTexts.length === 0) {
    const textResponse =
      workspace?.queryRefusalResponse ??
      "There is no relevant information in this workspace to answer your query.";
    await WorkspaceChats.new({
      workspaceId: workspace.id,
      prompt: prompt,
      response: {
        text: textResponse,
        sources: [],
        type: chatMode,
      },
      include: false,
    });

    return formatJSON(
      {
        id: uuid,
        type: "textResponse",
        sources: [],
        close: true,
        error: null,
        textResponse,
      },
      { model: workspace.slug, finish_reason: "no_content" }
    );
  }

  // Compress & assemble messages to ensure the prompt fits the token limit with room
  // for a response, and build system messages based on inputs and history.
  const messages = await LLMConnector.compressMessages({
    systemPrompt: systemPrompt ?? chatPrompt(workspace),
    userPrompt: prompt,
    contextTexts,
    chatHistory: history,
  });

  // Send the text completion.
  const { textResponse, metrics } = await LLMConnector.getChatCompletion(
    messages,
    {
      temperature:
        temperature ?? workspace?.openAiTemp ?? LLMConnector.defaultTemp,
    }
  );

  if (!textResponse) {
    return formatJSON(
      {
        id: uuid,
        type: "textResponse",
        sources: [],
        close: true,
        error: "No text completion could be completed with this input.",
        textResponse: null,
      },
      { model: workspace.slug, finish_reason: "no_content", usage: metrics }
    );
  }

  const { chat } = await WorkspaceChats.new({
    workspaceId: workspace.id,
    prompt: prompt,
    response: { text: textResponse, sources, type: chatMode, metrics },
  });

  return formatJSON(
    {
      id: uuid,
      type: "textResponse",
      close: true,
      error: null,
      chatId: chat.id,
      textResponse,
      sources,
    },
    { model: workspace.slug, finish_reason: "stop", usage: metrics }
  );
}

async function streamChat({
  workspace,
  response,
  systemPrompt = null,
  history = [],
  prompt = null,
  temperature = null,
}) {
  const uuid = uuidv4();
  const chatMode = workspace?.chatMode ?? "chat";
  const LLMConnector = getLLMProvider({
    provider: workspace?.chatProvider,
    model: workspace?.chatModel,
  });
  const VectorDb = getVectorDbClass();
  const hasVectorizedSpace = await VectorDb.hasNamespace(workspace.slug);
  const embeddingsCount = await VectorDb.namespaceCount(workspace.slug);

  // We don't want to write a new method for every LLM to support OpenAI calls
  // via the `handleStreamResponseV2` method handler. So here we create a passthrough
  // that, on writes to the main response, transforms the chunk to OpenAI format.
  // Each chunk arrives from `writeResponseChunk` in the AnythingLLM response chunk
  // schema, so we mutate it here before forwarding it.
  const responseInterceptor = new PassThrough({});
  responseInterceptor.on("data", (chunk) => {
    try {
      const originalData = JSON.parse(chunk.toString().split("data: ")[1]);
      const modified = formatJSON(originalData, {
        chunked: true,
        model: workspace.slug,
      }); // rewrite to OpenAI format
      response.write(`data: ${JSON.stringify(modified)}\n\n`);
    } catch (e) {
      console.error(e);
    }
  });
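
  // For example (illustrative only; the exact field set depends on what the connector's
  // stream handler writes), an AnythingLLM-schema SSE chunk such as
  //   data: {"uuid":"...","sources":[],"textResponse":"Hello","close":false,"error":false}
  // is rewritten by formatJSON into an OpenAI-style streaming delta:
  //   data: {"id":"...","object":"chat.completion","created":<unix seconds>,
  //          "model":"<workspace slug>","choices":[{"index":0,
  //          "delta":{"role":"assistant","content":"Hello"},
  //          "logprobs":null,"finish_reason":null}],"usage":{}}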

  // User is trying to query-mode chat a workspace that has no data in it - so
  // we should exit early as no information can be found under these conditions.
  if ((!hasVectorizedSpace || embeddingsCount === 0) && chatMode === "query") {
    const textResponse =
      workspace?.queryRefusalResponse ??
      "There is no relevant information in this workspace to answer your query.";
    await WorkspaceChats.new({
      workspaceId: workspace.id,
      prompt: String(prompt),
      response: {
        text: textResponse,
        sources: [],
        type: chatMode,
      },
      include: false,
    });

    writeResponseChunk(
      response,
      formatJSON(
        {
          id: uuid,
          type: "textResponse",
          sources: [],
          close: true,
          error: null,
          textResponse,
        },
        { chunked: true, model: workspace.slug, finish_reason: "abort" }
      )
    );
    return;
  }

  // If we are here we know that we are in a workspace that is:
  // 1. Chatting in "chat" mode and may or may _not_ have embeddings
  // 2. Chatting in "query" mode and has at least 1 embedding
  let contextTexts = [];
  let sources = [];
  let pinnedDocIdentifiers = [];

  await new DocumentManager({
    workspace,
    maxTokens: LLMConnector.promptWindowLimit(),
  })
    .pinnedDocs()
    .then((pinnedDocs) => {
      pinnedDocs.forEach((doc) => {
        const { pageContent, ...metadata } = doc;
        pinnedDocIdentifiers.push(sourceIdentifier(doc));
        contextTexts.push(doc.pageContent);
        sources.push({
          text:
            pageContent.slice(0, 1_000) +
            "...continued on in source document...",
          ...metadata,
        });
      });
    });

  const vectorSearchResults =
    embeddingsCount !== 0
      ? await VectorDb.performSimilaritySearch({
          namespace: workspace.slug,
          input: prompt,
          LLMConnector,
          similarityThreshold: workspace?.similarityThreshold,
          topN: workspace?.topN,
          filterIdentifiers: pinnedDocIdentifiers,
          rerank: workspace?.vectorSearchMode === "rerank",
        })
      : {
          contextTexts: [],
          sources: [],
          message: null,
        };

  // If the similarity search was run and failed, abort early.
  if (!!vectorSearchResults.message) {
    writeResponseChunk(
      response,
      formatJSON(
        {
          id: uuid,
          type: "abort",
          textResponse: null,
          sources: [],
          close: true,
          error: vectorSearchResults.message,
        },
        { chunked: true, model: workspace.slug, finish_reason: "abort" }
      )
    );
    return;
  }

  // For OpenAI Compatible chats, we cannot do backfilling, so we simply aggregate results here.
  contextTexts = [...contextTexts, ...vectorSearchResults.contextTexts];
  sources = [...sources, ...vectorSearchResults.sources];

  // If in query mode and no context chunks were found from search, backfill, or pins,
  // exit early rather than letting the LLM hallucinate a response from general knowledge.
  if (chatMode === "query" && contextTexts.length === 0) {
    const textResponse =
      workspace?.queryRefusalResponse ??
      "There is no relevant information in this workspace to answer your query.";
    await WorkspaceChats.new({
      workspaceId: workspace.id,
      prompt: prompt,
      response: {
        text: textResponse,
        sources: [],
        type: chatMode,
      },
      include: false,
    });

    writeResponseChunk(
      response,
      formatJSON(
        {
          id: uuid,
          type: "textResponse",
          sources: [],
          close: true,
          error: null,
          textResponse,
        },
        { chunked: true, model: workspace.slug, finish_reason: "no_content" }
      )
    );
    return;
  }

  // Compress & assemble messages to ensure the prompt fits the token limit with room
  // for a response, and build system messages based on inputs and history.
  const messages = await LLMConnector.compressMessages({
    systemPrompt: systemPrompt ?? chatPrompt(workspace),
    userPrompt: prompt,
    contextTexts,
    chatHistory: history,
  });

  if (!LLMConnector.streamingEnabled()) {
    writeResponseChunk(
      response,
      formatJSON(
        {
          id: uuid,
          type: "textResponse",
          sources: [],
          close: true,
          error: "Streaming is not available for the connected LLM Provider",
          textResponse: null,
        },
        {
          chunked: true,
          model: workspace.slug,
          finish_reason: "streaming_disabled",
        }
      )
    );
    return;
  }

  const stream = await LLMConnector.streamGetChatCompletion(messages, {
    temperature:
      temperature ?? workspace?.openAiTemp ?? LLMConnector.defaultTemp,
  });
  const completeText = await LLMConnector.handleStream(
    responseInterceptor,
    stream,
    {
      uuid,
      sources,
    }
  );

  if (completeText?.length > 0) {
    const { chat } = await WorkspaceChats.new({
      workspaceId: workspace.id,
      prompt: prompt,
      response: {
        text: completeText,
        sources,
        type: chatMode,
        metrics: stream.metrics,
      },
    });

    writeResponseChunk(
      response,
      formatJSON(
        {
          uuid,
          type: "finalizeResponseStream",
          close: true,
          error: false,
          chatId: chat.id,
          textResponse: "",
        },
        {
          chunked: true,
          model: workspace.slug,
          finish_reason: "stop",
          usage: stream.metrics,
        }
      )
    );
    return;
  }

  writeResponseChunk(
    response,
    formatJSON(
      {
        uuid,
        type: "finalizeResponseStream",
        close: true,
        error: false,
        textResponse: "",
      },
      {
        chunked: true,
        model: workspace.slug,
        finish_reason: "stop",
        usage: stream.metrics,
      }
    )
  );
  return;
}

function formatJSON(
  chat,
  { chunked = false, model, finish_reason = null, usage = {} }
) {
  const data = {
    id: chat.uuid ?? chat.id,
    object: "chat.completion",
    created: Math.floor(Number(new Date()) / 1000),
    model: model,
    choices: [
      {
        index: 0,
        [chunked ? "delta" : "message"]: {
          role: "assistant",
          content: chat.textResponse,
        },
        logprobs: null,
        finish_reason: finish_reason,
      },
    ],
    usage,
  };

  return data;
}
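
// For reference (illustrative values), a non-chunked call such as
//   formatJSON({ id: uuid, textResponse: "Hi there" }, { model: "my-workspace", finish_reason: "stop" })
// produces:
//   {
//     id: "<uuid>",
//     object: "chat.completion",
//     created: 1700000000,
//     model: "my-workspace",
//     choices: [
//       {
//         index: 0,
//         message: { role: "assistant", content: "Hi there" },
//         logprobs: null,
//         finish_reason: "stop",
//       },
//     ],
//     usage: {},
//   }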

module.exports.OpenAICompatibleChat = {
  chatSync,
  streamChat,
};
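
// Usage sketch (hypothetical wiring; the actual endpoint, auth, and workspace lookup
// live elsewhere in the codebase and may differ):
//
//   const { OpenAICompatibleChat } = require("path/to/this/module");
//
//   app.post("/v1/chat/completions", async (request, response) => {
//     const workspace = await Workspace.get({ slug: request.body.model }); // hypothetical lookup
//     const messages = request.body.messages ?? [];
//     const prompt = messages[messages.length - 1]?.content ?? null;
//     const history = messages.slice(0, -1);
//
//     if (request.body.stream) {
//       response.setHeader("Content-Type", "text/event-stream");
//       await OpenAICompatibleChat.streamChat({ workspace, response, prompt, history });
//       return response.end();
//     }
//
//     const completion = await OpenAICompatibleChat.chatSync({ workspace, prompt, history });
//     return response.status(200).json(completion);
//   });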