const { NativeEmbedder } = require("../../EmbeddingEngines/native");
const {
  LLMPerformanceMonitor,
} = require("../../helpers/chat/LLMPerformanceMonitor");
const { v4: uuidv4 } = require("uuid");
const { MODEL_MAP } = require("../modelMap");
const {
  writeResponseChunk,
  clientAbortedHandler,
} = require("../../helpers/chat/responses");

class DeepSeekLLM {
  constructor(embedder = null, modelPreference = null) {
    if (!process.env.DEEPSEEK_API_KEY)
      throw new Error("No DeepSeek API key was set.");
    const { OpenAI: OpenAIApi } = require("openai");

    this.openai = new OpenAIApi({
      apiKey: process.env.DEEPSEEK_API_KEY,
      baseURL: "https://api.deepseek.com/v1",
    });
    this.model =
      modelPreference || process.env.DEEPSEEK_MODEL_PREF || "deepseek-chat";
    this.limits = {
      history: this.promptWindowLimit() * 0.15,
      system: this.promptWindowLimit() * 0.15,
      user: this.promptWindowLimit() * 0.7,
    };

    this.embedder = embedder ?? new NativeEmbedder();
    this.defaultTemp = 0.7;
    this.log("Initialized with model:", this.model);
  }

  log(text, ...args) {
    console.log(`\x1b[36m[${this.constructor.name}]\x1b[0m ${text}`, ...args);
  }

  #appendContext(contextTexts = []) {
    if (!contextTexts || !contextTexts.length) return "";
    return (
      "\nContext:\n" +
      contextTexts
        .map((text, i) => {
          return `[CONTEXT ${i}]:\n${text}\n[END CONTEXT ${i}]\n\n`;
        })
        .join("")
    );
  }

  streamingEnabled() {
    return "streamGetChatCompletion" in this;
  }

  static promptWindowLimit(modelName) {
    return MODEL_MAP.deepseek[modelName] ?? 8192;
  }

  promptWindowLimit() {
    return MODEL_MAP.deepseek[this.model] ?? 8192;
  }

  async isValidChatCompletionModel(modelName = "") {
    const models = await this.openai.models.list().catch(() => ({ data: [] }));
    return models.data.some((model) => model.id === modelName);
  }

  constructPrompt({
    systemPrompt = "",
    contextTexts = [],
    chatHistory = [],
    userPrompt = "",
  }) {
    const prompt = {
      role: "system",
      content: `${systemPrompt}${this.#appendContext(contextTexts)}`,
    };
    return [prompt, ...chatHistory, { role: "user", content: userPrompt }];
  }

  /**
   * Parses and prepends reasoning from the response and returns the full text response.
   * @param {Object} response
   * @returns {string}
   */
  #parseReasoningFromResponse({ message }) {
    let textResponse = message?.content;
    if (
      !!message?.reasoning_content &&
      message.reasoning_content.trim().length > 0
    )
      textResponse = `<think>${message.reasoning_content}</think>${textResponse}`;
    return textResponse;
  }

  async getChatCompletion(messages = null, { temperature = 0.7 }) {
    if (!(await this.isValidChatCompletionModel(this.model)))
      throw new Error(
        `DeepSeek chat: ${this.model} is not valid for chat completion!`
      );

    const result = await LLMPerformanceMonitor.measureAsyncFunction(
      this.openai.chat.completions
        .create({
          model: this.model,
          messages,
          temperature,
        })
        .catch((e) => {
          throw new Error(e.message);
        })
    );

    if (
      !result?.output?.hasOwnProperty("choices") ||
      result?.output?.choices?.length === 0
    )
      throw new Error(
        `Invalid response body returned from DeepSeek: ${JSON.stringify(result.output)}`
      );

    return {
      textResponse: this.#parseReasoningFromResponse(result.output.choices[0]),
      metrics: {
        prompt_tokens: result.output.usage.prompt_tokens || 0,
        completion_tokens: result.output.usage.completion_tokens || 0,
        total_tokens: result.output.usage.total_tokens || 0,
        outputTps: result.output.usage.completion_tokens / result.duration,
        duration: result.duration,
      },
    };
  }

  async streamGetChatCompletion(messages = null, { temperature = 0.7 }) {
    if (!(await this.isValidChatCompletionModel(this.model)))
      throw new Error(
        `DeepSeek chat: ${this.model} is not valid for chat completion!`
      );

    const measuredStreamRequest = await LLMPerformanceMonitor.measureStream(
      this.openai.chat.completions.create({
        model: this.model,
        stream: true,
        messages,
        temperature,
      }),
      messages,
      false
    );
    return measuredStreamRequest;
  }

  // TODO: This is a copy of the generic handleStream function in responses.js
  // to specifically handle the DeepSeek reasoning model `reasoning_content` field.
  // When or if ever possible, we should refactor this to be in the generic function.
  handleStream(response, stream, responseProps) {
    const { uuid = uuidv4(), sources = [] } = responseProps;
    let hasUsageMetrics = false;
    let usage = {
      completion_tokens: 0,
    };

    return new Promise(async (resolve) => {
      let fullText = "";
      let reasoningText = "";

      // Establish listener to early-abort a streaming response
      // in case things go sideways or the user does not like the response.
      // We preserve the generated text but continue as if chat was completed
      // to preserve previously generated content.
      const handleAbort = () => {
        stream?.endMeasurement(usage);
        clientAbortedHandler(resolve, fullText);
      };
      response.on("close", handleAbort);

      try {
        for await (const chunk of stream) {
          const message = chunk?.choices?.[0];
          const token = message?.delta?.content;
          const reasoningToken = message?.delta?.reasoning_content;

          if (
            chunk.hasOwnProperty("usage") && // exists
            !!chunk.usage && // is not null
            Object.values(chunk.usage).length > 0 // has values
          ) {
            if (chunk.usage.hasOwnProperty("prompt_tokens")) {
              usage.prompt_tokens = Number(chunk.usage.prompt_tokens);
            }

            if (chunk.usage.hasOwnProperty("completion_tokens")) {
              hasUsageMetrics = true; // to stop estimating counter
              usage.completion_tokens = Number(chunk.usage.completion_tokens);
            }
          }

          // Reasoning models will always return the reasoning text before the token text.
          if (reasoningToken) {
            // If the reasoning text is empty (''), we need to initialize it
            // and send the first chunk of reasoning text.
            if (reasoningText.length === 0) {
              writeResponseChunk(response, {
                uuid,
                sources: [],
                type: "textResponseChunk",
                textResponse: `<think>${reasoningToken}`,
                close: false,
                error: false,
              });
              reasoningText += `<think>${reasoningToken}`;
              continue;
            } else {
              writeResponseChunk(response, {
                uuid,
                sources: [],
                type: "textResponseChunk",
                textResponse: reasoningToken,
                close: false,
                error: false,
              });
              reasoningText += reasoningToken;
            }
          }

          // If the reasoning text is not empty, but the reasoning token is empty
          // and the token text is not empty we need to close the reasoning text and begin sending the token text.
          if (!!reasoningText && !reasoningToken && token) {
            writeResponseChunk(response, {
              uuid,
              sources: [],
              type: "textResponseChunk",
              textResponse: `</think>`,
              close: false,
              error: false,
            });
            fullText += `${reasoningText}</think>`;
            reasoningText = "";
          }

          if (token) {
            fullText += token;
            // If we never saw a usage metric, we can estimate them by number of completion chunks
            if (!hasUsageMetrics) usage.completion_tokens++;
            writeResponseChunk(response, {
              uuid,
              sources: [],
              type: "textResponseChunk",
              textResponse: token,
              close: false,
              error: false,
            });
          }

          // LocalAi returns '' and others return null on chunks - the last chunk is not "" or null.
          // Either way, the key `finish_reason` must be present to determine ending chunk.
          if (
            message?.hasOwnProperty("finish_reason") && // Got valid message and it is an object with finish_reason
            message.finish_reason !== "" &&
            message.finish_reason !== null
          ) {
            writeResponseChunk(response, {
              uuid,
              sources,
              type: "textResponseChunk",
              textResponse: "",
              close: true,
              error: false,
            });
            response.removeListener("close", handleAbort);
            stream?.endMeasurement(usage);
            resolve(fullText);
            break; // Break streaming when a valid finish_reason is first encountered
          }
        }
      } catch (e) {
        console.log(`\x1b[43m\x1b[34m[STREAMING ERROR]\x1b[0m ${e.message}`);
        writeResponseChunk(response, {
          uuid,
          type: "abort",
          textResponse: null,
          sources: [],
          close: true,
          error: e.message,
        });
        stream?.endMeasurement(usage);
        resolve(fullText); // Return what we currently have - if anything.
      }
    });
  }

  async embedTextInput(textInput) {
    return await this.embedder.embedTextInput(textInput);
  }

  async embedChunks(textChunks = []) {
    return await this.embedder.embedChunks(textChunks);
  }

  async compressMessages(promptArgs = {}, rawHistory = []) {
    const { messageArrayCompressor } = require("../../helpers/chat");
    const messageArray = this.constructPrompt(promptArgs);
    return await messageArrayCompressor(this, messageArray, rawHistory);
  }
}

module.exports = {
  DeepSeekLLM,
};
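
// ---------------------------------------------------------------------------
// Usage sketch (illustrative only, not part of the module above).
// A minimal example of how this provider might be driven for a single
// non-streaming completion, assuming DEEPSEEK_API_KEY is set in the
// environment and that the relative requires above resolve as in the main
// project. The names `provider` and `messages` below are hypothetical.
//
// const { DeepSeekLLM } = require("./index");
//
// (async () => {
//   // Null embedder falls back to the NativeEmbedder inside the constructor.
//   const provider = new DeepSeekLLM(null, "deepseek-chat");
//
//   // Build the OpenAI-style message array (system + history + user turn).
//   const messages = provider.constructPrompt({
//     systemPrompt: "You are a helpful assistant.",
//     contextTexts: [],
//     chatHistory: [],
//     userPrompt: "Summarize what the DeepSeekLLM provider does.",
//   });
//
//   // Returns the text (with any <think>…</think> reasoning prepended)
//   // plus token/duration metrics collected by LLMPerformanceMonitor.
//   const { textResponse, metrics } = await provider.getChatCompletion(
//     messages,
//     { temperature: provider.defaultTemp }
//   );
//   console.log(textResponse, metrics);
// })();
// ---------------------------------------------------------------------------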