const { NativeEmbedder } = require("../../EmbeddingEngines/native");
const {
  LLMPerformanceMonitor,
} = require("../../helpers/chat/LLMPerformanceMonitor");
const {
  handleDefaultStreamResponseV2,
  formatChatHistory,
} = require("../../helpers/chat/responses");
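
/**
 * LLM provider for a LocalAI (OpenAI-compatible) server.
 * Connection and model settings are read from the environment:
 * LOCAL_AI_BASE_PATH, LOCAL_AI_API_KEY (optional), LOCAL_AI_MODEL_PREF,
 * and LOCAL_AI_MODEL_TOKEN_LIMIT.
 */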
class LocalAiLLM {
  constructor(embedder = null, modelPreference = null) {
    if (!process.env.LOCAL_AI_BASE_PATH)
      throw new Error("No LocalAI Base Path was set.");

    const { OpenAI: OpenAIApi } = require("openai");
    this.openai = new OpenAIApi({
      baseURL: process.env.LOCAL_AI_BASE_PATH,
      apiKey: process.env.LOCAL_AI_API_KEY ?? null,
    });
    this.model = modelPreference || process.env.LOCAL_AI_MODEL_PREF;
    this.limits = {
      history: this.promptWindowLimit() * 0.15,
      system: this.promptWindowLimit() * 0.15,
      user: this.promptWindowLimit() * 0.7,
    };

    this.embedder = embedder ?? new NativeEmbedder();
    this.defaultTemp = 0.7;
  }
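
  // Wrap retrieved context snippets in [CONTEXT n]...[END CONTEXT n] blocks
  // that get appended to the system prompt.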
  #appendContext(contextTexts = []) {
    if (!contextTexts || !contextTexts.length) return "";
    return (
      "\nContext:\n" +
      contextTexts
        .map((text, i) => {
          return `[CONTEXT ${i}]:\n${text}\n[END CONTEXT ${i}]\n\n`;
        })
        .join("")
    );
  }
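
  // Streaming is enabled whenever this class implements streamGetChatCompletion.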
  streamingEnabled() {
    return "streamGetChatCompletion" in this;
  }
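
  // Static mirror of promptWindowLimit() for callers that do not have an instance.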
  static promptWindowLimit(_modelName) {
    const limit = process.env.LOCAL_AI_MODEL_TOKEN_LIMIT || 4096;
    if (!limit || isNaN(Number(limit)))
      throw new Error("No LocalAI token context limit was set.");
    return Number(limit);
  }

  // Ensure the user set a value for the token limit;
  // if undefined, assume a 4096-token window.
  promptWindowLimit() {
    const limit = process.env.LOCAL_AI_MODEL_TOKEN_LIMIT || 4096;
    if (!limit || isNaN(Number(limit)))
      throw new Error("No LocalAI token context limit was set.");
    return Number(limit);
  }
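
  // LocalAI serves whatever model the user has loaded, so no model-name
  // validation is performed and every model is treated as chat-capable.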
  async isValidChatCompletionModel(_ = "") {
    return true;
  }

  /**
   * Generates the appropriate content payload for a message + attachments.
   * Returns the plain prompt string when there are no attachments, otherwise
   * an array of text and image_url content parts.
   * @param {{userPrompt: string, attachments: import("../../helpers").Attachment[]}} params
   * @returns {string|object[]}
   */
  #generateContent({ userPrompt, attachments = [] }) {
    if (!attachments.length) {
      return userPrompt;
    }

    const content = [{ type: "text", text: userPrompt }];
    for (let attachment of attachments) {
      content.push({
        type: "image_url",
        image_url: {
          url: attachment.contentString,
        },
      });
    }
    return content.flat();
  }

  /**
   * Construct the full message array (system prompt + context, chat history,
   * and the latest user turn) for this model.
   * @param {{systemPrompt: string, contextTexts: string[], chatHistory: object[], userPrompt: string, attachments: import("../../helpers").Attachment[]}} param0
   * @returns {object[]} OpenAI-style chat messages
   */
  constructPrompt({
    systemPrompt = "",
    contextTexts = [],
    chatHistory = [],
    userPrompt = "",
    attachments = [],
  }) {
    const prompt = {
      role: "system",
      content: `${systemPrompt}${this.#appendContext(contextTexts)}`,
    };
    return [
      prompt,
      ...formatChatHistory(chatHistory, this.#generateContent),
      {
        role: "user",
        content: this.#generateContent({ userPrompt, attachments }),
      },
    ];
  }
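
  // Non-streaming chat completion. Wraps the request in the performance
  // monitor so prompt/completion token counts and duration are reported.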
  async getChatCompletion(messages = null, { temperature = 0.7 }) {
    if (!(await this.isValidChatCompletionModel(this.model)))
      throw new Error(
        `LocalAI chat: ${this.model} is not valid for chat completion!`
      );

    const result = await LLMPerformanceMonitor.measureAsyncFunction(
      this.openai.chat.completions.create({
        model: this.model,
        messages,
        temperature,
      })
    );

    if (
      !result.output.hasOwnProperty("choices") ||
      result.output.choices.length === 0
    )
      return null;

    const promptTokens = LLMPerformanceMonitor.countTokens(messages);
    const completionTokens = LLMPerformanceMonitor.countTokens(
      result.output.choices[0].message.content
    );

    return {
      textResponse: result.output.choices[0].message.content,
      metrics: {
        prompt_tokens: promptTokens,
        completion_tokens: completionTokens,
        total_tokens: promptTokens + completionTokens,
        outputTps: completionTokens / result.duration,
        duration: result.duration,
      },
    };
  }
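
  // Streaming chat completion. Returns the measured stream so the caller can
  // consume tokens as they arrive while metrics are collected.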
  async streamGetChatCompletion(messages = null, { temperature = 0.7 }) {
    if (!(await this.isValidChatCompletionModel(this.model)))
      throw new Error(
        `LocalAI chat: ${this.model} is not valid for chat completion!`
      );

    const measuredStreamRequest = await LLMPerformanceMonitor.measureStream(
      this.openai.chat.completions.create({
        model: this.model,
        stream: true,
        messages,
        temperature,
      }),
      messages
    );
    return measuredStreamRequest;
  }
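
  // Delegate stream handling to the shared OpenAI-compatible stream handler.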
  handleStream(response, stream, responseProps) {
    return handleDefaultStreamResponseV2(response, stream, responseProps);
  }

  // Simple wrappers around the dynamic embedder to normalize the embedding
  // interface across all LLM implementations.
  async embedTextInput(textInput) {
    return await this.embedder.embedTextInput(textInput);
  }
  async embedChunks(textChunks = []) {
    return await this.embedder.embedChunks(textChunks);
  }
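
  // Build the full prompt and run it through the shared message compressor so
  // the final message array fits within the model's context window.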
  async compressMessages(promptArgs = {}, rawHistory = []) {
    const { messageArrayCompressor } = require("../../helpers/chat");
    const messageArray = this.constructPrompt(promptArgs);
    return await messageArrayCompressor(this, messageArray, rawHistory);
  }
}

module.exports = {
  LocalAiLLM,
};
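
/**
 * Illustrative usage sketch only (not executed by this module). It assumes the
 * LOCAL_AI_BASE_PATH / LOCAL_AI_MODEL_PREF environment variables are set, and
 * the require path and model name below are hypothetical:
 *
 *   const { LocalAiLLM } = require("./index");
 *   const llm = new LocalAiLLM(null, "my-local-model");
 *   const messages = llm.constructPrompt({
 *     systemPrompt: "You are a helpful assistant.",
 *     contextTexts: [],
 *     chatHistory: [],
 *     userPrompt: "Hello!",
 *     attachments: [],
 *   });
 *   llm
 *     .getChatCompletion(messages, { temperature: llm.defaultTemp })
 *     .then((res) => console.log(res?.textResponse, res?.metrics));
 */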