const { NativeEmbedder } = require("../../EmbeddingEngines/native");
const {
  handleDefaultStreamResponseV2,
  formatChatHistory,
} = require("../../helpers/chat/responses");
const {
  LLMPerformanceMonitor,
} = require("../../helpers/chat/LLMPerformanceMonitor");

class TextGenWebUILLM {
  constructor(embedder = null) {
    const { OpenAI: OpenAIApi } = require("openai");
    if (!process.env.TEXT_GEN_WEB_UI_BASE_PATH)
      throw new Error(
        "TextGenWebUI must have a valid base path to use for the api."
      );

    this.basePath = process.env.TEXT_GEN_WEB_UI_BASE_PATH;
    this.openai = new OpenAIApi({
      baseURL: this.basePath,
      apiKey: process.env.TEXT_GEN_WEB_UI_API_KEY ?? null,
    });
    this.model = null;
    this.limits = {
      history: this.promptWindowLimit() * 0.15,
      system: this.promptWindowLimit() * 0.15,
      user: this.promptWindowLimit() * 0.7,
    };

    this.embedder = embedder ?? new NativeEmbedder();
    this.defaultTemp = 0.7;
    this.log(`Inference API: ${this.basePath} Model: ${this.model}`);
  }

  log(text, ...args) {
    console.log(`\x1b[36m[${this.constructor.name}]\x1b[0m ${text}`, ...args);
  }

  #appendContext(contextTexts = []) {
    if (!contextTexts || !contextTexts.length) return "";
    return (
      "\nContext:\n" +
      contextTexts
        .map((text, i) => {
          return `[CONTEXT ${i}]:\n${text}\n[END CONTEXT ${i}]\n\n`;
        })
        .join("")
    );
  }

  streamingEnabled() {
    return "streamGetChatCompletion" in this;
  }

  static promptWindowLimit(_modelName) {
    const limit = process.env.TEXT_GEN_WEB_UI_MODEL_TOKEN_LIMIT || 4096;
    if (!limit || isNaN(Number(limit)))
      throw new Error("No token context limit was set.");
    return Number(limit);
  }

  // Ensure the user set a value for the token limit
  // and if undefined - assume 4096 window.
  promptWindowLimit() {
    const limit = process.env.TEXT_GEN_WEB_UI_MODEL_TOKEN_LIMIT || 4096;
    if (!limit || isNaN(Number(limit)))
      throw new Error("No token context limit was set.");
    return Number(limit);
  }

  // Short circuit since we have no idea if the model is valid or not
  // in pre-flight for generic endpoints
  isValidChatCompletionModel(_modelName = "") {
    return true;
  }

  /**
   * Generates appropriate content array for a message + attachments.
   * @param {{userPrompt:string, attachments: import("../../helpers").Attachment[]}}
   * @returns {string|object[]}
   */
  #generateContent({ userPrompt, attachments = [] }) {
    if (!attachments.length) {
      return userPrompt;
    }

    const content = [{ type: "text", text: userPrompt }];
    for (let attachment of attachments) {
      content.push({
        type: "image_url",
        image_url: {
          url: attachment.contentString,
        },
      });
    }
    return content.flat();
  }

  /**
   * Construct the user prompt for this model.
   * @param {{attachments: import("../../helpers").Attachment[]}} param0
   * @returns
   */
  constructPrompt({
    systemPrompt = "",
    contextTexts = [],
    chatHistory = [],
    userPrompt = "",
    attachments = [],
  }) {
    const prompt = {
      role: "system",
      content: `${systemPrompt}${this.#appendContext(contextTexts)}`,
    };
    return [
      prompt,
      ...formatChatHistory(chatHistory, this.#generateContent),
      {
        role: "user",
        content: this.#generateContent({ userPrompt, attachments }),
      },
    ];
  }
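
  // Illustrative sketch of the array constructPrompt() returns when one context
  // snippet and one image attachment are supplied (all values are placeholders):
  // [
  //   { role: "system", content: "<systemPrompt>\nContext:\n[CONTEXT 0]:\n<text>\n[END CONTEXT 0]\n\n" },
  //   ...formatted prior chat turns,
  //   {
  //     role: "user",
  //     content: [
  //       { type: "text", text: "<userPrompt>" },
  //       { type: "image_url", image_url: { url: "<attachment.contentString>" } },
  //     ],
  //   },
  // ]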

  async getChatCompletion(messages = null, { temperature = 0.7 }) {
    const result = await LLMPerformanceMonitor.measureAsyncFunction(
      this.openai.chat.completions
        .create({
          model: this.model,
          messages,
          temperature,
        })
        .catch((e) => {
          throw new Error(e.message);
        })
    );

    if (
      !result.output.hasOwnProperty("choices") ||
      result.output.choices.length === 0
    )
      return null;

    return {
      textResponse: result.output.choices[0].message.content,
      metrics: {
        prompt_tokens: result.output.usage?.prompt_tokens || 0,
        completion_tokens: result.output.usage?.completion_tokens || 0,
        total_tokens: result.output.usage?.total_tokens || 0,
        outputTps: result.output.usage?.completion_tokens / result.duration,
        duration: result.duration,
      },
    };
  }

  async streamGetChatCompletion(messages = null, { temperature = 0.7 }) {
    const measuredStreamRequest = await LLMPerformanceMonitor.measureStream(
      this.openai.chat.completions.create({
        model: this.model,
        stream: true,
        messages,
        temperature,
      }),
      messages
    );
    return measuredStreamRequest;
  }

  handleStream(response, stream, responseProps) {
    return handleDefaultStreamResponseV2(response, stream, responseProps);
  }

  // Simple wrapper for dynamic embedder & normalize interface for all LLM implementations
  async embedTextInput(textInput) {
    return await this.embedder.embedTextInput(textInput);
  }
  async embedChunks(textChunks = []) {
    return await this.embedder.embedChunks(textChunks);
  }

  async compressMessages(promptArgs = {}, rawHistory = []) {
    const { messageArrayCompressor } = require("../../helpers/chat");
    const messageArray = this.constructPrompt(promptArgs);
    return await messageArrayCompressor(this, messageArray, rawHistory);
  }
}

module.exports = {
  TextGenWebUILLM,
};
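
// Usage sketch (illustrative only; the require path is a placeholder and
// TEXT_GEN_WEB_UI_BASE_PATH must point at a running text-generation-webui
// OpenAI-compatible API):
//
//   const { TextGenWebUILLM } = require("./textGenWebUI");
//   const llm = new TextGenWebUILLM();
//   const messages = llm.constructPrompt({
//     systemPrompt: "You are a helpful assistant.",
//     userPrompt: "Summarize text-generation-webui in one sentence.",
//   });
//   llm.getChatCompletion(messages, { temperature: llm.defaultTemp })
//     .then((res) => console.log(res?.textResponse));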