const { v4 } = require("uuid");
const {
  writeResponseChunk,
  clientAbortedHandler,
  formatChatHistory,
} = require("../../helpers/chat/responses");
const { NativeEmbedder } = require("../../EmbeddingEngines/native");
const { MODEL_MAP } = require("../modelMap");
const {
  LLMPerformanceMonitor,
} = require("../../helpers/chat/LLMPerformanceMonitor");

class AnthropicLLM {
  constructor(embedder = null, modelPreference = null) {
    if (!process.env.ANTHROPIC_API_KEY)
      throw new Error("No Anthropic API key was set.");

    // Docs: https://www.npmjs.com/package/@anthropic-ai/sdk
    const AnthropicAI = require("@anthropic-ai/sdk");
    const anthropic = new AnthropicAI({
      apiKey: process.env.ANTHROPIC_API_KEY,
    });
    this.anthropic = anthropic;
    this.model =
      modelPreference || process.env.ANTHROPIC_MODEL_PREF || "claude-2.0";
    this.limits = {
      history: this.promptWindowLimit() * 0.15,
      system: this.promptWindowLimit() * 0.15,
      user: this.promptWindowLimit() * 0.7,
    };

    this.embedder = embedder ?? new NativeEmbedder();
    this.defaultTemp = 0.7;
  }

  streamingEnabled() {
    return "streamGetChatCompletion" in this;
  }

  static promptWindowLimit(modelName) {
    return MODEL_MAP.anthropic[modelName] ?? 100_000;
  }

  promptWindowLimit() {
    return MODEL_MAP.anthropic[this.model] ?? 100_000;
  }

  isValidChatCompletionModel(modelName = "") {
    const validModels = [
      "claude-instant-1.2",
      "claude-2.0",
      "claude-2.1",
      "claude-3-haiku-20240307",
      "claude-3-sonnet-20240229",
      "claude-3-opus-latest",
      "claude-3-5-haiku-latest",
      "claude-3-5-haiku-20241022",
      "claude-3-5-sonnet-latest",
      "claude-3-5-sonnet-20241022",
      "claude-3-5-sonnet-20240620",
    ];
    return validModels.includes(modelName);
  }

  /**
   * Generates appropriate content array for a message + attachments.
   * @param {{userPrompt:string, attachments: import("../../helpers").Attachment[]}}
   * @returns {string|object[]}
   */
  #generateContent({ userPrompt, attachments = [] }) {
    if (!attachments.length) {
      return userPrompt;
    }

    const content = [{ type: "text", text: userPrompt }];
    for (let attachment of attachments) {
      content.push({
        type: "image",
        source: {
          type: "base64",
          media_type: attachment.mime,
          data: attachment.contentString.split("base64,")[1],
        },
      });
    }
    return content.flat();
  }
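  /**
   * Descriptive note (added documentation, not original to this file): builds
   * the ordered message array sent to Anthropic — a system-role entry holding
   * the system prompt plus appended context, the formatted chat history, and
   * the current user message with any attachments.
   * @param {{systemPrompt?: string, contextTexts?: string[], chatHistory?: object[], userPrompt?: string, attachments?: object[]}} param0
   * @returns {object[]} messages array; index 0 carries the system content.
   */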
  constructPrompt({
    systemPrompt = "",
    contextTexts = [],
    chatHistory = [],
    userPrompt = "",
    attachments = [], // This is the specific attachment for only this prompt
  }) {
    const prompt = {
      role: "system",
      content: `${systemPrompt}${this.#appendContext(contextTexts)}`,
    };

    return [
      prompt,
      ...formatChatHistory(chatHistory, this.#generateContent),
      {
        role: "user",
        content: this.#generateContent({ userPrompt, attachments }),
      },
    ];
  }
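  /**
   * Descriptive note (added documentation, not original to this file): sends a
   * non-streaming completion request, measured by LLMPerformanceMonitor, and
   * returns the text plus token/timing metrics. On failure the error object is
   * returned as textResponse with empty metrics.
   * @param {object[]|null} messages - output of constructPrompt(); index 0 must be the system message.
   * @param {{temperature?: number}} options
   * @returns {Promise<{textResponse: string|Error, metrics: object}>}
   */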
  async getChatCompletion(messages = null, { temperature = 0.7 }) {
    if (!this.isValidChatCompletionModel(this.model))
      throw new Error(
        `Anthropic chat: ${this.model} is not valid for chat completion!`
      );

    try {
      const result = await LLMPerformanceMonitor.measureAsyncFunction(
        this.anthropic.messages.create({
          model: this.model,
          max_tokens: 4096,
          system: messages[0].content, // Strip out the system message
          messages: messages.slice(1), // Pop off the system message
          temperature: Number(temperature ?? this.defaultTemp),
        })
      );

      const promptTokens = result.output.usage.input_tokens;
      const completionTokens = result.output.usage.output_tokens;
      return {
        textResponse: result.output.content[0].text,
        metrics: {
          prompt_tokens: promptTokens,
          completion_tokens: completionTokens,
          total_tokens: promptTokens + completionTokens,
          outputTps: completionTokens / result.duration,
          duration: result.duration,
        },
      };
    } catch (error) {
      console.log(error);
      return { textResponse: error, metrics: {} };
    }
  }
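  /**
   * Descriptive note (added documentation, not original to this file): opens a
   * streaming completion request wrapped by the performance monitor; the
   * returned measured stream is consumed by handleStream() below.
   * @param {object[]|null} messages - output of constructPrompt(); index 0 must be the system message.
   * @param {{temperature?: number}} options
   * @returns {Promise<import('../../helpers/chat/LLMPerformanceMonitor').MonitoredStream>}
   */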
  async streamGetChatCompletion(messages = null, { temperature = 0.7 }) {
    if (!this.isValidChatCompletionModel(this.model))
      throw new Error(
        `Anthropic chat: ${this.model} is not valid for chat completion!`
      );

    const measuredStreamRequest = await LLMPerformanceMonitor.measureStream(
      this.anthropic.messages.stream({
        model: this.model,
        max_tokens: 4096,
        system: messages[0].content, // Strip out the system message
        messages: messages.slice(1), // Pop off the system message
        temperature: Number(temperature ?? this.defaultTemp),
      }),
      messages,
      false
    );
    return measuredStreamRequest;
  }

  /**
   * Handles the stream response from the Anthropic API.
   * @param {Object} response - the response object
   * @param {import('../../helpers/chat/LLMPerformanceMonitor').MonitoredStream} stream - the stream response from the Anthropic API w/tracking
   * @param {Object} responseProps - the response properties
   * @returns {Promise<string>}
   */
  handleStream(response, stream, responseProps) {
    return new Promise((resolve) => {
      let fullText = "";
      const { uuid = v4(), sources = [] } = responseProps;
      let usage = {
        prompt_tokens: 0,
        completion_tokens: 0,
      };

      // Establish listener to early-abort a streaming response
      // in case things go sideways or the user does not like the response.
      // We preserve the generated text but continue as if chat was completed
      // to preserve previously generated content.
      const handleAbort = () => {
        stream?.endMeasurement(usage);
        clientAbortedHandler(resolve, fullText);
      };
      response.on("close", handleAbort);

      stream.on("error", (event) => {
        const parseErrorMsg = (event) => {
          const error = event?.error?.error;
          if (!!error)
            return `Anthropic Error:${error?.type || "unknown"} ${
              error?.message || "unknown error."
            }`;
          return event.message;
        };

        writeResponseChunk(response, {
          uuid,
          sources: [],
          type: "abort",
          textResponse: null,
          close: true,
          error: parseErrorMsg(event),
        });
        response.removeListener("close", handleAbort);
        stream?.endMeasurement(usage);
        resolve(fullText);
      });

      stream.on("streamEvent", (message) => {
        const data = message;
        if (data.type === "message_start")
          usage.prompt_tokens = data?.message?.usage?.input_tokens;
        if (data.type === "message_delta")
          usage.completion_tokens = data?.usage?.output_tokens;

        if (
          data.type === "content_block_delta" &&
          data.delta.type === "text_delta"
        ) {
          const text = data.delta.text;
          fullText += text;
          writeResponseChunk(response, {
            uuid,
            sources,
            type: "textResponseChunk",
            textResponse: text,
            close: false,
            error: false,
          });
        }

        if (
          message.type === "message_stop" ||
          (data.stop_reason && data.stop_reason === "end_turn")
        ) {
          writeResponseChunk(response, {
            uuid,
            sources,
            type: "textResponseChunk",
            textResponse: "",
            close: true,
            error: false,
          });
          response.removeListener("close", handleAbort);
          stream?.endMeasurement(usage);
          resolve(fullText);
        }
      });
    });
  }

  #appendContext(contextTexts = []) {
    if (!contextTexts || !contextTexts.length) return "";
    return (
      "\nContext:\n" +
      contextTexts
        .map((text, i) => {
          return `[CONTEXT ${i}]:\n${text}\n[END CONTEXT ${i}]\n\n`;
        })
        .join("")
    );
  }

  async compressMessages(promptArgs = {}, rawHistory = []) {
    const { messageStringCompressor } = require("../../helpers/chat");
    const compressedPrompt = await messageStringCompressor(
      this,
      promptArgs,
      rawHistory
    );
    return compressedPrompt;
  }

  // Simple wrapper for dynamic embedder & normalize interface for all LLM implementations
  async embedTextInput(textInput) {
    return await this.embedder.embedTextInput(textInput);
  }
  async embedChunks(textChunks = []) {
    return await this.embedder.embedChunks(textChunks);
  }
}

module.exports = {
  AnthropicLLM,
};
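
// Illustrative usage sketch (added example, not part of this module). Assumes
// ANTHROPIC_API_KEY is set in the environment; the require path and model name
// below are examples only.
//
//   const { AnthropicLLM } = require("./index");
//   const llm = new AnthropicLLM(null, "claude-3-5-sonnet-latest");
//   const messages = llm.constructPrompt({
//     systemPrompt: "You are a helpful assistant.",
//     contextTexts: [],
//     chatHistory: [],
//     userPrompt: "Summarize the attached notes in one paragraph.",
//     attachments: [],
//   });
//   const { textResponse, metrics } = await llm.getChatCompletion(messages, {
//     temperature: llm.defaultTemp,
//   });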