You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

90 lines
2.8 KiB

11 months ago
  1. const { toChunks } = require("../../helpers");
  /**
   * Embedding provider that sends text to the OpenAI embeddings endpoint
   * through the official SDK client.
   */
  class OpenAiEmbedder {
    /**
     * Builds the SDK client from environment configuration.
     *
     * Reads `OPEN_AI_KEY` (required) and `EMBEDDING_MODEL_PREF` (optional,
     * falls back to "text-embedding-ada-002").
     *
     * @throws {Error} When `OPEN_AI_KEY` is not set.
     */
    constructor() {
      if (!process.env.OPEN_AI_KEY) throw new Error("No OpenAI API key was set.");
      // Loaded lazily so the SDK is only required when this provider is built.
      const { OpenAI: OpenAIApi } = require("openai");
      this.openai = new OpenAIApi({
        apiKey: process.env.OPEN_AI_KEY,
      });
      this.model = process.env.EMBEDDING_MODEL_PREF || "text-embedding-ada-002";

      // Limit of how many strings we can process in a single pass to stay with resource or network limits
      this.maxConcurrentChunks = 500;

      // https://platform.openai.com/docs/guides/embeddings/embedding-models
      this.embeddingMaxChunkLength = 8_191;
    }

    /**
     * Embeds a single input and returns only the first resulting embedding.
     *
     * @param {string|string[]} textInput - One string, or an array of strings
     *   (only the embedding of the first entry is returned).
     * @returns {Promise<Array>} The first embedding, or `[]` when nothing was
     *   produced.
     * @throws {Error} Propagates any failure raised by `embedChunks`.
     */
    async embedTextInput(textInput) {
      const result = await this.embedChunks(
        Array.isArray(textInput) ? textInput : [textInput]
      );
      return result?.[0] || [];
    }

    /**
     * Embeds every text chunk, batching requests by `maxConcurrentChunks`.
     *
     * @param {string[]} [textChunks=[]] - Strings to embed.
     * @returns {Promise<Array[]|null>} One embedding per returned data entry, in
     *   batch order, or `null` when no data came back or any entry lacks an
     *   `embedding` property.
     * @throws {Error} "OpenAI Failed to embed: ..." listing the distinct
     *   `[type]: message` pairs when at least one batch failed.
     */
    async embedChunks(textChunks = []) {
      // Normalises any thrown/rejected value into `{ type, message }` without
      // mutating it. Legacy `response.*` fields keep precedence; the fields the
      // current SDK puts directly on its errors (`code`, `status`) are read next.
      const describeFailure = (e) => ({
        type:
          e?.response?.data?.error?.code ||
          e?.response?.status ||
          e?.code ||
          e?.status ||
          "failed_to_embed",
        message: e?.response?.data?.error?.message || e?.message || String(e),
      });

      // Runs one batch and always resolves to `{ data, error }`, so one failed
      // batch cannot reject (or stall) the combined `Promise.all` below.
      const embedBatch = async (chunk) => {
        try {
          const result = await this.openai.embeddings.create({
            model: this.model,
            input: chunk,
          });
          return { data: result?.data, error: null };
        } catch (e) {
          return { data: [], error: describeFailure(e) };
        }
      };

      // Because there is a hard POST limit on how many chunks can be sent at once to OpenAI (~8mb)
      // we concurrently execute each max batch of text chunks possible.
      // Refer to constructor maxConcurrentChunks for more info.
      const embeddingRequests = [];
      for (const chunk of toChunks(textChunks, this.maxConcurrentChunks)) {
        embeddingRequests.push(embedBatch(chunk));
      }
      const results = await Promise.all(embeddingRequests);

      // If any errors were returned from OpenAI abort the entire sequence because the embeddings
      // will be incomplete.
      const uniqueErrors = new Set(
        results
          .filter((res) => !!res.error)
          .map(({ error }) => `[${error.type}]: ${error.message}`)
      );
      if (uniqueErrors.size > 0) {
        const error = Array.from(uniqueErrors).join(", ");
        throw new Error(`OpenAI Failed to embed: ${error}`);
      }

      const data = results.flatMap((res) => res.data || []);
      const isComplete =
        data.length > 0 &&
        data.every(
          (embd) =>
            embd != null &&
            Object.prototype.hasOwnProperty.call(embd, "embedding")
        );
      return isComplete ? data.map((embd) => embd.embedding) : null;
    }
  }
  // Public surface of this module: the embedder class is the only export.
  module.exports = { OpenAiEmbedder };