const { maximumChunkLength } = require("../../helpers");
const { Ollama } = require("ollama");

class OllamaEmbedder {
  constructor() {
    if (!process.env.EMBEDDING_BASE_PATH)
      throw new Error("No embedding base path was set.");
    if (!process.env.EMBEDDING_MODEL_PREF)
      throw new Error("No embedding model was set.");

    this.basePath = process.env.EMBEDDING_BASE_PATH;
    this.model = process.env.EMBEDDING_MODEL_PREF;

    // Limit of how many strings we can process in a single pass to stay within resource or network limits.
    this.maxConcurrentChunks = 1;
    this.embeddingMaxChunkLength = maximumChunkLength();
    this.client = new Ollama({ host: this.basePath });
    this.log(
      `initialized with model ${this.model} at ${this.basePath}. num_ctx: ${this.embeddingMaxChunkLength}`
    );
  }

  log(text, ...args) {
    console.log(`\x1b[36m[${this.constructor.name}]\x1b[0m ${text}`, ...args);
  }

  /**
   * Checks if the Ollama service is alive by pinging the base path.
   * @returns {Promise<boolean>} - A promise that resolves to true if the service is alive, false otherwise.
   */
  async #isAlive() {
    return await fetch(this.basePath)
      .then((res) => res.ok)
      .catch((e) => {
        this.log(e.message);
        return false;
      });
  }
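
  /**
   * Embeds a single piece of text (or the first element of an array of text)
   * by delegating to embedChunks and returning the first embedding vector.
   * @param {string|string[]} textInput - The text to embed.
   * @returns {Promise<number[]>} - The embedding vector, or an empty array if none was produced.
   */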
  async embedTextInput(textInput) {
    const result = await this.embedChunks(
      Array.isArray(textInput) ? textInput : [textInput]
    );
    return result?.[0] || [];
  }

  /**
   * This function takes an array of text chunks and embeds them using the Ollama API.
   * Chunks are processed sequentially to avoid overwhelming the API with too many requests
   * or running out of resources on the endpoint running the Ollama instance.
   *
   * We use the num_ctx option to set the context window to the max chunk length defined by the user in the settings
   * so that the maximum context window is used and content is not truncated.
   *
   * We also assume the default keep_alive option. This could cause issues with models being unloaded and reloaded
   * on low-memory machines, but that is simply a user-end issue we cannot control. If the LLM and embedder are
   * constantly being loaded and unloaded, the user should use another LLM or embedder to avoid this issue.
   * @param {string[]} textChunks - An array of text chunks to embed.
   * @returns {Promise<Array<number[]>>} - A promise that resolves to an array of embeddings.
   */
  async embedChunks(textChunks = []) {
    if (!(await this.#isAlive()))
      throw new Error(
        `Ollama service could not be reached. Is Ollama running?`
      );

    this.log(
      `Embedding ${textChunks.length} chunks of text with ${this.model}.`
    );

    let data = [];
    let error = null;
    for (const chunk of textChunks) {
      try {
        const res = await this.client.embeddings({
          model: this.model,
          prompt: chunk,
          options: {
            // Always set num_ctx to the max chunk length defined by the user in the settings
            // so that the maximum context window is used and content is not truncated.
            num_ctx: this.embeddingMaxChunkLength,
          },
        });
        const { embedding } = res;
        if (!Array.isArray(embedding) || embedding.length === 0)
          throw new Error("Ollama returned an empty embedding for chunk!");
        data.push(embedding);
      } catch (err) {
        this.log(err.message);
        error = err.message;
        data = [];
        break;
      }
    }

    if (!!error) throw new Error(`Ollama Failed to embed: ${error}`);
    return data.length > 0 ? data : null;
  }
}

module.exports = {
  OllamaEmbedder,
};
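
// Illustrative usage sketch, not part of the module. It assumes EMBEDDING_BASE_PATH and
// EMBEDDING_MODEL_PREF point at a running Ollama instance and a pulled embedding model;
// the model name and require path below are examples only.
//
//   process.env.EMBEDDING_BASE_PATH = "http://127.0.0.1:11434"; // Ollama's default port
//   process.env.EMBEDDING_MODEL_PREF = "nomic-embed-text";
//
//   const { OllamaEmbedder } = require("./OllamaEmbedder"); // hypothetical path to this file
//   const embedder = new OllamaEmbedder();
//
//   // Single string -> one embedding vector (number[]).
//   embedder.embedTextInput("Hello world").then((vector) => {
//     console.log(`Embedding length: ${vector.length}`);
//   });
//
//   // Array of chunks -> array of embedding vectors, embedded sequentially.
//   embedder.embedChunks(["first chunk", "second chunk"]).then((vectors) => {
//     console.log(`Embedded ${vectors?.length ?? 0} chunks.`);
//   });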