You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

209 lines
5.6 KiB

11 months ago
  1. // Helpers that convert workspace chats to some supported format
  2. // for external use by the user.
  3. const { WorkspaceChats } = require("../../../models/workspaceChats");
  4. const { EmbedChats } = require("../../../models/embedChats");
  5. const { safeJsonParse } = require("../../http");
  /**
   * Serialise prepared chat records into CSV text.
   *
   * The header row always begins with the five canonical columns; any other
   * key found on a record is appended after them in first-seen order. A
   * record that lacks a column (or holds null/undefined there) yields an
   * empty cell.
   *
   * @param {Array<Object>} preparedData - Flat records, one per chat.
   * @returns {Promise<string>} Header line plus one line per record, joined with "\n".
   */
  async function convertToCSV(preparedData) {
    const columnSet = new Set([
      "id",
      "workspace",
      "prompt",
      "response",
      "sent_at",
    ]);
    for (const entry of preparedData) {
      for (const key of Object.keys(entry)) columnSet.add(key);
    }

    // Column order is fixed once every record has been scanned.
    const columns = [...columnSet];
    const toLine = (entry) =>
      columns.map((column) => escapeCsv(String(entry[column] ?? ""))).join(",");

    // The header names are written as-is; only data cells go through escapeCsv.
    return [columns.join(","), ...preparedData.map(toLine)].join("\n");
  }
  /**
   * Serialise prepared chat records as pretty-printed JSON.
   *
   * @param {*} preparedData - Value to serialise (an array of flat chat records in this module).
   * @returns {Promise<string>} JSON text indented by four spaces.
   */
  async function convertToJSON(preparedData) {
    const indentWidth = 4;
    const serialized = JSON.stringify(preparedData, null, indentWidth);
    return serialized;
  }
  /**
   * Serialise Alpaca-style records (instruction / input / output) as
   * pretty-printed JSON. The serialisation itself is the same four-space
   * JSON.stringify used for the plain JSON export; only the record shape
   * supplied by the caller differs.
   *
   * ref: https://raw.githubusercontent.com/gururise/AlpacaDataCleaned/main/alpaca_data.json
   *
   * @param {*} preparedData - Value to serialise.
   * @returns {Promise<string>} JSON text indented by four spaces.
   */
  async function convertToJSONAlpaca(preparedData) {
    const indentWidth = 4;
    const serialized = JSON.stringify(preparedData, null, indentWidth);
    return serialized;
  }
  /**
   * Serialise a map of conversations as JSON Lines: one compact JSON document
   * per map value, separated by "\n". The map keys themselves are not written.
   *
   * @param {Object} workspaceChatsMap - Object whose values are the conversations to emit.
   * @returns {Promise<string>} JSONL text ("" for an empty map).
   */
  async function convertToJSONL(workspaceChatsMap) {
    const lines = [];
    for (const conversation of Object.values(workspaceChatsMap)) {
      lines.push(JSON.stringify(conversation));
    }
    return lines.join("\n");
  }
  /**
   * Load stored chats and reshape them for the requested export format.
   *
   * - "csv" / "json": array of flat records (id, prompt, response, sent_at,
   *   workspace, plus username and rating for workspace chats).
   * - "jsonAlpaca": array of { instruction, input, output } records.
   * - "jsonl" (any other registered format): object keyed by workspaceId whose
   *   values are { messages: [...] } conversations that start with a system
   *   message followed by alternating user / assistant messages.
   *
   * A chat whose stored response cannot be parsed no longer aborts the whole
   * export; its response text is simply left undefined.
   *
   * @param {string} [format="jsonl"] - Key of exportMap.
   * @param {string} [chatType="workspace"] - "workspace" or "embed".
   * @returns {Promise<Array<Object>|Object>} Data shaped for the matching converter.
   * @throws {Error} When format is not a key of exportMap or chatType is unknown.
   */
  async function prepareChatsForExport(format = "jsonl", chatType = "workspace") {
    // Go through Object.prototype so the check does not rely on a method
    // looked up on the registry object itself.
    if (!Object.prototype.hasOwnProperty.call(exportMap, format))
      throw new Error(`Invalid export type: ${format}`);

    const chats = await fetchChatsForExport(chatType);

    if (format === "csv" || format === "json")
      return chats.map((chat) => toFlatExportRecord(chat, chatType));

    if (format === "jsonAlpaca") {
      return chats.map((chat) => ({
        instruction: buildSystemPrompt(
          chat,
          chat.workspace ? chat.workspace.openAiPrompt : null
        ),
        input: chat.prompt,
        output: parseChatResponse(chat.response).text,
      }));
    }

    return groupChatsByWorkspace(chats);
  }

  // Fetches every chat of the given type ordered by ascending id; throws on an unknown type.
  async function fetchChatsForExport(chatType) {
    if (chatType === "workspace")
      return WorkspaceChats.whereWithData({}, null, null, { id: "asc" });
    if (chatType === "embed")
      return EmbedChats.whereWithEmbedAndWorkspace({}, null, { id: "asc" }, null);
    throw new Error(`Invalid chat type: ${chatType}`);
  }

  // Parses a stored response string, yielding {} when it is malformed or not an object.
  // NOTE(review): presumes safeJsonParse returns a nullish fallback on bad
  // input, which is how buildSystemPrompt in this file already treats it.
  function parseChatResponse(response) {
    return safeJsonParse(response) ?? {};
  }

  // Builds the flat row used by the csv and json exports for one chat.
  function toFlatExportRecord(chat, chatType) {
    const baseData = {
      id: chat.id,
      prompt: chat.prompt,
      response: parseChatResponse(chat.response).text,
      sent_at: chat.createdAt,
    };

    if (chatType === "embed") {
      return {
        ...baseData,
        // An embed config without a workspace must not crash the export.
        workspace: chat.embed_config?.workspace?.name ?? "unknown workspace",
      };
    }

    return {
      ...baseData,
      workspace: chat.workspace ? chat.workspace.name : "unknown workspace",
      username: chat.user
        ? chat.user.username
        : chat.api_session_id !== null
          ? "API"
          : "unknown user",
      rating:
        chat.feedbackScore === null
          ? "--"
          : chat.feedbackScore
            ? "GOOD"
            : "BAD",
    };
  }

  // Folds chats into one { messages } conversation per workspaceId for the jsonl export.
  function groupChatsByWorkspace(chats) {
    return chats.reduce((acc, chat) => {
      const { prompt, response, workspaceId } = chat;
      const responseJson = parseChatResponse(response);

      if (!acc[workspaceId]) {
        // The system message comes from the first chat seen for this workspace.
        acc[workspaceId] = {
          messages: [
            {
              role: "system",
              content: chat.workspace?.openAiPrompt || STANDARD_PROMPT,
            },
          ],
        };
      }

      acc[workspaceId].messages.push(
        { role: "user", content: prompt },
        { role: "assistant", content: responseJson.text }
      );
      return acc;
    }, {});
  }
  // Registry of supported export formats. Each key is a format name and maps
  // to the content type reported for that export together with the converter
  // (`func`) that turns prepared chat data into the output text.
  const exportMap = {
    json: { contentType: "application/json", func: convertToJSON },
    csv: { contentType: "text/csv", func: convertToCSV },
    jsonl: { contentType: "application/jsonl", func: convertToJSONL },
    jsonAlpaca: { contentType: "application/json", func: convertToJSONAlpaca },
  };
  /**
   * Quote a single value for use as a CSV cell.
   *
   * The value is wrapped in double quotes, embedded double quotes are doubled,
   * and every line break (CRLF, lone CR or LF) is flattened to one space so a
   * cell never spans lines and no stray carriage return is left behind.
   *
   * @param {*} str - Value to escape; non-strings are converted with String().
   * @returns {string} The quoted cell, or '""' for null / undefined.
   */
  function escapeCsv(str) {
    if (str === null || str === undefined) return '""';
    // String() keeps numbers, booleans and the like from throwing on .replace.
    const flattened = String(str)
      .replace(/\r\n|\r|\n/g, " ")
      .replace(/"/g, '""');
    return `"${flattened}"`;
  }
  /**
   * Export chats in the requested format.
   *
   * Picks the converter and content type registered under `format` in
   * exportMap (the jsonl entry when `format` is not registered), prepares the
   * chats via prepareChatsForExport, and runs the converter on the result.
   *
   * @param {string} [format="jsonl"] - Export format name.
   * @param {string} [chatType="workspace"] - Chat type forwarded to prepareChatsForExport.
   * @returns {Promise<{contentType: string, data: *}>} Content type plus the converter's output.
   */
  async function exportChatsAsType(format = "jsonl", chatType = "workspace") {
    let exporter = exportMap.jsonl;
    if (exportMap.hasOwnProperty(format)) exporter = exportMap[format];
    const { contentType, func } = exporter;

    const preparedChats = await prepareChatsForExport(format, chatType);
    const data = await func(preparedChats);
    return { contentType, data };
  }
  // Default system prompt used when no explicit prompt is supplied.
  const STANDARD_PROMPT =
    "Given the following conversation, relevant context, and a follow up question, reply with an answer to the current question the user is asking. Return only your response to the question given the above information following the users instructions as needed.";

  /**
   * Build the system prompt for one chat: the given prompt (STANDARD_PROMPT
   * when it is null or undefined) followed, when the chat's parsed response
   * lists any sources, by a "Context:" section containing each source's text
   * wrapped in zero-based [CONTEXT i] / [END CONTEXT i] markers.
   *
   * @param {Object} chat - Chat record; `chat.response` is parsed with safeJsonParse.
   * @param {?string} [prompt=null] - Prompt to use instead of STANDARD_PROMPT.
   * @returns {string} Prompt text, with the context section appended when sources exist.
   */
  function buildSystemPrompt(chat, prompt = null) {
    const sources = safeJsonParse(chat.response)?.sources || [];
    const contextBlocks = sources.map((source, idx) => {
      const text = source.text;
      return `[CONTEXT ${idx}]:\n${text}\n[END CONTEXT ${idx}]\n\n`;
    });

    let contextSection = "";
    if (sources.length > 0) {
      contextSection = `\nContext:\n${contextBlocks.join("")}`;
    }
    return `${prompt ?? STANDARD_PROMPT}${contextSection}`;
  }
  181. module.exports = {
  182. prepareChatsForExport,
  183. exportChatsAsType,
  184. };