Commit
Make RAG search prompt configurable via API, include search result summary in response.
Chris committed Nov 6, 2024
1 parent eddb589 commit d5d3d10
Showing 3 changed files with 94 additions and 36 deletions.
29 changes: 29 additions & 0 deletions src/services/assistants/interfaces.ts
@@ -0,0 +1,29 @@
import { PromptSearchLLMResponse, PromptSearchType } from "../tools/promptSearch"

export interface PromptSearchServiceDataTypeEmails extends PromptSearchServiceDataType {
attachmentLength?: number
}

export interface PromptSearchServiceDataType {
limit?: number,
maxLength?: number,
outputType?: string
}

export interface PromptSearchServiceDataTypes {
emails?: PromptSearchServiceDataTypeEmails,
chatMessages?: PromptSearchServiceDataType,
favorites?: PromptSearchServiceDataType,
following?: PromptSearchServiceDataType,
files?: PromptSearchServiceDataType,
calendarEvents?: PromptSearchServiceDataType
}

export interface PromptSearchServiceConfig {
searchType?: PromptSearchType
maxContextLength?: number
dataTypes?: PromptSearchServiceDataTypes
promptSearchConfig?: PromptSearchLLMResponseOptional
}

export interface PromptSearchLLMResponseOptional extends Partial<PromptSearchLLMResponse> {}
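For illustration (not part of the commit), a caller-supplied config conforming to these interfaces could look like the sketch below; every field is optional and the override values are hypothetical.

import { PromptSearchServiceConfig } from "./interfaces"

// Hypothetical override: only the fields being changed need to be supplied;
// anything omitted falls back to the service defaults.
const exampleConfig: PromptSearchServiceConfig = {
    maxContextLength: 10000,
    dataTypes: {
        emails: { limit: 10, maxLength: 300, attachmentLength: 200 },
        files: { limit: 5, maxLength: 1000 }
    }
}
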
80 changes: 48 additions & 32 deletions src/services/assistants/search.ts
@@ -1,36 +1,52 @@
const _ = require('lodash')
import { defaultModel, LLM } from "../llm"
import { LLM } from "../llm"
import { PromptSearch, PromptSearchLLMResponse, PromptSearchSort, PromptSearchType } from "../tools/promptSearch"
import { ChatThreadResult, SearchService, SearchSortType, SearchType } from "../search"
import { VeridaService } from '../veridaService'
import { SchemaEmail, SchemaEvent, SchemaFavourite, SchemaFile, SchemaFollowing, SchemaSocialChatMessage } from '../../schemas'
import { Helpers } from "../helpers"
import { EmailShortlist } from "../tools/emailShortlist"

const MAX_EMAIL_LENGTH = 500
const MAX_DOC_LENGTH = 2000
const MAX_ATTACHMENT_LENGTH = 500
const MAX_CONTEXT_LENGTH = 20000 // (~5000 tokens)

const MAX_DATERANGE_EMAILS = 30
const MAX_DATERANGE_CHAT_MESSAGES = 100
const MAX_DATERANGE_FAVORITES = 30
const MAX_DATERANGE_FOLLOWING = 30
const MAX_DATERANGE_FILES = 20
const MAX_DATERANGE_CALENDAR_EVENT = 20

// "You are a personal assistant with the ability to search the following categories; emails, chat_history and documents. You receive a prompt and generate a JSON response (with no other text) that provides search queries that will source useful information to help answer the prompt. Search queries for each category should contain three properties; \"terms\" (an array of 10 individual words), \"beforeDate\" (results must be before this date), \"afterDate\" (results must be after this date), \"resultType\" (either \"count\" to count results or \"results\" to return the search results), \"filter\" (an array of key, value pairs of fields to filter the results). Categories can be empty if not relevant to the prompt. The current date is 2024-08-12.\n\nHere is an example JSON response:\n{\"email\": {\"terms\": [\"golf\", \"tennis\", \"soccer\"], \"beforeDate\": \"2024-06-01\", \"afterDate\": \"2024-01-10\" \"filter\": {\"from\": \"dave\"}, \"resultType\": \"results}}\n\nHere is the prompt:\nWhat subscriptions do I currently pay for?"
import { PromptSearchServiceConfig } from "./interfaces"

const DEFAULT_PROMPT_SEARCH_SERVICE_CONFIG: PromptSearchServiceConfig = {
maxContextLength: 20000, // (~5000 tokens)
dataTypes: {
emails: {
limit: 30,
maxLength: 500,
attachmentLength: 500
},
chatMessages: {
limit: 100
},
favorites: {
limit: 30
},
following: {
limit: 30
},
files: {
limit: 30,
maxLength: 2000
},
calendarEvents: {
limit: 30
},
}
}

export class PromptSearchService extends VeridaService {

public async prompt(prompt: string, llm: LLM): Promise<{
public async prompt(prompt: string, llm: LLM, config: PromptSearchServiceConfig = {}): Promise<{
result: string,
duration: number,
process: PromptSearchLLMResponse
}> {
config = _.merge({}, DEFAULT_PROMPT_SEARCH_SERVICE_CONFIG, config)

const start = Date.now()
const promptSearch = new PromptSearch(llm)
const promptSearchResult = await promptSearch.search(prompt)
const promptSearchResult = await promptSearch.search(prompt, undefined, config.promptSearchConfig)

console.log(promptSearchResult)

@@ -69,34 +85,34 @@ export class PromptSearchService extends VeridaService {
const sort = promptSearchResult.sort == PromptSearchSort.RECENT ? SearchSortType.RECENT : SearchSortType.OLDEST
console.log(`Searching by timeframe: ${maxDatetime} ${sort}`)
if (promptSearchResult.databases.indexOf(SearchType.EMAILS) !== -1) {
emails = await searchService.schemaByDateRange<SchemaEmail>(SearchType.EMAILS, maxDatetime, sort, MAX_DATERANGE_EMAILS*3)
emails = await searchService.schemaByDateRange<SchemaEmail>(SearchType.EMAILS, maxDatetime, sort, config.dataTypes.emails.limit*3)
const emailShortlist = new EmailShortlist(llm)
emails = await emailShortlist.shortlist(prompt, emails, MAX_DATERANGE_EMAILS)
emails = await emailShortlist.shortlist(prompt, emails, config.dataTypes.emails.limit)
}
if (promptSearchResult.databases.indexOf(SearchType.FILES) !== -1) {
files = await searchService.schemaByDateRange<SchemaFile>(SearchType.FILES, maxDatetime, sort, MAX_DATERANGE_FILES)
files = await searchService.schemaByDateRange<SchemaFile>(SearchType.FILES, maxDatetime, sort, config.dataTypes.files.limit)
}
if (promptSearchResult.databases.indexOf(SearchType.FAVORITES) !== -1) {
favourites = await searchService.schemaByDateRange<SchemaFavourite>(SearchType.FAVORITES, maxDatetime, sort, MAX_DATERANGE_FAVORITES)
favourites = await searchService.schemaByDateRange<SchemaFavourite>(SearchType.FAVORITES, maxDatetime, sort, config.dataTypes.favorites.limit)
}
if (promptSearchResult.databases.indexOf(SearchType.FOLLOWING) !== -1) {
following = await searchService.schemaByDateRange<SchemaFollowing>(SearchType.FOLLOWING, maxDatetime, sort, MAX_DATERANGE_FOLLOWING)
following = await searchService.schemaByDateRange<SchemaFollowing>(SearchType.FOLLOWING, maxDatetime, sort, config.dataTypes.following.limit)
}
if (promptSearchResult.databases.indexOf(SearchType.CHAT_MESSAGES) !== -1) {
chatMessages = <SchemaSocialChatMessage[]> await searchService.schemaByDateRange(SearchType.CHAT_MESSAGES, maxDatetime, sort, MAX_DATERANGE_CHAT_MESSAGES)
chatMessages = <SchemaSocialChatMessage[]> await searchService.schemaByDateRange(SearchType.CHAT_MESSAGES, maxDatetime, sort, config.dataTypes.chatMessages.limit)
}
if (promptSearchResult.databases.indexOf(SearchType.CALENDAR_EVENT) !== -1) {
calendarEvents = await searchService.schemaByDateRange<SchemaEvent>(SearchType.CALENDAR_EVENT, maxDatetime, sort, MAX_DATERANGE_CALENDAR_EVENT)
calendarEvents = await searchService.schemaByDateRange<SchemaEvent>(SearchType.CALENDAR_EVENT, maxDatetime, sort, config.dataTypes.calendarEvents.limit)
}
}

console.log('files / emails / favourites / following / chatThreads / calendarEvents')
console.log(files.length, emails.length, favourites.length, following.length, chatThreads.length, calendarEvents.length)
promptSearchResult.search_summary = `Files: ${files.length}, Emails: ${emails.length}, Favorites: ${favourites.length}, Following: ${following.length}, ChatThreads: ${chatThreads.length}, CalendarEvents: ${calendarEvents.length}`
console.log(promptSearchResult.search_summary)

let finalPrompt = `Answer this prompt:\n${prompt}\nHere are some recent messages that may help you provide a relevant answer.\n`
let finalPrompt = `Answer this prompt:\n${prompt}\nHere is some of my personal data that may help you provide a relevant answer.\n`
let contextString = ''

let maxChatMessages = MAX_DATERANGE_CHAT_MESSAGES
let maxChatMessages = config.dataTypes.chatMessages.limit
for (const chatThread of chatThreads) {
for (const chatMessage of chatThread.messages) {
contextString += `From: ${chatMessage.fromName} <${chatMessage.fromHandle}> (${chatMessage.groupName})\nBody: ${chatMessage.messageText}\n\n`
@@ -112,7 +128,7 @@ export class PromptSearchService extends VeridaService {
}

for (const file of files) {
contextString += `File: ${file.name} ${file.contentText.substring(0,MAX_DOC_LENGTH)} (via ${file.sourceApplication})\n\n`
contextString += `File: ${file.name} ${file.contentText.substring(0,config.dataTypes.files.maxLength)} (via ${file.sourceApplication})\n\n`
}

for (const favourite of favourites) {
@@ -132,15 +148,15 @@ export class PromptSearchService extends VeridaService {
let emailCount = 0
for (const email of emails) {
let extraContext = ""
let body = email.messageText.substring(0, MAX_EMAIL_LENGTH)
let body = email.messageText.substring(0, config.dataTypes.emails.maxLength)
if (email.attachments) {
for (const attachment of email.attachments) {
body += attachment.textContent!.substring(0, MAX_ATTACHMENT_LENGTH)
body += attachment.textContent!.substring(0, config.dataTypes.emails.attachmentLength)
}
}

extraContext = `To: ${email.toName} <${email.toEmail}>\nFrom: ${email.fromName} <${email.fromEmail}> (${email.name})\nBody: ${body}\n\n`
if ((extraContext.length + contextString.length + finalPrompt.length) > MAX_CONTEXT_LENGTH) {
if ((extraContext.length + contextString.length + finalPrompt.length) > config.maxContextLength) {
break
}

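The prompt() method deep-merges the caller's config over DEFAULT_PROMPT_SEARCH_SERVICE_CONFIG, so callers only supply the fields they want to change. A minimal sketch of that merge behaviour, assuming access to the DEFAULT_PROMPT_SEARCH_SERVICE_CONFIG constant above (values are illustrative):

const _ = require('lodash')

// Hypothetical partial config supplied by an API caller
const callerConfig = { dataTypes: { emails: { limit: 5 } } }

// lodash merge is deep and later sources take precedence, so supplied fields
// override the defaults while everything omitted is retained.
const merged = _.merge({}, DEFAULT_PROMPT_SEARCH_SERVICE_CONFIG, callerConfig)
// merged.dataTypes.emails.limit     -> 5     (caller override)
// merged.dataTypes.emails.maxLength -> 500   (default retained)
// merged.maxContextLength           -> 20000 (default retained)
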
21 changes: 17 additions & 4 deletions src/services/tools/promptSearch.ts
@@ -1,4 +1,6 @@
const _ = require('lodash')
import { KeywordSearchTimeframe } from "../../helpers/interfaces";
import { PromptSearchLLMResponseOptional } from "../assistants/interfaces";
import { LLM } from "../llm"
import { SearchType } from "../search";

@@ -12,7 +14,7 @@ You must generate a JSON response containing the following information:
- output_type: The amount of detail in the output of each search result to provide meaningful context. full_content, summary, headline
- profile_information; Array of these options only; name, contactInfo, demographics, lifestyle, preferences, habits, financial, health, personality, employment, education, skills, language, interests
JSON only, no explanation or formatting.`
Output JSON only with no explanation or formatting.`

export enum PromptSearchType {
KEYWORDS = "keywords",
@@ -43,7 +45,8 @@ export interface PromptSearchLLMResponse {
"financial" | "health" | "personality" | "employment" | "education" | "skills" |
"language" | "interests"
>;
}
search_summary?: string
}

export class PromptSearch {

@@ -53,9 +56,19 @@ export class PromptSearch {
this.llm = llm
}

public async search(userPrompt: string): Promise<PromptSearchLLMResponse> {
public async search(userPrompt: string, retries = 3, defaultResponse?: PromptSearchLLMResponseOptional): Promise<PromptSearchLLMResponse> {
const response = await this.llm.prompt(userPrompt, systemPrompt)
return <PromptSearchLLMResponse> JSON.parse(response.choices[0].message.content!)

try {
const searchResponse = <PromptSearchLLMResponse> JSON.parse(response.choices[0].message.content!)
return _.merge({}, searchResponse, defaultResponse ? defaultResponse : {})
} catch (err: any) {
if (retries === 0) {
throw new Error(`No user data query available`)
} else {
return this.search(userPrompt, retries - 1, defaultResponse)
}
}

}

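Worth noting: because lodash merge gives precedence to later sources, any fields supplied via promptSearchConfig override the corresponding fields parsed from the LLM response rather than acting as fallbacks. A small sketch with hypothetical values:

const _ = require('lodash')

// Pretend the LLM returned this parsed query (values are illustrative)
const llmResponse = { sort: "recent", databases: ["emails", "files"] }

// ...and the caller pinned the sort order via promptSearchConfig
const overrides = { sort: "oldest" }

const finalResponse = _.merge({}, llmResponse, overrides)
// finalResponse.databases -> ["emails", "files"] (from the LLM)
// finalResponse.sort      -> "oldest"            (caller value wins)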
