Commit
Make RAG search prompt configurable via API, include search result summary in response.
Chris committed Nov 6, 2024
1 parent eddb589 commit d5d3d10
Showing 3 changed files with 94 additions and 36 deletions.
29 changes: 29 additions & 0 deletions src/services/assistants/interfaces.ts
@@ -0,0 +1,29 @@
import { PromptSearchLLMResponse, PromptSearchType } from "../tools/promptSearch"

export interface PromptSearchServiceDataTypeEmails extends PromptSearchServiceDataType {
attachmentLength?: number
}

export interface PromptSearchServiceDataType {
limit?: number,
maxLength?: number,
outputType?: string
}

export interface PromptSearchServiceDataTypes {
emails?: PromptSearchServiceDataTypeEmails,
chatMessages?: PromptSearchServiceDataType,
favorites?: PromptSearchServiceDataType,
following?: PromptSearchServiceDataType,
files?: PromptSearchServiceDataType,
calendarEvents?: PromptSearchServiceDataType
}

export interface PromptSearchServiceConfig {
searchType?: PromptSearchType
maxContextLength?: number
dataTypes?: PromptSearchServiceDataTypes
promptSearchConfig?: PromptSearchLLMResponseOptional
}

export interface PromptSearchLLMResponseOptional extends Partial<PromptSearchLLMResponse> {}
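For illustration (not part of the commit), a caller-supplied config conforming to these interfaces could look like the sketch below; every field is optional and the override values are hypothetical.

import { PromptSearchServiceConfig } from "./interfaces"

// Hypothetical override: only the fields being changed need to be supplied;
// anything omitted falls back to the service defaults.
const exampleConfig: PromptSearchServiceConfig = {
    maxContextLength: 10000,
    dataTypes: {
        emails: { limit: 10, maxLength: 300, attachmentLength: 200 },
        files: { limit: 5, maxLength: 1000 }
    }
}
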
80 changes: 48 additions & 32 deletions src/services/assistants/search.ts
@@ -1,36 +1,52 @@
const _ = require('lodash')
import { defaultModel, LLM } from "../llm"
import { LLM } from "../llm"
import { PromptSearch, PromptSearchLLMResponse, PromptSearchSort, PromptSearchType } from "../tools/promptSearch"
import { ChatThreadResult, SearchService, SearchSortType, SearchType } from "../search"
import { VeridaService } from '../veridaService'
import { SchemaEmail, SchemaEvent, SchemaFavourite, SchemaFile, SchemaFollowing, SchemaSocialChatMessage } from '../../schemas'
import { Helpers } from "../helpers"
import { EmailShortlist } from "../tools/emailShortlist"

const MAX_EMAIL_LENGTH = 500
const MAX_DOC_LENGTH = 2000
const MAX_ATTACHMENT_LENGTH = 500
const MAX_CONTEXT_LENGTH = 20000 // (~5000 tokens)

const MAX_DATERANGE_EMAILS = 30
const MAX_DATERANGE_CHAT_MESSAGES = 100
const MAX_DATERANGE_FAVORITES = 30
const MAX_DATERANGE_FOLLOWING = 30
const MAX_DATERANGE_FILES = 20
const MAX_DATERANGE_CALENDAR_EVENT = 20

// "You are a personal assistant with the ability to search the following categories; emails, chat_history and documents. You receive a prompt and generate a JSON response (with no other text) that provides search queries that will source useful information to help answer the prompt. Search queries for each category should contain three properties; \"terms\" (an array of 10 individual words), \"beforeDate\" (results must be before this date), \"afterDate\" (results must be after this date), \"resultType\" (either \"count\" to count results or \"results\" to return the search results), \"filter\" (an array of key, value pairs of fields to filter the results). Categories can be empty if not relevant to the prompt. The current date is 2024-08-12.\n\nHere is an example JSON response:\n{\"email\": {\"terms\": [\"golf\", \"tennis\", \"soccer\"], \"beforeDate\": \"2024-06-01\", \"afterDate\": \"2024-01-10\" \"filter\": {\"from\": \"dave\"}, \"resultType\": \"results}}\n\nHere is the prompt:\nWhat subscriptions do I currently pay for?"
import { PromptSearchServiceConfig } from "./interfaces"

const DEFAULT_PROMPT_SEARCH_SERVICE_CONFIG: PromptSearchServiceConfig = {
maxContextLength: 20000, // (~5000 tokens)
dataTypes: {
emails: {
limit: 30,
maxLength: 500,
attachmentLength: 500
},
chatMessages: {
limit: 100
},
favorites: {
limit: 30
},
following: {
limit: 30
},
files: {
limit: 30,
maxLength: 2000
},
calendarEvents: {
limit: 30
},
}
}

export class PromptSearchService extends VeridaService {

public async prompt(prompt: string, llm: LLM): Promise<{
public async prompt(prompt: string, llm: LLM, config: PromptSearchServiceConfig = {}): Promise<{
result: string,
duration: number,
process: PromptSearchLLMResponse
}> {
config = _.merge({}, DEFAULT_PROMPT_SEARCH_SERVICE_CONFIG, config)

const start = Date.now()
const promptSearch = new PromptSearch(llm)
const promptSearchResult = await promptSearch.search(prompt)
const promptSearchResult = await promptSearch.search(prompt, undefined, config.promptSearchConfig)

console.log(promptSearchResult)

@@ -69,34 +85,34 @@ export class PromptSearchService extends VeridaService {
const sort = promptSearchResult.sort == PromptSearchSort.RECENT ? SearchSortType.RECENT : SearchSortType.OLDEST
console.log(`Searching by timeframe: ${maxDatetime} ${sort}`)
if (promptSearchResult.databases.indexOf(SearchType.EMAILS) !== -1) {
emails = await searchService.schemaByDateRange<SchemaEmail>(SearchType.EMAILS, maxDatetime, sort, MAX_DATERANGE_EMAILS*3)
emails = await searchService.schemaByDateRange<SchemaEmail>(SearchType.EMAILS, maxDatetime, sort, config.dataTypes.emails.limit*3)
const emailShortlist = new EmailShortlist(llm)
emails = await emailShortlist.shortlist(prompt, emails, MAX_DATERANGE_EMAILS)
emails = await emailShortlist.shortlist(prompt, emails, config.dataTypes.emails.limit)
}
if (promptSearchResult.databases.indexOf(SearchType.FILES) !== -1) {
files = await searchService.schemaByDateRange<SchemaFile>(SearchType.FILES, maxDatetime, sort, MAX_DATERANGE_FILES)
files = await searchService.schemaByDateRange<SchemaFile>(SearchType.FILES, maxDatetime, sort, config.dataTypes.files.limit)
}
if (promptSearchResult.databases.indexOf(SearchType.FAVORITES) !== -1) {
favourites = await searchService.schemaByDateRange<SchemaFavourite>(SearchType.FAVORITES, maxDatetime, sort, MAX_DATERANGE_FAVORITES)
favourites = await searchService.schemaByDateRange<SchemaFavourite>(SearchType.FAVORITES, maxDatetime, sort, config.dataTypes.favorites.limit)
}
if (promptSearchResult.databases.indexOf(SearchType.FOLLOWING) !== -1) {
following = await searchService.schemaByDateRange<SchemaFollowing>(SearchType.FOLLOWING, maxDatetime, sort, MAX_DATERANGE_FOLLOWING)
following = await searchService.schemaByDateRange<SchemaFollowing>(SearchType.FOLLOWING, maxDatetime, sort, config.dataTypes.following.limit)
}
if (promptSearchResult.databases.indexOf(SearchType.CHAT_MESSAGES) !== -1) {
chatMessages = <SchemaSocialChatMessage[]> await searchService.schemaByDateRange(SearchType.CHAT_MESSAGES, maxDatetime, sort, MAX_DATERANGE_CHAT_MESSAGES)
chatMessages = <SchemaSocialChatMessage[]> await searchService.schemaByDateRange(SearchType.CHAT_MESSAGES, maxDatetime, sort, config.dataTypes.chatMessages.limit)
}
if (promptSearchResult.databases.indexOf(SearchType.CALENDAR_EVENT) !== -1) {
calendarEvents = await searchService.schemaByDateRange<SchemaEvent>(SearchType.CALENDAR_EVENT, maxDatetime, sort, MAX_DATERANGE_CALENDAR_EVENT)
calendarEvents = await searchService.schemaByDateRange<SchemaEvent>(SearchType.CALENDAR_EVENT, maxDatetime, sort, config.dataTypes.calendarEvents.limit)
}
}

console.log('files / emails / favourites / following / chatThreads / calendarEvents')
console.log(files.length, emails.length, favourites.length, following.length, chatThreads.length, calendarEvents.length)
promptSearchResult.search_summary = `Files: ${files.length}, Emails: ${emails.length}, Favorites: ${favourites.length}, Following: ${following.length}, ChatThreads: ${chatThreads.length}, CalendarEvents: ${calendarEvents.length}`
console.log(promptSearchResult.search_summary)

let finalPrompt = `Answer this prompt:\n${prompt}\nHere are some recent messages that may help you provide a relevant answer.\n`
let finalPrompt = `Answer this prompt:\n${prompt}\nHere is some of my personal data that may help you provide a relevant answer.\n`
let contextString = ''

let maxChatMessages = MAX_DATERANGE_CHAT_MESSAGES
let maxChatMessages = config.dataTypes.chatMessages.limit
for (const chatThread of chatThreads) {
for (const chatMessage of chatThread.messages) {
contextString += `From: ${chatMessage.fromName} <${chatMessage.fromHandle}> (${chatMessage.groupName})\nBody: ${chatMessage.messageText}\n\n`
@@ -112,7 +128,7 @@ export class PromptSearchService extends VeridaService {
}

for (const file of files) {
contextString += `File: ${file.name} ${file.contentText.substring(0,MAX_DOC_LENGTH)} (via ${file.sourceApplication})\n\n`
contextString += `File: ${file.name} ${file.contentText.substring(0,config.dataTypes.files.maxLength)} (via ${file.sourceApplication})\n\n`
}

for (const favourite of favourites) {
@@ -132,15 +148,15 @@ export class PromptSearchService extends VeridaService {
let emailCount = 0
for (const email of emails) {
let extraContext = ""
let body = email.messageText.substring(0, MAX_EMAIL_LENGTH)
let body = email.messageText.substring(0, config.dataTypes.emails.maxLength)
if (email.attachments) {
for (const attachment of email.attachments) {
body += attachment.textContent!.substring(0, MAX_ATTACHMENT_LENGTH)
body += attachment.textContent!.substring(0, config.dataTypes.emails.attachmentLength)
}
}

extraContext = `To: ${email.toName} <${email.toEmail}>\nFrom: ${email.fromName} <${email.fromEmail}> (${email.name})\nBody: ${body}\n\n`
if ((extraContext.length + contextString.length + finalPrompt.length) > MAX_CONTEXT_LENGTH) {
if ((extraContext.length + contextString.length + finalPrompt.length) > config.maxContextLength) {
break
}

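The prompt() method deep-merges the caller's config over DEFAULT_PROMPT_SEARCH_SERVICE_CONFIG, so callers only supply the fields they want to change. A minimal sketch of that merge behaviour, assuming access to the DEFAULT_PROMPT_SEARCH_SERVICE_CONFIG constant above (values are illustrative):

const _ = require('lodash')

// Hypothetical partial config supplied by an API caller
const callerConfig = { dataTypes: { emails: { limit: 5 } } }

// lodash merge is deep and later sources take precedence, so supplied fields
// override the defaults while everything omitted is retained.
const merged = _.merge({}, DEFAULT_PROMPT_SEARCH_SERVICE_CONFIG, callerConfig)
// merged.dataTypes.emails.limit     -> 5     (caller override)
// merged.dataTypes.emails.maxLength -> 500   (default retained)
// merged.maxContextLength           -> 20000 (default retained)
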
21 changes: 17 additions & 4 deletions src/services/tools/promptSearch.ts
@@ -1,4 +1,6 @@
const _ = require('lodash')
import { KeywordSearchTimeframe } from "../../helpers/interfaces";
import { PromptSearchLLMResponseOptional } from "../assistants/interfaces";
import { LLM } from "../llm"
import { SearchType } from "../search";

@@ -12,7 +14,7 @@ You must generate a JSON response containing the following information:
- output_type: The amount of detail in the output of each search result to provide meaningful context. full_content, summary, headline
- profile_information; Array of these options only; name, contactInfo, demographics, lifestyle, preferences, habits, financial, health, personality, employment, education, skills, language, interests
JSON only, no explanation or formatting.`
Output JSON only with no explanation or formatting.`

export enum PromptSearchType {
KEYWORDS = "keywords",
@@ -43,7 +45,8 @@ export interface PromptSearchLLMResponse {
"financial" | "health" | "personality" | "employment" | "education" | "skills" |
"language" | "interests"
>;
}
search_summary?: string
}

export class PromptSearch {

@@ -53,9 +56,19 @@ export class PromptSearch {
this.llm = llm
}

public async search(userPrompt: string): Promise<PromptSearchLLMResponse> {
public async search(userPrompt: string, retries = 3, defaultResponse?: PromptSearchLLMResponseOptional): Promise<PromptSearchLLMResponse> {
const response = await this.llm.prompt(userPrompt, systemPrompt)
return <PromptSearchLLMResponse> JSON.parse(response.choices[0].message.content!)

try {
const searchResponse = <PromptSearchLLMResponse> JSON.parse(response.choices[0].message.content!)
return _.merge({}, searchResponse, defaultResponse ? defaultResponse : {})
} catch (err: any) {
if (retries === 0) {
throw new Error(`No user data query available`)
} else {
return this.search(userPrompt, retries - 1, defaultResponse)
}
}

}

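Worth noting: because lodash merge gives precedence to later sources, any fields supplied via promptSearchConfig override the corresponding fields parsed from the LLM response rather than acting as fallbacks. A small sketch with hypothetical values:

const _ = require('lodash')

// Pretend the LLM returned this parsed query (values are illustrative)
const llmResponse = { sort: "recent", databases: ["emails", "files"] }

// ...and the caller pinned the sort order via promptSearchConfig
const overrides = { sort: "oldest" }

const finalResponse = _.merge({}, llmResponse, overrides)
// finalResponse.databases -> ["emails", "files"] (from the LLM)
// finalResponse.sort      -> "oldest"            (caller value wins)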
