From 8d368d6aaa4190e305c936ba18e2bd9060f72dc0 Mon Sep 17 00:00:00 2001 From: Morgane Dubus Date: Tue, 23 Apr 2024 11:27:33 +0200 Subject: [PATCH] feat: hybrid search improvements for v1.8.x --- src/types/types.ts | 36 +++- tests/__snapshots__/settings.test.ts.snap | 214 ++++++++++++++++++++++ tests/embedders.test.ts | 87 ++++++++- tests/get_search.test.ts | 14 +- tests/search.test.ts | 14 +- tests/settings.test.ts | 61 ++++++ 6 files changed, 421 insertions(+), 5 deletions(-) diff --git a/src/types/types.ts b/src/types/types.ts index fde3b69b9..0606948d8 100644 --- a/src/types/types.ts +++ b/src/types/types.ts @@ -215,7 +215,6 @@ export type SearchResponse< query: string facetDistribution?: FacetDistribution facetStats?: FacetStats - vector?: number[] } & (undefined extends S ? Partial : true extends IsFinitePagination> @@ -333,7 +332,12 @@ export type TypoTolerance = { export type SeparatorTokens = string[] | null export type NonSeparatorTokens = string[] | null export type Dictionary = string[] | null -export type ProximityPrecision = 'byWord' | 'byAttribute' +export type ProximityPrecision = 'byWord' | 'byAttribute' | null + +export type Distribution = { + mean: number + sigma: number +} export type OpenAiEmbedder = { source: 'openAi' @@ -341,6 +345,7 @@ export type OpenAiEmbedder = { apiKey?: string documentTemplate?: string dimensions?: number + distribution?: Distribution } export type HuggingFaceEmbedder = { @@ -348,17 +353,44 @@ export type HuggingFaceEmbedder = { model?: string revision?: string documentTemplate?: string + distribution?: Distribution } export type UserProvidedEmbedder = { source: 'userProvided' dimensions: number + distribution?: Distribution +} + +export type RestEmbedder = { + source: 'rest' + url: string + apiKey?: string + dimensions?: number + documentTemplate?: string + inputField?: string[] | null + inputType?: 'text' | 'textArray' + query?: Record | null + pathToEmbeddings?: string[] | null + embeddingObject?: string[] | null + distribution?: Distribution +} + +export type OllamaEmbedder = { + source: 'ollama' + url?: string + apiKey?: string + model?: string + documentTemplate?: string + distribution?: Distribution } export type Embedder = | OpenAiEmbedder | HuggingFaceEmbedder | UserProvidedEmbedder + | RestEmbedder + | OllamaEmbedder | null export type Embedders = Record | null diff --git a/tests/__snapshots__/settings.test.ts.snap b/tests/__snapshots__/settings.test.ts.snap index 5252c6889..9589e4bb1 100644 --- a/tests/__snapshots__/settings.test.ts.snap +++ b/tests/__snapshots__/settings.test.ts.snap @@ -94,6 +94,53 @@ exports[`Test on settings Admin key: Get default settings of empty index with pr } `; +exports[`Test on settings Admin key: Reset embedders settings 1`] = ` +{ + "dictionary": [], + "displayedAttributes": [ + "*", + ], + "distinctAttribute": null, + "faceting": { + "maxValuesPerFacet": 100, + "sortFacetValuesBy": { + "*": "alpha", + }, + }, + "filterableAttributes": [], + "nonSeparatorTokens": [], + "pagination": { + "maxTotalHits": 1000, + }, + "proximityPrecision": "byWord", + "rankingRules": [ + "words", + "typo", + "proximity", + "attribute", + "sort", + "exactness", + ], + "searchCutoffMs": null, + "searchableAttributes": [ + "*", + ], + "separatorTokens": [], + "sortableAttributes": [], + "stopWords": [], + "synonyms": {}, + "typoTolerance": { + "disableOnAttributes": [], + "disableOnWords": [], + "enabled": true, + "minWordSizeForTypos": { + "oneTypo": 5, + "twoTypos": 9, + }, + }, +} +`; + exports[`Test on settings Admin key: Reset settings 1`] = ` { "dictionary": [], @@ -188,6 +235,66 @@ exports[`Test on settings Admin key: Reset settings of empty index 1`] = ` } `; +exports[`Test on settings Admin key: Update embedders settings 1`] = ` +{ + "dictionary": [], + "displayedAttributes": [ + "*", + ], + "distinctAttribute": null, + "embedders": { + "default": { + "apiKey": " { @@ -97,6 +110,10 @@ describe.each([{ permission: 'Master' }, { permission: 'Admin' }])( model: 'sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2', documentTemplate: "A movie titled '{{doc.title}}' whose description starts with {{doc.overview|truncatewords: 20}}", + distribution: { + mean: 0.7, + sigma: 0.3, + }, }, } const task: EnqueuedTask = await client @@ -109,6 +126,74 @@ describe.each([{ permission: 'Master' }, { permission: 'Admin' }])( expect(response).toEqual(newEmbedder) }) + test(`${permission} key: Update embedders with 'rest' source`, async () => { + const client = await getClient(permission) + const newEmbedder: Embedders = { + default: { + source: 'rest', + url: 'https://api.openai.com/v1/embeddings', + apiKey: '', + dimensions: 1536, + documentTemplate: + "A movie titled '{{doc.title}}' whose description starts with {{doc.overview|truncatewords: 20}}", + inputField: ['input'], + inputType: 'textArray', + query: { + model: 'text-embedding-ada-002', + }, + pathToEmbeddings: ['data'], + embeddingObject: ['embedding'], + distribution: { + mean: 0.7, + sigma: 0.3, + }, + }, + } + const task: EnqueuedTask = await client + .index(index.uid) + .updateEmbedders(newEmbedder) + await client.waitForTask(task.taskUid) + + const response: Embedders = await client.index(index.uid).getEmbedders() + + expect(response).toEqual({ + default: { + ...newEmbedder.default, + apiKey: ' { + const client = await getClient(permission) + const newEmbedder: Embedders = { + default: { + source: 'ollama', + url: 'http://localhost:11434/api/embeddings', + apiKey: '', + model: 'nomic-embed-text', + documentTemplate: 'blabla', + distribution: { + mean: 0.7, + sigma: 0.3, + }, + }, + } + const task: EnqueuedTask = await client + .index(index.uid) + .updateEmbedders(newEmbedder) + await client.waitForTask(task.taskUid) + + const response: Embedders = await client.index(index.uid).getEmbedders() + + expect(response).toEqual({ + default: { + ...newEmbedder.default, + apiKey: ' { const client = await getClient(permission) diff --git a/tests/get_search.test.ts b/tests/get_search.test.ts index 690caec90..d8f53ac37 100644 --- a/tests/get_search.test.ts +++ b/tests/get_search.test.ts @@ -473,7 +473,19 @@ describe.each([ .index(emptyIndex.uid) .searchGet('', { vector: [1], hybridSemanticRatio: 1.0 }) - expect(response.vector).toEqual([1]) + expect(response).toHaveProperty('hits') + expect(response).toHaveProperty('semanticHitCount') + // Those fields are no longer returned by the search response + // We want to ensure that they don't appear in it anymore + expect(response).not.toHaveProperty('vector') + expect(response).not.toHaveProperty('_semanticScore') + }) + + test(`${permission} key: search without vectors`, async () => { + const client = await getClient(permission) + const response = await client.index(index.uid).search('prince', {}) + + expect(response).not.toHaveProperty('semanticHitCount') }) test(`${permission} key: Try to search on deleted index and fail`, async () => { diff --git a/tests/search.test.ts b/tests/search.test.ts index b5053156e..27aea8b27 100644 --- a/tests/search.test.ts +++ b/tests/search.test.ts @@ -859,7 +859,19 @@ describe.each([ }, }) - expect(response.vector).toEqual([1]) + expect(response).toHaveProperty('hits') + expect(response).toHaveProperty('semanticHitCount') + // Those fields are no longer returned by the search response + // We want to ensure that they don't appear in it anymore + expect(response).not.toHaveProperty('vector') + expect(response).not.toHaveProperty('_semanticScore') + }) + + test(`${permission} key: search without vectors`, async () => { + const client = await getClient(permission) + const response = await client.index(index.uid).search('prince', {}) + + expect(response).not.toHaveProperty('semanticHitCount') }) test(`${permission} key: Try to search on deleted index and fail`, async () => { diff --git a/tests/settings.test.ts b/tests/settings.test.ts index 2f3c73631..fd5a5b5be 100644 --- a/tests/settings.test.ts +++ b/tests/settings.test.ts @@ -6,6 +6,8 @@ import { MeiliSearch, getClient, dataset, + getKey, + HOST, } from './utils/meilisearch-test-utils' const index = { @@ -133,6 +135,7 @@ describe.each([{ permission: 'Master' }, { permission: 'Admin' }])( separatorTokens: null, nonSeparatorTokens: null, dictionary: null, + proximityPrecision: null, searchCutoffMs: null, } // Add the settings @@ -146,6 +149,41 @@ describe.each([{ permission: 'Master' }, { permission: 'Admin' }])( expect(response).toMatchSnapshot() }) + test(`${permission} key: Update embedders settings `, async () => { + const client = await getClient(permission) + const key = await getKey(permission) + + await fetch(`${HOST}/experimental-features`, { + body: JSON.stringify({ vectorStore: true }), + headers: { + Authorization: `Bearer ${key}`, + 'Content-Type': 'application/json', + }, + method: 'PATCH', + }) + + const newSettings: Settings = { + embedders: { + default: { + source: 'openAi', + apiKey: '', + model: 'text-embedding-3-small', + documentTemplate: 'A document template', + dimensions: 1536, + distribution: { + mean: 0.7, + sigma: 0.3, + }, + }, + }, + } + const task = await client.index(index.uid).updateSettings(newSettings) + await client.index(index.uid).waitForTask(task.taskUid) + const response = await client.index(index.uid).getSettings() + + expect(response).toMatchSnapshot() + }) + test(`${permission} key: Update settings on empty index with primary key`, async () => { const client = await getClient(permission) const newSettings = { @@ -183,6 +221,29 @@ describe.each([{ permission: 'Master' }, { permission: 'Admin' }])( expect(response).toMatchSnapshot() }) + test(`${permission} key: Reset embedders settings `, async () => { + const client = await getClient(permission) + const key = await getKey(permission) + + await fetch(`${HOST}/experimental-features`, { + body: JSON.stringify({ vectorStore: true }), + headers: { + Authorization: `Bearer ${key}`, + 'Content-Type': 'application/json', + }, + method: 'PATCH', + }) + + const newSettings: Settings = { + embedders: null, + } + const task = await client.index(index.uid).updateSettings(newSettings) + await client.index(index.uid).waitForTask(task.taskUid) + const response = await client.index(index.uid).getSettings() + + expect(response).toMatchSnapshot() + }) + test(`${permission} key: Update searchableAttributes settings on empty index`, async () => { const client = await getClient(permission) const newSettings = {