diff --git a/packages/core/package.json b/packages/core/package.json index 6ede7233be..f0a8e67653 100644 --- a/packages/core/package.json +++ b/packages/core/package.json @@ -214,6 +214,34 @@ "default": "./storage/chat-store/dist/index.js" } }, + "./storage/docstore": { + "require": { + "types": "./storage/docstore/dist/index.d.cts", + "default": "./storage/docstore/dist/index.cjs" + }, + "import": { + "types": "./storage/docstore/dist/index.d.ts", + "default": "./storage/docstore/dist/index.js" + }, + "default": { + "types": "./storage/docstore/dist/index.d.ts", + "default": "./storage/docstore/dist/index.js" + } + }, + "./storage/kvstore": { + "require": { + "types": "./storage/kvstore/dist/index.d.cts", + "default": "./storage/kvstore/dist/index.cjs" + }, + "import": { + "types": "./storage/kvstore/dist/index.d.ts", + "default": "./storage/kvstore/dist/index.js" + }, + "default": { + "types": "./storage/kvstore/dist/index.d.ts", + "default": "./storage/kvstore/dist/index.js" + } + }, "./response-synthesizers": { "require": { "types": "./response-synthesizers/dist/index.d.cts", diff --git a/packages/core/src/global/constants.ts b/packages/core/src/global/constants.ts index 4c742d4b92..5b5dbccefb 100644 --- a/packages/core/src/global/constants.ts +++ b/packages/core/src/global/constants.ts @@ -1,4 +1,12 @@ -import { path } from "@llamaindex/env"; +export { + DEFAULT_PERSIST_FNAME, + DEFAULT_PERSIST_DIR, + DEFAULT_PERSIST_PATH, + DEFAULT_METADATA_COLLECTION_SUFFIX, + DEFAULT_COLLECTION_DATA_SUFFIX, + DEFAULT_NAMESPACE, + DEFAULT_REF_DOC_COLLECTION_SUFFIX +} from '../storage/docstore/index.js'; //#region llm export const DEFAULT_CONTEXT_WINDOW = 3900; @@ -10,15 +18,16 @@ export const DEFAULT_PADDING = 5; //#endregion //#region storage export const DEFAULT_COLLECTION = "data"; -export const DEFAULT_PERSIST_DIR = path.join("./storage"); export const DEFAULT_INDEX_STORE_PERSIST_FILENAME = "index_store.json"; export const DEFAULT_DOC_STORE_PERSIST_FILENAME = "doc_store.json"; export const DEFAULT_VECTOR_STORE_PERSIST_FILENAME = "vector_store.json"; export const DEFAULT_GRAPH_STORE_PERSIST_FILENAME = "graph_store.json"; -export const DEFAULT_NAMESPACE = "docstore"; export const DEFAULT_IMAGE_VECTOR_NAMESPACE = "images"; //#endregion //#region llama cloud export const DEFAULT_PROJECT_NAME = "Default"; export const DEFAULT_BASE_URL = "https://api.cloud.llamaindex.ai"; //#endregion +//#region vector store +export const DEFAULT_BATCH_SIZE = 100; +//#endregion diff --git a/packages/core/src/schema/node.ts b/packages/core/src/schema/node.ts index 0ba548bd80..71fa1abf41 100644 --- a/packages/core/src/schema/node.ts +++ b/packages/core/src/schema/node.ts @@ -1,6 +1,7 @@ import { createSHA256, path, randomUUID } from "@llamaindex/env"; import { lazyInitHash } from "../decorator"; import { chunkSizeCheck } from "./utils/chunk-size-check"; +import { z } from 'zod' export enum NodeRelationship { SOURCE = "SOURCE", diff --git a/packages/core/src/storage/docstore/index.ts b/packages/core/src/storage/docstore/index.ts new file mode 100644 index 0000000000..63a159f896 --- /dev/null +++ b/packages/core/src/storage/docstore/index.ts @@ -0,0 +1,3 @@ +export { DEFAULT_PERSIST_PATH, DEFAULT_PERSIST_DIR, DEFAULT_PERSIST_FNAME} from './types' + +export { DEFAULT_METADATA_COLLECTION_SUFFIX, DEFAULT_COLLECTION_DATA_SUFFIX, DEFAULT_NAMESPACE, DEFAULT_REF_DOC_COLLECTION_SUFFIX} from './kv-document-store' \ No newline at end of file diff --git a/packages/core/src/storage/docstore/kv-document-store.ts b/packages/core/src/storage/docstore/kv-document-store.ts new file mode 100644 index 0000000000..1b8079b0c2 --- /dev/null +++ b/packages/core/src/storage/docstore/kv-document-store.ts @@ -0,0 +1,80 @@ +import { BaseDocumentStore } from './types'; +import { BaseNode } from '../../schema'; +import type { BaseKVStore } from '../kvstore'; +import { DEFAULT_BATCH_SIZE } from '../../global'; +import { path } from '@llamaindex/env'; +import { type DocJson, jsonToDoc } from './utils'; + +// The default namespace prefix for the document store. +export const DEFAULT_NAMESPACE = 'docstore'; +// The nodes collection contains the content of each node, along with metadata specific +// to each node, including associated attributes like excluded metadata and relationships. +export const DEFAULT_COLLECTION_DATA_SUFFIX = '/data'; +// Contains mappings from each document to the list of node IDs that belong to it +// including the document's metadata. +export const DEFAULT_REF_DOC_COLLECTION_SUFFIX = '/ref_doc_info'; +// Contains references from each node to its corresponding document, +// including the node's document hash and reference document ID. +export const DEFAULT_METADATA_COLLECTION_SUFFIX = '/metadata'; + +export class KVDocumentStore extends BaseDocumentStore { + private kvStore: BaseKVStore>; + #namespace: string; + #nodeCollectionSuffix: string; + #refDocCollectionSuffix: string; + #metadataCollectionSuffix: string; + #nodeCollection: string; + #refDocCollection: string; + #metadataCollection: string; + #batchSize: number; + + constructor ( + kvStore: BaseKVStore, + namespace: string = DEFAULT_NAMESPACE, + batchSize: number = DEFAULT_BATCH_SIZE, + nodeCollectionSuffix: string = DEFAULT_COLLECTION_DATA_SUFFIX, + refDocCollectionSuffix: string = DEFAULT_REF_DOC_COLLECTION_SUFFIX, + metadataCollectionSuffix: string = DEFAULT_METADATA_COLLECTION_SUFFIX + ) { + super(); + this.kvStore = kvStore; + this.#namespace = namespace; + this.#nodeCollectionSuffix = nodeCollectionSuffix; + this.#refDocCollectionSuffix = refDocCollectionSuffix; + this.#metadataCollectionSuffix = metadataCollectionSuffix; + this.#nodeCollection = path.join(this.#namespace, + this.#nodeCollectionSuffix); + this.#refDocCollection = path.join(this.#namespace, + this.#refDocCollectionSuffix); + this.#metadataCollection = path.join(this.#namespace, + this.#metadataCollectionSuffix); + this.#batchSize = batchSize; + } + + get docs (): Promise> { + return this.kvStore.getAll(this.#nodeCollection).then(jsonDict => { + const docs = new Map(); + for (const [key, json] of Object.entries(jsonDict)) { + docs.set(key, jsonToDoc(json, this.serializer)); + } + return docs; + }); + } + + #prepareKVPair( + key: string, + val: DocJson, + collection: string | undefined + ) { + + } + + async addDocuments ( + docs: BaseNode[], allowUpdate: boolean, + batchSize?: number, + storeText?: boolean + ) { + batchSize = batchSize || this.#batchSize; + + } +} \ No newline at end of file diff --git a/packages/core/src/storage/docstore/types.ts b/packages/core/src/storage/docstore/types.ts new file mode 100644 index 0000000000..6c91790004 --- /dev/null +++ b/packages/core/src/storage/docstore/types.ts @@ -0,0 +1,85 @@ +import { path } from '@llamaindex/env'; +import { BaseNode } from '../../schema'; +import { jsonSerializer, type Serializer } from './utils'; + +export const DEFAULT_PERSIST_FNAME = 'docstore.json'; +export const DEFAULT_PERSIST_DIR = './storage'; +export const DEFAULT_PERSIST_PATH = path.join(DEFAULT_PERSIST_DIR, + DEFAULT_PERSIST_FNAME); + +type RefDocInfo> = { + nodeIds: string[] + extraInfo: ExtraInfo +} + +export abstract class BaseDocumentStore { + serializer: Serializer< + Record, + // we don't care about what's the target type of the serialization, so we use any here + // eslint-disable-next-line @typescript-eslint/no-explicit-any + any + > = jsonSerializer; + + abstract persist ( + persistPath: string + ): Promise + + abstract get docs (): Promise> + + abstract addDocuments ( + docs: BaseNode[], + allowUpdate: boolean, + batchSize: number, + storeText: boolean + ): Promise + + abstract getDocument ( + docId: string, + raiseError: boolean + ): Promise + + abstract deleteDocument ( + docId: string, + raiseError: boolean + ): Promise + + abstract documentExists ( + docId: string + ): Promise + + abstract setDocumentHash ( + docId: string, + docHash: string + ): Promise + + abstract getDocumentHash ( + docId: string + ): Promise + + abstract getAllDocumentHashes (): Promise> + + abstract getAllRefDocInfo (): Promise> | undefined>> + + abstract getRefDocInfo ( + refDocId: string + ): Promise> | undefined> + + abstract deleteRefDoc ( + refDocId: string, + raiseError: boolean + ): Promise + + abstract getNodes ( + nodeIds: string[], + raiseError: boolean + ): Promise + + abstract getNode ( + nodeId: string, + raiseError: boolean + ): Promise + + abstract getNodeDict ( + nodeIdDict: Record + ): Promise> +} \ No newline at end of file diff --git a/packages/core/src/storage/docstore/utils.ts b/packages/core/src/storage/docstore/utils.ts new file mode 100644 index 0000000000..77b49410f9 --- /dev/null +++ b/packages/core/src/storage/docstore/utils.ts @@ -0,0 +1,99 @@ +import type { BaseNode } from "../../schema"; +import { Document, ObjectType, TextNode } from "../../schema"; +import { ImageDocument, MetadataMode } from '../../schema'; +import type { SerializableValue } from '../kvstore'; + +const TYPE_KEY = "__type__"; +const DATA_KEY = "__data__"; + +export interface Serializer { + toPersistence(data: Data): Persistence; + fromPersistence(data: Persistence): Data +} + +export const jsonSerializer: Serializer, string> = { + toPersistence(data) { + return JSON.stringify(data); + }, + fromPersistence(data) { + return JSON.parse(data); + }, +}; + +export const noneSerializer: Serializer, Record> = { + toPersistence(data) { + return data; + }, + fromPersistence(data) { + return data; + }, +}; + +type DocJson = { + [TYPE_KEY]: ObjectType; + [DATA_KEY]: Record; // from BaseNode, todo: add zod type check here +}; + +export function isValidDocJson( + docJson: SerializableValue, +): docJson is DocJson { + return ( + typeof docJson === "object" && + docJson !== null && + TYPE_KEY in docJson && + DATA_KEY in docJson + ); +} + +export function docToJson( + doc: BaseNode, + serializer: Serializer>, +): DocJson { + return { + [DATA_KEY]: serializer.toPersistence(doc), + [TYPE_KEY]: doc.type, + }; +} + +export function jsonToDoc( + docDict: DocJson, + serializer: Pick, unknown>, 'fromPersistence'>, +): BaseNode { + const docType = docDict[TYPE_KEY]; + + // fixme: add zod type check here + // eslint-disable-next-line @typescript-eslint/no-explicit-any + const dataDict: Record = serializer.fromPersistence(docDict[DATA_KEY]); + let doc: BaseNode; + + if (docType === ObjectType.DOCUMENT) { + doc = new Document({ + text: dataDict.text, + id_: dataDict.id_, + embedding: dataDict.embedding, + hash: dataDict.hash, + metadata: dataDict.metadata, + }); + } else if (docType === ObjectType.TEXT) { + doc = new TextNode({ + text: dataDict.text, + id_: dataDict.id_, + hash: dataDict.hash, + metadata: dataDict.metadata, + relationships: dataDict.relationships, + }); + } else if (docType === ObjectType.IMAGE_DOCUMENT) { + doc = new ImageDocument({ + image: dataDict.image, + id_: dataDict.id_, + embedding: dataDict.embedding, + hash: dataDict.hash, + metadata: dataDict.metadata, + }); + } else { + throw new Error(`Unknown doc type: ${docType}`); + } + + return doc; +} + diff --git a/packages/core/src/storage/kvstore/index.ts b/packages/core/src/storage/kvstore/index.ts new file mode 100644 index 0000000000..13a5a7e757 --- /dev/null +++ b/packages/core/src/storage/kvstore/index.ts @@ -0,0 +1,6 @@ +export { + type SerializableValue, + BaseKVStore, + BaseFileSystemKVStore +} from './types'; +export { SimpleKVStore } from './simple-kv-store'; \ No newline at end of file diff --git a/packages/core/src/storage/kvstore/simple-kv-store.ts b/packages/core/src/storage/kvstore/simple-kv-store.ts new file mode 100644 index 0000000000..dd59e87a1b --- /dev/null +++ b/packages/core/src/storage/kvstore/simple-kv-store.ts @@ -0,0 +1,56 @@ +import { BaseFileSystemKVStore, type SerializableValue } from './types'; +import { fs } from '@llamaindex/env'; +import { + DEFAULT_COLLECTION, + DEFAULT_COLLECTION_DATA_SUFFIX +} from '../../global'; + +export class SimpleKVStore extends BaseFileSystemKVStore { + data: Map> + + constructor ( + initialData?: Record> | Map> + ) { + super(); + if (initialData) { + this.data = new Map(initialData ? Object.entries(initialData) : []); + } else { + this.data = new Map(); + } + } + + persist (persistPath: string): Promise { + return fs.writeFile(persistPath, JSON.stringify(Object.fromEntries(this.data.entries()))); + } + + async put (key: string, val: Value, collection: string = DEFAULT_COLLECTION_DATA_SUFFIX) { + const collectionData = this.data.get(collection) ?? new Map(); + collectionData.set(key, val); + this.data.set(collection, collectionData); + } + + async get (key: string, collection: string = DEFAULT_COLLECTION) { + const collectionData = this.data.get(collection); + if (!collectionData) { + return null; + } + return collectionData.get(key) ?? null; + } + + async getAll (collection: string = DEFAULT_COLLECTION) { + return this.data.get(collection) ?? {}; + } + + async delete (key: string, collection: string = DEFAULT_COLLECTION) { + const collectionData = this.data.get(collection); + if (!collectionData) { + return false; + } + return collectionData.delete(key); + } + + static async fromPersistPath (persistPath: string): Promise> { + const data = JSON.parse(await fs.readFile(persistPath, 'utf-8')); + return new SimpleKVStore(data); + } +} \ No newline at end of file diff --git a/packages/core/src/storage/kvstore/types.ts b/packages/core/src/storage/kvstore/types.ts new file mode 100644 index 0000000000..296411e81b --- /dev/null +++ b/packages/core/src/storage/kvstore/types.ts @@ -0,0 +1,34 @@ + +type SerializableValueInternal = + Record + | number + | string + | boolean + | null + | undefined + | SerializableValueInternal[]; + +export type SerializableValue = + SerializableValueInternal + | Record; + +// we don't have collections in TypeScript side, because we align with JavaScript side similarly +export abstract class BaseKVStore { + abstract put ( + key: string, + val: Value, + collection?: string, + ): Promise; + + abstract get (key: string, collection?: string): Promise; + + abstract getAll (collection?: string): Promise>; + + abstract delete (key: string, collection?: string): Promise; +} + +export abstract class BaseFileSystemKVStore extends BaseKVStore { + abstract persist (persistPath: string): Promise; + + static fromPersistPath: (persistPath: string) => Promise>; +} diff --git a/packages/core/storage/docstore/package.json b/packages/core/storage/docstore/package.json new file mode 100644 index 0000000000..2fdf125e2c --- /dev/null +++ b/packages/core/storage/docstore/package.json @@ -0,0 +1,8 @@ +{ + "type": "module", + "main": "./dist/index.cjs", + "module": "./dist/index.js", + "types": "./dist/index.d.ts", + "exports": "./dist/index.js", + "private": true +} diff --git a/packages/core/storage/kvstore/package.json b/packages/core/storage/kvstore/package.json new file mode 100644 index 0000000000..2fdf125e2c --- /dev/null +++ b/packages/core/storage/kvstore/package.json @@ -0,0 +1,8 @@ +{ + "type": "module", + "main": "./dist/index.cjs", + "module": "./dist/index.js", + "types": "./dist/index.d.ts", + "exports": "./dist/index.js", + "private": true +} diff --git a/packages/llamaindex/src/storage/FileSystem.ts b/packages/llamaindex/src/storage/FileSystem.ts deleted file mode 100644 index 3f23939aa1..0000000000 --- a/packages/llamaindex/src/storage/FileSystem.ts +++ /dev/null @@ -1,35 +0,0 @@ -// FS utility helpers - -import { fs } from "@llamaindex/env"; - -/** - * Checks if a file exists. - * Analogous to the os.path.exists function from Python. - * @param path The path to the file to check. - * @returns A promise that resolves to true if the file exists, false otherwise. - */ -export async function exists(path: string): Promise { - try { - await fs.access(path); - return true; - } catch { - return false; - } -} - -/** - * Recursively traverses a directory and yields all the paths to the files in it. - * @param dirPath The path to the directory to traverse. - */ -export async function* walk(dirPath: string): AsyncIterable { - const entries = await fs.readdir(dirPath); - for (const entry of entries) { - const fullPath = `${dirPath}/${entry}`; - const stats = await fs.stat(fullPath); - if (stats.isDirectory()) { - yield* walk(fullPath); - } else { - yield fullPath; - } - } -} diff --git a/packages/llamaindex/src/storage/docStore/KVDocumentStore.ts b/packages/llamaindex/src/storage/docStore/KVDocumentStore.ts deleted file mode 100644 index 3865c0bebc..0000000000 --- a/packages/llamaindex/src/storage/docStore/KVDocumentStore.ts +++ /dev/null @@ -1,200 +0,0 @@ -import { DEFAULT_NAMESPACE } from "@llamaindex/core/global"; -import type { BaseNode } from "@llamaindex/core/schema"; -import { ObjectType } from "@llamaindex/core/schema"; -import _ from "lodash"; -import type { BaseKVStore } from "../kvStore/types.js"; -import type { RefDocInfo } from "./types.js"; -import { BaseDocumentStore } from "./types.js"; -import { docToJson, isValidDocJson, jsonToDoc } from "./utils.js"; - -type DocMetaData = { docHash: string; refDocId?: string }; - -export class KVDocumentStore extends BaseDocumentStore { - private kvstore: BaseKVStore; - private nodeCollection: string; - private refDocCollection: string; - private metadataCollection: string; - - constructor(kvstore: BaseKVStore, namespace: string = DEFAULT_NAMESPACE) { - super(); - this.kvstore = kvstore; - this.nodeCollection = `${namespace}/data`; - this.refDocCollection = `${namespace}/ref_doc_info`; - this.metadataCollection = `${namespace}/metadata`; - } - - async docs(): Promise> { - const jsonDict = await this.kvstore.getAll(this.nodeCollection); - const docs: Record = {}; - for (const key in jsonDict) { - const value = jsonDict[key]; - if (isValidDocJson(value)) { - docs[key] = jsonToDoc(value, this.serializer); - } else { - console.warn(`Invalid JSON for docId ${key}`); - } - } - return docs; - } - - async addDocuments( - docs: BaseNode[], - allowUpdate: boolean = true, - ): Promise { - for (let idx = 0; idx < docs.length; idx++) { - const doc = docs[idx]!; - if (doc.id_ === null) { - throw new Error("doc_id not set"); - } - if (!allowUpdate && (await this.documentExists(doc.id_))) { - throw new Error( - `doc_id ${doc.id_} already exists. Set allow_update to True to overwrite.`, - ); - } - const nodeKey = doc.id_; - const data = docToJson(doc, this.serializer); - await this.kvstore.put(nodeKey, data, this.nodeCollection); - const metadata: DocMetaData = { docHash: doc.hash }; - - if (doc.type === ObjectType.TEXT && doc.sourceNode !== undefined) { - const refDocInfo = (await this.getRefDocInfo( - doc.sourceNode.nodeId, - )) || { - nodeIds: [], - extraInfo: {}, - }; - refDocInfo.nodeIds.push(doc.id_); - if (_.isEmpty(refDocInfo.extraInfo)) { - refDocInfo.extraInfo = {}; - } - await this.kvstore.put( - doc.sourceNode.nodeId, - refDocInfo, - this.refDocCollection, - ); - metadata.refDocId = doc.sourceNode.nodeId!; - } - - await this.kvstore.put(nodeKey, metadata, this.metadataCollection); - } - } - - async getDocument( - docId: string, - raiseError: boolean = true, - ): Promise { - const json = await this.kvstore.get(docId, this.nodeCollection); - if (_.isNil(json)) { - if (raiseError) { - throw new Error(`docId ${docId} not found.`); - } else { - return; - } - } - if (!isValidDocJson(json)) { - throw new Error(`Invalid JSON for docId ${docId}`); - } - return jsonToDoc(json, this.serializer); - } - - async getRefDocInfo(refDocId: string): Promise { - const refDocInfo = await this.kvstore.get(refDocId, this.refDocCollection); - return refDocInfo ? (_.clone(refDocInfo) as RefDocInfo) : undefined; - } - - async getAllRefDocInfo(): Promise | undefined> { - const refDocInfos = await this.kvstore.getAll(this.refDocCollection); - if (_.isNil(refDocInfos)) { - return; - } - return refDocInfos as Record; - } - - async refDocExists(refDocId: string): Promise { - return !_.isNil(await this.getRefDocInfo(refDocId)); - } - - async documentExists(docId: string): Promise { - return !_.isNil(await this.kvstore.get(docId, this.nodeCollection)); - } - - private async removeRefDocNode(docId: string): Promise { - const metadata = await this.kvstore.get(docId, this.metadataCollection); - if (metadata === null) { - return; - } - - const refDocId = metadata.refDocId; - if (_.isNil(refDocId)) { - return; - } - - const refDocInfo = await this.kvstore.get(refDocId, this.refDocCollection); - if (!_.isNil(refDocInfo)) { - if (refDocInfo.nodeIds.length > 0) { - await this.kvstore.put(refDocId, refDocInfo, this.refDocCollection); - } - await this.kvstore.delete(refDocId, this.metadataCollection); - } - } - - async deleteDocument( - docId: string, - raiseError: boolean = true, - removeRefDocNode: boolean = true, - ): Promise { - if (removeRefDocNode) { - await this.removeRefDocNode(docId); - } - - const deleteSuccess = await this.kvstore.delete(docId, this.nodeCollection); - await this.kvstore.delete(docId, this.metadataCollection); - - if (!deleteSuccess && raiseError) { - throw new Error(`doc_id ${docId} not found.`); - } - } - - async deleteRefDoc( - refDocId: string, - raiseError: boolean = true, - ): Promise { - const refDocInfo = await this.getRefDocInfo(refDocId); - if (_.isNil(refDocInfo)) { - if (raiseError) { - throw new Error(`ref_doc_id ${refDocId} not found.`); - } else { - return; - } - } - - for (const docId of refDocInfo.nodeIds) { - await this.deleteDocument(docId, false, false); - } - - await this.kvstore.delete(refDocId, this.metadataCollection); - await this.kvstore.delete(refDocId, this.refDocCollection); - } - - async setDocumentHash(docId: string, docHash: string): Promise { - const metadata = { docHash: docHash }; - await this.kvstore.put(docId, metadata, this.metadataCollection); - } - - async getDocumentHash(docId: string): Promise { - const metadata = await this.kvstore.get(docId, this.metadataCollection); - return _.get(metadata, "docHash"); - } - - async getAllDocumentHashes(): Promise> { - const hashes: Record = {}; - const metadataDocs = await this.kvstore.getAll(this.metadataCollection); - for (const docId in metadataDocs) { - const hash = await this.getDocumentHash(docId); - if (hash) { - hashes[hash] = docId; - } - } - return hashes; - } -} diff --git a/packages/llamaindex/src/storage/docStore/SimpleDocumentStore.ts b/packages/llamaindex/src/storage/docStore/SimpleDocumentStore.ts deleted file mode 100644 index d1fd477600..0000000000 --- a/packages/llamaindex/src/storage/docStore/SimpleDocumentStore.ts +++ /dev/null @@ -1,70 +0,0 @@ -import { - DEFAULT_DOC_STORE_PERSIST_FILENAME, - DEFAULT_NAMESPACE, - DEFAULT_PERSIST_DIR, -} from "@llamaindex/core/global"; -import { path } from "@llamaindex/env"; -import _ from "lodash"; -import { SimpleKVStore } from "../kvStore/SimpleKVStore.js"; -import { BaseInMemoryKVStore } from "../kvStore/types.js"; -import { KVDocumentStore } from "./KVDocumentStore.js"; - -// eslint-disable-next-line @typescript-eslint/no-explicit-any -type SaveDict = Record; - -export class SimpleDocumentStore extends KVDocumentStore { - private kvStore: SimpleKVStore; - - constructor(kvStore?: SimpleKVStore, namespace?: string) { - kvStore = kvStore || new SimpleKVStore(); - namespace = namespace || DEFAULT_NAMESPACE; - super(kvStore, namespace); - this.kvStore = kvStore; - } - - static async fromPersistDir( - persistDir: string = DEFAULT_PERSIST_DIR, - namespace?: string, - ): Promise { - const persistPath = path.join( - persistDir, - DEFAULT_DOC_STORE_PERSIST_FILENAME, - ); - return await SimpleDocumentStore.fromPersistPath(persistPath, namespace); - } - - static async fromPersistPath( - persistPath: string, - namespace?: string, - ): Promise { - const simpleKVStore = await SimpleKVStore.fromPersistPath(persistPath); - return new SimpleDocumentStore(simpleKVStore, namespace); - } - - async persist( - persistPath: string = path.join( - DEFAULT_PERSIST_DIR, - DEFAULT_DOC_STORE_PERSIST_FILENAME, - ), - ): Promise { - if ( - _.isObject(this.kvStore) && - this.kvStore instanceof BaseInMemoryKVStore - ) { - await this.kvStore.persist(persistPath); - } - } - - static fromDict(saveDict: SaveDict, namespace?: string): SimpleDocumentStore { - const simpleKVStore = SimpleKVStore.fromDict(saveDict); - return new SimpleDocumentStore(simpleKVStore, namespace); - } - - toDict(): SaveDict { - if (_.isObject(this.kvStore) && this.kvStore instanceof SimpleKVStore) { - return this.kvStore.toDict(); - } - // If the kvstore is not a SimpleKVStore, you might want to throw an error or return a default value. - throw new Error("KVStore is not a SimpleKVStore"); - } -} diff --git a/packages/llamaindex/src/storage/docStore/types.ts b/packages/llamaindex/src/storage/docStore/types.ts deleted file mode 100644 index a97cb8cbd7..0000000000 --- a/packages/llamaindex/src/storage/docStore/types.ts +++ /dev/null @@ -1,77 +0,0 @@ -import { - DEFAULT_DOC_STORE_PERSIST_FILENAME, - DEFAULT_PERSIST_DIR, -} from "@llamaindex/core/global"; -import { BaseNode } from "@llamaindex/core/schema"; -import { jsonSerializer, type Serializer } from "./utils.js"; - -const defaultPersistPath = `${DEFAULT_PERSIST_DIR}/${DEFAULT_DOC_STORE_PERSIST_FILENAME}`; - -export interface RefDocInfo { - nodeIds: string[]; - // eslint-disable-next-line @typescript-eslint/no-explicit-any - extraInfo: Record; -} - -export abstract class BaseDocumentStore { - // eslint-disable-next-line @typescript-eslint/no-explicit-any - serializer: Serializer = jsonSerializer; - - // Save/load - persist(persistPath: string = defaultPersistPath): void { - // Persist the docstore to a file. - } - - // Main interface - abstract docs(): Promise>; - - abstract addDocuments(docs: BaseNode[], allowUpdate: boolean): Promise; - - abstract getDocument( - docId: string, - raiseError: boolean, - ): Promise; - - abstract deleteDocument(docId: string, raiseError: boolean): Promise; - - abstract documentExists(docId: string): Promise; - - // Hash - abstract setDocumentHash(docId: string, docHash: string): Promise; - - abstract getDocumentHash(docId: string): Promise; - - abstract getAllDocumentHashes(): Promise>; - - // Ref Docs - abstract getAllRefDocInfo(): Promise | undefined>; - - abstract getRefDocInfo(refDocId: string): Promise; - - abstract deleteRefDoc(refDocId: string, raiseError: boolean): Promise; - - // Nodes - getNodes(nodeIds: string[], raiseError: boolean = true): Promise { - return Promise.all( - nodeIds.map((nodeId) => this.getNode(nodeId, raiseError)), - ); - } - - async getNode(nodeId: string, raiseError: boolean = true): Promise { - const doc = await this.getDocument(nodeId, raiseError); - if (!(doc instanceof BaseNode)) { - throw new Error(`Document ${nodeId} is not a Node.`); - } - return doc; - } - - async getNodeDict(nodeIdDict: { - [index: number]: string; - }): Promise> { - const result: Record = {}; - for (const index in nodeIdDict) { - result[index] = await this.getNode(nodeIdDict[index]!); - } - return result; - } -} diff --git a/packages/llamaindex/src/storage/docStore/utils.ts b/packages/llamaindex/src/storage/docStore/utils.ts deleted file mode 100644 index 8d8ee3ec44..0000000000 --- a/packages/llamaindex/src/storage/docStore/utils.ts +++ /dev/null @@ -1,88 +0,0 @@ -import type { BaseNode } from "@llamaindex/core/schema"; -import { Document, ObjectType, TextNode } from "@llamaindex/core/schema"; -import type { StoredValue } from "../kvStore/types.js"; - -const TYPE_KEY = "__type__"; -const DATA_KEY = "__data__"; - -export interface Serializer { - toPersistence(data: Record): T; - fromPersistence(data: T): Record; -} - -export const jsonSerializer: Serializer = { - toPersistence(data) { - return JSON.stringify(data); - }, - fromPersistence(data) { - return JSON.parse(data); - }, -}; - -export const noneSerializer: Serializer> = { - toPersistence(data) { - return data; - }, - fromPersistence(data) { - return data; - }, -}; - -type DocJson = { - [TYPE_KEY]: ObjectType; - [DATA_KEY]: Data; -}; - -export function isValidDocJson( - docJson: StoredValue | null | undefined, -): docJson is DocJson { - return ( - typeof docJson === "object" && - docJson !== null && - docJson[TYPE_KEY] !== undefined && - docJson[DATA_KEY] !== undefined - ); -} - -export function docToJson( - doc: BaseNode, - serializer: Serializer, -): DocJson { - return { - [DATA_KEY]: serializer.toPersistence(doc.toJSON()), - [TYPE_KEY]: doc.type, - }; -} - -export function jsonToDoc( - docDict: DocJson, - serializer: Serializer, -): BaseNode { - const docType = docDict[TYPE_KEY]; - // fixme: zod type check this - // eslint-disable-next-line @typescript-eslint/no-explicit-any - const dataDict: any = serializer.fromPersistence(docDict[DATA_KEY]); - let doc: BaseNode; - - if (docType === ObjectType.DOCUMENT) { - doc = new Document({ - text: dataDict.text, - id_: dataDict.id_, - embedding: dataDict.embedding, - hash: dataDict.hash, - metadata: dataDict.metadata, - }); - } else if (docType === ObjectType.TEXT) { - doc = new TextNode({ - text: dataDict.text, - id_: dataDict.id_, - hash: dataDict.hash, - metadata: dataDict.metadata, - relationships: dataDict.relationships, - }); - } else { - throw new Error(`Unknown doc type: ${docType}`); - } - - return doc; -} diff --git a/packages/llamaindex/src/storage/indexStore/KVIndexStore.ts b/packages/llamaindex/src/storage/indexStore/KVIndexStore.ts deleted file mode 100644 index 39a4879c04..0000000000 --- a/packages/llamaindex/src/storage/indexStore/KVIndexStore.ts +++ /dev/null @@ -1,48 +0,0 @@ -import { DEFAULT_NAMESPACE } from "@llamaindex/core/global"; -import _ from "lodash"; -import type { IndexStruct } from "../../indices/IndexStruct.js"; -import { jsonToIndexStruct } from "../../indices/json-to-index-struct.js"; -import type { BaseKVStore } from "../kvStore/types.js"; -import { BaseIndexStore } from "./types.js"; - -export class KVIndexStore extends BaseIndexStore { - private _kvStore: BaseKVStore; - private _collection: string; - - constructor(kvStore: BaseKVStore, namespace: string = DEFAULT_NAMESPACE) { - super(); - this._kvStore = kvStore; - this._collection = `${namespace}/data`; - } - - async addIndexStruct(indexStruct: IndexStruct): Promise { - const key = indexStruct.indexId; - const data = indexStruct.toJson(); - await this._kvStore.put(key, data, this._collection); - } - - async deleteIndexStruct(key: string): Promise { - await this._kvStore.delete(key, this._collection); - } - - async getIndexStruct(structId?: string): Promise { - if (_.isNil(structId)) { - const structs = await this.getIndexStructs(); - if (structs.length !== 1) { - throw new Error("More than one index struct found"); - } - return structs[0]; - } else { - const json = await this._kvStore.get(structId, this._collection); - if (_.isNil(json)) { - return; - } - return jsonToIndexStruct(json); - } - } - - async getIndexStructs(): Promise { - const jsons = await this._kvStore.getAll(this._collection); - return _.values(jsons).map((json) => jsonToIndexStruct(json)); - } -} diff --git a/packages/llamaindex/src/storage/indexStore/SimpleIndexStore.ts b/packages/llamaindex/src/storage/indexStore/SimpleIndexStore.ts deleted file mode 100644 index 516024be54..0000000000 --- a/packages/llamaindex/src/storage/indexStore/SimpleIndexStore.ts +++ /dev/null @@ -1,50 +0,0 @@ -import { - DEFAULT_INDEX_STORE_PERSIST_FILENAME, - DEFAULT_PERSIST_DIR, -} from "@llamaindex/core/global"; -import { path } from "@llamaindex/env"; -import type { DataType } from "../kvStore/SimpleKVStore.js"; -import { SimpleKVStore } from "../kvStore/SimpleKVStore.js"; -import type { BaseInMemoryKVStore } from "../kvStore/types.js"; -import { KVIndexStore } from "./KVIndexStore.js"; - -export class SimpleIndexStore extends KVIndexStore { - private kvStore: BaseInMemoryKVStore; - - constructor(kvStore?: BaseInMemoryKVStore) { - kvStore = kvStore || new SimpleKVStore(); - super(kvStore); - this.kvStore = kvStore; - } - - static async fromPersistDir( - persistDir: string = DEFAULT_PERSIST_DIR, - ): Promise { - const persistPath = path.join( - persistDir, - DEFAULT_INDEX_STORE_PERSIST_FILENAME, - ); - return this.fromPersistPath(persistPath); - } - - static async fromPersistPath(persistPath: string): Promise { - const simpleKVStore = await SimpleKVStore.fromPersistPath(persistPath); - return new SimpleIndexStore(simpleKVStore); - } - - async persist(persistPath: string = DEFAULT_PERSIST_DIR): Promise { - this.kvStore.persist(persistPath); - } - - static fromDict(saveDict: DataType): SimpleIndexStore { - const simpleKVStore = SimpleKVStore.fromDict(saveDict); - return new SimpleIndexStore(simpleKVStore); - } - - toDict(): Record { - if (!(this.kvStore instanceof SimpleKVStore)) { - throw new Error("KVStore is not a SimpleKVStore"); - } - return this.kvStore.toDict(); - } -} diff --git a/packages/llamaindex/src/storage/indexStore/types.ts b/packages/llamaindex/src/storage/indexStore/types.ts deleted file mode 100644 index 2cab6aa77f..0000000000 --- a/packages/llamaindex/src/storage/indexStore/types.ts +++ /dev/null @@ -1,25 +0,0 @@ -import { - DEFAULT_INDEX_STORE_PERSIST_FILENAME, - DEFAULT_PERSIST_DIR, -} from "@llamaindex/core/global"; -import { path } from "@llamaindex/env"; -import type { IndexStruct } from "../../indices/IndexStruct.js"; - -const defaultPersistPath = path.join( - DEFAULT_PERSIST_DIR, - DEFAULT_INDEX_STORE_PERSIST_FILENAME, -); - -export abstract class BaseIndexStore { - abstract getIndexStructs(): Promise; - - abstract addIndexStruct(indexStruct: IndexStruct): Promise; - - abstract deleteIndexStruct(key: string): Promise; - - abstract getIndexStruct(structId?: string): Promise; - - async persist(persistPath: string = defaultPersistPath): Promise { - // Persist the index store to disk. - } -} diff --git a/packages/llamaindex/src/storage/kvStore/SimpleKVStore.ts b/packages/llamaindex/src/storage/kvStore/SimpleKVStore.ts deleted file mode 100644 index 7d08692b4d..0000000000 --- a/packages/llamaindex/src/storage/kvStore/SimpleKVStore.ts +++ /dev/null @@ -1,100 +0,0 @@ -import { DEFAULT_COLLECTION } from "@llamaindex/core/global"; -import { fs, path } from "@llamaindex/env"; -import { exists } from "../FileSystem.js"; -import { BaseKVStore, type StoredValue } from "./types.js"; - -export type DataType = Record>; - -export class SimpleKVStore extends BaseKVStore { - private persistPath: string | undefined; - - constructor(private data: DataType = {}) { - super(); - } - - async put( - key: string, - val: StoredValue, - collection: string = DEFAULT_COLLECTION, - ): Promise { - if (!(collection in this.data)) { - this.data[collection] = {}; - } - this.data[collection]![key] = structuredClone(val); // Creating a shallow copy of the object - - if (this.persistPath) { - await this.persist(this.persistPath); - } - } - - async get( - key: string, - collection: string = DEFAULT_COLLECTION, - ): Promise { - const collectionData = this.data[collection]; - if (collectionData == null) { - return null; - } - if (!(key in collectionData)) { - return null; - } - return structuredClone(collectionData[key]) as StoredValue; // Creating a shallow copy of the object - } - - async getAll(collection: string = DEFAULT_COLLECTION) { - // fixme: null value here - return structuredClone(this.data[collection])!; // Creating a shallow copy of the object - } - - async delete( - key: string, - collection: string = DEFAULT_COLLECTION, - ): Promise { - if (key in this.data[collection]!) { - delete this.data[collection]![key]; - if (this.persistPath) { - await this.persist(this.persistPath); - } - return true; - } - return false; - } - - async persist(persistPath: string): Promise { - // TODO: decide on a way to polyfill path - const dirPath = path.dirname(persistPath); - if (!(await exists(dirPath))) { - await fs.mkdir(dirPath); - } - await fs.writeFile(persistPath, JSON.stringify(this.data)); - } - - static async fromPersistPath(persistPath: string): Promise { - const dirPath = path.dirname(persistPath); - if (!(await exists(dirPath))) { - await fs.mkdir(dirPath); - } - - let data: DataType = {}; - try { - const fileData = await fs.readFile(persistPath); - data = JSON.parse(fileData.toString()); - } catch (e) { - console.error( - `No valid data found at path: ${persistPath} starting new store.`, - ); - } - - const store = new SimpleKVStore(data); - store.persistPath = persistPath; - return store; - } - - toDict(): DataType { - return this.data; - } - - static fromDict(saveDict: DataType): SimpleKVStore { - return new SimpleKVStore(saveDict); - } -} diff --git a/packages/llamaindex/src/storage/kvStore/types.ts b/packages/llamaindex/src/storage/kvStore/types.ts deleted file mode 100644 index e8c6900d60..0000000000 --- a/packages/llamaindex/src/storage/kvStore/types.ts +++ /dev/null @@ -1,23 +0,0 @@ -const defaultCollection = "data"; - -// fixme: remove any -// eslint-disable-next-line @typescript-eslint/no-explicit-any -export type StoredValue = Record | null; - -export abstract class BaseKVStore { - abstract put( - key: string, - val: StoredValue, - collection?: string, - ): Promise; - abstract get(key: string, collection?: string): Promise; - abstract getAll(collection?: string): Promise>; - abstract delete(key: string, collection?: string): Promise; -} - -export abstract class BaseInMemoryKVStore extends BaseKVStore { - abstract persist(persistPath: string): void; - static fromPersistPath(persistPath: string): BaseInMemoryKVStore { - throw new Error("Method not implemented."); - } -} diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 46bfc0330e..57ea89edb8 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -4899,8 +4899,8 @@ packages: peerDependencies: '@redis/client': ^1.0.0 - '@redis/client@1.5.17': - resolution: {integrity: sha512-IPvU9A31qRCZ7lds/x+ksuK/UMndd0EASveAvCvEtFFKIZjZ+m/a4a0L7S28KEWoR5ka8526hlSghDo4Hrc2Hg==} + '@redis/client@1.6.0': + resolution: {integrity: sha512-aR0uffYI700OEEH4gYnitAnv3vzVGXCFvYfdpu/CJKvk4pHfLPEy/JSZyrpQ+15WhXe1yJRXLtfQ84s4mEXnPg==} engines: {node: '>=14'} '@redis/graph@1.1.1': @@ -4908,18 +4908,18 @@ packages: peerDependencies: '@redis/client': ^1.0.0 - '@redis/json@1.0.6': - resolution: {integrity: sha512-rcZO3bfQbm2zPRpqo82XbW8zg4G/w4W3tI7X8Mqleq9goQjAGLL7q/1n1ZX4dXEAmORVZ4s1+uKLaUOg7LrUhw==} + '@redis/json@1.0.7': + resolution: {integrity: sha512-6UyXfjVaTBTJtKNG4/9Z8PSpKE6XgSyEb8iwaqDcy+uKrd/DGYHTWkUdnQDyzm727V7p21WUMhsqz5oy65kPcQ==} peerDependencies: '@redis/client': ^1.0.0 - '@redis/search@1.1.6': - resolution: {integrity: sha512-mZXCxbTYKBQ3M2lZnEddwEAks0Kc7nauire8q20oA0oA/LoA+E/b5Y5KZn232ztPb1FkIGqo12vh3Lf+Vw5iTw==} + '@redis/search@1.2.0': + resolution: {integrity: sha512-tYoDBbtqOVigEDMAcTGsRlMycIIjwMCgD8eR2t0NANeQmgK/lvxNAvYyb6bZDD4frHRhIHkJu2TBRvB0ERkOmw==} peerDependencies: '@redis/client': ^1.0.0 - '@redis/time-series@1.0.5': - resolution: {integrity: sha512-IFjIgTusQym2B5IZJG3XKr5llka7ey84fw/NOYqESP5WUfQs9zz1ww/9+qoz4ka/S6KcGBodzlCeZ5UImKbscg==} + '@redis/time-series@1.1.0': + resolution: {integrity: sha512-c1Q99M5ljsIuc4YdaCwfUEXsofakb9c8+Zse2qxTadu8TalLXuAESzLvFAvNVbkmSlvlzIQOLpBCmWI9wTOt+g==} peerDependencies: '@redis/client': ^1.0.0 @@ -11415,8 +11415,8 @@ packages: resolution: {integrity: sha512-8HrF5ZsXk5FAH9dgsx3BlUer73nIhuj+9OrQwEbLTPOBzGkL1lsFCR01am+v+0m2Cmbs1nP12hLDl5FA7EszKA==} engines: {node: '>=6.0.0'} - redis@4.6.15: - resolution: {integrity: sha512-2NtuOpMW3tnYzBw6S8mbXSX7RPzvVFCA2wFJq9oErushO2UeBkxObk+uvo7gv7n0rhWeOj/IzrHO8TjcFlRSOg==} + redis@4.7.0: + resolution: {integrity: sha512-zvmkHEAdGMn+hMRXuMBtu4Vo5P6rHQjLoHftu+lBqq8ZTA3RCVC/WzD790bkKKiNFp7d5/9PcSD19fJyyRvOdQ==} reflect.getprototypeof@1.0.6: resolution: {integrity: sha512-fmfw4XgoDke3kdI6h4xcUz1dG8uaiv5q9gcEwLS4Pnth2kxT+GZ7YehS1JTMGBQmtV7Y4GFGbs2re2NqhdozUg==} @@ -18685,31 +18685,31 @@ snapshots: react: 18.3.1 react-dom: 18.3.1(react@18.3.1) - '@redis/bloom@1.2.0(@redis/client@1.5.17)': + '@redis/bloom@1.2.0(@redis/client@1.6.0)': dependencies: - '@redis/client': 1.5.17 + '@redis/client': 1.6.0 - '@redis/client@1.5.17': + '@redis/client@1.6.0': dependencies: cluster-key-slot: 1.1.2 generic-pool: 3.9.0 yallist: 4.0.0 - '@redis/graph@1.1.1(@redis/client@1.5.17)': + '@redis/graph@1.1.1(@redis/client@1.6.0)': dependencies: - '@redis/client': 1.5.17 + '@redis/client': 1.6.0 - '@redis/json@1.0.6(@redis/client@1.5.17)': + '@redis/json@1.0.7(@redis/client@1.6.0)': dependencies: - '@redis/client': 1.5.17 + '@redis/client': 1.6.0 - '@redis/search@1.1.6(@redis/client@1.5.17)': + '@redis/search@1.2.0(@redis/client@1.6.0)': dependencies: - '@redis/client': 1.5.17 + '@redis/client': 1.6.0 - '@redis/time-series@1.0.5(@redis/client@1.5.17)': + '@redis/time-series@1.1.0(@redis/client@1.6.0)': dependencies: - '@redis/client': 1.5.17 + '@redis/client': 1.6.0 '@rollup/plugin-commonjs@28.0.1(rollup@4.24.0)': dependencies: @@ -25146,7 +25146,7 @@ snapshots: memjs: 1.3.2 mongoose: 8.5.1(@aws-sdk/credential-providers@3.675.0) pg: 8.12.0 - redis: 4.6.15 + redis: 4.7.0 safe-stable-stringify: 2.5.0 stopwords-iso: 1.1.0 sylvester: 0.0.12 @@ -26715,14 +26715,14 @@ snapshots: dependencies: minimatch: 3.1.2 - redis@4.6.15: + redis@4.7.0: dependencies: - '@redis/bloom': 1.2.0(@redis/client@1.5.17) - '@redis/client': 1.5.17 - '@redis/graph': 1.1.1(@redis/client@1.5.17) - '@redis/json': 1.0.6(@redis/client@1.5.17) - '@redis/search': 1.1.6(@redis/client@1.5.17) - '@redis/time-series': 1.0.5(@redis/client@1.5.17) + '@redis/bloom': 1.2.0(@redis/client@1.6.0) + '@redis/client': 1.6.0 + '@redis/graph': 1.1.1(@redis/client@1.6.0) + '@redis/json': 1.0.7(@redis/client@1.6.0) + '@redis/search': 1.2.0(@redis/client@1.6.0) + '@redis/time-series': 1.1.0(@redis/client@1.6.0) reflect.getprototypeof@1.0.6: dependencies: