Skip to content

Commit

Permalink
refactor: todo
Browse files Browse the repository at this point in the history
  • Loading branch information
himself65 committed Oct 31, 2024
1 parent 52a4d2b commit e58ae37
Show file tree
Hide file tree
Showing 23 changed files with 449 additions and 748 deletions.
28 changes: 28 additions & 0 deletions packages/core/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -214,6 +214,34 @@
"default": "./storage/chat-store/dist/index.js"
}
},
"./storage/docstore": {
"require": {
"types": "./storage/docstore/dist/index.d.cts",
"default": "./storage/docstore/dist/index.cjs"
},
"import": {
"types": "./storage/docstore/dist/index.d.ts",
"default": "./storage/docstore/dist/index.js"
},
"default": {
"types": "./storage/docstore/dist/index.d.ts",
"default": "./storage/docstore/dist/index.js"
}
},
"./storage/kvstore": {
"require": {
"types": "./storage/kvstore/dist/index.d.cts",
"default": "./storage/kvstore/dist/index.cjs"
},
"import": {
"types": "./storage/kvstore/dist/index.d.ts",
"default": "./storage/kvstore/dist/index.js"
},
"default": {
"types": "./storage/kvstore/dist/index.d.ts",
"default": "./storage/kvstore/dist/index.js"
}
},
"./response-synthesizers": {
"require": {
"types": "./response-synthesizers/dist/index.d.cts",
Expand Down
15 changes: 12 additions & 3 deletions packages/core/src/global/constants.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,12 @@
import { path } from "@llamaindex/env";
export {
DEFAULT_PERSIST_FNAME,
DEFAULT_PERSIST_DIR,
DEFAULT_PERSIST_PATH,
DEFAULT_METADATA_COLLECTION_SUFFIX,
DEFAULT_COLLECTION_DATA_SUFFIX,
DEFAULT_NAMESPACE,
DEFAULT_REF_DOC_COLLECTION_SUFFIX
} from '../storage/docstore/index.js';

//#region llm
export const DEFAULT_CONTEXT_WINDOW = 3900;
Expand All @@ -10,15 +18,16 @@ export const DEFAULT_PADDING = 5;
//#endregion
//#region storage
// DEFAULT_PERSIST_DIR and DEFAULT_NAMESPACE are deliberately NOT declared in this
// region: this file already re-exports both names from ../storage/docstore (see the
// `export { ... } from` statement at the top), and declaring them again here would
// export the same identifier twice.

/** Default collection name. */
export const DEFAULT_COLLECTION = "data";
/** Default file name for a persisted index store. */
export const DEFAULT_INDEX_STORE_PERSIST_FILENAME = "index_store.json";
/** Default file name for a persisted document store. */
export const DEFAULT_DOC_STORE_PERSIST_FILENAME = "doc_store.json";
/** Default file name for a persisted vector store. */
export const DEFAULT_VECTOR_STORE_PERSIST_FILENAME = "vector_store.json";
/** Default file name for a persisted graph store. */
export const DEFAULT_GRAPH_STORE_PERSIST_FILENAME = "graph_store.json";
/** Default namespace for image vectors. */
export const DEFAULT_IMAGE_VECTOR_NAMESPACE = "images";
//#endregion
//#region llama cloud
// Defaults for the llama cloud region: a project name and an API base URL.
// NOTE(review): the consumers of these two values are not visible in this file.
export const DEFAULT_PROJECT_NAME = "Default";
export const DEFAULT_BASE_URL = "https://api.cloud.llamaindex.ai";
//#endregion
//#region vector store
// Default number of items handled per batch; storage/docstore/kv-document-store.ts
// imports this name and uses it as the default `batchSize` constructor argument.
export const DEFAULT_BATCH_SIZE = 100;
//#endregion
1 change: 1 addition & 0 deletions packages/core/src/schema/node.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import { createSHA256, path, randomUUID } from "@llamaindex/env";
import { lazyInitHash } from "../decorator";
import { chunkSizeCheck } from "./utils/chunk-size-check";
import { z } from 'zod'

export enum NodeRelationship {
SOURCE = "SOURCE",
Expand Down
3 changes: 3 additions & 0 deletions packages/core/src/storage/docstore/index.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
export { DEFAULT_PERSIST_PATH, DEFAULT_PERSIST_DIR, DEFAULT_PERSIST_FNAME} from './types'

export { DEFAULT_METADATA_COLLECTION_SUFFIX, DEFAULT_COLLECTION_DATA_SUFFIX, DEFAULT_NAMESPACE, DEFAULT_REF_DOC_COLLECTION_SUFFIX} from './kv-document-store'
80 changes: 80 additions & 0 deletions packages/core/src/storage/docstore/kv-document-store.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
import { BaseDocumentStore } from './types';
import { BaseNode } from '../../schema';
import type { BaseKVStore } from '../kvstore';
import { DEFAULT_BATCH_SIZE } from '../../global';
import { path } from '@llamaindex/env';
import { type DocJson, jsonToDoc } from './utils';

/** The default namespace prefix for the document store. */
export const DEFAULT_NAMESPACE = 'docstore';
/**
 * Suffix of the nodes collection. That collection contains the content of each node,
 * along with metadata specific to each node, including associated attributes like
 * excluded metadata and relationships.
 */
export const DEFAULT_COLLECTION_DATA_SUFFIX = '/data';
/**
 * Suffix of the ref-doc collection. That collection contains mappings from each
 * document to the list of node IDs that belong to it, including the document's metadata.
 */
export const DEFAULT_REF_DOC_COLLECTION_SUFFIX = '/ref_doc_info';
/**
 * Suffix of the metadata collection. That collection contains references from each
 * node to its corresponding document, including the node's document hash and
 * reference document ID.
 */
export const DEFAULT_METADATA_COLLECTION_SUFFIX = '/metadata';

/**
 * Document store that keeps its data in a key-value store.
 *
 * Three collection names are derived in the constructor by joining the namespace
 * with a per-collection suffix using `path.join`.
 *
 * NOTE(review): this class is work in progress — `#prepareKVPair` and
 * `addDocuments` are stubs, and only `docs` and `addDocuments` of the abstract
 * members declared by `BaseDocumentStore` are present here.
 * NOTE(review): the constructor accepts `BaseKVStore<DocJson>` while the field is
 * typed `BaseKVStore<DocJson<unknown>>`; confirm which shape is intended.
 * NOTE(review): if `path.join` emits platform-specific separators, collection
 * names would differ between platforms — confirm against `@llamaindex/env`.
 */
export class KVDocumentStore extends BaseDocumentStore {
  private kvStore: BaseKVStore<DocJson<unknown>>;
  #namespace: string;
  #nodeCollectionSuffix: string;
  #refDocCollectionSuffix: string;
  #metadataCollectionSuffix: string;
  #nodeCollection: string;
  #refDocCollection: string;
  #metadataCollection: string;
  #batchSize: number;

  /**
   * @param kvStore Backing key-value store.
   * @param namespace Prefix joined in front of every collection suffix.
   * @param batchSize Batch size stored on the instance and used by `addDocuments`
   *   when the caller does not supply one.
   * @param nodeCollectionSuffix Suffix for the node collection name.
   * @param refDocCollectionSuffix Suffix for the ref-doc collection name.
   * @param metadataCollectionSuffix Suffix for the metadata collection name.
   */
  constructor(
    kvStore: BaseKVStore<DocJson>,
    namespace: string = DEFAULT_NAMESPACE,
    batchSize: number = DEFAULT_BATCH_SIZE,
    nodeCollectionSuffix: string = DEFAULT_COLLECTION_DATA_SUFFIX,
    refDocCollectionSuffix: string = DEFAULT_REF_DOC_COLLECTION_SUFFIX,
    metadataCollectionSuffix: string = DEFAULT_METADATA_COLLECTION_SUFFIX
  ) {
    super();
    // Every collection name is the namespace joined with that collection's suffix.
    const collectionFor = (suffix: string): string => path.join(namespace, suffix);

    this.kvStore = kvStore;
    this.#batchSize = batchSize;
    this.#namespace = namespace;

    this.#nodeCollectionSuffix = nodeCollectionSuffix;
    this.#nodeCollection = collectionFor(nodeCollectionSuffix);

    this.#refDocCollectionSuffix = refDocCollectionSuffix;
    this.#refDocCollection = collectionFor(refDocCollectionSuffix);

    this.#metadataCollectionSuffix = metadataCollectionSuffix;
    this.#metadataCollection = collectionFor(metadataCollectionSuffix);
  }

  /**
   * Reads every entry of the node collection from the key-value store and
   * converts each one to a node with `jsonToDoc` and this store's serializer.
   *
   * @returns A promise of a map from the entry key to the rebuilt node.
   */
  get docs(): Promise<Map<string, BaseNode>> {
    return this.kvStore.getAll(this.#nodeCollection).then((stored) => {
      const rebuilt = Object.entries(stored).map(
        ([id, json]) => [id, jsonToDoc(json, this.serializer)] as const,
      );
      return new Map<string, BaseNode>(rebuilt);
    });
  }

  /** Stub: the body is empty and none of the parameters are used yet. */
  #prepareKVPair(
    key: string,
    val: DocJson<unknown>,
    collection: string | undefined
  ) {}

  /**
   * Stub: currently only resolves the effective batch size and stores nothing.
   *
   * @param docs Nodes to add (unused so far).
   * @param allowUpdate Unused so far.
   * @param batchSize Optional override of the instance batch size.
   * @param storeText Unused so far.
   */
  async addDocuments(
    docs: BaseNode[],
    allowUpdate: boolean,
    batchSize?: number,
    storeText?: boolean
  ) {
    // Any falsy value (undefined, 0, NaN) falls back to the instance batch size.
    if (!batchSize) {
      batchSize = this.#batchSize;
    }
  }
}
85 changes: 85 additions & 0 deletions packages/core/src/storage/docstore/types.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
import { path } from '@llamaindex/env';
import { BaseNode } from '../../schema';
import { jsonSerializer, type Serializer } from './utils';

/** Default file name of a persisted document store. */
export const DEFAULT_PERSIST_FNAME = 'docstore.json';

/** Default directory for persisted storage. */
export const DEFAULT_PERSIST_DIR = './storage';

/** Default persist location: `DEFAULT_PERSIST_DIR` joined with `DEFAULT_PERSIST_FNAME`. */
export const DEFAULT_PERSIST_PATH = path.join(DEFAULT_PERSIST_DIR, DEFAULT_PERSIST_FNAME);

/**
 * Information recorded for a reference document: the ids of its nodes plus
 * caller-defined extra information.
 *
 * Exported because it appears in the public method signatures of
 * `BaseDocumentStore`; subclasses declared in other modules need to be able to
 * name it when annotating their overrides.
 *
 * @typeParam ExtraInfo Shape of the extra information attached to the document.
 */
export type RefDocInfo<ExtraInfo extends Record<string, unknown>> = {
  /** Ids of the nodes that belong to the reference document. */
  nodeIds: string[];
  /** Additional information stored alongside the node ids. */
  extraInfo: ExtraInfo;
};

/**
 * Abstract contract of a document store.
 *
 * Apart from the `serializer` field, this class only declares signatures.
 * NOTE(review): the per-member descriptions below are inferred from names and
 * types; confirm them against a concrete implementation.
 */
export abstract class BaseDocumentStore {
  /** Serializer for node data; defaults to `jsonSerializer`. */
  serializer: Serializer<
    Record<string, unknown>,
    // we don't care about what's the target type of the serialization, so we use any here
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    any
  > = jsonSerializer;

  /** Persists the store; `persistPath` presumably names the target location. */
  abstract persist(persistPath: string): Promise<void>;

  /** Resolves to the stored nodes, keyed by string. */
  abstract get docs(): Promise<Map<string, BaseNode>>;

  /** Adds nodes to the store; presumably written in batches of `batchSize`. */
  abstract addDocuments(
    docs: BaseNode[],
    allowUpdate: boolean,
    batchSize: number,
    storeText: boolean,
  ): Promise<void>;

  /** Looks up one document by id; may resolve to `undefined`. */
  abstract getDocument(docId: string, raiseError: boolean): Promise<BaseNode | undefined>;

  /** Deletes one document by id. */
  abstract deleteDocument(docId: string, raiseError: boolean): Promise<void>;

  /** Resolves to whether a document with the given id exists. */
  abstract documentExists(docId: string): Promise<boolean>;

  /** Records a hash for the given document id. */
  abstract setDocumentHash(docId: string, docHash: string): Promise<void>;

  /** Resolves to the hash recorded for the given document id, if any. */
  abstract getDocumentHash(docId: string): Promise<string | undefined>;

  /** Resolves to a string-to-string map; presumably document id to hash. */
  abstract getAllDocumentHashes(): Promise<Map<string, string>>;

  /** Resolves to a map of reference-document info, keyed by string. */
  abstract getAllRefDocInfo(): Promise<Map<string, RefDocInfo<Record<string, unknown>> | undefined>>;

  /** Resolves to the info of one reference document, if any. */
  abstract getRefDocInfo(refDocId: string): Promise<RefDocInfo<Record<string, unknown>> | undefined>;

  /** Deletes one reference document by id. */
  abstract deleteRefDoc(refDocId: string, raiseError: boolean): Promise<void>;

  /** Resolves to the nodes for the given node ids. */
  abstract getNodes(nodeIds: string[], raiseError: boolean): Promise<BaseNode[]>;

  /** Resolves to the node for the given node id. */
  abstract getNode(nodeId: string, raiseError: boolean): Promise<BaseNode>;

  /** Resolves a number-keyed record of node ids to a number-keyed record of nodes. */
  abstract getNodeDict(nodeIdDict: Record<number, string>): Promise<Record<number, BaseNode>>;
}
99 changes: 99 additions & 0 deletions packages/core/src/storage/docstore/utils.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
import type { BaseNode } from "../../schema";
import { Document, ObjectType, TextNode } from "../../schema";
import { ImageDocument, MetadataMode } from '../../schema';
import type { SerializableValue } from '../kvstore';

// Property names of the persisted document envelope (see `DocJson` below):
// TYPE_KEY holds the node's object type, DATA_KEY holds the serialized node data.
const TYPE_KEY = "__type__";
const DATA_KEY = "__data__";

/**
 * Two-way conversion between an in-memory value and its persisted form.
 *
 * @typeParam Data In-memory representation.
 * @typeParam Persistence Representation that is written to storage.
 */
export interface Serializer<Data, Persistence> {
  /** Converts an in-memory value to its persisted form. */
  toPersistence(data: Data): Persistence;

  /** Converts a persisted value back to its in-memory form. */
  fromPersistence(data: Persistence): Data;
}

/**
 * Serializer that persists a record as a JSON string.
 *
 * `toPersistence` uses `JSON.stringify`; `fromPersistence` uses `JSON.parse`
 * and returns the parsed value without validating its shape.
 */
export const jsonSerializer: Serializer<Record<string, unknown>, string> = {
  toPersistence: (record) => JSON.stringify(record),
  fromPersistence: (text) => JSON.parse(text),
};

/**
 * Pass-through serializer: both directions return the very same object they
 * were given, without copying it.
 */
export const noneSerializer: Serializer<Record<string, unknown>, Record<string, unknown>> = {
  toPersistence: (record) => record,
  fromPersistence: (record) => record,
};

/**
 * Persisted envelope of a node: its object type plus its serialized data.
 *
 * The type is exported and generic over the persisted data so that other
 * modules can import it and pick the data shape their serializer produces
 * (for example `DocJson<unknown>`). Plain `DocJson` keeps the previous meaning,
 * with the data typed as `Record<string, unknown>`.
 *
 * @typeParam Data Type of the value stored under the data key.
 */
export type DocJson<Data = Record<string, unknown>> = {
  [TYPE_KEY]: ObjectType;
  [DATA_KEY]: Data; // from BaseNode, todo: add zod type check here
};

/**
 * Type guard for the persisted document envelope.
 *
 * Only checks that the value is a non-null object that has both the type key
 * and the data key; the values stored under those keys are not inspected.
 *
 * @param docJson Value to check.
 * @returns `true` when both envelope keys are present.
 */
export function isValidDocJson(
  docJson: SerializableValue,
): docJson is DocJson {
  if (typeof docJson !== "object" || docJson === null) {
    return false;
  }
  return TYPE_KEY in docJson && DATA_KEY in docJson;
}

/**
 * Wraps a node in its persisted envelope.
 *
 * @param doc Node to convert.
 * @param serializer Serializer whose `toPersistence` produces the stored data.
 * @returns An object holding the serialized data under the data key and
 *   `doc.type` under the type key.
 */
export function docToJson(
  doc: BaseNode,
  serializer: Serializer<BaseNode, Record<string, unknown>>,
): DocJson {
  const persisted = serializer.toPersistence(doc);
  // Key order (data first, then type) is kept as in the original literal.
  return { [DATA_KEY]: persisted, [TYPE_KEY]: doc.type };
}

/**
 * Rebuilds a node from its persisted envelope.
 *
 * The data stored under the data key is passed through
 * `serializer.fromPersistence`, and the object type stored under the type key
 * selects which node class is constructed.
 *
 * NOTE(review): the TEXT branch does not restore `embedding`, unlike the
 * DOCUMENT and IMAGE_DOCUMENT branches, and only the TEXT branch restores
 * `relationships` — confirm this asymmetry is intended.
 *
 * @param docDict Persisted envelope.
 * @param serializer Provides `fromPersistence` to decode the stored data.
 * @returns A `Document`, `TextNode` or `ImageDocument`.
 * @throws Error when the stored object type is none of the three handled types.
 */
export function jsonToDoc(
  docDict: DocJson,
  serializer: Pick<Serializer<Record<string, unknown>, unknown>, 'fromPersistence'>,
): BaseNode {
  const kind = docDict[TYPE_KEY];

  // fixme: add zod type check here
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  const fields: Record<string, any> = serializer.fromPersistence(docDict[DATA_KEY]);

  switch (kind) {
    case ObjectType.DOCUMENT:
      return new Document({
        text: fields.text,
        id_: fields.id_,
        embedding: fields.embedding,
        hash: fields.hash,
        metadata: fields.metadata,
      });
    case ObjectType.TEXT:
      return new TextNode({
        text: fields.text,
        id_: fields.id_,
        hash: fields.hash,
        metadata: fields.metadata,
        relationships: fields.relationships,
      });
    case ObjectType.IMAGE_DOCUMENT:
      return new ImageDocument({
        image: fields.image,
        id_: fields.id_,
        embedding: fields.embedding,
        hash: fields.hash,
        metadata: fields.metadata,
      });
    default:
      throw new Error(`Unknown doc type: ${kind}`);
  }
}

6 changes: 6 additions & 0 deletions packages/core/src/storage/kvstore/index.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
export {
type SerializableValue,
BaseKVStore,
BaseFileSystemKVStore
} from './types';
export { SimpleKVStore } from './simple-kv-store';
Loading

0 comments on commit e58ae37

Please sign in to comment.