-
Notifications
You must be signed in to change notification settings - Fork 0
/
import.py
75 lines (65 loc) · 2.65 KB
/
import.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
from loguru import logger
import json
import weaviate
from weaviate.classes.config import Property, DataType, Configure, ReferenceProperty
from weaviate.classes.query import Filter, QueryReference
from weaviate.classes.data import DataReference
from weaviate.util import generate_uuid5
from weaviate.collections.classes.config import ConsistencyLevel
def import_books():
    """Reset the local Weaviate instance and load the book data set.

    Opens a client via ``weaviate.connect_to_local()`` (managed by a ``with``
    block) and runs three steps in order against it: clear the schema, create
    the schema, import the data. A success message is logged once all three
    steps have returned.
    """
    # Each step takes the connected client as its only argument; order matters.
    pipeline = (_clear_schema, _create_schema, _import_data)
    with weaviate.connect_to_local() as client:
        for step in pipeline:
            step(client)
        logger.success("successfully imported data")
def _clear_schema(client: weaviate.WeaviateClient):
    """Delete every collection reachable through ``client``.

    Args:
        client: Connected Weaviate client whose collections are removed.
    """
    logger.info("clear schema")
    all_collections = client.collections
    all_collections.delete_all()
def _create_schema(client: weaviate.WeaviateClient):
    """Create the ``Books`` collection with its properties and vector setup.

    The collection gets six properties (``uuid``, four text fields and
    ``page_count``), two named vectors that are both built from the
    ``description`` property, and an Ollama generative configuration.

    Args:
        client: Connected Weaviate client on which the collection is created.
    """
    logger.info("create Books collection")

    # Properties, in schema order: uuid, the text fields, then page_count.
    book_properties = [Property(name="uuid", data_type=DataType.UUID)]
    book_properties += [
        Property(name=text_field, data_type=DataType.TEXT)
        for text_field in ("author", "title", "description", "genre")
    ]
    book_properties.append(Property(name="page_count", data_type=DataType.INT))

    # "english": explicit inference endpoint, HNSW index with PQ quantization.
    english_index = Configure.VectorIndex.hnsw(
        quantizer=Configure.VectorIndex.Quantizer.pq(),
    )
    english_vector = Configure.NamedVectors.text2vec_transformers(
        name="english",
        source_properties=["description"],
        inference_url="http://t2v-transformers-mixedbread-ai-mxbai-embed:8080",
        vectorize_collection_name=False,
        vector_index_config=english_index,
    )

    # "multi_lang": no inference URL given here, flat vector index.
    multi_lang_vector = Configure.NamedVectors.text2vec_transformers(
        name="multi_lang",
        source_properties=["description"],
        vectorize_collection_name=False,
        vector_index_config=Configure.VectorIndex.flat(),
    )

    ollama_generative = Configure.Generative.ollama(
        api_endpoint="http://generative-ollama:11434",
        model="aya-expanse:8b",
    )

    created = client.collections.create(
        name="Books",
        properties=book_properties,
        vectorizer_config=[english_vector, multi_lang_vector],
        generative_config=ollama_generative,
    )

    # Sanity checks on the object returned by create().
    assert created is not None
    assert created.name == "Books"
def _import_data(client: weaviate.WeaviateClient):
    """Load the records from ``data/books.json`` into the ``Books`` collection.

    The path is relative, so it is resolved against the current working
    directory. Every record is sent as the object's properties, and its
    ``uuid`` entry is also used as the object id. Objects that the batch
    reports as failed are logged as errors instead of being dropped silently.

    Args:
        client: Connected Weaviate client that already has a ``Books``
            collection.

    Raises:
        OSError: If the JSON file cannot be opened.
        json.JSONDecodeError: If the file is not valid JSON.
        KeyError: If a record has no ``uuid`` entry.
    """
    books_json = "data/books.json"
    # JSON text is UTF-8; do not rely on the platform's default encoding.
    with open(books_json, encoding="utf-8") as f:
        books = json.load(f)
    logger.info("import {} books", len(books))

    collection = client.collections.get("Books")
    with collection.batch.dynamic() as batch:
        for book in books:
            batch.add_object(properties=book, uuid=book["uuid"])
        # Explicit flush kept from the original implementation.
        batch.flush()

    # Per-object batch errors do not raise; they are collected on the batch
    # wrapper and have to be inspected once the batch context has exited.
    failed = collection.batch.failed_objects
    if failed:
        logger.error("{} of {} books failed to import", len(failed), len(books))
        # Only the first few messages, to keep the log readable.
        for failure in failed[:5]:
            logger.error("failed object: {}", failure.message)
# Run the import only when executed as a script, not when imported as a module.
if __name__ == "__main__":
    import_books()