-
Notifications
You must be signed in to change notification settings - Fork 79
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #1171 from weaviate/1.26/fix-updating-quantizers
Fix updating quantizers between `pq`, `bq`, and `sq` with `hnsw` index
- Loading branch information
Showing
5 changed files
with
290 additions
and
22 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,102 @@ | ||
from typing import Literal, Optional | ||
|
||
|
||
def _hnsw_vector_config(quantizer: Optional[Literal["pq", "bq", "sq"]] = None) -> dict:
    """Build one named-vector entry using an HNSW index and no vectorizer.

    A fresh dict is created on every call so that the named vectors of a
    schema never share nested objects.

    Args:
        quantizer: The quantizer whose ``enabled`` flag is set to ``True``.
            With ``None`` all three quantizers are disabled.

    Returns:
        The named-vector configuration as a plain dict.
    """
    return {
        "vectorIndexConfig": {
            "skip": False,
            "cleanupIntervalSeconds": 300,
            "maxConnections": 32,
            "efConstruction": 128,
            "ef": -1,
            "dynamicEfMin": 100,
            "dynamicEfMax": 500,
            "dynamicEfFactor": 8,
            "vectorCacheMaxObjects": 1000000000000,
            "flatSearchCutoff": 40000,
            "distance": "cosine",
            "pq": {
                "enabled": quantizer == "pq",
                "bitCompression": False,
                "segments": 0,
                "centroids": 256,
                "trainingLimit": 100000,
                "encoder": {"type": "kmeans", "distribution": "log-normal"},
            },
            "bq": {"enabled": quantizer == "bq"},
            "sq": {
                "enabled": quantizer == "sq",
                "trainingLimit": 100000,
                "rescoreLimit": 20,
            },
        },
        "vectorIndexType": "hnsw",
        "vectorizer": {"none": {}},
    }


def multi_vector_schema(quantizer: Optional[Literal["pq", "bq", "sq"]] = None) -> dict:
    """Return a schema dict for class ``Something`` with two named vectors.

    Both named vectors (``"boi"`` and ``"yeh"``) use an identical HNSW
    configuration; only the ``enabled`` flag of the requested quantizer
    differs from the all-disabled default.

    Args:
        quantizer: Which of ``"pq"``, ``"bq"`` or ``"sq"`` to mark as enabled
            on both named vectors. ``None`` leaves every quantizer disabled.

    Returns:
        A newly built schema dict; no nested object is shared between calls
        or between the two named vectors.
    """
    return {
        "class": "Something",
        "invertedIndexConfig": {
            "bm25": {"b": 0.75, "k1": 1.2},
            "cleanupIntervalSeconds": 60,
            "stopwords": {"additions": None, "preset": "en", "removals": None},
        },
        "multiTenancyConfig": {
            "autoTenantActivation": False,
            "autoTenantCreation": False,
            "enabled": False,
        },
        "properties": [
            {
                "dataType": ["text"],
                "indexFilterable": True,
                "indexRangeFilters": False,
                "indexSearchable": True,
                "name": "name",
                "tokenization": "word",
            }
        ],
        "replicationConfig": {"asyncEnabled": False, "factor": 1},
        "shardingConfig": {
            "virtualPerPhysical": 128,
            "desiredCount": 1,
            "actualCount": 1,
            "desiredVirtualCount": 128,
            "actualVirtualCount": 128,
            "key": "_id",
            "strategy": "hash",
            "function": "murmur3",
        },
        "vectorConfig": {
            # Two separate calls: each named vector gets its own dict objects.
            "boi": _hnsw_vector_config(quantizer),
            "yeh": _hnsw_vector_config(quantizer),
        },
    }
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,104 @@ | ||
import pytest | ||
|
||
from test.collection.schema import multi_vector_schema | ||
from weaviate.collections.classes.config import _CollectionConfigUpdate, Reconfigure | ||
from weaviate.exceptions import WeaviateInvalidInputError | ||
|
||
|
||
@pytest.mark.parametrize(
    "schema,should_error",
    [
        (multi_vector_schema(), False),
        (multi_vector_schema("bq"), True),
        (multi_vector_schema("sq"), True),
    ],
)
def test_enabling_pq_multi_vector(schema: dict, should_error: bool) -> None:
    """Enable PQ on named vector ``"boi"`` and check the merged schema.

    When the existing schema already has ``bq`` or ``sq`` enabled, merging is
    expected to raise ``WeaviateInvalidInputError``. Otherwise only ``pq`` ends
    up enabled on ``"boi"`` and ``"yeh"`` is left as it was.
    """
    from copy import deepcopy

    update = _CollectionConfigUpdate(
        vectorizer_config=[
            Reconfigure.NamedVectors.update(
                name="boi",
                vector_index_config=Reconfigure.VectorIndex.hnsw(
                    quantizer=Reconfigure.VectorIndex.Quantizer.pq()
                ),
            )
        ]
    )
    if should_error:
        with pytest.raises(WeaviateInvalidInputError):
            update.merge_with_existing(schema)
        return

    # Snapshot "yeh" before merging: if merge_with_existing mutates and returns
    # its argument (NOTE(review): confirm), comparing the result against the
    # input itself would always pass.
    yeh_before = deepcopy(schema["vectorConfig"]["yeh"])

    new_schema = update.merge_with_existing(schema)

    boi_config = new_schema["vectorConfig"]["boi"]["vectorIndexConfig"]
    assert boi_config["pq"]["enabled"]
    assert not boi_config["bq"]["enabled"]
    assert not boi_config["sq"]["enabled"]

    assert new_schema["vectorConfig"]["yeh"] == yeh_before
|
||
|
||
@pytest.mark.parametrize(
    "schema,should_error",
    [
        (multi_vector_schema(), False),
        (multi_vector_schema("pq"), True),
        (multi_vector_schema("sq"), True),
    ],
)
def test_enabling_bq_multi_vector(schema: dict, should_error: bool) -> None:
    """Enable BQ on named vector ``"boi"`` and check the merged schema.

    When the existing schema already has ``pq`` or ``sq`` enabled, merging is
    expected to raise ``WeaviateInvalidInputError``. Otherwise only ``bq`` ends
    up enabled on ``"boi"`` and ``"yeh"`` is left as it was.
    """
    from copy import deepcopy

    update = _CollectionConfigUpdate(
        vectorizer_config=[
            Reconfigure.NamedVectors.update(
                name="boi",
                vector_index_config=Reconfigure.VectorIndex.hnsw(
                    quantizer=Reconfigure.VectorIndex.Quantizer.bq()
                ),
            )
        ]
    )
    if should_error:
        with pytest.raises(WeaviateInvalidInputError):
            update.merge_with_existing(schema)
        return

    # Snapshot "yeh" before merging: if merge_with_existing mutates and returns
    # its argument (NOTE(review): confirm), comparing the result against the
    # input itself would always pass.
    yeh_before = deepcopy(schema["vectorConfig"]["yeh"])

    new_schema = update.merge_with_existing(schema)

    boi_config = new_schema["vectorConfig"]["boi"]["vectorIndexConfig"]
    assert not boi_config["pq"]["enabled"]
    assert boi_config["bq"]["enabled"]
    assert not boi_config["sq"]["enabled"]

    assert new_schema["vectorConfig"]["yeh"] == yeh_before
|
||
|
||
@pytest.mark.parametrize(
    "schema,should_error",
    [
        (multi_vector_schema(), False),
        (multi_vector_schema("pq"), True),
        (multi_vector_schema("bq"), True),
    ],
)
def test_enabling_sq_multi_vector(schema: dict, should_error: bool) -> None:
    """Enable SQ on named vector ``"boi"`` and check the merged schema.

    When the existing schema already has ``pq`` or ``bq`` enabled, merging is
    expected to raise ``WeaviateInvalidInputError``. Otherwise only ``sq`` ends
    up enabled on ``"boi"`` and ``"yeh"`` is left as it was.
    """
    from copy import deepcopy

    update = _CollectionConfigUpdate(
        vectorizer_config=[
            Reconfigure.NamedVectors.update(
                name="boi",
                vector_index_config=Reconfigure.VectorIndex.hnsw(
                    quantizer=Reconfigure.VectorIndex.Quantizer.sq()
                ),
            )
        ]
    )
    if should_error:
        with pytest.raises(WeaviateInvalidInputError):
            update.merge_with_existing(schema)
        return

    # Snapshot "yeh" before merging: if merge_with_existing mutates and returns
    # its argument (NOTE(review): confirm), comparing the result against the
    # input itself would always pass.
    yeh_before = deepcopy(schema["vectorConfig"]["yeh"])

    new_schema = update.merge_with_existing(schema)

    boi_config = new_schema["vectorConfig"]["boi"]["vectorIndexConfig"]
    assert not boi_config["pq"]["enabled"]
    assert not boi_config["bq"]["enabled"]
    assert boi_config["sq"]["enabled"]

    assert new_schema["vectorConfig"]["yeh"] == yeh_before
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters