Skip to content

Commit

Permalink
enhance: autoindex for multi data type
Browse files Browse the repository at this point in the history
Signed-off-by: chasingegg <chao.gao@zilliz.com>
  • Loading branch information
chasingegg committed Jun 14, 2024
1 parent 62bd51e commit 049bbd2
Show file tree
Hide file tree
Showing 13 changed files with 140 additions and 31 deletions.
19 changes: 16 additions & 3 deletions internal/proxy/task_index.go
Original file line number Diff line number Diff line change
Expand Up @@ -263,10 +263,23 @@ func (cit *createIndexTask) parseIndexParams() error {
return err
}
}
if indexType == indexparamcheck.IndexSparseInverted || indexType == indexparamcheck.IndexSparseWand {
if isVecIndex {
metricType, metricTypeExist := indexParamsMap[common.MetricTypeKey]
if !metricTypeExist || metricType != metric.IP {
return fmt.Errorf("only IP is the supported metric type for sparse index")
if !metricTypeExist {
return fmt.Errorf("metric type not set for vector index")
}
if typeutil.IsDenseFloatVectorType(cit.fieldSchema.DataType) {
if !funcutil.SliceContain(indexparamcheck.FloatVectorMetrics, metricType) {
return fmt.Errorf("float vector index does not support metric type: " + metricType)
}
} else if typeutil.IsSparseFloatVectorType(cit.fieldSchema.DataType) {
if metricType != metric.IP {
return fmt.Errorf("only IP is the supported metric type for sparse index")
}
} else if typeutil.IsBinaryVectorType(cit.fieldSchema.DataType) {
if !funcutil.SliceContain(indexparamcheck.BinaryVectorMetrics, metricType) {
return fmt.Errorf("binary vector index does not support metric type: " + metricType)
}
}
}
}
Expand Down
1 change: 1 addition & 0 deletions internal/querynodev2/optimizers/query_hook.go
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ func OptimizeSearchParams(ctx context.Context, req *querypb.SearchRequest, query
common.SearchParamKey: queryInfo.GetSearchParams(),
common.SegmentNumKey: estSegmentNum,
common.WithFilterKey: withFilter,
common.DataTypeKey: plan.GetVectorAnns().GetVectorType(),
common.WithOptimizeKey: paramtable.Get().AutoIndexConfig.EnableOptimize.GetAsBool(),
common.CollectionKey: req.GetReq().GetCollectionID(),
}
Expand Down
1 change: 1 addition & 0 deletions pkg/common/common.go
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,7 @@ const (
SearchParamKey = "search_param"
SegmentNumKey = "segment_num"
WithFilterKey = "with_filter"
DataTypeKey = "data_type"
WithOptimizeKey = "with_optimize"
CollectionKey = "collection"

Expand Down
2 changes: 1 addition & 1 deletion pkg/util/indexparamcheck/base_checker.go
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ func (c baseChecker) CheckValidDataType(dType schemapb.DataType) error {
return nil
}

func (c baseChecker) SetDefaultMetricTypeIfNotExist(m map[string]string) {}
func (c baseChecker) SetDefaultMetricTypeIfNotExist(m map[string]string, dType schemapb.DataType) {}

func (c baseChecker) StaticCheck(params map[string]string) error {
return errors.New("unsupported index type")
Expand Down
2 changes: 1 addition & 1 deletion pkg/util/indexparamcheck/binary_vector_base_checker.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ func (c binaryVectorBaseChecker) CheckValidDataType(dType schemapb.DataType) err
return nil
}

func (c binaryVectorBaseChecker) SetDefaultMetricTypeIfNotExist(params map[string]string) {
func (c binaryVectorBaseChecker) SetDefaultMetricTypeIfNotExist(params map[string]string, dType schemapb.DataType) {
setDefaultIfNotExist(params, common.MetricTypeKey, BinaryVectorDefaultMetricType)
}

Expand Down
6 changes: 4 additions & 2 deletions pkg/util/indexparamcheck/constraints.go
Original file line number Diff line number Diff line change
Expand Up @@ -46,8 +46,10 @@ const (
SparseDropRatioBuild = "drop_ratio_build"
)

// METRICS is a set of all metrics types supported for float vector.
var METRICS = []string{metric.L2, metric.IP, metric.COSINE} // const
var (
FloatVectorMetrics = []string{metric.L2, metric.IP, metric.COSINE} // const
BinaryVectorMetrics = []string{metric.HAMMING, metric.JACCARD, metric.SUBSTRUCTURE, metric.SUPERSTRUCTURE} // const
)

// BinIDMapMetrics is a set of all metric types supported for binary vector.
var (
Expand Down
6 changes: 3 additions & 3 deletions pkg/util/indexparamcheck/float_vector_base_checker.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,8 @@ type floatVectorBaseChecker struct {
}

func (c floatVectorBaseChecker) staticCheck(params map[string]string) error {
if !CheckStrByValues(params, Metric, METRICS) {
return fmt.Errorf("metric type %s not found or not supported, supported: %v", params[Metric], METRICS)
if !CheckStrByValues(params, Metric, FloatVectorMetrics) {
return fmt.Errorf("metric type %s not found or not supported, supported: %v", params[Metric], FloatVectorMetrics)
}

return nil
Expand All @@ -35,7 +35,7 @@ func (c floatVectorBaseChecker) CheckValidDataType(dType schemapb.DataType) erro
return nil
}

func (c floatVectorBaseChecker) SetDefaultMetricTypeIfNotExist(params map[string]string) {
func (c floatVectorBaseChecker) SetDefaultMetricTypeIfNotExist(params map[string]string, dType schemapb.DataType) {
setDefaultIfNotExist(params, common.MetricTypeKey, FloatVectorDefaultMetricType)
}

Expand Down
13 changes: 12 additions & 1 deletion pkg/util/indexparamcheck/hnsw_checker.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,12 @@ import (
"fmt"

"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
"github.com/milvus-io/milvus/pkg/common"
"github.com/milvus-io/milvus/pkg/util/typeutil"
)

type hnswChecker struct {
floatVectorBaseChecker
baseChecker
}

func (c hnswChecker) StaticCheck(params map[string]string) error {
Expand Down Expand Up @@ -38,6 +39,16 @@ func (c hnswChecker) CheckValidDataType(dType schemapb.DataType) error {
return nil
}

// SetDefaultMetricTypeIfNotExist picks the default metric type that matches
// the vector family of dType (dense float, sparse float or binary) and hands
// it to setDefaultIfNotExist for the common.MetricTypeKey entry of params.
// For any other data type params is left untouched.
func (c hnswChecker) SetDefaultMetricTypeIfNotExist(params map[string]string, dType schemapb.DataType) {
	switch {
	case typeutil.IsDenseFloatVectorType(dType):
		setDefaultIfNotExist(params, common.MetricTypeKey, FloatVectorDefaultMetricType)
	case typeutil.IsSparseFloatVectorType(dType):
		setDefaultIfNotExist(params, common.MetricTypeKey, SparseFloatVectorDefaultMetricType)
	case typeutil.IsBinaryVectorType(dType):
		setDefaultIfNotExist(params, common.MetricTypeKey, BinaryVectorDefaultMetricType)
	}
}

// newHnswChecker returns a pointer to a zero-valued hnswChecker as an
// IndexChecker.
func newHnswChecker() IndexChecker {
	return new(hnswChecker)
}
39 changes: 39 additions & 0 deletions pkg/util/indexparamcheck/hnsw_checker_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -172,3 +172,42 @@ func Test_hnswChecker_CheckValidDataType(t *testing.T) {
}
}
}

// Test_hnswChecker_SetDefaultMetricType checks that the HNSW checker fills in
// the expected default metric type for each vector data type when the index
// params do not specify one.
func Test_hnswChecker_SetDefaultMetricType(t *testing.T) {
	cases := []struct {
		dType      schemapb.DataType
		metricType string
	}{
		{
			dType:      schemapb.DataType_FloatVector,
			metricType: metric.IP,
		},
		{
			dType:      schemapb.DataType_Float16Vector,
			metricType: metric.IP,
		},
		{
			dType:      schemapb.DataType_BFloat16Vector,
			metricType: metric.IP,
		},
		{
			dType:      schemapb.DataType_SparseFloatVector,
			metricType: metric.IP,
		},
		{
			dType:      schemapb.DataType_BinaryVector,
			metricType: metric.JACCARD,
		},
	}

	c := newHnswChecker()
	for _, test := range cases {
		// Fresh params per case, deliberately without a metric type entry.
		p := map[string]string{
			DIM:            strconv.Itoa(128),
			HNSWM:          strconv.Itoa(16),
			EFConstruction: strconv.Itoa(200),
		}
		c.SetDefaultMetricTypeIfNotExist(p, test.dType)
		// testify expects (expected, actual); the data type is added to the
		// failure message so a failing case can be identified.
		assert.Equal(t, test.metricType, p[Metric], "data type: %v", test.dType)
	}
}
2 changes: 1 addition & 1 deletion pkg/util/indexparamcheck/index_checker.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,6 @@ import (
type IndexChecker interface {
CheckTrain(map[string]string) error
CheckValidDataType(dType schemapb.DataType) error
SetDefaultMetricTypeIfNotExist(map[string]string)
SetDefaultMetricTypeIfNotExist(map[string]string, schemapb.DataType)
StaticCheck(map[string]string) error
}
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ func (c sparseFloatVectorBaseChecker) CheckValidDataType(dType schemapb.DataType
return nil
}

func (c sparseFloatVectorBaseChecker) SetDefaultMetricTypeIfNotExist(params map[string]string) {
func (c sparseFloatVectorBaseChecker) SetDefaultMetricTypeIfNotExist(params map[string]string, dType schemapb.DataType) {
setDefaultIfNotExist(params, common.MetricTypeKey, SparseFloatVectorDefaultMetricType)
}

Expand Down
25 changes: 16 additions & 9 deletions pkg/util/paramtable/autoindex_param.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ package paramtable
import (
"fmt"

"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
"github.com/milvus-io/milvus/pkg/common"
"github.com/milvus-io/milvus/pkg/config"
"github.com/milvus-io/milvus/pkg/util/funcutil"
Expand Down Expand Up @@ -193,31 +194,37 @@ func (p *autoIndexConfig) init(base *BaseTable) {
}

func (p *autoIndexConfig) panicIfNotValidAndSetDefaultMetricType(mgr *config.Manager) {
m := p.IndexParams.GetAsJSONMap()
p.panicIfNotValidAndSetDefaultMetricTypeHelper(p.IndexParams.Key, p.IndexParams.GetAsJSONMap(), schemapb.DataType_FloatVector, mgr)
p.panicIfNotValidAndSetDefaultMetricTypeHelper(p.BinaryIndexParams.Key, p.BinaryIndexParams.GetAsJSONMap(), schemapb.DataType_BinaryVector, mgr)
p.panicIfNotValidAndSetDefaultMetricTypeHelper(p.SparseIndexParams.Key, p.SparseIndexParams.GetAsJSONMap(), schemapb.DataType_SparseFloatVector, mgr)
}

func (p *autoIndexConfig) panicIfNotValidAndSetDefaultMetricTypeHelper(key string, m map[string]string, dtype schemapb.DataType, mgr *config.Manager) {
// Validate the build params m stored under key for vectors of type dtype, fill in the default metric type, and write the result back to the config manager; panic on any invalid input.
if m == nil {
panic("autoIndex.build not invalid, should be json format")
panic(fmt.Sprintf("%s invalid, should be json format", key))
}

indexType, ok := m[common.IndexTypeKey]
if !ok {
panic("autoIndex.build not invalid, index type not found")
panic(fmt.Sprintf("%s invalid, index type not found", key))
}

checker, err := indexparamcheck.GetIndexCheckerMgrInstance().GetChecker(indexType)
if err != nil {
panic(fmt.Sprintf("autoIndex.build not invalid, unsupported index type: %s", indexType))
panic(fmt.Sprintf("%s invalid, unsupported index type: %s", key, indexType))
}

checker.SetDefaultMetricTypeIfNotExist(m)
checker.SetDefaultMetricTypeIfNotExist(m, dtype)

if err := checker.StaticCheck(m); err != nil {
panic(fmt.Sprintf("autoIndex.build not invalid, parameters not invalid, error: %s", err.Error()))
panic(fmt.Sprintf("%s invalid, parameters invalid, error: %s", key, err.Error()))
}

p.reset(m, mgr)
p.reset(key, m, mgr)
}

func (p *autoIndexConfig) reset(m map[string]string, mgr *config.Manager) {
func (p *autoIndexConfig) reset(key string, m map[string]string, mgr *config.Manager) {
j := funcutil.MapToJSON(m)
mgr.SetConfig("autoIndex.params.build", string(j))
mgr.SetConfig(key, string(j))
}
53 changes: 44 additions & 9 deletions pkg/util/paramtable/autoindex_param_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ import (

"github.com/stretchr/testify/assert"

"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
"github.com/milvus-io/milvus/pkg/common"
"github.com/milvus-io/milvus/pkg/config"
"github.com/milvus-io/milvus/pkg/util/indexparamcheck"
Expand Down Expand Up @@ -140,7 +141,7 @@ func Test_autoIndexConfig_panicIfNotValid(t *testing.T) {
}
p.IndexParams.Init(mgr)
assert.Panics(t, func() {
p.panicIfNotValidAndSetDefaultMetricType(mgr)
p.panicIfNotValidAndSetDefaultMetricTypeHelper(p.IndexParams.Key, p.IndexParams.GetAsJSONMap(), schemapb.DataType_FloatVector, mgr)
})
})

Expand All @@ -154,7 +155,7 @@ func Test_autoIndexConfig_panicIfNotValid(t *testing.T) {
}
p.IndexParams.Init(mgr)
assert.Panics(t, func() {
p.panicIfNotValidAndSetDefaultMetricType(mgr)
p.panicIfNotValidAndSetDefaultMetricTypeHelper(p.IndexParams.Key, p.IndexParams.GetAsJSONMap(), schemapb.DataType_FloatVector, mgr)
})
})

Expand All @@ -168,7 +169,7 @@ func Test_autoIndexConfig_panicIfNotValid(t *testing.T) {
}
p.IndexParams.Init(mgr)
assert.Panics(t, func() {
p.panicIfNotValidAndSetDefaultMetricType(mgr)
p.panicIfNotValidAndSetDefaultMetricTypeHelper(p.IndexParams.Key, p.IndexParams.GetAsJSONMap(), schemapb.DataType_FloatVector, mgr)
})
})

Expand All @@ -182,13 +183,47 @@ func Test_autoIndexConfig_panicIfNotValid(t *testing.T) {
}
p.IndexParams.Init(mgr)
assert.NotPanics(t, func() {
p.panicIfNotValidAndSetDefaultMetricType(mgr)
p.panicIfNotValidAndSetDefaultMetricTypeHelper(p.IndexParams.Key, p.IndexParams.GetAsJSONMap(), schemapb.DataType_FloatVector, mgr)
})
metricType, exist := p.IndexParams.GetAsJSONMap()[common.MetricTypeKey]
assert.True(t, exist)
assert.Equal(t, indexparamcheck.FloatVectorDefaultMetricType, metricType)
})

t.Run("normal case, binary vector", func(t *testing.T) {
mgr := config.NewManager()
mgr.SetConfig("autoIndex.params.binary.build", `{"nlist": 1024, "index_type": "BIN_IVF_FLAT"}`)
p := &autoIndexConfig{
BinaryIndexParams: ParamItem{
Key: "autoIndex.params.binary.build",
},
}
p.BinaryIndexParams.Init(mgr)
assert.NotPanics(t, func() {
p.panicIfNotValidAndSetDefaultMetricTypeHelper(p.BinaryIndexParams.Key, p.BinaryIndexParams.GetAsJSONMap(), schemapb.DataType_BinaryVector, mgr)
})
metricType, exist := p.BinaryIndexParams.GetAsJSONMap()[common.MetricTypeKey]
assert.True(t, exist)
assert.Equal(t, indexparamcheck.BinaryVectorDefaultMetricType, metricType)
})

t.Run("normal case, sparse vector", func(t *testing.T) {
mgr := config.NewManager()
mgr.SetConfig("autoIndex.params.sparse.build", `{"index_type": "SPARSE_INVERTED_INDEX", "metric_type": "IP"}`)
p := &autoIndexConfig{
SparseIndexParams: ParamItem{
Key: "autoIndex.params.sparse.build",
},
}
p.SparseIndexParams.Init(mgr)
assert.NotPanics(t, func() {
p.panicIfNotValidAndSetDefaultMetricTypeHelper(p.SparseIndexParams.Key, p.SparseIndexParams.GetAsJSONMap(), schemapb.DataType_SparseFloatVector, mgr)
})
metricType, exist := p.SparseIndexParams.GetAsJSONMap()[common.MetricTypeKey]
assert.True(t, exist)
assert.Equal(t, indexparamcheck.SparseFloatVectorDefaultMetricType, metricType)
})

t.Run("normal case, ivf flat", func(t *testing.T) {
mgr := config.NewManager()
mgr.SetConfig("autoIndex.params.build", `{"nlist": 30, "index_type": "IVF_FLAT"}`)
Expand All @@ -199,7 +234,7 @@ func Test_autoIndexConfig_panicIfNotValid(t *testing.T) {
}
p.IndexParams.Init(mgr)
assert.NotPanics(t, func() {
p.panicIfNotValidAndSetDefaultMetricType(mgr)
p.panicIfNotValidAndSetDefaultMetricTypeHelper(p.IndexParams.Key, p.IndexParams.GetAsJSONMap(), schemapb.DataType_FloatVector, mgr)
})
metricType, exist := p.IndexParams.GetAsJSONMap()[common.MetricTypeKey]
assert.True(t, exist)
Expand All @@ -216,7 +251,7 @@ func Test_autoIndexConfig_panicIfNotValid(t *testing.T) {
}
p.IndexParams.Init(mgr)
assert.NotPanics(t, func() {
p.panicIfNotValidAndSetDefaultMetricType(mgr)
p.panicIfNotValidAndSetDefaultMetricTypeHelper(p.IndexParams.Key, p.IndexParams.GetAsJSONMap(), schemapb.DataType_FloatVector, mgr)
})
metricType, exist := p.IndexParams.GetAsJSONMap()[common.MetricTypeKey]
assert.True(t, exist)
Expand All @@ -233,7 +268,7 @@ func Test_autoIndexConfig_panicIfNotValid(t *testing.T) {
}
p.IndexParams.Init(mgr)
assert.NotPanics(t, func() {
p.panicIfNotValidAndSetDefaultMetricType(mgr)
p.panicIfNotValidAndSetDefaultMetricTypeHelper(p.IndexParams.Key, p.IndexParams.GetAsJSONMap(), schemapb.DataType_FloatVector, mgr)
})
metricType, exist := p.IndexParams.GetAsJSONMap()[common.MetricTypeKey]
assert.True(t, exist)
Expand All @@ -250,7 +285,7 @@ func Test_autoIndexConfig_panicIfNotValid(t *testing.T) {
}
p.IndexParams.Init(mgr)
assert.NotPanics(t, func() {
p.panicIfNotValidAndSetDefaultMetricType(mgr)
p.panicIfNotValidAndSetDefaultMetricTypeHelper(p.IndexParams.Key, p.IndexParams.GetAsJSONMap(), schemapb.DataType_FloatVector, mgr)
})
metricType, exist := p.IndexParams.GetAsJSONMap()[common.MetricTypeKey]
assert.True(t, exist)
Expand All @@ -267,7 +302,7 @@ func Test_autoIndexConfig_panicIfNotValid(t *testing.T) {
}
p.IndexParams.Init(mgr)
assert.NotPanics(t, func() {
p.panicIfNotValidAndSetDefaultMetricType(mgr)
p.panicIfNotValidAndSetDefaultMetricTypeHelper(p.IndexParams.Key, p.IndexParams.GetAsJSONMap(), schemapb.DataType_FloatVector, mgr)
})
metricType, exist := p.IndexParams.GetAsJSONMap()[common.MetricTypeKey]
assert.True(t, exist)
Expand Down

0 comments on commit 049bbd2

Please sign in to comment.