From 5c4279aaf873f91277bb747e476c2a17e9b4a1df Mon Sep 17 00:00:00 2001 From: usamoi Date: Sun, 17 Mar 2024 14:53:16 +0800 Subject: [PATCH 1/6] fix: set amoptionalkey to false Signed-off-by: usamoi --- src/index/am.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/index/am.rs b/src/index/am.rs index c56cf96bf..a178f6737 100644 --- a/src/index/am.rs +++ b/src/index/am.rs @@ -52,7 +52,7 @@ const AM_HANDLER: pgrx::pg_sys::IndexAmRoutine = { am_routine.amcanbackward = false; am_routine.amcanunique = false; am_routine.amcanmulticol = false; - am_routine.amoptionalkey = true; + am_routine.amoptionalkey = false; am_routine.amsearcharray = false; am_routine.amsearchnulls = false; am_routine.amstorage = false; From 8b535d1473bd3801e3743b04f0565dde1c9d59cf Mon Sep 17 00:00:00 2001 From: usamoi Date: Sun, 17 Mar 2024 14:53:41 +0800 Subject: [PATCH 2/6] fix: set returned value of aminsert to false Signed-off-by: usamoi --- src/index/am.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/index/am.rs b/src/index/am.rs index a178f6737..32c02508b 100644 --- a/src/index/am.rs +++ b/src/index/am.rs @@ -210,7 +210,7 @@ pub unsafe extern "C" fn aminsert( if let Some(v) = vector { am_update::update_insert(id, v, *heap_tid); } - true + false } #[pgrx::pg_guard] From f653fe5379b4cdae19baf1a1b2118515309d57d2 Mon Sep 17 00:00:00 2001 From: usamoi Date: Sun, 17 Mar 2024 15:01:35 +0800 Subject: [PATCH 3/6] fix: forbid scanning with a non-MVCC-compliant snapshot Signed-off-by: usamoi --- src/index/am_scan.rs | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/index/am_scan.rs b/src/index/am_scan.rs index 4f7b3662a..cf762b03a 100644 --- a/src/index/am_scan.rs +++ b/src/index/am_scan.rs @@ -64,6 +64,14 @@ pub unsafe fn next_scan(scan: pgrx::pg_sys::IndexScanDesc) -> bool { let scanner = &mut *((*scan).opaque as *mut Scanner); if let Scanner::Initial { vector } = scanner { if let Some(vector) = vector.as_ref() { + // 
https://www.postgresql.org/docs/current/index-locking.html + // If heap entries referenced by physical pointers are deleted before + // they are consumed by PostgreSQL, PostgreSQL will receive wrong + // physical pointers: no rows or irrelevant rows are referenced. + if (*(*scan).xs_snapshot).snapshot_type != pgrx::pg_sys::SnapshotType_SNAPSHOT_MVCC { + pgrx::error!("scanning with a non-MVCC-compliant snapshot is not supported"); + } + let oid = (*(*scan).indexRelation).rd_id; let id = get_handle(oid); From a0e229cb11aed183204f5c5a177791060b2fe41c Mon Sep 17 00:00:00 2001 From: usamoi Date: Sun, 17 Mar 2024 19:15:23 +0800 Subject: [PATCH 4/6] fix: pg_vector_index_stat Signed-off-by: usamoi --- src/sql/finalize.sql | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/src/sql/finalize.sql b/src/sql/finalize.sql index 5d35f7bad..84da3b931 100644 --- a/src/sql/finalize.sql +++ b/src/sql/finalize.sql @@ -664,67 +664,67 @@ CREATE OPERATOR FAMILY veci8_cos_ops USING vectors; -- List of operator classes CREATE OPERATOR CLASS vector_l2_ops - FOR TYPE vector USING vectors AS + FOR TYPE vector USING vectors FAMILY vector_l2_ops AS OPERATOR 1 <-> (vector, vector) FOR ORDER BY float_ops; CREATE OPERATOR CLASS vector_dot_ops - FOR TYPE vector USING vectors AS + FOR TYPE vector USING vectors FAMILY vector_dot_ops AS OPERATOR 1 <#> (vector, vector) FOR ORDER BY float_ops; CREATE OPERATOR CLASS vector_cos_ops - FOR TYPE vector USING vectors AS + FOR TYPE vector USING vectors FAMILY vector_cos_ops AS OPERATOR 1 <=> (vector, vector) FOR ORDER BY float_ops; CREATE OPERATOR CLASS vecf16_l2_ops - FOR TYPE vecf16 USING vectors AS + FOR TYPE vecf16 USING vectors FAMILY vecf16_l2_ops AS OPERATOR 1 <-> (vecf16, vecf16) FOR ORDER BY float_ops; CREATE OPERATOR CLASS vecf16_dot_ops - FOR TYPE vecf16 USING vectors AS + FOR TYPE vecf16 USING vectors FAMILY vecf16_dot_ops AS OPERATOR 1 <#> (vecf16, vecf16) FOR ORDER BY float_ops; CREATE OPERATOR
CLASS vecf16_cos_ops - FOR TYPE vecf16 USING vectors AS + FOR TYPE vecf16 USING vectors FAMILY vecf16_cos_ops AS OPERATOR 1 <=> (vecf16, vecf16) FOR ORDER BY float_ops; CREATE OPERATOR CLASS svector_l2_ops - FOR TYPE svector USING vectors AS + FOR TYPE svector USING vectors FAMILY svector_l2_ops AS OPERATOR 1 <-> (svector, svector) FOR ORDER BY float_ops; CREATE OPERATOR CLASS svector_dot_ops - FOR TYPE svector USING vectors AS + FOR TYPE svector USING vectors FAMILY svector_dot_ops AS OPERATOR 1 <#> (svector, svector) FOR ORDER BY float_ops; CREATE OPERATOR CLASS svector_cos_ops - FOR TYPE svector USING vectors AS + FOR TYPE svector USING vectors FAMILY svector_cos_ops AS OPERATOR 1 <=> (svector, svector) FOR ORDER BY float_ops; CREATE OPERATOR CLASS bvector_l2_ops - FOR TYPE bvector USING vectors AS + FOR TYPE bvector USING vectors FAMILY bvector_l2_ops AS OPERATOR 1 <-> (bvector, bvector) FOR ORDER BY float_ops; CREATE OPERATOR CLASS bvector_dot_ops - FOR TYPE bvector USING vectors AS + FOR TYPE bvector USING vectors FAMILY bvector_dot_ops AS OPERATOR 1 <#> (bvector, bvector) FOR ORDER BY float_ops; CREATE OPERATOR CLASS bvector_cos_ops - FOR TYPE bvector USING vectors AS + FOR TYPE bvector USING vectors FAMILY bvector_cos_ops AS OPERATOR 1 <=> (bvector, bvector) FOR ORDER BY float_ops; CREATE OPERATOR CLASS bvector_jaccard_ops - FOR TYPE bvector USING vectors AS + FOR TYPE bvector USING vectors FAMILY bvector_jaccard_ops AS OPERATOR 1 <~> (bvector, bvector) FOR ORDER BY float_ops; CREATE OPERATOR CLASS veci8_l2_ops - FOR TYPE veci8 USING vectors AS + FOR TYPE veci8 USING vectors FAMILY veci8_l2_ops AS OPERATOR 1 <-> (veci8, veci8) FOR ORDER BY float_ops; CREATE OPERATOR CLASS veci8_dot_ops - FOR TYPE veci8 USING vectors AS + FOR TYPE veci8 USING vectors FAMILY veci8_dot_ops AS OPERATOR 1 <#> (veci8, veci8) FOR ORDER BY float_ops; CREATE OPERATOR CLASS veci8_cos_ops - FOR TYPE veci8 USING vectors AS + FOR TYPE veci8 USING vectors FAMILY veci8_cos_ops AS OPERATOR 
1 <=> (veci8, veci8) FOR ORDER BY float_ops; -- List of views @@ -735,7 +735,7 @@ CREATE VIEW pg_vector_index_stat AS I.oid AS indexrelid, C.relname AS tablename, I.relname AS indexname, - (_vectors_index_stat(I.relfilenode)).* + (_vectors_index_stat(I.oid)).* FROM pg_class C JOIN pg_index X ON C.oid = X.indrelid JOIN pg_class I ON I.oid = X.indexrelid JOIN From b40a93b3c5239c8812d230f0cf3c6b5be0dd890c Mon Sep 17 00:00:00 2001 From: usamoi Date: Sun, 17 Mar 2024 19:22:03 +0800 Subject: [PATCH 5/6] fix: printing info instead of throwing errors in amvalidate Signed-off-by: usamoi --- crates/base/src/index.rs | 4 +- src/datatype/memory_bvecf32.rs | 6 +- src/datatype/memory_svecf32.rs | 6 +- src/datatype/memory_vecf16.rs | 6 +- src/datatype/memory_vecf32.rs | 6 +- src/datatype/memory_veci8.rs | 6 +- src/error.rs | 9 -- src/index/am.rs | 23 +--- src/index/am_setup.rs | 228 ++++++++++++++------------------- 9 files changed, 131 insertions(+), 163 deletions(-) diff --git a/crates/base/src/index.rs b/crates/base/src/index.rs index b6179a468..6630f5bee 100644 --- a/crates/base/src/index.rs +++ b/crates/base/src/index.rs @@ -119,10 +119,10 @@ pub struct VectorOptions { #[validate(range(min = 1, max = 1_048_575))] #[serde(rename = "dimensions")] pub dims: u32, - #[serde(rename = "distance")] - pub d: DistanceKind, #[serde(rename = "vector")] pub v: VectorKind, + #[serde(rename = "distance")] + pub d: DistanceKind, } impl VectorOptions { diff --git a/src/datatype/memory_bvecf32.rs b/src/datatype/memory_bvecf32.rs index 3f15060ec..fad19ac9f 100644 --- a/src/datatype/memory_bvecf32.rs +++ b/src/datatype/memory_bvecf32.rs @@ -143,7 +143,11 @@ impl IntoDatum for BVecf32Output { } fn type_oid() -> Oid { - pgrx::wrappers::regtypein("vectors.bvector") + let namespace = pgrx::pg_catalog::PgNamespace::search_namespacename(c"vectors").unwrap(); + let namespace = namespace.get().expect("pgvecto.rs is not installed."); + let t = pgrx::pg_catalog::PgType::search_typenamensp(c"bvector", 
namespace.oid()).unwrap(); + let t = t.get().expect("pg_catalog is broken."); + t.oid() } } diff --git a/src/datatype/memory_svecf32.rs b/src/datatype/memory_svecf32.rs index 80eadb004..b8c2968e6 100644 --- a/src/datatype/memory_svecf32.rs +++ b/src/datatype/memory_svecf32.rs @@ -159,7 +159,11 @@ impl IntoDatum for SVecf32Output { } fn type_oid() -> Oid { - pgrx::wrappers::regtypein("vectors.svector") + let namespace = pgrx::pg_catalog::PgNamespace::search_namespacename(c"vectors").unwrap(); + let namespace = namespace.get().expect("pgvecto.rs is not installed."); + let t = pgrx::pg_catalog::PgType::search_typenamensp(c"svector", namespace.oid()).unwrap(); + let t = t.get().expect("pg_catalog is broken."); + t.oid() } } diff --git a/src/datatype/memory_vecf16.rs b/src/datatype/memory_vecf16.rs index 552090002..9f9497168 100644 --- a/src/datatype/memory_vecf16.rs +++ b/src/datatype/memory_vecf16.rs @@ -140,7 +140,11 @@ impl IntoDatum for Vecf16Output { } fn type_oid() -> Oid { - pgrx::wrappers::regtypein("vectors.vecf16") + let namespace = pgrx::pg_catalog::PgNamespace::search_namespacename(c"vectors").unwrap(); + let namespace = namespace.get().expect("pgvecto.rs is not installed."); + let t = pgrx::pg_catalog::PgType::search_typenamensp(c"vecf16", namespace.oid()).unwrap(); + let t = t.get().expect("pg_catalog is broken."); + t.oid() } } diff --git a/src/datatype/memory_vecf32.rs b/src/datatype/memory_vecf32.rs index b1469c719..5ce411e6c 100644 --- a/src/datatype/memory_vecf32.rs +++ b/src/datatype/memory_vecf32.rs @@ -136,7 +136,11 @@ impl IntoDatum for Vecf32Output { } fn type_oid() -> Oid { - pgrx::wrappers::regtypein("vectors.vector") + let namespace = pgrx::pg_catalog::PgNamespace::search_namespacename(c"vectors").unwrap(); + let namespace = namespace.get().expect("pgvecto.rs is not installed."); + let t = pgrx::pg_catalog::PgType::search_typenamensp(c"vector", namespace.oid()).unwrap(); + let t = t.get().expect("pg_catalog is broken."); + t.oid() } } diff 
--git a/src/datatype/memory_veci8.rs b/src/datatype/memory_veci8.rs index 9b54b2a63..386ba3088 100644 --- a/src/datatype/memory_veci8.rs +++ b/src/datatype/memory_veci8.rs @@ -200,7 +200,11 @@ impl IntoDatum for Veci8Output { } fn type_oid() -> Oid { - pgrx::wrappers::regtypein("vectors.veci8") + let namespace = pgrx::pg_catalog::PgNamespace::search_namespacename(c"vectors").unwrap(); + let namespace = namespace.get().expect("pgvecto.rs is not installed."); + let t = pgrx::pg_catalog::PgType::search_typenamensp(c"veci8", namespace.oid()).unwrap(); + let t = t.get().expect("pg_catalog is broken."); + t.oid() } } diff --git a/src/error.rs b/src/error.rs index 86e556dcb..fd368de4c 100644 --- a/src/error.rs +++ b/src/error.rs @@ -100,15 +100,6 @@ pgvecto.rs: Dimensions type modifier of a vector column is needed for building t } } -pub fn bad_opclass() -> ! { - error!( - "\ -pgvecto.rs: Indexes can only be built on built-in distance functions. -ADVICE: If you want pgvecto.rs to support more distance functions, \ -visit `https://github.com/tensorchord/pgvecto.rs/issues` and contribute your ideas." - ); -} - pub fn bad_service_not_exist() -> ! 
{ error!( "\ diff --git a/src/index/am.rs b/src/index/am.rs index 32c02508b..13c519218 100644 --- a/src/index/am.rs +++ b/src/index/am.rs @@ -44,22 +44,7 @@ const AM_HANDLER: pgrx::pg_sys::IndexAmRoutine = { am_routine.type_ = pgrx::pg_sys::NodeTag::T_IndexAmRoutine; - am_routine.amstrategies = 1; - am_routine.amsupport = 0; - - am_routine.amcanorder = false; am_routine.amcanorderbyop = true; - am_routine.amcanbackward = false; - am_routine.amcanunique = false; - am_routine.amcanmulticol = false; - am_routine.amoptionalkey = false; - am_routine.amsearcharray = false; - am_routine.amsearchnulls = false; - am_routine.amstorage = false; - am_routine.amclusterable = false; - am_routine.ampredlocks = false; - am_routine.amcaninclude = false; - am_routine.amkeytype = pgrx::pg_sys::InvalidOid; am_routine.amvalidate = Some(amvalidate); am_routine.amoptions = Some(amoptions); @@ -82,8 +67,12 @@ const AM_HANDLER: pgrx::pg_sys::IndexAmRoutine = { #[pgrx::pg_guard] pub unsafe extern "C" fn amvalidate(opclass_oid: pgrx::pg_sys::Oid) -> bool { - am_setup::convert_opclass_to_distance(opclass_oid); - true + if am_setup::convert_opclass_to_vd(opclass_oid).is_some() { + pgrx::info!("Vector indexes can only be built on built-in operator classes."); + true + } else { + false + } } #[pgrx::pg_guard] diff --git a/src/index/am_setup.rs b/src/index/am_setup.rs index ba70652e3..3e91bb25e 100644 --- a/src/index/am_setup.rs +++ b/src/index/am_setup.rs @@ -1,5 +1,3 @@ -#![allow(unsafe_op_in_unsafe_fn)] - use crate::datatype::typmod::Typmod; use crate::error::*; use base::distance::*; @@ -8,6 +6,13 @@ use base::vector::*; use serde::Deserialize; use std::ffi::CStr; +#[derive(Copy, Clone, Debug, Default)] +#[repr(C)] +pub struct Helper { + pub vl_len_: i32, + pub offset: i32, +} + pub fn helper_offset() -> usize { std::mem::offset_of!(Helper, offset) } @@ -16,144 +21,107 @@ pub fn helper_size() -> usize { std::mem::size_of::() } -pub unsafe fn convert_opclass_to_distance( - opclass: 
pgrx::pg_sys::Oid, -) -> (DistanceKind, VectorKind) { - let opclass_cache_id = pgrx::pg_sys::SysCacheIdentifier_CLAOID as _; - let tuple = pgrx::pg_sys::SearchSysCache1(opclass_cache_id, opclass.into()); - assert!( - !tuple.is_null(), - "cache lookup failed for operator class {opclass:?}" - ); - let classform = pgrx::pg_sys::GETSTRUCT(tuple).cast::(); - let opfamily = (*classform).opcfamily; - let result = convert_opfamily_to_distance(opfamily); - pgrx::pg_sys::ReleaseSysCache(tuple); - result -} - -pub unsafe fn convert_opfamily_to_distance( - opfamily: pgrx::pg_sys::Oid, -) -> (DistanceKind, VectorKind) { - let opfamily_cache_id = pgrx::pg_sys::SysCacheIdentifier_OPFAMILYOID as _; - let opstrategy_cache_id = pgrx::pg_sys::SysCacheIdentifier_AMOPSTRATEGY as _; - let tuple = pgrx::pg_sys::SearchSysCache1(opfamily_cache_id, opfamily.into()); - assert!( - !tuple.is_null(), - "cache lookup failed for operator family {opfamily:?}" - ); - let list = pgrx::pg_sys::SearchSysCacheList( - opstrategy_cache_id, - 1, - opfamily.into(), - 0.into(), - 0.into(), - ); - assert!((*list).n_members == 1); - let member = (*list).members.as_slice(1)[0]; - let member_tuple = &mut (*member).tuple; - let amop = pgrx::pg_sys::GETSTRUCT(member_tuple).cast::(); - assert!((*amop).amopstrategy == 1); - assert!((*amop).amoppurpose == pgrx::pg_sys::AMOP_ORDER as libc::c_char); - let operator = (*amop).amopopr; - let result; - if operator == regoperatorin("vectors.<->(vectors.vector,vectors.vector)") { - result = (DistanceKind::L2, VectorKind::Vecf32); - } else if operator == regoperatorin("vectors.<#>(vectors.vector,vectors.vector)") { - result = (DistanceKind::Dot, VectorKind::Vecf32); - } else if operator == regoperatorin("vectors.<=>(vectors.vector,vectors.vector)") { - result = (DistanceKind::Cos, VectorKind::Vecf32); - } else if operator == regoperatorin("vectors.<->(vectors.vecf16,vectors.vecf16)") { - result = (DistanceKind::L2, VectorKind::Vecf16); - } else if operator == 
regoperatorin("vectors.<#>(vectors.vecf16,vectors.vecf16)") { - result = (DistanceKind::Dot, VectorKind::Vecf16); - } else if operator == regoperatorin("vectors.<=>(vectors.vecf16,vectors.vecf16)") { - result = (DistanceKind::Cos, VectorKind::Vecf16); - } else if operator == regoperatorin("vectors.<->(vectors.svector,vectors.svector)") { - result = (DistanceKind::L2, VectorKind::SVecf32); - } else if operator == regoperatorin("vectors.<#>(vectors.svector,vectors.svector)") { - result = (DistanceKind::Dot, VectorKind::SVecf32); - } else if operator == regoperatorin("vectors.<=>(vectors.svector,vectors.svector)") { - result = (DistanceKind::Cos, VectorKind::SVecf32); - } else if operator == regoperatorin("vectors.<->(vectors.bvector,vectors.bvector)") { - result = (DistanceKind::L2, VectorKind::BVecf32); - } else if operator == regoperatorin("vectors.<#>(vectors.bvector,vectors.bvector)") { - result = (DistanceKind::Dot, VectorKind::BVecf32); - } else if operator == regoperatorin("vectors.<=>(vectors.bvector,vectors.bvector)") { - result = (DistanceKind::Cos, VectorKind::BVecf32); - } else if operator == regoperatorin("vectors.<~>(vectors.bvector,vectors.bvector)") { - result = (DistanceKind::Jaccard, VectorKind::BVecf32); - } else if operator == regoperatorin("vectors.<->(vectors.veci8,vectors.veci8)") { - result = (DistanceKind::L2, VectorKind::Veci8); - } else if operator == regoperatorin("vectors.<#>(vectors.veci8,vectors.veci8)") { - result = (DistanceKind::Dot, VectorKind::Veci8); - } else if operator == regoperatorin("vectors.<=>(vectors.veci8,vectors.veci8)") { - result = (DistanceKind::Cos, VectorKind::Veci8); - } else { - bad_opclass(); - }; - pgrx::pg_sys::ReleaseCatCacheList(list); - pgrx::pg_sys::ReleaseSysCache(tuple); - result +pub fn convert_opclass_to_vd(opclass_oid: pgrx::pg_sys::Oid) -> Option<(VectorKind, DistanceKind)> { + let namespace = pgrx::pg_catalog::PgNamespace::search_namespacename(c"vectors").unwrap(); + let namespace = 
namespace.get().expect("pgvecto.rs is not installed."); + let opclass = pgrx::pg_catalog::PgOpclass::search_claoid(opclass_oid).unwrap(); + let opclass = opclass.get().expect("pg_catalog is broken."); + if opclass.opcnamespace() == namespace.oid() { + if let Ok(name) = opclass.opcname().to_str() { + if let Some(p) = convert_name_to_vd(name) { + return Some(p); + } + } + } + None } -pub unsafe fn options(index_relation: pgrx::pg_sys::Relation) -> IndexOptions { - let nkeysatts = (*(*index_relation).rd_index).indnkeyatts; - assert!(nkeysatts == 1, "Can not be built on multicolumns."); - // get distance - let opfamily = (*index_relation).rd_opfamily.read(); - let (d, k) = convert_opfamily_to_distance(opfamily); - // get dims - let attrs = (*(*index_relation).rd_att).attrs.as_slice(1); - let attr = &attrs[0]; - let typmod = Typmod::parse_from_i32(attr.type_mod()).unwrap(); - let dims = check_column_dims(typmod.dims()).get(); - // get other options - let parsed = get_parsed_from_varlena((*index_relation).rd_options); - IndexOptions { - vector: VectorOptions { dims, d, v: k }, - segment: parsed.segment, - optimizing: parsed.optimizing, - indexing: parsed.indexing, +pub fn convert_opfamily_to_vd( + opfamily_oid: pgrx::pg_sys::Oid, +) -> Option<(VectorKind, DistanceKind)> { + let namespace = pgrx::pg_catalog::PgNamespace::search_namespacename(c"vectors").unwrap(); + let namespace = namespace.get().expect("pgvecto.rs is not installed."); + let opfamily = pgrx::pg_catalog::PgOpfamily::search_opfamilyoid(opfamily_oid).unwrap(); + let opfamily = opfamily.get().expect("pg_catalog is broken."); + if opfamily.opfnamespace() == namespace.oid() { + if let Ok(name) = opfamily.opfname().to_str() { + if let Some(p) = convert_name_to_vd(name) { + return Some(p); + } + } } + None } -#[derive(Copy, Clone, Debug, Default)] -#[repr(C)] -struct Helper { - pub vl_len_: i32, - pub offset: i32, +fn convert_name_to_vd(name: &str) -> Option<(VectorKind, DistanceKind)> { + match 
name.strip_suffix("_ops") { + Some("vector_l2") => Some((VectorKind::Vecf32, DistanceKind::L2)), + Some("vector_dot") => Some((VectorKind::Vecf32, DistanceKind::Dot)), + Some("vector_cos") => Some((VectorKind::Vecf32, DistanceKind::Cos)), + Some("vecf16_l2") => Some((VectorKind::Vecf16, DistanceKind::L2)), + Some("vecf16_dot") => Some((VectorKind::Vecf16, DistanceKind::Dot)), + Some("vecf16_cos") => Some((VectorKind::Vecf16, DistanceKind::Cos)), + Some("svector_l2") => Some((VectorKind::SVecf32, DistanceKind::L2)), + Some("svector_dot") => Some((VectorKind::SVecf32, DistanceKind::Dot)), + Some("svector_cos") => Some((VectorKind::SVecf32, DistanceKind::Cos)), + Some("bvector_l2") => Some((VectorKind::BVecf32, DistanceKind::L2)), + Some("bvector_dot") => Some((VectorKind::BVecf32, DistanceKind::Dot)), + Some("bvector_cos") => Some((VectorKind::BVecf32, DistanceKind::Cos)), + Some("bvector_jaccard") => Some((VectorKind::BVecf32, DistanceKind::Jaccard)), + Some("veci8_l2") => Some((VectorKind::Veci8, DistanceKind::L2)), + Some("veci8_dot") => Some((VectorKind::Veci8, DistanceKind::Dot)), + Some("veci8_cos") => Some((VectorKind::Veci8, DistanceKind::Cos)), + _ => None, + } } -unsafe fn get_parsed_from_varlena(helper: *const pgrx::pg_sys::varlena) -> Parsed { - let helper = helper as *const Helper; - if helper.is_null() || (*helper).offset == 0 { +unsafe fn convert_varlena_to_soi( + varlena: *const pgrx::pg_sys::varlena, +) -> (SegmentsOptions, OptimizingOptions, IndexingOptions) { + #[derive(Debug, Clone, Deserialize, Default)] + #[serde(deny_unknown_fields)] + struct Parsed { + #[serde(default)] + segment: SegmentsOptions, + #[serde(default)] + optimizing: OptimizingOptions, + #[serde(default)] + indexing: IndexingOptions, + } + let helper = varlena as *const Helper; + if helper.is_null() || unsafe { (*helper).offset == 0 } { return Default::default(); } - let ptr = (helper as *const libc::c_char).offset((*helper).offset as isize); - let cstr = CStr::from_ptr(ptr); - 
toml::from_str::(cstr.to_str().unwrap()).unwrap() -} - -#[derive(Debug, Clone, Deserialize, Default)] -#[serde(deny_unknown_fields)] -struct Parsed { - #[serde(default)] - segment: SegmentsOptions, - #[serde(default)] - optimizing: OptimizingOptions, - #[serde(default)] - indexing: IndexingOptions, + let ptr = unsafe { (helper as *const libc::c_char).offset((*helper).offset as isize) }; + let s = unsafe { CStr::from_ptr(ptr) }.to_string_lossy().to_string(); + match toml::from_str::(&s) { + Ok(p) => (p.segment, p.optimizing, p.indexing), + Err(e) => pgrx::error!("failed to parse options: {}", e), + } } -fn regoperatorin(name: &str) -> pgrx::pg_sys::Oid { - use pgrx::IntoDatum; - let cstr = std::ffi::CString::new(name).expect("specified name has embedded NULL byte"); - unsafe { - pgrx::direct_function_call::( - pgrx::pg_sys::regoperatorin, - &[cstr.as_c_str().into_datum()], - ) - .expect("operator lookup returned NULL") +pub unsafe fn options(index_relation: pgrx::pg_sys::Relation) -> IndexOptions { + let opfamily = unsafe { (*index_relation).rd_opfamily.read() }; + let att = unsafe { &mut *(*index_relation).rd_att }; + let atts = unsafe { att.attrs.as_slice(att.natts as _) }; + if atts.is_empty() { + pgrx::error!("indexing on no columns is not supported"); + } + if atts.len() != 1 { + pgrx::error!("multicolumn index is not supported"); + } + // get dims + let typmod = Typmod::parse_from_i32(atts[0].type_mod()).unwrap(); + let dims = check_column_dims(typmod.dims()).get(); + // get v, d + let (v, d) = convert_opfamily_to_vd(opfamily).unwrap(); + // get segment, optimizing, indexing + let (segment, optimizing, indexing) = + unsafe { convert_varlena_to_soi((*index_relation).rd_options) }; + IndexOptions { + vector: VectorOptions { dims, v, d }, + segment, + optimizing, + indexing, } } From 75bf6b9c6f2a873e04c50aa55e6fd2cadffc82da Mon Sep 17 00:00:00 2001 From: usamoi Date: Sun, 17 Mar 2024 19:57:04 +0800 Subject: [PATCH 6/6] fix: revert set amoptionalkey to false 
Signed-off-by: usamoi --- src/index/am.rs | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/index/am.rs b/src/index/am.rs index 13c519218..feb34274c 100644 --- a/src/index/am.rs +++ b/src/index/am.rs @@ -46,6 +46,15 @@ const AM_HANDLER: pgrx::pg_sys::IndexAmRoutine = { am_routine.amcanorderbyop = true; + // Index access methods that set `amoptionalkey` to `true` + // must index all tuples, even if the first column is `NULL`. + // However, when it is `false`, PostgreSQL does not generate a path if + // there are no index clauses, even if there is an `ORDER BY` clause. + // So we have to set it to `true`, set the cost of every path + // for vector index scans without `ORDER BY` clauses to a large number, + // and throw errors if someone really wants such a path. + am_routine.amoptionalkey = true; + am_routine.amvalidate = Some(amvalidate); am_routine.amoptions = Some(amoptions); am_routine.amcostestimate = Some(amcostestimate);