Skip to content

Commit

Permalink
refactor: rework vector abstraction
Browse files Browse the repository at this point in the history
Signed-off-by: usamoi <usamoi@outlook.com>
  • Loading branch information
usamoi committed Feb 20, 2024
1 parent 42fa583 commit a3fb110
Show file tree
Hide file tree
Showing 114 changed files with 4,067 additions and 3,959 deletions.
3 changes: 0 additions & 3 deletions .cargo/config.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,3 @@
[build]
rustdocflags = ["--document-private-items"]

[target.'cfg(target_os="macos")']
# Postgres symbols won't be available until runtime
rustflags = ["-Clink-arg=-Wl,-undefined,dynamic_lookup"]
18 changes: 0 additions & 18 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

13 changes: 9 additions & 4 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,10 @@ edition.workspace = true
[lib]
crate-type = ["cdylib", "lib"]

[[bin]]
name = "pgrx_embed_vectors"
path = "./src/bin/pgrx_embed.rs"

[features]
default = ["pg15"]
pg14 = ["pgrx/pg14", "pgrx-tests/pg14"]
Expand All @@ -18,7 +22,6 @@ arrayvec.workspace = true
bincode.workspace = true
bytemuck.workspace = true
byteorder.workspace = true
half.workspace = true
libc.workspace = true
log.workspace = true
memmap2.workspace = true
Expand Down Expand Up @@ -48,9 +51,6 @@ pgrx = { git = "https://github.com/tensorchord/pgrx.git", branch = "v0.11.3-patc
pgrx-tests = { git = "https://github.com/tensorchord/pgrx.git", branch = "v0.11.3-patch" }

[lints]
clippy.needless_range_loop = "allow"
clippy.len_zero = "allow"
clippy.too_many_arguments = "allow"
rust.unsafe_op_in_unsafe_fn = "deny"
rust.unused_lifetimes = "warn"
rust.unused_qualifications = "warn"
Expand Down Expand Up @@ -88,6 +88,11 @@ thiserror = "~1.0"
uuid = { version = "1.7.0", features = ["v4", "serde"] }
validator = { version = "~0.16", features = ["derive"] }

[workspace.lints]
rust.unsafe_op_in_unsafe_fn = "forbid"
rust.unused_lifetimes = "warn"
rust.unused_qualifications = "warn"

[profile.dev]
panic = "unwind"

Expand Down
14 changes: 2 additions & 12 deletions crates/base/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,35 +4,25 @@ version.workspace = true
edition.workspace = true

[dependencies]
bincode.workspace = true
bytemuck.workspace = true
byteorder.workspace = true
half.workspace = true
libc.workspace = true
log.workspace = true
memmap2.workspace = true
num-traits.workspace = true
rand.workspace = true
rustix.workspace = true
serde.workspace = true
serde_json.workspace = true
thiserror.workspace = true
uuid.workspace = true
validator.workspace = true
c = { path = "../c" }
detect = { path = "../detect" }
crc32fast = "1.4.0"
crossbeam = "0.8.4"
dashmap = "5.5.3"
parking_lot = "0.12.1"
rayon = "1.8.1"
arc-swap = "1.6.0"
multiversion = "0.7.3"

[lints]
clippy.derivable_impls = "allow"
clippy.len_without_is_empty = "allow"
clippy.len_zero = "allow"
clippy.needless_range_loop = "allow"
clippy.nonminimal_bool = "allow"
clippy.too_many_arguments = "allow"
rust.internal_features = "allow"
rust.unsafe_op_in_unsafe_fn = "forbid"
Expand Down
9 changes: 9 additions & 0 deletions crates/base/src/distance.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
use serde::{Deserialize, Serialize};

/// Tag selecting which distance function a vector type is compared with.
///
/// The enum is `#[repr(u8)]` and derives `Serialize`/`Deserialize`, so the
/// declaration order of the variants determines both the `u8` discriminant
/// and the derived `Ord` ordering; do not reorder variants casually.
#[repr(u8)]
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize)]
pub enum DistanceKind {
/// L2 distance (presumably Euclidean or squared Euclidean; confirm against the implementations).
L2,
/// Cosine-based distance (exact formula not visible here).
Cos,
/// Dot-product-based distance (exact sign convention not visible here).
Dot,
}
90 changes: 90 additions & 0 deletions crates/base/src/global/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
mod svecf32;
mod svecf32_cos;
mod svecf32_dot;
mod svecf32_l2;
mod vecf16;
mod vecf16_cos;
mod vecf16_dot;
mod vecf16_l2;
mod vecf32;
mod vecf32_cos;
mod vecf32_dot;
mod vecf32_l2;

pub use svecf32_cos::SVecf32Cos;
pub use svecf32_dot::SVecf32Dot;
pub use svecf32_l2::SVecf32L2;
pub use vecf16_cos::Vecf16Cos;
pub use vecf16_dot::Vecf16Dot;
pub use vecf16_l2::Vecf16L2;
pub use vecf32_cos::Vecf32Cos;
pub use vecf32_dot::Vecf32Dot;
pub use vecf32_l2::Vecf32L2;

use crate::distance::*;
use crate::scalar::*;
use crate::vector::*;

/// Extension of `Global` with the normalization and distance hooks named
/// after Elkan k-means (presumably consumed by the k-means clustering code;
/// the caller is not visible in this file).
///
/// Each operation comes in two flavours: one working on raw scalar slices
/// and a `...2` variant working on the vector types of `Self`.
pub trait GlobalElkanKMeans: Global {
/// Normalizes `vector`, given as a raw scalar slice, in place.
fn elkan_k_means_normalize(vector: &mut [Scalar<Self>]);
/// Normalizes an owned vector of this kind in place.
fn elkan_k_means_normalize2(vector: &mut Self::VectorOwned);
/// Returns the distance between two raw scalar slices.
fn elkan_k_means_distance(lhs: &[Scalar<Self>], rhs: &[Scalar<Self>]) -> F32;
/// Returns the distance between a borrowed vector (`lhs`) and a raw scalar
/// slice (`rhs`, presumably a centroid; confirm against the caller).
fn elkan_k_means_distance2(lhs: Borrowed<'_, Self>, rhs: &[Scalar<Self>]) -> F32;
}

/// Extension of `Global` with distance functions over byte-encoded operands,
/// named after scalar quantization.
///
/// Both functions receive the dimension count together with per-scalar `max`
/// and `min` slices (presumably the per-dimension bounds used to decode the
/// `u8` codes; the encoding itself is not visible in this file).
pub trait GlobalScalarQuantization: Global {
/// Distance between a borrowed, unencoded vector (`lhs`) and a
/// byte-encoded operand (`rhs`).
fn scalar_quantization_distance(
dims: u16,
max: &[Scalar<Self>],
min: &[Scalar<Self>],
lhs: Borrowed<'_, Self>,
rhs: &[u8],
) -> F32;
/// Distance between two byte-encoded operands.
fn scalar_quantization_distance2(
dims: u16,
max: &[Scalar<Self>],
min: &[Scalar<Self>],
lhs: &[u8],
rhs: &[u8],
) -> F32;
}

/// Extension of `Global` with distance functions over byte-encoded operands
/// and a shared `centroids` table, named after product quantization.
///
/// The meaning of `ratio` and the layout of `centroids` are defined by the
/// implementations, which are not visible in this file.
pub trait GlobalProductQuantization: Global {
/// Companion kind that implements `GlobalElkanKMeans` over the same owned
/// vector type as `Self` (by its name, the L2 variant; presumably used to
/// train the centroids).
type ProductQuantizationL2: GlobalElkanKMeans<VectorOwned = Self::VectorOwned>;
/// Distance between a borrowed, unencoded vector (`lhs`) and a
/// byte-encoded operand (`rhs`).
fn product_quantization_distance(
dims: u16,
ratio: u16,
centroids: &[Scalar<Self>],
lhs: Borrowed<'_, Self>,
rhs: &[u8],
) -> F32;
/// Distance between two byte-encoded operands.
fn product_quantization_distance2(
dims: u16,
ratio: u16,
centroids: &[Scalar<Self>],
lhs: &[u8],
rhs: &[u8],
) -> F32;
/// Same operands as `product_quantization_distance`, plus a `delta`
/// scalar slice (presumably an offset applied to the decoded `rhs`;
/// confirm against the implementations).
fn product_quantization_distance_with_delta(
dims: u16,
ratio: u16,
centroids: &[Scalar<Self>],
lhs: Borrowed<'_, Self>,
rhs: &[u8],
delta: &[Scalar<Self>],
) -> F32;
/// Distance between two raw scalar slices (by its name, the L2 distance).
fn product_quantization_l2_distance(lhs: &[Scalar<Self>], rhs: &[Scalar<Self>]) -> F32;
}

/// Base trait tying a vector representation to a distance function.
///
/// Implementors must be `Copy + 'static`. The trait exposes only associated
/// items and a receiver-less function, so implementors act as type-level
/// tags; the re-exported `Vecf32L2`, `Vecf16Cos`, `SVecf32Dot`, ... are
/// presumably such tags.
pub trait Global: Copy + 'static {
/// Owned vector type for this kind; its borrowed form and scalar type are
/// reached through the `VectorOwned` trait.
type VectorOwned: VectorOwned;

/// Vector representation tag for this kind.
const VECTOR_KIND: VectorKind;
/// Distance function tag for this kind.
const DISTANCE_KIND: DistanceKind;

/// Returns the distance between two borrowed vectors of this kind.
fn distance(lhs: Borrowed<'_, Self>, rhs: Borrowed<'_, Self>) -> F32;
}

/// Shorthand for the owned vector type of a `Global` kind `T`.
pub type Owned<T> = <T as Global>::VectorOwned;
/// Shorthand for the borrowed vector type (with lifetime `'a`) of a `Global` kind `T`.
pub type Borrowed<'a, T> = <<T as Global>::VectorOwned as VectorOwned>::Borrowed<'a>;
/// Shorthand for the scalar element type of a `Global` kind `T`.
pub type Scalar<T> = <<T as Global>::VectorOwned as VectorOwned>::Scalar;
Loading

0 comments on commit a3fb110

Please sign in to comment.