Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: support sparse vector #299

Merged
merged 25 commits into from
Feb 18, 2024
Merged
Show file tree
Hide file tree
Changes from 24 commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
2fd58c0
basic support
silver-ymz Jan 23, 2024
76508cd
cargo clippy
silver-ymz Jan 23, 2024
07f940c
optimize for ivf
silver-ymz Jan 23, 2024
209e5d8
Merge branch 'main' into feat/sparse
silver-ymz Jan 23, 2024
7b883fe
patch for merge
silver-ymz Jan 23, 2024
351f8fe
set vector cast to non-implicit
silver-ymz Jan 23, 2024
62dce6a
add dimension in sparse vector
silver-ymz Jan 27, 2024
1c1e411
support creating sparse vector from split array and kv string
silver-ymz Jan 29, 2024
96786b5
Merge branch '0.3' into feat/sparse
silver-ymz Jan 29, 2024
822e3ca
cargo clippy
silver-ymz Jan 29, 2024
df8a9a7
Merge branch 'main' into feat/sparse
silver-ymz Jan 29, 2024
d23f32b
move vector type from Storage trait to G trait
silver-ymz Feb 2, 2024
001cbd7
merge main into sparse
silver-ymz Feb 2, 2024
887eda2
export svector function by schema manually
silver-ymz Feb 2, 2024
d387b66
split index and value for sparse vector
silver-ymz Feb 2, 2024
e14c41f
Merge branch 'main' into feat/sparse
silver-ymz Feb 2, 2024
8032a1c
update offsets from u32 to usize
silver-ymz Feb 5, 2024
45485d9
Merge branch 'main' into feat/sparse
silver-ymz Feb 5, 2024
91e9f39
align SVecf32 to 8 && make SVecf32 support binary
silver-ymz Feb 5, 2024
65b83f0
cargo clippy
silver-ymz Feb 5, 2024
0a65b05
convert the computation to branchless
silver-ymz Feb 5, 2024
9105277
refactor
silver-ymz Feb 7, 2024
567d511
merge main into sparse
silver-ymz Feb 7, 2024
128b7f4
cargo fmt
silver-ymz Feb 7, 2024
3e55f63
update to_svector arguments name
silver-ymz Feb 8, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions crates/service/src/algorithms/clustering/elkan_k_means.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,18 +7,18 @@ use std::ops::{Index, IndexMut};
pub struct ElkanKMeans<S: G> {
dims: u16,
c: usize,
pub centroids: Vec2<S>,
pub centroids: Vec2<S::Scalar>,
lowerbound: Square,
upperbound: Vec<F32>,
assign: Vec<usize>,
rand: StdRng,
samples: Vec2<S>,
samples: Vec2<S::Scalar>,
}

const DELTA: f32 = 1.0 / 1024.0;

impl<S: G> ElkanKMeans<S> {
pub fn new(c: usize, samples: Vec2<S>) -> Self {
pub fn new(c: usize, samples: Vec2<S::Scalar>) -> Self {
let n = samples.len();
let dims = samples.dims();

Expand Down Expand Up @@ -249,7 +249,7 @@ impl<S: G> ElkanKMeans<S> {
change == 0
}

pub fn finish(self) -> Vec2<S> {
pub fn finish(self) -> Vec2<S::Scalar> {
self.centroids
}
}
Expand Down
63 changes: 32 additions & 31 deletions crates/service/src/algorithms/flat.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ use crate::utils::dir_ops::sync_dir;
use std::cmp::Reverse;
use std::collections::BinaryHeap;
use std::fs::create_dir;
use std::path::PathBuf;
use std::path::Path;
use std::sync::Arc;

pub struct Flat<S: G> {
Expand All @@ -17,37 +17,26 @@ pub struct Flat<S: G> {

impl<S: G> Flat<S> {
pub fn create(
path: PathBuf,
path: &Path,
options: IndexOptions,
sealed: Vec<Arc<SealedSegment<S>>>,
growing: Vec<Arc<GrowingSegment<S>>>,
) -> Self {
create_dir(&path).unwrap();
let ram = make(path.clone(), sealed, growing, options.clone());
let mmap = save(ram, path.clone());
sync_dir(&path);
create_dir(path).unwrap();
let ram = make(path, sealed, growing, options);
let mmap = save(path, ram);
sync_dir(path);
Self { mmap }
}
pub fn open(path: PathBuf, options: IndexOptions) -> Self {
let mmap = load(path, options.clone());
Self { mmap }
}

pub fn len(&self) -> u32 {
self.mmap.raw.len()
}

pub fn vector(&self, i: u32) -> &[S::Scalar] {
self.mmap.raw.vector(i)
}

pub fn payload(&self, i: u32) -> Payload {
self.mmap.raw.payload(i)
pub fn open(path: &Path, options: IndexOptions) -> Self {
let mmap = open(path, options);
Self { mmap }
}

pub fn basic(
&self,
vector: &[S::Scalar],
vector: S::VectorRef<'_>,
_opts: &SearchOptions,
filter: impl Filter,
) -> BinaryHeap<Reverse<Element>> {
Expand All @@ -56,12 +45,24 @@ impl<S: G> Flat<S> {

pub fn vbase<'a>(
&'a self,
vector: &'a [S::Scalar],
vector: S::VectorRef<'a>,
_opts: &'a SearchOptions,
filter: impl Filter + 'a,
) -> (Vec<Element>, Box<(dyn Iterator<Item = Element> + 'a)>) {
vbase(&self.mmap, vector, filter)
}

pub fn len(&self) -> u32 {
self.mmap.raw.len()
}

pub fn vector(&self, i: u32) -> S::VectorRef<'_> {
self.mmap.raw.vector(i)
}

pub fn payload(&self, i: u32) -> Payload {
self.mmap.raw.payload(i)
}
}

unsafe impl<S: G> Send for Flat<S> {}
Expand All @@ -81,39 +82,39 @@ unsafe impl<S: G> Send for FlatMmap<S> {}
unsafe impl<S: G> Sync for FlatMmap<S> {}

pub fn make<S: G>(
path: PathBuf,
path: &Path,
sealed: Vec<Arc<SealedSegment<S>>>,
growing: Vec<Arc<GrowingSegment<S>>>,
options: IndexOptions,
) -> FlatRam<S> {
let idx_opts = options.indexing.clone().unwrap_flat();
let raw = Arc::new(Raw::create(
path.join("raw"),
&path.join("raw"),
options.clone(),
sealed,
growing,
));
let quantization = Quantization::create(
path.join("quantization"),
&path.join("quantization"),
options.clone(),
idx_opts.quantization,
&raw,
);
FlatRam { raw, quantization }
}

pub fn save<S: G>(ram: FlatRam<S>, _: PathBuf) -> FlatMmap<S> {
pub fn save<S: G>(_: &Path, ram: FlatRam<S>) -> FlatMmap<S> {
FlatMmap {
raw: ram.raw,
quantization: ram.quantization,
}
}

pub fn load<S: G>(path: PathBuf, options: IndexOptions) -> FlatMmap<S> {
pub fn open<S: G>(path: &Path, options: IndexOptions) -> FlatMmap<S> {
let idx_opts = options.indexing.clone().unwrap_flat();
let raw = Arc::new(Raw::open(path.join("raw"), options.clone()));
let raw = Arc::new(Raw::open(&path.join("raw"), options.clone()));
let quantization = Quantization::open(
path.join("quantization"),
&path.join("quantization"),
options.clone(),
idx_opts.quantization,
&raw,
Expand All @@ -123,7 +124,7 @@ pub fn load<S: G>(path: PathBuf, options: IndexOptions) -> FlatMmap<S> {

pub fn basic<S: G>(
mmap: &FlatMmap<S>,
vector: &[S::Scalar],
vector: S::VectorRef<'_>,
mut filter: impl Filter,
) -> BinaryHeap<Reverse<Element>> {
let mut result = BinaryHeap::new();
Expand All @@ -139,7 +140,7 @@ pub fn basic<S: G>(

pub fn vbase<'a, S: G>(
mmap: &'a FlatMmap<S>,
vector: &'a [S::Scalar],
vector: S::VectorRef<'a>,
mut filter: impl Filter + 'a,
) -> (Vec<Element>, Box<dyn Iterator<Item = Element> + 'a>) {
let mut result = Vec::new();
Expand Down
Loading
Loading