Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: kmeans1d #533

Merged
merged 5 commits into from
Jul 22, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .github/workflows/psql.yml
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,7 @@ jobs:
wget --quiet -O - https://apt.llvm.org/llvm-snapshot.gpg.key | sudo apt-key add -
sudo apt-get update
sudo apt-get install -y clang-16
sudo update-alternatives --install /usr/bin/clang clang /usr/bin/clang-16 128
- name: Set up Pgrx
run: |
# pg_config
Expand Down
1 change: 1 addition & 0 deletions .github/workflows/release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,7 @@ jobs:
wget --quiet -O - https://apt.llvm.org/llvm-snapshot.gpg.key | sudo apt-key add -
sudo apt-get update
sudo apt-get install -y clang-16
sudo update-alternatives --install /usr/bin/clang clang /usr/bin/clang-16 128
- name: Set up Pgrx
run: |
# pg_config
Expand Down
2 changes: 2 additions & 0 deletions .github/workflows/rust.yml
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,7 @@ jobs:
wget --quiet -O - https://apt.llvm.org/llvm-snapshot.gpg.key | sudo apt-key add -
sudo apt-get update
sudo apt-get install -y clang-16
sudo update-alternatives --install /usr/bin/clang clang /usr/bin/clang-16 128
- name: Set up Pgrx
run: |
# pg_config
Expand Down Expand Up @@ -149,6 +150,7 @@ jobs:
wget --quiet -O - https://apt.llvm.org/llvm-snapshot.gpg.key | sudo apt-key add -
sudo apt-get update
sudo apt-get install -y clang-16
sudo update-alternatives --install /usr/bin/clang clang /usr/bin/clang-16 128
- name: Set up Pgrx
run: |
# pg_config
Expand Down
7 changes: 7 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 4 additions & 0 deletions crates/base/src/scalar/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@ mod f32;
mod half_f16;
mod i8;

use std::iter::Sum;

pub use f32::F32;
pub use half_f16::F16;
pub use i8::I8;
Expand All @@ -19,7 +21,9 @@ pub trait ScalarLike:
+ num_traits::Zero
+ num_traits::NumOps
+ num_traits::NumAssignOps
+ Default
+ crate::pod::Pod
+ Sum
{
fn from_f32(x: f32) -> Self;
fn to_f32(self) -> f32;
Expand Down
2 changes: 1 addition & 1 deletion crates/c/build.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ fn main() {
println!("cargo:rerun-if-changed=src/f16.h");
println!("cargo:rerun-if-changed=src/f16.c");
cc::Build::new()
.compiler("clang-16")
.compiler("clang")
.file("./src/f16.c")
.opt_level(3)
.flag("-fassociative-math")
Expand Down
4 changes: 4 additions & 0 deletions crates/c/src/f16.c
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
#if !(__clang_major__ >= 16)
#error "clang version must be >= 16"
#endif

#include "f16.h"
#include <math.h>

Expand Down
1 change: 0 additions & 1 deletion crates/common/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,4 +9,3 @@ pub mod remap;
pub mod sample;
pub mod variants;
pub mod vec2;
pub mod vec3;
8 changes: 4 additions & 4 deletions crates/common/src/sample.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,10 @@ pub fn sample<O: Operator>(vectors: &impl Vectors<O>) -> Vec2<Scalar<O>> {
let n = vectors.len();
let m = std::cmp::min(SAMPLES as u32, n);
let f = super::rand::sample_u32(&mut rand::thread_rng(), n, m);
let mut samples = Vec2::new(vectors.dims(), m as usize);
let mut samples = Vec2::zeros((m as usize, vectors.dims() as usize));
for i in 0..m {
let v = vectors.vector(f[i as usize] as u32).to_vec();
samples[i as usize].copy_from_slice(&v);
samples[(i as usize,)].copy_from_slice(&v);
}
samples
}
Expand All @@ -27,12 +27,12 @@ pub fn sample_subvector_transform<O: Operator>(
let n = vectors.len();
let m = std::cmp::min(SAMPLES as u32, n);
let f = super::rand::sample_u32(&mut rand::thread_rng(), n, m);
let mut samples = Vec2::new((e - s) as u32, m as usize);
let mut samples = Vec2::zeros((m as usize, e - s));
for i in 0..m {
let v = transform(vectors.vector(f[i as usize] as u32))
.as_borrowed()
.to_vec();
samples[i as usize].copy_from_slice(&v[s..e]);
samples[(i as usize,)].copy_from_slice(&v[s..e]);
}
samples
}
85 changes: 45 additions & 40 deletions crates/common/src/vec2.rs
Original file line number Diff line number Diff line change
@@ -1,70 +1,75 @@
use base::pod::Pod;
use serde::{Deserialize, Serialize};
use std::ops::{Deref, DerefMut, Index, IndexMut};
use std::ops::{Index, IndexMut};

#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Vec2<T> {
dims: u32,
v: Vec<T>,
shape: (usize, usize),
base: Vec<T>,
}

impl<T: Pod + Ord> Vec2<T> {
pub fn new(dims: u32, n: usize) -> Self {
impl<T: Default + Copy> Vec2<T> {
pub fn zeros(shape: (usize, usize)) -> Self {
Self {
dims,
v: base::pod::zeroed_vec(dims as usize * n),
shape,
base: vec![T::default(); shape.0 * shape.1],
}
}
pub fn dims(&self) -> u32 {
self.dims
pub fn from_vec(shape: (usize, usize), base: Vec<T>) -> Self {
assert_eq!(shape.0 * shape.1, base.len());
Self { shape, base }
}
pub fn len(&self) -> usize {
self.v.len() / self.dims as usize
}

impl<T: Copy> Vec2<T> {
pub fn copy_within(&mut self, (l_i,): (usize,), (r_i,): (usize,)) {
assert!(l_i < self.shape.0);
assert!(r_i < self.shape.0);
let src_from = l_i * self.shape.1;
let src_to = src_from + self.shape.1;
let dest = r_i * self.shape.1;
self.base.copy_within(src_from..src_to, dest);
}
}

impl<T> Vec2<T> {
pub fn shape_0(&self) -> usize {
self.shape.0
}
pub fn is_empty(&self) -> bool {
self.len() == 0
pub fn shape_1(&self) -> usize {
self.shape.1
}
pub fn argsort(&self) -> Vec<usize> {
let mut index: Vec<usize> = (0..self.len()).collect();
index.sort_by_key(|i| &self[*i]);
index
pub fn as_slice(&self) -> &[T] {
self.base.as_slice()
}
pub fn copy_within(&mut self, i: usize, j: usize) {
assert!(i < self.len() && j < self.len());
unsafe {
if i != j {
let src = self.v.as_ptr().add(self.dims as usize * i);
let dst = self.v.as_mut_ptr().add(self.dims as usize * j);
std::ptr::copy_nonoverlapping(src, dst, self.dims as usize);
}
}
pub fn as_mut_slice(&mut self) -> &mut [T] {
self.base.as_mut_slice()
}
}

impl<T> Index<usize> for Vec2<T> {
impl<T> Index<(usize,)> for Vec2<T> {
type Output = [T];

fn index(&self, index: usize) -> &Self::Output {
&self.v[self.dims as usize * index..][..self.dims as usize]
fn index(&self, (i,): (usize,)) -> &Self::Output {
&self.base[i * self.shape.1..][..self.shape.1]
}
}

impl<T> IndexMut<usize> for Vec2<T> {
fn index_mut(&mut self, index: usize) -> &mut Self::Output {
&mut self.v[self.dims as usize * index..][..self.dims as usize]
impl<T> IndexMut<(usize,)> for Vec2<T> {
fn index_mut(&mut self, (i,): (usize,)) -> &mut Self::Output {
&mut self.base[i * self.shape.1..][..self.shape.1]
}
}

impl<T> Deref for Vec2<T> {
type Target = [T];
impl<T> Index<(usize, usize)> for Vec2<T> {
type Output = T;

fn deref(&self) -> &Self::Target {
self.v.deref()
fn index(&self, (i, j): (usize, usize)) -> &Self::Output {
&self.base[i * self.shape.1..][j]
}
}

impl<T> DerefMut for Vec2<T> {
fn deref_mut(&mut self) -> &mut Self::Target {
self.v.deref_mut()
impl<T> IndexMut<(usize, usize)> for Vec2<T> {
fn index_mut(&mut self, (i, j): (usize, usize)) -> &mut Self::Output {
&mut self.base[i * self.shape.1..][j]
}
}
101 changes: 0 additions & 101 deletions crates/common/src/vec3.rs

This file was deleted.

4 changes: 2 additions & 2 deletions crates/ivf/src/ivf_naive.rs
Original file line number Diff line number Diff line change
Expand Up @@ -98,8 +98,8 @@ fn from_nothing<O: Op>(
rayon::check();
let centroids = {
let mut samples = samples;
for i in 0..samples.len() {
O::elkan_k_means_normalize(&mut samples[i]);
for i in 0..samples.shape_0() {
O::elkan_k_means_normalize(&mut samples[(i,)]);
}
k_means(nlist as usize, samples)
};
Expand Down
8 changes: 4 additions & 4 deletions crates/ivf/src/ivf_residual.rs
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ impl<O: Op> IvfResidual<O> {
);
let vectors = lists
.iter()
.map(|&(_, i)| O::vector_sub(vector, &self.centroids[i]))
.map(|&(_, i)| O::vector_sub(vector, &self.centroids[(i,)]))
.collect::<Vec<_>>();
let mut reranker = self
.quantization
Expand Down Expand Up @@ -104,8 +104,8 @@ fn from_nothing<O: Op>(
rayon::check();
let centroids = {
let mut samples = samples;
for i in 0..samples.len() {
O::elkan_k_means_normalize(&mut samples[i]);
for i in 0..samples.shape_0() {
O::elkan_k_means_normalize(&mut samples[(i,)]);
}
k_means(nlist as usize, samples)
};
Expand Down Expand Up @@ -141,7 +141,7 @@ fn from_nothing<O: Op>(
O::elkan_k_means_normalize(&mut vector);
k_means_lookup(&vector, &centroids)
};
O::vector_sub(vector, &centroids[target])
O::vector_sub(vector, &centroids[(target,)])
},
);
let payloads = MmapArray::create(
Expand Down
1 change: 1 addition & 0 deletions crates/k_means/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ rand.workspace = true

base = { path = "../base" }
common = { path = "../common" }
smawk = "0.3.2"
stoppable_rayon = { path = "../stoppable_rayon" }

[lints]
Expand Down
Loading
Loading