From 67bca21a56dc29d7db8ac95fae4521df1b853551 Mon Sep 17 00:00:00 2001 From: Justin Date: Sat, 1 Dec 2018 03:26:12 +0000 Subject: [PATCH 01/11] fix typo --- src/index/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/index/mod.rs b/src/index/mod.rs index 6f62a44fb3..4848fd7ce6 100644 --- a/src/index/mod.rs +++ b/src/index/mod.rs @@ -36,7 +36,7 @@ pub type Idx = idx_t; /// Although all methods appear to be available for all index implementations, /// some methods may not be supported. For instance, a [`FlatIndex`] stores /// vectors sequentially, and so does not support `add_with_ids` nor -/// `remove_with_ids`. Users are advised to read the Faiss wiki pages in order +/// `remove_ids`. Users are advised to read the Faiss wiki pages in order /// to understand which index algorithms support which operations. /// /// [`FlatIndex`]: flat/struct.FlatIndex.html From 82c6c193eb741fecaec5b4c6a9a16304bbe6cbe0 Mon Sep 17 00:00:00 2001 From: Justin Date: Sat, 1 Dec 2018 03:28:08 +0000 Subject: [PATCH 02/11] initial work on adding remove_ids method --- src/index/id_map.rs | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/src/index/id_map.rs b/src/index/id_map.rs index e0ada0636b..01c0e863b9 100644 --- a/src/index/id_map.rs +++ b/src/index/id_map.rs @@ -251,6 +251,18 @@ impl Index for IdMap { Ok(()) } } + + fn remove_ids(&mut self, sel: &[::index::Idx]) -> Result<()> { + unsafe { + let mut n_removed = 0; + faiss_try!(faiss_Index_remove_ids( + self.inner_ptr(), + sel.as_ptr(), + n_removed.as_mut_ptr() + )); + Ok(()) + } + } } impl ConcurrentIndex for IdMap @@ -340,4 +352,22 @@ mod tests { assert_eq!(result.labels, vec![9, 6, 3, 12, 15, 12, 15, 3, 6, 9]); assert!(result.distances.iter().all(|x| *x > 0.)); } + + #[test] + fn index_remove_ids() { + let mut index = index_factory(4, "Flat", MetricType::L2).unwrap(); + let mut id_index = IdMap::new(index).unwrap(); + let some_data = &[ + 7.5_f32, -7.5, 7.5, -7.5, 7.5, 7.5, 7.5, 7.5, -1., 1., 1., 1., 1., 1., 1., -1., 0., 0., + 0., 1., 1., 0., 0., -1., + ]; + + let id = [42]; + + index.add(some_data).unwrap(); + assert_eq!(index.ntotal(), 6); + + index.remove_ids(&id).unwrap(); + assert_eq!(index.ntotal(), 0); + } } From ebcada4d3dd5fb89a56f76a481aff92c411b642b Mon Sep 17 00:00:00 2001 From: Justin Date: Sat, 1 Dec 2018 03:29:56 +0000 Subject: [PATCH 03/11] bump version to 0.6.1 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 56db70255b..6e648badad 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "faiss" description = "High-level bindings for Faiss, the vector similarity search engine" -version = "0.6.0" +version = "0.6.1" authors = ["Eduardo Pinho "] license = "MIT/Apache-2.0" repository = "https://github.com/Enet4/faiss-rs" From 316e6fae03adf22cb8017d5873661ad67c1621aa Mon Sep 17 00:00:00 2001 From: Justin Date: Sat, 1 Dec 2018 18:58:40 +0000 Subject: [PATCH 04/11] have most of the types figured out --- src/index/id_map.rs | 9 ++++++--- src/index/mod.rs | 3 +++ src/macros.rs | 12 ++++++++++++ 3 files changed, 21 insertions(+), 3 deletions(-) diff --git a/src/index/id_map.rs b/src/index/id_map.rs index 01c0e863b9..b2d3b74de5 100644 --- a/src/index/id_map.rs +++ b/src/index/id_map.rs @@ -252,13 +252,13 @@ impl Index for IdMap { } } - fn remove_ids(&mut self, sel: &[::index::Idx]) -> Result<()> { + fn remove_ids(&mut self, sel: &[faiss_sys::FaissIDSelector_H]) -> Result<()> { unsafe { let mut n_removed = 0; faiss_try!(faiss_Index_remove_ids( self.inner_ptr(), sel.as_ptr(), - n_removed.as_mut_ptr() + &mut n_removed )); Ok(()) } @@ -367,7 +367,10 @@ mod tests { index.add(some_data).unwrap(); assert_eq!(index.ntotal(), 6); - index.remove_ids(&id).unwrap(); + // TODO + let id_sel: faiss_sys::FaissIDSelector_H = faiss_sys::FaissIDSelector{_unused: []}; + + index.remove_ids(&[id_sel]).unwrap(); assert_eq!(index.ntotal(), 0); } } diff --git a/src/index/mod.rs b/src/index/mod.rs index 4848fd7ce6..6cd584b378 100644 --- a/src/index/mod.rs +++ b/src/index/mod.rs @@ -79,6 +79,9 @@ pub trait Index { /// Clear the entire index. fn reset(&mut self) -> Result<()>; + + /// Remove data vectors represented by IDs. + fn remove_ids(&mut self, sel: &[faiss_sys::FaissIDSelector_H]) -> Result<()>; } /// Sub-trait for native implementations of a Faiss index. diff --git a/src/macros.rs b/src/macros.rs index 1ea7bea7a4..11d8d70709 100644 --- a/src/macros.rs +++ b/src/macros.rs @@ -108,6 +108,18 @@ macro_rules! impl_native_index { Ok(()) } } + + fn remove_ids(&mut self, sel: &[faiss_sys::FaissIDSelector_H]) -> Result<()> { + unsafe { + let mut n_removed = 0; + faiss_try!(faiss_Index_remove_ids( + self.inner_ptr(), + sel.as_ptr(), + &mut n_removed + )); + Ok(()) + } + } } }; } From c533de89b78641bfb3aa6dca64c095f19c50f91d Mon Sep 17 00:00:00 2001 From: Justin Date: Wed, 5 Dec 2018 23:21:58 +0000 Subject: [PATCH 05/11] added beginning of selector mod --- src/lib.rs | 1 + src/selector.rs | 24 ++++++++++++++++++++++++ 2 files changed, 25 insertions(+) create mode 100644 src/selector.rs diff --git a/src/lib.rs b/src/lib.rs index 1fe75db67a..782bc0badb 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -93,6 +93,7 @@ pub mod cluster; pub mod error; pub mod index; pub mod metric; +pub mod selector; #[cfg(feature = "gpu")] pub mod gpu; diff --git a/src/selector.rs b/src/selector.rs new file mode 100644 index 0000000000..c35a05d24d --- /dev/null +++ b/src/selector.rs @@ -0,0 +1,24 @@ +//! Abstract Faiss ID selector +use faiss_sys::*; + +/// Abstraction over IDSelectorRange and IDSelectorBatch +#[derive(Debug)] +pub struct IdSelector { + inner: *mut FaissIDSelector, +} + +impl IdSelector { + /// Create new range selector + pub fn range(min: idx_t, max: idx_t) -> IdSelector { + let mut sel = FaissIDSelector_H {_unused: []}; + let _ = faiss_IDSelectorRange_new(&mut &mut sel, min, max); + IdSelector { inner: &mut sel } + } + + /// Create new batch selector + pub fn batch(n: i64, indices: &idx_t) -> IdSelector { + let mut sel = FaissIDSelector_H {_unused: []}; + let _ = faiss_IDSelectorBatch_new(&mut &mut sel, n, indices); + IdSelector { inner: &mut sel } + } +} From 14ed629fef483507bbb9511a8fd293dbffc84f28 Mon Sep 17 00:00:00 2001 From: Justin Date: Wed, 5 Dec 2018 23:22:22 +0000 Subject: [PATCH 06/11] removed version bump --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 6e648badad..56db70255b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "faiss" description = "High-level bindings for Faiss, the vector similarity search engine" -version = "0.6.1" +version = "0.6.0" authors = ["Eduardo Pinho "] license = "MIT/Apache-2.0" repository = "https://github.com/Enet4/faiss-rs" From 892b5e9a5c6830f7aad4b22edbe5b402326d00c2 Mon Sep 17 00:00:00 2001 From: Justin Date: Thu, 6 Dec 2018 01:07:59 +0000 Subject: [PATCH 07/11] more work on IdSelector --- src/index/id_map.rs | 13 +++++++------ src/index/lsh.rs | 1 + src/index/mod.rs | 3 ++- src/macros.rs | 6 +++--- src/selector.rs | 2 +- 5 files changed, 14 insertions(+), 11 deletions(-) diff --git a/src/index/id_map.rs b/src/index/id_map.rs index b2d3b74de5..149b563f89 100644 --- a/src/index/id_map.rs +++ b/src/index/id_map.rs @@ -59,6 +59,7 @@ use index::{ AssignSearchResult, ConcurrentIndex, CpuIndex, FromInnerPtr, Idx, Index, NativeIndex, RangeSearchResult, SearchResult, }; +use selector::IdSelector; use std::marker::PhantomData; use std::mem; @@ -252,15 +253,15 @@ impl Index for IdMap { } } - fn remove_ids(&mut self, sel: &[faiss_sys::FaissIDSelector_H]) -> Result<()> { + fn remove_ids(&mut self, sel: &IdSelector) -> Result<(i64)> { unsafe { let mut n_removed = 0; faiss_try!(faiss_Index_remove_ids( self.inner_ptr(), - sel.as_ptr(), + sel.inner, &mut n_removed )); - Ok(()) + Ok(n_removed) } } } @@ -320,6 +321,7 @@ where mod tests { use super::IdMap; use index::{index_factory, Index}; + use selector::IdSelector; use MetricType; #[test] @@ -367,10 +369,9 @@ mod tests { index.add(some_data).unwrap(); assert_eq!(index.ntotal(), 6); - // TODO - let id_sel: faiss_sys::FaissIDSelector_H = faiss_sys::FaissIDSelector{_unused: []}; + let id_sel = IdSelector::batch(1, &6); - index.remove_ids(&[id_sel]).unwrap(); + index.remove_ids(&id_sel).unwrap(); assert_eq!(index.ntotal(), 0); } } diff --git a/src/index/lsh.rs b/src/index/lsh.rs index 5b9f5414c0..3c1a4cb4de 100644 --- a/src/index/lsh.rs +++ b/src/index/lsh.rs @@ -4,6 +4,7 @@ use super::{AssignSearchResult, ConcurrentIndex, CpuIndex, FromInnerPtr, Idx, In NativeIndex, RangeSearchResult, SearchResult}; use error::{Error, Result}; use faiss_sys::*; +use selector::IdSelector; use std::mem; use std::ptr; diff --git a/src/index/mod.rs b/src/index/mod.rs index 6cd584b378..0a343ad958 100644 --- a/src/index/mod.rs +++ b/src/index/mod.rs @@ -12,6 +12,7 @@ use error::{Error, Result}; use metric::MetricType; +use selector::IdSelector; use std::ffi::CString; use std::os::raw::c_uint; use std::ptr; @@ -81,7 +82,7 @@ pub trait Index { fn reset(&mut self) -> Result<()>; /// Remove data vectors represented by IDs. - fn remove_ids(&mut self, sel: &[faiss_sys::FaissIDSelector_H]) -> Result<()>; + fn remove_ids(&mut self, sel: &IdSelector) -> Result<(i64)>; } /// Sub-trait for native implementations of a Faiss index. diff --git a/src/macros.rs b/src/macros.rs index 11d8d70709..4a7335983f 100644 --- a/src/macros.rs +++ b/src/macros.rs @@ -109,15 +109,15 @@ macro_rules! impl_native_index { } } - fn remove_ids(&mut self, sel: &[faiss_sys::FaissIDSelector_H]) -> Result<()> { + fn remove_ids(&mut self, sel: &IdSelector) -> Result<(i64)> { unsafe { let mut n_removed = 0; faiss_try!(faiss_Index_remove_ids( self.inner_ptr(), - sel.as_ptr(), + sel.inner, &mut n_removed )); - Ok(()) + Ok(n_removed) } } } diff --git a/src/selector.rs b/src/selector.rs index c35a05d24d..380ca9c1fc 100644 --- a/src/selector.rs +++ b/src/selector.rs @@ -4,7 +4,7 @@ use faiss_sys::*; /// Abstraction over IDSelectorRange and IDSelectorBatch #[derive(Debug)] pub struct IdSelector { - inner: *mut FaissIDSelector, + pub inner: *mut FaissIDSelector, } impl IdSelector { From 5a2c1bb838072d04d9dc88d1bb9239f07340a0ae Mon Sep 17 00:00:00 2001 From: Justin Date: Sat, 8 Dec 2018 17:18:18 +0000 Subject: [PATCH 08/11] added IdSelector.inner_ptr --- src/index/id_map.rs | 2 +- src/macros.rs | 2 +- src/selector.rs | 8 +++++++- 3 files changed, 9 insertions(+), 3 deletions(-) diff --git a/src/index/id_map.rs b/src/index/id_map.rs index 149b563f89..710a924c11 100644 --- a/src/index/id_map.rs +++ b/src/index/id_map.rs @@ -258,7 +258,7 @@ impl Index for IdMap { let mut n_removed = 0; faiss_try!(faiss_Index_remove_ids( self.inner_ptr(), - sel.inner, + sel.inner_ptr(), &mut n_removed )); Ok(n_removed) diff --git a/src/macros.rs b/src/macros.rs index 4a7335983f..5d451e8193 100644 --- a/src/macros.rs +++ b/src/macros.rs @@ -114,7 +114,7 @@ macro_rules! impl_native_index { let mut n_removed = 0; faiss_try!(faiss_Index_remove_ids( self.inner_ptr(), - sel.inner, + sel.inner_ptr(), &mut n_removed )); Ok(n_removed) diff --git a/src/selector.rs b/src/selector.rs index 380ca9c1fc..d03f55bbd0 100644 --- a/src/selector.rs +++ b/src/selector.rs @@ -4,7 +4,7 @@ use faiss_sys::*; /// Abstraction over IDSelectorRange and IDSelectorBatch #[derive(Debug)] pub struct IdSelector { - pub inner: *mut FaissIDSelector, + inner: *mut FaissIDSelector, } impl IdSelector { @@ -21,4 +21,10 @@ impl IdSelector { let _ = faiss_IDSelectorBatch_new(&mut &mut sel, n, indices); IdSelector { inner: &mut sel } } + + /// Return the inner pointer + pub fn inner_ptr(&self) -> *mut FaissIDSelector { + self.inner + } + } From 9576ce33f24f40a7c8f353ccc0f349339b772285 Mon Sep 17 00:00:00 2001 From: Justin Date: Sat, 8 Dec 2018 18:42:47 +0000 Subject: [PATCH 09/11] addressed comments --- src/index/id_map.rs | 2 +- src/index/mod.rs | 2 +- src/selector.rs | 31 +++++++++++++++++++++++-------- 3 files changed, 25 insertions(+), 10 deletions(-) diff --git a/src/index/id_map.rs b/src/index/id_map.rs index 710a924c11..5a0ffec8c6 100644 --- a/src/index/id_map.rs +++ b/src/index/id_map.rs @@ -369,7 +369,7 @@ mod tests { index.add(some_data).unwrap(); assert_eq!(index.ntotal(), 6); - let id_sel = IdSelector::batch(1, &6); + let id_sel = IdSelector::batch(&[6]).ok().unwrap(); index.remove_ids(&id_sel).unwrap(); assert_eq!(index.ntotal(), 0); diff --git a/src/index/mod.rs b/src/index/mod.rs index 0a343ad958..da14a8c6f3 100644 --- a/src/index/mod.rs +++ b/src/index/mod.rs @@ -82,7 +82,7 @@ pub trait Index { fn reset(&mut self) -> Result<()>; /// Remove data vectors represented by IDs. - fn remove_ids(&mut self, sel: &IdSelector) -> Result<(i64)>; + fn remove_ids(&mut self, sel: &IdSelector) -> Result; } /// Sub-trait for native implementations of a Faiss index. diff --git a/src/selector.rs b/src/selector.rs index d03f55bbd0..75a3a5f334 100644 --- a/src/selector.rs +++ b/src/selector.rs @@ -1,5 +1,8 @@ //! Abstract Faiss ID selector +use error::Result; use faiss_sys::*; +use index::Idx; +use std::ptr; /// Abstraction over IDSelectorRange and IDSelectorBatch #[derive(Debug)] @@ -9,17 +12,18 @@ pub struct IdSelector { impl IdSelector { /// Create new range selector - pub fn range(min: idx_t, max: idx_t) -> IdSelector { - let mut sel = FaissIDSelector_H {_unused: []}; - let _ = faiss_IDSelectorRange_new(&mut &mut sel, min, max); - IdSelector { inner: &mut sel } + pub fn range(min: Idx, max: Idx) -> Result { + let mut p_sel = ptr::null_mut(); + faiss_try!(faiss_IDSelectorRange_new(&mut p_sel, min, max)); + Ok(IdSelector { inner: p_sel }) } /// Create new batch selector - pub fn batch(n: i64, indices: &idx_t) -> IdSelector { - let mut sel = FaissIDSelector_H {_unused: []}; - let _ = faiss_IDSelectorBatch_new(&mut &mut sel, n, indices); - IdSelector { inner: &mut sel } + pub fn batch(indices: &[Idx]) -> Result { + let n = indices.len() as i64; + let mut p_sel = ptr::null_mut(); + faiss_try!(faiss_IDSelectorBatch_new(&mut p_sel, n, &indices[0])); + Ok(IdSelector { inner: p_sel }) } /// Return the inner pointer @@ -28,3 +32,14 @@ impl IdSelector { } } + +impl Drop for IdSelector { + fn drop(&mut self) { + unsafe { + faiss_IDSelector_free(self.inner); + } + } +} + +unsafe impl Send for IdSelector {} +unsafe impl Sync for IdSelector {} From 415c5654e888c511809f181b5de85cb3a5bd3e65 Mon Sep 17 00:00:00 2001 From: Justin Date: Sat, 8 Dec 2018 20:29:41 +0000 Subject: [PATCH 10/11] fixed test case, casting pointer to resolve bug --- src/index/id_map.rs | 19 +++++++++---------- src/selector.rs | 12 ++++++++---- 2 files changed, 17 insertions(+), 14 deletions(-) diff --git a/src/index/id_map.rs b/src/index/id_map.rs index 5a0ffec8c6..13aa777432 100644 --- a/src/index/id_map.rs +++ b/src/index/id_map.rs @@ -359,19 +359,18 @@ mod tests { fn index_remove_ids() { let mut index = index_factory(4, "Flat", MetricType::L2).unwrap(); let mut id_index = IdMap::new(index).unwrap(); - let some_data = &[ - 7.5_f32, -7.5, 7.5, -7.5, 7.5, 7.5, 7.5, 7.5, -1., 1., 1., 1., 1., 1., 1., -1., 0., 0., - 0., 1., 1., 0., 0., -1., - ]; + let some_data = &[2.3_f32, 0.0, -1., 1., 1., 1., 1., 4.5, 2.3, 7.6, 1., 2.2]; + println!("{}", some_data.len()); - let id = [42]; + let ids = &[4, 8, 12]; - index.add(some_data).unwrap(); - assert_eq!(index.ntotal(), 6); + id_index.add_with_ids(some_data, ids).unwrap(); + assert_eq!(id_index.ntotal(), 3); - let id_sel = IdSelector::batch(&[6]).ok().unwrap(); + let id_sel = IdSelector::batch(&[4, 12]).ok().unwrap(); - index.remove_ids(&id_sel).unwrap(); - assert_eq!(index.ntotal(), 0); + id_index.remove_ids(&id_sel).unwrap(); + println!("{}", id_index.ntotal()); + assert_eq!(id_index.ntotal(), 1); } } diff --git a/src/selector.rs b/src/selector.rs index 75a3a5f334..81618a187a 100644 --- a/src/selector.rs +++ b/src/selector.rs @@ -14,16 +14,20 @@ impl IdSelector { /// Create new range selector pub fn range(min: Idx, max: Idx) -> Result { let mut p_sel = ptr::null_mut(); - faiss_try!(faiss_IDSelectorRange_new(&mut p_sel, min, max)); - Ok(IdSelector { inner: p_sel }) + unsafe { + faiss_try!(faiss_IDSelectorRange_new(&mut p_sel, min, max)); + }; + Ok(IdSelector { inner: p_sel as *mut _}) } /// Create new batch selector pub fn batch(indices: &[Idx]) -> Result { let n = indices.len() as i64; let mut p_sel = ptr::null_mut(); - faiss_try!(faiss_IDSelectorBatch_new(&mut p_sel, n, &indices[0])); - Ok(IdSelector { inner: p_sel }) + unsafe { + faiss_try!(faiss_IDSelectorBatch_new(&mut p_sel, n, &indices[0])); + }; + Ok(IdSelector { inner: p_sel as *mut _}) } /// Return the inner pointer From 8f644496a676f06fed91abc4ed7d9a3c4bf04848 Mon Sep 17 00:00:00 2001 From: Justin Date: Sat, 8 Dec 2018 21:13:04 +0000 Subject: [PATCH 11/11] address comments, cleanup --- src/index/id_map.rs | 2 -- src/macros.rs | 2 +- src/selector.rs | 5 +++-- 3 files changed, 4 insertions(+), 5 deletions(-) diff --git a/src/index/id_map.rs b/src/index/id_map.rs index 13aa777432..1e94961143 100644 --- a/src/index/id_map.rs +++ b/src/index/id_map.rs @@ -360,7 +360,6 @@ mod tests { let mut index = index_factory(4, "Flat", MetricType::L2).unwrap(); let mut id_index = IdMap::new(index).unwrap(); let some_data = &[2.3_f32, 0.0, -1., 1., 1., 1., 1., 4.5, 2.3, 7.6, 1., 2.2]; - println!("{}", some_data.len()); let ids = &[4, 8, 12]; @@ -370,7 +369,6 @@ mod tests { let id_sel = IdSelector::batch(&[4, 12]).ok().unwrap(); id_index.remove_ids(&id_sel).unwrap(); - println!("{}", id_index.ntotal()); assert_eq!(id_index.ntotal(), 1); } } diff --git a/src/macros.rs b/src/macros.rs index 5d451e8193..fcbfa58e1c 100644 --- a/src/macros.rs +++ b/src/macros.rs @@ -109,7 +109,7 @@ macro_rules! impl_native_index { } } - fn remove_ids(&mut self, sel: &IdSelector) -> Result<(i64)> { + fn remove_ids(&mut self, sel: &IdSelector) -> Result { unsafe { let mut n_removed = 0; faiss_try!(faiss_Index_remove_ids( diff --git a/src/selector.rs b/src/selector.rs index 81618a187a..37ef36d474 100644 --- a/src/selector.rs +++ b/src/selector.rs @@ -2,6 +2,7 @@ use error::Result; use faiss_sys::*; use index::Idx; +use std::os::raw::c_long; use std::ptr; /// Abstraction over IDSelectorRange and IDSelectorBatch @@ -22,10 +23,10 @@ impl IdSelector { /// Create new batch selector pub fn batch(indices: &[Idx]) -> Result { - let n = indices.len() as i64; + let n = indices.len() as c_long; let mut p_sel = ptr::null_mut(); unsafe { - faiss_try!(faiss_IDSelectorBatch_new(&mut p_sel, n, &indices[0])); + faiss_try!(faiss_IDSelectorBatch_new(&mut p_sel, n, indices.as_ptr())); }; Ok(IdSelector { inner: p_sel as *mut _}) }