Skip to content

Commit

Permalink
Refine Factor is None by default (#518)
Browse files Browse the repository at this point in the history
* default refine_factor to None

* describe why it is None by default
  • Loading branch information
eddyxu authored Feb 2, 2023
1 parent b1fd3e9 commit ba87905
Showing 1 changed file with 15 additions and 9 deletions.
24 changes: 15 additions & 9 deletions python/src/dataset.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,6 @@ use ::lance::dataset::Dataset as LanceDataset;
use lance::dataset::{Version, WriteMode, WriteParams};

const DEFAULT_NPROBS: usize = 1;
const DEFAULT_REFINE_FACTOR: u32 = 1;

/// Lance Dataset that will be wrapped by another class in Python
#[pyclass(name = "_Dataset", module = "_lib")]
Expand Down Expand Up @@ -122,20 +121,27 @@ impl Dataset {
DEFAULT_NPROBS
};

let refine_factor: u32 = if let Some(refine_factor) = nearest.get_item("refine_factor")
{
if refine_factor.is_none() {
DEFAULT_REFINE_FACTOR
// When a refine factor is specified, a final Refine stage is added to the I/O plan,
// which uses a Flat index over the raw vectors to refine the results.
// By default, `refine_factor` is None, to avoid the extra I/O exec node and random access.
let refine_factor: Option<u32> = if let Some(rf) = nearest.get_item("refine_factor") {
if rf.is_none() {
None
} else {
PyAny::downcast::<PyLong>(refine_factor)?.extract()?
PyAny::downcast::<PyLong>(rf)?.extract()?
}
} else {
DEFAULT_REFINE_FACTOR
None
};

scanner
.nearest(column.as_str(), &q, k)
.map(|s| s.nprobs(nprobes).refine(refine_factor))
.map(|s| {
let mut s = s.nprobs(nprobes);
if let Some(factor) = refine_factor {
s = s.refine(factor);
}
s
})
.map_err(|err| PyValueError::new_err(err.to_string()))?;
}

Expand Down

0 comments on commit ba87905

Please sign in to comment.