Skip to content

Commit

Permalink
Share prefiltering struct (#2858)
Browse files Browse the repository at this point in the history
  • Loading branch information
javitonino authored Feb 12, 2025
1 parent 42bed9c commit 42c4a36
Show file tree
Hide file tree
Showing 28 changed files with 334 additions and 356 deletions.
9 changes: 6 additions & 3 deletions nidx/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

18 changes: 13 additions & 5 deletions nidx/nidx_paragraph/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ use nidx_tantivy::{
index_reader::{open_index_with_deletions, DeletionQueryBuilder},
TantivyIndexer, TantivyMeta, TantivySegmentMetadata,
};
use nidx_types::OpenIndexMetadata;
use nidx_types::{prefilter::PrefilterResult, OpenIndexMetadata};
use reader::ParagraphReaderService;
use resource_indexer::index_paragraphs;
use schema::ParagraphSchema;
Expand Down Expand Up @@ -140,13 +140,21 @@ impl ParagraphSearcher {
}

#[instrument(name = "paragraph::search", skip_all)]
pub fn search(&self, request: &ParagraphSearchRequest) -> anyhow::Result<ParagraphSearchResponse> {
self.reader.search(request)
/// Runs a paragraph search against this searcher's reader.
///
/// This is a thin wrapper: `request` and `prefilter` are forwarded unchanged to
/// `self.reader.search`, and that call's result (response or error) is returned as-is.
///
/// NOTE(review): `prefilter` is presumably the outcome of a prefiltering step computed
/// by the caller — confirm against the call sites.
pub fn search(
&self,
request: &ParagraphSearchRequest,
prefilter: &PrefilterResult,
) -> anyhow::Result<ParagraphSearchResponse> {
self.reader.search(request, prefilter)
}

#[instrument(name = "paragraph::suggest", skip_all)]
pub fn suggest(&self, request: &SuggestRequest) -> anyhow::Result<ParagraphSearchResponse> {
self.reader.suggest(request)
/// Runs a suggest query against this searcher's reader.
///
/// This is a thin wrapper: `request` and `prefilter` are forwarded unchanged to
/// `self.reader.suggest`, and that call's result (response or error) is returned as-is.
///
/// NOTE(review): `prefilter` is presumably the outcome of a prefiltering step computed
/// by the caller — confirm against the call sites.
pub fn suggest(
&self,
request: &SuggestRequest,
prefilter: &PrefilterResult,
) -> anyhow::Result<ParagraphSearchResponse> {
self.reader.suggest(request, prefilter)
}

pub fn iterator(&self, request: &StreamRequest) -> anyhow::Result<impl Iterator<Item = ParagraphItem>> {
Expand Down
17 changes: 14 additions & 3 deletions nidx/nidx_paragraph/src/reader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ use std::time::Instant;

use nidx_protos::order_by::{OrderField, OrderType};
use nidx_protos::{OrderBy, ParagraphItem, ParagraphSearchResponse, StreamRequest, SuggestRequest};
use nidx_types::prefilter::PrefilterResult;
use tantivy::collector::{Collector, Count, FacetCollector, TopDocs};
use tantivy::query::{AllQuery, Query, QueryParser};
use tantivy::{schema::*, DateTime, Order};
Expand Down Expand Up @@ -56,7 +57,11 @@ impl ParagraphReaderService {
Ok(count)
}

pub fn suggest(&self, request: &SuggestRequest) -> anyhow::Result<ParagraphSearchResponse> {
pub fn suggest(
&self,
request: &SuggestRequest,
prefilter: &PrefilterResult,
) -> anyhow::Result<ParagraphSearchResponse> {
let time = Instant::now();
let id = Some(&request.shard);

Expand All @@ -65,7 +70,8 @@ impl ParagraphReaderService {

let parser = QueryParser::for_index(&self.index, vec![self.schema.text]);
let text = self.adapt_text(&parser, &request.body);
let (original, termc, fuzzied) = suggest_query(&parser, &text, request, &self.schema, FUZZY_DISTANCE);
let (original, termc, fuzzied) =
suggest_query(&parser, &text, request, prefilter, &self.schema, FUZZY_DISTANCE);
let v = time.elapsed().as_millis();
debug!("{id:?} - Creating query: ends at {v} ms");

Expand Down Expand Up @@ -120,7 +126,11 @@ impl ParagraphReaderService {
Ok(producer.flatten())
}

pub fn search(&self, request: &ParagraphSearchRequest) -> anyhow::Result<ParagraphSearchResponse> {
pub fn search(
&self,
request: &ParagraphSearchRequest,
prefilter: &PrefilterResult,
) -> anyhow::Result<ParagraphSearchResponse> {
let time = Instant::now();
let id = Some(&request.id);

Expand Down Expand Up @@ -151,6 +161,7 @@ impl ParagraphReaderService {
&parser,
&text,
request,
prefilter,
&self.schema,
FUZZY_DISTANCE,
advanced
Expand Down
2 changes: 0 additions & 2 deletions nidx/nidx_paragraph/src/request_types.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@ use nidx_types::query_language::BooleanExpression;
pub struct ParagraphSearchRequest {
pub id: String,
pub uuid: String,
pub fields: Vec<String>,
/// query this text in all the paragraphs
pub body: String,
pub filter: Option<nidx_protos::Filter>,
Expand All @@ -37,7 +36,6 @@ pub struct ParagraphSearchRequest {
pub with_duplicates: bool,
pub only_faceted: bool,
pub advanced_query: Option<String>,
pub key_filters: Vec<String>,
pub min_score: f32,
pub security: Option<nidx_protos::utils::Security>,

Expand Down
64 changes: 21 additions & 43 deletions nidx/nidx_paragraph/src/search_query.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ use crate::set_query::SetQuery;
use itertools::Itertools;
use nidx_protos::prost_types::Timestamp as ProstTimestamp;
use nidx_protos::{StreamRequest, SuggestRequest};
use nidx_types::prefilter::PrefilterResult;
use std::borrow::Cow;
use std::collections::{HashMap, HashSet};
use std::ops::Bound;
Expand Down Expand Up @@ -280,10 +281,27 @@ pub fn produce_date_range_query(
Some(query)
}

/// Restricts every clause list in `queries` to the fields selected by `prefilter`.
///
/// When `prefilter` is `PrefilterResult::Some`, a single `SetQuery` over
/// `schema.field_uuid` is built from the field keys, each rendered as the resource
/// id in its `simple()` form immediately followed by the field id. A clone of that
/// query is then appended to each clause list as an `Occur::Must` clause.
///
/// For any other `PrefilterResult` variant the clause lists are left untouched.
fn apply_prefilter(
    queries: &mut [&mut Vec<(Occur, Box<dyn Query>)>],
    schema: &ParagraphSchema,
    prefilter: &PrefilterResult,
) {
    // Nothing to add unless the prefilter carries an explicit set of field keys.
    let PrefilterResult::Some(field_keys) = prefilter else {
        return;
    };

    let field_uuids = field_keys
        .iter()
        .map(|key| format!("{}{}", key.resource_id.simple(), key.field_id));
    let field_filter = Box::new(SetQuery::new(schema.field_uuid, field_uuids));

    // Every clause list receives its own clone of the same mandatory filter.
    for clauses in queries.iter_mut() {
        clauses.push((Occur::Must, field_filter.clone()));
    }
}

pub fn suggest_query(
parser: &QueryParser,
text: &str,
request: &SuggestRequest,
prefilter: &PrefilterResult,
schema: &ParagraphSchema,
distance: u8,
) -> (Box<dyn Query>, SharedTermC, Box<dyn Query>) {
Expand Down Expand Up @@ -326,19 +344,7 @@ pub fn suggest_query(
originals.push((Occur::Must, Box::new(facet_term_query)));
});

if !request.key_filters.is_empty() {
let (field_ids, resource_ids) = request.key_filters.iter().cloned().partition::<Vec<_>, _>(|k| k.contains('/'));
if !field_ids.is_empty() {
let set_query = Box::new(SetQuery::new(schema.field_uuid, field_ids));
fuzzies.push((Occur::Must, set_query.clone()));
originals.push((Occur::Must, set_query.clone()));
}
if !resource_ids.is_empty() {
let set_query = Box::new(SetQuery::new(schema.uuid, resource_ids));
fuzzies.push((Occur::Must, set_query.clone()));
originals.push((Occur::Must, set_query.clone()));
}
}
apply_prefilter(&mut [&mut fuzzies, &mut originals], schema, prefilter);

if originals.len() == 1 && originals[0].1.is::<AllQuery>() {
let original = originals.pop().unwrap().1;
Expand All @@ -358,6 +364,7 @@ pub fn search_query(
parser: &QueryParser,
text: &str,
search: &ParagraphSearchRequest,
prefilter: &PrefilterResult,
schema: &ParagraphSchema,
distance: u8,
with_advance: Option<Box<dyn Query>>,
Expand Down Expand Up @@ -401,23 +408,6 @@ pub fn search_query(
originals.push((Occur::Must, Box::new(created)));
}
}
// Fields
let mut field_filter: Vec<(Occur, Box<dyn Query>)> = vec![];
search
.fields
.iter()
.map(|value| format!("/{value}"))
.flat_map(|facet_key| Facet::from_text(&facet_key).ok().into_iter())
.for_each(|facet| {
let facet_term = Term::from_facet(schema.field, &facet);
let facet_term_query = TermQuery::new(facet_term, IndexRecordOption::Basic);
field_filter.push((Occur::Should, Box::new(facet_term_query)));
});
if !field_filter.is_empty() {
let field_filter = Box::new(BooleanQuery::new(field_filter));
fuzzies.push((Occur::Must, field_filter.clone()));
originals.push((Occur::Must, field_filter));
}

// Label filters
if let Some(formula) = &search.filtering_formula {
Expand All @@ -426,19 +416,7 @@ pub fn search_query(
originals.push((Occur::Must, query));
}

if !search.key_filters.is_empty() {
let (field_ids, resource_ids) = search.key_filters.iter().cloned().partition::<Vec<_>, _>(|k| k.contains('/'));
if !field_ids.is_empty() {
let set_query = Box::new(SetQuery::new(schema.field_uuid, field_ids));
fuzzies.push((Occur::Must, set_query.clone()));
originals.push((Occur::Must, set_query.clone()));
}
if !resource_ids.is_empty() {
let set_query = Box::new(SetQuery::new(schema.uuid, resource_ids));
fuzzies.push((Occur::Must, set_query.clone()));
originals.push((Occur::Must, set_query.clone()));
}
}
apply_prefilter(&mut [&mut fuzzies, &mut originals], schema, prefilter);

if originals.len() == 1 && originals[0].1.is::<AllQuery>() {
let original = originals.pop().unwrap().1;
Expand Down
4 changes: 2 additions & 2 deletions nidx/nidx_paragraph/src/set_query.rs
Original file line number Diff line number Diff line change
Expand Up @@ -36,8 +36,8 @@ pub struct SetQuery {

impl SetQuery {
/// Create a Term Set Query
pub fn new(field: Field, values: Vec<String>) -> Self {
let values = values.into_iter().collect();
pub fn new(field: Field, values: impl Iterator<Item = String>) -> Self {
let values = values.collect();
let set = SetWeightWrapper::new(SetWeight {
field,
values,
Expand Down
Loading

0 comments on commit 42c4a36

Please sign in to comment.