Skip to content

Commit

Permalink
always use Query for deletion
Browse files Browse the repository at this point in the history
  • Loading branch information
trinity-1686a committed Sep 21, 2022
1 parent deab3c7 commit c94c9c3
Show file tree
Hide file tree
Showing 3 changed files with 37 additions and 60 deletions.
24 changes: 16 additions & 8 deletions src/indexer/delete_queue.rs
Original file line number Diff line number Diff line change
Expand Up @@ -246,19 +246,27 @@ impl DeleteCursor {
mod tests {

use super::{DeleteOperation, DeleteQueue};
use crate::indexer::operation::DeleteTarget;
use crate::schema::{Field, Term};
use crate::query::{Explanation, Scorer, Weight};
use crate::{DocId, Score, SegmentReader};

struct DummyWeight;
impl Weight for DummyWeight {
fn scorer(&self, _reader: &SegmentReader, _boost: Score) -> crate::Result<Box<dyn Scorer>> {
Err(crate::TantivyError::InternalError("dummy impl".to_owned()))
}

fn explain(&self, _reader: &SegmentReader, _doc: DocId) -> crate::Result<Explanation> {
Err(crate::TantivyError::InternalError("dummy impl".to_owned()))
}
}

#[test]
fn test_deletequeue() {
let delete_queue = DeleteQueue::new();

let make_op = |i: usize| {
let field = Field::from_field_id(1u32);
DeleteOperation {
opstamp: i as u64,
target: DeleteTarget::Term(Term::from_field_u64(field, i as u64)),
}
let make_op = |i: usize| DeleteOperation {
opstamp: i as u64,
target: Box::new(DummyWeight),
};

delete_queue.push(make_op(1));
Expand Down
56 changes: 20 additions & 36 deletions src/indexer/index_writer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,16 +11,15 @@ use super::segment_updater::SegmentUpdater;
use super::{AddBatch, AddBatchReceiver, AddBatchSender, PreparedCommit};
use crate::core::{Index, Segment, SegmentComponent, SegmentId, SegmentMeta, SegmentReader};
use crate::directory::{DirectoryLock, GarbageCollectionResult, TerminatingWrite};
use crate::docset::{DocSet, TERMINATED};
use crate::error::TantivyError;
use crate::fastfield::write_alive_bitset;
use crate::indexer::delete_queue::{DeleteCursor, DeleteQueue};
use crate::indexer::doc_opstamp_mapping::DocToOpstampMapping;
use crate::indexer::index_writer_status::IndexWriterStatus;
use crate::indexer::operation::{DeleteOperation, DeleteTarget};
use crate::indexer::operation::DeleteOperation;
use crate::indexer::stamper::Stamper;
use crate::indexer::{MergePolicy, SegmentEntry, SegmentWriter};
use crate::query::Query;
use crate::query::{Query, TermQuery};
use crate::schema::{Document, IndexRecordOption, Term};
use crate::{FutureResult, IndexReader, Opstamp};

Expand Down Expand Up @@ -94,31 +93,14 @@ fn compute_deleted_bitset(

// A delete operation should only affect
// document that were inserted before it.
match &delete_op.target {
DeleteTarget::Term(term) => {
let inverted_index = segment_reader.inverted_index(term.field())?;
if let Some(mut docset) =
inverted_index.read_postings(term, IndexRecordOption::Basic)?
{
let mut doc_matching_deleted_term = docset.doc();
while doc_matching_deleted_term != TERMINATED {
if doc_opstamps.is_deleted(doc_matching_deleted_term, delete_op.opstamp) {
alive_bitset.remove(doc_matching_deleted_term);
might_have_changed = true;
}
doc_matching_deleted_term = docset.advance();
}
delete_op
.target
.for_each(segment_reader, &mut |doc_matching_delete_query, _| {
if doc_opstamps.is_deleted(doc_matching_delete_query, delete_op.opstamp) {
alive_bitset.remove(doc_matching_delete_query);
might_have_changed = true;
}
}
DeleteTarget::Query(query) => {
query.for_each(segment_reader, &mut |doc_matching_delete_query, _| {
if doc_opstamps.is_deleted(doc_matching_delete_query, delete_op.opstamp) {
alive_bitset.remove(doc_matching_delete_query);
might_have_changed = true;
}
})?;
}
}
})?;
delete_cursor.advance();
}
Ok(might_have_changed)
Expand Down Expand Up @@ -681,13 +663,11 @@ impl IndexWriter {
/// Like adds, the deletion itself will be visible
/// only after calling `commit()`.
pub fn delete_term(&self, term: Term) -> Opstamp {
let opstamp = self.stamper.stamp();
let delete_operation = DeleteOperation {
opstamp,
target: DeleteTarget::Term(term),
};
self.delete_queue.push(delete_operation);
opstamp
let query = TermQuery::new(term, IndexRecordOption::Basic);
// For backward compatibility, if Term is invalid for the index, do nothing but return an
// Opstamp
self.delete_query(Box::new(query))
.unwrap_or_else(|_| self.stamper.stamp())
}

/// Delete all documents matching a given query.
Expand All @@ -706,7 +686,7 @@ impl IndexWriter {
let opstamp = self.stamper.stamp();
let delete_operation = DeleteOperation {
opstamp,
target: DeleteTarget::Query(weight),
target: weight,
};
self.delete_queue.push(delete_operation);
Ok(opstamp)
Expand Down Expand Up @@ -778,12 +758,16 @@ impl IndexWriter {
let (batch_opstamp, stamps) = self.get_batch_opstamps(count);

let mut adds = AddBatch::default();

for (user_op, opstamp) in user_operations_it.zip(stamps) {
match user_op {
UserOperation::Delete(term) => {
let query = TermQuery::new(term, IndexRecordOption::Basic);
let weight = query.weight(&self.index_reader.searcher(), false)?;

let delete_operation = DeleteOperation {
opstamp,
target: DeleteTarget::Term(term),
target: weight,
};
self.delete_queue.push(delete_operation);
}
Expand Down
17 changes: 1 addition & 16 deletions src/indexer/operation.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,22 +5,7 @@ use crate::Opstamp;
/// Timestamped Delete operation.
pub struct DeleteOperation {
pub opstamp: Opstamp,
pub target: DeleteTarget,
}

/// Target of a Delete operation
pub enum DeleteTarget {
Term(Term),
Query(Box<dyn Weight>),
}

impl Default for DeleteOperation {
fn default() -> Self {
DeleteOperation {
opstamp: 0,
target: DeleteTarget::Term(Term::new()),
}
}
pub target: Box<dyn Weight>,
}

/// Timestamped Add operation.
Expand Down

0 comments on commit c94c9c3

Please sign in to comment.