Skip to content

Commit

Permalink
feat(fulltext_index): introduce creator
Browse files Browse the repository at this point in the history
Signed-off-by: Zhenchi <[email protected]>
  • Loading branch information
zhongzc committed Jul 2, 2024
1 parent db5d116 commit 992c10e
Show file tree
Hide file tree
Showing 9 changed files with 802 additions and 16 deletions.
326 changes: 326 additions & 0 deletions Cargo.lock

Large diffs are not rendered by default.

5 changes: 5 additions & 0 deletions src/index/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,14 @@ pin-project.workspace = true
prost.workspace = true
regex.workspace = true
regex-automata.workspace = true
serde.workspace = true
snafu.workspace = true
tantivy = { version = "0.22", features = ["zstd-compression"] }
tantivy-jieba = "0.11.0"
tokio.workspace = true

[dev-dependencies]
common-test-util.workspace = true
rand.workspace = true
tempfile.workspace = true
tokio.workspace = true
Expand Down
37 changes: 37 additions & 0 deletions src/index/src/fulltext_index.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use serde::{Deserialize, Serialize};

pub mod create;
pub mod error;

/// Configuration for a fulltext index.
///
/// The derived `Default` produces a configuration that uses the default
/// [`Analyzer`] variant (`Analyzer::English`) and is case-insensitive
/// (`case_sensitive == false`).
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Default)]
pub struct Config {
/// Analyzer to use for tokenization.
pub analyzer: Analyzer,

/// Whether the index should be case-sensitive. Defaults to `false`.
pub case_sensitive: bool,
}

/// Analyzer to use for tokenization.
///
/// Selects the language the indexed text is expected to be written in.
/// `English` is the variant returned by the derived `Default`.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Default)]
pub enum Analyzer {
/// Analyzer for English text. This is the default variant.
#[default]
English,

// NOTE(review): presumably backed by the `tantivy-jieba` tokenizer that this
// change adds as a dependency — confirm against the creator implementation.
/// Analyzer for Chinese text.
Chinese,
}
33 changes: 33 additions & 0 deletions src/index/src/fulltext_index/create.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

mod tantivy;

use async_trait::async_trait;
pub use tantivy::TantivyFulltextIndexCreator;

use crate::fulltext_index::error::Result;

/// `FulltextIndexCreator` is for creating a fulltext index.
///
/// Implementors must be `Send`. The asynchronous methods are declared through
/// `#[async_trait]`. This module re-exports `TantivyFulltextIndexCreator` as an
/// implementation.
#[async_trait]
pub trait FulltextIndexCreator: Send {
/// Pushes a text to the index.
///
/// Returns the `Result` type of `crate::fulltext_index::error`; the concrete
/// failure conditions are defined by the implementation.
async fn push_text(&mut self, text: &str) -> Result<()>;

/// Finalizes the creation of the index.
///
/// Returns the `Result` type of `crate::fulltext_index::error`; the concrete
/// failure conditions are defined by the implementation.
// NOTE(review): presumably `push_text` must not be called after `finish`, and
// the behavior of calling `finish` twice is implementation-defined — confirm.
async fn finish(&mut self) -> Result<()>;

/// Returns the memory usage in bytes during the creation of the index.
///
/// This is a synchronous, read-only query (`&self`).
fn memory_usage(&self) -> usize;
}
Loading

0 comments on commit 992c10e

Please sign in to comment.