-
Notifications
You must be signed in to change notification settings - Fork 18
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat(persist): in memory storage for testing, experimentation and debugging
- Loading branch information
Showing
7 changed files
with
90 additions
and
10 deletions.
There are no files selected for viewing
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,2 +1,5 @@ | ||
mod html_to_markdown_transformer; | ||
mod loader; | ||
|
||
pub use html_to_markdown_transformer::HtmlToMarkdownTransformer; | ||
pub use loader::ScrapingLoader; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,60 @@ | ||
use std::collections::HashMap; | ||
|
||
use anyhow::Result; | ||
use async_trait::async_trait; | ||
use derive_builder::Builder; | ||
use tokio::sync::RwLock; | ||
|
||
use crate::{ | ||
ingestion::{IngestionNode, IngestionStream}, | ||
Persist, | ||
}; | ||
|
||
#[derive(Debug, Default, Builder)] | ||
#[builder(pattern = "owned")] | ||
/// A simple in-memory storage implementation. | ||
/// | ||
/// Great for experimentation and testing. | ||
pub struct MemoryStorage { | ||
data: RwLock<HashMap<String, IngestionNode>>, | ||
#[builder(default)] | ||
batch_size: Option<usize>, | ||
} | ||
|
||
impl MemoryStorage { | ||
fn key(&self, node: &IngestionNode) -> String { | ||
node.path.clone().to_string_lossy().to_string() | ||
} | ||
|
||
#[allow(dead_code)] | ||
async fn get(&self, key: &str) -> Option<IngestionNode> { | ||
self.data.read().await.get(key).cloned() | ||
} | ||
} | ||
|
||
#[async_trait] | ||
impl Persist for MemoryStorage { | ||
async fn setup(&self) -> Result<()> { | ||
Ok(()) | ||
} | ||
|
||
async fn store(&self, node: IngestionNode) -> Result<IngestionNode> { | ||
self.data | ||
.write() | ||
.await | ||
.insert(self.key(&node), node.clone()); | ||
Ok(node) | ||
} | ||
|
||
async fn batch_store(&self, nodes: Vec<IngestionNode>) -> IngestionStream { | ||
let mut lock = self.data.write().await; | ||
for node in &nodes { | ||
lock.insert(self.key(node), node.clone()); | ||
} | ||
IngestionStream::iter(nodes.into_iter().map(Ok)) | ||
} | ||
|
||
fn batch_size(&self) -> Option<usize> { | ||
self.batch_size | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
mod memory_storage; | ||
pub use memory_storage::MemoryStorage; |