Skip to content

Commit

Permalink
Use index based highlighting on a single string
Browse files Browse the repository at this point in the history
This replaces the `[(HighlightedKind, String)]` model with `(String, [(HighlightedKind, usize, usize)])` to minimize memory footprint and fragmentation.

One issue is that utoipa/abeye does not correctly generate these types, so this change currently breaks the frontend.
  • Loading branch information
oeb25 committed Nov 3, 2024
1 parent 2d818dc commit f2f4cb6
Show file tree
Hide file tree
Showing 3 changed files with 76 additions and 21 deletions.
67 changes: 61 additions & 6 deletions crates/core/src/highlighted.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
use utoipa::ToSchema;

#[derive(
Default,
Debug,
Clone,
serde::Serialize,
Expand All @@ -27,6 +28,58 @@ use utoipa::ToSchema;
ToSchema,
)]
#[serde(rename_all = "camelCase")]
/// Highlighted text stored as a single string plus index-based fragments.
///
/// Every entry in `fragments` pairs a [`HighlightedKind`] with a `(start, end)`
/// byte range into `text`; [`Highlighted::push`] records these ranges and
/// `iter` resolves them back into string slices.
pub struct Highlighted {
/// Backing text buffer; when built through `push`, this is the concatenation
/// of every pushed fragment's text in push order.
pub text: String,
/// One entry per fragment: its kind and the `(start, end)` byte range it
/// occupies in `text`.
pub fragments: Vec<HighlightedFragment<(usize, usize)>>,
}

impl Highlighted {
    /// Appends `fragment` to the end of this highlighted text.
    ///
    /// The fragment's string is copied onto the shared `text` buffer; only its
    /// kind and the `(start, end)` byte range it now occupies are recorded in
    /// `fragments`.
    pub fn push(&mut self, fragment: HighlightedFragment<String>) {
        let HighlightedFragment { kind, text: piece } = fragment;

        let start = self.text.len();
        self.text.push_str(&piece);
        let span = (start, self.text.len());

        self.fragments.push(HighlightedFragment { kind, text: span });
    }

    /// Returns `true` when the backing `text` buffer is empty.
    ///
    /// Only `text` is inspected; the `fragments` list is not consulted.
    pub fn is_empty(&self) -> bool {
        self.text.is_empty()
    }

    /// Iterates over the fragments, resolving each stored byte range into a
    /// borrowed slice of `text`.
    ///
    /// # Panics
    ///
    /// Advancing the iterator panics if a stored range is out of bounds for
    /// `text`, has `start > end`, or does not fall on UTF-8 character
    /// boundaries.
    pub(crate) fn iter(&self) -> impl Iterator<Item = HighlightedFragment<&str>> {
        let buffer = self.text.as_str();

        self.fragments.iter().map(
            move |&HighlightedFragment {
                      kind,
                      text: (start, end),
                  }| HighlightedFragment {
                kind,
                text: &buffer[start..end],
            },
        )
    }
}

impl<const N: usize> From<[HighlightedFragment; N]> for Highlighted {
fn from(value: [HighlightedFragment; N]) -> Self {
let mut acc = Highlighted::default();

for frag in value {
acc.push(frag);
}

acc
}
}

#[derive(
Debug,
Clone,
Copy,
serde::Serialize,
serde::Deserialize,
bincode::Encode,
bincode::Decode,
PartialEq,
ToSchema,
)]
#[serde(rename_all = "camelCase")]
pub enum HighlightedKind {
Normal,
Highlighted,
Expand All @@ -43,30 +96,32 @@ pub enum HighlightedKind {
ToSchema,
)]
#[serde(rename_all = "camelCase")]
pub struct HighlightedFragment {
pub struct HighlightedFragment<T = String> {
pub kind: HighlightedKind,
pub text: String,
pub text: T,
}

impl HighlightedFragment {
pub fn new_unhighlighted(text: String) -> Self {
impl<T> HighlightedFragment<T> {
pub fn new_unhighlighted(text: T) -> Self {
Self::new_normal(text)
}

pub fn new_normal(text: String) -> Self {
pub fn new_normal(text: T) -> Self {
Self {
kind: HighlightedKind::Normal,
text,
}
}

pub fn new_highlighted(text: String) -> Self {
pub fn new_highlighted(text: T) -> Self {
Self {
kind: HighlightedKind::Highlighted,
text,
}
}
}

impl<T: std::ops::Deref<Target = str>> HighlightedFragment<T> {
/// Returns this fragment's text as a string slice, obtained by dereferencing
/// the stored `text` value to `str`.
pub fn text(&self) -> &str {
&self.text
}
Expand Down
5 changes: 2 additions & 3 deletions crates/core/src/inverted_index/search.rs
Original file line number Diff line number Diff line change
Expand Up @@ -227,9 +227,8 @@ impl InvertedIndex {
.join(" ")
};

page.snippet = TextSnippet {
fragments: vec![HighlightedFragment::new_unhighlighted(snippet)],
};
page.snippet =
TextSnippet::new([HighlightedFragment::new_unhighlighted(snippet)].into());
} else {
let min_body_len = if url.is_homepage() {
self.snippet_config.min_body_length_homepage
Expand Down
25 changes: 13 additions & 12 deletions crates/core/src/snippet.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
use std::ops::Range;

use crate::config::SnippetConfig;
use crate::highlighted::{HighlightedFragment, HighlightedKind};
use crate::highlighted::{Highlighted, HighlightedFragment, HighlightedKind};
use crate::query::Query;
use crate::tokenizer::fields::{
BigramTokenizer, DefaultTokenizer, FieldTokenizer, Stemmed, TrigramTokenizer,
Expand Down Expand Up @@ -63,16 +63,16 @@ struct PassageCandidate {
)]
#[serde(rename_all = "camelCase")]
pub struct TextSnippet {
pub fragments: Vec<HighlightedFragment>,
fragments: Highlighted,
}

impl TextSnippet {
pub fn unhighlighted_string(&self) -> String {
self.fragments
.iter()
.map(|f| f.text.clone())
.collect::<Vec<_>>()
.join("")
self.fragments.text.clone()
}

pub fn new(fragments: Highlighted) -> Self {
Self { fragments }
}
}

Expand Down Expand Up @@ -107,7 +107,7 @@ impl SnippetBuilder {
}

fn build(self) -> TextSnippet {
let mut fragments = Vec::new();
let mut fragments = Highlighted::default();

let mut last_end = 0;

Expand Down Expand Up @@ -331,10 +331,11 @@ pub fn generate(query: &Query, text: &str, region: &Region, config: SnippetConfi

if text.is_empty() {
return TextSnippet {
fragments: vec![HighlightedFragment {
fragments: [HighlightedFragment {
kind: HighlightedKind::Normal,
text: "".to_string(),
}],
}]
.into(),
};
}

Expand Down Expand Up @@ -377,9 +378,9 @@ Survey in 2016, 2017, and 2018."#;
let text = snippet.text;

text.fragments
.into_iter()
.iter()
.map(|HighlightedFragment { kind, text }| match kind {
HighlightedKind::Normal => text,
HighlightedKind::Normal => text.to_string(),
HighlightedKind::Highlighted => {
format!("{HIGHLIGHTEN_PREFIX}{}{HIGHLIGHTEN_POSTFIX}", text)
}
Expand Down

0 comments on commit f2f4cb6

Please sign in to comment.