diff --git a/Cargo.lock b/Cargo.lock index ad42dc7ae5cb..31f067fc515c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -72,6 +72,18 @@ version = "1.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "69f7f8c3906b62b754cd5326047894316021dcfe5a194c8ea52bdd94934a3457" +[[package]] +name = "arrayref" +version = "0.3.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6b4930d2cb77ce62f89ee5d5289b4ac049559b1c45539271f5ed4fdc7db34545" + +[[package]] +name = "arrayvec" +version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "96d30a06541fbafbc7f82ed10c06164cfbd2c401138f6addd8404629c4b16711" + [[package]] name = "autocfg" version = "1.1.0" @@ -105,6 +117,19 @@ version = "2.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cf4b9d6a944f767f8e5e0db018570623c85f3d925ac718db4e06d0187adb21c1" +[[package]] +name = "blake3" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0231f06152bf547e9c2b5194f247cd97aacf6dcd8b15d8e5ec0663f64580da87" +dependencies = [ + "arrayref", + "arrayvec", + "cc", + "cfg-if", + "constant_time_eq", +] + [[package]] name = "bstr" version = "1.8.0" @@ -194,6 +219,12 @@ dependencies = [ "unicode-width", ] +[[package]] +name = "constant_time_eq" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f7144d30dcf0fafbce74250a3963025d8d52177934239851c917d29f1df280c2" + [[package]] name = "content_inspector" version = "0.2.4" @@ -1294,8 +1325,10 @@ name = "helix-core" version = "24.3.0" dependencies = [ "ahash", + "anyhow", "arc-swap", "bitflags 2.5.0", + "blake3", "chrono", "dunce", "encoding_rs", @@ -1311,6 +1344,7 @@ dependencies = [ "once_cell", "parking_lot", "quickcheck", + "rand", "regex", "ropey", "serde", @@ -1318,6 +1352,7 @@ dependencies = [ "slotmap", "smallvec", "smartstring", + "tempfile", 
"textwrap", "toml", "tree-sitter", diff --git a/book/src/generated/typable-cmd.md b/book/src/generated/typable-cmd.md index dbb8b5f380d2..9011a37bf13d 100644 --- a/book/src/generated/typable-cmd.md +++ b/book/src/generated/typable-cmd.md @@ -87,3 +87,5 @@ | `:redraw` | Clear and re-render the whole UI | | `:move` | Move the current buffer and its corresponding file to a different path | | `:yank-diagnostic` | Yank diagnostic(s) under primary cursor to register, or clipboard by default | +| `:history-reload` | Prepends undofile history to current history. | +| `:delete-undofile` | Delete undofile associated with the currently focused document | diff --git a/helix-core/Cargo.toml b/helix-core/Cargo.toml index 7482262eb59e..11cddd69e478 100644 --- a/helix-core/Cargo.toml +++ b/helix-core/Cargo.toml @@ -36,6 +36,9 @@ ahash = "0.8.11" hashbrown = { version = "0.14.3", features = ["raw"] } dunce = "1.0" +blake3 = "1.5" +anyhow = "1" + log = "0.4" serde = { version = "1.0", features = ["derive"] } serde_json = "1.0" @@ -57,3 +60,5 @@ globset = "0.4.14" [dev-dependencies] quickcheck = { version = "1", default-features = false } indoc = "2.0.5" +tempfile = "3.9" +rand = { version = "0.8", default-features = false, features = ["getrandom", "small_rng"] } diff --git a/helix-core/src/combinators.rs b/helix-core/src/combinators.rs new file mode 100644 index 000000000000..ab9651ec84ef --- /dev/null +++ b/helix-core/src/combinators.rs @@ -0,0 +1,130 @@ +use std::io::Error; +use std::io::ErrorKind; +use std::io::Read; +use std::io::Result; +use std::io::Write; + +pub fn write_byte(writer: &mut W, byte: u8) -> Result<()> { + writer.write_all(&[byte]) +} + +pub fn write_bool(writer: &mut W, state: bool) -> Result<()> { + write_byte(writer, state as u8) +} + +pub fn write_u32(writer: &mut W, n: u32) -> Result<()> { + writer.write_all(&n.to_ne_bytes()) +} + +pub fn write_u64(writer: &mut W, n: u64) -> Result<()> { + writer.write_all(&n.to_ne_bytes()) +} + +pub fn write_usize(writer: 
&mut W, n: usize) -> Result<()> { + writer.write_all(&n.to_ne_bytes()) +} + +pub fn write_string(writer: &mut W, s: &str) -> Result<()> { + write_usize(writer, s.len())?; + writer.write_all(s.as_bytes()) +} + +pub fn write_vec( + writer: &mut W, + slice: &[T], + f: impl Fn(&mut W, &T) -> Result<()>, +) -> Result<()> { + write_usize(writer, slice.len())?; + for element in slice { + f(writer, element)?; + } + Ok(()) +} + +pub fn write_option( + writer: &mut W, + value: Option, + f: impl Fn(&mut W, T) -> Result<()>, +) -> Result<()> { + write_bool(writer, value.is_some())?; + if let Some(value) = value { + f(writer, value)?; + } + Ok(()) +} + +pub fn read_byte(reader: &mut R) -> Result { + match reader.bytes().next() { + Some(s) => s, + None => Err(Error::from(ErrorKind::UnexpectedEof)), + } +} + +pub fn read_bool(reader: &mut R) -> Result { + let res = match read_byte(reader)? { + 0 => false, + 1 => true, + _ => { + return Err(Error::new( + ErrorKind::Other, + "invalid byte to bool conversion", + )) + } + }; + Ok(res) +} + +pub fn read_u32(reader: &mut R) -> Result { + let mut buf = [0u8; 4]; + reader.read_exact(&mut buf)?; + Ok(u32::from_ne_bytes(buf)) +} + +pub fn read_u64(reader: &mut R) -> Result { + let mut buf = [0u8; 8]; + reader.read_exact(&mut buf)?; + Ok(u64::from_ne_bytes(buf)) +} + +pub fn read_usize(reader: &mut R) -> Result { + let mut buf = [0u8; std::mem::size_of::()]; + reader.read_exact(&mut buf)?; + Ok(usize::from_ne_bytes(buf)) +} + +/// SAFETY: Only use if it is guaranteed to be a string +pub fn read_string(reader: &mut R) -> Result { + let len = read_usize(reader)?; + let mut buf = vec![0; len]; + reader.read_exact(&mut buf)?; + + let res = String::from_utf8(buf).map_err(|e| Error::new(ErrorKind::InvalidData, e))?; + Ok(res) +} + +pub fn read_vec(reader: &mut R, f: impl Fn(&mut R) -> Result) -> Result> { + let len = read_usize(reader)?; + let mut res = Vec::with_capacity(len); + for _ in 0..len { + res.push(f(reader)?); + } + Ok(res) +} + +pub 
fn read_option( + reader: &mut R, + f: impl Fn(&mut R) -> Result, +) -> Result> { + let res = if read_bool(reader)? { + Some(f(reader)?) + } else { + None + }; + Ok(res) +} + +pub fn read_many_bytes(reader: &mut R) -> Result<[u8; N]> { + let mut buf = [0u8; N]; + reader.read_exact(&mut buf)?; + Ok(buf) +} diff --git a/helix-core/src/history.rs b/helix-core/src/history.rs index 28d6dd6ec1a0..f6aeedbf4d21 100644 --- a/helix-core/src/history.rs +++ b/helix-core/src/history.rs @@ -1,8 +1,13 @@ use crate::{Assoc, ChangeSet, Range, Rope, Selection, Transaction}; use once_cell::sync::Lazy; use regex::Regex; +use std::io::{Read, Seek, SeekFrom, Write}; use std::num::NonZeroUsize; -use std::time::{Duration, Instant}; +use std::path::Path; +use std::sync::Arc; +use std::time::{Duration, SystemTime}; + +use crate::combinators::*; #[derive(Debug, Clone)] pub struct State { @@ -47,7 +52,7 @@ pub struct State { /// delete, we also store an inversion of the transaction. /// /// Using time to navigate the history: -#[derive(Debug)] +#[derive(Debug, Clone)] pub struct History { revisions: Vec, current: usize, @@ -58,11 +63,11 @@ pub struct History { struct Revision { parent: usize, last_child: Option, - transaction: Transaction, + transaction: Arc, // We need an inversion for undos because delete transactions don't store // the deleted text. 
- inversion: Transaction, - timestamp: Instant, + inversion: Arc, + timestamp: SystemTime, } impl Default for History { @@ -72,25 +77,252 @@ impl Default for History { revisions: vec![Revision { parent: 0, last_child: None, - transaction: Transaction::from(ChangeSet::new("".into())), - inversion: Transaction::from(ChangeSet::new("".into())), - timestamp: Instant::now(), + transaction: Arc::new(Transaction::from(ChangeSet::new("".into()))), + inversion: Arc::new(Transaction::from(ChangeSet::new("".into()))), + timestamp: SystemTime::now(), }], current: 0, } } } +const HASH_DIGEST_LENGTH: usize = blake3::OUT_LEN; +fn get_hash(reader: &mut R) -> std::io::Result<[u8; HASH_DIGEST_LENGTH]> { + let mut hasher = blake3::Hasher::new(); + hasher.update_reader(reader)?; + Ok(hasher.finalize().as_bytes().to_owned()) +} + +#[derive(Debug)] +pub enum StateError { + Outdated, + InvalidHeader, + InvalidOffset, + InvalidData(String), + InvalidHash, + Other(anyhow::Error), +} + +impl std::fmt::Display for StateError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Self::Outdated => f.write_str("Outdated file"), + Self::InvalidHeader => f.write_str("Invalid undofile header"), + Self::InvalidOffset => f.write_str("Invalid merge offset"), + Self::InvalidData(msg) => f.write_str(msg), + Self::InvalidHash => f.write_str("invalid hash for undofile itself"), + Self::Other(e) => e.fmt(f), + } + } +} + +impl From for StateError { + fn from(value: std::io::Error) -> Self { + Self::Other(value.into()) + } +} + +impl From for StateError { + fn from(value: std::time::SystemTimeError) -> Self { + Self::Other(value.into()) + } +} + +impl From for StateError { + fn from(value: anyhow::Error) -> Self { + Self::Other(value) + } +} + +impl std::error::Error for StateError {} + +impl PartialEq for Revision { + fn eq(&self, other: &Self) -> bool { + self.parent == other.parent + && self.last_child == other.last_child + && self.transaction == other.transaction + 
&& self.inversion == other.inversion + } +} +impl Revision { + fn serialize(&self, writer: &mut W) -> anyhow::Result<()> { + write_usize(writer, self.parent)?; + self.transaction.serialize(writer)?; + self.inversion.serialize(writer)?; + write_u64( + writer, + self.timestamp + .duration_since(std::time::UNIX_EPOCH)? + .as_secs(), + )?; + + Ok(()) + } + + fn deserialize(reader: &mut R) -> anyhow::Result { + let parent = read_usize(reader)?; + let transaction = Arc::new(Transaction::deserialize(reader)?); + let inversion = Arc::new(Transaction::deserialize(reader)?); + let timestamp = std::time::UNIX_EPOCH + .checked_add(Duration::from_secs(read_u64(reader)?)) + .unwrap_or_else(SystemTime::now); + Ok(Revision { + parent, + last_child: None, + transaction, + inversion, + timestamp, + }) + } +} + +const UNDO_FILE_HEADER_TAG: &[u8] = b"Helix"; +const UNDO_FILE_HEADER_LEN: usize = UNDO_FILE_HEADER_TAG.len(); +const UNDO_FILE_VERSION: u8 = 1; + +impl History { + /// It is the responsibility of the caller to ensure the undofile is valid before serializing. + /// This function performs no checks. + pub fn serialize( + &self, + writer: &mut W, + path: &Path, + revision: usize, + last_saved_revision: usize, + ) -> Result<(), StateError> { + // Header + writer.write_all(UNDO_FILE_HEADER_TAG)?; + write_byte(writer, UNDO_FILE_VERSION)?; + write_usize(writer, self.current)?; + write_usize(writer, revision)?; + writer.write_all(&get_hash(&mut std::fs::File::open(path)?)?)?; + + // Append new revisions to the end of the file. + write_usize(writer, self.revisions.len())?; + writer.seek(SeekFrom::End(0))?; + for rev in &self.revisions[last_saved_revision..] { + rev.serialize(writer)?; + } + + writer.flush()?; + Ok(()) + } + + /// Returns the deserialized [`History`] and the last_saved_revision. 
+ pub fn deserialize(reader: &mut R, path: &Path) -> anyhow::Result<(usize, Self)> { + let (current, last_saved_revision) = Self::read_header(reader, path)?; + + // Read the revisions and construct the tree. + let len = read_usize(reader)?; + let mut revisions: Vec = Vec::with_capacity(len); + for _ in 0..len { + let rev = Revision::deserialize(reader)?; + let len = revisions.len(); + match revisions.get_mut(rev.parent) { + Some(r) => r.last_child = NonZeroUsize::new(len), + None if len != 0 => { + anyhow::bail!(StateError::InvalidData(format!( + "non-contiguous history: {} >= {}", + rev.parent, len + ))); + } + None => { + // Starting revision check + let default_rev = History::default().revisions.pop().unwrap(); + if rev != default_rev { + anyhow::bail!(StateError::InvalidData(String::from( + "Missing 0th revision" + ))); + } + } + } + revisions.push(rev); + } + + let history = History { current, revisions }; + Ok((last_saved_revision, history)) + } + + /// If `self.revisions = [A, B, C, D]` and `other.revisions = [A, B, E, F]`, then + /// they are merged into `[A, B, E, F, C, D]` where the tree can be represented as: + /// ```md + /// A -> B -> C -> D + /// \ + /// E -> F + /// ``` + pub fn merge(&mut self, mut other: History) -> anyhow::Result<()> { + let n = self + .revisions + .iter() + .zip(other.revisions.iter()) + .take_while(|(a, b)| { + a.parent == b.parent && a.transaction == b.transaction && a.inversion == b.inversion + }) + .count(); + + let new_revs = self.revisions.split_off(n); + if new_revs.is_empty() { + return Ok(()); + } + other.revisions.reserve_exact(n); + + // Only unique revisions in `self` matter, so saturating_sub(1) is sound, as reaching 0 means there are no new revisions in the other history that aren't in `self` + let offset = (other.revisions.len() - n).saturating_sub(1); + for mut r in new_revs { + // Update parents of new revisions + if r.parent >= n { + r.parent += offset; + } + debug_assert!(r.parent < other.revisions.len()); + 
+ // Update the corresponding parent. + other.revisions.get_mut(r.parent).unwrap().last_child = + NonZeroUsize::new(other.revisions.len()); + other.revisions.push(r); + } + + if self.current >= n { + self.current += offset; + } + self.revisions = other.revisions; + + Ok(()) + } + + pub fn is_valid(reader: &mut R, path: &Path) -> bool { + Self::read_header(reader, path).is_ok() + } + + pub fn read_header(reader: &mut R, path: &Path) -> anyhow::Result<(usize, usize)> { + let header: [u8; UNDO_FILE_HEADER_LEN] = read_many_bytes(reader)?; + let version = read_byte(reader)?; + if header != UNDO_FILE_HEADER_TAG || version != UNDO_FILE_VERSION { + Err(anyhow::anyhow!(StateError::InvalidHeader)) + } else { + let current = read_usize(reader)?; + let last_saved_revision = read_usize(reader)?; + let mut hash = [0u8; HASH_DIGEST_LENGTH]; + reader.read_exact(&mut hash)?; + + if hash != get_hash(&mut std::fs::File::open(path)?)? { + anyhow::bail!(StateError::Outdated); + } + + Ok((current, last_saved_revision)) + } + } +} + impl History { pub fn commit_revision(&mut self, transaction: &Transaction, original: &State) { - self.commit_revision_at_timestamp(transaction, original, Instant::now()); + self.commit_revision_at_timestamp(transaction, original, SystemTime::now()); } pub fn commit_revision_at_timestamp( &mut self, transaction: &Transaction, original: &State, - timestamp: Instant, + timestamp: SystemTime, ) { let inversion = transaction .invert(&original.doc) @@ -102,8 +334,8 @@ impl History { self.revisions.push(Revision { parent: self.current, last_child: None, - transaction: transaction.clone(), - inversion, + transaction: Arc::new(transaction.clone()), + inversion: Arc::new(inversion), timestamp, }); self.current = new_current; @@ -119,6 +351,10 @@ impl History { self.current == 0 } + pub fn is_empty(&self) -> bool { + self.revisions.len() <= 1 + } + /// Returns the changes since the given revision composed into a transaction. 
/// Returns None if there are no changes between the current and given revisions. pub fn changes_since(&self, revision: usize) -> Option { @@ -128,8 +364,10 @@ impl History { let up_txns = up .iter() .rev() - .map(|&n| self.revisions[n].inversion.clone()); - let down_txns = down.iter().map(|&n| self.revisions[n].transaction.clone()); + .map(|&n| self.revisions[n].inversion.as_ref().clone()); + let down_txns = down + .iter() + .map(|&n| self.revisions[n].transaction.as_ref().clone()); down_txns.chain(up_txns).reduce(|acc, tx| tx.compose(acc)) } @@ -215,11 +453,13 @@ impl History { let up = self.path_up(self.current, lca); let down = self.path_up(to, lca); self.current = to; - let up_txns = up.iter().map(|&n| self.revisions[n].inversion.clone()); + let up_txns = up + .iter() + .map(|&n| self.revisions[n].inversion.as_ref().clone()); let down_txns = down .iter() .rev() - .map(|&n| self.revisions[n].transaction.clone()); + .map(|&n| self.revisions[n].transaction.as_ref().clone()); up_txns.chain(down_txns).collect() } @@ -238,9 +478,14 @@ impl History { } /// Helper for a binary search case below. - fn revision_closer_to_instant(&self, i: usize, instant: Instant) -> usize { - let dur_im1 = instant.duration_since(self.revisions[i - 1].timestamp); - let dur_i = self.revisions[i].timestamp.duration_since(instant); + fn revision_closer_to_time(&self, i: usize, timestamp: SystemTime) -> usize { + let dur_im1 = timestamp + .duration_since(self.revisions[i - 1].timestamp) + .unwrap_or_default(); + let dur_i = self.revisions[i] + .timestamp + .duration_since(timestamp) + .unwrap_or_default(); use std::cmp::Ordering::*; match dur_im1.cmp(&dur_i) { Less => i - 1, @@ -249,17 +494,17 @@ impl History { } /// Creates a [`Transaction`] that will match a revision created at around - /// `instant`. - fn jump_instant(&mut self, instant: Instant) -> Vec { + /// `time`. 
+ fn jump_time(&mut self, time: SystemTime) -> Vec { let search_result = self .revisions - .binary_search_by(|rev| rev.timestamp.cmp(&instant)); + .binary_search_by(|rev| rev.timestamp.cmp(&time)); let revision = match search_result { Ok(revision) => revision, Err(insert_point) => match insert_point { 0 => 0, n if n == self.revisions.len() => n - 1, - i => self.revision_closer_to_instant(i, instant), + i => self.revision_closer_to_time(i, time), }, }; self.jump_to(revision) @@ -269,7 +514,7 @@ impl History { /// from the timestamp of current revision. fn jump_duration_backward(&mut self, duration: Duration) -> Vec { match self.revisions[self.current].timestamp.checked_sub(duration) { - Some(instant) => self.jump_instant(instant), + Some(timestamp) => self.jump_time(timestamp), None => self.jump_to(0), } } @@ -278,7 +523,7 @@ impl History { /// the future from the timestamp of the current revision. fn jump_duration_forward(&mut self, duration: Duration) -> Vec { match self.revisions[self.current].timestamp.checked_add(duration) { - Some(instant) => self.jump_instant(instant), + Some(timestamp) => self.jump_time(timestamp), None => self.jump_to(self.revisions.len() - 1), } } @@ -474,14 +719,14 @@ mod test { history: &mut History, state: &mut State, change: crate::transaction::Change, - instant: Instant, + time: SystemTime, ) { let txn = Transaction::change(&state.doc, vec![change].into_iter()); - history.commit_revision_at_timestamp(&txn, state, instant); + history.commit_revision_at_timestamp(&txn, state, time); txn.apply(&mut state.doc); } - let t0 = Instant::now(); + let t0 = SystemTime::now(); let t = |n| t0.checked_add(Duration::from_secs(n)).unwrap(); commit_change(&mut history, &mut state, (1, 1, Some(" b".into())), t(0)); @@ -630,4 +875,158 @@ mod test { Err("duration too large".to_string()) ); } + + fn is_tree(h: &History) -> bool { + use ahash::{AHashMap, AHashSet}; + + let n = h.revisions.len(); + let nodes = &h.revisions; + + if n == 0 { + return false; + 
} + + let mut adj_list = AHashMap::with_capacity(n); + for (node, parent) in nodes.iter().enumerate().map(|(idx, r)| (idx, r.parent)) { + // Skip loop + if !(node == 0 && parent == 0) { + adj_list + .entry(node) + .or_insert_with(AHashSet::new) + .insert(parent); + adj_list + .entry(parent) + .or_insert_with(AHashSet::new) + .insert(node); + } + } + + let mut visited = AHashSet::new(); + let mut stack = vec![0]; + while let Some(node) = stack.pop() { + if !visited.insert(node) { + continue; + } + + if let Some(adj_nodes) = adj_list.get(&node) { + for v in adj_nodes { + if !visited.contains(v) { + stack.push(*v); + } + } + } + } + visited.len() == n + } + + fn generate_history(mut inserts: Vec) -> (History, Rope) { + use rand::distributions::{Distribution, Uniform}; + use rand::SeedableRng; + + let dist = Uniform::new_inclusive(0, 2); + let mut rng = rand::rngs::SmallRng::from_entropy(); + + let mut hist = History::default(); + let mut doc = Rope::default(); + let sel = Selection::point(0); + + let mut i = 0; + while !inserts.is_empty() { + if i == 100 { + break; + } + + let n = dist.sample(&mut rng); + let mut range = || { + if doc.len_chars() == 0 { + return (0, 0); + } + let range_dist = Uniform::new(0, doc.len_chars()); + let a = range_dist.sample(&mut rng); + let b = range_dist.sample(&mut rng); + if a > b { + (b, a) + } else { + (a, b) + } + }; + + match n { + 0 => { + if let Some(tx) = hist.undo() { + tx.apply(&mut doc); + } + } + 1 => { + if let Some(tx) = hist.redo() { + tx.apply(&mut doc); + continue; + } + } + 2 => { + let sel_range = range(); + let selection = Selection::single(sel_range.0, sel_range.1); + let state = State { + doc: doc.clone(), + selection, + }; + let del = range(); + let tx = Transaction::delete(&doc, [del].into_iter()); + tx.apply(&mut doc); + hist.commit_revision(&tx, &state); + } + 3 => { + let sel_range = range(); + let selection = Selection::single(sel_range.0, sel_range.1); + let state = State { + doc: doc.clone(), + selection, + 
}; + let s = inserts.pop().unwrap(); + let tx = Transaction::insert(&doc, &sel, s.into()); + tx.apply(&mut doc); + hist.commit_revision(&tx, &state); + } + _ => unreachable!(), + } + i += 1; + } + + (hist, doc) + } + + quickcheck::quickcheck! { + fn random_undofile(inserts: Vec) -> bool { + let (orig_hist, doc) = generate_history(inserts); + let mut file = tempfile::NamedTempFile::new().unwrap(); + file.write_all(&doc.bytes().collect::>()).unwrap(); + let mut undofile = tempfile::NamedTempFile::new().unwrap(); + + orig_hist + .serialize(&mut undofile, file.path(), orig_hist.revisions.len(), 0) + .unwrap(); + undofile.rewind().unwrap(); + let (_, de_hist) = History::deserialize(&mut undofile, file.path()).unwrap(); + assert!(is_tree(&de_hist)); + orig_hist.revisions.len() == de_hist.revisions.len() + && orig_hist + .revisions + .iter() + .zip(de_hist.revisions.iter()) + .all(|(a, b)| { + a.parent == b.parent + && a.transaction == b.transaction + && a.inversion == b.inversion + }) + } + } + + quickcheck::quickcheck! { + fn merge_rand_histories(inserts_a: Vec, inserts_b: Vec) -> bool { + let mut hist_a = generate_history(inserts_a).0; + let hist_b = generate_history(inserts_b).0; + hist_a.merge(hist_b).unwrap(); + is_tree(&hist_a) + } + } } diff --git a/helix-core/src/lib.rs b/helix-core/src/lib.rs index 1abd90d10b21..9b726f972440 100644 --- a/helix-core/src/lib.rs +++ b/helix-core/src/lib.rs @@ -1,5 +1,7 @@ pub use encoding_rs as encoding; +mod combinators; + pub mod auto_pairs; pub mod chars; pub mod comment; diff --git a/helix-core/src/selection.rs b/helix-core/src/selection.rs index 579499de5e4a..6018bc9c7449 100644 --- a/helix-core/src/selection.rs +++ b/helix-core/src/selection.rs @@ -3,6 +3,7 @@ //! //! All positioning is done via `char` offsets into the buffer. 
use crate::{ + combinators::*, graphemes::{ ensure_grapheme_boundary_next, ensure_grapheme_boundary_prev, next_grapheme_boundary, prev_grapheme_boundary, @@ -396,6 +397,45 @@ pub struct Selection { primary_index: usize, } +impl Selection { + pub fn serialize(&self, writer: &mut W) -> std::io::Result<()> { + write_usize(writer, self.primary_index)?; + write_vec(writer, self.ranges(), |writer, range| { + write_usize(writer, range.anchor)?; + write_usize(writer, range.head)?; + write_option(writer, range.old_visual_position.as_ref(), |writer, pos| { + write_u32(writer, pos.0)?; + write_u32(writer, pos.1)?; + Ok(()) + })?; + Ok(()) + })?; + + Ok(()) + } + + pub fn deserialize(reader: &mut R) -> std::io::Result { + let primary_index = read_usize(reader)?; + let ranges = read_vec(reader, |reader| { + let anchor = read_usize(reader)?; + let head = read_usize(reader)?; + let old_visual_position = read_option(reader, |reader| { + let res = (read_u32(reader)?, read_u32(reader)?); + Ok(res) + })?; + Ok(Range { + anchor, + head, + old_visual_position, + }) + })?; + Ok(Self { + ranges: ranges.into(), + primary_index, + }) + } +} + #[allow(clippy::len_without_is_empty)] // a Selection is never empty impl Selection { // eq diff --git a/helix-core/src/transaction.rs b/helix-core/src/transaction.rs index f5a49cc1100d..4ac1e0510520 100644 --- a/helix-core/src/transaction.rs +++ b/helix-core/src/transaction.rs @@ -1,6 +1,7 @@ use ropey::RopeSlice; use smallvec::SmallVec; +use crate::combinators::*; use crate::{chars::char_is_word, Range, Rope, Selection, Tendril}; use std::{borrow::Cow, iter::once}; @@ -502,6 +503,66 @@ pub struct Transaction { selection: Option, } +impl Transaction { + pub fn serialize(&self, writer: &mut W) -> std::io::Result<()> { + write_option(writer, self.selection.as_ref(), |writer, selection| { + selection.serialize(writer) + })?; + + write_usize(writer, self.changes.len)?; + write_usize(writer, self.changes.len_after)?; + write_vec(writer, 
self.changes.changes(), |writer, operation| { + let variant = match operation { + Operation::Retain(_) => 0, + Operation::Delete(_) => 1, + Operation::Insert(_) => 2, + }; + write_byte(writer, variant)?; + match operation { + Operation::Retain(n) | Operation::Delete(n) => { + write_usize(writer, *n)?; + } + + Operation::Insert(tendril) => { + write_string(writer, tendril.as_str())?; + } + } + + Ok(()) + })?; + + Ok(()) + } + + pub fn deserialize(reader: &mut R) -> std::io::Result { + let selection = read_option(reader, Selection::deserialize)?; + + let len = read_usize(reader)?; + let len_after = read_usize(reader)?; + let changes = read_vec(reader, |reader| { + let res = match read_byte(reader)? { + 0 => Operation::Retain(read_usize(reader)?), + 1 => Operation::Delete(read_usize(reader)?), + 2 => Operation::Insert(read_string(reader)?.into()), + _ => { + return Err(std::io::Error::new( + std::io::ErrorKind::Other, + "invalid variant", + )) + } + }; + Ok(res) + })?; + let changes = ChangeSet { + changes, + len, + len_after, + }; + + Ok(Transaction { changes, selection }) + } +} + impl Transaction { /// Create a new, empty transaction. 
pub fn new(doc: &Rope) -> Self { diff --git a/helix-stdx/src/path.rs b/helix-stdx/src/path.rs index 968596a703fc..c439542444f7 100644 --- a/helix-stdx/src/path.rs +++ b/helix-stdx/src/path.rs @@ -4,6 +4,7 @@ use std::{ borrow::Cow, ffi::OsString, path::{Component, Path, PathBuf, MAIN_SEPARATOR_STR}, + str::Utf8Error, }; use crate::env::current_working_dir; @@ -201,6 +202,45 @@ pub fn get_truncated_path(path: impl AsRef) -> PathBuf { ret } +fn os_str_as_bytes>(path: P) -> Vec { + let path = path.as_ref(); + + #[cfg(windows)] + return path.to_str().unwrap().into(); + + #[cfg(unix)] + return std::os::unix::ffi::OsStrExt::as_bytes(path).to_vec(); +} + +fn path_from_bytes(slice: &[u8]) -> Result { + #[cfg(windows)] + return Ok(PathBuf::from(std::str::from_utf8(slice)?)); + + #[cfg(unix)] + return Ok(PathBuf::from( + ::from_bytes(slice), + )); +} + +fn is_sep_byte(b: u8) -> bool { + if cfg!(windows) { + b == b'/' || b == b'\\' + } else { + b == b'/' + } +} + +pub fn escape_path(path: &Path) -> PathBuf { + let s = path.as_os_str().to_os_string(); + let mut bytes = os_str_as_bytes(&s); + for b in bytes.iter_mut() { + if is_sep_byte(*b) { + *b = b'%'; + } + } + path_from_bytes(&bytes).unwrap() +} + #[cfg(test)] mod tests { use std::{ diff --git a/helix-term/src/application.rs b/helix-term/src/application.rs index 809393c7fd7b..aa0b8fbc81e6 100644 --- a/helix-term/src/application.rs +++ b/helix-term/src/application.rs @@ -570,12 +570,16 @@ impl Application { self.editor .set_doc_path(doc_save_event.doc_id, &doc_save_event.path); // TODO: fix being overwritten by lsp - self.editor.set_status(format!( + let mut msg = format!( "'{}' written, {}L {}B", get_relative_path(&doc_save_event.path).to_string_lossy(), lines, bytes - )); + ); + if let Some(e) = doc_save_event.undofile_error { + msg = format!("{msg} | Could not write undofile: {e}"); + } + self.editor.set_status(msg); } #[inline(always)] diff --git a/helix-term/src/commands/typed.rs b/helix-term/src/commands/typed.rs 
index 5d7057da6b39..404e7828a871 100644 --- a/helix-term/src/commands/typed.rs +++ b/helix-term/src/commands/typed.rs @@ -2454,6 +2454,38 @@ fn yank_diagnostic( Ok(()) } +fn reload_undofile( + cx: &mut compositor::Context, + _args: &[Cow], + event: PromptEvent, +) -> anyhow::Result<()> { + if event != PromptEvent::Validate { + return Ok(()); + } + + let doc = doc_mut!(cx.editor); + doc.load_undofile()?; + + Ok(()) +} + +fn delete_undofile( + cx: &mut compositor::Context, + _args: &[Cow], + event: PromptEvent, +) -> anyhow::Result<()> { + if event != PromptEvent::Validate { + return Ok(()); + } + + let doc = doc!(cx.editor); + if let Some(path) = doc.undo_file()? { + std::fs::remove_file(path)?; + } + + Ok(()) +} + pub const TYPABLE_COMMAND_LIST: &[TypableCommand] = &[ TypableCommand { name: "quit", @@ -3068,6 +3100,21 @@ pub const TYPABLE_COMMAND_LIST: &[TypableCommand] = &[ fun: yank_diagnostic, signature: CommandSignature::all(completers::register), }, + TypableCommand { + // Not named reload-history so people don't accidentally call delete-undofile + name: "history-reload", + aliases: &[], + doc: "Prepends undofile history to current history.", + fun: reload_undofile, + signature: CommandSignature::none(), + }, + TypableCommand { + name: "delete-undofile", + aliases: &[], + doc: "Delete undofile associated with the currently focused document", + fun: delete_undofile, + signature: CommandSignature::none(), + }, ]; pub static TYPABLE_COMMAND_MAP: Lazy> = diff --git a/helix-view/src/document.rs b/helix-view/src/document.rs index f26ba8b97851..6a6c8b72f1a6 100644 --- a/helix-view/src/document.rs +++ b/helix-view/src/document.rs @@ -105,6 +105,8 @@ pub struct DocumentSavedEvent { pub doc_id: DocumentId, pub path: PathBuf, pub text: Rope, + // HAXX: errors can't be cloned + pub undofile_error: Option, } pub type DocumentSavedEventResult = Result; @@ -697,7 +699,7 @@ impl Document { (Rope::from(line_ending.as_str()), encoding, false) }; - let mut doc = Self::from(rope, 
Some((encoding, has_bom)), config); + let mut doc = Self::from(rope, Some((encoding, has_bom)), config.clone()); // set the path and try detecting the language doc.set_path(Some(path)); @@ -707,6 +709,16 @@ impl Document { doc.detect_indent_and_line_ending(); + if config.load().undofile { + // TODO: Propagate error to display in the statusline without causing the function to error. + if let Err(e) = doc.load_undofile() { + log::error!( + "Failed to load undofile for {}: {e}", + path.to_string_lossy() + ); + } + } + Ok(doc) } @@ -840,6 +852,8 @@ impl Document { impl Future> + 'static + Send, anyhow::Error, > { + use tokio::task::spawn_blocking; + log::debug!( "submitting save of doc '{:?}'", self.path().map(|path| path.to_string_lossy()) @@ -859,6 +873,17 @@ impl Document { } }; + // TODO: Super messy tuple... + let undofile_enabled = self.config.load().undofile; + let (history, uf_path) = if undofile_enabled { + let history = self.history.get_mut().clone(); + let undofile_path = self.undo_file()?.unwrap(); + (Some(history), Some(undofile_path)) + } else { + (None, None) + }; + let last_saved_revision = self.get_last_saved_revision(); + let identifier = self.path().map(|_| self.identifier()); let language_servers = self.language_servers.clone(); @@ -900,6 +925,23 @@ impl Document { "Path is read only" )); } + + // TODO: Decide on how to do error handling. IO errors are ok. Invalid undofile is not + let has_valid_undofile = if undofile_enabled { + let path_ = path.clone(); + let uf_path_ = uf_path.clone(); + spawn_blocking(move || -> anyhow::Result { + Ok(helix_core::history::History::is_valid( + &mut std::fs::File::open(uf_path_.unwrap())?, + &path_, + )) + }) + .await? 
+ .unwrap_or(false) + } else { + false + }; + let backup = if path.exists() { let path_ = path.clone(); // hacks: we use tempfile to handle the complex task of creating @@ -952,11 +994,40 @@ impl Document { write_result?; + let uf_result = if undofile_enabled { + let path_ = path.clone(); + let uf_path_ = uf_path.clone().unwrap(); + + spawn_blocking(move || -> anyhow::Result<()> { + let mut uf = std::fs::OpenOptions::new() + .write(true) + .read(true) + .create(true) + .open(&uf_path_)?; + + let offset = if has_valid_undofile { + last_saved_revision + } else { + uf.set_len(0)?; + 0 + }; + history + .unwrap() + .serialize(&mut uf, &path_, current_rev, offset)?; + copy_metadata(&path_, &uf_path_)?; + Ok(()) + }) + .await? + } else { + Ok(()) + }; + let event = DocumentSavedEvent { revision: current_rev, doc_id, path, text: text.clone(), + undofile_error: uf_result.map_err(|e| e.to_string()).err(), }; for (_, language_server) in language_servers { @@ -1064,6 +1135,38 @@ impl Document { Ok(()) } + pub fn undo_file(&self) -> anyhow::Result> { + let undo_dir = helix_loader::cache_dir().join("undo"); + std::fs::create_dir_all(&undo_dir)?; + let res = self.path().map(|path| { + let escaped_path = helix_stdx::path::escape_path(path); + undo_dir.join(escaped_path) + }); + Ok(res) + } + + pub fn load_undofile(&mut self) -> anyhow::Result<()> { + if let Some(mut undo_file) = self + .undo_file()? + .and_then(|path| std::fs::File::open(path).ok()) + { + if undo_file.metadata()?.len() != 0 { + let (last_saved_revision, history) = helix_core::history::History::deserialize( + &mut undo_file, + self.path().unwrap(), + )?; + + if self.history.get_mut().is_empty() { + self.history.set(history); + } else { + self.history.get_mut().merge(history).unwrap(); + self.set_last_saved_revision(last_saved_revision); + } + } + } + Ok(()) + } + /// Sets the [`Document`]'s encoding with the encoding correspondent to `label`. 
pub fn set_encoding(&mut self, label: &str) -> Result<(), Error> { let encoding = diff --git a/helix-view/src/editor.rs b/helix-view/src/editor.rs index d7058d3ef022..72c859aa315e 100644 --- a/helix-view/src/editor.rs +++ b/helix-view/src/editor.rs @@ -336,6 +336,7 @@ pub struct Config { deserialize_with = "deserialize_alphabet" )] pub jump_label_alphabet: Vec, + pub undofile: bool, } #[derive(Debug, Clone, PartialEq, Deserialize, Serialize, Eq, PartialOrd, Ord)] @@ -913,6 +914,7 @@ impl Default for Config { popup_border: PopupBorderConfig::None, indent_heuristic: IndentationHeuristic::default(), jump_label_alphabet: ('a'..='z').collect(), + undofile: false, } } }