From a06a6f5fdbc38d8a80483cf445c2030fabe4788d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Juli=C3=A1n=20Espina?= Date: Fri, 4 Aug 2023 17:49:05 -0600 Subject: [PATCH] Implement ephemeron-based weak map (#3052) * Implement ephemeron-based weak map * Document code and improve ephemerons * Fix docs * cargo clippy --- Cargo.lock | 11 + boa_engine/src/builtins/weak_set/mod.rs | 2 +- boa_gc/Cargo.toml | 3 +- boa_gc/src/internals/ephemeron_box.rs | 90 ++--- boa_gc/src/internals/weak_map_box.rs | 15 +- boa_gc/src/lib.rs | 8 +- boa_gc/src/pointers/ephemeron.rs | 22 +- boa_gc/src/pointers/mod.rs | 2 + boa_gc/src/pointers/weak.rs | 2 +- boa_gc/src/pointers/weak_map.rs | 432 +++++++++++++++++++++++- boa_gc/src/test/mod.rs | 4 + boa_gc/src/test/weak.rs | 47 ++- 12 files changed, 558 insertions(+), 80 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 3c50f794785..ee4d2ade2fa 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -59,6 +59,12 @@ dependencies = [ "memchr", ] +[[package]] +name = "allocator-api2" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "56fc6cf8dc8c4158eed8649f9b8b0ea1518eb62b544fe9490d66fa0b349eafe9" + [[package]] name = "android-tzdata" version = "0.1.1" @@ -474,6 +480,7 @@ version = "0.17.0" dependencies = [ "boa_macros", "boa_profiler", + "hashbrown 0.14.0", "thin-vec", ] @@ -1688,6 +1695,10 @@ name = "hashbrown" version = "0.14.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2c6201b9ff9fd90a5a3bac2e56a830d0caa509576f0e503818ee82c181b3437a" +dependencies = [ + "ahash 0.8.3", + "allocator-api2", +] [[package]] name = "heck" diff --git a/boa_engine/src/builtins/weak_set/mod.rs b/boa_engine/src/builtins/weak_set/mod.rs index 75a4accab31..fb0ebde46df 100644 --- a/boa_engine/src/builtins/weak_set/mod.rs +++ b/boa_engine/src/builtins/weak_set/mod.rs @@ -151,7 +151,7 @@ impl WeakSet { // 3. If Type(value) is not Object, throw a TypeError exception. let value = args.get_or_undefined(0); - let Some(value) = args.get_or_undefined(0).as_object() else { + let Some(value) = value.as_object() else { return Err(JsNativeError::typ() .with_message(format!( "WeakSet.add: expected target argument of type `object`, got target of type `{}`", diff --git a/boa_gc/Cargo.toml b/boa_gc/Cargo.toml index 74565960e09..939774b84da 100644 --- a/boa_gc/Cargo.toml +++ b/boa_gc/Cargo.toml @@ -18,4 +18,5 @@ thinvec = ["thin-vec"] boa_profiler.workspace = true boa_macros.workspace = true -thin-vec = { version = "0.2.12", optional = true } \ No newline at end of file +thin-vec = { version = "0.2.12", optional = true } +hashbrown = { version = "0.14.0", features = ["raw"] } diff --git a/boa_gc/src/internals/ephemeron_box.rs b/boa_gc/src/internals/ephemeron_box.rs index e5834e49c8e..361b9ebf8b4 100644 --- a/boa_gc/src/internals/ephemeron_box.rs +++ b/boa_gc/src/internals/ephemeron_box.rs @@ -1,6 +1,6 @@ use crate::{trace::Trace, Gc, GcBox}; use std::{ - cell::Cell, + cell::{Cell, UnsafeCell}, ptr::{self, NonNull}, }; @@ -89,41 +89,28 @@ impl core::fmt::Debug for EphemeronBoxHeader { } /// The inner allocation of an [`Ephemeron`][crate::Ephemeron] pointer. -pub(crate) struct EphemeronBox { +pub(crate) struct EphemeronBox { pub(crate) header: EphemeronBoxHeader, - data: Cell>>>, + data: UnsafeCell>>, } -impl Drop for EphemeronBox { - fn drop(&mut self) { - if let Some(data) = self.data.take() { - // SAFETY: `data` comes from an `into_raw` call, so this pointer is safe to pass to - // `from_raw`. - drop(unsafe { Box::from_raw(data.as_ptr()) }); - } - } -} - -struct Data { +struct Data { key: NonNull>, value: V, } -impl EphemeronBox { +impl EphemeronBox { pub(crate) fn new(key: &Gc, value: V) -> Self { - let data = Box::into_raw(Box::new(Data { - key: key.inner_ptr(), - value, - })); - // SAFETY: `Box::into_raw` must always return a non-null pointer. - let data = unsafe { NonNull::new_unchecked(data) }; Self { header: EphemeronBoxHeader::new(), - data: Cell::new(Some(data)), + data: UnsafeCell::new(Some(Data { + key: key.inner_ptr(), + value, + })), } } - /// Returns `true` if the two references refer to the same `GcBox`. + /// Returns `true` if the two references refer to the same `EphemeronBox`. pub(crate) fn ptr_eq(this: &Self, other: &Self) -> bool { // Use .header to ignore fat pointer vtables, to work around // https://github.com/rust-lang/rust/issues/46139 @@ -131,9 +118,31 @@ impl EphemeronBox { } /// Returns a reference to the ephemeron's value or None. - pub(crate) fn value(&self) -> Option<&V> { - // SAFETY: the garbage collector ensures `ptr` is valid as long as `data` is `Some`. - unsafe { self.data.get().map(|ptr| &ptr.as_ref().value) } + /// + /// # Safety + /// + /// The garbage collector must not run between the call to this function and the eventual + /// drop of the returned reference, since that could free the inner value. + pub(crate) unsafe fn value(&self) -> Option<&V> { + // SAFETY: the garbage collector ensures the ephemeron doesn't mutate until + // finalization. + let data = unsafe { &*self.data.get() }; + data.as_ref().map(|data| &data.value) + } + + /// Returns a reference to the ephemeron's key or None. + /// + /// # Safety + /// + /// The garbage collector must not run between the call to this function and the eventual + /// drop of the returned reference, since that could free the inner value. + pub(crate) unsafe fn key(&self) -> Option<&GcBox> { + // SAFETY: the garbage collector ensures the ephemeron doesn't mutate until + // finalization. + unsafe { + let data = &*self.data.get(); + data.as_ref().map(|data| data.key.as_ref()) + } } /// Marks this `EphemeronBox` as live. @@ -177,7 +186,7 @@ pub(crate) trait ErasedEphemeronBox { fn finalize_and_clear(&self); } -impl ErasedEphemeronBox for EphemeronBox { +impl ErasedEphemeronBox for EphemeronBox { fn header(&self) -> &EphemeronBoxHeader { &self.header } @@ -187,12 +196,13 @@ impl ErasedEphemeronBox for EphemeronBox { return false; } - let Some(data) = self.data.get() else { + // SAFETY: the garbage collector ensures the ephemeron doesn't mutate until + // finalization. + let data = unsafe { &*self.data.get() }; + let Some(data) = data.as_ref() else { return true; }; - // SAFETY: `data` comes from a `Box`, so it is safe to dereference. - let data = unsafe { data.as_ref() }; // SAFETY: `key` comes from a `Gc`, and the garbage collector only invalidates // `key` when it is unreachable, making `key` always valid. let key = unsafe { data.key.as_ref() }; @@ -209,20 +219,18 @@ impl ErasedEphemeronBox for EphemeronBox { } fn trace_non_roots(&self) { - let Some(data) = self.data.get() else { - return; - }; - // SAFETY: `data` comes from a `Box`, so it is safe to dereference. + // SAFETY: Tracing always executes before collecting, meaning this cannot cause + // use after free. unsafe { - data.as_ref().value.trace_non_roots(); - }; + if let Some(value) = self.value() { + value.trace_non_roots(); + } + } } fn finalize_and_clear(&self) { - if let Some(data) = self.data.take() { - // SAFETY: `data` comes from an `into_raw` call, so this pointer is safe to pass to - // `from_raw`. - let _contents = unsafe { Box::from_raw(data.as_ptr()) }; - } + // SAFETY: the invariants of the garbage collector ensures this is only executed when + // there are no remaining references to the inner data. + unsafe { (*self.data.get()).take() }; } } diff --git a/boa_gc/src/internals/weak_map_box.rs b/boa_gc/src/internals/weak_map_box.rs index bd1a355baf0..d19dec5d330 100644 --- a/boa_gc/src/internals/weak_map_box.rs +++ b/boa_gc/src/internals/weak_map_box.rs @@ -1,9 +1,9 @@ -use crate::{GcRefCell, Trace, WeakGc}; -use std::{cell::Cell, collections::HashMap, ptr::NonNull}; +use crate::{pointers::RawWeakMap, GcRefCell, Trace, WeakGc}; +use std::{cell::Cell, ptr::NonNull}; /// A box that is used to track [`WeakMap`][`crate::WeakMap`]s. pub(crate) struct WeakMapBox { - pub(crate) map: WeakGc, V>>>, + pub(crate) map: WeakGc>>, pub(crate) next: Cell>>, } @@ -18,15 +18,16 @@ pub(crate) trait ErasedWeakMapBox { /// Returns `true` if the [`WeakMapBox`] is live. fn is_live(&self) -> bool; - /// Traces the weak reference inside of the [`WeakMapBox`] it the weak map is live. + /// Traces the weak reference inside of the [`WeakMapBox`] if the weak map is live. unsafe fn trace(&self); } -impl ErasedWeakMapBox for WeakMapBox { +impl ErasedWeakMapBox for WeakMapBox { fn clear_dead_entries(&self) { if let Some(map) = self.map.upgrade() { - let mut map = map.borrow_mut(); - map.retain(|k, _| k.upgrade().is_some()); + if let Ok(mut map) = map.try_borrow_mut() { + map.clear_expired(); + } } } diff --git a/boa_gc/src/lib.rs b/boa_gc/src/lib.rs index 42ca8ca3b23..d17dd86d3aa 100644 --- a/boa_gc/src/lib.rs +++ b/boa_gc/src/lib.rs @@ -78,9 +78,9 @@ pub(crate) mod internals; use boa_profiler::Profiler; use internals::{EphemeronBox, ErasedEphemeronBox, ErasedWeakMapBox, WeakMapBox}; +use pointers::RawWeakMap; use std::{ cell::{Cell, RefCell}, - collections::HashMap, mem, ptr::NonNull, }; @@ -198,7 +198,7 @@ impl Allocator { }) } - fn alloc_ephemeron( + fn alloc_ephemeron( value: EphemeronBox, ) -> NonNull> { let _timer = Profiler::global().start_event("New EphemeronBox", "BoaAlloc"); @@ -219,11 +219,11 @@ impl Allocator { }) } - fn alloc_weak_map() -> WeakMap { + fn alloc_weak_map() -> WeakMap { let _timer = Profiler::global().start_event("New WeakMap", "BoaAlloc"); let weak_map = WeakMap { - inner: Gc::new(GcRefCell::new(HashMap::new())), + inner: Gc::new(GcRefCell::new(RawWeakMap::new())), }; let weak = WeakGc::new(&weak_map.inner); diff --git a/boa_gc/src/pointers/ephemeron.rs b/boa_gc/src/pointers/ephemeron.rs index 7e06076b602..c8268f7cd98 100644 --- a/boa_gc/src/pointers/ephemeron.rs +++ b/boa_gc/src/pointers/ephemeron.rs @@ -8,18 +8,20 @@ use std::ptr::NonNull; /// A key-value pair where the value becomes unaccesible when the key is garbage collected. /// -/// See Racket's explanation on [**ephemerons**][eph] for a brief overview or read Barry Hayes' -/// [_Ephemerons_: a new finalization mechanism][acm]. +/// You can read more about ephemerons on: +/// - Racket's page about [**ephemerons**][eph], which gives a brief overview. +/// - Barry Hayes' paper ["_Ephemerons_: a new finalization mechanism"][acm] which explains the topic +/// in full detail. /// /// /// [eph]: https://docs.racket-lang.org/reference/ephemerons.html /// [acm]: https://dl.acm.org/doi/10.1145/263700.263733 #[derive(Debug)] -pub struct Ephemeron { +pub struct Ephemeron { inner_ptr: NonNull>, } -impl Ephemeron { +impl Ephemeron { /// Gets the stored value of this `Ephemeron`, or `None` if the key was already garbage collected. /// /// This needs to return a clone of the value because holding a reference to it between @@ -40,7 +42,7 @@ impl Ephemeron { } } -impl Ephemeron { +impl Ephemeron { /// Creates a new `Ephemeron`. pub fn new(key: &Gc, value: V) -> Self { let inner_ptr = Allocator::alloc_ephemeron(EphemeronBox::new(key, value)); @@ -58,7 +60,7 @@ impl Ephemeron { self.inner_ptr } - fn inner(&self) -> &EphemeronBox { + pub(crate) fn inner(&self) -> &EphemeronBox { // SAFETY: Please see Gc::inner_ptr() unsafe { self.inner_ptr().as_ref() } } @@ -75,7 +77,7 @@ impl Ephemeron { } } -impl Finalize for Ephemeron { +impl Finalize for Ephemeron { fn finalize(&self) { // SAFETY: inner_ptr should be alive when calling finalize. // We don't call inner_ptr() to avoid overhead of calling finalizer_safe(). @@ -87,7 +89,7 @@ impl Finalize for Ephemeron { // SAFETY: `Ephemeron`s trace implementation only marks its inner box because we want to stop // tracing through weakly held pointers. -unsafe impl Trace for Ephemeron { +unsafe impl Trace for Ephemeron { unsafe fn trace(&self) { // SAFETY: We need to mark the inner box of the `Ephemeron` since it is reachable // from a root and this means it cannot be dropped. @@ -105,7 +107,7 @@ unsafe impl Trace for Ephemeron { } } -impl Clone for Ephemeron { +impl Clone for Ephemeron { fn clone(&self) -> Self { let ptr = self.inner_ptr(); self.inner().inc_ref_count(); @@ -114,7 +116,7 @@ impl Clone for Ephemeron { } } -impl Drop for Ephemeron { +impl Drop for Ephemeron { fn drop(&mut self) { if finalizer_safe() { Finalize::finalize(self); diff --git a/boa_gc/src/pointers/mod.rs b/boa_gc/src/pointers/mod.rs index db00dccb150..aab4c094805 100644 --- a/boa_gc/src/pointers/mod.rs +++ b/boa_gc/src/pointers/mod.rs @@ -9,3 +9,5 @@ pub use ephemeron::Ephemeron; pub use gc::Gc; pub use weak::WeakGc; pub use weak_map::WeakMap; + +pub(crate) use weak_map::RawWeakMap; diff --git a/boa_gc/src/pointers/weak.rs b/boa_gc/src/pointers/weak.rs index 840e88a517b..38596b7f24b 100644 --- a/boa_gc/src/pointers/weak.rs +++ b/boa_gc/src/pointers/weak.rs @@ -7,7 +7,7 @@ use std::hash::{Hash, Hasher}; /// garbage collections. However, this also means [`WeakGc::upgrade`] could return `None` at any moment. #[derive(Debug, Trace, Finalize)] #[repr(transparent)] -pub struct WeakGc { +pub struct WeakGc { inner: Ephemeron>, } diff --git a/boa_gc/src/pointers/weak_map.rs b/boa_gc/src/pointers/weak_map.rs index f898f58d55a..a4d0d0414b8 100644 --- a/boa_gc/src/pointers/weak_map.rs +++ b/boa_gc/src/pointers/weak_map.rs @@ -1,14 +1,23 @@ -use crate::{Allocator, Finalize, Gc, GcRefCell, Trace, WeakGc}; -use std::collections::HashMap; +// Implementation taken partly from https://docs.rs/hashbrown/0.14.0/src/hashbrown/lib.rs.html, +// but with some adjustments to use `Ephemeron` instead of `(K,V)` + +use hashbrown::{ + hash_map::DefaultHashBuilder, + raw::{RawIter, RawTable}, + TryReserveError, +}; + +use crate::{custom_trace, Allocator, Ephemeron, Finalize, Gc, GcRefCell, Trace}; +use std::{fmt, hash::BuildHasher, marker::PhantomData, mem}; /// A map that holds weak references to its keys and is traced by the garbage collector. #[derive(Clone, Debug, Default, Trace, Finalize)] pub struct WeakMap { - pub(crate) inner: Gc, V>>>, + pub(crate) inner: Gc>>, } impl WeakMap { - /// Creates a new [`WeakMap`]. + /// Creates a new `WeakMap`. #[must_use] #[inline] pub fn new() -> Self { @@ -18,26 +27,433 @@ impl WeakMap { /// Inserts a key-value pair into the map. #[inline] pub fn insert(&mut self, key: &Gc, value: V) { - self.inner.borrow_mut().insert(WeakGc::new(key), value); + self.inner.borrow_mut().insert(key, value); } /// Removes a key from the map, returning the value at the key if the key was previously in the map. #[inline] pub fn remove(&mut self, key: &Gc) -> Option { - self.inner.borrow_mut().remove(&WeakGc::new(key)) + self.inner.borrow_mut().remove(key) } /// Returns `true` if the map contains a value for the specified key. #[must_use] #[inline] pub fn contains_key(&self, key: &Gc) -> bool { - self.inner.borrow().contains_key(&WeakGc::new(key)) + self.inner.borrow().contains_key(key) } /// Returns a reference to the value corresponding to the key. #[must_use] #[inline] pub fn get(&self, key: &Gc) -> Option { - self.inner.borrow().get(&WeakGc::new(key)).cloned() + self.inner.borrow().get(key) + } +} + +/// A hash map where the bucket type is an [Ephemeron]\. +/// +/// This data structure allows associating a [Gc]\ with a value `V` that will be +/// invalidated when the `Gc` gets collected. In other words, all key entries on the map are weakly +/// held. +pub(crate) struct RawWeakMap +where + K: Trace + 'static, + V: Trace + 'static, +{ + hash_builder: S, + table: RawTable>, +} + +impl Finalize for RawWeakMap +where + K: Trace + 'static, + V: Trace + 'static, +{ +} + +// SAFETY: The implementation correctly marks all ephemerons inside the map. +unsafe impl Trace for RawWeakMap +where + K: Trace + 'static, + V: Trace + 'static, +{ + custom_trace!(this, { + for eph in this.iter() { + mark(eph); + } + }); +} + +impl Default for RawWeakMap +where + S: Default, + K: Trace + 'static, + V: Trace + 'static, +{ + fn default() -> Self { + Self::with_hasher(Default::default()) + } +} + +impl RawWeakMap +where + K: Trace + 'static, + V: Trace + 'static, +{ + /// Creates an empty `RawWeakMap`. + /// + /// The map is initially created with a capacity of 0, so it will not allocate until it + /// is first inserted into. + pub(crate) fn new() -> Self { + Self::default() + } + + /// Creates an empty `RawWeakMap` with the specified capacity. + /// + /// The map will be able to hold at least `capacity` elements without reallocating. + /// If `capacity` is 0, the map will not allocate. + #[allow(unused)] + pub(crate) fn with_capacity(capacity: usize) -> Self { + Self::with_capacity_and_hasher(capacity, DefaultHashBuilder::default()) + } +} + +impl RawWeakMap +where + K: Trace + 'static, + V: Trace + 'static, +{ + /// Creates an empty `RawWeakMap` which will use the given hash builder to hash + /// keys. + /// + /// The map is initially created with a capacity of 0, so it will not allocate until it is first + /// inserted into. + pub(crate) const fn with_hasher(hash_builder: S) -> Self { + Self { + hash_builder, + table: RawTable::new(), + } + } + + /// Creates an empty `RawWeakMap` with the specified capacity, using `hash_builder` + /// to hash the keys. + /// + /// The map will be able to hold at least `capacity` elements without reallocating. + /// If `capacity` is 0, the map will not allocate. + pub(crate) fn with_capacity_and_hasher(capacity: usize, hash_builder: S) -> Self { + Self { + hash_builder, + table: RawTable::with_capacity(capacity), + } + } + + /// Returns a reference to the map's [`BuildHasher`]. + #[allow(unused)] + pub(crate) const fn hasher(&self) -> &S { + &self.hash_builder + } + + /// Returns the number of elements the map can hold without reallocating. + /// + /// This number is a lower bound; the map might be able to hold more, but is guaranteed to be + /// able to hold at least this many. + #[allow(unused)] + pub(crate) fn capacity(&self) -> usize { + self.table.capacity() + } + + /// An iterator visiting all entries in arbitrary order. + /// The iterator element type is [Ephemeron]. + pub(crate) fn iter(&self) -> Iter<'_, K, V> { + // SAFETY: The returned iterator is tied to the lifetime of self. + unsafe { + Iter { + inner: self.table.iter(), + marker: PhantomData, + } + } + } + + /// Returns the number of elements in the map. + /// + /// This is an upper bound; the map might contain some expired keys which haven't been + /// removed. + #[allow(unused)] + pub(crate) fn len(&self) -> usize { + self.table.len() + } + + /// Returns `true` if the map contains no elements. + /// + /// This might return `false` if the map has expired keys that are still pending to be + /// cleaned up. + #[allow(unused)] + pub(crate) fn is_empty(&self) -> bool { + self.len() == 0 + } + + /// Retains only the elements specified by the predicate. Keeps the + /// allocated memory for reuse. + /// + /// In other words, remove all ephemerons [Ephemeron] such that + /// `f(&eph)` returns `false`. + /// The elements are visited in unsorted (and unspecified) order. + pub(crate) fn retain(&mut self, mut f: F) + where + F: FnMut(&Ephemeron) -> bool, + { + // SAFETY: + // - `item` is only used internally, which means it outlives self. + // - `item` pointer is not used after the call to `erase`. + unsafe { + for item in self.table.iter() { + let eph = item.as_ref(); + if !f(eph) { + self.table.erase(item); + } + } + } + } + + /// Clears the map, removing all key-value pairs. Keeps the allocated memory + /// for reuse. + #[allow(unused)] + pub(crate) fn clear(&mut self) { + self.table.clear(); + } +} + +impl RawWeakMap +where + K: Trace + 'static, + V: Trace + Clone + 'static, + S: BuildHasher, +{ + /// Reserves capacity for at least `additional` more elements to be inserted + /// in the `RawWeakMap`. The collection may reserve more space to avoid + /// frequent reallocations. + /// + /// # Panics + /// + /// Panics if the new capacity exceeds [`isize::MAX`] bytes and [`abort`](std::process::abort) + /// the program in case of allocation error. Use [`try_reserve`](RawWeakMap::try_reserve) instead + /// if you want to handle memory allocation failure. + #[allow(unused)] + pub(crate) fn reserve(&mut self, additional: usize) { + self.table + .reserve(additional, make_hasher(&self.hash_builder)); + } + + /// Tries to reserve capacity for at least `additional` more elements to be inserted + /// in the given `RawWeakMap`. The collection may reserve more space to avoid + /// frequent reallocations. + /// + /// # Errors + /// + /// If the capacity overflows, or the allocator reports a failure, then an error + /// is returned. + #[allow(unused)] + pub(crate) fn try_reserve(&mut self, additional: usize) -> Result<(), TryReserveError> { + self.table + .try_reserve(additional, make_hasher(&self.hash_builder)) + } + + /// Shrinks the capacity of the map as much as possible. It will drop + /// down as much as possible while maintaining the internal rules + /// and possibly leaving some space in accordance with the resize policy. + #[allow(unused)] + pub(crate) fn shrink_to_fit(&mut self) { + self.table + .shrink_to(0, make_hasher::<_, V, S>(&self.hash_builder)); + } + + /// Shrinks the capacity of the map with a lower limit. It will drop + /// down no lower than the supplied limit while maintaining the internal rules + /// and possibly leaving some space in accordance with the resize policy. + /// + /// This function does nothing if the current capacity is smaller than the + /// supplied minimum capacity. + #[allow(unused)] + pub(crate) fn shrink_to(&mut self, min_capacity: usize) { + self.table + .shrink_to(min_capacity, make_hasher::<_, V, S>(&self.hash_builder)); + } + + /// Returns the value corresponding to the supplied key. + // TODO: make this return a reference instead of cloning. + pub(crate) fn get(&self, k: &Gc) -> Option { + if self.table.is_empty() { + None + } else { + let hash = make_hash_from_gc(&self.hash_builder, k); + self.table.get(hash, equivalent_key(k))?.value() + } + } + + /// Returns `true` if the map contains a value for the specified key. + pub(crate) fn contains_key(&self, k: &Gc) -> bool { + self.get(k).is_some() + } + + // Inserts a key-value pair into the map. + /// + /// If the map did not have this key present, [`None`] is returned. + /// + /// If the map did have this key present, the value is updated, and the old + /// value is returned. The key is not updated. + pub(crate) fn insert(&mut self, k: &Gc, v: V) -> Option> { + let hash = make_hash_from_gc(&self.hash_builder, k); + let hasher = make_hasher(&self.hash_builder); + let eph = Ephemeron::new(k, v); + match self + .table + .find_or_find_insert_slot(hash, equivalent_key(k), hasher) + { + // SAFETY: `bucket` is only used inside the replace call, meaning it doesn't + // outlive self. + Ok(bucket) => Some(mem::replace(unsafe { bucket.as_mut() }, eph)), + Err(slot) => { + // SAFETY: `slot` comes from a call to `find_or_find_insert_slot`, and `self` + // is not mutated until the call to `insert_in_slot`. + unsafe { + self.table.insert_in_slot(hash, slot, eph); + } + None + } + } + } + + /// Removes a key from the map, returning the value at the key if the key + /// was previously in the map. Keeps the allocated memory for reuse. + pub(crate) fn remove(&mut self, k: &Gc) -> Option { + let hash = make_hash_from_gc(&self.hash_builder, k); + self.table.remove_entry(hash, equivalent_key(k))?.value() + } + + /// Clears all the expired keys in the map. + pub(crate) fn clear_expired(&mut self) { + self.retain(|eph| eph.value().is_some()); + } +} + +pub(crate) struct Iter<'a, K, V> +where + K: Trace + 'static, + V: Trace + 'static, +{ + inner: RawIter>, + marker: PhantomData<&'a Ephemeron>, +} + +impl Clone for Iter<'_, K, V> +where + K: Trace + 'static, + V: Trace + 'static, +{ + #[inline] + fn clone(&self) -> Self { + Iter { + inner: self.inner.clone(), + marker: PhantomData, + } + } +} + +impl fmt::Debug for Iter<'_, K, V> +where + K: Trace + 'static + fmt::Debug, + V: Trace + 'static + fmt::Debug, +{ + #[inline] + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_list().entries(self.clone()).finish() + } +} + +impl<'a, K, V> Iterator for Iter<'a, K, V> +where + K: Trace + 'static, + V: Trace + 'static, +{ + type Item = &'a Ephemeron; + + #[inline] + fn next(&mut self) -> Option { + // SAFETY: The original map outlives the iterator thanks to the lifetime parameter, + // and since the returned ephemeron carries that information, the call to `as_ref` is safe. + unsafe { self.inner.next().map(|b| b.as_ref()) } + } + + #[inline] + fn size_hint(&self) -> (usize, Option) { + self.inner.size_hint() + } +} + +impl fmt::Debug for RawWeakMap +where + K: fmt::Debug + Trace + Finalize, + V: fmt::Debug + Trace + Finalize, +{ + #[inline] + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + self.iter().fmt(f) + } +} + +fn make_hasher(hash_builder: &S) -> impl Fn(&Ephemeron) -> u64 + '_ +where + S: BuildHasher, + K: Trace + 'static, + V: Trace + 'static, +{ + move |val| make_hash_from_eph::(hash_builder, val) +} + +fn make_hash_from_eph(hash_builder: &S, eph: &Ephemeron) -> u64 +where + S: BuildHasher, + K: Trace + 'static, + V: Trace + 'static, +{ + use std::hash::Hasher; + let mut state = hash_builder.build_hasher(); + // TODO: Is this true for custom hashers? if not, rewrite `key` to be safe. + // SAFETY: The return value of `key` is only used to hash it, which + // cannot trigger a garbage collection, + unsafe { + if let Some(val) = eph.inner().key() { + std::ptr::hash(val, &mut state); + } else { + std::ptr::hash(eph.inner_ptr().as_ptr(), &mut state); + } + } + state.finish() +} + +fn make_hash_from_gc(hash_builder: &S, gc: &Gc) -> u64 +where + S: BuildHasher, + K: Trace + 'static, +{ + use std::hash::Hasher; + let mut state = hash_builder.build_hasher(); + std::ptr::hash(gc.inner_ptr().as_ptr(), &mut state); + state.finish() +} + +fn equivalent_key(k: &Gc) -> impl Fn(&Ephemeron) -> bool + '_ +where + K: Trace + 'static, + V: Trace + 'static, +{ + // SAFETY: The return value of `key` is only used inside eq, which + // cannot trigger a garbage collection. + move |eph| unsafe { + eph.inner().key().is_some_and(|val| { + let val: *const _ = val; + std::ptr::eq(val, k.inner_ptr().as_ptr()) + }) } } diff --git a/boa_gc/src/test/mod.rs b/boa_gc/src/test/mod.rs index fbe6f7939b2..d3e52e2789e 100644 --- a/boa_gc/src/test/mod.rs +++ b/boa_gc/src/test/mod.rs @@ -8,6 +8,7 @@ mod weak_map; struct Harness; impl Harness { + #[track_caller] fn assert_collections(o: usize) { BOA_GC.with(|current| { let gc = current.borrow(); @@ -15,6 +16,7 @@ impl Harness { }); } + #[track_caller] fn assert_empty_gc() { BOA_GC.with(|current| { let gc = current.borrow(); @@ -24,6 +26,7 @@ impl Harness { }); } + #[track_caller] fn assert_bytes_allocated() { BOA_GC.with(|current| { let gc = current.borrow(); @@ -31,6 +34,7 @@ impl Harness { }); } + #[track_caller] fn assert_exact_bytes_allocated(bytes: usize) { BOA_GC.with(|current| { let gc = current.borrow(); diff --git a/boa_gc/src/test/weak.rs b/boa_gc/src/test/weak.rs index 375167335ad..246dce79761 100644 --- a/boa_gc/src/test/weak.rs +++ b/boa_gc/src/test/weak.rs @@ -164,7 +164,7 @@ fn eph_self_referential() { *root.inner.inner.borrow_mut() = Some(eph.clone()); assert!(eph.value().is_some()); - Harness::assert_exact_bytes_allocated(72); + Harness::assert_exact_bytes_allocated(80); } *root.inner.inner.borrow_mut() = None; @@ -210,7 +210,7 @@ fn eph_self_referential_chain() { assert!(eph_start.value().is_some()); assert!(eph_chain2.value().is_some()); - Harness::assert_exact_bytes_allocated(216); + Harness::assert_exact_bytes_allocated(232); } *root.borrow_mut() = None; @@ -230,30 +230,63 @@ fn eph_finalizer() { #[derive(Clone, Trace)] struct S { #[unsafe_ignore_trace] - inner: Rc>, + inner: Rc>, } impl Finalize for S { fn finalize(&self) { - self.inner.set(true); + self.inner.set(self.inner.get() + 1); } } run_test(|| { let val = S { - inner: Rc::new(Cell::new(false)), + inner: Rc::new(Cell::new(0)), + }; + + let key = Gc::new(50u32); + let eph = Ephemeron::new(&key, val.clone()); + assert!(eph.has_value()); + // finalize hasn't been run + assert_eq!(val.inner.get(), 0); + + drop(key); + force_collect(); + assert!(!eph.has_value()); + // finalize ran when collecting + assert_eq!(val.inner.get(), 1); + }); +} + +#[test] +fn eph_gc_finalizer() { + #[derive(Clone, Trace)] + struct S { + #[unsafe_ignore_trace] + inner: Rc>, + } + + impl Finalize for S { + fn finalize(&self) { + self.inner.set(self.inner.get() + 1); + } + } + + run_test(|| { + let val = S { + inner: Rc::new(Cell::new(0)), }; let key = Gc::new(50u32); let eph = Ephemeron::new(&key, Gc::new(val.clone())); assert!(eph.has_value()); // finalize hasn't been run - assert!(!val.inner.get()); + assert_eq!(val.inner.get(), 0); drop(key); force_collect(); assert!(!eph.has_value()); // finalize ran when collecting - assert!(val.inner.get()); + assert_eq!(val.inner.get(), 1); }); }