Skip to content

Commit

Permalink
Memory: Use triomphe::Arc for SharedReference (#8622)
Browse files Browse the repository at this point in the history
### Description

This eliminates the unused weakref count in `std::sync::Arc`, saving 64 bits per unique `SharedReference`.

I noticed there's additional potential optimizations we could do here too (not included in this PR), of increasing levels of difficulty:

- We can deduplicate `ValueTypeId` by moving it inside the `Arc`.

- We can build a mapping of `std::any::TypeId` to `ValueTypeId`, and avoid storing the `ValueTypeId` entirely.

- We can deduplicate the fat pointer's layout metadata by also storing it inside the `Arc` using the nightly `ptr_metadata` feature, similar to `triomphe::ThinArc` (but that only works for slices of non-dst elements, presumably because they don't want to depend on nightly).

### Testing Instructions

Using dhat for measuring the max heap size (vercel/next.js/67166)...

Do a release build

```
PACK_NEXT_COMPRESS=objcopy-zstd pnpm pack-next --release --features __internal_dhat-heap
```

Start the dev server in shadcn-ui, and try to load the homepage:

```
pnpm i
pnpm --filter=www dev --turbo
curl http://localhost:3003/
```

After curl exits, kill the dev server.

```
pkill -INT next-server
```

**Before (2 Runs):**

```
[dhat]: Teardown profiler
dhat: Total:     3,106,545,900 bytes in 20,113,580 blocks
dhat: At t-gmax: 731,860,693 bytes in 4,924,028 blocks
dhat: At t-end:  731,860,693 bytes in 4,924,028 blocks
```

```
[dhat]: Teardown profiler
dhat: Total:     3,108,036,858 bytes in 20,111,694 blocks
dhat: At t-gmax: 730,059,664 bytes in 4,923,002 blocks
dhat: At t-end:  730,059,536 bytes in 4,923,001 blocks
```


**After (2 Runs):**

```
[dhat]: Teardown profiler
dhat: Total:     3,093,298,170 bytes in 20,127,818 blocks
dhat: At t-gmax: 727,258,939 bytes in 4,923,863 blocks
dhat: At t-end:  727,155,146 bytes in 4,923,901 blocks
```

```
[dhat]: Teardown profiler
dhat: Total:     3,102,661,690 bytes in 20,124,408 blocks
dhat: At t-gmax: 728,190,876 bytes in 4,924,856 blocks
dhat: At t-end:  728,124,976 bytes in 4,924,236 blocks
```

This is [a 0.4426% reduction](https://www.wolframalpha.com/input?i=Percent+change+from+%28731%2C860%2C693%2B730%2C059%2C664%29%2F2+to+%28727%2C258%2C939%2B728%2C190%2C876%29%2F2).
  • Loading branch information
bgw authored Jun 28, 2024
1 parent 0f32796 commit 66c2f55
Show file tree
Hide file tree
Showing 14 changed files with 108 additions and 51 deletions.
18 changes: 15 additions & 3 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -299,12 +299,14 @@ tokio-util = { version = "0.7.7", features = ["io"] }
tracing = "0.1.37"
tracing-appender = "0.2.2"
tracing-subscriber = "0.3.16"
triomphe = "0.1.13"
tui-term = { version = "0.1.9", default-features = false }
url = "2.2.2"
urlencoding = "2.1.2"
webbrowser = "0.8.7"
which = "4.4.0"
unicode-segmentation = "1.10.1"
unsize = "1.1.0"

[patch.crates-io]
# TODO: Use upstream when https://github.com/servo/pathfinder/pull/566 lands
Expand Down
6 changes: 3 additions & 3 deletions crates/node-file-trace/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -474,7 +474,7 @@ async fn run<B: Backend + 'static, F: Future<Output = ()>>(
Box::pin(async move {
let output = main_operation(
TransientValue::new(dir.clone()),
args.clone().into(),
TransientInstance::new(args.clone()),
module_options,
resolve_options,
);
Expand Down Expand Up @@ -513,7 +513,7 @@ async fn run<B: Backend + 'static, F: Future<Output = ()>>(
#[turbo_tasks::function]
async fn main_operation(
current_dir: TransientValue<PathBuf>,
args: TransientInstance<Args>,
args: TransientInstance<Arc<Args>>,
module_options: TransientInstance<ModuleOptionsContext>,
resolve_options: TransientInstance<ResolveOptionsContext>,
) -> Result<Vc<Vec<RcStr>>> {
Expand All @@ -533,7 +533,7 @@ async fn main_operation(
let fs = create_fs("context directory", &context_directory, watch).await?;
let process_cwd = process_cwd.clone().map(RcStr::from);

match *args {
match **args {
Args::Print { common: _ } => {
let input = process_input(&dir, &context_directory, input).unwrap();
let mut result = BTreeSet::new();
Expand Down
2 changes: 2 additions & 0 deletions crates/turbo-tasks/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -40,9 +40,11 @@ serde_regex = "1.1.0"
thiserror = { workspace = true }
tokio = { workspace = true, features = ["full"] }
tracing = { workspace = true }
triomphe = { workspace = true, features = ["unsize"] }
turbo-tasks-hash = { workspace = true }
turbo-tasks-macros = { workspace = true }
turbo-tasks-malloc = { workspace = true }
unsize = { workspace = true }

[dev-dependencies]
serde_test = "1.0.157"
Expand Down
7 changes: 3 additions & 4 deletions crates/turbo-tasks/src/backend.rs
Original file line number Diff line number Diff line change
Expand Up @@ -162,8 +162,8 @@ impl CellContent {
let data = self.0.ok_or_else(|| anyhow!("Cell is empty"))?;
let data = data
.downcast()
.ok_or_else(|| anyhow!("Unexpected type in cell"))?;
Ok(ReadRef::new(data))
.map_err(|_err| anyhow!("Unexpected type in cell"))?;
Ok(ReadRef::new_arc(data))
}

/// # Safety
Expand All @@ -185,8 +185,7 @@ impl CellContent {
}

pub fn try_cast<T: Any + VcValueType>(self) -> Option<ReadRef<T>> {
self.0
.and_then(|data| data.downcast().map(|data| ReadRef::new(data)))
Some(ReadRef::new_arc(self.0?.downcast().ok()?))
}
}

Expand Down
1 change: 1 addition & 0 deletions crates/turbo-tasks/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ mod state;
pub mod task;
pub mod trace;
mod trait_ref;
mod triomphe_utils;
pub mod util;
mod value;
mod value_type;
Expand Down
8 changes: 4 additions & 4 deletions crates/turbo-tasks/src/manager.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1469,9 +1469,9 @@ impl CurrentCellRef {
tt.update_own_task_cell(
self.current_task,
self.index,
CellContent(Some(SharedReference(
CellContent(Some(SharedReference::new(
Some(self.index.type_id),
Arc::new(update),
triomphe::Arc::new(update),
))),
)
}
Expand All @@ -1493,9 +1493,9 @@ impl CurrentCellRef {
tt.update_own_task_cell(
self.current_task,
self.index,
CellContent(Some(SharedReference(
CellContent(Some(SharedReference::new(
Some(self.index.type_id),
Arc::new(new_content),
triomphe::Arc::new(new_content),
))),
)
}
Expand Down
24 changes: 12 additions & 12 deletions crates/turbo-tasks/src/read_ref.rs
Original file line number Diff line number Diff line change
@@ -1,10 +1,8 @@
use std::{
cmp::Ordering,
fmt::{Debug, Display},
hash::Hash,
marker::PhantomData,
mem::transmute_copy,
sync::Arc,
};

use serde::{Deserialize, Serialize};
Expand All @@ -22,7 +20,7 @@ use crate::{
/// certain point in time.
///
/// Internally it stores a reference counted reference to a value on the heap.
pub struct ReadRef<T>(Arc<T>);
pub struct ReadRef<T>(triomphe::Arc<T>);

impl<T> Clone for ReadRef<T> {
fn clone(&self) -> Self {
Expand Down Expand Up @@ -222,21 +220,21 @@ where
D: serde::Deserializer<'de>,
{
let value = T::deserialize(deserializer)?;
Ok(Self(Arc::new(value)))
Ok(Self(triomphe::Arc::new(value)))
}
}

impl<T> ReadRef<T> {
pub fn new(arc: Arc<T>) -> Self {
Self(arc)
pub fn new_owned(value: T) -> Self {
Self(triomphe::Arc::new(value))
}

pub fn ptr_eq(&self, other: &ReadRef<T>) -> bool {
Arc::ptr_eq(&self.0, &other.0)
pub fn new_arc(arc: triomphe::Arc<T>) -> Self {
Self(arc)
}

pub fn ptr_cmp(&self, other: &ReadRef<T>) -> Ordering {
Arc::as_ptr(&self.0).cmp(&Arc::as_ptr(&other.0))
pub fn ptr_eq(&self, other: &ReadRef<T>) -> bool {
triomphe::Arc::ptr_eq(&self.0, &other.0)
}
}

Expand All @@ -248,8 +246,10 @@ where
/// reference.
pub fn cell(read_ref: ReadRef<T>) -> Vc<T> {
let local_cell = find_cell_by_type(T::get_value_type_id());
local_cell
.update_shared_reference(SharedReference(Some(T::get_value_type_id()), read_ref.0));
local_cell.update_shared_reference(SharedReference::new(
Some(T::get_value_type_id()),
read_ref.0,
));
Vc {
node: local_cell.into(),
_t: PhantomData,
Expand Down
30 changes: 19 additions & 11 deletions crates/turbo-tasks/src/task/concrete_task_input.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,23 +10,34 @@ use std::{

use anyhow::Result;
use serde::{ser::SerializeTuple, Deserialize, Serialize};
use unsize::CoerceUnsize;

use crate::{
backend::CellContent,
id::{FunctionId, TraitTypeId},
magic_any::MagicAny,
manager::{read_task_cell, read_task_output},
registry, turbo_tasks, CellId, RawVc, RcStr, TaskId, TraitType, ValueTypeId,
registry,
triomphe_utils::{coerce_to_any_send_sync, downcast_triomphe_arc},
turbo_tasks, CellId, RawVc, RcStr, TaskId, TraitType, ValueTypeId,
};

/// A type-erased wrapper for [`triomphe::Arc`].
#[derive(Clone)]
pub struct SharedReference(pub Option<ValueTypeId>, pub Arc<dyn Any + Send + Sync>);
pub struct SharedReference(
pub Option<ValueTypeId>,
pub triomphe::Arc<dyn Any + Send + Sync>,
);

impl SharedReference {
pub fn downcast<T: Any + Send + Sync>(self) -> Option<Arc<T>> {
match Arc::downcast(self.1) {
Ok(data) => Some(data),
Err(_) => None,
pub fn new(type_id: Option<ValueTypeId>, data: triomphe::Arc<impl Any + Send + Sync>) -> Self {
Self(type_id, data.unsize(coerce_to_any_send_sync()))
}

pub fn downcast<T: Any + Send + Sync>(self) -> Result<triomphe::Arc<T>, Self> {
match downcast_triomphe_arc(self.1) {
Ok(data) => Ok(data),
Err(data) => Err(Self(self.0, data)),
}
}
}
Expand All @@ -41,10 +52,7 @@ impl PartialEq for SharedReference {
// only compares their addresses.
#[allow(ambiguous_wide_pointer_comparisons)]
fn eq(&self, other: &Self) -> bool {
std::ptr::addr_eq(
&*self.1 as *const (dyn Any + Send + Sync),
&*other.1 as *const (dyn Any + Send + Sync),
)
triomphe::Arc::ptr_eq(&self.1, &other.1)
}
}
impl Eq for SharedReference {}
Expand Down Expand Up @@ -129,7 +137,7 @@ impl<'de> Deserialize<'de> for SharedReference {
if let Some(seed) = registry::get_value_type(ty).get_any_deserialize_seed()
{
if let Some(value) = seq.next_element_seed(seed)? {
Ok(SharedReference(Some(ty), value.into()))
Ok(SharedReference::new(Some(ty), value.into()))
} else {
Err(serde::de::Error::invalid_length(
1,
Expand Down
33 changes: 33 additions & 0 deletions crates/turbo-tasks/src/triomphe_utils.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
use std::any::Any;

use unsize::Coercion;

/// Attempt to downcast a `triomphe::Arc<dyn Any + Send + Sync>` to a concrete
/// type.
///
/// Ported from [`std::sync::Arc::downcast`] to [`triomphe::Arc`].
pub fn downcast_triomphe_arc<T: Any + Send + Sync>(
this: triomphe::Arc<dyn Any + Send + Sync>,
) -> Result<triomphe::Arc<T>, triomphe::Arc<dyn Any + Send + Sync>> {
if (*this).is::<T>() {
unsafe {
// Get the pointer to the offset (*const T) inside of the ArcInner.
let ptr = triomphe::Arc::into_raw(this);
// SAFETY: The negative offset from the data (ptr) in an Arc to the start of the
// data structure is fixed regardless of type `T`.
//
// SAFETY: Casting from a fat pointer to a thin pointer is safe, as long as the
// types are compatible (they are).
Ok(triomphe::Arc::from_raw(ptr.cast()))
}
} else {
Err(this)
}
}

/// [`Coerce::to_any`] except that it coerces to `dyn Any + Send + Sync` as
/// opposed to `dyn Any`.
pub fn coerce_to_any_send_sync<T: Any + Send + Sync>() -> Coercion<T, dyn Any + Send + Sync> {
// SAFETY: The signature of this function guarantees the coercion is valid
unsafe { Coercion::new(|x| x) }
}
15 changes: 8 additions & 7 deletions crates/turbo-tasks/src/value.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use std::{fmt::Debug, marker::PhantomData, ops::Deref, sync::Arc};
use std::{fmt::Debug, marker::PhantomData, ops::Deref};

use crate::SharedReference;

Expand Down Expand Up @@ -110,9 +110,10 @@ impl<T> Ord for TransientInstance<T> {
}
}

impl<T: Send + Sync + 'static> From<TransientInstance<T>> for Arc<T> {
impl<T: Send + Sync + 'static> From<TransientInstance<T>> for triomphe::Arc<T> {
fn from(instance: TransientInstance<T>) -> Self {
Arc::downcast(instance.inner.1.clone()).unwrap()
// we know this downcast must work because we have type T
instance.inner.downcast().unwrap()
}
}

Expand All @@ -122,10 +123,10 @@ impl<T: Send + Sync + 'static> From<TransientInstance<T>> for SharedReference {
}
}

impl<T: Send + Sync + 'static> From<Arc<T>> for TransientInstance<T> {
fn from(arc: Arc<T>) -> Self {
impl<T: Send + Sync + 'static> From<triomphe::Arc<T>> for TransientInstance<T> {
fn from(arc: triomphe::Arc<T>) -> Self {
Self {
inner: SharedReference(None, arc),
inner: SharedReference::new(None, arc),
phantom: PhantomData,
}
}
Expand All @@ -149,7 +150,7 @@ impl<T: Send + Sync + 'static> TryFrom<SharedReference> for TransientInstance<T>
impl<T: Send + Sync + 'static> TransientInstance<T> {
pub fn new(value: T) -> Self {
Self {
inner: SharedReference(None, Arc::new(value)),
inner: SharedReference::new(None, triomphe::Arc::new(value)),
phantom: PhantomData,
}
}
Expand Down
2 changes: 1 addition & 1 deletion crates/turbo-tasks/src/value_type.rs
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,7 @@ impl ValueType {

pub fn any_as_serializable<'a>(
&self,
arc: &'a Arc<dyn Any + Sync + Send>,
arc: &'a triomphe::Arc<dyn Any + Sync + Send>,
) -> Option<&'a dyn erased_serde::Serialize> {
if let Some(s) = self.any_serialization {
Some((s.0)(&**arc))
Expand Down
8 changes: 4 additions & 4 deletions crates/turbopack-cli/src/dev/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -196,25 +196,25 @@ impl TurbopackDevServerBuilder {
let show_all = self.show_all;
let log_detail: bool = self.log_detail;
let browserslist_query: RcStr = self.browserslist_query;
let log_args = Arc::new(LogOptions {
let log_args = TransientInstance::new(LogOptions {
current_dir: current_dir().unwrap(),
project_dir: PathBuf::from(project_dir.clone()),
show_all,
log_detail,
log_level: self.log_level,
});
let entry_requests = Arc::new(self.entry_requests);
let entry_requests = TransientInstance::new(self.entry_requests);
let tasks = turbo_tasks.clone();
let issue_provider = self.issue_reporter.unwrap_or_else(|| {
// Initialize a ConsoleUi reporter if no custom reporter was provided
Box::new(move || Vc::upcast(ConsoleUi::new(log_args.clone().into())))
Box::new(move || Vc::upcast(ConsoleUi::new(log_args.clone())))
});

let source = move || {
source(
root_dir.clone(),
project_dir.clone(),
entry_requests.clone().into(),
entry_requests.clone(),
eager_compile,
browserslist_query.clone(),
)
Expand Down
Loading

0 comments on commit 66c2f55

Please sign in to comment.