Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Rollup merge of #120308 - utkarshgupta137:duration-opt, r=m-ou-se
core/time: avoid divisions in Duration::new In our (decently large) code base, we use `SystemTime::UNIX_EPOCH.elapsed()` in a lot of places & often in a loop or in the hot path. On [Unix](https://github.com/rust-lang/rust/blob/1.75.0/library/std/src/sys/unix/time.rs#L153-L162) at least, it seems we do calculations beforehand to ensure that nanos is within the valid range, yet `Duration::new()` still checks it again, using 2 divisions. It seems like adding a branch can make this function 33% faster on ARM64 in the cases where nanos is already in the valid range & seems to have no effect in the other case. Benchmarks: M1 Pro (14-inch base model): ``` duration/current/checked time: [1.5945 ns 1.6167 ns 1.6407 ns] Found 5 outliers among 100 measurements (5.00%) 2 (2.00%) high mild 3 (3.00%) high severe duration/current/unchecked time: [1.5941 ns 1.6051 ns 1.6179 ns] Found 2 outliers among 100 measurements (2.00%) 1 (1.00%) high mild 1 (1.00%) high severe duration/branched/checked time: [1.1997 ns 1.2048 ns 1.2104 ns] Found 8 outliers among 100 measurements (8.00%) 4 (4.00%) high mild 4 (4.00%) high severe duration/branched/unchecked time: [1.5881 ns 1.5957 ns 1.6039 ns] Found 6 outliers among 100 measurements (6.00%) 3 (3.00%) high mild 3 (3.00%) high severe ``` EC2 c7gd.16xlarge (Graviton 3): ``` duration/current/checked time: [2.7996 ns 2.8000 ns 2.8003 ns] Found 5 outliers among 100 measurements (5.00%) 2 (2.00%) low severe 3 (3.00%) low mild duration/current/unchecked time: [2.9922 ns 2.9925 ns 2.9928 ns] Found 7 outliers among 100 measurements (7.00%) 4 (4.00%) low severe 1 (1.00%) low mild 2 (2.00%) high mild duration/branched/checked time: [2.0830 ns 2.0843 ns 2.0857 ns] Found 3 outliers among 100 measurements (3.00%) 1 (1.00%) low severe 1 (1.00%) low mild 1 (1.00%) high mild duration/branched/unchecked time: [2.9879 ns 2.9886 ns 2.9893 ns] Found 5 outliers among 100 measurements (5.00%) 3 (3.00%) low severe 2 (2.00%) low mild ``` EC2 r7iz.16xlarge (Intel Xeon 
Scalable-based (Sapphire Rapids)): ``` duration/current/checked time: [980.60 ps 980.79 ps 980.99 ps] Found 10 outliers among 100 measurements (10.00%) 4 (4.00%) low severe 2 (2.00%) low mild 3 (3.00%) high mild 1 (1.00%) high severe duration/current/unchecked time: [979.53 ps 979.74 ps 979.96 ps] Found 6 outliers among 100 measurements (6.00%) 2 (2.00%) low severe 1 (1.00%) low mild 2 (2.00%) high mild 1 (1.00%) high severe duration/branched/checked time: [938.72 ps 938.96 ps 939.22 ps] Found 4 outliers among 100 measurements (4.00%) 1 (1.00%) low mild 1 (1.00%) high mild 2 (2.00%) high severe duration/branched/unchecked time: [1.0103 ns 1.0110 ns 1.0118 ns] Found 10 outliers among 100 measurements (10.00%) 2 (2.00%) low mild 7 (7.00%) high mild 1 (1.00%) high severe ``` Bench code (ran using stable 1.75.0 & criterion latest 0.5.1): I couldn't find any benches for `Duration` in this repo, so I just copied the relevant types & recreated it. ```rust use criterion::{black_box, criterion_group, criterion_main, Criterion}; pub fn duration_bench(c: &mut Criterion) { const NANOS_PER_SEC: u32 = 1_000_000_000; #[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] #[repr(transparent)] struct Nanoseconds(u32); impl Default for Nanoseconds { #[inline] fn default() -> Self { // SAFETY: 0 is within the valid range unsafe { Nanoseconds(0) } } } #[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Default)] pub struct Duration { secs: u64, nanos: Nanoseconds, // Always 0 <= nanos < NANOS_PER_SEC } impl Duration { #[inline] pub const fn new_current(secs: u64, nanos: u32) -> Duration { let secs = match secs.checked_add((nanos / NANOS_PER_SEC) as u64) { Some(secs) => secs, None => panic!("overflow in Duration::new"), }; let nanos = nanos % NANOS_PER_SEC; // SAFETY: nanos % NANOS_PER_SEC < NANOS_PER_SEC, therefore nanos is within the valid range Duration { secs, nanos: unsafe { Nanoseconds(nanos) } } } #[inline] pub const fn new_branched(secs: u64, nanos: u32) -> 
Duration { if nanos < NANOS_PER_SEC { // SAFETY: nanos < NANOS_PER_SEC, therefore nanos is within the valid range Duration { secs, nanos: unsafe { Nanoseconds(nanos) } } } else { let secs = match secs.checked_add((nanos / NANOS_PER_SEC) as u64) { Some(secs) => secs, None => panic!("overflow in Duration::new"), }; let nanos = nanos % NANOS_PER_SEC; // SAFETY: nanos % NANOS_PER_SEC < NANOS_PER_SEC, therefore nanos is within the valid range Duration { secs, nanos: unsafe { Nanoseconds(nanos) } } } } } let mut group = c.benchmark_group("duration/current"); group.bench_function("checked", |b| { b.iter(|| black_box(Duration::new_current(black_box(1_000_000_000), black_box(1_000_000)))); }); group.bench_function("unchecked", |b| { b.iter(|| { black_box(Duration::new_current(black_box(1_000_000_000), black_box(2_000_000_000))) }); }); drop(group); let mut group = c.benchmark_group("duration/branched"); group.bench_function("checked", |b| { b.iter(|| { black_box(Duration::new_branched(black_box(1_000_000_000), black_box(1_000_000))) }); }); group.bench_function("unchecked", |b| { b.iter(|| { black_box(Duration::new_branched(black_box(1_000_000_000), black_box(2_000_000_000))) }); }); } criterion_group!(duration_benches, duration_bench); criterion_main!(duration_benches); ```
- Loading branch information