From 3915c4d1143f2ca5977df0a4804734a066b38459 Mon Sep 17 00:00:00 2001 From: The Miri Conjob Bot Date: Fri, 15 Sep 2023 05:24:07 +0000 Subject: [PATCH 01/26] Preparing for merge from rustc --- src/tools/miri/rust-version | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tools/miri/rust-version b/src/tools/miri/rust-version index ab73f33497e9e..5e635667ecf89 100644 --- a/src/tools/miri/rust-version +++ b/src/tools/miri/rust-version @@ -1 +1 @@ -366dab13f711df90a6891411458544199d159cbc +ae9465fee3962c0c895959001302999bc48ba6d5 From 09fd7a8722fb9634706dcd6ddb88473f4cc6bdf3 Mon Sep 17 00:00:00 2001 From: Peter Jaszkowiak Date: Fri, 15 Sep 2023 21:43:14 -0600 Subject: [PATCH 02/26] triagebot exclude_labels -> exclude_titles --- src/tools/miri/.github/workflows/ci.yml | 2 +- src/tools/miri/miri-script/src/commands.rs | 4 ++-- src/tools/miri/triagebot.toml | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/tools/miri/.github/workflows/ci.yml b/src/tools/miri/.github/workflows/ci.yml index bcbd44a5f38cc..5e2abdde6aced 100644 --- a/src/tools/miri/.github/workflows/ci.yml +++ b/src/tools/miri/.github/workflows/ci.yml @@ -208,7 +208,7 @@ jobs: git push -u origin $BRANCH - name: Create Pull Request run: | - PR=$(gh pr create -B master --title 'Automatic sync from rustc' --body '' --label subtree-sync) + PR=$(gh pr create -B master --title 'Automatic sync from rustc' --body '') ~/.local/bin/zulip-send --user $ZULIP_BOT_EMAIL --api-key $ZULIP_API_TOKEN --site https://rust-lang.zulipchat.com \ --stream miri --subject "Cron Job Failure (miri, $(date -u +%Y-%m))" \ --message "A PR doing a rustc-pull [has been automatically created]($PR) for your convenience." diff --git a/src/tools/miri/miri-script/src/commands.rs b/src/tools/miri/miri-script/src/commands.rs index 124acc950986a..de80777a689ff 100644 --- a/src/tools/miri/miri-script/src/commands.rs +++ b/src/tools/miri/miri-script/src/commands.rs @@ -339,9 +339,9 @@ impl Command { "Confirmed that the push round-trips back to Miri properly. Please create a rustc PR:" ); println!( - // Open PR with `subtree-sync` label to satisfy the `no-merges` triagebot check + // Open PR with `subtree update` title to silence the `no-merges` triagebot check // See https://github.com/rust-lang/rust/pull/114157 - " https://github.com/rust-lang/rust/compare/{github_user}:{branch}?quick_pull=1&labels=subtree-sync" + " https://github.com/rust-lang/rust/compare/{github_user}:{branch}?quick_pull=1&title=Miri+subtree+update" ); drop(josh); diff --git a/src/tools/miri/triagebot.toml b/src/tools/miri/triagebot.toml index 69f3cd2f8109e..8d00cedb769d3 100644 --- a/src/tools/miri/triagebot.toml +++ b/src/tools/miri/triagebot.toml @@ -11,4 +11,4 @@ allow-unauthenticated = [ [shortcut] [no-merges] -exclude_labels = ["rollup", "subtree-sync"] +exclude_titles = ["Rollup of", "sync from rustc"] From aa6d6ba6c59de1ee3e3dd7200a22345d6b84e70c Mon Sep 17 00:00:00 2001 From: The Miri Conjob Bot Date: Tue, 19 Sep 2023 05:25:27 +0000 Subject: [PATCH 03/26] Preparing for merge from rustc --- src/tools/miri/rust-version | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tools/miri/rust-version b/src/tools/miri/rust-version index 5e635667ecf89..a2030c1c34322 100644 --- a/src/tools/miri/rust-version +++ b/src/tools/miri/rust-version @@ -1 +1 @@ -ae9465fee3962c0c895959001302999bc48ba6d5 +19dd9535408db0f1ff3d16613619076aef524d19 From 3ca6c4916e44377647fa2191d8763990d5523d1c Mon Sep 17 00:00:00 2001 From: Neven Villani Date: Thu, 31 Aug 2023 20:14:33 +0200 Subject: [PATCH 04/26] Issue of the current model: spurious reads are not possible This occurs because in some interleavings, inserting a spurious read turns a Reserved into Frozen. We show here an exhaustive test (including arbitrary unknown code in two different threads) that makes this issue observable. --- .../borrow_tracker/tree_borrows/exhaustive.rs | 111 +++ .../src/borrow_tracker/tree_borrows/mod.rs | 10 +- .../src/borrow_tracker/tree_borrows/perms.rs | 165 +++-- .../src/borrow_tracker/tree_borrows/tree.rs | 115 +-- .../borrow_tracker/tree_borrows/tree/tests.rs | 658 ++++++++++++++++++ .../reserved/cell-protected-write.stderr | 4 +- 6 files changed, 917 insertions(+), 146 deletions(-) create mode 100644 src/tools/miri/src/borrow_tracker/tree_borrows/exhaustive.rs create mode 100644 src/tools/miri/src/borrow_tracker/tree_borrows/tree/tests.rs diff --git a/src/tools/miri/src/borrow_tracker/tree_borrows/exhaustive.rs b/src/tools/miri/src/borrow_tracker/tree_borrows/exhaustive.rs new file mode 100644 index 0000000000000..daf3590358fd9 --- /dev/null +++ b/src/tools/miri/src/borrow_tracker/tree_borrows/exhaustive.rs @@ -0,0 +1,111 @@ +//! Exhaustive testing utilities. +//! (These are used in Tree Borrows `#[test]`s for thorough verification +//! of the behavior of the state machine of permissions, +//! but the contents of this file are extremely generic) +#![cfg(test)] + +pub trait Exhaustive: Sized { + fn exhaustive() -> Box>; +} + +macro_rules! precondition { + ($cond:expr) => { + if !$cond { + continue; + } + }; +} +pub(crate) use precondition; + +// Trivial impls of `Exhaustive` for the standard types with 0, 1 and 2 elements respectively. + +impl Exhaustive for ! { + fn exhaustive() -> Box> { + Box::new(std::iter::empty()) + } +} + +impl Exhaustive for () { + fn exhaustive() -> Box> { + Box::new(std::iter::once(())) + } +} + +impl Exhaustive for bool { + fn exhaustive() -> Box> { + Box::new(vec![true, false].into_iter()) + } +} + +// Some container impls for `Exhaustive`. + +impl Exhaustive for Option +where + T: Exhaustive + 'static, +{ + fn exhaustive() -> Box> { + Box::new(std::iter::once(None).chain(T::exhaustive().map(Some))) + } +} + +impl Exhaustive for (T1, T2) +where + T1: Exhaustive + Clone + 'static, + T2: Exhaustive + 'static, +{ + fn exhaustive() -> Box> { + Box::new(T1::exhaustive().flat_map(|t1| T2::exhaustive().map(move |t2| (t1.clone(), t2)))) + } +} + +impl Exhaustive for [T; 1] +where + T: Exhaustive + 'static, +{ + fn exhaustive() -> Box> { + Box::new(T::exhaustive().map(|t| [t])) + } +} + +impl Exhaustive for [T; 2] +where + T: Exhaustive + Clone + 'static, +{ + fn exhaustive() -> Box> { + Box::new(T::exhaustive().flat_map(|t1| T::exhaustive().map(move |t2| [t1.clone(), t2]))) + } +} + +impl Exhaustive for [T; 3] +where + T: Exhaustive + Clone + 'static, +{ + fn exhaustive() -> Box> { + Box::new( + <[T; 2]>::exhaustive() + .flat_map(|[t1, t2]| T::exhaustive().map(move |t3| [t1.clone(), t2.clone(), t3])), + ) + } +} + +impl Exhaustive for [T; 4] +where + T: Exhaustive + Clone + 'static, +{ + fn exhaustive() -> Box> { + Box::new(<[T; 2]>::exhaustive().flat_map(|[t1, t2]| { + <[T; 2]>::exhaustive().map(move |[t3, t4]| [t1.clone(), t2.clone(), t3, t4]) + })) + } +} + +impl Exhaustive for [T; 5] +where + T: Exhaustive + Clone + 'static, +{ + fn exhaustive() -> Box> { + Box::new(<[T; 2]>::exhaustive().flat_map(|[t1, t2]| { + <[T; 3]>::exhaustive().map(move |[t3, t4, t5]| [t1.clone(), t2.clone(), t3, t4, t5]) + })) + } +} diff --git a/src/tools/miri/src/borrow_tracker/tree_borrows/mod.rs b/src/tools/miri/src/borrow_tracker/tree_borrows/mod.rs index 8dd925a0ad80a..924e0de38c9da 100644 --- a/src/tools/miri/src/borrow_tracker/tree_borrows/mod.rs +++ b/src/tools/miri/src/borrow_tracker/tree_borrows/mod.rs @@ -15,6 +15,10 @@ pub mod diagnostics; mod perms; mod tree; mod unimap; + +#[cfg(test)] +mod exhaustive; + use perms::Permission; pub use tree::Tree; @@ -271,6 +275,10 @@ trait EvalContextPrivExt<'mir: 'ecx, 'tcx: 'mir, 'ecx>: crate::MiriInterpCxExt<' diagnostics::AccessCause::Reborrow, )?; // Record the parent-child pair in the tree. + // FIXME: We should eventually ensure that the following `assert` holds, because + // some "exhaustive" tests consider only the initial configurations that satisfy it. + // The culprit is `Permission::new_active` in `tb_protect_place`. + //assert!(new_perm.initial_state.is_initial()); tree_borrows.new_child(orig_tag, new_tag, new_perm.initial_state, range, span)?; drop(tree_borrows); @@ -283,7 +291,7 @@ trait EvalContextPrivExt<'mir: 'ecx, 'tcx: 'mir, 'ecx>: crate::MiriInterpCxExt<' // interleaving, but wether UB happens can depend on whether a write occurs in the // future... let is_write = new_perm.initial_state.is_active() - || (new_perm.initial_state.is_reserved() && new_perm.protector.is_some()); + || (new_perm.initial_state.is_reserved(None) && new_perm.protector.is_some()); if is_write { // Need to get mutable access to alloc_extra. // (Cannot always do this as we can do read-only reborrowing on read-only allocations.) diff --git a/src/tools/miri/src/borrow_tracker/tree_borrows/perms.rs b/src/tools/miri/src/borrow_tracker/tree_borrows/perms.rs index dab216bc5b1c5..16bad13e28f34 100644 --- a/src/tools/miri/src/borrow_tracker/tree_borrows/perms.rs +++ b/src/tools/miri/src/borrow_tracker/tree_borrows/perms.rs @@ -6,7 +6,7 @@ use crate::borrow_tracker::tree_borrows::tree::AccessRelatedness; use crate::borrow_tracker::AccessKind; /// The activation states of a pointer. -#[derive(Debug, Clone, Copy, PartialEq, Eq)] +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] enum PermissionPriv { /// represents: a local reference that has not yet been written to; /// allows: child reads, foreign reads, foreign writes if type is freeze; @@ -48,6 +48,13 @@ impl PartialOrd for PermissionPriv { } } +impl PermissionPriv { + /// Check if `self` can be the initial state of a pointer. + fn is_initial(&self) -> bool { + matches!(self, Reserved { ty_is_freeze: _ } | Frozen) + } +} + /// This module controls how each permission individually reacts to an access. /// Although these functions take `protected` as an argument, this is NOT because /// we check protector violations here, but because some permissions behave differently @@ -66,7 +73,6 @@ mod transition { /// A non-child node was read-accessed: noop on non-protected Reserved, advance to Frozen otherwise. fn foreign_read(state: PermissionPriv, protected: bool) -> Option { - use Option::*; Some(match state { // Non-writeable states just ignore foreign reads. non_writeable @ (Frozen | Disabled) => non_writeable, @@ -134,7 +140,7 @@ mod transition { /// Public interface to the state machine that controls read-write permissions. /// This is the "private `enum`" pattern. -#[derive(Debug, Clone, Copy, PartialEq, Eq)] +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd)] pub struct Permission { inner: PermissionPriv, } @@ -147,6 +153,11 @@ pub struct PermTransition { } impl Permission { + /// Check if `self` can be the initial state of a pointer. + pub fn is_initial(&self) -> bool { + self.inner.is_initial() + } + /// Default initial permission of the root of a new tree. pub fn new_active() -> Self { Self { inner: Active } @@ -166,14 +177,24 @@ impl Permission { matches!(self.inner, Active) } - pub fn is_reserved(self) -> bool { - matches!(self.inner, Reserved { .. }) + // Leave `interior_mut` as `None` if interior mutability + // is irrelevant. + pub fn is_reserved(self, interior_mut: Option) -> bool { + match (interior_mut, self.inner) { + (None, Reserved { .. }) => true, + (Some(b1), Reserved { ty_is_freeze: b2 }) => b1 == b2, + _ => false, + } } pub fn is_frozen(self) -> bool { matches!(self.inner, Frozen) } + pub fn is_disabled(self) -> bool { + matches!(self.inner, Disabled) + } + /// Apply the transition to the inner PermissionPriv. pub fn perform_access( kind: AccessKind, @@ -229,7 +250,8 @@ pub mod diagnostics { f, "{}", match self { - Reserved { .. } => "Reserved", + Reserved { ty_is_freeze: true } => "Reserved", + Reserved { ty_is_freeze: false } => "Reserved (interior mutable)", Active => "Active", Frozen => "Frozen", Disabled => "Disabled", @@ -397,43 +419,35 @@ pub mod diagnostics { #[cfg(test)] mod propagation_optimization_checks { pub use super::*; - - mod util { - pub use super::*; - impl PermissionPriv { - /// Enumerate all states - pub fn all() -> impl Iterator { - vec![ - Active, - Reserved { ty_is_freeze: true }, - Reserved { ty_is_freeze: false }, - Frozen, - Disabled, - ] - .into_iter() - } + use crate::borrow_tracker::tree_borrows::exhaustive::{precondition, Exhaustive}; + + impl Exhaustive for PermissionPriv { + fn exhaustive() -> Box> { + Box::new( + vec![Active, Frozen, Disabled] + .into_iter() + .chain(bool::exhaustive().map(|ty_is_freeze| Reserved { ty_is_freeze })), + ) } + } - impl Permission { - pub fn all() -> impl Iterator { - PermissionPriv::all().map(|inner| Self { inner }) - } + impl Exhaustive for Permission { + fn exhaustive() -> Box> { + Box::new(PermissionPriv::exhaustive().map(|inner| Self { inner })) } + } - impl AccessKind { - /// Enumerate all AccessKind. - pub fn all() -> impl Iterator { - use AccessKind::*; - [Read, Write].into_iter() - } + impl Exhaustive for AccessKind { + fn exhaustive() -> Box> { + use AccessKind::*; + Box::new(vec![Read, Write].into_iter()) } + } - impl AccessRelatedness { - /// Enumerate all relative positions - pub fn all() -> impl Iterator { - use AccessRelatedness::*; - [This, StrictChildAccess, AncestorAccess, DistantAccess].into_iter() - } + impl Exhaustive for AccessRelatedness { + fn exhaustive() -> Box> { + use AccessRelatedness::*; + Box::new(vec![This, StrictChildAccess, AncestorAccess, DistantAccess].into_iter()) } } @@ -442,16 +456,22 @@ mod propagation_optimization_checks { // Even if the protector has disappeared. fn all_transitions_idempotent() { use transition::*; - for old in PermissionPriv::all() { - for (old_protected, new_protected) in [(true, true), (true, false), (false, false)] { - for access in AccessKind::all() { - for rel_pos in AccessRelatedness::all() { - if let Some(new) = perform_access(access, rel_pos, old, old_protected) { - assert_eq!( - new, - perform_access(access, rel_pos, new, new_protected).unwrap() - ); - } + for old in PermissionPriv::exhaustive() { + for (old_protected, new_protected) in <(bool, bool)>::exhaustive() { + // Protector can't appear out of nowhere: either the permission was + // created with a protector (`old_protected = true`) and it then may + // or may not lose it (`new_protected = false`, resp. `new_protected = true`), + // or it didn't have one upon creation and never will + // (`old_protected = new_protected = false`). + // We thus eliminate from this test and all other tests + // the case where the tag is initially unprotected and later becomes protected. + precondition!(old_protected || !new_protected); + for (access, rel_pos) in <(AccessKind, AccessRelatedness)>::exhaustive() { + if let Some(new) = perform_access(access, rel_pos, old, old_protected) { + assert_eq!( + new, + perform_access(access, rel_pos, new, new_protected).unwrap() + ); } } } @@ -459,13 +479,16 @@ mod propagation_optimization_checks { } #[test] + #[rustfmt::skip] fn foreign_read_is_noop_after_foreign_write() { use transition::*; let old_access = AccessKind::Write; let new_access = AccessKind::Read; - for old in PermissionPriv::all() { - for (old_protected, new_protected) in [(true, true), (true, false), (false, false)] { - for rel_pos in AccessRelatedness::all().filter(|rel| rel.is_foreign()) { + for old in PermissionPriv::exhaustive() { + for [old_protected, new_protected] in <[bool; 2]>::exhaustive() { + precondition!(old_protected || !new_protected); + for rel_pos in AccessRelatedness::exhaustive() { + precondition!(rel_pos.is_foreign()); if let Some(new) = perform_access(old_access, rel_pos, old, old_protected) { assert_eq!( new, @@ -480,18 +503,44 @@ mod propagation_optimization_checks { #[test] // Check that all transitions are consistent with the order on PermissionPriv, // i.e. Reserved -> Active -> Frozen -> Disabled - fn access_transitions_progress_increasing() { - use transition::*; - for old in PermissionPriv::all() { - for protected in [true, false] { - for access in AccessKind::all() { - for rel_pos in AccessRelatedness::all() { - if let Some(new) = perform_access(access, rel_pos, old, protected) { - assert!(old <= new); + fn permissionpriv_partialord_is_reachability() { + let reach = { + let mut reach = rustc_data_structures::fx::FxHashSet::default(); + // One-step transitions + reflexivity + for start in PermissionPriv::exhaustive() { + reach.insert((start, start)); + for (access, rel) in <(AccessKind, AccessRelatedness)>::exhaustive() { + for prot in bool::exhaustive() { + if let Some(end) = transition::perform_access(access, rel, start, prot) { + reach.insert((start, end)); } } } } + // Transitive closure + let mut finished = false; + while !finished { + finished = true; + for [start, mid, end] in <[PermissionPriv; 3]>::exhaustive() { + if reach.contains(&(start, mid)) + && reach.contains(&(mid, end)) + && !reach.contains(&(start, end)) + { + finished = false; + reach.insert((start, end)); + } + } + } + reach + }; + // Check that it matches `<` + for [p1, p2] in <[PermissionPriv; 2]>::exhaustive() { + let le12 = p1 <= p2; + let reach12 = reach.contains(&(p1, p2)); + assert!( + le12 == reach12, + "`{p1} reach {p2}` ({reach12}) does not match `{p1} <= {p2}` ({le12})" + ); } } } diff --git a/src/tools/miri/src/borrow_tracker/tree_borrows/tree.rs b/src/tools/miri/src/borrow_tracker/tree_borrows/tree.rs index 5abf13229bbcc..eb466f89a8466 100644 --- a/src/tools/miri/src/borrow_tracker/tree_borrows/tree.rs +++ b/src/tools/miri/src/borrow_tracker/tree_borrows/tree.rs @@ -10,6 +10,8 @@ //! and the relative position of the access; //! - idempotency properties asserted in `perms.rs` (for optimizations) +use std::fmt; + use smallvec::SmallVec; use rustc_const_eval::interpret::InterpResult; @@ -26,8 +28,10 @@ use crate::borrow_tracker::tree_borrows::{ use crate::borrow_tracker::{AccessKind, GlobalState, ProtectorKind}; use crate::*; +mod tests; + /// Data for a single *location*. -#[derive(Debug, Clone, Copy, PartialEq, Eq)] +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] pub(super) struct LocationState { /// A location is initialized when it is child-accessed for the first time (and the initial /// retag initializes the location for the range covered by the type), and it then stays @@ -65,10 +69,25 @@ impl LocationState { self } + /// Check if the location has been initialized, i.e. if it has + /// ever been accessed through a child pointer. pub fn is_initialized(&self) -> bool { self.initialized } + /// Check if the state can exist as the initial permission of a pointer. + /// + /// Do not confuse with `is_initialized`, the two are almost orthogonal + /// as apart from `Active` which is not initial and must be initialized, + /// any other permission can have an arbitrary combination of being + /// initial/initialized. + /// FIXME: when the corresponding `assert` in `tree_borrows/mod.rs` finally + /// passes and can be uncommented, remove this `#[allow(dead_code)]`. + #[cfg_attr(not(test), allow(dead_code))] + pub fn is_initial(&self) -> bool { + self.permission.is_initial() + } + pub fn permission(&self) -> Permission { self.permission } @@ -172,6 +191,16 @@ impl LocationState { } } +impl fmt::Display for LocationState { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{}", self.permission)?; + if !self.initialized { + write!(f, "?")?; + } + Ok(()) + } +} + /// Tree structure with both parents and children since we want to be /// able to traverse the tree efficiently in both directions. #[derive(Clone, Debug)] @@ -665,87 +694,3 @@ impl AccessRelatedness { } } } - -#[cfg(test)] -mod commutation_tests { - use super::*; - impl LocationState { - pub fn all() -> impl Iterator { - // We keep `latest_foreign_access` at `None` as that's just a cache. - Permission::all().flat_map(|permission| { - [false, true].into_iter().map(move |initialized| { - Self { permission, initialized, latest_foreign_access: None } - }) - }) - } - } - - #[test] - #[rustfmt::skip] - // Exhaustive check that for any starting configuration loc, - // for any two read accesses r1 and r2, if `loc + r1 + r2` is not UB - // and results in `loc'`, then `loc + r2 + r1` is also not UB and results - // in the same final state `loc'`. - // This lets us justify arbitrary read-read reorderings. - fn all_read_accesses_commute() { - let kind = AccessKind::Read; - // Two of the four combinations of `AccessRelatedness` are trivial, - // but we might as well check them all. - for rel1 in AccessRelatedness::all() { - for rel2 in AccessRelatedness::all() { - // Any protector state works, but we can't move reads across function boundaries - // so the two read accesses occur under the same protector. - for &protected in &[true, false] { - for loc in LocationState::all() { - // Apply 1 then 2. Failure here means that there is UB in the source - // and we skip the check in the target. - let mut loc12 = loc; - let Ok(_) = loc12.perform_access(kind, rel1, protected) else { continue }; - let Ok(_) = loc12.perform_access(kind, rel2, protected) else { continue }; - - // If 1 followed by 2 succeeded, then 2 followed by 1 must also succeed... - let mut loc21 = loc; - loc21.perform_access(kind, rel2, protected).unwrap(); - loc21.perform_access(kind, rel1, protected).unwrap(); - - // ... and produce the same final result. - assert_eq!( - loc12, loc21, - "Read accesses {:?} followed by {:?} do not commute !", - rel1, rel2 - ); - } - } - } - } - } - - #[test] - #[rustfmt::skip] - // Ensure that of 2 accesses happen, one foreign and one a child, and we are protected, that we - // get UB unless they are both reads. - fn protected_enforces_noalias() { - for rel1 in AccessRelatedness::all() { - for rel2 in AccessRelatedness::all() { - if rel1.is_foreign() == rel2.is_foreign() { - // We want to check pairs of accesses where one is foreign and one is not. - continue; - } - for kind1 in AccessKind::all() { - for kind2 in AccessKind::all() { - for mut state in LocationState::all() { - let protected = true; - let Ok(_) = state.perform_access(kind1, rel1, protected) else { continue }; - let Ok(_) = state.perform_access(kind2, rel2, protected) else { continue }; - // If these were both allowed, it must have been two reads. - assert!( - kind1 == AccessKind::Read && kind2 == AccessKind::Read, - "failed to enforce noalias between two accesses that are not both reads" - ); - } - } - } - } - } - } -} diff --git a/src/tools/miri/src/borrow_tracker/tree_borrows/tree/tests.rs b/src/tools/miri/src/borrow_tracker/tree_borrows/tree/tests.rs new file mode 100644 index 0000000000000..11c06d0b763f3 --- /dev/null +++ b/src/tools/miri/src/borrow_tracker/tree_borrows/tree/tests.rs @@ -0,0 +1,658 @@ +//! Tests for the tree +#![cfg(test)] + +use super::*; +use crate::borrow_tracker::tree_borrows::exhaustive::{precondition, Exhaustive}; +use std::fmt; + +impl Exhaustive for LocationState { + fn exhaustive() -> Box> { + // We keep `latest_foreign_access` at `None` as that's just a cache. + Box::new(<(Permission, bool)>::exhaustive().map(|(permission, initialized)| { + Self { permission, initialized, latest_foreign_access: None } + })) + } +} + +#[test] +#[rustfmt::skip] +// Exhaustive check that for any starting configuration loc, +// for any two read accesses r1 and r2, if `loc + r1 + r2` is not UB +// and results in `loc'`, then `loc + r2 + r1` is also not UB and results +// in the same final state `loc'`. +// This lets us justify arbitrary read-read reorderings. +fn all_read_accesses_commute() { + let kind = AccessKind::Read; + // Two of the four combinations of `AccessRelatedness` are trivial, + // but we might as well check them all. + for [rel1, rel2] in <[AccessRelatedness; 2]>::exhaustive() { + // Any protector state works, but we can't move reads across function boundaries + // so the two read accesses occur under the same protector. + for protected in bool::exhaustive() { + for loc in LocationState::exhaustive() { + // Apply 1 then 2. Failure here means that there is UB in the source + // and we skip the check in the target. + let mut loc12 = loc; + precondition!(loc12.perform_access(kind, rel1, protected).is_ok()); + precondition!(loc12.perform_access(kind, rel2, protected).is_ok()); + + // If 1 followed by 2 succeeded, then 2 followed by 1 must also succeed... + let mut loc21 = loc; + loc21.perform_access(kind, rel2, protected).unwrap(); + loc21.perform_access(kind, rel1, protected).unwrap(); + + // ... and produce the same final result. + assert_eq!( + loc12, loc21, + "Read accesses {:?} followed by {:?} do not commute !", + rel1, rel2 + ); + } + } + } +} + +#[test] +#[rustfmt::skip] +// Ensure that of 2 accesses happen, one foreign and one a child, and we are protected, that we +// get UB unless they are both reads. +fn protected_enforces_noalias() { + for [rel1, rel2] in <[AccessRelatedness; 2]>::exhaustive() { + // We want to check pairs of accesses where one is foreign and one is not. + precondition!(rel1.is_foreign() != rel2.is_foreign()); + for [kind1, kind2] in <[AccessKind; 2]>::exhaustive() { + for mut state in LocationState::exhaustive() { + let protected = true; + precondition!(state.perform_access(kind1, rel1, protected).is_ok()); + precondition!(state.perform_access(kind2, rel2, protected).is_ok()); + // If these were both allowed, it must have been two reads. + assert!( + kind1 == AccessKind::Read && kind2 == AccessKind::Read, + "failed to enforce noalias between two accesses that are not both reads" + ); + } + } + } +} + +/// We are going to exhaustively test the possibily of inserting +/// a spurious read in some code. +/// +/// We choose some pointer `x` through which we want a spurious read to be inserted. +/// `x` must thus be reborrowed, not have any children, and initially start protected. +/// +/// To check if inserting a spurious read is possible, we observe the behavior +/// of some pointer `y` different from `x` (possibly from a different thread, thus +/// the protectors on `x` and `y` are not necessarily well-nested). +/// It must be the case that no matter the context, the insertion of a spurious read +/// through `x` does not introduce UB in code that did not already have UB. +/// +/// Testing this will need some setup to simulate the evolution of the permissions +/// of `x` and `y` under arbitrary code. This arbitrary code of course includes +/// read and write accesses through `x` and `y`, but it must also consider +/// the less obvious: +/// - accesses through pointers that are *neither* `x` nor `y`, +/// - retags of `y` that change its relative position to `x`. +/// +/// +/// The general code pattern thus looks like +/// [thread 1] || [thread 2] +/// || y exists +/// retag x (protect) || +/// arbitrary code +/// read/write x/y/other +/// or retag y +/// or unprotect y +/// || +/// arbitrary code +/// read/write x/y/other +/// or retag y +/// or unprotect y +/// or unprotect x +/// +/// `x` must still be protected at the moment the spurious read is inserted +/// because spurious reads are impossible in general on unprotected tags. +mod spurious_read { + use super::*; + + /// Accessed pointer. + #[derive(Debug, Clone, Copy, PartialEq, Eq)] + enum PtrSelector { + X, + Y, + Other, + } + + /// Relative position of `x` and `y`. + /// `y` cannot be a child of `x` because `x` gets retagged as the first + /// thing in the pattern, so it cannot have children. + #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] + enum RelPosXY { + MutuallyForeign, + /// X is a child of Y. + XChildY, + } + + impl Exhaustive for PtrSelector { + fn exhaustive() -> Box> { + use PtrSelector::*; + Box::new(vec![X, Y, Other].into_iter()) + } + } + + impl fmt::Display for PtrSelector { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + PtrSelector::X => write!(f, "x"), + PtrSelector::Y => write!(f, "y"), + PtrSelector::Other => write!(f, "z"), + } + } + } + + impl Exhaustive for RelPosXY { + fn exhaustive() -> Box> { + use RelPosXY::*; + Box::new(vec![MutuallyForeign, XChildY].into_iter()) + } + } + + impl fmt::Display for RelPosXY { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + RelPosXY::MutuallyForeign => write!(f, "x and y are mutually foreign"), + RelPosXY::XChildY => write!(f, "x is a child of y"), + } + } + } + + impl PtrSelector { + /// Knowing the relative position of `x` to `y`, determine the relative + /// position of the accessed pointer defined by `self` relative to each `x` + /// and `y`. + /// + /// The output is not necessarily well-defined in general, but it + /// is unique when considered up to equivalence by `AccessRelatedness::is_foreign` + /// (e.g. having `RelPosXY::XChildY` and `PtrSelector::Other`, strictly + /// speaking it is impossible to determine if `Other` is a `DistantAccess` + /// or an `AncestorAccess` relative to `y`, but it doesn't really matter + /// because `DistantAccess.is_foreign() == AncestorAccess.is_foreign()`). + fn rel_pair(self, xy_rel: RelPosXY) -> (AccessRelatedness, AccessRelatedness) { + use AccessRelatedness::*; + match xy_rel { + RelPosXY::MutuallyForeign => + match self { + PtrSelector::X => (This, DistantAccess), + PtrSelector::Y => (DistantAccess, This), + PtrSelector::Other => (DistantAccess, DistantAccess), + }, + RelPosXY::XChildY => + match self { + PtrSelector::X => (This, StrictChildAccess), + PtrSelector::Y => (AncestorAccess, This), + PtrSelector::Other => (DistantAccess, DistantAccess), + }, + } + } + } + + /// Arbitrary access parametrized by the relative position of `x` and `y` + /// to each other. + #[derive(Debug, Clone, Copy)] + struct TestAccess { + ptr: PtrSelector, + kind: AccessKind, + } + + impl Exhaustive for TestAccess { + fn exhaustive() -> Box> { + Box::new( + <(PtrSelector, AccessKind)>::exhaustive() + .map(|(ptr, kind)| TestAccess { ptr, kind }), + ) + } + } + + impl fmt::Display for TestAccess { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let kind_text = match self.kind { + AccessKind::Read => "read", + AccessKind::Write => "write", + }; + write!(f, "{kind_text} {}", self.ptr) + } + } + + type AllowRet = (); + type NoRet = !; + #[derive(Clone)] + /// Events relevant to the evolution of 2 pointers are + /// - any access to the same location + /// - end of one of them being protected + /// - a retag that would change their relative position + /// The type `TestEvent` models these kinds of events. + /// + /// In order to prevent `x` or `y` from losing their protector, + /// choose a type `RetX` or `RetY` that is not inhabited. + /// e.g. + /// - `TestEvent` is any event including end of protector on either `x` or `y` + /// - `TestEvent` is any access + /// - `TestEvent` allows for `y` to lose its protector but not `x` + enum TestEvent { + Access(TestAccess), + RetX(RetX), + RetY(RetY), + /// The inner `LocStateProt` must be an initial state (as per the `is_initial` function) + RetagY(LocStateProt), + } + + impl Exhaustive for TestEvent + where + RetX: Exhaustive + 'static, + RetY: Exhaustive + 'static, + { + fn exhaustive() -> Box> { + Box::new( + ::exhaustive() + .map(|acc| Self::Access(acc)) + .chain(RetX::exhaustive().map(|retx| Self::RetX(retx))) + .chain(RetY::exhaustive().map(|rety| Self::RetY(rety))) + .chain( + LocStateProt::exhaustive() + .filter_map(|s| s.is_initial().then_some(Self::RetagY(s))), + ), + ) + } + } + + impl fmt::Display for TestEvent { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + TestEvent::Access(acc) => write!(f, "{acc}"), + // The fields of the `Ret` variants just serve to make them + // impossible to instanciate via the `RetX = NoRet` type; we can + // always ignore their value. + TestEvent::RetX(_) => write!(f, "ret x"), + TestEvent::RetY(_) => write!(f, "ret y"), + TestEvent::RetagY(newp) => write!(f, "retag y ({newp})"), + } + } + } + + #[derive(Clone, PartialEq, Eq, Hash)] + /// The state of a pointer on a location, including the protector. + /// It is represented here with the protector bound to the `LocationState` rather + /// than the `Map` as is normally the case, + /// but since all our exhaustive tests look at a single location + /// there's no risk of `prot` for different locations of the same tag getting + /// out of sync. + struct LocStateProt { + state: LocationState, + prot: bool, + } + + impl LocStateProt { + fn is_initial(&self) -> bool { + self.state.is_initial() + } + + fn perform_access(&self, kind: AccessKind, rel: AccessRelatedness) -> Result { + let mut state = self.state; + state.perform_access(kind, rel, self.prot).map_err(|_| ())?; + Ok(Self { state, prot: self.prot }) + } + + fn end_protector(&self) -> Self { + Self { prot: false, state: self.state } + } + } + + impl Exhaustive for LocStateProt { + fn exhaustive() -> Box> { + Box::new( + <(LocationState, bool)>::exhaustive().map(|(state, prot)| Self { state, prot }), + ) + } + } + + impl fmt::Display for LocStateProt { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{}", self.state)?; + if self.prot { + write!(f, ", protect")?; + } + Ok(()) + } + } + + #[derive(Clone, PartialEq, Eq, Hash)] + /// The states of two pointers to the same location, + /// and their relationship to each other in the tree. + /// + /// Note that the two states interact: using one pointer can have + /// an impact on the other. + /// This makes `LocStateProtPair` more meaningful than a simple + /// `(LocStateProt, LocStateProt)` where the two states are not guaranteed + /// to be updated at the same time. + /// Some `LocStateProtPair` may be unreachable through normal means + /// such as `x: Active, y: Active` in the case of mutually foreign pointers. + struct LocStateProtPair { + xy_rel: RelPosXY, + x: LocStateProt, + y: LocStateProt, + } + + impl LocStateProtPair { + fn perform_test_access(self, acc: &TestAccess) -> Result { + let (xrel, yrel) = acc.ptr.rel_pair(self.xy_rel); + let x = self.x.perform_access(acc.kind, xrel)?; + let y = self.y.perform_access(acc.kind, yrel)?; + Ok(Self { x, y, ..self }) + } + + /// Perform a read on the given pointer if its state is `initialized`. + /// Must be called just after reborrowing a pointer. + fn read_if_initialized(self, ptr: PtrSelector) -> Result { + let initialized = match ptr { + PtrSelector::X => self.x.state.initialized, + PtrSelector::Y => self.y.state.initialized, + PtrSelector::Other => + panic!( + "the `initialized` status of `PtrSelector::Other` is unknown, do not pass it to `read_if_initialized`" + ), + }; + if initialized { + self.perform_test_access(&TestAccess { ptr, kind: AccessKind::Read }) + } else { + Ok(self) + } + } + + fn end_protector_x(self) -> Result { + let x = self.x.end_protector(); + Ok(Self { x, ..self }) + } + + fn end_protector_y(self) -> Result { + let y = self.y.end_protector(); + Ok(Self { y, ..self }) + } + + fn retag_y(self, new_y: LocStateProt) -> Result { + assert!(new_y.is_initial()); + // `xy_rel` changes to "mutually foreign" now: `y` can no longer be a parent of `x`. + Self { y: new_y, xy_rel: RelPosXY::MutuallyForeign, ..self } + .read_if_initialized(PtrSelector::Y) + } + + fn perform_test_event(self, evt: &TestEvent) -> Result { + match evt { + TestEvent::Access(acc) => self.perform_test_access(acc), + // The fields of the `Ret` variants just serve to make them + // impossible to instanciate via the `RetX = NoRet` type; we can + // always ignore their value. + TestEvent::RetX(_) => self.end_protector_x(), + TestEvent::RetY(_) => self.end_protector_y(), + TestEvent::RetagY(newp) => self.retag_y(newp.clone()), + } + } + } + + impl Exhaustive for LocStateProtPair { + fn exhaustive() -> Box> { + Box::new(<[LocStateProt; 2]>::exhaustive().flat_map(|[x, y]| { + RelPosXY::exhaustive() + .map(move |xy_rel| Self { x: x.clone(), y: y.clone(), xy_rel }) + })) + } + } + + impl fmt::Display for LocStateProtPair { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "x:{}, y:{}", self.x, self.y) + } + } + + /// Arbitrary sequence of events, as experienced by two mutually foreign pointers + /// to the same location. + #[derive(Clone)] + struct OpaqueCode { + events: Vec>, + } + + impl fmt::Display for OpaqueCode { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + for evt in &self.events { + write!(f, "{evt}; ")?; + } + Ok(()) + } + } + + impl LocStateProtPair { + /// List all sequences of operations that start at `self` and do not cause UB + /// There are no duplicates: all sequences returned lead to distinct final states + /// (though the sequence is not guaranteed to be the shortest possible sequence of events). + /// Yields the states it reaches, and the sequence of operations that got us there. + fn all_states_reachable_via_opaque_code( + self, + ) -> impl Iterator)> + where + RetX: Exhaustive + Clone + 'static, + RetY: Exhaustive + Clone + 'static, + { + // We compute the reachable set of `Self` from `self` by non-UB `OpaqueCode`. + // Configurations are `(reach: Self, code: OpaqueCode)` tuples + // for which `code` applied to `self` returns `Ok(reach)`. + + // Stack of all configurations left to handle. + let mut handle: Vec<(Self, OpaqueCode<_, _>)> = + vec![(self, OpaqueCode { events: Vec::new() })]; + // Code that can be applied to `self`, and final state. + let mut paths: Vec<(Self, OpaqueCode<_, _>)> = Default::default(); + // Already explored states reachable from `self` + let mut seen: FxHashSet = Default::default(); + // This is essentially just computing the transitive closure by `perform_test_event`, + // most of the work lies in remembering the path up to the current state. + while let Some((state, path)) = handle.pop() { + for evt in >::exhaustive() { + if let Ok(next) = state.clone().perform_test_event(&evt) { + if seen.insert(next.clone()) { + let mut evts = path.clone(); + evts.events.push(evt); + paths.push((next.clone(), evts.clone())); + handle.push((next, evts)); + } + } + } + } + paths.into_iter() + } + } + + impl LocStateProtPair { + #[rustfmt::skip] + /// Two states (by convention `self` is the source and `other` is the target) + /// are "distinguishable" if there exists a sequence of instructions + /// that causes UB in the target but not in the source. + /// This implementation simply explores the reachable space + /// by all sequences of `TestEvent`. + /// This function can be instanciated with `RetX` and `RetY` + /// among `NoRet` or `AllowRet` to resp. forbid/allow `x`/`y` to lose their + /// protector. + fn distinguishable(&self, other: &Self) -> bool + where + RetX: Exhaustive + 'static, + RetY: Exhaustive + 'static, + { + if self == other { return false; } + let mut states = vec![(self.clone(), other.clone())]; + let mut seen = FxHashSet::default(); + while let Some(state) = states.pop() { + if !seen.insert(state.clone()) { continue; }; + let (source, target) = state; + for evt in >::exhaustive() { + // Generate successor states through events (accesses and protector ends) + let Ok(new_source) = source.clone().perform_test_event(&evt) else { continue; }; + let Ok(new_target) = target.clone().perform_test_event(&evt) else { return true; }; + if new_source == new_target { continue; } + states.push((new_source, new_target)); + } + } + false + } + } + + #[test] + #[should_panic] + // This is why `Reserved -> Frozen` on foreign read for protected references + // prevents the insertion of spurious reads: the transition can cause UB in the target + // later down the line. + fn reserved_frozen_protected_distinguishable() { + let source = LocStateProtPair { + xy_rel: RelPosXY::MutuallyForeign, + x: LocStateProt { + state: LocationState::new(Permission::new_frozen()).with_access(), + prot: true, + }, + y: LocStateProt { + state: LocationState::new(Permission::new_reserved(false)), + prot: true, + }, + }; + let acc = TestAccess { ptr: PtrSelector::X, kind: AccessKind::Read }; + let target = source.clone().perform_test_access(&acc).unwrap(); + assert!(source.y.state.permission.is_reserved(None)); + assert!(target.y.state.permission.is_frozen()); + assert!(!source.distinguishable::<(), ()>(&target)) + } + + #[derive(Clone, Debug)] + struct Pattern { + /// The relative position of `x` and `y` at the beginning of the arbitrary + /// code (i.e., just after `x` got created). + /// Might change during the execution if said arbitrary code contains any `retag y`. + xy_rel: RelPosXY, + /// Permission that `x` will be created as + /// (always protected until a possible `ret x` in the second phase). + /// This one should be initial (as per `is_initial`). + x_retag_perm: LocationState, + /// Permission that `y` has at the beginning of the pattern. + /// Can be any state, not necessarily initial + /// (since `y` exists already before the pattern starts). + /// This state might be reset during the execution if the opaque code + /// contains any `retag y`, but only to an initial state this time. + y_current_perm: LocationState, + /// Whether `y` starts with a protector. + /// Might change if the opaque code contains any `ret y`. + y_protected: bool, + } + + impl Exhaustive for Pattern { + fn exhaustive() -> Box> { + let mut v = Vec::new(); + for xy_rel in RelPosXY::exhaustive() { + for (x_retag_perm, y_current_perm) in <(LocationState, LocationState)>::exhaustive() + { + // We can only do spurious reads for initialized locations anyway. + precondition!(x_retag_perm.initialized); + // And `x` just got retagged, so it must be initial. + precondition!(x_retag_perm.permission.is_initial()); + for y_protected in bool::exhaustive() { + v.push(Pattern { xy_rel, x_retag_perm, y_current_perm, y_protected }); + } + } + } + Box::new(v.into_iter()) + } + } + + impl fmt::Display for Pattern { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let (x, y) = self.retag_permissions(); + write!(f, "{}; ", self.xy_rel)?; + write!(f, "y: ({}); ", y,)?; + write!(f, "retag x ({}); ", x)?; + + write!(f, "; ;")?; + Ok(()) + } + } + + impl Pattern { + /// Return the permission that `y` starts as, and the permission that we + /// will retag `x` with. + /// This does not yet include a possible read-on-reborrow through `x`. + fn retag_permissions(&self) -> (LocStateProt, LocStateProt) { + let x = LocStateProt { state: self.x_retag_perm, prot: true }; + let y = LocStateProt { state: self.y_current_perm, prot: self.y_protected }; + (x, y) + } + + /// State that the pattern deterministically produces immediately after + /// the retag of `x`. + fn initial_state(&self) -> Result { + let (x, y) = self.retag_permissions(); + let state = LocStateProtPair { xy_rel: self.xy_rel, x, y }; + state.read_if_initialized(PtrSelector::X) + } + } + + #[test] + #[should_panic] + /// For each of the patterns described above, execute it once + /// as-is, and once with a spurious read inserted. Report any UB + /// in the target but not in the source. + fn test_all_patterns() { + let mut ok = 0; + let mut err = 0; + for pat in Pattern::exhaustive() { + let Ok(initial_source) = pat.initial_state() else { + // Failed to retag `x` in the source (e.g. `y` was protected Active) + continue; + }; + // `x` must stay protected, but the function protecting `y` might return here + for (final_source, opaque) in + initial_source.all_states_reachable_via_opaque_code::() + { + // Both executions are identical up to here. + // Now we do nothing in the source and in the target we do a spurious read. + // Then we check if the resulting states are distinguishable. + let distinguishable = { + assert!(final_source.x.prot); + let spurious_read = TestAccess { ptr: PtrSelector::X, kind: AccessKind::Read }; + if let Ok(final_target) = + final_source.clone().perform_test_access(&spurious_read) + { + // Only after the spurious read has been executed can `x` lose its + // protector. + final_source + .distinguishable::(&final_target) + .then_some(format!("{}", final_target)) + } else { + Some(format!("UB")) + } + }; + if let Some(final_target) = distinguishable { + eprintln!( + "For pattern '{}', inserting a spurious read through x makes the final state '{}' instead of '{}' which is observable", + pat, final_target, final_source + ); + eprintln!(" (arbitrary code instanciated with '{}')", opaque); + err += 1; + // We found an instanciation of the opaque code that makes this Pattern + // fail, we don't really need to check the rest. + break; + } + ok += 1; + } + } + if err > 0 { + panic!( + "Test failed after {}/{} patterns had UB in the target but not the source", + err, + ok + err + ) + } + } +} diff --git a/src/tools/miri/tests/fail/tree_borrows/reserved/cell-protected-write.stderr b/src/tools/miri/tests/fail/tree_borrows/reserved/cell-protected-write.stderr index 769769d957dbe..998ab956e1af5 100644 --- a/src/tools/miri/tests/fail/tree_borrows/reserved/cell-protected-write.stderr +++ b/src/tools/miri/tests/fail/tree_borrows/reserved/cell-protected-write.stderr @@ -16,14 +16,14 @@ LL | *y = 1; | = help: this indicates a potential bug in the program: it performed an invalid operation, but the Tree Borrows rules it violated are still experimental = help: the accessed tag (y, callee:y, caller:y) is foreign to the protected tag (callee:x) (i.e., it is not a child) - = help: this foreign write access would cause the protected tag (callee:x) (currently Reserved) to become Disabled + = help: this foreign write access would cause the protected tag (callee:x) (currently Reserved (interior mutable)) to become Disabled = help: protected tags must never be Disabled help: the accessed tag was created here --> $DIR/cell-protected-write.rs:LL:CC | LL | let y = (&mut *n).get(); | ^^^^^^^^^ -help: the protected tag was created here, in the initial state Reserved +help: the protected tag was created here, in the initial state Reserved (interior mutable) --> $DIR/cell-protected-write.rs:LL:CC | LL | unsafe fn write_second(x: &mut UnsafeCell, y: *mut u8) { From 4b865b7d1aa4ffe2e760adfdc9744d6a656ba96a Mon Sep 17 00:00:00 2001 From: The Miri Conjob Bot Date: Thu, 21 Sep 2023 05:28:08 +0000 Subject: [PATCH 05/26] Preparing for merge from rustc --- src/tools/miri/rust-version | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tools/miri/rust-version b/src/tools/miri/rust-version index a2030c1c34322..f6db58695fbc6 100644 --- a/src/tools/miri/rust-version +++ b/src/tools/miri/rust-version @@ -1 +1 @@ -19dd9535408db0f1ff3d16613619076aef524d19 +4fda889bf8735755573b27e6116ce025f3ded5f9 From e6cd29d06e48186562eefe6007854ca9776e6e8f Mon Sep 17 00:00:00 2001 From: The Miri Conjob Bot Date: Thu, 21 Sep 2023 05:40:21 +0000 Subject: [PATCH 06/26] fmt --- src/tools/miri/src/shims/backtrace.rs | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/tools/miri/src/shims/backtrace.rs b/src/tools/miri/src/shims/backtrace.rs index 836cab3ac9da8..ee2edd462d19e 100644 --- a/src/tools/miri/src/shims/backtrace.rs +++ b/src/tools/miri/src/shims/backtrace.rs @@ -88,10 +88,7 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriInterpCxExt<'mir, 'tcx> { this.write_pointer(ptr, &place)?; } - this.write_immediate( - Immediate::new_slice(alloc.ptr(), len, this), - dest, - )?; + this.write_immediate(Immediate::new_slice(alloc.ptr(), len, this), dest)?; } // storage for pointers is allocated by the caller 1 => { From 96a85d67fd0e986ba8f9ee2207f3c916f46b4bb1 Mon Sep 17 00:00:00 2001 From: Ralf Jung Date: Thu, 21 Sep 2023 07:46:41 +0200 Subject: [PATCH 07/26] update lockfile --- src/tools/miri/Cargo.lock | 58 +++++++++++++++++++++++++++++++-------- 1 file changed, 47 insertions(+), 11 deletions(-) diff --git a/src/tools/miri/Cargo.lock b/src/tools/miri/Cargo.lock index ca5b6d225154e..8cd2df8b5c585 100644 --- a/src/tools/miri/Cargo.lock +++ b/src/tools/miri/Cargo.lock @@ -70,6 +70,12 @@ version = "1.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" +[[package]] +name = "bitflags" +version = "2.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b4682ae6287fcf752ecaabbfcc7b6f9b72aa33933dc23a554d853aea8eea8635" + [[package]] name = "bstr" version = "1.4.0" @@ -201,12 +207,12 @@ checksum = "56254986775e3233ffa9c4d7d3faaf6d36a2c09d30b20687e9f88bc8bafc16c8" [[package]] name = "env_logger" -version = "0.9.3" +version = "0.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a12e6657c4c97ebab115a42dcee77225f7f482cdd841cf7088c657a42e9e00e7" +checksum = "85cdab6a89accf66733ad5a1693a4dcced6aeff64602b634530dd73c1f3ee9f0" dependencies = [ - "atty", "humantime", + "is-terminal", "log", "regex", "termcolor", @@ -316,6 +322,17 @@ dependencies = [ "windows-sys 0.48.0", ] +[[package]] +name = "is-terminal" +version = "0.4.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cb0889898416213fab133e1d33a0e5858a48177452750691bde3666d0fdbaf8b" +dependencies = [ + "hermit-abi 0.3.1", + "rustix 0.38.14", + "windows-sys 0.48.0", +] + [[package]] name = "itoa" version = "1.0.6" @@ -330,9 +347,9 @@ checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" [[package]] name = "libc" -version = "0.2.142" +version = "0.2.148" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6a987beff54b60ffa6d51982e1aa1146bc42f19bd26be28b0586f252fccf5317" +checksum = "9cdc71e17332e86d2e1d38c1f99edcb6288ee11b815fb1a4b049eaa2114d369b" [[package]] name = "libffi" @@ -369,6 +386,12 @@ version = "0.3.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ece97ea872ece730aed82664c424eb4c8291e1ff2480247ccf7409044bc6479f" +[[package]] +name = "linux-raw-sys" +version = "0.4.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1a9bad9f94746442c783ca431b22403b519cd7fbeed0533fdd6328b2f2212128" + [[package]] name = "lock_api" version = "0.4.9" @@ -454,7 +477,7 @@ version = "0.26.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bfdda3d196821d6af13126e40375cdf7da646a96114af134d5f417a9a1dc8e1a" dependencies = [ - "bitflags", + "bitflags 1.3.2", "cfg-if", "libc", "static_assertions", @@ -581,7 +604,7 @@ version = "0.2.16" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fb5a58c1855b4b6819d59012155603f0b22ad30cad752600aadfcb695265519a" dependencies = [ - "bitflags", + "bitflags 1.3.2", ] [[package]] @@ -590,7 +613,7 @@ version = "0.3.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "567664f262709473930a4bf9e51bf2ebf3348f2e748ccc50dea20646858f8f29" dependencies = [ - "bitflags", + "bitflags 1.3.2", ] [[package]] @@ -655,11 +678,24 @@ version = "0.37.19" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "acf8729d8542766f1b2cf77eb034d52f40d375bb8b615d0b147089946e16613d" dependencies = [ - "bitflags", + "bitflags 1.3.2", "errno", "io-lifetimes", "libc", - "linux-raw-sys", + "linux-raw-sys 0.3.7", + "windows-sys 0.48.0", +] + +[[package]] +name = "rustix" +version = "0.38.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "747c788e9ce8e92b12cd485c49ddf90723550b654b32508f979b71a7b1ecda4f" +dependencies = [ + "bitflags 2.4.0", + "errno", + "libc", + "linux-raw-sys 0.4.7", "windows-sys 0.48.0", ] @@ -756,7 +792,7 @@ dependencies = [ "cfg-if", "fastrand", "redox_syscall 0.3.5", - "rustix", + "rustix 0.37.19", "windows-sys 0.45.0", ] From a11b1d8b26f99b256b9e14cf540d8c457d6c83fe Mon Sep 17 00:00:00 2001 From: Ralf Jung Date: Thu, 21 Sep 2023 07:49:38 +0200 Subject: [PATCH 08/26] disable no-merges check for now --- src/tools/miri/triagebot.toml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/tools/miri/triagebot.toml b/src/tools/miri/triagebot.toml index 8d00cedb769d3..1c520a9c77926 100644 --- a/src/tools/miri/triagebot.toml +++ b/src/tools/miri/triagebot.toml @@ -10,5 +10,6 @@ allow-unauthenticated = [ # Gives us the commands 'ready', 'author', 'blocked' [shortcut] -[no-merges] -exclude_titles = ["Rollup of", "sync from rustc"] +# disabled until https://github.com/rust-lang/triagebot/pull/1720 lands +#[no-merges] +#exclude_titles = ["Rollup of", "sync from rustc"] From e018f49d4597cc5575703e3ff1d53a8cba11757a Mon Sep 17 00:00:00 2001 From: Ralf Jung Date: Thu, 21 Sep 2023 08:02:58 +0200 Subject: [PATCH 09/26] remove atty dependency by updating colored --- src/tools/miri/Cargo.lock | 32 ++++++-------------------------- 1 file changed, 6 insertions(+), 26 deletions(-) diff --git a/src/tools/miri/Cargo.lock b/src/tools/miri/Cargo.lock index 8cd2df8b5c585..f253d71e50d34 100644 --- a/src/tools/miri/Cargo.lock +++ b/src/tools/miri/Cargo.lock @@ -32,17 +32,6 @@ version = "1.0.71" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9c7d0618f0e0b7e8ff11427422b64564d5fb0be1940354bfe2e0529b18a9d9b8" -[[package]] -name = "atty" -version = "0.2.14" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8" -dependencies = [ - "hermit-abi 0.1.19", - "libc", - "winapi", -] - [[package]] name = "autocfg" version = "1.1.0" @@ -161,13 +150,13 @@ dependencies = [ [[package]] name = "colored" -version = "2.0.0" +version = "2.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b3616f750b84d8f0de8a58bda93e08e2a81ad3f523089b05f1dffecab48c6cbd" +checksum = "2674ec482fbc38012cf31e6c42ba0177b431a0cb6f15fe40efa5aab1bda516f6" dependencies = [ - "atty", + "is-terminal", "lazy_static", - "winapi", + "windows-sys 0.48.0", ] [[package]] @@ -275,15 +264,6 @@ version = "0.27.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ad0a93d233ebf96623465aad4046a8d3aa4da22d4f4beba5388838c8a434bbb4" -[[package]] -name = "hermit-abi" -version = "0.1.19" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "62b467343b94ba476dcb2500d242dadbb39557df889310ac77c5d99100aaac33" -dependencies = [ - "libc", -] - [[package]] name = "hermit-abi" version = "0.3.1" @@ -317,7 +297,7 @@ version = "1.0.10" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9c66c74d2ae7e79a5a8f7ac924adbe38ee42a859c6539ad869eb51f0b52dc220" dependencies = [ - "hermit-abi 0.3.1", + "hermit-abi", "libc", "windows-sys 0.48.0", ] @@ -328,7 +308,7 @@ version = "0.4.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cb0889898416213fab133e1d33a0e5858a48177452750691bde3666d0fdbaf8b" dependencies = [ - "hermit-abi 0.3.1", + "hermit-abi", "rustix 0.38.14", "windows-sys 0.48.0", ] From ed8fbcb05910f4439ec572bd163cc99a3603e378 Mon Sep 17 00:00:00 2001 From: Ralf Jung Date: Thu, 21 Sep 2023 08:03:45 +0200 Subject: [PATCH 10/26] fix clippy lints --- src/tools/miri/src/shims/x86/sse2.rs | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/src/tools/miri/src/shims/x86/sse2.rs b/src/tools/miri/src/shims/x86/sse2.rs index b68690a835c2f..d3bfb53afd268 100644 --- a/src/tools/miri/src/shims/x86/sse2.rs +++ b/src/tools/miri/src/shims/x86/sse2.rs @@ -426,8 +426,8 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriInterpCxExt<'mir, 'tcx> { let right_res = i8::try_from(right).unwrap_or(if right < 0 { i8::MIN } else { i8::MAX }); - this.write_scalar(Scalar::from_i8(left_res.try_into().unwrap()), &left_dest)?; - this.write_scalar(Scalar::from_i8(right_res.try_into().unwrap()), &right_dest)?; + this.write_scalar(Scalar::from_i8(left_res), &left_dest)?; + this.write_scalar(Scalar::from_i8(right_res), &right_dest)?; } } // Used to implement the _mm_packus_epi16 function. @@ -487,11 +487,8 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriInterpCxExt<'mir, 'tcx> { let right_res = i16::try_from(right).unwrap_or(if right < 0 { i16::MIN } else { i16::MAX }); - this.write_scalar(Scalar::from_i16(left_res.try_into().unwrap()), &left_dest)?; - this.write_scalar( - Scalar::from_i16(right_res.try_into().unwrap()), - &right_dest, - )?; + this.write_scalar(Scalar::from_i16(left_res), &left_dest)?; + this.write_scalar(Scalar::from_i16(right_res), &right_dest)?; } } // Used to implement _mm_min_sd and _mm_max_sd functions. From 64708fbf9ef018e682ee9bb5716974f057e97129 Mon Sep 17 00:00:00 2001 From: Ralf Jung Date: Thu, 21 Sep 2023 07:57:38 +0200 Subject: [PATCH 11/26] deprecate -Zmiri-disable-abi-check --- src/tools/miri/README.md | 2 +- src/tools/miri/src/bin/miri.rs | 4 ++++ .../miri/tests/fail/concurrency/unwind_top_of_stack.stderr | 2 ++ .../fail/function_calls/exported_symbol_bad_unwind1.stderr | 2 ++ src/tools/miri/tests/fail/panic/bad_miri_start_panic.stderr | 2 ++ .../miri/tests/pass/function_calls/disable_abi_check.stderr | 2 ++ 6 files changed, 13 insertions(+), 1 deletion(-) create mode 100644 src/tools/miri/tests/pass/function_calls/disable_abi_check.stderr diff --git a/src/tools/miri/README.md b/src/tools/miri/README.md index cedc558ebb87f..f6a023181d858 100644 --- a/src/tools/miri/README.md +++ b/src/tools/miri/README.md @@ -362,7 +362,7 @@ Some of these are **unsound**, which means they can lead to Miri failing to detect cases of undefined behavior in a program. * `-Zmiri-disable-abi-check` disables checking [function ABI]. Using this flag - is **unsound**. + is **unsound**. This flag is **deprecated**. * `-Zmiri-disable-alignment-check` disables checking pointer alignment, so you can focus on other failures, but it means Miri can miss bugs in your program. Using this flag is **unsound**. diff --git a/src/tools/miri/src/bin/miri.rs b/src/tools/miri/src/bin/miri.rs index 1e9219d4bb23a..8f90e4566ae41 100644 --- a/src/tools/miri/src/bin/miri.rs +++ b/src/tools/miri/src/bin/miri.rs @@ -358,6 +358,10 @@ fn main() { since it is now enabled by default" ); } else if arg == "-Zmiri-disable-abi-check" { + eprintln!( + "WARNING: the flag `-Zmiri-disable-abi-check` is deprecated and planned to be removed.\n\ + If you have a use-case for it, please file an issue." + ); miri_config.check_abi = false; } else if arg == "-Zmiri-disable-isolation" { if matches!(isolation_enabled, Some(true)) { diff --git a/src/tools/miri/tests/fail/concurrency/unwind_top_of_stack.stderr b/src/tools/miri/tests/fail/concurrency/unwind_top_of_stack.stderr index ceb5955922a3f..fccd3fbbc9da7 100644 --- a/src/tools/miri/tests/fail/concurrency/unwind_top_of_stack.stderr +++ b/src/tools/miri/tests/fail/concurrency/unwind_top_of_stack.stderr @@ -1,3 +1,5 @@ +WARNING: the flag `-Zmiri-disable-abi-check` is deprecated and planned to be removed. +If you have a use-case for it, please file an issue. thread '' panicked at $DIR/unwind_top_of_stack.rs:LL:CC: explicit panic note: run with `RUST_BACKTRACE=1` environment variable to display a backtrace diff --git a/src/tools/miri/tests/fail/function_calls/exported_symbol_bad_unwind1.stderr b/src/tools/miri/tests/fail/function_calls/exported_symbol_bad_unwind1.stderr index f181f90abd3b8..507b459b718fd 100644 --- a/src/tools/miri/tests/fail/function_calls/exported_symbol_bad_unwind1.stderr +++ b/src/tools/miri/tests/fail/function_calls/exported_symbol_bad_unwind1.stderr @@ -1,3 +1,5 @@ +WARNING: the flag `-Zmiri-disable-abi-check` is deprecated and planned to be removed. +If you have a use-case for it, please file an issue. thread 'main' panicked at $DIR/exported_symbol_bad_unwind1.rs:LL:CC: explicit panic note: run with `RUST_BACKTRACE=1` environment variable to display a backtrace diff --git a/src/tools/miri/tests/fail/panic/bad_miri_start_panic.stderr b/src/tools/miri/tests/fail/panic/bad_miri_start_panic.stderr index 3bd2be03ea1ff..4e5a2dfba3429 100644 --- a/src/tools/miri/tests/fail/panic/bad_miri_start_panic.stderr +++ b/src/tools/miri/tests/fail/panic/bad_miri_start_panic.stderr @@ -1,3 +1,5 @@ +WARNING: the flag `-Zmiri-disable-abi-check` is deprecated and planned to be removed. +If you have a use-case for it, please file an issue. error: Undefined Behavior: unwinding past a stack frame that does not allow unwinding --> $DIR/bad_miri_start_panic.rs:LL:CC | diff --git a/src/tools/miri/tests/pass/function_calls/disable_abi_check.stderr b/src/tools/miri/tests/pass/function_calls/disable_abi_check.stderr new file mode 100644 index 0000000000000..e5b84f6d7090e --- /dev/null +++ b/src/tools/miri/tests/pass/function_calls/disable_abi_check.stderr @@ -0,0 +1,2 @@ +WARNING: the flag `-Zmiri-disable-abi-check` is deprecated and planned to be removed. +If you have a use-case for it, please file an issue. From b075a9d2ab491b921d3ecdd4d25f65d923bb6cd3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Eduardo=20S=C3=A1nchez=20Mu=C3=B1oz?= Date: Thu, 21 Sep 2023 18:25:41 +0200 Subject: [PATCH 12/26] Implement `llvm.ctpop.v*` intrinsics Tested through x86 avx512vpopcntdq and avx512bitalg functions. --- src/tools/miri/src/shims/foreign_items.rs | 23 +++ .../tests/pass/intrinsics-x86-avx512bitalg.rs | 133 ++++++++++++++++++ .../pass/intrinsics-x86-avx512vpopcntdq.rs | 124 ++++++++++++++++ 3 files changed, 280 insertions(+) create mode 100644 src/tools/miri/tests/pass/intrinsics-x86-avx512bitalg.rs create mode 100644 src/tools/miri/tests/pass/intrinsics-x86-avx512vpopcntdq.rs diff --git a/src/tools/miri/src/shims/foreign_items.rs b/src/tools/miri/src/shims/foreign_items.rs index 0e8eaf450e43a..f596df1f7a1df 100644 --- a/src/tools/miri/src/shims/foreign_items.rs +++ b/src/tools/miri/src/shims/foreign_items.rs @@ -1032,6 +1032,29 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriInterpCxExt<'mir, 'tcx> { } } + // Used to implement the x86 `_mm{,256,512}_popcnt_epi{8,16,32,64}` and wasm + // `{i,u}8x16_popcnt` functions. + name if name.starts_with("llvm.ctpop.v") => { + let [op] = this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?; + + let (op, op_len) = this.operand_to_simd(op)?; + let (dest, dest_len) = this.place_to_simd(dest)?; + + assert_eq!(dest_len, op_len); + + for i in 0..dest_len { + let op = this.read_immediate(&this.project_index(&op, i)?)?; + // Use `to_uint` to get a zero-extended `u128`. Those + // extra zeros will not affect `count_ones`. + let res = op.to_scalar().to_uint(op.layout.size)?.count_ones(); + + this.write_scalar( + Scalar::from_uint(res, op.layout.size), + &this.project_index(&dest, i)?, + )?; + } + } + name if name.starts_with("llvm.x86.sse.") => { return shims::x86::sse::EvalContextExt::emulate_x86_sse_intrinsic( this, link_name, abi, args, dest, diff --git a/src/tools/miri/tests/pass/intrinsics-x86-avx512bitalg.rs b/src/tools/miri/tests/pass/intrinsics-x86-avx512bitalg.rs new file mode 100644 index 0000000000000..19f9b94ff08a5 --- /dev/null +++ b/src/tools/miri/tests/pass/intrinsics-x86-avx512bitalg.rs @@ -0,0 +1,133 @@ +// Ignore everything except x86 and x86_64 +// Any additional target are added to CI should be ignored here +//@ignore-target-aarch64 +//@ignore-target-arm +//@ignore-target-avr +//@ignore-target-s390x +//@ignore-target-thumbv7em +//@ignore-target-wasm32 +//@compile-flags: -C target-feature=+avx512bitalg,+avx512f,+avx512vl + +#![feature(avx512_target_feature)] +#![feature(stdsimd)] + +#[cfg(target_arch = "x86")] +use std::arch::x86::*; +#[cfg(target_arch = "x86_64")] +use std::arch::x86_64::*; +use std::mem::transmute; + +fn main() { + assert!(is_x86_feature_detected!("avx512bitalg")); + assert!(is_x86_feature_detected!("avx512f")); + assert!(is_x86_feature_detected!("avx512vl")); + + unsafe { + test_avx512bitalg(); + } +} + +// Some of the constants in the tests below are just bit patterns. They should not +// be interpreted as integers; signedness does not make sense for them, but +// __mXXXi happens to be defined in terms of signed integers. +#[allow(overflowing_literals)] +#[target_feature(enable = "avx512bitalg,avx512f,avx512vl")] +unsafe fn test_avx512bitalg() { + // Mostly copied from library/stdarch/crates/core_arch/src/x86/avx512bitalg.rs + + #[target_feature(enable = "avx512bitalg,avx512f")] + unsafe fn test_mm512_popcnt_epi16() { + let test_data = _mm512_set_epi16( + 0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, 0x1_FF, 0x3_FF, 0x7_FF, 0xF_FF, 0x1F_FF, + 0x3F_FF, 0x7F_FF, 0xFF_FF, -1, -100, 255, 256, 2, 4, 8, 16, 32, 64, 128, 256, 512, + 1024, 2048, + ); + let actual_result = _mm512_popcnt_epi16(test_data); + let reference_result = _mm512_set_epi16( + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 16, 12, 8, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, + ); + assert_eq_m512i(actual_result, reference_result); + } + test_mm512_popcnt_epi16(); + + #[target_feature(enable = "avx512bitalg,avx512f,avx512vl")] + unsafe fn test_mm256_popcnt_epi16() { + let test_data = _mm256_set_epi16( + 0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, 0x1_FF, 0x3_FF, 0x7_FF, 0xF_FF, 0x1F_FF, + 0x3F_FF, 0x7F_FF, + ); + let actual_result = _mm256_popcnt_epi16(test_data); + let reference_result = + _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + assert_eq_m256i(actual_result, reference_result); + } + test_mm256_popcnt_epi16(); + + #[target_feature(enable = "avx512bitalg,avx512f,avx512vl")] + unsafe fn test_mm_popcnt_epi16() { + let test_data = _mm_set_epi16(0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F); + let actual_result = _mm_popcnt_epi16(test_data); + let reference_result = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7); + assert_eq_m128i(actual_result, reference_result); + } + test_mm_popcnt_epi16(); + + #[target_feature(enable = "avx512bitalg,avx512f")] + unsafe fn test_mm512_popcnt_epi8() { + let test_data = _mm512_set_epi8( + 0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, -1, 2, 4, 8, 16, 32, 64, 128, 171, 206, 100, + 217, 109, 253, 190, 177, 254, 179, 215, 230, 68, 201, 172, 183, 154, 84, 56, 227, 189, + 140, 35, 117, 219, 169, 226, 170, 13, 22, 159, 251, 73, 121, 143, 145, 85, 91, 137, 90, + 225, 21, 249, 211, 155, 228, 70, + ); + let actual_result = _mm512_popcnt_epi8(test_data); + let reference_result = _mm512_set_epi8( + 0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 1, 1, 1, 1, 1, 1, 1, 5, 5, 3, 5, 5, 7, 6, 4, 7, 5, 6, 5, + 2, 4, 4, 6, 4, 3, 3, 5, 6, 3, 3, 5, 6, 4, 4, 4, 3, 3, 6, 7, 3, 5, 5, 3, 4, 5, 3, 4, 4, + 3, 6, 5, 5, 4, 3, + ); + assert_eq_m512i(actual_result, reference_result); + } + test_mm512_popcnt_epi8(); + + #[target_feature(enable = "avx512bitalg,avx512f,avx512vl")] + unsafe fn test_mm256_popcnt_epi8() { + let test_data = _mm256_set_epi8( + 0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, -1, 2, 4, 8, 16, 32, 64, 128, 171, 206, 100, + 217, 109, 253, 190, 177, 254, 179, 215, 230, 68, 201, 172, + ); + let actual_result = _mm256_popcnt_epi8(test_data); + let reference_result = _mm256_set_epi8( + 0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 1, 1, 1, 1, 1, 1, 1, 5, 5, 3, 5, 5, 7, 6, 4, 7, 5, 6, 5, + 2, 4, 4, + ); + assert_eq_m256i(actual_result, reference_result); + } + test_mm256_popcnt_epi8(); + + #[target_feature(enable = "avx512bitalg,avx512f,avx512vl")] + unsafe fn test_mm_popcnt_epi8() { + let test_data = + _mm_set_epi8(0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, -1, 2, 4, 8, 16, 32, 64); + let actual_result = _mm_popcnt_epi8(test_data); + let reference_result = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 1, 1, 1, 1, 1, 1); + assert_eq_m128i(actual_result, reference_result); + } + test_mm_popcnt_epi8(); +} + +#[track_caller] +unsafe fn assert_eq_m512i(a: __m512i, b: __m512i) { + assert_eq!(transmute::<_, [i32; 16]>(a), transmute::<_, [i32; 16]>(b)) +} + +#[track_caller] +unsafe fn assert_eq_m256i(a: __m256i, b: __m256i) { + assert_eq!(transmute::<_, [u64; 4]>(a), transmute::<_, [u64; 4]>(b)) +} + +#[track_caller] +unsafe fn assert_eq_m128i(a: __m128i, b: __m128i) { + assert_eq!(transmute::<_, [u64; 2]>(a), transmute::<_, [u64; 2]>(b)) +} diff --git a/src/tools/miri/tests/pass/intrinsics-x86-avx512vpopcntdq.rs b/src/tools/miri/tests/pass/intrinsics-x86-avx512vpopcntdq.rs new file mode 100644 index 0000000000000..5c8821305e61d --- /dev/null +++ b/src/tools/miri/tests/pass/intrinsics-x86-avx512vpopcntdq.rs @@ -0,0 +1,124 @@ +// Ignore everything except x86 and x86_64 +// Any additional target are added to CI should be ignored here +//@ignore-target-aarch64 +//@ignore-target-arm +//@ignore-target-avr +//@ignore-target-s390x +//@ignore-target-thumbv7em +//@ignore-target-wasm32 +//@compile-flags: -C target-feature=+avx512vpopcntdq,+avx512f,+avx512vl + +#![feature(avx512_target_feature)] +#![feature(stdsimd)] + +#[cfg(target_arch = "x86")] +use std::arch::x86::*; +#[cfg(target_arch = "x86_64")] +use std::arch::x86_64::*; +use std::mem::transmute; + +fn main() { + assert!(is_x86_feature_detected!("avx512vpopcntdq")); + assert!(is_x86_feature_detected!("avx512f")); + assert!(is_x86_feature_detected!("avx512vl")); + + unsafe { + test_avx512vpopcntdq(); + } +} + +#[target_feature(enable = "avx512vpopcntdq,avx512f,avx512vl")] +unsafe fn test_avx512vpopcntdq() { + // Mostly copied from library/stdarch/crates/core_arch/src/x86/avx512vpopcntdq.rs + + #[target_feature(enable = "avx512vpopcntdq,avx512f")] + unsafe fn test_mm512_popcnt_epi32() { + let test_data = _mm512_set_epi32( + 0, + 1, + -1, + 2, + 7, + 0xFF_FE, + 0x7F_FF_FF_FF, + -100, + 0x40_00_00_00, + 103, + 371, + 552, + 432_948, + 818_826_998, + 255, + 256, + ); + let actual_result = _mm512_popcnt_epi32(test_data); + let reference_result = + _mm512_set_epi32(0, 1, 32, 1, 3, 15, 31, 28, 1, 5, 6, 3, 10, 17, 8, 1); + assert_eq_m512i(actual_result, reference_result); + } + test_mm512_popcnt_epi32(); + + #[target_feature(enable = "avx512vpopcntdq,avx512f,avx512vl")] + unsafe fn test_mm256_popcnt_epi32() { + let test_data = _mm256_set_epi32(0, 1, -1, 2, 7, 0xFF_FE, 0x7F_FF_FF_FF, -100); + let actual_result = _mm256_popcnt_epi32(test_data); + let reference_result = _mm256_set_epi32(0, 1, 32, 1, 3, 15, 31, 28); + assert_eq_m256i(actual_result, reference_result); + } + test_mm256_popcnt_epi32(); + + #[target_feature(enable = "avx512vpopcntdq,avx512f,avx512vl")] + unsafe fn test_mm_popcnt_epi32() { + let test_data = _mm_set_epi32(0, 1, -1, -100); + let actual_result = _mm_popcnt_epi32(test_data); + let reference_result = _mm_set_epi32(0, 1, 32, 28); + assert_eq_m128i(actual_result, reference_result); + } + test_mm_popcnt_epi32(); + + #[target_feature(enable = "avx512vpopcntdq,avx512f")] + unsafe fn test_mm512_popcnt_epi64() { + let test_data = _mm512_set_epi64(0, 1, -1, 2, 7, 0xFF_FE, 0x7F_FF_FF_FF_FF_FF_FF_FF, -100); + let actual_result = _mm512_popcnt_epi64(test_data); + let reference_result = _mm512_set_epi64(0, 1, 64, 1, 3, 15, 63, 60); + assert_eq_m512i(actual_result, reference_result); + } + test_mm512_popcnt_epi64(); + + #[target_feature(enable = "avx512vpopcntdq,avx512vl")] + unsafe fn test_mm256_popcnt_epi64() { + let test_data = _mm256_set_epi64x(0, 1, -1, -100); + let actual_result = _mm256_popcnt_epi64(test_data); + let reference_result = _mm256_set_epi64x(0, 1, 64, 60); + assert_eq_m256i(actual_result, reference_result); + } + test_mm256_popcnt_epi64(); + + #[target_feature(enable = "avx512vpopcntdq,avx512vl")] + unsafe fn test_mm_popcnt_epi64() { + let test_data = _mm_set_epi64x(0, 1); + let actual_result = _mm_popcnt_epi64(test_data); + let reference_result = _mm_set_epi64x(0, 1); + assert_eq_m128i(actual_result, reference_result); + let test_data = _mm_set_epi64x(-1, -100); + let actual_result = _mm_popcnt_epi64(test_data); + let reference_result = _mm_set_epi64x(64, 60); + assert_eq_m128i(actual_result, reference_result); + } + test_mm_popcnt_epi64(); +} + +#[track_caller] +unsafe fn assert_eq_m512i(a: __m512i, b: __m512i) { + assert_eq!(transmute::<_, [i32; 16]>(a), transmute::<_, [i32; 16]>(b)) +} + +#[track_caller] +unsafe fn assert_eq_m256i(a: __m256i, b: __m256i) { + assert_eq!(transmute::<_, [u64; 4]>(a), transmute::<_, [u64; 4]>(b)) +} + +#[track_caller] +unsafe fn assert_eq_m128i(a: __m128i, b: __m128i) { + assert_eq!(transmute::<_, [u64; 2]>(a), transmute::<_, [u64; 2]>(b)) +} From 838e6a6b78ec61c2802d8f1364d2f07c4e2453db Mon Sep 17 00:00:00 2001 From: Ralf Jung Date: Fri, 22 Sep 2023 07:28:56 +0200 Subject: [PATCH 13/26] run the cron job a bit earlier --- src/tools/miri/.github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tools/miri/.github/workflows/ci.yml b/src/tools/miri/.github/workflows/ci.yml index 5e2abdde6aced..f026b7fd10452 100644 --- a/src/tools/miri/.github/workflows/ci.yml +++ b/src/tools/miri/.github/workflows/ci.yml @@ -10,7 +10,7 @@ on: branches: - 'master' schedule: - - cron: '11 5 * * *' # At 5:11 UTC every day. + - cron: '44 4 * * *' # At 4:44 UTC every day. defaults: run: From 5211d045c9aa105cd241106183366da5e6d6416c Mon Sep 17 00:00:00 2001 From: The Miri Conjob Bot Date: Fri, 22 Sep 2023 05:29:34 +0000 Subject: [PATCH 14/26] Preparing for merge from rustc --- src/tools/miri/rust-version | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tools/miri/rust-version b/src/tools/miri/rust-version index f6db58695fbc6..b01db9e388a5f 100644 --- a/src/tools/miri/rust-version +++ b/src/tools/miri/rust-version @@ -1 +1 @@ -4fda889bf8735755573b27e6116ce025f3ded5f9 +b7573187180a6ad13ba557f85b7e19a51f8fdaf1 From e6a4a1b12e0d8df98d80c1f4f4c3b09b9b994257 Mon Sep 17 00:00:00 2001 From: The Miri Conjob Bot Date: Fri, 22 Sep 2023 05:40:22 +0000 Subject: [PATCH 15/26] fmt --- src/tools/miri/src/helpers.rs | 4 ++-- src/tools/miri/src/operator.rs | 14 +++++++++----- src/tools/miri/src/shims/x86/sse2.rs | 22 ++++------------------ 3 files changed, 15 insertions(+), 25 deletions(-) diff --git a/src/tools/miri/src/helpers.rs b/src/tools/miri/src/helpers.rs index 537c767065d94..badd2629388cc 100644 --- a/src/tools/miri/src/helpers.rs +++ b/src/tools/miri/src/helpers.rs @@ -1033,7 +1033,7 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriInterpCxExt<'mir, 'tcx> { ) { // Floating point value is NaN (flagged with INVALID_OP) or outside the range // of values of the integer type (flagged with OVERFLOW or UNDERFLOW). - return None + return None; } else { // Floating point value can be represented by the integer type after rounding. // The INEXACT flag is ignored on purpose to allow rounding. @@ -1051,7 +1051,7 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriInterpCxExt<'mir, 'tcx> { ) { // Floating point value is NaN (flagged with INVALID_OP) or outside the range // of values of the integer type (flagged with OVERFLOW or UNDERFLOW). - return None + return None; } else { // Floating point value can be represented by the integer type after rounding. // The INEXACT flag is ignored on purpose to allow rounding. diff --git a/src/tools/miri/src/operator.rs b/src/tools/miri/src/operator.rs index 27fe7374ea5df..1faf8f9fc1228 100644 --- a/src/tools/miri/src/operator.rs +++ b/src/tools/miri/src/operator.rs @@ -65,12 +65,16 @@ impl<'mir, 'tcx> EvalContextExt<'tcx> for super::MiriInterpCx<'mir, 'tcx> { right.to_scalar().to_target_usize(self)?, self.machine.layouts.usize, ); - let (result, overflowing) = - self.overflowing_binary_op(bin_op, &left, &right)?; + let (result, overflowing) = self.overflowing_binary_op(bin_op, &left, &right)?; // Construct a new pointer with the provenance of `ptr` (the LHS). - let result_ptr = - Pointer::new(ptr.provenance, Size::from_bytes(result.to_scalar().to_target_usize(self)?)); - (ImmTy::from_scalar(Scalar::from_maybe_pointer(result_ptr, self), left.layout), overflowing) + let result_ptr = Pointer::new( + ptr.provenance, + Size::from_bytes(result.to_scalar().to_target_usize(self)?), + ); + ( + ImmTy::from_scalar(Scalar::from_maybe_pointer(result_ptr, self), left.layout), + overflowing, + ) } _ => span_bug!(self.cur_span(), "Invalid operator on pointers: {:?}", bin_op), diff --git a/src/tools/miri/src/shims/x86/sse2.rs b/src/tools/miri/src/shims/x86/sse2.rs index 6144e07130f96..855ebbe707269 100644 --- a/src/tools/miri/src/shims/x86/sse2.rs +++ b/src/tools/miri/src/shims/x86/sse2.rs @@ -61,11 +61,7 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriInterpCxExt<'mir, 'tcx> { let right = this.int_to_int_or_float(&right, twice_wide)?; // Calculate left + right + 1 - let added = this.wrapping_binary_op( - mir::BinOp::Add, - &left, - &right, - )?; + let added = this.wrapping_binary_op(mir::BinOp::Add, &left, &right)?; let added = this.wrapping_binary_op( mir::BinOp::Add, &added, @@ -80,10 +76,7 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriInterpCxExt<'mir, 'tcx> { )?; // Narrow back to the original type - let res = this.int_to_int_or_float( - ÷d, - dest.layout, - )?; + let res = this.int_to_int_or_float(÷d, dest.layout)?; this.write_immediate(*res, &dest)?; } } @@ -110,11 +103,7 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriInterpCxExt<'mir, 'tcx> { let right = this.int_to_int_or_float(&right, twice_wide)?; // Multiply - let multiplied = this.wrapping_binary_op( - mir::BinOp::Mul, - &left, - &right, - )?; + let multiplied = this.wrapping_binary_op(mir::BinOp::Mul, &left, &right)?; // Keep the high half let high = this.wrapping_binary_op( mir::BinOp::Shr, @@ -123,10 +112,7 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriInterpCxExt<'mir, 'tcx> { )?; // Narrow back to the original type - let res = this.int_to_int_or_float( - &high, - dest.layout, - )?; + let res = this.int_to_int_or_float(&high, dest.layout)?; this.write_immediate(*res, &dest)?; } } From 896065f902602e99c6996cee9cee99ab7c00dcaa Mon Sep 17 00:00:00 2001 From: Ralf Jung Date: Fri, 22 Sep 2023 10:35:58 +0200 Subject: [PATCH 16/26] clippy --- src/tools/miri/src/shims/intrinsics/simd.rs | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/tools/miri/src/shims/intrinsics/simd.rs b/src/tools/miri/src/shims/intrinsics/simd.rs index 49ba7e5556e6f..de0c58de44fd1 100644 --- a/src/tools/miri/src/shims/intrinsics/simd.rs +++ b/src/tools/miri/src/shims/intrinsics/simd.rs @@ -458,7 +458,6 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriInterpCxExt<'mir, 'tcx> { dest.layout.ty ) })? - .into() } (ty::Float(FloatTy::F64), ty::Int(_) | ty::Uint(_)) if unsafe_cast => { let f = op.to_scalar().to_f64()?; @@ -469,7 +468,6 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriInterpCxExt<'mir, 'tcx> { dest.layout.ty ) })? - .into() } // Ptr-to-ptr cast (ty::RawPtr(..), ty::RawPtr(..)) if ptr_cast => From 68706342323af86d58595d43a725637b42783675 Mon Sep 17 00:00:00 2001 From: The Miri Conjob Bot Date: Sat, 23 Sep 2023 05:10:33 +0000 Subject: [PATCH 17/26] Preparing for merge from rustc --- src/tools/miri/rust-version | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tools/miri/rust-version b/src/tools/miri/rust-version index b01db9e388a5f..b084e1358015b 100644 --- a/src/tools/miri/rust-version +++ b/src/tools/miri/rust-version @@ -1 +1 @@ -b7573187180a6ad13ba557f85b7e19a51f8fdaf1 +2d08657901d8fec8f51f203dc8a38e25c162e834 From fdbf9324995517752d0387cf5fa5ebce519fb8e7 Mon Sep 17 00:00:00 2001 From: The Miri Conjob Bot Date: Sun, 24 Sep 2023 05:03:58 +0000 Subject: [PATCH 18/26] Preparing for merge from rustc --- src/tools/miri/rust-version | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tools/miri/rust-version b/src/tools/miri/rust-version index b084e1358015b..458bba9a61668 100644 --- a/src/tools/miri/rust-version +++ b/src/tools/miri/rust-version @@ -1 +1 @@ -2d08657901d8fec8f51f203dc8a38e25c162e834 +42ca6e4e5760a548a6fa858482de6d237f6fb3b8 From cf766029b6502cedac0be9104fd7589b7abd8de9 Mon Sep 17 00:00:00 2001 From: The Miri Conjob Bot Date: Sun, 24 Sep 2023 05:14:40 +0000 Subject: [PATCH 19/26] fmt --- src/tools/miri/src/bin/miri.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/tools/miri/src/bin/miri.rs b/src/tools/miri/src/bin/miri.rs index d7fb4546fd66b..fc6151772a0fd 100644 --- a/src/tools/miri/src/bin/miri.rs +++ b/src/tools/miri/src/bin/miri.rs @@ -28,9 +28,9 @@ use rustc_middle::{ middle::exported_symbols::{ ExportedSymbol, SymbolExportInfo, SymbolExportKind, SymbolExportLevel, }, - query::{LocalCrate}, - util::Providers, + query::LocalCrate, ty::TyCtxt, + util::Providers, }; use rustc_session::config::{CrateType, ErrorOutputType, OptLevel}; use rustc_session::search_paths::PathKind; From 413ff386c235b45e6ce7fcea022fcdc5ac86feb2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Eduardo=20S=C3=A1nchez=20Mu=C3=B1oz?= Date: Sun, 24 Sep 2023 11:57:53 +0200 Subject: [PATCH 20/26] Put AVX512 tests together --- ...x512bitalg.rs => intrinsics-x86-avx512.rs} | 87 +++++++++++- .../pass/intrinsics-x86-avx512vpopcntdq.rs | 124 ------------------ 2 files changed, 85 insertions(+), 126 deletions(-) rename src/tools/miri/tests/pass/{intrinsics-x86-avx512bitalg.rs => intrinsics-x86-avx512.rs} (61%) delete mode 100644 src/tools/miri/tests/pass/intrinsics-x86-avx512vpopcntdq.rs diff --git a/src/tools/miri/tests/pass/intrinsics-x86-avx512bitalg.rs b/src/tools/miri/tests/pass/intrinsics-x86-avx512.rs similarity index 61% rename from src/tools/miri/tests/pass/intrinsics-x86-avx512bitalg.rs rename to src/tools/miri/tests/pass/intrinsics-x86-avx512.rs index 19f9b94ff08a5..a5615aaa43225 100644 --- a/src/tools/miri/tests/pass/intrinsics-x86-avx512bitalg.rs +++ b/src/tools/miri/tests/pass/intrinsics-x86-avx512.rs @@ -6,7 +6,7 @@ //@ignore-target-s390x //@ignore-target-thumbv7em //@ignore-target-wasm32 -//@compile-flags: -C target-feature=+avx512bitalg,+avx512f,+avx512vl +//@compile-flags: -C target-feature=+avx512f,+avx512vl,+avx512bitalg,+avx512vpopcntdq #![feature(avx512_target_feature)] #![feature(stdsimd)] @@ -18,12 +18,14 @@ use std::arch::x86_64::*; use std::mem::transmute; fn main() { - assert!(is_x86_feature_detected!("avx512bitalg")); assert!(is_x86_feature_detected!("avx512f")); assert!(is_x86_feature_detected!("avx512vl")); + assert!(is_x86_feature_detected!("avx512bitalg")); + assert!(is_x86_feature_detected!("avx512vpopcntdq")); unsafe { test_avx512bitalg(); + test_avx512vpopcntdq(); } } @@ -117,6 +119,87 @@ unsafe fn test_avx512bitalg() { test_mm_popcnt_epi8(); } +#[target_feature(enable = "avx512vpopcntdq,avx512f,avx512vl")] +unsafe fn test_avx512vpopcntdq() { + // Mostly copied from library/stdarch/crates/core_arch/src/x86/avx512vpopcntdq.rs + + #[target_feature(enable = "avx512vpopcntdq,avx512f")] + unsafe fn test_mm512_popcnt_epi32() { + let test_data = _mm512_set_epi32( + 0, + 1, + -1, + 2, + 7, + 0xFF_FE, + 0x7F_FF_FF_FF, + -100, + 0x40_00_00_00, + 103, + 371, + 552, + 432_948, + 818_826_998, + 255, + 256, + ); + let actual_result = _mm512_popcnt_epi32(test_data); + let reference_result = + _mm512_set_epi32(0, 1, 32, 1, 3, 15, 31, 28, 1, 5, 6, 3, 10, 17, 8, 1); + assert_eq_m512i(actual_result, reference_result); + } + test_mm512_popcnt_epi32(); + + #[target_feature(enable = "avx512vpopcntdq,avx512f,avx512vl")] + unsafe fn test_mm256_popcnt_epi32() { + let test_data = _mm256_set_epi32(0, 1, -1, 2, 7, 0xFF_FE, 0x7F_FF_FF_FF, -100); + let actual_result = _mm256_popcnt_epi32(test_data); + let reference_result = _mm256_set_epi32(0, 1, 32, 1, 3, 15, 31, 28); + assert_eq_m256i(actual_result, reference_result); + } + test_mm256_popcnt_epi32(); + + #[target_feature(enable = "avx512vpopcntdq,avx512f,avx512vl")] + unsafe fn test_mm_popcnt_epi32() { + let test_data = _mm_set_epi32(0, 1, -1, -100); + let actual_result = _mm_popcnt_epi32(test_data); + let reference_result = _mm_set_epi32(0, 1, 32, 28); + assert_eq_m128i(actual_result, reference_result); + } + test_mm_popcnt_epi32(); + + #[target_feature(enable = "avx512vpopcntdq,avx512f")] + unsafe fn test_mm512_popcnt_epi64() { + let test_data = _mm512_set_epi64(0, 1, -1, 2, 7, 0xFF_FE, 0x7F_FF_FF_FF_FF_FF_FF_FF, -100); + let actual_result = _mm512_popcnt_epi64(test_data); + let reference_result = _mm512_set_epi64(0, 1, 64, 1, 3, 15, 63, 60); + assert_eq_m512i(actual_result, reference_result); + } + test_mm512_popcnt_epi64(); + + #[target_feature(enable = "avx512vpopcntdq,avx512vl")] + unsafe fn test_mm256_popcnt_epi64() { + let test_data = _mm256_set_epi64x(0, 1, -1, -100); + let actual_result = _mm256_popcnt_epi64(test_data); + let reference_result = _mm256_set_epi64x(0, 1, 64, 60); + assert_eq_m256i(actual_result, reference_result); + } + test_mm256_popcnt_epi64(); + + #[target_feature(enable = "avx512vpopcntdq,avx512vl")] + unsafe fn test_mm_popcnt_epi64() { + let test_data = _mm_set_epi64x(0, 1); + let actual_result = _mm_popcnt_epi64(test_data); + let reference_result = _mm_set_epi64x(0, 1); + assert_eq_m128i(actual_result, reference_result); + let test_data = _mm_set_epi64x(-1, -100); + let actual_result = _mm_popcnt_epi64(test_data); + let reference_result = _mm_set_epi64x(64, 60); + assert_eq_m128i(actual_result, reference_result); + } + test_mm_popcnt_epi64(); +} + #[track_caller] unsafe fn assert_eq_m512i(a: __m512i, b: __m512i) { assert_eq!(transmute::<_, [i32; 16]>(a), transmute::<_, [i32; 16]>(b)) diff --git a/src/tools/miri/tests/pass/intrinsics-x86-avx512vpopcntdq.rs b/src/tools/miri/tests/pass/intrinsics-x86-avx512vpopcntdq.rs deleted file mode 100644 index 5c8821305e61d..0000000000000 --- a/src/tools/miri/tests/pass/intrinsics-x86-avx512vpopcntdq.rs +++ /dev/null @@ -1,124 +0,0 @@ -// Ignore everything except x86 and x86_64 -// Any additional target are added to CI should be ignored here -//@ignore-target-aarch64 -//@ignore-target-arm -//@ignore-target-avr -//@ignore-target-s390x -//@ignore-target-thumbv7em -//@ignore-target-wasm32 -//@compile-flags: -C target-feature=+avx512vpopcntdq,+avx512f,+avx512vl - -#![feature(avx512_target_feature)] -#![feature(stdsimd)] - -#[cfg(target_arch = "x86")] -use std::arch::x86::*; -#[cfg(target_arch = "x86_64")] -use std::arch::x86_64::*; -use std::mem::transmute; - -fn main() { - assert!(is_x86_feature_detected!("avx512vpopcntdq")); - assert!(is_x86_feature_detected!("avx512f")); - assert!(is_x86_feature_detected!("avx512vl")); - - unsafe { - test_avx512vpopcntdq(); - } -} - -#[target_feature(enable = "avx512vpopcntdq,avx512f,avx512vl")] -unsafe fn test_avx512vpopcntdq() { - // Mostly copied from library/stdarch/crates/core_arch/src/x86/avx512vpopcntdq.rs - - #[target_feature(enable = "avx512vpopcntdq,avx512f")] - unsafe fn test_mm512_popcnt_epi32() { - let test_data = _mm512_set_epi32( - 0, - 1, - -1, - 2, - 7, - 0xFF_FE, - 0x7F_FF_FF_FF, - -100, - 0x40_00_00_00, - 103, - 371, - 552, - 432_948, - 818_826_998, - 255, - 256, - ); - let actual_result = _mm512_popcnt_epi32(test_data); - let reference_result = - _mm512_set_epi32(0, 1, 32, 1, 3, 15, 31, 28, 1, 5, 6, 3, 10, 17, 8, 1); - assert_eq_m512i(actual_result, reference_result); - } - test_mm512_popcnt_epi32(); - - #[target_feature(enable = "avx512vpopcntdq,avx512f,avx512vl")] - unsafe fn test_mm256_popcnt_epi32() { - let test_data = _mm256_set_epi32(0, 1, -1, 2, 7, 0xFF_FE, 0x7F_FF_FF_FF, -100); - let actual_result = _mm256_popcnt_epi32(test_data); - let reference_result = _mm256_set_epi32(0, 1, 32, 1, 3, 15, 31, 28); - assert_eq_m256i(actual_result, reference_result); - } - test_mm256_popcnt_epi32(); - - #[target_feature(enable = "avx512vpopcntdq,avx512f,avx512vl")] - unsafe fn test_mm_popcnt_epi32() { - let test_data = _mm_set_epi32(0, 1, -1, -100); - let actual_result = _mm_popcnt_epi32(test_data); - let reference_result = _mm_set_epi32(0, 1, 32, 28); - assert_eq_m128i(actual_result, reference_result); - } - test_mm_popcnt_epi32(); - - #[target_feature(enable = "avx512vpopcntdq,avx512f")] - unsafe fn test_mm512_popcnt_epi64() { - let test_data = _mm512_set_epi64(0, 1, -1, 2, 7, 0xFF_FE, 0x7F_FF_FF_FF_FF_FF_FF_FF, -100); - let actual_result = _mm512_popcnt_epi64(test_data); - let reference_result = _mm512_set_epi64(0, 1, 64, 1, 3, 15, 63, 60); - assert_eq_m512i(actual_result, reference_result); - } - test_mm512_popcnt_epi64(); - - #[target_feature(enable = "avx512vpopcntdq,avx512vl")] - unsafe fn test_mm256_popcnt_epi64() { - let test_data = _mm256_set_epi64x(0, 1, -1, -100); - let actual_result = _mm256_popcnt_epi64(test_data); - let reference_result = _mm256_set_epi64x(0, 1, 64, 60); - assert_eq_m256i(actual_result, reference_result); - } - test_mm256_popcnt_epi64(); - - #[target_feature(enable = "avx512vpopcntdq,avx512vl")] - unsafe fn test_mm_popcnt_epi64() { - let test_data = _mm_set_epi64x(0, 1); - let actual_result = _mm_popcnt_epi64(test_data); - let reference_result = _mm_set_epi64x(0, 1); - assert_eq_m128i(actual_result, reference_result); - let test_data = _mm_set_epi64x(-1, -100); - let actual_result = _mm_popcnt_epi64(test_data); - let reference_result = _mm_set_epi64x(64, 60); - assert_eq_m128i(actual_result, reference_result); - } - test_mm_popcnt_epi64(); -} - -#[track_caller] -unsafe fn assert_eq_m512i(a: __m512i, b: __m512i) { - assert_eq!(transmute::<_, [i32; 16]>(a), transmute::<_, [i32; 16]>(b)) -} - -#[track_caller] -unsafe fn assert_eq_m256i(a: __m256i, b: __m256i) { - assert_eq!(transmute::<_, [u64; 4]>(a), transmute::<_, [u64; 4]>(b)) -} - -#[track_caller] -unsafe fn assert_eq_m128i(a: __m128i, b: __m128i) { - assert_eq!(transmute::<_, [u64; 2]>(a), transmute::<_, [u64; 2]>(b)) -} From d6b30b88d0cc7b2f98b5f343353f97ef9bf2f9e3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Eduardo=20S=C3=A1nchez=20Mu=C3=B1oz?= Date: Thu, 21 Sep 2023 19:41:48 +0200 Subject: [PATCH 21/26] Move `llvm.x86.*` implementations into `shims::x86` --- src/tools/miri/src/shims/foreign_items.rs | 39 +++------------ src/tools/miri/src/shims/x86/mod.rs | 58 ++++++++++++++++++++++- src/tools/miri/src/shims/x86/sse.rs | 4 +- src/tools/miri/src/shims/x86/sse2.rs | 10 +++- 4 files changed, 75 insertions(+), 36 deletions(-) diff --git a/src/tools/miri/src/shims/foreign_items.rs b/src/tools/miri/src/shims/foreign_items.rs index f596df1f7a1df..0c92ede40fd8c 100644 --- a/src/tools/miri/src/shims/foreign_items.rs +++ b/src/tools/miri/src/shims/foreign_items.rs @@ -22,7 +22,7 @@ use rustc_target::{ }; use super::backtrace::EvalContextExt as _; -use crate::helpers::{convert::Truncate, target_os_is_unix}; +use crate::helpers::target_os_is_unix; use crate::*; /// Returned by `emulate_foreign_item_by_name`. @@ -981,30 +981,7 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriInterpCxExt<'mir, 'tcx> { throw_unsup_format!("unsupported `llvm.prefetch` type argument: {}", ty); } } - "llvm.x86.addcarry.64" if this.tcx.sess.target.arch == "x86_64" => { - // Computes u8+u64+u64, returning tuple (u8,u64) comprising the output carry and truncated sum. - let [c_in, a, b] = this.check_shim(abi, Abi::Unadjusted, link_name, args)?; - let c_in = this.read_scalar(c_in)?.to_u8()?; - let a = this.read_scalar(a)?.to_u64()?; - let b = this.read_scalar(b)?.to_u64()?; - - #[allow(clippy::arithmetic_side_effects)] - // adding two u64 and a u8 cannot wrap in a u128 - let wide_sum = u128::from(c_in) + u128::from(a) + u128::from(b); - #[allow(clippy::arithmetic_side_effects)] // it's a u128, we can shift by 64 - let (c_out, sum) = ((wide_sum >> 64).truncate::(), wide_sum.truncate::()); - - let c_out_field = this.project_field(dest, 0)?; - this.write_scalar(Scalar::from_u8(c_out), &c_out_field)?; - let sum_field = this.project_field(dest, 1)?; - this.write_scalar(Scalar::from_u64(sum), &sum_field)?; - } - "llvm.x86.sse2.pause" - if this.tcx.sess.target.arch == "x86" || this.tcx.sess.target.arch == "x86_64" => - { - let [] = this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?; - this.yield_active_thread(); - } + // FIXME: Move these to an `arm` submodule. "llvm.aarch64.isb" if this.tcx.sess.target.arch == "aarch64" => { let [arg] = this.check_shim(abi, Abi::Unadjusted, link_name, args)?; let arg = this.read_scalar(arg)?.to_i32()?; @@ -1055,13 +1032,11 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriInterpCxExt<'mir, 'tcx> { } } - name if name.starts_with("llvm.x86.sse.") => { - return shims::x86::sse::EvalContextExt::emulate_x86_sse_intrinsic( - this, link_name, abi, args, dest, - ); - } - name if name.starts_with("llvm.x86.sse2.") => { - return shims::x86::sse2::EvalContextExt::emulate_x86_sse2_intrinsic( + name if name.starts_with("llvm.x86.") + && (this.tcx.sess.target.arch == "x86" + || this.tcx.sess.target.arch == "x86_64") => + { + return shims::x86::EvalContextExt::emulate_x86_intrinsic( this, link_name, abi, args, dest, ); } diff --git a/src/tools/miri/src/shims/x86/mod.rs b/src/tools/miri/src/shims/x86/mod.rs index cbdc500be789c..16eac81fa0a86 100644 --- a/src/tools/miri/src/shims/x86/mod.rs +++ b/src/tools/miri/src/shims/x86/mod.rs @@ -1,11 +1,65 @@ use rustc_middle::mir; +use rustc_span::Symbol; use rustc_target::abi::Size; +use rustc_target::spec::abi::Abi; use crate::*; use helpers::bool_to_simd_element; +use helpers::convert::Truncate as _; +use shims::foreign_items::EmulateByNameResult; + +mod sse; +mod sse2; + +impl<'mir, 'tcx: 'mir> EvalContextExt<'mir, 'tcx> for crate::MiriInterpCx<'mir, 'tcx> {} +pub(super) trait EvalContextExt<'mir, 'tcx: 'mir>: + crate::MiriInterpCxExt<'mir, 'tcx> +{ + fn emulate_x86_intrinsic( + &mut self, + link_name: Symbol, + abi: Abi, + args: &[OpTy<'tcx, Provenance>], + dest: &PlaceTy<'tcx, Provenance>, + ) -> InterpResult<'tcx, EmulateByNameResult<'mir, 'tcx>> { + let this = self.eval_context_mut(); + // Prefix should have already been checked. + let unprefixed_name = link_name.as_str().strip_prefix("llvm.x86.").unwrap(); + match unprefixed_name { + "addcarry.64" if this.tcx.sess.target.arch == "x86_64" => { + // Computes u8+u64+u64, returning tuple (u8,u64) comprising the output carry and truncated sum. + let [c_in, a, b] = this.check_shim(abi, Abi::Unadjusted, link_name, args)?; + let c_in = this.read_scalar(c_in)?.to_u8()?; + let a = this.read_scalar(a)?.to_u64()?; + let b = this.read_scalar(b)?.to_u64()?; + + #[allow(clippy::arithmetic_side_effects)] + // adding two u64 and a u8 cannot wrap in a u128 + let wide_sum = u128::from(c_in) + u128::from(a) + u128::from(b); + #[allow(clippy::arithmetic_side_effects)] // it's a u128, we can shift by 64 + let (c_out, sum) = ((wide_sum >> 64).truncate::(), wide_sum.truncate::()); + + let c_out_field = this.project_field(dest, 0)?; + this.write_scalar(Scalar::from_u8(c_out), &c_out_field)?; + let sum_field = this.project_field(dest, 1)?; + this.write_scalar(Scalar::from_u64(sum), &sum_field)?; + } -pub(super) mod sse; -pub(super) mod sse2; + name if name.starts_with("sse.") => { + return sse::EvalContextExt::emulate_x86_sse_intrinsic( + this, link_name, abi, args, dest, + ); + } + name if name.starts_with("sse2.") => { + return sse2::EvalContextExt::emulate_x86_sse2_intrinsic( + this, link_name, abi, args, dest, + ); + } + _ => return Ok(EmulateByNameResult::NotSupported), + } + Ok(EmulateByNameResult::NeedsJumping) + } +} /// Floating point comparison operation /// diff --git a/src/tools/miri/src/shims/x86/sse.rs b/src/tools/miri/src/shims/x86/sse.rs index b6b994b45ac8e..de1e695b6d149 100644 --- a/src/tools/miri/src/shims/x86/sse.rs +++ b/src/tools/miri/src/shims/x86/sse.rs @@ -10,7 +10,9 @@ use crate::*; use shims::foreign_items::EmulateByNameResult; impl<'mir, 'tcx: 'mir> EvalContextExt<'mir, 'tcx> for crate::MiriInterpCx<'mir, 'tcx> {} -pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriInterpCxExt<'mir, 'tcx> { +pub(super) trait EvalContextExt<'mir, 'tcx: 'mir>: + crate::MiriInterpCxExt<'mir, 'tcx> +{ fn emulate_x86_sse_intrinsic( &mut self, link_name: Symbol, diff --git a/src/tools/miri/src/shims/x86/sse2.rs b/src/tools/miri/src/shims/x86/sse2.rs index 855ebbe707269..2ca882167bf4b 100644 --- a/src/tools/miri/src/shims/x86/sse2.rs +++ b/src/tools/miri/src/shims/x86/sse2.rs @@ -13,7 +13,9 @@ use crate::*; use shims::foreign_items::EmulateByNameResult; impl<'mir, 'tcx: 'mir> EvalContextExt<'mir, 'tcx> for crate::MiriInterpCx<'mir, 'tcx> {} -pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriInterpCxExt<'mir, 'tcx> { +pub(super) trait EvalContextExt<'mir, 'tcx: 'mir>: + crate::MiriInterpCxExt<'mir, 'tcx> +{ fn emulate_x86_sse2_intrinsic( &mut self, link_name: Symbol, @@ -753,6 +755,12 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriInterpCxExt<'mir, 'tcx> { this.write_scalar(Scalar::from_u32(res.try_into().unwrap()), dest)?; } + // Used to implement the `_mm_pause` function. + // The intrinsic is used to hint the processor that the code is in a spin-loop. + "pause" => { + let [] = this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?; + this.yield_active_thread(); + } _ => return Ok(EmulateByNameResult::NotSupported), } Ok(EmulateByNameResult::NeedsJumping) From 356039985ea8eaf41ef54b89123bd68019181ef8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Eduardo=20S=C3=A1nchez=20Mu=C3=B1oz?= Date: Thu, 21 Sep 2023 20:08:52 +0200 Subject: [PATCH 22/26] Implement `llvm.x86.addcarry.32` by refactoring `llvm.x86.addcarry.64` to make it generic --- src/tools/miri/src/shims/x86/mod.rs | 38 ++++++++++++--------- src/tools/miri/tests/pass/intrinsics-x86.rs | 23 +++++++++++++ 2 files changed, 45 insertions(+), 16 deletions(-) diff --git a/src/tools/miri/src/shims/x86/mod.rs b/src/tools/miri/src/shims/x86/mod.rs index 16eac81fa0a86..2db2f2e19ffd5 100644 --- a/src/tools/miri/src/shims/x86/mod.rs +++ b/src/tools/miri/src/shims/x86/mod.rs @@ -5,7 +5,6 @@ use rustc_target::spec::abi::Abi; use crate::*; use helpers::bool_to_simd_element; -use helpers::convert::Truncate as _; use shims::foreign_items::EmulateByNameResult; mod sse; @@ -26,23 +25,30 @@ pub(super) trait EvalContextExt<'mir, 'tcx: 'mir>: // Prefix should have already been checked. let unprefixed_name = link_name.as_str().strip_prefix("llvm.x86.").unwrap(); match unprefixed_name { - "addcarry.64" if this.tcx.sess.target.arch == "x86_64" => { - // Computes u8+u64+u64, returning tuple (u8,u64) comprising the output carry and truncated sum. + // Used to implement the `_addcarry_u32` and `_addcarry_u64` functions. + // Computes u8+uX+uX (uX is u32 or u64), returning tuple (u8,uX) comprising + // the output carry and truncated sum. + "addcarry.32" | "addcarry.64" => { + if unprefixed_name == "addcarry.64" && this.tcx.sess.target.arch != "x86_64" { + return Ok(EmulateByNameResult::NotSupported); + } + let [c_in, a, b] = this.check_shim(abi, Abi::Unadjusted, link_name, args)?; let c_in = this.read_scalar(c_in)?.to_u8()?; - let a = this.read_scalar(a)?.to_u64()?; - let b = this.read_scalar(b)?.to_u64()?; - - #[allow(clippy::arithmetic_side_effects)] - // adding two u64 and a u8 cannot wrap in a u128 - let wide_sum = u128::from(c_in) + u128::from(a) + u128::from(b); - #[allow(clippy::arithmetic_side_effects)] // it's a u128, we can shift by 64 - let (c_out, sum) = ((wide_sum >> 64).truncate::(), wide_sum.truncate::()); - - let c_out_field = this.project_field(dest, 0)?; - this.write_scalar(Scalar::from_u8(c_out), &c_out_field)?; - let sum_field = this.project_field(dest, 1)?; - this.write_scalar(Scalar::from_u64(sum), &sum_field)?; + let a = this.read_immediate(a)?; + let b = this.read_immediate(b)?; + + let (sum, overflow1) = this.overflowing_binary_op(mir::BinOp::Add, &a, &b)?; + let (sum, overflow2) = this.overflowing_binary_op( + mir::BinOp::Add, + &sum, + &ImmTy::from_uint(c_in, a.layout), + )?; + #[allow(clippy::arithmetic_side_effects)] // adding two bools into a u8 + let c_out = u8::from(overflow1) + u8::from(overflow2); + + this.write_scalar(Scalar::from_u8(c_out), &this.project_field(dest, 0)?)?; + this.write_immediate(*sum, &this.project_field(dest, 1)?)?; } name if name.starts_with("sse.") => { diff --git a/src/tools/miri/tests/pass/intrinsics-x86.rs b/src/tools/miri/tests/pass/intrinsics-x86.rs index 88cd782e70a46..2e947200989fd 100644 --- a/src/tools/miri/tests/pass/intrinsics-x86.rs +++ b/src/tools/miri/tests/pass/intrinsics-x86.rs @@ -1,3 +1,24 @@ +#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +mod x86 { + #[cfg(target_arch = "x86")] + use core::arch::x86 as arch; + #[cfg(target_arch = "x86_64")] + use core::arch::x86_64 as arch; + + fn adc(c_in: u8, a: u32, b: u32) -> (u8, u32) { + let mut sum = 0; + // SAFETY: There are no safety requirements for calling `_addcarry_u32`. + // It's just unsafe for API consistency with other intrinsics. + let c_out = unsafe { arch::_addcarry_u32(c_in, a, b, &mut sum) }; + (c_out, sum) + } + + pub fn main() { + assert_eq!(adc(1, 1, 1), (0, 3)); + assert_eq!(adc(3, u32::MAX, u32::MAX), (2, 1)); + } +} + #[cfg(target_arch = "x86_64")] mod x86_64 { use core::arch::x86_64 as arch; @@ -17,6 +38,8 @@ mod x86_64 { } fn main() { + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + x86::main(); #[cfg(target_arch = "x86_64")] x86_64::main(); } From 6ec63ed251f25d3c9b3d28321ebacee15d18fdae Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Eduardo=20S=C3=A1nchez=20Mu=C3=B1oz?= Date: Thu, 21 Sep 2023 20:25:36 +0200 Subject: [PATCH 23/26] Fix the carry semantics of `_addcarry_u32` and `_addcarry_u64` The input carry is an 8-bit value that is interpreted as 1 when it is non-zero. The output carry is an 8-bit value that will be 0 or 1. https://www.intel.com/content/www/us/en/docs/cpp-compiler/developer-guide-reference/2021-8/addcarry-u32-addcarry-u64.html --- src/tools/miri/src/shims/x86/mod.rs | 13 +++++++------ src/tools/miri/tests/pass/intrinsics-x86.rs | 16 ++++++++++++++-- 2 files changed, 21 insertions(+), 8 deletions(-) diff --git a/src/tools/miri/src/shims/x86/mod.rs b/src/tools/miri/src/shims/x86/mod.rs index 2db2f2e19ffd5..94759eafd3c5b 100644 --- a/src/tools/miri/src/shims/x86/mod.rs +++ b/src/tools/miri/src/shims/x86/mod.rs @@ -26,15 +26,17 @@ pub(super) trait EvalContextExt<'mir, 'tcx: 'mir>: let unprefixed_name = link_name.as_str().strip_prefix("llvm.x86.").unwrap(); match unprefixed_name { // Used to implement the `_addcarry_u32` and `_addcarry_u64` functions. - // Computes u8+uX+uX (uX is u32 or u64), returning tuple (u8,uX) comprising - // the output carry and truncated sum. + // Computes a + b with input and output carry. The input carry is an 8-bit + // value, which is interpreted as 1 if it is non-zero. The output carry is + // an 8-bit value that will be 0 or 1. + // https://www.intel.com/content/www/us/en/docs/cpp-compiler/developer-guide-reference/2021-8/addcarry-u32-addcarry-u64.html "addcarry.32" | "addcarry.64" => { if unprefixed_name == "addcarry.64" && this.tcx.sess.target.arch != "x86_64" { return Ok(EmulateByNameResult::NotSupported); } let [c_in, a, b] = this.check_shim(abi, Abi::Unadjusted, link_name, args)?; - let c_in = this.read_scalar(c_in)?.to_u8()?; + let c_in = this.read_scalar(c_in)?.to_u8()? != 0; let a = this.read_immediate(a)?; let b = this.read_immediate(b)?; @@ -44,10 +46,9 @@ pub(super) trait EvalContextExt<'mir, 'tcx: 'mir>: &sum, &ImmTy::from_uint(c_in, a.layout), )?; - #[allow(clippy::arithmetic_side_effects)] // adding two bools into a u8 - let c_out = u8::from(overflow1) + u8::from(overflow2); + let c_out = overflow1 | overflow2; - this.write_scalar(Scalar::from_u8(c_out), &this.project_field(dest, 0)?)?; + this.write_scalar(Scalar::from_u8(c_out.into()), &this.project_field(dest, 0)?)?; this.write_immediate(*sum, &this.project_field(dest, 1)?)?; } diff --git a/src/tools/miri/tests/pass/intrinsics-x86.rs b/src/tools/miri/tests/pass/intrinsics-x86.rs index 2e947200989fd..377f962bf1456 100644 --- a/src/tools/miri/tests/pass/intrinsics-x86.rs +++ b/src/tools/miri/tests/pass/intrinsics-x86.rs @@ -14,8 +14,14 @@ mod x86 { } pub fn main() { + assert_eq!(adc(0, 1, 1), (0, 2)); assert_eq!(adc(1, 1, 1), (0, 3)); - assert_eq!(adc(3, u32::MAX, u32::MAX), (2, 1)); + assert_eq!(adc(2, 1, 1), (0, 3)); // any non-zero carry acts as 1! + assert_eq!(adc(u8::MAX, 1, 1), (0, 3)); + assert_eq!(adc(0, u32::MAX, u32::MAX), (1, u32::MAX - 1)); + assert_eq!(adc(1, u32::MAX, u32::MAX), (1, u32::MAX)); + assert_eq!(adc(2, u32::MAX, u32::MAX), (1, u32::MAX)); + assert_eq!(adc(u8::MAX, u32::MAX, u32::MAX), (1, u32::MAX)); } } @@ -32,8 +38,14 @@ mod x86_64 { } pub fn main() { + assert_eq!(adc(0, 1, 1), (0, 2)); assert_eq!(adc(1, 1, 1), (0, 3)); - assert_eq!(adc(3, u64::MAX, u64::MAX), (2, 1)); + assert_eq!(adc(2, 1, 1), (0, 3)); // any non-zero carry acts as 1! + assert_eq!(adc(u8::MAX, 1, 1), (0, 3)); + assert_eq!(adc(0, u64::MAX, u64::MAX), (1, u64::MAX - 1)); + assert_eq!(adc(1, u64::MAX, u64::MAX), (1, u64::MAX)); + assert_eq!(adc(2, u64::MAX, u64::MAX), (1, u64::MAX)); + assert_eq!(adc(u8::MAX, u64::MAX, u64::MAX), (1, u64::MAX)); } } From a617b899960d8b4625354b311f71dea5152d65e8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Eduardo=20S=C3=A1nchez=20Mu=C3=B1oz?= Date: Fri, 22 Sep 2023 16:27:27 +0200 Subject: [PATCH 24/26] Implement `llvm.x86.subborrow.32` and `llvm.x86.subborrow.64` --- src/tools/miri/src/shims/x86/mod.rs | 26 +++++++++++++ src/tools/miri/tests/pass/intrinsics-x86.rs | 42 +++++++++++++++++++++ 2 files changed, 68 insertions(+) diff --git a/src/tools/miri/src/shims/x86/mod.rs b/src/tools/miri/src/shims/x86/mod.rs index 94759eafd3c5b..fbfe00e03dbcb 100644 --- a/src/tools/miri/src/shims/x86/mod.rs +++ b/src/tools/miri/src/shims/x86/mod.rs @@ -51,6 +51,32 @@ pub(super) trait EvalContextExt<'mir, 'tcx: 'mir>: this.write_scalar(Scalar::from_u8(c_out.into()), &this.project_field(dest, 0)?)?; this.write_immediate(*sum, &this.project_field(dest, 1)?)?; } + // Used to implement the `_subborrow_u32` and `_subborrow_u64` functions. + // Computes a - b with input and output borrow. The input borrow is an 8-bit + // value, which is interpreted as 1 if it is non-zero. The output borrow is + // an 8-bit value that will be 0 or 1. + // https://www.intel.com/content/www/us/en/docs/cpp-compiler/developer-guide-reference/2021-8/subborrow-u32-subborrow-u64.html + "subborrow.32" | "subborrow.64" => { + if unprefixed_name == "subborrow.64" && this.tcx.sess.target.arch != "x86_64" { + return Ok(EmulateByNameResult::NotSupported); + } + + let [b_in, a, b] = this.check_shim(abi, Abi::Unadjusted, link_name, args)?; + let b_in = this.read_scalar(b_in)?.to_u8()? != 0; + let a = this.read_immediate(a)?; + let b = this.read_immediate(b)?; + + let (sub, overflow1) = this.overflowing_binary_op(mir::BinOp::Sub, &a, &b)?; + let (sub, overflow2) = this.overflowing_binary_op( + mir::BinOp::Sub, + &sub, + &ImmTy::from_uint(b_in, a.layout), + )?; + let b_out = overflow1 | overflow2; + + this.write_scalar(Scalar::from_u8(b_out.into()), &this.project_field(dest, 0)?)?; + this.write_immediate(*sub, &this.project_field(dest, 1)?)?; + } name if name.starts_with("sse.") => { return sse::EvalContextExt::emulate_x86_sse_intrinsic( diff --git a/src/tools/miri/tests/pass/intrinsics-x86.rs b/src/tools/miri/tests/pass/intrinsics-x86.rs index 377f962bf1456..90bcdba4353f9 100644 --- a/src/tools/miri/tests/pass/intrinsics-x86.rs +++ b/src/tools/miri/tests/pass/intrinsics-x86.rs @@ -13,6 +13,14 @@ mod x86 { (c_out, sum) } + fn sbb(b_in: u8, a: u32, b: u32) -> (u8, u32) { + let mut sum = 0; + // SAFETY: There are no safety requirements for calling `_subborrow_u32`. + // It's just unsafe for API consistency with other intrinsics. + let b_out = unsafe { arch::_subborrow_u32(b_in, a, b, &mut sum) }; + (b_out, sum) + } + pub fn main() { assert_eq!(adc(0, 1, 1), (0, 2)); assert_eq!(adc(1, 1, 1), (0, 3)); @@ -22,6 +30,19 @@ mod x86 { assert_eq!(adc(1, u32::MAX, u32::MAX), (1, u32::MAX)); assert_eq!(adc(2, u32::MAX, u32::MAX), (1, u32::MAX)); assert_eq!(adc(u8::MAX, u32::MAX, u32::MAX), (1, u32::MAX)); + + assert_eq!(sbb(0, 1, 1), (0, 0)); + assert_eq!(sbb(1, 1, 1), (1, u32::MAX)); + assert_eq!(sbb(2, 1, 1), (1, u32::MAX)); // any non-zero borrow acts as 1! + assert_eq!(sbb(u8::MAX, 1, 1), (1, u32::MAX)); + assert_eq!(sbb(0, 2, 1), (0, 1)); + assert_eq!(sbb(1, 2, 1), (0, 0)); + assert_eq!(sbb(2, 2, 1), (0, 0)); + assert_eq!(sbb(u8::MAX, 2, 1), (0, 0)); + assert_eq!(sbb(0, 1, 2), (1, u32::MAX)); + assert_eq!(sbb(1, 1, 2), (1, u32::MAX - 1)); + assert_eq!(sbb(2, 1, 2), (1, u32::MAX - 1)); + assert_eq!(sbb(u8::MAX, 1, 2), (1, u32::MAX - 1)); } } @@ -37,6 +58,14 @@ mod x86_64 { (c_out, sum) } + fn sbb(b_in: u8, a: u64, b: u64) -> (u8, u64) { + let mut sum = 0; + // SAFETY: There are no safety requirements for calling `_subborrow_u64`. + // It's just unsafe for API consistency with other intrinsics. + let b_out = unsafe { arch::_subborrow_u64(b_in, a, b, &mut sum) }; + (b_out, sum) + } + pub fn main() { assert_eq!(adc(0, 1, 1), (0, 2)); assert_eq!(adc(1, 1, 1), (0, 3)); @@ -46,6 +75,19 @@ mod x86_64 { assert_eq!(adc(1, u64::MAX, u64::MAX), (1, u64::MAX)); assert_eq!(adc(2, u64::MAX, u64::MAX), (1, u64::MAX)); assert_eq!(adc(u8::MAX, u64::MAX, u64::MAX), (1, u64::MAX)); + + assert_eq!(sbb(0, 1, 1), (0, 0)); + assert_eq!(sbb(1, 1, 1), (1, u64::MAX)); + assert_eq!(sbb(2, 1, 1), (1, u64::MAX)); // any non-zero borrow acts as 1! + assert_eq!(sbb(u8::MAX, 1, 1), (1, u64::MAX)); + assert_eq!(sbb(0, 2, 1), (0, 1)); + assert_eq!(sbb(1, 2, 1), (0, 0)); + assert_eq!(sbb(2, 2, 1), (0, 0)); + assert_eq!(sbb(u8::MAX, 2, 1), (0, 0)); + assert_eq!(sbb(0, 1, 2), (1, u64::MAX)); + assert_eq!(sbb(1, 1, 2), (1, u64::MAX - 1)); + assert_eq!(sbb(2, 1, 2), (1, u64::MAX - 1)); + assert_eq!(sbb(u8::MAX, 1, 2), (1, u64::MAX - 1)); } } From 4625e1e8ab529ad5393a467dc08c0e5e5b908da9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Eduardo=20S=C3=A1nchez=20Mu=C3=B1oz?= Date: Sun, 24 Sep 2023 15:44:52 +0200 Subject: [PATCH 25/26] Use `cfg` gates in x86 SSE and SSE2 tests --- .../miri/tests/pass/intrinsics-x86-sse.rs | 2019 +++++++++-------- .../miri/tests/pass/intrinsics-x86-sse2.rs | 1495 ++++++------ 2 files changed, 1762 insertions(+), 1752 deletions(-) diff --git a/src/tools/miri/tests/pass/intrinsics-x86-sse.rs b/src/tools/miri/tests/pass/intrinsics-x86-sse.rs index 9b1ded94b5d40..a62a5ee3781b7 100644 --- a/src/tools/miri/tests/pass/intrinsics-x86-sse.rs +++ b/src/tools/miri/tests/pass/intrinsics-x86-sse.rs @@ -1,1094 +1,1107 @@ -// Ignore everything except x86 and x86_64 -// Any additional target are added to CI should be ignored here -//@ignore-target-aarch64 -//@ignore-target-arm -//@ignore-target-avr -//@ignore-target-s390x -//@ignore-target-thumbv7em -//@ignore-target-wasm32 - -#[cfg(target_arch = "x86")] -use std::arch::x86::*; -#[cfg(target_arch = "x86_64")] -use std::arch::x86_64::*; -use std::f32::NAN; -use std::mem::transmute; - fn main() { - assert!(is_x86_feature_detected!("sse")); + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + { + assert!(is_x86_feature_detected!("sse")); - unsafe { - test_sse(); + unsafe { + tests::test_sse(); + } } } -macro_rules! assert_approx_eq { - ($a:expr, $b:expr, $eps:expr) => {{ - let (a, b) = (&$a, &$b); - assert!( - (*a - *b).abs() < $eps, - "assertion failed: `(left !== right)` \ +#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +mod tests { + #[cfg(target_arch = "x86")] + use std::arch::x86::*; + #[cfg(target_arch = "x86_64")] + use std::arch::x86_64::*; + use std::f32::NAN; + use std::mem::transmute; + + macro_rules! assert_approx_eq { + ($a:expr, $b:expr, $eps:expr) => {{ + let (a, b) = (&$a, &$b); + assert!( + (*a - *b).abs() < $eps, + "assertion failed: `(left !== right)` \ (left: `{:?}`, right: `{:?}`, expect diff: `{:?}`, real diff: `{:?}`)", - *a, - *b, - $eps, - (*a - *b).abs() - ); - }}; -} - -#[target_feature(enable = "sse")] -unsafe fn test_sse() { - // Mostly copied from library/stdarch/crates/core_arch/src/x86{,_64}/sse.rs - - #[target_feature(enable = "sse")] - unsafe fn assert_eq_m128(a: __m128, b: __m128) { - let r = _mm_cmpeq_ps(a, b); - if _mm_movemask_ps(r) != 0b1111 { - panic!("{:?} != {:?}", a, b); - } - } - - #[target_feature(enable = "sse")] - unsafe fn test_mm_add_ss() { - let a = _mm_set_ps(-1.0, 5.0, 0.0, -10.0); - let b = _mm_set_ps(-100.0, 20.0, 0.0, -5.0); - let r = _mm_add_ss(a, b); - assert_eq_m128(r, _mm_set_ps(-1.0, 5.0, 0.0, -15.0)); - } - test_mm_add_ss(); - - #[target_feature(enable = "sse")] - unsafe fn test_mm_sub_ss() { - let a = _mm_setr_ps(-1.0, 5.0, 0.0, -10.0); - let b = _mm_setr_ps(-100.0, 20.0, 0.0, -5.0); - let r = _mm_sub_ss(a, b); - assert_eq_m128(r, _mm_setr_ps(99.0, 5.0, 0.0, -10.0)); - } - test_mm_sub_ss(); - - #[target_feature(enable = "sse")] - unsafe fn test_mm_mul_ss() { - let a = _mm_setr_ps(-1.0, 5.0, 0.0, -10.0); - let b = _mm_setr_ps(-100.0, 20.0, 0.0, -5.0); - let r = _mm_mul_ss(a, b); - assert_eq_m128(r, _mm_setr_ps(100.0, 5.0, 0.0, -10.0)); - } - test_mm_mul_ss(); - - #[target_feature(enable = "sse")] - unsafe fn test_mm_div_ss() { - let a = _mm_setr_ps(-1.0, 5.0, 0.0, -10.0); - let b = _mm_setr_ps(-100.0, 20.0, 0.0, -5.0); - let r = _mm_div_ss(a, b); - assert_eq_m128(r, _mm_setr_ps(0.01, 5.0, 0.0, -10.0)); - } - test_mm_div_ss(); - - #[target_feature(enable = "sse")] - unsafe fn test_mm_sqrt_ss() { - let a = _mm_setr_ps(4.0, 13.0, 16.0, 100.0); - let r = _mm_sqrt_ss(a); - let e = _mm_setr_ps(2.0, 13.0, 16.0, 100.0); - assert_eq_m128(r, e); - } - test_mm_sqrt_ss(); - - #[target_feature(enable = "sse")] - unsafe fn test_mm_sqrt_ps() { - let a = _mm_setr_ps(4.0, 13.0, 16.0, 100.0); - let r = _mm_sqrt_ps(a); - let e = _mm_setr_ps(2.0, 3.6055512, 4.0, 10.0); - assert_eq_m128(r, e); + *a, + *b, + $eps, + (*a - *b).abs() + ); + }}; } - test_mm_sqrt_ps(); #[target_feature(enable = "sse")] - unsafe fn test_mm_rcp_ss() { - let a = _mm_setr_ps(4.0, 13.0, 16.0, 100.0); - let r = _mm_rcp_ss(a); - let e = _mm_setr_ps(0.24993896, 13.0, 16.0, 100.0); - let rel_err = 0.00048828125; - - let r: [f32; 4] = transmute(r); - let e: [f32; 4] = transmute(e); - assert_approx_eq!(r[0], e[0], 2. * rel_err); - for i in 1..4 { - assert_eq!(r[i], e[i]); + pub(super) unsafe fn test_sse() { + // Mostly copied from library/stdarch/crates/core_arch/src/x86{,_64}/sse.rs + + #[target_feature(enable = "sse")] + unsafe fn assert_eq_m128(a: __m128, b: __m128) { + let r = _mm_cmpeq_ps(a, b); + if _mm_movemask_ps(r) != 0b1111 { + panic!("{:?} != {:?}", a, b); + } } - } - test_mm_rcp_ss(); - #[target_feature(enable = "sse")] - unsafe fn test_mm_rcp_ps() { - let a = _mm_setr_ps(4.0, 13.0, 16.0, 100.0); - let r = _mm_rcp_ps(a); - let e = _mm_setr_ps(0.24993896, 0.0769043, 0.06248474, 0.0099983215); - let rel_err = 0.00048828125; - - let r: [f32; 4] = transmute(r); - let e: [f32; 4] = transmute(e); - for i in 0..4 { - assert_approx_eq!(r[i], e[i], 2. * rel_err); + #[target_feature(enable = "sse")] + unsafe fn test_mm_add_ss() { + let a = _mm_set_ps(-1.0, 5.0, 0.0, -10.0); + let b = _mm_set_ps(-100.0, 20.0, 0.0, -5.0); + let r = _mm_add_ss(a, b); + assert_eq_m128(r, _mm_set_ps(-1.0, 5.0, 0.0, -15.0)); } - } - test_mm_rcp_ps(); - - #[target_feature(enable = "sse")] - unsafe fn test_mm_rsqrt_ss() { - let a = _mm_setr_ps(4.0, 13.0, 16.0, 100.0); - let r = _mm_rsqrt_ss(a); - let e = _mm_setr_ps(0.49987793, 13.0, 16.0, 100.0); - let rel_err = 0.00048828125; - - let r: [f32; 4] = transmute(r); - let e: [f32; 4] = transmute(e); - assert_approx_eq!(r[0], e[0], 2. * rel_err); - for i in 1..4 { - assert_eq!(r[i], e[i]); + test_mm_add_ss(); + + #[target_feature(enable = "sse")] + unsafe fn test_mm_sub_ss() { + let a = _mm_setr_ps(-1.0, 5.0, 0.0, -10.0); + let b = _mm_setr_ps(-100.0, 20.0, 0.0, -5.0); + let r = _mm_sub_ss(a, b); + assert_eq_m128(r, _mm_setr_ps(99.0, 5.0, 0.0, -10.0)); } - } - test_mm_rsqrt_ss(); - - #[target_feature(enable = "sse")] - unsafe fn test_mm_rsqrt_ps() { - let a = _mm_setr_ps(4.0, 13.0, 16.0, 100.0); - let r = _mm_rsqrt_ps(a); - let e = _mm_setr_ps(0.49987793, 0.2772827, 0.24993896, 0.099990845); - let rel_err = 0.00048828125; - - let r: [f32; 4] = transmute(r); - let e: [f32; 4] = transmute(e); - for i in 0..4 { - assert_approx_eq!(r[i], e[i], 2. * rel_err); + test_mm_sub_ss(); + + #[target_feature(enable = "sse")] + unsafe fn test_mm_mul_ss() { + let a = _mm_setr_ps(-1.0, 5.0, 0.0, -10.0); + let b = _mm_setr_ps(-100.0, 20.0, 0.0, -5.0); + let r = _mm_mul_ss(a, b); + assert_eq_m128(r, _mm_setr_ps(100.0, 5.0, 0.0, -10.0)); } - } - test_mm_rsqrt_ps(); - - #[target_feature(enable = "sse")] - unsafe fn test_mm_min_ss() { - let a = _mm_setr_ps(-1.0, 5.0, 0.0, -10.0); - let b = _mm_setr_ps(-100.0, 20.0, 0.0, -5.0); - let r = _mm_min_ss(a, b); - assert_eq_m128(r, _mm_setr_ps(-100.0, 5.0, 0.0, -10.0)); - } - test_mm_min_ss(); - - #[target_feature(enable = "sse")] - unsafe fn test_mm_min_ps() { - let a = _mm_setr_ps(-1.0, 5.0, 0.0, -10.0); - let b = _mm_setr_ps(-100.0, 20.0, 0.0, -5.0); - let r = _mm_min_ps(a, b); - assert_eq_m128(r, _mm_setr_ps(-100.0, 5.0, 0.0, -10.0)); - - // `_mm_min_ps` can **not** be implemented using the `simd_min` rust intrinsic because - // the semantics of `simd_min` are different to those of `_mm_min_ps` regarding handling - // of `-0.0`. - let a = _mm_setr_ps(-0.0, 0.0, 0.0, 0.0); - let b = _mm_setr_ps(0.0, 0.0, 0.0, 0.0); - let r1: [u8; 16] = transmute(_mm_min_ps(a, b)); - let r2: [u8; 16] = transmute(_mm_min_ps(b, a)); - let a: [u8; 16] = transmute(a); - let b: [u8; 16] = transmute(b); - assert_eq!(r1, b); - assert_eq!(r2, a); - assert_ne!(a, b); // sanity check that -0.0 is actually present - } - test_mm_min_ps(); - - #[target_feature(enable = "sse")] - unsafe fn test_mm_max_ss() { - let a = _mm_setr_ps(-1.0, 5.0, 0.0, -10.0); - let b = _mm_setr_ps(-100.0, 20.0, 0.0, -5.0); - let r = _mm_max_ss(a, b); - assert_eq_m128(r, _mm_setr_ps(-1.0, 5.0, 0.0, -10.0)); - } - test_mm_max_ss(); - - #[target_feature(enable = "sse")] - unsafe fn test_mm_max_ps() { - let a = _mm_setr_ps(-1.0, 5.0, 0.0, -10.0); - let b = _mm_setr_ps(-100.0, 20.0, 0.0, -5.0); - let r = _mm_max_ps(a, b); - assert_eq_m128(r, _mm_setr_ps(-1.0, 20.0, 0.0, -5.0)); - - // `_mm_max_ps` can **not** be implemented using the `simd_max` rust intrinsic because - // the semantics of `simd_max` are different to those of `_mm_max_ps` regarding handling - // of `-0.0`. - let a = _mm_setr_ps(-0.0, 0.0, 0.0, 0.0); - let b = _mm_setr_ps(0.0, 0.0, 0.0, 0.0); - let r1: [u8; 16] = transmute(_mm_max_ps(a, b)); - let r2: [u8; 16] = transmute(_mm_max_ps(b, a)); - let a: [u8; 16] = transmute(a); - let b: [u8; 16] = transmute(b); - assert_eq!(r1, b); - assert_eq!(r2, a); - assert_ne!(a, b); // sanity check that -0.0 is actually present - } - test_mm_max_ps(); - - #[target_feature(enable = "sse")] - unsafe fn test_mm_cmpeq_ss() { - let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0); - let b = _mm_setr_ps(-1.0, 5.0, 6.0, 7.0); - let r: [u32; 4] = transmute(_mm_cmpeq_ss(a, b)); - let e: [u32; 4] = transmute(_mm_setr_ps(transmute(0u32), 2.0, 3.0, 4.0)); - assert_eq!(r, e); - - let b2 = _mm_setr_ps(1.0, 5.0, 6.0, 7.0); - let r2: [u32; 4] = transmute(_mm_cmpeq_ss(a, b2)); - let e2: [u32; 4] = transmute(_mm_setr_ps(transmute(0xffffffffu32), 2.0, 3.0, 4.0)); - assert_eq!(r2, e2); - } - test_mm_cmpeq_ss(); - - #[target_feature(enable = "sse")] - unsafe fn test_mm_cmplt_ss() { - let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0); - let b = _mm_setr_ps(0.0, 5.0, 6.0, 7.0); - let c = _mm_setr_ps(1.0, 5.0, 6.0, 7.0); - let d = _mm_setr_ps(2.0, 5.0, 6.0, 7.0); - - let b1 = 0u32; // a.extract(0) < b.extract(0) - let c1 = 0u32; // a.extract(0) < c.extract(0) - let d1 = !0u32; // a.extract(0) < d.extract(0) - - let rb: [u32; 4] = transmute(_mm_cmplt_ss(a, b)); - let eb: [u32; 4] = transmute(_mm_setr_ps(transmute(b1), 2.0, 3.0, 4.0)); - assert_eq!(rb, eb); - - let rc: [u32; 4] = transmute(_mm_cmplt_ss(a, c)); - let ec: [u32; 4] = transmute(_mm_setr_ps(transmute(c1), 2.0, 3.0, 4.0)); - assert_eq!(rc, ec); - - let rd: [u32; 4] = transmute(_mm_cmplt_ss(a, d)); - let ed: [u32; 4] = transmute(_mm_setr_ps(transmute(d1), 2.0, 3.0, 4.0)); - assert_eq!(rd, ed); - } - test_mm_cmplt_ss(); - - #[target_feature(enable = "sse")] - unsafe fn test_mm_cmple_ss() { - let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0); - let b = _mm_setr_ps(0.0, 5.0, 6.0, 7.0); - let c = _mm_setr_ps(1.0, 5.0, 6.0, 7.0); - let d = _mm_setr_ps(2.0, 5.0, 6.0, 7.0); - - let b1 = 0u32; // a.extract(0) <= b.extract(0) - let c1 = !0u32; // a.extract(0) <= c.extract(0) - let d1 = !0u32; // a.extract(0) <= d.extract(0) - - let rb: [u32; 4] = transmute(_mm_cmple_ss(a, b)); - let eb: [u32; 4] = transmute(_mm_setr_ps(transmute(b1), 2.0, 3.0, 4.0)); - assert_eq!(rb, eb); - - let rc: [u32; 4] = transmute(_mm_cmple_ss(a, c)); - let ec: [u32; 4] = transmute(_mm_setr_ps(transmute(c1), 2.0, 3.0, 4.0)); - assert_eq!(rc, ec); - - let rd: [u32; 4] = transmute(_mm_cmple_ss(a, d)); - let ed: [u32; 4] = transmute(_mm_setr_ps(transmute(d1), 2.0, 3.0, 4.0)); - assert_eq!(rd, ed); - } - test_mm_cmple_ss(); - - #[target_feature(enable = "sse")] - unsafe fn test_mm_cmpgt_ss() { - let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0); - let b = _mm_setr_ps(0.0, 5.0, 6.0, 7.0); - let c = _mm_setr_ps(1.0, 5.0, 6.0, 7.0); - let d = _mm_setr_ps(2.0, 5.0, 6.0, 7.0); - - let b1 = !0u32; // a.extract(0) > b.extract(0) - let c1 = 0u32; // a.extract(0) > c.extract(0) - let d1 = 0u32; // a.extract(0) > d.extract(0) - - let rb: [u32; 4] = transmute(_mm_cmpgt_ss(a, b)); - let eb: [u32; 4] = transmute(_mm_setr_ps(transmute(b1), 2.0, 3.0, 4.0)); - assert_eq!(rb, eb); - - let rc: [u32; 4] = transmute(_mm_cmpgt_ss(a, c)); - let ec: [u32; 4] = transmute(_mm_setr_ps(transmute(c1), 2.0, 3.0, 4.0)); - assert_eq!(rc, ec); - - let rd: [u32; 4] = transmute(_mm_cmpgt_ss(a, d)); - let ed: [u32; 4] = transmute(_mm_setr_ps(transmute(d1), 2.0, 3.0, 4.0)); - assert_eq!(rd, ed); - } - test_mm_cmpgt_ss(); - - #[target_feature(enable = "sse")] - unsafe fn test_mm_cmpge_ss() { - let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0); - let b = _mm_setr_ps(0.0, 5.0, 6.0, 7.0); - let c = _mm_setr_ps(1.0, 5.0, 6.0, 7.0); - let d = _mm_setr_ps(2.0, 5.0, 6.0, 7.0); - - let b1 = !0u32; // a.extract(0) >= b.extract(0) - let c1 = !0u32; // a.extract(0) >= c.extract(0) - let d1 = 0u32; // a.extract(0) >= d.extract(0) - - let rb: [u32; 4] = transmute(_mm_cmpge_ss(a, b)); - let eb: [u32; 4] = transmute(_mm_setr_ps(transmute(b1), 2.0, 3.0, 4.0)); - assert_eq!(rb, eb); - - let rc: [u32; 4] = transmute(_mm_cmpge_ss(a, c)); - let ec: [u32; 4] = transmute(_mm_setr_ps(transmute(c1), 2.0, 3.0, 4.0)); - assert_eq!(rc, ec); - - let rd: [u32; 4] = transmute(_mm_cmpge_ss(a, d)); - let ed: [u32; 4] = transmute(_mm_setr_ps(transmute(d1), 2.0, 3.0, 4.0)); - assert_eq!(rd, ed); - } - test_mm_cmpge_ss(); - - #[target_feature(enable = "sse")] - unsafe fn test_mm_cmpneq_ss() { - let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0); - let b = _mm_setr_ps(0.0, 5.0, 6.0, 7.0); - let c = _mm_setr_ps(1.0, 5.0, 6.0, 7.0); - let d = _mm_setr_ps(2.0, 5.0, 6.0, 7.0); - - let b1 = !0u32; // a.extract(0) != b.extract(0) - let c1 = 0u32; // a.extract(0) != c.extract(0) - let d1 = !0u32; // a.extract(0) != d.extract(0) - - let rb: [u32; 4] = transmute(_mm_cmpneq_ss(a, b)); - let eb: [u32; 4] = transmute(_mm_setr_ps(transmute(b1), 2.0, 3.0, 4.0)); - assert_eq!(rb, eb); - - let rc: [u32; 4] = transmute(_mm_cmpneq_ss(a, c)); - let ec: [u32; 4] = transmute(_mm_setr_ps(transmute(c1), 2.0, 3.0, 4.0)); - assert_eq!(rc, ec); - - let rd: [u32; 4] = transmute(_mm_cmpneq_ss(a, d)); - let ed: [u32; 4] = transmute(_mm_setr_ps(transmute(d1), 2.0, 3.0, 4.0)); - assert_eq!(rd, ed); - } - test_mm_cmpneq_ss(); - - #[target_feature(enable = "sse")] - unsafe fn test_mm_cmpnlt_ss() { - // TODO: this test is exactly the same as for `_mm_cmpge_ss`, but there - // must be a difference. It may have to do with behavior in the - // presence of NaNs (signaling or quiet). If so, we should add tests - // for those. - - let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0); - let b = _mm_setr_ps(0.0, 5.0, 6.0, 7.0); - let c = _mm_setr_ps(1.0, 5.0, 6.0, 7.0); - let d = _mm_setr_ps(2.0, 5.0, 6.0, 7.0); - - let b1 = !0u32; // a.extract(0) >= b.extract(0) - let c1 = !0u32; // a.extract(0) >= c.extract(0) - let d1 = 0u32; // a.extract(0) >= d.extract(0) - - let rb: [u32; 4] = transmute(_mm_cmpnlt_ss(a, b)); - let eb: [u32; 4] = transmute(_mm_setr_ps(transmute(b1), 2.0, 3.0, 4.0)); - assert_eq!(rb, eb); - - let rc: [u32; 4] = transmute(_mm_cmpnlt_ss(a, c)); - let ec: [u32; 4] = transmute(_mm_setr_ps(transmute(c1), 2.0, 3.0, 4.0)); - assert_eq!(rc, ec); - - let rd: [u32; 4] = transmute(_mm_cmpnlt_ss(a, d)); - let ed: [u32; 4] = transmute(_mm_setr_ps(transmute(d1), 2.0, 3.0, 4.0)); - assert_eq!(rd, ed); - } - test_mm_cmpnlt_ss(); - - #[target_feature(enable = "sse")] - unsafe fn test_mm_cmpnle_ss() { - // TODO: this test is exactly the same as for `_mm_cmpgt_ss`, but there - // must be a difference. It may have to do with behavior in the - // presence - // of NaNs (signaling or quiet). If so, we should add tests for those. - - let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0); - let b = _mm_setr_ps(0.0, 5.0, 6.0, 7.0); - let c = _mm_setr_ps(1.0, 5.0, 6.0, 7.0); - let d = _mm_setr_ps(2.0, 5.0, 6.0, 7.0); - - let b1 = !0u32; // a.extract(0) > b.extract(0) - let c1 = 0u32; // a.extract(0) > c.extract(0) - let d1 = 0u32; // a.extract(0) > d.extract(0) - - let rb: [u32; 4] = transmute(_mm_cmpnle_ss(a, b)); - let eb: [u32; 4] = transmute(_mm_setr_ps(transmute(b1), 2.0, 3.0, 4.0)); - assert_eq!(rb, eb); - - let rc: [u32; 4] = transmute(_mm_cmpnle_ss(a, c)); - let ec: [u32; 4] = transmute(_mm_setr_ps(transmute(c1), 2.0, 3.0, 4.0)); - assert_eq!(rc, ec); - - let rd: [u32; 4] = transmute(_mm_cmpnle_ss(a, d)); - let ed: [u32; 4] = transmute(_mm_setr_ps(transmute(d1), 2.0, 3.0, 4.0)); - assert_eq!(rd, ed); - } - test_mm_cmpnle_ss(); - - #[target_feature(enable = "sse")] - unsafe fn test_mm_cmpngt_ss() { - // TODO: this test is exactly the same as for `_mm_cmple_ss`, but there - // must be a difference. It may have to do with behavior in the - // presence of NaNs (signaling or quiet). If so, we should add tests - // for those. - - let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0); - let b = _mm_setr_ps(0.0, 5.0, 6.0, 7.0); - let c = _mm_setr_ps(1.0, 5.0, 6.0, 7.0); - let d = _mm_setr_ps(2.0, 5.0, 6.0, 7.0); - - let b1 = 0u32; // a.extract(0) <= b.extract(0) - let c1 = !0u32; // a.extract(0) <= c.extract(0) - let d1 = !0u32; // a.extract(0) <= d.extract(0) - - let rb: [u32; 4] = transmute(_mm_cmpngt_ss(a, b)); - let eb: [u32; 4] = transmute(_mm_setr_ps(transmute(b1), 2.0, 3.0, 4.0)); - assert_eq!(rb, eb); - - let rc: [u32; 4] = transmute(_mm_cmpngt_ss(a, c)); - let ec: [u32; 4] = transmute(_mm_setr_ps(transmute(c1), 2.0, 3.0, 4.0)); - assert_eq!(rc, ec); - - let rd: [u32; 4] = transmute(_mm_cmpngt_ss(a, d)); - let ed: [u32; 4] = transmute(_mm_setr_ps(transmute(d1), 2.0, 3.0, 4.0)); - assert_eq!(rd, ed); - } - test_mm_cmpngt_ss(); - - #[target_feature(enable = "sse")] - unsafe fn test_mm_cmpnge_ss() { - // TODO: this test is exactly the same as for `_mm_cmplt_ss`, but there - // must be a difference. It may have to do with behavior in the - // presence of NaNs (signaling or quiet). If so, we should add tests - // for those. - - let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0); - let b = _mm_setr_ps(0.0, 5.0, 6.0, 7.0); - let c = _mm_setr_ps(1.0, 5.0, 6.0, 7.0); - let d = _mm_setr_ps(2.0, 5.0, 6.0, 7.0); - - let b1 = 0u32; // a.extract(0) < b.extract(0) - let c1 = 0u32; // a.extract(0) < c.extract(0) - let d1 = !0u32; // a.extract(0) < d.extract(0) - - let rb: [u32; 4] = transmute(_mm_cmpnge_ss(a, b)); - let eb: [u32; 4] = transmute(_mm_setr_ps(transmute(b1), 2.0, 3.0, 4.0)); - assert_eq!(rb, eb); - - let rc: [u32; 4] = transmute(_mm_cmpnge_ss(a, c)); - let ec: [u32; 4] = transmute(_mm_setr_ps(transmute(c1), 2.0, 3.0, 4.0)); - assert_eq!(rc, ec); - - let rd: [u32; 4] = transmute(_mm_cmpnge_ss(a, d)); - let ed: [u32; 4] = transmute(_mm_setr_ps(transmute(d1), 2.0, 3.0, 4.0)); - assert_eq!(rd, ed); - } - test_mm_cmpnge_ss(); - - #[target_feature(enable = "sse")] - unsafe fn test_mm_cmpord_ss() { - let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0); - let b = _mm_setr_ps(0.0, 5.0, 6.0, 7.0); - let c = _mm_setr_ps(NAN, 5.0, 6.0, 7.0); - let d = _mm_setr_ps(2.0, 5.0, 6.0, 7.0); - - let b1 = !0u32; // a.extract(0) ord b.extract(0) - let c1 = 0u32; // a.extract(0) ord c.extract(0) - let d1 = !0u32; // a.extract(0) ord d.extract(0) - - let rb: [u32; 4] = transmute(_mm_cmpord_ss(a, b)); - let eb: [u32; 4] = transmute(_mm_setr_ps(transmute(b1), 2.0, 3.0, 4.0)); - assert_eq!(rb, eb); - - let rc: [u32; 4] = transmute(_mm_cmpord_ss(a, c)); - let ec: [u32; 4] = transmute(_mm_setr_ps(transmute(c1), 2.0, 3.0, 4.0)); - assert_eq!(rc, ec); - - let rd: [u32; 4] = transmute(_mm_cmpord_ss(a, d)); - let ed: [u32; 4] = transmute(_mm_setr_ps(transmute(d1), 2.0, 3.0, 4.0)); - assert_eq!(rd, ed); - } - test_mm_cmpord_ss(); - - #[target_feature(enable = "sse")] - unsafe fn test_mm_cmpunord_ss() { - let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0); - let b = _mm_setr_ps(0.0, 5.0, 6.0, 7.0); - let c = _mm_setr_ps(NAN, 5.0, 6.0, 7.0); - let d = _mm_setr_ps(2.0, 5.0, 6.0, 7.0); - - let b1 = 0u32; // a.extract(0) unord b.extract(0) - let c1 = !0u32; // a.extract(0) unord c.extract(0) - let d1 = 0u32; // a.extract(0) unord d.extract(0) - - let rb: [u32; 4] = transmute(_mm_cmpunord_ss(a, b)); - let eb: [u32; 4] = transmute(_mm_setr_ps(transmute(b1), 2.0, 3.0, 4.0)); - assert_eq!(rb, eb); - - let rc: [u32; 4] = transmute(_mm_cmpunord_ss(a, c)); - let ec: [u32; 4] = transmute(_mm_setr_ps(transmute(c1), 2.0, 3.0, 4.0)); - assert_eq!(rc, ec); - - let rd: [u32; 4] = transmute(_mm_cmpunord_ss(a, d)); - let ed: [u32; 4] = transmute(_mm_setr_ps(transmute(d1), 2.0, 3.0, 4.0)); - assert_eq!(rd, ed); - } - test_mm_cmpunord_ss(); - - #[target_feature(enable = "sse")] - unsafe fn test_mm_cmpeq_ps() { - let a = _mm_setr_ps(10.0, 50.0, 1.0, NAN); - let b = _mm_setr_ps(15.0, 20.0, 1.0, NAN); - let tru = !0u32; - let fls = 0u32; - - let e = [fls, fls, tru, fls]; - let r: [u32; 4] = transmute(_mm_cmpeq_ps(a, b)); - assert_eq!(r, e); - } - test_mm_cmpeq_ps(); - - #[target_feature(enable = "sse")] - unsafe fn test_mm_cmplt_ps() { - let a = _mm_setr_ps(10.0, 50.0, 1.0, NAN); - let b = _mm_setr_ps(15.0, 20.0, 1.0, NAN); - let tru = !0u32; - let fls = 0u32; - - let e = [tru, fls, fls, fls]; - let r: [u32; 4] = transmute(_mm_cmplt_ps(a, b)); - assert_eq!(r, e); - } - test_mm_cmplt_ps(); - - #[target_feature(enable = "sse")] - unsafe fn test_mm_cmple_ps() { - let a = _mm_setr_ps(10.0, 50.0, 1.0, 4.0); - let b = _mm_setr_ps(15.0, 20.0, 1.0, NAN); - let tru = !0u32; - let fls = 0u32; - - let e = [tru, fls, tru, fls]; - let r: [u32; 4] = transmute(_mm_cmple_ps(a, b)); - assert_eq!(r, e); - } - test_mm_cmple_ps(); - - #[target_feature(enable = "sse")] - unsafe fn test_mm_cmpgt_ps() { - let a = _mm_setr_ps(10.0, 50.0, 1.0, NAN); - let b = _mm_setr_ps(15.0, 20.0, 1.0, 42.0); - let tru = !0u32; - let fls = 0u32; - - let e = [fls, tru, fls, fls]; - let r: [u32; 4] = transmute(_mm_cmpgt_ps(a, b)); - assert_eq!(r, e); - } - test_mm_cmpgt_ps(); - - #[target_feature(enable = "sse")] - unsafe fn test_mm_cmpge_ps() { - let a = _mm_setr_ps(10.0, 50.0, 1.0, NAN); - let b = _mm_setr_ps(15.0, 20.0, 1.0, 42.0); - let tru = !0u32; - let fls = 0u32; - - let e = [fls, tru, tru, fls]; - let r: [u32; 4] = transmute(_mm_cmpge_ps(a, b)); - assert_eq!(r, e); - } - test_mm_cmpge_ps(); - - #[target_feature(enable = "sse")] - unsafe fn test_mm_cmpneq_ps() { - let a = _mm_setr_ps(10.0, 50.0, 1.0, NAN); - let b = _mm_setr_ps(15.0, 20.0, 1.0, NAN); - let tru = !0u32; - let fls = 0u32; - - let e = [tru, tru, fls, tru]; - let r: [u32; 4] = transmute(_mm_cmpneq_ps(a, b)); - assert_eq!(r, e); - } - test_mm_cmpneq_ps(); - - #[target_feature(enable = "sse")] - unsafe fn test_mm_cmpnlt_ps() { - let a = _mm_setr_ps(10.0, 50.0, 1.0, NAN); - let b = _mm_setr_ps(15.0, 20.0, 1.0, 5.0); - let tru = !0u32; - let fls = 0u32; - - let e = [fls, tru, tru, tru]; - let r: [u32; 4] = transmute(_mm_cmpnlt_ps(a, b)); - assert_eq!(r, e); - } - test_mm_cmpnlt_ps(); - - #[target_feature(enable = "sse")] - unsafe fn test_mm_cmpnle_ps() { - let a = _mm_setr_ps(10.0, 50.0, 1.0, NAN); - let b = _mm_setr_ps(15.0, 20.0, 1.0, 5.0); - let tru = !0u32; - let fls = 0u32; - - let e = [fls, tru, fls, tru]; - let r: [u32; 4] = transmute(_mm_cmpnle_ps(a, b)); - assert_eq!(r, e); - } - test_mm_cmpnle_ps(); - - #[target_feature(enable = "sse")] - unsafe fn test_mm_cmpngt_ps() { - let a = _mm_setr_ps(10.0, 50.0, 1.0, NAN); - let b = _mm_setr_ps(15.0, 20.0, 1.0, 5.0); - let tru = !0u32; - let fls = 0u32; - - let e = [tru, fls, tru, tru]; - let r: [u32; 4] = transmute(_mm_cmpngt_ps(a, b)); - assert_eq!(r, e); - } - test_mm_cmpngt_ps(); - - #[target_feature(enable = "sse")] - unsafe fn test_mm_cmpnge_ps() { - let a = _mm_setr_ps(10.0, 50.0, 1.0, NAN); - let b = _mm_setr_ps(15.0, 20.0, 1.0, 5.0); - let tru = !0u32; - let fls = 0u32; - - let e = [tru, fls, fls, tru]; - let r: [u32; 4] = transmute(_mm_cmpnge_ps(a, b)); - assert_eq!(r, e); - } - test_mm_cmpnge_ps(); - - #[target_feature(enable = "sse")] - unsafe fn test_mm_cmpord_ps() { - let a = _mm_setr_ps(10.0, 50.0, NAN, NAN); - let b = _mm_setr_ps(15.0, NAN, 1.0, NAN); - let tru = !0u32; - let fls = 0u32; - - let e = [tru, fls, fls, fls]; - let r: [u32; 4] = transmute(_mm_cmpord_ps(a, b)); - assert_eq!(r, e); - } - test_mm_cmpord_ps(); - - #[target_feature(enable = "sse")] - unsafe fn test_mm_cmpunord_ps() { - let a = _mm_setr_ps(10.0, 50.0, NAN, NAN); - let b = _mm_setr_ps(15.0, NAN, 1.0, NAN); - let tru = !0u32; - let fls = 0u32; - - let e = [fls, tru, tru, tru]; - let r: [u32; 4] = transmute(_mm_cmpunord_ps(a, b)); - assert_eq!(r, e); - } - test_mm_cmpunord_ps(); + test_mm_mul_ss(); + + #[target_feature(enable = "sse")] + unsafe fn test_mm_div_ss() { + let a = _mm_setr_ps(-1.0, 5.0, 0.0, -10.0); + let b = _mm_setr_ps(-100.0, 20.0, 0.0, -5.0); + let r = _mm_div_ss(a, b); + assert_eq_m128(r, _mm_setr_ps(0.01, 5.0, 0.0, -10.0)); + } + test_mm_div_ss(); + + #[target_feature(enable = "sse")] + unsafe fn test_mm_sqrt_ss() { + let a = _mm_setr_ps(4.0, 13.0, 16.0, 100.0); + let r = _mm_sqrt_ss(a); + let e = _mm_setr_ps(2.0, 13.0, 16.0, 100.0); + assert_eq_m128(r, e); + } + test_mm_sqrt_ss(); + + #[target_feature(enable = "sse")] + unsafe fn test_mm_sqrt_ps() { + let a = _mm_setr_ps(4.0, 13.0, 16.0, 100.0); + let r = _mm_sqrt_ps(a); + let e = _mm_setr_ps(2.0, 3.6055512, 4.0, 10.0); + assert_eq_m128(r, e); + } + test_mm_sqrt_ps(); + + #[target_feature(enable = "sse")] + unsafe fn test_mm_rcp_ss() { + let a = _mm_setr_ps(4.0, 13.0, 16.0, 100.0); + let r = _mm_rcp_ss(a); + let e = _mm_setr_ps(0.24993896, 13.0, 16.0, 100.0); + let rel_err = 0.00048828125; + + let r: [f32; 4] = transmute(r); + let e: [f32; 4] = transmute(e); + assert_approx_eq!(r[0], e[0], 2. * rel_err); + for i in 1..4 { + assert_eq!(r[i], e[i]); + } + } + test_mm_rcp_ss(); + + #[target_feature(enable = "sse")] + unsafe fn test_mm_rcp_ps() { + let a = _mm_setr_ps(4.0, 13.0, 16.0, 100.0); + let r = _mm_rcp_ps(a); + let e = _mm_setr_ps(0.24993896, 0.0769043, 0.06248474, 0.0099983215); + let rel_err = 0.00048828125; + + let r: [f32; 4] = transmute(r); + let e: [f32; 4] = transmute(e); + for i in 0..4 { + assert_approx_eq!(r[i], e[i], 2. * rel_err); + } + } + test_mm_rcp_ps(); + + #[target_feature(enable = "sse")] + unsafe fn test_mm_rsqrt_ss() { + let a = _mm_setr_ps(4.0, 13.0, 16.0, 100.0); + let r = _mm_rsqrt_ss(a); + let e = _mm_setr_ps(0.49987793, 13.0, 16.0, 100.0); + let rel_err = 0.00048828125; + + let r: [f32; 4] = transmute(r); + let e: [f32; 4] = transmute(e); + assert_approx_eq!(r[0], e[0], 2. * rel_err); + for i in 1..4 { + assert_eq!(r[i], e[i]); + } + } + test_mm_rsqrt_ss(); + + #[target_feature(enable = "sse")] + unsafe fn test_mm_rsqrt_ps() { + let a = _mm_setr_ps(4.0, 13.0, 16.0, 100.0); + let r = _mm_rsqrt_ps(a); + let e = _mm_setr_ps(0.49987793, 0.2772827, 0.24993896, 0.099990845); + let rel_err = 0.00048828125; + + let r: [f32; 4] = transmute(r); + let e: [f32; 4] = transmute(e); + for i in 0..4 { + assert_approx_eq!(r[i], e[i], 2. * rel_err); + } + } + test_mm_rsqrt_ps(); + + #[target_feature(enable = "sse")] + unsafe fn test_mm_min_ss() { + let a = _mm_setr_ps(-1.0, 5.0, 0.0, -10.0); + let b = _mm_setr_ps(-100.0, 20.0, 0.0, -5.0); + let r = _mm_min_ss(a, b); + assert_eq_m128(r, _mm_setr_ps(-100.0, 5.0, 0.0, -10.0)); + } + test_mm_min_ss(); + + #[target_feature(enable = "sse")] + unsafe fn test_mm_min_ps() { + let a = _mm_setr_ps(-1.0, 5.0, 0.0, -10.0); + let b = _mm_setr_ps(-100.0, 20.0, 0.0, -5.0); + let r = _mm_min_ps(a, b); + assert_eq_m128(r, _mm_setr_ps(-100.0, 5.0, 0.0, -10.0)); + + // `_mm_min_ps` can **not** be implemented using the `simd_min` rust intrinsic because + // the semantics of `simd_min` are different to those of `_mm_min_ps` regarding handling + // of `-0.0`. + let a = _mm_setr_ps(-0.0, 0.0, 0.0, 0.0); + let b = _mm_setr_ps(0.0, 0.0, 0.0, 0.0); + let r1: [u8; 16] = transmute(_mm_min_ps(a, b)); + let r2: [u8; 16] = transmute(_mm_min_ps(b, a)); + let a: [u8; 16] = transmute(a); + let b: [u8; 16] = transmute(b); + assert_eq!(r1, b); + assert_eq!(r2, a); + assert_ne!(a, b); // sanity check that -0.0 is actually present + } + test_mm_min_ps(); + + #[target_feature(enable = "sse")] + unsafe fn test_mm_max_ss() { + let a = _mm_setr_ps(-1.0, 5.0, 0.0, -10.0); + let b = _mm_setr_ps(-100.0, 20.0, 0.0, -5.0); + let r = _mm_max_ss(a, b); + assert_eq_m128(r, _mm_setr_ps(-1.0, 5.0, 0.0, -10.0)); + } + test_mm_max_ss(); + + #[target_feature(enable = "sse")] + unsafe fn test_mm_max_ps() { + let a = _mm_setr_ps(-1.0, 5.0, 0.0, -10.0); + let b = _mm_setr_ps(-100.0, 20.0, 0.0, -5.0); + let r = _mm_max_ps(a, b); + assert_eq_m128(r, _mm_setr_ps(-1.0, 20.0, 0.0, -5.0)); + + // `_mm_max_ps` can **not** be implemented using the `simd_max` rust intrinsic because + // the semantics of `simd_max` are different to those of `_mm_max_ps` regarding handling + // of `-0.0`. + let a = _mm_setr_ps(-0.0, 0.0, 0.0, 0.0); + let b = _mm_setr_ps(0.0, 0.0, 0.0, 0.0); + let r1: [u8; 16] = transmute(_mm_max_ps(a, b)); + let r2: [u8; 16] = transmute(_mm_max_ps(b, a)); + let a: [u8; 16] = transmute(a); + let b: [u8; 16] = transmute(b); + assert_eq!(r1, b); + assert_eq!(r2, a); + assert_ne!(a, b); // sanity check that -0.0 is actually present + } + test_mm_max_ps(); + + #[target_feature(enable = "sse")] + unsafe fn test_mm_cmpeq_ss() { + let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0); + let b = _mm_setr_ps(-1.0, 5.0, 6.0, 7.0); + let r: [u32; 4] = transmute(_mm_cmpeq_ss(a, b)); + let e: [u32; 4] = transmute(_mm_setr_ps(transmute(0u32), 2.0, 3.0, 4.0)); + assert_eq!(r, e); + + let b2 = _mm_setr_ps(1.0, 5.0, 6.0, 7.0); + let r2: [u32; 4] = transmute(_mm_cmpeq_ss(a, b2)); + let e2: [u32; 4] = transmute(_mm_setr_ps(transmute(0xffffffffu32), 2.0, 3.0, 4.0)); + assert_eq!(r2, e2); + } + test_mm_cmpeq_ss(); + + #[target_feature(enable = "sse")] + unsafe fn test_mm_cmplt_ss() { + let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0); + let b = _mm_setr_ps(0.0, 5.0, 6.0, 7.0); + let c = _mm_setr_ps(1.0, 5.0, 6.0, 7.0); + let d = _mm_setr_ps(2.0, 5.0, 6.0, 7.0); + + let b1 = 0u32; // a.extract(0) < b.extract(0) + let c1 = 0u32; // a.extract(0) < c.extract(0) + let d1 = !0u32; // a.extract(0) < d.extract(0) + + let rb: [u32; 4] = transmute(_mm_cmplt_ss(a, b)); + let eb: [u32; 4] = transmute(_mm_setr_ps(transmute(b1), 2.0, 3.0, 4.0)); + assert_eq!(rb, eb); + + let rc: [u32; 4] = transmute(_mm_cmplt_ss(a, c)); + let ec: [u32; 4] = transmute(_mm_setr_ps(transmute(c1), 2.0, 3.0, 4.0)); + assert_eq!(rc, ec); + + let rd: [u32; 4] = transmute(_mm_cmplt_ss(a, d)); + let ed: [u32; 4] = transmute(_mm_setr_ps(transmute(d1), 2.0, 3.0, 4.0)); + assert_eq!(rd, ed); + } + test_mm_cmplt_ss(); + + #[target_feature(enable = "sse")] + unsafe fn test_mm_cmple_ss() { + let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0); + let b = _mm_setr_ps(0.0, 5.0, 6.0, 7.0); + let c = _mm_setr_ps(1.0, 5.0, 6.0, 7.0); + let d = _mm_setr_ps(2.0, 5.0, 6.0, 7.0); + + let b1 = 0u32; // a.extract(0) <= b.extract(0) + let c1 = !0u32; // a.extract(0) <= c.extract(0) + let d1 = !0u32; // a.extract(0) <= d.extract(0) + + let rb: [u32; 4] = transmute(_mm_cmple_ss(a, b)); + let eb: [u32; 4] = transmute(_mm_setr_ps(transmute(b1), 2.0, 3.0, 4.0)); + assert_eq!(rb, eb); + + let rc: [u32; 4] = transmute(_mm_cmple_ss(a, c)); + let ec: [u32; 4] = transmute(_mm_setr_ps(transmute(c1), 2.0, 3.0, 4.0)); + assert_eq!(rc, ec); + + let rd: [u32; 4] = transmute(_mm_cmple_ss(a, d)); + let ed: [u32; 4] = transmute(_mm_setr_ps(transmute(d1), 2.0, 3.0, 4.0)); + assert_eq!(rd, ed); + } + test_mm_cmple_ss(); + + #[target_feature(enable = "sse")] + unsafe fn test_mm_cmpgt_ss() { + let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0); + let b = _mm_setr_ps(0.0, 5.0, 6.0, 7.0); + let c = _mm_setr_ps(1.0, 5.0, 6.0, 7.0); + let d = _mm_setr_ps(2.0, 5.0, 6.0, 7.0); + + let b1 = !0u32; // a.extract(0) > b.extract(0) + let c1 = 0u32; // a.extract(0) > c.extract(0) + let d1 = 0u32; // a.extract(0) > d.extract(0) + + let rb: [u32; 4] = transmute(_mm_cmpgt_ss(a, b)); + let eb: [u32; 4] = transmute(_mm_setr_ps(transmute(b1), 2.0, 3.0, 4.0)); + assert_eq!(rb, eb); + + let rc: [u32; 4] = transmute(_mm_cmpgt_ss(a, c)); + let ec: [u32; 4] = transmute(_mm_setr_ps(transmute(c1), 2.0, 3.0, 4.0)); + assert_eq!(rc, ec); + + let rd: [u32; 4] = transmute(_mm_cmpgt_ss(a, d)); + let ed: [u32; 4] = transmute(_mm_setr_ps(transmute(d1), 2.0, 3.0, 4.0)); + assert_eq!(rd, ed); + } + test_mm_cmpgt_ss(); + + #[target_feature(enable = "sse")] + unsafe fn test_mm_cmpge_ss() { + let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0); + let b = _mm_setr_ps(0.0, 5.0, 6.0, 7.0); + let c = _mm_setr_ps(1.0, 5.0, 6.0, 7.0); + let d = _mm_setr_ps(2.0, 5.0, 6.0, 7.0); + + let b1 = !0u32; // a.extract(0) >= b.extract(0) + let c1 = !0u32; // a.extract(0) >= c.extract(0) + let d1 = 0u32; // a.extract(0) >= d.extract(0) + + let rb: [u32; 4] = transmute(_mm_cmpge_ss(a, b)); + let eb: [u32; 4] = transmute(_mm_setr_ps(transmute(b1), 2.0, 3.0, 4.0)); + assert_eq!(rb, eb); + + let rc: [u32; 4] = transmute(_mm_cmpge_ss(a, c)); + let ec: [u32; 4] = transmute(_mm_setr_ps(transmute(c1), 2.0, 3.0, 4.0)); + assert_eq!(rc, ec); + + let rd: [u32; 4] = transmute(_mm_cmpge_ss(a, d)); + let ed: [u32; 4] = transmute(_mm_setr_ps(transmute(d1), 2.0, 3.0, 4.0)); + assert_eq!(rd, ed); + } + test_mm_cmpge_ss(); + + #[target_feature(enable = "sse")] + unsafe fn test_mm_cmpneq_ss() { + let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0); + let b = _mm_setr_ps(0.0, 5.0, 6.0, 7.0); + let c = _mm_setr_ps(1.0, 5.0, 6.0, 7.0); + let d = _mm_setr_ps(2.0, 5.0, 6.0, 7.0); + + let b1 = !0u32; // a.extract(0) != b.extract(0) + let c1 = 0u32; // a.extract(0) != c.extract(0) + let d1 = !0u32; // a.extract(0) != d.extract(0) + + let rb: [u32; 4] = transmute(_mm_cmpneq_ss(a, b)); + let eb: [u32; 4] = transmute(_mm_setr_ps(transmute(b1), 2.0, 3.0, 4.0)); + assert_eq!(rb, eb); + + let rc: [u32; 4] = transmute(_mm_cmpneq_ss(a, c)); + let ec: [u32; 4] = transmute(_mm_setr_ps(transmute(c1), 2.0, 3.0, 4.0)); + assert_eq!(rc, ec); + + let rd: [u32; 4] = transmute(_mm_cmpneq_ss(a, d)); + let ed: [u32; 4] = transmute(_mm_setr_ps(transmute(d1), 2.0, 3.0, 4.0)); + assert_eq!(rd, ed); + } + test_mm_cmpneq_ss(); + + #[target_feature(enable = "sse")] + unsafe fn test_mm_cmpnlt_ss() { + // TODO: this test is exactly the same as for `_mm_cmpge_ss`, but there + // must be a difference. It may have to do with behavior in the + // presence of NaNs (signaling or quiet). If so, we should add tests + // for those. + + let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0); + let b = _mm_setr_ps(0.0, 5.0, 6.0, 7.0); + let c = _mm_setr_ps(1.0, 5.0, 6.0, 7.0); + let d = _mm_setr_ps(2.0, 5.0, 6.0, 7.0); + + let b1 = !0u32; // a.extract(0) >= b.extract(0) + let c1 = !0u32; // a.extract(0) >= c.extract(0) + let d1 = 0u32; // a.extract(0) >= d.extract(0) + + let rb: [u32; 4] = transmute(_mm_cmpnlt_ss(a, b)); + let eb: [u32; 4] = transmute(_mm_setr_ps(transmute(b1), 2.0, 3.0, 4.0)); + assert_eq!(rb, eb); + + let rc: [u32; 4] = transmute(_mm_cmpnlt_ss(a, c)); + let ec: [u32; 4] = transmute(_mm_setr_ps(transmute(c1), 2.0, 3.0, 4.0)); + assert_eq!(rc, ec); + + let rd: [u32; 4] = transmute(_mm_cmpnlt_ss(a, d)); + let ed: [u32; 4] = transmute(_mm_setr_ps(transmute(d1), 2.0, 3.0, 4.0)); + assert_eq!(rd, ed); + } + test_mm_cmpnlt_ss(); + + #[target_feature(enable = "sse")] + unsafe fn test_mm_cmpnle_ss() { + // TODO: this test is exactly the same as for `_mm_cmpgt_ss`, but there + // must be a difference. It may have to do with behavior in the + // presence + // of NaNs (signaling or quiet). If so, we should add tests for those. + + let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0); + let b = _mm_setr_ps(0.0, 5.0, 6.0, 7.0); + let c = _mm_setr_ps(1.0, 5.0, 6.0, 7.0); + let d = _mm_setr_ps(2.0, 5.0, 6.0, 7.0); + + let b1 = !0u32; // a.extract(0) > b.extract(0) + let c1 = 0u32; // a.extract(0) > c.extract(0) + let d1 = 0u32; // a.extract(0) > d.extract(0) + + let rb: [u32; 4] = transmute(_mm_cmpnle_ss(a, b)); + let eb: [u32; 4] = transmute(_mm_setr_ps(transmute(b1), 2.0, 3.0, 4.0)); + assert_eq!(rb, eb); + + let rc: [u32; 4] = transmute(_mm_cmpnle_ss(a, c)); + let ec: [u32; 4] = transmute(_mm_setr_ps(transmute(c1), 2.0, 3.0, 4.0)); + assert_eq!(rc, ec); + + let rd: [u32; 4] = transmute(_mm_cmpnle_ss(a, d)); + let ed: [u32; 4] = transmute(_mm_setr_ps(transmute(d1), 2.0, 3.0, 4.0)); + assert_eq!(rd, ed); + } + test_mm_cmpnle_ss(); + + #[target_feature(enable = "sse")] + unsafe fn test_mm_cmpngt_ss() { + // TODO: this test is exactly the same as for `_mm_cmple_ss`, but there + // must be a difference. It may have to do with behavior in the + // presence of NaNs (signaling or quiet). If so, we should add tests + // for those. + + let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0); + let b = _mm_setr_ps(0.0, 5.0, 6.0, 7.0); + let c = _mm_setr_ps(1.0, 5.0, 6.0, 7.0); + let d = _mm_setr_ps(2.0, 5.0, 6.0, 7.0); + + let b1 = 0u32; // a.extract(0) <= b.extract(0) + let c1 = !0u32; // a.extract(0) <= c.extract(0) + let d1 = !0u32; // a.extract(0) <= d.extract(0) + + let rb: [u32; 4] = transmute(_mm_cmpngt_ss(a, b)); + let eb: [u32; 4] = transmute(_mm_setr_ps(transmute(b1), 2.0, 3.0, 4.0)); + assert_eq!(rb, eb); + + let rc: [u32; 4] = transmute(_mm_cmpngt_ss(a, c)); + let ec: [u32; 4] = transmute(_mm_setr_ps(transmute(c1), 2.0, 3.0, 4.0)); + assert_eq!(rc, ec); + + let rd: [u32; 4] = transmute(_mm_cmpngt_ss(a, d)); + let ed: [u32; 4] = transmute(_mm_setr_ps(transmute(d1), 2.0, 3.0, 4.0)); + assert_eq!(rd, ed); + } + test_mm_cmpngt_ss(); + + #[target_feature(enable = "sse")] + unsafe fn test_mm_cmpnge_ss() { + // TODO: this test is exactly the same as for `_mm_cmplt_ss`, but there + // must be a difference. It may have to do with behavior in the + // presence of NaNs (signaling or quiet). If so, we should add tests + // for those. + + let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0); + let b = _mm_setr_ps(0.0, 5.0, 6.0, 7.0); + let c = _mm_setr_ps(1.0, 5.0, 6.0, 7.0); + let d = _mm_setr_ps(2.0, 5.0, 6.0, 7.0); + + let b1 = 0u32; // a.extract(0) < b.extract(0) + let c1 = 0u32; // a.extract(0) < c.extract(0) + let d1 = !0u32; // a.extract(0) < d.extract(0) + + let rb: [u32; 4] = transmute(_mm_cmpnge_ss(a, b)); + let eb: [u32; 4] = transmute(_mm_setr_ps(transmute(b1), 2.0, 3.0, 4.0)); + assert_eq!(rb, eb); + + let rc: [u32; 4] = transmute(_mm_cmpnge_ss(a, c)); + let ec: [u32; 4] = transmute(_mm_setr_ps(transmute(c1), 2.0, 3.0, 4.0)); + assert_eq!(rc, ec); + + let rd: [u32; 4] = transmute(_mm_cmpnge_ss(a, d)); + let ed: [u32; 4] = transmute(_mm_setr_ps(transmute(d1), 2.0, 3.0, 4.0)); + assert_eq!(rd, ed); + } + test_mm_cmpnge_ss(); + + #[target_feature(enable = "sse")] + unsafe fn test_mm_cmpord_ss() { + let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0); + let b = _mm_setr_ps(0.0, 5.0, 6.0, 7.0); + let c = _mm_setr_ps(NAN, 5.0, 6.0, 7.0); + let d = _mm_setr_ps(2.0, 5.0, 6.0, 7.0); + + let b1 = !0u32; // a.extract(0) ord b.extract(0) + let c1 = 0u32; // a.extract(0) ord c.extract(0) + let d1 = !0u32; // a.extract(0) ord d.extract(0) + + let rb: [u32; 4] = transmute(_mm_cmpord_ss(a, b)); + let eb: [u32; 4] = transmute(_mm_setr_ps(transmute(b1), 2.0, 3.0, 4.0)); + assert_eq!(rb, eb); + + let rc: [u32; 4] = transmute(_mm_cmpord_ss(a, c)); + let ec: [u32; 4] = transmute(_mm_setr_ps(transmute(c1), 2.0, 3.0, 4.0)); + assert_eq!(rc, ec); + + let rd: [u32; 4] = transmute(_mm_cmpord_ss(a, d)); + let ed: [u32; 4] = transmute(_mm_setr_ps(transmute(d1), 2.0, 3.0, 4.0)); + assert_eq!(rd, ed); + } + test_mm_cmpord_ss(); + + #[target_feature(enable = "sse")] + unsafe fn test_mm_cmpunord_ss() { + let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0); + let b = _mm_setr_ps(0.0, 5.0, 6.0, 7.0); + let c = _mm_setr_ps(NAN, 5.0, 6.0, 7.0); + let d = _mm_setr_ps(2.0, 5.0, 6.0, 7.0); + + let b1 = 0u32; // a.extract(0) unord b.extract(0) + let c1 = !0u32; // a.extract(0) unord c.extract(0) + let d1 = 0u32; // a.extract(0) unord d.extract(0) + + let rb: [u32; 4] = transmute(_mm_cmpunord_ss(a, b)); + let eb: [u32; 4] = transmute(_mm_setr_ps(transmute(b1), 2.0, 3.0, 4.0)); + assert_eq!(rb, eb); + + let rc: [u32; 4] = transmute(_mm_cmpunord_ss(a, c)); + let ec: [u32; 4] = transmute(_mm_setr_ps(transmute(c1), 2.0, 3.0, 4.0)); + assert_eq!(rc, ec); + + let rd: [u32; 4] = transmute(_mm_cmpunord_ss(a, d)); + let ed: [u32; 4] = transmute(_mm_setr_ps(transmute(d1), 2.0, 3.0, 4.0)); + assert_eq!(rd, ed); + } + test_mm_cmpunord_ss(); + + #[target_feature(enable = "sse")] + unsafe fn test_mm_cmpeq_ps() { + let a = _mm_setr_ps(10.0, 50.0, 1.0, NAN); + let b = _mm_setr_ps(15.0, 20.0, 1.0, NAN); + let tru = !0u32; + let fls = 0u32; + + let e = [fls, fls, tru, fls]; + let r: [u32; 4] = transmute(_mm_cmpeq_ps(a, b)); + assert_eq!(r, e); + } + test_mm_cmpeq_ps(); + + #[target_feature(enable = "sse")] + unsafe fn test_mm_cmplt_ps() { + let a = _mm_setr_ps(10.0, 50.0, 1.0, NAN); + let b = _mm_setr_ps(15.0, 20.0, 1.0, NAN); + let tru = !0u32; + let fls = 0u32; + + let e = [tru, fls, fls, fls]; + let r: [u32; 4] = transmute(_mm_cmplt_ps(a, b)); + assert_eq!(r, e); + } + test_mm_cmplt_ps(); + + #[target_feature(enable = "sse")] + unsafe fn test_mm_cmple_ps() { + let a = _mm_setr_ps(10.0, 50.0, 1.0, 4.0); + let b = _mm_setr_ps(15.0, 20.0, 1.0, NAN); + let tru = !0u32; + let fls = 0u32; + + let e = [tru, fls, tru, fls]; + let r: [u32; 4] = transmute(_mm_cmple_ps(a, b)); + assert_eq!(r, e); + } + test_mm_cmple_ps(); + + #[target_feature(enable = "sse")] + unsafe fn test_mm_cmpgt_ps() { + let a = _mm_setr_ps(10.0, 50.0, 1.0, NAN); + let b = _mm_setr_ps(15.0, 20.0, 1.0, 42.0); + let tru = !0u32; + let fls = 0u32; + + let e = [fls, tru, fls, fls]; + let r: [u32; 4] = transmute(_mm_cmpgt_ps(a, b)); + assert_eq!(r, e); + } + test_mm_cmpgt_ps(); + + #[target_feature(enable = "sse")] + unsafe fn test_mm_cmpge_ps() { + let a = _mm_setr_ps(10.0, 50.0, 1.0, NAN); + let b = _mm_setr_ps(15.0, 20.0, 1.0, 42.0); + let tru = !0u32; + let fls = 0u32; + + let e = [fls, tru, tru, fls]; + let r: [u32; 4] = transmute(_mm_cmpge_ps(a, b)); + assert_eq!(r, e); + } + test_mm_cmpge_ps(); + + #[target_feature(enable = "sse")] + unsafe fn test_mm_cmpneq_ps() { + let a = _mm_setr_ps(10.0, 50.0, 1.0, NAN); + let b = _mm_setr_ps(15.0, 20.0, 1.0, NAN); + let tru = !0u32; + let fls = 0u32; + + let e = [tru, tru, fls, tru]; + let r: [u32; 4] = transmute(_mm_cmpneq_ps(a, b)); + assert_eq!(r, e); + } + test_mm_cmpneq_ps(); + + #[target_feature(enable = "sse")] + unsafe fn test_mm_cmpnlt_ps() { + let a = _mm_setr_ps(10.0, 50.0, 1.0, NAN); + let b = _mm_setr_ps(15.0, 20.0, 1.0, 5.0); + let tru = !0u32; + let fls = 0u32; + + let e = [fls, tru, tru, tru]; + let r: [u32; 4] = transmute(_mm_cmpnlt_ps(a, b)); + assert_eq!(r, e); + } + test_mm_cmpnlt_ps(); + + #[target_feature(enable = "sse")] + unsafe fn test_mm_cmpnle_ps() { + let a = _mm_setr_ps(10.0, 50.0, 1.0, NAN); + let b = _mm_setr_ps(15.0, 20.0, 1.0, 5.0); + let tru = !0u32; + let fls = 0u32; + + let e = [fls, tru, fls, tru]; + let r: [u32; 4] = transmute(_mm_cmpnle_ps(a, b)); + assert_eq!(r, e); + } + test_mm_cmpnle_ps(); + + #[target_feature(enable = "sse")] + unsafe fn test_mm_cmpngt_ps() { + let a = _mm_setr_ps(10.0, 50.0, 1.0, NAN); + let b = _mm_setr_ps(15.0, 20.0, 1.0, 5.0); + let tru = !0u32; + let fls = 0u32; + + let e = [tru, fls, tru, tru]; + let r: [u32; 4] = transmute(_mm_cmpngt_ps(a, b)); + assert_eq!(r, e); + } + test_mm_cmpngt_ps(); + + #[target_feature(enable = "sse")] + unsafe fn test_mm_cmpnge_ps() { + let a = _mm_setr_ps(10.0, 50.0, 1.0, NAN); + let b = _mm_setr_ps(15.0, 20.0, 1.0, 5.0); + let tru = !0u32; + let fls = 0u32; + + let e = [tru, fls, fls, tru]; + let r: [u32; 4] = transmute(_mm_cmpnge_ps(a, b)); + assert_eq!(r, e); + } + test_mm_cmpnge_ps(); + + #[target_feature(enable = "sse")] + unsafe fn test_mm_cmpord_ps() { + let a = _mm_setr_ps(10.0, 50.0, NAN, NAN); + let b = _mm_setr_ps(15.0, NAN, 1.0, NAN); + let tru = !0u32; + let fls = 0u32; + + let e = [tru, fls, fls, fls]; + let r: [u32; 4] = transmute(_mm_cmpord_ps(a, b)); + assert_eq!(r, e); + } + test_mm_cmpord_ps(); + + #[target_feature(enable = "sse")] + unsafe fn test_mm_cmpunord_ps() { + let a = _mm_setr_ps(10.0, 50.0, NAN, NAN); + let b = _mm_setr_ps(15.0, NAN, 1.0, NAN); + let tru = !0u32; + let fls = 0u32; + + let e = [fls, tru, tru, tru]; + let r: [u32; 4] = transmute(_mm_cmpunord_ps(a, b)); + assert_eq!(r, e); + } + test_mm_cmpunord_ps(); - #[target_feature(enable = "sse")] - unsafe fn test_mm_comieq_ss() { - let aa = &[3.0f32, 12.0, 23.0, NAN]; - let bb = &[3.0f32, 47.5, 1.5, NAN]; + #[target_feature(enable = "sse")] + unsafe fn test_mm_comieq_ss() { + let aa = &[3.0f32, 12.0, 23.0, NAN]; + let bb = &[3.0f32, 47.5, 1.5, NAN]; - let ee = &[1i32, 0, 0, 0]; + let ee = &[1i32, 0, 0, 0]; - for i in 0..4 { - let a = _mm_setr_ps(aa[i], 1.0, 2.0, 3.0); - let b = _mm_setr_ps(bb[i], 0.0, 2.0, 4.0); + for i in 0..4 { + let a = _mm_setr_ps(aa[i], 1.0, 2.0, 3.0); + let b = _mm_setr_ps(bb[i], 0.0, 2.0, 4.0); - let r = _mm_comieq_ss(a, b); + let r = _mm_comieq_ss(a, b); - assert_eq!( - ee[i], r, - "_mm_comieq_ss({:?}, {:?}) = {}, expected: {} (i={})", - a, b, r, ee[i], i - ); + assert_eq!( + ee[i], r, + "_mm_comieq_ss({:?}, {:?}) = {}, expected: {} (i={})", + a, b, r, ee[i], i + ); + } } - } - test_mm_comieq_ss(); + test_mm_comieq_ss(); - #[target_feature(enable = "sse")] - unsafe fn test_mm_comilt_ss() { - let aa = &[3.0f32, 12.0, 23.0, NAN]; - let bb = &[3.0f32, 47.5, 1.5, NAN]; + #[target_feature(enable = "sse")] + unsafe fn test_mm_comilt_ss() { + let aa = &[3.0f32, 12.0, 23.0, NAN]; + let bb = &[3.0f32, 47.5, 1.5, NAN]; - let ee = &[0i32, 1, 0, 0]; + let ee = &[0i32, 1, 0, 0]; - for i in 0..4 { - let a = _mm_setr_ps(aa[i], 1.0, 2.0, 3.0); - let b = _mm_setr_ps(bb[i], 0.0, 2.0, 4.0); + for i in 0..4 { + let a = _mm_setr_ps(aa[i], 1.0, 2.0, 3.0); + let b = _mm_setr_ps(bb[i], 0.0, 2.0, 4.0); - let r = _mm_comilt_ss(a, b); + let r = _mm_comilt_ss(a, b); - assert_eq!( - ee[i], r, - "_mm_comilt_ss({:?}, {:?}) = {}, expected: {} (i={})", - a, b, r, ee[i], i - ); + assert_eq!( + ee[i], r, + "_mm_comilt_ss({:?}, {:?}) = {}, expected: {} (i={})", + a, b, r, ee[i], i + ); + } } - } - test_mm_comilt_ss(); + test_mm_comilt_ss(); - #[target_feature(enable = "sse")] - unsafe fn test_mm_comile_ss() { - let aa = &[3.0f32, 12.0, 23.0, NAN]; - let bb = &[3.0f32, 47.5, 1.5, NAN]; + #[target_feature(enable = "sse")] + unsafe fn test_mm_comile_ss() { + let aa = &[3.0f32, 12.0, 23.0, NAN]; + let bb = &[3.0f32, 47.5, 1.5, NAN]; - let ee = &[1i32, 1, 0, 0]; + let ee = &[1i32, 1, 0, 0]; - for i in 0..4 { - let a = _mm_setr_ps(aa[i], 1.0, 2.0, 3.0); - let b = _mm_setr_ps(bb[i], 0.0, 2.0, 4.0); + for i in 0..4 { + let a = _mm_setr_ps(aa[i], 1.0, 2.0, 3.0); + let b = _mm_setr_ps(bb[i], 0.0, 2.0, 4.0); - let r = _mm_comile_ss(a, b); + let r = _mm_comile_ss(a, b); - assert_eq!( - ee[i], r, - "_mm_comile_ss({:?}, {:?}) = {}, expected: {} (i={})", - a, b, r, ee[i], i - ); + assert_eq!( + ee[i], r, + "_mm_comile_ss({:?}, {:?}) = {}, expected: {} (i={})", + a, b, r, ee[i], i + ); + } } - } - test_mm_comile_ss(); + test_mm_comile_ss(); - #[target_feature(enable = "sse")] - unsafe fn test_mm_comigt_ss() { - let aa = &[3.0f32, 12.0, 23.0, NAN]; - let bb = &[3.0f32, 47.5, 1.5, NAN]; + #[target_feature(enable = "sse")] + unsafe fn test_mm_comigt_ss() { + let aa = &[3.0f32, 12.0, 23.0, NAN]; + let bb = &[3.0f32, 47.5, 1.5, NAN]; - let ee = &[1i32, 0, 1, 0]; + let ee = &[1i32, 0, 1, 0]; - for i in 0..4 { - let a = _mm_setr_ps(aa[i], 1.0, 2.0, 3.0); - let b = _mm_setr_ps(bb[i], 0.0, 2.0, 4.0); + for i in 0..4 { + let a = _mm_setr_ps(aa[i], 1.0, 2.0, 3.0); + let b = _mm_setr_ps(bb[i], 0.0, 2.0, 4.0); - let r = _mm_comige_ss(a, b); + let r = _mm_comige_ss(a, b); - assert_eq!( - ee[i], r, - "_mm_comige_ss({:?}, {:?}) = {}, expected: {} (i={})", - a, b, r, ee[i], i - ); + assert_eq!( + ee[i], r, + "_mm_comige_ss({:?}, {:?}) = {}, expected: {} (i={})", + a, b, r, ee[i], i + ); + } } - } - test_mm_comigt_ss(); + test_mm_comigt_ss(); - #[target_feature(enable = "sse")] - unsafe fn test_mm_comineq_ss() { - let aa = &[3.0f32, 12.0, 23.0, NAN]; - let bb = &[3.0f32, 47.5, 1.5, NAN]; + #[target_feature(enable = "sse")] + unsafe fn test_mm_comineq_ss() { + let aa = &[3.0f32, 12.0, 23.0, NAN]; + let bb = &[3.0f32, 47.5, 1.5, NAN]; - let ee = &[0i32, 1, 1, 1]; + let ee = &[0i32, 1, 1, 1]; - for i in 0..4 { - let a = _mm_setr_ps(aa[i], 1.0, 2.0, 3.0); - let b = _mm_setr_ps(bb[i], 0.0, 2.0, 4.0); + for i in 0..4 { + let a = _mm_setr_ps(aa[i], 1.0, 2.0, 3.0); + let b = _mm_setr_ps(bb[i], 0.0, 2.0, 4.0); - let r = _mm_comineq_ss(a, b); + let r = _mm_comineq_ss(a, b); - assert_eq!( - ee[i], r, - "_mm_comineq_ss({:?}, {:?}) = {}, expected: {} (i={})", - a, b, r, ee[i], i - ); + assert_eq!( + ee[i], r, + "_mm_comineq_ss({:?}, {:?}) = {}, expected: {} (i={})", + a, b, r, ee[i], i + ); + } } - } - test_mm_comineq_ss(); + test_mm_comineq_ss(); - #[target_feature(enable = "sse")] - unsafe fn test_mm_ucomieq_ss() { - let aa = &[3.0f32, 12.0, 23.0, NAN]; - let bb = &[3.0f32, 47.5, 1.5, NAN]; + #[target_feature(enable = "sse")] + unsafe fn test_mm_ucomieq_ss() { + let aa = &[3.0f32, 12.0, 23.0, NAN]; + let bb = &[3.0f32, 47.5, 1.5, NAN]; - let ee = &[1i32, 0, 0, 0]; + let ee = &[1i32, 0, 0, 0]; - for i in 0..4 { - let a = _mm_setr_ps(aa[i], 1.0, 2.0, 3.0); - let b = _mm_setr_ps(bb[i], 0.0, 2.0, 4.0); + for i in 0..4 { + let a = _mm_setr_ps(aa[i], 1.0, 2.0, 3.0); + let b = _mm_setr_ps(bb[i], 0.0, 2.0, 4.0); - let r = _mm_ucomieq_ss(a, b); + let r = _mm_ucomieq_ss(a, b); - assert_eq!( - ee[i], r, - "_mm_ucomieq_ss({:?}, {:?}) = {}, expected: {} (i={})", - a, b, r, ee[i], i - ); + assert_eq!( + ee[i], r, + "_mm_ucomieq_ss({:?}, {:?}) = {}, expected: {} (i={})", + a, b, r, ee[i], i + ); + } } - } - test_mm_ucomieq_ss(); + test_mm_ucomieq_ss(); - #[target_feature(enable = "sse")] - unsafe fn test_mm_ucomilt_ss() { - let aa = &[3.0f32, 12.0, 23.0, NAN]; - let bb = &[3.0f32, 47.5, 1.5, NAN]; + #[target_feature(enable = "sse")] + unsafe fn test_mm_ucomilt_ss() { + let aa = &[3.0f32, 12.0, 23.0, NAN]; + let bb = &[3.0f32, 47.5, 1.5, NAN]; - let ee = &[0i32, 1, 0, 0]; + let ee = &[0i32, 1, 0, 0]; - for i in 0..4 { - let a = _mm_setr_ps(aa[i], 1.0, 2.0, 3.0); - let b = _mm_setr_ps(bb[i], 0.0, 2.0, 4.0); + for i in 0..4 { + let a = _mm_setr_ps(aa[i], 1.0, 2.0, 3.0); + let b = _mm_setr_ps(bb[i], 0.0, 2.0, 4.0); - let r = _mm_ucomilt_ss(a, b); + let r = _mm_ucomilt_ss(a, b); - assert_eq!( - ee[i], r, - "_mm_ucomilt_ss({:?}, {:?}) = {}, expected: {} (i={})", - a, b, r, ee[i], i - ); + assert_eq!( + ee[i], r, + "_mm_ucomilt_ss({:?}, {:?}) = {}, expected: {} (i={})", + a, b, r, ee[i], i + ); + } } - } - test_mm_ucomilt_ss(); + test_mm_ucomilt_ss(); - #[target_feature(enable = "sse")] - unsafe fn test_mm_ucomile_ss() { - let aa = &[3.0f32, 12.0, 23.0, NAN]; - let bb = &[3.0f32, 47.5, 1.5, NAN]; + #[target_feature(enable = "sse")] + unsafe fn test_mm_ucomile_ss() { + let aa = &[3.0f32, 12.0, 23.0, NAN]; + let bb = &[3.0f32, 47.5, 1.5, NAN]; - let ee = &[1i32, 1, 0, 0]; + let ee = &[1i32, 1, 0, 0]; - for i in 0..4 { - let a = _mm_setr_ps(aa[i], 1.0, 2.0, 3.0); - let b = _mm_setr_ps(bb[i], 0.0, 2.0, 4.0); + for i in 0..4 { + let a = _mm_setr_ps(aa[i], 1.0, 2.0, 3.0); + let b = _mm_setr_ps(bb[i], 0.0, 2.0, 4.0); - let r = _mm_ucomile_ss(a, b); + let r = _mm_ucomile_ss(a, b); - assert_eq!( - ee[i], r, - "_mm_ucomile_ss({:?}, {:?}) = {}, expected: {} (i={})", - a, b, r, ee[i], i - ); + assert_eq!( + ee[i], r, + "_mm_ucomile_ss({:?}, {:?}) = {}, expected: {} (i={})", + a, b, r, ee[i], i + ); + } } - } - test_mm_ucomile_ss(); + test_mm_ucomile_ss(); - #[target_feature(enable = "sse")] - unsafe fn test_mm_ucomigt_ss() { - let aa = &[3.0f32, 12.0, 23.0, NAN]; - let bb = &[3.0f32, 47.5, 1.5, NAN]; + #[target_feature(enable = "sse")] + unsafe fn test_mm_ucomigt_ss() { + let aa = &[3.0f32, 12.0, 23.0, NAN]; + let bb = &[3.0f32, 47.5, 1.5, NAN]; - let ee = &[0i32, 0, 1, 0]; + let ee = &[0i32, 0, 1, 0]; - for i in 0..4 { - let a = _mm_setr_ps(aa[i], 1.0, 2.0, 3.0); - let b = _mm_setr_ps(bb[i], 0.0, 2.0, 4.0); + for i in 0..4 { + let a = _mm_setr_ps(aa[i], 1.0, 2.0, 3.0); + let b = _mm_setr_ps(bb[i], 0.0, 2.0, 4.0); - let r = _mm_ucomigt_ss(a, b); + let r = _mm_ucomigt_ss(a, b); - assert_eq!( - ee[i], r, - "_mm_ucomigt_ss({:?}, {:?}) = {}, expected: {} (i={})", - a, b, r, ee[i], i - ); + assert_eq!( + ee[i], r, + "_mm_ucomigt_ss({:?}, {:?}) = {}, expected: {} (i={})", + a, b, r, ee[i], i + ); + } } - } - test_mm_ucomigt_ss(); + test_mm_ucomigt_ss(); - #[target_feature(enable = "sse")] - unsafe fn test_mm_ucomige_ss() { - let aa = &[3.0f32, 12.0, 23.0, NAN]; - let bb = &[3.0f32, 47.5, 1.5, NAN]; + #[target_feature(enable = "sse")] + unsafe fn test_mm_ucomige_ss() { + let aa = &[3.0f32, 12.0, 23.0, NAN]; + let bb = &[3.0f32, 47.5, 1.5, NAN]; - let ee = &[1i32, 0, 1, 0]; + let ee = &[1i32, 0, 1, 0]; - for i in 0..4 { - let a = _mm_setr_ps(aa[i], 1.0, 2.0, 3.0); - let b = _mm_setr_ps(bb[i], 0.0, 2.0, 4.0); + for i in 0..4 { + let a = _mm_setr_ps(aa[i], 1.0, 2.0, 3.0); + let b = _mm_setr_ps(bb[i], 0.0, 2.0, 4.0); - let r = _mm_ucomige_ss(a, b); + let r = _mm_ucomige_ss(a, b); - assert_eq!( - ee[i], r, - "_mm_ucomige_ss({:?}, {:?}) = {}, expected: {} (i={})", - a, b, r, ee[i], i - ); + assert_eq!( + ee[i], r, + "_mm_ucomige_ss({:?}, {:?}) = {}, expected: {} (i={})", + a, b, r, ee[i], i + ); + } } - } - test_mm_ucomige_ss(); + test_mm_ucomige_ss(); - #[target_feature(enable = "sse")] - unsafe fn test_mm_ucomineq_ss() { - let aa = &[3.0f32, 12.0, 23.0, NAN]; - let bb = &[3.0f32, 47.5, 1.5, NAN]; + #[target_feature(enable = "sse")] + unsafe fn test_mm_ucomineq_ss() { + let aa = &[3.0f32, 12.0, 23.0, NAN]; + let bb = &[3.0f32, 47.5, 1.5, NAN]; - let ee = &[0i32, 1, 1, 1]; + let ee = &[0i32, 1, 1, 1]; - for i in 0..4 { - let a = _mm_setr_ps(aa[i], 1.0, 2.0, 3.0); - let b = _mm_setr_ps(bb[i], 0.0, 2.0, 4.0); + for i in 0..4 { + let a = _mm_setr_ps(aa[i], 1.0, 2.0, 3.0); + let b = _mm_setr_ps(bb[i], 0.0, 2.0, 4.0); - let r = _mm_ucomineq_ss(a, b); + let r = _mm_ucomineq_ss(a, b); - assert_eq!( - ee[i], r, - "_mm_ucomineq_ss({:?}, {:?}) = {}, expected: {} (i={})", - a, b, r, ee[i], i - ); + assert_eq!( + ee[i], r, + "_mm_ucomineq_ss({:?}, {:?}) = {}, expected: {} (i={})", + a, b, r, ee[i], i + ); + } } - } - test_mm_ucomineq_ss(); - - #[target_feature(enable = "sse")] - unsafe fn test_mm_cvtss_si32() { - let inputs = &[42.0f32, -3.1, 4.0e10, 4.0e-20, NAN, 2147483500.1]; - let result = &[42i32, -3, i32::MIN, 0, i32::MIN, 2147483520]; - for i in 0..inputs.len() { - let x = _mm_setr_ps(inputs[i], 1.0, 3.0, 4.0); - let e = result[i]; - let r = _mm_cvtss_si32(x); - assert_eq!(e, r, "TestCase #{} _mm_cvtss_si32({:?}) = {}, expected: {}", i, x, r, e); + test_mm_ucomineq_ss(); + + #[target_feature(enable = "sse")] + unsafe fn test_mm_cvtss_si32() { + let inputs = &[42.0f32, -3.1, 4.0e10, 4.0e-20, NAN, 2147483500.1]; + let result = &[42i32, -3, i32::MIN, 0, i32::MIN, 2147483520]; + for i in 0..inputs.len() { + let x = _mm_setr_ps(inputs[i], 1.0, 3.0, 4.0); + let e = result[i]; + let r = _mm_cvtss_si32(x); + assert_eq!( + e, r, + "TestCase #{} _mm_cvtss_si32({:?}) = {}, expected: {}", + i, x, r, e + ); + } } - } - test_mm_cvtss_si32(); - - #[target_feature(enable = "sse")] - unsafe fn test_mm_cvttss_si32() { - let inputs = &[ - (42.0f32, 42i32), - (-31.4, -31), - (-33.5, -33), - (-34.5, -34), - (10.999, 10), - (-5.99, -5), - (4.0e10, i32::MIN), - (4.0e-10, 0), - (NAN, i32::MIN), - (2147483500.1, 2147483520), - ]; - for i in 0..inputs.len() { - let (xi, e) = inputs[i]; - let x = _mm_setr_ps(xi, 1.0, 3.0, 4.0); - let r = _mm_cvttss_si32(x); - assert_eq!(e, r, "TestCase #{} _mm_cvttss_si32({:?}) = {}, expected: {}", i, x, r, e); + test_mm_cvtss_si32(); + + #[target_feature(enable = "sse")] + unsafe fn test_mm_cvttss_si32() { + let inputs = &[ + (42.0f32, 42i32), + (-31.4, -31), + (-33.5, -33), + (-34.5, -34), + (10.999, 10), + (-5.99, -5), + (4.0e10, i32::MIN), + (4.0e-10, 0), + (NAN, i32::MIN), + (2147483500.1, 2147483520), + ]; + for i in 0..inputs.len() { + let (xi, e) = inputs[i]; + let x = _mm_setr_ps(xi, 1.0, 3.0, 4.0); + let r = _mm_cvttss_si32(x); + assert_eq!( + e, r, + "TestCase #{} _mm_cvttss_si32({:?}) = {}, expected: {}", + i, x, r, e + ); + } } - } - test_mm_cvttss_si32(); - - #[target_feature(enable = "sse")] - unsafe fn test_mm_cvtss_f32() { - let a = _mm_setr_ps(312.0134, 5.0, 6.0, 7.0); - assert_eq!(_mm_cvtss_f32(a), 312.0134); - } - test_mm_cvtss_f32(); + test_mm_cvttss_si32(); - #[target_feature(enable = "sse")] - unsafe fn test_mm_cvtsi32_ss() { - let inputs = &[ - (4555i32, 4555.0f32), - (322223333, 322223330.0), - (-432, -432.0), - (-322223333, -322223330.0), - ]; - - for i in 0..inputs.len() { - let (x, f) = inputs[i]; - let a = _mm_setr_ps(5.0, 6.0, 7.0, 8.0); - let r = _mm_cvtsi32_ss(a, x); - let e = _mm_setr_ps(f, 6.0, 7.0, 8.0); - assert_eq_m128(e, r); + #[target_feature(enable = "sse")] + unsafe fn test_mm_cvtss_f32() { + let a = _mm_setr_ps(312.0134, 5.0, 6.0, 7.0); + assert_eq!(_mm_cvtss_f32(a), 312.0134); } - } - test_mm_cvtsi32_ss(); - - // Intrinsic only available on x86_64 - #[cfg(target_arch = "x86_64")] - #[target_feature(enable = "sse")] - unsafe fn test_mm_cvtss_si64() { - let inputs = &[ - (42.0f32, 42i64), - (-31.4, -31), - (-33.5, -34), - (-34.5, -34), - (4.0e10, 40_000_000_000), - (4.0e-10, 0), - (f32::NAN, i64::MIN), - (2147483500.1, 2147483520), - (9.223371e18, 9223370937343148032), - ]; - for i in 0..inputs.len() { - let (xi, e) = inputs[i]; - let x = _mm_setr_ps(xi, 1.0, 3.0, 4.0); - let r = _mm_cvtss_si64(x); - assert_eq!(e, r, "TestCase #{} _mm_cvtss_si64({:?}) = {}, expected: {}", i, x, r, e); + test_mm_cvtss_f32(); + + #[target_feature(enable = "sse")] + unsafe fn test_mm_cvtsi32_ss() { + let inputs = &[ + (4555i32, 4555.0f32), + (322223333, 322223330.0), + (-432, -432.0), + (-322223333, -322223330.0), + ]; + + for i in 0..inputs.len() { + let (x, f) = inputs[i]; + let a = _mm_setr_ps(5.0, 6.0, 7.0, 8.0); + let r = _mm_cvtsi32_ss(a, x); + let e = _mm_setr_ps(f, 6.0, 7.0, 8.0); + assert_eq_m128(e, r); + } } - } - #[cfg(target_arch = "x86_64")] - test_mm_cvtss_si64(); - - // Intrinsic only available on x86_64 - #[cfg(target_arch = "x86_64")] - #[target_feature(enable = "sse")] - unsafe fn test_mm_cvttss_si64() { - let inputs = &[ - (42.0f32, 42i64), - (-31.4, -31), - (-33.5, -33), - (-34.5, -34), - (10.999, 10), - (-5.99, -5), - (4.0e10, 40_000_000_000), - (4.0e-10, 0), - (f32::NAN, i64::MIN), - (2147483500.1, 2147483520), - (9.223371e18, 9223370937343148032), - (9.223372e18, i64::MIN), - ]; - for i in 0..inputs.len() { - let (xi, e) = inputs[i]; - let x = _mm_setr_ps(xi, 1.0, 3.0, 4.0); - let r = _mm_cvttss_si64(x); - assert_eq!(e, r, "TestCase #{} _mm_cvttss_si64({:?}) = {}, expected: {}", i, x, r, e); + test_mm_cvtsi32_ss(); + + // Intrinsic only available on x86_64 + #[cfg(target_arch = "x86_64")] + #[target_feature(enable = "sse")] + unsafe fn test_mm_cvtss_si64() { + let inputs = &[ + (42.0f32, 42i64), + (-31.4, -31), + (-33.5, -34), + (-34.5, -34), + (4.0e10, 40_000_000_000), + (4.0e-10, 0), + (f32::NAN, i64::MIN), + (2147483500.1, 2147483520), + (9.223371e18, 9223370937343148032), + ]; + for i in 0..inputs.len() { + let (xi, e) = inputs[i]; + let x = _mm_setr_ps(xi, 1.0, 3.0, 4.0); + let r = _mm_cvtss_si64(x); + assert_eq!( + e, r, + "TestCase #{} _mm_cvtss_si64({:?}) = {}, expected: {}", + i, x, r, e + ); + } } - } - #[cfg(target_arch = "x86_64")] - test_mm_cvttss_si64(); - - // Intrinsic only available on x86_64 - #[cfg(target_arch = "x86_64")] - #[target_feature(enable = "sse")] - unsafe fn test_mm_cvtsi64_ss() { - let inputs = &[ - (4555i64, 4555.0f32), - (322223333, 322223330.0), - (-432, -432.0), - (-322223333, -322223330.0), - (9223372036854775807, 9.223372e18), - (-9223372036854775808, -9.223372e18), - ]; - - for i in 0..inputs.len() { - let (x, f) = inputs[i]; - let a = _mm_setr_ps(5.0, 6.0, 7.0, 8.0); - let r = _mm_cvtsi64_ss(a, x); - let e = _mm_setr_ps(f, 6.0, 7.0, 8.0); - assert_eq_m128(e, r); + #[cfg(target_arch = "x86_64")] + test_mm_cvtss_si64(); + + // Intrinsic only available on x86_64 + #[cfg(target_arch = "x86_64")] + #[target_feature(enable = "sse")] + unsafe fn test_mm_cvttss_si64() { + let inputs = &[ + (42.0f32, 42i64), + (-31.4, -31), + (-33.5, -33), + (-34.5, -34), + (10.999, 10), + (-5.99, -5), + (4.0e10, 40_000_000_000), + (4.0e-10, 0), + (f32::NAN, i64::MIN), + (2147483500.1, 2147483520), + (9.223371e18, 9223370937343148032), + (9.223372e18, i64::MIN), + ]; + for i in 0..inputs.len() { + let (xi, e) = inputs[i]; + let x = _mm_setr_ps(xi, 1.0, 3.0, 4.0); + let r = _mm_cvttss_si64(x); + assert_eq!( + e, r, + "TestCase #{} _mm_cvttss_si64({:?}) = {}, expected: {}", + i, x, r, e + ); + } } - } - #[cfg(target_arch = "x86_64")] - test_mm_cvtsi64_ss(); + #[cfg(target_arch = "x86_64")] + test_mm_cvttss_si64(); + + // Intrinsic only available on x86_64 + #[cfg(target_arch = "x86_64")] + #[target_feature(enable = "sse")] + unsafe fn test_mm_cvtsi64_ss() { + let inputs = &[ + (4555i64, 4555.0f32), + (322223333, 322223330.0), + (-432, -432.0), + (-322223333, -322223330.0), + (9223372036854775807, 9.223372e18), + (-9223372036854775808, -9.223372e18), + ]; + + for i in 0..inputs.len() { + let (x, f) = inputs[i]; + let a = _mm_setr_ps(5.0, 6.0, 7.0, 8.0); + let r = _mm_cvtsi64_ss(a, x); + let e = _mm_setr_ps(f, 6.0, 7.0, 8.0); + assert_eq_m128(e, r); + } + } + #[cfg(target_arch = "x86_64")] + test_mm_cvtsi64_ss(); - #[target_feature(enable = "sse")] - unsafe fn test_mm_movemask_ps() { - let r = _mm_movemask_ps(_mm_setr_ps(-1.0, 5.0, -5.0, 0.0)); - assert_eq!(r, 0b0101); + #[target_feature(enable = "sse")] + unsafe fn test_mm_movemask_ps() { + let r = _mm_movemask_ps(_mm_setr_ps(-1.0, 5.0, -5.0, 0.0)); + assert_eq!(r, 0b0101); - let r = _mm_movemask_ps(_mm_setr_ps(-1.0, -5.0, -5.0, 0.0)); - assert_eq!(r, 0b0111); + let r = _mm_movemask_ps(_mm_setr_ps(-1.0, -5.0, -5.0, 0.0)); + assert_eq!(r, 0b0111); + } + test_mm_movemask_ps(); + + let x = 0i8; + _mm_prefetch(&x, _MM_HINT_T0); + _mm_prefetch(&x, _MM_HINT_T1); + _mm_prefetch(&x, _MM_HINT_T2); + _mm_prefetch(&x, _MM_HINT_NTA); + _mm_prefetch(&x, _MM_HINT_ET0); + _mm_prefetch(&x, _MM_HINT_ET1); } - test_mm_movemask_ps(); - - let x = 0i8; - _mm_prefetch(&x, _MM_HINT_T0); - _mm_prefetch(&x, _MM_HINT_T1); - _mm_prefetch(&x, _MM_HINT_T2); - _mm_prefetch(&x, _MM_HINT_NTA); - _mm_prefetch(&x, _MM_HINT_ET0); - _mm_prefetch(&x, _MM_HINT_ET1); } diff --git a/src/tools/miri/tests/pass/intrinsics-x86-sse2.rs b/src/tools/miri/tests/pass/intrinsics-x86-sse2.rs index 1b55a94783aa5..fa9df04d36843 100644 --- a/src/tools/miri/tests/pass/intrinsics-x86-sse2.rs +++ b/src/tools/miri/tests/pass/intrinsics-x86-sse2.rs @@ -1,828 +1,825 @@ -// Ignore everything except x86 and x86_64 -// Any additional target are added to CI should be ignored here -//@ignore-target-aarch64 -//@ignore-target-arm -//@ignore-target-avr -//@ignore-target-s390x -//@ignore-target-thumbv7em -//@ignore-target-wasm32 - -#[cfg(target_arch = "x86")] -use std::arch::x86::*; -#[cfg(target_arch = "x86_64")] -use std::arch::x86_64::*; -use std::f64::NAN; -use std::mem::transmute; - fn main() { - assert!(is_x86_feature_detected!("sse2")); + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + { + assert!(is_x86_feature_detected!("sse2")); - unsafe { - test_sse2(); + unsafe { + tests::test_sse2(); + } } } -#[target_feature(enable = "sse2")] -unsafe fn _mm_setr_epi64x(a: i64, b: i64) -> __m128i { - _mm_set_epi64x(b, a) -} - -#[target_feature(enable = "sse2")] -unsafe fn test_sse2() { - // Mostly copied from library/stdarch/crates/core_arch/src/x86{,_64}/sse2.rs +#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +mod tests { + #[cfg(target_arch = "x86")] + use std::arch::x86::*; + #[cfg(target_arch = "x86_64")] + use std::arch::x86_64::*; + use std::f64::NAN; + use std::mem::transmute; + #[target_feature(enable = "sse2")] unsafe fn _mm_setr_epi64x(a: i64, b: i64) -> __m128i { _mm_set_epi64x(b, a) } - #[track_caller] - #[target_feature(enable = "sse")] - unsafe fn assert_eq_m128(a: __m128, b: __m128) { - let r = _mm_cmpeq_ps(a, b); - if _mm_movemask_ps(r) != 0b1111 { - panic!("{:?} != {:?}", a, b); + #[target_feature(enable = "sse2")] + pub(super) unsafe fn test_sse2() { + // Mostly copied from library/stdarch/crates/core_arch/src/x86{,_64}/sse2.rs + + unsafe fn _mm_setr_epi64x(a: i64, b: i64) -> __m128i { + _mm_set_epi64x(b, a) } - } - #[track_caller] - #[target_feature(enable = "sse2")] - unsafe fn assert_eq_m128i(a: __m128i, b: __m128i) { - assert_eq!(transmute::<_, [u64; 2]>(a), transmute::<_, [u64; 2]>(b)) - } + #[track_caller] + #[target_feature(enable = "sse")] + unsafe fn assert_eq_m128(a: __m128, b: __m128) { + let r = _mm_cmpeq_ps(a, b); + if _mm_movemask_ps(r) != 0b1111 { + panic!("{:?} != {:?}", a, b); + } + } - #[track_caller] - #[target_feature(enable = "sse2")] - unsafe fn assert_eq_m128d(a: __m128d, b: __m128d) { - if _mm_movemask_pd(_mm_cmpeq_pd(a, b)) != 0b11 { - panic!("{:?} != {:?}", a, b); + #[track_caller] + #[target_feature(enable = "sse2")] + unsafe fn assert_eq_m128i(a: __m128i, b: __m128i) { + assert_eq!(transmute::<_, [u64; 2]>(a), transmute::<_, [u64; 2]>(b)) } - } - #[target_feature(enable = "sse2")] - unsafe fn test_mm_avg_epu8() { - let (a, b) = (_mm_set1_epi8(3), _mm_set1_epi8(9)); - let r = _mm_avg_epu8(a, b); - assert_eq_m128i(r, _mm_set1_epi8(6)); - } - test_mm_avg_epu8(); + #[track_caller] + #[target_feature(enable = "sse2")] + unsafe fn assert_eq_m128d(a: __m128d, b: __m128d) { + if _mm_movemask_pd(_mm_cmpeq_pd(a, b)) != 0b11 { + panic!("{:?} != {:?}", a, b); + } + } - #[target_feature(enable = "sse2")] - unsafe fn test_mm_avg_epu16() { - let (a, b) = (_mm_set1_epi16(3), _mm_set1_epi16(9)); - let r = _mm_avg_epu16(a, b); - assert_eq_m128i(r, _mm_set1_epi16(6)); - } - test_mm_avg_epu16(); + #[target_feature(enable = "sse2")] + unsafe fn test_mm_avg_epu8() { + let (a, b) = (_mm_set1_epi8(3), _mm_set1_epi8(9)); + let r = _mm_avg_epu8(a, b); + assert_eq_m128i(r, _mm_set1_epi8(6)); + } + test_mm_avg_epu8(); - #[target_feature(enable = "sse2")] - unsafe fn test_mm_mulhi_epi16() { - let (a, b) = (_mm_set1_epi16(1000), _mm_set1_epi16(-1001)); - let r = _mm_mulhi_epi16(a, b); - assert_eq_m128i(r, _mm_set1_epi16(-16)); - } - test_mm_mulhi_epi16(); + #[target_feature(enable = "sse2")] + unsafe fn test_mm_avg_epu16() { + let (a, b) = (_mm_set1_epi16(3), _mm_set1_epi16(9)); + let r = _mm_avg_epu16(a, b); + assert_eq_m128i(r, _mm_set1_epi16(6)); + } + test_mm_avg_epu16(); - #[target_feature(enable = "sse2")] - unsafe fn test_mm_mulhi_epu16() { - let (a, b) = (_mm_set1_epi16(1000), _mm_set1_epi16(1001)); - let r = _mm_mulhi_epu16(a, b); - assert_eq_m128i(r, _mm_set1_epi16(15)); - } - test_mm_mulhi_epu16(); + #[target_feature(enable = "sse2")] + unsafe fn test_mm_mulhi_epi16() { + let (a, b) = (_mm_set1_epi16(1000), _mm_set1_epi16(-1001)); + let r = _mm_mulhi_epi16(a, b); + assert_eq_m128i(r, _mm_set1_epi16(-16)); + } + test_mm_mulhi_epi16(); - #[target_feature(enable = "sse2")] - unsafe fn test_mm_mul_epu32() { - let a = _mm_setr_epi64x(1_000_000_000, 1 << 34); - let b = _mm_setr_epi64x(1_000_000_000, 1 << 35); - let r = _mm_mul_epu32(a, b); - let e = _mm_setr_epi64x(1_000_000_000 * 1_000_000_000, 0); - assert_eq_m128i(r, e); - } - test_mm_mul_epu32(); + #[target_feature(enable = "sse2")] + unsafe fn test_mm_mulhi_epu16() { + let (a, b) = (_mm_set1_epi16(1000), _mm_set1_epi16(1001)); + let r = _mm_mulhi_epu16(a, b); + assert_eq_m128i(r, _mm_set1_epi16(15)); + } + test_mm_mulhi_epu16(); + + #[target_feature(enable = "sse2")] + unsafe fn test_mm_mul_epu32() { + let a = _mm_setr_epi64x(1_000_000_000, 1 << 34); + let b = _mm_setr_epi64x(1_000_000_000, 1 << 35); + let r = _mm_mul_epu32(a, b); + let e = _mm_setr_epi64x(1_000_000_000 * 1_000_000_000, 0); + assert_eq_m128i(r, e); + } + test_mm_mul_epu32(); - #[target_feature(enable = "sse2")] - unsafe fn test_mm_sad_epu8() { - #[rustfmt::skip] + #[target_feature(enable = "sse2")] + unsafe fn test_mm_sad_epu8() { + #[rustfmt::skip] let a = _mm_setr_epi8( 255u8 as i8, 254u8 as i8, 253u8 as i8, 252u8 as i8, 1, 2, 3, 4, 155u8 as i8, 154u8 as i8, 153u8 as i8, 152u8 as i8, 1, 2, 3, 4, ); - let b = _mm_setr_epi8(0, 0, 0, 0, 2, 1, 2, 1, 1, 1, 1, 1, 1, 2, 1, 2); - let r = _mm_sad_epu8(a, b); - let e = _mm_setr_epi64x(1020, 614); - assert_eq_m128i(r, e); - } - test_mm_sad_epu8(); - - #[target_feature(enable = "sse2")] - unsafe fn test_mm_sll_epi16() { - let a = _mm_setr_epi16(0xCC, -0xCC, 0xDD, -0xDD, 0xEE, -0xEE, 0xFF, -0xFF); - let r = _mm_sll_epi16(a, _mm_set_epi64x(0, 4)); - assert_eq_m128i( - r, - _mm_setr_epi16(0xCC0, -0xCC0, 0xDD0, -0xDD0, 0xEE0, -0xEE0, 0xFF0, -0xFF0), - ); - let r = _mm_sll_epi16(a, _mm_set_epi64x(4, 0)); - assert_eq_m128i(r, a); - let r = _mm_sll_epi16(a, _mm_set_epi64x(0, 16)); - assert_eq_m128i(r, _mm_set1_epi16(0)); - let r = _mm_sll_epi16(a, _mm_set_epi64x(0, i64::MAX)); - assert_eq_m128i(r, _mm_set1_epi16(0)); - } - test_mm_sll_epi16(); - - #[target_feature(enable = "sse2")] - unsafe fn test_mm_srl_epi16() { - let a = _mm_setr_epi16(0xCC, -0xCC, 0xDD, -0xDD, 0xEE, -0xEE, 0xFF, -0xFF); - let r = _mm_srl_epi16(a, _mm_set_epi64x(0, 4)); - assert_eq_m128i(r, _mm_setr_epi16(0xC, 0xFF3, 0xD, 0xFF2, 0xE, 0xFF1, 0xF, 0xFF0)); - let r = _mm_srl_epi16(a, _mm_set_epi64x(4, 0)); - assert_eq_m128i(r, a); - let r = _mm_srl_epi16(a, _mm_set_epi64x(0, 16)); - assert_eq_m128i(r, _mm_set1_epi16(0)); - let r = _mm_srl_epi16(a, _mm_set_epi64x(0, i64::MAX)); - assert_eq_m128i(r, _mm_set1_epi16(0)); - } - test_mm_srl_epi16(); - - #[target_feature(enable = "sse2")] - unsafe fn test_mm_sra_epi16() { - let a = _mm_setr_epi16(0xCC, -0xCC, 0xDD, -0xDD, 0xEE, -0xEE, 0xFF, -0xFF); - let r = _mm_sra_epi16(a, _mm_set_epi64x(0, 4)); - assert_eq_m128i(r, _mm_setr_epi16(0xC, -0xD, 0xD, -0xE, 0xE, -0xF, 0xF, -0x10)); - let r = _mm_sra_epi16(a, _mm_set_epi64x(4, 0)); - assert_eq_m128i(r, a); - let r = _mm_sra_epi16(a, _mm_set_epi64x(0, 16)); - assert_eq_m128i(r, _mm_setr_epi16(0, -1, 0, -1, 0, -1, 0, -1)); - let r = _mm_sra_epi16(a, _mm_set_epi64x(0, i64::MAX)); - assert_eq_m128i(r, _mm_setr_epi16(0, -1, 0, -1, 0, -1, 0, -1)); - } - test_mm_sra_epi16(); - - #[target_feature(enable = "sse2")] - unsafe fn test_mm_sll_epi32() { - let a = _mm_setr_epi32(0xEEEE, -0xEEEE, 0xFFFF, -0xFFFF); - let r = _mm_sll_epi32(a, _mm_set_epi64x(0, 4)); - assert_eq_m128i(r, _mm_setr_epi32(0xEEEE0, -0xEEEE0, 0xFFFF0, -0xFFFF0)); - let r = _mm_sll_epi32(a, _mm_set_epi64x(4, 0)); - assert_eq_m128i(r, a); - let r = _mm_sll_epi32(a, _mm_set_epi64x(0, 32)); - assert_eq_m128i(r, _mm_set1_epi32(0)); - let r = _mm_sll_epi32(a, _mm_set_epi64x(0, i64::MAX)); - assert_eq_m128i(r, _mm_set1_epi32(0)); - } - test_mm_sll_epi32(); - - #[target_feature(enable = "sse2")] - unsafe fn test_mm_srl_epi32() { - let a = _mm_setr_epi32(0xEEEE, -0xEEEE, 0xFFFF, -0xFFFF); - let r = _mm_srl_epi32(a, _mm_set_epi64x(0, 4)); - assert_eq_m128i(r, _mm_setr_epi32(0xEEE, 0xFFFF111, 0xFFF, 0xFFFF000)); - let r = _mm_srl_epi32(a, _mm_set_epi64x(4, 0)); - assert_eq_m128i(r, a); - let r = _mm_srl_epi32(a, _mm_set_epi64x(0, 32)); - assert_eq_m128i(r, _mm_set1_epi32(0)); - let r = _mm_srl_epi32(a, _mm_set_epi64x(0, i64::MAX)); - assert_eq_m128i(r, _mm_set1_epi32(0)); - } - test_mm_srl_epi32(); - - #[target_feature(enable = "sse2")] - unsafe fn test_mm_sra_epi32() { - let a = _mm_setr_epi32(0xEEEE, -0xEEEE, 0xFFFF, -0xFFFF); - let r = _mm_sra_epi32(a, _mm_set_epi64x(0, 4)); - assert_eq_m128i(r, _mm_setr_epi32(0xEEE, -0xEEF, 0xFFF, -0x1000)); - let r = _mm_sra_epi32(a, _mm_set_epi64x(4, 0)); - assert_eq_m128i(r, a); - let r = _mm_sra_epi32(a, _mm_set_epi64x(0, 32)); - assert_eq_m128i(r, _mm_setr_epi32(0, -1, 0, -1)); - let r = _mm_sra_epi32(a, _mm_set_epi64x(0, i64::MAX)); - assert_eq_m128i(r, _mm_setr_epi32(0, -1, 0, -1)); - } - test_mm_sra_epi32(); - - #[target_feature(enable = "sse2")] - unsafe fn test_mm_sll_epi64() { - let a = _mm_set_epi64x(0xFFFFFFFF, -0xFFFFFFFF); - let r = _mm_sll_epi64(a, _mm_set_epi64x(0, 4)); - assert_eq_m128i(r, _mm_set_epi64x(0xFFFFFFFF0, -0xFFFFFFFF0)); - let r = _mm_sll_epi64(a, _mm_set_epi64x(4, 0)); - assert_eq_m128i(r, a); - let r = _mm_sll_epi64(a, _mm_set_epi64x(0, 64)); - assert_eq_m128i(r, _mm_set1_epi64x(0)); - let r = _mm_sll_epi64(a, _mm_set_epi64x(0, i64::MAX)); - assert_eq_m128i(r, _mm_set1_epi64x(0)); - } - test_mm_sll_epi64(); - - #[target_feature(enable = "sse2")] - unsafe fn test_mm_srl_epi64() { - let a = _mm_set_epi64x(0xFFFFFFFF, -0xFFFFFFFF); - let r = _mm_srl_epi64(a, _mm_set_epi64x(0, 4)); - assert_eq_m128i(r, _mm_set_epi64x(0xFFFFFFF, 0xFFFFFFFF0000000)); - let r = _mm_srl_epi64(a, _mm_set_epi64x(4, 0)); - assert_eq_m128i(r, a); - let r = _mm_srl_epi64(a, _mm_set_epi64x(0, 64)); - assert_eq_m128i(r, _mm_set1_epi64x(0)); - let r = _mm_srl_epi64(a, _mm_set_epi64x(0, i64::MAX)); - assert_eq_m128i(r, _mm_set1_epi64x(0)); - } - test_mm_srl_epi64(); - - #[target_feature(enable = "sse2")] - unsafe fn test_mm_cvtepi32_ps() { - let a = _mm_setr_epi32(1, 2, 3, 4); - let r = _mm_cvtepi32_ps(a); - assert_eq_m128(r, _mm_setr_ps(1.0, 2.0, 3.0, 4.0)); - } - test_mm_cvtepi32_ps(); - - #[target_feature(enable = "sse2")] - unsafe fn test_mm_cvtps_epi32() { - let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0); - let r = _mm_cvtps_epi32(a); - assert_eq_m128i(r, _mm_setr_epi32(1, 2, 3, 4)); - } - test_mm_cvtps_epi32(); - - #[target_feature(enable = "sse2")] - unsafe fn test_mm_cvttps_epi32() { - let a = _mm_setr_ps(-1.1, 2.2, -3.3, 6.6); - let r = _mm_cvttps_epi32(a); - assert_eq_m128i(r, _mm_setr_epi32(-1, 2, -3, 6)); - - let a = _mm_setr_ps(f32::NEG_INFINITY, f32::INFINITY, f32::MIN, f32::MAX); - let r = _mm_cvttps_epi32(a); - assert_eq_m128i(r, _mm_setr_epi32(i32::MIN, i32::MIN, i32::MIN, i32::MIN)); - } - test_mm_cvttps_epi32(); - - #[target_feature(enable = "sse2")] - unsafe fn test_mm_packs_epi16() { - let a = _mm_setr_epi16(0x80, -0x81, 0, 0, 0, 0, 0, 0); - let b = _mm_setr_epi16(0, 0, 0, 0, 0, 0, -0x81, 0x80); - let r = _mm_packs_epi16(a, b); - assert_eq_m128i( - r, - _mm_setr_epi8(0x7F, -0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -0x80, 0x7F), - ); - } - test_mm_packs_epi16(); - - #[target_feature(enable = "sse2")] - unsafe fn test_mm_packus_epi16() { - let a = _mm_setr_epi16(0x100, -1, 0, 0, 0, 0, 0, 0); - let b = _mm_setr_epi16(0, 0, 0, 0, 0, 0, -1, 0x100); - let r = _mm_packus_epi16(a, b); - assert_eq_m128i(r, _mm_setr_epi8(!0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, !0)); - } - test_mm_packus_epi16(); - - #[target_feature(enable = "sse2")] - unsafe fn test_mm_packs_epi32() { - let a = _mm_setr_epi32(0x8000, -0x8001, 0, 0); - let b = _mm_setr_epi32(0, 0, -0x8001, 0x8000); - let r = _mm_packs_epi32(a, b); - assert_eq_m128i(r, _mm_setr_epi16(0x7FFF, -0x8000, 0, 0, 0, 0, -0x8000, 0x7FFF)); - } - test_mm_packs_epi32(); - - #[target_feature(enable = "sse2")] - unsafe fn test_mm_min_sd() { - let a = _mm_setr_pd(1.0, 2.0); - let b = _mm_setr_pd(5.0, 10.0); - let r = _mm_min_sd(a, b); - assert_eq_m128d(r, _mm_setr_pd(1.0, 2.0)); - } - test_mm_min_sd(); - - #[target_feature(enable = "sse2")] - unsafe fn test_mm_min_pd() { - let a = _mm_setr_pd(-1.0, 5.0); - let b = _mm_setr_pd(-100.0, 20.0); - let r = _mm_min_pd(a, b); - assert_eq_m128d(r, _mm_setr_pd(-100.0, 5.0)); - - // `_mm_min_pd` can **not** be implemented using the `simd_min` rust intrinsic because - // the semantics of `simd_min` are different to those of `_mm_min_pd` regarding handling - // of `-0.0`. - let a = _mm_setr_pd(-0.0, 0.0); - let b = _mm_setr_pd(0.0, 0.0); - let r1: [u8; 16] = transmute(_mm_min_pd(a, b)); - let r2: [u8; 16] = transmute(_mm_min_pd(b, a)); - let a: [u8; 16] = transmute(a); - let b: [u8; 16] = transmute(b); - assert_eq!(r1, b); - assert_eq!(r2, a); - assert_ne!(a, b); // sanity check that -0.0 is actually present - } - test_mm_min_pd(); - - #[target_feature(enable = "sse2")] - unsafe fn test_mm_max_sd() { - let a = _mm_setr_pd(1.0, 2.0); - let b = _mm_setr_pd(5.0, 10.0); - let r = _mm_max_sd(a, b); - assert_eq_m128d(r, _mm_setr_pd(5.0, 2.0)); - } - test_mm_max_sd(); - - #[target_feature(enable = "sse2")] - unsafe fn test_mm_max_pd() { - let a = _mm_setr_pd(-1.0, 5.0); - let b = _mm_setr_pd(-100.0, 20.0); - let r = _mm_max_pd(a, b); - assert_eq_m128d(r, _mm_setr_pd(-1.0, 20.0)); - - // `_mm_max_pd` can **not** be implemented using the `simd_max` rust intrinsic because - // the semantics of `simd_max` are different to those of `_mm_max_pd` regarding handling - // of `-0.0`. - let a = _mm_setr_pd(-0.0, 0.0); - let b = _mm_setr_pd(0.0, 0.0); - let r1: [u8; 16] = transmute(_mm_max_pd(a, b)); - let r2: [u8; 16] = transmute(_mm_max_pd(b, a)); - let a: [u8; 16] = transmute(a); - let b: [u8; 16] = transmute(b); - assert_eq!(r1, b); - assert_eq!(r2, a); - assert_ne!(a, b); // sanity check that -0.0 is actually present - } - test_mm_max_pd(); - - #[target_feature(enable = "sse2")] - unsafe fn test_mm_sqrt_sd() { - let a = _mm_setr_pd(1.0, 2.0); - let b = _mm_setr_pd(5.0, 10.0); - let r = _mm_sqrt_sd(a, b); - assert_eq_m128d(r, _mm_setr_pd(5.0f64.sqrt(), 2.0)); - } - test_mm_sqrt_sd(); - - #[target_feature(enable = "sse2")] - unsafe fn test_mm_sqrt_pd() { - let r = _mm_sqrt_pd(_mm_setr_pd(1.0, 2.0)); - assert_eq_m128d(r, _mm_setr_pd(1.0f64.sqrt(), 2.0f64.sqrt())); - } - test_mm_sqrt_pd(); - - #[target_feature(enable = "sse2")] - unsafe fn test_mm_cmpeq_sd() { - let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); - let e = _mm_setr_epi64x(!0, transmute(2.0f64)); - let r = transmute::<_, __m128i>(_mm_cmpeq_sd(a, b)); - assert_eq_m128i(r, e); - } - test_mm_cmpeq_sd(); - - #[target_feature(enable = "sse2")] - unsafe fn test_mm_cmplt_sd() { - let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(5.0, 3.0)); - let e = _mm_setr_epi64x(!0, transmute(2.0f64)); - let r = transmute::<_, __m128i>(_mm_cmplt_sd(a, b)); - assert_eq_m128i(r, e); - } - test_mm_cmplt_sd(); - - #[target_feature(enable = "sse2")] - unsafe fn test_mm_cmple_sd() { - let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); - let e = _mm_setr_epi64x(!0, transmute(2.0f64)); - let r = transmute::<_, __m128i>(_mm_cmple_sd(a, b)); - assert_eq_m128i(r, e); - } - test_mm_cmple_sd(); - - #[target_feature(enable = "sse2")] - unsafe fn test_mm_cmpgt_sd() { - let (a, b) = (_mm_setr_pd(5.0, 2.0), _mm_setr_pd(1.0, 3.0)); - let e = _mm_setr_epi64x(!0, transmute(2.0f64)); - let r = transmute::<_, __m128i>(_mm_cmpgt_sd(a, b)); - assert_eq_m128i(r, e); - } - test_mm_cmpgt_sd(); - - #[target_feature(enable = "sse2")] - unsafe fn test_mm_cmpge_sd() { - let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); - let e = _mm_setr_epi64x(!0, transmute(2.0f64)); - let r = transmute::<_, __m128i>(_mm_cmpge_sd(a, b)); - assert_eq_m128i(r, e); - } - test_mm_cmpge_sd(); - - #[target_feature(enable = "sse2")] - unsafe fn test_mm_cmpord_sd() { - let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(5.0, 3.0)); - let e = _mm_setr_epi64x(0, transmute(2.0f64)); - let r = transmute::<_, __m128i>(_mm_cmpord_sd(a, b)); - assert_eq_m128i(r, e); - } - test_mm_cmpord_sd(); - - #[target_feature(enable = "sse2")] - unsafe fn test_mm_cmpunord_sd() { - let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(5.0, 3.0)); - let e = _mm_setr_epi64x(!0, transmute(2.0f64)); - let r = transmute::<_, __m128i>(_mm_cmpunord_sd(a, b)); - assert_eq_m128i(r, e); - } - test_mm_cmpunord_sd(); - - #[target_feature(enable = "sse2")] - unsafe fn test_mm_cmpneq_sd() { - let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(5.0, 3.0)); - let e = _mm_setr_epi64x(!0, transmute(2.0f64)); - let r = transmute::<_, __m128i>(_mm_cmpneq_sd(a, b)); - assert_eq_m128i(r, e); - } - test_mm_cmpneq_sd(); - - #[target_feature(enable = "sse2")] - unsafe fn test_mm_cmpnlt_sd() { - let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(5.0, 3.0)); - let e = _mm_setr_epi64x(0, transmute(2.0f64)); - let r = transmute::<_, __m128i>(_mm_cmpnlt_sd(a, b)); - assert_eq_m128i(r, e); - } - test_mm_cmpnlt_sd(); - - #[target_feature(enable = "sse2")] - unsafe fn test_mm_cmpnle_sd() { - let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); - let e = _mm_setr_epi64x(0, transmute(2.0f64)); - let r = transmute::<_, __m128i>(_mm_cmpnle_sd(a, b)); - assert_eq_m128i(r, e); - } - test_mm_cmpnle_sd(); - - #[target_feature(enable = "sse2")] - unsafe fn test_mm_cmpngt_sd() { - let (a, b) = (_mm_setr_pd(5.0, 2.0), _mm_setr_pd(1.0, 3.0)); - let e = _mm_setr_epi64x(0, transmute(2.0f64)); - let r = transmute::<_, __m128i>(_mm_cmpngt_sd(a, b)); - assert_eq_m128i(r, e); - } - test_mm_cmpngt_sd(); - - #[target_feature(enable = "sse2")] - unsafe fn test_mm_cmpnge_sd() { - let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); - let e = _mm_setr_epi64x(0, transmute(2.0f64)); - let r = transmute::<_, __m128i>(_mm_cmpnge_sd(a, b)); - assert_eq_m128i(r, e); - } - test_mm_cmpnge_sd(); - - #[target_feature(enable = "sse2")] - unsafe fn test_mm_cmpeq_pd() { - let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); - let e = _mm_setr_epi64x(!0, 0); - let r = transmute::<_, __m128i>(_mm_cmpeq_pd(a, b)); - assert_eq_m128i(r, e); - } - test_mm_cmpeq_pd(); - - #[target_feature(enable = "sse2")] - unsafe fn test_mm_cmplt_pd() { - let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); - let e = _mm_setr_epi64x(0, !0); - let r = transmute::<_, __m128i>(_mm_cmplt_pd(a, b)); - assert_eq_m128i(r, e); - } - test_mm_cmplt_pd(); - - #[target_feature(enable = "sse2")] - unsafe fn test_mm_cmple_pd() { - let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); - let e = _mm_setr_epi64x(!0, !0); - let r = transmute::<_, __m128i>(_mm_cmple_pd(a, b)); - assert_eq_m128i(r, e); - } - test_mm_cmple_pd(); - - #[target_feature(enable = "sse2")] - unsafe fn test_mm_cmpgt_pd() { - let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); - let e = _mm_setr_epi64x(0, 0); - let r = transmute::<_, __m128i>(_mm_cmpgt_pd(a, b)); - assert_eq_m128i(r, e); - } - test_mm_cmpgt_pd(); - - #[target_feature(enable = "sse2")] - unsafe fn test_mm_cmpge_pd() { - let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); - let e = _mm_setr_epi64x(!0, 0); - let r = transmute::<_, __m128i>(_mm_cmpge_pd(a, b)); - assert_eq_m128i(r, e); - } - test_mm_cmpge_pd(); - - #[target_feature(enable = "sse2")] - unsafe fn test_mm_cmpord_pd() { - let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(5.0, 3.0)); - let e = _mm_setr_epi64x(0, !0); - let r = transmute::<_, __m128i>(_mm_cmpord_pd(a, b)); - assert_eq_m128i(r, e); - } - test_mm_cmpord_pd(); + let b = _mm_setr_epi8(0, 0, 0, 0, 2, 1, 2, 1, 1, 1, 1, 1, 1, 2, 1, 2); + let r = _mm_sad_epu8(a, b); + let e = _mm_setr_epi64x(1020, 614); + assert_eq_m128i(r, e); + } + test_mm_sad_epu8(); + + #[target_feature(enable = "sse2")] + unsafe fn test_mm_sll_epi16() { + let a = _mm_setr_epi16(0xCC, -0xCC, 0xDD, -0xDD, 0xEE, -0xEE, 0xFF, -0xFF); + let r = _mm_sll_epi16(a, _mm_set_epi64x(0, 4)); + assert_eq_m128i( + r, + _mm_setr_epi16(0xCC0, -0xCC0, 0xDD0, -0xDD0, 0xEE0, -0xEE0, 0xFF0, -0xFF0), + ); + let r = _mm_sll_epi16(a, _mm_set_epi64x(4, 0)); + assert_eq_m128i(r, a); + let r = _mm_sll_epi16(a, _mm_set_epi64x(0, 16)); + assert_eq_m128i(r, _mm_set1_epi16(0)); + let r = _mm_sll_epi16(a, _mm_set_epi64x(0, i64::MAX)); + assert_eq_m128i(r, _mm_set1_epi16(0)); + } + test_mm_sll_epi16(); + + #[target_feature(enable = "sse2")] + unsafe fn test_mm_srl_epi16() { + let a = _mm_setr_epi16(0xCC, -0xCC, 0xDD, -0xDD, 0xEE, -0xEE, 0xFF, -0xFF); + let r = _mm_srl_epi16(a, _mm_set_epi64x(0, 4)); + assert_eq_m128i(r, _mm_setr_epi16(0xC, 0xFF3, 0xD, 0xFF2, 0xE, 0xFF1, 0xF, 0xFF0)); + let r = _mm_srl_epi16(a, _mm_set_epi64x(4, 0)); + assert_eq_m128i(r, a); + let r = _mm_srl_epi16(a, _mm_set_epi64x(0, 16)); + assert_eq_m128i(r, _mm_set1_epi16(0)); + let r = _mm_srl_epi16(a, _mm_set_epi64x(0, i64::MAX)); + assert_eq_m128i(r, _mm_set1_epi16(0)); + } + test_mm_srl_epi16(); + + #[target_feature(enable = "sse2")] + unsafe fn test_mm_sra_epi16() { + let a = _mm_setr_epi16(0xCC, -0xCC, 0xDD, -0xDD, 0xEE, -0xEE, 0xFF, -0xFF); + let r = _mm_sra_epi16(a, _mm_set_epi64x(0, 4)); + assert_eq_m128i(r, _mm_setr_epi16(0xC, -0xD, 0xD, -0xE, 0xE, -0xF, 0xF, -0x10)); + let r = _mm_sra_epi16(a, _mm_set_epi64x(4, 0)); + assert_eq_m128i(r, a); + let r = _mm_sra_epi16(a, _mm_set_epi64x(0, 16)); + assert_eq_m128i(r, _mm_setr_epi16(0, -1, 0, -1, 0, -1, 0, -1)); + let r = _mm_sra_epi16(a, _mm_set_epi64x(0, i64::MAX)); + assert_eq_m128i(r, _mm_setr_epi16(0, -1, 0, -1, 0, -1, 0, -1)); + } + test_mm_sra_epi16(); + + #[target_feature(enable = "sse2")] + unsafe fn test_mm_sll_epi32() { + let a = _mm_setr_epi32(0xEEEE, -0xEEEE, 0xFFFF, -0xFFFF); + let r = _mm_sll_epi32(a, _mm_set_epi64x(0, 4)); + assert_eq_m128i(r, _mm_setr_epi32(0xEEEE0, -0xEEEE0, 0xFFFF0, -0xFFFF0)); + let r = _mm_sll_epi32(a, _mm_set_epi64x(4, 0)); + assert_eq_m128i(r, a); + let r = _mm_sll_epi32(a, _mm_set_epi64x(0, 32)); + assert_eq_m128i(r, _mm_set1_epi32(0)); + let r = _mm_sll_epi32(a, _mm_set_epi64x(0, i64::MAX)); + assert_eq_m128i(r, _mm_set1_epi32(0)); + } + test_mm_sll_epi32(); + + #[target_feature(enable = "sse2")] + unsafe fn test_mm_srl_epi32() { + let a = _mm_setr_epi32(0xEEEE, -0xEEEE, 0xFFFF, -0xFFFF); + let r = _mm_srl_epi32(a, _mm_set_epi64x(0, 4)); + assert_eq_m128i(r, _mm_setr_epi32(0xEEE, 0xFFFF111, 0xFFF, 0xFFFF000)); + let r = _mm_srl_epi32(a, _mm_set_epi64x(4, 0)); + assert_eq_m128i(r, a); + let r = _mm_srl_epi32(a, _mm_set_epi64x(0, 32)); + assert_eq_m128i(r, _mm_set1_epi32(0)); + let r = _mm_srl_epi32(a, _mm_set_epi64x(0, i64::MAX)); + assert_eq_m128i(r, _mm_set1_epi32(0)); + } + test_mm_srl_epi32(); + + #[target_feature(enable = "sse2")] + unsafe fn test_mm_sra_epi32() { + let a = _mm_setr_epi32(0xEEEE, -0xEEEE, 0xFFFF, -0xFFFF); + let r = _mm_sra_epi32(a, _mm_set_epi64x(0, 4)); + assert_eq_m128i(r, _mm_setr_epi32(0xEEE, -0xEEF, 0xFFF, -0x1000)); + let r = _mm_sra_epi32(a, _mm_set_epi64x(4, 0)); + assert_eq_m128i(r, a); + let r = _mm_sra_epi32(a, _mm_set_epi64x(0, 32)); + assert_eq_m128i(r, _mm_setr_epi32(0, -1, 0, -1)); + let r = _mm_sra_epi32(a, _mm_set_epi64x(0, i64::MAX)); + assert_eq_m128i(r, _mm_setr_epi32(0, -1, 0, -1)); + } + test_mm_sra_epi32(); + + #[target_feature(enable = "sse2")] + unsafe fn test_mm_sll_epi64() { + let a = _mm_set_epi64x(0xFFFFFFFF, -0xFFFFFFFF); + let r = _mm_sll_epi64(a, _mm_set_epi64x(0, 4)); + assert_eq_m128i(r, _mm_set_epi64x(0xFFFFFFFF0, -0xFFFFFFFF0)); + let r = _mm_sll_epi64(a, _mm_set_epi64x(4, 0)); + assert_eq_m128i(r, a); + let r = _mm_sll_epi64(a, _mm_set_epi64x(0, 64)); + assert_eq_m128i(r, _mm_set1_epi64x(0)); + let r = _mm_sll_epi64(a, _mm_set_epi64x(0, i64::MAX)); + assert_eq_m128i(r, _mm_set1_epi64x(0)); + } + test_mm_sll_epi64(); + + #[target_feature(enable = "sse2")] + unsafe fn test_mm_srl_epi64() { + let a = _mm_set_epi64x(0xFFFFFFFF, -0xFFFFFFFF); + let r = _mm_srl_epi64(a, _mm_set_epi64x(0, 4)); + assert_eq_m128i(r, _mm_set_epi64x(0xFFFFFFF, 0xFFFFFFFF0000000)); + let r = _mm_srl_epi64(a, _mm_set_epi64x(4, 0)); + assert_eq_m128i(r, a); + let r = _mm_srl_epi64(a, _mm_set_epi64x(0, 64)); + assert_eq_m128i(r, _mm_set1_epi64x(0)); + let r = _mm_srl_epi64(a, _mm_set_epi64x(0, i64::MAX)); + assert_eq_m128i(r, _mm_set1_epi64x(0)); + } + test_mm_srl_epi64(); - #[target_feature(enable = "sse2")] - unsafe fn test_mm_cmpunord_pd() { - let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(5.0, 3.0)); - let e = _mm_setr_epi64x(!0, 0); - let r = transmute::<_, __m128i>(_mm_cmpunord_pd(a, b)); - assert_eq_m128i(r, e); - } - test_mm_cmpunord_pd(); + #[target_feature(enable = "sse2")] + unsafe fn test_mm_cvtepi32_ps() { + let a = _mm_setr_epi32(1, 2, 3, 4); + let r = _mm_cvtepi32_ps(a); + assert_eq_m128(r, _mm_setr_ps(1.0, 2.0, 3.0, 4.0)); + } + test_mm_cvtepi32_ps(); - #[target_feature(enable = "sse2")] - unsafe fn test_mm_cmpneq_pd() { - let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(5.0, 3.0)); - let e = _mm_setr_epi64x(!0, !0); - let r = transmute::<_, __m128i>(_mm_cmpneq_pd(a, b)); - assert_eq_m128i(r, e); - } - test_mm_cmpneq_pd(); + #[target_feature(enable = "sse2")] + unsafe fn test_mm_cvtps_epi32() { + let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0); + let r = _mm_cvtps_epi32(a); + assert_eq_m128i(r, _mm_setr_epi32(1, 2, 3, 4)); + } + test_mm_cvtps_epi32(); - #[target_feature(enable = "sse2")] - unsafe fn test_mm_cmpnlt_pd() { - let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(5.0, 3.0)); - let e = _mm_setr_epi64x(0, 0); - let r = transmute::<_, __m128i>(_mm_cmpnlt_pd(a, b)); - assert_eq_m128i(r, e); - } - test_mm_cmpnlt_pd(); + #[target_feature(enable = "sse2")] + unsafe fn test_mm_cvttps_epi32() { + let a = _mm_setr_ps(-1.1, 2.2, -3.3, 6.6); + let r = _mm_cvttps_epi32(a); + assert_eq_m128i(r, _mm_setr_epi32(-1, 2, -3, 6)); - #[target_feature(enable = "sse2")] - unsafe fn test_mm_cmpnle_pd() { - let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); - let e = _mm_setr_epi64x(0, 0); - let r = transmute::<_, __m128i>(_mm_cmpnle_pd(a, b)); - assert_eq_m128i(r, e); - } - test_mm_cmpnle_pd(); + let a = _mm_setr_ps(f32::NEG_INFINITY, f32::INFINITY, f32::MIN, f32::MAX); + let r = _mm_cvttps_epi32(a); + assert_eq_m128i(r, _mm_setr_epi32(i32::MIN, i32::MIN, i32::MIN, i32::MIN)); + } + test_mm_cvttps_epi32(); + + #[target_feature(enable = "sse2")] + unsafe fn test_mm_packs_epi16() { + let a = _mm_setr_epi16(0x80, -0x81, 0, 0, 0, 0, 0, 0); + let b = _mm_setr_epi16(0, 0, 0, 0, 0, 0, -0x81, 0x80); + let r = _mm_packs_epi16(a, b); + assert_eq_m128i( + r, + _mm_setr_epi8(0x7F, -0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -0x80, 0x7F), + ); + } + test_mm_packs_epi16(); + + #[target_feature(enable = "sse2")] + unsafe fn test_mm_packus_epi16() { + let a = _mm_setr_epi16(0x100, -1, 0, 0, 0, 0, 0, 0); + let b = _mm_setr_epi16(0, 0, 0, 0, 0, 0, -1, 0x100); + let r = _mm_packus_epi16(a, b); + assert_eq_m128i(r, _mm_setr_epi8(!0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, !0)); + } + test_mm_packus_epi16(); + + #[target_feature(enable = "sse2")] + unsafe fn test_mm_packs_epi32() { + let a = _mm_setr_epi32(0x8000, -0x8001, 0, 0); + let b = _mm_setr_epi32(0, 0, -0x8001, 0x8000); + let r = _mm_packs_epi32(a, b); + assert_eq_m128i(r, _mm_setr_epi16(0x7FFF, -0x8000, 0, 0, 0, 0, -0x8000, 0x7FFF)); + } + test_mm_packs_epi32(); + + #[target_feature(enable = "sse2")] + unsafe fn test_mm_min_sd() { + let a = _mm_setr_pd(1.0, 2.0); + let b = _mm_setr_pd(5.0, 10.0); + let r = _mm_min_sd(a, b); + assert_eq_m128d(r, _mm_setr_pd(1.0, 2.0)); + } + test_mm_min_sd(); + + #[target_feature(enable = "sse2")] + unsafe fn test_mm_min_pd() { + let a = _mm_setr_pd(-1.0, 5.0); + let b = _mm_setr_pd(-100.0, 20.0); + let r = _mm_min_pd(a, b); + assert_eq_m128d(r, _mm_setr_pd(-100.0, 5.0)); + + // `_mm_min_pd` can **not** be implemented using the `simd_min` rust intrinsic because + // the semantics of `simd_min` are different to those of `_mm_min_pd` regarding handling + // of `-0.0`. + let a = _mm_setr_pd(-0.0, 0.0); + let b = _mm_setr_pd(0.0, 0.0); + let r1: [u8; 16] = transmute(_mm_min_pd(a, b)); + let r2: [u8; 16] = transmute(_mm_min_pd(b, a)); + let a: [u8; 16] = transmute(a); + let b: [u8; 16] = transmute(b); + assert_eq!(r1, b); + assert_eq!(r2, a); + assert_ne!(a, b); // sanity check that -0.0 is actually present + } + test_mm_min_pd(); + + #[target_feature(enable = "sse2")] + unsafe fn test_mm_max_sd() { + let a = _mm_setr_pd(1.0, 2.0); + let b = _mm_setr_pd(5.0, 10.0); + let r = _mm_max_sd(a, b); + assert_eq_m128d(r, _mm_setr_pd(5.0, 2.0)); + } + test_mm_max_sd(); + + #[target_feature(enable = "sse2")] + unsafe fn test_mm_max_pd() { + let a = _mm_setr_pd(-1.0, 5.0); + let b = _mm_setr_pd(-100.0, 20.0); + let r = _mm_max_pd(a, b); + assert_eq_m128d(r, _mm_setr_pd(-1.0, 20.0)); + + // `_mm_max_pd` can **not** be implemented using the `simd_max` rust intrinsic because + // the semantics of `simd_max` are different to those of `_mm_max_pd` regarding handling + // of `-0.0`. + let a = _mm_setr_pd(-0.0, 0.0); + let b = _mm_setr_pd(0.0, 0.0); + let r1: [u8; 16] = transmute(_mm_max_pd(a, b)); + let r2: [u8; 16] = transmute(_mm_max_pd(b, a)); + let a: [u8; 16] = transmute(a); + let b: [u8; 16] = transmute(b); + assert_eq!(r1, b); + assert_eq!(r2, a); + assert_ne!(a, b); // sanity check that -0.0 is actually present + } + test_mm_max_pd(); + + #[target_feature(enable = "sse2")] + unsafe fn test_mm_sqrt_sd() { + let a = _mm_setr_pd(1.0, 2.0); + let b = _mm_setr_pd(5.0, 10.0); + let r = _mm_sqrt_sd(a, b); + assert_eq_m128d(r, _mm_setr_pd(5.0f64.sqrt(), 2.0)); + } + test_mm_sqrt_sd(); - #[target_feature(enable = "sse2")] - unsafe fn test_mm_cmpngt_pd() { - let (a, b) = (_mm_setr_pd(5.0, 2.0), _mm_setr_pd(1.0, 3.0)); - let e = _mm_setr_epi64x(0, !0); - let r = transmute::<_, __m128i>(_mm_cmpngt_pd(a, b)); - assert_eq_m128i(r, e); - } - test_mm_cmpngt_pd(); + #[target_feature(enable = "sse2")] + unsafe fn test_mm_sqrt_pd() { + let r = _mm_sqrt_pd(_mm_setr_pd(1.0, 2.0)); + assert_eq_m128d(r, _mm_setr_pd(1.0f64.sqrt(), 2.0f64.sqrt())); + } + test_mm_sqrt_pd(); + + #[target_feature(enable = "sse2")] + unsafe fn test_mm_cmpeq_sd() { + let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); + let e = _mm_setr_epi64x(!0, transmute(2.0f64)); + let r = transmute::<_, __m128i>(_mm_cmpeq_sd(a, b)); + assert_eq_m128i(r, e); + } + test_mm_cmpeq_sd(); + + #[target_feature(enable = "sse2")] + unsafe fn test_mm_cmplt_sd() { + let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(5.0, 3.0)); + let e = _mm_setr_epi64x(!0, transmute(2.0f64)); + let r = transmute::<_, __m128i>(_mm_cmplt_sd(a, b)); + assert_eq_m128i(r, e); + } + test_mm_cmplt_sd(); + + #[target_feature(enable = "sse2")] + unsafe fn test_mm_cmple_sd() { + let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); + let e = _mm_setr_epi64x(!0, transmute(2.0f64)); + let r = transmute::<_, __m128i>(_mm_cmple_sd(a, b)); + assert_eq_m128i(r, e); + } + test_mm_cmple_sd(); + + #[target_feature(enable = "sse2")] + unsafe fn test_mm_cmpgt_sd() { + let (a, b) = (_mm_setr_pd(5.0, 2.0), _mm_setr_pd(1.0, 3.0)); + let e = _mm_setr_epi64x(!0, transmute(2.0f64)); + let r = transmute::<_, __m128i>(_mm_cmpgt_sd(a, b)); + assert_eq_m128i(r, e); + } + test_mm_cmpgt_sd(); + + #[target_feature(enable = "sse2")] + unsafe fn test_mm_cmpge_sd() { + let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); + let e = _mm_setr_epi64x(!0, transmute(2.0f64)); + let r = transmute::<_, __m128i>(_mm_cmpge_sd(a, b)); + assert_eq_m128i(r, e); + } + test_mm_cmpge_sd(); + + #[target_feature(enable = "sse2")] + unsafe fn test_mm_cmpord_sd() { + let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(5.0, 3.0)); + let e = _mm_setr_epi64x(0, transmute(2.0f64)); + let r = transmute::<_, __m128i>(_mm_cmpord_sd(a, b)); + assert_eq_m128i(r, e); + } + test_mm_cmpord_sd(); + + #[target_feature(enable = "sse2")] + unsafe fn test_mm_cmpunord_sd() { + let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(5.0, 3.0)); + let e = _mm_setr_epi64x(!0, transmute(2.0f64)); + let r = transmute::<_, __m128i>(_mm_cmpunord_sd(a, b)); + assert_eq_m128i(r, e); + } + test_mm_cmpunord_sd(); + + #[target_feature(enable = "sse2")] + unsafe fn test_mm_cmpneq_sd() { + let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(5.0, 3.0)); + let e = _mm_setr_epi64x(!0, transmute(2.0f64)); + let r = transmute::<_, __m128i>(_mm_cmpneq_sd(a, b)); + assert_eq_m128i(r, e); + } + test_mm_cmpneq_sd(); + + #[target_feature(enable = "sse2")] + unsafe fn test_mm_cmpnlt_sd() { + let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(5.0, 3.0)); + let e = _mm_setr_epi64x(0, transmute(2.0f64)); + let r = transmute::<_, __m128i>(_mm_cmpnlt_sd(a, b)); + assert_eq_m128i(r, e); + } + test_mm_cmpnlt_sd(); + + #[target_feature(enable = "sse2")] + unsafe fn test_mm_cmpnle_sd() { + let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); + let e = _mm_setr_epi64x(0, transmute(2.0f64)); + let r = transmute::<_, __m128i>(_mm_cmpnle_sd(a, b)); + assert_eq_m128i(r, e); + } + test_mm_cmpnle_sd(); + + #[target_feature(enable = "sse2")] + unsafe fn test_mm_cmpngt_sd() { + let (a, b) = (_mm_setr_pd(5.0, 2.0), _mm_setr_pd(1.0, 3.0)); + let e = _mm_setr_epi64x(0, transmute(2.0f64)); + let r = transmute::<_, __m128i>(_mm_cmpngt_sd(a, b)); + assert_eq_m128i(r, e); + } + test_mm_cmpngt_sd(); + + #[target_feature(enable = "sse2")] + unsafe fn test_mm_cmpnge_sd() { + let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); + let e = _mm_setr_epi64x(0, transmute(2.0f64)); + let r = transmute::<_, __m128i>(_mm_cmpnge_sd(a, b)); + assert_eq_m128i(r, e); + } + test_mm_cmpnge_sd(); + + #[target_feature(enable = "sse2")] + unsafe fn test_mm_cmpeq_pd() { + let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); + let e = _mm_setr_epi64x(!0, 0); + let r = transmute::<_, __m128i>(_mm_cmpeq_pd(a, b)); + assert_eq_m128i(r, e); + } + test_mm_cmpeq_pd(); + + #[target_feature(enable = "sse2")] + unsafe fn test_mm_cmplt_pd() { + let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); + let e = _mm_setr_epi64x(0, !0); + let r = transmute::<_, __m128i>(_mm_cmplt_pd(a, b)); + assert_eq_m128i(r, e); + } + test_mm_cmplt_pd(); + + #[target_feature(enable = "sse2")] + unsafe fn test_mm_cmple_pd() { + let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); + let e = _mm_setr_epi64x(!0, !0); + let r = transmute::<_, __m128i>(_mm_cmple_pd(a, b)); + assert_eq_m128i(r, e); + } + test_mm_cmple_pd(); + + #[target_feature(enable = "sse2")] + unsafe fn test_mm_cmpgt_pd() { + let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); + let e = _mm_setr_epi64x(0, 0); + let r = transmute::<_, __m128i>(_mm_cmpgt_pd(a, b)); + assert_eq_m128i(r, e); + } + test_mm_cmpgt_pd(); + + #[target_feature(enable = "sse2")] + unsafe fn test_mm_cmpge_pd() { + let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); + let e = _mm_setr_epi64x(!0, 0); + let r = transmute::<_, __m128i>(_mm_cmpge_pd(a, b)); + assert_eq_m128i(r, e); + } + test_mm_cmpge_pd(); + + #[target_feature(enable = "sse2")] + unsafe fn test_mm_cmpord_pd() { + let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(5.0, 3.0)); + let e = _mm_setr_epi64x(0, !0); + let r = transmute::<_, __m128i>(_mm_cmpord_pd(a, b)); + assert_eq_m128i(r, e); + } + test_mm_cmpord_pd(); + + #[target_feature(enable = "sse2")] + unsafe fn test_mm_cmpunord_pd() { + let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(5.0, 3.0)); + let e = _mm_setr_epi64x(!0, 0); + let r = transmute::<_, __m128i>(_mm_cmpunord_pd(a, b)); + assert_eq_m128i(r, e); + } + test_mm_cmpunord_pd(); + + #[target_feature(enable = "sse2")] + unsafe fn test_mm_cmpneq_pd() { + let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(5.0, 3.0)); + let e = _mm_setr_epi64x(!0, !0); + let r = transmute::<_, __m128i>(_mm_cmpneq_pd(a, b)); + assert_eq_m128i(r, e); + } + test_mm_cmpneq_pd(); + + #[target_feature(enable = "sse2")] + unsafe fn test_mm_cmpnlt_pd() { + let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(5.0, 3.0)); + let e = _mm_setr_epi64x(0, 0); + let r = transmute::<_, __m128i>(_mm_cmpnlt_pd(a, b)); + assert_eq_m128i(r, e); + } + test_mm_cmpnlt_pd(); + + #[target_feature(enable = "sse2")] + unsafe fn test_mm_cmpnle_pd() { + let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); + let e = _mm_setr_epi64x(0, 0); + let r = transmute::<_, __m128i>(_mm_cmpnle_pd(a, b)); + assert_eq_m128i(r, e); + } + test_mm_cmpnle_pd(); + + #[target_feature(enable = "sse2")] + unsafe fn test_mm_cmpngt_pd() { + let (a, b) = (_mm_setr_pd(5.0, 2.0), _mm_setr_pd(1.0, 3.0)); + let e = _mm_setr_epi64x(0, !0); + let r = transmute::<_, __m128i>(_mm_cmpngt_pd(a, b)); + assert_eq_m128i(r, e); + } + test_mm_cmpngt_pd(); + + #[target_feature(enable = "sse2")] + unsafe fn test_mm_cmpnge_pd() { + let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); + let e = _mm_setr_epi64x(0, !0); + let r = transmute::<_, __m128i>(_mm_cmpnge_pd(a, b)); + assert_eq_m128i(r, e); + } + test_mm_cmpnge_pd(); - #[target_feature(enable = "sse2")] - unsafe fn test_mm_cmpnge_pd() { - let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); - let e = _mm_setr_epi64x(0, !0); - let r = transmute::<_, __m128i>(_mm_cmpnge_pd(a, b)); - assert_eq_m128i(r, e); - } - test_mm_cmpnge_pd(); + #[target_feature(enable = "sse2")] + unsafe fn test_mm_comieq_sd() { + let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); + assert!(_mm_comieq_sd(a, b) != 0); - #[target_feature(enable = "sse2")] - unsafe fn test_mm_comieq_sd() { - let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); - assert!(_mm_comieq_sd(a, b) != 0); + let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(1.0, 3.0)); + assert!(_mm_comieq_sd(a, b) == 0); + } + test_mm_comieq_sd(); - let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(1.0, 3.0)); - assert!(_mm_comieq_sd(a, b) == 0); - } - test_mm_comieq_sd(); + #[target_feature(enable = "sse2")] + unsafe fn test_mm_comilt_sd() { + let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); + assert!(_mm_comilt_sd(a, b) == 0); + } + test_mm_comilt_sd(); - #[target_feature(enable = "sse2")] - unsafe fn test_mm_comilt_sd() { - let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); - assert!(_mm_comilt_sd(a, b) == 0); - } - test_mm_comilt_sd(); + #[target_feature(enable = "sse2")] + unsafe fn test_mm_comile_sd() { + let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); + assert!(_mm_comile_sd(a, b) != 0); + } + test_mm_comile_sd(); - #[target_feature(enable = "sse2")] - unsafe fn test_mm_comile_sd() { - let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); - assert!(_mm_comile_sd(a, b) != 0); - } - test_mm_comile_sd(); + #[target_feature(enable = "sse2")] + unsafe fn test_mm_comigt_sd() { + let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); + assert!(_mm_comigt_sd(a, b) == 0); + } + test_mm_comigt_sd(); - #[target_feature(enable = "sse2")] - unsafe fn test_mm_comigt_sd() { - let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); - assert!(_mm_comigt_sd(a, b) == 0); - } - test_mm_comigt_sd(); + #[target_feature(enable = "sse2")] + unsafe fn test_mm_comige_sd() { + let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); + assert!(_mm_comige_sd(a, b) != 0); + } + test_mm_comige_sd(); - #[target_feature(enable = "sse2")] - unsafe fn test_mm_comige_sd() { - let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); - assert!(_mm_comige_sd(a, b) != 0); - } - test_mm_comige_sd(); + #[target_feature(enable = "sse2")] + unsafe fn test_mm_comineq_sd() { + let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); + assert!(_mm_comineq_sd(a, b) == 0); + } + test_mm_comineq_sd(); - #[target_feature(enable = "sse2")] - unsafe fn test_mm_comineq_sd() { - let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); - assert!(_mm_comineq_sd(a, b) == 0); - } - test_mm_comineq_sd(); + #[target_feature(enable = "sse2")] + unsafe fn test_mm_ucomieq_sd() { + let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); + assert!(_mm_ucomieq_sd(a, b) != 0); - #[target_feature(enable = "sse2")] - unsafe fn test_mm_ucomieq_sd() { - let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); - assert!(_mm_ucomieq_sd(a, b) != 0); + let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(NAN, 3.0)); + assert!(_mm_ucomieq_sd(a, b) == 0); + } + test_mm_ucomieq_sd(); - let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(NAN, 3.0)); - assert!(_mm_ucomieq_sd(a, b) == 0); - } - test_mm_ucomieq_sd(); + #[target_feature(enable = "sse2")] + unsafe fn test_mm_ucomilt_sd() { + let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); + assert!(_mm_ucomilt_sd(a, b) == 0); + } + test_mm_ucomilt_sd(); - #[target_feature(enable = "sse2")] - unsafe fn test_mm_ucomilt_sd() { - let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); - assert!(_mm_ucomilt_sd(a, b) == 0); - } - test_mm_ucomilt_sd(); + #[target_feature(enable = "sse2")] + unsafe fn test_mm_ucomile_sd() { + let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); + assert!(_mm_ucomile_sd(a, b) != 0); + } + test_mm_ucomile_sd(); - #[target_feature(enable = "sse2")] - unsafe fn test_mm_ucomile_sd() { - let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); - assert!(_mm_ucomile_sd(a, b) != 0); - } - test_mm_ucomile_sd(); + #[target_feature(enable = "sse2")] + unsafe fn test_mm_ucomigt_sd() { + let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); + assert!(_mm_ucomigt_sd(a, b) == 0); + } + test_mm_ucomigt_sd(); - #[target_feature(enable = "sse2")] - unsafe fn test_mm_ucomigt_sd() { - let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); - assert!(_mm_ucomigt_sd(a, b) == 0); - } - test_mm_ucomigt_sd(); + #[target_feature(enable = "sse2")] + unsafe fn test_mm_ucomige_sd() { + let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); + assert!(_mm_ucomige_sd(a, b) != 0); + } + test_mm_ucomige_sd(); - #[target_feature(enable = "sse2")] - unsafe fn test_mm_ucomige_sd() { - let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); - assert!(_mm_ucomige_sd(a, b) != 0); - } - test_mm_ucomige_sd(); + #[target_feature(enable = "sse2")] + unsafe fn test_mm_ucomineq_sd() { + let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); + assert!(_mm_ucomineq_sd(a, b) == 0); + } + test_mm_ucomineq_sd(); - #[target_feature(enable = "sse2")] - unsafe fn test_mm_ucomineq_sd() { - let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); - assert!(_mm_ucomineq_sd(a, b) == 0); - } - test_mm_ucomineq_sd(); + #[target_feature(enable = "sse2")] + unsafe fn test_mm_cvtpd_ps() { + let r = _mm_cvtpd_ps(_mm_setr_pd(-1.0, 5.0)); + assert_eq_m128(r, _mm_setr_ps(-1.0, 5.0, 0.0, 0.0)); - #[target_feature(enable = "sse2")] - unsafe fn test_mm_cvtpd_ps() { - let r = _mm_cvtpd_ps(_mm_setr_pd(-1.0, 5.0)); - assert_eq_m128(r, _mm_setr_ps(-1.0, 5.0, 0.0, 0.0)); + let r = _mm_cvtpd_ps(_mm_setr_pd(-1.0, -5.0)); + assert_eq_m128(r, _mm_setr_ps(-1.0, -5.0, 0.0, 0.0)); - let r = _mm_cvtpd_ps(_mm_setr_pd(-1.0, -5.0)); - assert_eq_m128(r, _mm_setr_ps(-1.0, -5.0, 0.0, 0.0)); + let r = _mm_cvtpd_ps(_mm_setr_pd(f64::MAX, f64::MIN)); + assert_eq_m128(r, _mm_setr_ps(f32::INFINITY, f32::NEG_INFINITY, 0.0, 0.0)); - let r = _mm_cvtpd_ps(_mm_setr_pd(f64::MAX, f64::MIN)); - assert_eq_m128(r, _mm_setr_ps(f32::INFINITY, f32::NEG_INFINITY, 0.0, 0.0)); + let r = _mm_cvtpd_ps(_mm_setr_pd(f32::MAX as f64, f32::MIN as f64)); + assert_eq_m128(r, _mm_setr_ps(f32::MAX, f32::MIN, 0.0, 0.0)); + } + test_mm_cvtpd_ps(); - let r = _mm_cvtpd_ps(_mm_setr_pd(f32::MAX as f64, f32::MIN as f64)); - assert_eq_m128(r, _mm_setr_ps(f32::MAX, f32::MIN, 0.0, 0.0)); - } - test_mm_cvtpd_ps(); + #[target_feature(enable = "sse2")] + unsafe fn test_mm_cvtps_pd() { + let r = _mm_cvtps_pd(_mm_setr_ps(-1.0, 2.0, -3.0, 5.0)); + assert_eq_m128d(r, _mm_setr_pd(-1.0, 2.0)); - #[target_feature(enable = "sse2")] - unsafe fn test_mm_cvtps_pd() { - let r = _mm_cvtps_pd(_mm_setr_ps(-1.0, 2.0, -3.0, 5.0)); - assert_eq_m128d(r, _mm_setr_pd(-1.0, 2.0)); + let r = _mm_cvtps_pd(_mm_setr_ps(f32::MAX, f32::INFINITY, f32::NEG_INFINITY, f32::MIN)); + assert_eq_m128d(r, _mm_setr_pd(f32::MAX as f64, f64::INFINITY)); + } + test_mm_cvtps_pd(); - let r = _mm_cvtps_pd(_mm_setr_ps(f32::MAX, f32::INFINITY, f32::NEG_INFINITY, f32::MIN)); - assert_eq_m128d(r, _mm_setr_pd(f32::MAX as f64, f64::INFINITY)); - } - test_mm_cvtps_pd(); + #[target_feature(enable = "sse2")] + unsafe fn test_mm_cvtpd_epi32() { + let r = _mm_cvtpd_epi32(_mm_setr_pd(-1.0, 5.0)); + assert_eq_m128i(r, _mm_setr_epi32(-1, 5, 0, 0)); - #[target_feature(enable = "sse2")] - unsafe fn test_mm_cvtpd_epi32() { - let r = _mm_cvtpd_epi32(_mm_setr_pd(-1.0, 5.0)); - assert_eq_m128i(r, _mm_setr_epi32(-1, 5, 0, 0)); + let r = _mm_cvtpd_epi32(_mm_setr_pd(-1.0, -5.0)); + assert_eq_m128i(r, _mm_setr_epi32(-1, -5, 0, 0)); - let r = _mm_cvtpd_epi32(_mm_setr_pd(-1.0, -5.0)); - assert_eq_m128i(r, _mm_setr_epi32(-1, -5, 0, 0)); + let r = _mm_cvtpd_epi32(_mm_setr_pd(f64::MAX, f64::MIN)); + assert_eq_m128i(r, _mm_setr_epi32(i32::MIN, i32::MIN, 0, 0)); - let r = _mm_cvtpd_epi32(_mm_setr_pd(f64::MAX, f64::MIN)); - assert_eq_m128i(r, _mm_setr_epi32(i32::MIN, i32::MIN, 0, 0)); + let r = _mm_cvtpd_epi32(_mm_setr_pd(f64::INFINITY, f64::NEG_INFINITY)); + assert_eq_m128i(r, _mm_setr_epi32(i32::MIN, i32::MIN, 0, 0)); - let r = _mm_cvtpd_epi32(_mm_setr_pd(f64::INFINITY, f64::NEG_INFINITY)); - assert_eq_m128i(r, _mm_setr_epi32(i32::MIN, i32::MIN, 0, 0)); + let r = _mm_cvtpd_epi32(_mm_setr_pd(f64::NAN, f64::NAN)); + assert_eq_m128i(r, _mm_setr_epi32(i32::MIN, i32::MIN, 0, 0)); + } + test_mm_cvtpd_epi32(); - let r = _mm_cvtpd_epi32(_mm_setr_pd(f64::NAN, f64::NAN)); - assert_eq_m128i(r, _mm_setr_epi32(i32::MIN, i32::MIN, 0, 0)); - } - test_mm_cvtpd_epi32(); + #[target_feature(enable = "sse2")] + unsafe fn test_mm_cvttpd_epi32() { + let a = _mm_setr_pd(-1.1, 2.2); + let r = _mm_cvttpd_epi32(a); + assert_eq_m128i(r, _mm_setr_epi32(-1, 2, 0, 0)); - #[target_feature(enable = "sse2")] - unsafe fn test_mm_cvttpd_epi32() { - let a = _mm_setr_pd(-1.1, 2.2); - let r = _mm_cvttpd_epi32(a); - assert_eq_m128i(r, _mm_setr_epi32(-1, 2, 0, 0)); - - let a = _mm_setr_pd(f64::NEG_INFINITY, f64::NAN); - let r = _mm_cvttpd_epi32(a); - assert_eq_m128i(r, _mm_setr_epi32(i32::MIN, i32::MIN, 0, 0)); - } - test_mm_cvttpd_epi32(); + let a = _mm_setr_pd(f64::NEG_INFINITY, f64::NAN); + let r = _mm_cvttpd_epi32(a); + assert_eq_m128i(r, _mm_setr_epi32(i32::MIN, i32::MIN, 0, 0)); + } + test_mm_cvttpd_epi32(); - #[target_feature(enable = "sse2")] - unsafe fn test_mm_cvtsd_si32() { - let r = _mm_cvtsd_si32(_mm_setr_pd(-2.0, 5.0)); - assert_eq!(r, -2); + #[target_feature(enable = "sse2")] + unsafe fn test_mm_cvtsd_si32() { + let r = _mm_cvtsd_si32(_mm_setr_pd(-2.0, 5.0)); + assert_eq!(r, -2); - let r = _mm_cvtsd_si32(_mm_setr_pd(f64::MAX, f64::MIN)); - assert_eq!(r, i32::MIN); + let r = _mm_cvtsd_si32(_mm_setr_pd(f64::MAX, f64::MIN)); + assert_eq!(r, i32::MIN); - let r = _mm_cvtsd_si32(_mm_setr_pd(f64::NAN, f64::NAN)); - assert_eq!(r, i32::MIN); - } - test_mm_cvtsd_si32(); + let r = _mm_cvtsd_si32(_mm_setr_pd(f64::NAN, f64::NAN)); + assert_eq!(r, i32::MIN); + } + test_mm_cvtsd_si32(); - #[target_feature(enable = "sse2")] - unsafe fn test_mm_cvttsd_si32() { - let a = _mm_setr_pd(-1.1, 2.2); - let r = _mm_cvttsd_si32(a); - assert_eq!(r, -1); - - let a = _mm_setr_pd(f64::NEG_INFINITY, f64::NAN); - let r = _mm_cvttsd_si32(a); - assert_eq!(r, i32::MIN); - } - test_mm_cvttsd_si32(); + #[target_feature(enable = "sse2")] + unsafe fn test_mm_cvttsd_si32() { + let a = _mm_setr_pd(-1.1, 2.2); + let r = _mm_cvttsd_si32(a); + assert_eq!(r, -1); - // Intrinsic only available on x86_64 - #[cfg(target_arch = "x86_64")] - #[target_feature(enable = "sse2")] - unsafe fn test_mm_cvtsd_si64() { - let r = _mm_cvtsd_si64(_mm_setr_pd(-2.0, 5.0)); - assert_eq!(r, -2_i64); + let a = _mm_setr_pd(f64::NEG_INFINITY, f64::NAN); + let r = _mm_cvttsd_si32(a); + assert_eq!(r, i32::MIN); + } + test_mm_cvttsd_si32(); - let r = _mm_cvtsd_si64(_mm_setr_pd(f64::MAX, f64::MIN)); - assert_eq!(r, i64::MIN); - } - #[cfg(target_arch = "x86_64")] - test_mm_cvtsd_si64(); + // Intrinsic only available on x86_64 + #[cfg(target_arch = "x86_64")] + #[target_feature(enable = "sse2")] + unsafe fn test_mm_cvtsd_si64() { + let r = _mm_cvtsd_si64(_mm_setr_pd(-2.0, 5.0)); + assert_eq!(r, -2_i64); - // Intrinsic only available on x86_64 - #[cfg(target_arch = "x86_64")] - #[target_feature(enable = "sse2")] - unsafe fn test_mm_cvttsd_si64() { - let a = _mm_setr_pd(-1.1, 2.2); - let r = _mm_cvttsd_si64(a); - assert_eq!(r, -1_i64); - } - #[cfg(target_arch = "x86_64")] - test_mm_cvttsd_si64(); + let r = _mm_cvtsd_si64(_mm_setr_pd(f64::MAX, f64::MIN)); + assert_eq!(r, i64::MIN); + } + #[cfg(target_arch = "x86_64")] + test_mm_cvtsd_si64(); + + // Intrinsic only available on x86_64 + #[cfg(target_arch = "x86_64")] + #[target_feature(enable = "sse2")] + unsafe fn test_mm_cvttsd_si64() { + let a = _mm_setr_pd(-1.1, 2.2); + let r = _mm_cvttsd_si64(a); + assert_eq!(r, -1_i64); + } + #[cfg(target_arch = "x86_64")] + test_mm_cvttsd_si64(); - #[target_feature(enable = "sse2")] - unsafe fn test_mm_cvtsd_ss() { - let a = _mm_setr_ps(-1.1, -2.2, 3.3, 4.4); - let b = _mm_setr_pd(2.0, -5.0); + #[target_feature(enable = "sse2")] + unsafe fn test_mm_cvtsd_ss() { + let a = _mm_setr_ps(-1.1, -2.2, 3.3, 4.4); + let b = _mm_setr_pd(2.0, -5.0); - let r = _mm_cvtsd_ss(a, b); + let r = _mm_cvtsd_ss(a, b); - assert_eq_m128(r, _mm_setr_ps(2.0, -2.2, 3.3, 4.4)); + assert_eq_m128(r, _mm_setr_ps(2.0, -2.2, 3.3, 4.4)); - let a = _mm_setr_ps(-1.1, f32::NEG_INFINITY, f32::MAX, f32::NEG_INFINITY); - let b = _mm_setr_pd(f64::INFINITY, -5.0); + let a = _mm_setr_ps(-1.1, f32::NEG_INFINITY, f32::MAX, f32::NEG_INFINITY); + let b = _mm_setr_pd(f64::INFINITY, -5.0); - let r = _mm_cvtsd_ss(a, b); + let r = _mm_cvtsd_ss(a, b); - assert_eq_m128( - r, - _mm_setr_ps(f32::INFINITY, f32::NEG_INFINITY, f32::MAX, f32::NEG_INFINITY), - ); - } - test_mm_cvtsd_ss(); + assert_eq_m128( + r, + _mm_setr_ps(f32::INFINITY, f32::NEG_INFINITY, f32::MAX, f32::NEG_INFINITY), + ); + } + test_mm_cvtsd_ss(); - #[target_feature(enable = "sse2")] - unsafe fn test_mm_cvtss_sd() { - let a = _mm_setr_pd(-1.1, 2.2); - let b = _mm_setr_ps(1.0, 2.0, 3.0, 4.0); + #[target_feature(enable = "sse2")] + unsafe fn test_mm_cvtss_sd() { + let a = _mm_setr_pd(-1.1, 2.2); + let b = _mm_setr_ps(1.0, 2.0, 3.0, 4.0); - let r = _mm_cvtss_sd(a, b); - assert_eq_m128d(r, _mm_setr_pd(1.0, 2.2)); + let r = _mm_cvtss_sd(a, b); + assert_eq_m128d(r, _mm_setr_pd(1.0, 2.2)); - let a = _mm_setr_pd(-1.1, f64::INFINITY); - let b = _mm_setr_ps(f32::NEG_INFINITY, 2.0, 3.0, 4.0); + let a = _mm_setr_pd(-1.1, f64::INFINITY); + let b = _mm_setr_ps(f32::NEG_INFINITY, 2.0, 3.0, 4.0); - let r = _mm_cvtss_sd(a, b); - assert_eq_m128d(r, _mm_setr_pd(f64::NEG_INFINITY, f64::INFINITY)); - } - test_mm_cvtss_sd(); + let r = _mm_cvtss_sd(a, b); + assert_eq_m128d(r, _mm_setr_pd(f64::NEG_INFINITY, f64::INFINITY)); + } + test_mm_cvtss_sd(); - #[target_feature(enable = "sse2")] - unsafe fn test_mm_movemask_pd() { - let r = _mm_movemask_pd(_mm_setr_pd(-1.0, 5.0)); - assert_eq!(r, 0b01); + #[target_feature(enable = "sse2")] + unsafe fn test_mm_movemask_pd() { + let r = _mm_movemask_pd(_mm_setr_pd(-1.0, 5.0)); + assert_eq!(r, 0b01); - let r = _mm_movemask_pd(_mm_setr_pd(-1.0, -5.0)); - assert_eq!(r, 0b11); + let r = _mm_movemask_pd(_mm_setr_pd(-1.0, -5.0)); + assert_eq!(r, 0b11); + } + test_mm_movemask_pd(); } - test_mm_movemask_pd(); } From 085177678e36d4644346371c29014656b41b54cd Mon Sep 17 00:00:00 2001 From: Ralf Jung Date: Mon, 25 Sep 2023 08:56:19 +0200 Subject: [PATCH 26/26] remove some dead code and add a comment in the AVX test --- src/tools/miri/src/helpers.rs | 2 - src/tools/miri/src/helpers/convert.rs | 49 ------------------- .../miri/tests/pass/intrinsics-x86-avx512.rs | 1 + 3 files changed, 1 insertion(+), 51 deletions(-) delete mode 100644 src/tools/miri/src/helpers/convert.rs diff --git a/src/tools/miri/src/helpers.rs b/src/tools/miri/src/helpers.rs index badd2629388cc..fd9d57c487c0e 100644 --- a/src/tools/miri/src/helpers.rs +++ b/src/tools/miri/src/helpers.rs @@ -1,5 +1,3 @@ -pub mod convert; - use std::cmp; use std::iter; use std::num::NonZeroUsize; diff --git a/src/tools/miri/src/helpers/convert.rs b/src/tools/miri/src/helpers/convert.rs deleted file mode 100644 index 4506fe47495d0..0000000000000 --- a/src/tools/miri/src/helpers/convert.rs +++ /dev/null @@ -1,49 +0,0 @@ -use implementations::NarrowerThan; - -/// Replacement for `as` casts going from wide integer to narrower integer. -/// -/// # Example -/// -/// ```ignore -/// let x = 99_u64; -/// let lo = x.truncate::(); -/// // lo is of type u16, equivalent to `x as u16`. -/// ``` -pub(crate) trait Truncate: Sized { - fn truncate(self) -> To - where - To: NarrowerThan, - { - NarrowerThan::truncate_from(self) - } -} - -impl Truncate for u16 {} -impl Truncate for u32 {} -impl Truncate for u64 {} -impl Truncate for u128 {} - -mod implementations { - pub(crate) trait NarrowerThan { - fn truncate_from(wide: T) -> Self; - } - - macro_rules! impl_narrower_than { - ($(NarrowerThan<{$($ty:ty),*}> for $self:ty)*) => { - $($( - impl NarrowerThan<$ty> for $self { - fn truncate_from(wide: $ty) -> Self { - wide as Self - } - } - )*)* - }; - } - - impl_narrower_than! { - NarrowerThan<{u128, u64, u32, u16}> for u8 - NarrowerThan<{u128, u64, u32}> for u16 - NarrowerThan<{u128, u64}> for u32 - NarrowerThan<{u128}> for u64 - } -} diff --git a/src/tools/miri/tests/pass/intrinsics-x86-avx512.rs b/src/tools/miri/tests/pass/intrinsics-x86-avx512.rs index a5615aaa43225..c38158dc797ca 100644 --- a/src/tools/miri/tests/pass/intrinsics-x86-avx512.rs +++ b/src/tools/miri/tests/pass/intrinsics-x86-avx512.rs @@ -1,5 +1,6 @@ // Ignore everything except x86 and x86_64 // Any additional target are added to CI should be ignored here +// (We cannot use `cfg`-based tricks here since the `target-feature` flags below only work on x86.) //@ignore-target-aarch64 //@ignore-target-arm //@ignore-target-avr