Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add rtm cpu feature intrinsics #726

Merged
merged 9 commits into from
Apr 25, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions crates/core_arch/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
wasm_target_feature,
abi_unadjusted,
adx_target_feature,
rtm_target_feature,
external_doc
)]
#![cfg_attr(test, feature(test, abi_vectorcall, untagged_unions))]
Expand Down
3 changes: 3 additions & 0 deletions crates/core_arch/src/x86/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -565,3 +565,6 @@ pub use self::avx512ifma::*;

mod bt;
pub use self::bt::*;

mod rtm;
pub use self::rtm::*;
165 changes: 165 additions & 0 deletions crates/core_arch/src/x86/rtm.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,165 @@
//! Intel's Restricted Transactional Memory (RTM).
//!
//! This CPU feature is available on Intel Broadwell or later CPUs (and some Haswell).
//!
//! The reference is [Intel 64 and IA-32 Architectures Software Developer's
//! Manual Volume 2: Instruction Set Reference, A-Z][intel64_ref].
//!
//! [Wikipedia][wikipedia_rtm] provides a quick overview of the assembly instructions, and
//! Intel's [programming considerations][intel_consid] details what sorts of instructions within a
//! transaction are likely to cause an abort.
//!
//! [intel64_ref]: http://www.intel.de/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf
//! [wikipedia_rtm]: https://en.wikipedia.org/wiki/Transactional_Synchronization_Extensions#Restricted_Transactional_Memory
//! [intel_consid]: https://software.intel.com/en-us/cpp-compiler-developer-guide-and-reference-intel-transactional-synchronization-extensions-intel-tsx-programming-considerations

#[cfg(test)]
gnzlbg marked this conversation as resolved.
Show resolved Hide resolved
use stdsimd_test::assert_instr;

// Raw LLVM intrinsics that the public RTM wrappers in this module forward to.
extern "C" {
    // Called by `_xbegin`; yields the transaction status as a signed 32-bit value.
    #[link_name = "llvm.x86.xbegin"]
    fn x86_xbegin() -> i32;

    // Called by `_xend`.
    #[link_name = "llvm.x86.xend"]
    fn x86_xend();

    // Called by `_xabort` with the 8-bit abort code.
    #[link_name = "llvm.x86.xabort"]
    fn x86_xabort(imm8: i8);

    // Called by `_xtest`; the wrapper narrows the result to `u8`.
    #[link_name = "llvm.x86.xtest"]
    fn x86_xtest() -> i32;
}

/// Status value reported when a transaction was started successfully (all bits set).
pub const _XBEGIN_STARTED: u32 = 0xFFFF_FFFF;

/// The transaction was explicitly aborted with xabort. The parameter passed to xabort can be
/// read back with `_xabort_code(status)`.
pub const _XABORT_EXPLICIT: u32 = 0x01;

/// Retrying the transaction is possible.
pub const _XABORT_RETRY: u32 = 0x02;

/// The transaction aborted because of a memory conflict with another thread.
pub const _XABORT_CONFLICT: u32 = 0x04;

/// The transaction aborted because it used too much memory.
pub const _XABORT_CAPACITY: u32 = 0x08;

/// The transaction aborted because of a debug trap.
pub const _XABORT_DEBUG: u32 = 0x10;

/// The abort happened in an inner nested transaction.
pub const _XABORT_NESTED: u32 = 0x20;

/// Marks the start of a restricted transactional memory (RTM) code region and reports a status
/// value.
///
/// The status equals `_XBEGIN_STARTED` when the transaction was started; the tests in this module
/// check other values against the `_XABORT_*` flags.
///
/// [Intel's documentation](https://software.intel.com/en-us/cpp-compiler-developer-guide-and-reference-xbegin).
#[inline]
#[target_feature(enable = "rtm")]
#[cfg_attr(test, assert_instr(xbegin))]
pub unsafe fn _xbegin() -> u32 {
    // The intrinsic reports a signed value; the public API exposes the same bits as `u32`.
    let status: i32 = x86_xbegin();
    status as u32
}

/// Marks the end of a restricted transactional memory (RTM) code region.
///
/// [Intel's documentation](https://software.intel.com/en-us/cpp-compiler-developer-guide-and-reference-xend).
#[inline]
#[target_feature(enable = "rtm")]
#[cfg_attr(test, assert_instr(xend))]
pub unsafe fn _xend() {
    x86_xend();
}

/// Forces the current restricted transactional memory (RTM) region to abort.
///
/// `imm8` is marked as a required-constant argument. After an explicit abort, the value can be
/// recovered from the status returned by `_xbegin` via `_xabort_code`.
///
/// [Intel's documentation](https://software.intel.com/en-us/cpp-compiler-developer-guide-and-reference-xabort).
#[inline]
#[target_feature(enable = "rtm")]
#[cfg_attr(test, assert_instr(xabort, imm8 = 0x0))]
#[rustc_args_required_const(0)]
pub unsafe fn _xabort(imm8: u32) {
    // NOTE(review): `constify_imm8!` is defined elsewhere; presumably it dispatches on `imm8` and
    // invokes this macro with a constant expression — confirm against its definition.
    macro_rules! abort_with {
        ($code:expr) => {
            x86_xabort($code)
        };
    }
    constify_imm8!(imm8, abort_with)
}

/// Queries whether the processor is executing in a transactional region identified by restricted
/// transactional memory (RTM) or hardware lock elision (HLE).
///
/// Returns `1` when called inside a transactional region and `0` otherwise.
///
/// [Intel's documentation](https://software.intel.com/en-us/cpp-compiler-developer-guide-and-reference-xtest).
#[inline]
#[target_feature(enable = "rtm")]
#[cfg_attr(test, assert_instr(xtest))]
pub unsafe fn _xtest() -> u8 {
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just changed this to return u8 to match intel headers more directly. It is specified to always be 0 or 1.

Additionally, if/when stdsimd later adds HLE intrinsics, _xtest should be enabled if either hle or rtm is supported.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Additionally, if/when stdsimd later adds HLE intrinsics, _xtest should be enabled if either hle or rtm is supported.

I don't think this is something that can be easily supported. We need to know which features are available when generating code for this intrinsic, so it is either rtm, or hle, or both. But if its rtm or hle, then we need to generate different code depending on the answer to which one is it.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

All the intel docs say either HLE or RTM. The instruction set reference for xtest says under "CPUID Feature Flag": HLE or RTM.

we need to generate different code depending on the answer to which one is it.

Does that mean we need a duplicate of _xtest (_hletest) in any future hle.rs module?

@gnzlbg This should be ready to run through travis now.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_xtest&expand=6168 says RTM CPUID flag only.

Does that mean we need a duplicate of _xtest (_hletest) in any future hle.rs module?

That's what it would mean, but that would be very weird. It seems more likely that the link you posted is incorrect, you might want to ask Intel for clarification in their forums.

// The intrinsic returns an `i32`; the cast narrows it to the `u8` return type.
x86_xtest() as _
}

/// Extracts the value that was passed to [`_xabort`] from a status returned by [`_xbegin`].
///
/// The result is meaningful when `status` has the `_XABORT_EXPLICIT` flag set.
#[inline]
pub const fn _xabort_code(status: u32) -> u32 {
    // The abort code is stored in the most significant byte of the status word.
    (status & 0xFF00_0000) >> 24
}

// Tests for the RTM intrinsics. Each test retries up to 10 times because a transaction may fail
// to start; assertions are kept outside the transactional region.
#[cfg(test)]
mod tests {
use stdsimd_test::simd_test;

use crate::core_arch::x86::*;

// Starts a transaction, increments `x` inside it, commits with `_xend`, and checks that the
// increment is visible afterwards.
#[simd_test(enable = "rtm")]
unsafe fn test_xbegin_xend() {
let mut x = 0;
for _ in 0..10 {
let code = rtm::_xbegin();
if code == _XBEGIN_STARTED {
x += 1;
rtm::_xend();
assert_eq!(x, 1);
break;
}
// this attempt did not commit, so `x` is expected to still be 0
assert_eq!(x, 0);
}
}

// Explicitly aborts a transaction and checks that the abort code can be read back from the
// status returned by `_xbegin`.
#[simd_test(enable = "rtm")]
unsafe fn test_xabort() {
const ABORT_CODE: u32 = 42;
// aborting outside a transactional region does nothing
_xabort(ABORT_CODE);

for _ in 0..10 {
let mut x = 0;
let code = rtm::_xbegin();
if code == _XBEGIN_STARTED {
x += 1;
rtm::_xabort(ABORT_CODE);
} else if code & _XABORT_EXPLICIT != 0 {
// explicit abort reported: the status must carry the code given to `_xabort`
let test_abort_code = rtm::_xabort_code(code);
assert_eq!(test_abort_code, ABORT_CODE);
}
// the increment made inside the transaction is expected not to be visible here
assert_eq!(x, 0);
}
}

// Checks that `_xtest` reports 0 outside a transaction and 1 inside one.
#[simd_test(enable = "rtm")]
unsafe fn test_xtest() {
assert_eq!(_xtest(), 0);

for _ in 0..10 {
let code = rtm::_xbegin();
if code == _XBEGIN_STARTED {
// capture the result inside the transaction, check it only after `_xend`
let in_tx = _xtest();
rtm::_xend();

// putting the assert inside the transaction would abort the transaction on fail
// without any output/panic/etc
assert_eq!(in_tx, 1);
break;
}
}
}
}
8 changes: 8 additions & 0 deletions crates/std_detect/src/detect/arch/x86.rs
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,8 @@
/// * `"xsaveopt"`
/// * `"xsaves"`
/// * `"xsavec"`
/// * `"adx"`
/// * `"rtm"`
///
/// [docs]: https://software.intel.com/sites/landingpage/IntrinsicsGuide
#[macro_export]
Expand Down Expand Up @@ -233,6 +235,10 @@ macro_rules! is_x86_feature_detected {
cfg!(target_feature = "adx") || $crate::detect::check_for(
$crate::detect::Feature::adx)
};
("rtm") => {
cfg!(target_feature = "rtm") || $crate::detect::check_for(
$crate::detect::Feature::rtm)
};
($t:tt,) => {
is_x86_feature_detected!($t);
};
Expand Down Expand Up @@ -330,4 +336,6 @@ pub enum Feature {
cmpxchg16b,
/// ADX, Intel ADX (Multi-Precision Add-Carry Instruction Extensions)
adx,
/// RTM, Intel RTM (Restricted Transactional Memory)
rtm,
}
6 changes: 6 additions & 0 deletions crates/std_detect/src/detect/os/x86.rs
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,7 @@ fn detect_features() -> cache::Initializer {
enable(proc_info_ecx, 30, Feature::rdrand);
enable(extended_features_ebx, 18, Feature::rdseed);
enable(extended_features_ebx, 19, Feature::adx);
enable(extended_features_ebx, 11, Feature::rtm);
enable(proc_info_edx, 4, Feature::tsc);
enable(proc_info_edx, 23, Feature::mmx);
enable(proc_info_edx, 24, Feature::fxsr);
Expand Down Expand Up @@ -290,6 +291,7 @@ mod tests {
println!("xsavec: {:?}", is_x86_feature_detected!("xsavec"));
println!("cmpxchg16b: {:?}", is_x86_feature_detected!("cmpxchg16b"));
println!("adx: {:?}", is_x86_feature_detected!("adx"));
println!("rtm: {:?}", is_x86_feature_detected!("rtm"));
}

#[test]
Expand Down Expand Up @@ -354,5 +356,9 @@ mod tests {
is_x86_feature_detected!("adx"),
information.adx(),
);
assert_eq!(
is_x86_feature_detected!("rtm"),
information.rtm(),
);
}
}
1 change: 1 addition & 0 deletions crates/stdsimd-verify/tests/x86-intel.rs
Original file line number Diff line number Diff line change
Expand Up @@ -431,6 +431,7 @@ fn equate(t: &Type, intel: &str, intrinsic: &str, is_const: bool) -> Result<(),
(&Type::PrimSigned(8), "char") => {}
(&Type::PrimUnsigned(16), "unsigned short") => {}
(&Type::PrimUnsigned(32), "unsigned int") => {}
(&Type::PrimUnsigned(32), "const unsigned int") => {}
(&Type::PrimUnsigned(64), "unsigned __int64") => {}
(&Type::PrimUnsigned(8), "unsigned char") => {}

Expand Down