From 8946599dcd496ff8895eb190912a4e6b8cb6df29 Mon Sep 17 00:00:00 2001 From: Taiki Endo Date: Sun, 25 Dec 2022 16:57:42 +0900 Subject: [PATCH] Enable outline-atomics by default and provide cfg to disable it --- .cirrus.yml | 2 +- CHANGELOG.md | 6 ++++-- Cargo.toml | 14 ++++---------- README.md | 27 ++++++++++++++------------- bench/Cargo.toml | 1 - build.rs | 3 ++- src/imp/atomic128/aarch64.rs | 12 ++++++------ src/imp/atomic128/cpuid.rs | 2 +- src/imp/atomic128/x86_64.rs | 10 +++++----- src/lib.rs | 27 ++++++++++++++------------- src/tests/mod.rs | 16 ++++++++++++++++ tools/build.sh | 15 ++++++++++----- 12 files changed, 77 insertions(+), 58 deletions(-) diff --git a/.cirrus.yml b/.cirrus.yml index 79d201cd..dcf43f29 100644 --- a/.cirrus.yml +++ b/.cirrus.yml @@ -96,7 +96,7 @@ aarch64_linux_valgrind_task: # setup_script: # - rustup toolchain add nightly && rustup default nightly # bench_script: -# - RUSTFLAGS="${RUSTFLAGS} -C target-feature=-outline-atomics" cargo bench -vv --manifest-path bench/Cargo.toml +# - RUSTFLAGS="${RUSTFLAGS} -C target-feature=-outline-atomics --cfg portable_atomic_no_outline_atomics" cargo bench -vv --manifest-path bench/Cargo.toml # - RUSTFLAGS="${RUSTFLAGS} -C target-feature=+lse" cargo bench -vv --manifest-path bench/Cargo.toml # aarch64_macos_bench_task: diff --git a/CHANGELOG.md b/CHANGELOG.md index 9b86eb10..41ee5df1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,13 +10,13 @@ Note: In this file, do not use the hard wrap in the middle of a sentence for com ## [Unreleased] -- Add `AtomicI*::{fetch_neg,neg}` and `AtomicF*::fetch_neg` methods. +- Add `AtomicI*::{fetch_neg,neg}` and `AtomicF*::fetch_neg` methods. ([#54](https://github.com/taiki-e/portable-atomic/pull/54)) `AtomicI*::neg` are equivalent to the corresponding `fetch_*` methods, but do not return the previous value. They are intended for optimization on platforms that have atomic instructions for the corresponding operation, such as x86's `lock neg`. Currently, optimizations by these methods (`neg`) are only guaranteed for x86. -- Add `Atomic{I,U}*::{fetch_not,not}` methods. +- Add `Atomic{I,U}*::{fetch_not,not}` methods. ([#54](https://github.com/taiki-e/portable-atomic/pull/54)) `Atomic{I,U}*::not` are equivalent to the corresponding `fetch_*` methods, but do not return the previous value. They are intended for optimization on platforms that have atomic instructions for the corresponding operation, such as x86's `lock not`, MSP430's `inv`. @@ -24,6 +24,8 @@ Note: In this file, do not use the hard wrap in the middle of a sentence for com (Note: `AtomicBool` already has `fetch_not` and `not` methods.) +- Enable outline-atomics for 128-bit atomics by default. ([#57](https://github.com/taiki-e/portable-atomic/pull/57)) See [#57](https://github.com/taiki-e/portable-atomic/pull/57) for more. + - Improve support for old nightly compilers. ## [0.3.18] - 2022-12-15 diff --git a/Cargo.toml b/Cargo.toml index 1ce09ebb..e504d4af 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -35,16 +35,6 @@ default = ["fallback"] # Disabling this allows only atomic types for which the platform natively supports atomic operations. fallback = [] -# Enable run-time CPU feature detection. -# -# This allows maintaining support for older CPUs while using features that are not supported on older CPUs, such as CMPXCHG16B (x86_64) and FEAT_LSE (aarch64). -# -# Note: -# - Dynamic detection is currently only enabled in Rust 1.61+ for aarch64 and in nightly for other platforms, otherwise it works the same as the default. -# - If the required target features are enabled at compile-time, the atomic operations are inlined. -# - This is compatible with no-std (as with all features except `std`). -outline-atomics = [] - # Provide `AtomicF{32,64}`. # Note that most of `fetch_*` operations of atomic floats are implemented using CAS loops, which can be slower than equivalent operations of atomic integers. float = [] @@ -56,6 +46,10 @@ float = [] # Use `std`. std = [] +# no-op since 0.3.19 +# TODO: remove in next breaking release (0.4 or 1.0). +outline-atomics = [] + # Note: serde is public dependencies. [dependencies] # Implements serde::{Serialize,Deserialize} for atomic types. diff --git a/README.md b/README.md index 620d260a..b69a4a37 100644 --- a/README.md +++ b/README.md @@ -42,7 +42,7 @@ portable-atomic = { version = "0.3", default-features = false } Native 128-bit atomic operations are available on x86_64 (Rust 1.59+), aarch64 (Rust 1.59+), powerpc64 (le or pwr8+, nightly only), and s390x (nightly only), otherwise the fallback implementation is used. -On x86_64, when the `outline-atomics` optional feature is not enabled and `cmpxchg16b` target feature is not enabled at compile-time, this uses the fallback implementation. `cmpxchg16b` target feature is enabled by default only on macOS. +On x86_64, even if `cmpxchg16b` is not available at compile time (note: `cmpxchg16b` target feature is enabled by default only on macOS), run-time detection checks whether `cmpxchg16b` is available. If `cmpxchg16b` is not available at either compile-time or run-time detection, the fallback implementation is used. See also [`portable_atomic_no_outline_atomics`](#optional-cfg-no-outline-atomics) cfg. They are usually implemented using inline assembly, and when using Miri or ThreadSanitizer that do not support inline assembly, core intrinsics are used instead of inline assembly if possible. @@ -55,18 +55,6 @@ See [this list](https://github.com/taiki-e/portable-atomic/issues/10#issuecommen Disabling this allows only atomic types for which the platform natively supports atomic operations. -- **`outline-atomics`**
- Enable run-time CPU feature detection. - - This allows maintaining support for older CPUs while using features that are not supported on older CPUs, such as CMPXCHG16B (x86_64) and FEAT_LSE (aarch64). - - Note: - - Dynamic detection is currently only enabled in Rust 1.61+ for aarch64, in 1.59+ (AVX) or nightly (CMPXCHG16B) for x86_64, and in nightly for other platforms, otherwise it works the same as the default. - - If the required target features are enabled at compile-time, the atomic operations are inlined. - - This is compatible with no-std (as with all features except `std`). - - See also [this list](https://github.com/taiki-e/portable-atomic/issues/10#issuecomment-1159368067). - - **`float`**
Provide `AtomicF{32,64}`. Note that most of `fetch_*` operations of atomic floats are implemented using CAS loops, which can be slower than equivalent operations of atomic integers. @@ -114,6 +102,19 @@ See [this list](https://github.com/taiki-e/portable-atomic/issues/10#issuecommen Feel free to submit an issue if your target is not supported yet. +- **`--cfg portable_atomic_no_outline_atomics`**
+ Disable dynamic dispatching by run-time CPU feature detection. + + If dynamic dispatching by run-time CPU feature detection is enabled, it allows maintaining support for older CPUs while using features that are not supported on older CPUs, such as CMPXCHG16B (x86_64) and FEAT_LSE (aarch64). + + Note: + - Dynamic detection is currently only enabled in Rust 1.61+ for aarch64, in 1.59+ (AVX) or nightly (CMPXCHG16B) for x86_64, and in nightly for other platforms, otherwise it works the same as when this cfg is set. + - If the required target features are enabled at compile-time, the atomic operations are inlined. + - This is compatible with no-std (as with all features except `std`). + - Some aarch64 targets enable LLVM's `outline-atomics` target feature by default, so if you set this cfg, you may want to disable that as well. + + See also [this list](https://github.com/taiki-e/portable-atomic/issues/10#issuecomment-1159368067). + ## Related Projects - [atomic-maybe-uninit]: Atomic operations on potentially uninitialized integers. diff --git a/bench/Cargo.toml b/bench/Cargo.toml index aea13f37..0f8557ea 100644 --- a/bench/Cargo.toml +++ b/bench/Cargo.toml @@ -9,7 +9,6 @@ publish = false default = ["fallback", "std"] fallback = [] std = [] -outline-atomics = [] [dev-dependencies] atomic = "0.5" diff --git a/build.rs b/build.rs index c7ae4b42..8226d198 100644 --- a/build.rs +++ b/build.rs @@ -30,6 +30,7 @@ fn main() { let mut target_upper = target.replace(|c: char| c == '-' || c == '.', "_"); target_upper.make_ascii_uppercase(); println!("cargo:rerun-if-env-changed=CARGO_TARGET_{}_RUSTFLAGS", target_upper); + println!("cargo:rerun-if-env-changed=CARGO_CFG_PORTABLE_ATOMIC_NO_OUTLINE_ATOMICS"); let version = match rustc_version() { Some(version) => version, @@ -170,7 +171,7 @@ fn main() { if version.nightly && (!no_asm || unstable_asm) && cfg!(feature = "fallback") - && cfg!(feature = "outline-atomics") + && env::var_os("CARGO_CFG_PORTABLE_ATOMIC_NO_OUTLINE_ATOMICS").is_none() && is_allowed_feature("cmpxchg16b_target_feature") { println!("cargo:rustc-cfg=portable_atomic_cmpxchg16b_dynamic"); diff --git a/src/imp/atomic128/aarch64.rs b/src/imp/atomic128/aarch64.rs index 8978e35c..39a0a4d0 100644 --- a/src/imp/atomic128/aarch64.rs +++ b/src/imp/atomic128/aarch64.rs @@ -6,10 +6,10 @@ // - CASP (DWCAS) added as FEAT_LSE (armv8.1-a) // - LDP/STP (DW load/store) if FEAT_LSE2 (armv8.4-a) is available // -// If the `outline-atomics` feature is not enabled, we use CASP if -// FEAT_LSE is enabled at compile-time, otherwise, use LDXP/STXP loop. -// If the `outline-atomics` feature is enabled, we use CASP for -// compare_exchange(_weak) if FEAT_LSE is available at run-time. +// If outline-atomics is not enabled, we use CASP if FEAT_LSE is enabled at +// compile-time, otherwise, use LDXP/STXP loop. +// If outline-atomics is enabled, we use CASP for compare_exchange(_weak) if +// FEAT_LSE is available at run-time. // If FEAT_LSE2 is available at compile-time, we use LDP/STP for load/store. // // Note: As of rustc 1.63, -C target-feature=+lse2 does not @@ -269,7 +269,7 @@ unsafe fn atomic_compare_exchange( () => unsafe { _compare_exchange_casp(dst, old, new, success) }, #[cfg(not(all( not(portable_atomic_no_aarch64_target_feature), - feature = "outline-atomics", + not(portable_atomic_no_outline_atomics), // https://github.com/rust-lang/stdarch/blob/a0c30f3e3c75adcd6ee7efc94014ebcead61c507/crates/std_detect/src/detect/mod.rs // It is fine to use std for targets that we know can be linked to std. // Note: aarch64 freebsd is tier 3, so std may not be available. @@ -280,7 +280,7 @@ unsafe fn atomic_compare_exchange( () => unsafe { _compare_exchange_ldxp_stxp(dst, old, new, success) }, #[cfg(all( not(portable_atomic_no_aarch64_target_feature), - feature = "outline-atomics", + not(portable_atomic_no_outline_atomics), // https://github.com/rust-lang/stdarch/blob/a0c30f3e3c75adcd6ee7efc94014ebcead61c507/crates/std_detect/src/detect/mod.rs // It is fine to use std for targets that we know can be linked to std. // Note: aarch64 freebsd is tier 3, so std may not be available. diff --git a/src/imp/atomic128/cpuid.rs b/src/imp/atomic128/cpuid.rs index c07d2b06..5be4ddaf 100644 --- a/src/imp/atomic128/cpuid.rs +++ b/src/imp/atomic128/cpuid.rs @@ -2,7 +2,7 @@ #![cfg_attr( any( - not(feature = "outline-atomics"), + portable_atomic_no_outline_atomics, not(target_feature = "sse"), miri, portable_atomic_sanitize_thread diff --git a/src/imp/atomic128/x86_64.rs b/src/imp/atomic128/x86_64.rs index 160c0497..3095cf04 100644 --- a/src/imp/atomic128/x86_64.rs +++ b/src/imp/atomic128/x86_64.rs @@ -291,7 +291,7 @@ unsafe fn atomic_load(src: *mut u128, order: Ordering) -> u128 { // https://doc.rust-lang.org/nightly/rustc/platform-support/x86_64-unknown-none.html // Miri and Sanitizer do not support inline assembly. #[cfg(any( - not(feature = "outline-atomics"), + portable_atomic_no_outline_atomics, not(target_feature = "sse"), miri, portable_atomic_sanitize_thread @@ -299,7 +299,7 @@ unsafe fn atomic_load(src: *mut u128, order: Ordering) -> u128 { // SAFETY: the caller must uphold the safety contract for `atomic_load`. () => unsafe { _atomic_load_cmpxchg16b(src, order) }, #[cfg(not(any( - not(feature = "outline-atomics"), + portable_atomic_no_outline_atomics, not(target_feature = "sse"), miri, portable_atomic_sanitize_thread @@ -336,7 +336,7 @@ unsafe fn atomic_store(dst: *mut u128, val: u128, order: Ordering) { // https://doc.rust-lang.org/nightly/rustc/platform-support/x86_64-unknown-none.html // Miri and Sanitizer do not support inline assembly. #[cfg(any( - not(feature = "outline-atomics"), + portable_atomic_no_outline_atomics, not(target_feature = "sse"), miri, portable_atomic_sanitize_thread @@ -344,7 +344,7 @@ unsafe fn atomic_store(dst: *mut u128, val: u128, order: Ordering) { // SAFETY: the caller must uphold the safety contract for `atomic_store`. () => unsafe { _atomic_store_cmpxchg16b(dst, val, order) }, #[cfg(not(any( - not(feature = "outline-atomics"), + portable_atomic_no_outline_atomics, not(target_feature = "sse"), miri, portable_atomic_sanitize_thread @@ -430,7 +430,7 @@ fn is_lock_free() -> bool { } #[inline] const fn is_always_lock_free() -> bool { - cfg!(any(target_feature = "cmpxchg16b", portable_atomic_target_feature = "cmpxchg16b",)) + cfg!(any(target_feature = "cmpxchg16b", portable_atomic_target_feature = "cmpxchg16b")) } atomic128!(int, AtomicI128, i128); diff --git a/src/lib.rs b/src/lib.rs index 2cffa287..c86c1b0e 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -34,7 +34,7 @@ portable-atomic = { version = "0.3", default-features = false } Native 128-bit atomic operations are available on x86_64 (Rust 1.59+), aarch64 (Rust 1.59+), powerpc64 (le or pwr8+, nightly only), and s390x (nightly only), otherwise the fallback implementation is used. -On x86_64, when the `outline-atomics` optional feature is not enabled and `cmpxchg16b` target feature is not enabled at compile-time, this uses the fallback implementation. `cmpxchg16b` target feature is enabled by default only on macOS. +On x86_64, even if `cmpxchg16b` is not available at compile time (note: `cmpxchg16b` target feature is enabled by default only on macOS), run-time detection checks whether `cmpxchg16b` is available. If `cmpxchg16b` is not available at either compile-time or run-time detection, the fallback implementation is used. See also [`portable_atomic_no_outline_atomics`](#optional-cfg-no-outline-atomics) cfg. They are usually implemented using inline assembly, and when using Miri or ThreadSanitizer that do not support inline assembly, core intrinsics are used instead of inline assembly if possible. @@ -47,18 +47,6 @@ See [this list](https://github.com/taiki-e/portable-atomic/issues/10#issuecommen Disabling this allows only atomic types for which the platform natively supports atomic operations. -- **`outline-atomics`**
- Enable run-time CPU feature detection. - - This allows maintaining support for older CPUs while using features that are not supported on older CPUs, such as CMPXCHG16B (x86_64) and FEAT_LSE (aarch64). - - Note: - - Dynamic detection is currently only enabled in Rust 1.61+ for aarch64, in 1.59+ (AVX) or nightly (CMPXCHG16B) for x86_64, and in nightly for other platforms, otherwise it works the same as the default. - - If the required target features are enabled at compile-time, the atomic operations are inlined. - - This is compatible with no-std (as with all features except `std`). - - See also [this list](https://github.com/taiki-e/portable-atomic/issues/10#issuecomment-1159368067). - - **`float`**
Provide `AtomicF{32,64}`. Note that most of `fetch_*` operations of atomic floats are implemented using CAS loops, which can be slower than equivalent operations of atomic integers. @@ -106,6 +94,19 @@ See [this list](https://github.com/taiki-e/portable-atomic/issues/10#issuecommen Feel free to submit an issue if your target is not supported yet. +- **`--cfg portable_atomic_no_outline_atomics`**
+ Disable dynamic dispatching by run-time CPU feature detection. + + If dynamic dispatching by run-time CPU feature detection is enabled, it allows maintaining support for older CPUs while using features that are not supported on older CPUs, such as CMPXCHG16B (x86_64) and FEAT_LSE (aarch64). + + Note: + - Dynamic detection is currently only enabled in Rust 1.61+ for aarch64, in 1.59+ (AVX) or nightly (CMPXCHG16B) for x86_64, and in nightly for other platforms, otherwise it works the same as when this cfg is set. + - If the required target features are enabled at compile-time, the atomic operations are inlined. + - This is compatible with no-std (as with all features except `std`). + - Some aarch64 targets enable LLVM's `outline-atomics` target feature by default, so if you set this cfg, you may want to disable that as well. + + See also [this list](https://github.com/taiki-e/portable-atomic/issues/10#issuecomment-1159368067). + ## Related Projects - [atomic-maybe-uninit]: Atomic operations on potentially uninitialized integers. diff --git a/src/tests/mod.rs b/src/tests/mod.rs index a16c1237..1cfe813f 100644 --- a/src/tests/mod.rs +++ b/src/tests/mod.rs @@ -97,3 +97,19 @@ extern "C" { #[cfg(feature = "float")] fn _atomic_f64_ffi_safety(_: AtomicF64); } + +#[cfg(target_arch = "x86_64")] +#[test] +#[cfg_attr(miri, ignore)] // Miri doesn't support inline assembly +fn test_x86_64_atomic_128_is_lock_free() { + assert_eq!( + AtomicI128::is_always_lock_free(), + cfg!(any(target_feature = "cmpxchg16b", portable_atomic_target_feature = "cmpxchg16b")) + ); + assert_eq!( + AtomicI128::is_lock_free(), + cfg!(any(target_feature = "cmpxchg16b", portable_atomic_target_feature = "cmpxchg16b")) + || cfg!(portable_atomic_cmpxchg16b_dynamic) + && std::is_x86_feature_detected!("cmpxchg16b") + ); +} diff --git a/tools/build.sh b/tools/build.sh index f6dd9d1e..930f04c0 100755 --- a/tools/build.sh +++ b/tools/build.sh @@ -92,6 +92,7 @@ known_cfgs=( portable_atomic_unsafe_assume_single_core portable_atomic_s_mode portable_atomic_disable_fiq + portable_atomic_no_outline_atomics ) x() { @@ -242,11 +243,8 @@ build() { --workspace --ignore-private --no-dev-deps --feature-powerset --depth 2 --optional-deps ) - case "${target}" in - x86_64* | aarch64* | arm64*) ;; - # outline-atomics feature only affects x86_64 and aarch64. - *) args+=(--exclude-features "outline-atomics") ;; - esac + # outline-atomics feature is no-op since https://github.com/taiki-e/portable-atomic/pull/57. + args+=(--exclude-features "outline-atomics") case "${target}" in *-none* | *-cuda* | avr-* | *-esp-espidf) args+=(--exclude-features "std") @@ -281,6 +279,13 @@ build() { esac RUSTFLAGS="${target_rustflags}" \ x_cargo "${args[@]}" "$@" + case "${target}" in + # portable_atomic_no_outline_atomics only affects x86_64 and aarch64. + x86_64* | aarch64* | arm64*) + RUSTFLAGS="${target_rustflags} --cfg portable_atomic_no_outline_atomics" \ + x_cargo "${args[@]}" --target-dir target/no_outline_atomics "$@" + ;; + esac case "${target}" in x86_64*) # macOS is skipped because it is +cmpxchg16b by default