diff --git a/Cargo.toml b/Cargo.toml index 3a59674..1221230 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -12,11 +12,11 @@ categories = ["encoding", "parser-implementations"] exclude = ["/.idea", "/images", "/.github", "/scripts"] -# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html - [dependencies] [features] +default = ["std"] +std = [] native-optimizations = [] # Please do not enable this feature diff --git a/src/decode/lookup.rs b/src/decode/lookup.rs index d21c609..c964494 100644 --- a/src/decode/lookup.rs +++ b/src/decode/lookup.rs @@ -1,13 +1,13 @@ #[cfg(target_arch = "x86")] -use std::arch::x86::*; +use core::arch::x86::*; #[cfg(target_arch = "x86_64")] -use std::arch::x86_64::*; +use core::arch::x86_64::*; #[repr(align(16), C)] struct Align128(T); pub static LOOKUP_DOUBLE_VEC: [__m128i; 90] = unsafe { - std::mem::transmute(Align128::<[u8; 1440]>([ + core::mem::transmute(Align128::<[u8; 1440]>([ 0, 255, 255, 255, 255, 255, 255, 255, 1, 255, 255, 255, 255, 255, 255, 255, // 1, 1 0, 255, 255, 255, 255, 255, 255, 255, 1, 2, 255, 255, 255, 255, 255, 255, // 1, 2 0, 255, 255, 255, 255, 255, 255, 255, 1, 2, 3, 255, 255, 255, 255, 255, // 1, 3 @@ -1129,7 +1129,7 @@ pub static LOOKUP_DOUBLE_STEP1: [(u8, u8, u8); 1024] = [ ]; pub static LOOKUP_QUAD_VEC: [__m128i; 81] = unsafe { - std::mem::transmute(Align128::<[u8; 1296]>([ + core::mem::transmute(Align128::<[u8; 1296]>([ 0, 255, 255, 255, 1, 255, 255, 255, 2, 255, 255, 255, 3, 255, 255, 255, // 1, 1, 1, 1 0, 255, 255, 255, 1, 255, 255, 255, 2, 255, 255, 255, 3, 4, 255, 255, // 1, 1, 1, 2 0, 255, 255, 255, 1, 255, 255, 255, 2, 255, 255, 255, 3, 4, 5, 255, // 1, 1, 1, 3 diff --git a/src/decode/mod.rs b/src/decode/mod.rs index 671270b..fddf305 100644 --- a/src/decode/mod.rs +++ b/src/decode/mod.rs @@ -1,8 +1,8 @@ #[cfg(target_arch = "x86")] -use std::arch::x86::*; +use core::arch::x86::*; #[cfg(target_arch = "x86_64")] -use std::arch::x86_64::*; -use std::cmp::min; +use core::arch::x86_64::*; +use core::cmp::min; use crate::num::{SignedVarIntTarget, VarIntTarget}; use crate::VarIntDecodeError; @@ -31,7 +31,7 @@ pub fn decode(bytes: &[u8]) -> Result<(T, usize), VarIntDecodeE } else if !bytes.is_empty() { let mut data = [0u8; 16]; let len = min(16, bytes.len()); - // unsafe { std::ptr::copy_nonoverlapping(bytes.as_ptr(), data.as_mut_ptr(), len); } + // unsafe { core::ptr::copy_nonoverlapping(bytes.as_ptr(), data.as_mut_ptr(), len); } data[..len].copy_from_slice(&bytes[..len]); unsafe { decode_unsafe(data.as_ptr()) } } else { @@ -135,7 +135,7 @@ pub unsafe fn decode_unsafe(bytes: *const u8) -> (T, usize) { // let varint_part0 = b0 & !(0xffffffffffffffff << len0.min(63)); // let varint_part1 = b1 & !(0xffffffffffffffff << (((msbs0 == 0) as u32) * len1.min(63))); - let num = T::vector_to_num(std::mem::transmute([varint_part0, varint_part1])); + let num = T::vector_to_num(core::mem::transmute([varint_part0, varint_part1])); let len = if msbs0 == 0 { len1 + 64 } else { len0 } / 8; (num, len as usize) @@ -167,8 +167,8 @@ pub unsafe fn decode_two_unsafe( // check will be eliminated at compile time panic!( "exceeded length limit: cannot decode {} and {}, total length {} exceeds 16 bytes", - std::any::type_name::(), - std::any::type_name::(), + core::any::type_name::(), + core::any::type_name::(), T::MAX_VARINT_BYTES + U::MAX_VARINT_BYTES ); } @@ -219,13 +219,13 @@ pub unsafe fn decode_two_unsafe( dual_u32_stage2(comb) }; - let x: [u32; 4] = std::mem::transmute(x); + let x: [u32; 4] = core::mem::transmute(x); // _mm_extract_epi32 requires SSE4.1 first_num = T::cast_u32(x[0]); second_num = U::cast_u32(x[2]); } else { - first_num = T::vector_to_num(std::mem::transmute(first)); - second_num = U::vector_to_num(std::mem::transmute(second)); + first_num = T::vector_to_num(core::mem::transmute(first)); + second_num = U::vector_to_num(core::mem::transmute(second)); } (first_num, second_num, first_len as u8, second_len as u8) @@ -264,16 +264,16 @@ unsafe fn decode_two_u32_unsafe( dual_u32_stage2(comb) }; - let x: [u32; 4] = std::mem::transmute(x); + let x: [u32; 4] = core::mem::transmute(x); // _mm_extract_epi32 requires SSE4.1 first_num = T::cast_u32(x[0]); second_num = U::cast_u32(x[2]); } else { - first_num = T::vector_to_num(std::mem::transmute(comb)); - second_num = U::vector_to_num(std::mem::transmute(_mm_bsrli_si128(comb, 8))); + first_num = T::vector_to_num(core::mem::transmute(comb)); + second_num = U::vector_to_num(core::mem::transmute(_mm_bsrli_si128(comb, 8))); } - (first_num, second_num, first_len as u8, second_len as u8) + (first_num, second_num, first_len, second_len) } #[inline(always)] @@ -461,8 +461,8 @@ pub unsafe fn decode_two_wide_unsafe( first_num = T::cast_u64(_mm_extract_epi64(x, 0) as u64); second_num = U::cast_u64(_mm_extract_epi64(x, 1) as u64); } else { - first_num = T::vector_to_num(std::mem::transmute(first)); - second_num = U::vector_to_num(std::mem::transmute(second)); + first_num = T::vector_to_num(core::mem::transmute(first)); + second_num = U::vector_to_num(core::mem::transmute(second)); } (first_num, second_num, first_len as u8, second_len as u8) @@ -496,10 +496,10 @@ pub unsafe fn decode_four_unsafe< // check will be eliminated at compile time panic!( "exceeded length limit: cannot decode {}, {}, {}, and {}, total length {} exceeds 16 bytes", - std::any::type_name::(), - std::any::type_name::(), - std::any::type_name::(), - std::any::type_name::(), + core::any::type_name::(), + core::any::type_name::(), + core::any::type_name::(), + core::any::type_name::(), T::MAX_VARINT_BYTES + U::MAX_VARINT_BYTES + V::MAX_VARINT_BYTES + W::MAX_VARINT_BYTES ); } @@ -592,17 +592,17 @@ pub unsafe fn decode_four_unsafe< ) }; - let x: [u32; 4] = std::mem::transmute(x); + let x: [u32; 4] = core::mem::transmute(x); // _mm_extract_epi32 requires SSE4.1 first_num = T::cast_u32(x[0]); second_num = U::cast_u32(x[1]); third_num = V::cast_u32(x[2]); fourth_num = W::cast_u32(x[3]); } else { - first_num = T::vector_to_num(std::mem::transmute(first)); - second_num = U::vector_to_num(std::mem::transmute(second)); - third_num = V::vector_to_num(std::mem::transmute(third)); - fourth_num = W::vector_to_num(std::mem::transmute(fourth)); + first_num = T::vector_to_num(core::mem::transmute(first)); + second_num = U::vector_to_num(core::mem::transmute(second)); + third_num = V::vector_to_num(core::mem::transmute(third)); + fourth_num = W::vector_to_num(core::mem::transmute(fourth)); } ( @@ -679,17 +679,17 @@ unsafe fn decode_four_u16_unsafe< ) }; - let x: [u32; 4] = std::mem::transmute(x); + let x: [u32; 4] = core::mem::transmute(x); // _mm_extract_epi32 requires SSE4.1 first_num = T::cast_u32(x[0]); second_num = U::cast_u32(x[1]); third_num = V::cast_u32(x[2]); fourth_num = W::cast_u32(x[3]); } else { - first_num = T::vector_to_num(std::mem::transmute(comb)); - second_num = U::vector_to_num(std::mem::transmute(_mm_bsrli_si128(comb, 4))); - third_num = V::vector_to_num(std::mem::transmute(_mm_bsrli_si128(comb, 8))); - fourth_num = W::vector_to_num(std::mem::transmute(_mm_bsrli_si128(comb, 12))); + first_num = T::vector_to_num(core::mem::transmute(comb)); + second_num = U::vector_to_num(core::mem::transmute(_mm_bsrli_si128(comb, 4))); + third_num = V::vector_to_num(core::mem::transmute(_mm_bsrli_si128(comb, 8))); + fourth_num = W::vector_to_num(core::mem::transmute(_mm_bsrli_si128(comb, 12))); } ( @@ -790,7 +790,7 @@ pub unsafe fn decode_eight_u8_unsafe(bytes: *const u8) -> ([u8; 8], u8) { cumul_lens = _mm_add_epi8(cumul_lens, _mm_bslli_si128(cumul_lens, 4)); cumul_lens = _mm_add_epi8(cumul_lens, _mm_bslli_si128(cumul_lens, 8)); - let cumul_lens_2: [u8; 16] = std::mem::transmute(cumul_lens); + let cumul_lens_2: [u8; 16] = core::mem::transmute(cumul_lens); let last_len = 8 - cumul_lens_2[7] + 8; // Set one-lengthed second bytes to negative @@ -819,7 +819,7 @@ pub unsafe fn decode_eight_u8_unsafe(bytes: *const u8) -> ([u8; 8], u8) { x, _mm_setr_epi8(0, 2, 4, 6, 8, 10, 12, 14, -1, -1, -1, -1, -1, -1, -1, -1), ); - let lower: [u64; 2] = std::mem::transmute(shuf); + let lower: [u64; 2] = core::mem::transmute(shuf); let nums = lower[0].to_ne_bytes(); (nums, last_len) diff --git a/src/encode/mod.rs b/src/encode/mod.rs index 708ca15..83d4572 100644 --- a/src/encode/mod.rs +++ b/src/encode/mod.rs @@ -1,7 +1,7 @@ #[cfg(target_arch = "x86")] -use std::arch::x86::*; +use core::arch::x86::*; #[cfg(target_arch = "x86_64")] -use std::arch::x86_64::*; +use core::arch::x86_64::*; use crate::num::{SignedVarIntTarget, VarIntTarget}; @@ -86,10 +86,10 @@ pub unsafe fn encode_unsafe(num: T) -> ([u8; 16], u8) { let merged = stage1 | (msbs & msbmask); - (std::mem::transmute([merged, 0]), bytes_needed as u8) + (core::mem::transmute([merged, 0]), bytes_needed as u8) } else { // Break the number into 7-bit parts and spread them out into a vector - let stage1: __m128i = std::mem::transmute(num.num_to_vector_stage1()); + let stage1: __m128i = core::mem::transmute(num.num_to_vector_stage1()); // Create a mask for where there exist values // This signed comparison works because all MSBs should be cleared at this point @@ -113,6 +113,6 @@ pub unsafe fn encode_unsafe(num: T) -> ([u8; 16], u8) { // Merge the MSB bits into the vector let merged = _mm_or_si128(stage1, msbmask); - (std::mem::transmute(merged), bytes) + (core::mem::transmute(merged), bytes) } } diff --git a/src/lib.rs b/src/lib.rs index c2e1e78..1862c45 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -5,15 +5,16 @@ encoder and decoder written in Rust. **For more information, please see the [README](https://github.com/as-com/varint-simd#readme).** */ +#![cfg_attr(not(feature = "std"), no_std)] #![cfg_attr(rustc_nightly, feature(doc_cfg))] #[cfg(target_arch = "x86")] -use std::arch::x86::*; +use core::arch::x86::*; #[cfg(target_arch = "x86_64")] -use std::arch::x86_64::*; +use core::arch::x86_64::*; -use std::fmt::Debug; +use core::fmt::Debug; pub mod decode; pub mod encode; @@ -28,12 +29,12 @@ pub use num::*; // Functions to help with debugging #[allow(dead_code)] fn slice_m128i(n: __m128i) -> [u8; 16] { - unsafe { std::mem::transmute(n) } + unsafe { core::mem::transmute(n) } } #[allow(dead_code)] fn slice_m256i(n: __m256i) -> [i8; 32] { - unsafe { std::mem::transmute(n) } + unsafe { core::mem::transmute(n) } } #[derive(Debug)] @@ -42,12 +43,13 @@ pub enum VarIntDecodeError { NotEnoughBytes, } -impl std::fmt::Display for VarIntDecodeError { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - std::fmt::Debug::fmt(self, f) +impl core::fmt::Display for VarIntDecodeError { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + core::fmt::Debug::fmt(self, f) } } +#[cfg(feature = "std")] impl std::error::Error for VarIntDecodeError {} #[cfg(test)] diff --git a/src/num.rs b/src/num.rs index fd0e8a0..b605253 100644 --- a/src/num.rs +++ b/src/num.rs @@ -1,9 +1,9 @@ #[cfg(target_arch = "x86")] -use std::arch::x86::*; +use core::arch::x86::*; #[cfg(target_arch = "x86_64")] -use std::arch::x86_64::*; +use core::arch::x86_64::*; -use std::fmt::Debug; +use core::fmt::Debug; /// Represents an unsigned scalar value that can be encoded to and decoded from a varint. pub trait VarIntTarget: Debug + Eq + PartialEq + PartialOrd + Sized + Copy { @@ -52,7 +52,7 @@ impl VarIntTarget for u8 { #[inline(always)] fn vector_to_num(res: [u8; 16]) -> Self { - let res: [u64; 2] = unsafe { std::mem::transmute(res) }; + let res: [u64; 2] = unsafe { core::mem::transmute(res) }; let x = res[0]; Self::scalar_to_num(x) @@ -102,7 +102,7 @@ impl VarIntTarget for u8 { res[0] = self.num_to_scalar_stage1(); - unsafe { std::mem::transmute(res) } + unsafe { core::mem::transmute(res) } } #[inline(always)] @@ -112,7 +112,7 @@ impl VarIntTarget for u8 { res[0] = self.num_to_scalar_stage1(); - unsafe { std::mem::transmute(res) } + unsafe { core::mem::transmute(res) } } #[inline(always)] @@ -133,7 +133,7 @@ impl VarIntTarget for u16 { #[inline(always)] fn vector_to_num(res: [u8; 16]) -> Self { - let arr: [u64; 2] = unsafe { std::mem::transmute(res) }; + let arr: [u64; 2] = unsafe { core::mem::transmute(res) }; let x = arr[0]; Self::scalar_to_num(x) @@ -185,7 +185,7 @@ impl VarIntTarget for u16 { res[0] = self.num_to_scalar_stage1(); - unsafe { std::mem::transmute(res) } + unsafe { core::mem::transmute(res) } } #[inline(always)] @@ -194,7 +194,7 @@ impl VarIntTarget for u16 { let mut res = [0u64; 2]; res[0] = self.num_to_scalar_stage1(); - unsafe { std::mem::transmute(res) } + unsafe { core::mem::transmute(res) } } #[inline(always)] @@ -215,7 +215,7 @@ impl VarIntTarget for u32 { #[inline(always)] fn vector_to_num(res: [u8; 16]) -> Self { - let arr: [u64; 2] = unsafe { std::mem::transmute(res) }; + let arr: [u64; 2] = unsafe { core::mem::transmute(res) }; let x = arr[0]; Self::scalar_to_num(x) @@ -273,7 +273,7 @@ impl VarIntTarget for u32 { res[0] = self.num_to_scalar_stage1(); - unsafe { std::mem::transmute(res) } + unsafe { core::mem::transmute(res) } } #[inline(always)] @@ -282,7 +282,7 @@ impl VarIntTarget for u32 { let mut res = [0u64; 2]; res[0] = self.num_to_scalar_stage1(); - unsafe { std::mem::transmute(res) } + unsafe { core::mem::transmute(res) } } #[inline(always)] @@ -308,7 +308,7 @@ impl VarIntTarget for u64 { #[inline(always)] #[cfg(all(target_feature = "bmi2", fast_pdep))] fn vector_to_num(res: [u8; 16]) -> Self { - let arr: [u64; 2] = unsafe { std::mem::transmute(res) }; + let arr: [u64; 2] = unsafe { core::mem::transmute(res) }; let x = arr[0]; let y = arr[1]; @@ -328,14 +328,14 @@ impl VarIntTarget for u64 { res[0] = unsafe { _pdep_u64(x, 0x7f7f7f7f7f7f7f7f) }; res[1] = unsafe { _pdep_u64(x >> 56, 0x000000000000017f) }; - unsafe { std::mem::transmute(res) } + unsafe { core::mem::transmute(res) } } #[inline(always)] #[cfg(all(target_feature = "avx2", not(all(target_feature = "bmi2", fast_pdep))))] fn vector_to_num(res: [u8; 16]) -> Self { let pt1 = unsafe { - let b = std::mem::transmute(res); + let b = core::mem::transmute(res); let c = _mm_broadcastq_epi64(b); let d = _mm_or_si128( @@ -365,7 +365,7 @@ impl VarIntTarget for u64 { _mm_extract_epi64(e, 0) as u64 }; - let arr: [u64; 2] = unsafe { std::mem::transmute(res) }; + let arr: [u64; 2] = unsafe { core::mem::transmute(res) }; let y = arr[1]; @@ -416,13 +416,13 @@ impl VarIntTarget for u64 { res[0] = unsafe { _mm_extract_epi64(d, 0) as u64 }; res[1] = ((x & 0x7f00000000000000) >> 56) | ((x & 0x8000000000000000) >> 55); - unsafe { std::mem::transmute(res) } + unsafe { core::mem::transmute(res) } } #[inline(always)] #[cfg(not(any(target_feature = "avx2", all(target_feature = "bmi2", fast_pdep))))] fn vector_to_num(res: [u8; 16]) -> Self { - let arr: [u64; 2] = unsafe { std::mem::transmute(res) }; + let arr: [u64; 2] = unsafe { core::mem::transmute(res) }; let x = arr[0]; let y = arr[1]; @@ -457,7 +457,7 @@ impl VarIntTarget for u64 { | ((x & 0x00fe000000000000) << 7); res[1] = ((x & 0x7f00000000000000) >> 56) | ((x & 0x8000000000000000) >> 55); - unsafe { std::mem::transmute(res) } + unsafe { core::mem::transmute(res) } } #[inline(always)]