Skip to content

Commit

Permalink
Merge pull request #3 from davnavr/master
Browse files Browse the repository at this point in the history
Add `#[no_std]` support
  • Loading branch information
as-com authored Jun 13, 2023
2 parents 0f46878 + bc539cb commit 79677e9
Show file tree
Hide file tree
Showing 6 changed files with 72 additions and 70 deletions.
4 changes: 2 additions & 2 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,11 @@ categories = ["encoding", "parser-implementations"]

exclude = ["/.idea", "/images", "/.github", "/scripts"]

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[dependencies]

[features]
default = ["std"]
std = []
native-optimizations = []

# Please do not enable this feature
Expand Down
8 changes: 4 additions & 4 deletions src/decode/lookup.rs
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
#[cfg(target_arch = "x86")]
use std::arch::x86::*;
use core::arch::x86::*;
#[cfg(target_arch = "x86_64")]
use std::arch::x86_64::*;
use core::arch::x86_64::*;

/// Wrapper that forces 16-byte alignment (with C layout) on the wrapped value.
///
/// The lookup tables in this file wrap their raw byte arrays in this type
/// before transmuting them into arrays of `__m128i`.
#[repr(align(16), C)]
struct Align128<T>(T);

pub static LOOKUP_DOUBLE_VEC: [__m128i; 90] = unsafe {
std::mem::transmute(Align128::<[u8; 1440]>([
core::mem::transmute(Align128::<[u8; 1440]>([
0, 255, 255, 255, 255, 255, 255, 255, 1, 255, 255, 255, 255, 255, 255, 255, // 1, 1
0, 255, 255, 255, 255, 255, 255, 255, 1, 2, 255, 255, 255, 255, 255, 255, // 1, 2
0, 255, 255, 255, 255, 255, 255, 255, 1, 2, 3, 255, 255, 255, 255, 255, // 1, 3
Expand Down Expand Up @@ -1129,7 +1129,7 @@ pub static LOOKUP_DOUBLE_STEP1: [(u8, u8, u8); 1024] = [
];

pub static LOOKUP_QUAD_VEC: [__m128i; 81] = unsafe {
std::mem::transmute(Align128::<[u8; 1296]>([
core::mem::transmute(Align128::<[u8; 1296]>([
0, 255, 255, 255, 1, 255, 255, 255, 2, 255, 255, 255, 3, 255, 255, 255, // 1, 1, 1, 1
0, 255, 255, 255, 1, 255, 255, 255, 2, 255, 255, 255, 3, 4, 255, 255, // 1, 1, 1, 2
0, 255, 255, 255, 1, 255, 255, 255, 2, 255, 255, 255, 3, 4, 5, 255, // 1, 1, 1, 3
Expand Down
64 changes: 32 additions & 32 deletions src/decode/mod.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
#[cfg(target_arch = "x86")]
use std::arch::x86::*;
use core::arch::x86::*;
#[cfg(target_arch = "x86_64")]
use std::arch::x86_64::*;
use std::cmp::min;
use core::arch::x86_64::*;
use core::cmp::min;

use crate::num::{SignedVarIntTarget, VarIntTarget};
use crate::VarIntDecodeError;
Expand Down Expand Up @@ -31,7 +31,7 @@ pub fn decode<T: VarIntTarget>(bytes: &[u8]) -> Result<(T, usize), VarIntDecodeE
} else if !bytes.is_empty() {
let mut data = [0u8; 16];
let len = min(16, bytes.len());
// unsafe { std::ptr::copy_nonoverlapping(bytes.as_ptr(), data.as_mut_ptr(), len); }
// unsafe { core::ptr::copy_nonoverlapping(bytes.as_ptr(), data.as_mut_ptr(), len); }
data[..len].copy_from_slice(&bytes[..len]);
unsafe { decode_unsafe(data.as_ptr()) }
} else {
Expand Down Expand Up @@ -135,7 +135,7 @@ pub unsafe fn decode_unsafe<T: VarIntTarget>(bytes: *const u8) -> (T, usize) {
// let varint_part0 = b0 & !(0xffffffffffffffff << len0.min(63));
// let varint_part1 = b1 & !(0xffffffffffffffff << (((msbs0 == 0) as u32) * len1.min(63)));

let num = T::vector_to_num(std::mem::transmute([varint_part0, varint_part1]));
let num = T::vector_to_num(core::mem::transmute([varint_part0, varint_part1]));
let len = if msbs0 == 0 { len1 + 64 } else { len0 } / 8;

(num, len as usize)
Expand Down Expand Up @@ -167,8 +167,8 @@ pub unsafe fn decode_two_unsafe<T: VarIntTarget, U: VarIntTarget>(
// check will be eliminated at compile time
panic!(
"exceeded length limit: cannot decode {} and {}, total length {} exceeds 16 bytes",
std::any::type_name::<T>(),
std::any::type_name::<U>(),
core::any::type_name::<T>(),
core::any::type_name::<U>(),
T::MAX_VARINT_BYTES + U::MAX_VARINT_BYTES
);
}
Expand Down Expand Up @@ -219,13 +219,13 @@ pub unsafe fn decode_two_unsafe<T: VarIntTarget, U: VarIntTarget>(
dual_u32_stage2(comb)
};

let x: [u32; 4] = std::mem::transmute(x);
let x: [u32; 4] = core::mem::transmute(x);
// _mm_extract_epi32 requires SSE4.1
first_num = T::cast_u32(x[0]);
second_num = U::cast_u32(x[2]);
} else {
first_num = T::vector_to_num(std::mem::transmute(first));
second_num = U::vector_to_num(std::mem::transmute(second));
first_num = T::vector_to_num(core::mem::transmute(first));
second_num = U::vector_to_num(core::mem::transmute(second));
}

(first_num, second_num, first_len as u8, second_len as u8)
Expand Down Expand Up @@ -264,16 +264,16 @@ unsafe fn decode_two_u32_unsafe<T: VarIntTarget, U: VarIntTarget>(
dual_u32_stage2(comb)
};

let x: [u32; 4] = std::mem::transmute(x);
let x: [u32; 4] = core::mem::transmute(x);
// _mm_extract_epi32 requires SSE4.1
first_num = T::cast_u32(x[0]);
second_num = U::cast_u32(x[2]);
} else {
first_num = T::vector_to_num(std::mem::transmute(comb));
second_num = U::vector_to_num(std::mem::transmute(_mm_bsrli_si128(comb, 8)));
first_num = T::vector_to_num(core::mem::transmute(comb));
second_num = U::vector_to_num(core::mem::transmute(_mm_bsrli_si128(comb, 8)));
}

(first_num, second_num, first_len as u8, second_len as u8)
(first_num, second_num, first_len, second_len)
}

#[inline(always)]
Expand Down Expand Up @@ -461,8 +461,8 @@ pub unsafe fn decode_two_wide_unsafe<T: VarIntTarget, U: VarIntTarget>(
first_num = T::cast_u64(_mm_extract_epi64(x, 0) as u64);
second_num = U::cast_u64(_mm_extract_epi64(x, 1) as u64);
} else {
first_num = T::vector_to_num(std::mem::transmute(first));
second_num = U::vector_to_num(std::mem::transmute(second));
first_num = T::vector_to_num(core::mem::transmute(first));
second_num = U::vector_to_num(core::mem::transmute(second));
}

(first_num, second_num, first_len as u8, second_len as u8)
Expand Down Expand Up @@ -496,10 +496,10 @@ pub unsafe fn decode_four_unsafe<
// check will be eliminated at compile time
panic!(
"exceeded length limit: cannot decode {}, {}, {}, and {}, total length {} exceeds 16 bytes",
std::any::type_name::<T>(),
std::any::type_name::<U>(),
std::any::type_name::<V>(),
std::any::type_name::<W>(),
core::any::type_name::<T>(),
core::any::type_name::<U>(),
core::any::type_name::<V>(),
core::any::type_name::<W>(),
T::MAX_VARINT_BYTES + U::MAX_VARINT_BYTES + V::MAX_VARINT_BYTES + W::MAX_VARINT_BYTES
);
}
Expand Down Expand Up @@ -592,17 +592,17 @@ pub unsafe fn decode_four_unsafe<
)
};

let x: [u32; 4] = std::mem::transmute(x);
let x: [u32; 4] = core::mem::transmute(x);
// _mm_extract_epi32 requires SSE4.1
first_num = T::cast_u32(x[0]);
second_num = U::cast_u32(x[1]);
third_num = V::cast_u32(x[2]);
fourth_num = W::cast_u32(x[3]);
} else {
first_num = T::vector_to_num(std::mem::transmute(first));
second_num = U::vector_to_num(std::mem::transmute(second));
third_num = V::vector_to_num(std::mem::transmute(third));
fourth_num = W::vector_to_num(std::mem::transmute(fourth));
first_num = T::vector_to_num(core::mem::transmute(first));
second_num = U::vector_to_num(core::mem::transmute(second));
third_num = V::vector_to_num(core::mem::transmute(third));
fourth_num = W::vector_to_num(core::mem::transmute(fourth));
}

(
Expand Down Expand Up @@ -679,17 +679,17 @@ unsafe fn decode_four_u16_unsafe<
)
};

let x: [u32; 4] = std::mem::transmute(x);
let x: [u32; 4] = core::mem::transmute(x);
// _mm_extract_epi32 requires SSE4.1
first_num = T::cast_u32(x[0]);
second_num = U::cast_u32(x[1]);
third_num = V::cast_u32(x[2]);
fourth_num = W::cast_u32(x[3]);
} else {
first_num = T::vector_to_num(std::mem::transmute(comb));
second_num = U::vector_to_num(std::mem::transmute(_mm_bsrli_si128(comb, 4)));
third_num = V::vector_to_num(std::mem::transmute(_mm_bsrli_si128(comb, 8)));
fourth_num = W::vector_to_num(std::mem::transmute(_mm_bsrli_si128(comb, 12)));
first_num = T::vector_to_num(core::mem::transmute(comb));
second_num = U::vector_to_num(core::mem::transmute(_mm_bsrli_si128(comb, 4)));
third_num = V::vector_to_num(core::mem::transmute(_mm_bsrli_si128(comb, 8)));
fourth_num = W::vector_to_num(core::mem::transmute(_mm_bsrli_si128(comb, 12)));
}

(
Expand Down Expand Up @@ -790,7 +790,7 @@ pub unsafe fn decode_eight_u8_unsafe(bytes: *const u8) -> ([u8; 8], u8) {
cumul_lens = _mm_add_epi8(cumul_lens, _mm_bslli_si128(cumul_lens, 4));
cumul_lens = _mm_add_epi8(cumul_lens, _mm_bslli_si128(cumul_lens, 8));

let cumul_lens_2: [u8; 16] = std::mem::transmute(cumul_lens);
let cumul_lens_2: [u8; 16] = core::mem::transmute(cumul_lens);
let last_len = 8 - cumul_lens_2[7] + 8;

// Set one-lengthed second bytes to negative
Expand Down Expand Up @@ -819,7 +819,7 @@ pub unsafe fn decode_eight_u8_unsafe(bytes: *const u8) -> ([u8; 8], u8) {
x,
_mm_setr_epi8(0, 2, 4, 6, 8, 10, 12, 14, -1, -1, -1, -1, -1, -1, -1, -1),
);
let lower: [u64; 2] = std::mem::transmute(shuf);
let lower: [u64; 2] = core::mem::transmute(shuf);
let nums = lower[0].to_ne_bytes();

(nums, last_len)
Expand Down
10 changes: 5 additions & 5 deletions src/encode/mod.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#[cfg(target_arch = "x86")]
use std::arch::x86::*;
use core::arch::x86::*;
#[cfg(target_arch = "x86_64")]
use std::arch::x86_64::*;
use core::arch::x86_64::*;

use crate::num::{SignedVarIntTarget, VarIntTarget};

Expand Down Expand Up @@ -86,10 +86,10 @@ pub unsafe fn encode_unsafe<T: VarIntTarget>(num: T) -> ([u8; 16], u8) {

let merged = stage1 | (msbs & msbmask);

(std::mem::transmute([merged, 0]), bytes_needed as u8)
(core::mem::transmute([merged, 0]), bytes_needed as u8)
} else {
// Break the number into 7-bit parts and spread them out into a vector
let stage1: __m128i = std::mem::transmute(num.num_to_vector_stage1());
let stage1: __m128i = core::mem::transmute(num.num_to_vector_stage1());

// Create a mask for where there exist values
// This signed comparison works because all MSBs should be cleared at this point
Expand All @@ -113,6 +113,6 @@ pub unsafe fn encode_unsafe<T: VarIntTarget>(num: T) -> ([u8; 16], u8) {
// Merge the MSB bits into the vector
let merged = _mm_or_si128(stage1, msbmask);

(std::mem::transmute(merged), bytes)
(core::mem::transmute(merged), bytes)
}
}
18 changes: 10 additions & 8 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,15 +5,16 @@ encoder and decoder written in Rust.
**For more information, please see the [README](https://github.com/as-com/varint-simd#readme).**
*/

#![cfg_attr(not(feature = "std"), no_std)]
#![cfg_attr(rustc_nightly, feature(doc_cfg))]

#[cfg(target_arch = "x86")]
use std::arch::x86::*;
use core::arch::x86::*;

#[cfg(target_arch = "x86_64")]
use std::arch::x86_64::*;
use core::arch::x86_64::*;

use std::fmt::Debug;
use core::fmt::Debug;

pub mod decode;
pub mod encode;
Expand All @@ -28,12 +29,12 @@ pub use num::*;
// Functions to help with debugging
/// Debugging helper: reinterprets a 128-bit SIMD vector as its 16 raw bytes.
#[allow(dead_code)]
fn slice_m128i(n: __m128i) -> [u8; 16] {
// SAFETY: `__m128i` and `[u8; 16]` have the same size, and every bit pattern
// is a valid `[u8; 16]`. The `std::mem` and `core::mem` paths name the same
// `transmute` function.
unsafe { std::mem::transmute(n) }
unsafe { core::mem::transmute(n) }
}

/// Debugging helper: reinterprets a 256-bit SIMD vector as 32 signed bytes.
///
/// Note that this returns `i8` lanes, whereas `slice_m128i` returns `u8`.
#[allow(dead_code)]
fn slice_m256i(n: __m256i) -> [i8; 32] {
// SAFETY: `__m256i` and `[i8; 32]` have the same size, and every bit pattern
// is a valid `[i8; 32]`. The `std::mem` and `core::mem` paths name the same
// `transmute` function.
unsafe { std::mem::transmute(n) }
unsafe { core::mem::transmute(n) }
}

#[derive(Debug)]
Expand All @@ -42,12 +43,13 @@ pub enum VarIntDecodeError {
NotEnoughBytes,
}

// `Display` simply forwards to the derived `Debug` implementation, so the
// user-facing message is identical to the `{:?}` output of the error value.
impl std::fmt::Display for VarIntDecodeError {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
std::fmt::Debug::fmt(self, f)
impl core::fmt::Display for VarIntDecodeError {
fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
core::fmt::Debug::fmt(self, f)
}
}

// Only compiled when the `std` feature is enabled: the impl names
// `std::error::Error`, and the `std` crate is not linked when this crate is
// built as `no_std`.
#[cfg(feature = "std")]
impl std::error::Error for VarIntDecodeError {}

#[cfg(test)]
Expand Down
Loading

0 comments on commit 79677e9

Please sign in to comment.