diff --git a/Cargo.toml b/Cargo.toml index 38a22f7..83433e8 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,12 @@ [package] name = "base62" version = "2.0.0" -authors = ["Francois Bernier ", "Chai T. Rex "] +authors = [ + "François Bernier ", + "Chai T. Rex ", + "Kevin Darlington ", + "Christopher Tarquini ", +] edition = "2018" description = "A Base62 encoding/decoding library" documentation = "https://docs.rs/base62/" @@ -15,8 +20,9 @@ exclude = [ ] [dev-dependencies] -quickcheck = "1" criterion = "0.3.5" +quickcheck = "1" +rand = "0.8.5" [[bench]] name = "base62" diff --git a/benches/base62.rs b/benches/base62.rs index 1b2311b..4138e41 100644 --- a/benches/base62.rs +++ b/benches/base62.rs @@ -1,31 +1,76 @@ use base62::{ - decode, decode_alternative, encode, encode_alternative, encode_alternative_buf, encode_buf, + decode, decode_alternative, /*digit_count,*/ encode, encode_alternative, + encode_alternative_buf, encode_buf, }; use criterion::{black_box, criterion_group, criterion_main, Criterion}; +use rand::distributions::Standard; +use rand::{thread_rng, Rng}; pub fn criterion_benchmark(c: &mut Criterion) { + let mut random_u128s = thread_rng().sample_iter::(Standard); + + c.bench_function("decode_standard", |b| { + b.iter(|| decode(black_box("7n42DGM5Tflk9n8mt7Fhc7"))) + }); + + c.bench_function("decode_standard_random", |b| { + b.iter(|| decode(black_box(encode(random_u128s.next().unwrap())))) + }); + + c.bench_function("decode_alternative", |b| { + b.iter(|| decode_alternative(black_box("7N42dgm5tFLK9N8MT7fHC7"))) + }); + + c.bench_function("decode_alternative_random", |b| { + b.iter(|| decode_alternative(black_box(encode_alternative(random_u128s.next().unwrap())))) + }); + + /* + c.bench_function("digit_count", |b| { + b.iter(|| digit_count(black_box(random_u128s.next().unwrap()))) + }); + */ + c.bench_function("encode_standard_new", |b| { b.iter(|| encode(black_box(u128::MAX))) }); + c.bench_function("encode_standard_new_random", |b| { + b.iter(|| 
encode(black_box(random_u128s.next().unwrap()))) + }); + c.bench_function("encode_standard_buf", |b| { b.iter(|| encode_buf(black_box(u128::MAX), black_box(&mut String::new()))) }); + c.bench_function("encode_standard_buf_random", |b| { + b.iter(|| { + encode_buf( + black_box(random_u128s.next().unwrap()), + black_box(&mut String::new()), + ) + }) + }); + c.bench_function("encode_alternative_new", |b| { b.iter(|| encode_alternative(black_box(u128::MAX))) }); - c.bench_function("encode_alternative_buf", |b| { - b.iter(|| encode_alternative_buf(black_box(u128::MAX), black_box(&mut String::new()))) + c.bench_function("encode_alternative_new_random", |b| { + b.iter(|| encode_alternative(black_box(random_u128s.next().unwrap()))) }); - c.bench_function("decode_standard", |b| { - b.iter(|| decode(black_box("7n42DGM5Tflk9n8mt7Fhc7"))) + c.bench_function("encode_alternative_buf", |b| { + b.iter(|| encode_alternative_buf(black_box(u128::MAX), black_box(&mut String::new()))) }); - c.bench_function("decode_alternative", |b| { - b.iter(|| decode_alternative(black_box("7N42dgm5tFLK9N8MT7fHC7"))) + c.bench_function("encode_alternative_buf_random", |b| { + b.iter(|| { + encode_alternative_buf( + black_box(random_u128s.next().unwrap()), + black_box(&mut String::new()), + ) + }) }); } diff --git a/src/lib.rs b/src/lib.rs index 8e7097e..136ecd2 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -12,20 +12,26 @@ extern crate alloc; use alloc::string::String; const BASE: u64 = 62; -const BASE_TO_2: u64 = BASE.pow(2); -const BASE_TO_3: u64 = BASE.pow(3); -const BASE_TO_6: u64 = BASE.pow(6); -const BASE_TO_10: u128 = (BASE as u128).pow(10); -const BASE_TO_11: u128 = (BASE as u128).pow(11); - -// How much to add to the least-significant five bits of an encoded byte to get -// the base 62 digit's decoded value. 
-// -// Index 1: decimal digits -// Index 2: uppercase letters -// Index 3: lowercase letters -const STANDARD_OFFSETS: u32 = u32::from_le_bytes([0, -16_i8 as u8, 9, 35]); -const ALTERNATIVE_OFFSETS: u32 = u32::from_le_bytes([0, -16_i8 as u8, 35, 9]); +const BASE_TO_2: u64 = BASE * BASE; +const BASE_TO_3: u64 = BASE_TO_2 * BASE; +const BASE_TO_4: u64 = BASE_TO_3 * BASE; +const BASE_TO_5: u64 = BASE_TO_4 * BASE; +const BASE_TO_6: u64 = BASE_TO_5 * BASE; +const BASE_TO_7: u64 = BASE_TO_6 * BASE; +const BASE_TO_8: u64 = BASE_TO_7 * BASE; +const BASE_TO_9: u64 = BASE_TO_8 * BASE; +const BASE_TO_10: u128 = (BASE_TO_9 * BASE) as u128; +const BASE_TO_11: u128 = BASE_TO_10 * BASE as u128; +const BASE_TO_12: u128 = BASE_TO_11 * BASE as u128; +const BASE_TO_13: u128 = BASE_TO_12 * BASE as u128; +const BASE_TO_14: u128 = BASE_TO_13 * BASE as u128; +const BASE_TO_15: u128 = BASE_TO_14 * BASE as u128; +const BASE_TO_16: u128 = BASE_TO_15 * BASE as u128; +const BASE_TO_17: u128 = BASE_TO_16 * BASE as u128; +const BASE_TO_18: u128 = BASE_TO_17 * BASE as u128; +const BASE_TO_19: u128 = BASE_TO_18 * BASE as u128; +const BASE_TO_20: u128 = BASE_TO_19 * BASE as u128; +const BASE_TO_21: u128 = BASE_TO_20 * BASE as u128; /// Indicates the cause of a decoding failure in [`decode`](crate::decode) or /// [`decode_alternative`](crate::decode_alternative). @@ -62,50 +68,379 @@ impl core::fmt::Display for DecodeError { } macro_rules! internal_decoder_loop_body { - ($offsets:ident, $uint:ty, $result:ident, $ch:ident, $i:ident) => { - if $ch.is_ascii_alphanumeric() { - $result = $result - .checked_mul(BASE as $uint) - .and_then(|x| { - x.checked_add(($ch & 0b0001_1111).wrapping_add( - $offsets.wrapping_shr( - ($ch.wrapping_shr(2) & 0b0001_1000) as ::core::primitive::u32, - ) as ::core::primitive::u8, - ) as $uint) - }) - .ok_or($crate::DecodeError::ArithmeticOverflow)? 
- } else { - return Err($crate::DecodeError::InvalidBase62Byte($ch, $i)); + ($result:ident, $ch:ident, $i:ident, $numeric_start_value:expr, $uppercase_start_value:expr, $lowercase_start_value:expr) => { + const CHARACTER_VALUES: [u8; 256] = [ + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + $numeric_start_value + 0, + $numeric_start_value + 1, + $numeric_start_value + 2, + $numeric_start_value + 3, + $numeric_start_value + 4, + $numeric_start_value + 5, + $numeric_start_value + 6, + $numeric_start_value + 7, + $numeric_start_value + 8, + $numeric_start_value + 9, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + $uppercase_start_value + 0, + $uppercase_start_value + 1, + $uppercase_start_value + 2, + $uppercase_start_value + 3, + $uppercase_start_value + 4, + $uppercase_start_value + 5, + $uppercase_start_value + 6, + $uppercase_start_value + 7, + $uppercase_start_value + 8, + $uppercase_start_value + 9, + $uppercase_start_value + 10, + $uppercase_start_value + 11, + $uppercase_start_value + 12, + $uppercase_start_value + 13, + $uppercase_start_value + 14, + $uppercase_start_value + 15, + $uppercase_start_value + 16, + $uppercase_start_value + 17, + $uppercase_start_value + 18, + $uppercase_start_value + 19, + $uppercase_start_value + 20, + $uppercase_start_value + 21, + $uppercase_start_value + 22, + $uppercase_start_value + 23, + $uppercase_start_value + 24, + $uppercase_start_value + 25, + 255, + 255, + 255, + 255, + 255, + 255, + $lowercase_start_value + 0, + $lowercase_start_value + 1, + $lowercase_start_value + 2, + $lowercase_start_value + 3, + $lowercase_start_value + 4, + $lowercase_start_value + 5, + $lowercase_start_value + 6, + $lowercase_start_value + 7, + 
$lowercase_start_value + 8, + $lowercase_start_value + 9, + $lowercase_start_value + 10, + $lowercase_start_value + 11, + $lowercase_start_value + 12, + $lowercase_start_value + 13, + $lowercase_start_value + 14, + $lowercase_start_value + 15, + $lowercase_start_value + 16, + $lowercase_start_value + 17, + $lowercase_start_value + 18, + $lowercase_start_value + 19, + $lowercase_start_value + 20, + $lowercase_start_value + 21, + $lowercase_start_value + 22, + $lowercase_start_value + 23, + $lowercase_start_value + 24, + $lowercase_start_value + 25, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + ]; + + let char_value = *unsafe { CHARACTER_VALUES.get_unchecked($ch as usize) }; + if char_value == 255 { + return Err(DecodeError::InvalidBase62Byte($ch, $i)); } + $result = $result.wrapping_mul(BASE).wrapping_add(char_value as u64); }; } macro_rules! 
internal_decoder_fn { - ($fn_name:ident, $offsets:ident) => { - fn $fn_name( - input: &[::core::primitive::u8], - ) -> ::core::result::Result<::core::primitive::u128, $crate::DecodeError> { + ($fn_name:ident, $numeric_start_value:expr, $uppercase_start_value:expr, $lowercase_start_value:expr) => { + fn $fn_name(mut input: &[u8]) -> Result<u128, DecodeError> { if input.is_empty() { - return ::core::result::Result::Err($crate::DecodeError::EmptyInput); + return Err(DecodeError::EmptyInput); } - let mut result = 0_u64; - let mut iter = input.iter().copied().enumerate(); - for (i, ch) in iter.by_ref().take(10) { - internal_decoder_loop_body!($offsets, ::core::primitive::u64, result, ch, i); + // Remove leading zeroes + let mut chopped_count = 0_usize; + while let Option::Some(b'0') = input.first() { + input = &input[1..]; + chopped_count += 1; } + let input_len = input.len(); + if input_len <= 22 { + const MULTIPLIERS: [(u128, u64); 23] = [ + (0, 0), + (1, 1), + (1, 1), + (1, 1), + (1, 1), + (1, 1), + (1, 1), + (1, 1), + (1, 1), + (1, 1), + (1, 1), + (BASE as u128, 1), + (BASE_TO_2 as u128, 1), + (BASE_TO_3 as u128, 1), + (BASE_TO_4 as u128, 1), + (BASE_TO_5 as u128, 1), + (BASE_TO_6 as u128, 1), + (BASE_TO_7 as u128, 1), + (BASE_TO_8 as u128, 1), + (BASE_TO_9 as u128, 1), + (BASE_TO_10, 1), + (BASE_TO_11, BASE), + (BASE_TO_12, BASE_TO_2), + ]; + + let (a_power, b_power) = MULTIPLIERS[input_len]; + + let mut iter = (chopped_count..).zip(input.iter().map(|&ch| ch)); + + let mut result_a = 0_u64; + for (i, ch) in iter.by_ref().take(10) { + internal_decoder_loop_body!( + result_a, + ch, + i, + $numeric_start_value, + $uppercase_start_value, + $lowercase_start_value + ); + } + let result_a = (result_a as u128) + .checked_mul(a_power) + .ok_or(DecodeError::ArithmeticOverflow)?; + + let mut result_b = 0_u64; + for (i, ch) in iter.by_ref().take(10) { + internal_decoder_loop_body!( + result_b, + ch, + i, + $numeric_start_value, + $uppercase_start_value, + $lowercase_start_value + ); + } + let 
result_b = (result_b as u128).wrapping_mul(b_power as u128); + + let mut result_c = 0_u64; + for (i, ch) in iter { + internal_decoder_loop_body!( + result_c, + ch, + i, + $numeric_start_value, + $uppercase_start_value, + $lowercase_start_value + ); + } + let result_c = result_c as u128; - let mut result = result as ::core::primitive::u128; - for (i, ch) in iter { - internal_decoder_loop_body!($offsets, ::core::primitive::u128, result, ch, i); + let result = result_a + .checked_add(result_b.wrapping_add(result_c)) + .ok_or(DecodeError::ArithmeticOverflow)?; + Ok(result) + } else { + Err(input + .iter() + .position(|b| !b.is_ascii_alphanumeric()) + .map(|i| { + DecodeError::InvalidBase62Byte(input[i], chopped_count.wrapping_add(i)) + }) + .unwrap_or(DecodeError::ArithmeticOverflow)) } - Ok(result) } }; } -internal_decoder_fn!(_decode, STANDARD_OFFSETS); -internal_decoder_fn!(_decode_alternative, ALTERNATIVE_OFFSETS); +internal_decoder_fn!(_decode, 0, 10, 36); +internal_decoder_fn!(_decode_alternative, 0, 36, 10); /// Decodes a base62 byte slice or an equivalent, like a [`String`](alloc::string::String), /// using the standard digit ordering (0 to 9, then A to Z, then a to z). @@ -147,73 +482,132 @@ pub fn decode_alternative>(input: T) -> Result } // Finds out how many base62 digits a value encodes into. 
-pub(crate) fn digit_count(mut n: u128) -> usize { - let mut result = 1; - - if n >= BASE_TO_11 { - result += 11; - n /= BASE_TO_11; - } - if n >= BASE_TO_10 { - return result + 10; - } - let mut n = n as u64; - if n >= BASE_TO_6 { - result += 6; - n /= BASE_TO_6; - } - if n >= BASE_TO_3 { - result += 3; - n /= BASE_TO_3; +pub(crate) fn digit_count(n: u128) -> usize { + const POWERS: [u128; 22] = [ + 0, + BASE as u128, + BASE_TO_2 as u128, + BASE_TO_3 as u128, + BASE_TO_4 as u128, + BASE_TO_5 as u128, + BASE_TO_6 as u128, + BASE_TO_7 as u128, + BASE_TO_8 as u128, + BASE_TO_9 as u128, + BASE_TO_10, + BASE_TO_11, + BASE_TO_12, + BASE_TO_13, + BASE_TO_14, + BASE_TO_15, + BASE_TO_16, + BASE_TO_17, + BASE_TO_18, + BASE_TO_19, + BASE_TO_20, + BASE_TO_21, + ]; + + match POWERS.binary_search(&n) { + Ok(n) => n.wrapping_add(1), + Err(n) => n, } - if n >= BASE_TO_2 { - return result + 2; - } - if n >= BASE { - return result + 1; - } - - result } macro_rules! internal_encoder_fn { - ($fn_name:ident, $numeric_offset:literal, $first_letters_offset:literal, $last_letters_offset:literal) => { + ($fn_name:ident, $numeric_offset:expr, $first_letters_offset:expr, $last_letters_offset:expr) => { /// # Safety /// /// With this function, `buf` MUST ALREADY have its capacity extended /// to hold all the new base62 characters that will be added - unsafe fn $fn_name( - mut num: ::core::primitive::u128, - digits: ::core::primitive::usize, - buf: &mut ::alloc::string::String, - ) { + unsafe fn $fn_name(mut num: u128, digits: usize, buf: &mut String) { let buf_vec = buf.as_mut_vec(); let new_len = buf_vec.len().wrapping_add(digits); let mut ptr = buf_vec.as_mut_ptr().add(new_len).sub(1); let mut digit_index = 0_usize; - let mut u64_num = (num % $crate::BASE_TO_10) as ::core::primitive::u64; - num /= $crate::BASE_TO_10; + let mut u64_num = (num % BASE_TO_10) as u64; + num /= BASE_TO_10; loop { - ::core::ptr::write(ptr, { - let digit = (u64_num % $crate::BASE) as ::core::primitive::u8; - match 
digit { - 0..=9 => digit.wrapping_add($numeric_offset), - 10..=35 => digit.wrapping_add($first_letters_offset), - _ => digit.wrapping_add($last_letters_offset), - } - }); + const VALUE_CHARACTERS: [u8; 62] = [ + $numeric_offset, + $numeric_offset + 1, + $numeric_offset + 2, + $numeric_offset + 3, + $numeric_offset + 4, + $numeric_offset + 5, + $numeric_offset + 6, + $numeric_offset + 7, + $numeric_offset + 8, + $numeric_offset + 9, + $first_letters_offset, + $first_letters_offset + 1, + $first_letters_offset + 2, + $first_letters_offset + 3, + $first_letters_offset + 4, + $first_letters_offset + 5, + $first_letters_offset + 6, + $first_letters_offset + 7, + $first_letters_offset + 8, + $first_letters_offset + 9, + $first_letters_offset + 10, + $first_letters_offset + 11, + $first_letters_offset + 12, + $first_letters_offset + 13, + $first_letters_offset + 14, + $first_letters_offset + 15, + $first_letters_offset + 16, + $first_letters_offset + 17, + $first_letters_offset + 18, + $first_letters_offset + 19, + $first_letters_offset + 20, + $first_letters_offset + 21, + $first_letters_offset + 22, + $first_letters_offset + 23, + $first_letters_offset + 24, + $first_letters_offset + 25, + $last_letters_offset, + $last_letters_offset + 1, + $last_letters_offset + 2, + $last_letters_offset + 3, + $last_letters_offset + 4, + $last_letters_offset + 5, + $last_letters_offset + 6, + $last_letters_offset + 7, + $last_letters_offset + 8, + $last_letters_offset + 9, + $last_letters_offset + 10, + $last_letters_offset + 11, + $last_letters_offset + 12, + $last_letters_offset + 13, + $last_letters_offset + 14, + $last_letters_offset + 15, + $last_letters_offset + 16, + $last_letters_offset + 17, + $last_letters_offset + 18, + $last_letters_offset + 19, + $last_letters_offset + 20, + $last_letters_offset + 21, + $last_letters_offset + 22, + $last_letters_offset + 23, + $last_letters_offset + 24, + $last_letters_offset + 25, + ]; + core::ptr::write( + ptr, + 
*VALUE_CHARACTERS.get_unchecked((u64_num % BASE) as usize), + ); ptr = ptr.sub(1); digit_index = digit_index.wrapping_add(1); match digit_index { _ if digit_index == digits => break, 10 => { - u64_num = (num % $crate::BASE_TO_10) as ::core::primitive::u64; - num /= $crate::BASE_TO_10; + u64_num = (num % BASE_TO_10) as u64; + num /= BASE_TO_10; } - 20 => u64_num = num as ::core::primitive::u64, - _ => u64_num /= $crate::BASE, + 20 => u64_num = num as u64, + _ => u64_num /= BASE, } } @@ -226,8 +620,8 @@ macro_rules! internal_encoder_fn { // // With these functions, `buf` MUST ALREADY have its capacity extended // to hold all the new base62 characters that will be added -internal_encoder_fn!(_encode_buf, 48, 55, 61); -internal_encoder_fn!(_encode_alternative_buf, 48, 87, 29); +internal_encoder_fn!(_encode_buf, b'0', b'A', b'a'); +internal_encoder_fn!(_encode_alternative_buf, b'0', b'a', b'A'); /// Encodes an unsigned integer into base62, using the standard digit ordering /// (0 to 9, then A to Z, then a to z), and returns the resulting @@ -331,6 +725,12 @@ mod tests { } } + quickcheck! { + fn encode_decode_alternative(num: u128) -> bool { + decode_alternative(encode_alternative(num)) == Ok(num) + } + } + quickcheck! 
{ fn decode_bad(input: Vec<u8>) -> TestResult { if !input.is_empty() && input.iter().all(|ch| ch.is_ascii_alphanumeric()) { @@ -351,6 +751,199 @@ mod tests { } } + #[test] + fn test_decode() { + // Test leading zeroes handling + assert_eq!( + decode("00001000000000000000000000"), + Ok((BASE as u128).pow(21)) + ); + + // Test numeric type boundaries + assert_eq!(decode("7n42DGM5Tflk9n8mt7Fhc7"), Ok(u128::MAX)); + assert_eq!(decode("LygHa16AHYG"), Ok(u64::MAX as u128 + 1)); + assert_eq!(decode("LygHa16AHYF"), Ok(u64::MAX as u128)); + assert_eq!(decode("0"), Ok(0)); + + // Test base62 length-change boundaries + let mut power = 1_u128; + let mut power_minus_one_str = String::with_capacity(21); + let mut power_str = String::with_capacity(22); + power_str.push('1'); + for _ in 1..22 { + power *= BASE as u128; + power_minus_one_str.push('z'); + power_str.push('0'); + + assert_eq!(decode(&power_minus_one_str), Ok(power - 1)); + assert_eq!(decode(&power_str), Ok(power)); + } + + // Test cases that failed due to earlier bugs + assert_eq!(decode("CAcoUun"), Ok(691337691337)); + assert_eq!( + decode("26tF05fvSIgh0000000000"), + Ok(92202686130861137968548313400401640448) + ); + } + + #[test] + fn test_decode_empty_input() { + assert_eq!(decode(""), Err(DecodeError::EmptyInput)); + } + + #[test] + fn test_decode_invalid_char() { + let mut input = Vec::with_capacity(40); + let mut invalid_chars = (0..b'0') + .chain(b'0' + 10..b'A') + .chain(b'A' + 26..b'a') + .chain(b'a' + 26..=255) + .cycle(); + + for size in [10, 22, 23, 40].iter().map(|&size| size) { + input.clear(); + for _ in 0..size { + input.push(b'0'); + } + + for i in 0..size { + input[i] = b'1'; + for j in 0..size { + let invalid_char = invalid_chars.next().unwrap(); + input[j] = invalid_char; + + assert_eq!( + decode(&input), + Err(DecodeError::InvalidBase62Byte(invalid_char, j)) + ); + + input[j] = b'0'; + input[i] = b'1'; + } + input[i] = b'0'; + } + } + } + + #[test] + fn test_decode_overflow() { + assert_eq!( + 
decode("10000000000000000000000"), + Err(DecodeError::ArithmeticOverflow) + ); + assert_eq!( + decode("7n42DGM5Tflk9n8mt7Fhc78"), + Err(DecodeError::ArithmeticOverflow) + ); + } + + #[test] + fn test_decode_alternative() { + // Test leading zeroes handling + assert_eq!( + decode_alternative("00001000000000000000000000"), + Ok((BASE as u128).pow(21)) + ); + + // Test numeric type boundaries + assert_eq!(decode_alternative("7N42dgm5tFLK9N8MT7fHC7"), Ok(u128::MAX)); + assert_eq!(decode_alternative("lYGhA16ahyg"), Ok(u64::MAX as u128 + 1)); + assert_eq!(decode_alternative("lYGhA16ahyf"), Ok(u64::MAX as u128)); + assert_eq!(decode_alternative("0"), Ok(0)); + + // Test base62 length-change boundaries + let mut power = 1_u128; + let mut power_minus_one_str = String::with_capacity(21); + let mut power_str = String::with_capacity(22); + power_str.push('1'); + for _ in 1..22 { + power *= BASE as u128; + power_minus_one_str.push('Z'); + power_str.push('0'); + + assert_eq!(decode_alternative(&power_minus_one_str), Ok(power - 1)); + assert_eq!(decode_alternative(&power_str), Ok(power)); + } + + // Test cases that failed due to earlier bugs + assert_eq!(decode_alternative("caCOuUN"), Ok(691337691337)); + assert_eq!( + decode_alternative("26Tf05FVsiGH0000000000"), + Ok(92202686130861137968548313400401640448) + ); + } + + #[test] + fn test_decode_alternative_empty_input() { + assert_eq!(decode_alternative(""), Err(DecodeError::EmptyInput)); + } + + #[test] + fn test_decode_alternative_invalid_char() { + let mut input = Vec::with_capacity(40); + let mut invalid_chars = (0..b'0') + .chain(b'0' + 10..b'A') + .chain(b'A' + 26..b'a') + .chain(b'a' + 26..=255) + .cycle(); + + for size in [10, 22, 23, 40].iter().map(|&size| size) { + input.clear(); + for _ in 0..size { + input.push(b'0'); + } + + for i in 0..size { + input[i] = b'1'; + for j in 0..size { + let invalid_char = invalid_chars.next().unwrap(); + input[j] = invalid_char; + + assert_eq!( + decode_alternative(&input), + 
Err(DecodeError::InvalidBase62Byte(invalid_char, j)) + ); + + input[j] = b'0'; + input[i] = b'1'; + } + input[i] = b'0'; + } + } + } + + #[test] + fn test_decode_alternative_overflow() { + assert_eq!( + decode_alternative("10000000000000000000000"), + Err(DecodeError::ArithmeticOverflow) + ); + assert_eq!( + decode_alternative("7N42dgm5tFLK9N8MT7fHC8"), + Err(DecodeError::ArithmeticOverflow) + ); + } + + #[test] + fn test_digit_count() { + // Assume that `digit_count` is a monotonically increasing function and + // check that the boundary outputs have the right values + for pow in 1..22 { + let this_power = (BASE as u128).pow(pow as u32); + + assert_eq!(digit_count(this_power - 1), pow); + assert_eq!(digit_count(this_power), pow + 1); + } + + // Check that boundary inputs have the right values + assert_eq!(digit_count(0), 1); + assert_eq!(digit_count(1), 1); + assert_eq!(digit_count(u64::MAX as u128), 11); + assert_eq!(digit_count(u64::MAX as u128 + 1), 11); + assert_eq!(digit_count(u128::MAX), 22); + } + #[test] fn test_encode() { // Test numeric type boundaries @@ -522,125 +1115,4 @@ mod tests { assert_eq!(buf, "26Tf05FVsiGH0000000000"); // buf.clear(); } - - #[test] - fn test_decode() { - // Test numeric type boundaries - assert_eq!(decode("7n42DGM5Tflk9n8mt7Fhc7"), Ok(u128::MAX)); - assert_eq!(decode("LygHa16AHYG"), Ok(u64::MAX as u128 + 1)); - assert_eq!(decode("LygHa16AHYF"), Ok(u64::MAX as u128)); - assert_eq!(decode("0"), Ok(0)); - - // Test base62 length-change boundaries - let mut power = 1_u128; - let mut power_minus_one_str = String::with_capacity(21); - let mut power_str = String::with_capacity(22); - power_str.push('1'); - for _ in 1..22 { - power *= BASE as u128; - power_minus_one_str.push('z'); - power_str.push('0'); - - assert_eq!(decode(&power_minus_one_str), Ok(power - 1)); - assert_eq!(decode(&power_str), Ok(power)); - } - - // Test cases that failed due te earlier bugs - assert_eq!(decode("CAcoUun"), Ok(691337691337)); - assert_eq!( - 
decode("26tF05fvSIgh0000000000"), - Ok(92202686130861137968548313400401640448) - ); - } - - #[test] - fn test_decode_overflow() { - assert_eq!( - decode("7n42DGM5Tflk9n8mt7Fhc78"), - Err(DecodeError::ArithmeticOverflow) - ); - } - - #[test] - fn test_decode_empty_input() { - assert_eq!(decode(""), Err(DecodeError::EmptyInput)); - } - - #[test] - fn test_decode_invalid_char() { - assert_eq!( - decode("ds{Z455f"), - Err(DecodeError::InvalidBase62Byte(b'{', 2)) - ); - } - - #[test] - fn test_decode_alternative() { - // Test numeric type boundaries - assert_eq!(decode_alternative("7N42dgm5tFLK9N8MT7fHC7"), Ok(u128::MAX)); - assert_eq!(decode_alternative("lYGhA16ahyg"), Ok(u64::MAX as u128 + 1)); - assert_eq!(decode_alternative("lYGhA16ahyf"), Ok(u64::MAX as u128)); - assert_eq!(decode_alternative("0"), Ok(0)); - - // Test base62 length-change boundaries - let mut power = 1_u128; - let mut power_minus_one_str = String::with_capacity(21); - let mut power_str = String::with_capacity(22); - power_str.push('1'); - for _ in 1..22 { - power *= BASE as u128; - power_minus_one_str.push('Z'); - power_str.push('0'); - - assert_eq!(decode_alternative(&power_minus_one_str), Ok(power - 1)); - assert_eq!(decode_alternative(&power_str), Ok(power)); - } - - // Test cases that failed due te earlier bugs - assert_eq!(decode_alternative("caCOuUN"), Ok(691337691337)); - assert_eq!( - decode_alternative("26Tf05FVsiGH0000000000"), - Ok(92202686130861137968548313400401640448) - ); - } - - #[test] - fn test_decode_alternative_overflow() { - assert_eq!( - decode_alternative("7N42dgm5tFLK9N8MT7fHC8"), - Err(DecodeError::ArithmeticOverflow) - ); - } - - #[test] - fn test_decode_alternative_empty_input() { - assert_eq!(decode_alternative(""), Err(DecodeError::EmptyInput)); - } - - #[test] - fn test_decode_alternative_invalid_char() { - assert_eq!( - decode_alternative("ds{Z455f"), - Err(DecodeError::InvalidBase62Byte(b'{', 2)) - ); - } - - #[test] - fn test_digit_count() { - // Assume that 
`digit_count` is a monotonically increasing function and - // check that the boundary outputs have the right values - for pow in 1..22 { - let this_power = (BASE as u128).pow(pow as u32); - - assert_eq!(digit_count(this_power - 1), pow); - assert_eq!(digit_count(this_power), pow + 1); - } - - // Check that boundary inputs have the right values - assert_eq!(digit_count(0), 1); - assert_eq!(digit_count(1), 1); - assert_eq!(digit_count(u64::MAX as u128), 11); - assert_eq!(digit_count(u64::MAX as u128 + 1), 11); - assert_eq!(digit_count(u128::MAX), 22); - } }