Skip to content

Commit 9d91b3a

Browse files
committed
Remove all non-local unsafety.
This patches a lot of the wrappers in `AsciiStr` being marked as safe but not being safe except within the context, using raw pointer dereferences without local bounds checks. This is extensively documented in aldanor#37: aldanor#37 `AsciiStr` has been re-written as a result, and unsafe functions marked as safe have been either converted to safe variants where the compiled checks can be ellided or marked as unsafe so the caller knows to upholds the safety invariants.
1 parent 52cc113 commit 9d91b3a

File tree

4 files changed

+156
-95
lines changed

4 files changed

+156
-95
lines changed

src/common.rs

+106-59
Original file line numberDiff line numberDiff line change
@@ -18,116 +18,164 @@ impl<'a> AsciiStr<'a> {
1818
}
1919
}
2020

21+
pub fn len(&self) -> usize {
22+
self.end as usize - self.ptr as usize
23+
}
24+
25+
/// # Safety
26+
///
27+
/// Safe if `n <= self.len()`
2128
#[inline]
22-
pub fn step_by(&mut self, n: usize) -> &mut Self {
29+
pub unsafe fn step_by(&mut self, n: usize) -> &mut Self {
30+
debug_assert!(n <= self.len(), "buffer overflow: stepping by greater than our buffer length.");
31+
// SAFETY: Safe if `n <= self.len()`
2332
unsafe { self.ptr = self.ptr.add(n) };
2433
self
2534
}
2635

36+
/// # Safety
37+
///
38+
/// Safe if `!self.is_empty()`
39+
#[inline]
40+
pub unsafe fn step(&mut self) -> &mut Self {
41+
debug_assert!(!self.is_empty(), "buffer overflow: buffer is empty.");
42+
// SAFETY: Safe if the buffer is not empty, that is, `self.len() >= 1`
43+
unsafe { self.step_by(1) }
44+
}
45+
2746
#[inline]
28-
pub fn step(&mut self) -> &mut Self {
29-
self.step_by(1)
47+
pub fn step_if(&mut self, c: u8) -> bool {
48+
let stepped = self.first_is(c);
49+
if stepped {
50+
// SAFETY: safe since we have at least 1 character in the buffer
51+
unsafe { self.step() };
52+
}
53+
stepped
3054
}
3155

3256
#[inline]
3357
pub fn is_empty(&self) -> bool {
3458
self.ptr == self.end
3559
}
3660

61+
/// # Safety
62+
///
63+
/// Safe if `!self.is_empty()`
3764
#[inline]
38-
pub fn first(&self) -> u8 {
65+
pub unsafe fn first_unchecked(&self) -> u8 {
66+
debug_assert!(!self.is_empty(), "attempting to get first value of empty buffer.");
3967
unsafe { *self.ptr }
4068
}
4169

4270
#[inline]
43-
pub fn first_is(&self, c: u8) -> bool {
44-
self.first() == c
71+
pub fn first(&self) -> Option<u8> {
72+
if !self.is_empty() {
73+
// SAFETY: safe since `!self.is_empty()`
74+
Some(unsafe { self.first_unchecked() })
75+
} else {
76+
None
77+
}
4578
}
4679

4780
#[inline]
48-
pub fn first_either(&self, c1: u8, c2: u8) -> bool {
49-
let c = self.first();
50-
c == c1 || c == c2
81+
pub fn first_is(&self, c: u8) -> bool {
82+
self.first() == Some(c)
5183
}
5284

5385
#[inline]
54-
pub fn check_first(&self, c: u8) -> bool {
55-
!self.is_empty() && self.first() == c
86+
pub fn first_is2(&self, c1: u8, c2: u8) -> bool {
87+
if let Some(c) = self.first() {
88+
c == c1 || c == c2
89+
} else {
90+
false
91+
}
5692
}
5793

5894
#[inline]
59-
pub fn check_first_either(&self, c1: u8, c2: u8) -> bool {
60-
!self.is_empty() && (self.first() == c1 || self.first() == c2)
95+
pub fn first_is_digit(&self) -> bool {
96+
if let Some(c) = self.first() {
97+
c.is_ascii_digit()
98+
} else {
99+
false
100+
}
61101
}
62102

63103
#[inline]
64-
pub fn check_first_digit(&self) -> bool {
65-
!self.is_empty() && self.first().is_ascii_digit()
104+
pub fn first_digit(&self) -> Option<u8> {
105+
self.first().and_then(|x| if x.is_ascii_digit() {
106+
Some(x - b'0')
107+
} else {
108+
None
109+
})
66110
}
67111

68112
#[inline]
69-
pub fn parse_digits(&mut self, mut func: impl FnMut(u8)) {
70-
while !self.is_empty() && self.first().is_ascii_digit() {
71-
func(self.first() - b'0');
72-
self.step();
113+
pub fn try_read_digit(&mut self) -> Option<u8> {
114+
if let Some(digit) = self.first_digit() {
115+
// SAFETY: Safe since `first_digit` means the buffer is not empty
116+
unsafe { self.step() };
117+
Some(digit)
118+
} else {
119+
None
73120
}
74121
}
75122

76123
#[inline]
77-
pub fn check_len(&self, n: usize) -> bool {
78-
let len = self.end as usize - self.ptr as usize;
79-
n <= len
124+
pub fn parse_digits(&mut self, mut func: impl FnMut(u8)) {
125+
while let Some(digit) = self.try_read_digit() {
126+
func(digit);
127+
}
80128
}
81129

82130
#[inline]
83131
pub fn try_read_u64(&self) -> Option<u64> {
84-
if self.check_len(8) {
85-
Some(self.read_u64())
132+
if self.len() >= 8 {
133+
Some(unsafe { self.read_u64_unchecked() })
86134
} else {
87135
None
88136
}
89137
}
90138

139+
/// # Safety
140+
///
141+
/// Safe if `self.len() >= 8`
91142
#[inline]
92-
pub fn read_u64(&self) -> u64 {
93-
debug_assert!(self.check_len(8));
143+
pub unsafe fn read_u64_unchecked(&self) -> u64 {
144+
debug_assert!(self.len() >= 8, "overflowing buffer: buffer is not 8 bytes long");
94145
let src = self.ptr as *const u64;
146+
// SAFETY: Safe if `self.len() >= 8`
95147
u64::from_le(unsafe { ptr::read_unaligned(src) })
96148
}
97149

98150
#[inline]
99151
pub fn offset_from(&self, other: &Self) -> isize {
100-
isize::wrapping_sub(self.ptr as _, other.ptr as _) // assuming the same end
152+
isize::wrapping_sub(self.ptr as isize, other.ptr as isize) // assuming the same end
101153
}
102154
}
103155

104156
// Most of these are inherently unsafe; we assume we know what we're calling and when.
105157
pub trait ByteSlice: AsRef<[u8]> + AsMut<[u8]> {
106-
#[inline]
107-
fn get_at(&self, i: usize) -> u8 {
108-
unsafe { *self.as_ref().get_unchecked(i) }
109-
}
110-
111-
#[inline]
112-
fn get_first(&self) -> u8 {
113-
debug_assert!(!self.as_ref().is_empty());
114-
self.get_at(0)
115-
}
116-
117158
#[inline]
118159
fn check_first(&self, c: u8) -> bool {
119-
!self.as_ref().is_empty() && self.get_first() == c
160+
self.as_ref().first() == Some(&c)
120161
}
121162

122163
#[inline]
123164
fn check_first2(&self, c1: u8, c2: u8) -> bool {
124-
!self.as_ref().is_empty() && (self.get_first() == c1 || self.get_first() == c2)
165+
if let Some(&c) = self.as_ref().first() {
166+
c == c1 || c == c2
167+
} else {
168+
false
169+
}
125170
}
126171

127172
#[inline]
128173
fn eq_ignore_case(&self, u: &[u8]) -> bool {
129-
debug_assert!(self.as_ref().len() >= u.len());
130-
let d = (0..u.len()).fold(0, |d, i| d | self.get_at(i) ^ u.get_at(i));
174+
let s = self.as_ref();
175+
if s.len() < u.len() {
176+
return false;
177+
}
178+
let d = (0..u.len()).fold(0, |d, i| d | s[i] ^ u[i]);
131179
d == 0 || d == 32
132180
}
133181

@@ -145,26 +193,25 @@ pub trait ByteSlice: AsRef<[u8]> + AsMut<[u8]> {
145193
s
146194
}
147195

196+
/// # Safety
197+
///
198+
/// Safe if `self.len() >= 8`.
148199
#[inline]
149-
fn skip_chars2(&self, c1: u8, c2: u8) -> &[u8] {
150-
let mut s = self.as_ref();
151-
while !s.is_empty() && (s.get_first() == c1 || s.get_first() == c2) {
152-
s = s.advance(1);
153-
}
154-
s
155-
}
156-
157-
#[inline]
158-
fn read_u64(&self) -> u64 {
200+
unsafe fn read_u64(&self) -> u64 {
159201
debug_assert!(self.as_ref().len() >= 8);
160202
let src = self.as_ref().as_ptr() as *const u64;
203+
// SAFETY: safe if `self.len() >= 8`.
161204
u64::from_le(unsafe { ptr::read_unaligned(src) })
162205
}
163206

207+
/// # Safety
208+
///
209+
/// Safe if `self.len() >= 8`.
164210
#[inline]
165-
fn write_u64(&mut self, value: u64) {
211+
unsafe fn write_u64(&mut self, value: u64) {
166212
debug_assert!(self.as_ref().len() >= 8);
167213
let dst = self.as_mut().as_mut_ptr() as *mut u64;
214+
// SAFETY: safe if `self.len() >= 8`.
168215
unsafe { ptr::write_unaligned(dst, u64::to_le(value)) };
169216
}
170217
}
@@ -180,8 +227,8 @@ pub fn is_8digits(v: u64) -> bool {
180227

181228
#[inline]
182229
pub fn parse_digits(s: &mut &[u8], mut f: impl FnMut(u8)) {
183-
while !s.is_empty() {
184-
let c = s.get_first().wrapping_sub(b'0');
230+
while let Some(&ch) = s.first() {
231+
let c = ch.wrapping_sub(b'0');
185232
if c < 10 {
186233
f(c);
187234
*s = s.advance(1);
@@ -215,14 +262,14 @@ mod tests {
215262
fn test_read_write_u64() {
216263
let bytes = b"01234567";
217264
let string = AsciiStr::new(bytes);
218-
let int = string.read_u64();
219-
assert_eq!(int, 0x3736353433323130);
265+
let int = string.try_read_u64();
266+
assert_eq!(int, Some(0x3736353433323130));
220267

221-
let int = bytes.read_u64();
268+
let int = unsafe { bytes.read_u64() };
222269
assert_eq!(int, 0x3736353433323130);
223270

224271
let mut slc = [0u8; 8];
225-
slc.write_u64(0x3736353433323130);
272+
unsafe { slc.write_u64(0x3736353433323130) };
226273
assert_eq!(&slc, bytes);
227274
}
228275
}

src/decimal.rs

+7-3
Original file line numberDiff line numberDiff line change
@@ -189,9 +189,11 @@ impl Decimal {
189189
#[inline]
190190
pub fn parse_decimal(mut s: &[u8]) -> Decimal {
191191
// can't fail since it follows a call to parse_number
192+
assert!(!s.is_empty(), "the buffer cannot be empty since it follows a call to parse_number");
192193
let mut d = Decimal::default();
193194
let start = s;
194-
let c = s.get_first();
195+
196+
let c = s[0];
195197
d.negative = c == b'-';
196198
if c == b'-' || c == b'+' {
197199
s = s.advance(1);
@@ -205,11 +207,13 @@ pub fn parse_decimal(mut s: &[u8]) -> Decimal {
205207
s = s.skip_chars(b'0');
206208
}
207209
while s.len() >= 8 && d.num_digits + 8 < Decimal::MAX_DIGITS {
208-
let v = s.read_u64();
210+
// SAFETY: Safe since `s.len() >= 8`
211+
let v = unsafe { s.read_u64() };
209212
if !is_8digits(v) {
210213
break;
211214
}
212-
d.digits[d.num_digits..].write_u64(v - 0x3030_3030_3030_3030);
215+
// SAFETY: Safe since `num_digits + 8 < Decimal::MAX_DIGITS`
216+
unsafe { d.digits[d.num_digits..].write_u64(v - 0x3030_3030_3030_3030) };
213217
d.num_digits += 8;
214218
s = s.advance(8);
215219
}

0 commit comments

Comments
 (0)