diff --git a/components/locale_core/src/parser/mod.rs b/components/locale_core/src/parser/mod.rs index a5a8ae24328..4000b8f241a 100644 --- a/components/locale_core/src/parser/mod.rs +++ b/components/locale_core/src/parser/mod.rs @@ -24,25 +24,27 @@ const fn is_separator(slice: &[u8], idx: usize) -> bool { b == b'-' || b == b'_' } -const fn get_current_subtag(slice: &[u8], idx: usize) -> (usize, usize) { +const fn get_current_subtag(mut slice: &[u8], idx: usize) -> (&[u8], &[u8]) { debug_assert!(idx < slice.len()); // This function is called only on the idx == 0 or on a separator. - let (start, mut end) = if is_separator(slice, idx) { - // If it's a separator, set the start to idx+1 and advance the idx to the next char. - (idx + 1, idx + 1) + let mut end = if is_separator(slice, idx) { + // If it's a separator, skip it + slice = slice.split_at(idx + 1).1; + 0 } else { - // If it's idx=0, start is 0 and end is set to 1 + // If it's idx=0, end is set to 1 debug_assert!(idx == 0); - (0, 1) + 1 }; while end < slice.len() && !is_separator(slice, end) { // Advance until we reach end of slice or a separator. end += 1; } - // Notice: this slice may be empty (start == end) for cases like `"en-"` or `"en--US"` - (start, end) + + // Notice: this slice may be empty (end == 0) for cases like `"en-"` or `"en--US"` + (slice, slice.split_at(end).0) } // `SubtagIterator` is a helper iterator for [`LanguageIdentifier`] and [`Locale`] parsing. @@ -59,25 +61,25 @@ const fn get_current_subtag(slice: &[u8], idx: usize) -> (usize, usize) { // All methods return an `Option` of a `Result`. #[derive(Copy, Clone, Debug)] pub struct SubtagIterator<'a> { - pub slice: &'a [u8], + remaining: &'a [u8], done: bool, - // done + subtag is faster than Option<(usize, usize)> + // done + current is faster than Option<&[u8]> // at the time of writing. - subtag: (usize, usize), + current: &'a [u8], } impl<'a> SubtagIterator<'a> { pub const fn new(slice: &'a [u8]) -> Self { - let subtag = if slice.is_empty() || is_separator(slice, 0) { + let (remaining, current) = if slice.is_empty() || is_separator(slice, 0) { // This returns (0, 0) which returns Some(b"") for slices like `"-en"` or `"-"` - (0, 0) + (slice, b"".as_slice()) } else { get_current_subtag(slice, 0) }; Self { - slice, + remaining, done: false, - subtag, + current, } } @@ -85,29 +87,22 @@ impl<'a> SubtagIterator<'a> { if self.done { return (self, None); } - let result = self.subtag; - if result.1 < self.slice.len() { - self.subtag = get_current_subtag(self.slice, result.1); + let result = self.current; + if self.current.len() < self.remaining.len() { + let (remaining, current) = get_current_subtag(self.remaining, self.current.len()); + self.remaining = remaining; + self.current = current; } else { self.done = true; } - ( - self, - Some(self.slice.split_at(result.1).0.split_at(result.0).1), - ) + (self, Some(result)) } pub const fn peek(&self) -> Option<&'a [u8]> { if self.done { return None; } - Some( - self.slice - .split_at(self.subtag.1) - .0 - .split_at(self.subtag.0) - .1, - ) + Some(self.current) } } @@ -193,42 +188,42 @@ mod test { fn get_current_subtag_test() { let slice = "-"; let current = get_current_subtag(slice.as_bytes(), 0); - assert_eq!(current, (1, 1)); + assert_eq!(current, (b"".as_slice(), b"".as_slice())); let slice = "-en"; let current = get_current_subtag(slice.as_bytes(), 0); - assert_eq!(current, (1, 3)); + assert_eq!(current, (b"en".as_slice(), b"en".as_slice())); let slice = "-en-"; let current = get_current_subtag(slice.as_bytes(), 3); - assert_eq!(current, (4, 4)); + assert_eq!(current, (b"".as_slice(), b"".as_slice())); let slice = "en-"; let current = get_current_subtag(slice.as_bytes(), 0); - assert_eq!(current, (0, 2)); + assert_eq!(current, (b"en-".as_slice(), b"en".as_slice())); let current = get_current_subtag(slice.as_bytes(), 2); - assert_eq!(current, (3, 3)); + assert_eq!(current, (b"".as_slice(), b"".as_slice())); let slice = "en--US"; let current = get_current_subtag(slice.as_bytes(), 0); - assert_eq!(current, (0, 2)); + assert_eq!(current, (b"en--US".as_slice(), b"en".as_slice())); let current = get_current_subtag(slice.as_bytes(), 2); - assert_eq!(current, (3, 3)); + assert_eq!(current, (b"-US".as_slice(), b"".as_slice())); let current = get_current_subtag(slice.as_bytes(), 3); - assert_eq!(current, (4, 6)); + assert_eq!(current, (b"US".as_slice(), b"US".as_slice())); let slice = "--"; let current = get_current_subtag(slice.as_bytes(), 0); - assert_eq!(current, (1, 1)); + assert_eq!(current, (b"-".as_slice(), b"".as_slice())); let current = get_current_subtag(slice.as_bytes(), 1); - assert_eq!(current, (2, 2)); + assert_eq!(current, (b"".as_slice(), b"".as_slice())); let slice = "-"; let current = get_current_subtag(slice.as_bytes(), 0); - assert_eq!(current, (1, 1)); + assert_eq!(current, (b"".as_slice(), b"".as_slice())); } }