From 715f404417394b870e95276b2d1bb176a77ee49c Mon Sep 17 00:00:00 2001 From: Jonathan Behrens Date: Tue, 30 Jul 2024 20:58:10 -0700 Subject: [PATCH 1/8] inlining --- src/huffman.rs | 22 +++++++--------------- 1 file changed, 7 insertions(+), 15 deletions(-) diff --git a/src/huffman.rs b/src/huffman.rs index 852596b..3da6da1 100644 --- a/src/huffman.rs +++ b/src/huffman.rs @@ -26,19 +26,6 @@ pub(crate) struct HuffmanTree { } impl HuffmanTree { - fn is_full(&self) -> bool { - self.num_nodes == self.max_nodes - } - - /// Turns a node from empty into a branch and assigns its children - fn assign_children(&mut self, node_index: usize) -> usize { - let offset_index = self.num_nodes - node_index; - self.tree[node_index] = HuffmanTreeNode::Branch(offset_index); - self.num_nodes += 2; - - offset_index - } - /// Init a huffman tree fn init(num_leaves: usize) -> Result { if num_leaves == 0 { @@ -126,10 +113,15 @@ impl HuffmanTree { let offset = match node { HuffmanTreeNode::Empty => { - if self.is_full() { + if self.num_nodes == self.max_nodes { return Err(DecodingError::HuffmanError); } - self.assign_children(node_index) + + // Turns a node from empty into a branch and assigns its children + let offset_index = self.num_nodes - node_index; + self.tree[node_index] = HuffmanTreeNode::Branch(offset_index); + self.num_nodes += 2; + offset_index } HuffmanTreeNode::Leaf(_) => return Err(DecodingError::HuffmanError), HuffmanTreeNode::Branch(offset) => offset, From cfdae1461a49b2380722b55a07c320c7b22418bf Mon Sep 17 00:00:00 2001 From: Jonathan Behrens Date: Tue, 30 Jul 2024 21:06:58 -0700 Subject: [PATCH 2/8] Switch to dedicated single_node / two_node HuffmanTree creation methods --- src/huffman.rs | 35 +++++++++++++++++++++-------------- src/lossless.rs | 24 +++++++++++------------- 2 files changed, 32 insertions(+), 27 deletions(-) diff --git a/src/huffman.rs b/src/huffman.rs index 3da6da1..bb8eecc 100644 --- a/src/huffman.rs +++ b/src/huffman.rs @@ -151,10 +151,19 @@ impl HuffmanTree { } } - let mut tree = HuffmanTree::init(num_symbols)?; + if num_symbols == 0 { + return Err(DecodingError::HuffmanError); + } + + let max_nodes = 2 * num_symbols - 1; + let mut tree = HuffmanTree { + tree: vec![HuffmanTreeNode::Empty; max_nodes], + max_nodes, + num_nodes: 1, + }; if num_symbols == 1 { - tree.add_symbol(root_symbol, 0, 0)?; + return Ok(Self::build_single_node(root_symbol)); } else { let codes = HuffmanTree::code_lengths_to_codes(&code_lengths)?; @@ -168,19 +177,17 @@ impl HuffmanTree { Ok(tree) } - /// Builds a tree explicitly from lengths, codes and symbols - pub(crate) fn build_explicit( - code_lengths: Vec, - codes: Vec, - symbols: Vec, - ) -> Result { - let mut tree = HuffmanTree::init(symbols.len())?; - - for i in 0..symbols.len() { - tree.add_symbol(symbols[i], codes[i], code_lengths[i])?; - } + pub(crate) fn build_single_node(symbol: u16) -> HuffmanTree { + let mut tree = HuffmanTree::init(1).unwrap(); + tree.add_symbol(symbol, 0, 0).unwrap(); + tree + } - Ok(tree) + pub(crate) fn build_two_node(zero: u16, one: u16) -> HuffmanTree { + let mut tree = HuffmanTree::init(2).unwrap(); + tree.add_symbol(zero, 0, 1).unwrap(); + tree.add_symbol(one, 1, 1).unwrap(); + tree } pub(crate) fn is_single_node(&self) -> bool { diff --git a/src/lossless.rs b/src/lossless.rs index 36b5357..68d7e85 100644 --- a/src/lossless.rs +++ b/src/lossless.rs @@ -358,24 +358,22 @@ impl LosslessDecoder { if simple { let num_symbols = self.bit_reader.read_bits::(1)? + 1; - let mut code_lengths = vec![u16::from(num_symbols - 1)]; - let mut codes = vec![0]; - let mut symbols = Vec::new(); - let is_first_8bits = self.bit_reader.read_bits::(1)?; - symbols.push(self.bit_reader.read_bits::(1 + 7 * is_first_8bits)?); - - if num_symbols == 2 { - symbols.push(self.bit_reader.read_bits::(8)?); - code_lengths.push(1); - codes.push(1); - } + let zero_symbol = self.bit_reader.read_bits::(1 + 7 * is_first_8bits)?; - if symbols.iter().any(|&s| s > alphabet_size) { + if zero_symbol >= alphabet_size { return Err(DecodingError::BitStreamError); } - HuffmanTree::build_explicit(code_lengths, codes, symbols) + if num_symbols == 1 { + Ok(HuffmanTree::build_single_node(zero_symbol)) + } else { + let one_symbol = self.bit_reader.read_bits::(8)?; + if one_symbol >= alphabet_size { + return Err(DecodingError::BitStreamError); + } + Ok(HuffmanTree::build_two_node(zero_symbol, one_symbol)) + } } else { let mut code_length_code_lengths = vec![0; CODE_LENGTH_CODES]; From 79d7d875b0771007b2944a56086ded14cf84c911 Mon Sep 17 00:00:00 2001 From: Jonathan Behrens Date: Tue, 30 Jul 2024 21:19:14 -0700 Subject: [PATCH 3/8] HuffmanTree enum --- src/huffman.rs | 179 +++++++++++++++++++++++++------------------------ 1 file changed, 92 insertions(+), 87 deletions(-) diff --git a/src/huffman.rs b/src/huffman.rs index bb8eecc..31b2b72 100644 --- a/src/huffman.rs +++ b/src/huffman.rs @@ -17,34 +17,75 @@ enum HuffmanTreeNode { Empty, } -/// Huffman tree -#[derive(Clone, Debug, Default)] -pub(crate) struct HuffmanTree { +#[derive(Clone, Debug)] +pub(crate) struct HuffmanTreeInner { tree: Vec, max_nodes: usize, num_nodes: usize, } -impl HuffmanTree { - /// Init a huffman tree - fn init(num_leaves: usize) -> Result { - if num_leaves == 0 { - return Err(DecodingError::HuffmanError); - } +/// Huffman tree +#[derive(Clone, Debug)] +pub(crate) enum HuffmanTree { + Single(u16), + Pair(u16, u16), + Tree(HuffmanTreeInner), +} - let max_nodes = 2 * num_leaves - 1; - let tree = vec![HuffmanTreeNode::Empty; max_nodes]; - let num_nodes = 1; +impl Default for HuffmanTree { + fn default() -> Self { + HuffmanTree::Single(0) + } +} - let tree = HuffmanTree { - tree, - max_nodes, - num_nodes, - }; +impl HuffmanTreeInner { + /// Adds a symbol to a huffman tree + fn add_symbol( + &mut self, + symbol: u16, + code: u16, + code_length: u16, + ) -> Result<(), DecodingError> { + let mut node_index = 0; + let code = usize::from(code); + + for length in (0..code_length).rev() { + if node_index >= self.max_nodes { + return Err(DecodingError::HuffmanError); + } + + let node = self.tree[node_index]; + + let offset = match node { + HuffmanTreeNode::Empty => { + if self.num_nodes == self.max_nodes { + return Err(DecodingError::HuffmanError); + } + + // Turns a node from empty into a branch and assigns its children + let offset_index = self.num_nodes - node_index; + self.tree[node_index] = HuffmanTreeNode::Branch(offset_index); + self.num_nodes += 2; + offset_index + } + HuffmanTreeNode::Leaf(_) => return Err(DecodingError::HuffmanError), + HuffmanTreeNode::Branch(offset) => offset, + }; + + node_index += offset + ((code >> length) & 1); + } + + match self.tree[node_index] { + HuffmanTreeNode::Empty => self.tree[node_index] = HuffmanTreeNode::Leaf(symbol), + HuffmanTreeNode::Leaf(_) => return Err(DecodingError::HuffmanError), + HuffmanTreeNode::Branch(_offset) => return Err(DecodingError::HuffmanError), + } - Ok(tree) + Ok(()) } +} +impl HuffmanTree { /// Converts code lengths to codes fn code_lengths_to_codes(code_lengths: &[u16]) -> Result>, DecodingError> { let max_code_length = *code_lengths @@ -94,51 +135,6 @@ impl HuffmanTree { Ok(huff_codes) } - /// Adds a symbol to a huffman tree - fn add_symbol( - &mut self, - symbol: u16, - code: u16, - code_length: u16, - ) -> Result<(), DecodingError> { - let mut node_index = 0; - let code = usize::from(code); - - for length in (0..code_length).rev() { - if node_index >= self.max_nodes { - return Err(DecodingError::HuffmanError); - } - - let node = self.tree[node_index]; - - let offset = match node { - HuffmanTreeNode::Empty => { - if self.num_nodes == self.max_nodes { - return Err(DecodingError::HuffmanError); - } - - // Turns a node from empty into a branch and assigns its children - let offset_index = self.num_nodes - node_index; - self.tree[node_index] = HuffmanTreeNode::Branch(offset_index); - self.num_nodes += 2; - offset_index - } - HuffmanTreeNode::Leaf(_) => return Err(DecodingError::HuffmanError), - HuffmanTreeNode::Branch(offset) => offset, - }; - - node_index += offset + ((code >> length) & 1); - } - - match self.tree[node_index] { - HuffmanTreeNode::Empty => self.tree[node_index] = HuffmanTreeNode::Leaf(symbol), - HuffmanTreeNode::Leaf(_) => return Err(DecodingError::HuffmanError), - HuffmanTreeNode::Branch(_offset) => return Err(DecodingError::HuffmanError), - } - - Ok(()) - } - /// Builds a tree implicitly, just from code lengths pub(crate) fn build_implicit(code_lengths: Vec) -> Result { let mut num_symbols = 0; @@ -156,7 +152,7 @@ impl HuffmanTree { } let max_nodes = 2 * num_symbols - 1; - let mut tree = HuffmanTree { + let mut tree = HuffmanTreeInner { tree: vec![HuffmanTreeNode::Empty; max_nodes], max_nodes, num_nodes: 1, @@ -174,24 +170,19 @@ impl HuffmanTree { } } - Ok(tree) + Ok(HuffmanTree::Tree(tree)) } pub(crate) fn build_single_node(symbol: u16) -> HuffmanTree { - let mut tree = HuffmanTree::init(1).unwrap(); - tree.add_symbol(symbol, 0, 0).unwrap(); - tree + HuffmanTree::Single(symbol) } pub(crate) fn build_two_node(zero: u16, one: u16) -> HuffmanTree { - let mut tree = HuffmanTree::init(2).unwrap(); - tree.add_symbol(zero, 0, 1).unwrap(); - tree.add_symbol(one, 1, 1).unwrap(); - tree + HuffmanTree::Pair(zero, one) } pub(crate) fn is_single_node(&self) -> bool { - self.num_nodes == 1 + matches!(self, HuffmanTree::Single(_)) } /// Reads a symbol using the bitstream. @@ -202,22 +193,36 @@ impl HuffmanTree { &self, bit_reader: &mut BitReader, ) -> Result { - let mut v = bit_reader.peek(15) as usize; - let mut depth = 0; - - let mut index = 0; - loop { - match &self.tree[index] { - HuffmanTreeNode::Branch(children_offset) => { - index += children_offset + (v & 1); - depth += 1; - v >>= 1; + match self { + HuffmanTree::Single(symbol) => Ok(*symbol), + HuffmanTree::Pair(zero, one) => { + let v = bit_reader.peek(1); + bit_reader.consume(1)?; + if v == 0 { + Ok(*zero) + } else { + Ok(*one) } - HuffmanTreeNode::Leaf(symbol) => { - bit_reader.consume(depth)?; - return Ok(*symbol); + } + HuffmanTree::Tree(inner) => { + let mut v = bit_reader.peek(15) as usize; + let mut depth = 0; + + let mut index = 0; + loop { + match &inner.tree[index] { + HuffmanTreeNode::Branch(children_offset) => { + index += children_offset + (v & 1); + depth += 1; + v >>= 1; + } + HuffmanTreeNode::Leaf(symbol) => { + bit_reader.consume(depth)?; + return Ok(*symbol); + } + HuffmanTreeNode::Empty => return Err(DecodingError::HuffmanError), + } } - HuffmanTreeNode::Empty => return Err(DecodingError::HuffmanError), } } } From 59103f270988f971f66f4f35620f8211e33c5aeb Mon Sep 17 00:00:00 2001 From: Jonathan Behrens Date: Tue, 30 Jul 2024 23:17:01 -0700 Subject: [PATCH 4/8] Table-based huffman decoding --- src/huffman.rs | 135 +++++++++++++++++++++++++++---------------------- 1 file changed, 74 insertions(+), 61 deletions(-) diff --git a/src/huffman.rs b/src/huffman.rs index 31b2b72..31760f6 100644 --- a/src/huffman.rs +++ b/src/huffman.rs @@ -20,15 +20,16 @@ enum HuffmanTreeNode { #[derive(Clone, Debug)] pub(crate) struct HuffmanTreeInner { tree: Vec, - max_nodes: usize, num_nodes: usize, + + table: Vec, + table_mask: u16, } /// Huffman tree #[derive(Clone, Debug)] pub(crate) enum HuffmanTree { Single(u16), - Pair(u16, u16), Tree(HuffmanTreeInner), } @@ -50,18 +51,10 @@ impl HuffmanTreeInner { let code = usize::from(code); for length in (0..code_length).rev() { - if node_index >= self.max_nodes { - return Err(DecodingError::HuffmanError); - } - let node = self.tree[node_index]; let offset = match node { HuffmanTreeNode::Empty => { - if self.num_nodes == self.max_nodes { - return Err(DecodingError::HuffmanError); - } - // Turns a node from empty into a branch and assigns its children let offset_index = self.num_nodes - node_index; self.tree[node_index] = HuffmanTreeNode::Branch(offset_index); @@ -86,8 +79,24 @@ impl HuffmanTreeInner { } impl HuffmanTree { - /// Converts code lengths to codes - fn code_lengths_to_codes(code_lengths: &[u16]) -> Result>, DecodingError> { + /// Builds a tree implicitly, just from code lengths + pub(crate) fn build_implicit(code_lengths: Vec) -> Result { + let mut num_symbols = 0; + let mut root_symbol = 0; + + for (symbol, length) in code_lengths.iter().enumerate() { + if *length > 0 { + num_symbols += 1; + root_symbol = symbol.try_into().unwrap(); + } + } + + if num_symbols == 0 { + return Err(DecodingError::HuffmanError); + } else if num_symbols == 1 { + return Ok(Self::build_single_node(root_symbol)); + }; + let max_code_length = *code_lengths .iter() .reduce(|a, b| if a >= b { a } else { b }) @@ -114,63 +123,60 @@ impl HuffmanTree { // Assign codes let mut curr_code = 0; - let mut next_codes = [None; MAX_ALLOWED_CODE_LENGTH + 1]; + let mut next_codes = [0; MAX_ALLOWED_CODE_LENGTH + 1]; for code_len in 1..=usize::from(max_code_length) { curr_code = (curr_code + code_length_hist[code_len - 1]) << 1; - next_codes[code_len] = Some(curr_code); + next_codes[code_len] = curr_code; } - let mut huff_codes = vec![None; code_lengths.len()]; + let mut huff_codes = vec![0u16; code_lengths.len()]; for (symbol, &length) in code_lengths.iter().enumerate() { let length = usize::from(length); if length > 0 { huff_codes[symbol] = next_codes[length]; - if let Some(value) = next_codes[length].as_mut() { - *value += 1; - } - } else { - huff_codes[symbol] = None; + next_codes[length] += 1; } } - Ok(huff_codes) - } - - /// Builds a tree implicitly, just from code lengths - pub(crate) fn build_implicit(code_lengths: Vec) -> Result { - let mut num_symbols = 0; - let mut root_symbol = 0; - - for (symbol, length) in code_lengths.iter().enumerate() { - if *length > 0 { - num_symbols += 1; - root_symbol = symbol.try_into().unwrap(); + // Populate decoding table + let table_bits = max_code_length.min(10); + let table_size = (1 << table_bits) as usize; + let table_mask = table_size as u16 - 1; + let mut table = vec![0; table_size]; + for (symbol, (&code, &length)) in huff_codes.iter().zip(code_lengths.iter()).enumerate() { + if length != 0 && length <= table_bits { + let mut j = ((code as u16).reverse_bits() >> (16 - length)) as usize; + let entry = ((length as u32) << 16) | symbol as u32; + while j < table_size { + table[j] = entry; + j += 1 << length as usize; + } } } - if num_symbols == 0 { - return Err(DecodingError::HuffmanError); - } - - let max_nodes = 2 * num_symbols - 1; - let mut tree = HuffmanTreeInner { - tree: vec![HuffmanTreeNode::Empty; max_nodes], - max_nodes, - num_nodes: 1, - }; - - if num_symbols == 1 { - return Ok(Self::build_single_node(root_symbol)); - } else { - let codes = HuffmanTree::code_lengths_to_codes(&code_lengths)?; + // If the longest code is larger than the table size, build a tree as a fallback. + if max_code_length > table_bits { + let max_nodes = 2 * num_symbols - 1; + let mut tree = HuffmanTreeInner { + tree: vec![HuffmanTreeNode::Empty; max_nodes], + num_nodes: 1, + table, + table_mask, + }; for (symbol, &length) in code_lengths.iter().enumerate() { - if length > 0 && codes[symbol].is_some() { - tree.add_symbol(symbol.try_into().unwrap(), codes[symbol].unwrap(), length)?; + if length > 0 { + tree.add_symbol(symbol.try_into().unwrap(), huff_codes[symbol], length)?; } } + Ok(HuffmanTree::Tree(tree)) + } else { + Ok(HuffmanTree::Tree(HuffmanTreeInner { + tree: Vec::new(), + num_nodes: 1, + table, + table_mask, + })) } - - Ok(HuffmanTree::Tree(tree)) } pub(crate) fn build_single_node(symbol: u16) -> HuffmanTree { @@ -178,7 +184,17 @@ impl HuffmanTree { } pub(crate) fn build_two_node(zero: u16, one: u16) -> HuffmanTree { - HuffmanTree::Pair(zero, one) + // HuffmanTree::Pair(zero, one) + HuffmanTree::Tree(HuffmanTreeInner { + tree: vec![ + HuffmanTreeNode::Leaf(zero), + HuffmanTreeNode::Leaf(one), + HuffmanTreeNode::Empty, + ], + num_nodes: 3, + table: vec![1 << 16 | zero as u32, 1 << 16 | one as u32], + table_mask: 0x1, + }) } pub(crate) fn is_single_node(&self) -> bool { @@ -195,19 +211,16 @@ impl HuffmanTree { ) -> Result { match self { HuffmanTree::Single(symbol) => Ok(*symbol), - HuffmanTree::Pair(zero, one) => { - let v = bit_reader.peek(1); - bit_reader.consume(1)?; - if v == 0 { - Ok(*zero) - } else { - Ok(*one) - } - } HuffmanTree::Tree(inner) => { let mut v = bit_reader.peek(15) as usize; let mut depth = 0; + let entry = inner.table[v & inner.table_mask as usize]; + if entry != 0 { + bit_reader.consume((entry >> 16) as u8)?; + return Ok(entry as u16); + } + let mut index = 0; loop { match &inner.tree[index] { From 16416397b2f342e1b6bb88abda5ea0d44f4c470b Mon Sep 17 00:00:00 2001 From: Jonathan Behrens Date: Wed, 31 Jul 2024 21:32:16 -0700 Subject: [PATCH 5/8] Separate out slowpath to a different function --- src/huffman.rs | 48 ++++++++++++++++++++++++++++-------------------- src/lossless.rs | 5 +++++ 2 files changed, 33 insertions(+), 20 deletions(-) diff --git a/src/huffman.rs b/src/huffman.rs index 31760f6..546498d 100644 --- a/src/huffman.rs +++ b/src/huffman.rs @@ -201,6 +201,30 @@ impl HuffmanTree { matches!(self, HuffmanTree::Single(_)) } + #[inline(never)] + fn read_symbol_slowpath( + inner: &HuffmanTreeInner, + mut v: usize, + bit_reader: &mut BitReader, + ) -> Result { + let mut depth = 0; + let mut index = 0; + loop { + match &inner.tree[index] { + HuffmanTreeNode::Branch(children_offset) => { + index += children_offset + (v & 1); + depth += 1; + v >>= 1; + } + HuffmanTreeNode::Leaf(symbol) => { + bit_reader.consume(depth)?; + return Ok(*symbol); + } + HuffmanTreeNode::Empty => return Err(DecodingError::HuffmanError), + } + } + } + /// Reads a symbol using the bitstream. /// /// You must call call `bit_reader.fill()` before calling this function or it may erroroneosly @@ -210,33 +234,17 @@ impl HuffmanTree { bit_reader: &mut BitReader, ) -> Result { match self { - HuffmanTree::Single(symbol) => Ok(*symbol), HuffmanTree::Tree(inner) => { - let mut v = bit_reader.peek(15) as usize; - let mut depth = 0; - - let entry = inner.table[v & inner.table_mask as usize]; + let v = bit_reader.peek_full() as u16; + let entry = inner.table[(v & inner.table_mask) as usize]; if entry != 0 { bit_reader.consume((entry >> 16) as u8)?; return Ok(entry as u16); } - let mut index = 0; - loop { - match &inner.tree[index] { - HuffmanTreeNode::Branch(children_offset) => { - index += children_offset + (v & 1); - depth += 1; - v >>= 1; - } - HuffmanTreeNode::Leaf(symbol) => { - bit_reader.consume(depth)?; - return Ok(*symbol); - } - HuffmanTreeNode::Empty => return Err(DecodingError::HuffmanError), - } - } + Self::read_symbol_slowpath(inner, v as usize, bit_reader) } + HuffmanTree::Single(symbol) => Ok(*symbol), } } } diff --git a/src/lossless.rs b/src/lossless.rs index 68d7e85..1503943 100644 --- a/src/lossless.rs +++ b/src/lossless.rs @@ -749,6 +749,11 @@ impl BitReader { self.buffer & ((1 << num) - 1) } + /// Peeks at the full buffer. + pub(crate) fn peek_full(&self) -> u64 { + self.buffer + } + /// Consumes `num` bits from the buffer returning an error if there are not enough bits. pub(crate) fn consume(&mut self, num: u8) -> Result<(), DecodingError> { if self.nbits < num { From aa4270e94f719c03dcb0ac4045d29ab1de4ffc0c Mon Sep 17 00:00:00 2001 From: Jonathan Behrens Date: Thu, 1 Aug 2024 21:20:48 -0700 Subject: [PATCH 6/8] More refactoring of HuffmanTree --- src/huffman.rs | 131 +++++++++++++++++++++---------------------------- 1 file changed, 57 insertions(+), 74 deletions(-) diff --git a/src/huffman.rs b/src/huffman.rs index 546498d..6864746 100644 --- a/src/huffman.rs +++ b/src/huffman.rs @@ -17,20 +17,15 @@ enum HuffmanTreeNode { Empty, } -#[derive(Clone, Debug)] -pub(crate) struct HuffmanTreeInner { - tree: Vec, - num_nodes: usize, - - table: Vec, - table_mask: u16, -} - /// Huffman tree #[derive(Clone, Debug)] pub(crate) enum HuffmanTree { Single(u16), - Tree(HuffmanTreeInner), + Tree { + tree: Vec, + table: Vec, + table_mask: u16, + }, } impl Default for HuffmanTree { @@ -39,45 +34,6 @@ impl Default for HuffmanTree { } } -impl HuffmanTreeInner { - /// Adds a symbol to a huffman tree - fn add_symbol( - &mut self, - symbol: u16, - code: u16, - code_length: u16, - ) -> Result<(), DecodingError> { - let mut node_index = 0; - let code = usize::from(code); - - for length in (0..code_length).rev() { - let node = self.tree[node_index]; - - let offset = match node { - HuffmanTreeNode::Empty => { - // Turns a node from empty into a branch and assigns its children - let offset_index = self.num_nodes - node_index; - self.tree[node_index] = HuffmanTreeNode::Branch(offset_index); - self.num_nodes += 2; - offset_index - } - HuffmanTreeNode::Leaf(_) => return Err(DecodingError::HuffmanError), - HuffmanTreeNode::Branch(offset) => offset, - }; - - node_index += offset + ((code >> length) & 1); - } - - match self.tree[node_index] { - HuffmanTreeNode::Empty => self.tree[node_index] = HuffmanTreeNode::Leaf(symbol), - HuffmanTreeNode::Leaf(_) => return Err(DecodingError::HuffmanError), - HuffmanTreeNode::Branch(_offset) => return Err(DecodingError::HuffmanError), - } - - Ok(()) - } -} - impl HuffmanTree { /// Builds a tree implicitly, just from code lengths pub(crate) fn build_implicit(code_lengths: Vec) -> Result { @@ -154,29 +110,54 @@ impl HuffmanTree { } // If the longest code is larger than the table size, build a tree as a fallback. + let mut tree = Vec::new(); if max_code_length > table_bits { - let max_nodes = 2 * num_symbols - 1; - let mut tree = HuffmanTreeInner { - tree: vec![HuffmanTreeNode::Empty; max_nodes], - num_nodes: 1, - table, - table_mask, - }; + tree = vec![HuffmanTreeNode::Empty; 2 * num_symbols - 1]; + let mut num_nodes = 1; for (symbol, &length) in code_lengths.iter().enumerate() { + let code = huff_codes[symbol]; + let code_length = length; + let symbol = symbol.try_into().unwrap(); + if length > 0 { - tree.add_symbol(symbol.try_into().unwrap(), huff_codes[symbol], length)?; + let mut node_index = 0; + let code = usize::from(code); + + for length in (0..code_length).rev() { + let node = tree[node_index]; + + let offset = match node { + HuffmanTreeNode::Empty => { + // Turns a node from empty into a branch and assigns its children + let offset_index = num_nodes - node_index; + tree[node_index] = HuffmanTreeNode::Branch(offset_index); + num_nodes += 2; + offset_index + } + HuffmanTreeNode::Leaf(_) => return Err(DecodingError::HuffmanError), + HuffmanTreeNode::Branch(offset) => offset, + }; + + node_index += offset + ((code >> length) & 1); + } + + match tree[node_index] { + HuffmanTreeNode::Empty => tree[node_index] = HuffmanTreeNode::Leaf(symbol), + HuffmanTreeNode::Leaf(_) => return Err(DecodingError::HuffmanError), + HuffmanTreeNode::Branch(_offset) => { + return Err(DecodingError::HuffmanError) + } + } } } - Ok(HuffmanTree::Tree(tree)) - } else { - Ok(HuffmanTree::Tree(HuffmanTreeInner { - tree: Vec::new(), - num_nodes: 1, - table, - table_mask, - })) } + + Ok(HuffmanTree::Tree { + tree, + table, + table_mask, + }) } pub(crate) fn build_single_node(symbol: u16) -> HuffmanTree { @@ -184,17 +165,15 @@ impl HuffmanTree { } pub(crate) fn build_two_node(zero: u16, one: u16) -> HuffmanTree { - // HuffmanTree::Pair(zero, one) - HuffmanTree::Tree(HuffmanTreeInner { + HuffmanTree::Tree { tree: vec![ HuffmanTreeNode::Leaf(zero), HuffmanTreeNode::Leaf(one), HuffmanTreeNode::Empty, ], - num_nodes: 3, table: vec![1 << 16 | zero as u32, 1 << 16 | one as u32], table_mask: 0x1, - }) + } } pub(crate) fn is_single_node(&self) -> bool { @@ -203,14 +182,14 @@ impl HuffmanTree { #[inline(never)] fn read_symbol_slowpath( - inner: &HuffmanTreeInner, + tree: &[HuffmanTreeNode], mut v: usize, bit_reader: &mut BitReader, ) -> Result { let mut depth = 0; let mut index = 0; loop { - match &inner.tree[index] { + match &tree[index] { HuffmanTreeNode::Branch(children_offset) => { index += children_offset + (v & 1); depth += 1; @@ -234,15 +213,19 @@ impl HuffmanTree { bit_reader: &mut BitReader, ) -> Result { match self { - HuffmanTree::Tree(inner) => { + HuffmanTree::Tree { + tree, + table, + table_mask, + } => { let v = bit_reader.peek_full() as u16; - let entry = inner.table[(v & inner.table_mask) as usize]; + let entry = table[(v & table_mask) as usize]; if entry != 0 { bit_reader.consume((entry >> 16) as u8)?; return Ok(entry as u16); } - Self::read_symbol_slowpath(inner, v as usize, bit_reader) + Self::read_symbol_slowpath(tree, v as usize, bit_reader) } HuffmanTree::Single(symbol) => Ok(*symbol), } From caccd026f74d644497c586d13fe7b81e57036859 Mon Sep 17 00:00:00 2001 From: Jonathan Behrens Date: Thu, 1 Aug 2024 21:48:12 -0700 Subject: [PATCH 7/8] Fix visibility of HuffmanTree --- src/huffman.rs | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/src/huffman.rs b/src/huffman.rs index 6864746..bbf9f6b 100644 --- a/src/huffman.rs +++ b/src/huffman.rs @@ -17,9 +17,8 @@ enum HuffmanTreeNode { Empty, } -/// Huffman tree #[derive(Clone, Debug)] -pub(crate) enum HuffmanTree { +enum HuffmanTreeInner { Single(u16), Tree { tree: Vec, @@ -28,9 +27,13 @@ pub(crate) enum HuffmanTree { }, } +/// Huffman tree +#[derive(Clone, Debug)] +pub(crate) struct HuffmanTree(HuffmanTreeInner); + impl Default for HuffmanTree { fn default() -> Self { - HuffmanTree::Single(0) + Self(HuffmanTreeInner::Single(0)) } } @@ -153,19 +156,19 @@ impl HuffmanTree { } } - Ok(HuffmanTree::Tree { + Ok(Self(HuffmanTreeInner::Tree { tree, table, table_mask, - }) + })) } pub(crate) fn build_single_node(symbol: u16) -> HuffmanTree { - HuffmanTree::Single(symbol) + Self(HuffmanTreeInner::Single(symbol)) } pub(crate) fn build_two_node(zero: u16, one: u16) -> HuffmanTree { - HuffmanTree::Tree { + Self(HuffmanTreeInner::Tree { tree: vec![ HuffmanTreeNode::Leaf(zero), HuffmanTreeNode::Leaf(one), @@ -173,11 +176,11 @@ impl HuffmanTree { ], table: vec![1 << 16 | zero as u32, 1 << 16 | one as u32], table_mask: 0x1, - } + }) } pub(crate) fn is_single_node(&self) -> bool { - matches!(self, HuffmanTree::Single(_)) + matches!(self.0, HuffmanTreeInner::Single(_)) } #[inline(never)] @@ -212,8 +215,8 @@ impl HuffmanTree { &self, bit_reader: &mut BitReader, ) -> Result { - match self { - HuffmanTree::Tree { + match &self.0 { + HuffmanTreeInner::Tree { tree, table, table_mask, @@ -227,7 +230,7 @@ impl HuffmanTree { Self::read_symbol_slowpath(tree, v as usize, bit_reader) } - HuffmanTree::Single(symbol) => Ok(*symbol), + HuffmanTreeInner::Single(symbol) => Ok(*symbol), } } } From 22f1a9ced23ed2048dc4e5044b99200ad318af43 Mon Sep 17 00:00:00 2001 From: Jonathan Behrens Date: Fri, 2 Aug 2024 18:17:46 -0700 Subject: [PATCH 8/8] Clippy fix --- src/huffman.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/huffman.rs b/src/huffman.rs index bbf9f6b..72b04c1 100644 --- a/src/huffman.rs +++ b/src/huffman.rs @@ -103,7 +103,7 @@ impl HuffmanTree { let mut table = vec![0; table_size]; for (symbol, (&code, &length)) in huff_codes.iter().zip(code_lengths.iter()).enumerate() { if length != 0 && length <= table_bits { - let mut j = ((code as u16).reverse_bits() >> (16 - length)) as usize; + let mut j = (u16::reverse_bits(code) >> (16 - length)) as usize; let entry = ((length as u32) << 16) | symbol as u32; while j < table_size { table[j] = entry;