diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index fd0d562e..7b160ed7 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -68,22 +68,6 @@ jobs: toolchain: ${{ matrix.toolchain }} - run: RUST_LOG=info cargo test --release ${{ matrix.features }} --locked --features __test_unsquashfs - - # benchmark - benchmark: - runs-on: ubuntu-latest - strategy: - matrix: - toolchain: - - stable - - steps: - - uses: actions/checkout@v3 - - uses: dtolnay/rust-toolchain@master - with: - toolchain: ${{ matrix.toolchain }} - - run: cargo bench - # fmt and clippy on stable fmt-clippy-stable: runs-on: ubuntu-latest diff --git a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml new file mode 100644 index 00000000..d6ecd332 --- /dev/null +++ b/.github/workflows/pull_request.yml @@ -0,0 +1,16 @@ +on: [pull_request] +name: CI Pull Request +jobs: + runBenchmark: + name: Benchmark + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + with: + fetch-depth: 0 + - uses: dtolnay/rust-toolchain@stable + - uses: boa-dev/criterion-compare-action@v3 + with: + benchName: "benchmark" + branchName: ${{ github.base_ref }} + token: ${{ secrets.GITHUB_TOKEN }} diff --git a/Cargo.lock b/Cargo.lock index 8f3da3bf..3909b880 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2,6 +2,16 @@ # It is not intended for manual editing. version = 3 +[[package]] +name = "acid_io" +version = "0.1.0" +source = "git+https://github.com/dataphract/acid_io.git#2d549317fe9253df8b510ba6bbdcfe623a837286" +dependencies = [ + "byteorder", + "libc", + "memchr", +] + [[package]] name = "adler" version = "1.0.2" @@ -116,6 +126,7 @@ dependencies = [ "indicatif", "jemallocator", "libc", + "log", "nix", "rayon", "rust-lzo", @@ -198,6 +209,12 @@ dependencies = [ "utf8-width", ] +[[package]] +name = "byteorder" +version = "1.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "14c189c53d098945499cdfa7ecc63567cf3886b3332b312a5b4585d8d3a6a610" + [[package]] name = "cast" version = "0.3.0" @@ -466,9 +483,9 @@ dependencies = [ [[package]] name = "deku" version = "0.16.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "819b87cc7a05b3abe3fc38e59b3980a5fd3162f25a247116441a9171d3e84481" +source = "git+https://github.com/sharksforarms/deku?branch=impl-reader#1565ab639f9376663d3a7a7b1c7ee15181b6408d" dependencies = [ + "acid_io", "bitvec", "deku_derive", ] @@ -476,8 +493,7 @@ dependencies = [ [[package]] name = "deku_derive" version = "0.16.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4e2ca12572239215a352a74ad7c776d7e8a914f8a23511c6cbedddd887e5009e" +source = "git+https://github.com/sharksforarms/deku?branch=impl-reader#1565ab639f9376663d3a7a7b1c7ee15181b6408d" dependencies = [ "darling", "proc-macro-crate", diff --git a/Cargo.toml b/Cargo.toml index 75a25096..4291cf7e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -12,7 +12,7 @@ categories = ["filesystem", "parsing"] [dependencies] # for lib -deku = "0.16.0" +deku = { git = "https://github.com/sharksforarms/deku", branch = "impl-reader" } tracing = "0.1.37" thiserror = "1.0.37" flate2 = { version = "1.0.24", optional = true } @@ -31,6 +31,7 @@ clap_complete = "4.2.1" indicatif = "0.17.5" console = "0.15.7" rayon = "1.7.0" +log = "0.4.19" [features] default = ["xz", "gzip", "zstd"] diff --git a/src/compressor.rs b/src/compressor.rs index 27fbf68c..4a4b5b61 100644 --- a/src/compressor.rs +++ b/src/compressor.rs @@ -87,9 +87,9 @@ pub struct Xz { // TODO: in openwrt, git-hash:f97ad870e11ebe5f3dcf833dda6c83b9165b37cb shows that before // offical squashfs-tools had xz support they had the dictionary_size field as the last field // in this struct. If we get test images, I guess we can support this in the future. - #[deku(cond = "!deku::rest.is_empty()")] + #[deku(cond = "!deku::container.end()")] pub bit_opts: Option, - #[deku(cond = "!deku::rest.is_empty()")] + #[deku(cond = "!deku::container.end()")] pub fb: Option, } diff --git a/src/reader.rs b/src/reader.rs index 9576d2dc..1cff42b1 100644 --- a/src/reader.rs +++ b/src/reader.rs @@ -1,9 +1,9 @@ //! Reader traits use std::collections::HashMap; -use std::io::{BufRead, Read, Seek, SeekFrom, Write}; +use std::io::{BufRead, Cursor, Read, Seek, SeekFrom, Write}; -use deku::bitvec::{BitView, Msb0}; +use deku::bitvec::BitView; use deku::prelude::*; use rustc_hash::FxHashMap; use tracing::{error, instrument, trace}; @@ -103,25 +103,28 @@ pub trait SquashFsReader: BufReadSeek { // Using this size, a SquashFS reader can determine if another header with further entries // should be following once it reaches the end of a run. - let mut ret_bytes = Vec::with_capacity(METADATA_MAXSIZE); + let mut next = vec![]; let mut metadata_offsets = vec![]; let mut ret_vec = HashMap::default(); let start = self.stream_position()?; while self.stream_position()? < superblock.dir_table { - trace!("offset: {:02x?}", self.stream_position()); metadata_offsets.push(self.stream_position()? - start); // parse into metadata let mut bytes = metadata::read_block(self, superblock, kind)?; // parse as many inodes as you can - ret_bytes.append(&mut bytes); - - let mut input_bits = ret_bytes.view_bits::(); - while !input_bits.is_empty() { - match Inode::read( - input_bits, + let mut inode_bytes = next; + inode_bytes.append(&mut bytes); + let mut c_inode_bytes = Cursor::new(inode_bytes.clone()); + let mut container = Container::new(&mut c_inode_bytes); + + // store last successful read position + let mut container_bits_read = container.bits_read; + loop { + match Inode::from_reader( + &mut container, ( superblock.bytes_used, superblock.block_size, @@ -129,21 +132,22 @@ pub trait SquashFsReader: BufReadSeek { kind.inner.type_endian, ), ) { - Ok((rest, inode)) => { + Ok(inode) => { // Push the new Inode to the return, with the position this was read from ret_vec.insert(inode.header.inode_number, inode); - input_bits = rest; + container_bits_read = container.bits_read; } - Err(_) => { - // try next block, inodes can span multiple blocks! - break; + Err(e) => { + if matches!(e, DekuError::Incomplete(_)) { + // try next block, inodes can span multiple blocks! + next = inode_bytes.clone()[(container_bits_read / 8)..].to_vec(); + break; + } else { + panic!("{:?}", e); + } } } } - - // save leftover bits to new bits to leave for the next metadata block - // this is safe, input_bits is always byte aligned - ret_bytes.drain(..(ret_bytes.len() - (input_bits.len() / 8))); } Ok(ret_vec) @@ -171,10 +175,10 @@ pub trait SquashFsReader: BufReadSeek { error!("root_inode_offset > bytes.len()"); return Err(BackhandError::CorruptedOrInvalidSquashfs); } - let new_bytes = &bytes_01[root_inode_offset..]; - let input_bits = new_bytes.view_bits::<::deku::bitvec::Msb0>(); - if let Ok((_, inode)) = Inode::read( - input_bits, + let mut cursor = Cursor::new(&bytes_01[root_inode_offset..]); + let mut new_bytes = Container::new(&mut cursor); + if let Ok(inode) = Inode::from_reader( + &mut new_bytes, ( superblock.bytes_used, superblock.block_size, @@ -192,11 +196,11 @@ pub trait SquashFsReader: BufReadSeek { error!("root_inode_offset > bytes.len()"); return Err(BackhandError::CorruptedOrInvalidSquashfs); } - let new_bytes = &bytes_01[root_inode_offset..]; - let input_bits = new_bytes.view_bits::<::deku::bitvec::Msb0>(); - match Inode::read( - input_bits, + let mut cursor = Cursor::new(&bytes_01[root_inode_offset..]); + let mut new_bytes = Container::new(&mut cursor); + match Inode::from_reader( + &mut new_bytes, ( superblock.bytes_used, superblock.block_size, @@ -204,7 +208,7 @@ pub trait SquashFsReader: BufReadSeek { kind.inner.type_endian, ), ) { - Ok((_, inode)) => Ok(inode), + Ok(inode) => Ok(inode), Err(e) => Err(e.into()), } } @@ -281,13 +285,16 @@ pub trait SquashFsReader: BufReadSeek { /// Parse Lookup Table #[instrument(skip_all)] - fn lookup_table DekuRead<'a, deku::ctx::Endian>>( + fn lookup_table( &mut self, superblock: &SuperBlock, seek: u64, size: u64, kind: &Kind, - ) -> Result<(u64, Vec), BackhandError> { + ) -> Result<(u64, Vec), BackhandError> + where + T: for<'a> DekuReader<'a, deku::ctx::Endian>, + { // find the pointer at the initial offset trace!("seek: {:02x?}", seek); self.seek(SeekFrom::Start(seek))?; @@ -309,13 +316,16 @@ pub trait SquashFsReader: BufReadSeek { /// Parse count of `Metadata` block at offset into `T` #[instrument(skip_all)] - fn metadata_with_count DekuRead<'a, deku::ctx::Endian>>( + fn metadata_with_count( &mut self, superblock: &SuperBlock, seek: u64, count: u64, kind: &Kind, - ) -> Result, BackhandError> { + ) -> Result, BackhandError> + where + T: for<'a> DekuReader<'a, deku::ctx::Endian>, + { trace!("seek: {:02x?}", seek); self.seek(SeekFrom::Start(seek))?; @@ -326,11 +336,11 @@ pub trait SquashFsReader: BufReadSeek { } let mut ret_vec = vec![]; - let mut all_bytes = all_bytes.view_bits::(); // Read until we fail to turn bytes into `T` - while let Ok((rest, t)) = T::read(all_bytes, kind.inner.type_endian) { + let mut cursor = Cursor::new(all_bytes); + let mut container = Container::new(&mut cursor); + while let Ok(t) = T::from_reader(&mut container, kind.inner.type_endian) { ret_vec.push(t); - all_bytes = rest; } Ok(ret_vec) diff --git a/src/squashfs.rs b/src/squashfs.rs index fd82921b..55823e91 100644 --- a/src/squashfs.rs +++ b/src/squashfs.rs @@ -1,13 +1,12 @@ //! Read from on-disk image use std::ffi::OsString; -use std::io::{Seek, SeekFrom}; +use std::io::{Cursor, Seek, SeekFrom}; use std::os::unix::prelude::OsStringExt; use std::path::PathBuf; use std::sync::Arc; use std::sync::Mutex; -use deku::bitvec::{BitVec, BitView, Msb0}; use deku::prelude::*; use rustc_hash::FxHashMap; use tracing::{error, info, instrument, trace}; @@ -220,19 +219,15 @@ impl Squashfs { /// Read Superblock and Compression Options at current `reader` offset without parsing inodes /// and dirs /// - /// Used for unsquashfs --stat + /// Used for unsquashfs (extraction and --stat) pub fn superblock_and_compression_options( reader: &mut Box, kind: &Kind, ) -> Result<(SuperBlock, Option), BackhandError> { - // Size of metadata + optional compression options metadata block - let mut superblock = [0u8; 96]; - reader.read_exact(&mut superblock)?; - // Parse SuperBlock - let bs = superblock.view_bits::(); - let (_, superblock) = SuperBlock::read( - bs, + let mut container = Container::new(reader); + let superblock = SuperBlock::from_reader( + &mut container, ( kind.inner.magic, kind.inner.version_major, @@ -260,16 +255,18 @@ impl Squashfs { { let bytes = metadata::read_block(reader, &superblock, kind)?; // data -> compression options - let bv = BitVec::from_slice(&bytes); - match CompressionOptions::read(&bv, (kind.inner.type_endian, superblock.compressor)) { + match CompressionOptions::from_reader( + &mut Container::new(&mut Cursor::new(bytes)), + (kind.inner.type_endian, superblock.compressor), + ) { Ok(co) => { - if !co.0.is_empty() { - error!("invalid compression options, bytes left over, using"); - } - Some(co.1) + //if !co.0.is_empty() { + // error!("invalid compression options, bytes left over, using"); + //} + Some(co) } Err(e) => { - error!("invalid compression options: {e:?}[{bytes:02x?}], not using"); + error!("invalid compression options: {e:?}, not using"); None } } @@ -363,7 +360,7 @@ impl Squashfs { } // Read all fields from filesystem to make a Squashfs - info!("Reading Inodes"); + info!("Reading Inodes @ {:02x?}", superblock.inode_table); let inodes = reader.inodes(&superblock, &kind)?; info!("Reading Root Inode"); @@ -481,11 +478,11 @@ impl Squashfs { let bytes = &block[block_offset..][..file_size as usize - 3]; let mut dirs = vec![]; - let mut all_bytes = bytes.view_bits::(); // Read until we fail to turn bytes into `T` - while let Ok((rest, t)) = Dir::read(all_bytes, self.kind.inner.type_endian) { + let mut cursor = Cursor::new(bytes); + let mut container = Container::new(&mut cursor); + while let Ok(t) = Dir::from_reader(&mut container, self.kind.inner.type_endian) { dirs.push(t); - all_bytes = rest; } trace!("finish");