Skip to content

Commit 24556b1

Browse files
authored
Merge pull request #229 from miam-miam100/main
Add support for zstd dictionaries
2 parents 0e2d166 + 2bd88a7 commit 24556b1

File tree

18 files changed

+521
-30
lines changed

18 files changed

+521
-30
lines changed

CHANGELOG.md

+1
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0),
66

77
## Unreleased
88

9+
- Add `Zstd{Encoder,Decoder}::with_dict()` constructors.
910
- Add `zstdmt` crate feature that enables `zstd-safe/zstdmt`, allowing multi-threaded functionality to work as expected.
1011

1112
## 0.4.0 - 2023-05-10

Cargo.toml

+4
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,10 @@ required-features = ["zlib"]
8686
name = "zstd"
8787
required-features = ["zstd"]
8888

89+
[[test]]
90+
name = "zstd-dict"
91+
required-features = ["zstd", "tokio"]
92+
8993
[[example]]
9094
name = "zlib_tokio_write"
9195
required-features = ["zlib", "tokio"]

src/codec/zstd/decoder.rs

+8
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
use std::io;
12
use std::io::Result;
23

34
use crate::{codec::Decode, unshared::Unshared, util::PartialBuffer};
@@ -14,6 +15,13 @@ impl ZstdDecoder {
1415
decoder: Unshared::new(Decoder::new().unwrap()),
1516
}
1617
}
18+
19+
pub(crate) fn new_with_dict(dictionary: &[u8]) -> io::Result<Self> {
20+
let mut decoder = Decoder::with_dictionary(dictionary)?;
21+
Ok(Self {
22+
decoder: Unshared::new(decoder),
23+
})
24+
}
1725
}
1826

1927
impl Decode for ZstdDecoder {

src/codec/zstd/encoder.rs

+8
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
use crate::{codec::Encode, unshared::Unshared, util::PartialBuffer};
22
use libzstd::stream::raw::{CParameter, Encoder, Operation};
3+
use std::io;
34
use std::io::Result;
45

56
#[derive(Debug)]
@@ -23,6 +24,13 @@ impl ZstdEncoder {
2324
encoder: Unshared::new(encoder),
2425
}
2526
}
27+
28+
pub(crate) fn new_with_dict(level: i32, dictionary: &[u8]) -> io::Result<Self> {
29+
let mut encoder = Encoder::with_dictionary(level, dictionary)?;
30+
Ok(Self {
31+
encoder: Unshared::new(encoder),
32+
})
33+
}
2634
}
2735

2836
impl Encode for ZstdEncoder {

src/futures/bufread/macros/decoder.rs

+3-1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
macro_rules! decoder {
2-
($(#[$attr:meta])* $name:ident) => {
2+
($(#[$attr:meta])* $name:ident $({ $($inherent_methods:tt)* })*) => {
33
pin_project_lite::pin_project! {
44
$(#[$attr])*
55
///
@@ -21,6 +21,8 @@ macro_rules! decoder {
2121
}
2222
}
2323

24+
$($($inherent_methods)*)*
25+
2426
/// Configure multi-member/frame decoding; if enabled, this will reset the decoder state
2527
/// when reaching the end of a compressed member/frame and expect either EOF or another
2628
/// compressed member/frame to follow it in the stream.

src/futures/bufread/macros/encoder.rs

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
macro_rules! encoder {
2-
($(#[$attr:meta])* $name:ident<$inner:ident> $({ $($constructor:tt)* })*) => {
2+
($(#[$attr:meta])* $name:ident<$inner:ident> $({ $($inherent_methods:tt)* })*) => {
33
pin_project_lite::pin_project! {
44
$(#[$attr])*
55
///
@@ -17,7 +17,7 @@ macro_rules! encoder {
1717
/// Creates a new encoder which will read uncompressed data from the given stream
1818
/// and emit a compressed stream.
1919
///
20-
$($constructor)*
20+
$($inherent_methods)*
2121
)*
2222

2323
/// Acquires a reference to the underlying reader that this encoder is wrapping.

src/futures/write/macros/decoder.rs

+3-1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
macro_rules! decoder {
2-
($(#[$attr:meta])* $name:ident) => {
2+
($(#[$attr:meta])* $name:ident $({ $($inherent_methods:tt)* })*) => {
33
pin_project_lite::pin_project! {
44
$(#[$attr])*
55
///
@@ -21,6 +21,8 @@ macro_rules! decoder {
2121
}
2222
}
2323

24+
$($($inherent_methods)*)*
25+
2426
/// Acquires a reference to the underlying writer that this decoder is wrapping.
2527
pub fn get_ref(&self) -> &W {
2628
self.inner.get_ref()

src/futures/write/macros/encoder.rs

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
macro_rules! encoder {
2-
($(#[$attr:meta])* $name:ident<$inner:ident> $({ $($constructor:tt)* })*) => {
2+
($(#[$attr:meta])* $name:ident<$inner:ident> $({ $($inherent_methods:tt)* })*) => {
33
pin_project_lite::pin_project! {
44
$(#[$attr])*
55
///
@@ -17,7 +17,7 @@ macro_rules! encoder {
1717
/// Creates a new encoder which will take in uncompressed data and write it
1818
/// compressed to the given stream.
1919
///
20-
$($constructor)*
20+
$($inherent_methods)*
2121
)*
2222

2323
/// Acquires a reference to the underlying writer that this encoder is wrapping.

src/macros.rs

+93-18
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,15 @@
11
macro_rules! algos {
2-
(@algo $algo:ident [$algo_s:expr] $decoder:ident $encoder:ident<$inner:ident> $({ $($constructor:tt)* })*) => {
2+
(@algo $algo:ident [$algo_s:expr] $decoder:ident $encoder:ident<$inner:ident>
3+
{ @enc $($encoder_methods:tt)* }
4+
{ @dec $($decoder_methods:tt)* }
5+
) => {
36
#[cfg(feature = $algo_s)]
47
decoder! {
58
#[doc = concat!("A ", $algo_s, " decoder, or decompressor")]
69
#[cfg(feature = $algo_s)]
710
$decoder
11+
12+
{ $($decoder_methods)* }
813
}
914

1015
#[cfg(feature = $algo_s)]
@@ -15,12 +20,15 @@ macro_rules! algos {
1520
pub fn new(inner: $inner) -> Self {
1621
Self::with_quality(inner, crate::Level::Default)
1722
}
18-
} $({ $($constructor)* })*
23+
}
24+
25+
{ $($encoder_methods)* }
1926
}
2027
};
2128

2229
($($mod:ident)::+<$inner:ident>) => {
23-
algos!(@algo brotli ["brotli"] BrotliDecoder BrotliEncoder<$inner> {
30+
algos!(@algo brotli ["brotli"] BrotliDecoder BrotliEncoder<$inner>
31+
{ @enc
2432
pub fn with_quality(inner: $inner, level: crate::Level) -> Self {
2533
let params = brotli::enc::backward_references::BrotliEncoderParams::default();
2634
Self {
@@ -30,9 +38,13 @@ macro_rules! algos {
3038
),
3139
}
3240
}
33-
});
41+
}
42+
{ @dec }
43+
);
44+
45+
algos!(@algo bzip2 ["bzip2"] BzDecoder BzEncoder<$inner>
46+
{ @enc
3447

35-
algos!(@algo bzip2 ["bzip2"] BzDecoder BzEncoder<$inner> {
3648
pub fn with_quality(inner: $inner, level: crate::Level) -> Self {
3749
Self {
3850
inner: crate::$($mod::)+generic::Encoder::new(
@@ -41,9 +53,12 @@ macro_rules! algos {
4153
),
4254
}
4355
}
44-
});
56+
}
57+
{ @dec }
58+
);
4559

46-
algos!(@algo deflate ["deflate"] DeflateDecoder DeflateEncoder<$inner> {
60+
algos!(@algo deflate ["deflate"] DeflateDecoder DeflateEncoder<$inner>
61+
{ @enc
4762
pub fn with_quality(inner: $inner, level: crate::Level) -> Self {
4863
Self {
4964
inner: crate::$($mod::)+generic::Encoder::new(
@@ -52,9 +67,13 @@ macro_rules! algos {
5267
),
5368
}
5469
}
55-
});
70+
}
71+
{ @dec }
72+
);
73+
74+
algos!(@algo gzip ["gzip"] GzipDecoder GzipEncoder<$inner>
75+
{ @enc
5676

57-
algos!(@algo gzip ["gzip"] GzipDecoder GzipEncoder<$inner> {
5877
pub fn with_quality(inner: $inner, level: crate::Level) -> Self {
5978
Self {
6079
inner: crate::$($mod::)+generic::Encoder::new(
@@ -63,9 +82,12 @@ macro_rules! algos {
6382
),
6483
}
6584
}
66-
});
85+
}
86+
{ @dec }
87+
);
6788

68-
algos!(@algo zlib ["zlib"] ZlibDecoder ZlibEncoder<$inner> {
89+
algos!(@algo zlib ["zlib"] ZlibDecoder ZlibEncoder<$inner>
90+
{ @enc
6991
pub fn with_quality(inner: $inner, level: crate::Level) -> Self {
7092
Self {
7193
inner: crate::$($mod::)+generic::Encoder::new(
@@ -74,9 +96,13 @@ macro_rules! algos {
7496
),
7597
}
7698
}
77-
});
99+
}
100+
{ @dec }
101+
);
102+
103+
algos!(@algo zstd ["zstd"] ZstdDecoder ZstdEncoder<$inner>
104+
{ @enc
78105

79-
algos!(@algo zstd ["zstd"] ZstdDecoder ZstdEncoder<$inner> {
80106
pub fn with_quality(inner: $inner, level: crate::Level) -> Self {
81107
Self {
82108
inner: crate::$($mod::)+generic::Encoder::new(
@@ -106,9 +132,52 @@ macro_rules! algos {
106132
),
107133
}
108134
}
109-
});
110135

111-
algos!(@algo xz ["xz"] XzDecoder XzEncoder<$inner> {
136+
/// Creates a new encoder, using the specified compression level and pre-trained
137+
/// dictionary, which will read uncompressed data from the given stream and emit a
138+
/// compressed stream.
139+
///
140+
/// Dictionaries provide better compression ratios for small files, but are required to
141+
/// be present during decompression.
142+
///
143+
/// # Errors
144+
///
145+
/// Returns an error when `dictionary` is not valid.
146+
pub fn with_dict(inner: $inner, level: crate::Level, dictionary: &[u8]) -> ::std::io::Result<Self> {
147+
Ok(Self {
148+
inner: crate::$($mod::)+generic::Encoder::new(
149+
inner,
150+
crate::codec::ZstdEncoder::new_with_dict(level.into_zstd(), dictionary)?,
151+
),
152+
})
153+
}
154+
}
155+
{ @dec
156+
/// Creates a new decoder, using the specified pre-trained
157+
/// dictionary, which will read compressed data from the given stream and emit an
158+
/// uncompressed stream.
159+
///
160+
/// Dictionaries provide better compression ratios for small files, but are required to
161+
/// be present during decompression. The dictionary used must be the same as the one
162+
/// used for compression.
163+
///
164+
/// # Errors
165+
///
166+
/// Returns an error when `dictionary` is not valid.
167+
pub fn with_dict(inner: $inner, dictionary: &[u8]) -> ::std::io::Result<Self> {
168+
Ok(Self {
169+
inner: crate::$($mod::)+generic::Decoder::new(
170+
inner,
171+
crate::codec::ZstdDecoder::new_with_dict(dictionary)?,
172+
),
173+
})
174+
}
175+
}
176+
);
177+
178+
algos!(@algo xz ["xz"] XzDecoder XzEncoder<$inner>
179+
{ @enc
180+
112181
pub fn with_quality(inner: $inner, level: crate::Level) -> Self {
113182
Self {
114183
inner: crate::$($mod::)+generic::Encoder::new(
@@ -117,9 +186,13 @@ macro_rules! algos {
117186
),
118187
}
119188
}
120-
});
189+
}
190+
{ @dec }
191+
);
192+
193+
algos!(@algo lzma ["lzma"] LzmaDecoder LzmaEncoder<$inner>
194+
{ @enc
121195

122-
algos!(@algo lzma ["lzma"] LzmaDecoder LzmaEncoder<$inner> {
123196
pub fn with_quality(inner: $inner, level: crate::Level) -> Self {
124197
Self {
125198
inner: crate::$($mod::)+generic::Encoder::new(
@@ -128,6 +201,8 @@ macro_rules! algos {
128201
),
129202
}
130203
}
131-
});
204+
}
205+
{ @dec }
206+
);
132207
}
133208
}

src/tokio/bufread/macros/decoder.rs

+3-1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
macro_rules! decoder {
2-
($(#[$attr:meta])* $name:ident) => {
2+
($(#[$attr:meta])* $name:ident $({ $($inherent_methods:tt)* })*) => {
33
pin_project_lite::pin_project! {
44
$(#[$attr])*
55
///
@@ -21,6 +21,8 @@ macro_rules! decoder {
2121
}
2222
}
2323

24+
$($($inherent_methods)*)*
25+
2426
/// Configure multi-member/frame decoding; if enabled, this will reset the decoder state
2527
/// when reaching the end of a compressed member/frame and expect either EOF or another
2628
/// compressed member/frame to follow it in the stream.

src/tokio/bufread/macros/encoder.rs

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
macro_rules! encoder {
2-
($(#[$attr:meta])* $name:ident<$inner:ident> $({ $($constructor:tt)* })*) => {
2+
($(#[$attr:meta])* $name:ident<$inner:ident> $({ $($inherent_methods:tt)* })*) => {
33
pin_project_lite::pin_project! {
44
$(#[$attr])*
55
///
@@ -17,7 +17,7 @@ macro_rules! encoder {
1717
/// Creates a new encoder which will read uncompressed data from the given stream
1818
/// and emit a compressed stream.
1919
///
20-
$($constructor)*
20+
$($inherent_methods)*
2121
)*
2222

2323
/// Acquires a reference to the underlying reader that this encoder is wrapping.

src/tokio/write/macros/decoder.rs

+3-1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
macro_rules! decoder {
2-
($(#[$attr:meta])* $name:ident) => {
2+
($(#[$attr:meta])* $name:ident $({ $($inherent_methods:tt)* })*) => {
33
pin_project_lite::pin_project! {
44
$(#[$attr])*
55
///
@@ -21,6 +21,8 @@ macro_rules! decoder {
2121
}
2222
}
2323

24+
$($($inherent_methods)*)*
25+
2426
/// Acquires a reference to the underlying writer that this decoder is wrapping.
2527
pub fn get_ref(&self) -> &W {
2628
self.inner.get_ref()

src/tokio/write/macros/encoder.rs

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
macro_rules! encoder {
2-
($(#[$attr:meta])* $name:ident<$inner:ident> $({ $($constructor:tt)* })*) => {
2+
($(#[$attr:meta])* $name:ident<$inner:ident> $({ $($inherent_methods:tt)* })*) => {
33
pin_project_lite::pin_project! {
44
$(#[$attr])*
55
///
@@ -17,7 +17,7 @@ macro_rules! encoder {
1717
/// Creates a new encoder which will take in uncompressed data and write it
1818
/// compressed to the given stream.
1919
///
20-
$($constructor)*
20+
$($inherent_methods)*
2121
)*
2222

2323
/// Acquires a reference to the underlying writer that this encoder is wrapping.

tests/artifacts/dictionary-rust

51.8 KB
Binary file not shown.

tests/artifacts/dictionary-rust-other

110 KB
Binary file not shown.

0 commit comments

Comments
 (0)