From 65d29098166af5f2d8c62bd3fb2c4e27d8ce5c96 Mon Sep 17 00:00:00 2001 From: Robert Knight Date: Sun, 28 Jul 2024 07:10:38 +0100 Subject: [PATCH 1/3] Update dependencies This is the result of running `cargo update`. The most notable change is that image-webp replaced its `thiserror` dependency with `quick-error` [1]. [1] https://github.com/image-rs/image-webp/issues/66 --- Cargo.lock | 104 ++++++++++++++++++++++------------------------------- 1 file changed, 42 insertions(+), 62 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 86cf8e8c..82b14d24 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -52,15 +52,9 @@ checksum = "79296716171880943b8470b5f8d03aa55eb2e645a4874bdbb28adb49162e012c" [[package]] name = "bytemuck" -version = "1.16.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "78834c15cb5d5efe3452d58b1e8ba890dd62d21907f867f383358198e56ebca5" - -[[package]] -name = "byteorder" -version = "1.5.0" +version = "1.16.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" +checksum = "b236fc92302c97ed75b38da1f4917b5cdda4984745740f153a5d3059e48d725e" [[package]] name = "byteorder-lite" @@ -110,9 +104,9 @@ checksum = "22ec99545bb0ed0ea7bb9b8e1e9122ea386ff8a48c0922e43f36d45ab09e0e80" [[package]] name = "either" -version = "1.12.0" +version = "1.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3dca9240753cf90908d7e4aac30f630662b02aebaa1b58a3cadabdb23385b58b" +checksum = "60b1af1c220855b6ceac025d3f6ecdd2b7c4894bfe9cd9bda4fbb4bc7c0d4cf0" [[package]] name = "fancy-regex" @@ -182,12 +176,12 @@ checksum = "62adaabb884c94955b19907d60019f4e145d091c75345379e70d1ee696f7854f" [[package]] name = "image" -version = "0.25.1" +version = "0.25.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fd54d660e773627692c524beaad361aca785a4f9f5730ce91f42aabe5bce3d11" +checksum = 
"99314c8a2152b8ddb211f924cdae532d8c5e4c8bb54728e12fff1b0cd5963a10" dependencies = [ "bytemuck", - "byteorder", + "byteorder-lite", "image-webp", "num-traits", "png", @@ -197,12 +191,12 @@ dependencies = [ [[package]] name = "image-webp" -version = "0.1.2" +version = "0.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d730b085583c4d789dfd07fdcf185be59501666a90c97c40162b37e4fdad272d" +checksum = "f79afb8cbee2ef20f59ccd477a218c12a93943d075b492015ecb1bb81f8ee904" dependencies = [ "byteorder-lite", - "thiserror", + "quick-error", ] [[package]] @@ -231,15 +225,15 @@ checksum = "4ec2a862134d2a7d32d7983ddcdd1c4923530833c9f2ea1a44fc5fa473989058" [[package]] name = "log" -version = "0.4.21" +version = "0.4.22" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "90ed8c1e510134f979dbc4f070f87d4313098b704861a105fe34231c70a3901c" +checksum = "a7a70ba024b9dc04c27ea2f0c0548feb474ec5c54bba33a7f72f873a39d07b24" [[package]] name = "memchr" -version = "2.7.2" +version = "2.7.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6c8640c5d730cb13ebd907d8d04b52f55ac9a2eec55b440c8892f40d56c76c1d" +checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3" [[package]] name = "memmap2" @@ -252,9 +246,9 @@ dependencies = [ [[package]] name = "miniz_oxide" -version = "0.7.3" +version = "0.7.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "87dfd01fe195c66b572b37921ad8803d010623c0aca821bea2302239d155cdae" +checksum = "b8a240ddb74feaf34a79a7add65a741f3167852fba007066dcac1ca548d89c08" dependencies = [ "adler", "simd-adler32", @@ -300,13 +294,19 @@ dependencies = [ [[package]] name = "proc-macro2" -version = "1.0.83" +version = "1.0.86" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0b33eb56c327dec362a9e55b3ad14f9d2f0904fb5a5b03b513ab5465399e9f43" +checksum = "5e719e8df665df0d1c8fbfd238015744736151d4445ec0836b8e628aae103b77" 
dependencies = [ "unicode-ident", ] +[[package]] +name = "quick-error" +version = "2.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a993555f31e5a609f617c12db6250dedcac1b0a85076912c436e6fc9b2c8e6a3" + [[package]] name = "quote" version = "1.0.36" @@ -338,9 +338,9 @@ dependencies = [ [[package]] name = "regex" -version = "1.10.4" +version = "1.10.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c117dbdfde9c8308975b6a18d71f3f385c89461f7b3fb054288ecf2a2058ba4c" +checksum = "b91213439dad192326a0d7c6ee3955910425f441d7038e0d6933b0aec5c4517f" dependencies = [ "aho-corasick", "memchr", @@ -350,9 +350,9 @@ dependencies = [ [[package]] name = "regex-automata" -version = "0.4.6" +version = "0.4.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "86b83b8b9847f9bf95ef68afb0b8e6cdb80f498442f5179a29fad448fcc1eaea" +checksum = "38caf58cc5ef2fed281f89292ef23f6365465ed9a41b7a7754eb4e26496c92df" dependencies = [ "aho-corasick", "memchr", @@ -361,9 +361,9 @@ dependencies = [ [[package]] name = "regex-syntax" -version = "0.8.3" +version = "0.8.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "adad44e29e4c806119491a7f06f03de4d1af22c3a680dd47f1e6e179439d1f56" +checksum = "7a66a03ae7c801facd77a29370b4faec201768915ac14a721ba36f20bc9c209b" [[package]] name = "rten" @@ -509,18 +509,18 @@ checksum = "61697e0a1c7e512e84a621326239844a24d8207b4669b41bc18b32ea5cbf988b" [[package]] name = "serde" -version = "1.0.202" +version = "1.0.204" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "226b61a0d411b2ba5ff6d7f73a476ac4f8bb900373459cd00fab8512828ba395" +checksum = "bc76f558e0cbb2a839d37354c575f1dc3fdc6546b5be373ba43d95f231bf7c12" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.202" +version = "1.0.204" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"6048858004bcff69094cd972ed40a32500f153bd3be9f716b2eed2e8217c4838" +checksum = "e0cd7e117be63d3c3678776753929474f3b04a43a080c744d6b0ae2a8c28e222" dependencies = [ "proc-macro2", "quote", @@ -529,9 +529,9 @@ dependencies = [ [[package]] name = "serde_json" -version = "1.0.117" +version = "1.0.120" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "455182ea6142b14f93f4bc5320a2b31c1f266b66a4a5c858b013302a5d8cbfc3" +checksum = "4e0d21c9a8cae1235ad58a00c11cb40d4b1e5c784f1ef2c537876ed6ffd8b7c5" dependencies = [ "itoa", "ryu", @@ -552,40 +552,20 @@ checksum = "3c5e1a9a646d36c3599cd173a41282daf47c44583ad367b8e6837255952e5c67" [[package]] name = "syn" -version = "2.0.65" +version = "2.0.72" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d2863d96a84c6439701d7a38f9de935ec562c8832cc55d1dde0f513b52fad106" +checksum = "dc4b9b9bf2add8093d3f2c0204471e951b2285580335de42f9d2534f3ae7a8af" dependencies = [ "proc-macro2", "quote", "unicode-ident", ] -[[package]] -name = "thiserror" -version = "1.0.61" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c546c80d6be4bc6a00c0f01730c08df82eaa7a7a61f11d656526506112cc1709" -dependencies = [ - "thiserror-impl", -] - -[[package]] -name = "thiserror-impl" -version = "1.0.61" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "46c3384250002a6d5af4d114f2845d37b57521033f30d5c3f46c4d70e1197533" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] - [[package]] name = "tinyvec" -version = "1.6.0" +version = "1.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "87cc5ceb3875bb20c2890005a4e226a4651264a5c75edb2421b52861a0a0cb50" +checksum = "445e881f4f6d382d5f27c034e25eb92edd7c784ceab92a0937db7f2e9471b938" dependencies = [ "tinyvec_macros", ] @@ -679,9 +659,9 @@ checksum = "3f423a2c17029964870cfaabb1f13dfab7d092a62a29a89264f4d36990ca414a" [[package]] name = "zune-jpeg" -version = "0.4.11" +version = 
"0.4.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ec866b44a2a1fd6133d363f073ca1b179f438f99e7e5bfb1e33f7181facfe448" +checksum = "16099418600b4d8f028622f73ff6e3deaabdff330fb9a2a131dea781ee8b0768" dependencies = [ "zune-core", ] From 51f6e292346f23da10123b906e94b50155de9efe Mon Sep 17 00:00:00 2001 From: Robert Knight Date: Sun, 28 Jul 2024 07:18:21 +0100 Subject: [PATCH 2/3] Update rustc-hash to v2.0.0 The hashing algorithm was changed in this version. The change seems to be performance neutral. --- Cargo.lock | 4 ++-- Cargo.toml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 82b14d24..ba3bfc17 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -482,9 +482,9 @@ dependencies = [ [[package]] name = "rustc-hash" -version = "1.1.0" +version = "2.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2" +checksum = "583034fd73374156e66797ed8e5b0d5690409c9226b22d87cb7f19821c05d152" [[package]] name = "rustc_version" diff --git a/Cargo.toml b/Cargo.toml index b5b4fa8c..e5fd468c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -50,7 +50,7 @@ rten-vecmath = { path = "./rten-vecmath", version = "0.11.0" } rten-simd = { path = "./rten-simd", version = "0.11.0" } fastrand = { version = "2.0.2", optional = true } fastrand-contrib = { version = "0.1.0", optional = true } -rustc-hash = "1.1.0" +rustc-hash = "2.0.0" memmap2 = { version = "0.9.4", optional = true } num_cpus = "1.16.0" From 57d8bf6a10fa601e7f88f7bf6e0cb242148e821e Mon Sep 17 00:00:00 2001 From: Robert Knight Date: Sun, 28 Jul 2024 07:23:14 +0100 Subject: [PATCH 3/3] Update fancy-regex to v0.13.0 After this update clippy began to warn about the size of the `TokenizerError` enum due to changes in `fancy_regex::Error`. To resolve this, box the value. 
--- Cargo.lock | 19 ++++--------------- rten-text/Cargo.toml | 2 +- rten-text/src/tokenizers.rs | 2 +- rten-text/src/tokenizers/bpe.rs | 6 +++--- 4 files changed, 9 insertions(+), 20 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index ba3bfc17..54e8f837 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -110,12 +110,13 @@ checksum = "60b1af1c220855b6ceac025d3f6ecdd2b7c4894bfe9cd9bda4fbb4bc7c0d4cf0" [[package]] name = "fancy-regex" -version = "0.12.0" +version = "0.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7493d4c459da9f84325ad297371a6b2b8a162800873a22e3b6b6512e61d18c05" +checksum = "531e46835a22af56d1e3b66f04844bed63158bc094a628bec1d321d9b4c44bf2" dependencies = [ "bit-set", - "regex", + "regex-automata", + "regex-syntax", ] [[package]] @@ -336,18 +337,6 @@ dependencies = [ "crossbeam-utils", ] -[[package]] -name = "regex" -version = "1.10.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b91213439dad192326a0d7c6ee3955910425f441d7038e0d6933b0aec5c4517f" -dependencies = [ - "aho-corasick", - "memchr", - "regex-automata", - "regex-syntax", -] - [[package]] name = "regex-automata" version = "0.4.7" diff --git a/rten-text/Cargo.toml b/rten-text/Cargo.toml index 742237da..334f036f 100644 --- a/rten-text/Cargo.toml +++ b/rten-text/Cargo.toml @@ -13,7 +13,7 @@ include = ["/src", "/README.md"] crate-type = ["lib"] [dependencies] -fancy-regex = { version = "0.12.0", default-features = false, features = ["std", "unicode"] } +fancy-regex = { version = "0.13.0", default-features = false, features = ["std", "unicode"] } unicode_categories = "0.1.1" unicode-normalization = "0.1.22" serde = { workspace = true, features = ["derive"] } diff --git a/rten-text/src/tokenizers.rs b/rten-text/src/tokenizers.rs index 5832789e..e2dbadf2 100644 --- a/rten-text/src/tokenizers.rs +++ b/rten-text/src/tokenizers.rs @@ -595,7 +595,7 @@ pub enum TokenizerError { InvalidTokenId(TokenId), /// Splitting the input with a regex 
failed. - RegexSplitFailed(fancy_regex::Error), + RegexSplitFailed(Box<fancy_regex::Error>), /// There was an error parsing a byte sequence as a UTF-8 string. /// diff --git a/rten-text/src/tokenizers/bpe.rs b/rten-text/src/tokenizers/bpe.rs index b33c0cc9..8fe41974 100644 --- a/rten-text/src/tokenizers/bpe.rs +++ b/rten-text/src/tokenizers/bpe.rs @@ -18,7 +18,7 @@ pub enum BpeError { InvalidMergeEntry(String), /// The regex for splitting tokens is invalid. - InvalidPattern(fancy_regex::Error), + InvalidPattern(Box<fancy_regex::Error>), /// An entry in the vocab (token string to ID map) is not either a known /// special token or an entry in the merge list. @@ -289,7 +289,7 @@ impl Bpe { vocab: Option>, added_tokens: HashMap, ) -> Result { - let splitter = Regex::new(pattern).map_err(BpeError::InvalidPattern)?; + let splitter = Regex::new(pattern).map_err(|err| BpeError::InvalidPattern(err.into()))?; let mut builder = BpeBuilder::new(); builder.add_merges(merges)?; @@ -428,7 +428,7 @@ impl Encoder for Bpe { on_token: &mut dyn FnMut(usize, TokenId), ) -> Result<(), TokenizerError> { for piece in self.splitter.find_iter(text) { - let piece = piece.map_err(TokenizerError::RegexSplitFailed)?; + let piece = piece.map_err(|err| TokenizerError::RegexSplitFailed(err.into()))?; if piece.range().is_empty() { continue; }