
Commit

Merge pull request #294 from robertknight/update-deps
Update dependencies
robertknight authored Jul 28, 2024
2 parents 6baa4f6 + 57d8bf6 commit 789a266
Showing 5 changed files with 52 additions and 83 deletions.
123 changes: 46 additions & 77 deletions Cargo.lock

Some generated files are not rendered by default.

2 changes: 1 addition & 1 deletion Cargo.toml
@@ -50,7 +50,7 @@ rten-vecmath = { path = "./rten-vecmath", version = "0.11.0" }
rten-simd = { path = "./rten-simd", version = "0.11.0" }
fastrand = { version = "2.0.2", optional = true }
fastrand-contrib = { version = "0.1.0", optional = true }
rustc-hash = "1.1.0"
rustc-hash = "2.0.0"
memmap2 = { version = "0.9.4", optional = true }
num_cpus = "1.16.0"

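A note on the rustc-hash bump (not part of the original commit message): as far as I know, version 2.0 keeps the same FxHashMap/FxHashSet type aliases, so a bump like this is expected to be source-compatible at call sites. A minimal sketch of that API surface, using a hypothetical word-count example rather than code from this repository:

    // Minimal sketch (hypothetical example, not code from this repository):
    // rustc-hash's FxHashMap is a std HashMap with a faster, non-cryptographic
    // hasher, so call sites look exactly like ordinary HashMap usage.
    use rustc_hash::FxHashMap;

    fn main() {
        let mut counts: FxHashMap<&str, u32> = FxHashMap::default();
        for word in ["token", "ids", "token"] {
            *counts.entry(word).or_insert(0) += 1;
        }
        assert_eq!(counts["token"], 2);
    }
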
2 changes: 1 addition & 1 deletion rten-text/Cargo.toml
@@ -13,7 +13,7 @@ include = ["/src", "/README.md"]
crate-type = ["lib"]

[dependencies]
fancy-regex = { version = "0.12.0", default-features = false, features = ["std", "unicode"] }
fancy-regex = { version = "0.13.0", default-features = false, features = ["std", "unicode"] }
unicode_categories = "0.1.1"
unicode-normalization = "0.1.22"
serde = { workspace = true, features = ["derive"] }
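
An aside on fancy-regex (my inference, not stated in the diff): the crate is presumably used because its backtracking engine supports look-around in tokenizer split patterns, and that engine can also fail at match time, which is why its match iterator yields Result items, as the tokenizers.rs change below reflects. A minimal sketch with a hypothetical pattern:

    // Minimal sketch (hypothetical pattern, not the tokenizer's real one):
    // fancy-regex supports look-around, and find_iter() yields
    // Result<Match, fancy_regex::Error> because backtracking can fail while
    // matching, not only while compiling the pattern.
    use fancy_regex::Regex;

    fn main() -> Result<(), Box<fancy_regex::Error>> {
        // Match a word only when a comma follows it (look-ahead).
        let re = Regex::new(r"\w+(?=,)").map_err(Box::new)?;
        for m in re.find_iter("foo, bar, baz") {
            let m = m.map_err(Box::new)?;
            println!("{}", m.as_str());
        }
        Ok(())
    }
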
2 changes: 1 addition & 1 deletion rten-text/src/tokenizers.rs
@@ -595,7 +595,7 @@ pub enum TokenizerError {
InvalidTokenId(TokenId),

/// Splitting the input with a regex failed.
-RegexSplitFailed(fancy_regex::Error),
+RegexSplitFailed(Box<fancy_regex::Error>),

/// There was an error parsing a byte sequence as a UTF-8 string.
///
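
A note on why the error is now boxed (my inference; the commit message does not say): a Rust enum is at least as large as its largest variant, and fancy_regex::Error is comparatively bulky, so storing it behind a Box keeps TokenizerError small, along with every Result that carries it. A minimal sketch of that size effect using stand-in types:

    // Minimal sketch (stand-in types, not the real TokenizerError):
    // an enum is sized by its largest variant, so boxing a bulky payload
    // shrinks every value of the enum to roughly a pointer plus a tag.
    #[allow(dead_code)]
    struct BigError([u8; 128]); // stand-in for a large error type

    #[allow(dead_code)]
    enum Unboxed {
        Small(u32),
        Big(BigError),
    }

    #[allow(dead_code)]
    enum Boxed {
        Small(u32),
        Big(Box<BigError>),
    }

    fn main() {
        // The unboxed enum is over 128 bytes; the boxed one is about two words.
        println!("unboxed: {} bytes", std::mem::size_of::<Unboxed>());
        println!("boxed:   {} bytes", std::mem::size_of::<Boxed>());
    }
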
6 changes: 3 additions & 3 deletions rten-text/src/tokenizers/bpe.rs
@@ -18,7 +18,7 @@ pub enum BpeError {
InvalidMergeEntry(String),

/// The regex for splitting tokens is invalid.
-InvalidPattern(fancy_regex::Error),
+InvalidPattern(Box<fancy_regex::Error>),

/// An entry in the vocab (token string to ID map) is not either a known
/// special token or an entry in the merge list.
@@ -289,7 +289,7 @@ impl Bpe {
vocab: Option<HashMap<EncodedBytes, TokenId>>,
added_tokens: HashMap<TokenId, String>,
) -> Result<Bpe, BpeError> {
-let splitter = Regex::new(pattern).map_err(BpeError::InvalidPattern)?;
+let splitter = Regex::new(pattern).map_err(|err| BpeError::InvalidPattern(err.into()))?;

let mut builder = BpeBuilder::new();
builder.add_merges(merges)?;
@@ -428,7 +428,7 @@ impl Encoder for Bpe {
on_token: &mut dyn FnMut(usize, TokenId),
) -> Result<(), TokenizerError> {
for piece in self.splitter.find_iter(text) {
-let piece = piece.map_err(TokenizerError::RegexSplitFailed)?;
+let piece = piece.map_err(|err| TokenizerError::RegexSplitFailed(err.into()))?;
if piece.range().is_empty() {
continue;
}
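
A note on the map_err closures (my reading of the pattern, not from the commit): err.into() resolves to the standard library's blanket From<T> for Box<T> impl, so a fancy_regex::Error becomes a Box<fancy_regex::Error> without writing Box::new at each call site. A minimal sketch with stand-in types:

    // Minimal sketch (stand-in types, not the real BpeError):
    // From<T> for Box<T> lets map_err(|e| Variant(e.into())) box the payload
    // without naming Box::new at the call site.
    #[allow(dead_code)]
    #[derive(Debug)]
    struct PatternFailure(String); // stand-in for fancy_regex::Error

    #[derive(Debug)]
    enum SketchError {
        InvalidPattern(Box<PatternFailure>),
    }

    fn compile(pattern: &str) -> Result<(), SketchError> {
        // Pretend compilation always fails so the map_err path runs.
        let result: Result<(), PatternFailure> = Err(PatternFailure(pattern.to_string()));
        result.map_err(|err| SketchError::InvalidPattern(err.into()))
    }

    fn main() {
        println!("{:?}", compile("(?=broken"));
    }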

0 comments on commit 789a266
