-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathlib.rs
46 lines (37 loc) · 1.39 KB
/
lib.rs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
#[cfg(test)]
mod tests {
    use std::{
        fs::{read_to_string, File},
        io::{BufReader, Cursor},
    };

    use hf_hub::{api::sync::Api, Repo, RepoType};
    use tokenizers::Tokenizer;

    use super::*;

    /// Reproduction case for a `Tokenizer` deserialization discrepancy.
    ///
    /// The test obtains `tokenizer.json` for `openai/whisper-tiny` (revision
    /// `main`) through the `hf_hub` sync API and then deserializes the same
    /// JSON three ways:
    ///
    /// 1. `serde_json::from_str` on the file contents,
    /// 2. `serde_json::from_reader` over an in-memory `Cursor`, falling back
    ///    to `from_str` (and logging the error) when the reader path fails,
    /// 3. `serde_json::from_reader` over a buffered `File`.
    ///
    /// According to the original author's notes, step 1 works while the
    /// reader-based path fails, so this test is expected to return an error
    /// from step 3 for as long as that discrepancy exists.
    /// NOTE(review): the root cause is not visible from this file — confirm
    /// against the `tokenizers` / `serde_json` versions in use.
    #[test]
    fn test_whisper() -> Result<(), Box<dyn std::error::Error>> {
        let model_id = "openai/whisper-tiny";
        let revision = "main";

        // Resolve the tokenizer file for the pinned model revision.
        let hub = Api::new()?;
        let model_repo = hub.repo(Repo::with_revision(
            String::from(model_id),
            RepoType::Model,
            String::from(revision),
        ));
        let tokenizer_path = model_repo.get("tokenizer.json")?;
        eprintln!("Loading tokenizer from {}", tokenizer_path.display());

        // BUG: Tokenizer from_reader fails but not from_str
        let json = read_to_string(&tokenizer_path)?;

        // Variant 1: parse straight from the in-memory string (reported as working).
        eprintln!("Loading with from_str");
        let _via_str = serde_json::from_str::<Tokenizer>(&json)?;

        // Variant 2: parse through a reader over the very same string. On
        // failure the error is logged and the string-based parse is used
        // instead, so this step does not abort the test by itself.
        eprintln!("Loading with from_reader");
        let _via_cursor = match serde_json::from_reader::<_, Tokenizer>(Cursor::new(&json)) {
            Ok(parsed) => parsed,
            Err(err) => {
                eprintln!("Error: {}", err);
                serde_json::from_str::<Tokenizer>(&json).unwrap()
            }
        };

        // Variant 3: parse through a buffered reader over the file on disk
        // (reported as failing); any error is propagated to the caller.
        eprintln!("Loading with from_reader directly");
        let file_reader = BufReader::new(File::open(tokenizer_path)?);
        let _via_file = serde_json::from_reader::<_, Tokenizer>(file_reader)?;

        Ok(())
    }
}