
add typing to tokenizer
PhilipMay committed May 10, 2021
1 parent e853ad3 commit 70edf9b
Showing 2 changed files with 2 additions and 2 deletions.
@@ -275,7 +275,7 @@ class SPMTokenizer:
         BPE-dropout.
     """

-    def __init__(self, vocab_file, split_by_punct=False, sp_model_kwargs=None):
+    def __init__(self, vocab_file, split_by_punct=False, sp_model_kwargs: Optional[Dict[str, Any]] = None):
         self.split_by_punct = split_by_punct
         self.vocab_file = vocab_file
         self.sp_model_kwargs = {} if sp_model_kwargs is None else sp_model_kwargs
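For readers of this hunk, here is a minimal sketch of the pattern the new annotation describes: `sp_model_kwargs` is typed as `Optional[Dict[str, Any]]`, defaults to `None` rather than `{}` (avoiding a mutable default argument), is replaced with an empty dict inside `__init__`, and is forwarded to `sentencepiece.SentencePieceProcessor`. The wrapper class name below is hypothetical; only the kwargs-handling lines mirror the diff.

from typing import Any, Dict, Optional

import sentencepiece


class TinySPMWrapper:  # hypothetical illustration, not the real SPMTokenizer
    def __init__(self, vocab_file: str, sp_model_kwargs: Optional[Dict[str, Any]] = None):
        self.vocab_file = vocab_file
        # Fall back to an empty dict so callers can omit the argument entirely.
        self.sp_model_kwargs = {} if sp_model_kwargs is None else sp_model_kwargs
        # Forward any SentencePiece options (e.g. sampling parameters) to the processor.
        self.spm = sentencepiece.SentencePieceProcessor(**self.sp_model_kwargs)
        self.spm.Load(vocab_file)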
src/transformers/models/marian/tokenization_marian.py (2 changes: 1 addition, 1 deletion)
@@ -331,7 +331,7 @@ def get_special_tokens_mask(
         return self._special_token_mask(token_ids_0 + token_ids_1) + [1]


-def load_spm(path: str, sp_model_kwargs) -> sentencepiece.SentencePieceProcessor:
+def load_spm(path: str, sp_model_kwargs: Dict[str, Any]) -> sentencepiece.SentencePieceProcessor:
     spm = sentencepiece.SentencePieceProcessor(**sp_model_kwargs)
     spm.Load(path)
     return spm
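A hedged usage sketch for the updated `load_spm` signature: the `Dict[str, Any]` annotation documents that arbitrary SentencePiece options can be passed through to the processor. The kwargs shown (`enable_sampling`, `nbest_size`, `alpha`) are SentencePiece's subword-regularization options and the model path is a placeholder; they are illustrative, not mandated by this commit.

from typing import Any, Dict

import sentencepiece


def load_spm(path: str, sp_model_kwargs: Dict[str, Any]) -> sentencepiece.SentencePieceProcessor:
    # Same body as in the diff: build the processor from the kwargs, then load the model file.
    spm = sentencepiece.SentencePieceProcessor(**sp_model_kwargs)
    spm.Load(path)
    return spm


# Placeholder model path; the options enable sampled (BPE-dropout-style) segmentation.
sp_model_kwargs: Dict[str, Any] = {"enable_sampling": True, "nbest_size": -1, "alpha": 0.1}
spm = load_spm("spiece.model", sp_model_kwargs)
print(spm.encode("Hello world", out_type=str))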
