Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(symspellpy.py): handle encoding errors #149

Merged
merged 3 commits into from
Feb 15, 2024
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 4 additions & 3 deletions symspellpy/symspellpy.py
Original file line number Diff line number Diff line change
Expand Up @@ -165,7 +165,7 @@ def word_count(self) -> int:
return len(self._words)

def create_dictionary(
self, corpus: Union[Path, str, IO[str]], encoding: Optional[str] = None
self, corpus: Union[Path, str, IO[str]], encoding: Optional[str] = None, errors: Optional[str] = None
) -> bool:
"""Loads multiple dictionary words from a file containing plain text.

Expand All @@ -174,7 +174,8 @@ def create_dictionary(
Args:
corpus: The path+filename of the file or a file object of the
dictionary.
encoding: Text encoding of the corpus file.
encoding: Text encoding of the corpus file. Default None.
errors: Determines how decoding errors are handled. Default None.

Returns:
``True`` if file loaded, or ``False`` if file not found.
Expand All @@ -184,7 +185,7 @@ def create_dictionary(
if not corpus.exists():
logger.error(f"Corpus not found at {corpus}.")
return False
for key in self._parse_words(corpus.read_text(encoding=encoding)):
for key in self._parse_words(corpus.read_text(encoding=encoding, errors=errors)):
self.create_dictionary_entry(key, 1)
else:
for line in corpus:
Expand Down
Loading