Skip to content

Commit

Permalink
Merge pull request #2236 from enricoboos/master
Browse files Browse the repository at this point in the history
Added Kazakh as a new language for POS
  • Loading branch information
alanakbik authored Apr 21, 2021
2 parents 20ef6cb + 0e3ef9b commit 14172c1
Show file tree
Hide file tree
Showing 2 changed files with 28 additions and 0 deletions.
1 change: 1 addition & 0 deletions flair/datasets/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,7 @@
from .treebanks import UniversalDependenciesDataset
from .treebanks import UD_ARMENIAN
from .treebanks import UD_ENGLISH
from .treebanks import UD_KAZAKH
from .treebanks import UD_ESTONIAN
from .treebanks import UD_GERMAN
from .treebanks import UD_GERMAN_HDT
Expand Down
27 changes: 27 additions & 0 deletions flair/datasets/treebanks.py
Original file line number Diff line number Diff line change
Expand Up @@ -251,6 +251,33 @@ def __init__(self, base_path: Union[str, Path] = None, in_memory: bool = True, s

super(UD_ENGLISH, self).__init__(data_folder, in_memory=in_memory, split_multiwords=split_multiwords)


class UD_KAZAKH(UniversalDependenciesCorpus):
    """Corpus built from the Universal Dependencies Kazakh (KTB) treebank.

    On construction the ``kk_ktb-ud-test.conllu`` and ``kk_ktb-ud-train.conllu``
    files are requested from the ``UD_Kazakh-KTB`` GitHub repository via
    ``cached_path`` and the corpus is then initialised from the dataset folder.
    """

    def __init__(self, base_path: Union[str, Path] = None, in_memory: bool = True, split_multiwords: bool = True):
        """Fetch the treebank files if necessary and initialise the corpus.

        :param base_path: Folder that contains (or will contain) the dataset
            sub-folder. When omitted, ``flair.cache_root / "datasets"`` is used.
        :param in_memory: Forwarded unchanged to ``UniversalDependenciesCorpus``.
        :param split_multiwords: Forwarded unchanged to
            ``UniversalDependenciesCorpus``.
        """
        # isinstance (rather than an exact type comparison) also accepts
        # str subclasses and is the idiomatic type check.
        if isinstance(base_path, str):
            base_path = Path(base_path)

        # this dataset name
        dataset_name = self.__class__.__name__.lower()

        # default dataset folder is the cache root
        if not base_path:
            base_path = Path(flair.cache_root) / "datasets"
        data_folder = base_path / dataset_name

        # download data if necessary
        # NOTE(review): the cache target handed to cached_path is the relative
        # path "datasets/<dataset_name>" and does not depend on base_path;
        # presumably it is resolved against the flair cache root, so a custom
        # base_path may not receive the downloaded files - TODO confirm.
        web_path = "https://raw.githubusercontent.com/UniversalDependencies/UD_Kazakh-KTB/master"
        cache_target = Path("datasets") / dataset_name
        for split in ("test", "train"):
            cached_path(f"{web_path}/kk_ktb-ud-{split}.conllu", cache_target)

        super(UD_KAZAKH, self).__init__(data_folder, in_memory=in_memory, split_multiwords=split_multiwords)


class UD_OLD_CHURCH_SLAVONIC(UniversalDependenciesCorpus):
def __init__(self, base_path: Union[str, Path] = None, in_memory: bool = True, split_multiwords: bool = True):

Expand Down

0 comments on commit 14172c1

Please sign in to comment.