Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
# default to the "datasets" folder under the flair cache root
if not base_path:
base_path = Path(flair.cache_root) / "datasets"
data_folder = base_path / dataset_name
# download data if necessary
ud_path = "https://raw.githubusercontent.com/UniversalDependencies/UD_Romanian-RRT/master"
cached_path(f"{ud_path}/ro_rrt-ud-dev.conllu", Path("datasets") / dataset_name)
cached_path(f"{ud_path}/ro_rrt-ud-test.conllu", Path("datasets") / dataset_name)
cached_path(
f"{ud_path}/ro_rrt-ud-train.conllu", Path("datasets") / dataset_name
)
super(UD_ROMANIAN, self).__init__(data_folder, in_memory=in_memory)
class UD_CATALAN(UniversalDependenciesCorpus):
def __init__(self, base_path: Union[str, Path] = None, in_memory: bool = True):
if type(base_path) == str:
base_path: Path = Path(base_path)
# dataset name: the lower-cased class name
dataset_name = self.__class__.__name__.lower()
# default to the "datasets" folder under the flair cache root
if not base_path:
base_path = Path(flair.cache_root) / "datasets"
data_folder = base_path / dataset_name
# download data if necessary
ud_path = "https://raw.githubusercontent.com/UniversalDependencies/UD_Catalan-AnCora/master"
cached_path(
# download data if necessary
ud_path = "https://raw.githubusercontent.com/UniversalDependencies/UD_Norwegian-Bokmaal/master"
cached_path(
f"{ud_path}/no_bokmaal-ud-dev.conllu", Path("datasets") / dataset_name
)
cached_path(
f"{ud_path}/no_bokmaal-ud-test.conllu", Path("datasets") / dataset_name
)
cached_path(
f"{ud_path}/no_bokmaal-ud-train.conllu", Path("datasets") / dataset_name
)
super(UD_NORWEGIAN, self).__init__(data_folder, in_memory=in_memory)
class UD_FINNISH(UniversalDependenciesCorpus):
def __init__(self, base_path: Union[str, Path] = None, in_memory: bool = True):
if type(base_path) == str:
base_path: Path = Path(base_path)
# dataset name: the lower-cased class name
dataset_name = self.__class__.__name__.lower()
# default to the "datasets" folder under the flair cache root
if not base_path:
base_path = Path(flair.cache_root) / "datasets"
data_folder = base_path / dataset_name
# download data if necessary
ud_path = "https://raw.githubusercontent.com/UniversalDependencies/UD_Finnish-TDT/master"
cached_path(f"{ud_path}/fi_tdt-ud-dev.conllu", Path("datasets") / dataset_name)
log.info("Reading data from {}".format(data_folder))
log.info("Train: {}".format(train_file))
log.info("Test: {}".format(test_file))
log.info("Dev: {}".format(dev_file))
# get train data
train = UniversalDependenciesDataset(train_file, in_memory=in_memory)
# get test data
test = UniversalDependenciesDataset(test_file, in_memory=in_memory)
# get dev data
dev = UniversalDependenciesDataset(dev_file, in_memory=in_memory)
super(UniversalDependenciesCorpus, self).__init__(
train, dev, test, name=data_folder.name
)
if not base_path:
base_path = Path(flair.cache_root) / "datasets"
data_folder = base_path / dataset_name
# download data if necessary
ud_path = "https://raw.githubusercontent.com/UniversalDependencies/UD_Japanese-GSD/master"
cached_path(f"{ud_path}/ja_gsd-ud-dev.conllu", Path("datasets") / dataset_name)
cached_path(f"{ud_path}/ja_gsd-ud-test.conllu", Path("datasets") / dataset_name)
cached_path(
f"{ud_path}/ja_gsd-ud-train.conllu", Path("datasets") / dataset_name
)
super(UD_JAPANESE, self).__init__(data_folder, in_memory=in_memory)
class UD_CHINESE(UniversalDependenciesCorpus):
def __init__(self, base_path: Union[str, Path] = None, in_memory: bool = True):
if type(base_path) == str:
base_path: Path = Path(base_path)
# dataset name: the lower-cased class name
dataset_name = self.__class__.__name__.lower()
# default to the "datasets" folder under the flair cache root
if not base_path:
base_path = Path(flair.cache_root) / "datasets"
data_folder = base_path / dataset_name
# download data if necessary
ud_path = "https://raw.githubusercontent.com/UniversalDependencies/UD_Chinese-GSD/master"
cached_path(f"{ud_path}/zh_gsd-ud-dev.conllu", Path("datasets") / dataset_name)
# download data if necessary
ud_path = "https://raw.githubusercontent.com/UniversalDependencies/UD_Dutch-Alpino/master"
cached_path(
f"{ud_path}/nl_alpino-ud-dev.conllu", Path("datasets") / dataset_name
)
cached_path(
f"{ud_path}/nl_alpino-ud-test.conllu", Path("datasets") / dataset_name
)
cached_path(
f"{ud_path}/nl_alpino-ud-train.conllu", Path("datasets") / dataset_name
)
super(UD_DUTCH, self).__init__(data_folder, in_memory=in_memory)
class UD_FRENCH(UniversalDependenciesCorpus):
def __init__(self, base_path: Union[str, Path] = None, in_memory: bool = True):
if type(base_path) == str:
base_path: Path = Path(base_path)
# dataset name: the lower-cased class name
dataset_name = self.__class__.__name__.lower()
# default to the "datasets" folder under the flair cache root
if not base_path:
base_path = Path(flair.cache_root) / "datasets"
data_folder = base_path / dataset_name
# download data if necessary
ud_path = "https://raw.githubusercontent.com/UniversalDependencies/UD_French-GSD/master"
cached_path(f"{ud_path}/fr_gsd-ud-dev.conllu", Path("datasets") / dataset_name)
# default to the "datasets" folder under the flair cache root
if not base_path:
base_path = Path(flair.cache_root) / "datasets"
data_folder = base_path / dataset_name
# download data if necessary
ud_path = "https://raw.githubusercontent.com/UniversalDependencies/UD_Hebrew-HTB/master"
cached_path(f"{ud_path}/he_htb-ud-dev.conllu", Path("datasets") / dataset_name)
cached_path(f"{ud_path}/he_htb-ud-test.conllu", Path("datasets") / dataset_name)
cached_path(
f"{ud_path}/he_htb-ud-train.conllu", Path("datasets") / dataset_name
)
super(UD_HEBREW, self).__init__(data_folder, in_memory=in_memory)
class UD_TURKISH(UniversalDependenciesCorpus):
def __init__(self, base_path: Union[str, Path] = None, in_memory: bool = True):
if type(base_path) == str:
base_path: Path = Path(base_path)
# dataset name: the lower-cased class name
dataset_name = self.__class__.__name__.lower()
# default to the "datasets" folder under the flair cache root
if not base_path:
base_path = Path(flair.cache_root) / "datasets"
data_folder = base_path / dataset_name
# download data if necessary
ud_path = "https://raw.githubusercontent.com/UniversalDependencies/UD_Turkish-IMST/master"
cached_path(f"{ud_path}/tr_imst-ud-dev.conllu", Path("datasets") / dataset_name)
if not base_path:
base_path = Path(flair.cache_root) / "datasets"
data_folder = base_path / dataset_name
# download data if necessary
ud_path = "https://raw.githubusercontent.com/UniversalDependencies/UD_Bulgarian-BTB/master"
cached_path(f"{ud_path}/bg_btb-ud-dev.conllu", Path("datasets") / dataset_name)
cached_path(f"{ud_path}/bg_btb-ud-test.conllu", Path("datasets") / dataset_name)
cached_path(
f"{ud_path}/bg_btb-ud-train.conllu", Path("datasets") / dataset_name
)
super(UD_BULGARIAN, self).__init__(data_folder, in_memory=in_memory)
class UD_ARABIC(UniversalDependenciesCorpus):
def __init__(self, base_path: Union[str, Path] = None, in_memory: bool = True):
if type(base_path) == str:
base_path: Path = Path(base_path)
# dataset name: the lower-cased class name
dataset_name = self.__class__.__name__.lower()
# default to the "datasets" folder under the flair cache root
if not base_path:
base_path = Path(flair.cache_root) / "datasets"
data_folder = base_path / dataset_name
# download data if necessary
ud_path = "https://raw.githubusercontent.com/UniversalDependencies/UD_Arabic-PADT/master"
cached_path(f"{ud_path}/ar_padt-ud-dev.conllu", Path("datasets") / dataset_name)
if not base_path:
base_path = Path(flair.cache_root) / "datasets"
data_folder = base_path / dataset_name
# download data if necessary
ud_path = "https://raw.githubusercontent.com/UniversalDependencies/UD_Polish-LFG/master"
cached_path(f"{ud_path}/pl_lfg-ud-dev.conllu", Path("datasets") / dataset_name)
cached_path(f"{ud_path}/pl_lfg-ud-test.conllu", Path("datasets") / dataset_name)
cached_path(
f"{ud_path}/pl_lfg-ud-train.conllu", Path("datasets") / dataset_name
)
super(UD_POLISH, self).__init__(data_folder, in_memory=in_memory)
class UD_CZECH(UniversalDependenciesCorpus):
def __init__(self, base_path: Union[str, Path] = None, in_memory: bool = False):
if type(base_path) == str:
base_path: Path = Path(base_path)
# dataset name: the lower-cased class name
dataset_name = self.__class__.__name__.lower()
# default to the "datasets" folder under the flair cache root
if not base_path:
base_path = Path(flair.cache_root) / "datasets"
data_folder = base_path / dataset_name
# download data if necessary
ud_path = "https://raw.githubusercontent.com/UniversalDependencies/UD_Czech-PDT/master"
cached_path(f"{ud_path}/cs_pdt-ud-dev.conllu", Path("datasets") / dataset_name)
# download data if necessary
ud_path = "https://raw.githubusercontent.com/UniversalDependencies/UD_Korean-Kaist/master"
cached_path(
f"{ud_path}/ko_kaist-ud-dev.conllu", Path("datasets") / dataset_name
)
cached_path(
f"{ud_path}/ko_kaist-ud-test.conllu", Path("datasets") / dataset_name
)
cached_path(
f"{ud_path}/ko_kaist-ud-train.conllu", Path("datasets") / dataset_name
)
super(UD_KOREAN, self).__init__(data_folder, in_memory=in_memory)
class UD_BASQUE(UniversalDependenciesCorpus):
def __init__(self, base_path: Union[str, Path] = None, in_memory: bool = True):
if type(base_path) == str:
base_path: Path = Path(base_path)
# dataset name: the lower-cased class name
dataset_name = self.__class__.__name__.lower()
# default to the "datasets" folder under the flair cache root
if not base_path:
base_path = Path(flair.cache_root) / "datasets"
data_folder = base_path / dataset_name
# download data if necessary
ud_path = "https://raw.githubusercontent.com/UniversalDependencies/UD_Basque-BDT/master"
cached_path(f"{ud_path}/eu_bdt-ud-dev.conllu", Path("datasets") / dataset_name)
if not base_path:
base_path = Path(flair.cache_root) / "datasets"
data_folder = base_path / dataset_name
# download data if necessary
ud_path = "https://raw.githubusercontent.com/UniversalDependencies/UD_German-GSD/master"
cached_path(f"{ud_path}/de_gsd-ud-dev.conllu", Path("datasets") / dataset_name)
cached_path(f"{ud_path}/de_gsd-ud-test.conllu", Path("datasets") / dataset_name)
cached_path(
f"{ud_path}/de_gsd-ud-train.conllu", Path("datasets") / dataset_name
)
super(UD_GERMAN, self).__init__(data_folder, in_memory=in_memory)
class UD_GERMAN_HDT(UniversalDependenciesCorpus):
def __init__(self, base_path: Union[str, Path] = None, in_memory: bool = False):
if type(base_path) == str:
base_path: Path = Path(base_path)
# dataset name: the lower-cased class name
dataset_name = self.__class__.__name__.lower()
# default to the "datasets" folder under the flair cache root
if not base_path:
base_path = Path(flair.cache_root) / "datasets"
data_folder = base_path / dataset_name
# download data if necessary
ud_path = (
"https://raw.githubusercontent.com/UniversalDependencies/UD_German-HDT/dev"