import functools

from t5.data import preprocessors
# TaskRegistry, TfdsTask and DEFAULT_SPM_PATH are assumed importable from
# t5.data.utils, as in the upstream T5 codebase.
from t5.data.utils import DEFAULT_SPM_PATH
from t5.data.utils import TaskRegistry
from t5.data.utils import TfdsTask
from t5.evaluation import metrics
import tensorflow_datasets as tfds


# The function signature and the stsb branch are reconstructed context for
# the elif chain below; the remaining branches are from the original snippet.
def get_glue_text_preprocessor(builder_config):
  """Returns the text preprocessor for a GLUE/SuperGLUE builder config."""
  # stsb uses a floating-point target, so it needs its own preprocessor.
  if builder_config.name == "stsb":
    return preprocessors.stsb
  elif builder_config.name in ["axb", "axg"]:
    # Cast the SuperGLUE diagnostic tasks as RTE.
    benchmark_name = "rte"
  else:
    benchmark_name = builder_config.name
  if builder_config.name == "multirc":
    feature_names = ("question", "answer", "paragraph")
  elif builder_config.name == "wic":
    # This ignores the start/end indices which show where in each sentence the
    # word appears.
    # TODO(craffel): Investigate using those indices.
    feature_names = ("sentence1", "sentence2", "word")
  else:
    feature_names = None
  return functools.partial(
      preprocessors.glue,
      benchmark_name=benchmark_name,
      label_names=builder_config.label_classes,
      feature_names=feature_names)
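

# A minimal usage sketch (not in the original snippet): this is roughly how
# the helper above gets wired into a SuperGLUE task registration. The BoolQ
# config, the "super_glue/boolq:1.0.2" TFDS version, and the accuracy metric
# are illustrative assumptions.
_boolq_config = tfds.text.super_glue.SuperGlue.builder_configs["boolq"]
TaskRegistry.add(
    "super_glue_boolq_v102",
    TfdsTask,
    tfds_name="super_glue/boolq:1.0.2",
    text_preprocessor=get_glue_text_preprocessor(_boolq_config),
    sentencepiece_model_path=DEFAULT_SPM_PATH,
    metric_fns=[metrics.accuracy])
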
# ==================================== C4 ======================================
_c4_config_suffixes = ["", ".noclean", ".realnewslike", ".webtextlike"]
for config_suffix in _c4_config_suffixes:
  TaskRegistry.add(
      "c4{name}_v020_unsupervised".format(
          name=config_suffix.replace(".", "_")),
      TfdsTask,
      tfds_name="c4/en{config}:1.0.0".format(config=config_suffix),
      text_preprocessor=functools.partial(
          preprocessors.rekey, key_map={"inputs": None, "targets": "text"}),
      token_preprocessor=preprocessors.unsupervised,
      sentencepiece_model_path=DEFAULT_SPM_PATH,
      metric_fns=[])
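
# Minimal usage sketch (assumed; not part of the original file): fetch one of
# the C4 tasks registered above and build a tokenized dataset from it. The
# sequence lengths here are arbitrary.
_c4_task = TaskRegistry.get("c4_v020_unsupervised")
_c4_dataset = _c4_task.get_dataset(
    sequence_length={"inputs": 512, "targets": 512}, split="train")
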
# ================================ Wikipedia ===================================
TaskRegistry.add(
"wikipedia_20190301.en_v003_unsupervised",
TfdsTask,
# 0.0.4 is identical to 0.0.3 except empty records removed.
tfds_name="wikipedia/20190301.en:0.0.4",
text_preprocessor=functools.partial(
preprocessors.rekey, key_map={"inputs": None, "targets": "text"}),
token_preprocessor=preprocessors.unsupervised,
sentencepiece_model_path=DEFAULT_SPM_PATH,
metric_fns=[])
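
# Sketch (assumed): unsupervised tasks like the two registered above are
# typically combined into a pre-training mixture. MixtureRegistry is assumed
# to live in t5.data.utils; the mixture name and the flat rate are
# illustrative, not from the original file.
from t5.data.utils import MixtureRegistry

MixtureRegistry.add(
    "unsupervised_en_mixture",
    ["c4_v020_unsupervised", "wikipedia_20190301.en_v003_unsupervised"],
    default_rate=1.0)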