# Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def run_experiment(args):
    """Run a single FARM experiment described by ``args``.

    Prints a banner, initializes MLflow tracking, validates the config and
    sets up device / distributed settings.

    :param args: nested experiment config; reads ``args.task.name``,
        ``args.logging.*`` and ``args.general.*`` attributes.

    NOTE(review): this chunk appears truncated — the visible body only does
    setup; the training/evaluation steps are presumably outside this view.
    The original body had lost its indentation (syntax error); restored here.
    """
    logger.info(
        "\n***********************************************"
        f"\n************* Experiment: {args.task.name} ************"
        "\n************************************************"
    )
    # Track this run in MLflow; server / experiment / run name come from config.
    ml_logger = MlLogger(tracking_uri=args.logging.mlflow_url)
    ml_logger.init_experiment(
        experiment_name=args.logging.mlflow_experiment,
        run_name=args.logging.mlflow_run_name,
        nested=args.logging.mlflow_nested,
    )
    validate_args(args)
    # local_rank == -1 means single-process (non-distributed) training.
    distributed = bool(args.general.local_rank != -1)
    # Init device and distributed settings
    device, n_gpu = initialize_device_settings(
        use_cuda=args.general.cuda,
        local_rank=args.general.local_rank,
        use_amp=args.general.use_amp,
    )
from farm.data_handler.processor import TextClassificationProcessor
from farm.modeling.optimization import initialize_optimizer
from farm.infer import Inferencer
from farm.modeling.adaptive_model import AdaptiveModel
from farm.modeling.language_model import LanguageModel
from farm.modeling.prediction_head import TextClassificationHead
from farm.modeling.tokenization import Tokenizer
from farm.train import Trainer
from farm.utils import set_all_seeds, MLFlowLogger, initialize_device_settings
# --- Script-level setup for a text-classification example (CoLA run) ---
logging.basicConfig(
format="%(asctime)s - %(levelname)s - %(name)s - %(message)s",
datefmt="%m/%d/%Y %H:%M:%S",
level=logging.INFO)
# Log this run to deepset's public MLflow server.
ml_logger = MLFlowLogger(tracking_uri="https://public-mlflow.deepset.ai/")
ml_logger.init_experiment(experiment_name="Public_FARM", run_name="Run_cola")
##########################
########## Settings
##########################
set_all_seeds(seed=42)  # fix all RNG seeds for reproducibility
device, n_gpu = initialize_device_settings(use_cuda=True)
n_epochs = 5
batch_size = 100
evaluate_every = 20  # evaluation interval in training steps
lang_model = "bert-base-cased"
do_lower_case = False  # cased model, so keep original casing
# 1.Create a tokenizer
tokenizer = Tokenizer.load(pretrained_model_name_or_path=lang_model, do_lower_case=do_lower_case)
def _get_optim(model, opts):
    """Get the optimizer based on dictionary with options. Options are passed to the optimizer constructor.

    :param model: model to optimize
    :param opts: config dictionary that will be passed to optimizer together with the params
        (e.g. lr, weight_decay, correct_bias ...). 'no_decay' can be given - parameters containing any of those strings
        will have weight_decay set to 0.
    :return: created optimizer

    NOTE(review): this chunk appears truncated — the original presumably
    constructs and returns the optimizer from `optimizer_name`,
    `weight_decay` and `optimizable_parameters` below the visible lines.
    The original body had lost its indentation (syntax error); restored here.
    """
    optimizer_name = opts.pop('name', None)
    # Logging
    logger.info(f"Loading optimizer `{optimizer_name}`: '{opts}'")
    MlLogger.log_params(opts)
    MlLogger.log_params({"optimizer_name": optimizer_name})

    weight_decay = opts.pop('weight_decay', None)
    no_decay = opts.pop('no_decay', None)

    if no_decay:
        # Two parameter groups: names matching any `no_decay` substring get
        # weight_decay forced to 0.0; everything else keeps the given options.
        # Frozen params (requires_grad == False) are excluded from both groups.
        optimizable_parameters = [
            {'params': [p for n, p in model.named_parameters() if not any(nd in n for nd in no_decay) and p.requires_grad],
             **opts},
            {'params': [p for n, p in model.named_parameters() if any(nd in n for nd in no_decay) and p.requires_grad],
             'weight_decay': 0.0,
             **opts}
        ]
    else:
        optimizable_parameters = [{'params': [p for p in model.parameters() if p.requires_grad], **opts}]

    # default weight decay is not the same for all optimizers, so we can't use default value
from farm.modeling.language_model import LanguageModel
from farm.modeling.prediction_head import BertLMHead, NextSentenceHead
from farm.modeling.tokenization import Tokenizer
from farm.train import Trainer
from farm.modeling.optimization import initialize_optimizer
from farm.utils import set_all_seeds, MLFlowLogger, initialize_device_settings
# --- Script-level setup for a minimal language-model (LM) example ---
logging.basicConfig(
format="%(asctime)s - %(levelname)s - %(name)s - %(message)s",
datefmt="%m/%d/%Y %H:%M:%S",
level=logging.INFO,
)
set_all_seeds(seed=42)  # fix all RNG seeds for reproducibility
# Log this run to deepset's public MLflow server.
ml_logger = MLFlowLogger(tracking_uri="https://public-mlflow.deepset.ai/")
ml_logger.init_experiment(
experiment_name="Public_FARM", run_name="Run_minimal_example_lm"
)
##########################
########## Settings
##########################
device, n_gpu = initialize_device_settings(use_cuda=True)
n_epochs = 1
batch_size = 32
evaluate_every = 30  # evaluation interval in training steps
lang_model = "bert-base-cased"
# 1.Create a tokenizer
tokenizer = Tokenizer.load(
pretrained_model_name_or_path=lang_model, do_lower_case=False
)
# NOTE(review): fragment of a scheduler-initialization function — its `def`,
# and the names `sched_constructor`, `schedule_name`, `opts` and `optimizer`,
# are defined above this chunk (outside the visible view). The `return` below
# confirms this is mid-function; left byte-identical.
# get supported args of constructor
allowed_args = inspect.signature(sched_constructor).parameters.keys()
# convert from warmup proportion to steps if required
if 'num_warmup_steps' in allowed_args and 'num_warmup_steps' not in opts and 'warmup_proportion' in opts:
opts['num_warmup_steps'] = int(opts["warmup_proportion"] * opts["num_training_steps"])
MlLogger.log_params({"warmup_proportion": opts["warmup_proportion"]})
# only pass args that are supported by the constructor
opts = {k: v for k, v in opts.items() if k in allowed_args}
# Logging
logger.info(f"Loading schedule `{schedule_name}`: '{opts}'")
MlLogger.log_params(opts)
MlLogger.log_params({"schedule_name": schedule_name})
return sched_constructor(optimizer, **opts)
from farm.metrics import simple_accuracy, register_metrics
##########################
########## Logging
##########################
logger = logging.getLogger(__name__)
logging.basicConfig(
format="%(asctime)s - %(levelname)s - %(name)s - %(message)s",
datefmt="%m/%d/%Y %H:%M:%S",
level=logging.INFO)
# reduce verbosity from transformers library
logging.getLogger('transformers').setLevel(logging.WARNING)
# ml_logger = MLFlowLogger(tracking_uri="https://public-mlflow.deepset.ai/")
# for local logging instead:
ml_logger = MLFlowLogger(tracking_uri="logs")
# ml_logger.init_experiment(experiment_name="Public_FARM", run_name="DocClassification_ES_f1_1")
##########################
########## Settings
##########################
xval_folds = 5  # number of cross-validation folds
xval_stratified = True  # presumably keeps class proportions across folds — confirm against xval code
set_all_seeds(seed=42)  # fix all RNG seeds for reproducibility
device, n_gpu = initialize_device_settings(use_cuda=True)
n_epochs = 20
batch_size = 32
evaluate_every = 100  # evaluation interval in training steps
lang_model = "bert-base-german-cased"
# 1.Create a tokenizer
from farm.modeling.optimization import initialize_optimizer
from farm.infer import Inferencer
from farm.modeling.adaptive_model import AdaptiveModel
from farm.modeling.prediction_head import QuestionAnsweringHead
from farm.modeling.language_model import LanguageModel
from farm.modeling.tokenization import Tokenizer
from farm.train import Trainer
from farm.utils import set_all_seeds, MLFlowLogger, initialize_device_settings
# --- Script-level setup for a question-answering (SQuAD / ALBERT) example ---
logging.basicConfig(
format="%(asctime)s - %(levelname)s - %(name)s - %(message)s",
datefmt="%m/%d/%Y %H:%M:%S",
level=logging.INFO,
)
# Log this run to deepset's public MLflow server.
ml_logger = MLFlowLogger(tracking_uri="https://public-mlflow.deepset.ai/")
ml_logger.init_experiment(experiment_name="SQuAD", run_name="qa_albert")
#########################
######## Settings
########################
set_all_seeds(seed=42)  # fix all RNG seeds for reproducibility
device, n_gpu = initialize_device_settings(use_cuda=True)
batch_size = 60
n_epochs = 2
evaluate_every = 500  # evaluation interval in training steps
base_LM_model = "albert-base-v1"
# Train/dev filenames are presumably resolved against a data dir configured
# elsewhere in the script — confirm against the processor setup.
train_filename="subsets/train_medium-v2.0.json"
dev_filename="subsets/dev_medium-v2.0.json"
save_dir = "../saved_models/qa_medium_albert"
inference_file = "../data/squad20/subsets/dev_medium-v2.0.json"
predictions_file = save_dir + "/predictions.json"
from farm.modeling.adaptive_model import AdaptiveModel
from farm.modeling.language_model import LanguageModel
from farm.modeling.prediction_head import TextClassificationHead
from farm.modeling.tokenization import Tokenizer
from farm.train import Trainer, EarlyStopping
from farm.utils import set_all_seeds, MLFlowLogger, initialize_device_settings
from farm.eval import Evaluator
from sklearn.metrics import matthews_corrcoef, recall_score, precision_score, f1_score, mean_squared_error, r2_score
from farm.metrics import simple_accuracy, register_metrics
# --- Script-level setup for doc classification with early stopping ---
logging.basicConfig(
format="%(asctime)s - %(levelname)s - %(name)s - %(message)s",
datefmt="%m/%d/%Y %H:%M:%S",
level=logging.INFO)
# Log this run to deepset's public MLflow server.
ml_logger = MLFlowLogger(tracking_uri="https://public-mlflow.deepset.ai/")
# for local logging instead:
# ml_logger = MLFlowLogger(tracking_uri="logs")
ml_logger.init_experiment(experiment_name="Public_FARM", run_name="DocClassification_ES_f1_1")
##########################
########## Settings
##########################
set_all_seeds(seed=42)  # fix all RNG seeds for reproducibility
use_amp = None  # automatic mixed precision disabled
device, n_gpu = initialize_device_settings(use_cuda=True, use_amp=use_amp)
n_epochs = 20
batch_size = 32
evaluate_every = 100  # evaluation interval in training steps
lang_model = "bert-base-german-cased"
# 1.Create a tokenizer