with pytest.raises(TypeError):
    # Calling set_experiment() with no arguments is a usage error
    mlflow.set_experiment()  # pylint: disable=no-value-for-parameter
with pytest.raises(Exception):
    mlflow.set_experiment(None)
with pytest.raises(Exception):
    mlflow.set_experiment("")

name = "random_exp"
exp_id = mlflow.create_experiment(name)
mlflow.set_experiment(name)
with start_run() as run:
    assert run.info.experiment_id == exp_id

another_name = "another_experiment"
mlflow.set_experiment(another_name)
# get_experiment_by_name returns an Experiment object, not a bare id
experiment2 = mlflow.tracking.MlflowClient().get_experiment_by_name(another_name)
with start_run() as another_run:
    assert another_run.info.experiment_id == experiment2.experiment_id
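The test above relies on set_experiment switching the active experiment so that subsequent runs land under it. As a standalone sketch of that behavior (the experiment name is illustrative; in recent MLflow versions set_experiment also creates the experiment if it does not exist):

import mlflow

mlflow.set_experiment("demo_exp")
with mlflow.start_run() as run:
    # New runs are created under the active experiment
    print(run.info.experiment_id)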
def test_get_experiment_id_with_active_experiment_returns_active_experiment_id():
    # Create a new experiment and set it as the active experiment
    with TempDir(chdr=True):
        name = "Random experiment %d" % random.randint(1, 10**6)
        exp_id = mlflow.create_experiment(name)
        assert exp_id is not None
        mlflow.set_experiment(name)
        assert _get_experiment_id() == exp_id
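_get_experiment_id() is an internal MLflow helper; with the public fluent API the same assertion can be written roughly as follows (a sketch, assuming a default local tracking store and a hypothetical experiment name):

import mlflow

name = "Random experiment 42"  # hypothetical name
exp_id = mlflow.create_experiment(name)
mlflow.set_experiment(name)
assert mlflow.get_experiment_by_name(name).experiment_id == exp_id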
N_ESTIMATORS = 2
MAX_DEPTH = 2
model = RandomForestRegressor(n_estimators=N_ESTIMATORS, max_depth=MAX_DEPTH)
model = model.fit(x_train, y_train.values.ravel())

# Save the model and the column order it expects at inference time
joblib.dump(model, 'models/model.joblib')
joblib.dump(column_order, 'models/column_order.joblib')

if settings.SHOULD_USE_MLFLOW == 'true':
    # Log the training run to MLflow
    mlflow.set_tracking_uri(uri=f'http://{settings.MLFLOW_IP}:5000')
    if settings.CI == 'true':
        mlflow.set_experiment('CI')
    else:
        mlflow.set_experiment('dev')
    with mlflow.start_run() as run:
        # Calculate evaluation metrics
        y_test_pred = model.predict(x_test)
        rmse = sqrt(metrics.mean_squared_error(y_true=y_test, y_pred=y_test_pred))
        r2_score = metrics.r2_score(y_true=y_test, y_pred=y_test_pred)
        # Log hyperparameters to MLflow
        mlflow.log_param('n_estimators', N_ESTIMATORS)
        mlflow.log_param('max_depth', MAX_DEPTH)
        # Log metrics to MLflow
        mlflow.log_metric("rmse_validation_data", rmse)
        mlflow.log_metric("r2_score_validation_data", r2_score)
else:
    print('Not logging training run because the MLflow tracking server is not up, or its URL is not set in train.py')
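At inference time the two artifacts saved above are loaded back together, so incoming features can be reordered to match training. A minimal sketch (x_new is a hypothetical feature DataFrame):

import joblib

model = joblib.load('models/model.joblib')
column_order = joblib.load('models/column_order.joblib')
# Reorder the incoming columns to match the training layout
y_pred = model.predict(x_new[column_order])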
def train_ner(model_name, output_path, train_data, dev_data, test_data, dropout, n_iter, patience):
    mlflow.set_tracking_uri("./mlruns")
    mlflow.set_experiment("Spacy NER")
    # Opened without a context manager, so the run stays active until mlflow.end_run()
    mlflow.start_run(run_name="Using all")
    if model_name in ["None", "False", "", "blank"]:
        model_name = None
    trainer = SpacyNerTrainer(model_name, output_path)
    logging.basicConfig(level=logging.DEBUG, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s")
    logging.info("Reading train data")
    diterator = DataIterator()
    train_sentences = list(tqdm(diterator.tagged_sentences(train_data)))
    logging.info("Got {} sentences with at least one entity".format(len(train_sentences)))
    logging.info("Reading test data")
    test_sentences = list(tqdm(diterator.tagged_sentences(test_data)))
    logging.info("Got {} sentences with at least one entity".format(len(test_sentences)))
    model : scikit-learn, LightGBM, XGBoost, or CatBoost model object
        Model to log
    model_name : str
        Name of the model
    model_kwargs : dict
        Model parameters
    metrics : dict
        Metrics for the model
    """
    mlflow.set_tracking_uri(EXP_DIR)
    mlflow.set_experiment(exp_name)
    with mlflow.start_run(run_name=model_name) as run:
        mlflow.log_params(model_kwargs)
        mlflow.set_tag("name", model_name)
        # Use the XGBoost flavor for XGBoost models, the sklearn flavor otherwise
        if isinstance(model, xgb.XGBModel):
            mlflow.xgboost.log_model(model, model_name)
        else:
            mlflow.sklearn.log_model(model, model_name)
        mlflow.log_metrics(metrics)
        mlflow.log_artifacts(os.path.join(IMAGE_DIR, model_name))
        run_id = run.info.run_id  # run_uuid is deprecated in favor of run_id
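With the run id captured above, the logged model can be reloaded later from the tracking store. A sketch assuming the scikit-learn flavor was used:

import mlflow.sklearn

model_uri = f"runs:/{run_id}/{model_name}"
loaded_model = mlflow.sklearn.load_model(model_uri)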
import mlflow
from mlflow import pyfunc
import pandas as pd
import shutil
import tempfile
import tensorflow as tf
from tensorflow.python.saved_model import tag_constants
import mlflow.tensorflow

# Note: We can't set the tracking URI here due to https://github.com/mlflow/mlflow/issues/608
# tracking_uri = 'file:///mnt/pipelineai/users/experiments'
# mlflow.set_tracking_uri(tracking_uri)

experiment_name = 'boston'
mlflow.set_experiment(experiment_name)

# Enable auto-logging to MLflow to capture TensorBoard metrics.
mlflow.tensorflow.autolog()

# Forcing an end_run() to prevent
# https://github.com/mlflow/mlflow/issues/1335
# https://github.com/mlflow/mlflow/issues/608
mlflow.end_run()

def main(argv):
    # Builds, trains, and evaluates a tf.estimator, then exports it for inference,
    # logs the exported model with MLflow, and loads the fitted model back as a
    # PyFunc to make predictions.
    (x_train, y_train), (x_test, y_test) = tf.keras.datasets.boston_housing.load_data()
    # There are 13 features we are using for inference.
    feat_cols = [tf.feature_column.numeric_column(key="features", shape=(x_train.shape[1],))]
    feat_spec = {
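The early mlflow.end_run() above guards against a run left open by a previous script or notebook cell; the same guard can be written conditionally (a sketch using the public API):

import mlflow

# Only end a run if one is actually active
if mlflow.active_run() is not None:
    mlflow.end_run()
mlflow.set_experiment('boston')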
    plt.ylabel('Value')
    plt.title('Metrics')
    plt.close(fig)
    return fig

if __name__ == "__main__":
    min_samples_leaf = int(sys.argv[1])
    max_depth = int(sys.argv[2])
    tag = sys.argv[3] if len(sys.argv) > 3 else ""
    dataset = datasets.load_iris()
    print("MLflow Version:", version.VERSION)
    print("MLflow Tracking URI:", mlflow.get_tracking_uri())
    print("experiment_name:", experiment_name)
    mlflow.set_experiment(experiment_name)
    client = mlflow.tracking.MlflowClient()
    experiment_id = client.get_experiment_by_name(experiment_name).experiment_id
    print("experiment_id:", experiment_id)
    source_name = os.path.basename(__file__)
    print("source_name:", source_name)
    # Recent MLflow versions removed start_run(source_name=...); record the source as a tag instead
    with mlflow.start_run() as run:
        mlflow.set_tag("mlflow.source.name", source_name)
        run_id = run.info.run_id  # run_uuid is deprecated in favor of run_id
        print("run_id:", run_id)
        train(min_samples_leaf, max_depth, dataset.data, dataset.target)
        mlflow.set_tag("runner", tag)
        mlflow.set_tag("mlflow_version", version.VERSION)
        mlflow.set_tag("experiment_id", experiment_id)
        mlflow.set_tag("experiment_name", experiment_name)
        mlflow.set_tag("platform", platform.system())
def init_experiment(self, experiment_name, run_name=None, nested=True):
    mlflow.set_tracking_uri(self.tracking_uri)
    mlflow.set_experiment(experiment_name)
    mlflow.start_run(run_name=run_name, nested=nested)
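Since init_experiment opens its run without a context manager, the caller is responsible for closing it. A hypothetical usage sketch (the tracker object, metric name, and values are illustrative):

tracker.init_experiment("my-experiment", run_name="baseline")
try:
    mlflow.log_metric("loss", 0.42)  # hypothetical logging during training
finally:
    mlflow.end_run()  # close the run opened by init_experiment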