# Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def reconstruct(self, test_data, reverse_transform=False):
    """
    Reconstruct the training data from the model and impute all missing values.

    :param H2OFrame test_data: The dataset upon which the model was trained.
    :param bool reverse_transform: Whether the transformation of the training data during model-building
        should be reversed on the reconstructed frame.

    :returns: the approximate reconstruction of the training data.
    """
    if test_data is None or test_data.nrow == 0:
        raise ValueError("Must specify test data")
    endpoint = "POST /3/Predictions/models/%s/frames/%s" % (self.model_id, test_data.frame_id)
    payload = {"reconstruct_train": True, "reverse_transform": reverse_transform}
    reply = h2o.api(endpoint, data=payload)
    # The backend returns model metrics whose predictions entry names the reconstructed frame.
    frame_name = reply["model_metrics"][0]["predictions"]["frame_id"]["name"]
    return h2o.get_frame(frame_name)
def _model_build(x, y, tframe, vframe, algo, kwargs):
    """
    Launch a model-build job on the backend and return the resolved model.

    :param x: feature columns, forwarded into the H2OModelFuture.
    :param y: response column (name or index into ``tframe``); None for unsupervised algos.
    :param H2OFrame tframe: training frame.
    :param H2OFrame vframe: optional validation frame (may be None).
    :param str algo: algorithm name, selects the ``/ModelBuilders/<algo>`` endpoint.
    :param dict kwargs: remaining builder parameters; H2OFrame values are sent by frame id.

    :returns: the model produced by ``_resolve_model``.
    """
    kwargs['training_frame'] = tframe
    if vframe is not None: kwargs["validation_frame"] = vframe
    if y is not None: kwargs['response_column'] = tframe[y].names[0]
    # Drop None-valued parameters and replace any H2OFrame value with its frame id
    # (materializing lazy frames via _frame() first). Dict comprehension replaces
    # the previous hard-to-read dict([(k, ...)]) one-liner; behavior is identical.
    kwargs = {k: (v._frame()).frame_id if isinstance(v, H2OFrame) else v
              for k, v in kwargs.items() if v is not None}
    # Private override of the REST version; default to v3 when absent.
    rest_ver = kwargs.pop("_rest_version", 3)
    future_model = H2OModelFuture(
        H2OJob(h2o.api("POST /%d/ModelBuilders/%s" % (rest_ver, algo), data=kwargs),
               job_type=(algo + " Model Build")),
        x)
    return _resolve_model(future_model, _rest_version=rest_ver, **kwargs)
def show_status(self, detailed=False):
"""
Print current cluster status information.

:param detailed: if True, then also print detailed information about each node.
"""
# Cached cluster info is considered stale after REFRESH_INTERVAL seconds.
if self._retrieved_at + self.REFRESH_INTERVAL < time.time():
# Info is stale, need to refresh
new_info = h2o.api("GET /3/Cloud")
self._fill_from_h2ocluster(new_info)
# Aggregate per-node statistics across the whole cluster.
ncpus = sum(node["num_cpus"] for node in self.nodes)
allowed_cpus = sum(node["cpus_allowed"] for node in self.nodes)
free_mem = sum(node["free_mem"] for node in self.nodes)
# node["healthy"] is boolean, so summing the negations counts unhealthy nodes.
unhealthy_nodes = sum(not node["healthy"] for node in self.nodes)
status = "locked" if self.locked else "accepting new members"
if unhealthy_nodes == 0:
status += ", healthy"
else:
status += ", %d nodes are not healthy" % unhealthy_nodes
api_extensions = self.list_api_extensions()
# Render the two-column cluster summary table.
H2ODisplay([
["H2O cluster uptime:", get_human_readable_time(self.cloud_uptime_millis)],
["H2O cluster timezone:", self.cloud_internal_timezone],
["H2O data parsing timezone:", self.datafile_parser_timezone],
["H2O cluster version:", self.version],
>>> train = cars[r > .2]
>>> valid = cars[r <= .2]
>>> response_col = "cylinders"
>>> distribution = "multinomial"
>>> predictors = ["displacement","power","weight","acceleration","year"]
>>> gbm = H2OGradientBoostingEstimator(nfolds=3,
... distribution=distribution)
>>> gbm.train(x=predictors,
... y=response_col,
... training_frame=train,
... validation_frame=valid)
>>> confusion_matrix = gbm.confusion_matrix(train)
>>> confusion_matrix
"""
# Only H2OFrames can be scored against the model.
assert_is_type(data, H2OFrame)
# Score `data` with this model; the reply carries the computed model metrics.
j = h2o.api("POST /3/Predictions/models/%s/frames/%s" % (self._id, data.frame_id))
# Return the confusion-matrix table from the first metrics entry.
return j["model_metrics"][0]["cm"]["table"]
# NOTE(review): fragment of an S3-credentials setter — the enclosing def is not visible here.
# Validate that both credential parts were supplied and non-empty before calling the backend.
if(secret_access_key is None):
raise H2OValueError("Secret access key must be specified")
if(not secret_key_id):
raise H2OValueError("Secret key ID must not be empty")
if(not secret_access_key):
raise H2OValueError("Secret access key must not be empty")
# Register the AWS credentials with the backend's S3 persistence layer.
params = {"secret_key_id": secret_key_id,
"secret_access_key": secret_access_key
}
h2o.api(endpoint="POST /3/PersistS3", data=params)
print("Credentials successfully set.")
# NOTE(review): fragment of a partial-dependence-plot method — cut at both ends.
assert_is_type(figsize, (int, int))
# Check cols specified exist in frame data
for xi in cols:
if xi not in data.names:
raise H2OValueError("Column %s does not exist in the training frame" % xi)
# Assemble the REST payload for the PartialDependence endpoint.
kwargs = {}
kwargs["cols"] = cols
kwargs["model_id"] = self.model_id
kwargs["frame_id"] = data.frame_id
kwargs["nbins"] = nbins
kwargs["destination_key"] = destination_key
# Start the PDP job, poll until done, then fetch the computed result.
json = H2OJob(h2o.api("POST /3/PartialDependence/", data=kwargs), job_type="PartialDependencePlot").poll()
json = h2o.api("GET /3/PartialDependence/%s" % json.dest_key)
# Extract partial dependence data from json response
pps = json["partial_dependence_data"]
# Plot partial dependence plots using matplotlib
if plot:
plt = _get_matplotlib_pyplot(server)
if not plt: return
fig, axs = plt.subplots(len(cols), squeeze=False, figsize=figsize)
for i, pp in enumerate(pps):
# Check whether column was categorical or numeric
col = cols[i]
cat = data[col].isfactor()[0]
# presumably pp[1] is the mean response and pp[2] its stderr — confirm against the
# PartialDependence schema; these bound the shaded error band.
upper = [a + b for a, b in zip(pp[1], pp[2]) ]
lower = [a - b for a, b in zip(pp[1], pp[2]) ]
def proj_archetypes(self, test_data, reverse_transform=False):
    """
    Convert archetypes of the model into original feature space.

    :param H2OFrame test_data: The dataset upon which the model was trained.
    :param bool reverse_transform: Whether the transformation of the training data during model-building
        should be reversed on the projected archetypes.

    :returns: model archetypes projected back into the original training data's feature space.
    """
    if test_data is None or test_data.nrow == 0:
        raise ValueError("Must specify test data")
    url = "POST /3/Predictions/models/%s/frames/%s" % (self.model_id, test_data.frame_id)
    options = {"project_archetypes": True, "reverse_transform": reverse_transform}
    reply = h2o.api(url, data=options)
    # The projected archetypes land in a new frame named inside the metrics reply.
    return h2o.get_frame(reply["model_metrics"][0]["predictions"]["frame_id"]["name"])
def getGLMRegularizationPath(model):
    """
    Extract full regularization path explored during lambda search from glm model.

    :param model: source lambda search model
    """
    raw = h2o.api("GET /3/GetGLMRegPath", data={"model": model._model_json["model_id"]["name"]})
    # Coefficient names are shared by every row of both coefficient tables.
    names = raw.pop("coefficient_names")
    path = {
        "lambdas": raw["lambdas"],
        "explained_deviance_train": raw["explained_deviance_train"],
        "explained_deviance_valid": raw["explained_deviance_valid"],
        "coefficients": [dict(zip(names, row)) for row in raw["coefficients"]],
    }
    # Standardized coefficients are only present for standardized models.
    if "coefficients_std" in raw:
        path["coefficients_std"] = [dict(zip(names, row)) for row in raw["coefficients_std"]]
    return path
def fill(self, rows=10):
    """
    Fetch the first ``rows`` rows of the frame from the backend into the local cache.

    No-op when at least that many rows are already cached.
    """
    assert self._id is not None
    # Enough rows cached already? Nothing to fetch.
    if self._data is not None and rows <= len(self):
        return
    res = h2o.api("GET /3/Frames/%s" % self._id, data={"row_count": rows})["frames"][0]
    self._l = rows
    self._nrows = res["rows"]
    self._ncols = res["total_column_count"]
    columns = res["columns"]
    self._names = [c["label"] for c in columns]
    self._types = dict(zip(self._names, (c["type"] for c in columns)))
    self._fill_data(res)