# Parse the "col:value1:value2,..." spec into per-column label lists and a
# list of categorical column indices.
catFeatValues = None
catFeatIndices = None
if catValues:
    catFeatValues = dict()
    catFeatIndices = list()
    items = catValues.split(",")
    for item in items:
        parts = item.split(":")
        col = int(parts[0])
        catFeatValues[col] = parts[1:]
        catFeatIndices.append(col)

    # map the encoded category labels back to the original label strings
    encoder = CatLabelGenerator(catFeatValues, ",")
    for c in catFeatIndices:
        values = encoder.getOrigLabels(c)
        catFeatValues[c] = values

# both categorical arguments stay None for purely numeric data
self.explainer = lime.lime_tabular.LimeTabularExplainer(
    trainFeatData, feature_names=featNames,
    categorical_features=catFeatIndices, categorical_names=catFeatValues,
    kernel_width=kernelWidth, verbose=verbose, class_names=classNames,
    feature_selection=featSelection, sample_around_instance=sampLocal)
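# A minimal, hypothetical illustration of the spec format parsed above:
# "2:red:green:blue,5:yes:no" marks columns 2 and 5 as categorical with the
# listed labels. This standalone sketch only exercises the parsing, not the
# CatLabelGenerator encoding step.
catValues = "2:red:green:blue,5:yes:no"
catFeatValues, catFeatIndices = {}, []
for item in catValues.split(","):
    parts = item.split(":")
    catFeatValues[int(parts[0])] = parts[1:]
    catFeatIndices.append(int(parts[0]))
print(catFeatIndices)    # [2, 5]
print(catFeatValues[2])  # ['red', 'green', 'blue']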
def get_lime(request, pk):  # TODO: changed self to request, check if correct or not
    # get model
    TARGET_MODEL = 1090  # unused
    job = Job.objects.filter(pk=pk)[0]
    model = joblib.load(job.predictive_model.model_path)

    # load data
    training_df, test_df = get_encoded_logs(job)

    # explain a fixed point in the evaluation set
    EXPLANATION_TARGET = 1

    # build the explainer on the training matrix (every column treated as categorical)
    features = training_df.drop(['trace_id', 'label'], axis=1)
    explainer = lime.lime_tabular.LimeTabularExplainer(
        features.to_numpy(),
        feature_names=list(features.columns),
        categorical_features=list(range(features.shape[1])),
        verbose=True,
        mode='classification',
    )
    exp = explainer.explain_instance(
        test_df.drop(['trace_id', 'label'], axis=1).iloc[EXPLANATION_TARGET],
        # TODO: probably the opposite would be way less computationally intensive
        model[0].predict_proba,
        num_features=5
    )
    exp.as_list()

    # show plot
    exp.show_in_notebook(show_table=True)
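    # Hedged sketch: outside a notebook (this is a Django view),
    # show_in_notebook() renders nothing useful; lime's Explanation object
    # also offers these alternatives:
    html = exp.as_html()                       # standalone HTML string
    exp.save_to_file('lime_explanation.html')  # write the same page to disk
    fig = exp.as_pyplot_figure()               # matplotlib bar chart of the weights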
            raise ValueError('Indices given in the {} parameter '
                             'are not valid for the input data '
                             'array.'.format(categorical_indices_keyword))
        init_params[categorical_indices_keyword] = np.array(
            [data.dtype.names.index(y) for y in categorical_indices])
        data = fuat.as_unstructured(data)

    # Get a LIME tabular explainer
    self.mode = init_params.get('mode', 'classification')
    if self.mode not in ['classification', 'regression']:
        raise ValueError("The mode must be either 'classification' or "
                         "'regression'. '{}' given.".format(self.mode))
    self.tabular_explainer = lime.lime_tabular.LimeTabularExplainer(
        data, **init_params)

    # Check the model
    self.model = model
    self.model_is_probabilistic = False
    if model is not None:
        if fumv.check_model_functionality(
                model, require_probabilities=True, suppress_warning=True):
            self.model_is_probabilistic = True
        elif fumv.check_model_functionality(
                model, require_probabilities=False, suppress_warning=True):
            self.model_is_probabilistic = False
            logger.warning('The model can only be used for LIME in a '
                           'regressor mode.')
        else:
            raise IncompatibleModelError('LIME requires a model object to '
                                         'implement a predict method.')  # assumed continuation; the source snippet ends mid-string
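# A minimal standalone sketch of the same capability check without the fatf
# helpers (fumv.check_model_functionality); plain duck-typing is assumed to
# be an acceptable substitute here.
def is_probabilistic(model):
    """Mirror the branching above: True for predict_proba models, False for
    predict-only models, an error for anything else."""
    if callable(getattr(model, 'predict_proba', None)):
        return True
    if callable(getattr(model, 'predict', None)):
        return False
    raise TypeError('LIME requires a model with predict or predict_proba.')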
import re
import numpy as np
import lime.lime_tabular
from sklearn import datasets  # note: load_boston was removed in scikit-learn 1.2

def lime_explain(input):
    # `model` is assumed to be a fitted regressor defined at module level
    boston = datasets.load_boston()
    # treat any column with at most 10 distinct values as categorical
    categorical_features = np.argwhere(
        np.array([len(set(boston.data[:, x])) for x in range(boston.data.shape[1])]) <= 10
    ).flatten()
    explainer = lime.lime_tabular.LimeTabularExplainer(
        boston.data, feature_names=boston.feature_names, class_names=['price'],
        categorical_features=categorical_features, verbose=True, mode='regression')
    exp = explainer.explain_instance(np.array(input), model.predict, num_features=5).as_list()
    lime_feature_contributions = {}
    for feature, contribution in exp:
        # strip the discretization bounds, keeping only the feature name
        feature_name = re.findall("[a-zA-Z]+", feature)[0]
        lime_feature_contributions[f'LIME_{feature_name}'] = contribution
    return lime_feature_contributions
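# Hedged usage sketch: lime_explain() reads a module-level `model`; a
# RandomForestRegressor fitted on the same Boston data is assumed here.
from sklearn.ensemble import RandomForestRegressor
boston = datasets.load_boston()
model = RandomForestRegressor(n_estimators=50).fit(boston.data, boston.target)
print(lime_explain(boston.data[0]))  # e.g. {'LIME_LSTAT': ..., 'LIME_RM': ...}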
    # data_dict = ast.literal_eval(json.loads(flask.request.data))
    print("try open model")
    with open(flask.request.form.get("model_path"), 'rb') as f:
        model = pickle.load(f)

    train_data = json.loads(flask.request.form.get("data"))
    dim = json.loads(flask.request.form.get("dim"))
    train_data = np.asarray(train_data)
    train_data = train_data.reshape(int(train_data.size / dim), dim)
    sample = json.loads(flask.request.form.get("sample"))
    num_features = int(flask.request.args.get("numfeatures"))

    explainer = lime_tabular.LimeTabularExplainer(
        train_data, mode="classification", discretize_continuous=True)
    exp = explainer.explain_instance(
        np.asarray(sample), model.predict_proba,
        num_features=num_features, top_labels=1)

    data = {}  # response payload
    explanation_dictionary = {}
    # with top_labels=1 only the top label's explanation is populated
    for entry in exp.as_list(label=exp.available_labels()[0]):
        explanation_dictionary.update({entry[0]: entry[1]})
    data["explanation"] = explanation_dictionary
    data["success"] = "success"
    return flask.Response(json.dumps(data), mimetype="text/plain")
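# Hedged client-side sketch: the route path and host are assumptions (the
# snippet does not show the @app.route decorator); the form fields match the
# handler above.
import json
import numpy as np
import requests
X = np.random.rand(20, 4)
resp = requests.post(
    "http://localhost:5000/explain?numfeatures=3",
    data={
        "model_path": "/tmp/model.pkl",  # pickled classifier on the server
        "data": json.dumps(X.flatten().tolist()),
        "dim": json.dumps(X.shape[1]),
        "sample": json.dumps(X[0].tolist()),
    },
)
print(json.loads(resp.text)["explanation"])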
def _init_explainer(df, features, columns, mode):
    return lime.lime_tabular.LimeTabularExplainer(
        df,
        feature_names=features,
        categorical_features=list(range(len(columns))),
        verbose=True,
        mode=mode,
    )
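# Hedged usage sketch: every column is declared categorical by
# _init_explainer, so integer-encoded training data is assumed.
import numpy as np
cols = ['activity', 'resource', 'duration_bucket']
X = np.random.randint(0, 4, size=(50, len(cols)))
explainer = _init_explainer(X, cols, cols, mode='classification')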
def __init__(self, *argv, **kwargs):
    """
    Initialize lime Tabular Explainer object
    """
    super(LimeTabularExplainer, self).__init__(*argv, **kwargs)
    self.explainer = lime_tabular.LimeTabularExplainer(*argv, **kwargs)
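# Hedged usage sketch: assumes the surrounding class (named
# LimeTabularExplainer, per the super() call) and its base class are
# importable; the wrapper forwards all arguments to lime unchanged.
import numpy as np
X_train = np.random.rand(30, 3)
wrapped = LimeTabularExplainer(X_train, feature_names=['a', 'b', 'c'], mode='regression')
exp = wrapped.explainer.explain_instance(
    X_train[0], lambda X: X.sum(axis=1), num_features=3)  # toy predict function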
# Load data
X_train, y_train, X_valid, y_valid, X_test, y_test, train_mean, train_stddev = (
    load_normalize_data("../Datasets/" + dataset + ".csv"))
n = X_test.shape[0]
d = X_test.shape[1]

# Load the noise scale parameters
# with open("Sigmas/" + dataset + ".json", "r") as tmp:
#     scales = json.load(tmp)
scales = [0.1, 0.25]
scales_len = len(scales)

# Fit MAPLE model
exp_maple = MAPLE(X_train, y_train, X_valid, y_valid)

# Fit LIME to explain MAPLE
exp_lime = lime_tabular.LimeTabularExplainer(X_train, discretize_continuous=False, mode="regression")

# Evaluate model faithfulness on the test set
rmse = 0.0  # MAPLE accuracy on the dataset
lime_rmse = np.zeros(scales_len)
maple_rmse = np.zeros(scales_len)
for i in range(n):
    x = X_test[i, :]

    # LIME's default num_samples is 5000:
    # 1) larger than any of the datasets we tested on, and
    # 2) it makes explaining MAPLE impractically slow, since the complexity
    #    of MAPLE's predict() depends on the dataset size
    coefs_lime = unpack_coefs(exp_lime, x, exp_maple.predict, d, X_train, num_samples=100)

    e_maple = exp_maple.explain(x)
    coefs_maple = e_maple["coefs"]
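    # The snippet ends before the accumulation step. A hedged sketch of how
    # the per-scale errors could be accumulated; the perturbation scheme, the
    # convention that index 0 of each coefficient vector holds the intercept,
    # and MAPLE's predict() accepting a 2-d array are all assumptions here.
    for j, scale in enumerate(scales):
        x_pert = x + scale * np.random.normal(size=d)
        model_pred = exp_maple.predict(x_pert.reshape(1, -1))[0]
        lime_pred = coefs_lime[0] + np.dot(coefs_lime[1:], x_pert)
        maple_pred = coefs_maple[0] + np.dot(coefs_maple[1:], x_pert)
        lime_rmse[j] += (lime_pred - model_pred) ** 2
        maple_rmse[j] += (maple_pred - model_pred) ** 2
# after the loop: lime_rmse = np.sqrt(lime_rmse / n), and likewise for maple_rmse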
def __init__(self, model, data, mode="classification"):
    self.model = model
    assert mode in ["classification", "regression"]
    self.mode = mode

    # accept a pandas DataFrame without importing pandas directly
    if str(type(data)).endswith("pandas.core.frame.DataFrame'>"):
        data = data.values
    self.data = data
    self.explainer = lime.lime_tabular.LimeTabularExplainer(data, mode=mode)

    # probe the model's output shape on a single row
    out = self.model(data[0:1])
    if len(out.shape) == 1:
        self.out_dim = 1
        self.flat_out = True
        if mode == "classification":
            def pred(X):  # assume that 1d outputs are probabilities
                preds = self.model(X).reshape(-1, 1)
                p0 = 1 - preds
                return np.hstack((p0, preds))
            self.model = pred
    else:
        self.out_dim = self.model(data[0:1]).shape[1]
        self.flat_out = False
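# Hedged usage sketch: assumes the enclosing class is a model-agnostic
# wrapper (here called LimeTabular; the class statement is not shown) and
# that `model` is a callable mapping a 2-d array to outputs, matching the
# self.model(data[0:1]) probe above.
import numpy as np
from sklearn.linear_model import LogisticRegression
X = np.random.rand(100, 4)
y = (X[:, 0] > 0.5).astype(int)
clf = LogisticRegression().fit(X, y)
wrapper = LimeTabular(clf.predict_proba, X, mode="classification")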