                not_in_model))
        else:
            X = df[self._features].values  # rectify feature order
            y_pred = self._backend.predict(X)
            df[target + " predicted"] = y_pred
            self._logger.debug("Prediction finished successfully.")
            return df
if __name__ == "__main__":
from matminer.datasets.dataset_retrieval import load_dataset
from automatminer.featurization import AutoFeaturizer
from automatminer.preprocessing import DataCleaner, FeatureReducer
# Load a dataset
df = load_dataset("elastic_tensor_2015").rename(
columns={"formula": "composition"})[["composition", "K_VRH"]]
testdf = df.iloc[501:550]
traindf = df.iloc[:100]
target = "K_VRH"
# Get top-lvel transformers
autofeater = AutoFeaturizer()
cleaner = DataCleaner()
reducer = FeatureReducer()
learner = TPOTAdaptor("regression", max_time_mins=5)
# Fit transformers on training data
traindf = autofeater.fit_transform(traindf, target)
traindf = cleaner.fit_transform(traindf, target)
traindf = reducer.fit_transform(traindf, target)
learner.fit(traindf, target)
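    # A sketch of the natural continuation (assumed; not shown in the snippet
    # above): apply the fitted transformers to the held-out rows, then predict
    # with the fitted adaptor, whose predict() tail is shown in the first snippet.
    testdf = autofeater.transform(testdf, target)
    testdf = cleaner.transform(testdf, target)
    testdf = reducer.transform(testdf, target)
    testdf = learner.predict(testdf, target)
    print(testdf)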
"""
This file will eventually hold a function that tests a mslearn
pipeline on a set of datasets for predictive power.
"""
from matminer.datasets.dataset_retrieval import load_dataset, get_available_datasets
# from matminer.datasets.convenience_loaders import
if __name__ == "__main__":
df_piezo = load_dataset("piezoelectric_tensor")
df_exgap = load_dataset("expt_gap")
df_elastic = load_dataset("elastic_tensor_2015")
df_glass = load_dataset("glass_binary")
"""
from matminer.datasets.dataset_retrieval import load_dataset
from matminer.utils.io import store_dataframe_as_json
from matminer.featurizers.conversions import StrToComposition
from tqdm import tqdm
import pandas as pd
# pd.set_option('display.height', 1000)
pd.set_option("display.max_rows", 500)
pd.set_option("display.max_columns", 500)
pd.set_option("display.width", 1000)
df = load_dataset("glass_ternary_landolt")
df = df.rename(columns={"formula": "composition"})
df = df[["composition", "gfa"]]
df = StrToComposition(target_col_id="composition_obj").featurize_dataframe(
    df, "composition"
)
df["composition"] = [c.reduced_formula for c in df["composition_obj"]]
df = df.drop(columns=["composition_obj"])
# print("Ground truth")
# print(df[df["composition"]=="ZrTi9"]) # should be False in final dataframe also!!
# print(df[df["composition"]=="ZrVCo8"]) # should be True in final dataframe also!
# print(df["gfa"].value_counts()) # proportion is about 5000 GFA 2054 no GFA
# raise ValueError
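# A possible continuation (assumed, not part of the original snippet): collapse
# duplicate compositions by majority vote on "gfa" and save the result with the
# already-imported helper. The aggregation rule and output filename are assumptions.
df_dedup = df.groupby("composition", as_index=False).agg(
    {"gfa": lambda s: s.mode().iloc[0]})
store_dataframe_as_json(df_dedup, "glass_ternary_landolt_dedup.json.gz")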
"""
This file will eventually hold a function that tests a mslearn
pipeline on a set of datasets for predictive power.
"""
from matminer.datasets.dataset_retrieval import load_dataset, get_available_datasets
# from matminer.datasets.convenience_loaders import
if __name__ == "__main__":
df_piezo = load_dataset("piezoelectric_tensor")
df_exgap = load_dataset("expt_gap")
df_elastic = load_dataset("elastic_tensor_2015")
df_glass = load_dataset("glass_binary")
"""
This file will eventually hold a function that tests a mslearn
pipeline on a set of datasets for predictive power.
"""
from matminer.datasets.dataset_retrieval import load_dataset, get_available_datasets
# from matminer.datasets.convenience_loaders import
if __name__ == "__main__":
df_piezo = load_dataset("piezoelectric_tensor")
df_exgap = load_dataset("expt_gap")
df_elastic = load_dataset("elastic_tensor_2015")
df_glass = load_dataset("glass_binary")
def test_mse_example(self):
    df = load_dataset("elastic_tensor_2015")
    default_config = get_preset_config("default")
    pipe = MatPipe(**default_config)
    df = df.rename(columns={"formula": "composition"})[
        ["composition", "structure", "K_VRH"]]
    predicted = pipe.benchmark(df, "K_VRH", test_spec=0.2)
    self.assertTrue(not predicted.empty)
    y_true = predicted["K_VRH"]
    y_test = predicted["K_VRH predicted"]
    mse = mean_squared_error(y_true, y_test)
    print("MSE: " + str(mse))
    self.assertTrue(mse < 500)
    self.assertTrue(mse > 0)
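# The names used in the test above are assumed to come from these imports (an
# assumption about the original test module, where they would sit at the top of
# the file rather than after the method):
from matminer.datasets.dataset_retrieval import load_dataset
from sklearn.metrics import mean_squared_error
from automatminer.presets import get_preset_config
from automatminer.pipeline import MatPipe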
"""
from matminer.datasets.dataset_retrieval import load_dataset
from matminer.utils.io import store_dataframe_as_json
from matminer.featurizers.conversions import StrToComposition
from tqdm import tqdm
import numpy as np
import pandas as pd
pd.set_option("display.max_rows", 500)
pd.set_option("display.max_columns", 500)
pd.set_option("display.width", 1000)
pd.set_option("precision", 8)
df = load_dataset("expt_gap")
df = df.rename(columns={"formula": "composition"})
# print("Ground Truth")
# print(df[df["composition"] == "ZrW2"]) # should be 0.00
# print(df[df["composition"] == "ZrSe2"]) # should be 2.00
# raise ValueError
excluded_compositions = []
# Prevent differences in order of formula symbols from corrupting the actual number of unique compositions
df = StrToComposition(target_col_id="composition_obj").featurize_dataframe(
    df, "composition"
)
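# A possible continuation mirroring the glass snippet above (assumed, not part
# of the original file): collapse each entry to its reduced formula and average
# duplicate band-gap measurements. The "gap expt" column name and the averaging
# rule are assumptions.
df["composition"] = [c.reduced_formula for c in df["composition_obj"]]
df = df.drop(columns=["composition_obj"])
df = df.groupby("composition", as_index=False)["gap expt"].mean()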
"""
from matminer.datasets.dataset_retrieval import load_dataset
from matminer.data_retrieval.retrieve_MP import MPDataRetrieval
import pandas as pd
# pd.set_option('display.height', 1000)
pd.set_option("display.max_rows", 500)
pd.set_option("display.max_columns", 500)
pd.set_option("display.width", 1000)
mpdr = MPDataRetrieval()
df = load_dataset("castelli_perovskites")
df = df[["structure", "e_form"]]
df = df.reset_index(drop=True)
print(df)
df.to_pickle("castelli.pickle.gz")
col.replace("_", "|")
.replace("-", "|")
.replace(" ", "||")
.replace("(", " ")
.replace(")", "")
)
colmap[col] = k
return colmap
if __name__ == "__main__":
# Just trying it out with a single dataset, Dielectric from MP...
for config in [DIELECTRIC]:
project = config["data_file"].replace(".json.gz", "")
df = load_dataset(project)
pinput = "structure" if "structure" in df.columns else "composition"
column_map_pretty = pretty_column_map(df.columns.tolist())
df = df.rename(columns=column_map_pretty)
target = column_map_pretty[config["target"]]
# print(pinput)
# raise ValueError
# print(df)
# raise ValueError
# clean up
        has_more = True
        while has_more:
            resp = client.contributions.delete_entries(
                project=project, _limit=250
            ).result()  # assumed completion; the original snippet is truncated here
            has_more = resp.get("has_more", False)
raise ValueError("{} is an unknown learner name!"
"".format(self["learner_name"]))
# Set up the pipeline and data
pipe_config_dict = fw_spec["pipe_config"]
pipe_config = {"learner": learner(**pipe_config_dict["learner_kwargs"]),
"reducer": FeatureReducer(
**pipe_config_dict["reducer_kwargs"]),
"cleaner": DataCleaner(
**pipe_config_dict["cleaner_kwargs"]),
"autofeaturizer_kwargs":
AutoFeaturizer(
**pipe_config_dict["autofeaturizer_kwargs"])}
pipe = MatPipe(**pipe_config)
dataset = fw_spec["dataset"]
df = load_dataset(dataset)
df = df.rename(columns=REWRITE_COLS[dataset])[RELEVANT_COLS[dataset]]
target = TARGETS[dataset]
# Run the benchmark
t1 = time.time()
predicted_test_df = pipe.benchmark(df, target, test_spec=0.2)
elapsed_time = time.time() - t1
# Save everything
savedir = fw_spec["save_dir"]
pipe.save(os.path.join(savedir, "pipe.p"))
pipe.digest(os.path.join(savedir, "digest.txt"))
predicted_test_df.to_csv(os.path.join(savedir, "test_df.csv"))
pipe.post_fit_df.to_csv(os.path.join(savedir, "fitted_df.csv"))
# Evaluate model
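# A minimal evaluation sketch (assumed; the original snippet is cut off at the
# comment above): compare the true and predicted target columns, following the
# "<target> predicted" naming used earlier on this page.
from sklearn.metrics import mean_squared_error
mse = mean_squared_error(predicted_test_df[target],
                         predicted_test_df[target + " predicted"])
print("{} MSE: {}".format(dataset, mse))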