How to use the matminer.featurizers.conversions.StrToComposition function in matminer

To help you get started, we’ve selected a few matminer examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github hackingmaterials / automatminer / automatminer / featurization / core.py View on Github external
Returns:
            df (pandas.DataFrame): DataFrame with featurizer_type column
                ready for featurization.
        """
        # todo: Make the following conversions more robust (no [0] type checking)
        type_tester = df[featurizer_type].iloc[0]

        if featurizer_type == self.composition_col:
            # Convert formulas to composition objects
            if isinstance(type_tester, str):
                logger.info(
                    self._log_prefix
                    + "Compositions detected as strings. Attempting "
                    "conversion to Composition objects..."
                )
                stc = StrToComposition(
                    overwrite_data=True, target_col_id=featurizer_type
                )
                stc.set_n_jobs(self.n_jobs)
                df = stc.featurize_dataframe(
                    df,
                    featurizer_type,
                    multiindex=self.multiindex,
                    ignore_errors=True,
                    inplace=False,
                )

            elif isinstance(type_tester, dict):
                logger.info(
                    self._log_prefix + "Compositions detected as dicts. Attempting "
                    "conversion to Composition objects..."
                )
github hackingmaterials / automatminer / automatminer_dev / matbench / expt_gap.py View on Github external
# Load the "expt_gap" dataset and rename its "formula" column to "composition"
# so the rest of the script can refer to a single, consistent column name.
df = load_dataset("expt_gap")
df = df.rename(columns={"formula": "composition"})


# Disabled manual spot-check: print the raw rows for two known formulas and
# stop the script (via the raise) before any further processing happens.
# print("Ground Truth")
# print(df[df["composition"] == "ZrW2"])  # should be 0.00
# print(df[df["composition"] == "ZrSe2"]) # should be 2.00
# raise ValueError


# NOTE(review): not used in the visible part of the script; presumably filled
# in by code further down -- confirm against the full file.
excluded_compositions = []


# Prevent differences in order of formula symbols from corrupting the actual number of unique compositions
# Steps: featurize the formula strings into a temporary "composition_obj"
# column, overwrite "composition" with each object's reduced_formula, then
# drop the temporary column again.
df = StrToComposition(target_col_id="composition_obj").featurize_dataframe(
    df, "composition"
)
df["composition"] = [c.reduced_formula for c in df["composition_obj"]]
df = df.drop(columns=["composition_obj"])

# Distinct normalized formulas; several rows may share one formula.
unique = df["composition"].unique()
print("Number of unique compositions:", len(unique))
# raise ValueError

new_df_dict = {"composition": [], "gap expt": []}
for c in tqdm(unique):
    df_per_comp_gaps = df[df["composition"] == c]
    per_comp_gaps = df_per_comp_gaps["gap expt"]
    measurement_range = max(per_comp_gaps) - min(per_comp_gaps)
    if measurement_range > 0.1:
        # print(df_per_comp_gaps)
github hackingmaterials / automatminer / automatminer_dev / matbench / expt_is_metal.py View on Github external
# Widen pandas' console output so the printed DataFrames are not truncated.
pd.set_option("display.max_columns", 500)
pd.set_option("display.width", 1000)

# Load the "expt_gap" dataset and rename "formula" to "composition".
df = load_dataset("expt_gap")
df = df.rename(columns={"formula": "composition"})
print(df)
# Derive the boolean target: a row is labelled a metal exactly when its
# "gap expt" value equals 0. The raw gap column is not needed afterwards.
df["is_metal"] = df["gap expt"] == 0
df = df.drop(columns=["gap expt"])

# Disabled manual spot-check of known formulas and of the label balance;
# the raise would stop the script before any further processing.
# print("Ground truth")
# print(df[df["composition"]=="ZrSe3"]) # should be False in final dataframe also
# print(df[df["composition"]=="ZrW2"]) # should be True in final dataframe also
# print(df["is_metal"].value_counts())   # proportion is about 2500 metals to 4k nonmetals
# raise ValueError

# Normalize formulas: featurize the strings into a temporary
# "composition_obj" column, overwrite "composition" with each object's
# reduced_formula, then drop the temporary column.
df = StrToComposition(target_col_id="composition_obj").featurize_dataframe(
    df, "composition"
)
df["composition"] = [c.reduced_formula for c in df["composition_obj"]]
df = df.drop(columns=["composition_obj"])

# Distinct normalized formulas; several rows may share one formula.
unique = df["composition"].unique()
print("Number of unique compositions:", len(unique))

# Accumulators for the per-composition loop that follows.
# NOTE(review): how they are filled is not visible in this excerpt.
problem_compositions = []
new_df_dict = {"composition": [], "is_metal": []}
for c in tqdm(unique):
    # All measurement rows recorded for this normalized formula.
    df_per_comp_is_metal = df[df["composition"] == c]
    per_comp_is_metal = df_per_comp_is_metal["is_metal"]
    # True when at least one measurement labels this composition a metal.
    any_metals = any(per_comp_is_metal)
    # True only when every measurement labels it a metal. This was
    # previously computed with any(), which made it identical to any_metals,
    # so conflicting measurements (some metal, some not) were undetectable.
    all_metals = all(per_comp_is_metal)
    # Label for this composition, unset at this point.
    # NOTE(review): presumably resolved from the two flags by the code that
    # follows in the full file -- confirm there.
    is_metal = None
github hackingmaterials / automatminer / automatminer_dev / matbench / glass.py View on Github external
from tqdm import tqdm

import pandas as pd

# Widen pandas' console output so printed DataFrames are not truncated.
# (The "display.height" option below is left disabled.)
# pd.set_option('display.height', 1000)
pd.set_option("display.max_rows", 500)
pd.set_option("display.max_columns", 500)
pd.set_option("display.width", 1000)


# Load the "glass_ternary_landolt" dataset.
df = load_dataset("glass_ternary_landolt")

# Rename "formula" to "composition" and keep only that column and the "gfa"
# column; every other column is discarded.
df = df.rename(columns={"formula": "composition"})
df = df[["composition", "gfa"]]

# Normalize formulas: featurize the strings into a temporary
# "composition_obj" column, overwrite "composition" with each object's
# reduced_formula, then drop the temporary column.
df = StrToComposition(target_col_id="composition_obj").featurize_dataframe(
    df, "composition"
)
df["composition"] = [c.reduced_formula for c in df["composition_obj"]]
df = df.drop(columns=["composition_obj"])

# Disabled manual spot-check of known formulas and of the label balance;
# the raise would stop the script before any further processing.
# print("Ground truth")
# print(df[df["composition"]=="ZrTi9"])  # should be False in final dataframe also!!
# print(df[df["composition"]=="ZrVCo8"]) # should be True in final dataframe also!
# print(df["gfa"].value_counts())    # proportion is about 5000 GFA 2054 no GFA
# raise ValueError

# Compare the total row count with the number of distinct normalized
# formulas; a difference means some formulas occur in more than one row.
unique = df["composition"].unique()
print(len(df))
print(len(unique))

# NOTE(review): not used in the visible part of the script; presumably filled
# in by code further down -- confirm against the full file.
problem_compositions = []