How to use the pgmpy.inference.VariableElimination class in pgmpy

To help you get started, we’ve selected a few pgmpy examples, based on popular ways it is used in public projects.

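Before the project excerpts, here is a minimal, self-contained sketch of the pattern they all share: build a network, attach CPDs, validate, then query with VariableElimination. The two-node model is invented purely for illustration, and the BayesianNetwork class name assumes a recent pgmpy release (the older excerpts below use the deprecated BayesianModel alias).

from pgmpy.models import BayesianNetwork
from pgmpy.factors.discrete import TabularCPD
from pgmpy.inference import VariableElimination

# Toy network: Rain -> WetGrass (both binary).
model = BayesianNetwork([('Rain', 'WetGrass')])
cpd_rain = TabularCPD(variable='Rain', variable_card=2, values=[[0.8], [0.2]])
cpd_wet = TabularCPD(variable='WetGrass', variable_card=2,
                     values=[[0.9, 0.1],
                             [0.1, 0.9]],
                     evidence=['Rain'], evidence_card=[2])
model.add_cpds(cpd_rain, cpd_wet)
model.check_model()

infer = VariableElimination(model)
# query() returns a DiscreteFactor in recent releases; print it directly.
print(infer.query(variables=['Rain'], evidence={'WetGrass': 1}))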

github koushalkh / ML-LAB / prog / 7.py
# Imports used by this excerpt; `heartDisease` is the DataFrame loaded
# from the heart-disease CSV earlier in the original script.
import numpy as np
from pgmpy.models import BayesianModel
from pgmpy.estimators import MaximumLikelihoodEstimator
from pgmpy.inference import VariableElimination

del heartDisease['thal']
del heartDisease['oldpeak']
heartDisease = heartDisease.replace('?', np.nan)
print(heartDisease.dtypes)
print(heartDisease.columns)
model = BayesianModel([('age', 'trestbps'), ('age', 'fbs'),
                       ('sex', 'trestbps'), ('exang', 'trestbps'),
                       ('trestbps', 'heartdisease'), ('fbs', 'heartdisease'),
                       ('heartdisease', 'restecg'), ('heartdisease', 'thalach'),
                       ('heartdisease', 'chol')])
model.fit(heartDisease, estimator=MaximumLikelihoodEstimator)
print(model.get_cpds('age'))
print(model.get_cpds('chol'))
print(model.get_cpds('sex'))
print(model.get_independencies())
HeartDisease_infer = VariableElimination(model)

# In current pgmpy, query() returns a DiscreteFactor directly.
q = HeartDisease_infer.query(variables=['heartdisease'], evidence={'age': 28})
print(q)
q = HeartDisease_infer.query(variables=['heartdisease'], evidence={'chol': 100})
print(q)
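The same engine accepts several evidence variables at once; a small extra query building on the excerpt above:

# Joint evidence: condition on both age and cholesterol simultaneously.
q = HeartDisease_infer.query(variables=['heartdisease'],
                             evidence={'age': 28, 'chol': 100})
print(q)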
github probml / pyprobml / Old / examples / asia-pgm-demo.py
#!wget http://www.bnlearn.com/bnrepository/asia/asia.bif.gz
#!gzip -qd asia.bif.gz   # gzip -d deletes the .gz archive itself, so no rm is needed

from pgmpy.readwrite import BIFReader
reader = BIFReader('data/asia.bif')
asia_model = reader.get_model()

print(asia_model.nodes())

print(asia_model.edges())

CPDs = asia_model.get_cpds()

# Doing exact inference using Variable Elimination
from pgmpy.inference import VariableElimination
asia_infer = VariableElimination(asia_model)

# Compute the probability of bronc given smoke. Current pgmpy uses the
# state names from the BIF file ('yes'/'no') as evidence values, and
# query() returns a DiscreteFactor directly.
q = asia_infer.query(variables=['bronc'], evidence={'smoke': 'yes'})
print(q)

'''
Sanity check:
p(A=t|T=t) = p(A=t) p(T=t|A=t) / [p(A=t) p(T=t|A=t) + p(A=f) p(T=t|A=f)]
           = 0.01 * 0.05 / (0.01 * 0.05 + 0.99 * 0.01)
           = 0.0481
'''
q = asia_infer.query(variables=['asia'], evidence={'tub': 'yes'})
print(q)
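As a quick numeric cross-check of the docstring math above (the CPD values come straight from the standard asia network):

# Bayes' rule by hand: P(asia=yes) = 0.01, P(tub=yes|asia=yes) = 0.05,
# P(tub=yes|asia=no) = 0.01.
p_a, p_t_a, p_t_na = 0.01, 0.05, 0.01
print(p_a * p_t_a / (p_a * p_t_a + (1 - p_a) * p_t_na))  # ~0.0481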
github probml / pyprobml / scripts / student_pgm_inf_autodiff.py
# paramsG, like paramsL and paramsS below, is defined earlier in the original script.
cpd_g = TabularCPD(variable='G', variable_card=3,
                   values=paramsG,
                   evidence=['I', 'D'],
                   evidence_card=[2, 2])

cpd_l = TabularCPD(variable='L', variable_card=2, 
                   values=paramsL,
                   evidence=['G'],
                   evidence_card=[3])

cpd_s = TabularCPD(variable='S', variable_card=2,
                   values=paramsS,
                   evidence=['I'],
                   evidence_card=[2])


model.add_cpds(cpd_d, cpd_i, cpd_g, cpd_l, cpd_s)
model.check_model()
inf_engine_ve = VariableElimination(model) # compute elim order only once

def infer_pgmpy(evidence, query):
    # In current pgmpy, query() returns the DiscreteFactor directly.
    factor = inf_engine_ve.query([query], evidence=evidence)
    marginal = factor.values  # convert from DiscreteFactor to np array
    return marginal

## Check that both inference engines give the same posterior marginals

evlist = []
evlist.append({})
evlist.append({'G': 0, 'D': 0})
evlist.append({'L': 0, 'D': 1, 'S': 1})
for evidence in evlist:
    all_nodes = set(dag.keys())
    vis_nodes = set(evidence.keys())
    hid_nodes = all_nodes.difference(vis_nodes)
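A hypothetical call to the infer_pgmpy helper defined above (state indices follow the snippet's integer encoding):

# Marginal over grades G given an easy course (D=0) and a strong letter (L=0).
print(infer_pgmpy({'D': 0, 'L': 0}, 'G'))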
github probml / pyprobml / scripts / student_pgm.py
cpd_s = TabularCPD(variable='S', variable_card=2,
                   values=[[0.95, 0.2],
                           [0.05, 0.8]],
                   evidence=['I'],
                   evidence_card=[2])

# Associating the CPDs with the network
model.add_cpds(cpd_d, cpd_i, cpd_g, cpd_l, cpd_s)

# check_model() validates the network structure and verifies that the
# CPDs are consistently defined and sum to 1.
model.check_model()

from pgmpy.inference import VariableElimination
infer = VariableElimination(model)

# In current pgmpy, query() returns a DiscreteFactor directly.

# p(I=1) = 0.3
print(infer.query(['I']))

# P(I=1|G=0) = 0.6133
print(infer.query(['I'], evidence={'G': 0}))

# P(I=1|G=0,D=0) = 0.5625
print(infer.query(['I'], evidence={'G': 0, 'D': 0}))

# P(S=1|G=0) = 0.5099
print(infer.query(['S'], evidence={'G': 0}))
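query() also takes an explicit elimination_order; a short sketch on the same model (the list must name exactly the variables that get eliminated):

# Same query as above, eliminating D, L and S in a hand-picked order.
print(infer.query(['I'], evidence={'G': 0},
                  elimination_order=['D', 'L', 'S']))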
github pgmpy / pgmpy / pgmpy / models / BayesianModel.py
"""
        from pgmpy.inference import VariableElimination

        if set(data.columns) == set(self.nodes()):
            raise ValueError("No variable missing in data. Nothing to predict")

        elif set(data.columns) - set(self.nodes()):
            raise ValueError("Data has variables which are not in the model")

        data_unique = data.drop_duplicates()
        missing_variables = set(self.nodes()) - set(data_unique.columns)

        # Run a MAP query for the missing variables on each unique row, in parallel.
        model_inference = VariableElimination(self)
        pred_values = Parallel(n_jobs=n_jobs)(
            delayed(model_inference.map_query)(
                variables=missing_variables,
                evidence=data_point.to_dict(),
                show_progress=False,
            )
            for index, data_point in tqdm(
                data_unique.iterrows(), total=data_unique.shape[0]
            )
        )

        df_results = pd.DataFrame(pred_values, index=data_unique.index)
        data_with_results = pd.concat([data_unique, df_results], axis=1)
        return data.merge(data_with_results, how="left").loc[:, missing_variables]
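A hedged sketch of how this predict() method is typically called; `model` stands for an already fitted network over the student variables, and the column names are hypothetical:

import pandas as pd

# 'S' is missing from the columns, so predict() fills it in via MAP queries.
test = pd.DataFrame({'D': [0, 1], 'I': [1, 0], 'G': [0, 2], 'L': [1, 0]})
print(model.predict(test))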
github probml / pyprobml / scripts / sprinkler_dgm.py
cpd_w = TabularCPD(variable='W', variable_card=2,
                   values=[[1.0, 0.1, 0.1, 0.01],
                           [0.0, 0.9, 0.9, 0.99]],
                   evidence=['S', 'R'],
                   evidence_card=[2, 2])

# Associating the CPDs with the network
model.add_cpds(cpd_c, cpd_s, cpd_r, cpd_w)

# check_model() validates the network structure and verifies that the
# CPDs are consistently defined and sum to 1.
model.check_model()

from pgmpy.inference import VariableElimination
infer = VariableElimination(model)

# In current pgmpy, query() returns a DiscreteFactor directly.

# p(R=1) = 0.5*0.2 + 0.5*0.8 = 0.5
print(infer.query(['R']))

# P(R=1|W=1) = 0.7079
print(infer.query(['R'], evidence={'W': 1}))

# Explaining away: observing the sprinkler lowers the probability of rain.
# P(R=1|W=1,S=1) = 0.3204
print(infer.query(['R'], evidence={'W': 1, 'S': 1}))
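The same VariableElimination object also supports MAP queries; for instance, the single most likely joint state of cloudy and rain given wet grass:

# Most likely joint assignment of C (cloudy) and R (rain) given W=1.
print(infer.map_query(['C', 'R'], evidence={'W': 1}))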
github pgmpy / pgmpy / pgmpy / models / BayesianModel.py
        97  0.417124    0.582876
        98  0.488275    0.511725
        99  0.407978    0.592022
        """
        from pgmpy.inference import VariableElimination

        if set(data.columns) == set(self.nodes()):
            raise ValueError("No variable missing in data. Nothing to predict")

        elif set(data.columns) - set(self.nodes()):
            raise ValueError("Data has variables which are not in the model")

        missing_variables = set(self.nodes()) - set(data.columns)
        pred_values = defaultdict(list)

        model_inference = VariableElimination(self)
        for index, data_point in data.iterrows():
            full_distribution = model_inference.query(
                variables=missing_variables,
                evidence=data_point.to_dict(),
                show_progress=False,
            )
            states_dict = {}
            for var in missing_variables:
                states_dict[var] = full_distribution.marginalize(
                    missing_variables - {var}, inplace=False
                )
            for k, v in states_dict.items():
                for l in range(len(v.values)):
                    state = self.get_cpds(k).state_names[k][l]
                    pred_values[k + "_" + str(state)].append(v.values[l])
        return pd.DataFrame(pred_values, index=data.index)
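A matching hedged sketch for predict_probability(), reusing the hypothetical test frame from the predict() example above; the result has one column per state of each missing variable:

# Columns S_0 and S_1 hold P(S=0) and P(S=1) for each row of test.
print(model.predict_probability(test))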