Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
... data = pd.DataFrame(np.random.randint(0, 5, size=(5000, 9)), columns=list('ABCDEFGHI'))
>>> # add 10th dependent variable
... data['J'] = data['A'] * data['B']
>>> est = HillClimbSearch(data, scoring_method=BicScore(data))
>>> best_model = est.estimate()
>>> sorted(best_model.nodes())
['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J']
>>> best_model.edges()
[('B', 'J'), ('A', 'J')]
>>> # search a model with restriction on the number of parents:
>>> est.estimate(max_indegree=1).edges()
[('J', 'A'), ('B', 'J')]
"""
nodes = self.state_names.keys()
if start is None:
start = DAG()
start.add_nodes_from(nodes)
elif not isinstance(start, DAG) or not set(start.nodes()) == set(nodes):
raise ValueError(
"'start' should be a DAG with the same variables as the data set, or 'None'."
)
tabu_list = []
current_model = start
iter_no = 0
while iter_no <= max_iter:
iter_no += 1
best_score_delta = 0
best_operation = None
import numpy as np
import warnings
import itertools
from networkx.algorithms.dag import descendants
from pyparsing import OneOrMore, Word, Optional, Suppress, alphanums, nums
from pgmpy.base import DAG
from pgmpy.global_vars import HAS_PANDAS
if HAS_PANDAS:
import pandas as pd
class SEMGraph(DAG):
"""
Base class for graphical representation of Structural Equation Models (SEMs).
All variables are by default assumed to have an associated error latent variable, which therefore
doesn't need to be specified.
Attributes
----------
latents: list
List of all the latent variables in the model except the error terms.
observed: list
List of all the observed variables in the model.
graph: nx.DirectedGraph
The graphical structure of the latent and observed variables except the error terms.
>>> # pdag_to_dag is static:
... pdag1 = DAG([('A', 'B'), ('C', 'B'), ('C', 'D'), ('D', 'C'), ('D', 'A'), ('A', 'D')])
>>> ConstraintBasedEstimator.pdag_to_dag(pdag1).edges()
[('D', 'C'), ('C', 'B'), ('A', 'B'), ('A', 'D')]
>>> # example of a pdag with no faithful extension:
... pdag2 = DAG([('A', 'B'), ('A', 'C'), ('B', 'C'), ('C', 'B')])
>>> ConstraintBasedEstimator.pdag_to_dag(pdag2).edges()
UserWarning: PDAG has no faithful extension (= no oriented DAG with the same v-structures as PDAG).
Remaining undirected PDAG edges oriented arbitrarily.
[('B', 'C'), ('A', 'B'), ('A', 'C')]
"""
pdag = pdag.copy()
dag = DAG()
dag.add_nodes_from(pdag.nodes())
# add already directed edges of pdag to dag
for X, Y in pdag.edges():
if not pdag.has_edge(Y, X):
dag.add_edge(X, Y)
while pdag.number_of_nodes() > 0:
# find node with (1) no directed outgoing edges and
# (2) the set of undirected neighbors is either empty or
# undirected neighbors + parents of X are a clique
found = False
for X in pdag.nodes():
directed_outgoing_edges = set(pdag.successors(X)) - set(
pdag.predecessors(X)
)
import pandas as pd
from tqdm import tqdm
from joblib import Parallel, delayed
from pgmpy.base import DAG
from pgmpy.factors.discrete import (
TabularCPD,
JointProbabilityDistribution,
DiscreteFactor,
)
from pgmpy.factors.continuous import ContinuousFactor
from pgmpy.independencies import Independencies
from pgmpy.models.MarkovModel import MarkovModel
class BayesianModel(DAG):
"""
Base class for bayesian model.
A model stores nodes and edges with conditional probability
distribution (cpd) and other attributes.
Models hold directed edges. Neither self loops nor
multiple (parallel) edges are allowed.
Nodes can be any hashable python object.
Edges are represented as links between nodes.
Parameters
----------
data : input graph
... data['J'] = data['A'] * data['B']
>>> est = HillClimbSearch(data, scoring_method=BicScore(data))
>>> best_model = est.estimate()
>>> sorted(best_model.nodes())
['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J']
>>> best_model.edges()
[('B', 'J'), ('A', 'J')]
>>> # search a model with restriction on the number of parents:
>>> est.estimate(max_indegree=1).edges()
[('J', 'A'), ('B', 'J')]
"""
nodes = self.state_names.keys()
if start is None:
start = DAG()
start.add_nodes_from(nodes)
elif not isinstance(start, DAG) or not set(start.nodes()) == set(nodes):
raise ValueError(
"'start' should be a DAG with the same variables as the data set, or 'None'."
)
tabu_list = []
current_model = start
iter_no = 0
while iter_no <= max_iter:
iter_no += 1
best_score_delta = 0
best_operation = None
for operation, score_delta in self._legal_operations(
current_model, tabu_list, max_indegree, black_list, white_list
def model_to_pdag(model):
    """Build the DAG pattern for the I-equivalence class of a given DAG.

    This is the "inverse" operation to pdag_to_dag.

    Parameters
    ----------
    model: DAG instance
        The DAG whose nodes and independencies are used to derive the
        pattern.

    Returns
    -------
    The value produced by ConstraintBasedEstimator.skeleton_to_pdag for the
    skeleton and separating sets built from `model`.

    Raises
    ------
    TypeError
        If `model` is not a DAG instance.
    """
    if not isinstance(model, DAG):
        message = (
            "model: Expected DAG instance, "
            "got type {model_type}"
        ).format(model_type=type(model))
        raise TypeError(message)

    # First recover the undirected skeleton together with the separating
    # sets, then orient it into the pattern.
    skeleton_and_sepsets = ConstraintBasedEstimator.build_skeleton(
        model.nodes(), model.get_independencies()
    )
    skeleton, sep_sets = skeleton_and_sepsets
    return ConstraintBasedEstimator.skeleton_to_pdag(skeleton, sep_sets)
from itertools import combinations
from collections import defaultdict
import numpy as np
import networkx as nx
from pgmpy.factors.discrete import TabularCPD
from pgmpy.base import DAG
class DynamicBayesianNetwork(DAG):
def __init__(self, ebunch=None):
"""
Base class for Dynamic Bayesian Network
This is a time variant model of the static Bayesian model, where each
time-slice has some static nodes and is then replicated over a certain
time period.
The nodes can be any hashable python objects.
Parameters
----------
ebunch: Data to initialize graph. If ebunch=None (default) an empty
graph is created. The data can be an edge list, or any NetworkX
graph object
>>> import numpy as np
>>> from pgmpy.estimators import ExhaustiveSearch
>>> # create random data sample with 3 variables, where B and C are identical:
>>> data = pd.DataFrame(np.random.randint(0, 5, size=(5000, 2)), columns=list('AB'))
>>> data['C'] = data['B']
>>> est = ExhaustiveSearch(data)
>>> best_model = est.estimate()
>>> best_model
>>> best_model.edges()
[('B', 'C')]
"""
best_dag = max(self.all_dags(), key=self.scoring_method.score)
best_model = DAG()
best_model.add_nodes_from(sorted(best_dag.nodes()))
best_model.add_edges_from(sorted(best_dag.edges()))
return best_model