Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
import unittest
import pytest
from dowhy.causal_estimator import CausalEstimator
class MockEstimator(CausalEstimator):
    """CausalEstimator subclass that adds and overrides nothing.

    Used by the test below to reach the base-class methods directly.
    """
def test_causal_estimator_placeholder_methods():
    """Check that three inherited methods raise NotImplementedError.

    A MockEstimator is built with placeholder arguments and each of
    ``_estimate_effect``, ``_do`` and ``construct_symbolic_estimator`` is
    invoked in turn; every call must raise NotImplementedError.
    """
    mock = MockEstimator(None, None, [None], [None], None)

    # Calls are deferred with lambdas so each one runs inside its own
    # ``pytest.raises`` context, in the same order as before.
    placeholder_calls = (
        lambda: mock._estimate_effect(),
        lambda: mock._do(None),
        lambda: mock.construct_symbolic_estimator(None),
    )
    for invoke in placeholder_calls:
        with pytest.raises(NotImplementedError):
            invoke()
# unittest-based test case for the causal estimator.
# NOTE(review): this excerpt is truncated -- setUp has no visible body, only
# the commented-out data-loading line below.
class TestCausalEstimator(unittest.TestCase):
def setUp(self):
# self.df = pd.read_csv(os.path.join(DATA_PATH,'dgp_1/acic_1_1_data.csv'))
import numpy as np
from sklearn import linear_model
import pandas as pd
import itertools
from dowhy.causal_estimator import CausalEstimate
from dowhy.causal_estimator import CausalEstimator
import statsmodels.api as sm
# NOTE(review): indentation was lost in this excerpt and the class is cut off
# after the final `if` header; only the visible statements are described.
class LinearRegressionEstimator(CausalEstimator):
"""Compute effect of treatment using linear regression.
Fits a regression model for estimating the outcome using treatment(s) and confounders. For a univariate treatment, the treatment effect is equivalent to the coefficient of the treatment variable.
Demo method to show the implementation of a causal inference method that can handle multiple treatments and heterogeneity in treatment. Requires a strong assumption that all relationships from (T, W) to Y are linear.
Common method but the assumptions required are too strong.
"""
# Constructor: forwards every positional and keyword argument unchanged to
# the parent class constructor.
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
# Log the estimand's back-door variable names as one comma-separated string.
self.logger.debug("Back-door variables used:" +
",".join(self._target_estimand.backdoor_variables))
# The estimand's back-door variables are stored as the names of the
# observed common causes.
self._observed_common_causes_names = self._target_estimand.backdoor_variables
# Branch taken when at least one common-cause name exists.
# NOTE(review): the body of this branch is not visible in this excerpt.
if len(self._observed_common_causes_names)>0:
from sklearn import linear_model
import pandas as pd
from dowhy.causal_estimator import CausalEstimate
from dowhy.causal_estimator import CausalEstimator
class PropensityScoreStratificationEstimator(CausalEstimator):
    """Estimate the treatment effect by stratifying the data into bins
    with identical common causes.

    A straightforward application of the back-door criterion.
    """

    def __init__(self, *args, num_strata=50, clipping_threshold=10, **kwargs):
        """Initialise the base estimator and validate the treatment.

        Positional arguments and extra keyword arguments are passed on to
        the parent constructor. ``num_strata`` and ``clipping_threshold``
        are keyword options with defaults 50 and 10.

        Raises:
            Exception: if more than one treatment name is present, or if the
                treatment column of the data is not of boolean dtype.
        """
        super().__init__(*args, **kwargs)

        # Only a single treatment variable is supported.
        if len(self._treatment_name) > 1:
            raise Exception(
                str(self.__class__) + " cannot handle more than one treatment variable."
            )

        # The single treatment column must have a boolean dtype.
        treatment_column = self._data[self._treatment_name[0]]
        if not pd.api.types.is_bool_dtype(treatment_column):
            raise Exception(
                "Propensity Score Stratification method is only applicable for binary treatments. Try explictly setting dtype=bool for the treatment column."
            )
def get_class_object(method_name, *args, **kwargs):
    """Return the estimator class that corresponds to ``method_name``.

    The module ``dowhy.causal_estimators.<method_name>`` is imported and the
    class whose name is the CamelCase form of ``method_name`` is looked up
    in it (``linear_regression_estimator`` -> ``LinearRegressionEstimator``).

    Args:
        method_name: snake_case name of the estimator module.
        *args: Not used by this function; accepted for compatibility.
        **kwargs: Not used by this function; accepted for compatibility.

    Returns:
        The estimator class itself (not an instance); always a subclass of
        ``CausalEstimator``.

    Raises:
        ImportError: if the module cannot be imported, the expected class is
            missing from it, or the attribute found is not a
            ``CausalEstimator`` subclass. The underlying exception, when
            there is one, is chained as ``__cause__``.
    """
    # Local imports: the function does not depend on these names having been
    # imported at module level.
    import string
    from importlib import import_module

    # from https://www.bnmetrics.com/blog/factory-pattern-in-python3-simple-version
    try:
        class_name = string.capwords(method_name, "_").replace('_', '')
        estimator_module = import_module('.' + method_name, package="dowhy.causal_estimators")
        estimator_class = getattr(estimator_module, class_name)
    except (AttributeError, ImportError) as err:
        raise ImportError('{} is not an existing causal estimator.'.format(method_name)) from err

    # Explicit validation instead of ``assert``: asserts are removed when
    # Python runs with -O, which would let a non-estimator object through.
    # The isinstance guard keeps issubclass from raising TypeError when the
    # attribute is not a class at all.
    if not (isinstance(estimator_class, type) and issubclass(estimator_class, CausalEstimator)):
        raise ImportError('{} is not an existing causal estimator.'.format(method_name))
    return estimator_class
from sklearn import linear_model
from sklearn.neighbors import NearestNeighbors
import pandas as pd
from dowhy.causal_estimator import CausalEstimate
from dowhy.causal_estimator import CausalEstimator
# Estimator class whose constructor validates the treatment before use.
# NOTE(review): indentation was lost in this excerpt and the class is cut off
# after the final `if` header; only the visible statements are described.
class PropensityScoreMatchingEstimator(CausalEstimator):
# Constructor: forwards every positional and keyword argument unchanged to
# the parent class constructor, then validates the treatment.
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
# Reject configurations with more than one treatment variable.
if len(self._treatment_name) > 1:
error_msg = str(self.__class__) + " cannot handle more than one treatment variable."
raise Exception(error_msg)
# Require the single treatment column of the data to have a boolean dtype.
if not pd.api.types.is_bool_dtype(self._data[self._treatment_name[0]]):
error_msg = "Propensity Score Matching method is only applicable for binary treatments. Try explictly setting dtype=bool for the treatment column."
raise Exception(error_msg)
# Log the estimand's back-door variable names as one comma-separated string.
self.logger.debug("Back-door variables used:" +
",".join(self._target_estimand.backdoor_variables))
# The estimand's back-door variables are stored as the names of the
# observed common causes.
self._observed_common_causes_names = self._target_estimand.backdoor_variables
# Branch taken when the list of common-cause names is non-empty.
# NOTE(review): the body of this branch is not visible in this excerpt.
if self._observed_common_causes_names:
import numpy as np
import sympy as sp
import sympy.stats as spstats
from statsmodels.sandbox.regression.gmm import IV2SLS
from dowhy.causal_estimator import CausalEstimate
from dowhy.causal_estimator import CausalEstimator
from dowhy.causal_estimator import RealizedEstimand
from dowhy.utils.api import parse_state
class InstrumentalVariableEstimator(CausalEstimator):
    """Compute the effect of treatment using the instrumental variables method.

    This is a superclass that is inherited by other specific methods.
    """

    def __init__(self, *args, **kwargs):
        """Initialise the base estimator and pick the instruments to use.

        All arguments are passed on to the parent constructor. If the
        instance has no ``iv_instrument_name`` attribute, or it is ``None``,
        the estimand's instrumental variables are used; otherwise the value
        is run through ``parse_state``.
        """
        super().__init__(*args, **kwargs)

        instruments_csv = ",".join(self._target_estimand.instrumental_variables)
        self.logger.debug("Instrumental Variables used:" + instruments_csv)

        # Choose the instrumental variable(s) used for estimation.
        user_choice = getattr(self, 'iv_instrument_name', None)
        self.estimating_instrument_names = (
            self._target_estimand.instrumental_variables
            if user_choice is None
            else parse_state(user_choice)
        )
import numpy as np
import pandas as pd
from sklearn import linear_model
from dowhy.causal_estimator import CausalEstimate
from dowhy.causal_estimator import CausalEstimator
class PropensityScoreWeightingEstimator(CausalEstimator):
    """Estimate the treatment effect by weighing the data by the inverse
    probability of occurrence.

    A straightforward application of the back-door criterion.
    """

    def __init__(self, *args, min_ps_score=0.05, max_ps_score=0.95, **kwargs):
        """Initialise the base estimator and validate the treatment.

        Positional arguments and extra keyword arguments are passed on to
        the parent constructor. ``min_ps_score`` and ``max_ps_score`` are
        keyword options with defaults 0.05 and 0.95.

        Raises:
            Exception: if more than one treatment name is present, or if the
                treatment column of the data is not of boolean dtype.
        """
        super().__init__(*args, **kwargs)

        # Only a single treatment variable is supported.
        if len(self._treatment_name) > 1:
            raise Exception(
                str(self.__class__) + " cannot handle more than one treatment variable."
            )

        # The single treatment column must have a boolean dtype.
        treatment_column = self._data[self._treatment_name[0]]
        if not pd.api.types.is_bool_dtype(treatment_column):
            raise Exception(
                "Propensity Score Weighting method is only applicable for binary treatments. Try explictly setting dtype=bool for the treatment column."
            )
import numpy as np
import pandas as pd
from dowhy.causal_estimator import CausalEstimate
from dowhy.causal_estimator import CausalEstimator
from importlib import import_module
import econml
class EconmlCateEstimator(CausalEstimator):
    """Estimator that requires at least one back-door (common-cause) variable."""

    def __init__(self, *args, **kwargs):
        """Initialise the base estimator and prepare the common causes.

        All arguments are passed on to the parent constructor. The columns
        named by the estimand's back-door variables are selected from the
        data and dummy-encoded with ``drop_first=True``.

        Raises:
            Exception: if the estimand has no back-door variables.
        """
        super().__init__(*args, **kwargs)

        estimand = self._target_estimand
        self.identifier_method = estimand.identifier_method

        backdoor_csv = ",".join(estimand.backdoor_variables)
        self.logger.debug("Back-door variables used:" + backdoor_csv)
        self._observed_common_causes_names = estimand.backdoor_variables

        # Guard clause: without any common causes, record that, log the
        # problem and abort construction.
        if not self._observed_common_causes_names:
            self._observed_common_causes = None
            error_msg = "No common causes/confounders present."
            self.logger.error(error_msg)
            raise Exception(error_msg)

        # Select the common-cause columns, then replace them by their
        # dummy-encoded form.
        self._observed_common_causes = self._data[self._observed_common_causes_names]
        self._observed_common_causes = pd.get_dummies(
            self._observed_common_causes, drop_first=True
        )
import numpy as np
import pandas as pd
from dowhy.causal_estimator import CausalEstimator
from dowhy.causal_estimators.instrumental_variable_estimator import InstrumentalVariableEstimator
class RegressionDiscontinuityEstimator(CausalEstimator):
"""Compute effect of treatment using the regression discontinuity method.
Estimates effect by transforming the problem to an instrumental variables
problem.
"""
# Constructor: forwards every positional and keyword argument unchanged to
# the parent class constructor, then prepares the discontinuity variable and
# the symbolic estimator.
# NOTE(review): indentation was lost in this excerpt; the enclosing class
# continues past the visible text.
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
self.logger.info("Using Regression Discontinuity Estimator")
# Look up the column named by self.rd_variable_name in the data.
# NOTE(review): rd_variable_name is not assigned anywhere in the visible
# code -- presumably set by the parent constructor; confirm.
self.rd_variable = self._data[self.rd_variable_name]
# Build the symbolic estimator from the target estimand and log it.
self.symbolic_estimator = self.construct_symbolic_estimator(self._target_estimand)
self.logger.info(self.symbolic_estimator)
def _estimate_effect(self):
upper_limit = self.rd_threshold_value + self.rd_bandwidth