# Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier
import pandas as pd
import numpy as np
import csv
from sklearn.metrics import accuracy_score
from sklearn.cross_validation import train_test_split
from sklearn.preprocessing import LabelEncoder
from azureml.logging import get_azureml_logger
# Initialize the Azure ML run logger and record a marker key/value for this run.
run_logger = get_azureml_logger()
run_logger.log('amlrealworld.ChurnPrediction.CATelcoCustomerChurnModeling','true')

# Load the training sample into a pandas DataFrame (spark=False).
# NOTE(review): `Package` is not among the imports visible in this part of the
# file -- presumably it comes from azureml.dataprep.package; confirm it is
# imported before this line runs.
with Package.open_package('CATelcoCustomerChurnTrainingSample.dprep') as pkg:
    df = pkg.dataflows[0].get_dataframe(spark=False)

# One-hot encode every categorical/object column.  Each encoded column is
# dropped and replaced by indicator columns named "<column>_<level>".
columns_to_encode = list(df.select_dtypes(include=['category', 'object']))
for column_to_encode in columns_to_encode:
    # `prefix` (with the default "_" separator) produces the same
    # "<column>_<level>" names as manual string concatenation, and also works
    # when the levels are not strings.
    dummies = pd.get_dummies(df[column_to_encode], prefix=column_to_encode)
    df = df.drop(column_to_encode, axis=1)
    df = df.join(dummies)

# Classifier instance; it is only constructed here, not fitted.
model = GaussianNB()

# Fixed seed so the 70/30 train/test partition is reproducible.
# NOTE(review): `train_test_split` is imported above from
# sklearn.cross_validation, a module removed in scikit-learn 0.20;
# sklearn.model_selection provides the same function on current releases.
random_seed = 42
train, test = train_test_split(df, random_state=random_seed, test_size=0.3)