Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
----------
tree :
The tree structure
outfile :
The output file
encoders :
The encoders used to encode categorical features
is_spark :
If the tree was produced by Spark or not
"""
# Render the tree as DOT text into an in-memory buffer, parse that text with
# pydot, and write the result to `outfile` as a PDF.
dot_data = StringIO()
export_graphviz(tree, encoders, out_file=dot_data, is_spark=is_spark)
# NOTE(review): pydot >= 1.2 returns a list of graphs from
# graph_from_dot_data; with such a version write_pdf would have to be called
# on the first element instead — confirm which pydot is targeted.
graph = pydot.graph_from_dot_data(dot_data.getvalue())
graph.write_pdf(outfile)
# Convert G to a pydot graph and, when a root is given, store it as the
# graph's "root" attribute before the layout program runs.
P=to_pydot(G)
if root is not None :
P.set("root",make_str(root))
# Run the layout program named by `prog`; presumably D is the DOT output
# annotated with node positions — confirm against pydot's create_dot.
D=P.create_dot(prog=prog)
# NOTE(review): if create_dot returns bytes, this comparison with "" can
# never be true and the failure branch is unreachable — confirm.
if D=="": # no data returned
print("Graphviz layout with %s failed"%(prog))
print()
print("To debug what happened try:")
print("P=pydot_from_networkx(G)")
print("P.write_dot(\"file.dot\")")
print("And then run %s on file.dot"%(prog))
# Bare return: the failure path yields None, not an empty dict.
return
# Parse the layout output again so each node's position can be read back.
# NOTE(review): newer pydot returns a list here, in which case Q.get_node
# below would fail — confirm the pydot version in use.
Q=pydot.graph_from_dot_data(D)
node_pos={}
for n in G.nodes():
# Build the lookup key by letting pydot derive the node name from n.
pydot_node = pydot.Node(make_str(n)).get_name().encode('utf-8')
node=Q.get_node(pydot_node)
# get_node may return a list of matches; only the first one is used.
if isinstance(node,list):
node=node[0]
pos=node.get_pos()[1:-1] # strip leading and trailing double quotes
# NOTE(review): the slice above has already run, so pos cannot be None at
# this point; a node without a position would fail on the slice instead.
if pos != None:
xx,yy=pos.split(",")
node_pos[n]=(float(xx),float(yy))
# Maps each node of G to an (x, y) tuple of floats.
return node_pos
# Cross-validated accuracy for an extra-trees ensemble (10 folds).
et_estimator1 = ensemble.ExtraTreesClassifier(n_estimators=5, max_features=4)
scores = model_selection.cross_val_score(et_estimator1, X_train, y_train, cv=10)
print(scores.mean())
et_estimator1.fit(X_train, y_train)
et_estimator1.estimators_

# Export every tree built by the extra-trees ensemble to its own PDF.
# The fitted estimator itself is passed to export_graphviz: current
# scikit-learn validates that argument as a fitted estimator and rejects the
# low-level `tree_` object, while older releases accept the estimator too.
# feature_names is passed as a plain list because some releases reject a
# pandas Index there.
n_tree = 0
for est in et_estimator1.estimators_:
    dot_data = io.StringIO()
    tree.export_graphviz(est, out_file=dot_data,
                         feature_names=list(X_train.columns))
    # graph_from_dot_data returns a list of graphs; the DOT text holds one.
    graph = pydot.graph_from_dot_data(dot_data.getvalue())[0]
    graph.write_pdf("extratree" + str(n_tree) + ".pdf")
    n_tree += 1
# Open the shelve stores holding the training samples and the pcap metadata.
# NOTE(review): neither shelf is closed in the lines visible here — confirm
# they are closed further down so the underlying files are released.
train_data = shelve.open(train_sample)
metadata = shelve.open(pcap_metadata)
# The 'res' entry of the training shelf is indexed as [0] -> samples and
# [1] -> labels; both are passed to clf.fit below.
sample = train_data['res'][0]
labels = train_data['res'][1]
pcap_meta = metadata['res']
# Train a decision tree that splits on the entropy criterion.
clf = tree.DecisionTreeClassifier(criterion='entropy')
clf.fit(sample,labels)
# Build a pydot graph of the fitted tree from its DOT export. No file is
# written in the lines visible here.
dot_data = StringIO()
tree.export_graphviz(clf,out_file=dot_data)
graph = pydot.graph_from_dot_data(dot_data.getvalue())
# State for the classification loop: number of metadata entries, the
# current position within them, and an initially empty list.
metadata_length = len(pcap_meta)
index = 0
tmp = []
def classifier(data, clf):
    """Predict on ``data`` and translate the first label into a token.

    Calls ``clf.predict(data)`` and looks only at the first predicted label.
    Returns ``'one'`` when that label equals ``'TARGET'`` and ``'next'`` for
    anything else.
    """
    first_label = clf.predict(data)[0]
    return 'one' if first_label == 'TARGET' else 'next'
while index < metadata_length:
result = classifier(pcap_meta[index],clf)
if result == 'one':
# Fit the classifier, export it as DOT text, relabel parts of that text and
# write the resulting graph to `output_name_tree` as a JPG.
clf.fit(matrix,vector)
dot_data = StringIO()
tree.export_graphviz(clf, out_file=dot_data,
feature_names=label,
class_names=['File2','File1'],
filled=True, rounded=True,
special_characters=True,
proportion=False,
impurity=True)
out_tree = dot_data.getvalue()
# Plain text substitution over the whole DOT source: every occurrence of
# 'True' / 'False' is renamed, wherever in the text it appears, and the
# ' ≤ 0.5' threshold text is removed.
# NOTE(review): with special_characters=True the exporter may emit an HTML
# entity rather than the literal '≤', in which case the last replace matches
# nothing — confirm against the generated DOT.
out_tree = out_tree.replace('True','Inactive').replace('False','Active').replace(' ≤ 0.5', '')
graph = pydot.graph_from_dot_data(str(out_tree))
# Handle both pydot return shapes: try the result as a single graph first;
# if it has no write_jpg (AttributeError, e.g. a list), parse again and use
# the first element.
try:
graph.write_jpg(output_name_tree)
except AttributeError:
graph = pydot.graph_from_dot_data(str(out_tree))[0]
graph.write_jpg(output_name_tree)
return
bag_tree_estimator1.fit(X_train, y_train)

# Out-of-bag accuracy for a bagged tree ensemble of 5 estimators.
bag_tree_estimator2 = ensemble.BaggingClassifier(dt_estimator, n_estimators=5,
                                                 oob_score=True)
bag_tree_estimator2.fit(X_train, y_train)
bag_tree_estimator2.oob_score_
bag_tree_estimator1.estimators_

# Export every tree built by the bagging ensemble to its own PDF.
# The fitted estimator itself is passed to export_graphviz: current
# scikit-learn validates that argument as a fitted estimator and rejects the
# low-level `tree_` object, while older releases accept the estimator too.
# feature_names is passed as a plain list because some releases reject a
# pandas Index there.
n_tree = 0
for est in bag_tree_estimator1.estimators_:
    dot_data = io.StringIO()
    tree.export_graphviz(est, out_file=dot_data,
                         feature_names=list(X_train.columns))
    # graph_from_dot_data returns a list of graphs; the DOT text holds one.
    graph = pydot.graph_from_dot_data(dot_data.getvalue())[0]
    graph.write_pdf("bagtree" + str(n_tree) + ".pdf")
    n_tree += 1
# One-hot encode the categorical columns of the training data.
titanic_train1 = pd.get_dummies(titanic_train, columns=['Pclass', 'Sex', 'Embarked'])
titanic_train1.shape
titanic_train1.info()
titanic_train1.head(6)

# `axis` is passed by keyword: pandas 2 no longer accepts it positionally.
X_train = titanic_train1.drop(
    ['PassengerId', 'Age', 'Cabin', 'Ticket', 'Name', 'Survived'], axis=1)
y_train = titanic_train['Survived']

# Build the decision tree model.
dt = tree.DecisionTreeClassifier()
dt.fit(X_train, y_train)

# Visualize the decision tree.
dot_data = io.StringIO()
tree.export_graphviz(dt, out_file=dot_data, feature_names=list(X_train.columns))
# graph_from_dot_data returns a list of graphs; the DOT text holds one.
graph = pydot.graph_from_dot_data(dot_data.getvalue())[0]
graph.write_pdf("decisiont-tree.pdf")

# Predict the outcome using the decision tree.
titanic_test = pd.read_csv("test.csv")
# Fill missing fares with the column mean. fillna on the column replaces the
# chained-indexing assignment, which can silently write to a temporary copy.
titanic_test['Fare'] = titanic_test['Fare'].fillna(titanic_test['Fare'].mean())
titanic_test1 = pd.get_dummies(titanic_test, columns=['Pclass', 'Sex', 'Embarked'])
titanic_test1.shape
titanic_test1.info()
titanic_test1.head(6)
X_test = titanic_test1.drop(['PassengerId', 'Age', 'Cabin', 'Ticket', 'Name'], axis=1)
# Align the test columns with the training columns so that a category level
# present in only one of the two files cannot shift or break the features.
# This is a no-op when the columns already match.
X_test = X_test.reindex(columns=X_train.columns, fill_value=0)
titanic_test['Survived'] = dt.predict(X_test)
titanic_test.to_csv("submission.csv", columns=['PassengerId', 'Survived'], index=False)
# Single-argument print calls behave the same on Python 2 and Python 3.
print(test_target)
print(clf.predict(test_data))

# viz code
try:
    # Older scikit-learn releases bundle six.
    from sklearn.externals.six import StringIO
except ImportError:
    # sklearn.externals.six has been removed from newer releases.
    from io import StringIO
import pydot

dot_data = StringIO()
tree.export_graphviz(clf,
                     out_file=dot_data,
                     feature_names=iris.feature_names,
                     class_names=iris.target_names,
                     filled=True, rounded=True,
                     impurity=False)
graph = pydot.graph_from_dot_data(dot_data.getvalue())
# Older pydot returns a single graph, newer pydot (and some forks) a list of
# graphs; normalise to the single graph before writing.
if isinstance(graph, (list, tuple)):
    graph = graph[0]
graph.write_pdf("iris.pdf")
"""
from sklearn.externals.six import StringIO
import pydot
dot_data = StringIO()
# pydot is imported where it is needed; a missing module is reported through
# error() (the opening `try` is not visible in this span).
import pydot
except ImportError :
error("module pydot not found")
# Assemble the DOT source by hand: a white-background digraph with
# light-gray, filled, box-shaped nodes in 8pt Courier.
buff = "digraph code {\n"
buff += "graph [bgcolor=white];\n"
buff += "node [color=lightgray, style=filled shape=box fontname=\"Courier\" fontsize=\"8\"];\n"
# `raw` doubles as a flag and a payload: when it equals False the body is
# generated from `mx` via method2dot, otherwise `raw` itself is appended as
# the DOT body.
if raw == False :
buff += method2dot( mx )
else :
buff += raw
buff += "}"
d = pydot.graph_from_dot_data( buff )
# Dispatch to the writer named after the requested format (write_<_format>).
# NOTE(review): if this pydot version returns a list of graphs, the getattr
# on `d` would look the writer up on the list and fail — confirm.
if d :
getattr(d, "write_" + _format)( output )
def render_output_pydot(self, dotdata, **kwargs):
    """Render DOT source to a file using pydot.

    Args:
        dotdata: DOT-language source of the graph to render.
        **kwargs: must contain ``outputfile``, the destination path. The text
            after its last ``.`` selects the output format; an unrecognised
            extension falls back to ``'raw'``.

    Raises:
        CommandError: if pydot is not available, if parsing yields no graph,
            or if the DOT source contains more than one graph.
        KeyError: if ``outputfile`` is missing from ``kwargs``.
    """
    if not HAS_PYDOT:
        raise CommandError("You need to install pydot python module")

    graph = pydot.graph_from_dot_data(dotdata)
    if not graph:
        raise CommandError("pydot returned an error")
    # pydot >= 1.2 returns a list of graphs instead of a single graph; accept
    # both shapes and insist on exactly one graph to render.
    if isinstance(graph, (list, tuple)):
        if len(graph) != 1:
            raise CommandError("Found more than one graph, unable to proceed")
        graph = graph[0]

    output_file = kwargs['outputfile']
    # NOTE(review): 'em' and 'fplus' look like a split 'emfplus' — confirm
    # against the Graphviz output-format list before changing.
    formats = ['bmp', 'canon', 'cmap', 'cmapx', 'cmapx_np', 'dot', 'dia', 'emf',
               'em', 'fplus', 'eps', 'fig', 'gd', 'gd2', 'gif', 'gv', 'imap',
               'imap_np', 'ismap', 'jpe', 'jpeg', 'jpg', 'metafile', 'pdf',
               'pic', 'plain', 'plain-ext', 'png', 'pov', 'ps', 'ps2', 'svg',
               'svgz', 'tif', 'tiff', 'tk', 'vml', 'vmlz', 'vrml', 'wbmp', 'xdot']
    # Everything after the last dot; with no dot, rfind gives -1 and the whole
    # name is treated as the extension.
    ext = output_file[output_file.rfind('.') + 1:]
    output_format = ext if ext in formats else 'raw'
    graph.write(output_file, format=output_format)