VMware Bangalore
June 18-20, 2018
Amit Kapoor • Anand Chitipothu • Bargava Subramanian
Notes of this workshop are available online at:
https://bit.ly/vmware-ml
Home | Day 1 | Day 2 | Day 2 - Housing | Day 3
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
# Read the iris CSV published with the workshop notes into a DataFrame.
url = "https://notes.pipal.in/2018/vmware-ml/iris.csv"
df = pd.read_csv(url)
# Last expression of the cell: shows the first rows of the table.
df.head()
Problem: Write a Python function that takes PetalLength and PetalWidth as arguments and predicts the type of the flower.
def predict0(PetalLength, PetalWidth):
    """Baseline predictor that always answers ``'Iris-setosa'``.

    The two measurements are accepted so the function can be called the same
    way as any other ``(PetalLength, PetalWidth)`` predictor, but they are
    ignored.

    Args:
        PetalLength: Petal length of the flower (unused).
        PetalWidth: Petal width of the flower (unused).

    Returns:
        The constant label ``'Iris-setosa'``.
    """
    return 'Iris-setosa'
def test(dataset, predict_function):
    """Return the fraction of rows that *predict_function* labels correctly.

    Args:
        dataset: Table exposing ``PetalLength``, ``PetalWidth`` and ``Name``
            columns as attributes (e.g. the iris DataFrame).
        predict_function: Callable taking ``(PetalLength, PetalWidth)`` for a
            single row and returning the predicted label.

    Returns:
        The number of rows whose prediction equals ``Name``, divided by
        ``len(dataset)``.
    """
    predicted = np.array([
        predict_function(length, width)
        for length, width in zip(dataset.PetalLength, dataset.PetalWidth)
    ])
    actual = dataset.Name
    # Element-wise comparison; summing the booleans counts the matches.
    matched = np.sum(predicted == actual)
    return matched / len(dataset)


# The name starts with "test", so pytest would try to collect this helper as a
# test case (and fail looking for fixtures) if it is ever imported into a test
# module. Opt out explicitly.
test.__test__ = False
# Score the constant predictor: fraction of rows in df whose Name it matches.
test(df, predict0)
from sklearn.tree import DecisionTreeClassifier
# Decision tree capped at depth 2, trained on the two petal columns only.
model = DecisionTreeClassifier(max_depth=2)
X = df[['PetalLength', 'PetalWidth']]
y = df.Name
model.fit(X, y)
# predict() takes a list of rows; this queries a single row [3, 4], whose
# values line up with the column order of X (PetalLength, PetalWidth).
model.predict([[3, 4]])
def test_model(model):
    """Score a fitted classifier on ``df`` via ``test``.

    Args:
        model: Fitted estimator whose ``predict`` accepts a list of
            ``[PetalLength, PetalWidth]`` rows.

    Returns:
        The value of ``test(df, ...)`` for this model's row-by-row
        predictions.
    """
    def model_predict(PetalLength, PetalWidth):
        # predict() works on a batch of rows, so wrap the single row in a
        # list and unwrap the single prediction that comes back.
        row = [PetalLength, PetalWidth]
        return model.predict([row])[0]

    return test(df, model_predict)


# The "test" prefix would make pytest collect this helper as a test case if it
# is ever imported into a test module. Opt out explicitly.
test_model.__test__ = False
# Fraction of rows in df that the depth-2 tree labels correctly.
test_model(model)
Install the modelvis library.
!pip install modelvis
import modelvis
# Echo the installed modelvis version as the cell output.
modelvis.__version__
# Render the depth-2 tree. The feature names follow the column order of X;
# NOTE(review): class_names presumably must follow the order of the fitted
# class labels (setosa, versicolor, virginica) - confirm against modelvis.
modelvis.render_tree(model,
feature_names=["PetalLength", "PetalWidth"],
class_names=["setosa", "versicolor", "virginica"])
# Same features and labels, but allow the tree one more level (depth 3).
model3 = DecisionTreeClassifier(max_depth=3)
# Trailing ';' keeps the notebook from echoing the fitted estimator.
model3.fit(X, y);
modelvis.render_tree(model3,
feature_names=["PetalLength", "PetalWidth"],
class_names=["setosa", "versicolor", "virginica"])
# Fraction of rows in df that the depth-3 tree labels correctly.
test_model(model3)
# Depth-5 tree on the same features and labels.
model5 = DecisionTreeClassifier(max_depth=5)
# Trailing ';' keeps the notebook from echoing the fitted estimator.
model5.fit(X, y);
# Fraction of rows in df that the depth-5 tree labels correctly.
test_model(model5)
modelvis.render_tree(model5,
feature_names=["PetalLength", "PetalWidth"],
class_names=["setosa", "versicolor", "virginica"])
# Print what render_tree_as_code returns for the depth-2 and depth-3 trees
# (presumably the learned splits written out as code - see modelvis docs).
print(modelvis.render_tree_as_code(model))
print(modelvis.render_tree_as_code(model3))
# Encode the three flower names as integers 0..2 in a new 'iname' column.
names = {'Iris-setosa': 0, 'Iris-versicolor': 1, 'Iris-virginica': 2}
df['iname'] = df.Name.map(names.get)
# From here on y holds the integer codes instead of the string names.
y = df.iname
# 'model' is rebound to a depth-3 tree trained on the integer labels.
# NOTE(review): test()/test_model() compare predictions against df.Name
# (strings), so they are not meaningful for models trained on 'iname'.
model = DecisionTreeClassifier(max_depth=3)
model.fit(X, y)
# NOTE(review): integer labels are presumably what plot_decision_boundaries
# needs - confirm against modelvis.
modelvis.plot_decision_boundaries(model, X, y,
show_input=True)
print(modelvis.render_tree_as_code(model))
# Repeat the decision-boundary plot with a depth-5 tree on the same X and
# integer-coded y, for comparison with the depth-3 plot.
model = DecisionTreeClassifier(max_depth=5)
model.fit(X, y)
modelvis.plot_decision_boundaries(model, X, y,
show_input=True)