Practical Machine Learning

03 - Cars

Notes: https://notes.pipal.in/2018/vmware-ml2/

1 - Introduction | Freight Optimization | Cars

Frame¶

Predict whether a car is a Hatchback or a Sedan.

- y: type
- X: price, kmpl, bhp, brand

Acquire¶

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
plt.style.use('ggplot')

df = pd.read_csv("https://notes.pipal.in/2018/vmware-ml2/cars_small.csv")

df.head()

Transform (Pre-Processing)¶

Preprocessing y¶

from sklearn.preprocessing import LabelEncoder

# Instantiate the encoder
le = LabelEncoder()

# Fit the encoder
le.fit(df.type)

LabelEncoder()

le.classes_

array(['Hatchback', 'Sedan'], dtype=object)

# Transform the data
y = le.transform(df.type)

Preprocessing X¶

from sklearn.preprocessing import StandardScaler

# Instantiate a scaler
sc = StandardScaler()

sc.fit(df[["price", "kmpl"]])

StandardScaler(copy=True, with_mean=True, with_std=True)

X = sc.transform(df[["price", "kmpl"]])

Explore¶

df.plot(kind="scatter", x = "kmpl", y = "price", c=y, cmap="viridis");

plt.scatter(x = X[:,0], y = X[:,1], c=y, cmap="plasma");

plt.scatter(y = y,x  = X[:,0])

<matplotlib.collections.PathCollection at 0x10cbfa5f8>

Model¶

from sklearn.tree import DecisionTreeClassifier

# Instantiate a model
tree = DecisionTreeClassifier(max_depth=1)

# Fit the model
tree.fit(X,y)

DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=1,
            max_features=None, max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, presort=False, random_state=None,
            splitter='best')

#!pip install modelvis

import modelvis

modelvis.plot_decision_boundaries(tree, pd.DataFrame(X), y, show_input=True, probability=True)

modelvis.render_tree(tree)

tree.predict(X)

array([0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0,
       1, 1, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1])

tree.predict_proba(X)

array([[ 1.       ,  0.       ],
       [ 0.2173913,  0.7826087],
       [ 1.       ,  0.       ],
       [ 1.       ,  0.       ],
       [ 0.2173913,  0.7826087],
       [ 0.2173913,  0.7826087],
       [ 0.2173913,  0.7826087],
       [ 0.2173913,  0.7826087],
       [ 1.       ,  0.       ],
       [ 0.2173913,  0.7826087],
       [ 1.       ,  0.       ],
       [ 1.       ,  0.       ],
       [ 1.       ,  0.       ],
       [ 0.2173913,  0.7826087],
       [ 0.2173913,  0.7826087],
       [ 0.2173913,  0.7826087],
       [ 1.       ,  0.       ],
       [ 1.       ,  0.       ],
       [ 1.       ,  0.       ],
       [ 0.2173913,  0.7826087],
       [ 1.       ,  0.       ],
       [ 1.       ,  0.       ],
       [ 1.       ,  0.       ],
       [ 0.2173913,  0.7826087],
       [ 0.2173913,  0.7826087],
       [ 1.       ,  0.       ],
       [ 1.       ,  0.       ],
       [ 1.       ,  0.       ],
       [ 0.2173913,  0.7826087],
       [ 1.       ,  0.       ],
       [ 0.2173913,  0.7826087],
       [ 0.2173913,  0.7826087],
       [ 0.2173913,  0.7826087],
       [ 0.2173913,  0.7826087],
       [ 0.2173913,  0.7826087],
       [ 1.       ,  0.       ],
       [ 0.2173913,  0.7826087],
       [ 0.2173913,  0.7826087],
       [ 0.2173913,  0.7826087],
       [ 0.2173913,  0.7826087],
       [ 1.       ,  0.       ],
       [ 0.2173913,  0.7826087]])

print(modelvis.render_tree_as_code(tree))

def predict(row):
    """Classify one feature row with the depth-1 decision tree.

    Only the first feature, ``row[0]``, is inspected. Returns 0 when it
    lies below the split threshold and 1 otherwise.
    """
    # Root node: 42 samples; value=[24, 18]
    #   below threshold -> leaf with 19 samples; value=[19, 0]; class=0
    #   otherwise       -> leaf with 23 samples; value=[5, 18]; class=1
    split_threshold = -0.1565473973751068
    return 0 if row[0] < split_threshold else 1

Full Model¶

# Raw features and target, taken straight from the loaded frame
X_raw = df[["price", "kmpl", "bhp"]]
y_raw = df["type"]

# Standardise the features (fit and transform in a single call)
from sklearn.preprocessing import StandardScaler
scaleX = StandardScaler()
X = scaleX.fit_transform(X_raw)

# Encode the class labels as integers (fit and transform in a single call)
from sklearn.preprocessing import LabelEncoder
labelY = LabelEncoder()
y = labelY.fit_transform(y_raw)

# Fit a decision tree with default settings on the prepared data
from sklearn.tree import DecisionTreeClassifier
modelTree = DecisionTreeClassifier()
modelTree.fit(X, y)

DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=None,
            max_features=None, max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, presort=False, random_state=None,
            splitter='best')

# Model validation: mean accuracy over 6 cross-validation folds
from sklearn.model_selection import cross_val_score
score = cross_val_score(estimator=modelTree, X=X, y=y, scoring="accuracy", cv=6)
score.mean()

0.80952380952380965

# Model tuning: grid-search the tree depth over 1..7
from sklearn.model_selection import GridSearchCV
parameters = {"max_depth": list(range(1, 8))}
clf = GridSearchCV(
    estimator=modelTree,
    param_grid=parameters,
    return_train_score=True,
)
clf.fit(X, y)
clf.best_params_

{'max_depth': 1}

model = clf.best_estimator_
model

DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=1,
            max_features=None, max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, presort=False, random_state=None,
            splitter='best')

Saving the Model¶

import joblib

# Persist the tuned tree together with the fitted scaler and label encoder:
# all three are needed later to turn raw inputs into a predicted label.
joblib.dump(model, "model.pkl")
joblib.dump(scaleX, "scalex.pkl")
joblib.dump(labelY, "labely.pkl")

['labely.pkl']

The model can now be loaded from the model.pkl file any time.

model2 = joblib.load("model.pkl")
scaleX2 = joblib.load("scalex.pkl")
labely2 = joblib.load("labely.pkl")

model2.predict(X[:10])

array([0, 1, 0, 0, 1, 1, 1, 1, 0, 1])

df.head()

def predict(price, kmpl, bhp):
    """Predict a car's type label from its price, kmpl and bhp.

    The three values are scaled with the reloaded scaler, classified by
    the reloaded tree, and the resulting class index is mapped back to
    its original label.
    """
    features = [[price, kmpl, bhp]]
    scaled = scaleX2.transform(features)
    encoded = model2.predict(scaled)
    labels = labely2.inverse_transform(encoded)
    return labels[0]

predict(550, 18.2, 82)

'Sedan'

Running Model as API¶

Install firefly using:

pip install firefly-python

And run the predict function in cars.py as an API using:

firefly cars.predict

!pip install firefly-python

import firefly
cars_api = firefly.Client("http://127.0.0.1:8000/")

cars_api.predict(price=550, kmpl=18.2, bhp=82)

'Sedan'

help(cars_api.predict)

Help on function predict in module firefly.client:

predict(*args, **kwargs)
    Predicts the model of a car using its price, milage and horse power.

cars_api.square(x=10)

100

	brand	model	price	kmpl	bhp	type
0	Chevrolet	Beat	421	18.6	79	Hatchback
1	Chevrolet	Sail	551	18.2	82	Sedan
2	Chevrolet	Sail Hatchback	468	18.2	82	Hatchback
3	Chevrolet	Spark	345	16.2	62	Hatchback
4	Fiat	Linea Classic	612	14.9	89	Sedan

	brand	model	price	kmpl	bhp	type
0	Chevrolet	Beat	421	18.6	79	Hatchback
1	Chevrolet	Sail	551	18.2	82	Sedan
2	Chevrolet	Sail Hatchback	468	18.2	82	Hatchback
3	Chevrolet	Spark	345	16.2	62	Hatchback
4	Fiat	Linea Classic	612	14.9	89	Sedan