Notes: https://notes.pipal.in/2018/vmware-ml2/
1 - Introduction | Freight Optimization | Cars
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
# Draw every chart below with matplotlib's "ggplot" stylesheet.
plt.style.use("ggplot")
# Load the small cars dataset used throughout the notebook and preview it.
df = pd.read_csv(filepath_or_buffer="https://notes.pipal.in/2018/vmware-ml2/cars_small.csv")
df.head()
from sklearn.preprocessing import LabelEncoder

# Encoder for the "type" column of df (the classification target).
le = LabelEncoder()
le.fit(df["type"])
# Classes the encoder learned from the column.
le.classes_
# Encoded target, one entry per row of df.
y = le.transform(df["type"])
from sklearn.preprocessing import StandardScaler

# Scale the two feature columns; X[:, 0] is price and X[:, 1] is kmpl.
sc = StandardScaler()
sc.fit(df[["price", "kmpl"]])
X = sc.transform(df[["price", "kmpl"]])

# Raw data: kmpl (x) against price (y), coloured by encoded car type.
df.plot.scatter(x="kmpl", y="price", c=y, cmap="viridis");
# Scaled data: price (x) against kmpl (y), coloured by encoded car type.
plt.scatter(X[:, 0], X[:, 1], c=y, cmap="plasma");
# Scaled price (x) against the encoded car type (y).
plt.scatter(X[:, 0], y)
from sklearn.tree import DecisionTreeClassifier
# Instantiate a model: a decision tree restricted to depth 1
tree = DecisionTreeClassifier(max_depth=1)
# Fit the model on the two scaled features X and the encoded labels y
tree.fit(X,y)
# modelvis is imported as a separate package; install it first if the import
# fails (presumably published under the same name -- verify):
#!pip install modelvis
import modelvis
# Visualise the fitted tree with the modelvis helpers.
# NOTE(review): what these helpers render is not visible from this file.
modelvis.plot_decision_boundaries(tree, pd.DataFrame(X), y, show_input=True, probability=True)
modelvis.render_tree(tree)
# Predicted classes and class probabilities for the training rows
tree.predict(X)
tree.predict_proba(X)
print(modelvis.render_tree_as_code(tree))
from sklearn.preprocessing import LabelEncoder, StandardScaler

# Raw inputs for the three-feature model: price, kmpl and bhp predict "type".
y_raw = df["type"]
X_raw = df[["price", "kmpl", "bhp"]]

# Scale the features; scaleX stays fitted so it can be reused on new inputs.
scaleX = StandardScaler()
X = scaleX.fit_transform(X_raw)

# Encode the target; labelY stays fitted so predictions can be mapped back.
labelY = LabelEncoder()
y = labelY.fit_transform(y_raw)
# Model creation: a decision tree with default settings, fitted on all rows.
from sklearn.tree import DecisionTreeClassifier
modelTree = DecisionTreeClassifier()
modelTree.fit(X, y)

# Model validation: accuracy on each of 6 cross-validation folds, then the mean.
from sklearn.model_selection import cross_val_score
score = cross_val_score(estimator=modelTree, X=X, y=y, scoring="accuracy", cv=6)
score.mean()
# Model tuning: grid-search the tree depth over 1..7.
from sklearn.model_selection import GridSearchCV
parameters = {"max_depth": list(range(1, 8))}
clf = GridSearchCV(estimator=modelTree, param_grid=parameters, return_train_score=True)
clf.fit(X, y)
# Depth selected by the search.
clf.best_params_
# Keep the estimator the search selected as the final model.
model = clf.best_estimator_
model
import joblib

# Persist the tuned model together with the fitted scaler and label encoder,
# since all three are needed to predict from raw inputs later.
joblib.dump(value=model, filename="model.pkl")
joblib.dump(value=scaleX, filename="scalex.pkl")
joblib.dump(value=labelY, filename="labely.pkl")
The model can now be loaded from the model.pkl file at any time; the scaler and the label encoder are loaded from scalex.pkl and labely.pkl in the same way.
# Reload the three artifacts written by joblib.dump above.
model2 = joblib.load(filename="model.pkl")
scaleX2 = joblib.load(filename="scalex.pkl")
labely2 = joblib.load(filename="labely.pkl")
# Check the reloaded model by predicting the first ten scaled rows.
model2.predict(X[0:10])
df.head()
def predict(price, kmpl, bhp):
    """Predict the car type for a single car.

    The inputs are scaled with the reloaded scaler ``scaleX2``, classified
    by the reloaded model ``model2``, and the predicted class is mapped back
    to its original label with the reloaded encoder ``labely2``.

    Args:
        price: Value for the ``price`` feature.
        kmpl: Value for the ``kmpl`` feature.
        bhp: Value for the ``bhp`` feature.

    Returns:
        The predicted ``type`` label for the given car.
    """
    # One row, with columns in the order the scaler was fitted on:
    # price, kmpl, bhp.
    features = scaleX2.transform([[price, kmpl, bhp]])
    encoded_label = model2.predict(features)
    # inverse_transform returns an array; unwrap the single prediction.
    return labely2.inverse_transform(encoded_label)[0]
# Example: predict the type of one car from its price, kmpl and bhp.
predict(price=550, kmpl=18.2, bhp=82)
Install firefly using:
pip install firefly-python
Then serve the predict function from cars.py as an API (the client below connects to it at http://127.0.0.1:8000/) using:
firefly cars.predict
!pip install firefly-python
import firefly
# Client for the API started with `firefly cars.predict`; assumes that server
# is reachable locally on port 8000.
cars_api = firefly.Client("http://127.0.0.1:8000/")
# Remote call with the same arguments as the local predict(550, 18.2, 82)
cars_api.predict(price=550, kmpl=18.2, bhp=82)
help(cars_api.predict)
# NOTE(review): only `predict` is defined in this file; this call presumably
# needs the server to also expose a `square` function -- confirm.
cars_api.square(x=10)