# Notes: https://notes.pipal.in/2018/vmware-ml2/
# 1 - Introduction | 2 - Freight Optimization | 3 - Cars | 4 - Ensemble
import pandas as pd
import numpy as np
# Load the cars dataset from the course notes site (needs network access).
df = pd.read_csv(
    "https://notes.pipal.in/2018/vmware-ml2/cars_small.csv"
)
# Features: kmpl, bhp and the "type" column. Taken as a copy so the later
# in-place re-encoding of X["type"] does not touch df.
X = df.loc[:, ["kmpl", "bhp", "type"]].copy()
# Target: the price column.
y = df["price"]
# Encode the "type" feature as integer codes so the tree models can use it.
# Note: despite its name, labelY encodes a column of X, not the target y.
from sklearn.preprocessing import LabelEncoder
labelY = LabelEncoder()
X["type"] = labelY.fit_transform(X["type"])
# Model creation: a regression tree capped at depth 5, fitted on all of X, y.
from sklearn.tree import DecisionTreeRegressor
modelTree = DecisionTreeRegressor(max_depth=5).fit(X, y)
from sklearn.metrics import mean_squared_error
# Predict on the training data first: y_pred was previously referenced here
# before any assignment, which raised NameError on a top-to-bottom run.
y_pred = modelTree.predict(X)
# Negated training MSE, on the same sign convention as the
# "neg_mean_squared_error" scorer used in the cross-validation below.
-1 * mean_squared_error(y, y_pred)
from sklearn.model_selection import cross_validate
# 6-fold cross-validation of the tree scored by negated MSE; the scores on the
# training folds are returned alongside the test-fold scores.
cross_validate(
    modelTree,
    X,
    y,
    cv=6,
    scoring="neg_mean_squared_error",
    return_train_score=True,
)
# Model validation: 6-fold cross-validated negated MSE for the decision tree.
from sklearn.model_selection import cross_val_score
score = cross_val_score(
    modelTree,
    X,
    y,
    scoring="neg_mean_squared_error",
    cv=6,
)
# Average over the six folds, then the per-fold values themselves.
score.mean()
score
# Manual training-set MSE for the decision tree, built up column by column
# so each intermediate quantity can be inspected in `output`.
y_pred = modelTree.predict(X)
output = pd.DataFrame({"actual": y, "predicted": y_pred})
output["error"] = output.predicted - output.actual
output["squared_error"] = output.error * output.error
# Stored as `mse` rather than `mean_squared_error`, so the sklearn function
# imported under that name is not overwritten by a float.
mse = output.squared_error.mean()
mse
# Model creation: a random forest of 50 trees, each capped at depth 5,
# fitted on all of X, y.
from sklearn.ensemble import RandomForestRegressor
modelForest = RandomForestRegressor(n_estimators=50, max_depth=5).fit(X, y)
# Model validation: 6-fold cross-validated negated MSE for the random forest.
from sklearn.model_selection import cross_val_score
score = cross_val_score(
    modelForest,
    X,
    y,
    scoring="neg_mean_squared_error",
    cv=6,
)
score.mean()
# Per-feature importances of the forest as fitted on the full dataset.
modelForest.feature_importances_