import numpy as np


np.array([1, 2, 3, 4])

array([1, 2, 3, 4])


a = np.array([1, 2, 3, 4])


a[0]

1


a[-1]

4

a

array([1, 2, 3, 4])


a + a

array([2, 4, 6, 8])

a*a

array([ 1,  4,  9, 16])

a*5

array([ 5, 10, 15, 20])


zeros = np.zeros(100)  # create a 1-D NumPy array of 100 elements, all set to 0.0


zeros

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])


zeros.shape

(100,)


matrix = zeros.reshape(10, 10)


matrix

array([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]])


matrix.shape

(10, 10)


zeros.reshape(20, 5)

array([[0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.]])


np.linspace(0, 100, 50)

array([  0.        ,   2.04081633,   4.08163265,   6.12244898,
         8.16326531,  10.20408163,  12.24489796,  14.28571429,
        16.32653061,  18.36734694,  20.40816327,  22.44897959,
        24.48979592,  26.53061224,  28.57142857,  30.6122449 ,
        32.65306122,  34.69387755,  36.73469388,  38.7755102 ,
        40.81632653,  42.85714286,  44.89795918,  46.93877551,
        48.97959184,  51.02040816,  53.06122449,  55.10204082,
        57.14285714,  59.18367347,  61.2244898 ,  63.26530612,
        65.30612245,  67.34693878,  69.3877551 ,  71.42857143,
        73.46938776,  75.51020408,  77.55102041,  79.59183673,
        81.63265306,  83.67346939,  85.71428571,  87.75510204,
        89.79591837,  91.83673469,  93.87755102,  95.91836735,
        97.95918367, 100.        ])


import matplotlib.pyplot as plt
%matplotlib inline 
# this is required in a Jupyter notebook if you want to see graphs inline


# Sample sin(x) at 100 equally spaced points between -10 and 10 and plot the curve.
x = np.linspace(-10, 10, 100) # array of size 100 between -10 and 10, equally spaced
y = np.sin(x)  # element-wise sine of every sample in x
plt.plot(x, y, marker='x')  # draw the line and put an 'x' marker on each sample

[<matplotlib.lines.Line2D at 0x7f5068e74790>]


import pandas as pd


# Column-oriented input: every key becomes a DataFrame column and the lists
# (all of the same length) supply the row values in order.
data = dict(
    Name=["Indraraj", "Renuka", "Pushkar", "Gunjan", "Samiksha", "Nisha"],
    Location=["Pune", "Pune", "Mumbai", "Aakurdi", "Nashik", "Pune"],
    Score=[95, 94, 96, 95, 93, 97],
)

# No index is given, so rows are labelled 0..5.
df = pd.DataFrame(data)
df


df.columns # column names

Index(['Name', 'Location', 'Score'], dtype='object')


df.Name

0    Indraraj
1      Renuka
2     Pushkar
3      Gunjan
4    Samiksha
5       Nisha
Name: Name, dtype: object


df['Name']

0    Indraraj
1      Renuka
2     Pushkar
3      Gunjan
4    Samiksha
5       Nisha
Name: Name, dtype: object


df.Score # this is series

0    95
1    94
2    96
3    95
4    93
5    97
Name: Score, dtype: int64


# Two numeric columns whose rows are labelled by company name rather than by
# the default integer positions, so values can be looked up by label.
stock = pd.DataFrame(
    data=dict(
        value=[123, 335, 334, 124],
        high=[125, 340, 350, 150],
    ),
    index=["IBM", "APPLE", "M&M", "INFY"],
)


stock


stock.value

IBM      123
APPLE    335
M&M      334
INFY     124
Name: value, dtype: int64


stock.value['IBM']

123

df


df.Score > 95

0    False
1    False
2     True
3    False
4    False
5     True
Name: Score, dtype: bool


df[df.Score > 95]


df[df.Location=='Pune']


from IPython.display import Image
sepal_petal_url = "https://external-content.duckduckgo.com/iu/?u=https%3A%2F%2Ftse1.mm.bing.net%2Fth%3Fid%3DOIP.5iVkkI_CdoAcvj6L_mxR7QHaGI%26pid%3DApi&f=1"
Image(url=sepal_petal_url, width=400, height=400)


from sklearn.datasets import load_iris


iris_dataset = load_iris()


iris_dataset.keys()

dict_keys(['data', 'target', 'frame', 'target_names', 'DESCR', 'feature_names', 'filename', 'data_module'])


iris_dataset.data[:5]

array([[5.1, 3.5, 1.4, 0.2],
       [4.9, 3. , 1.4, 0.2],
       [4.7, 3.2, 1.3, 0.2],
       [4.6, 3.1, 1.5, 0.2],
       [5. , 3.6, 1.4, 0.2]])


iris_dataset.feature_names

['sepal length (cm)',
 'sepal width (cm)',
 'petal length (cm)',
 'petal width (cm)']


iris_dataset.target_names

array(['setosa', 'versicolor', 'virginica'], dtype='<U10')


iris_dataset.target

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2])


from sklearn.model_selection import train_test_split


# Split the iris features and labels into a training part and a test part.
# random_state=0 fixes the shuffling so the same split is produced on every run.
# No size arguments are passed; the shapes printed below show 112 training
# rows and 38 test rows out of the 150 samples.
X_train, X_test, y_train, y_test = train_test_split(iris_dataset['data'], iris_dataset['target'], random_state=0)


iris_dataset['data'].shape # it has 150 rows and 4 columns

(150, 4)


X_train.shape # 75% of data

(112, 4)


X_test.shape # 25% of data

(38, 4)


y_train.shape

(112,)


y_test.shape

(38,)


!pip install mglearn

Requirement already satisfied: mglearn in /home/vikrant/programming/work/github/python-ml-course/notebooks/venv/lib/python3.8/site-packages (0.1.9)
Requirement already satisfied: numpy in /home/vikrant/programming/work/github/python-ml-course/notebooks/venv/lib/python3.8/site-packages (from mglearn) (1.21.2)
Requirement already satisfied: matplotlib in /home/vikrant/programming/work/github/python-ml-course/notebooks/venv/lib/python3.8/site-packages (from mglearn) (3.4.3)
Requirement already satisfied: scikit-learn in /home/vikrant/programming/work/github/python-ml-course/notebooks/venv/lib/python3.8/site-packages (from mglearn) (1.0)
Requirement already satisfied: pandas in /home/vikrant/programming/work/github/python-ml-course/notebooks/venv/lib/python3.8/site-packages (from mglearn) (1.3.4)
Requirement already satisfied: pillow in /home/vikrant/programming/work/github/python-ml-course/notebooks/venv/lib/python3.8/site-packages (from mglearn) (8.4.0)
Requirement already satisfied: cycler in /home/vikrant/programming/work/github/python-ml-course/notebooks/venv/lib/python3.8/site-packages (from mglearn) (0.10.0)
Requirement already satisfied: imageio in /home/vikrant/programming/work/github/python-ml-course/notebooks/venv/lib/python3.8/site-packages (from mglearn) (2.9.0)
Requirement already satisfied: joblib in /home/vikrant/programming/work/github/python-ml-course/notebooks/venv/lib/python3.8/site-packages (from mglearn) (1.1.0)
Requirement already satisfied: six in /home/vikrant/programming/work/github/python-ml-course/notebooks/venv/lib/python3.8/site-packages (from cycler->mglearn) (1.16.0)
Requirement already satisfied: python-dateutil>=2.7 in /home/vikrant/programming/work/github/python-ml-course/notebooks/venv/lib/python3.8/site-packages (from matplotlib->mglearn) (2.8.2)
Requirement already satisfied: pyparsing>=2.2.1 in /home/vikrant/programming/work/github/python-ml-course/notebooks/venv/lib/python3.8/site-packages (from matplotlib->mglearn) (2.4.7)
Requirement already satisfied: kiwisolver>=1.0.1 in /home/vikrant/programming/work/github/python-ml-course/notebooks/venv/lib/python3.8/site-packages (from matplotlib->mglearn) (1.3.2)
Requirement already satisfied: pytz>=2017.3 in /home/vikrant/programming/work/github/python-ml-course/notebooks/venv/lib/python3.8/site-packages (from pandas->mglearn) (2021.3)
Requirement already satisfied: threadpoolctl>=2.0.0 in /home/vikrant/programming/work/github/python-ml-course/notebooks/venv/lib/python3.8/site-packages (from scikit-learn->mglearn) (3.0.0)
Requirement already satisfied: scipy>=1.1.0 in /home/vikrant/programming/work/github/python-ml-course/notebooks/venv/lib/python3.8/site-packages (from scikit-learn->mglearn) (1.7.1)
WARNING: You are using pip version 21.3; however, version 21.3.1 is available.
You should consider upgrading via the '/home/vikrant/programming/work/github/python-ml-course/notebooks/venv/bin/python -m pip install --upgrade pip' command.


import mglearn


iris_dataframe = pd.DataFrame(X_train, columns=iris_dataset.feature_names)


iris_dataframe


# Pairwise scatter plots of every feature against every other feature.
# c=y_train colours each point by its class label, hist_kwds sets 20 bins for
# the histograms, s and alpha control marker size and transparency, and
# cmap=mglearn.cm3 is the colour map taken from mglearn.
graph = pd.plotting.scatter_matrix(iris_dataframe, c=y_train, figsize=(15, 15), marker='o',
                                   hist_kwds={'bins':20}, s=60, alpha=0.8, cmap=mglearn.cm3)


from sklearn.neighbors import KNeighborsClassifier
# Build a k-nearest-neighbours classifier that looks at only 1 neighbour.
knn = KNeighborsClassifier(n_neighbors=1)
# Train on the known inputs (X_train) and their known labels (y_train).
knn.fit(X_train, y_train)

KNeighborsClassifier(n_neighbors=1)


X_new = np.array([[5, 2.9, 1, 2.0]])


X_new.shape

(1, 4)


prediction = knn.predict(X_new)


prediction

array([0])


iris_dataset['target_names'][prediction]

array(['setosa'], dtype='<U10')


y_pred = knn.predict(X_test)


print(y_pred)

[2 1 0 2 0 2 0 1 1 1 2 1 1 1 1 0 1 1 0 0 2 1 0 0 2 0 0 1 1 0 2 1 0 2 2 1 0
 2]


y_test

array([2, 1, 0, 2, 0, 2, 0, 1, 1, 1, 2, 1, 1, 1, 1, 0, 1, 1, 0, 0, 2, 1,
       0, 0, 2, 0, 0, 1, 1, 0, 2, 1, 0, 2, 2, 1, 0, 1])


np.mean(y_pred==y_test)

0.9736842105263158


# Whole workflow in one cell: split the iris data, train a 1-neighbour
# classifier on the training part and report its accuracy on the test part.
X_train, X_test, y_train, y_test = train_test_split(
    iris_dataset['data'], iris_dataset['target'], random_state=0
)
# fit() hands back the fitted estimator, so construction and training are chained;
# the classifier learns from the known set of inputs and outputs.
knn = KNeighborsClassifier(n_neighbors=1).fit(X_train, y_train)
print(knn.score(X_test, y_test))

0.9736842105263158


# Load mglearn's "forge" example dataset and draw it as a labelled scatter plot.
X, y = mglearn.datasets.make_forge()
# First column of X on the x-axis, second column on the y-axis; y is passed as
# the per-point label (presumably used to pick the marker per class - confirm in mglearn).
mglearn.discrete_scatter(X[:, 0], X[:,1], y)
plt.legend(["Class 0", "Class 1"], loc=4)  # loc=4 places the legend in the lower-right corner
plt.xlabel("First Feature")
plt.ylabel("Second Feature")

/home/vikrant/programming/work/github/python-ml-course/notebooks/venv/lib/python3.8/site-packages/sklearn/utils/deprecation.py:87: FutureWarning: Function make_blobs is deprecated; Please import make_blobs directly from scikit-learn
  warnings.warn(msg, category=FutureWarning)

Text(0, 0.5, 'Second Feature')


X.shape

(26, 2)


# Load mglearn's "wave" example dataset with 40 samples and plot target against feature.
X,y = mglearn.datasets.make_wave(n_samples=40)
plt.plot(X,y, 'o')  # 'o' draws unconnected circle markers instead of a line
plt.ylim(-3, 3)  # fix the visible y-range to [-3, 3]
plt.xlabel("Feature")
plt.ylabel('Target')

Text(0, 0.5, 'Target')


mglearn.plots.plot_knn_classification(n_neighbors=1)

/home/vikrant/programming/work/github/python-ml-course/notebooks/venv/lib/python3.8/site-packages/sklearn/utils/deprecation.py:87: FutureWarning: Function make_blobs is deprecated; Please import make_blobs directly from scikit-learn
  warnings.warn(msg, category=FutureWarning)


mglearn.plots.plot_knn_classification(n_neighbors=3)

/home/vikrant/programming/work/github/python-ml-course/notebooks/venv/lib/python3.8/site-packages/sklearn/utils/deprecation.py:87: FutureWarning: Function make_blobs is deprecated; Please import make_blobs directly from scikit-learn
  warnings.warn(msg, category=FutureWarning)


from sklearn.model_selection import train_test_split

# Reload the forge dataset and split it into training and test parts;
# random_state=0 makes the split reproducible.
X,y = mglearn.datasets.make_forge()
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

/home/vikrant/programming/work/github/python-ml-course/notebooks/venv/lib/python3.8/site-packages/sklearn/utils/deprecation.py:87: FutureWarning: Function make_blobs is deprecated; Please import make_blobs directly from scikit-learn
  warnings.warn(msg, category=FutureWarning)


from sklearn.neighbors import KNeighborsClassifier
clf = KNeighborsClassifier(n_neighbors=3)


clf.fit(X_train, y_train)

KNeighborsClassifier(n_neighbors=3)


clf.predict(X_test)

array([1, 0, 1, 0, 1, 0, 0])


clf.score(X_test, y_test)

0.8571428571428571


# Draw the decision boundary of a k-nearest-neighbours classifier for
# k = 1, 3 and 9 side by side, one subplot per value of k.
fig, axes = plt.subplots(1, 3, figsize=(10, 3))
for n_neighbors, ax in zip([1, 3, 9], axes):
    # Fit on the complete X, y (no train/test split) because the goal here is
    # to visualise the boundary, not to measure accuracy.
    clf = KNeighborsClassifier(n_neighbors=n_neighbors).fit(X, y)
    mglearn.plots.plot_2d_separator(clf, X, fill=True, eps=0.5, ax=ax, alpha=0.4)
    mglearn.discrete_scatter(X[:, 0], X[:, 1], y, ax=ax)
    ax.set_title(f"{n_neighbors} neighbors")
    ax.set_xlabel("feature 0")
    # Label the y-axis: the original called set_xlabel twice, which overwrote
    # "feature 0" and left the y-axis without any label.
    ax.set_ylabel("feature 1")

# One legend on the first subplot is enough; loc=3 is the lower-left corner.
axes[0].legend(loc=3)

<matplotlib.legend.Legend at 0x7f505714d0d0>


from sklearn.datasets import load_breast_cancer


cancer = load_breast_cancer()


# Split the breast cancer data; stratify=cancer.target is passed so the split
# is stratified on the labels, and random_state=66 makes it reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    cancer.data, cancer.target, stratify=cancer.target, random_state=66
)

# For every neighbour count from 1 to 10, train a classifier and record how
# well it scores on the data it was trained on and on the held-out data.
n_range = range(1, 11)
training_accuracy, test_accuracy = [], []
for n in n_range:
    clf = KNeighborsClassifier(n_neighbors=n)
    clf.fit(X_train, y_train)
    training_accuracy += [clf.score(X_train, y_train)]
    test_accuracy += [clf.score(X_test, y_test)]

# Plot both accuracy curves against the number of neighbours.
plt.plot(n_range, training_accuracy, label='training accuracy')
plt.plot(n_range, test_accuracy, label='test_accuracy')
plt.ylabel("Accuracy")
plt.xlabel("n_neighbors")
plt.legend()

<matplotlib.legend.Legend at 0x7f503a0f6a60>

Machine Learning¶

Why ML?¶

What kind of problems does ML solve?¶

some typical examples of ML applications¶

Slightly complicated examples¶

Things/Libraries required¶

Numpy¶

pandas¶

Simple example of classification¶

what is the effectiveness of my learning algorithm¶

Looking at data¶

Build our first model¶

Evaluating my model¶

Supervised learning¶

K-Neighbors classification¶

Analysing KNeighborsClassifiers¶

breast cancer dataset¶

	Name	Location	Score
0	Indraraj	Pune	95
1	Renuka	Pune	94
2	Pushkar	Mumbai	96
3	Gunjan	Aakurdi	95
4	Samiksha	Nashik	93
5	Nisha	Pune	97

	sepal length (cm)	sepal width (cm)	petal length (cm)	petal width (cm)
0	5.9	3.0	4.2	1.5
1	5.8	2.6	4.0	1.2
2	6.8	3.0	5.5	2.1
3	4.7	3.2	1.3	0.2
4	6.9	3.1	5.1	2.3
...	...	...	...	...
107	4.9	3.1	1.5	0.1
108	6.3	2.9	5.6	1.8
109	5.8	2.7	4.1	1.0
110	7.7	3.8	6.7	2.2
111	4.6	3.2	1.4	0.2

	sepal length (cm)	sepal width (cm)	petal length (cm)	petal width (cm)
0	5.9	3.0	4.2	1.5
1	5.8	2.6	4.0	1.2
2	6.8	3.0	5.5	2.1
3	4.7	3.2	1.3	0.2
4	6.9	3.1	5.1	2.3
...	...	...	...	...
107	4.9	3.1	1.5	0.1
108	6.3	2.9	5.6	1.8
109	5.8	2.7	4.1	1.0
110	7.7	3.8	6.7	2.2
111	4.6	3.2	1.4	0.2

	sepal length (cm)	sepal width (cm)	petal length (cm)	petal width (cm)
0	5.9	3.0	4.2	1.5
1	5.8	2.6	4.0	1.2
2	6.8	3.0	5.5	2.1
3	4.7	3.2	1.3	0.2
4	6.9	3.1	5.1	2.3
...	...	...	...	...
107	4.9	3.1	1.5	0.1
108	6.3	2.9	5.6	1.8
109	5.8	2.7	4.1	1.0
110	7.7	3.8	6.7	2.2
111	4.6	3.2	1.4	0.2