In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# show matlpotlib graphs in the same HTML page
%matplotlib inline
In [2]:
hdata = pd.read_csv("Heart_Disease_Not-Processed.csv")
In [3]:
hdata.head()
Out[3]:
age sex cp trestbps chol fbs restecg thalach exang oldpeak slop ca thal pred_attribute
0 63 1 1 145 233.0 1 2 150 0 2.3 3 0 6 0
1 67 1 4 160 NaN 0 2 108 1 1.5 2 3 3 2
2 67 1 7 120 229.0 0 2 129 1 2.6 2 2 7 1
3 37 1 3 130 250.0 0 0 187 0 3.5 3 0 3 0
4 41 0 2 130 204.0 0 2 172 0 1.4 1 0 3 0
In [21]:
hdata.tail()
Out[21]:
age sex cp trestbps chol fbs restecg thalach exang oldpeak slop ca thal pred_attribute
298 45 1 1 110 NaN 0 0 132 0 1.2 2 0 7 1
299 68 1 4 144 193.0 1 0 141 0 3.4 2 2 7 2
300 57 1 5 130 131.0 0 0 115 1 1.2 2 1 7 3
301 57 0 2 130 236.0 0 2 174 0 0.0 2 1 3 1
302 38 1 3 138 175.0 0 0 173 0 0.0 1 ? 3 0
In [20]:
hdata.plot("age","trestbps",kind="scatter")
Out[20]:
<matplotlib.axes._subplots.AxesSubplot at 0x7fe03322cc18>
In [22]:
hdata.plot("age","thalach",kind="scatter")
Out[22]:
<matplotlib.axes._subplots.AxesSubplot at 0x7fe02fdf8f98>
In [35]:
hdata.plot()
Out[35]:
<matplotlib.axes._subplots.AxesSubplot at 0x7fe02dcfb390>
In [28]:
hdata.plot()
Out[28]:
<matplotlib.axes._subplots.AxesSubplot at 0x7fe02e1332e8>
In [4]:
hd1 = pd.read_csv("Heart_Disease_Data.csv")
In [5]:
hd1.head()
Out[5]:
age sex cp trestbps chol fbs restecg thalach exang oldpeak slop ca thal pred_attribute
0 63 1 1 145 233 1 2 150 0 2.3 3 0 6 0
1 67 1 4 160 286 0 2 108 1 1.5 2 3 3 2
2 67 1 4 120 229 0 2 129 1 2.6 2 2 7 1
3 37 1 3 130 250 0 0 187 0 3.5 3 0 3 0
4 41 0 2 130 204 0 2 172 0 1.4 1 0 3 0
In [6]:
hd1.tail()
Out[6]:
age sex cp trestbps chol fbs restecg thalach exang oldpeak slop ca thal pred_attribute
298 45 1 1 110 264 0 0 132 0 1.2 2 0 7 1
299 68 1 4 144 193 1 0 141 0 3.4 2 2 7 2
300 57 1 4 130 131 0 0 115 1 1.2 2 1 7 3
301 57 0 2 130 236 0 2 174 0 0.0 2 1 3 1
302 38 1 3 138 175 0 0 173 0 0.0 1 ? 3 0
In [9]:
hdata.tail()
Out[9]:
age sex cp trestbps chol fbs restecg thalach exang oldpeak slop ca thal pred_attribute
298 45 1 1 110 NaN 0 0 132 0 1.2 2 0 7 1
299 68 1 4 144 193.0 1 0 141 0 3.4 2 2 7 2
300 57 1 5 130 131.0 0 0 115 1 1.2 2 1 7 3
301 57 0 2 130 236.0 0 2 174 0 0.0 2 1 3 1
302 38 1 3 138 175.0 0 0 173 0 0.0 1 ? 3 0
In [8]:
max(hd1.chol)
Out[8]:
564
In [10]:
months = ["jan","feb","mar","apr","may"]
In [11]:
col = ["feb","jan","may","apr","mar"]
In [13]:
[months.index(m) for m in col]
Out[13]:
[1, 0, 4, 3, 2]
In [14]:
hdata.columns
Out[14]:
Index(['age', 'sex', 'cp', 'trestbps', 'chol', 'fbs', 'restecg', 'thalach',
       'exang', 'oldpeak', 'slop', 'ca', 'thal', 'pred_attribute'],
      dtype='object')
In [16]:
hdata.plot("chol","age", kind="scatter")
Out[16]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f9cfb4ea550>
In [ ]:
hdata.plot("chol","age", kind="scatter")