Basic Python Training at Arcesium - Day 5

Nov 26-30, 2018 Vikrant Patil

These notes are available online at http://notes.pipal.in/2018/arcesium-basic-nov/day5.html

© Pipal Academy LLP

Day 1 | Day 2 | Day 3 | Day 4 | Day 5

We will be using python 3 (>= 3.0) from anaconda for this training. You can download it from

https://www.anaconda.com/download/

pitfalls

In [1]:
x = 3
y = x
y = 5
print(x)
3
In [2]:
a = [1,2,3,4]
b = a
b.append(5)
print(a)
[1, 2, 3, 4, 5]
In [3]:
a = [1,2,3,4]
b = a
b = [1,1,1]
print(a)
[1, 2, 3, 4]
In [4]:
l = [1]*5
def append(x):
    """Append -1 to ``x`` in place; the caller's list sees the change."""
    marker = -1
    x.append(marker)
append(l)
print(l)
[1, 1, 1, 1, 1, -1]
In [5]:
x = 3
def addone(z):
    """Demonstrate that rebinding a parameter does not affect the caller.

    ``z = z+1`` binds the local name ``z`` to a new object; nothing is
    returned, so a call to this function evaluates to ``None``.
    """
    z = z+1
    
addone(x)
print(x)
3

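For immutable basic types such as int, float and bool, a function cannot change the caller's variable: `z = z+1` only rebinds the local name `z` inside the function, so `x` is still 3 afterwards.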

In [8]:
a = addone(3) # if function has no return statement, it returns None
In [7]:
print(a)
None
In [9]:
def fun():
    """Print the greeting ``Fun!`` and return nothing."""
    greeting = "Fun!"
    print(greeting)
In [10]:
fun
Out[10]:
<function __main__.fun>
In [11]:
type(a)
Out[11]:
NoneType
In [12]:
type([1,2,3])
Out[12]:
list
In [13]:
type(fun)
Out[13]:
function
In [14]:
aliasfun = fun
In [15]:
aliasfun
Out[15]:
<function __main__.fun>
In [16]:
fun()
Fun!
In [18]:
aliasfun()
Fun!
In [19]:
def square(x):
    """Return ``x`` multiplied by itself."""
    product = x * x
    return product


def sumofsquare(x, y):
    """Return the sum of the squares of ``x`` and ``y``."""
    first = square(x)
    second = square(y)
    return first + second
In [20]:
def sumof(f, x, y):
    """Apply ``f`` to ``x`` and to ``y`` and return the sum of both results.

    ``f`` is any one-argument callable, so functions can be passed around
    like any other value.
    """
    left = f(x)
    right = f(y)
    return left + right
In [21]:
sumof(square, 4, 5)
Out[21]:
41
In [22]:
sumofsquare(4, 5)
Out[22]:
41
In [23]:
words = ["one","two","three","four","five","six"]
In [24]:
sorted(words)
Out[24]:
['five', 'four', 'one', 'six', 'three', 'two']
In [25]:
sorted(words, key=len)
Out[25]:
['one', 'two', 'six', 'four', 'five', 'three']
In [26]:
max(words)
Out[26]:
'two'
In [27]:
max(words, key=len)
Out[27]:
'three'
In [28]:
sorted(words, key=len, reverse=True)
Out[28]:
['three', 'four', 'five', 'one', 'two', 'six']
In [29]:
records = [
    ("tata", 100, 40),
    ("raliance", 300, 17),
    ("infy", 250, 20),
    ("arc", 100,1000)
    
]
In [30]:
records
Out[30]:
[('tata', 100, 40),
 ('raliance', 300, 17),
 ('infy', 250, 20),
 ('arc', 100, 1000)]
In [31]:
max(records)
Out[31]:
('tata', 100, 40)
In [32]:
min(records)
Out[32]:
('arc', 100, 1000)
In [33]:
def get_value(r):
    """Return the field at index 1 of record ``r`` (used as its value)."""
    value = r[1]
    return value


def get_volume(r):
    """Return the field at index 2 of record ``r`` (used as its volume)."""
    volume = r[2]
    return volume


def get_name(r):
    """Return the field at index 0 of record ``r`` (used as its name)."""
    name = r[0]
    return name

max(records, key=get_value)
Out[33]:
('raliance', 300, 17)
In [34]:
max(records, key=get_volume)
Out[34]:
('arc', 100, 1000)

classes

In [39]:
%%file bank0.py
"""A single bank account kept in one module-level variable."""
balance = 0


def get_balance():
    """Return the current module-wide balance."""
    return balance


def deposit(amount):
    """Add ``amount`` to the module-wide balance."""
    global balance
    balance = balance + amount


def withdraw(amount):
    """Subtract ``amount`` from the module-wide balance (no overdraft check)."""
    global balance
    balance = balance - amount


if __name__ == "__main__":
    print("Initial :", get_balance())
    # Each step performs one operation and then reports the new balance.
    steps = (
        ("After deposit :", deposit, 100),
        ("current balance:", withdraw, 20),
    )
    for label, operation, amount in steps:
        operation(amount)
        print(label, get_balance())
Overwriting bank0.py
In [40]:
!python bank0.py
Initial : 0
After deposit : 100
current balance: 80
In [55]:
%%file bank1.py

"""Bank accounts represented as plain dictionaries."""


def make_account(balance):
    """Create an account dict whose only key is ``"balance"``."""
    return dict(balance=balance)


def get_balance(account):
    """Return the balance stored in ``account``."""
    return account["balance"]


def deposit(account, amount):
    """Increase the balance of ``account`` by ``amount`` in place."""
    account["balance"] = account["balance"] + amount


def withdraw(account, amount):
    """Decrease the balance of ``account`` by ``amount`` in place."""
    account["balance"] = account["balance"] - amount


if __name__ == "__main__":
    a1 = make_account(0)
    a2 = make_account(100)

    def show_balances():
        # Print both balances in the same "label value" form every time.
        for label, account in (("a1:", a1), ("a2:", a2)):
            print(label, get_balance(account))

    show_balances()
    deposit(a1, 100)
    deposit(a2, 300)
    show_balances()

    withdraw(a1, 42)
    withdraw(a2, 42)
    show_balances()
Overwriting bank1.py
In [43]:
class BankAccount:
    """A bank account holding a single ``balance`` attribute.

    The first parameter of every method receives the instance. Its name is
    arbitrary: ``account`` mirrors the dict-based functions of bank1.py,
    while ``self`` is the usual Python convention.
    """

    def __init__(account, balance):
        """Store the opening ``balance`` on the new instance."""
        account.balance = balance

    def get_balance(account):
        """Return the current balance."""
        return account.balance

    def deposit(account, amount):
        """Add ``amount`` to the balance."""
        account.balance += amount

    def withdraw(self, amount):
        """Subtract ``amount`` from the balance (no overdraft check)."""
        # This method names its instance parameter ``self``, so that is the
        # name to use here; ``account`` is not defined in this scope.
        self.balance -= amount
        
In [44]:
a1 = BankAccount(100)
a2 = BankAccount(500)
In [45]:
a1.get_balance()
Out[45]:
100
In [46]:
a2.get_balance()
Out[46]:
500
In [47]:
class Foo:
    """An empty class: it defines no attributes or methods of its own."""
In [48]:
f = Foo()
In [49]:
f
Out[49]:
<__main__.Foo at 0x7f8da41f1da0>
In [50]:
class Bar:  # has no __init__: the attribute only exists after add_value()
    
    def add_value(self, value):  # the assignment below creates self.value
        self.value = value
        
    def get_value(self):  # raises AttributeError if add_value() was never called
        return self.value
In [51]:
b = Bar()
In [52]:
b.get_value()
---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
<ipython-input-52-1f56a10a8ef8> in <module>()
----> 1 b.get_value()

<ipython-input-50-b2dffb257c07> in get_value(self)
      5 
      6     def get_value(self):
----> 7         return self.value

AttributeError: 'Bar' object has no attribute 'value'
In [53]:
b.add_value(5)
In [54]:
b.get_value()
Out[54]:
5
In [57]:
BankAccount.get_balance(a1)
Out[57]:
100
In [58]:
a1.get_balance()
Out[58]:
100
In [59]:
a3 = BankAccount()
---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-59-76e8aaaf0d8e> in <module>()
----> 1 a3 = BankAccount()

TypeError: __init__() missing 1 required positional argument: 'balance'
In [60]:
def area(radius=1):
    """Return the area of a circle of ``radius``, approximating pi as 3.14.

    ``radius`` defaults to 1, so ``area()`` can be called with no argument.
    """
    pi_approx = 3.14
    return pi_approx * radius * radius
In [61]:
area(3)
Out[61]:
28.259999999999998
In [62]:
area()
Out[62]:
3.14
In [67]:
class BankAccount:
    """A bank account whose balance starts at 0 unless one is given."""

    def __init__(self, balance=0):
        """Set the opening balance (defaults to 0)."""
        self.balance = balance

    def get_balance(self):
        """Return the current balance."""
        return self.balance

    def deposit(self, amount):
        """Add ``amount`` to the balance."""
        self.balance = self.balance + amount

    def withdraw(self, amount):
        """Subtract ``amount``; the balance is allowed to go negative."""
        self.balance = self.balance - amount
        
In [68]:
a3 = BankAccount()
In [69]:
a3.get_balance()
Out[69]:
0
In [70]:
a3.deposit(200)
In [71]:
a3.get_balance()
Out[71]:
200
In [72]:
a3.withdraw(500)
In [73]:
a3.get_balance()
Out[73]:
-300
In [74]:
BankAccount.get_balance(a3)
Out[74]:
-300

pandas

pip install pandas

In [75]:
import pandas as pd
In [78]:
import random
s = pd.Series([random.random() for i in range(5)])
In [79]:
s
Out[79]:
0    0.459782
1    0.808892
2    0.100614
3    0.402390
4    0.335367
dtype: float64
In [80]:
s[0]
Out[80]:
0.4597822431743116
In [81]:
s[1]
Out[81]:
0.80889218490612169
In [83]:
s[4]
Out[83]:
0.33536654888440565
In [84]:
s = pd.Series([random.random() for i in range(5)], index=['a','b','c','d','e'])
In [85]:
s
Out[85]:
a    0.677113
b    0.283198
c    0.687966
d    0.782437
e    0.284615
dtype: float64
In [86]:
s['a']
Out[86]:
0.67711293882154155
In [87]:
s['d']
Out[87]:
0.78243740055450273
In [88]:
s[0]
Out[88]:
0.67711293882154155
In [89]:
s
Out[89]:
a    0.677113
b    0.283198
c    0.687966
d    0.782437
e    0.284615
dtype: float64
In [90]:
s.reindex(index=['e','b','a','c','d'])
Out[90]:
e    0.284615
b    0.283198
a    0.677113
c    0.687966
d    0.782437
dtype: float64
In [91]:
s
Out[91]:
a    0.677113
b    0.283198
c    0.687966
d    0.782437
e    0.284615
dtype: float64
In [92]:
neworder = s.reindex(index=['e','b','a','c','d'])
In [93]:
neworder
Out[93]:
e    0.284615
b    0.283198
a    0.677113
c    0.687966
d    0.782437
dtype: float64
In [95]:
neworder = s.reindex(index=['e','b','a','c','d', "f"])
In [96]:
neworder
Out[96]:
e    0.284615
b    0.283198
a    0.677113
c    0.687966
d    0.782437
f         NaN
dtype: float64
In [97]:
neworder['f'] = 0.04
In [98]:
neworder
Out[98]:
e    0.284615
b    0.283198
a    0.677113
c    0.687966
d    0.782437
f    0.040000
dtype: float64
In [99]:
 pd.Series([random.random() for i in range(5)], index=['a','b','c','d'])
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-99-dad3c8ecaa7b> in <module>()
----> 1 pd.Series([random.random() for i in range(5)], index=['a','b','c','d'])

~/usr/local/anaconda3/lib/python3.6/site-packages/pandas/core/series.py in __init__(self, data, index, dtype, name, copy, fastpath)
    248                                        raise_cast_failure=True)
    249 
--> 250                 data = SingleBlockManager(data, index, fastpath=True)
    251 
    252         generic.NDFrame.__init__(self, data, fastpath=True)

~/usr/local/anaconda3/lib/python3.6/site-packages/pandas/core/internals.py in __init__(self, block, axis, do_integrity_check, fastpath)
   4115         if not isinstance(block, Block):
   4116             block = make_block(block, placement=slice(0, len(axis)), ndim=1,
-> 4117                                fastpath=True)
   4118 
   4119         self.blocks = [block]

~/usr/local/anaconda3/lib/python3.6/site-packages/pandas/core/internals.py in make_block(values, placement, klass, ndim, dtype, fastpath)
   2717                      placement=placement, dtype=dtype)
   2718 
-> 2719     return klass(values, ndim=ndim, fastpath=fastpath, placement=placement)
   2720 
   2721 # TODO: flexible with index=None and/or items=None

~/usr/local/anaconda3/lib/python3.6/site-packages/pandas/core/internals.py in __init__(self, values, placement, ndim, fastpath)
    113             raise ValueError('Wrong number of items passed %d, placement '
    114                              'implies %d' % (len(self.values),
--> 115                                              len(self.mgr_locs)))
    116 
    117     @property

ValueError: Wrong number of items passed 5, placement implies 4
In [104]:
import string
# Five random floats in [0.0, 1.0); the loop variable is unused, hence "_".
values = [random.random() for _ in range(5)]
# The first five uppercase letters; list() splits the string into characters.
keys = list(string.ascii_uppercase[:5])
In [105]:
values
Out[105]:
[0.32609921371823714,
 0.327397605227011,
 0.7611946096657219,
 0.47281894960324966,
 0.16712038381357497]
In [106]:
keys
Out[106]:
['A', 'B', 'C', 'D', 'E']
In [108]:
d = dict(zip(keys, values))
In [109]:
d
Out[109]:
{'A': 0.32609921371823714,
 'B': 0.327397605227011,
 'C': 0.7611946096657219,
 'D': 0.47281894960324966,
 'E': 0.16712038381357497}
In [110]:
s1 = pd.Series(d)
In [111]:
s1
Out[111]:
A    0.326099
B    0.327398
C    0.761195
D    0.472819
E    0.167120
dtype: float64
In [114]:
gt_0_5 = s1[s1>0.5]
In [115]:
lt_0_5 = s1[s1<0.5]
In [116]:
gt_0_5
Out[116]:
C    0.761195
dtype: float64
In [117]:
lt_0_5
Out[117]:
A    0.326099
B    0.327398
D    0.472819
E    0.167120
dtype: float64
In [118]:
s
Out[118]:
a    0.677113
b    0.283198
c    0.687966
d    0.782437
e    0.284615
dtype: float64
In [120]:
cond = s > 0.5
In [122]:
cond
Out[122]:
a     True
b    False
c     True
d     True
e    False
dtype: bool
In [123]:
s[cond]
Out[123]:
a    0.677113
c    0.687966
d    0.782437
dtype: float64
In [125]:
genders = pd.Series([random.choice(["Male", "Female"]) for i in range(10)])
In [126]:
genders
Out[126]:
0      Male
1      Male
2      Male
3    Female
4      Male
5    Female
6      Male
7      Male
8    Female
9    Female
dtype: object
In [127]:
genders=="Female"
Out[127]:
0    False
1    False
2    False
3     True
4    False
5     True
6    False
7    False
8     True
9     True
dtype: bool
In [129]:
filter_ = genders=="Female"
In [130]:
genders[filter_]
Out[130]:
3    Female
5    Female
8    Female
9    Female
dtype: object
In [131]:
!cat stocks.csv
name,ticker,value,volume
Infosys,infy,1000,500
Tata,tata,500,50
Reliance,reliance,700,100
Tata Infotech,tatainf,600,60
In [133]:
df = pd.DataFrame({"col1":[1,2,3,4,5],
                   "col2":[0.1,0.3,0.5,0.6,0.8],
                   "cat":['a','a','b','b','d']})
In [134]:
df
Out[134]:
cat col1 col2
0 a 1 0.1
1 a 2 0.3
2 b 3 0.5
3 b 4 0.6
4 d 5 0.8
In [135]:
sdata = {
    "name":["Infosys","Tata","Reliance", "Tata Infotech"],
    "ticker":["infy","tata","reliance","tatainf"],
    "value":[1000,500,700,600],
    "volume":[500, 50, 100, 60]
}
stocks = pd.DataFrame(sdata)
In [136]:
stocks
Out[136]:
name ticker value volume
0 Infosys infy 1000 500
1 Tata tata 500 50
2 Reliance reliance 700 100
3 Tata Infotech tatainf 600 60
In [138]:
stocks['name']
Out[138]:
0          Infosys
1             Tata
2         Reliance
3    Tata Infotech
Name: name, dtype: object
In [139]:
stocks.name
Out[139]:
0          Infosys
1             Tata
2         Reliance
3    Tata Infotech
Name: name, dtype: object
In [140]:
stocks.ticker
Out[140]:
0        infy
1        tata
2    reliance
3     tatainf
Name: ticker, dtype: object
In [141]:
stocks.value
Out[141]:
0    1000
1     500
2     700
3     600
Name: value, dtype: int64
In [143]:
stocks.describe()
Out[143]:
value volume
count 4.00000 4.000000
mean 700.00000 177.500000
std 216.02469 216.082546
min 500.00000 50.000000
25% 575.00000 57.500000
50% 650.00000 80.000000
75% 775.00000 200.000000
max 1000.00000 500.000000
In [144]:
dsc = stocks.describe()
In [150]:
dsc.iloc[:3, 1] # iloc gives access by row number and column number
Out[150]:
count      4.000000
mean     177.500000
std      216.082546
Name: volume, dtype: float64
In [151]:
dsc.iloc[:3, 0]
Out[151]:
count      4.00000
mean     700.00000
std      216.02469
Name: value, dtype: float64
In [152]:
dsc.loc['min']
Out[152]:
value     500.0
volume     50.0
Name: min, dtype: float64
In [153]:
stocks
Out[153]:
name ticker value volume
0 Infosys infy 1000 500
1 Tata tata 500 50
2 Reliance reliance 700 100
3 Tata Infotech tatainf 600 60
In [154]:
stocks.loc[3]
Out[154]:
name      Tata Infotech
ticker          tatainf
value               600
volume               60
Name: 3, dtype: object
In [157]:
thirdrow  = stocks.loc[3, ['ticker','value','volume']]
In [158]:
thirdrow
Out[158]:
ticker    tatainf
value         600
volume         60
Name: 3, dtype: object
In [159]:
thirdrow['value']
Out[159]:
600
In [160]:
thirdrow['volume']
Out[160]:
60
In [161]:
stocks.ndim
Out[161]:
2
In [162]:
t = [[i*j for i in range(3)] for j in range(5)]
In [163]:
t
Out[163]:
[[0, 0, 0], [0, 1, 2], [0, 2, 4], [0, 3, 6], [0, 4, 8]]
In [164]:
t[3,1] # will not
---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-164-49866ff2a57d> in <module>()
----> 1 t[3,1] # will not

TypeError: list indices must be integers or slices, not tuple
In [165]:
t[3][1]
Out[165]:
3
In [166]:
stocks.iloc[3,1]
Out[166]:
'tatainf'
In [167]:
stocks.iloc[:,1]
Out[167]:
0        infy
1        tata
2    reliance
3     tatainf
Name: ticker, dtype: object
In [168]:
t[3][:]
Out[168]:
[0, 3, 6]
In [170]:
t
Out[170]:
[[0, 0, 0], [0, 1, 2], [0, 2, 4], [0, 3, 6], [0, 4, 8]]
In [171]:
t[1:3]
Out[171]:
[[0, 1, 2], [0, 2, 4]]
In [172]:
t[1:3][0]
Out[172]:
[0, 1, 2]
In [173]:
stocks.iloc[:,2]
Out[173]:
0    1000
1     500
2     700
3     600
Name: value, dtype: int64
In [175]:
st = pd.read_csv("stocks.csv")
In [176]:
st
Out[176]:
name ticker value volume
0 Infosys infy 1000 500
1 Tata tata 500 50
2 Reliance reliance 700 100
3 Tata Infotech tatainf 600 60
In [177]:
url = "http://notes.pipal.in/2018/arcesium-basic-nov/loansData.csv"
In [ ]:
 
In [178]:
loansData = pd.read_csv(url)
In [179]:
loansData.head(5)
Out[179]:
Amount.Requested Amount.Funded.By.Investors Interest.Rate Loan.Length Loan.Purpose Debt.To.Income.Ratio State Home.Ownership Monthly.Income FICO.Range Open.CREDIT.Lines Revolving.CREDIT.Balance Inquiries.in.the.Last.6.Months Employment.Length
81174 20000 20000.0 8.90% 36 months debt_consolidation 14.90% SC MORTGAGE 6541.67 735-739 14.0 14272.0 2.0 < 1 year
99592 19200 19200.0 12.12% 36 months debt_consolidation 28.36% TX MORTGAGE 4583.33 715-719 12.0 11140.0 1.0 2 years
80059 35000 35000.0 21.98% 60 months debt_consolidation 23.81% CA MORTGAGE 11500.00 690-694 14.0 21977.0 1.0 2 years
15825 10000 9975.0 9.99% 36 months debt_consolidation 14.30% KS MORTGAGE 3833.33 695-699 10.0 9346.0 0.0 5 years
33182 12000 12000.0 11.71% 36 months credit_card 18.78% NJ RENT 3195.00 695-699 11.0 14469.0 0.0 9 years
In [184]:
loansData.dtypes # show datatypes of columns
Out[184]:
Amount.Requested                    int64
Amount.Funded.By.Investors        float64
Interest.Rate                      object
Loan.Length                        object
Loan.Purpose                       object
Debt.To.Income.Ratio               object
State                              object
Home.Ownership                     object
Monthly.Income                    float64
FICO.Range                         object
Open.CREDIT.Lines                 float64
Revolving.CREDIT.Balance          float64
Inquiries.in.the.Last.6.Months    float64
Employment.Length                  object
dtype: object
In [183]:
loansData.shape # show how many rows and columns are there
Out[183]:
(2500, 14)
In [185]:
loansData['Amount.Requested']
Out[185]:
81174     20000
99592     19200
80059     35000
15825     10000
33182     12000
62403      6000
48808     10000
22090     33500
76404     14675
15867      7000
94971      2000
36911     10625
41200     28000
83869     35000
53853      9600
21399     25000
62127     10000
23446     14000
44987     10000
17977      5200
86099     22000
99483     30000
28798      6500
24168     17400
10356      4000
46027      7200
2238       8000
65278      8000
4227       3000
50182     14500
          ...  
84265     20000
80231     19000
49533     17300
102514     7000
78618      7200
86953     10000
80129      4000
85216     17500
38247     20000
91245     16200
53041     10000
63051     27000
14446      4500
68628     15875
98758     15000
13070     25000
45836      7000
52330     15000
48243     17000
63256     19075
42124     10000
78043      8475
925        6400
74047     30000
49957     24000
23735     30000
65882     16000
55610     10000
38576      6000
3116       9000
Name: Amount.Requested, Length: 2500, dtype: int64
In [187]:
loansData.columns # names of columns
Out[187]:
Index(['Amount.Requested', 'Amount.Funded.By.Investors', 'Interest.Rate',
       'Loan.Length', 'Loan.Purpose', 'Debt.To.Income.Ratio', 'State',
       'Home.Ownership', 'Monthly.Income', 'FICO.Range', 'Open.CREDIT.Lines',
       'Revolving.CREDIT.Balance', 'Inquiries.in.the.Last.6.Months',
       'Employment.Length'],
      dtype='object')
In [188]:
oldnames = list(loansData.columns)  # plain Python list of the column labels
In [189]:
oldnames
Out[189]:
['Amount.Requested',
 'Amount.Funded.By.Investors',
 'Interest.Rate',
 'Loan.Length',
 'Loan.Purpose',
 'Debt.To.Income.Ratio',
 'State',
 'Home.Ownership',
 'Monthly.Income',
 'FICO.Range',
 'Open.CREDIT.Lines',
 'Revolving.CREDIT.Balance',
 'Inquiries.in.the.Last.6.Months',
 'Employment.Length']
In [190]:
newnames = [name.replace(".","_") for name in oldnames]
In [191]:
newnames
Out[191]:
['Amount_Requested',
 'Amount_Funded_By_Investors',
 'Interest_Rate',
 'Loan_Length',
 'Loan_Purpose',
 'Debt_To_Income_Ratio',
 'State',
 'Home_Ownership',
 'Monthly_Income',
 'FICO_Range',
 'Open_CREDIT_Lines',
 'Revolving_CREDIT_Balance',
 'Inquiries_in_the_Last_6_Months',
 'Employment_Length']
In [192]:
cols = dict(zip(oldnames, newnames))
loansData.rename(columns=cols)
Out[192]:
Amount_Requested Amount_Funded_By_Investors Interest_Rate Loan_Length Loan_Purpose Debt_To_Income_Ratio State Home_Ownership Monthly_Income FICO_Range Open_CREDIT_Lines Revolving_CREDIT_Balance Inquiries_in_the_Last_6_Months Employment_Length
81174 20000 20000.00 8.90% 36 months debt_consolidation 14.90% SC MORTGAGE 6541.67 735-739 14.0 14272.0 2.0 < 1 year
99592 19200 19200.00 12.12% 36 months debt_consolidation 28.36% TX MORTGAGE 4583.33 715-719 12.0 11140.0 1.0 2 years
80059 35000 35000.00 21.98% 60 months debt_consolidation 23.81% CA MORTGAGE 11500.00 690-694 14.0 21977.0 1.0 2 years
15825 10000 9975.00 9.99% 36 months debt_consolidation 14.30% KS MORTGAGE 3833.33 695-699 10.0 9346.0 0.0 5 years
33182 12000 12000.00 11.71% 36 months credit_card 18.78% NJ RENT 3195.00 695-699 11.0 14469.0 0.0 9 years
62403 6000 6000.00 15.31% 36 months other 20.05% CT OWN 4891.67 670-674 17.0 10391.0 2.0 3 years
48808 10000 10000.00 7.90% 36 months debt_consolidation 26.09% MA RENT 2916.67 720-724 10.0 15957.0 0.0 10+ years
22090 33500 33450.00 17.14% 60 months credit_card 14.70% LA MORTGAGE 13863.42 705-709 12.0 27874.0 0.0 10+ years
76404 14675 14675.00 14.33% 36 months credit_card 26.92% CA RENT 3150.00 685-689 9.0 7246.0 1.0 8 years
15867 7000 7000.00 6.91% 36 months credit_card 7.10% CA RENT 5000.00 715-719 8.0 7612.0 0.0 3 years
94971 2000 2000.00 19.72% 36 months moving 10.29% FL RENT 3575.00 670-674 10.0 12036.0 0.0 6 years
36911 10625 10625.00 14.27% 36 months debt_consolidation 12.54% CA MORTGAGE 4250.00 665-669 14.0 10767.0 0.0 < 1 year
41200 28000 27975.00 21.67% 60 months debt_consolidation 13.07% CT MORTGAGE 14166.67 670-674 12.0 10311.0 0.0 1 year
83869 35000 34950.00 8.90% 36 months debt_consolidation 20.46% CT RENT 9166.67 735-739 19.0 21536.0 0.0 1 year
53853 9600 9600.00 7.62% 36 months debt_consolidation 3.45% DC RENT 11250.00 725-729 13.0 4606.0 0.0 < 1 year
21399 25000 24975.00 15.65% 60 months debt_consolidation 21.99% CA RENT 5416.67 730-734 6.0 13929.0 0.0 9 years
62127 10000 10000.00 12.12% 36 months debt_consolidation 17.72% CA RENT 9000.00 695-699 18.0 20317.0 0.0 7 years
23446 14000 13900.25 10.37% 60 months debt_consolidation 11.95% OH RENT 4333.33 740-744 6.0 7419.0 0.0 9 years
44987 10000 10000.00 9.76% 36 months credit_card 7.13% FL RENT 2733.33 730-734 7.0 6112.0 2.0 3 years
17977 5200 5175.00 9.99% 60 months debt_consolidation 10.29% AL MORTGAGE 3750.00 760-764 10.0 16094.0 0.0 < 1 year
86099 22000 21975.00 21.98% 36 months debt_consolidation 11.19% TX MORTGAGE 6666.67 665-669 9.0 23124.0 0.0 10+ years
99483 30000 30000.00 19.05% 60 months credit_card 21.25% FL MORTGAGE 6250.00 695-699 12.0 34927.0 0.0 6 years
28798 6500 6500.00 17.99% 60 months car 19.63% FL RENT 4100.00 665-669 11.0 11697.0 1.0 2 years
24168 17400 17400.00 11.99% 36 months credit_card 12.47% AZ RENT 6833.33 695-699 7.0 26587.0 0.0 7 years
10356 4000 4000.00 16.82% 60 months vacation 13.71% GA MORTGAGE 4500.00 670-674 5.0 20804.0 0.0 3 years
46027 7200 7200.00 7.90% 36 months debt_consolidation 24.82% TX RENT 5416.67 705-709 8.0 12017.0 0.0 7 years
2238 8000 8000.00 14.42% 36 months debt_consolidation 24.63% MA RENT 2964.17 675-679 9.0 8928.0 2.0 6 years
65278 8000 8000.00 15.31% 36 months debt_consolidation 15.46% CA MORTGAGE 2916.67 675-679 13.0 7152.0 1.0 5 years
4227 3000 3000.00 8.59% 36 months other 3.72% MA MORTGAGE 4167.00 765-769 4.0 7074.0 0.0 5 years
50182 14500 14500.00 7.90% 36 months debt_consolidation 4.85% GA MORTGAGE 3958.33 760-764 4.0 9598.0 0.0 4 years
... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
84265 20000 20000.00 22.95% 60 months debt_consolidation 7.10% NJ RENT 6750.00 665-669 6.0 16104.0 1.0 7 years
80231 19000 19000.00 7.90% 36 months debt_consolidation 9.76% RI MORTGAGE 5166.67 770-774 18.0 43617.0 2.0 10+ years
49533 17300 17250.00 22.45% 60 months wedding 3.58% PA MORTGAGE 5500.00 685-689 11.0 2306.0 3.0 4 years
102514 7000 711.54 15.13% 36 months major_purchase 18.91% CO MORTGAGE 3833.00 650-654 13.0 12634.0 0.0 2 years
78618 7200 7200.00 18.75% 36 months debt_consolidation 16.21% MI RENT 8333.33 660-664 8.0 28916.0 0.0 10+ years
86953 10000 10000.00 14.09% 36 months major_purchase 9.71% CO RENT 4583.33 675-679 6.0 3859.0 0.0 < 1 year
80129 4000 3925.00 14.09% 36 months credit_card 12.27% FL MORTGAGE 8583.33 675-679 9.0 36943.0 1.0 10+ years
85216 17500 17500.00 8.90% 36 months debt_consolidation 10.94% UT MORTGAGE 25000.00 730-734 9.0 34545.0 0.0 6 years
38247 20000 20000.00 11.71% 36 months credit_card 9.58% SD MORTGAGE 5416.67 725-729 12.0 18267.0 1.0 4 years
91245 16200 16200.00 15.80% 60 months debt_consolidation 7.92% PA MORTGAGE 4833.33 680-684 12.0 12313.0 2.0 10+ years
53041 10000 10000.00 6.03% 36 months small_business 13.03% FL RENT 5000.00 760-764 8.0 3952.0 0.0 2 years
63051 27000 27000.00 6.62% 36 months debt_consolidation 12.21% OH MORTGAGE 9250.00 810-814 12.0 4211.0 0.0 5 years
14446 4500 4475.00 7.51% 36 months small_business 20.27% VA MORTGAGE 7075.50 720-724 15.0 68618.0 2.0 10+ years
68628 15875 15875.00 14.33% 36 months small_business 17.44% MD MORTGAGE 3416.67 675-679 11.0 15891.0 0.0 2 years
98758 15000 15000.00 10.16% 36 months credit_card 28.28% OH MORTGAGE 6666.67 690-694 15.0 14880.0 0.0 10+ years
13070 25000 24950.00 10.75% 36 months debt_consolidation 20.48% OR MORTGAGE 7083.33 765-769 10.0 25429.0 0.0 6 years
45836 7000 7000.00 17.27% 36 months other 18.38% NY OWN 2464.37 665-669 9.0 7089.0 0.0 3 years
52330 15000 15000.00 19.99% 36 months wedding 18.05% CA RENT 8000.00 660-664 6.0 45976.0 1.0 2 years
48243 17000 17000.00 15.81% 36 months debt_consolidation 17.01% CO RENT 3833.33 685-689 6.0 15484.0 1.0 6 years
63256 19075 19075.00 18.75% 36 months debt_consolidation 15.23% NY RENT 5166.67 670-674 17.0 13749.0 3.0 10+ years
42124 10000 10000.00 11.71% 36 months debt_consolidation 8.40% CA RENT 4500.00 710-714 8.0 8404.0 1.0 3 years
78043 8475 8475.00 7.62% 36 months debt_consolidation 15.88% CA RENT 3983.33 720-724 9.0 6882.0 0.0 n/a
925 6400 6350.00 10.08% 36 months debt_consolidation 8.11% NJ MORTGAGE 5166.67 710-714 5.0 5815.0 2.0 10+ years
74047 30000 30000.00 23.28% 60 months other 12.10% IL MORTGAGE 7083.33 675-679 16.0 17969.0 1.0 10+ years
49957 24000 23975.00 14.65% 36 months debt_consolidation 15.29% WA MORTGAGE 6666.67 685-689 13.0 17521.0 0.0 5 years
23735 30000 29950.00 16.77% 60 months debt_consolidation 19.23% NY MORTGAGE 9250.00 705-709 15.0 45880.0 1.0 8 years
65882 16000 16000.00 14.09% 60 months home_improvement 21.54% MD OWN 8903.25 740-744 18.0 18898.0 1.0 10+ years
55610 10000 10000.00 13.99% 36 months debt_consolidation 4.89% PA MORTGAGE 2166.67 680-684 4.0 4544.0 0.0 10+ years
38576 6000 6000.00 12.42% 36 months major_purchase 16.66% NJ RENT 3500.00 675-679 8.0 7753.0 0.0 5 years
3116 9000 5242.75 13.79% 36 months debt_consolidation 6.76% NY RENT 3875.00 670-674 7.0 7589.0 0.0 10+ years

2500 rows × 14 columns

In [ ]:
 
In [196]:
loansData.rename(columns=cols, inplace=True) # this will change names in original dataframe
In [194]:
loansData.head()
Out[194]:
Amount_Requested Amount_Funded_By_Investors Interest_Rate Loan_Length Loan_Purpose Debt_To_Income_Ratio State Home_Ownership Monthly_Income FICO_Range Open_CREDIT_Lines Revolving_CREDIT_Balance Inquiries_in_the_Last_6_Months Employment_Length
81174 20000 20000.0 8.90% 36 months debt_consolidation 14.90% SC MORTGAGE 6541.67 735-739 14.0 14272.0 2.0 < 1 year
99592 19200 19200.0 12.12% 36 months debt_consolidation 28.36% TX MORTGAGE 4583.33 715-719 12.0 11140.0 1.0 2 years
80059 35000 35000.0 21.98% 60 months debt_consolidation 23.81% CA MORTGAGE 11500.00 690-694 14.0 21977.0 1.0 2 years
15825 10000 9975.0 9.99% 36 months debt_consolidation 14.30% KS MORTGAGE 3833.33 695-699 10.0 9346.0 0.0 5 years
33182 12000 12000.0 11.71% 36 months credit_card 18.78% NJ RENT 3195.00 695-699 11.0 14469.0 0.0 9 years
In [197]:
dict([("a",1),("b",1),("c",2)])
Out[197]:
{'a': 1, 'b': 1, 'c': 2}
In [198]:
list(zip(['a','b','c'], [1,2,3]))
Out[198]:
[('a', 1), ('b', 2), ('c', 3)]
In [201]:
loansData.Interest_Rate.dtype
Out[201]:
dtype('O')

doing string operations

In [203]:
# Strip the "%" sign; the result is still a column of strings (dtype object).
interest = loansData.Interest_Rate.str.replace("%", "")
In [204]:
interest.head(5)
Out[204]:
81174     8.90
99592    12.12
80059    21.98
15825     9.99
33182    11.71
Name: Interest_Rate, dtype: object
In [205]:
interest = pd.to_numeric(interest)
In [206]:
interest.dtype
Out[206]:
dtype('float64')
In [208]:
interest.head()
Out[208]:
81174     8.90
99592    12.12
80059    21.98
15825     9.99
33182    11.71
Name: Interest_Rate, dtype: float64
In [209]:
loansData.Interest_Rate = interest
In [210]:
loansData.dtypes
Out[210]:
Amount_Requested                    int64
Amount_Funded_By_Investors        float64
Interest_Rate                     float64
Loan_Length                        object
Loan_Purpose                       object
Debt_To_Income_Ratio               object
State                              object
Home_Ownership                     object
Monthly_Income                    float64
FICO_Range                         object
Open_CREDIT_Lines                 float64
Revolving_CREDIT_Balance          float64
Inquiries_in_the_Last_6_Months    float64
Employment_Length                  object
dtype: object
In [211]:
loansData.Amount_Requested.str.replace("0",",")
---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
<ipython-input-211-8525d4fc7880> in <module>()
----> 1 loansData.Amount_Requested.str.replace("0",",")

~/usr/local/anaconda3/lib/python3.6/site-packages/pandas/core/generic.py in __getattr__(self, name)
   3075         if (name in self._internal_names_set or name in self._metadata or
   3076                 name in self._accessors):
-> 3077             return object.__getattribute__(self, name)
   3078         else:
   3079             if name in self._info_axis:

~/usr/local/anaconda3/lib/python3.6/site-packages/pandas/core/base.py in __get__(self, instance, owner)
    241             # this ensures that Series.str.<method> is well defined
    242             return self.accessor_cls
--> 243         return self.construct_accessor(instance)
    244 
    245     def __set__(self, instance, value):

~/usr/local/anaconda3/lib/python3.6/site-packages/pandas/core/strings.py in _make_str_accessor(self)
   1907             # (instead of test for object dtype), but that isn't practical for
   1908             # performance reasons until we have a str dtype (GH 9343)
-> 1909             raise AttributeError("Can only use .str accessor with string "
   1910                                  "values, which use np.object_ dtype in "
   1911                                  "pandas")

AttributeError: Can only use .str accessor with string values, which use np.object_ dtype in pandas
In [213]:
loansData.head(5)
Out[213]:
Amount_Requested Amount_Funded_By_Investors Interest_Rate Loan_Length Loan_Purpose Debt_To_Income_Ratio State Home_Ownership Monthly_Income FICO_Range Open_CREDIT_Lines Revolving_CREDIT_Balance Inquiries_in_the_Last_6_Months Employment_Length
81174 20000 20000.0 8.90 36 months debt_consolidation 14.90% SC MORTGAGE 6541.67 735-739 14.0 14272.0 2.0 < 1 year
99592 19200 19200.0 12.12 36 months debt_consolidation 28.36% TX MORTGAGE 4583.33 715-719 12.0 11140.0 1.0 2 years
80059 35000 35000.0 21.98 60 months debt_consolidation 23.81% CA MORTGAGE 11500.00 690-694 14.0 21977.0 1.0 2 years
15825 10000 9975.0 9.99 36 months debt_consolidation 14.30% KS MORTGAGE 3833.33 695-699 10.0 9346.0 0.0 5 years
33182 12000 12000.0 11.71 36 months credit_card 18.78% NJ RENT 3195.00 695-699 11.0 14469.0 0.0 9 years
In [214]:
loansData.FICO_Range.str.split("-").head(5)
Out[214]:
81174    [735, 739]
99592    [715, 719]
80059    [690, 694]
15825    [695, 699]
33182    [695, 699]
Name: FICO_Range, dtype: object
In [222]:
FICO = loansData.FICO_Range.str.split("-", expand=True)
# expand=True puts each piece produced by the split into its own column
In [223]:
FICO.head()
Out[223]:
0 1
81174 735 739
99592 715 719
80059 690 694
15825 695 699
33182 695 699
In [225]:
loansData['FICO_start'] = FICO[0]
loansData['FICO_end'] = FICO[1]
In [226]:
loansData.head()
Out[226]:
Amount_Requested Amount_Funded_By_Investors Interest_Rate Loan_Length Loan_Purpose Debt_To_Income_Ratio State Home_Ownership Monthly_Income FICO_Range Open_CREDIT_Lines Revolving_CREDIT_Balance Inquiries_in_the_Last_6_Months Employment_Length FICO_start FICO_end
81174 20000 20000.0 8.90 36 months debt_consolidation 14.90% SC MORTGAGE 6541.67 735-739 14.0 14272.0 2.0 < 1 year 735 739
99592 19200 19200.0 12.12 36 months debt_consolidation 28.36% TX MORTGAGE 4583.33 715-719 12.0 11140.0 1.0 2 years 715 719
80059 35000 35000.0 21.98 60 months debt_consolidation 23.81% CA MORTGAGE 11500.00 690-694 14.0 21977.0 1.0 2 years 690 694
15825 10000 9975.0 9.99 36 months debt_consolidation 14.30% KS MORTGAGE 3833.33 695-699 10.0 9346.0 0.0 5 years 695 699
33182 12000 12000.0 11.71 36 months credit_card 18.78% NJ RENT 3195.00 695-699 11.0 14469.0 0.0 9 years 695 699
In [228]:
del loansData["FICO_Range"]
In [229]:
loansData.columns
Out[229]:
Index(['Amount_Requested', 'Amount_Funded_By_Investors', 'Interest_Rate',
       'Loan_Length', 'Loan_Purpose', 'Debt_To_Income_Ratio', 'State',
       'Home_Ownership', 'Monthly_Income', 'Open_CREDIT_Lines',
       'Revolving_CREDIT_Balance', 'Inquiries_in_the_Last_6_Months',
       'Employment_Length', 'FICO_start', 'FICO_end'],
      dtype='object')
In [235]:
loansData.State
Out[235]:
81174     SC
99592     TX
80059     CA
15825     KS
33182     NJ
62403     CT
48808     MA
22090     LA
76404     CA
15867     CA
94971     FL
36911     CA
41200     CT
83869     CT
53853     DC
21399     CA
62127     CA
23446     OH
44987     FL
17977     AL
86099     TX
99483     FL
28798     FL
24168     AZ
10356     GA
46027     TX
2238      MA
65278     CA
4227      MA
50182     GA
          ..
84265     NJ
80231     RI
49533     PA
102514    CO
78618     MI
86953     CO
80129     FL
85216     UT
38247     SD
91245     PA
53041     FL
63051     OH
14446     VA
68628     MD
98758     OH
13070     OR
45836     NY
52330     CA
48243     CO
63256     NY
42124     CA
78043     CA
925       NJ
74047     IL
49957     WA
23735     NY
65882     MD
55610     PA
38576     NJ
3116      NY
Name: State, Length: 2500, dtype: object
In [236]:
loansData_CA = loansData[loansData.State=="CA"]
In [238]:
loansData_CA
Out[238]:
Amount_Requested Amount_Funded_By_Investors Interest_Rate Loan_Length Loan_Purpose Debt_To_Income_Ratio State Home_Ownership Monthly_Income Open_CREDIT_Lines Revolving_CREDIT_Balance Inquiries_in_the_Last_6_Months Employment_Length FICO_start FICO_end
80059 35000 35000.00 21.98 60 months debt_consolidation 23.81% CA MORTGAGE 11500.00 14.0 21977.0 1.0 2 years 690 694
76404 14675 14675.00 14.33 36 months credit_card 26.92% CA RENT 3150.00 9.0 7246.0 1.0 8 years 685 689
15867 7000 7000.00 6.91 36 months credit_card 7.10% CA RENT 5000.00 8.0 7612.0 0.0 3 years 715 719
36911 10625 10625.00 14.27 36 months debt_consolidation 12.54% CA MORTGAGE 4250.00 14.0 10767.0 0.0 < 1 year 665 669
21399 25000 24975.00 15.65 60 months debt_consolidation 21.99% CA RENT 5416.67 6.0 13929.0 0.0 9 years 730 734
62127 10000 10000.00 12.12 36 months debt_consolidation 17.72% CA RENT 9000.00 18.0 20317.0 0.0 7 years 695 699
65278 8000 8000.00 15.31 36 months debt_consolidation 15.46% CA MORTGAGE 2916.67 13.0 7152.0 1.0 5 years 675 679
46120 14000 14000.00 12.12 36 months debt_consolidation 14.93% CA MORTGAGE 10583.33 9.0 35457.0 0.0 2 years 685 689
98445 9000 9000.00 17.27 36 months credit_card 25.22% CA RENT 5166.67 14.0 5852.0 1.0 10+ years 675 679
34501 20000 19975.00 23.91 60 months debt_consolidation 9.47% CA MORTGAGE 11250.00 5.0 11168.0 0.0 < 1 year 670 674
96613 27575 27575.00 14.33 36 months debt_consolidation 19.63% CA MORTGAGE 5166.67 6.0 8720.0 0.0 10+ years 690 694
51948 5300 5300.00 7.90 36 months medical 12.82% CA RENT 5500.00 8.0 9996.0 2.0 1 year 725 729
82294 20000 19975.00 16.29 36 months debt_consolidation 11.85% CA RENT 9166.67 10.0 12421.0 3.0 < 1 year 685 689
20328 15000 14993.57 18.25 60 months debt_consolidation 13.65% CA RENT 2916.67 4.0 12786.0 0.0 < 1 year 705 709
43203 24000 23947.48 19.03 60 months debt_consolidation 3.63% CA RENT 7500.00 6.0 8568.0 1.0 10+ years 695 699
58359 3225 3225.00 7.62 36 months debt_consolidation 17.79% CA MORTGAGE 5833.33 5.0 70323.0 1.0 10+ years 760 764
103948 16000 15850.00 11.12 36 months credit_card 17.86% CA MORTGAGE 6666.67 7.0 18659.0 4.0 5 years 725 729
51929 9525 9500.00 15.81 36 months small_business 11.35% CA RENT 2666.67 15.0 5610.0 1.0 n/a 670 674
25380 4500 4500.00 5.99 36 months debt_consolidation 18.48% CA MORTGAGE 10000.00 18.0 17863.0 3.0 6 years 765 769
69456 5000 5000.00 7.90 36 months debt_consolidation 17.22% CA OWN 4000.00 10.0 6023.0 0.0 10+ years 705 709
82476 10000 10000.00 14.09 36 months debt_consolidation 10.59% CA RENT 2916.67 9.0 10390.0 3.0 < 1 year 690 694
57866 5750 5750.00 13.99 36 months credit_card 12.44% CA RENT 5000.00 13.0 12465.0 0.0 10+ years 660 664
76693 35000 35000.00 8.90 36 months debt_consolidation 14.62% CA RENT 10416.67 9.0 21321.0 0.0 10+ years 735 739
32008 14000 14000.00 8.49 60 months debt_consolidation 9.89% CA MORTGAGE 9000.00 17.0 23773.0 0.0 4 years 760 764
36889 10800 10800.00 14.65 36 months debt_consolidation 7.89% CA RENT 9583.33 6.0 6487.0 1.0 < 1 year 675 679
15427 7500 7500.00 16.32 36 months debt_consolidation 12.54% CA MORTGAGE 4600.00 3.0 6120.0 0.0 2 years 665 669
19328 7500 7500.00 16.40 36 months debt_consolidation 13.34% CA RENT 4166.67 8.0 7430.0 0.0 < 1 year 660 664
65134 34500 34450.00 6.62 36 months small_business 16.46% CA MORTGAGE 10000.00 8.0 5317.0 0.0 9 years 805 809
62445 18000 18000.00 15.31 60 months debt_consolidation 4.93% CA RENT 5258.50 6.0 11596.0 0.0 10+ years 710 714
68821 11500 11475.00 11.14 36 months credit_card 28.25% CA RENT 2750.00 6.0 9079.0 0.0 5 years 700 704
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
6143 7900 7782.65 8.59 36 months credit_card 15.20% CA MORTGAGE 7583.33 14.0 3950.0 0.0 10+ years 810 814
59807 10000 10000.00 15.31 36 months debt_consolidation 14.61% CA RENT 3750.00 18.0 13051.0 1.0 5 years 680 684
65041 12000 11975.00 6.62 36 months major_purchase 1.43% CA RENT 9666.67 5.0 4693.0 0.0 1 year 750 754
26914 10000 9975.00 6.99 36 months home_improvement 27.92% CA MORTGAGE 2500.00 5.0 1230.0 2.0 1 year 780 784
22573 9800 9800.00 14.91 60 months credit_card 12.89% CA MORTGAGE 2708.33 11.0 8111.0 0.0 3 years 690 694
26312 12000 12000.00 7.91 36 months credit_card 4.51% CA RENT 3500.00 16.0 6015.0 1.0 < 1 year 725 729
17242 19000 18975.00 13.72 36 months credit_card 13.50% CA RENT 2964.67 9.0 11219.0 0.0 10+ years 705 709
6712 10000 10000.00 12.53 36 months debt_consolidation 10.81% CA RENT 7158.33 15.0 23152.0 0.0 6 years 700 704
10176 20500 19861.23 14.96 60 months debt_consolidation 10.76% CA RENT 2500.00 2.0 109.0 0.0 2 years 730 734
3768 4800 4750.00 12.21 36 months wedding 9.41% CA RENT 6250.00 10.0 21069.0 0.0 < 1 year 695 699
6326 3600 3600.00 7.74 36 months major_purchase 1.41% CA MORTGAGE 5666.67 6.0 2907.0 0.0 10+ years 735 739
42148 10000 9950.00 14.27 36 months debt_consolidation 12.63% CA RENT 7166.67 11.0 10309.0 1.0 < 1 year 680 684
11626 4500 4500.00 7.51 36 months moving 13.90% CA MORTGAGE 3000.00 10.0 387.0 0.0 < 1 year 765 769
6466 24250 24144.61 11.83 36 months major_purchase 4.79% CA RENT 12000.00 10.0 2097.0 1.0 < 1 year 755 759
91639 12000 12000.00 17.77 36 months debt_consolidation 22.32% CA RENT 4575.08 13.0 8048.0 2.0 < 1 year 675 679
66064 31825 31825.00 14.09 60 months major_purchase 4.37% CA MORTGAGE 6666.67 4.0 10864.0 0.0 4 years 750 754
54428 21000 21000.00 13.67 36 months other 8.08% CA RENT 7916.67 6.0 6524.0 1.0 1 year 705 709
26195 7000 7000.00 6.99 36 months debt_consolidation 14.32% CA RENT 5166.67 6.0 0.0 1.0 10+ years 775 779
82216 15000 15000.00 8.90 36 months debt_consolidation 13.59% CA MORTGAGE 9583.33 10.0 13427.0 3.0 5 years 735 739
18049 9700 9700.00 14.09 36 months debt_consolidation 9.52% CA RENT 7333.33 9.0 14723.0 0.0 3 years 665 669
70427 8450 8450.00 18.49 36 months small_business 21.98% CA OWN 2083.33 14.0 7352.0 2.0 3 years 660 664
12976 2000 2000.00 11.49 36 months major_purchase 2.44% CA RENT 2500.00 4.0 67.0 0.0 2 years 700 704
90560 11400 11375.00 8.90 36 months debt_consolidation 14.27% CA MORTGAGE 5641.67 9.0 11092.0 1.0 10+ years 740 744
13767 15000 15000.00 19.04 60 months debt_consolidation 16.69% CA RENT 5250.00 7.0 8615.0 1.0 2 years 680 684
72356 28200 28150.00 10.16 60 months home_improvement 10.35% CA MORTGAGE 5900.00 13.0 20670.0 0.0 10+ years 780 784
43056 15000 15000.00 7.90 36 months credit_card 22.24% CA RENT 2666.67 15.0 11105.0 0.0 < 1 year 735 739
19492 3500 3500.00 7.29 36 months other 15.54% CA MORTGAGE 6333.00 12.0 41602.0 0.0 10+ years 715 719
52330 15000 15000.00 19.99 36 months wedding 18.05% CA RENT 8000.00 6.0 45976.0 1.0 2 years 660 664
42124 10000 10000.00 11.71 36 months debt_consolidation 8.40% CA RENT 4500.00 8.0 8404.0 1.0 3 years 710 714
78043 8475 8475.00 7.62 36 months debt_consolidation 15.88% CA RENT 3983.33 9.0 6882.0 0.0 n/a 720 724

433 rows × 15 columns

In [239]:
2 ==3 or 4==5
Out[239]:
False
In [242]:
 
Out[242]:
a    False
b    False
c    False
d    False
e    False
dtype: bool
In [243]:
import numpy as np
In [249]:
s[(s >0.5) & (s <0.7)]
Out[249]:
a    0.677113
c    0.687966
dtype: float64
In [250]:
s[(s >0.5) | (s <0.7)]
Out[250]:
a    0.677113
b    0.283198
c    0.687966
d    0.782437
e    0.284615
dtype: float64
In [251]:
loansData[(loansData.State=="CA") & (loansData.Home_Ownership=="RENT")]
Out[251]:
Amount_Requested Amount_Funded_By_Investors Interest_Rate Loan_Length Loan_Purpose Debt_To_Income_Ratio State Home_Ownership Monthly_Income Open_CREDIT_Lines Revolving_CREDIT_Balance Inquiries_in_the_Last_6_Months Employment_Length FICO_start FICO_end
76404 14675 14675.00 14.33 36 months credit_card 26.92% CA RENT 3150.00 9.0 7246.0 1.0 8 years 685 689
15867 7000 7000.00 6.91 36 months credit_card 7.10% CA RENT 5000.00 8.0 7612.0 0.0 3 years 715 719
21399 25000 24975.00 15.65 60 months debt_consolidation 21.99% CA RENT 5416.67 6.0 13929.0 0.0 9 years 730 734
62127 10000 10000.00 12.12 36 months debt_consolidation 17.72% CA RENT 9000.00 18.0 20317.0 0.0 7 years 695 699
98445 9000 9000.00 17.27 36 months credit_card 25.22% CA RENT 5166.67 14.0 5852.0 1.0 10+ years 675 679
51948 5300 5300.00 7.90 36 months medical 12.82% CA RENT 5500.00 8.0 9996.0 2.0 1 year 725 729
82294 20000 19975.00 16.29 36 months debt_consolidation 11.85% CA RENT 9166.67 10.0 12421.0 3.0 < 1 year 685 689
20328 15000 14993.57 18.25 60 months debt_consolidation 13.65% CA RENT 2916.67 4.0 12786.0 0.0 < 1 year 705 709
43203 24000 23947.48 19.03 60 months debt_consolidation 3.63% CA RENT 7500.00 6.0 8568.0 1.0 10+ years 695 699
51929 9525 9500.00 15.81 36 months small_business 11.35% CA RENT 2666.67 15.0 5610.0 1.0 n/a 670 674
82476 10000 10000.00 14.09 36 months debt_consolidation 10.59% CA RENT 2916.67 9.0 10390.0 3.0 < 1 year 690 694
57866 5750 5750.00 13.99 36 months credit_card 12.44% CA RENT 5000.00 13.0 12465.0 0.0 10+ years 660 664
76693 35000 35000.00 8.90 36 months debt_consolidation 14.62% CA RENT 10416.67 9.0 21321.0 0.0 10+ years 735 739
36889 10800 10800.00 14.65 36 months debt_consolidation 7.89% CA RENT 9583.33 6.0 6487.0 1.0 < 1 year 675 679
19328 7500 7500.00 16.40 36 months debt_consolidation 13.34% CA RENT 4166.67 8.0 7430.0 0.0 < 1 year 660 664
62445 18000 18000.00 15.31 60 months debt_consolidation 4.93% CA RENT 5258.50 6.0 11596.0 0.0 10+ years 710 714
68821 11500 11475.00 11.14 36 months credit_card 28.25% CA RENT 2750.00 6.0 9079.0 0.0 5 years 700 704
80512 2500 2500.00 11.14 36 months major_purchase 10.08% CA RENT 4333.33 4.0 4922.0 0.0 3 years 720 724
97954 4375 4375.00 19.05 36 months credit_card 9.09% CA RENT 2916.67 9.0 6944.0 2.0 6 years 670 674
66605 5000 5000.00 15.80 36 months debt_consolidation 34.20% CA RENT 3750.00 18.0 5414.0 3.0 3 years 665 669
18512 12000 11975.00 14.83 60 months credit_card 22.31% CA RENT 4186.00 9.0 9398.0 0.0 3 years 685 689
80538 12300 12300.00 12.12 36 months debt_consolidation 10.01% CA RENT 6583.33 15.0 12450.0 0.0 1 year 690 694
39722 24000 24000.00 10.65 36 months debt_consolidation 14.80% CA RENT 3750.00 5.0 24708.0 0.0 10+ years 725 729
34051 2000 2000.00 7.51 36 months other 16.73% CA RENT 1500.00 5.0 2925.0 0.0 4 years 730 734
65889 6000 6000.00 7.62 36 months debt_consolidation 9.33% CA RENT 8315.67 8.0 6239.0 0.0 2 years 730 734
83672 20000 20000.00 15.31 36 months debt_consolidation 27.35% CA RENT 5250.00 11.0 11343.0 1.0 6 years 690 694
32974 3000 3000.00 12.69 36 months debt_consolidation 3.12% CA RENT 2083.33 3.0 844.0 0.0 3 years 715 719
99867 35000 34950.00 17.27 36 months debt_consolidation 22.74% CA RENT 6750.00 15.0 24626.0 1.0 10+ years 675 679
84283 10000 10000.00 15.31 36 months credit_card 27.71% CA RENT 3750.00 19.0 19385.0 1.0 3 years 680 684
85172 4000 4000.00 6.03 36 months car 13.03% CA RENT 2916.67 7.0 2860.0 0.0 5 years 750 754
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
58924 9950 9950.00 15.31 36 months wedding 7.84% CA RENT 2833.33 7.0 4904.0 2.0 n/a 675 679
20234 12000 10400.00 18.25 60 months car 12.11% CA RENT 5600.00 7.0 2409.0 1.0 3 years 660 664
51742 18000 17925.00 19.22 60 months debt_consolidation 21.36% CA RENT 6867.92 13.0 19446.0 0.0 10+ years 690 694
54076 5400 5400.00 13.67 36 months debt_consolidation 15.42% CA RENT 1666.67 6.0 2533.0 0.0 4 years 680 684
40692 3750 3750.00 11.71 36 months debt_consolidation 16.98% CA RENT 1650.00 6.0 8101.0 0.0 4 years 675 679
24187 4500 4500.00 11.49 36 months other 9.26% CA RENT 3500.00 5.0 7247.0 0.0 2 years 695 699
13413 25000 15600.00 17.93 60 months small_business 19.94% CA RENT 5416.67 13.0 24810.0 0.0 10+ years 690 694
73203 12000 12000.00 14.09 36 months debt_consolidation 19.49% CA RENT 6166.67 13.0 6499.0 0.0 6 years 675 679
33275 4400 4400.00 12.42 36 months small_business 19% CA RENT 2000.00 9.0 2411.0 3.0 5 years 715 719
48048 12800 12800.00 12.12 36 months debt_consolidation 21.53% CA RENT 4750.00 16.0 14291.0 0.0 < 1 year 695 699
34802 7200 7200.00 13.49 36 months debt_consolidation 14.60% CA RENT 5000.00 4.0 17268.0 0.0 1 year 725 729
59807 10000 10000.00 15.31 36 months debt_consolidation 14.61% CA RENT 3750.00 18.0 13051.0 1.0 5 years 680 684
65041 12000 11975.00 6.62 36 months major_purchase 1.43% CA RENT 9666.67 5.0 4693.0 0.0 1 year 750 754
26312 12000 12000.00 7.91 36 months credit_card 4.51% CA RENT 3500.00 16.0 6015.0 1.0 < 1 year 725 729
17242 19000 18975.00 13.72 36 months credit_card 13.50% CA RENT 2964.67 9.0 11219.0 0.0 10+ years 705 709
6712 10000 10000.00 12.53 36 months debt_consolidation 10.81% CA RENT 7158.33 15.0 23152.0 0.0 6 years 700 704
10176 20500 19861.23 14.96 60 months debt_consolidation 10.76% CA RENT 2500.00 2.0 109.0 0.0 2 years 730 734
3768 4800 4750.00 12.21 36 months wedding 9.41% CA RENT 6250.00 10.0 21069.0 0.0 < 1 year 695 699
42148 10000 9950.00 14.27 36 months debt_consolidation 12.63% CA RENT 7166.67 11.0 10309.0 1.0 < 1 year 680 684
6466 24250 24144.61 11.83 36 months major_purchase 4.79% CA RENT 12000.00 10.0 2097.0 1.0 < 1 year 755 759
91639 12000 12000.00 17.77 36 months debt_consolidation 22.32% CA RENT 4575.08 13.0 8048.0 2.0 < 1 year 675 679
54428 21000 21000.00 13.67 36 months other 8.08% CA RENT 7916.67 6.0 6524.0 1.0 1 year 705 709
26195 7000 7000.00 6.99 36 months debt_consolidation 14.32% CA RENT 5166.67 6.0 0.0 1.0 10+ years 775 779
18049 9700 9700.00 14.09 36 months debt_consolidation 9.52% CA RENT 7333.33 9.0 14723.0 0.0 3 years 665 669
12976 2000 2000.00 11.49 36 months major_purchase 2.44% CA RENT 2500.00 4.0 67.0 0.0 2 years 700 704
13767 15000 15000.00 19.04 60 months debt_consolidation 16.69% CA RENT 5250.00 7.0 8615.0 1.0 2 years 680 684
43056 15000 15000.00 7.90 36 months credit_card 22.24% CA RENT 2666.67 15.0 11105.0 0.0 < 1 year 735 739
52330 15000 15000.00 19.99 36 months wedding 18.05% CA RENT 8000.00 6.0 45976.0 1.0 2 years 660 664
42124 10000 10000.00 11.71 36 months debt_consolidation 8.40% CA RENT 4500.00 8.0 8404.0 1.0 3 years 710 714
78043 8475 8475.00 7.62 36 months debt_consolidation 15.88% CA RENT 3983.33 9.0 6882.0 0.0 n/a 720 724

273 rows × 15 columns

In [253]:
loansData.groupby(loansData.State).mean()
Out[253]:
Amount_Requested Amount_Funded_By_Investors Interest_Rate Monthly_Income Open_CREDIT_Lines Revolving_CREDIT_Balance Inquiries_in_the_Last_6_Months
State
AK 13688.636364 13679.545455 16.594545 4975.576364 9.818182 13262.363636 0.909091
AL 12734.210526 11894.035000 13.144211 5029.517895 9.394737 13171.631579 0.947368
AR 8598.076923 8457.692308 13.230000 5823.102308 9.384615 11371.461538 1.307692
AZ 12315.760870 12011.720217 13.150652 5446.441957 9.130435 16372.347826 0.804348
CA 12207.101617 11921.603464 12.955266 5761.773903 9.741339 15268.845266 0.775982
CO 12261.065574 11701.219672 12.919672 5155.414590 10.098361 12442.655738 1.065574
CT 12169.500000 11645.271400 13.721000 5260.232400 10.220000 14134.580000 1.140000
DC 15602.272727 14805.958182 13.741818 7752.097273 10.090909 12911.727273 0.818182
DE 10559.375000 10546.875000 11.661250 5770.832500 9.125000 14505.875000 1.000000
FL 11974.260355 11419.491479 12.964852 5450.207396 10.017751 18017.106509 0.905325
GA 13320.408163 12437.758776 12.544388 6242.562857 9.989796 19427.581633 0.836735
HI 13941.666667 13819.640000 15.984167 5113.888333 11.000000 13053.083333 0.166667
IA 3500.000000 690.340000 14.120000 3750.000000 8.000000 9771.000000 4.000000
IL 13143.811881 12812.574455 12.616337 5715.900099 10.108911 14789.762376 1.188119
IN 13725.000000 8791.666667 13.090000 3750.000000 13.666667 14485.333333 4.333333
KS 11619.047619 10922.096190 13.717619 4801.883810 9.523810 9786.333333 1.047619
KY 9906.521739 9788.454783 12.382609 4366.063043 8.478261 12192.652174 0.826087
LA 17619.318182 17475.772727 14.920455 5481.591364 9.954545 14978.272727 0.909091
MA 13856.164384 13534.591096 12.613014 5877.133562 9.958904 13904.438356 0.904110
MD 13385.294118 13169.033088 13.278971 7111.838529 10.029412 12581.897059 0.882353
MI 12240.555556 11765.681333 14.375556 4782.703778 10.377778 14313.888889 1.000000
MN 13861.184211 13294.764737 13.621842 5150.977368 10.210526 15860.078947 0.657895
MO 13006.060606 12023.355455 12.783333 4735.805758 10.393939 13852.848485 1.333333
MS 4575.000000 4575.000000 15.650000 5833.330000 25.000000 82586.000000 2.000000
MT 12242.857143 12242.857143 10.774286 4145.522857 8.571429 34194.714286 0.428571
NC 11395.312500 11351.499063 12.640312 5317.459844 9.406250 17568.734375 0.640625
NH 16776.666667 15023.652000 12.171333 7734.733333 9.533333 24717.600000 1.000000
NJ 11451.861702 10942.380000 12.778617 6106.467660 9.829787 14762.063830 0.946809
NM 13607.692308 13605.769231 14.325385 5229.133846 9.384615 18429.461538 0.923077
NV 11746.875000 11473.803125 13.512187 5110.346250 10.500000 13888.531250 0.750000
NY 12773.823529 12207.519098 13.102392 6507.559921 10.316206 15923.205534 0.794466
OH 11392.957746 11014.558592 12.259296 5162.090141 10.577465 11539.436620 1.197183
OK 12808.333333 12571.313810 13.780952 5914.364762 9.571429 11608.714286 1.190476
OR 11035.000000 10956.450333 12.657000 4727.888667 9.266667 11193.866667 0.466667
PA 11599.218750 11268.495729 12.538646 5108.773750 10.322917 10675.947917 1.208333
RI 11493.333333 11488.333333 13.013333 6182.128000 12.400000 19046.866667 1.000000
SC 10401.785714 10398.174286 12.790357 5525.952143 9.428571 9503.892857 0.821429
SD 14600.000000 14600.000000 10.252500 4583.332500 12.000000 15598.000000 0.250000
TX 12722.701149 12325.628793 13.381954 5500.602529 10.614943 13757.000000 0.896552
UT 9559.375000 9551.562500 13.174375 5306.353750 9.812500 17797.875000 0.625000
VA 12929.807692 12508.127692 13.337564 6728.106667 11.256410 24267.358974 0.974359
VT 19160.000000 19160.000000 17.884000 5932.118000 9.800000 29618.200000 1.000000
WA 12157.327586 12129.595517 12.977759 5220.620862 9.741379 14174.913793 0.862069
WI 10459.615385 10276.437692 13.881923 5020.220000 10.538462 14333.000000 1.192308
WV 11273.333333 11268.333333 14.108667 4321.167333 11.733333 15279.133333 1.266667
WY 7325.000000 7325.000000 13.465000 4875.750000 12.750000 14286.750000 0.250000
In [258]:
loansData.groupby(loansData.State).max()
Out[258]:
Amount_Requested Amount_Funded_By_Investors Interest_Rate Loan_Length Loan_Purpose Debt_To_Income_Ratio Home_Ownership Monthly_Income Open_CREDIT_Lines Revolving_CREDIT_Balance Inquiries_in_the_Last_6_Months Employment_Length FICO_start FICO_end
State
AK 24925 24925.00 24.70 60 months small_business 9.16% RENT 9166.67 25.0 32137.0 4.0 < 1 year 760 764
AL 35000 35000.00 24.70 60 months small_business 8.51% RENT 10583.33 23.0 55691.0 7.0 n/a 780 784
AR 20000 20000.00 18.49 60 months other 4% RENT 18333.33 17.0 33229.0 3.0 < 1 year 735 739
AZ 35000 33500.00 22.47 60 months wedding 8.89% RENT 15583.33 20.0 217827.0 4.0 n/a 815 819
CA 35000 35000.00 24.33 60 months wedding 9.93% RENT 25000.00 31.0 270800.0 7.0 n/a 815 819
CO 30000 30000.00 22.47 60 months small_business 9.95% RENT 16416.67 34.0 62549.0 4.0 n/a 795 799
CT 35000 35000.00 21.67 60 months wedding 9.73% RENT 14166.67 38.0 99549.0 4.0 n/a 800 804
DC 30100 30100.00 19.69 60 months home_improvement 9.37% RENT 12083.33 20.0 27904.0 3.0 < 1 year 805 809
DE 20000 19900.00 15.81 60 months other 5.98% RENT 7833.33 15.0 49898.0 2.0 6 years 790 794
FL 35000 35000.00 23.33 60 months wedding 9.99% RENT 21666.67 24.0 216561.0 8.0 n/a 815 819
GA 35000 34975.00 23.83 60 months wedding 9.73% RENT 20000.00 24.0 245886.0 9.0 n/a 805 809
HI 30000 30000.00 22.95 60 months major_purchase 8.36% RENT 12500.00 15.0 29756.0 1.0 9 years 760 764
IA 3500 690.34 14.12 36 months medical 10.19% OTHER 3750.00 8.0 9771.0 4.0 10+ years 655 659
IL 35000 35000.00 23.28 60 months wedding 9.58% RENT 25000.00 26.0 71879.0 9.0 n/a 805 809
IN 24925 24925.00 17.54 60 months debt_consolidation 24.80% RENT 6250.00 20.0 19245.0 9.0 < 1 year 735 739
KS 25000 25000.00 23.76 60 months other 7.17% RENT 12916.67 17.0 22566.0 7.0 n/a 795 799
KY 22000 22000.00 19.99 60 months other 8.61% RENT 10408.00 15.0 92399.0 3.0 < 1 year 805 809
LA 35000 35000.00 22.78 60 months major_purchase 9.51% RENT 13863.42 19.0 39934.0 3.0 n/a 775 779
MA 35000 35000.00 22.95 60 months other 9.86% RENT 14166.67 26.0 103354.0 4.0 n/a 810 814
MD 35000 35000.00 22.95 60 months vacation 9.79% RENT 65000.00 36.0 44198.0 5.0 < 1 year 820 824
MI 35000 35000.00 21.14 60 months wedding 8.37% RENT 11500.00 22.0 47504.0 9.0 n/a 750 754
MN 30000 30000.00 22.11 60 months wedding 9.54% RENT 22916.67 21.0 44868.0 3.0 < 1 year 800 804
MO 35000 35000.00 21.97 60 months small_business 9.90% RENT 9083.33 17.0 38176.0 9.0 < 1 year 810 814
MS 4575 4575.00 15.65 36 months credit_card 14.31% OTHER 5833.33 25.0 82586.0 2.0 2 years 680 684
MT 24375 24375.00 15.31 60 months other 26.36% RENT 7500.00 11.0 143151.0 3.0 7 years 795 799
NC 32000 32000.00 23.76 60 months wedding 9.46% RENT 17500.00 24.0 194205.0 3.0 n/a 810 814
NH 35000 32000.00 24.89 60 months other 9.96% RENT 19166.67 26.0 100233.0 3.0 < 1 year 785 789
NJ 35000 35000.00 22.95 60 months wedding 9.16% RENT 20833.33 22.0 61613.0 6.0 n/a 815 819
NM 25875 25875.00 19.99 60 months small_business 8.03% RENT 10000.00 19.0 46358.0 3.0 n/a 760 764
NV 28000 28000.00 22.78 60 months vacation 9.61% RENT 10500.00 19.0 44403.0 3.0 n/a 765 769
NY 35000 35000.00 23.76 60 months wedding 9.84% RENT 102750.00 23.0 121563.0 7.0 n/a 810 814
OH 28000 28000.00 22.47 60 months wedding 9.59% RENT 14166.67 21.0 49026.0 6.0 n/a 810 814
OK 35000 35000.00 23.63 60 months other 9.14% RENT 12083.33 21.0 36555.0 3.0 < 1 year 760 764
OR 35000 34950.00 23.28 60 months small_business 8.96% RENT 11250.00 17.0 33178.0 2.0 n/a 815 819
PA 35000 35000.00 22.45 60 months wedding 9.60% RENT 15416.67 24.0 49136.0 5.0 n/a 800 804
RI 35000 34950.00 21.97 60 months other 9.98% RENT 14166.67 22.0 51314.0 3.0 8 years 780 784
SC 27000 27000.00 19.91 60 months other 8.48% RENT 16083.33 22.0 23378.0 3.0 n/a 800 804
SD 30000 30000.00 13.11 36 months debt_consolidation 9.58% OWN 8000.00 14.0 33218.0 1.0 6 years 805 809
TX 35000 35000.00 23.76 60 months wedding 9.81% RENT 18750.00 25.0 58836.0 7.0 n/a 815 819
UT 33600 33600.00 23.83 60 months home_improvement 9% RENT 25000.00 18.0 73258.0 2.0 < 1 year 740 744
VA 35000 35000.00 24.20 60 months wedding 9.73% RENT 18750.00 25.0 129071.0 6.0 n/a 830 834
VT 25800 25800.00 24.89 60 months moving 29.05% RENT 11583.33 13.0 86700.0 3.0 8 years 715 719
WA 28000 27975.00 22.47 60 months wedding 9.92% RENT 12500.00 24.0 53494.0 3.0 n/a 800 804
WI 30075 30050.00 23.76 60 months small_business 9.05% RENT 15000.00 18.0 60568.0 7.0 < 1 year 795 799
WV 35000 34950.00 22.47 60 months home_improvement 33.93% RENT 7083.33 21.0 31168.0 3.0 n/a 790 794
WY 15000 15000.00 17.27 36 months home_improvement 33.30% RENT 10000.00 20.0 26267.0 1.0 5 years 695 699
In [260]:
loansData
---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
<ipython-input-260-76eeb29f140d> in <module>()
----> 1 loansData.groupby(loansData.State).dim

~/usr/local/anaconda3/lib/python3.6/site-packages/pandas/core/groupby.py in __getattr__(self, attr)
    549 
    550         raise AttributeError("%r object has no attribute %r" %
--> 551                              (type(self).__name__, attr))
    552 
    553     plot = property(GroupByPlot)

AttributeError: 'DataFrameGroupBy' object has no attribute 'dim'

merging

pd.merge()  # merge columns
pd.concat() # append rows
In [263]:
stocks
Out[263]:
name ticker value volume
0 Infosys infy 1000 500
1 Tata tata 500 50
2 Reliance reliance 700 100
3 Tata Infotech tatainf 600 60
In [266]:
df = pd.DataFrame({"name":["Tata", "x","y","z", "a"]})
In [267]:
df
Out[267]:
name
0 Tata
1 x
2 y
3 z
4 a

reading from html

In [272]:
df1 = pd.DataFrame([stocks.name==n for n in df.name]).transpose()
In [275]:
df1.apply()
Out[275]:
name name name name name
0 False False False False False
1 True False False False False
2 False False False False False
3 False False False False False
In [276]:
 
---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-276-db26a67c49da> in <module>()
----> 1 stocks.isin("Tata")

~/usr/local/anaconda3/lib/python3.6/site-packages/pandas/core/frame.py in isin(self, values)
   5472                                 "allowed to be passed to DataFrame.isin(), "
   5473                                 "you passed a "
-> 5474                                 "{0!r}".format(type(values).__name__))
   5475             return DataFrame(
   5476                 algorithms.isin(self.values.ravel(),

TypeError: only list-like or dict-like objects are allowed to be passed to DataFrame.isin(), you passed a 'str'

beautiful soup

In [5]:
from bs4 import BeautifulSoup
html_doc = """
<html><head><title>The Dormouse's story</title></head>
<body>
<p class="title"><b>The Dormouse's story</b></p>

<p class="story">Once upon a time there were three little sisters; and their names were
<a href="http://example.com/elsie" class="sister" id="link1">Elsie</a>,
<a href="http://example.com/lacie" class="sister" id="link2">Lacie</a> and
<a href="http://example.com/tillie" class="sister" id="link3">Tillie</a>;
and they lived at the bottom of a well.</p>

<p class="story">...</p>
"""
soup = BeautifulSoup(html_doc, 'html.parser')
In [6]:
soup.find_all("table")
Out[6]:
[]
In [7]:
table = """
<html><head><title>The Dormouse's story</title></head>
<body>
<p class="title"><b>The Dormouse's story</b></p>

<p class="story">Once upon a time there were three little sisters; and their names were
<a href="http://example.com/elsie" class="sister" id="link1">Elsie</a>,
<a href="http://example.com/lacie" class="sister" id="link2">Lacie</a> and
<a href="http://example.com/tillie" class="sister" id="link3">Tillie</a>;
and they lived at the bottom of a well.</p>

<p class="story">...</p>

<table>
 <tr>
  <th>Name</th>
  <th>Favorite Color</th>
 </tr>
 <tr>
  <td>Bob</td>
  <td>Yellow</td>
 </tr>
 <tr>
  <td>Michelle</td>
  <td>Purple</td>
 </tr>
</table>
</body>
</html>

"""
In [96]:
soup = BeautifulSoup(table, 'html.parser')
In [105]:
def parseTable(table):
    """Convert a parsed HTML ``<table>`` element into a list of rows.

    Each row is a list holding the first child of every cell (``<th>`` or
    ``<td>``) in that row; for a plain-text cell that is the cell's text.

    Args:
        table: A node exposing ``.contents`` (e.g. ``soup.table``) whose
            element children are rows, which in turn expose ``.contents``
            holding the cells.

    Returns:
        A list of lists, one inner list per row, in document order. An
        empty cell contributes ``""`` so columns stay aligned.
    """
    def is_element(node):
        # Text nodes between tags (newlines, indentation) are str subclasses
        # in BeautifulSoup, while tags are not. Filtering by type instead of
        # comparing with "\n" keeps indented markup from breaking the parse.
        return not isinstance(node, str)

    rows = []
    for row in table.contents:
        if not is_element(row):
            continue
        rows.append([
            # Guard against <td></td>, whose contents list is empty.
            cell.contents[0] if cell.contents else ""
            for cell in row.contents
            if is_element(cell)
        ])
    return rows
In [107]:
table = parseTable(soup.table)
In [108]:
table
Out[108]:
[['Name', 'Favorite Color'], ['Bob', 'Yellow'], ['Michelle', 'Purple']]
In [109]:
table[0]
Out[109]:
['Name', 'Favorite Color']
In [110]:
table[1]
Out[110]:
['Bob', 'Yellow']
In [111]:
table[2]
Out[111]:
['Michelle', 'Purple']