Basic Python Training at Arcesium - Day 4

Apr 15-18, 2019 Vikrant Patil

These notes are available online at http://notes.pipal.in/2019/arcesium_basic_aug/day4.html

© Pipal Academy LLP

Day 1 | Day 2 | Day 3 | Day 4

We will be using python 3 (>= 3.0) from anaconda for this training. You can download it from

https://www.anaconda.com/download/

In [1]:
def getwords(filename):
    with open(filename) as f:
        return f.read().strip().split()

words = getwords("words.txt")

def wordfreq(words):
    freq = {}
    uniq = set(words)
    for w in uniq:
        freq[w] = words.count(w)
    return freq

freq = wordfreq(words)
In [2]:
freq
Out[2]:
{'five': 6,
 'ten': 1,
 'six': 5,
 'two': 9,
 'one': 10,
 'four': 7,
 'three': 8,
 'nine': 2,
 'eight': 3,
 'seven': 4}
In [3]:
def wordfreq1(words):
    freq = {}
    for w in words:
        if w in freq:
            freq[w] += 1   # freq[w] = freq[w] + 1
        else:
            freq[w] = 1
    return freq
In [4]:
wordfreq1(words)
Out[4]:
{'one': 10,
 'two': 9,
 'three': 8,
 'four': 7,
 'five': 6,
 'six': 5,
 'seven': 4,
 'eight': 3,
 'nine': 2,
 'ten': 1}
In [5]:
def wordfreq2(words):
    freq = {}
    for w in words:
        freq[w] = freq.get(w, 0) + 1
    return freq

Classes

In [6]:
%%file bank0.py

balance = 0

def get_balance():
    return balance

def withdraw(amount):
    global balance
    balance -= amount
    
def deposit(amount):
    global balance
    balance += amount
Writing bank0.py
In [7]:
import bank0
In [8]:
print(bank0.__name__)
bank0
In [9]:
bank0.get_balance()
Out[9]:
0
In [10]:
bank0.deposit(10000)
In [11]:
bank0.get_balance()
Out[11]:
10000
In [12]:
bank0.withdraw(3000)
In [13]:
bank0.get_balance()
Out[13]:
7000
In [14]:
%%file bank1.py

def make_account():
    return {"balance":0}

def get_balance(account):
    return account['balance']

def deposit(account, amount):
    account['balance'] += amount
    
def withdraw(account, amount):
    account['balance'] -= amount
    
Writing bank1.py
In [16]:
import bank1
In [17]:
a1 = bank1.make_account()
In [18]:
bank1.get_balance(a1)
Out[18]:
0
In [19]:
bank1.deposit(a1, 30000)
In [20]:
bank1.withdraw(a1, 2323)
In [21]:
bank1.get_balance(a1)
Out[21]:
27677
In [22]:
class BankAccount:
    
    def __init__(self):
        self.balance = 0
        
    def get_balance(self):
        return self.balance
    
    def deposit(self, amount):
        self.balance += amount
        
    def withdraw(self, amount):
        self.balance -= amount
In [23]:
b1 = BankAccount()
In [24]:
b1
Out[24]:
<__main__.BankAccount at 0x7f38589ff400>
In [25]:
type(b1)
Out[25]:
__main__.BankAccount
In [26]:
isinstance(b1, BankAccount)
Out[26]:
True
In [27]:
isinstance(b1, int)
Out[27]:
False
In [28]:
isinstance(1, int)
Out[28]:
True
In [29]:
x = input()
3
In [30]:
x
Out[30]:
'3'
In [31]:
y = input("Input the value")
Input the value54
In [32]:
y
Out[32]:
'54'
In [33]:
y + 1
---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-33-d252634cf9a3> in <module>
----> 1 y + 1

TypeError: can only concatenate str (not "int") to str
In [34]:
class BankAccount1:
    
    def __init__(self, name, amount):
        self.name = name
        self.balance = amount
     
    def get_name(self):
        return self.name
    
    def get_balance(self):
        return self.balance
    
    def deposit(self, amount):
        self.balance += amount
        
    def withdraw(self, amount):
        self.balance -= amount
In [41]:
name = input("Eneter your name: ")
balance = input("Enter initial amount: ")
b2 = BankAccount1(name, float(balance))
Eneter your name: vikrant
Enter initial amount: 1000
In [42]:
b2.get_balance()
Out[42]:
1000.0
In [43]:
b2.get_name()
Out[43]:
'vikrant'
In [44]:
b2 = BankAccount1("Arcesium", 10000)
In [45]:
b2.get_balance()
Out[45]:
10000
In [46]:
l = [1,2,3,4]
In [47]:
l
Out[47]:
[1, 2, 3, 4]
In [48]:
b2
Out[48]:
<__main__.BankAccount1 at 0x7f3858997400>
In [49]:
class Point:
    
    def __init__(self, x, y):
        self.x = x
        self.y =y
In [50]:
p = Point(2, 9)
In [51]:
p
Out[51]:
<__main__.Point at 0x7f3858a450b8>
In [52]:
class Point:
    
    def __init__(self, x, y):
        self.x = x
        self.y =y
        
    def __repr__(self):
        return "<{0},{1}>".format(self.x, self.y)
In [53]:
p = Point(3,4)
In [54]:
p
Out[54]:
<3,4>
In [55]:
print(p)
<3,4>
In [56]:
class Point:
    
    def __init__(self, x, y):
        self.x = x
        self.y =y
        
    def __repr__(self):
        return "<{0},{1}>".format(self.x, self.y)
    
    def __str__(self):
        return "{},{}".format(self.x, self.y)
In [57]:
p = Point(5,6)
In [58]:
p
Out[58]:
<5,6>
In [59]:
print(p)
5,6
In [60]:
str(p)
Out[60]:
'5,6'
In [61]:
str(l)
Out[61]:
'[1, 2, 3, 4]'
In [63]:
class RedPoint(Point):
    
    color = "Red"
    
In [64]:
rp = RedPoint(5,7)
In [65]:
rp
Out[65]:
<5,7>
In [66]:
rp.color
Out[66]:
'Red'
In [67]:
p.color
---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
<ipython-input-67-17c844401d04> in <module>
----> 1 p.color

AttributeError: 'Point' object has no attribute 'color'
In [68]:
isinstance(rp, Point)
Out[68]:
True
In [69]:
isinstance(rp, RedPoint)
Out[69]:
True
In [70]:
rp
Out[70]:
<5,7>
In [71]:
rp.color
Out[71]:
'Red'

pandas

In [72]:
import pandas as pd
In [73]:
import math as m
In [74]:
del m

Series

In [75]:
values = pd.Series([100.0,202.0, 223.0, 224.5, 343.6])
In [76]:
values
Out[76]:
0    100.0
1    202.0
2    223.0
3    224.5
4    343.6
dtype: float64
In [79]:
d = dict(zip(["tata","hcl",'infy','reliance','wipro'],[100.0,202.0, 223.0, 224.5, 343.6]))
In [80]:
d
Out[80]:
{'tata': 100.0, 'hcl': 202.0, 'infy': 223.0, 'reliance': 224.5, 'wipro': 343.6}
In [81]:
values = pd.Series(d)
In [82]:
values
Out[82]:
tata        100.0
hcl         202.0
infy        223.0
reliance    224.5
wipro       343.6
dtype: float64
In [83]:
values = pd.Series(d, index=["tata",'reliance','wipro',"hcl",'infy'])
In [84]:
values
Out[84]:
tata        100.0
reliance    224.5
wipro       343.6
hcl         202.0
infy        223.0
dtype: float64
In [85]:
values = pd.Series([100.0,202.0, 223.0, 224.5, 343.6], index=["tata","hcl",'infy','reliance','wipro'])
In [86]:
values
Out[86]:
tata        100.0
hcl         202.0
infy        223.0
reliance    224.5
wipro       343.6
dtype: float64
In [87]:
values['tata']
Out[87]:
100.0
In [88]:
values[0]
Out[88]:
100.0
In [89]:
values + 100
Out[89]:
tata        200.0
hcl         302.0
infy        323.0
reliance    324.5
wipro       443.6
dtype: float64
In [90]:
values
Out[90]:
tata        100.0
hcl         202.0
infy        223.0
reliance    224.5
wipro       343.6
dtype: float64
In [91]:
v = values + 100
In [92]:
v
Out[92]:
tata        200.0
hcl         302.0
infy        323.0
reliance    324.5
wipro       443.6
dtype: float64
In [93]:
values + v
Out[93]:
tata        300.0
hcl         504.0
infy        546.0
reliance    549.0
wipro       787.2
dtype: float64
In [94]:
v*2
Out[94]:
tata        400.0
hcl         604.0
infy        646.0
reliance    649.0
wipro       887.2
dtype: float64
In [95]:
v*v
Out[95]:
tata         40000.00
hcl          91204.00
infy        104329.00
reliance    105300.25
wipro       196780.96
dtype: float64
In [96]:
v
Out[96]:
tata        200.0
hcl         302.0
infy        323.0
reliance    324.5
wipro       443.6
dtype: float64
In [97]:
v['tata']
Out[97]:
200.0
In [99]:
v[2:]
Out[99]:
infy        323.0
reliance    324.5
wipro       443.6
dtype: float64
In [100]:
v  = pd.Series([1,2,3,4,5],index=['a','b','c','d','e'])
In [101]:
v
Out[101]:
a    1
b    2
c    3
d    4
e    5
dtype: int64
In [103]:
v['a']
Out[103]:
1
In [104]:
v["a":"c"]
Out[104]:
a    1
b    2
c    3
dtype: int64
In [105]:
values
Out[105]:
tata        100.0
hcl         202.0
infy        223.0
reliance    224.5
wipro       343.6
dtype: float64
In [106]:
values[values<220]
Out[106]:
tata    100.0
hcl     202.0
dtype: float64
In [107]:
values[values>223]
Out[107]:
reliance    224.5
wipro       343.6
dtype: float64
In [108]:
values[values==223]
Out[108]:
infy    223.0
dtype: float64
In [109]:
v
Out[109]:
a    1
b    2
c    3
d    4
e    5
dtype: int64
In [111]:
v[v%2==0]
Out[111]:
b    2
d    4
dtype: int64
In [114]:
values[values==values['tata']]
Out[114]:
tata    100.0
dtype: float64

Dataframe

In [116]:
df = pd.DataFrame({"values":[100.0,202.0, 223.0, 224.5, 343.6],
                   "gain" : [11.0,5.5,20.0,34.5,20.0],
                   'Name' : ['Tata Motors', "Reliance", "Wipro", "Hindustan Computers", "Infosys"],
                   'ticker': ["tata",'reliance','wipro',"hcl",'infy']})
In [117]:
df
Out[117]:
values gain Name ticker
0 100.0 11.0 Tata Motors tata
1 202.0 5.5 Reliance reliance
2 223.0 20.0 Wipro wipro
3 224.5 34.5 Hindustan Computers hcl
4 343.6 20.0 Infosys infy
In [165]:
df = pd.DataFrame({"values":[100.0,202.0, 223.0, 224.5, 343.6],
                   "gain" : [11.0,5.5,20.0,34.5,20.0],
                   'Name' : ['Tata Motors', "Reliance", "Wipro", "Hindustan Computers", "Infosys"]
                  }, index= ["tata",'reliance','wipro',"hcl",'infy'])
In [166]:
df
Out[166]:
values gain Name
tata 100.0 11.0 Tata Motors
reliance 202.0 5.5 Reliance
wipro 223.0 20.0 Wipro
hcl 224.5 34.5 Hindustan Computers
infy 343.6 20.0 Infosys
In [120]:
df['values']
Out[120]:
tata        100.0
reliance    202.0
wipro       223.0
hcl         224.5
infy        343.6
Name: values, dtype: float64
In [123]:
df.gain
Out[123]:
tata        11.0
reliance     5.5
wipro       20.0
hcl         34.5
infy        20.0
Name: gain, dtype: float64
In [122]:
df['gain']
Out[122]:
tata        11.0
reliance     5.5
wipro       20.0
hcl         34.5
infy        20.0
Name: gain, dtype: float64
In [125]:
df[df['gain']>10]
Out[125]:
values gain Name
tata 100.0 11.0 Tata Motors
wipro 223.0 20.0 Wipro
hcl 224.5 34.5 Hindustan Computers
infy 343.6 20.0 Infosys
In [126]:
df[df['Name']=="Wipro"]
Out[126]:
values gain Name
wipro 223.0 20.0 Wipro
In [129]:
df.loc['tata'] 
Out[129]:
values            100
gain               11
Name      Tata Motors
Name: tata, dtype: object
In [131]:
df['Name'][df['gain']>10] # Name wherever gain is > 10
Out[131]:
tata             Tata Motors
wipro                  Wipro
hcl      Hindustan Computers
infy                 Infosys
Name: Name, dtype: object
In [132]:
df
Out[132]:
values gain Name
tata 100.0 11.0 Tata Motors
reliance 202.0 5.5 Reliance
wipro 223.0 20.0 Wipro
hcl 224.5 34.5 Hindustan Computers
infy 343.6 20.0 Infosys
In [134]:
t = [[i*j for i in range(1,6)] for j in range(1, 11)]
tables = pd.DataFrame(t, index=range(1, 11))
In [135]:
tables
Out[135]:
0 1 2 3 4
1 1 2 3 4 5
2 2 4 6 8 10
3 3 6 9 12 15
4 4 8 12 16 20
5 5 10 15 20 25
6 6 12 18 24 30
7 7 14 21 28 35
8 8 16 24 32 40
9 9 18 27 36 45
10 10 20 30 40 50
In [136]:
tables + 100
Out[136]:
0 1 2 3 4
1 101 102 103 104 105
2 102 104 106 108 110
3 103 106 109 112 115
4 104 108 112 116 120
5 105 110 115 120 125
6 106 112 118 124 130
7 107 114 121 128 135
8 108 116 124 132 140
9 109 118 127 136 145
10 110 120 130 140 150
In [137]:
tables + tables
Out[137]:
0 1 2 3 4
1 2 4 6 8 10
2 4 8 12 16 20
3 6 12 18 24 30
4 8 16 24 32 40
5 10 20 30 40 50
6 12 24 36 48 60
7 14 28 42 56 70
8 16 32 48 64 80
9 18 36 54 72 90
10 20 40 60 80 100
In [138]:
df
Out[138]:
values gain Name
tata 100.0 11.0 Tata Motors
reliance 202.0 5.5 Reliance
wipro 223.0 20.0 Wipro
hcl 224.5 34.5 Hindustan Computers
infy 343.6 20.0 Infosys
In [139]:
df['values']  + 100
Out[139]:
tata        200.0
reliance    302.0
wipro       323.0
hcl         324.5
infy        443.6
Name: values, dtype: float64
In [140]:
v1 = df['values']  + 100
In [141]:
df['values'] = v1
In [142]:
df
Out[142]:
values gain Name
tata 200.0 11.0 Tata Motors
reliance 302.0 5.5 Reliance
wipro 323.0 20.0 Wipro
hcl 324.5 34.5 Hindustan Computers
infy 443.6 20.0 Infosys
In [143]:
newgain = pd.Series([1.0, 1.1, 1.3, 1, 1], index=["tata",'reliance','wipro',"hcl",'infy'])
In [144]:
newgain
Out[144]:
tata        1.0
reliance    1.1
wipro       1.3
hcl         1.0
infy        1.0
dtype: float64
In [146]:
df['gain'] = df['gain'] + newgain
In [147]:
df
Out[147]:
values gain Name
tata 200.0 12.0 Tata Motors
reliance 302.0 6.6 Reliance
wipro 323.0 21.3 Wipro
hcl 324.5 35.5 Hindustan Computers
infy 443.6 21.0 Infosys
In [148]:
tables
Out[148]:
0 1 2 3 4
1 1 2 3 4 5
2 2 4 6 8 10
3 3 6 9 12 15
4 4 8 12 16 20
5 5 10 15 20 25
6 6 12 18 24 30
7 7 14 21 28 35
8 8 16 24 32 40
9 9 18 27 36 45
10 10 20 30 40 50
In [149]:
t2 = [[i*j for i in range(6,11)] for j in range(1, 11)]
tables2 = pd.DataFrame(t2, index=range(1, 11), columns=range(5,10))
In [150]:
tables2
Out[150]:
5 6 7 8 9
1 6 7 8 9 10
2 12 14 16 18 20
3 18 21 24 27 30
4 24 28 32 36 40
5 30 35 40 45 50
6 36 42 48 54 60
7 42 49 56 63 70
8 48 56 64 72 80
9 54 63 72 81 90
10 60 70 80 90 100
In [152]:
tables.join(tables2)
Out[152]:
0 1 2 3 4 5 6 7 8 9
1 1 2 3 4 5 6 7 8 9 10
2 2 4 6 8 10 12 14 16 18 20
3 3 6 9 12 15 18 21 24 27 30
4 4 8 12 16 20 24 28 32 36 40
5 5 10 15 20 25 30 35 40 45 50
6 6 12 18 24 30 36 42 48 54 60
7 7 14 21 28 35 42 49 56 63 70
8 8 16 24 32 40 48 56 64 72 80
9 9 18 27 36 45 54 63 72 81 90
10 10 20 30 40 50 60 70 80 90 100
In [153]:
tables.join(tables2, how="inner")
Out[153]:
0 1 2 3 4 5 6 7 8 9
1 1 2 3 4 5 6 7 8 9 10
2 2 4 6 8 10 12 14 16 18 20
3 3 6 9 12 15 18 21 24 27 30
4 4 8 12 16 20 24 28 32 36 40
5 5 10 15 20 25 30 35 40 45 50
6 6 12 18 24 30 36 42 48 54 60
7 7 14 21 28 35 42 49 56 63 70
8 8 16 24 32 40 48 56 64 72 80
9 9 18 27 36 45 54 63 72 81 90
10 10 20 30 40 50 60 70 80 90 100
In [154]:
tables.join(tables2, how="outer")
Out[154]:
0 1 2 3 4 5 6 7 8 9
1 1 2 3 4 5 6 7 8 9 10
2 2 4 6 8 10 12 14 16 18 20
3 3 6 9 12 15 18 21 24 27 30
4 4 8 12 16 20 24 28 32 36 40
5 5 10 15 20 25 30 35 40 45 50
6 6 12 18 24 30 36 42 48 54 60
7 7 14 21 28 35 42 49 56 63 70
8 8 16 24 32 40 48 56 64 72 80
9 9 18 27 36 45 54 63 72 81 90
10 10 20 30 40 50 60 70 80 90 100
In [155]:
df
Out[155]:
values gain Name
tata 200.0 12.0 Tata Motors
reliance 302.0 6.6 Reliance
wipro 323.0 21.3 Wipro
hcl 324.5 35.5 Hindustan Computers
infy 443.6 21.0 Infosys
In [175]:
df2 =  pd.DataFrame({"col1":[100.0,202.0, 223.0, 224.5, 343.6],
                   "col2" : [11.0,5.5,20.0,34.5,20.0],
                   'Name1' : ['Tata Motors', "Reliance", "Wipro", "arc", "vmware"]
                  }, index= ["tata",'reliance','wipro',"arc",'vmware'])
In [167]:
df
Out[167]:
values gain Name
tata 100.0 11.0 Tata Motors
reliance 202.0 5.5 Reliance
wipro 223.0 20.0 Wipro
hcl 224.5 34.5 Hindustan Computers
infy 343.6 20.0 Infosys
In [168]:
df2
Out[168]:
col1 col2 Name1
tata 100.0 11.0 Tata Motors
reliance 202.0 5.5 Reliance
wipro 223.0 20.0 Wipro
arc 224.5 34.5 arc
vmware 343.6 20.0 vmware
In [171]:
df.join(df2, how="inner") ### intrsection of labels
Out[171]:
values gain Name col1 col2 Name1
tata 100.0 11.0 Tata Motors 100.0 11.0 Tata Motors
reliance 202.0 5.5 Reliance 202.0 5.5 Reliance
wipro 223.0 20.0 Wipro 223.0 20.0 Wipro
In [172]:
df.join(df2, how="outer") ## unioin of labels
Out[172]:
values gain Name col1 col2 Name1
arc NaN NaN NaN 224.5 34.5 arc
hcl 224.5 34.5 Hindustan Computers NaN NaN NaN
infy 343.6 20.0 Infosys NaN NaN NaN
reliance 202.0 5.5 Reliance 202.0 5.5 Reliance
tata 100.0 11.0 Tata Motors 100.0 11.0 Tata Motors
vmware NaN NaN NaN 343.6 20.0 vmware
wipro 223.0 20.0 Wipro 223.0 20.0 Wipro
In [173]:
df
Out[173]:
values gain Name
tata 100.0 11.0 Tata Motors
reliance 202.0 5.5 Reliance
wipro 223.0 20.0 Wipro
hcl 224.5 34.5 Hindustan Computers
infy 343.6 20.0 Infosys
In [176]:
df3 =  pd.DataFrame({"values":[ 224.5, 343.6],
                   "gain" : [34.5,20.0],
                   'Name' : ["arc", "vmware"]
                  }, index= ["arc",'vmware'])
In [177]:
df3
Out[177]:
values gain Name
arc 224.5 34.5 arc
vmware 343.6 20.0 vmware
In [179]:
df
Out[179]:
values gain Name
tata 100.0 11.0 Tata Motors
reliance 202.0 5.5 Reliance
wipro 223.0 20.0 Wipro
hcl 224.5 34.5 Hindustan Computers
infy 343.6 20.0 Infosys
In [180]:
pd.concat([df, df3])
Out[180]:
values gain Name
tata 100.0 11.0 Tata Motors
reliance 202.0 5.5 Reliance
wipro 223.0 20.0 Wipro
hcl 224.5 34.5 Hindustan Computers
infy 343.6 20.0 Infosys
arc 224.5 34.5 arc
vmware 343.6 20.0 vmware
In [184]:
df.append(df3.loc['vmware'])
Out[184]:
values gain Name
tata 100.0 11.0 Tata Motors
reliance 202.0 5.5 Reliance
wipro 223.0 20.0 Wipro
hcl 224.5 34.5 Hindustan Computers
infy 343.6 20.0 Infosys
vmware 343.6 20.0 vmware
In [185]:
df.append(df3)
Out[185]:
values gain Name
tata 100.0 11.0 Tata Motors
reliance 202.0 5.5 Reliance
wipro 223.0 20.0 Wipro
hcl 224.5 34.5 Hindustan Computers
infy 343.6 20.0 Infosys
arc 224.5 34.5 arc
vmware 343.6 20.0 vmware
In [186]:
url = "http://notes.pipal.in/2018/arcesium-basic-nov/loansData.csv"
In [187]:
loans = pd.read_csv(url)
In [189]:
loans.head()
Out[189]:
Amount.Requested Amount.Funded.By.Investors Interest.Rate Loan.Length Loan.Purpose Debt.To.Income.Ratio State Home.Ownership Monthly.Income FICO.Range Open.CREDIT.Lines Revolving.CREDIT.Balance Inquiries.in.the.Last.6.Months Employment.Length
81174 20000 20000.0 8.90% 36 months debt_consolidation 14.90% SC MORTGAGE 6541.67 735-739 14.0 14272.0 2.0 < 1 year
99592 19200 19200.0 12.12% 36 months debt_consolidation 28.36% TX MORTGAGE 4583.33 715-719 12.0 11140.0 1.0 2 years
80059 35000 35000.0 21.98% 60 months debt_consolidation 23.81% CA MORTGAGE 11500.00 690-694 14.0 21977.0 1.0 2 years
15825 10000 9975.0 9.99% 36 months debt_consolidation 14.30% KS MORTGAGE 3833.33 695-699 10.0 9346.0 0.0 5 years
33182 12000 12000.0 11.71% 36 months credit_card 18.78% NJ RENT 3195.00 695-699 11.0 14469.0 0.0 9 years
In [190]:
loans.tail()
Out[190]:
Amount.Requested Amount.Funded.By.Investors Interest.Rate Loan.Length Loan.Purpose Debt.To.Income.Ratio State Home.Ownership Monthly.Income FICO.Range Open.CREDIT.Lines Revolving.CREDIT.Balance Inquiries.in.the.Last.6.Months Employment.Length
23735 30000 29950.00 16.77% 60 months debt_consolidation 19.23% NY MORTGAGE 9250.00 705-709 15.0 45880.0 1.0 8 years
65882 16000 16000.00 14.09% 60 months home_improvement 21.54% MD OWN 8903.25 740-744 18.0 18898.0 1.0 10+ years
55610 10000 10000.00 13.99% 36 months debt_consolidation 4.89% PA MORTGAGE 2166.67 680-684 4.0 4544.0 0.0 10+ years
38576 6000 6000.00 12.42% 36 months major_purchase 16.66% NJ RENT 3500.00 675-679 8.0 7753.0 0.0 5 years
3116 9000 5242.75 13.79% 36 months debt_consolidation 6.76% NY RENT 3875.00 670-674 7.0 7589.0 0.0 10+ years
In [191]:
loans.describe()
Out[191]:
Amount.Requested Amount.Funded.By.Investors Monthly.Income Open.CREDIT.Lines Revolving.CREDIT.Balance Inquiries.in.the.Last.6.Months
count 2500.000000 2500.000000 2499.000000 2498.000000 2498.000000 2498.000000
mean 12406.500000 12001.573236 5688.931321 10.075661 15244.559648 0.906325
std 7801.544872 7745.320754 3963.118185 4.508644 18308.549795 1.231036
min 1000.000000 -0.010000 588.500000 2.000000 0.000000 0.000000
25% 6000.000000 6000.000000 3500.000000 7.000000 5585.750000 0.000000
50% 10000.000000 10000.000000 5000.000000 9.000000 10962.000000 0.000000
75% 17000.000000 16000.000000 6800.000000 13.000000 18888.750000 1.000000
max 35000.000000 35000.000000 102750.000000 38.000000 270800.000000 9.000000
In [192]:
loans.columns
Out[192]:
Index(['Amount.Requested', 'Amount.Funded.By.Investors', 'Interest.Rate',
       'Loan.Length', 'Loan.Purpose', 'Debt.To.Income.Ratio', 'State',
       'Home.Ownership', 'Monthly.Income', 'FICO.Range', 'Open.CREDIT.Lines',
       'Revolving.CREDIT.Balance', 'Inquiries.in.the.Last.6.Months',
       'Employment.Length'],
      dtype='object')
In [193]:
cols = [name.replace(".","_") for name in loans.columns]
In [194]:
cols
Out[194]:
['Amount_Requested',
 'Amount_Funded_By_Investors',
 'Interest_Rate',
 'Loan_Length',
 'Loan_Purpose',
 'Debt_To_Income_Ratio',
 'State',
 'Home_Ownership',
 'Monthly_Income',
 'FICO_Range',
 'Open_CREDIT_Lines',
 'Revolving_CREDIT_Balance',
 'Inquiries_in_the_Last_6_Months',
 'Employment_Length']
In [195]:
col_dict = dict(zip(loans.columns, cols))
In [196]:
col_dict
Out[196]:
{'Amount.Requested': 'Amount_Requested',
 'Amount.Funded.By.Investors': 'Amount_Funded_By_Investors',
 'Interest.Rate': 'Interest_Rate',
 'Loan.Length': 'Loan_Length',
 'Loan.Purpose': 'Loan_Purpose',
 'Debt.To.Income.Ratio': 'Debt_To_Income_Ratio',
 'State': 'State',
 'Home.Ownership': 'Home_Ownership',
 'Monthly.Income': 'Monthly_Income',
 'FICO.Range': 'FICO_Range',
 'Open.CREDIT.Lines': 'Open_CREDIT_Lines',
 'Revolving.CREDIT.Balance': 'Revolving_CREDIT_Balance',
 'Inquiries.in.the.Last.6.Months': 'Inquiries_in_the_Last_6_Months',
 'Employment.Length': 'Employment_Length'}
In [198]:
loans.rename(columns=col_dict, inplace=True)
In [200]:
loans.Amount_Requested
Out[200]:
81174     20000
99592     19200
80059     35000
15825     10000
33182     12000
62403      6000
48808     10000
22090     33500
76404     14675
15867      7000
94971      2000
36911     10625
41200     28000
83869     35000
53853      9600
21399     25000
62127     10000
23446     14000
44987     10000
17977      5200
86099     22000
99483     30000
28798      6500
24168     17400
10356      4000
46027      7200
2238       8000
65278      8000
4227       3000
50182     14500
          ...  
84265     20000
80231     19000
49533     17300
102514     7000
78618      7200
86953     10000
80129      4000
85216     17500
38247     20000
91245     16200
53041     10000
63051     27000
14446      4500
68628     15875
98758     15000
13070     25000
45836      7000
52330     15000
48243     17000
63256     19075
42124     10000
78043      8475
925        6400
74047     30000
49957     24000
23735     30000
65882     16000
55610     10000
38576      6000
3116       9000
Name: Amount_Requested, Length: 2500, dtype: int64
In [201]:
loans.describe()
Out[201]:
Amount_Requested Amount_Funded_By_Investors Monthly_Income Open_CREDIT_Lines Revolving_CREDIT_Balance Inquiries_in_the_Last_6_Months
count 2500.000000 2500.000000 2499.000000 2498.000000 2498.000000 2498.000000
mean 12406.500000 12001.573236 5688.931321 10.075661 15244.559648 0.906325
std 7801.544872 7745.320754 3963.118185 4.508644 18308.549795 1.231036
min 1000.000000 -0.010000 588.500000 2.000000 0.000000 0.000000
25% 6000.000000 6000.000000 3500.000000 7.000000 5585.750000 0.000000
50% 10000.000000 10000.000000 5000.000000 9.000000 10962.000000 0.000000
75% 17000.000000 16000.000000 6800.000000 13.000000 18888.750000 1.000000
max 35000.000000 35000.000000 102750.000000 38.000000 270800.000000 9.000000
In [202]:
loans.head()
Out[202]:
Amount_Requested Amount_Funded_By_Investors Interest_Rate Loan_Length Loan_Purpose Debt_To_Income_Ratio State Home_Ownership Monthly_Income FICO_Range Open_CREDIT_Lines Revolving_CREDIT_Balance Inquiries_in_the_Last_6_Months Employment_Length
81174 20000 20000.0 8.90% 36 months debt_consolidation 14.90% SC MORTGAGE 6541.67 735-739 14.0 14272.0 2.0 < 1 year
99592 19200 19200.0 12.12% 36 months debt_consolidation 28.36% TX MORTGAGE 4583.33 715-719 12.0 11140.0 1.0 2 years
80059 35000 35000.0 21.98% 60 months debt_consolidation 23.81% CA MORTGAGE 11500.00 690-694 14.0 21977.0 1.0 2 years
15825 10000 9975.0 9.99% 36 months debt_consolidation 14.30% KS MORTGAGE 3833.33 695-699 10.0 9346.0 0.0 5 years
33182 12000 12000.0 11.71% 36 months credit_card 18.78% NJ RENT 3195.00 695-699 11.0 14469.0 0.0 9 years
In [203]:
loans.Loan_Length
Out[203]:
81174     36 months
99592     36 months
80059     60 months
15825     36 months
33182     36 months
62403     36 months
48808     36 months
22090     60 months
76404     36 months
15867     36 months
94971     36 months
36911     36 months
41200     60 months
83869     36 months
53853     36 months
21399     60 months
62127     36 months
23446     60 months
44987     36 months
17977     60 months
86099     36 months
99483     60 months
28798     60 months
24168     36 months
10356     60 months
46027     36 months
2238      36 months
65278     36 months
4227      36 months
50182     36 months
            ...    
84265     60 months
80231     36 months
49533     60 months
102514    36 months
78618     36 months
86953     36 months
80129     36 months
85216     36 months
38247     36 months
91245     60 months
53041     36 months
63051     36 months
14446     36 months
68628     36 months
98758     36 months
13070     36 months
45836     36 months
52330     36 months
48243     36 months
63256     36 months
42124     36 months
78043     36 months
925       36 months
74047     60 months
49957     36 months
23735     60 months
65882     60 months
55610     36 months
38576     36 months
3116      36 months
Name: Loan_Length, Length: 2500, dtype: object
In [206]:
loanlength = loans.Loan_Length.str.split(expand=True)
In [209]:
loanlength.rename(columns={0:"Loan_length", 1:1}, inplace=True)
In [212]:
loans['Loan_Length']  = pd.to_numeric(loanlength.Loan_length)
In [214]:
loans.describe()
Out[214]:
Amount_Requested Amount_Funded_By_Investors Loan_Length Monthly_Income Open_CREDIT_Lines Revolving_CREDIT_Balance Inquiries_in_the_Last_6_Months
count 2500.000000 2500.000000 2500.000000 2499.000000 2498.000000 2498.000000 2498.000000
mean 12406.500000 12001.573236 41.260800 5688.931321 10.075661 15244.559648 0.906325
std 7801.544872 7745.320754 9.930893 3963.118185 4.508644 18308.549795 1.231036
min 1000.000000 -0.010000 36.000000 588.500000 2.000000 0.000000 0.000000
25% 6000.000000 6000.000000 36.000000 3500.000000 7.000000 5585.750000 0.000000
50% 10000.000000 10000.000000 36.000000 5000.000000 9.000000 10962.000000 0.000000
75% 17000.000000 16000.000000 36.000000 6800.000000 13.000000 18888.750000 1.000000
max 35000.000000 35000.000000 60.000000 102750.000000 38.000000 270800.000000 9.000000
In [215]:
loans.head()
Out[215]:
Amount_Requested Amount_Funded_By_Investors Interest_Rate Loan_Length Loan_Purpose Debt_To_Income_Ratio State Home_Ownership Monthly_Income FICO_Range Open_CREDIT_Lines Revolving_CREDIT_Balance Inquiries_in_the_Last_6_Months Employment_Length
81174 20000 20000.0 8.90% 36 debt_consolidation 14.90% SC MORTGAGE 6541.67 735-739 14.0 14272.0 2.0 < 1 year
99592 19200 19200.0 12.12% 36 debt_consolidation 28.36% TX MORTGAGE 4583.33 715-719 12.0 11140.0 1.0 2 years
80059 35000 35000.0 21.98% 60 debt_consolidation 23.81% CA MORTGAGE 11500.00 690-694 14.0 21977.0 1.0 2 years
15825 10000 9975.0 9.99% 36 debt_consolidation 14.30% KS MORTGAGE 3833.33 695-699 10.0 9346.0 0.0 5 years
33182 12000 12000.0 11.71% 36 credit_card 18.78% NJ RENT 3195.00 695-699 11.0 14469.0 0.0 9 years
In [217]:
loans.Interest_Rate = pd.to_numeric(loans.Interest_Rate.str.replace("%",""))
In [218]:
loans.describe()
Out[218]:
Amount_Requested Amount_Funded_By_Investors Interest_Rate Loan_Length Monthly_Income Open_CREDIT_Lines Revolving_CREDIT_Balance Inquiries_in_the_Last_6_Months
count 2500.000000 2500.000000 2500.000000 2500.000000 2499.000000 2498.000000 2498.000000 2498.000000
mean 12406.500000 12001.573236 13.066996 41.260800 5688.931321 10.075661 15244.559648 0.906325
std 7801.544872 7745.320754 4.178230 9.930893 3963.118185 4.508644 18308.549795 1.231036
min 1000.000000 -0.010000 5.420000 36.000000 588.500000 2.000000 0.000000 0.000000
25% 6000.000000 6000.000000 10.160000 36.000000 3500.000000 7.000000 5585.750000 0.000000
50% 10000.000000 10000.000000 13.110000 36.000000 5000.000000 9.000000 10962.000000 0.000000
75% 17000.000000 16000.000000 15.800000 36.000000 6800.000000 13.000000 18888.750000 1.000000
max 35000.000000 35000.000000 24.890000 60.000000 102750.000000 38.000000 270800.000000 9.000000
In [221]:
loans.groupby("State").mean()
Out[221]:
Amount_Requested Amount_Funded_By_Investors Interest_Rate Loan_Length Monthly_Income Open_CREDIT_Lines Revolving_CREDIT_Balance Inquiries_in_the_Last_6_Months
State
AK 13688.636364 13679.545455 16.594545 46.909091 4975.576364 9.818182 13262.363636 0.909091
AL 12734.210526 11894.035000 13.144211 42.315789 5029.517895 9.394737 13171.631579 0.947368
AR 8598.076923 8457.692308 13.230000 37.846154 5823.102308 9.384615 11371.461538 1.307692
AZ 12315.760870 12011.720217 13.150652 40.695652 5446.441957 9.130435 16372.347826 0.804348
CA 12207.101617 11921.603464 12.955266 40.489607 5761.773903 9.741339 15268.845266 0.775982
CO 12261.065574 11701.219672 12.919672 39.934426 5155.414590 10.098361 12442.655738 1.065574
CT 12169.500000 11645.271400 13.721000 40.800000 5260.232400 10.220000 14134.580000 1.140000
DC 15602.272727 14805.958182 13.741818 46.909091 7752.097273 10.090909 12911.727273 0.818182
DE 10559.375000 10546.875000 11.661250 45.000000 5770.832500 9.125000 14505.875000 1.000000
FL 11974.260355 11419.491479 12.964852 40.402367 5450.207396 10.017751 18017.106509 0.905325
GA 13320.408163 12437.758776 12.544388 41.877551 6242.562857 9.989796 19427.581633 0.836735
HI 13941.666667 13819.640000 15.984167 44.000000 5113.888333 11.000000 13053.083333 0.166667
IA 3500.000000 690.340000 14.120000 36.000000 3750.000000 8.000000 9771.000000 4.000000
IL 13143.811881 12812.574455 12.616337 41.940594 5715.900099 10.108911 14789.762376 1.188119
IN 13725.000000 8791.666667 13.090000 44.000000 3750.000000 13.666667 14485.333333 4.333333
KS 11619.047619 10922.096190 13.717619 42.857143 4801.883810 9.523810 9786.333333 1.047619
KY 9906.521739 9788.454783 12.382609 43.304348 4366.063043 8.478261 12192.652174 0.826087
LA 17619.318182 17475.772727 14.920455 43.636364 5481.591364 9.954545 14978.272727 0.909091
MA 13856.164384 13534.591096 12.613014 39.616438 5877.133562 9.958904 13904.438356 0.904110
MD 13385.294118 13169.033088 13.278971 42.705882 7111.838529 10.029412 12581.897059 0.882353
MI 12240.555556 11765.681333 14.375556 40.266667 4782.703778 10.377778 14313.888889 1.000000
MN 13861.184211 13294.764737 13.621842 42.315789 5150.977368 10.210526 15860.078947 0.657895
MO 13006.060606 12023.355455 12.783333 42.545455 4735.805758 10.393939 13852.848485 1.333333
MS 4575.000000 4575.000000 15.650000 36.000000 5833.330000 25.000000 82586.000000 2.000000
MT 12242.857143 12242.857143 10.774286 39.428571 4145.522857 8.571429 34194.714286 0.428571
NC 11395.312500 11351.499063 12.640312 42.000000 5317.459844 9.406250 17568.734375 0.640625
NH 16776.666667 15023.652000 12.171333 42.400000 7734.733333 9.533333 24717.600000 1.000000
NJ 11451.861702 10942.380000 12.778617 40.340426 6106.467660 9.829787 14762.063830 0.946809
NM 13607.692308 13605.769231 14.325385 45.230769 5229.133846 9.384615 18429.461538 0.923077
NV 11746.875000 11473.803125 13.512187 42.000000 5110.346250 10.500000 13888.531250 0.750000
NY 12773.823529 12207.519098 13.102392 41.835294 6507.559921 10.316206 15923.205534 0.794466
OH 11392.957746 11014.558592 12.259296 41.070423 5162.090141 10.577465 11539.436620 1.197183
OK 12808.333333 12571.313810 13.780952 44.000000 5914.364762 9.571429 11608.714286 1.190476
OR 11035.000000 10956.450333 12.657000 40.000000 4727.888667 9.266667 11193.866667 0.466667
PA 11599.218750 11268.495729 12.538646 40.750000 5108.773750 10.322917 10675.947917 1.208333
RI 11493.333333 11488.333333 13.013333 44.000000 6182.128000 12.400000 19046.866667 1.000000
SC 10401.785714 10398.174286 12.790357 38.571429 5525.952143 9.428571 9503.892857 0.821429
SD 14600.000000 14600.000000 10.252500 36.000000 4583.332500 12.000000 15598.000000 0.250000
TX 12722.701149 12325.628793 13.381954 40.000000 5500.602529 10.614943 13757.000000 0.896552
UT 9559.375000 9551.562500 13.174375 37.500000 5306.353750 9.812500 17797.875000 0.625000
VA 12929.807692 12508.127692 13.337564 43.076923 6728.106667 11.256410 24267.358974 0.974359
VT 19160.000000 19160.000000 17.884000 50.400000 5932.118000 9.800000 29618.200000 1.000000
WA 12157.327586 12129.595517 12.977759 42.620690 5220.620862 9.741379 14174.913793 0.862069
WI 10459.615385 10276.437692 13.881923 43.384615 5020.220000 10.538462 14333.000000 1.192308
WV 11273.333333 11268.333333 14.108667 44.000000 4321.167333 11.733333 15279.133333 1.266667
WY 7325.000000 7325.000000 13.465000 36.000000 4875.750000 12.750000 14286.750000 0.250000
In [222]:
loans.groupby("State").sum()
Out[222]:
Amount_Requested Amount_Funded_By_Investors Interest_Rate Loan_Length Monthly_Income Open_CREDIT_Lines Revolving_CREDIT_Balance Inquiries_in_the_Last_6_Months
State
AK 150575 150475.00 182.54 516 54731.34 108.0 145886.0 10.0
AL 483900 451973.33 499.48 1608 191121.68 357.0 500522.0 36.0
AR 111775 109950.00 171.99 492 75700.33 122.0 147829.0 17.0
AZ 566525 552539.13 604.93 1872 250536.33 420.0 753128.0 37.0
CA 5285675 5162054.30 5609.63 17532 2494848.10 4218.0 6611410.0 336.0
CO 747925 713774.40 788.10 2436 314480.29 616.0 759002.0 65.0
CT 608475 582263.57 686.05 2040 263011.62 511.0 706729.0 57.0
DC 171625 162865.54 151.16 516 85273.07 111.0 142029.0 9.0
DE 84475 84375.00 93.29 360 46166.66 73.0 116047.0 8.0
FL 2023650 1929894.06 2191.06 6828 921085.05 1693.0 3044891.0 153.0
GA 1305400 1218900.36 1229.35 4104 611771.16 979.0 1903903.0 82.0
HI 167300 165835.68 191.81 528 61366.66 132.0 156637.0 2.0
IA 3500 690.34 14.12 36 3750.00 8.0 9771.0 4.0
IL 1327525 1294070.02 1274.25 4236 577305.91 1021.0 1493766.0 120.0
IN 41175 26375.00 39.27 132 11250.00 41.0 43456.0 13.0
KS 244000 229364.02 288.07 900 100839.56 200.0 205513.0 22.0
KY 227850 225134.46 284.80 996 100419.45 195.0 280431.0 19.0
LA 387625 384467.00 328.25 960 120595.01 219.0 329522.0 20.0
MA 1011500 988025.15 920.75 2892 429030.75 727.0 1015024.0 66.0
MD 910200 895494.25 902.97 2904 483605.02 682.0 855569.0 60.0
MI 550825 529455.66 646.90 1812 215221.67 467.0 644125.0 45.0
MN 526725 505201.06 517.63 1608 195737.14 388.0 602683.0 25.0
MO 429200 396770.73 421.85 1404 156281.59 343.0 457144.0 44.0
MS 4575 4575.00 15.65 36 5833.33 25.0 82586.0 2.0
MT 85700 85700.00 75.42 276 29018.66 60.0 239363.0 3.0
NC 729300 726495.94 808.98 2688 340317.43 602.0 1124399.0 41.0
NH 251650 225354.78 182.57 636 116021.00 143.0 370764.0 15.0
NJ 1076475 1028583.72 1201.19 3792 574007.96 924.0 1387634.0 89.0
NM 176900 176875.00 186.23 588 67978.74 122.0 239583.0 12.0
NV 375900 367161.70 432.39 1344 163531.08 336.0 444433.0 24.0
NY 3257325 3112917.37 3341.11 10668 1652920.22 2610.0 4028571.0 201.0
OH 808900 782033.66 870.41 2916 366508.40 751.0 819300.0 85.0
OK 268975 263997.59 289.40 924 124201.66 201.0 243783.0 25.0
OR 331050 328693.51 379.71 1200 141836.66 278.0 335816.0 14.0
PA 1113525 1081775.59 1203.71 3912 490442.28 991.0 1024891.0 116.0
RI 172400 172325.00 195.20 660 92731.92 186.0 285703.0 15.0
SC 291250 291148.88 358.13 1080 154726.66 264.0 266109.0 23.0
SD 58400 58400.00 41.01 144 18333.33 48.0 62392.0 1.0
TX 2213750 2144659.41 2328.46 6960 957104.84 1847.0 2393718.0 156.0
UT 152950 152825.00 210.79 600 84901.66 157.0 284766.0 10.0
VA 1008525 975633.96 1040.33 3360 524792.32 878.0 1892854.0 76.0
VT 95800 95800.00 89.42 252 29660.59 49.0 148091.0 5.0
WA 705125 703516.54 752.71 2472 302796.01 565.0 822145.0 50.0
WI 271950 267187.38 360.93 1128 130525.72 274.0 372658.0 31.0
WV 169100 169025.00 211.63 660 64817.51 176.0 229187.0 19.0
WY 29300 29300.00 53.86 144 19503.00 51.0 57147.0 1.0
In [224]:
loans.groupby("State").std()
Out[224]:
Amount_Requested Amount_Funded_By_Investors Interest_Rate Loan_Length Monthly_Income Open_CREDIT_Lines Revolving_CREDIT_Balance Inquiries_in_the_Last_6_Months
State
AK 6230.021706 6229.143382 4.887796 12.533591 1973.012878 5.996969 8903.179446 1.221028
AL 8451.314109 8248.740545 4.558670 10.710200 2207.319980 4.390035 12294.324180 1.487740
AR 5279.307695 5144.033605 3.734541 6.656402 4457.244591 4.253204 9432.504083 1.250641
AZ 7272.605293 7236.092129 3.714566 9.626052 3016.563530 4.261500 31683.696368 1.046042
CA 7433.210915 7373.270025 3.870786 9.369993 3255.324115 4.245103 20401.612172 1.092133
CO 6816.524843 6717.698123 3.984925 8.958923 2781.290761 5.482411 11664.357402 0.980966
CT 8291.868543 8595.175475 4.413391 9.697464 2659.105046 5.884223 16610.722387 1.069236
DC 7999.284769 8396.650576 4.850991 12.533591 3322.355844 5.521857 9900.313531 0.981650
DE 5062.501102 5035.914541 2.824067 12.421180 1929.691634 3.907411 16246.446042 0.925820
FL 7791.485750 7486.184226 4.196036 9.316089 3506.542200 4.296279 26542.085643 1.402560
GA 8166.231502 8417.915952 4.190037 10.373703 3530.794782 4.800118 29051.181591 1.419263
HI 8590.026811 8562.624716 4.597781 11.816783 2990.024152 2.628515 8770.460229 0.389249
IA NaN NaN NaN NaN NaN NaN NaN NaN
IL 8210.086332 7935.989778 4.233769 10.409440 3266.810268 4.173490 11863.971023 1.467739
IN 9879.049802 13972.972423 3.863612 13.856406 2204.793389 8.504901 6916.081429 4.509250
KS 6643.271524 6766.137803 4.380900 11.109841 2517.905231 3.855114 5800.361509 1.627151
KY 4482.340459 4366.847400 3.854074 11.291327 2161.949437 2.905630 18979.160065 0.834058
LA 11431.759051 11227.602097 4.633174 11.441551 2751.804205 3.696553 10088.326126 1.150945
MA 8784.937967 8748.547717 3.879707 8.645214 3286.087398 4.735910 14138.230039 1.180447
MD 8307.961515 8397.003905 4.484916 10.849113 7745.793004 5.677845 10596.315605 1.165799
MI 8105.732905 8168.474939 3.089244 9.279498 2362.381161 4.443017 12376.046766 1.692228
MN 8139.590221 7857.665544 3.864125 10.710200 3487.799626 4.185935 11705.949475 1.020766
MO 7125.685357 7535.764969 4.498809 10.854408 1914.855889 3.481553 10122.928878 1.831438
MS NaN NaN NaN NaN NaN NaN NaN NaN
MT 5517.674523 5517.674523 3.828441 9.071147 1716.153496 1.511858 49036.466956 1.133893
NC 7392.786428 7346.697182 4.296432 10.474459 2980.538403 4.460475 26419.500548 0.861287
NH 9953.335167 9177.489444 5.430313 10.985705 4876.882749 6.174448 28415.686060 1.000000
NJ 7430.380494 7381.948799 4.044832 9.287005 3535.262544 4.682703 12466.616645 1.264631
NM 7705.155097 7706.268781 4.195642 12.152872 2542.033423 3.594868 10550.579965 0.954074
NV 7496.772827 7336.051969 4.347268 10.558592 2085.896443 4.340358 11259.607015 1.016001
NY 8389.167919 8350.251736 4.281625 10.315702 7395.194573 4.439498 17846.734578 1.150073
OH 6837.962812 6469.697305 4.511920 9.866717 2465.323152 4.354512 9909.962596 1.214434
OK 8483.813019 8748.870643 4.217586 11.593101 2626.601520 4.272838 9527.720835 1.249762
OR 8076.959034 8107.091297 4.708811 9.097177 2334.917863 3.666562 8731.391629 0.776079
PA 7647.117150 7525.667233 4.096787 9.612492 2707.464239 4.867713 9458.698103 1.435759
RI 9260.184869 9248.662387 4.624496 11.710801 2633.227764 5.766901 13439.541313 1.069045
SC 6255.042146 6257.250431 3.668329 7.559289 3531.349648 4.483951 6285.771984 1.090483
SD 12721.635115 12721.635115 3.062008 0.000000 2672.738262 2.160247 13246.799135 0.500000
TX 7412.121119 7347.385395 4.123098 8.970085 2677.678697 4.559942 11057.963045 1.226249
UT 8264.985758 8266.354843 4.196956 6.000000 5664.139737 4.354595 17888.415484 0.806226
VA 7777.555806 7825.630803 4.738306 11.014476 3683.934240 4.197822 26256.876745 1.268749
VT 6966.204131 6966.204131 5.530504 13.145341 3245.565858 2.167948 33304.202763 1.224745
WA 7189.656631 7188.333833 4.194194 10.820428 2353.717024 4.636092 10494.445963 0.963333
WI 8363.309981 8523.991956 4.991112 11.296289 2994.531872 4.447298 12673.067013 1.414757
WV 8788.478998 8776.660744 4.092355 11.710801 1905.740593 4.636296 12513.789268 0.883715
WY 5533.760024 5533.760024 2.840205 0.000000 3514.102861 4.924429 8682.071849 0.500000
In [225]:
loans.describe()
Out[225]:
Amount_Requested Amount_Funded_By_Investors Interest_Rate Loan_Length Monthly_Income Open_CREDIT_Lines Revolving_CREDIT_Balance Inquiries_in_the_Last_6_Months
count 2500.000000 2500.000000 2500.000000 2500.000000 2499.000000 2498.000000 2498.000000 2498.000000
mean 12406.500000 12001.573236 13.066996 41.260800 5688.931321 10.075661 15244.559648 0.906325
std 7801.544872 7745.320754 4.178230 9.930893 3963.118185 4.508644 18308.549795 1.231036
min 1000.000000 -0.010000 5.420000 36.000000 588.500000 2.000000 0.000000 0.000000
25% 6000.000000 6000.000000 10.160000 36.000000 3500.000000 7.000000 5585.750000 0.000000
50% 10000.000000 10000.000000 13.110000 36.000000 5000.000000 9.000000 10962.000000 0.000000
75% 17000.000000 16000.000000 15.800000 36.000000 6800.000000 13.000000 18888.750000 1.000000
max 35000.000000 35000.000000 24.890000 60.000000 102750.000000 38.000000 270800.000000 9.000000
In [226]:
loans.Amount_Requested.sum()
Out[226]:
31016250
In [227]:
loans.Amount_Requested.std()
Out[227]:
7801.544872072222

Writing Excel files

In [233]:
import xlsxwriter
In [234]:
w = xlsxwriter.Workbook("sample.xlsx")
In [235]:
sheet = w.add_worksheet("sheet1")
In [236]:
for i in range(1,6):
    for j in range(1,11):
        sheet.write(j, 1, i*j) # write(row, col, value)
In [237]:
w.close()
In [238]:
w = xlsxwriter.Workbook("sample2.xlsx")
sheet = w.add_worksheet("sheet1")
for i in range(1,6):
    for j in range(1,11):
        sheet.write("A"+str(j), i*j) # write(row, col, value)
w.close()
In [239]:
pd.read_excel("sample2.xlsx")
Out[239]:
5
0 10
1 15
2 20
3 25
4 30
5 35
6 40
7 45
8 50
In [243]:
w = xlsxwriter.Workbook("sample2.xlsx")
sheet = w.add_worksheet("sheet1")
for i in range(1,6):
    sheet.write(0, i-1, i)
    for j in range(1,11):
        sheet.write(j, i-1 , i*j) # write(row, col, value)
w.close()
pd.read_excel("sample2.xlsx")
Out[243]:
1 2 3 4 5
0 1 2 3 4 5
1 2 4 6 8 10
2 3 6 9 12 15
3 4 8 12 16 20
4 5 10 15 20 25
5 6 12 18 24 30
6 7 14 21 28 35
7 8 16 24 32 40
8 9 18 27 36 45
9 10 20 30 40 50
In [245]:
w = xlsxwriter.Workbook("format.xlsx")
sheet = w.add_worksheet("sheet1")
bold = w.add_format({'bold':True})
numberformat = w.add_format({'num_format':"$#,###"})

sheet.write(0,0,"Bold", bold)
sheet.write(0,1, 1000, numberformat)

w.close()

formula

In [251]:
w = xlsxwriter.Workbook("formula.xlsx")
sheet = w.add_worksheet("sheet1")
bold = w.add_format({'bold':True})
## headers
sheet.write(0,0, "A", bold)
sheet.write(0,1, "B", bold)
sheet.write(0, 2, "A+B", bold)
for i in range(1,11):
    sheet.write(i, 0, i+5)
    sheet.write(i, 1, 2*i)
    sheet.write(i, 2, "=A{0}+B{0}".format(i+1)) #string formating
sheet.write(11, 0, "=SUM(A2:A11)")
w.close()
In [250]:
"A{row} + B{row}".format(row=9)
Out[250]:
'A9 + B9'

For more detailed documentation see

https://xlsxwriter.readthedocs.io/

In [253]:
help(w.add_format)
Help on method add_format in module xlsxwriter.workbook:

add_format(properties=None) method of xlsxwriter.workbook.Workbook instance
    Add a new Format to the Excel Workbook.
    
    Args:
        properties: The format properties.
    
    Returns:
        Reference to a Format object.

In [254]:
import openpyxl
In [255]:
w = openpyxl.load_workbook("formula.xlsx")
In [259]:
w['sheet1']['A1'].value
Out[259]:
'A'
In [ ]: