Nov 26-30, 2018 Vikrant Patil
These notes are available online at http://notes.pipal.in/2018/arcesium-basic-nov/day5.html
© Pipal Academy LLP
Day 1 | Day 2 | Day 3 | Day 4 | Day 5
We will be using Python 3 (any 3.x release) from Anaconda for this training. You can download it from https://www.anaconda.com/download
# y = x binds a second name to the same int object; rebinding y afterwards
# only changes what y refers to, so x is unaffected.
x = 3
y = x
y = 5
print(x)  # prints 3
# b = a binds a second name to the same list object, so a mutation made
# through b is visible through a.
a = [1,2,3,4]
b = a
b.append(5)
print(a)  # prints [1, 2, 3, 4, 5]
# Rebinding b to a brand-new list leaves the list referenced by a untouched.
a = [1,2,3,4]
b = a
b = [1,1,1]
print(a)  # prints [1, 2, 3, 4]
l = [1]*5


def append(x):
    """Append -1 to the list ``x`` in place; returns None."""
    # Mutating the argument changes the caller's list, because the
    # parameter and the caller's name refer to the same object.
    x.append(-1)


append(l)
print(l)  # prints [1, 1, 1, 1, 1, -1]
x = 3


def addone(z):
    """Rebind the local name ``z`` to ``z + 1``; the caller sees no change.

    There is deliberately no return statement, so the call evaluates to None.
    """
    z = z+1


addone(x)
print(x)  # prints 3: rebinding z inside the function did not touch x
For basic immutable types such as int, float and bool there is no way to pass them by reference: rebinding the parameter inside a function has no effect on the caller's variable.
a = addone(3) # if function has no return statement, it returns None
print(a)  # prints None
def fun():
    """Print the text "Fun!"; returns None."""
    print("Fun!")
# A bare function name (no parentheses) evaluates to the function object.
fun
type(a)
type([1,2,3])
type(fun)
# Functions are ordinary objects, so another name can be bound to one.
aliasfun = fun
aliasfun
# Both names refer to the same function, so both calls do the same thing.
fun()
aliasfun()
def square(x):
    """Return ``x`` multiplied by itself."""
    return x*x
def sumofsquare(x, y):
    """Return ``square(x) + square(y)``."""
    return square(x) + square(y)
def sumof(f, x, y):
    """Apply the callable ``f`` to ``x`` and to ``y`` and return the sum.

    ``f`` is passed in as an argument, so the same function works for any
    one-argument transformation.
    """
    return f(x) + f(y)
# Both calls compute square(4) + square(5); sumof receives the function to
# apply as its first argument instead of hard-coding it.
sumof(square, 4, 5)
sumofsquare(4, 5)
words = ["one","two","three","four","five","six"]
# Default ordering for strings is alphabetical.
sorted(words)
# key=len orders the words by their length instead.
sorted(words, key=len)
# max follows the same rules: alphabetical by default, by length with key=len.
max(words)
max(words, key=len)
# reverse=True gives longest-first order.
sorted(words, key=len, reverse=True)
# Each record is a 3-tuple; the accessor functions defined below treat the
# fields as (name, value, volume).
records = [
("tata", 100, 40),
("raliance", 300, 17),
("infy", 250, 20),
("arc", 100,1000)
]
records
# Without a key, tuples are compared element by element, so max/min are
# decided by the first field (the name string) first.
max(records)
min(records)
def get_value(r):
    """Return the second field (index 1) of the record ``r``."""
    return r[1]
def get_volume(r):
    """Return the third field (index 2) of the record ``r``."""
    return r[2]
def get_name(r):
    """Return the first field (index 0) of the record ``r``."""
    return r[0]
# key= selects which field decides the maximum: index 1 via get_value,
# index 2 via get_volume.
max(records, key=get_value)
max(records, key=get_volume)
%%file bank0.py
# Single bank account whose balance lives in one module-level variable.
balance = 0


def get_balance():
    """Return the current module-level balance."""
    return balance


def deposit(amount):
    """Add ``amount`` to the module-level balance."""
    # ``global`` is required because the function rebinds the module name.
    global balance
    balance += amount


def withdraw(amount):
    """Subtract ``amount`` from the module-level balance (no overdraft check)."""
    global balance
    balance -= amount


if __name__ == "__main__":
    print("Initial :", get_balance())
    deposit(100)
    print("After deposit :", get_balance())
    withdraw(20)
    print("current balance:", get_balance())
!python bank0.py
%%file bank1.py
def make_account(balance):
    """Return a new account, represented as a dict holding ``balance``."""
    return {"balance": balance}


def get_balance(account):
    """Return the balance stored in ``account``."""
    return account['balance']


def deposit(account, amount):
    """Add ``amount`` to the balance of ``account`` in place."""
    account['balance'] += amount


def withdraw(account, amount):
    """Subtract ``amount`` from the balance of ``account`` in place (no overdraft check)."""
    account['balance'] -= amount


if __name__ == "__main__":
    # Two independent accounts: state is passed explicitly instead of being
    # kept in a module-level variable.
    a1 = make_account(0)
    a2 = make_account(100)
    print("a1:", get_balance(a1))
    print("a2:", get_balance(a2))
    deposit(a1, 100)
    deposit(a2, 300)
    print("a1:", get_balance(a1))
    print("a2:", get_balance(a2))
    withdraw(a1, 42)
    withdraw(a2, 42)
    print("a1:", get_balance(a1))
    print("a2:", get_balance(a2))
class BankAccount:
    """Bank account that stores its balance on the instance.

    The first parameter of every method is named ``account`` here; it plays
    the role conventionally given to ``self`` and receives the instance the
    method is called on.
    """

    def __init__(account, balance):
        """Create an account with the given opening ``balance`` (required)."""
        account.balance = balance

    def get_balance(account):
        """Return the current balance."""
        return account.balance

    def deposit(account, amount):
        """Add ``amount`` to the balance."""
        account.balance += amount

    def withdraw(account, amount):
        """Subtract ``amount`` from the balance (no overdraft check)."""
        # The first parameter was previously named ``self`` while the body
        # used ``account``, which raised NameError on every call.
        account.balance -= amount
# Each instance carries its own balance.
a1 = BankAccount(100)
a2 = BankAccount(500)
a1.get_balance()
a2.get_balance()
class Foo:
    # Smallest possible class: no attributes and no methods of its own.
    pass


f = Foo()
f
class Bar:
    """Class without ``__init__``: ``value`` exists only after ``add_value``."""

    def add_value(self, value):
        """Store ``value`` on the instance."""
        self.value = value

    def get_value(self):
        """Return the stored value.

        Raises AttributeError if ``add_value`` has not been called yet,
        because nothing else creates the ``value`` attribute.
        """
        return self.value
b = Bar()
# Raises AttributeError: Bar has no __init__, so the value attribute does
# not exist until add_value has been called.
b.get_value()
b.add_value(5)
b.get_value()
# Calling the method through the class with the instance passed explicitly
# is the same call as a1.get_balance().
BankAccount.get_balance(a1)
a1.get_balance()
# Raises TypeError: the BankAccount defined above requires a balance argument.
a3 = BankAccount()
def area(radius=1):
    """Return the area of a circle of the given ``radius`` (pi taken as 3.14).

    ``radius`` defaults to 1 when the argument is omitted.
    """
    return 3.14*radius*radius


area(3)
area()  # uses the default radius of 1
class BankAccount:
    """Bank account that stores its balance on the instance."""

    def __init__(self, balance=0):
        """Create an account; ``balance`` defaults to 0 when omitted."""
        self.balance = balance

    def get_balance(self):
        """Return the current balance."""
        return self.balance

    def deposit(self, amount):
        """Add ``amount`` to the balance."""
        self.balance += amount

    def withdraw(self, amount):
        """Subtract ``amount`` from the balance (no overdraft check)."""
        self.balance -= amount
# With the default argument in __init__, the account can be created without
# an opening balance.
a3 = BankAccount()
a3.get_balance()
a3.deposit(200)
a3.get_balance()
# withdraw performs no check, so the balance can go negative.
a3.withdraw(500)
a3.get_balance()
BankAccount.get_balance(a3)
pip install pandas
import pandas as pd
import random
# Series of five random floats; no index is given here.
s = pd.Series([random.random() for i in range(5)])
s
s[0]
s[1]
s[4]
# Same kind of data, but with explicit string labels as the index.
s = pd.Series([random.random() for i in range(5)], index=['a','b','c','d','e'])
s
s['a']
s['d']
# NOTE(review): an integer key on a string-labelled Series relies on
# positional fallback; s.iloc[0] is the explicit form - confirm behaviour
# against the installed pandas version.
s[0]
s
# The result of reindex is not assigned here; s is displayed again on the
# next line for comparison.
s.reindex(index=['e','b','a','c','d'])
s
neworder = s.reindex(index=['e','b','a','c','d'])
neworder
# "f" is not a label of s; the entry created for it is assigned a value below.
neworder = s.reindex(index=['e','b','a','c','d', "f"])
neworder
neworder['f'] = 0.04
neworder
# Five values but only four labels: the lengths do not match.
pd.Series([random.random() for i in range(5)], index=['a','b','c','d'])
import string
values = [random.random() for i in range(5)]
# First five uppercase letters, used as dictionary keys.
keys = [c for c in string.ascii_uppercase[:5]]
values
keys
d = dict(zip(keys, values))
d
# Build a Series directly from the dictionary.
s1 = pd.Series(d)
s1
# Indexing with a comparison on the Series itself selects the matching entries.
gt_0_5 = s1[s1>0.5]
lt_0_5 = s1[s1<0.5]
gt_0_5
lt_0_5
s
# The comparison is kept in a variable first, then used to index s.
cond = s > 0.5
cond
s[cond]
# Ten random picks from the two labels.
genders = pd.Series([random.choice(["Male", "Female"]) for i in range(10)])
genders
genders=="Female"
# Same pattern as above with an equality test as the filter.
filter_ = genders=="Female"
genders[filter_]
!cat stocks.csv
# DataFrame built from a dict: each key becomes a column name and each list
# supplies that column's values.
df = pd.DataFrame({"col1":[1,2,3,4,5],
"col2":[0.1,0.3,0.5,0.6,0.8],
"cat":['a','a','b','b','d']})
df
# Column-oriented data: one list of four entries per column.
sdata = {
"name":["Infosys","Tata","Reliance", "Tata Infotech"],
"ticker":["infy","tata","reliance","tatainf"],
"value":[1000,500,700,600],
"volume":[500, 50, 100, 60]
}
stocks = pd.DataFrame(sdata)
stocks
# A column can be read with [] or as an attribute.
stocks['name']
stocks.name
stocks.ticker
stocks.value
stocks.describe()
dsc = stocks.describe()
dsc.iloc[:3, 1] # iloc gives access by row number and column number
dsc.iloc[:3, 0]
# loc selects by label instead of by position.
dsc.loc['min']
stocks
stocks.loc[3]
# NOTE(review): label 3 is the fourth entry of each list in sdata, although
# the variable is named thirdrow.
thirdrow = stocks.loc[3, ['ticker','value','volume']]
thirdrow
thirdrow['value']
thirdrow['volume']
stocks.ndim
# Plain nested list: 5 rows (j) of 3 entries (i), for comparison with
# DataFrame indexing.
t = [[i*j for i in range(3)] for j in range(5)]
t
t[3,1] # will not work: a list cannot be indexed with a tuple (TypeError)
# Nested lists need one subscript per level.
t[3][1]
stocks.iloc[3,1]
stocks.iloc[:,1]
t[3][:]
t
# Slicing a nested list only slices rows; [0] then picks the first of those
# rows, not a column.
t[1:3]
t[1:3][0]
stocks.iloc[:,2]
# read_csv is used with a local file name and then with a URL.
st = pd.read_csv("stocks.csv")
st
url = "http://notes.pipal.in/2018/arcesium-basic-nov/loansData.csv"
loansData = pd.read_csv(url)
loansData.head(5)
loansData.dtypes # show datatypes of columns
loansData.shape # show how many rows and columns are there
loansData['Amount.Requested']
loansData.columns # names of columns
oldnames = [c for c in loansData.columns]
oldnames
# Replace "." with "_" in every column name.
newnames = [name.replace(".","_") for name in oldnames]
newnames
# Mapping from old name to new name, in the form rename(columns=...) takes.
cols = dict(zip(oldnames, newnames))
loansData.rename(columns=cols)
loansData.rename(columns=cols, inplace=True) # this will change names in original dataframe
loansData.head()
# Side examples: dict() accepts a list of (key, value) pairs, which is the
# kind of sequence zip produces.
dict([("a",1),("b",1),("c",2)])
list(zip(['a','b','c'], [1,2,3]))
loansData.Interest_Rate.dtype
# Strip the percent sign so the remaining text can be parsed as a number.
# (Assumes the column holds text such as "8.90%" - confirm against the CSV.)
# The previous attribute name, ``str.replaceint``, does not exist.
interest = loansData.Interest_Rate.str.replace("%", "")
interest.head(5)
# Convert the cleaned-up text to a numeric dtype.
interest = pd.to_numeric(interest)
interest.dtype
interest.head()
# Write the numeric column back over the original text column.
loansData.Interest_Rate = interest
loansData.dtypes
# NOTE(review): this replaces every "0" with "," and the result is not
# assigned to anything - confirm what this line was meant to demonstrate.
loansData.Amount_Requested.str.replace("0",",")
loansData.head(5)
loansData.FICO_Range.str.split("-").head(5)
FICO = loansData.FICO_Range.str.split("-", expand=True)
# expand=True puts each piece produced by the split into its own column
FICO.head()
# Store the two pieces as separate columns, then drop the combined column.
loansData['FICO_start'] = FICO[0]
loansData['FICO_end'] = FICO[1]
loansData.head()
del loansData["FICO_Range"]
loansData.columns
loansData.State
# Row filter: keep only the rows whose State equals "CA".
loansData_CA = loansData[loansData.State=="CA"]
loansData_CA
# Plain Python booleans are combined with the keyword "or" ...
2 ==3 or 4==5
import numpy as np
# ... whereas the Series conditions below are combined with & and |, with
# each comparison wrapped in parentheses.
s[(s >0.5) & (s <0.7)]
s[(s >0.5) | (s <0.7)]
loansData[(loansData.State=="CA") & (loansData.Home_Ownership=="RENT")]
# Group rows by State and aggregate each group. numeric_only=True restricts
# the mean to numeric columns; without it, current pandas raises TypeError
# when the frame also contains text columns.
loansData.groupby(loansData.State).mean(numeric_only=True)
loansData.groupby(loansData.State).max()
loansData
# Pointers to the two combining functions (placeholders: both need
# arguments before they can actually be called).
pd.merge() # merge columns
pd.concat() # append rows
stocks
df = pd.DataFrame({"name":["Tata", "x","y","z", "a"]})
df
# One boolean Series per name in df (stocks.name compared with that name),
# collected into a DataFrame and transposed.
df1 = pd.DataFrame([stocks.name==n for n in df.name]).transpose()
# NOTE(review): apply is called without a function argument here - this
# looks like an unfinished example.
df1.apply()
from bs4 import BeautifulSoup
# Sample HTML document used as parser input; it contains no <table> element.
html_doc = """
<html><head><title>The Dormouse's story</title></head>
<body>
<p class="title"><b>The Dormouse's story</b></p>
<p class="story">Once upon a time there were three little sisters; and their names were
<a href="http://example.com/elsie" class="sister" id="link1">Elsie</a>,
<a href="http://example.com/lacie" class="sister" id="link2">Lacie</a> and
<a href="http://example.com/tillie" class="sister" id="link3">Tillie</a>;
and they lived at the bottom of a well.</p>
<p class="story">...</p>
"""
soup = BeautifulSoup(html_doc, 'html.parser')
# Searches the parsed document for <table> tags (html_doc has none).
soup.find_all("table")
# Same sample document with a three-row <table> added (one header row and
# two data rows).
table = """
<html><head><title>The Dormouse's story</title></head>
<body>
<p class="title"><b>The Dormouse's story</b></p>
<p class="story">Once upon a time there were three little sisters; and their names were
<a href="http://example.com/elsie" class="sister" id="link1">Elsie</a>,
<a href="http://example.com/lacie" class="sister" id="link2">Lacie</a> and
<a href="http://example.com/tillie" class="sister" id="link3">Tillie</a>;
and they lived at the bottom of a well.</p>
<p class="story">...</p>
<table>
<tr>
<th>Name</th>
<th>Favorite Color</th>
</tr>
<tr>
<td>Bob</td>
<td>Yellow</td>
</tr>
<tr>
<td>Michelle</td>
<td>Purple</td>
</tr>
</table>
</body>
</html>
"""
# Re-parse, this time from the document that contains the table.
soup = BeautifulSoup(table, 'html.parser')
def parseTable(table):
    """Return the cell contents of ``table`` as a list of rows.

    ``table`` is expected to expose its children through ``.contents`` (as a
    BeautifulSoup tag does); each child that is not a text node is treated as
    a row, and each non-text child of a row as a cell. The first child of
    every cell is taken as its value; an empty cell yields ``""``.

    Text nodes between rows or between cells are skipped whatever whitespace
    they contain, so indented markup parses the same way as markup with bare
    newlines.
    """
    def elements(node):
        # Text nodes are str instances; they carry no cells of their own.
        return [child for child in node.contents if not isinstance(child, str)]

    return [
        [cell.contents[0] if cell.contents else "" for cell in elements(row)]
        for row in elements(table)
    ]
# soup.table is the first <table> element of the parsed document; the name
# table is rebound from the HTML text to the parsed rows.
table = parseTable(soup.table)
table
# One entry per <tr>: the header row followed by the two data rows.
table[0]
table[1]
table[2]