Apr 15-18, 2019 Vikrant Patil
These notes are available online at http://notes.pipal.in/2019/arcesium_basic_aug/day4.html
© Pipal Academy LLP
We will be using Python 3 (>= 3.0) from Anaconda for this training. You can download it from https://www.anaconda.com/download
def getwords(filename):
    """Return the whitespace-separated words of a text file.

    Args:
        filename: Path of the file to read.

    Returns:
        A list of the words in file order; empty if the file holds no words.
    """
    with open(filename) as f:
        # split() without a separator already ignores leading and trailing
        # whitespace, so a separate strip() is unnecessary.
        return f.read().split()
# Read every word of words.txt into a list via getwords().
words = getwords("words.txt")
def wordfreq(words):
    """Count how often each distinct word occurs, using ``list.count``.

    Args:
        words: A list of words.

    Returns:
        A dict mapping each distinct word to its number of occurrences.
    """
    # Deliberately the naive version: words.count() rescans the whole list
    # once per distinct word. wordfreq1/wordfreq2 do the same job in a
    # single pass.
    return {w: words.count(w) for w in set(words)}
# Build the frequency table for the loaded words and display it.
freq = wordfreq(words)
freq
def wordfreq1(words):
    """Count word occurrences in a single pass with an explicit branch.

    Args:
        words: An iterable of words.

    Returns:
        A dict mapping each word to its number of occurrences.
    """
    freq = {}
    for w in words:
        if w in freq:
            freq[w] += 1  # freq[w] = freq[w] + 1
        else:
            # First sighting of this word.
            freq[w] = 1
    return freq
# Display the single-pass frequency table for the same word list.
wordfreq1(words)
def wordfreq2(words):
    """Count word occurrences in a single pass using ``dict.get``.

    Args:
        words: An iterable of words.

    Returns:
        A dict mapping each word to its number of occurrences.
    """
    freq = {}
    for w in words:
        # get() supplies 0 for a word not seen yet, so no branch is needed.
        freq[w] = freq.get(w, 0) + 1
    return freq
%%file bank0.py
# Single bank account kept in module-level state: every caller of this
# module shares the one ``balance`` below.
balance = 0


def get_balance():
    """Return the current module-level balance."""
    return balance


def withdraw(amount):
    """Subtract ``amount`` from the balance; no overdraft check is made."""
    global balance
    balance -= amount


def deposit(amount):
    """Add ``amount`` to the balance."""
    global balance
    balance += amount
# Import the module written above and exercise its single shared account.
import bank0
print(bank0.__name__)
bank0.get_balance()
bank0.deposit(10000)
bank0.get_balance()
bank0.withdraw(3000)
bank0.get_balance()
%%file bank1.py
# Bank functions that operate on an explicit account dict, so any number of
# independent accounts can exist at once.


def make_account():
    """Return a new account: a dict with a zero ``'balance'`` entry."""
    return {"balance":0}


def get_balance(account):
    """Return the balance stored in ``account``."""
    return account['balance']


def deposit(account, amount):
    """Add ``amount`` to the balance of ``account`` in place."""
    account['balance'] += amount


def withdraw(account, amount):
    """Subtract ``amount`` from ``account`` in place; no overdraft check."""
    account['balance'] -= amount
# Create one account dict and pass it explicitly to every bank1 function.
import bank1
a1 = bank1.make_account()
bank1.get_balance(a1)
bank1.deposit(a1, 30000)
bank1.withdraw(a1, 2323)
bank1.get_balance(a1)
class BankAccount:
    """A bank account that starts with a zero balance."""

    def __init__(self):
        """Create the account with ``balance`` set to 0."""
        self.balance = 0

    def get_balance(self):
        """Return the current balance."""
        return self.balance

    def deposit(self, amount):
        """Add ``amount`` to the balance."""
        self.balance += amount

    def withdraw(self, amount):
        """Subtract ``amount`` from the balance; no overdraft check."""
        self.balance -= amount
# Instantiate the class and inspect the object's type.
b1 = BankAccount()
b1
type(b1)
isinstance(b1, BankAccount)
isinstance(b1, int)
isinstance(1, int)
# input() always returns the typed text as a str.
x = input()
x
y = input("Input the value")
y
# y is a str, so adding the int 1 raises TypeError; convert with int()/float()
# before doing arithmetic on user input.
y + 1
class BankAccount1:
    """A named bank account opened with an initial amount."""

    def __init__(self, name, amount):
        """Store the holder ``name`` and use ``amount`` as opening balance."""
        self.name = name
        self.balance = amount

    def get_name(self):
        """Return the account holder's name."""
        return self.name

    def get_balance(self):
        """Return the current balance."""
        return self.balance

    def deposit(self, amount):
        """Add ``amount`` to the balance."""
        self.balance += amount

    def withdraw(self, amount):
        """Subtract ``amount`` from the balance; no overdraft check."""
        self.balance -= amount
# Build an account from user input; input() yields strings, so the amount is
# converted with float() before it becomes the balance.
name = input("Eneter your name: ")
balance = input("Enter initial amount: ")
b2 = BankAccount1(name, float(balance))
b2.get_balance()
b2.get_name()
# Rebind b2 to an account built from literal values.
b2 = BankAccount1("Arcesium", 10000)
b2.get_balance()
# Compare how a list and a BankAccount1 instance are displayed.
l = [1,2,3,4]
l
b2
class Point:
    """A 2-D point holding ``x`` and ``y``; no custom display methods yet."""

    def __init__(self, x, y):
        """Store the two coordinates."""
        self.x = x
        self.y = y
# Display a Point that defines neither __repr__ nor __str__.
p = Point(2, 9)
p
class Point:
    """A 2-D point whose repr is ``<x,y>``."""

    def __init__(self, x, y):
        """Store the two coordinates."""
        self.x = x
        self.y = y

    def __repr__(self):
        """Return ``<x,y>``; also used by str()/print(), as no __str__ exists."""
        return "<{0},{1}>".format(self.x, self.y)
# With only __repr__ defined, both the bare display and print() use it.
p = Point(3,4)
p
print(p)
class Point:
    """A 2-D point with separate repr (``<x,y>``) and str (``x,y``) forms."""

    def __init__(self, x, y):
        """Store the two coordinates."""
        self.x = x
        self.y = y

    def __repr__(self):
        """Return the ``<x,y>`` form."""
        return "<{0},{1}>".format(self.x, self.y)

    def __str__(self):
        """Return the ``x,y`` form used by str() and print()."""
        return "{},{}".format(self.x, self.y)
# Now the bare display uses __repr__ while print()/str() use __str__.
p = Point(5,6)
p
print(p)
str(p)
# str() on the list l defined earlier, for comparison.
str(l)
class RedPoint(Point):
    """A Point subclass that adds a class-level ``color`` attribute."""

    # Class attribute: reachable from every RedPoint instance, but not
    # defined on the Point base class.
    color = "Red"
# RedPoint defines no __init__/__repr__ of its own, so it uses Point's.
rp = RedPoint(5,7)
rp
rp.color
# p is a plain Point, and Point defines no color, so this lookup raises
# AttributeError.
p.color
# A RedPoint instance is an instance of both its own class and its base.
isinstance(rp, Point)
isinstance(rp, RedPoint)
rp
rp.color
import pandas as pd
# Import math under the alias m, then unbind that alias again.
import math as m
del m
# Series built from a plain list, with no index supplied.
values = pd.Series([100.0,202.0, 223.0, 224.5, 343.6])
values
# Pair ticker labels with the same numbers in a dict.
d = dict(zip(["tata","hcl",'infy','reliance','wipro'],[100.0,202.0, 223.0, 224.5, 343.6]))
d
# Series built from the dict.
values = pd.Series(d)
values
# Same dict, but with an explicit index listing the labels in another order.
values = pd.Series(d, index=["tata",'reliance','wipro',"hcl",'infy'])
values
# Series built from a list plus an explicit label index.
values = pd.Series([100.0,202.0, 223.0, 224.5, 343.6], index=["tata","hcl",'infy','reliance','wipro'])
values
# Look up by label.
values['tata']
# Integer key on a string-labelled Series.
# NOTE(review): newer pandas releases deprecate treating this as positional;
# values.iloc[0] is the explicit form - confirm against the installed version.
values[0]
# The result of the addition is not assigned, so values is unchanged.
values + 100
values
v = values + 100
v
# Arithmetic between two Series and between a Series and a scalar.
values + v
v*2
v*v
v
v['tata']
# Slice with integer bounds.
v[2:]
# Integer Series labelled 'a'..'e'.
v = pd.Series([1,2,3,4,5],index=['a','b','c','d','e'])
v
v['a']
# Slice using labels as the bounds.
v["a":"c"]
values
# A comparison yields a boolean Series; indexing with it keeps matching rows.
values[values<220]
values[values>223]
values[values==223]
v
# Keep the even entries of v.
v[v%2==0]
# Keep the entries equal to the value stored under 'tata'.
values[values==values['tata']]
# DataFrame from a dict of equal-length lists; the ticker is an ordinary
# column here and no index is supplied.
df = pd.DataFrame({"values":[100.0,202.0, 223.0, 224.5, 343.6],
"gain" : [11.0,5.5,20.0,34.5,20.0],
'Name' : ['Tata Motors', "Reliance", "Wipro", "Hindustan Computers", "Infosys"],
'ticker': ["tata",'reliance','wipro',"hcl",'infy']})
df
# Same data, but the tickers are now passed as the row index.
df = pd.DataFrame({"values":[100.0,202.0, 223.0, 224.5, 343.6],
"gain" : [11.0,5.5,20.0,34.5,20.0],
'Name' : ['Tata Motors', "Reliance", "Wipro", "Hindustan Computers", "Infosys"]
}, index= ["tata",'reliance','wipro',"hcl",'infy'])
df
# Select a column by key or by attribute.
df['values']
df.gain
df['gain']
# Select rows with a boolean condition on a column.
df[df['gain']>10]
df[df['Name']=="Wipro"]
# Select a single row by its index label.
df.loc['tata']
df['Name'][df['gain']>10] # Name wherever gain is > 10
df
# Ten rows (j = 1..10), each holding j*1 .. j*5.
t = [[i*j for i in range(1,6)] for j in range(1, 11)]
# Label the rows 1..10; column labels are left to pandas.
tables = pd.DataFrame(t, index=range(1, 11))
tables
# Arithmetic on whole DataFrames; results are not assigned.
tables + 100
tables + tables
df
df['values'] + 100
# Compute a new column and assign it back over the old one.
v1 = df['values'] + 100
df['values'] = v1
df
# A Series carrying the same ticker labels as df's index.
newgain = pd.Series([1.0, 1.1, 1.3, 1, 1], index=["tata",'reliance','wipro',"hcl",'infy'])
newgain
# Add it to the gain column and store the result back in df.
df['gain'] = df['gain'] + newgain
df
tables
# Ten rows (j = 1..10), each holding j*6 .. j*10.
t2 = [[i*j for i in range(6,11)] for j in range(1, 11)]
# Same row labels as tables; columns are explicitly labelled 5..9.
tables2 = pd.DataFrame(t2, index=range(1, 11), columns=range(5,10))
tables2
# Join the two frames with the default how, then inner and outer.
tables.join(tables2)
tables.join(tables2, how="inner")
tables.join(tables2, how="outer")
df
# A second frame whose index shares 'tata', 'reliance' and 'wipro' with df
# but has 'arc' and 'vmware' where df has 'hcl' and 'infy'.
df2 = pd.DataFrame({"col1":[100.0,202.0, 223.0, 224.5, 343.6],
"col2" : [11.0,5.5,20.0,34.5,20.0],
'Name1' : ['Tata Motors', "Reliance", "Wipro", "arc", "vmware"]
}, index= ["tata",'reliance','wipro',"arc",'vmware'])
df
df2
df.join(df2, how="inner") ### intersection of labels
df.join(df2, how="outer") ## union of labels
df
# A frame with the same columns as df and two new row labels.
df3 = pd.DataFrame({"values":[ 224.5, 343.6],
"gain" : [34.5,20.0],
'Name' : ["arc", "vmware"]
}, index= ["arc",'vmware'])
df3
df
# Stack the rows of df3 under those of df.
pd.concat([df, df3])
# DataFrame.append was removed in pandas 2.0; pd.concat is the replacement.
# Selecting with a list of labels keeps the single row as a DataFrame, so it
# is added as one row labelled 'vmware'.
pd.concat([df, df3.loc[['vmware']]])
pd.concat([df, df3])
# Load the loans CSV straight from its URL (needs network access).
url = "http://notes.pipal.in/2018/arcesium-basic-nov/loansData.csv"
loans = pd.read_csv(url)
loans.head()
loans.tail()
loans.describe()
loans.columns
# Replace "." with "_" in every column name so that columns can be reached
# with attribute access, as in loans.Amount_Requested below.
cols = [name.replace(".","_") for name in loans.columns]
cols
# Map each old column name to its new name and rename in place.
col_dict = dict(zip(loans.columns, cols))
col_dict
loans.rename(columns=col_dict, inplace=True)
loans.Amount_Requested
loans.describe()
loans.head()
loans.Loan_Length
# Split each Loan_Length string on whitespace into separate columns.
loanlength = loans.Loan_Length.str.split(expand=True)
# Name column 0 "Loan_length"; column 1 keeps its label.
loanlength.rename(columns={0:"Loan_length", 1:1}, inplace=True)
# Overwrite Loan_Length with the numeric value of the first piece.
loans['Loan_Length'] = pd.to_numeric(loanlength.Loan_length)
loans.describe()
loans.head()
# Drop the "%" sign from Interest_Rate and convert it to numbers.
loans.Interest_Rate = pd.to_numeric(loans.Interest_Rate.str.replace("%",""))
loans.describe()
# Per-state aggregates. numeric_only=True restricts them to numeric columns:
# since pandas 2.0 the default is False, under which mean()/std() raise a
# TypeError if any string column is present.
# NOTE(review): presumably loans still has text columns besides State -
# confirm against the CSV.
loans.groupby("State").mean(numeric_only=True)
loans.groupby("State").sum(numeric_only=True)
loans.groupby("State").std(numeric_only=True)
loans.describe()
# Aggregates of a single numeric column.
loans.Amount_Requested.sum()
loans.Amount_Requested.std()
import xlsxwriter
# Create a workbook with one sheet and write multiplication results into it.
w = xlsxwriter.Workbook("sample.xlsx")
sheet = w.add_worksheet("sheet1")
for i in range(1,6):
    for j in range(1,11):
        # The column is fixed at 1, so every pass of the outer loop writes to
        # the same ten cells.
        sheet.write(j, 1, i*j) # write(row, col, value)
# close() must be called to save the file.
w.close()
# Same exercise, addressing cells by their A1-style name instead.
w = xlsxwriter.Workbook("sample2.xlsx")
sheet = w.add_worksheet("sheet1")
for i in range(1,6):
    for j in range(1,11):
        # The cell name is always in column A, so every pass of the outer
        # loop writes to the same ten cells A1..A10.
        sheet.write("A"+str(j), i*j) # write(cell, value) with an A1-style name
w.close()
# Read the saved workbook back into a DataFrame.
pd.read_excel("sample2.xlsx")
# Write a full table: one column per i, with a header row.
w = xlsxwriter.Workbook("sample2.xlsx")
sheet = w.add_worksheet("sheet1")
for i in range(1,6):
    # Row 0 holds the header for column i-1.
    sheet.write(0, i-1, i)
    for j in range(1,11):
        sheet.write(j, i-1 , i*j) # write(row, col, value)
w.close()
# Read the saved workbook back into a DataFrame.
pd.read_excel("sample2.xlsx")
# Cell formats are created on the workbook and passed as the last argument
# of write().
w = xlsxwriter.Workbook("format.xlsx")
sheet = w.add_worksheet("sheet1")
bold = w.add_format({'bold':True})
numberformat = w.add_format({'num_format':"$#,###"})
# A bold text cell and a number cell using the "$#,###" number format.
sheet.write(0,0,"Bold", bold)
sheet.write(0,1, 1000, numberformat)
w.close()
# Workbook with two data columns, a per-row formula column and a total.
w = xlsxwriter.Workbook("formula.xlsx")
sheet = w.add_worksheet("sheet1")
bold = w.add_format({'bold':True})
## headers
sheet.write(0,0, "A", bold)
sheet.write(0,1, "B", bold)
sheet.write(0, 2, "A+B", bold)
for i in range(1,11):
    sheet.write(i, 0, i+5)
    sheet.write(i, 1, 2*i)
    # write() rows are 0-based while formula references are 1-based, hence
    # i+1 in the cell names.
    sheet.write(i, 2, "=A{0}+B{0}".format(i+1)) #string formating
# Total of the ten data cells of column A, placed below them.
sheet.write(11, 0, "=SUM(A2:A11)")
w.close()
# Named placeholders can be repeated; this evaluates to 'A9 + B9'.
"A{row} + B{row}".format(row=9)
For more detailed documentation see the XlsxWriter documentation (https://xlsxwriter.readthedocs.io/) or use the built-in help:
# Show the docstring of Workbook.add_format.
help(w.add_format)
# Open the workbook written earlier with openpyxl and read the value of
# cell A1 on sheet1.
import openpyxl
w = openpyxl.load_workbook("formula.xlsx")
w['sheet1']['A1'].value