Dec 17-23, 2020 Vikrant Patil
These notes are available online at http://notes.pipal.in/2020/arcesium_finop_batch3/module3-day1.html
© Pipal Academy LLP
Day 1 | Day 2 | Day 3 | Day 4 | Day 5
We will be using JupyterHub at http://lab.pipal.in for this training. Create a notebook named module3-day1.ipynb for today's session. Before you start, shut down all kernels except today's notebook.
items = ["Here","are", "some","words","with","different","length"]
numbers = [120, 234, 345, 346, 11]
max(numbers)
max(items)
max(items, key=len)
[len(w) for w in items]
max(items, key=len)
volume = ["1M","2M","0.5B","4M","100K"]
def numeric_part(strv):
    """Return the numeric portion of *strv* as a float.

    The final character is treated as a unit suffix and dropped; the
    remaining leading characters are parsed with ``float``.
    """
    digits = strv[:-1]  # everything except the last character
    return float(digits)
def suffix(strv):
    """Return the last character of *strv* (the unit letter in e.g. ``"4M"``)."""
    unit = strv[-1]
    return unit
def convert(strv):
    """Convert a volume string such as ``"1M"`` or ``"0.5B"`` to a number.

    A trailing ``K``, ``M`` or ``B`` scales the leading numeric part by a
    thousand, a million or a billion respectively and yields a float.
    A string without one of those suffixes is parsed as a plain integer.

    Raises:
        ValueError: if the numeric part (or the whole unsuffixed string)
            cannot be parsed.
    """
    multipliers = {"K": 1000, "M": 1000000, "B": 1000000000}
    # Inspect the suffix first: parsing everything-but-the-last-character
    # up front made single-digit inputs like "5" fail on float("").
    if strv and strv[-1] in multipliers:
        return float(strv[:-1]) * multipliers[strv[-1]]
    return int(strv)
def convert_numeric(strvolume):
    """Return a new list holding ``convert(v)`` for every entry of *strvolume*."""
    converted = []
    for text in strvolume:
        number = convert(text)
        converted.append(number)
    return converted
max(convert_numeric(volume))
"34B"[:-1]
max(convert_numeric(volume))
max(volume, key=convert)
sorted(volume, key=convert)
numeric_volume = convert_numeric(volume)
numeric_volume
sorted(numeric_volume)
sorted(volume, key=convert)
def convert_numeric(strvolume):
    """Loop-and-append form: build the list of converted volumes step by step."""
    result = []
    for entry in strvolume:
        result.append(convert(entry))
    return result
def convert_numeric(strvolume):
    """List-comprehension form: apply ``convert`` to each entry of *strvolume*."""
    return [convert(entry) for entry in strvolume]
convert_numeric(volume)
[x*x for x in range(5)]
items
{word:len(word) for word in items}
lengths = {}
for w in items:
lengths[w] = len(w)
lengths
import pandas
!python3 -m pip install --user pandas
import pandas
import pandas as pd
%%file download.py
import sys
import requests
def download(url, filename, timeout=None):
    """Fetch *url* with ``requests.get`` and save the response body to *filename*.

    Parameters:
        url: address to fetch.
        filename: path of the file to write; it is opened in binary mode.
        timeout: optional timeout in seconds handed to ``requests.get``.
            The default ``None`` keeps the previous wait-forever behaviour.

    Raises:
        requests.HTTPError: if the server answers with a 4xx/5xx status.
    """
    resp = requests.get(url, timeout=timeout)
    # Check the status before opening the file so that an error page is
    # never written to disk as if it were the requested data.
    resp.raise_for_status()
    with open(filename, "wb") as f:
        f.write(resp.content)
if __name__ == "__main__":
    # Both positional arguments are required; fail with a usage hint
    # instead of an IndexError traceback when one is missing.
    if len(sys.argv) < 3:
        sys.exit("usage: python3 download.py URL FILENAME")
    url = sys.argv[1]
    filename = sys.argv[2]
    download(url, filename)
!python3 download.py "https://raw.githubusercontent.com/vikipedia/python-trainings/master/online_course/source/module2/wallet.csv" wallet.csv
!head wallet.csv
pd.read_csv("wallet.csv")
wallet = pd.read_csv("wallet.csv")
type(wallet)
wallet
wallet.head()
wallet.tail()
wallet.describe()
pd.read_csv("https://raw.githubusercontent.com/vikipedia/python-trainings/master/online_course/source/module2/wallet.csv")
pd.read_excel("https://raw.githubusercontent.com/vikipedia/python-trainings/master/online_course/source/module2/wallet.xlsx")
!python3 -m pip install --user Xlrd
problems
!python3 -m pip install --user lxml
import pandas as pd
help(pd.read_excel)
r = pd.read_html("https://www.moneycontrol.com/markets/indian-indices/")
type(r)
len(r)
r[0]
r[1]
r[2]
r[3]
wallet = pd.read_excel("https://raw.githubusercontent.com/vikipedia/python-trainings/master/online_course/source/module2/wallet.xlsx")
wallet
type(wallet)
wallet.columns
wallet['category']
wallet.category
wallet.date
wallet.debit
wallet['Unnamed: 0']
type(wallet.debit)
wallet
s1 = pd.Series([400, 300, 3244, 500])
s1
[i for i in s1]
stocks = pd.Series([400, 300, 3244, 500], index=["APPLE","AT&T","IBM","NIKE"])
stocks # 1D data, equivalent to a single column
stocks['APPLE']
[s for s in stocks]
stocks['NIKE']
stocks[0]
labels = ['APPLE','AT&T','IBM','NIKE']
value = pd.Series([234.0, 221.0, 124.5, 100.4], index=labels)
high = pd.Series([235.0, 225.0 , 125.5, 101.5], index=labels)
low = pd.Series([230.0, 220.0, 120.0, 100.0], index=labels)
volume = pd.Series([100, 235, 125, 300], index=labels)
stocks = pd.DataFrame({
"value":value,
"high": high,
"low": low,
"volume": volume
})
stocks
stocks = pd.DataFrame({
"value":[234.0, 221.0, 124.5, 100.4],
"high": [235.0, 225.0 , 125.5, 101.5],
"low": [230.0, 220.0, 120.0, 100.0],
"volume" : [100, 235, 125, 300]},
index= labels
)
stocks
stocks1 = pd.DataFrame({
"value":[234.0, 221.0, 124.5, 100.4],
"high": [235.0, 225.0 , 125.5, 101.5],
"low": [230.0, 220.0, 120.0, 100.0],
"volume" : [100, 235, 125, 300]}
)
stocks1
stocks
stocks['value'] # on a DataFrame, columns are accessible just like dictionary items
stocks['value']['APPLE']
stocks.value
stocks.volume
stocks2 = pd.DataFrame({
"value":[234.0, 221.0, 124.5, 100.4],
"high value": [235.0, 225.0 , 125.5, 101.5],
"low value": [230.0, 220.0, 120.0, 100.0],
"volume" : [100, 235, 125, 300]},
index= labels
)
stocks2
stocks2.high value
stocks2['high value']
stocks
stocks.loc["APPLE"] # complete row of label APPLE
stocks.loc[['APPLE',"IBM"]]
stocks.loc['APPLE', 'value'] # first item selects the row, second item selects the column
stocks.loc[['APPLE','IBM'], 'value']
stocks.loc[['APPLE','IBM'], ['value','volume']]
stocks.iloc[0] # gives zeroth row
stocks.iloc[[0,3]]
stocks.iloc[[0,3],0]
stocks.iloc[[0,3],[0, 3]]
stocks.iloc[:, :] # all the rows and all the columns
stocks.iloc[:2, :] # take first two rows and all the columns
stocks.iloc[:, [0, 3]]
value = stocks.value
value
type(value)
value.sum()
value.cumsum()
value.min()
value.std()
value.mean()
s1
s1.std()
s1.abs()
s1 + 100
s2 = pd.Series([1, 2, 3, 4])
s2
s1
s1 + s2
s1 = pd.Series([200, 300, 100], index=['a','b','c'])
s1
s2 = pd.Series([1, 2, 3], index=['b','a','c'])
s2
s1 + s2
stocks
stocks.head()
stocks
s1
s1 + 100
s1 > 100
s1[s1>100]
stocks
stocks.volume > 200
stocks[stocks.volume > 200]
wallet
food = wallet[wallet.category=="Food"]
food
wallet[wallet.category=="Books"]
food.describe()
wallet[wallet.debit > 450]
wallet[wallet.debit < 450]
wallet[wallet.description=="Amazon"]
wallet[wallet.description=="Taxi"]
class Stock:
    """A stock record holding a symbol, a value and a volume."""

    def __init__(self, symbol, value, volume):
        # The three arguments are stored exactly as given.
        self.symbol, self.value, self.volume = symbol, value, volume

    def __repr__(self):
        fields = (self.symbol, self.value, self.volume)
        return "Stock({} {} {})".format(*fields)
ibm = Stock("IBM", 120, 100)
%%file stocksdata.csv
"IBM", 120, 100
"IBM1", 121, 101
"IBM2", 122, 102
"IBM3", 123, 103
"IBM4", 124, 104
"IBM5", 125, 105
import csv
def load_as_list(filename):
    """Read the CSV file *filename* and return a list of ``Stock`` objects.

    The first row is consumed and printed as a header; every following row
    supplies its first three fields, as read by ``csv.reader``, to ``Stock``.

    NOTE(review): the stocksdata.csv sample written in these notes has no
    header line, so its first record is what gets skipped - confirm whether
    a header row was intended.
    """
    # newline="" leaves newline handling to the csv module, as its
    # documentation recommends for files passed to csv.reader.
    with open(filename, newline="") as f:
        reader = csv.reader(f)
        print("skipping header", next(reader))
        stocks = []
        for row in reader:
            stocks.append(Stock(row[0], row[1], row[2]))
    return stocks
l = load_as_list("stocksdata.csv")
l[0]
def load_as_list(filename):
    """Read the CSV file *filename* and return a list of ``Stock`` objects.

    Comprehension form: the first row is consumed and printed as a header,
    then each remaining row's first three fields are passed to ``Stock``.

    NOTE(review): the stocksdata.csv sample written in these notes has no
    header line, so its first record is what gets skipped - confirm whether
    a header row was intended.
    """
    # newline="" leaves newline handling to the csv module, as its
    # documentation recommends for files passed to csv.reader.
    with open(filename, newline="") as f:
        reader = csv.reader(f)
        print("skipping header", next(reader))
        return [Stock(row[0], row[1], row[2]) for row in reader]
load_as_list("stocksdata.csv")
def add(x, y):
    """Return the result of ``x + y``."""
    total = x + y
    return total
add(1, 2)
args = [1, 2]
add(args[0], args[1])
add(*args)
def load_as_list(filename):
    """Read the CSV file *filename* and return a list of ``Stock`` objects.

    Argument-unpacking form: the first row is consumed and printed as a
    header, then every remaining row is spread into ``Stock(*row)``, so each
    row must have exactly as many fields as ``Stock`` takes arguments.

    NOTE(review): the stocksdata.csv sample written in these notes has no
    header line, so its first record is what gets skipped - confirm whether
    a header row was intended.
    """
    # newline="" leaves newline handling to the csv module, as its
    # documentation recommends for files passed to csv.reader.
    with open(filename, newline="") as f:
        reader = csv.reader(f)
        print("skipping header", next(reader))
        return [Stock(*row) for row in reader]
load_as_list("stocksdata.csv")
# Build a 5x5 identity matrix as a list of rows: 1 on the diagonal, 0 elsewhere.
mat = []
for c in range(5):
    row = []
    for r in range(5):
        row.append(1 if c == r else 0)
    mat.append(row)
mat