Jun Jul 18-22, 2022 Vikrant Patil
All notes are available online at https://notes.pipal.in/2022/arcesium_finop_batch1/
Please accept the invitation that you have received in your email and login to
From there launch your jupyter lab. Create a notebook with name module2-day4.
Shutdown all older notebooks so that load on server reduces.
© Pipal Academy LLP
Let's start with an example
Paper work
def average_rows_of_a_file(filepath):
    """Return the average of every row in a comma separated numeric file.

    Each line of ``filepath`` is turned into a list of floats by
    ``extract_row`` and reduced to a single number by ``mean``.  The
    averages are returned as a list, in the same order as the lines.
    """
    with open(filepath) as source:
        return [mean(extract_row(record)) for record in source]
def extract_row(line):
    """Convert one comma separated text line into a list of floats.

    Surrounding whitespace is stripped first, so the newline that ends a
    line read from a file does not get in the way.
    """
    cleaned = line.strip()
    fields = cleaned.split(",")
    return list(map(float, fields))
def mean(row):
    """Return the arithmetic mean of the numbers in ``row``."""
    total = sum(row)
    count = len(row)
    return total / count
mean([1, 2, 3, 4, 5])
3.0
extract_row("23,45,4.6,56,34") # this testing is incomplete! it does not have \n at end
[23.0, 45.0, 4.6, 56.0, 34.0]
extract_row("23,45,4.6,56,34\n")
[23.0, 45.0, 4.6, 56.0, 34.0]
extract_row("23,45,4.6,56,34 \n")
[23.0, 45.0, 4.6, 56.0, 34.0]
%%file numeric_values.txt
177.86307848009363,168.57970829672172,56.64667509485027,140.9453091498533,7.60588098031354,35.46585201959433,109.79299526934932,127.73481373344411,142.6569862535187,55.42226271294348
50.73110854833257,37.04770129180774,134.12303059207613,24.094715946706696,16.62709152318884,76.79158320138639,18.869039939685138,55.16544658992794,9.080542932129527,86.33584190856514
78.13874487140086,6.230686793261686,9.90129561402988,38.73432202944475,28.24102532900047,178.32675126964756,156.91807110256244,13.715926468541555,40.65402349504774,56.26057091162304
52.33390135982785,82.18187238357866,165.98732406823763,16.24849453464723,134.6940059960185,43.074586969351294,164.98109061612234,49.01998365034387,46.47274837268185,71.57223840684533
14.054024611303051,33.26061264583661,6.025038296009622,46.20617825321064,20.36303569589534,76.36142148522664,35.7247427244146,96.95015828855182,14.591002411820476,114.73930165632399
176.98106147487977,69.94975607322517,118.2574420888458,40.0226187414381,65.86533365890166,112.00582375498792,111.92750266225774,26.795263824303984,113.78920957808114,129.28326476993482
35.08017332225934,49.66626098662552,97.1770522711201,103.31251256854945,45.385340645578644,70.73296754777625,124.0599818449227,3.1152276402996506,189.621114798281,26.984391777865575
47.86347022018917,143.08376550789785,48.86114868739707,86.02253627471258,57.673544870935736,85.46964903214331,88.72038289528996,81.1572932565442,93.33330250009122,73.21497272029562
40.548227987191765,3.3788760416886054,45.81322292884595,18.41416684775437,0.4957688331691745,151.46049182954297,74.50089155354308,17.159975671658877,25.732237076201475,22.157154152625157
107.43073464586683,28.781131842076306,46.961173143962576,20.468161528556195,5.099354269264699,35.01542373037388,28.907504914509495,20.089203534798646,56.800621137020045,143.02788557795452
Writing numeric_values.txt
average_rows_of_a_file("numeric_values.txt")
[102.27135619906822, 50.8866102473806, 60.712141788455995, 82.65662463576545, 45.827551606859274, 96.48772766268561, 74.51350234032782, 80.54000659654967, 39.96610129222215, 49.25811943243832]
import os
os.getcwd()
'/home/vikrant/trainings/2022/arcesium_finop_batch1'
average_rows_of_a_file('/home/vikrant/trainings/2022/arcesium_finop_batch1/numeric_values.txt')
[102.27135619906822, 50.8866102473806, 60.712141788455995, 82.65662463576545, 45.827551606859274, 96.48772766268561, 74.51350234032782, 80.54000659654967, 39.96610129222215, 49.25811943243832]
text = " hello world "
text.rstrip()
' hello world'
text.lstrip()
'hello world '
strip is used to remove leading and trailing whitespace
text.strip() # spaces in the middle are not removed; only leading and trailing spaces are removed
'hello world'
stock = {"name":"IBM", "value":125, "high":127, "low":123}
stock
{'name': 'IBM', 'value': 125, 'high': 127, 'low': 123}
stock['name']
'IBM'
stock['value'] = 124
stock
{'name': 'IBM', 'value': 124, 'high': 127, 'low': 123}
stock['quantity']
--------------------------------------------------------------------------- KeyError Traceback (most recent call last) Input In [31], in <cell line: 1>() ----> 1 stock['quantity'] KeyError: 'quantity'
stock.get('name') # will work same as stock['name']
'IBM'
stock.get('quantity') # this will not fail..no error
print(stock.get('quantity'))
None
stock.get('quantity', 0)
0
stock.get('quantity', 0) # if the key 'quantity' does not exist in the stock dictionary, return 0 as its value
0
stock.get('quantity') # if the second argument is not given, it will return None
stock.get('exchange', "NYC") # you can choose your own value
'NYC'
stock
{'name': 'IBM', 'value': 124, 'high': 127, 'low': 123}
stock.setdefault('quantity', 0) # it will return 0 as value but also set it
0
stock
{'name': 'IBM', 'value': 124, 'high': 127, 'low': 123, 'quantity': 0}
%%file stocks.csv
IBM,125,128,123
XYS,234,235,233,4
XYM,234,235,233
XYN,234,235,233,10
XYO,234,235,233,15
Overwriting stocks.csv
values = ["IBM",125,128,123]
keys = ["name", "value", "high", "low"]
# compare this with list comprehensions loop!
data = {} # it is similar except that there is dictionary here!
for key, value in zip(keys, values):
data[key] = value
data
{'name': 'IBM', 'value': 125, 'high': 128, 'low': 123}
{key:value for key,value in zip(keys, values)}
{'name': 'IBM', 'value': 125, 'high': 128, 'low': 123}
# compare this with list comprehensions loop!
data = {} # it is similar except that there is dictionary here!
length = len(values)
for i in range(length):
data[keys[i]] = values[i]
data
{'name': 'IBM', 'value': 125, 'high': 128, 'low': 123}
def load_stocks_data(filename):
    """Read a stocks CSV file into a list of dictionaries, one per line.

    The first column is kept as a string under ``"name"``.  The remaining
    columns are converted to float and paired, in order, with ``"value"``,
    ``"high"``, ``"low"`` and ``"quantity"``.  ``zip`` stops at the shorter
    sequence, so a line without a quantity column produces a dictionary
    that has no ``"quantity"`` key.
    """
    columns = ("name", "value", "high", "low", "quantity")
    records = []
    with open(filename) as source:
        for raw in source:
            # The symbol is text, so it is handled separately from the
            # numeric columns that follow it.
            symbol, *numbers = raw.strip().split(",")
            fields = [symbol]
            fields.extend(float(number) for number in numbers)
            records.append(dict(zip(columns, fields)))
    return records
stocksdata = load_stocks_data("stocks.csv")
stocksdata[0]
{'name': 'IBM', 'value': 125.0, 'high': 128.0, 'low': 123.0}
sum([stock['value'] for stock in stocksdata])
1061.0
sum([stock['quantity'] for stock in stocksdata])
--------------------------------------------------------------------------- KeyError Traceback (most recent call last) Input In [86], in <cell line: 1>() ----> 1 sum([stock['quantity'] for stock in stocksdata]) Input In [86], in <listcomp>(.0) ----> 1 sum([stock['quantity'] for stock in stocksdata]) KeyError: 'quantity'
sum([stock.get('quantity', 0) for stock in stocksdata])
29.0
sum([stock.get("quantity", 0)*stock['value'] for stock in stocksdata])
6786.0
{key: value for key, value in zip(keys, values)}
dict(zip(keys, values))
{'name': 'IBM', 'value': 125, 'high': 128, 'low': 123}
prices = [('IBM', 'Monday', 111.71436961893693),
('IBM', 'Tuesday', 141.21220022208635),
('IBM', 'Wednesday', 112.40571010053796),
('IBM', 'Thursday', 137.54133351926248),
('IBM', 'Friday', 140.25154281801224),
('MICROSOFT', 'Monday', 235.0403622499107),
('MICROSOFT', 'Tuesday', 225.0206535036475),
('MICROSOFT', 'Wednesday', 216.10342426936444),
('MICROSOFT', 'Thursday', 200.38038844494193),
('MICROSOFT', 'Friday', 235.80850482793264),
('APPLE', 'Monday', 321.49182055844256),
('APPLE', 'Tuesday', 340.63612771662815),
('APPLE', 'Wednesday', 303.9065277507285),
('APPLE', 'Thursday', 338.1350605764038),
('APPLE', 'Friday', 318.3912296144338)]
def weekly_average(prices, symbol):
    """Return the mean of the prices recorded for ``symbol``.

    ``prices`` is an iterable of ``(name, day, price)`` tuples; only the
    entries whose name equals ``symbol`` contribute to the average.
    """
    matching = []
    for name, _day, price in prices:
        if name == symbol:
            matching.append(price)
    return mean(matching)
weekly_average(prices, "MICROSOFT")
222.47066665915946
symbols = set([symbol for symbol,day, value in prices])
symbols # it is a set: it has no key:value pairs, so it is not a dictionary
{'APPLE', 'IBM', 'MICROSOFT'}
In a set, uniqueness is guaranteed but order is not. In a list, order is guaranteed but the items are not necessarily unique.
symbols.add("APPLE") # uniqueness
symbols
{'APPLE', 'IBM', 'MICROSOFT'}
ones = [1, 1, 1, 1]
ones.append(1)
ones
[1, 1, 1, 1, 1]
help(symbols.pop) # it will remove arbitrary...not last
Help on built-in function pop:
pop(...) method of builtins.set instance
Remove and return an arbitrary set element.
Raises KeyError if the set is empty.
s = {1, 2, 3, 4, 5, 5}
s
{1, 2, 3, 4, 5}
for i in range(len(s)):
print(s.pop())
1 2 3 4 5
s
set()
s = {1, 2, 3, 4}
s.remove(1)
s
{2, 3, 4}
nums = [1, 2, 3, 4]
nums.remove(2)
nums
[1, 3, 4]
symbols
{'APPLE', 'IBM', 'MICROSOFT'}
weekly_averages = {symbol:weekly_average(prices, symbol) for symbol in symbols}
weekly_averages
{'APPLE': 324.51215324332736,
'MICROSOFT': 222.47066665915946,
'IBM': 128.62503125576717}
stocks= {'APPLE': 700.5,
'IBM': 300.1,
'AT&T': 355.7,
'AGILENT': 600.3}
[i for i in range(100) if i%7==0 or i%11==0]
[0, 7, 11, 14, 21, 22, 28, 33, 35, 42, 44, 49, 55, 56, 63, 66, 70, 77, 84, 88, 91, 98, 99]
stocks= {'APPLE': 700.5,
'IBM': 300.1,
'AT&T': 355.7,
'AGILENT': 600.3}
{k:v for k,v in stocks.items() if v > 300}
{'APPLE': 700.5, 'IBM': 300.1, 'AT&T': 355.7, 'AGILENT': 600.3}
symbols
{'APPLE', 'IBM', 'MICROSOFT'}
{k:v for k,v in stocks.items() if k in symbols}
{'APPLE': 700.5, 'IBM': 300.1}
Problem
def generate_test_data(filename):
    """Write a sample words file to ``filename``.

    Line *n* (counting from one) holds the English word for *n*, repeated
    *n* times and separated by commas; the third line, for example, is
    ``three,three,three``.  The file is opened in ``"w"`` mode, so any
    existing content is replaced.
    """
    words = ["one", "two", "three", "four", "five",
             "six", "seven", "eight", "nine", "ten"]
    with open(filename, "w") as file:
        for count, word in enumerate(words, start=1):
            file.write(",".join([word] * count) + "\n")
[1,2]*2
[1, 2, 1, 2]
["one"]*2
['one', 'one']
generate_test_data("words.csv")
!python cat.py words.csv
one two,two three,three,three four,four,four,four five,five,five,five,five six,six,six,six,six,six seven,seven,seven,seven,seven,seven,seven eight,eight,eight,eight,eight,eight,eight,eight nine,nine,nine,nine,nine,nine,nine,nine,nine ten,ten,ten,ten,ten,ten,ten,ten,ten,ten
Problem
Paperwork
ones.count(1)
5
def get_all_words(filename):
    """Return the whitespace separated tokens of the file as a list.

    The text is split on whitespace only, so a comma separated group such
    as ``two,two`` comes back as a single token.
    """
    with open(filename) as source:
        content = source.read()
    return content.split()
def word_freq(filename):
    """Map every distinct word of the file to the number of times it occurs.

    The words come from ``get_all_words``; each distinct word is counted
    with ``list.count`` over the full word list.
    """
    words = get_all_words(filename)
    return {word: words.count(word) for word in set(words)}
word_freq("words.csv")
{'two,two': 1,
'four,four,four,four': 1,
'eight,eight,eight,eight,eight,eight,eight,eight': 1,
'ten,ten,ten,ten,ten,ten,ten,ten,ten,ten': 1,
'three,three,three': 1,
'six,six,six,six,six,six': 1,
'nine,nine,nine,nine,nine,nine,nine,nine,nine': 1,
'seven,seven,seven,seven,seven,seven,seven': 1,
'five,five,five,five,five': 1,
'one': 1}
get_all_words("words.csv")
['one', 'two,two', 'three,three,three', 'four,four,four,four', 'five,five,five,five,five', 'six,six,six,six,six,six', 'seven,seven,seven,seven,seven,seven,seven', 'eight,eight,eight,eight,eight,eight,eight,eight', 'nine,nine,nine,nine,nine,nine,nine,nine,nine', 'ten,ten,ten,ten,ten,ten,ten,ten,ten,ten']
def get_all_words(filename):
    """Return every comma separated word of the file as one flat list.

    Each line is stripped of surrounding whitespace and split on commas;
    the words are returned in the order they appear in the file.
    """
    with open(filename) as source:
        return [
            word
            for record in source
            for word in record.strip().split(",")
        ]
def word_freq(filename):
    """Return a dictionary mapping each distinct word to its frequency.

    The word list is obtained from ``get_all_words``; every distinct word
    is then counted against that full list.
    """
    words = get_all_words(filename)
    distinct = set(words)
    return {word: words.count(word) for word in distinct}
word_freq("words.csv")
{'five': 5,
'ten': 10,
'six': 6,
'seven': 7,
'two': 2,
'four': 4,
'three': 3,
'nine': 9,
'eight': 8,
'one': 1}
def word_freq1(filename):
    """Count the words of the file in a single pass over the word list.

    A word seen for the first time starts at 1; every later occurrence
    increments its existing count.  Keys appear in first-seen order.
    """
    counts = {}
    for word in get_all_words(filename):
        if word in counts:
            counts[word] += 1
        else:
            counts[word] = 1
    return counts
word_freq1("words.csv")
{'one': 1,
'two': 2,
'three': 3,
'four': 4,
'five': 5,
'six': 6,
'seven': 7,
'eight': 8,
'nine': 9,
'ten': 10}
# wordfreq[w]-> get/setdefault
def word_freq2(filename):
    """Count the words of the file using ``dict.get`` with a default.

    ``get(word, 0)`` supplies 0 for a word that has not been seen yet, so
    no explicit membership test is needed before incrementing.
    """
    counts = {}
    for word in get_all_words(filename):
        counts[word] = counts.get(word, 0) + 1
    return counts
word_freq2("words.csv")
{'one': 1,
'two': 2,
'three': 3,
'four': 4,
'five': 5,
'six': 6,
'seven': 7,
'eight': 8,
'nine': 9,
'ten': 10}
for i in range(1, 11):
print(i, i**2, i**3)
1 1 1 2 4 8 3 9 27 4 16 64 5 25 125 6 36 216 7 49 343 8 64 512 9 81 729 10 100 1000
"hello {name}!".format(name="vikrant")
'hello vikrant!'
"{x} {y} {z}".format(x=1, y=2, z=3)
'1 2 3'
x, y,z = 1, 2, 3
f"{x}, {y}, {z}"
'1, 2, 3'
"some string one {}, {}, {} thsese values".format(1, 2, 3)
'some string one 1, 2, 3 thsese values'
"some string one {0}, {2}, {1} thsese values".format("ABC", "XYZ", "MNO")
'some string one ABC, MNO, XYZ thsese values'
stocksdata
[{'name': 'IBM', 'value': 125.0, 'high': 128.0, 'low': 123.0},
{'name': 'XYS', 'value': 234.0, 'high': 235.0, 'low': 233.0, 'quantity': 4.0},
{'name': 'XYM', 'value': 234.0, 'high': 235.0, 'low': 233.0},
{'name': 'XYN',
'value': 234.0,
'high': 235.0,
'low': 233.0,
'quantity': 10.0},
{'name': 'XYO',
'value': 234.0,
'high': 235.0,
'low': 233.0,
'quantity': 15.0}]
for d in stocksdata:
v = list(d.values())
print("{},{},{},{}".format(v[0], v[1], v[2], v[3]))
IBM,125.0,128.0,123.0 XYS,234.0,235.0,233.0 XYM,234.0,235.0,233.0 XYN,234.0,235.0,233.0 XYO,234.0,235.0,233.0
for i in range(1, 11):
print("{} {} {}".format(i, i**2, i**3))
1 1 1 2 4 8 3 9 27 4 16 64 5 25 125 6 36 216 7 49 343 8 64 512 9 81 729 10 100 1000
for i in range(1, 11):
print("{c0} {c1} {c2}".format(c0=i, c1=i**2, c2=i**3))
1 1 1 2 4 8 3 9 27 4 16 64 5 25 125 6 36 216 7 49 343 8 64 512 9 81 729 10 100 1000
for i in range(1, 11):
print("{c0:2d} {c1:3d} {c2:4d}".format(c0=i, c1=i**2, c2=i**3))
1 1 1 2 4 8 3 9 27 4 16 64 5 25 125 6 36 216 7 49 343 8 64 512 9 81 729 10 100 1000
word_freq("words.csv")
{'five': 5,
'ten': 10,
'six': 6,
'seven': 7,
'two': 2,
'four': 4,
'three': 3,
'nine': 9,
'eight': 8,
'one': 1}
freq = word_freq("words.csv")
freq
{'five': 5,
'ten': 10,
'six': 6,
'seven': 7,
'two': 2,
'four': 4,
'three': 3,
'nine': 9,
'eight': 8,
'one': 1}
def get_freq(r):
    """Return the second element of ``r``.

    Used as a sort key for ``(word, frequency)`` pairs so that sorting is
    done by frequency instead of by word.
    """
    frequency = r[1]
    return frequency
for w, f in sorted(freq.items(), key=get_freq):
print(w, f)
one 1 two 2 three 3 four 4 five 5 six 6 seven 7 eight 8 nine 9 ten 10
for w, f in sorted(freq.items(), key=get_freq):
print(w.rjust(5), f)
one 1 two 2 three 3 four 4 five 5 six 6 seven 7 eight 8 nine 9 ten 10
for w, f in sorted(freq.items(), key=get_freq):
print(w.rjust(5), "{:2d}".format(f), "*"*f)
one 1 * two 2 ** three 3 *** four 4 **** five 5 ***** six 6 ****** seven 7 ******* eight 8 ******** nine 9 ********* ten 10 **********
for w, f in sorted(freq.items()):
print(w.rjust(5), "{:2d}".format(f), "*"*f)
eight 8 ******** five 5 ***** four 4 **** nine 9 ********* one 1 * seven 7 ******* six 6 ****** ten 10 ********** three 3 *** two 2 **
list(freq.items())
[('five', 5),
('ten', 10),
('six', 6),
('seven', 7),
('two', 2),
('four', 4),
('three', 3),
('nine', 9),
('eight', 8),
('one', 1)]
for w, f in sorted(freq.items(), key=get_freq, reverse=True):
print(w.rjust(5), "{:2d}".format(f), "*"*f)
ten 10 ********** nine 9 ********* eight 8 ******** seven 7 ******* six 6 ****** five 5 ***** four 4 **** three 3 *** two 2 ** one 1 *
freq
{'five': 5,
'ten': 10,
'six': 6,
'seven': 7,
'two': 2,
'four': 4,
'three': 3,
'nine': 9,
'eight': 8,
'one': 1}
d = {'five': 5,
'ten': 10,
'six': 6,
'seven': 7}
d
{'five': 5, 'ten': 10, 'six': 6, 'seven': 7}
freq
{'five': 5,
'ten': 10,
'six': 6,
'seven': 7,
'two': 2,
'four': 4,
'three': 3,
'nine': 9,
'eight': 8,
'one': 1}
d.keys() & freq.keys()
{'five', 'seven', 'six', 'ten'}
import operator as op
def countifs(criterio_list, condstr):
    """Count the items of ``criterio_list`` that satisfy ``condstr``.

    ``condstr`` is a spreadsheet style criterion: a comparison operator
    (``>``, ``>=``, ``<``, ``<=``, ``=`` or ``<>``) combined with a
    non-negative integer, for example ``"<=30"`` or ``"<>60"``.  A bare
    number such as ``"60"`` means equality, exactly like ``"=60"``.
    Whitespace inside ``condstr`` is ignored, so ``">= 50"`` is accepted.

    Raises:
        ValueError: if ``condstr`` contains no digits.
        KeyError: if the non-digit part of ``condstr`` is not one of the
            supported operators.
    """
    funcmap = {
        ">": op.gt,
        ">=": op.ge,
        "<": op.lt,
        "<=": op.le,
        "": op.eq,
        "=": op.eq,  # explicit equality, same as giving only the number
        "<>": op.ne,
    }
    # The digits form the number to compare against; every other
    # non-blank character belongs to the operator.
    value = int("".join(c for c in condstr if c.isdigit()))
    cond = "".join(c for c in condstr if not (c.isdigit() or c.isspace()))
    # Looked up once, before the scan, so an unsupported operator is
    # reported even when the list is empty.
    compare = funcmap[cond]
    return sum(1 for item in criterio_list if compare(item, value))
countifs([10, 20, 10, 20, 30, 40, 50, 50, 60, 60], "<>60")
8
countifs([10, 20, 10, 20, 30, 40, 50, 50, 60, 60], ">60")
0
countifs([10, 20, 10, 20, 30, 40, 50, 50, 60, 60], "<60")
8
countifs([10, 20, 10, 20, 30, 40, 50, 50, 60, 60], "<=30")
5
condstr = "<>40"
int("".join([c for c in condstr if c.isdigit()]))
40
"".join([c for c in condstr if not c.isdigit()])
'<>'
op.ge
<function _operator.ge(a, b, /)>
def greaterthan(x, y):
    """Return the result of ``x > y``, written as a plain function."""
    is_greater = x > y
    return is_greater
conds = [">", ">=", "<", "<=", "", "<>"]
funcs = [op.gt, op.ge, op.lt, op.le, op.eq, op.ne]
funcmap = dict(zip(conds, funcs))
funcmap
{'>': <function _operator.gt(a, b, /)>,
'>=': <function _operator.ge(a, b, /)>,
'<': <function _operator.lt(a, b, /)>,
'<=': <function _operator.le(a, b, /)>,
'': <function _operator.eq(a, b, /)>,
'<>': <function _operator.ne(a, b, /)>}
funcmap[">"]
<function _operator.gt(a, b, /)>
funcmap[">"](2, 1)
True
funcmap[">"](2, 5)
False