Oct 11-15, 2021 Vikrant Patil
These notes are available online at https://notes.pipal.in/2021/arcesium_finop_batch2/
© Pipal Academy LLP
Day 1 | Day 2 | Day 3 | Day 4 | Day 5
We will be using jupyter hub from https://lab2.pipal.in for this training.
create a notebook with name module2-day5
dict = {1:"one"}
dict(zip([1, 2, 3], ["one", "two", "three"]))
--------------------------------------------------------------------------- TypeError Traceback (most recent call last) <ipython-input-2-e63fd93196a3> in <module> ----> 1 dict(zip([1, 2, 3], ["one", "two", "three"])) TypeError: 'dict' object is not callable
dict
{1: 'one'}
del dict
dict(zip([1, 2, 3], ["one", "two", "three"]))
{1: 'one', 2: 'two', 3: 'three'}
def csvparser(filename):
with open(filename) as f:
return [line.strip().split(",") for line in f if line.strip()]
csvparser("data.csv")
[['1', '2', '3', '4'], ['2', '3', '4', '5']]
!ls
aad.py module1-assignment.html module2-day1.ipynb numbers.txt addapp.py module1-assignment.ipynb module2-day2.html push add.py module1-day1.html module2-day2.ipynb __pycache__ anotherpyfile.py module1-day1.ipynb module2-day3.html square.py bank0.py module1-day2.html module2-day3.ipynb tables.csv bank1.py module1-day2.ipynb module2-day4.html test cat.py module1-day3.html module2-day4.ipynb testmodule.py data.csv module1-day3.ipynb module2-day5.html Untitled.html head.py module1-day4.html module2-day5.ipynb words.txt indexdata.csv module1-day4.ipynb myindexdata.csv zen.txt index.html module1-day5.html mymodule1.py index.ipynb module1-day5.ipynb mymodule.py Makefile module2-day1.html mytables.csv
!python3 cat.py myindexdata.csv
symbol,day,price IBM,Monday,111.343 IBM,Tuesday,112.3232 MICROSOFT,Monday,234.454 MICROSOFT,Tuesday,343.54 APPLE,Monday,454.5 APPLE,Tuesday,465.5
%%file indexdata.csv
symbol,day,price,volume
IBM,Monday,111.343,1000
IBM,Tuesday,112.3232,3000
MICROSOFT,Monday,234.454,5000
MICROSOFT,Tuesday,343.54,0
APPLE,Monday,454.5,10000
APPLE,Tuesday,465.5,4323
Overwriting indexdata.csv
import csv
%%file indexdata_quoted.csv
"symbol","day","price","volume"
"IBM","Monday","111.343","1,000"
"IBM","Tuesday","112.3232","3,000"
Writing indexdata_quoted.csv
csvparser("indexdata.csv")
[['symbol', 'day', 'price', 'volume'], ['IBM', 'Monday', '111.343', '1000'], ['IBM', 'Tuesday', '112.3232', '3000'], ['MICROSOFT', 'Monday', '234.454', '5000'], ['MICROSOFT', 'Tuesday', '343.54', '0'], ['APPLE', 'Monday', '454.5', '10000'], ['APPLE', 'Tuesday', '465.5', '4323']]
csvparser("indexdata_quoted.csv")
[['"symbol"', '"day"', '"price"', '"volume"'], ['"IBM"', '"Monday"', '"111.343"', '"1', '000"'], ['"IBM"', '"Tuesday"', '"112.3232"', '"3', '000"']]
import csv
def read_csv(filename):
with open(filename) as f:
creader = csv.reader(f) # this is again one iterator
# it is gives fields directly from csv file
for fields in creader:
print(fields) # fiels is a list of fields in every row
read_csv("indexdata.csv")
['symbol', 'day', 'price', 'volume'] ['IBM', 'Monday', '111.343', '1000'] ['IBM', 'Tuesday', '112.3232', '3000'] ['MICROSOFT', 'Monday', '234.454', '5000'] ['MICROSOFT', 'Tuesday', '343.54', '0'] ['APPLE', 'Monday', '454.5', '10000'] ['APPLE', 'Tuesday', '465.5', '4323']
import csv
def read_csv(filename):
with open(filename) as f:
creader = csv.reader(f) # this is again one iterator
# it is gives fields directly from csv file
headers = next(creader)
print(headers, "X"*10)
for fields in creader:
print(fields) # fiels is a list of fields in every row
read_csv("indexdata.csv")
['symbol', 'day', 'price', 'volume'] XXXXXXXXXX ['IBM', 'Monday', '111.343', '1000'] ['IBM', 'Tuesday', '112.3232', '3000'] ['MICROSOFT', 'Monday', '234.454', '5000'] ['MICROSOFT', 'Tuesday', '343.54', '0'] ['APPLE', 'Monday', '454.5', '10000'] ['APPLE', 'Tuesday', '465.5', '4323']
import csv
def read_csv(filename):
with open(filename) as f:
creader = csv.reader(f) # this is again one iterator
# it is gives fields directly from csv file
headers = next(creader)
data = []
print(headers, "X"*10)
for fields in creader:
data.append(fields) # fiels is a list of fields in every row
return data
data = read_csv("indexdata.csv")
['symbol', 'day', 'price', 'volume'] XXXXXXXXXX
data
[['IBM', 'Monday', '111.343', '1000'], ['IBM', 'Tuesday', '112.3232', '3000'], ['MICROSOFT', 'Monday', '234.454', '5000'], ['MICROSOFT', 'Tuesday', '343.54', '0'], ['APPLE', 'Monday', '454.5', '10000'], ['APPLE', 'Tuesday', '465.5', '4323']]
data[0]
['IBM', 'Monday', '111.343', '1000']
data[1]
['IBM', 'Tuesday', '112.3232', '3000']
read_csv("indexdata_quoted.csv")
['symbol', 'day', 'price', 'volume'] XXXXXXXXXX
[['IBM', 'Monday', '111.343', '1,000'], ['IBM', 'Tuesday', '112.3232', '3,000']]
csvparser("indexdata_quoted.csv")
[['"symbol"', '"day"', '"price"', '"volume"'], ['"IBM"', '"Monday"', '"111.343"', '"1', '000"'], ['"IBM"', '"Tuesday"', '"112.3232"', '"3', '000"']]
import csv
fieldtypes = [str, str, float, int] # creating a list of functions with will convert to appropriate datatype
def read_csv(filename, fieldtypes):
with open(filename) as f:
creader = csv.reader(f) # this is again one iterator
# it is gives fields directly from csv file
headers = next(creader) # skip header!
data = []
print(headers, "X"*10)
for fields in creader:
data.append([func(v) for v, func in zip(fields, fieldtypes)])
return data
fieldtypes = [str, str, float, int]
row = ['IBM', 'Monday', '111.343', '1000']
for value, convert in zip(row, fieldtypes):
print(convert(value))
IBM Monday 111.343 1000
[convert(value) for value, convert in zip(row, fieldtypes)]
['IBM', 'Monday', 111.343, 1000]
data = read_csv("indexdata.csv", [str, str, float, int])
['symbol', 'day', 'price', 'volume'] XXXXXXXXXX
data
[['IBM', 'Monday', 111.343, 1000], ['IBM', 'Tuesday', 112.3232, 3000], ['MICROSOFT', 'Monday', 234.454, 5000], ['MICROSOFT', 'Tuesday', 343.54, 0], ['APPLE', 'Monday', 454.5, 10000], ['APPLE', 'Tuesday', 465.5, 4323]]
data[0][2]
111.343
data[0][3]
1000
fieldtypes = [("price", float),
("volume", int)]
with open("indexdata.csv") as f:
csvdictreader = csv.DictReader(f)
for row in csvdictreader:
print(row)
{'symbol': 'IBM', 'day': 'Monday', 'price': '111.343', 'volume': '1000'}
{'symbol': 'IBM', 'day': 'Tuesday', 'price': '112.3232', 'volume': '3000'}
{'symbol': 'MICROSOFT', 'day': 'Monday', 'price': '234.454', 'volume': '5000'}
{'symbol': 'MICROSOFT', 'day': 'Tuesday', 'price': '343.54', 'volume': '0'}
{'symbol': 'APPLE', 'day': 'Monday', 'price': '454.5', 'volume': '10000'}
{'symbol': 'APPLE', 'day': 'Tuesday', 'price': '465.5', 'volume': '4323'}
def get_column(filename, columname, type_):
with open(filename) as f:
col = []
for row in csv.DictReader(f):
col.append(type_(row[columname]))
return col
get_column("indexdata.csv", "price", float)
[111.343, 112.3232, 234.454, 343.54, 454.5, 465.5]
get_column("indexdata.csv", "volume", int)
[1000, 3000, 5000, 0, 10000, 4323]
def get_column(filename, columname, type_):
with open(filename) as f:
return [type_(row[columname]) for row in csv.DictReader(f)]
get_column("indexdata.csv", "price", float)
[111.343, 112.3232, 234.454, 343.54, 454.5, 465.5]
def write_csv(data, columns, filename):
with open(filename, "w") as f:
csvf = csv.writer(f)
csvf.writerow(columns)
for row in data:
csvf.writerow(row)
data
[['IBM', 'Monday', 111.343, 1000], ['IBM', 'Tuesday', 112.3232, 3000], ['MICROSOFT', 'Monday', 234.454, 5000], ['MICROSOFT', 'Tuesday', 343.54, 0], ['APPLE', 'Monday', 454.5, 10000], ['APPLE', 'Tuesday', 465.5, 4323]]
write_csv(data, ["symbol","weekday", "price", "volume"], "index_write.csv")
!python cat.py index_write.csv
symbol,weekday,price,volume IBM,Monday,111.343,1000 IBM,Tuesday,112.3232,3000 MICROSOFT,Monday,234.454,5000 MICROSOFT,Tuesday,343.54,0 APPLE,Monday,454.5,10000 APPLE,Tuesday,465.5,4323
%%file file1.csv
item,price,category,notes
amazon kindle book,545.0,books,some notes
mobile recharge,499.0,communication,sdksljakd
amazon kindle book1,550.0,books,some notes
data recharge,399.0,communication,sdksljakd
Writing file1.csv
%%file file2.csv
item,price,category
air ticket,6000.0,travel
meal,499.0,food
Writing file2.csv
problems
!python3 combine.py file1.csv file2.csv output.csv
!python3 cat.py output.csv
item,price,category
amazon kindle book,545.0,books
mobile recharge,499.0,communication
amazon kindle book1,550.0,books
data recharge,399.0,communication
air ticket,6000.0,travel
meal,499.0,food
a = {"a":1, "b":2, "c":3}
b = {"a":2, "b":6, "m":5, "n":3}
a
{'a': 1, 'b': 2, 'c': 3}
b
{'a': 2, 'b': 6, 'm': 5, 'n': 3}
for key in a.keys():
print(key)
a b c
a.keys()
dict_keys(['a', 'b', 'c'])
b.keys()
dict_keys(['a', 'b', 'm', 'n'])
a.keys() & b.keys()
{'a', 'b'}
def find_common_cols(file1, file2):
with open(file1) as f1:
csvf1 = csv.DictReader(f1)
with open(file2) as f2:
csvf2 = csv.DictReader(f2)
row1 = next(csvf1)
row2 = next(csvf2)
return row1.keys() & row2.keys()
def combine_csvs(file1, file2, outputfile):
commoncols = find_common_cols(file1, file2)
with open(file1) as f1:
csvf1 = csv.DictReader(f1)
with open(file2) as f2:
csvf2 = csv.DictReader(f2)
with open(outputfile, "w") as w:
csvfw = csv.DictWriter(w, commoncols)
csvfw.writeheader()
for row in csvf1:
csvfw.writerow({c:row[c] for c in commoncols})
for row in csvf2:
csvfw.writerow({c:row[c] for c in commoncols})
combine_csvs("file1.csv", "file2.csv", "combined.csv")
!python3 cat.py combined.csv
item,category,price amazon kindle book,books,545.0 mobile recharge,communication,499.0 amazon kindle book1,books,550.0 data recharge,communication,399.0 air ticket,travel,6000.0 meal,food,499.0
%%file combine.py
import sys
import csv
def find_common_cols(file1, file2):
with open(file1) as f1:
csvf1 = csv.DictReader(f1)
with open(file2) as f2:
csvf2 = csv.DictReader(f2)
row1 = next(csvf1)
row2 = next(csvf2)
return row1.keys() & row2.keys()
def combine_csvs(file1, file2, outputfile):
commoncols = find_common_cols(file1, file2)
with open(file1) as f1:
csvf1 = csv.DictReader(f1)
with open(file2) as f2:
csvf2 = csv.DictReader(f2)
with open(outputfile, "w") as w:
csvfw = csv.DictWriter(w, commoncols)
csvfw.writeheader()
for row in csvf1:
csvfw.writerow({c:row[c] for c in commoncols})
for row in csvf2:
csvfw.writerow({c:row[c] for c in commoncols})
if __name__ == "__main__":
file1 = sys.argv[1]
file2 = sys.argv[2]
output = sys.argv[3]
combine_csvs(file1, file2, output)
Overwriting combine.py
!python3 combine.py file1.csv file2.csv output.csv
!python3 cat.py output.csv
category,item,price books,amazon kindle book,545.0 communication,mobile recharge,499.0 books,amazon kindle book1,550.0 communication,data recharge,399.0 travel,air ticket,6000.0 food,meal,499.0
%%file combine.py
import typer
import csv
app = typer.Typer()
def find_common_cols(file1, file2):
with open(file1) as f1:
csvf1 = csv.DictReader(f1)
with open(file2) as f2:
csvf2 = csv.DictReader(f2)
row1 = next(csvf1)
row2 = next(csvf2)
return row1.keys() & row2.keys()
@app.command()
def combine_csvs(file1:str, file2:str, outputfile:str): # type annotation
commoncols = find_common_cols(file1, file2)
with open(file1) as f1:
csvf1 = csv.DictReader(f1)
with open(file2) as f2:
csvf2 = csv.DictReader(f2)
with open(outputfile, "w") as w:
csvfw = csv.DictWriter(w, commoncols)
csvfw.writeheader()
for row in csvf1:
csvfw.writerow({c:row[c] for c in commoncols})
for row in csvf2:
csvfw.writerow({c:row[c] for c in commoncols})
if __name__ == "__main__":
app()
Overwriting combine.py
!python3 combine.py --help
Usage: combine.py [OPTIONS] FILE1 FILE2 OUTPUTFILE
Arguments:
FILE1 [required]
FILE2 [required]
OUTPUTFILE [required]
Options:
--install-completion [bash|zsh|fish|powershell|pwsh]
Install completion for the specified shell.
--show-completion [bash|zsh|fish|powershell|pwsh]
Show completion for the specified shell, to
copy it or customize the installation.
--help Show this message and exit.
!python cat.py --help
Traceback (most recent call last):
File "cat.py", line 11, in <module>
cat(filename)
File "cat.py", line 5, in cat
with open(filename) as f:
FileNotFoundError: [Errno 2] No such file or directory: '--help'
!python3 combine.py file1.csv file2.csv output.csv
!python3 cat.py output.csv
item,price,category amazon kindle book,545.0,books mobile recharge,499.0,communication amazon kindle book1,550.0,books data recharge,399.0,communication air ticket,6000.0,travel meal,499.0,food
%%file head.py
import typer
app = typer.Typer()
@app.command()
def head(filename:str, n:int=5):
with open(filename) as f:
for i in range(n):
print(next(f), end="")
if __name__ == "__main__":
app()
Overwriting head.py
!python3 head.py --help
Usage: head.py [OPTIONS] FILENAME
Arguments:
FILENAME [required]
Options:
--n INTEGER [default: 5]
--install-completion [bash|zsh|fish|powershell|pwsh]
Install completion for the specified shell.
--show-completion [bash|zsh|fish|powershell|pwsh]
Show completion for the specified shell, to
copy it or customize the installation.
--help Show this message and exit.
!python3 head.py --n 3 zen.txt
The Zen of Python, by Tim Peters Beautiful is better than ugly.
!python3 head.py --n 6 zen.txt
The Zen of Python, by Tim Peters Beautiful is better than ugly. Explicit is better than implicit. Simple is better than complex. Complex is better than complicated.
!python3 head.py zen.txt
The Zen of Python, by Tim Peters Beautiful is better than ugly. Explicit is better than implicit. Simple is better than complex.
!python3 -m pip install typer
Requirement already satisfied: typer in /home/vikrant/anaconda3/lib/python3.8/site-packages (0.4.0) Requirement already satisfied: click<9.0.0,>=7.1.1 in /home/vikrant/anaconda3/lib/python3.8/site-packages (from typer) (7.1.2)
!pip install typer
Requirement already satisfied: typer in /home/vikrant/anaconda3/lib/python3.8/site-packages (0.4.0) Requirement already satisfied: click<9.0.0,>=7.1.1 in /home/vikrant/anaconda3/lib/python3.8/site-packages (from typer) (7.1.2)
import xlsxwriter
!python3 -m pip install XlsxWriter
Requirement already satisfied: XlsxWriter in /home/vikrant/anaconda3/lib/python3.8/site-packages (1.3.7)
w = xlsxwriter.Workbook("indexdata.xlsx")
worksheet = w.add_worksheet("prices")
worksheet.write(0, 0 , "Hello world")
0
w.close()
data
[['IBM', 'Monday', 111.343, 1000], ['IBM', 'Tuesday', 112.3232, 3000], ['MICROSOFT', 'Monday', 234.454, 5000], ['MICROSOFT', 'Tuesday', 343.54, 0], ['APPLE', 'Monday', 454.5, 10000], ['APPLE', 'Tuesday', 465.5, 4323]]
w = xlsxwriter.Workbook("indexdata1.xlsx")
worksheet = w.add_worksheet("prices")
for r, row in enumerate(data, start=1):
for c, entry in enumerate(row, start=1):
worksheet.write(r, c, entry)
w.close()
w = xlsxwriter.Workbook("format.xlsx")
worksheet = w.add_worksheet("sheet1")
bold = w.add_format({"bold": True})
worksheet.write("A1", "Bold Statement", bold)
w.close()