Dec 17-23, 2020 Vikrant Patil
These notes are available online at http://notes.pipal.in/2020/arcesium_finop_batch3/module2-day5.html
© Pipal Academy LLP
Day 1 | Day 2 | Day 3 | Day 4 | Day 5
We will be using jupyter hub from http://lab.pipal.in for this training. Create a notebook with name module2-day5.ipynb for today's session. Before you start shutdown all kernels except today's notebook.
%%file sample.csv
"name","symbol","high, for the day"
"IBM",ibm,123
"APPLE"apple,200
Writing sample.csv
import csv
!python cat.py index.csv
symbol,price,change,volume MJRWW,130.72759478533843,3.694439794801307,3604 RBFER,138.70935284443215,2.216908526674474,1827 NILRA,191.03471832663206,0.4674588939635804,3809 KLAUL,171.8732962407023,3.3674773238125804,4930 NSDQL,115.53093172800759,2.9539737397625716,3046 IIJCO,132.72824504260913,2.0073130063688,2311 RFGYO,165.16558643307485,4.298006358962731,214 ISUBX,163.00760618033004,0.04753420473903924,2903 JSFNB,162.7964294777765,4.468458672093418,1486 XIIHE,100.1283215847198,2.4659528050212005,1866 KBNBF,165.47518316307583,0.23959575155823087,3834 UMMXP,109.69660529916302,3.1912597734501857,306 TKKVO,144.64943725495849,4.550962454198143,2995 DEFKM,183.563134741773,0.354583080023832,453 DDAUE,167.43485943165342,4.462651203629318,881 CABGJ,182.4185113412454,0.9893499879911921,3399 VQDKF,128.7740419792224,4.040731018215812,3784 WQHNA,189.2156926901894,1.6436503299556149,130 DRBWM,134.7001220983072,0.3527838213440315,4449 OSTGG,188.16655957982945,3.708942762087685,3490
%%file index.csv
symbol,price,change,volume
MJRWW,130.72759478533843,3.694439794801307,3604
RBFER,138.70935284443215,2.216908526674474,1827
NILRA,191.03471832663206,0.4674588939635804,3809
KLAUL,171.8732962407023,3.3674773238125804,4930
NSDQL,115.53093172800759,2.9539737397625716,3046
IIJCO,132.72824504260913,2.0073130063688,2311
RFGYO,165.16558643307485,4.298006358962731,214
ISUBX,163.00760618033004,0.04753420473903924,2903
JSFNB,162.7964294777765,4.468458672093418,1486
XIIHE,100.1283215847198,2.4659528050212005,1866
KBNBF,165.47518316307583,0.23959575155823087,3834
UMMXP,109.69660529916302,3.1912597734501857,306
TKKVO,144.64943725495849,4.550962454198143,2995
DEFKM,183.563134741773,0.354583080023832,453
DDAUE,167.43485943165342,4.462651203629318,881
CABGJ,182.4185113412454,0.9893499879911921,3399
VQDKF,128.7740419792224,4.040731018215812,3784
WQHNA,189.2156926901894,1.6436503299556149,130
DRBWM,134.7001220983072,0.3527838213440315,4449
OSTGG,188.16655957982945,3.708942762087685,3490
Overwriting index.csv
import csv
def read_csv(filename):
data = []
with open(filename) as f:
freader = csv.reader(f)
header = next(freader) # consumed first line of headers
print("headers", header)
for row in freader:
data.append(row)
return data
read_csv("index.csv")
headers ['symbol', 'price', 'change', 'volume']
[['MJRWW', '130.72759478533843', '3.694439794801307', '3604'], ['RBFER', '138.70935284443215', '2.216908526674474', '1827'], ['NILRA', '191.03471832663206', '0.4674588939635804', '3809'], ['KLAUL', '171.8732962407023', '3.3674773238125804', '4930'], ['NSDQL', '115.53093172800759', '2.9539737397625716', '3046'], ['IIJCO', '132.72824504260913', '2.0073130063688', '2311'], ['RFGYO', '165.16558643307485', '4.298006358962731', '214'], ['ISUBX', '163.00760618033004', '0.04753420473903924', '2903'], ['JSFNB', '162.7964294777765', '4.468458672093418', '1486'], ['XIIHE', '100.1283215847198', '2.4659528050212005', '1866'], ['KBNBF', '165.47518316307583', '0.23959575155823087', '3834'], ['UMMXP', '109.69660529916302', '3.1912597734501857', '306'], ['TKKVO', '144.64943725495849', '4.550962454198143', '2995'], ['DEFKM', '183.563134741773', '0.354583080023832', '453'], ['DDAUE', '167.43485943165342', '4.462651203629318', '881'], ['CABGJ', '182.4185113412454', '0.9893499879911921', '3399'], ['VQDKF', '128.7740419792224', '4.040731018215812', '3784'], ['WQHNA', '189.2156926901894', '1.6436503299556149', '130'], ['DRBWM', '134.7001220983072', '0.3527838213440315', '4449'], ['OSTGG', '188.16655957982945', '3.708942762087685', '3490']]
read_csv("sample.csv")
headers ['name', 'symbol', 'high, for the day']
[['IBM', 'ibm', '123'], ['APPLEapple', '200']]
data = """1,2,3
4,5,6"""
data
'1,2,3\n4,5,6'
int(data)
--------------------------------------------------------------------------- ValueError Traceback (most recent call last) <ipython-input-12-c07095f4a360> in <module> ----> 1 int(data) ValueError: invalid literal for int() with base 10: '1,2,3\n4,5,6'
data1 = """123
234"""
int(data1)
--------------------------------------------------------------------------- ValueError Traceback (most recent call last) <ipython-input-14-c1599ae8306a> in <module> ----> 1 int(data1) ValueError: invalid literal for int() with base 10: '123\n234'
int("2 3")
--------------------------------------------------------------------------- ValueError Traceback (most recent call last) <ipython-input-15-b80c07d29005> in <module> ----> 1 int("2 3") ValueError: invalid literal for int() with base 10: '2 3'
int("23")
23
data.split()
['1,2,3', '4,5,6']
[row.split(",") for row in data.split()]
[['1', '2', '3'], ['4', '5', '6']]
row = ['1', '2', '3']
[int(i) for i in row]
[1, 2, 3]
d = [row.split(",") for row in data.split()]
[[int(i) for i in row] for row in d]
[[1, 2, 3], [4, 5, 6]]
import csv
def read_csv(filename, fields):
data = []
with open(filename) as f:
freader = csv.reader(f) # don't forget this line
headers = next(freader) # freader is an iterator!
for row in freader:
row = [func(v) for v,func in zip(row, fields)]
data.append(row)
return data
fields = [str, float, float, int] # list of functions for type conversion
read_csv("index.csv", fields)
[['MJRWW', 130.72759478533843, 3.694439794801307, 3604], ['RBFER', 138.70935284443215, 2.216908526674474, 1827], ['NILRA', 191.03471832663206, 0.4674588939635804, 3809], ['KLAUL', 171.8732962407023, 3.3674773238125804, 4930], ['NSDQL', 115.53093172800759, 2.9539737397625716, 3046], ['IIJCO', 132.72824504260913, 2.0073130063688, 2311], ['RFGYO', 165.16558643307485, 4.298006358962731, 214], ['ISUBX', 163.00760618033004, 0.04753420473903924, 2903], ['JSFNB', 162.7964294777765, 4.468458672093418, 1486], ['XIIHE', 100.1283215847198, 2.4659528050212005, 1866], ['KBNBF', 165.47518316307583, 0.23959575155823087, 3834], ['UMMXP', 109.69660529916302, 3.1912597734501857, 306], ['TKKVO', 144.64943725495849, 4.550962454198143, 2995], ['DEFKM', 183.563134741773, 0.354583080023832, 453], ['DDAUE', 167.43485943165342, 4.462651203629318, 881], ['CABGJ', 182.4185113412454, 0.9893499879911921, 3399], ['VQDKF', 128.7740419792224, 4.040731018215812, 3784], ['WQHNA', 189.2156926901894, 1.6436503299556149, 130], ['DRBWM', 134.7001220983072, 0.3527838213440315, 4449], ['OSTGG', 188.16655957982945, 3.708942762087685, 3490]]
x = ["1","2","3"]
[int(i) for i in x]
[1, 2, 3]
nums = ["IBM", "123.6", "2.1", "200"]
fields = [str, float, float, int]
[func(n) for n, func in zip(nums, fields)]
['IBM', 123.6, 2.1, 200]
for n, func in zip(nums, fields):
print(n, func, func(n))
IBM <class 'str'> IBM 123.6 <class 'float'> 123.6 2.1 <class 'float'> 2.1 200 <class 'int'> 200
first = ["Alice", "Alex", "Elsa"]
lastname = ["Wondegirl", "Lion", "Frozen"]
for name, surname in zip(first, lastname):
print(name, surname)
Alice Wondegirl Alex Lion Elsa Frozen
indexdata = read_csv("index.csv", [str, float, float, int])
type(indexdata[0][-1])
int
%%file sample1.csv
name,price,high,low,volume
IBM,123.4,125.7,122.3,200
APPLE,200.3,205.4,199.0,300
AGILENT,300.3,302.5,299.4,100
Writing sample1.csv
fields = [("price", float),
("high", float),
("low",float),
("volume", int)]
with open("sample1.csv") as f:
freader = csv.DictReader(f) #Disctreader will convert every row into a dictionary
for row in freader:
print(row)
{'name': 'IBM', 'price': '123.4', 'high': '125.7', 'low': '122.3', 'volume': '200'}
{'name': 'APPLE', 'price': '200.3', 'high': '205.4', 'low': '199.0', 'volume': '300'}
{'name': 'AGILENT', 'price': '300.3', 'high': '302.5', 'low': '299.4', 'volume': '100'}
fields = [("price", float),
("high", float),
("low",float),
("volume", int)]
with open("sample1.csv") as f:
freader = csv.reader(f)
for row in freader:
print(row)
['name', 'price', 'high', 'low', 'volume'] ['IBM', '123.4', '125.7', '122.3', '200'] ['APPLE', '200.3', '205.4', '199.0', '300'] ['AGILENT', '300.3', '302.5', '299.4', '100']
fields = [("price", float),
("high", float),
("low",float),
("volume", int)]
with open("sample1.csv") as f:
freader = csv.DictReader(f) #DisctReader will convert every row into a dictionary
for row in freader:
for key, func in fields:
row[key] = func(row[key])
print(row)
{'name': 'IBM', 'price': 123.4, 'high': 125.7, 'low': 122.3, 'volume': 200}
{'name': 'APPLE', 'price': 200.3, 'high': 205.4, 'low': 199.0, 'volume': 300}
{'name': 'AGILENT', 'price': 300.3, 'high': 302.5, 'low': 299.4, 'volume': 100}
fields = [("price", float),
("high", float),
("low",float)]
with open("sample1.csv") as f:
freader = csv.DictReader(f) #DisctReader will convert every row into a dictionary
for row in freader:
for key, func in fields:
row[key] = func(row[key])
print(row)
{'name': 'IBM', 'price': 123.4, 'high': 125.7, 'low': 122.3, 'volume': '200'}
{'name': 'APPLE', 'price': 200.3, 'high': 205.4, 'low': 199.0, 'volume': '300'}
{'name': 'AGILENT', 'price': 300.3, 'high': 302.5, 'low': 299.4, 'volume': '100'}
def extract_name_price(filename):
fields = [("price", float)]
data = []
with open(filename) as f:
for row in csv.DictReader(f):
r = [(row['name'], func(row[key])) for key, func in fields]
data.append(r)
return data
extract_name_price("sample1.csv")
[[('IBM', 123.4)], [('APPLE', 200.3)], [('AGILENT', 300.3)]]
def get_column(filename, name, type_):
with open(filename) as f:
return [type_(row[name]) for row in csv.DictReader(f)]
get_column("sample1.csv", "name", str)
['IBM', 'APPLE', 'AGILENT']
get_column("sample1.csv", "price", float)
[123.4, 200.3, 300.3]
get_column("index.csv", "symbol", str)
['MJRWW', 'RBFER', 'NILRA', 'KLAUL', 'NSDQL', 'IIJCO', 'RFGYO', 'ISUBX', 'JSFNB', 'XIIHE', 'KBNBF', 'UMMXP', 'TKKVO', 'DEFKM', 'DDAUE', 'CABGJ', 'VQDKF', 'WQHNA', 'DRBWM', 'OSTGG']
instead of csv.reader, we make use of csv.writer
def write_csv(data, columns, filename):
with open(filename, "w") as f:
csvf = csv.writer(f)
csvf.writerow(columns) # header
for row in data:
csvf.writerow(row)
d = read_csv("sample1.csv", [str, float, float, float, int])
d
[['IBM', 123.4, 125.7, 122.3, 200], ['APPLE', 200.3, 205.4, 199.0, 300], ['AGILENT', 300.3, 302.5, 299.4, 100]]
write_csv(d, ['name','price','high','low','volume'], 'csvexample.csv')
!python3 cat.py csvexample.csv
name,price,high,low,volume IBM,123.4,125.7,122.3,200 APPLE,200.3,205.4,199.0,300 AGILENT,300.3,302.5,299.4,100
write_csv(d, ['name','price,Rs','high','low','volume'], 'csvexample.csv')
!python3 cat.py csvexample.csv
name,"price,Rs",high,low,volume IBM,123.4,125.7,122.3,200 APPLE,200.3,205.4,199.0,300 AGILENT,300.3,302.5,299.4,100
%%file this is not related to python programming! it is not a prograame at all... it is simply an editor provided by jupyter.
def append_csv(data, columns, filename):
with open(filename, "a") as f:
csvf = csv.writer(f)
csvf.writerow(columns) # header has to be handled separately
for row in data:
csvf.writerow(row)
append_csv(d, ['name', "price", "high", "low", "volume"], "csvexample.csv")
!cat csvexample.csv
name,"price,Rs",high,low,volume IBM,123.4,125.7,122.3,200 APPLE,200.3,205.4,199.0,300 AGILENT,300.3,302.5,299.4,100 name,price,high,low,volume IBM,123.4,125.7,122.3,200 APPLE,200.3,205.4,199.0,300 AGILENT,300.3,302.5,299.4,100
def append_csv(data, columns, filename):
with open(filename, "a") as f:
csvf = csv.writer(f)
for row in data:
csvf.writerow(row)
write_csv(d, ['name','price','high','low','volume'], 'csvexample1.csv')
!cat csvexample1.csv
name,price,high,low,volume IBM,123.4,125.7,122.3,200 APPLE,200.3,205.4,199.0,300 AGILENT,300.3,302.5,299.4,100
append_csv(d, ['name','price','high','low','volume'], 'csvexample1.csv')
!cat csvexample1.csv
name,price,high,low,volume IBM,123.4,125.7,122.3,200 APPLE,200.3,205.4,199.0,300 AGILENT,300.3,302.5,299.4,100 IBM,123.4,125.7,122.3,200 APPLE,200.3,205.4,199.0,300 AGILENT,300.3,302.5,299.4,100
import xlsxwriter
!python3 -m pip install XlsxWriter
Requirement already satisfied: XlsxWriter in /home/vikrant/anaconda3/lib/python3.8/site-packages (1.2.9)
WARNING: You are using pip version 20.2.3; however, version 20.3.3 is available.
You should consider upgrading via the '/home/vikrant/anaconda3/bin/python3 -m pip install --upgrade pip' command.
Above command is from jupyter! from cmd ...you open cmd, and run
python3 -m pip install XlsxWriter
if you don't have admin previllages use --user
python -m pip install --user XlsxWriter
import xlsxwriter
workbook = xlsxwriter.Workbook("sample.xlsx")
worksheet = workbook.add_worksheet("sample_sheet")
worksheet.write(0, 0, "Hello") # (rownumber, columnnumber, value)
worksheet.write(0, 1, "World")
worksheet.write("A2", "Some entry") #(CellID, value)
workbook.close()
!ls sample.xlsx
sample.xlsx
import os
os.chdir("")