Jun Jul 18-22, 2022 Vikrant Patil
All notes are available online at https://notes.pipal.in/2022/arcesium_finop_batch1/
Please accept the invitation that you have received in your email and login to
From there launch your jupyter lab. Create a notebook with name module2-day5.
Shutdown all older notebooks so that load on server reduces.
© Pipal Academy LLP
%%file stocks_quoted.csv
"IBM","125","128","123"
"XYS","234","235","233"
"XYM","234","235","233"
"XYN","234","235","233"
"XYO","234","235","233"
Overwriting stocks_quoted.csv
import csv
def read_csv(filepath):
with open(filepath) as f:
creader = csv.reader(f)
for fields in creader:
print(fields)
read_csv("stocks_quoted.csv")
['IBM', '125', '128', '123'] ['XYS', '234', '235', '233'] ['XYM', '234', '235', '233'] ['XYN', '234', '235', '233'] ['XYO', '234', '235', '233']
fieldtypes = [str, int, int, float] # this is list of functions to convert into appropriate type
def read_csv_with_type(filepath, fieldtypes):
with open(filepath) as f:
creader = csv.reader(f)
data = []
for fields in creader: # you get data as fields of every row
converted_fields = [converter(field) for converter, field in zip(fieldtypes, fields)]
data.append(converted_fields)
return data
fieldtypes = [str, int, int, float]
read_csv_with_type("stocks_quoted.csv", fieldtypes)
[['IBM', 125, 128, 123.0], ['XYS', 234, 235, 233.0], ['XYM', 234, 235, 233.0], ['XYN', 234, 235, 233.0], ['XYO', 234, 235, 233.0]]
csv_data = read_csv_with_type("stocks_quoted.csv", fieldtypes)
csv_data[0]
['IBM', 125, 128, 123.0]
csv_data[-1]
['XYO', 234, 235, 233.0]
[row for row in csv_data if row[1] > 200]
[['XYS', 234, 235, 233.0], ['XYM', 234, 235, 233.0], ['XYN', 234, 235, 233.0], ['XYO', 234, 235, 233.0]]
[row for row in csv_data if row[0] == 'IBM']
[['IBM', 125, 128, 123.0]]
%%file stocks_unquoted.csv
IBM,125,128,123
XYS,234,235,233
XYM,234,235,233
XYN,234,235,233
XYO,234,235,233
Overwriting stocks_unquoted.csv
read_csv_with_type("stocks_unquoted.csv", fieldtypes)
[['IBM', 125, 128, 123.0], ['XYS', 234, 235, 233.0], ['XYM', 234, 235, 233.0], ['XYN', 234, 235, 233.0], ['XYO', 234, 235, 233.0]]
%%file stocks_quoted_with_header.csv
"name","value","high","low,0"
"IBM","125","128","123"
"XYS","234","235","233"
"XYM","234","235","233"
"XYN","234","235","233"
"XYO","234","235","233"
Overwriting stocks_quoted_with_header.csv
f = open("stocks_quoted_with_header.csv")
creader = csv.reader(f)
next(creader)
['name', 'value', 'high', 'low,0']
next(creader)
['IBM', '125', '128', '123']
next(creader)
['XYS', '234', '235', '233']
for fields in creader: # it will go over remaining items
print(fields)
['XYM', '234', '235', '233'] ['XYN', '234', '235', '233'] ['XYO', '234', '235', '233']
def read_csv(filepath, fieldtypes, header=True):
with open(filepath) as f:
creader = csv.reader(f)
if header:
column_headers = next(creader)
data = []
for fields in creader:
data.append([convert(field) for convert, field in zip(fieldtypes, fields)])
return data
read_csv("stocks_quoted_with_header.csv", fieldtypes)
[['IBM', 125, 128, 123.0], ['XYS', 234, 235, 233.0], ['XYM', 234, 235, 233.0], ['XYN', 234, 235, 233.0], ['XYO', 234, 235, 233.0]]
with open("stocks_quoted_with_header.csv") as f:
creader = csv.DictReader(f) # first row as column header
for fields in creader:
print(fields)
{'name': 'IBM', 'value': '125', 'high': '128', 'low,0': '123'}
{'name': 'XYS', 'value': '234', 'high': '235', 'low,0': '233'}
{'name': 'XYM', 'value': '234', 'high': '235', 'low,0': '233'}
{'name': 'XYN', 'value': '234', 'high': '235', 'low,0': '233'}
{'name': 'XYO', 'value': '234', 'high': '235', 'low,0': '233'}
def get_column(filepath, columnname):
with open(filepath) as f:
creader = csv.DictReader(f)
return [row[columnname] for row in creader]
get_column("stocks_quoted_with_header.csv", "name")
['IBM', 'XYS', 'XYM', 'XYN', 'XYO']
get_column("stocks_quoted_with_header.csv", "low,0")
['123', '233', '233', '233', '233']
get_column("stocks_quoted_with_header.csv", "value")
['125', '234', '234', '234', '234']
def get_column(filepath, columnname, converter):
with open(filepath) as f:
creader = csv.DictReader(f)
return [converter(row[columnname]) for row in creader]
get_column("stocks_quoted_with_header.csv", "value", float)
[125.0, 234.0, 234.0, 234.0, 234.0]
get_column("stocks_quoted_with_header.csv", "value", int)
[125, 234, 234, 234, 234]
fieldtypes = {'value':float,
'high':int}
def read_csv_as_columns(filepath, fieldconverters):
with open(filepath) as f:
creader = csv.DictReader(f)
data = {}
for row in creader: # for every row you get a dictionary
for key in row: # going over every column
if key in fieldconverters: # do I have a converter for this column
convert = fieldconverters[key]
data.setdefault(key,[]).append(convert(row[key]))
else:
data.setdefault(key,[]).append(row[key])
return data
columndata = read_csv_as_columns("stocks_quoted_with_header.csv", fieldtypes)
columndata['name']
['IBM', 'XYS', 'XYM', 'XYN', 'XYO']
columndata['value']
[125.0, 234.0, 234.0, 234.0, 234.0]
d = {}
d.setdefault("value", []).append(5)
d
{'value': [5]}
d.setdefault("value", []).append(10)
d
{'value': [5, 10]}
stock = {"name":"IBM", "value":125}
stock['low']
--------------------------------------------------------------------------- KeyError Traceback (most recent call last) Input In [71], in <cell line: 1>() ----> 1 stock['low'] KeyError: 'low'
stock.setdefault('low', 123)
123
stock
{'name': 'IBM', 'value': 125, 'low': 123}
# consider fields from csv file that we were working on
columnsdata = {"name":[],
"value":[],
"high":[],
"low":[]}
columnsdata['name'].append("IBM")
columnsdata['value'].append(125)
columnsdata['high'].append(130)
columnsdata['low'].append(123)
columnsdata
{'name': ['IBM'], 'value': [125], 'high': [130], 'low': [123]}
cdata = {}
cdata.setdefault('name', []).append('IBM')
cdata.setdefault('value', []).append(125)
cdata.setdefault('high', []).append(130)
cdata.setdefault('low', []).append(123)
cdata
{'name': ['IBM'], 'value': [125], 'high': [130], 'low': [123]}
def write_csv(data, header, filename):
with open(filename, "w") as f:
cwriter = csv.writer(f)
cwriter.writerow(header) # this will write single row
cwriter.writerows(data) # this will write multiple rows a a time
data = [['IBM', 125, 128, 123.0],
['XYS', 234, 235, 233.0],
['XYM', 234, 235, 233.0],
['XYN', 234, 235, 233.0],
['XYO', 234, 235, 233.0]]
columnnames = ['name', 'value', 'high', 'low']
write_csv(data, columnnames, "csvdata.csv")
!python cat.py csvdata.csv
name,value,high,low IBM,125,128,123.0 XYS,234,235,233.0 XYM,234,235,233.0 XYN,234,235,233.0 XYO,234,235,233.0
data.insert(0,["name","value","high","low"])
data # this will not work with above function
[['name', 'value', 'high', 'low'], ['IBM', 125, 128, 123.0], ['XYS', 234, 235, 233.0], ['XYM', 234, 235, 233.0], ['XYN', 234, 235, 233.0], ['XYO', 234, 235, 233.0]]
write_csv(data[1:], data[0], "test.csv")
!cat test.csv
name,value,high,low IBM,125,128,123.0 XYS,234,235,233.0 XYM,234,235,233.0 XYN,234,235,233.0 XYO,234,235,233.0
write_csv(data, columnnames, "csvdata.csv")
!cat csvdata.csv
name,value,high,low name,value,high,low IBM,125,128,123.0 XYS,234,235,233.0 XYM,234,235,233.0 XYN,234,235,233.0 XYO,234,235,233.0
# write a function get_column_from_file to get nth column from a file
def paste(file1, n, file2, m, file3):
col1 = get_column_from_file(file1, n)
col2 = get_column_from_file(file2, m)
with open(file3, "w") as f:
cwriter = csv.writer(f)
cwriter.writerows(zip(col1, col2))
def get_column_from_file(filepath, n):
with open(filepath) as f:
creader = csv.reader(f)
return [row[n] for row in creader]
paste("stocks_quoted_with_header.csv", 0, "stocks_quoted_with_header.csv", 2, "paste.csv")
!cat paste.csv
name,high IBM,128 XYS,235 XYM,235 XYN,235 XYO,235
!pip install XlsxWriter
Collecting XlsxWriter Using cached XlsxWriter-3.0.3-py3-none-any.whl (149 kB) Installing collected packages: XlsxWriter Successfully installed XlsxWriter-3.0.3
import xlsxwriter
workbook = xlsxwriter.Workbook("Table.xlsx")
worksheet = workbook.add_worksheet()
rownumber = 0
columnnumber = 0
worksheet.write(rownumber, columnnumber, "Hello" )
workbook.close()
workbook = xlsxwriter.Workbook("Table_sheet.xlsx")
worksheet = workbook.add_worksheet("hello")# you can give name to worksheet
rownumber = 0
columnnumber = 0
worksheet.write(rownumber, columnnumber, "Hello" )
workbook.close()
workbook = xlsxwriter.Workbook("Table_A2.xlsx")
worksheet = workbook.add_worksheet("hello")# you can give name to worksheet
worksheet.write("A2", "Hello" ) # you can also give cell identifier instead of row, column
workbook.close()
import random
workbook = xlsxwriter.Workbook("Formula.xlsx")
worksheet = workbook.add_worksheet("numbers")# you can give name to worksheet
c = 2
for r in range(10): # will go from 0 to 9
worksheet.write(r, c, random.random()*100)
worksheet.write(10, c, "=SUM(C1:C10)")
workbook.close()
import random
workbook = xlsxwriter.Workbook("Formula2.xlsx")
worksheet = workbook.add_worksheet("numbers")# you can give name to worksheet
for r in range(1, 11): # will go from 0 to 9
worksheet.write(f"A{r}", random.random()*100)
worksheet.write("A11", "=SUM(A1:A10)") # column A and eleventh row
workbook.close()
workbook = xlsxwriter.Workbook("Bold.xlsx")
worksheet = workbook.add_worksheet("Hello")# you can give name to worksheet
bold = workbook.add_format({'bold': True})
worksheet.write("A1", "Hello", bold)
workbook.close()
workbook = xlsxwriter.Workbook("Number_format.xlsx")
worksheet = workbook.add_worksheet("Hello")# you can give name to worksheet
number_format = workbook.add_format({'num_format': '$#,##0'})
worksheet.write("A1", 1000, number_format)
workbook.close()