Python Virtual Training For Arcesium - Module II - Day 5¶

Jun Jul 18-22, 2022 Vikrant Patil

All notes are available online at https://notes.pipal.in/2022/arcesium_finop_batch1/

Please accept the invitation that you have received in your email and login to

https://engage.pipal.in/

From there launch your jupyter lab. Create a notebook with name module2-day5.

Shutdown all older notebooks so that load on server reduces.

© Pipal Academy LLP

Wroking with csv file¶

In [19]:
%%file stocks_quoted.csv
"IBM","125","128","123"
"XYS","234","235","233"
"XYM","234","235","233"
"XYN","234","235","233"
"XYO","234","235","233"
Overwriting stocks_quoted.csv
In [2]:
import csv
In [3]:
def read_csv(filepath):
    with open(filepath) as f:
        creader = csv.reader(f)
        
        for fields in creader:
            print(fields)
In [4]:
read_csv("stocks_quoted.csv")
['IBM', '125', '128', '123']
['XYS', '234', '235', '233']
['XYM', '234', '235', '233']
['XYN', '234', '235', '233']
['XYO', '234', '235', '233']
In [12]:
fieldtypes = [str, int, int, float] # this is list of functions to convert into appropriate type

def read_csv_with_type(filepath, fieldtypes):
    with open(filepath) as f:
        creader = csv.reader(f)
        data = []
        for fields in creader: # you get data as fields of every row
            converted_fields = [converter(field) for converter, field in zip(fieldtypes, fields)]
            data.append(converted_fields)
            
        return data
In [6]:
fieldtypes = [str, int, int, float] 
read_csv_with_type("stocks_quoted.csv", fieldtypes)
Out[6]:
[['IBM', 125, 128, 123.0],
 ['XYS', 234, 235, 233.0],
 ['XYM', 234, 235, 233.0],
 ['XYN', 234, 235, 233.0],
 ['XYO', 234, 235, 233.0]]
In [7]:
csv_data = read_csv_with_type("stocks_quoted.csv", fieldtypes)
In [8]:
csv_data[0]
Out[8]:
['IBM', 125, 128, 123.0]
In [9]:
csv_data[-1]
Out[9]:
['XYO', 234, 235, 233.0]
In [10]:
[row for row in csv_data if row[1] > 200]
Out[10]:
[['XYS', 234, 235, 233.0],
 ['XYM', 234, 235, 233.0],
 ['XYN', 234, 235, 233.0],
 ['XYO', 234, 235, 233.0]]
In [11]:
[row for row in csv_data if row[0] == 'IBM']
Out[11]:
[['IBM', 125, 128, 123.0]]
In [14]:
%%file stocks_unquoted.csv
IBM,125,128,123
XYS,234,235,233
XYM,234,235,233
XYN,234,235,233
XYO,234,235,233
Overwriting stocks_unquoted.csv
In [15]:
read_csv_with_type("stocks_unquoted.csv", fieldtypes)
Out[15]:
[['IBM', 125, 128, 123.0],
 ['XYS', 234, 235, 233.0],
 ['XYM', 234, 235, 233.0],
 ['XYN', 234, 235, 233.0],
 ['XYO', 234, 235, 233.0]]
In [32]:
%%file stocks_quoted_with_header.csv
"name","value","high","low,0"
"IBM","125","128","123"
"XYS","234","235","233"
"XYM","234","235","233"
"XYN","234","235","233"
"XYO","234","235","233"
Overwriting stocks_quoted_with_header.csv
In [33]:
f = open("stocks_quoted_with_header.csv")
creader = csv.reader(f)
    
In [34]:
next(creader)
Out[34]:
['name', 'value', 'high', 'low,0']
In [35]:
next(creader)
Out[35]:
['IBM', '125', '128', '123']
In [36]:
next(creader)
Out[36]:
['XYS', '234', '235', '233']
In [37]:
for fields in creader: # it will go over remaining items
    print(fields)
['XYM', '234', '235', '233']
['XYN', '234', '235', '233']
['XYO', '234', '235', '233']
In [38]:
def read_csv(filepath, fieldtypes, header=True):
    with open(filepath) as f:
        creader = csv.reader(f)
        
        if header:
            column_headers = next(creader)
        data = []
        for fields in creader:
            data.append([convert(field) for convert, field in zip(fieldtypes, fields)])
            
        return data
In [40]:
read_csv("stocks_quoted_with_header.csv", fieldtypes)
Out[40]:
[['IBM', 125, 128, 123.0],
 ['XYS', 234, 235, 233.0],
 ['XYM', 234, 235, 233.0],
 ['XYN', 234, 235, 233.0],
 ['XYO', 234, 235, 233.0]]
In [41]:
with open("stocks_quoted_with_header.csv") as f:
    creader = csv.DictReader(f) # first row as column header
    for fields in creader:
        print(fields)
{'name': 'IBM', 'value': '125', 'high': '128', 'low,0': '123'}
{'name': 'XYS', 'value': '234', 'high': '235', 'low,0': '233'}
{'name': 'XYM', 'value': '234', 'high': '235', 'low,0': '233'}
{'name': 'XYN', 'value': '234', 'high': '235', 'low,0': '233'}
{'name': 'XYO', 'value': '234', 'high': '235', 'low,0': '233'}
In [42]:
def get_column(filepath, columnname):
    with open(filepath) as f:
        creader = csv.DictReader(f)
        return [row[columnname] for row in creader]
In [43]:
get_column("stocks_quoted_with_header.csv", "name")
Out[43]:
['IBM', 'XYS', 'XYM', 'XYN', 'XYO']
In [44]:
get_column("stocks_quoted_with_header.csv", "low,0")
Out[44]:
['123', '233', '233', '233', '233']
In [45]:
get_column("stocks_quoted_with_header.csv", "value")
Out[45]:
['125', '234', '234', '234', '234']
In [46]:
def get_column(filepath, columnname, converter):
    with open(filepath) as f:
        creader = csv.DictReader(f)
        return [converter(row[columnname]) for row in creader]
In [47]:
get_column("stocks_quoted_with_header.csv", "value", float)
Out[47]:
[125.0, 234.0, 234.0, 234.0, 234.0]
In [48]:
get_column("stocks_quoted_with_header.csv", "value", int)
Out[48]:
[125, 234, 234, 234, 234]
In [69]:
fieldtypes = {'value':float,
             'high':int}

def read_csv_as_columns(filepath, fieldconverters):
    with open(filepath) as f:
        creader = csv.DictReader(f)
        
        data = {}
        for row in creader: # for every row you get a dictionary
            
            for key in row: # going over every column
                
                if key in fieldconverters: # do I have a converter for this column
                    convert = fieldconverters[key]
                    data.setdefault(key,[]).append(convert(row[key]))
                else:
                    data.setdefault(key,[]).append(row[key])
            
        return data
In [67]:
columndata = read_csv_as_columns("stocks_quoted_with_header.csv", fieldtypes)
In [68]:
columndata['name']
Out[68]:
['IBM', 'XYS', 'XYM', 'XYN', 'XYO']
In [56]:
columndata['value']
Out[56]:
[125.0, 234.0, 234.0, 234.0, 234.0]
In [62]:
d = {}

d.setdefault("value", []).append(5)
In [63]:
d
Out[63]:
{'value': [5]}
In [64]:
d.setdefault("value", []).append(10)
In [65]:
d
Out[65]:
{'value': [5, 10]}
In [70]:
stock = {"name":"IBM", "value":125}
In [71]:
stock['low']
---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
Input In [71], in <cell line: 1>()
----> 1 stock['low']

KeyError: 'low'
In [72]:
stock.setdefault('low', 123)
Out[72]:
123
In [73]:
stock
Out[73]:
{'name': 'IBM', 'value': 125, 'low': 123}
In [79]:
# consider fields from csv file that we were working on
columnsdata = {"name":[],
              "value":[],
              "high":[],
              "low":[]}

columnsdata['name'].append("IBM")
columnsdata['value'].append(125)
columnsdata['high'].append(130)
columnsdata['low'].append(123)
In [77]:
columnsdata
Out[77]:
{'name': ['IBM'], 'value': [125], 'high': [130], 'low': [123]}
In [78]:
cdata = {}

cdata.setdefault('name', []).append('IBM')
cdata.setdefault('value', []).append(125)
cdata.setdefault('high', []).append(130)
cdata.setdefault('low', []).append(123)
cdata
Out[78]:
{'name': ['IBM'], 'value': [125], 'high': [130], 'low': [123]}

Writing csv files¶

In [80]:
def write_csv(data, header, filename):
    with open(filename, "w") as f:
        cwriter = csv.writer(f)
        cwriter.writerow(header) # this will write single row
        cwriter.writerows(data) # this will write multiple rows a a time
In [83]:
data = [['IBM', 125, 128, 123.0],
 ['XYS', 234, 235, 233.0],
 ['XYM', 234, 235, 233.0],
 ['XYN', 234, 235, 233.0],
 ['XYO', 234, 235, 233.0]]
In [84]:
columnnames = ['name', 'value', 'high', 'low']
In [85]:
write_csv(data, columnnames, "csvdata.csv")
In [86]:
!python cat.py csvdata.csv
name,value,high,low
IBM,125,128,123.0
XYS,234,235,233.0
XYM,234,235,233.0
XYN,234,235,233.0
XYO,234,235,233.0

In [87]:
data.insert(0,["name","value","high","low"])
In [89]:
data # this will not work with above function
Out[89]:
[['name', 'value', 'high', 'low'],
 ['IBM', 125, 128, 123.0],
 ['XYS', 234, 235, 233.0],
 ['XYM', 234, 235, 233.0],
 ['XYN', 234, 235, 233.0],
 ['XYO', 234, 235, 233.0]]
In [90]:
write_csv(data[1:], data[0], "test.csv")
In [91]:
!cat test.csv
name,value,high,low
IBM,125,128,123.0
XYS,234,235,233.0
XYM,234,235,233.0
XYN,234,235,233.0
XYO,234,235,233.0
In [92]:
write_csv(data, columnnames, "csvdata.csv")
In [93]:
!cat csvdata.csv
name,value,high,low
name,value,high,low
IBM,125,128,123.0
XYS,234,235,233.0
XYM,234,235,233.0
XYN,234,235,233.0
XYO,234,235,233.0
In [95]:
# write a function get_column_from_file to get nth column from a file

def paste(file1, n, file2, m, file3):
    col1 = get_column_from_file(file1, n)
    col2 = get_column_from_file(file2, m)
    
    with open(file3, "w") as f:
        cwriter = csv.writer(f)
        cwriter.writerows(zip(col1, col2))
In [101]:
def get_column_from_file(filepath, n):
    with open(filepath) as f:
        creader = csv.reader(f)
        return [row[n] for row in creader]
In [99]:
paste("stocks_quoted_with_header.csv", 0, "stocks_quoted_with_header.csv", 2, "paste.csv")
In [100]:
!cat paste.csv
name,high
IBM,128
XYS,235
XYM,235
XYN,235
XYO,235

Writing excel files using XlsxWriter¶

In [102]:
!pip install XlsxWriter
Collecting XlsxWriter
  Using cached XlsxWriter-3.0.3-py3-none-any.whl (149 kB)
Installing collected packages: XlsxWriter
Successfully installed XlsxWriter-3.0.3
In [103]:
import xlsxwriter

Simple row,column¶

In [104]:
workbook = xlsxwriter.Workbook("Table.xlsx")
worksheet = workbook.add_worksheet()

rownumber = 0
columnnumber = 0
worksheet.write(rownumber, columnnumber, "Hello" )
workbook.close()

Custom sheet name¶

In [105]:
workbook = xlsxwriter.Workbook("Table_sheet.xlsx")
worksheet = workbook.add_worksheet("hello")# you can give name to worksheet

rownumber = 0
columnnumber = 0
worksheet.write(rownumber, columnnumber, "Hello" )
workbook.close()

With cell identifier¶

In [106]:
workbook = xlsxwriter.Workbook("Table_A2.xlsx")
worksheet = workbook.add_worksheet("hello")# you can give name to worksheet

worksheet.write("A2", "Hello" ) # you can also give cell identifier instead of row, column
workbook.close()

Formula¶

In [107]:
import random
workbook = xlsxwriter.Workbook("Formula.xlsx")
worksheet = workbook.add_worksheet("numbers")# you can give name to worksheet

c = 2
for r in range(10): # will go from 0 to 9
    worksheet.write(r, c, random.random()*100)
    
worksheet.write(10, c, "=SUM(C1:C10)")
workbook.close()
In [110]:
import random
workbook = xlsxwriter.Workbook("Formula2.xlsx")
worksheet = workbook.add_worksheet("numbers")# you can give name to worksheet

for r in range(1, 11): # will go from 0 to 9
    worksheet.write(f"A{r}", random.random()*100)
    
worksheet.write("A11", "=SUM(A1:A10)") # column A and eleventh row
workbook.close()

Make some cell bold¶

In [116]:
workbook = xlsxwriter.Workbook("Bold.xlsx")
worksheet = workbook.add_worksheet("Hello")# you can give name to worksheet
    
bold = workbook.add_format({'bold': True})

worksheet.write("A1", "Hello", bold)
workbook.close()

Add number formating to a cell¶

In [115]:
workbook = xlsxwriter.Workbook("Number_format.xlsx")
worksheet = workbook.add_worksheet("Hello")# you can give name to worksheet
    
number_format = workbook.add_format({'num_format': '$#,##0'})

worksheet.write("A1", 1000, number_format)
workbook.close()

For more complex stuff you can have a look at this library documentation

In [ ]: