Module 2 - Day 5

problem

There is indexdata given as a list. Find weekly average for each stock.

  indexdata = [('IBM', 'Monday', 111.71436961893693),
          ('IBM', 'Tuesday', 141.21220022208635),
          ('IBM', 'Wednesday', 112.40571010053796),
          ('IBM', 'Thursday', 137.54133351926248),
          ('IBM', 'Friday', 140.25154281801224),
          ('MICROSOFT', 'Monday', 235.0403622499107),
          ('MICROSOFT', 'Tuesday', 225.0206535036475),
          ('MICROSOFT', 'Wednesday', 216.10342426936444),
          ('MICROSOFT', 'Thursday', 200.38038844494193),
          ('MICROSOFT', 'Friday', 235.80850482793264),
          ('APPLE', 'Monday', 321.49182055844256),
          ('APPLE', 'Tuesday', 340.63612771662815),
          ('APPLE', 'Wednesday', 303.9065277507285),
          ('APPLE', 'Thursday', 338.1350605764038),
          ('APPLE', 'Friday', 318.3912296144338)]


def mean(nums):
    return sum(nums)/len(nums)

def find_average(symbol, data):
    prices = [price for symbol_, day, price in data if symbol == symbol_] # 
    return mean(prices)

def weekly_average(data):
    weekly_avg = {}
    for symbol, day, price in data:
        avg = find_average(symbol, data)
        weekly_avg[symbol] = avg

    return weekly_avg

indexdata = [('IBM', 'Monday', 111.71436961893693),
          ('IBM', 'Tuesday', 141.21220022208635),
          ('IBM', 'Wednesday', 112.40571010053796),
          ('IBM', 'Thursday', 137.54133351926248),
          ('IBM', 'Friday', 140.25154281801224),
          ('MICROSOFT', 'Monday', 235.0403622499107),
          ('MICROSOFT', 'Tuesday', 225.0206535036475),
          ('MICROSOFT', 'Wednesday', 216.10342426936444),
          ('MICROSOFT', 'Thursday', 200.38038844494193),
          ('MICROSOFT', 'Friday', 235.80850482793264),
          ('APPLE', 'Monday', 321.49182055844256),
          ('APPLE', 'Tuesday', 340.63612771662815),
          ('APPLE', 'Wednesday', 303.9065277507285),
          ('APPLE', 'Thursday', 338.1350605764038),
          ('APPLE', 'Friday', 318.3912296144338)]
weekly_average(indexdata)

{'IBM': 128.62503125576717,
 'MICROSOFT': 222.47066665915946,
 'APPLE': 324.51215324332736}

def get_symbols(data):
    return [symbol for symbol, day, price in data]

def unique(items):
    return set(items)

def weekly_average(data):
    weekly_avg = {}
    symbols = unique(get_symbols(data))
    for symbol in symbols:
        avg = find_average(symbol, data)
        weekly_avg[symbol] = avg

    return weekly_avg

weekly_average(indexdata)

{'APPLE': 324.51215324332736,
 'IBM': 128.62503125576717,
 'MICROSOFT': 222.47066665915946}

def weekly_average(data):
    symbols = unique(get_symbols(data))
    return {find_average(symbol, data) for symbol in symbols}

weekly_average(indexdata)

{128.62503125576717, 222.47066665915946, 324.51215324332736}

def weekly_average(data):
    symbols = unique(get_symbols(data))
    return {symbol:find_average(symbol, data) for symbol in symbols}

weekly_average(indexdata)

{'APPLE': 324.51215324332736,
 'IBM': 128.62503125576717,
 'MICROSOFT': 222.47066665915946}

a = weekly_average(indexdata)

{'APPLE': 324.51215324332736,
 'IBM': 128.62503125576717,
 'MICROSOFT': 222.47066665915946}

2 + 3

a = 2 + 3

Writing Excel files with xlsxwriter

xlsxwriter is third party library (it does not come with python by default) which allows us to write excel files from python.

Detailed documentation can be seen here

Documentation

import xlsxwriter

Using cell identifier


workbook = xlsxwriter.Workbook("sample.xlsx")
sheet = workbook.add_worksheet()
sheet.write("A1", "Test text") #cellid, data
workbook.close()

Using row number and column number

workbook = xlsxwriter.Workbook("another_sample.xlsx")
sheet = workbook.add_worksheet("mydata")
sheet.write(0,2, "Some data to test") # row number, column number , data. row/columnn numbers start at zero
workbook.close()

problem

Write a function write_tabular_data which takes a 2D list and writes in in excel file

>>> write_tabular_data(data, excelfilename)

indexdata

[('IBM', 'Monday', 111.71436961893693),
 ('IBM', 'Tuesday', 141.21220022208635),
 ('IBM', 'Wednesday', 112.40571010053796),
 ('IBM', 'Thursday', 137.54133351926248),
 ('IBM', 'Friday', 140.25154281801224),
 ('MICROSOFT', 'Monday', 235.0403622499107),
 ('MICROSOFT', 'Tuesday', 225.0206535036475),
 ('MICROSOFT', 'Wednesday', 216.10342426936444),
 ('MICROSOFT', 'Thursday', 200.38038844494193),
 ('MICROSOFT', 'Friday', 235.80850482793264),
 ('APPLE', 'Monday', 321.49182055844256),
 ('APPLE', 'Tuesday', 340.63612771662815),
 ('APPLE', 'Wednesday', 303.9065277507285),
 ('APPLE', 'Thursday', 338.1350605764038),
 ('APPLE', 'Friday', 318.3912296144338)]

xlsxwriter.workbook # this is not what we want to create a wrokbook
xlsxwriter.Workbook # this is what we want!

xlsxwriter.workbook.Workbook

Iteration patterns

data = "Helllo some words for test".split()

for i, w in enumerate(data):
    print(i, w)

0 Helllo
1 some
2 words
3 for
4 test

Question

Can we edit existing excel documents ? Yes , but with another library (openpyxl)

def write_tabular_data(data, filepath):
    workbook = xlsxwriter.Workbook(filepath)
    sheet = workbook.add_worksheet()

    for i, row in enumerate(data):
        symbol, day, price = row
        sheet.write(i, 0, symbol)
        sheet.write(i, 1, day)
        sheet.write(i, 2, price)

    workbook.close()

write_tabular_data(indexdata, "index.xlsx")

def write_tabular_data(data, filepath):
    workbook = xlsxwriter.Workbook(filepath)
    sheet = workbook.add_worksheet()

    for i, row in enumerate(data):
        for j, coldata in enumerate(row):
            sheet.write(i, j, coldata)
    workbook.close()

write_tabular_data(indexdata, "indexdata.xlsx")

def write_tabular_data_with_headers(data, headers, filepath):
    workbook = xlsxwriter.Workbook(filepath)
    sheet = workbook.add_worksheet()

    for i, h in enumerate(headers):
        sheet.write(0, i, h)
    
    for i, row in enumerate(data, start=1): #row has to start at 1 now
        for j, coldata in enumerate(row):
            sheet.write(i, j, coldata)
    workbook.close()

write_tabular_data_with_headers(indexdata, ['Symbol', 'Day',  'Price'], "with_header.xlsx")

Making some cells bold

workbook = xlsxwriter.Workbook("Bold.xlsx")
worksheet = workbook.add_worksheet("hello")
bold = workbook.add_format({"bold":True})

worksheet.write("A1", "Hello", bold) 
worksheet.write(1, 1, "World", bold)
workbook.close()

def write_tabular_data_with_headers(data, headers, filepath):
    workbook = xlsxwriter.Workbook(filepath)
    sheet = workbook.add_worksheet()
    bold = workbook.add_format({"bold":True})
    
    for i, h in enumerate(headers):
        sheet.write(0, i, h, bold)
    
    for i, row in enumerate(data, start=1): #row has to start at 1 now
        for j, coldata in enumerate(row):
            sheet.write(i, j, coldata) # this suport only text and numeric data
    workbook.close()

write_tabular_data_with_headers(indexdata, ['Symbol', 'Day',  'Price'], "bold_headers.xlsx") # current working directory

folder = "/home/jupyter-vikrant/test"
filename = "bold_headers.xlsx"
filepath = os.path.join(folder, filename)
print(filepath)
write_tabular_data_with_headers(indexdata, ['Symbol', 'Day',  'Price'],filepath)

/home/jupyter-vikrant/test/bold_headers.xlsx

Any where you give just filename instaed of filepath it will be taken as file in current working directory

number format

workbook = xlsxwriter.Workbook("number_format.xlsx")
worksheet = workbook.add_worksheet()
number_format = workbook.add_format({"num_format": "$#,##0"})
worksheet.write("A1", 1000, number_format)
workbook.close()

Formula

import random

workbook = xlsxwriter.Workbook("formula.xlsx")
worksheet = workbook.add_worksheet("numbers")

for i, n in enumerate(range(10)):
    worksheet.write(i, 0, n)

worksheet.write(10, 0, "=SUM(A1:A10)")
workbook.close()

problem

Write a function write_multiplication_tables which writes multiplication tables upto n in a excel file

>>> write_multiplication_tables(10, "tables.xlsx")

divide it into subproblems 1. generate tables , write a function generate_tables! 2. write tables using our existing write_tabular_data 3. then combine it in a single function write_multiplication_tables

[2, 4, 6, 8, 10, 12, 14, 16, 18, 20]


def generate_table(n):
    return [n*i for i in range(1, 11)]

list(range(2, 2*11, 2))

[2, 4, 6, 8, 10, 12, 14, 16, 18, 20]

def multipication_tables(n):
    return [generate_table(i) for i in range(1, n+1)]

multipication_tables(7)

[[1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
 [2, 4, 6, 8, 10, 12, 14, 16, 18, 20],
 [3, 6, 9, 12, 15, 18, 21, 24, 27, 30],
 [4, 8, 12, 16, 20, 24, 28, 32, 36, 40],
 [5, 10, 15, 20, 25, 30, 35, 40, 45, 50],
 [6, 12, 18, 24, 30, 36, 42, 48, 54, 60],
 [7, 14, 21, 28, 35, 42, 49, 56, 63, 70]]

def write_multiplication_tables(n, filename):
    tables = multipication_tables(n)
    write_tabular_data(tables, filename)

write_multiplication_tables(14, "multtables.xlsx")

tables = multipication_tables(8)

tables

[[1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
 [2, 4, 6, 8, 10, 12, 14, 16, 18, 20],
 [3, 6, 9, 12, 15, 18, 21, 24, 27, 30],
 [4, 8, 12, 16, 20, 24, 28, 32, 36, 40],
 [5, 10, 15, 20, 25, 30, 35, 40, 45, 50],
 [6, 12, 18, 24, 30, 36, 42, 48, 54, 60],
 [7, 14, 21, 28, 35, 42, 49, 56, 63, 70],
 [8, 16, 24, 32, 40, 48, 56, 64, 72, 80]]

def multipication_tables(start=1, end=10):
    return [generate_table(i) for i in range(start, end+1)]

multipication_tables(3, 13)

[[3, 6, 9, 12, 15, 18, 21, 24, 27, 30],
 [4, 8, 12, 16, 20, 24, 28, 32, 36, 40],
 [5, 10, 15, 20, 25, 30, 35, 40, 45, 50],
 [6, 12, 18, 24, 30, 36, 42, 48, 54, 60],
 [7, 14, 21, 28, 35, 42, 49, 56, 63, 70],
 [8, 16, 24, 32, 40, 48, 56, 64, 72, 80],
 [9, 18, 27, 36, 45, 54, 63, 72, 81, 90],
 [10, 20, 30, 40, 50, 60, 70, 80, 90, 100],
 [11, 22, 33, 44, 55, 66, 77, 88, 99, 110],
 [12, 24, 36, 48, 60, 72, 84, 96, 108, 120],
 [13, 26, 39, 52, 65, 78, 91, 104, 117, 130]]

data2d = multipication_tables(3, 13)

data2d[0] # what is this?
data2d[0][0]

data2d[1][0]

data2d[2][0]

data2d[2][0] # first index is row number and column number

def column(data, c):
    rowcount = len(data)
    return [data[i][c] for i in range(rowcount)]

column(data2d, 0)

[3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13]

data2d

[[3, 6, 9, 12, 15, 18, 21, 24, 27, 30],
 [4, 8, 12, 16, 20, 24, 28, 32, 36, 40],
 [5, 10, 15, 20, 25, 30, 35, 40, 45, 50],
 [6, 12, 18, 24, 30, 36, 42, 48, 54, 60],
 [7, 14, 21, 28, 35, 42, 49, 56, 63, 70],
 [8, 16, 24, 32, 40, 48, 56, 64, 72, 80],
 [9, 18, 27, 36, 45, 54, 63, 72, 81, 90],
 [10, 20, 30, 40, 50, 60, 70, 80, 90, 100],
 [11, 22, 33, 44, 55, 66, 77, 88, 99, 110],
 [12, 24, 36, 48, 60, 72, 84, 96, 108, 120],
 [13, 26, 39, 52, 65, 78, 91, 104, 117, 130]]

column(data2d, 1)

[6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26]

data2d

[[3, 6, 9, 12, 15, 18, 21, 24, 27, 30],
 [4, 8, 12, 16, 20, 24, 28, 32, 36, 40],
 [5, 10, 15, 20, 25, 30, 35, 40, 45, 50],
 [6, 12, 18, 24, 30, 36, 42, 48, 54, 60],
 [7, 14, 21, 28, 35, 42, 49, 56, 63, 70],
 [8, 16, 24, 32, 40, 48, 56, 64, 72, 80],
 [9, 18, 27, 36, 45, 54, 63, 72, 81, 90],
 [10, 20, 30, 40, 50, 60, 70, 80, 90, 100],
 [11, 22, 33, 44, 55, 66, 77, 88, 99, 110],
 [12, 24, 36, 48, 60, 72, 84, 96, 108, 120],
 [13, 26, 39, 52, 65, 78, 91, 104, 117, 130]]

def transpose(data):
    colcount = len(data[0]) # assume that every row has same number of columns
    return [column(data, i) for i in range(colcount)]

transpose(data2d)

[[3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13],
 [6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26],
 [9, 12, 15, 18, 21, 24, 27, 30, 33, 36, 39],
 [12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52],
 [15, 20, 25, 30, 35, 40, 45, 50, 55, 60, 65],
 [18, 24, 30, 36, 42, 48, 54, 60, 66, 72, 78],
 [21, 28, 35, 42, 49, 56, 63, 70, 77, 84, 91],
 [24, 32, 40, 48, 56, 64, 72, 80, 88, 96, 104],
 [27, 36, 45, 54, 63, 72, 81, 90, 99, 108, 117],
 [30, 40, 50, 60, 70, 80, 90, 100, 110, 120, 130]]

def column(data, c):
    """extract c'th column from 2d data""" 
    return [row[c] for row in data]

column(data2d, 5)

[18, 24, 30, 36, 42, 48, 54, 60, 66, 72, 78]

data2d = multipication_tables(1, 13)

column(data2d, 1)

[2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26]

column(data2d, 0)

[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13]

def write_multiplication_tables(n, filename):
    tables = multipication_tables(1, n)
    tables_ = transpose(tables)
    write_tabular_data(tables_, filename)

write_multiplication_tables(17, "17tables.xlsx")