def average_rows_of_a_file(filepath):
    """Return the average of each row of a comma-separated numeric file.

    Every non-blank line of the file is parsed with extract_row and averaged
    with mean. Blank lines (for example an empty line at the end of the
    file) are skipped instead of being handed to the float conversion.

    Returns a list with one average per non-blank line, in file order.
    """
    averages = []
    with open(filepath) as f:
        for line in f:
            if not line.strip():
                # nothing to parse on a blank line; float("") would fail
                continue
            averages.append(mean(extract_row(line)))
    return averages
    
def extract_row(line):
    """Parse one comma-separated line of numbers into a list of floats.

    Surrounding whitespace is stripped first, so a trailing "\\n" is removed
    before the line is split on commas.
    """
    fields = line.strip().split(",")
    return list(map(float, fields))

def mean(row):
    """Return the arithmetic mean of the numbers in row (sum divided by count)."""
    total = sum(row)
    count = len(row)
    return total / count


mean([1, 2, 3, 4, 5])

3.0


extract_row("23,45,4.6,56,34") # this testing is incomplete! it does not have \n at end

[23.0, 45.0, 4.6, 56.0, 34.0]


extract_row("23,45,4.6,56,34\n")

[23.0, 45.0, 4.6, 56.0, 34.0]


extract_row("23,45,4.6,56,34    \n")

[23.0, 45.0, 4.6, 56.0, 34.0]


%%file numeric_values.txt
177.86307848009363,168.57970829672172,56.64667509485027,140.9453091498533,7.60588098031354,35.46585201959433,109.79299526934932,127.73481373344411,142.6569862535187,55.42226271294348
50.73110854833257,37.04770129180774,134.12303059207613,24.094715946706696,16.62709152318884,76.79158320138639,18.869039939685138,55.16544658992794,9.080542932129527,86.33584190856514
78.13874487140086,6.230686793261686,9.90129561402988,38.73432202944475,28.24102532900047,178.32675126964756,156.91807110256244,13.715926468541555,40.65402349504774,56.26057091162304
52.33390135982785,82.18187238357866,165.98732406823763,16.24849453464723,134.6940059960185,43.074586969351294,164.98109061612234,49.01998365034387,46.47274837268185,71.57223840684533
14.054024611303051,33.26061264583661,6.025038296009622,46.20617825321064,20.36303569589534,76.36142148522664,35.7247427244146,96.95015828855182,14.591002411820476,114.73930165632399
176.98106147487977,69.94975607322517,118.2574420888458,40.0226187414381,65.86533365890166,112.00582375498792,111.92750266225774,26.795263824303984,113.78920957808114,129.28326476993482
35.08017332225934,49.66626098662552,97.1770522711201,103.31251256854945,45.385340645578644,70.73296754777625,124.0599818449227,3.1152276402996506,189.621114798281,26.984391777865575
47.86347022018917,143.08376550789785,48.86114868739707,86.02253627471258,57.673544870935736,85.46964903214331,88.72038289528996,81.1572932565442,93.33330250009122,73.21497272029562
40.548227987191765,3.3788760416886054,45.81322292884595,18.41416684775437,0.4957688331691745,151.46049182954297,74.50089155354308,17.159975671658877,25.732237076201475,22.157154152625157
107.43073464586683,28.781131842076306,46.961173143962576,20.468161528556195,5.099354269264699,35.01542373037388,28.907504914509495,20.089203534798646,56.800621137020045,143.02788557795452

Writing numeric_values.txt


average_rows_of_a_file("numeric_values.txt")

[102.27135619906822,
 50.8866102473806,
 60.712141788455995,
 82.65662463576545,
 45.827551606859274,
 96.48772766268561,
 74.51350234032782,
 80.54000659654967,
 39.96610129222215,
 49.25811943243832]


import os
os.getcwd()

'/home/vikrant/trainings/2022/arcesium_finop_batch1'


average_rows_of_a_file('/home/vikrant/trainings/2022/arcesium_finop_batch1/numeric_values.txt')

[102.27135619906822,
 50.8866102473806,
 60.712141788455995,
 82.65662463576545,
 45.827551606859274,
 96.48772766268561,
 74.51350234032782,
 80.54000659654967,
 39.96610129222215,
 49.25811943243832]


text = "    hello world    "


text.rstrip()

'    hello world'


text.lstrip()

'hello world    '


text.strip() # strip removes both leading and trailing whitespace; spaces inside the text are kept

'hello world'


stock = {"name":"IBM", "value":125, "high":127, "low":123}


stock

{'name': 'IBM', 'value': 125, 'high': 127, 'low': 123}


stock['name']

'IBM'


stock['value'] = 124


stock

{'name': 'IBM', 'value': 124, 'high': 127, 'low': 123}


stock['quantity']

---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
Input In [31], in <cell line: 1>()
----> 1 stock['quantity']

KeyError: 'quantity'


stock.get('name') # will work same as stock['name']

'IBM'


stock.get('quantity') # this will not fail..no error


print(stock.get('quantity'))

None


stock.get('quantity', 0)

0


stock.get('quantity', 0) # if the key 'quantity' does not exist in the stock dictionary, return 0 as its value

0


stock.get('quantity') # if the second argument is not given, it will return None


stock.get('exchange', "NYC") # you can choose your own value

'NYC'


stock

{'name': 'IBM', 'value': 124, 'high': 127, 'low': 123}


stock.setdefault('quantity', 0) # it will return 0 as value but also set it

0


stock

{'name': 'IBM', 'value': 124, 'high': 127, 'low': 123, 'quantity': 0}


%%file stocks.csv
IBM,125,128,123
XYS,234,235,233,4
XYM,234,235,233
XYN,234,235,233,10
XYO,234,235,233,15

Overwriting stocks.csv


values = ["IBM",125,128,123]
keys = ["name", "value", "high", "low"]


# compare this with list comprehensions loop!
data = {} # it is similar except that there is dictionary here!
for key, value in zip(keys, values):
    data[key] = value


data

{'name': 'IBM', 'value': 125, 'high': 128, 'low': 123}


{key:value for key,value in zip(keys, values)}

{'name': 'IBM', 'value': 125, 'high': 128, 'low': 123}


# compare this with list comprehensions loop!
data = {} # it is similar except that there is dictionary here!
length = len(values)
for i in range(length):
    data[keys[i]] = values[i]


data

{'name': 'IBM', 'value': 125, 'high': 128, 'low': 123}


def load_stocks_data(filename):
    """Load stock records from a comma-separated file.

    Each non-blank line is read as ``name,value,high,low[,quantity]``. The
    first field is kept as a string and every remaining field is converted
    to float. A line with fewer fields than keys yields a dict without the
    missing trailing keys (for example no 'quantity').

    Blank lines are skipped so that an empty line (typically at the end of
    the file) does not produce a bogus record.

    Returns a list of dicts, one per non-blank line, in file order.
    Raises ValueError if a numeric field cannot be converted to float.
    """
    keys = ["name", "value", "high", "low", 'quantity']
    filedata = []
    with open(filename) as f:
        for line in f:
            line = line.strip()
            if not line:
                continue
            name, *numbers = line.split(",")
            # the 0th item is a string, so it is handled separately
            values = [name] + [float(v) for v in numbers]
            # zip stops at the shorter sequence, so absent columns are simply omitted
            filedata.append(dict(zip(keys, values)))
    return filedata


stocksdata = load_stocks_data("stocks.csv")


stocksdata[0]

{'name': 'IBM', 'value': 125.0, 'high': 128.0, 'low': 123.0}


sum([stock['value'] for stock in stocksdata])

1061.0


sum([stock['quantity'] for stock in stocksdata])

---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
Input In [86], in <cell line: 1>()
----> 1 sum([stock['quantity'] for stock in stocksdata])

Input In [86], in <listcomp>(.0)
----> 1 sum([stock['quantity'] for stock in stocksdata])

KeyError: 'quantity'


sum([stock.get('quantity', 0) for stock in stocksdata])

29.0


sum([stock.get("quantity", 0)*stock['value'] for stock in stocksdata])

6786.0


dict(zip(keys, values))

{'name': 'IBM', 'value': 125, 'high': 128, 'low': 123}


prices = [('IBM', 'Monday', 111.71436961893693),
            ('IBM', 'Tuesday', 141.21220022208635),
            ('IBM', 'Wednesday', 112.40571010053796),
            ('IBM', 'Thursday', 137.54133351926248),
            ('IBM', 'Friday', 140.25154281801224),
            ('MICROSOFT', 'Monday', 235.0403622499107),
            ('MICROSOFT', 'Tuesday', 225.0206535036475),
            ('MICROSOFT', 'Wednesday', 216.10342426936444),
            ('MICROSOFT', 'Thursday', 200.38038844494193),
            ('MICROSOFT', 'Friday', 235.80850482793264),
            ('APPLE', 'Monday', 321.49182055844256),
            ('APPLE', 'Tuesday', 340.63612771662815),
            ('APPLE', 'Wednesday', 303.9065277507285),
            ('APPLE', 'Thursday', 338.1350605764038),
            ('APPLE', 'Friday', 318.3912296144338)]


def weekly_average(prices, symbol):
    """Return the mean price of symbol over the (name, day, price) tuples in prices."""
    symbol_prices = []
    for name, _day, price in prices:
        if name == symbol:
            symbol_prices.append(price)
    return mean(symbol_prices)


weekly_average(prices, "MICROSOFT")

222.47066665915946


symbols = set([symbol for symbol,day, value in prices])


symbols # it is a set .. it does not have key:value pairs... it is not a dictionary

{'APPLE', 'IBM', 'MICROSOFT'}


symbols.add("APPLE") # uniqueness


symbols

{'APPLE', 'IBM', 'MICROSOFT'}


ones = [1, 1, 1, 1]


ones.append(1)


ones

[1, 1, 1, 1, 1]


help(symbols.pop) # it will remove arbitrary...not last

Help on built-in function pop:

pop(...) method of builtins.set instance
    Remove and return an arbitrary set element.
    Raises KeyError if the set is empty.


s = {1, 2, 3, 4, 5, 5}

s

{1, 2, 3, 4, 5}


for i in range(len(s)):
    print(s.pop())

s

set()


s = {1, 2, 3, 4}


s.remove(1)

s

{2, 3, 4}


nums = [1, 2, 3, 4]


nums.remove(2)


nums

[1, 3, 4]


symbols

{'APPLE', 'IBM', 'MICROSOFT'}


weekly_averages = {symbol:weekly_average(prices, symbol) for symbol in symbols}


weekly_averages

{'APPLE': 324.51215324332736,
 'MICROSOFT': 222.47066665915946,
 'IBM': 128.62503125576717}


stocks=  {'APPLE': 700.5,
          'IBM': 300.1,
          'AT&T': 355.7,
          'AGILENT': 600.3}


[i for i in range(100) if i%7==0 or i%11==0]

[0,
 7,
 11,
 14,
 21,
 22,
 28,
 33,
 35,
 42,
 44,
 49,
 55,
 56,
 63,
 66,
 70,
 77,
 84,
 88,
 91,
 98,
 99]


stocks=  {'APPLE': 700.5,
          'IBM': 300.1,
          'AT&T': 355.7,
          'AGILENT': 600.3}


{k:v for k,v in stocks.items() if v > 300}

{'APPLE': 700.5, 'IBM': 300.1, 'AT&T': 355.7, 'AGILENT': 600.3}


symbols

{'APPLE', 'IBM', 'MICROSOFT'}


{k:v for k,v in stocks.items() if k in symbols}

{'APPLE': 700.5, 'IBM': 300.1}


def generate_test_data(filename):
    """Write a small comma-separated test file of repeated number words.

    Line k (counting from 1) holds the k-th word of "one".."ten" repeated
    k times and joined by commas, so each word occurs as often as the
    number it spells.
    """
    words = ["one","two","three","four","five","six","seven","eight","nine","ten"]
    with open(filename, "w") as file:
        for count, word in enumerate(words, start=1):
            repeated = [word] * count
            file.write(",".join(repeated) + "\n")


[1,2]*2

[1, 2, 1, 2]


["one"]*2

['one', 'one']


generate_test_data("words.csv")


!python cat.py words.csv

one
two,two
three,three,three
four,four,four,four
five,five,five,five,five
six,six,six,six,six,six
seven,seven,seven,seven,seven,seven,seven
eight,eight,eight,eight,eight,eight,eight,eight
nine,nine,nine,nine,nine,nine,nine,nine,nine
ten,ten,ten,ten,ten,ten,ten,ten,ten,ten


ones.count(1)

5


def get_all_words(filename):
    """Return the whitespace-separated tokens of the file as a list.

    The whole file is read at once and split on whitespace only, so a
    token such as "two,two" is returned as a single item.
    """
    with open(filename) as f:
        contents = f.read()
    return contents.split()
    
def word_freq(filename):
    """Return a dict mapping each distinct item from get_all_words(filename) to its count.

    The distinct items are taken from a set, and each is counted with
    list.count over the full list.
    """
    words = get_all_words(filename)
    return {word: words.count(word) for word in set(words)}


word_freq("words.csv")

{'two,two': 1,
 'four,four,four,four': 1,
 'eight,eight,eight,eight,eight,eight,eight,eight': 1,
 'ten,ten,ten,ten,ten,ten,ten,ten,ten,ten': 1,
 'three,three,three': 1,
 'six,six,six,six,six,six': 1,
 'nine,nine,nine,nine,nine,nine,nine,nine,nine': 1,
 'seven,seven,seven,seven,seven,seven,seven': 1,
 'five,five,five,five,five': 1,
 'one': 1}


get_all_words("words.csv")

['one',
 'two,two',
 'three,three,three',
 'four,four,four,four',
 'five,five,five,five,five',
 'six,six,six,six,six,six',
 'seven,seven,seven,seven,seven,seven,seven',
 'eight,eight,eight,eight,eight,eight,eight,eight',
 'nine,nine,nine,nine,nine,nine,nine,nine,nine',
 'ten,ten,ten,ten,ten,ten,ten,ten,ten,ten']


def get_all_words(filename):
    """Return every comma-separated word in the file as one flat list.

    Each line is stripped of surrounding whitespace and split on commas;
    the pieces from all lines are collected in file order.
    """
    with open(filename) as f:
        return [word for line in f for word in line.strip().split(",")]
    
def word_freq(filename):
    """Count how often each word returned by get_all_words(filename) occurs.

    Returns a dict of word -> count, built by counting every distinct
    word (taken from a set) in the full word list.
    """
    all_words = get_all_words(filename)
    return {word: all_words.count(word) for word in set(all_words)}


word_freq("words.csv")

{'five': 5,
 'ten': 10,
 'six': 6,
 'seven': 7,
 'two': 2,
 'four': 4,
 'three': 3,
 'nine': 9,
 'eight': 8,
 'one': 1}


    
def word_freq1(filename):
    """Count word occurrences in a single pass over get_all_words(filename).

    Returns a dict of word -> count; keys appear in order of first occurrence.
    """
    wordfreq = {}
    for word in get_all_words(filename):
        try:
            wordfreq[word] += 1
        except KeyError:
            # first time this word is seen
            wordfreq[word] = 1
    return wordfreq


word_freq1("words.csv")

{'one': 1,
 'two': 2,
 'three': 3,
 'four': 4,
 'five': 5,
 'six': 6,
 'seven': 7,
 'eight': 8,
 'nine': 9,
 'ten': 10}


# wordfreq[w]-> get/setdefault

def word_freq2(filename):
    """Count word occurrences in one pass, defaulting unseen words to zero.

    Returns a dict of word -> count for the words from get_all_words(filename).
    """
    wordfreq = {}
    for word in get_all_words(filename):
        # make sure the key exists with a count of 0 before incrementing
        wordfreq.setdefault(word, 0)
        wordfreq[word] += 1
    return wordfreq


word_freq2("words.csv")

{'one': 1,
 'two': 2,
 'three': 3,
 'four': 4,
 'five': 5,
 'six': 6,
 'seven': 7,
 'eight': 8,
 'nine': 9,
 'ten': 10}


for i in range(1, 11):
    print(i, i**2, i**3)

1 1 1
2 4 8
3 9 27
4 16 64
5 25 125
6 36 216
7 49 343
8 64 512
9 81 729
10 100 1000


"hello {name}!".format(name="vikrant")

'hello vikrant!'


"{x} {y} {z}".format(x=1, y=2, z=3)

'1 2 3'


x, y,z = 1, 2, 3
f"{x}, {y}, {z}"

'1, 2, 3'


"some string one {}, {}, {} thsese values".format(1, 2, 3)

'some string one 1, 2, 3 thsese values'


"some string one {0}, {2}, {1} thsese values".format("ABC", "XYZ", "MNO")

'some string one ABC, MNO, XYZ thsese values'


stocksdata

[{'name': 'IBM', 'value': 125.0, 'high': 128.0, 'low': 123.0},
 {'name': 'XYS', 'value': 234.0, 'high': 235.0, 'low': 233.0, 'quantity': 4.0},
 {'name': 'XYM', 'value': 234.0, 'high': 235.0, 'low': 233.0},
 {'name': 'XYN',
  'value': 234.0,
  'high': 235.0,
  'low': 233.0,
  'quantity': 10.0},
 {'name': 'XYO',
  'value': 234.0,
  'high': 235.0,
  'low': 233.0,
  'quantity': 15.0}]


for d in stocksdata:
    v = list(d.values())
    print("{},{},{},{}".format(v[0], v[1], v[2], v[3]))

IBM,125.0,128.0,123.0
XYS,234.0,235.0,233.0
XYM,234.0,235.0,233.0
XYN,234.0,235.0,233.0
XYO,234.0,235.0,233.0


for i in range(1, 11):
    print("{} {} {}".format(i, i**2, i**3))

1 1 1
2 4 8
3 9 27
4 16 64
5 25 125
6 36 216
7 49 343
8 64 512
9 81 729
10 100 1000


for i in range(1, 11):
    print("{c0} {c1} {c2}".format(c0=i, c1=i**2, c2=i**3))

1 1 1
2 4 8
3 9 27
4 16 64
5 25 125
6 36 216
7 49 343
8 64 512
9 81 729
10 100 1000


for i in range(1, 11):
    print("{c0:2d} {c1:3d} {c2:4d}".format(c0=i, c1=i**2, c2=i**3))

 1   1    1
 2   4    8
 3   9   27
 4  16   64
 5  25  125
 6  36  216
 7  49  343
 8  64  512
 9  81  729
10 100 1000


word_freq("words.csv")

{'five': 5,
 'ten': 10,
 'six': 6,
 'seven': 7,
 'two': 2,
 'four': 4,
 'three': 3,
 'nine': 9,
 'eight': 8,
 'one': 1}


freq = word_freq("words.csv")


freq

{'five': 5,
 'ten': 10,
 'six': 6,
 'seven': 7,
 'two': 2,
 'four': 4,
 'three': 3,
 'nine': 9,
 'eight': 8,
 'one': 1}


def get_freq(r):
    """Sort key: return the item at index 1 of r (the count in a (word, count) pair)."""
    frequency = r[1]
    return frequency
for w, f in sorted(freq.items(), key=get_freq):
    print(w, f)

one 1
two 2
three 3
four 4
five 5
six 6
seven 7
eight 8
nine 9
ten 10


for w, f in sorted(freq.items(), key=get_freq):
    print(w.rjust(5), f)

  one 1
  two 2
three 3
 four 4
 five 5
  six 6
seven 7
eight 8
 nine 9
  ten 10


for w, f in sorted(freq.items(), key=get_freq):
    print(w.rjust(5), "{:2d}".format(f), "*"*f)

  one  1 *
  two  2 **
three  3 ***
 four  4 ****
 five  5 *****
  six  6 ******
seven  7 *******
eight  8 ********
 nine  9 *********
  ten 10 **********


for w, f in sorted(freq.items()):
    print(w.rjust(5), "{:2d}".format(f), "*"*f)

eight  8 ********
 five  5 *****
 four  4 ****
 nine  9 *********
  one  1 *
seven  7 *******
  six  6 ******
  ten 10 **********
three  3 ***
  two  2 **


list(freq.items())

[('five', 5),
 ('ten', 10),
 ('six', 6),
 ('seven', 7),
 ('two', 2),
 ('four', 4),
 ('three', 3),
 ('nine', 9),
 ('eight', 8),
 ('one', 1)]


for w, f in sorted(freq.items(), key=get_freq, reverse=True):
    print(w.rjust(5), "{:2d}".format(f), "*"*f)

  ten 10 **********
 nine  9 *********
eight  8 ********
seven  7 *******
  six  6 ******
 five  5 *****
 four  4 ****
three  3 ***
  two  2 **
  one  1 *


freq

{'five': 5,
 'ten': 10,
 'six': 6,
 'seven': 7,
 'two': 2,
 'four': 4,
 'three': 3,
 'nine': 9,
 'eight': 8,
 'one': 1}


d = {'five': 5,
 'ten': 10,
 'six': 6,
 'seven': 7}

d

{'five': 5, 'ten': 10, 'six': 6, 'seven': 7}


freq

{'five': 5,
 'ten': 10,
 'six': 6,
 'seven': 7,
 'two': 2,
 'four': 4,
 'three': 3,
 'nine': 9,
 'eight': 8,
 'one': 1}


d.keys() & freq.keys()

{'five', 'seven', 'six', 'ten'}


import operator as op

def countifs(criterio_list, condstr):
    """Count the items of criterio_list that satisfy a spreadsheet-style criterion.

    condstr is an optional comparison operator followed by a number, for
    example ">=30", "<>60", "=10" or just "10" (no operator means
    equality). Supported operators are >, >=, <, <=, = and <>. The number
    may be negative or have a decimal part.

    Returns the number of matching items.
    Raises ValueError if the part after the operator is not a number.
    """
    # Two-character operators are listed first so "<=" is not read as "<".
    operators = (
        ("<>", op.ne),
        (">=", op.ge),
        ("<=", op.le),
        (">", op.gt),
        ("<", op.lt),
        ("=", op.eq),
    )

    text = condstr.strip()
    compare = op.eq  # a bare number means "equal to"
    for symbol, func in operators:
        if text.startswith(symbol):
            compare = func
            text = text[len(symbol):]
            break

    # Parse the operand as a whole so signs and decimal points are kept;
    # prefer int so integer criteria behave exactly as before.
    try:
        value = int(text)
    except ValueError:
        try:
            value = float(text)
        except ValueError:
            raise ValueError("invalid criterion: {!r}".format(condstr)) from None

    return sum(1 for item in criterio_list if compare(item, value))
    

countifs([10, 20, 10, 20, 30, 40, 50, 50, 60, 60], "<>60")

8


countifs([10, 20, 10, 20, 30, 40, 50, 50, 60, 60], ">60")

0


countifs([10, 20, 10, 20, 30, 40, 50, 50, 60, 60], "<60")

8


countifs([10, 20, 10, 20, 30, 40, 50, 50, 60, 60], "<=30")

5


condstr = "<>40"
int("".join([c for c in condstr if c.isdigit()]))

40


"".join([c for c in condstr if not c.isdigit()])

'<>'


op.ge

<function _operator.ge(a, b, /)>


def greaterthan(x, y):
    """Return the result of the comparison x > y."""
    is_greater = x > y
    return is_greater


conds = [">", ">=", "<", "<=", "", "<>"]
funcs = [op.gt, op.ge, op.lt, op.le, op.eq, op.ne]
funcmap = dict(zip(conds, funcs))


funcmap

{'>': <function _operator.gt(a, b, /)>,
 '>=': <function _operator.ge(a, b, /)>,
 '<': <function _operator.lt(a, b, /)>,
 '<=': <function _operator.le(a, b, /)>,
 '': <function _operator.eq(a, b, /)>,
 '<>': <function _operator.ne(a, b, /)>}


funcmap[">"]

<function _operator.gt(a, b, /)>


funcmap[">"](2, 1)

True


funcmap[">"](2, 5)

False

Python Virtual Training For Arcesium - Module II - Day 4¶

Problem Solving¶

Working with dictionaries¶

String formatting¶

Finding common keys from two dictionaries¶

COUNTIFS¶