Basic Python Training at Grofers - Day 2¶

Dec 19-21, 2019 Vikrant Patil, Anand Chitipothu

These notes are available online at http://notes.pipal.in/2019/grofers_basic_dec/day2.html

We will be using python 3 (>= 3.0) from anaconda for this training. You can download it from

https://www.anaconda.com/download/

Some Examples on lists¶

def is_palindrome(text):
    """Return True when text reads the same forwards and backwards."""
    mirrored = text[::-1]
    return text == mirrored

is_palindrome("madam")

True

import os

os.getcwd()

'/home/vikrant/trainings/2019/grofers_basic_dec'

os.listdir("/home/vikrant/trainings/2019/grofers_basic_dec/")

['day2.html',
 'date1.py',
 'push',
 'day2.ipynb',
 'square.py',
 'day1.html',
 '__pycache__',
 'Untitled.html',
 'day3.html',
 'mysum.py',
 'Untitled.ipynb',
 'day1.ipynb',
 '.pytest_cache',
 'magic.py',
 'minimum.py',
 'square1.py',
 'date.py',
 'today.py',
 'Makefile',
 '.ipynb_checkpoints',
 'test_min.py',
 'day3.ipynb']

os.listdir()

['day2.html',
 'date1.py',
 'push',
 'day2.ipynb',
 'square.py',
 'day1.html',
 '__pycache__',
 'Untitled.html',
 'day3.html',
 'mysum.py',
 'Untitled.ipynb',
 'day1.ipynb',
 '.pytest_cache',
 'magic.py',
 'minimum.py',
 'square1.py',
 'date.py',
 'today.py',
 'Makefile',
 '.ipynb_checkpoints',
 'test_min.py',
 'day3.ipynb']

problems

Write a python script ls.py which lists files in given directory

python ls.py /home/vikrant/trainings/2019/grofers_basic_dec/
day1.html
day1.ipynb
.
.

Write a function squares to find squares of given list
```
>>> squares([1,2,3])
[1,4,9]
```
Write a function evens to find even numbers from given list

def mysum(nums):
    """Add up the items of nums, starting from 0, and return the total."""
    total = 0
    for value in nums:
        total += value
    return total

%%file ls.py
import os, sys

def ls(path):
    """Print every entry of the directory at path, one per line."""
    for entry in os.listdir(path):
        print(entry)

if __name__ == "__main__":
    # The directory to list is the first command line argument.
    target = sys.argv[1]
    ls(target)

Overwriting ls.py

!python ls.py .

day2.html
ls.py
date1.py
push
day2.ipynb
square.py
day1.html
__pycache__
Untitled.html
day3.html
mysum.py
Untitled.ipynb
day1.ipynb
.pytest_cache
magic.py
minimum.py
square1.py
date.py
today.py
Makefile
.ipynb_checkpoints
test_min.py
day3.ipynb

%%file ls1.py
import os, sys

def ls(path):
    """Print every entry of the directory at path, one per line."""
    for entry in os.listdir(path):
        print(entry)

if __name__ == "__main__":
    # With no argument, list the current working directory.
    path = os.getcwd() if len(sys.argv) == 1 else sys.argv[1]
    ls(path)

Writing ls1.py

!python ls1.py

ls1.py
day2.html
ls.py
date1.py
push
day2.ipynb
square.py
day1.html
__pycache__
Untitled.html
day3.html
mysum.py
Untitled.ipynb
day1.ipynb
.pytest_cache
magic.py
minimum.py
square1.py
date.py
today.py
Makefile
.ipynb_checkpoints
test_min.py
day3.ipynb

!python ls1.py "/tmp"

ssh-2y5f6AC2S1oM
.X11-unix
systemd-private-602342188a20475190f6f5bc2229a0f4-systemd-timesyncd.service-eBza1c
Temp-19e8c0b6-caf8-49eb-ba05-b23f10768176
Temp-4ed6d016-d3f1-4fdc-811a-ac3f96fba48e
systemd-private-602342188a20475190f6f5bc2229a0f4-ModemManager.service-zTq6Go
.XIM-unix
.Test-unix
.font-unix
systemd-private-602342188a20475190f6f5bc2229a0f4-rtkit-daemon.service-1ziqOs
systemd-private-602342188a20475190f6f5bc2229a0f4-colord.service-0nwbhC
.X0-lock
config-err-3zP8Kc
systemd-private-602342188a20475190f6f5bc2229a0f4-bolt.service-MN7iXA
.ICE-unix
systemd-private-602342188a20475190f6f5bc2229a0f4-systemd-resolved.service-TkFoT1
mintUpdate

def squares(nums):
    """Return a new list holding the square of each item in nums."""
    return [value * value for value in nums]

def print_squares(nums):
    """Print nums followed by their squares, space separated.

    The numbers and the "Squared list:" label share the first output line;
    the squares follow on the next line, with no trailing newline.
    """
    squared = squares(nums)

    print("Original list:", end=" ")
    for value in nums:
        print(value, end=" ")
    print("Squared list:")

    for value in squared:
        print(value, end=" ")

print_squares([2,3,4,5])

Original list: 2 3 4 5 Squared list:
4 9 16 25

%%file listoperations.py

def squares(nums):
    """
    Return a new list holding the square of each item in nums.

    >>> squares([])
    []
    >>> squares([-1, 1, 2])
    [1, 1, 4]
    """
    return [value * value for value in nums]

def evens(nums):
    """Return the items of nums that are divisible by 2, in their original order."""
    return [value for value in nums if value % 2 == 0]

Overwriting listoperations.py

!python -m doctest -v listoperations.py

Trying:
    squares([])
Expecting:
    []
ok
Trying:
    squares([-1, 1, 2])
Expecting:
    [1, 1, 4]
ok
2 items had no tests:
    listoperations
    listoperations.evens
1 items passed all tests:
   2 tests in listoperations.squares
2 tests in 3 items.
2 passed and 0 failed.
Test passed.

problem

Write another module test_lists.py containing assert-based tests for the listoperations module, and run it using pytest

List comprehensions¶

nums = [1,2,3,5,8,13]
[n*n for n in nums]

[1, 4, 9, 25, 64, 169]

[n**3 for n in nums]

[1, 8, 27, 125, 512, 2197]

def even(x):
    """Return True when x leaves no remainder on division by 2."""
    remainder = x % 2
    return remainder == 0
[n for n in nums if even(n)]

[2, 8]

[n*n for n in nums if even(n)]

[4, 64]

Example¶

Problem decomposition and list comprehensions

def factors(n):
    """Return the positive divisors of n in ascending order (empty when n < 1)."""
    candidates = range(1, n + 1)
    return [divisor for divisor in candidates if n % divisor == 0]

factors(12)

[1, 2, 3, 4, 6, 12]

factors(5)

[1, 5]

factors(7)

[1, 7]

def is_prime(p):
    """Return True when p has exactly two divisors according to factors()."""
    divisors = factors(p)
    return len(divisors) == 2

def primes(n):
    """
    Return the list of prime numbers less than n, in ascending order.
    """
    return [candidate for candidate in range(1, n) if is_prime(candidate)]

primes(50)

[2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47]

problems

Find sum of all multiples of 7 or 11 less than 1000
Write a function filterpy to filter out only py files from given directory
```
>>> filterpy(path)
a.py
b.py
```
Write a function filter_by_ext to filter files by extension from a given directory
```
filter_by_ext(path, ext)
```

4==3 or 4%2==0 and 1==1

True

sum([1,2,3,4])

10

e = []
for i in range(n):
    e.append(do(i))

The above loop translates to a list comprehension as below

[do(i) for i in range(n)]

=======================================================

e = []
for i in range(n):
    if cond(i):
        e.append(do(i))

The above loop translates to a list comprehension as below

[do(i) for i in range(n) if cond(i)]

sum([i for i in range(1,1000) if i%7==0 or i%11==0])

110110

def filter_py(path):
    """Return the names in directory path that end with ".py"."""
    return [name for name in os.listdir(path) if name.endswith(".py")]

def filter_by_ext(path, ext):
    """Return the names in directory path that end with the suffix ext."""
    return [name for name in os.listdir(path) if name.endswith(ext)]

def filter_py1(path):
    """Return the ".py" names in directory path by delegating to filter_by_ext."""
    python_suffix = ".py"
    return filter_by_ext(path, python_suffix)

tables = [[i*j for i in range(1,6)] for j in range(1,11)]

tables

[[1, 2, 3, 4, 5],
 [2, 4, 6, 8, 10],
 [3, 6, 9, 12, 15],
 [4, 8, 12, 16, 20],
 [5, 10, 15, 20, 25],
 [6, 12, 18, 24, 30],
 [7, 14, 21, 28, 35],
 [8, 16, 24, 32, 40],
 [9, 18, 27, 36, 45],
 [10, 20, 30, 40, 50]]

tables[0]

[1, 2, 3, 4, 5]

tables[-1]

[10, 20, 30, 40, 50]

tables[:]

[[1, 2, 3, 4, 5],
 [2, 4, 6, 8, 10],
 [3, 6, 9, 12, 15],
 [4, 8, 12, 16, 20],
 [5, 10, 15, 20, 25],
 [6, 12, 18, 24, 30],
 [7, 14, 21, 28, 35],
 [8, 16, 24, 32, 40],
 [9, 18, 27, 36, 45],
 [10, 20, 30, 40, 50]]

tables[2:]

[[3, 6, 9, 12, 15],
 [4, 8, 12, 16, 20],
 [5, 10, 15, 20, 25],
 [6, 12, 18, 24, 30],
 [7, 14, 21, 28, 35],
 [8, 16, 24, 32, 40],
 [9, 18, 27, 36, 45],
 [10, 20, 30, 40, 50]]

tables[0]

[1, 2, 3, 4, 5]

tables[0][2]

3

tables[1][2]

6

tables[2][2]

9

def column(data, colnum):
    """Return item number colnum of every row in data, as a list.

    data is iterated row by row, so any iterable of indexable rows works,
    not only a list of lists. An IndexError is raised if a row has no item
    at position colnum.
    """
    return [row[colnum] for row in data]

column(tables, 2)

[3, 6, 9, 12, 15, 18, 21, 24, 27, 30]

tables

[[1, 2, 3, 4, 5],
 [2, 4, 6, 8, 10],
 [3, 6, 9, 12, 15],
 [4, 8, 12, 16, 20],
 [5, 10, 15, 20, 25],
 [6, 12, 18, 24, 30],
 [7, 14, 21, 28, 35],
 [8, 16, 24, 32, 40],
 [9, 18, 27, 36, 45],
 [10, 20, 30, 40, 50]]

def transpose(data):
    """Return the rows of data turned into columns, as a list of lists.

    The number of columns is taken from the first row; each output row is
    built with column(). An empty table transposes to an empty list instead
    of failing on the missing first row.
    """
    if not data:
        return []
    colcount = len(data[0])
    return [column(data, c) for c in range(colcount)]

transpose(tables)

[[1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
 [2, 4, 6, 8, 10, 12, 14, 16, 18, 20],
 [3, 6, 9, 12, 15, 18, 21, 24, 27, 30],
 [4, 8, 12, 16, 20, 24, 28, 32, 36, 40],
 [5, 10, 15, 20, 25, 30, 35, 40, 45, 50]]

Iteration Patterns¶

nums = [2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47]

for n in nums:
    print(n, end=",")

2,3,5,7,11,13,17,19,23,29,31,37,41,43,47,

for i, n in enumerate(nums[:5]):
    print(i, n)

0 2
1 3
2 5
3 7
4 11

lines = """first line
second line
third line
fourth line
""".strip().split("\n")

lines

['first line', 'second line', 'third line', 'fourth line']

for linum, line in enumerate(lines, start=1):
    print(linum, line)

1 first line
2 second line
3 third line
4 fourth line

x, y = 1,2

enumerate(lines)

<enumerate at 0x7f118d3bf120>

list(enumerate(lines))

[(0, 'first line'), (1, 'second line'), (2, 'third line'), (3, 'fourth line')]

list(enumerate(lines, start=1))

[(1, 'first line'), (2, 'second line'), (3, 'third line'), (4, 'fourth line')]

for line in reversed(lines):
    print(line)

fourth line
third line
second line
first line

for i in reversed(range(5)):
    print(i)

4
3
2
1
0

first = ["Elsa", "Alice", "Elisa"]
second = ["Frozen", "Wonder", "Hacker"]
for f, s in zip(first, second):
    print(f, s)

Elsa Frozen
Alice Wonder
Elisa Hacker

a = [1, 2, 3, 4]
b = ['a','b','c']
for i, j in zip(a,b):
    print(i,j)

1 a
2 b
3 c

items = ["Daliya", "Rava", "Rice"]
quantities = [2,1,3]
prices = [32, 40, 50]
for item, qty,price in zip(items, quantities, prices):
    print(item, qty, price)

Daliya 2 32
Rava 1 40
Rice 3 50

items = ["Daliya", "Rava", "Rice"]
quantities = [2,1,3]
prices = [32, 40, 50]
for item, qty,price in zip(items, quantities, prices):
    print(item.rjust(6), qty, price)

Daliya 2 32
  Rava 1 40
  Rice 3 50

r = reversed([1, 2, 3, 4]) # is only for one time use!

for i in r:
    print(i, end=" ")

4 3 2 1

for i in r:# is only for one time use!
    print(i, end=" ")

problems

Write a function to vector add two lists

>>> vector_add([1, 2, 3], [1, 1, 1])
[2, 3, 4]

def vector_add(vector1, vector2):
    """Return the element-wise sums of vector1 and vector2.

    Items are paired with zip, so the result is as long as the shorter input.
    """
    pairs = zip(vector1, vector2)
    return [left + right for left, right in pairs]

vector_add([2, 3, 4, 5], [1, 1, 1, 1])

[3, 4, 5, 6]

String formatting¶

for i in range(1, 11):
    print(i, i**2, i**3)

1 1 1
2 4 8
3 9 27
4 16 64
5 25 125
6 36 216
7 49 343
8 64 512
9 81 729
10 100 1000

for i in range(1, 11):
    print(str(i).rjust(2), str(i**2).rjust(3), str(i**3).rjust(4))

 1   1    1
 2   4    8
 3   9   27
 4  16   64
 5  25  125
 6  36  216
 7  49  343
 8  64  512
 9  81  729
10 100 1000

"Answer to {} is {}".format("life", 42)

'Answer to life is 42'

"Answer to {1} is {0}".format("life", 42)

'Answer to 42 is life'

"Wizard of {place} is {name}".format(place="oz", name="python")

'Wizard of oz is python'

template = """
<html>
<header>
{HEADER}
</header>
<body>
{BODY}
</body>
</html>
"""

print(template.format(HEADER="Post1", BODY="Contests of Post1"))

<html>
<header>
Post1
</header>
<body>
Contests of Post1
</body>
</html>

for i in range(1, 11):
    print("{num:2} {sqr:3} {cube:4}".format(num=i, sqr=i*i, cube=i**3))

 1   1    1
 2   4    8
 3   9   27
 4  16   64
 5  25  125
 6  36  216
 7  49  343
 8  64  512
 9  81  729
10 100 1000

for i in range(1, 11):
    print("{num:{w}} {sqr:{w}} {cube:{w}}".format(num=i, sqr=i*i, cube=i**3, w=4))

   1    1    1
   2    4    8
   3    9   27
   4   16   64
   5   25  125
   6   36  216
   7   49  343
   8   64  512
   9   81  729
  10  100 1000

def make_triangle(n):
    """Return n strings of "*" whose lengths grow from 1 up to n."""
    return ["*" * size for size in range(1, n + 1)]

make_triangle(5)

['*', '**', '***', '****', '*****']

for row in make_triangle(5):
    print(row)

*
**
***
****
*****

"helo".center(10)

'   helo   '

def prety_print(triangle):
    """Print each row of triangle centered to the width of its last row."""
    widest = len(triangle[-1])
    for line in triangle:
        centered = line.center(widest)
        print(centered)

prety_print(make_triangle(10))

    *     
    **    
   ***    
   ****   
  *****   
  ******  
 *******  
 ******** 
********* 
**********

def format_row(row):
    """Return the items of row joined by single spaces."""
    separator = " "
    return separator.join(row)

def prety_print(triangle):
    """Print triangle as a centered pyramid with spaces between symbols.

    Each row is expanded with format_row(), then centered to the width of
    the expanded last row (its symbols plus the spaces between them).
    """
    base = len(triangle[-1])
    width = 2 * base - 1
    for row in triangle:
        print(format_row(row).center(width))

prety_print(make_triangle(10))

         *         
        * *        
       * * *       
      * * * *      
     * * * * *     
    * * * * * *    
   * * * * * * *   
  * * * * * * * *  
 * * * * * * * * * 
* * * * * * * * * *

def make_triangle(n, char="*"):
    """Return n strings made of char repeated 1, 2, ..., n times."""
    return [char * size for size in range(1, n + 1)]

make_triangle(10, "@")

['@',
 '@@',
 '@@@',
 '@@@@',
 '@@@@@',
 '@@@@@@',
 '@@@@@@@',
 '@@@@@@@@',
 '@@@@@@@@@',
 '@@@@@@@@@@']

prety_print(make_triangle(10, "@"))

         @         
        @ @        
       @ @ @       
      @ @ @ @      
     @ @ @ @ @     
    @ @ @ @ @ @    
   @ @ @ @ @ @ @   
  @ @ @ @ @ @ @ @  
 @ @ @ @ @ @ @ @ @ 
@ @ @ @ @ @ @ @ @ @

problem

Write a function to generate pascal triangle of base n
```
>>> pascal(4)
[[1],[1,1],[1,2,1],[1,3,3,1]]
```

11**4

14641

11**5

161051

%%file pascal.py

def get_next_row(row):
    """Return the Pascal triangle row that follows row.

    The row is padded with a 0 on the left and on the right; adding the two
    padded copies item by item gives the sums of neighbouring entries.
    """
    shifted_right = [0] + row
    shifted_left = row + [0]
    return [x + y for x, y in zip(shifted_right, shifted_left)]

def test_get_next_row():
    """Check get_next_row on the first rows of the Pascal triangle."""
    first, second, third = [1], [1, 1], [1, 2, 1]
    assert get_next_row(first) == second
    assert get_next_row(second) == third
    # Applying it twice from the top row must also reach the third row.
    assert get_next_row(get_next_row(first)) == third

Overwriting pascal.py

!pytest pascal.py

============================= test session starts ==============================
platform linux -- Python 3.7.3, pytest-5.0.1, py-1.8.0, pluggy-0.12.0
benchmark: 3.2.2 (defaults: timer=time.perf_counter disable_gc=False min_rounds=5 min_time=0.000005 max_time=1.0 calibration_precision=10 warmup=False warmup_iterations=100000)
rootdir: /home/vikrant/trainings/2019/grofers_basic_dec
plugins: doctestplus-0.3.0, benchmark-3.2.2, cov-2.8.1, arraydiff-0.3, remotedata-0.3.1, openfiles-0.3.2
collected 1 item                                                               

pascal.py .                                                              [100%]

=========================== 1 passed in 0.03 seconds ===========================

%%file pascal1.py

def pascal(n):
    """Return the first n rows of the Pascal triangle as a list of lists.

    A triangle with no rows (n <= 0) is the empty list; otherwise the rows
    are built one after another from [1] with get_next_row().
    """
    if n <= 0:
        return []
    tr = [[1]]
    for _ in range(n - 1):
        tr.append(get_next_row(tr[-1]))
    return tr

def test_pascal():
    """Check pascal for an empty triangle and for the first few sizes."""
    assert pascal(0) == []
    assert pascal(1) == [[1]]
    assert pascal(2) == [[1],[1,1]]
    assert pascal(3) == [[1],[1,1],[1,2,1]]

def get_next_row(row):
    """Return the Pascal triangle row that follows row.

    The row is padded with a 0 on the left and on the right; adding the two
    padded copies item by item gives the sums of neighbouring entries.
    """
    r1 = [0] + row[:]
    r2 = row[:] + [0]
    return [a+b for a,b in zip(r1, r2)]

def test_get_next_row():
    """Check get_next_row on the first rows of the Pascal triangle."""
    assert get_next_row([1]) == [1,1]
    assert get_next_row([1,1]) == [1,2,1]
    assert get_next_row(get_next_row([1])) == [1,2,1]

Writing pascal1.py

!pytest pascal1.py

============================= test session starts ==============================
platform linux -- Python 3.7.3, pytest-5.0.1, py-1.8.0, pluggy-0.12.0
benchmark: 3.2.2 (defaults: timer=time.perf_counter disable_gc=False min_rounds=5 min_time=0.000005 max_time=1.0 calibration_precision=10 warmup=False warmup_iterations=100000)
rootdir: /home/vikrant/trainings/2019/grofers_basic_dec
plugins: doctestplus-0.3.0, benchmark-3.2.2, cov-2.8.1, arraydiff-0.3, remotedata-0.3.1, openfiles-0.3.2
collected 2 items                                                              

pascal1.py ..                                                            [100%]

=========================== 2 passed in 0.03 seconds ===========================

from pascal1 import pascal

pascal(5)

[[1], [1, 1], [1, 2, 1], [1, 3, 3, 1], [1, 4, 6, 4, 1]]

def print_pascal(n):
    """Print the Pascal triangle returned by pascal(n) as a centered pyramid.

    Every number is right-aligned in a box as wide as the largest number of
    the last row, boxes are separated by one space, and each row is centered
    to the width of the last row. Nothing is printed for an empty triangle.
    """
    pascal_t = pascal(n)
    if not pascal_t:
        # No rows: there is no last row to measure, so there is nothing to print.
        return

    def format_row(row, boxwidth):
        # Right-align every number in a box of boxwidth characters.
        return " ".join([f"{r:{boxwidth}}" for r in row])

    maxnum = max(pascal_t[-1])
    boxwidth = len(str(maxnum))
    length = len(pascal_t[-1])
    # Width of the last row: all boxes plus one space between neighbours.
    width = boxwidth*length + length -1
    for row in pascal_t:
        row_ = format_row(row, boxwidth)
        print(row_.center(width))

print_pascal(5)

    1    
   1 1   
  1 2 1  
 1 3 3 1 
1 4 6 4 1

print_pascal(12)

                        1                      
                      1   1                    
                    1   2   1                  
                  1   3   3   1                
                1   4   6   4   1              
              1   5  10  10   5   1            
            1   6  15  20  15   6   1          
          1   7  21  35  35  21   7   1        
        1   8  28  56  70  56  28   8   1      
      1   9  36  84 126 126  84  36   9   1    
    1  10  45 120 210 252 210 120  45  10   1  
  1  11  55 165 330 462 462 330 165  55  11   1

Working with files¶

import this

The Zen of Python, by Tim Peters

Beautiful is better than ugly.
Explicit is better than implicit.
Simple is better than complex.
Complex is better than complicated.
Flat is better than nested.
Sparse is better than dense.
Readability counts.
Special cases aren't special enough to break the rules.
Although practicality beats purity.
Errors should never pass silently.
Unless explicitly silenced.
In the face of ambiguity, refuse the temptation to guess.
There should be one-- and preferably only one --obvious way to do it.
Although that way may not be obvious at first unless you're Dutch.
Now is better than never.
Although never is often better than *right* now.
If the implementation is hard to explain, it's a bad idea.
If the implementation is easy to explain, it may be a good idea.
Namespaces are one honking great idea -- let's do more of those!

%%file poem.txt
The Zen of Python, by Tim Peters

Beautiful is better than ugly.
Explicit is better than implicit.
Simple is better than complex.
Complex is better than complicated.
Flat is better than nested.
Sparse is better than dense.
Readability counts.
Special cases aren't special enough to break the rules.
Although practicality beats purity.
Errors should never pass silently.
Unless explicitly silenced.
In the face of ambiguity, refuse the temptation to guess.
There should be one-- and preferably only one --obvious way to do it.
Although that way may not be obvious at first unless you're Dutch.
Now is better than never.
Although never is often better than *right* now.
If the implementation is hard to explain, it's a bad idea.
If the implementation is easy to explain, it may be a good idea.
Namespaces are one honking great idea -- let's do more of those!

Writing poem.txt

with open("poem.txt") as file:
    print(file.read())

The Zen of Python, by Tim Peters

Beautiful is better than ugly.
Explicit is better than implicit.
Simple is better than complex.
Complex is better than complicated.
Flat is better than nested.
Sparse is better than dense.
Readability counts.
Special cases aren't special enough to break the rules.
Although practicality beats purity.
Errors should never pass silently.
Unless explicitly silenced.
In the face of ambiguity, refuse the temptation to guess.
There should be one-- and preferably only one --obvious way to do it.
Although that way may not be obvious at first unless you're Dutch.
Now is better than never.
Although never is often better than *right* now.
If the implementation is hard to explain, it's a bad idea.
If the implementation is easy to explain, it may be a good idea.
Namespaces are one honking great idea -- let's do more of those!

with open("poem.txt") as file:
    for line in file:
        print(line)

The Zen of Python, by Tim Peters


Beautiful is better than ugly.

Explicit is better than implicit.

Simple is better than complex.

Complex is better than complicated.

Flat is better than nested.

Sparse is better than dense.

Readability counts.

Special cases aren't special enough to break the rules.

Although practicality beats purity.

Errors should never pass silently.

Unless explicitly silenced.

In the face of ambiguity, refuse the temptation to guess.

There should be one-- and preferably only one --obvious way to do it.

Although that way may not be obvious at first unless you're Dutch.

Now is better than never.

Although never is often better than *right* now.

If the implementation is hard to explain, it's a bad idea.

If the implementation is easy to explain, it may be a good idea.

Namespaces are one honking great idea -- let's do more of those!

with open("poem.txt") as file: # with statement makes sure that file 
    for line in file:          # is closed after with block
        print(line, end="")

The Zen of Python, by Tim Peters

Beautiful is better than ugly.
Explicit is better than implicit.
Simple is better than complex.
Complex is better than complicated.
Flat is better than nested.
Sparse is better than dense.
Readability counts.
Special cases aren't special enough to break the rules.
Although practicality beats purity.
Errors should never pass silently.
Unless explicitly silenced.
In the face of ambiguity, refuse the temptation to guess.
There should be one-- and preferably only one --obvious way to do it.
Although that way may not be obvious at first unless you're Dutch.
Now is better than never.
Although never is often better than *right* now.
If the implementation is hard to explain, it's a bad idea.
If the implementation is easy to explain, it may be a good idea.
Namespaces are one honking great idea -- let's do more of those!

file = open("poem.txt")

file.readline()

'The Zen of Python, by Tim Peters\n'

file.readline()

'\n'

file.read()

"Beautiful is better than ugly.\nExplicit is better than implicit.\nSimple is better than complex.\nComplex is better than complicated.\nFlat is better than nested.\nSparse is better than dense.\nReadability counts.\nSpecial cases aren't special enough to break the rules.\nAlthough practicality beats purity.\nErrors should never pass silently.\nUnless explicitly silenced.\nIn the face of ambiguity, refuse the temptation to guess.\nThere should be one-- and preferably only one --obvious way to do it.\nAlthough that way may not be obvious at first unless you're Dutch.\nNow is better than never.\nAlthough never is often better than *right* now.\nIf the implementation is hard to explain, it's a bad idea.\nIf the implementation is easy to explain, it may be a good idea.\nNamespaces are one honking great idea -- let's do more of those!\n"

file.read()

''

file.readline()

''

file.close()

with open("poem.txt") as f:
    line = f.readline()
    while line:
        print(line, end="")
        line = f.readline()

The Zen of Python, by Tim Peters

Beautiful is better than ugly.
Explicit is better than implicit.
Simple is better than complex.
Complex is better than complicated.
Flat is better than nested.
Sparse is better than dense.
Readability counts.
Special cases aren't special enough to break the rules.
Although practicality beats purity.
Errors should never pass silently.
Unless explicitly silenced.
In the face of ambiguity, refuse the temptation to guess.
There should be one-- and preferably only one --obvious way to do it.
Although that way may not be obvious at first unless you're Dutch.
Now is better than never.
Although never is often better than *right* now.
If the implementation is hard to explain, it's a bad idea.
If the implementation is easy to explain, it may be a good idea.
Namespaces are one honking great idea -- let's do more of those!

if '':
    print("false")
else:
    print("True")

True

%%file small.txt
one
two
three

Writing small.txt

f = open("small.txt")

f.readline()

'one\n'

f.readline()

'two\n'

f.readline()

'three\n'

f.readline()

''

f.close()

with open("small.txt") as f:
    while f.readline():
        pass

with open("small.txt") as f:
    for line in f:
        print(line, end="")

one
two
three

problems

Write a function to print file with line numbers
Write a function reversed_lines to print every line of a file with its characters reversed
```
>>> reversed_lines("small.txt")
eno
owt
eerht
```
Write python script cat.py which mimics unix command cat
Write a python script head.py which mimics unix command head
Make use of list to write a python script tail.py which prints last five lines of a file.

!head -n 5 poem.txt

The Zen of Python, by Tim Peters

Beautiful is better than ugly.
Explicit is better than implicit.
Simple is better than complex.

!tail -n 5 poem.txt

Now is better than never.
Although never is often better than *right* now.
If the implementation is hard to explain, it's a bad idea.
If the implementation is easy to explain, it may be a good idea.
Namespaces are one honking great idea -- let's do more of those!

def lines_nums(filename):
    """Print the lines of filename, each prefixed by its 1-based line number."""
    with open(filename) as source:
        numbered = enumerate(source, start=1)
        for number, text in numbered:
            # Each line keeps its own newline, so print must not add another.
            print(f"{number} {text}", end="")

lines_nums("poem.txt")

1 The Zen of Python, by Tim Peters
2 
3 Beautiful is better than ugly.
4 Explicit is better than implicit.
5 Simple is better than complex.
6 Complex is better than complicated.
7 Flat is better than nested.
8 Sparse is better than dense.
9 Readability counts.
10 Special cases aren't special enough to break the rules.
11 Although practicality beats purity.
12 Errors should never pass silently.
13 Unless explicitly silenced.
14 In the face of ambiguity, refuse the temptation to guess.
15 There should be one-- and preferably only one --obvious way to do it.
16 Although that way may not be obvious at first unless you're Dutch.
17 Now is better than never.
18 Although never is often better than *right* now.
19 If the implementation is hard to explain, it's a bad idea.
20 If the implementation is easy to explain, it may be a good idea.
21 Namespaces are one honking great idea -- let's do more of those!

def reversed_lines(filename):
    """Print every line of filename with its characters in reverse order.

    Only the trailing newline is removed before reversing, so leading
    indentation and trailing spaces of a line take part in the reversal
    instead of being dropped.
    """
    with open(filename) as f:
        for line in f:
            print(line.rstrip("\n")[::-1])

reversed_lines("poem.txt")

sreteP miT yb ,nohtyP fo neZ ehT

.ylgu naht retteb si lufituaeB
.ticilpmi naht retteb si ticilpxE
.xelpmoc naht retteb si elpmiS
.detacilpmoc naht retteb si xelpmoC
.detsen naht retteb si talF
.esned naht retteb si esrapS
.stnuoc ytilibadaeR
.selur eht kaerb ot hguone laiceps t'nera sesac laicepS
.ytirup staeb ytilacitcarp hguohtlA
.yltnelis ssap reven dluohs srorrE
.decnelis ylticilpxe sselnU
.sseug ot noitatpmet eht esufer ,ytiugibma fo ecaf eht nI
.ti od ot yaw suoivbo-- eno ylno ylbareferp dna --eno eb dluohs erehT
.hctuD er'uoy sselnu tsrif ta suoivbo eb ton yam yaw taht hguohtlA
.reven naht retteb si woN
.won *thgir* naht retteb netfo si reven hguohtlA
.aedi dab a s'ti ,nialpxe ot drah si noitatnemelpmi eht fI
.aedi doog a eb yam ti ,nialpxe ot ysae si noitatnemelpmi eht fI
!esoht fo erom od s'tel -- aedi taerg gniknoh eno era secapsemaN

with open("poem.txt") as f:
    lines = f.readlines()
    for line in lines:
        print(line.strip()[::-1])

sreteP miT yb ,nohtyP fo neZ ehT

.ylgu naht retteb si lufituaeB
.ticilpmi naht retteb si ticilpxE
.xelpmoc naht retteb si elpmiS
.detacilpmoc naht retteb si xelpmoC
.detsen naht retteb si talF
.esned naht retteb si esrapS
.stnuoc ytilibadaeR
.selur eht kaerb ot hguone laiceps t'nera sesac laicepS
.ytirup staeb ytilacitcarp hguohtlA
.yltnelis ssap reven dluohs srorrE
.decnelis ylticilpxe sselnU
.sseug ot noitatpmet eht esufer ,ytiugibma fo ecaf eht nI
.ti od ot yaw suoivbo-- eno ylno ylbareferp dna --eno eb dluohs erehT
.hctuD er'uoy sselnu tsrif ta suoivbo eb ton yam yaw taht hguohtlA
.reven naht retteb si woN
.won *thgir* naht retteb netfo si reven hguohtlA
.aedi dab a s'ti ,nialpxe ot drah si noitatnemelpmi eht fI
.aedi doog a eb yam ti ,nialpxe ot ysae si noitatnemelpmi eht fI
!esoht fo erom od s'tel -- aedi taerg gniknoh eno era secapsemaN

%%file cat.py
import sys

def cat(filename):
    """Print the contents of filename exactly as stored.

    The text is written without an extra trailing newline, so the output
    matches the file, as with the Unix cat command.
    """
    with open(filename) as f:
        print(f.read(), end="")

if __name__ == "__main__":
    # The file to show is the first command line argument.
    cat(sys.argv[1])

Writing cat.py

!python cat.py poem.txt

The Zen of Python, by Tim Peters

Beautiful is better than ugly.
Explicit is better than implicit.
Simple is better than complex.
Complex is better than complicated.
Flat is better than nested.
Sparse is better than dense.
Readability counts.
Special cases aren't special enough to break the rules.
Although practicality beats purity.
Errors should never pass silently.
Unless explicitly silenced.
In the face of ambiguity, refuse the temptation to guess.
There should be one-- and preferably only one --obvious way to do it.
Although that way may not be obvious at first unless you're Dutch.
Now is better than never.
Although never is often better than *right* now.
If the implementation is hard to explain, it's a bad idea.
If the implementation is easy to explain, it may be a good idea.
Namespaces are one honking great idea -- let's do more of those!

!cat small.txt poem.txt

one
two
three
The Zen of Python, by Tim Peters

Beautiful is better than ugly.
Explicit is better than implicit.
Simple is better than complex.
Complex is better than complicated.
Flat is better than nested.
Sparse is better than dense.
Readability counts.
Special cases aren't special enough to break the rules.
Although practicality beats purity.
Errors should never pass silently.
Unless explicitly silenced.
In the face of ambiguity, refuse the temptation to guess.
There should be one-- and preferably only one --obvious way to do it.
Although that way may not be obvious at first unless you're Dutch.
Now is better than never.
Although never is often better than *right* now.
If the implementation is hard to explain, it's a bad idea.
If the implementation is easy to explain, it may be a good idea.
Namespaces are one honking great idea -- let's do more of those!

%%file cat.py
import sys

def cat_(files):
    """Print the contents of every file in files, one after another."""
    for file in files:
        cat(file)

def cat(filename):
    """Print the contents of filename exactly as stored.

    The text is written without an extra trailing newline, so concatenated
    files follow each other directly, as with the Unix cat command.
    """
    with open(filename) as f:
        print(f.read(), end="")

if __name__ == "__main__":
    # Every command line argument is a file to show.
    cat_(sys.argv[1:])

Overwriting cat.py

!python cat.py small.txt poem.txt

one
two
three

The Zen of Python, by Tim Peters

Beautiful is better than ugly.
Explicit is better than implicit.
Simple is better than complex.
Complex is better than complicated.
Flat is better than nested.
Sparse is better than dense.
Readability counts.
Special cases aren't special enough to break the rules.
Although practicality beats purity.
Errors should never pass silently.
Unless explicitly silenced.
In the face of ambiguity, refuse the temptation to guess.
There should be one-- and preferably only one --obvious way to do it.
Although that way may not be obvious at first unless you're Dutch.
Now is better than never.
Although never is often better than *right* now.
If the implementation is hard to explain, it's a bad idea.
If the implementation is easy to explain, it may be a good idea.
Namespaces are one honking great idea -- let's do more of those!

!python cat.py small.txt

one
two
three

%%file head.py

import sys

def head(n, filename):
    """Print at most the first n lines of filename."""
    with open(filename) as source:
        # zip stops after n lines or at the end of the file, whichever is first.
        for _, text in zip(range(n), source):
            print(text, end="")

if __name__ == "__main__":
    # Usage: python head.py <count> <file>
    count, target = int(sys.argv[1]), sys.argv[2]
    head(count, target)

Overwriting head.py

!python head.py 3 poem.txt

The Zen of Python, by Tim Peters

Beautiful is better than ugly.

nums[-5:]

[31, 37, 41, 43, 47]

nums

[2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47]

def tail(filename, n):
    """Print the last ``n`` lines of ``filename`` to standard output.

    Lines are printed exactly as stored in the file.  When the file holds
    fewer than ``n`` lines, all of them are printed; when ``n`` is zero or
    negative, nothing is printed.

    Raises:
        OSError: if the file cannot be opened (e.g. it does not exist).
    """
    # Imported here because this definition has no import section of its own.
    from collections import deque

    if n <= 0:
        return

    with open(filename) as f:
        # A bounded deque drops its oldest entry automatically once it holds
        # ``n`` lines, so only the final ``n`` lines remain after the scan.
        window = deque(f, maxlen=n)

    for line in window:
        print(line, end="")

tail("poem.txt", 5)

Now is better than never.
Although never is often better than *right* now.
If the implementation is hard to explain, it's a bad idea.
If the implementation is easy to explain, it may be a good idea.
Namespaces are one honking great idea -- let's do more of those!

more about testing¶

!pytest pascal1.py

============================= test session starts ==============================
platform linux -- Python 3.7.3, pytest-5.0.1, py-1.8.0, pluggy-0.12.0
benchmark: 3.2.2 (defaults: timer=time.perf_counter disable_gc=False min_rounds=5 min_time=0.000005 max_time=1.0 calibration_precision=10 warmup=False warmup_iterations=100000)
rootdir: /home/vikrant/trainings/2019/grofers_basic_dec
plugins: doctestplus-0.3.0, benchmark-3.2.2, cov-2.8.1, arraydiff-0.3, remotedata-0.3.1, openfiles-0.3.2
collected 2 items                                                              

pascal1.py ..                                                            [100%]

=========================== 2 passed in 0.01 seconds ===========================

!pip install pytest-cov pytest-benchmark

Usage:   
  pip install [options] <requirement specifier> [package-index-options] ...
  pip install [options] -r <requirements file> [package-index-options] ...
  pip install [options] [-e] <vcs project url> ...
  pip install [options] [-e] <local project path> ...
  pip install [options] <archive url/path> ...

no such option: --capture

!pytest --capture=no --cov=.

============================= test session starts ==============================
platform linux -- Python 3.7.3, pytest-5.0.1, py-1.8.0, pluggy-0.12.0
benchmark: 3.2.2 (defaults: timer=time.perf_counter disable_gc=False min_rounds=5 min_time=0.000005 max_time=1.0 calibration_precision=10 warmup=False warmup_iterations=100000)
rootdir: /home/vikrant/trainings/2019/grofers_basic_dec
plugins: doctestplus-0.3.0, benchmark-3.2.2, cov-2.8.1, arraydiff-0.3, remotedata-0.3.1, openfiles-0.3.2
collected 2 items                                                              

test_min.py .F

=================================== FAILURES ===================================
__________________________________ test_min3 ___________________________________

    def test_min3():
        assert minimum.min3(1, 2, 3)==1
        assert minimum.min3(1,1, 1)==1
>       assert minimum.min3(2,1, -1)==1
E       assert -1 == 1
E        +  where -1 = <function min3 at 0x7f7523ba0a60>(2, 1, -1)
E        +    where <function min3 at 0x7f7523ba0a60> = minimum.min3

test_min.py:11: AssertionError

----------- coverage: platform linux, python 3.7.3-final-0 -----------
Name                Stmts   Miss  Cover
---------------------------------------
cat.py                  9      9     0%
date.py                 3      3     0%
date1.py                4      4     0%
head.py                 7      7     0%
listoperations.py      13     13     0%
ls.py                   7      7     0%
ls1.py                 10     10     0%
magic.py                1      1     0%
minimum.py              8      1    88%
mysum.py                8      8     0%
pascal.py               8      8     0%
pascal1.py             18     18     0%
square.py               7      7     0%
square1.py              6      6     0%
test_min.py             9      0   100%
today.py                5      5     0%
---------------------------------------
TOTAL                 123    107    13%

====================== 1 failed, 1 passed in 0.09 seconds ======================

============================= test session starts ==============================
platform linux -- Python 3.7.3, pytest-5.0.1, py-1.8.0, pluggy-0.12.0
benchmark: 3.2.2 (defaults: timer=time.perf_counter disable_gc=False min_rounds=5 min_time=0.000005 max_time=1.0 calibration_precision=10 warmup=False warmup_iterations=100000)
rootdir: /home/vikrant/trainings/2019/grofers_basic_dec
plugins: doctestplus-0.3.0, benchmark-3.2.2, cov-2.8.1, arraydiff-0.3, remotedata-0.3.1, openfiles-0.3.2
collected 2 items                                                              

test_min.py .FCoverage.py warning: Module pascal1.py was never imported. (module-not-imported)
Coverage.py warning: No data was collected. (no-data-collected)
WARNING: Failed to generate report: No data to report.

/home/vikrant/anaconda3/lib/python3.7/site-packages/pytest_cov/plugin.py:254: PytestWarning: Failed to generate report: No data to report.

  self.cov_controller.finish()


=================================== FAILURES ===================================
__________________________________ test_min3 ___________________________________

    def test_min3():
        assert minimum.min3(1, 2, 3)==1
        assert minimum.min3(1,1, 1)==1
>       assert minimum.min3(2,1, -1)==1
E       assert -1 == 1
E        +  where -1 = <function min3 at 0x7f9066dbea60>(2, 1, -1)
E        +    where <function min3 at 0x7f9066dbea60> = minimum.min3

test_min.py:11: AssertionError

----------- coverage: platform linux, python 3.7.3-final-0 -----------

====================== 1 failed, 1 passed in 0.05 seconds ======================

benchmarking¶

Example¶

Write a function to find unique items from a list, while keeping the order

%%file unique.py

def unique(seq):
    """Return the distinct items of ``seq`` as a list, in first-seen order.

    Membership is tested against a plain list, so items only need to support
    ``==``; they do not have to be hashable.
    """
    result = []
    for item in seq:
        if item in result:
            continue
        result.append(item)
    return result

def unique1(seq):
    """Return the distinct items of ``seq`` as a list, in first-seen order.

    Items already emitted are tracked in a set, so every item must be
    hashable.
    """
    emitted = set()
    result = []
    for item in seq:
        if item in emitted:
            continue
        emitted.add(item)
        result.append(item)
    return result

def test_bechmark_unique(benchmark):
    """Time the list-based ``unique`` on ``range(10000)`` via ``benchmark``."""
    benchmark(unique, range(10000))

def test_bechmark_unique1(benchmark):
    """Time the set-based ``unique1`` on ``range(10000)`` via ``benchmark``."""
    benchmark(unique1, range(10000))

Overwriting unique.py

!pytest -vv unique.py

============================= test session starts ==============================
platform linux -- Python 3.7.3, pytest-5.0.1, py-1.8.0, pluggy-0.12.0 -- /home/vikrant/anaconda3/bin/python
cachedir: .pytest_cache
benchmark: 3.2.2 (defaults: timer=time.perf_counter disable_gc=False min_rounds=5 min_time=0.000005 max_time=1.0 calibration_precision=10 warmup=False warmup_iterations=100000)
rootdir: /home/vikrant/trainings/2019/grofers_basic_dec
plugins: doctestplus-0.3.0, benchmark-3.2.2, cov-2.8.1, arraydiff-0.3, remotedata-0.3.1, openfiles-0.3.2
collected 2 items                                                              

unique.py::test_bechmark_unique PASSED                                   [ 50%]
unique.py::test_bechmark_unique1 PASSED                                  [100%]


-------------------------------------------------------------------------------------- benchmark: 2 tests --------------------------------------------------------------------------------------
Name (time in ms)              Min                 Max                Mean             StdDev              Median                IQR            Outliers       OPS            Rounds  Iterations
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
test_bechmark_unique1       1.2249 (1.0)        3.5156 (1.0)        1.2437 (1.0)       0.1008 (1.0)        1.2356 (1.0)       0.0055 (1.0)          7;39  804.0710 (1.0)         715           1
test_bechmark_unique      523.0453 (427.02)   570.8395 (162.37)   533.3227 (428.83)   20.9999 (208.33)   523.5422 (423.71)   13.7183 (>1000.0)       1;1    1.8750 (0.00)          5           1
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------

Legend:
  Outliers: 1 Standard Deviation from Mean; 1.5 IQR (InterQuartile Range) from 1st Quartile and 3rd Quartile.
  OPS: Operations Per Second, computed as 1 / Mean
=========================== 2 passed in 5.67 seconds ===========================

test fixtures¶

%%file words.py
def get_words(file):
    """Return the whitespace-separated words of the text file at ``file``."""
    with open(file) as handle:
        text = handle.read()
    return text.strip().split()

Writing words.py

%%file test_words.py
import pytest
from words import get_words
import os

@pytest.fixture
def wordfile():
    """Fixture that yields the path of a small text file holding three words.

    Everything before the ``yield`` is setup (the file is created) and
    everything after it is teardown (the file is removed again).
    """
    path = "/tmp/wordcounttest.txt"
    with open(path, "w") as f:
        f.write("one two three")
    print("Setup.....")

    # The yielded value is what a test receives as its ``wordfile`` argument.
    yield path
    
    print("Teardown...")
    os.remove(path)
    
def test_words(wordfile):
    """Check that ``get_words`` splits the fixture file into its three words."""
    print("[Test]:", wordfile)
    assert get_words(wordfile)==["one","two","three"]

Overwriting test_words.py

!pytest -vv --capture=no test_words.py

============================= test session starts ==============================
platform linux -- Python 3.7.3, pytest-5.0.1, py-1.8.0, pluggy-0.12.0 -- /home/vikrant/anaconda3/bin/python
cachedir: .pytest_cache
benchmark: 3.2.2 (defaults: timer=time.perf_counter disable_gc=False min_rounds=5 min_time=0.000005 max_time=1.0 calibration_precision=10 warmup=False warmup_iterations=100000)
rootdir: /home/vikrant/trainings/2019/grofers_basic_dec
plugins: doctestplus-0.3.0, benchmark-3.2.2, cov-2.8.1, arraydiff-0.3, remotedata-0.3.1, openfiles-0.3.2
collected 1 item                                                               

test_words.py::test_words Setup.....
[Test]: /tmp/wordcounttest.txt
PASSEDTeardown...


=========================== 1 passed in 0.02 seconds ===========================

Working with dictionaries¶

person = {"name":"Vikrant",
          "email":"vikrant@pipalacademy", 
          "address":"Pune"}

person['name']

'Vikrant'

person['company']

---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
<ipython-input-196-a2b0e6ec2841> in <module>
----> 1 person['company']

KeyError: 'company'

person.get("company", "Pipal Academy")

'Pipal Academy'

person

{'name': 'Vikrant', 'email': 'vikrant@pipalacademy', 'address': 'Pune'}

person.get("company")

print(person.get("company"))

None

for key in person:
    print(key)

name
email
address

for key in person:
    print(key, person[key])

name Vikrant
email vikrant@pipalacademy
address Pune

for value in person.values():
    print(value)

Vikrant
vikrant@pipalacademy
Pune

for key, value in person.items():
    print(key, value)

name Vikrant
email vikrant@pipalacademy
address Pune

%%file words.txt
one
one two 
one two three
one two three four
one two three four five
one two three four six
one two three six seven
one two six seven eight
one six seven eight nine
six seven eight nine ten
seven eight nine ten
eight nine ten
nine ten
ten

Writing words.txt

def get_words(filename):
    """Read ``filename`` and return its whitespace-separated words as a list."""
    with open(filename) as source:
        contents = source.read()
    words = contents.strip().split()
    return words

def wordfreq(words):
    """Count how often each word occurs in ``words``.

    Returns a dict mapping every distinct word to its number of occurrences,
    with keys in first-seen order.
    """
    counts = {}
    for word in words:
        if word in counts:
            counts[word] += 1
        else:
            # First sighting of this word.
            counts[word] = 1
    return counts

def wordfreq1(words):
    """Count how often each word occurs in ``words`` using ``dict.get``.

    Returns a dict mapping every distinct word to its number of occurrences,
    with keys in first-seen order.
    """
    counts = {}
    for word in words:
        # A word not counted yet starts from zero.
        seen_so_far = counts.get(word, 0)
        counts[word] = seen_so_far + 1
    return counts

def wordfreq2(words):
    """Count how often each word occurs in ``words`` using ``list.count``.

    Args:
        words: any iterable of hashable items.

    Returns:
        A new dict mapping every distinct word to its number of occurrences.
        Key order follows set iteration order and is therefore unspecified.
    """
    # Materialise once so one-shot iterables can be scanned repeatedly.
    words = list(words)
    # The result must be a fresh local dict; without this line the function
    # would write into (or fail to find) a global named ``freq``.
    freq = {}
    for w in set(words):
        freq[w] = words.count(w)
    return freq

words = get_words("words.txt")

freq = wordfreq1(words)

freq

{'one': 9,
 'two': 7,
 'three': 5,
 'four': 3,
 'five': 1,
 'six': 5,
 'seven': 5,
 'eight': 5,
 'nine': 5,
 'ten': 5}

for word in sorted(freq,key=lambda w: freq[w]):
    print(word, freq[word])

five 1
four 3
three 5
six 5
seven 5
eight 5
nine 5
ten 5
two 7
one 9

def get_freq(w):
    """Return the count stored for word ``w`` in the global ``freq`` dict.

    Intended as a ``key=`` function for ``sorted``; raises ``KeyError`` when
    ``w`` is not a key of ``freq``.
    """
    return freq[w]

for word in sorted(freq,key=get_freq, reverse=True):
    print(word, freq[word])

one 9
two 7
three 5
six 5
seven 5
eight 5
nine 5
ten 5
four 3
five 1

# Each item is a (word, count) pair, so print the unpacked count directly
# instead of looking the word up in ``freq`` a second time.
for word, f in sorted(freq.items(),key=lambda r: r[1], reverse=True):
    print(word, f)

one 9
two 7
three 5
six 5
seven 5
eight 5
nine 5
ten 5
four 3
five 1

for word, f in sorted(freq.items(),key=lambda r: r[1], reverse=True):
    print(word.rjust(5), f, "*"*f)

  one 9 *********
  two 7 *******
three 5 *****
  six 5 *****
seven 5 *****
eight 5 *****
 nine 5 *****
  ten 5 *****
 four 3 ***
 five 1 *

testing untestable!¶

%%file weekday.py

import datetime

def now():
    """Return the current local date and time as a ``datetime.datetime``.

    Kept as a separate module-level function so that it can be replaced
    with a fake clock in tests.
    """
    return datetime.datetime.now()

def weekday():
    """Return the full weekday name (such as ``"Friday"``) for the time
    reported by the module-level ``now`` function.
    """
    # ``now`` is looked up at call time, so a replaced ``now`` takes effect.
    return now().strftime("%A")

# Script entry point: print today's weekday name.
if __name__ == "__main__":
    print(weekday())

Overwriting weekday.py

!python weekday.py

Friday

%%file test_weekday.py
import weekday
import datetime

def test_weekday(monkeypatch):
    """Check ``weekday.weekday()`` for two fixed dates by faking ``weekday.now``."""
    faketime = 2010, 1, 1
    def fakenow():
        # ``faketime`` is looked up when fakenow() is called, not when it is
        # defined, so reassigning it below changes the date returned here.
        return datetime.datetime(*faketime)
    
    # Swap the real clock for the fake one.
    monkeypatch.setattr(weekday, "now", fakenow)
    
    faketime = 2010, 1, 1
    assert weekday.weekday() == "Friday"
    
    faketime = 2010, 1, 2
    assert weekday.weekday() == "Saturday"

Overwriting test_weekday.py

!pytest test_weekday.py

============================= test session starts ==============================
platform linux -- Python 3.7.3, pytest-5.0.1, py-1.8.0, pluggy-0.12.0
benchmark: 3.2.2 (defaults: timer=time.perf_counter disable_gc=False min_rounds=5 min_time=0.000005 max_time=1.0 calibration_precision=10 warmup=False warmup_iterations=100000)
rootdir: /home/vikrant/trainings/2019/grofers_basic_dec
plugins: doctestplus-0.3.0, benchmark-3.2.2, cov-2.8.1, arraydiff-0.3, remotedata-0.3.1, openfiles-0.3.2
collected 1 item                                                               

test_weekday.py .                                                        [100%]

=========================== 1 passed in 0.03 seconds ===========================