Python Foundation Training Day 2¶

by Vikrant Patil

at MIT Pune, IT Dept. Mar 12-16, 2018

Notes are available online at

https://notes.pipal.in/2018/mit-pune-march/

def pascal(n):
    """Return the first rows of Pascal's triangle as a list of lists.

    Every new row is the element-wise sum of the previous row shifted one
    place to the right and the same row shifted one place to the left (the
    gaps are filled with zeros). The triangle is seeded with the row ``[1]``,
    so that row is present even when ``n`` is smaller than 1.
    """
    triangle = [[1]]
    for _ in range(n - 1):
        previous = triangle[-1]
        shifted_right = [0, *previous]
        shifted_left = [*previous, 0]
        triangle.append([a + b for a, b in zip(shifted_right, shifted_left)])
    return triangle

pascal(5)

def pascal(n):
    """Return the first rows of Pascal's triangle as a list of lists.

    Each new row starts and ends with 1; the entries in between are the sums
    of neighbouring pairs of the previous row. The triangle is seeded with
    the row ``[1]``, so that row is present even when ``n`` is smaller than 1.
    """
    triangle = [[1]]
    for _ in range(n - 1):
        above = triangle[-1]
        # Pair every entry with its right-hand neighbour.
        inner = [left + right for left, right in zip(above, above[1:])]
        triangle.append([1, *inner, 1])
    return triangle

pascal(4)

def print_pascal(t):
    """Print the triangle ``t`` (a list of rows of numbers) as a pyramid.

    Every number is right-aligned in a cell as wide as the largest number of
    the last row, cells are separated by a single space, and each row is
    centred on the width of the last row. An empty triangle prints nothing.
    """
    if not t:
        return  # there is no last row to take the measurements from

    base = len(t[-1])
    block_size = len(str(max(t[-1])))  # number of digits in largest number
    width = block_size * base + base - 1  # cells plus single-space separators

    def format_row(row):
        # Join the cells rather than appending "<cell> " for each one: a
        # trailing space would make the last row longer than ``width`` and
        # shift the centring of every other row.
        return " ".join(
            "{value:{digits}}".format(value=e, digits=block_size) for e in row
        )

    for row in t:
        print(format_row(row).center(width))

print_pascal(pascal(5))

Working with files¶

import this

%%file data.txt
The Zen of Python, by Tim Peters

Beautiful is better than ugly.
Explicit is better than implicit.
Simple is better than complex.
Complex is better than complicated.
Flat is better than nested.
Sparse is better than dense.
Readability counts.
Special cases aren't special enough to break the rules.
Although practicality beats purity.
Errors should never pass silently.
Unless explicitly silenced.
In the face of ambiguity, refuse the temptation to guess.
There should be one-- and preferably only one --obvious way to do it.
Although that way may not be obvious at first unless you're Dutch.
Now is better than never.
Although never is often better than *right* now.
If the implementation is hard to explain, it's a bad idea.
If the implementation is easy to explain, it may be a good idea.
Namespaces are one honking great idea -- let's do more of those!

f = open("data.txt")

f.read()

f.read()

f.close()

f = open("data.txt")

f.readline()

f.readline()

f.readline()

line = f.readline()
while line:
    print(line, end="")
    line = f.readline()

f = open("data.txt")

lines = f.readlines()

for linenumber, line in enumerate(lines):
    print(linenumber, line, end="")

for line in open("data.txt"):
    print(line, end="")

problem

Write a python script cat.py which implements unix command cat

python cat.py x.txt
one
two
three

Write a python script head.py which implements unix command head approximately, take number of lines as first argument.

python head.py 5 data.txt
The Zen of Python, by Tim Peters

Beautiful is better than ugly.
Explicit is better than implicit.
Simple is better than complex.

Write a python script wc.py which implements unix command wc

python wc.py data.txt
20 144 856 data.txt

%%file cat.py
import sys

def cat(f):
    """Print the contents of the file at path ``f`` to standard output.

    The text is emitted exactly as read: ``print`` is told not to append its
    own newline, so no extra blank line follows the file's content.
    """
    with open(f) as stream:  # the handle is closed as soon as it is read
        print(stream.read(), end="")
    
if __name__ == "__main__":
    for file in sys.argv[1:]:
        cat(file)

!python cat.py hello.py x.txt

%%file head.py
import sys

def head(filename, n):
    """Print the first ``n`` lines of the file ``filename``.

    Fewer lines are printed when the file has fewer than ``n`` lines.
    """
    with open(filename) as f:  # the file is closed when the block exits
        # zip() stops as soon as range(n) is exhausted, so no more than n
        # lines are read, and it also stops early at end of file.
        for _, line in zip(range(n), f):
            print(line, end="")
    
if __name__ == "__main__":
    head(sys.argv[2],int(sys.argv[1]))

!python head.py 5 data.txt

%%file wc.py

import sys

def line_count(f):
    """Return the number of lines in the file at path ``f``."""
    with open(f) as stream:  # close the handle instead of leaking it
        # Count while iterating so the whole file is never held in memory.
        return sum(1 for _ in stream)

def word_count(f):
    """Return the number of whitespace-separated words in the file ``f``."""
    with open(f) as stream:  # close the handle instead of leaking it
        return len(stream.read().split())

def char_count(f):
    """Return the number of characters in the file at path ``f``.

    The file is read in text mode, so this counts decoded characters.
    """
    with open(f) as stream:  # close the handle instead of leaking it
        return len(stream.read())

if __name__ == "__main__":
    f = sys.argv[1]
    print(line_count(f), word_count(f), char_count(f), f)

!python wc.py data.txt

import os

files = [f for f in os.listdir(os.getcwd()) if os.path.isfile(f)]

import wc
max(files, key=wc.line_count) #file with max lines

Writing files¶

f = open("numbers.txt", "w")

f.write("one\n")
f.write("two\n")
f.write("three\n")
f.write("four\n")

f.close()

!python cat.py numbers.txt

f = open("numbers.txt", "a")
f.write("five\n")
f.write("six\n")

f.close()

!python cat.py numbers.txt

modes in which you can operate while working with files

wb   - write binary
ab   - append binary
w    - write in text mode
a    - append in text mode
rb   - read binary

f = open("regional.txt", "w", encoding="utf-8")

f.write("आथतवबआ")
f.close()

f = open("regional.txt", encoding="utf-8")
print(f.read())

with open("numbers.txt", "a") as f:
    f.write("seven\n")

!python cat.py numbers.txt

t = [[i*j for i in range(1,6)] for j in range(1,11)]

t

def writecsv(data, filename):
    """Write the rows of ``data`` to ``filename`` as comma-separated lines.

    Every element is converted with ``str`` and the elements of a row are
    joined by commas; each row ends with a newline. An existing file is
    overwritten.
    """
    with open(filename, "w") as out:
        out.writelines(",".join(map(str, record)) + "\n" for record in data)

writecsv(t, "tables.csv")

!python cat.py tables.csv

problem

Write a function csvparser which parses csv file and loads data from it as 2D list of integers.

def csvparser(filename):
    """Load a CSV file of integers as a 2D list.

    Every non-blank line of ``filename`` is split on commas and each field is
    converted with ``int``. Blank lines (for example a trailing empty line)
    are skipped instead of failing on ``int('')``.

    Raises ValueError when a field cannot be parsed as an integer.
    """
    def convertints(row):
        return [int(i) for i in row]

    with open(filename) as f:  # close the file once every row is parsed
        stripped = (line.strip() for line in f)
        return [convertints(line.split(",")) for line in stripped if line]

csvparser("tables.csv")

[[int(i) for i in line.strip().split(",")] for line in open("tables.csv")]

Dictionaries¶

person = {"name":"Lewis carrol",
         "books":["Alice in wonderland", "Looking through glass"],
         "language":"english"}

person['name']

person["country"]

person['country']="UK"

person

del person['country']

person

person['country']

person.get("country","UK")

person

def parsegrubconf(file):
    """Parse a ``VAR=value`` style configuration file into a dictionary.

    Blank lines and lines starting with ``#`` are ignored, and so are lines
    that contain no ``=`` at all. Every other line is split at its first
    ``=``: the text before it becomes the key and the text after it (kept
    verbatim, including any quotes) becomes the value.
    """
    conf = {}
    with open(file) as f:
        for raw in f:
            line = raw.strip()  # strip once and reuse the result
            if not line or line.startswith("#"):
                continue
            var, sep, value = line.partition("=")
            if not sep:
                # No "=" on the line: indexing the second token would raise
                # IndexError, so the line is skipped instead.
                continue
            conf[var] = value
    return conf

parsegrubconf("/etc/default/grub")

Iterating over dictionaries¶

items = ["Pen", "Pencil", "Eraser", "Pouch"]
prices = [10,12,5,25]

cart = dict(zip(items, prices))

cart

for item in cart:
    print(item, cart[item])

for value in cart.values():
    print(value)

for key, value in cart.items():
    print(key, value)

for key in cart.keys():
    print(key)

for item, value in cart.items():
    print(item.ljust(6), str(value).rjust(2))
print("="*12)
print("Total".ljust(6), str(sum(cart.values())).rjust(2))

Example Word count¶

%%file words.txt
one
one two
one two three
one two three four
one two three four five
one two three four six
one two three four
one two three
one two
one

def getwords(file):
    """Return the whitespace-separated words of the file ``file`` as a list."""
    with open(file) as f:  # close the handle instead of leaking it
        return f.read().split()

def wordfreq(words):
    """Return a dictionary mapping each word in ``words`` to its count."""
    counts = {}
    for word in words:
        try:
            counts[word] += 1
        except KeyError:
            # First time this word is seen.
            counts[word] = 1
    return counts

words = getwords("words.txt")

wordfreq(words)

def wordfreq1(words):
    """Return a dictionary mapping each word in ``words`` to its count."""
    tally = {}
    for word in words:
        # setdefault supplies 0 for a word that has not been seen yet.
        tally[word] = tally.setdefault(word, 0) + 1
    return tally

wordfreq1(words)

def wordfreq2(words):
    """Return a dictionary mapping each word in ``words`` to its count.

    The counting is delegated to ``collections.Counter``, which makes a
    single pass over ``words`` instead of rescanning the whole sequence once
    per distinct word. The result is converted back to a plain ``dict``.
    """
    from collections import Counter

    return dict(Counter(words))

freq = wordfreq2(words)

for w, f in freq.items():
    print(w, f)

for w, f in sorted(freq.items(), key=lambda x:x[1]):
    print(w, f)

for w, f in sorted(freq.items(), key=lambda x:x[1], reverse=True):
    print(w, f)

for w, f in sorted(freq.items(), key=lambda x:x[1], reverse=True):
    print(w.ljust(5), str(f).rjust(2))

for w, f in sorted(freq.items(), key=lambda x:x[1], reverse=True):
    print(w.ljust(5), str(f).rjust(2), "*"*f)

pitfalls¶

x = [1, 2, 3, 4]
y = x
y.append(5)

x

x = [1,1,1,1]
y = x
y = [2,3,4]

x

people = {"Anand":"India", "Noufal":"India", 
          "David":"USA", "Ken":"UK",
         "Harry":"USA"}

people

cart

[ (item,value) for item,value in cart.items()]

[ value for value in cart.values()]

[ item for item in cart]

[k for k,v in people.items() if v=="India"]

[k for k,v in people.items() if v=="USA"]

Why classes¶

import math

math.pi

math.sin(3.14)

%%file bank0.py

balance = 0  # module-level state shared by the three functions below


def getbalance():
    """Return the current module-level balance."""
    return balance


def deposit(amount):
    """Add ``amount`` to the module-level balance."""
    global balance
    balance = balance + amount


def withdraw(amount):
    """Subtract ``amount`` from the module-level balance."""
    global balance
    balance = balance - amount

import bank0

bank0.getbalance()

bank0.deposit(50)

bank0.getbalance()

bank0.withdraw(10)

bank0.getbalance()

%%file bank1.py

def make_account():
    """Create a new account: a dictionary whose "balance" entry is 0."""
    return dict(balance=0)


def getbalance(account):
    """Return the balance stored in ``account``."""
    return account["balance"]


def withdraw(account, amount):
    """Subtract ``amount`` from the balance stored in ``account``."""
    account["balance"] = account["balance"] - amount


def deposit(account, amount):
    """Add ``amount`` to the balance stored in ``account``."""
    account["balance"] = account["balance"] + amount

import bank1

a1 = bank1.make_account()

a2 = bank1.make_account()

bank1.getbalance(a1)

bank1.deposit(a1,1000)
bank1.getbalance(a1)

class BankAccount:
    """An account object that keeps its own running balance."""

    def __init__(self, balance=0):
        """Start the account with ``balance`` (0 when not given)."""
        self.balance = balance

    def getbalance(self):
        """Return the current balance."""
        return self.balance

    def withdraw(self, amount):
        """Subtract ``amount`` from the balance."""
        self.balance = self.balance - amount

    def deposit(self, amount):
        """Add ``amount`` to the balance."""
        self.balance = self.balance + amount

a3 = BankAccount()

a3

type(a3)

def f():
    pass

f

type(f)

BankAccount

type(a3)

type(BankAccount)

class Point:
    """A point that stores its two coordinates in ``_x`` and ``_y``."""

    def __init__(self, x, y):
        """Remember the coordinates ``x`` and ``y``."""
        self._x, self._y = x, y

    def gety(self):
        """Return the stored y coordinate."""
        return self._y

    def getx(self):
        """Return the stored x coordinate."""
        return self._x

p = Point(0,0)

p.getx()

p.gety()

p._x

class ColoredPoint(Point):
    # A Point subclass that adds one class-level attribute; nothing else is
    # overridden, so construction and the getters come from Point.
    # ``color`` is defined on the class itself, so rebinding
    # ``ColoredPoint.color`` is seen by every instance that has not set its
    # own ``color`` attribute.
    # NOTE(review): if this tuple is meant as an 8-bit RGB triple, 256 is out
    # of range (the maximum would be 255) -- confirm the intended value.
    color = (0,0,256)

cp = ColoredPoint(2,3)

cp.getx

cp.getx()

ColoredPoint.color

cp.color

p

p._x

p._x = 2

ColoredPoint.color = (0,0,0)

cp.color

p._x

cp2 = ColoredPoint(4,5)

cp2.color

cp.color

ColoredPoint.color = (256,256,256)

cp.color

cp2.color

class Complex:
    """A number kept as a separate real part and imaginary part."""

    def __init__(self, real, img):
        """Store ``real`` as ``self.real`` and ``img`` as ``self.imag``."""
        self.real, self.imag = real, img

    def __repr__(self):
        """Return the constructor-like form, e.g. ``Complex(2,3)``."""
        parts = {"r": self.real, "i": self.imag}
        return "Complex({r},{i})".format(**parts)

    def __str__(self):
        """Return the readable form, e.g. ``2 + 3j``."""
        parts = {"r": self.real, "i": self.imag}
        return "{r} + {i}j".format(**parts)

c1 = Complex(2,3)

print(c1)

print(p)

c1

l = []

problem

Implement a class Timer which times given snippet of code. hint: use time.time()

t = Timer()
t.start()
do something
t.stop()
print(t.get_time_taken())

import time

time.time()

1521019945.8914545

class Timer:
    """Measure the time elapsed between a ``start()`` and a ``stop()`` call.

    Usage: call ``start()``, run the code to be timed, call ``stop()``, then
    read the duration in seconds with ``get_time_taken()``. Before ``stop()``
    has been called the reported duration is 0.
    """

    def __init__(self):
        self._start = 0
        # This must be the attribute that stop() writes and get_time_taken()
        # reads; initialising a differently named one would leave ``_stop``
        # undefined until stop() is called.
        self._stop = 0

    def start(self):
        """Record the current time as the starting point."""
        # Reset the stop time too, so a reading taken before stop() is 0
        # rather than a stale or negative value.
        self._start = self._stop = time.time()

    def stop(self):
        """Record the current time as the end point."""
        self._stop = time.time()

    def get_time_taken(self):
        """Return the seconds between the last start() and stop()."""
        return self._stop - self._start

Exceptions¶

[[int(i) for i in line.strip().split(",")] for line in open("tables.csv")]

[[1, 2, 3, 4, 5],
 [2, 4, 6, 8, 10],
 [3, 6, 9, 12, 15],
 [4, 8, 12, 16, 20],
 [5, 10, 15, 20, 25],
 [6, 12, 18, 24, 30],
 [7, 14, 21, 28, 35],
 [8, 16, 24, 32, 40],
 [9, 18, 27, 36, 45],
 [10, 20, 30, 40, 50]]

[[int(i) for i in line.strip().split(",")] for line in open("tables1.csv")]

---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-207-112f44a4857d> in <module>()
----> 1 [[int(i) for i in line.strip().split(",")] for line in open("tables1.csv")]

<ipython-input-207-112f44a4857d> in <listcomp>(.0)
----> 1 [[int(i) for i in line.strip().split(",")] for line in open("tables1.csv")]

<ipython-input-207-112f44a4857d> in <listcomp>(.0)
----> 1 [[int(i) for i in line.strip().split(",")] for line in open("tables1.csv")]

ValueError: invalid literal for int() with base 10: ''

import sys
def parseint(strnum):
    """Convert ``strnum`` to an int, falling back to 0 when it is not one.

    When ``int`` rejects the text with ValueError, a message naming the
    offending text is written to standard error and 0 is returned.
    """
    try:
        value = int(strnum)
    except ValueError:
        sys.stderr.write("Error in parsing '{}' as int\n".format(strnum))
        value = 0
    return value

[[parseint(i) for i in line.strip().split(",")] for line in open("tables1.csv")]

Error in parsing '' as int
Error in parsing 'Nan' as int
Error in parsing '' as int

[[1, 2, 3, 4, 5],
 [2, 4, 6, 0, 10],
 [3, 6, 9, 12, 15],
 [4, 8, 12, 16, 20],
 [5, 10, 15, 20, 25],
 [6, 12, 18, 24, 30],
 [7, 0, 21, 28, 35],
 [8, 16, 24, 32, 40],
 [9, 18, 27, 36, 0],
 [10, 20, 30, 40, 50]]

Commandline applications¶

%%file grep.py
import argparse

def parse_arguments():
    """Parse the command line and return the resulting namespace.

    Two positional arguments are required: ``pattern`` and ``file``.
    """
    cli = argparse.ArgumentParser()
    positionals = (
        ("pattern", "Pattern which you want to search"),
        ("file", "File in which pattern will be searched"),
    )
    for name, description in positionals:
        cli.add_argument(name, type=str, help=description)
    return cli.parse_args()


if __name__ == "__main__":
    args = parse_arguments()
    print(args)

Writing grep.py

!python grep.py

usage: grep.py [-h] pattern file
grep.py: error: the following arguments are required: pattern, file

!python grep.py -h

usage: grep.py [-h] pattern file

positional arguments:
  pattern     Pattern which you want to search
  file        File in which pattern will be searched

optional arguments:
  -h, --help  show this help message and exit

%%file grep.py
import argparse

def parse_arguments():
    """Parse the command line and return the resulting namespace.

    Two positional arguments are required, ``pattern`` and ``file``; the
    optional ``-v`` / ``--invert-match`` switch sets ``invert_match`` to True.
    """
    cli = argparse.ArgumentParser()
    positionals = (
        ("pattern", "Pattern which you want to search"),
        ("file", "File in which pattern will be searched"),
    )
    for name, description in positionals:
        cli.add_argument(name, type=str, help=description)

    cli.add_argument(
        "-v", "--invert-match",
        help="Prints lines which do not match with pattern",
        action="store_true",
    )
    return cli.parse_args()


if __name__ == "__main__":
    args = parse_arguments()
    print(args)

Overwriting grep.py

!python grep.py -h

usage: grep.py [-h] [-v] pattern file

positional arguments:
  pattern             Pattern which you want to search
  file                File in which pattern will be searched

optional arguments:
  -h, --help          show this help message and exit
  -v, --invert-match  Prints lines which do not match with pattern

!python grep.py "Tim" data.txt

Namespace(file='data.txt', invert_match=False, pattern='Tim')

%%file grep.py
import argparse


def grep(file, pattern):
    """Print every line of ``file`` that contains the text ``pattern``.

    ``pattern`` is matched as a plain substring, not as a regular expression.
    """
    with open(file) as f:  # close the handle instead of leaking it
        for line in f:
            if pattern in line:
                print(line, end="")
            
def invertgrep(file, patter):
    for line in open(file):
        if patter not in line:
            print(line, end="")

def parse_arguments():
    """Parse the command line and return the resulting namespace.

    Positional arguments: ``pattern`` and ``file``. The optional switch sets
    ``invert_match`` to True and may be written ``-v``, ``--invert-match``
    or ``--invert_match``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("pattern", type=str,
                       help="Pattern which you want to search")
    parser.add_argument("file", type=str,
                       help="File in which pattern will be searched")

    # The hyphenated spelling is listed first: argparse derives the
    # destination from the first long option and turns "-" into "_", so the
    # value is still read as ``args.invert_match``. The underscore spelling
    # is kept as an alias so command lines using it keep working.
    parser.add_argument("-v", "--invert-match", "--invert_match",
                       action="store_true",
                       help="Prints lines which do not match with pattern")
    return parser.parse_args()


if __name__ == "__main__":
    args = parse_arguments()
    if args.invert_match:
        invertgrep(args.file, args.pattern)
    else:
        grep(args.file, args.pattern)

Overwriting grep.py

!python grep.py "Tim" data.txt

The Zen of Python, by Tim Peters

!python grep.py -v "Tim" data.txt

Beautiful is better than ugly.
Explicit is better than implicit.
Simple is better than complex.
Complex is better than complicated.
Flat is better than nested.
Sparse is better than dense.
Readability counts.
Special cases aren't special enough to break the rules.
Although practicality beats purity.
Errors should never pass silently.
Unless explicitly silenced.
In the face of ambiguity, refuse the temptation to guess.
There should be one-- and preferably only one --obvious way to do it.
Although that way may not be obvious at first unless you're Dutch.
Now is better than never.
Although never is often better than *right* now.
If the implementation is hard to explain, it's a bad idea.
If the implementation is easy to explain, it may be a good idea.
Namespaces are one honking great idea -- let's do more of those!

!python grep.py -d "Tim" data.txt

usage: grep.py [-h] [-v] pattern file
grep.py: error: unrecognized arguments: -d

problem

implement a command fib.py with following options

python fib.py [-h] [-s] n
-s  --series  print all Fibonacci numbers less than n

%%file head.py
import argparse

def head(filename, n):
    """Print the first ``n`` lines of the file ``filename``.

    Fewer lines are printed when the file has fewer than ``n`` lines.
    """
    with open(filename) as f:  # the file is closed when the block exits
        # zip() stops as soon as range(n) is exhausted, so no more than n
        # lines are read, and it also stops early at end of file.
        for _, line in zip(range(n), f):
            print(line, end="")

def parse_arguments():
    """Parse the command line and return the resulting namespace.

    ``file`` is a required positional argument; ``-n`` / ``--lines`` is an
    optional integer that defaults to 5.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument(
        "file",
        type=str,
        help="File whose lines will be displayed",
    )
    cli.add_argument(
        "-n", "--lines",
        default=5,
        type=int,
        help="These many lines will be printed from start of file",
    )
    return cli.parse_args()
    
    
    
if __name__ == "__main__":
    args = parse_arguments()
    print(args)
    head(args.file, args.lines)

Overwriting head.py

!python head.py -h

usage: head.py [-h] [-n LINES] file

positional arguments:
  file                  File whose lines will be displayed

optional arguments:
  -h, --help            show this help message and exit
  -n LINES, --lines LINES
                        These many lines will be printed from start of file

!python head.py -n 3 data.txt

Namespace(file='data.txt', lines=3)
The Zen of Python, by Tim Peters

Beautiful is better than ugly.

!python head.py data.txt

Namespace(file='data.txt', lines=5)
The Zen of Python, by Tim Peters

Beautiful is better than ugly.
Explicit is better than implicit.
Simple is better than complex.

References¶

SICP: Structure and Interpretation of Computer Programs
https://docs.python.org/3/
https://anandology.com/python-practice-book/
for advanced Python, search for David Beazley; look for his "Python Cookbook" and his videos
for data sciences learn numpy, matplotlib, pandas