Advanced Python Training at VMWare - Day 2¶

Nov 20-22, 2017 Vikrant Patil

These notes will be available online at http://notes.pipal.in/2017/vmware-nov-advpython/ after this training!

Example: converting functions to commands¶

%%file functions.py

def cat(filename):
    """
    prints given file to standard output
    """
    print(open(filename).read())
    
    
def head(filename, n):
    """ 
    prints first n lines from given file to standard output
    """
    
    with open(filename) as f:
        for i in range(n):
            print(f.readline(), end="")

def grep(pattern, filename):
    """
    looks for pattern in given file
    """
    for line in open(filename):
        if pattern in line:
            print(line.strip())

Overwriting functions.py

import functions

functions.head("day1.html", 3)

<!DOCTYPE html>
<html>
<head><meta charset="utf-8" />

functions.grep("html", "day1.html")

<!DOCTYPE html>
<html>
html {
html input[type="button"],
html input[disabled] {
/*! Source: https://github.com/h5bp/html5-boilerplate/blob/master/src/css/main.css */
html {
.fa-html5:before {
-khtml-user-select: none;
div.output_area .rendered_html table {
div.output_area .rendered_html img {
.rendered_html {
.rendered_html em {
.rendered_html strong {
.rendered_html u {
.rendered_html :link {
.rendered_html :visited {
.rendered_html h1 {
.rendered_html h2 {
.rendered_html h3 {
.rendered_html h4 {
.rendered_html h5 {
.rendered_html h6 {
.rendered_html h1:first-child {
.rendered_html h2:first-child {
.rendered_html h3:first-child {
.rendered_html h4:first-child {
.rendered_html h5:first-child {
.rendered_html h6:first-child {
.rendered_html ul {
.rendered_html ul ul {
.rendered_html ul ul ul {
.rendered_html ol {
.rendered_html ol ol {
.rendered_html ol ol ol {
.rendered_html ol ol ol ol {
.rendered_html ol ol ol ol ol {
.rendered_html * + ul {
.rendered_html * + ol {
.rendered_html hr {
.rendered_html pre {
.rendered_html pre,
.rendered_html code {
.rendered_html blockquote {
.rendered_html table {
.rendered_html tr,
.rendered_html th,
.rendered_html td {
.rendered_html td,
.rendered_html th {
.rendered_html th {
.rendered_html * + table {
.rendered_html p {
.rendered_html * + p {
.rendered_html img {
.rendered_html * + img {
.rendered_html img,
.rendered_html svg {
.rendered_html img.unconfined,
.rendered_html svg.unconfined {
.text_cell.rendered .rendered_html {
<!-- Custom stylesheet, it must be in the same directory as the html file -->
<div class="text_cell_render border-box-sizing rendered_html">
<p><a href="http://notes.pipal.in/2017/vmware-nov-advpython/day1.html">Day 1</a> | <a href="http://notes.pipal.in/2017/vmware-nov-advpython/day2.html">Day 2</a> | <a href="http://notes.pipal.in/2017/vmware-nov-advpython/day3.html">Day 3</a></p>
<div class="text_cell_render border-box-sizing rendered_html">
<div class="text_cell_render border-box-sizing rendered_html">
<div class="text_cell_render border-box-sizing rendered_html">
<div class="text_cell_render border-box-sizing rendered_html">
<div class="text_cell_render border-box-sizing rendered_html">
<div class="text_cell_render border-box-sizing rendered_html">
<div class="text_cell_render border-box-sizing rendered_html">
<div class="text_cell_render border-box-sizing rendered_html">
<div class="text_cell_render border-box-sizing rendered_html">
<div class="text_cell_render border-box-sizing rendered_html">
<div class="text_cell_render border-box-sizing rendered_html">
<div class="text_cell_render border-box-sizing rendered_html">
<div class="text_cell_render border-box-sizing rendered_html">
<pre>&#39;day1.html&#39;</pre>
<div class="text_cell_render border-box-sizing rendered_html">
<div class="text_cell_render border-box-sizing rendered_html">
<div class="text_cell_render border-box-sizing rendered_html">
<div class="text_cell_render border-box-sizing rendered_html">
<div class="text_cell_render border-box-sizing rendered_html">
<div class="text_cell_render border-box-sizing rendered_html">
</html>

s = "  hello \n"

print(s)

  hello

s.strip()

'hello'

%%file cmdline.py
import sys

commands = {}

def command(f):
    commands[f.__qualname__] = f
    return f

def main():
    cmdname = sys.argv[1]
    args = sys.argv[2:]
    cmd = commands[cmdname]
    cmd(*args)

Overwriting cmdline.py

%%file function_commands.py

from cmdline import command, main

@command
def cat(filename):
    """
    prints given file to standard output
    """
    print(open(filename).read())
    
@command    
def head(filename, n):
    """ 
    prints first n lines from given file to standard output
    """
    
    with open(filename) as f:
        for i in range(n):
            print(f.readline(), end="")

@command
def grep(pattern, filename):
    """
    looks for pattern in given file
    """
    for line in open(filename):
        if pattern in line:
            print(line.strip())
            
if __name__ == "__main__":            
    main()

Writing function_commands.py

!python function_commands.py cat data.csv

A1,B1,C1
A2,B2,C2
A3,B3,C3
A4,B4,C4

!python function_commands.py grep def cmdline.py

def command(f):
def main():

functions.grep.__doc__

'\n    looks for pattern in given file\n    '

%%file cmdline.py
import sys

commands = {}

def command(f):
    commands[f.__qualname__] = f
    return f

def help_():
    print("Following commands are available")
    for name, func in commands.items():
        print(name.rjust(6), ":" ,func.__doc__.strip())

def main():
    cmdname = sys.argv[1]
    if cmdname == "help":
        help_()
    else:
        args = sys.argv[2:]
        cmd = commands[cmdname]
        cmd(*args)

Overwriting cmdline.py

!python function_commands.py help

Following commands are available
   cat : prints given file to standard output
  head : prints first n lines from given file to standard output
  grep : looks for pattern in given file

Decorators taking arguments¶

@with_retries(retries=5, dealy=0.1)
def wget(url):
    ...

@debug(prefix)
def fib(n):

@login_required(role="admin")
def edit_interface(..)

import time

def with_retries(retries=5, delay=0):
    def decor(f):
        
        def wrapper(*args):
            print("retires = {0}, delay ={1}".format(retries, delay))
            for i in range(retries):
                try: 
                    return f(*args)
                except Exception as e:
                    print(f.__name__, args, "failed:", e)
                time.sleep(delay)
            print("Giving up!")
        
        return wrapper
    
    return decor
    
    
from urllib.request import urlopen

@with_retries(retries=3, delay=0.5)
def wget(url):
    response = urlopen(url)
    if response:
        return response.read()

wget("http://google.com/nosuchpage")

retires = 3, delay =0.5
wget ('http://google.com/nosuchpage',) failed: HTTP Error 404: Not Found
wget ('http://google.com/nosuchpage',) failed: HTTP Error 404: Not Found
wget ('http://google.com/nosuchpage',) failed: HTTP Error 404: Not Found
Giving up!

#ddecor_fun = with_retries()

#wget ddecor_fun(wget)

from functools import partial
import time

def with_retries(f=None, retries=5, delay=0):
    if f is None:
        return partial(with_retries, retries=retries, delay=delay)
    
    def g(*args):
        print("retires = {0}, delay ={1}".format(retries, delay))
        for i in range(retries):
            try: 
                return f(*args)
            except Exception as e:
                print(f.__name__, args, "failed:", e)
            time.sleep(delay)
        print("Giving up!")
    
    return g

from urllib.request import urlopen

@with_retries(retries=3, delay=0.5)
def wget(url):
    response = urlopen(url)
    if response:
        return response.read()

wget("http://google.com/noptapage")

retires = 3, delay =0.5
wget ('http://google.com/noptapage',) failed: HTTP Error 404: Not Found
wget ('http://google.com/noptapage',) failed: HTTP Error 404: Not Found
wget ('http://google.com/noptapage',) failed: HTTP Error 404: Not Found
Giving up!

import fib

help(fib.fib)

Help on function g in module trace:

g(*args)

!cat fib.py

import sys
from trace import trace

@trace
def fib(n):
    """
    computes nth fibonacci number
    """
    if n in [1,2]:
        return 1
    else:
        return fib(n-1)+fib(n-2)

    
def main():
    n = int(sys.argv[1])
    fib(n)
    
if __name__ == "__main__":
    main()

from fib import fib

fib.__name__

'g'

help(fib)

Help on function g in module trace:

g(*args)

from functools import wraps

def debug(f):
    
    @wraps(f)
    def wrapper(*args):
        print("DEBUG ", f.__name__)
        return f(*args)
    
    return wrapper

@debug
def add(x,y):
    """
    adds two entities
    """
    return x+y

help(add)

Help on function add in module __main__:

add(x, y)
    adds two entities

add.__name__

'add'

Iterators and Generators¶

for n in [1,2,3,4,5]:
    print(n)

for s in "string":
    print(s)

s
t
r
i
n
g

for key in {"one":1, "two":2}:
    print(key)

one
two

%%file nums.txt
one
two
three
four

Writing nums.txt

for line in open("nums.txt"):
    print(line.strip())

one
two
three
four

The Iteration Protocol¶

items = [1,2,3]

itr = iter(items)

next(itr)

1

next(itr)

2

next(itr)

3

next(itr)

---------------------------------------------------------------------------
StopIteration                             Traceback (most recent call last)
<ipython-input-56-94b7b2f7f392> in <module>()
----> 1 next(itr)

StopIteration:

Generators¶

def squares(n):
    for i in range(1,n+1):
        yield i*i

sqr = squares(4)

next(sqr)

1

for s in sqr:
    print(s)

4
9
16

def squares(n):
    print("inside squares")
    for i in range(1,n+1):
        print("Computing square of ", i)
        yield i*i
        print("back after yield")
    print("Finished squares")

sqr4 = squares(4)

sqr4

<generator object squares at 0x7f1e440aa830>

next(sqr4)

inside squares
Computing square of  1

1

next(sqr4)

back after yield
Computing square of  2

4

next(sqr4)

back after yield
Computing square of  3

9

next(sqr4)

back after yield
Computing square of  4

16

next(sqr4)

back after yield
Finished squares

---------------------------------------------------------------------------
StopIteration                             Traceback (most recent call last)
<ipython-input-69-af27318d178c> in <module>()
----> 1 next(sqr4)

StopIteration:

for i in squares(4):
    print(i)

inside squares
Computing square of  1
1
back after yield
Computing square of  2
4
back after yield
Computing square of  3
9
back after yield
Computing square of  4
16
back after yield
Finished squares

def f():
    for i in range(10000):
        if i==6:
            return 
        yield i*i

g = f()

for item in g:
    print(item)

0
1
4
9
16
25

problem Write a generator countdown which will take a number n as argument and generate sequence of integers starting from n and ending at 1

>>> for i in countdown(3):
...     print(i)
3
2
1

problem Write a generator triangular that takes a number n as argument and generate sequence of first n triangular numbers. nth triangular number is sum of first n natural numbers.

>>> for t in triangular(5):
...     print(t, end=",")
1,3,6,10,15

bonus problem : remove duplicates from a sequence while maintaining the order. can same generators be used to remove duplicate lines from a file?

>>> for item in consumedup([3,5,3,4,5,6,7,8,8,9])
...     print(item, end=",")
3,5,4,6,8,9,

def countdown(n):
    
    while n >0:
        yield n
        n -= 1

for i in countdown(5):
    print(i, end=",")

5,4,3,2,1,

def triangular(n):
    for i in range(1, n+1):
        yield sum(range(1,i+1))

for t in triangular(10):
    print(t, end=",")

1,3,6,10,15,21,28,36,45,55,

def consumedup(seq):
    seen = set()
    for item in seq:
        if item not in seen:
            yield item
            seen.add(item)

"".join([c for c in consumedup("This statement has few chars repeated!")])

'This taemnfwcrpd!'

[n*n for n in range(1,11)]

[1, 4, 9, 16, 25, 36, 49, 64, 81, 100]

s = (n*n for n in range(1,100000000))

next(s)

1

for item in s:
    if item == 625:
        break
    print(item, end=",")

4,9,16,25,36,49,64,81,100,121,144,169,196,225,256,289,324,361,400,441,484,529,576,

next(s)

676

sum(s)

333333328333333350000000

def cdown(n):
    return (n-i for i in range(n))

for i in cdown(10):
    print(i, end=",")

10,9,8,7,6,5,4,3,2,1,

sum((n*n for n in range(1,10)))

285

sum(n*n for n in range(1,10))

285

What is the advantage?¶

evaluation is lazy, need based
you can build lazy pipelines of data processing
simple interface of iteration protocol is what visible to user, every other complication is encapsulated inside generator function

Example : Building data pipelines¶

import os
def find(root):
    for path, dirnames, filenames in os.walk(root):
        for f in filenames:
            yield os.path.join(path, f)

def take(n , seq):
    it = iter(seq)
    return list([next(it) for i in range(n)])

def integers():
    """
    generates infinite sequence of natural numbers
    """
    i = 0
    while True:
        yield i
        i += 1
        
def squares(numbers):
    return (n*n for n in numbers)

take(10, squares(integers()))

[0, 1, 4, 9, 16, 25, 36, 49, 64, 81]

def grep(pattern, seq):
    return (x for x in seq if pattern in x)

files = find(".")
pyfiles = grep(".py", files)

print(take(10, pyfiles))

---------------------------------------------------------------------------
StopIteration                             Traceback (most recent call last)
<ipython-input-14-9b57db6b6cb0> in <module>()
----> 1 print(take(10, pyfiles))

<ipython-input-5-5cedf2c40eed> in take(n, seq)
      1 def take(n , seq):
      2     it = iter(seq)
----> 3     return list([next(it) for i in range(n)])

<ipython-input-5-5cedf2c40eed> in <listcomp>(.0)
      1 def take(n , seq):
      2     it = iter(seq)
----> 3     return list([next(it) for i in range(n)])

StopIteration:

def count(seq):
    i = 0
    for x in seq:
        i += 1
    return i

count(range(10))

10

files = find(".")
pyfiles = grep(".py", files)
print(count(pyfiles))

17

def readlines(filenames):
    """
    gives iterator over lines in all files in filenames
    """
    for f in filenames:
        for line in open(f):
            yield line

files = find(".")
pyfiles = grep(".py", files)
lines = readlines(pyfiles)
print(count(lines))

261

files = find(".")
pyfiles = grep(".py", files)
lines = readlines(pyfiles)
functions = grep("def ", lines)
print(count(functions))

37

!yes "hello python this is python" | head -n 10000000 > /tmp/big1.txt
!yes "hello python this is python" | head -n 10000000 > /tmp/big2.txt
!yes "hello python this is python" | head -n 10000000 > /tmp/big3.txt
!yes "hello python this is python" | head -n 10000000 > /tmp/big4.txt
!yes "hello python this is python" | head -n 10000000 > /tmp/big5.txt
!echo "somepattern" >> /tmp/big5.txt

yes: standard output: Broken pipe
yes: standard output: Broken pipe
yes: standard output: Broken pipe
yes: standard output: Broken pipe
yes: standard output: Broken pipe

lines = readlines(grep(".txt", find("/tmp")))

---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-1-514286363ad2> in <module>()
----> 1 lines = readlines(grep(".txt", find("/tmp")))
      2 grep("somepattern", lines)

NameError: name 'readlines' is not defined

take(1, grep("somepattern", lines))

['somepattern\n']

problem Write a function get_paragraphs to split given text into paragraphs. The function sholuld take a sequence of lines as argument and return a sequence of paragraphs. paragraphs are seperated by empty lines.

sample data is http://anandology.com/tmp/pg1342.txt

once the function is ready , we should be able to find

number of paragraphs
longest paragraph

def get_paragraphs(lines):
    para = []
    for line in lines:
        if line.strip()=="":
            if para:
                yield "".join(para)
                para = []
        else:
            para.append(line)
            
    if para:
        yield "".join(para)

p = get_paragraphs(open("pg1342.txt"))

p

<generator object get_paragraphs at 0x7f1e3575f4c0>

count(p)

2189

p = get_paragraphs(open("pg1342.txt"))

max(p, key=len)

Writing commandline applications¶

!ls

backup1.py  day1.ipynb		  functions.py	push		 trace.py
backup2.py  day2.html		  Makefile	sq1.py		 weekday.py
backup.py   day2.ipynb		  memoize.py	sq2.py
cmdline.py  fib1.py		  module.py	sq3.py
data.csv    fib.py		  nums.txt	sum.py
day1.html   function_commands.py  pg1342.txt	test_weekday.py

!ls /home/

lost+found  vikrant

!cp day1.html /tmp/

!ls -l

total 1660
-rw-rw-r-- 1 vikrant vikrant    148 Nov 20 16:54 backup1.py
-rw-rw-r-- 1 vikrant vikrant    235 Nov 20 16:56 backup2.py
-rw-rw-r-- 1 vikrant vikrant    145 Nov 20 16:51 backup.py
-rw-rw-r-- 1 vikrant vikrant    414 Nov 21 10:58 cmdline.py
-rw-rw-r-- 1 vikrant vikrant     35 Nov 20 10:21 data.csv
-rw-rw-r-- 1 vikrant vikrant 424641 Nov 20 17:06 day1.html
-rw-rw-r-- 1 vikrant vikrant  84469 Nov 20 17:06 day1.ipynb
-rw-rw-r-- 1 vikrant vikrant 339862 Nov 21 15:41 day2.html
-rw-rw-r-- 1 vikrant vikrant  43244 Nov 21 15:40 day2.ipynb
-rw-rw-r-- 1 vikrant vikrant    317 Nov 20 16:15 fib1.py
-rw-rw-r-- 1 vikrant vikrant    280 Nov 20 15:46 fib.py
-rw-rw-r-- 1 vikrant vikrant    619 Nov 21 10:12 function_commands.py
-rw-rw-r-- 1 vikrant vikrant    521 Nov 21 10:02 functions.py
-rw-r--r-- 1 vikrant vikrant    617 Nov 20 09:19 Makefile
-rw-rw-r-- 1 vikrant vikrant    163 Nov 20 16:11 memoize.py
-rw-rw-r-- 1 vikrant vikrant     49 Nov 20 16:49 module.py
-rw-rw-r-- 1 vikrant vikrant     18 Nov 21 12:10 nums.txt
-rw-rw-r-- 1 vikrant vikrant 717574 Sep 18  2016 pg1342.txt
-rw-rw-r-- 1 vikrant vikrant      0 Nov 21 15:41 push
-rw-rw-r-- 1 vikrant vikrant    103 Nov 20 16:26 sq1.py
-rw-rw-r-- 1 vikrant vikrant    127 Nov 20 16:29 sq2.py
-rw-rw-r-- 1 vikrant vikrant    240 Nov 20 16:30 sq3.py
-rw-rw-r-- 1 vikrant vikrant    183 Nov 20 15:41 sum.py
-rw-rw-r-- 1 vikrant vikrant    356 Nov 20 16:43 test_weekday.py
-rw-rw-r-- 1 vikrant vikrant    358 Nov 20 15:40 trace.py
-rw-rw-r-- 1 vikrant vikrant    169 Nov 20 16:39 weekday.py

!ls --help

Usage: ls [OPTION]... [FILE]...
List information about the FILEs (the current directory by default).
Sort entries alphabetically if none of -cftuvSUX nor --sort is specified.

Mandatory arguments to long options are mandatory for short options too.
  -a, --all                  do not ignore entries starting with .
  -A, --almost-all           do not list implied . and ..
      --author               with -l, print the author of each file
  -b, --escape               print C-style escapes for nongraphic characters
      --block-size=SIZE      scale sizes by SIZE before printing them; e.g.,
                               '--block-size=M' prints sizes in units of
                               1,048,576 bytes; see SIZE format below
  -B, --ignore-backups       do not list implied entries ending with ~
  -c                         with -lt: sort by, and show, ctime (time of last
                               modification of file status information);
                               with -l: show ctime and sort by name;
                               otherwise: sort by ctime, newest first
  -C                         list entries by columns
      --color[=WHEN]         colorize the output; WHEN can be 'always' (default
                               if omitted), 'auto', or 'never'; more info below
  -d, --directory            list directories themselves, not their contents
  -D, --dired                generate output designed for Emacs' dired mode
  -f                         do not sort, enable -aU, disable -ls --color
  -F, --classify             append indicator (one of */=>@|) to entries
      --file-type            likewise, except do not append '*'
      --format=WORD          across -x, commas -m, horizontal -x, long -l,
                               single-column -1, verbose -l, vertical -C
      --full-time            like -l --time-style=full-iso
  -g                         like -l, but do not list owner
      --group-directories-first
                             group directories before files;
                               can be augmented with a --sort option, but any
                               use of --sort=none (-U) disables grouping
  -G, --no-group             in a long listing, don't print group names
  -h, --human-readable       with -l and/or -s, print human readable sizes
                               (e.g., 1K 234M 2G)
      --si                   likewise, but use powers of 1000 not 1024
  -H, --dereference-command-line
                             follow symbolic links listed on the command line
      --dereference-command-line-symlink-to-dir
                             follow each command line symbolic link
                               that points to a directory
      --hide=PATTERN         do not list implied entries matching shell PATTERN
                               (overridden by -a or -A)
      --indicator-style=WORD  append indicator with style WORD to entry names:
                               none (default), slash (-p),
                               file-type (--file-type), classify (-F)
  -i, --inode                print the index number of each file
  -I, --ignore=PATTERN       do not list implied entries matching shell PATTERN
  -k, --kibibytes            default to 1024-byte blocks for disk usage
  -l                         use a long listing format
  -L, --dereference          when showing file information for a symbolic
                               link, show information for the file the link
                               references rather than for the link itself
  -m                         fill width with a comma separated list of entries
  -n, --numeric-uid-gid      like -l, but list numeric user and group IDs
  -N, --literal              print raw entry names (don't treat e.g. control
                               characters specially)
  -o                         like -l, but do not list group information
  -p, --indicator-style=slash
                             append / indicator to directories
  -q, --hide-control-chars   print ? instead of nongraphic characters
      --show-control-chars   show nongraphic characters as-is (the default,
                               unless program is 'ls' and output is a terminal)
  -Q, --quote-name           enclose entry names in double quotes
      --quoting-style=WORD   use quoting style WORD for entry names:
                               literal, locale, shell, shell-always,
                               shell-escape, shell-escape-always, c, escape
  -r, --reverse              reverse order while sorting
  -R, --recursive            list subdirectories recursively
  -s, --size                 print the allocated size of each file, in blocks
  -S                         sort by file size, largest first
      --sort=WORD            sort by WORD instead of name: none (-U), size (-S),
                               time (-t), version (-v), extension (-X)
      --time=WORD            with -l, show time as WORD instead of default
                               modification time: atime or access or use (-u);
                               ctime or status (-c); also use specified time
                               as sort key if --sort=time (newest first)
      --time-style=STYLE     with -l, show times using style STYLE:
                               full-iso, long-iso, iso, locale, or +FORMAT;
                               FORMAT is interpreted like in 'date'; if FORMAT
                               is FORMAT1<newline>FORMAT2, then FORMAT1 applies
                               to non-recent files and FORMAT2 to recent files;
                               if STYLE is prefixed with 'posix-', STYLE
                               takes effect only outside the POSIX locale
  -t                         sort by modification time, newest first
  -T, --tabsize=COLS         assume tab stops at each COLS instead of 8
  -u                         with -lt: sort by, and show, access time;
                               with -l: show access time and sort by name;
                               otherwise: sort by access time, newest first
  -U                         do not sort; list entries in directory order
  -v                         natural sort of (version) numbers within text
  -w, --width=COLS           set output width to COLS.  0 means no limit
  -x                         list entries by lines instead of by columns
  -X                         sort alphabetically by entry extension
  -Z, --context              print any security context of each file
  -1                         list one file per line.  Avoid '\n' with -q or -b
      --help     display this help and exit
      --version  output version information and exit

The SIZE argument is an integer and optional unit (example: 10K is 10*1024).
Units are K,M,G,T,P,E,Z,Y (powers of 1024) or KB,MB,... (powers of 1000).

Using color to distinguish file types is disabled both by default and
with --color=never.  With --color=auto, ls emits color codes only when
standard output is connected to a terminal.  The LS_COLORS environment
variable can change the settings.  Use the dircolors command to set it.

Exit status:
 0  if OK,
 1  if minor problems (e.g., cannot access subdirectory),
 2  if serious trouble (e.g., cannot access command-line argument).

GNU coreutils online help: <http://www.gnu.org/software/coreutils/>
Full documentation at: <http://www.gnu.org/software/coreutils/ls>
or available locally via: info '(coreutils) ls invocation'

%%file fib.py
import argparse

def fib(n):
    prev, current = 1, 1
    for i in range(2, n):
        current, prev = prev+current, current
        
    return current

def parse_args():
    p = argparse.ArgumentParser()
    p.add_argument("n", help="n for computing nth fibonacci number",
                    type=int)
    return p.parse_args()

def main():
    args = parse_args()
    print(args)
    print(fib(args.n))
    
if __name__ == "__main__":
    main()

Overwriting fib.py

!python fib.py

usage: fib.py [-h] n
fib.py: error: the following arguments are required: n

!python fib.py -h

usage: fib.py [-h] n

positional arguments:
  n           n for computing nth fibonacci number

optional arguments:
  -h, --help  show this help message and exit

!python fib.py 10

Namespace(n=10)
55

%%file fib.py
import argparse

def fib(n):
    prev, current = 1, 1
    for i in range(2, n):
        current, prev = prev+current, current
        
    return current

def print_fiblist(n):
    prev, current = 1, 1
    for i in range(2, n):
        current, prev = prev+current, current
        print(current, end=" ")

def parse_args():
    p = argparse.ArgumentParser()
    p.add_argument("n", help="n for computing nth fibonacci number",
                    type=int)
    p.add_argument("-s", "--sequence",
                  help="Print sequence",
                  action="store_true")
    
    return p.parse_args()

def main():
    args = parse_args()
    if args.sequence:
        print_fiblist(args.n)
    else:
        print(fib(args.n))
    
if __name__ == "__main__":
    main()

Overwriting fib.py

!python fib.py -h

usage: fib.py [-h] [-s] n

positional arguments:
  n               n for computing nth fibonacci number

optional arguments:
  -h, --help      show this help message and exit
  -s, --sequence  Print sequence

!python fib.py -s 10

2 3 5 8 13 21 34 55

!python fib.py 10

55

regular expressions¶

import re

pattern = re.compile("^def")

s = "def "
m = pattern.match(s)
print(m)
s = " def "
m = pattern.match(s)
print(m)

<_sre.SRE_Match object; span=(0, 3), match='def'>
None

s = """
def add(x,y):
    def should start at start of line
    return x+y
    
    
defination of add is shown above
"""

[line for line in s.split("\n") if pattern.match(line)]

['def add(x,y):', 'defination of add is shown above']

pattern = re.compile("[a-zA-Z]{5,10}[\d]{2}$")

print(pattern.match("hello"))

None

print(pattern.match("hello12"))

<_sre.SRE_Match object; span=(0, 7), match='hello12'>

print(pattern.match("hello123"))

None

print(list(range(5)))

[0, 1, 2, 3, 4]

str(list(range(3)))

'[0, 1, 2]'

p = re.compile("\[(\d), (\d), (\d)\]")

l = str(list(range(3)))

l

'[0, 1, 2]'

m = p.match(l)

m

<_sre.SRE_Match object; span=(0, 9), match='[0, 1, 2]'>

m.groups()

('0', '1', '2')

Working with databases¶

import sqlite3

conn = sqlite3.connect("a.db")
cur = conn.cursor()
result = cur.execute("select * from person")
result.fetchall()

[('alice', 'alice@example.com')]

def find_person(email):
    #not right way to do?
    q = "select * from person where email='{}'".format(email)
    print(q)
    cur = conn.cursor()
    result = cur.execute(q)
    return result.fetchall()

find_person("alice@example.com")

select * from person where email='alice@example.com'

[('alice', 'alice@example.com')]

def find_person(email):
    query = "select * from person where email=?"
    cur = conn.cursor()
    result = cur.execute(query, (email, ))
    return result.fetchall()

find_person("alice@example.com")

[('alice', 'alice@example.com')]

def query(conn, querystring, params):
    cur = conn.cursor()
    result = cur.execute(querystring, params)
    return result.fetchall()

query(conn, "select * from person where name=?", params=("alice",))

[('alice', 'alice@example.com')]

conn.close()

conn = sqlite3.connect("a.db")

cur = conn.cursor()
persons = [
    ("dilbert", "dilbert@dilbert.com"),
    ("calvin", "calvin@calvinhobes.com"),
    ("jerry", "jerry@disney.com")
]

cur.executemany("insert into person values(?,?)", persons)

<sqlite3.Cursor at 0x7f1e35755d50>

conn.commit()

conn.close()

conn = sqlite3.connect("a.db")
cur = conn.cursor()
for name, email in cur.execute("select * from person ORDER by name"):
    print(name.rjust(10), email)

     alice alice@example.com
    calvin calvin@calvinhobes.com
   dilbert dilbert@dilbert.com
     jerry jerry@disney.com