Advanced Python Training at VMWare - Day 2

Nov 20-22, 2017 Vikrant Patil

These notes will be available online at http://notes.pipal.in/2017/vmware-nov-advpython/ after this training!

© Pipal Academy LLP

Day 1 | Day 2 | Day 3

Example: converting functions to commands

In [2]:
%%file functions.py

def cat(filename):
    """
    prints given file to standard output
    """
    print(open(filename).read())
    
    
def head(filename, n):
    """ 
    prints first n lines from given file to standard output
    """
    
    with open(filename) as f:
        for i in range(n):
            print(f.readline(), end="")

def grep(pattern, filename):
    """
    looks for pattern in given file
    """
    for line in open(filename):
        if pattern in line:
            print(line.strip())
            
            
Overwriting functions.py
In [3]:
import functions
In [4]:
functions.head("day1.html", 3)
<!DOCTYPE html>
<html>
<head><meta charset="utf-8" />
In [5]:
functions.grep("html", "day1.html")
<!DOCTYPE html>
<html>
html {
html input[type="button"],
html input[disabled] {
/*! Source: https://github.com/h5bp/html5-boilerplate/blob/master/src/css/main.css */
html {
.fa-html5:before {
-khtml-user-select: none;
div.output_area .rendered_html table {
div.output_area .rendered_html img {
.rendered_html {
.rendered_html em {
.rendered_html strong {
.rendered_html u {
.rendered_html :link {
.rendered_html :visited {
.rendered_html h1 {
.rendered_html h2 {
.rendered_html h3 {
.rendered_html h4 {
.rendered_html h5 {
.rendered_html h6 {
.rendered_html h1:first-child {
.rendered_html h2:first-child {
.rendered_html h3:first-child {
.rendered_html h4:first-child {
.rendered_html h5:first-child {
.rendered_html h6:first-child {
.rendered_html ul {
.rendered_html ul ul {
.rendered_html ul ul ul {
.rendered_html ol {
.rendered_html ol ol {
.rendered_html ol ol ol {
.rendered_html ol ol ol ol {
.rendered_html ol ol ol ol ol {
.rendered_html * + ul {
.rendered_html * + ol {
.rendered_html hr {
.rendered_html pre {
.rendered_html pre,
.rendered_html code {
.rendered_html blockquote {
.rendered_html table {
.rendered_html tr,
.rendered_html th,
.rendered_html td {
.rendered_html td,
.rendered_html th {
.rendered_html th {
.rendered_html * + table {
.rendered_html p {
.rendered_html * + p {
.rendered_html img {
.rendered_html * + img {
.rendered_html img,
.rendered_html svg {
.rendered_html img.unconfined,
.rendered_html svg.unconfined {
.text_cell.rendered .rendered_html {
<!-- Custom stylesheet, it must be in the same directory as the html file -->
<div class="text_cell_render border-box-sizing rendered_html">
<p><a href="http://notes.pipal.in/2017/vmware-nov-advpython/day1.html">Day 1</a> | <a href="http://notes.pipal.in/2017/vmware-nov-advpython/day2.html">Day 2</a> | <a href="http://notes.pipal.in/2017/vmware-nov-advpython/day3.html">Day 3</a></p>
<div class="text_cell_render border-box-sizing rendered_html">
<div class="text_cell_render border-box-sizing rendered_html">
<div class="text_cell_render border-box-sizing rendered_html">
<div class="text_cell_render border-box-sizing rendered_html">
<div class="text_cell_render border-box-sizing rendered_html">
<div class="text_cell_render border-box-sizing rendered_html">
<div class="text_cell_render border-box-sizing rendered_html">
<div class="text_cell_render border-box-sizing rendered_html">
<div class="text_cell_render border-box-sizing rendered_html">
<div class="text_cell_render border-box-sizing rendered_html">
<div class="text_cell_render border-box-sizing rendered_html">
<div class="text_cell_render border-box-sizing rendered_html">
<div class="text_cell_render border-box-sizing rendered_html">
<pre>&#39;day1.html&#39;</pre>
<div class="text_cell_render border-box-sizing rendered_html">
<div class="text_cell_render border-box-sizing rendered_html">
<div class="text_cell_render border-box-sizing rendered_html">
<div class="text_cell_render border-box-sizing rendered_html">
<div class="text_cell_render border-box-sizing rendered_html">
<div class="text_cell_render border-box-sizing rendered_html">
</html>
In [6]:
s = "  hello \n"
In [7]:
print(s)
  hello 

In [8]:
s.strip()
Out[8]:
'hello'
In [12]:
%%file cmdline.py
import sys

commands = {}

def command(f):
    commands[f.__qualname__] = f
    return f

def main():
    cmdname = sys.argv[1]
    args = sys.argv[2:]
    cmd = commands[cmdname]
    cmd(*args)
Overwriting cmdline.py
In [10]:
%%file function_commands.py

from cmdline import command, main

@command
def cat(filename):
    """
    prints given file to standard output
    """
    print(open(filename).read())
    
@command    
def head(filename, n):
    """ 
    prints first n lines from given file to standard output
    """
    
    with open(filename) as f:
        for i in range(n):
            print(f.readline(), end="")

@command
def grep(pattern, filename):
    """
    looks for pattern in given file
    """
    for line in open(filename):
        if pattern in line:
            print(line.strip())
            
if __name__ == "__main__":            
    main()
Writing function_commands.py
In [13]:
!python function_commands.py cat data.csv
A1,B1,C1
A2,B2,C2
A3,B3,C3
A4,B4,C4
In [14]:
!python function_commands.py grep def cmdline.py
def command(f):
def main():
In [15]:
functions.grep.__doc__
Out[15]:
'\n    looks for pattern in given file\n    '
In [24]:
%%file cmdline.py
import sys

commands = {}

def command(f):
    commands[f.__qualname__] = f
    return f

def help_():
    print("Following commands are available")
    for name, func in commands.items():
        print(name.rjust(6), ":" ,func.__doc__.strip())

def main():
    cmdname = sys.argv[1]
    if cmdname == "help":
        help_()
    else:
        args = sys.argv[2:]
        cmd = commands[cmdname]
        cmd(*args)
Overwriting cmdline.py
In [25]:
!python function_commands.py help
Following commands are available
   cat : prints given file to standard output
  head : prints first n lines from given file to standard output
  grep : looks for pattern in given file

Decorators taking arguments

@with_retries(retries=5, dealy=0.1)
def wget(url):
    ...

@debug(prefix)
def fib(n):

@login_required(role="admin")
def edit_interface(..)
In [27]:
import time

def with_retries(retries=5, delay=0):
    def decor(f):
        
        def wrapper(*args):
            print("retires = {0}, delay ={1}".format(retries, delay))
            for i in range(retries):
                try: 
                    return f(*args)
                except Exception as e:
                    print(f.__name__, args, "failed:", e)
                time.sleep(delay)
            print("Giving up!")
        
        return wrapper
    
    return decor
    
    
from urllib.request import urlopen

@with_retries(retries=3, delay=0.5)
def wget(url):
    response = urlopen(url)
    if response:
        return response.read()
In [28]:
wget("http://google.com/nosuchpage")
retires = 3, delay =0.5
wget ('http://google.com/nosuchpage',) failed: HTTP Error 404: Not Found
wget ('http://google.com/nosuchpage',) failed: HTTP Error 404: Not Found
wget ('http://google.com/nosuchpage',) failed: HTTP Error 404: Not Found
Giving up!
In [29]:
#ddecor_fun = with_retries()
In [30]:
#wget ddecor_fun(wget)
In [31]:
from functools import partial
import time

def with_retries(f=None, retries=5, delay=0):
    if f is None:
        return partial(with_retries, retries=retries, delay=delay)
    
    def g(*args):
        print("retires = {0}, delay ={1}".format(retries, delay))
        for i in range(retries):
            try: 
                return f(*args)
            except Exception as e:
                print(f.__name__, args, "failed:", e)
            time.sleep(delay)
        print("Giving up!")
    
    return g
        
In [32]:
from urllib.request import urlopen

@with_retries(retries=3, delay=0.5)
def wget(url):
    response = urlopen(url)
    if response:
        return response.read()
In [33]:
wget("http://google.com/noptapage")
retires = 3, delay =0.5
wget ('http://google.com/noptapage',) failed: HTTP Error 404: Not Found
wget ('http://google.com/noptapage',) failed: HTTP Error 404: Not Found
wget ('http://google.com/noptapage',) failed: HTTP Error 404: Not Found
Giving up!
In [34]:
import fib
In [35]:
help(fib.fib)
Help on function g in module trace:

g(*args)

In [36]:
!cat fib.py
import sys
from trace import trace

@trace
def fib(n):
    """
    computes nth fibonacci number
    """
    if n in [1,2]:
        return 1
    else:
        return fib(n-1)+fib(n-2)

    
def main():
    n = int(sys.argv[1])
    fib(n)
    
if __name__ == "__main__":
    main()
In [37]:
from fib import fib
In [38]:
fib.__name__
Out[38]:
'g'
In [39]:
help(fib)
Help on function g in module trace:

g(*args)

In [ ]:
 
In [40]:
from functools import wraps
In [41]:
def debug(f):
    
    @wraps(f)
    def wrapper(*args):
        print("DEBUG ", f.__name__)
        return f(*args)
    
    return wrapper
In [42]:
@debug
def add(x,y):
    """
    adds two entities
    """
    return x+y
In [43]:
help(add)
Help on function add in module __main__:

add(x, y)
    adds two entities

In [44]:
add.__name__
Out[44]:
'add'

Iterators and Generators

In [45]:
for n in [1,2,3,4,5]:
    print(n)
1
2
3
4
5
In [46]:
for s in "string":
    print(s)
s
t
r
i
n
g
In [47]:
for key in {"one":1, "two":2}:
    print(key)
one
two
In [48]:
%%file nums.txt
one
two
three
four
Writing nums.txt
In [50]:
for line in open("nums.txt"):
    print(line.strip())
one
two
three
four

The Iteration Protocol

In [51]:
items = [1,2,3]
In [52]:
itr = iter(items)
In [53]:
next(itr)
Out[53]:
1
In [54]:
next(itr)
Out[54]:
2
In [55]:
next(itr)
Out[55]:
3
In [56]:
next(itr)
---------------------------------------------------------------------------
StopIteration                             Traceback (most recent call last)
<ipython-input-56-94b7b2f7f392> in <module>()
----> 1 next(itr)

StopIteration: 

Generators

In [57]:
def squares(n):
    for i in range(1,n+1):
        yield i*i
In [58]:
sqr = squares(4)
In [59]:
next(sqr)
Out[59]:
1
In [60]:
for s in sqr:
    print(s)
4
9
16
In [61]:
def squares(n):
    print("inside squares")
    for i in range(1,n+1):
        print("Computing square of ", i)
        yield i*i
        print("back after yield")
    print("Finished squares")
In [63]:
sqr4 = squares(4)
In [64]:
sqr4
Out[64]:
<generator object squares at 0x7f1e440aa830>
In [65]:
next(sqr4)
inside squares
Computing square of  1
Out[65]:
1
In [66]:
next(sqr4)
back after yield
Computing square of  2
Out[66]:
4
In [67]:
next(sqr4)
back after yield
Computing square of  3
Out[67]:
9
In [68]:
next(sqr4)
back after yield
Computing square of  4
Out[68]:
16
In [69]:
next(sqr4)
back after yield
Finished squares
---------------------------------------------------------------------------
StopIteration                             Traceback (most recent call last)
<ipython-input-69-af27318d178c> in <module>()
----> 1 next(sqr4)

StopIteration: 
In [71]:
for i in squares(4):
    print(i)
inside squares
Computing square of  1
1
back after yield
Computing square of  2
4
back after yield
Computing square of  3
9
back after yield
Computing square of  4
16
back after yield
Finished squares
In [72]:
def f():
    for i in range(10000):
        if i==6:
            return 
        yield i*i
In [73]:
g = f()
In [74]:
for item in g:
    print(item)
0
1
4
9
16
25

problem Write a generator countdown which will take a number n as argument and generate sequence of integers starting from n and ending at 1

>>> for i in countdown(3):
...     print(i)
3
2
1

problem Write a generator triangular that takes a number n as argument and generate sequence of first n triangular numbers. nth triangular number is sum of first n natural numbers.

>>> for t in triangular(5):
...     print(t, end=",")
1,3,6,10,15

bonus problem : remove duplicates from a sequence while maintaining the order. can same generators be used to remove duplicate lines from a file?

>>> for item in consumedup([3,5,3,4,5,6,7,8,8,9])
...     print(item, end=",")
3,5,4,6,8,9,
In [75]:
def countdown(n):
    
    while n >0:
        yield n
        n -= 1
    
    
In [76]:
for i in countdown(5):
    print(i, end=",")
5,4,3,2,1,
In [77]:
def triangular(n):
    for i in range(1, n+1):
        yield sum(range(1,i+1))
In [78]:
for t in triangular(10):
    print(t, end=",")
1,3,6,10,15,21,28,36,45,55,
In [79]:
def consumedup(seq):
    seen = set()
    for item in seq:
        if item not in seen:
            yield item
            seen.add(item)
            
In [81]:
"".join([c for c in consumedup("This statement has few chars repeated!")])
Out[81]:
'This taemnfwcrpd!'
In [82]:
[n*n for n in range(1,11)]
Out[82]:
[1, 4, 9, 16, 25, 36, 49, 64, 81, 100]
In [93]:
s = (n*n for n in range(1,100000000))
In [89]:
next(s)
Out[89]:
1
In [90]:
for item in s:
    if item == 625:
        break
    print(item, end=",")
4,9,16,25,36,49,64,81,100,121,144,169,196,225,256,289,324,361,400,441,484,529,576,
In [91]:
next(s)
Out[91]:
676
In [94]:
sum(s)
Out[94]:
333333328333333350000000
In [95]:
def cdown(n):
    return (n-i for i in range(n))
In [96]:
for i in cdown(10):
    print(i, end=",")
10,9,8,7,6,5,4,3,2,1,
In [97]:
sum((n*n for n in range(1,10)))
Out[97]:
285
In [98]:
sum(n*n for n in range(1,10))
Out[98]:
285

What is the advantage?

  • evaluation is lazy, need based
  • you can build lazy pipelines of data processing
  • simple interface of iteration protocol is what visible to user, every other complication is encapsulated inside generator function

Example : Building data pipelines

In [4]:
import os
def find(root):
    for path, dirnames, filenames in os.walk(root):
        for f in filenames:
            yield os.path.join(path, f)
In [5]:
def take(n , seq):
    it = iter(seq)
    return list([next(it) for i in range(n)])
In [6]:
def integers():
    """
    generates infinite sequence of natural numbers
    """
    i = 0
    while True:
        yield i
        i += 1
        
def squares(numbers):
    return (n*n for n in numbers)
In [7]:
take(10, squares(integers()))
Out[7]:
[0, 1, 4, 9, 16, 25, 36, 49, 64, 81]
In [8]:
def grep(pattern, seq):
    return (x for x in seq if pattern in x)
In [9]:
files = find(".")
pyfiles = grep(".py", files)
In [14]:
print(take(10, pyfiles))
---------------------------------------------------------------------------
StopIteration                             Traceback (most recent call last)
<ipython-input-14-9b57db6b6cb0> in <module>()
----> 1 print(take(10, pyfiles))

<ipython-input-5-5cedf2c40eed> in take(n, seq)
      1 def take(n , seq):
      2     it = iter(seq)
----> 3     return list([next(it) for i in range(n)])

<ipython-input-5-5cedf2c40eed> in <listcomp>(.0)
      1 def take(n , seq):
      2     it = iter(seq)
----> 3     return list([next(it) for i in range(n)])

StopIteration: 
In [ ]:
def count(seq):
    i = 0
    for x in seq:
        i += 1
    return i
In [12]:
count(range(10))
Out[12]:
10
In [13]:
files = find(".")
pyfiles = grep(".py", files)
print(count(pyfiles))
17
In [112]:
def readlines(filenames):
    """
    gives iterator over lines in all files in filenames
    """
    for f in filenames:
        for line in open(f):
            yield line
In [114]:
files = find(".")
pyfiles = grep(".py", files)
lines = readlines(pyfiles)
print(count(lines))
261
In [116]:
files = find(".")
pyfiles = grep(".py", files)
lines = readlines(pyfiles)
functions = grep("def ", lines)
print(count(functions))
37
In [2]:
!yes "hello python this is python" | head -n 10000000 > /tmp/big1.txt
!yes "hello python this is python" | head -n 10000000 > /tmp/big2.txt
!yes "hello python this is python" | head -n 10000000 > /tmp/big3.txt
!yes "hello python this is python" | head -n 10000000 > /tmp/big4.txt
!yes "hello python this is python" | head -n 10000000 > /tmp/big5.txt
!echo "somepattern" >> /tmp/big5.txt
yes: standard output: Broken pipe
yes: standard output: Broken pipe
yes: standard output: Broken pipe
yes: standard output: Broken pipe
yes: standard output: Broken pipe
In [1]:
lines = readlines(grep(".txt", find("/tmp")))
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-1-514286363ad2> in <module>()
----> 1 lines = readlines(grep(".txt", find("/tmp")))
      2 grep("somepattern", lines)

NameError: name 'readlines' is not defined
In [119]:
take(1, grep("somepattern", lines))
Out[119]:
['somepattern\n']
In [ ]:
 

problem Write a function get_paragraphs to split given text into paragraphs. The function sholuld take a sequence of lines as argument and return a sequence of paragraphs. paragraphs are seperated by empty lines.

sample data is http://anandology.com/tmp/pg1342.txt

once the function is ready , we should be able to find

  • number of paragraphs
  • longest paragraph
In [120]:
def get_paragraphs(lines):
    para = []
    for line in lines:
        if line.strip()=="":
            if para:
                yield "".join(para)
                para = []
        else:
            para.append(line)
            
    if para:
        yield "".join(para)
In [125]:
p = get_paragraphs(open("pg1342.txt"))
In [126]:
p
Out[126]:
<generator object get_paragraphs at 0x7f1e3575f4c0>
In [127]:
count(p)
Out[127]:
2189
In [128]:
p = get_paragraphs(open("pg1342.txt"))

max(p, key=len)

Writing commandline applications

In [130]:
!ls
backup1.py  day1.ipynb		  functions.py	push		 trace.py
backup2.py  day2.html		  Makefile	sq1.py		 weekday.py
backup.py   day2.ipynb		  memoize.py	sq2.py
cmdline.py  fib1.py		  module.py	sq3.py
data.csv    fib.py		  nums.txt	sum.py
day1.html   function_commands.py  pg1342.txt	test_weekday.py
In [131]:
!ls /home/
lost+found  vikrant
In [132]:
!cp day1.html /tmp/
In [133]:
!ls -l
total 1660
-rw-rw-r-- 1 vikrant vikrant    148 Nov 20 16:54 backup1.py
-rw-rw-r-- 1 vikrant vikrant    235 Nov 20 16:56 backup2.py
-rw-rw-r-- 1 vikrant vikrant    145 Nov 20 16:51 backup.py
-rw-rw-r-- 1 vikrant vikrant    414 Nov 21 10:58 cmdline.py
-rw-rw-r-- 1 vikrant vikrant     35 Nov 20 10:21 data.csv
-rw-rw-r-- 1 vikrant vikrant 424641 Nov 20 17:06 day1.html
-rw-rw-r-- 1 vikrant vikrant  84469 Nov 20 17:06 day1.ipynb
-rw-rw-r-- 1 vikrant vikrant 339862 Nov 21 15:41 day2.html
-rw-rw-r-- 1 vikrant vikrant  43244 Nov 21 15:40 day2.ipynb
-rw-rw-r-- 1 vikrant vikrant    317 Nov 20 16:15 fib1.py
-rw-rw-r-- 1 vikrant vikrant    280 Nov 20 15:46 fib.py
-rw-rw-r-- 1 vikrant vikrant    619 Nov 21 10:12 function_commands.py
-rw-rw-r-- 1 vikrant vikrant    521 Nov 21 10:02 functions.py
-rw-r--r-- 1 vikrant vikrant    617 Nov 20 09:19 Makefile
-rw-rw-r-- 1 vikrant vikrant    163 Nov 20 16:11 memoize.py
-rw-rw-r-- 1 vikrant vikrant     49 Nov 20 16:49 module.py
-rw-rw-r-- 1 vikrant vikrant     18 Nov 21 12:10 nums.txt
-rw-rw-r-- 1 vikrant vikrant 717574 Sep 18  2016 pg1342.txt
-rw-rw-r-- 1 vikrant vikrant      0 Nov 21 15:41 push
-rw-rw-r-- 1 vikrant vikrant    103 Nov 20 16:26 sq1.py
-rw-rw-r-- 1 vikrant vikrant    127 Nov 20 16:29 sq2.py
-rw-rw-r-- 1 vikrant vikrant    240 Nov 20 16:30 sq3.py
-rw-rw-r-- 1 vikrant vikrant    183 Nov 20 15:41 sum.py
-rw-rw-r-- 1 vikrant vikrant    356 Nov 20 16:43 test_weekday.py
-rw-rw-r-- 1 vikrant vikrant    358 Nov 20 15:40 trace.py
-rw-rw-r-- 1 vikrant vikrant    169 Nov 20 16:39 weekday.py
In [134]:
!ls --help
Usage: ls [OPTION]... [FILE]...
List information about the FILEs (the current directory by default).
Sort entries alphabetically if none of -cftuvSUX nor --sort is specified.

Mandatory arguments to long options are mandatory for short options too.
  -a, --all                  do not ignore entries starting with .
  -A, --almost-all           do not list implied . and ..
      --author               with -l, print the author of each file
  -b, --escape               print C-style escapes for nongraphic characters
      --block-size=SIZE      scale sizes by SIZE before printing them; e.g.,
                               '--block-size=M' prints sizes in units of
                               1,048,576 bytes; see SIZE format below
  -B, --ignore-backups       do not list implied entries ending with ~
  -c                         with -lt: sort by, and show, ctime (time of last
                               modification of file status information);
                               with -l: show ctime and sort by name;
                               otherwise: sort by ctime, newest first
  -C                         list entries by columns
      --color[=WHEN]         colorize the output; WHEN can be 'always' (default
                               if omitted), 'auto', or 'never'; more info below
  -d, --directory            list directories themselves, not their contents
  -D, --dired                generate output designed for Emacs' dired mode
  -f                         do not sort, enable -aU, disable -ls --color
  -F, --classify             append indicator (one of */=>@|) to entries
      --file-type            likewise, except do not append '*'
      --format=WORD          across -x, commas -m, horizontal -x, long -l,
                               single-column -1, verbose -l, vertical -C
      --full-time            like -l --time-style=full-iso
  -g                         like -l, but do not list owner
      --group-directories-first
                             group directories before files;
                               can be augmented with a --sort option, but any
                               use of --sort=none (-U) disables grouping
  -G, --no-group             in a long listing, don't print group names
  -h, --human-readable       with -l and/or -s, print human readable sizes
                               (e.g., 1K 234M 2G)
      --si                   likewise, but use powers of 1000 not 1024
  -H, --dereference-command-line
                             follow symbolic links listed on the command line
      --dereference-command-line-symlink-to-dir
                             follow each command line symbolic link
                               that points to a directory
      --hide=PATTERN         do not list implied entries matching shell PATTERN
                               (overridden by -a or -A)
      --indicator-style=WORD  append indicator with style WORD to entry names:
                               none (default), slash (-p),
                               file-type (--file-type), classify (-F)
  -i, --inode                print the index number of each file
  -I, --ignore=PATTERN       do not list implied entries matching shell PATTERN
  -k, --kibibytes            default to 1024-byte blocks for disk usage
  -l                         use a long listing format
  -L, --dereference          when showing file information for a symbolic
                               link, show information for the file the link
                               references rather than for the link itself
  -m                         fill width with a comma separated list of entries
  -n, --numeric-uid-gid      like -l, but list numeric user and group IDs
  -N, --literal              print raw entry names (don't treat e.g. control
                               characters specially)
  -o                         like -l, but do not list group information
  -p, --indicator-style=slash
                             append / indicator to directories
  -q, --hide-control-chars   print ? instead of nongraphic characters
      --show-control-chars   show nongraphic characters as-is (the default,
                               unless program is 'ls' and output is a terminal)
  -Q, --quote-name           enclose entry names in double quotes
      --quoting-style=WORD   use quoting style WORD for entry names:
                               literal, locale, shell, shell-always,
                               shell-escape, shell-escape-always, c, escape
  -r, --reverse              reverse order while sorting
  -R, --recursive            list subdirectories recursively
  -s, --size                 print the allocated size of each file, in blocks
  -S                         sort by file size, largest first
      --sort=WORD            sort by WORD instead of name: none (-U), size (-S),
                               time (-t), version (-v), extension (-X)
      --time=WORD            with -l, show time as WORD instead of default
                               modification time: atime or access or use (-u);
                               ctime or status (-c); also use specified time
                               as sort key if --sort=time (newest first)
      --time-style=STYLE     with -l, show times using style STYLE:
                               full-iso, long-iso, iso, locale, or +FORMAT;
                               FORMAT is interpreted like in 'date'; if FORMAT
                               is FORMAT1<newline>FORMAT2, then FORMAT1 applies
                               to non-recent files and FORMAT2 to recent files;
                               if STYLE is prefixed with 'posix-', STYLE
                               takes effect only outside the POSIX locale
  -t                         sort by modification time, newest first
  -T, --tabsize=COLS         assume tab stops at each COLS instead of 8
  -u                         with -lt: sort by, and show, access time;
                               with -l: show access time and sort by name;
                               otherwise: sort by access time, newest first
  -U                         do not sort; list entries in directory order
  -v                         natural sort of (version) numbers within text
  -w, --width=COLS           set output width to COLS.  0 means no limit
  -x                         list entries by lines instead of by columns
  -X                         sort alphabetically by entry extension
  -Z, --context              print any security context of each file
  -1                         list one file per line.  Avoid '\n' with -q or -b
      --help     display this help and exit
      --version  output version information and exit

The SIZE argument is an integer and optional unit (example: 10K is 10*1024).
Units are K,M,G,T,P,E,Z,Y (powers of 1024) or KB,MB,... (powers of 1000).

Using color to distinguish file types is disabled both by default and
with --color=never.  With --color=auto, ls emits color codes only when
standard output is connected to a terminal.  The LS_COLORS environment
variable can change the settings.  Use the dircolors command to set it.

Exit status:
 0  if OK,
 1  if minor problems (e.g., cannot access subdirectory),
 2  if serious trouble (e.g., cannot access command-line argument).

GNU coreutils online help: <http://www.gnu.org/software/coreutils/>
Full documentation at: <http://www.gnu.org/software/coreutils/ls>
or available locally via: info '(coreutils) ls invocation'
In [135]:
%%file fib.py
import argparse

def fib(n):
    prev, current = 1, 1
    for i in range(2, n):
        current, prev = prev+current, current
        
    return current

def parse_args():
    p = argparse.ArgumentParser()
    p.add_argument("n", help="n for computing nth fibonacci number",
                    type=int)
    return p.parse_args()

def main():
    args = parse_args()
    print(args)
    print(fib(args.n))
    
if __name__ == "__main__":
    main()
Overwriting fib.py
In [136]:
!python fib.py
usage: fib.py [-h] n
fib.py: error: the following arguments are required: n
In [137]:
!python fib.py -h 
usage: fib.py [-h] n

positional arguments:
  n           n for computing nth fibonacci number

optional arguments:
  -h, --help  show this help message and exit
In [138]:
!python fib.py 10
Namespace(n=10)
55
In [139]:
%%file fib.py
import argparse

def fib(n):
    prev, current = 1, 1
    for i in range(2, n):
        current, prev = prev+current, current
        
    return current

def print_fiblist(n):
    prev, current = 1, 1
    for i in range(2, n):
        current, prev = prev+current, current
        print(current, end=" ")

def parse_args():
    p = argparse.ArgumentParser()
    p.add_argument("n", help="n for computing nth fibonacci number",
                    type=int)
    p.add_argument("-s", "--sequence",
                  help="Print sequence",
                  action="store_true")
    
    return p.parse_args()

def main():
    args = parse_args()
    if args.sequence:
        print_fiblist(args.n)
    else:
        print(fib(args.n))
    
if __name__ == "__main__":
    main()
Overwriting fib.py
In [140]:
!python fib.py -h
usage: fib.py [-h] [-s] n

positional arguments:
  n               n for computing nth fibonacci number

optional arguments:
  -h, --help      show this help message and exit
  -s, --sequence  Print sequence
In [141]:
!python fib.py -s 10
2 3 5 8 13 21 34 55 
In [142]:
!python fib.py 10
55

regular expressions

In [143]:
import re
In [144]:
pattern = re.compile("^def")
In [147]:
s = "def "
m = pattern.match(s)
print(m)
s = " def "
m = pattern.match(s)
print(m)
<_sre.SRE_Match object; span=(0, 3), match='def'>
None
In [149]:
s = """
def add(x,y):
    def should start at start of line
    return x+y
    
    
defination of add is shown above
"""
In [150]:
[line for line in s.split("\n") if pattern.match(line)]
Out[150]:
['def add(x,y):', 'defination of add is shown above']
In [156]:
pattern = re.compile("[a-zA-Z]{5,10}[\d]{2}$")
In [153]:
print(pattern.match("hello"))
None
In [157]:
print(pattern.match("hello12"))
<_sre.SRE_Match object; span=(0, 7), match='hello12'>
In [158]:
print(pattern.match("hello123"))
None
In [161]:
print(list(range(5)))
[0, 1, 2, 3, 4]
In [162]:
str(list(range(3)))
Out[162]:
'[0, 1, 2]'
In [163]:
p = re.compile("\[(\d), (\d), (\d)\]")
In [164]:
l = str(list(range(3)))
In [165]:
l
Out[165]:
'[0, 1, 2]'
In [166]:
m = p.match(l)
In [167]:
m
Out[167]:
<_sre.SRE_Match object; span=(0, 9), match='[0, 1, 2]'>
In [168]:
m.groups()
Out[168]:
('0', '1', '2')

Working with databases

In [169]:
import sqlite3
In [172]:
conn = sqlite3.connect("a.db")
cur = conn.cursor()
result = cur.execute("select * from person")
result.fetchall()
Out[172]:
[('alice', 'alice@example.com')]
In [175]:
def find_person(email):
    #not right way to do?
    q = "select * from person where email='{}'".format(email)
    print(q)
    cur = conn.cursor()
    result = cur.execute(q)
    return result.fetchall()
In [176]:
find_person("alice@example.com")
select * from person where email='alice@example.com'
Out[176]:
[('alice', 'alice@example.com')]
In [177]:
def find_person(email):
    query = "select * from person where email=?"
    cur = conn.cursor()
    result = cur.execute(query, (email, ))
    return result.fetchall()
In [178]:
find_person("alice@example.com")
Out[178]:
[('alice', 'alice@example.com')]
In [179]:
def query(conn, querystring, params):
    cur = conn.cursor()
    result = cur.execute(querystring, params)
    return result.fetchall()
In [182]:
query(conn, "select * from person where name=?", params=("alice",))
Out[182]:
[('alice', 'alice@example.com')]
In [183]:
conn.close()
In [184]:
conn = sqlite3.connect("a.db")

cur = conn.cursor()
persons = [
    ("dilbert", "dilbert@dilbert.com"),
    ("calvin", "calvin@calvinhobes.com"),
    ("jerry", "jerry@disney.com")
]
In [185]:
cur.executemany("insert into person values(?,?)", persons)
Out[185]:
<sqlite3.Cursor at 0x7f1e35755d50>
In [186]:
conn.commit()
In [187]:
conn.close()
In [189]:
conn = sqlite3.connect("a.db")
cur = conn.cursor()
for name, email in cur.execute("select * from person ORDER by name"):
    print(name.rjust(10), email)
     alice alice@example.com
    calvin calvin@calvinhobes.com
   dilbert dilbert@dilbert.com
     jerry jerry@disney.com
In [ ]: