Advanced Python Training at Arcesium - Day 2

Oct 22-24, 2018 Vikrant Patil

These notes are available online at http://notes.pipal.in/2018/arcesium-advanced-oct/day2.html

© Pipal Academy LLP

Day 1 | Day 2 | Day 3

Iterations

In [2]:
for c in "some string":
    print(c,end=",")
s,o,m,e, ,s,t,r,i,n,g,
In [4]:
for n in range(10):
    print(n, end=",")
0,1,2,3,4,5,6,7,8,9,
In [5]:
def numbers(n):
    """Generator that yields i = 0, 1, 2, ... while i < n, printing a trace message at each step.

    The print calls make the control flow visible: nothing in the body runs
    until the first next(), and execution pauses at every yield.
    """
    print("Begin numbers")
    i = 0
    while i<n:
        print("yielding ...")
        yield i
        # Execution resumes here on the following next() call.
        print("Back to numbers...")
        i += 1
    # Reached once the loop condition fails; the generator then finishes.
    print("Finished numbers!")
In [6]:
five = numbers(5)
In [7]:
five
Out[7]:
<generator object numbers at 0x7ff7607826d0>
In [8]:
next(five)
Begin numbers
yielding ...
Out[8]:
0
In [9]:
next(five)
Back to numbers...
yielding ...
Out[9]:
1
In [10]:
next(five)
Back to numbers...
yielding ...
Out[10]:
2
In [11]:
next(five)
Back to numbers...
yielding ...
Out[11]:
3
In [12]:
next(five)
Back to numbers...
yielding ...
Out[12]:
4
In [13]:
next(five)
Back to numbers...
Finished numbers!
---------------------------------------------------------------------------
StopIteration                             Traceback (most recent call last)
<ipython-input-13-48867e55ed06> in <module>()
----> 1 next(five)

StopIteration: 
In [14]:
for i in numbers(4):
    print(i, end=",")
Begin numbers
yielding ...
0,Back to numbers...
yielding ...
1,Back to numbers...
yielding ...
2,Back to numbers...
yielding ...
3,Back to numbers...
Finished numbers!
In [26]:
def greet():
    """Generator that yields a prompt and prints a greeting for each value sent in.

    Prime it with send(None) (or next()) to reach the first yield; each later
    send(value) resumes the body with `name` bound to that value. Sending
    "end" prints one last greeting and then ends the generator.
    """
    name = None
    while name != "end":
        # The yield expression evaluates to whatever the caller passes to send().
        name = yield "Hello, whats your name"
        print("Hello ", name)
In [27]:
g = greet()
In [28]:
g.send(None)
Out[28]:
'Hello, whats your name'
In [29]:
g.send("Vikrant")
Hello  Vikrant
Out[29]:
'Hello, whats your name'
In [30]:
three = numbers(3)
In [31]:
three
Out[31]:
<generator object numbers at 0x7ff760782db0>
In [32]:
type(three)
Out[32]:
generator
In [33]:
type(g)
Out[33]:
generator
In [34]:
g
Out[34]:
<generator object greet at 0x7ff7607943b8>
In [35]:
def numbers(n):
    """Generator that yields i = 0, 1, 2, ... while i < n, printing a trace message at each step.

    The print calls make the control flow visible: nothing in the body runs
    until the first next(), and execution pauses at every yield.
    """
    print("Begin numbers")
    i = 0
    while i<n:
        print("yielding ...")
        yield i
        # Execution resumes here on the following next() call.
        print("Back to numbers...")
        i += 1
    # Reached once the loop condition fails; the generator then finishes.
    print("Finished numbers!")
In [36]:
three = numbers(3)
In [37]:
next(three)
Begin numbers
yielding ...
Out[37]:
0
In [38]:
next(three)
Back to numbers...
yielding ...
Out[38]:
1
In [39]:
next(three)
Back to numbers...
yielding ...
Out[39]:
2
In [40]:
def numbers_sqrs(n):
    """Generator that yields i and then i*i for i = 0, 1, 2, ... while i < n.

    Each pass through the loop has two yield points, so it takes two next()
    calls to advance i by one.
    """
    print("Begin numbers")
    i = 0
    while i<n:
        print("yielding ...")
        yield i
        # Resumes here on the next next() call, then pauses again at the second yield.
        print("Back to numbers...1")
        yield i*i
        print("Back to numbers...2")
        i += 1
    # Reached once the loop condition fails; the generator then finishes.
    print("Finished numbers!")
In [43]:
g = numbers_sqrs(3)
In [44]:
next(g)
Begin numbers
yielding ...
Out[44]:
0
In [45]:
next(g)
Back to numbers...1
Out[45]:
0
In [46]:
next(g)
Back to numbers...2
yielding ...
Out[46]:
1
In [47]:
next(g)
Back to numbers...1
Out[47]:
1
In [48]:
next(g)
Back to numbers...2
yielding ...
Out[48]:
2
In [49]:
next(g)
Back to numbers...1
Out[49]:
4
In [50]:
next(g)
Back to numbers...2
Finished numbers!
---------------------------------------------------------------------------
StopIteration                             Traceback (most recent call last)
<ipython-input-50-5f315c5de15b> in <module>()
----> 1 next(g)

StopIteration: 

problem

  • Write a generator function countdown which yields the numbers from n down to 1.
    >>> for n in countdown(3):
    ...    print(n, end=",")
    3,2,1,
  • Write a function take which makes use of next and a list comprehension to take only the first n objects from a given generator.
    >>> take(countdown(100), 5)
    [100, 99, 98, 97, 96]
  • Write a generator which generates an infinite series of Fibonacci numbers!
In [51]:
def countdown(n):
    """Generate n, n-1, n-2, ... and stop as soon as the value is no longer positive."""
    remaining = n
    while True:
        if not remaining > 0:
            return
        yield remaining
        remaining = remaining - 1
In [52]:
def take(seq, n):
    """Return a list holding the next n items of seq.

    seq may be any iterable. When it is an iterator, only the returned items
    are consumed from it. If fewer than n items remain, the shorter list is
    returned instead of letting StopIteration escape; n <= 0 gives [].
    """
    from itertools import islice

    # islice stops quietly at exhaustion. A bare next() would raise
    # StopIteration, which becomes RuntimeError if take() runs inside a generator.
    return list(islice(iter(seq), max(n, 0)))
In [56]:
def fib():
    """Endless generator of Fibonacci numbers: 1, 1, 2, 3, 5, 8, ..."""
    a, b = 1, 1
    while True:
        yield a
        # Slide the window one step along the sequence.
        a, b = b, a + b
In [57]:
f = fib()
In [58]:
take(f, 10)
Out[58]:
[1, 1, 2, 3, 5, 8, 13, 21, 34, 55]
In [60]:
import os
In [71]:
import os
def find(location=None):
    """Lazily yield the path of every file found by os.walk under location.

    A falsy location (the default None, or "") means the current working directory.
    """
    root = location if location else os.getcwd()
    for folder, _subdirs, filenames in os.walk(root):
        yield from (os.path.join(folder, name) for name in filenames)
        
In [77]:
files = find(location="/home/vikrant/programming/explorations/python/")
In [78]:
# Preview the unfiltered listing: the output below contains non-.py files,
# so the generator being sampled is `files`, not a grep-filtered one.
take(files, 5)
Out[78]:
['/home/vikrant/programming/explorations/python/emailids.txt',
 '/home/vikrant/programming/explorations/python/bokeh_plot.py',
 '/home/vikrant/programming/explorations/python/b.txt~',
 '/home/vikrant/programming/explorations/python/simple.log',
 '/home/vikrant/programming/explorations/python/#functions.py#']
In [74]:
import re
def grep(pattern, seq):
    """Lazily yield the items of seq that the regular expression matches.

    Matching uses re.match semantics, i.e. the pattern is anchored at the
    start of each item.
    """
    is_match = re.compile(pattern).match
    yield from filter(is_match, seq)
In [81]:
# Walk the directory lazily, keep only paths ending in ".py", and show the first five.
files = find(location="/home/vikrant/programming/explorations/python/")
pyfiles = grep(r".*\.py$", files)  # raw string: "\." is an invalid escape in a normal literal
take(pyfiles, 5)
Out[81]:
['/home/vikrant/programming/explorations/python/wc.py',
 '/home/vikrant/programming/explorations/python/setup.py',
 '/home/vikrant/programming/explorations/python/coroutines.py',
 '/home/vikrant/programming/explorations/python/pipe.py',
 '/home/vikrant/programming/explorations/python/x.py']
In [86]:
def lines(files):
    """Lazily yield every line of every file named in files, in order.

    Each file is opened in text mode only when iteration reaches it and is
    closed before the next one is opened.
    """
    for path in files:
        with open(path) as handle:
            yield from handle
In [87]:
# Pipeline: all files -> .py files -> their lines -> lines starting with "def "; show ten.
files = find(location="/home/vikrant/programming/explorations/python/")
pyfiles = grep(r".*\.py$", files)  # raw string: "\." is an invalid escape in a normal literal
lines_ = lines(pyfiles)
take(grep("^def .*", lines_), 10)
Out[87]:
['def word_count(inbuffer):\n',
 'def wc(filename):\n',
 'def coroutine(func):\n',
 'def aout():\n',
 'def y():\n',
 'def test_f():\n',
 'def log(*args):\n',
 'def trace(f):\n',
 'def multiply(a,b):\n',
 'def fib(n):\n']
In [88]:
def count(seq):
    """Return how many items seq produces, consuming it without storing the items."""
    total = 0
    for _ in seq:
        total += 1
    return total
In [89]:
# Count the lines starting with "def" across all .py files, one item at a time.
files = find(location="/home/vikrant/programming/explorations/python/")
pyfiles = grep(r".*\.py$", files)  # raw string: "\." is an invalid escape in a normal literal
lines_ = lines(pyfiles)
functions = grep("^def .*", lines_)
count(functions)
Out[89]:
3823
In [90]:
# Count the .py files under the directory.
files = find(location="/home/vikrant/programming/explorations/python/")
pyfiles = grep(r".*\.py$", files)  # raw string: "\." is an invalid escape in a normal literal
count(pyfiles)
Out[90]:
924

generator expressions

In [91]:
squares = (i*i for i in range(100))
In [92]:
squares
Out[92]:
<generator object <genexpr> at 0x7ff76049eca8>
In [93]:
next(squares)
Out[93]:
0
In [94]:
next(squares)
Out[94]:
1
In [96]:
def squares(n):
    """Return a generator over i*i for every i in range(n)."""
    source = range(n)
    return (value * value for value in source)
In [97]:
s100 = squares(100)
In [98]:
next(s100)
Out[98]:
0
In [99]:
max([i for i in range(10)])
Out[99]:
9
In [100]:
max(i for i in range(10))
Out[100]:
9
In [101]:
sum(i*i for i in range(100))
Out[101]:
328350
In [102]:
{a:y for a,y in [(1,1),(2,2)]}
Out[102]:
{1: 1, 2: 2}

debugging

In [119]:
%%file ssq.py
def SSQ(n):
    """Return the sum of the squares of the decimal digits of the integer n.

    The sign of n is ignored, so SSQ(-23) == SSQ(23) == 2*2 + 3*3 == 13.
    """
    # abs() drops a leading "-", which int() could not parse as a digit.
    return sum(int(d) ** 2 for d in str(abs(n)))

def SSQ_(n):
    """Endless generator of the chain SSQ(n), SSQ(SSQ(n)), ... obtained by re-applying SSQ."""
    current = n
    while True:
        current = SSQ(current)
        yield current
        
# Demo when run as a script: print the first five values of the chain starting from 23.
if __name__ == "__main__":
    ssq = SSQ_(23)
    for i in range(5):
        print(next(ssq))
Overwriting ssq.py
In [104]:
!ls
data.csv   day1.ipynb  day3.html   Makefile    push	    test.db
data.db    day2.html   day3.ipynb  Makefile~   __pycache__
day1.html  day2.ipynb  debug.py    memoize.py  ssq.py
In [105]:
pdb
Automatic pdb calling has been turned ON
In [107]:
files = find(location="/home/vikrant/programming/explorations/python/")
pyfiles = grep(".*\.py$", files)
dom
count(pyfiles)
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-107-c23d81e3fa21> in <module>()
      1 files = find(location="/home/vikrant/programming/explorations/python/")
      2 pyfiles = grep(".*\.py$", files)
----> 3 dom
      4 count(pyfiles)

NameError: name 'dom' is not defined
> <ipython-input-107-c23d81e3fa21>(3)<module>()
      1 files = find(location="/home/vikrant/programming/explorations/python/")
      2 pyfiles = grep(".*\.py$", files)
----> 3 dom
      4 count(pyfiles)

ipdb> type(pyfiles)
<class 'generator'>
ipdb> l
      1 files = find(location="/home/vikrant/programming/explorations/python/")
      2 pyfiles = grep(".*\.py$", files)
----> 3 dom
      4 count(pyfiles)

ipdb> c
In [108]:
files = find(location="/home/vikrant/programming/explorations/python/")
pyfiles = grep(".*\.py$", files)
dom
count(pyfiles)
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-108-c23d81e3fa21> in <module>()
      1 files = find(location="/home/vikrant/programming/explorations/python/")
      2 pyfiles = grep(".*\.py$", files)
----> 3 dom
      4 count(pyfiles)

NameError: name 'dom' is not defined
> <ipython-input-108-c23d81e3fa21>(3)<module>()
      1 files = find(location="/home/vikrant/programming/explorations/python/")
      2 pyfiles = grep(".*\.py$", files)
----> 3 dom
      4 count(pyfiles)

ipdb> n
In [112]:
files = find(location="/home/vikrant/programming/explorations/python/")
pyfiles = grep(".*\.py$", files)
dom1
print(count(pyfiles))
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-112-bb24d40f4495> in <module>()
      1 files = find(location="/home/vikrant/programming/explorations/python/")
      2 pyfiles = grep(".*\.py$", files)
----> 3 dom1
      4 print(count(pyfiles))

NameError: name 'dom1' is not defined
> <ipython-input-112-bb24d40f4495>(3)<module>()
      1 files = find(location="/home/vikrant/programming/explorations/python/")
      2 pyfiles = grep(".*\.py$", files)
----> 3 dom1
      4 print(count(pyfiles))

ipdb> dom1 = 3
ipdb> c
In [114]:
files = find(location="/home/vikrant/programming/explorations/python/")
pyfiles = grep(".*\.py$", files)
dom_
print(count(pyfiles))
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-114-348d90b24c1d> in <module>()
      1 files = find(location="/home/vikrant/programming/explorations/python/")
      2 pyfiles = grep(".*\.py$", files)
----> 3 dom_
      4 print(count(pyfiles))

NameError: name 'dom_' is not defined
> <ipython-input-114-348d90b24c1d>(3)<module>()
      1 files = find(location="/home/vikrant/programming/explorations/python/")
      2 pyfiles = grep(".*\.py$", files)
----> 3 dom_
      4 print(count(pyfiles))

ipdb> ?

Documented commands (type help <topic>):
========================================
EOF    cl         disable  interact  next    psource  rv         unt   
a      clear      display  j         p       q        s          until 
alias  commands   down     jump      pdef    quit     source     up    
args   condition  enable   l         pdoc    r        step       w     
b      cont       exit     list      pfile   restart  tbreak     whatis
break  continue   h        ll        pinfo   return   u          where 
bt     d          help     longlist  pinfo2  retval   unalias  
c      debug      ignore   n         pp      run      undisplay

Miscellaneous help topics:
==========================
exec  pdb

ipdb> l
      1 files = find(location="/home/vikrant/programming/explorations/python/")
      2 pyfiles = grep(".*\.py$", files)
----> 3 dom_
      4 print(count(pyfiles))

ipdb> help c
c(ont(inue))
        Continue execution, only stop when a breakpoint is encountered.
ipdb> c
In [116]:
del dom1
In [117]:
files = find(location="/home/vikrant/programming/explorations/python/")
pyfiles = grep(".*\.py$", files)
dom
lines_ = lines(pyfiles)
funs = grep("^def.*",lines_)
print(count(funcs))
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-117-7303fe650cfb> in <module>()
      1 files = find(location="/home/vikrant/programming/explorations/python/")
      2 pyfiles = grep(".*\.py$", files)
----> 3 dom
      4 lines_ = lines(pyfiles)
      5 funs = grep("^def.*",lines_)

NameError: name 'dom' is not defined
> <ipython-input-117-7303fe650cfb>(3)<module>()
      1 files = find(location="/home/vikrant/programming/explorations/python/")
      2 pyfiles = grep(".*\.py$", files)
----> 3 dom
      4 lines_ = lines(pyfiles)
      5 funs = grep("^def.*",lines_)

ipdb> l
      1 files = find(location="/home/vikrant/programming/explorations/python/")
      2 pyfiles = grep(".*\.py$", files)
----> 3 dom
      4 lines_ = lines(pyfiles)
      5 funs = grep("^def.*",lines_)
      6 print(count(funcs))

ipdb> b 5
Breakpoint 1 at <ipython-input-117-7303fe650cfb>:5
ipdb> c
In [118]:
 
Out[118]:
'g'

profiling

In [128]:
%%file saxpy.py

import time
import math

def saxpy(n):  # Accumulate i*j*1.0 over every pair (i, j) with 0 <= i, j < n and return the total.
    f = 0
    for i in range(n):
        for j in range(n):
            f = f + i*j*1.0  # the 1.0 factor turns each integer product into a float
    return f

def compute_sqrt(n):  # Add up math.sqrt(i) for i in range(n), repeating that inner pass n times.
    s = 0
    for j in range(n):
        for i in range(n):
            s += math.sqrt(i)  # `math` and its `sqrt` attribute are looked up on every iteration
    return s

def compute_sqrt1(n):  # Same sum as compute_sqrt, but calling sqrt through a local name.
    sqrt = math.sqrt  # resolve math.sqrt once, before the loops
    s = 0
    for j in range(n):
        for i in range(n):
            s += sqrt(i)
    return s

def donothing(t=2):  # Sleep for t seconds (2 by default); does no computation.
    time.sleep(t)
    
    
def main():  # Call each function above once for every i in 1000, 2000, 3000, 4000.
    for i in range(1000, 5000, 1000):
        saxpy(i)
        compute_sqrt(i)
        compute_sqrt1(i)
        donothing()  # called with its default argument
        
if __name__ == "__main__":
    main()
Overwriting saxpy.py
In [129]:
!python -m cProfile saxpy.py
         60000311 function calls in 33.242 seconds

   Ordered by: standard name

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
        1    0.000    0.000    0.000    0.000 <frozen importlib._bootstrap>:102(release)
        1    0.000    0.000    0.000    0.000 <frozen importlib._bootstrap>:142(__init__)
        1    0.000    0.000    0.000    0.000 <frozen importlib._bootstrap>:146(__enter__)
        1    0.000    0.000    0.000    0.000 <frozen importlib._bootstrap>:153(__exit__)
        1    0.000    0.000    0.000    0.000 <frozen importlib._bootstrap>:159(_get_module_lock)
        1    0.000    0.000    0.000    0.000 <frozen importlib._bootstrap>:173(cb)
        2    0.000    0.000    0.000    0.000 <frozen importlib._bootstrap>:197(_call_with_frames_removed)
       29    0.000    0.000    0.000    0.000 <frozen importlib._bootstrap>:208(_verbose_message)
        1    0.000    0.000    0.000    0.000 <frozen importlib._bootstrap>:293(__init__)
        1    0.000    0.000    0.000    0.000 <frozen importlib._bootstrap>:297(__enter__)
        1    0.000    0.000    0.000    0.000 <frozen importlib._bootstrap>:304(__exit__)
        4    0.000    0.000    0.000    0.000 <frozen importlib._bootstrap>:307(<genexpr>)
        1    0.000    0.000    0.000    0.000 <frozen importlib._bootstrap>:355(__init__)
        1    0.000    0.000    0.000    0.000 <frozen importlib._bootstrap>:389(cached)
        1    0.000    0.000    0.000    0.000 <frozen importlib._bootstrap>:402(parent)
        1    0.000    0.000    0.000    0.000 <frozen importlib._bootstrap>:410(has_location)
        1    0.000    0.000    0.000    0.000 <frozen importlib._bootstrap>:493(_init_module_attrs)
        1    0.000    0.000    0.000    0.000 <frozen importlib._bootstrap>:553(module_from_spec)
        1    0.000    0.000    0.000    0.000 <frozen importlib._bootstrap>:57(__init__)
        1    0.000    0.000    0.000    0.000 <frozen importlib._bootstrap>:641(_load_unlocked)
        1    0.000    0.000    0.000    0.000 <frozen importlib._bootstrap>:698(find_spec)
        1    0.000    0.000    0.000    0.000 <frozen importlib._bootstrap>:77(acquire)
        1    0.000    0.000    0.000    0.000 <frozen importlib._bootstrap>:771(find_spec)
        3    0.000    0.000    0.000    0.000 <frozen importlib._bootstrap>:834(__enter__)
        3    0.000    0.000    0.000    0.000 <frozen importlib._bootstrap>:838(__exit__)
        1    0.000    0.000    0.000    0.000 <frozen importlib._bootstrap>:861(_find_spec)
        1    0.000    0.000    0.001    0.001 <frozen importlib._bootstrap>:931(_find_and_load_unlocked)
        1    0.000    0.000    0.001    0.001 <frozen importlib._bootstrap>:958(_find_and_load)
        7    0.000    0.000    0.000    0.000 <frozen importlib._bootstrap_external>:1080(_path_importer_cache)
        1    0.000    0.000    0.000    0.000 <frozen importlib._bootstrap_external>:1117(_get_spec)
        1    0.000    0.000    0.000    0.000 <frozen importlib._bootstrap_external>:1149(find_spec)
        1    0.000    0.000    0.000    0.000 <frozen importlib._bootstrap_external>:1228(_get_spec)
        6    0.000    0.000    0.000    0.000 <frozen importlib._bootstrap_external>:1233(find_spec)
        1    0.000    0.000    0.000    0.000 <frozen importlib._bootstrap_external>:361(_get_cached)
        6    0.000    0.000    0.000    0.000 <frozen importlib._bootstrap_external>:41(_relax_case)
        1    0.000    0.000    0.000    0.000 <frozen importlib._bootstrap_external>:524(spec_from_file_location)
       26    0.000    0.000    0.000    0.000 <frozen importlib._bootstrap_external>:57(_path_join)
       26    0.000    0.000    0.000    0.000 <frozen importlib._bootstrap_external>:59(<listcomp>)
        7    0.000    0.000    0.000    0.000 <frozen importlib._bootstrap_external>:75(_path_stat)
        1    0.000    0.000    0.000    0.000 <frozen importlib._bootstrap_external>:85(_path_is_mode_type)
        1    0.000    0.000    0.000    0.000 <frozen importlib._bootstrap_external>:908(__init__)
        1    0.000    0.000    0.000    0.000 <frozen importlib._bootstrap_external>:919(create_module)
        1    0.000    0.000    0.000    0.000 <frozen importlib._bootstrap_external>:927(exec_module)
        1    0.000    0.000    0.000    0.000 <frozen importlib._bootstrap_external>:94(_path_isfile)
        4    7.410    1.853   11.099    2.775 saxpy.py:12(compute_sqrt)
        4    5.428    1.357    9.086    2.271 saxpy.py:19(compute_sqrt1)
        1    0.000    0.000   33.242   33.242 saxpy.py:2(<module>)
        4    0.000    0.000    8.009    2.002 saxpy.py:27(donothing)
        1    0.000    0.000   33.241   33.241 saxpy.py:31(main)
        4    5.047    1.262    5.047    1.262 saxpy.py:5(saxpy)
        3    0.000    0.000    0.000    0.000 {built-in method _imp.acquire_lock}
        1    0.000    0.000    0.000    0.000 {built-in method _imp.create_dynamic}
        1    0.000    0.000    0.000    0.000 {built-in method _imp.exec_dynamic}
        1    0.000    0.000    0.000    0.000 {built-in method _imp.is_builtin}
        1    0.000    0.000    0.000    0.000 {built-in method _imp.is_frozen}
        4    0.000    0.000    0.000    0.000 {built-in method _imp.release_lock}
        2    0.000    0.000    0.000    0.000 {built-in method _thread.allocate_lock}
        2    0.000    0.000    0.000    0.000 {built-in method _thread.get_ident}
        1    0.000    0.000    0.000    0.000 {built-in method builtins.any}
        1    0.000    0.000   33.242   33.242 {built-in method builtins.exec}
        6    0.000    0.000    0.000    0.000 {built-in method builtins.getattr}
        8    0.000    0.000    0.000    0.000 {built-in method builtins.hasattr}
        7    0.000    0.000    0.000    0.000 {built-in method builtins.isinstance}
 60000000    7.346    0.000    7.346    0.000 {built-in method math.sqrt}
        1    0.000    0.000    0.000    0.000 {built-in method posix.fspath}
        2    0.000    0.000    0.000    0.000 {built-in method posix.getcwd}
        7    0.000    0.000    0.000    0.000 {built-in method posix.stat}
        4    8.009    2.002    8.009    2.002 {built-in method time.sleep}
        1    0.000    0.000    0.000    0.000 {method 'disable' of '_lsprof.Profiler' objects}
        2    0.000    0.000    0.000    0.000 {method 'endswith' of 'str' objects}
       26    0.000    0.000    0.000    0.000 {method 'join' of 'str' objects}
        8    0.000    0.000    0.000    0.000 {method 'rpartition' of 'str' objects}
       52    0.000    0.000    0.000    0.000 {method 'rstrip' of 'str' objects}


In [127]:
import cProfile
In [132]:
import math
def compute_sqrt(n):
    """Return the sum of math.sqrt(i) for i in range(n), accumulated over n identical passes."""
    total = 0
    for _ in range(n):
        for value in range(n):
            total = total + math.sqrt(value)
    return total

def compute_sqrt1(n):
    """Return the sum of sqrt(i) for i in range(n) over n identical passes.

    math.sqrt is bound to a local name once, before the loops, so the inner
    loop calls it without a global and attribute lookup each time.
    """
    root = math.sqrt
    total = 0
    for _ in range(n):
        for value in range(n):
            total = total + root(value)
    return total
In [131]:
cProfile.run("compute_sqrt(1000)")
         4 function calls in 0.000 seconds

   Ordered by: standard name

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
        1    0.000    0.000    0.000    0.000 <ipython-input-130-3e2107a9e9b2>:1(compute_sqrt)
        1    0.000    0.000    0.000    0.000 <string>:1(<module>)
        1    0.000    0.000    0.000    0.000 {built-in method builtins.exec}
        1    0.000    0.000    0.000    0.000 {method 'disable' of '_lsprof.Profiler' objects}


---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-131-fd90dd24caf6> in <module>()
----> 1 cProfile.run("compute_sqrt(1000)")

~/usr/local/anaconda3/lib/python3.6/cProfile.py in run(statement, filename, sort)
     14 
     15 def run(statement, filename=None, sort=-1):
---> 16     return _pyprofile._Utils(Profile).run(statement, filename, sort)
     17 
     18 def runctx(statement, globals, locals, filename=None, sort=-1):

~/usr/local/anaconda3/lib/python3.6/profile.py in run(self, statement, filename, sort)
     53         prof = self.profiler()
     54         try:
---> 55             prof.run(statement)
     56         except SystemExit:
     57             pass

~/usr/local/anaconda3/lib/python3.6/cProfile.py in run(self, cmd)
     93         import __main__
     94         dict = __main__.__dict__
---> 95         return self.runctx(cmd, dict, dict)
     96 
     97     def runctx(self, cmd, globals, locals):

~/usr/local/anaconda3/lib/python3.6/cProfile.py in runctx(self, cmd, globals, locals)
     98         self.enable()
     99         try:
--> 100             exec(cmd, globals, locals)
    101         finally:
    102             self.disable()

<string> in <module>()

<ipython-input-130-3e2107a9e9b2> in compute_sqrt(n)
      3     for j in range(n):
      4         for i in range(n):
----> 5             s += math.sqrt(i)
      6     return s
      7 

NameError: name 'math' is not defined
> <ipython-input-130-3e2107a9e9b2>(5)compute_sqrt()
      3     for j in range(n):
      4         for i in range(n):
----> 5             s += math.sqrt(i)
      6     return s
      7 

ipdb> c
In [133]:
cProfile.run("compute_sqrt(1000)")
         1000004 function calls in 0.435 seconds

   Ordered by: standard name

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
        1    0.293    0.293    0.435    0.435 <ipython-input-132-bb65aa1512a6>:2(compute_sqrt)
        1    0.000    0.000    0.435    0.435 <string>:1(<module>)
        1    0.000    0.000    0.435    0.435 {built-in method builtins.exec}
  1000000    0.142    0.000    0.142    0.000 {built-in method math.sqrt}
        1    0.000    0.000    0.000    0.000 {method 'disable' of '_lsprof.Profiler' objects}


In [134]:
help(cProfile.run)
Help on function run in module cProfile:

run(statement, filename=None, sort=-1)
    Run statement under profiler optionally saving results in filename
    
    This function takes a single argument that can be passed to the
    "exec" statement, and an optional file name.  In all cases this
    routine attempts to "exec" its first argument and gather profiling
    statistics from the execution. If no file name is present, then this
    function automatically prints a simple profiling report, sorted by the
    standard name string (file/line/function-name) that is presented in
    each line.

In [135]:
cProfile.run("compute_sqrt(1000)", filename="sqrt.profiledata")

Numpy

In [4]:
import numpy as np
In [2]:
a = np.array([1,2,3,4,5,6,7,8,9])
In [3]:
a
Out[3]:
array([1, 2, 3, 4, 5, 6, 7, 8, 9])
In [4]:
a.shape
Out[4]:
(9,)
In [5]:
a.ndim
Out[5]:
1
In [8]:
a100 = np.arange(100).reshape(10,10)
In [9]:
a100.nbytes
Out[9]:
800
In [10]:
a100.ndim
Out[10]:
2
In [11]:
a100.shape
Out[11]:
(10, 10)
In [12]:
a100[0]
Out[12]:
array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
In [13]:
a100[0][0]
Out[13]:
0
In [14]:
a100[0,0]
Out[14]:
0
In [16]:
a100[:,0]
Out[16]:
array([ 0, 10, 20, 30, 40, 50, 60, 70, 80, 90])
In [17]:
a100[0,:]
Out[17]:
array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
In [18]:
a100
Out[18]:
array([[ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14, 15, 16, 17, 18, 19],
       [20, 21, 22, 23, 24, 25, 26, 27, 28, 29],
       [30, 31, 32, 33, 34, 35, 36, 37, 38, 39],
       [40, 41, 42, 43, 44, 45, 46, 47, 48, 49],
       [50, 51, 52, 53, 54, 55, 56, 57, 58, 59],
       [60, 61, 62, 63, 64, 65, 66, 67, 68, 69],
       [70, 71, 72, 73, 74, 75, 76, 77, 78, 79],
       [80, 81, 82, 83, 84, 85, 86, 87, 88, 89],
       [90, 91, 92, 93, 94, 95, 96, 97, 98, 99]])
In [20]:
np.zeros(100).reshape(5,20)
Out[20]:
array([[ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.,  0.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.,  0.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.,  0.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.,  0.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.,  0.,  0.,  0.,  0.,  0.]])
In [21]:
np.zeros_like(a100)
Out[21]:
array([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0]])
In [22]:
np.ones_like(a100)
Out[22]:
array([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
       [1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
       [1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
       [1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
       [1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
       [1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
       [1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
       [1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
       [1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
       [1, 1, 1, 1, 1, 1, 1, 1, 1, 1]])
In [23]:
d = np.asarray([1,2,3,4,5,6,7])
In [24]:
d
Out[24]:
array([1, 2, 3, 4, 5, 6, 7])
In [26]:
np.empty(10)
Out[26]:
array([  0.00000000e+000,   4.94065646e-323,   9.88131292e-323,
         1.48219694e-322,   1.97626258e-322,   2.47032823e-322,
         2.96439388e-322,   3.45845952e-322,   3.95252517e-322,
         4.44659081e-322])
In [27]:
np.empty_like(d)
Out[27]:
array([    140204774964152,            41473392,                   0,
                         0,                   0,                   0,
       3979271335011496761])
In [28]:
np.ones(10, dtype=np.int8)
Out[28]:
array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1], dtype=int8)
In [29]:
d
Out[29]:
array([1, 2, 3, 4, 5, 6, 7])
In [31]:
subview = a100[:5,:5]
In [32]:
subview
Out[32]:
array([[ 0,  1,  2,  3,  4],
       [10, 11, 12, 13, 14],
       [20, 21, 22, 23, 24],
       [30, 31, 32, 33, 34],
       [40, 41, 42, 43, 44]])
In [33]:
subview[0,0] = -1
In [34]:
subview
Out[34]:
array([[-1,  1,  2,  3,  4],
       [10, 11, 12, 13, 14],
       [20, 21, 22, 23, 24],
       [30, 31, 32, 33, 34],
       [40, 41, 42, 43, 44]])
In [35]:
a100
Out[35]:
array([[-1,  1,  2,  3,  4,  5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14, 15, 16, 17, 18, 19],
       [20, 21, 22, 23, 24, 25, 26, 27, 28, 29],
       [30, 31, 32, 33, 34, 35, 36, 37, 38, 39],
       [40, 41, 42, 43, 44, 45, 46, 47, 48, 49],
       [50, 51, 52, 53, 54, 55, 56, 57, 58, 59],
       [60, 61, 62, 63, 64, 65, 66, 67, 68, 69],
       [70, 71, 72, 73, 74, 75, 76, 77, 78, 79],
       [80, 81, 82, 83, 84, 85, 86, 87, 88, 89],
       [90, 91, 92, 93, 94, 95, 96, 97, 98, 99]])
In [36]:
scopy = subview.copy()
In [37]:
scopy
Out[37]:
array([[-1,  1,  2,  3,  4],
       [10, 11, 12, 13, 14],
       [20, 21, 22, 23, 24],
       [30, 31, 32, 33, 34],
       [40, 41, 42, 43, 44]])
In [38]:
scopy[0,0] = 0
In [39]:
subview
Out[39]:
array([[-1,  1,  2,  3,  4],
       [10, 11, 12, 13, 14],
       [20, 21, 22, 23, 24],
       [30, 31, 32, 33, 34],
       [40, 41, 42, 43, 44]])
In [40]:
scopy
Out[40]:
array([[ 0,  1,  2,  3,  4],
       [10, 11, 12, 13, 14],
       [20, 21, 22, 23, 24],
       [30, 31, 32, 33, 34],
       [40, 41, 42, 43, 44]])
In [41]:
d
Out[41]:
array([1, 2, 3, 4, 5, 6, 7])
In [42]:
d>3
Out[42]:
array([False, False, False,  True,  True,  True,  True], dtype=bool)
In [43]:
d[d>3]
Out[43]:
array([4, 5, 6, 7])
In [44]:
d - 2
Out[44]:
array([-1,  0,  1,  2,  3,  4,  5])
In [45]:
d2 = d*2
In [46]:
d2
Out[46]:
array([ 2,  4,  6,  8, 10, 12, 14])
In [47]:
d
Out[47]:
array([1, 2, 3, 4, 5, 6, 7])
In [48]:
d + d2
Out[48]:
array([ 3,  6,  9, 12, 15, 18, 21])
In [49]:
d *  d2
Out[49]:
array([ 2,  8, 18, 32, 50, 72, 98])
In [50]:
np.exp(d)
Out[50]:
array([    2.71828183,     7.3890561 ,    20.08553692,    54.59815003,
         148.4131591 ,   403.42879349,  1096.63315843])
In [51]:
a100.max()
Out[51]:
99
In [52]:
a100.mean()
Out[52]:
49.490000000000002
In [53]:
a100.std()
Out[53]:
28.883384496973342
In [54]:
a100.cumsum()
Out[54]:
array([  -1,    0,    2,    5,    9,   14,   20,   27,   35,   44,   54,
         65,   77,   90,  104,  119,  135,  152,  170,  189,  209,  230,
        252,  275,  299,  324,  350,  377,  405,  434,  464,  495,  527,
        560,  594,  629,  665,  702,  740,  779,  819,  860,  902,  945,
        989, 1034, 1080, 1127, 1175, 1224, 1274, 1325, 1377, 1430, 1484,
       1539, 1595, 1652, 1710, 1769, 1829, 1890, 1952, 2015, 2079, 2144,
       2210, 2277, 2345, 2414, 2484, 2555, 2627, 2700, 2774, 2849, 2925,
       3002, 3080, 3159, 3239, 3320, 3402, 3485, 3569, 3654, 3740, 3827,
       3915, 4004, 4094, 4185, 4277, 4370, 4464, 4559, 4655, 4752, 4850,
       4949])
In [57]:
x = np.arange(-5, 5, 0.01)
In [3]:
from scipy.misc import face
In [59]:
image = face(gray=True)
In [60]:
image
Out[60]:
array([[114, 130, 145, ..., 119, 129, 137],
       [ 83, 104, 123, ..., 118, 134, 146],
       [ 68,  88, 109, ..., 119, 134, 145],
       ..., 
       [ 98, 103, 116, ..., 144, 143, 143],
       [ 94, 104, 120, ..., 143, 142, 142],
       [ 94, 106, 119, ..., 142, 141, 140]], dtype=uint8)
In [2]:
from  matplotlib import pyplot as plt
%matplotlib inline

def imshow(img):
    """Draw img with matplotlib using the gray colormap, then show the figure."""
    plt.imshow(img, cmap=plt.cm.gray)
    plt.show()
In [68]:
imshow(image)
In [64]:
negate = 255 - image
In [69]:
imshow(negate)
In [75]:
thumb = image[::3,::3]
In [76]:
thumb.shape
Out[76]:
(256, 342)
In [77]:
imshow(thumb)
In [78]:
a100[:,0] = 0
In [79]:
a100
Out[79]:
array([[ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9],
       [ 0, 11, 12, 13, 14, 15, 16, 17, 18, 19],
       [ 0, 21, 22, 23, 24, 25, 26, 27, 28, 29],
       [ 0, 31, 32, 33, 34, 35, 36, 37, 38, 39],
       [ 0, 41, 42, 43, 44, 45, 46, 47, 48, 49],
       [ 0, 51, 52, 53, 54, 55, 56, 57, 58, 59],
       [ 0, 61, 62, 63, 64, 65, 66, 67, 68, 69],
       [ 0, 71, 72, 73, 74, 75, 76, 77, 78, 79],
       [ 0, 81, 82, 83, 84, 85, 86, 87, 88, 89],
       [ 0, 91, 92, 93, 94, 95, 96, 97, 98, 99]])
In [80]:
plain = np.zeros_like(thumb)
In [81]:
imshow(plain)
In [84]:
plain = np.zeros_like(thumb)
plain[::10,:] = 255
plain[:,::10] = 255
In [85]:
imshow(plain)
In [89]:
plain[:11,:11]
Out[89]:
array([[255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255],
       [255,   0,   0,   0,   0,   0,   0,   0,   0,   0, 255],
       [255,   0,   0,   0,   0,   0,   0,   0,   0,   0, 255],
       [255,   0,   0,   0,   0,   0,   0,   0,   0,   0, 255],
       [255,   0,   0,   0,   0,   0,   0,   0,   0,   0, 255],
       [255,   0,   0,   0,   0,   0,   0,   0,   0,   0, 255],
       [255,   0,   0,   0,   0,   0,   0,   0,   0,   0, 255],
       [255,   0,   0,   0,   0,   0,   0,   0,   0,   0, 255],
       [255,   0,   0,   0,   0,   0,   0,   0,   0,   0, 255],
       [255,   0,   0,   0,   0,   0,   0,   0,   0,   0, 255],
       [255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255]], dtype=uint8)
In [90]:
plain = np.zeros(100).reshape(10,10)
plain[::3,:] = 255
plain[:,::3] = 255
imshow(plain)
In [91]:
plain = np.zeros_like(thumb)
plain[::10,:] = 255
plain[:,::10] = 255
In [92]:
imshow(thumb*0.5 + plain*0.5)
In [93]:
imshow(thumb)
In [94]:
imshow(image)
In [95]:
thumb = thumb.copy()
In [97]:
thumb.shape
Out[97]:
(256, 342)
In [98]:
def swapcorners(img):
    """Return a copy of ``img`` with its top-left and bottom-right corners swapped.

    Each corner is ``h // 2`` rows by ``w // 2`` columns. When a dimension is
    odd, the middle row/column belongs to neither corner and is left as is.
    Any axes after the first two (e.g. colour channels) are carried along.
    The input array itself is not modified.
    """
    h, w = img.shape[:2]
    half_h, half_w = h // 2, w // 2
    swapped = img.copy()
    # Measure the bottom-right corner from the far edges so both corners
    # always have the same shape, even for odd heights or widths.
    swapped[:half_h, :half_w] = img[h - half_h:, w - half_w:]
    swapped[h - half_h:, w - half_w:] = img[:half_h, :half_w]
    return swapped
In [99]:
imshow(swapcorners(thumb))
In [102]:
thumb = image[::10,::10]
In [103]:
hthumb = np.hstack([thumb,thumb,thumb])
vthump = np.vstack([hthumb, hthumb, hthumb])
imshow(vthump)
In [104]:
!wget https://notes.pipal.in/2017/arcesium-oct-advpython/HYDERABAD-weather.csv
--2018-10-23 15:20:09--  https://notes.pipal.in/2017/arcesium-oct-advpython/HYDERABAD-weather.csv
Resolving notes.pipal.in (notes.pipal.in)... 64:ff9b::8b3b:9eb, 139.59.9.235
Connecting to notes.pipal.in (notes.pipal.in)|64:ff9b::8b3b:9eb|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 24524 (24K) [application/octet-stream]
Saving to: ‘HYDERABAD-weather.csv’

HYDERABAD-weather.c 100%[===================>]  23.95K  7.91KB/s    in 3.0s    

2018-10-23 15:20:25 (7.91 KB/s) - ‘HYDERABAD-weather.csv’ saved [24524/24524]

In [7]:
import csv
In [8]:
with open("HYDERABAD-weather.csv", newline="") as f:
    # Read every row while the file is open; the handle is closed on exit.
    data = list(csv.reader(f))
In [107]:
import requests
def download(url, filename, timeout=30):
    """Fetch ``url`` with an HTTP GET and save the response body to ``filename``.

    The body is written as raw bytes, so the saved file matches what the
    server sent instead of a decoded and re-encoded copy of it.

    ``timeout`` is the number of seconds to wait for the server.
    Raises ``requests.HTTPError`` when the server answers with an error status.
    """
    resp = requests.get(url, timeout=timeout)
    # Fail loudly instead of saving an error page as if it were the data.
    resp.raise_for_status()
    with open(filename, "wb") as f:
        f.write(resp.content)
    
In [110]:
url = "https://notes.pipal.in/2017/arcesium-oct-advpython/HYDERABAD-weather.csv"
download(url, "wheatherdata.csv")
In [111]:
!tail wheatherdata.csv
589,HYDERABAD,December,1991,28.1,14.9,0.3
590,HYDERABAD,December,1992,27.1,13.8,0.0
591,HYDERABAD,December,1993,27.1,13.2,34.9
592,HYDERABAD,December,1994,27.9,12.0,0.0
593,HYDERABAD,December,1995,28.9,15.9,0.0
594,HYDERABAD,December,1996,28.3,14.9,0.0
595,HYDERABAD,December,1997,28.7,19.2,40.6
596,HYDERABAD,December,1998,28.7,12.8,0.0
597,HYDERABAD,December,1999,29.0,14.2,0.0
598,HYDERABAD,December,2000,29.6,13.3,1.0
In [9]:
data = list(data)
In [10]:
data[0]
Out[10]:
['', 'city', 'month', 'year', 'maxtemp', 'mintemp', 'rainfall']
In [11]:
d = data[1:]
In [119]:
def floatcolumn(data, n):
    """Return column ``n`` of ``data`` as a list of floats.

    Every row is indexed at ``n`` and the value passed to ``float``; a cell
    that cannot be converted raises ``ValueError``.
    """
    values = []
    for row in data:
        values.append(float(row[n]))
    return values
In [121]:
maxtemp = floatcolumn(d, 4)
In [122]:
mintemp = floatcolumn(d, 5)
In [123]:
rainfall = floatcolumn(d, 6)
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-123-cb7126acb28b> in <module>()
----> 1 rainfall = floatcolumn(d, 6)

<ipython-input-119-25d06e4bad5d> in floatcolumn(data, n)
      1 def floatcolumn(data, n):
----> 2     return [float(row[n]) for row in data]

<ipython-input-119-25d06e4bad5d> in <listcomp>(.0)
      1 def floatcolumn(data, n):
----> 2     return [float(row[n]) for row in data]

ValueError: could not convert string to float: 
In [13]:
def float_(sf):
    """Convert ``sf`` to a float, falling back to 0.0 when it cannot be converted.

    The conversion error is printed so that bad cells (for example an empty
    string) stay visible instead of disappearing silently.
    """
    try:
        return float(sf)
    except (TypeError, ValueError) as e:
        # Only conversion failures are expected here; anything else is a
        # genuine bug and should propagate.
        print(e)
        return 0.0
    
def floatcolumn(data, n):
    """Return column ``n`` of ``data`` as a list, converting each cell with ``float_``."""
    cells = (row[n] for row in data)
    return list(map(float_, cells))
In [125]:
rainfall = floatcolumn(d, 6)
could not convert string to float: 
In [134]:
plt.scatter(rainfall, maxtemp)
Out[134]:
<matplotlib.collections.PathCollection at 0x7f83b70283c8>
In [128]:
plt.scatter(rainfall, mintemp)
Out[128]:
<matplotlib.collections.PathCollection at 0x7f83c269bf60>
In [135]:
data[0]
Out[135]:
['', 'city', 'month', 'year', 'maxtemp', 'mintemp', 'rainfall']
In [137]:
year = [int(row[3]) for row in d]
In [138]:
ra = np.array(rainfall)
In [142]:
sorteddata = sorted(d, key=lambda r:r[3])
In [143]:
rainfall = floatcolumn(sorteddata, 6)
could not convert string to float: 
In [144]:
year = [int(row[3]) for row in sorteddata]
In [145]:
plt.plot(year, rainfall)
Out[145]:
[<matplotlib.lines.Line2D at 0x7f83b6f90cc0>]
In [146]:
import random
plt.bar(range(12), [random.random() for i in range(12)])
Out[146]:
<Container object of 12 artists>
In [12]:
months = np.array([row[2] for row in d])
In [15]:
rainfall = np.array(floatcolumn(d, 6))
could not convert string to float: 
In [19]:
rainfall[months=="January"].mean()
Out[19]:
13.177999999999997
In [20]:
def get_mean_rainfall(rainfall, months, month):
    """Average the ``rainfall`` entries whose ``months`` entry equals ``month``.

    Assumes ``rainfall`` and ``months`` are equal-length NumPy arrays, so that
    the comparison yields a boolean mask usable as an index.
    """
    in_month = months == month
    selected = rainfall[in_month]
    return selected.mean()
In [21]:
import datetime
In [22]:
d = datetime.datetime(2000,1,1)
In [23]:
d.strftime("%B")
Out[23]:
'January'
In [24]:
mnames = [datetime.datetime(2000,i+1,1).strftime("%B") for i in range(12)]
In [25]:
mnames
Out[25]:
['January',
 'February',
 'March',
 'April',
 'May',
 'June',
 'July',
 'August',
 'September',
 'October',
 'November',
 'December']
In [26]:
rainfall_ = [get_mean_rainfall(rainfall, months, m) for m in mnames]
In [27]:
rainfall_
Out[27]:
[13.177999999999997,
 7.9400000000000004,
 15.264000000000001,
 20.23469387755102,
 35.713999999999999,
 103.75399999999999,
 169.86000000000001,
 178.69,
 158.292,
 97.158000000000015,
 21.971999999999998,
 5.9120000000000008]
In [31]:
plt.bar(mnames, rainfall_)
---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-31-d0cecb7e1d8e> in <module>()
----> 1 plt.bar(mnames, rainfall_)

~/usr/local/anaconda3/lib/python3.6/site-packages/matplotlib/pyplot.py in bar(left, height, width, bottom, hold, data, **kwargs)
   2702     try:
   2703         ret = ax.bar(left, height, width=width, bottom=bottom, data=data,
-> 2704                      **kwargs)
   2705     finally:
   2706         ax._hold = washold

~/usr/local/anaconda3/lib/python3.6/site-packages/matplotlib/__init__.py in inner(ax, *args, **kwargs)
   1896                     warnings.warn(msg % (label_namer, func.__name__),
   1897                                   RuntimeWarning, stacklevel=2)
-> 1898             return func(ax, *args, **kwargs)
   1899         pre_doc = inner.__doc__
   1900         if pre_doc is None:

~/usr/local/anaconda3/lib/python3.6/site-packages/matplotlib/axes/_axes.py in bar(self, left, height, width, bottom, **kwargs)
   2103         if align == 'center':
   2104             if orientation == 'vertical':
-> 2105                 left = [left[i] - width[i] / 2. for i in xrange(len(left))]
   2106             elif orientation == 'horizontal':
   2107                 bottom = [bottom[i] - height[i] / 2.

~/usr/local/anaconda3/lib/python3.6/site-packages/matplotlib/axes/_axes.py in <listcomp>(.0)
   2103         if align == 'center':
   2104             if orientation == 'vertical':
-> 2105                 left = [left[i] - width[i] / 2. for i in xrange(len(left))]
   2106             elif orientation == 'horizontal':
   2107                 bottom = [bottom[i] - height[i] / 2.

TypeError: unsupported operand type(s) for -: 'str' and 'float'
In [32]:
plt.bar(range(12), rainfall_)
Out[32]:
<Container object of 12 artists>
In [34]:
plt.pie(rainfall_, labels=mnames, explode=[0]*12)
Out[34]:
([<matplotlib.patches.Wedge at 0x7f7fc7ae06d8>,
  <matplotlib.patches.Wedge at 0x7f7fc7771240>,
  <matplotlib.patches.Wedge at 0x7f7ff06a8358>,
  <matplotlib.patches.Wedge at 0x7f7fc79be400>,
  <matplotlib.patches.Wedge at 0x7f7fc80664a8>,
  <matplotlib.patches.Wedge at 0x7f7fc829e7f0>,
  <matplotlib.patches.Wedge at 0x7f7fc7746198>,
  <matplotlib.patches.Wedge at 0x7f7fc818b860>,
  <matplotlib.patches.Wedge at 0x7f7fc8526550>,
  <matplotlib.patches.Wedge at 0x7f7fc76bb668>,
  <matplotlib.patches.Wedge at 0x7f7fc4348748>,
  <matplotlib.patches.Wedge at 0x7f7fc76c8ac8>],
 [<matplotlib.text.Text at 0x7f7fc791de80>,
  <matplotlib.text.Text at 0x7f7fc7771320>,
  <matplotlib.text.Text at 0x7f7fc851b748>,
  <matplotlib.text.Text at 0x7f7fc79bec88>,
  <matplotlib.text.Text at 0x7f7fc8066198>,
  <matplotlib.text.Text at 0x7f7fc829ec88>,
  <matplotlib.text.Text at 0x7f7fc7746470>,
  <matplotlib.text.Text at 0x7f7fc818b320>,
  <matplotlib.text.Text at 0x7f7fc76bb9e8>,
  <matplotlib.text.Text at 0x7f7fc7f3d240>,
  <matplotlib.text.Text at 0x7f7fc8168eb8>,
  <matplotlib.text.Text at 0x7f7fc76c87b8>])

pandas

In [35]:
from pandas import Series, DataFrame
In [36]:
s = Series(range(5))
In [37]:
s
Out[37]:
0    0
1    1
2    2
3    3
4    4
dtype: int64
In [39]:
s = Series([4,2,7,3],index=['a','b','c','d'])
In [40]:
s
Out[40]:
a    4
b    2
c    7
d    3
dtype: int64
In [41]:
s['a']
Out[41]:
4
In [42]:
s[0]
Out[42]:
4
In [43]:
s['a':'c']
Out[43]:
a    4
b    2
c    7
dtype: int64
In [44]:
s = Series({"a":1,"b":2,"c":3,"d":4})
In [45]:
s
Out[45]:
a    1
b    2
c    3
d    4
dtype: int64
In [48]:
s = Series({"a":1,"b":2,"c":3,"d":4,"j":6}, index=['e','d','c','b','a'])
In [49]:
s
Out[49]:
e    NaN
d    4.0
c    3.0
b    2.0
a    1.0
dtype: float64
In [50]:
s[s>2]
Out[50]:
d    4.0
c    3.0
dtype: float64
In [51]:
s*s
Out[51]:
e     NaN
d    16.0
c     9.0
b     4.0
a     1.0
dtype: float64
In [52]:
s-1
Out[52]:
e    NaN
d    3.0
c    2.0
b    1.0
a    0.0
dtype: float64
In [53]:
np.exp(s)
Out[53]:
e          NaN
d    54.598150
c    20.085537
b     7.389056
a     2.718282
dtype: float64
In [54]:
s.reindex(['a','b','c','d','f','g'])
Out[54]:
a    1.0
b    2.0
c    3.0
d    4.0
f    NaN
g    NaN
dtype: float64
In [56]:
s['f'] = 0
In [57]:
s
Out[57]:
e    NaN
d    4.0
c    3.0
b    2.0
a    1.0
f    0.0
dtype: float64
In [59]:
months_s = Series(months)
In [62]:
months_s.drop_duplicates()
Out[62]:
0        January
50      February
100        March
150        April
199          May
249         June
299         July
349       August
399    September
449      October
499     November
549     December
dtype: object
In [64]:
d
Out[64]:
datetime.datetime(2000, 1, 1, 0, 0)
In [65]:
data[0]
Out[65]:
['', 'city', 'month', 'year', 'maxtemp', 'mintemp', 'rainfall']
In [68]:
df = DataFrame(data[1:], columns=['x', 'city', 'month', 'year', 'maxtemp', 'mintemp', 'rainfall'])
In [69]:
df.describe()
Out[69]:
x city month year maxtemp mintemp rainfall
count 599 599 599 599 599 599 599
unique 599 1 12 50 135 154 401
top 215 HYDERABAD February 1981 29.0 22.2 0.0
freq 1 599 50 12 17 18 122
In [70]:
df.head()
Out[70]:
x city month year maxtemp mintemp rainfall
0 0 HYDERABAD January 1951 29.0 14.8 0.0
1 1 HYDERABAD January 1952 29.1 13.6 0.0
2 2 HYDERABAD January 1953 28.6 14.6 3.5
3 3 HYDERABAD January 1954 28.2 13.9 0.0
4 4 HYDERABAD January 1955 28.0 14.7 0.0
In [71]:
df.tail()
Out[71]:
x city month year maxtemp mintemp rainfall
594 594 HYDERABAD December 1996 28.3 14.9 0.0
595 595 HYDERABAD December 1997 28.7 19.2 40.6
596 596 HYDERABAD December 1998 28.7 12.8 0.0
597 597 HYDERABAD December 1999 29.0 14.2 0.0
598 598 HYDERABAD December 2000 29.6 13.3 1.0
In [73]:
import pandas as pd
In [74]:
wd = pd.read_csv("HYDERABAD-weather.csv")
In [75]:
wd.head()
Out[75]:
Unnamed: 0 city month year maxtemp mintemp rainfall
0 0 HYDERABAD January 1951 29.0 14.8 0.0
1 1 HYDERABAD January 1952 29.1 13.6 0.0
2 2 HYDERABAD January 1953 28.6 14.6 3.5
3 3 HYDERABAD January 1954 28.2 13.9 0.0
4 4 HYDERABAD January 1955 28.0 14.7 0.0
In [76]:
wd.plot("maxtemp", "rainfall", kind="scatter")
Out[76]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f7fbc067dd8>
In [79]:
# city and month are text columns; average only the numeric ones.
grpyear = wd.groupby('year').mean(numeric_only=True)
In [80]:
grpyear.head()
Out[80]:
Unnamed: 0 maxtemp mintemp rainfall
year
1951 274.333333 32.666667 20.233333 58.975000
1952 275.333333 31.975000 19.891667 46.741667
1953 276.333333 32.183333 20.266667 74.245455
1954 277.333333 31.525000 19.875000 70.366667
1955 278.333333 30.883333 19.725000 92.775000
In [82]:
# city is a text column; average only the numeric ones.
groupbymonth = wd.groupby("month").mean(numeric_only=True)
In [84]:
groupbymonth
Out[84]:
Unnamed: 0 year maxtemp mintemp rainfall
month
April 174.0 1975.77551 37.863265 24.273469 20.234694
August 373.5 1975.50000 29.786000 22.086000 178.690000
December 573.5 1975.50000 28.004000 14.526000 5.912000
February 74.5 1975.50000 31.932000 17.556000 7.940000
January 24.5 1975.50000 28.760000 15.214000 13.178000
July 323.5 1975.50000 30.754000 22.560000 169.860000
June 273.5 1975.50000 34.528000 23.976000 103.754000
March 124.5 1975.50000 35.444000 20.798000 15.264000
May 223.5 1975.50000 38.996000 26.160000 35.714000
November 523.5 1975.50000 29.016000 16.862000 22.420408
October 473.5 1975.50000 30.582000 20.306000 97.158000
September 423.5 1975.50000 30.452000 21.962000 158.292000
In [86]:
del groupbymonth['year']
In [88]:
del groupbymonth['Unnamed: 0']
In [89]:
groupbymonth
Out[89]:
maxtemp mintemp rainfall
month
April 37.863265 24.273469 20.234694
August 29.786000 22.086000 178.690000
December 28.004000 14.526000 5.912000
February 31.932000 17.556000 7.940000
January 28.760000 15.214000 13.178000
July 30.754000 22.560000 169.860000
June 34.528000 23.976000 103.754000
March 35.444000 20.798000 15.264000
May 38.996000 26.160000 35.714000
November 29.016000 16.862000 22.420408
October 30.582000 20.306000 97.158000
September 30.452000 21.962000 158.292000
In [91]:
help(groupbymonth.hist)
Help on method hist_frame in module pandas.plotting._core:

hist_frame(column=None, by=None, grid=True, xlabelsize=None, xrot=None, ylabelsize=None, yrot=None, ax=None, sharex=False, sharey=False, figsize=None, layout=None, bins=10, **kwds) method of pandas.core.frame.DataFrame instance
    Draw histogram of the DataFrame's series using matplotlib / pylab.
    
    Parameters
    ----------
    data : DataFrame
    column : string or sequence
        If passed, will be used to limit data to a subset of columns
    by : object, optional
        If passed, then used to form histograms for separate groups
    grid : boolean, default True
        Whether to show axis grid lines
    xlabelsize : int, default None
        If specified changes the x-axis label size
    xrot : float, default None
        rotation of x axis labels
    ylabelsize : int, default None
        If specified changes the y-axis label size
    yrot : float, default None
        rotation of y axis labels
    ax : matplotlib axes object, default None
    sharex : boolean, default True if ax is None else False
        In case subplots=True, share x axis and set some x axis labels to
        invisible; defaults to True if ax is None otherwise False if an ax
        is passed in; Be aware, that passing in both an ax and sharex=True
        will alter all x axis labels for all subplots in a figure!
    sharey : boolean, default False
        In case subplots=True, share y axis and set some y axis labels to
        invisible
    figsize : tuple
        The size of the figure to create in inches by default
    layout : tuple, optional
        Tuple of (rows, columns) for the layout of the histograms
    bins : integer, default 10
        Number of histogram bins to be used
    kwds : other plotting keyword arguments
        To be passed to hist function

In [100]:
groupbymonth.plot()
Out[100]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f7fba7fc5c0>
In [101]:
monthindex = [mnames.index(m) for m in groupbymonth.index]
In [102]:
monthindex
Out[102]:
[3, 7, 11, 1, 0, 6, 5, 2, 4, 10, 9, 8]
In [103]:
groupbymonth.index
Out[103]:
Index(['April', 'August', 'December', 'February', 'January', 'July', 'June',
       'March', 'May', 'November', 'October', 'September'],
      dtype='object', name='month')
In [108]:
groupbymonth['m'] = monthindex
In [109]:
groupbymonth
Out[109]:
maxtemp mintemp rainfall m
month
April 37.863265 24.273469 20.234694 3
August 29.786000 22.086000 178.690000 7
December 28.004000 14.526000 5.912000 11
February 31.932000 17.556000 7.940000 1
January 28.760000 15.214000 13.178000 0
July 30.754000 22.560000 169.860000 6
June 34.528000 23.976000 103.754000 5
March 35.444000 20.798000 15.264000 2
May 38.996000 26.160000 35.714000 4
November 29.016000 16.862000 22.420408 10
October 30.582000 20.306000 97.158000 9
September 30.452000 21.962000 158.292000 8
In [114]:
groupbymonth.set_index('m').sort_index().plot()
Out[114]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f7fba6162b0>
In [116]:
groupbymonth.reindex(mnames).plot()
Out[116]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f7fba3a8c18>
In [117]:
groupbymonth
Out[117]:
maxtemp mintemp rainfall m
month
April 37.863265 24.273469 20.234694 3
August 29.786000 22.086000 178.690000 7
December 28.004000 14.526000 5.912000 11
February 31.932000 17.556000 7.940000 1
January 28.760000 15.214000 13.178000 0
July 30.754000 22.560000 169.860000 6
June 34.528000 23.976000 103.754000 5
March 35.444000 20.798000 15.264000 2
May 38.996000 26.160000 35.714000 4
November 29.016000 16.862000 22.420408 10
October 30.582000 20.306000 97.158000 9
September 30.452000 21.962000 158.292000 8
In [118]:
groupbymonth.reindex(mnames)
Out[118]:
maxtemp mintemp rainfall m
month
January 28.760000 15.214000 13.178000 0
February 31.932000 17.556000 7.940000 1
March 35.444000 20.798000 15.264000 2
April 37.863265 24.273469 20.234694 3
May 38.996000 26.160000 35.714000 4
June 34.528000 23.976000 103.754000 5
July 30.754000 22.560000 169.860000 6
August 29.786000 22.086000 178.690000 7
September 30.452000 21.962000 158.292000 8
October 30.582000 20.306000 97.158000 9
November 29.016000 16.862000 22.420408 10
December 28.004000 14.526000 5.912000 11

Why classes?

In [119]:
%%file bank.py

balance = 0  # the one balance shared by every caller of this module


def get_balance():
    """Return the current module-wide balance."""
    return balance


def withdraw(amount):
    """Subtract ``amount`` from the module-wide balance."""
    global balance
    balance = balance - amount


def deposit(amount):
    """Add ``amount`` to the module-wide balance."""
    global balance
    balance = balance + amount

    
Writing bank.py
In [120]:
import bank
In [121]:
bank.get_balance()
Out[121]:
0
In [122]:
bank.deposit(10)
In [124]:
bank.get_balance()
Out[124]:
10
In [125]:
%%file bank1.py


def make_account():
    """Create a fresh account record whose balance starts at zero."""
    return dict(balance=0)


def get_balance(account):
    """Return the balance stored in ``account``."""
    return account["balance"]


def withdraw(account, amount):
    """Subtract ``amount`` from the balance stored in ``account``."""
    account["balance"] = account["balance"] - amount


def deposit(account, amount):
    """Add ``amount`` to the balance stored in ``account``."""
    account["balance"] = account["balance"] + amount

    
Writing bank1.py
In [126]:
import bank1
In [127]:
a1 = bank1.make_account()
In [128]:
bank1.get_balance(a1)
Out[128]:
0
In [130]:
bank1.get_balance(a1)
Out[130]:
0

modules

In [131]:
%%file mymodule.py

# Module-level statements run both on import and when run as a script.
x = 0

print("location 1")


# __name__ equals "__main__" only when this file is executed directly,
# so this branch is skipped when the module is imported.
if __name__ == "__main__":
    print("location 2")
    
Writing mymodule.py
In [132]:
import mymodule
location 1
In [133]:
!python mymodule.py
location 1
location 2
In [134]:
%%file mymodule1.py

x = 0

# Shows the value of __name__: the module's own name when imported,
# "__main__" when the file is run as a script.
print(__name__)
Writing mymodule1.py
In [135]:
import mymodule1
mymodule1
In [136]:
!python mymodule1.py
__main__
In [137]:
%%file backup.py

def backup():
    """Placeholder for a backup routine; the steps below are not implemented."""
    # go to some location
    # delete a few old files that are no longer required
    # copy the new, updated files from the specified location
    pass

# Module-level call: runs whenever this file is executed, including on import.
backup()
Writing backup.py
In [138]:
!python backup.py
In [139]:
%%file backup.py

def backup():
    """Placeholder for a backup routine; the steps below are not implemented."""
    # go to some location
    # delete a few old files that are no longer required
    # copy the new, updated files from the specified location
    pass

# Run the backup only when this file is executed as a script, not on import.
if __name__ == "__main__":
    backup()
Overwriting backup.py
In [140]:
!python backup.py
In [141]:
import math
In [142]:
math.pi
Out[142]:
3.141592653589793
In [ ]: