Nov 15-17, 2017 Vikrant Patil
These notes are available online at http://notes.pipal.in/2017/arcesium-oct-advpython/day2.html
© Pipal Academy LLP
# Anything that implements the iteration protocol can drive a for-loop.
nums = list(range(5))
for n in nums:  # a list yields its elements
    print(n)
for c in "string":  # a string yields its characters
    print(c)
for key in {"x":1,"y":2}:  # a dict yields its keys
    print(key)
# A file object yields its lines. The with-block closes the handle as soon
# as the loop is done instead of leaving it open until garbage collection.
with open("data.csv") as csvfile:
    for line in csvfile:
        print(repr(line))
# Drive the iteration protocol by hand: iter() returns an iterator over the
# list and every next() call returns the following element.
items = [1,2,3]
itr = iter(items)
next(itr)
next(itr)
next(itr)
# The list has only three elements, so this fourth call raises StopIteration.
next(itr)
def square(numbers):
    """Lazily yield the square of every value produced by ``numbers``."""
    for value in numbers:
        squared = value * value
        yield squared
# Calling a generator function runs none of its body; it only builds a
# generator object, which the bare expression below displays in a notebook.
sq5 = square(range(1,6))
sq5
# The for-loop pulls the values out one at a time.
for i in sq5:
    print(i)
sq4 = square(range(1,4))
next(sq4)  # pulls just the first square
range(5)
def square(numbers):
    """Yield the square of each value in ``numbers``, tracing every step.

    The printed trace shows when each part of the body actually runs
    relative to the consumer's requests for values.
    """
    print("Begin squares")
    for number in numbers:
        print("Computing square of ",number)
        result = number * number
        yield result
        # Execution resumes here only when the consumer asks for more.
        print("After yield")
    print("Finish square")
# Step through the tracing generator by hand to see when its prints fire.
sq4 = square(range(1,4))
next(sq4)
next(sq4)
next(sq4)
# All three inputs are used up: this call resumes after the last yield,
# falls out of the loop and ends with StopIteration.
next(sq4)
# A for-loop makes the same next() calls and stops quietly on StopIteration.
for s in square(range(1,5)):
    print(s)
def f():
    """Yield the squares of 0, 1, 2, ... and stop for good on reaching 13."""
    number = 0
    while number < 1000:
        if number == 13:
            # A bare return inside a generator ends the iteration early.
            return
        yield number * number
        number += 1
# The loop ends as soon as the generator returns.
for s in f():
    print(s)
def f():
    """Yield the squares of 0, 1 and 2, then finish once 3 is reached."""
    for candidate in range(1000):
        reached_limit = candidate == 3
        if reached_limit:
            # Returning from a generator surfaces as StopIteration in next().
            return
        yield candidate * candidate
# The generator defined just above yields three values (for i = 0, 1, 2).
g = f()
next(g)
next(g)
next(g)
# On the fourth request i == 3, the generator returns, and next() raises
# StopIteration.
next(g)
problem: Write a generator function countdown that takes a number n as argument and generates all numbers from n down to 0.
>>> for i in countdown(3):
... print(i)
3
2
1
0
problem: Write a generator triangular that takes a number n as argument and generates the sequence of the first n triangular numbers. The nth triangular number is the sum of the first n natural numbers.
>>> for t in triangular(5):
... print(t, end=",")
1,3,6,10,15
Bonus problem: Remove duplicates from a sequence while maintaining order. Can the same function be used to remove duplicate lines from a file?
>>> for item in consumedup([3,5,3,4,5,6,7,8,8,9]):
... print(item, end=",")
3,5,4,6,7,8,9
# Scratch cell: creates an empty set (presumably a hint for the
# duplicate-removal problem above, whose solution below tracks seen items
# in a set).
x = set()
def countdown(n):
    """Yield n, n-1, ... down to and including 0; nothing if n is negative."""
    remaining = n
    while remaining >= 0:
        yield remaining
        remaining = remaining - 1
# Prints 3, 2, 1, 0 on separate lines.
for i in countdown(3):
    print(i)
def triangular(n):
    """Yield the first ``n`` triangular numbers: 1, 3, 6, 10, ...

    The k-th triangular number is the sum of the first k natural numbers.
    A running total is carried from one step to the next, so each value
    costs a single addition instead of re-summing ``range(1, k+1)``.

    Yields nothing when ``n`` is zero or negative.
    """
    total = 0
    for k in range(1, n + 1):
        total += k
        yield total
# end="," keeps all values on one line, separated by commas.
for t in triangular(5):
    print(t, end=",")
def consumedup(seq):
    """Yield each distinct item of ``seq`` once, in first-seen order.

    Items already produced are remembered in a set, so they must be
    hashable. The input is consumed lazily, one item at a time.
    """
    already_seen = set()
    for element in seq:
        if element in already_seen:
            continue
        yield element
        already_seen.add(element)
# Each distinct value is printed once, in order of first appearance.
g = consumedup([3,5,3,4,5,6,7,8,8,9])
for item in g:
    print(item, end=",")
%%file duplicatelines.txt
Saving file at /day2.ipynb
Saving file at /day2.ipynb
Saving file at /day2.ipynb
hello
hello
x
# consumedup() accepts any iterable, so an open file (an iterable of lines)
# works too. The with-block closes the file once the loop is done.
with open("duplicatelines.txt") as lines:
    for line in consumedup(lines):
        print(line, end="")
# Square brackets build the whole list immediately; parentheses build a
# generator that computes values on demand.
[n*n for n in range(1,11)] # list comprehension
s = (n*n for n in range(1,11)) # generator expression
s
sum(s)
# sum(s) above already consumed s, so max() receives an empty iterable and
# raises ValueError.
max(s)
sum((x*x for x in range(1000000)))
sum(x*x for x in range(1000000))# when a generator expression is the only argument
# to a function, the extra pair of parentheses can be skipped
# Generators compose: a generator expression feeds consumedup() directly.
g = consumedup(x*x for x in range(1,5))
for i in g:
    print(i, end=",")
def ones():
    """Yield 1 three times, then fail on the next request.

    ``DOOM`` is not defined in this function; unless something else
    provides that name, evaluating it on the fourth request raises
    NameError from inside the generator.
    """
    yielded = 0
    while True:
        if yielded >= 3:
            DOOM
        yield 1
        yielded = yielded + 1
# The first three calls each return 1.
one = ones()
next(one)
next(one)
next(one)
# The fourth call reaches the bare DOOM expression inside ones(); with no
# such name defined, it raises NameError.
next(one)
# A generator that has raised is finished, so further calls raise
# StopIteration.
next(one)
next(one)
import os
def find(root):
    """Yield the path of every file below ``root``, descending recursively.

    Paths are built by joining each directory reported by ``os.walk`` with
    the file names it contains; directories themselves are not yielded.
    """
    for dirpath, _subdirs, names in os.walk(root):
        yield from (os.path.join(dirpath, name) for name in names)
def take(n, seq):
    """Return a list with the first ``n`` items of ``seq``.

    If ``seq`` produces fewer than ``n`` items, all of them are returned.
    Calling ``next()`` inside a generator expression instead would let
    StopIteration escape into the generator, which Python 3.7+ turns into
    RuntimeError (PEP 479). ``islice`` stops cleanly at the end of input.

    At most ``n`` items are consumed from ``seq``, so it is safe to use on
    infinite iterators. A negative ``n`` gives an empty list.
    """
    from itertools import islice

    return list(islice(seq, max(n, 0)))
def integers():
    """Yield the natural numbers 1, 2, 3, ... without ever stopping."""
    current = 1
    while True:
        yield current
        current = current + 1
def squares(numbers):
    """Return a lazy generator of the square of each value in ``numbers``."""
    squared = (value * value for value in numbers)
    return squared
# integers() never ends, but the pipeline is lazy: take() pulls only the
# first ten squares.
take(10, squares(integers()))
def grep(pattern, seq):
    """Return a lazy generator of the items of ``seq`` containing ``pattern``.

    The check is a plain ``in`` test on each item, not a regular
    expression match.
    """
    matching = (candidate for candidate in seq if pattern in candidate)
    return matching
# Pipeline: every file below the current directory -> only paths that
# contain ".py" -> the first ten of them.
files = find(".")
# The substring test also matches names such as "x.pyc".
pyfiles = grep(".py", files)
print(take(10, pyfiles))
def count(seq):
    """Return how many items ``seq`` produces, consuming it in the process."""
    total = 0
    for _ in seq:
        total += 1
    return total
# range(100) produces 100 items, so this evaluates to 100.
count(range(100))
def readlines(filenames):
    """Yield every line of every file named in ``filenames``, in order.

    Files are opened lazily, one at a time. Each one is closed as soon as
    its last line has been produced (or when the generator itself is
    closed), rather than staying open until garbage collection.
    """
    for filename in filenames:
        with open(filename) as handle:
            yield from handle
How many lines of Python code have we written during this course?
# Chain the generators: file paths -> paths containing ".py" -> their lines,
# then count the lines. Nothing is read until count() starts pulling.
files = find(".")
pyfiles = grep(".py", files)
lines = readlines(pyfiles)
print(count(lines))
How many Python functions have we written?
# Same pipeline as before with one more filter: keep only lines that
# contain "def " and count them.
files = find(".")
pyfiles = grep(".py", files)
lines = readlines(pyfiles)
functions = grep("def " ,lines)
print(count(functions))
problem: Write a function get_paragraphs to split given text into paragraphs. Paragraphs are separated by an empty line. The function should take a sequence of lines as argument and return a sequence of paragraphs. For sample input, see http://anandology.com/tmp/pg1342.txt. Once the function is there, we should be able to find:
# Inspect the types involved in iteration.
x = [1,2,3,4]
itr = iter(x)
type(itr)  # the iterator object that iter() returned for the list
y = (i for i in range(5))
type(y)  # a generator expression evaluates to a generator object
type(range(1,2))
r = range(1,2)
type(range)
range.__class__
next(y)
# An empty string is falsy, so nothing is printed.
if "":
    print("x")
def get_paragraphs(seq):
    """Yield the paragraphs found in ``seq``, a sequence of text lines.

    A paragraph is a run of consecutive non-blank lines, joined into one
    string. Lines that are empty or whitespace-only act purely as
    separators: they are never part of a paragraph, and several of them in
    a row (or at the start or end of the input) produce no empty
    paragraphs.
    """
    current = []
    for line in seq:
        if line.strip():
            current.append(line)
        elif current:
            # A blank line closes the paragraph collected so far.
            yield "".join(current)
            current = []
    # The input may end without a trailing blank line.
    if current:
        yield "".join(current)
# Small in-memory sample: number of paragraphs, then the longest one.
count(get_paragraphs(["A\nB", "\n", "A\n","B\n","\n" ,"A\n", "A\n"]))
max(get_paragraphs(["A\nB", "\n", "A\n","B\n","\n" ,"A\n", "A\n"]), key=len)
# The same two questions for the downloaded sample text. count() and max()
# each consume the generator, so it is created twice.
g = get_paragraphs(open("pg1342.txt"))
count(g)
g = get_paragraphs(open("pg1342.txt"))
max(g, key=len)
import requests
# Download the page with the query string ?service=rss and keep the body
# as text.
url = "http://www.thehindu.com/"
response = requests.get(url, params = {"service":"rss"})
xmltext = response.text
xmltext[:100]
# Parse the XML and collect every <item> element at any depth.
from xml.etree import ElementTree as et
root = et.fromstring(xmltext)
items = root.findall(".//item")
len(items)
items[0]
# tostring() returns bytes; decode() turns them into printable text.
print(et.tostring(items[0]).decode())
# Show title and link of the first ten items, with a separator line after
# each one.
for item in items[:10]:
    print(item.findtext("title"))
    print(item.findtext("link"))
    print("-"*50)
# The same document read through the DOM API instead of ElementTree.
from xml.dom.minidom import parseString
root = parseString(xmltext)
items = root.getElementsByTagName("item")
len(items)
item = items[0]
# The title text is the data of the first child node of <title>.
title = item.getElementsByTagName("title")[0]
title.firstChild.data
# json.loads parses JSON text into Python objects (here a dict holding an
# int and a list).
import json
j = json.loads('{"a":2,"l":["a","b","c"]}')
type(j)
j['a']
j['l']
# json.dumps goes the other way and returns JSON text.
d = {"service":"rss", "x":[1,2,3,4,5]}
json.dumps(d)
Find the distance between two cities using the Google API
import requests
def distance(origin, dest, timeout=10):
    """Return the distance between two places as text.

    Sends one GET request to the Google Distance Matrix endpoint asking
    for metric units, and returns the ``text`` field of the distance for
    the first origin/destination pair in the JSON reply.

    Parameters:
        origin: sent as the ``origins`` query parameter.
        dest: sent as the ``destinations`` query parameter.
        timeout: seconds to wait for the server; without one a stalled
            connection would block the caller indefinitely.

    Raises:
        requests.HTTPError: if the server answers with an error status.
        requests.Timeout: if no answer arrives within ``timeout`` seconds.
        KeyError, IndexError: if the reply lacks the expected
            rows/elements/distance structure.
    """
    # NOTE(review): no API key is sent -- confirm whether the service still
    # accepts keyless requests.
    url = "https://maps.googleapis.com/maps/api/distancematrix/json"
    query = {
        "units":"metric",
        "origins":origin,
        "destinations":dest,
    }
    response = requests.get(url, params=query, timeout=timeout)
    # Surface HTTP failures here rather than as a confusing parse error below.
    response.raise_for_status()
    data = response.json()
    return data['rows'][0]['elements'][0]['distance']['text']
# Each call performs a fresh HTTP request; nothing is cached between the
# two identical calls.
distance("hyderabad", "mumbai")
distance("hyderabad", "mumbai")
import numpy as np
# reshape() returns the same values viewed with new dimensions.
x = np.arange(32)
x
x.reshape(4,8)
y = np.arange(64).reshape(4,2,8)
y
# len() reports the size of the first axis of whatever it is given:
# 8 for an innermost row, 2 for one 2x8 block, 4 for the whole array.
len(y[-1][-1])
len(y[-1])
len(y)
y.shape
l,w,h = y.shape
l,w,h
y.dtype
y.size
y.itemsize
problem: create a 2D array of size 5x6
# NOTE(review): the problem statement above asks for a 5x6 array, but this
# builds 5x10 from 50 random values -- confirm which was intended.
np.random.random(50).reshape(5,10)
np.linspace(1.0, 10, 15)
np.zeros(20).reshape(4,5)
# Arithmetic on arrays is applied element by element.
x = np.arange(10)
x + 10
x * 2
x + x
x * x
x ** 3
# Load SciPy's sample image as a grayscale array and inspect it.
# NOTE(review): recent SciPy releases moved this helper out of scipy.misc
# (to scipy.datasets.face) -- confirm against the installed version.
from scipy import misc
face = misc.face(gray=True)
type(face)
face.ndim
face.shape
face.dtype
face[1][:10]
#show images from matplotlib in the same HTML page
%matplotlib inline
import matplotlib.pyplot as plt
plt.imshow(face, cmap=plt.cm.gray)
import matplotlib.pyplot as plt
def imshow(img):
    """Draw ``img`` with matplotlib's gray colormap and show the figure."""
    gray = plt.cm.gray
    plt.imshow(img, cmap=gray)
    plt.show()
# Subtracting every pixel from 255 gives the negative image.
negface = 255 - face
imshow(negface)
face[-1][:10]
negface[-1][:10]
# Indexing a 2D array: x[i][j] and x[i,j] address the same element.
x = np.arange(20).reshape(4,5)
x[1][2]
x[1,2]
x[1,:] # row with index 1
x[:,0] # 0th column
x[:,:2] # first two columns
x.transpose()
x.transpose().shape
x.shape
facet = face.transpose()
imshow(facet)
face.mean()
# A comparison yields a boolean array, which can be used to select
# elements; summing it counts the True entries.
x = np.arange(10)
x < 5
x[x<5]
a = x < 5
a
a.sum()
problem: Convert the face image to black and white image (instead of gray scale)
# Thresholding produces a boolean image: True where the pixel is above 127.
facebw = face > 127
imshow(facebw)
x = np.arange(10000).reshape(100,100)
imshow(x)
# Slice assignment overwrites a whole column / row in place. The array is
# not drawn again before x is rebound below.
x[:,50] = 9999
x[50,:] = 9999
x = np.zeros_like(face)
imshow(x)
# A step in the slice selects every 10th row.
x[::10,:] = 255
imshow(x)
x = np.zeros(10000).reshape(100,100)
x[::5,:] = 255
imshow(x)
x[:,::5] = 255
imshow(x)
# A grid with the same shape and dtype as the image: every 50th row and
# column set to 255.
mesh = np.zeros_like(face)
mesh[::50,:]= 255
mesh[:,::50]= 255
imshow(mesh)
# Equal-weight blend of image and grid.
imshow(0.5*face + 0.5*mesh)
# Plain Python lists support the same start:stop:step slicing.
x = list(range(10))
x
x[2:]
x[:3]
x[::2]
x[::3]
x[::4]
x = np.zeros(10000).reshape(100,100)
x[:,::10] = 255 #columns at interval of 10
x[::10,:] = 255 #rows at interval of 10
imshow(x)
# NOTE(review): mesh was created with zeros_like(face), so the sum keeps
# face's dtype; if that is an 8-bit integer type, adding 255 wraps around
# instead of saturating -- confirm this is the intended effect.
face2 = face + mesh
imshow(face2)
imshow(face)
face2[:,50]
face[:,50]
x = face2 - face
imshow(x)
problem: Try to swap parts of image. split the image in four parts like
AB
CD
it should become
AC
BD
# Work on a copy so the original image stays intact.
face2 = face.copy()
h, w = face2.shape
# Copy the top-right and bottom-left quadrants first: the slices are taken
# from face2 itself, which is overwritten below.
TR = face2[:h//2,w//2:].copy()
BL = face2[h//2:,:w//2].copy()
imshow(face2)
imshow(TR)
imshow(BL)
# Write each saved quadrant into the other one's position.
face2[:h//2,w//2:] = BL
face2[h//2:,:w//2] = TR
imshow(face2)
f2 = np.rot90(face)
imshow(f2)
imshow(np.roll(face, 400))
imshow(np.flip(face, 1))
# Keeping every 4th row and column gives a thumbnail.
thumb = face[::4,::4]
imshow(thumb)
thumb.shape
# Tile the thumbnail 4 across and 4 down.
t = np.hstack([thumb, thumb, thumb, thumb])
v = np.vstack([t,t,t,t])
imshow(v)
import numpy as np
import matplotlib.pyplot as plt
# 256 evenly spaced sample points from -pi to pi, both ends included.
X = np.linspace(-np.pi, np.pi, 256, endpoint=True)
X.shape
C = np.cos(X)
S = np.sin(X)
# The label= texts are what plt.legend() displays.
plt.plot(X,C, label="cos(x)")
plt.plot(X,S, label="sin(x)")
plt.legend()
plt.show()
T = np.tan(X)
T
plt.plot(X,T, label="tan(x)")
plt.legend()
plt.show()
# Scatter plot of 1024 points whose x and y are drawn independently from a
# normal distribution with mean 0 and standard deviation 1.
n = 1024
X = np.random.normal(0,1, n)
Y = np.random.normal(0,1, n)
plt.scatter(X,Y)
plt.show()
Download data from http://notes.pipal.in/2017/arcesium-oct-advpython/HYDERABAD-weather.csv
import csv
# Read the whole CSV into a list of rows; each row is a list of strings.
data = list(csv.reader(open("HYDERABAD-weather.csv")))
data[:3]
data = data[1:] # skip header
# presumably columns 4 and 5 hold max/min temperature -- verify against the
# CSV header
tmax = [float(row[4]) for row in data]
tmin = [float(row[5]) for row in data]
plt.scatter(tmin, tmax)
# float() raises ValueError for any cell that is not a valid number; the
# safefloat() helper defined next is the tolerant replacement.
rainfall = [float(row[6]) for row in data]
def safefloat(value):
    """Convert ``value`` to float, substituting 0.0 when it cannot be parsed.

    A ValueError from ``float()`` is reported on stdout and replaced by
    0.0; any other exception propagates to the caller.
    """
    try:
        number = float(value)
    except ValueError:
        print("bad value: %r"% value)
        return 0.0
    return number
# Same column as before, but unparsable cells now become 0.0 (and are
# reported) instead of aborting the whole comprehension.
rainfall = [safefloat(row[6]) for row in data]
plt.scatter(tmax, rainfall)
# Bar chart of ten random values (normal distribution, mean 0, std 100).
n = 10
X = np.arange(n)
Y = np.random.normal(0,100,n)
plt.bar(X,Y)
problem: Using above dataset, plot a bar chart of average rainfall per month.
data[:2]
data[:10]
# Arrays allow boolean-mask selection: months == "January" marks the rows
# to keep. presumably column 2 holds the month name -- verify against the
# CSV header.
months = np.array([row[2] for row in data])
# NOTE(review): rainfall is read from the last column here (row[-1]) but
# from row[6] earlier -- confirm both refer to the same column.
rainfall = np.array([safefloat(row[-1]) for row in data])
rainfall[months == "January"].mean()
import datetime
# Month names in calendar order, generated by formatting the first day of
# each month with %B (the full month name).
list_of_months = [datetime.date(2000, i+1, 1).strftime("%B") for i in range(12)]
list_of_months
def get_mean_rainfall(month):
    """Return the mean of the rainfall values recorded for ``month``.

    Reads the module-level arrays ``months`` and ``rainfall``: entries of
    ``rainfall`` are selected wherever ``months`` equals ``month``.
    """
    selected = months == month
    return rainfall[selected].mean()
# One mean per month, in calendar order, drawn as twelve bars.
mean_rainfall = [get_mean_rainfall(m) for m in list_of_months]
mean_rainfall
plt.bar(range(12), mean_rainfall)
x = np.arange(10)
x > 5
x = np.arange(3)
x
# The mask has four entries but x has only three, so this indexing fails
# with an IndexError.
x[np.array([True, False, True, True])]
import pandas as pd
import numpy as np
%matplotlib inline
# A Series is a one-dimensional array with an index of labels.
x = pd.Series(range(10))
x
s = pd.Series(np.random.randn(5), index=['a','b','c','d','e'])
s
# Built from a dict, the keys become the index.
d = {'a':0, 'b':1, 'c':2}
s = pd.Series(d)
s
# An explicit index selects and orders the entries; 'd' is not a key of the
# dict, so its value is missing (NaN).
pd.Series(d, index=['b','c','d','a'])
s = pd.Series(np.random.randn(5), index=['a','b','c','d','e'])
# NOTE(review): integer lookup on a label-indexed Series is treated as
# positional here; newer pandas deprecates this in favour of s.iloc[0] --
# confirm against the installed version.
s[0]
s[:3]
s['a']
s[1:4:2]
s[s > s.median()]
s.mean()
np.exp(s)
# `in` tests the index labels, not the values.
'e' in s
s['a']
'z' in s
s + s
s * s
# A DataFrame from a list of rows gets default integer row/column labels.
data = [["A",1], ["B", 2], ["c",3], ["D",4]]
pd.DataFrame(data)
# From a dict of lists, each key becomes a column.
d = {"one":[1. , 2. , 3., 4.],
"two":[4. ,3., 2. , 1.]
}
df = pd.DataFrame(d, index=['a','b','c','d'])
df
df['one']
df['one']['a']
df.columns
# Assigning to .columns renames the columns in place.
df.columns = ["column1", "column2"]
df
# set_index returns a new frame that uses column2's values as row labels.
df2 = df.set_index("column2")
df2
df2['column1'][4.0]
df.to_csv("df.csv")
!cat df.csv
# Load the weather data with the first CSV column as the row index.
df = pd.read_csv("HYDERABAD-weather.csv", index_col=0)
df
df.head()
df.tail()
df.plot("maxtemp", "mintemp", kind="scatter")
# NOTE(review): the frame has a text "month" column; newer pandas may need
# mean(numeric_only=True) here -- confirm against the installed version.
mean = df.groupby("year").mean()
mean
mean.plot()
bymonth = df.groupby("month").mean()
bymonth
# Averaging the year within a month is meaningless, so drop that column.
del bymonth['year']
bymonth
bymonth.index
# Map each month name in the index to its calendar position (0-11) so the
# rows can be sorted chronologically.
newindex = [list_of_months.index(month) for month in bymonth.index]
newindex
bymonth['m'] = newindex
bymonth
# set_index returns a new frame; bymonth itself keeps its month-name index.
bymonth2 = bymonth.set_index("m")
bymonth2
bymonth
bymonth2.sort_index().plot()