Sep 25-27, 2019 Vikrant Patil
These notes are available online at http://notes.pipal.in/2019/arcesium_advanced_sep/day2.html
© Pipal Academy LLP
We will be using Python 3 (>= 3.0) from Anaconda for this training. You can download it from https://www.anaconda.com/.
# The same `for` statement works on any iterable: a list, a range, a dict.
for i in [1, 2, 3, 4]:
    print(i)

for i in range(3):
    print(i)

# Iterating over a dict yields its keys.
for key in {"one": 1, "two": 2}:
    print(key)
# What a `for` loop does under the hood: ask the iterable for an iterator
# with iter(), then pull values out of it one by one with next().
items = [1, 2, 3, 4, 5]
itr_items = iter(items)
itr_items
# Five calls return the five items in order ...
next(itr_items)
next(itr_items)
next(itr_items)
next(itr_items)
next(itr_items)
# ... and the sixth call raises StopIteration because the list is used up.
next(itr_items)
def squares(numbers):
    """Generate the square of each item of ``numbers``, one at a time.

    Nothing is computed up front: each square is produced only when the
    consumer asks for the next value.
    """
    yield from (value * value for value in numbers)
# `squares` itself is an ordinary function object ...
squares
# ... but calling it returns a generator object.
sqr = squares(range(5))
sqr
for s in sqr:
    print(s)
# A generator can be consumed only once: this second loop prints nothing.
for s in sqr:
    print(s)
def squares(numbers):
    """Yield the square of each item of ``numbers``, tracing progress on stdout.

    The printed messages show when each part of the generator body runs
    relative to the consumer's ``next()`` calls.
    """
    print("Begin squares")
    for number in numbers:
        print("Computing square of", number)
        yield number * number
        # Execution resumes here when the consumer asks for the next value.
        print("Back to squares")
    print("Finished squares")
# Step through the tracing generator by hand to see when its body runs.
sq4 = squares(range(1, 5))
sq4
# range(1, 5) has four items, so the first four calls return 1, 4, 9, 16.
next(sq4)
next(sq4)
next(sq4)
next(sq4)
# The fifth call prints "Back to squares" and "Finished squares", then
# raises StopIteration because the generator function has returned.
next(sq4)
Problems
Write a generator function countdown(n) that behaves as follows:
>>> for i in countdown(4):
...     print(i, end=",")
4,3,2,1,
# Problem (continued): ones which can generate infinite sequence of ones?


def foo(n):
    """Demonstrate ``return`` inside a generator; yields 1, 2, 3, 4 and stops.

    ``n`` is accepted but never used by the body. When ``i`` reaches 4 the
    ``return i+1`` statement ends the iteration: the consumer sees
    StopIteration, and the returned value (5) is carried on that exception's
    ``value`` attribute rather than being yielded.
    """
    i = 1
    while True:
        print("Before yield")
        yield i
        # Runs only when the consumer asks for the next value.
        print("after yield")
        if i == 4:
            return i + 1
        i += 1
# A for loop swallows the StopIteration raised by foo's `return`, so it just ends.
for i in foo(50):
    print(i, "="*5)
def ones():
    """Yield the integer 1 endlessly; the consumer decides when to stop."""
    # iter(callable, sentinel) keeps calling the lambda until it returns the
    # sentinel. The lambda always returns 1, never None, so this never ends.
    yield from iter(lambda: 1, None)
def take(seq, n):
    """Return a list holding the next ``n`` items of ``seq``.

    ``seq`` may be any iterable. When it is an iterator, exactly the returned
    items are consumed, so a later call continues where this one stopped.
    If fewer than ``n`` items are available the shorter list is returned
    instead of letting StopIteration escape to the caller. A non-positive
    ``n`` gives an empty list.
    """
    from itertools import islice  # local import keeps this cell self-contained

    return list(islice(seq, max(n, 0)))
# An infinite generator is safe as long as only a finite prefix is requested.
infinite = ones()
take(infinite, 10)
# Step through foo by hand. The first four calls yield 1, 2, 3, 4.
f = foo(4)
x = next(f)
x
x=next(f)
x
x = next(f)
x
x = next(f)
x
# Fifth call: foo hits `return i+1`, so next() raises StopIteration here.
x = next(f)
x
f = foo(3)
next(f)
next(f)
next(f)
# A generator object is not callable; this raises TypeError.
f()
try:
next(f)
except StopIteration as s:
def piseries():
    """Yield the terms 8/((4n-3)(4n-1)) for n = 1, 2, 3, ... without end."""
    term_index = 1
    while True:
        left = 4 * term_index - 3
        right = 4 * term_index - 1
        yield 8 / (left * right)
        term_index += 1
def pi(n):
    """Approximate pi by adding up the first ``n`` terms of ``piseries()``."""
    return sum(take(piseries(), n))
# Sum of the first 10000 terms of the series.
pi(10000)
# Generators have no length; this raises TypeError.
len(piseries())
# Look at the first ten terms instead.
take(piseries(), 10)
import os
def find(root):
    """Yield the path of every file found below ``root``, walking it recursively.

    Paths are built by joining each directory reported by ``os.walk`` with the
    file names inside it; directories themselves are not yielded.
    """
    for dirpath, _subdirs, names in os.walk(root):
        yield from (os.path.join(dirpath, name) for name in names)
def grep(pattern, seq):
    """Lazily select the items of ``seq`` that contain ``pattern``.

    The test is a plain ``in`` containment check (substring match for
    strings), not a regular expression.
    """
    matches = (item for item in seq if pattern in item)
    return matches
# Build a lazy pipeline: nothing is walked until take() pulls items through it.
files = find("/home/vikrant/trainings")
# Substring match, so names such as "x.pyc" are selected as well.
pyfiles = grep(".py", files)
print(take(pyfiles, 5))
# A generator expression: like a list comprehension, but lazy.
x = (i*i for i in range(5))
x
# Pull the first two values (0 and 1) by hand ...
next(x)
next(x)
# ... the loop then continues from where next() left off: 4, 9, 16.
for i in x:
    print(i)
def count(seq):
    """Return how many items ``seq`` produces, consuming it in the process."""
    return sum(map(lambda _item: 1, seq))
def count(seq):
    """Return how many items ``seq`` produces, using an explicit loop."""
    total = 0
    # enumerate(..., 1) numbers the items from one, so the last number seen
    # is the item count; an empty input leaves the initial 0 in place.
    for total, _item in enumerate(seq, 1):
        pass
    return total
def count(seq):
    """Return how many items ``seq`` produces by summing one per item."""
    one_per_item = (1 for _ in seq)
    return sum(one_per_item)
# The earlier pipeline was partly consumed by take(), so build a fresh one
# before counting every match.
files = find("/home/vikrant/trainings")
pyfiles = grep(".py", files)
print(count(pyfiles))
def readlines(filenames):
    """Yield every line of every file named in ``filenames``, in order.

    Files are opened one at a time in text mode and closed again as soon as
    their last line has been consumed. Lines keep their trailing newline.
    """
    for name in filenames:
        with open(name) as handle:
            yield from handle
# Opening and closing a file by hand, for comparison with the `with` block
# used inside readlines.
f = open("day1.html")
f. close()
# Count lines containing "def" in files under the current directory.
files = find(".")
# NOTE(review): the pattern here is "." rather than ".py". Every path produced
# by find(".") contains a dot, so no file is filtered out -- confirm intended.
pyfiles = grep(".", files)
lines = readlines(pyfiles)
funcs = grep("def", lines)
count(funcs)
def antigrep(pattern, seq):
    """Lazily select the items of ``seq`` that do NOT contain ``pattern``.

    The mirror image of ``grep``: the same ``in`` containment test, negated.
    """
    survivors = (item for item in seq if not (pattern in item))
    return survivors
# Same count as before, but with the file list cleaned up first.
files = find("/home/vikrant/trainings/")
pyfiles = grep(".py", files)
# ".py" also matches ".pyc" files and names ending in "~"; drop both kinds.
pyfiles = antigrep(".pyc", pyfiles)
pyfiles = antigrep("~", pyfiles)
#print(take(pyfiles, 50))
lines = readlines(pyfiles)
# Substring match: counts every line containing "def", not only definitions.
funcs = grep("def", lines)
count(funcs)
Problem
Write a function get_paragraphs to split the given text into paragraphs. The function should take a sequence of lines and return a sequence of paragraphs. An empty line marks the end of the previous paragraph.
https://ia802902.us.archive.org/4/items/prideandprejudic01342gut/pandp12.txt
import requests
def wget(url, filename, timeout=30):
    """Download ``url`` and save the decoded response text to ``filename``.

    Parameters:
        url: address to fetch with an HTTP GET request.
        filename: path of the text file to (over)write with the response body.
        timeout: seconds to wait for the server before giving up; without one,
            ``requests`` may wait indefinitely on an unresponsive host.

    Raises:
        requests.HTTPError: when the server answers with a 4xx/5xx status.
        requests.Timeout: when the server does not respond within ``timeout``.
    """
    resp = requests.get(url, timeout=timeout)
    # Fail loudly instead of saving an error page as if it were the data.
    resp.raise_for_status()
    with open(filename, "w") as f:
        f.write(resp.text)
# Fetch the sample text and store it locally as pandp.txt.
url="https://ia802902.us.archive.org/4/items/prideandprejudic01342gut/pandp12.txt"
wget(url, "pandp.txt")
!tail pandp.txt
def get_paragraphs(lines):
    """Group ``lines`` into paragraphs separated by blank lines.

    Each line is stripped of surrounding whitespace. Consecutive non-blank
    lines are joined with ``"\\n"`` and yielded as one paragraph string.
    Runs of blank lines never produce empty paragraphs.
    """
    current = []
    for raw in lines:
        text = raw.strip()
        if text:
            current.append(text)
            continue
        # Blank line: close the paragraph collected so far, if there is one.
        if current:
            yield "\n".join(current)
            current = []
    # The input may end without a trailing blank line.
    if current:
        yield "\n".join(current)
# Count the paragraphs of the downloaded text.
lines = readlines(["pandp.txt"])
paras = get_paragraphs(lines)
count(paras)
# count() used up the generators, so rebuild the pipeline before reusing it.
lines = readlines(["pandp.txt"])
paras = get_paragraphs(lines)
# Longest paragraph, measured in characters.
max(paras, key=len)
!wc pandp.txt
import numpy as np
# --- Creating arrays and inspecting them ---
a = np.array([1, 2, 3, 4, 5, 6, 7, 8])
a
a.shape
a.ndim
# 0..99 laid out as a 10x10 two-dimensional array.
a100 = np.arange(100).reshape(10, 10)
a100
a100.shape
a100.ndim
# --- Indexing: rows, single elements, columns ---
a100[0]
a100[0][0]
a100[:,0]
a100[0,:]
a100[1,:]
# --- Other constructors ---
np.zeros(100).reshape(5,20)
# *_like functions take their shape from an existing array.
np.zeros_like(a100)
np.ones_like(a100)
np.asarray([1,2,3,4,5,6,7])
np.asarray(np.zeros(20).reshape(5,4))
# np.empty allocates without initialising, so the contents are arbitrary.
empty = np.empty(1000).reshape(10,10,10)
empty.ndim
empty.shape
np.empty_like(a100)
# --- Slicing: top-left and bottom-right 5x5 corners ---
a100[:5, :5]
a100[5:,5:]
# A slice is a view on the same data: writing to it changes a100 too.
subview = a100[:5,:5]
subview
subview[0,0] = -1
a100
# .copy() makes an independent array; changing it leaves subview untouched.
scopy = subview.copy()
scopy[0,0]=0
subview
scopy
# --- Boolean masks and element-wise arithmetic ---
d = np.array(range(10))
d
d > 3
d[d>3]
d - 2
d2 = d*2
d2
d + d2
d * d2
np.exp(d)
# --- Reductions over the whole array ---
a100.max()
a100.std()
a100.cumsum()
help(np.arange)
# scipy.misc.face is no longer available in recent SciPy releases, where the
# sample image lives in scipy.datasets instead. Try the original location
# first so that older installations behave exactly as before.
# NOTE: scipy.datasets fetches the image on first use and needs the optional
# `pooch` package for that.
try:
    from scipy.misc import face
except ImportError:
    from scipy.datasets import face
# gray=True returns a 2-D grayscale image rather than an RGB one.
image = face(gray=True)
image
from matplotlib import pyplot as plt
%matplotlib inline
def imshow(img):
    """Display ``img`` with matplotlib using the gray colormap."""
    gray_map = plt.cm.gray
    plt.imshow(img, cmap=gray_map)
    plt.show()
imshow(image)
# Invert the image -- presumably 8-bit data with 255 as white; TODO confirm dtype.
negate = 255 - image
imshow(negate)
# Keep every third row and column: a cheap thumbnail.
thumb = image[::3, ::3]
thumb.shape
image.shape
imshow(thumb)
# Black canvas of the thumbnail's size ...
plain = np.zeros_like(thumb)
imshow(plain)
# ... with every 10th row and every 10th column set to 255: a grid.
plain[::10, :] = 255
plain[:,::10] = 255
imshow(plain)
plain
plain[:21,:21]
# The same grid idea on a tiny 10x10 array, with lines every 3 pixels.
p = np.zeros(100).reshape(10,10)
p[::3, :] = 255
p[:,::3] = 255
imshow(p)
# Equal-weight blend of the thumbnail and the grid.
imshow(thumb*0.5 + plain*0.5)
imshow(thumb)
def swapcorners(img):
    """Return a copy of ``img`` with its top-left and bottom-right corners swapped.

    Each corner is ``h // 2`` rows by ``w // 2`` columns, where ``h`` and ``w``
    are the first two dimensions of ``img``. For an odd height or width the
    middle row or column stays where it is, so both corners always have the
    same shape. Any further axes (e.g. colour channels) are carried along.
    The input array is not modified.
    """
    swapped = img.copy()
    h, w = img.shape[:2]
    hh, hw = h // 2, w // 2
    # Read from the untouched input, so no temporary quadrant copies are needed.
    # `h - hh` (not `-hh`) keeps the slice empty when hh is 0.
    swapped[:hh, :hw] = img[h - hh:, w - hw:]
    swapped[h - hh:, w - hw:] = img[:hh, :hw]
    return swapped
imshow(swapcorners(thumb))
# `/` is true division, `//` is floor division (used for the half sizes above).
5/3
5//3
# Smaller thumbnail, tiled three across and then three down.
thumb = image[::10, ::10]
hthumb = np.hstack([thumb, thumb, thumb])
vthumb = np.vstack([hthumb, hthumb, hthumb])
imshow(vthumb)
# With no axis argument np.flip reverses the array along every axis.
imshow(np.flip(thumb))
np.roll?
# Shift the image by 300 positions along axis 0; rows pushed off one edge wrap around.
imshow(np.roll(image, 300, 0))
Download data from http://notes.pipal.in/2019/arcesium_advanced_sep/HYDERABAD-weather.csv
# Reuse wget from the generators section to fetch the weather data set.
url = "http://notes.pipal.in/2019/arcesium_advanced_sep/HYDERABAD-weather.csv"
wget(url, "HYDERABAD-weather.csv")
!tail HYDERABAD-weather.csv
import csv
# Read the whole CSV into a list of rows (each row is a list of strings).
# newline="" is what the csv module asks for, so that newlines embedded in
# quoted fields are handled by the reader itself.
with open("HYDERABAD-weather.csv", newline="") as f:
    data = list(csv.reader(f))
type(data)
# First row is the header ...
data[0]
# ... everything after it is data.
d = data[1:]
d[:3]
def floatcolumn(data, n):
    """Return column ``n`` of ``data`` as a list of floats.

    ``data`` is a sequence of rows. Element ``n`` of each row is converted
    with ``float``, so any value that cannot be converted raises.
    """
    values = []
    for record in data:
        values.append(float(record[n]))
    return values
# Columns 4, 5 and 6 of each row, converted to floats.
maxtemp = floatcolumn(d, 4)
mintemp = floatcolumn(d, 5)
# NOTE(review): float_ is introduced right after this, which suggests the
# rainfall column holds entries float() rejects -- confirm against the data.
rainfall = floatcolumn(d, 6)
def float_(sf):
    """Convert ``sf`` to a float, falling back to 0.0 when it cannot be converted.

    Only the errors ``float`` itself raises for bad input (``ValueError`` for
    malformed text, ``TypeError`` for unsupported types such as ``None``) are
    handled. The error is printed so that bad cells stay visible, and 0.0 is
    returned so the result is always a float.
    """
    try:
        return float(sf)
    except (TypeError, ValueError) as e:
        print(e)
        return 0.0
def floatcolumn(data, n):
    """Return column ``n`` of ``data`` as floats, converting each cell with ``float_``."""
    column = (record[n] for record in data)
    return [float_(cell) for cell in column]
# Retry the rainfall column, now through the float_-based floatcolumn.
rainfall = floatcolumn(d, 6)
plt.scatter(rainfall, maxtemp)
plt.scatter(rainfall, mintemp)
data[0]
d[:2]
year = [int(row[3]) for row in d ]
d[:6]
d[-6:]
# Number of distinct years in the data.
len(set(year))
ra = np.array(rainfall)
# Order the rows by the year column before drawing a line plot.
# NOTE(review): the sort key is the raw string in column 3, not an int --
# fine only if all year strings have the same number of digits; confirm.
sorteddata = sorted(d, key=lambda r:r[3])
rainfall = floatcolumn(sorteddata, 6)
year = [int(row[3]) for row in sorteddata]
plt.plot(year, rainfall)
import random
# Bar-chart demo with twelve random heights.
plt.bar(range(12), [random.random() for i in range(12)])
# As NumPy arrays, the month column can serve as a boolean mask on rainfall.
months = np.array([row[2] for row in d])
rainfall = np.array(floatcolumn(d, 6))
rainfall[months=="January"].mean()
import datetime
def get_mean_rainfall(rainfall, months, month):
    """Return the mean of the ``rainfall`` entries whose month equals ``month``.

    ``rainfall`` and ``months`` are parallel arrays; ``months == month`` is
    used as a boolean mask to select the matching rainfall values.
    """
    in_month = months == month
    return rainfall[in_month].mean()
# NOTE: this rebinds `d`, which held the CSV data rows until now.
d = datetime.datetime(2019, 9, 26)
# %B formats the full month name.
d.strftime("%B")
help(d.strftime)
# Month names in calendar order, built from the first day of each month.
mnames = [datetime.datetime(2010, i+1, 1).strftime("%B") for i in range(12)]
mnames
# Mean rainfall per month, in the same order as mnames.
rainfall_ = [get_mean_rainfall(rainfall, months, m) for m in mnames]
rainfall_
plt.bar(mnames, rainfall_)
plt.bar(range(12), rainfall_)
import altair as alt
%%file sample.txt
area,sales,profit
North,5,2
East,25,8
West,15,6
South,20,5
Central,10,3
import pandas as pd
# Load the small table written to sample.txt above.
sample = pd.read_csv("sample.txt")
alt.Chart(sample).mark_point()
# NOTE(review): selects the renderer named 'notebook' -- presumably required
# by the classic Jupyter notebook front end; confirm for your environment.
alt.renderers.enable('notebook')
alt.Chart(sample).mark_point()
# Add encodings one at a time: first y only, then y and x.
alt.Chart(sample).mark_point().encode(y="area")
alt.Chart(sample).mark_point().encode(
y = "area",
x = "sales"
)
# Keep the encoded chart in a variable and swap only the mark type.
base = alt.Chart(sample).mark_point().encode(
y = "area",
x = "sales"
)
base.mark_bar()
base.mark_line()
base.mark_circle()
# Same chart as bars, coloured by area.
base = alt.Chart(sample).mark_bar().encode(
y = "area",
x = "sales",
color = "area"
)
base
base.encode(size="profit")
base.encode(size="profit").mark_circle()
# Show the chart's JSON specification.
print(base.to_json())
s = base.encode(size="profit").mark_circle()
s.save("sample.html")
!less sample.html
# The same encodings written with explicit channel objects; value= gives a
# fixed colour rather than one taken from a data column.
sales = alt.Chart(sample).mark_bar().encode(
alt.Y("area"),
alt.X("sales"),
alt.Color(value="green")
)
sales
profit = alt.Chart(sample).mark_bar().encode(
alt.Y("area"),
alt.X("profit"),
alt.Color(value="firebrick")
)
profit
# `+` layers the two charts on top of each other.
sales + profit
sample
# --- Series ---
area = pd.Series(['North','East','West','South','Central'])
area
# Use the area names as the index (labels) of the sales figures.
sales = pd.Series([5,25,15,20,10], index=area)
sales
sales['North']
# NOTE(review): integer key on a string-labelled Series is treated as a
# position; newer pandas versions warn about this -- prefer .iloc[0].
sales[0]
# reindex returns a new Series; `sales` itself is unchanged, as shown next.
sales.reindex(index=sorted(area))
sales
sales[sales > 10]
sales
sales[-1]
# With the default integer index, -1 is looked up as a label, not a position,
# so profit[-1] raises KeyError.
profit = pd.Series([2, 8, 6, 5, 3])
profit[-1]
profit.mean()
profit.std()
# --- DataFrame ---
df = pd.DataFrame({"sales":[20,23,12,6,25],
"profit":[5,2,7,1,8]
}, index=['North','East','West','South','Central']
)
df
df['sales']
df.sales
df.head()
weather = pd.read_csv("HYDERABAD-weather.csv")
weather.head()
# .loc selects by label, .iloc by position.
df.loc['North']
df.iloc[3]
weather.info()
weather.plot("maxtemp", "rainfall", kind="scatter")
# NOTE(review): the frame has a text column (month); recent pandas versions
# may need mean(numeric_only=True) in the groupby calls below -- confirm.
weather.groupby('year').mean()
weather.groupby("month").mean()
weather
groupbymonth = weather.groupby('month').mean()
groupbymonth
# Drop the columns that make no sense as a monthly average.
del groupbymonth['Unnamed: 0']
del groupbymonth['year']
groupbymonth
groupbymonth.plot()
groupbymonth
groupbymonth.index
mnames
# Put the rows in calendar order (mnames) before plotting.
groupbymonth.reindex(index=mnames).plot()
decdata = groupbymonth.loc['December']
decdata
decdata['maxtemp']
dict(decdata)
groupbymonth.plot(kind='bar')
# Fetch the site's RSS feed; `params` is sent as the query string (?service=rss).
url = "http://www.thehindu.com/"
response = requests.get(url, params={"service":"rss"})
xmltext = response.text
print(xmltext[:1200])

# --- Approach 1: ElementTree ---
from xml.etree import ElementTree as et
root = et.fromstring(xmltext)
# ".//item" finds <item> elements at any depth below the root.
items = root.findall(".//item")
len(items)
type(items)
items[0]
print(et.tostring(items[0]).decode())
for item in items[:10]:
    print(item.findtext("title"))
    print(item.findtext("link"))
    print("-"*30)

# --- Approach 2: minidom (DOM API) ---
from xml.dom.minidom import parseString
root = parseString(xmltext)
root
items = root.getElementsByTagName("item")
type(items)
for item in items[:10]:
    title = item.getElementsByTagName("title")[0]
    link = item.getElementsByTagName("link")[0]
    # The text of an element is the data of its first child node.
    print(title.firstChild.data)
    print(link.firstChild.data)
    print("-"*30)
import json
# Round-trip a plain dict through JSON text.
decdata = dict(decdata)
decdata
s = json.dumps(decdata)
s
json.loads(s)
# A JSON web API: fetch, decode the body, load part of it into a DataFrame.
url = "https://www.alphavantage.co/query?function=TIME_SERIES_INTRADAY&symbol=MSFT&interval=5min&outputsize=full&apikey=demo"
print(url)
resp = requests.get(url)
# NOTE: this rebinds `data`, which held the CSV rows earlier.
data = resp.json()
# transpose() swaps rows and columns of the frame built from the nested dict.
pd.DataFrame(data['Time Series (5min)']).transpose()
# List the repositories of a GitHub organisation.
url = "https://api.github.com/orgs/{}/repos".format("google")
url
repos = requests.get(url).json()
type(repos)
repos[0]
r = repos[0]
r['forks']
r['owner']['id']
# Twenty repositories with the highest owner id, largest first.
for r in sorted(repos, key=lambda r:r['owner']['id'], reverse=True)[:20]:
    print(r['owner']['id'], r['forks'])
# Ten most-forked repositories.
for r in sorted(repos, key=lambda r:r['forks'], reverse=True)[:10]:
    print(r['full_name'], r['forks'])
# pandas can also load JSON straight from a URL.
pd.read_json("https://www.alphavantage.co/query?function=TIME_SERIES_INTRADAY&symbol=MSFT&interval=5min&outputsize=full&apikey=demo")
%%file flask_app.py
from flask import Flask, render_template

app = Flask(__name__)


@app.route("/hello/<name>")
def hellourl(name="Flask"):
    """Render templates/hello.html, greeting the name taken from the URL."""
    return render_template("hello.html", name=name)


@app.route("/")
def index():
    """Serve a plain-text landing page."""
    return "This is index page of flask app"


if __name__ == "__main__":
    # Start the server only when this file is run as a script.
    app.run()
!python flask_app.py
!mkdir templates
%%file templates/hello.html
<!doctype html>
{# Rendered by the hellourl view in flask_app.py, which passes in `name`. #}
<title>Hello from Flask</title>
{% if name %}
<h1>Hello {{ name }} </h1>
{% else %}
<h1>Hello, World!</h1>
{% endif %}