Sep 25-27, 2019 Vikrant Patil
These notes are available online at http://notes.pipal.in/2020/arcesium_advanced_feb/day2.html
© Pipal Academy LLP
We will be using Python 3.7 from Anaconda for this training. You can download it from https://www.anaconda.com/
# A for loop works over any iterable, not just lists.
for n in range(2, 8):
    print(n)

# Iterating a string visits it one character at a time.
text = "This is a string to test for loop"
for c in text:
    print(c, end=",")

# Iterating a dict visits its keys.
flags = {"a": True, "b": False}
for item in flags:
    print(item)
# Obtain an explicit iterator over a five-element list and advance it by hand.
items = [1, 2, 3, 4, 5]
itr_items = iter(items)
itr_items
next(itr_items)
next(itr_items)
next(itr_items)
next(itr_items)
next(itr_items)
# Sixth call on a five-element list: the iterator is exhausted, so this
# raises StopIteration.
next(itr_items)
def squares(numbers):
    """Lazily yield the square of every value in ``numbers``.

    Calling the function only builds a generator; each square is computed
    when the consumer asks for the next item.
    """
    yield from (value * value for value in numbers)
squares
s =squares([2,3,4])
s
for i in s:
print(i)
def squares(numbers):
    """Yield the squares of ``numbers`` while printing a trace of control flow.

    The prints show when the generator body actually runs: nothing is printed
    until the first item is requested, and execution pauses at every yield.
    """
    print("Begin squares")
    for value in numbers:
        print("Computing square of ", value)
        squared = value * value
        yield squared
        # Execution resumes here when the consumer asks for the next item.
        print("Back to squares")
    print("Finished squares")
# Drive the generator by hand; the body of squares() does not start running
# until the first next() call.
sqrs = squares([4,5,6])
sqrs
next(sqrs)
next(sqrs)
next(sqrs)
# Fourth call on three inputs: the loop ends, "Finished squares" is printed
# and StopIteration is raised.
next(sqrs)
Problems
Write a generator countdown, which works exactly opposite to range, and a function take, which takes only the first n items from a given sequence.
>>> ones = infiniteones()
>>> take(ones, 5)
[1, 1, 1, 1, 1]
def hold():
    """Yield 1, 2 and 3, printing a message before and after every pause.

    The printed trace shows that a generator stops at each yield and picks up
    from exactly that point on the following next() call.
    """
    print("Enter ....")
    yield 1
    # (message after resuming, message before the next pause, value to yield)
    steps = (
        ("After 1", "Going 2", 2),
        ("After 2", "Going 3", 3),
    )
    for after_msg, going_msg, value in steps:
        print(after_msg)
        print(going_msg)
        yield value
    print("After 3")
    print("Stopping....")
# Step through hold() one yield at a time.
h = hold()
next(h)
next(h)
next(h)
# hold() has only three yields: this call runs the trailing prints and then
# raises StopIteration.
next(h)
def foo(x):
    """Show what ``return`` means inside a generator function.

    Because the body contains a yield, calling foo always produces a
    generator. For a falsy ``x`` it yields 0; for a truthy ``x`` it finishes
    immediately, and the 1 is only visible as ``StopIteration.value``.
    """
    if not x:
        yield 0
        return
    return 1
# foo contains a yield, so calling it always builds a generator -- even when
# the branch taken is the plain `return 1`.
f = foo(True)
f
# The body reaches `return 1` right away, so this raises StopIteration (the 1
# travels on the exception's .value) instead of returning 1.
next(f)
f = foo(True)
# Same failure here: the assignment never completes, so print(x) cannot show
# a value coming from foo.
x = next(f)
print(x)
def loop():
    """Yield 0, 1, 2 and 3, then finish.

    Finishing a generator is what makes the next next() call raise
    StopIteration.
    """
    current = 0
    while current <= 3:
        yield current
        current += 1
# loop() yields four values (0 through 3) and then returns.
l = loop()
next(l)
next(l)
next(l)
next(l)
# Fifth call: the generator has already returned, so this raises StopIteration.
next(l)
help(max)
def countdown(n):
    """Yield n, n-1, ... down to the last value greater than zero.

    Nothing is yielded when ``n`` is zero or negative.
    """
    remaining = n
    while remaining > 0:
        yield remaining
        remaining -= 1
for i in countdown(4):
print(i, end=",")
def piseries():
    """Yield the terms 8 / ((4k - 3)(4k - 1)) for k = 1, 2, 3, ... without end.

    The generator is infinite; take a finite number of terms before summing.
    """
    k = 0
    while True:
        k += 1
        yield 8 / ((4 * k - 3) * (4 * k - 1))
def take(seq, n):
    """Return a list holding at most the first ``n`` items of ``seq``.

    ``seq`` may be any iterable. When it is an iterator, exactly the returned
    items are consumed, so the caller can keep reading from where take()
    stopped. If ``seq`` runs out early the shorter list is returned instead of
    letting StopIteration escape. A negative ``n`` yields an empty list.
    """
    from itertools import islice

    # islice rejects negative stops, whereas range(n) treated them as "none".
    return list(islice(seq, max(n, 0)))
take(piseries(), 5)
sum(take(piseries(), 1000))
def fibseries():
    """Yield the Fibonacci numbers 1, 1, 2, 3, 5, ... without end."""
    previous, current = 0, 1
    while True:
        yield current
        previous, current = current, previous + current
take(fibseries(), 10)
cntd3 = countdown(3)
for i in cntd3:
print(i, end=",")
for i in cntd3:
print(i, end=",")
c3 = countdown(3)
copyc3 = c3
for i in c3:
print(i, end=",")
for i in copyc3:
print(i, end=",")
import os
def take(seq, n):
    """Return a list holding at most the first ``n`` items of ``seq``.

    ``seq`` may be any iterable. When it is an iterator, exactly the returned
    items are consumed, so the caller can keep reading from where take()
    stopped. If ``seq`` runs out early the shorter list is returned instead of
    letting StopIteration escape. A negative ``n`` yields an empty list.
    """
    from itertools import islice

    # islice rejects negative stops, whereas range(n) treated them as "none".
    return list(islice(seq, max(n, 0)))
def find(root):
    """Yield the path of every file found anywhere beneath ``root``.

    Paths are produced lazily while os.walk descends the tree; directories
    themselves are not yielded.
    """
    for directory, _subdirs, names in os.walk(root):
        yield from (os.path.join(directory, name) for name in names)
def grep(pattern, seq):
    """Lazily select the items of ``seq`` that contain ``pattern``.

    The test is the plain ``in`` operator (substring match for strings); the
    result is a generator, so nothing is read from ``seq`` until it is consumed.
    """
    # This is a generator expression: a comprehension that produces its items
    # on demand instead of building a list.
    matching = (entry for entry in seq if pattern in entry)
    return matching
files = find("/home/vikrant/trainings")
pyfiles = grep(".py", files)
print(take(pyfiles, 5))
def readlines(filenames):
    """Yield every line of every file named in ``filenames``, one file at a time.

    Each file is opened only when the previous one is exhausted and is closed
    again before the next is opened. Lines keep their trailing newline.
    """
    for name in filenames:
        with open(name) as handle:
            for line in handle:
                yield line
def count(seq):
    """Return how many items ``seq`` produces, consuming it in the process.

    Works on iterators and generators, which have no len().
    """
    total = 0
    for _ in seq:
        total += 1
    return total
files = find("/home/vikrant/trainings/")
csvfiles = grep(".csv", files)
lines = readlines(csvfiles)
count(lines)
import re
def grep(pattern, seq):
    """Lazily select the items of ``seq`` that match the regex ``pattern``.

    Matching uses ``re.match`` semantics, i.e. the pattern must match at the
    start of the item. The pattern is compiled once, up front.
    """
    matches = re.compile(pattern).match
    # A generator expression: items are tested only as they are requested.
    return (entry for entry in seq if matches(entry))
files = find("/home/vikrant/trainings/")
pyfiles = grep(r"[\w\/]+\.py", files)
take(pyfiles, 5)
import re
pattern = re.compile(r"\w+.py")
pattern.match("/vikrant/trainings/hello.py")
files = find("/home/vikrant/trainings/")
pyfiles = grep(r"[\w\/]+\.py", files)
lines = readlines(pyfiles)
funcs = grep(r"def .*", lines)
count(funcs)
files = find("/home/vikrant/trainings/")
pyfiles = grep(r"[\w\/]+\.py", files)
lines = readlines(pyfiles)
funcs = grep(r"def .*", lines)
next(funcs)
count(funcs)
problem
https://ia802902.us.archive.org/4/items/prideandprejudic01342gut/pandp12.txt
Write a function get_paragraphs to split the text of the above text file into paragraphs. When an empty line comes, that's the end of a paragraph.
import requests
def wget(url, filename):
    """Download ``url`` and save the decoded response text to ``filename``.

    Raises ``requests.HTTPError`` when the server answers with a 4xx/5xx
    status, so an error page is never written to disk as if it were the
    requested data. An existing file of the same name is overwritten.
    """
    resp = requests.get(url)
    # Fail loudly on an HTTP error instead of saving the error body.
    resp.raise_for_status()
    with open(filename, "w") as f:
        f.write(resp.text)
novelurl = "https://ia802902.us.archive.org/4/items/prideandprejudic01342gut/pandp12.txt"
wget(novelurl, "pandp.txt")
!tail pandp.txt
!head pandp.txt
def get_paragraphs(lines):
    """Group ``lines`` into paragraphs separated by blank lines.

    Each line is stripped of surrounding whitespace; a line that is empty
    after stripping ends the current paragraph. Every paragraph is yielded as
    its lines joined with a newline. Runs of blank lines produce no empty
    paragraphs.
    """
    current = []
    for raw in lines:
        text = raw.strip()
        if text:
            current.append(text)
            continue
        if current:
            yield "\n".join(current)
            current = []
    # Flush the last paragraph when the input does not end with a blank line.
    if current:
        yield "\n".join(current)
def get_paragraphs_(lines):
    """Group ``lines`` into paragraphs separated by blank lines.

    String-accumulating variant of the paragraph splitter: each line is
    stripped, a line that is empty after stripping ends the current paragraph,
    and every paragraph is yielded as its lines joined with a newline. Runs of
    blank lines produce no empty paragraphs.
    """
    para = ""
    for line in lines:
        line = line.strip()
        if line:
            # Put the separator only *between* lines; prepending it to the
            # first line would give every paragraph a leading newline.
            para = para + "\n" + line if para else line
        elif para:
            yield para
            para = ""
    # Flush the last paragraph when the input does not end with a blank line.
    if para:
        yield para
lines = readlines(["pandp.txt"])
count(get_paragraphs(lines))
!wc pandp.txt
def test_get_paragraphs(func):
    """Check a paragraph splitter ``func`` against three small inputs.

    ``func`` receives a list of newline-terminated lines and must return an
    iterable of paragraphs. Raises AssertionError on the first mismatch.
    """
    def append_newl(items):
        # Mimic lines read from a file, which keep their trailing newline.
        return [item + "\n" for item in items]

    def paragraph_count(items):
        return count(func(append_newl(items)))

    assert paragraph_count([""]) == 0
    assert paragraph_count(["A", "B", "", "", "C"]) == 2

    mixed = ["A", "", "B", "", "", "C", "D"]
    assert paragraph_count(mixed) == 3
    longest = max(func(append_newl(mixed)), key=len)
    assert longest == "C\nD"
#test_get_paragraphs(get_paragraphs_)
test_get_paragraphs(get_paragraphs)
!python -m pip install numpy
import numpy as np
a = np.array([1,2,3,4,5])
a
a.shape
a.ndim
a100 = np.arange(100).reshape(10,10)
a100
a100.shape
a100.ndim
a100.dtype
a100[0]
a100[-1]
a100[:,0]
a100[1,:]
a100
np.zeros(100).reshape(20,5)
z = _
z.dtype
help(np.zeros)
np.zeros(10, dtype=np.int16)
np.zeros_like(a100)
np.ones_like(a100)
np.asarray([1, 2, 3, 4, 5])
np.empty(100).reshape(25,4)
np.empty_like(range(10))
a100 = np.arange(100).reshape(10,10)
a100[:5, :5]
a100[5:,5:]
a100[:5,5:]
subview = a100[:5, :5]
subview
subview.shape
subview[0,0]= -1
subview
type(subview)
a100
type(a100)
copy_subview = subview.copy()
copy_subview
copy_subview[0,0] = 0
copy_subview
subview
a100
a = np.array(range(10))
a
a > 4
a[a>3]
a + 4
a - 5
a * 2
a **2
a + a
a*a
np.exp(a)
a100
a100.max()
a100.min()
a100.std()
a100.sum()
a100.cumsum()
from scipy.misc import face
image = face(gray=True)
image
from matplotlib import pyplot as plt
def imshow(img):
    """Draw ``img`` with matplotlib's gray colormap and show the figure."""
    gray = plt.cm.gray
    plt.imshow(img, cmap=gray)
    plt.show()
%matplotlib inline
imshow(a100)
imshow(image)
image
negate = 255 - image
imshow(negate)
thumbnail = image[::3,::3]
imshow(thumbnail)
imshow(image[::5,::5])
imshow(image[::20,::20])
plain = np.zeros_like(thumbnail)
imshow(plain)
plain[::10,:] = 255
plain[:, ::10] = 255
imshow(plain)
plain[:12,:12]
small = np.zeros(100).reshape(10,10)
small[::3,:] = 255
small[:,::3] = 255
imshow(small)
imshow(thumbnail + plain)
imshow(0.75*thumbnail + 0.25*plain)
imshow((0.5*thumbnail + 0.5*plain)*2)
imshow(a100+200)
imshow(np.maximum(thumbnail, plain))
problem
def swapcorners(img):
    """Return a copy of ``img`` with its top-left and bottom-right corners swapped.

    Each corner is ``h // 2`` rows by ``w // 2`` columns, where ``h`` and ``w``
    are the first two dimensions of ``img``. With an odd height or width the
    middle row/column belongs to neither corner and is left as it is, so both
    corners always have the same shape. Any further dimensions (e.g. colour
    channels) are carried along unchanged. ``img`` itself is not modified.
    """
    h, w = img.shape[:2]
    ch, cw = h // 2, w // 2
    swapped = img.copy()
    # Read from the untouched original and write into the copy, so the first
    # assignment cannot disturb the source of the second.
    swapped[:ch, :cw] = img[h - ch:, w - cw:]
    swapped[h - ch:, w - cw:] = img[:ch, :cw]
    return swapped
imshow(swapcorners(thumbnail))
thumb = image[::10, ::10]
hthumb = np.hstack([thumb, thumb, thumb])
vthump = np.vstack([hthumb, hthumb, hthumb])
imshow(vthump)
imshow(np.flip(thumb))
url = "https://notes.pipal.in/2020/arcesium_advanced_feb/HYDERABAD-weather.csv"
wget(url, "HYDERABAD-weather.csv")
!tail -n 5 HYDERABAD-weather.csv
import csv
# Load the whole CSV into a list of rows (each row is a list of strings).
# The csv module does its own newline handling, so the file is opened with
# newline="" as its documentation requires; otherwise newlines embedded in
# quoted fields can be misread.
with open("HYDERABAD-weather.csv", newline="") as f:
    data = list(csv.reader(f))
type(data)
data[0]
numeric_data = data[1:]
data[0]
numeric_data[:3]
def floatcolumn(matrix, colnum):
    """Return column ``colnum`` of ``matrix`` as a list of floats.

    Raises ValueError as soon as a cell cannot be converted by float().
    """
    values = []
    for row in matrix:
        values.append(float(row[colnum]))
    return values
maxtemp = floatcolumn(numeric_data, 4)
mintemp = floatcolumn(numeric_data, 5)
rainfall = floatcolumn(numeric_data, 6)
def parsefloat(sf):
    """Convert ``sf`` with float(), falling back to 0 when it is not a number.

    On a ValueError the error message is printed and 0 is returned, so one bad
    cell does not abort the processing of a whole column.
    """
    try:
        value = float(sf)
    except ValueError as err:
        print(err)
        value = 0
    return value
def floatcolumn(matrix, colnum):
    """Return column ``colnum`` of ``matrix`` as numbers, converted by parsefloat.

    Unlike a bare float() conversion, cells that parsefloat cannot convert do
    not raise; whatever parsefloat returns for them is stored instead.
    """
    values = []
    for row in matrix:
        values.append(parsefloat(row[colnum]))
    return values
rainfall = floatcolumn(numeric_data, 6)
plt.scatter(rainfall, maxtemp)
plt.scatter(rainfall, mintemp)
year = [int(row[3]) for row in numeric_data]
numeric_data[:6]
numeric_data[-6:]
plt.plot(year, rainfall)
sorted_data = sorted(numeric_data, key= lambda r:r[3])
year = [int(row[3]) for row in sorted_data]
rainfall = floatcolumn(sorted_data, 6)
plt.plot(year, rainfall)
a100.mean()
months = np.array([row[2] for row in numeric_data])
months
rainfall = np.array(floatcolumn(numeric_data, 6))
rainfall[:5]
a = np.array(range(5))
b = np.array(['a','b','a','b','c'])
b=="a"
a[b=="a"]
rainfall[months=="March"].mean()
months=="March"
def get_mean_rainfall(rainfall, months, month):
    """Return the mean of the ``rainfall`` entries whose month equals ``month``.

    ``months == month`` is used as a boolean mask to index ``rainfall``.
    Assumes both are NumPy arrays of the same length -- TODO confirm against
    the caller.
    """
    in_month = months == month
    return rainfall[in_month].mean()
import datetime
set(months)
uniqmonths = list(set(months))
rainfall_ = [get_mean_rainfall(rainfall, months, month) for month in uniqmonths]
plt.bar(uniqmonths, rainfall_)
import altair as alt
import pandas as pd
!python -m pip install altair
import altair as alt
import pandas as pd
%%file sales.txt
area,sales,profit
North,5,2
East,25,8
West,15,6
South,20,5
Central,10,3
sales = pd.read_csv("sales.txt")
sales
alt.Chart(sales).mark_point()
alt.Chart(sales).mark_point().encode(y="area")
alt.Chart(sales).mark_point().encode(
x="sales",
y="area")
alt.Chart(sales).mark_bar().encode(
x="sales",
y="area")
alt.Chart(sales).mark_line().encode(
x="sales",
y="area")
base = alt.Chart(sales).mark_bar().encode(
x="sales",
y="area")
base.mark_circle()
base.encode(color="area")
base.encode(color="area", size="profit")
base.encode(color="area", size="profit").mark_circle()
base.to_json()
import json
d = {"a":28.565,
"b": 30,
"c" : [1,2,3]}
d
json.dumps(d)
jsondata = '{"a": 28.565, "b": 30, "c": [1, 2, 3]}'
json.loads(jsondata)
url = "https://www.alphavantage.co/query?function=TIME_SERIES_INTRADAY&symbol=MSFT&interval=5min&outputsize=full&apikey=demo"
resp = requests.get(url)
data = resp.json()
type(data)
data.keys()
data['Meta Data']
pd.DataFrame(data['Time Series (5min)'])
pd.DataFrame(data['Time Series (5min)']).transpose()
url = "http://www.thehindu.com"
resp = requests.get(url, params={"service":"rss"})
xmltext = resp.text
print(xmltext[:1300])
from xml.etree import ElementTree as et
root = et.fromstring(xmltext)
items = root.findall(".//item")
type(items)
len(items)
items[0]
print(et.tostring(items[0]).decode())
for item in items[:10]:
print(item.findtext("title"))
print(item.findtext("link"))
print("*"*30)
from xml.dom.minidom import parseString
root = parseString(xmltext)
items = root.getElementsByTagName("item")
for item in items[:10]:
title = item.getElementsByTagName("title")[0]
link = item.getElementsByTagName("link")[0]
print(title.firstChild.data)
print(link.firstChild.data)
print("*"*30)
import sqlite3
conn = sqlite3.connect("data.db")
cur = conn.cursor()
cur.execute("create table person (name varchar(100), email varchar(100))")
cur.execute("insert into person (name, email) values('alice', 'alice@wonder.land')")
cur = cur.execute("select * from person")
cur.fetchall()
def find(conn, email):
    """Return all rows of the ``person`` table whose email equals ``email``.

    ``conn`` is an open sqlite3 connection. The value is bound through a ``?``
    placeholder rather than pasted into the SQL text, so quotes or SQL
    fragments inside ``email`` are compared as plain data and can neither
    break the statement nor change its meaning. The statement text is printed
    before it is executed.
    """
    q = "select * from person where email=?"
    print(q)
    cur = conn.cursor()
    return cur.execute(q, (email,)).fetchall()
find(conn, "alice@wonder.land")
def find_(conn, email):
    """Return all rows of the ``person`` table whose email equals ``email``.

    The value is passed as a bound parameter for the ``?`` placeholder, so it
    is never interpreted as SQL.
    """
    cursor = conn.cursor()
    cursor.execute("select * from person where email=?", (email,))
    return cursor.fetchall()
find_(conn, "alice@wonder.land")
conn.commit()
conn.close()
conn = sqlite3.connect("data.db")
find(conn, "*")
find_(conn, "alice@wonder.land")
records = [
("alex", "alex@zoo.in"),
("Elsa", "elsa@frozen.mov"),
("ELisa", "elisa@hacker.hack")
]
cur = conn.cursor()
cur.executemany("insert into person values(?,?)", records)
cur.execute("select * from person").fetchall()
To manage database tables as classes, one should use an ORM (Object Relational Mapping), which can be done using the SQLAlchemy library. More details can be seen at the library homepage (https://www.sqlalchemy.org/).