Sep 13-17, 2021 Vikrant Patil
These notes are available online at https://notes.pipal.in/2021/arcesium_finop_batch1/
© Pipal Academy LLP
Day 1 | Day 2 | Day 3 | Day 4 | Day 5
We will be using jupyter hub from https://lab.pipal.in for this training.
Log in to the hub and create a notebook named module3-day5.
x = 20
2x = 2*x
File "<ipython-input-1-52d1993ec0f3>", line 2 2x = 2*x ^ SyntaxError: invalid syntax
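Names of variables, functions, classes, or any other object you create must be valid identifiers; for example, a name cannot start with a digit (which is why `2x` above is a syntax error).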
twice_y = 2*y
--------------------------------------------------------------------------- NameError Traceback (most recent call last) <ipython-input-6-35a9ed9e59d9> in <module> ----> 1 twice_y = 2*y NameError: name 'y' is not defined
y = 10
twice_y = 2*y
def mysum(nums):
    """Deliberately buggy sum used to demonstrate a hard-to-spot logical error.

    The ``return`` statement is indented inside the ``for`` loop, so the
    function returns right after adding the first element instead of after
    the whole loop has finished.
    """
    s = 0
    for n in nums:
        s += n
        return s ##this is difficult to find!
mysum([1, 2, 3, 3, 4, 5, 5])
1
mysum([3, 4, 5, 6, 6])
3
def mysum_(nums):
    """Return the sum of all numbers in ``nums`` (0 when ``nums`` is empty)."""
    s = 0
    for n in nums:
        s += n
    # The return is outside the loop, so every element is added first.
    return s
mysum_([2, 3, 4, 5, 5])
19
def function2(data, i):
    """Return ``item[i]`` for every item in ``data`` as a list."""
    return [item[i] for item in data]
def function(data):
t = []
n = len(data)
for i in range(n):
t.append(function2(data, i) # closing bracket is missing
return t
function([[1, 2, 3],[3, 4, 5]])
File "<ipython-input-18-7d9bb40a3976>", line 9 return t ^ SyntaxError: invalid syntax
def foo():
retur 3 # return 3
File "<ipython-input-17-da8b9629daa5>", line 2 retur 3 ^ SyntaxError: invalid syntax
for i in range(5)
print(i)
File "<ipython-input-21-820d466f0a3e>", line 1 for i in range(5) ^ SyntaxError: invalid syntax
The colon (`:`) is missing at the end of the `for` statement above.
# logical errors can be caught very easily if we give correct names to functions and data
def function2(data, key):  # what can be data? dict, list/tuple, 2d list/tuple
    """Return ``item[key]`` for every item in ``data`` as a list."""
    return [item[key] for item in data]
def function(data):
    """Collect ``function2(data, i)`` for every index ``i`` of the first item.

    For a 2-D list this yields one list per column of ``data``.
    """
    t = []
    # The number of columns is taken from the first row.
    n = len(data[0])
    for i in range(n):
        t.append(function2(data, i))
    return t
def column(data2d, colnum):
    """Return column ``colnum`` of the 2-D list ``data2d`` as a list."""
    return [row[colnum] for row in data2d]
tables = [[i*j for i in range(1, 11)] for j in range(1, 6)]
tables
[[1, 2, 3, 4, 5, 6, 7, 8, 9, 10], [2, 4, 6, 8, 10, 12, 14, 16, 18, 20], [3, 6, 9, 12, 15, 18, 21, 24, 27, 30], [4, 8, 12, 16, 20, 24, 28, 32, 36, 40], [5, 10, 15, 20, 25, 30, 35, 40, 45, 50]]
tables[0] # zeroth row
[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
tables[0][2] # 2nd col from 0th row
3
tables[1][2] # 2nd col from 1st row
6
def column(data2d, colnum):
    """Return column ``colnum`` of ``data2d``, indexing each row by position."""
    rowcount = len(data2d)
    return [data2d[i][colnum] for i in range(rowcount)]
def column(data2d, colnum):
    """Return column ``colnum`` of ``data2d`` by iterating over its rows."""
    return [row[colnum] for row in data2d]
column(tables, 0)
[1, 2, 3, 4, 5]
column(tables, 2)
[3, 6, 9, 12, 15]
tables
[[1, 2, 3, 4, 5, 6, 7, 8, 9, 10], [2, 4, 6, 8, 10, 12, 14, 16, 18, 20], [3, 6, 9, 12, 15, 18, 21, 24, 27, 30], [4, 8, 12, 16, 20, 24, 28, 32, 36, 40], [5, 10, 15, 20, 25, 30, 35, 40, 45, 50]]
function(tables)
[[1, 2, 3, 4, 5], [2, 4, 6, 8, 10], [3, 6, 9, 12, 15], [4, 8, 12, 16, 20], [5, 10, 15, 20, 25], [6, 12, 18, 24, 30], [7, 14, 21, 28, 35], [8, 16, 24, 32, 40], [9, 18, 27, 36, 45], [10, 20, 30, 40, 50]]
def transpose(data2d):
    """Return the columns of ``data2d`` as a list of lists (rows become columns).

    The column count is taken from the first row.
    """
    colcount = len(data2d[0])
    return [column(data2d, i) for i in range(colcount)]
transpose(tables)
[[1, 2, 3, 4, 5], [2, 4, 6, 8, 10], [3, 6, 9, 12, 15], [4, 8, 12, 16, 20], [5, 10, 15, 20, 25], [6, 12, 18, 24, 30], [7, 14, 21, 28, 35], [8, 16, 24, 32, 40], [9, 18, 27, 36, 45], [10, 20, 30, 40, 50]]
def sum_inputs(list_from_input_func):
    """Return the integer sum of a comma-separated string of numbers.

    ``list_from_input_func`` is the raw text (e.g. as returned by ``input``);
    it is split on commas and each piece is converted with ``int``, which
    raises ``ValueError`` for a piece that is not an integer.
    """
    strnums = list_from_input_func.split(",")
    s = 0
    for textnum in strnums:
        n = int(textnum)
        s += n
    return s
user_entered_nums = input("Enter comma seperated numbers")
Enter comma seperated numbers1,2,3,4,5
sum_inputs(user_entered_nums)
15
class Foo:
    """Deliberately buggy class demonstrating a NameError inside a method.

    ``square`` reads the bare name ``z`` instead of ``self.z``, so calling it
    raises ``NameError`` unless a global ``z`` happens to exist.
    """

    def __init__(self, z): # here z is just an argument ... it will vanish after call is over
        self.z = z

    def intermediate_method(self):
        return self.square()

    def square(self):
        # where is z defined?
        return z*z
f = Foo(5)
f.intermediate_method()
--------------------------------------------------------------------------- NameError Traceback (most recent call last) <ipython-input-47-fa0441a912ec> in <module> ----> 1 f.intermediate_method() <ipython-input-45-974ce879af4e> in intermediate_method(self) 5 6 def intermediate_method(self): ----> 7 return self.square() 8 9 def square(self): <ipython-input-45-974ce879af4e> in square(self) 8 9 def square(self): ---> 10 return z*z NameError: name 'z' is not defined
f6 = Foo(6)
f7 = Foo(7)
class Foo:
    """Hold a value ``z`` and compute its square from instance state."""

    def __init__(self, z):
        # Store the argument on the instance so other methods can reach it.
        self.z = z

    def intermediate_method(self):
        """Return the square of ``self.z`` by delegating to ``square``."""
        return self.square()

    def square(self):
        """Return ``self.z`` multiplied by itself."""
        return self.z*self.z
f5 = Foo(5)
f5.intermediate_method()
25
del x, y
class Stateless:
    """Deliberately buggy class: nothing is ever stored on the instance.

    ``__init__`` only rebinds its own local names, and the getters read the
    bare names ``x`` / ``y``, so they raise ``NameError`` unless globals with
    those names exist.
    """

    def __init__(self, x, y):
        # These assignments rebind the local arguments; no ``self.`` attribute is set.
        x = x
        y = y

    def get_x(self):
        return x

    def get_y(self):
        return y
s = Stateless(4, 5)
s.get_x()
--------------------------------------------------------------------------- NameError Traceback (most recent call last) <ipython-input-57-52d3ff21ad1e> in <module> ----> 1 s.get_x() <ipython-input-55-ba4e32fca3eb> in get_x(self) 7 8 def get_x(self): ----> 9 return x 10 11 def get_y(self): NameError: name 'x' is not defined
class Dog:
    """A dog with a name that can bark."""

    def __init__(self, name):
        self.name = name

    def bark(self):
        """Print the bark message including this dog's name."""
        print("Bhow..Bhow..ssss from {name}".format(name=self.name))
t = Dog("Tipu")
t.bark()
Bhow..Bhow..ssss from Tipu
class WhiteDog(Dog):
    """Deliberately buggy subclass demonstrating a NameError on a class attribute.

    ``bark`` reads ``colour`` as a bare name rather than ``self.colour``.
    """

    colour = "White"

    def bark(self):
        # ``colour`` is looked up as a bare name here, not as an attribute,
        # so this raises NameError when no global ``colour`` exists.
        print("Bhow..Bhow..ssss from {name} , with color as {colour}".format(name=self.name, colour=colour))
wd = WhiteDog("sheru")
wd.bark()
--------------------------------------------------------------------------- NameError Traceback (most recent call last) <ipython-input-64-5ea5de95f25c> in <module> ----> 1 wd.bark() <ipython-input-62-f74d8996146d> in bark(self) 3 4 def bark(self): ----> 5 print("Bhow..Bhow..ssss from {name} , with color as {colour}".format(name=self.name, colour=colour)) NameError: name 'colour' is not defined
wd.colour
'White'
class WhiteDog(Dog):
    """A ``Dog`` whose bark also reports its colour."""

    colour = "White"

    def bark(self):
        """Print the bark message with the dog's name and colour."""
        # The class attribute is reached through ``self``.
        print("Bhow..Bhow..ssss from {name} , with color as {colour}".format(name=self.name, colour=self.colour))
wd1 = WhiteDog("sheru1")
wd1.bark()
Bhow..Bhow..ssss from sheru1 , with color as White
float("four") # trying to convert wrong values
--------------------------------------------------------------------------- ValueError Traceback (most recent call last) <ipython-input-69-ea608515b3fd> in <module> ----> 1 float("four") # trying to convert wrong values ValueError: could not convert string to float: 'four'
float("4.5")
4.5
3 + "5" # you can't do operation of different data types together
--------------------------------------------------------------------------- TypeError Traceback (most recent call last) <ipython-input-71-314df8523bbe> in <module> ----> 1 3 + "5" # you can't do operation of different data types together TypeError: unsupported operand type(s) for +: 'int' and 'str'
x = 3
y = "5"
x + y
--------------------------------------------------------------------------- TypeError Traceback (most recent call last) <ipython-input-73-cd60f97aa77f> in <module> ----> 1 x + y TypeError: unsupported operand type(s) for +: 'int' and 'str'
print(type(x), type(y))
<class 'int'> <class 'str'>
Use this only after all syntax errors have been fixed and the variables and functions have been given proper names.
%%file extract_tableA.py
import pandas as pd
import typer
import PyPDF2
app = typer.Typer()
def get_rows(lines, n, numrow):
    """Split ``lines`` into ``numrow`` consecutive rows of ``n`` items each.

    Row ``i`` is the slice ``lines[i*n:(i+1)*n]``; if ``lines`` is too short,
    the trailing rows are shorter or empty, as with any slice.
    """
    return [lines[i * n:(i + 1) * n] for i in range(numrow)]
def column(data2d, n):
    """Return column ``n`` of the 2-D list ``data2d`` as a list."""
    return [row[n] for row in data2d]
def transpose(data2d):
    """Return the columns of ``data2d`` as a list of lists.

    The number of columns is taken from the first row.
    """
    numcols = len(data2d[0])
    return [column(data2d, i) for i in range(numcols)]
def get_page(filename, pageno):
    """Return the text extracted from page ``pageno`` of the PDF ``filename``.

    NOTE(review): PyPDF2 page indices are presumably zero-based, so
    ``pageno=1`` would be the second page of the document -- confirm.
    """
    with open(filename, "rb") as f:
        pdfreader = PyPDF2.PdfFileReader(f)
        page = pdfreader.getPage(pageno)
        # Extract while the file is still open.
        return page.extractText()
def extract_tableA_(page, numrows=9):
    """Build a DataFrame for table A from the extracted text of one page.

    ``page`` is split into lines. The first ``len(header)`` lines are skipped,
    and the following lines are grouped into ``numrows`` rows of
    ``len(header)`` values each. The rows are transposed so that each header
    name maps to its column.

    ``numrows`` defaults to 9, the row count that was counted manually for
    the sample document.
    """
    lines = page.split("\n")
    header = "NR WR SR ER NER TOTAL".split()
    rowdata = get_rows(lines[len(header):], len(header), numrows)
    columndata = transpose(rowdata)
    data = dict(zip(header, columndata))
    return pd.DataFrame(data)
@app.command()
def extract_tableA(filename: str, output: str = "tableA.csv"):
    """Extract table A from the PDF ``filename`` and write it to ``output`` as CSV."""
    # Page index 1 is passed to get_page; the table is read from that page.
    page1 = get_page(filename, 1)
    extract_tableA_(page1).to_csv(output)
# Run the typer application only when the file is executed as a script.
if __name__ == "__main__":
    app()
Overwriting extract_tableA.py
Let's create a package called tableA:
tableA
|
|--setup.py
|--requirements.txt
+ A
|
|--__init__.py
|-- extract_tableA.py
+ B
|
|-- __init__.py
%%file tableA/setup.py
# NOTE(review): ``install_requires`` and ``entry_points`` are setuptools
# options; they presumably only take effect when setuptools is in charge of
# the build (e.g. when installing with pip) -- confirm for your environment.
from distutils.core import setup

setup(
    name="extract_table",
    version="1.0",
    description="extract_table is sample package created to show python packaging",
    author="Vikrant",
    author_email="vikrant@table.sds.com",
    url="https://extract_table.com",
    packages=['A', 'A.B'],
    install_requires=[
        'pandas',
        'PyPDF2',
        'typer',
    ],
    # The entry-point group is "console_scripts" (plural), and the target must
    # be a callable that exists in the module: extract_tableA.py defines the
    # typer application as ``app``.
    entry_points={"console_scripts": ["extract_table=A.extract_tableA:app"]},
)
Overwriting tableA/setup.py
%%file tableA/requirements.txt
pandas
PyPDF2
typer
Overwriting tableA/requirements.txt
This package can be installed in a virtual environment and shared with other users as a Python package.
import re
# Raw strings keep regex escapes such as \d from being treated as
# (invalid) string escape sequences.
pattern = re.compile(r"^.$") # only one character on one line
# Four digits, a dash, two digits, a dash, two digits (e.g. 2021-10-25).
datelikepatten = re.compile(r"^\d{4}-\d{2}-\d{2}$")
lines = ["line1",
"l",
"2",
"some tda;lksd;",
"2021-10-25",
"sakdsa sgd sgf kgf"]
# Print every line that consists of exactly one character.
for line in lines:
    if pattern.match(line):
        print(line)
l 2
# Print every line that matches the date-like pattern.
for line in lines:
    if datelikepatten.match(line):
        print(line)
2021-10-25
"Total = 34343"
"Total : 3434.455"
"Total : 4343"
'Total : 4343'