Aug 19-25, 2022 Vikrant Patil
All notes are available online at https://notes.pipal.in/2022/arcesium_finop_batch1/
Please accept the invitation that you have received in your email, then
log in to the lab and create today's notebook module3-day5
© Pipal Academy LLP
x = 20
2x = 2*x # this will fail
Input In [2] 2x = 2*x # this will fail ^ SyntaxError: invalid decimal literal
twice_x = 2*x
--------------------------------------------------------------------------- NameError Traceback (most recent call last) Input In [3], in <cell line: 1>() ----> 1 twice_x = 2*x NameError: name 'x' is not defined
x = 20
twice_x = 2*x
def mysum(nums):
    """Deliberately broken sum, kept to demonstrate an indentation bug.

    The ``return`` is indented into the ``for`` body, so the function exits
    during the first iteration: it returns only the first element, and
    returns ``None`` when ``nums`` is empty.
    """
    s = 0
    for n in nums:
        s += n
        return s #this is wrongly indented!
mysum([1, 1, 1, 1, 1])
1
def mysum(nums):
    """Return the sum of all numbers in ``nums`` (0 for an empty sequence)."""
    s = 0
    for n in nums:
        s += n
    # return sits at function level so the loop finishes before we return
    return s
sum = 2 +3 # this overwrites (shadows) the built-in sum function
print(x)
y = 2*x
sum([2, 3, 4, 5]) # fails: sum is now an int, so it cannot be called
20
--------------------------------------------------------------------------- TypeError Traceback (most recent call last) Input In [8], in <cell line: 6>() 3 print(x) 4 y = 2*x ----> 6 sum([2, 3, 4, 5]) TypeError: 'int' object is not callable
x = 5
x()
--------------------------------------------------------------------------- TypeError Traceback (most recent call last) Input In [9], in <cell line: 2>() 1 x = 5 ----> 2 x() TypeError: 'int' object is not callable
list = [1, 2, 3]
#
#
list("3434324")
--------------------------------------------------------------------------- TypeError Traceback (most recent call last) Input In [11], in <cell line: 5>() 1 list = [1, 2, 3] 2 # 3 # ----> 5 list("3434324") TypeError: 'list' object is not callable
del sum
del list
sum_ = 3 + 5 # no conflict with built in!
list_ = [1 , 2, 3, 4]
def function2(data, i):
    """Return a list holding element ``i`` of every item in ``data``."""
    return [item[i] for item in data]
def function1(data):
    """Return the columns of ``data`` as a list of lists.

    The number of columns is taken from the first item of ``data``; each
    column is built by ``function2``. Raises ``IndexError`` when ``data`` is
    empty, because the first item is read unconditionally.
    """
    t = []
    n = len(data[0])
    for i in range(n):
        t.append(function2(data, i))
    return t
def column(matrix, colnum):
    """Return column ``colnum`` of ``matrix`` as a list, one entry per row."""
    return [row[colnum] for row in matrix]
def transpose(matrix):
    """Return the transpose of ``matrix`` as a new list of lists.

    The column count is taken from the first row, and each output row is one
    column of the input, collected by ``column``. An empty matrix transposes
    to an empty list.
    """
    # Guard first: reading matrix[0] on an empty matrix would raise IndexError.
    if len(matrix) == 0:
        return []
    transpose_ = []
    firstrow = matrix[0]
    columncount = len(firstrow)
    for c in range(columncount):
        transpose_.append(column(matrix, c))
    return transpose_
def transpose(matrix):
    """Return the transpose of ``matrix`` as a new list of lists.

    Comprehension form: one output row per column index of the first row,
    each collected by ``column``. An empty matrix transposes to an empty list.
    """
    # Guard first: reading matrix[0] on an empty matrix would raise IndexError.
    if len(matrix) == 0:
        return []
    firstrow = matrix[0]
    columncount = len(firstrow)
    return [column(matrix, i) for i in range(columncount)]
%%file extract_tableA.py
import csv
import typer
import PyPDF2
def extract_tableA(pagetext):
    """Split the leading table out of a page's extracted text.

    Only the first nine lines of ``pagetext`` are used: the first line is
    split on whitespace into the headers, and each of the remaining lines
    (at most eight) is split on whitespace into one data row.

    Returns a ``(headers, data)`` tuple, where ``headers`` is a list of
    strings and ``data`` is a list of lists of strings.
    """
    lines = pagetext.split("\n")
    tableA = lines[:9]
    headers = tableA[0].split()
    # split() with no arguments already ignores leading/trailing whitespace
    data = [line.split() for line in tableA[1:]]
    return headers, data
def write_csv(headers, data, filename):
    """Write ``headers`` followed by the rows in ``data`` to a CSV file.

    ``filename`` is created or overwritten. ``headers`` is one row (a
    sequence of values) and ``data`` is an iterable of such rows.
    """
    # newline="" hands line-ending control to the csv module; without it,
    # Windows text mode turns the writer's "\r\n" into "\r\r\n" and every
    # second row shows up blank.
    with open(filename, "w", newline="") as f:
        csvf = csv.writer(f)
        csvf.writerow(headers)
        csvf.writerows(data)
def read_from_pdf(pdffile: str, csvoutput: str) -> None:
    # Extract the table from one page of `pdffile` and save it to `csvoutput`.
    # (Plain comments rather than a docstring, so the --help text generated
    # from this function stays unchanged.)
    with open(pdffile, "rb") as f:
        # PdfReader / pages / extract_text are the current PyPDF2 names; the
        # older PdfFileReader / getPage / extractText spellings were
        # deprecated and then removed in PyPDF2 3.0.
        pdfreader = PyPDF2.PdfReader(f)
        # pages is zero-indexed, so index 1 is the second page of the PDF
        page = pdfreader.pages[1]
        # extract the text while the PDF file is still open
        headers, data = extract_tableA(page.extract_text())
    write_csv(headers, data, csvoutput)
if __name__ == "__main__":
    # typer.run turns read_from_pdf's parameters into command-line arguments
    typer.run(read_from_pdf)
Overwriting extract_tableA.py
!python extract_tableA.py --help
Usage: extract_tableA.py [OPTIONS] PDFFILE CSVOUTPUT
Arguments:
PDFFILE [required]
CSVOUTPUT [required]
Options:
--install-completion [bash|zsh|fish|powershell|pwsh]
Install completion for the specified shell.
--show-completion [bash|zsh|fish|powershell|pwsh]
Show completion for the specified shell, to
copy it or customize the installation.
--help Show this message and exit.
To debug your Python program, run it under pdb:
python -m pdb extract_tableA.py data.pdf output.csv
(Pdb)
l -> print the code with line numbers
h -> to see help
b -> set a breakpoint
h b -> see help of how to set breakpoint
p var -> prints given variable
c -> continue execution and stop at next break point
s -> step in a function (if there is a function on current line)
n -> execute this line and go to next line
q -> exit debugger
Let's try to create a package for the above program
tableA
|
|--setup.py
|--requirements.txt
+-A
|
|--__init__.py
|-- extract_tableA.py
+-B
|
|-- __init__.py
|-- stats.py
%%file tableA/setup.py
# Packaging script for the extract_table demo package.
# NOTE(review): install_requires and entry_points are setuptools options, not
# plain distutils ones, and distutils is deprecated (PEP 632) and absent from
# the standard library in Python 3.12+. Consider importing setup from
# setuptools instead - TODO confirm against the target Python version.
from distutils.core import setup
setup(
name="extract_table",
version="1.0",
description="a sample package to demonstrate python packaging",
author="Vikrant",
author_email="vikrant@dsad.sd.com",
url="https://extracttable.com",
packages=['A','A.B'],  # top-level package A and its sub-package A.B
install_requires=[
'PyPDF2',
'typer'
],
# console script: an `extract_table` command bound to the `app` object in A/extract_tableA.py
entry_points={"console_scripts": ["extract_table=A.extract_tableA:app"]}
)
Overwriting tableA/setup.py
%%file tableA/requirements.txt
typer
PYPDF2
Overwriting tableA/requirements.txt
%%file tableA/A/__init__.py
#
Writing tableA/A/__init__.py
%%file tableA/A/extract_tableA.py
import csv
import typer
import PyPDF2
app = typer.Typer()
def extract_tableA(pagetext):
    """Split the leading table out of a page's extracted text.

    Only the first nine lines of ``pagetext`` are used: the first line is
    split on whitespace into the headers, and each of the remaining lines
    (at most eight) is split on whitespace into one data row.

    Returns a ``(headers, data)`` tuple, where ``headers`` is a list of
    strings and ``data`` is a list of lists of strings.
    """
    lines = pagetext.split("\n")
    tableA = lines[:9]
    headers = tableA[0].split()
    # split() with no arguments already ignores leading/trailing whitespace
    data = [line.split() for line in tableA[1:]]
    return headers, data
def write_csv(headers, data, filename):
    """Write ``headers`` followed by the rows in ``data`` to a CSV file.

    ``filename`` is created or overwritten. ``headers`` is one row (a
    sequence of values) and ``data`` is an iterable of such rows.
    """
    # newline="" hands line-ending control to the csv module; without it,
    # Windows text mode turns the writer's "\r\n" into "\r\r\n" and every
    # second row shows up blank.
    with open(filename, "w", newline="") as f:
        csvf = csv.writer(f)
        csvf.writerow(headers)
        csvf.writerows(data)
@app.command()
def read_from_pdf(pdffile: str, csvoutput: str) -> None:
    # Extract the table from one page of `pdffile` and save it to `csvoutput`.
    # (Plain comments rather than a docstring, so the --help text generated
    # from this function stays unchanged.)
    with open(pdffile, "rb") as f:
        # PdfReader / pages / extract_text are the current PyPDF2 names; the
        # older PdfFileReader / getPage / extractText spellings were
        # deprecated and then removed in PyPDF2 3.0.
        pdfreader = PyPDF2.PdfReader(f)
        # pages is zero-indexed, so index 1 is the second page of the PDF
        page = pdfreader.pages[1]
        # extract the text while the PDF file is still open
        headers, data = extract_tableA(page.extract_text())
    write_csv(headers, data, csvoutput)
if __name__ == "__main__":
    # run the Typer application when this module is executed as a script
    app()
Overwriting tableA/A/extract_tableA.py
%%file tableA/A/B/__init__.py
##
Overwriting tableA/A/B/__init__.py
%%file tableA/A/B/stats.py
def mean(nums):
    """Return the arithmetic mean of ``nums``.

    ``nums`` must be a sized collection of numbers (``len`` is called on it).
    Raises ``ZeroDivisionError`` when ``nums`` is empty.
    """
    return sum(nums) / len(nums)
Writing tableA/A/B/stats.py
import time
import datetime
while True:
time.sleep(5) # program acctually sleeps...not consuming CPU
date = datetime.datetime.now()
if date.hour==11 and date.minute==57: # true on every 5-second poll during 11:57, so the message is printed repeatedly for that whole minute
print("hello, the time has come!")
hello, the time has come! hello, the time has come! hello, the time has come! hello, the time has come! hello, the time has come! hello, the time has come! hello, the time has come! hello, the time has come! hello, the time has come! hello, the time has come! hello, the time has come! hello, the time has come!
--------------------------------------------------------------------------- KeyboardInterrupt Traceback (most recent call last) Input In [45], in <cell line: 4>() 2 import datetime 4 while True: ----> 5 time.sleep(5) # program acctually sleeps...not consuming CPU 6 date = datetime.datetime.now() 7 if date.hour==11 and date.minute==57: KeyboardInterrupt:
date.minute
54