Module 3 - Day 5

Debugging python programs

Look at the error traceback carefully; it has useful information with which you can often figure out what is wrong immediately.

x # NameError
NameError: name 'x' is not defined
def square(x):
    return a*a


def sumofsquares(x, y):
    return square(x) + squarey)


sumofsquares(10, 20)
SyntaxError: unmatched ')' (2414728682.py, line 6)
def square(x):
    return a*a


def sumofsquares(x, y):
    return square(x) + square(y)


sumofsquares(10, 20)
NameError: name 'a' is not defined
def square(x):
    return x*x


def sumofsquares(x, y):
    return square(x) + square(y)


sumofsquares(10, 20)
500
def add(*args):
    """Add up all positional arguments and return the result (0 if none are given)."""
    total = 0
    for value in args:
        total += value
    return total
sum = add(1, 2, 3, 4, 5, 6, 7, 8,9)
print(sum)
45
nums = [1, 2, 3, 4, 5, 6, 7, 8,9]
sum(nums)
TypeError: 'int' object is not callable

‘XXX’ object is not callable!

sum # integer
45
sum([1, 2, 3])
TypeError: 'int' object is not callable
45([1, 2, 3])
<>:1: SyntaxWarning: 'int' object is not callable; perhaps you missed a comma?
<>:1: SyntaxWarning: 'int' object is not callable; perhaps you missed a comma?
/tmp/ipykernel_4078463/1956412847.py:1: SyntaxWarning: 'int' object is not callable; perhaps you missed a comma?
  45([1, 2, 3])
/tmp/ipykernel_4078463/1956412847.py:1: SyntaxWarning: 'int' object is not callable; perhaps you missed a comma?
  45([1, 2, 3])
/tmp/ipykernel_4078463/1956412847.py:1: SyntaxWarning: 'int' object is not callable; perhaps you missed a comma?
  45([1, 2, 3])
TypeError: 'int' object is not callable
del sum
sum_ = add(1, 2, 3, 4, 5, 6, 7)
sum(nums)
45

Giving appropriate names to variables and functions

def function2(data, i):
    return [item[i] for item in data]


def function1(x):
    y = []
    n = len(x[0])
    for i in range(n):
        y.append(function2(x, i))
    return y

Questions: 1. What is x in function1? 2. What is data in function2? 3. What is function2 doing? 4. What is function1 doing?

Where do we use this kind of access? Is x[0] indexing a list, a tuple, or a dictionary? And what is data in function2: a list, a tuple, or a dictionary?

data = [1, 2, 3, 4, 5, 6, 7, 8] # lets say data is list
function2(data, 1)
TypeError: 'int' object is not subscriptable
for item in data:
    print(item)
1
2
3
4
5
6
7
8
for item in data:
    print(item[1])# this will not work because item is an integer
TypeError: 'int' object is not subscriptable
matrix = [[11, 12, 13],
          [21, 22, 23],
          [31, 32, 33]]
function2(matrix, 1)
[12, 22, 32]
function2(matrix, 0)
[11, 21, 31]
function2(matrix, 2)
[13, 23, 33]
def column(matrix, i):
    return [item[i] for item in matrix]
column(matrix, 0)
[11, 21, 31]
def column(matrix, n):
    """finds nth column from the matrix.
    assumes that all rows of matrix are of same size
    """
    return [row[n] for row in matrix]

def function1(x):
    y = []
    n = len(x[0])
    for i in range(n):
        y.append(column(x, i))
    return y

function1 is making a list of the columns of the matrix.

matrix
[[11, 12, 13], [21, 22, 23], [31, 32, 33]]
function1(matrix)
[[11, 21, 31], [12, 22, 32], [13, 23, 33]]

def transpose(matrix):
    cols = []
    colcount = len(matrix[0])
    for i in range(colcount):
        cols.append(column(matrix, i))
    return cols
transpose(matrix)
[[11, 21, 31], [12, 22, 32], [13, 23, 33]]
def column(matrix, n):
    """Return the nth column of the matrix as a list.

    matrix is a sequence of rows; n is a 0-based column index.
    Assumes that all rows of the matrix are of the same size; raises
    IndexError if some row has no element at index n.
    """
    return [row[n] for row in matrix]

def transpose(matrix):
    """Return the transpose of a 2d matrix as a new list of columns.

    Assumes that every row in the matrix is of the same size (the column
    count is taken from the first row). An empty matrix has no first row,
    so its transpose is returned as an empty list instead of raising
    IndexError.
    """
    if len(matrix) == 0:
        return []
    firstrow = matrix[0]
    colcount = len(firstrow)
    return [column(matrix, c) for c in range(colcount)]
transpose(matrix)
[[11, 21, 31], [12, 22, 32], [13, 23, 33]]
matrix
[[11, 12, 13], [21, 22, 23], [31, 32, 33]]

Naming guidelines
- i, j, k are for iteration variables, preferably when those are integers.
- x, y, ... look like int/float values from algebra!
- If you know exactly what the integer/float data represents, make use of that:
  - for example, instead of i, use col/c if i represents a column index;
  - for example, instead of i, use row/r if i represents a row index.
- When iterating over 2D data, each item can be named for what it is: [row for row in data2D] instead of [item for item in data2D].
- Give each function a name that clearly represents the activity done by that function.

python debugging

%%file sumofsquares.py
import sys

def square(x):
    return x*x

def sumofsquares(*args):
    return sum([square(a) for a in args])


args = sys.argv[1:]  # these are strings/text
intargs = [int(arg) for arg in args]

print(sumofsquares(intargs))
Overwriting sumofsquares.py
!python sumofsquares.py 1 2 3 4
Traceback (most recent call last):
  File "/home/jupyter-vikrant/arcesium-python-2024/sumofsquares.py", line 13, in <module>
    print(sumofsquares(intargs))
  File "/home/jupyter-vikrant/arcesium-python-2024/sumofsquares.py", line 7, in sumofsquares
    return sum([square(a) for a in args])
  File "/home/jupyter-vikrant/arcesium-python-2024/sumofsquares.py", line 7, in <listcomp>
    return sum([square(a) for a in args])
  File "/home/jupyter-vikrant/arcesium-python-2024/sumofsquares.py", line 4, in square
    return x*x
TypeError: can't multiply sequence by non-int of type 'list'
def foo(*args):
    print(args)
foo(1, 2, 3, 4, 5)
(1, 2, 3, 4, 5)
foo([1, 2, 3, 4, 5])
([1, 2, 3, 4, 5],)
nums = [1, 2, 3, 4, 5]
foo(nums)
([1, 2, 3, 4, 5],)
foo(1, 2, 3, 4, 5)
(1, 2, 3, 4, 5)
foo(*nums)
(1, 2, 3, 4, 5)
%%file sumofsquares.py
import sys

def square(x):
    """Return x multiplied by itself, printing x first so every call is visible."""
    print(x)
    return x*x

def sumofsquares(*args): # variable number of arguments
    """Return the sum of the squares of all positional arguments."""
    return sum([square(a) for a in args])


args = sys.argv[1:]  # command-line arguments after the script name; these are strings/text
intargs = [int(arg) for arg in args]

print(sumofsquares(*intargs)) # unpack the list with * so that each number is passed as a separate argument
Overwriting sumofsquares.py
!python sumofsquares.py 1 2 3 4
1
2
3
4
30

pdb commands

To debug Python programs we can make use of the pdb module:

python -m pdb sumofsquares.py 1 2 3 4 5 6
(Pdb)

At this (Pdb) prompt you can make use of some basic debugging commands:

  1. l -> list the source code around the current line, with line numbers
  2. h -> show help (h alone lists the available commands)
  3. b -> set a breakpoint
  4. h b -> help for the breakpoint command
  5. r -> continue execution until the current function returns
  6. p var -> print the value of a variable
  7. c -> continue execution until the next breakpoint
  8. s -> step into a function (if there is a function call on the current line)
  9. n -> execute only the current line and go to the next line
  10. q -> exit the debugger

packaging your python code

Python script, instructions , requirements.txt

requirements.txt is nothing but a list of the packages that you will need for executing your Python script.
If you want to be very strict about the versions of the packages, then make use of pip freeze to create requirements.txt.

Creating an executable using pyinstaller

For a common user, you provide an executable together with instructions for using the program.

You will need a third-party library, pyinstaller:

pip install pyinstaller

Once you have pyinstaller, you can use this command to create an executable:

pyinstaller -F sumofsquares.py

This creates a build folder and a dist folder. The executable is in the dist folder. It does not even need Python to be installed on the user's system.

./sumofsquares 1 2 3 4 5 6
91

python package

You will need to follow some folder structure

tableA
  |
  |--setup.py
  |--requirements.txt
  +-A
    |
    |--__init__.py
    |-- extract_tableA.py
    +-B
      |
      |-- __init__.py
      |-- stats.py
!mkdir tableA
!mkdir tableA/A
!mkdir tableA/A/B
!rm tableA/__init__.py
!touch tableA/A/__init__.py
!touch tableA/A/B/__init__.py
%%file tableA/A/extract_tableA.py
import sys

import pandas as pd
import PyPDF2

def get_row_lables():
    """Return the list of row-label strings, one per table row, in order."""
    return [
        "Demand Met during Evening Peak hrs(MW) (at 2000 hrs; from RLDCs)",
        "Peak Shortage (MW)",
        "Energy Met (MU)",
        "Hydro Gen (MU)",
        "Wind Gen (MU)",
        "Solar Gen (MU)",
        "Energy Shortage (MU)",
        "Maximum Demand Met During the Day (MW) (From NLDC SCADA)",
        "Time Of Maximum Demand Met (From NLDC SCADA)",
    ]


def extract_page(i, filename="data.pdf"):
    """Return the extracted text of page ``i`` of a PDF file.

    i is the index into the reader's page list; filename is the path of
    the PDF and defaults to "data.pdf" so existing one-argument calls keep
    working.
    """
    with open(filename, "rb") as f: # you will have to open in read-binary mode
        pdfreader = PyPDF2.PdfReader(f)
        page = pdfreader.pages[i]
        return page.extract_text()


def extract_tableA_data(pagetext):
    """Split the first ten lines of a page's text into headers and data rows.

    The first line is split on whitespace to give the column headers; each
    of the following (up to nine) lines is split on whitespace to give one
    row of cell strings. Returns the tuple ``(headers, data)``.
    """
    lines = pagetext.split("\n")
    data_of_interest = lines[:10]

    headers = data_of_interest[0].split()
    data = [line.split() for line in data_of_interest[1:]]
    return headers, data


def extract_table_A(filename):
    """Return table A from the PDF ``filename`` as a pandas DataFrame.

    The table is read from the page at index 1 of the given file. Columns
    are named after the header line and rows are indexed by the labels
    from get_row_lables().
    """
    # Pass the filename through; previously it was ignored and "data.pdf"
    # was always read.
    page = extract_page(1, filename)
    colnames, rows = extract_tableA_data(page)
    dictrows = [dict(zip(colnames, row)) for row in rows]
    rowlabels = get_row_lables()
    return pd.DataFrame(dictrows, index=rowlabels)


def main():
    """Command-line entry point: print table A of the PDF named in argv[1]."""
    if len(sys.argv) < 2:
        sys.exit("usage: " + sys.argv[0] + " <pdf-file>")
    print(extract_table_A(sys.argv[1]))


# The guard must compare against "__main__" exactly, otherwise it never runs.
if __name__ == "__main__":
    main()
Overwriting tableA/A/extract_tableA.py
%%file tableA/A/B/stats.py

def mean(nums):
    """Placeholder: not implemented yet, so it currently returns None."""
    pass

def std(nums):
    """Placeholder: not implemented yet, so it currently returns None."""
    pass
Overwriting tableA/A/B/stats.py
%%file tableA/setup.py
# distutils does not understand install_requires or entry_points (and was
# removed from the standard library in Python 3.12), so setuptools is used.
from setuptools import setup

setup(
    name="tableA",
    version='1.0',
    description="A sample package",
    author="Vikrant",
    author_email="sads@sdsa.com",
    url="https://somesample.web.com",
    packages=['A', 'A.B'],
    install_requires=[
        'PyPDF2',
        'pandas',
    ],
    # A console_scripts entry has the form "command=package.module:function".
    # The function is called with no arguments, so A/extract_tableA.py must
    # provide a zero-argument main() that reads its input from sys.argv.
    entry_points={
        "console_scripts": ["extract_table=A.extract_tableA:main"],
    },
)
Overwriting tableA/setup.py
%%file tableA/requirements.txt
PyPDF2
pandas
Overwriting tableA/requirements.txt
!python sumofsquares.py 1 2 3 4 5

Running python functionality at given time

import time
import datetime

# Poll the clock every 5 seconds and stop once the current time reads 06:43.
# "tick" is printed on every poll so that progress is visible.
while True:
    time.sleep(5) # during sleep cpu and resources are released for others to use
    date = datetime.datetime.now()
    print("tick")
    # Compare only hour and minute, so the first poll that lands inside the
    # target minute triggers the action.
    if date.hour == 6 and date.minute == 43:
        print("hello, the time has come to execute")
        break  # run once, then leave the polling loop
tick
tick
tick
tick
tick
tick
tick
tick
hello, the time has come to execute