Python Virtual Training For Arcesium - Module III - Day 5¶

Dec 17-23, 2020 Vikrant Patil

These notes are available online at http://notes.pipal.in/2020/arcesium_finop_batch3/module3-day5.html

We will be using jupyter hub from http://lab.pipal.in for this training. Create a notebook with name module3-day5.ipynb for today's session. Before you start shutdown all kernels except today's notebook.

Database¶

import sqlite3

conn = sqlite3.connect("database.db")# database.db is file name for this sqlite3 db

cur = conn.cursor()

import pandas as pd

df = pd.read_csv("wallet.csv")

df

df.to_sql("wallet", con=conn, if_exists="append")

/home/vikrant/anaconda3/lib/python3.8/site-packages/pandas/core/generic.py:2653: UserWarning: The spaces in these column names will not be changed. In pandas versions < 0.14, spaces were converted to underscores.
  sql.to_sql(

results = conn.cursor().execute("select * from wallet")

for item in results:
    print(item)

(0, 0, '2021-03-07 14:53:28.377359', 'Music', 'Amazon', 421.2073272347991)
(1, 1, '2020-10-08 09:53:28.377359', 'Food', 'Swiggy', 328.4400802428426)
(2, 2, '2021-02-23 09:53:28.377359', 'Books', 'Amazon', 244.67943701511356)
(3, 3, '2020-11-01 14:53:28.377359', 'Utility', 'Phone', 222.75631758052768)
(4, 4, '2021-06-05 13:53:28.377359', 'Books', 'Flipcart', 494.1284923793595)
(5, 5, '2021-07-28 19:53:28.377359', 'Utility', 'Electricity', 219.9417113096841)
(6, 6, '2021-04-16 11:53:28.377359', 'Books', 'Amazon Kindle', 270.32259514795845)
(7, 7, '2021-02-15 10:53:28.377359', 'Food', 'Zomato', 457.1831036346536)
(8, 8, '2021-08-10 19:53:28.377359', 'Utility', 'Phone', 151.4963725994779)
(9, 9, '2020-11-29 14:53:28.377359', 'Travel', 'Auto', 443.61888423247854)
(10, 10, '2021-06-15 13:53:28.377359', 'Travel', 'Metro', 328.1754210974373)
(11, 11, '2021-07-24 13:53:28.377359', 'Food', 'Zomato', 434.4954675355444)
(12, 12, '2021-07-24 14:53:28.377359', 'Music', 'Amazon', 329.5360031897569)
(13, 13, '2021-06-06 10:53:28.377359', 'Utility', 'Phone', 154.04494918166588)
(14, 14, '2021-06-09 13:53:28.377359', 'Travel', 'Taxi', 485.2977429821982)
(15, 15, '2021-08-24 17:53:28.377359', 'Food', 'Zomato', 262.9439932340398)
(16, 16, '2021-03-05 19:53:28.377359', 'Utility', 'Phone', 390.31687619327926)
(17, 17, '2021-04-17 18:53:28.377359', 'Utility', 'Electricity', 316.87867542466364)
(18, 18, '2021-05-08 15:53:28.377359', 'Travel', 'Auto', 433.8224042777937)
(19, 19, '2021-05-16 10:53:28.377359', 'Books', 'Flipcart', 109.32590886550068)
(20, 20, '2020-10-12 18:53:28.377359', 'Travel', 'Auto', 365.9218082537661)
(21, 21, '2021-01-04 19:53:28.377359', 'Travel', 'Metro', 329.0973715025852)
(22, 22, '2021-06-24 15:53:28.377359', 'Food', 'Zomato', 489.14348305222535)
(23, 23, '2020-12-11 10:53:28.377359', 'Music', 'Netflix', 354.9402409919817)
(24, 24, '2021-05-31 11:53:28.377359', 'Books', 'Amazon', 498.1004955046106)
(25, 25, '2021-05-21 14:53:28.377359', 'Food', 'Hotel', 483.315863517772)
(26, 26, '2020-08-26 15:53:28.377359', 'Books', 'Amazon Kindle', 138.806577801854)
(27, 27, '2021-05-01 15:53:28.377359', 'Utility', 'Electricity', 103.68079074846584)
(28, 28, '2020-12-14 15:53:28.377359', 'Utility', 'Phone', 358.4599327957656)
(29, 29, '2021-06-20 10:53:28.377359', 'Utility', 'Electricity', 184.5577284049955)
(30, 30, '2020-09-15 18:53:28.377359', 'Food', 'Swiggy', 203.52923978943267)
(31, 31, '2020-09-25 11:53:28.377359', 'Books', 'Flipcart', 246.503527384528)
(32, 32, '2021-06-23 11:53:28.377359', 'Food', 'Zomato', 345.0304360814152)
(33, 33, '2021-05-14 18:53:28.377359', 'Food', 'Hotel', 449.24802955761743)
(34, 34, '2021-05-14 10:53:28.377359', 'Utility', 'Phone', 499.8581815222449)
(35, 35, '2021-02-18 18:53:28.377359', 'Travel', 'Metro', 441.6021430011205)
(36, 36, '2020-12-10 10:53:28.377359', 'Travel', 'Auto', 472.94143917262176)
(37, 37, '2021-04-18 16:53:28.377359', 'Music', 'Amazon', 266.0690783774673)
(38, 38, '2021-08-15 10:53:28.377359', 'Travel', 'Auto', 494.12439940565713)
(39, 39, '2021-05-17 17:53:28.377359', 'Food', 'Swiggy', 112.33316019807457)
(40, 40, '2021-07-19 12:53:28.377359', 'Food', 'Swiggy', 291.54598801930536)
(41, 41, '2021-02-20 19:53:28.377359', 'Utility', 'Phone', 425.18719068071806)
(42, 42, '2021-08-22 17:53:28.377359', 'Food', 'Hotel', 210.25626950078566)
(43, 43, '2020-09-21 12:53:28.377359', 'Utility', 'Phone', 486.0339327616073)
(44, 44, '2020-12-26 19:53:28.377359', 'Utility', 'Electricity', 257.92759337085425)
(45, 45, '2021-05-27 16:53:28.377359', 'Utility', 'Electricity', 154.74287259516657)
(46, 46, '2021-05-15 15:53:28.377359', 'Utility', 'Electricity', 359.3249716537848)
(47, 47, '2020-10-28 10:53:28.377359', 'Books', 'Flipcart', 310.408610004679)
(48, 48, '2021-08-23 17:53:28.377359', 'Utility', 'Electricity', 310.05840961423314)
(49, 49, '2021-03-16 09:53:28.377359', 'Music', 'spotify', 232.30340219121135)
(50, 50, '2020-12-24 11:53:28.377359', 'Food', 'Zomato', 463.0018749263554)
(51, 51, '2020-12-22 17:53:28.377359', 'Food', 'Zomato', 331.22702332837093)
(52, 52, '2021-03-26 09:53:28.377359', 'Travel', 'Taxi', 403.61007013419334)
(53, 53, '2021-01-27 09:53:28.377359', 'Utility', 'Electricity', 183.1866624101276)
(54, 54, '2020-11-16 10:53:28.377359', 'Music', 'spotify', 160.81754340768396)
(55, 55, '2021-01-21 19:53:28.377359', 'Books', 'Flipcart', 423.74970808720553)
(56, 56, '2021-05-19 18:53:28.377359', 'Utility', 'Phone', 319.34287626846185)
(57, 57, '2021-07-15 15:53:28.377359', 'Utility', 'Phone', 279.6090437716363)
(58, 58, '2021-05-20 10:53:28.377359', 'Food', 'Hotel', 255.8710346734312)
(59, 59, '2020-08-28 11:53:28.377359', 'Food', 'Swiggy', 208.2329120852039)
(60, 60, '2021-01-17 11:53:28.377359', 'Utility', 'Electricity', 382.5195101154448)
(61, 61, '2021-02-25 13:53:28.377359', 'Food', 'Hotel', 124.65827844174062)
(62, 62, '2021-01-27 19:53:28.377359', 'Books', 'Amazon Kindle', 497.7708601564023)
(63, 63, '2021-05-10 11:53:28.377359', 'Travel', 'Taxi', 355.9890502253258)
(64, 64, '2021-01-31 14:53:28.377359', 'Food', 'Zomato', 232.2223798622789)
(65, 65, '2020-10-23 18:53:28.377359', 'Music', 'Netflix', 188.7487426895118)
(66, 66, '2020-10-09 16:53:28.377359', 'Food', 'Swiggy', 263.9577700340145)
(67, 67, '2021-07-31 14:53:28.377359', 'Music', 'Netflix', 324.786916846731)
(68, 68, '2020-08-26 09:53:28.377359', 'Travel', 'Taxi', 279.1478844739421)
(69, 69, '2020-10-10 15:53:28.377359', 'Utility', 'Electricity', 300.5246204193512)
(70, 70, '2021-08-17 13:53:28.377359', 'Utility', 'Phone', 125.22977317126335)
(71, 71, '2021-03-30 12:53:28.377359', 'Food', 'Swiggy', 245.36050838040904)
(72, 72, '2021-06-30 18:53:28.377359', 'Books', 'Amazon', 294.6628689900488)
(73, 73, '2021-08-15 17:53:28.377359', 'Travel', 'Metro', 117.58872931045572)
(74, 74, '2021-03-20 11:53:28.377359', 'Travel', 'Taxi', 303.0554209852045)
(75, 75, '2021-03-03 12:53:28.377359', 'Food', 'Hotel', 425.62529099481486)
(76, 76, '2020-11-17 09:53:28.377359', 'Music', 'Netflix', 197.5346000167895)
(77, 77, '2021-01-18 14:53:28.377359', 'Books', 'Amazon Kindle', 482.1523430204321)
(78, 78, '2020-09-09 16:53:28.377359', 'Music', 'spotify', 415.3728938035302)
(79, 79, '2021-08-17 09:53:28.377359', 'Music', 'Netflix', 321.7634156544651)
(80, 80, '2021-02-17 09:53:28.377359', 'Food', 'Swiggy', 283.09570727160764)
(81, 81, '2020-10-29 16:53:28.377359', 'Food', 'Hotel', 470.0809953992362)
(82, 82, '2020-09-22 09:53:28.377359', 'Music', 'spotify', 411.14270120842224)
(83, 83, '2021-03-18 09:53:28.377359', 'Books', 'Flipcart', 451.5844070294999)
(84, 84, '2020-09-21 10:53:28.377359', 'Music', 'Netflix', 158.7936457269333)
(85, 85, '2021-01-12 09:53:28.377359', 'Music', 'Amazon', 130.37490757527002)
(86, 86, '2021-05-07 16:53:28.377359', 'Food', 'Zomato', 198.45067179263802)
(87, 87, '2021-05-19 15:53:28.377359', 'Food', 'Zomato', 378.82064134052473)
(88, 88, '2021-04-18 09:53:28.377359', 'Utility', 'Phone', 124.22124784445779)
(89, 89, '2021-04-12 14:53:28.377359', 'Music', 'Amazon', 218.487173429263)
(90, 90, '2020-12-01 14:53:28.377359', 'Music', 'Amazon', 101.57327588889416)
(91, 91, '2021-01-22 17:53:28.377359', 'Food', 'Hotel', 232.66346838787223)
(92, 92, '2021-01-12 19:53:28.377359', 'Travel', 'Taxi', 356.84263798863265)
(93, 93, '2021-01-11 09:53:28.377359', 'Utility', 'Electricity', 111.72080867898062)
(94, 94, '2021-01-04 13:53:28.377359', 'Utility', 'Phone', 431.1855366816298)
(95, 95, '2021-07-19 13:53:28.377359', 'Utility', 'Phone', 388.67121323884214)
(96, 96, '2021-01-12 19:53:28.377359', 'Books', 'Flipcart', 467.5545618966052)
(97, 97, '2021-03-25 11:53:28.377359', 'Utility', 'Phone', 320.78943360123816)
(98, 98, '2021-05-13 15:53:28.377359', 'Travel', 'Taxi', 442.0964693975505)
(99, 99, '2020-10-11 16:53:28.377359', 'Food', 'Hotel', 100.45550129902664)

cur = conn.cursor()
r = cur.execute("select * from wallet where category='{}'".format("Books"))
for item in r:
    print(item)

(2, 2, '2021-02-23 09:53:28.377359', 'Books', 'Amazon', 244.67943701511356)
(4, 4, '2021-06-05 13:53:28.377359', 'Books', 'Flipcart', 494.1284923793595)
(6, 6, '2021-04-16 11:53:28.377359', 'Books', 'Amazon Kindle', 270.32259514795845)
(19, 19, '2021-05-16 10:53:28.377359', 'Books', 'Flipcart', 109.32590886550068)
(24, 24, '2021-05-31 11:53:28.377359', 'Books', 'Amazon', 498.1004955046106)
(26, 26, '2020-08-26 15:53:28.377359', 'Books', 'Amazon Kindle', 138.806577801854)
(31, 31, '2020-09-25 11:53:28.377359', 'Books', 'Flipcart', 246.503527384528)
(47, 47, '2020-10-28 10:53:28.377359', 'Books', 'Flipcart', 310.408610004679)
(55, 55, '2021-01-21 19:53:28.377359', 'Books', 'Flipcart', 423.74970808720553)
(62, 62, '2021-01-27 19:53:28.377359', 'Books', 'Amazon Kindle', 497.7708601564023)
(72, 72, '2021-06-30 18:53:28.377359', 'Books', 'Amazon', 294.6628689900488)
(77, 77, '2021-01-18 14:53:28.377359', 'Books', 'Amazon Kindle', 482.1523430204321)
(83, 83, '2021-03-18 09:53:28.377359', 'Books', 'Flipcart', 451.5844070294999)
(96, 96, '2021-01-12 19:53:28.377359', 'Books', 'Flipcart', 467.5545618966052)

def find_transcactions(conn, cat):
    q = "select * from wallet where category='{}'".format(cat)
    cur = conn.cursor()
    result = cur.execute(q)
    return result.fetchall()

find_transcactions(conn, "Food")

[(1, 1, '2020-10-08 09:53:28.377359', 'Food', 'Swiggy', 328.4400802428426),
 (7, 7, '2021-02-15 10:53:28.377359', 'Food', 'Zomato', 457.1831036346536),
 (11, 11, '2021-07-24 13:53:28.377359', 'Food', 'Zomato', 434.4954675355444),
 (15, 15, '2021-08-24 17:53:28.377359', 'Food', 'Zomato', 262.9439932340398),
 (22, 22, '2021-06-24 15:53:28.377359', 'Food', 'Zomato', 489.14348305222535),
 (25, 25, '2021-05-21 14:53:28.377359', 'Food', 'Hotel', 483.315863517772),
 (30, 30, '2020-09-15 18:53:28.377359', 'Food', 'Swiggy', 203.52923978943267),
 (32, 32, '2021-06-23 11:53:28.377359', 'Food', 'Zomato', 345.0304360814152),
 (33, 33, '2021-05-14 18:53:28.377359', 'Food', 'Hotel', 449.24802955761743),
 (39, 39, '2021-05-17 17:53:28.377359', 'Food', 'Swiggy', 112.33316019807457),
 (40, 40, '2021-07-19 12:53:28.377359', 'Food', 'Swiggy', 291.54598801930536),
 (42, 42, '2021-08-22 17:53:28.377359', 'Food', 'Hotel', 210.25626950078566),
 (50, 50, '2020-12-24 11:53:28.377359', 'Food', 'Zomato', 463.0018749263554),
 (51, 51, '2020-12-22 17:53:28.377359', 'Food', 'Zomato', 331.22702332837093),
 (58, 58, '2021-05-20 10:53:28.377359', 'Food', 'Hotel', 255.8710346734312),
 (59, 59, '2020-08-28 11:53:28.377359', 'Food', 'Swiggy', 208.2329120852039),
 (61, 61, '2021-02-25 13:53:28.377359', 'Food', 'Hotel', 124.65827844174062),
 (64, 64, '2021-01-31 14:53:28.377359', 'Food', 'Zomato', 232.2223798622789),
 (66, 66, '2020-10-09 16:53:28.377359', 'Food', 'Swiggy', 263.9577700340145),
 (71, 71, '2021-03-30 12:53:28.377359', 'Food', 'Swiggy', 245.36050838040904),
 (75, 75, '2021-03-03 12:53:28.377359', 'Food', 'Hotel', 425.62529099481486),
 (80, 80, '2021-02-17 09:53:28.377359', 'Food', 'Swiggy', 283.09570727160764),
 (81, 81, '2020-10-29 16:53:28.377359', 'Food', 'Hotel', 470.0809953992362),
 (86, 86, '2021-05-07 16:53:28.377359', 'Food', 'Zomato', 198.45067179263802),
 (87, 87, '2021-05-19 15:53:28.377359', 'Food', 'Zomato', 378.82064134052473),
 (91, 91, '2021-01-22 17:53:28.377359', 'Food', 'Hotel', 232.66346838787223),
 (99, 99, '2020-10-11 16:53:28.377359', 'Food', 'Hotel', 100.45550129902664)]

def find_transcactions(conn, cat):
    q = "select * from wallet where category=?"
    cur = conn.cursor()
    result = cur.execute(q, (cat,))
    return result.fetchall()

find_transcactions(conn, "Music")

[(0, 0, '2021-03-07 14:53:28.377359', 'Music', 'Amazon', 421.2073272347991),
 (12, 12, '2021-07-24 14:53:28.377359', 'Music', 'Amazon', 329.5360031897569),
 (23, 23, '2020-12-11 10:53:28.377359', 'Music', 'Netflix', 354.9402409919817),
 (37, 37, '2021-04-18 16:53:28.377359', 'Music', 'Amazon', 266.0690783774673),
 (49,
  49,
  '2021-03-16 09:53:28.377359',
  'Music',
  'spotify',
  232.30340219121135),
 (54,
  54,
  '2020-11-16 10:53:28.377359',
  'Music',
  'spotify',
  160.81754340768396),
 (65, 65, '2020-10-23 18:53:28.377359', 'Music', 'Netflix', 188.7487426895118),
 (67, 67, '2021-07-31 14:53:28.377359', 'Music', 'Netflix', 324.786916846731),
 (76, 76, '2020-11-17 09:53:28.377359', 'Music', 'Netflix', 197.5346000167895),
 (78, 78, '2020-09-09 16:53:28.377359', 'Music', 'spotify', 415.3728938035302),
 (79, 79, '2021-08-17 09:53:28.377359', 'Music', 'Netflix', 321.7634156544651),
 (82,
  82,
  '2020-09-22 09:53:28.377359',
  'Music',
  'spotify',
  411.14270120842224),
 (84, 84, '2020-09-21 10:53:28.377359', 'Music', 'Netflix', 158.7936457269333),
 (85, 85, '2021-01-12 09:53:28.377359', 'Music', 'Amazon', 130.37490757527002),
 (89, 89, '2021-04-12 14:53:28.377359', 'Music', 'Amazon', 218.487173429263),
 (90, 90, '2020-12-01 14:53:28.377359', 'Music', 'Amazon', 101.57327588889416)]

def find_transcactions(conn, cat, desc):
    q = "select * from wallet where category=? and description=?"
    cur = conn.cursor()
    result = cur.execute(q, (cat,desc))
    return result.fetchall()

find_transcactions(conn, "Music", "Amazon")

[(0, 0, '2021-03-07 14:53:28.377359', 'Music', 'Amazon', 421.2073272347991),
 (12, 12, '2021-07-24 14:53:28.377359', 'Music', 'Amazon', 329.5360031897569),
 (37, 37, '2021-04-18 16:53:28.377359', 'Music', 'Amazon', 266.0690783774673),
 (85, 85, '2021-01-12 09:53:28.377359', 'Music', 'Amazon', 130.37490757527002),
 (89, 89, '2021-04-12 14:53:28.377359', 'Music', 'Amazon', 218.487173429263),
 (90, 90, '2020-12-01 14:53:28.377359', 'Music', 'Amazon', 101.57327588889416)]

find_transcactions(conn, "Music", "Netflix")

[(23, 23, '2020-12-11 10:53:28.377359', 'Music', 'Netflix', 354.9402409919817),
 (65, 65, '2020-10-23 18:53:28.377359', 'Music', 'Netflix', 188.7487426895118),
 (67, 67, '2021-07-31 14:53:28.377359', 'Music', 'Netflix', 324.786916846731),
 (76, 76, '2020-11-17 09:53:28.377359', 'Music', 'Netflix', 197.5346000167895),
 (79, 79, '2021-08-17 09:53:28.377359', 'Music', 'Netflix', 321.7634156544651),
 (84, 84, '2020-09-21 10:53:28.377359', 'Music', 'Netflix', 158.7936457269333)]

df = pd.read_sql_query("select * from wallet", con= conn)

df

df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 102 entries, 0 to 101
Data columns (total 6 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   index        102 non-null    int64  
 1   Unnamed: 0   102 non-null    int64  
 2   date         102 non-null    object 
 3   category     102 non-null    object 
 4   description  102 non-null    object 
 5   debit        102 non-null    float64
dtypes: float64(1), int64(2), object(3)
memory usage: 4.9+ KB

help(pd.read_sql_query)

Help on function read_sql_query in module pandas.io.sql:

read_sql_query(sql, con, index_col=None, coerce_float=True, params=None, parse_dates=None, chunksize=None)
    Read SQL query into a DataFrame.
    
    Returns a DataFrame corresponding to the result set of the query
    string. Optionally provide an `index_col` parameter to use one of the
    columns as the index, otherwise default integer index will be used.
    
    Parameters
    ----------
    sql : str SQL query or SQLAlchemy Selectable (select or text object)
        SQL query to be executed.
    con : SQLAlchemy connectable(engine/connection), database str URI,
        or sqlite3 DBAPI2 connection
        Using SQLAlchemy makes it possible to use any DB supported by that
        library.
        If a DBAPI2 object, only sqlite3 is supported.
    index_col : str or list of strings, optional, default: None
        Column(s) to set as index(MultiIndex).
    coerce_float : bool, default True
        Attempts to convert values of non-string, non-numeric objects (like
        decimal.Decimal) to floating point. Useful for SQL result sets.
    params : list, tuple or dict, optional, default: None
        List of parameters to pass to execute method.  The syntax used
        to pass parameters is database driver dependent. Check your
        database driver documentation for which of the five syntax styles,
        described in PEP 249's paramstyle, is supported.
        Eg. for psycopg2, uses %(name)s so use params={'name' : 'value'}.
    parse_dates : list or dict, default: None
        - List of column names to parse as dates.
        - Dict of ``{column_name: format string}`` where format string is
          strftime compatible in case of parsing string times, or is one of
          (D, s, ns, ms, us) in case of parsing integer timestamps.
        - Dict of ``{column_name: arg dict}``, where the arg dict corresponds
          to the keyword arguments of :func:`pandas.to_datetime`
          Especially useful with databases without native Datetime support,
          such as SQLite.
    chunksize : int, default None
        If specified, return an iterator where `chunksize` is the number of
        rows to include in each chunk.
    
    Returns
    -------
    DataFrame
    
    See Also
    --------
    read_sql_table : Read SQL database table into a DataFrame.
    read_sql
    
    Notes
    -----
    Any datetime values with time zone information parsed via the `parse_dates`
    parameter will be converted to UTC.

import time
with conn: # to lock the database
    #time.sleep(10) # wait for 10 seconds
    conn.execute("select * from wallet")

def insert_values(values):
    conn = sqlite3.connect("database.db")
    with conn:
        cur = conn.cursor()
        cur = cur.execute("insert into wallet values (?,?,?,?,?,?)" ,values)

    conn.close()
    
insert_values((101,101,'2020-09-21 10:53:28.377359', "NEW","Amazon", 450.0))

conn = sqlite3.connect("database.db")
find_transcactions(conn, "NEW", "Amazon")

[(101, 101, '2020-09-21 10:53:28.377359', 'NEW', 'Amazon', 450.0),
 (101, 101, '2020-09-21 10:53:28.377359', 'NEW', 'Amazon', 450.0)]

%%file samplecats.csv
NEW,Amazon

Overwriting samplecats.csv

def read_cat_desc(filename):
    with open(filename) as f:
        return f.read().strip().split(",")
    
cat, desc = read_cat_desc("samplecats.csv")
print(cat, desc)
print("NEW", "Amazon")
find_transcactions(conn, cat, desc)

#find_transcactions(conn, "NEW", "Amazon")

NEW Amazon
NEW Amazon

[(101, 101, '2020-09-21 10:53:28.377359', 'NEW', 'Amazon', 450.0),
 (101, 101, '2020-09-21 10:53:28.377359', 'NEW', 'Amazon', 450.0)]

q = "select * from tablre where field=?"
conn.execute(q, (30.5, ))

for advanced cases of database programming sqlalchemy¶

sqlalchemy gives nice framework which allows mapping classes and database tables

debugging python programs¶

Learn to look at errors

x = 20
2x = x*20 # is it possible

  File "<ipython-input-63-441b1973867a>", line 2
    2x = x*20 # is it possible
     ^
SyntaxError: invalid syntax

x2 = x*20

---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-64-93cf000f2455> in <module>
----> 1 x2 = x*20

NameError: name 'x' is not defined

x = 20
x2 = x*20

def function1(x, key):
    n = len(x)
    return [x[i][key] for i in range(n)]

def column(tabular, colindex):
    return [row[colindex] for row in tabular]

table = [[i*j for i in range(1,6)] for j in range(1,11)]

table

[[1, 2, 3, 4, 5],
 [2, 4, 6, 8, 10],
 [3, 6, 9, 12, 15],
 [4, 8, 12, 16, 20],
 [5, 10, 15, 20, 25],
 [6, 12, 18, 24, 30],
 [7, 14, 21, 28, 35],
 [8, 16, 24, 32, 40],
 [9, 18, 27, 36, 45],
 [10, 20, 30, 40, 50]]

column(table, 1)

[2, 4, 6, 8, 10, 12, 14, 16, 18, 20]

column(table, 4)

[5, 10, 15, 20, 25, 30, 35, 40, 45, 50]

function1(table, 5)

---------------------------------------------------------------------------
IndexError                                Traceback (most recent call last)
<ipython-input-74-5ac910859d9e> in <module>
----> 1 function1(table, 5)

<ipython-input-66-90bf80f7c2bf> in function1(x, key)
      1 def function1(x, key):
      2     n = len(x)
----> 3     return [x[i][key] for i in range(n)]

<ipython-input-66-90bf80f7c2bf> in <listcomp>(.0)
      1 def function1(x, key):
      2     n = len(x)
----> 3     return [x[i][key] for i in range(n)]

IndexError: list index out of range

column(table, 5)

---------------------------------------------------------------------------
IndexError                                Traceback (most recent call last)
<ipython-input-75-3d6b50984172> in <module>
----> 1 column(table, 5)

<ipython-input-69-36e3766dcffc> in column(tabular, colindex)
      1 def column(tabular, colindex):
----> 2     return [row[colindex] for row in tabular]

<ipython-input-69-36e3766dcffc> in <listcomp>(.0)
      1 def column(tabular, colindex):
----> 2     return [row[colindex] for row in tabular]

IndexError: list index out of range

table

[[1, 2, 3, 4, 5],
 [2, 4, 6, 8, 10],
 [3, 6, 9, 12, 15],
 [4, 8, 12, 16, 20],
 [5, 10, 15, 20, 25],
 [6, 12, 18, 24, 30],
 [7, 14, 21, 28, 35],
 [8, 16, 24, 32, 40],
 [9, 18, 27, 36, 45],
 [10, 20, 30, 40, 50]]

def transpose(tabubar):
    columncount = len(tabubar[0])
    return [column(tabubar, c) for c in range(columncount)]

transpose(table)

[[1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
 [2, 4, 6, 8, 10, 12, 14, 16, 18, 20],
 [3, 6, 9, 12, 15, 18, 21, 24, 27, 30],
 [4, 8, 12, 16, 20, 24, 28, 32, 36, 40],
 [5, 10, 15, 20, 25, 30, 35, 40, 45, 50]]

"name" in "this senetence conatins no name"

True

text = "this senetence conatins no name"

Regular expressions¶

import re

#2020-09-21 10:

p = re.compile(r"2020-\d{2,2}-\d{2,2}")

with open("wallet.csv") as f:
    for line in f:
        fields = line.strip().split(",")
        if p.match(fields[1]):
            print(fields)

['1', '2020-10-08 09:53:28.377359', 'Food', 'Swiggy', '328.4400802428426']
['3', '2020-11-01 14:53:28.377359', 'Utility', 'Phone', '222.7563175805277']
['9', '2020-11-29 14:53:28.377359', 'Travel', 'Auto', '443.61888423247854']
['20', '2020-10-12 18:53:28.377359', 'Travel', 'Auto', '365.92180825376613']
['23', '2020-12-11 10:53:28.377359', 'Music', 'Netflix', '354.94024099198157']
['26', '2020-08-26 15:53:28.377359', 'Books', 'Amazon Kindle', '138.806577801854']
['28', '2020-12-14 15:53:28.377359', 'Utility', 'Phone', '358.4599327957656']
['30', '2020-09-15 18:53:28.377359', 'Food', 'Swiggy', '203.5292397894327']
['31', '2020-09-25 11:53:28.377359', 'Books', 'Flipcart', '246.50352738452796']
['36', '2020-12-10 10:53:28.377359', 'Travel', 'Auto', '472.94143917262176']
['43', '2020-09-21 12:53:28.377359', 'Utility', 'Phone', '486.03393276160733']
['44', '2020-12-26 19:53:28.377359', 'Utility', 'Electricity', '257.92759337085425']
['47', '2020-10-28 10:53:28.377359', 'Books', 'Flipcart', '310.408610004679']
['50', '2020-12-24 11:53:28.377359', 'Food', 'Zomato', '463.00187492635547']
['51', '2020-12-22 17:53:28.377359', 'Food', 'Zomato', '331.22702332837093']
['54', '2020-11-16 10:53:28.377359', 'Music', 'spotify', '160.81754340768396']
['59', '2020-08-28 11:53:28.377359', 'Food', 'Swiggy', '208.2329120852039']
['65', '2020-10-23 18:53:28.377359', 'Music', 'Netflix', '188.7487426895118']
['66', '2020-10-09 16:53:28.377359', 'Food', 'Swiggy', '263.9577700340145']
['68', '2020-08-26 09:53:28.377359', 'Travel', 'Taxi', '279.1478844739421']
['69', '2020-10-10 15:53:28.377359', 'Utility', 'Electricity', '300.52462041935115']
['76', '2020-11-17 09:53:28.377359', 'Music', 'Netflix', '197.5346000167895']
['78', '2020-09-09 16:53:28.377359', 'Music', 'spotify', '415.3728938035302']
['81', '2020-10-29 16:53:28.377359', 'Food', 'Hotel', '470.08099539923614']
['82', '2020-09-22 09:53:28.377359', 'Music', 'spotify', '411.14270120842224']
['84', '2020-09-21 10:53:28.377359', 'Music', 'Netflix', '158.7936457269333']
['90', '2020-12-01 14:53:28.377359', 'Music', 'Amazon', '101.57327588889417']
['99', '2020-10-11 16:53:28.377359', 'Food', 'Hotel', '100.45550129902665']

p = re.compile(r"2020-(?P<month>\d{2,2})-\d{2,2}")


with open("wallet.csv") as f:
    for line in f:
        fields = line.strip().split(",")
        m = p.match(fields[1])
        if m:
            print(fields, m.groupdict()['month'])

['1', '2020-10-08 09:53:28.377359', 'Food', 'Swiggy', '328.4400802428426'] 10
['3', '2020-11-01 14:53:28.377359', 'Utility', 'Phone', '222.7563175805277'] 11
['9', '2020-11-29 14:53:28.377359', 'Travel', 'Auto', '443.61888423247854'] 11
['20', '2020-10-12 18:53:28.377359', 'Travel', 'Auto', '365.92180825376613'] 10
['23', '2020-12-11 10:53:28.377359', 'Music', 'Netflix', '354.94024099198157'] 12
['26', '2020-08-26 15:53:28.377359', 'Books', 'Amazon Kindle', '138.806577801854'] 08
['28', '2020-12-14 15:53:28.377359', 'Utility', 'Phone', '358.4599327957656'] 12
['30', '2020-09-15 18:53:28.377359', 'Food', 'Swiggy', '203.5292397894327'] 09
['31', '2020-09-25 11:53:28.377359', 'Books', 'Flipcart', '246.50352738452796'] 09
['36', '2020-12-10 10:53:28.377359', 'Travel', 'Auto', '472.94143917262176'] 12
['43', '2020-09-21 12:53:28.377359', 'Utility', 'Phone', '486.03393276160733'] 09
['44', '2020-12-26 19:53:28.377359', 'Utility', 'Electricity', '257.92759337085425'] 12
['47', '2020-10-28 10:53:28.377359', 'Books', 'Flipcart', '310.408610004679'] 10
['50', '2020-12-24 11:53:28.377359', 'Food', 'Zomato', '463.00187492635547'] 12
['51', '2020-12-22 17:53:28.377359', 'Food', 'Zomato', '331.22702332837093'] 12
['54', '2020-11-16 10:53:28.377359', 'Music', 'spotify', '160.81754340768396'] 11
['59', '2020-08-28 11:53:28.377359', 'Food', 'Swiggy', '208.2329120852039'] 08
['65', '2020-10-23 18:53:28.377359', 'Music', 'Netflix', '188.7487426895118'] 10
['66', '2020-10-09 16:53:28.377359', 'Food', 'Swiggy', '263.9577700340145'] 10
['68', '2020-08-26 09:53:28.377359', 'Travel', 'Taxi', '279.1478844739421'] 08
['69', '2020-10-10 15:53:28.377359', 'Utility', 'Electricity', '300.52462041935115'] 10
['76', '2020-11-17 09:53:28.377359', 'Music', 'Netflix', '197.5346000167895'] 11
['78', '2020-09-09 16:53:28.377359', 'Music', 'spotify', '415.3728938035302'] 09
['81', '2020-10-29 16:53:28.377359', 'Food', 'Hotel', '470.08099539923614'] 10
['82', '2020-09-22 09:53:28.377359', 'Music', 'spotify', '411.14270120842224'] 09
['84', '2020-09-21 10:53:28.377359', 'Music', 'Netflix', '158.7936457269333'] 09
['90', '2020-12-01 14:53:28.377359', 'Music', 'Amazon', '101.57327588889417'] 12
['99', '2020-10-11 16:53:28.377359', 'Food', 'Hotel', '100.45550129902665'] 10

You can refer https://docs.python.org/3/howto/regex.html for more details

%%file download.py
import sys
import requests

def download(url, filename):
    resp = requests.get(url)
    with open(filename, "w") as f:
        f.write(resp.text)

if __name__ == "__main__":
    url = sys.argv[1]
    filename = sys.argv[2]
    download(url, filename)

Overwriting download.py

	Unnamed: 0	date	category	description	debit
0	0	2021-03-07 14:53:28.377359	Music	Amazon	421.207327
1	1	2020-10-08 09:53:28.377359	Food	Swiggy	328.440080
2	2	2021-02-23 09:53:28.377359	Books	Amazon	244.679437
3	3	2020-11-01 14:53:28.377359	Utility	Phone	222.756318
4	4	2021-06-05 13:53:28.377359	Books	Flipcart	494.128492
...	...	...	...	...	...
95	95	2021-07-19 13:53:28.377359	Utility	Phone	388.671213
96	96	2021-01-12 19:53:28.377359	Books	Flipcart	467.554562
97	97	2021-03-25 11:53:28.377359	Utility	Phone	320.789434
98	98	2021-05-13 15:53:28.377359	Travel	Taxi	442.096469
99	99	2020-10-11 16:53:28.377359	Food	Hotel	100.455501