Mar 13-17, 2023 Vikrant Patil
All notes are available online at https://notes.pipal.in/2023/arcesium_finop_jan/
Please login to https://engage.pipal.in/ and launch jupyter lab
For today create a notebook with name module3-day3
notebook names are case sensitive. Make sure you give correct name
© Pipal Academy LLP
python -m venv chrome_seleniumthat will copy chromedriver.exe in above folder
%%file download_using_chrome.py
from selenium import webdriver
import os
import time
chromeoptions = webdriver.ChromeOptions()
prefs = {"download.default.directory": os.path.abspath("/home/vikrant/Downloads")}
chrome = webdriver.Chrome(options=chromeoptions)
url = "https://selenium-python.readthedocs.io/installation.html"
chrome.get(url)
time.sleep(20)
Overwriting download_using_chrome.py
"""all about outlook and python
create virtual environment with name pywinservices
python -m venv pywinservices
activate it using
\pywinservices\Scripts\activate.bat
after activation install following packages
pip install pypiwin32
following code makes use of MAPI api provided by microsoft
for more advanced cases refer
https://docs.microsoft.com/en-us/office/client-developer/outlook/outlook-home
"""
import datetime
import os
import win32com.client
def get_outlook():
return win32com.client.Dispatch("Outlook.Application").GetNamespace("MAPI")
def get_inbox(email=None):
outlook = get_outlook()
if email == None:
inbox = outlook.GetDefaultFolder(6) # this works
else:
inbox = outlook.Folders.Items(email).Folders.Items('Inbox')
# inbox1 = outlook.Folders.Items('123@abc.com').Folders.Items('Inbox') # To access 123@abc.com Inbox
# inbox2 = outlook.Folders.Items('456@def.com').Folders.Itmes('Inbox') # To access 456@def.com Inbox
return inbox
def get_default_inbox_messages():
inbox = get_inbox()
messages = inbox.Items
return messages
def saveattachemnts_from_email(subject):
"""saves attachment from email that matches with subject
"""
messages = get_default_inbox_messages()
path = os.path.expanduser("~/Desktop/Attachments")
today = datetime.date.today()
for message in messages:
if message.Subject == subject and message.Unread or message.Senton.date() == today:
# body_content = message.body
attachments = message.Attachments
attachment = attachments.Item(1)
for attachment in message.Attachments:
attachment.SaveAsFile(os.path.join(path, str(attachment)))
if message.Subject == subject and message.Unread:
message.Unread = False
break
# print first 3 emails from ...
def print_emails(folderindex: int = 6, emailcount: int = 3):
"""
folderindex 3, 4, 5, 6 ..Trash, Outbox, Sent, Inbox
""" # folders,and emailcount,
# try different numbers
messages = get_default_inbox_messages()
for i in range(emailcount):
message = messages[i]
print(message.Subject)
# this body of message can be parsed to extract table
print(message.Body)
print("="*30)
def send_email_with_attachment():
import win32com.client as win32
outlook = win32.Dispatch('outlook.application')
mail = outlook.CreateItem(0)
mail.To = 'To address'
mail.Subject = 'Message subject'
mail.Body = 'Message body'
mail.HTMLBody = '<h2>HTML Message body</h2>' # this field is optional
# To attach a file to the email (optional):
attachment = "Path to the attachment"
mail.Attachments.Add(attachment)
mail.Send()
--------------------------------------------------------------------------- ModuleNotFoundError Traceback (most recent call last) Cell In[3], line 23 21 import datetime 22 import os ---> 23 import win32com.client 26 def get_outlook(): 27 return win32com.client.Dispatch("Outlook.Application").GetNamespace("MAPI") ModuleNotFoundError: No module named 'win32com'
We will use XlsxWriter library to write excel files
For more details you see this library documentation
!pip install XlsxWriter
Requirement already satisfied: XlsxWriter in /home/vikrant/usr/local/default/lib/python3.10/site-packages (3.0.3)
import xlsxwriter
workbook = xlsxwriter.Workbook("Hello.xlsx")
worksheet = workbook.add_worksheet()
worksheet.write("A1", "Hello From Python")
workbook.close()
workbook = xlsxwriter.Workbook("Hello1.xlsx")
worksheet = workbook.add_worksheet("hello")
worksheet.write("A1", "Hello From Python") # cellid/ row/col
workbook.close()
records = [("IBM",123.0, 200),
("AGILENT",125.0,300),
("AT&T", 130, 100)]
workbook = xlsxwriter.Workbook("Records.xlsx")
wrecords = workbook.add_worksheet("records")
wrecords.write(0,0,"Ticker") # row,col, value
wrecords.write(0,1,"Price")
wrecords.write(0,2,"Volume")
row = 1
for ticker,price,volume in records:
wrecords.write(row, 0, ticker)
wrecords.write(row, 1, price)
wrecords.write(row, 2, volume)
row += 1
workbook.close()
records = [("IBM",123.0, 200),
("AGILENT",125.0,300),
("AT&T", 130, 100)]
workbook = xlsxwriter.Workbook("Formula.xlsx")
wrecords = workbook.add_worksheet("records")
wrecords.write(0,0,"Ticker") # row,col, value
wrecords.write(0,1,"Price")
wrecords.write(0,2,"Volume")
row = 1
for ticker,price,volume in records:
wrecords.write(row, 0, ticker)
wrecords.write(row, 1, price)
wrecords.write(row, 2, volume)
row += 1
wrecords.write(row, 1, "Total")
wrecords.write(row, 2, "=SUM(C2:C4)")
workbook.close()
records = [("IBM",123.0, 200),
("AGILENT",125.0,300),
("AT&T", 130, 100)]
workbook = xlsxwriter.Workbook("Formatting.xlsx")
wrecords = workbook.add_worksheet("records")
bold = workbook.add_format({"bold":True})
wrecords.write(0,0,"Ticker", bold) # row,col, value
wrecords.write(0,1,"Price", bold)
wrecords.write(0,2,"Volume", bold)
moneyformat = workbook.add_format({"num_format":"$#,##0"})
row = 1
for ticker,price,volume in records:
wrecords.write(row, 0, ticker)
wrecords.write(row, 1, price, moneyformat)
wrecords.write(row, 2, volume)
row += 1
wrecords.write(row, 1, "Total", bold)
wrecords.write(row, 2, "=SUM(C2:C4)")
workbook.close()
import pandas as pd
wallet = pd.read_csv("wallet.csv", index_col=0)
wallet.to_excel("wallet.xlsx", engine="xlsxwriter")
wallet.to_excel("wallet.xlsx", sheet_name= "expenses", engine="xlsxwriter")
We will use sqlite3 for training purposes
import sqlite3
conn = sqlite3.connect("test1.db") # this will create the file if it is not there
cur = conn.cursor()
cur.execute("create table person (name varchar(100), email varchar(100));")
cur.execute("insert into person (name, email) values ('alice', 'alice@wonder.land');")
conn.commit()
conn.close()
conn = sqlite3.connect("test1.db")
cur = conn.cursor()
results = cur.execute("select * from person")
results.fetchall()
[('alice', 'alice@wonder.land')]
def find_person(conn, email):
q = f"select * from person where email='{email}'" # not a good way to make query
print(q)
cur = conn.cursor()
r = cur.execute(q)
return r.fetchall()
find_person(conn, email="alice@wonder.land")
select * from person where email='alice@wonder.land'
[('alice', 'alice@wonder.land')]
def find_person(conn, email):
q = "select * from person where email=?"
cur = conn.cursor()
r = cur.execute(q, (email,))
return r.fetchone()
(1) # this is not a tuple
1
(1,) # this is tuple
(1,)
find_person(conn, "alice@wonder.land")
('alice', 'alice@wonder.land')
def query(conn, querystring, params=()):
cur = conn.cursor()
r = cur.execute(querystring, params)
return r.fetchall()
def find_person_with_name(conn, name):
return query(conn, "select * from person where name=?", (name,))
find_person_with_name(conn, "alice")
[('alice', 'alice@wonder.land')]
def find_person_with_email(conn, email):
return query(conn, "select * from person where email=?", (email,))
find_person_with_email(conn, "alice@wonder.land")
[('alice', 'alice@wonder.land')]
wallet
| date | category | description | debit | |
|---|---|---|---|---|
| 0 | 2021-03-07 14:53:28.377359 | Music | Amazon | 421.207327 |
| 1 | 2020-10-08 09:53:28.377359 | Food | Swiggy | 328.440080 |
| 2 | 2021-02-23 09:53:28.377359 | Books | Amazon | 244.679437 |
| 3 | 2020-11-01 14:53:28.377359 | Utility | Phone | 222.756318 |
| 4 | 2021-06-05 13:53:28.377359 | Books | Flipcart | 494.128492 |
| ... | ... | ... | ... | ... |
| 95 | 2021-07-19 13:53:28.377359 | Utility | Phone | 388.671213 |
| 96 | 2021-01-12 19:53:28.377359 | Books | Flipcart | 467.554562 |
| 97 | 2021-03-25 11:53:28.377359 | Utility | Phone | 320.789434 |
| 98 | 2021-05-13 15:53:28.377359 | Travel | Taxi | 442.096469 |
| 99 | 2020-10-11 16:53:28.377359 | Food | Hotel | 100.455501 |
100 rows × 4 columns
wallet.to_sql("expenses", con=conn, if_exists="replace") # be carefull while writing
100
r = conn.cursor().execute("select * from expenses")
for item in r:
print(item)
(0, '2021-03-07 14:53:28.377359', 'Music', 'Amazon', 421.2073272347991) (1, '2020-10-08 09:53:28.377359', 'Food', 'Swiggy', 328.4400802428426) (2, '2021-02-23 09:53:28.377359', 'Books', 'Amazon', 244.67943701511356) (3, '2020-11-01 14:53:28.377359', 'Utility', 'Phone', 222.7563175805277) (4, '2021-06-05 13:53:28.377359', 'Books', 'Flipcart', 494.1284923793595) (5, '2021-07-28 19:53:28.377359', 'Utility', 'Electricity', 219.94171130968408) (6, '2021-04-16 11:53:28.377359', 'Books', 'Amazon Kindle', 270.32259514795845) (7, '2021-02-15 10:53:28.377359', 'Food', 'Zomato', 457.1831036346536) (8, '2021-08-10 19:53:28.377359', 'Utility', 'Phone', 151.49637259947792) (9, '2020-11-29 14:53:28.377359', 'Travel', 'Auto', 443.61888423247854) (10, '2021-06-15 13:53:28.377359', 'Travel', 'Metro', 328.1754210974373) (11, '2021-07-24 13:53:28.377359', 'Food', 'Zomato', 434.4954675355444) (12, '2021-07-24 14:53:28.377359', 'Music', 'Amazon', 329.5360031897569) (13, '2021-06-06 10:53:28.377359', 'Utility', 'Phone', 154.0449491816659) (14, '2021-06-09 13:53:28.377359', 'Travel', 'Taxi', 485.2977429821982) (15, '2021-08-24 17:53:28.377359', 'Food', 'Zomato', 262.9439932340398) (16, '2021-03-05 19:53:28.377359', 'Utility', 'Phone', 390.31687619327926) (17, '2021-04-17 18:53:28.377359', 'Utility', 'Electricity', 316.8786754246636) (18, '2021-05-08 15:53:28.377359', 'Travel', 'Auto', 433.8224042777936) (19, '2021-05-16 10:53:28.377359', 'Books', 'Flipcart', 109.32590886550068) (20, '2020-10-12 18:53:28.377359', 'Travel', 'Auto', 365.9218082537661) (21, '2021-01-04 19:53:28.377359', 'Travel', 'Metro', 329.09737150258513) (22, '2021-06-24 15:53:28.377359', 'Food', 'Zomato', 489.1434830522253) (23, '2020-12-11 10:53:28.377359', 'Music', 'Netflix', 354.9402409919816) (24, '2021-05-31 11:53:28.377359', 'Books', 'Amazon', 498.10049550461065) (25, '2021-05-21 14:53:28.377359', 'Food', 'Hotel', 483.315863517772) (26, '2020-08-26 15:53:28.377359', 'Books', 'Amazon Kindle', 138.806577801854) (27, '2021-05-01 15:53:28.377359', 'Utility', 'Electricity', 103.68079074846584) (28, '2020-12-14 15:53:28.377359', 'Utility', 'Phone', 358.4599327957656) (29, '2021-06-20 10:53:28.377359', 'Utility', 'Electricity', 184.5577284049955) (30, '2020-09-15 18:53:28.377359', 'Food', 'Swiggy', 203.5292397894327) (31, '2020-09-25 11:53:28.377359', 'Books', 'Flipcart', 246.503527384528) (32, '2021-06-23 11:53:28.377359', 'Food', 'Zomato', 345.03043608141513) (33, '2021-05-14 18:53:28.377359', 'Food', 'Hotel', 449.24802955761743) (34, '2021-05-14 10:53:28.377359', 'Utility', 'Phone', 499.8581815222449) (35, '2021-02-18 18:53:28.377359', 'Travel', 'Metro', 441.6021430011205) (36, '2020-12-10 10:53:28.377359', 'Travel', 'Auto', 472.94143917262176) (37, '2021-04-18 16:53:28.377359', 'Music', 'Amazon', 266.0690783774673) (38, '2021-08-15 10:53:28.377359', 'Travel', 'Auto', 494.1243994056571) (39, '2021-05-17 17:53:28.377359', 'Food', 'Swiggy', 112.33316019807457) (40, '2021-07-19 12:53:28.377359', 'Food', 'Swiggy', 291.54598801930536) (41, '2021-02-20 19:53:28.377359', 'Utility', 'Phone', 425.18719068071806) (42, '2021-08-22 17:53:28.377359', 'Food', 'Hotel', 210.2562695007857) (43, '2020-09-21 12:53:28.377359', 'Utility', 'Phone', 486.0339327616073) (44, '2020-12-26 19:53:28.377359', 'Utility', 'Electricity', 257.92759337085425) (45, '2021-05-27 16:53:28.377359', 'Utility', 'Electricity', 154.74287259516657) (46, '2021-05-15 15:53:28.377359', 'Utility', 'Electricity', 359.3249716537848) (47, '2020-10-28 10:53:28.377359', 'Books', 'Flipcart', 310.408610004679) (48, '2021-08-23 17:53:28.377359', 'Utility', 'Electricity', 310.05840961423314) (49, '2021-03-16 09:53:28.377359', 'Music', 'spotify', 232.30340219121135) (50, '2020-12-24 11:53:28.377359', 'Food', 'Zomato', 463.0018749263554) (51, '2020-12-22 17:53:28.377359', 'Food', 'Zomato', 331.22702332837093) (52, '2021-03-26 09:53:28.377359', 'Travel', 'Taxi', 403.6100701341934) (53, '2021-01-27 09:53:28.377359', 'Utility', 'Electricity', 183.1866624101276) (54, '2020-11-16 10:53:28.377359', 'Music', 'spotify', 160.81754340768396) (55, '2021-01-21 19:53:28.377359', 'Books', 'Flipcart', 423.74970808720553) (56, '2021-05-19 18:53:28.377359', 'Utility', 'Phone', 319.3428762684619) (57, '2021-07-15 15:53:28.377359', 'Utility', 'Phone', 279.6090437716363) (58, '2021-05-20 10:53:28.377359', 'Food', 'Hotel', 255.8710346734312) (59, '2020-08-28 11:53:28.377359', 'Food', 'Swiggy', 208.2329120852039) (60, '2021-01-17 11:53:28.377359', 'Utility', 'Electricity', 382.5195101154448) (61, '2021-02-25 13:53:28.377359', 'Food', 'Hotel', 124.65827844174062) (62, '2021-01-27 19:53:28.377359', 'Books', 'Amazon Kindle', 497.7708601564023) (63, '2021-05-10 11:53:28.377359', 'Travel', 'Taxi', 355.9890502253258) (64, '2021-01-31 14:53:28.377359', 'Food', 'Zomato', 232.2223798622789) (65, '2020-10-23 18:53:28.377359', 'Music', 'Netflix', 188.7487426895118) (66, '2020-10-09 16:53:28.377359', 'Food', 'Swiggy', 263.9577700340145) (67, '2021-07-31 14:53:28.377359', 'Music', 'Netflix', 324.786916846731) (68, '2020-08-26 09:53:28.377359', 'Travel', 'Taxi', 279.1478844739421) (69, '2020-10-10 15:53:28.377359', 'Utility', 'Electricity', 300.52462041935115) (70, '2021-08-17 13:53:28.377359', 'Utility', 'Phone', 125.22977317126336) (71, '2021-03-30 12:53:28.377359', 'Food', 'Swiggy', 245.36050838040904) (72, '2021-06-30 18:53:28.377359', 'Books', 'Amazon', 294.6628689900488) (73, '2021-08-15 17:53:28.377359', 'Travel', 'Metro', 117.58872931045572) (74, '2021-03-20 11:53:28.377359', 'Travel', 'Taxi', 303.05542098520453) (75, '2021-03-03 12:53:28.377359', 'Food', 'Hotel', 425.6252909948148) (76, '2020-11-17 09:53:28.377359', 'Music', 'Netflix', 197.5346000167895) (77, '2021-01-18 14:53:28.377359', 'Books', 'Amazon Kindle', 482.1523430204321) (78, '2020-09-09 16:53:28.377359', 'Music', 'spotify', 415.3728938035302) (79, '2021-08-17 09:53:28.377359', 'Music', 'Netflix', 321.7634156544651) (80, '2021-02-17 09:53:28.377359', 'Food', 'Swiggy', 283.09570727160764) (81, '2020-10-29 16:53:28.377359', 'Food', 'Hotel', 470.08099539923614) (82, '2020-09-22 09:53:28.377359', 'Music', 'spotify', 411.14270120842224) (83, '2021-03-18 09:53:28.377359', 'Books', 'Flipcart', 451.5844070294999) (84, '2020-09-21 10:53:28.377359', 'Music', 'Netflix', 158.7936457269333) (85, '2021-01-12 09:53:28.377359', 'Music', 'Amazon', 130.37490757527) (86, '2021-05-07 16:53:28.377359', 'Food', 'Zomato', 198.450671792638) (87, '2021-05-19 15:53:28.377359', 'Food', 'Zomato', 378.82064134052473) (88, '2021-04-18 09:53:28.377359', 'Utility', 'Phone', 124.2212478444578) (89, '2021-04-12 14:53:28.377359', 'Music', 'Amazon', 218.487173429263) (90, '2020-12-01 14:53:28.377359', 'Music', 'Amazon', 101.57327588889416) (91, '2021-01-22 17:53:28.377359', 'Food', 'Hotel', 232.66346838787223) (92, '2021-01-12 19:53:28.377359', 'Travel', 'Taxi', 356.8426379886326) (93, '2021-01-11 09:53:28.377359', 'Utility', 'Electricity', 111.72080867898062) (94, '2021-01-04 13:53:28.377359', 'Utility', 'Phone', 431.1855366816298) (95, '2021-07-19 13:53:28.377359', 'Utility', 'Phone', 388.6712132388421) (96, '2021-01-12 19:53:28.377359', 'Books', 'Flipcart', 467.5545618966052) (97, '2021-03-25 11:53:28.377359', 'Utility', 'Phone', 320.78943360123816) (98, '2021-05-13 15:53:28.377359', 'Travel', 'Taxi', 442.0964693975505) (99, '2020-10-11 16:53:28.377359', 'Food', 'Hotel', 100.45550129902664)
pd.read_sql_query("select * from expenses", con=conn)
| index | date | category | description | debit | |
|---|---|---|---|---|---|
| 0 | 0 | 2021-03-07 14:53:28.377359 | Music | Amazon | 421.207327 |
| 1 | 1 | 2020-10-08 09:53:28.377359 | Food | Swiggy | 328.440080 |
| 2 | 2 | 2021-02-23 09:53:28.377359 | Books | Amazon | 244.679437 |
| 3 | 3 | 2020-11-01 14:53:28.377359 | Utility | Phone | 222.756318 |
| 4 | 4 | 2021-06-05 13:53:28.377359 | Books | Flipcart | 494.128492 |
| ... | ... | ... | ... | ... | ... |
| 95 | 95 | 2021-07-19 13:53:28.377359 | Utility | Phone | 388.671213 |
| 96 | 96 | 2021-01-12 19:53:28.377359 | Books | Flipcart | 467.554562 |
| 97 | 97 | 2021-03-25 11:53:28.377359 | Utility | Phone | 320.789434 |
| 98 | 98 | 2021-05-13 15:53:28.377359 | Travel | Taxi | 442.096469 |
| 99 | 99 | 2020-10-11 16:53:28.377359 | Food | Hotel | 100.455501 |
100 rows × 5 columns
you will need pyodbc package for this
import pyodbc
cnxn = pyodbc.connect(r'Driver={SQL Server};Server=.\SQLEXPRESS;Database=myDB;Trusted_Connection=yes;')
cursor = cnxn.cursor()
cursor.execute("SELECT LastName FROM myContacts")
while 1:
row = cursor.fetchone()
if not row:
break
print(row.LastName)
cnxn.close()
--------------------------------------------------------------------------- ModuleNotFoundError Traceback (most recent call last) Cell In[51], line 1 ----> 1 import pyodbc 2 cnxn = pyodbc.connect(r'Driver={SQL Server};Server=.\SQLEXPRESS;Database=myDB;Trusted_Connection=yes;') 3 cursor = cnxn.cursor() ModuleNotFoundError: No module named 'pyodbc'