Dec 17-23, 2020 Vikrant Patil
These notes are available online at http://notes.pipal.in/2020/arcesium_finop_batch3/module3-day2.html
© Pipal Academy LLP
Day 1 | Day 2 | Day 3 | Day 4 | Day 5
We will be using JupyterHub at http://lab.pipal.in for this training. Create a notebook named module3-day2.ipynb for today's session. Before you start, shut down all kernels except the one for today's notebook.
import pandas as pd
wallet = pd.read_csv("wallet.csv")
wallet
# rows whose category is "Music"
wallet[wallet.category=="Music"]
# rows with a debit above 200
wallet[wallet.debit > 200]
# NOTE: the second mask below is computed on the full `wallet`, not on the
# already-filtered frame it is applied to
wallet[wallet.category=="Music"][wallet.debit>300] # category -> Music and amount > 300
# the same filter in two explicit steps: here the second mask comes from `music` itself
music = wallet[wallet.category=="Music"]
music
music[music.debit>300]
# several conditions in one mask: & is element-wise AND, | is element-wise OR;
# each comparison is wrapped in its own parentheses
wallet[(wallet.description=="Amazon") & (wallet.category=='Music')]
wallet[(wallet.description=="Netflix") & (wallet.category=='Music')]
wallet[(wallet.description=="Netflix") | (wallet.description=='Amazon')]
def total_expenditure(wallet, category):
    """Return the total debit amount recorded for one category.

    Parameters
    ----------
    wallet : pandas.DataFrame
        Transactions table with at least ``category`` and ``debit`` columns.
    category : str
        Label matched exactly against the ``category`` column.

    Returns
    -------
    The sum of ``debit`` over the matching rows; 0 when no row matches.
    """
    # Build the row mask once and select the debit column in the same step.
    in_category = wallet["category"] == category
    return wallet.loc[in_category, "debit"].sum()
total_expenditure(wallet, "Books")
total_expenditure(wallet, "Music")
total_expenditure(wallet, "Food")
wallet.category.unique()
# Print the total debit for every distinct category present in the data.
for cat in wallet.category.unique():
    print("Expenditure for {} is".format(cat), total_expenditure(wallet, cat))
# numeric_only=True keeps the text columns (e.g. description) out of mean/sum,
# so the result contains numeric columns only regardless of the pandas version.
wallet.groupby("category").mean(numeric_only=True) # average for every numeric column grouped by category
wallet.groupby("category").sum(numeric_only=True)
# max is also defined for text columns, so every column is kept here
wallet.groupby("category").max()
wallet.date
import datetime
# Current local date and time.
today = datetime.datetime.now()
today
# Adding a timedelta shifts a datetime; here by exactly one day.
tomorrow = today + datetime.timedelta(days=1)
tomorrow
# datetime objects can be compared directly
tomorrow > today
today > tomorrow
# to_datetime returns the converted values; the result is not stored here,
# so wallet.date is displayed again right after for comparison
pd.to_datetime(wallet.date)
wallet.date
# assign the converted values back to replace the column
wallet['date'] = pd.to_datetime(wallet.date)
wallet.date
# earliest and latest dates in the column
wallet.date.min()
wallet.date.max()
today
# keep only the rows dated before the `today` value captured earlier
wallet[wallet.date < today]
wallet.to_csv("wallet_dataframe.csv")
!head wallet_dataframe.csv
df = wallet[['date','category','description','debit']]
df.to_csv("wallet_dataframe1.csv")
!head wallet_dataframe1.csv
df.to_csv("wallet_dataframe1.csv", index=False)
!head wallet_dataframe1.csv
help(df.to_excel)
pd.read_csv("https://raw.githubusercontent.com/vikipedia/python-trainings/master/online_course/source/module2/wallet.csv")
# First table: five rows, keyed by the "labels" column.
df1 = pd.DataFrame(
    data=dict(
        a=[1, 2, 3, 4, 5],
        b=[43, 54, 23, 65, 34],
        labels=["x", "y", "z", "m", "n"],
    )
)
df1
# Second table: four rows sharing the "labels" key with df1.
df2 = pd.DataFrame(
    data=dict(
        c=[1, 2, 3, 4],
        d=[34, 56, 56, 45],
        labels=["x", "y", "z", "m"],
    )
)
df2
# Combine the two tables on their common "labels" column.
pd.merge(df2, df1, on="labels")
# Quotes for four tickers; the ticker symbol is the row index.
stocks = pd.DataFrame(
    data=dict(
        value=[123, 130, 140, 150],
        high=[125, 135, 145, 155],
        low=[120, 125, 138, 149],
        volume=[10, 101, 10, 20],
    ),
    index=["APPLE", "AT&T", "AGILENT", "HP"],
)
stocks
# Two more tickers with the same columns.
stocks1 = pd.DataFrame(
    data=dict(
        value=[123, 130],
        high=[125, 135],
        low=[120, 125],
        volume=[10, 101],
    ),
    index=["XEROX", "TESLA"],
)
stocks1
# Stack the two tables on top of each other.
pd.concat([stocks, stocks1])
# Two tables that share row labels through their index rather than a column.
dfj1 = pd.DataFrame(
    data=dict(
        a=[1, 2, 3, 4, 5],
        b=[32, 34, 23, 45, 12],
    ),
    index=["x", "y", "z", "m", "n"],
)
dfj2 = pd.DataFrame(
    data=dict(
        c=[5, 6, 7, 8],
        d=[65, 656, 67, 67],
    ),
    index=["x", "y", "z", "m"],
)
dfj1
dfj2
# join lines the rows up by index label
dfj1.join(dfj2)
wallet = pd.read_csv("https://raw.githubusercontent.com/vikipedia/python-trainings/master/online_course/source/module2/wallet.csv")
wallet.date
s = "2021-03-07 14:53:28.377359"
# split on whitespace: the first piece is the date, the second the time of day
date, timestamp = s.split()
date
# first "-"-separated field of the date, i.e. the year
date.split("-")[0]
# the same splitting applied to the whole column through the .str accessor
wallet.date.str.split()
# expand=True spreads the pieces into separate columns; column 0 is kept as the date part
date_ = wallet.date.str.split(expand=True)[0]
date_df = date_.str.split("-", expand=True)
# give the positional columns 0, 1, 2 readable names
date_with_columns = date_df.rename(columns={0:'year',1:'month',2:'day'})
date_with_columns
wallet
# Attach the extracted year so it can be used as a grouping key.
wallet['year'] = date_with_columns['year']
# numeric_only=True keeps the text columns (date, description, ...) out of the
# totals, so only numeric columns are summed regardless of the pandas version.
wallet.groupby('year').sum(numeric_only=True)
wallet.groupby(['category','year']).sum(numeric_only=True)
# Restrict the frame to the columns needed before grouping.
columns_of_interest = ['date','category','description','debit', 'year']
w = wallet[columns_of_interest]
w.groupby(['category','year']).sum(numeric_only=True)
wallet.groupby(['category','year']).sum()[['debit']] # this is going to give me dataframe
wallet.groupby(['category','year']).sum()['debit'] # this will give series
d = wallet.groupby(['category','year']).sum()[['debit']] # this is going to give me dataframe
type(d)
s = wallet.groupby(['category','year']).sum()['debit'] # this will give series
type(s)
wallet['debit']
wallet[['debit']]
To create a virtual environment on your system (Windows):
open cmd
then run
python -m venv NAME_OF_VIRTUAL_ENV
This will create a folder named NAME_OF_VIRTUAL_ENV. On unix/linux/mac the following structure will get created:
+ NAME_OF_VIRTUAL_ENV
|
+-bin
+-include
+-lib
+-lib64
+-pyvenv.cfg
For windows following structure will get created
+ NAME_OF_VIRTUAL_ENV
|
+-Include
+-Lib
+-Scripts
+-pyvenv.cfg
to activate environment on windows
c:\Users\vik> NAME_OF_VIRTUAL_ENV\Scripts\activate.bat
(NAME_OF_VIRTUAL_ENV) c:\Users\vik>
to deactivate environment on windows
(NAME_OF_VIRTUAL_ENV) c:\Users\vik> NAME_OF_VIRTUAL_ENV\Scripts\deactivate.bat
c:\Users\vik>
to activate on linux/mac/unix
source NAME_OF_VIRTUAL_ENV/bin/activate
(NAME_OF_VIRTUAL_ENV) $
Activate environment
c:\Users\vik> NAME_OF_VIRTUAL_ENV\Scripts\activate.bat
(NAME_OF_VIRTUAL_ENV) c:\Users\vik>
Run the pip command to install packages
(NAME_OF_VIRTUAL_ENV) c:\Users\vik> pip install pandas
(NAME_OF_VIRTUAL_ENV) c:\Users\vik> pip freeze
requirements.txt is a text file with one entry per line; each entry is a package name (and a version if required)
%%file requirements.txt
jupyter
pandas
requests
xlrd
jupyterlab