Sep 13-17, 2021 Vikrant Patil
These notes are available online at https://notes.pipal.in/2021/arcesium_finop_batch1/
© Pipal Academy LLP
Day 1 | Day 2 | Day 3 | Day 4 | Day 5
We will be using jupyter hub from https://lab.pipal.in for this training.
login to hub and create a notebook with name module3-day2
import pandas as pd
stocks = pd.Series([542,345.8, 454, 500.5], index=["APPLE","AT&T","IBM","NIKE"])
stocks
APPLE 542.0 AT&T 345.8 IBM 454.0 NIKE 500.5 dtype: float64
stocks['APPLE']
542.0
stocks_ = pd.Series([542,345.8, 454, 500.5])
stocks_ # if no labels are given by default it takes it as number starting from 0
0 542.0 1 345.8 2 454.0 3 500.5 dtype: float64
stocks_dict = dict(zip(["APPLE","AT&T","IBM","NIKE"], [542,345.8, 454, 500.5]))
stocks_dict['APPLE']
542
stocks_dict[0]
--------------------------------------------------------------------------- KeyError Traceback (most recent call last) <ipython-input-10-f7d1349b86c2> in <module> ----> 1 stocks_dict[0] KeyError: 0
stocks['APPLE']
542.0
stocks[0]
542.0
stocks.mean()
460.575
stocks.std()
84.53907873482731
stocks.isnull() # if some item in series is empty or None or NaN
APPLE False AT&T False IBM False NIKE False dtype: bool
stocks.head()
APPLE 542.0 AT&T 345.8 IBM 454.0 NIKE 500.5 dtype: float64
stocks.head(2)
APPLE 542.0 AT&T 345.8 dtype: float64
stocks.sum()
1842.3
score_math1 = pd.Series([87, 88, 99], index=['student1','student2','student2'])
score_math2 = pd.Series([98, 87, 90], index=['student1','student2','student2'])
score_math1
student1 87 student2 88 student2 99 dtype: int64
score_math2
student1 98 student2 87 student2 90 dtype: int64
score_math1 + score_math2
student1 185 student2 175 student2 189 dtype: int64
score_math1/100*10
student1 8.7 student2 8.8 student2 9.9 dtype: float64
df = pd.DataFrame({
"value": [234.5, 221.6, 125.7, 100.5],
"high value": [240.3,22.5,127.3,105.0],
"low value":[233.0,220,123,104.0],
"volume": [100, 200, 50, 1000]},
index = ["APPLE","AT&T","IBM","NIKE"]
)
df
| value | high value | low value | volume | |
|---|---|---|---|---|
| APPLE | 234.5 | 240.3 | 233.0 | 100 |
| AT&T | 221.6 | 22.5 | 220.0 | 200 |
| IBM | 125.7 | 127.3 | 123.0 | 50 |
| NIKE | 100.5 | 105.0 | 104.0 | 1000 |
df['APPLE'] # label will not work for dataframe
--------------------------------------------------------------------------- KeyError Traceback (most recent call last) ~/anaconda3/lib/python3.8/site-packages/pandas/core/indexes/base.py in get_loc(self, key, method, tolerance) 2894 try: -> 2895 return self._engine.get_loc(casted_key) 2896 except KeyError as err: pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc() pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc() pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item() pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item() KeyError: 'APPLE' The above exception was the direct cause of the following exception: KeyError Traceback (most recent call last) <ipython-input-33-5691c2fd1745> in <module> ----> 1 df['APPLE'] # label will not work for dataframe ~/anaconda3/lib/python3.8/site-packages/pandas/core/frame.py in __getitem__(self, key) 2900 if self.columns.nlevels > 1: 2901 return self._getitem_multilevel(key) -> 2902 indexer = self.columns.get_loc(key) 2903 if is_integer(indexer): 2904 indexer = [indexer] ~/anaconda3/lib/python3.8/site-packages/pandas/core/indexes/base.py in get_loc(self, key, method, tolerance) 2895 return self._engine.get_loc(casted_key) 2896 except KeyError as err: -> 2897 raise KeyError(key) from err 2898 2899 if tolerance is not None: KeyError: 'APPLE'
value = df['value']
value
APPLE 234.5 AT&T 221.6 IBM 125.7 NIKE 100.5 Name: value, dtype: float64
value['APPLE']
234.5
df
| value | high value | low value | volume | |
|---|---|---|---|---|
| APPLE | 234.5 | 240.3 | 233.0 | 100 |
| AT&T | 221.6 | 22.5 | 220.0 | 200 |
| IBM | 125.7 | 127.3 | 123.0 | 50 |
| NIKE | 100.5 | 105.0 | 104.0 | 1000 |
df.value # only if column names do not have space in it
APPLE 234.5 AT&T 221.6 IBM 125.7 NIKE 100.5 Name: value, dtype: float64
df.high value
File "<ipython-input-40-bc75c75a26f6>", line 1 df.high value ^ SyntaxError: invalid syntax
df['high value']
APPLE 240.3 AT&T 22.5 IBM 127.3 NIKE 105.0 Name: high value, dtype: float64
df.mean()
value 170.575 high value 123.775 low value 170.000 volume 337.500 dtype: float64
df.describe()
| value | high value | low value | volume | |
|---|---|---|---|---|
| count | 4.000000 | 4.000000 | 4.000000 | 4.000000 |
| mean | 170.575000 | 123.775000 | 170.000000 | 337.500000 |
| std | 67.365242 | 89.813933 | 65.914085 | 446.047456 |
| min | 100.500000 | 22.500000 | 104.000000 | 50.000000 |
| 25% | 119.400000 | 84.375000 | 118.250000 | 87.500000 |
| 50% | 173.650000 | 116.150000 | 171.500000 | 150.000000 |
| 75% | 224.825000 | 155.550000 | 223.250000 | 400.000000 |
| max | 234.500000 | 240.300000 | 233.000000 | 1000.000000 |
df.head(2)
| value | high value | low value | volume | |
|---|---|---|---|---|
| APPLE | 234.5 | 240.3 | 233.0 | 100 |
| AT&T | 221.6 | 22.5 | 220.0 | 200 |
df.sum() # it is returning series... with column names as label
value 682.3 high value 495.1 low value 680.0 volume 1350.0 dtype: float64
df.sum()['value']
682.3000000000001
df.value.sum()
682.3000000000001
stocks
APPLE 542.0 AT&T 345.8 IBM 454.0 NIKE 500.5 dtype: float64
df
| value | high value | low value | volume | |
|---|---|---|---|---|
| APPLE | 234.5 | 240.3 | 233.0 | 100 |
| AT&T | 221.6 | 22.5 | 220.0 | 200 |
| IBM | 125.7 | 127.3 | 123.0 | 50 |
| NIKE | 100.5 | 105.0 | 104.0 | 1000 |
stocks['APPLE'] # by label
542.0
stocks[0] # by index
542.0
df.value # by column name as attribute
APPLE 234.5 AT&T 221.6 IBM 125.7 NIKE 100.5 Name: value, dtype: float64
df['value'] # by column name as dictionary like interface
APPLE 234.5 AT&T 221.6 IBM 125.7 NIKE 100.5 Name: value, dtype: float64
df.loc['APPLE'] # give location (row) with label 'APPLE'
value 234.5 high value 240.3 low value 233.0 volume 100.0 Name: APPLE, dtype: float64
df.iloc[0] # access row of dataframe by index
value 234.5 high value 240.3 low value 233.0 volume 100.0 Name: APPLE, dtype: float64
stocks
APPLE 542.0 AT&T 345.8 IBM 454.0 NIKE 500.5 dtype: float64
stocks[stocks>400] # this is filtering based on some criteria
APPLE 542.0 IBM 454.0 NIKE 500.5 dtype: float64
stocks>400
APPLE True AT&T False IBM True NIKE True dtype: bool
stocks[stocks>400]
APPLE 542.0 IBM 454.0 NIKE 500.5 dtype: float64
stocks[0:3] # simple slicing
APPLE 542.0 AT&T 345.8 IBM 454.0 dtype: float64
[s for s in stocks if s > 400] # this very slow and would not prefer to do this with series
df
| value | high value | low value | volume | |
|---|---|---|---|---|
| APPLE | 234.5 | 240.3 | 233.0 | 100 |
| AT&T | 221.6 | 22.5 | 220.0 | 200 |
| IBM | 125.7 | 127.3 | 123.0 | 50 |
| NIKE | 100.5 | 105.0 | 104.0 | 1000 |
stocks[stocks<400]
AT&T 345.8 dtype: float64
df
| value | high value | low value | volume | |
|---|---|---|---|---|
| APPLE | 234.5 | 240.3 | 233.0 | 100 |
| AT&T | 221.6 | 22.5 | 220.0 | 200 |
| IBM | 125.7 | 127.3 | 123.0 | 50 |
| NIKE | 100.5 | 105.0 | 104.0 | 1000 |
df[df.value<200]
| value | high value | low value | volume | |
|---|---|---|---|---|
| IBM | 125.7 | 127.3 | 123.0 | 50 |
| NIKE | 100.5 | 105.0 | 104.0 | 1000 |
df.value<200
APPLE False AT&T False IBM True NIKE True Name: value, dtype: bool
x = df.value<200
x
APPLE False AT&T False IBM True NIKE True Name: value, dtype: bool
df[x]
| value | high value | low value | volume | |
|---|---|---|---|---|
| IBM | 125.7 | 127.3 | 123.0 | 50 |
| NIKE | 100.5 | 105.0 | 104.0 | 1000 |
pd.read_csv("wallet.csv")
| Unnamed: 0 | date | category | description | debit | |
|---|---|---|---|---|---|
| 0 | 0 | 2021-03-07 14:53:28.377359 | Music | Amazon | 421.207327 |
| 1 | 1 | 2020-10-08 09:53:28.377359 | Food | Swiggy | 328.440080 |
| 2 | 2 | 2021-02-23 09:53:28.377359 | Books | Amazon | 244.679437 |
| 3 | 3 | 2020-11-01 14:53:28.377359 | Utility | Phone | 222.756318 |
| 4 | 4 | 2021-06-05 13:53:28.377359 | Books | Flipcart | 494.128492 |
| ... | ... | ... | ... | ... | ... |
| 95 | 95 | 2021-07-19 13:53:28.377359 | Utility | Phone | 388.671213 |
| 96 | 96 | 2021-01-12 19:53:28.377359 | Books | Flipcart | 467.554562 |
| 97 | 97 | 2021-03-25 11:53:28.377359 | Utility | Phone | 320.789434 |
| 98 | 98 | 2021-05-13 15:53:28.377359 | Travel | Taxi | 442.096469 |
| 99 | 99 | 2020-10-11 16:53:28.377359 | Food | Hotel | 100.455501 |
100 rows × 5 columns
wallet = pd.read_csv("wallet.csv", index_col=0)
wallet
| date | category | description | debit | |
|---|---|---|---|---|
| 0 | 2021-03-07 14:53:28.377359 | Music | Amazon | 421.207327 |
| 1 | 2020-10-08 09:53:28.377359 | Food | Swiggy | 328.440080 |
| 2 | 2021-02-23 09:53:28.377359 | Books | Amazon | 244.679437 |
| 3 | 2020-11-01 14:53:28.377359 | Utility | Phone | 222.756318 |
| 4 | 2021-06-05 13:53:28.377359 | Books | Flipcart | 494.128492 |
| ... | ... | ... | ... | ... |
| 95 | 2021-07-19 13:53:28.377359 | Utility | Phone | 388.671213 |
| 96 | 2021-01-12 19:53:28.377359 | Books | Flipcart | 467.554562 |
| 97 | 2021-03-25 11:53:28.377359 | Utility | Phone | 320.789434 |
| 98 | 2021-05-13 15:53:28.377359 | Travel | Taxi | 442.096469 |
| 99 | 2020-10-11 16:53:28.377359 | Food | Hotel | 100.455501 |
100 rows × 4 columns
wallet.describe()
| debit | |
|---|---|
| count | 100.000000 |
| mean | 310.595905 |
| std | 121.178218 |
| min | 100.455501 |
| 25% | 216.429447 |
| 50% | 318.110776 |
| 75% | 424.109079 |
| max | 499.858182 |
wallet.head()
| date | category | description | debit | |
|---|---|---|---|---|
| 0 | 2021-03-07 14:53:28.377359 | Music | Amazon | 421.207327 |
| 1 | 2020-10-08 09:53:28.377359 | Food | Swiggy | 328.440080 |
| 2 | 2021-02-23 09:53:28.377359 | Books | Amazon | 244.679437 |
| 3 | 2020-11-01 14:53:28.377359 | Utility | Phone | 222.756318 |
| 4 | 2021-06-05 13:53:28.377359 | Books | Flipcart | 494.128492 |
wallet.debit.sum()
31059.590543177277
music_wallet = wallet[wallet.category=='Music']
music_wallet
| date | category | description | debit | |
|---|---|---|---|---|
| 0 | 2021-03-07 14:53:28.377359 | Music | Amazon | 421.207327 |
| 12 | 2021-07-24 14:53:28.377359 | Music | Amazon | 329.536003 |
| 23 | 2020-12-11 10:53:28.377359 | Music | Netflix | 354.940241 |
| 37 | 2021-04-18 16:53:28.377359 | Music | Amazon | 266.069078 |
| 49 | 2021-03-16 09:53:28.377359 | Music | spotify | 232.303402 |
| 54 | 2020-11-16 10:53:28.377359 | Music | spotify | 160.817543 |
| 65 | 2020-10-23 18:53:28.377359 | Music | Netflix | 188.748743 |
| 67 | 2021-07-31 14:53:28.377359 | Music | Netflix | 324.786917 |
| 76 | 2020-11-17 09:53:28.377359 | Music | Netflix | 197.534600 |
| 78 | 2020-09-09 16:53:28.377359 | Music | spotify | 415.372894 |
| 79 | 2021-08-17 09:53:28.377359 | Music | Netflix | 321.763416 |
| 82 | 2020-09-22 09:53:28.377359 | Music | spotify | 411.142701 |
| 84 | 2020-09-21 10:53:28.377359 | Music | Netflix | 158.793646 |
| 85 | 2021-01-12 09:53:28.377359 | Music | Amazon | 130.374908 |
| 89 | 2021-04-12 14:53:28.377359 | Music | Amazon | 218.487173 |
| 90 | 2020-12-01 14:53:28.377359 | Music | Amazon | 101.573276 |
music_wallet.debit.sum() # expenses done for music
4233.451868232711
netflix = wallet[wallet.description=="Netflix"]
netflix
| date | category | description | debit | |
|---|---|---|---|---|
| 23 | 2020-12-11 10:53:28.377359 | Music | Netflix | 354.940241 |
| 65 | 2020-10-23 18:53:28.377359 | Music | Netflix | 188.748743 |
| 67 | 2021-07-31 14:53:28.377359 | Music | Netflix | 324.786917 |
| 76 | 2020-11-17 09:53:28.377359 | Music | Netflix | 197.534600 |
| 79 | 2021-08-17 09:53:28.377359 | Music | Netflix | 321.763416 |
| 84 | 2020-09-21 10:53:28.377359 | Music | Netflix | 158.793646 |
netflix.debit.sum()
1546.5675619264125
!python3 -m pip install pandas openpyxl lxml
Requirement already satisfied: pandas in /home/vikrant/anaconda3/lib/python3.8/site-packages (1.1.3) Requirement already satisfied: openpyxl in /home/vikrant/anaconda3/lib/python3.8/site-packages (3.0.5) Requirement already satisfied: lxml in /home/vikrant/anaconda3/lib/python3.8/site-packages (4.6.1) Requirement already satisfied: python-dateutil>=2.7.3 in /home/vikrant/anaconda3/lib/python3.8/site-packages (from pandas) (2.8.1) Requirement already satisfied: pytz>=2017.2 in /home/vikrant/anaconda3/lib/python3.8/site-packages (from pandas) (2020.1) Requirement already satisfied: numpy>=1.15.4 in /home/vikrant/anaconda3/lib/python3.8/site-packages (from pandas) (1.19.2) Requirement already satisfied: jdcal in /home/vikrant/anaconda3/lib/python3.8/site-packages (from openpyxl) (1.4.1) Requirement already satisfied: et-xmlfile in /home/vikrant/anaconda3/lib/python3.8/site-packages (from openpyxl) (1.0.1) Requirement already satisfied: six>=1.5 in /home/vikrant/anaconda3/lib/python3.8/site-packages (from python-dateutil>=2.7.3->pandas) (1.15.0)
url = "https://raw.githubusercontent.com/vikipedia/python-trainings/master/online_course/source/module2/wallet.csv"
w = pd.read_csv(url,
index_col=0)
w
| date | category | description | debit | |
|---|---|---|---|---|
| 0 | 2021-03-07 14:53:28.377359 | Music | Amazon | 421.207327 |
| 1 | 2020-10-08 09:53:28.377359 | Food | Swiggy | 328.440080 |
| 2 | 2021-02-23 09:53:28.377359 | Books | Amazon | 244.679437 |
| 3 | 2020-11-01 14:53:28.377359 | Utility | Phone | 222.756318 |
| 4 | 2021-06-05 13:53:28.377359 | Books | Flipcart | 494.128492 |
| ... | ... | ... | ... | ... |
| 95 | 2021-07-19 13:53:28.377359 | Utility | Phone | 388.671213 |
| 96 | 2021-01-12 19:53:28.377359 | Books | Flipcart | 467.554562 |
| 97 | 2021-03-25 11:53:28.377359 | Utility | Phone | 320.789434 |
| 98 | 2021-05-13 15:53:28.377359 | Travel | Taxi | 442.096469 |
| 99 | 2020-10-11 16:53:28.377359 | Food | Hotel | 100.455501 |
100 rows × 4 columns
%%file stocks.csv
symbol,value,high value,low value,volume
APPLE,234.5,240.3,233.0,100
AT&T,221.6,22.5,220.0,200
IBM,125.7,127.3,123.0,50
NIKE,100.5,105.0,104.0,1000
Writing stocks.csv
pd.read_csv("stocks.csv")
| symbol | value | high value | low value | volume | |
|---|---|---|---|---|---|
| 0 | APPLE | 234.5 | 240.3 | 233.0 | 100 |
| 1 | AT&T | 221.6 | 22.5 | 220.0 | 200 |
| 2 | IBM | 125.7 | 127.3 | 123.0 | 50 |
| 3 | NIKE | 100.5 | 105.0 | 104.0 | 1000 |
stocks =pd.read_csv("stocks.csv", index_col=0)
stocks
| value | high value | low value | volume | |
|---|---|---|---|---|
| symbol | ||||
| APPLE | 234.5 | 240.3 | 233.0 | 100 |
| AT&T | 221.6 | 22.5 | 220.0 | 200 |
| IBM | 125.7 | 127.3 | 123.0 | 50 |
| NIKE | 100.5 | 105.0 | 104.0 | 1000 |
stocks.value['APPLE']
234.5
%%file stocks1.csv
symbol,value,high value,low value,volume
APPLE,234.5,240.3,233.0,
AT&T,221.6,22.5,220.0,200
IBM,125.7,127.3,,50
NIKE,100.5,105.0,104.0,1000
Writing stocks1.csv
stocks1 = pd.read_csv("stocks1.csv", index_col=0)
stocks1
| value | high value | low value | volume | |
|---|---|---|---|---|
| symbol | ||||
| APPLE | 234.5 | 240.3 | 233.0 | NaN |
| AT&T | 221.6 | 22.5 | 220.0 | 200.0 |
| IBM | 125.7 | 127.3 | NaN | 50.0 |
| NIKE | 100.5 | 105.0 | 104.0 | 1000.0 |
stocks1.isnull().sum()
value 0 high value 0 low value 1 volume 1 dtype: int64
stocks1.sum()
value 682.3 high value 495.1 low value 557.0 volume 1250.0 dtype: float64
pd.DataFrame({"y1":[90,30],
"y2":[80,68]},
index = ["maths",'english'])
| y1 | y2 | |
|---|---|---|
| maths | 90 | 80 |
| english | 30 | 68 |
y1 = pd.Series([90,30], index=['maths','english'])
y2 = pd.Series([80,68], index=['maths','english'])
pd.DataFrame({"y1":y1, "y2":y2})
| y1 | y2 | |
|---|---|---|
| maths | 90 | 80 |
| english | 30 | 68 |
y1 = pd.Series([90,30], index=['maths','english'])
y2 = pd.Series([80,68], index=['hindi','english'])
pd.DataFrame({"y1":y1, "y2":y2})
| y1 | y2 | |
|---|---|---|
| english | 30.0 | 68.0 |
| hindi | NaN | 80.0 |
| maths | 90.0 | NaN |
wallet.date
0 2021-03-07 14:53:28.377359
1 2020-10-08 09:53:28.377359
2 2021-02-23 09:53:28.377359
3 2020-11-01 14:53:28.377359
4 2021-06-05 13:53:28.377359
...
95 2021-07-19 13:53:28.377359
96 2021-01-12 19:53:28.377359
97 2021-03-25 11:53:28.377359
98 2021-05-13 15:53:28.377359
99 2020-10-11 16:53:28.377359
Name: date, Length: 100, dtype: object
wallet.date.str.split(" ")
0 [2021-03-07, 14:53:28.377359]
1 [2020-10-08, 09:53:28.377359]
2 [2021-02-23, 09:53:28.377359]
3 [2020-11-01, 14:53:28.377359]
4 [2021-06-05, 13:53:28.377359]
...
95 [2021-07-19, 13:53:28.377359]
96 [2021-01-12, 19:53:28.377359]
97 [2021-03-25, 11:53:28.377359]
98 [2021-05-13, 15:53:28.377359]
99 [2020-10-11, 16:53:28.377359]
Name: date, Length: 100, dtype: object
timecols_df = wallet.date.str.split(" ", expand=True)
timecols_df
| 0 | 1 | |
|---|---|---|
| 0 | 2021-03-07 | 14:53:28.377359 |
| 1 | 2020-10-08 | 09:53:28.377359 |
| 2 | 2021-02-23 | 09:53:28.377359 |
| 3 | 2020-11-01 | 14:53:28.377359 |
| 4 | 2021-06-05 | 13:53:28.377359 |
| ... | ... | ... |
| 95 | 2021-07-19 | 13:53:28.377359 |
| 96 | 2021-01-12 | 19:53:28.377359 |
| 97 | 2021-03-25 | 11:53:28.377359 |
| 98 | 2021-05-13 | 15:53:28.377359 |
| 99 | 2020-10-11 | 16:53:28.377359 |
100 rows × 2 columns
timecols_df.rename(columns={0:"date", 1:"timestamp"})
| date | timestamp | |
|---|---|---|
| 0 | 2021-03-07 | 14:53:28.377359 |
| 1 | 2020-10-08 | 09:53:28.377359 |
| 2 | 2021-02-23 | 09:53:28.377359 |
| 3 | 2020-11-01 | 14:53:28.377359 |
| 4 | 2021-06-05 | 13:53:28.377359 |
| ... | ... | ... |
| 95 | 2021-07-19 | 13:53:28.377359 |
| 96 | 2021-01-12 | 19:53:28.377359 |
| 97 | 2021-03-25 | 11:53:28.377359 |
| 98 | 2021-05-13 | 15:53:28.377359 |
| 99 | 2020-10-11 | 16:53:28.377359 |
100 rows × 2 columns
timecols_df = timecols_df.rename(columns={0:"date", 1:"timestamp"})
timecols_df
| date | timestamp | |
|---|---|---|
| 0 | 2021-03-07 | 14:53:28.377359 |
| 1 | 2020-10-08 | 09:53:28.377359 |
| 2 | 2021-02-23 | 09:53:28.377359 |
| 3 | 2020-11-01 | 14:53:28.377359 |
| 4 | 2021-06-05 | 13:53:28.377359 |
| ... | ... | ... |
| 95 | 2021-07-19 | 13:53:28.377359 |
| 96 | 2021-01-12 | 19:53:28.377359 |
| 97 | 2021-03-25 | 11:53:28.377359 |
| 98 | 2021-05-13 | 15:53:28.377359 |
| 99 | 2020-10-11 | 16:53:28.377359 |
100 rows × 2 columns
wallet
| date | category | description | debit | |
|---|---|---|---|---|
| 0 | 2021-03-07 14:53:28.377359 | Music | Amazon | 421.207327 |
| 1 | 2020-10-08 09:53:28.377359 | Food | Swiggy | 328.440080 |
| 2 | 2021-02-23 09:53:28.377359 | Books | Amazon | 244.679437 |
| 3 | 2020-11-01 14:53:28.377359 | Utility | Phone | 222.756318 |
| 4 | 2021-06-05 13:53:28.377359 | Books | Flipcart | 494.128492 |
| ... | ... | ... | ... | ... |
| 95 | 2021-07-19 13:53:28.377359 | Utility | Phone | 388.671213 |
| 96 | 2021-01-12 19:53:28.377359 | Books | Flipcart | 467.554562 |
| 97 | 2021-03-25 11:53:28.377359 | Utility | Phone | 320.789434 |
| 98 | 2021-05-13 15:53:28.377359 | Travel | Taxi | 442.096469 |
| 99 | 2020-10-11 16:53:28.377359 | Food | Hotel | 100.455501 |
100 rows × 4 columns
del wallet['date'] # this has modified original dataframe
wallet
| category | description | debit | |
|---|---|---|---|
| 0 | Music | Amazon | 421.207327 |
| 1 | Food | Swiggy | 328.440080 |
| 2 | Books | Amazon | 244.679437 |
| 3 | Utility | Phone | 222.756318 |
| 4 | Books | Flipcart | 494.128492 |
| ... | ... | ... | ... |
| 95 | Utility | Phone | 388.671213 |
| 96 | Books | Flipcart | 467.554562 |
| 97 | Utility | Phone | 320.789434 |
| 98 | Travel | Taxi | 442.096469 |
| 99 | Food | Hotel | 100.455501 |
100 rows × 3 columns
wallet['date'] = timecols_df['date']
wallet['timestamp'] = timecols_df['timestamp']
wallet
| category | description | debit | date | timestamp | |
|---|---|---|---|---|---|
| 0 | Music | Amazon | 421.207327 | 2021-03-07 | 14:53:28.377359 |
| 1 | Food | Swiggy | 328.440080 | 2020-10-08 | 09:53:28.377359 |
| 2 | Books | Amazon | 244.679437 | 2021-02-23 | 09:53:28.377359 |
| 3 | Utility | Phone | 222.756318 | 2020-11-01 | 14:53:28.377359 |
| 4 | Books | Flipcart | 494.128492 | 2021-06-05 | 13:53:28.377359 |
| ... | ... | ... | ... | ... | ... |
| 95 | Utility | Phone | 388.671213 | 2021-07-19 | 13:53:28.377359 |
| 96 | Books | Flipcart | 467.554562 | 2021-01-12 | 19:53:28.377359 |
| 97 | Utility | Phone | 320.789434 | 2021-03-25 | 11:53:28.377359 |
| 98 | Travel | Taxi | 442.096469 | 2021-05-13 | 15:53:28.377359 |
| 99 | Food | Hotel | 100.455501 | 2020-10-11 | 16:53:28.377359 |
100 rows × 5 columns
wallet[wallet.date=="2021-03-07"]
| category | description | debit | date | timestamp | |
|---|---|---|---|---|---|
| 0 | Music | Amazon | 421.207327 | 2021-03-07 | 14:53:28.377359 |
category_group = wallet.groupby("category")
category_group
<pandas.core.groupby.generic.DataFrameGroupBy object at 0x7fd2f20f2040>
category_group.sum()# give me mean debit (or all numeric columns) by category
| debit | |
|---|---|
| category | |
| Books | 4929.750393 |
| Food | 8281.189173 |
| Music | 4233.451868 |
| Travel | 6052.931876 |
| Utility | 7562.267233 |
category_group.mean() # give me mean debit (or all numeric columns) by category
| debit | |
|---|---|
| category | |
| Books | 352.125028 |
| Food | 306.710710 |
| Music | 264.590742 |
| Travel | 378.308242 |
| Utility | 280.083972 |
multiple_groups = wallet.groupby(['category', 'description'])
multiple_groups.sum()
| debit | ||
|---|---|---|
| category | description | |
| Books | Amazon | 1037.442802 |
| Amazon Kindle | 1389.052376 | |
| Flipcart | 2503.255216 | |
| Food | Hotel | 2752.174732 |
| Swiggy | 1936.495366 | |
| Zomato | 3592.519075 | |
| Music | Amazon | 1467.247766 |
| Netflix | 1546.567562 | |
| spotify | 1219.636541 | |
| Travel | Auto | 2210.428935 |
| Metro | 1216.463665 | |
| Taxi | 2626.039276 | |
| Utility | Electricity | 2885.064355 |
| Phone | 4677.202878 |
wallet[wallet.category=='Music'].sum()
category MusicMusicMusicMusicMusicMusicMusicMusicMusicM... description AmazonAmazonNetflixAmazonspotifyspotifyNetflix... debit 4233.45 date 2021-03-072021-07-242020-12-112021-04-182021-0... timestamp 14:53:28.37735914:53:28.37735910:53:28.3773591... dtype: object
category_group.sum()
| debit | |
|---|---|
| category | |
| Books | 4929.750393 |
| Food | 8281.189173 |
| Music | 4233.451868 |
| Travel | 6052.931876 |
| Utility | 7562.267233 |
wallet
| category | description | debit | date | timestamp | |
|---|---|---|---|---|---|
| 0 | Music | Amazon | 421.207327 | 2021-03-07 | 14:53:28.377359 |
| 1 | Food | Swiggy | 328.440080 | 2020-10-08 | 09:53:28.377359 |
| 2 | Books | Amazon | 244.679437 | 2021-02-23 | 09:53:28.377359 |
| 3 | Utility | Phone | 222.756318 | 2020-11-01 | 14:53:28.377359 |
| 4 | Books | Flipcart | 494.128492 | 2021-06-05 | 13:53:28.377359 |
| ... | ... | ... | ... | ... | ... |
| 95 | Utility | Phone | 388.671213 | 2021-07-19 | 13:53:28.377359 |
| 96 | Books | Flipcart | 467.554562 | 2021-01-12 | 19:53:28.377359 |
| 97 | Utility | Phone | 320.789434 | 2021-03-25 | 11:53:28.377359 |
| 98 | Travel | Taxi | 442.096469 | 2021-05-13 | 15:53:28.377359 |
| 99 | Food | Hotel | 100.455501 | 2020-10-11 | 16:53:28.377359 |
100 rows × 5 columns
wallet.to_csv("wallet_from_frame.csv")
!python3 head.py wallet_from_frame.csv
,category,description,debit,date,timestamp 0,Music,Amazon,421.2073272347991,2021-03-07,14:53:28.377359 1,Food,Swiggy,328.4400802428426,2020-10-08,09:53:28.377359 2,Books,Amazon,244.67943701511356,2021-02-23,09:53:28.377359 3,Utility,Phone,222.75631758052768,2020-11-01,14:53:28.377359 4,Books,Flipcart,494.1284923793595,2021-06-05,13:53:28.377359 5,Utility,Electricity,219.9417113096841,2021-07-28,19:53:28.377359 6,Books,Amazon Kindle,270.32259514795845,2021-04-16,11:53:28.377359 7,Food,Zomato,457.1831036346536,2021-02-15,10:53:28.377359 8,Utility,Phone,151.4963725994779,2021-08-10,19:53:28.377359
wallet.to_csv("wallet_from_frame.csv", index=False)
!python3 head.py wallet_from_frame.csv
category,description,debit,date,timestamp Music,Amazon,421.2073272347991,2021-03-07,14:53:28.377359 Food,Swiggy,328.4400802428426,2020-10-08,09:53:28.377359 Books,Amazon,244.67943701511356,2021-02-23,09:53:28.377359 Utility,Phone,222.75631758052768,2020-11-01,14:53:28.377359 Books,Flipcart,494.1284923793595,2021-06-05,13:53:28.377359 Utility,Electricity,219.9417113096841,2021-07-28,19:53:28.377359 Books,Amazon Kindle,270.32259514795845,2021-04-16,11:53:28.377359 Food,Zomato,457.1831036346536,2021-02-15,10:53:28.377359 Utility,Phone,151.4963725994779,2021-08-10,19:53:28.377359
music_wallet
| date | category | description | debit | |
|---|---|---|---|---|
| 0 | 2021-03-07 14:53:28.377359 | Music | Amazon | 421.207327 |
| 12 | 2021-07-24 14:53:28.377359 | Music | Amazon | 329.536003 |
| 23 | 2020-12-11 10:53:28.377359 | Music | Netflix | 354.940241 |
| 37 | 2021-04-18 16:53:28.377359 | Music | Amazon | 266.069078 |
| 49 | 2021-03-16 09:53:28.377359 | Music | spotify | 232.303402 |
| 54 | 2020-11-16 10:53:28.377359 | Music | spotify | 160.817543 |
| 65 | 2020-10-23 18:53:28.377359 | Music | Netflix | 188.748743 |
| 67 | 2021-07-31 14:53:28.377359 | Music | Netflix | 324.786917 |
| 76 | 2020-11-17 09:53:28.377359 | Music | Netflix | 197.534600 |
| 78 | 2020-09-09 16:53:28.377359 | Music | spotify | 415.372894 |
| 79 | 2021-08-17 09:53:28.377359 | Music | Netflix | 321.763416 |
| 82 | 2020-09-22 09:53:28.377359 | Music | spotify | 411.142701 |
| 84 | 2020-09-21 10:53:28.377359 | Music | Netflix | 158.793646 |
| 85 | 2021-01-12 09:53:28.377359 | Music | Amazon | 130.374908 |
| 89 | 2021-04-12 14:53:28.377359 | Music | Amazon | 218.487173 |
| 90 | 2020-12-01 14:53:28.377359 | Music | Amazon | 101.573276 |
category_group.sum()
| debit | |
|---|---|
| category | |
| Books | 4929.750393 |
| Food | 8281.189173 |
| Music | 4233.451868 |
| Travel | 6052.931876 |
| Utility | 7562.267233 |
category_group.sum().to_csv("wallet_sum_by_category.csv")
!python3 cat.py wallet_sum_by_category.csv
category,debit Books,4929.750393283797 Food,8281.189172581233 Music,4233.451868232711 Travel,6052.931876440963 Utility,7562.267232638568
writer = pd.ExcelWriter("wallet1.xlsx", engine="xlsxwriter")
wallet.to_excel(writer, sheet_name="statement")
writer.save()
!ls wallet*
wallet1.xlsx wallet.csv wallet_from_frame.csv wallet_sum_by_category.csv