Module 3 - Day 1

Pandas

You can install pandas using pip command

pip install pandas

reading data with pandas

import pandas as pd
%%file stockdata.csv
symbol,day,price
IBM,Monday,111.71436961893693
IBM,Tuesday,141.21220022208635
IBM,Wednesday,112.40571010053796
IBM,Thursday,137.54133351926248
IBM,Friday,140.25154281801224
MICROSOFT,Monday,235.0403622499107
MICROSOFT,Tuesday,225.0206535036475
MICROSOFT,Wednesday,216.10342426936444
MICROSOFT,Thursday,200.38038844494193
MICROSOFT,Friday,235.80850482793264
APPLE,Monday,321.49182055844256
APPLE,Tuesday,340.63612771662815
APPLE,Wednesday,303.9065277507285
APPLE,Thursday,338.1350605764038
APPLE,Friday,318.3912296144338
Writing stockdata.csv
stockdata = pd.read_csv("stockdata.csv")
stockdata
symbol day price
0 IBM Monday 111.714370
1 IBM Tuesday 141.212200
2 IBM Wednesday 112.405710
3 IBM Thursday 137.541334
4 IBM Friday 140.251543
5 MICROSOFT Monday 235.040362
6 MICROSOFT Tuesday 225.020654
7 MICROSOFT Wednesday 216.103424
8 MICROSOFT Thursday 200.380388
9 MICROSOFT Friday 235.808505
10 APPLE Monday 321.491821
11 APPLE Tuesday 340.636128
12 APPLE Wednesday 303.906528
13 APPLE Thursday 338.135061
14 APPLE Friday 318.391230
csvurl = "https://raw.githubusercontent.com/vikipedia/python-trainings/master/online_course/source/module2/wallet.csv"
pd.read_csv(csvurl)
Unnamed: 0 date category description debit
0 0 2021-03-07 14:53:28.377359 Music Amazon 421.207327
1 1 2020-10-08 09:53:28.377359 Food Swiggy 328.440080
2 2 2021-02-23 09:53:28.377359 Books Amazon 244.679437
3 3 2020-11-01 14:53:28.377359 Utility Phone 222.756318
4 4 2021-06-05 13:53:28.377359 Books Flipcart 494.128492
... ... ... ... ... ...
95 95 2021-07-19 13:53:28.377359 Utility Phone 388.671213
96 96 2021-01-12 19:53:28.377359 Books Flipcart 467.554562
97 97 2021-03-25 11:53:28.377359 Utility Phone 320.789434
98 98 2021-05-13 15:53:28.377359 Travel Taxi 442.096469
99 99 2020-10-11 16:53:28.377359 Food Hotel 100.455501

100 rows × 5 columns

excelurl = "https://raw.githubusercontent.com/vikipedia/python-trainings/master/online_course/source/module2/wallet.xlsx"
pd.read_excel(excelurl) # by default it will load first sheet 
Unnamed: 0 date category description debit
0 0 2021-03-07 14:53:28.377359 Music Amazon 421.207327
1 1 2020-10-08 09:53:28.377359 Food Swiggy 328.440080
2 2 2021-02-23 09:53:28.377359 Books Amazon 244.679437
3 3 2020-11-01 14:53:28.377359 Utility Phone 222.756318
4 4 2021-06-05 13:53:28.377359 Books Flipcart 494.128492
... ... ... ... ... ...
95 95 2021-07-19 13:53:28.377359 Utility Phone 388.671213
96 96 2021-01-12 19:53:28.377359 Books Flipcart 467.554562
97 97 2021-03-25 11:53:28.377359 Utility Phone 320.789434
98 98 2021-05-13 15:53:28.377359 Travel Taxi 442.096469
99 99 2020-10-11 16:53:28.377359 Food Hotel 100.455501

100 rows × 5 columns

w = pd.read_excel(excelurl, sheet_name="wallet")
w
Unnamed: 0 date category description debit
0 0 2021-03-07 14:53:28.377359 Music Amazon 421.207327
1 1 2020-10-08 09:53:28.377359 Food Swiggy 328.440080
2 2 2021-02-23 09:53:28.377359 Books Amazon 244.679437
3 3 2020-11-01 14:53:28.377359 Utility Phone 222.756318
4 4 2021-06-05 13:53:28.377359 Books Flipcart 494.128492
... ... ... ... ... ...
95 95 2021-07-19 13:53:28.377359 Utility Phone 388.671213
96 96 2021-01-12 19:53:28.377359 Books Flipcart 467.554562
97 97 2021-03-25 11:53:28.377359 Utility Phone 320.789434
98 98 2021-05-13 15:53:28.377359 Travel Taxi 442.096469
99 99 2020-10-11 16:53:28.377359 Food Hotel 100.455501

100 rows × 5 columns

pd.read_excel(excelurl, sheet_name="squrs")
x sqr(x)
0 1 1
1 2 4
2 3 9
3 4 16
4 5 25
5 6 36
6 7 49
7 8 64
8 9 81
9 10 100
10 11 121
11 12 144
12 13 169
13 14 196
14 15 225
15 16 256
16 17 289
17 18 324
tables_moneycontrol = pd.read_html("https://www.moneycontrol.com/markets/indian-indices/")
type(tables_moneycontrol)
list
len(tables_moneycontrol)
6
tables_moneycontrol[0]
Stock Name Sector LTP Change %Chg
0 No Data No Data No Data No Data No Data
tables_moneycontrol[1]
Stock Name Sector LTP Change %Chg
0 TATA Cons. Prod Plantations - Tea & Coffee 1218.9 -42.65 -3.38
tables_moneycontrol[3]
Stock Name Sector LTP Change %Chg
0 Nestle Food Processing 2616.30 55.35 2.16
1 Hindalco Iron & Steel 535.35 8.40 1.59
2 BPCL Refineries 634.15 9.45 1.51
3 Cipla Pharmaceuticals 1506.35 21.10 1.42
4 Bharti Airtel Telecommunications - Service 1214.85 15.15 1.26
5 Sun Pharma Pharmaceuticals 1623.25 17.55 1.09
6 Bajaj Finserv Finance - Investments 1599.70 16.90 1.07
7 SBI Life Insura Life & Health Insurance 1525.65 15.25 1.01
8 Grasim Diversified 2250.00 21.90 0.98
9 Britannia Food Processing 4933.40 44.25 0.91
10 Adani Ports Infrastructure - General 1335.05 9.60 0.72
11 Divis Labs Pharmaceuticals 3626.55 24.95 0.69
12 Coal India Mining & Minerals 461.20 2.60 0.57
13 Dr Reddys Labs Pharmaceuticals 6358.95 34.80 0.55
14 Apollo Hospital Hospitals & Medical Services 6072.40 23.60 0.39
15 Adani Enterpris Trading 3239.00 12.45 0.39
16 HDFC Life Life & Health Insurance 624.20 2.25 0.36
17 UltraTechCement Cement - Major 9705.80 32.65 0.34
18 ITC Diversified 414.95 1.40 0.34
19 Axis Bank Banks - Private Sector 1115.00 3.55 0.32
20 Reliance Refineries 2964.25 6.40 0.22
21 Eicher Motors Auto - LCVs & HCVs 3793.00 7.75 0.20
22 TCS Computers - Software 4111.00 2.40 0.06
23 Titan Company Miscellaneous 3787.05 0.15 0.00
24 Asian Paints Paints & Varnishes 2870.45 -0.10 0.00
tables_moneycontrol[4]
Stock Name Sector LTP Change %Chg
0 No Data No Data No Data No Data No Data
tables_moneycontrol[5]
Stock Name Sector LTP Change %Chg
0 No Data No Data No Data No Data No Data

DataFrame/Series

stockdata
symbol day price
0 IBM Monday 111.714370
1 IBM Tuesday 141.212200
2 IBM Wednesday 112.405710
3 IBM Thursday 137.541334
4 IBM Friday 140.251543
5 MICROSOFT Monday 235.040362
6 MICROSOFT Tuesday 225.020654
7 MICROSOFT Wednesday 216.103424
8 MICROSOFT Thursday 200.380388
9 MICROSOFT Friday 235.808505
10 APPLE Monday 321.491821
11 APPLE Tuesday 340.636128
12 APPLE Wednesday 303.906528
13 APPLE Thursday 338.135061
14 APPLE Friday 318.391230
stockdata['symbol'] # like a dictinary! you can give column name in square bracket
0           IBM
1           IBM
2           IBM
3           IBM
4           IBM
5     MICROSOFT
6     MICROSOFT
7     MICROSOFT
8     MICROSOFT
9     MICROSOFT
10        APPLE
11        APPLE
12        APPLE
13        APPLE
14        APPLE
Name: symbol, dtype: object
stockdata.price # you can also access column like class attribute
0     111.714370
1     141.212200
2     112.405710
3     137.541334
4     140.251543
5     235.040362
6     225.020654
7     216.103424
8     200.380388
9     235.808505
10    321.491821
11    340.636128
12    303.906528
13    338.135061
14    318.391230
Name: price, dtype: float64
stockdata['price']
0     111.714370
1     141.212200
2     112.405710
3     137.541334
4     140.251543
5     235.040362
6     225.020654
7     216.103424
8     200.380388
9     235.808505
10    321.491821
11    340.636128
12    303.906528
13    338.135061
14    318.391230
Name: price, dtype: float64
%%file columns_with_space.csv
symbol,day,Today's price
IBM,Monday,111.71436961893693
IBM,Tuesday,141.21220022208635
IBM,Wednesday,112.40571010053796
IBM,Thursday,137.54133351926248
IBM,Friday,140.25154281801224
MICROSOFT,Monday,235.0403622499107
MICROSOFT,Tuesday,225.0206535036475
MICROSOFT,Wednesday,216.10342426936444
MICROSOFT,Thursday,200.38038844494193
MICROSOFT,Friday,235.80850482793264
APPLE,Monday,321.49182055844256
APPLE,Tuesday,340.63612771662815
APPLE,Wednesday,303.9065277507285
APPLE,Thursday,338.1350605764038
APPLE,Friday,318.3912296144338
Writing columns_with_space.csv
df = pd.read_csv("columns_with_space.csv")
df
symbol day Today's price
0 IBM Monday 111.714370
1 IBM Tuesday 141.212200
2 IBM Wednesday 112.405710
3 IBM Thursday 137.541334
4 IBM Friday 140.251543
5 MICROSOFT Monday 235.040362
6 MICROSOFT Tuesday 225.020654
7 MICROSOFT Wednesday 216.103424
8 MICROSOFT Thursday 200.380388
9 MICROSOFT Friday 235.808505
10 APPLE Monday 321.491821
11 APPLE Tuesday 340.636128
12 APPLE Wednesday 303.906528
13 APPLE Thursday 338.135061
14 APPLE Friday 318.391230
df.symbol
0           IBM
1           IBM
2           IBM
3           IBM
4           IBM
5     MICROSOFT
6     MICROSOFT
7     MICROSOFT
8     MICROSOFT
9     MICROSOFT
10        APPLE
11        APPLE
12        APPLE
13        APPLE
14        APPLE
Name: symbol, dtype: object
df.Today's price
SyntaxError: unterminated string literal (detected at line 1) (2692468768.py, line 1)
df["Today's price"]
0     111.714370
1     141.212200
2     112.405710
3     137.541334
4     140.251543
5     235.040362
6     225.020654
7     216.103424
8     200.380388
9     235.808505
10    321.491821
11    340.636128
12    303.906528
13    338.135061
14    318.391230
Name: Today's price, dtype: float64
columnname = "Today's price"
df[columnname]
0     111.714370
1     141.212200
2     112.405710
3     137.541334
4     140.251543
5     235.040362
6     225.020654
7     216.103424
8     200.380388
9     235.808505
10    321.491821
11    340.636128
12    303.906528
13    338.135061
14    318.391230
Name: Today's price, dtype: float64
df.columnname  # you can't make use variables here
AttributeError: 'DataFrame' object has no attribute 'columnname'
df[columnname]
0     111.714370
1     141.212200
2     112.405710
3     137.541334
4     140.251543
5     235.040362
6     225.020654
7     216.103424
8     200.380388
9     235.808505
10    321.491821
11    340.636128
12    303.906528
13    338.135061
14    318.391230
Name: Today's price, dtype: float64
stockdata
symbol day price
0 IBM Monday 111.714370
1 IBM Tuesday 141.212200
2 IBM Wednesday 112.405710
3 IBM Thursday 137.541334
4 IBM Friday 140.251543
5 MICROSOFT Monday 235.040362
6 MICROSOFT Tuesday 225.020654
7 MICROSOFT Wednesday 216.103424
8 MICROSOFT Thursday 200.380388
9 MICROSOFT Friday 235.808505
10 APPLE Monday 321.491821
11 APPLE Tuesday 340.636128
12 APPLE Wednesday 303.906528
13 APPLE Thursday 338.135061
14 APPLE Friday 318.391230
csvurl = "https://raw.githubusercontent.com/vikipedia/python-trainings/master/online_course/source/module2/wallet.csv"
wallet = pd.read_csv(csvurl)
wallet.head()
Unnamed: 0 date category description debit
0 0 2021-03-07 14:53:28.377359 Music Amazon 421.207327
1 1 2020-10-08 09:53:28.377359 Food Swiggy 328.440080
2 2 2021-02-23 09:53:28.377359 Books Amazon 244.679437
3 3 2020-11-01 14:53:28.377359 Utility Phone 222.756318
4 4 2021-06-05 13:53:28.377359 Books Flipcart 494.128492
wallet.head(10)
Unnamed: 0 date category description debit
0 0 2021-03-07 14:53:28.377359 Music Amazon 421.207327
1 1 2020-10-08 09:53:28.377359 Food Swiggy 328.440080
2 2 2021-02-23 09:53:28.377359 Books Amazon 244.679437
3 3 2020-11-01 14:53:28.377359 Utility Phone 222.756318
4 4 2021-06-05 13:53:28.377359 Books Flipcart 494.128492
5 5 2021-07-28 19:53:28.377359 Utility Electricity 219.941711
6 6 2021-04-16 11:53:28.377359 Books Amazon Kindle 270.322595
7 7 2021-02-15 10:53:28.377359 Food Zomato 457.183104
8 8 2021-08-10 19:53:28.377359 Utility Phone 151.496373
9 9 2020-11-29 14:53:28.377359 Travel Auto 443.618884
wallet.tail()# show last 5 entries in the table
Unnamed: 0 date category description debit
95 95 2021-07-19 13:53:28.377359 Utility Phone 388.671213
96 96 2021-01-12 19:53:28.377359 Books Flipcart 467.554562
97 97 2021-03-25 11:53:28.377359 Utility Phone 320.789434
98 98 2021-05-13 15:53:28.377359 Travel Taxi 442.096469
99 99 2020-10-11 16:53:28.377359 Food Hotel 100.455501
wallet.tail(15)
Unnamed: 0 date category description debit
85 85 2021-01-12 09:53:28.377359 Music Amazon 130.374908
86 86 2021-05-07 16:53:28.377359 Food Zomato 198.450672
87 87 2021-05-19 15:53:28.377359 Food Zomato 378.820641
88 88 2021-04-18 09:53:28.377359 Utility Phone 124.221248
89 89 2021-04-12 14:53:28.377359 Music Amazon 218.487173
90 90 2020-12-01 14:53:28.377359 Music Amazon 101.573276
91 91 2021-01-22 17:53:28.377359 Food Hotel 232.663468
92 92 2021-01-12 19:53:28.377359 Travel Taxi 356.842638
93 93 2021-01-11 09:53:28.377359 Utility Electricity 111.720809
94 94 2021-01-04 13:53:28.377359 Utility Phone 431.185537
95 95 2021-07-19 13:53:28.377359 Utility Phone 388.671213
96 96 2021-01-12 19:53:28.377359 Books Flipcart 467.554562
97 97 2021-03-25 11:53:28.377359 Utility Phone 320.789434
98 98 2021-05-13 15:53:28.377359 Travel Taxi 442.096469
99 99 2020-10-11 16:53:28.377359 Food Hotel 100.455501
stockdata
symbol day price
0 IBM Monday 111.714370
1 IBM Tuesday 141.212200
2 IBM Wednesday 112.405710
3 IBM Thursday 137.541334
4 IBM Friday 140.251543
5 MICROSOFT Monday 235.040362
6 MICROSOFT Tuesday 225.020654
7 MICROSOFT Wednesday 216.103424
8 MICROSOFT Thursday 200.380388
9 MICROSOFT Friday 235.808505
10 APPLE Monday 321.491821
11 APPLE Tuesday 340.636128
12 APPLE Wednesday 303.906528
13 APPLE Thursday 338.135061
14 APPLE Friday 318.391230
stockdata.info() # show basic information of all columns from this dataframe
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 15 entries, 0 to 14
Data columns (total 3 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   symbol  15 non-null     object 
 1   day     15 non-null     object 
 2   price   15 non-null     float64
dtypes: float64(1), object(2)
memory usage: 488.0+ bytes
prices = stockdata.price
prices
0     111.714370
1     141.212200
2     112.405710
3     137.541334
4     140.251543
5     235.040362
6     225.020654
7     216.103424
8     200.380388
9     235.808505
10    321.491821
11    340.636128
12    303.906528
13    338.135061
14    318.391230
Name: price, dtype: float64
prices.sum()
3378.0392557912696
prices.mean()
225.20261705275132
prices.std()
83.96223802273543
prices.min()
111.71436961893691
prices.max()
340.6361277166281

Filtering

stockdata
symbol day price
0 IBM Monday 111.714370
1 IBM Tuesday 141.212200
2 IBM Wednesday 112.405710
3 IBM Thursday 137.541334
4 IBM Friday 140.251543
5 MICROSOFT Monday 235.040362
6 MICROSOFT Tuesday 225.020654
7 MICROSOFT Wednesday 216.103424
8 MICROSOFT Thursday 200.380388
9 MICROSOFT Friday 235.808505
10 APPLE Monday 321.491821
11 APPLE Tuesday 340.636128
12 APPLE Wednesday 303.906528
13 APPLE Thursday 338.135061
14 APPLE Friday 318.391230
stockdata[stockdata.symbol=="IBM"] 
symbol day price
0 IBM Monday 111.714370
1 IBM Tuesday 141.212200
2 IBM Wednesday 112.405710
3 IBM Thursday 137.541334
4 IBM Friday 140.251543
ibm = stockdata[stockdata.symbol=="IBM"] 
ibm
symbol day price
0 IBM Monday 111.714370
1 IBM Tuesday 141.212200
2 IBM Wednesday 112.405710
3 IBM Thursday 137.541334
4 IBM Friday 140.251543
ibm.price
0    111.714370
1    141.212200
2    112.405710
3    137.541334
4    140.251543
Name: price, dtype: float64
ibm.price.mean()
128.62503125576717

problem

Write a function weekly_average which will take stock dataframe and stock name as argument and finds weekely average for that stock

>>> weekly_avearge(stockdata, "IBM")
128.62503125576717
def weekly_average(stockdata, stockname):
    subset = stockdata[stockdata.symbol == stockname]
    return subset.price.mean()
weekly_average(stockdata, "APPLE")
324.51215324332736
weekly_average(stockdata, "IBM")
128.62503125576717
weekly_average(stockdata, "MICROSOFT")
222.47066665915946
prices = pd.Series([111.714370, 141.212200, 112.405710, 137.541334, 140.251543])
prices
0    111.714370
1    141.212200
2    112.405710
3    137.541334
4    140.251543
dtype: float64
prices > 120
0    False
1     True
2    False
3     True
4     True
dtype: bool
gtthan120 = prices > 120
gtthan120
0    False
1     True
2    False
3     True
4     True
dtype: bool
prices[gtthan120] # this will filter out only those prices which are greater than 120
1    141.212200
3    137.541334
4    140.251543
dtype: float64
stockdata.symbol
0           IBM
1           IBM
2           IBM
3           IBM
4           IBM
5     MICROSOFT
6     MICROSOFT
7     MICROSOFT
8     MICROSOFT
9     MICROSOFT
10        APPLE
11        APPLE
12        APPLE
13        APPLE
14        APPLE
Name: symbol, dtype: object
stockdata.symbol == "IBM"
0      True
1      True
2      True
3      True
4      True
5     False
6     False
7     False
8     False
9     False
10    False
11    False
12    False
13    False
14    False
Name: symbol, dtype: bool

flagseries = stockdata.symbol == "IBM"
stockdata[flagseries] # it will take only those lines for which the flatseries is True
symbol day price
0 IBM Monday 111.714370
1 IBM Tuesday 141.212200
2 IBM Wednesday 112.405710
3 IBM Thursday 137.541334
4 IBM Friday 140.251543
cond = (stockdata.symbol == "IBM") & (stockdata.price > 120)
cond
0     False
1      True
2     False
3      True
4      True
5     False
6     False
7     False
8     False
9     False
10    False
11    False
12    False
13    False
14    False
dtype: bool
stockdata[cond]
symbol day price
1 IBM Tuesday 141.212200
3 IBM Thursday 137.541334
4 IBM Friday 140.251543
tables_moneycontrol = pd.read_html("https://www.moneycontrol.com/markets/indian-indices/")
indices_data = tables_moneycontrol[3]
indices_data
Stock Name Sector LTP Change %Chg
0 Nestle Food Processing 2618.05 57.10 2.23
1 Hindalco Iron & Steel 536.90 9.95 1.89
2 Grasim Diversified 2262.80 34.70 1.56
3 BPCL Refineries 633.50 8.80 1.41
4 Cipla Pharmaceuticals 1503.50 18.25 1.23
5 Sun Pharma Pharmaceuticals 1625.20 19.50 1.21
6 Dr Reddys Labs Pharmaceuticals 6399.15 75.00 1.19
7 Bharti Airtel Telecommunications - Service 1213.45 13.75 1.15
8 Bajaj Finserv Finance - Investments 1600.80 18.00 1.14
9 Adani Ports Infrastructure - General 1337.70 12.25 0.92
10 Divis Labs Pharmaceuticals 3628.80 27.20 0.76
11 UltraTechCement Cement - Major 9743.70 70.55 0.73
12 Coal India Mining & Minerals 461.15 2.55 0.56
13 HCL Tech Computers - Software 1646.50 8.85 0.54
14 Britannia Food Processing 4912.15 23.00 0.47
15 SBI Life Insura Life & Health Insurance 1516.65 6.25 0.41
16 TCS Computers - Software 4122.65 14.05 0.34
17 Adani Enterpris Trading 3237.40 10.85 0.34
18 Apollo Hospital Hospitals & Medical Services 6066.90 18.10 0.30
19 Eicher Motors Auto - LCVs & HCVs 3790.15 4.90 0.13
20 Reliance Refineries 2960.00 2.15 0.07
21 ITC Diversified 413.55 0.00 0.00
change1percent = indices_data["%Chg"] >= 1
indices_data[change1percent]
Stock Name Sector LTP Change %Chg
0 Nestle Food Processing 2618.05 57.10 2.23
1 Hindalco Iron & Steel 536.90 9.95 1.89
2 Grasim Diversified 2262.80 34.70 1.56
3 BPCL Refineries 633.50 8.80 1.41
4 Cipla Pharmaceuticals 1503.50 18.25 1.23
5 Sun Pharma Pharmaceuticals 1625.20 19.50 1.21
6 Dr Reddys Labs Pharmaceuticals 6399.15 75.00 1.19
7 Bharti Airtel Telecommunications - Service 1213.45 13.75 1.15
8 Bajaj Finserv Finance - Investments 1600.80 18.00 1.14
indices_data[indices_data.Sector == "Pharmaceuticals"]
Stock Name Sector LTP Change %Chg
4 Cipla Pharmaceuticals 1503.50 18.25 1.23
5 Sun Pharma Pharmaceuticals 1625.20 19.50 1.21
6 Dr Reddys Labs Pharmaceuticals 6399.15 75.00 1.19
10 Divis Labs Pharmaceuticals 3628.80 27.20 0.76
indices_data[indices_data.Sector.str.contains("Pharma")]
Stock Name Sector LTP Change %Chg
4 Cipla Pharmaceuticals 1503.50 18.25 1.23
5 Sun Pharma Pharmaceuticals 1625.20 19.50 1.21
6 Dr Reddys Labs Pharmaceuticals 6399.15 75.00 1.19
10 Divis Labs Pharmaceuticals 3628.80 27.20 0.76
text  = "text data ..some text ... to test"
"tetx" in text
False
"text" in text
True
indices_data[indices_data.Sector.str.contains("Pharma")]
Stock Name Sector LTP Change %Chg
4 Cipla Pharmaceuticals 1503.50 18.25 1.23
5 Sun Pharma Pharmaceuticals 1625.20 19.50 1.21
6 Dr Reddys Labs Pharmaceuticals 6399.15 75.00 1.19
10 Divis Labs Pharmaceuticals 3628.80 27.20 0.76
indices_data[indices_data.Sector.str.contains("Food")]
Stock Name Sector LTP Change %Chg
0 Nestle Food Processing 2618.05 57.1 2.23
14 Britannia Food Processing 4912.15 23.0 0.47
indices_data
Stock Name Sector LTP Change %Chg
0 Nestle Food Processing 2618.05 57.10 2.23
1 Hindalco Iron & Steel 536.90 9.95 1.89
2 Grasim Diversified 2262.80 34.70 1.56
3 BPCL Refineries 633.50 8.80 1.41
4 Cipla Pharmaceuticals 1503.50 18.25 1.23
5 Sun Pharma Pharmaceuticals 1625.20 19.50 1.21
6 Dr Reddys Labs Pharmaceuticals 6399.15 75.00 1.19
7 Bharti Airtel Telecommunications - Service 1213.45 13.75 1.15
8 Bajaj Finserv Finance - Investments 1600.80 18.00 1.14
9 Adani Ports Infrastructure - General 1337.70 12.25 0.92
10 Divis Labs Pharmaceuticals 3628.80 27.20 0.76
11 UltraTechCement Cement - Major 9743.70 70.55 0.73
12 Coal India Mining & Minerals 461.15 2.55 0.56
13 HCL Tech Computers - Software 1646.50 8.85 0.54
14 Britannia Food Processing 4912.15 23.00 0.47
15 SBI Life Insura Life & Health Insurance 1516.65 6.25 0.41
16 TCS Computers - Software 4122.65 14.05 0.34
17 Adani Enterpris Trading 3237.40 10.85 0.34
18 Apollo Hospital Hospitals & Medical Services 6066.90 18.10 0.30
19 Eicher Motors Auto - LCVs & HCVs 3790.15 4.90 0.13
20 Reliance Refineries 2960.00 2.15 0.07
21 ITC Diversified 413.55 0.00 0.00
tables_moneycontrol[2]
Stock Name Sector LTP Change %Chg
0 Tata Steel Iron & Steel 153.75 -3.50 -2.23
1 Bajaj Auto Auto - 2 & 3 Wheelers 8695.05 -184.00 -2.07
2 IndusInd Bank Banks - Private Sector 1539.75 -24.00 -1.53
3 Power Grid Corp Power - Generation & Distribution 288.20 -4.45 -1.52
4 Infosys Computers - Software 1592.85 -23.60 -1.46
5 Tata Motors Auto - LCVs & HCVs 1027.00 -12.30 -1.18
6 Kotak Mahindra Banks - Private Sector 1729.00 -19.05 -1.09
7 HDFC Bank Banks - Private Sector 1430.85 -15.25 -1.05
8 UPL Chemicals 477.45 -4.85 -1.01
9 Maruti Suzuki Auto - Cars & Jeeps 11394.05 -116.00 -1.01
10 Larsen Infrastructure - General 3649.00 -27.95 -0.76
11 ONGC Oil Drilling And Exploration 276.10 -2.00 -0.72
12 NTPC Power - Generation & Distribution 349.90 -2.30 -0.65
13 SBI Banks - Public Sector 783.00 -5.05 -0.64
14 Hero Motocorp Auto - 2 & 3 Wheelers 4623.00 -29.90 -0.64
15 M&M Auto - Cars & Jeeps 1885.85 -11.70 -0.62
16 ICICI Bank Banks - Private Sector 1081.40 -6.55 -0.60
17 HUL Personal Care 2405.55 -14.00 -0.58
18 LTIMindtree Computers - Software 5099.10 -23.60 -0.46
19 JSW Steel Steel - Large 826.50 -3.50 -0.42
20 Wipro Computers - Software 513.85 -1.65 -0.32
21 HDFC Life Life & Health Insurance 620.15 -1.80 -0.29
22 Axis Bank Banks - Private Sector 1108.70 -2.75 -0.25
23 Tech Mahindra Computers - Software 1285.00 -3.15 -0.24
24 Asian Paints Paints & Varnishes 2864.20 -6.35 -0.22
25 Titan Company Miscellaneous 3780.00 -6.90 -0.18
26 Bajaj Finance Finance - NBFC 6411.25 -9.80 -0.15
tables_moneycontrol[0]
Stock Name Sector LTP Change %Chg
0 No Data No Data No Data No Data No Data

Accesing Items from Series

s1 = pd.Series([421, 328, 123, 234])
s1
0    421
1    328
2    123
3    234
dtype: int64
stocks = pd.Series([421, 328, 123, 234], index=["APPLE","AT&T","IBM","NIKE"])
stocks
APPLE    421
AT&T     328
IBM      123
NIKE     234
dtype: int64
stocks['APPLE'] # this label and value behaves just like dictionary
421
stocks["IBM"]
123
stocks[0]
/tmp/ipykernel_2581436/1593074069.py:1: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]`
  stocks[0]
421
stocks.iloc[0]
421
stocks.iloc[3] # iloc -> location by index
234
stocks.loc['APPLE'] # loc -> location by label
421
labels=["APPLE","AT&T","IBM","NIKE"]
stocks = pd.DataFrame({"value" : [421, 328, 123, 234],
              "high" :  [422.0, 328, 124.5, 235.5],
              "low" : [420.4, 335, 120, 230]} , index=labels)
stocks
value high low
APPLE 421 422.0 420.4
AT&T 328 328.0 335.0
IBM 123 124.5 120.0
NIKE 234 235.5 230.0
stocks['APPLE'] # because we give column names here!
KeyError: 'APPLE'
stocks.loc['APPLE']  # row with label 'APPLE'
value    421.0
high     422.0
low      420.4
Name: APPLE, dtype: float64
stocks.loc['NIKE']
value    234.0
high     235.5
low      230.0
Name: NIKE, dtype: float64
stocks.loc[['APPLE','NIKE']]
value high low
APPLE 421 422.0 420.4
NIKE 234 235.5 230.0
stocks.loc[['APPLE','NIKE'], ['high','low']]
high low
APPLE 422.0 420.4
NIKE 235.5 230.0
stocks.iloc[0] # give zeroth row
value    421.0
high     422.0
low      420.4
Name: APPLE, dtype: float64
stocks.iloc[2:] # drop first two row, give remaining
value high low
IBM 123 124.5 120.0
NIKE 234 235.5 230.0
stocks.iloc[2:, :2] # drop frist two rows but take first two columns
value high
IBM 123 124.5
NIKE 234 235.5
s1
0    421
1    328
2    123
3    234
dtype: int64
s1.sum()
1106
s1 > 2
0    True
1    True
2    True
3    True
dtype: bool
s1 + 2
0    423
1    330
2    125
3    236
dtype: int64
s1
0    421
1    328
2    123
3    234
dtype: int64

Groupby

del wallet['Unnamed: 0'] # you can delete a column from dataframe
wallet
date category description debit
0 2021-03-07 14:53:28.377359 Music Amazon 421.207327
1 2020-10-08 09:53:28.377359 Food Swiggy 328.440080
2 2021-02-23 09:53:28.377359 Books Amazon 244.679437
3 2020-11-01 14:53:28.377359 Utility Phone 222.756318
4 2021-06-05 13:53:28.377359 Books Flipcart 494.128492
... ... ... ... ...
95 2021-07-19 13:53:28.377359 Utility Phone 388.671213
96 2021-01-12 19:53:28.377359 Books Flipcart 467.554562
97 2021-03-25 11:53:28.377359 Utility Phone 320.789434
98 2021-05-13 15:53:28.377359 Travel Taxi 442.096469
99 2020-10-11 16:53:28.377359 Food Hotel 100.455501

100 rows × 4 columns

wallet.category.unique()
array(['Music', 'Food', 'Books', 'Utility', 'Travel'], dtype=object)
grp = wallet.groupby("category")
grp # this is a group created by groupby method and on this obhect you do operations
<pandas.core.groupby.generic.DataFrameGroupBy object at 0x7f3aacc81ba0>
grp['debit'].sum() 
category
Books      4929.750393
Food       8281.189173
Music      4233.451868
Travel     6052.931876
Utility    7562.267233
Name: debit, dtype: float64
grp.sum(numeric_only=True)
debit
category
Books 4929.750393
Food 8281.189173
Music 4233.451868
Travel 6052.931876
Utility 7562.267233
grp['debit'].max() 
category
Books      498.100496
Food       489.143483
Music      421.207327
Travel     494.124399
Utility    499.858182
Name: debit, dtype: float64
grp['debit'].min()
category
Books      109.325909
Food       100.455501
Music      101.573276
Travel     117.588729
Utility    103.680791
Name: debit, dtype: float64
grp['debit'].std()
category
Books      137.793128
Food       119.856277
Music      105.824988
Travel      96.400518
Utility    119.148546
Name: debit, dtype: float64
grp['debit'].mean()
category
Books      352.125028
Food       306.710710
Music      264.590742
Travel     378.308242
Utility    280.083972
Name: debit, dtype: float64

problem

Find out total amount spent on flipcart.

Reading tab seperated file

%%file data.tsv
symbol  day price
IBM Monday  111.71436961893693
IBM Tuesday 141.21220022208635
IBM Wednesday   112.40571010053796
IBM Thursday    137.54133351926248
IBM Friday  140.25154281801224
MICROSOFT   Monday  235.0403622499107
MICROSOFT   Tuesday 225.0206535036475
MICROSOFT   Wednesday   216.10342426936444
MICROSOFT   Thursday    200.38038844494193
MICROSOFT   Friday  235.80850482793264
APPLE   Monday  321.49182055844256
APPLE   Tuesday 340.63612771662815
APPLE   Wednesday   303.9065277507285
APPLE   Thursday    338.1350605764038
APPLE   Friday  318.3912296144338
Overwriting data.tsv
pd.read_csv("data.tsv")
symbol\tday\tprice
0 IBM\tMonday\t111.71436961893693
1 IBM\tTuesday\t141.21220022208635
2 IBM\tWednesday\t112.40571010053796
3 IBM\tThursday\t137.54133351926248
4 IBM\tFriday\t140.25154281801224
5 MICROSOFT\tMonday\t235.0403622499107
6 MICROSOFT\tTuesday\t225.0206535036475
7 MICROSOFT\tWednesday\t216.10342426936444
8 MICROSOFT\tThursday\t200.38038844494193
9 MICROSOFT\tFriday\t235.80850482793264
10 APPLE\tMonday\t321.49182055844256
11 APPLE\tTuesday\t340.63612771662815
12 APPLE\tWednesday\t303.9065277507285
13 APPLE\tThursday\t338.1350605764038
14 APPLE\tFriday\t318.3912296144338
pd.read_csv("data.tsv", delimiter="\t") 
symbol day price
0 IBM Monday 111.714370
1 IBM Tuesday 141.212200
2 IBM Wednesday 112.405710
3 IBM Thursday 137.541334
4 IBM Friday 140.251543
5 MICROSOFT Monday 235.040362
6 MICROSOFT Tuesday 225.020654
7 MICROSOFT Wednesday 216.103424
8 MICROSOFT Thursday 200.380388
9 MICROSOFT Friday 235.808505
10 APPLE Monday 321.491821
11 APPLE Tuesday 340.636128
12 APPLE Wednesday 303.906528
13 APPLE Thursday 338.135061
14 APPLE Friday 318.391230
wallet.groupby("description").sum(numeric_only=True).loc['Flipcart']
debit    2503.255216
Name: Flipcart, dtype: float64
wallet.groupby("description").sum(numeric_only=True).loc['Flipcart', 'debit'] # indentifier for row and column in .loc
2503.255215647378
wallet.groupby("description")['debit'].sum().loc['Flipcart']
2503.255215647378
wallet.groupby("description").sum(numeric_only=True)
debit
description
Amazon 2504.690567
Amazon Kindle 1389.052376
Auto 2210.428935
Electricity 2885.064355
Flipcart 2503.255216
Hotel 2752.174732
Metro 1216.463665
Netflix 1546.567562
Phone 4677.202878
Swiggy 1936.495366
Taxi 2626.039276
Zomato 3592.519075
spotify 1219.636541
grpdescsum = wallet.groupby("description").sum(numeric_only=True)
grpdescsum # this is a dataframe with index/labels coming from description column
debit
description
Amazon 2504.690567
Amazon Kindle 1389.052376
Auto 2210.428935
Electricity 2885.064355
Flipcart 2503.255216
Hotel 2752.174732
Metro 1216.463665
Netflix 1546.567562
Phone 4677.202878
Swiggy 1936.495366
Taxi 2626.039276
Zomato 3592.519075
spotify 1219.636541
grpdescsum.description
AttributeError: 'DataFrame' object has no attribute 'description'
grpdescsum.reset_index() 
description debit
0 Amazon 2504.690567
1 Amazon Kindle 1389.052376
2 Auto 2210.428935
3 Electricity 2885.064355
4 Flipcart 2503.255216
5 Hotel 2752.174732
6 Metro 1216.463665
7 Netflix 1546.567562
8 Phone 4677.202878
9 Swiggy 1936.495366
10 Taxi 2626.039276
11 Zomato 3592.519075
12 spotify 1219.636541

combining data from two dataframes

%%file stocks1.csv
symbol,value,high,low
APPLE,421,422.0,420.4
AT&T,328,328.0,335.0
IBM,123,124.5,120.0
NIKE,234,235.5,230.0
Writing stocks1.csv
%%file stocks2.csv
symbol,value,high,low
AGILENT,521,522.0,520.4
XEROX,428,428.0,435.0
TESLA,623,624.5,620.0
Writing stocks2.csv
stocks1 = pd.read_csv("stocks1.csv")
stocks2 = pd.read_csv("stocks2.csv")
stocks1
symbol value high low
0 APPLE 421 422.0 420.4
1 AT&T 328 328.0 335.0
2 IBM 123 124.5 120.0
3 NIKE 234 235.5 230.0
stocks2
symbol value high low
0 AGILENT 521 522.0 520.4
1 XEROX 428 428.0 435.0
2 TESLA 623 624.5 620.0
pd.concat([stocks1, stocks2])
symbol value high low
0 APPLE 421 422.0 420.4
1 AT&T 328 328.0 335.0
2 IBM 123 124.5 120.0
3 NIKE 234 235.5 230.0
0 AGILENT 521 522.0 520.4
1 XEROX 428 428.0 435.0
2 TESLA 623 624.5 620.0
labels = ["APPLE","AT&T","IBM","NIKE"]
value = pd.Series([234.5, 221.6, 125.7, 100.5], index=labels)
high = pd.Series([240.32, 222.5, 127.3, 105.0], index=labels)
low = pd.Series([233.0, 220.0, 123.0, 104.0], index=labels)
volume = pd.Series([100, 200, 50, 1000], index=labels)
stocks = pd.DataFrame({"value":value, "high":high, "low":low, "volume":volume})
stocks
value high low volume
APPLE 234.5 240.32 233.0 100
AT&T 221.6 222.50 220.0 200
IBM 125.7 127.30 123.0 50
NIKE 100.5 105.00 104.0 1000
stocks1 = pd.DataFrame({
            "value" : [125, 500.0, 300.4, 423.9],
            "low" : [125.0, 490.0, 299.5, 421.1],
            "high" : [130.0, 500.0, 305.0, 425.5],
            "volume" : [123, 50, 100, 80]},
            index = ["BELL","XEROX","FORD","TESLA"])
stocks1
value low high volume
BELL 125.0 125.0 130.0 123
XEROX 500.0 490.0 500.0 50
FORD 300.4 299.5 305.0 100
TESLA 423.9 421.1 425.5 80
pd.concat(stocks, stocks1)
TypeError: concat() takes 1 positional argument but 2 were given
pd.concat([stocks, stocks1])
value high low volume
APPLE 234.5 240.32 233.0 100
AT&T 221.6 222.50 220.0 200
IBM 125.7 127.30 123.0 50
NIKE 100.5 105.00 104.0 1000
BELL 125.0 130.00 125.0 123
XEROX 500.0 500.00 490.0 50
FORD 300.4 305.00 299.5 100
TESLA 423.9 425.50 421.1 80
df = pd.concat([stocks, stocks1])
df
value high low volume
APPLE 234.5 240.32 233.0 100
AT&T 221.6 222.50 220.0 200
IBM 125.7 127.30 123.0 50
NIKE 100.5 105.00 104.0 1000
BELL 125.0 130.00 125.0 123
XEROX 500.0 500.00 490.0 50
FORD 300.4 305.00 299.5 100
TESLA 423.9 425.50 421.1 80
df.index.name="symbol"
df
value high low volume
symbol
APPLE 234.5 240.32 233.0 100
AT&T 221.6 222.50 220.0 200
IBM 125.7 127.30 123.0 50
NIKE 100.5 105.00 104.0 1000
BELL 125.0 130.00 125.0 123
XEROX 500.0 500.00 490.0 50
FORD 300.4 305.00 299.5 100
TESLA 423.9 425.50 421.1 80
df1 = df[['value', 'volume']]
df1
value volume
symbol
APPLE 234.5 100
AT&T 221.6 200
IBM 125.7 50
NIKE 100.5 1000
BELL 125.0 123
XEROX 500.0 50
FORD 300.4 100
TESLA 423.9 80
df2 = df[['high', 'low']]
df2
high low
symbol
APPLE 240.32 233.0
AT&T 222.50 220.0
IBM 127.30 123.0
NIKE 105.00 104.0
BELL 130.00 125.0
XEROX 500.00 490.0
FORD 305.00 299.5
TESLA 425.50 421.1
df1.join(df2) # by default join will  be done on index
value volume high low
symbol
APPLE 234.5 100 240.32 233.0
AT&T 221.6 200 222.50 220.0
IBM 125.7 50 127.30 123.0
NIKE 100.5 1000 105.00 104.0
BELL 125.0 123 130.00 125.0
XEROX 500.0 50 500.00 490.0
FORD 300.4 100 305.00 299.5
TESLA 423.9 80 425.50 421.1

Writing csv and excel files using pandas

stocks1
value low high volume
BELL 125.0 125.0 130.0 123
XEROX 500.0 490.0 500.0 50
FORD 300.4 299.5 305.0 100
TESLA 423.9 421.1 425.5 80
df
value high low volume
symbol
APPLE 234.5 240.32 233.0 100
AT&T 221.6 222.50 220.0 200
IBM 125.7 127.30 123.0 50
NIKE 100.5 105.00 104.0 1000
BELL 125.0 130.00 125.0 123
XEROX 500.0 500.00 490.0 50
FORD 300.4 305.00 299.5 100
TESLA 423.9 425.50 421.1 80
df1
value volume
symbol
APPLE 234.5 100
AT&T 221.6 200
IBM 125.7 50
NIKE 100.5 1000
BELL 125.0 123
XEROX 500.0 50
FORD 300.4 100
TESLA 423.9 80
df2
high low
symbol
APPLE 240.32 233.0
AT&T 222.50 220.0
IBM 127.30 123.0
NIKE 105.00 104.0
BELL 130.00 125.0
XEROX 500.00 490.0
FORD 305.00 299.5
TESLA 425.50 421.1
df1.to_csv("dataframe1.csv") # path of file
!cat dataframe1.csv
symbol,value,volume
APPLE,234.5,100
AT&T,221.6,200
IBM,125.7,50
NIKE,100.5,1000
BELL,125.0,123
XEROX,500.0,50
FORD,300.4,100
TESLA,423.9,80
value high low volume
symbol
APPLE 234.5 240.32 233.0 100
AT&T 221.6 222.50 220.0 200
IBM 125.7 127.30 123.0 50
NIKE 100.5 105.00 104.0 1000
BELL 125.0 130.00 125.0 123
XEROX 500.0 500.00 490.0 50
FORD 300.4 305.00 299.5 100
TESLA 423.9 425.50 421.1 80
df
value high low volume
symbol
APPLE 234.5 240.32 233.0 100
AT&T 221.6 222.50 220.0 200
IBM 125.7 127.30 123.0 50
NIKE 100.5 105.00 104.0 1000
BELL 125.0 130.00 125.0 123
XEROX 500.0 500.00 490.0 50
FORD 300.4 305.00 299.5 100
TESLA 423.9 425.50 421.1 80
import xlsxwriter

def write_stocks_in_excel(stocksdata, filename):
    workbook = xlsxwriter.Workbook(filename)
    worksheet = workbook.add_worksheet("stocks")
    bold = workbook.add_format({'bold':True})

    # write headers
    worksheet.write(0, 0, stocksdata.index.name, bold)
    for c,colname in enumerate(stocksdata.columns, start=1):
        worksheet.write(0, c, colname, bold)

    for r, symbol in enumerate(stocksdata.index, start=1):
        worksheet.write(r, 0, symbol, bold )

    for c, colname in enumerate(stocksdata.columns, start=1):
        for r, data in enumerate(stocksdata[colname], start=1):
            worksheet.write(r, c, data)

    workbook.close()
write_stocks_in_excel(df, "stock_data.xlsx")
df
value high low volume
symbol
APPLE 234.5 240.32 233.0 100
AT&T 221.6 222.50 220.0 200
IBM 125.7 127.30 123.0 50
NIKE 100.5 105.00 104.0 1000
BELL 125.0 130.00 125.0 123
XEROX 500.0 500.00 490.0 50
FORD 300.4 305.00 299.5 100
TESLA 423.9 425.50 421.1 80
df
value high low volume
symbol
APPLE 234.5 240.32 233.0 100
AT&T 221.6 222.50 220.0 200
IBM 125.7 127.30 123.0 50
NIKE 100.5 105.00 104.0 1000
BELL 125.0 130.00 125.0 123
XEROX 500.0 500.00 490.0 50
FORD 300.4 305.00 299.5 100
TESLA 423.9 425.50 421.1 80
writer = pd.ExcelWriter("exceldata.xlsx", engine='xlsxwriter')
df.to_excel(writer, sheet_name="data")
with pd.ExcelWriter("exceldata.xlsx", engine='xlsxwriter') as writer:
    df1.to_excel(writer, sheet_name="df1")
    df2.to_excel(writer, sheet_name="df2")
with pd.ExcelWriter("shifted_loc.xlsx", engine='xlsxwriter') as writer:
    df.to_excel(writer, sheet_name="data", startrow=2, startcol=2)