Pandas
You can install pandas using pip command
pip install pandas
reading data with pandas
%% file stockdata.csv
symbol,day,price
IBM,Monday,111.71436961893693
IBM,Tuesday,141.21220022208635
IBM,Wednesday,112.40571010053796
IBM,Thursday,137.54133351926248
IBM,Friday,140.25154281801224
MICROSOFT,Monday,235.0403622499107
MICROSOFT,Tuesday,225.0206535036475
MICROSOFT,Wednesday,216.10342426936444
MICROSOFT,Thursday,200.38038844494193
MICROSOFT,Friday,235.80850482793264
APPLE,Monday,321.49182055844256
APPLE,Tuesday,340.63612771662815
APPLE,Wednesday,303.9065277507285
APPLE,Thursday,338.1350605764038
APPLE,Friday,318.3912296144338
stockdata = pd.read_csv("stockdata.csv" )
0
IBM
Monday
111.714370
1
IBM
Tuesday
141.212200
2
IBM
Wednesday
112.405710
3
IBM
Thursday
137.541334
4
IBM
Friday
140.251543
5
MICROSOFT
Monday
235.040362
6
MICROSOFT
Tuesday
225.020654
7
MICROSOFT
Wednesday
216.103424
8
MICROSOFT
Thursday
200.380388
9
MICROSOFT
Friday
235.808505
10
APPLE
Monday
321.491821
11
APPLE
Tuesday
340.636128
12
APPLE
Wednesday
303.906528
13
APPLE
Thursday
338.135061
14
APPLE
Friday
318.391230
csvurl = "https://raw.githubusercontent.com/vikipedia/python-trainings/master/online_course/source/module2/wallet.csv"
0
0
2021-03-07 14:53:28.377359
Music
Amazon
421.207327
1
1
2020-10-08 09:53:28.377359
Food
Swiggy
328.440080
2
2
2021-02-23 09:53:28.377359
Books
Amazon
244.679437
3
3
2020-11-01 14:53:28.377359
Utility
Phone
222.756318
4
4
2021-06-05 13:53:28.377359
Books
Flipcart
494.128492
...
...
...
...
...
...
95
95
2021-07-19 13:53:28.377359
Utility
Phone
388.671213
96
96
2021-01-12 19:53:28.377359
Books
Flipcart
467.554562
97
97
2021-03-25 11:53:28.377359
Utility
Phone
320.789434
98
98
2021-05-13 15:53:28.377359
Travel
Taxi
442.096469
99
99
2020-10-11 16:53:28.377359
Food
Hotel
100.455501
100 rows × 5 columns
excelurl = "https://raw.githubusercontent.com/vikipedia/python-trainings/master/online_course/source/module2/wallet.xlsx"
pd.read_excel(excelurl) # by default it will load first sheet
0
0
2021-03-07 14:53:28.377359
Music
Amazon
421.207327
1
1
2020-10-08 09:53:28.377359
Food
Swiggy
328.440080
2
2
2021-02-23 09:53:28.377359
Books
Amazon
244.679437
3
3
2020-11-01 14:53:28.377359
Utility
Phone
222.756318
4
4
2021-06-05 13:53:28.377359
Books
Flipcart
494.128492
...
...
...
...
...
...
95
95
2021-07-19 13:53:28.377359
Utility
Phone
388.671213
96
96
2021-01-12 19:53:28.377359
Books
Flipcart
467.554562
97
97
2021-03-25 11:53:28.377359
Utility
Phone
320.789434
98
98
2021-05-13 15:53:28.377359
Travel
Taxi
442.096469
99
99
2020-10-11 16:53:28.377359
Food
Hotel
100.455501
100 rows × 5 columns
w = pd.read_excel(excelurl, sheet_name= "wallet" )
0
0
2021-03-07 14:53:28.377359
Music
Amazon
421.207327
1
1
2020-10-08 09:53:28.377359
Food
Swiggy
328.440080
2
2
2021-02-23 09:53:28.377359
Books
Amazon
244.679437
3
3
2020-11-01 14:53:28.377359
Utility
Phone
222.756318
4
4
2021-06-05 13:53:28.377359
Books
Flipcart
494.128492
...
...
...
...
...
...
95
95
2021-07-19 13:53:28.377359
Utility
Phone
388.671213
96
96
2021-01-12 19:53:28.377359
Books
Flipcart
467.554562
97
97
2021-03-25 11:53:28.377359
Utility
Phone
320.789434
98
98
2021-05-13 15:53:28.377359
Travel
Taxi
442.096469
99
99
2020-10-11 16:53:28.377359
Food
Hotel
100.455501
100 rows × 5 columns
pd.read_excel(excelurl, sheet_name= "squrs" )
0
1
1
1
2
4
2
3
9
3
4
16
4
5
25
5
6
36
6
7
49
7
8
64
8
9
81
9
10
100
10
11
121
11
12
144
12
13
169
13
14
196
14
15
225
15
16
256
16
17
289
17
18
324
tables_moneycontrol = pd.read_html("https://www.moneycontrol.com/markets/indian-indices/" )
type (tables_moneycontrol)
0
No Data
No Data
No Data
No Data
No Data
0
TATA Cons. Prod
Plantations - Tea & Coffee
1218.9
-42.65
-3.38
0
Nestle
Food Processing
2616.30
55.35
2.16
1
Hindalco
Iron & Steel
535.35
8.40
1.59
2
BPCL
Refineries
634.15
9.45
1.51
3
Cipla
Pharmaceuticals
1506.35
21.10
1.42
4
Bharti Airtel
Telecommunications - Service
1214.85
15.15
1.26
5
Sun Pharma
Pharmaceuticals
1623.25
17.55
1.09
6
Bajaj Finserv
Finance - Investments
1599.70
16.90
1.07
7
SBI Life Insura
Life & Health Insurance
1525.65
15.25
1.01
8
Grasim
Diversified
2250.00
21.90
0.98
9
Britannia
Food Processing
4933.40
44.25
0.91
10
Adani Ports
Infrastructure - General
1335.05
9.60
0.72
11
Divis Labs
Pharmaceuticals
3626.55
24.95
0.69
12
Coal India
Mining & Minerals
461.20
2.60
0.57
13
Dr Reddys Labs
Pharmaceuticals
6358.95
34.80
0.55
14
Apollo Hospital
Hospitals & Medical Services
6072.40
23.60
0.39
15
Adani Enterpris
Trading
3239.00
12.45
0.39
16
HDFC Life
Life & Health Insurance
624.20
2.25
0.36
17
UltraTechCement
Cement - Major
9705.80
32.65
0.34
18
ITC
Diversified
414.95
1.40
0.34
19
Axis Bank
Banks - Private Sector
1115.00
3.55
0.32
20
Reliance
Refineries
2964.25
6.40
0.22
21
Eicher Motors
Auto - LCVs & HCVs
3793.00
7.75
0.20
22
TCS
Computers - Software
4111.00
2.40
0.06
23
Titan Company
Miscellaneous
3787.05
0.15
0.00
24
Asian Paints
Paints & Varnishes
2870.45
-0.10
0.00
0
No Data
No Data
No Data
No Data
No Data
0
No Data
No Data
No Data
No Data
No Data
DataFrame/Series
0
IBM
Monday
111.714370
1
IBM
Tuesday
141.212200
2
IBM
Wednesday
112.405710
3
IBM
Thursday
137.541334
4
IBM
Friday
140.251543
5
MICROSOFT
Monday
235.040362
6
MICROSOFT
Tuesday
225.020654
7
MICROSOFT
Wednesday
216.103424
8
MICROSOFT
Thursday
200.380388
9
MICROSOFT
Friday
235.808505
10
APPLE
Monday
321.491821
11
APPLE
Tuesday
340.636128
12
APPLE
Wednesday
303.906528
13
APPLE
Thursday
338.135061
14
APPLE
Friday
318.391230
stockdata['symbol' ] # like a dictinary! you can give column name in square bracket
0 IBM
1 IBM
2 IBM
3 IBM
4 IBM
5 MICROSOFT
6 MICROSOFT
7 MICROSOFT
8 MICROSOFT
9 MICROSOFT
10 APPLE
11 APPLE
12 APPLE
13 APPLE
14 APPLE
Name: symbol, dtype: object
stockdata.price # you can also access column like class attribute
0 111.714370
1 141.212200
2 112.405710
3 137.541334
4 140.251543
5 235.040362
6 225.020654
7 216.103424
8 200.380388
9 235.808505
10 321.491821
11 340.636128
12 303.906528
13 338.135061
14 318.391230
Name: price, dtype: float64
0 111.714370
1 141.212200
2 112.405710
3 137.541334
4 140.251543
5 235.040362
6 225.020654
7 216.103424
8 200.380388
9 235.808505
10 321.491821
11 340.636128
12 303.906528
13 338.135061
14 318.391230
Name: price, dtype: float64
%% file columns_with_space.csv
symbol,day,Today's price
IBM,Monday,111.71436961893693
IBM,Tuesday,141.21220022208635
IBM,Wednesday,112.40571010053796
IBM,Thursday,137.54133351926248
IBM,Friday,140.25154281801224
MICROSOFT,Monday,235.0403622499107
MICROSOFT,Tuesday,225.0206535036475
MICROSOFT,Wednesday,216.10342426936444
MICROSOFT,Thursday,200.38038844494193
MICROSOFT,Friday,235.80850482793264
APPLE,Monday,321.49182055844256
APPLE,Tuesday,340.63612771662815
APPLE,Wednesday,303.9065277507285
APPLE,Thursday,338.1350605764038
APPLE,Friday,318.3912296144338
Writing columns_with_space.csv
df = pd.read_csv("columns_with_space.csv" )
0
IBM
Monday
111.714370
1
IBM
Tuesday
141.212200
2
IBM
Wednesday
112.405710
3
IBM
Thursday
137.541334
4
IBM
Friday
140.251543
5
MICROSOFT
Monday
235.040362
6
MICROSOFT
Tuesday
225.020654
7
MICROSOFT
Wednesday
216.103424
8
MICROSOFT
Thursday
200.380388
9
MICROSOFT
Friday
235.808505
10
APPLE
Monday
321.491821
11
APPLE
Tuesday
340.636128
12
APPLE
Wednesday
303.906528
13
APPLE
Thursday
338.135061
14
APPLE
Friday
318.391230
0 IBM
1 IBM
2 IBM
3 IBM
4 IBM
5 MICROSOFT
6 MICROSOFT
7 MICROSOFT
8 MICROSOFT
9 MICROSOFT
10 APPLE
11 APPLE
12 APPLE
13 APPLE
14 APPLE
Name: symbol, dtype: object
SyntaxError: unterminated string literal (detected at line 1) (2692468768.py, line 1)
0 111.714370
1 141.212200
2 112.405710
3 137.541334
4 140.251543
5 235.040362
6 225.020654
7 216.103424
8 200.380388
9 235.808505
10 321.491821
11 340.636128
12 303.906528
13 338.135061
14 318.391230
Name: Today's price, dtype: float64
columnname = "Today's price"
0 111.714370
1 141.212200
2 112.405710
3 137.541334
4 140.251543
5 235.040362
6 225.020654
7 216.103424
8 200.380388
9 235.808505
10 321.491821
11 340.636128
12 303.906528
13 338.135061
14 318.391230
Name: Today's price, dtype: float64
df.columnname # you can't make use variables here
AttributeError: 'DataFrame' object has no attribute 'columnname'
0 111.714370
1 141.212200
2 112.405710
3 137.541334
4 140.251543
5 235.040362
6 225.020654
7 216.103424
8 200.380388
9 235.808505
10 321.491821
11 340.636128
12 303.906528
13 338.135061
14 318.391230
Name: Today's price, dtype: float64
0
IBM
Monday
111.714370
1
IBM
Tuesday
141.212200
2
IBM
Wednesday
112.405710
3
IBM
Thursday
137.541334
4
IBM
Friday
140.251543
5
MICROSOFT
Monday
235.040362
6
MICROSOFT
Tuesday
225.020654
7
MICROSOFT
Wednesday
216.103424
8
MICROSOFT
Thursday
200.380388
9
MICROSOFT
Friday
235.808505
10
APPLE
Monday
321.491821
11
APPLE
Tuesday
340.636128
12
APPLE
Wednesday
303.906528
13
APPLE
Thursday
338.135061
14
APPLE
Friday
318.391230
csvurl = "https://raw.githubusercontent.com/vikipedia/python-trainings/master/online_course/source/module2/wallet.csv"
wallet = pd.read_csv(csvurl)
0
0
2021-03-07 14:53:28.377359
Music
Amazon
421.207327
1
1
2020-10-08 09:53:28.377359
Food
Swiggy
328.440080
2
2
2021-02-23 09:53:28.377359
Books
Amazon
244.679437
3
3
2020-11-01 14:53:28.377359
Utility
Phone
222.756318
4
4
2021-06-05 13:53:28.377359
Books
Flipcart
494.128492
0
0
2021-03-07 14:53:28.377359
Music
Amazon
421.207327
1
1
2020-10-08 09:53:28.377359
Food
Swiggy
328.440080
2
2
2021-02-23 09:53:28.377359
Books
Amazon
244.679437
3
3
2020-11-01 14:53:28.377359
Utility
Phone
222.756318
4
4
2021-06-05 13:53:28.377359
Books
Flipcart
494.128492
5
5
2021-07-28 19:53:28.377359
Utility
Electricity
219.941711
6
6
2021-04-16 11:53:28.377359
Books
Amazon Kindle
270.322595
7
7
2021-02-15 10:53:28.377359
Food
Zomato
457.183104
8
8
2021-08-10 19:53:28.377359
Utility
Phone
151.496373
9
9
2020-11-29 14:53:28.377359
Travel
Auto
443.618884
wallet.tail()# show last 5 entries in the table
95
95
2021-07-19 13:53:28.377359
Utility
Phone
388.671213
96
96
2021-01-12 19:53:28.377359
Books
Flipcart
467.554562
97
97
2021-03-25 11:53:28.377359
Utility
Phone
320.789434
98
98
2021-05-13 15:53:28.377359
Travel
Taxi
442.096469
99
99
2020-10-11 16:53:28.377359
Food
Hotel
100.455501
85
85
2021-01-12 09:53:28.377359
Music
Amazon
130.374908
86
86
2021-05-07 16:53:28.377359
Food
Zomato
198.450672
87
87
2021-05-19 15:53:28.377359
Food
Zomato
378.820641
88
88
2021-04-18 09:53:28.377359
Utility
Phone
124.221248
89
89
2021-04-12 14:53:28.377359
Music
Amazon
218.487173
90
90
2020-12-01 14:53:28.377359
Music
Amazon
101.573276
91
91
2021-01-22 17:53:28.377359
Food
Hotel
232.663468
92
92
2021-01-12 19:53:28.377359
Travel
Taxi
356.842638
93
93
2021-01-11 09:53:28.377359
Utility
Electricity
111.720809
94
94
2021-01-04 13:53:28.377359
Utility
Phone
431.185537
95
95
2021-07-19 13:53:28.377359
Utility
Phone
388.671213
96
96
2021-01-12 19:53:28.377359
Books
Flipcart
467.554562
97
97
2021-03-25 11:53:28.377359
Utility
Phone
320.789434
98
98
2021-05-13 15:53:28.377359
Travel
Taxi
442.096469
99
99
2020-10-11 16:53:28.377359
Food
Hotel
100.455501
0
IBM
Monday
111.714370
1
IBM
Tuesday
141.212200
2
IBM
Wednesday
112.405710
3
IBM
Thursday
137.541334
4
IBM
Friday
140.251543
5
MICROSOFT
Monday
235.040362
6
MICROSOFT
Tuesday
225.020654
7
MICROSOFT
Wednesday
216.103424
8
MICROSOFT
Thursday
200.380388
9
MICROSOFT
Friday
235.808505
10
APPLE
Monday
321.491821
11
APPLE
Tuesday
340.636128
12
APPLE
Wednesday
303.906528
13
APPLE
Thursday
338.135061
14
APPLE
Friday
318.391230
stockdata.info() # show basic information of all columns from this dataframe
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 15 entries, 0 to 14
Data columns (total 3 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 symbol 15 non-null object
1 day 15 non-null object
2 price 15 non-null float64
dtypes: float64(1), object(2)
memory usage: 488.0+ bytes
0 111.714370
1 141.212200
2 112.405710
3 137.541334
4 140.251543
5 235.040362
6 225.020654
7 216.103424
8 200.380388
9 235.808505
10 321.491821
11 340.636128
12 303.906528
13 338.135061
14 318.391230
Name: price, dtype: float64
Filtering
0
IBM
Monday
111.714370
1
IBM
Tuesday
141.212200
2
IBM
Wednesday
112.405710
3
IBM
Thursday
137.541334
4
IBM
Friday
140.251543
5
MICROSOFT
Monday
235.040362
6
MICROSOFT
Tuesday
225.020654
7
MICROSOFT
Wednesday
216.103424
8
MICROSOFT
Thursday
200.380388
9
MICROSOFT
Friday
235.808505
10
APPLE
Monday
321.491821
11
APPLE
Tuesday
340.636128
12
APPLE
Wednesday
303.906528
13
APPLE
Thursday
338.135061
14
APPLE
Friday
318.391230
stockdata[stockdata.symbol== "IBM" ]
0
IBM
Monday
111.714370
1
IBM
Tuesday
141.212200
2
IBM
Wednesday
112.405710
3
IBM
Thursday
137.541334
4
IBM
Friday
140.251543
ibm = stockdata[stockdata.symbol== "IBM" ]
0
IBM
Monday
111.714370
1
IBM
Tuesday
141.212200
2
IBM
Wednesday
112.405710
3
IBM
Thursday
137.541334
4
IBM
Friday
140.251543
0 111.714370
1 141.212200
2 112.405710
3 137.541334
4 140.251543
Name: price, dtype: float64
problem
Write a function weekly_average which will take stock dataframe and stock name as argument and finds weekely average for that stock
>>> weekly_avearge(stockdata, "IBM")
128.62503125576717
def weekly_average(stockdata, stockname):
subset = stockdata[stockdata.symbol == stockname]
return subset.price.mean()
weekly_average(stockdata, "APPLE" )
weekly_average(stockdata, "IBM" )
weekly_average(stockdata, "MICROSOFT" )
prices = pd.Series([111.714370 , 141.212200 , 112.405710 , 137.541334 , 140.251543 ])
0 111.714370
1 141.212200
2 112.405710
3 137.541334
4 140.251543
dtype: float64
0 False
1 True
2 False
3 True
4 True
dtype: bool
0 False
1 True
2 False
3 True
4 True
dtype: bool
prices[gtthan120] # this will filter out only those prices which are greater than 120
1 141.212200
3 137.541334
4 140.251543
dtype: float64
0 IBM
1 IBM
2 IBM
3 IBM
4 IBM
5 MICROSOFT
6 MICROSOFT
7 MICROSOFT
8 MICROSOFT
9 MICROSOFT
10 APPLE
11 APPLE
12 APPLE
13 APPLE
14 APPLE
Name: symbol, dtype: object
stockdata.symbol == "IBM"
0 True
1 True
2 True
3 True
4 True
5 False
6 False
7 False
8 False
9 False
10 False
11 False
12 False
13 False
14 False
Name: symbol, dtype: bool
flagseries = stockdata.symbol == "IBM"
stockdata[flagseries] # it will take only those lines for which the flatseries is True
0
IBM
Monday
111.714370
1
IBM
Tuesday
141.212200
2
IBM
Wednesday
112.405710
3
IBM
Thursday
137.541334
4
IBM
Friday
140.251543
cond = (stockdata.symbol == "IBM" ) & (stockdata.price > 120 )
0 False
1 True
2 False
3 True
4 True
5 False
6 False
7 False
8 False
9 False
10 False
11 False
12 False
13 False
14 False
dtype: bool
1
IBM
Tuesday
141.212200
3
IBM
Thursday
137.541334
4
IBM
Friday
140.251543
tables_moneycontrol = pd.read_html("https://www.moneycontrol.com/markets/indian-indices/" )
indices_data = tables_moneycontrol[3 ]
0
Nestle
Food Processing
2618.05
57.10
2.23
1
Hindalco
Iron & Steel
536.90
9.95
1.89
2
Grasim
Diversified
2262.80
34.70
1.56
3
BPCL
Refineries
633.50
8.80
1.41
4
Cipla
Pharmaceuticals
1503.50
18.25
1.23
5
Sun Pharma
Pharmaceuticals
1625.20
19.50
1.21
6
Dr Reddys Labs
Pharmaceuticals
6399.15
75.00
1.19
7
Bharti Airtel
Telecommunications - Service
1213.45
13.75
1.15
8
Bajaj Finserv
Finance - Investments
1600.80
18.00
1.14
9
Adani Ports
Infrastructure - General
1337.70
12.25
0.92
10
Divis Labs
Pharmaceuticals
3628.80
27.20
0.76
11
UltraTechCement
Cement - Major
9743.70
70.55
0.73
12
Coal India
Mining & Minerals
461.15
2.55
0.56
13
HCL Tech
Computers - Software
1646.50
8.85
0.54
14
Britannia
Food Processing
4912.15
23.00
0.47
15
SBI Life Insura
Life & Health Insurance
1516.65
6.25
0.41
16
TCS
Computers - Software
4122.65
14.05
0.34
17
Adani Enterpris
Trading
3237.40
10.85
0.34
18
Apollo Hospital
Hospitals & Medical Services
6066.90
18.10
0.30
19
Eicher Motors
Auto - LCVs & HCVs
3790.15
4.90
0.13
20
Reliance
Refineries
2960.00
2.15
0.07
21
ITC
Diversified
413.55
0.00
0.00
change1percent = indices_data["%Chg" ] >= 1
indices_data[change1percent]
0
Nestle
Food Processing
2618.05
57.10
2.23
1
Hindalco
Iron & Steel
536.90
9.95
1.89
2
Grasim
Diversified
2262.80
34.70
1.56
3
BPCL
Refineries
633.50
8.80
1.41
4
Cipla
Pharmaceuticals
1503.50
18.25
1.23
5
Sun Pharma
Pharmaceuticals
1625.20
19.50
1.21
6
Dr Reddys Labs
Pharmaceuticals
6399.15
75.00
1.19
7
Bharti Airtel
Telecommunications - Service
1213.45
13.75
1.15
8
Bajaj Finserv
Finance - Investments
1600.80
18.00
1.14
indices_data[indices_data.Sector == "Pharmaceuticals" ]
4
Cipla
Pharmaceuticals
1503.50
18.25
1.23
5
Sun Pharma
Pharmaceuticals
1625.20
19.50
1.21
6
Dr Reddys Labs
Pharmaceuticals
6399.15
75.00
1.19
10
Divis Labs
Pharmaceuticals
3628.80
27.20
0.76
indices_data[indices_data.Sector.str .contains("Pharma" )]
4
Cipla
Pharmaceuticals
1503.50
18.25
1.23
5
Sun Pharma
Pharmaceuticals
1625.20
19.50
1.21
6
Dr Reddys Labs
Pharmaceuticals
6399.15
75.00
1.19
10
Divis Labs
Pharmaceuticals
3628.80
27.20
0.76
text = "text data ..some text ... to test"
indices_data[indices_data.Sector.str .contains("Pharma" )]
4
Cipla
Pharmaceuticals
1503.50
18.25
1.23
5
Sun Pharma
Pharmaceuticals
1625.20
19.50
1.21
6
Dr Reddys Labs
Pharmaceuticals
6399.15
75.00
1.19
10
Divis Labs
Pharmaceuticals
3628.80
27.20
0.76
indices_data[indices_data.Sector.str .contains("Food" )]
0
Nestle
Food Processing
2618.05
57.1
2.23
14
Britannia
Food Processing
4912.15
23.0
0.47
0
Nestle
Food Processing
2618.05
57.10
2.23
1
Hindalco
Iron & Steel
536.90
9.95
1.89
2
Grasim
Diversified
2262.80
34.70
1.56
3
BPCL
Refineries
633.50
8.80
1.41
4
Cipla
Pharmaceuticals
1503.50
18.25
1.23
5
Sun Pharma
Pharmaceuticals
1625.20
19.50
1.21
6
Dr Reddys Labs
Pharmaceuticals
6399.15
75.00
1.19
7
Bharti Airtel
Telecommunications - Service
1213.45
13.75
1.15
8
Bajaj Finserv
Finance - Investments
1600.80
18.00
1.14
9
Adani Ports
Infrastructure - General
1337.70
12.25
0.92
10
Divis Labs
Pharmaceuticals
3628.80
27.20
0.76
11
UltraTechCement
Cement - Major
9743.70
70.55
0.73
12
Coal India
Mining & Minerals
461.15
2.55
0.56
13
HCL Tech
Computers - Software
1646.50
8.85
0.54
14
Britannia
Food Processing
4912.15
23.00
0.47
15
SBI Life Insura
Life & Health Insurance
1516.65
6.25
0.41
16
TCS
Computers - Software
4122.65
14.05
0.34
17
Adani Enterpris
Trading
3237.40
10.85
0.34
18
Apollo Hospital
Hospitals & Medical Services
6066.90
18.10
0.30
19
Eicher Motors
Auto - LCVs & HCVs
3790.15
4.90
0.13
20
Reliance
Refineries
2960.00
2.15
0.07
21
ITC
Diversified
413.55
0.00
0.00
0
Tata Steel
Iron & Steel
153.75
-3.50
-2.23
1
Bajaj Auto
Auto - 2 & 3 Wheelers
8695.05
-184.00
-2.07
2
IndusInd Bank
Banks - Private Sector
1539.75
-24.00
-1.53
3
Power Grid Corp
Power - Generation & Distribution
288.20
-4.45
-1.52
4
Infosys
Computers - Software
1592.85
-23.60
-1.46
5
Tata Motors
Auto - LCVs & HCVs
1027.00
-12.30
-1.18
6
Kotak Mahindra
Banks - Private Sector
1729.00
-19.05
-1.09
7
HDFC Bank
Banks - Private Sector
1430.85
-15.25
-1.05
8
UPL
Chemicals
477.45
-4.85
-1.01
9
Maruti Suzuki
Auto - Cars & Jeeps
11394.05
-116.00
-1.01
10
Larsen
Infrastructure - General
3649.00
-27.95
-0.76
11
ONGC
Oil Drilling And Exploration
276.10
-2.00
-0.72
12
NTPC
Power - Generation & Distribution
349.90
-2.30
-0.65
13
SBI
Banks - Public Sector
783.00
-5.05
-0.64
14
Hero Motocorp
Auto - 2 & 3 Wheelers
4623.00
-29.90
-0.64
15
M&M
Auto - Cars & Jeeps
1885.85
-11.70
-0.62
16
ICICI Bank
Banks - Private Sector
1081.40
-6.55
-0.60
17
HUL
Personal Care
2405.55
-14.00
-0.58
18
LTIMindtree
Computers - Software
5099.10
-23.60
-0.46
19
JSW Steel
Steel - Large
826.50
-3.50
-0.42
20
Wipro
Computers - Software
513.85
-1.65
-0.32
21
HDFC Life
Life & Health Insurance
620.15
-1.80
-0.29
22
Axis Bank
Banks - Private Sector
1108.70
-2.75
-0.25
23
Tech Mahindra
Computers - Software
1285.00
-3.15
-0.24
24
Asian Paints
Paints & Varnishes
2864.20
-6.35
-0.22
25
Titan Company
Miscellaneous
3780.00
-6.90
-0.18
26
Bajaj Finance
Finance - NBFC
6411.25
-9.80
-0.15
0
No Data
No Data
No Data
No Data
No Data
Accesing Items from Series
s1 = pd.Series([421 , 328 , 123 , 234 ])
0 421
1 328
2 123
3 234
dtype: int64
stocks = pd.Series([421 , 328 , 123 , 234 ], index= ["APPLE" ,"AT&T" ,"IBM" ,"NIKE" ])
APPLE 421
AT&T 328
IBM 123
NIKE 234
dtype: int64
stocks['APPLE' ] # this label and value behaves just like dictionary
/tmp/ipykernel_2581436/1593074069.py:1: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]`
stocks[0]
stocks.iloc[3 ] # iloc -> location by index
stocks.loc['APPLE' ] # loc -> location by label
labels= ["APPLE" ,"AT&T" ,"IBM" ,"NIKE" ]
stocks = pd.DataFrame({"value" : [421 , 328 , 123 , 234 ],
"high" : [422.0 , 328 , 124.5 , 235.5 ],
"low" : [420.4 , 335 , 120 , 230 ]} , index= labels)
APPLE
421
422.0
420.4
AT&T
328
328.0
335.0
IBM
123
124.5
120.0
NIKE
234
235.5
230.0
stocks['APPLE' ] # because we give column names here!
stocks.loc['APPLE' ] # row with label 'APPLE'
value 421.0
high 422.0
low 420.4
Name: APPLE, dtype: float64
value 234.0
high 235.5
low 230.0
Name: NIKE, dtype: float64
stocks.loc[['APPLE' ,'NIKE' ]]
APPLE
421
422.0
420.4
NIKE
234
235.5
230.0
stocks.loc[['APPLE' ,'NIKE' ], ['high' ,'low' ]]
APPLE
422.0
420.4
NIKE
235.5
230.0
stocks.iloc[0 ] # give zeroth row
value 421.0
high 422.0
low 420.4
Name: APPLE, dtype: float64
stocks.iloc[2 :] # drop first two row, give remaining
IBM
123
124.5
120.0
NIKE
234
235.5
230.0
stocks.iloc[2 :, :2 ] # drop frist two rows but take first two columns
IBM
123
124.5
NIKE
234
235.5
0 421
1 328
2 123
3 234
dtype: int64
0 True
1 True
2 True
3 True
dtype: bool
0 423
1 330
2 125
3 236
dtype: int64
0 421
1 328
2 123
3 234
dtype: int64
Groupby
del wallet['Unnamed: 0' ] # you can delete a column from dataframe
0
2021-03-07 14:53:28.377359
Music
Amazon
421.207327
1
2020-10-08 09:53:28.377359
Food
Swiggy
328.440080
2
2021-02-23 09:53:28.377359
Books
Amazon
244.679437
3
2020-11-01 14:53:28.377359
Utility
Phone
222.756318
4
2021-06-05 13:53:28.377359
Books
Flipcart
494.128492
...
...
...
...
...
95
2021-07-19 13:53:28.377359
Utility
Phone
388.671213
96
2021-01-12 19:53:28.377359
Books
Flipcart
467.554562
97
2021-03-25 11:53:28.377359
Utility
Phone
320.789434
98
2021-05-13 15:53:28.377359
Travel
Taxi
442.096469
99
2020-10-11 16:53:28.377359
Food
Hotel
100.455501
100 rows × 4 columns
array(['Music', 'Food', 'Books', 'Utility', 'Travel'], dtype=object)
grp = wallet.groupby("category" )
grp # this is a group created by groupby method and on this obhect you do operations
<pandas.core.groupby.generic.DataFrameGroupBy object at 0x7f3aacc81ba0>
category
Books 4929.750393
Food 8281.189173
Music 4233.451868
Travel 6052.931876
Utility 7562.267233
Name: debit, dtype: float64
grp.sum (numeric_only= True )
category
Books
4929.750393
Food
8281.189173
Music
4233.451868
Travel
6052.931876
Utility
7562.267233
category
Books 498.100496
Food 489.143483
Music 421.207327
Travel 494.124399
Utility 499.858182
Name: debit, dtype: float64
category
Books 109.325909
Food 100.455501
Music 101.573276
Travel 117.588729
Utility 103.680791
Name: debit, dtype: float64
category
Books 137.793128
Food 119.856277
Music 105.824988
Travel 96.400518
Utility 119.148546
Name: debit, dtype: float64
category
Books 352.125028
Food 306.710710
Music 264.590742
Travel 378.308242
Utility 280.083972
Name: debit, dtype: float64
problem
Find out total amount spent on flipcart.
Reading tab seperated file
%% file data.tsv
symbol day price
IBM Monday 111.71436961893693
IBM Tuesday 141.21220022208635
IBM Wednesday 112.40571010053796
IBM Thursday 137.54133351926248
IBM Friday 140.25154281801224
MICROSOFT Monday 235.0403622499107
MICROSOFT Tuesday 225.0206535036475
MICROSOFT Wednesday 216.10342426936444
MICROSOFT Thursday 200.38038844494193
MICROSOFT Friday 235.80850482793264
APPLE Monday 321.49182055844256
APPLE Tuesday 340.63612771662815
APPLE Wednesday 303.9065277507285
APPLE Thursday 338.1350605764038
APPLE Friday 318.3912296144338
0
IBM\tMonday\t111.71436961893693
1
IBM\tTuesday\t141.21220022208635
2
IBM\tWednesday\t112.40571010053796
3
IBM\tThursday\t137.54133351926248
4
IBM\tFriday\t140.25154281801224
5
MICROSOFT\tMonday\t235.0403622499107
6
MICROSOFT\tTuesday\t225.0206535036475
7
MICROSOFT\tWednesday\t216.10342426936444
8
MICROSOFT\tThursday\t200.38038844494193
9
MICROSOFT\tFriday\t235.80850482793264
10
APPLE\tMonday\t321.49182055844256
11
APPLE\tTuesday\t340.63612771662815
12
APPLE\tWednesday\t303.9065277507285
13
APPLE\tThursday\t338.1350605764038
14
APPLE\tFriday\t318.3912296144338
pd.read_csv("data.tsv" , delimiter= " \t " )
0
IBM
Monday
111.714370
1
IBM
Tuesday
141.212200
2
IBM
Wednesday
112.405710
3
IBM
Thursday
137.541334
4
IBM
Friday
140.251543
5
MICROSOFT
Monday
235.040362
6
MICROSOFT
Tuesday
225.020654
7
MICROSOFT
Wednesday
216.103424
8
MICROSOFT
Thursday
200.380388
9
MICROSOFT
Friday
235.808505
10
APPLE
Monday
321.491821
11
APPLE
Tuesday
340.636128
12
APPLE
Wednesday
303.906528
13
APPLE
Thursday
338.135061
14
APPLE
Friday
318.391230
wallet.groupby("description" ).sum (numeric_only= True ).loc['Flipcart' ]
debit 2503.255216
Name: Flipcart, dtype: float64
wallet.groupby("description" ).sum (numeric_only= True ).loc['Flipcart' , 'debit' ] # indentifier for row and column in .loc
wallet.groupby("description" )['debit' ].sum ().loc['Flipcart' ]
wallet.groupby("description" ).sum (numeric_only= True )
description
Amazon
2504.690567
Amazon Kindle
1389.052376
Auto
2210.428935
Electricity
2885.064355
Flipcart
2503.255216
Hotel
2752.174732
Metro
1216.463665
Netflix
1546.567562
Phone
4677.202878
Swiggy
1936.495366
Taxi
2626.039276
Zomato
3592.519075
spotify
1219.636541
grpdescsum = wallet.groupby("description" ).sum (numeric_only= True )
grpdescsum # this is a dataframe with index/labels coming from description column
description
Amazon
2504.690567
Amazon Kindle
1389.052376
Auto
2210.428935
Electricity
2885.064355
Flipcart
2503.255216
Hotel
2752.174732
Metro
1216.463665
Netflix
1546.567562
Phone
4677.202878
Swiggy
1936.495366
Taxi
2626.039276
Zomato
3592.519075
spotify
1219.636541
AttributeError: 'DataFrame' object has no attribute 'description'
0
Amazon
2504.690567
1
Amazon Kindle
1389.052376
2
Auto
2210.428935
3
Electricity
2885.064355
4
Flipcart
2503.255216
5
Hotel
2752.174732
6
Metro
1216.463665
7
Netflix
1546.567562
8
Phone
4677.202878
9
Swiggy
1936.495366
10
Taxi
2626.039276
11
Zomato
3592.519075
12
spotify
1219.636541
combining data from two dataframes
%% file stocks1.csv
symbol,value,high,low
APPLE,421 ,422.0 ,420.4
AT& T,328 ,328.0 ,335.0
IBM,123 ,124.5 ,120.0
NIKE,234 ,235.5 ,230.0
%% file stocks2.csv
symbol,value,high,low
AGILENT,521 ,522.0 ,520.4
XEROX,428 ,428.0 ,435.0
TESLA,623 ,624.5 ,620.0
stocks1 = pd.read_csv("stocks1.csv" )
stocks2 = pd.read_csv("stocks2.csv" )
0
APPLE
421
422.0
420.4
1
AT&T
328
328.0
335.0
2
IBM
123
124.5
120.0
3
NIKE
234
235.5
230.0
0
AGILENT
521
522.0
520.4
1
XEROX
428
428.0
435.0
2
TESLA
623
624.5
620.0
pd.concat([stocks1, stocks2])
0
APPLE
421
422.0
420.4
1
AT&T
328
328.0
335.0
2
IBM
123
124.5
120.0
3
NIKE
234
235.5
230.0
0
AGILENT
521
522.0
520.4
1
XEROX
428
428.0
435.0
2
TESLA
623
624.5
620.0
labels = ["APPLE" ,"AT&T" ,"IBM" ,"NIKE" ]
value = pd.Series([234.5 , 221.6 , 125.7 , 100.5 ], index= labels)
high = pd.Series([240.32 , 222.5 , 127.3 , 105.0 ], index= labels)
low = pd.Series([233.0 , 220.0 , 123.0 , 104.0 ], index= labels)
volume = pd.Series([100 , 200 , 50 , 1000 ], index= labels)
stocks = pd.DataFrame({"value" :value, "high" :high, "low" :low, "volume" :volume})
APPLE
234.5
240.32
233.0
100
AT&T
221.6
222.50
220.0
200
IBM
125.7
127.30
123.0
50
NIKE
100.5
105.00
104.0
1000
stocks1 = pd.DataFrame({
"value" : [125 , 500.0 , 300.4 , 423.9 ],
"low" : [125.0 , 490.0 , 299.5 , 421.1 ],
"high" : [130.0 , 500.0 , 305.0 , 425.5 ],
"volume" : [123 , 50 , 100 , 80 ]},
index = ["BELL" ,"XEROX" ,"FORD" ,"TESLA" ])
BELL
125.0
125.0
130.0
123
XEROX
500.0
490.0
500.0
50
FORD
300.4
299.5
305.0
100
TESLA
423.9
421.1
425.5
80
pd.concat(stocks, stocks1)
TypeError: concat() takes 1 positional argument but 2 were given
pd.concat([stocks, stocks1])
APPLE
234.5
240.32
233.0
100
AT&T
221.6
222.50
220.0
200
IBM
125.7
127.30
123.0
50
NIKE
100.5
105.00
104.0
1000
BELL
125.0
130.00
125.0
123
XEROX
500.0
500.00
490.0
50
FORD
300.4
305.00
299.5
100
TESLA
423.9
425.50
421.1
80
df = pd.concat([stocks, stocks1])
APPLE
234.5
240.32
233.0
100
AT&T
221.6
222.50
220.0
200
IBM
125.7
127.30
123.0
50
NIKE
100.5
105.00
104.0
1000
BELL
125.0
130.00
125.0
123
XEROX
500.0
500.00
490.0
50
FORD
300.4
305.00
299.5
100
TESLA
423.9
425.50
421.1
80
symbol
APPLE
234.5
240.32
233.0
100
AT&T
221.6
222.50
220.0
200
IBM
125.7
127.30
123.0
50
NIKE
100.5
105.00
104.0
1000
BELL
125.0
130.00
125.0
123
XEROX
500.0
500.00
490.0
50
FORD
300.4
305.00
299.5
100
TESLA
423.9
425.50
421.1
80
df1 = df[['value' , 'volume' ]]
symbol
APPLE
234.5
100
AT&T
221.6
200
IBM
125.7
50
NIKE
100.5
1000
BELL
125.0
123
XEROX
500.0
50
FORD
300.4
100
TESLA
423.9
80
df2 = df[['high' , 'low' ]]
symbol
APPLE
240.32
233.0
AT&T
222.50
220.0
IBM
127.30
123.0
NIKE
105.00
104.0
BELL
130.00
125.0
XEROX
500.00
490.0
FORD
305.00
299.5
TESLA
425.50
421.1
df1.join(df2) # by default join will be done on index
symbol
APPLE
234.5
100
240.32
233.0
AT&T
221.6
200
222.50
220.0
IBM
125.7
50
127.30
123.0
NIKE
100.5
1000
105.00
104.0
BELL
125.0
123
130.00
125.0
XEROX
500.0
50
500.00
490.0
FORD
300.4
100
305.00
299.5
TESLA
423.9
80
425.50
421.1
Writing csv and excel files using pandas
BELL
125.0
125.0
130.0
123
XEROX
500.0
490.0
500.0
50
FORD
300.4
299.5
305.0
100
TESLA
423.9
421.1
425.5
80
symbol
APPLE
234.5
240.32
233.0
100
AT&T
221.6
222.50
220.0
200
IBM
125.7
127.30
123.0
50
NIKE
100.5
105.00
104.0
1000
BELL
125.0
130.00
125.0
123
XEROX
500.0
500.00
490.0
50
FORD
300.4
305.00
299.5
100
TESLA
423.9
425.50
421.1
80
symbol
APPLE
234.5
100
AT&T
221.6
200
IBM
125.7
50
NIKE
100.5
1000
BELL
125.0
123
XEROX
500.0
50
FORD
300.4
100
TESLA
423.9
80
symbol
APPLE
240.32
233.0
AT&T
222.50
220.0
IBM
127.30
123.0
NIKE
105.00
104.0
BELL
130.00
125.0
XEROX
500.00
490.0
FORD
305.00
299.5
TESLA
425.50
421.1
df1.to_csv("dataframe1.csv" ) # path of file
symbol,value,volume
APPLE,234.5,100
AT&T,221.6,200
IBM,125.7,50
NIKE,100.5,1000
BELL,125.0,123
XEROX,500.0,50
FORD,300.4,100
TESLA,423.9,80
symbol
APPLE
234.5
240.32
233.0
100
AT&T
221.6
222.50
220.0
200
IBM
125.7
127.30
123.0
50
NIKE
100.5
105.00
104.0
1000
BELL
125.0
130.00
125.0
123
XEROX
500.0
500.00
490.0
50
FORD
300.4
305.00
299.5
100
TESLA
423.9
425.50
421.1
80
symbol
APPLE
234.5
240.32
233.0
100
AT&T
221.6
222.50
220.0
200
IBM
125.7
127.30
123.0
50
NIKE
100.5
105.00
104.0
1000
BELL
125.0
130.00
125.0
123
XEROX
500.0
500.00
490.0
50
FORD
300.4
305.00
299.5
100
TESLA
423.9
425.50
421.1
80
import xlsxwriter
def write_stocks_in_excel(stocksdata, filename):
workbook = xlsxwriter.Workbook(filename)
worksheet = workbook.add_worksheet("stocks" )
bold = workbook.add_format({'bold' :True })
# write headers
worksheet.write(0 , 0 , stocksdata.index.name, bold)
for c,colname in enumerate (stocksdata.columns, start= 1 ):
worksheet.write(0 , c, colname, bold)
for r, symbol in enumerate (stocksdata.index, start= 1 ):
worksheet.write(r, 0 , symbol, bold )
for c, colname in enumerate (stocksdata.columns, start= 1 ):
for r, data in enumerate (stocksdata[colname], start= 1 ):
worksheet.write(r, c, data)
workbook.close()
write_stocks_in_excel(df, "stock_data.xlsx" )
symbol
APPLE
234.5
240.32
233.0
100
AT&T
221.6
222.50
220.0
200
IBM
125.7
127.30
123.0
50
NIKE
100.5
105.00
104.0
1000
BELL
125.0
130.00
125.0
123
XEROX
500.0
500.00
490.0
50
FORD
300.4
305.00
299.5
100
TESLA
423.9
425.50
421.1
80
symbol
APPLE
234.5
240.32
233.0
100
AT&T
221.6
222.50
220.0
200
IBM
125.7
127.30
123.0
50
NIKE
100.5
105.00
104.0
1000
BELL
125.0
130.00
125.0
123
XEROX
500.0
500.00
490.0
50
FORD
300.4
305.00
299.5
100
TESLA
423.9
425.50
421.1
80
writer = pd.ExcelWriter("exceldata.xlsx" , engine= 'xlsxwriter' )
df.to_excel(writer, sheet_name= "data" )
with pd.ExcelWriter("exceldata.xlsx" , engine= 'xlsxwriter' ) as writer:
df1.to_excel(writer, sheet_name= "df1" )
df2.to_excel(writer, sheet_name= "df2" )
with pd.ExcelWriter("shifted_loc.xlsx" , engine= 'xlsxwriter' ) as writer:
df.to_excel(writer, sheet_name= "data" , startrow= 2 , startcol= 2 )