Module 3 - Day 1

Login to Lab using your credentials. There is a notebook with name 3-1.ipynb already created for you. Open that and use it for today’s training.

Shut down all previous notebooks.

You can access live notes from https://live.arcesium-lab.pipal.in

Pandas

pandas is called as spreadsheet of python

You can install pandas library using pip module as given below

pip install pandas

How to load csv data into pandas

A csv file is located in internet . Url is “https://raw.githubusercontent.com/vikipedia/python-trainings/master/online_course/source/module2/wallet.csv”

csvurl = "https://raw.githubusercontent.com/vikipedia/python-trainings/master/online_course/source/module2/wallet.csv"

!ls *.csv

csvdata.csv  salary.csv  stocks.csv  tabular1.csv  tabular.csv

pd.read_csv("stocks.csv")

---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
Cell In[8], line 1
----> 1 pd.read_csv("stocks.csv")

NameError: name 'pd' is not defined

import pandas as pd

!cat salary.csv

11111,22222,33333,40000,50000

!cat stocks.csv

TATA,200.0,5.5
INFY,2000.0,-5
RELIANCE,1505.5,50.0
HCL,1200,70.5

pd.read_csv("stocks.csv") # just filename is enough because it is in current working directory

	TATA	200.0	5.5
0	INFY	2000.0	-5.0
1	RELIANCE	1505.5	50.0
2	HCL	1200.0	70.5

data = pd.read_csv("stocks.csv")

data # __repr__ method of data is returning something that looks like a nice html table

	TATA	200.0	5.5
0	INFY	2000.0	-5.0
1	RELIANCE	1505.5	50.0
2	HCL	1200.0	70.5

data_internet = pd.read_csv(csvurl)

data_internet

	Unnamed: 0	date	category	description	debit
0	0	2021-03-07 14:53:28.377359	Music	Amazon	421.207327
1	1	2020-10-08 09:53:28.377359	Food	Swiggy	328.440080
2	2	2021-02-23 09:53:28.377359	Books	Amazon	244.679437
3	3	2020-11-01 14:53:28.377359	Utility	Phone	222.756318
4	4	2021-06-05 13:53:28.377359	Books	Flipcart	494.128492
...	...	...	...	...	...
95	95	2021-07-19 13:53:28.377359	Utility	Phone	388.671213
96	96	2021-01-12 19:53:28.377359	Books	Flipcart	467.554562
97	97	2021-03-25 11:53:28.377359	Utility	Phone	320.789434
98	98	2021-05-13 15:53:28.377359	Travel	Taxi	442.096469
99	99	2020-10-11 16:53:28.377359	Food	Hotel	100.455501

100 rows × 5 columns

type(data)

pandas.core.frame.DataFrame

type(data_internet)

pandas.core.frame.DataFrame

excel_url = "https://github.com/vikipedia/python-trainings/raw/master/online_course/source/module2/wallet.xlsx"

pd.read_excel(excel_url)

	Unnamed: 0	date	category	description	debit
0	0	2021-03-07 14:53:28.377359	Music	Amazon	421.207327
1	1	2020-10-08 09:53:28.377359	Food	Swiggy	328.440080
2	2	2021-02-23 09:53:28.377359	Books	Amazon	244.679437
3	3	2020-11-01 14:53:28.377359	Utility	Phone	222.756318
4	4	2021-06-05 13:53:28.377359	Books	Flipcart	494.128492
...	...	...	...	...	...
95	95	2021-07-19 13:53:28.377359	Utility	Phone	388.671213
96	96	2021-01-12 19:53:28.377359	Books	Flipcart	467.554562
97	97	2021-03-25 11:53:28.377359	Utility	Phone	320.789434
98	98	2021-05-13 15:53:28.377359	Travel	Taxi	442.096469
99	99	2020-10-11 16:53:28.377359	Food	Hotel	100.455501

100 rows × 5 columns

problem

try to load data from internet from this website “https://www.moneycontrol.com/markets/indian-indices/” using pd.read_html()
examin what is returned result

import openpyxl

moneycontrolurl = "https://www.moneycontrol.com/markets/indian-indices/"

moneycontrol = pd.read_html(moneycontrolurl)

mdata = pd.read_html("https://www.moneycontrol.com/markets/indian-indices/")

type(mdata)

list

len(mdata)

mdata[0]

	Stock Name	Sector	LTP	Change	%Chg
0	No Data	No Data	No Data	No Data	No Data

mdata[1]

	Stock Name	Sector	LTP	Change	%Chg
0	No Data	No Data	No Data	No Data	No Data

mdata[2]

	Stock Name	Sector	LTP	Change	%Chg
0	ONGC	Oil Drilling And Exploration	330.70	-4.60	-1.37
1	Bharti Airtel	Telecommunications - Service	1454.50	-15.15	-1.03
2	Cipla	Pharmaceuticals	1562.80	-12.70	-0.81
3	Tata Steel	Iron & Steel	152.88	-1.08	-0.70
4	JSW Steel	Steel - Large	911.60	-6.15	-0.67
5	M&M	Auto - Cars & Jeeps	2750.00	-15.15	-0.55
6	TATA Cons. Prod	Plantations - Tea & Coffee	1171.50	-5.90	-0.50
7	Tata Motors	Auto - LCVs & HCVs	1082.55	-5.15	-0.47
8	ITC	Diversified	499.90	-1.55	-0.31
9	Adani Enterpris	Trading	3096.00	-6.55	-0.21
10	Adani Ports	Infrastructure - General	1495.65	-0.80	-0.05
11	Power Grid Corp	Power - Generation & Distribution	340.20	-0.10	-0.03

mdata[3]

	Stock Name	Sector	LTP	Change	%Chg
0	Hero Motocorp	Auto - 2 & 3 Wheelers	5340.95	152.05	2.93
1	SBI Life Insura	Life & Health Insurance	1715.40	43.85	2.62
2	IndusInd Bank	Banks - Private Sector	1381.20	33.60	2.49
3	HDFC Life	Life & Health Insurance	701.85	16.15	2.36
4	Bajaj Finserv	Finance - Investments	1577.65	26.55	1.71
5	BPCL	Refineries	349.60	5.80	1.69
6	UltraTechCement	Cement - Major	11506.00	190.65	1.68
7	Axis Bank	Banks - Private Sector	1171.50	18.25	1.58
8	Eicher Motors	Auto - LCVs & HCVs	4882.15	68.85	1.43
9	Shriram Finance	Finance - Leasing & Hire Purchase	3116.95	41.35	1.34
10	Hindalco	Iron & Steel	667.00	8.15	1.24
11	Kotak Mahindra	Banks - Private Sector	1802.60	21.25	1.19
12	Tech Mahindra	Computers - Software	1611.15	16.50	1.03
13	LTIMindtree	Computers - Software	5734.80	58.70	1.03
14	Bajaj Finance	Finance - NBFC	6684.55	68.20	1.03
15	Wipro	Computers - Software	524.00	4.25	0.82
16	Grasim	Diversified	2619.00	20.60	0.79
17	Infosys	Computers - Software	1878.95	14.15	0.76
18	Reliance	Refineries	2999.00	22.20	0.75
19	Bajaj Auto	Auto - 2 & 3 Wheelers	9828.85	58.20	0.60
20	Coal India	Mining & Minerals	525.00	3.00	0.57
21	TCS	Computers - Software	4515.05	25.05	0.56
22	Asian Paints	Paints & Varnishes	3093.00	16.70	0.54
23	SBI	Banks - Public Sector	818.00	4.30	0.53
24	NTPC	Power - Generation & Distribution	405.15	2.05	0.51
25	Maruti Suzuki	Auto - Cars & Jeeps	12206.15	56.35	0.46
26	HDFC Bank	Banks - Private Sector	1637.95	6.40	0.39
27	Nestle	Food Processing	2511.00	7.85	0.31
28	Larsen	Infrastructure - General	3566.05	11.00	0.31
29	Divis Labs	Pharmaceuticals	4682.90	13.00	0.28
30	HCL Tech	Computers - Software	1682.95	4.45	0.27
31	ICICI Bank	Banks - Private Sector	1179.00	3.10	0.26
32	Britannia	Food Processing	5747.50	15.00	0.26
33	Dr Reddys Labs	Pharmaceuticals	6922.35	11.00	0.16
34	Sun Pharma	Pharmaceuticals	1749.55	1.75	0.10
35	Titan Company	Miscellaneous	3467.30	2.45	0.07
36	Apollo Hospital	Hospitals & Medical Services	6728.40	3.50	0.05
37	HUL	Personal Care	2742.90	0.35	0.01

mdata[4]

	Stock Name	Sector	LTP	Change	%Chg
0	No Data	No Data	No Data	No Data	No Data

mdata[5]

	Stock Name	Sector	LTP	Change	%Chg
0	No Data	No Data	No Data	No Data	No Data

import requests
def download(url, filename):
    r = requests.get(url)
    with open(filename, "wb") as f:
        f.write(r.content)

download(excel_url, "wallet.xlsx")

pd.read_excel("wallet.xlsx", sheet_name="squrs")

	x	sqr(x)
0	1	1
1	2	4
2	3	9
3	4	16
4	5	25
5	6	36
6	7	49
7	8	64
8	9	81
9	10	100
10	11	121
11	12	144
12	13	169
13	14	196
14	15	225
15	16	256
16	17	289
17	18	324

pd.read_excel("wallet.xlsx", sheet_name="wallet")

	Unnamed: 0	date	category	description	debit
0	0	2021-03-07 14:53:28.377359	Music	Amazon	421.207327
1	1	2020-10-08 09:53:28.377359	Food	Swiggy	328.440080
2	2	2021-02-23 09:53:28.377359	Books	Amazon	244.679437
3	3	2020-11-01 14:53:28.377359	Utility	Phone	222.756318
4	4	2021-06-05 13:53:28.377359	Books	Flipcart	494.128492
...	...	...	...	...	...
95	95	2021-07-19 13:53:28.377359	Utility	Phone	388.671213
96	96	2021-01-12 19:53:28.377359	Books	Flipcart	467.554562
97	97	2021-03-25 11:53:28.377359	Utility	Phone	320.789434
98	98	2021-05-13 15:53:28.377359	Travel	Taxi	442.096469
99	99	2020-10-11 16:53:28.377359	Food	Hotel	100.455501

100 rows × 5 columns

data

	TATA	200.0	5.5
0	INFY	2000.0	-5.0
1	RELIANCE	1505.5	50.0
2	HCL	1200.0	70.5

wallet = pd.read_csv(csvurl)

wallet

	Unnamed: 0	date	category	description	debit
0	0	2021-03-07 14:53:28.377359	Music	Amazon	421.207327
1	1	2020-10-08 09:53:28.377359	Food	Swiggy	328.440080
2	2	2021-02-23 09:53:28.377359	Books	Amazon	244.679437
3	3	2020-11-01 14:53:28.377359	Utility	Phone	222.756318
4	4	2021-06-05 13:53:28.377359	Books	Flipcart	494.128492
...	...	...	...	...	...
95	95	2021-07-19 13:53:28.377359	Utility	Phone	388.671213
96	96	2021-01-12 19:53:28.377359	Books	Flipcart	467.554562
97	97	2021-03-25 11:53:28.377359	Utility	Phone	320.789434
98	98	2021-05-13 15:53:28.377359	Travel	Taxi	442.096469
99	99	2020-10-11 16:53:28.377359	Food	Hotel	100.455501

100 rows × 5 columns

wallet.info() # method

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 100 entries, 0 to 99
Data columns (total 5 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   Unnamed: 0   100 non-null    int64  
 1   date         100 non-null    object 
 2   category     100 non-null    object 
 3   description  100 non-null    object 
 4   debit        100 non-null    float64
dtypes: float64(1), int64(1), object(3)
memory usage: 4.0+ KB

wallet.head() # first few rows of data

	Unnamed: 0	date	category	description	debit
0	0	2021-03-07 14:53:28.377359	Music	Amazon	421.207327
1	1	2020-10-08 09:53:28.377359	Food	Swiggy	328.440080
2	2	2021-02-23 09:53:28.377359	Books	Amazon	244.679437
3	3	2020-11-01 14:53:28.377359	Utility	Phone	222.756318
4	4	2021-06-05 13:53:28.377359	Books	Flipcart	494.128492

wallet.tail() # last few rows of data

	Unnamed: 0	date	category	description	debit
95	95	2021-07-19 13:53:28.377359	Utility	Phone	388.671213
96	96	2021-01-12 19:53:28.377359	Books	Flipcart	467.554562
97	97	2021-03-25 11:53:28.377359	Utility	Phone	320.789434
98	98	2021-05-13 15:53:28.377359	Travel	Taxi	442.096469
99	99	2020-10-11 16:53:28.377359	Food	Hotel	100.455501

Accesing columns from dataframe

wallet.debit # column can be accessed just like class attribute

0     421.207327
1     328.440080
2     244.679437
3     222.756318
4     494.128492
         ...    
95    388.671213
96    467.554562
97    320.789434
98    442.096469
99    100.455501
Name: debit, Length: 100, dtype: float64

wallet['debit'] # just like dictionary

0     421.207327
1     328.440080
2     244.679437
3     222.756318
4     494.128492
         ...    
95    388.671213
96    467.554562
97    320.789434
98    442.096469
99    100.455501
Name: debit, Length: 100, dtype: float64

column = wallet['debit']

column

0     421.207327
1     328.440080
2     244.679437
3     222.756318
4     494.128492
         ...    
95    388.671213
96    467.554562
97    320.789434
98    442.096469
99    100.455501
Name: debit, Length: 100, dtype: float64

type(column)

pandas.core.series.Series

column.sum()

np.float64(31059.590543177277)

column.std()

np.float64(121.17821796983654)

column.max()

np.float64(499.8581815222449)

column.min()

np.float64(100.45550129902664)

column + 3 # vector calculation

0     424.207327
1     331.440080
2     247.679437
3     225.756318
4     497.128492
         ...    
95    391.671213
96    470.554562
97    323.789434
98    445.096469
99    103.455501
Name: debit, Length: 100, dtype: float64

column

0     421.207327
1     328.440080
2     244.679437
3     222.756318
4     494.128492
         ...    
95    388.671213
96    467.554562
97    320.789434
98    442.096469
99    100.455501
Name: debit, Length: 100, dtype: float64

column*3

0     1263.621982
1      985.320241
2      734.038311
3      668.268953
4     1482.385477
         ...     
95    1166.013640
96    1402.663686
97     962.368301
98    1326.289408
99     301.366504
Name: debit, Length: 100, dtype: float64

import numpy

column + 5.0

0     426.207327
1     333.440080
2     249.679437
3     227.756318
4     499.128492
         ...    
95    393.671213
96    472.554562
97    325.789434
98    447.096469
99    105.455501
Name: debit, Length: 100, dtype: float64

column.min() + 100

np.float64(200.45550129902665)

column.min()

np.float64(100.45550129902664)

column

0     421.207327
1     328.440080
2     244.679437
3     222.756318
4     494.128492
         ...    
95    388.671213
96    467.554562
97    320.789434
98    442.096469
99    100.455501
Name: debit, Length: 100, dtype: float64

A series can save a data of single type

wallet.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 100 entries, 0 to 99
Data columns (total 5 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   Unnamed: 0   100 non-null    int64  
 1   date         100 non-null    object 
 2   category     100 non-null    object 
 3   description  100 non-null    object 
 4   debit        100 non-null    float64
dtypes: float64(1), int64(1), object(3)
memory usage: 4.0+ KB

wallet.sum()

Unnamed: 0                                                  4950
date           2021-03-07 14:53:28.3773592020-10-08 09:53:28....
category       MusicFoodBooksUtilityBooksUtilityBooksFoodUtil...
description    AmazonSwiggyAmazonPhoneFlipcartElectricityAmaz...
debit                                               31059.590543
dtype: object

wallet.std()

---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
File /opt/tljh/user/lib/python3.10/site-packages/pandas/core/nanops.py:85, in disallow.__call__.<locals>._f(*args, **kwargs)
     84 try:
---> 85     return f(*args, **kwargs)
     86 except ValueError as e:
     87     # we want to transform an object array
     88     # ValueError message to the more typical TypeError
     89     # e.g. this is normally a disallowed function on
     90     # object arrays that contain strings

File /opt/tljh/user/lib/python3.10/site-packages/pandas/core/nanops.py:147, in bottleneck_switch.__call__.<locals>.f(values, axis, skipna, **kwds)
    146 else:
--> 147     result = alt(values, axis=axis, skipna=skipna, **kwds)
    149 return result

File /opt/tljh/user/lib/python3.10/site-packages/pandas/core/nanops.py:1013, in nanvar(values, axis, skipna, ddof, mask)
   1007 # xref GH10242
   1008 # Compute variance via two-pass algorithm, which is stable against
   1009 # cancellation errors and relatively accurate for small numbers of
   1010 # observations.
   1011 #
   1012 # See https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance
-> 1013 avg = _ensure_numeric(values.sum(axis=axis, dtype=np.float64)) / count
   1014 if axis is not None:

File /opt/tljh/user/lib/python3.10/site-packages/numpy/_core/_methods.py:52, in _sum(a, axis, dtype, out, keepdims, initial, where)
     50 def _sum(a, axis=None, dtype=None, out=None, keepdims=False,
     51          initial=_NoValue, where=True):
---> 52     return umr_sum(a, axis, dtype, out, keepdims, initial, where)

ValueError: could not convert string to float: '2021-03-07 14:53:28.377359'

The above exception was the direct cause of the following exception:

TypeError                                 Traceback (most recent call last)
Cell In[69], line 1
----> 1 wallet.std()

File /opt/tljh/user/lib/python3.10/site-packages/pandas/core/frame.py:11748, in DataFrame.std(self, axis, skipna, ddof, numeric_only, **kwargs)
  11739 @doc(make_doc("std", ndim=2))
  11740 def std(
  11741     self,
   (...)
  11746     **kwargs,
  11747 ):
> 11748     result = super().std(axis, skipna, ddof, numeric_only, **kwargs)
  11749     if isinstance(result, Series):
  11750         result = result.__finalize__(self, method="std")

File /opt/tljh/user/lib/python3.10/site-packages/pandas/core/generic.py:12358, in NDFrame.std(self, axis, skipna, ddof, numeric_only, **kwargs)
  12350 def std(
  12351     self,
  12352     axis: Axis | None = 0,
   (...)
  12356     **kwargs,
  12357 ) -> Series | float:
> 12358     return self._stat_function_ddof(
  12359         "std", nanops.nanstd, axis, skipna, ddof, numeric_only, **kwargs
  12360     )

File /opt/tljh/user/lib/python3.10/site-packages/pandas/core/generic.py:12322, in NDFrame._stat_function_ddof(self, name, func, axis, skipna, ddof, numeric_only, **kwargs)
  12319 elif axis is lib.no_default:
  12320     axis = 0
> 12322 return self._reduce(
  12323     func, name, axis=axis, numeric_only=numeric_only, skipna=skipna, ddof=ddof
  12324 )

File /opt/tljh/user/lib/python3.10/site-packages/pandas/core/frame.py:11562, in DataFrame._reduce(self, op, name, axis, skipna, numeric_only, filter_type, **kwds)
  11558     df = df.T
  11560 # After possibly _get_data and transposing, we are now in the
  11561 #  simple case where we can use BlockManager.reduce
> 11562 res = df._mgr.reduce(blk_func)
  11563 out = df._constructor_from_mgr(res, axes=res.axes).iloc[0]
  11564 if out_dtype is not None and out.dtype != "boolean":

File /opt/tljh/user/lib/python3.10/site-packages/pandas/core/internals/managers.py:1500, in BlockManager.reduce(self, func)
   1498 res_blocks: list[Block] = []
   1499 for blk in self.blocks:
-> 1500     nbs = blk.reduce(func)
   1501     res_blocks.extend(nbs)
   1503 index = Index([None])  # placeholder

File /opt/tljh/user/lib/python3.10/site-packages/pandas/core/internals/blocks.py:404, in Block.reduce(self, func)
    398 @final
    399 def reduce(self, func) -> list[Block]:
    400     # We will apply the function and reshape the result into a single-row
    401     #  Block with the same mgr_locs; squeezing will be done at a higher level
    402     assert self.ndim == 2
--> 404     result = func(self.values)
    406     if self.values.ndim == 1:
    407         res_values = result

File /opt/tljh/user/lib/python3.10/site-packages/pandas/core/frame.py:11481, in DataFrame._reduce.<locals>.blk_func(values, axis)
  11479         return np.array([result])
  11480 else:
> 11481     return op(values, axis=axis, skipna=skipna, **kwds)

File /opt/tljh/user/lib/python3.10/site-packages/pandas/core/nanops.py:147, in bottleneck_switch.__call__.<locals>.f(values, axis, skipna, **kwds)
    145         result = alt(values, axis=axis, skipna=skipna, **kwds)
    146 else:
--> 147     result = alt(values, axis=axis, skipna=skipna, **kwds)
    149 return result

File /opt/tljh/user/lib/python3.10/site-packages/pandas/core/nanops.py:950, in nanstd(values, axis, skipna, ddof, mask)
    947 orig_dtype = values.dtype
    948 values, mask = _get_values(values, skipna, mask=mask)
--> 950 result = np.sqrt(nanvar(values, axis=axis, skipna=skipna, ddof=ddof, mask=mask))
    951 return _wrap_results(result, orig_dtype)

File /opt/tljh/user/lib/python3.10/site-packages/pandas/core/nanops.py:92, in disallow.__call__.<locals>._f(*args, **kwargs)
     86 except ValueError as e:
     87     # we want to transform an object array
     88     # ValueError message to the more typical TypeError
     89     # e.g. this is normally a disallowed function on
     90     # object arrays that contain strings
     91     if is_object_dtype(args[0]):
---> 92         raise TypeError(e) from e
     93     raise

TypeError: could not convert string to float: '2021-03-07 14:53:28.377359'

Series

s1 = pd.Series([12, 23, 34.5, 234, 34, 76])
s2 = pd.Series([2, 2, 2, 2, 2, 2])

s1

0     12.0
1     23.0
2     34.5
3    234.0
4     34.0
5     76.0
dtype: float64

s1 + s2 # vector operation

0     14.0
1     25.0
2     36.5
3    236.0
4     36.0
5     78.0
dtype: float64

s1 - s2

0     10.0
1     21.0
2     32.5
3    232.0
4     32.0
5     74.0
dtype: float64

s1 * s2

0     24.0
1     46.0
2     69.0
3    468.0
4     68.0
5    152.0
dtype: float64

s1/s2

0      6.00
1     11.50
2     17.25
3    117.00
4     17.00
5     38.00
dtype: float64

a = pd.Series([10, 20, 30, 40, 50], index = ['a','b','c','d','e'])

a # we changed the labels!

a    10
b    20
c    30
d    40
e    50
dtype: int64

A = pd.Series([10, 20, 30, 40, 50]) # labels are takes as row number

0    10
1    20
2    30
3    40
4    50
dtype: int64

a    10
b    20
c    30
d    40
e    50
dtype: int64

A[0]

np.int64(10)

A[1]

np.int64(20)

a['a']

np.int64(10)

a['e']

np.int64(50)

a.iloc[0] # by row number

np.int64(10)

a.iloc[4] #

np.int64(50)

a['d']

np.int64(40)

DataFrame

!cat stocks.csv

TATA,200.0,5.5
INFY,2000.0,-5
RELIANCE,1505.5,50.0
HCL,1200,70.5

pd.read_csv("stocks.csv")

	TATA	200.0	5.5
0	INFY	2000.0	-5.0
1	RELIANCE	1505.5	50.0
2	HCL	1200.0	70.5

%%file stocks1.csv
ticker,value,gain
TATA,200.0,5.5
INFY,2000.0,-5
RELIANCE,1505.5,50.0
HCL,1200,70.5

Writing stocks1.csv

stocks = pd.read_csv("stocks1.csv")

stocks

	ticker	value	gain
0	TATA	200.0	5.5
1	INFY	2000.0	-5.0
2	RELIANCE	1505.5	50.0
3	HCL	1200.0	70.5

stocks['ticker']

0        TATA
1        INFY
2    RELIANCE
3         HCL
Name: ticker, dtype: object

stocks.ticker

0        TATA
1        INFY
2    RELIANCE
3         HCL
Name: ticker, dtype: object

stocks_labeled = pd.read_csv("stocks1.csv", index_col=0)

stocks_labeled

	value	gain
ticker
TATA	200.0	5.5
INFY	2000.0	-5.0
RELIANCE	1505.5	50.0
HCL	1200.0	70.5

stocks_labeled.loc['TATA'] # to access row from dataframe

value    200.0
gain       5.5
Name: TATA, dtype: float64

stocks_labeled.loc[['TATA','RELIANCE']]

	value	gain
ticker
TATA	200.0	5.5
RELIANCE	1505.5	50.0

stocks_labeled.iloc[0]

value    200.0
gain       5.5
Name: TATA, dtype: float64

stocks_labeled.iloc[2]

value    1505.5
gain       50.0
Name: RELIANCE, dtype: float64

stocks_labeled.iloc[[0,2,3]]

	value	gain
ticker
TATA	200.0	5.5
RELIANCE	1505.5	50.0
HCL	1200.0	70.5

problem - Find total debit amount in dataframe loaded from wallet.csv - What is that value of ‘category’ for 70th row.

wallet.debit.sum()

np.float64(31059.590543177277)

wallet.category.iloc[70] # selct a column first , then row

'Utility'

wallet.iloc[70]

Unnamed: 0                             70
date           2021-08-17 13:53:28.377359
category                          Utility
description                         Phone
debit                          125.229773
Name: 70, dtype: object

wallet.iloc[70]['category'] # select a row first, then column

'Utility'

wallet

	Unnamed: 0	date	category	description	debit
0	0	2021-03-07 14:53:28.377359	Music	Amazon	421.207327
1	1	2020-10-08 09:53:28.377359	Food	Swiggy	328.440080
2	2	2021-02-23 09:53:28.377359	Books	Amazon	244.679437
3	3	2020-11-01 14:53:28.377359	Utility	Phone	222.756318
4	4	2021-06-05 13:53:28.377359	Books	Flipcart	494.128492
...	...	...	...	...	...
95	95	2021-07-19 13:53:28.377359	Utility	Phone	388.671213
96	96	2021-01-12 19:53:28.377359	Books	Flipcart	467.554562
97	97	2021-03-25 11:53:28.377359	Utility	Phone	320.789434
98	98	2021-05-13 15:53:28.377359	Travel	Taxi	442.096469
99	99	2020-10-11 16:53:28.377359	Food	Hotel	100.455501

100 rows × 5 columns

wallet['Unnamed: 0'] # if column has special chars and or space then you will have to
                     # access that column using square brackets and columns names in it

0      0
1      1
2      2
3      3
4      4
      ..
95    95
96    96
97    97
98    98
99    99
Name: Unnamed: 0, Length: 100, dtype: int64

wallet.category

0       Music
1        Food
2       Books
3     Utility
4       Books
       ...   
95    Utility
96      Books
97    Utility
98     Travel
99       Food
Name: category, Length: 100, dtype: object

Filtering

s1 = pd.Series([12, 23, 34.5, 234, 34, 76])
s2 = pd.Series([2, 2, 2, 2, 2, 2])

s1 > 30 # boolean series

0    False
1    False
2     True
3     True
4     True
5     True
dtype: bool

s1[s1>30] # this will select rows for which the condition is true

2     34.5
3    234.0
4     34.0
5     76.0
dtype: float64

s2[s1>30]  # take rows from s2 where corresponding row number of s1 has value greater than 30!

2    2
3    2
4    2
5    2
dtype: int64

wallet

	Unnamed: 0	date	category	description	debit
0	0	2021-03-07 14:53:28.377359	Music	Amazon	421.207327
1	1	2020-10-08 09:53:28.377359	Food	Swiggy	328.440080
2	2	2021-02-23 09:53:28.377359	Books	Amazon	244.679437
3	3	2020-11-01 14:53:28.377359	Utility	Phone	222.756318
4	4	2021-06-05 13:53:28.377359	Books	Flipcart	494.128492
...	...	...	...	...	...
95	95	2021-07-19 13:53:28.377359	Utility	Phone	388.671213
96	96	2021-01-12 19:53:28.377359	Books	Flipcart	467.554562
97	97	2021-03-25 11:53:28.377359	Utility	Phone	320.789434
98	98	2021-05-13 15:53:28.377359	Travel	Taxi	442.096469
99	99	2020-10-11 16:53:28.377359	Food	Hotel	100.455501

100 rows × 5 columns

wallet[wallet.category=='Music']

	Unnamed: 0	date	category	description	debit
0	0	2021-03-07 14:53:28.377359	Music	Amazon	421.207327
12	12	2021-07-24 14:53:28.377359	Music	Amazon	329.536003
23	23	2020-12-11 10:53:28.377359	Music	Netflix	354.940241
37	37	2021-04-18 16:53:28.377359	Music	Amazon	266.069078
49	49	2021-03-16 09:53:28.377359	Music	spotify	232.303402
54	54	2020-11-16 10:53:28.377359	Music	spotify	160.817543
65	65	2020-10-23 18:53:28.377359	Music	Netflix	188.748743
67	67	2021-07-31 14:53:28.377359	Music	Netflix	324.786917
76	76	2020-11-17 09:53:28.377359	Music	Netflix	197.534600
78	78	2020-09-09 16:53:28.377359	Music	spotify	415.372894
79	79	2021-08-17 09:53:28.377359	Music	Netflix	321.763416
82	82	2020-09-22 09:53:28.377359	Music	spotify	411.142701
84	84	2020-09-21 10:53:28.377359	Music	Netflix	158.793646
85	85	2021-01-12 09:53:28.377359	Music	Amazon	130.374908
89	89	2021-04-12 14:53:28.377359	Music	Amazon	218.487173
90	90	2020-12-01 14:53:28.377359	Music	Amazon	101.573276

wallet

	Unnamed: 0	date	category	description	debit
0	0	2021-03-07 14:53:28.377359	Music	Amazon	421.207327
1	1	2020-10-08 09:53:28.377359	Food	Swiggy	328.440080
2	2	2021-02-23 09:53:28.377359	Books	Amazon	244.679437
3	3	2020-11-01 14:53:28.377359	Utility	Phone	222.756318
4	4	2021-06-05 13:53:28.377359	Books	Flipcart	494.128492
...	...	...	...	...	...
95	95	2021-07-19 13:53:28.377359	Utility	Phone	388.671213
96	96	2021-01-12 19:53:28.377359	Books	Flipcart	467.554562
97	97	2021-03-25 11:53:28.377359	Utility	Phone	320.789434
98	98	2021-05-13 15:53:28.377359	Travel	Taxi	442.096469
99	99	2020-10-11 16:53:28.377359	Food	Hotel	100.455501

100 rows × 5 columns

music = wallet[wallet.category=='Music']

music

	Unnamed: 0	date	category	description	debit
0	0	2021-03-07 14:53:28.377359	Music	Amazon	421.207327
12	12	2021-07-24 14:53:28.377359	Music	Amazon	329.536003
23	23	2020-12-11 10:53:28.377359	Music	Netflix	354.940241
37	37	2021-04-18 16:53:28.377359	Music	Amazon	266.069078
49	49	2021-03-16 09:53:28.377359	Music	spotify	232.303402
54	54	2020-11-16 10:53:28.377359	Music	spotify	160.817543
65	65	2020-10-23 18:53:28.377359	Music	Netflix	188.748743
67	67	2021-07-31 14:53:28.377359	Music	Netflix	324.786917
76	76	2020-11-17 09:53:28.377359	Music	Netflix	197.534600
78	78	2020-09-09 16:53:28.377359	Music	spotify	415.372894
79	79	2021-08-17 09:53:28.377359	Music	Netflix	321.763416
82	82	2020-09-22 09:53:28.377359	Music	spotify	411.142701
84	84	2020-09-21 10:53:28.377359	Music	Netflix	158.793646
85	85	2021-01-12 09:53:28.377359	Music	Amazon	130.374908
89	89	2021-04-12 14:53:28.377359	Music	Amazon	218.487173
90	90	2020-12-01 14:53:28.377359	Music	Amazon	101.573276

music.debit.sum() # total expenditure done on Music!

np.float64(4233.45186823271)

music.info()

<class 'pandas.core.frame.DataFrame'>
Index: 16 entries, 0 to 90
Data columns (total 5 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   Unnamed: 0   16 non-null     int64  
 1   date         16 non-null     object 
 2   category     16 non-null     object 
 3   description  16 non-null     object 
 4   debit        16 non-null     float64
dtypes: float64(1), int64(1), object(3)
memory usage: 768.0+ bytes

music

	Unnamed: 0	date	category	description	debit
0	0	2021-03-07 14:53:28.377359	Music	Amazon	421.207327
12	12	2021-07-24 14:53:28.377359	Music	Amazon	329.536003
23	23	2020-12-11 10:53:28.377359	Music	Netflix	354.940241
37	37	2021-04-18 16:53:28.377359	Music	Amazon	266.069078
49	49	2021-03-16 09:53:28.377359	Music	spotify	232.303402
54	54	2020-11-16 10:53:28.377359	Music	spotify	160.817543
65	65	2020-10-23 18:53:28.377359	Music	Netflix	188.748743
67	67	2021-07-31 14:53:28.377359	Music	Netflix	324.786917
76	76	2020-11-17 09:53:28.377359	Music	Netflix	197.534600
78	78	2020-09-09 16:53:28.377359	Music	spotify	415.372894
79	79	2021-08-17 09:53:28.377359	Music	Netflix	321.763416
82	82	2020-09-22 09:53:28.377359	Music	spotify	411.142701
84	84	2020-09-21 10:53:28.377359	Music	Netflix	158.793646
85	85	2021-01-12 09:53:28.377359	Music	Amazon	130.374908
89	89	2021-04-12 14:53:28.377359	Music	Amazon	218.487173
90	90	2020-12-01 14:53:28.377359	Music	Amazon	101.573276

music.reset_index() # it will set index from 0 to row count

	index	Unnamed: 0	date	category	description	debit
0	0	0	2021-03-07 14:53:28.377359	Music	Amazon	421.207327
1	12	12	2021-07-24 14:53:28.377359	Music	Amazon	329.536003
2	23	23	2020-12-11 10:53:28.377359	Music	Netflix	354.940241
3	37	37	2021-04-18 16:53:28.377359	Music	Amazon	266.069078
4	49	49	2021-03-16 09:53:28.377359	Music	spotify	232.303402
5	54	54	2020-11-16 10:53:28.377359	Music	spotify	160.817543
6	65	65	2020-10-23 18:53:28.377359	Music	Netflix	188.748743
7	67	67	2021-07-31 14:53:28.377359	Music	Netflix	324.786917
8	76	76	2020-11-17 09:53:28.377359	Music	Netflix	197.534600
9	78	78	2020-09-09 16:53:28.377359	Music	spotify	415.372894
10	79	79	2021-08-17 09:53:28.377359	Music	Netflix	321.763416
11	82	82	2020-09-22 09:53:28.377359	Music	spotify	411.142701
12	84	84	2020-09-21 10:53:28.377359	Music	Netflix	158.793646
13	85	85	2021-01-12 09:53:28.377359	Music	Amazon	130.374908
14	89	89	2021-04-12 14:53:28.377359	Music	Amazon	218.487173
15	90	90	2020-12-01 14:53:28.377359	Music	Amazon	101.573276

stocks_labeled

	value	gain
ticker
TATA	200.0	5.5
INFY	2000.0	-5.0
RELIANCE	1505.5	50.0
HCL	1200.0	70.5

stocks_labeled[stocks_labeled.gain>0]

	value	gain
ticker
TATA	200.0	5.5
RELIANCE	1505.5	50.0
HCL	1200.0	70.5

mdash = music.reset_index()

mdash['index']

0      0
1     12
2     23
3     37
4     49
5     54
6     65
7     67
8     76
9     78
10    79
11    82
12    84
13    85
14    89
15    90
Name: index, dtype: int64

mdash.index # there is a conflict in name of variable of dataframe and columnname!

RangeIndex(start=0, stop=16, step=1)

music.reset_index(drop=True)

	Unnamed: 0	date	category	description	debit
0	0	2021-03-07 14:53:28.377359	Music	Amazon	421.207327
1	12	2021-07-24 14:53:28.377359	Music	Amazon	329.536003
2	23	2020-12-11 10:53:28.377359	Music	Netflix	354.940241
3	37	2021-04-18 16:53:28.377359	Music	Amazon	266.069078
4	49	2021-03-16 09:53:28.377359	Music	spotify	232.303402
5	54	2020-11-16 10:53:28.377359	Music	spotify	160.817543
6	65	2020-10-23 18:53:28.377359	Music	Netflix	188.748743
7	67	2021-07-31 14:53:28.377359	Music	Netflix	324.786917
8	76	2020-11-17 09:53:28.377359	Music	Netflix	197.534600
9	78	2020-09-09 16:53:28.377359	Music	spotify	415.372894
10	79	2021-08-17 09:53:28.377359	Music	Netflix	321.763416
11	82	2020-09-22 09:53:28.377359	Music	spotify	411.142701
12	84	2020-09-21 10:53:28.377359	Music	Netflix	158.793646
13	85	2021-01-12 09:53:28.377359	Music	Amazon	130.374908
14	89	2021-04-12 14:53:28.377359	Music	Amazon	218.487173
15	90	2020-12-01 14:53:28.377359	Music	Amazon	101.573276

problem - Find out expenditure done on spotify?

wallet = pd.read_csv(csvurl)

wallet[wallet.description=='spotify']

	Unnamed: 0	date	category	description	debit
49	49	2021-03-16 09:53:28.377359	Music	spotify	232.303402
54	54	2020-11-16 10:53:28.377359	Music	spotify	160.817543
78	78	2020-09-09 16:53:28.377359	Music	spotify	415.372894
82	82	2020-09-22 09:53:28.377359	Music	spotify	411.142701

wallet[wallet.description=='spotify']['debit'].sum()

np.float64(1219.6365406108478)

wallet[(wallet.category=='Music') & (wallet.description=='Amazon')]

	Unnamed: 0	date	category	description	debit
0	0	2021-03-07 14:53:28.377359	Music	Amazon	421.207327
12	12	2021-07-24 14:53:28.377359	Music	Amazon	329.536003
37	37	2021-04-18 16:53:28.377359	Music	Amazon	266.069078
85	85	2021-01-12 09:53:28.377359	Music	Amazon	130.374908
89	89	2021-04-12 14:53:28.377359	Music	Amazon	218.487173
90	90	2020-12-01 14:53:28.377359	Music	Amazon	101.573276

s1[s1>30]

2     34.5
3    234.0
4     34.0
5     76.0
dtype: float64

s1[s1<=30]

0    12.0
1    23.0
dtype: float64

s1[~(s1>30)] # ~ will negate the boolean series

0    12.0
1    23.0
dtype: float64

f = s1 > 30

s1[f]

2     34.5
3    234.0
4     34.0
5     76.0
dtype: float64

s1[~f]

0    12.0
1    23.0
dtype: float64

wallet[~(wallet.description=='Amazon')]

	Unnamed: 0	date	category	description	debit
1	1	2020-10-08 09:53:28.377359	Food	Swiggy	328.440080
3	3	2020-11-01 14:53:28.377359	Utility	Phone	222.756318
4	4	2021-06-05 13:53:28.377359	Books	Flipcart	494.128492
5	5	2021-07-28 19:53:28.377359	Utility	Electricity	219.941711
6	6	2021-04-16 11:53:28.377359	Books	Amazon Kindle	270.322595
...	...	...	...	...	...
95	95	2021-07-19 13:53:28.377359	Utility	Phone	388.671213
96	96	2021-01-12 19:53:28.377359	Books	Flipcart	467.554562
97	97	2021-03-25 11:53:28.377359	Utility	Phone	320.789434
98	98	2021-05-13 15:53:28.377359	Travel	Taxi	442.096469
99	99	2020-10-11 16:53:28.377359	Food	Hotel	100.455501

91 rows × 5 columns

groupby

wallet.groupby('category').sum(numeric_only=True)

	Unnamed: 0	debit
category
Books	604	4929.750393
Food	1376	8281.189173
Music	970	4233.451868
Travel	721	6052.931876
Utility	1279	7562.267233

wallet.groupby('description').sum(numeric_only=True)

	Unnamed: 0	debit
description
Amazon	411	2504.690567
Amazon Kindle	171	1389.052376
Auto	121	2210.428935
Electricity	536	2885.064355
Flipcart	335	2503.255216
Hotel	565	2752.174732
Metro	139	1216.463665
Netflix	394	1546.567562
Phone	743	4677.202878
Swiggy	386	1936.495366
Taxi	461	2626.039276
Zomato	425	3592.519075
spotify	263	1219.636541

wallet.groupby(['category', 'description']).sum(numeric_only=True)

		Unnamed: 0	debit
category	description
Books	Amazon	98	1037.442802
	Amazon Kindle	171	1389.052376
	Flipcart	335	2503.255216
Food	Hotel	565	2752.174732
	Swiggy	386	1936.495366
	Zomato	425	3592.519075
Music	Amazon	313	1467.247766
	Netflix	394	1546.567562
	spotify	263	1219.636541
Travel	Auto	121	2210.428935
	Metro	139	1216.463665
	Taxi	461	2626.039276
Utility	Electricity	536	2885.064355
Utility	Phone	743	4677.202878

wallet.groupby('description').max(numeric_only=True)

	Unnamed: 0	debit
description
Amazon	90	498.100496
Amazon Kindle	77	497.770860
Auto	38	494.124399
Electricity	93	382.519510
Flipcart	96	494.128492
Hotel	99	483.315864
Metro	73	441.602143
Netflix	84	354.940241
Phone	97	499.858182
Swiggy	80	328.440080
Taxi	98	485.297743
Zomato	87	489.143483
spotify	82	415.372894

wallet.groupby('description').min(numeric_only=True)

	Unnamed: 0	debit
description
Amazon	0	101.573276
Amazon Kindle	6	138.806578
Auto	9	365.921808
Electricity	5	103.680791
Flipcart	4	109.325909
Hotel	25	100.455501
Metro	10	117.588729
Netflix	23	158.793646
Phone	3	124.221248
Swiggy	1	112.333160
Taxi	14	279.147884
Zomato	7	198.450672
spotify	49	160.817543

multiindexed = wallet.groupby(['category', 'description']).sum(numeric_only=True)

multiindexed

		Unnamed: 0	debit
category	description
Books	Amazon	98	1037.442802
	Amazon Kindle	171	1389.052376
	Flipcart	335	2503.255216
Food	Hotel	565	2752.174732
	Swiggy	386	1936.495366
	Zomato	425	3592.519075
Music	Amazon	313	1467.247766
	Netflix	394	1546.567562
	spotify	263	1219.636541
Travel	Auto	121	2210.428935
	Metro	139	1216.463665
	Taxi	461	2626.039276
Utility	Electricity	536	2885.064355
Utility	Phone	743	4677.202878

multiindexed.loc['Utility']

	Unnamed: 0	debit
description
Electricity	536	2885.064355
Phone	743	4677.202878

multiindexed.loc['Travel']

	Unnamed: 0	debit
description
Auto	121	2210.428935
Metro	139	1216.463665
Taxi	461	2626.039276

multiindexed.loc['Music']

	Unnamed: 0	debit
description
Amazon	313	1467.247766
Netflix	394	1546.567562
spotify	263	1219.636541

multiindexed['debit'].loc['Music'].loc['Netflix']

np.float64(1546.5675619264123)

%%file weeklydata.csv
symbol,day,price
IBM,Monday,111.71436961893693
IBM,Tuesday,141.21220022208635
IBM,Wednesday,112.40571010053796
IBM,Thursday,137.54133351926248
IBM,Friday,140.25154281801224
MICROSOFT,Monday,235.0403622499107
MICROSOFT,Tuesday,225.0206535036475
MICROSOFT,Wednesday,216.10342426936444
MICROSOFT,Thursday,200.38038844494193
MICROSOFT,Friday,235.80850482793264
APPLE,Monday,321.49182055844256
APPLE,Tuesday,340.63612771662815
APPLE,Wednesday,303.9065277507285
APPLE,Thursday,338.1350605764038
APPLE,Friday,318.3912296144338

Writing weeklydata.csv

pd.read_csv("weeklydata.csv")

	symbol	day	price
0	IBM	Monday	111.714370
1	IBM	Tuesday	141.212200
2	IBM	Wednesday	112.405710
3	IBM	Thursday	137.541334
4	IBM	Friday	140.251543
5	MICROSOFT	Monday	235.040362
6	MICROSOFT	Tuesday	225.020654
7	MICROSOFT	Wednesday	216.103424
8	MICROSOFT	Thursday	200.380388
9	MICROSOFT	Friday	235.808505
10	APPLE	Monday	321.491821
11	APPLE	Tuesday	340.636128
12	APPLE	Wednesday	303.906528
13	APPLE	Thursday	338.135061
14	APPLE	Friday	318.391230

problem - Write a function weekly_average to find weekly average from given index data for given ticker

>>> weekly_average(dataframe, "IBM")

def weekly_average(dataframe, ticker):
    return dataframe.price[dataframe.symbol==ticker].mean()

df = pd.read_csv("weeklydata.csv")
weekly_average(df, 'IBM')

np.float64(128.62503125576717)

df.groupby('symbol').mean(numeric_only=True)['price'].loc['IBM']

np.float64(128.62503125576717)

Combining data

%%file stocks1.csv
symbol,value,high,low
APPLE,421,422.0,420.4
AT&T,328,328.0,335.0
IBM,123,124.5,120.0
NIKE,234,235.5,230.0

Overwriting stocks1.csv

%%file stocks2.csv
symbol,value,high,low
AGILENT,521,522.0,520.4
XEROX,428,428.0,435.0
TESLA,623,624.5,620.0

Writing stocks2.csv

df1 = pd.read_csv("stocks1.csv")

df2 = pd.read_csv("stocks2.csv")

df1

	symbol	value	high	low
0	APPLE	421	422.0	420.4
1	AT&T	328	328.0	335.0
2	IBM	123	124.5	120.0
3	NIKE	234	235.5	230.0

df2

	symbol	value	high	low
0	AGILENT	521	522.0	520.4
1	XEROX	428	428.0	435.0
2	TESLA	623	624.5	620.0

pd.concat([df1, df2])

	symbol	value	high	low
0	APPLE	421	422.0	420.4
1	AT&T	328	328.0	335.0
2	IBM	123	124.5	120.0
3	NIKE	234	235.5	230.0
0	AGILENT	521	522.0	520.4
1	XEROX	428	428.0	435.0
2	TESLA	623	624.5	620.0

df3 = pd.concat([df1, df2])

df3

	symbol	value	high	low
0	APPLE	421	422.0	420.4
1	AT&T	328	328.0	335.0
2	IBM	123	124.5	120.0
3	NIKE	234	235.5	230.0
0	AGILENT	521	522.0	520.4
1	XEROX	428	428.0	435.0
2	TESLA	623	624.5	620.0

df4 = df3.set_index('symbol')

df3

	symbol	value	high	low
0	APPLE	421	422.0	420.4
1	AT&T	328	328.0	335.0
2	IBM	123	124.5	120.0
3	NIKE	234	235.5	230.0
0	AGILENT	521	522.0	520.4
1	XEROX	428	428.0	435.0
2	TESLA	623	624.5	620.0

df4

	value	high	low
symbol
APPLE	421	422.0	420.4
AT&T	328	328.0	335.0
IBM	123	124.5	120.0
NIKE	234	235.5	230.0
AGILENT	521	522.0	520.4
XEROX	428	428.0	435.0
TESLA	623	624.5	620.0

df3.set_index('symbol', inplace=True)

df3

	value	high	low
symbol
APPLE	421	422.0	420.4
AT&T	328	328.0	335.0
IBM	123	124.5	120.0
NIKE	234	235.5	230.0
AGILENT	521	522.0	520.4
XEROX	428	428.0	435.0
TESLA	623	624.5	620.0

hi_low = df3[['high','low']] # list columns

hi_low

	high	low
symbol
APPLE	422.0	420.4
AT&T	328.0	335.0
IBM	124.5	120.0
NIKE	235.5	230.0
AGILENT	522.0	520.4
XEROX	428.0	435.0
TESLA	624.5	620.0

value = df3[['value']]

value

	value
symbol
APPLE	421
AT&T	328
IBM	123
NIKE	234
AGILENT	521
XEROX	428
TESLA	623

hi_low

	high	low
symbol
APPLE	422.0	420.4
AT&T	328.0	335.0
IBM	124.5	120.0
NIKE	235.5	230.0
AGILENT	522.0	520.4
XEROX	428.0	435.0
TESLA	624.5	620.0

value

	value
symbol
APPLE	421
AT&T	328
IBM	123
NIKE	234
AGILENT	521
XEROX	428
TESLA	623

hi_low.to_csv("hi_low.csv")

value.to_csv("value.csv")

!cat hi_low.csv

symbol,high,low
APPLE,422.0,420.4
AT&T,328.0,335.0
IBM,124.5,120.0
NIKE,235.5,230.0
AGILENT,522.0,520.4
XEROX,428.0,435.0
TESLA,624.5,620.0

!cat value.csv

symbol,value
APPLE,421
AT&T,328
IBM,123
NIKE,234
AGILENT,521
XEROX,428
TESLA,623

df1 = pd.read_csv("hi_low.csv")

df2 = pd.read_csv("value.csv")

df1

	symbol	high	low
0	APPLE	422.0	420.4
1	AT&T	328.0	335.0
2	IBM	124.5	120.0
3	NIKE	235.5	230.0
4	AGILENT	522.0	520.4
5	XEROX	428.0	435.0
6	TESLA	624.5	620.0

df2

	symbol	value
0	APPLE	421
1	AT&T	328
2	IBM	123
3	NIKE	234
4	AGILENT	521
5	XEROX	428
6	TESLA	623

pd.merge(df1, df2, on='symbol')

	symbol	high	low	value
0	APPLE	422.0	420.4	421
1	AT&T	328.0	335.0	328
2	IBM	124.5	120.0	123
3	NIKE	235.5	230.0	234
4	AGILENT	522.0	520.4	521
5	XEROX	428.0	435.0	428
6	TESLA	624.5	620.0	623