import pandas as pd


!pip install pandas

Requirement already satisfied: pandas in /home/vikrant/usr/local/jupyter-py3.10/lib/python3.10/site-packages (1.4.2)
Requirement already satisfied: pytz>=2020.1 in /home/vikrant/usr/local/jupyter-py3.10/lib/python3.10/site-packages (from pandas) (2022.1)
Requirement already satisfied: numpy>=1.21.0 in /home/vikrant/usr/local/jupyter-py3.10/lib/python3.10/site-packages (from pandas) (1.22.3)
Requirement already satisfied: python-dateutil>=2.8.1 in /home/vikrant/usr/local/jupyter-py3.10/lib/python3.10/site-packages (from pandas) (2.8.2)
Requirement already satisfied: six>=1.5 in /home/vikrant/usr/local/jupyter-py3.10/lib/python3.10/site-packages (from python-dateutil>=2.8.1->pandas) (1.16.0)

[notice] A new release of pip available: 22.1.2 -> 22.2.2
[notice] To update, run: pip install --upgrade pip


import pandas as pd


!python -m pip install pandas

Requirement already satisfied: pandas in /home/vikrant/usr/local/jupyter-py3.10/lib/python3.10/site-packages (1.4.2)
Requirement already satisfied: pytz>=2020.1 in /home/vikrant/usr/local/jupyter-py3.10/lib/python3.10/site-packages (from pandas) (2022.1)
Requirement already satisfied: numpy>=1.21.0 in /home/vikrant/usr/local/jupyter-py3.10/lib/python3.10/site-packages (from pandas) (1.22.3)
Requirement already satisfied: python-dateutil>=2.8.1 in /home/vikrant/usr/local/jupyter-py3.10/lib/python3.10/site-packages (from pandas) (2.8.2)
Requirement already satisfied: six>=1.5 in /home/vikrant/usr/local/jupyter-py3.10/lib/python3.10/site-packages (from python-dateutil>=2.8.1->pandas) (1.16.0)

[notice] A new release of pip available: 22.1.2 -> 22.2.2
[notice] To update, run: pip install --upgrade pip


import pandas as pd


data = pd.read_csv("https://raw.githubusercontent.com/vikipedia/python-trainings/master/online_course/source/module2/wallet.csv")


data


pd.read_csv("wallet.csv") # you can give filepath or url where the file is stored


url = "https://raw.githubusercontent.com/vikipedia/python-trainings/master/online_course/source/module2/wallet.csv"


pd.read_csv(url, columns=['date','category','description','debit'])

---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
Input In [6], in <cell line: 1>()
----> 1 pd.read_csv(url, columns=['date','category','description','debit'])

File ~/usr/local/jupyter-py3.10/lib/python3.10/site-packages/pandas/util/_decorators.py:311, in deprecate_nonkeyword_arguments.<locals>.decorate.<locals>.wrapper(*args, **kwargs)
    305 if len(args) > num_allow_args:
    306     warnings.warn(
    307         msg.format(arguments=arguments),
    308         FutureWarning,
    309         stacklevel=stacklevel,
    310     )
--> 311 return func(*args, **kwargs)

TypeError: read_csv() got an unexpected keyword argument 'columns'


pd.read_csv(url, usecols=['date','category','description','debit'])


pd.read_csv(url, usecols=['date','category'])


wallet = pd.read_csv(url, usecols=['date','category','description','debit'])


wallet


wallet.head() # show first five lines


wallet.tail() # show last five lines


wallet.describe() # describe numeric columns by giving basic statistical information about it


wallet.tail(10)


help(wallet.head)

Help on method head in module pandas.core.generic:

head(n: 'int' = 5) -> 'NDFrameT' method of pandas.core.frame.DataFrame instance
    Return the first `n` rows.
    
    This function returns the first `n` rows for the object based
    on position. It is useful for quickly testing if your object
    has the right type of data in it.
    
    For negative values of `n`, this function returns all rows except
    the last `n` rows, equivalent to ``df[:-n]``.
    
    Parameters
    ----------
    n : int, default 5
        Number of rows to select.
    
    Returns
    -------
    same type as caller
        The first `n` rows of the caller object.
    
    See Also
    --------
    DataFrame.tail: Returns the last `n` rows.
    
    Examples
    --------
    >>> df = pd.DataFrame({'animal': ['alligator', 'bee', 'falcon', 'lion',
    ...                    'monkey', 'parrot', 'shark', 'whale', 'zebra']})
    >>> df
          animal
    0  alligator
    1        bee
    2     falcon
    3       lion
    4     monkey
    5     parrot
    6      shark
    7      whale
    8      zebra
    
    Viewing the first 5 lines
    
    >>> df.head()
          animal
    0  alligator
    1        bee
    2     falcon
    3       lion
    4     monkey
    
    Viewing the first `n` lines (three in this case)
    
    >>> df.head(3)
          animal
    0  alligator
    1        bee
    2     falcon
    
    For negative values of `n`
    
    >>> df.head(-3)
          animal
    0  alligator
    1        bee
    2     falcon
    3       lion
    4     monkey
    5     parrot


!pip install lxml

Requirement already satisfied: lxml in /home/vikrant/usr/local/jupyter-py3.10/lib/python3.10/site-packages (4.9.1)

[notice] A new release of pip available: 22.1.2 -> 22.2.2
[notice] To update, run: pip install --upgrade pip


money_control = pd.read_html("https://www.moneycontrol.com/markets/indian-indices/")


type(money_control)

list


len(money_control)

7


money_control[0]


money_control[1]


money_control[2]


df = money_control[2]


df.head()


df.describe()


len(money_control)

7


money_control

[  Stock Name   Sector      LTP   Change     %Chg
 0    No Data  No Data  No Data  No Data  No Data,
   Stock Name   Sector      LTP   Change     %Chg
 0    No Data  No Data  No Data  No Data  No Data,
          Stock Name                             Sector       LTP  Change  %Chg
 0     IndusInd Bank             Banks - Private Sector   1074.05  -31.05 -2.81
 1              BPCL                         Refineries    339.45   -7.60 -2.19
 2   Power Grid Corp  Power - Generation & Distribution    227.15   -4.20 -1.82
 3   Apollo Hospital       Hospitals & Medical Services   4190.20  -76.45 -1.79
 4        Coal India                  Mining & Minerals    218.50   -3.25 -1.47
 5               HUL                      Personal Care   2645.50  -36.55 -1.36
 6     Bajaj Finserv              Finance - Investments  16580.95 -224.95 -1.34
 7        Sun Pharma                    Pharmaceuticals    904.70  -12.05 -1.31
 8         HDFC Life            Life & Health Insurance    579.35   -7.40 -1.26
 9     Bajaj Finance                     Finance - NBFC   7394.80  -93.45 -1.25
 10       ICICI Bank             Banks - Private Sector    874.60  -10.50 -1.19
 11  TATA Cons. Prod         Plantations - Tea & Coffee    798.50   -9.35 -1.16
 12             NTPC  Power - Generation & Distribution    159.00   -1.80 -1.12
 13      Tata Motors                 Auto - LCVs & HCVs    479.40   -5.40 -1.11
 14              SBI              Banks - Public Sector    526.50   -5.85 -1.10
 15        HDFC Bank             Banks - Private Sector   1495.60  -16.10 -1.07
 16     Asian Paints                 Paints & Varnishes   3497.90  -37.45 -1.06
 17    Maruti Suzuki                Auto - Cars & Jeeps   8855.00  -92.70 -1.04
 18         Hindalco                       Iron & Steel    433.50   -4.55 -1.04
 19        Britannia                    Food Processing   3656.70  -36.35 -0.98
 20    Hero Motocorp              Auto - 2 & 3 Wheelers   2882.00  -26.20 -0.90
 21            Cipla                    Pharmaceuticals   1023.00   -9.20 -0.89
 22             HDFC                  Finance - Housing   2476.65  -21.30 -0.85
 23       Tata Steel                       Iron & Steel    111.60   -0.95 -0.84
 24        JSW Steel                      Steel - Large    668.50   -4.90 -0.73
 25    Shree Cements                     Cement - Major  21474.85 -147.65 -0.68
 26       Divis Labs                    Pharmaceuticals   3720.00  -23.60 -0.63
 27        Axis Bank             Banks - Private Sector    760.50   -4.55 -0.59
 28              UPL                          Chemicals    769.60   -4.35 -0.56
 29   Dr Reddys Labs                    Pharmaceuticals   4202.00  -23.10 -0.55
 30         Reliance                         Refineries   2647.55  -13.75 -0.52
 31           Grasim                        Diversified   1605.35   -8.40 -0.52
 32              ITC                        Diversified    314.75   -0.90 -0.29
 33  SBI Life Insura                        Diversified   1320.55   -3.65 -0.28
 34              M&M                Auto - Cars & Jeeps   1257.80   -3.35 -0.27
 35    Titan Company                      Miscellaneous   2475.15   -3.55 -0.14
 36  UltraTechCement                     Cement - Major   6696.00   -7.95 -0.12
 37           Nestle                    Food Processing  19554.30   -3.35 -0.02,
         Stock Name                        Sector      LTP  Change  %Chg
 0    Tech Mahindra          Computers - Software  1124.90   19.75  1.79
 1           Larsen      Infrastructure - General  1924.05   29.50  1.56
 2          Infosys          Computers - Software  1599.80   16.85  1.06
 3            Wipro          Computers - Software   438.60    3.55  0.82
 4             ONGC  Oil Drilling And Exploration   136.60    1.05  0.77
 5         HCL Tech          Computers - Software   979.50    6.00  0.62
 6    Eicher Motors            Auto - LCVs & HCVs  3431.55   20.15  0.59
 7              TCS          Computers - Software  3398.80   17.55  0.52
 8   Kotak Mahindra        Banks - Private Sector  1905.95    1.45  0.08
 9    Bharti Airtel  Telecommunications - Service   733.60    0.40  0.05
 10      Bajaj Auto         Auto - 2 & 3 Wheelers  4062.15    1.50  0.04,
     Stock Name                    Sector    LTP  Change  %Chg
 0  Adani Ports  Infrastructure - General  867.8   34.75  4.17,
   Stock Name   Sector      LTP   Change     %Chg
 0    No Data  No Data  No Data  No Data  No Data,
     0   1   2   3   4   5   6   7   8   9
 0 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
 1 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
 2 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
 3 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
 4 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
 5 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
 6 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
 7 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
 8 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
 9 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN]


money_control[3]


money_control[4]


money_control[5]

df


df.describe()


df['LTP'] # access LTP column from dataframe

0      1074.05
1       339.45
2       227.15
3      4190.20
4       218.50
5      2645.50
6     16580.95
7       904.70
8       579.35
9      7394.80
10      874.60
11      798.50
12      159.00
13      479.40
14      526.50
15     1495.60
16     3497.90
17     8855.00
18      433.50
19     3656.70
20     2882.00
21     1023.00
22     2476.65
23      111.60
24      668.50
25    21474.85
26     3720.00
27      760.50
28      769.60
29     4202.00
30     2647.55
31     1605.35
32      314.75
33     1320.55
34     1257.80
35     2475.15
36     6696.00
37    19554.30
Name: LTP, dtype: float64


ltp  = df['LTP']


type(ltp)

pandas.core.series.Series

df


pd.Series([1, 2, 3, 4]) # it will make aseries of type integer

0    1
1    2
2    3
3    4
dtype: int64


pd.Series([45.6, 20.1, 30.2, 4.5]) # a series of type float

0    45.6
1    20.1
2    30.2
3     4.5
dtype: float64


pd.Series(["a","b","v","f"])

0    a
1    b
2    v
3    f
dtype: object

ltp

0      1074.05
1       339.45
2       227.15
3      4190.20
4       218.50
5      2645.50
6     16580.95
7       904.70
8       579.35
9      7394.80
10      874.60
11      798.50
12      159.00
13      479.40
14      526.50
15     1495.60
16     3497.90
17     8855.00
18      433.50
19     3656.70
20     2882.00
21     1023.00
22     2476.65
23      111.60
24      668.50
25    21474.85
26     3720.00
27      760.50
28      769.60
29     4202.00
30     2647.55
31     1605.35
32      314.75
33     1320.55
34     1257.80
35     2475.15
36     6696.00
37    19554.30
Name: LTP, dtype: float64


values = pd.Series([12.0,23.3,43.6], index=['a','b','c'])


values

a    12.0
b    23.3
c    43.6
dtype: float64


values['a'] # access by label

12.0


values[0] # access by index

12.0


labels = ["APPLE", "AT&T", "IBM", "NIKE"]
value = pd.Series([234.5, 221.3, 125.3, 100.5], index=labels)
high = pd.Series([240, 225, 130, 110.0], index=labels)
low = pd.Series([230.0,220.0,120.0,100.0], index=labels)
volume = pd.Series([100,200,300,400], index=labels)
stocks = pd.DataFrame({ "value":value,
                       "high": high,
                       "low": low,
                       "volume": volume})


stocks


stocks1


pd.DataFrame({"value":[234.5, 221.3, 125.3, 100.5],
                       "high": [240, 225, 130, 110.0],
                       "low": [230.0,220.0,120.0,100.0],
                       "volume": [100,200,300,400]},
                       )


stocks.value # if column does not have space in it then we can also acess column like

APPLE    234.5
AT&T     221.3
IBM      125.3
NIKE     100.5
Name: value, dtype: float64


stocks['value']

APPLE    234.5
AT&T     221.3
IBM      125.3
NIKE     100.5
Name: value, dtype: float64


stocks.value.std()

67.37863162754198

ltp

0      1074.05
1       339.45
2       227.15
3      4190.20
4       218.50
5      2645.50
6     16580.95
7       904.70
8       579.35
9      7394.80
10      874.60
11      798.50
12      159.00
13      479.40
14      526.50
15     1495.60
16     3497.90
17     8855.00
18      433.50
19     3656.70
20     2882.00
21     1023.00
22     2476.65
23      111.60
24      668.50
25    21474.85
26     3720.00
27      760.50
28      769.60
29     4202.00
30     2647.55
31     1605.35
32      314.75
33     1320.55
34     1257.80
35     2475.15
36     6696.00
37    19554.30
Name: LTP, dtype: float64


ltp.min()

111.6


ltp.max()

21474.85


ltp.mean()

3391.881578947369


ltp.describe()

count       38.000000
mean      3391.881579
std       5157.015842
min        111.600000
25%        601.637500
50%       1289.175000
75%       3617.000000
max      21474.850000
Name: LTP, dtype: float64


stocks.value

APPLE    234.5
AT&T     221.3
IBM      125.3
NIKE     100.5
Name: value, dtype: float64


stocks.value['APPLE']

234.5


stocks


stocks['APPLE'] # this will not row! on dataframe this is a way to access column

---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
File ~/usr/local/jupyter-py3.10/lib/python3.10/site-packages/pandas/core/indexes/base.py:3621, in Index.get_loc(self, key, method, tolerance)
   3620 try:
-> 3621     return self._engine.get_loc(casted_key)
   3622 except KeyError as err:

File ~/usr/local/jupyter-py3.10/lib/python3.10/site-packages/pandas/_libs/index.pyx:136, in pandas._libs.index.IndexEngine.get_loc()

File ~/usr/local/jupyter-py3.10/lib/python3.10/site-packages/pandas/_libs/index.pyx:163, in pandas._libs.index.IndexEngine.get_loc()

File pandas/_libs/hashtable_class_helper.pxi:5198, in pandas._libs.hashtable.PyObjectHashTable.get_item()

File pandas/_libs/hashtable_class_helper.pxi:5206, in pandas._libs.hashtable.PyObjectHashTable.get_item()

KeyError: 'APPLE'

The above exception was the direct cause of the following exception:

KeyError                                  Traceback (most recent call last)
Input In [75], in <cell line: 1>()
----> 1 stocks['APPLE']

File ~/usr/local/jupyter-py3.10/lib/python3.10/site-packages/pandas/core/frame.py:3505, in DataFrame.__getitem__(self, key)
   3503 if self.columns.nlevels > 1:
   3504     return self._getitem_multilevel(key)
-> 3505 indexer = self.columns.get_loc(key)
   3506 if is_integer(indexer):
   3507     indexer = [indexer]

File ~/usr/local/jupyter-py3.10/lib/python3.10/site-packages/pandas/core/indexes/base.py:3623, in Index.get_loc(self, key, method, tolerance)
   3621     return self._engine.get_loc(casted_key)
   3622 except KeyError as err:
-> 3623     raise KeyError(key) from err
   3624 except TypeError:
   3625     # If we have a listlike key, _check_indexing_error will raise
   3626     #  InvalidIndexError. Otherwise we fall through and re-raise
   3627     #  the TypeError.
   3628     self._check_indexing_error(key)

KeyError: 'APPLE'


stocks.loc['APPLE',:] # row location

value     234.5
high      240.0
low       230.0
volume    100.0
Name: APPLE, dtype: float64


stocks


stocks.loc['APPLE'] # just take a row

value     234.5
high      240.0
low       230.0
volume    100.0
Name: APPLE, dtype: float64


stocks.loc[['APPLE','IBM']]


stocks.loc[['APPLE','IBM'],'value']

APPLE    234.5
IBM      125.3
Name: value, dtype: float64


stocks.loc['APPLE', 'volume']

100


stocks.loc['APPLE',:]

value     234.5
high      240.0
low       230.0
volume    100.0
Name: APPLE, dtype: float64


stocks.iloc[0,0] # 0th row and 0th column

234.5


stocks.iloc[0:3,:]


stocks.tail()# there is this round bracket ..which says this is a method


stocks.loc()# not to be called like this

<pandas.core.indexing._LocIndexer at 0x7f1edf929850>


stocks.loc['IBM']

value     125.3
high      130.0
low       120.0
volume    300.0
Name: IBM, dtype: float64


value

APPLE    234.5
AT&T     221.3
IBM      125.3
NIKE     100.5
dtype: float64


value*2

APPLE    469.0
AT&T     442.6
IBM      250.6
NIKE     201.0
dtype: float64


value

APPLE    234.5
AT&T     221.3
IBM      125.3
NIKE     100.5
dtype: float64


volume

APPLE    100
AT&T     200
IBM      300
NIKE     400
dtype: int64


value*volume

APPLE    23450.0
AT&T     44260.0
IBM      37590.0
NIKE     40200.0
dtype: float64


s1 = pd.Series([1,2,3,4], index=['a','b','c','d'])
s2 = pd.Series([4,3,2,1], index=['d','c','b','a'])

s1

a    1
b    2
c    3
d    4
dtype: int64

s2

d    4
c    3
b    2
a    1
dtype: int64


s1 + s2

a    2
b    4
c    6
d    8
dtype: int64


s1*s2

a     1
b     4
c     9
d    16
dtype: int64


s1 = pd.Series([1,2,3,4], index=['a','b','c','d'])
s2 = pd.Series([3,2,1], index=['c','b','a'])

s1

a    1
b    2
c    3
d    4
dtype: int64

s2

c    3
b    2
a    1
dtype: int64


s1+s2

a    2.0
b    4.0
c    6.0
d    NaN
dtype: float64

s1

a    1
b    2
c    3
d    4
dtype: int64


s1 + s2

a    2.0
b    4.0
c    6.0
d    NaN
dtype: float64


wallet

s1

a    1
b    2
c    3
d    4
dtype: int64


s1  + 1

a    2
b    3
c    4
d    5
dtype: int64


s1 > 2 # boolean searies

a    False
b    False
c     True
d     True
dtype: bool


s1[s1>2]

c    3
d    4
dtype: int64


wallet.debit[wallet.debit>400]

0     421.207327
4     494.128492
7     457.183104
9     443.618884
11    434.495468
14    485.297743
18    433.822404
22    489.143483
24    498.100496
25    483.315864
33    449.248030
34    499.858182
35    441.602143
36    472.941439
38    494.124399
41    425.187191
43    486.033933
50    463.001875
52    403.610070
55    423.749708
62    497.770860
75    425.625291
77    482.152343
78    415.372894
81    470.080995
82    411.142701
83    451.584407
94    431.185537
96    467.554562
98    442.096469
Name: debit, dtype: float64


wallet[wallet.debit>450]


wallet[wallet.category == 'Books']


def expenses_for_category(walletdata, category):
    return walletdata[walletdata.category==category]


expenses_for_category(wallet, "Music")


print(url)

https://raw.githubusercontent.com/vikipedia/python-trainings/master/online_course/source/module2/wallet.csv


wallet = pd.read_csv(url, usecols=['date','category','description','debit'])


music = expenses_for_category(wallet, "Music")


music


music.debit.sum()

4233.45186823271


def total_expenses_for_category(walletdata, category):
    df_category = walletdata[walletdata.category==category]
    return df_category.debit.sum()


total_expenses_for_category(wallet, "Music")

4233.45186823271


total_expenses_for_category(wallet, "Books")

4929.750393283798


wallet[wallet.description == 'Amazon']


wallet[ (wallet.description == 'Amazon') & (wallet.category=='Music')]


wallet.query("description == 'Amazon' & category=='Music'")


wallet[stocks.value>300] # the filter that you create should have same index as dataframe

/tmp/ipykernel_19555/494991953.py:1: UserWarning: Boolean Series key will be reindexed to match DataFrame index.
  wallet[stocks.value>300] # the filter that you create should have same index as dataframe

---------------------------------------------------------------------------
IndexingError                             Traceback (most recent call last)
Input In [136], in <cell line: 1>()
----> 1 wallet[stocks.value>300]

File ~/usr/local/jupyter-py3.10/lib/python3.10/site-packages/pandas/core/frame.py:3496, in DataFrame.__getitem__(self, key)
   3494 # Do we have a (boolean) 1d indexer?
   3495 if com.is_bool_indexer(key):
-> 3496     return self._getitem_bool_array(key)
   3498 # We are left with two options: a single key, and a collection of keys,
   3499 # We interpret tuples as collections only for non-MultiIndex
   3500 is_single_key = isinstance(key, tuple) or not is_list_like(key)

File ~/usr/local/jupyter-py3.10/lib/python3.10/site-packages/pandas/core/frame.py:3549, in DataFrame._getitem_bool_array(self, key)
   3543     raise ValueError(
   3544         f"Item wrong length {len(key)} instead of {len(self.index)}."
   3545     )
   3547 # check_bool_indexer will throw exception if Series key cannot
   3548 # be reindexed to match DataFrame rows
-> 3549 key = check_bool_indexer(self.index, key)
   3550 indexer = key.nonzero()[0]
   3551 return self._take_with_is_copy(indexer, axis=0)

File ~/usr/local/jupyter-py3.10/lib/python3.10/site-packages/pandas/core/indexing.py:2383, in check_bool_indexer(index, key)
   2381     mask = isna(result._values)
   2382     if mask.any():
-> 2383         raise IndexingError(
   2384             "Unalignable boolean Series provided as "
   2385             "indexer (index of the boolean Series and of "
   2386             "the indexed object do not match)."
   2387         )
   2388     return result.astype(bool)._values
   2389 if is_object_dtype(key):
   2390     # key might be object-dtype bool, check_array_indexer needs bool array

IndexingError: Unalignable boolean Series provided as indexer (index of the boolean Series and of the indexed object do not match).


wallet[wallet.debit>300]


greater_than_475 = wallet.debit > 475


greater_than_475

0     False
1     False
2     False
3     False
4      True
      ...  
95    False
96    False
97    False
98    False
99    False
Name: debit, Length: 100, dtype: bool


wallet[greater_than_475] # all rows with True will be kept and row with False will be removed


wallet.category[greater_than_475]

4       Books
14     Travel
22       Food
24      Books
25       Food
34    Utility
38     Travel
43    Utility
62      Books
77      Books
Name: category, dtype: object


stocks


f = stocks.value > 200

f

APPLE     True
AT&T      True
IBM      False
NIKE     False
Name: value, dtype: bool


stocks[f]


value

APPLE    234.5
AT&T     221.3
IBM      125.3
NIKE     100.5
dtype: float64


value[f]

APPLE    234.5
AT&T     221.3
dtype: float64


wallet[f]

/tmp/ipykernel_19555/2995545018.py:1: UserWarning: Boolean Series key will be reindexed to match DataFrame index.
  wallet[f]

---------------------------------------------------------------------------
IndexingError                             Traceback (most recent call last)
Input In [151], in <cell line: 1>()
----> 1 wallet[f]

File ~/usr/local/jupyter-py3.10/lib/python3.10/site-packages/pandas/core/frame.py:3496, in DataFrame.__getitem__(self, key)
   3494 # Do we have a (boolean) 1d indexer?
   3495 if com.is_bool_indexer(key):
-> 3496     return self._getitem_bool_array(key)
   3498 # We are left with two options: a single key, and a collection of keys,
   3499 # We interpret tuples as collections only for non-MultiIndex
   3500 is_single_key = isinstance(key, tuple) or not is_list_like(key)

File ~/usr/local/jupyter-py3.10/lib/python3.10/site-packages/pandas/core/frame.py:3549, in DataFrame._getitem_bool_array(self, key)
   3543     raise ValueError(
   3544         f"Item wrong length {len(key)} instead of {len(self.index)}."
   3545     )
   3547 # check_bool_indexer will throw exception if Series key cannot
   3548 # be reindexed to match DataFrame rows
-> 3549 key = check_bool_indexer(self.index, key)
   3550 indexer = key.nonzero()[0]
   3551 return self._take_with_is_copy(indexer, axis=0)

File ~/usr/local/jupyter-py3.10/lib/python3.10/site-packages/pandas/core/indexing.py:2383, in check_bool_indexer(index, key)
   2381     mask = isna(result._values)
   2382     if mask.any():
-> 2383         raise IndexingError(
   2384             "Unalignable boolean Series provided as "
   2385             "indexer (index of the boolean Series and of "
   2386             "the indexed object do not match)."
   2387         )
   2388     return result.astype(bool)._values
   2389 if is_object_dtype(key):
   2390     # key might be object-dtype bool, check_array_indexer needs bool array

IndexingError: Unalignable boolean Series provided as indexer (index of the boolean Series and of the indexed object do not match).

s1

a    1
b    2
c    3
d    4
dtype: int64

s2

c    3
b    2
a    1
dtype: int64


s1[s2]

d    4
c    3
b    2
dtype: int64


x = 9


def check(x):
    if x:
        print(x, "True")
    else:
        print(x, "False")


check(9)

9 True


check(0)

0 False


check([])

[] False


check([1, 2, 3])

[1, 2, 3] True

s1

a    1
b    2
c    3
d    4
dtype: int64


a = pd.Series([1, 2, 3, 4, 5])
b = pd.Series([1, 1, 1, 0, 1])

a

0    1
1    2
2    3
3    4
4    5
dtype: int64

b

0    1
1    1
2    1
3    0
4    1
dtype: int64


a[b] # I don't know what this is doing!

1    2
1    2
1    2
0    1
1    2
dtype: int64


print(url)

https://raw.githubusercontent.com/vikipedia/python-trainings/master/online_course/source/module2/wallet.csv


import math as m


m.pi

3.141592653589793


!pip install openpyxl

Requirement already satisfied: openpyxl in /home/vikrant/usr/local/jupyter-py3.10/lib/python3.10/site-packages (3.0.10)
Requirement already satisfied: et-xmlfile in /home/vikrant/usr/local/jupyter-py3.10/lib/python3.10/site-packages (from openpyxl) (1.1.0)

[notice] A new release of pip available: 22.1.2 -> 22.2.2
[notice] To update, run: pip install --upgrade pip


pd.read_excel("https://raw.githubusercontent.com/vikipedia/python-trainings/master/online_course/source/module2/wallet.xlsx")


wallet


categorygroup = wallet.groupby('category')


categorygroup.sum()


wallet.groupby('category').sum()


wallet.groupby('category').mean()


df = pd.DataFrame({"A":list("XYZ"*10),
                  "N1":range(200, 230),
                  "N2":range(30)})

df


df.groupby("A")['N1'].sum()

A
X    2135
Y    2145
Z    2155
Name: N1, dtype: int64


wallet[['date','debit']]


df.groupby("A")[['N1',"N2"]].sum() # take only specified columns


wallet.groupby('category')

<pandas.core.groupby.generic.DataFrameGroupBy object at 0x7f1edcf7b970>


wallet = pd.read_csv(url)


wallet


wallet.groupby('category').sum()


wallet.groupby('category').sum().loc['Music']

Unnamed: 0     970.000000
debit         4233.451868
Name: Music, dtype: float64


musicdf = wallet[wallet.category=='Music']
musicdf.groupby('category').sum()


wallet = wallet[["date","category","description","debit"]]


wallet


wallet.groupby(['category', 'description']).sum()


wallet.groupby(['category', 'description']).sum().loc[('Books','Flipcart')]

debit    2503.255216
Name: (Books, Flipcart), dtype: float64


wallet.groupby('description').max()


wallet.groupby(['category','description']).max()


wallet['custom_category'] = "Not_a_book"


f = (wallet.description == 'Amazon') | (wallet.description == 'Flipcart')
wallet.custom_category[f] = "book"

/tmp/ipykernel_19555/3913929795.py:2: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  wallet.custom_category[f] = "book"


wallet.groupby(['category','custom_category']).sum()


wallet = wallet[["date","category","description","debit"]]


wallet[f].groupby(['category', 'description']).sum()


df1 = wallet[f].groupby(['category', 'description']).sum()

df1


df1.reset_index().groupby('category').sum()


stocks


stocks.to_csv("stocks_dataframe.csv")


!cat stocks_dataframe.csv

,value,high,low,volume
APPLE,234.5,240.0,230.0,100
AT&T,221.3,225.0,220.0,200
IBM,125.3,130.0,120.0,300
NIKE,100.5,110.0,100.0,400


stocks


stocks.index.name = "ticker"


stocks


stocks.to_csv("stocks_dataframe1.csv")


!cat stocks_dataframe1.csv

ticker,value,high,low,volume
APPLE,234.5,240.0,230.0,100
AT&T,221.3,225.0,220.0,200
IBM,125.3,130.0,120.0,300
NIKE,100.5,110.0,100.0,400


labels = ["APPLE","AT&T","IBM","NIKE"]
value = pd.Series([234.5, 221.6, 125.7, 100.5], index=labels)
high = pd.Series([240.32, 222.5, 127.3, 105.0], index=labels)
low = pd.Series([233.0, 220.0, 123.0, 104.0], index=labels)
volume = pd.Series([100, 200, 50, 1000], index=labels)
stocks = pd.DataFrame({"value":value, "high":high, "low":low, "volume":volume})

stocks1 = pd.DataFrame({
        "value" : [125, 500.0, 300.4, 423.9],
        "low" : [125.0, 490.0, 299.5, 421.1],
        "high" : [130.0, 500.0, 305.0, 425.5],
        "volume" : [123, 50, 100, 80]
        },
        index = ["BELL","XEROX","FORD","TESLA"]
)


stocks


stocks1


pd.concat([stocks, stocks1])


labels = ["APPLE","AT&T","IBM","NIKE"]
value = pd.Series([234.5, 221.6, 125.7, 100.5], index=labels)
high = pd.Series([240.32, 222.5, 127.3, 105.0], index=labels)
low = pd.Series([233.0, 220.0, 123.0, 104.0], index=labels)
volume = pd.Series([100, 200, 50, 1000], index=labels)
stocks = pd.DataFrame({"value":value, "high":high, "low":low, "volume":volume})

stocks1 = pd.DataFrame({
        "value" : [125, 500.0, 300.4, 423.9],
        "low" : [125.0, 490.0, 299.5, 421.1],
        "high" : [130.0, 500.0, 305.0, 425.5],
        },
        index = ["BELL","XEROX","FORD","TESLA"]
)


stocks


stocks1


pd.concat([stocks, stocks1])


combined_data = pd.concat([stocks, stocks1])


combined_data


combined_data.describe()


combined_data.isna().sum() # seeing how many missing values are there in a dataframe

value     0
high      0
low       0
volume    4
dtype: int64


df1 = pd.DataFrame(
{
"a":[1,2, 3, 4, 5],
"b":[34, 56, 76, 87, 9],
"labels": ["x","y","z","m","n"]
}
)

df2 = pd.DataFrame(
{
"c":[1,2, 3, 4],
"d":[34, 56, 76, 87],
"labels":["x","y","z","m"]}
)

df1

df2


pd.merge(df2, df1)


pd.merge(df2, df1, on='labels')

	date	debit
0	2021-03-07 14:53:28.377359	421.207327
1	2020-10-08 09:53:28.377359	328.440080
2	2021-02-23 09:53:28.377359	244.679437
3	2020-11-01 14:53:28.377359	222.756318
4	2021-06-05 13:53:28.377359	494.128492
...	...	...
95	2021-07-19 13:53:28.377359	388.671213
96	2021-01-12 19:53:28.377359	467.554562
97	2021-03-25 11:53:28.377359	320.789434
98	2021-05-13 15:53:28.377359	442.096469
99	2020-10-11 16:53:28.377359	100.455501

		debit
category	description
Books	Amazon	1037.442802
	Amazon Kindle	1389.052376
	Flipcart	2503.255216
Food	Hotel	2752.174732
	Swiggy	1936.495366
	Zomato	3592.519075
Music	Amazon	1467.247766
	Netflix	1546.567562
	spotify	1219.636541
Travel	Auto	2210.428935
	Metro	1216.463665
	Taxi	2626.039276
Utility	Electricity	2885.064355
Utility	Phone	4677.202878

Python Virtual Training For Arcesium - Module III - Day 1¶

Pandas - Spreadsheet of python¶

Basic data strutcures of pandas Series¶

Selecting or Filtering¶

Grouping¶

Combining two dataframes¶

ANother way of combining dataframe¶

	debit
count	100.000000
mean	310.595905
std	121.178218
min	100.455501
25%	216.429447
50%	318.110776
75%	424.109079
max	499.858182

	date	category	description	debit
90	2020-12-01 14:53:28.377359	Music	Amazon	101.573276
91	2021-01-22 17:53:28.377359	Food	Hotel	232.663468
92	2021-01-12 19:53:28.377359	Travel	Taxi	356.842638
93	2021-01-11 09:53:28.377359	Utility	Electricity	111.720809
94	2021-01-04 13:53:28.377359	Utility	Phone	431.185537
95	2021-07-19 13:53:28.377359	Utility	Phone	388.671213
96	2021-01-12 19:53:28.377359	Books	Flipcart	467.554562
97	2021-03-25 11:53:28.377359	Utility	Phone	320.789434
98	2021-05-13 15:53:28.377359	Travel	Taxi	442.096469
99	2020-10-11 16:53:28.377359	Food	Hotel	100.455501

	Stock Name	Sector	LTP	Change	%Chg
0	IndusInd Bank	Banks - Private Sector	1074.05	-31.05	-2.81
1	BPCL	Refineries	339.45	-7.60	-2.19
2	Power Grid Corp	Power - Generation & Distribution	227.15	-4.20	-1.82
3	Apollo Hospital	Hospitals & Medical Services	4190.20	-76.45	-1.79
4	Coal India	Mining & Minerals	218.50	-3.25	-1.47
5	HUL	Personal Care	2645.50	-36.55	-1.36
6	Bajaj Finserv	Finance - Investments	16580.95	-224.95	-1.34
7	Sun Pharma	Pharmaceuticals	904.70	-12.05	-1.31
8	HDFC Life	Life & Health Insurance	579.35	-7.40	-1.26
9	Bajaj Finance	Finance - NBFC	7394.80	-93.45	-1.25
10	ICICI Bank	Banks - Private Sector	874.60	-10.50	-1.19
11	TATA Cons. Prod	Plantations - Tea & Coffee	798.50	-9.35	-1.16
12	NTPC	Power - Generation & Distribution	159.00	-1.80	-1.12
13	Tata Motors	Auto - LCVs & HCVs	479.40	-5.40	-1.11
14	SBI	Banks - Public Sector	526.50	-5.85	-1.10
15	HDFC Bank	Banks - Private Sector	1495.60	-16.10	-1.07
16	Asian Paints	Paints & Varnishes	3497.90	-37.45	-1.06
17	Maruti Suzuki	Auto - Cars & Jeeps	8855.00	-92.70	-1.04
18	Hindalco	Iron & Steel	433.50	-4.55	-1.04
19	Britannia	Food Processing	3656.70	-36.35	-0.98
20	Hero Motocorp	Auto - 2 & 3 Wheelers	2882.00	-26.20	-0.90
21	Cipla	Pharmaceuticals	1023.00	-9.20	-0.89
22	HDFC	Finance - Housing	2476.65	-21.30	-0.85
23	Tata Steel	Iron & Steel	111.60	-0.95	-0.84
24	JSW Steel	Steel - Large	668.50	-4.90	-0.73
25	Shree Cements	Cement - Major	21474.85	-147.65	-0.68
26	Divis Labs	Pharmaceuticals	3720.00	-23.60	-0.63
27	Axis Bank	Banks - Private Sector	760.50	-4.55	-0.59
28	UPL	Chemicals	769.60	-4.35	-0.56
29	Dr Reddys Labs	Pharmaceuticals	4202.00	-23.10	-0.55
30	Reliance	Refineries	2647.55	-13.75	-0.52
31	Grasim	Diversified	1605.35	-8.40	-0.52
32	ITC	Diversified	314.75	-0.90	-0.29
33	SBI Life Insura	Diversified	1320.55	-3.65	-0.28
34	M&M	Auto - Cars & Jeeps	1257.80	-3.35	-0.27
35	Titan Company	Miscellaneous	2475.15	-3.55	-0.14
36	UltraTechCement	Cement - Major	6696.00	-7.95	-0.12
37	Nestle	Food Processing	19554.30	-3.35	-0.02

	LTP	Change	%Chg
count	38.000000	38.000000	38.000000
mean	3391.881579	-27.044737	-0.969737
std	5157.015842	45.436558	0.574073
min	111.600000	-224.950000	-2.810000
25%	601.637500	-25.550000	-1.235000
50%	1289.175000	-8.800000	-1.010000
75%	3617.000000	-4.400000	-0.567500
max	21474.850000	-0.900000	-0.020000

	Stock Name	Sector	LTP	Change	%Chg
0	Tech Mahindra	Computers - Software	1124.90	19.75	1.79
1	Larsen	Infrastructure - General	1924.05	29.50	1.56
2	Infosys	Computers - Software	1599.80	16.85	1.06
3	Wipro	Computers - Software	438.60	3.55	0.82
4	ONGC	Oil Drilling And Exploration	136.60	1.05	0.77
5	HCL Tech	Computers - Software	979.50	6.00	0.62
6	Eicher Motors	Auto - LCVs & HCVs	3431.55	20.15	0.59
7	TCS	Computers - Software	3398.80	17.55	0.52
8	Kotak Mahindra	Banks - Private Sector	1905.95	1.45	0.08
9	Bharti Airtel	Telecommunications - Service	733.60	0.40	0.05
10	Bajaj Auto	Auto - 2 & 3 Wheelers	4062.15	1.50	0.04

	value	high	low	volume
APPLE	234.5	240.0	230.0	100
AT&T	221.3	225.0	220.0	200
IBM	125.3	130.0	120.0	300
NIKE	100.5	110.0	100.0	400

	debit
category
Books	4929.750393
Food	8281.189173
Music	4233.451868
Travel	6052.931876
Utility	7562.267233

	debit
category
Books	352.125028
Food	306.710710
Music	264.590742
Travel	378.308242
Utility	280.083972

	A	N1	N2
0	X	200	0
1	Y	201	1
2	Z	202	2
3	X	203	3
4	Y	204	4
5	Z	205	5
6	X	206	6
7	Y	207	7
8	Z	208	8
9	X	209	9
10	Y	210	10
11	Z	211	11
12	X	212	12
13	Y	213	13
14	Z	214	14
15	X	215	15
16	Y	216	16
17	Z	217	17
18	X	218	18
19	Y	219	19
20	Z	220	20
21	X	221	21
22	Y	222	22
23	Z	223	23
24	X	224	24
25	Y	225	25
26	Z	226	26
27	X	227	27
28	Y	228	28
29	Z	229	29

	date	category	debit
description
Amazon	2021-07-24 14:53:28.377359	Music	498.100496
Amazon Kindle	2021-04-16 11:53:28.377359	Books	497.770860
Auto	2021-08-15 10:53:28.377359	Travel	494.124399
Electricity	2021-08-23 17:53:28.377359	Utility	382.519510
Flipcart	2021-06-05 13:53:28.377359	Books	494.128492
Hotel	2021-08-22 17:53:28.377359	Food	483.315864
Metro	2021-08-15 17:53:28.377359	Travel	441.602143
Netflix	2021-08-17 09:53:28.377359	Music	354.940241
Phone	2021-08-17 13:53:28.377359	Utility	499.858182
Swiggy	2021-07-19 12:53:28.377359	Food	328.440080
Taxi	2021-06-09 13:53:28.377359	Travel	485.297743
Zomato	2021-08-24 17:53:28.377359	Food	489.143483
spotify	2021-03-16 09:53:28.377359	Music	415.372894

	A	N1	N2
0	X	200	0
1	Y	201	1
2	Z	202	2
3	X	203	3
4	Y	204	4
5	Z	205	5
6	X	206	6
7	Y	207	7
8	Z	208	8
9	X	209	9
10	Y	210	10
11	Z	211	11
12	X	212	12
13	Y	213	13
14	Z	214	14
15	X	215	15
16	Y	216	16
17	Z	217	17
18	X	218	18
19	Y	219	19
20	Z	220	20
21	X	221	21
22	Y	222	22
23	Z	223	23
24	X	224	24
25	Y	225	25
26	Z	226	26
27	X	227	27
28	Y	228	28
29	Z	229	29

		debit
category	custom_category
Books	Not_a_book	1389.052376
Books	book	3540.698017
Food	Not_a_book	8281.189173
Music	Not_a_book	2766.204103
Music	book	1467.247766
Travel	Not_a_book	6052.931876
Utility	Not_a_book	7562.267233

	value	high	low	volume
ticker
APPLE	234.5	240.0	230.0	100
AT&T	221.3	225.0	220.0	200
IBM	125.3	130.0	120.0	300
NIKE	100.5	110.0	100.0	400

	value	high	low	volume
APPLE	234.5	240.32	233.0	100
AT&T	221.6	222.50	220.0	200
IBM	125.7	127.30	123.0	50
NIKE	100.5	105.00	104.0	1000

	value	low	high	volume
BELL	125.0	125.0	130.0	123
XEROX	500.0	490.0	500.0	50
FORD	300.4	299.5	305.0	100
TESLA	423.9	421.1	425.5	80

	value	high	low	volume
count	8.000000	8.000000	8.000000	4.000000
mean	253.950000	256.952500	251.950000	337.500000
std	146.203918	145.046197	143.205168	446.047456
min	100.500000	105.000000	104.000000	50.000000
25%	125.525000	129.325000	124.500000	87.500000
50%	228.050000	231.410000	226.500000	150.000000
75%	331.275000	335.125000	329.900000	400.000000
max	500.000000	500.000000	490.000000	1000.000000

	A	N1	N2
0	X	200	0
1	Y	201	1
2	Z	202	2
3	X	203	3
4	Y	204	4
5	Z	205	5
6	X	206	6
7	Y	207	7
8	Z	208	8
9	X	209	9
10	Y	210	10
11	Z	211	11
12	X	212	12
13	Y	213	13
14	Z	214	14
15	X	215	15
16	Y	216	16
17	Z	217	17
18	X	218	18
19	Y	219	19
20	Z	220	20
21	X	221	21
22	Y	222	22
23	Z	223	23
24	X	224	24
25	Y	225	25
26	Z	226	26
27	X	227	27
28	Y	228	28
29	Z	229	29