Python Virtual Training For Arcesium - Module III - Day 1

Dec 17-23, 2020 Vikrant Patil

These notes are available online at http://notes.pipal.in/2020/arcesium_finop_batch3/module3-day1.html

© Pipal Academy LLP

Day 1 | Day 2 | Day 3 | Day 4 | Day 5

We will be using jupyter hub from http://lab.pipal.in for this training. Create a notebook with name module3-day1.ipynb for today's session. Before you start shutdown all kernels except today's notebook.

In [8]:
items = ["Here","are", "some","words","with","different","length"]
In [4]:
numbers = [120, 234, 345, 346, 11]
In [33]:
max(numbers)
Out[33]:
346
In [9]:
max(items)
Out[9]:
'words'
In [10]:
max(items, key=len)
Out[10]:
'different'
In [11]:
[len(w) for w in items]
Out[11]:
[4, 3, 4, 5, 4, 9, 6]
In [12]:
max(items, key=len)
Out[12]:
'different'
In [21]:
volume = ["1M","2M","0.5B","4M","100K"]
In [23]:
def numeric_part(strv):
    return float(strv[:-1]) # everything except last charecter

def suffix(strv):
    return strv[-1]

def convert(strv):
    n  = numeric_part(strv)
    if suffix(strv)=="M":
        return n*1000000 # take till second last!
    elif suffix(strv)=="B":
        return n*1000000000
    elif suffix(strv)=="K":
        return n*1000
    else:
        return int(strv)

def convert_numeric(strvolume):
    numeric_v = []
    for v in strvolume:
        numeric_v.append(convert(v))
    
    return numeric_v
        
max(convert_numeric(volume))
Out[23]:
500000000.0
In [19]:
"34B"[:-1]
Out[19]:
'34'
In [26]:
max(convert_numeric(volume))
Out[26]:
500000000.0
In [27]:
max(volume, key=convert)
Out[27]:
'0.5B'
In [28]:
sorted(volume, key=convert)
Out[28]:
['100K', '1M', '2M', '4M', '0.5B']
In [29]:
numeric_volume = convert_numeric(volume)
In [30]:
numeric_volume
Out[30]:
[1000000.0, 2000000.0, 500000000.0, 4000000.0, 100000.0]
In [31]:
sorted(numeric_volume)
Out[31]:
[100000.0, 1000000.0, 2000000.0, 4000000.0, 500000000.0]
In [32]:
sorted(volume, key=convert)
Out[32]:
['100K', '1M', '2M', '4M', '0.5B']

List Comprehensions

In [34]:
def convert_numeric(strvolume):
    numeric_v = []
    for v in strvolume:
        numeric_v.append(convert(v))
    
    return numeric_v
        
In [36]:
def convert_numeric(strvolume):
    return [convert(v) for v in strvolume]
In [37]:
convert_numeric(volume)
Out[37]:
[1000000.0, 2000000.0, 500000000.0, 4000000.0, 100000.0]
In [38]:
[x*x for x in range(5)]
Out[38]:
[0, 1, 4, 9, 16]
In [39]:
items
Out[39]:
['Here', 'are', 'some', 'words', 'with', 'different', 'length']
In [40]:
{word:len(word) for word in items}
Out[40]:
{'Here': 4,
 'are': 3,
 'some': 4,
 'words': 5,
 'with': 4,
 'different': 9,
 'length': 6}
In [41]:
lengths = {}
for w in items:
    lengths[w] = len(w)
In [42]:
lengths
Out[42]:
{'Here': 4,
 'are': 3,
 'some': 4,
 'words': 5,
 'with': 4,
 'different': 9,
 'length': 6}

Spreadsheet of python, pandas

In [44]:
import pandas
In [45]:
!python3 -m pip install --user pandas
Requirement already satisfied: pandas in /home/vikrant/anaconda3/lib/python3.8/site-packages (1.0.5)
Requirement already satisfied: numpy>=1.13.3 in /home/vikrant/anaconda3/lib/python3.8/site-packages (from pandas) (1.18.5)
Requirement already satisfied: pytz>=2017.2 in /home/vikrant/anaconda3/lib/python3.8/site-packages (from pandas) (2020.1)
Requirement already satisfied: python-dateutil>=2.6.1 in /home/vikrant/anaconda3/lib/python3.8/site-packages (from pandas) (2.8.1)
Requirement already satisfied: six>=1.5 in /home/vikrant/anaconda3/lib/python3.8/site-packages (from python-dateutil>=2.6.1->pandas) (1.15.0)
WARNING: You are using pip version 20.2.3; however, version 20.3.3 is available.
You should consider upgrading via the '/home/vikrant/anaconda3/bin/python3 -m pip install --upgrade pip' command.
In [46]:
import pandas
In [47]:
import pandas as pd
In [52]:
%%file download.py
import sys
import requests

def download(url, filename):
    resp = requests.get(url)
    with open(filename, "wb") as f:
        f.write(resp.content)
        
if __name__ == "__main__":
    url = sys.argv[1]
    filename = sys.argv[2]
    download(url, filename)
    
Overwriting download.py
In [55]:
!python3 download.py "https://raw.githubusercontent.com/vikipedia/python-trainings/master/online_course/source/module2/wallet.csv" wallet.csv
In [56]:
!head wallet.csv
,date,category,description,debit
0,2021-03-07 14:53:28.377359,Music,Amazon,421.2073272347991
1,2020-10-08 09:53:28.377359,Food,Swiggy,328.4400802428426
2,2021-02-23 09:53:28.377359,Books,Amazon,244.67943701511354
3,2020-11-01 14:53:28.377359,Utility,Phone,222.7563175805277
4,2021-06-05 13:53:28.377359,Books,Flipcart,494.1284923793595
5,2021-07-28 19:53:28.377359,Utility,Electricity,219.94171130968408
6,2021-04-16 11:53:28.377359,Books,Amazon Kindle,270.32259514795845
7,2021-02-15 10:53:28.377359,Food,Zomato,457.1831036346536
8,2021-08-10 19:53:28.377359,Utility,Phone,151.49637259947792
In [57]:
pd.read_csv("wallet.csv")
Out[57]:
Unnamed: 0 date category description debit
0 0 2021-03-07 14:53:28.377359 Music Amazon 421.207327
1 1 2020-10-08 09:53:28.377359 Food Swiggy 328.440080
2 2 2021-02-23 09:53:28.377359 Books Amazon 244.679437
3 3 2020-11-01 14:53:28.377359 Utility Phone 222.756318
4 4 2021-06-05 13:53:28.377359 Books Flipcart 494.128492
... ... ... ... ... ...
95 95 2021-07-19 13:53:28.377359 Utility Phone 388.671213
96 96 2021-01-12 19:53:28.377359 Books Flipcart 467.554562
97 97 2021-03-25 11:53:28.377359 Utility Phone 320.789434
98 98 2021-05-13 15:53:28.377359 Travel Taxi 442.096469
99 99 2020-10-11 16:53:28.377359 Food Hotel 100.455501

100 rows × 5 columns

In [58]:
wallet = pd.read_csv("wallet.csv")
In [59]:
type(wallet)
Out[59]:
pandas.core.frame.DataFrame
In [60]:
wallet
Out[60]:
Unnamed: 0 date category description debit
0 0 2021-03-07 14:53:28.377359 Music Amazon 421.207327
1 1 2020-10-08 09:53:28.377359 Food Swiggy 328.440080
2 2 2021-02-23 09:53:28.377359 Books Amazon 244.679437
3 3 2020-11-01 14:53:28.377359 Utility Phone 222.756318
4 4 2021-06-05 13:53:28.377359 Books Flipcart 494.128492
... ... ... ... ... ...
95 95 2021-07-19 13:53:28.377359 Utility Phone 388.671213
96 96 2021-01-12 19:53:28.377359 Books Flipcart 467.554562
97 97 2021-03-25 11:53:28.377359 Utility Phone 320.789434
98 98 2021-05-13 15:53:28.377359 Travel Taxi 442.096469
99 99 2020-10-11 16:53:28.377359 Food Hotel 100.455501

100 rows × 5 columns

In [61]:
wallet.head()
Out[61]:
Unnamed: 0 date category description debit
0 0 2021-03-07 14:53:28.377359 Music Amazon 421.207327
1 1 2020-10-08 09:53:28.377359 Food Swiggy 328.440080
2 2 2021-02-23 09:53:28.377359 Books Amazon 244.679437
3 3 2020-11-01 14:53:28.377359 Utility Phone 222.756318
4 4 2021-06-05 13:53:28.377359 Books Flipcart 494.128492
In [62]:
wallet.tail()
Out[62]:
Unnamed: 0 date category description debit
95 95 2021-07-19 13:53:28.377359 Utility Phone 388.671213
96 96 2021-01-12 19:53:28.377359 Books Flipcart 467.554562
97 97 2021-03-25 11:53:28.377359 Utility Phone 320.789434
98 98 2021-05-13 15:53:28.377359 Travel Taxi 442.096469
99 99 2020-10-11 16:53:28.377359 Food Hotel 100.455501
In [63]:
wallet.describe()
Out[63]:
Unnamed: 0 debit
count 100.000000 100.000000
mean 49.500000 310.595905
std 29.011492 121.178218
min 0.000000 100.455501
25% 24.750000 216.429447
50% 49.500000 318.110776
75% 74.250000 424.109079
max 99.000000 499.858182
In [66]:
pd.read_csv("https://raw.githubusercontent.com/vikipedia/python-trainings/master/online_course/source/module2/wallet.csv")
Out[66]:
Unnamed: 0 date category description debit
0 0 2021-03-07 14:53:28.377359 Music Amazon 421.207327
1 1 2020-10-08 09:53:28.377359 Food Swiggy 328.440080
2 2 2021-02-23 09:53:28.377359 Books Amazon 244.679437
3 3 2020-11-01 14:53:28.377359 Utility Phone 222.756318
4 4 2021-06-05 13:53:28.377359 Books Flipcart 494.128492
... ... ... ... ... ...
95 95 2021-07-19 13:53:28.377359 Utility Phone 388.671213
96 96 2021-01-12 19:53:28.377359 Books Flipcart 467.554562
97 97 2021-03-25 11:53:28.377359 Utility Phone 320.789434
98 98 2021-05-13 15:53:28.377359 Travel Taxi 442.096469
99 99 2020-10-11 16:53:28.377359 Food Hotel 100.455501

100 rows × 5 columns

In [67]:
pd.read_excel("https://raw.githubusercontent.com/vikipedia/python-trainings/master/online_course/source/module2/wallet.xlsx")
Out[67]:
Unnamed: 0 date category description debit
0 0 2021-03-07 14:53:28.377359 Music Amazon 421.207327
1 1 2020-10-08 09:53:28.377359 Food Swiggy 328.440080
2 2 2021-02-23 09:53:28.377359 Books Amazon 244.679437
3 3 2020-11-01 14:53:28.377359 Utility Phone 222.756318
4 4 2021-06-05 13:53:28.377359 Books Flipcart 494.128492
... ... ... ... ... ...
95 95 2021-07-19 13:53:28.377359 Utility Phone 388.671213
96 96 2021-01-12 19:53:28.377359 Books Flipcart 467.554562
97 97 2021-03-25 11:53:28.377359 Utility Phone 320.789434
98 98 2021-05-13 15:53:28.377359 Travel Taxi 442.096469
99 99 2020-10-11 16:53:28.377359 Food Hotel 100.455501

100 rows × 5 columns

In [69]:
!python3 -m pip install --user Xlrd
Requirement already satisfied: Xlrd in /home/vikrant/anaconda3/lib/python3.8/site-packages (1.2.0)
WARNING: You are using pip version 20.2.3; however, version 20.3.3 is available.
You should consider upgrading via the '/home/vikrant/anaconda3/bin/python3 -m pip install --upgrade pip' command.

problems

  1. Load one excel file from your use case. using pandas, see help and load particular sheet from your excel file.
  2. Try to load data from url "https://www.moneycontrol.com/markets/indian-indices/" using pd.read_html
In [1]:
!python3 -m pip install --user lxml
Requirement already satisfied: lxml in /home/vikrant/anaconda3/lib/python3.8/site-packages (4.5.2)
WARNING: You are using pip version 20.2.3; however, version 20.3.3 is available.
You should consider upgrading via the '/home/vikrant/anaconda3/bin/python3 -m pip install --upgrade pip' command.
In [2]:
import pandas as pd
help(pd.read_excel)
Help on function read_excel in module pandas.io.excel._base:

read_excel(io, sheet_name=0, header=0, names=None, index_col=None, usecols=None, squeeze=False, dtype=None, engine=None, converters=None, true_values=None, false_values=None, skiprows=None, nrows=None, na_values=None, keep_default_na=True, verbose=False, parse_dates=False, date_parser=None, thousands=None, comment=None, skipfooter=0, convert_float=True, mangle_dupe_cols=True, **kwds)
    Read an Excel file into a pandas DataFrame.
    
    Supports `xls`, `xlsx`, `xlsm`, `xlsb`, and `odf` file extensions
    read from a local filesystem or URL. Supports an option to read
    a single sheet or a list of sheets.
    
    Parameters
    ----------
    io : str, bytes, ExcelFile, xlrd.Book, path object, or file-like object
        Any valid string path is acceptable. The string could be a URL. Valid
        URL schemes include http, ftp, s3, and file. For file URLs, a host is
        expected. A local file could be: ``file://localhost/path/to/table.xlsx``.
    
        If you want to pass in a path object, pandas accepts any ``os.PathLike``.
    
        By file-like object, we refer to objects with a ``read()`` method,
        such as a file handler (e.g. via builtin ``open`` function)
        or ``StringIO``.
    sheet_name : str, int, list, or None, default 0
        Strings are used for sheet names. Integers are used in zero-indexed
        sheet positions. Lists of strings/integers are used to request
        multiple sheets. Specify None to get all sheets.
    
        Available cases:
    
        * Defaults to ``0``: 1st sheet as a `DataFrame`
        * ``1``: 2nd sheet as a `DataFrame`
        * ``"Sheet1"``: Load sheet with name "Sheet1"
        * ``[0, 1, "Sheet5"]``: Load first, second and sheet named "Sheet5"
          as a dict of `DataFrame`
        * None: All sheets.
    
    header : int, list of int, default 0
        Row (0-indexed) to use for the column labels of the parsed
        DataFrame. If a list of integers is passed those row positions will
        be combined into a ``MultiIndex``. Use None if there is no header.
    names : array-like, default None
        List of column names to use. If file contains no header row,
        then you should explicitly pass header=None.
    index_col : int, list of int, default None
        Column (0-indexed) to use as the row labels of the DataFrame.
        Pass None if there is no such column.  If a list is passed,
        those columns will be combined into a ``MultiIndex``.  If a
        subset of data is selected with ``usecols``, index_col
        is based on the subset.
    usecols : int, str, list-like, or callable default None
        * If None, then parse all columns.
        * If str, then indicates comma separated list of Excel column letters
          and column ranges (e.g. "A:E" or "A,C,E:F"). Ranges are inclusive of
          both sides.
        * If list of int, then indicates list of column numbers to be parsed.
        * If list of string, then indicates list of column names to be parsed.
    
          .. versionadded:: 0.24.0
    
        * If callable, then evaluate each column name against it and parse the
          column if the callable returns ``True``.
    
        Returns a subset of the columns according to behavior above.
    
          .. versionadded:: 0.24.0
    
    squeeze : bool, default False
        If the parsed data only contains one column then return a Series.
    dtype : Type name or dict of column -> type, default None
        Data type for data or columns. E.g. {'a': np.float64, 'b': np.int32}
        Use `object` to preserve data as stored in Excel and not interpret dtype.
        If converters are specified, they will be applied INSTEAD
        of dtype conversion.
    engine : str, default None
        If io is not a buffer or path, this must be set to identify io.
        Acceptable values are None, "xlrd", "openpyxl" or "odf".
    converters : dict, default None
        Dict of functions for converting values in certain columns. Keys can
        either be integers or column labels, values are functions that take one
        input argument, the Excel cell content, and return the transformed
        content.
    true_values : list, default None
        Values to consider as True.
    false_values : list, default None
        Values to consider as False.
    skiprows : list-like
        Rows to skip at the beginning (0-indexed).
    nrows : int, default None
        Number of rows to parse.
    
        .. versionadded:: 0.23.0
    
    na_values : scalar, str, list-like, or dict, default None
        Additional strings to recognize as NA/NaN. If dict passed, specific
        per-column NA values. By default the following values are interpreted
        as NaN: '', '#N/A', '#N/A N/A', '#NA', '-1.#IND', '-1.#QNAN', '-NaN', '-nan',
        '1.#IND', '1.#QNAN', '<NA>', 'N/A', 'NA', 'NULL', 'NaN', 'n/a',
        'nan', 'null'.
    keep_default_na : bool, default True
        Whether or not to include the default NaN values when parsing the data.
        Depending on whether `na_values` is passed in, the behavior is as follows:
    
        * If `keep_default_na` is True, and `na_values` are specified, `na_values`
          is appended to the default NaN values used for parsing.
        * If `keep_default_na` is True, and `na_values` are not specified, only
          the default NaN values are used for parsing.
        * If `keep_default_na` is False, and `na_values` are specified, only
          the NaN values specified `na_values` are used for parsing.
        * If `keep_default_na` is False, and `na_values` are not specified, no
          strings will be parsed as NaN.
    
        Note that if `na_filter` is passed in as False, the `keep_default_na` and
        `na_values` parameters will be ignored.
    na_filter : bool, default True
        Detect missing value markers (empty strings and the value of na_values). In
        data without any NAs, passing na_filter=False can improve the performance
        of reading a large file.
    verbose : bool, default False
        Indicate number of NA values placed in non-numeric columns.
    parse_dates : bool, list-like, or dict, default False
        The behavior is as follows:
    
        * bool. If True -> try parsing the index.
        * list of int or names. e.g. If [1, 2, 3] -> try parsing columns 1, 2, 3
          each as a separate date column.
        * list of lists. e.g.  If [[1, 3]] -> combine columns 1 and 3 and parse as
          a single date column.
        * dict, e.g. {'foo' : [1, 3]} -> parse columns 1, 3 as date and call
          result 'foo'
    
        If a column or index contains an unparseable date, the entire column or
        index will be returned unaltered as an object data type. If you don`t want to
        parse some cells as date just change their type in Excel to "Text".
        For non-standard datetime parsing, use ``pd.to_datetime`` after ``pd.read_excel``.
    
        Note: A fast-path exists for iso8601-formatted dates.
    date_parser : function, optional
        Function to use for converting a sequence of string columns to an array of
        datetime instances. The default uses ``dateutil.parser.parser`` to do the
        conversion. Pandas will try to call `date_parser` in three different ways,
        advancing to the next if an exception occurs: 1) Pass one or more arrays
        (as defined by `parse_dates`) as arguments; 2) concatenate (row-wise) the
        string values from the columns defined by `parse_dates` into a single array
        and pass that; and 3) call `date_parser` once for each row using one or
        more strings (corresponding to the columns defined by `parse_dates`) as
        arguments.
    thousands : str, default None
        Thousands separator for parsing string columns to numeric.  Note that
        this parameter is only necessary for columns stored as TEXT in Excel,
        any numeric columns will automatically be parsed, regardless of display
        format.
    comment : str, default None
        Comments out remainder of line. Pass a character or characters to this
        argument to indicate comments in the input file. Any data between the
        comment string and the end of the current line is ignored.
    skipfooter : int, default 0
        Rows at the end to skip (0-indexed).
    convert_float : bool, default True
        Convert integral floats to int (i.e., 1.0 --> 1). If False, all numeric
        data will be read in as floats: Excel stores all numbers as floats
        internally.
    mangle_dupe_cols : bool, default True
        Duplicate columns will be specified as 'X', 'X.1', ...'X.N', rather than
        'X'...'X'. Passing in False will cause data to be overwritten if there
        are duplicate names in the columns.
    **kwds : optional
            Optional keyword arguments can be passed to ``TextFileReader``.
    
    Returns
    -------
    DataFrame or dict of DataFrames
        DataFrame from the passed in Excel file. See notes in sheet_name
        argument for more information on when a dict of DataFrames is returned.
    
    See Also
    --------
    to_excel : Write DataFrame to an Excel file.
    to_csv : Write DataFrame to a comma-separated values (csv) file.
    read_csv : Read a comma-separated values (csv) file into DataFrame.
    read_fwf : Read a table of fixed-width formatted lines into DataFrame.
    
    Examples
    --------
    The file can be read using the file name as string or an open file object:
    
    >>> pd.read_excel('tmp.xlsx', index_col=0)  # doctest: +SKIP
           Name  Value
    0   string1      1
    1   string2      2
    2  #Comment      3
    
    >>> pd.read_excel(open('tmp.xlsx', 'rb'),
    ...               sheet_name='Sheet3')  # doctest: +SKIP
       Unnamed: 0      Name  Value
    0           0   string1      1
    1           1   string2      2
    2           2  #Comment      3
    
    Index and header can be specified via the `index_col` and `header` arguments
    
    >>> pd.read_excel('tmp.xlsx', index_col=None, header=None)  # doctest: +SKIP
         0         1      2
    0  NaN      Name  Value
    1  0.0   string1      1
    2  1.0   string2      2
    3  2.0  #Comment      3
    
    Column types are inferred but can be explicitly specified
    
    >>> pd.read_excel('tmp.xlsx', index_col=0,
    ...               dtype={'Name': str, 'Value': float})  # doctest: +SKIP
           Name  Value
    0   string1    1.0
    1   string2    2.0
    2  #Comment    3.0
    
    True, False, and NA values, and thousands separators have defaults,
    but can be explicitly specified, too. Supply the values you would like
    as strings or lists of strings!
    
    >>> pd.read_excel('tmp.xlsx', index_col=0,
    ...               na_values=['string1', 'string2'])  # doctest: +SKIP
           Name  Value
    0       NaN      1
    1       NaN      2
    2  #Comment      3
    
    Comment lines in the excel input file can be skipped using the `comment` kwarg
    
    >>> pd.read_excel('tmp.xlsx', index_col=0, comment='#')  # doctest: +SKIP
          Name  Value
    0  string1    1.0
    1  string2    2.0
    2     None    NaN

In [3]:
r = pd.read_html("https://www.moneycontrol.com/markets/indian-indices/")
In [5]:
type(r)
Out[5]:
list
In [6]:
len(r)
Out[6]:
4
In [7]:
r[0]
Out[7]:
0 1 2 3 4 5 6 7
0 Asian Paints 2831.30 -0.42 37346 2830.45 2831.85 24.00 22.00
1 Axis Bank 665.00 -1.12 466036 665.00 665.40 108.00 95.00
2 Bajaj Auto 3557.05 0.68 14690 3554.75 3557.80 2.00 27.00
3 Bajaj Finance 4993.00 -1.73 58005 4993.00 4994.95 3.00 30.00
4 Bajaj Finserv 9015.00 -1.70 462082 9018.85 9023.90 2.00 2.00
5 Bharti Airtel 548.00 1.53 915534 548.40 548.65 22.00 45.00
6 Dr Reddys Labs 5408.35 1.33 17159 5407.70 5410.00 2.00 7.00
7 HCL Tech 1031.70 3.76 244597 1031.05 1031.75 76.00 22.00
8 HDFC 2696.25 1.71 85102 2697.20 2698.50 5.00 29.00
9 HDFC Bank 1452.80 1.48 394429 1452.40 1453.10 125.00 46.00
10 HUL 2423.75 1.47 50068 2424.20 2425.40 30.00 4.00
11 ICICI Bank 540.75 -0.12 780610 541.20 541.50 182.00 85.00
12 IndusInd Bank 933.55 -0.59 157458 933.45 933.85 30.00 49.00
13 Infosys 1372.25 4.54 498159 1371.40 1371.85 116.00 11.00
14 ITC 202.00 0.20 2390218 201.85 201.95 774.00 870.00
15 Kotak Mahindra 1950.30 -1.03 40637 1950.50 1951.20 7.00 59.00
16 Larsen 1347.30 -1.87 87734 1347.15 1347.55 10.00 27.00
17 M&M 776.80 0.91 143567 777.60 778.00 107.00 375.00
18 Maruti Suzuki 8240.20 2.81 71990 8237.20 8242.50 15.00 5.00
19 Nestle 18243.30 -0.29 1599 18220.75 18241.35 2.00 5.00
20 NTPC 99.50 -0.60 2334652 99.45 99.55 7758.00 6048.00
21 ONGC 101.80 1.14 2080224 101.80 101.85 2181.00 8506.00
22 Power Grid Corp 202.75 -0.54 254197 202.85 203.00 882.00 166.00
23 Reliance 1910.65 -1.16 169682 1910.45 1910.90 2.00 20.00
24 SBI 281.40 -1.63 3755657 281.25 281.40 825.00 349.00
25 Sun Pharma 622.40 0.33 284708 622.65 622.90 72.00 43.00
26 TCS 3169.60 1.58 194706 3168.90 3169.70 12.00 41.00
27 Tech Mahindra 1049.50 -0.18 115810 1049.10 1049.65 20.00 69.00
28 Titan Company 1552.30 0.16 58136 1551.60 1552.15 15.00 10.00
29 UltraTechCement 5672.20 1.55 10747 5674.75 5678.10 3.00 36.00
30 Company LTP %Change Volume Buy Price Sell Price Buy Qty Sell Qty
In [8]:
r[1]
Out[8]:
0 1 2 3 4 5 6
0 SENSEX 49128.12 345.61 0.71 49252.31 49269.02 48956.38
1 S&P BSE Smallcap 18845.03 -63.56 -0.34 19076.89 19085.82 18779.90
2 S&P BSE Midcap 19044.17 -94.55 -0.49 19238.20 19249.01 18954.55
3 S&P BSE SmallCap Select Index 3347.51 -4.42 -0.13 3384.80 3387.03 3330.85
4 S&P BSE MidCap Select Index 8217.59 -28.78 -0.35 8282.73 8285.29 8171.59
5 S&P BSE LargeCap 5547.60 31.70 0.57 5566.59 5567.17 5526.58
6 S&P BSE AllCap 5416.89 17.85 0.33 5445.84 5446.28 5396.34
7 S&P BSE 100 14626.74 73.09 0.50 14681.87 14683.45 14571.05
8 S&P BSE 200 6135.07 25.00 0.41 6162.79 6163.37 6111.46
9 S&P BSE 500 19019.49 65.63 0.35 19118.48 19119.96 18946.88
10 S&P BSE BANKEX 36515.44 -143.33 -0.39 36892.58 36893.81 36352.54
11 S&P BSE Auto 22294.38 296.49 1.35 22120.66 22294.95 21884.64
12 S&P BSE Basic Materials 3683.99 -28.91 -0.78 3722.72 3722.72 3654.72
13 S&P BSE Capital Goods 19631.54 -287.35 -1.44 20043.42 20092.83 19604.07
14 S&P BSE Consumer Discretionary Goods & Services 4682.64 7.55 0.16 4711.13 4720.73 4652.46
15 S&P BSE Consumer Durables 30989.54 -201.20 -0.65 31330.56 31581.91 30985.88
16 S&P BSE Energy 5925.01 -49.00 -0.82 5990.90 5999.47 5915.45
17 S&P BSE Finance 7245.62 3.77 0.05 7302.18 7302.18 7220.82
18 S&P BSE FMCG 12695.68 100.24 0.80 12693.56 12817.12 12657.38
19 S&P BSE Healthcare 22344.27 58.26 0.26 22395.21 22430.73 22203.08
20 S&P BSE India Mfg 529.55 0.75 0.14 531.04 531.56 527.72
21 S&P BSE Industrials 3626.42 -26.20 -0.72 3678.73 3683.33 3617.02
22 S&P BSE IPO 8005.75 -74.51 -0.92 8139.96 8162.48 7998.88
23 S&P BSE IT 26621.47 785.49 3.04 26419.60 26745.41 26302.94
24 S&P BSE Metals 12395.24 -262.69 -2.08 12675.11 12676.88 12354.14
25 S&P BSE Oil and Gas 14714.79 -17.27 -0.12 14775.91 14815.95 14584.81
26 S&P BSE Power 2116.87 -21.88 -1.02 2150.61 2156.78 2115.14
27 S&P BSE PSU 6051.85 -40.68 -0.67 6137.18 6137.61 6015.07
28 S&P BSE Realty 2611.63 27.42 1.06 2609.84 2618.10 2571.25
29 S&P BSE TECk 12146.82 309.68 2.62 12077.45 12199.48 12030.37
30 S&P BSE Telecom 1369.30 7.36 0.54 1368.40 1386.33 1361.98
31 S&P BSE Utilities 1882.50 -5.98 -0.32 1896.16 1904.97 1880.96
32 S&P BSE Dividend Stability Index 492.28 0.99 0.20 494.95 495.02 489.57
33 S&P BSE Enhanced Value Index 294.65 -3.28 -1.10 298.93 298.97 292.67
34 S&P BSE Low Volatility Index 1070.53 4.03 0.38 1072.38 1077.76 1068.38
35 S&P BSE Momentum Index 903.83 5.82 0.65 905.75 908.12 900.30
36 S&P BSE Quality Index 981.42 5.32 0.55 983.05 986.48 979.24
37 S&P BSE SENSEX 50 Index 15117.12 100.06 0.67 15157.41 15161.46 15061.38
38 S&P BSE 150 MidCap 6308.47 -34.44 -0.54 6387.16 6389.84 6282.69
39 S&P BSE 250 SmallCap 2638.39 -5.81 -0.22 2668.07 2670.58 2628.68
40 S&P BSE 250 LargeMid 5786.23 21.96 0.38 5814.22 5814.72 5763.92
41 S&P BSE 400 MidSmall 4721.23 -21.19 -0.45 4778.40 4780.72 4702.93
42 Name Current Value Change % Chg Open High Low
In [9]:
r[2]
Out[9]:
0 1 2 3 4 5 6
0 NIFTY 50 14436.95 89.70 0.63 14474.05 14479.70 14383.10
1 NIFTY NEXT 50 34315.25 -15.80 -0.05 34566.95 34570.65 34143.60
2 NIFTY Midcap 100 22073.45 -120.65 -0.54 22334.45 22359.00 21965.75
3 NIFTY MIDCAP 50 6339.85 -49.85 -0.78 6420.00 6428.35 6302.30
4 NIFTY 100 14592.05 77.70 0.54 14638.80 14638.95 14535.40
5 NIFTY 500 11967.90 42.30 0.35 12024.65 12024.65 11921.75
6 NIFTY AUTO 9878.30 137.05 1.41 9789.80 9879.05 9693.70
7 NIFTY BANK 31984.45 -99.75 -0.31 32280.30 32288.45 31836.95
8 NIFTY ENERGY 17109.00 -146.15 -0.85 17336.50 17345.30 17052.15
9 NIFTY FMCG 34575.70 298.95 0.87 34597.35 34919.60 34446.35
10 NIFTY INFRA 3821.30 -12.15 -0.32 3844.50 3848.70 3805.50
11 NIFTY IT 26795.30 632.90 2.42 26703.20 26905.65 26528.00
12 NIFTY MEDIA 1732.70 -23.90 -1.36 1773.35 1780.90 1724.65
13 NIFTY METAL 3461.75 -67.45 -1.91 3532.70 3537.40 3451.85
14 NIFTY MNC 16333.70 79.50 0.49 16357.10 16380.75 16253.15
15 NIFTY PHARMA 13383.55 112.00 0.84 13315.45 13400.40 13222.80
16 NIFTY PSE 2955.90 -5.75 -0.19 2978.05 2984.85 2932.60
17 NIFTY PSU BANK 1802.90 -27.05 -1.48 1842.30 1843.75 1791.80
18 NIFTY REALTY 330.85 3.35 1.02 329.85 331.65 325.75
19 NIFTY SERV SECTOR 20526.95 199.05 0.98 20559.50 20585.40 20430.30
20 INDIA VIX 22.31 1.67 8.09 20.64 22.74 20.64
21 SX40 17283.18 -159.32 -0.91 17442.50 17442.50 17283.18
22 NIFTY GROWSECT 15 7137.50 40.05 0.56 7146.05 7163.65 7104.05
23 NIFTY50 VALUE 20 4351.35 48.50 1.13 4310.95 4355.65 4304.25
24 NIFTY DIV OPPS 50 3006.15 18.15 0.61 3021.35 3021.35 2994.85
25 NIFTY MID LIQ 15 5590.90 -55.00 -0.97 5669.35 5679.00 5568.50
26 NIFTY100 QUALITY 30 2705.75 0.20 0.01 2715.95 2719.35 2697.25
27 NIFTY Smallcap 100 7433.75 -9.25 -0.12 7509.10 7525.95 7414.75
28 NIFTY COMMODITIES 3974.60 -24.15 -0.60 4005.55 4006.85 3943.10
29 NIFTY CONSUMPTION 6112.05 39.95 0.66 6119.60 6144.15 6090.30
30 NIFTY FIN SERVICE 15543.75 32.60 0.21 15613.50 15626.85 15493.25
31 NIFTY PVT BANK 17705.40 -50.65 -0.29 17859.10 17863.55 17615.80
32 NIFTY MIDCAP 150 8309.90 -39.20 -0.47 8404.65 8411.60 8273.50
33 NIFTY MIDSML 400 7609.20 -29.60 -0.39 7693.95 7700.00 7579.85
34 NIFTY Smallcap 250 6351.25 -12.80 -0.20 6418.20 6426.20 6332.85
35 NIFTY Smallcap 50 3740.60 8.55 0.23 3768.50 3786.55 3729.05
36 Name Current Value Change % Chg Open High Low
In [10]:
r[3]
Out[10]:
0 1 2 3 4 5 6
0 SENSEX 49128.12 345.61 0.71 49252.31 49269.02 48956.38
1 S&P BSE Auto 22294.38 296.49 1.35 22120.66 22294.95 21884.64
2 S&P BSE Metals 12395.24 -262.69 -2.08 12675.11 12676.88 12354.14
3 S&P BSE Oil and Gas 14714.79 -17.27 -0.12 14775.91 14815.95 14584.81
4 NIFTY 50 14436.95 89.70 0.63 14474.05 14479.70 14383.10
5 NIFTY NEXT 50 34315.25 -15.80 -0.05 34566.95 34570.65 34143.60
6 NIFTY Midcap 100 22073.45 -120.65 -0.54 22334.45 22359.00 21965.75
7 NIFTY 100 14592.05 77.70 0.54 14638.80 14638.95 14535.40
8 NIFTY BANK 31984.45 -99.75 -0.31 32280.30 32288.45 31836.95
9 NIFTY FMCG 34575.70 298.95 0.87 34597.35 34919.60 34446.35
10 NIFTY INFRA 3821.30 -12.15 -0.32 3844.50 3848.70 3805.50
11 NIFTY IT 26795.30 632.90 2.42 26703.20 26905.65 26528.00
12 NIFTY PHARMA 13383.55 112.00 0.84 13315.45 13400.40 13222.80
13 Name Current Value Change % Chg Open High Low
In [11]:
wallet = pd.read_excel("https://raw.githubusercontent.com/vikipedia/python-trainings/master/online_course/source/module2/wallet.xlsx")
In [12]:
wallet
Out[12]:
Unnamed: 0 date category description debit
0 0 2021-03-07 14:53:28.377359 Music Amazon 421.207327
1 1 2020-10-08 09:53:28.377359 Food Swiggy 328.440080
2 2 2021-02-23 09:53:28.377359 Books Amazon 244.679437
3 3 2020-11-01 14:53:28.377359 Utility Phone 222.756318
4 4 2021-06-05 13:53:28.377359 Books Flipcart 494.128492
... ... ... ... ... ...
95 95 2021-07-19 13:53:28.377359 Utility Phone 388.671213
96 96 2021-01-12 19:53:28.377359 Books Flipcart 467.554562
97 97 2021-03-25 11:53:28.377359 Utility Phone 320.789434
98 98 2021-05-13 15:53:28.377359 Travel Taxi 442.096469
99 99 2020-10-11 16:53:28.377359 Food Hotel 100.455501

100 rows × 5 columns

DataFrame and Series

In [14]:
type(wallet)
Out[14]:
pandas.core.frame.DataFrame
In [15]:
wallet.columns
Out[15]:
Index(['Unnamed: 0', 'date', 'category', 'description', 'debit'], dtype='object')
In [16]:
wallet['category']
Out[16]:
0       Music
1        Food
2       Books
3     Utility
4       Books
       ...   
95    Utility
96      Books
97    Utility
98     Travel
99       Food
Name: category, Length: 100, dtype: object
In [17]:
wallet.category
Out[17]:
0       Music
1        Food
2       Books
3     Utility
4       Books
       ...   
95    Utility
96      Books
97    Utility
98     Travel
99       Food
Name: category, Length: 100, dtype: object
In [18]:
wallet.date
Out[18]:
0     2021-03-07 14:53:28.377359
1     2020-10-08 09:53:28.377359
2     2021-02-23 09:53:28.377359
3     2020-11-01 14:53:28.377359
4     2021-06-05 13:53:28.377359
                 ...            
95    2021-07-19 13:53:28.377359
96    2021-01-12 19:53:28.377359
97    2021-03-25 11:53:28.377359
98    2021-05-13 15:53:28.377359
99    2020-10-11 16:53:28.377359
Name: date, Length: 100, dtype: object
In [19]:
wallet.debit
Out[19]:
0     421.207327
1     328.440080
2     244.679437
3     222.756318
4     494.128492
         ...    
95    388.671213
96    467.554562
97    320.789434
98    442.096469
99    100.455501
Name: debit, Length: 100, dtype: float64
In [20]:
wallet['Unnamed: 0']
Out[20]:
0      0
1      1
2      2
3      3
4      4
      ..
95    95
96    96
97    97
98    98
99    99
Name: Unnamed: 0, Length: 100, dtype: int64
In [21]:
type(wallet.debit)
Out[21]:
pandas.core.series.Series
In [22]:
wallet
Out[22]:
Unnamed: 0 date category description debit
0 0 2021-03-07 14:53:28.377359 Music Amazon 421.207327
1 1 2020-10-08 09:53:28.377359 Food Swiggy 328.440080
2 2 2021-02-23 09:53:28.377359 Books Amazon 244.679437
3 3 2020-11-01 14:53:28.377359 Utility Phone 222.756318
4 4 2021-06-05 13:53:28.377359 Books Flipcart 494.128492
... ... ... ... ... ...
95 95 2021-07-19 13:53:28.377359 Utility Phone 388.671213
96 96 2021-01-12 19:53:28.377359 Books Flipcart 467.554562
97 97 2021-03-25 11:53:28.377359 Utility Phone 320.789434
98 98 2021-05-13 15:53:28.377359 Travel Taxi 442.096469
99 99 2020-10-11 16:53:28.377359 Food Hotel 100.455501

100 rows × 5 columns

In [23]:
s1 = pd.Series([400, 300, 3244, 500])
In [24]:
s1
Out[24]:
0     400
1     300
2    3244
3     500
dtype: int64
In [25]:
[i for i in s1]
Out[25]:
[400, 300, 3244, 500]
In [26]:
stocks = pd.Series([400, 300, 3244, 500], index=["APPLE","AT&T","IBM","NIKE"])
In [28]:
stocks # 1D data..equilvalent to column
Out[28]:
APPLE     400
AT&T      300
IBM      3244
NIKE      500
dtype: int64
In [29]:
stocks['APPLE']
Out[29]:
400
In [30]:
[s for s in stocks]
Out[30]:
[400, 300, 3244, 500]
In [31]:
stocks['NIKE']
Out[31]:
500
In [32]:
stocks[0]
Out[32]:
400
In [33]:
labels = ['APPLE','AT&T','IBM','NIKE']
value = pd.Series([234.0, 221.0, 124.5, 100.4], index=labels)
high = pd.Series([235.0, 225.0 , 125.5, 101.5], index=labels)
low = pd.Series([230.0, 220.0, 120.0, 100.0], index=labels)
volume = pd.Series([100, 235, 125, 300], index=labels)
stocks = pd.DataFrame({
    "value":value,
    "high": high,
    "low": low,
    "volume": volume
    })
    
In [34]:
stocks
Out[34]:
value high low volume
APPLE 234.0 235.0 230.0 100
AT&T 221.0 225.0 220.0 235
IBM 124.5 125.5 120.0 125
NIKE 100.4 101.5 100.0 300
In [35]:
stocks = pd.DataFrame({
    "value":[234.0, 221.0, 124.5, 100.4],
    "high": [235.0, 225.0 , 125.5, 101.5],
    "low": [230.0, 220.0, 120.0, 100.0],
    "volume" : [100, 235, 125, 300]},
    index= labels
)
In [36]:
stocks
Out[36]:
value high low volume
APPLE 234.0 235.0 230.0 100
AT&T 221.0 225.0 220.0 235
IBM 124.5 125.5 120.0 125
NIKE 100.4 101.5 100.0 300
In [37]:
stocks1 = pd.DataFrame({
    "value":[234.0, 221.0, 124.5, 100.4],
    "high": [235.0, 225.0 , 125.5, 101.5],
    "low": [230.0, 220.0, 120.0, 100.0],
    "volume" : [100, 235, 125, 300]}
)
In [38]:
stocks1
Out[38]:
value high low volume
0 234.0 235.0 230.0 100
1 221.0 225.0 220.0 235
2 124.5 125.5 120.0 125
3 100.4 101.5 100.0 300
In [39]:
stocks
Out[39]:
value high low volume
APPLE 234.0 235.0 230.0 100
AT&T 221.0 225.0 220.0 235
IBM 124.5 125.5 120.0 125
NIKE 100.4 101.5 100.0 300
In [44]:
stocks['value'] # on a dataframe columns accessible just like dictionary items
Out[44]:
APPLE    234.0
AT&T     221.0
IBM      124.5
NIKE     100.4
Name: value, dtype: float64
In [43]:
stocks['value']['APPLE']
Out[43]:
234.0
In [45]:
stocks.value
Out[45]:
APPLE    234.0
AT&T     221.0
IBM      124.5
NIKE     100.4
Name: value, dtype: float64
In [46]:
stocks.volume
Out[46]:
APPLE    100
AT&T     235
IBM      125
NIKE     300
Name: volume, dtype: int64
In [47]:
stocks2 = pd.DataFrame({
    "value":[234.0, 221.0, 124.5, 100.4],
    "high value": [235.0, 225.0 , 125.5, 101.5],
    "low value": [230.0, 220.0, 120.0, 100.0],
    "volume" : [100, 235, 125, 300]},
    index= labels
)
In [48]:
stocks2
Out[48]:
value high value low value volume
APPLE 234.0 235.0 230.0 100
AT&T 221.0 225.0 220.0 235
IBM 124.5 125.5 120.0 125
NIKE 100.4 101.5 100.0 300
In [49]:
stocks2.high value
  File "<ipython-input-49-8e814d9be44a>", line 1
    stocks2.high value
                 ^
SyntaxError: invalid syntax
In [50]:
stocks2['high value']
Out[50]:
APPLE    235.0
AT&T     225.0
IBM      125.5
NIKE     101.5
Name: high value, dtype: float64

ways to access data from dataframe

In [51]:
stocks
Out[51]:
value high low volume
APPLE 234.0 235.0 230.0 100
AT&T 221.0 225.0 220.0 235
IBM 124.5 125.5 120.0 125
NIKE 100.4 101.5 100.0 300
In [53]:
stocks.loc["APPLE"] # complete row of label APPLE
Out[53]:
value     234.0
high      235.0
low       230.0
volume    100.0
Name: APPLE, dtype: float64
In [54]:
stocks.loc[['APPLE',"IBM"]]
Out[54]:
value high low volume
APPLE 234.0 235.0 230.0 100
IBM 124.5 125.5 120.0 125
In [55]:
stocks.loc['APPLE', 'value'] # first item is for row and second item is from column
Out[55]:
234.0
In [56]:
stocks.loc[['APPLE','IBM'], 'value'] 
Out[56]:
APPLE    234.0
IBM      124.5
Name: value, dtype: float64
In [57]:
stocks.loc[['APPLE','IBM'], ['value','volume']] 
Out[57]:
value volume
APPLE 234.0 100
IBM 124.5 125
In [58]:
stocks.iloc[0] # gives zeroth row
Out[58]:
value     234.0
high      235.0
low       230.0
volume    100.0
Name: APPLE, dtype: float64
In [59]:
stocks.iloc[[0,3]]
Out[59]:
value high low volume
APPLE 234.0 235.0 230.0 100
NIKE 100.4 101.5 100.0 300
In [60]:
stocks.iloc[[0,3],0]
Out[60]:
APPLE    234.0
NIKE     100.4
Name: value, dtype: float64
In [61]:
stocks.iloc[[0,3],[0, 3]]
Out[61]:
value volume
APPLE 234.0 100
NIKE 100.4 300
In [63]:
stocks.iloc[:, :] # all the rows and all the columns
Out[63]:
value high low volume
APPLE 234.0 235.0 230.0 100
AT&T 221.0 225.0 220.0 235
IBM 124.5 125.5 120.0 125
NIKE 100.4 101.5 100.0 300
In [64]:
stocks.iloc[:2, :] # take first two rows and all the columns
Out[64]:
value high low volume
APPLE 234.0 235.0 230.0 100
AT&T 221.0 225.0 220.0 235
In [65]:
stocks.iloc[:, [0, 3]]
Out[65]:
value volume
APPLE 234.0 100
AT&T 221.0 235
IBM 124.5 125
NIKE 100.4 300
In [66]:
value  = stocks.value
In [67]:
value
Out[67]:
APPLE    234.0
AT&T     221.0
IBM      124.5
NIKE     100.4
Name: value, dtype: float64
In [68]:
type(value)
Out[68]:
pandas.core.series.Series
In [69]:
value.sum()
Out[69]:
679.9
In [70]:
value.cumsum()
Out[70]:
APPLE    234.0
AT&T     455.0
IBM      579.5
NIKE     679.9
Name: value, dtype: float64
In [71]:
value.min()
Out[71]:
100.4
In [72]:
value.std()
Out[72]:
67.35826477377022
In [73]:
value.mean()
Out[73]:
169.975
In [74]:
s1
Out[74]:
0     400
1     300
2    3244
3     500
dtype: int64
In [75]:
s1.std()
Out[75]:
1424.3421873505913
In [76]:
s1.abs()
Out[76]:
0     400
1     300
2    3244
3     500
dtype: int64
In [77]:
s1 + 100
Out[77]:
0     500
1     400
2    3344
3     600
dtype: int64
In [78]:
s2 = pd.Series([1, 2, 3, 4])
In [79]:
s2
Out[79]:
0    1
1    2
2    3
3    4
dtype: int64
In [80]:
s1
Out[80]:
0     400
1     300
2    3244
3     500
dtype: int64
In [81]:
s1 + s2
Out[81]:
0     401
1     302
2    3247
3     504
dtype: int64
In [82]:
s1 = pd.Series([200, 300, 100], index=['a','b','c'])
In [83]:
s1
Out[83]:
a    200
b    300
c    100
dtype: int64
In [84]:
s2 = pd.Series([1, 2, 3], index=['b','a','c'])
In [85]:
s2
Out[85]:
b    1
a    2
c    3
dtype: int64
In [86]:
s1 + s2
Out[86]:
a    202
b    301
c    103
dtype: int64
In [87]:
stocks
Out[87]:
value high low volume
APPLE 234.0 235.0 230.0 100
AT&T 221.0 225.0 220.0 235
IBM 124.5 125.5 120.0 125
NIKE 100.4 101.5 100.0 300
In [88]:
stocks.head()
Out[88]:
value high low volume
APPLE 234.0 235.0 230.0 100
AT&T 221.0 225.0 220.0 235
IBM 124.5 125.5 120.0 125
NIKE 100.4 101.5 100.0 300

Selecting /Filtering on some condition

In [89]:
stocks
Out[89]:
value high low volume
APPLE 234.0 235.0 230.0 100
AT&T 221.0 225.0 220.0 235
IBM 124.5 125.5 120.0 125
NIKE 100.4 101.5 100.0 300
In [90]:
s1
Out[90]:
a    200
b    300
c    100
dtype: int64
In [91]:
s1 + 100
Out[91]:
a    300
b    400
c    200
dtype: int64
In [92]:
s1 > 100
Out[92]:
a     True
b     True
c    False
dtype: bool
In [93]:
s1[s1>100]
Out[93]:
a    200
b    300
dtype: int64
In [94]:
stocks
Out[94]:
value high low volume
APPLE 234.0 235.0 230.0 100
AT&T 221.0 225.0 220.0 235
IBM 124.5 125.5 120.0 125
NIKE 100.4 101.5 100.0 300
In [95]:
stocks.volume > 200
Out[95]:
APPLE    False
AT&T      True
IBM      False
NIKE      True
Name: volume, dtype: bool
In [98]:
stocks[stocks.volume > 200]
Out[98]:
value high low volume
AT&T 221.0 225.0 220.0 235
NIKE 100.4 101.5 100.0 300
In [99]:
wallet
Out[99]:
Unnamed: 0 date category description debit
0 0 2021-03-07 14:53:28.377359 Music Amazon 421.207327
1 1 2020-10-08 09:53:28.377359 Food Swiggy 328.440080
2 2 2021-02-23 09:53:28.377359 Books Amazon 244.679437
3 3 2020-11-01 14:53:28.377359 Utility Phone 222.756318
4 4 2021-06-05 13:53:28.377359 Books Flipcart 494.128492
... ... ... ... ... ...
95 95 2021-07-19 13:53:28.377359 Utility Phone 388.671213
96 96 2021-01-12 19:53:28.377359 Books Flipcart 467.554562
97 97 2021-03-25 11:53:28.377359 Utility Phone 320.789434
98 98 2021-05-13 15:53:28.377359 Travel Taxi 442.096469
99 99 2020-10-11 16:53:28.377359 Food Hotel 100.455501

100 rows × 5 columns

In [101]:
food = wallet[wallet.category=="Food"]
In [102]:
food
Out[102]:
Unnamed: 0 date category description debit
1 1 2020-10-08 09:53:28.377359 Food Swiggy 328.440080
7 7 2021-02-15 10:53:28.377359 Food Zomato 457.183104
11 11 2021-07-24 13:53:28.377359 Food Zomato 434.495468
15 15 2021-08-24 17:53:28.377359 Food Zomato 262.943993
22 22 2021-06-24 15:53:28.377359 Food Zomato 489.143483
25 25 2021-05-21 14:53:28.377359 Food Hotel 483.315864
30 30 2020-09-15 18:53:28.377359 Food Swiggy 203.529240
32 32 2021-06-23 11:53:28.377359 Food Zomato 345.030436
33 33 2021-05-14 18:53:28.377359 Food Hotel 449.248030
39 39 2021-05-17 17:53:28.377359 Food Swiggy 112.333160
40 40 2021-07-19 12:53:28.377359 Food Swiggy 291.545988
42 42 2021-08-22 17:53:28.377359 Food Hotel 210.256270
50 50 2020-12-24 11:53:28.377359 Food Zomato 463.001875
51 51 2020-12-22 17:53:28.377359 Food Zomato 331.227023
58 58 2021-05-20 10:53:28.377359 Food Hotel 255.871035
59 59 2020-08-28 11:53:28.377359 Food Swiggy 208.232912
61 61 2021-02-25 13:53:28.377359 Food Hotel 124.658278
64 64 2021-01-31 14:53:28.377359 Food Zomato 232.222380
66 66 2020-10-09 16:53:28.377359 Food Swiggy 263.957770
71 71 2021-03-30 12:53:28.377359 Food Swiggy 245.360508
75 75 2021-03-03 12:53:28.377359 Food Hotel 425.625291
80 80 2021-02-17 09:53:28.377359 Food Swiggy 283.095707
81 81 2020-10-29 16:53:28.377359 Food Hotel 470.080995
86 86 2021-05-07 16:53:28.377359 Food Zomato 198.450672
87 87 2021-05-19 15:53:28.377359 Food Zomato 378.820641
91 91 2021-01-22 17:53:28.377359 Food Hotel 232.663468
99 99 2020-10-11 16:53:28.377359 Food Hotel 100.455501
In [103]:
wallet[wallet.category=="Books"]
Out[103]:
Unnamed: 0 date category description debit
2 2 2021-02-23 09:53:28.377359 Books Amazon 244.679437
4 4 2021-06-05 13:53:28.377359 Books Flipcart 494.128492
6 6 2021-04-16 11:53:28.377359 Books Amazon Kindle 270.322595
19 19 2021-05-16 10:53:28.377359 Books Flipcart 109.325909
24 24 2021-05-31 11:53:28.377359 Books Amazon 498.100496
26 26 2020-08-26 15:53:28.377359 Books Amazon Kindle 138.806578
31 31 2020-09-25 11:53:28.377359 Books Flipcart 246.503527
47 47 2020-10-28 10:53:28.377359 Books Flipcart 310.408610
55 55 2021-01-21 19:53:28.377359 Books Flipcart 423.749708
62 62 2021-01-27 19:53:28.377359 Books Amazon Kindle 497.770860
72 72 2021-06-30 18:53:28.377359 Books Amazon 294.662869
77 77 2021-01-18 14:53:28.377359 Books Amazon Kindle 482.152343
83 83 2021-03-18 09:53:28.377359 Books Flipcart 451.584407
96 96 2021-01-12 19:53:28.377359 Books Flipcart 467.554562
In [106]:
food.describe()
Out[106]:
Unnamed: 0 debit
count 27.000000 27.000000
mean 50.962963 306.710710
std 27.784170 119.856277
min 1.000000 100.455501
25% 31.000000 221.239325
50% 51.000000 283.095707
75% 73.000000 430.060379
max 99.000000 489.143483
In [107]:
wallet[wallet.debit > 450]
Out[107]:
Unnamed: 0 date category description debit
4 4 2021-06-05 13:53:28.377359 Books Flipcart 494.128492
7 7 2021-02-15 10:53:28.377359 Food Zomato 457.183104
14 14 2021-06-09 13:53:28.377359 Travel Taxi 485.297743
22 22 2021-06-24 15:53:28.377359 Food Zomato 489.143483
24 24 2021-05-31 11:53:28.377359 Books Amazon 498.100496
25 25 2021-05-21 14:53:28.377359 Food Hotel 483.315864
34 34 2021-05-14 10:53:28.377359 Utility Phone 499.858182
36 36 2020-12-10 10:53:28.377359 Travel Auto 472.941439
38 38 2021-08-15 10:53:28.377359 Travel Auto 494.124399
43 43 2020-09-21 12:53:28.377359 Utility Phone 486.033933
50 50 2020-12-24 11:53:28.377359 Food Zomato 463.001875
62 62 2021-01-27 19:53:28.377359 Books Amazon Kindle 497.770860
77 77 2021-01-18 14:53:28.377359 Books Amazon Kindle 482.152343
81 81 2020-10-29 16:53:28.377359 Food Hotel 470.080995
83 83 2021-03-18 09:53:28.377359 Books Flipcart 451.584407
96 96 2021-01-12 19:53:28.377359 Books Flipcart 467.554562
In [108]:
wallet[wallet.debit < 450]
Out[108]:
Unnamed: 0 date category description debit
0 0 2021-03-07 14:53:28.377359 Music Amazon 421.207327
1 1 2020-10-08 09:53:28.377359 Food Swiggy 328.440080
2 2 2021-02-23 09:53:28.377359 Books Amazon 244.679437
3 3 2020-11-01 14:53:28.377359 Utility Phone 222.756318
5 5 2021-07-28 19:53:28.377359 Utility Electricity 219.941711
... ... ... ... ... ...
94 94 2021-01-04 13:53:28.377359 Utility Phone 431.185537
95 95 2021-07-19 13:53:28.377359 Utility Phone 388.671213
97 97 2021-03-25 11:53:28.377359 Utility Phone 320.789434
98 98 2021-05-13 15:53:28.377359 Travel Taxi 442.096469
99 99 2020-10-11 16:53:28.377359 Food Hotel 100.455501

84 rows × 5 columns

In [109]:
wallet[wallet.description=="Amazon"]
Out[109]:
Unnamed: 0 date category description debit
0 0 2021-03-07 14:53:28.377359 Music Amazon 421.207327
2 2 2021-02-23 09:53:28.377359 Books Amazon 244.679437
12 12 2021-07-24 14:53:28.377359 Music Amazon 329.536003
24 24 2021-05-31 11:53:28.377359 Books Amazon 498.100496
37 37 2021-04-18 16:53:28.377359 Music Amazon 266.069078
72 72 2021-06-30 18:53:28.377359 Books Amazon 294.662869
85 85 2021-01-12 09:53:28.377359 Music Amazon 130.374908
89 89 2021-04-12 14:53:28.377359 Music Amazon 218.487173
90 90 2020-12-01 14:53:28.377359 Music Amazon 101.573276
In [110]:
wallet[wallet.description=="Taxi"]
Out[110]:
Unnamed: 0 date category description debit
14 14 2021-06-09 13:53:28.377359 Travel Taxi 485.297743
52 52 2021-03-26 09:53:28.377359 Travel Taxi 403.610070
63 63 2021-05-10 11:53:28.377359 Travel Taxi 355.989050
68 68 2020-08-26 09:53:28.377359 Travel Taxi 279.147884
74 74 2021-03-20 11:53:28.377359 Travel Taxi 303.055421
92 92 2021-01-12 19:53:28.377359 Travel Taxi 356.842638
98 98 2021-05-13 15:53:28.377359 Travel Taxi 442.096469
In [112]:
class Stock:
    
    def __init__(self, symbol, value, volume):
        self.symbol = symbol
        self.value  = value
        self.volume = volume
        
    def __repr__(self):
        return "Stock({} {} {})".format(self.symbol, self.value, self.volume)
In [115]:
ibm  = Stock("IBM", 120, 100)
In [116]:
%%file stocksdata.csv
"IBM", 120, 100
"IBM1", 121, 101
"IBM2", 122, 102
"IBM3", 123, 103
"IBM4", 124, 104
"IBM5", 125, 105
Writing stocksdata.csv
In [117]:
import csv
def load_as_list(filename):
    with open(filename) as f:
        reader = csv.reader(f)
        print("skipping header", next(reader))
        stocks = []
        for row in reader:
            stocks.append(Stock(row[0], row[1], row[2]))
        return stocks
In [119]:
l = load_as_list("stocksdata.csv")
skipping header ['IBM', ' 120', ' 100']
In [121]:
l[0]
Out[121]:
Stock(IBM1  121  101)
In [122]:
def load_as_list(filename):
    with open(filename) as f:
        reader = csv.reader(f)
        print("skipping header", next(reader))
        return [Stock(row[0], row[1], row[2]) for row in reader]
In [123]:
load_as_list("stocksdata.csv")
skipping header ['IBM', ' 120', ' 100']
Out[123]:
[Stock(IBM1  121  101),
 Stock(IBM2  122  102),
 Stock(IBM3  123  103),
 Stock(IBM4  124  104),
 Stock(IBM5  125  105)]
In [124]:
def add(x, y):
    return x+y
In [125]:
add(1, 2)
Out[125]:
3
In [126]:
args = [1, 2]
In [127]:
add(args[0], args[1])
Out[127]:
3
In [128]:
add(*args)
Out[128]:
3
In [129]:
def load_as_list(filename):
    with open(filename) as f:
        reader = csv.reader(f)
        print("skipping header", next(reader))
        return [Stock(*row) for row in reader]
In [130]:
load_as_list("stocksdata.csv")
skipping header ['IBM', ' 120', ' 100']
Out[130]:
[Stock(IBM1  121  101),
 Stock(IBM2  122  102),
 Stock(IBM3  123  103),
 Stock(IBM4  124  104),
 Stock(IBM5  125  105)]
In [131]:
mat = []
for c in range(5):
    row = []
    for r in range(5):
        if c==r:
            row.append(1)
        else:
            row.append(0)
    mat.append(row)
In [132]:
mat
Out[132]:
[[1, 0, 0, 0, 0],
 [0, 1, 0, 0, 0],
 [0, 0, 1, 0, 0],
 [0, 0, 0, 1, 0],
 [0, 0, 0, 0, 1]]
In [ ]: