# Build the list of squares of 0..4 with an explicit loop.
sqr = []
for i in range(5):
    sqr.append(i*i) # the per-item operation: square i and append it

sqr

[0, 1, 4, 9, 16]


[i*i for i in range(5)]

[0, 1, 4, 9, 16]


[i*i for i in range(10) if i%2==0]

[0, 4, 16, 36, 64]


{1:1, 2:4, 3:9, 4:16}

{1: 1, 2: 4, 3: 9, 4: 16}


{i:i*i for i in range(10)} # dictionary comprehension

{0: 0, 1: 1, 2: 4, 3: 9, 4: 16, 5: 25, 6: 36, 7: 49, 8: 64, 9: 81}


%%file data.csv
1,2,3
4,5,6
7,8,9

Overwriting data.csv


with open("data.csv") as f:
    for line in f:
        print(line, end="")

1,2,3
4,5,6
7,8,9


def csvparser(filename):
    """Parse a comma-separated text file into a list of rows.

    Every non-blank line is stripped of surrounding whitespace and split on
    commas; whitespace-only lines are skipped. All fields are returned as
    strings and no quoting rules are applied.
    """
    rows = []
    with open(filename) as f:
        for raw in f:
            stripped = raw.strip()
            if stripped:
                rows.append(stripped.split(","))
    return rows


csvparser("data.csv")

[['1', '2', '3'], ['4', '5', '6'], ['7', '8', '9']]


data = csvparser("data.csv")

with open("mydata.csv", "w") as wf:
    for row in data:
        line = ",".join(row)
        wf.write(line)
        wf.write("\n")


!python3 cat.py data.csv

1,2,3
4,5,6
7,8,9


import csv


def read_data(filename):
    """Read a CSV file and return its records as a list of lists of strings.

    The file is opened with ``newline=""``, as the ``csv`` module asks for,
    so that line breaks embedded in quoted fields reach the parser unchanged.
    """
    with open(filename, newline="") as f:
        # csv.reader is an iterator that yields a list of fields per record
        # instead of a raw line of text.
        return list(csv.reader(f))


read_data("data.csv")

[['1', '2', '3'], ['4', '5', '6'], ['7', '8', '9']]


%%file indexdata.csv
symbol,price,volume
IBM,232.6,1000
APPLE,343.5,400
BELL,454.5,300

Overwriting indexdata.csv


def read_data(filename, types_): # types_  list of conversion functions
    """Read a CSV file that has a header row and convert every data value.

    Parameters:
        filename: path of the CSV file to read.
        types_: list of conversion functions (for example ``[str, float, int]``)
            applied position by position to the fields of each data row.

    Returns:
        A list of rows, each a list of converted values. The header row is
        skipped and not returned. An empty file gives ``[]``.

    Fields are paired with ``types_`` by ``zip``, so any fields beyond the
    length of ``types_`` are dropped.
    """
    # newline="" is what the csv module expects for files it reads
    with open(filename, newline="") as f:
        csvf = csv.reader(f) # iterator which gives a list of fields instead of a line
        # Skip the header row; the default avoids StopIteration on an empty file.
        next(csvf, None)
        return [
            [func(value) for value, func in zip(row, types_)]
            for row in csvf
        ]


read_data("indexdata.csv", [str,float,int]) # str, float, int ..built in conversion functions

[['IBM', 232.6, 1000], ['APPLE', 343.5, 400], ['BELL', 454.5, 300]]


first = [1, 2, 3]
second  = ["one", "two", "three"]

for i,w in zip(first, second):
    print(i, w)

1 one
2 two
3 three


{key:value for key, value in zip(second,first)}

{'one': 1, 'two': 2, 'three': 3}


names = ["vikrant", "vishal", "vidya", "vaishali"]
for i, name in enumerate(names, start=1):
    print(i, name)

1 vikrant
2 vishal
3 vidya
4 vaishali


for name in reversed(names):
    print(name)

vaishali
vidya
vishal
vikrant


import xlsxwriter


!python3 -m pip install XlsxWriter

Requirement already satisfied: XlsxWriter in /home/vikrant/anaconda3/lib/python3.8/site-packages (1.3.7)


workbook = xlsxwriter.Workbook("sample.xlsx")


worksheet = workbook.add_worksheet("hello")


# worksheet.write(rownumber, columnnumber, value) # rownumber and columnnumber both start at 0


worksheet.write(1, 1, "Welcome!")

0


workbook.close()


!ls sampl* # this is how you will check on lab.pipal.in

sample.xlsx


bold_sample_workbook  = xlsxwriter.Workbook("bold.xlsx") # this will create a new xlsx file named "bold.xlsx" in
                                                # the current working directory
worksheet = bold_sample_workbook.add_worksheet("sheet1")
worksheet.write(0,0, "normal case") # cell addressed by (row, column) numbers, no format
bold_format = bold_sample_workbook.add_format({'bold':True}) # format options are details you need not memorise;
# just look them up in the help whenever you need them
worksheet.write("B2", "Bold b2 cell", bold_format) # cell addressed by name, written with the bold format
bold_sample_workbook.close() # the file is not saved unless you close the workbook


w = xlsxwriter.Workbook("multiple.xlsx")


sheet1 = w.add_worksheet("sheet1")


sheet2  = w.add_worksheet("sheet2")


sheet1.write("A1", "hello")

0


sheet2.write("A1", "Welcome")

0


w.close()


!python3 cat.py indexdata.csv

symbol,price,volume
IBM,232.6,1000
APPLE,343.5,400
BELL,454.5,300


w = xlsxwriter.Workbook("indexdata.xlsx")


data_sheet = w.add_worksheet("data")


def read_data(filename, types_): # types_  list of conversion functions
    """Read a CSV file with a header row; return the converted rows and the headers.

    Parameters:
        filename: path of the CSV file to read.
        types_: list of conversion functions (for example ``[str, float, int]``)
            applied position by position to the fields of each data row.

    Returns:
        A ``(data, headers)`` tuple: ``data`` is a list of rows of converted
        values and ``headers`` is the first row as a list of strings. An
        empty file gives ``([], [])``.

    Fields are paired with ``types_`` by ``zip``, so any fields beyond the
    length of ``types_`` are dropped.
    """
    # newline="" is what the csv module expects for files it reads
    with open(filename, newline="") as f:
        csvf = csv.reader(f) # iterator which gives a list of fields instead of a line
        # First record holds the column names; default to no headers when the
        # file is empty rather than letting StopIteration escape.
        headers = next(csvf, [])
        data = [
            [func(value) for value, func in zip(row, types_)]
            for row in csvf
        ]
        return data, headers


data, headers = read_data("indexdata.csv", [str, float, int])


bold = w.add_format({'bold':True})

for c, colname in enumerate(headers):
    data_sheet.write(0, c, colname, bold)
    

for r, rowdata in enumerate(data, start=1):
    for c,coldata in enumerate(rowdata):
        data_sheet.write(r,c, coldata)
        
w.close()


%%file convert_to_excel.py
import sys
import csv
import xlsxwriter

def read_data(filename):
    """Read a three-column CSV file with a header row.

    The columns of each data row are converted with ``str``, ``float`` and
    ``int`` respectively; fields beyond the third are dropped by ``zip``.

    Returns:
        A ``(data, headers)`` tuple: ``data`` is a list of converted rows and
        ``headers`` is the first row as a list of strings. An empty file
        gives ``([], [])``.
    """
    types_ = [str, float, int]
    # newline="" is what the csv module expects for files it reads
    with open(filename, newline="") as f:
        csvf = csv.reader(f) # iterator which gives a list of fields instead of a line
        # First record holds the column names; default to no headers when the
        # file is empty rather than letting StopIteration escape.
        headers = next(csvf, [])
        data = [
            [func(value) for value, func in zip(row, types_)]
            for row in csvf
        ]
        return data, headers
    
    
def convert_to_excel(csvfile, excelfile):
    """Copy the contents of a CSV file into a new Excel workbook.

    The CSV is loaded with ``read_data``. The header row is written in bold
    on the first row of a worksheet named "data", and the data rows are
    written underneath it, one cell per value.
    """
    rows, column_names = read_data(csvfile)

    book = xlsxwriter.Workbook(excelfile)
    sheet = book.add_worksheet("data")
    header_format = book.add_format({'bold':True})

    # Row 0 holds the column names.
    for col_index, title in enumerate(column_names):
        sheet.write(0, col_index, title, header_format)

    # Data starts on row 1, directly below the headers.
    row_index = 1
    for record in rows:
        for col_index, cell in enumerate(record):
            sheet.write(row_index, col_index, cell)
        row_index += 1

    book.close()
    

if __name__ == "__main__":
    # Expects two command-line arguments: the input CSV path, then the output
    # Excel path. Indexing sys.argv fails with IndexError when one is missing.
    convert_to_excel(sys.argv[1], sys.argv[2])

Overwriting convert_to_excel.py


!python3 convert_to_excel.py indexdata.csv index1.xlsx


!python3 convert_to_excel.py --help

Traceback (most recent call last):
  File "convert_to_excel.py", line 34, in <module>
    convert_to_excel(sys.argv[1], sys.argv[2])
IndexError: list index out of range


%%file csv2excel.py
import typer # this is for smart command-line arguments
import csv
import xlsxwriter


app = typer.Typer()

def read_data(filename):
    """Read a three-column CSV file with a header row.

    The columns of each data row are converted with ``str``, ``float`` and
    ``int`` respectively; fields beyond the third are dropped by ``zip``.

    Returns:
        A ``(data, headers)`` tuple: ``data`` is a list of converted rows and
        ``headers`` is the first row as a list of strings. An empty file
        gives ``([], [])``.
    """
    types_ = [str, float, int]
    # newline="" is what the csv module expects for files it reads
    with open(filename, newline="") as f:
        csvf = csv.reader(f) # iterator which gives a list of fields instead of a line
        # First record holds the column names; default to no headers when the
        # file is empty rather than letting StopIteration escape.
        headers = next(csvf, [])
        data = [
            [func(value) for value, func in zip(row, types_)]
            for row in csvf
        ]
        return data, headers
    
@app.command()  
def convert_to_excel(csvfile:str, excelfile:str):
    """Copy the contents of a CSV file into a new Excel workbook.

    The CSV is loaded with ``read_data``. The header row is written in bold
    on the first row of a worksheet named "data", and the data rows are
    written underneath it, one cell per value.
    """
    rows, column_names = read_data(csvfile)

    book = xlsxwriter.Workbook(excelfile)
    sheet = book.add_worksheet("data")
    header_format = book.add_format({'bold':True})

    # Row 0 holds the column names.
    for col_index, title in enumerate(column_names):
        sheet.write(0, col_index, title, header_format)

    # Data starts on row 1, directly below the headers.
    row_index = 1
    for record in rows:
        for col_index, cell in enumerate(record):
            sheet.write(row_index, col_index, cell)
        row_index += 1

    book.close()
    

if __name__ == "__main__":
    app()

Overwriting csv2excel.py


!python3 csv2excel.py --help

Usage: csv2excel.py [OPTIONS] CSVFILE EXCELFILE

Arguments:
  CSVFILE    [required]
  EXCELFILE  [required]

Options:
  --install-completion [bash|zsh|fish|powershell|pwsh]
                                  Install completion for the specified shell.
  --show-completion [bash|zsh|fish|powershell|pwsh]
                                  Show completion for the specified shell, to
                                  copy it or customize the installation.

  --help                          Show this message and exit.


%%file csv2excel.py
import typer # this is for smart command-line arguments
import csv
import xlsxwriter


app = typer.Typer()

def read_data(filename):
    """Read a three-column CSV file with a header row.

    The columns of each data row are converted with ``str``, ``float`` and
    ``int`` respectively; fields beyond the third are dropped by ``zip``.

    Returns:
        A ``(data, headers)`` tuple: ``data`` is a list of converted rows and
        ``headers`` is the first row as a list of strings. An empty file
        gives ``([], [])``.
    """
    types_ = [str, float, int]
    # newline="" is what the csv module expects for files it reads
    with open(filename, newline="") as f:
        csvf = csv.reader(f) # iterator which gives a list of fields instead of a line
        # First record holds the column names; default to no headers when the
        # file is empty rather than letting StopIteration escape.
        headers = next(csvf, [])
        data = [
            [func(value) for value, func in zip(row, types_)]
            for row in csvf
        ]
        return data, headers
     
        
# a decorator modifies the behavior of the original function
        
@app.command() # with @ you decorate the function. app.command is a decorator and it is applied to convert_to_excel
def convert_to_excel(csvfile:str, excelfile:str="output.xlsx"):
    """Copy the contents of a CSV file into a new Excel workbook.

    ``excelfile`` defaults to "output.xlsx". The CSV is loaded with
    ``read_data``. The header row is written in bold on the first row of a
    worksheet named "data", and the data rows are written underneath it,
    one cell per value.
    """
    rows, column_names = read_data(csvfile)

    book = xlsxwriter.Workbook(excelfile)
    sheet = book.add_worksheet("data")
    header_format = book.add_format({'bold':True})

    # Row 0 holds the column names.
    for col_index, title in enumerate(column_names):
        sheet.write(0, col_index, title, header_format)

    # Data starts on row 1, directly below the headers.
    row_index = 1
    for record in rows:
        for col_index, cell in enumerate(record):
            sheet.write(row_index, col_index, cell)
        row_index += 1

    book.close()
    

if __name__ == "__main__":
    app() # callable

Overwriting csv2excel.py


!python3 csv2excel.py --help

Usage: csv2excel.py [OPTIONS] CSVFILE

Arguments:
  CSVFILE  [required]

Options:
  --excelfile TEXT                [default: output.xlsx]
  --install-completion [bash|zsh|fish|powershell|pwsh]
                                  Install completion for the specified shell.
  --show-completion [bash|zsh|fish|powershell|pwsh]
                                  Show completion for the specified shell, to
                                  copy it or customize the installation.

  --help                          Show this message and exit.


!python3 csv2excel.py --excelfile index2.xlsx indexdata.csv


!python3 csv2excel.py  indexdata.csv


!python3 -m pip install typer

Requirement already satisfied: typer in /home/vikrant/anaconda3/lib/python3.8/site-packages (0.4.0)
Requirement already satisfied: click<9.0.0,>=7.1.1 in /home/vikrant/anaconda3/lib/python3.8/site-packages (from typer) (7.1.2)


!python3 -m pip install pandas # after this restart the kernel

Requirement already satisfied: pandas in /home/vikrant/anaconda3/lib/python3.8/site-packages (1.1.3)
Requirement already satisfied: python-dateutil>=2.7.3 in /home/vikrant/anaconda3/lib/python3.8/site-packages (from pandas) (2.8.1)
Requirement already satisfied: numpy>=1.15.4 in /home/vikrant/anaconda3/lib/python3.8/site-packages (from pandas) (1.19.2)
Requirement already satisfied: pytz>=2017.2 in /home/vikrant/anaconda3/lib/python3.8/site-packages (from pandas) (2020.1)
Requirement already satisfied: six>=1.5 in /home/vikrant/anaconda3/lib/python3.8/site-packages (from python-dateutil>=2.7.3->pandas) (1.15.0)


import pandas as pd


%%file download.py
import typer
import requests

app = typer.Typer()

@app.command()
def download(url, filename):
    """Download ``url`` and save the response body to ``filename``.

    Raises ``requests.HTTPError`` for a 4xx/5xx response instead of saving
    the error page, and a ``requests`` timeout error if the server does not
    respond within 30 seconds.
    """
    resp = requests.get(url, timeout=30)
    resp.raise_for_status()
    # Write the raw bytes so the saved file is exactly what the server sent,
    # independent of the platform's default text encoding.
    with open(filename, "wb") as f:
        f.write(resp.content)

if __name__ == "__main__":
    app()

Writing download.py


!python3 download.py --help

Usage: download.py [OPTIONS] URL FILENAME

Arguments:
  URL       [required]
  FILENAME  [required]

Options:
  --install-completion [bash|zsh|fish|powershell|pwsh]
                                  Install completion for the specified shell.
  --show-completion [bash|zsh|fish|powershell|pwsh]
                                  Show completion for the specified shell, to
                                  copy it or customize the installation.

  --help                          Show this message and exit.


!python3 download.py https://raw.githubusercontent.com/vikipedia/python-trainings/master/online_course/source/module2/wallet.csv wallet.csv


import pandas as pd


wallet = pd.read_csv("wallet.csv")


wallet


%%file head.py
import typer
app = typer.Typer()

@app.command()
def head(filename:str, n:int=10):
    """Print the first ``n`` lines of ``filename``.

    If the file has fewer than ``n`` lines, all of them are printed and the
    function returns normally.
    """
    with open(filename) as f:
        # zip stops at whichever runs out first: the n-line budget or the
        # file itself, so a short file no longer raises StopIteration.
        for _, line in zip(range(n), f):
            print(line, end="")
            
            
if __name__ == "__main__":
    app()

Overwriting head.py


!python3 head.py --help

Usage: head.py [OPTIONS] FILENAME

Arguments:
  FILENAME  [required]

Options:
  --n INTEGER                     [default: 10]
  --install-completion [bash|zsh|fish|powershell|pwsh]
                                  Install completion for the specified shell.
  --show-completion [bash|zsh|fish|powershell|pwsh]
                                  Show completion for the specified shell, to
                                  copy it or customize the installation.

  --help                          Show this message and exit.


!python3 head.py wallet.csv

,date,category,description,debit
0,2021-03-07 14:53:28.377359,Music,Amazon,421.2073272347991
1,2020-10-08 09:53:28.377359,Food,Swiggy,328.4400802428426
2,2021-02-23 09:53:28.377359,Books,Amazon,244.67943701511354
3,2020-11-01 14:53:28.377359,Utility,Phone,222.7563175805277
4,2021-06-05 13:53:28.377359,Books,Flipcart,494.1284923793595
5,2021-07-28 19:53:28.377359,Utility,Electricity,219.94171130968408
6,2021-04-16 11:53:28.377359,Books,Amazon Kindle,270.32259514795845
7,2021-02-15 10:53:28.377359,Food,Zomato,457.1831036346536
8,2021-08-10 19:53:28.377359,Utility,Phone,151.49637259947792


pd.read_csv("wallet.csv")


pd.read_csv("https://raw.githubusercontent.com/vikipedia/python-trainings/master/online_course/source/module2/wallet.csv")


wallet


type(wallet)

pandas.core.frame.DataFrame


pd.read_excel("https://raw.githubusercontent.com/vikipedia/python-trainings/master/online_course/source/module2/wallet.xlsx")


x = 1


type(x)

int


!python3 -m pip install openpyxl lxml

Requirement already satisfied: openpyxl in /home/vikrant/anaconda3/lib/python3.8/site-packages (3.0.5)
Requirement already satisfied: lxml in /home/vikrant/anaconda3/lib/python3.8/site-packages (4.6.1)
Requirement already satisfied: jdcal in /home/vikrant/anaconda3/lib/python3.8/site-packages (from openpyxl) (1.4.1)
Requirement already satisfied: et-xmlfile in /home/vikrant/anaconda3/lib/python3.8/site-packages (from openpyxl) (1.0.1)


money_control = pd.read_html("https://www.moneycontrol.com/markets/indian-indices")


type(money_control)

list


len(money_control) # it has 4 tables loaded as dataframe

4


money_control[0]


wallet


wallet.columns # what are the column names in this dataframe

Index(['Unnamed: 0', 'date', 'category', 'description', 'debit'], dtype='object')


class Point:
    """Simple container holding an ``x`` and a ``y`` value."""

    def __init__(self, x, y):
        # Store both constructor arguments as instance attributes.
        self.x, self.y = x, y


p1 = Point(4, 5)

p1

<__main__.Point at 0x7f7dec897ac0>


class Point:
    """Simple container holding an ``x`` and a ``y`` value, with a readable repr."""

    def __init__(self, x, y):
        # Store both constructor arguments as instance attributes.
        self.x, self.y = x, y

    def __repr__(self):
        """Return the text shown when the object is displayed, e.g. ``Point<4, 5>``."""
        return f"Point<{self.x}, {self.y}>"


p2 = Point(4, 5)

p2

Point<4, 5>


for col in wallet.columns:
    print(col)

Unnamed: 0
date
category
description
debit


wallet.date # a column can be accessed directly as an attribute: dataframe_name.column_name

0     2021-03-07 14:53:28.377359
1     2020-10-08 09:53:28.377359
2     2021-02-23 09:53:28.377359
3     2020-11-01 14:53:28.377359
4     2021-06-05 13:53:28.377359
                 ...            
95    2021-07-19 13:53:28.377359
96    2021-01-12 19:53:28.377359
97    2021-03-25 11:53:28.377359
98    2021-05-13 15:53:28.377359
99    2020-10-11 16:53:28.377359
Name: date, Length: 100, dtype: object


wallet.category

0       Music
1        Food
2       Books
3     Utility
4       Books
       ...   
95    Utility
96      Books
97    Utility
98     Travel
99       Food
Name: category, Length: 100, dtype: object


wallet['category']

0       Music
1        Food
2       Books
3     Utility
4       Books
       ...   
95    Utility
96      Books
97    Utility
98     Travel
99       Food
Name: category, Length: 100, dtype: object


s1 = pd.Series([421, 232, 4343, 545])

s1

0     421
1     232
2    4343
3     545
dtype: int64


[i for i in s1]

[421, 232, 4343, 545]


s1[0]

421


s1[1]

232


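s1[-1] # raises KeyError: with the default integer index, s1[...] looks up the label -1, not the last position (use s1.iloc[-1] for that)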

---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
~/anaconda3/lib/python3.8/site-packages/pandas/core/indexes/range.py in get_loc(self, key, method, tolerance)
    354                 try:
--> 355                     return self._range.index(new_key)
    356                 except ValueError as err:

ValueError: -1 is not in range

The above exception was the direct cause of the following exception:

KeyError                                  Traceback (most recent call last)
<ipython-input-142-663e5b45842b> in <module>
----> 1 s1[-1] # this is Series object

~/anaconda3/lib/python3.8/site-packages/pandas/core/series.py in __getitem__(self, key)
    880 
    881         elif key_is_scalar:
--> 882             return self._get_value(key)
    883 
    884         if is_hashable(key):

~/anaconda3/lib/python3.8/site-packages/pandas/core/series.py in _get_value(self, label, takeable)
    987 
    988         # Similar to Index.get_value, but we do not fall back to positional
--> 989         loc = self.index.get_loc(label)
    990         return self.index._get_values_for_loc(self, loc, label)
    991 

~/anaconda3/lib/python3.8/site-packages/pandas/core/indexes/range.py in get_loc(self, key, method, tolerance)
    355                     return self._range.index(new_key)
    356                 except ValueError as err:
--> 357                     raise KeyError(key) from err
    358             raise KeyError(key)
    359         return super().get_loc(key, method=method, tolerance=tolerance)

KeyError: -1


s2 = pd.Series([123, 324, 435, 434], index=['APPLE','IBM','AT&T','NIKE'])

s2

APPLE    123
IBM      324
AT&T     435
NIKE     434
dtype: int64


s2['APPLE']

123


s2[0]

123

'APPLE'


labels = ['APPLE','IBM','AT&T','NIKE']
value = pd.Series([124.33, 343, 456.6, 454], index=labels)
volume = pd.Series([100,200,300,400], index=labels)
stocks = pd.DataFrame({"value":value, "volume":volume})


stocks


stocks['value']

APPLE    124.33
IBM      343.00
AT&T     456.60
NIKE     454.00
Name: value, dtype: float64


stocks['volume']

APPLE    100
IBM      200
AT&T     300
NIKE     400
Name: volume, dtype: int64


stocks.value

APPLE    124.33
IBM      343.00
AT&T     456.60
NIKE     454.00
Name: value, dtype: float64


stocks.volume

APPLE    100
IBM      200
AT&T     300
NIKE     400
Name: volume, dtype: int64


labels = ['APPLE','IBM','AT&T','NIKE']
stocks = pd.DataFrame({"value":[124.33, 343, 456.6, 454],
                      "volume":[100,200,300,400]},
                     index= labels)


stocks

	0	1	2	3	4	5	6	7
0	Asian Paints	3234.05	-0.22	42883	3234.05	3234.95	11.00	23.00
1	Axis Bank	810.95	-0.28	133957	810.45	810.85	61.00	62.00
2	Bajaj Auto	3919.00	-0.02	11818	3915.60	3917.80	2.00	1.00
3	Bajaj Finance	7848.80	-0.21	11521	7844.35	7847.00	1.00	5.00
4	Bajaj Finserv	19280.60	3.20	13465	19259.40	19271.75	8.00	3.00
5	Bharti Airtel	688.75	1.22	125763	688.30	688.70	451.00	248.00
6	Dr Reddys Labs	4818.50	-1.14	6675	4818.60	4822.35	10.00	1.00
7	HCL Tech	1232.00	0.90	151531	1232.00	1232.65	300.00	96.00
8	HDFC	2812.80	0.08	8981	2812.00	2812.95	5.00	2.00
9	HDFC Bank	1685.10	0.80	81034	1683.60	1684.30	50.00	4.00
10	HUL	2693.50	1.49	90754	2693.75	2695.40	19.00	1.00
11	ICICI Bank	749.15	0.57	137073	749.00	749.30	42.00	96.00
12	IndusInd Bank	1221.10	0.05	113880	1221.20	1222.05	129.00	1.00
13	Infosys	1829.10	2.09	191453	1828.60	1829.20	106.00	3.00
14	ITC	249.75	-4.84	1999084	249.60	249.75	1903.00	3466.00
15	Kotak Mahindra	2014.80	0.08	27865	2014.20	2014.80	14.00	1.00
16	Larsen	1846.50	3.27	115989	1845.00	1845.90	94.00	13.00
17	M&M	912.20	0.15	46858	911.80	912.35	67.00	169.00
18	Maruti Suzuki	7734.00	1.07	6076	7726.65	7731.30	1.00	5.00
19	Nestle	19492.25	0.33	1767	19520.00	19539.40	1.00	1.00
20	NTPC	151.30	1.27	542049	151.25	151.30	400.00	148.00
21	Power Grid Corp	201.70	-1.03	312494	201.70	201.80	357.00	50.00
22	Reliance	2738.45	1.16	63448	2738.25	2739.55	65.00	108.00
23	SBI	498.25	0.13	490765	498.25	498.35	18.00	126.00
24	Sun Pharma	832.05	-0.38	40311	832.00	832.30	113.00	29.00
25	Tata Steel	1398.05	-0.91	383833	1398.20	1398.90	200.00	65.00
26	TCS	3666.85	0.54	142055	3666.00	3667.35	38.00	15.00
27	Tech Mahindra	1551.30	5.05	194508	1552.35	1553.30	70.00	189.00
28	Titan Company	2528.50	-2.36	55640	2527.10	2529.10	31.00	60.00
29	UltraTechCement	7175.15	-2.92	15153	7177.55	7181.95	7.00	4.00
30	Company	LTP	%Change	Volume	Buy Price	Sell Price	Buy Qty	Sell Qty

Python Virtual Training For Arcesium - Module III - Day 1

quick recap

Writing Excel Files

xlsxwriter documentation

Example

Pandas (spreadsheet of python)

DataFrame and Series

	Unnamed: 0	date	category	description	debit
0	0	2021-03-07 14:53:28.377359	Music	Amazon	421.207327
1	1	2020-10-08 09:53:28.377359	Food	Swiggy	328.440080
2	2	2021-02-23 09:53:28.377359	Books	Amazon	244.679437
3	3	2020-11-01 14:53:28.377359	Utility	Phone	222.756318
4	4	2021-06-05 13:53:28.377359	Books	Flipcart	494.128492
...	...	...	...	...	...
95	95	2021-07-19 13:53:28.377359	Utility	Phone	388.671213
96	96	2021-01-12 19:53:28.377359	Books	Flipcart	467.554562
97	97	2021-03-25 11:53:28.377359	Utility	Phone	320.789434
98	98	2021-05-13 15:53:28.377359	Travel	Taxi	442.096469
99	99	2020-10-11 16:53:28.377359	Food	Hotel	100.455501