url = "https://finance.yahoo.com/quote/{TICKER}/history?p={TICKER}"


url.format(TICKER="IBM")

'https://finance.yahoo.com/quote/IBM/history?p=IBM'


import requests


resp = requests.get(url.format(TICKER="IBM"), headers={'User-Agent':"Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:93.0) Gecko/20100101 Firefox/93.0"})


resp.status_code

403


print(resp.text)

<!DOCTYPE html>
  <html lang="en-us"><head>
  <meta http-equiv="content-type" content="text/html; charset=UTF-8">
      <meta charset="utf-8">
      <title>Yahoo</title>
      <meta name="viewport" content="width=device-width,initial-scale=1,minimal-ui">
      <meta http-equiv="X-UA-Compatible" content="IE=edge,chrome=1">
      <style>
  html {
      height: 100%;
  }
  body {
      background: #fafafc url(https://s.yimg.com/nn/img/sad-panda-201402200631.png) 50% 50%;
      background-size: cover;
      height: 100%;
      text-align: center;
      font: 300 18px "helvetica neue", helvetica, verdana, tahoma, arial, sans-serif;
  }
  table {
      height: 100%;
      width: 100%;
      table-layout: fixed;
      border-collapse: collapse;
      border-spacing: 0;
      border: none;
  }
  h1 {
      font-size: 42px;
      font-weight: 400;
      color: #400090;
  }
  p {
      color: #1A1A1A;
  }
  #message-1 {
      font-weight: bold;
      margin: 0;
  }
  #message-2 {
      display: inline-block;
      *display: inline;
      zoom: 1;
      max-width: 17em;
      _width: 17em;
  }
      </style>
  <script>
    document.write('<img src="//geo.yahoo.com/b?s=1197757129&t='+new Date().getTime()+'&src=aws&err_url='+encodeURIComponent(document.URL)+'&err=%<pssc>&test='+encodeURIComponent('%<{Bucket}cqh[:200]>')+'" width="0px" height="0px"/>');var beacon = new Image();beacon.src="//bcn.fp.yahoo.com/p?s=1197757129&t="+new Date().getTime()+"&src=aws&err_url="+encodeURIComponent(document.URL)+"&err=%<pssc>&test="+encodeURIComponent('%<{Bucket}cqh[:200]>');
  </script>
  </head>
  <body>
  <!-- status code : 404 -->
  <!-- Not Found on Server -->
  <table>
  <tbody><tr>
      <td>
      <img src="https://s.yimg.com/rz/p/yahoo_frontpage_en-US_s_f_p_205x58_frontpage.png" alt="Yahoo Logo">
      <h1 style="margin-top:20px;">Will be right back...</h1>
      <p id="message-1">Thank you for your patience.</p>
      <p id="message-2">Our engineers are working quickly to resolve the issue.</p>
      </td>
  </tr>
  </tbody></table>
  </body></html>


baseurl = "https://yfapi.net"
quoteurl = "/v6/finance/quote"
url = baseurl + quoteurl

params = {
    "region":"US",
    "lang":"en",
    "symbols":"AAPL"
}


resp = requests.get(url, params=params)


resp.status_code

403


import requests

url = "https://rest.yahoofinanceapi.com/v6/finance/quote"

querystring = {"symbols":"AAPL,BTC-USD,EURUSD=X"}

headers = {
    'x-api-key': "YOUR-PERSONAL-API-KEY"
    }

response = requests.get(url, headers=headers, params=querystring)

print(response.text)


jsondata = requests.get("https://httpbin.org/get?key1=value1&key2=value2&myparam=myvalue").json()


jsondata['headers']

{'Accept': '*/*',
 'Accept-Encoding': 'gzip, deflate',
 'Host': 'httpbin.org',
 'User-Agent': 'python-requests/2.24.0',
 'X-Amzn-Trace-Id': 'Root=1-617244f3-5293488d65b7036a10e1bce5'}


%%file yahoo_fianace.py
from selenium import webdriver
from typer

app = typer.Typer()



def get_url(ticker):
    return "https://finance.yahoo.com/quote/{0}/history?p={0}".format(ticker)

@app.command()
def download(ticker="AAPL")

    fp = webdriver.FirefoxProfile()
    xls_mimetype = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
    fp.set_preference("browser.helperApps.neverAsk.saveToDisk", xls_mimetype)

    driver = webdriver.Firefox(firefox_profile=fp)
    driver.implicitly_wait(5)


    url = get_url(ticker)

    driver.get(url)

    download = driver.find_element_by_link_text("Download")
    download.click()
    driver.close()

    
if __name__ == "__main__":
    app()

Writing yahoo_fianace.py


"this is string formating {var1}, {var2}".format(var1=2, var2=3)

'this is string formating 2, 3'


x = 90
y = 100
f"this is {x} and this is {y}"

'this is 90 and this is 100'


import sqlite3


conn = sqlite3.connect("data1.db") # this will vary depending on what database you are trying to connect with
cur = conn.cursor()
cur.execute("create table person (name varchar(100), email varchar(100))")
conn.commit()


def insert_person(name, email, conn):
    cur = conn.cursor()
    cur.execute(f"insert into person (name, email) values ('{name}', '{email}')")
    conn.commit() # make sure you commit when you are doing write or change operation.


insert_person("alice", "aice@wonder.land", conn)


insert_person("alex", "alex@newyork.zoo", conn)


def find_person(name, conn):
    query = f"select * from person where name='{name}'"
    print(query)
    cur = conn.cursor()
    results = cur.execute(query)
    return results


 r= find_person("alex", conn) # iterator ... so you can use it only once

select * from person where name='alex'


r.fetchall() # consumed ..

[('alex', 'alex@newyork.zoo')]


r.fetchone() # already consumed, will not get any result


r = find_person("alice", conn)

select * from person where name='alice'


r.fetchone() # it get first item from the results..being just one item it is tuple

('alice', 'aice@wonder.land')


r = find_person("alice", conn)
r.fetchall() # it returns list of tuples... every tuple is one row from database

select * from person where name='alice'

[('alice', 'aice@wonder.land')]


(1, 2, 3)

(1, 2, 3)


(1,) # a tuple with single item in it

(1,)


def find_person_(name, conn):
    q = "select * from person where name=?"
    cur = conn.cursor()
    result = cur.execute(q, (name, )) # this is right way of doing
    return result.fetchall()


find_person_("alice", conn)

[('alice', 'aice@wonder.land')]


find_person("alice", conn).fetchall()

select * from person where name='alice'

[('alice', 'aice@wonder.land')]


find_person_("alice", conn)

[('alice', 'aice@wonder.land')]


conn.close()


import pandas as pd


conn = sqlite3.connect("data1")


wallet = pd.read_csv("wallet.csv", index_col=0)


wallet


wallet.to_sql("wallet", con=conn, if_exists="append")


 r= conn.cursor().execute("select * from wallet")


for item in r:
    print(item)

(0, '2021-03-07 14:53:28.377359', 'Music', 'Amazon', 421.2073272347991)
(1, '2020-10-08 09:53:28.377359', 'Food', 'Swiggy', 328.4400802428426)
(2, '2021-02-23 09:53:28.377359', 'Books', 'Amazon', 244.67943701511356)
(3, '2020-11-01 14:53:28.377359', 'Utility', 'Phone', 222.75631758052768)
(4, '2021-06-05 13:53:28.377359', 'Books', 'Flipcart', 494.1284923793595)
(5, '2021-07-28 19:53:28.377359', 'Utility', 'Electricity', 219.9417113096841)
(6, '2021-04-16 11:53:28.377359', 'Books', 'Amazon Kindle', 270.32259514795845)
(7, '2021-02-15 10:53:28.377359', 'Food', 'Zomato', 457.1831036346536)
(8, '2021-08-10 19:53:28.377359', 'Utility', 'Phone', 151.4963725994779)
(9, '2020-11-29 14:53:28.377359', 'Travel', 'Auto', 443.61888423247854)
(10, '2021-06-15 13:53:28.377359', 'Travel', 'Metro', 328.1754210974373)
(11, '2021-07-24 13:53:28.377359', 'Food', 'Zomato', 434.4954675355444)
(12, '2021-07-24 14:53:28.377359', 'Music', 'Amazon', 329.5360031897569)
(13, '2021-06-06 10:53:28.377359', 'Utility', 'Phone', 154.04494918166588)
(14, '2021-06-09 13:53:28.377359', 'Travel', 'Taxi', 485.2977429821982)
(15, '2021-08-24 17:53:28.377359', 'Food', 'Zomato', 262.9439932340398)
(16, '2021-03-05 19:53:28.377359', 'Utility', 'Phone', 390.31687619327926)
(17, '2021-04-17 18:53:28.377359', 'Utility', 'Electricity', 316.87867542466364)
(18, '2021-05-08 15:53:28.377359', 'Travel', 'Auto', 433.8224042777937)
(19, '2021-05-16 10:53:28.377359', 'Books', 'Flipcart', 109.32590886550068)
(20, '2020-10-12 18:53:28.377359', 'Travel', 'Auto', 365.9218082537661)
(21, '2021-01-04 19:53:28.377359', 'Travel', 'Metro', 329.0973715025852)
(22, '2021-06-24 15:53:28.377359', 'Food', 'Zomato', 489.14348305222535)
(23, '2020-12-11 10:53:28.377359', 'Music', 'Netflix', 354.9402409919817)
(24, '2021-05-31 11:53:28.377359', 'Books', 'Amazon', 498.1004955046106)
(25, '2021-05-21 14:53:28.377359', 'Food', 'Hotel', 483.315863517772)
(26, '2020-08-26 15:53:28.377359', 'Books', 'Amazon Kindle', 138.806577801854)
(27, '2021-05-01 15:53:28.377359', 'Utility', 'Electricity', 103.68079074846584)
(28, '2020-12-14 15:53:28.377359', 'Utility', 'Phone', 358.4599327957656)
(29, '2021-06-20 10:53:28.377359', 'Utility', 'Electricity', 184.5577284049955)
(30, '2020-09-15 18:53:28.377359', 'Food', 'Swiggy', 203.52923978943267)
(31, '2020-09-25 11:53:28.377359', 'Books', 'Flipcart', 246.503527384528)
(32, '2021-06-23 11:53:28.377359', 'Food', 'Zomato', 345.0304360814152)
(33, '2021-05-14 18:53:28.377359', 'Food', 'Hotel', 449.24802955761743)
(34, '2021-05-14 10:53:28.377359', 'Utility', 'Phone', 499.8581815222449)
(35, '2021-02-18 18:53:28.377359', 'Travel', 'Metro', 441.6021430011205)
(36, '2020-12-10 10:53:28.377359', 'Travel', 'Auto', 472.94143917262176)
(37, '2021-04-18 16:53:28.377359', 'Music', 'Amazon', 266.0690783774673)
(38, '2021-08-15 10:53:28.377359', 'Travel', 'Auto', 494.12439940565713)
(39, '2021-05-17 17:53:28.377359', 'Food', 'Swiggy', 112.33316019807457)
(40, '2021-07-19 12:53:28.377359', 'Food', 'Swiggy', 291.54598801930536)
(41, '2021-02-20 19:53:28.377359', 'Utility', 'Phone', 425.18719068071806)
(42, '2021-08-22 17:53:28.377359', 'Food', 'Hotel', 210.25626950078566)
(43, '2020-09-21 12:53:28.377359', 'Utility', 'Phone', 486.0339327616073)
(44, '2020-12-26 19:53:28.377359', 'Utility', 'Electricity', 257.92759337085425)
(45, '2021-05-27 16:53:28.377359', 'Utility', 'Electricity', 154.74287259516657)
(46, '2021-05-15 15:53:28.377359', 'Utility', 'Electricity', 359.3249716537848)
(47, '2020-10-28 10:53:28.377359', 'Books', 'Flipcart', 310.408610004679)
(48, '2021-08-23 17:53:28.377359', 'Utility', 'Electricity', 310.05840961423314)
(49, '2021-03-16 09:53:28.377359', 'Music', 'spotify', 232.30340219121135)
(50, '2020-12-24 11:53:28.377359', 'Food', 'Zomato', 463.0018749263554)
(51, '2020-12-22 17:53:28.377359', 'Food', 'Zomato', 331.22702332837093)
(52, '2021-03-26 09:53:28.377359', 'Travel', 'Taxi', 403.61007013419334)
(53, '2021-01-27 09:53:28.377359', 'Utility', 'Electricity', 183.1866624101276)
(54, '2020-11-16 10:53:28.377359', 'Music', 'spotify', 160.81754340768396)
(55, '2021-01-21 19:53:28.377359', 'Books', 'Flipcart', 423.74970808720553)
(56, '2021-05-19 18:53:28.377359', 'Utility', 'Phone', 319.34287626846185)
(57, '2021-07-15 15:53:28.377359', 'Utility', 'Phone', 279.6090437716363)
(58, '2021-05-20 10:53:28.377359', 'Food', 'Hotel', 255.8710346734312)
(59, '2020-08-28 11:53:28.377359', 'Food', 'Swiggy', 208.2329120852039)
(60, '2021-01-17 11:53:28.377359', 'Utility', 'Electricity', 382.5195101154448)
(61, '2021-02-25 13:53:28.377359', 'Food', 'Hotel', 124.65827844174062)
(62, '2021-01-27 19:53:28.377359', 'Books', 'Amazon Kindle', 497.7708601564023)
(63, '2021-05-10 11:53:28.377359', 'Travel', 'Taxi', 355.9890502253258)
(64, '2021-01-31 14:53:28.377359', 'Food', 'Zomato', 232.2223798622789)
(65, '2020-10-23 18:53:28.377359', 'Music', 'Netflix', 188.7487426895118)
(66, '2020-10-09 16:53:28.377359', 'Food', 'Swiggy', 263.9577700340145)
(67, '2021-07-31 14:53:28.377359', 'Music', 'Netflix', 324.786916846731)
(68, '2020-08-26 09:53:28.377359', 'Travel', 'Taxi', 279.1478844739421)
(69, '2020-10-10 15:53:28.377359', 'Utility', 'Electricity', 300.5246204193512)
(70, '2021-08-17 13:53:28.377359', 'Utility', 'Phone', 125.22977317126335)
(71, '2021-03-30 12:53:28.377359', 'Food', 'Swiggy', 245.36050838040904)
(72, '2021-06-30 18:53:28.377359', 'Books', 'Amazon', 294.6628689900488)
(73, '2021-08-15 17:53:28.377359', 'Travel', 'Metro', 117.58872931045572)
(74, '2021-03-20 11:53:28.377359', 'Travel', 'Taxi', 303.0554209852045)
(75, '2021-03-03 12:53:28.377359', 'Food', 'Hotel', 425.62529099481486)
(76, '2020-11-17 09:53:28.377359', 'Music', 'Netflix', 197.5346000167895)
(77, '2021-01-18 14:53:28.377359', 'Books', 'Amazon Kindle', 482.1523430204321)
(78, '2020-09-09 16:53:28.377359', 'Music', 'spotify', 415.3728938035302)
(79, '2021-08-17 09:53:28.377359', 'Music', 'Netflix', 321.7634156544651)
(80, '2021-02-17 09:53:28.377359', 'Food', 'Swiggy', 283.09570727160764)
(81, '2020-10-29 16:53:28.377359', 'Food', 'Hotel', 470.0809953992362)
(82, '2020-09-22 09:53:28.377359', 'Music', 'spotify', 411.14270120842224)
(83, '2021-03-18 09:53:28.377359', 'Books', 'Flipcart', 451.5844070294999)
(84, '2020-09-21 10:53:28.377359', 'Music', 'Netflix', 158.7936457269333)
(85, '2021-01-12 09:53:28.377359', 'Music', 'Amazon', 130.37490757527002)
(86, '2021-05-07 16:53:28.377359', 'Food', 'Zomato', 198.45067179263802)
(87, '2021-05-19 15:53:28.377359', 'Food', 'Zomato', 378.82064134052473)
(88, '2021-04-18 09:53:28.377359', 'Utility', 'Phone', 124.22124784445779)
(89, '2021-04-12 14:53:28.377359', 'Music', 'Amazon', 218.487173429263)
(90, '2020-12-01 14:53:28.377359', 'Music', 'Amazon', 101.57327588889416)
(91, '2021-01-22 17:53:28.377359', 'Food', 'Hotel', 232.66346838787223)
(92, '2021-01-12 19:53:28.377359', 'Travel', 'Taxi', 356.84263798863265)
(93, '2021-01-11 09:53:28.377359', 'Utility', 'Electricity', 111.72080867898062)
(94, '2021-01-04 13:53:28.377359', 'Utility', 'Phone', 431.1855366816298)
(95, '2021-07-19 13:53:28.377359', 'Utility', 'Phone', 388.67121323884214)
(96, '2021-01-12 19:53:28.377359', 'Books', 'Flipcart', 467.5545618966052)
(97, '2021-03-25 11:53:28.377359', 'Utility', 'Phone', 320.78943360123816)
(98, '2021-05-13 15:53:28.377359', 'Travel', 'Taxi', 442.0964693975505)
(99, '2020-10-11 16:53:28.377359', 'Food', 'Hotel', 100.45550129902664)


df = pd.read_sql_query("select * from wallet" , con=conn)

df


import PyPDF2


with open("/home/vikrant/Downloads/upload.pdf", "rb") as f: # make note of binary mode (b)
    pdfreader = PyPDF2.PdfFileReader(f)
    n = pdfreader.getNumPages()
    for p in range(n):
        page = pdfreader.getPage(p)
        print(page.extractText()[:50])

 
National Load Despatch Centre
 
 
POWER SYSTEM O
NR
WR
SR
ER
NER
TOTAL
59882
41115
34238
21526
2730
16-Jul-2020
Sl 
No
Voltage Level
Line Details
Circ


with open("/home/vikrant/Downloads/upload.pdf", "rb") as f: # make note of binary mode (b)
    pdfreader = PyPDF2.PdfFileReader(f)
    n = pdfreader.getNumPages()
    page = pdfreader.getPage(1)
    print(page.extractText()[:300])

NR
WR
SR
ER
NER
TOTAL
59882
41115
34238
21526
2730
159491
1114
0
0
0
6
1120
1398
998
807
447
48
3698
355
33
77
149
29
643
11
49
128
-
-
187
39.60
16.60
41.59
4.60
0.03
102
12.6
0.0
0.0
0.0
0.0
12.6
65470
43593
38117
21535
2827
160654
22:20
10:29
10:00
21:20
19:41
21:26
Region
FVI
< 49.7
49.7 - 49.8


import pandas as pd

def get_rows(lines, n, numrow):
    s = 0
    rows = []
    for i in range(numrow):
        rows.append(lines[s:s+n])
        s = s+n
    return rows
            
def column(data2d, n):
    return [row[n] for row in data2d]
    
def transpose(data2d):
    numcols = len(data2d[0])
    return [column(data2d, i)  for i in range(numcols)]

def extract_tableA(page):
    lines = page.split("\n")
    header = "NR WR SR ER NER TOTAL".split()
    rowdata = get_rows(lines[len(header):], len(header), 9) # we counted manually that there 9 rows
    columndata = transpose(rowdata)
    data = dict(zip(header, columndata))
    return pd.DataFrame(data)


extract_tableA(pdfreader.getPage(1).extractText())


tables = [[i*j for i in range(1, 11)] for j in range(1, 6)]


tables

[[1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
 [2, 4, 6, 8, 10, 12, 14, 16, 18, 20],
 [3, 6, 9, 12, 15, 18, 21, 24, 27, 30],
 [4, 8, 12, 16, 20, 24, 28, 32, 36, 40],
 [5, 10, 15, 20, 25, 30, 35, 40, 45, 50]]


tables[2]

[3, 6, 9, 12, 15, 18, 21, 24, 27, 30]


tables[3]

[4, 8, 12, 16, 20, 24, 28, 32, 36, 40]


tables[0][2]

3


tables[1][2]

6


tables[2][2]

9


[row[2] for row in tables]

[3, 6, 9, 12, 15]


def column(data2d, n):
    return [row[n] for row in data2d]


[column(tables, i) for i in range(10)]

[[1, 2, 3, 4, 5],
 [2, 4, 6, 8, 10],
 [3, 6, 9, 12, 15],
 [4, 8, 12, 16, 20],
 [5, 10, 15, 20, 25],
 [6, 12, 18, 24, 30],
 [7, 14, 21, 28, 35],
 [8, 16, 24, 32, 40],
 [9, 18, 27, 36, 45],
 [10, 20, 30, 40, 50]]


tables

[[1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
 [2, 4, 6, 8, 10, 12, 14, 16, 18, 20],
 [3, 6, 9, 12, 15, 18, 21, 24, 27, 30],
 [4, 8, 12, 16, 20, 24, 28, 32, 36, 40],
 [5, 10, 15, 20, 25, 30, 35, 40, 45, 50]]

Python Virtual Training For Arcesium - Module III - Day 4¶

Example - geting data from yahoo finance¶

using yahoo API¶

Databases¶

pandas as database¶

reading pdf files¶

	date	category	description	debit
0	2021-03-07 14:53:28.377359	Music	Amazon	421.207327
1	2020-10-08 09:53:28.377359	Food	Swiggy	328.440080
2	2021-02-23 09:53:28.377359	Books	Amazon	244.679437
3	2020-11-01 14:53:28.377359	Utility	Phone	222.756318
4	2021-06-05 13:53:28.377359	Books	Flipcart	494.128492
...	...	...	...	...
95	2021-07-19 13:53:28.377359	Utility	Phone	388.671213
96	2021-01-12 19:53:28.377359	Books	Flipcart	467.554562
97	2021-03-25 11:53:28.377359	Utility	Phone	320.789434
98	2021-05-13 15:53:28.377359	Travel	Taxi	442.096469
99	2020-10-11 16:53:28.377359	Food	Hotel	100.455501

	NR	WR	SR	ER	NER	TOTAL
0	59882	41115	34238	21526	2730	159491
1	1114	0	0	0	6	1120
2	1398	998	807	447	48	3698
3	355	33	77	149	29	643
4	11	49	128	-	-	187
5	39.60	16.60	41.59	4.60	0.03	102
6	12.6	0.0	0.0	0.0	0.0	12.6
7	65470	43593	38117	21535	2827	160654
8	22:20	10:29	10:00	21:20	19:41	21:26