def countdown(n):
    """Yield n, n - 1, n - 2, ... for as long as the value stays above zero.

    Nothing is yielded when ``n`` is already zero or negative.
    """
    remaining = n
    while remaining > 0:
        yield remaining
        remaining -= 1
for i in countdown(5):
print(i)
5 4 3 2 1
import os
def find(root):
    """Lazily yield the path of every file found while walking ``root``.

    Each path is the directory reported by ``os.walk`` joined with the
    file name, so results start with ``root`` as it was passed in.
    """
    for directory, _subdirs, names in os.walk(root):
        yield from (os.path.join(directory, name) for name in names)
def take(n, seq):
    """Return a list holding up to the first ``n`` items drawn from ``seq``.

    ``seq`` may be any iterable. When it is an iterator, exactly the
    returned items are consumed, so repeated calls continue where the
    previous call stopped. If fewer than ``n`` items are available the
    shorter list is returned instead of letting ``StopIteration`` escape
    (which would turn into ``RuntimeError`` inside a generator).
    """
    # Local import keeps this cell self-contained.
    from itertools import islice

    # islice rejects a negative stop; clamp so that n <= 0 still gives [].
    return list(islice(seq, max(n, 0)))
def naturals():
    """Generate the natural numbers 1, 2, 3, ... without ever stopping."""
    current = 0
    while True:
        current += 1
        yield current
def squares(seq):
    """Return a lazy generator producing ``item * item`` for each item of ``seq``."""
    # Generator expression: nothing is squared until the result is iterated.
    squared = (item * item for item in seq)
    return squared
nat_numbers = naturals()
sqrs_nat = squares(nat_numbers)
take(5, sqrs_nat)
[1, 4, 9, 16, 25]
take(5, sqrs_nat)
[36, 49, 64, 81, 100]
take(5, sqrs_nat)
[121, 144, 169, 196, 225]
next(sqrs_nat)
256
import os
def find(root):
    """Walk ``root`` with ``os.walk`` and yield each file's joined path, one at a time."""
    for folder, _dirs, entries in os.walk(root):
        for entry in entries:
            full_path = os.path.join(folder, entry)
            yield full_path
def take(n, seq):
    """Return a list holding up to the first ``n`` items drawn from ``seq``.

    ``seq`` may be any iterable. When it is an iterator, exactly the
    returned items are consumed, so repeated calls continue where the
    previous call stopped. If fewer than ``n`` items are available the
    shorter list is returned instead of letting ``StopIteration`` escape
    (which would turn into ``RuntimeError`` inside a generator).
    """
    # Local import keeps this cell self-contained.
    from itertools import islice

    # islice rejects a negative stop; clamp so that n <= 0 still gives [].
    return list(islice(seq, max(n, 0)))
def grep(pattern, seq):
    """Return a lazy generator of the items of ``seq`` that contain ``pattern``.

    Matching uses the ``in`` operator, so for strings this is a plain
    substring test.
    """
    matching = (item for item in seq if pattern in item)
    return matching
files = find(".")
notebooks = grep(".ipynb", files)
take(3, notebooks)
['./Lecture6-Comprehensions-1.ipynb', './Lecture9-Functions2.ipynb', './Lecture10-Modules-and-Scripts.ipynb']
next(notebooks)
'./Lecture15-Oops.ipynb'
def count(seq):
    """Return how many items ``seq`` produces, consuming it in the process."""
    total = 0
    for _ in seq:
        total += 1
    return total
files = find("../")
notebooks = grep(".ipynb", files)
count(notebooks)
151
def readlines(files):
    """Yield every line of every file named in ``files``, in order.

    Files are opened one at a time in text mode; each is closed before
    the next one is opened.
    """
    for filename in files:
        with open(filename) as handle:
            yield from handle
files = find("../") # . is current folder ..where this program is running
## .. is parent folder
notebooks = grep(".ipynb", files)
lines = readlines(notebooks)
function_defs = grep("def ", lines)
count(function_defs)
995
problem
Write a function get_paragraphs to split a given text file into paragraphs. Wherever there is an empty line, that is where a paragraph ends. The function should take a sequence of lines as an argument and should return a sequence of paragraphs.
Once the function is ready, make use of it to find the number of paragraphs in a text file.
%%file simple_sample.txt
jklgjfd fdshfkjd djshfkjds fdsjfhkjdshf
kjdhfkjdh gjhfkjhgfdkhgkfdjhgkjfd kjfhgfkjdg
kjhkjhgfd gkhkjfd
jfhgjuoiut dshgkjhfdg iutoireu tgjkfhgkjf
khfgkjhfdg jhgkjhfd hjkhkjhfg jhjghfj
jhkjgj kjhgjfhg hjfgjhf g hjhjgf
hgfd gkjhgkjhfd ggjfdg kjhgkjfd glkjfgkjh
gjfhgkjfd kjhkjghfg kdfghfdg kjhfdgkjhfd fdghfdg
kjhgkjfdg khjgkjhfd khjfdkhgf
Overwriting simple_sample.txt
!python cat.py simple_sample.txt
jklgjfd fdshfkjd djshfkjds fdsjfhkjdshf kjdhfkjdh gjhfkjhgfdkhgkfdjhgkjfd kjfhgfkjdg kjhkjhgfd gkhkjfd jfhgjuoiut dshgkjhfdg iutoireu tgjkfhgkjf khfgkjhfdg jhgkjhfd hjkhkjhfg jhjghfj jhkjgj kjhgjfhg hjfgjhf g hjhjgf hgfd gkjhgkjhfd ggjfdg kjhgkjfd glkjfgkjh gjfhgkjfd kjhkjghfg kdfghfdg kjhfdgkjhfd fdghfdg kjhgkjfdg khjgkjhfd khjfdkhgf
def lineseq(filename):
    """Lazily yield the lines of the text file ``filename``, line endings included."""
    with open(filename) as handle:
        yield from handle
!wget https://ia802902.us.archive.org/4/items/prideandprejudic01342gut/pandp12.txt
--2021-10-07 19:54:22-- https://ia802902.us.archive.org/4/items/prideandprejudic01342gut/pandp12.txt Resolving ia802902.us.archive.org (ia802902.us.archive.org)... 207.241.233.52 Connecting to ia802902.us.archive.org (ia802902.us.archive.org)|207.241.233.52|:443... connected. HTTP request sent, awaiting response... 301 Moved Permanently Location: https://archive.org/download/prideandprejudic01342gut/pandp12.txt [following] --2021-10-07 19:54:23-- https://archive.org/download/prideandprejudic01342gut/pandp12.txt Resolving archive.org (archive.org)... 207.241.224.2 Connecting to archive.org (archive.org)|207.241.224.2|:443... connected. HTTP request sent, awaiting response... 302 Found Location: https://ia802806.us.archive.org/23/items/prideandprejudic01342gut/pandp12.txt [following] --2021-10-07 19:54:25-- https://ia802806.us.archive.org/23/items/prideandprejudic01342gut/pandp12.txt Resolving ia802806.us.archive.org (ia802806.us.archive.org)... 207.241.232.116 Connecting to ia802806.us.archive.org (ia802806.us.archive.org)|207.241.232.116|:443... connected. HTTP request sent, awaiting response... 200 OK Length: 717331 (701K) [text/plain] Saving to: ‘pandp12.txt’ pandp12.txt 100%[===================>] 700.52K 86.5KB/s in 8.2s 2021-10-07 19:54:35 (85.0 KB/s) - ‘pandp12.txt’ saved [717331/717331]
lines = lineseq("pandp12.txt")
lines
<generator object lineseq at 0x7fe289818270>
next(lines)
'The Project Gutenberg EBook of Pride and Prejudice, by Jane Austen\n'
with open("simple_sample.txt") as f:
slines = f.readlines()
[line.strip() for line in slines]
['jklgjfd fdshfkjd djshfkjds fdsjfhkjdshf', 'kjdhfkjdh gjhfkjhgfdkhgkfdjhgkjfd kjfhgfkjdg', 'kjhkjhgfd gkhkjfd', '', 'jfhgjuoiut dshgkjhfdg iutoireu tgjkfhgkjf', 'khfgkjhfdg jhgkjhfd hjkhkjhfg jhjghfj', 'jhkjgj kjhgjfhg hjfgjhf g hjhjgf', '', 'hgfd gkjhgkjhfd ggjfdg kjhgkjfd glkjfgkjh', 'gjfhgkjfd kjhkjghfg kdfghfdg kjhfdgkjhfd fdghfdg', 'kjhgkjfdg khjgkjhfd khjfdkhgf']
def lineseq(filename):
    """Open ``filename`` in text mode and yield its lines one by one.

    The file stays open only while the generator is being consumed.
    """
    with open(filename) as source:
        for text_line in source:
            yield text_line
def paragraphs(lines):
    """Group consecutive non-blank lines into paragraphs and yield each one.

    A line that is empty after ``strip()`` ends the current paragraph;
    runs of blank lines produce no empty paragraphs. Each paragraph is
    the concatenation of its lines exactly as received.
    """
    collected = []
    for line in lines:
        is_blank = line.strip() == ""
        if not is_blank:
            collected.append(line)
            continue
        if collected:
            yield "".join(collected)
            collected = []
    # Flush a trailing paragraph that was not followed by a blank line.
    if collected:
        yield "".join(collected)
paras = paragraphs(lineseq("pandp12.txt"))
paras
<generator object paragraphs at 0x7fe289a5f0b0>
next(paras)
'The Project Gutenberg EBook of Pride and Prejudice, by Jane Austen\n(#8 in our series by Jane Austen)\n'
print(next(paras))
Copyright laws are changing all over the world. Be sure to check the copyright laws for your country before downloading or redistributing this or any other Project Gutenberg eBook.
print(next(paras))
This header should be the first thing seen when viewing this Project Gutenberg file. Please do not remove it. Do not change or edit the header without written permission.
print(next(paras))
Please read the "legal small print," and other information about the eBook and Project Gutenberg at the bottom of this file. Included is important information about your specific rights and restrictions in how the file may be used. You can also find out about how to make a donation to Project Gutenberg, and how to get involved.
print(next(paras))
**Welcome To The World of Free Plain Vanilla Electronic Texts**
print(next(paras))
**eBooks Readable By Both Humans and By Computers, Since 1971**
print(next(paras))
*****These eBooks Were Prepared By Thousands of Volunteers!*****
print(next(paras))
Title: Pride and Prejudice
print(next(paras))
Author: Jane Austen
print(next(paras))
Release Date: Jun, 1998 [EBook #1342] [Most recently updated: April 21, 2006]
print(next(paras))
Edition: 12
print(next(paras))
Language: English
print(next(paras))
Character set encoding: ASCII
print(next(paras))
*** START OF THE PROJECT GUTENBERG EBOOK, PRIDE AND PREJUDICE ***
print(next(paras))
Pride and Prejudice
paras = paragraphs(lineseq("pandp12.txt"))
count(paras)
2202