Problem 5.3
Split a File
Write a program split.py
that splits a large file into multiple smaller files. The program should take a filename and the number of lines per part as arguments and write multiple smaller files, each containing the specified number of lines (the last one may have fewer lines).
$ python split.py files/100.txt 30
writing files/100-part1.txt
writing files/100-part2.txt
writing files/100-part3.txt
writing files/100-part4.txt
Solution
"""Program to split a file into smaller parts.
It takes a filename and the number of lines in each part as
command-line arguments and splits the file into smaller parts
with each file having no more than the specified number of lines.
USAGE:
$ python split.py large-file.txt 100
writing large-file-part1.txt
writing large-file-part2.txt
...
"""
import os
import sys
def group(values, n):
    """Split a sequence into consecutive slices of at most ``n`` items.

    ``values`` must support ``len()`` and slicing (list, tuple, str, ...).
    Every slice except possibly the last holds exactly ``n`` items; an
    empty sequence yields an empty list.

    Raises ValueError if ``n`` is smaller than 1.

    >>> group([1, 2, 3, 4, 5], 2)
    [[1, 2], [3, 4], [5]]
    """
    # A negative step would make range() empty and silently drop all the
    # data, and a zero step fails with an unrelated-looking range() error,
    # so reject both up front with a clear message.
    if n < 1:
        raise ValueError(f"group size must be at least 1, got {n}")
    return [values[start:start + n] for start in range(0, len(values), n)]
def splitfile(filename, chunk_size):
    """Read a text file and return its lines grouped into chunks.

    The whole file is read into memory with ``readlines()``, so each line
    keeps its trailing newline. The lines are then handed to ``group``
    together with ``chunk_size`` and the resulting list of chunks (each a
    list of lines) is returned.
    """
    # Use a context manager so the file handle is closed deterministically
    # instead of being left for the garbage collector.
    with open(filename) as f:
        lines = f.readlines()
    return group(lines, chunk_size)
def write_lines(filename, lines):
    """Announce the target file on stdout, then save the given lines to it.

    The file is opened in text write mode, so existing content is replaced.
    The lines are written one after another exactly as given; no line
    separators are added.
    """
    print(f"writing {filename}")
    with open(filename, "w") as out:
        for line in lines:
            out.write(line)
def generate_part_filename(filename, index):
    """Return ``filename`` with ``-part<index>`` inserted before its extension.

    Only the final path component is examined, so dots in directory names
    (``./files/100.txt``, ``my.dir/data``) are left untouched. The extension
    is everything from the first dot of the base name onwards, which keeps
    compound extensions such as ``.tar.gz`` together. A leading dot (hidden
    files like ``.bashrc``) is treated as part of the name, not as the start
    of an extension.

    >>> generate_part_filename("a.txt", 1)
    'a-part1.txt'
    >>> generate_part_filename("./files/100.txt", 2)
    './files/100-part2.txt'
    >>> generate_part_filename("archive.tar.gz", 3)
    'archive-part3.tar.gz'
    """
    basename = os.path.basename(filename)
    # Keep the directory prefix exactly as the caller wrote it (including
    # its separator style) rather than re-joining the path.
    prefix = filename[:len(filename) - len(basename)]
    # Start the search at position 1 so a leading dot is not mistaken for
    # an extension separator.
    dot = basename.find(".", 1)
    if dot == -1:
        name, ext = basename, ""
    else:
        name, ext = basename[:dot], basename[dot:]
    return f"{prefix}{name}-part{index}{ext}"
def write_small_files(filename, file_chunks):
    """Write each chunk to its own numbered part file.

    Parts are numbered from 1 in the order the chunks are given. The name
    of each part comes from ``generate_part_filename`` and the actual
    writing is done by ``write_lines``.
    """
    for part_number, part_lines in enumerate(file_chunks, 1):
        write_lines(generate_part_filename(filename, part_number), part_lines)
def main():
    """Command-line entry point: split the given file into parts.

    Expects ``sys.argv[1]`` to be the file to split and ``sys.argv[2]`` the
    number of lines per part. When an argument is missing, the line count
    is not an integer, or it is smaller than 1, the program exits with a
    message on stderr and a non-zero status instead of a traceback.
    """
    if len(sys.argv) < 3:
        sys.exit("usage: python split.py <filename> <lines-per-part>")
    filename = sys.argv[1]
    try:
        chunk_size = int(sys.argv[2])
    except ValueError:
        sys.exit(f"error: number of lines must be an integer, got {sys.argv[2]!r}")
    # A non-positive size cannot produce meaningful parts; refuse it here
    # rather than silently writing nothing.
    if chunk_size < 1:
        sys.exit("error: number of lines must be at least 1")
    file_chunks = splitfile(filename, chunk_size)
    write_small_files(filename, file_chunks)


# Run the splitter only when executed as a script, not when imported.
if __name__ == "__main__":
    main()