Problem 5.3

Split a File

Write a program split.py that splits a large file into multiple smaller files. The program should take a filename and the number of lines per part as arguments and write multiple small files, each containing the specified number of lines (the last one may have fewer lines).

$ python split.py files/100.txt 30
writing files/100-part1.txt
writing files/100-part2.txt
writing files/100-part3.txt
writing files/100-part4.txt

Solution

"""Program to split a file into smaller parts.

It takes a filename and the number of lines in each part as
command-line arguments and splits the file into smaller parts
with each file having no more than the specified number of lines.

USAGE:
    $ python split.py large-file.txt 100
    writing large-file-part1.txt
    writing large-file-part2.txt
    ...
"""
import os
import sys

def group(values, n):
    """Split a sequence into consecutive chunks of at most ``n`` items.

    Every chunk holds exactly ``n`` items except possibly the last one,
    which holds whatever is left over. An empty sequence gives an empty
    list. ``values`` must support ``len()`` and slicing.

        >>> group([1, 2, 3, 4, 5], 2)
        [[1, 2], [3, 4], [5]]

    Raises ValueError if ``n`` is less than 1.
    """
    if n < 1:
        # A negative step makes range() empty, which would silently drop
        # every item; zero fails inside range() with an unrelated message.
        raise ValueError(f"chunk size must be at least 1, got {n}")
    return [values[i:i + n] for i in range(0, len(values), n)]

def splitfile(filename, chunk_size):
    """Read a text file and return its lines grouped into chunks.

    The whole file is read into memory, then handed to ``group`` so that
    each chunk is a list of at most ``chunk_size`` lines (line endings are
    kept, as returned by ``readlines``).
    """
    # The context manager closes the file as soon as reading finishes,
    # even if readlines() raises, instead of leaking the handle.
    with open(filename) as f:
        lines = f.readlines()
    return group(lines, chunk_size)

def write_lines(filename, lines):
    """Write a list of lines to a file, announcing the file name first.

    Prints ``writing <filename>`` to stdout, then creates (or truncates)
    the file and writes the lines as given; no line endings are added.
    """
    print("writing", filename)
    with open(filename, "w") as out:
        # Lines already carry their own newlines, so joining them with an
        # empty separator reproduces the text exactly.
        out.write("".join(lines))

def generate_part_filename(filename, index):
    """Generate a new filename by inserting ``-part<index>`` before the extension.

    Only the final path component is inspected, so dots in directory names
    (including a leading ``./``) are left alone. The extension starts at
    the first dot of that component, ignoring a leading dot, so hidden
    files keep their name and multi-part extensions stay together.

        >>> generate_part_filename("a.txt", 1)
        'a-part1.txt'
        >>> generate_part_filename("./files/100.txt", 2)
        './files/100-part2.txt'
        >>> generate_part_filename("archive.tar.gz", 3)
        'archive-part3.tar.gz'
    """
    basename = os.path.basename(filename)
    # Keep the directory prefix byte-for-byte (separators included) rather
    # than rebuilding it, so the output path mirrors the input path.
    prefix = filename[:len(filename) - len(basename)]

    # Start searching at index 1 so a leading dot (".bashrc") is treated
    # as part of the name rather than as an extension separator.
    dot = basename.find(".", 1)
    if dot == -1:
        name, ext = basename, ""
    else:
        name, ext = basename[:dot], basename[dot:]

    return f"{prefix}{name}-part{index}{ext}"

def write_small_files(filename, file_chunks):
    """Write each chunk of lines to its own numbered part file.

    Parts are numbered from 1 in the order the chunks are given; the name
    of each part is derived from ``filename`` by
    ``generate_part_filename``.
    """
    for part_number, part_lines in enumerate(file_chunks, 1):
        write_lines(generate_part_filename(filename, part_number), part_lines)

def main():
    """Command-line entry point: split the file named in ``sys.argv``.

    Expects ``sys.argv[1]`` to be the file to split and ``sys.argv[2]`` the
    maximum number of lines per part. Exits through ``sys.exit`` with a
    short message (instead of a traceback) when an argument is missing,
    the line count is not an integer, or the line count is less than 1.
    """
    if len(sys.argv) < 3:
        sys.exit("usage: python split.py <filename> <lines-per-part>")

    filename = sys.argv[1]
    try:
        chunk_size = int(sys.argv[2])
    except ValueError:
        sys.exit(f"error: lines-per-part must be an integer, got {sys.argv[2]!r}")

    if chunk_size < 1:
        # Rejected before the input file is opened so a bad size can never
        # lead to a run that silently writes nothing.
        sys.exit(f"error: lines-per-part must be at least 1, got {chunk_size}")

    file_chunks = splitfile(filename, chunk_size)
    write_small_files(filename, file_chunks)

# Run the splitter only when this file is executed as a script, so that
# importing the module does not read sys.argv or write any files.
if __name__ == "__main__":
    main()