PDF Splitter
A simple Python utility to split long PDFs into markdown chapters.
Installation
Requires the PyPDF2 package (install it with: pip install PyPDF2); everything else comes from the Python standard library.
Usage
Run `python main.py -h` to list the available command-line options.
Example
Split a PDF into chapters based on headings: python main.py -i input.pdf -o output.md
import argparse
import re
from urllib.parse import urlparse
from pathlib import Path
from PyPDF2 import PdfReader
def pdf_to_markdown(input_file, output_file):
    """Write the extracted text of every page of a PDF into one markdown file.

    Pages are written in document order. When a page's extracted text has at
    least one line that begins with a single ``#`` (one ``#`` followed by a
    character other than ``#``), a ``---`` separator line is written before
    that page's text.

    Args:
        input_file: The PDF to read; handed to ``PdfReader`` unchanged.
        output_file: Path of the markdown file to create or overwrite.
    """
    # Compile once, outside the page loop. The lookahead matches at the start
    # of any line that opens with exactly one '#'.
    chapter_heading = re.compile(r'^(?=#[^#])', re.MULTILINE)

    reader = PdfReader(input_file)
    # Explicit UTF-8: extracted PDF text commonly contains non-ASCII
    # characters that the platform default encoding may fail to encode.
    with open(output_file, 'w', encoding='utf-8') as f:
        for page in reader.pages:
            # Guard against a falsy result (e.g. nothing extractable on the
            # page) so neither the regex nor write() is handed None.
            text = page.extract_text() or ''
            if chapter_heading.search(text):
                f.write('---\n')
            f.write(text)
if __name__ == '__main__':
    # Command-line entry point: both the source PDF and the destination
    # markdown path are mandatory options.
    cli = argparse.ArgumentParser(
        description='Split PDF into markdown chapters',
    )
    cli.add_argument('-i', '--input', required=True, help='input PDF file')
    cli.add_argument(
        '-o', '--output', required=True, help='output markdown file',
    )
    options = cli.parse_args()

    # Hand the parsed paths straight to the converter.
    pdf_to_markdown(options.input, options.output)
Top comments (0)