#!/usr/bin/env python3
"""Extract the images embedded in every PDF below ``config.SOURCE_FOLDER``.

Each PDF is handed to the external ``pdfimages`` tool, which writes the
images into ``config.TEMP_FOLDER``.  After every PDF the extracted files are
moved to ``config.OUTPUT_FOLDER`` and renamed to a zero-padded running
number so that names from different PDFs never collide.
"""
import os
import shlex
import shutil
import subprocess
from pathlib import Path

import config

# PDFs still waiting to be processed; filled by find_pdf_files() and consumed
# from the front by extract_images().
pdf_files: list[str] = []

# Running number used to name the next image moved to the output folder.
file_counter: int = 0


def find_pdf_files() -> None:
    """Append every ``*.pdf`` path found recursively under the source folder.

    Results are appended to the module-level ``pdf_files`` list as strings.
    """
    source_path = Path(config.SOURCE_FOLDER)
    pdf_files.extend(str(path) for path in source_path.rglob("*.pdf"))


def create_temp_folder() -> None:
    """Create ``config.TEMP_FOLDER`` (and parents) if it does not exist."""
    Path(config.TEMP_FOLDER).mkdir(parents=True, exist_ok=True)


def create_output_folder() -> None:
    """Create ``config.OUTPUT_FOLDER`` (and parents) if it does not exist."""
    Path(config.OUTPUT_FOLDER).mkdir(parents=True, exist_ok=True)


def extract_images() -> bool:
    """Run ``pdfimages -all`` on the next queued PDF.

    The PDF is removed from the front of ``pdf_files`` whether or not the
    extraction succeeds.

    Returns:
        ``True`` if ``pdfimages`` exited with status 0; ``False`` if the
        queue was empty, the tool could not be started, or it reported an
        error.
    """
    if not pdf_files:
        return False

    next_file: str = pdf_files.pop(0)

    # pdfimages treats its last argument as a file-name *prefix*
    # ("<root>-NNN.ext"), not as a directory, so build a prefix that is
    # guaranteed to live inside the temp folder.
    image_root: str = os.path.join(config.TEMP_FOLDER, "img")

    # An argument list (no shell) keeps paths with spaces or shell
    # metacharacters intact.
    command: list[str] = ["pdfimages", "-all", next_file, image_root]
    print(shlex.join(command))

    try:
        completed = subprocess.run(command, check=False)
    except OSError as error:
        # E.g. pdfimages is not installed; report it as a failed extraction
        # instead of crashing the whole run.
        print(f"Could not run pdfimages: {error}")
        return False
    return completed.returncode == 0


def move_images() -> None:
    """Move all files from the temp folder to the output folder.

    Files are processed in sorted name order and renamed to
    ``<file_counter:010d><suffix>``, where the suffix is the one the
    extracted file already has (``pdfimages -all`` emits several formats,
    not only JPEG).  Files without a suffix fall back to ``.jpg``.
    ``file_counter`` is incremented once per moved file.
    """
    global file_counter

    temp_folder = Path(config.TEMP_FOLDER)
    output_folder = Path(config.OUTPUT_FOLDER)

    # Sorting makes the numbering deterministic and keeps the images of one
    # PDF in the order pdfimages numbered them.
    for source in sorted(temp_folder.iterdir()):
        if not source.is_file():
            continue
        target_name = f"{file_counter:010d}{source.suffix or '.jpg'}"
        print(f" {target_name}")
        # shutil.move also works when temp and output folders are on
        # different filesystems, where a plain rename would fail.
        shutil.move(str(source), str(output_folder / target_name))
        file_counter += 1


def main() -> None:
    """Report the configured folders, then extract and collect all images."""
    print(f"Source folder: {config.SOURCE_FOLDER}")
    print(f"Temp folder: {config.TEMP_FOLDER}")
    print(f"Output folder: {config.OUTPUT_FOLDER}")

    find_pdf_files()
    print(f"Found {len(pdf_files)} PDF files.")

    create_temp_folder()
    create_output_folder()

    # Loop on the queue itself so that one broken PDF does not silently
    # abort the processing of all remaining ones.
    while pdf_files:
        if extract_images():
            move_images()
        else:
            print("pdfimages failed; continuing with the next PDF.")


if __name__ == "__main__":
    main()