Verified Commit a5abeb3c authored by Jakob Moser's avatar Jakob Moser
Browse files

Implement loading from PDF using "tabula"

parent 59eb0a1d
Loading
Loading
Loading
Loading
+22 −0
Original line number Diff line number Diff line
import csv
import subprocess
from pathlib import Path
from typing import Sequence
from tempfile import NamedTemporaryFile


from .BudgetPlan import BudgetPlan
@@ -22,3 +24,23 @@ def load_csv(csv_path: Path | str) -> BudgetPlan:
        ]

    return BudgetPlan({item.id: item for item in items})


def load_pdf(pdf_path: Path | str) -> BudgetPlan:
    """Load a budget plan from the tables contained in a PDF file.

    The external ``tabula`` command-line tool is run on the PDF and told to
    write its output to a temporary file, which is then parsed by
    ``load_csv``.

    Args:
        pdf_path: Location of the PDF file, as a ``Path`` or a string.

    Returns:
        The ``BudgetPlan`` produced by ``load_csv`` from tabula's output.

    Raises:
        subprocess.CalledProcessError: If ``tabula`` exits with a non-zero
            status, so a failed extraction is reported instead of being
            parsed as an empty or partial file.
        OSError: If the ``tabula`` executable cannot be started (typically
            ``FileNotFoundError`` when it is not installed / not on PATH).
    """
    pdf_path = Path(pdf_path)

    # NOTE(review): the temporary file is still open while tabula writes to
    # it and while load_csv reads it by name. That works on POSIX systems;
    # on Windows a NamedTemporaryFile usually cannot be reopened by name
    # while it is open — confirm if Windows support is needed.
    with NamedTemporaryFile() as csv_file:
        subprocess.run(
            [
                "tabula",
                str(pdf_path.absolute()),
                "--pages",
                "all",
                "--out",
                csv_file.name,
                "--lattice",
            ],
            # Fail loudly when the extraction itself fails.
            check=True,
        )
        # Parse before leaving the ``with`` block: the temporary file is
        # deleted as soon as the context manager exits.
        return load_csv(csv_file.name)