Verified Commit 0cd18037 authored by Jakob Moser's avatar Jakob Moser
Browse files

Fix parsing error with newlines

parent 55b51e70
Loading
Loading
Loading
Loading
+21 −1
Original line number Diff line number Diff line
@@ -3,6 +3,7 @@ import subprocess
from pathlib import Path
from collections.abc import Sequence, Iterable
from typing import Optional
from io import StringIO

from ..amount.AmountParser import parse_euro_amount
from ..cash_flow.CashFlowDirection import CashFlowDirection
@@ -25,6 +26,7 @@ def _is_combined_row(row: Sequence[str]) -> bool:


def _parse_combined_row(row: Sequence[str]) -> tuple[str, str]:
    """Split the first line of a combined row's first cell into two parts.

    Only the first cell of ``row`` is inspected, and only the text before its
    first newline is used; any further lines in that cell are ignored by the
    current implementation (the parsing is unfinished, see the TODO below).

    Args:
        row: The cells of one CSV row; must contain at least one cell.

    Returns:
        A ``(first_character, remainder)`` tuple taken from the first line of
        the first cell.

    Raises:
        IndexError: If ``row`` is empty, or if the first line of its first
            cell is empty.
    """
    # A cell may span several lines; only the first one is evaluated so far.
    first_line, _, _ = row[0].partition("\n")

    return (first_line[0], first_line[1:])  # TODO
@@ -100,4 +102,22 @@ def load_pdf(pdf_path: Path | str) -> BudgetPlan:
        capture_output=True,
        encoding="utf-8",
    )
    return _load_csv_lines(result.stdout.split("\n"))

    # Okay, so this is a bit ugly. The output CSV (result.stdout) obviously contains newlines to separate the rows. However,
    # it might also contain newlines within a cell (properly escaped using quotation marks). This is to spec, and any sufficiently
    # advanced CSV parser should be able to deal with this (Python's csv can deal with this).
    #
    # But: There are only two ways to pass data to csv.reader. Either you supply a file-like object, in which case
    # Python will take care of correctly handling newlines; or an iterable of strings (each string corresponds to a line), in which case
    # you have to pre-parse the CSV yourself to find out which newlines separate lines, and which are within one cell.
    # 
    # We therefore simulate a file-like object by wrapping result.stdout in a StringIO instance, from which csv.reader can read like
    # a file, even though it is not.
    #
    # This idea is inspired by the following two answers on Stack Overflow:
    #
    # - Answer to “Parse a single CSV string?”, https://stackoverflow.com/a/35822843/
    #   by alecxe (https://stackoverflow.com/users/771848/alecxe), https://creativecommons.org/licenses/by-sa/3.0/
    # - Answer to “CSVs in Python with newline in quotes”, https://stackoverflow.com/a/18724978/
    #   by Claudiu (https://stackoverflow.com/users/15055/claudiu), https://creativecommons.org/licenses/by-sa/3.0/
    return _load_csv_lines(StringIO(result.stdout))