Loading muffin/budget_plan/BudgetPlanLoader.py +21 −1 Original line number Diff line number Diff line Loading @@ -3,6 +3,7 @@ import subprocess from pathlib import Path from collections.abc import Sequence, Iterable from typing import Optional from io import StringIO from ..amount.AmountParser import parse_euro_amount from ..cash_flow.CashFlowDirection import CashFlowDirection Loading @@ -25,6 +26,7 @@ def _is_combined_row(row: Sequence[str]) -> bool: def _parse_combined_row(row: Sequence[str]) -> ...: print(row) lines = row[0].split("\n") return (lines[0][0], lines[0][1:]) # TODO Loading Loading @@ -100,4 +102,22 @@ def load_pdf(pdf_path: Path | str) -> BudgetPlan: capture_output=True, encoding="utf-8", ) return _load_csv_lines(result.stdout.split("\n")) # Okay, so this is a bit ugly. The output CSV (result.stdout) obviously contains newlines to separate the rows. However, # it might also contain newlines within a cell (properly escaped using quotation marks). This is to spec, and any sufficiently # advanced CSV parser should be able to deal with this (Python's csv can deal with this). # # But: There are only two ways to pass data to the csv.reader class. Either you supply a file-like object, in which case # Python will take care of correctly handling newlines; or an iterable of strings (each string corresponds to a line), in which case # you have to pre-parse the CSV yourself to find out which newlines separate lines, and which are within one cell. # # We therefore simulate a file-like object by wrapping result.stdout in a StringIO instance, from which csv.reader can read like # a file, even though it is not. # # This idea is inspired by the following two answers on Stack Overflow: # # - Answer to “Parse a single CSV string?”, https://stackoverflow.com/a/35822843/ # by alecxe (https://stackoverflow.com/users/771848/alecxe), https://creativecommons.org/licenses/by-sa/3.0/ # - Answer to “CSVs in Python with newline in quotes”, https://stackoverflow.com/a/18724978/ # by Claudiu (https://stackoverflow.com/users/15055/claudiu), https://creativecommons.org/licenses/by-sa/3.0/ return _load_csv_lines(StringIO(result.stdout)) Loading
muffin/budget_plan/BudgetPlanLoader.py +21 −1 Original line number Diff line number Diff line Loading @@ -3,6 +3,7 @@ import subprocess from pathlib import Path from collections.abc import Sequence, Iterable from typing import Optional from io import StringIO from ..amount.AmountParser import parse_euro_amount from ..cash_flow.CashFlowDirection import CashFlowDirection Loading @@ -25,6 +26,7 @@ def _is_combined_row(row: Sequence[str]) -> bool: def _parse_combined_row(row: Sequence[str]) -> ...: print(row) lines = row[0].split("\n") return (lines[0][0], lines[0][1:]) # TODO Loading Loading @@ -100,4 +102,22 @@ def load_pdf(pdf_path: Path | str) -> BudgetPlan: capture_output=True, encoding="utf-8", ) return _load_csv_lines(result.stdout.split("\n")) # Okay, so this is a bit ugly. The output CSV (result.stdout) obviously contains newlines to separate the rows. However, # it might also contain newlines within a cell (properly escaped using quotation marks). This is to spec, and any sufficiently # advanced CSV parser should be able to deal with this (Python's csv can deal with this). # # But: There are only two ways to pass data to the csv.reader class. Either you supply a file-like object, in which case # Python will take care of correctly handling newlines; or an iterable of strings (each string corresponds to a line), in which case # you have to pre-parse the CSV yourself to find out which newlines separate lines, and which are within one cell. # # We therefore simulate a file-like object by wrapping result.stdout in a StringIO instance, from which csv.reader can read like # a file, even though it is not. # # This idea is inspired by the following two answers on Stack Overflow: # # - Answer to “Parse a single CSV string?”, https://stackoverflow.com/a/35822843/ # by alecxe (https://stackoverflow.com/users/771848/alecxe), https://creativecommons.org/licenses/by-sa/3.0/ # - Answer to “CSVs in Python with newline in quotes”, https://stackoverflow.com/a/18724978/ # by Claudiu (https://stackoverflow.com/users/15055/claudiu), https://creativecommons.org/licenses/by-sa/3.0/ return _load_csv_lines(StringIO(result.stdout))