Loading src/coliverter/documents/transform/TransformError.py 0 → 100644 +2 −0 Original line number Diff line number Diff line class TransformError(Exception): pass src/coliverter/documents/transform/remove_irrelevant_sections.py +21 −6 Original line number Diff line number Diff line Loading @@ -2,6 +2,7 @@ from pandoc.types import Pandoc, Header, Str, Block, Space from coliverter.documents.DocumentType import DocumentType from coliverter.documents.transform.TransformError import TransformError def _starts_with(text: list[Block], prefix: Block | list[Block]) -> bool: Loading @@ -9,9 +10,14 @@ def _starts_with(text: list[Block], prefix: Block | list[Block]) -> bool: return text[: len(prefix)] == prefix_list def _keep_only_resolutions(content: Pandoc) -> Pandoc: def _keep_only_resolutions(content: Pandoc, strict: bool = True) -> Pandoc | None: """ Create a variant of the content that only contains resolutions. If strict is True, this will return None if the content does not contain a resolution. """ is_document_start = True found_resolution = False is_resolution = False contains_resolution = False meta, text = content to_keep = [] Loading @@ -28,19 +34,28 @@ def _keep_only_resolutions(content: Pandoc) -> Pandoc: # Heading introduces the agenda, so we start deleting now (including this heading) is_document_start = False case Header(_, _, text) if _starts_with(text, Str("Finanzbeschluss:")): found_resolution = True is_resolution = True case Header(): # Any other header means this new section is not a resolution found_resolution = False is_resolution = False if is_document_start or found_resolution: if is_resolution: # If we find a resolution, we note that this document contains at least one resolution contains_resolution = True if is_document_start or is_resolution: to_keep.append(element) if strict and not contains_resolution: return None return Pandoc(meta, to_keep) def remove_irrelevant_sections(content: Pandoc, document_type: DocumentType) -> Pandoc: if document_type == DocumentType.MEETING_RESOLUTIONS: return _keep_only_resolutions(content) resolutions = _keep_only_resolutions(content) if not resolutions: raise TransformError("After removing irrelevant sections, none were left.") return content Loading
src/coliverter/documents/transform/TransformError.py 0 → 100644 +2 −0 Original line number Diff line number Diff line class TransformError(Exception): pass
src/coliverter/documents/transform/remove_irrelevant_sections.py +21 −6 Original line number Diff line number Diff line Loading @@ -2,6 +2,7 @@ from pandoc.types import Pandoc, Header, Str, Block, Space from coliverter.documents.DocumentType import DocumentType from coliverter.documents.transform.TransformError import TransformError def _starts_with(text: list[Block], prefix: Block | list[Block]) -> bool: Loading @@ -9,9 +10,14 @@ def _starts_with(text: list[Block], prefix: Block | list[Block]) -> bool: return text[: len(prefix)] == prefix_list def _keep_only_resolutions(content: Pandoc) -> Pandoc: def _keep_only_resolutions(content: Pandoc, strict: bool = True) -> Pandoc | None: """ Create a variant of the content that only contains resolutions. If strict is True, this will return None if the content does not contain a resolution. """ is_document_start = True found_resolution = False is_resolution = False contains_resolution = False meta, text = content to_keep = [] Loading @@ -28,19 +34,28 @@ def _keep_only_resolutions(content: Pandoc) -> Pandoc: # Heading introduces the agenda, so we start deleting now (including this heading) is_document_start = False case Header(_, _, text) if _starts_with(text, Str("Finanzbeschluss:")): found_resolution = True is_resolution = True case Header(): # Any other header means this new section is not a resolution found_resolution = False is_resolution = False if is_document_start or found_resolution: if is_resolution: # If we find a resolution, we note that this document contains at least one resolution contains_resolution = True if is_document_start or is_resolution: to_keep.append(element) if strict and not contains_resolution: return None return Pandoc(meta, to_keep) def remove_irrelevant_sections(content: Pandoc, document_type: DocumentType) -> Pandoc: if document_type == DocumentType.MEETING_RESOLUTIONS: return _keep_only_resolutions(content) resolutions = _keep_only_resolutions(content) if not resolutions: raise TransformError("After removing irrelevant sections, none were left.") return content