Loading pyproject.toml +1 −1 Original line number Diff line number Diff line [project] name = "coliverter" version = "0.4.3" version = "0.5.0" authors = [ { name="Jakob Moser", email="moser@cl.uni-heidelberg.de" }, ] Loading src/coliverter/documents/transform/remove_irrelevant_sections.py +37 −3 Original line number Diff line number Diff line from pandoc.types import Pandoc from pandoc.types import Pandoc, Header, Str, Block, Space from coliverter.documents.DocumentType import DocumentType def _starts_with(text: list[Block], prefix: Block | list[Block]) -> bool: prefix_list = prefix if isinstance(prefix, list) else [prefix] return text[: len(prefix)] == prefix_list def _keep_only_resolutions(content: Pandoc) -> Pandoc: is_document_start = True found_resolution = False meta, text = content to_keep = [] for element in text: match element: case Header(level, _, text) if level <= 2 and _starts_with( text, Str("Protokoll") ): text[0] = Str("Beschlussprotokoll") case Header(level, _, text) if level == 3 and _starts_with( text, [Str("1)"), Space(), Str("Tagesordnung")] ): # Heading introduces the agenda, so we start deleting now (including this heading) is_document_start = False case Header(_, _, text) if _starts_with(text, Str("Finanzbeschluss:")): found_resolution = True case Header(): # Any other header means this new section is not a resolution found_resolution = False if is_document_start or found_resolution: to_keep.append(element) return Pandoc(meta, to_keep) def remove_irrelevant_sections(content: Pandoc, document_type: DocumentType) -> Pandoc: if document_type == DocumentType.MEETING_RESOLUTIONS: # TODO Remove everything that is not a resolution return content return _keep_only_resolutions(content) return content uv.lock +1 −1 Original line number Diff line number Diff line Loading @@ -104,7 +104,7 @@ wheels = [ [[package]] name = "coliverter" version = "0.4.1" version = "0.4.2" source = { editable = "." } dependencies = [ { name = "pandoc" }, Loading Loading
pyproject.toml +1 −1 Original line number Diff line number Diff line [project] name = "coliverter" version = "0.4.3" version = "0.5.0" authors = [ { name="Jakob Moser", email="moser@cl.uni-heidelberg.de" }, ] Loading
src/coliverter/documents/transform/remove_irrelevant_sections.py +37 −3 Original line number Diff line number Diff line from pandoc.types import Pandoc from pandoc.types import Pandoc, Header, Str, Block, Space from coliverter.documents.DocumentType import DocumentType def _starts_with(text: list[Block], prefix: Block | list[Block]) -> bool: prefix_list = prefix if isinstance(prefix, list) else [prefix] return text[: len(prefix)] == prefix_list def _keep_only_resolutions(content: Pandoc) -> Pandoc: is_document_start = True found_resolution = False meta, text = content to_keep = [] for element in text: match element: case Header(level, _, text) if level <= 2 and _starts_with( text, Str("Protokoll") ): text[0] = Str("Beschlussprotokoll") case Header(level, _, text) if level == 3 and _starts_with( text, [Str("1)"), Space(), Str("Tagesordnung")] ): # Heading introduces the agenda, so we start deleting now (including this heading) is_document_start = False case Header(_, _, text) if _starts_with(text, Str("Finanzbeschluss:")): found_resolution = True case Header(): # Any other header means this new section is not a resolution found_resolution = False if is_document_start or found_resolution: to_keep.append(element) return Pandoc(meta, to_keep) def remove_irrelevant_sections(content: Pandoc, document_type: DocumentType) -> Pandoc: if document_type == DocumentType.MEETING_RESOLUTIONS: # TODO Remove everything that is not a resolution return content return _keep_only_resolutions(content) return content
uv.lock +1 −1 Original line number Diff line number Diff line Loading @@ -104,7 +104,7 @@ wheels = [ [[package]] name = "coliverter" version = "0.4.1" version = "0.4.2" source = { editable = "." } dependencies = [ { name = "pandoc" }, Loading