Verified Commit a576b863 authored by Jakob Moser's avatar Jakob Moser
Browse files

Ignore first heading for meeting minutes

parent 3cd434f0
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
[project]
name = "coliverter"
version = "0.3.3"
version = "0.3.4"
authors = [
  { name="Jakob Moser", email="moser@cl.uni-heidelberg.de" },
]
+32 −0
Original line number Diff line number Diff line
from enum import auto, StrEnum
from typing import Self

import pandoc
from pandoc.types import Header, Pandoc, Str


class DocumentType(StrEnum):
    """Coarse classification of a document, guessed from its first header."""

    MEETING_MINUTES = auto()
    OTHER = auto()

    @classmethod
    def guess(cls, document: Pandoc) -> Self:
        """Guess the type of ``document`` by inspecting its first header.

        Args:
            document: The parsed pandoc document to classify.

        Returns:
            ``MEETING_MINUTES`` if the first header encountered while iterating
            the document starts with the inline ``Str("Protokoll")``;
            ``OTHER`` in every other case (no header at all, a header without
            content, or a header starting with anything else).
        """
        # Only the first header is relevant, so stop walking the document as
        # soon as one is found instead of collecting every header up front.
        first_header = next(
            (
                element
                for element in pandoc.iter(document)
                if isinstance(element, Header)
            ),
            None,
        )

        if first_header is None:
            # No headers means we can't really guess anything
            return cls.OTHER

        _level, _attrs, content = first_header

        if not content:
            # No content in the header still leaves us quite at a loss
            return cls.OTHER

        # Meeting minutes are recognized by a first header whose first inline
        # element is exactly the word "Protokoll".
        if content[0] == Str("Protokoll"):
            return cls.MEETING_MINUTES

        return cls.OTHER
+9 −2
Original line number Diff line number Diff line
@@ -5,6 +5,7 @@ from pathlib import Path
import pandoc
from pandoc.types import Block, BulletList, Header, Pandoc, Plain, Para, Str, Emph

from coliverter.DocumentType import DocumentType
from coliverter.toc.Node import Node
from coliverter.toc.make_toc import make_toc

@@ -27,12 +28,18 @@ TOC_PLACEHOLDER = Para([Str("[["), Emph([Str("TOC")]), Str("]]")])


def _get_table_of_contents(document: Pandoc) -> BulletList:
    headers = (
    document_type = DocumentType.guess(document)

    headers = tuple(
        element for element in pandoc.iter(document) if isinstance(element, Header)
    )
    # For meeting minutes, we skip the first header in the table of contents. This is because the first header
    # is just "Meeting minutes of meeting X", and we want to use the table of contents as agenda in meeting minutes.
    # The agenda should not contain the meeting itself.
    offset = 1 if document_type == DocumentType.MEETING_MINUTES else 0

    table_of_contents = make_toc(
        (level, content) for (level, attrs, content) in headers
        (level, content) for (level, attrs, content) in headers[offset:]
    )

    def make_bullet_list(node: Node[list[Block]]) -> BulletList:
+1 −1
Original line number Diff line number Diff line
@@ -104,7 +104,7 @@ wheels = [

[[package]]
name = "coliverter"
version = "0.3.1"
version = "0.3.3"
source = { editable = "." }
dependencies = [
    { name = "pandoc" },