Verified Commit ca56c77f authored by Jakob Moser's avatar Jakob Moser
Browse files

Prepare coliverter for handling other file types

parent bc7b6a13
Loading
Loading
Loading
Loading
Loading
+2 −2
Original line number Diff line number Diff line
# Coliverter

Convert Markdown files into other formats, using the organization identity of Fachschaft Computerlinguistik.
Convert files into other formats, using the organization identity of Fachschaft Computerlinguistik.

This app uses the universal document converter **[Pandoc](https://pandoc.org/)** and the PDF converter **[WeasyPrint](https://weasyprint.org/)**. Both are excellent pieces of software and do almost all of the heavy lifting in this app. If you ever need to do some document conversion yourself, you will likely not need the Coliverter and can instead just use the dependencies directly.

## Why Coliverter?

- **If your Markdown files contain Emoji or other special Unicode**: Pandocs Markdown to PDF converter uses LaTeX for elegant results, but sadly this also makes it quite bad at processing Eomi.
- **If your Markdown files contain Emoji or other special Unicode**: Pandoc's Markdown-to-PDF converter uses LaTeX for elegant results, but sadly this also makes it quite bad at processing Emoji.
- **If you want to use the Fachschaft Computerlinguistik styles**: Those are integrated in the Coliverter, so that every file converted with it also looks like it was produced by the Fachschaft.

## Install
+18 −17
Original line number Diff line number Diff line
@@ -36,13 +36,14 @@ def _get_output_format(


def _convert_single(
    markdown_path: Path | None,
    input_path: Path | None,
    output_path: Path | None,
    output_format: FileFormat,
    document_type: DocumentType | None,
) -> None:
    markdown = read(markdown_path)
    document = Document.from_markdown(markdown, document_type)
    data = read(input_path)
    # TODO Handle other types than markdown
    document = Document.from_markdown(data, document_type)

    match output_format:
        case FileFormat.HTML:
@@ -52,17 +53,17 @@ def _convert_single(


def _convert_single_guessing_output_path(
    markdown_file_path: Path,
    markdown_dir_path: Path,
    input_file_path: Path,
    input_dir_path: Path,
    output_dir_path: Path,
    output_format: FileFormat,
    document_type: DocumentType | None,
) -> None:
    new_name = markdown_file_path.stem + (
    new_name = input_file_path.stem + (
        "_nur_Beschlüsse" if document_type == DocumentType.MEETING_RESOLUTIONS else ""
    )
    relative_output_file_path = (
        markdown_file_path.relative_to(markdown_dir_path)
        input_file_path.relative_to(input_dir_path)
        .with_name(new_name)
        .with_suffix(f".{output_format}")
    )
@@ -72,7 +73,7 @@ def _convert_single_guessing_output_path(

    try:
        _convert_single(
            markdown_file_path, output_file_path, output_format, document_type
            input_file_path, output_file_path, output_format, document_type
        )
    except TransformError:
        # TODO This is spectacularly ugly: As I know this function is called during batch processing, and I decided I
@@ -83,16 +84,16 @@ def _convert_single_guessing_output_path(

@typer.command()
def convert(
    markdown_path: Annotated[
    input_path: Annotated[
        Path | None,
        Argument(
            help="Path to the input Markdown file. If None, read Markdown from STDIN. If a directory, recursively find all *.md files and process those."
            help="Path to the input file. If None, read input from STDIN. If a directory, recursively find all processable files and process those."
        ),
    ] = None,
    output_path: Annotated[
        Path | None,
        Argument(
            help="Path to the output file. If None, write output to STDOUT. If markdown_path is a directory, this must be a directory as well."
            help="Path to the output file. If None, write output to STDOUT. If input_path is a directory, this must be a directory as well."
        ),
    ] = None,
    output_format: Annotated[
@@ -109,24 +110,24 @@ def convert(
    ] = None,
) -> None:
    """
    Convert Markdown files into other formats, using the organization identity of Fachschaft Computerlinguistik.
    Convert files into other formats, using the organization identity of Fachschaft Computerlinguistik.
    """

    # output_format might be none, but definitely_output_format definitely isn't.
    definitely_output_format = _get_output_format(output_path, output_format)

    if markdown_path and markdown_path.is_dir():
    if input_path and input_path.is_dir():
        if not output_path or not output_path.is_dir():
            raise ValueError(
                "output_path must be a directory, given that markdown_path is one."
                "output_path must be a directory, given that input_path is one."
            )

        with Pool() as p:
            p.starmap(
                _convert_single_guessing_output_path,
                zip(
                    markdown_path.rglob("*.md"),
                    repeat(markdown_path),
                    input_path.rglob("*.md"),  # TODO Also support other file types
                    repeat(input_path),
                    repeat(output_path),
                    repeat(definitely_output_format),
                    repeat(document_type),
@@ -134,5 +135,5 @@ def convert(
            )
    else:
        _convert_single(
            markdown_path, output_path, definitely_output_format, document_type
            input_path, output_path, definitely_output_format, document_type
        )