Verified Commit 7fd06006 authored by Jakob Moser
Browse files

Allow batch-processing of entire directories

parent bb20c827
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
[project]
name = "coliverter"
version = "0.3.4"
version = "0.4.0"
authors = [
  { name="Jakob Moser", email="moser@cl.uni-heidelberg.de" },
]
+76 −20
Original line number Diff line number Diff line
from itertools import repeat
from pathlib import Path
from typing import Annotated

from multiprocessing import Pool
import typer

from coliverter.FileFormat import FileFormat
@@ -11,18 +13,71 @@ from coliverter.steps.html_to_pdf import html_to_pdf
from coliverter.steps.md_to_html import md_to_html


def _get_output_format(
    output_path: Path | None, output_format: FileFormat | None
) -> FileFormat:
    """
    Resolve the output format that should actually be used.

    Both arguments are optional. An explicitly requested format always wins. Without one, the format is
    looked up from the suffix of the output path (leading dot stripped); if there is no output path,
    None is handed to the lookup instead. Whenever the lookup raises ValueError, HTML is used as default.

    Whatever combination is passed in, a file format comes out.
    """
    if not output_format:
        # Nothing was requested explicitly, so fall back to the file extension (if there is one).
        if output_path:
            extension = output_path.suffix.removeprefix(".")
        else:
            extension = None

        try:
            return FileFormat(extension)
        except ValueError:
            # Unknown (or missing) extension: use the default format.
            return FileFormat.HTML

    # An explicit format needs no guessing.
    return output_format


def _convert_single(
    markdown_path: Path | None, output_path: Path | None, output_format: FileFormat
) -> None:
    """
    Convert one Markdown input into the requested output format and write the result.

    The Markdown is read from markdown_path and rendered to HTML. For FileFormat.HTML that HTML is written
    to output_path directly. For any other format the HTML is rendered to a PDF first, and the PDF is
    written only if the requested format is FileFormat.PDF; otherwise nothing is written.

    markdown_path and output_path are forwarded to read() and write() as they are, including None.
    """
    html = md_to_html(read(markdown_path))

    if output_format != FileFormat.HTML:
        # PDF is produced from the HTML, so this step builds on the previous one.
        pdf = html_to_pdf(html)
        if output_format == FileFormat.PDF:
            write(pdf, output_path)
    else:
        write(html, output_path)


def _convert_single_guessing_output_path(
    markdown_file_path: Path,
    markdown_dir_path: Path,
    output_dir_path: Path,
    output_format: FileFormat,
) -> None:
    """
    Convert one Markdown file from an input directory, mirroring its location in the output directory.

    The output path is output_dir_path joined with the path of markdown_file_path relative to
    markdown_dir_path, with the suffix replaced by the output format. Example: input directory "in",
    file "in/a/b.md", output directory "out" and a format that formats as "pdf" give "out/a/b.pdf".

    Parent directories of the output path are created if they are missing, so that files found in
    nested subdirectories of the input directory can be written.

    Raises:
        ValueError: If markdown_file_path is not located inside markdown_dir_path.
    """
    relative_path = markdown_file_path.relative_to(markdown_dir_path)
    target_path = output_dir_path / relative_path.with_suffix(f".{output_format}")

    # The mirrored subdirectory might not exist yet in the output directory. exist_ok=True keeps this
    # safe when several worker processes try to create the same directory at the same time.
    target_path.parent.mkdir(parents=True, exist_ok=True)

    _convert_single(markdown_file_path, target_path, output_format)


@app.command()
def convert(
    markdown_path: Annotated[
        Path | None,
        typer.Argument(
            help="Path to the input Markdown file. If None, read Markdown from STDIN."
            help="Path to the input Markdown file. If None, read Markdown from STDIN. If a directory, recursively find all *.md files and process those."
        ),
    ] = None,
    output_path: Annotated[
        Path | None,
        typer.Argument(
            help="Path to the output file. If None, write output to STDOUT."
            help="Path to the output file. If None, write output to STDOUT. If markdown_path is a directory, this must be a directory as well."
        ),
    ] = None,
    output_format: Annotated[
@@ -35,24 +90,25 @@ def convert(
    """
    Convert Markdown files into other formats, using the organization identity of Fachschaft Computerlinguistik.
    """
    if output_format is None:
        try:
            output_format = FileFormat(
                output_path.suffix.removeprefix(".") if output_path else None
            )
        except ValueError:
            output_format = FileFormat.HTML

    markdown = read(markdown_path)

    html = md_to_html(markdown)

    if output_format == FileFormat.HTML:
        write(html, output_path)
        return
    # output_format might be none, but definitely_output_format definitely isn't.
    definitely_output_format = _get_output_format(output_path, output_format)

    pdf = html_to_pdf(html)
    if markdown_path and markdown_path.is_dir():
        if not output_path or not output_path.is_dir():
            raise ValueError(
                "output_path must be a directory, given that markdown_path is one."
            )

    if output_format == FileFormat.PDF:
        write(pdf, output_path)
        return
        with Pool() as p:
            p.starmap(
                _convert_single_guessing_output_path,
                zip(
                    markdown_path.rglob("*.md"),
                    repeat(markdown_path),
                    repeat(output_path),
                    repeat(definitely_output_format),
                ),
            )
    else:
        _convert_single(markdown_path, output_path, definitely_output_format)