Loading pyproject.toml +1 −1 Original line number Diff line number Diff line [project] name = "coliverter" version = "0.4.1" version = "0.4.2" authors = [ { name="Jakob Moser", email="moser@cl.uni-heidelberg.de" }, ] Loading src/coliverter/convert.py +18 −6 Original line number Diff line number Diff line Loading @@ -5,6 +5,7 @@ from typing import Annotated from multiprocessing import Pool from coliverter.documents.Document import Document from coliverter.documents.DocumentType import DocumentType from coliverter.files.FileFormat import FileFormat from coliverter.cli.typer import typer from coliverter.files.read import read Loading Loading @@ -34,10 +35,13 @@ def _get_output_format( def _convert_single( markdown_path: Path | None, output_path: Path | None, output_format: FileFormat markdown_path: Path | None, output_path: Path | None, output_format: FileFormat, document_type: DocumentType | None, ) -> None: markdown = read(markdown_path) document = Document.from_markdown(markdown) document = Document.from_markdown(markdown, document_type) match output_format: case FileFormat.HTML: Loading @@ -51,6 +55,7 @@ def _convert_single_guessing_output_path( markdown_dir_path: Path, output_dir_path: Path, output_format: FileFormat, document_type: DocumentType | None, ) -> None: guessed_out_file_path = output_dir_path / ( markdown_file_path.relative_to(markdown_dir_path).with_suffix( Loading @@ -61,9 +66,7 @@ def _convert_single_guessing_output_path( guessed_out_file_path.parent.mkdir(parents=True, exist_ok=True) _convert_single( markdown_file_path, guessed_out_file_path, output_format, markdown_file_path, guessed_out_file_path, output_format, document_type ) Loading @@ -87,6 +90,12 @@ def convert( help="Format of the output file. If None, infer from extension in output path. If inferring is not possible, default to html." ), ] = None, document_type: Annotated[ DocumentType | None, Option( help="Type of the document. If None, guess it. Can be used to influence how content is transformed during conversion." ), ] = None, ) -> None: """ Convert Markdown files into other formats, using the organization identity of Fachschaft Computerlinguistik. Loading @@ -109,7 +118,10 @@ def convert( repeat(markdown_path), repeat(output_path), repeat(definitely_output_format), repeat(document_type), ), ) else: _convert_single(markdown_path, output_path, definitely_output_format) _convert_single( markdown_path, output_path, definitely_output_format, document_type ) src/coliverter/documents/Document.py +3 −2 Original line number Diff line number Diff line Loading @@ -41,13 +41,14 @@ class Document: format=f"commonmark_x+{'+'.join(ACTIVE_MARKDOWN_EXTENSIONS)}", options=INPUT_OPTIONS, ) document_type = maybe_type or DocumentType.guess(content) # Apply the transformations we want to do transformations = (insert_table_of_contents,) for transform in transformations: content = transform(content) content = transform(content, document_type) return cls(content=content, type=maybe_type or DocumentType.guess(content)) return cls(content=content, type=document_type) @property def pdf(self) -> bytes: Loading src/coliverter/documents/transform/insert_table_of_contents.py +8 −5 Original line number Diff line number Diff line Loading @@ -9,9 +9,7 @@ from coliverter.tree.Node import Node TOC_PLACEHOLDER = Para([Str("[["), Emph([Str("TOC")]), Str("]]")]) def _get_table_of_contents(document: Pandoc) -> BulletList: document_type = DocumentType.guess(document) def _get_table_of_contents(document: Pandoc, document_type: DocumentType) -> BulletList: headers = tuple( element for element in pandoc.iter(document) if isinstance(element, Header) ) Loading @@ -32,10 +30,15 @@ def _get_table_of_contents(document: Pandoc) -> BulletList: return make_bullet_list(headers_tree) def insert_table_of_contents(content: Pandoc) -> Pandoc: def insert_table_of_contents(content: Pandoc, document_type: DocumentType) -> Pandoc: """ Replace [[_TOC_]] with a table of contents whenever it appears in the content. This works in-place, i.e. `content` is modified. We nevertheless return it for convenience. :param content: The document contents :param document_type: The type of the document (necessary as it might require a special table of contents format) """ return replace(content, TOC_PLACEHOLDER, _get_table_of_contents(content)) return replace( content, TOC_PLACEHOLDER, _get_table_of_contents(content, document_type) ) Loading
pyproject.toml +1 −1 Original line number Diff line number Diff line [project] name = "coliverter" version = "0.4.1" version = "0.4.2" authors = [ { name="Jakob Moser", email="moser@cl.uni-heidelberg.de" }, ] Loading
src/coliverter/convert.py +18 −6 Original line number Diff line number Diff line Loading @@ -5,6 +5,7 @@ from typing import Annotated from multiprocessing import Pool from coliverter.documents.Document import Document from coliverter.documents.DocumentType import DocumentType from coliverter.files.FileFormat import FileFormat from coliverter.cli.typer import typer from coliverter.files.read import read Loading Loading @@ -34,10 +35,13 @@ def _get_output_format( def _convert_single( markdown_path: Path | None, output_path: Path | None, output_format: FileFormat markdown_path: Path | None, output_path: Path | None, output_format: FileFormat, document_type: DocumentType | None, ) -> None: markdown = read(markdown_path) document = Document.from_markdown(markdown) document = Document.from_markdown(markdown, document_type) match output_format: case FileFormat.HTML: Loading @@ -51,6 +55,7 @@ def _convert_single_guessing_output_path( markdown_dir_path: Path, output_dir_path: Path, output_format: FileFormat, document_type: DocumentType | None, ) -> None: guessed_out_file_path = output_dir_path / ( markdown_file_path.relative_to(markdown_dir_path).with_suffix( Loading @@ -61,9 +66,7 @@ def _convert_single_guessing_output_path( guessed_out_file_path.parent.mkdir(parents=True, exist_ok=True) _convert_single( markdown_file_path, guessed_out_file_path, output_format, markdown_file_path, guessed_out_file_path, output_format, document_type ) Loading @@ -87,6 +90,12 @@ def convert( help="Format of the output file. If None, infer from extension in output path. If inferring is not possible, default to html." ), ] = None, document_type: Annotated[ DocumentType | None, Option( help="Type of the document. If None, guess it. Can be used to influence how content is transformed during conversion." ), ] = None, ) -> None: """ Convert Markdown files into other formats, using the organization identity of Fachschaft Computerlinguistik. Loading @@ -109,7 +118,10 @@ def convert( repeat(markdown_path), repeat(output_path), repeat(definitely_output_format), repeat(document_type), ), ) else: _convert_single(markdown_path, output_path, definitely_output_format) _convert_single( markdown_path, output_path, definitely_output_format, document_type )
src/coliverter/documents/Document.py +3 −2 Original line number Diff line number Diff line Loading @@ -41,13 +41,14 @@ class Document: format=f"commonmark_x+{'+'.join(ACTIVE_MARKDOWN_EXTENSIONS)}", options=INPUT_OPTIONS, ) document_type = maybe_type or DocumentType.guess(content) # Apply the transformations we want to do transformations = (insert_table_of_contents,) for transform in transformations: content = transform(content) content = transform(content, document_type) return cls(content=content, type=maybe_type or DocumentType.guess(content)) return cls(content=content, type=document_type) @property def pdf(self) -> bytes: Loading
src/coliverter/documents/transform/insert_table_of_contents.py +8 −5 Original line number Diff line number Diff line Loading @@ -9,9 +9,7 @@ from coliverter.tree.Node import Node TOC_PLACEHOLDER = Para([Str("[["), Emph([Str("TOC")]), Str("]]")]) def _get_table_of_contents(document: Pandoc) -> BulletList: document_type = DocumentType.guess(document) def _get_table_of_contents(document: Pandoc, document_type: DocumentType) -> BulletList: headers = tuple( element for element in pandoc.iter(document) if isinstance(element, Header) ) Loading @@ -32,10 +30,15 @@ def _get_table_of_contents(document: Pandoc) -> BulletList: return make_bullet_list(headers_tree) def insert_table_of_contents(content: Pandoc) -> Pandoc: def insert_table_of_contents(content: Pandoc, document_type: DocumentType) -> Pandoc: """ Replace [[_TOC_]] with a table of contents whenever it appears in the content. This works in-place, i.e. `content` is modified. We nevertheless return it for convenience. :param content: The document contents :param document_type: The type of the document (necessary as it might require a special table of contents format) """ return replace(content, TOC_PLACEHOLDER, _get_table_of_contents(content)) return replace( content, TOC_PLACEHOLDER, _get_table_of_contents(content, document_type) )