diff --git a/allzweckmesser/model.py b/allzweckmesser/model.py index 8e9bd7869f93873bc57886952725a7e67755e254..fe33c664fa95aee78a6cc084fea482ad834723b8 100644 --- a/allzweckmesser/model.py +++ b/allzweckmesser/model.py @@ -7,7 +7,8 @@ import os import re from typing import Dict, List, Set -from .style import mark_long, mark_wrong_length, mark_wrong_syllables +from .style import (mark_long, mark_wrong_length, mark_wrong_syllables, + mark_syllables_provider) def check_format(json_file, check_for=dict): @@ -289,7 +290,8 @@ class Token: def __init__(self, token: str, span: List[int], syllables: List[Syllable] = None, clitic: str = None, accented: str = None, - lemma_to_morphtags: Dict[str, Set[str]] = None): + lemma_to_morphtags: Dict[str, Set[str]] = None, + syllables_provider=None): if len(token) != span[1]-span[0]: raise ValueError('Length of token {} does not match span {}.' .format(token, span)) @@ -302,6 +304,8 @@ class Token: self.accented = accented self.lemma_to_morphtags = lemma_to_morphtags + self.syllables_provider = syllables_provider + @classmethod def from_json(cls, json_file): raw = check_format(json_file) @@ -426,6 +430,7 @@ class Reading: def format_differences(self, reference, mark_long=mark_long, mark_wrong_length=mark_wrong_length, mark_wrong_syllables=mark_wrong_syllables, + mark_syllables_provider=mark_syllables_provider, syllable_joiner='-', token_joiner=' '): formatted_tokens = [] for token, ref_token in zip(self.tokens, reference.tokens): @@ -445,7 +450,12 @@ class Reading: else: formatted_syllables = [mark_wrong_syllables(syll) for syll in sylls] - formatted_tokens.append(syllable_joiner.join(formatted_syllables)) + if not token.is_punct(): + formatted_token = mark_syllables_provider( + syllable_joiner.join(formatted_syllables), + token.syllables_provider + ) + formatted_tokens.append(formatted_token) formatted = token_joiner.join(formatted_tokens) return formatted diff --git a/allzweckmesser/scanner.py b/allzweckmesser/scanner.py index 
fb6b538f2d1b904267c93860b2f2b0c712a8d502..b500264996574f93b9c51d99eba0aab68bcc5705 100644 --- a/allzweckmesser/scanner.py +++ b/allzweckmesser/scanner.py @@ -368,9 +368,11 @@ def get_syllables_for_token(token: Token): syllables = [] if token.accented: syllables = get_syllables_for_accented_form(token) + token.syllables_provider = 'get_syllables_for_accented_form' else: if not token.is_punct(): syllables = get_syllables_for_unknown_form(token) + token.syllables_provider = 'get_syllables_for_unknown_form' return syllables diff --git a/allzweckmesser/style.py b/allzweckmesser/style.py index 7f37c3c4774949c52384652efc916ef13b69288c..5da70be728604ccc104f0acbf08282d059d39beb 100644 --- a/allzweckmesser/style.py +++ b/allzweckmesser/style.py @@ -23,3 +23,12 @@ def mark_wrong_syllables(text): def mark_correct(text): return ('{Fore.GREEN}{text}{Fore.RESET}' .format(Fore=Fore, text=text)) + + +def mark_syllables_provider(text, provider): + if provider == 'get_syllables_for_accented_form': + return text + elif provider == 'get_syllables_for_unknown_form': + return '{}{}'.format(text, 'ₐ') + else: + return '{}{}'.format(text, 'ₑ')