Skip to content
Snippets Groups Projects
Commit f464eec0 authored by Simon Will's avatar Simon Will
Browse files

Mark in dev what function provided the syllables

parent 6f84f3e0
No related branches found
No related tags found
No related merge requests found
......@@ -7,7 +7,8 @@ import os
import re
from typing import Dict, List, Set
from .style import mark_long, mark_wrong_length, mark_wrong_syllables
from .style import (mark_long, mark_wrong_length, mark_wrong_syllables,
mark_syllables_provider)
def check_format(json_file, check_for=dict):
......@@ -289,7 +290,8 @@ class Token:
def __init__(self, token: str, span: List[int],
syllables: List[Syllable] = None, clitic: str = None,
accented: str = None,
lemma_to_morphtags: Dict[str, Set[str]] = None):
lemma_to_morphtags: Dict[str, Set[str]] = None,
syllables_provider=None):
if len(token) != span[1]-span[0]:
raise ValueError('Length of token {} does not match span {}.'
.format(token, span))
......@@ -302,6 +304,8 @@ class Token:
self.accented = accented
self.lemma_to_morphtags = lemma_to_morphtags
self.syllables_provider = syllables_provider
@classmethod
def from_json(cls, json_file):
raw = check_format(json_file)
......@@ -426,6 +430,7 @@ class Reading:
def format_differences(self, reference, mark_long=mark_long,
mark_wrong_length=mark_wrong_length,
mark_wrong_syllables=mark_wrong_syllables,
mark_syllables_provider=mark_syllables_provider,
syllable_joiner='-', token_joiner=' '):
formatted_tokens = []
for token, ref_token in zip(self.tokens, reference.tokens):
......@@ -445,7 +450,12 @@ class Reading:
else:
formatted_syllables = [mark_wrong_syllables(syll)
for syll in sylls]
formatted_tokens.append(syllable_joiner.join(formatted_syllables))
if not token.is_punct():
formatted_token = mark_syllables_provider(
syllable_joiner.join(formatted_syllables),
token.syllables_provider
)
formatted_tokens.append(formatted_token)
formatted = token_joiner.join(formatted_tokens)
return formatted
......
......@@ -368,9 +368,11 @@ def get_syllables_for_token(token: Token):
syllables = []
if token.accented:
syllables = get_syllables_for_accented_form(token)
token.syllables_provider = 'get_syllables_for_accented_form'
else:
if not token.is_punct():
syllables = get_syllables_for_unknown_form(token)
token.syllables_provider = 'get_syllables_for_unknown_form'
return syllables
......
......@@ -23,3 +23,12 @@ def mark_wrong_syllables(text):
def mark_correct(text):
    """Return *text* wrapped in the green foreground code.

    The result is ``Fore.GREEN``, then *text*, then ``Fore.RESET``.
    ``Fore`` is resolved from the enclosing module (presumably
    colorama's ``Fore`` -- confirm against the module imports).
    """
    template = '{Fore.GREEN}{text}{Fore.RESET}'
    return template.format(Fore=Fore, text=text)
def mark_syllables_provider(text, provider):
    """Tag *text* according to the function that produced its syllables.

    *provider* is compared against the provider names
    'get_syllables_for_accented_form' and
    'get_syllables_for_unknown_form'.

    * For the accented-form provider, *text* is returned untouched.
    * For the unknown-form provider, and for any other value
      (including ``None``), *text* is formatted together with a marker
      suffix. Both suffixes are the empty string here.
    """
    if provider == 'get_syllables_for_accented_form':
        return text

    # Two separate suffixes are kept so the unknown-form case and the
    # fallback case remain individually adjustable.
    if provider == 'get_syllables_for_unknown_form':
        suffix = ''
    else:
        suffix = ''
    return '{}{}'.format(text, suffix)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment