Skip to content
Snippets Groups Projects
Commit 30b9355b authored by Simon Will
Browse files

Unidecode syllables and tokens in corpus.py

parent 2cceee07
No related branches found
No related tags found
No related merge requests found
......@@ -5,6 +5,7 @@ import re
import os.path
from bs4 import BeautifulSoup
+from unidecode import unidecode
from .model import Reading, Syllable, Token, Verse
......@@ -29,7 +30,7 @@ def get_reading_from_line_element(element):
syllables = []
token_text = token_tag.text
token = Token(
-token=token_text,
+token=unidecode(token_text),
span=[span_begin, span_begin + len(token_text)]
)
......@@ -48,7 +49,7 @@ def get_reading_from_line_element(element):
)
syllable = Syllable(
idx=idx,
-syllable=syllable_text,
+syllable=unidecode(syllable_text),
span=[span_begin, span_begin + len(syllable_text)],
syllable_length=syllable_length,
vowel_length=None
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment