#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import json
import os
import re

from typing import Dict, List, Set
def check_format(json_file, check_for=dict):
    """Normalize *json_file* into parsed JSON data.

    Accepts an already-parsed object (an instance of *check_for*), a path
    to a JSON file, or a raw JSON string.

    Args:
        json_file: Parsed object, file path, or JSON string.
        check_for: Type that counts as "already parsed" (default: dict).

    Returns:
        The parsed JSON data.

    Raises:
        TypeError: if the input is neither *check_for* nor a string.
    """
    if isinstance(json_file, check_for):
        # Already parsed -- nothing to do.  (The original branch had no
        # body, which was a syntax error.)
        return json_file
    elif isinstance(json_file, str):
        if os.path.exists(json_file):
            # The string names an existing file: read and parse it.
            with open(json_file, 'r') as jf:
                return json.load(jf)
        else:
            # Otherwise treat the string as raw JSON content.
            return json.loads(json_file)
    # Previously fell through and returned None silently.
    raise TypeError('Cannot interpret {!r} as JSON input.'.format(json_file))
def from_json(json_file):
    """Deserialize a list of verses from a file object, path or JSON string.

    Args:
        json_file: A readable file-like object, or a path to a JSON file.

    Returns:
        A list of Verse objects.

    Raises:
        TypeError: if the input cannot be interpreted.
    """
    if hasattr(json_file, 'read'):
        verses = json.loads(json_file.read())
    elif isinstance(json_file, str) and os.path.exists(json_file):
        # Use a context manager so the handle is closed deterministically
        # (the original leaked the file object).
        with open(json_file, 'r') as jf:
            verses = json.load(jf)
    else:
        # Bug fix: the original constructed the TypeError without raising
        # it, then crashed with NameError on the undefined 'verses'.
        raise TypeError('Input not convertible.')
    return [Verse.from_json(verse) for verse in verses]
def minimal(full_dict: dict) -> dict:
    """Return a copy of *full_dict* without None values or empty dicts.

    Nested dicts are cleaned recursively.  Note: a nested dict that only
    *becomes* empty after cleaning is still kept (matches the original
    behavior).

    Args:
        full_dict: Mapping to clean.

    Returns:
        A new dict with empty-dict and None entries dropped.
    """
    result_dict = dict()
    for key, value in full_dict.items():
        if value == {}:
            continue  # drop empty mappings outright
        if isinstance(value, dict):
            result_dict[key] = minimal(value)
        elif value is not None:  # identity test instead of '!= None'
            result_dict[key] = value
    return result_dict
def __init__(self, syllable: str, span: List[int], idx: int,
syllable_length: int, vowel_length: int,
if len(syllable) != span[1] - span[0]:
raise ValueError('Syllable length does not match syllable span.')
else:
self.text = syllable
self.span = span
self.id = idx
self.syllable_length = syllable_length
self.vowel_length = vowel_length
self.phenomena = phenomena or dict()
@classmethod
def from_json(cls, json_file):
    """Build a Syllable from a JSON dict, JSON string, or file path.

    NOTE(review): the source fragment never bound ``raw`` or ``idx`` and
    never returned; reconstructed using the key names from to_dict().

    Returns:
        A new Syllable instance.
    """
    raw = check_format(json_file)
    idx = raw['id']
    span = raw['span']
    text = raw['syllable']
    syllable_length = raw['syllable_length']
    vowel_length = raw['vowel_length']
    syllable = cls(text, span, idx, syllable_length, vowel_length)
    if 'phenomena' in raw:
        syllable.phenomena = dict()
        for name, data in raw['phenomena'].items():
            syllable.phenomena[name] = Phenomenon.from_json(data)
    return syllable
def to_dict(self):
    """Serialize this syllable to a dict, dropping empty/None entries.

    NOTE(review): the fragment lacked the ``def`` line and the
    ``features`` initialization; both reconstructed.
    """
    features = dict()
    features.update({'id': self.id})
    features.update({'span': self.span})
    features.update({'syllable': self.text})
    features.update({'syllable_length': self.syllable_length})
    features.update({'vowel_length': self.vowel_length})
    features.update({'phenomena': minimal(
        {key: value.to_dict() for key, value in self.phenomena.items()})})
    return minimal(features)
def to_json(self):
    """Return this object's to_dict() serialized as a JSON string."""
    return json.dumps(self.to_dict())
def __init__(self, caused_by=None, overruled_by=None,
self.caused_by = caused_by
self.overruled_by = overruled_by
self.chars = chars
self.typus = typus
self.omitted = omitted
#@classmethod
#def positional_lengthening(cls, chars: str, caused_by=None,
#overruled_by=None):
#phenomenon = cls('positional lengthening', caused_by, overruled_by)
#phenomenon.chars = chars
#@classmethod
#def iambic_shortening(cls, typus: str, caused_by=None, overruled_by=None):
#phenomenon = cls('iambic shortening', caused_by, overruled_by)
#phenomenon.typus = typus
#@classmethod
#def s_elision(cls, caused_by=None, overruled_by=None):
#phenomenon = cls('s-elision', caused_by, overruled_by)
#phenomenon.omitted = 's'
#@classmethod
#def verse_end(cls, caused_by=None, overruled_by=None):
#phenomenon = cls('verse end', caused_by, overruled_by)
@classmethod
def from_json(cls, json_file):
    """Build a Phenomenon from a JSON dict, JSON string, or file path.

    Uses .get() for every field: to_dict() output passes through
    minimal(), which drops None-valued keys, so any of them may be
    absent on round-trip (the original's raw['key'] would KeyError).

    Returns:
        A new Phenomenon instance.
    """
    raw = check_format(json_file)
    phenomenon = cls()
    phenomenon.caused_by = raw.get('caused_by')
    phenomenon.overruled_by = raw.get('overruled_by')
    phenomenon.chars = raw.get('chars')
    phenomenon.typus = raw.get('typus')
    phenomenon.omitted = raw.get('omitted')
    return phenomenon
def to_dict(self):
    """Serialize to a dict, including only attributes that are set.

    NOTE(review): the fragment lacked the ``def`` line and the
    ``features`` initialization; both reconstructed.
    """
    features = dict()
    if self.caused_by is not None:
        features.update({'caused_by': self.caused_by})
    if self.overruled_by is not None:
        features.update({'overruled_by': self.overruled_by})
    if self.chars is not None:
        features.update({'chars': self.chars})
    if self.typus is not None:
        features.update({'typus': self.typus})
    if self.omitted is not None:
        features.update({'omitted': self.omitted})
    return minimal(features)
def to_json(self):
    """Return this object's to_dict() serialized as a JSON string."""
    return json.dumps(self.to_dict())
class MultisyllablePhenomenon(Phenomenon):
    """A Phenomenon that spans a range of syllables (cf. synizesis)."""

    def __init__(self, beginning: int, end: int, caused_by=None,
                 overruled_by=None, chars=None, typus=None, omitted=None):
        """Create a phenomenon covering the syllables beginning..end."""
        super().__init__(caused_by, overruled_by, chars, typus, omitted)
        self.beginning = beginning
        self.end = end
#def apheresis(self, beginning, end, caused_by=None, overruled_by=None):
#MultisyllablePhenomenon.__init__(self, 'apheresis', beginning, end,
#caused_by, overruled_by)
#def synizesis(self, beginning, end, caused_by=None, overruled_by=None):
#MultisyllablePhenomenon.__init__(self, 'synizesis', beginning, end,
#caused_by, overruled_by)
@classmethod
def from_json(cls, json_file):
    """Build a MultisyllablePhenomenon from a JSON dict/string/path.

    NOTE(review): ``raw`` and the return were missing from the fragment;
    reconstructed.  Optional fields use .get() because to_dict() output
    passes through minimal(), which drops None-valued keys.

    Returns:
        A new MultisyllablePhenomenon instance.
    """
    raw = check_format(json_file)
    beginning = raw['beginning']
    end = raw['end']
    phenomenon = cls(beginning, end)
    phenomenon.caused_by = raw.get('caused_by')
    phenomenon.overruled_by = raw.get('overruled_by')
    phenomenon.chars = raw.get('chars')
    phenomenon.typus = raw.get('typus')
    phenomenon.omitted = raw.get('omitted')
    return phenomenon
def to_dict(self):
    """Serialize to a dict, including only attributes that are set.

    Bug fix: the original serialized 'beginning' but never 'end', so
    from_json() (which reads raw['end']) could not round-trip the span.
    """
    features = dict()
    features.update({'beginning': self.beginning})
    features.update({'end': self.end})
    if self.caused_by is not None:
        features.update({'caused_by': self.caused_by})
    if self.overruled_by is not None:
        features.update({'overruled_by': self.overruled_by})
    if self.chars is not None:
        features.update({'chars': self.chars})
    if self.typus is not None:
        features.update({'typus': self.typus})
    if self.omitted is not None:
        features.update({'omitted': self.omitted})
    return minimal(features)
def to_json(self):
    """Return this object's to_dict() serialized as a JSON string."""
    return json.dumps(self.to_dict())
class Token:
    """A token of a verse: surface text, character span, and syllables."""

    def __init__(self, token: str, span: List[int],
                 syllables: List[Syllable] = None, clitic: str = None,
                 accented: str = None,
                 lemma_to_morphtags: Dict[str, Set[str]] = None):
        """Create a token.

        NOTE(review): the guard condition line was missing before the
        raise in the source; reconstructed from the error message.

        Args:
            token: Surface text.
            span: [start, end) character offsets; width must equal len(token).
            syllables: Optional list of Syllable objects.
            clitic: Optional clitic attached to the token.
            accented: Optional accented form of the token.
            lemma_to_morphtags: Optional mapping lemma -> set of morph tags.

        Raises:
            ValueError: if span width and token length disagree.
        """
        if len(token) != span[1] - span[0]:
            raise ValueError('Length of token {} does not match span {}.'
                             .format(token, span))
        self.text = token
        self.span = span
        self.syllables = syllables or list()
        # Bug fix: clitic was accepted but never stored, although
        # from_json() and to_dict() read self.clitic.
        self.clitic = clitic
        self.accented = accented
        self.lemma_to_morphtags = lemma_to_morphtags
@classmethod
def from_json(cls, json_file):
    """Build a Token from a JSON dict, JSON string, or file path.

    NOTE(review): the decorator, ``raw``, ``text``, ``span`` and the
    return were missing from the fragment; key names reconstructed from
    to_dict().

    Returns:
        A new Token instance.
    """
    raw = check_format(json_file)
    text = raw['token']
    span = raw['span']
    token = cls(text, span)
    if 'clitic' in raw:
        token.clitic = raw['clitic']
    if 'syllables' in raw:
        for syllable in raw['syllables']:
            token.syllables.append(Syllable.from_json(syllable))
    return token
def to_dict(self):
    """Serialize this token to a dict, dropping empty/None entries.

    NOTE(review): the fragment lacked the ``def`` line and the
    ``features`` initialization; both reconstructed.
    """
    features = dict()
    features.update({'token': self.text})
    features.update({'span': self.span})
    features.update({'clitic': self.clitic})
    if self.syllables:
        features.update({'syllables': [syllable.to_dict()
                                       for syllable in self.syllables]})
    return minimal(features)
def to_json(self):
    """Return this token serialized as a JSON string."""
    as_dict = self.to_dict()
    return json.dumps(as_dict)
def is_punct(self):
    """Return True if the token consists only of punctuation/underscores.

    Uses a raw string for the regex (the original plain '\\W' escape
    worked only by accident) and fullmatch instead of explicit anchors.
    The empty string yields False ('+' requires at least one character).
    """
    return bool(re.fullmatch(r'[\W_]+', self.text))
def __str__(self):
return self.text
def __repr__(self):
return ('Token(token={}, span={}, syllables={})'
def __init__(self, tokens: List[Token] = None, phenomena: dict = None):
    """Create a reading.

    Args:
        tokens: Optional list of Token objects (falsy -> fresh list).
        phenomena: Optional mapping name -> phenomena (falsy -> fresh dict).
    """
    self.tokens = tokens or list()
    self.phenomena = phenomena or dict()
@classmethod
def from_json(cls, json_file):
    """Build a Reading from a JSON dict, JSON string, or file path.

    NOTE(review): the fragment never created ``reading`` and the token
    loop had an empty body (the source itself carried the note
    '# self is undefined'); reconstructed to append one Token per entry.

    Returns:
        A new Reading instance.
    """
    raw = check_format(json_file)
    reading = cls()
    for token in raw['tokens']:
        reading.append_token(Token.from_json(token))
    if 'phenomena' in raw:
        for key, value in raw['phenomena'].items():
            for v in value:
                if key in reading.phenomena:
                    reading.phenomena[key].append(
                        MultisyllablePhenomenon.from_json(v))
                else:
                    reading.phenomena[key] = [
                        MultisyllablePhenomenon.from_json(v)]
    return reading
def get_schema(self):
    """Return the metrical schema string of this reading.

    Light syllables (syllable_length == 1) map to '⏑', heavy ones
    (== 2) to '–'; any other length contributes no symbol.
    """
    marks = {1: '⏑', 2: '–'}
    schema = []
    for token in self.tokens:
        for syllable in token.syllables:
            mark = marks.get(syllable.syllable_length)
            if mark is not None:
                schema.append(mark)
    return ''.join(schema)
def to_dict(self):
    """Serialize this reading to a dict, dropping empty/None entries.

    NOTE(review): only the ``def`` line was missing from the fragment.
    """
    features = dict()
    features.update({'tokens': [token.to_dict() for token in self.tokens]})
    phenomena = {key: [minimal(v.to_dict()) for v in value]
                 for key, value in self.phenomena.items()}
    features.update({'phenomena': phenomena})
    return minimal(features)
def to_json(self):
    """Return this object's to_dict() serialized as a JSON string."""
    return json.dumps(self.to_dict())
def __len__(self):
return len(self.tokens)
def append_token(self, token: Token):
    """Append *token* to the end of this reading's token list."""
    self.tokens.append(token)
def __str__(self):
forms = [
t.accented if t.accented is not None else t.text
for t in self.tokens
]
return ' '.join(forms)
def __repr__(self):
# TODO: Implement this properly.
return str(self)

Simon Will
committed
def __init__(self, verse: str, source: dict = None,
             readings: List[Reading] = None):
    """Create a verse.

    Bug fix: the original stored only readings, but to_dict() and
    __str__() read self.text and self.source, which were never assigned.

    Args:
        verse: The verse text (stored as ``text``).
        source: Optional provenance dict (author/work/place keys used
            by from_json).
        readings: Optional list of Reading objects.
    """
    self.text = verse
    self.source = source or dict()
    self.readings = readings or list()
@classmethod
def from_plain_verse(cls, plain_verse):
    """Create a Verse from plain text; readings are not yet generated."""
    verse = cls(plain_verse)
    # TODO: Generate readings.
    return verse
@classmethod
def from_json(cls, json_file):
    """Build a Verse from a JSON dict, JSON string, or file path.

    Bug fix: the original never returned the constructed verse, so every
    call yielded None.

    Returns:
        A new Verse with text, source and readings populated.
    """
    raw = check_format(json_file)
    text = raw['verse']
    source = dict()
    source['author'] = raw['source']['author']
    source['work'] = raw['source']['work']
    source['place'] = raw['source']['place']
    verse = cls(text, source=source)
    for reading in raw['readings']:
        verse.readings.append(Reading.from_json(reading))
    return verse
def to_dict(self):
    """Serialize this verse to a dict, dropping empty/None entries."""
    features = {
        'verse': self.text,
        'source': self.source,
        'readings': [reading.to_dict() for reading in self.readings],
    }
    return minimal(features)
def to_json(self):
    """Return this object's to_dict() serialized as a JSON string."""
    return json.dumps(self.to_dict())
def __str__(self):
s = 'Verse: {verse}\n{reading_num} Readings:\n{readings}'
readings_str = '\n'.join(str(r) for r in self.readings)
return s.format(verse=self.text, reading_num=len(self.readings),
readings=readings_str)
class Position:
    """A position inside a reading, located by mora count or meter element."""

    def __init__(self, reading: Reading, mora: int, word_boundary: bool,
                 token: Token, syllable: Syllable,
                 meter: '.meters.Meter' = None, element: int = None):
        """Create a position.

        Bug fix: the original accepted ``token`` and ``syllable`` but
        never stored them, although after_mora() constructs Positions
        with both.

        Args:
            reading: The Reading this position belongs to.
            mora: Number of morae before this position.
            word_boundary: Whether the position's syllable starts a token.
            token: The token containing the position.
            syllable: The syllable at the position.
            meter: Optional meter object.
            element: Optional meter-element index.
        """
        self.reading = reading
        self.mora = mora
        self.word_boundary = word_boundary
        self.token = token
        self.syllable = syllable
        self.meter = meter
        self.element = element
@classmethod
def after_mora(cls, reading: Reading, mora: int) -> 'Position':
    """Return the Position of the first syllable after *mora* morae.

    Walks the reading's syllables, summing syllable_length as morae.

    Bug fixes: the original referenced an undefined ``meter`` (NameError)
    and, because the counter was only advanced in the else branch, kept
    overwriting ``position`` on every syllable after the match instead of
    stopping at the first one.

    Returns:
        The matching Position, or None if the reading is too short.
    """
    morae = 0
    for token in reading.tokens:
        for i, syllable in enumerate(token.syllables):
            word_boundary = i == 0
            if morae == mora:
                return cls(
                    reading=reading, mora=mora, token=token,
                    syllable=syllable, word_boundary=word_boundary
                )
            morae += syllable.syllable_length
    return None
@classmethod
def after_element(cls, reading: Reading, meter: '.meters.Meter',
                  element: int) -> 'Position':
    """Return the Position after meter element *element* (unimplemented)."""
    # TODO: Implement this.
    return None
@classmethod
def after(cls, type: str, reading: Reading, position_number: int,
          meter: '.meters.Meter') -> 'Position':
    """Dispatch to after_mora() or after_element() depending on *type*.

    Bug fixes: the original forwarded an undefined ``*args`` to both
    helpers and formatted an undefined ``spec`` into the error message;
    the offending value is *type* itself.

    Raises:
        ValueError: if *type* is neither 'mora' nor 'element'.
    """
    if type == 'mora':
        return cls.after_mora(reading, position_number)
    elif type == 'element':
        return cls.after_element(reading, meter, position_number)
    else:
        raise ValueError(
            'The after type has to be "mora" or "element", but is {!r}'
            .format(type)
        )