Newer
Older
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import json
import os
import re
from typing import Dict, List, Set
def check_format(json_file, check_for=dict):
    """Return JSON input as a parsed Python object.

    Args:
        json_file: An object that is already of type ``check_for``, a path
            to an existing file containing JSON, or a string of JSON text.
        check_for: Type that counts as "already parsed" (``dict`` by
            default).

    Returns:
        ``json_file`` itself when it is a ``check_for`` instance, otherwise
        the object decoded from the file or from the string.

    Raises:
        TypeError: If ``json_file`` is neither a ``check_for`` instance nor
            a string.
    """
    if isinstance(json_file, check_for):
        return json_file
    if isinstance(json_file, str):
        # A string that names an existing file is read from disk; any
        # other string is treated as JSON text.
        if os.path.exists(json_file):
            with open(json_file, 'r') as jf:
                return json.load(jf)
        return json.loads(json_file)
    # Fail loudly instead of silently returning None to the caller.
    raise TypeError('Input not convertible: expected {} or str, got {}.'
                    .format(check_for.__name__, type(json_file).__name__))
def from_json(json_file):
    """Load a list of verses from a JSON source.

    Args:
        json_file: Either an object with a ``read`` method or a string
            path to an existing file. The decoded JSON is iterated and
            every element is handed to ``Verse.from_json``.

    Returns:
        A list containing one ``Verse.from_json`` result per element.

    Raises:
        TypeError: If ``json_file`` is neither readable nor an existing
            path.
    """
    if hasattr(json_file, 'read'):
        verses = json.loads(json_file.read())
    elif isinstance(json_file, str) and os.path.exists(json_file):
        # Use a context manager so the file handle is always closed.
        with open(json_file) as handle:
            verses = json.load(handle)
    else:
        # The exception must actually be raised; merely constructing it
        # would fall through to an unbound ``verses`` below.
        raise TypeError('Input not convertible.')
    return [Verse.from_json(verse) for verse in verses]
def minimal(full_dict: dict):
    """Return a copy of ``full_dict`` without ``None`` and ``{}`` values.

    Nested dictionaries are processed recursively. A nested dictionary
    that only becomes empty *after* pruning is still kept (as ``{}``);
    only values that are already equal to ``{}`` are dropped. Other falsy
    values such as ``0``, ``''`` or ``[]`` are preserved.

    Args:
        full_dict: The dictionary to prune. It is not modified.

    Returns:
        A new dictionary containing the remaining entries.
    """
    result_dict = {}
    for key, value in full_dict.items():
        if value == {}:
            continue
        if isinstance(value, dict):
            result_dict[key] = minimal(value)
        elif value is not None:
            result_dict[key] = value
    return result_dict
def __init__(self, syllable: str, span: List[int], idx: int,
             syllable_length: int, vowel_length: int,
             phenomena: dict = None):
    """Initialise a syllable.

    Args:
        syllable: Text of the syllable; stored as ``self.text``.
        span: Two-element sequence whose difference ``span[1] - span[0]``
            must equal ``len(syllable)``.
        idx: Stored as ``self.id``.
        syllable_length: Stored unchanged.
        vowel_length: Stored unchanged.
        phenomena: Mapping stored as ``self.phenomena``. When omitted,
            every instance gets its own fresh dictionary.

    Raises:
        ValueError: If the text length does not match the span.
    """
    if len(syllable) != span[1] - span[0]:
        raise ValueError('Syllable length does not match syllable span.')
    self.text = syllable
    self.span = span
    self.id = idx
    self.syllable_length = syllable_length
    self.vowel_length = vowel_length
    # A ``dict()`` default in the signature would be shared between all
    # instances; create the dictionary per call instead.
    self.phenomena = dict() if phenomena is None else phenomena
@classmethod
def from_json(cls, json_file):
    """Build a syllable from its JSON representation.

    Args:
        json_file: Anything accepted by ``check_format``. The decoded
            mapping must provide ``'syllable'``, ``'span'``, ``'id'``,
            ``'syllable_length'`` and ``'vowel_length'``; ``'phenomena'``
            is optional.

    Returns:
        The new instance. Each entry of ``'phenomena'`` is converted with
        ``Phenomenon.from_json``.
    """
    # NOTE(review): the original lines binding ``raw`` and ``idx`` were
    # not present; ``'id'`` is the key the serialiser writes for
    # ``self.id`` -- confirm against stored data.
    raw = check_format(json_file)
    span = raw['span']
    text = raw['syllable']
    idx = raw['id']
    syllable_length = raw['syllable_length']
    vowel_length = raw['vowel_length']
    syllable = cls(text, span, idx, syllable_length, vowel_length)
    if 'phenomena' in raw:
        syllable.phenomena = {
            name: Phenomenon.from_json(entry)
            for name, entry in raw['phenomena'].items()
        }
    return syllable
features.update({'id':self.id})
features.update({'span':self.span})
features.update({'syllable':self.text})
features.update({'syllable_length':self.syllable_length})
features.update({'vowel_length':self.vowel_length})
features.update({'phenomena': minimal({key:value.to_dict() for key,value in self.phenomena.items()}) })
return minimal(features)
return json.dumps(self.to_dict())
def __init__(self, caused_by=None, overruled_by=None,
             chars=None, typus=None, omitted=None):
    """Store the given values as attributes of the same name.

    All parameters are optional and default to ``None``.
    """
    # NOTE(review): the tail of the signature was missing; the parameter
    # names and order follow the assignments below and the positional
    # call made by MultisyllablePhenomenon.__init__.
    self.caused_by = caused_by
    self.overruled_by = overruled_by
    self.chars = chars
    self.typus = typus
    self.omitted = omitted
#@classmethod
#def positional_lengthening(cls, chars: str, caused_by=None,
#overruled_by=None):
#phenomenon = cls('positional lengthening', caused_by, overruled_by)
#phenomenon.chars = chars
#@classmethod
#def iambic_shortening(cls, typus: str, caused_by=None, overruled_by=None):
#phenomenon = cls('iambic shortening', caused_by, overruled_by)
#phenomenon.typus = typus
#@classmethod
#def s_elision(cls, caused_by=None, overruled_by=None):
#phenomenon = cls('s-elision', caused_by, overruled_by)
#phenomenon.omitted = 's'
#@classmethod
#def verse_end(cls, caused_by=None, overruled_by=None):
#phenomenon = cls('verse end', caused_by, overruled_by)
@classmethod
def from_json(cls, json_file):
    """Build a phenomenon from its JSON representation.

    Args:
        json_file: Anything accepted by ``check_format``.

    Returns:
        A new instance whose ``caused_by``, ``overruled_by``, ``chars``,
        ``typus`` and ``omitted`` attributes are taken from the decoded
        mapping; a key that is absent yields ``None``.
    """
    raw = check_format(json_file)
    phenomenon = cls()
    # Use .get(): a mapping that only lists the fields that are set must
    # not raise KeyError here.
    phenomenon.caused_by = raw.get('caused_by')
    phenomenon.overruled_by = raw.get('overruled_by')
    phenomenon.chars = raw.get('chars')
    phenomenon.typus = raw.get('typus')
    phenomenon.omitted = raw.get('omitted')
    return phenomenon
if self.caused_by != None:
features.update({'caused_by':self.caused_by})
if self.overruled_by != None:
features.update({'overruled_by':self.overruled_by})
if self.chars != None:
features.update({'chars':self.chars})
if self.typus != None:
features.update({'typus':self.typus})
if self.omitted != None:
features.update({'omitted':self.omitted})
return minimal(features)
return json.dumps(self.to_dict())
class MultisyllablePhenomenon(Phenomenon):
def __init__(self, beginning:int, end:int, caused_by=None,
             overruled_by=None, chars=None, typus=None, omitted=None):
    """Initialise the inherited fields, then record ``beginning`` and ``end``.

    The five optional arguments are forwarded positionally, in the order
    given here, to ``Phenomenon.__init__``.
    """
    inherited = (caused_by, overruled_by, chars, typus, omitted)
    Phenomenon.__init__(self, *inherited)
    self.beginning, self.end = beginning, end
#def apheresis(self, beginning, end, caused_by=None, overruled_by=None):
#MultisyllablePhenomenon.__init__(self, 'apheresis', beginning, end,
#caused_by, overruled_by)
#def synizesis(self, beginning, end, caused_by=None, overruled_by=None):
#MultisyllablePhenomenon.__init__(self, 'synizesis', beginning, end,
#caused_by, overruled_by)
@classmethod
def from_json(cls, json_file):
    """Build a multi-syllable phenomenon from its JSON representation.

    Args:
        json_file: Anything accepted by ``check_format``. The decoded
            mapping must provide ``'beginning'`` and ``'end'``.

    Returns:
        A new instance. ``caused_by``, ``overruled_by``, ``chars``,
        ``typus`` and ``omitted`` are read from the mapping, with ``None``
        used for any key that is absent.
    """
    raw = check_format(json_file)
    beginning = raw['beginning']
    end = raw['end']
    phenomenon = cls(beginning, end)
    # Optional fields: tolerate mappings that leave unset values out.
    phenomenon.caused_by = raw.get('caused_by')
    phenomenon.overruled_by = raw.get('overruled_by')
    phenomenon.chars = raw.get('chars')
    phenomenon.typus = raw.get('typus')
    phenomenon.omitted = raw.get('omitted')
    return phenomenon
features.update({'beginning':self.beginning})
if self.caused_by != None:
features.update({'caused_by':self.caused_by})
if self.overruled_by != None:
features.update({'overruled_by':self.overruled_by})
if self.chars != None:
features.update({'chars':self.chars})
if self.typus != None:
features.update({'typus':self.typus})
if self.omitted != None:
features.update({'omitted':self.omitted})
return minimal(features)
return json.dumps(self.to_dict())
class Token:
def __init__(self, token: str, span: List[int],
             syllables: List[Syllable] = None, clitic: str = None,
             accented: str = None,
             lemma_to_morphtags: Dict[str, Set[str]] = None):
    """Initialise a token.

    Args:
        token: Text of the token; stored as ``self.text``.
        span: Two-element sequence whose difference ``span[1] - span[0]``
            must equal ``len(token)``.
        syllables: Stored as ``self.syllables``; a falsy value is
            replaced by a new empty list.
        clitic: Stored unchanged.
        accented: Stored unchanged.
        lemma_to_morphtags: Stored unchanged.

    Raises:
        ValueError: If the token length does not match the span.
    """
    # NOTE(review): the guarding condition was missing from the original
    # lines; it is restored from the error message and the ``span``
    # argument -- confirm.
    if len(token) != span[1] - span[0]:
        raise ValueError('Length of token {} does not match span {}.'
                         .format(token, span))
    self.text = token
    self.span = span
    self.syllables = syllables or list()
    # Always define ``clitic`` so that reading it never raises
    # AttributeError on a freshly constructed token.
    self.clitic = clitic
    self.accented = accented
    self.lemma_to_morphtags = lemma_to_morphtags
token = cls(text, span)
if 'clitic' in raw:
token.clitic = raw['clitic']

Simon Will
committed
if 'syllables' in raw:
for syllable in raw['syllables']:
token.syllables.append(Syllable.from_json(syllable))
features.update({'token': self.text})
features.update({'span': self.span})
features.update({'clitic': self.clitic})
if self.syllables:
features.update({'syllables': [syllable.to_dict() for syllable in self.syllables]})
return minimal(features)
def to_json(self):
    """Return the JSON string for the dictionary built by ``to_dict``."""
    as_dict = self.to_dict()
    return json.dumps(as_dict)
def is_punct(self):
    """Return True if the text consists only of non-word characters or underscores.

    An empty text yields ``False``.
    """
    # Raw string: ``\W`` inside a normal string literal is an invalid
    # escape sequence and triggers a warning on current Python versions.
    return bool(re.match(r'^[\W_]+$', self.text))
def __str__(self):
    """Return the stored text of this object."""
    text = self.text
    return text
def __repr__(self):
    """Return a debugging representation showing text, span and syllables."""
    # NOTE(review): the format arguments were missing from the original
    # (unterminated expression); they are matched to the three named
    # placeholders -- confirm.
    return ('Token(token={}, span={}, syllables={})'
            .format(self.text, self.span, self.syllables))
def __init__(self, tokens: List[Token] = None, phenomena: dict = None):
    """Store ``tokens`` and ``phenomena``.

    A falsy argument (``None`` or an empty container) is replaced by a
    newly created empty list or dictionary respectively.
    """
    self.tokens = tokens if tokens else list()
    self.phenomena = phenomena if phenomena else dict()
@classmethod
def from_json(cls, json_file):
    """Build a reading from its JSON representation.

    Args:
        json_file: Anything accepted by ``check_format``. The decoded
            mapping must provide ``'tokens'``; ``'phenomena'`` is
            optional and maps each key to a list of entries.

    Returns:
        A new instance holding one ``Token.from_json`` result per token
        entry and, per phenomenon key, a list of
        ``MultisyllablePhenomenon.from_json`` results.
    """
    raw = check_format(json_file)
    # NOTE(review): the original never created ``reading`` and the token
    # loop had no body; both are restored here -- confirm.
    reading = cls()
    for token in raw["tokens"]:
        reading.tokens.append(Token.from_json(token))
    if 'phenomena' in raw:
        for key, value in raw['phenomena'].items():
            for v in value:
                # setdefault creates the list on first use of a key.
                reading.phenomena.setdefault(key, []).append(
                    MultisyllablePhenomenon.from_json(v))
    return reading
features = dict()
features.update({'tokens': [token.to_dict() for token in self.tokens]})
phenomena = {key:[minimal(v.to_dict()) for v in value] for key,value in self.phenomena.items()}
features.update({'phenomena': phenomena})
return minimal(features)
def to_json(self):
    """Serialise the result of ``to_dict`` as a JSON string."""
    payload = self.to_dict()
    return json.dumps(payload)
def __len__(self):
    """Return the number of entries in ``self.tokens``."""
    token_count = len(self.tokens)
    return token_count
def append_token(self, token: Token):
    """Append ``token`` to the end of ``self.tokens``."""
    tokens = self.tokens
    tokens.append(token)
def __str__(self):
    """Join the tokens with single spaces.

    For each token the ``accented`` form is used unless it is ``None``,
    in which case the plain ``text`` is used.
    """
    return ' '.join(
        tok.text if tok.accented is None else tok.accented
        for tok in self.tokens
    )
def __repr__(self):
    """Return the same text as ``str(self)``."""
    # TODO: Implement this properly.
    as_text = str(self)
    return as_text

Simon Will
committed
def __init__(self, verse: str, source: dict = None,
             readings: List[Reading] = None):
    """Initialise a verse.

    Args:
        verse: Text of the verse; stored as ``self.text``.
        source: Stored as ``self.source``; a falsy value is replaced by a
            new empty dictionary.
        readings: Stored as ``self.readings``; a falsy value is replaced
            by a new empty list.
    """
    # NOTE(review): the original stored neither ``verse`` nor ``source``,
    # although other methods read ``self.text`` and ``self.source``.
    # Defaulting ``source`` to an empty dict mirrors ``readings`` --
    # confirm.
    self.text = verse
    self.source = source or dict()
    self.readings = readings or list()
@classmethod
def from_plain_verse(cls, plain_verse):
    """Create an instance from ``plain_verse`` alone.

    No readings are produced yet; the instance is returned exactly as
    the constructor built it.
    """
    # TODO: Generate readings.
    return cls(plain_verse)
@classmethod
def from_json(cls, json_file):
    """Build a verse from its JSON representation.

    Args:
        json_file: Anything accepted by ``check_format``. The decoded
            mapping must provide ``'verse'``; ``'source'`` (with
            ``'author'``, ``'work'``, ``'place'``) and ``'readings'`` are
            read when present.

    Returns:
        The new instance. Its source dictionary always has the keys
        ``'author'``, ``'work'`` and ``'place'`` (``None`` when absent
        from the input), and one ``Reading.from_json`` result is appended
        per entry of ``'readings'``.
    """
    raw = check_format(json_file)
    text = raw['verse']
    # Tolerate a missing source or missing source fields instead of
    # raising KeyError.
    raw_source = raw.get('source') or {}
    source = {field: raw_source.get(field)
              for field in ('author', 'work', 'place')}
    verse = cls(text, source=source)
    for reading in raw.get('readings', []):
        verse.readings.append(Reading.from_json(reading))
    # The constructed object has to be handed back to the caller.
    return verse
def to_dict(self):
    """Return the verse as a dictionary pruned by ``minimal``.

    The dictionary holds the text under ``'verse'``, the source under
    ``'source'`` and the ``to_dict`` result of every reading under
    ``'readings'``.
    """
    features = {
        'verse': self.text,
        'source': self.source,
        'readings': [reading.to_dict() for reading in self.readings],
    }
    return minimal(features)
return json.dumps(self.to_dict())
def __str__(self):
    """Return the verse text, the reading count and one line per reading."""
    template = 'Verse: {verse}\n{reading_num} Readings:\n{readings}'
    rendered = [str(reading) for reading in self.readings]
    return template.format(
        verse=self.text,
        reading_num=len(self.readings),
        readings='\n'.join(rendered),
    )