Skip to content
Snippets Groups Projects
Commit 33192679 authored by Simon Will
Browse files

Implement features and overhaul meters

parent 42c41827
No related branches found
No related tags found
No related merge requests found
...@@ -3,104 +3,144 @@ ...@@ -3,104 +3,144 @@
import re import re
from .features import ReadingMeterFeatures
from .model import Reading, Position from .model import Reading, Position
def bridge(position_spec, feature):
    """Build a meter condition that reports a violated metrical bridge.

    A bridge is a position at which a word boundary is avoided, so the
    bridge counts as violated exactly when a word boundary is found there.

    Args:
        position_spec: Sequence whose first two items are handed to
            ``Position.after`` as the position type (e.g. ``'mora'``) and
            the position number.  Further items (such as a human-readable
            label) are not read here.
        feature: Value the condition returns when the bridge is violated.

    Returns:
        A function ``get_feature(meter, reading)`` that returns ``feature``
        if there is a word boundary at the specified position and ``None``
        otherwise.  It is meant to be passed in ``Meter(conditions=[...])``,
        which binds the meter as first argument.
    """
    def get_feature(meter: Meter, reading: Reading):
        position = Position.after(position_spec[0], reading, meter,
                                  position_spec[1])
        # The previous version returned the feature when there was *no*
        # word boundary (a leftover from the reward-based scoring, where
        # respecting the bridge earned the reward).  Since the feature now
        # denotes a violation, it has to be emitted when the boundary is
        # present.
        if position.word_boundary:
            return feature
        return None

    return get_feature
class Meter:
    """A named metrical scheme that readings can be matched against.

    Args:
        name: Full name of the meter.
        schema: Regular expression matched (with ``re.match``) against the
            string returned by ``reading.get_schema()``.
        breaks: Optional list of alternative break groups.  Each group is a
            list of ``(type, number, label)`` tuples; a reading has the
            usual breaks if, for at least one group, every position of the
            group is a word boundary.
        conditions: Optional list of functions ``cond(meter, reading)``.
            They are bound to this instance, so they are later called with
            the reading only.
        short_name: Optional abbreviated name.
        id: Optional numeric identifier of the meter.
    """

    def __init__(self, name: str, schema: str, breaks: list = None,
                 conditions: list = None, short_name: str = None,
                 id: int = None):
        self.name = name
        self.schema = schema
        self.break_specs = breaks
        # Convert condition functions to instance-bound methods.
        self.conditions = ([cond.__get__(self) for cond in conditions]
                           if conditions else [])
        self.short_name = short_name
        self.id = id

    def match_reading(self, reading: 'Reading'):
        """Return the ``re.match`` result of the schema on the reading.

        The result is ``None`` if the reading's schema string does not
        match this meter's schema.
        """
        return re.match(self.schema, reading.get_schema())

    def collect_condition_features(self, reading: 'Reading'):
        """Return the truthy results of all conditions for ``reading``.

        Each bound condition is called with the reading; falsy results
        (e.g. ``None``) are dropped.
        """
        # Fixed: the loop variable is ``cond``; the former call to the
        # undefined name ``condition`` raised a NameError.
        results = (cond(reading) for cond in self.conditions)
        return [feature for feature in results if feature]

    def reading_has_usual_breaks(self, reading: 'Reading'):
        """Tell whether ``reading`` satisfies one of the break groups.

        Returns ``True`` if no break groups are configured, or if for at
        least one group every specified position has a truthy
        ``word_boundary`` attribute; ``False`` otherwise.
        """
        if not self.break_specs:
            return True
        for breaks in self.break_specs:
            satisfied = True
            for b in breaks:
                # NOTE(review): ``bridge`` in this module calls
                # Position.after(type, reading, meter, number), whereas
                # this call passes (type, reading, number, meter).  One of
                # the two orders is presumably wrong -- confirm against
                # the signature of Position.after.
                position = Position.after(b[0], reading, b[1], self)
                if not getattr(position, 'word_boundary', False):
                    satisfied = False
                    break
            if satisfied:
                return True
        return False
# Two-element opening shared by aeolic meters; the alternatives are
# (–)(–), (–)(⏑) and (⏑)(–).
AEOLIC_BASE = r'(?:(–)(–)|(–)(⏑)|(⏑)(–))'

# All known meters, keyed by their full name.  ``breaks`` lists alternative
# groups of positions; see Meter.reading_has_usual_breaks.
ALL_METERS = {
    'Catalectic Dactylic Hexameter': Meter(
        'Catalectic Dactylic Hexameter',
        r'(–)(⏑⏑|–)(–)(⏑⏑|–)(–)(⏑⏑|–)(–)(⏑⏑|–)(–)(⏑⏑|–)(⏑|–)',
        conditions=[
            bridge(('mora', 15, 'Hermann’s Bridge'),
                   ReadingMeterFeatures.HEXAMETER_BRIDGE_VIOLATED)
        ],
        breaks=[
            [('mora', 6, 'Trithemimeral'), ('mora', 14, 'Hephthemimeral')],
            [('mora', 10, 'Penthemimeral')],
            [('mora', 16, 'Bucolic Diaeresis')],
        ],
        short_name='6da‸',
        id=0
    ),
    'Dactylic Pentameter': Meter(
        'Dactylic Pentameter',
        r'(–)(⏑⏑|–)(–)(⏑⏑|–)(–)(–)(⏑⏑)(–)(⏑⏑)(⏑|–)',
        # NOTE(review): the hexameter breaks above appear to count morae
        # (penthemimeral = 10).  Counted the same way, the middle of this
        # schema would fall after mora 10 rather than 5 -- confirm.
        breaks=[[('mora', 5, 'Middle diaeresis')]],
        short_name='3da‸3da‸',
        id=1
    ),
    'Iambic Trimeter': Meter(
        'Iambic Trimeter',
        r'(⏑|⏑⏑|–)(⏑⏑|–)(⏑)(⏑⏑|–)(⏑|⏑⏑|–)(⏑⏑|–)(⏑)(⏑⏑|–)(⏑|⏑⏑|–)(⏑⏑|–)(⏑)(⏑|–)',
        # Fixed: the comma between the two alternative groups was missing,
        # which turned the second list into a subscript of the first and
        # raised a TypeError when the module was imported.
        breaks=[
            [('element', 4, 'After fourth element')],
            [('element', 8, 'After eighth element')],
        ],
        short_name='3ia',
        id=2
    ),
    'Iambic Senarius': Meter(
        'Iambic Senarius',
        r'(⏑|⏑⏑|–)(⏑⏑|–)(⏑|⏑⏑|–)(⏑⏑|–)(⏑|⏑⏑|–)(⏑⏑|–)(⏑|⏑⏑|–)(⏑⏑|–)(⏑|⏑⏑|–)(⏑⏑|–)(⏑)(⏑|–)',
        short_name='6ia',
        id=3
    ),
    'Sapphic Hendecasyllable': Meter(
        'Sapphic Hendecasyllable',
        r'(–)(–|⏑)(–)(–|⏑)(–)(⏑)(⏑)(–)(⏑)(–)(⏑|–)',
        # An empty list instead of the former ``{}``: Meter treats both as
        # "no conditions", but the parameter is declared as a list.
        conditions=[],
        short_name='sap hen',
        id=4
    ),
    'Adoneus': Meter(
        'Adoneus',
        r'(–)(⏑⏑)(–)(⏑|–)',
        short_name='adoneus',
        id=5
    ),
    'Phalaecian Hendecasyllable': Meter(
        'Phalaecian Hendecasyllable',
        AEOLIC_BASE + r'(–)(⏑)(⏑)(–)(⏑)(–)(⏑)(–)(⏑|–)',
        breaks=[[('element', 6, 'After sixth element')]],
        short_name='hen',
        id=6
    ),
}
def get_reading_meter_combinations(readings, meters=ALL_METERS):
    """Pair every reading with every meter and compute their features.

    Args:
        readings: Iterable of readings.
        meters: Either a dict whose values are ``Meter`` objects (such as
            ``ALL_METERS``) or a plain iterable of ``Meter`` objects.

    Returns:
        A list of ``[reading, meter, rmfeatures]`` lists, one per
        combination, where ``rmfeatures`` is a dict keyed by
        ``ReadingMeterFeatures`` members (plus whatever features the
        meter's conditions return, each set to 1).
    """
    # Imported locally because itertools is not among the imports visible
    # at the top of this module.
    import itertools

    # Fixed: iterating a dict yields its keys (the meter names), so the
    # default ALL_METERS produced strings instead of Meter objects.
    meter_objects = meters.values() if isinstance(meters, dict) else meters

    reading_meter_rmfeatures = [
        [reading, meter, {}]
        for reading, meter
        in itertools.product(readings, meter_objects)
    ]
    for reading, meter, rmfeatures in reading_meter_rmfeatures:
        rmfeatures[ReadingMeterFeatures.DOES_NOT_FIT_METER] = (
            meter.match_reading(reading) is None)
        # XXX: Implement this.
        rmfeatures[ReadingMeterFeatures.NECESSARY_CHANGES_TO_MAKE_IT_FIT] = 0
        rmfeatures[ReadingMeterFeatures.METER] = meter.id
        # Fixed: the feature says that NO usual break is present, so the
        # result of reading_has_usual_breaks has to be negated.
        rmfeatures[ReadingMeterFeatures.NO_USUAL_BREAK_PRESENT] = int(
            not meter.reading_has_usual_breaks(reading))
        for feature in meter.collect_condition_features(reading):
            rmfeatures[feature] = 1
    return reading_meter_rmfeatures
#!/usr/bin/env python3 #!/usr/bin/env python3
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
from collections import defaultdict
import itertools import itertools
import json import json
import os import os
...@@ -360,6 +361,7 @@ class Reading: ...@@ -360,6 +361,7 @@ class Reading:
def __init__(self, tokens: List[Token] = None, phenomena: dict = None):
    """Create a reading from optional tokens and phenomena.

    Falsy arguments are replaced by a fresh empty list or dict.
    ``features`` is a mapping whose missing keys read as 0, so callers
    can increment a feature count without initialising it first.
    """
    self.tokens = tokens if tokens else list()
    self.phenomena = phenomena if phenomena else dict()
    self.features = defaultdict(lambda: 0)
@classmethod @classmethod
def from_json(cls, json_file): def from_json(cls, json_file):
......
...@@ -7,6 +7,7 @@ from itertools import product ...@@ -7,6 +7,7 @@ from itertools import product
from .db import FormAnalysis from .db import FormAnalysis
from .model import Reading, Syllable, Token, Verse, Phenomenon from .model import Reading, Syllable, Token, Verse, Phenomenon
from .features import ReadingFeature
from .wordlist import WordList from .wordlist import WordList
CLITICS = ['que', 'qve', 'ue', 've', 'ne'] CLITICS = ['que', 'qve', 'ue', 've', 'ne']
...@@ -468,6 +469,7 @@ def generate_synizesis(reading): ...@@ -468,6 +469,7 @@ def generate_synizesis(reading):
syllable.syllable_length = 2 syllable.syllable_length = 2
syllable.vowel_length = 2 syllable.vowel_length = 2
syllable.phenomena['synizesis'] = Phenomenon(chars=syn_dict[syllable.id][3]) syllable.phenomena['synizesis'] = Phenomenon(chars=syn_dict[syllable.id][3])
reading.features[ReadingFeature.SYNIZESIS] += 1
for s in token.syllables[j+2:]: for s in token.syllables[j+2:]:
s.id -= 1 s.id -= 1
...@@ -635,6 +637,7 @@ def parse_verse(verse): ...@@ -635,6 +637,7 @@ def parse_verse(verse):
(s.phenomena['positional lengthening'] (s.phenomena['positional lengthening']
.overruled_by) = 'muta cum liquida' .overruled_by) = 'muta cum liquida'
elif blueprint[syll_id] == '2': elif blueprint[syll_id] == '2':
reading.features[ReadingFeature.MCL_TRIGGERS_PL] += 1
s.syllable_length = 2 s.syllable_length = 2
syll_id += 1 syll_id += 1
......
# -*- coding: utf-8 -*-
from enum import Enum
class ReadingFeature(Enum):
    """Features that are counted per reading.

    Members serve as keys of ``Reading.features``, a mapping whose missing
    keys read as 0 and whose values are incremented while a verse is parsed.
    """
    # Incremented in parse_verse in the branch that sets a syllable's
    # length to 2 (presumably: muta cum liquida triggers positional
    # lengthening -- confirm the abbreviation).
    MCL_TRIGGERS_PL = 0
    # Incremented in generate_synizesis for each synizesis it applies.
    SYNIZESIS = 1
    S_ELISION = 2
    HIAT = 3
class ReadingMeterFeatures(Enum):
    """Features of one reading/meter combination.

    Members serve as keys of the feature dicts that
    get_reading_meter_combinations builds.  The values start at 10 and
    therefore do not collide with those of ReadingFeature.
    """
    # True if the meter's schema does not match the reading.
    DOES_NOT_FIT_METER = 10
    # Currently always set to 0 (marked "XXX: Implement this").
    NECESSARY_CHANGES_TO_MAKE_IT_FIT = 11
    # Holds the ``id`` of the meter.
    METER = 12
    NO_USUAL_BREAK_PRESENT = 13
    # Passed to ``bridge`` for Hermann’s Bridge of the hexameter.
    HEXAMETER_BRIDGE_VIOLATED = 14
class CombinedFeatures(Enum):
    """Reading and reading/meter features in one consecutive numbering.

    NOTE(review): unlike ReadingFeature and ReadingMeterFeatures, this
    enum has no HIAT and no METER member, and it names the bridge feature
    BRIDGES_VIOLATED rather than HEXAMETER_BRIDGE_VIOLATED -- confirm
    that these differences are intended.
    """
    MCL_TRIGGERS_PL = 0
    SYNIZESIS = 1
    S_ELISION = 2
    DOES_NOT_FIT_METER = 3
    NECESSARY_CHANGES_TO_MAKE_IT_FIT = 4
    NO_USUAL_BREAK_PRESENT = 5
    BRIDGES_VIOLATED = 6
...@@ -41,7 +41,7 @@ def main(hypotactic_dir, outfile): ...@@ -41,7 +41,7 @@ def main(hypotactic_dir, outfile):
pair[0] = list(pair[0]) pair[0] = list(pair[0])
with open(outfile, 'w') as f: with open(outfile, 'w') as f:
obj = {'poem_meters': poem_meters, obj = {'poem_meters': poem_meters,
'line_meters': line_meters} 'line_meters': line_meters}
json.dump(obj, f, indent=2) json.dump(obj, f, indent=2)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment