Skip to content
Snippets Groups Projects
Commit 33192679 authored by Simon Will
Browse files

Implement features and overhaul meters

parent 42c41827
No related branches found
No related tags found
No related merge requests found
......@@ -3,104 +3,144 @@
import re
from .features import ReadingMeterFeatures
from .model import Reading, Position
def caesurae_together(position_specs, reward):
    """Build a meter condition that rewards a group of joint caesurae.

    Args:
        position_specs: Iterable of tuples whose first two items (unit and
            number, e.g. ``('mora', 6, ...)``) are passed to
            ``Position.after``.
        reward: Value returned when every listed position is a word
            boundary.

    Returns:
        A function ``get_reward(meter, reading)`` returning ``reward`` if
        all positions have a truthy ``word_boundary`` and ``0`` as soon as
        one of them does not.
    """
    def get_reward(meter: Meter, reading: Reading):
        for spec in position_specs:
            position = Position.after(spec[0], reading, spec[1], meter)
            if not position.word_boundary:
                return 0
        # Only reached when no position failed the check, so that all
        # caesurae of the group are required together.
        return reward
    return get_reward
def bridge(position_spec, feature):
    """Build a meter condition that reports a violated bridge.

    Args:
        position_spec: Tuple whose first two items (unit and number, e.g.
            ``('mora', 15, ...)``) identify the position to inspect.
        feature: Value to report when the bridge is violated.

    Returns:
        A function ``get_feature(meter, reading)`` returning ``feature``
        when the position is a word boundary and ``None`` otherwise.
    """
    def get_feature(meter: Meter, reading: Reading):
        # NOTE(review): the meter is passed before the position number
        # here, whereas the other Position.after calls in this module pass
        # the number first -- confirm against Position.after's signature.
        position = Position.after(position_spec[0], reading, meter,
                                  position_spec[1])
        # A bridge is a place where a word end is avoided, so finding a
        # word boundary there is what constitutes the violation.
        if position.word_boundary:
            return feature
        return None
    return get_feature
class Meter:
    """A named metrical scheme with optional break and condition checks.

    Attributes:
        name: Full name of the meter.
        schema: Regular expression matched against a reading's schema.
        break_specs: List of alternatives; each alternative is a list of
            ``(unit, number, ...)`` tuples that must all be word boundaries.
        conditions: Condition functions bound to this instance, so each is
            called with the reading only.
        short_name: Abbreviated name of the meter.
        id: Numeric identifier of the meter.
    """

    def __init__(self, name: str, schema: str, breaks: list = None,
                 conditions: list = None, short_name: str = None,
                 id: int = None):
        self.name = name
        self.schema = schema
        self.break_specs = breaks
        # Convert condition functions to instance-bound methods.
        self.conditions = ([cond.__get__(self) for cond in conditions]
                           if conditions else [])
        self.short_name = short_name
        self.id = id

    def match_reading(self, reading: Reading):
        """Return the ``re.match`` result of the schema on the reading."""
        return re.match(self.schema, reading.get_schema())

    def get_rewards(self, reading: Reading):
        """Return the sum of all condition results for ``reading``.

        Only meaningful for conditions that return numbers.
        """
        return sum(cond(reading) for cond in self.conditions)

    def collect_condition_features(self, reading: Reading):
        """Return the truthy results of all conditions for ``reading``."""
        results = (cond(reading) for cond in self.conditions)
        return [feature for feature in results if feature]

    def reading_has_usual_breaks(self, reading: Reading):
        """Tell whether ``reading`` satisfies one of the break alternatives.

        Returns ``True`` when no break specs are configured, or when every
        position of at least one alternative is a word boundary; ``False``
        otherwise.
        """
        if not self.break_specs:
            return True
        return any(
            all(self._is_word_boundary(spec, reading) for spec in breaks)
            for breaks in self.break_specs
        )

    def _is_word_boundary(self, spec, reading):
        """Check one ``(unit, number, ...)`` spec against ``reading``."""
        position = Position.after(spec[0], reading, spec[1], self)
        # The position object may lack the attribute; treat that as
        # "no word boundary".
        return bool(getattr(position, 'word_boundary', False))
# Regex for the two opening elements of the Phalaecian hendecasyllable:
# – –, – ⏑ or ⏑ –.
AEOLIC_BASE = r'(?:(–)(–)|(–)(⏑)|(⏑)(–))'

# Known meters keyed by their full name. Each ``breaks`` entry is a list of
# alternatives; an alternative is a list of (unit, number, label) tuples.
ALL_METERS = {
    'Catalectic Dactylic Hexameter': Meter(
        'Catalectic Dactylic Hexameter',
        r'(–)(⏑⏑|–)(–)(⏑⏑|–)(–)(⏑⏑|–)(–)(⏑⏑|–)(–)(⏑⏑|–)(⏑|–)',
        conditions=[
            bridge(('mora', 15, 'Hermann’s Bridge'),
                   ReadingMeterFeatures.HEXAMETER_BRIDGE_VIOLATED)
        ],
        breaks=[
            [('mora', 6, 'Trithemimeral'), ('mora', 14, 'Hephthemimeral')],
            [('mora', 10, 'Penthemimeral')],
            [('mora', 16, 'Bucolic Diaeresis')]
        ],
        short_name='6da‸',
        id=0
    ),
    'Dactylic Pentameter': Meter(
        'Dactylic Pentameter',
        r'(–)(⏑⏑|–)(–)(⏑⏑|–)(–)(–)(⏑⏑)(–)(⏑⏑)(⏑|–)',
        breaks=[[('mora', 5, 'Middle diaeresis')]],
        short_name='3da‸3da‸',
        id=1
    ),
    'Iambic Trimeter': Meter(
        'Iambic Trimeter',
        r'(⏑|⏑⏑|–)(⏑⏑|–)(⏑)(⏑⏑|–)(⏑|⏑⏑|–)(⏑⏑|–)(⏑)(⏑⏑|–)(⏑|⏑⏑|–)(⏑⏑|–)(⏑)(⏑|–)',
        breaks=[
            # The separating comma matters: without it the second list
            # would be parsed as a subscript of the first.
            [('element', 4, 'After fourth element')],
            [('element', 8, 'After eighth element')]
        ],
        short_name='3ia',
        id=2
    ),
    'Iambic Senarius': Meter(
        'Iambic Senarius',
        r'(⏑|⏑⏑|–)(⏑⏑|–)(⏑|⏑⏑|–)(⏑⏑|–)(⏑|⏑⏑|–)(⏑⏑|–)(⏑|⏑⏑|–)(⏑⏑|–)(⏑|⏑⏑|–)(⏑⏑|–)(⏑)(⏑|–)',
        short_name='6ia',
        id=3
    ),
    'Sapphic Hendecasyllable': Meter(
        'Sapphic Hendecasyllable',
        r'(–)(–|⏑)(–)(–|⏑)(–)(⏑)(⏑)(–)(⏑)(–)(⏑|–)',
        short_name='sap hen',
        id=4
    ),
    'Adoneus': Meter(
        'Adoneus',
        r'(–)(⏑⏑)(–)(⏑|–)',
        short_name='adoneus',
        id=5
    ),
    'Phalaecian Hendecasyllable': Meter(
        'Phalaecian Hendecasyllable',
        AEOLIC_BASE + r'(–)(⏑)(⏑)(–)(⏑)(–)(⏑)(–)(⏑|–)',
        breaks=[[('element', 6, 'After sixth element')]],
        short_name='hen',
        id=6
    ),
}
def get_reading_meter_combinations(readings, meters=ALL_METERS):
    """Pair every reading with every meter and compute their features.

    Args:
        readings: Iterable of readings.
        meters: Either a mapping whose values are ``Meter`` objects (such
            as ``ALL_METERS``) or a plain iterable of ``Meter`` objects.

    Returns:
        A list of ``[reading, meter, rmfeatures]`` lists, one per
        combination, where ``rmfeatures`` is a dict keyed by
        ``ReadingMeterFeatures`` members (plus any features reported by the
        meter's conditions, each set to ``1``).
    """
    # Iterating a mapping would yield the meter names, not the Meter
    # objects whose methods are called below.
    meter_objects = list(meters.values() if hasattr(meters, 'values')
                         else meters)
    reading_meter_rmfeatures = [
        [reading, meter, {}]
        for reading in readings
        for meter in meter_objects
    ]
    for reading, meter, rmfeatures in reading_meter_rmfeatures:
        rmfeatures[ReadingMeterFeatures.DOES_NOT_FIT_METER] = (
            meter.match_reading(reading) is None)
        # XXX: Implement this.
        rmfeatures[ReadingMeterFeatures.NECESSARY_CHANGES_TO_MAKE_IT_FIT] = 0
        rmfeatures[ReadingMeterFeatures.METER] = meter.id
        # The feature marks the ABSENCE of a usual break, hence the negation.
        rmfeatures[ReadingMeterFeatures.NO_USUAL_BREAK_PRESENT] = int(
            not meter.reading_has_usual_breaks(reading))
        for feature in meter.collect_condition_features(reading):
            rmfeatures[feature] = 1
    return reading_meter_rmfeatures
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
from collections import defaultdict
import itertools
import json
import os
......@@ -360,6 +361,7 @@ class Reading:
def __init__(self, tokens: List[Token] = None, phenomena: dict = None):
self.tokens = tokens or list()
self.phenomena = phenomena or dict()
self.features = defaultdict(lambda: 0)
@classmethod
def from_json(cls, json_file):
......
......@@ -7,6 +7,7 @@ from itertools import product
from .db import FormAnalysis
from .model import Reading, Syllable, Token, Verse, Phenomenon
from .features import ReadingFeature
from .wordlist import WordList
CLITICS = ['que', 'qve', 'ue', 've', 'ne']
......@@ -468,6 +469,7 @@ def generate_synizesis(reading):
syllable.syllable_length = 2
syllable.vowel_length = 2
syllable.phenomena['synizesis'] = Phenomenon(chars=syn_dict[syllable.id][3])
reading.features[ReadingFeature.SYNIZESIS] += 1
for s in token.syllables[j+2:]:
s.id -= 1
......@@ -635,6 +637,7 @@ def parse_verse(verse):
(s.phenomena['positional lengthening']
.overruled_by) = 'muta cum liquida'
elif blueprint[syll_id] == '2':
reading.features[ReadingFeature.MCL_TRIGGERS_PL] += 1
s.syllable_length = 2
syll_id += 1
......
# -*- coding: utf-8 -*-
from enum import Enum
class ReadingFeature(Enum):
    """Keys for features counted on a single reading."""

    MCL_TRIGGERS_PL = 0
    SYNIZESIS = 1
    S_ELISION = 2
    HIAT = 3
class ReadingMeterFeatures(Enum):
    """Keys for features of a reading combined with a meter."""

    DOES_NOT_FIT_METER = 10
    NECESSARY_CHANGES_TO_MAKE_IT_FIT = 11
    METER = 12
    NO_USUAL_BREAK_PRESENT = 13
    HEXAMETER_BRIDGE_VIOLATED = 14
class CombinedFeatures(Enum):
    """Consecutively numbered keys covering reading and meter features."""

    MCL_TRIGGERS_PL = 0
    SYNIZESIS = 1
    S_ELISION = 2
    DOES_NOT_FIT_METER = 3
    NECESSARY_CHANGES_TO_MAKE_IT_FIT = 4
    NO_USUAL_BREAK_PRESENT = 5
    BRIDGES_VIOLATED = 6
......@@ -41,7 +41,7 @@ def main(hypotactic_dir, outfile):
pair[0] = list(pair[0])
with open(outfile, 'w') as f:
obj = {'poem_meters': poem_meters,
'line_meters': line_meters}
'line_meters': line_meters}
json.dump(obj, f, indent=2)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment