diff --git a/allzweckmesser/meters.py b/allzweckmesser/meters.py
index fdfcc9d4c042f527f0e1bc8b18ec833ec0b3f4e0..0ffa84c072d36e2caa82ed001cad6feb65326447 100644
--- a/allzweckmesser/meters.py
+++ b/allzweckmesser/meters.py
@@ -3,104 +3,165 @@
 
+import itertools
 import re
 
+from .features import ReadingMeterFeatures
 from .model import Reading, Position
 
 
-def caesurae_together(position_specs, reward):
-    def get_reward(meter: Meter, reading: Reading):
-        for spec in position_specs:
-            position = Position.after(spec[0], reading, spec[1], meter)
-            if not position.word_boundary:
-                return 0
-        else:
-            return reward
-    return get_reward
-
-
-def bridge(position_spec, reward):
-    def get_reward(meter: Meter, reading: Reading):
+def bridge(position_spec, feature):
+    """Create a condition that reports `feature` on a bridge violation.
+
+    `position_spec` is a ``(unit, number, name)`` tuple.  The returned
+    function is bound to a Meter as a method; it returns `feature` if the
+    reading has a word boundary after that position and None otherwise.
+    """
+    def get_feature(meter: Meter, reading: Reading):
         position = Position.after(position_spec[0], reading, meter,
                                   position_spec[1])
-        if position.word_boundary:
-            return 0
-        else:
-            return reward
-    return get_reward
+        # A bridge is violated by a word boundary, not by its absence.
+        if getattr(position, 'word_boundary', False):
+            return feature
+        return None
+    return get_feature
 
 
 class Meter:
 
-    def __init__(self, name: str, schema: str, conditions: list = None,
-                 short_name: str = None):
+    def __init__(self, name: str, schema: str, breaks: list = None,
+                 conditions: list = None, short_name: str = None,
+                 id: int = None):
         self.name = name
         self.schema = schema
+        self.break_specs = breaks
         # Convert condition functions to instance-bound methods.
         self.conditions = ([cond.__get__(self) for cond in conditions]
                            if conditions else [])
         self.short_name = short_name
+        self.id = id
 
     def match_reading(self, reading: Reading):
         return re.match(self.schema, reading.get_schema())
 
-    def get_rewards(self, reading: Reading):
-        return sum(cond(reading) for cond in self.conditions)
+    def collect_condition_features(self, reading: Reading):
+        """Return the truthy features reported by the conditions."""
+        features = []
+        for cond in self.conditions:
+            feature = cond(reading)
+            if feature:
+                features.append(feature)
+        return features
+
+    def reading_has_usual_breaks(self, reading: Reading):
+        """Return True if the reading has one of the meter's usual breaks.
+
+        Each entry of `break_specs` is a group of positions that must all
+        be word boundaries; one fully satisfied group is enough.  A meter
+        without break specs accepts every reading.
+        """
+        if not self.break_specs:
+            return True
+        for breaks in self.break_specs:
+            # NOTE(review): bridge() passes (unit, reading, meter, number)
+            # to Position.after; confirm which argument order is right.
+            positions = (Position.after(b[0], reading, b[1], self)
+                         for b in breaks)
+            if all(getattr(position, 'word_boundary', False)
+                   for position in positions):
+                return True
+        return False
 
 
 AEOLIC_BASE = r'(?:(–)(–)|(–)(â‘)|(â‘)(–))'
 
 ALL_METERS = {
-
     'Catalectic Dactylic Hexameter': Meter(
         'Catalectic Dactylic Hexameter',
         r'(–)(â‘â‘|–)(–)(â‘â‘|–)(–)(â‘â‘|–)(–)(â‘â‘|–)(–)(â‘â‘|–)(â‘|–)',
-        conditions={
-            caesurae_together([('mora', 6, 'Trithemimeral'),
-                               ('mora', 14, 'Hephthemimeral')], 2),
-            caesurae_together([('mora', 10, 'Penthemimeral')], 2),
-            caesurae_together([('mora', 16, 'Bucolic Diaeresis')], 1),
-            bridge(('mora', 15, 'Hermann’s Bridge'), 1)
-        },
-        short_name='6da‸'
+        conditions=[
+            bridge(('mora', 15, 'Hermann’s Bridge'),
+                   ReadingMeterFeatures.HEXAMETER_BRIDGE_VIOLATED)
+        ],
+        breaks=[
+            [('mora', 6, 'Trithemimeral'), ('mora', 14, 'Hephthemimeral')],
+            [('mora', 10, 'Penthemimeral')],
+            [('mora', 16, 'Bucolic Diaeresis')]
+        ],
+        short_name='6da‸',
+        id=0
     ),
     'Dactylic Pentameter': Meter(
         'Dactylic Pentameter',
         r'(–)(â‘â‘|–)(–)(â‘â‘|–)(–)(–)(â‘â‘)(–)(â‘â‘)(â‘|–)',
-        conditions={
-            caesurae_together([('mora', 5, 'Middle diaresis')], 2)
-        },
-        short_name='3da‸3da‸'
+        breaks=[[('mora', 5, 'Middle diaeresis')]],
+        short_name='3da‸3da‸',
+        id=1
     ),
     'Iambic Trimeter': Meter(
         'Iambic Trimeter',
         r'(â‘|â‘â‘|–)(â‘â‘|–)(â‘)(â‘â‘|–)(â‘|â‘â‘|–)(â‘â‘|–)(â‘)(â‘â‘|–)(â‘|â‘â‘|–)(â‘â‘|–)(â‘)(â‘|–)',
-        conditions={
-            caesurae_together([('element', 4, 'After fourth element')], 1),
-            caesurae_together([('element', 8, 'After eighth element')], 1),
-        },
-        short_name='3ia'
+        breaks=[
+            [('element', 4, 'After fourth element')],
+            [('element', 8, 'After eighth element')]
+        ],
+        short_name='3ia',
+        id=2
     ),
     'Iambic Senarius': Meter(
         'Iambic Senarius',
         r'(â‘|â‘â‘|–)(â‘â‘|–)(â‘|â‘â‘|–)(â‘â‘|–)(â‘|â‘â‘|–)(â‘â‘|–)(â‘|â‘â‘|–)(â‘â‘|–)(â‘|â‘â‘|–)(â‘â‘|–)(â‘)(â‘|–)',
-        short_name='6ia'
+        short_name='6ia',
+        id=3
     ),
     'Sapphic Hendecasyllable': Meter(
         'Sapphic Hendecasyllable',
         r'(–)(–|â‘)(–)(–|â‘)(–)(â‘)(â‘)(–)(â‘)(–)(â‘|–)',
         conditions={},
-        short_name='sap hen'
+        short_name='sap hen',
+        id=4
     ),
     'Adoneus': Meter(
         'Adoneus',
         r'(–)(â‘â‘)(–)(â‘|–)',
         short_name='adoneus',
+        id=5
     ),
     'Phalaecian Hendecasyllable': Meter(
         'Phalaecian Hendecasyllable',
         AEOLIC_BASE + r'(–)(â‘)(â‘)(–)(â‘)(–)(â‘)(–)(â‘|–)',
-        conditions={
-            caesurae_together([('element', 6, 'After sixth element')], 1)
-        },
-        short_name='hen'
+        breaks=[[('element', 6, 'After sixth element')]],
+        short_name='hen',
+        id=6
     ),
 }
+
+
+def get_reading_meter_combinations(readings, meters=ALL_METERS):
+    """Pair every reading with every meter and compute their features.
+
+    `meters` may be a mapping of names to Meter objects (like ALL_METERS)
+    or any iterable of Meter objects.  Returns a list of
+    ``[reading, meter, rmfeatures]`` entries, where `rmfeatures` is a dict
+    keyed by feature enum members.
+    """
+    if isinstance(meters, dict):
+        # Iterating a dict yields its keys; the Meter objects are needed.
+        meters = meters.values()
+    reading_meter_rmfeatures = [
+        [reading, meter, {}]
+        for reading, meter
+        in itertools.product(readings, meters)
+    ]
+    for reading, meter, rmfeatures in reading_meter_rmfeatures:
+        rmfeatures[ReadingMeterFeatures.DOES_NOT_FIT_METER] = (
+            meter.match_reading(reading) is None)
+
+        # XXX: Implement this.
+        rmfeatures[ReadingMeterFeatures.NECESSARY_CHANGES_TO_MAKE_IT_FIT] = 0
+
+        rmfeatures[ReadingMeterFeatures.METER] = meter.id
+        # The feature flags a missing break, hence the negation.
+        rmfeatures[ReadingMeterFeatures.NO_USUAL_BREAK_PRESENT] = int(
+            not meter.reading_has_usual_breaks(reading))
+        for feature in meter.collect_condition_features(reading):
+            rmfeatures[feature] = 1
+    return reading_meter_rmfeatures
diff --git a/allzweckmesser/model.py b/allzweckmesser/model.py
index fe2edfbf97f26aa26ca9e4af7c2371abe5b0444d..f86b31429fb6e312fc01846da4f1fffba243e0c8 100644
--- a/allzweckmesser/model.py
+++ b/allzweckmesser/model.py
@@ -1,6 +1,7 @@
 #!/usr/bin/env python3
 # -*- coding: utf-8 -*-
 
+from collections import defaultdict
 import itertools
 import json
 import os
@@ -360,6 +361,7 @@ class Reading:
     def __init__(self, tokens: List[Token] = None, phenomena: dict = None):
         self.tokens = tokens or list()
         self.phenomena = phenomena or dict()
+        self.features = defaultdict(int)
 
     @classmethod
     def from_json(cls, json_file):
diff --git a/allzweckmesser/scanner.py b/allzweckmesser/scanner.py
index d074a282204a0f2e6c2b34f05f87aa02bf58cdbe..393ebb258d043e645fd6f50149c81355a8124078 100644
--- a/allzweckmesser/scanner.py
+++ b/allzweckmesser/scanner.py
@@ -7,6 +7,7 @@ from itertools import product
 
 from .db import FormAnalysis
 from .model import Reading, Syllable, Token, Verse, Phenomenon
+from .features import ReadingFeature
 from .wordlist import WordList
 
 CLITICS = ['que', 'qve', 'ue', 've', 'ne']
@@ -468,6 +469,7 @@ def generate_synizesis(reading):
                     syllable.syllable_length = 2
                     syllable.vowel_length = 2
                     syllable.phenomena['synizesis'] = Phenomenon(chars=syn_dict[syllable.id][3])
+                    reading.features[ReadingFeature.SYNIZESIS] += 1
 
                     for s in token.syllables[j+2:]:
                         s.id -= 1
@@ -635,6 +637,7 @@ def parse_verse(verse):
                             (s.phenomena['positional lengthening']
                              .overruled_by) = 'muta cum liquida'
                         elif blueprint[syll_id] == '2':
+                            reading.features[ReadingFeature.MCL_TRIGGERS_PL] += 1
                             s.syllable_length = 2
                         syll_id += 1
 
diff --git a/allzweckmesser/features.py b/allzweckmesser/features.py
new file mode 100644
index 0000000000000000000000000000000000000000..e6a80e88c31c3f786be5cc5d132c0234e9fa7a0d
--- /dev/null
+++ b/allzweckmesser/features.py
@@ -0,0 +1,31 @@
+# -*- coding: utf-8 -*-
+
+from enum import Enum
+
+
+class ReadingFeature(Enum):
+    """Keys of the per-reading counters kept in `Reading.features`."""
+    MCL_TRIGGERS_PL = 0
+    SYNIZESIS = 1
+    S_ELISION = 2
+    HIAT = 3
+
+
+class ReadingMeterFeatures(Enum):
+    """Keys of the feature dict built for each reading/meter pair."""
+    DOES_NOT_FIT_METER = 10
+    NECESSARY_CHANGES_TO_MAKE_IT_FIT = 11
+    METER = 12
+    NO_USUAL_BREAK_PRESENT = 13
+    HEXAMETER_BRIDGE_VIOLATED = 14
+
+
+class CombinedFeatures(Enum):
+    # NOTE(review): not referenced in this patch; presumably the merged
+    # numbering of reading and reading/meter features -- confirm.
+    MCL_TRIGGERS_PL = 0
+    SYNIZESIS = 1
+    S_ELISION = 2
+    DOES_NOT_FIT_METER = 3
+    NECESSARY_CHANGES_TO_MAKE_IT_FIT = 4
+    NO_USUAL_BREAK_PRESENT = 5
+    BRIDGES_VIOLATED = 6
diff --git a/scripts/extract_meters.py b/scripts/extract_meters.py
index 29ab5de9f03b1dad2a192702cf78c5c33a2b8804..1c2d583be11809bd0d4801f1eb5724b8b4b36350 100644
--- a/scripts/extract_meters.py
+++ b/scripts/extract_meters.py
@@ -41,7 +41,7 @@ def main(hypotactic_dir, outfile):
             pair[0] = list(pair[0])
     with open(outfile, 'w') as f:
         obj = {'poem_meters': poem_meters,
-            'line_meters': line_meters}
+               'line_meters': line_meters}
         json.dump(obj, f, indent=2)
 
 