Loading allzweckmesser/meters.py +84 −44 Original line number Diff line number Diff line Loading @@ -3,104 +3,144 @@ import re from .features import ReadingMeterFeatures from .model import Reading, Position def caesurae_together(position_specs, reward): def get_reward(meter: Meter, reading: Reading): for spec in position_specs: position = Position.after(spec[0], reading, spec[1], meter) if not position.word_boundary: return 0 else: return reward return get_reward def bridge(position_spec, reward): def get_reward(meter: Meter, reading: Reading): def bridge(position_spec, feature): def get_feature(meter: Meter, reading: Reading): position = Position.after(position_spec[0], reading, meter, position_spec[1]) if position.word_boundary: return 0 return None else: return reward return get_reward return feature return get_feature class Meter: def __init__(self, name: str, schema: str, conditions: list = None, short_name: str = None): def __init__(self, name: str, schema: str, breaks: list = None, conditions: list = None, short_name: str = None, id: int = None): self.name = name self.schema = schema self.break_specs = breaks # Convert condition functions to instance-bound methods. 
self.conditions = ([cond.__get__(self) for cond in conditions] if conditions else []) self.short_name = short_name self.id = id def match_reading(self, reading: Reading): return re.match(self.schema, reading.get_schema()) def get_rewards(self, reading: Reading): return sum(cond(reading) for cond in self.conditions) def collect_condition_features(self, reading: Reading): features = [] for cond in self.conditions: feature = condition(reading) if feature: features.append(feature) return features def reading_has_usual_breaks(self, reading: Reading): if self.break_specs: for breaks in self.break_specs: satisfied = True for b in breaks: position = Position.after(b[0], reading, b[1], self) if not (hasattr(position, 'word_boundary') and position.word_boundary): satisfied = False break if satisfied: return True else: return False else: return True AEOLIC_BASE = r'(?:(–)(–)|(–)(⏑)|(⏑)(–))' ALL_METERS = { 'Catalectic Dactylic Hexameter': Meter( 'Catalectic Dactylic Hexameter', r'(–)(⏑⏑|–)(–)(⏑⏑|–)(–)(⏑⏑|–)(–)(⏑⏑|–)(–)(⏑⏑|–)(⏑|–)', conditions={ caesurae_together([('mora', 6, 'Trithemimeral'), ('mora', 14, 'Hephthemimeral')], 2), caesurae_together([('mora', 10, 'Penthemimeral')], 2), caesurae_together([('mora', 16, 'Bucolic Diaeresis')], 1), bridge(('mora', 15, 'Hermann’s Bridge'), 1) }, short_name='6da‸' conditions=[ bridge(('mora', 15, 'Hermann’s Bridge'), ReadingMeterFeatures.HEXAMETER_BRIDGE_VIOLATED) ], breaks=[ [('mora', 6, 'Trithemimeral'), ('mora', 14, 'Hephthemimeral')], [('mora', 10, 'Penthemimeral')], [('mora', 16, 'Bucolic Diaeresis')] ], short_name='6da‸', id=0 ), 'Dactylic Pentameter': Meter( 'Dactylic Pentameter', r'(–)(⏑⏑|–)(–)(⏑⏑|–)(–)(–)(⏑⏑)(–)(⏑⏑)(⏑|–)', conditions={ caesurae_together([('mora', 5, 'Middle diaresis')], 2) }, short_name='3da‸3da‸' breaks=[[('mora', 5, 'Middle diaeresis')]], short_name='3da‸3da‸', id=1 ), 'Iambic Trimeter': Meter( 'Iambic Trimeter', r'(⏑|⏑⏑|–)(⏑⏑|–)(⏑)(⏑⏑|–)(⏑|⏑⏑|–)(⏑⏑|–)(⏑)(⏑⏑|–)(⏑|⏑⏑|–)(⏑⏑|–)(⏑)(⏑|–)', conditions={ 
caesurae_together([('element', 4, 'After fourth element')], 1), caesurae_together([('element', 8, 'After eighth element')], 1), }, short_name='3ia' breaks=[ [('element', 4, 'After fourth element')] [('element', 8, 'After eighth element')] ], short_name='3ia', id=2 ), 'Iambic Senarius': Meter( 'Iambic Senarius', r'(⏑|⏑⏑|–)(⏑⏑|–)(⏑|⏑⏑|–)(⏑⏑|–)(⏑|⏑⏑|–)(⏑⏑|–)(⏑|⏑⏑|–)(⏑⏑|–)(⏑|⏑⏑|–)(⏑⏑|–)(⏑)(⏑|–)', short_name='6ia' short_name='6ia', id=3 ), 'Sapphic Hendecasyllable': Meter( 'Sapphic Hendecasyllable', r'(–)(–|⏑)(–)(–|⏑)(–)(⏑)(⏑)(–)(⏑)(–)(⏑|–)', conditions={}, short_name='sap hen' short_name='sap hen', id=4 ), 'Adoneus': Meter( 'Adoneus', r'(–)(⏑⏑)(–)(⏑|–)', short_name='adoneus', id=5 ), 'Phalaecian Hendecasyllable': Meter( 'Phalaecian Hendecasyllable', AEOLIC_BASE + r'(–)(⏑)(⏑)(–)(⏑)(–)(⏑)(–)(⏑|–)', conditions={ caesurae_together([('element', 6, 'After sixth element')], 1) }, short_name='hen' breaks=[[('element', 6, 'After sixth element')]], short_name='hen', id=6 ), } def get_reading_meter_combinations(readings, meters=ALL_METERS): reading_meter_rmfeatures = [ [reading, meter, {}] for reading, meter in itertools.product(readings, meters) ] for reading, meter, rmfeatures in reading_meter_rmfeatures: rmfeatures[ReadingMeterFeatures.DOES_NOT_FIT_METER] = ( meter.match_reading(reading) is None) # XXX: Implement this. 
rmfeatures[ReadingMeterFeatures.NECESSARY_CHANGES_TO_MAKE_IT_FIT] = 0 rmfeatures[ReadingMeterFeatures.METER] = meter.id rmfeatures[ReadingMeterFeatures.NO_USUAL_BREAK_PRESENT] = int( meter.reading_has_usual_breaks(reading)) for feature in meter.collect_condition_features(reading): rmfeatures[feature] = 1 return reading_meter_rmfeatures allzweckmesser/model.py +2 −0 Original line number Diff line number Diff line #!/usr/bin/env python3 # -*- coding: utf-8 -*- from collections import defaultdict import itertools import json import os Loading Loading @@ -360,6 +361,7 @@ class Reading: def __init__(self, tokens: List[Token] = None, phenomena: dict = None): self.tokens = tokens or list() self.phenomena = phenomena or dict() self.features = defaultdict(lambda: 0) @classmethod def from_json(cls, json_file): Loading allzweckmesser/scanner.py +3 −0 Original line number Diff line number Diff line Loading @@ -7,6 +7,7 @@ from itertools import product from .db import FormAnalysis from .model import Reading, Syllable, Token, Verse, Phenomenon from .features import ReadingFeature from .wordlist import WordList CLITICS = ['que', 'qve', 'ue', 've', 'ne'] Loading Loading @@ -468,6 +469,7 @@ def generate_synizesis(reading): syllable.syllable_length = 2 syllable.vowel_length = 2 syllable.phenomena['synizesis'] = Phenomenon(chars=syn_dict[syllable.id][3]) reading.features[ReadingFeature.SYNIZESIS] += 1 for s in token.syllables[j+2:]: s.id -= 1 Loading Loading @@ -635,6 +637,7 @@ def parse_verse(verse): (s.phenomena['positional lengthening'] .overruled_by) = 'muta cum liquida' elif blueprint[syll_id] == '2': reading.features[ReadingFeature.MCL_TRIGGERS_PL] += 1 s.syllable_length = 2 syll_id += 1 Loading features.py 0 → 100644 +28 −0 Original line number Diff line number Diff line # -*- coding: utf-8 -*- from enum import Enum class ReadingFeature(Enum): MCL_TRIGGERS_PL = 0 SYNIZESIS = 1 S_ELISION = 2 HIAT = 3 class ReadingMeterFeatures(Enum): DOES_NOT_FIT_METER = 10 
NECESSARY_CHANGES_TO_MAKE_IT_FIT = 11 METER = 12 NO_USUAL_BREAK_PRESENT = 13 HEXAMETER_BRIDGE_VIOLATED = 14 class CombinedFeatures(Enum): MCL_TRIGGERS_PL = 0 SYNIZESIS = 1 S_ELISION = 2 DOES_NOT_FIT_METER = 3 NECESSARY_CHANGES_TO_MAKE_IT_FIT = 4 NO_USUAL_BREAK_PRESENT = 5 BRIDGES_VIOLATED = 6 scripts/extract_meters.py +1 −1 File changed.Contains only whitespace changes. Show changes Loading
# allzweckmesser/meters.py
# Meter definitions (schema, usual breaks, bridge conditions) and the
# extraction of per-(reading, meter) features.

import re

from .features import ReadingMeterFeatures
from .model import Reading, Position


def bridge(position_spec, feature):
    """Build a condition function that reports a violated bridge.

    Args:
        position_spec: Tuple whose first two items are the position type
            (e.g. ``'mora'``) and the position number; a third item, the
            human-readable name, is not used here.
        feature: Value the condition returns when the bridge is violated.

    Returns:
        A function ``(meter, reading)`` that returns ``feature`` if the
        reading has a word boundary at the given position, else ``None``.
        It is meant to be passed in ``Meter(conditions=[...])``, where it
        gets bound to the meter instance.
    """
    def get_feature(meter: 'Meter', reading: Reading):
        # NOTE(review): argument order (type, reading, number, meter) is
        # aligned with the call in Meter.reading_has_usual_breaks; the
        # previous call passed the meter third. Confirm against the
        # signature of Position.after.
        position = Position.after(position_spec[0], reading,
                                  position_spec[1], meter)
        # The feature handed in by this module is
        # HEXAMETER_BRIDGE_VIOLATED, so it has to be reported when a word
        # boundary IS present, not when it is absent. A position without
        # a word_boundary attribute is treated as "no boundary", as in
        # Meter.reading_has_usual_breaks.
        if getattr(position, 'word_boundary', False):
            return feature
        return None
    return get_feature


class Meter:
    """A meter: a regex schema plus its usual breaks and extra conditions."""

    def __init__(self, name: str, schema: str, breaks: list = None,
                 conditions: list = None, short_name: str = None,
                 id: int = None):
        """Store the meter description.

        Args:
            name: Full name of the meter.
            schema: Regular expression matched against a reading's schema.
            breaks: List of alternative groups; each group is a list of
                ``(type, number, name)`` position specs.
            conditions: Functions ``(meter, reading)`` returning a feature
                or a falsy value (see ``bridge``).
            short_name: Abbreviated name.
            id: Numeric identifier stored as the ``METER`` feature.
        """
        self.name = name
        self.schema = schema
        self.break_specs = breaks
        # Convert condition functions to instance-bound methods.
        self.conditions = ([cond.__get__(self) for cond in conditions]
                           if conditions else [])
        self.short_name = short_name
        self.id = id

    def match_reading(self, reading: Reading):
        """Return the ``re.match`` result of the schema on the reading."""
        return re.match(self.schema, reading.get_schema())

    def collect_condition_features(self, reading: Reading):
        """Return the truthy features reported by this meter's conditions."""
        features = []
        for cond in self.conditions:
            # The loop variable is ``cond``; calling an undefined
            # ``condition`` here raised NameError.
            feature = cond(reading)
            if feature:
                features.append(feature)
        return features

    def reading_has_usual_breaks(self, reading: Reading):
        """Tell whether the reading shows one of the meter's usual breaks.

        Returns:
            ``True`` if the meter defines no breaks, or if for at least
            one group of break specs every position has a word boundary;
            ``False`` otherwise.
        """
        if not self.break_specs:
            return True
        return any(
            all(self._has_word_boundary_after(spec, reading)
                for spec in group)
            for group in self.break_specs
        )

    def _has_word_boundary_after(self, spec, reading: Reading):
        """Check one ``(type, number, name)`` spec for a word boundary."""
        position = Position.after(spec[0], reading, spec[1], self)
        # Objects without a word_boundary attribute count as "no boundary".
        return bool(getattr(position, 'word_boundary', False))


AEOLIC_BASE = r'(?:(–)(–)|(–)(⏑)|(⏑)(–))'

ALL_METERS = {
    'Catalectic Dactylic Hexameter': Meter(
        'Catalectic Dactylic Hexameter',
        r'(–)(⏑⏑|–)(–)(⏑⏑|–)(–)(⏑⏑|–)(–)(⏑⏑|–)(–)(⏑⏑|–)(⏑|–)',
        conditions=[
            bridge(('mora', 15, 'Hermann’s Bridge'),
                   ReadingMeterFeatures.HEXAMETER_BRIDGE_VIOLATED)
        ],
        breaks=[
            [('mora', 6, 'Trithemimeral'), ('mora', 14, 'Hephthemimeral')],
            [('mora', 10, 'Penthemimeral')],
            [('mora', 16, 'Bucolic Diaeresis')]
        ],
        short_name='6da‸',
        id=0
    ),
    'Dactylic Pentameter': Meter(
        'Dactylic Pentameter',
        r'(–)(⏑⏑|–)(–)(⏑⏑|–)(–)(–)(⏑⏑)(–)(⏑⏑)(⏑|–)',
        breaks=[[('mora', 5, 'Middle diaeresis')]],
        short_name='3da‸3da‸',
        id=1
    ),
    'Iambic Trimeter': Meter(
        'Iambic Trimeter',
        r'(⏑|⏑⏑|–)(⏑⏑|–)(⏑)(⏑⏑|–)(⏑|⏑⏑|–)(⏑⏑|–)(⏑)(⏑⏑|–)(⏑|⏑⏑|–)(⏑⏑|–)(⏑)(⏑|–)',
        breaks=[
            # Two alternative groups; without the separating comma the
            # second list was parsed as a subscript of the first.
            [('element', 4, 'After fourth element')],
            [('element', 8, 'After eighth element')]
        ],
        short_name='3ia',
        id=2
    ),
    'Iambic Senarius': Meter(
        'Iambic Senarius',
        r'(⏑|⏑⏑|–)(⏑⏑|–)(⏑|⏑⏑|–)(⏑⏑|–)(⏑|⏑⏑|–)(⏑⏑|–)(⏑|⏑⏑|–)(⏑⏑|–)(⏑|⏑⏑|–)(⏑⏑|–)(⏑)(⏑|–)',
        short_name='6ia',
        id=3
    ),
    'Sapphic Hendecasyllable': Meter(
        'Sapphic Hendecasyllable',
        r'(–)(–|⏑)(–)(–|⏑)(–)(⏑)(⏑)(–)(⏑)(–)(⏑|–)',
        short_name='sap hen',
        id=4
    ),
    'Adoneus': Meter(
        'Adoneus',
        r'(–)(⏑⏑)(–)(⏑|–)',
        short_name='adoneus',
        id=5
    ),
    'Phalaecian Hendecasyllable': Meter(
        'Phalaecian Hendecasyllable',
        AEOLIC_BASE + r'(–)(⏑)(⏑)(–)(⏑)(–)(⏑)(–)(⏑|–)',
        breaks=[[('element', 6, 'After sixth element')]],
        short_name='hen',
        id=6
    ),
}


def get_reading_meter_combinations(readings, meters=ALL_METERS):
    """Pair every reading with every meter and compute their features.

    Args:
        readings: Iterable of readings.
        meters: Mapping of names to ``Meter`` objects (such as
            ``ALL_METERS``) or a plain iterable of ``Meter`` objects.

    Returns:
        A list of ``[reading, meter, rmfeatures]`` lists, ordered by
        reading and then by meter, where ``rmfeatures`` is a dict keyed
        by ``ReadingMeterFeatures`` members.
    """
    # Iterating a dict yields its keys (the meter names); the Meter
    # objects are its values. Materialize once so that the meters can be
    # reused for every reading.
    meter_objects = list(meters.values() if hasattr(meters, 'values')
                         else meters)
    reading_meter_rmfeatures = [
        [reading, meter, {}]
        for reading in readings
        for meter in meter_objects
    ]
    for reading, meter, rmfeatures in reading_meter_rmfeatures:
        rmfeatures[ReadingMeterFeatures.DOES_NOT_FIT_METER] = (
            meter.match_reading(reading) is None)
        # XXX: Implement this.
        rmfeatures[ReadingMeterFeatures.NECESSARY_CHANGES_TO_MAKE_IT_FIT] = 0
        rmfeatures[ReadingMeterFeatures.METER] = meter.id
        # The feature is named for the ABSENCE of a usual break, hence
        # the negation.
        rmfeatures[ReadingMeterFeatures.NO_USUAL_BREAK_PRESENT] = int(
            not meter.reading_has_usual_breaks(reading))
        for feature in meter.collect_condition_features(reading):
            rmfeatures[feature] = 1
    return reading_meter_rmfeatures
allzweckmesser/model.py +2 −0 Original line number Diff line number Diff line #!/usr/bin/env python3 # -*- coding: utf-8 -*- from collections import defaultdict import itertools import json import os Loading Loading @@ -360,6 +361,7 @@ class Reading: def __init__(self, tokens: List[Token] = None, phenomena: dict = None): self.tokens = tokens or list() self.phenomena = phenomena or dict() self.features = defaultdict(lambda: 0) @classmethod def from_json(cls, json_file): Loading
allzweckmesser/scanner.py +3 −0 Original line number Diff line number Diff line Loading @@ -7,6 +7,7 @@ from itertools import product from .db import FormAnalysis from .model import Reading, Syllable, Token, Verse, Phenomenon from .features import ReadingFeature from .wordlist import WordList CLITICS = ['que', 'qve', 'ue', 've', 'ne'] Loading Loading @@ -468,6 +469,7 @@ def generate_synizesis(reading): syllable.syllable_length = 2 syllable.vowel_length = 2 syllable.phenomena['synizesis'] = Phenomenon(chars=syn_dict[syllable.id][3]) reading.features[ReadingFeature.SYNIZESIS] += 1 for s in token.syllables[j+2:]: s.id -= 1 Loading Loading @@ -635,6 +637,7 @@ def parse_verse(verse): (s.phenomena['positional lengthening'] .overruled_by) = 'muta cum liquida' elif blueprint[syll_id] == '2': reading.features[ReadingFeature.MCL_TRIGGERS_PL] += 1 s.syllable_length = 2 syll_id += 1 Loading
# features.py
# -*- coding: utf-8 -*-
# Enumerations naming the features attached to readings and to
# reading/meter combinations.

from enum import Enum, unique


# Members of these enumerations serve as dictionary keys. Two members
# sharing a value would silently become aliases of one another and merge
# two features into one key, so @unique turns such a mistake into an error
# at import time.

@unique
class ReadingFeature(Enum):
    """Features of a single reading, independent of any meter."""

    MCL_TRIGGERS_PL = 0
    SYNIZESIS = 1
    S_ELISION = 2
    HIAT = 3


@unique
class ReadingMeterFeatures(Enum):
    """Features of one reading paired with one meter."""

    DOES_NOT_FIT_METER = 10
    NECESSARY_CHANGES_TO_MAKE_IT_FIT = 11
    METER = 12
    NO_USUAL_BREAK_PRESENT = 13
    HEXAMETER_BRIDGE_VIOLATED = 14


@unique
class CombinedFeatures(Enum):
    """Consecutively numbered feature set.

    NOTE(review): presumably the merged index space for reading and
    reading/meter features; no user of this enumeration is visible here,
    so confirm against the caller.
    """

    MCL_TRIGGERS_PL = 0
    SYNIZESIS = 1
    S_ELISION = 2
    DOES_NOT_FIT_METER = 3
    NECESSARY_CHANGES_TO_MAKE_IT_FIT = 4
    NO_USUAL_BREAK_PRESENT = 5
    BRIDGES_VIOLATED = 6