Commit 33192679 authored by Simon Will
Browse files

Implement features and overhaul meters

parent 42c41827
Loading
Loading
Loading
Loading
+84 −44
Original line number Diff line number Diff line
@@ -3,104 +3,144 @@

import re

from .features import ReadingMeterFeatures
from .model import Reading, Position


def caesurae_together(position_specs, reward):
    """Build a condition that pays out when every listed position is a break.

    Each entry of ``position_specs`` is indexed as ``spec[0]`` and
    ``spec[1]``; both are handed to ``Position.after`` together with the
    reading and the meter.  Any further entries (the callers in this module
    pass a descriptive name as third element) are not read here.

    The returned function takes ``(meter, reading)`` and yields ``reward``
    if all looked-up positions report a truthy ``word_boundary`` (also when
    ``position_specs`` is empty), and ``0`` otherwise.
    """
    def get_reward(meter: Meter, reading: Reading):
        # Lazy generator: evaluation stops at the first position that is
        # not a word boundary, so later positions are never looked up.
        boundaries = (
            Position.after(spec[0], reading, spec[1], meter).word_boundary
            for spec in position_specs
        )
        return reward if all(boundaries) else 0
    return get_reward


def bridge(position_spec, feature):
    """Build a meter condition for a single bridge position.

    ``position_spec`` is indexed as ``position_spec[0]`` and
    ``position_spec[1]``; both are passed to ``Position.after`` together
    with the reading and the meter.

    The returned function takes ``(meter, reading)`` and returns ``None``
    when the looked-up position reports a truthy ``word_boundary``, and
    ``feature`` otherwise.
    """
    def get_feature(meter: Meter, reading: Reading):
        # NOTE(review): the other Position.after calls in this module pass
        # (kind, reading, index, meter); here meter and index are swapped.
        # Confirm against the signature of Position.after.
        position = Position.after(position_spec[0], reading, meter,
                                  position_spec[1])
        # NOTE(review): ``feature`` is produced when there is NO word
        # boundary.  The hexameter definition passes a feature named
        # HEXAMETER_BRIDGE_VIOLATED, which suggests the opposite polarity
        # may be intended -- confirm before relying on it.
        if position.word_boundary:
            return None
        return feature
    return get_feature


class Meter:
    """A named meter described by a schema regex, breaks and conditions.

    Attributes:
        name: Full name of the meter.
        schema: Regular expression matched against ``reading.get_schema()``.
        break_specs: Optional list of alternatives; each alternative is a
            list of specs whose first two entries are passed to
            ``Position.after``.
        conditions: Condition functions bound to this instance, so each is
            called as ``cond(reading)`` and receives the meter as its first
            argument.
        short_name: Optional abbreviated name.
        id: Optional numeric identifier.
    """

    def __init__(self, name: str, schema: str, breaks: list = None,
                 conditions: list = None, short_name: str = None,
                 id: int = None):
        self.name = name
        self.schema = schema
        self.break_specs = breaks
        # Convert condition functions to instance-bound methods.
        self.conditions = ([cond.__get__(self) for cond in conditions]
                           if conditions else [])
        self.short_name = short_name
        self.id = id

    def match_reading(self, reading: Reading):
        """Return the ``re.match`` result of the schema on the reading."""
        return re.match(self.schema, reading.get_schema())

    def get_rewards(self, reading: Reading):
        """Return the sum of all condition results for ``reading``.

        Only meaningful when every condition returns a number; conditions
        that return feature markers or ``None`` cannot be summed.
        """
        return sum(cond(reading) for cond in self.conditions)

    def collect_condition_features(self, reading: Reading):
        """Return the truthy results of all conditions, in condition order."""
        # The loop variable is ``cond``; calling an undefined ``condition``
        # here would raise NameError on the first condition.
        results = (cond(reading) for cond in self.conditions)
        return [feature for feature in results if feature]

    def reading_has_usual_breaks(self, reading: Reading):
        """Tell whether ``reading`` satisfies one alternative of the breaks.

        Returns ``True`` when no breaks are configured.  Otherwise returns
        ``True`` as soon as one alternative has a truthy ``word_boundary``
        at every one of its positions, and ``False`` if none does.
        """
        if not self.break_specs:
            return True
        return any(
            all(
                # A position object lacking ``word_boundary`` counts as
                # "no boundary".
                getattr(Position.after(b[0], reading, b[1], self),
                        'word_boundary', False)
                for b in breaks
            )
            for breaks in self.break_specs
        )


# Shared two-element opening used by the Phalaecian hendecasyllable schema:
# long-long, long-short or short-long.
AEOLIC_BASE = r'(?:(–)(–)|(–)(⏑)|(⏑)(–))'

# Known meters, keyed by their full name.  Each ``breaks`` value is a list
# of alternatives; an alternative is a list of (kind, index, label) specs.
ALL_METERS = {

    'Catalectic Dactylic Hexameter': Meter(
        'Catalectic Dactylic Hexameter',
        r'(–)(⏑⏑|–)(–)(⏑⏑|–)(–)(⏑⏑|–)(–)(⏑⏑|–)(–)(⏑⏑|–)(⏑|–)',
        conditions=[
            bridge(('mora', 15, 'Hermann’s Bridge'),
                   ReadingMeterFeatures.HEXAMETER_BRIDGE_VIOLATED)
        ],
        breaks=[
            [('mora', 6, 'Trithemimeral'), ('mora', 14, 'Hephthemimeral')],
            [('mora', 10, 'Penthemimeral')],
            [('mora', 16, 'Bucolic Diaeresis')]
        ],
        short_name='6da‸',
        id=0
    ),
    'Dactylic Pentameter': Meter(
        'Dactylic Pentameter',
        r'(–)(⏑⏑|–)(–)(⏑⏑|–)(–)(–)(⏑⏑)(–)(⏑⏑)(⏑|–)',
        breaks=[[('mora', 5, 'Middle diaeresis')]],
        short_name='3da‸3da‸',
        id=1
    ),
    'Iambic Trimeter': Meter(
        'Iambic Trimeter',
        r'(⏑|⏑⏑|–)(⏑⏑|–)(⏑)(⏑⏑|–)(⏑|⏑⏑|–)(⏑⏑|–)(⏑)(⏑⏑|–)(⏑|⏑⏑|–)(⏑⏑|–)(⏑)(⏑|–)',
        breaks=[
            # The comma between the two alternatives is required: without
            # it the first list is subscripted by the second (TypeError).
            [('element', 4, 'After fourth element')],
            [('element', 8, 'After eighth element')]
        ],
        short_name='3ia',
        id=2
    ),
    'Iambic Senarius': Meter(
        'Iambic Senarius',
        r'(⏑|⏑⏑|–)(⏑⏑|–)(⏑|⏑⏑|–)(⏑⏑|–)(⏑|⏑⏑|–)(⏑⏑|–)(⏑|⏑⏑|–)(⏑⏑|–)(⏑|⏑⏑|–)(⏑⏑|–)(⏑)(⏑|–)',
        short_name='6ia',
        id=3
    ),
    'Sapphic Hendecasyllable': Meter(
        'Sapphic Hendecasyllable',
        r'(–)(–|⏑)(–)(–|⏑)(–)(⏑)(⏑)(–)(⏑)(–)(⏑|–)',
        short_name='sap hen',
        id=4
    ),
    'Adoneus': Meter(
        'Adoneus',
        r'(–)(⏑⏑)(–)(⏑|–)',
        short_name='adoneus',
        id=5
    ),
    'Phalaecian Hendecasyllable': Meter(
        'Phalaecian Hendecasyllable',
        AEOLIC_BASE + r'(–)(⏑)(⏑)(–)(⏑)(–)(⏑)(–)(⏑|–)',
        breaks=[[('element', 6, 'After sixth element')]],
        short_name='hen',
        id=6
    ),
}


def get_reading_meter_combinations(readings, meters=ALL_METERS):
    """Pair every reading with every meter and compute per-pair features.

    Args:
        readings: Iterable of readings.
        meters: Either a mapping whose values are ``Meter`` objects (such
            as ``ALL_METERS``) or a plain iterable of ``Meter`` objects.

    Returns:
        A list of ``[reading, meter, rmfeatures]`` triples, where
        ``rmfeatures`` is a dict keyed by ``ReadingMeterFeatures`` members
        (plus whatever features the meter's conditions yield, each set
        to 1).
    """
    # Imported locally because the module-level imports visible for this
    # file do not include itertools.
    import itertools

    # Iterating a dict yields its keys (the meter names); the Meter
    # objects needed below are the values.
    meter_objects = meters.values() if isinstance(meters, dict) else meters

    reading_meter_rmfeatures = [
        [reading, meter, {}]
        for reading, meter
        in itertools.product(readings, meter_objects)
    ]
    for reading, meter, rmfeatures in reading_meter_rmfeatures:
        rmfeatures[ReadingMeterFeatures.DOES_NOT_FIT_METER] = (
            meter.match_reading(reading) is None)

        # XXX: Implement this.
        rmfeatures[ReadingMeterFeatures.NECESSARY_CHANGES_TO_MAKE_IT_FIT] = 0

        rmfeatures[ReadingMeterFeatures.METER] = meter.id
        # The feature flags the ABSENCE of a usual break, so the result of
        # reading_has_usual_breaks has to be negated.
        rmfeatures[ReadingMeterFeatures.NO_USUAL_BREAK_PRESENT] = int(
            not meter.reading_has_usual_breaks(reading))
        for feature in meter.collect_condition_features(reading):
            rmfeatures[feature] = 1
    return reading_meter_rmfeatures
+2 −0
Original line number Diff line number Diff line
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

from collections import defaultdict
import itertools
import json
import os
@@ -360,6 +361,7 @@ class Reading:
    def __init__(self, tokens: List[Token] = None, phenomena: dict = None):
        """Create a reading from optional tokens and phenomena.

        A falsy ``tokens`` or ``phenomena`` argument (including ``None``)
        is replaced by a fresh empty list or dict respectively.
        ``features`` starts out empty and yields 0 for missing keys.
        """
        self.tokens = tokens or []
        self.phenomena = phenomena or {}
        # ``int`` as default factory returns 0 just like ``lambda: 0`` but,
        # unlike a lambda, does not prevent the mapping from being pickled.
        self.features = defaultdict(int)

    @classmethod
    def from_json(cls, json_file):
+3 −0
Original line number Diff line number Diff line
@@ -7,6 +7,7 @@ from itertools import product

from .db import FormAnalysis
from .model import Reading, Syllable, Token, Verse, Phenomenon
from .features import ReadingFeature
from .wordlist import WordList

CLITICS = ['que', 'qve', 'ue', 've', 'ne']
@@ -468,6 +469,7 @@ def generate_synizesis(reading):
                    syllable.syllable_length = 2
                    syllable.vowel_length = 2
                    syllable.phenomena['synizesis'] = Phenomenon(chars=syn_dict[syllable.id][3])
                    reading.features[ReadingFeature.SYNIZESIS] += 1
                    for s in token.syllables[j+2:]:
                        s.id -= 1
                        
@@ -635,6 +637,7 @@ def parse_verse(verse):
                            (s.phenomena['positional lengthening']
                             .overruled_by) = 'muta cum liquida'
                    elif blueprint[syll_id] == '2':
                        reading.features[ReadingFeature.MCL_TRIGGERS_PL] += 1
                        s.syllable_length = 2
                    syll_id += 1

features.py

0 → 100644
+28 −0
Original line number Diff line number Diff line
# -*- coding: utf-8 -*-

from enum import Enum


class ReadingFeature(Enum):
    """Keys for features that belong to a single reading.

    Members are used as keys of ``reading.features``, where the scanner
    increments a counter per occurrence (visible for SYNIZESIS and
    MCL_TRIGGERS_PL in this change).
    """
    # Presumably "muta cum liquida triggers positional lengthening" --
    # TODO confirm the expansion of the abbreviation.
    MCL_TRIGGERS_PL = 0
    SYNIZESIS = 1
    # NOTE(review): no use of the two members below is visible in this
    # change.
    S_ELISION = 2
    HIAT = 3


class ReadingMeterFeatures(Enum):
    """Keys for features that belong to a (reading, meter) pair.

    Members are used as keys of the per-pair feature dict built by
    ``get_reading_meter_combinations``.  Values start at 10, so they do
    not collide with the values of ``ReadingFeature``.
    """
    # Set to whether the meter's schema fails to match the reading.
    DOES_NOT_FIT_METER = 10
    # Currently always set to 0 by get_reading_meter_combinations.
    NECESSARY_CHANGES_TO_MAKE_IT_FIT = 11
    # Set to the ``id`` of the meter.
    METER = 12
    # Derived from Meter.reading_has_usual_breaks.
    NO_USUAL_BREAK_PRESENT = 13
    # Passed to ``bridge`` as the feature of the hexameter's condition.
    HEXAMETER_BRIDGE_VIOLATED = 14


class CombinedFeatures(Enum):
    """Feature keys with one contiguous numbering from 0 to 6.

    Most names mirror members of ``ReadingFeature`` and
    ``ReadingMeterFeatures``.  There is no counterpart for ``HIAT`` or
    ``METER``, and the bridge feature is named ``BRIDGES_VIOLATED`` here
    rather than ``HEXAMETER_BRIDGE_VIOLATED``.

    NOTE(review): no use of this enum is visible in this change; how the
    other two enums are mapped onto it has to be confirmed elsewhere.
    """
    MCL_TRIGGERS_PL = 0
    SYNIZESIS = 1
    S_ELISION = 2
    DOES_NOT_FIT_METER = 3
    NECESSARY_CHANGES_TO_MAKE_IT_FIT = 4
    NO_USUAL_BREAK_PRESENT = 5
    BRIDGES_VIOLATED = 6
+1 −1

File changed.

Contains only whitespace changes.