Commit 2cd8838e authored by Simon Will
Browse files

“Finishing” touches

parent 96acda2e
Loading
Loading
Loading
Loading
+2 −1
Original line number Diff line number Diff line
from . import config, corpus, db, meters, model, scan, scanner, wordlist
from . import (config, corpus, db, features, meters, model, scan, scanner,
               wordlist)
+3 −1
Original line number Diff line number Diff line
@@ -358,10 +358,12 @@ class Token:

class Reading:

    def __init__(self, tokens: List[Token] = None, phenomena: dict = None):
    def __init__(self, tokens: List[Token] = None, phenomena: dict = None,
                 meter=None):
        """Initialise a reading with its tokens, phenomena and meter.

        Args:
            tokens: Token objects stored on the reading.  ``None`` (or any
                other falsy value) is replaced by a fresh empty list.
            phenomena: Mapping stored on the reading.  ``None`` (or any
                other falsy value) is replaced by a fresh empty dict.
            meter: Value stored unchanged as ``self.meter``; defaults to
                ``None``.
        """
        self.tokens = tokens or []
        self.phenomena = phenomena or {}
        # Unknown feature names read as 0.  ``int`` is used as the factory
        # rather than a lambda so that instances stay picklable.
        self.features = defaultdict(int)
        self.meter = meter

    @classmethod
    def from_json(cls, json_file):
+22 −1
Original line number Diff line number Diff line
@@ -3,9 +3,12 @@

import argparse
import sys
from sklearn import joblib
from typing import List

from .meters import ALL_METERS
from .config import RANKING_MODEL_PATH
from .features import combine_features
from .meters import ALL_METERS, get_reading_meter_combinations
from .model import Verse
from .scanner import Scanner

@@ -14,6 +17,24 @@ def scan(plain_verses: List[str], meters=ALL_METERS, **options) -> List[Verse]:
    """Scan Latin verses."""
    scanner = Scanner()
    scanned_verses = scanner.scan_verses(plain_verses)
    model = joblib.load(RANKING_MODEL_PATH)
    for verse in scanned_verses:
        reading_meter_combinations = (
            get_reading_meter_combinations(
                verse.readings, meters
            )
        )
        vectors = []
        for reading, meter, rmfeatures in reading_meter_combinations:
            reading.meter = meter
            vectors.append(combine_features(reading.features, rmfeatures))
        probs = model.predict_proba(vectors)
        sorted_probs = sorted(
            [(probs[i], reading) for i in range(len(probs))],
            key=lambda x: x[0][0]
        )
        first_one = sorted_probs[:1]
        verse.readings = [prob_reading[1] for prob_reading in first_one]
    return scanned_verses


Loading