Commit 2cd8838e authored by Simon Will
Browse files

“Finishing” touches

parent 96acda2e
from . import config, corpus, db, meters, model, scan, scanner, wordlist
from . import (config, corpus, db, features, meters, model, scan, scanner,
wordlist)
......@@ -358,10 +358,12 @@ class Token:
class Reading:
def __init__(self, tokens: List[Token] = None, phenomena: dict = None):
def __init__(self, tokens: List[Token] = None, phenomena: dict = None,
meter=None):
self.tokens = tokens or list()
self.phenomena = phenomena or dict()
self.features = defaultdict(lambda: 0)
self.meter = meter
@classmethod
def from_json(cls, json_file):
......
......@@ -3,9 +3,12 @@
import argparse
import sys
from sklearn import joblib
from typing import List
from .meters import ALL_METERS
from .config import RANKING_MODEL_PATH
from .features import combine_features
from .meters import ALL_METERS, get_reading_meter_combinations
from .model import Verse
from .scanner import Scanner
def scan(plain_verses: List[str], meters=ALL_METERS, **options) -> List[Verse]:
    """Scan Latin verses and keep only the best-ranked reading of each.

    Every verse is scanned, each (reading, meter) combination produced by
    ``get_reading_meter_combinations`` is turned into a feature vector, and
    the ranking model loaded from ``RANKING_MODEL_PATH`` scores the vectors.
    The reading whose vector has the lowest value in the first column of
    ``predict_proba`` replaces ``verse.readings``, and its ``meter``
    attribute is set to the meter of the winning combination.

    Args:
        plain_verses: The verses to scan, as plain strings.
        meters: The meters to combine with the readings.
        **options: Accepted but not used by this function.

    Returns:
        The scanned verses, each with at most one reading left.
    """
    scanner = Scanner()
    scanned_verses = scanner.scan_verses(plain_verses)
    model = joblib.load(RANKING_MODEL_PATH)
    for verse in scanned_verses:
        # Keep the candidates index-aligned with the feature vectors so that
        # a row of ``probs`` can be mapped back to the combination that
        # produced it (instead of reusing a leaked loop variable).
        candidates = []
        vectors = []
        for reading, meter, rmfeatures in get_reading_meter_combinations(
                verse.readings, meters):
            candidates.append((reading, meter))
            vectors.append(combine_features(reading.features, rmfeatures))

        if not vectors:
            # Nothing to rank; do not hand an empty sample list to the model.
            verse.readings = []
            continue

        probs = model.predict_proba(vectors)
        # NOTE(review): the ordering (smallest first-column probability
        # wins) is kept from the original code; which class the first
        # column stands for depends on the trained model -- confirm.
        best = min(range(len(candidates)), key=lambda i: probs[i][0])
        best_reading, best_meter = candidates[best]
        # Assign the meter only now: the same reading may be combined with
        # several meters, and only the winning combination should stick.
        best_reading.meter = best_meter
        verse.readings = [best_reading]
    return scanned_verses
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment