Loading allzweckmesser/__init__.py +2 −1 Original line number Diff line number Diff line from . import config, corpus, db, meters, model, scan, scanner, wordlist from . import (config, corpus, db, features, meters, model, scan, scanner, wordlist) allzweckmesser/model.py +3 −1 Original line number Diff line number Diff line Loading @@ -358,10 +358,12 @@ class Token: class Reading: def __init__(self, tokens: List[Token] = None, phenomena: dict = None): def __init__(self, tokens: List[Token] = None, phenomena: dict = None, meter=None): self.tokens = tokens or list() self.phenomena = phenomena or dict() self.features = defaultdict(lambda: 0) self.meter = meter @classmethod def from_json(cls, json_file): Loading allzweckmesser/scan.py +22 −1 Original line number Diff line number Diff line Loading @@ -3,9 +3,12 @@ import argparse import sys from sklearn import joblib from typing import List from .meters import ALL_METERS from .config import RANKING_MODEL_PATH from .features import combine_features from .meters import ALL_METERS, get_reading_meter_combinations from .model import Verse from .scanner import Scanner Loading @@ -14,6 +17,24 @@ def scan(plain_verses: List[str], meters=ALL_METERS, **options) -> List[Verse]: """Scan Latin verses.""" scanner = Scanner() scanned_verses = scanner.scan_verses(plain_verses) model = joblib.load(RANKING_MODEL_PATH) for verse in scanned_verses: reading_meter_combinations = ( get_reading_meter_combinations( verse.readings, meters ) ) vectors = [] for reading, meter, rmfeatures in reading_meter_combinations: reading.meter = meter vectors.append(combine_features(reading.features, rmfeatures)) probs = model.predict_proba(vectors) sorted_probs = sorted( [(probs[i], reading) for i in range(len(probs))], key=lambda x: x[0][0] ) first_one = sorted_probs[:1] verse.readings = [prob_reading[1] for prob_reading in first_one] return scanned_verses Loading scripts/forest_classifier.joblib→models/forest_classifier.joblib (37.2 KiB) File moved. 
View file scripts/svm_classifier.joblib→models/svm_classifier.joblib (14.8 KiB) File moved. View file Loading
allzweckmesser/__init__.py +2 −1 Original line number Diff line number Diff line from . import config, corpus, db, meters, model, scan, scanner, wordlist from . import (config, corpus, db, features, meters, model, scan, scanner, wordlist)
allzweckmesser/model.py +3 −1 Original line number Diff line number Diff line Loading @@ -358,10 +358,12 @@ class Token: class Reading: def __init__(self, tokens: List[Token] = None, phenomena: dict = None): def __init__(self, tokens: List[Token] = None, phenomena: dict = None, meter=None): self.tokens = tokens or list() self.phenomena = phenomena or dict() self.features = defaultdict(lambda: 0) self.meter = meter @classmethod def from_json(cls, json_file): Loading
def scan(plain_verses: List[str], meters=ALL_METERS, **options) -> List[Verse]:
    """Scan Latin verses and keep only the best-ranked reading of each.

    Every verse is scanned with a ``Scanner``.  Each (reading, meter)
    combination produced by ``get_reading_meter_combinations`` is turned
    into a feature vector and scored by the ranking model loaded from
    ``RANKING_MODEL_PATH``.  The reading of the best-scored combination
    replaces ``verse.readings`` and gets that combination's meter assigned
    to its ``meter`` attribute.

    Args:
        plain_verses: The verses to scan, as plain strings.
        meters: The meters to combine the readings with.
        **options: Accepted for interface compatibility; not used here.

    Returns:
        The scanned verses, each with at most one (the best) reading when
        at least one reading/meter combination was available.
    """
    scanner = Scanner()
    scanned_verses = scanner.scan_verses(plain_verses)
    model = joblib.load(RANKING_MODEL_PATH)

    for verse in scanned_verses:
        # Keep the candidates aligned with their feature vectors so that a
        # probability can be mapped back to the combination it belongs to.
        candidates = []
        vectors = []
        for reading, meter, rmfeatures in get_reading_meter_combinations(
                verse.readings, meters):
            candidates.append((reading, meter))
            vectors.append(combine_features(reading.features, rmfeatures))

        if not vectors:
            # Nothing to rank: do not call the model on an empty batch and
            # leave the verse's readings as the scanner produced them.
            continue

        probs = model.predict_proba(vectors)

        # Rank by the first probability column, smallest first, exactly as
        # the previous sort-and-take-first did (ties keep the earliest
        # combination).
        # NOTE(review): presumably column 0 is the "incorrect" class, so the
        # smallest value is the most plausible reading -- confirm against
        # the trained model's ``classes_``.
        best_index = min(range(len(candidates)), key=lambda i: probs[i][0])
        best_reading, best_meter = candidates[best_index]

        # Assign the meter of the winning combination only; a reading may
        # have been combined with several meters.
        best_reading.meter = best_meter
        verse.readings = [best_reading]

    return scanned_verses