#!/usr/bin/python3
# -*- coding: utf-8 -*-
import argparse
import json
import sys
import traceback
from unidecode import unidecode
import allzweckmesser as azm
def main(meter_reference_verses, outfile, meters=None):
    """Generate labelled feature vectors for reference verses as JSON.

    Every reference verse is scanned. Each reading-meter combination of the
    resulting analysis is turned into a feature vector, which is labelled
    ``1`` if both the meter and the reading's schema agree with the
    reference, and ``0`` otherwise.

    Args:
        meter_reference_verses: Sized sequence of
            ``(ref_meter, ref_verse, correct)`` triples. ``ref_verse`` must
            provide ``text`` and ``readings``; ``correct`` is not used here.
        outfile: Path of the JSON file to write.
        meters: Names of the meters to consider (keys of
            ``azm.meters.ALL_METERS``); unknown names are ignored. ``None``
            selects ``['hexameter']``.

    The output file contains a list of
    ``(verse text, ref_meter, [(features, label), ...])`` entries. Verses
    whose scan raises an exception are reported on stderr and left out.
    """
    # ``None`` is also what argparse hands over when ``--meters`` is omitted.
    # Resolving the default here avoids a shared mutable default argument.
    if meters is None:
        meters = ['hexameter']
    meters = [
        azm.meters.ALL_METERS[name]
        for name in meters
        if name in azm.meters.ALL_METERS
    ]

    out = []
    total_instances = len(meter_reference_verses)
    for i, (ref_meter, ref_verse, _correct) in enumerate(
            meter_reference_verses, 1):
        print('Processing verse {} ({}/{})'
              .format(ref_verse.text, i, total_instances))

        # The first reading of the reference verse serves as gold standard.
        ref_schema = ref_verse.readings[0].get_schema()

        # NOTE(review): ``scanner`` is not defined in the visible part of
        # this file; it has to exist at module level -- confirm.
        try:
            analysis = scanner.scan_verses([unidecode(ref_verse.text)])[0]
        except Exception:
            print('ERROR when scanning verse {!r}'.format(ref_verse),
                  file=sys.stderr)
            traceback.print_exc()
            # Without an analysis there is nothing to label; carrying on
            # would reuse the previous verse's analysis (or hit a NameError).
            continue

        combinations = azm.meters.get_reading_meter_combinations(
            analysis.readings, meters)

        instances = []
        for reading, meter, rmfeatures in combinations:
            features = azm.features.combine_features(
                reading.features, rmfeatures)
            # A feature vector gets a correct label if the schema matches
            # the reference reading's schema and the meter matches the
            # reference meter.
            # NOTE(review): ``ref_meter`` needs a ``short_name`` attribute
            # here but is also dumped as JSON below -- verify what callers
            # actually pass.
            reading_is_correct = int(
                meter.short_name == ref_meter.short_name
                and reading.get_schema() == ref_schema
            )
            instances.append((features, reading_is_correct))
        out.append((ref_verse.text, ref_meter, instances))

    with open(outfile, 'w', encoding='utf-8') as f:
        json.dump(out, f, indent=2)
def read_infile(infile):
    """Load the reference verses from a JSON file.

    The file has to contain a list of ``[meter, verse_dict, correct]``
    triples; every ``verse_dict`` is converted with
    ``azm.model.Verse.from_json``.

    Args:
        infile: Path of the JSON file to read.

    Returns:
        A list of ``(meter, verse, correct)`` tuples in file order.
    """
    # JSON is UTF-8 by specification; do not rely on the locale's encoding.
    with open(infile, encoding='utf-8') as f:
        entries = json.load(f)
    return [
        (meter, azm.model.Verse.from_json(verse_dict), correct)
        for meter, verse_dict, correct in entries
    ]
def parse_args_and_main():
    """Parse the command line, load the reference verses and run ``main``."""
    d = 'Generate feature vectors for reading-meter combinations'
    parser = argparse.ArgumentParser(description=d)
    # Without an explicit default argparse yields ``None`` for an omitted
    # option, which would be forwarded to ``main`` in place of a meter list.
    parser.add_argument('--meters', '-m', nargs='+', default=['hexameter'],
                        help='Meters to consider when scanning.')
    parser.add_argument('infile',
                        help='JSON file containing the reference verses')
    parser.add_argument('outfile',
                        help='JSON file for the output')
    args = parser.parse_args()

    main(
        meter_reference_verses=read_infile(args.infile),
        outfile=args.outfile,
        meters=args.meters,
    )
# Run the command-line interface only when executed as a script, not when
# the module is imported.
if __name__ == '__main__':
    parse_args_and_main()