#!/usr/bin/python3
# -*- coding: utf-8 -*-
"""Generate labelled feature vectors for reading-meter combinations.

Reads reference verses from a JSON file, scans each verse flagged as
correct, and writes one labelled feature vector per reading-meter
combination to a JSON output file.
"""

import argparse
import json
import sys
import traceback

from unidecode import unidecode

import allzweckmesser as azm

# Meter names used when the caller does not select any.  Kept immutable so
# that it can safely serve as a default argument value.
DEFAULT_METERS = ('hexameter',)


def main(meter_reference_verses, outfile, meters=DEFAULT_METERS):
    """Scan the reference verses and dump labelled feature vectors as JSON.

    Args:
        meter_reference_verses: Sized sequence of
            ``(ref_meter, ref_verse, correct)`` triples.  ``ref_meter`` is
            compared against ``meter.short_name``; ``ref_verse`` must
            provide ``.text`` and ``.readings``; entries whose ``correct``
            flag is falsy are skipped.
        outfile: Path of the JSON file to write.
        meters: Iterable of meter names to consider.  ``None`` (which is
            what argparse delivers when ``--meters`` is omitted) selects
            ``DEFAULT_METERS``.  Names missing from
            ``azm.meters.ALL_METERS`` are ignored with a warning.

    The output is a JSON list of ``[verse_text, ref_meter, instances]``
    entries, where ``instances`` is a list of ``[features, label]`` pairs
    and ``label`` is 1 for the reference reading/meter and 0 otherwise.
    Verses whose scan raises an exception are reported on stderr and
    left out of the output.
    """
    if meters is None:
        # The command line passes None when -m/--meters is not given;
        # iterating over None would raise a TypeError.
        meters = DEFAULT_METERS
    meter_names = list(meters)

    known_meters = azm.meters.ALL_METERS
    unknown = [name for name in meter_names if name not in known_meters]
    if unknown:
        # Still best-effort (unknown names are dropped), but no longer
        # silent: a typo would otherwise yield empty instance lists.
        print('WARNING: ignoring unknown meters {!r}'.format(unknown),
              file=sys.stderr)
    meters = [known_meters[name] for name in meter_names
              if name in known_meters]

    scanner = azm.scanner.Scanner()
    out = []
    total_instances = len(meter_reference_verses)
    for i, (ref_meter, ref_verse, correct) in enumerate(
            meter_reference_verses, 1):
        if not correct:
            continue

        print('Processing verse {} ({}/{})'
              .format(ref_verse.text, i, total_instances))
        # The first reading of the reference verse defines the schema
        # that counts as correct.
        ref_schema = ref_verse.readings[0].get_schema()

        try:
            analysis = scanner.scan_verses([unidecode(ref_verse.text)])[0]
        except Exception:
            # Deliberately broad: one verse that cannot be scanned must
            # not abort the whole run.  Report it and move on.
            print('ERROR when scanning verse {!r}'.format(ref_verse),
                  file=sys.stderr)
            traceback.print_exc()
            continue

        reading_meter_combinations = (
            azm.meters.get_reading_meter_combinations(
                analysis.readings, meters
            )
        )
        instances = []
        for reading, meter, rmfeatures in reading_meter_combinations:
            features = azm.features.combine_features(
                reading.features, rmfeatures)
            # A feature vector gets a correct label if the schema matches
            # the reference reading's schema and the meter matches the
            # reference meter.
            reading_is_correct = int(
                meter.short_name == ref_meter
                and reading.get_schema() == ref_schema
            )
            instances.append((features, reading_is_correct))
        out.append((ref_verse.text, ref_meter, instances))

    # json.dump escapes non-ASCII characters by default, so pinning the
    # encoding only makes the file independent of the locale.
    with open(outfile, 'w', encoding='utf-8') as f:
        json.dump(out, f, indent=2)


def read_infile(infile):
    """Load the reference verses from a JSON file.

    Args:
        infile: Path of a JSON file holding a list of
            ``[meter, verse_dict, correct]`` entries.

    Returns:
        A list of ``(meter, verse, correct)`` tuples in which ``verse``
        is the result of ``azm.model.Verse.from_json(verse_dict)``.
    """
    # Read as UTF-8 explicitly instead of relying on the locale's
    # preferred encoding; verse text is not guaranteed to be ASCII.
    with open(infile, encoding='utf-8') as f:
        entries = json.load(f)
    return [
        (meter, azm.model.Verse.from_json(verse_dict), correct)
        for meter, verse_dict, correct in entries
    ]


def parse_args_and_main():
    """Parse the command line, load the input file and run ``main``."""
    d = 'Generate feature vectors for reading-meter combinations'
    parser = argparse.ArgumentParser(description=d)
    parser.add_argument('--meters', '-m', nargs='+',
                        help='Meters to consider when scanning.')
    parser.add_argument('infile',
                        help='JSON file containing the reference verses')
    parser.add_argument('outfile', help='JSON file for the output')
    args = parser.parse_args()
    main(
        meter_reference_verses=read_infile(args.infile),
        outfile=args.outfile,
        # None when --meters is omitted; main() then uses its default.
        meters=args.meters,
    )


if __name__ == '__main__':
    parse_args_and_main()