Skip to content
Snippets Groups Projects
dev.py 2.96 KiB
Newer Older
#!/usr/bin/python3
# -*- coding: utf-8 -*-

import argparse
import json
import random
import traceback
from typing import List

from unidecode import unidecode

from .style import mark_correct
from .model import Verse
from .scanner import Scanner


def dev(reference_verses, number=10, randomize=False) -> List[Verse]:
    """Scan verses and compare them with their correct reference version.

    Each sampled verse's text is passed through ``unidecode`` and handed to
    the scanner.  A verse counts as correct when at least one reading in the
    scanner's analysis has the same schema as the verse's first reference
    reading.  A line per verse and a final summary are printed to stdout.

    :param reference_verses: Sequence of reference verses; the first reading
        of each verse is used as the expected result.
    :param number: Maximum number of verses to analyze.
    :param randomize: If true, draw the verses at random instead of taking
        the first ``number`` verses.
    :return: The analyses of all verses that were scanned without a program
        error, in the order they were processed.
    """
    scanner = Scanner()
    all_analyses = []
    correct = 0
    errors = 0

    if randomize:
        # random.sample raises ValueError when asked for more items than the
        # population holds; clamp so both branches behave like the slice.
        sample_size = min(number, len(reference_verses))
        sample = random.sample(reference_verses, sample_size)
    else:
        sample = reference_verses[:number]

    for ref in sample:
        ref_reading = ref.readings[0]

        try:
            analysis = scanner.scan_verses([unidecode(ref.text)])[0]
        except Exception:
            # Keep going so one failing verse does not abort the whole run,
            # but let KeyboardInterrupt/SystemExit propagate.
            errors += 1
            print('ERROR at verse {}'.format(ref.text))
            traceback.print_exc()
            # There is no analysis for this verse; it stays in the
            # denominator of the summary and thus counts as not correct.
            continue
        all_analyses.append(analysis)

        correct_schema = ref_reading.get_schema()
        this_correct = any(r.get_schema() == correct_schema
                           for r in analysis.readings)

        if this_correct:
            correct += 1
            print('{ref} ({n} readings)'
                  .format(ref=mark_correct(ref_reading),
                          n=len(analysis.readings)))
        else:
            print('{ref} ({n} readings)'
                  .format(ref=ref_reading, n=len(analysis.readings)))
            for reading in analysis.readings:
                print('    {}'.format(reading.format_differences(ref_reading)))

    # Guard against an empty sample (no verses given or number == 0).
    accuracy = correct / len(sample) if sample else 0.0
    print('Correct: {}/{} ({:.2f})\n{} program errors'
          .format(correct, len(sample), accuracy, errors))
    return all_analyses


def parse_args() -> argparse.Namespace:
    """Build the command-line interface and parse ``sys.argv``.

    :return: An argparse Namespace with the attributes ``infile``,
        ``number`` and ``randomize``.
    """
    cli = argparse.ArgumentParser(
        prog='allzweckmesser',
        description='Identify errors in verse parsing.',
    )

    # (flags, keyword options) for every argument, in declaration order.
    argument_specs = (
        (('infile',),
         {'help': 'A JSON file containing verses with one reading each.'}),
        (('--number', '-n'),
         {'default': 10,
          'type': int,
          'help': 'Number of verses to analyze'}),
        (('--randomize', '-r'),
         {'default': False,
          'action': 'store_true',
          'help': ('Randomize what verses are analyzed. If this is not set,'
                   ' the first {number} verses are analyzed.')}),
    )
    for flags, options in argument_specs:
        cli.add_argument(*flags, **options)

    return cli.parse_args()


def main():
    """Parse CLI arguments then read and scan verses.

    The JSON file named by the ``infile`` argument is loaded, every entry is
    turned into a ``Verse`` via ``Verse.from_json`` and the resulting list is
    passed to :func:`dev` together with the remaining CLI options.
    """
    args = vars(parse_args())
    # ``infile`` is not a parameter of dev(); take it out of the kwargs.
    infile = args.pop('infile')
    # Use a context manager so the file handle is closed, and read as UTF-8
    # (the standard JSON encoding) instead of the locale's default.
    with open(infile, encoding='utf-8') as verse_file:
        raw_verses = json.load(verse_file)
    args['reference_verses'] = [Verse.from_json(verse)
                                for verse in raw_verses]
    dev(**args)


# Script entry point.  NOTE: this module uses package-relative imports
# (``from .style import ...``), so it has to be run as a module of its
# package (``python -m <package>.dev``) rather than as a bare script.
if __name__ == '__main__':
    main()