#!/usr/bin/python3
# -*- coding: utf-8 -*-
"""Extract verses from a Hypotactic corpus into per-meter JSON files.

For every document of the corpus one output directory is created (named
after the document title), and for every requested meter that has at
least one matching line a ``<meter>.json`` file is written into it.
"""

import argparse
import json
import os

import allzweckmesser as azm

# Meters extracted when the caller does not name any.  Kept immutable so
# it can safely be shared between calls.
DEFAULT_METERS = ('hexameter',)


def main(hypotactic_dir, top_out_dir, meters=None):
    """Write the verses of each document to JSON files, one per meter.

    Args:
        hypotactic_dir: Top level directory of the Hypotactic corpus;
            passed to ``azm.corpus.HypotacticCorpus.from_directory``.
        top_out_dir: Top level directory of the created JSON files.  A
            subdirectory per document title is created below it.
        meters: Iterable of meter names to extract.  ``None`` (the
            default, and what the command line passes when ``--meters``
            is not given) selects ``DEFAULT_METERS``.
    """
    # The command line hands over ``None`` when ``--meters`` is omitted;
    # fall back to the default instead of failing on iteration.
    if meters is None:
        meters = DEFAULT_METERS

    corpus = azm.corpus.HypotacticCorpus.from_directory(hypotactic_dir)
    for document in corpus.documents:
        doc_out_dir = os.path.join(top_out_dir, document.title)
        # The directory is created even if no meter matches, as before.
        os.makedirs(doc_out_dir, exist_ok=True)
        for meter in meters:
            verses_for_meter = [
                azm.corpus.HypotacticLine(line).verse.to_dict()
                for line in document.get_lines_with_meter([meter])
            ]
            # Skip meters without any line so no empty JSON files appear.
            if not verses_for_meter:
                continue
            out_path = os.path.join(doc_out_dir, '{}.json'.format(meter))
            with open(out_path, 'w', encoding='utf-8') as f:
                json.dump(verses_for_meter, f)


def parse_args_and_main():
    """Parse the command line arguments and run ``main`` with them."""
    d = 'Extract lines from a Hypotactic corpus'
    parser = argparse.ArgumentParser(description=d)
    parser.add_argument('--meters', '-m', nargs='+',
                        help='Meters of the lines to extract')
    parser.add_argument('hypotactic_dir',
                        help='Top level directory of the Hypotactic corpus')
    parser.add_argument('top_out_dir',
                        help='Top level directory of the created JSON files')
    args = parser.parse_args()
    # Argument names match the parameters of ``main`` one to one.
    main(**vars(args))


if __name__ == '__main__':
    parse_args_and_main()