From edf799ed0b9b1a4858b0f841d08145b273945643 Mon Sep 17 00:00:00 2001
From: Simon Will <will@cl.uni-heidelberg.de>
Date: Wed, 26 Sep 2018 20:18:39 +0200
Subject: [PATCH] Add script for meter-based Hypotactic extraction

---
 scripts/extract_verses_by_meters.py | 41 +++++++++++++++++++++++++++++
 1 file changed, 41 insertions(+)
 create mode 100644 scripts/extract_verses_by_meters.py

diff --git a/scripts/extract_verses_by_meters.py b/scripts/extract_verses_by_meters.py
new file mode 100644
index 0000000..a87a267
--- /dev/null
+++ b/scripts/extract_verses_by_meters.py
@@ -0,0 +1,68 @@
+#!/usr/bin/python3
+# -*- coding: utf-8 -*-
+
+import argparse
+import json
+import os
+
+import allzweckmesser as azm
+
+
+def main(hypotactic_dir, top_out_dir, meters=None):
+    """Write the verses of the requested meters to per-document JSON files.
+
+    For every document of the corpus, a directory named after the document
+    title is created below ``top_out_dir``.  For every requested meter with
+    at least one line in that document, the ``to_dict()`` results of its
+    verses are dumped as a JSON list to ``<meter>.json`` in that directory.
+
+    :param hypotactic_dir: Directory handed to
+        ``HypotacticCorpus.from_directory``.
+    :param top_out_dir: Directory below which the per-document directories
+        are created.
+    :param meters: Meter names to extract.  ``None`` selects
+        ``['hexameter']``; argparse passes ``None`` when ``--meters`` is
+        omitted.
+    """
+    if meters is None:
+        # A None sentinel replaces the former mutable list default and makes
+        # a call with an explicit None (as argparse does) fall back as well.
+        meters = ['hexameter']
+    corpus = azm.corpus.HypotacticCorpus.from_directory(hypotactic_dir)
+    for document in corpus.documents:
+        doc_out_dir = os.path.join(top_out_dir, document.title)
+        os.makedirs(doc_out_dir, exist_ok=True)
+        for meter in meters:
+            verses_for_meter = [
+                azm.corpus.HypotacticLine(line).verse.to_dict()
+                for line in document.get_lines_with_meter([meter])
+            ]
+            if not verses_for_meter:
+                # No file is written for a meter the document does not use.
+                continue
+            out_path = os.path.join(doc_out_dir, '{}.json'.format(meter))
+            with open(out_path, 'w') as f:
+                json.dump(verses_for_meter, f)
+
+
+def parse_args_and_main():
+    """Parse the command line and call :func:`main` with the result.
+
+    The parsed namespace is passed on as keyword arguments, so the argument
+    names defined here have to match the parameter names of :func:`main`.
+    """
+    d = 'Extract lines from a Hypotactic corpus'
+    parser = argparse.ArgumentParser(description=d)
+    # Without --meters the namespace holds None; main() then uses its default.
+    parser.add_argument('--meters', '-m', nargs='+',
+                        help='Meters of the lines to extract')
+    parser.add_argument('hypotactic_dir',
+                        help='Top level directory of the Hypotactic corpus')
+    parser.add_argument('top_out_dir',
+                        help='Top level directory of the created JSON files')
+    args = parser.parse_args()
+    main(**vars(args))
+
+
+if __name__ == '__main__':
+    parse_args_and_main()
-- 
GitLab