Skip to content
Snippets Groups Projects
Commit 470ef76f authored by Simon Will's avatar Simon Will
Browse files

Add script for extracting meters from Hypotactic

parent c3aab459
No related branches found
No related tags found
No related merge requests found
#!/usr/bin/python3
# -*- coding: utf-8 -*-
import argparse
from collections import defaultdict
import json
import allzweckmesser as azm
def main(hypotactic_dir, outfile):
    """Collect the meters occurring in a Hypotactic corpus and save them.

    Every document of the corpus loaded from ``hypotactic_dir`` is
    scanned.  If a document yields poems, each class of a poem other
    than ``'poem'`` is counted as a meter: once per poem in the
    per-poem statistics and once per ``div.line`` element of the poem
    in the per-line statistics.  If a document yields no poems, its
    lines are scanned instead and each class of a line other than
    ``'line'`` is counted as a meter in the per-line statistics.

    The result is written to ``outfile`` as JSON of the form::

        {"poem_meters": {meter: [[title, ...], count], ...},
         "line_meters": {meter: [[title, ...], count], ...}}

    Args:
        hypotactic_dir: Top-level directory of the Hypotactic corpus.
        outfile: Path of the JSON file to write.
    """
    corpus = azm.corpus.HypotacticCorpus.from_directory(hypotactic_dir)

    # meter -> [set of titles of documents using it, occurrence count]
    poem_meters = defaultdict(lambda: [set(), 0])
    line_meters = defaultdict(lambda: [set(), 0])

    for document in corpus.documents:
        title = document.title
        print('Processing {}'.format(title))
        has_poems = False
        for poem in document.get_poems():
            has_poems = True
            meters = [c for c in poem.attrs['class'] if c != 'poem']
            if not meters:
                continue
            # The number of lines does not depend on the meter, so it is
            # computed once per poem instead of once per meter.
            line_count = len(poem.find_all(name='div', class_='line'))
            for meter in meters:
                poem_meters[meter][0].add(title)
                poem_meters[meter][1] += 1
                line_meters[meter][0].add(title)
                line_meters[meter][1] += line_count
        if not has_poems:
            # Without poems, the meter is read from each individual line.
            for line in document.get_lines():
                meters = [c for c in line.attrs['class'] if c != 'line']
                for meter in meters:
                    line_meters[meter][0].add(title)
                    line_meters[meter][1] += 1

    print('Meters: {}'
          .format(set(poem_meters.keys()).union(line_meters.keys())))

    def serializable(meter_stats):
        # Sets are not JSON-serializable; sorting (instead of a bare
        # list()) makes the output file identical from run to run.
        # key=str keeps the sort well-defined for non-string titles.
        return {meter: [sorted(titles, key=str), count]
                for meter, (titles, count) in meter_stats.items()}

    obj = {'poem_meters': serializable(poem_meters),
           'line_meters': serializable(line_meters)}
    with open(outfile, 'w', encoding='utf-8') as f:
        json.dump(obj, f, indent=2)
def parse_args_and_main():
    """Parse the command-line arguments and call ``main`` with them.

    Two positional arguments are expected: the top-level directory of
    the Hypotactic corpus and the path of the output file.
    """
    d = 'Extract occurring meters from a Hypotactic corpus'
    parser = argparse.ArgumentParser(description=d)
    parser.add_argument('hypotactic_dir',
                        help='Top level directory of the Hypotactic corpus')
    # The script writes meter statistics as JSON, not lines.
    parser.add_argument('outfile',
                        help='JSON file to save the extracted meters in')
    args = parser.parse_args()
    main(hypotactic_dir=args.hypotactic_dir, outfile=args.outfile)
# Run the command-line interface only when executed as a script,
# not when this module is imported.
if __name__ == "__main__":
    parse_args_and_main()
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment