Skip to content
Snippets Groups Projects
Commit fa40d35e authored by vvye's avatar vvye
Browse files

what

parent 51642aa1
No related branches found
No related tags found
No related merge requests found
......@@ -128,8 +128,11 @@ def get_crisis_dataset():
date_path = topic_path / 'public' / 'content' / pub_date
for article_filename in util.files(date_path, extension='.cont'):
article_file_path = date_path / article_filename
if '2429.htm.cont' not in str(article_file_path):
# nah
if '2429.htm.cont' in str(article_file_path):
continue
print(article_file_path)
article = {'pub_date': pub_date, 'sentences': []}
......
......@@ -17,7 +17,7 @@ def mentioned_dates_by_sentence(filename, pub_date):
# create a temporary copy of the file with interfering characters escaped
escaped_filename = str(filename) + '.escaped'
with util.detect_encoding_and_open(filename) as f, open(escaped_filename, 'w', encoding='utf-8') as g:
for line in f:
for line in f.readlines():
g.write(escape(line))
# change to heideltime directory (and keep track of the path back to the root)
......
0% Loading. Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment