Skip to content
Snippets Groups Projects
Commit e63996a5 authored by vvye
Browse files

Fix the same bug again but for real this time

parent 45061591
No related branches found
No related tags found
No related merge requests found
......@@ -128,6 +128,8 @@ def get_crisis_dataset():
date_path = topic_path / 'public' / 'content' / pub_date
for article_filename in util.files(date_path, extension='.cont'):
article_file_path = date_path / article_filename
if '1093.htm.cont' not in str(article_file_path):
continue
print(article_file_path)
article = {'pub_date': pub_date, 'sentences': []}
......
......@@ -49,9 +49,9 @@ def mentioned_dates_by_sentence(filename, pub_date):
match = date_format_regex.match(child.attrib['value'])
if match is not None:
dates_for_this_sentence.append(match[0])
dates_by_sentences.append(dates_for_this_sentence)
except ET.ParseError:
continue
pass
dates_by_sentences.append(dates_for_this_sentence)
# change directory back
os.chdir(working_dir)
......
0% Loading. Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment