From e97c777b789d11046143ee156a70b8d9481b2246 Mon Sep 17 00:00:00 2001 From: vvye <ekaiser.hellwege@gmail.com> Date: Fri, 24 Sep 2021 14:31:38 +0200 Subject: [PATCH] Remove debug logic --- dataset.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/dataset.py b/dataset.py index d53ca44..a61d63d 100644 --- a/dataset.py +++ b/dataset.py @@ -128,8 +128,6 @@ def get_crisis_dataset(): date_path = topic_path / 'public' / 'content' / pub_date for article_filename in util.files(date_path, extension='.cont'): article_file_path = date_path / article_filename - if '1093.htm.cont' not in str(article_file_path): - continue print(article_file_path) article = {'pub_date': pub_date, 'sentences': []} @@ -144,8 +142,8 @@ def get_crisis_dataset(): # get date mentions using HeidelTime # and add them to the sentence data mentioned_dates_by_sentence = heideltime_util.mentioned_dates_by_sentence(article_file_path, pub_date) - mentioned_dates_by_sentence = mentioned_dates_by_sentence[1:] - assert len(mentioned_dates_by_sentence) == len(sentences_in_article) # skip first line (headline) + mentioned_dates_by_sentence = mentioned_dates_by_sentence[1:] # skip first line (headline) + assert len(mentioned_dates_by_sentence) == len(sentences_in_article) for i in range(len(sentences_in_article)): sentence = sentences_in_article[i] sentence['mentioned_dates'] = mentioned_dates_by_sentence[i] -- GitLab