diff --git a/heideltime_util.py b/heideltime_util.py index 0638d413c423c28806f99a507851bf8dba5d7d1d..0c4bab3aa13885a645a0cc203a5a47df2b137003 100644 --- a/heideltime_util.py +++ b/heideltime_util.py @@ -42,13 +42,16 @@ def mentioned_dates_by_sentence(filename, pub_date): if not sentence.strip(): continue dates_for_this_sentence = [] - root = ET.fromstring('<root>' + sentence + '</root>') - for child in root: - if child.tag == 'TIMEX3' and child.attrib['type'] in ['DATE', 'TIME']: - match = date_format_regex.match(child.attrib['value']) - if match is not None: - dates_for_this_sentence.append(match[0]) - dates_by_sentences.append(dates_for_this_sentence) + try: + root = ET.fromstring('<root>' + sentence + '</root>') + for child in root: + if child.tag == 'TIMEX3' and child.attrib['type'] in ['DATE', 'TIME']: + match = date_format_regex.match(child.attrib['value']) + if match is not None: + dates_for_this_sentence.append(match[0]) + dates_by_sentences.append(dates_for_this_sentence) + except ET.ParseError: + continue # change directory back os.chdir(working_dir)