From df1cc4c84c37e5ff69f0d179fb2affb304eae515 Mon Sep 17 00:00:00 2001 From: vvye <ekaiser.hellwege@gmail.com> Date: Sun, 12 Sep 2021 14:29:48 +0200 Subject: [PATCH] Remove keyword function --- sentence_selection.py | 3 --- summarization.py | 2 +- util.py | 10 ---------- 3 files changed, 1 insertion(+), 14 deletions(-) diff --git a/sentence_selection.py b/sentence_selection.py index 0931fae..eff8e44 100644 --- a/sentence_selection.py +++ b/sentence_selection.py @@ -1,7 +1,6 @@ from datetime import datetime, timedelta from sklearn.metrics.pairwise import cosine_similarity import numpy as np -from scipy import sparse import util @@ -48,8 +47,6 @@ def candidate_sentences(articles, date, vectorizer): def sentences_published_on_date(articles, date, tolerance_days, num_first_sentences): - # implementation details are the same as ghalandari et al: - # a sentence is not included in the final list if it also mentions any date at all sentences = [] for article in articles: pub_date = datetime.strptime(article['pub_date'], '%Y-%m-%d') diff --git a/summarization.py b/summarization.py index 8642ae3..141cf74 100644 --- a/summarization.py +++ b/summarization.py @@ -41,7 +41,7 @@ def summarize(sentences, vectorizer, keywords, num_sentences): for i in sorted_indices: remaining_indices.remove(i) sentence = sentences[i] - if not util.contains_any(sentence['text'], keywords): + if not any([kw.lower() in sentence['text'].lower() for kw in keywords]): continue if redundant(i, selected_indices, X): continue diff --git a/util.py b/util.py index 4630ae7..3ba515b 100644 --- a/util.py +++ b/util.py @@ -1,5 +1,4 @@ import os -import re def subdirs(path): @@ -10,15 +9,6 @@ def files(path, extension=None): return [f for f in os.listdir(path) if os.path.isfile(path / f) and (extension is None or f.endswith(extension))] -def contains_any(string, keywords): - for keyword in keywords: - # following ghalandari, don't account for word boundaries - # if re.search(fr'\b{keyword.lower()}\b', string.lower()): - if keyword.lower() in string.lower(): - return True - return False - - def avg(lst): return sum(lst) / len(lst) -- GitLab