From df1cc4c84c37e5ff69f0d179fb2affb304eae515 Mon Sep 17 00:00:00 2001
From: vvye <ekaiser.hellwege@gmail.com>
Date: Sun, 12 Sep 2021 14:29:48 +0200
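Subject: [PATCH] Remove util.contains_any keyword helper

Inline the case-insensitive keyword substring check at its call site in
summarization.py and delete util.contains_any. Also drop the `re` import
from util.py and the `scipy.sparse` import from sentence_selection.py.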

---
 sentence_selection.py |  3 ---
 summarization.py      |  2 +-
 util.py               | 10 ----------
 3 files changed, 1 insertion(+), 14 deletions(-)

diff --git a/sentence_selection.py b/sentence_selection.py
index 0931fae..eff8e44 100644
--- a/sentence_selection.py
+++ b/sentence_selection.py
@@ -1,7 +1,6 @@
 from datetime import datetime, timedelta
 from sklearn.metrics.pairwise import cosine_similarity
 import numpy as np
-from scipy import sparse
 
 import util
 
@@ -48,8 +47,6 @@ def candidate_sentences(articles, date, vectorizer):
 
 
 def sentences_published_on_date(articles, date, tolerance_days, num_first_sentences):
-    # implementation details are the same as ghalandari et al:
-    # a sentence is not included in the final list if it also mentions any date at all
     sentences = []
     for article in articles:
         pub_date = datetime.strptime(article['pub_date'], '%Y-%m-%d')
diff --git a/summarization.py b/summarization.py
index 8642ae3..141cf74 100644
--- a/summarization.py
+++ b/summarization.py
@@ -41,7 +41,7 @@ def summarize(sentences, vectorizer, keywords, num_sentences):
         for i in sorted_indices:
             remaining_indices.remove(i)
             sentence = sentences[i]
-            if not util.contains_any(sentence['text'], keywords):
+            if not any([kw.lower() in sentence['text'].lower() for kw in keywords]):
                 continue
             if redundant(i, selected_indices, X):
                 continue
diff --git a/util.py b/util.py
index 4630ae7..3ba515b 100644
--- a/util.py
+++ b/util.py
@@ -1,5 +1,4 @@
 import os
-import re
 
 
 def subdirs(path):
@@ -10,15 +9,6 @@ def files(path, extension=None):
     return [f for f in os.listdir(path) if os.path.isfile(path / f) and (extension is None or f.endswith(extension))]
 
 
-def contains_any(string, keywords):
-    for keyword in keywords:
-        # following ghalandari, don't account for word boundaries
-        # if re.search(fr'\b{keyword.lower()}\b', string.lower()):
-        if keyword.lower() in string.lower():
-            return True
-    return False
-
-
 def avg(lst):
     return sum(lst) / len(lst)
 
-- 
GitLab