Skip to content
Snippets Groups Projects
Commit df1cc4c8 authored by vvye's avatar vvye
Browse files

Remove keyword function

parent 037dba51
No related branches found
No related tags found
No related merge requests found
from datetime import datetime, timedelta
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np
from scipy import sparse
import util
......@@ -48,8 +47,6 @@ def candidate_sentences(articles, date, vectorizer):
def sentences_published_on_date(articles, date, tolerance_days, num_first_sentences):
# implementation details are the same as ghalandari et al:
# a sentence is not included in the final list if it also mentions any date at all
sentences = []
for article in articles:
pub_date = datetime.strptime(article['pub_date'], '%Y-%m-%d')
......
......@@ -41,7 +41,7 @@ def summarize(sentences, vectorizer, keywords, num_sentences):
for i in sorted_indices:
remaining_indices.remove(i)
sentence = sentences[i]
if not util.contains_any(sentence['text'], keywords):
if not any([kw.lower() in sentence['text'].lower() for kw in keywords]):
continue
if redundant(i, selected_indices, X):
continue
......
import os
import re
def subdirs(path):
......@@ -10,15 +9,6 @@ def files(path, extension=None):
return [f for f in os.listdir(path) if os.path.isfile(path / f) and (extension is None or f.endswith(extension))]
def contains_any(string, keywords):
    """Return True if *string* contains at least one of *keywords*.

    Matching is case-insensitive and purely substring-based: following
    Ghalandari et al., word boundaries are deliberately NOT taken into
    account, so the keyword "art" also matches inside "party".

    Args:
        string: The text to search in.
        keywords: An iterable of keyword strings to look for.

    Returns:
        True if any keyword occurs in the text, False otherwise (including
        when *keywords* is empty).
    """
    # Lower-case the text once instead of once per keyword.
    haystack = string.lower()
    return any(keyword.lower() in haystack for keyword in keywords)
def avg(lst):
    """Return the arithmetic mean of the values in *lst*.

    Args:
        lst: A collection of numbers. Sized containers (list, tuple, ...)
            are used as-is; un-sized iterables such as generators are
            materialized first so they can be both summed and counted.

    Returns:
        The sum of the values divided by their count.

    Raises:
        ZeroDivisionError: If *lst* is empty.
    """
    # A generator has no len() and can only be consumed once, so copy it
    # into a list; anything that already knows its length is left untouched.
    values = lst if hasattr(lst, '__len__') else list(lst)
    return sum(values) / len(values)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment