Skip to content
Snippets Groups Projects
Commit bb521dda authored by vvye
Browse files

Merge remote-tracking branch 'origin/master'

# Conflicts:
#	evaluation.py
#	run.py
parents 781c23f3 e985c022
No related branches found
No related tags found
No related merge requests found
......@@ -21,7 +21,7 @@ def summarize(sentences, vectorizer, keywords, by_tokens, num_sentences, num_tok
else:
return len(selected_indices) < round(num_sentences)
while remaining_indices and constraint():
while True:
# if the summary already has sentences, calculate the current summary vector
if selected_indices:
......@@ -44,16 +44,33 @@ def summarize(sentences, vectorizer, keywords, by_tokens, num_sentences, num_tok
similarities[i] = cosine_similarity(candidate_summary_vector, centroid)[0, 0]
sorted_indices = sorted(remaining_indices, key=lambda i: similarities[i], reverse=True)
# go through all sentences in order from "best" (most similar to centroid) to "worst"
for i in sorted_indices:
# don't consider this sentence in the next round
remaining_indices.remove(i)
sentence = sentences[i]
if not any([kw.lower() in sentence['text'].lower() for kw in keywords]):
# if the sentence contains no keywords, skip it
if not any([kw.lower() in sentences[i]['text'].lower() for kw in keywords]):
continue
# if the sentence is near-identical to the current summary, skip it
if redundant(i, selected_indices, X):
continue
# if the sentence would make the summary too long, skip it
if by_tokens:
if sum([len(sentences[k]['text'].split()) for k in (selected_indices + [i])]) > round(num_tokens):
continue
else:
if len(selected_indices) + 1 > round(num_sentences):
continue
# otherwise, select the sentence
selected_indices.append(i)
break
# if there are still sentences left, repeat this step
if not remaining_indices:
break
return [sentences[i]['text'] for i in selected_indices]
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment