Skip to content
Snippets Groups Projects
Commit 32ad8ed1 authored by vvye
Browse files

Rename sentence summarization to sentence shortening for clarity

parent bb521dda
No related branches found
No related tags found
No related merge requests found
......@@ -72,7 +72,7 @@ if __name__ == '__main__':
required=True)
parser.add_argument('--length_constraint',
type=str,
choices=['sentences', 'tokens'], default='sentences',
choices=['sentences', 'tokens'], default='tokens',
help='what constraint to impose on the length of the generated timeline '
'(number of sentences or number of tokens)')
parser.add_argument('--shorten_sentences',
......
......@@ -8,11 +8,11 @@ with open('data/in/sentence_summarization_vocab.txt', encoding='utf-8') as f:
vocab = [line.strip() for line in f.readlines() if line.strip()]
def summarization(sentence, num_tokens, do_deunk=True):
def shorten(sentence, num_tokens, do_resolve_unks=True):
try:
summarized_sentence = summarized_sentences[sentence.lower()][num_tokens]
if do_deunk:
summarized_sentence = deunk(sentence, summarized_sentence)
if do_resolve_unks:
summarized_sentence = resolve_unks(sentence, summarized_sentence)
return summarized_sentence
except KeyError:
print(sentence)
......@@ -21,7 +21,7 @@ def summarization(sentence, num_tokens, do_deunk=True):
return sentence
def deunk(sentence, summarized_sentence):
def resolve_unks(sentence, summarized_sentence):
sentence = sentence.split()
summarized_sentence = summarized_sentence.split()
......@@ -54,4 +54,4 @@ def deunk(sentence, summarized_sentence):
if __name__ == '__main__':
print(summarization('and this was on - going in egypt before january 25 .', 10))
print(shorten('and this was on - going in egypt before january 25 .', 10))
......@@ -5,7 +5,7 @@ import date_selection
import evaluation
import sentence_selection
import summarization
import sentence_summarization
import sentence_shortening
import util
......@@ -44,8 +44,8 @@ def make_timeline(articles, gold_timeline, keywords, by_tokens, shorten_sentence
new_candidate_sentences = []
for i in range(len(candidate_sentences)):
new_candidate_sentences.append({
'text': sentence_summarization.summarization(candidate_sentences[i]['text'], shortening_length,
resolve_unks),
'text': sentence_shortening.shorten(candidate_sentences[i]['text'], shortening_length,
resolve_unks),
'mentioned_dates': candidate_sentences[i]['mentioned_dates']
})
candidate_sentences = new_candidate_sentences
......@@ -58,7 +58,7 @@ def make_timeline(articles, gold_timeline, keywords, by_tokens, shorten_sentence
# shorten sentences if needed
if shorten_sentences == 'after_summarization':
summary_for_date = [sentence_summarization.summarization(sentence, 8, resolve_unks)
summary_for_date = [sentence_shortening.shorten(sentence, shortening_length, resolve_unks)
for sentence in summary_for_date]
timeline[date] = summary_for_date
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment