Newer
Older
from datetime import datetime
from tilse.data import timelines
from tilse.evaluation import rouge
# Module-level evaluator shared by evaluate(); configured with the
# 'rouge_1' and 'rouge_2' measures that evaluate() reads back.
evaluator = rouge.TimelineRougeEvaluator(measures=['rouge_1', 'rouge_2'])
def tilse_format(timeline):
    """Wrap a mapping of date strings to sentences in a tilse ``Timeline``.

    Every key of ``timeline`` is parsed with the ``%Y-%m-%d`` format into a
    ``datetime``; the values are passed through unchanged.
    """
    by_date = {}
    for date_string, summary in timeline.items():
        by_date[datetime.strptime(date_string, '%Y-%m-%d')] = summary
    return timelines.Timeline(by_date)
def evaluate(timeline, gold_timelines):
    """Score a timeline against several gold timelines.

    ``timeline`` and each entry of ``gold_timelines`` are converted with
    ``tilse_format`` and passed to the module-level evaluator's
    ``evaluate_align_date_content_costs_many_to_one`` method.

    Returns a ``(rouge_1_f, rouge_2_f)`` tuple holding the ``'f_score'``
    entries of the ``'rouge_1'`` and ``'rouge_2'`` results.
    """
    predicted = tilse_format(timeline)
    references = [tilse_format(gold) for gold in gold_timelines]
    scores = evaluator.evaluate_align_date_content_costs_many_to_one(
        predicted, timelines.GroundTruth(references)
    )
    return scores['rouge_1']['f_score'], scores['rouge_2']['f_score']
def date_f1(timeline, gold_timeline):
    """Return the F1 score of a timeline's dates against a gold timeline's.

    Both arguments are mappings; only their keys are compared. Precision is
    the share of ``timeline`` keys that also occur in ``gold_timeline``, and
    recall is the share of ``gold_timeline`` keys that occur in ``timeline``.

    Returns 0 when either mapping is empty or when no key is shared.
    """
    dates = timeline.keys()
    gold_dates = gold_timeline.keys()

    # With an empty side, precision or recall would divide by zero; score
    # it as 0, the same value used below when nothing overlaps.
    if len(dates) == 0 or len(gold_dates) == 0:
        return 0

    shared = sum(1 for d in dates if d in gold_dates)
    precision = shared / len(dates)
    recall = shared / len(gold_dates)
    if precision + recall == 0:
        return 0
    return (2 * precision * recall) / (precision + recall)