diff --git a/evaluation.py b/evaluation.py index 30bdb5640b11f8f4f7f02f0e2323400166b7dd93..2ea78b41949045a3bb6b2c1b783a56a07cb35878 100644 --- a/evaluation.py +++ b/evaluation.py @@ -2,6 +2,7 @@ import csv from datetime import datetime from tilse.data import timelines from tilse.evaluation import rouge +import numpy as np import util @@ -68,6 +69,12 @@ class ResultLogger: avg_date_f = util.avg([row['date_f1'] for row in self.results]) return avg_ar1_f, avg_ar2_f, avg_date_f + def stdev_scores(self): + std_ar1_f = np.std([row['ar1_f'] for row in self.results]) + std_ar2_f = np.std([row['ar2_f'] for row in self.results]) + std_date_f = np.std([row['date_f1'] for row in self.results]) + return std_ar1_f, std_ar2_f, std_date_f + def average_stats(self): avg_gold_sentences = util.avg([row['avg_gold_sentences'] for row in self.results]) avg_system_sentences = util.avg([row['avg_system_sentences'] for row in self.results]) @@ -81,6 +88,7 @@ class ResultLogger: def save_to_file(self, filename): avg_ar1_f, avg_ar2_f, avg_date_f1 = self.average_scores() + std_ar1_f, std_ar2_f, std_date_f1 = self.stdev_scores() avg_gold_sentences, avg_system_sentences, avg_gold_tokens, avg_system_tokens = self.average_stats() with open(filename, 'w', encoding='utf-8') as f: w = csv.DictWriter(f, ['topic', 'gold_timeline', 'avg_gold_sentences', 'avg_system_sentences', @@ -98,3 +106,14 @@ class ResultLogger: 'ar2_f': avg_ar2_f, 'date_f1': avg_date_f1 }) + w.writerow({ + 'topic': 'stdev', + 'gold_timeline': '', + 'avg_gold_sentences': '', + 'avg_system_sentences': '', + 'avg_gold_tokens': '', + 'avg_system_tokens': '', + 'ar1_f': std_ar1_f, + 'ar2_f': std_ar2_f, + 'date_f1': std_date_f1 + }) diff --git a/run.py b/run.py index a6a50a2d70031c44875275888247e9309888f196..8c5b2901822e47327599e0783810389da3884a72 100644 --- a/run.py +++ b/run.py @@ -60,8 +60,7 @@ if __name__ == '__main__': required=True) parser.add_argument('--length_constraint', type=str, choices=['sentences', 'tokens'], default='sentences', help='what constraint to impose on the length of the generated timeline ' - '(number of sentences or number of tokens)', - required=True) + '(number of sentences or number of tokens)') parser.add_argument('--print_timelines', action='store_true', help='whether to print the timelines to the console after generating them')