From 120282019b2b471f76c6788114922afd0ac701cc Mon Sep 17 00:00:00 2001
From: vvye <ekaiser.hellwege@gmail.com>
Date: Wed, 15 Sep 2021 17:09:52 +0200
Subject: [PATCH] Implement date uniformity plotting

---
 date_selection.py | 20 +++++++++++++++++++-
 1 file changed, 19 insertions(+), 1 deletion(-)

diff --git a/date_selection.py b/date_selection.py
index 4d3f8d5..ada541a 100644
--- a/date_selection.py
+++ b/date_selection.py
@@ -1,13 +1,15 @@
 import math
 from collections import Counter
+from random import random
 
 import igraph
 import numpy as np
+from matplotlib import pyplot as plt
 
 import util
 
 
-def rank_dates_by_mention_count(articles, start_date, end_date):
+def rank_dates_by_mention_count(articles, start_date, end_date, num_dates):
     mention_count = Counter({})
     for article in articles:
         for sentence in article['sentences']:
@@ -66,6 +68,12 @@ def rank_dates_by_wilson(articles, start_date, end_date, num_dates):
             best_uniformity = uniformity
             best_ranked_dates = ranked_dates
 
+    plot_dates(plt, best_ranked_dates[:num_dates])
+    pagerank_scores = g.pagerank(directed=True, weights=g.es['weight'])
+    ranked_dates = util.rank(vertex_names, scores=pagerank_scores)
+    plot_dates(plt, ranked_dates[:num_dates])
+    plt.show()
+
     return best_ranked_dates
 
 
@@ -73,3 +81,13 @@ def date_uniformity(dates):
     dates.sort()
     date_diffs = [util.days_between(dates[i], dates[i + 1]) for i in range(len(dates) - 1)]
     return np.std(date_diffs)
+
+
+def plot_dates(plt, dates):  # plot, on `plt`, the share of dates whose frac() is <= x, for x = 0.00 .. 0.99
+    xs = [0.01 * x for x in range(100)]  # NOTE(review): stops at 0.99, so a date with frac == 1.0 is never counted - confirm
+    fracs = [frac(date, dates) for date in dates]  # computed once instead of once per threshold in xs
+    plt.plot(xs, [sum(f <= x for f in fracs) / len(dates) for x in xs])
+
+
+def frac(date, dates):  # days from min(dates) to date, relative to the min-to-max span; a zero span is treated as 1
+    return util.days_between(date, min(dates)) / (util.days_between(max(dates), min(dates)) or 1)  # avoids ZeroDivisionError
-- 
GitLab