From 120282019b2b471f76c6788114922afd0ac701cc Mon Sep 17 00:00:00 2001
From: vvye <ekaiser.hellwege@gmail.com>
Date: Wed, 15 Sep 2021 17:09:52 +0200
Subject: [PATCH] Implement date uniformity plotting

---
 date_selection.py | 47 ++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 46 insertions(+), 1 deletion(-)

diff --git a/date_selection.py b/date_selection.py
index 4d3f8d5..ada541a 100644
--- a/date_selection.py
+++ b/date_selection.py
@@ -1,13 +1,15 @@
 import math
 from collections import Counter
+from random import random
 
 import igraph
 import numpy as np
+from matplotlib import pyplot as plt
 
 import util
 
 
-def rank_dates_by_mention_count(articles, start_date, end_date):
+def rank_dates_by_mention_count(articles, start_date, end_date, num_dates):
     mention_count = Counter({})
     for article in articles:
         for sentence in article['sentences']:
@@ -66,6 +68,12 @@ def rank_dates_by_wilson(articles, start_date, end_date, num_dates):
             best_uniformity = uniformity
             best_ranked_dates = ranked_dates
 
+    plot_dates(plt, best_ranked_dates[:num_dates])
+    pagerank_scores = g.pagerank(directed=True, weights=g.es['weight'])
+    ranked_dates = util.rank(vertex_names, scores=pagerank_scores)
+    plot_dates(plt, ranked_dates[:num_dates])
+    plt.show()
+
     return best_ranked_dates
 
 
@@ -73,3 +81,40 @@ def date_uniformity(dates):
     dates.sort()
     date_diffs = [util.days_between(dates[i], dates[i + 1]) for i in range(len(dates) - 1)]
     return np.std(date_diffs)
+
+
+def plot_dates(plt, dates):
+    """Plot the cumulative distribution of `dates` over their own time span.
+
+    Each date is mapped to its relative position within the date range (see
+    `frac`); for every sample x in [0, 1] the curve gives the share of dates
+    whose position is at most x.
+
+    :param plt: object providing `plot(xs, ys)`, e.g. matplotlib.pyplot
+    :param dates: dates to plot; nothing is drawn when this is empty
+    """
+    if not dates:
+        # nothing to plot, and len(dates) == 0 would divide by zero below
+        return
+
+    # 101 samples so that x = 1.0 is included and the latest date is counted
+    xs = [x / 100 for x in range(101)]
+    # compute every date's position once rather than once per sample
+    positions = [frac(date, dates) for date in dates]
+    ys = [sum(1 for position in positions if position <= x) / len(dates) for x in xs]
+    plt.plot(xs, ys)
+
+
+def frac(date, dates):
+    """Return the relative position of `date` within the range of `dates`.
+
+    The result is the distance of `date` from the earliest date divided by the
+    distance between the latest and the earliest date, so the latest date maps
+    to 1. A zero-length range (presumably when all dates coincide) yields 0.0
+    instead of a division by zero.
+    """
+    earliest = min(dates)
+    span = util.days_between(max(dates), earliest)
+    if span == 0:
+        return 0.0
+    return util.days_between(date, earliest) / span
-- 
GitLab