Skip to content
Snippets Groups Projects
Commit e682cf0c authored by vvye's avatar vvye
Browse files

Implement personalized pagerank by date uniformity

parent 53a69981
No related branches found
No related tags found
No related merge requests found
import math
from collections import Counter
import igraph
......@@ -37,13 +38,35 @@ def rank_dates_by_wilson(articles, start_date, end_date, num_dates):
g = igraph.Graph.TupleList(edges, directed=True, edge_attrs='weight')
vertex_names = g.vs['name']
# rank vertices by pagerank score
pagerank_scores = g.pagerank(directed=True, weights=g.es['weight'])
ranked_dates = util.rank(vertex_names, scores=pagerank_scores)
# rank the dates with personalized pagerank
# (do this multiple times with different "vertex weights", depending on alpha,
# and return the result that is most uniform)
best_uniformity = math.inf
best_ranked_dates = []
candidate_alphas = [0.01 * x for x in range(1, 100)]
for alpha in candidate_alphas:
print(date_uniformity(ranked_dates[:num_dates]))
# calculate vertex "weights" for personalized pagerank
vertex_weights = []
start = min(vertex_names)
for date in vertex_names:
diff_to_start = util.days_between(start, date)
try:
vertex_weights.append(alpha ** -diff_to_start)
except OverflowError:
vertex_weights.append(math.inf)
return ranked_dates
# rank vertices with personalized pagerank
pagerank_scores = g.personalized_pagerank(directed=True, weights=g.es['weight'], reset=vertex_weights)
ranked_dates = util.rank(vertex_names, scores=pagerank_scores)
# if this result is the most uniform yet, save it
uniformity = date_uniformity(ranked_dates[:num_dates])
if uniformity < best_uniformity:
best_uniformity = uniformity
best_ranked_dates = ranked_dates
return best_ranked_dates
def date_uniformity(dates):
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment