Skip to content
Snippets Groups Projects
Commit 7c49fcd7 authored by vvye's avatar vvye
Browse files

Refactor list ranking

parent a5d0ee9d
No related branches found
No related tags found
No related merge requests found
......@@ -3,6 +3,8 @@ from datetime import datetime
import igraph
import util
def rank_dates_by_mention_count(articles, start_date, end_date):
mention_count = Counter({})
......@@ -38,9 +40,8 @@ def rank_dates_by_wilson(articles, start_date, end_date):
# igraph.plot(g, layout='kk', vertex_label=g.vs['name'], bbox=(3000, 3000))
# rate vertices by pagerank score
# rank vertices by pagerank score
pagerank_scores = g.pagerank()
sorted_indices = sorted(list(range(len(pagerank_scores))), key=lambda i: pagerank_scores[i], reverse=True)
ranked_dates = [vertex_names[i] for i in sorted_indices]
ranked_dates = util.rank(vertex_names, scores=pagerank_scores)
return ranked_dates
......@@ -3,6 +3,8 @@ from sklearn.metrics.pairwise import cosine_similarity
import numpy as np
from scipy import sparse
import util
def candidate_sentences(articles, date, vectorizer):
pub_sentences = sentences_published_on_date(articles, date, tolerance_days=2, num_first_sentences=5)
......@@ -34,13 +36,10 @@ def candidate_sentences(articles, date, vectorizer):
candidate_sentence_pool = ment_sentences + pub_sentences
sent_vectors = vectorizer.transform([s['text'] for s in candidate_sentence_pool]).toarray().tolist()
similarities = cosine_similarity([date_vector], sent_vectors)[0]
sorted_indices = sorted(list(range(len(candidate_sentence_pool))), key=lambda i: similarities[i], reverse=True)
sorted_sentences = [candidate_sentence_pool[i] for i in sorted_indices]
sorted_scores = [similarities[i] for i in sorted_indices]
sorted_sentences = util.rank(candidate_sentence_pool, scores=similarities)
# only consider sentences above the "knee point"
cutoff_index = knee_point(sorted_scores)
cutoff_index = knee_point(sorted(similarities, reverse=True))
candidates = sorted_sentences[:cutoff_index + 1]
if not candidates:
......
......@@ -21,3 +21,15 @@ def contains_any(string, keywords):
def avg(lst):
    """
    Computes the arithmetic mean of a list of numbers.
    :param lst: The values to average; must not be empty (an empty list causes a division by zero).
    :return: The sum of the values divided by the number of values.
    """
    total = sum(lst)
    count = len(lst)
    return total / count
def rank(lst, scores):
    """
    Sorts a list by the values in another, corresponding, list.
    Items with equal scores keep their original relative order.
    :param lst: The list to be sorted.
    :param scores: A list of values to sort by, where each item corresponds to the item in lst
    (being that item's "score"). Must have the same length as lst.
    :return: A copy of lst sorted from highest to lowest score.
    :raises ValueError: If lst and scores do not have the same length.
    """
    # a length mismatch would otherwise silently drop items (lst longer than scores)
    # or fail with an opaque IndexError (lst shorter than scores)
    if len(lst) != len(scores):
        raise ValueError(
            'lst and scores must have the same length (got %d and %d)' % (len(lst), len(scores))
        )

    # sort the positions rather than the items, so that the items themselves never need to be comparable
    sorted_indices = sorted(range(len(scores)), key=lambda i: scores[i], reverse=True)
    return [lst[i] for i in sorted_indices]
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment