Skip to content
Snippets Groups Projects
Commit eb2bfe91 authored by chrysanthopoulou
Browse files

Add cumulative kudo distribution of fanfics

parent 32a5209c
No related branches found
No related tags found
No related merge requests found
...@@ -6,6 +6,36 @@ import pandas as pd ...@@ -6,6 +6,36 @@ import pandas as pd
import statistics import statistics
import re import re
from nltk.probability import FreqDist from nltk.probability import FreqDist
import numpy as np
# code snippets for prettifying plots
#colours
pink = '#d600a7'
light_green = '#55a480'
blue_grey = '#5d9c9c'
purple_grey = '#636273'
CB91_Blue = '#2CBDFE'
CB91_Green = '#47DBCD'
CB91_Pink = '#F3A0F2'
CB91_Purple = '#9D2EC5'
CB91_Violet = '#661D98'
CB91_Amber = '#F5B14C'
color_list = [pink, light_green, purple_grey, blue_grey, CB91_Green, CB91_Pink, CB91_Blue, CB91_Amber,
CB91_Purple, CB91_Violet]
plt.rcParams['axes.prop_cycle'] = plt.cycler(color=color_list)
#some colour palette playing around
cm = sns.cubehelix_palette(start=.5, rot=-.75, as_cmap=True)
cm1 = sns.cubehelix_palette(start=.5, rot=-.5, as_cmap=True)
cm2 = sns.cubehelix_palette(as_cmap=True)
#palette_1 = sns.color_palette("flare")
#palette_2 = sns.color_palette("mako_r", as_cmap=True)
#file header: #file header:
# work_id,title,author,rating,category,fandom,relationship,character,additional tags,language,published,status,status date,words,chapters,comments,kudos,bookmarks,hits,all_kudos,all_bookmarks,body # work_id,title,author,rating,category,fandom,relationship,character,additional tags,language,published,status,status date,words,chapters,comments,kudos,bookmarks,hits,all_kudos,all_bookmarks,body
...@@ -14,7 +44,10 @@ from nltk.probability import FreqDist ...@@ -14,7 +44,10 @@ from nltk.probability import FreqDist
grisha_fanfics = pd.read_csv("grishaverse/data/fanfics/grishaverse_fics.csv") grisha_fanfics = pd.read_csv("grishaverse/data/fanfics/grishaverse_fics.csv")
tog_fanfics = pd.read_csv("throne_of_glass/data/fanfics/throne_of_glass_fics.csv") tog_fanfics = pd.read_csv("throne_of_glass/data/fanfics/throne_of_glass_fics.csv")
""" """
# plot distribution of kudos for Grishaverse Fanfics
grisha_kudos = grisha_fanfics["kudos"].values.tolist() grisha_kudos = grisha_fanfics["kudos"].values.tolist()
grisha_kudos_freq_dist = FreqDist(grisha_kudos) grisha_kudos_freq_dist = FreqDist(grisha_kudos)
...@@ -22,6 +55,18 @@ grisha_kudos_freq_dist = FreqDist(grisha_kudos) ...@@ -22,6 +55,18 @@ grisha_kudos_freq_dist = FreqDist(grisha_kudos)
dist_panda = pd.Series(dict(grisha_kudos_freq_dist)) dist_panda = pd.Series(dict(grisha_kudos_freq_dist))
#print(dist_panda) #print(dist_panda)
# sort, normalise and round the panda series
new_dist = dist_panda.sort_index()
for i in range(0, len(new_dist.index)):
#for index in new_token_len_dist.index:
new_dist.iat[i] = round(new_dist.iat[i]/len(grisha_kudos), 3) # relative frequency of this kudos count (count / number of fics), rounded to 3 decimals
#if float(new_token_len_dist.iat[i]) == 0.00:
# new_token_len_dist.drop(index=i) # here it is used as the label, so we want the index, not index -1; bad work-around, I'm sorry
#calculate cumulative distribution
cum_dist = np.cumsum(new_dist.values)
# plot using matplotlib and seaborn # plot using matplotlib and seaborn
...@@ -31,16 +76,58 @@ fig, ax = plt.subplots(figsize=(10,10)) ...@@ -31,16 +76,58 @@ fig, ax = plt.subplots(figsize=(10,10))
# call function for bar (value) labels # call function for bar (value) labels
#addlabels(x=new_sent_len_dist.index, y=new_sent_len_dist.values) #addlabels(x=new_sent_len_dist.index, y=new_sent_len_dist.values)
plt.title("Grishaverse Frequency Distribution of All Kudos") plt.title("Grishaverse Cumulative Frequency Distribution of All Kudos")
ax.set_xlabel("Number of Kudos") ax.set_xlabel("Number of Kudos")
ax.set_ylabel("Percentage of Occurence") ax.set_ylabel("Percentage of Occurence")
sns.lineplot(x=dist_panda.index, y=dist_panda.values, ax=ax, palette="flare") sns.lineplot(x=new_dist.index, y=cum_dist, ax=ax)
#plt.xticks(rotation=30) !!! very useful for words #plt.xticks(rotation=30) !!! very useful for words
plt.savefig(f"grishaverse/freq_distribution/fanfic_kudo_freq_dist.png") # "throne_of_glass/freq_distribution/all_canon_sent_len.png" fig.savefig(f"grishaverse/freq_distribution/fanfic_kudo_freq_dist.png") # "throne_of_glass/freq_distribution/all_canon_sent_len.png"
""" """
# plot distribution of kudos for Throne of Glass Fanfics
tog_kudos = tog_fanfics["kudos"].values.tolist()
tog_kudos_freq_dist = FreqDist(tog_kudos)
# convert the FreqDist object to a pandas series for easier processing
dist_panda = pd.Series(dict(tog_kudos_freq_dist))
#print(dist_panda)
# sort, normalise and round the panda series
new_dist = dist_panda.sort_index()
for i in range(0, len(new_dist.index)):
#for index in new_token_len_dist.index:
new_dist.iat[i] = round(new_dist.iat[i]/len(tog_kudos), 3) # relative frequency of this kudos count (count / number of fics), rounded to 3 decimals
#if float(new_token_len_dist.iat[i]) == 0.00:
# new_token_len_dist.drop(index=i) # here it is used as the label, so we want the index, not index -1; bad work-around, I'm sorry
#calculate cumulative distribution
cum_dist = np.cumsum(new_dist.values)
# plot using matplotlib and seaborn
# set figure, ax into variables
fig, ax = plt.subplots(figsize=(10,10))
# call function for bar (value) labels
#addlabels(x=new_sent_len_dist.index, y=new_sent_len_dist.values)
plt.title("Throne of Glass Cumulative Frequency Distribution of Kudos")
ax.set_xlabel("Number of Kudos")
ax.set_ylabel("Percentage of Occurence")
sns.lineplot(x=new_dist.index, y=cum_dist, ax=ax)
#plt.xticks(rotation=30) !!! very useful for words
fig.savefig(f"throne_of_glass/freq_distribution/fanfic_kudo_freq_dist.png") # "throne_of_glass/freq_distribution/all_canon_sent_len.png"
"""
def preprocess_data(df, series): def preprocess_data(df, series):
good_fics = [] good_fics = []
medium_fics = [] medium_fics = []
...@@ -78,3 +165,4 @@ def preprocess_data(df, series): ...@@ -78,3 +165,4 @@ def preprocess_data(df, series):
preprocess_data(grisha_fanfics, "grishaverse") preprocess_data(grisha_fanfics, "grishaverse")
preprocess_data(tog_fanfics, "throne_of_glass") preprocess_data(tog_fanfics, "throne_of_glass")
"""
\ No newline at end of file
I am a filler, namely a chubby pink dragon subsisting on mint drops to uphold the folder structure. Of course I could be replaced with a git ignore file, but where's the fun in that? Have a mint drop!
\ No newline at end of file
grishaverse/freq_distribution/fanfic_kudo_freq_dist.png

27.7 KiB | W: | H:

grishaverse/freq_distribution/fanfic_kudo_freq_dist.png

33 KiB | W: | H:

grishaverse/freq_distribution/fanfic_kudo_freq_dist.png
grishaverse/freq_distribution/fanfic_kudo_freq_dist.png
grishaverse/freq_distribution/fanfic_kudo_freq_dist.png
grishaverse/freq_distribution/fanfic_kudo_freq_dist.png
  • 2-up
  • Swipe
  • Onion skin
hello, I am a sentient cream puff instead of a professional git ignore file. I exist. (I like prime numbers)
\ No newline at end of file
throne_of_glass/freq_distribution/fanfic_kudo_freq_dist.png

33.2 KiB

0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment