Skip to content
Snippets Groups Projects
Commit 904f7a80 authored by chrysanthopoulou's avatar chrysanthopoulou
Browse files

Add Pearson and Spearman correlation

parent cf65f872
No related branches found
No related tags found
No related merge requests found
Showing
with 136 additions and 26 deletions
import seaborn as sns
import pandas as pd
import matplotlib.pyplot as plt
from scipy import stats
import os
#sing_fanfic = pd.read_csv("cosmere/fanfiction_stylo_data/deltas/md_freq_dist.csv", index_col=0)
#sns.scatterplot(data=sing_fanfic, x="grishaverse_good", y=sing_fanfic.index)
#plt.savefig("cosmere/fanfiction_stylo_data/deltas/grishaverse_good.png")
# Feature tables to analyse. Each name is also the stem of the CSV file that
# is read for that feature.
types_of_feature = [
    "data_overview",
    "md_freq_dist",
    "tag_freq_dist",
    "punct_tag_freq_dist",
    "sent_len_dist",
    "tk_len_dist",
]
fandoms = ["call_me_by_your_name"]

# deltas
# For every fandom and feature table, correlate the table's index with each of
# its columns (Pearson and Spearman) and write one CSV per coefficient with
# feature types as rows and the table's columns as columns.
for fandom in fandoms:
    print(f"{fandom}")
    pearsons_by_feature = {}
    spearman_by_feature = {}
    for type_of_feature in types_of_feature:
        sing_fanfic = pd.read_csv(f"data_overview/single_fic_deltas/{fandom}/{type_of_feature}.csv", index_col=0)
        # The first CSV column is used as the index; the variable name suggests
        # it holds kudos counts -- TODO confirm against the file that writes
        # these tables.
        kudos = sing_fanfic.index
        pearsons_list = []
        spearman_list = []
        for column in sing_fanfic.columns:
            delta = sing_fanfic[column]
            corr, _ = stats.pearsonr(kudos, delta)
            pearsons_list.append(corr)
            corr, _ = stats.spearmanr(kudos, delta)
            spearman_list.append(corr)
        # Label every coefficient with the column it was computed from, so the
        # results of different tables are aligned by column name instead of by
        # position (and by the column labels of whichever table was read last).
        pearsons_by_feature[type_of_feature] = pd.Series(pearsons_list, index=sing_fanfic.columns, dtype=float)
        spearman_by_feature[type_of_feature] = pd.Series(spearman_list, index=sing_fanfic.columns, dtype=float)
    # Transpose so that each row is a feature type.
    df_pearsons = pd.DataFrame(pearsons_by_feature).T
    df_spearman = pd.DataFrame(spearman_by_feature).T
    # to_csv does not create missing directories, so make sure the target exists.
    output_dir = f"correlation/deltas/{fandom}"
    os.makedirs(output_dir, exist_ok=True)
    df_pearsons.to_csv(f"{output_dir}/pearsons.csv")
    df_spearman.to_csv(f"{output_dir}/spearman.csv")
# stylo features
# For every feature table, correlate the table's index with each of its
# columns (Pearson and Spearman) for every fandom, then write one CSV per
# coefficient with fandoms as rows and the table's columns as columns.
for type_of_feature in types_of_feature:
    pearsons_list = []
    spearman_list = []
    for fandom in fandoms:
        print(f"{fandom}")
        feature_fanfic = pd.read_csv(f"{fandom}/fanfiction_stylo_data/stylo_data/{type_of_feature}.csv", index_col=0)
        # Missing cells are treated as 0 before correlating.
        feature_fanfic = feature_fanfic.fillna(0)
        # The first CSV column is used as the index; the variable name suggests
        # it holds kudos counts -- TODO confirm against the file that writes
        # these tables.
        kudos = feature_fanfic.index
        pearsons_dict = {}
        spearman_dict = {}
        for column in feature_fanfic.columns:
            delta = feature_fanfic[column]
            corr, _ = stats.pearsonr(kudos, delta)
            pearsons_dict[column] = corr
            corr, _ = stats.spearmanr(kudos, delta)
            spearman_dict[column] = corr
        pearsons_list.append(pearsons_dict)
        spearman_list.append(spearman_dict)
    # One row per fandom; columns are the union of the per-fandom column names.
    df_pearsons = pd.DataFrame(pearsons_list)
    df_spearman = pd.DataFrame(spearman_list)
    df_pearsons.index = fandoms
    df_spearman.index = fandoms
    data_path = f"correlation/stylo_features/{type_of_feature}"
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(data_path, exist_ok=True)
    df_pearsons.to_csv(f"{data_path}/pearsons.csv")
    df_spearman.to_csv(f"{data_path}/spearman.csv")
\ No newline at end of file
correlation.txt 0 → 100644 LFS
Source diff could not be displayed: it is stored in LFS. Options to address this: view the blob.
Source diff could not be displayed: it is stored in LFS. Options to address this: view the blob.
Source diff could not be displayed: it is stored in LFS. Options to address this: view the blob.
Source diff could not be displayed: it is stored in LFS. Options to address this: view the blob.
Source diff could not be displayed: it is stored in LFS. Options to address this: view the blob.
Source diff could not be displayed: it is stored in LFS. Options to address this: view the blob.
Source diff could not be displayed: it is stored in LFS. Options to address this: view the blob.
Source diff could not be displayed: it is stored in LFS. Options to address this: view the blob.
Source diff could not be displayed: it is stored in LFS. Options to address this: view the blob.
Source diff could not be displayed: it is stored in LFS. Options to address this: view the blob.
Source diff could not be displayed: it is stored in LFS. Options to address this: view the blob.
Source diff could not be displayed: it is stored in LFS. Options to address this: view the blob.
Source diff could not be displayed: it is stored in LFS. Options to address this: view the blob.
Source diff could not be displayed: it is stored in LFS. Options to address this: view the blob.
Source diff could not be displayed: it is stored in LFS. Options to address this: view the blob.
Source diff could not be displayed: it is stored in LFS. Options to address this: view the blob.
Source diff could not be displayed: it is stored in LFS. Options to address this: view the blob.
......@@ -135,7 +135,7 @@ def run_functions_single_fanfic(path_to_general_df:str, path_to_singular_df:str,
if __name__ == "__main__":
types_of_feature = ["sent_len_dist", "tk_len_dist","punct_tag_freq_dist", "md_freq_dist", "tag_freq_dist", ] #"data_overview",
types_of_feature = ["data_overview","sent_len_dist", "tk_len_dist","punct_tag_freq_dist", "md_freq_dist", "tag_freq_dist", ] #"data_overview",
fandoms = ["call_me_by_your_name"] #"cosmere"
for type_of_feature in types_of_feature:
print(type_of_feature)
......
import seaborn as sns
import pandas as pd
import matplotlib.pyplot as plt
from scipy import stats
#sing_fanfic = pd.read_csv("cosmere/fanfiction_stylo_data/deltas/md_freq_dist.csv", index_col=0)
#sns.scatterplot(data=sing_fanfic, x="grishaverse_good", y=sing_fanfic.index)
#plt.savefig("cosmere/fanfiction_stylo_data/deltas/grishaverse_good.png")
# Delta tables to analyse. Each name is the stem of a CSV file under
# cosmere/fanfiction_stylo_data/deltas/.
types_of_feature = [
    "data_overview_deltas",
    "md_freq_dist",
    "tag_freq_dist",
    "punct_tag_freq_dist",
    "sent_len_dist",
    "tk_len_dist",
]

# Print the Pearson and Spearman correlation between each table's index and
# every one of its columns, preceded by the table name.
for type_of_feature in types_of_feature:
    deltas_table = pd.read_csv(f"cosmere/fanfiction_stylo_data/deltas/{type_of_feature}.csv", index_col=0)
    # The first CSV column is the index; presumably kudos counts -- TODO confirm.
    kudos_values = deltas_table.index
    for column_name in deltas_table.columns:
        delta_values = deltas_table[column_name]

        pearson_result = stats.pearsonr(kudos_values, delta_values)
        corr = pearson_result[0]
        print(f"\n{type_of_feature}")
        print('Pearsons correlation: %.3f' % corr)

        spearman_result = stats.spearmanr(kudos_values, delta_values)
        corr = spearman_result[0]
        print(f"\n{type_of_feature}")
        print('Spearman correlation: %.3f' % corr)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment