Skip to content
Snippets Groups Projects
Commit d4462195 authored by chrysanthopoulou's avatar chrysanthopoulou
Browse files

Add Pearson & Spearman heatmaps

parent cb298f5a
No related branches found
No related tags found
No related merge requests found
Showing
with 206 additions and 0 deletions
correlation/deltas/simonverse/spearman_heatmap.png

131 B

correlation/deltas/song_of_achilles/pearsons_heatmap.png

131 B

correlation/deltas/song_of_achilles/spearman_heatmap.png

131 B

correlation/deltas/throne_of_glass/pearsons_heatmap.png

131 B

correlation/deltas/throne_of_glass/spearman_heatmap.png

131 B

correlation/stylo_features/data_overview/pearsons_heatmap.png

130 B

correlation/stylo_features/data_overview/spearman_heatmap.png

131 B

correlation/stylo_features/md_freq_dist/pearsons_heatmap.png

131 B

correlation/stylo_features/md_freq_dist/spearman_heatmap.png

131 B

correlation/stylo_features/pronouns_dist/pearsons_heatmap.png

131 B

correlation/stylo_features/pronouns_dist/spearman_heatmap.png

131 B

correlation/stylo_features/punct_tag_freq_dist/pearsons_heatmap.png

131 B

correlation/stylo_features/punct_tag_freq_dist/spearman_heatmap.png

131 B

correlation/stylo_features/sent_len_dist/pearsons_heatmap.png

131 B

correlation/stylo_features/sent_len_dist/spearman_heatmap.png

131 B

correlation/stylo_features/tag_freq_dist/pearsons_heatmap.png

131 B

correlation/stylo_features/tag_freq_dist/spearman_heatmap.png

131 B

correlation/stylo_features/tk_len_dist/pearsons_heatmap.png

131 B

correlation/stylo_features/tk_len_dist/spearman_heatmap.png

131 B

import pandas as pd
import scipy.stats as stats
import seaborn as sns
import matplotlib.pyplot as plt
import os
# Names of the feature tables to analyse. Each name is used as the stem of a
# CSV file name when reading input, and as a column / directory name in the
# correlation output further down.
types_of_feature = ["data_overview","sent_len_dist", "tk_len_dist","punct_tag_freq_dist", "md_freq_dist", "tag_freq_dist", "pronouns_dist"]
# Fandoms to process. Each name is used as a directory component in the input
# paths and in the per-fandom output path.
fandoms = ['call_me_by_your_name', 'cosmere', 'divergent', 'grishaverse', 'maze_runner', 'murderbot', 'percy', 'red_white_royal_blue', 'school_for_good_and_evil', 'simonverse', 'song_of_achilles', 'throne_of_glass',]
# Function to test significance and return correlation and p-value
def test_significance(corr_func, x, y):
    """Apply a correlation function to two samples.

    Args:
        corr_func: Callable taking ``(x, y)`` and returning a pair-like result
            that unpacks to ``(correlation, p_value)``; in this script it is
            ``stats.pearsonr`` or ``stats.spearmanr``.
        x: First sample, passed through to ``corr_func`` unchanged.
        y: Second sample, passed through to ``corr_func`` unchanged.

    Returns:
        A ``(correlation, p_value)`` tuple.
    """
    corr, p_value = corr_func(x, y)
    return corr, p_value


# The name starts with "test_", which matches pytest's default collection
# pattern. Mark the helper as "not a test" so that a test runner importing this
# module does not try to collect it and fail on the missing fixtures.
test_significance.__test__ = False
# Function to create a heatmap
def create_heatmap(df, title, save_path, figsize=(30, 26)):
    """Draw ``df`` as an annotated heatmap and write it to ``save_path``.

    The colour scale is fixed to the range [-1, 1] with the ``coolwarm``
    colormap, so heatmaps from different runs are directly comparable.
    Cells holding NaN are left blank by seaborn.

    Args:
        df: Two-dimensional table of values to plot (rows and columns become
            the heatmap's axes).
        title: Title placed above the heatmap.
        save_path: Destination file path handed to matplotlib's ``savefig``.
        figsize: ``(width, height)`` of the figure in inches. Defaults to the
            large canvas the script has always used.
    """
    # To hide weak correlations, mask the frame before plotting, e.g.
    # df = df.where((df >= 0.2) | (df <= -0.2))
    fig = plt.figure(figsize=figsize)
    try:
        ax = fig.add_subplot(111)
        sns.heatmap(
            df,
            annot=True,
            cmap='coolwarm',
            vmin=-1,
            vmax=1,
            annot_kws={"size": 8},
            ax=ax,
        )
        ax.set_title(title)
        fig.tight_layout()
        fig.savefig(save_path)
    finally:
        # Always release the figure, even if plotting or saving failed;
        # otherwise figures accumulate across the many calls in this script.
        plt.close(fig)
# Loop through each fandom and process the data.
# For every fandom, each column of every feature table is correlated with the
# table's index (called "kudos" here; presumably the kudos count of each fic,
# which is what the first CSV column is expected to hold).
for fandom in fandoms:
    print(f"{fandom}")
    # feature type -> list with one value per column of that feature's CSV
    pearsons_by_feature = {}
    spearman_by_feature = {}
    pvalues_pearson_by_feature = {}
    pvalues_spearman_by_feature = {}
    for type_of_feature in types_of_feature:
        sing_fanfic = pd.read_csv(f"data_overview/single_fic_deltas/{fandom}/{type_of_feature}.csv", index_col=0)
        # The index is the same for every column, so look it up once.
        kudos = sing_fanfic.index
        pearsons_list = []
        spearman_list = []
        pvalues_pearson = []
        pvalues_spearman = []
        for column in sing_fanfic.columns:
            delta = sing_fanfic[column]
            # Pearson's correlation and p-value
            corr, p_value = test_significance(stats.pearsonr, kudos, delta)
            pearsons_list.append(corr)
            pvalues_pearson.append(p_value)
            # Spearman's correlation and p-value
            corr, p_value = test_significance(stats.spearmanr, kudos, delta)
            spearman_list.append(corr)
            pvalues_spearman.append(p_value)
        pearsons_by_feature[type_of_feature] = pearsons_list
        spearman_by_feature[type_of_feature] = spearman_list
        pvalues_pearson_by_feature[type_of_feature] = pvalues_pearson
        pvalues_spearman_by_feature[type_of_feature] = pvalues_spearman
    # Every feature CSV of a fandom must expose the same number of columns;
    # the labels of the last one read are used for all of them (pandas raises
    # a ValueError if the lengths disagree).
    delta_columns = sing_fanfic.columns
    # Rows: feature types, columns: the CSV column labels.
    df_pearsons = pd.DataFrame(pearsons_by_feature, index=delta_columns).T
    df_spearman = pd.DataFrame(spearman_by_feature, index=delta_columns).T
    df_pvalues_pearson = pd.DataFrame(pvalues_pearson_by_feature, index=delta_columns).T
    df_pvalues_spearman = pd.DataFrame(pvalues_spearman_by_feature, index=delta_columns).T
    # Keep only significant correlations (p < 0.01); everything else becomes
    # NaN and is therefore left blank in the heatmap.
    significant_pearson = df_pearsons.where(df_pvalues_pearson < 0.01)
    significant_spearman = df_spearman.where(df_pvalues_spearman < 0.01)
    data_path = f"correlation/deltas/{fandom}"
    # exist_ok avoids the check-then-create race of os.path.exists + makedirs.
    os.makedirs(data_path, exist_ok=True)
    # Save the full (unmasked) correlation tables and the masked heatmaps.
    df_pearsons.to_csv(f"{data_path}/pearsons.csv")
    df_spearman.to_csv(f"{data_path}/spearman.csv")
    create_heatmap(significant_pearson, f"Pearson's Correlation (Significant) - {fandom}", f"{data_path}/pearsons_heatmap.png")
    create_heatmap(significant_spearman, f"Spearman's Correlation (Significant) - {fandom}", f"{data_path}/spearman_heatmap.png")
# Note: The script assumes that the input CSV files already exist under
# data_overview/single_fic_deltas/<fandom>/ and <fandom>/fanfiction_stylo_data/stylo_data/.
# stylo features
# For every feature type, correlate each column of each fandom's stylometric
# table with the table's index (called "kudos" here; presumably the kudos count
# of each fic), then write one table and one heatmap per feature type with the
# fandoms as rows.
for type_of_feature in types_of_feature:
    # One dict per fandom, mapping column label -> value.
    pearsons_list = []
    spearman_list = []
    pvalues_pearson = []
    pvalues_spearman = []
    for fandom in fandoms:
        print(f"{fandom}")
        feature_fanfic = pd.read_csv(f"{fandom}/fanfiction_stylo_data/stylo_data/{type_of_feature}.csv", index_col=0)
        # Missing cells are treated as 0 before correlating.
        feature_fanfic = feature_fanfic.fillna(0)
        # The index is the same for every column, so look it up once.
        kudos = feature_fanfic.index
        pearsons_dict = {}
        spearman_dict = {}
        pvalues_pearsons_dict = {}
        pvalues_spearman_dict = {}
        for column in feature_fanfic.columns:
            delta = feature_fanfic[column]
            corr, p_value = test_significance(stats.pearsonr, kudos, delta)
            pearsons_dict[column] = corr
            pvalues_pearsons_dict[column] = p_value
            corr, p_value = test_significance(stats.spearmanr, kudos, delta)
            spearman_dict[column] = corr
            pvalues_spearman_dict[column] = p_value
        pearsons_list.append(pearsons_dict)
        spearman_list.append(spearman_dict)
        pvalues_pearson.append(pvalues_pearsons_dict)
        pvalues_spearman.append(pvalues_spearman_dict)
    # Rows: fandoms; columns: union of the column labels seen across fandoms
    # (a label missing for a fandom yields NaN in that row).
    df_pearsons = pd.DataFrame(pearsons_list, index=fandoms)
    df_spearman = pd.DataFrame(spearman_list, index=fandoms)
    df_pvalues_pearson = pd.DataFrame(pvalues_pearson, index=fandoms)
    df_pvalues_spearman = pd.DataFrame(pvalues_spearman, index=fandoms)
    # Keep only significant correlations (p < 0.01); everything else becomes
    # NaN and is therefore left blank in the heatmap.
    significant_pearson = df_pearsons.where(df_pvalues_pearson < 0.01)
    significant_spearman = df_spearman.where(df_pvalues_spearman < 0.01)
    data_path = f"correlation/stylo_features/{type_of_feature}"
    # exist_ok avoids the check-then-create race of os.path.exists + makedirs.
    os.makedirs(data_path, exist_ok=True)
    df_pearsons.to_csv(f"{data_path}/pearsons.csv")
    df_spearman.to_csv(f"{data_path}/spearman.csv")
    # Title by feature type: each heatmap covers all fandoms, so labelling it
    # with the loop's last fandom (as before) was misleading.
    create_heatmap(significant_pearson, f"Pearson's Correlation (Significant) - {type_of_feature}", f"{data_path}/pearsons_heatmap.png")
    create_heatmap(significant_spearman, f"Spearman's Correlation (Significant) - {type_of_feature}", f"{data_path}/spearman_heatmap.png")
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment