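# NOTE: the words "Newer" / "Older" that preceded the imports were page-navigation
# residue from a web view of this file, not part of the script.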
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import pandas as pd
import os
# Directories to process; each is expected to hold one "<feature>.csv" per entry
# in ``types_of_feature``.
fandoms = ["call_me_by_your_name"]
# Alternative configurations kept for reference:
#types_of_feature = ["data_overview","sent_len_dist", "tk_len_dist","punct_tag_freq_dist", "md_freq_dist", "tag_freq_dist", "pronouns_dist"] #"data_overview",
#fandoms = ['call_me_by_your_name', 'cosmere', 'divergent', 'grishaverse', 'maze_runner', 'murderbot', 'percy', 'red_white_royal_blue', 'school_for_good_and_evil', 'simonverse', 'song_of_achilles', 'throne_of_glass',]
types_of_feature = ["data_overview","sent_len_dist", "tk_len_dist","punct_tag_freq_dist"] #, "md_freq_dist", "tag_freq_dist", "pronouns_dist"


def significant_mic_frame(data, alpha=0.05):
    """Return a one-row DataFrame of MIC values whose p-value is <= *alpha*.

    Args:
        data: DataFrame with "MIC" and "p_value" columns; its index labels
            become the column labels of the result.
        alpha: Inclusive significance threshold applied to "p_value".

    Returns:
        A DataFrame with the single row label "MIC" and one column per
        retained index label, in the order they appear in *data*. When no
        row passes the threshold the result has zero columns
        (``result.empty`` is True).

    Raises:
        KeyError: If *data* lacks the "MIC" or "p_value" column.
    """
    significant = {
        label: mic
        for label, mic, p_value in zip(data.index, data["MIC"], data["p_value"])
        if p_value <= alpha  # NaN p-values compare False and are dropped
    }
    if not significant:
        return pd.DataFrame(index=["MIC"])
    return pd.DataFrame([significant], index=["MIC"])


def plot_feature_heatmap(fandom, type_of_feature):
    """Read one feature CSV and save a heatmap of its significant MIC values.

    Reads ``{fandom}/{type_of_feature}.csv`` (first column as index, the
    third line of the file as the header row) and writes
    ``{fandom}/{type_of_feature}.png``.

    Returns:
        True if a figure was written, False if the table had no row with
        p <= 0.05 and the plot was skipped.
    """
    data = pd.read_csv(f"{fandom}/{type_of_feature}.csv", index_col=0, header=2)
    df = significant_mic_frame(data)
    if df.empty:
        # Skip instead of handing seaborn a frame with no columns to draw,
        # so one table without significant rows does not abort the run.
        print(f"no values with p <= 0.05 in {fandom}/{type_of_feature}.csv; skipping heatmap")
        return False

    fig = plt.figure(figsize=(10, 2))
    try:
        sns.heatmap(df, annot=True, cmap="viridis", cbar=True)
        plt.title("Heatmap of MIC Values with Statistical Significance (p <= 0.05)")
        plt.xlabel("Book Series")
        plt.ylabel("MIC")
        plt.xticks(rotation=45)
        plt.savefig(f"{fandom}/{type_of_feature}.png")
    finally:
        # pyplot keeps every figure alive until it is closed; release it so
        # figures do not accumulate across fandoms and feature types.
        plt.close(fig)
    return True


def main():
    """Plot a heatmap for every configured fandom / feature-type pair."""
    for fandom in fandoms:
        print(f"{fandom}")
        for type_of_feature in types_of_feature:
            print(type_of_feature)
            plot_feature_heatmap(fandom, type_of_feature)


if __name__ == "__main__":
    main()