# visualisation.py
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import pandas as pd
import os

# Fandom directories to process. Only one is enabled right now; the full
# alternative list (currently disabled) is:
#   'call_me_by_your_name', 'cosmere', 'divergent', 'grishaverse',
#   'maze_runner', 'murderbot', 'percy', 'red_white_royal_blue',
#   'school_for_good_and_evil', 'simonverse', 'song_of_achilles',
#   'throne_of_glass'
fandoms = [
    "call_me_by_your_name",
]

# Feature tables to plot; each name is used as "<fandom>/<name>.csv" below.
# Currently disabled: "md_freq_dist", "tag_freq_dist", "pronouns_dist".
types_of_feature = [
    "data_overview",
    "sent_len_dist",
    "tk_len_dist",
    "punct_tag_freq_dist",
]

# For every fandom/feature pair: read "<fandom>/<feature>.csv", keep the MIC
# values of the rows whose p-value is <= 0.05, and save them as a one-row
# heatmap to "<fandom>/<feature>.png".
for fandom in fandoms:
    print(f"{fandom}")
    for type_of_feature in types_of_feature:
        print(type_of_feature)
        data = pd.read_csv(f"{fandom}/{type_of_feature}.csv", index_col=0, header=2)

        # Filter the data to include only rows with p-value <= 0.05.
        # NaN p-values compare False and are therefore dropped as well.
        significant_mic = data.loc[data["p_value"] <= 0.05, "MIC"]

        # A heatmap with zero columns cannot be drawn (seaborn fails on the
        # empty array), so skip this feature instead of aborting the whole run.
        if significant_mic.empty:
            print(f"no significant values (p <= 0.05) for {type_of_feature}, skipping plot")
            continue

        # Creating a DataFrame for the heatmap: a single row labelled "MIC"
        # with one column per significant index label.
        df = significant_mic.to_frame(name="MIC").T

        # Plotting the heatmap
        fig = plt.figure(figsize=(10, 2))
        try:
            sns.heatmap(df, annot=True, cmap="viridis", cbar=True)
            plt.title("Heatmap of MIC Values with Statistical Significance (p <= 0.05)")
            plt.xlabel("Book Series")
            plt.ylabel("MIC")
            plt.xticks(rotation=45)
            # bbox_inches="tight" keeps the rotated tick labels and the axis
            # label inside the saved image; the figure is only 2 inches tall.
            fig.savefig(f"{fandom}/{type_of_feature}.png", bbox_inches="tight")
        finally:
            # Release the figure; otherwise one figure per CSV stays alive in
            # pyplot's registry for the rest of the run.
            plt.close(fig)