From d4462195b05ba2b4a09e9f1bc4988db3c8e85282 Mon Sep 17 00:00:00 2001 From: chrysanthopoulou <vm255@stud.uni-heidelberg.de> Date: Wed, 28 Feb 2024 13:53:09 +0100 Subject: [PATCH] Add Pearson & Spearmann heatmaps --- correlation.py | 1 + .../call_me_by_your_name/pearsons_heatmap.png | 3 + .../call_me_by_your_name/spearman_heatmap.png | 3 + .../deltas/cosmere/pearsons_heatmap.png | 3 + .../deltas/cosmere/spearman_heatmap.png | 3 + .../deltas/divergent/pearsons_heatmap.png | 3 + .../deltas/divergent/spearman_heatmap.png | 3 + .../deltas/grishaverse/pearsons_heatmap.png | 3 + .../deltas/grishaverse/spearman_heatmap.png | 3 + .../deltas/maze_runner/pearsons_heatmap.png | 3 + .../deltas/maze_runner/spearman_heatmap.png | 3 + .../deltas/murderbot/pearsons_heatmap.png | 3 + .../deltas/murderbot/spearman_heatmap.png | 3 + correlation/deltas/percy/pearsons_heatmap.png | 3 + correlation/deltas/percy/spearman_heatmap.png | 3 + .../red_white_royal_blue/pearsons_heatmap.png | 3 + .../red_white_royal_blue/spearman_heatmap.png | 3 + .../pearsons_heatmap.png | 3 + .../spearman_heatmap.png | 3 + .../deltas/simonverse/pearsons_heatmap.png | 3 + .../deltas/simonverse/spearman_heatmap.png | 3 + .../song_of_achilles/pearsons_heatmap.png | 3 + .../song_of_achilles/spearman_heatmap.png | 3 + .../throne_of_glass/pearsons_heatmap.png | 3 + .../throne_of_glass/spearman_heatmap.png | 3 + .../data_overview/pearsons_heatmap.png | 3 + .../data_overview/spearman_heatmap.png | 3 + .../md_freq_dist/pearsons_heatmap.png | 3 + .../md_freq_dist/spearman_heatmap.png | 3 + .../pronouns_dist/pearsons_heatmap.png | 3 + .../pronouns_dist/spearman_heatmap.png | 3 + .../punct_tag_freq_dist/pearsons_heatmap.png | 3 + .../punct_tag_freq_dist/spearman_heatmap.png | 3 + .../sent_len_dist/pearsons_heatmap.png | 3 + .../sent_len_dist/spearman_heatmap.png | 3 + .../tag_freq_dist/pearsons_heatmap.png | 3 + .../tag_freq_dist/spearman_heatmap.png | 3 + .../tk_len_dist/pearsons_heatmap.png | 3 + 
.../tk_len_dist/spearman_heatmap.png | 3 + correlation_adapted.py | 149 ++++++++++++++++++ .../call_me_by_your_name/data_overview.csv | 3 + .../punct_tag_freq_dist.csv | 3 + .../call_me_by_your_name/sent_len_dist.csv | 3 + .../call_me_by_your_name/tk_len_dist.csv | 3 + correlation_with_minepy/example_4.py | 1 + correlation_with_minepy/minepy_6.txt | 3 - correlation_with_minepy/visualisation.py | 30 ++++ submit_correlation.sh | 2 +- 48 files changed, 308 insertions(+), 4 deletions(-) create mode 100644 correlation/deltas/call_me_by_your_name/pearsons_heatmap.png create mode 100644 correlation/deltas/call_me_by_your_name/spearman_heatmap.png create mode 100644 correlation/deltas/cosmere/pearsons_heatmap.png create mode 100644 correlation/deltas/cosmere/spearman_heatmap.png create mode 100644 correlation/deltas/divergent/pearsons_heatmap.png create mode 100644 correlation/deltas/divergent/spearman_heatmap.png create mode 100644 correlation/deltas/grishaverse/pearsons_heatmap.png create mode 100644 correlation/deltas/grishaverse/spearman_heatmap.png create mode 100644 correlation/deltas/maze_runner/pearsons_heatmap.png create mode 100644 correlation/deltas/maze_runner/spearman_heatmap.png create mode 100644 correlation/deltas/murderbot/pearsons_heatmap.png create mode 100644 correlation/deltas/murderbot/spearman_heatmap.png create mode 100644 correlation/deltas/percy/pearsons_heatmap.png create mode 100644 correlation/deltas/percy/spearman_heatmap.png create mode 100644 correlation/deltas/red_white_royal_blue/pearsons_heatmap.png create mode 100644 correlation/deltas/red_white_royal_blue/spearman_heatmap.png create mode 100644 correlation/deltas/school_for_good_and_evil/pearsons_heatmap.png create mode 100644 correlation/deltas/school_for_good_and_evil/spearman_heatmap.png create mode 100644 correlation/deltas/simonverse/pearsons_heatmap.png create mode 100644 correlation/deltas/simonverse/spearman_heatmap.png create mode 100644 
correlation/deltas/song_of_achilles/pearsons_heatmap.png create mode 100644 correlation/deltas/song_of_achilles/spearman_heatmap.png create mode 100644 correlation/deltas/throne_of_glass/pearsons_heatmap.png create mode 100644 correlation/deltas/throne_of_glass/spearman_heatmap.png create mode 100644 correlation/stylo_features/data_overview/pearsons_heatmap.png create mode 100644 correlation/stylo_features/data_overview/spearman_heatmap.png create mode 100644 correlation/stylo_features/md_freq_dist/pearsons_heatmap.png create mode 100644 correlation/stylo_features/md_freq_dist/spearman_heatmap.png create mode 100644 correlation/stylo_features/pronouns_dist/pearsons_heatmap.png create mode 100644 correlation/stylo_features/pronouns_dist/spearman_heatmap.png create mode 100644 correlation/stylo_features/punct_tag_freq_dist/pearsons_heatmap.png create mode 100644 correlation/stylo_features/punct_tag_freq_dist/spearman_heatmap.png create mode 100644 correlation/stylo_features/sent_len_dist/pearsons_heatmap.png create mode 100644 correlation/stylo_features/sent_len_dist/spearman_heatmap.png create mode 100644 correlation/stylo_features/tag_freq_dist/pearsons_heatmap.png create mode 100644 correlation/stylo_features/tag_freq_dist/spearman_heatmap.png create mode 100644 correlation/stylo_features/tk_len_dist/pearsons_heatmap.png create mode 100644 correlation/stylo_features/tk_len_dist/spearman_heatmap.png create mode 100644 correlation_adapted.py create mode 100644 correlation_with_minepy/call_me_by_your_name/data_overview.csv create mode 100644 correlation_with_minepy/call_me_by_your_name/punct_tag_freq_dist.csv create mode 100644 correlation_with_minepy/call_me_by_your_name/sent_len_dist.csv create mode 100644 correlation_with_minepy/call_me_by_your_name/tk_len_dist.csv create mode 100644 correlation_with_minepy/visualisation.py diff --git a/correlation.py b/correlation.py index d9e3906..29d4fd8 100644 --- a/correlation.py +++ b/correlation.py @@ -1,5 +1,6 @@ import 
seaborn as sns import pandas as pd +import scipy.stats as stats import matplotlib.pyplot as plt from scipy import stats import os diff --git a/correlation/deltas/call_me_by_your_name/pearsons_heatmap.png b/correlation/deltas/call_me_by_your_name/pearsons_heatmap.png new file mode 100644 index 0000000..3bd7d33 --- /dev/null +++ b/correlation/deltas/call_me_by_your_name/pearsons_heatmap.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e4ab1dddf8fc8ee97f0f6562f51a2f959bc57fe89ef59c7062c2f0c8ca9ba75a +size 247203 diff --git a/correlation/deltas/call_me_by_your_name/spearman_heatmap.png b/correlation/deltas/call_me_by_your_name/spearman_heatmap.png new file mode 100644 index 0000000..1eb9f70 --- /dev/null +++ b/correlation/deltas/call_me_by_your_name/spearman_heatmap.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c0e1e039c57ba7b3746f68e357f891a9c05af33db7e08b7c5cb1fea69e58500d +size 259966 diff --git a/correlation/deltas/cosmere/pearsons_heatmap.png b/correlation/deltas/cosmere/pearsons_heatmap.png new file mode 100644 index 0000000..26c5c7a --- /dev/null +++ b/correlation/deltas/cosmere/pearsons_heatmap.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1dcafc4eae6c5280a56b94326a201ef6d9641153e1bd819bbdaafe3dd4673b0d +size 230540 diff --git a/correlation/deltas/cosmere/spearman_heatmap.png b/correlation/deltas/cosmere/spearman_heatmap.png new file mode 100644 index 0000000..c502415 --- /dev/null +++ b/correlation/deltas/cosmere/spearman_heatmap.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9ccb42a25fda6ee675ac175edef00c08735e5dad80b33aab1f8f48eb8840c957 +size 240427 diff --git a/correlation/deltas/divergent/pearsons_heatmap.png b/correlation/deltas/divergent/pearsons_heatmap.png new file mode 100644 index 0000000..2c112ef --- /dev/null +++ b/correlation/deltas/divergent/pearsons_heatmap.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:8850b536891da4ed66bd1523be42b2571117f900d2ce44f74c86d80f41151bf2 +size 236215 diff --git a/correlation/deltas/divergent/spearman_heatmap.png b/correlation/deltas/divergent/spearman_heatmap.png new file mode 100644 index 0000000..179538b --- /dev/null +++ b/correlation/deltas/divergent/spearman_heatmap.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c7f36ae2addde5872bd2c947f23fb1904a88ababdd95d59e8dc11ce4784661d8 +size 246794 diff --git a/correlation/deltas/grishaverse/pearsons_heatmap.png b/correlation/deltas/grishaverse/pearsons_heatmap.png new file mode 100644 index 0000000..bfd2a6a --- /dev/null +++ b/correlation/deltas/grishaverse/pearsons_heatmap.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1630c2f9ca234066c0e53b59ae8fa0e0b5ace509e907c813e5fb466a817afc8d +size 248425 diff --git a/correlation/deltas/grishaverse/spearman_heatmap.png b/correlation/deltas/grishaverse/spearman_heatmap.png new file mode 100644 index 0000000..d97b0f3 --- /dev/null +++ b/correlation/deltas/grishaverse/spearman_heatmap.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6662eda04a07dcace59321c9d5d66e09b4f0196aca4f65f879dd095e4bed627c +size 264065 diff --git a/correlation/deltas/maze_runner/pearsons_heatmap.png b/correlation/deltas/maze_runner/pearsons_heatmap.png new file mode 100644 index 0000000..4e22f2f --- /dev/null +++ b/correlation/deltas/maze_runner/pearsons_heatmap.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c6c8faa952db2eb6acaa17bd2f28f19b501e72087d0d3e0970d0c10b24fd4730 +size 247222 diff --git a/correlation/deltas/maze_runner/spearman_heatmap.png b/correlation/deltas/maze_runner/spearman_heatmap.png new file mode 100644 index 0000000..f489ec8 --- /dev/null +++ b/correlation/deltas/maze_runner/spearman_heatmap.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ce49ee88dfacbfe9a88f269a4f22736dce8ea495cb798e49bce6403a4f9513a9 +size 
258861 diff --git a/correlation/deltas/murderbot/pearsons_heatmap.png b/correlation/deltas/murderbot/pearsons_heatmap.png new file mode 100644 index 0000000..d0e7952 --- /dev/null +++ b/correlation/deltas/murderbot/pearsons_heatmap.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5096ba507c6c2ded111af3feb8c232a1e25f0dc4b96a623b59c6e2521fc6bd04 +size 223430 diff --git a/correlation/deltas/murderbot/spearman_heatmap.png b/correlation/deltas/murderbot/spearman_heatmap.png new file mode 100644 index 0000000..a469aa9 --- /dev/null +++ b/correlation/deltas/murderbot/spearman_heatmap.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d7c66685a6818b08557c9dff722d80de7e881384239ac47709e0fdfc5b546f67 +size 239405 diff --git a/correlation/deltas/percy/pearsons_heatmap.png b/correlation/deltas/percy/pearsons_heatmap.png new file mode 100644 index 0000000..a9cb436 --- /dev/null +++ b/correlation/deltas/percy/pearsons_heatmap.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d101c7aaf30a18a6f58b2e842ad2889a1a993ec9fa12048f37a5e2ebc765736f +size 247136 diff --git a/correlation/deltas/percy/spearman_heatmap.png b/correlation/deltas/percy/spearman_heatmap.png new file mode 100644 index 0000000..dfa2bb7 --- /dev/null +++ b/correlation/deltas/percy/spearman_heatmap.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:387b81b2ed879d6304a04b047747b3936f1ca72909bef00b4c91ba7cbaf862ae +size 253978 diff --git a/correlation/deltas/red_white_royal_blue/pearsons_heatmap.png b/correlation/deltas/red_white_royal_blue/pearsons_heatmap.png new file mode 100644 index 0000000..929288e --- /dev/null +++ b/correlation/deltas/red_white_royal_blue/pearsons_heatmap.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6d544084fbcce32c45d15d2d1c4e5514b57ef433dc27000f86323f385e827e9a +size 244692 diff --git a/correlation/deltas/red_white_royal_blue/spearman_heatmap.png 
b/correlation/deltas/red_white_royal_blue/spearman_heatmap.png new file mode 100644 index 0000000..31354f8 --- /dev/null +++ b/correlation/deltas/red_white_royal_blue/spearman_heatmap.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4fd0730a69dfd1f26845cea01151ef70ec72dd1e5f9d140caf8770e0b9513928 +size 266702 diff --git a/correlation/deltas/school_for_good_and_evil/pearsons_heatmap.png b/correlation/deltas/school_for_good_and_evil/pearsons_heatmap.png new file mode 100644 index 0000000..25db406 --- /dev/null +++ b/correlation/deltas/school_for_good_and_evil/pearsons_heatmap.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2aa95239cf1f97255041fa04be8611aeda7e5d8fb1f738315db70c744047133e +size 231274 diff --git a/correlation/deltas/school_for_good_and_evil/spearman_heatmap.png b/correlation/deltas/school_for_good_and_evil/spearman_heatmap.png new file mode 100644 index 0000000..e45649b --- /dev/null +++ b/correlation/deltas/school_for_good_and_evil/spearman_heatmap.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:800a0779c817d6c9d3c7e79f08e2fd5301b00cfb93c069fce4d439507115fc82 +size 241060 diff --git a/correlation/deltas/simonverse/pearsons_heatmap.png b/correlation/deltas/simonverse/pearsons_heatmap.png new file mode 100644 index 0000000..6c66f72 --- /dev/null +++ b/correlation/deltas/simonverse/pearsons_heatmap.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3126baa7030726664da78207df6fed885f6ea1a8af4a140fa18a2962a8cc06b2 +size 246958 diff --git a/correlation/deltas/simonverse/spearman_heatmap.png b/correlation/deltas/simonverse/spearman_heatmap.png new file mode 100644 index 0000000..60488a3 --- /dev/null +++ b/correlation/deltas/simonverse/spearman_heatmap.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f09321d0382de30e652f53a77787612c5251ab40650150a2c4bc522596edbf9f +size 246700 diff --git 
a/correlation/deltas/song_of_achilles/pearsons_heatmap.png b/correlation/deltas/song_of_achilles/pearsons_heatmap.png new file mode 100644 index 0000000..311243d --- /dev/null +++ b/correlation/deltas/song_of_achilles/pearsons_heatmap.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:18fd7361dd03f397eab90f44139fb1e058e657b6a5ac1896b78d3d1d9bb5d759 +size 239335 diff --git a/correlation/deltas/song_of_achilles/spearman_heatmap.png b/correlation/deltas/song_of_achilles/spearman_heatmap.png new file mode 100644 index 0000000..a695497 --- /dev/null +++ b/correlation/deltas/song_of_achilles/spearman_heatmap.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6c6cf3f17826e212a4ba4b12c8d87336628cd1957d4c44fe9aa6391c60a04d3c +size 245583 diff --git a/correlation/deltas/throne_of_glass/pearsons_heatmap.png b/correlation/deltas/throne_of_glass/pearsons_heatmap.png new file mode 100644 index 0000000..76776cd --- /dev/null +++ b/correlation/deltas/throne_of_glass/pearsons_heatmap.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:264e33d2ef40d085376ca996bae3c6ad522098f459370d083d9f540ce2a53c97 +size 244173 diff --git a/correlation/deltas/throne_of_glass/spearman_heatmap.png b/correlation/deltas/throne_of_glass/spearman_heatmap.png new file mode 100644 index 0000000..1052633 --- /dev/null +++ b/correlation/deltas/throne_of_glass/spearman_heatmap.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cbb255b7739723b340d3a1272f74adaa347dc04dbb6654a78a04e156170f3be4 +size 255574 diff --git a/correlation/stylo_features/data_overview/pearsons_heatmap.png b/correlation/stylo_features/data_overview/pearsons_heatmap.png new file mode 100644 index 0000000..4e282d3 --- /dev/null +++ b/correlation/stylo_features/data_overview/pearsons_heatmap.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7d5655bd48b4b633303a51eb1f081f9c17e42344da74136f704486f9df204882 +size 99011 
diff --git a/correlation/stylo_features/data_overview/spearman_heatmap.png b/correlation/stylo_features/data_overview/spearman_heatmap.png new file mode 100644 index 0000000..fc6aafd --- /dev/null +++ b/correlation/stylo_features/data_overview/spearman_heatmap.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:652247a0b4d6ddcb1fefea65b2e8fe77471eb3ac1ece243d7f7104f94f758cc6 +size 105088 diff --git a/correlation/stylo_features/md_freq_dist/pearsons_heatmap.png b/correlation/stylo_features/md_freq_dist/pearsons_heatmap.png new file mode 100644 index 0000000..8b3e76f --- /dev/null +++ b/correlation/stylo_features/md_freq_dist/pearsons_heatmap.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1ac1696101d41010269b7cf946ab2e0707909056a0c79fd8c22575b83c267dea +size 112691 diff --git a/correlation/stylo_features/md_freq_dist/spearman_heatmap.png b/correlation/stylo_features/md_freq_dist/spearman_heatmap.png new file mode 100644 index 0000000..42e11d6 --- /dev/null +++ b/correlation/stylo_features/md_freq_dist/spearman_heatmap.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:584320b4b371489242d8aae40eb7597c71a8c95b998c7ac7c1c24a599f46cf5c +size 135718 diff --git a/correlation/stylo_features/pronouns_dist/pearsons_heatmap.png b/correlation/stylo_features/pronouns_dist/pearsons_heatmap.png new file mode 100644 index 0000000..4ce41e4 --- /dev/null +++ b/correlation/stylo_features/pronouns_dist/pearsons_heatmap.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f64409fb62448f52b1539a208469599d674e2ff2d8d35ae3245af7f97f57923 +size 108733 diff --git a/correlation/stylo_features/pronouns_dist/spearman_heatmap.png b/correlation/stylo_features/pronouns_dist/spearman_heatmap.png new file mode 100644 index 0000000..32f51bc --- /dev/null +++ b/correlation/stylo_features/pronouns_dist/spearman_heatmap.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:bf6012b6ea1abceaa58357c328ab5929a8c06394fc2ae47490ba696ba91662d8 +size 118721 diff --git a/correlation/stylo_features/punct_tag_freq_dist/pearsons_heatmap.png b/correlation/stylo_features/punct_tag_freq_dist/pearsons_heatmap.png new file mode 100644 index 0000000..fde6aa4 --- /dev/null +++ b/correlation/stylo_features/punct_tag_freq_dist/pearsons_heatmap.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c60ebb89d805d5ff3eef3c068d5f7d1378ccdf600f8010ff3c572c95aeea04d7 +size 113625 diff --git a/correlation/stylo_features/punct_tag_freq_dist/spearman_heatmap.png b/correlation/stylo_features/punct_tag_freq_dist/spearman_heatmap.png new file mode 100644 index 0000000..fbe3329 --- /dev/null +++ b/correlation/stylo_features/punct_tag_freq_dist/spearman_heatmap.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eb08692487ba97eb99cb50ad13a630d56bf64785c65169ccdf6fde6b94b338ca +size 126174 diff --git a/correlation/stylo_features/sent_len_dist/pearsons_heatmap.png b/correlation/stylo_features/sent_len_dist/pearsons_heatmap.png new file mode 100644 index 0000000..4b95715 --- /dev/null +++ b/correlation/stylo_features/sent_len_dist/pearsons_heatmap.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d03d36472dc9704e57faffb62c2a8216e8de96b4cbe568e3830426af74198960 +size 119906 diff --git a/correlation/stylo_features/sent_len_dist/spearman_heatmap.png b/correlation/stylo_features/sent_len_dist/spearman_heatmap.png new file mode 100644 index 0000000..2b26ffe --- /dev/null +++ b/correlation/stylo_features/sent_len_dist/spearman_heatmap.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dbfdbc230ff373b508af9c21de067cfeff29ed3d8e7863b1efc2129bf68e5ea8 +size 207993 diff --git a/correlation/stylo_features/tag_freq_dist/pearsons_heatmap.png b/correlation/stylo_features/tag_freq_dist/pearsons_heatmap.png new file mode 100644 index 0000000..fc8b9a8 --- /dev/null +++ 
b/correlation/stylo_features/tag_freq_dist/pearsons_heatmap.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:da2f01ba93266b3ede2f728acfdbe57d1aa464752e58f24a8c719049bea21514 +size 127061 diff --git a/correlation/stylo_features/tag_freq_dist/spearman_heatmap.png b/correlation/stylo_features/tag_freq_dist/spearman_heatmap.png new file mode 100644 index 0000000..344bac5 --- /dev/null +++ b/correlation/stylo_features/tag_freq_dist/spearman_heatmap.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:583c7d8d3968cb6f3f40de057be2e2b2e28c43ae4a04f5ff81085cae883a694f +size 130013 diff --git a/correlation/stylo_features/tk_len_dist/pearsons_heatmap.png b/correlation/stylo_features/tk_len_dist/pearsons_heatmap.png new file mode 100644 index 0000000..e1846f2 --- /dev/null +++ b/correlation/stylo_features/tk_len_dist/pearsons_heatmap.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6ac90534b62ec6d708b32295e688ed6b83abda33c3bab65346dcfad306e67aa2 +size 119416 diff --git a/correlation/stylo_features/tk_len_dist/spearman_heatmap.png b/correlation/stylo_features/tk_len_dist/spearman_heatmap.png new file mode 100644 index 0000000..cce9665 --- /dev/null +++ b/correlation/stylo_features/tk_len_dist/spearman_heatmap.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1ddf4083289838908132eb5a8dfc2d504fdbae8d19c48af8c83d04e8b507b96b +size 135861
diff --git a/correlation_adapted.py b/correlation_adapted.py
new file mode 100644
index 0000000..e08ee7d
--- /dev/null
+++ b/correlation_adapted.py
@@ -0,0 +1,109 @@
+import pandas as pd
+import scipy.stats as stats
+import seaborn as sns
+import matplotlib.pyplot as plt
+import os
+
+
+types_of_feature = ["data_overview","sent_len_dist", "tk_len_dist","punct_tag_freq_dist", "md_freq_dist", "tag_freq_dist", "pronouns_dist"]
+fandoms = ['call_me_by_your_name', 'cosmere', 'divergent', 'grishaverse', 'maze_runner', 'murderbot', 'percy', 'red_white_royal_blue', 'school_for_good_and_evil', 'simonverse', 'song_of_achilles', 'throne_of_glass',]
+
+# Correlations whose p-value is not below this level are blanked out in the heatmaps.
+SIGNIFICANCE_LEVEL = 0.01
+
+
+def test_significance(corr_func, x, y):
+    """Return ``(correlation, p_value)`` as computed by ``corr_func(x, y)``.
+
+    ``corr_func`` must return a pair that unpacks into the coefficient and its
+    p-value, as ``scipy.stats.pearsonr`` and ``scipy.stats.spearmanr`` do.
+    """
+    corr, p_value = corr_func(x, y)
+    return corr, p_value
+
+
+def create_heatmap(df, title, save_path):
+    """Save ``df`` to ``save_path`` as an annotated heatmap on a fixed [-1, 1] colour scale.
+
+    Cells holding NaN (here: correlations masked as not significant) are not drawn,
+    because seaborn masks missing values by default.
+    """
+    plt.figure(figsize=(30, 26))
+    sns.heatmap(df, annot=True, cmap='coolwarm', vmin=-1, vmax=1, annot_kws={"size": 8})
+    plt.title(title)
+    plt.tight_layout()
+    plt.savefig(save_path)
+    plt.close()
+
+
+def correlate_index_with_columns(frame):
+    """Correlate ``frame.index`` with every column of ``frame``.
+
+    Returns four dicts keyed by column name, in this order: Pearson coefficients,
+    Spearman coefficients, Pearson p-values, Spearman p-values.
+    """
+    pearsons, spearman, pvalues_pearson, pvalues_spearman = {}, {}, {}, {}
+    kudos = frame.index  # the CSVs are read with index_col=0; this script treats that column as kudos
+    for column in frame.columns:
+        values = frame[column]
+        pearsons[column], pvalues_pearson[column] = test_significance(stats.pearsonr, kudos, values)
+        spearman[column], pvalues_spearman[column] = test_significance(stats.spearmanr, kudos, values)
+    return pearsons, spearman, pvalues_pearson, pvalues_spearman
+
+
+def save_results(df_pearsons, df_spearman, df_pvalues_pearson, df_pvalues_spearman, data_path, label):
+    """Write both coefficient tables to CSV and plot only their significant cells.
+
+    ``label`` is appended to the heatmap titles and names what the tables describe.
+    """
+    os.makedirs(data_path, exist_ok=True)
+    df_pearsons.to_csv(f"{data_path}/pearsons.csv")
+    df_spearman.to_csv(f"{data_path}/spearman.csv")
+
+    # where() keeps a coefficient only if its p-value passes the test; everything else becomes NaN.
+    significant_pearson = df_pearsons.where(df_pvalues_pearson < SIGNIFICANCE_LEVEL)
+    significant_spearman = df_spearman.where(df_pvalues_spearman < SIGNIFICANCE_LEVEL)
+
+    create_heatmap(significant_pearson, f"Pearson's Correlation (Significant) - {label}", f"{data_path}/pearsons_heatmap.png")
+    create_heatmap(significant_spearman, f"Spearman's Correlation (Significant) - {label}", f"{data_path}/spearman_heatmap.png")
+
+
+def correlate_deltas():
+    """Per fandom: correlate kudos with the delta columns of every feature type."""
+    for fandom in fandoms:
+        print(f"{fandom}")
+        # One {feature type: [value per CSV column]} table per statistic, in the order
+        # returned by correlate_index_with_columns().
+        tables = [{}, {}, {}, {}]
+        for type_of_feature in types_of_feature:
+            sing_fanfic = pd.read_csv(f"data_overview/single_fic_deltas/{fandom}/{type_of_feature}.csv", index_col=0)
+            for table, result in zip(tables, correlate_index_with_columns(sing_fanfic)):
+                table[type_of_feature] = list(result.values())
+
+        # Values are matched by position and labelled with the columns of the last CSV read,
+        # so every feature CSV of a fandom is assumed to have the same columns in the same order.
+        frames = [pd.DataFrame(table, index=sing_fanfic.columns).T for table in tables]
+        save_results(*frames, f"correlation/deltas/{fandom}", fandom)
+
+
+def correlate_stylo_features():
+    """Per feature type: correlate kudos with the stylometric features of every fandom."""
+    for type_of_feature in types_of_feature:
+        # One list of {column: value} dicts (one dict per fandom) per statistic.
+        rows = [[], [], [], []]
+        for fandom in fandoms:
+            print(f"{fandom}")
+            feature_fanfic = pd.read_csv(f"{fandom}/fanfiction_stylo_data/stylo_data/{type_of_feature}.csv", index_col=0)
+            feature_fanfic.fillna(0, inplace=True)
+            for collected, result in zip(rows, correlate_index_with_columns(feature_fanfic)):
+                collected.append(result)
+
+        frames = [pd.DataFrame(collected, index=fandoms) for collected in rows]
+        # Each figure covers all fandoms for one feature type, so it is titled with the
+        # feature type rather than with whichever fandom the loop above ended on.
+        save_results(*frames, f"correlation/stylo_features/{type_of_feature}", type_of_feature)
+
+
+if __name__ == "__main__":
+    correlate_deltas()
+    correlate_stylo_features()
diff --git a/correlation_with_minepy/call_me_by_your_name/data_overview.csv b/correlation_with_minepy/call_me_by_your_name/data_overview.csv new file mode 100644 index 0000000..55535b8 --- /dev/null +++ b/correlation_with_minepy/call_me_by_your_name/data_overview.csv @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0547517f6bfb2adec85354700c481ec8769499c4bee4990db5d4f3ffb4b7b361 +size 2476 diff --git a/correlation_with_minepy/call_me_by_your_name/punct_tag_freq_dist.csv b/correlation_with_minepy/call_me_by_your_name/punct_tag_freq_dist.csv new file mode 100644 index 0000000..dd7855d --- /dev/null +++ b/correlation_with_minepy/call_me_by_your_name/punct_tag_freq_dist.csv @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:762be919dbcf37ccbb9e106d1b79c5c2b363bae8af655c03a66e7b07d59baa9c +size 2466 diff --git a/correlation_with_minepy/call_me_by_your_name/sent_len_dist.csv b/correlation_with_minepy/call_me_by_your_name/sent_len_dist.csv new file mode 100644 index 0000000..020f40f --- /dev/null +++ b/correlation_with_minepy/call_me_by_your_name/sent_len_dist.csv @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:49bfb9a9a40f6e559278d0bb0ab91e67b4144a5bc3f88e76a787468446f41fcd +size 2455 diff --git a/correlation_with_minepy/call_me_by_your_name/tk_len_dist.csv b/correlation_with_minepy/call_me_by_your_name/tk_len_dist.csv new file mode 100644 index 0000000..daf7996 --- /dev/null +++ b/correlation_with_minepy/call_me_by_your_name/tk_len_dist.csv @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1aefafd6e2296b13a8124fc3e76071cde5ce98193670be2692b75891cc2b7541 +size 2531 diff --git 
a/correlation_with_minepy/example_4.py b/correlation_with_minepy/example_4.py index 6db6c4f..2db4ec9 100644 --- a/correlation_with_minepy/example_4.py +++ b/correlation_with_minepy/example_4.py @@ -53,6 +53,7 @@ for fandom in fandoms: results[("index", col)] = {'MIC': mic, 'p-value': p_value} if os.path.exists(f"{fandom}") == False: os.makedirs(f"{fandom}") + results = pd.DataFrame(results) results.to_csv(f"{fandom}/{type_of_feature}.csv") diff --git a/correlation_with_minepy/minepy_6.txt b/correlation_with_minepy/minepy_6.txt index 19f88c1..e69de29 100644 --- a/correlation_with_minepy/minepy_6.txt +++ b/correlation_with_minepy/minepy_6.txt @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:9fb19ae7c52c773bad7425fbd5147e28e4336dcfa3c049abdd5459db0e646938 -size 224
diff --git a/correlation_with_minepy/visualisation.py b/correlation_with_minepy/visualisation.py
new file mode 100644
index 0000000..c5efd2b
--- /dev/null
+++ b/correlation_with_minepy/visualisation.py
@@ -0,0 +1,39 @@
+import matplotlib.pyplot as plt
+import seaborn as sns
+import numpy as np
+import pandas as pd
+import os
+
+fandoms = ["call_me_by_your_name"]
+#types_of_feature = ["data_overview","sent_len_dist", "tk_len_dist","punct_tag_freq_dist", "md_freq_dist", "tag_freq_dist", "pronouns_dist"] #"data_overview",
+#fandoms = ['call_me_by_your_name', 'cosmere', 'divergent', 'grishaverse', 'maze_runner', 'murderbot', 'percy', 'red_white_royal_blue', 'school_for_good_and_evil', 'simonverse', 'song_of_achilles', 'throne_of_glass',]
+types_of_feature = ["data_overview","sent_len_dist", "tk_len_dist","punct_tag_freq_dist"] #, "md_freq_dist", "tag_freq_dist", "pronouns_dist"
+
+for fandom in fandoms:
+    print(f"{fandom}")
+    for type_of_feature in types_of_feature:
+        print(type_of_feature)
+        # example_4.py builds these CSVs from a dict keyed by ("index", column) tuples whose values
+        # are {'MIC': ..., 'p-value': ...}, so each file has two header rows and the rows "MIC" and
+        # "p-value". NOTE(review): layout inferred from example_4.py; confirm against a real CSV.
+        data = pd.read_csv(f"{fandom}/{type_of_feature}.csv", index_col=0, header=[0, 1])
+        data.columns = data.columns.get_level_values(-1)
+
+        # Keep only the columns whose p-value is <= 0.05
+        is_significant = data.loc["p-value"] <= 0.05
+        if not is_significant.any():
+            # An empty frame gives the heatmap no values to derive a colour scale from.
+            print("no significant MIC values")
+            continue
+        df = data.loc[["MIC"], is_significant]
+
+        # Plotting the heatmap
+        plt.figure(figsize=(10, 2))
+        sns.heatmap(df, annot=True, cmap="viridis", cbar=True)
+        plt.title("Heatmap of MIC Values with Statistical Significance (p <= 0.05)")
+        plt.xlabel("Book Series")
+        plt.ylabel("MIC")
+        plt.xticks(rotation=45)
+        plt.savefig(f"{fandom}/{type_of_feature}.png")
+        # Close the figure so figures do not pile up in memory across iterations.
+        plt.close()
diff --git a/submit_correlation.sh b/submit_correlation.sh index 88160b2..7db2486 100644 --- a/submit_correlation.sh +++ b/submit_correlation.sh @@ -14,5 +14,5 @@ #python3 stylo_sing.py source fanfic_venv/bin/activate -python3 correlation.py +python3 correlation_adapted.py deactivate \ No newline at end of file -- GitLab