diff --git a/correlation.py b/correlation.py index d9e3906b271f737e0f641368a3d900f78c7eaf0a..29d4fd86e204dc53c31e841a41a4851e46dd7d76 100644 --- a/correlation.py +++ b/correlation.py @@ -1,5 +1,6 @@ import seaborn as sns import pandas as pd +import scipy.stats as stats import matplotlib.pyplot as plt from scipy import stats import os diff --git a/correlation/deltas/call_me_by_your_name/pearsons_heatmap.png b/correlation/deltas/call_me_by_your_name/pearsons_heatmap.png new file mode 100644 index 0000000000000000000000000000000000000000..3bd7d339e1f7c668494629262b9371e452404923 --- /dev/null +++ b/correlation/deltas/call_me_by_your_name/pearsons_heatmap.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e4ab1dddf8fc8ee97f0f6562f51a2f959bc57fe89ef59c7062c2f0c8ca9ba75a +size 247203 diff --git a/correlation/deltas/call_me_by_your_name/spearman_heatmap.png b/correlation/deltas/call_me_by_your_name/spearman_heatmap.png new file mode 100644 index 0000000000000000000000000000000000000000..1eb9f70d68f876ab3b5932bcc25085bf77aab2b1 --- /dev/null +++ b/correlation/deltas/call_me_by_your_name/spearman_heatmap.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c0e1e039c57ba7b3746f68e357f891a9c05af33db7e08b7c5cb1fea69e58500d +size 259966 diff --git a/correlation/deltas/cosmere/pearsons_heatmap.png b/correlation/deltas/cosmere/pearsons_heatmap.png new file mode 100644 index 0000000000000000000000000000000000000000..26c5c7a969e663a6156e903d602f0c1e2f914f93 --- /dev/null +++ b/correlation/deltas/cosmere/pearsons_heatmap.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1dcafc4eae6c5280a56b94326a201ef6d9641153e1bd819bbdaafe3dd4673b0d +size 230540 diff --git a/correlation/deltas/cosmere/spearman_heatmap.png b/correlation/deltas/cosmere/spearman_heatmap.png new file mode 100644 index 0000000000000000000000000000000000000000..c502415814a75b5f7809c8a87eaba84acd90aa05 --- /dev/null +++ 
b/correlation/deltas/cosmere/spearman_heatmap.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9ccb42a25fda6ee675ac175edef00c08735e5dad80b33aab1f8f48eb8840c957 +size 240427 diff --git a/correlation/deltas/divergent/pearsons_heatmap.png b/correlation/deltas/divergent/pearsons_heatmap.png new file mode 100644 index 0000000000000000000000000000000000000000..2c112efa525a3176beb256e548ae5cbd843398ec --- /dev/null +++ b/correlation/deltas/divergent/pearsons_heatmap.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8850b536891da4ed66bd1523be42b2571117f900d2ce44f74c86d80f41151bf2 +size 236215 diff --git a/correlation/deltas/divergent/spearman_heatmap.png b/correlation/deltas/divergent/spearman_heatmap.png new file mode 100644 index 0000000000000000000000000000000000000000..179538b16837cd607fa415145356f3d47e2d481c --- /dev/null +++ b/correlation/deltas/divergent/spearman_heatmap.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c7f36ae2addde5872bd2c947f23fb1904a88ababdd95d59e8dc11ce4784661d8 +size 246794 diff --git a/correlation/deltas/grishaverse/pearsons_heatmap.png b/correlation/deltas/grishaverse/pearsons_heatmap.png new file mode 100644 index 0000000000000000000000000000000000000000..bfd2a6a758716a1fb1dad35bf7b1c716a97ad717 --- /dev/null +++ b/correlation/deltas/grishaverse/pearsons_heatmap.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1630c2f9ca234066c0e53b59ae8fa0e0b5ace509e907c813e5fb466a817afc8d +size 248425 diff --git a/correlation/deltas/grishaverse/spearman_heatmap.png b/correlation/deltas/grishaverse/spearman_heatmap.png new file mode 100644 index 0000000000000000000000000000000000000000..d97b0f3c089c54efcc294492a9bc29ed403b3ec0 --- /dev/null +++ b/correlation/deltas/grishaverse/spearman_heatmap.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6662eda04a07dcace59321c9d5d66e09b4f0196aca4f65f879dd095e4bed627c +size 264065 diff 
--git a/correlation/deltas/maze_runner/pearsons_heatmap.png b/correlation/deltas/maze_runner/pearsons_heatmap.png new file mode 100644 index 0000000000000000000000000000000000000000..4e22f2fb844c90c233898511b911fb207f361774 --- /dev/null +++ b/correlation/deltas/maze_runner/pearsons_heatmap.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c6c8faa952db2eb6acaa17bd2f28f19b501e72087d0d3e0970d0c10b24fd4730 +size 247222 diff --git a/correlation/deltas/maze_runner/spearman_heatmap.png b/correlation/deltas/maze_runner/spearman_heatmap.png new file mode 100644 index 0000000000000000000000000000000000000000..f489ec81a7102f766bdb09f386c79efc8f0304a9 --- /dev/null +++ b/correlation/deltas/maze_runner/spearman_heatmap.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ce49ee88dfacbfe9a88f269a4f22736dce8ea495cb798e49bce6403a4f9513a9 +size 258861 diff --git a/correlation/deltas/murderbot/pearsons_heatmap.png b/correlation/deltas/murderbot/pearsons_heatmap.png new file mode 100644 index 0000000000000000000000000000000000000000..d0e7952936b95f32fbe03a5ba188ef4c1ce1c322 --- /dev/null +++ b/correlation/deltas/murderbot/pearsons_heatmap.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5096ba507c6c2ded111af3feb8c232a1e25f0dc4b96a623b59c6e2521fc6bd04 +size 223430 diff --git a/correlation/deltas/murderbot/spearman_heatmap.png b/correlation/deltas/murderbot/spearman_heatmap.png new file mode 100644 index 0000000000000000000000000000000000000000..a469aa9d32a2e60c1dd7585dc65eedc4e341307c --- /dev/null +++ b/correlation/deltas/murderbot/spearman_heatmap.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d7c66685a6818b08557c9dff722d80de7e881384239ac47709e0fdfc5b546f67 +size 239405 diff --git a/correlation/deltas/percy/pearsons_heatmap.png b/correlation/deltas/percy/pearsons_heatmap.png new file mode 100644 index 
0000000000000000000000000000000000000000..a9cb436884ad0cfed0ff1ea13f958dd449ad6e79 --- /dev/null +++ b/correlation/deltas/percy/pearsons_heatmap.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d101c7aaf30a18a6f58b2e842ad2889a1a993ec9fa12048f37a5e2ebc765736f +size 247136 diff --git a/correlation/deltas/percy/spearman_heatmap.png b/correlation/deltas/percy/spearman_heatmap.png new file mode 100644 index 0000000000000000000000000000000000000000..dfa2bb789be0e3c600dbf2d9e40e61ca5ea944d5 --- /dev/null +++ b/correlation/deltas/percy/spearman_heatmap.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:387b81b2ed879d6304a04b047747b3936f1ca72909bef00b4c91ba7cbaf862ae +size 253978 diff --git a/correlation/deltas/red_white_royal_blue/pearsons_heatmap.png b/correlation/deltas/red_white_royal_blue/pearsons_heatmap.png new file mode 100644 index 0000000000000000000000000000000000000000..929288ec246d7fbd2994fd13feb7f9eeda998f5a --- /dev/null +++ b/correlation/deltas/red_white_royal_blue/pearsons_heatmap.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6d544084fbcce32c45d15d2d1c4e5514b57ef433dc27000f86323f385e827e9a +size 244692 diff --git a/correlation/deltas/red_white_royal_blue/spearman_heatmap.png b/correlation/deltas/red_white_royal_blue/spearman_heatmap.png new file mode 100644 index 0000000000000000000000000000000000000000..31354f8bf49bda25dc1369f2aa15d29d0952229a --- /dev/null +++ b/correlation/deltas/red_white_royal_blue/spearman_heatmap.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4fd0730a69dfd1f26845cea01151ef70ec72dd1e5f9d140caf8770e0b9513928 +size 266702 diff --git a/correlation/deltas/school_for_good_and_evil/pearsons_heatmap.png b/correlation/deltas/school_for_good_and_evil/pearsons_heatmap.png new file mode 100644 index 0000000000000000000000000000000000000000..25db4065bd941424bebe2e6ab0710aebb753e755 --- /dev/null +++ 
b/correlation/deltas/school_for_good_and_evil/pearsons_heatmap.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2aa95239cf1f97255041fa04be8611aeda7e5d8fb1f738315db70c744047133e +size 231274 diff --git a/correlation/deltas/school_for_good_and_evil/spearman_heatmap.png b/correlation/deltas/school_for_good_and_evil/spearman_heatmap.png new file mode 100644 index 0000000000000000000000000000000000000000..e45649bf819cf5c0b6154b69cf425b66c95cd676 --- /dev/null +++ b/correlation/deltas/school_for_good_and_evil/spearman_heatmap.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:800a0779c817d6c9d3c7e79f08e2fd5301b00cfb93c069fce4d439507115fc82 +size 241060 diff --git a/correlation/deltas/simonverse/pearsons_heatmap.png b/correlation/deltas/simonverse/pearsons_heatmap.png new file mode 100644 index 0000000000000000000000000000000000000000..6c66f727f1fa58b5e9f8a41d7520e4b011b05f7c --- /dev/null +++ b/correlation/deltas/simonverse/pearsons_heatmap.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3126baa7030726664da78207df6fed885f6ea1a8af4a140fa18a2962a8cc06b2 +size 246958 diff --git a/correlation/deltas/simonverse/spearman_heatmap.png b/correlation/deltas/simonverse/spearman_heatmap.png new file mode 100644 index 0000000000000000000000000000000000000000..60488a37601d9acea6695e2df0df2861e8c71ab3 --- /dev/null +++ b/correlation/deltas/simonverse/spearman_heatmap.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f09321d0382de30e652f53a77787612c5251ab40650150a2c4bc522596edbf9f +size 246700 diff --git a/correlation/deltas/song_of_achilles/pearsons_heatmap.png b/correlation/deltas/song_of_achilles/pearsons_heatmap.png new file mode 100644 index 0000000000000000000000000000000000000000..311243db6366d73dc9d4f359d3fba1ec8da00aa9 --- /dev/null +++ b/correlation/deltas/song_of_achilles/pearsons_heatmap.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:18fd7361dd03f397eab90f44139fb1e058e657b6a5ac1896b78d3d1d9bb5d759 +size 239335 diff --git a/correlation/deltas/song_of_achilles/spearman_heatmap.png b/correlation/deltas/song_of_achilles/spearman_heatmap.png new file mode 100644 index 0000000000000000000000000000000000000000..a695497bd5e8ddc999b7217b12c0f9c2384cb3a4 --- /dev/null +++ b/correlation/deltas/song_of_achilles/spearman_heatmap.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6c6cf3f17826e212a4ba4b12c8d87336628cd1957d4c44fe9aa6391c60a04d3c +size 245583 diff --git a/correlation/deltas/throne_of_glass/pearsons_heatmap.png b/correlation/deltas/throne_of_glass/pearsons_heatmap.png new file mode 100644 index 0000000000000000000000000000000000000000..76776cd537a074b05468a7e01e1ed13fb2dbb718 --- /dev/null +++ b/correlation/deltas/throne_of_glass/pearsons_heatmap.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:264e33d2ef40d085376ca996bae3c6ad522098f459370d083d9f540ce2a53c97 +size 244173 diff --git a/correlation/deltas/throne_of_glass/spearman_heatmap.png b/correlation/deltas/throne_of_glass/spearman_heatmap.png new file mode 100644 index 0000000000000000000000000000000000000000..1052633bb353fabb9381822f9410021079e58079 --- /dev/null +++ b/correlation/deltas/throne_of_glass/spearman_heatmap.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cbb255b7739723b340d3a1272f74adaa347dc04dbb6654a78a04e156170f3be4 +size 255574 diff --git a/correlation/stylo_features/data_overview/pearsons_heatmap.png b/correlation/stylo_features/data_overview/pearsons_heatmap.png new file mode 100644 index 0000000000000000000000000000000000000000..4e282d3d5d279a0b3cfa77e0461ac34db24001d8 --- /dev/null +++ b/correlation/stylo_features/data_overview/pearsons_heatmap.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7d5655bd48b4b633303a51eb1f081f9c17e42344da74136f704486f9df204882 +size 99011 diff --git 
a/correlation/stylo_features/data_overview/spearman_heatmap.png b/correlation/stylo_features/data_overview/spearman_heatmap.png new file mode 100644 index 0000000000000000000000000000000000000000..fc6aafdfae56ef560a38b64be0af8adcac1a2c8d --- /dev/null +++ b/correlation/stylo_features/data_overview/spearman_heatmap.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:652247a0b4d6ddcb1fefea65b2e8fe77471eb3ac1ece243d7f7104f94f758cc6 +size 105088 diff --git a/correlation/stylo_features/md_freq_dist/pearsons_heatmap.png b/correlation/stylo_features/md_freq_dist/pearsons_heatmap.png new file mode 100644 index 0000000000000000000000000000000000000000..8b3e76ffe58fbb0451a2e937682653e049ee05ab --- /dev/null +++ b/correlation/stylo_features/md_freq_dist/pearsons_heatmap.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1ac1696101d41010269b7cf946ab2e0707909056a0c79fd8c22575b83c267dea +size 112691 diff --git a/correlation/stylo_features/md_freq_dist/spearman_heatmap.png b/correlation/stylo_features/md_freq_dist/spearman_heatmap.png new file mode 100644 index 0000000000000000000000000000000000000000..42e11d66f16588246177a1b8e69ef7bc1a0ce7bb --- /dev/null +++ b/correlation/stylo_features/md_freq_dist/spearman_heatmap.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:584320b4b371489242d8aae40eb7597c71a8c95b998c7ac7c1c24a599f46cf5c +size 135718 diff --git a/correlation/stylo_features/pronouns_dist/pearsons_heatmap.png b/correlation/stylo_features/pronouns_dist/pearsons_heatmap.png new file mode 100644 index 0000000000000000000000000000000000000000..4ce41e444c34d24ac2eb5fcb2af9c1df3bfaaeaa --- /dev/null +++ b/correlation/stylo_features/pronouns_dist/pearsons_heatmap.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f64409fb62448f52b1539a208469599d674e2ff2d8d35ae3245af7f97f57923 +size 108733 diff --git a/correlation/stylo_features/pronouns_dist/spearman_heatmap.png 
b/correlation/stylo_features/pronouns_dist/spearman_heatmap.png new file mode 100644 index 0000000000000000000000000000000000000000..32f51bc2d3a69bcf9b15ed471662619e24aa23bc --- /dev/null +++ b/correlation/stylo_features/pronouns_dist/spearman_heatmap.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bf6012b6ea1abceaa58357c328ab5929a8c06394fc2ae47490ba696ba91662d8 +size 118721 diff --git a/correlation/stylo_features/punct_tag_freq_dist/pearsons_heatmap.png b/correlation/stylo_features/punct_tag_freq_dist/pearsons_heatmap.png new file mode 100644 index 0000000000000000000000000000000000000000..fde6aa4d6b7ef5040c9dcd950d26a35e0bfd6b8b --- /dev/null +++ b/correlation/stylo_features/punct_tag_freq_dist/pearsons_heatmap.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c60ebb89d805d5ff3eef3c068d5f7d1378ccdf600f8010ff3c572c95aeea04d7 +size 113625 diff --git a/correlation/stylo_features/punct_tag_freq_dist/spearman_heatmap.png b/correlation/stylo_features/punct_tag_freq_dist/spearman_heatmap.png new file mode 100644 index 0000000000000000000000000000000000000000..fbe33298c5320eb36f926506c11e78720fcee915 --- /dev/null +++ b/correlation/stylo_features/punct_tag_freq_dist/spearman_heatmap.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eb08692487ba97eb99cb50ad13a630d56bf64785c65169ccdf6fde6b94b338ca +size 126174 diff --git a/correlation/stylo_features/sent_len_dist/pearsons_heatmap.png b/correlation/stylo_features/sent_len_dist/pearsons_heatmap.png new file mode 100644 index 0000000000000000000000000000000000000000..4b957159a68374bb40a4a724304b1674631e17cf --- /dev/null +++ b/correlation/stylo_features/sent_len_dist/pearsons_heatmap.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d03d36472dc9704e57faffb62c2a8216e8de96b4cbe568e3830426af74198960 +size 119906 diff --git a/correlation/stylo_features/sent_len_dist/spearman_heatmap.png 
b/correlation/stylo_features/sent_len_dist/spearman_heatmap.png new file mode 100644 index 0000000000000000000000000000000000000000..2b26ffec21e360cbca888e5839d863a39d9a3a4c --- /dev/null +++ b/correlation/stylo_features/sent_len_dist/spearman_heatmap.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dbfdbc230ff373b508af9c21de067cfeff29ed3d8e7863b1efc2129bf68e5ea8 +size 207993 diff --git a/correlation/stylo_features/tag_freq_dist/pearsons_heatmap.png b/correlation/stylo_features/tag_freq_dist/pearsons_heatmap.png new file mode 100644 index 0000000000000000000000000000000000000000..fc8b9a8bd7fac323bce4ffad6b17f1f64504b9ff --- /dev/null +++ b/correlation/stylo_features/tag_freq_dist/pearsons_heatmap.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:da2f01ba93266b3ede2f728acfdbe57d1aa464752e58f24a8c719049bea21514 +size 127061 diff --git a/correlation/stylo_features/tag_freq_dist/spearman_heatmap.png b/correlation/stylo_features/tag_freq_dist/spearman_heatmap.png new file mode 100644 index 0000000000000000000000000000000000000000..344bac5bcecf8590909293242de036fa492d22a0 --- /dev/null +++ b/correlation/stylo_features/tag_freq_dist/spearman_heatmap.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:583c7d8d3968cb6f3f40de057be2e2b2e28c43ae4a04f5ff81085cae883a694f +size 130013 diff --git a/correlation/stylo_features/tk_len_dist/pearsons_heatmap.png b/correlation/stylo_features/tk_len_dist/pearsons_heatmap.png new file mode 100644 index 0000000000000000000000000000000000000000..e1846f240336219403bafbdb83d1d0c15b80be27 --- /dev/null +++ b/correlation/stylo_features/tk_len_dist/pearsons_heatmap.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6ac90534b62ec6d708b32295e688ed6b83abda33c3bab65346dcfad306e67aa2 +size 119416 diff --git a/correlation/stylo_features/tk_len_dist/spearman_heatmap.png b/correlation/stylo_features/tk_len_dist/spearman_heatmap.png new file mode 100644 
index 0000000000000000000000000000000000000000..cce9665784bcca70843f32b9210d595a5e9f7a12
--- /dev/null
+++ b/correlation/stylo_features/tk_len_dist/spearman_heatmap.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1ddf4083289838908132eb5a8dfc2d504fdbae8d19c48af8c83d04e8b507b96b
+size 135861
diff --git a/correlation_adapted.py b/correlation_adapted.py
new file mode 100644
index 0000000000000000000000000000000000000000..e08ee7d395da793d9da612ecd1f687e112d1411d
--- /dev/null
+++ b/correlation_adapted.py
@@ -0,0 +1,115 @@
+"""Correlate stylometric features with kudos and plot the significant results.
+
+Running the script performs two passes. Each correlates the row index of a CSV
+table (treated as the kudos values) with every column of that table:
+
+1. per fandom, over data_overview/single_fic_deltas/<fandom>/<feature>.csv;
+2. per feature type, over <fandom>/fanfiction_stylo_data/stylo_data/<feature>.csv.
+
+All coefficients are written to CSV; the heatmaps show only those with p < 0.01.
+"""
+import pandas as pd
+import scipy.stats as stats
+import seaborn as sns
+import matplotlib.pyplot as plt
+import os
+
+
+types_of_feature = ["data_overview","sent_len_dist", "tk_len_dist","punct_tag_freq_dist", "md_freq_dist", "tag_freq_dist", "pronouns_dist"]
+fandoms = ['call_me_by_your_name', 'cosmere', 'divergent', 'grishaverse', 'maze_runner', 'murderbot', 'percy', 'red_white_royal_blue', 'school_for_good_and_evil', 'simonverse', 'song_of_achilles', 'throne_of_glass',]
+
+# Coefficients whose p-value is not below this level are left blank in the heatmaps.
+SIGNIFICANCE_LEVEL = 0.01
+
+
+def test_significance(corr_func, x, y):
+    """Return the (coefficient, p-value) pair produced by corr_func(x, y).
+
+    corr_func is any callable whose result unpacks into exactly two values,
+    e.g. scipy.stats.pearsonr or scipy.stats.spearmanr.
+    """
+    corr, p_value = corr_func(x, y)
+    return corr, p_value
+
+
+def correlate_index_with_columns(frame):
+    """Correlate the row index of frame with each of its columns.
+
+    Returns four dicts keyed by column name, in this order: Pearson
+    coefficients, Spearman coefficients, Pearson p-values, Spearman p-values.
+    """
+    pearsons, spearman, pvalues_pearson, pvalues_spearman = {}, {}, {}, {}
+    kudos = frame.index
+    for column in frame.columns:
+        values = frame[column]
+        pearsons[column], pvalues_pearson[column] = test_significance(stats.pearsonr, kudos, values)
+        spearman[column], pvalues_spearman[column] = test_significance(stats.spearmanr, kudos, values)
+    return pearsons, spearman, pvalues_pearson, pvalues_spearman
+
+
+def tables_from_rows(rows, row_labels):
+    """Build one DataFrame per statistic from per-row correlation results.
+
+    rows holds one correlate_index_with_columns() result per entry of
+    row_labels. A column that is missing from a row becomes NaN in that row.
+    """
+    tables = []
+    for dicts in zip(*rows):  # regroup: all rows of the same statistic together
+        table = pd.DataFrame(list(dicts))
+        table.index = row_labels
+        tables.append(table)
+    return tables
+
+
+def create_heatmap(df, title, save_path):
+    """Save df as an annotated heatmap on a fixed -1..1 colour scale."""
+    plt.figure(figsize=(30, 26))
+    sns.heatmap(df, annot=True, cmap='coolwarm', vmin=-1, vmax=1, annot_kws={"size": 8})
+    plt.title(title)
+    plt.tight_layout()
+    plt.savefig(save_path)
+    plt.close()
+
+
+def save_results(df_pearsons, df_spearman, df_pvalues_pearson, df_pvalues_spearman, data_path, label):
+    """Write both coefficient tables to data_path and plot their significant entries.
+
+    label names the fandom or feature type the tables belong to; it is used
+    in the heatmap titles.
+    """
+    os.makedirs(data_path, exist_ok=True)
+
+    df_pearsons.to_csv(f"{data_path}/pearsons.csv")
+    df_spearman.to_csv(f"{data_path}/spearman.csv")
+
+    # where() keeps a coefficient only where the mask is True; the rest become NaN.
+    significant_pearson = df_pearsons.where(df_pvalues_pearson < SIGNIFICANCE_LEVEL)
+    significant_spearman = df_spearman.where(df_pvalues_spearman < SIGNIFICANCE_LEVEL)
+
+    create_heatmap(significant_pearson, f"Pearson's Correlation (Significant) - {label}", f"{data_path}/pearsons_heatmap.png")
+    create_heatmap(significant_spearman, f"Spearman's Correlation (Significant) - {label}", f"{data_path}/spearman_heatmap.png")
+
+
+# Deltas: one output folder per fandom, one heatmap row per feature type.
+for fandom in fandoms:
+    print(f"{fandom}")
+    rows = []
+    for type_of_feature in types_of_feature:
+        sing_fanfic = pd.read_csv(f"data_overview/single_fic_deltas/{fandom}/{type_of_feature}.csv", index_col=0)
+        rows.append(correlate_index_with_columns(sing_fanfic))
+
+    tables = tables_from_rows(rows, types_of_feature)
+    save_results(*tables, data_path=f"correlation/deltas/{fandom}", label=fandom)
+
+# Stylo features: one output folder per feature type, one heatmap row per fandom.
+for type_of_feature in types_of_feature:
+    rows = []
+    for fandom in fandoms:
+        print(f"{fandom}")
+        feature_fanfic = pd.read_csv(f"{fandom}/fanfiction_stylo_data/stylo_data/{type_of_feature}.csv", index_col=0)
+        feature_fanfic.fillna(0, inplace=True)
+        rows.append(correlate_index_with_columns(feature_fanfic))
+
+    tables = tables_from_rows(rows, fandoms)
+    # The title names the feature type; the fandoms are the heatmap rows.
+    save_results(*tables, data_path=f"correlation/stylo_features/{type_of_feature}", label=type_of_feature)
diff --git a/correlation_with_minepy/call_me_by_your_name/data_overview.csv b/correlation_with_minepy/call_me_by_your_name/data_overview.csv
new file mode 100644
index 0000000000000000000000000000000000000000..55535b8ee4de79483566b5e284648f360b44cff6
--- /dev/null
+++ b/correlation_with_minepy/call_me_by_your_name/data_overview.csv
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0547517f6bfb2adec85354700c481ec8769499c4bee4990db5d4f3ffb4b7b361
+size 2476
diff --git a/correlation_with_minepy/call_me_by_your_name/punct_tag_freq_dist.csv b/correlation_with_minepy/call_me_by_your_name/punct_tag_freq_dist.csv
new file mode 100644
index 0000000000000000000000000000000000000000..dd7855d5f6c24b777158389a71757b11ee440872
--- /dev/null
+++ b/correlation_with_minepy/call_me_by_your_name/punct_tag_freq_dist.csv
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:762be919dbcf37ccbb9e106d1b79c5c2b363bae8af655c03a66e7b07d59baa9c
+size 2466
diff --git a/correlation_with_minepy/call_me_by_your_name/sent_len_dist.csv b/correlation_with_minepy/call_me_by_your_name/sent_len_dist.csv
new file mode 100644
index 0000000000000000000000000000000000000000..020f40fedb7c75be6c0c8fc3dbcd22cc9f8adaba
--- /dev/null
+++ b/correlation_with_minepy/call_me_by_your_name/sent_len_dist.csv
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:49bfb9a9a40f6e559278d0bb0ab91e67b4144a5bc3f88e76a787468446f41fcd
+size 2455
diff --git a/correlation_with_minepy/call_me_by_your_name/tk_len_dist.csv b/correlation_with_minepy/call_me_by_your_name/tk_len_dist.csv
new file mode 100644
index
0000000000000000000000000000000000000000..daf7996e673c7483b23669c1ccc6a202633a5320
--- /dev/null
+++ b/correlation_with_minepy/call_me_by_your_name/tk_len_dist.csv
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1aefafd6e2296b13a8124fc3e76071cde5ce98193670be2692b75891cc2b7541
+size 2531
diff --git a/correlation_with_minepy/example_4.py b/correlation_with_minepy/example_4.py
index 6db6c4f4f89bda306efd1fdff8a0b6574e379f05..2db4ec9d15c7738df837155e9eb8a76b64d126e9 100644
--- a/correlation_with_minepy/example_4.py
+++ b/correlation_with_minepy/example_4.py
@@ -53,6 +53,7 @@ for fandom in fandoms: results[("index", col)] = {'MIC': mic, 'p-value': p_value} if os.path.exists(f"{fandom}") == False: os.makedirs(f"{fandom}") + results = pd.DataFrame(results) results.to_csv(f"{fandom}/{type_of_feature}.csv")
diff --git a/correlation_with_minepy/minepy_6.txt b/correlation_with_minepy/minepy_6.txt
index 19f88c1dbbd0b56167b12b1e2f3c27605cdab326..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 100644
--- a/correlation_with_minepy/minepy_6.txt
+++ b/correlation_with_minepy/minepy_6.txt
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:9fb19ae7c52c773bad7425fbd5147e28e4336dcfa3c049abdd5459db0e646938
-size 224
diff --git a/correlation_with_minepy/visualisation.py b/correlation_with_minepy/visualisation.py
new file mode 100644
index 0000000000000000000000000000000000000000..c5efd2b2e665b2429b9518b91d3d0ab00e5e4619
--- /dev/null
+++ b/correlation_with_minepy/visualisation.py
@@ -0,0 +1,41 @@
+"""Plot the significant MIC scores stored by example_4.py as one heatmap per CSV."""
+import matplotlib.pyplot as plt
+import seaborn as sns
+import numpy as np
+import pandas as pd
+import os
+
+fandoms = ["call_me_by_your_name"]
+#types_of_feature = ["data_overview","sent_len_dist", "tk_len_dist","punct_tag_freq_dist", "md_freq_dist", "tag_freq_dist", "pronouns_dist"] #"data_overview",
+#fandoms = ['call_me_by_your_name', 'cosmere', 'divergent', 'grishaverse', 'maze_runner', 'murderbot', 'percy', 'red_white_royal_blue', 'school_for_good_and_evil', 'simonverse', 'song_of_achilles', 'throne_of_glass',]
+types_of_feature = ["data_overview","sent_len_dist", "tk_len_dist","punct_tag_freq_dist"] #, "md_freq_dist", "tag_freq_dist", "pronouns_dist"
+
+# MIC scores whose p-value is above this level are left out of the heatmap.
+P_VALUE_THRESHOLD = 0.05
+
+for fandom in fandoms:
+    print(f"{fandom}")
+    for type_of_feature in types_of_feature:
+        print(type_of_feature)
+        # example_4.py saves DataFrame(results) keyed by ("index", column) tuples,
+        # i.e. two header rows followed by the rows "MIC" and "p-value".
+        # Transposing yields one row per column with both statistics side by side.
+        # NOTE(review): layout inferred from the writer code - confirm on a real CSV.
+        data = pd.read_csv(f"{fandom}/{type_of_feature}.csv", index_col=0, header=[0, 1]).T
+        significant = data.loc[data["p-value"] <= P_VALUE_THRESHOLD, "MIC"]
+        if significant.empty:
+            # Nothing to draw: a heatmap needs at least one cell.
+            print(f"{type_of_feature}: no MIC value with p <= {P_VALUE_THRESHOLD}")
+            continue
+
+        # One-row frame: one cell per significant column, labelled with the column name.
+        df = pd.DataFrame([significant.to_numpy()], index=["MIC"], columns=significant.index.get_level_values(-1))
+
+        plt.figure(figsize=(10, 2))
+        sns.heatmap(df, annot=True, cmap="viridis", cbar=True)
+        plt.title("Heatmap of MIC Values with Statistical Significance (p <= 0.05)")
+        plt.xlabel("Book Series")
+        plt.ylabel("MIC")
+        plt.xticks(rotation=45)
+        plt.savefig(f"{fandom}/{type_of_feature}.png")
+        plt.close()  # release the figure before the next CSV is plotted
diff --git a/submit_correlation.sh b/submit_correlation.sh
index 88160b22c8d045c1e48be0da70bcbbb49a3cab2d..7db2486528fa001cbe408fa5d7336bc3290dc079 100644
--- a/submit_correlation.sh
+++ b/submit_correlation.sh
@@ -14,5 +14,5 @@ #python3 stylo_sing.py source fanfic_venv/bin/activate -python3 correlation.py +python3 correlation_adapted.py deactivate \ No newline at end of file