diff --git a/data_overview/data_overview.csv b/data_overview/data_overview.csv
index a0ad901496093f7836d33592f2234140b2ffe140..f0ac6bf42ff85f72edac5e4052b79bf5ba3cbcac 100644
--- a/data_overview/data_overview.csv
+++ b/data_overview/data_overview.csv
@@ -1,3 +1,9 @@
-,mean_tokens,std_dev,type_token_ratio,mean_sent
-throne_of_glass_canon,4.20580153308561,9.820105672393566,0.4612289416846652,14.468550677890269
-grishaverse_canon,4.1116821403167725,9.42692548599567,0.4679412861136999,14.026379022147932
+,mean_tokens,std_dev_tokens,type_token_ratio,mean_sent,std_dev_sent
+throne_of_glass_canon,4.20580153308561,2.0348877670869365,0.4612289416846652,14.468550677890269,9.820105672393566
+grishaverse_canon,4.1116821403167725,2.1047643402022285,0.4679412861136999,14.026379022147932,9.42692548599567
+grishaverse_good_fics,4.128605681546294,2.12767094657917,0.44176648168701443,12.920361563144626,10.031898461069263
+grishaverse_bad_fics,4.192839204109023,2.1961898296996827,0.4488349209373214,13.098263374311202,10.83490565859641
+grishaverse_medium_fics,4.125989775260719,2.1266952539859654,0.4420552018160678,13.1788589173054,10.270865275375563
+throne_of_glass_good_fics,4.197038090427363,2.0907564170382065,0.4495104669887279,13.376067824328105,9.013067041149515
+throne_of_glass_bad_fics,4.123089252572971,2.075327500013793,0.43527116374871266,12.966996479535549,9.797982354809053
+throne_of_glass_medium_fics,4.123495735120379,2.072193436253281,0.4337096917417227,12.511614522473558,8.912865289012412
diff --git a/data_overview/data_overview.png b/data_overview/data_overview.png
new file mode 100644
index 0000000000000000000000000000000000000000..7afd43d04e64eb69d5dd14aae0c4c7c068325c93
Binary files /dev/null and b/data_overview/data_overview.png differ
diff --git a/data_overview/delta_scores_grouped_fanfics.png b/data_overview/delta_scores_grouped_fanfics.png
new file mode 100644
index 0000000000000000000000000000000000000000..30baa73931768e4ceeec826040235b05ab940ee2
Binary files /dev/null and b/data_overview/delta_scores_grouped_fanfics.png differ
diff --git a/data_overview/z_scores_all_data.png b/data_overview/z_scores_all_data.png
new file mode 100644
index 0000000000000000000000000000000000000000..ae6199c3058e67693bdb3c317463425ebd92652d
Binary files /dev/null and b/data_overview/z_scores_all_data.png differ
diff --git a/data_visualisation.py b/data_visualisation.py
new file mode 100644
index 0000000000000000000000000000000000000000..f6aea3fba502c664011e89ddb773e2d0160c1659
--- /dev/null
+++ b/data_visualisation.py
@@ -0,0 +1,44 @@
+import seaborn as sns
+import matplotlib.pyplot as plt
+import pandas as pd
+from cycler import cycler
+import json
+import dataframe_image as dfi
+
+#make the plots a bit less ugly
+
+CB91_Blue = '#2CBDFE'
+CB91_Green = '#47DBCD'
+CB91_Pink = '#F3A0F2'
+CB91_Purple = '#9D2EC5'
+CB91_Violet = '#661D98'
+CB91_Amber = '#F5B14C'
+
+color_list = [CB91_Pink, CB91_Blue, CB91_Green, CB91_Amber,
+              CB91_Purple, CB91_Violet]
+plt.rcParams['axes.prop_cycle'] = plt.cycler(color=color_list)
+
+#some colour palette playing around
+
+cm = sns.cubehelix_palette(start=.5, rot=-.75, as_cmap=True)
+cm1 = sns.cubehelix_palette(start=.5, rot=-.5, as_cmap=True)
+cm2 = sns.cubehelix_palette(as_cmap=True)
+
+#read data
+
+data_overview = pd.read_csv("data_overview/data_overview.csv", index_col=0)
+
+# pairplot of the initial features -- not very informative with this few rows, but kept for reference
+
+"""
+data_pairplot = sns.pairplot(data_overview)
+
+data_pairplot.savefig("data_overview/data_pairplot.png")
+
+"""
+
+data_overview_styled = data_overview.style.background_gradient(cmap=cm)
+
+dfi.export(data_overview_styled, "data_overview/data_overview.png", table_conversion = "matplotlib")
\ No newline at end of file
diff --git a/delta_measure.py b/delta_measure.py
new file mode 100644
index 0000000000000000000000000000000000000000..4d2025d17eab82638fca47974432cea877a1eb80
--- /dev/null
+++ b/delta_measure.py
@@ -0,0 +1,62 @@
+import pandas as pd
+import dataframe_image as dfi
+
+data_overview = pd.DataFrame(pd.read_csv("data_overview/data_overview.csv", index_col=0))
+
+"""
+data_overview = pd.DataFrame(
+    {"mean_tokens":mean_tokens, 
+     "std_dev_tokens":std_dev_tokens, 
+     "type_token_ratio":type_token_ratio, 
+     "mean_sent":mean_sent, 
+     "std_dev_tokens":std_dev_tokens}, 
+     index = index
+)
+    
+data_overview.to_csv(f"data_overview/data_overview.csv")
+"""
+z_score_provider = data_overview.drop(["grishaverse_bad_fics", "grishaverse_good_fics", "grishaverse_medium_fics"], axis=0)
+# per-column mean and standard deviation, computed from all rows except the grishaverse fanfic groups
+mean_std_dev_list = [[column_name, column_data.mean(), column_data.std()] for column_name, column_data in z_score_provider.items()]
+
+# Create a new DataFrame with the same column names and index labels as data_overview
+z_scores_all_data = pd.DataFrame(columns=data_overview.columns, index=data_overview.index)
+
+# Iterate over each cell in the data_overview DataFrame and write the corresponding z-score in the z_scores_all_data DataFrame
+for index, row in data_overview.iterrows():
+    for column in data_overview.columns:
+        mean, std_dev = [elem[1:] for elem in mean_std_dev_list if elem[0]==column][0]
+        cell_value = data_overview.loc[index, column]
+        z_score = (cell_value - mean) / std_dev
+        z_scores_all_data.loc[index, column] = z_score
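+# Each cell above is the standard z-score z = (x - mean) / std, with mean and std taken per column
+# over z_score_provider, i.e. all rows except the grishaverse fanfic groups.
+# A vectorized sketch of the same computation (not used here; relies on pandas broadcasting):
+# z_scores_all_data = (data_overview - z_score_provider.mean()) / z_score_provider.std()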
+
+dfi.export(z_scores_all_data, "data_overview/z_scores_all_data.png", table_conversion = "matplotlib")
+
+print(z_scores_all_data)
+
+
+delta_scores_grouped_fanfics = pd.DataFrame(columns=["throne_of_glass_canon", "grishaverse_canon", "throne_of_glass_bad_fics", "throne_of_glass_good_fics", "throne_of_glass_medium_fics"], index=["grishaverse_bad_fics", "grishaverse_good_fics", "grishaverse_medium_fics"])
+
+delta_scores = []
+for fic in ["grishaverse_bad_fics", "grishaverse_good_fics", "grishaverse_medium_fics"]:
+    for index, row in z_scores_all_data.iterrows():
+        if index not in ["grishaverse_bad_fics", "grishaverse_good_fics", "grishaverse_medium_fics"]:
+            # mean absolute z-score difference across all features (a simplified Burrows' Delta)
+            delta_score = sum(abs(row[column] - z_scores_all_data.loc[fic, column]) for column in z_scores_all_data.columns)
+            delta_score /= len(z_scores_all_data.columns)
+            delta_scores.append(delta_score)
+            delta_scores_grouped_fanfics.loc[fic, index] = delta_score
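+# In formula form, for a fanfic group f and a reference corpus c with per-feature z-scores z_i:
+#     Delta(f, c) = (1/n) * sum_i |z_i(f) - z_i(c)|
+# which has the same shape as Burrows' Delta, only computed over the n = 5 overview features
+# (mean_tokens, std_dev_tokens, type_token_ratio, mean_sent, std_dev_sent) instead of word frequencies.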
+
+print(delta_scores)
+
+dfi.export(delta_scores_grouped_fanfics, "data_overview/delta_scores_grouped_fanfics.png", table_conversion = "matplotlib")
+
+print(delta_scores_grouped_fanfics)
+
diff --git a/fanfic_internal_metrics.py b/fanfic_internal_metrics.py
new file mode 100644
index 0000000000000000000000000000000000000000..63c06702123979c6c55a2984a4ecdc58ffd42c67
--- /dev/null
+++ b/fanfic_internal_metrics.py
@@ -0,0 +1,210 @@
+import os
+from nltk.tokenize import word_tokenize
+from nltk.tokenize import sent_tokenize
+import pandas as pd
+import statistics
+import re
+
+
+# you'll also have to download the "punkt" tokenizer models, e.g. via nltk.download("punkt")
+
+# by subdividing the text into segments of 1,000 tokens, this function calculates the type-token ratio for each segment and then averages over them
+# this ensures comparability of the type-token ratios for texts of varying length
+def standardised_type_token_ratio(tokens):
+    ttrs = []
+    segment_tokens = []
+    segment = 0
+    for token in tokens:
+        if segment < 1000:
+            segment_tokens.append(token)
+            segment += 1
+        elif segment == 1000:
+            types = set(segment_tokens)
+            ttr = len(types)/len(segment_tokens)
+            ttrs.append(ttr)
+            # start the next segment with the current token so that no token is dropped
+            segment_tokens = [token]
+            segment = 1
+    if len(ttrs) <= 1:
+        types = set(tokens)
+        std_ttr = len(types)/len(tokens)
+        print("Warning: Text was too short for segmentation!")
+    else:
+        std_ttr = statistics.mean(ttrs)
+    return std_ttr
+
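+# Note: a plain type-token ratio falls as a text grows longer (frequent words keep repeating),
+# which is why averaging over fixed 1,000-token segments keeps long canon texts and short fanfics comparable.
+# Texts shorter than two full segments fall back to the plain whole-text ratio (with a warning).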
+
+def tokenize_and_clean_text(text):
+
+    tokens = word_tokenize(text)
+    cleaned_tokens = ([token for token in tokens if any(c.isalpha() for c in token)])
+    short_clean_tokens = [] # when looking at the results, there were some strange token lengths, because somewhere in the data conversion hyphens
+    # had been added in the wrong places. Tokens with very large lengths turned out to have this format, e.g. "everywhere—assassin"
+    # (counted as 19 characters) or, at up to 45 characters, "walking-as-fast-as-they-could-without-running"
+
+    for token in cleaned_tokens:
+        dehyphenated_token = []
+        letter_present = 0
+        dehyphenated = 0
+        second_word_in_compound = 0
+        for c in token:
+            if c.isalpha():
+                dehyphenated_token.append(c)
+                letter_present = 1
+                if dehyphenated == 1:
+                    second_word_in_compound = 1
+            elif letter_present == 1: # c is not alphabetic here; this eliminates both dashes and hyphens,
+                # because counting "red-blue" as a single 9-character token skews the word-length metric, boosting the share of
+                # long tokens significantly. all texts are preprocessed the same way, so relatively speaking it shouldn't make a difference
+                dehyphenated_token_joined = ''.join(map(str, dehyphenated_token))
+                #print(dehyphenated_token_joined)
+                short_clean_tokens.append(dehyphenated_token_joined)
+                dehyphenated_token = []
+                letter_present = 0
+                dehyphenated = 1
+                second_word_in_compound = 0
+        if letter_present == 1 and dehyphenated == 0:
+            short_clean_tokens.append(token) #catching the tokens that didn't have any special characters; but not the dehyphenated ones twice
+        elif letter_present == 1 and dehyphenated == 1 and second_word_in_compound == 1:
+            short_clean_tokens.append(''.join(map(str, dehyphenated_token)))
+    return short_clean_tokens
+
+
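+# e.g. (assuming word_tokenize keeps the em-dash compound as a single token, as observed above):
+# tokenize_and_clean_text("everywhere—assassin ran") -> ["everywhere", "assassin", "ran"]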
+
+# this function takes a corpus (a string) as its input and returns the statistics behind a Mendenhall curve,
+# i.e. the distribution of token lengths: their standard deviation and mean, plus the standardised type-token ratio
+# (the plotted version, with curve_title and plot_destination arguments, lives in stylometry_code.py)
+
+def mendenhall_curve(corpus): 
+    
+    short_clean_tokens = tokenize_and_clean_text(corpus)
+    
+    # create the distribution of token lengths / Mendenhall curve
+
+    token_lengths = [len(token) for token in short_clean_tokens]
+    
+    # calculate the standard deviation, mean, token/type ratio
+    standard_deviation = statistics.stdev(token_lengths)
+    mean = statistics.mean(token_lengths)
+
+    type_token_ratio = standardised_type_token_ratio(short_clean_tokens)
+
+    return standard_deviation, mean, type_token_ratio
+
+
+def sentence_metrics(corpus): 
+
+    sents = sent_tokenize(corpus)
+    sent_lens = []
+    for sent in sents:
+        short_clean_tokens = tokenize_and_clean_text(sent)
+        sent_lens.append(len(short_clean_tokens))
+        
+    # calculate the standard deviation, mean
+    standard_deviation_sent = statistics.stdev(sent_lens)
+    mean_sent = statistics.mean(sent_lens)
+
+    return standard_deviation_sent, mean_sent
+
+
+def run_functions(directory_path):
+    good_mean_tks = []
+    bad_mean_tks = []
+    medium_mean_tks = []
+    #idx = []
+    good_std_dev_tks = []
+    bad_std_dev_tks = []
+    medium_std_dev_tks = []
+    good_ttrs = []
+    bad_ttrs = []
+    medium_ttrs = []
+    good_mean_sts= []
+    bad_mean_sts= []
+    medium_mean_sts= []
+    good_std_dev_sts = []
+    bad_std_dev_sts = []
+    medium_std_dev_sts = []
+
+    few_kudos = 100
+    medium_kudos = 1500
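+    # grouping: kudos <= 100 -> "bad" fics, 100 < kudos <= 1500 -> "medium", kudos > 1500 -> "good"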
+
+    for index, body in grisha_fanfics["body"].items():
+        
+        published = pd.to_datetime(grisha_fanfics["published"][index])
+        if published.year != 2023:
+            if not pd.isna(grisha_fanfics["kudos"][index]):
+                kudos = pd.to_numeric(grisha_fanfics["kudos"][index], errors="coerce")
+
+                if kudos <= few_kudos:
+                    std_dev_tk, mean_tk, ttr = mendenhall_curve(body)
+                    std_dev_st, mean_st = sentence_metrics(body)
+                    bad_mean_tks.append(mean_tk)
+                    bad_std_dev_tks.append(std_dev_tk)
+                    bad_ttrs.append(ttr)
+                    bad_mean_sts.append(mean_st)
+                    bad_std_dev_sts.append(std_dev_st)
+
+                elif kudos <= medium_kudos:
+                    std_dev_tk, mean_tk, ttr = mendenhall_curve(body)
+                    std_dev_st, mean_st = sentence_metrics(body)
+                    medium_mean_tks.append(mean_tk)
+                    medium_std_dev_tks.append(std_dev_tk)
+                    medium_ttrs.append(ttr)
+                    medium_mean_sts.append(mean_st)
+                    medium_std_dev_sts.append(std_dev_st)
+                    
+                elif kudos > medium_kudos:
+                    std_dev_tk, mean_tk, ttr = mendenhall_curve(body)
+                    std_dev_st, mean_st = sentence_metrics(body)
+                    good_mean_tks.append(mean_tk)
+                    good_std_dev_tks.append(std_dev_tk)
+                    good_ttrs.append(ttr)
+                    good_mean_sts.append(mean_st)
+                    good_std_dev_sts.append(std_dev_st)
+                    
+            else:
+                print(f"Missing kudos value for row {index}")
+        
+
+    # aggregate the per-fic metrics into one overview row per kudos group
+    # (here simply the mean over the individual fics in each group)
+    groups = [("grishaverse_bad_fics", bad_mean_tks, bad_std_dev_tks, bad_ttrs, bad_mean_sts, bad_std_dev_sts),
+              ("grishaverse_medium_fics", medium_mean_tks, medium_std_dev_tks, medium_ttrs, medium_mean_sts, medium_std_dev_sts),
+              ("grishaverse_good_fics", good_mean_tks, good_std_dev_tks, good_ttrs, good_mean_sts, good_std_dev_sts)]
+    for group_name, group_mean_tks, group_std_dev_tks, group_ttrs, group_mean_sts, group_std_dev_sts in groups:
+        mean_tokens.append(statistics.mean(group_mean_tks))
+        std_dev_tokens.append(statistics.mean(group_std_dev_tks))
+        type_token_ratio.append(statistics.mean(group_ttrs))
+        mean_sent.append(statistics.mean(group_mean_sts))
+        std_dev_sent.append(statistics.mean(group_std_dev_sts))
+        idx.append(group_name)
+
+
+grisha_fanfics = pd.read_csv("grishaverse/data/fanfics/grishaverse_fics.csv")
+#grishaverse/data/split_txt_fanfics
+
+#create lists for each of the columns of the dataframe we'll create
+
+mean_tokens = []
+std_dev_tokens = []
+type_token_ratio = []
+mean_sent = []
+std_dev_sent = []
+idx = []
+
+# compute the per-fic metrics for the grishaverse fanfics and aggregate them into the lists above
+run_functions("grishaverse/data/split_txt_fanfics")
+
+# create a dataframe to store all the overview statistics in
+# columns mean_tokens; std_dev_tokens; freq_token_len_1; ...; freq_token_len_15; 
+# mean_sent; std_dev_sent; freq_sent_len ....
+# tag_frequencies 
+# tag_ngram_frequencies
+# punctuation frequencies
+# token/type ratio
+
+data_overview = pd.DataFrame(
+    {"mean_tokens":mean_tokens, 
+     "std_dev_tokens":std_dev_tokens, 
+     "type_token_ratio":type_token_ratio, 
+     "mean_sent":mean_sent, 
+     "std_dev_tokens":std_dev_tokens}, 
+     index = index
+)
+    
+data_overview.to_csv(f"data_overview/data_overview.csv")
diff --git a/grishaverse/freq_distribution/bad_fics_pos_tag_frequencies.png b/grishaverse/freq_distribution/bad_fics_pos_tag_frequencies.png
new file mode 100644
index 0000000000000000000000000000000000000000..c2a4ea88ddafa39f9d86eeeb6e233e4bc914b598
Binary files /dev/null and b/grishaverse/freq_distribution/bad_fics_pos_tag_frequencies.png differ
diff --git a/grishaverse/freq_distribution/bad_fics_punctuation_frequencies.png b/grishaverse/freq_distribution/bad_fics_punctuation_frequencies.png
new file mode 100644
index 0000000000000000000000000000000000000000..c83ed8c7f41fd6e8ba561879305169842b3c3fea
Binary files /dev/null and b/grishaverse/freq_distribution/bad_fics_punctuation_frequencies.png differ
diff --git a/grishaverse/freq_distribution/bad_fics_sent_len_long.png b/grishaverse/freq_distribution/bad_fics_sent_len_long.png
new file mode 100644
index 0000000000000000000000000000000000000000..8285f0a814d7a85b310fc5bc6b557d4d4794cea6
Binary files /dev/null and b/grishaverse/freq_distribution/bad_fics_sent_len_long.png differ
diff --git a/grishaverse/freq_distribution/bad_fics_sent_len_short.png b/grishaverse/freq_distribution/bad_fics_sent_len_short.png
new file mode 100644
index 0000000000000000000000000000000000000000..102829dd127f0e06360a1cbea7ba9723cd72f3f5
Binary files /dev/null and b/grishaverse/freq_distribution/bad_fics_sent_len_short.png differ
diff --git a/grishaverse/freq_distribution/bad_fics_token_len.png b/grishaverse/freq_distribution/bad_fics_token_len.png
new file mode 100644
index 0000000000000000000000000000000000000000..1113e11cea6d9ba912f92ae88ff5a362b0456e10
Binary files /dev/null and b/grishaverse/freq_distribution/bad_fics_token_len.png differ
diff --git a/grishaverse/freq_distribution/canon_pos_tag_frequencies.png b/grishaverse/freq_distribution/canon_pos_tag_frequencies.png
index 93757d925e02030acf521fdc17da18c43b7abef0..84913112f65745cab0feaca3c79c11b8cebd7a2e 100644
Binary files a/grishaverse/freq_distribution/canon_pos_tag_frequencies.png and b/grishaverse/freq_distribution/canon_pos_tag_frequencies.png differ
diff --git a/grishaverse/freq_distribution/canon_punctuation_frequencies.png b/grishaverse/freq_distribution/canon_punctuation_frequencies.png
index 8108cc4cc3b47a2862f6edeb7b43a9c004e739f2..484ee25c3f27ea480d0d87cec1ed4817e8510bf3 100644
Binary files a/grishaverse/freq_distribution/canon_punctuation_frequencies.png and b/grishaverse/freq_distribution/canon_punctuation_frequencies.png differ
diff --git a/grishaverse/freq_distribution/canon_sent_len_short.png b/grishaverse/freq_distribution/canon_sent_len_short.png
index cbb67f98f7dc2404e2b18a0e07b021a71835b83a..76e8634cdf6e3d8841af822e58e3706bd4d7ce01 100644
Binary files a/grishaverse/freq_distribution/canon_sent_len_short.png and b/grishaverse/freq_distribution/canon_sent_len_short.png differ
diff --git a/grishaverse/freq_distribution/good_fics_pos_tag_frequencies.png b/grishaverse/freq_distribution/good_fics_pos_tag_frequencies.png
new file mode 100644
index 0000000000000000000000000000000000000000..c350f9202ecd33d18665979c86baa413241cd6fa
Binary files /dev/null and b/grishaverse/freq_distribution/good_fics_pos_tag_frequencies.png differ
diff --git a/grishaverse/freq_distribution/good_fics_punctuation_frequencies.png b/grishaverse/freq_distribution/good_fics_punctuation_frequencies.png
new file mode 100644
index 0000000000000000000000000000000000000000..ca6cd5cf6d67c67ac94662a337acdbadaf3578a6
Binary files /dev/null and b/grishaverse/freq_distribution/good_fics_punctuation_frequencies.png differ
diff --git a/grishaverse/freq_distribution/good_fics_sent_len_long.png b/grishaverse/freq_distribution/good_fics_sent_len_long.png
new file mode 100644
index 0000000000000000000000000000000000000000..c56a52ca97d587d24f35b5a7f81b1b33369b751d
Binary files /dev/null and b/grishaverse/freq_distribution/good_fics_sent_len_long.png differ
diff --git a/grishaverse/freq_distribution/good_fics_sent_len_short.png b/grishaverse/freq_distribution/good_fics_sent_len_short.png
new file mode 100644
index 0000000000000000000000000000000000000000..ae1ae050d8f26fd47d49628ee0152c8686345552
Binary files /dev/null and b/grishaverse/freq_distribution/good_fics_sent_len_short.png differ
diff --git a/grishaverse/freq_distribution/good_fics_token_len.png b/grishaverse/freq_distribution/good_fics_token_len.png
new file mode 100644
index 0000000000000000000000000000000000000000..ffbf132cd1bb5b9437e3dc2c22db03423ffc5040
Binary files /dev/null and b/grishaverse/freq_distribution/good_fics_token_len.png differ
diff --git a/grishaverse/freq_distribution/medium_fics_pos_tag_frequencies.png b/grishaverse/freq_distribution/medium_fics_pos_tag_frequencies.png
new file mode 100644
index 0000000000000000000000000000000000000000..e58adf18f9a51792183a4f4b9cfac7bdb84684c0
Binary files /dev/null and b/grishaverse/freq_distribution/medium_fics_pos_tag_frequencies.png differ
diff --git a/grishaverse/freq_distribution/medium_fics_punctuation_frequencies.png b/grishaverse/freq_distribution/medium_fics_punctuation_frequencies.png
new file mode 100644
index 0000000000000000000000000000000000000000..9abb5a806f769022558570cb122929a0f75718ad
Binary files /dev/null and b/grishaverse/freq_distribution/medium_fics_punctuation_frequencies.png differ
diff --git a/grishaverse/freq_distribution/medium_fics_sent_len_long.png b/grishaverse/freq_distribution/medium_fics_sent_len_long.png
new file mode 100644
index 0000000000000000000000000000000000000000..82dcf105ccc198ddb33a5a4d7909ca97a64ac90e
Binary files /dev/null and b/grishaverse/freq_distribution/medium_fics_sent_len_long.png differ
diff --git a/grishaverse/freq_distribution/medium_fics_sent_len_short.png b/grishaverse/freq_distribution/medium_fics_sent_len_short.png
new file mode 100644
index 0000000000000000000000000000000000000000..6fe0d8a354f8b1eb3eb68692cbd6b930b46e4726
Binary files /dev/null and b/grishaverse/freq_distribution/medium_fics_sent_len_short.png differ
diff --git a/grishaverse/freq_distribution/medium_fics_token_len.png b/grishaverse/freq_distribution/medium_fics_token_len.png
new file mode 100644
index 0000000000000000000000000000000000000000..addf5151a6d48dd161926e032d12e3554136422e
Binary files /dev/null and b/grishaverse/freq_distribution/medium_fics_token_len.png differ
diff --git a/stylometry_code.py b/stylometry_code.py
index 13766fdf98ebc3418cfe6b8d23ce69998e96a66d..98088f1c813717a4eaa0c3475ee5ac26bd8b4d44 100644
--- a/stylometry_code.py
+++ b/stylometry_code.py
@@ -217,7 +217,7 @@ def sentence_metrics(corpus, curve_title, series, canon_or_fanfic):
     ax.set_ylabel("Percentage of Occurence")
     
     
-    sns.lineplot(x=new_sent_len_dist.index, y=new_sent_len_dist.values, ax=ax, palette="flare")
+    sns.lineplot(x=new_sent_len_dist.index, y=new_sent_len_dist.values, ax=ax, palette="crest")
     #plt.xticks(rotation=30) !!! very useful for words
     plt.savefig(f"{series}/freq_distribution/{canon_or_fanfic}_sent_len_long.png") # "throne_of_glass/freq_distribution/all_canon_sent_len.png"
 
@@ -246,7 +246,7 @@ def sentence_metrics(corpus, curve_title, series, canon_or_fanfic):
     ax.set_xlabel("Sentence Length")
     ax.set_ylabel("Percentage of Occurence")
     
-    sns.barplot(x=new_sent_len_dist_short.index, y=new_sent_len_dist_short.values, ax=ax, palette="flare")
+    sns.barplot(x=new_sent_len_dist_short.index, y=new_sent_len_dist_short.values, ax=ax, palette="YlGnBu")
     #plt.xticks(rotation=30) !!! very useful for words
     plt.savefig(f"{series}/freq_distribution/{canon_or_fanfic}_sent_len_short.png") # "throne_of_glass/freq_distribution/all_canon_sent_len.png"
     
@@ -362,11 +362,11 @@ def pos_tag_frequencies(corpus, series, canon_or_fanfic):
     # call function for bar (value) labels 
     addlabels(x=new_tag_freq_dist.index, y=new_tag_freq_dist.values)
 
-    plt.title(f"POS Tag Frequencies for the {series.title()} {canon_or_fanfic.replace('_' , ' ').title()}")
+    plt.title(f"POS Tag Frequencies for the {series.replace('_' , ' ').title()} {canon_or_fanfic.replace('_' , ' ').title()}")
     ax.set_xlabel("POS Tags")
     ax.set_ylabel("Percentage of Occurence")
     
-    sns.barplot(x=new_tag_freq_dist.index, y=new_tag_freq_dist.values, ax=ax, palette="flare")
+    sns.barplot(x=new_tag_freq_dist.index, y=new_tag_freq_dist.values, ax=ax, palette="RdPu")
     plt.xticks(rotation=30) # !!! very useful for words
     plt.savefig(f"{series}/freq_distribution/{canon_or_fanfic}_pos_tag_frequencies.png") # "throne_of_glass/freq_distribution/all_canon_sent_len.png"
     
@@ -398,11 +398,11 @@ def pos_tag_frequencies(corpus, series, canon_or_fanfic):
     addlabels(x=new_punct_tag_freq_dist.index, y=new_punct_tag_freq_dist.values)
 
     
-    plt.title(f"Punctuation Frequencies for the {series.title()} {canon_or_fanfic.replace('_' , ' ').title()}")
+    plt.title(f"Punctuation Frequencies for the {series.replace('_' , ' ').title()} {canon_or_fanfic.replace('_' , ' ').title()}")
     ax.set_xlabel("Types of Punctuation")
     ax.set_ylabel("Percentage of Occurence")
     
-    sns.barplot(x=new_punct_tag_freq_dist.index, y=new_punct_tag_freq_dist.values, ax=ax, palette="flare")
+    sns.barplot(x=new_punct_tag_freq_dist.index, y=new_punct_tag_freq_dist.values, ax=ax, palette="OrRd")
     plt.xticks(rotation=30) # !!! very useful for words
     plt.savefig(f"{series}/freq_distribution/{canon_or_fanfic}_punctuation_frequencies.png") # "throne_of_glass/freq_distribution/all_canon_sent_len.png"
     
@@ -414,9 +414,6 @@ std_dev_tokens_tog_canon, mean_tokens_tog_canon, type_token_ratio_tog_canon = me
 std_dev_tokens_grishaverse_canon, mean_tokens_grishaverse_canon, type_token_ratio_grishaverse_canon = mendenhall_curve(read_works_into_string(f"grishaverse/data/canon_works"), "Mendenhall Curve for the Grishaverse Books", f"grishaverse/freq_distribution/all_canon_token_len.png")
 
 
-
-
-
 # Mendenhall Curve Sentence Lengths for Throne of Glass Canon
 std_dev_sent_tog_canon, mean_sent_tog_canon = sentence_metrics(read_works_into_string(f"throne_of_glass/data/canon_works"), "Mendenhall Curve for Sentence Lenghts for the Throne of Glass Series", "throne_of_glass", "canon")
 
@@ -426,11 +423,54 @@ std_dev_sent_grishaverse_canon, mean_sent_grishaverse_canon = sentence_metrics(r
 # POS Tag frequencies for TOG
 pos_tag_frequencies(read_works_into_string(f"throne_of_glass/data/canon_works"), "throne_of_glass", "canon")
 
-
 # POS Tag frequencies for Grishaverse
 pos_tag_frequencies(read_works_into_string(f"grishaverse/data/canon_works"), "grishaverse", "canon")
 
+def run_functions(directory_path):
+    """
+    mean_tks = []
+    idx = []
+    std_dev_tks = []
+    ttrs = []
+    mean_sts= []
+    std_dev_sts = []
+
+    """
+
+    #for txt_fic in os.listdir(directory_path):
+    works = os.listdir(directory_path)
+    pattern = r"^[a-zA-Z_]+(?=/)" # get series from directory path
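+    # e.g. "grishaverse/data/split_txt_fanfics" -> "grishaverse"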
+    match = re.search(pattern, directory_path)
+    if match:
+        series = match.group(0)
+    for work in works:
+        with open(f"{directory_path}/{work}", "r") as f:
+            text = f.read()
+        std_dev_tk, mean_tk, ttr = mendenhall_curve(text, f"Mendenhall Curve for the {series.replace('_' , ' ').title()} {work[:-4].replace('_' , ' ').title()}", f"{series}/freq_distribution/{work[:-4]}_token_len.png")
+        mean_tokens.append(mean_tk)
+        std_dev_tokens.append(std_dev_tk)
+        type_token_ratio.append(ttr)
+        std_dev_st, mean_st = sentence_metrics(text, f"Mendenhall Curve for Sentence Lengths for the {series.replace('_' , ' ').title()} {work[:-4].replace('_' , ' ').title()}", series, work[:-4])
+        mean_sent.append(mean_st)
+        std_dev_sents.append(std_dev_st)
+        pos_tag_frequencies(text, series, work[:-4])
+        index.append(f"{series}_{work[:-4]}")
+
+
+#grishaverse/data/split_txt_fanfics
+
+#create lists for each of the columns of the dataframe we'll create
+
+mean_tokens = [mean_tokens_tog_canon, mean_tokens_grishaverse_canon]
+std_dev_tokens = [std_dev_tokens_tog_canon, std_dev_tokens_grishaverse_canon]
+type_token_ratio = [type_token_ratio_tog_canon, type_token_ratio_grishaverse_canon]
+mean_sent = [mean_sent_tog_canon, mean_sent_grishaverse_canon]
+std_dev_sents = [std_dev_sent_tog_canon, std_dev_sent_grishaverse_canon]
+index = ["throne_of_glass_canon", "grishaverse_canon"]
+
 
+run_functions("grishaverse/data/split_txt_fanfics")
+run_functions("throne_of_glass/data/split_txt_fanfics")
 
 # create a dataframe to store all the overview statistics in
 # columns mean_tokens; std_dev_tokens; freq_token_len_1; ...; freq_token_len_15; 
@@ -441,12 +481,12 @@ pos_tag_frequencies(read_works_into_string(f"grishaverse/data/canon_works"), "gr
 # token/type ratio
 
 data_overview = pd.DataFrame(
-    {"mean_tokens":[mean_tokens_tog_canon, mean_tokens_grishaverse_canon], 
-     "std_dev":[std_dev_tokens_tog_canon, std_dev_tokens_grishaverse_canon], 
-     "type_token_ratio":[type_token_ratio_tog_canon, type_token_ratio_grishaverse_canon], 
-     "mean_sent":[mean_sent_tog_canon, mean_sent_grishaverse_canon], 
-     "std_dev":[std_dev_sent_tog_canon, std_dev_sent_grishaverse_canon]}, 
-     index= ["throne_of_glass_canon", "grishaverse_canon"]
-    )
-    
-data_overview.to_csv(f"data_overview/data_overview.csv")
\ No newline at end of file
+    {"mean_tokens":mean_tokens, 
+     "std_dev_tokens":std_dev_tokens, 
+     "type_token_ratio":type_token_ratio, 
+     "mean_sent":mean_sent, 
+     "std_dev_sent":std_dev_sents}, 
+     index = index
+)
+    
+data_overview.to_csv(f"data_overview/data_overview.csv")
diff --git a/throne_of_glass/freq_distribution/bad_fics_pos_tag_frequencies.png b/throne_of_glass/freq_distribution/bad_fics_pos_tag_frequencies.png
new file mode 100644
index 0000000000000000000000000000000000000000..27c4a9097c8700bcd110d548d09f0bf47166c40b
Binary files /dev/null and b/throne_of_glass/freq_distribution/bad_fics_pos_tag_frequencies.png differ
diff --git a/throne_of_glass/freq_distribution/bad_fics_punctuation_frequencies.png b/throne_of_glass/freq_distribution/bad_fics_punctuation_frequencies.png
new file mode 100644
index 0000000000000000000000000000000000000000..d3865e777fe5c154c3b99820cb859f504cd2f0e0
Binary files /dev/null and b/throne_of_glass/freq_distribution/bad_fics_punctuation_frequencies.png differ
diff --git a/throne_of_glass/freq_distribution/bad_fics_sent_len_long.png b/throne_of_glass/freq_distribution/bad_fics_sent_len_long.png
new file mode 100644
index 0000000000000000000000000000000000000000..585a3ecba71c530622f5b3951db70007e0c29296
Binary files /dev/null and b/throne_of_glass/freq_distribution/bad_fics_sent_len_long.png differ
diff --git a/throne_of_glass/freq_distribution/bad_fics_sent_len_short.png b/throne_of_glass/freq_distribution/bad_fics_sent_len_short.png
new file mode 100644
index 0000000000000000000000000000000000000000..a2736f970ad8feb6ec6415b4a6195e672d880b9e
Binary files /dev/null and b/throne_of_glass/freq_distribution/bad_fics_sent_len_short.png differ
diff --git a/throne_of_glass/freq_distribution/bad_fics_token_len.png b/throne_of_glass/freq_distribution/bad_fics_token_len.png
new file mode 100644
index 0000000000000000000000000000000000000000..e81815e73940035d2051c60853f9ce74ff1c314d
Binary files /dev/null and b/throne_of_glass/freq_distribution/bad_fics_token_len.png differ
diff --git a/throne_of_glass/freq_distribution/canon_pos_tag_frequencies.png b/throne_of_glass/freq_distribution/canon_pos_tag_frequencies.png
index 842cf88c60c96dd89c90efb5fe16a109aca22de2..7b86225fb37a323c544590df86db7499a4999e19 100644
Binary files a/throne_of_glass/freq_distribution/canon_pos_tag_frequencies.png and b/throne_of_glass/freq_distribution/canon_pos_tag_frequencies.png differ
diff --git a/throne_of_glass/freq_distribution/canon_punctuation_frequencies.png b/throne_of_glass/freq_distribution/canon_punctuation_frequencies.png
index d32daf8aa1f088945b4099b95ae25b6cd53aa392..b7d381889a3d045d39dfc0160e862a92db929945 100644
Binary files a/throne_of_glass/freq_distribution/canon_punctuation_frequencies.png and b/throne_of_glass/freq_distribution/canon_punctuation_frequencies.png differ
diff --git a/throne_of_glass/freq_distribution/canon_sent_len_short.png b/throne_of_glass/freq_distribution/canon_sent_len_short.png
index 5597cfd99b14064b6a0a4dc68dcf008dabca93ab..e12c7221f8f3ce3801fcaaf65ddb09e5299c55b8 100644
Binary files a/throne_of_glass/freq_distribution/canon_sent_len_short.png and b/throne_of_glass/freq_distribution/canon_sent_len_short.png differ
diff --git a/throne_of_glass/freq_distribution/good_fics_pos_tag_frequencies.png b/throne_of_glass/freq_distribution/good_fics_pos_tag_frequencies.png
new file mode 100644
index 0000000000000000000000000000000000000000..3b943bf0b23097869d9bb05fdc50fd54f72d51f3
Binary files /dev/null and b/throne_of_glass/freq_distribution/good_fics_pos_tag_frequencies.png differ
diff --git a/throne_of_glass/freq_distribution/good_fics_punctuation_frequencies.png b/throne_of_glass/freq_distribution/good_fics_punctuation_frequencies.png
new file mode 100644
index 0000000000000000000000000000000000000000..e2b07cbb6cdce2e52d95b8c4655ac6326d7b7c52
Binary files /dev/null and b/throne_of_glass/freq_distribution/good_fics_punctuation_frequencies.png differ
diff --git a/throne_of_glass/freq_distribution/good_fics_sent_len_long.png b/throne_of_glass/freq_distribution/good_fics_sent_len_long.png
new file mode 100644
index 0000000000000000000000000000000000000000..285a4ea14f106a59a1149325c26098561135fe94
Binary files /dev/null and b/throne_of_glass/freq_distribution/good_fics_sent_len_long.png differ
diff --git a/throne_of_glass/freq_distribution/good_fics_sent_len_short.png b/throne_of_glass/freq_distribution/good_fics_sent_len_short.png
new file mode 100644
index 0000000000000000000000000000000000000000..3b0223095fa33941ba91e8a707cc3da9f3cce767
Binary files /dev/null and b/throne_of_glass/freq_distribution/good_fics_sent_len_short.png differ
diff --git a/throne_of_glass/freq_distribution/good_fics_token_len.png b/throne_of_glass/freq_distribution/good_fics_token_len.png
new file mode 100644
index 0000000000000000000000000000000000000000..e76175ba06df630b82d53f56a82289663ddbeba3
Binary files /dev/null and b/throne_of_glass/freq_distribution/good_fics_token_len.png differ
diff --git a/throne_of_glass/freq_distribution/medium_fics_pos_tag_frequencies.png b/throne_of_glass/freq_distribution/medium_fics_pos_tag_frequencies.png
new file mode 100644
index 0000000000000000000000000000000000000000..6bf1eeb0f24d08963dc075264d40b067dfde7961
Binary files /dev/null and b/throne_of_glass/freq_distribution/medium_fics_pos_tag_frequencies.png differ
diff --git a/throne_of_glass/freq_distribution/medium_fics_punctuation_frequencies.png b/throne_of_glass/freq_distribution/medium_fics_punctuation_frequencies.png
new file mode 100644
index 0000000000000000000000000000000000000000..078d9f1539b0d7252d6aae8fb2d24f0cc9389fba
Binary files /dev/null and b/throne_of_glass/freq_distribution/medium_fics_punctuation_frequencies.png differ
diff --git a/throne_of_glass/freq_distribution/medium_fics_sent_len_long.png b/throne_of_glass/freq_distribution/medium_fics_sent_len_long.png
new file mode 100644
index 0000000000000000000000000000000000000000..b07f8f8f01c60acf419d6eb450fdc06f0caf5ef1
Binary files /dev/null and b/throne_of_glass/freq_distribution/medium_fics_sent_len_long.png differ
diff --git a/throne_of_glass/freq_distribution/medium_fics_sent_len_short.png b/throne_of_glass/freq_distribution/medium_fics_sent_len_short.png
new file mode 100644
index 0000000000000000000000000000000000000000..0643ee6929c32fa04cb689b6e4d32624ab054f8f
Binary files /dev/null and b/throne_of_glass/freq_distribution/medium_fics_sent_len_short.png differ
diff --git a/throne_of_glass/freq_distribution/medium_fics_token_len.png b/throne_of_glass/freq_distribution/medium_fics_token_len.png
new file mode 100644
index 0000000000000000000000000000000000000000..4a6f6afe23c99a1d2da8e545747b5e4a332d177a
Binary files /dev/null and b/throne_of_glass/freq_distribution/medium_fics_token_len.png differ