diff --git a/grishaverse/freq_distribution/all_canon_sent_len.png b/grishaverse/freq_distribution/all_canon_sent_len.png
new file mode 100644
index 0000000000000000000000000000000000000000..9d003763c78a0d013eb2a61b571b8a6f22c17c2c
Binary files /dev/null and b/grishaverse/freq_distribution/all_canon_sent_len.png differ
diff --git a/stylometry_code.py b/stylometry_code.py
index 061eb5462152a1b8a7faf47f7bcbd579013b45cc..30c2ec1f0e86b494c7e403d01927262ce040eee9 100644
--- a/stylometry_code.py
+++ b/stylometry_code.py
@@ -4,9 +4,11 @@
 from cycler import cycler
 import os
 from nltk.tokenize import word_tokenize
 from nltk.probability import FreqDist
+from nltk.tokenize import sent_tokenize
 import pandas as pd
 import statistics
+
 # you'll have to also download "punkt" from nltk
 
 # code snippets for prettifying plots
@@ -49,10 +51,6 @@ def read_works_into_string(directory_path):
             strings.append(f.read())
     return "\n".join(strings)
 
-# this function takes a corpus as its input and gives a Mendenhall curve, i.e. a frequency distribution of tokens as its output
-# precise input: corpus = string ;
-# curve_title = string, the title of the plot that will be produced, e.g., "Mendenhall Curve for Throne of Glass Series"
-# plot_destination = string, the (relative) path, including the file name and .png tag of the plot produced, e.g. f"throne_of_glass/freq_distribution/all_canon_token_len.png"
 
 # by subdiving the text into segments of 1000, it calculates the type token ratio for each segment and then averages over them
 # this ensures a comparability of the type token ratios for varying text sizes
@@ -78,7 +76,12 @@ def standardised_type_token_ratio(tokens):
         std_ttr = statistics.mean(ttrs)
     return std_ttr
 
-
+
+# this function takes a corpus as its input and gives a Mendenhall curve, i.e. a frequency distribution of tokens as its output
+# precise input: corpus = string ;
+# curve_title = string, the title of the plot that will be produced, e.g., "Mendenhall Curve for Throne of Glass Series"
+# plot_destination = string, the (relative) path, including the file name and .png tag of the plot produced, e.g. f"throne_of_glass/freq_distribution/all_canon_token_len.png"
+
 def mendenhall_curve(corpus, curve_title, plot_destination):
 
     tokens = word_tokenize(corpus)
@@ -154,11 +157,88 @@ def mendenhall_curve(corpus, curve_title, plot_destination):
 
     return standard_deviation, mean, type_token_ratio
 
+def sentence_metrics(corpus, curve_title, plot_destination):
+    sents = sent_tokenize(corpus)
+
+    sent_lens = []
+    for sent in sents:
+        tokens = word_tokenize(sent)
+
+        #cleaned_tokens = ([token for token in tokens if any(c.isalpha() for c in token)])
+        """
+        short_clean_tokens = [] # when looking at the results, there were some strange token lengths, because somewhere in the data conversion hyphens
+        # had been added in the wrong places. I had the tokens with very large lengths printed and they had this format, e.g. "everywhere—assassin"
+        # and were counted, in this instance as 19 characters long but up to 45 characters long: "walking-as-fast-as-they-could-without-running"
+        for token in cleaned_tokens:
+            dehyphenated_token = []
+            letter_present = 0
+            for c in token:
+                if c.isalpha() == True:
+                    dehyphenated_token.append(c)
+                    letter_present = 1
+                elif c.isalpha() == False and letter_present == 1: #here I am eliminating both dashes and hyphens,
+                    #bc it skews the word metric if red-blue is counted as a 9 character token, boosting the count of
+                    # high-character tokens significantly. all texts will be preprocessed the same way, so it shouldn't make a difference,
+                    # relatively speaking
+                    dehyphenated_token_joined = ''.join(map(str, dehyphenated_token))
+                    #print(dehyphenated_token_joined)
+                    short_clean_tokens.append(dehyphenated_token_joined)
+                    dehyphenated_token = []
+                    letter_present = 0
+        """
+        sent_lens.append(len(tokens))
+
+
+    sent_len_dist = FreqDist(sent_lens).most_common(50)
+
+    # convert the FreqDist object to a pandas series for easier processing
+    sent_len_dist_panda = pd.Series(dict(sent_len_dist))
+
+    # sort, normalise and round the panda series
+
+    new_sent_len_dist = sent_len_dist_panda.sort_index()
+    print(new_sent_len_dist)
+
+    for i in range(0, len(new_sent_len_dist.index)):
+        #for index in new_token_len_dist.index:
+        new_sent_len_dist.iat[i] = round(new_sent_len_dist.iat[i]/len(sent_lens), 2) # normalise each sentence-length count by the total number of sentences
+
+    # plot using matplotlib and seaborn
+
+    # set figure, ax into variables
+    fig, ax = plt.subplots(figsize=(10,10))
+
+    # call function for bar (value) labels
+    #addlabels(x=new_sent_len_dist.index, y=new_sent_len_dist.values)
+
+    plt.title(curve_title)
+    ax.set_xlabel("Sentence Length")
+    ax.set_ylabel("Percentage of Occurrence")
+
+
+    sns.lineplot(x=new_sent_len_dist.index, y=new_sent_len_dist.values, ax=ax, palette="flare")
+    #plt.xticks(rotation=30) !!! very useful for words
+    plt.savefig(plot_destination)
+
+    # calculate the standard deviation and mean of the sentence lengths
+    standard_deviation_sent = statistics.stdev(sent_lens)
+    mean_sent = statistics.mean(sent_lens)
+
+    return standard_deviation_sent, mean_sent
+
+
 #create the Mendenhall Curve for the Throne of Glass Series
-std_dev_tokens_tog_canon, mean_tokens_tog_canon, type_token_ratio_tog_canon = mendenhall_curve(read_works_into_string(f"throne_of_glass/data/canon_works"), "Mendenhall Curve for the Throne of Glass Series", f"throne_of_glass/freq_distribution/all_canon_token_len.png")
+#std_dev_tokens_tog_canon, mean_tokens_tog_canon, type_token_ratio_tog_canon = mendenhall_curve(read_works_into_string(f"throne_of_glass/data/canon_works"), "Mendenhall Curve for the Throne of Glass Series", f"throne_of_glass/freq_distribution/all_canon_token_len.png")
 
 #create the Mendenhall Curve for the Grishaverse Books
-std_dev_tokens_grishaverse_canon, mean_tokens_grishaverse_canon, type_token_ratio_grishaverse_canon = mendenhall_curve(read_works_into_string(f"grishaverse/data/canon_works"), "Mendenhall Curve for Grishaverse Books", f"grishaverse/freq_distribution/all_canon_token_len.png")
+#std_dev_tokens_grishaverse_canon, mean_tokens_grishaverse_canon, type_token_ratio_grishaverse_canon = mendenhall_curve(read_works_into_string(f"grishaverse/data/canon_works"), "Mendenhall Curve for the Grishaverse Books", f"grishaverse/freq_distribution/all_canon_token_len.png")
+
+# Mendenhall Curve Sentence Lengths for Throne of Glass Canon
+std_dev_sent_tog_canon, mean_sent_tog_canon = sentence_metrics(read_works_into_string(f"throne_of_glass/data/canon_works"), "Mendenhall Curve for Sentence Lengths for the Throne of Glass Series", f"throne_of_glass/freq_distribution/all_canon_sent_len.png")
+
+# Mendenhall Curve Sentence Lengths for Grishaverse Canon
+std_dev_sent_grishaverse_canon, mean_sent_grishaverse_canon = sentence_metrics(read_works_into_string(f"grishaverse/data/canon_works"), "Mendenhall Curve for Sentence Lengths for the Grishaverse Books", f"grishaverse/freq_distribution/all_canon_sent_len.png")
+
 # create a dataframe to store all the overview statistics in
 # columns mean_tokens; std_dev_tokens; freq_token_len_1; ...; freq_token_len_15;
@@ -167,5 +247,14 @@ std_dev_tokens_grishaverse_canon, mean_tokens_grishaverse_canon, type_token_rati
 # tag_ngram_frequencies
 # punctuation frequencies
 # token/type ratio
-data_overview = pd.DataFrame({"mean_tokens":[mean_tokens_tog_canon, mean_tokens_grishaverse_canon], "std_dev":[std_dev_tokens_tog_canon, std_dev_tokens_grishaverse_canon], "type_token_ratio":[type_token_ratio_tog_canon, type_token_ratio_grishaverse_canon]}, index= ["throne_of_glass_canon", "grishaverse_canon"])
-data_overview.to_csv(f"data_overview/data_overview.csv")
\ No newline at end of file
+"""
+data_overview = pd.DataFrame(
+    {"mean_tokens":[mean_tokens_tog_canon, mean_tokens_grishaverse_canon],
+     "std_dev_tokens":[std_dev_tokens_tog_canon, std_dev_tokens_grishaverse_canon],
+     "type_token_ratio":[type_token_ratio_tog_canon, type_token_ratio_grishaverse_canon],
+     "mean_sent":[mean_sent_tog_canon, mean_sent_grishaverse_canon],
+     "std_dev_sent":[std_dev_sent_tog_canon, std_dev_sent_grishaverse_canon]},
+    index= ["throne_of_glass_canon", "grishaverse_canon"]
+    )
+"""
+#data_overview.to_csv(f"data_overview/data_overview.csv")
\ No newline at end of file
diff --git a/throne_of_glass/freq_distribution/all_canon_sent_len.png b/throne_of_glass/freq_distribution/all_canon_sent_len.png
new file mode 100644
index 0000000000000000000000000000000000000000..13c68cc270455f7f12eba53092f86b88804cf899
Binary files /dev/null and b/throne_of_glass/freq_distribution/all_canon_sent_len.png differ
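
For illustration, here is a minimal, self-contained sketch of the sentence-length metric that the new sentence_metrics function computes: tokenise a text into sentences, measure each sentence's length in word tokens, normalise the resulting frequency distribution, and report the mean and standard deviation. The toy sample string and the names other than sent_lens are invented for this sketch; it assumes nltk with the "punkt" models downloaded and leaves out the seaborn/matplotlib plotting step.

import statistics

from nltk.probability import FreqDist
from nltk.tokenize import sent_tokenize, word_tokenize

# toy text standing in for read_works_into_string(...); any plain string works
sample = (
    "The ship creaked in the dark. "
    "She waited, counting every breath, until the watch changed at midnight. "
    "Then she moved."
)

# sentence lengths measured in word tokens, mirroring sentence_metrics above
sent_lens = [len(word_tokenize(sent)) for sent in sent_tokenize(sample)]

# frequency distribution of sentence lengths, normalised to proportions
sent_len_dist = FreqDist(sent_lens)
proportions = {length: round(count / len(sent_lens), 2)
               for length, count in sorted(sent_len_dist.items())}

print(proportions)                 # proportion of sentences at each token length
print(statistics.mean(sent_lens))  # mean sentence length
print(statistics.stdev(sent_lens)) # standard deviation of sentence lengths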