From f5226bf91b3ae2e9354d825512b1505abb957173 Mon Sep 17 00:00:00 2001
From: chrysanthopoulou <chrysanthopoulou@cl.uni-heidelberg.de>
Date: Wed, 12 Apr 2023 23:47:49 +0200
Subject: [PATCH] Fix some trailing lines

---
 stylometry_code.py | 10 ----------
 1 file changed, 10 deletions(-)

diff --git a/stylometry_code.py b/stylometry_code.py
index 16fdcaf..061eb54 100644
--- a/stylometry_code.py
+++ b/stylometry_code.py
@@ -74,10 +74,8 @@ def standardised_type_token_ratio(tokens):
         types = set(tokens)
         std_ttr = len(types)/len(tokens)
         print("Warning: Text was too short for segmentation!")
-        print(ttrs)
     else:
         std_ttr = statistics.mean(ttrs)
-        print(ttrs)
     return std_ttr
 
 
@@ -151,19 +149,11 @@ def mendenhall_curve(corpus, curve_title, plot_destination):
     standard_deviation = statistics.stdev(token_lengths)
     mean = statistics.mean(token_lengths)
 
-    # to get the number of unique tokens, i.e., types, I'm converting
-    # my list to a set (and back). I could also transform it to a pandas
-    # series, and drop the duplicates, but: if it is stupid and it works
-    # it isn't stupid
-
-    types_set = set(short_clean_tokens)
-    #type_token_ratio = len(types_set)/len(short_clean_tokens)
     type_token_ratio = standardised_type_token_ratio(short_clean_tokens)
 
     return standard_deviation, mean, type_token_ratio
 
-
 #create the Mendenhall Curve for the Throne of Glass Series
 std_dev_tokens_tog_canon, mean_tokens_tog_canon, type_token_ratio_tog_canon = mendenhall_curve(read_works_into_string(f"throne_of_glass/data/canon_works"), "Mendenhall Curve for the Throne of Glass Series", f"throne_of_glass/freq_distribution/all_canon_token_len.png")
--
GitLab
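
Note on the first hunk: the patch only shows fragments of
standardised_type_token_ratio, so here is a minimal sketch of the
segment-averaged STTR computation the visible branches imply. The segment
size (1000 tokens) and the loop that fills ttrs are assumptions not present
in the patch; only ttrs, std_ttr, the short-text warning, and the
statistics.mean() fallback appear in the hunk itself.

    import statistics

    def standardised_type_token_ratio(tokens, segment_size=1000):
        # Compute a type-token ratio (unique tokens / total tokens) for
        # each fixed-size segment of the token stream. segment_size is an
        # assumed value, not taken from the patch.
        ttrs = []
        for i in range(0, len(tokens) - segment_size + 1, segment_size):
            segment = tokens[i:i + segment_size]
            ttrs.append(len(set(segment)) / len(segment))
        if not ttrs:
            # Text shorter than one segment: fall back to the plain TTR,
            # mirroring the warning branch visible in the diff.
            types = set(tokens)
            std_ttr = len(types) / len(tokens)
            print("Warning: Text was too short for segmentation!")
        else:
            # Average the per-segment ratios, as in the else branch.
            std_ttr = statistics.mean(ttrs)
        return std_ttr

Averaging per-segment ratios keeps the measure comparable across texts of
different lengths, which is why the removed print(ttrs) calls were debug
output rather than part of the computation.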