# curve_title = string, the title of the plot that will be produced, e.g., "Mendenhall Curve for Throne of Glass Series"
# plot_destination = string, the (relative) path of the plot that will be produced, including the file name and the .png extension, e.g. "throne_of_glass/freq_distribution/all_canon_token_len.png"
# By subdividing the text into segments of 1000 tokens, it calculates the type-token ratio for each segment and then averages over them.
# This makes the type-token ratios comparable across texts of different sizes.
def standardised_type_token_ratio(tokens, segment_size=1000):
    """Return the standardised type-token ratio (STTR) of *tokens*.

    The token sequence is cut into consecutive, non-overlapping segments of
    ``segment_size`` tokens. The type-token ratio (distinct tokens divided by
    total tokens) is computed for every complete segment and the results are
    averaged, which keeps the value comparable across texts of different
    lengths. A trailing segment shorter than ``segment_size`` is ignored.

    If the text yields at most one complete segment, the plain type-token
    ratio of the whole text is returned instead and a warning is printed.

    Args:
        tokens: A sized iterable of hashable tokens (e.g. a list of strings).
        segment_size: Number of tokens per segment; defaults to 1000.

    Returns:
        The averaged per-segment type-token ratio as a float, the whole-text
        type-token ratio for short texts, or ``0.0`` for empty input.

    Raises:
        ValueError: If ``segment_size`` is not a positive number.
    """
    if segment_size <= 0:
        raise ValueError("segment_size must be a positive integer")

    total_tokens = len(tokens)
    if total_tokens == 0:
        # A ratio over zero tokens is undefined; report it like any other
        # too-short text instead of failing with ZeroDivisionError.
        print("Warning: Text was too short for segmentation!")
        return 0.0

    ttrs = []
    segment_tokens = []
    for token in tokens:
        # Append first, then check for a full segment, so the token that
        # completes a segment is counted and no token is skipped in between.
        segment_tokens.append(token)
        if len(segment_tokens) == segment_size:
            ttrs.append(len(set(segment_tokens)) / segment_size)
            segment_tokens = []

    if len(ttrs) <= 1:
        # Averaging over zero or one segment is not a standardisation, so fall
        # back to the ordinary type-token ratio of the complete text.
        print("Warning: Text was too short for segmentation!")
        return len(set(tokens)) / total_tokens

    return sum(ttrs) / len(ttrs)
# Create the Mendenhall curve for the Throne of Glass series.
# The curve is built exactly once: the earlier, discarded call with identical
# arguments only repeated the corpus read and the plot generation.
# NOTE(review): the three returned values are assumed to be (standard deviation,
# mean, type-token ratio), going by the target names -- confirm against
# mendenhall_curve.
(
    std_dev_tokens_tog_canon,
    mean_tokens_tog_canon,
    type_token_ratio_tog_canon,
) = mendenhall_curve(
    read_works_into_string("throne_of_glass/data/canon_works"),
    "Mendenhall Curve for the Throne of Glass Series",
    "throne_of_glass/freq_distribution/all_canon_token_len.png",
)
# Create the Mendenhall curve for the Grishaverse books.
# The curve is built exactly once: the earlier, discarded call with identical
# arguments only repeated the corpus read and the plot generation.
# NOTE(review): the three returned values are assumed to be (standard deviation,
# mean, type-token ratio), going by the target names -- confirm against
# mendenhall_curve.
(
    std_dev_tokens_grishaverse_canon,
    mean_tokens_grishaverse_canon,
    type_token_ratio_grishaverse_canon,
) = mendenhall_curve(
    read_works_into_string("grishaverse/data/canon_works"),
    "Mendenhall Curve for Grishaverse Books",
    "grishaverse/freq_distribution/all_canon_token_len.png",
)
# create a dataframe to store all the overview statistics in