def plot_distribution(x, y, plt_title, file_path_for_pic: str, x_label="Number of Kudos", y_label="Percentage of Occurrence", palette="flare", plt_type="barplot", add_labels=True, rotate_ticks=True):
    plt.figure(figsize=(10, 10))
    plt.title(plt_title)
...
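# The rest of plot_distribution is elided above. The sketch below only illustrates how
# such a seaborn/matplotlib helper could finish the plot; the name plot_distribution_sketch,
# the bar-label styling, and the assumption that x and y are pandas Series (categories in
# the index, relative frequencies in the values) are mine, not the project's actual
# implementation. Imports are repeated so the sketch stands alone.
import matplotlib.pyplot as plt
import seaborn as sns

def plot_distribution_sketch(x, y, plt_title, file_path_for_pic: str, x_label="Number of Kudos",
                             y_label="Percentage of Occurrence", palette="flare",
                             plt_type="barplot", add_labels=True, rotate_ticks=True):
    plt.figure(figsize=(10, 10))
    plt.title(plt_title)
    # categories come from the index, relative frequencies from the values
    if plt_type == "barplot":
        ax = sns.barplot(x=[str(i) for i in x.index], y=y.values, palette=palette)
    else:
        ax = sns.lineplot(x=list(x.index), y=y.values)
    ax.set_xlabel(x_label)
    ax.set_ylabel(y_label)
    if add_labels:
        for container in ax.containers:  # annotate each bar with its height
            ax.bar_label(container)
    if rotate_ticks:
        plt.xticks(rotation=90)  # keep long tick labels (e.g. POS tags) readable
    plt.savefig(file_path_for_pic)
    plt.close()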
new_punct_tag_freq_dist.iat[i] = round(new_punct_tag_freq_dist.iat[i]/len(punctuation_tags), 3) # index - 1 because the index starts counting from zero, the word lengths don't
pattern=r"^[a-zA-Z_]+(?=/)"# get series from directory path
match=re.search(pattern,directory_path)
ifmatch:
series=match.group(0)
forworkinworks:
withopen(f"{directory_path}"+f"/{work}","r")asf:
f=f.read()
std_dev_tk,mean_tk,ttr=mendenhall_curve(f,f"Mendenhall Curve for the {series.replace('_','').title()}{work[:-4].replace('_','').title()}",f"{series}/freq_distribution/{work[:-4]}_token_len.png")
plt_title = self.name + ((" " + self.quality) if self.fanfiction else "") + (" Fanfiction" if self.fanfiction else " Canon") + " Token Frequency Distribution"
plot_distribution(x=self.tk_len_dist, y=self.tk_len_dist, plt_title=plt_title, file_path_for_pic=file_path_for_pic, x_label="Token Length", y_label="Percentage of Occurrence")
# this function takes a corpus as its input and gives a Mendenhall curve, i.e. a frequency distribution of token lengths, as its output
# precise input: corpus = string;
#                curve_title = string, the title of the plot that will be produced, e.g. "Mendenhall Curve for the Throne of Glass Series"
#                plot_destination = string, the (relative) path, including the file name and .png extension of the plot produced, e.g. "throne_of_glass/freq_distribution/all_canon_token_len.png"
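# The comment above describes the interface of mendenhall_curve, whose body is not shown
# in this excerpt. The following is a minimal sketch under the assumption that the function
# tokenizes with NLTK, builds a relative frequency distribution of token lengths, plots it
# with plot_distribution, and returns the standard deviation, the mean and the type-token
# ratio; the name mendenhall_curve_sketch and the cleaning steps are illustrative only
# (requires the NLTK "punkt" tokenizer data).
import statistics
import pandas as pd
from nltk.tokenize import word_tokenize

def mendenhall_curve_sketch(corpus, curve_title, plot_destination):
    # keep only alphabetic tokens; the real code presumably cleans more aggressively
    tokens = [tok.lower() for tok in word_tokenize(corpus) if tok.isalpha()]
    token_lengths = [len(tok) for tok in tokens]
    # relative frequency of each token length, as a Series indexed by length
    tk_len_dist = pd.Series(token_lengths).value_counts(normalize=True).sort_index().round(3)
    plot_distribution(x=tk_len_dist, y=tk_len_dist, plt_title=curve_title,
                      file_path_for_pic=plot_destination,
                      x_label="Token Length", y_label="Percentage of Occurrence")
    # summary statistics returned to the caller
    std_dev = statistics.stdev(token_lengths)
    mean = statistics.mean(token_lengths)
    type_token_ratio = len(set(tokens)) / len(tokens)
    return std_dev, mean, type_token_ratio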
plt_title="POS Tag Frequencies for the "+self.name+""+(self.quality+"")ifself.fanfictionelse""+"Fanfiction"ifself.fanfictionelse" Canon"
new_token_len_dist.iat[i]=round(new_token_len_dist.iat[i]/len(short_clean_tokens),3)#index-1 bc the index starts counting from zero, the word lengths not
plot_distribution(x=self.tag_freq_dist,y=self.tag_freq_dist,plt_title=plt_title,file_path_for_pic=file_path_for_pic,x_label="POS Tags",y_label="Percentage of Occurence")
#if float(new_token_len_dist.iat[i]) == 0.00:
# new_token_len_dist.drop(index=i) # here it is used as the label, so we want the index, not index -1; bad work-around, I'm sorry
# plot using matplotlib and seaborn
def plot_punct_freq(self, file_path_for_pic):
    plt_title = "Punctuation Frequencies for the " + self.name + ((" " + self.quality) if self.fanfiction else "") + (" Fanfiction" if self.fanfiction else " Canon")
    # set figure, ax into variables
    plot_distribution(x=self.punct_tag_freq_dist, y=self.punct_tag_freq_dist, plt_title=plt_title, file_path_for_pic=file_path_for_pic, x_label="Types of Punctuation", y_label="Percentage of Occurrence")
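# read_works_into_string is called in the script-level section below but is not defined in
# this excerpt; judging by its name and the directory arguments, it presumably concatenates
# every plain-text work in the given folder into one corpus string. A minimal sketch under
# that assumption (the _sketch suffix and the .txt filter are mine):
import os

def read_works_into_string_sketch(directory_path):
    # concatenate every .txt file in directory_path into a single string
    texts = []
    for file_name in sorted(os.listdir(directory_path)):
        if file_name.endswith(".txt"):
            with open(os.path.join(directory_path, file_name), "r", encoding="utf-8") as file:
                texts.append(file.read())
    return "\n".join(texts)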
# create the Mendenhall Curve for the Throne of Glass Series
std_dev_tokens_tog_canon, mean_tokens_tog_canon, type_token_ratio_tog_canon = mendenhall_curve(read_works_into_string(f"throne_of_glass/data/canon_works"), "Mendenhall Curve for the Throne of Glass Series", f"throne_of_glass/freq_distribution/all_canon_token_len.png")
# create the Mendenhall Curve for the Grishaverse Books
std_dev_tokens_grishaverse_canon, mean_tokens_grishaverse_canon, type_token_ratio_grishaverse_canon = mendenhall_curve(read_works_into_string(f"grishaverse/data/canon_works"), "Mendenhall Curve for the Grishaverse Books", f"grishaverse/freq_distribution/all_canon_token_len.png")
# Mendenhall Curve Sentence Lengths for Throne of Glass Canon
std_dev_sent_tog_canon, mean_sent_tog_canon = sentence_metrics(read_works_into_string(f"throne_of_glass/data/canon_works"), "Mendenhall Curve for Sentence Lengths for the Throne of Glass Series", "throne_of_glass", "canon")
# Mendenhall Curve Sentence Lengths for Grishaverse Canon
std_dev_sent_grishaverse_canon, mean_sent_grishaverse_canon = sentence_metrics(read_works_into_string(f"grishaverse/data/canon_works"), "Mendenhall Curve for Sentence Lengths for the Grishaverse Books", "grishaverse", "canon")
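# sentence_metrics is likewise not defined in this excerpt. From the call sites above it
# takes the corpus, a plot title, the series directory ("throne_of_glass"/"grishaverse")
# and a "canon"/"fanfic" marker, and returns the standard deviation and mean of the
# sentence lengths. The sketch below reflects those assumptions; the output file name
# pattern and the _sketch suffix are hypothetical.
import statistics
import pandas as pd
from nltk.tokenize import sent_tokenize, word_tokenize

def sentence_metrics_sketch(corpus, curve_title, series, canon_or_fanfic):
    # sentence lengths measured in tokens
    sent_lens = [len(word_tokenize(sent)) for sent in sent_tokenize(corpus)]
    # relative frequency distribution of sentence lengths, plotted with the shared helper
    sent_len_dist = pd.Series(sent_lens).value_counts(normalize=True).sort_index().round(3)
    plot_distribution(x=sent_len_dist, y=sent_len_dist, plt_title=curve_title,
                      file_path_for_pic=f"{series}/freq_distribution/{canon_or_fanfic}_sent_len.png",
                      x_label="Sentence Length", y_label="Percentage of Occurrence")
    return statistics.stdev(sent_lens), statistics.mean(sent_lens)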