diff --git a/src/asses_results.py b/src/asses_results.py
index ff9cb1dcf3d4417c5b6a3c97dd287df5a1d1d6cb..1f5d548fd7eb2cee99c92499a5e3f24421edc67b 100644
--- a/src/asses_results.py
+++ b/src/asses_results.py
@@ -19,6 +19,11 @@ def get_all_data_from_folder(foldername, datatype="txt"):
     return answers
 
 def process_survey_data(headers, answers):
+    """
+    Looks at the tags/questions and adds to every question every response of every participant.
+    In the end you get a dict with question names as keys and the values are the answers of every participant.
+    So the first participant will have the index 0 in every value.
+    """
     survey_group_answers = {}
     for responses in answers:
         question_num = 0
@@ -137,7 +142,7 @@ class Proccess_Data(object):
         return above_avg_indicies, below_avg_indicies
 
     def compare_ai(self, correct_percentage):
-        gpt2, opt, gpt4o , gpt2_count, opt_count, gpt4o_count = 0, 0, 0, 0 ,0, 0
+        gpt2, opt, gpt4o , gpt2_count, opt_count, gpt4o_count = 0, 0, 0, 0, 0, 0
         for ai, correctness in zip(self.models, correct_percentage):
             if ai == "gpt2":
                 gpt2 += correctness
@@ -150,9 +155,30 @@ class Proccess_Data(object):
                 gpt4o_count += 1
         return {"gpt2": round(gpt2 / gpt2_count, 2), "opt": round(opt / opt_count, 2), "gpt4o": round(gpt4o / gpt4o_count, 2)}
 
-    def average_parameter(self):
-        """Looks at the parameters like coherence, conciseness, creawtivity and clarity of concept and calculates the average."""
-        pass
+    def average_parameter(self, parameter, model_name):
+        """Looks at the parameters like coherence, conciseness, creativity and clarity of concept and calculates the average."""
+        count, rate = 0, 0
+        if model_name != "human":
+            for question_num, correct_label in enumerate(self.correct_labels):
+                for keys in self.survey_data.keys():
+                    if parameter in keys and correct_label in keys:
+                        model = self.models[question_num]
+                        if model == model_name:
+                            for rating in self.survey_data[keys]:
+                                if rating:
+                                    rate += int(rating)
+                                    count += 1
+        if model_name == "human":
+            for question_num, correct_label in enumerate(self.correct_labels):
+                for keys in self.survey_data.keys():
+                    if parameter in keys and correct_label not in keys:
+                        model = self.models[question_num]
+                        for rating in self.survey_data[keys]:
+                            if rating:
+                                rate += int(rating)
+                                count += 1
+
+        return round(rate / count, 2)
 
 if __name__ == "__main__":
     answers = get_all_data_from_folder("results", "csv")
@@ -182,8 +208,14 @@ if __name__ == "__main__":
     #correct_percentage = [i[2] for i in total_correct.values()] # extracts average percentage of correct answers
     #model_results = evaluator.compare_ai(correct_percentage)
 
-    test = evaluator.average_parameter()
-    print(test)
+    parameters = ["Coherence", "Conciseness", "Creativity", "Clarity of Concept"]
+    models = ["gpt2", "gpt4o", "opt", "human"]
+
+    avg_scores = {
+        param: {model: evaluator.average_parameter(param, model) for model in models}
+        for param in parameters
+    }
+
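
For reviewers, here is a minimal, self-contained sketch of how the new `average_parameter(parameter, model_name)` selection logic behaves. The keys, labels, and ratings below are hypothetical placeholders; the real values come from the CSV headers loaded out of `results/` and from the `correct_labels`, `models`, and `survey_data` attributes of `Proccess_Data`.

```python
# Hypothetical mock of the structures Proccess_Data holds; the real keys and
# ratings come from the survey CSVs in results/.
survey_data = {
    "Text A - Coherence": ["4", "5", ""],   # assumed key format: "<label> - <parameter>"
    "Text B - Coherence": ["3", "2", "4"],
}
correct_labels = ["Text A"]  # correct (AI-written) label for question 0
models = ["gpt2"]            # model that produced the correct text of question 0

def average_parameter(parameter, model_name):
    """Mirrors the diff's selection logic, folded into a single condition."""
    count, rate = 0, 0
    for question_num, correct_label in enumerate(correct_labels):
        for key, ratings in survey_data.items():
            if parameter not in key:
                continue
            is_ai_key = correct_label in key
            # AI models use keys that carry the correct label and whose question
            # was answered by the requested model; "human" uses the remaining keys.
            if (model_name != "human" and is_ai_key and models[question_num] == model_name) \
                    or (model_name == "human" and not is_ai_key):
                for rating in ratings:
                    if rating:               # empty answers are skipped
                        rate += int(rating)
                        count += 1
    return round(rate / count, 2)

print(average_parameter("Coherence", "gpt2"))   # (4 + 5) / 2 = 4.5
print(average_parameter("Coherence", "human"))  # (3 + 2 + 4) / 3 = 3.0
```

Note that, as in the diff itself, the division assumes at least one matching rating exists; calling it for a parameter/model pair with no responses would raise a `ZeroDivisionError`.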