Commit acf7f897 authored by perov

add a script to assess the texts used in the survey automatically

parent 976f42ac
import automatic_metrics as am
from pathlib import Path
import re
import copy

def extract_marked_text(file_path):
    """Uses "X" as a marker to find which lines/texts to extract. Passages without an "X" are ignored.
    Collected lines are joined with newlines so word boundaries are preserved for the metrics."""
    with open(file_path, 'r', encoding='utf-8') as file:
        lines = file.readlines()
    final_poems = {}
    poems = []
    current_poem = []
    collecting = False
    idx = 0
    for line in lines:
        # A numbered header ending in "X" (e.g. "3: ... X") starts a passage to keep.
        match = re.match(r"(\d+):.*\bX\s*$", line.strip())
        if collecting and current_poem:
            if any(part.strip() for part in current_poem):
                poems.append("\n".join(current_poem).strip())
            current_poem = []
        if match:
            if collecting:
                # A new marked header: store the previous passage first.
                final_poems[idx] = "\n".join(poems)
                idx += 1
                poems = []
            collecting = True
        elif re.match(r"\d+:", line.strip()):
            # A numbered header without "X": store what was collected and stop collecting.
            if collecting:
                final_poems[idx] = "\n".join(poems)
                idx += 1
                poems = []
            collecting = False
        elif collecting:
            current_poem.append(line.strip())
    # Flush whatever is still pending when the file ends.
    if collecting and current_poem:
        poems.append("\n".join(current_poem).strip())
    if collecting and poems:
        final_poems[idx] = "\n".join(poems)
    return final_poems
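
# Illustrative sketch of the expected input format (the file contents below are an
# assumption, not part of the repository): a numbered header ending in "X" selects the
# passage that follows it, while a numbered header without "X" ends the passage.
#
#   1: First poem X
#   line one of the poem
#   line two of the poem
#   2: Second poem
#   this passage has no "X" and is ignored
#
# For such a file, extract_marked_text would return
#   {0: "line one of the poem\nline two of the poem"}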

def get_all_data_from_folder(foldername, datatype="txt"):
    """Extracts the marked texts from every matching file in the given folder for further processing."""
    script_dir = Path(__file__).resolve().parent
    data_dir = script_dir.parent / foldername
    files = list(data_dir.rglob(f"*.{datatype}"))
    all_extracted_text = {}
    for file in files:
        relative_file_location = file.relative_to(data_dir)
        text = extract_marked_text(file)
        all_extracted_text[str(relative_file_location)] = text
    return all_extracted_text
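
# Illustrative shape of the returned mapping, assuming a sibling "data" folder with
# "ai" and "human" subfolders (the passage contents are placeholders):
#   {"ai\\gpt2_poem.txt": {0: "...", 1: "..."},
#    "human\\poetry.txt": {0: "...", 1: "..."}}
# The outer keys use the platform's native path separator ("\\" on Windows, "/" elsewhere).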

def calculate_scores_texts(all_texts):
    """Calculates the scores for the given texts."""
    texts = copy.deepcopy(all_texts)
    evaluator = am.Compute_Metrics()
    evaluated_texts = {}
    for filename, passages in texts.items():
        for idx, passage in passages.items():
            calc_metrics = [
                evaluator.compute_fre(passage),
                evaluator.compute_ttr(passage),
                evaluator.compute_pmi(passage),
                evaluator.compute_tfidf(passage),
            ]
            evaluated_texts[f"{filename}\\{idx}"] = calc_metrics
    return evaluated_texts  # {filename\idx: [fre, ttr, pmi, tfidf]}
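
# Illustrative output, assuming am.Compute_Metrics returns one numeric value per metric
# (the concrete numbers below are made up):
#   {"ai\\gpt2_poem.txt\\0": [72.5, 0.61, 2.3, 0.18],
#    "human\\poetry.txt\\0": [48.0, 0.74, 3.4, 0.35]}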

class Calculate_Parameters(object):
    """Automated procedure to rate the survey parameters.

    Each calculate_* method compares the AI metrics against the human metrics and
    returns "ai", "human", or "equal" depending on which text scores better on that
    parameter; differences below a fixed threshold are ignored.
    """

    def __init__(self, metrics_ai, metrics_human, question_num):
        # FRE, TTR, PMI, and TF-IDF are at index 0, 1, 2, and 3
        self.ai_fre, self.ai_ttr, self.ai_pmi, self.ai_tfidf = metrics_ai
        self.human_fre, self.human_ttr, self.human_pmi, self.human_tfidf = metrics_human
        self.question_num = question_num

    def calculate_coherence(self):
        score = 0
        if abs(self.ai_fre - self.human_fre) >= 20:
            if self.ai_fre > self.human_fre:
                score += 1
            else:
                score -= 1
        if abs(self.ai_pmi - self.human_pmi) >= 0.8:
            if self.ai_pmi > self.human_pmi:
                score += 2
            else:
                score -= 2
        if abs(self.ai_tfidf - self.human_tfidf) >= 0.2:
            if self.ai_tfidf > self.human_tfidf:
                score += 1
            else:
                score -= 1
        if score > 0:
            return "ai"
        if score < 0:
            return "human"
        return "equal"

    def calculate_conciseness(self):
        score = 0
        if abs(self.ai_pmi - self.human_pmi) >= 1:
            if self.ai_pmi > self.human_pmi:
                score += 1
            else:
                score -= 1
        if abs(self.ai_ttr - self.human_ttr) >= 0.1:
            if self.ai_ttr < self.human_ttr:
                score += 2
            else:
                score -= 2
        if score > 0:
            return "ai"
        if score < 0:
            return "human"
        return "equal"

    def calculate_creativity(self):
        score = 0
        if abs(self.ai_pmi - self.human_pmi) >= 1:
            if self.ai_pmi < self.human_pmi:
                score += 1
            else:
                score -= 1
        if abs(self.ai_ttr - self.human_ttr) >= 0.1:
            if self.ai_ttr > self.human_ttr:
                score += 1
            else:
                score -= 1
        if abs(self.ai_fre - self.human_fre) >= 20:
            if self.ai_fre < self.human_fre:
                score += 1
            else:
                score -= 1
        if score > 0:
            return "ai"
        if score < 0:
            return "human"
        return "equal"

    def calculate_clarity_of_concept(self):
        score = 0
        if abs(self.ai_pmi - self.human_pmi) >= 1:
            if self.ai_pmi < self.human_pmi:
                score += 1
            else:
                score -= 1
        if abs(self.ai_ttr - self.human_ttr) >= 0.1:
            if self.ai_ttr < self.human_ttr:
                score += 1
            else:
                score -= 1
        if abs(self.ai_fre - self.human_fre) >= 20:
            if self.ai_fre < self.human_fre:
                score += 1
            else:
                score -= 1
        if abs(self.ai_tfidf - self.human_tfidf) >= 0.2:
            if self.ai_tfidf > self.human_tfidf:
                score += 1
            else:
                score -= 1
        if score > 0:
            return "ai"
        if score < 0:
            return "human"
        return "equal"

def predict_human_ai(survey_assessment):
    """
    Counts how often the human text scored better on the parameters and how often the
    AI text scored better on the parameters.
    The output tag is the predicted tag; "equal" means no decision could be made.
    """
    predicted_tags = {}
    for question_num, rated_param in survey_assessment.items():
        ai = 0
        human = 0
        # Only count the parameters that apply to this block of survey questions.
        keys_to_check = set(rated_param.keys())
        if question_num <= 6:
            keys_to_check.discard("clarity_of_concept")
        elif 6 < question_num <= 12:
            keys_to_check.discard("creativity")
        elif 12 < question_num <= 18:
            keys_to_check.discard("clarity_of_concept")
            keys_to_check.discard("creativity")
        for key in keys_to_check:
            if rated_param[key] == "ai":
                ai += 1
            elif rated_param[key] == "human":
                human += 1
        # Determine the result
        if human > ai:
            predicted_tags[question_num] = "human"
        elif human < ai:
            predicted_tags[question_num] = "ai"
        else:
            predicted_tags[question_num] = "equal"
    return predicted_tags
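
# Illustrative input/output for predict_human_ai (the ratings are made up):
#   predict_human_ai({1: {"coherence": "ai", "conciseness": "human", "creativity": "ai"}})
#   -> {1: "ai"}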

if __name__ == '__main__':
    survey_texts = get_all_data_from_folder("data", "txt")
    evaluated_texts = calculate_scores_texts(survey_texts)
    # I manually ordered the texts in the order used in the survey
    # (the hard-coded keys use Windows-style path separators).
    survey_ai_texts = ['ai\\gpt2_poem.txt\\0', 'ai\\gpt2_poem.txt\\1', 'ai\\opt_poem.txt\\0', 'ai\\opt_poem.txt\\1', 'ai\\gpt4o_poem.txt\\0', 'ai\\gpt4o_poem.txt\\1',
                       'ai\\gpt4o_wiki.txt\\0', 'ai\\gpt4o_wiki.txt\\1', 'ai\\opt_wiki.txt\\0', 'ai\\opt_wiki.txt\\1', 'ai\\gpt2_wiki.txt\\0', 'ai\\gpt2_wiki.txt\\1',
                       'ai\\opt_sport.txt\\0', 'ai\\opt_sport.txt\\1', 'ai\\gpt4o_sports.txt\\0', 'ai\\gpt4o_sports.txt\\1', 'ai\\gpt2_sport.txt\\0', 'ai\\gpt2_sport.txt\\1'
                       ]
    survey_human_texts = ['human\\poetry.txt\\0', 'human\\poetry.txt\\1', 'human\\poetry.txt\\2', 'human\\poetry.txt\\3', 'human\\poetry.txt\\4', 'human\\poetry.txt\\5',
                          'human\\wiki.txt\\0', 'human\\wiki.txt\\1', 'human\\wiki.txt\\2', 'human\\wiki.txt\\3', 'human\\wiki.txt\\4', 'human\\wiki.txt\\5',
                          'human\\sport_bbc.txt\\0', 'human\\sport_bbc.txt\\1', 'human\\sport_bbc.txt\\2', 'human\\sport_bbc.txt\\3', 'human\\sport_bbc.txt\\4', 'human\\sport_bbc.txt\\5'
                          ]
    survey_groups = zip(survey_ai_texts, survey_human_texts)
    # Rate the parameters coherence, creativity, conciseness, and clarity of concept for every survey group
    survey_assessment = {}
    for i, (ai_name, human_name) in enumerate(survey_groups, start=1):
        metrics_ai = evaluated_texts[ai_name]
        metrics_human = evaluated_texts[human_name]
        evaluation_metrics = Calculate_Parameters(metrics_ai, metrics_human, i)
        coherence_score = evaluation_metrics.calculate_coherence()
        conciseness_score = evaluation_metrics.calculate_conciseness()
        creativity_score = evaluation_metrics.calculate_creativity()
        clarity_score = evaluation_metrics.calculate_clarity_of_concept()
        survey_assessment[i] = {
            "coherence": coherence_score,
            "conciseness": conciseness_score,
            "creativity": creativity_score,
            "clarity_of_concept": clarity_score,
        }
    # Automatically assess whether each text is human- or AI-generated
    result = predict_human_ai(survey_assessment)
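
    # Possible way to inspect the predictions, added here as a sketch only:
    # for question, tag in result.items():
    #     print(question, tag)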