From c995a9acf86c083a726a9f93e5187e72b654d74b Mon Sep 17 00:00:00 2001
From: mai <mai@cl.uni-heidelberg.de>
Date: Sat, 11 Mar 2023 11:46:03 +0100
Subject: [PATCH] Move evaluation to utils/eval.py

---
 test.py         | 23 ++++++++++++++---------
 utils/attack.py |  3 ++-
 utils/eval.py   | 58 ++++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 74 insertions(+), 10 deletions(-)
 create mode 100644 utils/eval.py

diff --git a/test.py b/test.py
index 3b778d3..d8b5427 100644
--- a/test.py
+++ b/test.py
@@ -1,9 +1,10 @@
 import torch
 from transformers import AutoTokenizer, AutoModelForSequenceClassification
-### from models.py
 from pretrained.models import *
 import json
 from nltk.tokenize.treebank import TreebankWordDetokenizer
+from utils.eval import eval
+from utils.attack import attack
 
 device = 'cuda' if torch.cuda.is_available() else 'cpu'
 
@@ -43,17 +44,21 @@ for post in dataset(test_ids):
 
     #     counter += 1
 
     detokenized = TreebankWordDetokenizer().detokenize(post["post_tokens"])
+
+    probabilities = eval(detokenized, model, tokenizer)
+    print(f"Normal: {probabilities[0][0]}\nHatespeech: {probabilities[0][1]}\n\n")
+    # print(f"Normal: {probabilities[1][0]}\nHatespeech: {probabilities[1][1]}\n\n")
 
     # ATTACK HERE
-    batch = attack(detokenized)
-
-    inputs = tokenizer(batch, return_tensors="pt", padding=True).to(device)
-    prediction_logits, _ = model(input_ids=inputs['input_ids'],attention_mask=inputs['attention_mask'])
-    softmax = torch.nn.Softmax(dim=1)
-    probs = softmax(prediction_logits)
-    print(f"Normal: {probs[0][0]}\nHatespeech: {probs[0][1]}\n\n")
-    print(f"Normal: {probs[1][0]}\nHatespeech: {probs[1][1]}\n\n")
+    # batch = attack(detokenized)
+    # inputs = tokenizer(detokenized, return_tensors="pt", padding=True).to(device)
+    # prediction_logits, _ = model(input_ids=inputs['input_ids'],attention_mask=inputs['attention_mask'])
+    # softmax = torch.nn.Softmax(dim=1)
+    # probs = softmax(prediction_logits)
+    # print(f"Normal: {probs[0][0]}\nHatespeech: {probs[0][1]}\n\n")
+    # print(f"Normal: {probs[1][0]}\nHatespeech: {probs[1][1]}\n\n")
+    # break
 
     # print("------------------")
 
diff --git a/utils/attack.py b/utils/attack.py
index 5b456b3..d6d1c12 100644
--- a/utils/attack.py
+++ b/utils/attack.py
@@ -1,4 +1,5 @@
 import transformers
 
-def attack(sentence, model):
+def attack(sentence, model, tokenizer):
+    model = model.to(device)
 
diff --git a/utils/eval.py b/utils/eval.py
new file mode 100644
index 0000000..0a451f9
--- /dev/null
+++ b/utils/eval.py
@@ -0,0 +1,58 @@
+from typing import Union
+import torch
+# from transformers import AutoTokenizer, AutoModelForSequenceClassification
+# from pretrained.models import *
+#
+# device = "cuda" if torch.cuda.is_available() else "cpu"
+#
+# tokenizer = AutoTokenizer.from_pretrained(
+#     "Hate-speech-CNERG/bert-base-uncased-hatexplain-rationale-two"
+# )
+# model = Model_Rational_Label.from_pretrained(
+#     "Hate-speech-CNERG/bert-base-uncased-hatexplain-rationale-two"
+# )
+# model = model.to(device)
+
+def eval(text, model, tokenizer):
+    """
+    Get model's prediction on a text.
+
+    Parameters
+    ----------
+    text : Union[str, list]
+        Text to be classified. Either a single string or a list of strings.
+    model : transformers.AutoModelForSequenceClassification
+        Trained HateXplain model.
+    tokenizer : transformers.AutoTokenizer
+        Tokenizer from the trained HateXplain model.
+
+    Returns
+    -------
+    probabilities : torch.Tensor
+        If text is only one string, then get probabilities with
+        `probabilities[0][0]` for `normal` and
+        `probabilities[0][1]` for `hatespeech`.
+        If text is multiple strings in a list, then get probabilities with
+        `probabilities[i][0]` and `probabilities[i][1]`, respectively, where
+        `i` is the sample in the batch.
+    """
+    device = 'cuda' if torch.cuda.is_available() else 'cpu'
+    model = model.to(device)
+
+    inputs = tokenizer(
+        text,
+        return_tensors="pt",
+        padding=True
+    ).to(device)
+    prediction_logits, _ = model(
+        input_ids=inputs['input_ids'],
+        attention_mask=inputs['attention_mask']
+    )
+    softmax = torch.nn.Softmax(dim=1)
+    probabilities = softmax(prediction_logits)
+    # print(f"Normal: {probabilities[0][0]}\nHatespeech: {probabilities[0][1]}\n\n")
+
+    return probabilities
+
+
+
-- 
GitLab
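
Usage note (not part of the patch): a minimal sketch of how the new utils/eval.py helper
could be called on its own, assuming the same HateXplain checkpoint and the
Model_Rational_Label class from pretrained.models that test.py and the commented-out
block in utils/eval.py already reference.

    # Minimal usage sketch; assumes it is run from the repository root so that
    # pretrained.models and utils.eval are importable.
    from transformers import AutoTokenizer
    from pretrained.models import Model_Rational_Label  # project-specific class, assumed importable by name
    from utils.eval import eval

    checkpoint = "Hate-speech-CNERG/bert-base-uncased-hatexplain-rationale-two"
    tokenizer = AutoTokenizer.from_pretrained(checkpoint)
    model = Model_Rational_Label.from_pretrained(checkpoint)

    # Accepts a single string or a list of strings; row i is sample i,
    # column 0 is the "normal" probability, column 1 is "hatespeech".
    probabilities = eval(["example sentence one", "example sentence two"], model, tokenizer)
    print(probabilities[:, 1])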