Skip to content
Snippets Groups Projects
Commit c995a9ac authored by mai's avatar mai
Browse files

Move evaluation to utils/eval.py

parent 19a42a54
No related branches found
No related tags found
No related merge requests found
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification
### from models.py
from pretrained.models import *
import json
from nltk.tokenize.treebank import TreebankWordDetokenizer
from utils.eval import eval
from utils.attack import attack
device = 'cuda' if torch.cuda.is_available() else 'cpu'
......@@ -43,17 +44,21 @@ for post in dataset(test_ids):
# counter += 1
detokenized = TreebankWordDetokenizer().detokenize(post["post_tokens"])
probabilities = eval(detokenized, model, tokenizer)
print(f"Normal: {probabilities[0][0]}\nHatespeech: {probabilities[0][1]}\n\n")
# print(f"Normal: {probabilities[1][0]}\nHatespeech: {probabilities[1][1]}\n\n")
# ATTACK HERE
batch = attack(detokenized)
inputs = tokenizer(batch, return_tensors="pt", padding=True).to(device)
prediction_logits, _ = model(input_ids=inputs['input_ids'],attention_mask=inputs['attention_mask'])
softmax = torch.nn.Softmax(dim=1)
probs = softmax(prediction_logits)
print(f"Normal: {probs[0][0]}\nHatespeech: {probs[0][1]}\n\n")
print(f"Normal: {probs[1][0]}\nHatespeech: {probs[1][1]}\n\n")
# batch = attack(detokenized)
# inputs = tokenizer(detokenized, return_tensors="pt", padding=True).to(device)
# prediction_logits, _ = model(input_ids=inputs['input_ids'],attention_mask=inputs['attention_mask'])
# softmax = torch.nn.Softmax(dim=1)
# probs = softmax(prediction_logits)
# print(f"Normal: {probs[0][0]}\nHatespeech: {probs[0][1]}\n\n")
# print(f"Normal: {probs[1][0]}\nHatespeech: {probs[1][1]}\n\n")
#
break
# print("------------------")
......
import transformers
def attack(sentence, model):
def attack(sentence, model, tokenizer):
model = model.to(device)
from typing import Union
import torch
# from transformers import AutoTokenizer, AutoModelForSequenceClassification
# from pretrained.models import *
#
# device = "cuda" if torch.cuda.is_available() else "cpu"
#
# tokenizer = AutoTokenizer.from_pretrained(
# "Hate-speech-CNERG/bert-base-uncased-hatexplain-rationale-two"
# )
# model = Model_Rational_Label.from_pretrained(
# "Hate-speech-CNERG/bert-base-uncased-hatexplain-rationale-two"
# )
# model = model.to(device)
def eval(text, model, tokenizer):
    """
    Get model's prediction on a text.

    NOTE: this function shadows the builtin ``eval``; the name is kept
    unchanged because callers do ``from utils.eval import eval``.

    Parameters
    ----------
    text : Union[str, list]
        Text to be classified. Either a single string or a list of strings
    model : transformers.AutoModelForSequenceClassification
        Trained HateXplain model
    tokenizer : transformers.AutoTokenizer
        Tokenizer from trained HateXplain model

    Returns
    -------
    probabilities : torch.Tensor
        If text is only one string, then get probabilities with
        `probabilities[0][0]` for `normal` and
        `probabilities[0][1]` for `hatespeech`.
        If text is multiple strings in a list, then get probabilities with
        `probabilities[i][0]` and `probabilities[i][1]`, respectively, where
        `i` is the sample in the batch.
    """
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    model = model.to(device)
    # BUG FIX: the original tokenized `detokenized`, a variable that only
    # exists in the calling script, so this function always raised a
    # NameError and silently ignored its `text` argument. Tokenize `text`.
    inputs = tokenizer(
        text,
        return_tensors="pt",
        padding=True
    ).to(device)
    # The model's forward returns a tuple; only the first element (the
    # classification logits) is used here — the second is discarded.
    prediction_logits, _ = model(
        input_ids=inputs['input_ids'],
        attention_mask=inputs['attention_mask']
    )
    # Normalize the logits into per-class probabilities along dim 1
    # (the class dimension: column 0 = normal, column 1 = hatespeech).
    softmax = torch.nn.Softmax(dim=1)
    probabilities = softmax(prediction_logits)
    return probabilities
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment