Newer
Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
from typing import Union
import torch
# from transformers import AutoTokenizer, AutoModelForSequenceClassification
# from pretrained.models import *
#
# device = "cuda" if torch.cuda.is_available() else "cpu"
#
# tokenizer = AutoTokenizer.from_pretrained(
# "Hate-speech-CNERG/bert-base-uncased-hatexplain-rationale-two"
# )
# model = Model_Rational_Label.from_pretrained(
# "Hate-speech-CNERG/bert-base-uncased-hatexplain-rationale-two"
# )
# model = model.to(device)
def eval(text: Union[str, list], model, tokenizer) -> torch.Tensor:
    """
    Get model's prediction on a text.

    NOTE: the function name shadows the builtin ``eval``; it is kept
    unchanged for backward compatibility with existing callers.

    Parameters
    ----------
    text : Union[str, list]
        Text to be classified. Either a single string or a list of strings.
    model : torch.nn.Module
        Trained HateXplain model. It is called with ``input_ids`` and
        ``attention_mask`` keyword arguments and must return exactly two
        values, the first of which is the classification logits.
        The model is moved to the selected device as a side effect.
    tokenizer : callable
        Tokenizer from the trained HateXplain model. It is called as
        ``tokenizer(text, return_tensors="pt", padding=True)`` and its result
        must support ``.to(device)`` and lookup of ``'input_ids'`` and
        ``'attention_mask'``.

    Returns
    -------
    probabilities : torch.Tensor
        Softmax over dimension 1 of the logits.
        If text is only one string, then get probabilities with
        `probabilities[0][0]` for `normal` and
        `probabilities[0][1]` for `hatespeech`.
        If text is multiple strings in a list, then get probabilities with
        `probabilities[i][0]` and `probabilities[i][1]`, respectively, where
        `i` is the sample in the batch.
    """
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    model = model.to(device)

    # Tokenize the caller-supplied text. The previous code referenced an
    # undefined name (`detokenized`) here, so `text` was never used.
    inputs = tokenizer(
        text,
        return_tensors="pt",
        padding=True,
    ).to(device)

    # The second value returned by the model is not needed here.
    prediction_logits, _ = model(
        input_ids=inputs['input_ids'],
        attention_mask=inputs['attention_mask'],
    )

    # Normalise logits over dimension 1 (per sample, across classes).
    # Gradient tracking is intentionally left as the caller configured it.
    probabilities = torch.softmax(prediction_logits, dim=1)
    return probabilities