diff --git a/.gitignore b/.gitignore
index c4a428c1be9e42aa2a52b207ce8464736c05c8ff..e67bbc943106d2a8a2bebdc8cfd731d3317f5281 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,2 +1,3 @@
+__pycache__
 venv
 bert-base-uncased-hatexplain-rationale-two
diff --git a/test.py b/test.py
index 41742ed0238cced81424fa540e18eec4b61e121b..6ddfc178ce1b93b4faa0e7ac0ac3a27cfd19b950 100644
--- a/test.py
+++ b/test.py
@@ -6,13 +6,14 @@ from nltk.tokenize.treebank import TreebankWordDetokenizer
 from utils.eval import eval
 from utils.attack import attack
 
-device = 'cuda' if torch.cuda.is_available() else 'cpu'
+device = "cuda" if torch.cuda.is_available() else "cpu"
 
-tokenizer = AutoTokenizer.from_pretrained("Hate-speech-CNERG/bert-base-uncased-hatexplain-rationale-two")
-model = \
-    Model_Rational_Label.from_pretrained(
-        "Hate-speech-CNERG/bert-base-uncased-hatexplain-rationale-two"
-    )
+tokenizer = AutoTokenizer.from_pretrained(
+    "Hate-speech-CNERG/bert-base-uncased-hatexplain-rationale-two"
+)
+model = Model_Rational_Label.from_pretrained(
+    "Hate-speech-CNERG/bert-base-uncased-hatexplain-rationale-two"
+)
 model = model.to(device)
 
 
@@ -26,16 +27,18 @@ model = model.to(device)
 # print(f"Normal: {probs[1][0]}\nHatespeech: {probs[1][1]}")
 
 # Load test dataset
-with open('data/post_id_divisions.json') as splits:
+with open("data/post_id_divisions.json") as splits:
     data = json.load(splits)
-    test_ids = data['test']
+    test_ids = data["test"]
+
 
 def dataset(ids):
-    with open('data/dataset.json') as data_file:
+    with open("data/dataset.json") as data_file:
         data = json.load(data_file)
     for i in ids:
         yield data[i]
 
+
 counter = 0
 batchsize = 8
 for post in dataset(test_ids):
@@ -43,15 +46,18 @@ for post in dataset(test_ids):
     #     break
     # counter += 1
 
-    detokenized = TreebankWordDetokenizer().detokenize(post["post_tokens"])
-    # batch = attack(detokenized)
+    text = TreebankWordDetokenizer().detokenize(post["post_tokens"])
 
-    # probabilities = eval(detokenized, model, tokenizer)
-    probabilities = eval(["this is a test", "this is a tast"], model, tokenizer)
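+    # Generate adversarial variants of the post, then score each with the model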
+    attacks = attack(text, model, tokenizer)
+    print(attacks)
+
+    probabilities = eval(attacks, model, tokenizer)
+    # probabilities = eval(["this is a test", "this is a tast"], model, tokenizer)
     print(probabilities)
     # print(f"Normal: {probabilities[0][0]}\nHatespeech: {probabilities[0][1]}\n\n")
     # print(f"Normal: {probabilities[1][0]}\nHatespeech: {probabilities[1][1]}\n\n")
-    
+
     # ATTACK HERE
     # batch = attack(detokenized)
 
@@ -68,4 +74,3 @@ for post in dataset(test_ids):
     # print(post["post_id"])
     # print(post["annotators"][0]["label"])
     # print(TreebankWordDetokenizer().detokenize(post["post_tokens"]))
-
diff --git a/utils/__pycache__/attack.cpython-38.pyc b/utils/__pycache__/attack.cpython-38.pyc
deleted file mode 100644
index 775aa2dc19cfcc635d2bb60490f0cc6bc040346b..0000000000000000000000000000000000000000
Binary files a/utils/__pycache__/attack.cpython-38.pyc and /dev/null differ
diff --git a/utils/__pycache__/eval.cpython-38.pyc b/utils/__pycache__/eval.cpython-38.pyc
deleted file mode 100644
index af701cdd725a97b7278e7893c946fb1cf3c62780..0000000000000000000000000000000000000000
Binary files a/utils/__pycache__/eval.cpython-38.pyc and /dev/null differ
diff --git a/utils/attack.py b/utils/attack.py
index d6d1c127209cc7e5d17fb3c1e11664d7444dabb5..506214ecb3d035621a00751d477ae38d00ad7ead 100644
--- a/utils/attack.py
+++ b/utils/attack.py
@@ -1,5 +1,111 @@
+import torch
 import transformers
+import string
+from utils.eval import eval
 
-def attack(sentence, model, tokenizer):
+def attack(text, model, tokenizer, subs=1, top_k=5):
+    """
+    Return adversarial examples crafted by single-character substitutions
+
+    Parameters
+    ----------
+    text : str
+        Text to be attacked/modified.
+    model : Model_Rational_Label
+        Victim model, the trained HateXplain model
+    tokenizer : transformers.AutoTokenizer
+        Tokenizer from the trained HateXplain model
+    subs : int
+        Number of character substitutions (currently unused; one
+        substitution is always applied). Default: 1
+    top_k : int
+        Return this many of the best candidates, where best means the
+        largest drop in the model's hatespeech probability.
+        Default: 5
+
+    Returns
+    -------
+    attacks : List[str]
+        List of the `top_k` attacks on the input text
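+
+    Examples
+    --------
+    Intended call pattern, with `model` and `tokenizer` loaded as in
+    test.py::
+
+        attacks = attack("this is a test", model, tokenizer, top_k=5)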
+    """
+    device = "cuda" if torch.cuda.is_available() else "cpu"
     model = model.to(device)
 
+    # Compute probabilities prior to the attacks
+    # inputs = tokenizer(
+    #     text, 
+    #     return_tensors="pt", 
+    #     padding=True
+    # ).to(device)
+    # prediction_logits, _ = model(
+    #     input_ids=inputs['input_ids'],
+    #     attention_mask=inputs['attention_mask']
+    # )
+    # softmax = torch.nn.Softmax(dim=1)
+    # prior_probabilities = softmax(prediction_logits)
+    # prior_hatespeech_probability = prior_probabilities[0][1]
+
+    prior_hatespeech_probability = eval(text, model, tokenizer)[0][1]
+
+    # Generate attacks
+    candidate_scores = {}
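+    # A candidate's score is the drop in hatespeech probability relative to
+    # the unmodified text, so the most damaging substitutions rank first.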
+    for i in range(len(text)):
+        for candidate in generate_candidates(text, i, model, tokenizer):
+            candidate_probability = eval(candidate, model, tokenizer)[0][1]
+
+            candidate_score = prior_hatespeech_probability - candidate_probability
+            # higher score is better
+            candidate_scores[candidate] = candidate_score
+
+    sorted_candidate_scores = dict(sorted(candidate_scores.items(),
+                                          key=lambda item: item[1],
+                                          reverse=True))
+    attacks = list(sorted_candidate_scores)[:top_k]
+    return attacks
+
+
+def generate_candidates(text, i, model, tokenizer):
+    """
+    Substitute a character in the text with every possible substitution 
+
+    Parameters
+    ----------
+    text : str
+        Text to be attacked/modified.
+    i : int
+        Index of character to be substituted
+    model : Model_Rational_Label
+        Victim model, the trained HateXplain model
+    tokenizer : transformers.AutoTokenizer
+        Tokenizer from the trained HateXplain model
+
+    Yields
+    ------
+    candidate : str
+        Copy of `text` with the character at index `i` substituted.
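+
+    Examples
+    --------
+    For text "cat" and i=0, this yields "aat", "bat", "dat", and so on for
+    every printable character other than "c".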
+    """
+
+    permissible_substitutions = string.printable
+    # digits, ASCII letters, punctuation, and whitespace
+
+    for substitution_char in permissible_substitutions:
+        if substitution_char == text[i]:
+            continue
+        candidate = list(text)
+        candidate[i] = substitution_char
+        candidate = "".join(candidate)
+        yield candidate