diff --git a/punctuation_feature.py b/punctuation_feature.py
index ec9fd5f5c67c1af145971e9fce6424f145ab1d3c..904eae1a36abe23a8aa36fd24a45fdea4e533788 100644
--- a/punctuation_feature.py
+++ b/punctuation_feature.py
@@ -1,22 +1,31 @@
 from corpus import read_corpus
 from nltk.tokenize import word_tokenize
+import re
 
 def extract(corpus_instance):
     relevant_punctuation = ['!', '?', '...', '""', '``']
     #"?!", "!?", "???", "!!!" are no lemmas
     allcaps = 0
-    review = (word_tokenize(corpus_instance["TITLE"])) + corpus_instance["TOKENS"]
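+    # matches runs of two or more '!' or '?' characters, e.g. "?!", "!!!", "???"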
+    excessive_punctuation = re.compile('[!?][!?]+')
+    review_tokens = word_tokenize(corpus_instance["TITLE"]) + corpus_instance["TOKENS"]
+    review_text = corpus_instance["TITLE"] + " " + corpus_instance["REVIEW"]
 
     corpus_instance_vector = []
 
     for punctuation in relevant_punctuation:
-        corpus_instance_vector.append(review.count(punctuation)/len(review))
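+        # relative frequency of each punctuation marker among all tokens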
+        corpus_instance_vector.append(review_tokens.count(punctuation)/len(review_tokens))
-        #print((str(punctuation) + ": " + str(review.count(punctuation))))
+        #print(str(punctuation) + ": " + str(review_tokens.count(punctuation)))
 
-    for token in review:
-        if token.isupper() and len(token) > 1:
+    for token in review_tokens:
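+        # require a vowel so vowel-less initialisms such as "HTML" are not counted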
+        if token.isupper() and len(token) > 1 and any(char.lower() in 'aeiou' for char in token):
             allcaps += 1
-    corpus_instance_vector.append(allcaps/len(review))
+
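+    # rate of excessive punctuation runs, normalized by the character length of the raw text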
+    corpus_instance_vector.append(len(excessive_punctuation.findall(review_text))/len(review_text))
+    corpus_instance_vector.append(allcaps/len(review_tokens))
 
     return corpus_instance_vector
 
@@ -25,6 +34,7 @@ if __name__ == '__main__':
     """
     function calls for testing purposes on a small corpus
     """
-    corpus = read_corpus("minicorpus.csv")
-    corpus_instance = corpus[3]
+    pass
+    #corpus = read_corpus("minicorpus.csv")
+    #corpus_instance = corpus[3]
     #print(extract(corpus_instance))