diff --git a/__pycache__/corpus.cpython-36.pyc b/__pycache__/corpus.cpython-36.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..25e416bc49ae76484bbb240a7f9e3845c05904f4
Binary files /dev/null and b/__pycache__/corpus.cpython-36.pyc differ
diff --git a/postagger.py b/postagger.py
new file mode 100644
index 0000000000000000000000000000000000000000..1fc2a773bef40261eefedd53e8e2556a2fafa23e
--- /dev/null
+++ b/postagger.py
@@ -0,0 +1,25 @@
+"""Part-of-speech tag reviews from corpus.csv and print the tagged tokens."""
+import nltk
+from nltk.tokenize import word_tokenize
+from corpus import read_corpus
+
+# Debugging cap on how many reviews get tagged. Set to None to tag the
+# whole corpus once you are sure it is worth it.
+MAX_REVIEWS = 9
+
+corpus = read_corpus("corpus.csv")
+
+# Clamp the cap to the corpus size so a corpus with fewer rows than
+# MAX_REVIEWS does not raise IndexError.
+review_count = len(corpus)
+if MAX_REVIEWS is not None:
+    review_count = min(MAX_REVIEWS, review_count)
+
+# One nltk.pos_tag result per review. REVIEW is coerced with str() before
+# tokenizing (presumably some cells are not plain strings -- confirm).
+tagged_corpus = [
+    nltk.pos_tag(word_tokenize(str(corpus[i]["REVIEW"])))
+    for i in range(review_count)
+]
+
+print(tagged_corpus)