Skip to content
Snippets Groups Projects
Commit 064ce3e4 authored by toyota's avatar toyota
Browse files

add sentenceindex list

parent 1cafcda4
No related branches found
No related tags found
No related merge requests found
File added
File added
File added
......@@ -128,3 +128,23 @@ def read_pkl(path = "d000.pkl"):
with open (path, "rb") as f:
li = pkl.load(f)
return li
def get_sent_list(key="d000"):
    """Build the sentence index list for one pickled document and save it.

    Loads ``key + ".pkl"`` through ``read_pkl``, walks the first element of
    the loaded object, and records a ``[start, end]`` index pair every time
    a sentence-final punctuation mark (".", "!" or "?") is encountered.
    ``end`` is the index of the punctuation mark itself, so spans are
    inclusive. The collected list is pickled to ``key + "_sent.pkl"`` and
    "Done" is printed afterwards.

    Each scanned element is indexed as ``elm[0]`` and ``elm[1]``; presumably
    these are (surface form, part-of-speech tag) pairs -- TODO confirm
    against whatever produces the input pickle.

    NOTE(review): consecutive sentence-final marks (e.g. "?" followed by
    "!") each append a span with the same start index, and trailing
    elements that are not followed by a sentence-final mark get no span.
    Both behaviours are kept as-is; confirm whether they are intended.

    Args:
        key: File name stem; the input is ``key + ".pkl"`` and the output
            is ``key + "_sent.pkl"``.

    Returns:
        None. The result is only written to disk.
    """
    sentence_final = (".", "!", "?")

    data = read_pkl(key + ".pkl")
    tokens = data[0]

    sent_li = []
    sent_start = 0
    # True once a sentence-final mark has been seen and the next
    # non-punctuation element should open a new sentence.
    sent_ended = False

    for index, elm in enumerate(tokens):
        if elm[1] == "PUNCT":
            if elm[0] in sentence_final:
                sent_li.append([sent_start, index])
                sent_ended = True
            # Other punctuation neither closes nor opens a sentence.
        elif sent_ended:
            # First non-punctuation element after a sentence end.
            sent_start = index
            sent_ended = False

    with open(key + "_sent.pkl", "wb") as f:
        pkl.dump(sent_li, f)
    print("Done")
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment