# Text preprocessing: turn the raw texts into fixed-length integer sequences
# and open the pre-trained embedding file.
#
# nb_words: number of most frequent words the NN considers.
# lower:    lowercase the text, i.e. tokenization is case-insensitive.
# split:    word separator used by the tokenizer. It must be a non-empty
#           string: an empty separator is rejected by str.split
#           ("ValueError: empty separator"), so the Keras default of a
#           single space is used here.
tk = Tokenizer(nb_words=10000, lower=True, split=" ")

# Build the vocabulary from the training texts only; the test texts are then
# mapped with that same vocabulary.
tk.fit_on_texts(x)
x = tk.texts_to_sequences(x)
xtest = tk.texts_to_sequences(xtest)

max_len = 30  # number of words per clause that the NN considers
x = sequence.pad_sequences(x, maxlen=max_len)  # cutting and zero padding
# NOTE(review): xtest is not padded in this section -- confirm that it is
# padded to max_len further down before it is fed to the network.

# word2vec pre-trained Google News corpus (3 billion running words) word
# vector model (3 million 300-dimension English word vectors).
# The handle is left open for the code that follows; it must be closed there
# once the vectors have been read.
f = open(os.path.join(embedding_path, emb_en))