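# bank of parallel 1-D convolutions over the 200-dimensional word embeddings: kernel sizes 1 to 7, 50 filters each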
nn.Conv1d(200, 50, 1, padding="valid", groups=1),
nn.Conv1d(200, 50, 2, padding="valid", groups=1),
nn.Conv1d(200, 50, 3, padding="valid", groups=1),
nn.Conv1d(200, 50, 4, padding="valid", groups=1),
nn.Conv1d(200, 50, 5, padding="valid", groups=1),
nn.Conv1d(200, 50, 6, padding="valid", groups=1),
nn.Conv1d(200, 50, 7, padding="valid", groups=1)
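# sentence encoding: apply each convolution and max-pool over the sequence dimension (amax over dim 2)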
convolutions = []
for cnn in self.cnns:
convolutions.append(cnn(document.transpose(1,2)).amax(dim=2))
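# the document encoder LSTM reads the sentence vectors in reverse order (flip) and returns its final hidden and cell states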
_, (hidden_state, cell_state) = self.document_encoder(encoded_sentences.flip(dims=(0,)))
return hidden_state, cell_state
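# encode returns the per-sentence vectors together with the document-level encoder states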
def encode(self, document):
encoded_sentences = self.encode_sentences(document)
return encoded_sentences, self.encode_document(encoded_sentences)
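# run the sentence extractor over the sentence vectors, initialised with the document states, and project each output to one logit per sentence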
logits = self.projector(self.sentence_extractor(encoded_sentences, states)[0])
if k < len(probs):
return probs.topk(k).indices, probs # handle documents with fewer than 3 sentences?
return torch.arange(len(probs)), probs
class ActorOnlySummarisationModel(SummarisationModel):
def _train(self, dataset, epochs=20, batch_size=20, learning_rate=0.001, shuffle=True):
optimizer = torch.optim.Adam(self.parameters(), lr=learning_rate)
training_dataloader = torch.utils.data.DataLoader(dataset.train, batch_size=batch_size, shuffle=shuffle)
test_dataloader = torch.utils.data.DataLoader(dataset.test, batch_size=batch_size, shuffle=shuffle)
since = time.time()
val_rouge_history = []
best_rouge = 0.0
for epoch in range(epochs):
print('Epoch {}/{}'.format(epoch, epochs - 1))
print('-' * 10)
for batch in training_dataloader:
optimizer.zero_grad()
for datapoint in batch:
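# log-likelihood of every candidate extraction in the precomputed search space; the loss weights the most likely candidate's log-likelihood by its ROUGE score (REINFORCE-style)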
o = datapoint.p_searchspace @ torch.log(probs) + datapoint.n_searchspace @ torch.log(1 - probs)
idx_sample = torch.argmax(o)
loss = - datapoint.top_rouge[idx_sample] * o[idx_sample]
loss.backward()
# training statistics (train dataset)
running_loss += loss.item()
running_rouge += datapoint.top_rouge[idx_sample] # greater than val/test rouge = indication that the search space works
epoch_loss = running_loss / len(training_dataloader.dataset)
epoch_rouge = running_rouge / len(training_dataloader.dataset) # depends on __len__ of PreprocessedDataSet
print('Train Loss: {:.4f} Rouge Score: {:.4f}'.format(epoch_loss, epoch_rouge))
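# validation: extract sentences with the current model and score them against the reference summary with ROUGE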
top_indices, probs = self.__call__(datapoint.document)
running_rouge += rouge(select_elements(datapoint.raw_document, top_indices), datapoint.raw_summary)
# compare with the train rouge to check that the search space works
epoch_rouge = running_rouge / len(PreprocessedDataSet.validation)
val_rouge_history.append(epoch_rouge)
if epoch_rouge > best_rouge:
best_rouge = epoch_rouge
best_model_wts = copy.deepcopy(self.state_dict())
time_elapsed = time.time() - since
print('Training complete in {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))
print('Best val rouge: {:4f}'.format(best_rouge))
# write val_rouge_history to a file
# load best model weights
self.load_state_dict(best_model_wts)
class SummarisationModelWithCrossEntropyLoss(SummarisationModel):
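# supervised baseline: binary cross-entropy between the extraction probabilities and the oracle sentence labels (bin_summary)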
def _train(self, dataset, epochs=20, batch_size=20, learning_rate=0.001, shuffle=True):
optimizer = torch.optim.Adam(self.parameters(), lr=learning_rate)
loss_fn = nn.BCELoss(reduction='sum')
for _ in range(epochs):
training_dataloader = torch.utils.data.DataLoader(dataset, batch_size=batch_size, shuffle=shuffle)
for batch in training_dataloader:
optimizer.zero_grad()
for datapoint in batch:
loss = loss_fn(probs, datapoint.bin_summary)
loss.backward()
optimizer.step()
class ActorCriticSummarisationModel(SummarisationModel):
self.steepness = steepness
self.denoise = denoise
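# work on a frozen copy of the pretrained model and reuse its document encoder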
model = copy.deepcopy(model)
#model.eval()
for param in model.parameters():
param.requires_grad = False
self.document_encoder = model.encode_document
self.layer_1 = nn.Linear(1200, 600)
self.layer_2 = nn.Linear(600, 600)
self.layer_3 = nn.Linear(600, 1)
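# hand-crafted initial weights: W_1 takes the difference of the two document vectors, W_2 is the identity, W_3 sums the remaining features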
W_1 = torch.cat((torch.eye(600), -torch.eye(600)), 1)
W_2 = torch.eye(600)
W_3 = torch.ones(600)
def forward(self, encoded_sentences_1, encoded_sentences_2):
_, document_vec_1 = self.document_encoder(encoded_sentences_1)
_, document_vec_2 = self.document_encoder(encoded_sentences_2)
double_document = torch.cat((torch.squeeze(document_vec_1), torch.squeeze(document_vec_2)), dim=-1)
return torch.tanh(self.steepness * nn.functional.relu(self.layer_3(
nn.functional.relu(self.layer_2(
utils.gaussian(self.layer_1(double_document)))))))
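# critic training: regress the predicted score towards the ROUGE of positive samples (from the search space) and randomly drawn negative samples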
def _train(self, dataset, epochs=200, batch_size=20, learning_rate=0.001, shuffle=True, pos_samples=0.5):
optimizer = torch.optim.Adam(self.parameters(), lr=learning_rate)
loss_fn = nn.MSELoss()
for _ in range(epochs):
training_dataloader = torch.utils.data.DataLoader(dataset, batch_size=batch_size, shuffle=shuffle)
for batch in training_dataloader:
optimizer.zero_grad()
for datapoint in batch:
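# with probability 1 - pos_samples draw a positive sample from the search space, otherwise a random triple of sentences as a negative sample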
r = np.random.random()
if r > pos_samples:
sample = datapoint.sent_vecs.masked_select(datapoint.p_searchspace[k].bool()) # non-padded sentence embeddings
score = self.__call__(sample, datapoint.gold_sent_vecs)
loss = loss_fn(score, datapoint.top_rouge[k])
if len(datapoint.sent_vecs) >= 3:
narray = np.random.choice(len(datapoint.sent_vecs), 3, replace=False)
narray.sort()
sample = datapoint.sent_vecs[narray]
else:
continue # handle len(sent_vecs) < 3
score = self.__call__(sample, datapoint.gold_sent_vecs)
loss = loss_fn(score, utils.rouge(select_elements(datapoint.raw_document, narray), datapoint.raw_summary))
# compute the rouge score for the negative sample => would it be better to compute and store this externally?