Add small comments for MonolingualDataset and TokenBlockDataset

Summary:
Pull Request resolved: https://github.com/pytorch/fairseq/pull/669

Differential Revision: D15114160

Pulled By: myleott

fbshipit-source-id: 64f4a8154c8931ddbbe459d4d4a54c46680ad6b6

Add small comments for MonolingualDataset and TokenBlockDataset
8bf8399d · Myle Ott · Facebook Github Bot · f701aa8c · 8bf8399d · 8bf8399d
Commit 8bf8399d, authored 5 years ago by Myle Ott and committed 5 years ago by Facebook Github Bot.
--- a/fairseq/data/monolingual_dataset.py
+++ b/fairseq/data/monolingual_dataset.py
@@ -78,6 +78,14 @@ class MonolingualDataset(FairseqDataset):

    def __getitem__(self, index):
        if self.targets is not None:
+            # *future_target* is the original sentence
+            # *source* is shifted right by 1 (maybe left-padded with eos)
+            # *past_target* is shifted right by 2 (left-padded as needed)
+            #
+            # Left-to-right language models should condition on *source* and
+            # predict *future_target*.
+            # Right-to-left language models should condition on *source* and
+            # predict *past_target*.
            source, future_target, past_target = self.dataset[index]
            source, target = self._make_source_target(source, future_target, past_target)
        else:

--- a/fairseq/data/token_block_dataset.py
+++ b/fairseq/data/token_block_dataset.py
@@ -112,8 +112,8 @@ class TokenBlockDataset(FairseqDataset):

        if self.include_targets:
            # *target* is the original sentence (=item)
-            # *source* is rotated left by 1 (maybe left-padded with eos)
-            # *past_target* is rotated left by 2 (left-padded as needed)
+            # *source* is shifted right by 1 (maybe left-padded with eos)
+            # *past_target* is shifted right by 2 (left-padded as needed)
            if s == 0:
                source = torch.cat([item.new([self.eos]), buffer[0:e - 1]])
                past_target = torch.cat([item.new([self.pad, self.eos]), buffer[0:e - 2]])