Loading train.py +10 −1 Original line number Diff line number Diff line Loading @@ -11,8 +11,8 @@ Train a new model on one or across multiple GPUs. import collections import itertools import os import math import os import random import torch Loading Loading @@ -282,6 +282,10 @@ def get_perplexity(loss): def save_checkpoint(args, trainer, epoch_itr, val_loss): if args.no_save or not distributed_utils.is_master(args): return write_timer = StopwatchMeter() write_timer.start() epoch = epoch_itr.epoch end_of_epoch = epoch_itr.end_of_epoch() updates = trainer.get_num_updates() Loading Loading @@ -330,6 +334,11 @@ def save_checkpoint(args, trainer, epoch_itr, val_loss): if os.path.lexists(old_chk): os.remove(old_chk) write_timer.stop() print('| saved checkpoint {} (epoch {} @ {} updates) (writing took {} seconds)'.format( checkpoints[0], epoch, updates, write_timer.sum)) def load_checkpoint(args, trainer, epoch_itr): """Load a checkpoint and replay dataloader to match.""" Loading Loading
train.py +10 −1 Original line number Diff line number Diff line Loading @@ -11,8 +11,8 @@ Train a new model on one or across multiple GPUs. import collections import itertools import os import math import os import random import torch Loading Loading @@ -282,6 +282,10 @@ def get_perplexity(loss): def save_checkpoint(args, trainer, epoch_itr, val_loss): if args.no_save or not distributed_utils.is_master(args): return write_timer = StopwatchMeter() write_timer.start() epoch = epoch_itr.epoch end_of_epoch = epoch_itr.end_of_epoch() updates = trainer.get_num_updates() Loading Loading @@ -330,6 +334,11 @@ def save_checkpoint(args, trainer, epoch_itr, val_loss): if os.path.lexists(old_chk): os.remove(old_chk) write_timer.stop() print('| saved checkpoint {} (epoch {} @ {} updates) (writing took {} seconds)'.format( checkpoints[0], epoch, updates, write_timer.sum)) def load_checkpoint(args, trainer, epoch_itr): """Load a checkpoint and replay dataloader to match.""" Loading