Skip to content
Snippets Groups Projects
Commit d797afc2 authored by wesenberg's avatar wesenberg
Browse files

17.

parent c96f5aa8
No related branches found
No related tags found
No related merge requests found
{
"loader": "loaders/gigaword.py",
"dataset": "data/train-data/gigaword",
"indices": "data/train-data/gigaword/indices.npy",
"model_dir": "data/models/gigaword-AMR-3",
"verbose": true,
"print_every": 1,
"eval_every": 20,
"save_every": 20,
"max_val_steps": 100,
"max_train_seconds": null,
"max_train_steps": 100,
"batch_size": 4,
"learning_rate": 1e-05,
"k_samples": 100,
"sample_aggregation": "max",
"loss": "pgb",
"encoder_model_id": "distilroberta-base",
"rewards": {
"Fluency": {
"weight": 1,
"type": "masked",
"model_id": "distilroberta-base",
"max_score": 40.0,
"norm": "max"
},
"AmrReward": {
"weight": 1
},
"GaussianLength": {
"weight": 1,
"mean": 8,
"std": 3.2
}
}
}
{
"loader": "loaders/gigaword.py",
"dataset": "data/train-data/gigaword",
"indices": "data/train-data/gigaword/indices.npy",
"model_dir": "data/models/gigaword-AMR-4",
"verbose": true,
"print_every": 1,
"eval_every": 20,
"save_every": 20,
"max_val_steps": 100,
"max_train_seconds": null,
"max_train_steps": 100,
"batch_size": 4,
"learning_rate": 1e-05,
"k_samples": 100,
"sample_aggregation": "max",
"loss": "pgb",
"encoder_model_id": "distilroberta-base",
"rewards": {
"Fluency": {
"weight": 1,
"type": "masked",
"model_id": "distilroberta-base",
"max_score": 40.0,
"norm": "max"
},
"BiEncoderSimilarity": {
"weight": 1,
"model_id": "all-distilroberta-v1"
},
"AmrReward": {
"weight": 1
},
"GaussianLength": {
"weight": 1,
"mean": 8,
"std": 3.2
}
}
}
......@@ -5,8 +5,8 @@
"model_dir": "data/models/gigaword-L10_2000_then_AMR",
"verbose": true,
"print_every": 1,
"eval_every": 50,
"save_every": 50,
"eval_every": 100,
"save_every": 100,
"max_val_steps": 8000,
"max_train_seconds": null,
"max_train_steps": 8000,
......
{
"loader": "loaders/gigaword.py",
"dataset": "data/train-data/gigaword",
"indices": "data/train-data/gigaword/indices.npy",
"model_dir": "data/models/gigaword-L10_2000_then_AMR_V2",
"verbose": true,
"print_every": 1,
"eval_every": 50,
"save_every": 50,
"max_val_steps": 2100,
"max_train_seconds": null,
"max_train_steps": 2100,
"batch_size": 4,
"learning_rate": 1e-05,
"k_samples": 100,
"sample_aggregation": "max",
"loss": "pgb",
"encoder_model_id": "distilroberta-base",
"rewards": {
"Fluency": {
"weight": 1,
"type": "masked",
"model_id": "distilroberta-base",
"max_score": 40.0,
"norm": "max"
},
"BiEncoderSimilarity": {
"weight": 1,
"model_id": "all-distilroberta-v1"
},
"AmrReward": {
"weight": 1
},
"GaussianLength": {
"weight": 1,
"mean": 10,
"std": 3.2
}
}
}
{
"loader": "loaders/gigaword.py",
"dataset": "data/train-data/gigaword",
"indices": "data/train-data/gigaword/indices.npy",
"model_dir": "data/models/gigaword-L8-then-AMR-3",
"verbose": true,
"print_every": 1,
"eval_every": 20,
"save_every": 20,
"max_val_steps": 1000,
"max_train_seconds": null,
"max_train_steps": 1000,
"batch_size": 4,
"learning_rate": 1e-05,
"k_samples": 100,
"sample_aggregation": "max",
"loss": "pgb",
"encoder_model_id": "distilroberta-base",
"rewards": {
"Fluency": {
"weight": 1,
"type": "masked",
"model_id": "distilroberta-base",
"max_score": 40.0,
"norm": "max"
},
"AmrReward": {
"weight": 1
},
"GaussianLength": {
"weight": 1,
"mean": 8,
"std": 3.2
}
}
}
{
"loader": "loaders/gigaword.py",
"dataset": "data/train-data/gigaword",
"indices": "data/train-data/gigaword/indices.npy",
"model_dir": "data/models/gigaword-L8-then-AMR-4",
"verbose": true,
"print_every": 1,
"eval_every": 20,
"save_every": 20,
"max_val_steps": 100,
"max_train_seconds": null,
"max_train_steps": 100,
"batch_size": 4,
"learning_rate": 1e-05,
"k_samples": 100,
"sample_aggregation": "max",
"loss": "pgb",
"encoder_model_id": "distilroberta-base",
"rewards": {
"Fluency": {
"weight": 1,
"type": "masked",
"model_id": "distilroberta-base",
"max_score": 40.0,
"norm": "max"
},
"BiEncoderSimilarity": {
"weight": 1,
"model_id": "all-distilroberta-v1"
},
"AmrReward": {
"weight": 1
},
"GaussianLength": {
"weight": 1,
"mean": 8,
"std": 3.2
}
}
}
{
"loader": "loaders/gigaword.py",
"dataset": "data/train-data/gigaword",
"indices": "data/train-data/gigaword/indices.npy",
"model_dir": "data/models/gigaword-L8_10-then-AMR-4_pstats",
"verbose": true,
"print_every": 1,
"eval_every": 1,
"save_every": 1,
"max_val_steps": 200,
"max_train_seconds": null,
"max_train_steps": 200,
"batch_size": 4,
"learning_rate": 1e-05,
"k_samples": 100,
"sample_aggregation": "max",
"loss": "pgb",
"encoder_model_id": "distilroberta-base",
"rewards": {
"Fluency": {
"weight": 1,
"type": "masked",
"model_id": "distilroberta-base",
"max_score": 40.0,
"norm": "max"
},
"BiEncoderSimilarity": {
"weight": 1,
"model_id": "all-distilroberta-v1"
},
"AmrReward": {
"weight": 1
},
"GaussianLength": {
"weight": 1,
"mean": 8,
"std": 3.2
}
}
}
{
"loader": "loaders/gigaword.py",
"dataset": "data/train-data/gigaword",
"indices": "data/train-data/gigaword/indices.npy",
"model_dir": "data/models/gigaword-L8_1000-then-AMR-3",
"verbose": true,
"print_every": 1,
"eval_every": 20,
"save_every": 20,
"max_val_steps": 1100,
"max_train_seconds": null,
"max_train_steps": 1100,
"batch_size": 4,
"learning_rate": 1e-05,
"k_samples": 100,
"sample_aggregation": "max",
"loss": "pgb",
"encoder_model_id": "distilroberta-base",
"rewards": {
"Fluency": {
"weight": 1,
"type": "masked",
"model_id": "distilroberta-base",
"max_score": 40.0,
"norm": "max"
},
"AmrReward": {
"weight": 1
},
"GaussianLength": {
"weight": 1,
"mean": 8,
"std": 3.2
}
}
}
{
"loader": "loaders/gigaword.py",
"dataset": "data/train-data/gigaword",
"indices": "data/train-data/gigaword/indices.npy",
"model_dir": "data/models/gigaword-L8_1000-then-AMR-4",
"verbose": true,
"print_every": 1,
"eval_every": 20,
"save_every": 20,
"max_val_steps": 1200,
"max_train_seconds": null,
"max_train_steps": 1200,
"batch_size": 4,
"learning_rate": 1e-05,
"k_samples": 100,
"sample_aggregation": "max",
"loss": "pgb",
"encoder_model_id": "distilroberta-base",
"rewards": {
"Fluency": {
"weight": 1,
"type": "masked",
"model_id": "distilroberta-base",
"max_score": 40.0,
"norm": "max"
},
"BiEncoderSimilarity": {
"weight": 1,
"model_id": "all-distilroberta-v1"
},
"AmrReward": {
"weight": 1
},
"GaussianLength": {
"weight": 1,
"mean": 8,
"std": 3.2
}
}
}
{
"loader": "loaders/gigaword.py",
"dataset": "data/train-data/gigaword",
"indices": "data/train-data/gigaword/indices.npy",
"model_dir": "data/models/gigaword-L8_1000",
"verbose": true,
"print_every": 1,
"eval_every": 50,
"save_every": 50,
"max_val_steps": 1000,
"max_train_seconds": null,
"max_train_steps": 1000,
"batch_size": 4,
"learning_rate": 1e-05,
"k_samples": 100,
"sample_aggregation": "max",
"loss": "pgb",
"encoder_model_id": "distilroberta-base",
"rewards": {
"Fluency": {
"weight": 1,
"type": "masked",
"model_id": "distilroberta-base",
"max_score": 40.0,
"norm": "max"
},
"BiEncoderSimilarity": {
"weight": 1,
"model_id": "all-distilroberta-v1"
},
"GaussianLength": {
"weight": 1,
"mean": 8,
"std": 3.2
}
}
}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment