# Settings for the mock model.
[mock_model]
requires_grad = true
use_activation = true
# Clock settings.
[clock]
verbose = false
# Training-run settings. Durations and intervals in this table are strings
# of the form "<count>:<unit>" (units used here: "epoch" and "step").
[training]
duration = "100:epoch"
seed = 0
device = "cpu"
dtype = "float32"
batch_size = 4
gradient_accumulation = "4:step"
evaluation_interval = "5:epoch"
evaluation_seed = 1
gradient_clipping_max_norm = 1.0
# Optimizer selection and its learning rate.
[optimizer]
optimizer = "SGD"
# NOTE(review): written as an integer, while gradient_clipping_max_norm in
# [training] is a float; use 1.0 if the consumer requires a float. TODO confirm.
learning_rate = 1
# Learning-rate scheduler settings. Intervals are "<count>:<unit>" strings.
[lr_scheduler]
type = "ConstantLR"
update_interval = "1:step"
warmup = "20:step"