# Settings are grouped into one table per component.
# Interval and duration values are strings of the form "<count>:<unit>"
# (units used in this file: step, iteration, epoch). TOML itself does not
# interpret them; presumably the consuming application parses them — verify
# against the loader.

[mock_callback]
on_batch_end_interval = "3:step"
on_batch_end_seed = 42
on_optimizer_step_interval = "2:iteration"

[mock_model]
requires_grad = true
use_activation = true

[clock]
verbose = false

[training]
duration = "100:epoch"
seed = 0
device = "cpu"
dtype = "float32"
batch_size = 4
gradient_accumulation = "4:step"
gradient_clipping_max_norm = 1.0

[optimizer]
optimizer = "SGD"
# NOTE(review): integer literal, unlike gradient_clipping_max_norm above.
# If the consumer requires a strict float, write 1.0 — confirm before changing.
learning_rate = 1

[lr_scheduler]
type = "ConstantLR"
update_interval = "1:iteration"
warmup = "20:iteration"