deprecate evaluation
repository: https://github.com/finegrain-ai/refiners.git
parent 061d44888f
commit 44760ac19f
@@ -25,7 +25,6 @@ class TrainingClock(Callback["Trainer[BaseConfig, Any]"]):
         batch_size: int,
         training_duration: TimeValue,
         gradient_accumulation: TimeValue,
-        evaluation_interval: TimeValue,
         lr_scheduler_interval: TimeValue,
         verbose: bool = True,
     ) -> None:
@@ -37,7 +36,6 @@ class TrainingClock(Callback["Trainer[BaseConfig, Any]"]):
         self.batch_size = batch_size
         self.training_duration = training_duration
         self.gradient_accumulation = gradient_accumulation
-        self.evaluation_interval = evaluation_interval
         self.lr_scheduler_interval = lr_scheduler_interval
         self.verbose = verbose
         self.num_batches_per_epoch = dataset_length // batch_size
@@ -85,10 +83,6 @@ class TrainingClock(Callback["Trainer[BaseConfig, Any]"]):
     def num_step_per_iteration(self) -> int:
         return self.convert_time_value_to_steps(self.gradient_accumulation)
 
-    @cached_property
-    def num_step_per_evaluation(self) -> int:
-        return self.convert_time_value_to_steps(self.evaluation_interval)
-
     def is_due(self, interval: TimeValue) -> bool:
         return self.step % self.convert_time_value_to_steps(interval) == 0
 
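Note: num_step_per_evaluation was only a specialization of the generic is_due check kept by this hunk. A standalone sketch of the pattern is_due implements (illustrative only, not the library's code; the TimeValue-to-steps conversion is assumed already applied):

def is_due(step: int, interval_in_steps: int) -> bool:
    # Same modulo check as TrainingClock.is_due.
    return step % interval_in_steps == 0

assert is_due(step=10, interval_in_steps=5)      # due at a multiple of the interval
assert not is_due(step=7, interval_in_steps=5)   # not due in between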
@@ -171,9 +165,3 @@ class TrainingClock(Callback["Trainer[BaseConfig, Any]"]):
         self.log(f"Iteration {trainer.clock.iteration} ended.")
         trainer.clock.iteration += 1
         trainer.clock.num_minibatches_processed = 0
-
-    def on_evaluate_begin(self, trainer: "Trainer[BaseConfig, Any]") -> None:
-        self.log("Evaluation started.")
-
-    def on_evaluate_end(self, trainer: "Trainer[BaseConfig, Any]") -> None:
-        self.log("Evaluation ended.")
@@ -26,13 +26,11 @@ class TrainingConfig(BaseModel):
     seed: int = 0
     batch_size: int = 1
     gradient_accumulation: Step | Epoch = Step(1)
-    evaluation_interval: Iteration | Epoch = Iteration(1)
     gradient_clipping_max_norm: float | None = None
-    evaluation_seed: int = 0
 
     model_config = ConfigDict(extra="forbid")
 
-    @field_validator("duration", "gradient_accumulation", "evaluation_interval", mode="before")
+    @field_validator("duration", "gradient_accumulation", mode="before")
     def parse_field(cls, value: TimeValueInput) -> TimeValue:
         return parse_number_unit_field(value)
 
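Note: TrainingConfig keeps model_config = ConfigDict(extra="forbid"), so configs that still set the removed evaluation_interval or evaluation_seed keys now fail validation instead of being silently ignored. A self-contained sketch of that behavior using a minimal stand-in model (hypothetical, not the refiners class itself):

from pydantic import BaseModel, ConfigDict, ValidationError

class MiniTrainingConfig(BaseModel):
    # Stand-in for TrainingConfig, with only the fields needed for the demo.
    model_config = ConfigDict(extra="forbid")
    batch_size: int = 1

try:
    MiniTrainingConfig(batch_size=4, evaluation_interval="5:epoch")
except ValidationError as err:
    print(err)  # reports an "extra_forbidden" error for evaluation_interval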
@@ -24,7 +24,6 @@ from torch.optim.lr_scheduler import (
 from torch.utils.data import DataLoader, Dataset
 
 from refiners.fluxion import layers as fl
-from refiners.fluxion.utils import no_grad
 from refiners.training_utils.callback import (
     Callback,
     CallbackConfig,
@@ -154,7 +153,6 @@ class Trainer(Generic[ConfigType, Batch], ABC):
             dataset_length=self.dataset_length,
             batch_size=self.config.training.batch_size,
             training_duration=self.config.training.duration,
-            evaluation_interval=self.config.training.evaluation_interval,
             gradient_accumulation=self.config.training.gradient_accumulation,
             lr_scheduler_interval=self.config.lr_scheduler.update_interval,
             verbose=config.verbose,
@@ -345,9 +343,6 @@ class Trainer(Generic[ConfigType, Batch], ABC):
     @abstractmethod
     def compute_loss(self, batch: Batch) -> Tensor: ...
 
-    def compute_evaluation(self) -> None:
-        pass
-
     def backward(self) -> None:
         """Backward pass on the loss."""
         self._call_callbacks(event_name="on_backward_begin")
@@ -365,8 +360,6 @@ class Trainer(Generic[ConfigType, Batch], ABC):
             self._call_callbacks(event_name="on_lr_scheduler_step_begin")
             self.lr_scheduler.step()
             self._call_callbacks(event_name="on_lr_scheduler_step_end")
-        if self.clock.is_due(self.config.training.evaluation_interval):
-            self.evaluate()
 
     def step(self, batch: Batch) -> None:
         """Perform a single training step."""
@@ -395,27 +388,12 @@ class Trainer(Generic[ConfigType, Batch], ABC):
         self.set_models_to_mode("train")
         self._call_callbacks(event_name="on_train_begin")
         assert self.learnable_parameters, "There are no learnable parameters in the models."
-        self.evaluate()
         while not self.clock.done:
             self._call_callbacks(event_name="on_epoch_begin")
             self.epoch()
             self._call_callbacks(event_name="on_epoch_end")
         self._call_callbacks(event_name="on_train_end")
 
-    @staticmethod
-    def get_evaluation_seed(instance: "Trainer[BaseConfig, Any]") -> int:
-        return instance.config.training.evaluation_seed
-
-    @no_grad()
-    @scoped_seed(seed=get_evaluation_seed)
-    def evaluate(self) -> None:
-        """Evaluate the model."""
-        self.set_models_to_mode(mode="eval")
-        self._call_callbacks(event_name="on_evaluate_begin")
-        self.compute_evaluation()
-        self._call_callbacks(event_name="on_evaluate_end")
-        self.set_models_to_mode(mode="train")
-
     def set_models_to_mode(self, mode: Literal["train", "eval"]) -> None:
         for item in self.models.values():
             if mode == "train":
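Note: with evaluate(), compute_evaluation() and get_evaluation_seed() gone, periodic evaluation becomes the caller's responsibility. A minimal user-side sketch (an assumption about downstream usage, not part of this commit) that reacts to the "on_epoch_end" event the training loop above still dispatches:

import torch

from refiners.training_utils.callback import Callback


class EvaluationCallback(Callback["Trainer[BaseConfig, Any]"]):  # hypothetical user-defined callback
    def on_epoch_end(self, trainer: "Trainer[BaseConfig, Any]") -> None:
        # Assumes the Callback base class maps the "on_epoch_end" event to a
        # method of the same name, as TrainingClock's hooks suggest.
        trainer.set_models_to_mode(mode="eval")
        with torch.no_grad():
            ...  # user-defined evaluation logic, formerly compute_evaluation()
        trainer.set_models_to_mode(mode="train")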
@@ -4,7 +4,6 @@ on_batch_end_seed = 42
 on_optimizer_step_interval = "2:iteration"
 
-
 
 [mock_model]
 requires_grad = true
 use_activation = true
@@ -19,8 +18,6 @@ device = "cpu"
 dtype = "float32"
 batch_size = 4
 gradient_accumulation = "4:step"
-evaluation_interval = "5:epoch"
-evaluation_seed = 1
 gradient_clipping_max_norm = 1.0
 
 [optimizer]
@@ -13,8 +13,6 @@ duration = "100:epoch"
 seed = 0
 batch_size = 4
 gradient_accumulation = "4:step"
-evaluation_interval = "5:epoch"
-evaluation_seed = 1
 gradient_clipping_max_norm = 1.0
 
 [optimizer]
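Note: both test configs drop the two keys, and any existing config must do the same to pass validation. A hypothetical migration snippet (the config path and the "training" table name are assumptions, not given by the commit) that strips the deprecated keys before loading:

import tomllib  # Python 3.11+

DEPRECATED_TRAINING_KEYS = ("evaluation_interval", "evaluation_seed")

with open("config.toml", "rb") as f:  # hypothetical path
    raw = tomllib.load(f)

for key in DEPRECATED_TRAINING_KEYS:
    raw.get("training", {}).pop(key, None)  # assumes the keys live under a [training] table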
@@ -186,7 +186,6 @@ def training_clock() -> TrainingClock:
         batch_size=10,
         training_duration=Epoch(5),
         gradient_accumulation=Epoch(1),
-        evaluation_interval=Epoch(1),
         lr_scheduler_interval=Epoch(1),
     )
 
@@ -198,7 +197,6 @@ def test_small_dataset_error():
         batch_size=10,
         training_duration=Epoch(5),
         gradient_accumulation=Epoch(1),
-        evaluation_interval=Epoch(1),
         lr_scheduler_interval=Epoch(1),
     )
 
@@ -210,7 +208,6 @@ def test_zero_batch_size_error():
         batch_size=0,
         training_duration=Epoch(5),
         gradient_accumulation=Epoch(1),
-        evaluation_interval=Epoch(1),
         lr_scheduler_interval=Epoch(1),
     )
 
@@ -244,13 +241,6 @@ def test_timer_functionality(training_clock: TrainingClock) -> None:
     assert training_clock.time_elapsed >= 0
 
 
-def test_state_based_properties(training_clock: TrainingClock) -> None:
-    training_clock.step = 5  # Halfway through the first epoch
-    assert not training_clock.is_due(training_clock.evaluation_interval)  # Assuming evaluation every epoch
-    training_clock.step = 10  # End of the first epoch
-    assert training_clock.is_due(training_clock.evaluation_interval)
-
-
 def test_mock_trainer_initialization(mock_config: MockConfig, mock_trainer: MockTrainer) -> None:
     assert mock_trainer.config == mock_config
     assert isinstance(mock_trainer, MockTrainer)
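Note: test_state_based_properties is removed because clock.evaluation_interval no longer exists, but the modulo behavior of is_due() it exercised could still be asserted against an explicit interval. A hypothetical replacement (not added by this commit), reusing the training_clock fixture and Epoch from the surrounding test module:

def test_is_due_against_explicit_interval(training_clock: TrainingClock) -> None:
    training_clock.step = 5   # halfway through the first epoch
    assert not training_clock.is_due(Epoch(1))
    training_clock.step = 10  # end of the first epoch
    assert training_clock.is_due(Epoch(1))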