Lightning-Universe · Borda · Nov 1, 2022 · Oct 22, 2022 · Oct 22, 2022 · Oct 22, 2022
@@ -38,7 +38,7 @@ class PrintTableMetricsCallback(Callback):
     def __init__(self) -> None:
         self.metrics: List = []
 
-    def on_epoch_end(self, trainer: Trainer, pl_module: LightningModule) -> None:
+    def on_train_epoch_end(self, trainer: Trainer, pl_module: LightningModule) -> None:
         metrics_dict = copy.copy(trainer.callback_metrics)
         self.metrics.append(metrics_dict)
         rank_zero_info(dicts_to_table(self.metrics))

@@ -51,42 +51,13 @@ def on_fit_start(self, trainer: Trainer, pl_module: LightningModule) -> None:
             raise MisconfigurationException("SparseML only supports training with one optimizer.")
         optimizer = optimizer[0]
         optimizer = self.manager.modify(
-            pl_module, optimizer, steps_per_epoch=self._num_training_steps_per_epoch(trainer), epoch=0
+            pl_module, optimizer, steps_per_epoch=trainer.estimated_stepping_batches, epoch=0
         )
         trainer.optimizers = [optimizer]
 
     def on_fit_end(self, trainer: Trainer, pl_module: LightningModule) -> None:
         self.manager.finalize(pl_module)
 
-    def _num_training_steps_per_epoch(self, trainer: Trainer) -> int:
-        """Total training steps inferred from the datamodule and devices."""
-        if isinstance(trainer.limit_train_batches, int) and trainer.limit_train_batches != 0:
-            dataset_size = trainer.limit_train_batches
-        elif isinstance(trainer.limit_train_batches, float):
-            # limit_train_batches is a percentage of batches
-            dataset_size = len(trainer.datamodule.train_dataloader())
-            dataset_size = int(dataset_size * trainer.limit_train_batches)
-        else:
-            dataset_size = len(trainer.datamodule.train_dataloader())
-
-        if hasattr(trainer, "num_devices"):
-            # New behavior in Lightning
-            num_devices = max(1, trainer.num_devices)
-        else:
-            # Old behavior deprecated in v1.6
-            num_devices = max(1, trainer.num_gpus, trainer.num_processes)
-            if trainer.tpu_cores:
-                num_devices = max(num_devices, trainer.tpu_cores)
-
-        effective_batch_size = trainer.accumulate_grad_batches * num_devices
-        max_estimated_steps = dataset_size // effective_batch_size
-
-        # To avoid breaking changes, max_steps is set to -1 if it is not defined
-        max_steps = -1 if not trainer.max_steps else trainer.max_steps
-        if max_steps != -1 and max_steps < max_estimated_steps:
-            return max_steps
-        return max_estimated_steps
-
     @staticmethod
     def export_to_sparse_onnx(
         model: LightningModule, output_dir: str, sample_batch: Optional[torch.Tensor] = None, **export_kwargs: Any

@@ -68,7 +68,7 @@ def setup(self, trainer: Trainer, pl_module: LightningModule, stage: Optional[st
         if self.dataset is None:
             self.dataset = trainer.datamodule.name
 
-    def on_pretrain_routine_start(self, trainer: Trainer, pl_module: LightningModule) -> None:
+    def on_fit_start(self, trainer: Trainer, pl_module: LightningModule) -> None:
         # must move to device after setup, as during setup, pl_module is still on cpu
         self.online_evaluator = SSLEvaluator(
             n_input=self.z_dim,
@@ -167,11 +167,11 @@ def on_validation_batch_end(
         pl_module.log("online_val_acc", val_acc, on_step=False, on_epoch=True, sync_dist=True)
         pl_module.log("online_val_loss", mlp_loss, on_step=False, on_epoch=True, sync_dist=True)
 
-    def on_save_checkpoint(self, trainer: Trainer, pl_module: LightningModule, checkpoint: Dict[str, Any]) -> dict:
+    def state_dict(self) -> dict:
         return {"state_dict": self.online_evaluator.state_dict(), "optimizer_state": self.optimizer.state_dict()}
 
-    def on_load_checkpoint(self, trainer: Trainer, pl_module: LightningModule, callback_state: Dict[str, Any]) -> None:
-        self._recovered_callback_state = callback_state
+    def load_state_dict(self, state_dict: Dict[str, Any]) -> None:
+        self._recovered_callback_state = state_dict
 
 
 @under_review()

@@ -59,7 +59,7 @@ def __init__(
         self.normalize = normalize
         self.steps = steps
 
-    def on_epoch_end(self, trainer: Trainer, pl_module: LightningModule) -> None:
+    def on_train_epoch_end(self, trainer: Trainer, pl_module: LightningModule) -> None:
         if (trainer.current_epoch + 1) % self.interpolate_epoch_interval == 0:
             images = self.interpolate_latent_space(pl_module, latent_dim=pl_module.hparams.latent_dim)
             images = torch.cat(images, dim=0)

@@ -70,7 +70,7 @@ def __init__(
         self.scale_each = scale_each
         self.pad_value = pad_value
 
-    def on_epoch_end(self, trainer: Trainer, pl_module: LightningModule) -> None:
+    def on_train_epoch_end(self, trainer: Trainer, pl_module: LightningModule) -> None:
         dim = (self.num_samples, pl_module.hparams.latent_dim)
         z = torch.normal(mean=0.0, std=1.0, size=dim, device=pl_module.device)
 

@@ -57,7 +57,7 @@ def training_step(self, batch: Tuple[Tensor, Tensor], _) -> OrderedDict:
         # calculates training loss
         loss = double_dqn_loss(batch, self.net, self.target_net, self.gamma)
 
-        if self._use_dp_or_ddp2(self.trainer):
+        if self._use_dp(self.trainer):
             loss = loss.unsqueeze(0)
 
         # Soft update of target network

@@ -7,7 +7,7 @@
 import torch
 from pytorch_lightning import LightningModule, Trainer, seed_everything
 from pytorch_lightning.callbacks import ModelCheckpoint
-from pytorch_lightning.plugins import DataParallelPlugin, DDP2Plugin
+from pytorch_lightning.strategies import DataParallelStrategy
 from torch import Tensor, optim
 from torch.optim.optimizer import Optimizer
 from torch.utils.data import DataLoader
@@ -269,7 +269,7 @@ def training_step(self, batch: Tuple[Tensor, Tensor], _) -> OrderedDict:
         # calculates training loss
         loss = dqn_loss(batch, self.net, self.target_net, self.gamma)
 
-        if self._use_dp_or_ddp2(self.trainer):
+        if self._use_dp(self.trainer):
             loss = loss.unsqueeze(0)
 
         # Soft update of target network
@@ -406,8 +406,8 @@ def add_model_specific_args(
         return arg_parser
 
     @staticmethod
-    def _use_dp_or_ddp2(trainer: Trainer) -> bool:
-        return isinstance(trainer.training_type_plugin, (DataParallelPlugin, DDP2Plugin))
+    def _use_dp(trainer: Trainer) -> bool:
+        return isinstance(trainer.strategy, DataParallelStrategy)
 
 
 @under_review()

@@ -112,7 +112,7 @@ def training_step(self, batch, _) -> OrderedDict:
         # calculates training loss
         loss, batch_weights = per_dqn_loss(samples, weights, self.net, self.target_net, self.gamma)
 
-        if self._use_dp_or_ddp2(self.trainer):
+        if self._use_dp(self.trainer):
             loss = loss.unsqueeze(0)
 
         # update priorities in buffer

@@ -14,7 +14,7 @@ def __init__(self, initial_lr=0.03, use_cosine_scheduler=False, schedule=(120, 1
         self.schedule = schedule
         self.max_epochs = max_epochs
 
-    def on_epoch_start(self, trainer, pl_module):
+    def on_train_epoch_start(self, trainer, pl_module):
         epoch = trainer.current_epoch
         lr = self.lr
 

@@ -13,7 +13,7 @@
 
 import torch
 from pytorch_lightning import LightningModule, Trainer
-from pytorch_lightning.plugins import DDP2Plugin, DDPPlugin
+from pytorch_lightning.strategies import DDPStrategy
 from torch import nn
 from torch.nn import functional as F
 
@@ -147,7 +147,7 @@ def _momentum_update_key_encoder(self):
     @torch.no_grad()
     def _dequeue_and_enqueue(self, keys, queue_ptr, queue):
         # gather keys before updating queue
-        if self._use_ddp_or_ddp2(self.trainer):
+        if self._use_ddp(self.trainer):
             keys = concat_all_gather(keys)
 
         batch_size = keys.shape[0]
@@ -226,14 +226,14 @@ def forward(self, img_q, img_k, queue):
         with torch.no_grad():  # no gradient to keys
 
             # shuffle for making use of BN
-            if self._use_ddp_or_ddp2(self.trainer):
+            if self._use_ddp(self.trainer):
                 img_k, idx_unshuffle = self._batch_shuffle_ddp(img_k)
 
             k = self.encoder_k(img_k)  # keys: NxC
             k = nn.functional.normalize(k, dim=1)
 
             # undo shuffle
-            if self._use_ddp_or_ddp2(self.trainer):
+            if self._use_ddp(self.trainer):
                 k = self._batch_unshuffle_ddp(k, idx_unshuffle)
 
         # compute logits
@@ -337,8 +337,8 @@ def add_model_specific_args(parent_parser):
         return parser
 
     @staticmethod
-    def _use_ddp_or_ddp2(trainer: Trainer) -> bool:
-        return isinstance(trainer.training_type_plugin, (DDPPlugin, DDP2Plugin))
+    def _use_ddp(trainer: Trainer) -> bool:
+        return isinstance(trainer.strategy, DDPStrategy)
 
 
 # utils

diff --git a/pl_bolts/utils/__init__.py b/pl_bolts/utils/__init__.py
@@ -1,49 +1,25 @@
-import importlib
 import operator
-from typing import Callable
 
 import torch
-from packaging.version import Version
-from pkg_resources import DistributionNotFound
-from pytorch_lightning.utilities import _module_available
+from lightning_utilities.core.imports import compare_version, module_available
 
 from pl_bolts.callbacks.verification.batch_gradient import BatchGradientVerification  # type: ignore
 
-
-# Ported from https://github.com/PyTorchLightning/pytorch-lightning/blob/master/pytorch_lightning/utilities/imports.py
-def _compare_version(package: str, op: Callable, version: str) -> bool:
-    """Compare package version with some requirements.
-
-    >>> _compare_version("torch", operator.ge, "0.1")
-    True
-    """
-    try:
-        pkg = importlib.import_module(package)
-    except (ModuleNotFoundError, DistributionNotFound):
-        return False
-    try:
-        pkg_version = Version(pkg.__version__)
-    except TypeError:
-        # this is mock by sphinx, so it shall return True ro generate all summaries
-        return True
-    return op(pkg_version, Version(version))
-
-
-_NATIVE_AMP_AVAILABLE: bool = _module_available("torch.cuda.amp") and hasattr(torch.cuda.amp, "autocast")
-
-_TORCHVISION_AVAILABLE: bool = _module_available("torchvision")
-_GYM_AVAILABLE: bool = _module_available("gym")
-_SKLEARN_AVAILABLE: bool = _module_available("sklearn")
-_PIL_AVAILABLE: bool = _module_available("PIL")
-_OPENCV_AVAILABLE: bool = _module_available("cv2")
-_WANDB_AVAILABLE: bool = _module_available("wandb")
-_MATPLOTLIB_AVAILABLE: bool = _module_available("matplotlib")
-_TORCHVISION_LESS_THAN_0_9_1: bool = _compare_version("torchvision", operator.lt, "0.9.1")
-_TORCHVISION_LESS_THAN_0_13: bool = _compare_version("torchvision", operator.le, "0.13.0")
-_PL_GREATER_EQUAL_1_4 = _compare_version("pytorch_lightning", operator.ge, "1.4.0")
-_PL_GREATER_EQUAL_1_4_5 = _compare_version("pytorch_lightning", operator.ge, "1.4.5")
-_TORCH_ORT_AVAILABLE = _module_available("torch_ort")
-_TORCH_MAX_VERSION_SPARSEML = _compare_version("torch", operator.lt, "1.11.0")
-_SPARSEML_AVAILABLE = _module_available("sparseml") and _PL_GREATER_EQUAL_1_4_5 and _TORCH_MAX_VERSION_SPARSEML
+_NATIVE_AMP_AVAILABLE: bool = module_available("torch.cuda.amp") and hasattr(torch.cuda.amp, "autocast")
+
+_TORCHVISION_AVAILABLE: bool = module_available("torchvision")
+_GYM_AVAILABLE: bool = module_available("gym")
+_SKLEARN_AVAILABLE: bool = module_available("sklearn")
+_PIL_AVAILABLE: bool = module_available("PIL")
+_OPENCV_AVAILABLE: bool = module_available("cv2")
+_WANDB_AVAILABLE: bool = module_available("wandb")
+_MATPLOTLIB_AVAILABLE: bool = module_available("matplotlib")
+_TORCHVISION_LESS_THAN_0_9_1: bool = compare_version("torchvision", operator.lt, "0.9.1")
+_TORCHVISION_LESS_THAN_0_13: bool = compare_version("torchvision", operator.lt, "0.13.0")  # strictly below 0.13
+_PL_GREATER_EQUAL_1_4 = compare_version("pytorch_lightning", operator.ge, "1.4.0")
+_PL_GREATER_EQUAL_1_4_5 = compare_version("pytorch_lightning", operator.ge, "1.4.5")
+_TORCH_ORT_AVAILABLE = module_available("torch_ort")
+_TORCH_MAX_VERSION_SPARSEML = compare_version("torch", operator.lt, "1.11.0")
+_SPARSEML_AVAILABLE = module_available("sparseml") and _PL_GREATER_EQUAL_1_4_5 and _TORCH_MAX_VERSION_SPARSEML
 
 __all__ = ["BatchGradientVerification"]
@@ -1,4 +1,2 @@
 torch>=1.9.*
-torchmetrics>=0.4.1
-pytorch-lightning>=1.6.0
 packaging
@@ -4,7 +4,6 @@
 import pytest
 import torch
 from pytorch_lightning import Trainer
-from pytorch_lightning.loggers import LoggerCollection, TensorBoardLogger
 from torch import nn
 
 from pl_bolts.callbacks import ModuleDataMonitor, TrainingDataMonitor
@@ -63,14 +62,6 @@ def test_base_no_logger_warning():
         monitor.on_train_start(trainer, pl_module=None)
 
 
-def test_base_unsupported_logger_warning(tmpdir):
-    """Test a warning is displayed when an unsupported logger is used."""
-    monitor = TrainingDataMonitor()
-    trainer = Trainer(logger=LoggerCollection([TensorBoardLogger(tmpdir)]), callbacks=[monitor])
-    with pytest.warns(UserWarning, match="does not support logging with LoggerCollection"):
-        monitor.on_train_start(trainer, pl_module=None)
-
-
 @mock.patch("pl_bolts.callbacks.data_monitor.TrainingDataMonitor.log_histogram")
 def test_training_data_monitor(log_histogram, tmpdir, datadir):
     """Test that the TrainingDataMonitor logs histograms of data points going into training_step."""

@@ -29,4 +29,4 @@ def test_mnist(tmpdir, datadir, catch_warnings):
     )
     trainer.fit(model, datamodule=datamodule)
     loss = trainer.callback_metrics["train_loss"]
-    assert loss <= 2.2, "mnist failed"
+    assert loss <= 2.3, "mnist failed"