From 7def795b75edbe1e63882163da40b876111af2c6 Mon Sep 17 00:00:00 2001 From: Sri Datta Budaraju Date: Mon, 16 May 2022 00:27:02 +0200 Subject: [PATCH] Add "Trainable" column (#128) * add is_trainable column * test: testcase for is_trainable column * model which has fully, partial and non trainable modules * update tests that require all columns to display is_trainable column as well * docs: update README.md * fix type ignores and nits * Rename is_trainable to trainable * Calculate trainable in pre_hook * Fix readme Co-authored-by: Tyler Yep --- README.md | 4 +++- tests/fixtures/models.py | 26 +++++++++++++++++++++ tests/test_output/parameter_list.out | 20 ++++++++-------- tests/test_output/single_input_all_cols.out | 24 +++++++++---------- tests/test_output/trainable_column.out | 19 +++++++++++++++ tests/torchinfo_test.py | 14 ++++++++--- torchinfo/enums.py | 1 + torchinfo/formatting.py | 2 ++ torchinfo/layer_info.py | 19 +++++++++++++++ torchinfo/torchinfo.py | 4 +++- 10 files changed, 106 insertions(+), 27 deletions(-) create mode 100644 tests/test_output/trainable_column.out diff --git a/README.md b/README.md index d731fd8..390c6bb 100644 --- a/README.md +++ b/README.md @@ -115,7 +115,8 @@ Summarize the given PyTorch model. Summarized information includes: 2) input/output shapes, 3) kernel shape, 4) # of parameters, - 5) # of operations (Mult-Adds) + 5) # of operations (Mult-Adds), + 6) whether layer is trainable NOTE: If neither input_data or input_size are provided, no forward pass through the network is performed, and the provided model information is limited to layer names. @@ -166,6 +167,7 @@ Args: "num_params", "kernel_size", "mult_adds", + "trainable", ) Default: ("output_size", "num_params") If input_data / input_size are not provided, only "num_params" is used. 
diff --git a/tests/fixtures/models.py b/tests/fixtures/models.py index 120ab83..8fc2004 100644 --- a/tests/fixtures/models.py +++ b/tests/fixtures/models.py @@ -502,6 +502,32 @@ def forward(self, x: torch.Tensor) -> torch.Tensor: return self.w * x + self.b +class MixedTrainable(nn.Module): + """Model with fully, partial and non trainable modules.""" + + def __init__(self) -> None: + super().__init__() + self.fully_trainable = nn.Conv1d(1, 1, 1) + + self.partially_trainable = nn.Conv1d(1, 1, 1, bias=True) + assert self.partially_trainable.bias is not None + self.partially_trainable.bias.requires_grad = False + + self.non_trainable = nn.Conv1d(1, 1, 1, 1, bias=True) + self.non_trainable.weight.requires_grad = False + assert self.non_trainable.bias is not None + self.non_trainable.bias.requires_grad = False + + self.dropout = nn.Dropout() + + def forward(self, x: torch.Tensor) -> torch.Tensor: + x = self.fully_trainable(x) + x = self.partially_trainable(x) + x = self.non_trainable(x) + x = self.dropout(x) + return x + + class ReuseLinear(nn.Module): """Model that uses a reference to the same Linear layer over and over.""" diff --git a/tests/test_output/parameter_list.out b/tests/test_output/parameter_list.out index 749c389..56f0a48 100644 --- a/tests/test_output/parameter_list.out +++ b/tests/test_output/parameter_list.out @@ -1,18 +1,18 @@ -=================================================================================================================== -Layer (type:depth-idx) Kernel Shape Input Shape Output Shape Param # Mult-Adds -=================================================================================================================== -ParameterListModel -- -- -- -- -- -├─ParameterList: 1-1 -- -- -- 30,000 -- -│ └─0 [100, 100] ├─10,000 -│ └─1 [100, 200] └─20,000 -=================================================================================================================== 
+================================================================================================================================================================ +Layer (type:depth-idx) Kernel Shape Input Shape Output Shape Param # Mult-Adds Trainable +================================================================================================================================================================ +ParameterListModel -- -- -- -- -- True +├─ParameterList: 1-1 -- -- -- 30,000 -- True +│ └─0 [100, 100] ├─10,000 +│ └─1 [100, 200] └─20,000 +================================================================================================================================================================ Total params: 30,000 Trainable params: 30,000 Non-trainable params: 0 Total mult-adds (M): 0.00 -=================================================================================================================== +================================================================================================================================================================ Input size (MB): 0.04 Forward/backward pass size (MB): 0.00 Params size (MB): 0.12 Estimated Total Size (MB): 0.16 -=================================================================================================================== +================================================================================================================================================================ diff --git a/tests/test_output/single_input_all_cols.out b/tests/test_output/single_input_all_cols.out index 7eb08f6..fcc62ff 100644 --- a/tests/test_output/single_input_all_cols.out +++ b/tests/test_output/single_input_all_cols.out @@ -1,20 +1,20 @@ -============================================================================================================================================ -Layer (type:depth-idx) Kernel Shape Input Shape Output Shape Param # Mult-Adds 
-============================================================================================================================================ -SingleInputNet -- -- -- -- -- -├─Conv2d: 1-1 [5, 5] [7, 1, 28, 28] [7, 10, 24, 24] 260 1,048,320 -├─Conv2d: 1-2 [5, 5] [7, 10, 12, 12] [7, 20, 8, 8] 5,020 2,248,960 -├─Dropout2d: 1-3 -- [7, 20, 8, 8] [7, 20, 8, 8] -- -- -├─Linear: 1-4 -- [7, 320] [7, 50] 16,050 112,350 -├─Linear: 1-5 -- [7, 50] [7, 10] 510 3,570 -============================================================================================================================================ +================================================================================================================================================================ +Layer (type:depth-idx) Kernel Shape Input Shape Output Shape Param # Mult-Adds Trainable +================================================================================================================================================================ +SingleInputNet -- -- -- -- -- True +├─Conv2d: 1-1 [5, 5] [7, 1, 28, 28] [7, 10, 24, 24] 260 1,048,320 True +├─Conv2d: 1-2 [5, 5] [7, 10, 12, 12] [7, 20, 8, 8] 5,020 2,248,960 True +├─Dropout2d: 1-3 -- [7, 20, 8, 8] [7, 20, 8, 8] -- -- -- +├─Linear: 1-4 -- [7, 320] [7, 50] 16,050 112,350 True +├─Linear: 1-5 -- [7, 50] [7, 10] 510 3,570 True +================================================================================================================================================================ Total params: 21,840 Trainable params: 21,840 Non-trainable params: 0 Total mult-adds (M): 3.41 -============================================================================================================================================ +================================================================================================================================================================ Input size (MB): 0.02 Forward/backward pass size (MB): 0.40 Params size (MB): 
0.09 Estimated Total Size (MB): 0.51 -============================================================================================================================================ +================================================================================================================================================================ diff --git a/tests/test_output/trainable_column.out b/tests/test_output/trainable_column.out new file mode 100644 index 0000000..eb06d8d --- /dev/null +++ b/tests/test_output/trainable_column.out @@ -0,0 +1,19 @@ +============================================================================================================================================ +Layer (type:depth-idx) Kernel Shape Input Shape Output Shape Trainable +============================================================================================================================================ +MixedTrainable -- -- -- Partial +├─Conv1d: 1-1 [1] [1, 1, 1] [1, 1, 1] True +├─Conv1d: 1-2 [1] [1, 1, 1] [1, 1, 1] Partial +├─Conv1d: 1-3 [1] [1, 1, 1] [1, 1, 1] False +├─Dropout: 1-4 -- [1, 1, 1] [1, 1, 1] -- +============================================================================================================================================ +Total params: 6 +Trainable params: 3 +Non-trainable params: 3 +Total mult-adds (M): 0.00 +============================================================================================================================================ +Input size (MB): 0.00 +Forward/backward pass size (MB): 0.00 +Params size (MB): 0.00 +Estimated Total Size (MB): 0.00 +============================================================================================================================================ diff --git a/tests/torchinfo_test.py b/tests/torchinfo_test.py index fd87683..20aa4cb 100644 --- a/tests/torchinfo_test.py +++ b/tests/torchinfo_test.py @@ -14,6 +14,7 @@ FakePrunedLayerModel, LinearModel, LSTMNet, + MixedTrainable, 
MixedTrainableParameters, ModuleDictModel, MultipleInputNetDifferentDtypes, @@ -111,13 +112,12 @@ def test_multiple_input_types() -> None: def test_single_input_all_cols() -> None: model = SingleInputNet() - col_names = ("kernel_size", "input_size", "output_size", "num_params", "mult_adds") input_shape = (7, 1, 28, 28) summary( model, input_data=torch.randn(*input_shape), depth=1, - col_names=col_names, + col_names=list(ColumnSettings), col_width=20, ) @@ -194,7 +194,7 @@ def test_parameter_list() -> None: input_size=(100, 100), verbose=2, col_names=list(ColumnSettings), - col_width=15, + col_width=20, ) @@ -462,3 +462,11 @@ def test_pruned_adversary() -> None: results = summary(second_model, input_size=(1,)) assert results.total_params == 32 # should be 64 + + +def test_trainable_column() -> None: + summary( + MixedTrainable(), + input_size=(1, 1, 1), + col_names=("kernel_size", "input_size", "output_size", "trainable"), + ) diff --git a/torchinfo/enums.py b/torchinfo/enums.py index 32ecfc0..ba6bf81 100644 --- a/torchinfo/enums.py +++ b/torchinfo/enums.py @@ -29,6 +29,7 @@ class ColumnSettings(str, Enum): OUTPUT_SIZE = "output_size" NUM_PARAMS = "num_params" MULT_ADDS = "mult_adds" + TRAINABLE = "trainable" @unique diff --git a/torchinfo/formatting.py b/torchinfo/formatting.py index 463e14d..d25b475 100644 --- a/torchinfo/formatting.py +++ b/torchinfo/formatting.py @@ -12,6 +12,7 @@ ColumnSettings.OUTPUT_SIZE: "Output Shape", ColumnSettings.NUM_PARAMS: "Param #", ColumnSettings.MULT_ADDS: "Mult-Adds", + ColumnSettings.TRAINABLE: "Trainable", } @@ -113,6 +114,7 @@ def layer_info_to_row( ColumnSettings.MULT_ADDS: layer_info.macs_to_str( reached_max_depth, children_layers ), + ColumnSettings.TRAINABLE: self.str_(layer_info.trainable), } start_str = self.get_start_str(layer_info.depth) layer_name = layer_info.get_layer_name(self.show_var_name, self.show_depth) diff --git a/torchinfo/layer_info.py b/torchinfo/layer_info.py index 87189f3..592ebfe 100644 --- 
a/torchinfo/layer_info.py +++ b/torchinfo/layer_info.py @@ -59,6 +59,7 @@ def __init__( self.param_bytes = 0 self.output_bytes = 0 self.macs = 0 + self.trainable = self.is_trainable(module) def __repr__(self) -> str: return f"{self.class_name}: {self.depth}" @@ -159,6 +160,24 @@ def get_kernel_size(module: nn.Module) -> int | list[int] | None: return kernel_size return None + @staticmethod + def is_trainable(module: nn.Module) -> str: + """ + Checks if the module is trainable. Returns: + "True", if all the parameters are trainable (`requires_grad=True`) + "False" if none of the parameters are trainable. + "Partial" if some weights are trainable, but not all. + "--" if the module has no parameters, like Dropout. + """ + module_requires_grad = [param.requires_grad for param in module.parameters()] + if not module_requires_grad: + return "--" + if all(module_requires_grad): + return "True" + if any(module_requires_grad): + return "Partial" + return "False" + def get_layer_name(self, show_var_name: bool, show_depth: bool) -> str: layer_name = self.class_name if show_var_name and self.var_name: diff --git a/torchinfo/torchinfo.py b/torchinfo/torchinfo.py index 9f279df..7c45033 100644 --- a/torchinfo/torchinfo.py +++ b/torchinfo/torchinfo.py @@ -70,7 +70,8 @@ def summary( 2) input/output shapes, 3) kernel shape, 4) # of parameters, - 5) # of operations (Mult-Adds) + 5) # of operations (Mult-Adds), + 6) whether layer is trainable NOTE: If neither input_data or input_size are provided, no forward pass through the network is performed, and the provided model information is limited to layer names. @@ -121,6 +122,7 @@ class name as the key. If the forward pass is an expensive operation, "num_params", "kernel_size", "mult_adds", + "trainable", ) Default: ("output_size", "num_params") If input_data / input_size are not provided, only "num_params" is used.