From 26c2803fe341c72108301c595efe753245d3637d Mon Sep 17 00:00:00 2001
From: Sankar Manoj
Date: Mon, 9 Sep 2024 15:22:14 +0530
Subject: [PATCH 1/4] #12376: Support for non-32 Height in Width Sharded Conv2d

---
 tests/ttnn/unit_tests/operations/test_new_conv2d.py | 9 +++++++--
 ttnn/cpp/ttnn/operations/conv/conv2d/conv2d.cpp     | 7 +++----
 2 files changed, 10 insertions(+), 6 deletions(-)

diff --git a/tests/ttnn/unit_tests/operations/test_new_conv2d.py b/tests/ttnn/unit_tests/operations/test_new_conv2d.py
index 43533a8b7fd..a6826b633a9 100644
--- a/tests/ttnn/unit_tests/operations/test_new_conv2d.py
+++ b/tests/ttnn/unit_tests/operations/test_new_conv2d.py
@@ -322,13 +322,17 @@ def run_conv_with_split(
 @pytest.mark.parametrize(
     "output_channels, input_channels, input_height, input_width, filter_height, filter_width, pad_h, pad_w, act_block_w_div",
     (
+        (128, 128, 8, 8, 3, 3, 0, 0, 1),
         (128, 256, 8, 8, 3, 3, 1, 1, 1),
+        (576, 576, 8, 8, 3, 3, 0, 0, 1),
+        (960, 960, 4, 4, 3, 3, 0, 0, 1),
         (256, 2048, 8, 8, 3, 3, 1, 1, 8),
         (512, 2048, 16, 16, 3, 3, 1, 1, 4),
-        (768, 768, 8, 8, 3, 3, 1, 1, 1),
         (768, 768, 16, 16, 3, 3, 1, 1, 1),
         (1280, 1280, 16, 16, 3, 3, 1, 1, 1),
         (1280, 2560, 16, 16, 3, 3, 1, 1, 2),
+        (1280, 2560, 16, 16, 3, 3, 0, 0, 2),
+
     ),
 )
 @pytest.mark.parametrize(
@@ -384,7 +388,7 @@ def test_conv_ws(
     torch_input_tensor_nchw = torch_input_tensor_nchw.broadcast_to(conv_input_shape).float()
     torch_input_tensor = torch.permute(torch_input_tensor_nchw, (0, 2, 3, 1))
 
-    torch_weight_tensor = torch.randn(conv_weight_shape, dtype=torch.bfloat16).float()
+    torch_weight_tensor = torch.ones(conv_weight_shape, dtype=torch.bfloat16).float()
 
     tt_bias_tensor = None
     torch_bias_tensor = None
@@ -457,6 +461,7 @@ def test_conv_ws(
 
     # torch_output_tensor is in row major layout and NHWC shape
     # NHWC to NCHW
+    # torch_output_tensor = torch_output_tensor[:, :, : batch_size * out_height * out_width, :]
     torch_output_tensor = torch_output_tensor.reshape(batch_size, out_height, out_width, output_channels)
     torch_output_tensor = torch.permute(torch_output_tensor, (0, 3, 1, 2))
 
diff --git a/ttnn/cpp/ttnn/operations/conv/conv2d/conv2d.cpp b/ttnn/cpp/ttnn/operations/conv/conv2d/conv2d.cpp
index bdf570f175a..85f4534ee3c 100644
--- a/ttnn/cpp/ttnn/operations/conv/conv2d/conv2d.cpp
+++ b/ttnn/cpp/ttnn/operations/conv/conv2d/conv2d.cpp
@@ -229,13 +229,13 @@ OptimizedConvParallelizationConfig determine_conv_op_parallel_config_from_conv_o
     TT_ASSERT(conv_output_mem_config.shard_spec.has_value());
     const auto& shard_spec = conv_output_mem_config.shard_spec.value();
     const auto& shard_shape = shard_spec.shape;
-    TT_ASSERT(shard_shape[0] % 32 == 0);
+    // TT_ASSERT(shard_shape[0] % 32 == 0);
     TT_ASSERT(shard_shape[1] % 32 == 0);
     return {
         .grid_size = shard_spec.grid.bounding_box().grid_size(),
         .num_cores_nhw = num_cores_nhw,
         .num_cores_c = num_cores_c,
-        .per_core_out_matrix_height_ntiles = shard_shape[0] / 32,
+        .per_core_out_matrix_height_ntiles = tt::round_up(shard_shape[0], 32) / 32,
         .per_core_out_matrix_width_ntiles = shard_shape[1] / 32,
     };
 }
@@ -765,8 +765,7 @@ std::tuple
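Patch 1/4 relaxes the tile-alignment requirement on the shard height: instead of asserting that shard_shape[0] is a multiple of 32, the per-core output height is rounded up to the next 32-row tile. A minimal Python sketch of that computation (not the tt-metal code; the tile size of 32 is taken from the asserts above, the sample shard heights are illustrative only):

    # Python equivalent of tt::round_up(shard_shape[0], 32) / 32 from the patch.
    TILE_HEIGHT = 32

    def per_core_out_matrix_height_ntiles(shard_height: int) -> int:
        # Round the shard height up to the next tile edge, then count tiles.
        return (shard_height + TILE_HEIGHT - 1) // TILE_HEIGHT

    # Example: a batch-2, 8x8 output has 2 * 8 * 8 = 128 rows; with a 3x3
    # filter and no padding the output shrinks to 2 * 6 * 6 = 72 rows,
    # which is not a multiple of 32 and previously tripped the assert.
    for shard_height in (128, 72, 32):
        print(shard_height, "->", per_core_out_matrix_height_ntiles(shard_height))
    # 128 -> 4, 72 -> 3 (rounded up from 2.25), 32 -> 1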
From: Sankar Manoj
Date: Wed, 11 Sep 2024 11:05:45 +0530
Subject: [PATCH 2/4] #12376: Support for non-32 Height in Width Sharded Conv2d

---
 tests/ttnn/unit_tests/operations/test_new_conv2d.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/tests/ttnn/unit_tests/operations/test_new_conv2d.py b/tests/ttnn/unit_tests/operations/test_new_conv2d.py
index a6826b633a9..4ddf497dd97 100644
--- a/tests/ttnn/unit_tests/operations/test_new_conv2d.py
+++ b/tests/ttnn/unit_tests/operations/test_new_conv2d.py
@@ -328,8 +328,7 @@ def run_conv_with_split(
         (960, 960, 4, 4, 3, 3, 0, 0, 1),
         (256, 2048, 8, 8, 3, 3, 1, 1, 8),
         (512, 2048, 16, 16, 3, 3, 1, 1, 4),
-        (768, 768, 16, 16, 3, 3, 1, 1, 1),
-        (1280, 1280, 16, 16, 3, 3, 1, 1, 1),
+        (768, 768, 16, 16, 3, 3, 0, 0, 1),
         (1280, 2560, 16, 16, 3, 3, 1, 1, 2),
         (1280, 2560, 16, 16, 3, 3, 0, 0, 2),
 
@@ -388,7 +387,7 @@ def test_conv_ws(
     torch_input_tensor_nchw = torch_input_tensor_nchw.broadcast_to(conv_input_shape).float()
     torch_input_tensor = torch.permute(torch_input_tensor_nchw, (0, 2, 3, 1))
 
-    torch_weight_tensor = torch.ones(conv_weight_shape, dtype=torch.bfloat16).float()
+    torch_weight_tensor = torch.randn(conv_weight_shape, dtype=torch.bfloat16).float()
 
     tt_bias_tensor = None
     torch_bias_tensor = None
@@ -463,12 +462,13 @@ def test_conv_ws(
     # NHWC to NCHW
     # torch_output_tensor = torch_output_tensor[:, :, : batch_size * out_height * out_width, :]
     torch_output_tensor = torch_output_tensor.reshape(batch_size, out_height, out_width, output_channels)
-
+    logger.info(f"Output Shape : {torch_output_tensor.shape}")
    torch_output_tensor = torch.permute(torch_output_tensor, (0, 3, 1, 2))
 
     reader_patterns_cache.clear()
     pcc = 0.94
     passing, pcc_msg = check_with_pcc_without_tensor_printout(torch_output_tensor, torch_out_golden_tensor, pcc=pcc)
+    logger.info(f"{pcc_msg} Threshold : {pcc}")
     if not passing:
         logger.error("Fails with PCC ", pcc_msg)
     assert passing

From d9fe62a10f5c99a4cbd0abc8d7a370ab7e4c62b6 Mon Sep 17 00:00:00 2001
From: Sankar Manoj
Date: Wed, 11 Sep 2024 09:46:58 +0000
Subject: [PATCH 3/4] #0: Skipped WS Conv2d on Grayskull

---
 tests/ttnn/unit_tests/operations/test_new_conv2d.py | 8 +-------
 1 file changed, 1 insertion(+), 7 deletions(-)

diff --git a/tests/ttnn/unit_tests/operations/test_new_conv2d.py b/tests/ttnn/unit_tests/operations/test_new_conv2d.py
index 4ddf497dd97..33fd62f35fa 100644
--- a/tests/ttnn/unit_tests/operations/test_new_conv2d.py
+++ b/tests/ttnn/unit_tests/operations/test_new_conv2d.py
@@ -317,6 +317,7 @@ def run_conv_with_split(
     assert_with_pcc(torch_output_tensor, torch_out_golden_tensor, pcc=pcc)
 
 
+@skip_for_grayskull()
 @pytest.mark.parametrize("device_params", [{"l1_small_size": 16384}], indirect=True)
 @pytest.mark.parametrize("stride", [1, 2])
 @pytest.mark.parametrize(
@@ -331,7 +332,6 @@ def run_conv_with_split(
         (768, 768, 16, 16, 3, 3, 0, 0, 1),
         (1280, 2560, 16, 16, 3, 3, 1, 1, 2),
         (1280, 2560, 16, 16, 3, 3, 0, 0, 2),
-
     ),
 )
 @pytest.mark.parametrize(
@@ -363,12 +363,6 @@ def test_conv_ws(
     weights_dtype,
     activations_dtype,
 ):
-    if is_grayskull():
-        if input_channels >= 2048:
-            pytest.skip("Skipping on grayskull due to insufficient L1")
-        if input_channels >= 768 and input_height >= 10:
-            pytest.skip("Skipping on grayskull due to insufficient L1")
-
     stride_h = stride
     stride_w = stride
     batch_size = 2
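Patch 3/4 replaces the in-body pytest.skip branches with a single @skip_for_grayskull() marker, so the width-sharded cases are skipped at collection time on that architecture. A hedged sketch of what such a helper can look like (the real decorator lives in the tt-metal test utilities; the ARCH_NAME environment-variable check here is an assumption for illustration only):

    import os

    import pytest

    def is_grayskull() -> bool:
        # Assumption for this sketch: the target architecture is advertised
        # through the ARCH_NAME environment variable.
        return os.environ.get("ARCH_NAME", "").startswith("grayskull")

    def skip_for_grayskull(reason="WS conv2d needs more L1 than Grayskull provides"):
        # pytest.mark.skipif evaluates the condition once at collection time
        # and skips every parametrization of the decorated test when it holds.
        return pytest.mark.skipif(is_grayskull(), reason=reason)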
From 0441baeb8bfe626bc691c4ead259ff4457655382 Mon Sep 17 00:00:00 2001
From: Sankar Manoj
Date: Thu, 12 Sep 2024 10:10:18 +0530
Subject: [PATCH 4/4] #0: Bug Fix

---
 ttnn/cpp/ttnn/operations/conv/conv2d/conv2d.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ttnn/cpp/ttnn/operations/conv/conv2d/conv2d.cpp b/ttnn/cpp/ttnn/operations/conv/conv2d/conv2d.cpp
index 85f4534ee3c..aa5daf1510e 100644
--- a/ttnn/cpp/ttnn/operations/conv/conv2d/conv2d.cpp
+++ b/ttnn/cpp/ttnn/operations/conv/conv2d/conv2d.cpp
@@ -229,7 +229,7 @@ OptimizedConvParallelizationConfig determine_conv_op_parallel_config_from_conv_o
     TT_ASSERT(conv_output_mem_config.shard_spec.has_value());
     const auto& shard_spec = conv_output_mem_config.shard_spec.value();
     const auto& shard_shape = shard_spec.shape;
-    // TT_ASSERT(shard_shape[0] % 32 == 0);
+    TT_ASSERT(conv_output_mem_config.memory_layout == TensorMemoryLayout::WIDTH_SHARDED || shard_shape[0] % 32 == 0);
     TT_ASSERT(shard_shape[1] % 32 == 0);
     return {
         .grid_size = shard_spec.grid.bounding_box().grid_size(),
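Patch 4/4 restores the shard-height check that patch 1/4 had commented out, but scopes it: only width-sharded outputs may carry a non-tile-multiple shard height, while every other layout still requires tile alignment. A minimal Python rendering of the relaxed invariant (layout names are simplified strings for the example, not the TensorMemoryLayout enum):

    # Sketch of the relaxed validation: shard height must be tile-aligned
    # unless the conv output is width sharded; shard width always must be.
    TILE = 32

    def validate_shard_shape(memory_layout: str, shard_shape: tuple) -> None:
        assert memory_layout == "WIDTH_SHARDED" or shard_shape[0] % TILE == 0, (
            "non-width-sharded conv2d output requires a tile-aligned shard height"
        )
        assert shard_shape[1] % TILE == 0, "shard width must stay tile-aligned"

    validate_shard_shape("WIDTH_SHARDED", (72, 128))   # ok: height rounded up later
    validate_shard_shape("HEIGHT_SHARDED", (64, 128))  # ok: height tile-aligned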