Skip to content

Commit

Permalink
#12435: Add queue_id and optional output tensors to div_bw op (#12697)
Browse files Browse the repository at this point in the history
* #12435: Add queue_id and optional output tensors to div_bw op

* #12435: Add queue_id and optional output tensors to div_bw op

* #12435: cleanup binary bw ops

---------

Co-authored-by: KalaivaniMCW <kbaskar@tenstorrent.com>
  • Loading branch information
mouliraj-mcw and KalaivaniMCW authored Sep 20, 2024
1 parent 72f5ccd commit 08a123c
Show file tree
Hide file tree
Showing 5 changed files with 278 additions and 93 deletions.
99 changes: 99 additions & 0 deletions tests/ttnn/unit_tests/operations/backward/test_backward_div.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ def test_bw_div_binary(input_shapes, round_mode, device):

if round_mode == None:
round_mode = "None"

tt_output_tensor_on_device = ttnn.div_bw(grad_tensor, input_tensor, other_tensor, round_mode=round_mode)

status = compare_pcc(tt_output_tensor_on_device, golden_tensor)
Expand Down Expand Up @@ -175,3 +176,101 @@ def test_bw_unary_div_default(input_shapes, scalar, device):

status = compare_pcc(tt_output_tensor_on_device, golden_tensor)
assert status


@pytest.mark.parametrize(
    "input_shapes",
    (
        (torch.Size([1, 1, 32, 32])),
        (torch.Size([1, 1, 320, 384])),
        (torch.Size([1, 3, 320, 384])),
    ),
)
@pytest.mark.parametrize(
    "round_mode",
    (
        "None",
        "trunc",
        "floor",
    ),
)
@pytest.mark.parametrize("scalar", [0.05, 1.0, 0.5, 0.12, 0.0, -0.05, -1.0, -0.5, -0.12])
def test_bw_div_scalar_opt_output(input_shapes, scalar, round_mode, device):
    # div_bw against a scalar divisor, with the gradient written into a
    # preallocated output tensor on an explicit command queue.
    in_data, input_tensor = data_gen_with_range(input_shapes, -100, 100, device, True)
    grad_data, grad_tensor = data_gen_with_range(input_shapes, -5, 5, device)
    _, input_grad = data_gen_with_range(input_shapes, -1, 1, device)

    queue = 0
    buffer_pages_before = ttnn._ttnn.reports.get_buffer_pages()
    ttnn.div_bw(grad_tensor, input_tensor, scalar, round_mode=round_mode, input_grad=input_grad, queue_id=queue)
    buffer_pages_after = ttnn._ttnn.reports.get_buffer_pages()
    # With a preallocated output the op must not allocate any new buffers.
    assert len(buffer_pages_before) == len(buffer_pages_after)
    device_grads = [input_grad]

    # The torch reference expects None (not the string "None") for plain division.
    torch_round_mode = None if round_mode == "None" else round_mode
    golden_function = ttnn.get_golden_function(ttnn.div_bw)
    golden_tensor = golden_function(grad_data, in_data, scalar, torch_round_mode)

    assert compare_pcc(device_grads, golden_tensor)


@pytest.mark.parametrize(
    "input_shapes",
    (
        (torch.Size([1, 1, 32, 32])),
        (torch.Size([1, 1, 320, 384])),
        (torch.Size([1, 3, 320, 384])),
    ),
)
@pytest.mark.parametrize(
    "round_mode",
    (
        "None",
        "trunc",
        "floor",
    ),
)
@pytest.mark.parametrize("are_required_outputs", [[True, True], [True, False], [False, True]])
def test_bw_div_opt(input_shapes, round_mode, are_required_outputs, device):
    # Tensor-tensor div_bw with optional preallocated gradient outputs: only
    # the gradients flagged in are_required_outputs are requested and checked.
    in_data, input_tensor = data_gen_with_range(input_shapes, -100, 100, device, True)
    other_data, other_tensor = data_gen_with_range(input_shapes, -100, 100, device, True)
    grad_data, grad_tensor = data_gen_with_range(input_shapes, -100, 100, device)

    # Preallocate only the gradient tensors the op is asked to produce.
    input_grad = data_gen_with_range(input_shapes, -1, 1, device)[1] if are_required_outputs[0] else None
    other_grad = data_gen_with_range(input_shapes, -1, 1, device)[1] if are_required_outputs[1] else None

    queue = 0
    buffer_pages_before = ttnn._ttnn.reports.get_buffer_pages()
    ttnn.div_bw(
        grad_tensor,
        input_tensor,
        other_tensor,
        round_mode=round_mode,
        are_required_outputs=are_required_outputs,
        input_grad=input_grad,
        other_grad=other_grad,
        queue_id=queue,
    )
    # Outputs were preallocated, so the buffer pool must not have grown.
    assert len(buffer_pages_before) == len(ttnn._ttnn.reports.get_buffer_pages())
    device_grads = [input_grad, other_grad]

    # The torch reference expects None (not the string "None") for plain division.
    torch_round_mode = None if round_mode == "None" else round_mode
    golden_function = ttnn.get_golden_function(ttnn.div_bw)
    golden_tensor = golden_function(grad_data, in_data, other_data, torch_round_mode)

    # Accumulate PCC results over every requested gradient, then assert once.
    status = True
    for required, actual, expected in zip(are_required_outputs, device_grads, golden_tensor):
        if required:
            status = status & compare_pcc([actual], [expected])
    assert status
Original file line number Diff line number Diff line change
Expand Up @@ -203,14 +203,17 @@ struct ExecuteBackwardAdd {
const Tensor &grad_tensor_arg,
const Tensor &input_tensor_arg,
float scalar,
const std::optional<MemoryConfig> &memory_config = std::nullopt);
const std::optional<MemoryConfig> &memory_config = std::nullopt,
std::optional<Tensor> input_grad = std::nullopt);

static std::vector<std::optional<Tensor>> invoke(
const Tensor &grad_tensor_arg,
const Tensor &input_tensor_a_arg,
const Tensor &input_tensor_b_arg,
const std::vector<bool> &are_required_outputs = std::vector<bool>{true, true},
const std::optional<MemoryConfig> &memory_config = std::nullopt);
const std::optional<MemoryConfig> &memory_config = std::nullopt,
std::optional<Tensor> input_grad = std::nullopt,
std::optional<Tensor> other_grad = std::nullopt);

static std::vector<ComplexTensor> invoke(
const ComplexTensor &grad_tensor_arg,
Expand Down Expand Up @@ -244,15 +247,17 @@ struct ExecuteBackwardSub {
const Tensor &grad_tensor_arg,
const Tensor &input_tensor_arg,
float scalar,
const std::optional<MemoryConfig> &memory_config = std::nullopt);

const std::optional<MemoryConfig> &memory_config = std::nullopt,
std::optional<Tensor> input_grad = std::nullopt);

static std::vector<std::optional<Tensor>> invoke(
const Tensor &grad_tensor_arg,
const Tensor &input_tensor_a_arg,
const Tensor &input_tensor_b_arg,
const std::vector<bool> &are_required_outputs = std::vector<bool>{true, true},
const std::optional<MemoryConfig> &memory_config = std::nullopt);
const std::optional<MemoryConfig> &memory_config = std::nullopt,
std::optional<Tensor> input_grad = std::nullopt,
std::optional<Tensor> other_grad = std::nullopt);

static std::vector<ComplexTensor> invoke(
const ComplexTensor &grad_tensor_arg,
Expand All @@ -264,19 +269,43 @@ struct ExecuteBackwardSub {
};

struct ExecuteBackwardDiv {
static std::vector<Tensor> invoke(
static std::vector<std::optional<Tensor>> invoke(
uint8_t queue_id,
const Tensor &grad_tensor_arg,
const Tensor &input_tensor_arg,
float scalar,
string round_mode = "None",
const std::optional<MemoryConfig> &memory_config = std::nullopt);
const std::optional<MemoryConfig> &memory_config = std::nullopt,
std::optional<Tensor> input_grad = std::nullopt);

static std::vector<Tensor> invoke(
static std::vector<std::optional<Tensor>> invoke(
uint8_t queue_id,
const Tensor &grad_tensor_arg,
const Tensor &input_tensor_arg,
const Tensor &other_tensor_arg,
string round_mode = "None",
const std::optional<MemoryConfig> &memory_config = std::nullopt);
const std::vector<bool> &are_required_outputs = std::vector<bool>{true, true},
const std::optional<MemoryConfig> &memory_config = std::nullopt,
std::optional<Tensor> input_grad = std::nullopt,
std::optional<Tensor> other_grad = std::nullopt);

static std::vector<std::optional<Tensor>> invoke(
const Tensor &grad_tensor_arg,
const Tensor &input_tensor_arg,
float scalar,
string round_mode = "None",
const std::optional<MemoryConfig> &memory_config = std::nullopt,
std::optional<Tensor> input_grad = std::nullopt);

static std::vector<std::optional<Tensor>> invoke(
const Tensor &grad_tensor_arg,
const Tensor &input_tensor_arg,
const Tensor &other_tensor_arg,
string round_mode = "None",
const std::vector<bool> &are_required_outputs = std::vector<bool>{true, true},
const std::optional<MemoryConfig> &memory_config = std::nullopt,
std::optional<Tensor> input_grad = std::nullopt,
std::optional<Tensor> other_grad = std::nullopt);

static std::vector<ComplexTensor> invoke(
const ComplexTensor &grad_tensor_arg,
Expand Down Expand Up @@ -357,7 +386,9 @@ struct ExecuteBackwardSubAlpha {
const Tensor &input_tensor_b_arg,
float alpha,
const std::vector<bool> &are_required_outputs = std::vector<bool>{true, true},
const std::optional<MemoryConfig> &memory_config = std::nullopt);
const std::optional<MemoryConfig> &memory_config = std::nullopt,
std::optional<Tensor> input_grad = std::nullopt,
std::optional<Tensor> other_grad = std::nullopt);

};

Expand All @@ -377,7 +408,9 @@ struct ExecuteBackwardRsub {
const Tensor &input_tensor_a_arg,
const Tensor &input_tensor_b_arg,
const std::vector<bool> &are_required_outputs = std::vector<bool>{true, true},
const std::optional<MemoryConfig> &memory_config = std::nullopt);
const std::optional<MemoryConfig> &memory_config = std::nullopt,
std::optional<Tensor> input_grad = std::nullopt,
std::optional<Tensor> other_grad = std::nullopt);

};

Expand Down Expand Up @@ -452,7 +485,9 @@ constexpr auto sub_bw = ttnn::register_operation<
"ttnn::sub_bw",
operations::binary_backward::ExecuteBackwardSub>();

constexpr auto div_bw = ttnn::register_operation<"ttnn::div_bw", operations::binary_backward::ExecuteBackwardDiv>();
constexpr auto div_bw = ttnn::register_operation<
"ttnn::div_bw",
operations::binary_backward::ExecuteBackwardDiv>();

constexpr auto remainder_bw = ttnn::register_operation<
"ttnn::remainder_bw",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -790,8 +790,10 @@ void bind_binary_bw_div(py::module& module, const binary_backward_operation_t& o
* :attr:`input_tensor_b` (ComplexTensor or ttnn.Tensor or Number): the tensor or number to add to :attr:`input_tensor_a`.
Keyword args:
* :attr:`are_required_outputs` (Optional[bool]): required output gradients
* :attr:`memory_config` (Optional[ttnn.MemoryConfig]): memory config for the output tensor
* :attr:`dtype` (Optional[ttnn.DataType]): data type for the output tensor
* :attr:`output_tensor` (Optional[ttnn.Tensor]): preallocated output tensor
* :attr:`queue_id` (Optional[uint8]): command queue id
Supported dtypes, layouts, and ranks:
Expand Down Expand Up @@ -822,15 +824,19 @@ void bind_binary_bw_div(py::module& module, const binary_backward_operation_t& o
const Tensor& input_tensor_a,
const float scalar,
std::string round_mode,
const std::optional<MemoryConfig>& memory_config){
return self(grad_tensor, input_tensor_a, scalar, round_mode, memory_config);
const std::optional<ttnn::MemoryConfig>& memory_config,
const std::optional<ttnn::Tensor>& input_grad,
const uint8_t& queue_id) -> std::vector<std::optional<ttnn::Tensor>> {
return self(queue_id, grad_tensor, input_tensor_a, scalar, round_mode, memory_config, input_grad);
},
py::arg("grad_tensor"),
py::arg("input_tensor_a"),
py::arg("scalar"),
py::kw_only(),
py::arg("round_mode") = "None",
py::arg("memory_config") = std::nullopt},
py::arg("memory_config") = std::nullopt,
py::arg("input_grad") = std::nullopt,
py::arg("queue_id") = ttnn::DefaultQueueId},

// tensor and tensor
ttnn::pybind_overload_t{
Expand All @@ -839,15 +845,23 @@ void bind_binary_bw_div(py::module& module, const binary_backward_operation_t& o
const ttnn::Tensor& input_tensor,
const ttnn::Tensor& other_tensor,
std::string round_mode,
const std::optional<ttnn::MemoryConfig>& memory_config) {
return self(grad_tensor, input_tensor, other_tensor, round_mode, memory_config);
const std::vector<bool>& are_required_outputs,
const std::optional<ttnn::MemoryConfig>& memory_config,
const std::optional<ttnn::Tensor>& input_grad,
const std::optional<ttnn::Tensor>& other_grad,
const uint8_t& queue_id) -> std::vector<std::optional<ttnn::Tensor>> {
return self(queue_id, grad_tensor, input_tensor, other_tensor, round_mode, are_required_outputs, memory_config, input_grad, other_grad);
},
py::arg("grad_tensor"),
py::arg("input_tensor"),
py::arg("other_tensor"),
py::kw_only(),
py::arg("round_mode") = "None",
py::arg("memory_config") = std::nullopt},
py::arg("are_required_outputs") = std::vector<bool>{true, true},
py::arg("memory_config") = std::nullopt,
py::arg("input_grad") = std::nullopt,
py::arg("other_grad") = std::nullopt,
py::arg("queue_id") = ttnn::DefaultQueueId},

// complex tensor
ttnn::pybind_overload_t{
Expand Down
Loading

0 comments on commit 08a123c

Please sign in to comment.