diff --git a/onnxruntime/core/providers/cpu/controlflow/scan.cc b/onnxruntime/core/providers/cpu/controlflow/scan.cc index 3366c68cb7981..b4ea993954c06 100644 --- a/onnxruntime/core/providers/cpu/controlflow/scan.cc +++ b/onnxruntime/core/providers/cpu/controlflow/scan.cc @@ -1,6 +1,13 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. +// there's no way to use a raw pointer as the copy destination with std::copy_n +// (which gsl::copy uses with span::data() which returns a raw pointer) with the 14.11 toolset +// without generating a 4996 warning. going through an iterator is way too much overhead so turn off the warning. +#ifdef _MSC_VER +#pragma warning(push) +#pragma warning(disable : 4996) +#endif #include "core/providers/cpu/controlflow/scan.h" #include "core/framework/framework_common.h" @@ -12,6 +19,10 @@ #include "core/providers/cpu/tensor/utils.h" +#ifdef _MSC_VER +#pragma warning(pop) +#endif + using namespace ONNX_NAMESPACE; using namespace onnxruntime::common; @@ -118,6 +129,64 @@ class LoopStateVariable { MLValue b_; }; +/* +Class that co-ordinates writing to slices of the overall Scan output buffer returned by OpKernelContext.Output(i). +If the subgraph has a symbolic dimension in an output it will use a temporary MLValue for the first execution +in order to discover the output shape. Once the shape is known, it will switch to using the overall output buffer +to avoid copies. +*/ +class OutputIterator { + public: + static Status Create(OpKernelContextInternal& context, + int output_index, + bool is_loop_state_var, + TensorShape final_shape, + std::unique_ptr& iterator) { + iterator.reset(new OutputIterator(context, output_index, is_loop_state_var, final_shape)); + return iterator->Initialize(); + } + + MLValue& operator*(); + OutputIterator& operator++(); + + // set the output for the current iteration to zeros. used for short sequence lengths + void ZeroOutCurrent() { + auto* tensor = (**this).GetMutable(); + memset(tensor->MutableDataRaw(), 0, tensor->Size()); + } + + private: + OutputIterator(OpKernelContextInternal& context, + int output_index, + bool is_loop_state_var, + TensorShape final_shape); + + Status Initialize(); + Status AllocateFinalBuffer(); + Status MakeConcrete(); + + OpKernelContextInternal& context_; + const int output_index_; + TensorShapeProto per_iteration_shape_; + TensorShape final_shape_; + bool is_loop_state_var_; + int64_t num_iterations_; + int64_t cur_iteration_; + + // is the final shape concrete, or does it have symbolic dimensions + bool is_concrete_shape_; + + // one or more slicers for writing to the output + std::vector::Iterator> slicer_iterators_; + std::vector::Iterator>::iterator cur_slicer_iterator_; + + // if shape is not concrete we need the first output to know the missing dimension before + // we can allocate final_output_mlvalue_ and use the slicers. + MLValue first_output_; + + MLValue* final_output_mlvalue_; +}; + class ScanImpl { public: ScanImpl(OpKernelContextInternal& context, @@ -135,10 +204,10 @@ class ScanImpl { private: // validate inputs and setup batch size and max sequence length. Status ValidateInput(); - Status ValidateSubgraphInput(int start_input, int end_input, bool has_seq_len_dim, + Status ValidateSubgraphInput(int start_input, int end_input, bool is_loop_state_var, const std::vector& graph_inputs); - Status AllocateOutput(int index, bool has_sequence_len); + Status AllocateOutput(int index, bool is_loop_state_var); Status AllocateOutputTensors(); Status CreateLoopStateVariables(std::vector>& loop_state_variables); @@ -147,7 +216,6 @@ class ScanImpl { Status IterateSequence(std::vector& loop_state_variables, ConstTensorSlicerIterators& scan_input_stream_iterators, - MutableTensorSlicerIterators& scan_output_stream_iterators, int64_t seq_length); OpKernelContextInternal& context_; @@ -166,6 +234,7 @@ class ScanImpl { std::vector sequence_lens_; std::vector subgraph_output_names_; + std::vector> output_iterators_; std::unordered_map implicit_inputs_; }; @@ -249,6 +318,150 @@ void LoopStateVariable::Next() { ++iteration_num_; } +// fill in a symbolic dimension in the overall output using the output shape from an iteration of the subgraph +static Status MakeShapeConcrete(const TensorShape& per_iteration_shape, TensorShape& final_shape) { + auto num_dims_per_iteration = per_iteration_shape.NumDimensions(); + auto final_shape_offset = final_shape.NumDimensions() - num_dims_per_iteration; + for (size_t i = 0; i < num_dims_per_iteration; ++i) { + auto existing_value = final_shape[i + final_shape_offset]; + if (existing_value == -1) { + final_shape[i + final_shape_offset] = per_iteration_shape[i]; + } else { + if (existing_value != per_iteration_shape[i]) { + return ONNXRUNTIME_MAKE_STATUS(ONNXRUNTIME, FAIL, + "Mismatch between expected shape and shape from first output", + final_shape, " is not compatible with ", per_iteration_shape); + } + } + } + + return Status::OK(); +} + +OutputIterator::OutputIterator(OpKernelContextInternal& context, + int output_index, + bool is_loop_state_var, + TensorShape final_shape) + : context_{context}, + output_index_{output_index}, + final_shape_{final_shape}, + is_loop_state_var_{is_loop_state_var}, + cur_iteration_{0} { + is_concrete_shape_ = final_shape_.Size() >= 0; + + // there are one or two dimensions being iterated depending on whether it's a loop state variable or scan input. + auto num_iteration_dims = is_loop_state_var_ ? 1 : 2; + num_iterations_ = final_shape_.Slice(0, num_iteration_dims).Size(); +} + +Status OutputIterator::Initialize() { + Status status = Status::OK(); + + if (is_loop_state_var_ && !is_concrete_shape_) { + // copy the shape from the input initial value which will have a concrete shape. + auto* input = context_.Input(output_index_ + 1); // +1 to skip the sequence_len input + status = MakeShapeConcrete(input->Shape(), final_shape_); + ONNXRUNTIME_RETURN_IF_ERROR(status); + + is_concrete_shape_ = true; + } + + if (is_concrete_shape_) { + status = AllocateFinalBuffer(); + ONNXRUNTIME_RETURN_IF_ERROR(status); + } else { + // use first_output_ + } + + return Status::OK(); +} + +Status OutputIterator::AllocateFinalBuffer() { + // make sure a single buffer for the full output is created upfront. + // we slice this into per-iteration pieces in Execute using MLValueTensorSlicer. + auto* tensor = context_.Output(output_index_, final_shape_); + + if (!tensor) + return ONNXRUNTIME_MAKE_STATUS(ONNXRUNTIME, FAIL, "Failed to create output tensor for output #", output_index_); + + // get the output tensor we just created as an MLValue + final_output_mlvalue_ = context_.GetOutputMLValue(output_index_); + + if (is_loop_state_var_) { + // only one entry is required as we slice on a single dimension + slicer_iterators_.push_back(MLValueTensorSlicer::Create(*final_output_mlvalue_).begin()); + } else { + auto batch_size = final_shape_[0]; + for (int i = 0; i < batch_size; ++i) { + // the slicer handles the sequence dimension (dim 1) so create an entry for each batch + slicer_iterators_.push_back(MLValueTensorSlicer::Create(*final_output_mlvalue_, 1, i).begin()); + } + } + + cur_slicer_iterator_ = slicer_iterators_.begin(); + + return Status::OK(); +} + +Status OutputIterator::MakeConcrete() { + ONNXRUNTIME_ENFORCE(first_output_.IsAllocated(), "First usage of OutputIterator did not result in any output."); + Status status = Status::OK(); + + auto& tensor = first_output_.Get(); + auto& tensor_shape = tensor.Shape(); + + // update the final shape + status = MakeShapeConcrete(tensor_shape, final_shape_); + ONNXRUNTIME_RETURN_IF_ERROR(status); + + is_concrete_shape_ = true; + status = AllocateFinalBuffer(); + ONNXRUNTIME_RETURN_IF_ERROR(status); + + // copy first output to final buffer + auto input_span = gsl::make_span(static_cast(tensor.DataRaw()), tensor.Size()); + + auto output = (**this).GetMutable(); + auto output_span = gsl::make_span(static_cast(output->MutableDataRaw()), output->Size()); + + gsl::copy(input_span, output_span); + + // release the MLValue we used for the first output + first_output_ = {}; + + return status; +} + +MLValue& OutputIterator::operator*() { + ONNXRUNTIME_ENFORCE(cur_iteration_ < num_iterations_); + + if (is_concrete_shape_) + return **cur_slicer_iterator_; + else + return first_output_; +} + +OutputIterator& OutputIterator::operator++() { + if (cur_iteration_ < num_iterations_) { + if (!is_concrete_shape_) { + // we should have an output now, so convert to using the overall output buffer and slicers + auto status = MakeConcrete(); + ONNXRUNTIME_ENFORCE(status.IsOK(), status.ErrorMessage()); + } + + ++cur_iteration_; + + // if not a loop state var, see if we just finished the current sequence (dim 1) + if (!is_loop_state_var_ && cur_iteration_ % final_shape_[1] == 0) { + ++cur_slicer_iterator_; + } else { + ++(*cur_slicer_iterator_); + } + } + + return *this; +} + ScanImpl::ScanImpl(OpKernelContextInternal& context, const SessionState& session_state, int64_t num_scan_inputs, @@ -258,7 +471,7 @@ ScanImpl::ScanImpl(OpKernelContextInternal& context, subgraph_{*session_state.GetGraphViewer()}, directions_{directions}, implicit_inputs_{context_.GetImplicitInputs()} { - //optional first input so may be nullptr + // optional first input so may be nullptr sequence_lens_tensor_ = context.Input(0); num_variadic_inputs_ = context_.NumVariadicInputs(1); @@ -271,12 +484,12 @@ Status ScanImpl::Initialize() { auto status = ValidateInput(); ONNXRUNTIME_RETURN_IF_ERROR(status); - auto& graph_outputs = subgraph_.GetOutputs(); - subgraph_output_names_.reserve(graph_outputs.size()); + auto& subgraph_outputs = subgraph_.GetOutputs(); + subgraph_output_names_.reserve(subgraph_outputs.size()); // save list of subgraph output names in their provided order to use when fetching the results // from each subgraph execution. the Scan outputs will match this order. - for (auto& output : graph_outputs) { + for (auto& output : subgraph_outputs) { subgraph_output_names_.push_back(output->Name()); } @@ -301,10 +514,11 @@ static const MLValue& GetSubgraphInputMLValue(const OpKernelContextInternal& con } // Validate that the subgraph input has valid shapes -Status ScanImpl::ValidateSubgraphInput(int start_input, int end_input, bool has_seq_len_dim, +Status ScanImpl::ValidateSubgraphInput(int start_input, int end_input, bool is_loop_state_var, const std::vector& graph_inputs) { // first dim is batch size. optional sequence dim. dim/s for the data. // if there is no dim for the data treat it as a scalar. + bool has_seq_len_dim = !is_loop_state_var; auto min_dims_required = has_seq_len_dim ? 2 : 1; for (int i = start_input; i < end_input; ++i) { @@ -356,11 +570,11 @@ Status ScanImpl::ValidateInput() { } // process any loop state variables, which will set the batch size - auto status = ValidateSubgraphInput(0, num_loop_state_variables_, false, graph_inputs); + auto status = ValidateSubgraphInput(0, num_loop_state_variables_, true, graph_inputs); ONNXRUNTIME_RETURN_IF_ERROR(status); // process the scan inputs. sets/validates batch size and sequence length - status = ValidateSubgraphInput(num_loop_state_variables_, num_variadic_inputs_, true, graph_inputs); + status = ValidateSubgraphInput(num_loop_state_variables_, num_variadic_inputs_, false, graph_inputs); ONNXRUNTIME_RETURN_IF_ERROR(status); if (sequence_lens_tensor_ != nullptr) { @@ -387,11 +601,12 @@ Status ScanImpl::ValidateInput() { return Status::OK(); } -Status ScanImpl::AllocateOutput(int index, bool has_sequence_len_dimension) { +Status ScanImpl::AllocateOutput(int index, bool is_loop_state_var) { // use the shape from the subgraph output. we require this to be specified in the model or inferable. auto& graph_outputs = subgraph_.GetOutputs(); auto* graph_output = graph_outputs.at(index); auto* graph_output_shape = graph_output->Shape(); + if (!graph_output_shape) { return ONNXRUNTIME_MAKE_STATUS(ONNXRUNTIME, FAIL, "Subgraph must have the shape set for all outputs but ", graph_output->Name(), " did not."); @@ -405,24 +620,16 @@ Status ScanImpl::AllocateOutput(int index, bool has_sequence_len_dimension) { scan_output_dims.push_back(batch_size_); - if (has_sequence_len_dimension) { + if (!is_loop_state_var) { scan_output_dims.push_back(max_sequence_len_); } scan_output_dims.insert(scan_output_dims.cend(), graph_output_dims.cbegin(), graph_output_dims.cend()); - // make sure a single buffer for the full output is created upfront. - // we slice this into per-iteration pieces in Execute using MLValueTensorSlicer. - auto* tensor = context_.Output(index, TensorShape(scan_output_dims)); + std::unique_ptr output_iter; + OutputIterator::Create(context_, index, is_loop_state_var, TensorShape(scan_output_dims), output_iter); - if (!tensor) - return ONNXRUNTIME_MAKE_STATUS(ONNXRUNTIME, FAIL, "Failed to create output tensor for ", graph_output->Name()); - - // zero out the output so that any short sequences have deterministic values in unused slots. - // strictly speaking this isn't required, and alternatively we could fill with zeros when we - // encounter a short sequence and are creating output, but one memset is easy, involves - // less code complexity, and should be relatively cheap. - memset(tensor->MutableDataRaw(), 0, tensor->Size()); + output_iterators_.push_back(std::move(output_iter)); return Status::OK(); } @@ -436,17 +643,13 @@ Status ScanImpl::AllocateOutputTensors() { " outputs but Scan expects ", num_variadic_outputs_); } - // TODO: Need to handle shape/type inference for subgraphs. - // For now copy shape from subgraph output and expand based on batch size and sequence length - for (int i = 0; i < num_loop_state_variables_; ++i) { - const bool has_sequence_len_dimension = false; // loop state variables don't have a sequence_len dimension; - status = AllocateOutput(i, has_sequence_len_dimension); + status = AllocateOutput(i, true); ONNXRUNTIME_RETURN_IF_ERROR(status); } for (int i = num_loop_state_variables_, end = num_variadic_outputs_; i < end; ++i) { - status = AllocateOutput(i, true); + status = AllocateOutput(i, false); ONNXRUNTIME_RETURN_IF_ERROR(status); } @@ -462,9 +665,7 @@ Status ScanImpl::CreateLoopStateVariables(std::vector::Iterator> loop_state_input_iterators; - std::vector::Iterator> loop_state_output_iterators; loop_state_input_iterators.reserve(num_loop_state_variables_); - loop_state_output_iterators.reserve(num_loop_state_variables_); // create the input and output slice iterator for each loop state variable. for (int i = 0; i < num_loop_state_variables_; ++i) { @@ -474,7 +675,6 @@ Status ScanImpl::CreateLoopStateVariables(std::vector::Create(mlvalue).begin()); - loop_state_output_iterators.push_back(MLValueTensorSlicer::Create(*p_mlvalue).begin()); } batch_loop_state_variables.clear(); @@ -491,7 +691,7 @@ Status ScanImpl::CreateLoopStateVariables(std::vector::Iterator> scan_output_stream_iterators; - scan_output_stream_iterators.reserve(num_variadic_outputs_); - - for (int i = num_loop_state_variables_, end = num_variadic_outputs_; i < end; ++i) { - MLValue* p_mlvalue = context_.GetOutputMLValue(i); - ONNXRUNTIME_ENFORCE(p_mlvalue, "Output MLValue has not been created for output ", i); - - scan_output_stream_iterators.push_back(MLValueTensorSlicer::Create(*p_mlvalue, 1, b).begin()); - } - // Call the subgraph for each item in the sequence status = IterateSequence(batch_loop_state_variables[b], scan_input_stream_iterators, - scan_output_stream_iterators, sequence_lens_[b]); ONNXRUNTIME_RETURN_IF_ERROR(status); @@ -559,7 +747,6 @@ Status ScanImpl::Execute() { Status ScanImpl::IterateSequence(std::vector& loop_state_variables, ConstTensorSlicerIterators& scan_input_stream_iterators, - MutableTensorSlicerIterators& scan_output_stream_iterators, int64_t seq_length) { Status status = Status::OK(); auto& graph_inputs = subgraph_.GetInputs(); @@ -576,9 +763,8 @@ Status ScanImpl::IterateSequence(std::vector& loop_state_vari feeds[entry.first] = *entry.second; } - // as we fill all the outputs with 0 initially, just iterate seq_length not max_seq_length_ - // as we don't need to pad the output for a short sequence here. - for (int64_t seq_no = 0; seq_no < seq_length; ++seq_no) { + int64_t seq_no = 0; + for (; seq_no < seq_length; ++seq_no) { for (int input = 0; input < num_variadic_inputs_; ++input) { // the ordering of the Scan inputs should match the ordering of the subgraph inputs auto name = graph_inputs[input]->Name(); @@ -597,15 +783,24 @@ Status ScanImpl::IterateSequence(std::vector& loop_state_vari fetches.clear(); + // one or more outputs have symbolic dimensions and need the first fetch to be copied to the OutputIterator + bool have_symbolic_dim_in_output = false; + for (int output = 0, end = num_variadic_outputs_; output < end; ++output) { if (output < num_loop_state_variables_) { // add loop state variable output fetches.push_back(loop_state_variables[output].Output()); } else { - // add sliced output - auto& iterator = scan_output_stream_iterators[output - num_loop_state_variables_]; - fetches.push_back(*iterator); - ++iterator; + // add MLValue from sliced output + auto& iterator = *output_iterators_[output]; + auto& mlvalue = *iterator; + fetches.push_back(mlvalue); + + // mlvalue.IsAllocated will be false when the OutputIterator is using a temporary MLValue + // and not the overall output buffer. + have_symbolic_dim_in_output = seq_no == 0 && + (mlvalue.IsAllocated() == false || + have_symbolic_dim_in_output); // don't unset } } @@ -621,6 +816,28 @@ Status ScanImpl::IterateSequence(std::vector& loop_state_vari // cycle the LoopStateVariable input/output in preparation for the next iteration std::for_each(loop_state_variables.begin(), loop_state_variables.end(), [](LoopStateVariable& v) { v.Next(); }); + + // and move the output iterators. + for (int output = num_loop_state_variables_; output < num_variadic_outputs_; ++output) { + auto& iterator = *output_iterators_[output]; + + // copy data from the fetch to the iterator so it can setup the overall output when the iterator is incremented. + // if the iterator is already using the overall output buffer IsAllocated() will be true and no copy is required. + if (have_symbolic_dim_in_output && (*iterator).IsAllocated() == false) { + *iterator = fetches[output]; + } + + ++iterator; + } + } + + // zero out any remaining values in the sequence + for (; seq_length < max_sequence_len_; ++seq_length) { + for (int output = num_loop_state_variables_; output < num_variadic_outputs_; ++output) { + auto& iterator = *output_iterators_[output]; + iterator.ZeroOutCurrent(); + ++iterator; + } } return status; diff --git a/onnxruntime/test/providers/cpu/controlflow/if_test.cc b/onnxruntime/test/providers/cpu/controlflow/if_test.cc index b53b451a0d4eb..858d9c550fef3 100644 --- a/onnxruntime/test/providers/cpu/controlflow/if_test.cc +++ b/onnxruntime/test/providers/cpu/controlflow/if_test.cc @@ -16,7 +16,7 @@ namespace test { struct RunOptions { bool include_dim_values_in_main_graph = false; - bool symbolic_dim_values_in_main_graph = false; + int symbolic_dim_value_in_main_graph = -1; bool include_dim_values_in_subgraph = true; }; @@ -181,7 +181,7 @@ void RunTest(bool condition_value, IfOpTester test{options}; test.AddShapeToTensorData(options.include_dim_values_in_main_graph, - options.symbolic_dim_values_in_main_graph); + options.symbolic_dim_value_in_main_graph); // add the main graph inputs and outputs. // we will handle the 'If' inputs in the AddNodes override, and as 'If' is the last node diff --git a/onnxruntime/test/providers/cpu/controlflow/scan_test.cc b/onnxruntime/test/providers/cpu/controlflow/scan_test.cc index b943f90655600..d38682eeb15c6 100644 --- a/onnxruntime/test/providers/cpu/controlflow/scan_test.cc +++ b/onnxruntime/test/providers/cpu/controlflow/scan_test.cc @@ -270,8 +270,6 @@ void RunTest(const std::string test_name, int64_t batch_size, int64_t max_sequen ScanOpTester test; - test.AddShapeToTensorData(options.include_dim_values_in_main_graph); - test.AddAttribute("body", proto); test.AddAttribute("num_scan_inputs", 2); @@ -286,6 +284,8 @@ void RunTest(const std::string test_name, int64_t batch_size, int64_t max_sequen test.AddInput("sequence_lens", sequence_lens_dims, *sequence_lens); } + test.AddShapeToTensorData(options.include_dim_values_in_main_graph); + std::vector loop_state_shape{batch_size}; if (!options.scalar_loop_state_value) { loop_state_shape.push_back(1); @@ -689,5 +689,61 @@ TEST(Scan, MixedTypeInputs) { test.Run(); } +// create a subgraph that will have unknown dimensions in both the loop state variable and output +// after shape inferencing. +TEST(Scan, UnknownDimInSubgraphOutput) { + Model model("ScanBody"); + auto& graph = model.MainGraph(); + + TypeProto float_tensor; + float_tensor.mutable_tensor_type()->set_elem_type(TensorProto_DataType_FLOAT); + float_tensor.mutable_tensor_type()->mutable_shape()->add_dim()->set_dim_param("param"); + TypeProto int_tensor; + int_tensor.mutable_tensor_type()->set_elem_type(TensorProto_DataType_INT64); + int_tensor.mutable_tensor_type()->mutable_shape()->add_dim()->set_dim_param("param"); + + auto& state_in_1 = graph.GetOrCreateNodeArg("state_in_1", &float_tensor); + auto& scan_in_1 = graph.GetOrCreateNodeArg("scan_in_1", &float_tensor); + + auto& state_out_1 = graph.GetOrCreateNodeArg("state_out_1", &float_tensor); + auto& scan_out_1 = graph.GetOrCreateNodeArg("scan_out_1", &float_tensor); + + graph.AddNode("node1", "Identity", "Copy state_in_1 to scan_out_1", {&state_in_1}, {&scan_out_1}); + graph.AddNode("node2", "Identity", "Copy scan_in_1 to state_out_1", {&scan_in_1}, {&state_out_1}); + + graph.SetInputOrder({&state_in_1, &scan_in_1}); + graph.SetOutputOrder({&state_out_1, &scan_out_1}); + + auto status = graph.Resolve(); + EXPECT_EQ(status, Status::OK()); + + auto& scan_body = graph.ToGraphProto(); + + // Construct and run scan test + ScanOpTester test; + + int64_t batch_size = 1, sequence_len = 3, input_size = 1; + std::vector seq_shape{batch_size, sequence_len, input_size}; + std::vector state_shape{batch_size, input_size}; + + test.AddAttribute("body", scan_body); + test.AddAttribute("num_scan_inputs", 1); + test.AddMissingOptionalInput(); + + // we add a symbolic dimension to both the initial state and the scan input so we test + // the path that handles loop state variables (OutputIterator::Initialize) and + // the path that handles subgraph outputs (OutputIterator::MakeConcrete). + // Note that we cross the values over in the subgraph, so the symbolic dimension in + // initial_state_1 affects scan_out_1, and the symbolic dimension in scan_input_1 affects state_out_1. + test.AddShapeToTensorData(true, 1); // add shape and symbolic dim in dim 1 for initial_state_1 + test.AddInput("initial_state_1", state_shape, {0.0}); + test.AddShapeToTensorData(true, 2); // add shape and symbolic dim in dim 2 for scan_input_1 + test.AddInput("scan_input_1", seq_shape, {1.0, 2.0, 3.0}); + + test.AddOutput("final_state_1", state_shape, {3.0}); + test.AddOutput("scan_output_1", seq_shape, {0.0, 1.0, 2.0}); + + test.Run(); +} } // namespace test } // namespace onnxruntime diff --git a/onnxruntime/test/providers/provider_test_utils.cc b/onnxruntime/test/providers/provider_test_utils.cc index 2c118f1372f3f..7815ab4d3748f 100644 --- a/onnxruntime/test/providers/provider_test_utils.cc +++ b/onnxruntime/test/providers/provider_test_utils.cc @@ -407,7 +407,9 @@ void OpTester::Run(ExpectResult expect_result, const auto& expected_shape = expected_data.data_.Get().Shape(); EXPECT_TRUE(inferred_dims.size() == expected_shape.NumDimensions()); for (int d = 0; d < inferred_dims.size(); ++d) { - EXPECT_EQ(expected_shape[d], inferred_dims[d]); + // check equal unless the input involved a symbolic dimension + if (inferred_dims[d] != -1) + EXPECT_EQ(expected_shape[d], inferred_dims[d]) << "Output idx = " << idx << " dim = " << d; } } Check(expected_data, mlvalue.Get(), provider_type); diff --git a/onnxruntime/test/providers/provider_test_utils.h b/onnxruntime/test/providers/provider_test_utils.h index 0ce06aea34bd9..7c6abcebcce68 100644 --- a/onnxruntime/test/providers/provider_test_utils.h +++ b/onnxruntime/test/providers/provider_test_utils.h @@ -91,7 +91,11 @@ struct TTypeProto : ONNX_NAMESPACE::TypeProto { if (shape) { auto mutable_shape = mutable_tensor_type()->mutable_shape(); for (auto i : *shape) { - mutable_shape->add_dim()->set_dim_value(i); + auto* mutable_dim = mutable_shape->add_dim(); + if (i != -1) + mutable_dim->set_dim_value(i); + else + mutable_dim->set_dim_param("symbolic"); } } } @@ -145,10 +149,11 @@ class OpTester { // Set whether the NodeArg created by AddInput/AddOutput should include shape information // for Tensor types. If not added, shape inferencing should resolve. If added, shape inferencing - // should validate. Default is to not add. - OpTester& AddShapeToTensorData(bool add_shape = true, bool add_symbolic_dim = false) { + // should validate. Default is to not add. + // Additionally a symbolic dimension will be added if symbolic_dim matches a dimension in the input. + OpTester& AddShapeToTensorData(bool add_shape = true, int symbolic_dim = -1) { add_shape_to_tensor_data_ = add_shape; - add_symbolic_dim_to_tensor_data_ = add_symbolic_dim; + add_symbolic_dim_to_tensor_data_ = symbolic_dim; return *this; } @@ -268,7 +273,7 @@ class OpTester { ONNXRUNTIME_ENFORCE(shape.Size() == values_count, values_count, " input values doesn't match tensor size of ", shape.Size()); - auto allocator = ::onnxruntime::test::AllocatorManager::Instance().GetAllocator(CPU); + auto allocator = test::AllocatorManager::Instance().GetAllocator(CPU); auto size_in_bytes = values_count * sizeof(T); void* buffer = allocator->Alloc(size_in_bytes); auto p_tensor = std::make_unique(DataTypeImpl::GetType(), @@ -283,8 +288,8 @@ class OpTester { } std::vector dims_for_proto{dims}; - if (add_symbolic_dim_to_tensor_data_ && !dims.empty()) { - dims_for_proto[0] = -1; + if (add_symbolic_dim_to_tensor_data_ >= 0 && dims.size() > add_symbolic_dim_to_tensor_data_) { + dims_for_proto[add_symbolic_dim_to_tensor_data_] = -1; } TTypeProto type_proto(add_shape_to_tensor_data_ ? &dims_for_proto : nullptr); @@ -302,7 +307,7 @@ class OpTester { const char* domain_; int opset_version_; bool add_shape_to_tensor_data_ = true; - bool add_symbolic_dim_to_tensor_data_ = false; + int add_symbolic_dim_to_tensor_data_ = -1; std::vector input_data_; std::vector output_data_; std::vector initializer_index_;