Skip to content

Commit

Permalink
Remove local tensor copies
Browse files: browse the repository at this point in the history
pad, range, relu, reshape nodes
  • Loading branch information
kraiskil committed Jul 29, 2023
1 parent 14f2295 commit e5d7abc
Show file tree
Hide file tree
Showing 5 changed files with 39 additions and 85 deletions.
9 changes: 6 additions & 3 deletions src/nodes/pad.cc
Original file line number Diff line number Diff line change
Expand Up @@ -26,13 +26,15 @@ void Pad::parseAttributes( onnx::NodeProto &node )
/* Assign input tensors, resolve output tensor shapes, allocate output tensors */
void Pad::resolve(void)
{
data = inputs[0];
register_input(data, "data");
const Tensor *data = inputs[0];
register_input(inputs[0], "data");

const Tensor *pads_tensor = nullptr;
if (inputs.size() > 1) {
pads_tensor = inputs[1];
register_input(pads_tensor, "pads");
}
const Tensor *constant_value= nullptr;
if (inputs.size() > 2) {
// This is not a tensor but a scalar. Not sure how to handle - first scalar in onnx2c :)
constant_value = inputs[2];
Expand Down Expand Up @@ -85,7 +87,6 @@ void Pad::resolve(void)
t->data_type = onnx::TensorProto_DataType_FLOAT;
/* Store the created tensor both as reference in this node, and into
* the return value vector! */
output = t;
register_output(t, "output");

/* TODO: optional outputs? */
Expand All @@ -108,6 +109,8 @@ void Pad::print(std::ostream &dst) const
INDT_1 << " * mode: " << mode << std::endl;
INDT_1 << " */" << std::endl;

const Tensor *data = inputs[0];
const Tensor *output = outputs[0];

std::string iidxs = "";
std::string oidxs = "";
Expand Down
9 changes: 0 additions & 9 deletions src/nodes/pad.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@ class Pad : public Node {
public:
Pad() {
op_name = "Pad";
data=output=pads_tensor=constant_value=0;
value_attribute=0;
mode = "constant";
}
Expand All @@ -20,14 +19,6 @@ class Pad : public Node {
std::vector<int64_t> pads_attribute;
float value_attribute;

// input and output tensors
const Tensor *data;
const Tensor *output;
// inputs added in version 11
const Tensor *pads_tensor;
const Tensor *constant_value;


// The actual paddings used. Collected from pads_attribute or pads_tensor
std::vector<int64_t> paddings_start;
std::vector<int64_t> paddings_end;
Expand Down
45 changes: 14 additions & 31 deletions src/nodes/range.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,15 +10,8 @@ class Range : public Node {
public:
Range() {
op_name = "Range";
start=limit=delta=output=NULL;
}

// input and output
const Tensor *start;
const Tensor *limit;
const Tensor *delta;
const Tensor *output;

uint32_t output_size;

template <typename data_type>
Expand All @@ -34,6 +27,9 @@ class Range : public Node {
void resolve_limits()
{
data_type v_start, v_limit, v_delta;
const Tensor *start = inputs[0];
const Tensor *limit = inputs[1];
const Tensor *delta = inputs[2];

v_start = resolve_input_var<data_type>(start);
v_limit = resolve_input_var<data_type>(limit);
Expand All @@ -49,9 +45,12 @@ class Range : public Node {

if (inputs.size() != 3)
ERROR("Range node does not have 3 inputs");
start = inputs[0];
limit = inputs[1];
delta = inputs[2];
const Tensor *start = inputs[0];
const Tensor *limit = inputs[1];
const Tensor *delta = inputs[2];
register_input(start, "start_arg");
register_input(limit, "limit_arg");
register_input(delta, "delta_arg");

if( start->isConst == false )
ERROR("Unimplemented: non-constant input (start) to Range node");
Expand All @@ -78,40 +77,24 @@ class Range : public Node {
t->data_type = start->data_type;
/* Store the created tensor both as reference in this node, and into
* the return value vector! */
output = t;
outputs.push_back(t);

/* TODO: optional outputs? */
}


/* Print the function parameters - use the order they are introduced in the
* ONNX documentation */
virtual void print_parameters(std::ostream &dst, bool decorate ) const override
{
start->print_tensor(dst, !decorate);
dst << ", ";
limit->print_tensor(dst, !decorate);
dst << ", ";
delta->print_tensor(dst, !decorate);
dst << ", ";
output->print_tensor(dst, !decorate);
register_output(t, "output");
}


/* Body of the node implementing function */
virtual void print(std::ostream &dst) const override
{
const Tensor *start = inputs[0];
std::string dt = start->data_type_str();

INDT_1 << "/* Range" << std::endl;
INDT_1 << " */" << std::endl;


INDT_1 << dt <<" start = " << start->cname() << "[0];" << std::endl;
INDT_1 << dt <<" delta = " << delta->cname() << "[0];" << std::endl;
INDT_1 << dt <<" start = start_arg[0];" << std::endl;
INDT_1 << dt <<" delta = delta_arg[0];" << std::endl;
INDT_1 << "for(int i=0; i< "<< output_size << "; ++i) {" << std::endl;
INDT_2 << output->cname() << "[i] = start + (i * delta);" << std::endl;
INDT_2 << "output[i] = start + (i * delta);" << std::endl;
INDT_1 << "}" << std::endl;
}
};
Expand Down
26 changes: 7 additions & 19 deletions src/nodes/relu.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,38 +6,27 @@ class Relu : public Node {
public:
Relu() {
op_name = "Relu";
X=Y=NULL;
}
// inputs
const Tensor *X;
// outputs
const Tensor *Y;

virtual void print_parameters(std::ostream &dst, bool decorate ) const override
{
X->print_tensor_as_const(dst, !decorate);
dst << ", ";
Y->print_tensor(dst, !decorate);
}


virtual void print(std::ostream &dst) const override
{
const Tensor *X=inputs[0];
std::string type = X->data_type_str();

dst << "\t/*Relu*/" << std::endl;

dst << "\t" << type << " *X = (" << type << "*)" << X->cname() << ";" << std::endl;
dst << "\t" << type << " *Y = (" << type << "*)" << Y->cname() << ";" << std::endl;
dst << "\t" << type << " *X_ptr = (" << type << "*)X;" << std::endl;
dst << "\t" << type << " *Y_ptr = (" << type << "*)Y;" << std::endl;

dst << "\t" << "for( uint32_t i=0; i<" << X->data_num_elem() << "; i++ )" << std::endl;
dst << "\t\tY[i] = X[i] > 0 ? X[i] : 0;" << std::endl;
dst << "\t\tY_ptr[i] = X_ptr[i] > 0 ? X_ptr[i] : 0;" << std::endl;
dst << std::endl;
}

virtual void resolve(void) override
{
X = inputs[0];
const Tensor *X = inputs[0];
register_input(X, "X");
if(( typeConstraint_allFloatingPoints(X)
||typeConstraint_signed_integers(X) ) == false )
ERROR("Incorrect input for Relu");
Expand All @@ -49,8 +38,7 @@ class Relu : public Node {
for( auto d : X->data_dim )
rv->data_dim.push_back(d);
rv->data_type = X->data_type;
Y=rv;
outputs.push_back(rv);
register_output(rv, "Y");
}
};
}
35 changes: 12 additions & 23 deletions src/nodes/reshape.h
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,8 @@ class Reshape : public Node {
public:
Reshape() {
op_name = "Reshape";
data=shape=reshaped=NULL;
allowzero=0;
}
// inputs
const Tensor *data;
const Tensor *shape;
// outputs
const Tensor *reshaped;

int32_t allowzero;

Expand All @@ -28,36 +22,32 @@ class Reshape : public Node {
}


virtual void print_parameters(std::ostream &dst, bool decorate ) const override
{
data->print_tensor_as_const(dst, !decorate);
dst << ", ";
shape->print_tensor_as_const(dst, !decorate);
dst << ", ";
reshaped->print_tensor(dst, !decorate);
}


virtual void print(std::ostream &dst) const override
{
const Tensor *data = inputs[0];
std::string type = data->data_type_str();

/* TODO: is there ANY case where a reshape needs to re-order the internal data layout ? */
/* TODO: and if not - check that at least gcc can get rid of this copy! (So onnx2c doesn't need to) */
/* (check if implementing this with a single call to memcpy() would be sufficient hint for gcc to
optimize it away?) */
/* TODO: or - can we mark output an onnx2c-alias of input? */
/* Sounds similar to the aliasing of "Cast" node? */
dst << "\t/*Reshape*/" << std::endl;
dst << "\t" << type << " *data = (" << type << "*)" << data->cname() << ";" << std::endl;
dst << "\t" << type << " *reshaped = (" << type << "*)" << reshaped->cname() << ";" << std::endl;
dst << "\t" << type << " *data_ptr = (" << type << "*)data;" << std::endl;
dst << "\t" << type << " *reshaped_ptr = (" << type << "*)reshaped;" << std::endl;

dst << "\t" << "for( uint32_t i=0; i<" << data->data_num_elem() << "; i++ )" << std::endl;
dst << "\t\treshaped[i] = data[i];" << std::endl;
dst << "\t\treshaped_ptr[i] = data_ptr[i];" << std::endl;
dst << std::endl;
}

virtual void resolve(void) override
{
data = inputs[0];
shape = inputs[1];
const Tensor *data= inputs[0];
register_input(data, "data");
const Tensor *shape = inputs[1];
register_input(shape, "shape");

/* Reshape should allow only int64_t here,
* but that is a pointless restriction at this stage and does not play well
Expand Down Expand Up @@ -118,8 +108,7 @@ class Reshape : public Node {
rv->data_dim = out_data_dim;

rv->data_type = data->data_type;
reshaped = rv;
outputs.push_back(rv);
register_output(rv, "reshaped");
}
};
}

0 comments on commit e5d7abc

Please sign in to comment.