diff --git a/src/graph.cc b/src/graph.cc
index 492c7de..b8802cf 100644
--- a/src/graph.cc
+++ b/src/graph.cc
@@ -1,6 +1,7 @@
 // model resolving part of the toC Graph class
 #include "error.h"
 #include "graph.h"
+#include "nodes/graph_io.h"
 #include "onnx.pb.h"
 #include "options.h"
@@ -36,30 +37,47 @@ void Graph::processGraph(
 		LOG(DEBUG) << "  - " << t->name << std::endl;
 		n->isConst = true;
 		addTensor( n );
 	}
+	LOG(TRACE) << " (done marking input tensors)." << std::endl;
+
+	Node *graph_output_node = addGraphOutputMetanode();
 	for( auto o : onnx_graph.output() )
 	{
 		Tensor *t = getIoTensor( o );
 		t->isIO = true;
+		// There is the odd case (in tests, mostly), where a constant tensor is passed
+		// as graph output. Only in this case should the graph's output be generated into
+		// the C source.
+		t->generate = t->isConst;
+
+		t->consumers.push_back(graph_output_node);
+		graph_output_node->register_input(t, "");
+		LOG(TRACE) << "\t\t " << t->print_trace_dump() << std::endl;
 	}
 }
@@ -183,34 +201,44 @@ Tensor* Graph::getIoTensor(onnx::ValueInfoProto &vi)
 }
 
-
-bool Graph::getNodeInputTensors(const onnx::NodeProto &node, std::vector<const Tensor*> &inputs)
+// Populate the onnx2c_node's input tensors using already created tensors in the graph.
+// All inputs should exist as onnx2c tensors before calling this, or else we return false.
+bool Graph::getNodeInputTensors(const onnx::NodeProto &node, toC::Node *onnx2c_node)
 {
-	// TODO: ugly. Move where?
-	static Tensor unused;
-
-	// if all inputs can be found in the tensors-vector, then yes, inputs are resolved
+	// Step through the ONNX node's input tensors
 	for( auto i : node.input() )
 	{
 		bool input_resolved = false;
 
-		// Unused inputs don't need to be resolved.
+		// In case an input is not used by the node, ONNX has a dummy input
+		// for the node. This dummy input serves only to put the rest of the
+		// node's inputs in the correct order.
 		if( i == "" ) {
+			static Tensor input_is_unused_sentinel;
+			LOG(TRACE) << "\t-unnamed input tensor - using shared 'unused' sentinel tensor" << std::endl;
 			input_resolved = true;
-			inputs.push_back(&unused);
+			onnx2c_node->register_input(&input_is_unused_sentinel, "");
 			continue;
 		}
 
+		LOG(TRACE) << "Looking for input tensor '" << i << "':" << std::endl;
 		for( auto t : tensors ) {
			if ( t->name == i ) {
+				LOG(TRACE) << "\t- found input tensor '" << i << "':" << std::endl;
+				LOG(TRACE) << "\t\t " << t->print_trace_dump() << std::endl;
 				input_resolved = true;
-				inputs.push_back(t);
+				// Register with local name "" - since we don't have node context here,
+				// we don't know if it is named 'X', 'input', 'A' or whatever. The node's
+				// resolver assigns that name.
+				onnx2c_node->register_input(t, "");
 				break;
 			}
 		}
+		LOG(TRACE) << " finished looking" << std::endl;
 
 		// Node has an unresolved input tensor
 		if( input_resolved == false ) {
-			LOG(TRACE) << "Input tensor " << i << " not resolved" << std::endl;
+			LOG(DEBUG) << "Input tensor '" << i << "' not resolved" << std::endl;
 			return false;
 		}
 	}
@@ -223,26 +251,23 @@ bool Graph::getNodeInputTensors(const onnx::NodeProto &node, std::vector<const Tensor*> &inputs)
-bool Graph::tryResolveNode(onnx::NodeProto &node)
+bool Graph::tryResolveNode(onnx::NodeProto &onnx_node)
 {
-	std::vector<const Tensor*> inputs;
-	LOG(DEBUG) << "Resolving ONNX node " << node.name() << std::endl;
+	LOG(DEBUG) << "Resolving ONNX node '" << onnx_node.name() << "'" << std::endl;
 	for( auto n : nodes )
-		if( node.name() == n->onnx_name ) {
-			LOG(TRACE) << "Node " << node.name() << " already resolved"<< std::endl;
+		if( onnx_node.name() == n->onnx_name ) {
+			LOG(TRACE) << "Node '" << onnx_node.name() << "' already resolved"<< std::endl;
 			return true;
 		}
 [...]
-		LOG(DEBUG) << "    " << i->name << " - "<< i->data_type_str() << " { " << i->str_dimensions() << "}" << std::endl;
-		n->inputs.push_back(i);
-		const_cast<Tensor*>(i)->consumers.push_back(n);
+	if( getNodeInputTensors(onnx_node, n) == false ) {
+		LOG(DEBUG) << "getNodeInputTensors() failed. Not adding node!"<< std::endl;
+		delete n;
+		return false;
 	}
-	n->isResolved = false;
-	n->op_name = new_node;
-	n->onnx_name = node.name();
-
-	// onnx allows (or at least some tools create) nodes without names
-	// create unique names for those, e.g. "anonymous_5_relu"
-	if( n->onnx_name == "" ) {
+	// ONNX allows (or at least some tools create) nodes without names.
+	// Here we create unique names for those, e.g. "anonymous_5_relu".
+	if( onnx_node.name() == "" ) {
 		std::string name = "anonymous_";
 		name += n->op_name;
 		name += "_" + std::to_string(anonymous_nodes);
 		n->onnx_name = name;
 		anonymous_nodes++;
 	}
-	LOG(DEBUG) << " Name in C sources " << n->c_name() << std::endl;
+	else
+		n->onnx_name = onnx_node.name();
+
+	LOG(DEBUG) << " Node name in C sources " << n->c_name() << std::endl;
+	LOG(DEBUG) << " inputs: " << std::endl;
+
+	// Record this node as the consumer of the input tensors
+	for(unsigned iidx=0; iidx<(n->get_number_of_inputs()); iidx++) {
+		Tensor *i = n->get_input_tensor(iidx);
+		LOG(DEBUG) << "    " << i->name << " - "<< i->data_type_str() << " { " << i->str_dimensions() << "}" << std::endl;
+		const_cast<Tensor*>(i)->consumers.push_back(n);
+		i->print_trace_dump();
+	}
+	LOG(TRACE) << " (no more inputs)" << std::endl;
+	n->isResolved = false;
+	n->op_name = new_node;
+
+	LOG(DEBUG) << " Parsing node attributes" << std::endl;
+	if( onnx_node.attribute_size() != 0 )
+		n->parseAttributes( onnx_node );
+	LOG(TRACE) << " (done parsing attributes)" << std::endl;
 
-	if( node.attribute_size() != 0 )
-		n->parseAttributes( node );
+	// Now loop over the node inputs, and check that they are all added
+	// into the graph's known tensors - it seems the ONNX graph does not keep track of
+	// vectors provided as nodes' attributes.
+	LOG(DEBUG) << " Making sure node attributes are in the graph" << std::endl;
+	for(unsigned nn = 0; nn < n->get_number_of_inputs(); nn++)
+		addTensor(n->get_input_tensor(nn));
+	LOG(TRACE) << " (end of attribute-input-vectors)" << std::endl;
 
 	// create output nodes for the tensor.
 	// this is a kludge around a chicken & egg problem caused by bad design in
@@ -284,14 +331,14 @@ bool Graph::tryResolveNode(onnx::NodeProto &node)
 	// So create a list that tells if outputs are used or not *before* resolving
 	// the node.
 	std::vector<bool> output_used;
-	for(int nn = 0; nn<node.output_size(); nn++)
+	for(int nn = 0; nn<onnx_node.output_size(); nn++)
 		output_used.push_back( onnx_node.output(nn) != "" );
 	n->set_output_used(output_used);
 
 	// Configure Node internals, and populate its outputs vector.
+	LOG(TRACE) << "Resolving node" << std::endl;
 	n->resolve();
 
 	// Add the output tensors the resolve() generated to the graph's list of tensors.
+	// Name the generated output tensors according to how they are named in
+	// the ONNX model.
 	// This will now contain all of the node's outputs, also such optional ones
 	// that are not used in the model.
-	for( unsigned o=0; o<n->get_outputs().size(); o++) {
-		Tensor *t = n->get_outputs()[o];
+	LOG(DEBUG) << "Adding resolved node's output to graph's tensors" << std::endl;
+	for( unsigned o=0; o<n->get_number_of_outputs(); o++) {
+		Tensor *t = n->get_output_tensor(o);
 
 		// optional outputs are named "" or just omitted
 		std::string onnx_name;
 		if( n->is_output_N_used(o) )
-			onnx_name = node.output(o);
+			onnx_name = onnx_node.output(o);
 		else
 			onnx_name = "";
 
@@ -327,10 +378,14 @@ bool Graph::tryResolveNode(onnx::NodeProto &node)
 		addTensor(t);
 	}
 
-	LOG(DEBUG) << " outputs: " << std::endl;
-	for( auto o : n->get_outputs())
-		LOG(DEBUG) << "    " << o->name << " - "<< o->data_type_str() << " { " << o->str_dimensions() << "}" << std::endl;
+	LOG(DEBUG) << " (done) all outputs now:" << std::endl;
+	for( unsigned o=0; o<n->get_number_of_outputs(); o++) {
+		Tensor *t = n->get_output_tensor(o);
+		LOG(DEBUG) << "    " << t->name << " - "<< t->data_type_str() << " { " << t->str_dimensions() << "}" << std::endl;
+	}
+	LOG(TRACE) << " (no more outputs)" << std::endl;
 
+	log_trace_all_tensors();
 	n->isResolved = true;
 	nodes.push_back(n);
 	return true;
@@ -395,6 +450,8 @@ int64_t Graph::onnx_ir_version(void)
 #include "nodes/unsqueeze.h"
 #include "nodes/upsample.h"
 
+// Create a new onnx2c Node from an operand name of an ONNX Graph node.
+// NB: the onnx2c-special graph input and graph output nodes are not created here.
 Node* Graph::createNode(std::string opName)
 {
 	if( opName == "Abs" )return new Elementwise("Abs");
@@ -518,6 +575,7 @@ void Graph::addTensor(Tensor *t)
 	if( prev == NULL ) {
 		tensors.push_back(t);
 		LOG(DEBUG) << "New tensor: " << t->name << " - "<< t->data_type_str() << " { " << t->str_dimensions() << "}" << std::endl;
+		LOG(TRACE) << "   " << t->print_trace_dump();
 	// TODO return & remove else {}
 	}
 	else {
@@ -530,8 +588,6 @@ void Graph::addTensor(Tensor *t)
 		// Since this tensor was already added, it was added
 		// because it is a graph output.
 		// This is because recursion means recursion to the same node, not a general loop in the network.
-		if( prev->isIO == false )
-			ERROR("Update logic failure (i.e. this is an assert fail)");
 		prev->generate = t->generate;
 		prev->initialize = t->initialize;
 		prev->isRecursive = true;
@@ -556,6 +612,10 @@ void Graph::addTensor(Tensor *t)
 		if( t->isIO && prev->initialize == false)
 			prev->isIO=true;
 
+		// Some graph IO (output) tensors are not marked with dimensions in ONNX files
+		if( prev->rank() == 0 )
+			prev->data_dim = t->data_dim;
+
 		LOG(TRACE) << " now: " << prev->print_trace_dump() << std::endl;
 	}
 }
@@ -578,3 +638,28 @@ void Graph::replaceWithQuantized(std::vector<Tensor*> &inputs)
+
+Node* Graph::addGraphInputMetanode()
+{
+	Node *n = new graph_io();
+	n->isResolved = true;
+	n->onnx_name = "graph_input";
+	nodes.push_back(n);
+	return n;
+}
+
+Node* Graph::addGraphOutputMetanode()
+{
+	Node *n = new graph_io();
+	n->isResolved = true;
+	n->onnx_name = "graph_output";
+	nodes.push_back(n);
+	return n;
+}
+
+Node* Graph::findNodeByName( const std::string node_name )
+{
+	for( auto n : nodes )
+		if( n->onnx_name == node_name )
+			return n;
+	return nullptr;
+}
diff --git a/src/graph.h b/src/graph.h
index eda3bd0..3fe284f 100644
--- a/src/graph.h
+++ b/src/graph.h
@@ -44,7 +44,7 @@ class Graph {
 	Tensor* getIoTensor(onnx::ValueInfoProto &vi);
 
 	void replaceWithQuantized(std::vector<Tensor*> &inputs);
-	bool getNodeInputTensors(const onnx::NodeProto &node, std::vector<const Tensor*> &inputs);
+	bool getNodeInputTensors(const onnx::NodeProto &node, toC::Node *inputs);
 	bool tryResolveNode(onnx::NodeProto &node);
 	bool hasUnresolvedNodes(void);
@@ -60,6 +60,8 @@ class Graph {
 	std::vector<Tensor*> tensors;
 	// The kernels/nodes/operators of the network.
 	std::vector<Node*> nodes;
+	Node* findNodeByName( const std::string node_name );
+
 	// Should onnx2c print debug info while compiling
 	bool verbose_mode;
@@ -68,6 +70,10 @@ class Graph {
 	 * the existing tensor is updated */
 	void addTensor(Tensor *t);
 
+	Node* addGraphInputMetanode(void);
+	Node* addGraphOutputMetanode(void);
+
+
 	void log_trace_all_tensors(void)
 	{
 		LOG(TRACE) << "All known tensors at this moment:" << std::endl;
@@ -76,6 +82,7 @@ class Graph {
 	Tensor *findTensor(const std::string &name) const;
 
+	// counter for naming anonymous nodes with a number
 	static int anonymous_nodes;
 
 	// For the unionize optimization.
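
Taken together, the two factories above plus the wiring in processGraph() give the graph a closed topology. A minimal sketch of the result, assuming the input side mirrors the graph_output wiring shown in processGraph() (the names graph, in_t and out_t are illustrative, not onnx2c API):

	// Sketch: graph boundary wiring with the new meta-nodes.
	Node *gin  = graph.addGraphInputMetanode();   // onnx_name "graph_input"
	Node *gout = graph.addGraphOutputMetanode();  // onnx_name "graph_output"

	// A graph input tensor is produced by the input meta-node...
	gin->register_output(in_t, "");
	// ...and a graph output tensor is consumed by the output meta-node,
	// exactly as processGraph() does above.
	out_t->consumers.push_back(gout);
	gout->register_input(out_t, "");

	// Passes can later locate the boundary by the fixed names:
	Node *boundary = graph.findNodeByName("graph_output");

Every tensor now has at least one producing or consuming Node, which is what print_interface_function() below relies on when it enumerates the graph_output node's inputs instead of walking model.graph().output().
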
diff --git a/src/graph_print.cc b/src/graph_print.cc
index 6727e68..8d1896a 100644
--- a/src/graph_print.cc
+++ b/src/graph_print.cc
@@ -49,7 +49,7 @@ void Graph::print_tensor(const Tensor *t, std::ostream &dst)
 {
 	if( t->generate == false )
 		return;
-	if( t->isIO == true )
+	if( t->name == "" )
 		return;
 	if( t->data_dim.size() == 0 )
 		ERROR("Tensor of no dimensions?");
@@ -75,12 +75,16 @@ void Graph::print_tensor(const Tensor *t, std::ostream &dst)
 void Graph::print_global_tensors(std::ostream &dst)
 {
 	// ununionized tensors
+	LOG(TRACE) << "printing global tensors - ununionized " << std::endl;
 	for( auto t : tensors ) {
+		LOG(TRACE) << "\t" << t->print_trace_dump() << std::endl;
-		if( t->union_no < 0 )
+		if( t->union_no < 0
+		    && t->generate)
			print_tensor(t, dst);
 	}
 
+	LOG(TRACE) << "printing global tensors - unionized " << std::endl;
 	for( unsigned u=0; u<unions.size(); u++)
 		[...]
 
 	for( auto n : nodes ) {
+		if( n->op_name == "graph_io" )
+			continue;
+		dst << "/*" << std::endl;
+		dst << " * Operand: " << n->op_name << std::endl;
+		dst << " * Name in ONNX file: " << n->onnx_name << std::endl;
+		dst << " */" << std::endl;
 		dst << "static inline void ";
 		dst << n->c_name() << "( ";
 		n->print_function_parameters_definition(dst);
@@ -148,16 +160,25 @@ void Graph::print_interface_function(std::ostream &dst, bool definition)
 		}
 	}
 
-	for ( auto i : model.graph().output() ) {
-		/* TODO: when there are more than one output, see above for how
-		 * inputs are handled */
-		Tensor *t = findTensor(i.name());
+	// Find the graph output node, then
+	// loop through the output node's inputs, printing them.
+	Node *graph_out_node = findNodeByName("graph_output");
+	if( graph_out_node == nullptr )
+		ERROR("internal onnx2c error: no graph_output node");
+
+	for( unsigned o=0; o<graph_out_node->get_number_of_inputs(); o++)
+	{
+		Tensor *t = graph_out_node->get_input_tensor(o);
 		if( t ) {
 			if(!isfirst)
 				dst << ", ";
 			else
 				isfirst = false;
+
+			// Kludge... in contrived cases (like unit tests), the graph can have a constant vector as its output.
+			// Since this is the last function we write anyway...
+			t->isConst = false;
 			t->print_tensor(dst);
 		}
 	}
@@ -176,6 +197,10 @@ void Graph::print_interface_function(std::ostream &dst, bool definition)
 	// we don't need to check dependencies :)
 	for( auto n : nodes ) {
+		// handle meta-nodes separately
+		if( n->op_name == "graph_io" )
+			continue;
+
 		dst << "\t" << n->c_name() << "( ";
 		n->print_function_parameters_callsite(dst);
 		dst << ");" << std::endl;
diff --git a/src/node.cc b/src/node.cc
index 8674502..d441afe 100644
--- a/src/node.cc
+++ b/src/node.cc
@@ -1,6 +1,5 @@
 #include "error.h"
-#include "graph.h"
 #include "node.h"
@@ -117,19 +116,29 @@ void Node::print_parameters(std::ostream &dst, bool not_callsite ) const
 	for( auto i : input_params ) {
 		const Tensor *t = std::get<0>(i);
 		std::string name = std::get<1>(i);
+		// Unused inputs in the ONNX model still exist in the onnx2c node too
+		if( t->is_used() == false )
+			continue;
 		if( not_callsite )
 			params.push_back( t->print_tensor_as_const(name) );
 		else
 			params.push_back( t->print_tensor_callsite() );
 	}
 
 	for( auto o : output_params ) {
-		const Tensor *t = std::get<0>(o);
+		Tensor *t = std::get<0>(o);
 		// A node does not know at its resolve time if an optional
 		// output is used, so it registers all. Once all nodes
 		// are resolved, the tensor knows if someone uses it.
 		if( t->is_used() == false )
 			continue;
 		std::string name = std::get<1>(o);
+
+		// Corner case with the Shape node: in case the shape output is a graph output,
+		// it is marked const (since other nodes have already used the compile-time
+		// generated output of the Shape node).
+		if( t->isIO )
+			t->isConst = false;
+
 		if( not_callsite )
 			params.push_back( t->print_tensor(name) );
 		else
@@ -154,12 +163,43 @@ void Node::print_function_parameters_callsite(std::ostream &destination) const
 	print_parameters(destination, false);
 }
 
-void Node::register_input(const Tensor *t, std::string name)
+void Node::register_input(Tensor *t, std::string name)
 {
 	input_params.push_back(function_parameter(t, name));
 }
 
 void Node::register_output(Tensor *t, std::string name)
 {
+	//t->generate=true;
 	output_params.push_back(function_parameter(t, name));
-	outputs.push_back(t);
+}
+
+void Node::name_input(unsigned input_no, std::string name)
+{
+	std::get<1>(input_params[input_no]) = name;
+}
+void Node::register_output(unsigned output_no, std::string name)
+{
+	std::get<1>(output_params[output_no]) = name;
+}
+Tensor* Node::get_output_tensor(unsigned N) const
+{
+	if( output_params.size() <= N )
+		return nullptr;
+	return std::get<0>(output_params[N]);
+}
+Tensor* Node::get_input_tensor(unsigned N) const
+{
+	if( input_params.size() <= N )
+		return nullptr;
+	return std::get<0>(input_params[N]);
+}
+
+unsigned Node::get_number_of_inputs(void) const
+{
+	return input_params.size();
+}
+
+unsigned Node::get_number_of_outputs(void) const
+{
+	return output_params.size();
+}
diff --git a/src/node.h b/src/node.h
index fa8e6a5..648d4b1 100644
--- a/src/node.h
+++ b/src/node.h
@@ -8,7 +8,7 @@ namespace toC {
 class Tensor;
 
-typedef std::tuple<const Tensor *, std::string> function_parameter;
+typedef std::tuple<Tensor *, std::string> function_parameter;
 
 /* The ONNX node, or computation kernel.
  *
 * Node is a virtual parent class for each of the
@@ -22,13 +22,7 @@ class Node {
 	std::string onnx_name; //ONNX name of the individual node
 	std::string op_name;   //ONNX name of node type
 	static int64_t onnx_ir_version;
-	std::vector<const Tensor*> inputs; // List of input tensors in the .onnx file
-
-	// NB: this is deprecated. Whenever a node is updated,
-	// any reference to this variable should be removed.
-	// Instead of outputs.push_back(), use register_output().
-	// Eventually this variable should be made protected.
-	std::vector<Tensor*> outputs;
+
 	virtual ~Node(){}
private:
 	std::vector<function_parameter> input_params;
 	std::vector<function_parameter> output_params;
@@ -40,8 +34,21 @@ class Node {
public:
 	void set_output_used(std::vector<bool> val){output_used = val; }
-	// when output is removed, get the vector of tensors from output_params.
-	std::vector<Tensor*> get_outputs(void) const {return outputs;}
+	// Get a pointer to the Nth input/output tensor for this node.
+	Tensor *get_output_tensor(unsigned N) const;
+	Tensor *get_input_tensor(unsigned N) const;
+	unsigned get_number_of_inputs(void) const;
+	unsigned get_number_of_outputs(void) const;
+
+	// Run a caller-provided lambda for each output Tensor.
+	void forEachOutput( std::function<void(Tensor*)> caller_lambda)
+	{
+		for( auto op : output_params )
+		{
+			Tensor* o = std::get<0>(op);
+			caller_lambda(o);
+		}
+	}
 
 	/* Create the C source name. Replace all non a-z,A-Z,0-9 or _
 	 * characters. Also prefix name since ONNX allows tensors and nodes
@@ -116,12 +123,15 @@ class Node {
 		const std::vector<int> B,
 		std::vector<int> &result) const;
 
-protected:
+public: // TODO: split up into more protected functions
 	/* Record a tensor as the generated function's parameter.
 	 * - name: the name to be used locally for the tensor in the C-function */
-	void register_input(const Tensor *, std::string name);
+	void register_input(Tensor *, std::string name);
 	void register_output(Tensor *, std::string name);
+	void name_input(unsigned input_no, std::string name);
+	void register_output(unsigned output_no, std::string name);
};
}
+
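
With the tuple-based input_params/output_params storage in place, node implementations touch their IO only through these accessors. A minimal hypothetical node (not part of this patch) showing the pattern every per-operator change below follows:

	// Hypothetical minimal operator using the new Node API.
	class Negate : public toC::Node {
	public:
		virtual void resolve(void) override
		{
			// graph.cc has already register_input()'ed the tensor with
			// local name ""; the node only assigns its C-source name.
			name_input(0, "X");
			const toC::Tensor *X = get_input_tensor(0);

			// Outputs are still created and registered by the node itself.
			toC::Tensor *t = new toC::Tensor;
			t->data_dim  = X->data_dim;
			t->data_type = X->data_type;
			register_output(t, "Y");
		}
		virtual void print(std::ostream &dst) const override
		{ /* emit C that negates "X" into "Y" */ }
		virtual void parseAttributes( onnx::NodeProto &node ) override
		{ /* no attributes */ }
	};

The mechanical translation used throughout the rest of this patch is then: inputs[N] becomes get_input_tensor(N), inputs.size() becomes get_number_of_inputs(), register_input(inputs[N], "x") becomes name_input(N, "x"), and likewise for outputs.
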
diff --git a/src/nodes/TEMPLATE b/src/nodes/TEMPLATE
index 94109a6..f4bc09d 100644
--- a/src/nodes/TEMPLATE
+++ b/src/nodes/TEMPLATE
@@ -79,7 +79,6 @@ void TEMPLATE::resolve(void)
 /* Body of the node implementing function */
 void TEMPLATE::print(std::ostream &dst) const
 {
-	INDT_1 << "/* TEMPLATE */" << std::endl;
 	INDT_1 << "/* Print info on this node here, for debugging purposes */" << std::endl;
 
 	/* Generate the C code here */
diff --git a/src/nodes/averagepool.h b/src/nodes/averagepool.h
index a650d59..05443d4 100644
--- a/src/nodes/averagepool.h
+++ b/src/nodes/averagepool.h
@@ -52,7 +52,7 @@ class AveragePool : public Pooling {
 
 	virtual void resolve(void) override
 	{
-		register_input(inputs[0], "x");
+		name_input(0, "x");
 
 		resolve_strides();
 		resolve_dilations();
diff --git a/src/nodes/batchnormalization.h b/src/nodes/batchnormalization.h
index 98b10c4..c81aa9f 100644
--- a/src/nodes/batchnormalization.h
+++ b/src/nodes/batchnormalization.h
@@ -67,9 +67,9 @@ class BatchNormalization : public Node {
 
 	virtual void print(std::ostream &dst) const override
 	{
-		Tensor *input = inputs[0];
-		const Tensor *scale = inputs[1];
-		const Tensor *bias = inputs[2];
+		const Tensor *input = get_input_tensor(0);
+		const Tensor *scale = get_input_tensor(1);
+		const Tensor *bias = get_input_tensor(2);
 		int batch_size =input->data_dim[0];
 		int num_chan =input->data_dim[1];
 		std::string type = input->data_type_str();
@@ -140,7 +140,7 @@ class BatchNormalization : public Node {
 	// Updates variance tensor in-place to contain the entire denominator
 	// of the BatchNormalization formula.
 	// TODO: This breaks if var is used anywhere else.
-	void calculateSqrtVarOffline(Tensor *var)
+	void calculateSqrtVarOffline(const Tensor *var)
 	{
 		float *v = (float*)var->data_buffer;
 		for( int i=0; i<var->data_num_elem(); i++)
@@ -149,23 +149,23 @@ class BatchNormalization : public Node {
 
 	virtual void resolve(void) override
 	{
-		if( inputs.size() != 5 )
+		if( get_number_of_inputs() != 5 )
 			ERROR("wrong number of inputs to BatchNormalization");
 
-		register_input(inputs[0], "X");
-		register_input(inputs[1], "scale");
-		register_input(inputs[2], "bias");
-		register_input(inputs[3], "mean");
-		register_input(inputs[4], "var");
+		name_input(0, "X");
+		name_input(1, "scale");
+		name_input(2, "bias");
+		name_input(3, "mean");
+		name_input(4, "var");
 
-		if( inputs[4]->isConst ) {
-			calculateSqrtVarOffline(inputs[4]);
+		if( get_input_tensor(4)->isConst ) {
+			calculateSqrtVarOffline(get_input_tensor(4));
 			sqrt_var_offline = true;
 		}
 
 		Tensor *rv = new Tensor;
-		rv->data_dim = inputs[0]->data_dim;
-		rv->data_type = inputs[0]->data_type;
+		rv->data_dim = get_input_tensor(0)->data_dim;
+		rv->data_type = get_input_tensor(0)->data_type;
 		register_output(rv, "output");
 	}
};
diff --git a/src/nodes/cast.cc b/src/nodes/cast.cc
index 32645b6..cdefd32 100644
--- a/src/nodes/cast.cc
+++ b/src/nodes/cast.cc
@@ -21,7 +21,7 @@ void Cast::parseAttributes( onnx::NodeProto &node )
 void Cast::resolve(void)
 {
 	LOG(INFO) << "'Cast' node found." << std::endl;
-	register_input(inputs[0], "input");
+	name_input(0, "input");
 
 	switch(to)
 	{
@@ -35,7 +35,7 @@ void Cast::resolve(void)
 	}
 
 	Tensor *t = new Tensor;
-	t->data_dim = inputs[0]->data_dim;
+	t->data_dim = get_input_tensor(0)->data_dim;
 	t->data_type = static_cast<onnx::TensorProto_DataType>(to);
 	register_output(t, "output");
}
@@ -44,8 +44,8 @@ void Cast::resolve(void)
 void Cast::print(std::ostream &dst) const
 {
 	INDT_1 << "/* Cast */" << std::endl;
-	const Tensor *input = inputs[0];
-	const Tensor *output = outputs[0];
+	const Tensor *input = get_input_tensor(0);
+	const Tensor *output = get_output_tensor(0);
 	std::string intype = input->data_type_str();
 	std::string outtype = output->data_type_str();
diff --git a/src/nodes/clip.h b/src/nodes/clip.h
index 6e48acd..ccd1da4 100644
--- a/src/nodes/clip.h
+++ b/src/nodes/clip.h
@@ -34,12 +34,12 @@ class Clip : public Node {
 
 	virtual void resolve(void) override
 	{
-		const Tensor *input = inputs[0];
-		register_input(inputs[0], "input");
-		if (inputs.size() > 1 && inputs[1]->is_used())
-			register_input(inputs[1], "min_tensor");
-		if (inputs.size() > 2 && inputs[2]->is_used())
-			register_input(inputs[2], "max_tensor");
+		const Tensor *input = get_input_tensor(0);
+		name_input(0, "input");
+		if (get_number_of_inputs() > 1 && get_input_tensor(1)->is_used())
+			name_input(1, "min_tensor");
+		if (get_number_of_inputs() > 2 && get_input_tensor(2)->is_used())
+			name_input(2, "max_tensor");
 
 		Tensor *t = new Tensor;
 		t->data_dim = input->data_dim;
@@ -50,14 +50,14 @@ class Clip : public Node {
 
 	virtual void print(std::ostream &dst) const override
 	{
-		const Tensor *input = inputs[0];
+		const Tensor *input = get_input_tensor(0);
 		const Tensor *min_tensor = nullptr;
 		const Tensor *max_tensor = nullptr;
 
-		if (inputs.size() > 1 && inputs[1]->is_used())
-			min_tensor = inputs[1];
-		if (inputs.size() > 2 && inputs[2]->is_used())
-			max_tensor = inputs[2];
+		if (get_number_of_inputs() > 1 && get_input_tensor(1)->is_used())
+			min_tensor = get_input_tensor(1);
+		if (get_number_of_inputs() > 2 && get_input_tensor(2)->is_used())
+			max_tensor = get_input_tensor(2);
 
 		INDT_1 << "/* Clip */" << std::endl;
diff --git a/src/nodes/concat.h b/src/nodes/concat.h
index e8ceb9c..0c2fbff 100644
--- a/src/nodes/concat.h
+++ b/src/nodes/concat.h
@@ -30,7 +30,7 @@ namespace toC {
 	void print(std::ostream &dst) const override
 	{
 		dst << "\t/* Concat */" << std::endl;
-		const Tensor *concat_result = outputs[0];
+		const Tensor *concat_result = get_output_tensor(0);
 
 		// the axisPitch is the number of elements to add to move to the next split axis in the concat_result
 		int64_t axisPitch = 1;
@@ -41,7 +41,7 @@
 		dst << "\tint64_t outputOffset;" << std::endl;
 		int64_t outputBase = 0;
 
-		int64_t input_count = inputs.size();
+		int64_t input_count = get_number_of_inputs();
 
 		for (int64_t inputIndex = 0; inputIndex < input_count; inputIndex++) {
@@ -50,11 +50,11 @@
 
 			// the inputAxisPitch is the number of elements to add to move to the next split axis in the inputs
 			int64_t inputAxisPitch = 1;
-			for (int i = inputs[inputIndex]->data_dim.size() - 1; i >= axis; i--) {
-				inputAxisPitch *= inputs[inputIndex]->data_dim[i];
+			for (int i = get_input_tensor(inputIndex)->data_dim.size() - 1; i >= axis; i--) {
+				inputAxisPitch *= get_input_tensor(inputIndex)->data_dim[i];
 			}
 
-			int64_t inputSize = inputs[inputIndex]->data_num_elem();
+			int64_t inputSize = get_input_tensor(inputIndex)->data_num_elem();
 
 			// copy the data across: for every 'inputAxisPitch' values copied, we move over by the 'axisPitch'
 			dst << "\toutputOffset = " << outputBase << ";" << std::endl;
@@ -77,39 +77,39 @@
 	}
 
 	void resolve(void) override {
-		if (inputs.size() == 1 ) {
+		if (get_number_of_inputs() == 1 ) {
			LOG(WARNING) << "Concat node " << onnx_name << " has only one input." << std::endl;
 		}
 
 		if (axis < 0)
-			axis = inputs[0]->data_dim.size() + axis;
+			axis = get_input_tensor(0)->data_dim.size() + axis;
 
-		auto *rv = new Tensor;
-		rv->data_dim = inputs[0]->data_dim;
-		size_t input_count = inputs.size();
+		size_t input_count = get_number_of_inputs();
 		size_t output_axis_size = 0;
 		size_t i, j;
-		std::vector<int> dims = inputs[0]->data_dim;
+		std::vector<int> dims = get_input_tensor(0)->data_dim;
 
 		LOG(TRACE) << "Concatenating on axis " << axis << std::endl;
 
 		for (i = 0; i < input_count; i++) {
-			if( inputs[0]->rank() != inputs[i]->rank() ) {
-				LOG(DEBUG) << "Input " << inputs[0]->name << " has " << inputs[0]->rank() << " dimensions" << std::endl;
-				LOG(DEBUG) << "Input " << inputs[i]->name << " has " << inputs[i]->rank() << " dimensions" << std::endl;
+			if( get_input_tensor(0)->rank() != get_input_tensor(i)->rank() ) {
+				LOG(DEBUG) << "Input " << get_input_tensor(0)->name << " has " << get_input_tensor(0)->rank() << " dimensions" << std::endl;
+				LOG(DEBUG) << "Input " << get_input_tensor(i)->name << " has " << get_input_tensor(i)->rank() << " dimensions" << std::endl;
 				ERROR("Concat expects all inputs to have equal number of dimensions");
 			}
 			for (j = 0; j < dims.size(); j++) {
-				if (dims[j] != inputs[i]->data_dim[j] && (int) j != axis)
+				if (dims[j] != get_input_tensor(i)->data_dim[j] && (int) j != axis)
 					ERROR("Concat's input tensors must have the same shape, except for the "
 					      "dimension size of the axis to concatenate on.");
 			}
 			std::string input_name = "input_";
 			input_name += std::to_string(i);
-			register_input(inputs[i], input_name);
-			output_axis_size += inputs[i]->data_dim[axis];
+			name_input(i, input_name);
+			output_axis_size += get_input_tensor(i)->data_dim[axis];
 		}
 
+		auto *rv = new Tensor;
+		rv->data_dim = get_input_tensor(0)->data_dim;
 		rv->data_dim[axis] = output_axis_size;
-		rv->data_type = inputs[0]->data_type;
+		rv->data_type = get_input_tensor(0)->data_type;
 		register_output(rv, "output");
 	}
};
diff --git a/src/nodes/constant.h b/src/nodes/constant.h
index 364d944..dea8e99 100644
--- a/src/nodes/constant.h
+++ b/src/nodes/constant.h
@@ -17,8 +17,11 @@ class Constant : public Node {
 	virtual void parseAttributes( onnx::NodeProto &node ) override {
 		for( const auto& a : node.attribute() ) {
 			LOG(TRACE) << "Parsing attribute " << a.name() << std::endl;
-			if( a.name() == "value" )
+			if( a.name() == "value" ) {
+				LOG(TRACE) << "Adding attribute 'value' as input tensor to node" << std::endl;
 				value_tensor = parse_attribute_tensor(a);
+				LOG(TRACE) << "\t" << value_tensor->print_trace_dump() << std::endl;
+			}
 			else
 				ERROR("Unimplemented parsing of attribute " << a.name());
 		}
@@ -27,17 +30,51 @@ class Constant : public Node {
 
 	virtual void print(std::ostream &dst) const override
 	{
+		Tensor *output= get_output_tensor(0);
+
 		dst << "\t/* Constant */" << std::endl;
 		dst << "\t/* The output is generated as a global tensor */" << std::endl;
-		dst << "\t(void)output;" << std::endl;
+		if( output->isIO == false ) {
+			dst << "\t(void)output;" << std::endl;
+			return;
+		}
+
+		std::string dimstr;
+		for( unsigned dim = 0; dim < value_tensor->rank(); dim++) {
+			dimstr += "[d" + std::to_string(dim) + "]";
+		}
+
+		print_loops_over_dims(dst, value_tensor, "d", 1);
+		INDT_2 << "output" << dimstr << " = " << value_tensor->cname() << dimstr << ";" << std::endl;
+		print_loop_closes_over_dims(dst, value_tensor, 1);
 	}
 
 	virtual void resolve(void) override
 	{
+		// value_tensor is the one supplied as the node attribute. It gets
+		// copied into the output, as is.
 		if( value_tensor == nullptr )
 			ERROR("Constant tensor not resolved");
 
 		// "This operator produces a constant tensor."
 		value_tensor->isConst = true;
+		value_tensor->initialize = true;
+
+		// Just in case someone wants to print out a constant tensor from the graph.
+		// Yeah, it's kinda strange... but valid.
+		Tensor *rv = new Tensor;
+		rv->data_dim = value_tensor->data_dim;
+		rv->data_type = value_tensor->data_type;
+		rv->isConst = true;
+		value_tensor->initialize = true;
+		// TODO: remove the above. We have to register the value tensor as output.
+		// Otherwise other nodes that access the constant don't see its data.
 		register_output(value_tensor, "output");
 	}
};
diff --git a/src/nodes/constantofshape.cc b/src/nodes/constantofshape.cc
index 28165e5..b676498 100644
--- a/src/nodes/constantofshape.cc
+++ b/src/nodes/constantofshape.cc
@@ -19,8 +19,8 @@ void ConstantOfShape::parseAttributes( onnx::NodeProto &node )
 
 void ConstantOfShape::resolve(void)
 {
-	Tensor *input = inputs[0];
-	register_input(input, "input");
+	Tensor *input = get_input_tensor(0);
+	name_input(0, "input");
 
 	Tensor *t = new Tensor;
 	for( int i=0; i<input->data_num_elem(); i++) {
@@ -38,7 +38,7 @@ void ConstantOfShape::resolve(void)
 
 void ConstantOfShape::print(std::ostream &dst) const
 {
-	Tensor *output = outputs[0];
+	Tensor *output = get_output_tensor(0);
 	std::string type = output->data_type_str();
 
 	INDT_1 << "/* ConstantOfShape */" << std::endl;
diff --git a/src/nodes/conv.h b/src/nodes/conv.h
index 9bd641f..12470e5 100644
--- a/src/nodes/conv.h
+++ b/src/nodes/conv.h
@@ -19,7 +19,7 @@ class Conv : public SpatialFilter {
 		for(unsigned i=0; i<[...]
diff --git a/src/nodes/convinteger.h b/src/nodes/convinteger.h
[...]
-		if( inputs.size() >= 3 ) // x_zero_point is optional, 3rd input
+		if( get_number_of_inputs() >= 3 ) // x_zero_point is optional, 3rd input
 			x_zero = constant_acces_code( "x_zero_point[0]");
 		else
 			x_zero = "0";
@@ -71,13 +71,13 @@ class ConvInteger : public SpatialFilter {
 
 	virtual void resolve(void) override
 	{
-		register_input(inputs[0], "x");
-		register_input(inputs[1], "w");
+		name_input(0, "x");
+		name_input(1, "w");
 
-		if( inputs.size() > 2 )
-			register_input(inputs[2], "x_zero_point");
-		if( inputs.size() > 3 ){
-			register_input(inputs[3], "w_zero_point");
+		if( get_number_of_inputs() > 2 )
+			name_input(2, "x_zero_point");
+		if( get_number_of_inputs() > 3 ){
+			name_input(3, "w_zero_point");
 			ERROR("unimplemented: weight zero points");
 		}
diff --git a/src/nodes/convtranspose.cc b/src/nodes/convtranspose.cc
index 6ee8659..f1dac3e 100644
--- a/src/nodes/convtranspose.cc
+++ b/src/nodes/convtranspose.cc
@@ -174,13 +174,13 @@ std::vector<int> ConvTranspose::calculate_output_size(void)
 
 void ConvTranspose::resolve(void)
 {
-	x = inputs[0]; // data
-	register_input(x,"x");
-	w = inputs[1]; // weights
-	register_input(w,"w");
-	if( inputs.size() == 3 ) {
-		b = inputs[2];
-		register_input(b,"bias"); // 'b' is the batch looping index
+	x = get_input_tensor(0); // data
+	name_input(0,"x");
+	w = get_input_tensor(1); // weights
+	name_input(1,"w");
+	if( get_number_of_inputs() == 3 ) {
+		b = get_input_tensor(2);
+		name_input(2,"bias"); // 'b' is the batch looping index
 	}
 	else
 		b = NULL;
diff --git a/src/nodes/dropout.h b/src/nodes/dropout.h
index 4e3f30d..ed109de 100644
--- a/src/nodes/dropout.h
+++ b/src/nodes/dropout.h
@@ -32,7 +32,7 @@ class Dropout : public Node {
 
 	/* Body of the node implementing function */
 	virtual void print(std::ostream &dst) const override
 	{
-		const Tensor *data = inputs[0];
+		const Tensor *data = get_input_tensor(0);
 		std::string datatype = data->data_type_str();
 
 		dst << "\t/* Dropout */" << std::endl;
@@ -54,22 +54,18 @@ class Dropout : public Node {
 
 	virtual void resolve(void) override
 	{
-		const Tensor *data = inputs[0];
-		const Tensor *ratio = nullptr;
-		const Tensor *training_mode = nullptr;
-		register_input(data, "input");
+		const Tensor *data = get_input_tensor(0);
+		name_input(0, "input");
 
 		if( typeConstraint_highPrecisionNumeric(data) == false )
 			ERROR("Incorrect input for node");
 
-		if( inputs.size() > 1 ) {
-			ratio = inputs[1];
-			register_input(ratio, "ratio");
+		if( get_number_of_inputs() > 1 ) {
+			name_input(1, "ratio");
 		}
 
-		if( inputs.size() > 2 ) {
+		if( get_number_of_inputs() > 2 ) {
 			ERROR("Unimplemented - training_mode input to Dropout");
-			training_mode = inputs[2];
-			register_input(training_mode, "training_mode");
+			name_input(2, "training_mode");
 		}
 
 		/* Create output tensor */
diff --git a/src/nodes/dynamicquantizelinear.h b/src/nodes/dynamicquantizelinear.h
index 6f1866e..0f6921a 100644
--- a/src/nodes/dynamicquantizelinear.h
+++ b/src/nodes/dynamicquantizelinear.h
@@ -22,7 +22,7 @@ class DynamicQuantizeLinear : public Node {
 
 	virtual void print(std::ostream &dst) const override
 	{
-		const Tensor *x = inputs[0];
+		const Tensor *x = get_input_tensor(0);
 		int n_el = x->data_num_elem();
 
 		INDT_1 << "/* DynamicQuantizeLinear */" << std::endl;
@@ -65,8 +65,8 @@ class DynamicQuantizeLinear : public Node {
 
 	virtual void resolve(void) override
 	{
-		const Tensor *x = inputs[0];
-		register_input(x, "x");
+		const Tensor *x = get_input_tensor(0);
+		name_input(0, "x");
 
 		Tensor *t = new Tensor;
 		t->data_dim = x->data_dim;
diff --git a/src/nodes/elementwise.h b/src/nodes/elementwise.h
index 1204aee..8ed85e8 100644
--- a/src/nodes/elementwise.h
+++ b/src/nodes/elementwise.h
@@ -179,8 +179,9 @@ class Elementwise : public Node {
 
 	virtual void print(std::ostream &dst) const override
 	{
-		const Tensor *Y = outputs[0];
+		const Tensor *Y = get_output_tensor(0);
 		INDT_1 << "/* " << op_name << std::endl;
+		INDT_1 << "   Implemented with Elementwise template." << std::endl;
 		INDT_1 << "   alpha = " << alpha << std::endl;
 		INDT_1 << "   beta = " << beta << std::endl;
 		INDT_1 << "*/" << std::endl;
@@ -207,8 +208,8 @@ class Elementwise : public Node {
 
 	virtual void resolve(void) override
 	{
-		const Tensor *X = inputs[0];
-		register_input(X, "X");
+		const Tensor *X = get_input_tensor(0);
+		name_input(0, "X");
 
 		Tensor *t = new Tensor;
 		t->data_dim = X->data_dim;
diff --git a/src/nodes/elementwise_2.h b/src/nodes/elementwise_2.h
index d1c14a7..c683c34 100644
--- a/src/nodes/elementwise_2.h
+++ b/src/nodes/elementwise_2.h
@@ -125,9 +125,9 @@ class Elementwise_2 : public Node {
 		INDT_1 << " */" << std::endl;
 
 		// C = A ? B
-		Tensor *A = inputs[0];
-		Tensor *B = inputs[1];
-		Tensor *C = outputs[0];
+		const Tensor *A = get_input_tensor(0);
+		const Tensor *B = get_input_tensor(1);
+		const Tensor *C = get_output_tensor(0);
 
 		// if either A or B does not have enough dimensions, prepend
 		// dimensions of 1 to match the rank of C
@@ -178,10 +178,10 @@ class Elementwise_2 : public Node {
 
 	virtual void resolve(void) override
 	{
-		const Tensor *A = inputs[0];
-		const Tensor *B = inputs[1];
-		register_input(A, "A");
-		register_input(B, "B");
+		const Tensor *A = get_input_tensor(0);
+		const Tensor *B = get_input_tensor(1);
+		name_input(0, "A");
+		name_input(1, "B");
 
 		std::vector<int> result_dim;
 		multidirectional_broadcast_size(A->data_dim, B->data_dim, result_dim);
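
multidirectional_broadcast_size() implements ONNX's NumPy-style broadcast: align the two shapes from the right, then each dimension pair must be equal or contain a 1. A self-contained sketch of that rule (not the onnx2c implementation itself):

	#include <stdexcept>
	#include <vector>

	// NumPy-style multidirectional broadcast of two shapes.
	std::vector<int> broadcast_size(std::vector<int> a, std::vector<int> b)
	{
		// prepend 1s so both shapes have the same rank
		while( a.size() < b.size() ) a.insert(a.begin(), 1);
		while( b.size() < a.size() ) b.insert(b.begin(), 1);

		std::vector<int> out(a.size());
		for( size_t i = 0; i < a.size(); i++ ) {
			if( a[i] == b[i] )   out[i] = a[i];
			else if( a[i] == 1 ) out[i] = b[i];
			else if( b[i] == 1 ) out[i] = a[i];
			else throw std::runtime_error("shapes do not broadcast");
		}
		return out;
	}
	// e.g. broadcast_size({3,1,5}, {4,5}) == {3,4,5}
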
diff --git a/src/nodes/elementwise_variadic.h b/src/nodes/elementwise_variadic.h
index 025536b..a03b3af 100644
--- a/src/nodes/elementwise_variadic.h
+++ b/src/nodes/elementwise_variadic.h
@@ -23,38 +23,42 @@ class Elementwise_variadic : public Node {
 		if( op == "Min" )
 			operation = [this](std::ostream &dst, const std::vector<std::string> &idxs)
 			{
+				const unsigned n_inp = get_number_of_inputs();
 				INDT_3 << "MIN(in_0" << idxs[0] << ", " << std::endl;
-				for(unsigned i=0; i<[...]
 			operation = [this](std::ostream &dst, const std::vector<std::string> &idxs)
 			{
+				const unsigned n_inp = get_number_of_inputs();
 				INDT_3 << "(in_0" << idxs[0] << std::endl;
-				for(unsigned i=1; i<[...]
 			operation = [this](std::ostream &dst, const std::vector<std::string> &idxs)
 			{
+				const unsigned n_inp = get_number_of_inputs();
 				INDT_3 << "MAX(in_0" << idxs[0] << ", " << std::endl;
-				for(unsigned i=0; i<[...]
 			operation = [this](std::ostream &dst, const std::vector<std::string> &idxs)
 			{
+				const unsigned n_inp = get_number_of_inputs();
 				INDT_3 << "(in_0" << idxs[0] << std::endl;
-				for(unsigned i=1; i<[...]
 		std::string type = [...]->data_type_str();
-		std::vector<std::string> in_idx_strs(inputs.size());
+		std::vector<std::string> in_idx_strs(get_number_of_inputs());
 		std::string out_idx_str;
 		INDT_1 << "/* " << op_name << std::endl;
 		INDT_1 << "   Implemented with Elementwise_variadic template." << std::endl;
@@ -92,8 +96,8 @@ class Elementwise_variadic : public Node {
 		// Generate indexing strings to be printed later on.
 		// TODO: this is a copy from earlier code. Feels like there might
 		// be a more elegant way of doing this.
-		for( unsigned i=0; i<inputs.size(); i++) {
-			std::vector<int> pads = inputs[i]->data_dim;
+		for( unsigned i=0; i<get_number_of_inputs(); i++) {
+			std::vector<int> pads = get_input_tensor(i)->data_dim;
 			std::string idx_str;
 			if (pads[r]==1)
 				idx_str += "[0]";
@@ -119,21 +123,21 @@ class Elementwise_variadic : public Node {
 
 	virtual void resolve(void) override
 	{
 		// There can be 1-N inputs.
-		int num_inputs = inputs.size();
+		int num_inputs = get_number_of_inputs();
 
-		std::vector<int> result_dim=inputs[0]->data_dim;
-		register_input(inputs[0], "in_0");
+		std::vector<int> result_dim=get_input_tensor(0)->data_dim;
+		name_input(0, "in_0");
 		for( int i=1; i<num_inputs; i++) {
 			std::vector<int> tmp;
-			multidirectional_broadcast_size(result_dim, inputs[i]->data_dim, tmp);
+			multidirectional_broadcast_size(result_dim, get_input_tensor(i)->data_dim, tmp);
 			result_dim=tmp;
 			std::string input_name = "in_" + std::to_string(i);
-			register_input(inputs[i], input_name);
+			name_input(i, input_name);
 		}
 
 		Tensor *t = new Tensor;
 		t->data_dim = result_dim;
-		t->data_type = inputs[0]->data_type;
+		t->data_type = get_input_tensor(0)->data_type;
 		register_output(t, "output");
 	}
};
diff --git a/src/nodes/expand.cc b/src/nodes/expand.cc
index 39865d0..b656040 100644
--- a/src/nodes/expand.cc
+++ b/src/nodes/expand.cc
@@ -44,10 +44,10 @@ std::vector<int> Expand::resolve_output_shape(void) const
 /* Assign input tensors, resolve output tensor shapes, allocate output tensors */
 void Expand::resolve(void)
 {
-	input = inputs[0];
-	register_input(input, "input");
-	shape = inputs[1];
-	register_input(shape, "shape");
+	input = get_input_tensor(0);
+	name_input(0, "input");
+	shape = get_input_tensor(1);
+	name_input(1, "shape");
 
 	if( shape->isConst == false )
 		ERROR("Unimplemented: Expand operand with non-constant input (shape).");
diff --git a/src/nodes/flatten.h b/src/nodes/flatten.h
index 6425603..1d3e824 100644
--- a/src/nodes/flatten.h
+++ b/src/nodes/flatten.h
@@ -26,7 +26,7 @@ class Flatten : public Node {
 
 	virtual void print(std::ostream &dst) const override
 	{
-		const Tensor *input = inputs[0];
+		const Tensor *input = get_input_tensor(0);
 		std::string type = input->data_type_str();
 
 		dst << "\t/* Flatten */" << std::endl;
@@ -40,14 +40,13 @@ class Flatten : public Node {
 	}
 
-
 	virtual void resolve(void) override
 	{
-		if( inputs.size() != 1 )
+		if( get_number_of_inputs() != 1 )
 			ERROR("wrong number of inputs to Flatten");
 
-		const Tensor *input = inputs[0];
-		register_input(input, "input");
+		const Tensor *input = get_input_tensor(0);
+		name_input(0, "input");
 
 		// output:
 		// A 2D tensor with the contents of the input tensor, with input dimensions up to axis
diff --git a/src/nodes/gather.h b/src/nodes/gather.h
index a04f60f..c30245d 100644
--- a/src/nodes/gather.h
+++ b/src/nodes/gather.h
@@ -26,10 +26,10 @@ class Gather : public Node {
 
 	virtual void resolve(void) override
 	{
-		const Tensor *data = inputs[0];
-		const Tensor *indices = inputs[1];
-		register_input(data, "X");
-		register_input(indices, "indices");
+		const Tensor *data = get_input_tensor(0);
+		const Tensor *indices = get_input_tensor(1);
+		name_input(0, "X");
+		name_input(1, "indices");
 		unsigned a = axis >= 0 ? axis : data->rank()+axis;
@@ -58,9 +58,9 @@ class Gather : public Node {
 
 	virtual void print(std::ostream &dst) const override
 	{
-		const Tensor *data = inputs[0];
-		const Tensor *indices = inputs[1];
-		const Tensor *output= outputs[0];
+		const Tensor *data = get_input_tensor(0);
+		const Tensor *indices = get_input_tensor(1);
+		const Tensor *output= get_output_tensor(0);
 		INDT_1 << "/* Gather" << std::endl;
 		INDT_1 << "   axis = " << axis << std::endl;
 		INDT_1 << " */" << std::endl;
diff --git a/src/nodes/gemm.h b/src/nodes/gemm.h
index 568db6b..1fd8134 100644
--- a/src/nodes/gemm.h
+++ b/src/nodes/gemm.h
@@ -42,13 +42,12 @@ class Gemm : public Node {
 	}
 
-
 	/* Body of the node implementing function */
 	virtual void print(std::ostream &dst) const override
 	{
-		const Tensor *A = inputs[0];
-		const Tensor *B = inputs[1];
-		const Tensor *C = inputs.size() > 2 ? inputs[2]:nullptr;
+		const Tensor *A = get_input_tensor(0);
+		const Tensor *B = get_input_tensor(1);
+		const Tensor *C = get_number_of_inputs() > 2 ? get_input_tensor(2):nullptr;
 		//int A1 = A->data_dim[1];
 		int C0,C1;
 		C0=C1=0;
 		if( C ) {
@@ -173,16 +172,16 @@ class Gemm : public Node {
 	/* Assign input tensors, resolve output tensor shapes, allocate output tensors */
 	virtual void resolve(void) override
 	{
-		if (inputs.size() < 2)
+		if (get_number_of_inputs() < 2)
 			ERROR("Not enough inputs");
 
-		const Tensor *A = inputs[0];
-		const Tensor *B = inputs[1];
-		register_input(A, "A");
-		register_input(B, "B");
+		const Tensor *A = get_input_tensor(0);
+		const Tensor *B = get_input_tensor(1);
+		name_input(0, "A");
+		name_input(1, "B");
 
-		if (inputs.size() == 3) {
-			register_input(inputs[2], "C");
+		if (get_number_of_inputs() == 3) {
+			name_input(2, "C");
 		}
 
 		// output dimensions - see the specification
diff --git a/src/nodes/globalaveragepool.h b/src/nodes/globalaveragepool.h
index 7362585..d6bb3a5 100644
--- a/src/nodes/globalaveragepool.h
+++ b/src/nodes/globalaveragepool.h
@@ -15,7 +15,7 @@ class GlobalAveragePool : public Node {
 	/* Body of the node implementing function */
 	virtual void print(std::ostream &dst) const override
 	{
-		const Tensor *X=inputs[0];
+		const Tensor *X=get_input_tensor(0);
 		int batch_size = X->data_dim[0];
 		int num_channels = X->data_dim[1];
@@ -58,8 +58,8 @@ class GlobalAveragePool : public Node {
 
 	virtual void resolve(void) override
 	{
-		const Tensor *X = inputs[0];
-		register_input(X, "input");
+		const Tensor *X = get_input_tensor(0);
+		name_input(0, "input");
 
 		if( typeConstraint_plainFloatingPoints(X) == false )
 			ERROR("Incorrect input for node");
diff --git a/src/nodes/graph_io.h b/src/nodes/graph_io.h
new file mode 100644
index 0000000..83cf4a5
--- /dev/null
+++ b/src/nodes/graph_io.h
@@ -0,0 +1,51 @@
+/* This file is part of onnx2c.
+ *
+ * Graph_io is an onnx2c-specific meta-node.
+ * It does not have an equivalent in the ONNX graph.
+ * Each graph has two of these nodes: the graph input
+ * and the graph output.
+ * These nodes exist only for symmetry in onnx2c graph
+ * traversing algorithms and optimization passes.
+ *
+ * These meta-nodes are a fairly late addition to onnx2c,
+ * so there might be places in the code base where
+ * the usage of this kind of node would make sense,
+ * but is missing.
+ */
+#include "node.h"
+
+namespace toC {
+
+class graph_io : public Node {
+	public:
+	graph_io() {
+		op_name = "graph_io";
+	}
+
+	// Mandatory "API" functions towards the rest of onnx2c
+	virtual void parseAttributes( onnx::NodeProto &node ) override;
+	virtual void resolve(void) override;
+	virtual void print(std::ostream &dst) const override;
+};
+
+
+/* Parse attributes, if this node has them. */
+void graph_io::parseAttributes( onnx::NodeProto &node )
+{
+	// No attributes for special nodes
+}
+
+
+void graph_io::resolve(void)
+{
+	// These special nodes are handled .. specially. In graph.cc
+}
+
+
+void graph_io::print(std::ostream &dst) const
+{
+	// nothing to do here
+}
+
+} // namespace
+
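
The symmetry this header describes is what keeps traversals uniform: with the meta-nodes in place, every tensor has producer and consumer nodes, and a pass only needs an op_name check to skip the graph boundary. A sketch of the resulting idiom (hypothetical pass, mirroring the checks graph_print.cc gains above):

	// Hypothetical pass skeleton over the onnx2c graph.
	for( toC::Node *n : nodes ) {
		// meta-nodes carry no computation
		if( n->op_name == "graph_io" )
			continue;

		for( unsigned i = 0; i < n->get_number_of_inputs(); i++ ) {
			toC::Tensor *t = n->get_input_tensor(i);
			// t->consumers includes the graph_output meta-node, so
			// "is this tensor externally visible" needs no special case.
			(void)t;
		}
	}
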
+ */ +#include "node.h" + +namespace toC { + +class graph_io : public Node { + public: + graph_io() { + op_name = "graph_io"; + } + + // Mandatory "API" functions towards the rest of onnx2c + virtual void parseAttributes( onnx::NodeProto &node ) override; + virtual void resolve(void) override; + virtual void print(std::ostream &dst) const override; +}; + + +/* Parse attributes, if this node has them. */ +void graph_io::parseAttributes( onnx::NodeProto &node ) +{ + // No attributes for special nodes +} + + +void graph_io::resolve(void) +{ + // These special nodes are handled .. specially. In graph.cc +} + + +void graph_io::print(std::ostream &dst) const +{ + // nothing to do here +} + +} // namespace + diff --git a/src/nodes/instancenorm.cc b/src/nodes/instancenorm.cc index afa9070..a1ec424 100644 --- a/src/nodes/instancenorm.cc +++ b/src/nodes/instancenorm.cc @@ -28,14 +28,14 @@ void InstanceNormalization::parseAttributes( onnx::NodeProto &node ) void InstanceNormalization::resolve(void) { - input = inputs[0]; - register_input(input, "input"); + input = get_input_tensor(0); + name_input(0, "input"); - scale = inputs[1]; - register_input(scale, "scale"); + scale = get_input_tensor(1); + name_input(1, "scale"); - B = inputs[2]; - register_input(B, "B"); + B = get_input_tensor(2); + name_input(2, "B"); Tensor *t = new Tensor; t->data_dim = input->data_dim; diff --git a/src/nodes/lrn.h b/src/nodes/lrn.h index 6d89d9e..19e2e1c 100644 --- a/src/nodes/lrn.h +++ b/src/nodes/lrn.h @@ -45,8 +45,8 @@ class LRN : public Node { /* Assign input tensors, resolve output tensor shapes, allocate output tensors */ virtual void resolve(void) override { - const Tensor *X = inputs[0]; - register_input(X, "X"); + const Tensor *X = get_input_tensor(0); + name_input(0, "X"); if( size == -1 ) ERROR("LRN: attribute 'size' was not given"); @@ -61,7 +61,7 @@ class LRN : public Node { virtual void print(std::ostream &dst) const override { - const Tensor *X = inputs[0]; + const Tensor *X = get_input_tensor(0); INDT_1 << "/* LRN */" << std::endl; INDT_1 << "/* attributes:" << std::endl; diff --git a/src/nodes/lstm.cc b/src/nodes/lstm.cc index 5105e9a..d538dbb 100644 --- a/src/nodes/lstm.cc +++ b/src/nodes/lstm.cc @@ -348,7 +348,7 @@ void LSTM::calculate_data_dimensions() void LSTM::resolve(void) { - if( inputs.size() < 3 || inputs.size() > 8 ) + if( get_number_of_inputs() < 3 || get_number_of_inputs() > 8 ) ERROR("wrong number of inputs to LSTM"); @@ -385,27 +385,27 @@ void LSTM::resolve(void) if( hidden_size < 0 ) ERROR("Must provide hidden_size attribute!"); - register_input(get_X(), "X"); - register_input(get_W(), "W"); - register_input(get_R(), "R"); + name_input(0, "X"); + name_input(1, "W"); + name_input(2, "R"); //optional inputs. Trailing unprovided inputs can just be left out //but non-trailing, unprovided inputs MUST have an empty string as name // (guess that means tensors MAY NOT have an empty string as name?) 
if( get_B() ) { - register_input(get_B(), "B"); + name_input(3, "B"); } if( get_sequence_lens() ) { - register_input(get_sequence_lens(), "sequence_lens"); + name_input(4, "sequence_lens"); } if( get_initial_h()) { - register_input(get_initial_h(), "initial_h"); + name_input(5, "initial_h"); } if( get_initial_c()) { - register_input(get_initial_c(), "initial_c"); + name_input(6, "initial_c"); } if( get_P() ) { - register_input(get_P(), "P"); + name_input(7, "P"); } diff --git a/src/nodes/lstm.h b/src/nodes/lstm.h index dfe0d6d..afcf392 100644 --- a/src/nodes/lstm.h +++ b/src/nodes/lstm.h @@ -52,23 +52,23 @@ class LSTM : public Node { float get_activation_alpha( const std::string &a); float get_activation_beta( const std::string &a); - const Tensor* get_X(void) const { return inputs[0]; } - const Tensor* get_W(void) const { return inputs[1]; } - const Tensor* get_R(void) const { return inputs[2]; } - const Tensor* get_Y(void) const { return outputs[0]; } - const Tensor* get_Y_h(void) const { return outputs[1]; } - const Tensor* get_Y_c(void) const { return outputs[2]; } + const Tensor* get_X(void) const { return get_input_tensor(0); } + const Tensor* get_W(void) const { return get_input_tensor(1); } + const Tensor* get_R(void) const { return get_input_tensor(2); } + const Tensor* get_Y(void) const { return get_output_tensor(0); } + const Tensor* get_Y_h(void) const { return get_output_tensor(1); } + const Tensor* get_Y_c(void) const { return get_output_tensor(2); } // ONNX allows omitting optional inputs by either: // - not give them at all // - named with the empty string const Tensor* get_optional(unsigned N) const { - if( inputs.size() <= N ) + if( get_number_of_inputs() <= N ) return nullptr; - if( inputs[N]->name == "" ) + if( get_input_tensor(N)->name == "" ) return nullptr; - return inputs[N]; + return get_input_tensor(N); } const Tensor* get_B(void) const { return get_optional(3); } const Tensor* get_sequence_lens(void) const { return get_optional(4); } diff --git a/src/nodes/matmul.h b/src/nodes/matmul.h index 2ea18d3..1047493 100644 --- a/src/nodes/matmul.h +++ b/src/nodes/matmul.h @@ -9,8 +9,8 @@ class MatMul : public Node { virtual void print(std::ostream &dst) const override { - Tensor *A = inputs[0]; - Tensor *B = inputs[1]; + const Tensor *A = get_input_tensor(0); + const Tensor *B = get_input_tensor(1); std::string type = A->data_type_str(); if( A->data_dim.size() != 2 ) @@ -39,18 +39,17 @@ class MatMul : public Node { } virtual void resolve(void) override { - Tensor *A = inputs[0]; - Tensor *B = inputs[1]; - register_input(A, "A"); - register_input(B, "B"); + const Tensor *A = get_input_tensor(0); + const Tensor *B = get_input_tensor(1); + name_input(0, "A"); + name_input(1, "B"); if( typeConstraint_highPrecisionNumeric(A) == false ) ERROR("Incorrect input for MatMul"); if( typeConstraint_highPrecisionNumeric(B) == false ) ERROR("Incorrect input for MatMul"); int32_t rows, cols; - result_dim(inputs, rows, cols); - + result_dim(rows, cols); Tensor *rv = new Tensor; rv->data_dim.push_back(rows); @@ -59,33 +58,33 @@ class MatMul : public Node { register_output(rv, "Y"); } - void result_dim( const std::vector< Tensor*> &inputs, int32_t &rows, int32_t &cols) const + void result_dim( int32_t &rows, int32_t &cols) const { // TODO: this is the check for vectors. 
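
get_optional() encodes ONNX's two ways of omitting an optional input. Because graph.cc registers an unnamed sentinel tensor for empty-string inputs, the slot numbering stays positional and the name_input() indices above remain valid. A condensed sketch of the convention, assuming (as register_input(&input_is_unused_sentinel, "") earlier in this diff suggests) that the sentinel keeps the empty name:

	#include <cassert>

	// ONNX: LSTM(X, W, R, "", sequence_lens)  -- B omitted by empty name.
	// After resolution the node still sees five positional inputs:
	//   0:X  1:W  2:R  3:(unused sentinel, name "")  4:sequence_lens
	assert( lstm->get_optional(3) == nullptr );  // B: empty name
	assert( lstm->get_optional(4) != nullptr );  // sequence_lens present
	assert( lstm->get_optional(7) == nullptr );  // P: past the input count
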
diff --git a/src/nodes/matmul.h b/src/nodes/matmul.h
index 2ea18d3..1047493 100644
--- a/src/nodes/matmul.h
+++ b/src/nodes/matmul.h
@@ -9,8 +9,8 @@ class MatMul : public Node {
 
 	virtual void print(std::ostream &dst) const override
 	{
-		Tensor *A = inputs[0];
-		Tensor *B = inputs[1];
+		const Tensor *A = get_input_tensor(0);
+		const Tensor *B = get_input_tensor(1);
 		std::string type = A->data_type_str();
 
 		if( A->data_dim.size() != 2 )
@@ -39,18 +39,17 @@ class MatMul : public Node {
 	}
 
 	virtual void resolve(void) override
 	{
-		Tensor *A = inputs[0];
-		Tensor *B = inputs[1];
-		register_input(A, "A");
-		register_input(B, "B");
+		const Tensor *A = get_input_tensor(0);
+		const Tensor *B = get_input_tensor(1);
+		name_input(0, "A");
+		name_input(1, "B");
 
 		if( typeConstraint_highPrecisionNumeric(A) == false )
 			ERROR("Incorrect input for MatMul");
 		if( typeConstraint_highPrecisionNumeric(B) == false )
 			ERROR("Incorrect input for MatMul");
 
 		int32_t rows, cols;
-		result_dim(inputs, rows, cols);
-
+		result_dim(rows, cols);
 
 		Tensor *rv = new Tensor;
 		rv->data_dim.push_back(rows);
@@ -59,33 +58,33 @@ class MatMul : public Node {
 		register_output(rv, "Y");
 	}
 
-	void result_dim( const std::vector< Tensor*> &inputs, int32_t &rows, int32_t &cols) const
+	void result_dim( int32_t &rows, int32_t &cols) const
 	{
 		// TODO: this is the check for vectors. Check the equivalent for N dimensions: N>2
-		if( inputs[0]->data_dim[1] != 0 && inputs[1]->data_dim[1] != 0 )
+		if( get_input_tensor(0)->data_dim[1] != 0 && get_input_tensor(1)->data_dim[1] != 0 )
 		{
-			rows = inputs[0]->data_dim[0];
-			cols = inputs[1]->data_dim[1];
+			rows = get_input_tensor(0)->data_dim[0];
+			cols = get_input_tensor(1)->data_dim[1];
 		}
-		else if( inputs[0]->data_dim[1] == 0 && inputs[1]->data_dim[1] == 0 )
+		else if( get_input_tensor(0)->data_dim[1] == 0 && get_input_tensor(1)->data_dim[1] == 0 )
 		{
 			ERROR("Bad input/unhandled: 2 vectors to MatMul");
 		}
-		else if( inputs[0]->data_dim[1] == 0 )
+		else if( get_input_tensor(0)->data_dim[1] == 0 )
 		{
-			cols = inputs[1]->data_dim[1];
-			if( inputs[0]->data_dim[0] == inputs[1]->data_dim[0] )
+			cols = get_input_tensor(1)->data_dim[1];
+			if( get_input_tensor(0)->data_dim[0] == get_input_tensor(1)->data_dim[0] )
 				rows = 1;
 			else
-				rows = inputs[0]->data_dim[0];
+				rows = get_input_tensor(0)->data_dim[0];
 		}
 		else
 		{
-			rows = inputs[0]->data_dim[0];
-			if( inputs[0]->data_dim[1] == inputs[1]->data_dim[0] )
+			rows = get_input_tensor(0)->data_dim[0];
+			if( get_input_tensor(0)->data_dim[1] == get_input_tensor(1)->data_dim[0] )
 				cols = 1;
 			else
-				cols = inputs[1]->data_dim[0];
+				cols = get_input_tensor(1)->data_dim[0];
 		}
 	}
};
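
result_dim() leans on a shape convention the zero tests imply: a 1-D tensor is stored with data_dim[1] == 0, so "is this a vector" is a single comparison. A worked example under that assumption:

	// Assumed encoding (implied by the zero tests above):
	//   matrix [M,N] -> data_dim = {M, N}
	//   vector [K]   -> data_dim = {K, 0}
	//
	// MatMul A[3,4] * B[4]:  A = {3,4}, B = {4,0}
	//   -> falls into the last branch (B is the vector);
	//      A.data_dim[1] == B.data_dim[0] (4 == 4), so
	//      rows = A.data_dim[0] = 3, cols = 1
	//   -> output Y has data_dim {3, 1}
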
diff --git a/src/nodes/matmulinteger.h b/src/nodes/matmulinteger.h
index eb8cf60..74dd0bd 100644
--- a/src/nodes/matmulinteger.h
+++ b/src/nodes/matmulinteger.h
@@ -19,9 +19,9 @@ class MatMulInteger : public Node {
 
 	virtual void print(std::ostream &dst) const override
 	{
-		Tensor *A = inputs[0];
-		Tensor *B = inputs[1];
-		Tensor *Y = outputs[0];
+		const Tensor *A = get_input_tensor(0);
+		const Tensor *B = get_input_tensor(1);
+		const Tensor *Y = get_output_tensor(0);
 		std::string intype = A->data_type_str();
 		std::string outtype = Y->data_type_str();
 		std::string weighttype = B->data_type_str();
@@ -43,11 +43,11 @@ class MatMulInteger : public Node {
 		if( inner != inner2 )
 			ERROR("MatMulInteger input's inner dimensions don't match");
 
-		if( inputs.size() > 2)
+		if( get_number_of_inputs() > 2)
 			a_zero = "a_zero_point[0]";
 		else
 			a_zero = "0";
-		if( inputs.size() > 3)
+		if( get_number_of_inputs() > 3)
 			b_zero = "b_zero_point[0]";
 		else
 			b_zero = "0";
@@ -88,23 +88,23 @@ class MatMulInteger : public Node {
 
 	virtual void resolve(void) override
 	{
-		register_input(inputs[0], "input_A");
-		register_input(inputs[1], "input_B");
+		name_input(0, "input_A");
+		name_input(1, "input_B");
 
-		if( inputs.size() > 2 ) {
-			register_input(inputs[2], "a_zero_point");
+		if( get_number_of_inputs() > 2 ) {
+			name_input(2, "a_zero_point");
 			/* There is no backend reference test for this case */
-			if( inputs[2]->data_dim[0] != 1 )
+			if( get_input_tensor(2)->data_dim[0] != 1 )
 				ERROR("Unimplemented: 1D zero_point input");
 		}
-		if( inputs.size() > 3 ) {
-			register_input(inputs[3], "b_zero_point");
-			if( inputs[3]->data_dim[0] != 1 )
+		if( get_number_of_inputs() > 3 ) {
+			name_input(3, "b_zero_point");
+			if( get_input_tensor(3)->data_dim[0] != 1 )
				ERROR("Unimplemented: 1D zero_point input");
 		}
 
 		int32_t rows, cols;
-		result_dim(inputs, rows, cols);
+		result_dim(rows, cols);
 
 		Tensor *rv = new Tensor;
 		rv->data_dim.push_back(rows);
@@ -117,33 +117,33 @@ class MatMulInteger : public Node {
 		register_output(rv, "output_Y");
 	}
 
-	void result_dim( const std::vector< Tensor*> &inputs, int32_t &rows, int32_t &cols) const
+	void result_dim( int32_t &rows, int32_t &cols) const
 	{
 		// TODO: this is the check for vectors. Check the equivalent for N dimensions: N>2
-		if( inputs[0]->data_dim[1] != 0 && inputs[1]->data_dim[1] != 0 )
+		if( get_input_tensor(0)->data_dim[1] != 0 && get_input_tensor(1)->data_dim[1] != 0 )
 		{
-			rows = inputs[0]->data_dim[0];
-			cols = inputs[1]->data_dim[1];
+			rows = get_input_tensor(0)->data_dim[0];
+			cols = get_input_tensor(1)->data_dim[1];
 		}
-		else if( inputs[0]->data_dim[1] == 0 && inputs[1]->data_dim[1] == 0 )
+		else if( get_input_tensor(0)->data_dim[1] == 0 && get_input_tensor(1)->data_dim[1] == 0 )
 		{
 			ERROR("Bad input/unhandled: 2 vectors to MatMulInteger");
 		}
-		else if( inputs[0]->data_dim[1] == 0 )
+		else if( get_input_tensor(0)->data_dim[1] == 0 )
 		{
-			cols = inputs[1]->data_dim[1];
-			if( inputs[0]->data_dim[0] == inputs[1]->data_dim[0] )
+			cols = get_input_tensor(1)->data_dim[1];
+			if( get_input_tensor(0)->data_dim[0] == get_input_tensor(1)->data_dim[0] )
 				rows = 1;
 			else
-				rows = inputs[0]->data_dim[0];
+				rows = get_input_tensor(0)->data_dim[0];
 		}
 		else
 		{
-			rows = inputs[0]->data_dim[0];
-			if( inputs[0]->data_dim[1] == inputs[1]->data_dim[0] )
+			rows = get_input_tensor(0)->data_dim[0];
+			if( get_input_tensor(0)->data_dim[1] == get_input_tensor(1)->data_dim[0] )
 				cols = 1;
 			else
-				cols = inputs[1]->data_dim[0];
+				cols = get_input_tensor(1)->data_dim[0];
 		}
 	}
};
diff --git a/src/nodes/maxpool.h b/src/nodes/maxpool.h
index 12abb24..ccebb6d 100644
--- a/src/nodes/maxpool.h
+++ b/src/nodes/maxpool.h
@@ -92,7 +92,7 @@ class MaxPool : public Pooling {
 
 	virtual void resolve(void) override
 	{
-		register_input(inputs[0], "x");
+		name_input(0, "x");
 
 		resolve_strides();
 		resolve_dilations();
@@ -117,8 +117,8 @@ class MaxPool : public Pooling {
 		register_output(indices_out, "Indices");
 	}
 	const Tensor* get_Indices(void) const {
-		if( outputs[1]->name != "" )
-			return outputs[1];
+		if( get_output_tensor(1)->name != "" )
+			return get_output_tensor(1);
 		else
 			return nullptr;
 	}
diff --git a/src/nodes/pad.cc b/src/nodes/pad.cc
index b2374d5..a20b8d6 100644
--- a/src/nodes/pad.cc
+++ b/src/nodes/pad.cc
@@ -26,19 +26,19 @@ void Pad::parseAttributes( onnx::NodeProto &node )
 /* Assign input tensors, resolve output tensor shapes, allocate output tensors */
 void Pad::resolve(void)
 {
-	const Tensor *data = inputs[0];
-	register_input(inputs[0], "data");
+	const Tensor *data = get_input_tensor(0);
+	name_input(0, "data");
 
 	const Tensor *pads_tensor = nullptr;
-	if (inputs.size() > 1) {
-		pads_tensor = inputs[1];
-		register_input(pads_tensor, "pads");
+	if (get_number_of_inputs() > 1) {
+		pads_tensor = get_input_tensor(1);
+		name_input(1, "pads");
 	}
 
 	const Tensor *constant_value= nullptr;
-	if (inputs.size() > 2) {
+	if (get_number_of_inputs() > 2) {
 		// This is not a tensor but a scalar. Not sure how to handle - the first scalar in onnx2c :)
-		constant_value = inputs[2];
-		register_input(constant_value, "constant_value");
+		constant_value = get_input_tensor(2);
+		name_input(2, "constant_value");
 	}
 
 	if (pads_tensor && pads_tensor->isConst == false)
@@ -109,8 +109,8 @@ void Pad::print(std::ostream &dst) const
 	INDT_1 << " * mode: " << mode << std::endl;
 	INDT_1 << " */" << std::endl;
 
-	const Tensor *data = inputs[0];
-	const Tensor *output = outputs[0];
+	const Tensor *data = get_input_tensor(0);
+	const Tensor *output = get_output_tensor(0);
 
 	std::string iidxs = "";
 	std::string oidxs = "";
diff --git a/src/nodes/range.h b/src/nodes/range.h
index 00d01b5..5c1efa0 100644
--- a/src/nodes/range.h
+++ b/src/nodes/range.h
@@ -27,9 +27,9 @@ class Range : public Node {
 	void resolve_limits()
 	{
 		data_type v_start, v_limit, v_delta;
-		const Tensor *start = inputs[0];
-		const Tensor *limit = inputs[1];
-		const Tensor *delta = inputs[2];
+		const Tensor *start = get_input_tensor(0);
+		const Tensor *limit = get_input_tensor(1);
+		const Tensor *delta = get_input_tensor(2);
 
 		v_start = resolve_input_var(start);
 		v_limit = resolve_input_var(limit);
@@ -42,15 +42,14 @@ class Range : public Node {
 	/* Assign input tensors, resolve output tensor shapes, allocate output tensors */
 	virtual void resolve(void) override
 	{
-
-		if (inputs.size() != 3)
+		if (get_number_of_inputs() != 3)
 			ERROR("Range node does not have 3 inputs");
-		const Tensor *start = inputs[0];
-		const Tensor *limit = inputs[1];
-		const Tensor *delta = inputs[2];
-		register_input(start, "start_arg");
-		register_input(limit, "limit_arg");
-		register_input(delta, "delta_arg");
+		const Tensor *start = get_input_tensor(0);
+		const Tensor *limit = get_input_tensor(1);
+		const Tensor *delta = get_input_tensor(2);
+		name_input(0, "start_arg");
+		name_input(1, "limit_arg");
+		name_input(2, "delta_arg");
 
 		if( start->isConst == false )
 			ERROR("Unimplemented: non-constant input (start) to Range node");
@@ -84,13 +83,9 @@ class Range : public Node {
 	/* Body of the node implementing function */
 	virtual void print(std::ostream &dst) const override
 	{
-		const Tensor *start = inputs[0];
+		const Tensor *start = get_input_tensor(0);
 		std::string dt = start->data_type_str();
 
-		INDT_1 << "/* Range" << std::endl;
-		INDT_1 << " */" << std::endl;
-
 		INDT_1 << dt <<" start = start_arg[0];" << std::endl;
 		INDT_1 << dt <<" delta = delta_arg[0];" << std::endl;
 		INDT_1 << "for(int i=0; i< "<< output_size << "; ++i) {" << std::endl;
diff --git a/src/nodes/relu.h b/src/nodes/relu.h
index 12dd7f7..69c4689 100644
--- a/src/nodes/relu.h
+++ b/src/nodes/relu.h
@@ -10,7 +10,7 @@ class Relu : public Node {
 
 	virtual void print(std::ostream &dst) const override
 	{
-		const Tensor *X=inputs[0];
+		const Tensor *X=get_input_tensor(0);
 		std::string type = X->data_type_str();
 
 		dst << "\t/* Relu */" << std::endl;
@@ -25,8 +25,8 @@ class Relu : public Node {
 
 	virtual void resolve(void) override
 	{
-		const Tensor *X = inputs[0];
-		register_input(X, "X");
+		const Tensor *X = get_input_tensor(0);
+		name_input(0, "X");
 
 		if(( typeConstraint_allFloatingPoints(X)
 		   ||typeConstraint_signed_integers(X) ) == false )
 			ERROR("Incorrect input for Relu");
diff --git a/src/nodes/reshape.h b/src/nodes/reshape.h
index 6c3044b..127fe37 100644
--- a/src/nodes/reshape.h
+++ b/src/nodes/reshape.h
@@ -24,7 +24,7 @@ class Reshape : public Node {
 
 	virtual void print(std::ostream &dst) const override
 	{
-		const Tensor *data = inputs[0];
+		const Tensor *data = get_input_tensor(0);
 		std::string type = data->data_type_str();
diff --git a/src/nodes/reshape.h b/src/nodes/reshape.h
index 6c3044b..127fe37 100644
--- a/src/nodes/reshape.h
+++ b/src/nodes/reshape.h
@@ -24,7 +24,7 @@ class Reshape : public Node {
 	virtual void print(std::ostream &dst) const override
 	{
-		const Tensor *data = inputs[0];
+		const Tensor *data = get_input_tensor(0);
 		std::string type = data->data_type_str();
 
 		/* TODO: is there ANY case where a reshape needs to re-order the internal data layout ? */
@@ -44,10 +44,10 @@ class Reshape : public Node {
 	virtual void resolve(void) override
 	{
-		const Tensor *data= inputs[0];
-		register_input(data, "data");
-		const Tensor *shape = inputs[1];
-		register_input(shape, "shape");
+		const Tensor *data= get_input_tensor(0);
+		name_input(0, "data");
+		const Tensor *shape = get_input_tensor(1);
+		name_input(1, "shape");
 
 		/* Reshape should allow only int64_t here,
 		 * but that is a pointless restriction at this stage and does not play well
@@ -57,7 +57,7 @@ class Reshape : public Node {
 			ERROR("Incorrect input for node");
 
-		if( shape->initialize == false ) {
+		if( shape->isConst == false ) {
 			ERROR("Reshaping to a run-time defined shape is not supported");
 		}
diff --git a/src/nodes/resize.h b/src/nodes/resize.h
index df0ddd3..b882924 100644
--- a/src/nodes/resize.h
+++ b/src/nodes/resize.h
@@ -51,22 +51,22 @@ class Resize : public Node {
 	const Tensor *get_roi(void) const
 	{
-		if(inputs.size() > 1 && inputs[1]->is_used() )
-			return inputs[1];
+		if(get_number_of_inputs() > 1 && get_input_tensor(1)->is_used() )
+			return get_input_tensor(1);
 		else
 			return nullptr;
 	}
 	const Tensor *get_scales(void) const
 	{
-		if(inputs.size() > 2 && inputs[2]->is_used() )
-			return inputs[2];
+		if(get_number_of_inputs() > 2 && get_input_tensor(2)->is_used() )
+			return get_input_tensor(2);
 		else
 			return nullptr;
 	}
 	const Tensor *get_sizes(void) const
 	{
-		if(inputs.size() > 3)
-			return inputs[3];
+		if(get_number_of_inputs() > 3)
+			return get_input_tensor(3);
 		else
 			return nullptr;
 	}
@@ -75,18 +75,18 @@ class Resize : public Node {
 	/* Assign input tensors, resolve output tensor shapes, allocate output tensors */
 	virtual void resolve(void) override
 	{
-		const Tensor *X = inputs[0];
+		const Tensor *X = get_input_tensor(0);
 		const Tensor *roi = get_roi();
 		const Tensor *scales = get_scales();
 		const Tensor *sizes = get_sizes();
-		register_input(X, "X");
+		name_input(0, "X");
 		if (roi)
-			register_input(roi, "roi");
+			name_input(1, "roi");
 		if (scales)
-			register_input(scales, "scales");
+			name_input(2, "scales");
 		if (sizes)
-			register_input(sizes, "sizes");
+			name_input(3, "sizes");
 
 		// "One of 'scales' and 'sizes' MUST be specified and it is an error if both are specified."
 		if (scales == NULL && sizes == NULL)
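Note: the quoted spec sentence implies a two-sided check, but the hunk is cut off after the both-missing test. The full rule, as a standalone sketch (the helper name is invented; ERROR() is the macro used throughout this diff):

    struct Tensor;  // as in onnx2c

    // Sketch of the ONNX Resize constraint quoted above: exactly one of
    // 'scales' and 'sizes' may be present.
    static void check_scales_vs_sizes(const Tensor *scales, const Tensor *sizes)
    {
        if( scales == nullptr && sizes == nullptr )
            ERROR("Resize: one of 'scales' or 'sizes' must be given");
        if( scales != nullptr && sizes != nullptr )
            ERROR("Resize: 'scales' and 'sizes' are mutually exclusive");
    }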
@@ -129,8 +129,8 @@ class Resize : public Node {
 	/* Print the coordinate transform algorithm, without integer roundings. */
 	std::string coordinate_transformation( int dim, std::string y_coordinate) const
 	{
-		const Tensor *X = inputs[0];
-		const Tensor *Y = outputs[0];
+		const Tensor *X = get_input_tensor(0);
+		const Tensor *Y = get_output_tensor(0);
 		std::string scale = std::to_string(dim_scales[dim]);
 		std::string x_dimsize = std::to_string(X->data_dim[dim]);
 		std::string y_dimsize = std::to_string(Y->data_dim[dim]);
@@ -176,7 +176,7 @@ class Resize : public Node {
 	/* For the mode 'nearest', calculate the rounding of x_resized to indexes */
 	std::string x_coord_nearest( int dim) const
 	{
-		const Tensor *X = inputs[0];
+		const Tensor *X = get_input_tensor(0);
 		std::string x_dimsize = std::to_string(X->data_dim[dim]);
 		std::string x_resized = "x_orig_" + std::to_string(dim);
 		// Apply rounding
@@ -209,7 +209,7 @@ class Resize : public Node {
 	void print_calc_nearest(std::ostream &dst) const
 	{
-		const Tensor *Y = outputs[0];
+		const Tensor *Y = get_output_tensor(0);
 		std::string out = "Y";
 		std::string in = "X";
 		unsigned n_data_dims = Y->rank();
@@ -223,8 +223,8 @@ class Resize : public Node {
 	void print_calc_linear(std::ostream &dst) const
 	{
-		const Tensor *X = inputs[0];
-		const Tensor *Y = outputs[0];
+		const Tensor *X = get_input_tensor(0);
+		const Tensor *Y = get_output_tensor(0);
 		std::string out = "Y";
 		std::string in = "X";
 		std::vector<int> interpolate_dims;
@@ -344,7 +344,7 @@ class Resize : public Node {
 	INDT_1 << " * " << s << std::endl;
 	INDT_1 << " */" << std::endl;
 
-	const Tensor *Y = outputs[0];
+	const Tensor *Y = get_output_tensor(0);
 	unsigned n_data_dims = Y->rank();
 
 	// loop over output
diff --git a/src/nodes/scatternd.cc b/src/nodes/scatternd.cc
index e28b9bf..7e0a156 100644
--- a/src/nodes/scatternd.cc
+++ b/src/nodes/scatternd.cc
@@ -22,16 +22,14 @@ void ScatterND::parseAttributes( onnx::NodeProto &node )
 
 void ScatterND::resolve(void)
 {
-	if (inputs.size() != 3) {
+	if (get_number_of_inputs() != 3) {
 		ERROR("Wrong number of inputs to ScatterND");
 	}
-	const Tensor *data = inputs[0];
-	const Tensor *indices = inputs[1];
-	const Tensor *updates = inputs[2];
-	register_input(data, "data");
-	register_input(indices, "indices");
-	register_input(updates, "updates");
+	name_input(0, "data");
+	name_input(1, "indices");
+	name_input(2, "updates");
+	const Tensor *data = get_input_tensor(0);
 	Tensor *t = new Tensor;
 	t->data_dim = data->data_dim;
 	t->data_type = data->data_type;
@@ -41,9 +39,9 @@ void ScatterND::resolve(void)
 
 void ScatterND::print(std::ostream &dst) const
 {
-	const Tensor *data = inputs[0];
-	const Tensor *indices = inputs[1];
-	const Tensor *output = outputs[0];
+	const Tensor *data = get_input_tensor(0);
+	const Tensor *indices = get_input_tensor(1);
+	const Tensor *output = get_output_tensor(0);
 	unsigned k = indices->data_dim[indices->rank()-1];
 
 	std::string data_op="=";
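Note: ScatterND::resolve() above shows the output-allocation idiom used across the node classes: the output tensor is heap-allocated, copies the shape and element type of `data`, and is handed to register_output(). Reduced to its core with minimal stand-in types (onnx2c's real Tensor carries much more state):

    #include <vector>

    struct Tensor {
        std::vector<int> data_dim;   // shape
        int data_type;               // element type tag
    };

    // Allocate an output tensor shaped and typed like 'data'. The graph
    // is assumed to take ownership once it is registered.
    Tensor *allocate_output_like(const Tensor *data)
    {
        Tensor *t = new Tensor;
        t->data_dim  = data->data_dim;
        t->data_type = data->data_type;
        return t;   // caller passes this to register_output()
    }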
diff --git a/src/nodes/shape.h b/src/nodes/shape.h
index a76cb2b..062a072 100644
--- a/src/nodes/shape.h
+++ b/src/nodes/shape.h
@@ -13,8 +13,8 @@ class Shape : public Node {
 	virtual void resolve(void) override
 	{
-		const Tensor *data = inputs[0];
-		register_input(data, "data");
+		const Tensor *data = get_input_tensor(0);
+		name_input(0, "data");
 
 		Tensor *t = new Tensor;
 		t->data_dim.push_back(data->rank());
@@ -33,10 +33,8 @@ class Shape : public Node {
 	virtual void print(std::ostream &dst) const override
 	{
-		const Tensor *data = inputs[0];
-		const Tensor *output= outputs[0];
-
-		INDT_1 << "/* Shape */" << std::endl;
+		const Tensor *data = get_input_tensor(0);
+		const Tensor *output= get_output_tensor(0);
 
 		// In the odd case of the shape result being a graph output, print it.
 		// Otherwise those nodes that take Shape output have already
diff --git a/src/nodes/slice.h b/src/nodes/slice.h
index 63291bd..5582942 100644
--- a/src/nodes/slice.h
+++ b/src/nodes/slice.h
@@ -41,20 +41,20 @@ class Slice : public Node {
 	virtual void resolve(void) override
 	{
-		const Tensor *data = inputs[0];
+		const Tensor *data = get_input_tensor(0);
 		const Tensor *starts = nullptr;
 		const Tensor *ends= nullptr;
 		const Tensor *axes= nullptr;
 		const Tensor *steps= nullptr;
-		register_input(data, "data");
+		name_input(0, "data");
 
-		if (inputs.size() > 1) {
-			starts = inputs[1];
-			register_input(starts, "starts");
+		if (get_number_of_inputs() > 1) {
+			starts = get_input_tensor(1);
+			name_input(1, "starts");
 		}
-		if (inputs.size() > 2) {
-			ends = inputs[2];
-			register_input(ends, "ends");
+		if (get_number_of_inputs() > 2) {
+			ends = get_input_tensor(2);
+			name_input(2, "ends");
 		}
 
 		if( starts && starts->isConst == false )
@@ -62,13 +62,13 @@ class Slice : public Node {
 		if( ends && ends->isConst == false )
 			ERROR("Non-const inputs to Slice not handled");
 
-		if (inputs.size() > 3) {
-			axes = inputs[3];
-			register_input(axes, "axes");
+		if (get_number_of_inputs() > 3) {
+			axes = get_input_tensor(3);
+			name_input(3, "axes");
 		}
-		if (inputs.size() > 4) {
-			steps = inputs[4];
-			register_input(steps, "steps");
+		if (get_number_of_inputs() > 4) {
+			steps = get_input_tensor(4);
+			name_input(4, "steps");
 		}
 
 		// the output tensor
@@ -203,10 +203,9 @@ class Slice : public Node {
 	/* Body of the node implementing function */
 	virtual void print(std::ostream &dst) const override
 	{
-		const Tensor *data= inputs[0];
-		const Tensor *output = outputs[0];
+		const Tensor *data= get_input_tensor(0);
+		const Tensor *output = get_output_tensor(0);
 
-		INDT_1 << "/* Slice */" << std::endl;
 		std::string out_idx, in_idx;
 
 		// Loop over output dimensions & create the indexing arrays
diff --git a/src/nodes/softmax.h b/src/nodes/softmax.h
index cb55cb9..b53345c 100644
--- a/src/nodes/softmax.h
+++ b/src/nodes/softmax.h
@@ -60,7 +60,7 @@ class Softmax : public Node {
 	void print11(std::ostream &dst) const
 	{
-		const Tensor *input=inputs[0];
+		const Tensor *input=get_input_tensor(0);
 		std::string type = input->data_type_str();
 		unsigned n_dim = input->data_dim.size();
 		std::string expfunc = "expf";
@@ -131,7 +131,7 @@ class Softmax : public Node {
 	void print13(std::ostream &dst) const
 	{
-		const Tensor *input=inputs[0];
+		const Tensor *input=get_input_tensor(0);
 		std::string type = input->data_type_str();
 		unsigned num_dim = input->rank();
 
@@ -198,14 +198,14 @@ class Softmax : public Node {
 
 	virtual void resolve(void) override
 	{
-		if( inputs.size() != 1 )
+		if( get_number_of_inputs() != 1 )
 			ERROR("wrong number of inputs to Softmax");
-		register_input(inputs[0], "input");
+		name_input(0, "input");
 
 		Tensor *rv = new Tensor;
-		rv->data_dim = inputs[0]->data_dim;
-		rv->data_type = inputs[0]->data_type;
+		rv->data_dim = get_input_tensor(0)->data_dim;
+		rv->data_type = get_input_tensor(0)->data_type;
 		register_output(rv, "output");
 	}
 };
diff --git a/src/nodes/spatialfilter.h b/src/nodes/spatialfilter.h
index 96bd35a..afb8740 100644
--- a/src/nodes/spatialfilter.h
+++ b/src/nodes/spatialfilter.h
@@ -28,14 +28,14 @@ class SpatialFilter : public Node {
 	std::vector<int> pads;
 	std::vector<int> strides;
 
-	const Tensor* get_X(void) const { return inputs[0]; }
+	const Tensor* get_X(void) const { return get_input_tensor(0); }
 	const Tensor* get_W(void) const {
-		if( inputs.size() > 1 )
-			return inputs[1];
+		if( get_number_of_inputs() > 1 )
+			return get_input_tensor(1);
 		else
 			return nullptr;
 	}
-	const Tensor* get_Y(void) const { return outputs[0]; }
+	const Tensor* get_Y(void) const { return get_output_tensor(0); }
 	uint32_t get_numDataDim(void) const {return get_X()->rank() - 2; }
 
 	virtual void parseAttributes( onnx::NodeProto &node ) override {
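Note: get_numDataDim() above encodes the ONNX NxCxD1..Dn tensor layout: axis 0 is batch, axis 1 is channels, and only the remaining axes are spatial. A self-contained illustration of that arithmetic, with a stand-in type and example shapes (not taken from the diff):

    #include <cassert>
    #include <vector>

    // Stand-in for Tensor::rank(); onnx2c's Tensor stores dims similarly.
    struct TensorDims {
        std::vector<int> data_dim;
        unsigned rank() const { return data_dim.size(); }
    };

    int main()
    {
        TensorDims X{{1, 3, 224, 224}};   // N=1, C=3, spatial H and W
        assert(X.rank() - 2 == 2);        // two spatial dims
        TensorDims X1d{{1, 16, 100}};     // 1-D convolution input
        assert(X1d.rank() - 2 == 1);      // one spatial dim
        return 0;
    }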
diff --git a/src/nodes/squeeze.h b/src/nodes/squeeze.h
index d3c5622..a60b413 100644
--- a/src/nodes/squeeze.h
+++ b/src/nodes/squeeze.h
@@ -26,7 +26,7 @@ class Squeeze : public Node {
 	virtual void print(std::ostream &dst) const override
 	{
-		const Tensor *data = inputs[0];
+		const Tensor *data = get_input_tensor(0);
 		std::string type = data->data_type_str();
 
 		dst << "\t/*Squeeze*/" << std::endl;
@@ -41,12 +41,12 @@ class Squeeze : public Node {
 	virtual void resolve(void) override
 	{
-		const Tensor *data = inputs[0];
-		register_input(data, "input");
-		if (inputs.size() == 2) {
-			const Tensor *axes_tensor = inputs[1];
-			register_input(axes_tensor, "axes_tensor");
-			if (axes_tensor->initialize == false)
+		const Tensor *data = get_input_tensor(0);
+		name_input(0, "input");
+		if (get_number_of_inputs() == 2) {
+			const Tensor *axes_tensor = get_input_tensor(1);
+			name_input(1, "axes_tensor");
+			if (axes_tensor->isConst == false)
 				ERROR("provided axes are dynamic, not implemented");
 			for( unsigned i=0; (int)i<axes_tensor->data_num_elem(); i++) {
 				int64_t *rd = (int64_t*)axes_tensor->data_buffer; // axes data must be int64
diff --git a/src/nodes/transpose.h b/src/nodes/transpose.h
index 58ddbc6..5872047 100644
--- a/src/nodes/transpose.h
+++ b/src/nodes/transpose.h
@@ -29,7 +29,7 @@ class Transpose : public Node {
 	virtual void print(std::ostream &dst) const override
 	{
-		const Tensor *data = inputs[0];
+		const Tensor *data = get_input_tensor(0);
 		std::string type = data->data_type_str();
 		unsigned n_dim = data->data_dim.size();
 
@@ -68,11 +68,11 @@ class Transpose : public Node {
 	virtual void resolve(void) override
 	{
-		if( inputs.size() != 1 )
+		if( get_number_of_inputs() != 1 )
 			ERROR("wrong number of inputs to Transpose");
 
-		const Tensor *data = inputs[0];
-		register_input(data, "input");
+		const Tensor *data = get_input_tensor(0);
+		name_input(0, "input");
 
 		unsigned n_dim = data->data_dim.size();
 		// "By default, reverse the dimensions, otherwise permute the axes according to the values given."
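Note: Squeeze (above) and Unsqueeze (below) read their `axes` input while the C source is being generated, which is why a non-constant axes tensor is rejected: the raw data_buffer is walked as int64 at compile time. The idiom, extracted into a self-contained sketch (the stand-in struct and helper name are illustrative; data_buffer and data_num_elem() are used as in this diff):

    #include <cstdint>
    #include <vector>

    // Minimal stand-in for the Tensor fields the idiom touches.
    struct ConstTensor {
        void *data_buffer;   // raw initializer bytes from the ONNX model
        int   n_elem;
        int   data_num_elem() const { return n_elem; }
    };

    // Only legal because the caller has verified the tensor is constant.
    std::vector<int64_t> read_const_axes(const ConstTensor *axes_tensor)
    {
        std::vector<int64_t> axes;
        int64_t *rd = (int64_t*)axes_tensor->data_buffer; // axes data must be int64
        for( int i = 0; i < axes_tensor->data_num_elem(); i++ )
            axes.push_back(rd[i]);
        return axes;
    }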
Malformatted ONNX?"); - const Tensor *axes_tensor = inputs[1]; - register_input(axes_tensor, "axes_tensor"); - if (axes_tensor->initialize == false) + const Tensor *axes_tensor = get_input_tensor(1); + name_input(1, "axes_tensor"); + if (axes_tensor->isConst == false) ERROR("provided axes are dynamic, not implmeneted"); for( unsigned i=0; (int)idata_num_elem(); i++) { int64_t *rd = (int64_t*)axes_tensor->data_buffer; // axes data must be int64 diff --git a/src/nodes/upsample.h b/src/nodes/upsample.h index 63730c0..6fd3b3b 100644 --- a/src/nodes/upsample.h +++ b/src/nodes/upsample.h @@ -15,10 +15,10 @@ class Upsample : public Resize { virtual void resolve(void) override { - const Tensor *X = inputs[0]; - const Tensor *scales = inputs[1]; - register_input(X, "X"); - register_input(scales, "scales"); + const Tensor *X = get_input_tensor(0); + const Tensor *scales = get_input_tensor(1); + name_input(0, "X"); + name_input(1, "scales"); if( scales->isConst == false ) ERROR("Unimplemented: Upsample 'sizes' input is not a compile-time constant: " + scales->name); diff --git a/src/optimization_passes/unionize_tensors.cpp b/src/optimization_passes/unionize_tensors.cpp index 623324b..f16fdc8 100644 --- a/src/optimization_passes/unionize_tensors.cpp +++ b/src/optimization_passes/unionize_tensors.cpp @@ -28,35 +28,49 @@ uint32_t Graph::add_to_free_union(Tensor *t) } void Graph::mark_union_unoccupied(uint32_t u) { + LOG(TRACE) << "\tunion " << u << " is unoccupied" << std::endl; tensor_unions[u]=NULL; } -// tag intermediate (graph internal) tensors + +// Entry to the Unionize Tensors optimization pass. +// This tags intermediate (graph internal) tensors // with union numbers so they can share memory // in a temporal fashion void Graph::unionize_tensors(void) { + LOG(INFO) << "Running Unionize optimization pass" << std::endl; for( auto n : nodes ) { n->isResolved = false; } for( auto n : nodes ) { - // for each output tensor of node - for( auto o : n->outputs ) { - // assign tensor to next free union - // if it is an internal tensor that gets - // calculated by a node. - if( o->is_used() == false ) - continue; - if( o->isIO == true ) - continue; - // the Constant node is a bit weird - this check must be in - if( o->isConst == true ) - continue; - if( o->initialize == true ) - continue; - add_to_free_union(o); - } + LOG(TRACE) << "\tunionizing outputs of node: " << n->onnx_name << std::endl; + // TODO: research out a nice code layout rule for calling lambdas. + // Leaving this suggestion here to be analyzed next time I read this code + n->forEachOutput( + [this](Tensor *o) + { + LOG(TRACE) << "\t\tconsidering output: " << o->name << std::endl; + LOG(TRACE) << "\t\t\t" << o->print_trace_dump() << std::endl; + // assign tensor to next free union + // if it is an internal tensor that gets + // calculated by a node. + if( o->is_used() == false ) + return; + if( o->isIO == true ) + return; + // the Constant node is a bit weird - this check must be in + if( o->isConst == true ) + return; + if( o->initialize == true ) + return; + LOG(TRACE) << "\t\t\tunionizing it!" 
diff --git a/src/tensor.cc b/src/tensor.cc
index 10692bb..0de250a 100644
--- a/src/tensor.cc
+++ b/src/tensor.cc
@@ -339,6 +339,8 @@ void Tensor::print_tensor_initializer(std::ostream &dst, int dim, int offs) const
 
 void Tensor::print_tensor(std::ostream &dst, bool is_callsite, std::string alternate_name, bool as_const) const
 {
+	// TODO: dupe code. Call print_tensor(string, bool, bool)!
+
 	if( is_callsite == false ) {
 		if( isConst || as_const )
 			dst << "const ";
diff --git a/src/util.cc b/src/util.cc
index f309c93..aea0ae9 100644
--- a/src/util.cc
+++ b/src/util.cc
@@ -155,3 +155,23 @@ bool isInt(onnx::TensorProto_DataType data_type)
 	       ||data_type == onnx::TensorProto_DataType_INT64;
 }
 
+void print_loops_over_dims(std::ostream &dst, const toC::Tensor *t, std::string prefix, unsigned indents)
+{
+	for( unsigned dim=0; dim<t->rank(); dim++) {
+		std::string loopvar = prefix + std::to_string(dim);
+		for(unsigned i=0; i<indents; i++)
+			dst << "\t";
+		dst << "for(unsigned " << loopvar << "=0; " << loopvar << "<" << t->data_dim[dim] << "; ";
+		dst << loopvar << "++) {"<< std::endl;
+	}
+}
+void print_loop_closes_over_dims(std::ostream &dst, const toC::Tensor *t, unsigned indents)
+{
+	for( unsigned dim=0; dim<t->rank(); dim++) {
+		for(unsigned i=0; i<indents; i++)
+			dst << "\t";
+		dst << "}" << std::endl;
+	}
+}
 	t->isIO = true;
-	t->initialize = true;
+	// Don't write the initialization from the onnx2c graph.
+	// It is written by the test suite, which is part of "the application",
+	// not the neural net.
+	t->initialize = false;
+	t->generate = false;
 	t->isConst=true;
 	if( t->name == "" )
 		t->name = std::string("input_") + std::to_string(input_number);
@@ -176,15 +180,17 @@ int main(int argc, char *argv[])
 #endif
 	for( auto i : inputs) {
+		std::string refname = "graphin_" + i->cname();
 		std::cout << "static ";
-		i->print_tensor(std::cout, false, i->cname());
+		i->print_tensor(std::cout, false, refname);
 		std::cout << " = ";
 		i->print_tensor_initializer(std::cout);
 		std::cout << ";" << std::endl;
 	}
 	for( auto o : outputs) {
+		std::string refname = "graphout_" + o->cname();
 		std::cout << "static ";
-		o->print_tensor(std::cout, false, o->cname());
+		o->print_tensor(std::cout, false, refname);
 		std::cout << ";" << std::endl;
 	}
 	// print the reference tensors
@@ -204,18 +210,14 @@ int main(int argc, char *argv[])
 	std::cout << "\t"<< "entry(";
 	bool isfirst = true;
 	for( auto i : inputs) {
-//		if( i-> isAliasOf )
-//			continue;
 		if( isfirst ) isfirst=false;
 		else std::cout << ", ";
-		std::cout << i->cname();
+		std::cout << "graphin_" + i->cname();
 	}
 	for( auto r : outputs ) {
-//		if( r->isAliasOf )
-//			continue;
 		if( isfirst ) isfirst=false;
 		else std::cout << ", ";
-		std::cout << r->cname();
+		std::cout << "graphout_"+r->cname();
 	}
 	std::cout << ");" << std::endl;
 	std::cout << std::endl;
@@ -227,7 +229,7 @@ int main(int argc, char *argv[])
 		Tensor *r = references[i];
 		Tensor *o = outputs[i];
 		//std::string outname = o->isAliasOf? o->isAliasOf->cname() : o->cname();
-		std::string outname = o->cname();
+		std::string outname = "graphout_" + o->cname();
 		std::string refname = "reference_" + r->cname();
 		std::string type = r->data_type_str();
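Note: the new util.cc helpers factor the nested output-loop boilerplate out of the node print() methods. Assuming the reconstruction of print_loops_over_dims() above, a tensor of shape {3, 4} with prefix "i" and indents=1 would make it emit C along these lines (the per-element body is printed by the calling node, not by the helper):

    	for(unsigned i0=0; i0<3; i0++) {
    	for(unsigned i1=0; i1<4; i1++) {
    		/* per-element code printed by the caller goes here */
    	}
    	}

print_loop_closes_over_dims() emits the matching closing braces, so a node's print() can bracket its element expression between the two calls instead of hand-rolling one loop per dimension.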