Memory optimization for VGG and ResNet in image classification demo #7385

Closed · wants to merge 3 commits
Changes from all commits
4 changes: 4 additions & 0 deletions paddle/framework/executor.cc
@@ -117,6 +117,7 @@ void Executor::Run(const ProgramDesc& pdesc, Scope* scope, int block_id,
auto op = paddle::framework::OpRegistry::CreateOp(*op_desc);
VLOG(3) << op->DebugStringEx(local_scope);
op->Run(*local_scope, place_);
LOG(INFO) << "Memory used " << memory::memory_usage(place_);
if (FLAGS_check_nan_inf) {
for (auto& vname : op->OutputVars(true)) {
auto* var = local_scope->FindVar(vname);
@@ -127,9 +128,12 @@ void Executor::Run(const ProgramDesc& pdesc, Scope* scope, int block_id,
}
}
}
LOG(INFO) << "Memory used " << memory::memory_usage(place_);
if (create_vars && create_local_scope) {
scope->DeleteScope(local_scope);
}
LOG(INFO) << "Memory used after deleting local scope "
<< memory::memory_usage(place_);
}

} // namespace framework
3 changes: 2 additions & 1 deletion paddle/framework/scope.cc
@@ -88,8 +88,9 @@ void Scope::DeleteScope(Scope* scope) {
auto it = std::find(this->kids_.begin(), this->kids_.end(), scope);
PADDLE_ENFORCE(it != this->kids_.end(), "Cannot find %p as kid scope", scope);
this->kids_.erase(it);
delete scope;
// Make delete async.
Async([scope] { delete scope; });
}

void Scope::Rename(const std::string& origin_name,
3 changes: 3 additions & 0 deletions python/paddle/v2/fluid/framework.py
@@ -236,6 +236,9 @@ def to_string(self, throw_on_error):

__repr__ = __str__

def set_desc(self, input):
self.desc = input

@property
def persistable(self):
return self.desc.persistable()
17 changes: 16 additions & 1 deletion python/paddle/v2/fluid/layers/nn.py
@@ -14,7 +14,7 @@
'chunk_eval', 'sequence_conv', 'conv2d', 'sequence_pool', 'pool2d',
'batch_norm', 'beam_search_decode', 'conv2d_transpose', 'sequence_expand',
'lstm_unit', 'reduce_sum', 'reduce_mean', 'reduce_max', 'reduce_min',
'sequence_first_step', 'sequence_last_step'
'sequence_first_step', 'sequence_last_step', 'dropout'
]


@@ -386,6 +386,21 @@ def cos_sim(X, Y, **kwargs):
return out


def dropout(x, dropout_prob, is_test=False, seed=0, **kwargs):
helper = LayerHelper('dropout', **kwargs)
out = helper.create_tmp_variable(dtype=x.dtype)
mask = helper.create_tmp_variable(dtype=x.dtype, stop_gradient=True)
helper.append_op(
type='dropout',
inputs={'X': [x]},
outputs={'Out': [out],
'Mask': [mask]},
attrs={'dropout_prob': dropout_prob,
'is_test': is_test,
'seed': seed})
return out
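
For context, a minimal usage sketch of the new dropout layer (the surrounding network is hypothetical, assuming the usual fluid layer API):

import paddle.v2.fluid as fluid

x = fluid.layers.data(name='x', shape=[784], dtype='float32')
hidden = fluid.layers.fc(input=x, size=200, act='relu')
# dropout_prob is the probability of dropping each element;
# pass is_test=True at inference time to disable dropout.
dropped = fluid.layers.dropout(x=hidden, dropout_prob=0.5, is_test=False)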


def cross_entropy(input, label, **kwargs):
"""
**Cross Entropy Layer**
1 change: 0 additions & 1 deletion python/paddle/v2/fluid/layers/ops.py
@@ -7,7 +7,6 @@
__all__ = [
'mean',
'mul',
'dropout',
'reshape',
'scale',
'transpose',
77 changes: 59 additions & 18 deletions python/paddle/v2/fluid/memory_optimization_transpiler.py
@@ -3,6 +3,17 @@
from framework import Program, default_main_program, Parameter, Variable
import backward
from backward import _rename_arg_
from . import core

dtype_to_size = {
core.DataType.FP16: 2,
core.DataType.FP32: 4,
core.DataType.FP64: 8,
core.DataType.INT16: 2,
core.DataType.INT32: 4,
core.DataType.INT64: 8,
core.DataType.BOOL: 1
}
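
The table above gives each dtype's width in bytes. A hedged sketch of how it could be combined with a variable's shape to estimate its memory footprint, per the TODO further down about comparing byte sizes rather than raw dtypes (the helper name _var_bytes is hypothetical):

def _var_bytes(block_desc, name):
    # bytes = number of elements * width of one element;
    # a -1 (batch) dimension would need special handling
    var_desc = block_desc.var(str(name))
    numel = reduce(lambda a, b: a * b, var_desc.shape(), 1)
    return numel * dtype_to_size[var_desc.dtype()]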


class ControlFlowGraph(object):
@@ -28,18 +39,33 @@ def _build_graph(self):
block_size = program_desc.num_blocks()

# TODO(qijun) handle Program with if/while operators
self.global_block = program_desc.block(0)
self.op_size = self.global_block.op_size()
self.global_block_desc = program_desc.block(0)
self.op_size = self.global_block_desc.op_size()

op_node_connections = [(i, i + 1) for i in range(self.op_size - 1)]
self._add_connections(op_node_connections)

self.ops = [self.global_block.op(i) for i in range(self.op_size)]
self.ops = [self.global_block_desc.op(i) for i in range(self.op_size)]

for i in range(self.op_size):
self._uses[i].update(self.ops[i].input_arg_names())
self._defs[i].update(self.ops[i].output_arg_names())

def _update_graph(self, old_name, new_name, begin_idx=0):
for i in range(begin_idx, self.op_size):
if old_name in self._uses[i]:
self._uses[i].remove(old_name)
self._uses[i].add(new_name)
if old_name in self._defs[i]:
self._defs[i].remove(old_name)
self._defs[i].add(new_name)
if old_name in self._live_in[i]:
self._live_in[i].remove(old_name)
self._live_in[i].add(new_name)
if old_name in self._live_out[i]:
self._live_out[i].remove(old_name)
self._live_out[i].add(new_name)

def _reach_fixed_point(self, live_in, live_out):
if len(live_in) != len(self._live_in):
return False
@@ -79,30 +105,45 @@ def memory_optimize(self):
self.pool = []
for i in range(self.op_size):
if self.pool:
out_pair = [(x, self.global_block.var(str(x)).shape())
out_pair = [(x, self.global_block_desc.var(str(x)).shape())
for x in self._defs[i]]
for x, x_shape in out_pair:
for index, cache_pair in enumerate(self.pool):
cache_var = cache_pair[0]
cache_shape = cache_pair[1]
if x_shape == cache_shape:
print(
"Hit Cache !!!! cache pool index is %d, var name is %s, cached var name is %s, var shape is %s "
% (index, x, cache_var, str(cache_shape)))
self.pool.pop(index)
_rename_arg_(self.ops, x, cache_var, begin_idx=i)
self._dataflow_analyze()
break
if not self.global_block_desc.var(str(x)).persistable():
for index, cache_pair in enumerate(self.pool):
cache_var = cache_pair[0]
cache_shape = cache_pair[1]
if x_shape == cache_shape:
x_dtype = self.global_block_desc.var(str(
x)).dtype()
cache_dtype = self.global_block_desc.var(
str(cache_var)).dtype()
# TODO(qijun): actually, we should compare dtype_to_size[x_dtype]
# and dtype_to_size[cache_dtype]
if x_dtype == cache_dtype:
print(
"Hit Cache !!!! cache pool index is %d, var name is %s, cached var name is %s, var shape is %s "
%
(index, x, cache_var, str(cache_shape)))
self.pool.pop(index)
_rename_arg_(
self.ops, x, cache_var, begin_idx=i)
self._program.current_block().var(str(
x)).desc = self.global_block_desc.var(
str(cache_var))
self._update_graph(
x, cache_var, begin_idx=i)
break

in_diff, out_diff = self._get_diff(self._live_in[i],
self._live_out[i])
can_optimize = filter(
lambda x: not self.global_block.var(str(x)).persistable(),
lambda x: not self.global_block_desc.var(str(x)).persistable(),
in_diff)
if can_optimize:
for var_name in can_optimize:
self.pool.append((
var_name, self.global_block.var(str(var_name)).shape()))
self.pool.append(
(var_name,
self.global_block_desc.var(str(var_name)).shape()))
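
Condensed, the cache-hit rule above reuses a pooled variable only for a non-persistable output whose shape and dtype both match the cached entry; a simplified sketch of that decision, with a hypothetical helper name:

def _can_reuse(block_desc, x, x_shape, cache_var, cache_shape):
    var_desc = block_desc.var(str(x))
    # persistable variables (e.g. parameters) are never reused
    if var_desc.persistable():
        return False
    # require identical shape and dtype; the TODO above suggests
    # comparing dtype_to_size widths instead of exact dtypes
    return (x_shape == cache_shape and
            var_desc.dtype() == block_desc.var(str(cache_var)).dtype())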

def get_program(self):
return self._program
10 changes: 7 additions & 3 deletions python/paddle/v2/fluid/tests/book/test_fit_a_line.py
@@ -11,10 +11,13 @@
cost = fluid.layers.square_error_cost(input=y_predict, label=y)
avg_cost = fluid.layers.mean(x=cost)

sgd_optimizer = fluid.optimizer.SGD(learning_rate=0.001)
sgd_optimizer = fluid.optimizer.Adam(learning_rate=0.1)
sgd_optimizer.minimize(avg_cost)

BATCH_SIZE = 20
# memopt_program = fluid.default_main_program()
memopt_program = fluid.memory_optimize(fluid.default_main_program())

BATCH_SIZE = 200

train_reader = paddle.batch(
paddle.reader.shuffle(
@@ -32,10 +35,11 @@
fluid.io.save_persistables(exe, "./fit_a_line.model/")
fluid.io.load_persistables(exe, "./fit_a_line.model/")
for data in train_reader():
avg_loss_value, = exe.run(fluid.default_main_program(),
avg_loss_value, = exe.run(memopt_program,
feed=feeder.feed(data),
fetch_list=[avg_cost])

print avg_loss_value[0]
if avg_loss_value[0] < 10.0:
exit(0) # if avg cost is less than 10.0, we think our code is good.
exit(1)
python/paddle/v2/fluid/tests/book/test_image_classification_train.py
@@ -103,6 +103,9 @@ def conv_block(input, num_filter, groups, dropouts):

accuracy = fluid.evaluator.Accuracy(input=predict, label=label)

# memopt_program = fluid.default_main_program()
memopt_program = fluid.memory_optimize(fluid.default_main_program())

BATCH_SIZE = 128
PASS_NUM = 1

@@ -116,15 +119,18 @@ def conv_block(input, num_filter, groups, dropouts):
feeder = fluid.DataFeeder(place=place, feed_list=[images, label])
exe.run(fluid.default_startup_program())

i = 0
for pass_id in range(PASS_NUM):
accuracy.reset(exe)
for data in train_reader():
loss, acc = exe.run(fluid.default_main_program(),
loss, acc = exe.run(memopt_program,
feed=feeder.feed(data),
fetch_list=[avg_cost] + accuracy.metrics)
pass_acc = accuracy.eval(exe)
print("loss:" + str(loss) + " acc:" + str(acc) + " pass_acc:" + str(
pass_acc))
i = i + 1
# this model is slow, so if we can train two mini-batches, we think it works properly.
exit(0)
if i > 2:
exit(0)
exit(1)