Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

"add dynamic lstm scripts" #3

Merged
merged 5 commits into from
Dec 5, 2017
Merged
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
168 changes: 168 additions & 0 deletions paddle/understand_sentiment_dynamic_lstm.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,168 @@
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import argparse
import cProfile
import time

import numpy as np

import paddle.v2 as paddle
import paddle.v2.fluid as fluid
import paddle.v2.fluid.profiler as profiler


def parse_args(argv=None):
    """Parse command line options for the dynamic-LSTM benchmark.

    Args:
        argv: Optional list of argument strings. Defaults to None, in
            which case argparse reads sys.argv[1:] as before.

    Returns:
        argparse.Namespace holding the parsed options.
    """
    parser = argparse.ArgumentParser("LSTM model benchmark.")
    parser.add_argument(
        '--batch_size', type=int, default=32, help='The minibatch size.')
    parser.add_argument(
        '--stacked_num', type=int, default=3, help='Stacked LSTM Layer size.')
    parser.add_argument(
        '--emb_dim', type=int, default=32, help='The embedding dim.')
    parser.add_argument(
        '--hid_dim',
        type=int,
        default=32,
        # Help text fixed: it previously described the sequence length,
        # which was a copy-paste error.
        help='The hidden size of the LSTM layers.')
    parser.add_argument(
        '--iterations', type=int, default=35, help='The number of minibatches.')
    parser.add_argument(
        '--pass_num', type=int, default=100, help='The number of passes.')
    parser.add_argument(
        '--device',
        type=str,
        default='CPU',
        choices=['CPU', 'GPU'],
        help='The device type.')
    parser.add_argument(
        '--infer_only', action='store_true', help='If set, run forward only.')
    parser.add_argument(
        '--use_cprof', action='store_true', help='If set, use cProfile.')
    parser.add_argument(
        '--use_nvprof',
        # BUG FIX: was action='store_false', which made use_nvprof default
        # to True and turned the flag into a disable switch.
        action='store_true',
        help='If set, use nvprof for CUDA.')
    args = parser.parse_args(argv)
    return args


def print_arguments(args):
    """Pretty-print the parsed arguments, one "name: value" per line.

    Args:
        args: argparse.Namespace (or any object supporting vars()).
    """
    print('----------- Configuration Arguments -----------')
    # .items() works on both Python 2 and 3; .iteritems() is Python-2-only.
    for arg, value in sorted(vars(args).items()):
        print('%s: %s' % (arg, value))
    print('------------------------------------------------')


def dynamic_lstm_model(data, dict_dim, class_dim=2):
    """Build a stacked dynamic-LSTM sentiment classification network.

    Reads emb_dim / hid_dim / stacked_num from the module-level ``args``
    (set in ``__main__``), so parse_args() must have run before this is
    called.

    Args:
        data: fluid data layer of word ids, shape [N, 1], lod_level=1.
        dict_dim: Vocabulary size for the embedding table.
        class_dim: Number of output classes (default 2, pos/neg).

    Returns:
        The softmax prediction layer (shape [batch, class_dim]).
    """
    emb_dim = args.emb_dim
    hid_dim = args.hid_dim
    stacked_num = args.stacked_num

    # An odd depth keeps the topmost LSTM running in the forward direction
    # (even-numbered layers are reversed below). Raise instead of assert so
    # the check survives `python -O`.
    if stacked_num % 2 != 1:
        raise ValueError("stacked_num must be odd, got %d" % stacked_num)

    emb = fluid.layers.embedding(input=data, size=[dict_dim, emb_dim])

    # TODO(qijun) linear act
    fc1 = fluid.layers.fc(input=emb, size=hid_dim)
    lstm1, cell1 = fluid.layers.dynamic_lstm(input=fc1, size=hid_dim)

    inputs = [fc1, lstm1]

    # Alternate the direction on each stacked layer: even-numbered layers
    # run reversed, yielding a bidirectional stack.
    for i in range(2, stacked_num + 1):
        fc = fluid.layers.fc(input=inputs, size=hid_dim)
        lstm, cell = fluid.layers.dynamic_lstm(
            input=fc, size=hid_dim, is_reverse=(i % 2) == 0)
        inputs = [fc, lstm]

    # Max-pool both the last fc output and the last lstm output over time;
    # both pooled vectors feed the classifier, mirroring
    # understand_sentiment_lstm.py.
    fc_last = fluid.layers.sequence_pool(input=inputs[0], pool_type='max')
    lstm_last = fluid.layers.sequence_pool(input=inputs[1], pool_type='max')

    prediction = fluid.layers.fc(input=[fc_last, lstm_last],
                                 size=class_dim,
                                 act='softmax')

    return prediction


def to_lodtensor(data, place):
    """Pack a batch of variable-length sequences into one fluid LoDTensor.

    Args:
        data: Iterable of sequences, each a list of int word ids.
        place: Device place on which the tensor is allocated.

    Returns:
        A fluid.LoDTensor of shape [total_words, 1] with level-0 LoD
        offsets marking the sequence boundaries.
    """
    # Level-0 LoD is the list of cumulative sequence offsets, starting at 0.
    offsets = [0]
    for seq in data:
        offsets.append(offsets[-1] + len(seq))
    flat = np.concatenate(data, axis=0).astype("int64")
    flat = flat.reshape([len(flat), 1])
    tensor = fluid.LoDTensor()
    tensor.set(flat, place)
    tensor.set_lod([offsets])
    return tensor


def run_benchmark(model, args):
    """Train the sentiment model on IMDB and print per-batch metrics.

    Args:
        model: Callable ``(data_layer, dict_dim) -> prediction layer``,
            e.g. dynamic_lstm_model.
        args: Parsed command line options from parse_args().
    """
    if args.use_cprof:
        pr = cProfile.Profile()
        pr.enable()
    start_time = time.time()
    word_dict = paddle.dataset.imdb.word_dict()

    print("load word dict successfully")

    dict_dim = len(word_dict)

    data = fluid.layers.data(
        name="words", shape=[1], dtype="int64", lod_level=1)
    label = fluid.layers.data(name="label", shape=[1], dtype="int64")

    prediction = model(data, dict_dim)
    cost = fluid.layers.cross_entropy(input=prediction, label=label)
    avg_cost = fluid.layers.mean(x=cost)
    adam_optimizer = fluid.optimizer.Adam(learning_rate=0.002)
    adam_optimizer.minimize(avg_cost)
    accuracy = fluid.evaluator.Accuracy(input=prediction, label=label)

    train_reader = paddle.batch(
        paddle.reader.shuffle(
            paddle.dataset.imdb.train(word_dict),
            buf_size=args.batch_size * 10),
        batch_size=args.batch_size)
    place = fluid.CPUPlace() if args.device == 'CPU' else fluid.GPUPlace(0)
    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())

    for it, pass_id in enumerate(xrange(args.pass_num)):
        accuracy.reset(exe)
        # BUG FIX: was `if iter == args.iterations` — `iter` is the Python
        # builtin, so the comparison was always False and the iteration cap
        # never triggered.
        if it == args.iterations:
            break
        for data in train_reader():
            tensor_words = to_lodtensor(map(lambda x: x[0], data), place)

            label = np.array(map(lambda x: x[1], data)).astype("int64")
            label = label.reshape([args.batch_size, 1])

            tensor_label = fluid.LoDTensor()
            tensor_label.set(label, place)

            loss, acc = exe.run(
                fluid.default_main_program(),
                feed={"words": tensor_words,
                      "label": tensor_label},
                fetch_list=[avg_cost] + accuracy.metrics)
            pass_acc = accuracy.eval(exe)
            # NOTE(review): metrics are printed per batch; the scraped
            # source's indentation was lost, so confirm against the
            # upstream benchmark script.
            print("Iter: %d, loss: %s, acc: %s, pass_acc: %s" %
                  (it, str(loss), str(acc), str(pass_acc)))


if __name__ == '__main__':
    # Parse CLI options once, echo them, then run the benchmark — wrapped
    # in the CUDA profiler only when nvprof was requested on a GPU device.
    args = parse_args()
    print_arguments(args)
    profile_cuda = args.use_nvprof and args.device == 'GPU'
    if profile_cuda:
        with profiler.cuda_profiler("cuda_profiler.txt", 'csv'):
            run_benchmark(dynamic_lstm_model, args)
    else:
        run_benchmark(dynamic_lstm_model, args)