Add the unified LSTM api [WIP] #25502
Conversation
test=develop
Thanks for your contribution!
The overall LSTM-related work covers the items below (see #25502 (comment) for the test code):
Split along the C++ op / Python API boundary, items 1 and 2 are the requirements on the C++ side; I lean toward addressing both by reimplementing the cudnn lstm op. The option chosen for item 3 will also impose some additional requirements on the C++ side.
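As a rough sketch of the unified behavior this implies (all names below are illustrative assumptions, not the final API), the Python-side dispatch could look like:

import paddle.fluid as fluid

def lstm_forward(inputs, states, weights, use_cudnn=None):
    # Illustrative dispatch only: take the cudnn path when Paddle is built
    # with CUDA, otherwise fall back to an LSTM composed from basic ops.
    # _cudnn_lstm and _basic_lstm are hypothetical helpers, not real APIs.
    if use_cudnn is None:
        use_cudnn = fluid.core.is_compiled_with_cuda()
    if use_cudnn:
        return _cudnn_lstm(inputs, states, weights)  # would wrap the reimplemented cudnn lstm op
    return _basic_lstm(inputs, states, weights)  # e.g. an rnn assembled from an LSTMCell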
Test code; the results are wrong once the number of layers is changed or bidirectional is enabled:

import paddle
import paddle.fluid as fluid
from paddle.nn.layer import LSTM
import torch
import numpy as np

np.random.seed(123)
torch.manual_seed(123)
dtype = "float32"
device = torch.device('cuda:0')
x_size = seq_length, batch_size, input_size = 20, 2, 32
hidden_size = 32
gate_size = 4 * hidden_size
# results are wrong when n_layer > 1 or is_bi is True
n_layer, is_bi = 1, False
n_direct = 2 if is_bi else 1
dygraph = True

input = np.random.random(x_size).astype(dtype)
np_all_weights = []
for layer in range(n_layer):
    for direction in range(n_direct):
        layer_input_size = input_size if layer == 0 else hidden_size * n_direct
        w_ih = np.random.rand(gate_size, layer_input_size).astype(dtype)
        w_hh = np.random.rand(gate_size, hidden_size).astype(dtype)
        b_ih = np.random.rand(gate_size).astype(dtype)
        b_hh = np.random.rand(gate_size).astype(dtype)
        layer_params = [w_ih, w_hh, b_ih, b_hh]
        np_all_weights.extend(layer_params)

# reference result from PyTorch with the same weights
lstm = torch.nn.LSTM(input_size, hidden_size, num_layers=n_layer, bidirectional=is_bi).to(device)
for i, param in enumerate(lstm.parameters()):
    param.data = torch.from_numpy(np_all_weights[i]).to(device)
x = torch.from_numpy(input).to(device)
out, state = lstm(x)
print(out.cpu().detach().numpy(), out.shape)

def to_tensor(var, val):
    def set_var(var, ndarray):
        assert fluid.executor.global_scope().find_var(
            var.name) and fluid.executor.global_scope().find_var(
                var.name).get_tensor(), "Please do parameter initialization."
        t = fluid.executor.global_scope().find_var(var.name).get_tensor()
        p = t._place()
        if p.is_cpu_place():
            place = fluid.CPUPlace()
        elif p.is_cuda_pinned_place():
            place = fluid.CUDAPinnedPlace()
        else:
            p = fluid.core.Place()
            p.set_place(t._place())
            place = fluid.CUDAPlace(p.gpu_device_id())
        t.set(ndarray, place)

    val = to_numpy(val)
    if isinstance(var, fluid.core.VarBase):
        return var.set_value(val)
    set_var(var, val)
    return var

def to_numpy(var):
    if isinstance(var, np.ndarray):
        return var
    if isinstance(var, fluid.core.VarBase):
        return var.numpy()
    assert fluid.executor.global_scope().find_var(
        var.name) and fluid.executor.global_scope().find_var(
            var.name).get_tensor(), "Please do parameter initialization."
    t = fluid.executor.global_scope().find_var(var.name).get_tensor()
    return np.array(t)

place = paddle.fluid.CUDAPlace(0)
if dygraph:
    paddle.fluid.enable_dygraph(place)
pd_lstm = paddle.nn.layer.rnn.LSTM(
    input_size, hidden_size, num_layers=n_layer,
    direction="bidirect" if is_bi else "forward",
    time_major=True, dtype=dtype)
if not dygraph:
    executor = paddle.fluid.Executor(place)
    executor.run(paddle.fluid.default_startup_program())
pd_lstm.set_parameter_values(np_all_weights)
# check that the converted parameters match PyTorch's
for p1, p2 in zip(lstm.parameters(), pd_lstm.lstm.parameters()):
    p1 = p1.cpu().detach().numpy()
    p2 = to_numpy(p2)
    assert np.allclose(p1, p2)
x = paddle.fluid.dygraph.to_variable(input) if dygraph else fluid.data(
    name='x', shape=input.shape, dtype=input.dtype)
x.stop_gradient = False
out, state = pd_lstm(x)
out = out.numpy() if dygraph else executor.run(feed={'x': input}, fetch_list=[out])[0]
print(out, out.shape)

Using the static-graph API directly also produces anomalous results; for example, an init_h with the wrong shape still yields output:

import numpy as np
import paddle.fluid as fluid
import paddle.fluid.layers as layers

fluid.default_main_program().random_seed = 123
fluid.default_startup_program().random_seed = 123
np.random.seed(123)

emb_dim = 20  # 256
vocab_size = 10000
data = fluid.data(name='x', shape=[None, None], dtype='int64')
emb = fluid.embedding(input=data, size=[vocab_size, emb_dim], is_sparse=True)
batch_size = 1  # 20
max_len = 20  # 10
dropout_prob = 0.0
input_size = 100
hidden_size = 20  # 150
num_layers = 1
# init_h/init_c deliberately use a wrong last dimension (hidden_size - 2),
# yet layers.lstm still runs and returns results
init_h = layers.fill_constant([num_layers * 1, batch_size, hidden_size - 2], 'float32', 0.0)
init_c = layers.fill_constant([num_layers * 1, batch_size, hidden_size - 2], 'float32', 0.0)
rnn_out, last_h, last_c = layers.lstm(
    emb, init_h, init_c, max_len + 10, hidden_size, num_layers,
    dropout_prob=dropout_prob, seed=123, is_bidirec=True)
rnn_out.shape  # (-1, 100, 150)
last_h.shape   # (1, 20, 150)
last_c.shape   # (1, 20, 150)

place = fluid.CUDAPlace(0)
executor = fluid.Executor(place)
executor.run(fluid.default_startup_program())
out = executor.run(
    feed={'x': np.random.randint(0, 100, (max_len, batch_size)).astype('int64')},
    fetch_list=[rnn_out])[0]
print(out, out.shape)
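For reference, the cudnn LSTM expects init_h/init_c of shape [num_layers * num_directions, batch_size, hidden_size], so the mismatched shape above should arguably be rejected before the kernel runs. A minimal sketch of such a check (a hypothetical helper, not something layers.lstm does today):

def check_init_state(init_state, num_layers, is_bidirec, batch_size, hidden_size):
    # Hypothetical validation, sketched for illustration only.
    num_directions = 2 if is_bidirec else 1
    expected = (num_layers * num_directions, batch_size, hidden_size)
    assert tuple(init_state.shape) == expected, (
        "init state shape %s does not match expected %s"
        % (init_state.shape, expected))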
The dygraph and static-graph interfaces merged into one test script; this passes:

import torch
import numpy as np
import paddle
import paddle.fluid as fluid
from paddle.nn.layer import LSTM
import paddle.fluid.layers as layers

fluid.default_main_program().random_seed = 123
fluid.default_startup_program().random_seed = 123
np.random.seed(123)
torch.manual_seed(123)
dtype = "float32"
device = torch.device('cuda:0')
x_size = seq_length, batch_size, input_size = 20, 2, 32
hidden_size = 32
gate_size = 4 * hidden_size
n_layer, is_bi = 2, False
n_direct = 2 if is_bi else 1
dygraph = False  # True

input = np.random.random(x_size).astype(dtype)
np_all_weights = []
for layer in range(n_layer):
    for direction in range(n_direct):
        layer_input_size = input_size if layer == 0 else hidden_size * n_direct
        w_ih = np.random.rand(gate_size, layer_input_size).astype(dtype)
        w_hh = np.random.rand(gate_size, hidden_size).astype(dtype)
        b_ih = np.random.rand(gate_size).astype(dtype)
        b_hh = np.random.rand(gate_size).astype(dtype)
        layer_params = [w_ih, w_hh, b_ih, b_hh]
        np_all_weights.extend(layer_params)
np_flat_weight = np.concatenate([p.reshape([-1]) for p in np_all_weights])

# reference result from PyTorch with the same weights
lstm = torch.nn.LSTM(input_size, hidden_size, num_layers=n_layer, bidirectional=is_bi).to(device)
for i, param in enumerate(lstm.parameters()):
    param.data = torch.from_numpy(np_all_weights[i]).to(device)
x = torch.from_numpy(input).to(device)
out, state = lstm(x)
print(out.cpu().detach().numpy(), out.shape)

def to_tensor(var, val):
    def set_var(var, ndarray):
        assert fluid.executor.global_scope().find_var(
            var.name) and fluid.executor.global_scope().find_var(
                var.name).get_tensor(), "Please do parameter initialization."
        t = fluid.executor.global_scope().find_var(var.name).get_tensor()
        p = t._place()
        if p.is_cpu_place():
            place = fluid.CPUPlace()
        elif p.is_cuda_pinned_place():
            place = fluid.CUDAPinnedPlace()
        else:
            p = fluid.core.Place()
            p.set_place(t._place())
            place = fluid.CUDAPlace(p.gpu_device_id())
        t.set(ndarray, place)

    val = to_numpy(val)
    if isinstance(var, fluid.core.VarBase):
        return var.set_value(val)
    set_var(var, val)
    return var

def to_numpy(var):
    if isinstance(var, np.ndarray):
        return var
    if isinstance(var, fluid.core.VarBase):
        return var.numpy()
    assert fluid.executor.global_scope().find_var(
        var.name) and fluid.executor.global_scope().find_var(
            var.name).get_tensor(), "Please do parameter initialization."
    t = fluid.executor.global_scope().find_var(var.name).get_tensor()
    return np.array(t)

place = paddle.fluid.CUDAPlace(0)
if dygraph:
    paddle.fluid.enable_dygraph(place)
x = paddle.fluid.dygraph.to_variable(input) if dygraph else fluid.data(
    name='x', shape=input.shape, dtype=input.dtype)
x.stop_gradient = False
outs = []
pd_lstm = paddle.nn.layer.rnn.LSTM(
    input_size, hidden_size, num_layers=n_layer,
    direction="bidirect" if is_bi else "forward",
    time_major=True, dtype=dtype)
if not dygraph:
    # note: init_h/init_c again use a wrong last dimension (hidden_size - 10)
    init_h = layers.fill_constant([n_layer * n_direct, batch_size, hidden_size - 10], dtype, 0.0)
    init_c = layers.fill_constant([n_layer * n_direct, batch_size, hidden_size - 10], dtype, 0.0)
    rnn_out, last_h, last_c = layers.lstm(
        x, init_h, init_c, 50, hidden_size, n_layer,
        dropout_prob=0., seed=123, is_bidirec=is_bi)
    outs.append(rnn_out)
    executor = paddle.fluid.Executor(place)
    executor.run(paddle.fluid.default_startup_program())
    # fill the single flattened weight parameter of layers.lstm by matching shape
    all_params = fluid.default_main_program().all_parameters()
    for p in all_params:
        if p.shape == np_flat_weight.shape:
            to_tensor(p, np_flat_weight)
pd_lstm.set_parameter_values(np_all_weights)
out, state = pd_lstm(x)
outs.append(out)
if not dygraph:
    outs = executor.run(feed={'x': input}, fetch_list=outs)
for out in outs:
    out = to_numpy(out)
    print(out, out.shape)
# check that parameters match before and after conversion
for p1, p2 in zip(lstm.parameters(), pd_lstm.lstm.parameters()):
    p1 = p1.cpu().detach().numpy()
    p2 = to_numpy(p2)
    print(np.allclose(p1, p2))
Since you haven't replied for more than a year, we have closed this issue/PR.
PR types
New features
PR changes
APIs
Describe
Add a unified LSTM API that switches between cudnn and non-cudnn implementations.
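A minimal usage sketch in dygraph mode, following the test code above (in this WIP branch the layer lives at paddle.nn.layer.rnn.LSTM; the same layer is meant to take the cudnn path on GPU and the non-cudnn path otherwise):

import numpy as np
import paddle
import paddle.fluid as fluid

paddle.fluid.enable_dygraph(fluid.CUDAPlace(0))
lstm = paddle.nn.layer.rnn.LSTM(32, 32, num_layers=1, direction="forward",
                                time_major=True, dtype="float32")
x = fluid.dygraph.to_variable(np.random.random((20, 2, 32)).astype("float32"))
out, state = lstm(x)  # out shape: [seq_len, batch_size, hidden_size]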