Skip to content

Commit

Permalink
Merge pull request #1769 from zh794390558/nnet
Browse files Browse the repository at this point in the history
[speechx] set nnet param by flags
  • Loading branch information
zh794390558 authored Apr 24, 2022
2 parents 312fc4e + d4ffa16 commit 87ef68f
Show file tree
Hide file tree
Showing 10 changed files with 67 additions and 38 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,10 @@ DEFINE_string(
DEFINE_string(model_output_names,
"softmax_0.tmp_0,tmp_5,concat_0.tmp_0,concat_1.tmp_0",
"model output names");
DEFINE_string(model_cache_names, "5-1-1024,5-1-1024", "model cache names");
DEFINE_string(model_cache_names,
"chunk_state_h_box,chunk_state_c_box",
"model cache names");
DEFINE_string(model_cache_shapes, "5-1-1024,5-1-1024", "model cache shapes");

using kaldi::BaseFloat;
using kaldi::Matrix;
Expand Down Expand Up @@ -77,7 +80,8 @@ int main(int argc, char* argv[]) {
ppspeech::ModelOptions model_opts;
model_opts.model_path = model_path;
model_opts.param_path = model_params;
model_opts.cache_shape = FLAGS_model_cache_names;
model_opts.cache_names = FLAGS_model_cache_names;
model_opts.cache_shape = FLAGS_model_cache_shapes;
model_opts.input_names = FLAGS_model_input_names;
model_opts.output_names = FLAGS_model_output_names;
std::shared_ptr<ppspeech::PaddleNnet> nnet(
Expand Down
8 changes: 6 additions & 2 deletions speechx/examples/ds2_ol/decoder/wfst-decoder-ol.cc
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,10 @@ DEFINE_string(
DEFINE_string(model_output_names,
"softmax_0.tmp_0,tmp_5,concat_0.tmp_0,concat_1.tmp_0",
"model output names");
DEFINE_string(model_cache_names, "5-1-1024,5-1-1024", "model cache names");
DEFINE_string(model_cache_names,
"chunk_state_h_box,chunk_state_c_box",
"model cache names");
DEFINE_string(model_cache_shapes, "5-1-1024,5-1-1024", "model cache shapes");

using kaldi::BaseFloat;
using kaldi::Matrix;
Expand Down Expand Up @@ -80,7 +83,8 @@ int main(int argc, char* argv[]) {
ppspeech::ModelOptions model_opts;
model_opts.model_path = model_graph;
model_opts.param_path = model_params;
model_opts.cache_shape = FLAGS_model_cache_names;
model_opts.cache_names = FLAGS_model_cache_names;
model_opts.cache_shape = FLAGS_model_cache_shapes;
model_opts.input_names = FLAGS_model_input_names;
model_opts.output_names = FLAGS_model_output_names;
std::shared_ptr<ppspeech::PaddleNnet> nnet(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,8 +42,8 @@ int main(int argc, char* argv[]) {

int32 num_done = 0, num_err = 0;

// feature pipeline: wave cache --> hanning
// window -->linear_spectrogram --> global cmvn -> feat cache
// feature pipeline: wave cache --> hanning window
// -->linear_spectrogram --> global cmvn -> feat cache

std::unique_ptr<ppspeech::FrontendInterface> data_source(
new ppspeech::AudioCache(3600 * 1600, true));
Expand Down
Empty file.
9 changes: 7 additions & 2 deletions speechx/speechx/decoder/param.h
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,10 @@ DEFINE_string(
DEFINE_string(model_output_names,
"softmax_0.tmp_0,tmp_5,concat_0.tmp_0,concat_1.tmp_0",
"model output names");
DEFINE_string(model_cache_names, "5-1-1024,5-1-1024", "model cache names");
DEFINE_string(model_cache_names,
"chunk_state_h_box,chunk_state_c_box",
"model cache names");
DEFINE_string(model_cache_shapes, "5-1-1024,5-1-1024", "model cache shapes");


namespace ppspeech {
Expand All @@ -70,7 +73,9 @@ ModelOptions InitModelOptions() {
ModelOptions model_opts;
model_opts.model_path = FLAGS_model_path;
model_opts.param_path = FLAGS_param_path;
model_opts.cache_shape = FLAGS_model_cache_names;
model_opts.cache_names = FLAGS_model_cache_names;
model_opts.cache_shape = FLAGS_model_cache_shapes;
model_opts.input_names = FLAGS_model_input_names;
model_opts.output_names = FLAGS_model_output_names;
return model_opts;
}
Expand Down
4 changes: 2 additions & 2 deletions speechx/speechx/frontend/audio/audio_cache.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ namespace ppspeech {
class AudioCache : public FrontendInterface {
public:
explicit AudioCache(int buffer_size = 1000 * kint16max,
bool to_float32 = true);
bool to_float32 = false);

virtual void Accept(const kaldi::VectorBase<BaseFloat>& waves);

Expand Down Expand Up @@ -58,7 +58,7 @@ class AudioCache : public FrontendInterface {
std::mutex mutex_;
std::condition_variable ready_feed_condition_;
kaldi::int32 timeout_; // millisecond
bool to_float32_;
bool to_float32_; // int16 -> float32. used in linear_spectrogram

DISALLOW_COPY_AND_ASSIGN(AudioCache);
};
Expand Down
11 changes: 8 additions & 3 deletions speechx/speechx/frontend/audio/feature_cache.cc
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,8 @@ bool FeatureCache::Read(kaldi::Vector<kaldi::BaseFloat>* feats) {
usleep(100); // sleep 0.1 ms
}
if (cache_.empty()) return false;

// read from cache
feats->Resize(cache_.front().Dim());
feats->CopyFromVec(cache_.front());
cache_.pop();
Expand All @@ -74,15 +76,16 @@ bool FeatureCache::Compute() {

// join with remained
int32 joint_len = feature.Dim() + remained_feature_.Dim();
int32 num_chunk =
((joint_len / dim_) - frame_chunk_size_) / frame_chunk_stride_ + 1;

Vector<BaseFloat> joint_feature(joint_len);
joint_feature.Range(0, remained_feature_.Dim())
.CopyFromVec(remained_feature_);
joint_feature.Range(remained_feature_.Dim(), feature.Dim())
.CopyFromVec(feature);

// one by one, or stride with window
// controlled by frame_chunk_stride_ and frame_chunk_size_
int32 num_chunk =
((joint_len / dim_) - frame_chunk_size_) / frame_chunk_stride_ + 1;
for (int chunk_idx = 0; chunk_idx < num_chunk; ++chunk_idx) {
int32 start = chunk_idx * frame_chunk_stride_ * dim_;

Expand All @@ -101,6 +104,8 @@ bool FeatureCache::Compute() {
cache_.push(feature_chunk);
ready_read_condition_.notify_one();
}

// cache remained feats
int32 remained_feature_len =
joint_len - num_chunk * frame_chunk_stride_ * dim_;
remained_feature_.Resize(remained_feature_len);
Expand Down
2 changes: 1 addition & 1 deletion speechx/speechx/frontend/audio/mfcc.cc
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ using kaldi::Matrix;
using std::vector;

Mfcc::Mfcc(const MfccOptions& opts,
std::unique_ptr<FrontendInterface> base_extractor)
std::unique_ptr<FrontendInterface> base_extractor)
: opts_(opts),
computer_(opts.mfcc_opts),
window_function_(computer_.GetFrameOptions()) {
Expand Down
26 changes: 19 additions & 7 deletions speechx/speechx/nnet/paddle_nnet.cc
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,7 @@ PaddleNnet::PaddleNnet(const ModelOptions& opts) : opts_(opts) {
LOG(INFO) << "output names: " << opts.output_names;
vector<string> input_names_vec = absl::StrSplit(opts.input_names, ",");
vector<string> output_names_vec = absl::StrSplit(opts.output_names, ",");

paddle_infer::Predictor* predictor = GetPredictor();

std::vector<std::string> model_input_names = predictor->GetInputNames();
Expand All @@ -87,6 +88,7 @@ PaddleNnet::PaddleNnet(const ModelOptions& opts) : opts_(opts) {
for (size_t i = 0; i < output_names_vec.size(); i++) {
assert(output_names_vec[i] == model_output_names[i]);
}

ReleasePredictor(predictor);
InitCacheEncouts(opts);
}
Expand All @@ -95,6 +97,7 @@ void PaddleNnet::Reset() { InitCacheEncouts(opts_); }

paddle_infer::Predictor* PaddleNnet::GetPredictor() {
paddle_infer::Predictor* predictor = nullptr;

std::lock_guard<std::mutex> guard(pool_mutex);
int pred_id = 0;

Expand Down Expand Up @@ -144,15 +147,19 @@ void PaddleNnet::FeedForward(const Vector<BaseFloat>& features,
Vector<BaseFloat>* inferences,
int32* inference_dim) {
paddle_infer::Predictor* predictor = GetPredictor();

int feat_row = features.Dim() / feature_dim;

std::vector<std::string> input_names = predictor->GetInputNames();
std::vector<std::string> output_names = predictor->GetOutputNames();

// feed inputs
std::unique_ptr<paddle_infer::Tensor> input_tensor =
predictor->GetInputHandle(input_names[0]);
std::vector<int> INPUT_SHAPE = {1, feat_row, feature_dim};
input_tensor->Reshape(INPUT_SHAPE);
input_tensor->CopyFromCpu(features.Data());

std::unique_ptr<paddle_infer::Tensor> input_len =
predictor->GetInputHandle(input_names[1]);
std::vector<int> input_len_size = {1};
Expand All @@ -161,32 +168,36 @@ void PaddleNnet::FeedForward(const Vector<BaseFloat>& features,
audio_len.push_back(feat_row);
input_len->CopyFromCpu(audio_len.data());

std::unique_ptr<paddle_infer::Tensor> h_box =
std::unique_ptr<paddle_infer::Tensor> state_h =
predictor->GetInputHandle(input_names[2]);
shared_ptr<Tensor<BaseFloat>> h_cache = GetCacheEncoder(input_names[2]);
h_box->Reshape(h_cache->get_shape());
h_box->CopyFromCpu(h_cache->get_data().data());
std::unique_ptr<paddle_infer::Tensor> c_box =
state_h->Reshape(h_cache->get_shape());
state_h->CopyFromCpu(h_cache->get_data().data());

std::unique_ptr<paddle_infer::Tensor> state_c =
predictor->GetInputHandle(input_names[3]);
shared_ptr<Tensor<float>> c_cache = GetCacheEncoder(input_names[3]);
c_box->Reshape(c_cache->get_shape());
c_box->CopyFromCpu(c_cache->get_data().data());
state_c->Reshape(c_cache->get_shape());
state_c->CopyFromCpu(c_cache->get_data().data());

// forward
bool success = predictor->Run();

if (success == false) {
LOG(INFO) << "predictor run occurs error";
}

// fetch outputs
std::unique_ptr<paddle_infer::Tensor> h_out =
predictor->GetOutputHandle(output_names[2]);
assert(h_cache->get_shape() == h_out->shape());
h_out->CopyToCpu(h_cache->get_data().data());

std::unique_ptr<paddle_infer::Tensor> c_out =
predictor->GetOutputHandle(output_names[3]);
assert(c_cache->get_shape() == c_out->shape());
c_out->CopyToCpu(c_cache->get_data().data());

// get result
std::unique_ptr<paddle_infer::Tensor> output_tensor =
predictor->GetOutputHandle(output_names[0]);
std::vector<int> output_shape = output_tensor->shape();
Expand All @@ -195,6 +206,7 @@ void PaddleNnet::FeedForward(const Vector<BaseFloat>& features,
inferences->Resize(row * col);
*inference_dim = col;
output_tensor->CopyToCpu(inferences->Data());

ReleasePredictor(predictor);
}

Expand Down
33 changes: 16 additions & 17 deletions speechx/speechx/nnet/paddle_nnet.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ namespace ppspeech {
struct ModelOptions {
std::string model_path;
std::string param_path;
int thread_num;
int thread_num; // predictor thread pool size
bool use_gpu;
bool switch_ir_optim;
std::string input_names;
Expand All @@ -34,19 +34,14 @@ struct ModelOptions {
bool enable_fc_padding;
bool enable_profile;
ModelOptions()
: model_path("avg_1.jit.pdmodel"),
param_path("avg_1.jit.pdiparams"),
: model_path(""),
param_path(""),
thread_num(2),
use_gpu(false),
input_names(
"audio_chunk,audio_chunk_lens,chunk_state_h_box,chunk_state_c_"
"box"),
output_names(
"save_infer_model/scale_0.tmp_1,save_infer_model/"
"scale_1.tmp_1,save_infer_model/scale_2.tmp_1,save_infer_model/"
"scale_3.tmp_1"),
cache_names("chunk_state_h_box,chunk_state_c_box"),
cache_shape("3-1-1024,3-1-1024"),
input_names(""),
output_names(""),
cache_names(""),
cache_shape(""),
switch_ir_optim(false),
enable_fc_padding(false),
enable_profile(false) {}
Expand Down Expand Up @@ -76,17 +71,19 @@ class Tensor {
public:
Tensor() {}
Tensor(const std::vector<int>& shape) : _shape(shape) {
int data_size = std::accumulate(
int neml = std::accumulate(
_shape.begin(), _shape.end(), 1, std::multiplies<int>());
LOG(INFO) << "data size: " << data_size;
_data.resize(data_size, 0);
LOG(INFO) << "Tensor neml: " << neml;
_data.resize(neml, 0);
}

void reshape(const std::vector<int>& shape) {
_shape = shape;
int data_size = std::accumulate(
int neml = std::accumulate(
_shape.begin(), _shape.end(), 1, std::multiplies<int>());
_data.resize(data_size, 0);
_data.resize(neml, 0);
}

const std::vector<int>& get_shape() const { return _shape; }
std::vector<T>& get_data() { return _data; }

Expand All @@ -98,10 +95,12 @@ class Tensor {
class PaddleNnet : public NnetInterface {
public:
PaddleNnet(const ModelOptions& opts);

virtual void FeedForward(const kaldi::Vector<kaldi::BaseFloat>& features,
int32 feature_dim,
kaldi::Vector<kaldi::BaseFloat>* inferences,
int32* inference_dim);

void Dim();
virtual void Reset();
std::shared_ptr<Tensor<kaldi::BaseFloat>> GetCacheEncoder(
Expand Down

0 comments on commit 87ef68f

Please sign in to comment.