diff --git a/paddle/fluid/operators/fused/fused_seqpool_cvm_with_conv_op.cc b/paddle/fluid/operators/fused/fused_seqpool_cvm_with_conv_op.cc
index 2cb1a0caf30ea..66bb9afdde8c6 100644
--- a/paddle/fluid/operators/fused/fused_seqpool_cvm_with_conv_op.cc
+++ b/paddle/fluid/operators/fused/fused_seqpool_cvm_with_conv_op.cc
@@ -109,6 +109,10 @@ class FusedSeqpoolCVMOpWithConvMaker : public framework::OpProtoAndCheckerMaker
               "(float, default 0.0) The value to pad for empty sequence.")
         .SetDefault(0.0);
     AddAttr<bool>("use_cvm", "bool, use cvm or not").SetDefault(true);
+    AddAttr<bool>("need_filter", "(bool, default false)").SetDefault(false);
+    AddAttr<float>("show_coeff", "(float, default 0.2)").SetDefault(0.2);
+    AddAttr<float>("clk_coeff", "(float, default 1)").SetDefault(1);
+    AddAttr<float>("threshold", "(float, default 0.96)").SetDefault(0.96);
     AddAttr<int>("cvm_offset", "(int, default 3)").SetDefault(3);
     AddAttr<bool>("show_filter", "(bool, default false)").SetDefault(false);
     AddAttr<int>("embedx_concate_size", "(int, default 1)").SetDefault(1);
diff --git a/paddle/fluid/operators/fused/fused_seqpool_cvm_with_conv_op.cu b/paddle/fluid/operators/fused/fused_seqpool_cvm_with_conv_op.cu
index cb56a9109e6c7..0e01eb1785132 100644
--- a/paddle/fluid/operators/fused/fused_seqpool_cvm_with_conv_op.cu
+++ b/paddle/fluid/operators/fused/fused_seqpool_cvm_with_conv_op.cu
@@ -53,6 +53,38 @@ __global__ void FusedSeqpoolWithConvKernelNormal(const size_t N, T **input_value
   }
 }
 
+// Filter
+template <typename T>
+__global__ void FusedSeqpoolWithConvKernelFilter(const size_t N, T **input_values,
+                                                 T **seqpool_output_values,
+                                                 size_t **lods_values,
+                                                 const int batch_size,
+                                                 const int embedding_size,
+                                                 const float pad_value,
+                                                 const float show_coeff,
+                                                 const float clk_coeff,
+                                                 const float threshold) {
+  CUDA_KERNEL_LOOP(i, N) {
+    int key = i / embedding_size;
+    int offset = i % embedding_size;
+    int x = key / batch_size;  // slot id
+    int y = key % batch_size;  // ins id
+    auto &start = *(lods_values[x] + y);
+    auto &end = *(lods_values[x] + y + 
1);
+
+    double val = pad_value;
+    for (auto k = start; k < end; ++k) {
+      T &show = *(input_values[x] + k * embedding_size);
+      T &click = *(input_values[x] + k * embedding_size + 1);
+      if ((show - click) * show_coeff + click * clk_coeff < threshold) {
+        continue;
+      }
+      val += *(input_values[x] + k * embedding_size + offset);
+    }
+    *(seqpool_output_values[x] + y * embedding_size + offset) = val;
+  }
+}
+
 // normal & expand slot's feasign
 template <typename T>
 __global__ void FusedSeqpoolWithConvKernelNormalEmbedxConcate(const size_t N, T **input_values,
@@ -257,6 +289,8 @@ void FusedSeqpoolCVMWithConv(const paddle::platform::Place &place,
                              std::vector<const size_t *> lods, const int batch_size,
                              const int slot_num, const int embedding_size,
                              const float padding_value, const bool use_cvm,
+                             float need_filter, float show_coeff,
+                             float clk_coeff, float threshold,
                              const int cvm_offset, bool show_filter,
                              const int embedx_concate_size) {
   auto stream = dynamic_cast<platform::CUDADeviceContext *>(
@@ -290,10 +324,17 @@ void FusedSeqpoolCVMWithConv(const paddle::platform::Place &place,
   size_t N = static_cast<size_t>(batch_size * slot_num * embedding_size);
   // first sum pool
   if (embedx_concate_size == 1){
+    if (need_filter) {  //filter
+      FusedSeqpoolWithConvKernelFilter<<<GET_BLOCK(N), PADDLE_CUDA_NUM_THREADS, 0, stream>>>(
+          N, gpu_input_values, gpu_seqpool_output_values, lods_values, batch_size,
+          embedding_size, padding_value, show_coeff, clk_coeff, threshold);
+    } else {  //normal
     FusedSeqpoolWithConvKernelNormal<<<GET_BLOCK(N), PADDLE_CUDA_NUM_THREADS, 0,
-                                      stream>>>(
-        N, gpu_input_values, gpu_seqpool_output_values, lods_values, batch_size,
-        embedding_size, padding_value);
+                                      stream>>>(
+        N, gpu_input_values, gpu_seqpool_output_values, lods_values, batch_size,
+        embedding_size, padding_value);
+    }
   } else {
     FusedSeqpoolWithConvKernelNormalEmbedxConcate<<<GET_BLOCK(N), PADDLE_CUDA_NUM_THREADS, 0, stream>>>(
@@ -595,6 +636,10 @@ class FusedSeqpoolCVMWithConvCUDAKernel : public framework::OpKernel {
     auto padding_value = ctx.Attr<float>("pad_value");
     auto use_cvm = ctx.Attr<bool>("use_cvm");
+    bool need_filter = ctx.Attr<bool>("need_filter");
+    float show_coeff = ctx.Attr<float>("show_coeff");
+    float clk_coeff = 
ctx.Attr<float>("clk_coeff");
+    float threshold = ctx.Attr<float>("threshold");
     const int cvm_offset = ctx.Attr<int>("cvm_offset");
     bool show_filter = ctx.Attr<bool>("show_filter");
     const int embedx_concate_size = ctx.Attr<int>("embedx_concate_size");
@@ -638,7 +683,9 @@ class FusedSeqpoolCVMWithConvCUDAKernel : public framework::OpKernel {
     }
     FusedSeqpoolCVMWithConv(ctx.GetPlace(), input_data, output_data,
                             seqpool_output_data, lods_data, batch_size, slot_size,
-                            embedding_size, padding_value, use_cvm, cvm_offset, show_filter, embedx_concate_size);
+                            embedding_size, padding_value, use_cvm,
+                            need_filter, show_coeff, clk_coeff, threshold,
+                            cvm_offset, show_filter, embedx_concate_size);
   }
 };
diff --git a/python/paddle/fluid/contrib/layers/nn.py b/python/paddle/fluid/contrib/layers/nn.py
index 5e3eb92f2d401..222ac19dca143 100644
--- a/python/paddle/fluid/contrib/layers/nn.py
+++ b/python/paddle/fluid/contrib/layers/nn.py
@@ -1908,6 +1908,10 @@ def fused_seqpool_cvm_with_conv(input,
                                 cvm,
                                 pad_value=0.0,
                                 use_cvm=True,
+                                need_filter=False,
+                                show_coeff=0.2,
+                                clk_coeff=1.0,
+                                threshold=0.96,
                                 show_filter=False,
                                 cvm_offset=3,
                                 embedx_concate_size=1):
@@ -1955,6 +1959,10 @@ def fused_seqpool_cvm_with_conv(input,
             "pad_value": pad_value,
             "use_cvm": use_cvm,
             "cvm_offset": cvm_offset,
+            "need_filter": need_filter,
+            "show_coeff": show_coeff,
+            "clk_coeff": clk_coeff,
+            "threshold": threshold,
             "show_filter": show_filter,
             "embedx_concate_size": embedx_concate_size,
         })