Skip to content

Commit

Permalink
Vulkan conv1d (#5060)
Browse files Browse the repository at this point in the history
  • Loading branch information
FhqTreap authored Oct 20, 2023
1 parent 84aaedb commit dc25128
Show file tree
Hide file tree
Showing 11 changed files with 2,339 additions and 0 deletions.
423 changes: 423 additions & 0 deletions src/layer/vulkan/convolution1d_vulkan.cpp

Large diffs are not rendered by default.

53 changes: 53 additions & 0 deletions src/layer/vulkan/convolution1d_vulkan.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
// Tencent is pleased to support the open source community by making ncnn available.
//
// Copyright (C) 2023 THL A29 Limited, a Tencent company. All rights reserved.
//
// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
// in compliance with the License. You may obtain a copy of the License at
//
// https://opensource.org/licenses/BSD-3-Clause
//
// Unless required by applicable law or agreed to in writing, software distributed
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.

#ifndef LAYER_CONVOLUTION1D_VULKAN_H
#define LAYER_CONVOLUTION1D_VULKAN_H

#include "convolution1d.h"

namespace ncnn {

// Vulkan variant of the Convolution1D layer.
//
// Derives virtually from Convolution1D and declares the pipeline lifecycle
// hooks, the model upload hook, and GPU forward overloads for both VkMat and
// VkImageMat blobs. All definitions live outside this header.
class Convolution1D_vulkan : virtual public Convolution1D
{
public:
Convolution1D_vulkan();

// Pipeline lifecycle hooks.
// NOTE(review): presumably these create/release `padding` and
// `pipeline_convolution1d` - confirm in convolution1d_vulkan.cpp.
virtual int create_pipeline(const Option& opt);
virtual int destroy_pipeline(const Option& opt);

// NOTE(review): presumably records the transfer of the packed weight/bias
// data into the *_gpu / *_gpu_image members through cmd - confirm in the .cpp.
virtual int upload_model(VkTransfer& cmd, const Option& opt);

// Re-expose the base-class forward overloads; without this using-declaration
// the forward overloads declared below would hide them.
using Convolution1D::forward;
// GPU forward pass on VkMat blobs; work is recorded through cmd.
virtual int forward(const VkMat& bottom_blob, VkMat& top_blob, VkCompute& cmd, const Option& opt) const;
// GPU forward pass on VkImageMat blobs; work is recorded through cmd.
virtual int forward(const VkImageMat& bottom_blob, VkImageMat& top_blob, VkCompute& cmd, const Option& opt) const;

public:
// Helper layer held through a raw pointer.
// NOTE(review): presumably an internal padding layer owned by this object;
// ownership and lifetime are not visible in this header - confirm in the .cpp.
ncnn::Layer* padding;

// Host-side (Mat) weight and bias data.
// NOTE(review): presumably repacked into the layout the shader reads - confirm.
Mat weight_data_packed;
Mat bias_data_packed;

// Weight and bias data held as VkMat.
VkMat weight_data_gpu;
VkMat bias_data_gpu;

// Weight and bias data held as VkImageMat.
VkImageMat weight_data_gpu_image;
VkImageMat bias_data_gpu_image;

// NOTE(review): presumably the compute pipeline built from the convolution1d
// shader; raw pointer, ownership not visible here - confirm in the .cpp.
Pipeline* pipeline_convolution1d;
};

} // namespace ncnn

#endif // LAYER_CONVOLUTION1D_VULKAN_H
177 changes: 177 additions & 0 deletions src/layer/vulkan/shader/convolution1d.comp
Original file line number Diff line number Diff line change
@@ -0,0 +1,177 @@
// Tencent is pleased to support the open source community by making ncnn available.
//
// Copyright (C) 2023 THL A29 Limited, a Tencent company. All rights reserved.
//
// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
// in compliance with the License. You may obtain a copy of the License at
//
// https://opensource.org/licenses/BSD-3-Clause
//
// Unless required by applicable law or agreed to in writing, software distributed
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.

#version 450

// Optional 16-bit storage / arithmetic support, switched on by build-time macros.
#if NCNN_fp16_storage
#extension GL_EXT_shader_16bit_storage: require
#endif
#if NCNN_fp16_arithmetic
#extension GL_EXT_shader_explicit_arithmetic_types_float16: require
#endif

// Pulls in the activation helper (activation_afp) that main() calls.
#extension GL_GOOGLE_include_directive: enable
#include "vulkan_activation.comp"

// Layer parameters supplied as specialization constants.
// kernel_w           - number of taps iterated per input row in main()
// dilation_w         - multiplier applied to the tap index when addressing the input
// stride_w           - multiplier applied to the output x position when addressing the input
// bias_term          - main() seeds the accumulators from the bias blob when this is 1
// activation_type /
// activation_param_* - forwarded unchanged to activation_afp()
layout (constant_id = 0) const int kernel_w = 1;
layout (constant_id = 1) const int dilation_w = 1;
layout (constant_id = 2) const int stride_w = 1;
layout (constant_id = 3) const int bias_term = 0;
layout (constant_id = 4) const int activation_type = 0;
layout (constant_id = 5) const float activation_param_0 = 0;
layout (constant_id = 6) const float activation_param_1 = 0;

// Shape hints for the input (dims, w, h, c, cstep) and output (out*) blobs.
// main() reads them through psc(); of these only w, h, outw and outh are used
// by main() in this file.
// NOTE(review): psc() is defined elsewhere; presumably it selects the
// specialization constant when non-zero and otherwise falls back to the
// push-constant field of the same name - confirm against the shader prelude.
#define shape_constant_id_offset 7
layout (constant_id = shape_constant_id_offset + 0) const int dims = 0;
layout (constant_id = shape_constant_id_offset + 1) const int w = 0;
layout (constant_id = shape_constant_id_offset + 2) const int h = 0;
layout (constant_id = shape_constant_id_offset + 3) const int c = 0;
layout (constant_id = shape_constant_id_offset + 4) const int cstep = 0;

layout (constant_id = shape_constant_id_offset + 5) const int outdims = 0;
layout (constant_id = shape_constant_id_offset + 6) const int outw = 0;
layout (constant_id = shape_constant_id_offset + 7) const int outh = 0;
layout (constant_id = shape_constant_id_offset + 8) const int outc = 0;
layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0;

// Resource bindings: 0 = input, 1 = output, 2 = weights, 3 = bias.
// The image variant samples 3D textures and writes a 3D image; the buffer
// variant uses storage buffers.
// NOTE(review): unfp / imfmtc1 / sfp are macros defined outside this file;
// presumably precision and element-type selectors - confirm.
#if NCNN_image_shader
layout (binding = 0) uniform unfp sampler3D bottom_blob;
layout (binding = 1, imfmtc1) writeonly uniform unfp image3D top_blob;
layout (binding = 2) uniform unfp sampler3D weight_blob;
layout (binding = 3) uniform unfp sampler3D bias_blob;
#else
layout (binding = 0) readonly buffer bottom_blob { sfp bottom_blob_data[]; };
layout (binding = 1) writeonly buffer top_blob { sfp top_blob_data[]; };
layout (binding = 2) readonly buffer weight_blob { sfp weight_data[]; };
layout (binding = 3) readonly buffer bias_blob { sfp bias_data[]; };
#endif

// Runtime shape values, field-for-field parallel to the shape specialization
// constants above.
layout (push_constant) uniform parameter
{
int dims;
int w;
int h;
int c;
int cstep;

int outdims;
int outw;
int outh;
int outc;
int outcstep;
} p;

void main()
{
    // Each invocation produces a 2x2 tile of outputs: two neighbouring
    // positions along x for two neighbouring output rows along y.
    const int ox = 2 * int(gl_GlobalInvocationID.x);
    const int oy = 2 * int(gl_GlobalInvocationID.y);

    // Nothing to do when the tile origin is already outside the output.
    if (!(ox < psc(outw) && oy < psc(outh)))
        return;

    const ivec2 cols = ivec2(ox, ox + 1);
    const ivec2 rows = ivec2(oy, oy + 1);

    // Accumulators, named acc_r<row>c<col> within the tile.
    afp acc_r0c0 = afp(0.0f);
    afp acc_r0c1 = afp(0.0f);
    afp acc_r1c0 = afp(0.0f);
    afp acc_r1c1 = afp(0.0f);

    // One bias value per output row, shared by both columns of that row.
    if (bias_term == 1)
    {
#if NCNN_image_shader
        acc_r0c0 = image3d_ld1(bias_blob, ivec3(rows.x, 0, 0));
        acc_r1c0 = image3d_ld1(bias_blob, ivec3(rows.y, 0, 0));
#else
        acc_r0c0 = buffer_ld1(bias_data, rows.x);
        acc_r1c0 = buffer_ld1(bias_data, rows.y);
#endif
        acc_r0c1 = acc_r0c0;
        acc_r1c1 = acc_r1c0;
    }

    // NOTE(review): the second row/column of the tile is loaded and accumulated
    // even when it lies outside outw/outh; only the buffer stores are guarded.

#if NCNN_image_shader

    // x coordinate of the first tap for each of the two output columns
    const ivec2 in_x = cols * stride_w;

    for (int in_row = 0; in_row < psc(h); in_row++)
    {
        for (int tap = 0; tap < kernel_w; tap++)
        {
            const int dx = tap * dilation_w;

            afp in0 = image3d_ld1(bottom_blob, ivec3(in_x.x + dx, in_row, 0));
            afp in1 = image3d_ld1(bottom_blob, ivec3(in_x.y + dx, in_row, 0));

            // weights are addressed as (tap, input row, output row)
            afp wt0 = image3d_ld1(weight_blob, ivec3(tap, in_row, rows.x));
            afp wt1 = image3d_ld1(weight_blob, ivec3(tap, in_row, rows.y));

            acc_r0c0 += in0 * wt0;
            acc_r0c1 += in1 * wt0;
            acc_r1c0 += in0 * wt1;
            acc_r1c1 += in1 * wt1;
        }
    }

#else

    // flat offsets of the first tap (input) and first weight for the tile
    const ivec2 in_base = cols * stride_w;
    const ivec2 wt_base = rows * psc(h) * kernel_w;

    for (int in_row = 0; in_row < psc(h); in_row++)
    {
        // input rows are psc(w) elements apart, weight rows kernel_w apart
        const ivec2 in_off = in_base + in_row * psc(w);
        const ivec2 wt_off = wt_base + in_row * kernel_w;

        for (int tap = 0; tap < kernel_w; tap++)
        {
            const int dx = tap * dilation_w;

            afp in0 = buffer_ld1(bottom_blob_data, in_off.x + dx);
            afp in1 = buffer_ld1(bottom_blob_data, in_off.y + dx);

            afp wt0 = buffer_ld1(weight_data, wt_off.x + tap);
            afp wt1 = buffer_ld1(weight_data, wt_off.y + tap);

            acc_r0c0 += in0 * wt0;
            acc_r0c1 += in1 * wt0;
            acc_r1c0 += in0 * wt1;
            acc_r1c1 += in1 * wt1;
        }
    }

#endif

    acc_r0c0 = activation_afp(acc_r0c0, activation_type, activation_param_0, activation_param_1);
    acc_r0c1 = activation_afp(acc_r0c1, activation_type, activation_param_0, activation_param_1);
    acc_r1c0 = activation_afp(acc_r1c0, activation_type, activation_param_0, activation_param_1);
    acc_r1c1 = activation_afp(acc_r1c1, activation_type, activation_param_0, activation_param_1);

#if NCNN_image_shader

    image3d_st1(top_blob, ivec3(cols.x, rows.x, 0), acc_r0c0);
    image3d_st1(top_blob, ivec3(cols.y, rows.x, 0), acc_r0c1);
    image3d_st1(top_blob, ivec3(cols.x, rows.y, 0), acc_r1c0);
    image3d_st1(top_blob, ivec3(cols.y, rows.y, 0), acc_r1c1);

#else

    // flat index of the tile origin in the row-major output
    const int out_idx = oy * psc(outw) + ox;

    // the tile origin is always in range; the other three need a bounds check
    const bool has_col1 = ox + 1 < psc(outw);
    const bool has_row1 = oy + 1 < psc(outh);

    buffer_st1(top_blob_data, out_idx, acc_r0c0);
    if (has_col1) buffer_st1(top_blob_data, out_idx + 1, acc_r0c1);
    if (has_row1) buffer_st1(top_blob_data, out_idx + psc(outw), acc_r1c0);
    if (has_row1 && has_col1) buffer_st1(top_blob_data, out_idx + psc(outw) + 1, acc_r1c1);

#endif
}
Loading

0 comments on commit dc25128

Please sign in to comment.