Skip to content

Commit cc54b88

Browse files
authored
add gelu vulkan operator (#5001)
1 parent b3fbbcc commit cc54b88

File tree

5 files changed

+441
-0
lines changed

5 files changed

+441
-0
lines changed

src/layer/vulkan/gelu_vulkan.cpp

Lines changed: 181 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,181 @@
1+
// Tencent is pleased to support the open source community by making ncnn available.
2+
//
3+
// Copyright (C) 2023 THL A29 Limited, a Tencent company. All rights reserved.
4+
//
5+
// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
6+
// in compliance with the License. You may obtain a copy of the License at
7+
//
8+
// https://opensource.org/licenses/BSD-3-Clause
9+
//
10+
// Unless required by applicable law or agreed to in writing, software distributed
11+
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
12+
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
13+
// specific language governing permissions and limitations under the License.
14+
15+
#include "gelu_vulkan.h"
16+
17+
#include "layer_shader_type.h"
18+
19+
namespace ncnn {
20+
21+
// Constructs the layer with no pipelines built yet; they are created later
// in create_pipeline() and released in destroy_pipeline().
GELU_vulkan::GELU_vulkan()
    : pipeline_gelu(0), pipeline_gelu_pack4(0), pipeline_gelu_pack8(0)
{
    // Declare that this layer provides both forward_inplace overloads below:
    // the VkMat (buffer) one and the VkImageMat (image) one.
    support_vulkan = true;
    support_image_storage = true;
}
30+
31+
int GELU_vulkan::create_pipeline(const Option& opt)
32+
{
33+
const Mat& shape = top_shapes.empty() ? Mat() : top_shapes[0];
34+
35+
int elempack = 1;
36+
if (shape.dims == 1) elempack = opt.use_shader_pack8 && shape.w % 8 == 0 ? 8 : shape.w % 4 == 0 ? 4 : 1;
37+
if (shape.dims == 2) elempack = opt.use_shader_pack8 && shape.h % 8 == 0 ? 8 : shape.h % 4 == 0 ? 4 : 1;
38+
if (shape.dims == 3 || shape.dims == 4) elempack = opt.use_shader_pack8 && shape.c % 8 == 0 ? 8 : shape.c % 4 == 0 ? 4 : 1;
39+
40+
size_t elemsize;
41+
if (opt.use_fp16_storage)
42+
{
43+
elemsize = elempack * 2u;
44+
}
45+
else if (opt.use_fp16_packed)
46+
{
47+
elemsize = elempack == 1 ? 4u : elempack * 2u;
48+
}
49+
else
50+
{
51+
elemsize = elempack * 4u;
52+
}
53+
54+
Mat shape_packed;
55+
if (shape.dims == 1) shape_packed = Mat(shape.w / elempack, (void*)0, elemsize, elempack);
56+
if (shape.dims == 2) shape_packed = Mat(shape.w, shape.h / elempack, (void*)0, elemsize, elempack);
57+
if (shape.dims == 3) shape_packed = Mat(shape.w, shape.h, shape.c / elempack, (void*)0, elemsize, elempack);
58+
if (shape.dims == 4) shape_packed = Mat(shape.w, shape.h, shape.d, shape.c / elempack, (void*)0, elemsize, elempack);
59+
60+
std::vector<vk_specialization_type> specializations(0 + 5);
61+
specializations[0 + 0].i = shape_packed.dims;
62+
specializations[0 + 1].i = shape_packed.w;
63+
specializations[0 + 2].i = shape_packed.h * shape_packed.d;
64+
specializations[0 + 3].i = shape_packed.c;
65+
specializations[0 + 4].i = shape_packed.cstep;
66+
67+
Mat local_size_xyz;
68+
if (shape_packed.dims == 1)
69+
{
70+
local_size_xyz.w = std::min(64, shape_packed.w);
71+
local_size_xyz.h = 1;
72+
local_size_xyz.c = 1;
73+
}
74+
if (shape_packed.dims == 2)
75+
{
76+
local_size_xyz.w = std::min(8, shape_packed.w);
77+
local_size_xyz.h = std::min(8, shape_packed.h);
78+
local_size_xyz.c = 1;
79+
}
80+
if (shape_packed.dims == 3)
81+
{
82+
local_size_xyz.w = std::min(4, shape_packed.w);
83+
local_size_xyz.h = std::min(4, shape_packed.h);
84+
local_size_xyz.c = std::min(4, shape_packed.c);
85+
}
86+
if (shape_packed.dims == 4)
87+
{
88+
local_size_xyz.w = std::min(4, shape_packed.w);
89+
local_size_xyz.h = std::min(4, shape_packed.h * shape_packed.d);
90+
local_size_xyz.c = std::min(4, shape_packed.c);
91+
}
92+
93+
// pack1
94+
if (shape.dims == 0 || elempack == 1)
95+
{
96+
pipeline_gelu = new Pipeline(vkdev);
97+
pipeline_gelu->set_optimal_local_size_xyz(local_size_xyz);
98+
pipeline_gelu->create(LayerShaderType::gelu, opt, specializations);
99+
}
100+
101+
// pack4
102+
if (shape.dims == 0 || elempack == 4)
103+
{
104+
pipeline_gelu_pack4 = new Pipeline(vkdev);
105+
pipeline_gelu_pack4->set_optimal_local_size_xyz(local_size_xyz);
106+
pipeline_gelu_pack4->create(LayerShaderType::gelu_pack4, opt, specializations);
107+
}
108+
109+
// pack8
110+
if ((opt.use_shader_pack8 && shape.dims == 0) || elempack == 8)
111+
{
112+
pipeline_gelu_pack8 = new Pipeline(vkdev);
113+
pipeline_gelu_pack8->set_optimal_local_size_xyz(local_size_xyz);
114+
pipeline_gelu_pack8->create(LayerShaderType::gelu_pack8, opt, specializations);
115+
}
116+
117+
return 0;
118+
}
119+
120+
int GELU_vulkan::destroy_pipeline(const Option& /*opt*/)
121+
{
122+
delete pipeline_gelu;
123+
pipeline_gelu = 0;
124+
125+
delete pipeline_gelu_pack4;
126+
pipeline_gelu_pack4 = 0;
127+
128+
delete pipeline_gelu_pack8;
129+
pipeline_gelu_pack8 = 0;
130+
131+
return 0;
132+
}
133+
134+
// Records the in-place GELU dispatch for a buffer-backed blob.
//
// The pipeline is chosen from the blob's elempack (8, 4, anything else -> pack1).
// The blob is bound once, as the shader reads and writes the same buffer.
// Always returns 0.
int GELU_vulkan::forward_inplace(VkMat& bottom_top_blob, VkCompute& cmd, const Option& /*opt*/) const
{
    const Pipeline* selected = pipeline_gelu;
    switch (bottom_top_blob.elempack)
    {
    case 8:
        selected = pipeline_gelu_pack8;
        break;
    case 4:
        selected = pipeline_gelu_pack4;
        break;
    default:
        break;
    }

    std::vector<VkMat> blob_bindings(1);
    blob_bindings[0] = bottom_top_blob;

    // Push constants in the order of the shader's parameter block:
    // dims, w, h (h * d folded together), c, cstep.
    std::vector<vk_constant_type> push_constants(5);
    push_constants[0].i = bottom_top_blob.dims;
    push_constants[1].i = bottom_top_blob.w;
    push_constants[2].i = bottom_top_blob.h * bottom_top_blob.d;
    push_constants[3].i = bottom_top_blob.c;
    push_constants[4].i = bottom_top_blob.cstep;

    cmd.record_pipeline(selected, blob_bindings, push_constants, bottom_top_blob);

    return 0;
}
156+
157+
// Records the in-place GELU dispatch for an image-backed blob.
//
// The pipeline is chosen from the blob's elempack (8, 4, anything else -> pack1).
// The same image is bound twice: binding 0 is the sampled input and binding 1
// is the write-only output declared by the shader's image path.
// Always returns 0.
int GELU_vulkan::forward_inplace(VkImageMat& bottom_top_blob, VkCompute& cmd, const Option& /*opt*/) const
{
    const Pipeline* selected = pipeline_gelu;
    switch (bottom_top_blob.elempack)
    {
    case 8:
        selected = pipeline_gelu_pack8;
        break;
    case 4:
        selected = pipeline_gelu_pack4;
        break;
    default:
        break;
    }

    std::vector<VkImageMat> image_bindings(2);
    image_bindings[0] = bottom_top_blob;
    image_bindings[1] = bottom_top_blob;

    // Push constants in the order of the shader's parameter block:
    // dims, w, h (h * d folded together), c, cstep.
    std::vector<vk_constant_type> push_constants(5);
    push_constants[0].i = bottom_top_blob.dims;
    push_constants[1].i = bottom_top_blob.w;
    push_constants[2].i = bottom_top_blob.h * bottom_top_blob.d;
    push_constants[3].i = bottom_top_blob.c;
    // cstep is not passed for images (bottom_top_blob.cstep is deliberately
    // unused); the shader's image path addresses texels by (x, y, z) only.
    push_constants[4].i = 0;

    cmd.record_pipeline(selected, image_bindings, push_constants, bottom_top_blob);

    return 0;
}
180+
181+
} // namespace ncnn

src/layer/vulkan/gelu_vulkan.h

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
// Tencent is pleased to support the open source community by making ncnn available.
2+
//
3+
// Copyright (C) 2023 THL A29 Limited, a Tencent company. All rights reserved.
4+
//
5+
// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
6+
// in compliance with the License. You may obtain a copy of the License at
7+
//
8+
// https://opensource.org/licenses/BSD-3-Clause
9+
//
10+
// Unless required by applicable law or agreed to in writing, software distributed
11+
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
12+
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
13+
// specific language governing permissions and limitations under the License.
14+
15+
#ifndef LAYER_GELU_VULKAN_H
16+
#define LAYER_GELU_VULKAN_H
17+
18+
#include "gelu.h"
19+
20+
namespace ncnn {
21+
22+
class GELU_vulkan : virtual public GELU
23+
{
24+
public:
25+
GELU_vulkan();
26+
27+
virtual int create_pipeline(const Option& opt);
28+
virtual int destroy_pipeline(const Option& opt);
29+
30+
using GELU::forward_inplace;
31+
virtual int forward_inplace(VkMat& bottom_top_blob, VkCompute& cmd, const Option& opt) const;
32+
virtual int forward_inplace(VkImageMat& bottom_top_blob, VkCompute& cmd, const Option& opt) const;
33+
34+
public:
35+
Pipeline* pipeline_gelu;
36+
Pipeline* pipeline_gelu_pack4;
37+
Pipeline* pipeline_gelu_pack8;
38+
};
39+
40+
} // namespace ncnn
41+
42+
#endif // LAYER_GELU_VULKAN_H

src/layer/vulkan/shader/gelu.comp

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
// Tencent is pleased to support the open source community by making ncnn available.
2+
//
3+
// Copyright (C) 2023 THL A29 Limited, a Tencent company. All rights reserved.
4+
//
5+
// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
6+
// in compliance with the License. You may obtain a copy of the License at
7+
//
8+
// https://opensource.org/licenses/BSD-3-Clause
9+
//
10+
// Unless required by applicable law or agreed to in writing, software distributed
11+
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
12+
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
13+
// specific language governing permissions and limitations under the License.
14+
15+
#version 450

#if NCNN_fp16_storage
#extension GL_EXT_shader_16bit_storage: require
#endif
#if NCNN_fp16_arithmetic
#extension GL_EXT_shader_explicit_arithmetic_types_float16: require
#endif

// Shape specialization constants, filled by the host when the blob shape is
// known at pipeline creation time. h carries h * d for 4-D blobs. They default
// to 0; psc() is defined outside this file and presumably falls back to the
// push constant of the same name in that case (confirm against its definition).
#define shape_constant_id_offset 0
layout (constant_id = shape_constant_id_offset + 0) const int dims = 0;
layout (constant_id = shape_constant_id_offset + 1) const int w = 0;
layout (constant_id = shape_constant_id_offset + 2) const int h = 0;
layout (constant_id = shape_constant_id_offset + 3) const int c = 0;
layout (constant_id = shape_constant_id_offset + 4) const int cstep = 0;

// The blob is processed in place: the buffer path reads and writes one binding,
// the image path samples binding 0 and stores to binding 1.
#if NCNN_image_shader
layout (binding = 0) uniform unfp sampler3D bottom_blob_3d;
layout (binding = 1, imfmtc1) writeonly uniform unfp image3D top_blob_3d;
#else
layout (binding = 0) buffer bottom_top_blob { sfp bottom_top_blob_data[]; };
#endif

// Runtime shape, in the same order as the specialization constants above.
layout (push_constant) uniform parameter
{
    int dims;
    int w;
    int h;
    int c;
    int cstep;
} p;

// Applies the tanh approximation of GELU to one scalar element per invocation.
void main()
{
    int gx = int(gl_GlobalInvocationID.x);
    int gy = int(gl_GlobalInvocationID.y);
    int gz = int(gl_GlobalInvocationID.z);

    // Invocations outside the blob extent have nothing to do.
    if (gx >= psc(w) || gy >= psc(h) || gz >= psc(c))
        return;

#if NCNN_image_shader
    afp v = image3d_ld1(bottom_blob_3d, ivec3(gx, gy, gz));
#else
    const int gi = gz * psc(cstep) + gy * psc(w) + gx;

    afp v = buffer_ld1(bottom_top_blob_data, gi);
#endif

    // y = 0.5x * (1 + tanh(sqrt(2/Pi) * (x + 0.044715x^3)))
    // Every literal is wrapped in afp() so the expression keeps the type of v.
    // Assuming afp is a 16-bit float type when NCNN_fp16_arithmetic is set
    // (the macro is defined outside this file), a bare float literal would
    // promote the result to float, which GLSL does not implicitly convert back
    // on assignment.
    v = afp(0.5f) * v * (afp(1.0f) + tanh(afp(0.79788452f) * (v + afp(0.044715f) * v * v * v)));

#if NCNN_image_shader
    image3d_st1(top_blob_3d, ivec3(gx, gy, gz), v);
#else
    buffer_st1(bottom_top_blob_data, gi, v);
#endif
}
src/layer/vulkan/shader/gelu_pack4.comp

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
// Tencent is pleased to support the open source community by making ncnn available.
2+
//
3+
// Copyright (C) 2023 THL A29 Limited, a Tencent company. All rights reserved.
4+
//
5+
// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
6+
// in compliance with the License. You may obtain a copy of the License at
7+
//
8+
// https://opensource.org/licenses/BSD-3-Clause
9+
//
10+
// Unless required by applicable law or agreed to in writing, software distributed
11+
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
12+
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
13+
// specific language governing permissions and limitations under the License.
14+
15+
#version 450

#if NCNN_fp16_storage
#extension GL_EXT_shader_16bit_storage: require
#endif
#if NCNN_fp16_arithmetic
#extension GL_EXT_shader_explicit_arithmetic_types_float16: require
#endif

// Shape specialization constants, filled by the host when the blob shape is
// known at pipeline creation time. h carries h * d for 4-D blobs. They default
// to 0; psc() is defined outside this file and presumably falls back to the
// push constant of the same name in that case (confirm against its definition).
#define shape_constant_id_offset 0
layout (constant_id = shape_constant_id_offset + 0) const int dims = 0;
layout (constant_id = shape_constant_id_offset + 1) const int w = 0;
layout (constant_id = shape_constant_id_offset + 2) const int h = 0;
layout (constant_id = shape_constant_id_offset + 3) const int c = 0;
layout (constant_id = shape_constant_id_offset + 4) const int cstep = 0;

// The blob is processed in place: the buffer path reads and writes one binding,
// the image path samples binding 0 and stores to binding 1. Each element is a
// pack of four values.
#if NCNN_image_shader
layout (binding = 0) uniform unfp sampler3D bottom_blob_3d;
layout (binding = 1, imfmtc4) writeonly uniform unfp image3D top_blob_3d;
#else
layout (binding = 0) buffer bottom_top_blob { sfpvec4 bottom_top_blob_data[]; };
#endif

// Runtime shape, in the same order as the specialization constants above.
layout (push_constant) uniform parameter
{
    int dims;
    int w;
    int h;
    int c;
    int cstep;
} p;

// Applies the tanh approximation of GELU to one 4-wide packed element per invocation.
void main()
{
    int gx = int(gl_GlobalInvocationID.x);
    int gy = int(gl_GlobalInvocationID.y);
    int gz = int(gl_GlobalInvocationID.z);

    // Invocations outside the blob extent have nothing to do.
    if (gx >= psc(w) || gy >= psc(h) || gz >= psc(c))
        return;

#if NCNN_image_shader
    afpvec4 v = image3d_ld4(bottom_blob_3d, ivec3(gx, gy, gz));
#else
    const int gi = gz * psc(cstep) + gy * psc(w) + gx;

    afpvec4 v = buffer_ld4(bottom_top_blob_data, gi);
#endif

    // y = 0.5x * (1 + tanh(sqrt(2/Pi) * (x + 0.044715x^3)))
    // Every literal is wrapped in afp() so the expression keeps the type of v.
    // Assuming afp / afpvec4 are 16-bit float types when NCNN_fp16_arithmetic
    // is set (the macros are defined outside this file), a bare float literal
    // would promote the result to vec4, which GLSL does not implicitly convert
    // back on assignment.
    v = afp(0.5f) * v * (afp(1.0f) + tanh(afp(0.79788452f) * (v + afp(0.044715f) * v * v * v)));

#if NCNN_image_shader
    image3d_st4(top_blob_3d, ivec3(gx, gy, gz), v);
#else
    buffer_st4(bottom_top_blob_data, gi, v);
#endif
}

0 commit comments

Comments
 (0)