-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathspherical_harmonic_coefficients_sum.comp
88 lines (78 loc) · 3.49 KB
/
spherical_harmonic_coefficients_sum.comp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
#version 450
#extension GL_EXT_scalar_block_layout : enable
#extension GL_KHR_shader_subgroup_arithmetic : enable
// Nine zero vectors: the additive identity used for invocations that have no element to contribute.
const vec3 _9_ZERO_VEC3S[9] = vec3[9](
    vec3(0.0), vec3(0.0), vec3(0.0),
    vec3(0.0), vec3(0.0), vec3(0.0),
    vec3(0.0), vec3(0.0), vec3(0.0)
);
// Storage buffer that is both the input and the output of the reduction: main() reads
// elements starting at pc.srcOffset and writes one partial sum per workgroup starting at
// pc.dstOffset. Each element is an array of 9 vec3 values, declared with the `scalar`
// block layout (GL_EXT_scalar_block_layout).
// NOTE(review): presumably the 9 values are RGB spherical-harmonic coefficients (going by
// the file name), and the host is expected to keep the source and destination ranges of one
// dispatch from overlapping - confirm against the caller.
layout (set = 0, binding = 0, scalar) buffer PingPongBuffer {
vec3 data[][9]; // Runtime-sized array of 9-vec3 elements.
};
// Per-dispatch parameters. All offsets and counts are in units of elements of `data`
// (one element = 9 vec3 values), because they are used directly as indices into `data`.
layout (push_constant, std430) uniform PushConstant{
uint srcOffset; // Index of the first input element; invocation i reads data[srcOffset + i].
uint count;     // Number of input elements; invocations with a global x index >= count contribute zeros.
uint dstOffset; // Index of the first output element; workgroup w writes data[dstOffset + w].
} pc;
// Each workgroup consists of 256 invocations along x. The size of the shared
// `subgroupReduction` array depends on this value.
layout (local_size_x = 256) in;
// Workgroup-shared scratch space: one row of 9 vec3 partial sums per subgroup, indexed by gl_SubgroupID.
// 32 rows are enough only while gl_NumSubgroups <= 32; with 256 invocations per workgroup this
// means the subgroup size must be at least 8.
shared vec3 subgroupReduction[32][9];
// Sums the elements of `data` handled by this workgroup, coefficient by coefficient.
//
// Each invocation loads data[pc.srcOffset + gl_GlobalInvocationID.x], or nine zero vectors when
// its global x index is >= pc.count. The workgroup total is written to
// data[pc.dstOffset + gl_WorkGroupID.x].
//
// The reduction runs in stages:
//   1. every subgroup sums its invocations and stores the result in subgroupReduction[gl_SubgroupID];
//   2. (subgroup size 8 only) the 32 partial sums are reduced to 4 by subgroups 0..3;
//   3. subgroup 0 sums the remaining partial sums and one invocation writes the result.
//
// Assumes every subgroup is full, i.e. gl_NumSubgroups * gl_SubgroupSize == 256, and that the
// subgroup size is at least 8 (see the size of `subgroupReduction`).
void main(){
    // Out-of-range invocations load zeros so that they can still take part in the subgroup operations.
    vec3 pingPongData[] = gl_GlobalInvocationID.x < pc.count ? data[pc.srcOffset + gl_GlobalInvocationID.x] : _9_ZERO_VEC3S;

    // Stage 1: sum within each subgroup; one invocation per subgroup publishes the result.
    vec3 reductions[] = vec3[9](
        subgroupAdd(pingPongData[0]),
        subgroupAdd(pingPongData[1]),
        subgroupAdd(pingPongData[2]),
        subgroupAdd(pingPongData[3]),
        subgroupAdd(pingPongData[4]),
        subgroupAdd(pingPongData[5]),
        subgroupAdd(pingPongData[6]),
        subgroupAdd(pingPongData[7]),
        subgroupAdd(pingPongData[8])
    );
    if (subgroupElect()){
        subgroupReduction[gl_SubgroupID] = reductions;
    }
    memoryBarrierShared();
    barrier();

    // Stage 2: with subgroup size 8 there are 32 partial sums, which is more than a single
    // subgroup can reduce at once. Subgroups 0..3 (32 invocations in total) each reduce 8 of
    // them, leaving 4 partial sums in subgroupReduction[0..3].
    // The outer condition is assumed to be uniform across the workgroup (subgroup size does not
    // vary within a dispatch unless the pipeline explicitly allows it), so the barrier() calls
    // inside it are reached by every invocation.
    // TODO: this path has not been tested on hardware with subgroup size 8.
    if (gl_SubgroupSize == 8U){
        bool reducesAgain = gl_SubgroupID < 4U;
        if (reducesAgain){
            // Each of the 32 slots is read by exactly one invocation of subgroups 0..3.
            pingPongData = subgroupReduction[gl_SubgroupID * gl_SubgroupSize + gl_SubgroupInvocationID];
        }
        // All stage-1 results must have been read before slots 0..3 are overwritten below.
        barrier();
        if (reducesAgain){
            reductions = vec3[9](
                subgroupAdd(pingPongData[0]),
                subgroupAdd(pingPongData[1]),
                subgroupAdd(pingPongData[2]),
                subgroupAdd(pingPongData[3]),
                subgroupAdd(pingPongData[4]),
                subgroupAdd(pingPongData[5]),
                subgroupAdd(pingPongData[6]),
                subgroupAdd(pingPongData[7]),
                subgroupAdd(pingPongData[8])
            );
            if (subgroupElect()){
                subgroupReduction[gl_SubgroupID] = reductions;
            }
        }
        memoryBarrierShared();
        barrier();
    }

    // Stage 3: subgroup 0 sums the remaining partial sums.
    uint pingPongDataElementCount = (gl_SubgroupSize == 8U) ? 4U : gl_NumSubgroups;
    if (gl_SubgroupID == 0U){
        // Invocations beyond the number of partial sums contribute zeros (the subgroup may be
        // wider than the number of partial sums).
        pingPongData = gl_SubgroupInvocationID < pingPongDataElementCount ? subgroupReduction[gl_SubgroupInvocationID] : _9_ZERO_VEC3S;
        reductions = vec3[9](
            subgroupAdd(pingPongData[0]),
            subgroupAdd(pingPongData[1]),
            subgroupAdd(pingPongData[2]),
            subgroupAdd(pingPongData[3]),
            subgroupAdd(pingPongData[4]),
            subgroupAdd(pingPongData[5]),
            subgroupAdd(pingPongData[6]),
            subgroupAdd(pingPongData[7]),
            subgroupAdd(pingPongData[8])
        );
        // Every invocation of the subgroup holds the same total; a single writer is sufficient.
        if (subgroupElect()){
            uint dstIndex = pc.dstOffset + gl_WorkGroupID.x;
            // TODO: Assigning the whole array at once (data[dstIndex] = reductions) compiles with
            // glslc but fails in SPIRV-Cross (SPIR-V -> MSL), so the elements are stored one by
            // one. Simplify when that is fixed.
            data[dstIndex][0] = reductions[0];
            data[dstIndex][1] = reductions[1];
            data[dstIndex][2] = reductions[2];
            data[dstIndex][3] = reductions[3];
            data[dstIndex][4] = reductions[4];
            data[dstIndex][5] = reductions[5];
            data[dstIndex][6] = reductions[6];
            data[dstIndex][7] = reductions[7];
            data[dstIndex][8] = reductions[8];
        }
    }
}