kompute_tests/test2/shader.comp

37 lines
1000 B
Plaintext
Raw Permalink Normal View History

2023-12-06 23:13:26 +01:00
#version 450
// clang-format off
// Workgroup dimensions (threads spawned per workgroup). The __lcsize_*__
// placeholders are substituted by the host program before the shader is
// compiled.
layout(
    local_size_x = __lcsize_x__,
    local_size_y = __lcsize_y__,
    local_size_z = __lcsize_z__
) in;
// Buffers provided via the tensors: main() reads matA and matB (bindings 0
// and 1) and writes its result to matC (binding 2).
layout(binding = 0) buffer tensorA
{
    float matA[];
};

layout(binding = 1) buffer tensorB
{
    float matB[];
};

layout(binding = 2) buffer tensorC
{
    float matC[];
};
// Specialization constant (constant_id 0): the matrix side length, supplied as
// a float and converted to uint in main(). Its default value is 0.
layout(constant_id = 0) const float tensor_size_f = 0.0;
// Computes one element of the matrix product C = A * B.
//
// matA, matB and matC are indexed as n x n matrices with a row stride of n,
// where n = uint(tensor_size_f). Each invocation computes just
// matC[row * n + col], with row = gl_GlobalInvocationID.y and
// col = gl_GlobalInvocationID.x.
void main()
{
    uint n = uint(tensor_size_f);
    uint col = gl_GlobalInvocationID.x;
    uint row = gl_GlobalInvocationID.y;

    // The dispatch may cover more invocations than there are matrix elements
    // (e.g. when n is not a multiple of the workgroup size). Such invocations
    // must not touch the buffers: they would index past the matrices or
    // overwrite an element belonging to the next row.
    if (col >= n || row >= n) {
        return;
    }

    // C[row][col] = sum(k, A[row][k] * B[k][col])
    uint row_base = row * n;
    float acc = 0.0;
    for (uint k = 0; k < n; k++) {
        acc += matA[row_base + k] * matB[k * n + col];
    }
    matC[row_base + col] = acc;
}