Skip to content

Commit 38d60f1

Browse files
committed
Split the loop
1 parent 914eaeb commit 38d60f1

File tree

1 file changed

+4
-1
lines changed

1 file changed

+4
-1
lines changed

src/computesim.nim

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -132,6 +132,7 @@ proc subgroupProc[A, B, C](wg: WorkGroupContext; numActiveThreads: uint32; barri
132 132
let globalOffsetX = wg.gl_WorkGroupID.x * wg.gl_WorkGroupSize.x
133 133
let globalOffsetY = wg.gl_WorkGroupID.y * wg.gl_WorkGroupSize.y
134 134
let globalOffsetZ = wg.gl_WorkGroupID.z * wg.gl_WorkGroupSize.z
135+
# Setup thread contexts
135 136
for threadId in 0..<numActiveThreads:
136 137
threadContexts[threadId] = ThreadContext(
137 138
gl_LocalInvocationID: uvec3(x, y, z),
@@ -142,7 +143,6 @@ proc subgroupProc[A, B, C](wg: WorkGroupContext; numActiveThreads: uint32; barri
142 143
),
143 144
gl_SubgroupInvocationID: threadId
144 145
)
145-
threads[threadId] = compute(buffers, shared, args)
146 146
# Update coordinates
147 147
inc x
148 148
if x >= wg.gl_WorkGroupSize.x:
@@ -151,6 +151,9 @@ proc subgroupProc[A, B, C](wg: WorkGroupContext; numActiveThreads: uint32; barri
151 151
if y >= wg.gl_WorkGroupSize.y:
152 152
y = 0
153 153
inc z
154+
# Allocate all compute closures
155+
for threadId in 0..<numActiveThreads:
156+
threads[threadId] = compute(buffers, shared, args)
154 157
# Run threads in lockstep
155 158
runThreads(threads, wg, threadContexts, numActiveThreads, barrier)
156 159

0 commit comments

Comments
 (0)