Skip to content

Commit 3c79ff9

Browse files
committed
Going to extremes to shave off more bytes; 32 bytes saved
1 parent 38d60f1 commit 3c79ff9

File tree

4 files changed

+9
-10
lines changed

4 files changed

+9
-10
lines changed

src/computesim.nim

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -140,8 +140,7 @@ proc subgroupProc[A, B, C](wg: WorkGroupContext; numActiveThreads: uint32; barri
140140
globalOffsetX + x,
141141
globalOffsetY + y,
142142
globalOffsetZ + z
143-
),
144-
gl_SubgroupInvocationID: threadId
143+
)
145144
)
146145
# Update coordinates
147146
inc x

src/computesim/core.nim

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -96,7 +96,6 @@ type
9696
ThreadContext* = object
9797
gl_GlobalInvocationID*: UVec3
9898
gl_LocalInvocationID*: UVec3
99-
gl_SubgroupInvocationID*: uint32
10099

101100
BarrierHandle* = object
102101
x: ptr Barrier
@@ -129,8 +128,8 @@ proc wait*(m: BarrierHandle) {.inline.} =
129128
wait(m.x[])
130129
131130
type
132-
ThreadClosure* = iterator (iterArg: SubgroupResult,
133-
wg: WorkGroupContext, thread: ThreadContext): SubgroupCommand
131+
ThreadClosure* = iterator (iterArg: SubgroupResult, wg: WorkGroupContext,
132+
thread: ThreadContext, threadId: uint32): SubgroupCommand
134133
SubgroupResults* = array[SubgroupSize, SubgroupResult]
135134
SubgroupCommands* = array[SubgroupSize, SubgroupCommand]
136135
SubgroupThreadIDs* = array[SubgroupSize, uint32]

src/computesim/lockstep.nim

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,7 @@ proc runThreads*(threads: SubgroupThreads; workGroup: WorkGroupContext,
6262
threadStates[threadId] == running or canReconverge or canPassBarrier:
6363
madeProgress = true
6464
{.cast(gcsafe).}:
65-
commands[threadId] = threads[threadId](results[threadId], workGroup, threadContexts[threadId])
65+
commands[threadId] = threads[threadId](results[threadId], workGroup, threadContexts[threadId], threadId)
6666
if finished(threads[threadId]):
6767
threadStates[threadId] = finished
6868
elif commands[threadId].kind == barrier:

src/computesim/transform.nim

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -153,8 +153,7 @@ proc generateWorkGroupTemplates(wgSym: NimNode): NimNode =
153153
proc generateThreadTemplates(threadSym: NimNode): NimNode =
154154
generateTemplates(threadSym, [
155155
"gl_GlobalInvocationID",
156-
"gl_LocalInvocationID",
157-
"gl_SubgroupInvocationID"
156+
"gl_LocalInvocationID"
158157
])
159158

160159
proc isDiscard(n: NimNode, op: SubgroupOp): bool =
@@ -273,16 +272,18 @@ macro computeShader*(prc: untyped): untyped =
273272
# Create symbols for both contexts
274273
let wgSym = genSym(nskParam, "wg")
275274
let threadSym = genSym(nskParam, "thread")
275+
let tidSym = genSym(nskParam, "threadId")
276276
# Generate template declarations for both contexts
277277
let wgTemplates = generateWorkGroupTemplates(wgSym)
278278
let threadTemplates = generateThreadTemplates(threadSym)
279279

280280
result = quote do:
281281
proc `procName`(): ThreadClosure =
282282
iterator (`iterArg`: SubgroupResult, `wgSym`: WorkGroupContext,
283-
`threadSym`: ThreadContext): SubgroupCommand =
284-
`wgTemplates`
283+
`threadSym`: ThreadContext, `tidSym`: uint32): SubgroupCommand =
284+
template gl_SubgroupInvocationID(): uint32 {.used.} = `tidSym`
285285
`threadTemplates`
286+
`wgTemplates`
286287
`traversedBody`
287288

288289
# Now inject the parameters and pragmas from original proc

0 commit comments

Comments
 (0)