Skip to content

Commit e595743

Browse files
authored
Fix intrinsic LoadLocalRootTableConstant for optix (#7949)
Due to an older version of spec referred there was an inconsitency v1.29 2/20/2025 - [HitObject LoadLocalRootArgumentsConstant] Latest spec https://microsoft.github.io/DirectX-Specs/d3d/Raytracing.html#hitobject-loadlocalroottableconstant Refer: OptiX backend support for Shader Execution Reordering (SER) features as outlined in issue #6647. -
1 parent 4721b6e commit e595743

File tree

8 files changed

+108
-54
lines changed

8 files changed

+108
-54
lines changed

prelude/slang-cuda-prelude.h

Lines changed: 17 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -3644,22 +3644,23 @@ __forceinline__ __device__ void* optixTraverse(
36443644
r1);
36453645
}
36463646

3647-
static __forceinline__ __device__ bool optixHitObjectIsHit(OptixTraversableHandle* hitObj)
3647+
static __forceinline__ __device__ bool slangOptixHitObjectIsHit(OptixTraversableHandle* hitObj)
36483648
{
36493649
return optixHitObjectIsHit();
36503650
}
36513651

3652-
static __forceinline__ __device__ bool optixHitObjectIsMiss(OptixTraversableHandle* hitObj)
3652+
static __forceinline__ __device__ bool slangOptixHitObjectIsMiss(OptixTraversableHandle* hitObj)
36533653
{
36543654
return optixHitObjectIsMiss();
36553655
}
36563656

3657-
static __forceinline__ __device__ bool optixHitObjectIsNop(OptixTraversableHandle* hitObj)
3657+
static __forceinline__ __device__ bool slangOptixHitObjectIsNop(OptixTraversableHandle* hitObj)
36583658
{
36593659
return optixHitObjectIsNop();
36603660
}
36613661

3662-
static __forceinline__ __device__ uint optixHitObjectGetClusterId(OptixTraversableHandle* hitObj)
3662+
static __forceinline__ __device__ uint
3663+
slangOptixHitObjectGetClusterId(OptixTraversableHandle* hitObj)
36633664
{
36643665
return optixHitObjectGetClusterId();
36653666
}
@@ -3809,7 +3810,7 @@ static __forceinline__ __device__ void optixMakeHitObject(
38093810
0 /*numTransforms */);
38103811
}
38113812

3812-
static __forceinline__ __device__ void optixMakeNopHitObject(OptixTraversableHandle* Obj)
3813+
static __forceinline__ __device__ void slangOptixMakeNopHitObject(OptixTraversableHandle* Obj)
38133814
{
38143815
optixMakeNopHitObject();
38153816
}
@@ -3834,22 +3835,25 @@ static __forceinline__ __device__ RayDesc optixHitObjectGetRayDesc(OptixTraversa
38343835
return ray;
38353836
}
38363837

3837-
static __forceinline__ __device__ uint optixHitObjectGetInstanceIndex(OptixTraversableHandle* Obj)
3838+
static __forceinline__ __device__ uint
3839+
slangOptixHitObjectGetInstanceIndex(OptixTraversableHandle* Obj)
38383840
{
38393841
return optixHitObjectGetInstanceIndex();
38403842
}
38413843

3842-
static __forceinline__ __device__ uint optixHitObjectGetInstanceId(OptixTraversableHandle* Obj)
3844+
static __forceinline__ __device__ uint slangOptixHitObjectGetInstanceId(OptixTraversableHandle* Obj)
38433845
{
38443846
return optixHitObjectGetInstanceId();
38453847
}
38463848

3847-
static __forceinline__ __device__ uint optixHitObjectGetSbtGASIndex(OptixTraversableHandle* Obj)
3849+
static __forceinline__ __device__ uint
3850+
slangOptixHitObjectGetSbtGASIndex(OptixTraversableHandle* Obj)
38483851
{
38493852
return optixHitObjectGetSbtGASIndex();
38503853
}
38513854

3852-
static __forceinline__ __device__ uint optixHitObjectGetPrimitiveIndex(OptixTraversableHandle* Obj)
3855+
static __forceinline__ __device__ uint
3856+
slangOptixHitObjectGetPrimitiveIndex(OptixTraversableHandle* Obj)
38533857
{
38543858
return optixHitObjectGetPrimitiveIndex();
38553859
}
@@ -3888,22 +3892,17 @@ static __forceinline__ __device__ T optixHitObjectGetAttribute(OptixTraversableH
38883892
return result;
38893893
}
38903894

3891-
static __forceinline__ __device__ uint optixHitObjectGetSbtRecordIndex(OptixTraversableHandle* Obj)
3895+
static __forceinline__ __device__ uint
3896+
slangOptixHitObjectGetSbtRecordIndex(OptixTraversableHandle* Obj)
38923897
{
38933898
return optixHitObjectGetSbtRecordIndex();
38943899
}
38953900

38963901
static __forceinline__ __device__ uint
3897-
optixHitObjectSetSbtRecordIndex(OptixTraversableHandle* Obj, uint sbtRecordIndex)
3902+
slangOptixHitObjectSetSbtRecordIndex(OptixTraversableHandle* Obj, uint sbtRecordIndex)
38983903
{
38993904
optixHitObjectSetSbtRecordIndex(sbtRecordIndex); // returns void
3900-
return 0;
3901-
}
3902-
static __forceinline__ __device__ uint
3903-
optixHitObjectGetSbtDataPointer(OptixTraversableHandle* Obj, uint sbtRecordIndex)
3904-
{
3905-
optixHitObjectGetSbtDataPointer(); // returns void
3906-
return 0;
3905+
return sbtRecordIndex;
39073906
}
39083907
#endif
39093908
static const int kSlangTorchTensorMaxDim = 5;

source/core/slang-platform.cpp

Lines changed: 18 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -174,12 +174,27 @@ SLANG_COMPILE_TIME_ASSERT(E_OUTOFMEMORY == SLANG_E_OUT_OF_MEMORY);
174174
}
175175

176176
#else // _WIN32
177-
178177
/* static */ SlangResult PlatformUtil::getInstancePath([[maybe_unused]] StringBuilder& out)
179178
{
180-
// On non Windows it's typically hard to get the instance path, so we'll say not implemented.
181-
// The meaning is also somewhat more ambiguous - is it the exe or the shared library path?
179+
#if defined(__linux__) || defined(__CYGWIN__)
180+
char path[PATH_MAX];
181+
ssize_t len = readlink("/proc/self/exe", path, sizeof(path) - 1);
182+
if (len == -1)
183+
{
184+
return SLANG_FAIL;
185+
}
186+
187+
path[len] = '\0';
188+
String pathString(path);
189+
190+
// We don't want the instance name, just the path to it
191+
out.clear();
192+
out.append(Path::getParentDirectory(pathString));
193+
194+
return out.getLength() > 0 ? SLANG_OK : SLANG_FAIL;
195+
#else
182196
return SLANG_E_NOT_IMPLEMENTED;
197+
#endif
183198
}
184199

185200
/* static */ SlangResult PlatformUtil::appendResult(

source/slang/hlsl.meta.slang

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -20472,7 +20472,7 @@ struct HitObject
2047220472
__intrinsic_asm "($0 = NvMakeNop())";
2047320473
case glsl:
2047420474
__glslMakeNop(__return_val);
20475-
case cuda: __intrinsic_asm "optixMakeNopHitObject";
20475+
case cuda: __intrinsic_asm "slangOptixMakeNopHitObject";
2047620476
case spirv:
2047720477
spirv_asm
2047820478
{
@@ -20561,7 +20561,7 @@ struct HitObject
2056120561
{
2056220562
case hlsl: __intrinsic_asm ".IsMiss";
2056320563
case glsl: __intrinsic_asm "hitObjectIsMissNV($0)";
20564-
case cuda: __intrinsic_asm "optixHitObjectIsMiss";
20564+
case cuda: __intrinsic_asm "slangOptixHitObjectIsMiss";
2056520565
case spirv:
2056620566
return spirv_asm
2056720567
{
@@ -20582,7 +20582,7 @@ struct HitObject
2058220582
{
2058320583
case hlsl: __intrinsic_asm ".IsHit";
2058420584
case glsl: __intrinsic_asm "hitObjectIsHitNV($0)";
20585-
case cuda: __intrinsic_asm "optixHitObjectIsHit";
20585+
case cuda: __intrinsic_asm "slangOptixHitObjectIsHit";
2058620586
case spirv:
2058720587
return spirv_asm
2058820588
{
@@ -20603,7 +20603,7 @@ struct HitObject
2060320603
{
2060420604
case hlsl: __intrinsic_asm ".IsNop";
2060520605
case glsl: __intrinsic_asm "hitObjectIsEmptyNV($0)";
20606-
case cuda: __intrinsic_asm "optixHitObjectIsNop";
20606+
case cuda: __intrinsic_asm "slangOptixHitObjectIsNop";
2060720607
case spirv:
2060820608
return spirv_asm
2060920609
{
@@ -20655,7 +20655,7 @@ struct HitObject
2065520655
{
2065620656
case hlsl: __intrinsic_asm ".GetShaderTableIndex";
2065720657
case glsl: __intrinsic_asm "hitObjectGetShaderBindingTableRecordIndexNV($0)";
20658-
case cuda: __intrinsic_asm "optixHitObjectGetSbtRecordIndex";
20658+
case cuda: __intrinsic_asm "slangOptixHitObjectGetSbtRecordIndex";
2065920659
case spirv:
2066020660
return spirv_asm
2066120661
{
@@ -20675,7 +20675,7 @@ struct HitObject
2067520675
__target_switch
2067620676
{
2067720677
case hlsl: __intrinsic_asm ".SetShaderTableIndex";
20678-
case cuda: __intrinsic_asm "optixHitObjectSetSbtRecordIndex";
20678+
case cuda: __intrinsic_asm "slangOptixHitObjectSetSbtRecordIndex";
2067920679
}
2068020680
}
2068120681
/// Returns the instance index of a hit. Valid if the hit object represents a hit.
@@ -20689,7 +20689,7 @@ struct HitObject
2068920689
{
2069020690
case hlsl: __intrinsic_asm ".GetInstanceIndex";
2069120691
case glsl: __intrinsic_asm "hitObjectGetInstanceIdNV($0)";
20692-
case cuda: __intrinsic_asm "optixHitObjectGetInstanceIndex";
20692+
case cuda: __intrinsic_asm "slangOptixHitObjectGetInstanceIndex";
2069320693
case spirv:
2069420694
return spirv_asm
2069520695
{
@@ -20711,7 +20711,7 @@ struct HitObject
2071120711
{
2071220712
case hlsl: __intrinsic_asm ".GetInstanceID";
2071320713
case glsl: __intrinsic_asm "hitObjectGetInstanceCustomIndexNV($0)";
20714-
case cuda: __intrinsic_asm "optixHitObjectGetInstanceId";
20714+
case cuda: __intrinsic_asm "slangOptixHitObjectGetInstanceId";
2071520715
case spirv:
2071620716
return spirv_asm
2071720717
{
@@ -20733,7 +20733,7 @@ struct HitObject
2073320733
{
2073420734
case hlsl: __intrinsic_asm ".GetGeometryIndex";
2073520735
case glsl: __intrinsic_asm "hitObjectGetGeometryIndexNV($0)";
20736-
case cuda: __intrinsic_asm "optixHitObjectGetSbtGASIndex";
20736+
case cuda: __intrinsic_asm "slangOptixHitObjectGetSbtGASIndex";
2073720737
case spirv:
2073820738
return spirv_asm
2073920739
{
@@ -20755,7 +20755,7 @@ struct HitObject
2075520755
{
2075620756
case hlsl: __intrinsic_asm ".GetPrimitiveIndex";
2075720757
case glsl: __intrinsic_asm "hitObjectGetPrimitiveIndexNV($0)";
20758-
case cuda: __intrinsic_asm "optixHitObjectGetPrimitiveIndex";
20758+
case cuda: __intrinsic_asm "slangOptixHitObjectGetPrimitiveIndex";
2075920759
case spirv:
2076020760
return spirv_asm
2076120761
{
@@ -20799,7 +20799,7 @@ struct HitObject
2079920799
{
2080020800
case hlsl: __intrinsic_asm ".GetClusterID";
2080120801
case glsl: __intrinsic_asm "hitObjectGetClusterIdNV($0)";
20802-
case cuda: __intrinsic_asm "optixHitObjectGetClusterId";
20802+
case cuda: __intrinsic_asm "slangOptixHitObjectGetClusterId";
2080320803
case spirv:
2080420804
return spirv_asm
2080520805
{

tests/cuda/optix-cluster.slang

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
//TEST:SIMPLE(filecheck=CHECK): -target cuda
2+
//TEST:SIMPLE(filecheck=CHECK-PTX): -target ptx -Xnvrtc -I"./external/optix-dev/include/" -entry closestHitShaderA
23
//CHECK: __global__ void __closesthit__closestHitShaderA
34
//CHECK: optixGetClusterId
45
struct RayPayload
@@ -9,6 +10,7 @@ struct RayPayload
910
[shader("closesthit")]
1011
void closestHitShaderA(inout RayPayload payload, in BuiltInTriangleIntersectionAttributes attr)
1112
{
13+
//CHECK-PTX:_optix_get_cluster_id
1214
int clusterId = GetClusterID();
1315
float4 color = float4(0, 0, 0, 1);
1416
if (clusterId >= 0)

tests/cuda/optix-coopvec.slang

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
//TEST:SIMPLE(filecheck=CHECK): -target cuda -capability optix_coopvec
2+
//TEST:SIMPLE(filecheck=CHECK-PTX): -target ptx -Xnvrtc -I"./external/optix-dev/include/"
23

4+
// CHECK-PTX: add.f32
35
// CHECK: optixCoopVecLoad
46
// CHECK: OptixCoopVec
57
// CHECK: optixCoopVecTanh

tests/cuda/optix-hit-attributes.slang

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
//TEST:SIMPLE(filecheck=CHECK): -target cuda
22
//CHECK: __global__ void __closesthit__closestHitShaderA
3+
//TEST:SIMPLE(filecheck=CHECK-PTX): -target ptx -Xnvrtc -I"./external/optix-dev/include/"
34
struct RayPayload
45
{
56
float4 color;
@@ -8,6 +9,7 @@ struct RayPayload
89
[shader("closesthit")]
910
void closestHitShaderA(inout RayPayload payload, in BuiltInTriangleIntersectionAttributes attr)
1011
{
12+
//CHECK-PTX: _optix_read_primitive_idx
1113
uint primitiveIndex = PrimitiveIndex();
1214
float4 color = float4(0, 0, 0, 1);
1315
color[primitiveIndex] = 1;

tests/cuda/optix-ignore-hit.slang

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,10 @@
11
// optix-ignore-hit.slang
22
//TEST:SIMPLE(filecheck=CHECK): -target cuda -entry anyHitShader
3+
//TEST:SIMPLE(filecheck=CHECK-PTX): -target ptx -Xnvrtc -I"./external/optix-dev/include/"
34
//CHECK: HitBuffer_insert_0(((HitBuffer_0 *)getOptiXRayPayloadPtr()), hit_0.t_0);
45
//CHECK: optixIgnoreIntersection
56

7+
//CHECK-PTX: _optix_get_ray_tmax
68

79
struct HitBuffer
810
{
@@ -28,6 +30,7 @@ void anyHitShader(inout HitBuffer rayHitBuffer)
2830
// Modify the inout parameter
2931
rayHitBuffer.insert(hit.t);
3032

33+
// CHECK-PTX: _optix_ignore_intersection
3134
// Early exit - should not lose the modification to inout rayHitBuffer
3235
if (hit.t < rayHitBuffer.last)
3336
IgnoreHit();

0 commit comments

Comments
 (0)