#include "nvHLSLExtnsInternal.h"

//----------------------------------------------------------------------------//
//--------------------------- Warp Shuffle functions -------------------------//
//----------------------------------------------------------------------------//

// All shuffle variants take an optional width parameter which permits
// sub-division of the warp into segments; each segment of 'width' lanes
// behaves as a separate entity with a starting logical lane ID of 0.

// shuffles the value from the lane srcLane
int NvShfl(int val, uint srcLane, int width = NV_WARP_SIZE)
{
    uint index = g_NvidiaExt.IncrementCounter();
    g_NvidiaExt[index].src0u.x = val;                             // variable to be shuffled
    g_NvidiaExt[index].src0u.y = srcLane;                         // source lane
    g_NvidiaExt[index].src0u.z = __NvGetShflMaskFromWidth(width);
    g_NvidiaExt[index].opcode  = NV_EXTN_OP_SHFL;

    // result is returned as the return value of IncrementCounter on the fake UAV slot
    return g_NvidiaExt.IncrementCounter();
}
int2 NvShfl(int2 val, uint srcLane, int width = NV_WARP_SIZE)
{
    int x = NvShfl(val.x, srcLane, width);
    int y = NvShfl(val.y, srcLane, width);
    return int2(x, y);
}
int4 NvShfl(int4 val, uint srcLane, int width = NV_WARP_SIZE)
{
    int x = NvShfl(val.x, srcLane, width);
    int y = NvShfl(val.y, srcLane, width);
    int z = NvShfl(val.z, srcLane, width);
    int w = NvShfl(val.w, srcLane, width);
    return int4(x, y, z, w);
}
// shuffles the value from the lane (laneId - delta); the lowest 'delta' lanes
// of each segment keep their own value
int NvShflUp(int val, uint delta, int width = NV_WARP_SIZE)
{
    uint index = g_NvidiaExt.IncrementCounter();
    g_NvidiaExt[index].src0u.x = val;
    g_NvidiaExt[index].src0u.y = delta;
    g_NvidiaExt[index].src0u.z = (NV_WARP_SIZE - width) << 8;
    g_NvidiaExt[index].opcode  = NV_EXTN_OP_SHFL_UP;
    return g_NvidiaExt.IncrementCounter();
}
// shuffles the value from the lane (laneId + delta); the highest 'delta' lanes
// of each segment keep their own value
int NvShflDown(int val, uint delta, int width = NV_WARP_SIZE)
{
    uint index = g_NvidiaExt.IncrementCounter();
    g_NvidiaExt[index].src0u.x = val;
    g_NvidiaExt[index].src0u.y = delta;
    g_NvidiaExt[index].src0u.z = __NvGetShflMaskFromWidth(width);
    g_NvidiaExt[index].opcode  = NV_EXTN_OP_SHFL_DOWN;
    return g_NvidiaExt.IncrementCounter();
}
// shuffles the value from the lane (laneId XOR laneMask)
int NvShflXor(int val, uint laneMask, int width = NV_WARP_SIZE)
{
    uint index = g_NvidiaExt.IncrementCounter();
    g_NvidiaExt[index].src0u.x = val;
    g_NvidiaExt[index].src0u.y = laneMask;
    g_NvidiaExt[index].src0u.z = __NvGetShflMaskFromWidth(width);
    g_NvidiaExt[index].opcode  = NV_EXTN_OP_SHFL_XOR;
    return g_NvidiaExt.IncrementCounter();
}
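// Example (illustrative sketch, not part of the original header): a warp-wide
// reduction built from NvShflXor. Every lane ends up holding the sum across
// the warp. Assumes the NVAPI fake UAV slot has been set up by the application.
//
//   int WarpReduceAdd(int v)
//   {
//       // butterfly pattern: exchange with lanes 16, 8, 4, 2, 1 away
//       for (int offset = NV_WARP_SIZE / 2; offset > 0; offset >>= 1)
//           v += NvShflXor(v, offset);
//       return v;
//   }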
//----------------------------------------------------------------------------//
//----------------------------- Warp Vote functions --------------------------//
//----------------------------------------------------------------------------//

// returns 0xFFFFFFFF if the predicate is true for any thread in the warp, returns 0 otherwise
uint NvAny(int predicate)
{
    uint index = g_NvidiaExt.IncrementCounter();
    g_NvidiaExt[index].src0u.x = predicate;
    g_NvidiaExt[index].opcode = NV_EXTN_OP_VOTE_ANY;
    return g_NvidiaExt.IncrementCounter();
}

// returns 0xFFFFFFFF if the predicate is true for ALL threads in the warp, returns 0 otherwise
uint NvAll(int predicate)
{
    uint index = g_NvidiaExt.IncrementCounter();
    g_NvidiaExt[index].src0u.x = predicate;
    g_NvidiaExt[index].opcode = NV_EXTN_OP_VOTE_ALL;
    return g_NvidiaExt.IncrementCounter();
}

// returns a mask of all threads in the warp with bits set for threads where the predicate is true
uint NvBallot(int predicate)
{
    uint index = g_NvidiaExt.IncrementCounter();
    g_NvidiaExt[index].src0u.x = predicate;
    g_NvidiaExt[index].opcode = NV_EXTN_OP_VOTE_BALLOT;
    return g_NvidiaExt.IncrementCounter();
}
//----------------------------------------------------------------------------//
//----------------------------- Utility functions ----------------------------//
//----------------------------------------------------------------------------//

// returns the lane index of the current thread (thread index in warp)
int NvGetLaneId()
{
    uint index = g_NvidiaExt.IncrementCounter();
    g_NvidiaExt[index].opcode = NV_EXTN_OP_GET_LANE_ID;
    return g_NvidiaExt.IncrementCounter();
}

// returns the value of the special register identified by subOpCode;
// see the NV_SPECIALOP_* values in nvShaderExtnEnums.h
uint NvGetSpecial(uint subOpCode)
{
    return __NvGetSpecial(subOpCode);
}
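// Example (illustrative sketch, not part of the original header): electing one
// lane to perform a single atomic on behalf of the warp. 'counterBuf' is a
// hypothetical application-bound RWByteAddressBuffer.
//
//   uint active = NvBallot(1);                  // mask of active lanes
//   uint leader = firstbitlow(active);          // lowest active lane
//   uint base = 0;
//   if (NvGetLaneId() == leader)
//       counterBuf.InterlockedAdd(0, countbits(active), base);
//   base = asuint(NvShfl(asint(base), leader)); // broadcast to the warp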
//----------------------------------------------------------------------------//
//----------------------------- FP16 Atomic Functions ------------------------//
//----------------------------------------------------------------------------//

// The functions below perform an atomic operation on a pair of fp16 values in
// the given UAV. A uint operand is treated as two packed fp16 values; the
// float2 overloads convert to fp16x2 first. The return value is the original
// contents at the address, packed as fp16x2 into a uint.

uint NvInterlockedAddFp16x2(RWByteAddressBuffer uav, uint byteAddress, uint fp16x2Val)
{
    return __NvAtomicOpFP16x2(uav, byteAddress, fp16x2Val, NV_EXTN_ATOM_ADD);
}

uint NvInterlockedMinFp16x2(RWByteAddressBuffer uav, uint byteAddress, uint fp16x2Val)
{
    return __NvAtomicOpFP16x2(uav, byteAddress, fp16x2Val, NV_EXTN_ATOM_MIN);
}

uint NvInterlockedMaxFp16x2(RWByteAddressBuffer uav, uint byteAddress, uint fp16x2Val)
{
    return __NvAtomicOpFP16x2(uav, byteAddress, fp16x2Val, NV_EXTN_ATOM_MAX);
}

uint NvInterlockedAddFp16x2(RWByteAddressBuffer uav, uint byteAddress, float2 val)
{
    return __NvAtomicOpFP16x2(uav, byteAddress, __fp32x2Tofp16x2(val), NV_EXTN_ATOM_ADD);
}

uint NvInterlockedMinFp16x2(RWByteAddressBuffer uav, uint byteAddress, float2 val)
{
    return __NvAtomicOpFP16x2(uav, byteAddress, __fp32x2Tofp16x2(val), NV_EXTN_ATOM_MIN);
}

uint NvInterlockedMaxFp16x2(RWByteAddressBuffer uav, uint byteAddress, float2 val)
{
    return __NvAtomicOpFP16x2(uav, byteAddress, __fp32x2Tofp16x2(val), NV_EXTN_ATOM_MAX);
}
// versions operating on RWTexture UAVs holding fp16x2 values

uint NvInterlockedAddFp16x2(RWTexture1D<float2> uav, uint address, uint fp16x2Val)
{
    return __NvAtomicOpFP16x2(uav, address, fp16x2Val, NV_EXTN_ATOM_ADD);
}

uint NvInterlockedMinFp16x2(RWTexture1D<float2> uav, uint address, uint fp16x2Val)
{
    return __NvAtomicOpFP16x2(uav, address, fp16x2Val, NV_EXTN_ATOM_MIN);
}

uint NvInterlockedMaxFp16x2(RWTexture1D<float2> uav, uint address, uint fp16x2Val)
{
    return __NvAtomicOpFP16x2(uav, address, fp16x2Val, NV_EXTN_ATOM_MAX);
}

uint NvInterlockedAddFp16x2(RWTexture2D<float2> uav, uint2 address, uint fp16x2Val)
{
    return __NvAtomicOpFP16x2(uav, address, fp16x2Val, NV_EXTN_ATOM_ADD);
}

uint NvInterlockedMinFp16x2(RWTexture2D<float2> uav, uint2 address, uint fp16x2Val)
{
    return __NvAtomicOpFP16x2(uav, address, fp16x2Val, NV_EXTN_ATOM_MIN);
}

uint NvInterlockedMaxFp16x2(RWTexture2D<float2> uav, uint2 address, uint fp16x2Val)
{
    return __NvAtomicOpFP16x2(uav, address, fp16x2Val, NV_EXTN_ATOM_MAX);
}

uint NvInterlockedAddFp16x2(RWTexture3D<float2> uav, uint3 address, uint fp16x2Val)
{
    return __NvAtomicOpFP16x2(uav, address, fp16x2Val, NV_EXTN_ATOM_ADD);
}

uint NvInterlockedMinFp16x2(RWTexture3D<float2> uav, uint3 address, uint fp16x2Val)
{
    return __NvAtomicOpFP16x2(uav, address, fp16x2Val, NV_EXTN_ATOM_MIN);
}

uint NvInterlockedMaxFp16x2(RWTexture3D<float2> uav, uint3 address, uint fp16x2Val)
{
    return __NvAtomicOpFP16x2(uav, address, fp16x2Val, NV_EXTN_ATOM_MAX);
}
// versions taking float2 operands; the value is converted to fp16x2 before the atomic

uint NvInterlockedAddFp16x2(RWTexture1D<float2> uav, uint address, float2 val)
{
    return __NvAtomicOpFP16x2(uav, address, __fp32x2Tofp16x2(val), NV_EXTN_ATOM_ADD);
}

uint NvInterlockedMinFp16x2(RWTexture1D<float2> uav, uint address, float2 val)
{
    return __NvAtomicOpFP16x2(uav, address, __fp32x2Tofp16x2(val), NV_EXTN_ATOM_MIN);
}

uint NvInterlockedMaxFp16x2(RWTexture1D<float2> uav, uint address, float2 val)
{
    return __NvAtomicOpFP16x2(uav, address, __fp32x2Tofp16x2(val), NV_EXTN_ATOM_MAX);
}

uint NvInterlockedAddFp16x2(RWTexture2D<float2> uav, uint2 address, float2 val)
{
    return __NvAtomicOpFP16x2(uav, address, __fp32x2Tofp16x2(val), NV_EXTN_ATOM_ADD);
}

uint NvInterlockedMinFp16x2(RWTexture2D<float2> uav, uint2 address, float2 val)
{
    return __NvAtomicOpFP16x2(uav, address, __fp32x2Tofp16x2(val), NV_EXTN_ATOM_MIN);
}

uint NvInterlockedMaxFp16x2(RWTexture2D<float2> uav, uint2 address, float2 val)
{
    return __NvAtomicOpFP16x2(uav, address, __fp32x2Tofp16x2(val), NV_EXTN_ATOM_MAX);
}

uint NvInterlockedAddFp16x2(RWTexture3D<float2> uav, uint3 address, float2 val)
{
    return __NvAtomicOpFP16x2(uav, address, __fp32x2Tofp16x2(val), NV_EXTN_ATOM_ADD);
}

uint NvInterlockedMinFp16x2(RWTexture3D<float2> uav, uint3 address, float2 val)
{
    return __NvAtomicOpFP16x2(uav, address, __fp32x2Tofp16x2(val), NV_EXTN_ATOM_MIN);
}

uint NvInterlockedMaxFp16x2(RWTexture3D<float2> uav, uint3 address, float2 val)
{
    return __NvAtomicOpFP16x2(uav, address, __fp32x2Tofp16x2(val), NV_EXTN_ATOM_MAX);
}
// The functions below operate on four fp16 values held as two packed fp16x2
// pairs in a uint2; the return value is the original contents packed the same way.

uint2 NvInterlockedAddFp16x4(RWTexture1D<float4> uav, uint address, uint2 fp16x2Val)
{
    return __NvAtomicOpFP16x2(uav, address, fp16x2Val, NV_EXTN_ATOM_ADD);
}

uint2 NvInterlockedMinFp16x4(RWTexture1D<float4> uav, uint address, uint2 fp16x2Val)
{
    return __NvAtomicOpFP16x2(uav, address, fp16x2Val, NV_EXTN_ATOM_MIN);
}

uint2 NvInterlockedMaxFp16x4(RWTexture1D<float4> uav, uint address, uint2 fp16x2Val)
{
    return __NvAtomicOpFP16x2(uav, address, fp16x2Val, NV_EXTN_ATOM_MAX);
}

uint2 NvInterlockedAddFp16x4(RWTexture2D<float4> uav, uint2 address, uint2 fp16x2Val)
{
    return __NvAtomicOpFP16x2(uav, address, fp16x2Val, NV_EXTN_ATOM_ADD);
}

uint2 NvInterlockedMinFp16x4(RWTexture2D<float4> uav, uint2 address, uint2 fp16x2Val)
{
    return __NvAtomicOpFP16x2(uav, address, fp16x2Val, NV_EXTN_ATOM_MIN);
}

uint2 NvInterlockedMaxFp16x4(RWTexture2D<float4> uav, uint2 address, uint2 fp16x2Val)
{
    return __NvAtomicOpFP16x2(uav, address, fp16x2Val, NV_EXTN_ATOM_MAX);
}

uint2 NvInterlockedAddFp16x4(RWTexture3D<float4> uav, uint3 address, uint2 fp16x2Val)
{
    return __NvAtomicOpFP16x2(uav, address, fp16x2Val, NV_EXTN_ATOM_ADD);
}

uint2 NvInterlockedMinFp16x4(RWTexture3D<float4> uav, uint3 address, uint2 fp16x2Val)
{
    return __NvAtomicOpFP16x2(uav, address, fp16x2Val, NV_EXTN_ATOM_MIN);
}

uint2 NvInterlockedMaxFp16x4(RWTexture3D<float4> uav, uint3 address, uint2 fp16x2Val)
{
    return __NvAtomicOpFP16x2(uav, address, fp16x2Val, NV_EXTN_ATOM_MAX);
}
// versions taking float4 operands; the value is converted to fp16x4 before the atomic

uint2 NvInterlockedAddFp16x4(RWTexture1D<float4> uav, uint address, float4 val)
{
    return __NvAtomicOpFP16x2(uav, address, __fp32x4Tofp16x4(val), NV_EXTN_ATOM_ADD);
}

uint2 NvInterlockedMinFp16x4(RWTexture1D<float4> uav, uint address, float4 val)
{
    return __NvAtomicOpFP16x2(uav, address, __fp32x4Tofp16x4(val), NV_EXTN_ATOM_MIN);
}

uint2 NvInterlockedMaxFp16x4(RWTexture1D<float4> uav, uint address, float4 val)
{
    return __NvAtomicOpFP16x2(uav, address, __fp32x4Tofp16x4(val), NV_EXTN_ATOM_MAX);
}

uint2 NvInterlockedAddFp16x4(RWTexture2D<float4> uav, uint2 address, float4 val)
{
    return __NvAtomicOpFP16x2(uav, address, __fp32x4Tofp16x4(val), NV_EXTN_ATOM_ADD);
}

uint2 NvInterlockedMinFp16x4(RWTexture2D<float4> uav, uint2 address, float4 val)
{
    return __NvAtomicOpFP16x2(uav, address, __fp32x4Tofp16x4(val), NV_EXTN_ATOM_MIN);
}

uint2 NvInterlockedMaxFp16x4(RWTexture2D<float4> uav, uint2 address, float4 val)
{
    return __NvAtomicOpFP16x2(uav, address, __fp32x4Tofp16x4(val), NV_EXTN_ATOM_MAX);
}

uint2 NvInterlockedAddFp16x4(RWTexture3D<float4> uav, uint3 address, float4 val)
{
    return __NvAtomicOpFP16x2(uav, address, __fp32x4Tofp16x4(val), NV_EXTN_ATOM_ADD);
}

uint2 NvInterlockedMinFp16x4(RWTexture3D<float4> uav, uint3 address, float4 val)
{
    return __NvAtomicOpFP16x2(uav, address, __fp32x4Tofp16x4(val), NV_EXTN_ATOM_MIN);
}

uint2 NvInterlockedMaxFp16x4(RWTexture3D<float4> uav, uint3 address, float4 val)
{
    return __NvAtomicOpFP16x2(uav, address, __fp32x4Tofp16x4(val), NV_EXTN_ATOM_MAX);
}
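// Example (illustrative sketch, not part of the original header): accumulating
// a pair of luminance moments with a single fp16x2 atomic. 'momentsBuf' is a
// hypothetical RWByteAddressBuffer whose 32-bit words each hold two fp16 values.
//
//   float2 m = float2(lum, lum * lum);
//   // the float2 overload converts to fp16x2 before the atomic add
//   uint prevPacked = NvInterlockedAddFp16x2(momentsBuf, pixelIndex * 4, m);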
//----------------------------------------------------------------------------//
//----------------------------- FP32 Atomic Functions ------------------------//
//----------------------------------------------------------------------------//

// The functions below perform an atomic add on the given UAV, treating the
// value as a 32-bit float; the return value is the value at the address
// before the add.

float NvInterlockedAddFp32(RWByteAddressBuffer uav, uint byteAddress, float val)
{
    return __NvAtomicAddFP32(uav, byteAddress, val);
}

float NvInterlockedAddFp32(RWTexture1D<float> uav, uint address, float val)
{
    return __NvAtomicAddFP32(uav, address, val);
}

float NvInterlockedAddFp32(RWTexture2D<float> uav, uint2 address, float val)
{
    return __NvAtomicAddFP32(uav, address, val);
}

float NvInterlockedAddFp32(RWTexture3D<float> uav, uint3 address, float val)
{
    return __NvAtomicAddFP32(uav, address, val);
}
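// Example (illustrative sketch, not part of the original header): splatting a
// weighted sample into a float accumulation target. 'accumTex' is a
// hypothetical RWTexture2D<float>; the return value is the prior contents.
//
//   float prev = NvInterlockedAddFp32(accumTex, pixelCoord, weight);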
//----------------------------------------------------------------------------//
//----------------------------- UINT64 Atomic Functions ----------------------//
//----------------------------------------------------------------------------//

// The functions below perform an atomic operation on a 64-bit unsigned integer
// held as a uint2 value; the return value is the 64-bit value at the address
// before the operation, packed the same way.

uint2 NvInterlockedAddUint64(RWByteAddressBuffer uav, uint byteAddress, uint2 value)
{
    return __NvAtomicOpUINT64(uav, byteAddress, value, NV_EXTN_ATOM_ADD);
}

uint2 NvInterlockedMaxUint64(RWByteAddressBuffer uav, uint byteAddress, uint2 value)
{
    return __NvAtomicOpUINT64(uav, byteAddress, value, NV_EXTN_ATOM_MAX);
}

uint2 NvInterlockedMinUint64(RWByteAddressBuffer uav, uint byteAddress, uint2 value)
{
    return __NvAtomicOpUINT64(uav, byteAddress, value, NV_EXTN_ATOM_MIN);
}

uint2 NvInterlockedAndUint64(RWByteAddressBuffer uav, uint byteAddress, uint2 value)
{
    return __NvAtomicOpUINT64(uav, byteAddress, value, NV_EXTN_ATOM_AND);
}

uint2 NvInterlockedOrUint64(RWByteAddressBuffer uav, uint byteAddress, uint2 value)
{
    return __NvAtomicOpUINT64(uav, byteAddress, value, NV_EXTN_ATOM_OR);
}

uint2 NvInterlockedXorUint64(RWByteAddressBuffer uav, uint byteAddress, uint2 value)
{
    return __NvAtomicOpUINT64(uav, byteAddress, value, NV_EXTN_ATOM_XOR);
}

uint2 NvInterlockedCompareExchangeUint64(RWByteAddressBuffer uav, uint byteAddress, uint2 compare_value, uint2 value)
{
    return __NvAtomicCompareExchangeUINT64(uav, byteAddress, compare_value, value);
}

uint2 NvInterlockedExchangeUint64(RWByteAddressBuffer uav, uint byteAddress, uint2 value)
{
    return __NvAtomicOpUINT64(uav, byteAddress, value, NV_EXTN_ATOM_SWAP);
}
// versions operating on RWTexture UAVs holding uint2 (64-bit) values

uint2 NvInterlockedAddUint64(RWTexture1D<uint2> uav, uint address, uint2 value)
{
    return __NvAtomicOpUINT64(uav, address, value, NV_EXTN_ATOM_ADD);
}

uint2 NvInterlockedMaxUint64(RWTexture1D<uint2> uav, uint address, uint2 value)
{
    return __NvAtomicOpUINT64(uav, address, value, NV_EXTN_ATOM_MAX);
}

uint2 NvInterlockedMinUint64(RWTexture1D<uint2> uav, uint address, uint2 value)
{
    return __NvAtomicOpUINT64(uav, address, value, NV_EXTN_ATOM_MIN);
}

uint2 NvInterlockedAndUint64(RWTexture1D<uint2> uav, uint address, uint2 value)
{
    return __NvAtomicOpUINT64(uav, address, value, NV_EXTN_ATOM_AND);
}

uint2 NvInterlockedOrUint64(RWTexture1D<uint2> uav, uint address, uint2 value)
{
    return __NvAtomicOpUINT64(uav, address, value, NV_EXTN_ATOM_OR);
}

uint2 NvInterlockedXorUint64(RWTexture1D<uint2> uav, uint address, uint2 value)
{
    return __NvAtomicOpUINT64(uav, address, value, NV_EXTN_ATOM_XOR);
}

uint2 NvInterlockedCompareExchangeUint64(RWTexture1D<uint2> uav, uint address, uint2 compare_value, uint2 value)
{
    return __NvAtomicCompareExchangeUINT64(uav, address, compare_value, value);
}

uint2 NvInterlockedExchangeUint64(RWTexture1D<uint2> uav, uint address, uint2 value)
{
    return __NvAtomicOpUINT64(uav, address, value, NV_EXTN_ATOM_SWAP);
}

uint2 NvInterlockedAddUint64(RWTexture2D<uint2> uav, uint2 address, uint2 value)
{
    return __NvAtomicOpUINT64(uav, address, value, NV_EXTN_ATOM_ADD);
}

uint2 NvInterlockedMaxUint64(RWTexture2D<uint2> uav, uint2 address, uint2 value)
{
    return __NvAtomicOpUINT64(uav, address, value, NV_EXTN_ATOM_MAX);
}

uint2 NvInterlockedMinUint64(RWTexture2D<uint2> uav, uint2 address, uint2 value)
{
    return __NvAtomicOpUINT64(uav, address, value, NV_EXTN_ATOM_MIN);
}

uint2 NvInterlockedAndUint64(RWTexture2D<uint2> uav, uint2 address, uint2 value)
{
    return __NvAtomicOpUINT64(uav, address, value, NV_EXTN_ATOM_AND);
}

uint2 NvInterlockedOrUint64(RWTexture2D<uint2> uav, uint2 address, uint2 value)
{
    return __NvAtomicOpUINT64(uav, address, value, NV_EXTN_ATOM_OR);
}

uint2 NvInterlockedXorUint64(RWTexture2D<uint2> uav, uint2 address, uint2 value)
{
    return __NvAtomicOpUINT64(uav, address, value, NV_EXTN_ATOM_XOR);
}

uint2 NvInterlockedCompareExchangeUint64(RWTexture2D<uint2> uav, uint2 address, uint2 compare_value, uint2 value)
{
    return __NvAtomicCompareExchangeUINT64(uav, address, compare_value, value);
}

uint2 NvInterlockedExchangeUint64(RWTexture2D<uint2> uav, uint2 address, uint2 value)
{
    return __NvAtomicOpUINT64(uav, address, value, NV_EXTN_ATOM_SWAP);
}

uint2 NvInterlockedAddUint64(RWTexture3D<uint2> uav, uint3 address, uint2 value)
{
    return __NvAtomicOpUINT64(uav, address, value, NV_EXTN_ATOM_ADD);
}

uint2 NvInterlockedMaxUint64(RWTexture3D<uint2> uav, uint3 address, uint2 value)
{
    return __NvAtomicOpUINT64(uav, address, value, NV_EXTN_ATOM_MAX);
}

uint2 NvInterlockedMinUint64(RWTexture3D<uint2> uav, uint3 address, uint2 value)
{
    return __NvAtomicOpUINT64(uav, address, value, NV_EXTN_ATOM_MIN);
}

uint2 NvInterlockedAndUint64(RWTexture3D<uint2> uav, uint3 address, uint2 value)
{
    return __NvAtomicOpUINT64(uav, address, value, NV_EXTN_ATOM_AND);
}

uint2 NvInterlockedOrUint64(RWTexture3D<uint2> uav, uint3 address, uint2 value)
{
    return __NvAtomicOpUINT64(uav, address, value, NV_EXTN_ATOM_OR);
}

uint2 NvInterlockedXorUint64(RWTexture3D<uint2> uav, uint3 address, uint2 value)
{
    return __NvAtomicOpUINT64(uav, address, value, NV_EXTN_ATOM_XOR);
}

uint2 NvInterlockedCompareExchangeUint64(RWTexture3D<uint2> uav, uint3 address, uint2 compare_value, uint2 value)
{
    return __NvAtomicCompareExchangeUINT64(uav, address, compare_value, value);
}

uint2 NvInterlockedExchangeUint64(RWTexture3D<uint2> uav, uint3 address, uint2 value)
{
    return __NvAtomicOpUINT64(uav, address, value, NV_EXTN_ATOM_SWAP);
}
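// Example (illustrative sketch, not part of the original header): a 64-bit
// "closest fragment wins" write, packing depth into one component and a payload
// into the other so the 64-bit min orders by depth first. This sketch assumes
// the .y component maps to the high 32 bits of the 64-bit value; 'visBuf' is a
// hypothetical RWTexture2D<uint2>.
//
//   uint2 packed = uint2(primitiveId, asuint(depth)); // valid for depth >= 0
//   NvInterlockedMinUint64(visBuf, pixelCoord, packed);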
//----------------------------------------------------------------------------//
//--------------------- Variable Rate Shading functions ----------------------//
//----------------------------------------------------------------------------//

// returns the shading rate of the current pixel
uint3 NvGetShadingRate()
{
    uint3 shadingRate = (uint3)0;
    uint index = g_NvidiaExt.IncrementCounter();
    g_NvidiaExt[index].opcode = NV_EXTN_OP_GET_SHADING_RATE;
    g_NvidiaExt[index].numOutputsForIncCounter = 3;
    shadingRate.x = g_NvidiaExt.IncrementCounter();
    shadingRate.y = g_NvidiaExt.IncrementCounter();
    shadingRate.z = g_NvidiaExt.IncrementCounter();
    return shadingRate;
}
// Evaluates the attribute at the given sample index and pixel offset
// (for use with Variable Pixel Rate Shading)
float NvEvaluateAttributeAtSampleForVPRS(float attrib, uint sampleIndex, int2 pixelOffset)
{
    float value = (float)0;
    uint ext = g_NvidiaExt.IncrementCounter();
    g_NvidiaExt[ext].opcode = NV_EXTN_OP_VPRS_EVAL_ATTRIB_AT_SAMPLE;
    g_NvidiaExt[ext].src0u.x = asuint(attrib.x);
    g_NvidiaExt[ext].src1u.x = sampleIndex;
    g_NvidiaExt[ext].src2u.xy = pixelOffset;
    g_NvidiaExt[ext].numOutputsForIncCounter = 1;
    value.x = asfloat(g_NvidiaExt.IncrementCounter());
    return value;
}

float2 NvEvaluateAttributeAtSampleForVPRS(float2 attrib, uint sampleIndex, int2 pixelOffset)
{
    float2 value = (float2)0;
    uint ext = g_NvidiaExt.IncrementCounter();
    g_NvidiaExt[ext].opcode = NV_EXTN_OP_VPRS_EVAL_ATTRIB_AT_SAMPLE;
    g_NvidiaExt[ext].src0u.xy = asuint(attrib.xy);
    g_NvidiaExt[ext].src1u.x = sampleIndex;
    g_NvidiaExt[ext].src2u.xy = pixelOffset;
    g_NvidiaExt[ext].numOutputsForIncCounter = 2;
    value.x = asfloat(g_NvidiaExt.IncrementCounter());
    value.y = asfloat(g_NvidiaExt.IncrementCounter());
    return value;
}

float3 NvEvaluateAttributeAtSampleForVPRS(float3 attrib, uint sampleIndex, int2 pixelOffset)
{
    float3 value = (float3)0;
    uint ext = g_NvidiaExt.IncrementCounter();
    g_NvidiaExt[ext].opcode = NV_EXTN_OP_VPRS_EVAL_ATTRIB_AT_SAMPLE;
    g_NvidiaExt[ext].src0u.xyz = asuint(attrib.xyz);
    g_NvidiaExt[ext].src1u.x = sampleIndex;
    g_NvidiaExt[ext].src2u.xy = pixelOffset;
    g_NvidiaExt[ext].numOutputsForIncCounter = 3;
    value.x = asfloat(g_NvidiaExt.IncrementCounter());
    value.y = asfloat(g_NvidiaExt.IncrementCounter());
    value.z = asfloat(g_NvidiaExt.IncrementCounter());
    return value;
}

float4 NvEvaluateAttributeAtSampleForVPRS(float4 attrib, uint sampleIndex, int2 pixelOffset)
{
    float4 value = (float4)0;
    uint ext = g_NvidiaExt.IncrementCounter();
    g_NvidiaExt[ext].opcode = NV_EXTN_OP_VPRS_EVAL_ATTRIB_AT_SAMPLE;
    g_NvidiaExt[ext].src0u.xyzw = asuint(attrib.xyzw);
    g_NvidiaExt[ext].src1u.x = sampleIndex;
    g_NvidiaExt[ext].src2u.xy = pixelOffset;
    g_NvidiaExt[ext].numOutputsForIncCounter = 4;
    value.x = asfloat(g_NvidiaExt.IncrementCounter());
    value.y = asfloat(g_NvidiaExt.IncrementCounter());
    value.z = asfloat(g_NvidiaExt.IncrementCounter());
    value.w = asfloat(g_NvidiaExt.IncrementCounter());
    return value;
}
int NvEvaluateAttributeAtSampleForVPRS(int attrib, uint sampleIndex, int2 pixelOffset)
{
    int value = (int)0;
    uint ext = g_NvidiaExt.IncrementCounter();
    g_NvidiaExt[ext].opcode = NV_EXTN_OP_VPRS_EVAL_ATTRIB_AT_SAMPLE;
    g_NvidiaExt[ext].src0u.x = asuint(attrib.x);
    g_NvidiaExt[ext].src1u.x = sampleIndex;
    g_NvidiaExt[ext].src2u.xy = pixelOffset;
    g_NvidiaExt[ext].numOutputsForIncCounter = 1;
    value.x = asint(g_NvidiaExt.IncrementCounter());
    return value;
}

int2 NvEvaluateAttributeAtSampleForVPRS(int2 attrib, uint sampleIndex, int2 pixelOffset)
{
    int2 value = (int2)0;
    uint ext = g_NvidiaExt.IncrementCounter();
    g_NvidiaExt[ext].opcode = NV_EXTN_OP_VPRS_EVAL_ATTRIB_AT_SAMPLE;
    g_NvidiaExt[ext].src0u.xy = asuint(attrib.xy);
    g_NvidiaExt[ext].src1u.x = sampleIndex;
    g_NvidiaExt[ext].src2u.xy = pixelOffset;
    g_NvidiaExt[ext].numOutputsForIncCounter = 2;
    value.x = asint(g_NvidiaExt.IncrementCounter());
    value.y = asint(g_NvidiaExt.IncrementCounter());
    return value;
}

int3 NvEvaluateAttributeAtSampleForVPRS(int3 attrib, uint sampleIndex, int2 pixelOffset)
{
    int3 value = (int3)0;
    uint ext = g_NvidiaExt.IncrementCounter();
    g_NvidiaExt[ext].opcode = NV_EXTN_OP_VPRS_EVAL_ATTRIB_AT_SAMPLE;
    g_NvidiaExt[ext].src0u.xyz = asuint(attrib.xyz);
    g_NvidiaExt[ext].src1u.x = sampleIndex;
    g_NvidiaExt[ext].src2u.xy = pixelOffset;
    g_NvidiaExt[ext].numOutputsForIncCounter = 3;
    value.x = asint(g_NvidiaExt.IncrementCounter());
    value.y = asint(g_NvidiaExt.IncrementCounter());
    value.z = asint(g_NvidiaExt.IncrementCounter());
    return value;
}

int4 NvEvaluateAttributeAtSampleForVPRS(int4 attrib, uint sampleIndex, int2 pixelOffset)
{
    int4 value = (int4)0;
    uint ext = g_NvidiaExt.IncrementCounter();
    g_NvidiaExt[ext].opcode = NV_EXTN_OP_VPRS_EVAL_ATTRIB_AT_SAMPLE;
    g_NvidiaExt[ext].src0u.xyzw = asuint(attrib.xyzw);
    g_NvidiaExt[ext].src1u.x = sampleIndex;
    g_NvidiaExt[ext].src2u.xy = pixelOffset;
    g_NvidiaExt[ext].numOutputsForIncCounter = 4;
    value.x = asint(g_NvidiaExt.IncrementCounter());
    value.y = asint(g_NvidiaExt.IncrementCounter());
    value.z = asint(g_NvidiaExt.IncrementCounter());
    value.w = asint(g_NvidiaExt.IncrementCounter());
    return value;
}
uint NvEvaluateAttributeAtSampleForVPRS(uint attrib, uint sampleIndex, int2 pixelOffset)
{
    uint value = (uint)0;
    uint ext = g_NvidiaExt.IncrementCounter();
    g_NvidiaExt[ext].opcode = NV_EXTN_OP_VPRS_EVAL_ATTRIB_AT_SAMPLE;
    g_NvidiaExt[ext].src0u.x = asuint(attrib.x);
    g_NvidiaExt[ext].src1u.x = sampleIndex;
    g_NvidiaExt[ext].src2u.xy = pixelOffset;
    g_NvidiaExt[ext].numOutputsForIncCounter = 1;
    value.x = asuint(g_NvidiaExt.IncrementCounter());
    return value;
}

uint2 NvEvaluateAttributeAtSampleForVPRS(uint2 attrib, uint sampleIndex, int2 pixelOffset)
{
    uint2 value = (uint2)0;
    uint ext = g_NvidiaExt.IncrementCounter();
    g_NvidiaExt[ext].opcode = NV_EXTN_OP_VPRS_EVAL_ATTRIB_AT_SAMPLE;
    g_NvidiaExt[ext].src0u.xy = asuint(attrib.xy);
    g_NvidiaExt[ext].src1u.x = sampleIndex;
    g_NvidiaExt[ext].src2u.xy = pixelOffset;
    g_NvidiaExt[ext].numOutputsForIncCounter = 2;
    value.x = asuint(g_NvidiaExt.IncrementCounter());
    value.y = asuint(g_NvidiaExt.IncrementCounter());
    return value;
}

uint3 NvEvaluateAttributeAtSampleForVPRS(uint3 attrib, uint sampleIndex, int2 pixelOffset)
{
    uint3 value = (uint3)0;
    uint ext = g_NvidiaExt.IncrementCounter();
    g_NvidiaExt[ext].opcode = NV_EXTN_OP_VPRS_EVAL_ATTRIB_AT_SAMPLE;
    g_NvidiaExt[ext].src0u.xyz = asuint(attrib.xyz);
    g_NvidiaExt[ext].src1u.x = sampleIndex;
    g_NvidiaExt[ext].src2u.xy = pixelOffset;
    g_NvidiaExt[ext].numOutputsForIncCounter = 3;
    value.x = asuint(g_NvidiaExt.IncrementCounter());
    value.y = asuint(g_NvidiaExt.IncrementCounter());
    value.z = asuint(g_NvidiaExt.IncrementCounter());
    return value;
}

uint4 NvEvaluateAttributeAtSampleForVPRS(uint4 attrib, uint sampleIndex, int2 pixelOffset)
{
    uint4 value = (uint4)0;
    uint ext = g_NvidiaExt.IncrementCounter();
    g_NvidiaExt[ext].opcode = NV_EXTN_OP_VPRS_EVAL_ATTRIB_AT_SAMPLE;
    g_NvidiaExt[ext].src0u.xyzw = asuint(attrib.xyzw);
    g_NvidiaExt[ext].src1u.x = sampleIndex;
    g_NvidiaExt[ext].src2u.xy = pixelOffset;
    g_NvidiaExt[ext].numOutputsForIncCounter = 4;
    value.x = asuint(g_NvidiaExt.IncrementCounter());
    value.y = asuint(g_NvidiaExt.IncrementCounter());
    value.z = asuint(g_NvidiaExt.IncrementCounter());
    value.w = asuint(g_NvidiaExt.IncrementCounter());
    return value;
}
// Evaluates the attribute snapped to the given offset within the pixel
// (for use with Variable Pixel Rate Shading)
float NvEvaluateAttributeSnappedForVPRS(float attrib, uint2 offset)
{
    float value = (float)0;
    uint ext = g_NvidiaExt.IncrementCounter();
    g_NvidiaExt[ext].opcode = NV_EXTN_OP_VPRS_EVAL_ATTRIB_SNAPPED;
    g_NvidiaExt[ext].src0u.x = asuint(attrib.x);
    g_NvidiaExt[ext].src1u.xy = offset;
    g_NvidiaExt[ext].numOutputsForIncCounter = 1;
    value.x = asfloat(g_NvidiaExt.IncrementCounter());
    return value;
}

float2 NvEvaluateAttributeSnappedForVPRS(float2 attrib, uint2 offset)
{
    float2 value = (float2)0;
    uint ext = g_NvidiaExt.IncrementCounter();
    g_NvidiaExt[ext].opcode = NV_EXTN_OP_VPRS_EVAL_ATTRIB_SNAPPED;
    g_NvidiaExt[ext].src0u.xy = asuint(attrib.xy);
    g_NvidiaExt[ext].src1u.xy = offset;
    g_NvidiaExt[ext].numOutputsForIncCounter = 2;
    value.x = asfloat(g_NvidiaExt.IncrementCounter());
    value.y = asfloat(g_NvidiaExt.IncrementCounter());
    return value;
}

float3 NvEvaluateAttributeSnappedForVPRS(float3 attrib, uint2 offset)
{
    float3 value = (float3)0;
    uint ext = g_NvidiaExt.IncrementCounter();
    g_NvidiaExt[ext].opcode = NV_EXTN_OP_VPRS_EVAL_ATTRIB_SNAPPED;
    g_NvidiaExt[ext].src0u.xyz = asuint(attrib.xyz);
    g_NvidiaExt[ext].src1u.xy = offset;
    g_NvidiaExt[ext].numOutputsForIncCounter = 3;
    value.x = asfloat(g_NvidiaExt.IncrementCounter());
    value.y = asfloat(g_NvidiaExt.IncrementCounter());
    value.z = asfloat(g_NvidiaExt.IncrementCounter());
    return value;
}

float4 NvEvaluateAttributeSnappedForVPRS(float4 attrib, uint2 offset)
{
    float4 value = (float4)0;
    uint ext = g_NvidiaExt.IncrementCounter();
    g_NvidiaExt[ext].opcode = NV_EXTN_OP_VPRS_EVAL_ATTRIB_SNAPPED;
    g_NvidiaExt[ext].src0u.xyzw = asuint(attrib.xyzw);
    g_NvidiaExt[ext].src1u.xy = offset;
    g_NvidiaExt[ext].numOutputsForIncCounter = 4;
    value.x = asfloat(g_NvidiaExt.IncrementCounter());
    value.y = asfloat(g_NvidiaExt.IncrementCounter());
    value.z = asfloat(g_NvidiaExt.IncrementCounter());
    value.w = asfloat(g_NvidiaExt.IncrementCounter());
    return value;
}
int NvEvaluateAttributeSnappedForVPRS(int attrib, uint2 offset)
{
    int value = (int)0;
    uint ext = g_NvidiaExt.IncrementCounter();
    g_NvidiaExt[ext].opcode = NV_EXTN_OP_VPRS_EVAL_ATTRIB_SNAPPED;
    g_NvidiaExt[ext].src0u.x = asuint(attrib.x);
    g_NvidiaExt[ext].src1u.xy = offset;
    g_NvidiaExt[ext].numOutputsForIncCounter = 1;
    value.x = asint(g_NvidiaExt.IncrementCounter());
    return value;
}

int2 NvEvaluateAttributeSnappedForVPRS(int2 attrib, uint2 offset)
{
    int2 value = (int2)0;
    uint ext = g_NvidiaExt.IncrementCounter();
    g_NvidiaExt[ext].opcode = NV_EXTN_OP_VPRS_EVAL_ATTRIB_SNAPPED;
    g_NvidiaExt[ext].src0u.xy = asuint(attrib.xy);
    g_NvidiaExt[ext].src1u.xy = offset;
    g_NvidiaExt[ext].numOutputsForIncCounter = 2;
    value.x = asint(g_NvidiaExt.IncrementCounter());
    value.y = asint(g_NvidiaExt.IncrementCounter());
    return value;
}

int3 NvEvaluateAttributeSnappedForVPRS(int3 attrib, uint2 offset)
{
    int3 value = (int3)0;
    uint ext = g_NvidiaExt.IncrementCounter();
    g_NvidiaExt[ext].opcode = NV_EXTN_OP_VPRS_EVAL_ATTRIB_SNAPPED;
    g_NvidiaExt[ext].src0u.xyz = asuint(attrib.xyz);
    g_NvidiaExt[ext].src1u.xy = offset;
    g_NvidiaExt[ext].numOutputsForIncCounter = 3;
    value.x = asint(g_NvidiaExt.IncrementCounter());
    value.y = asint(g_NvidiaExt.IncrementCounter());
    value.z = asint(g_NvidiaExt.IncrementCounter());
    return value;
}

int4 NvEvaluateAttributeSnappedForVPRS(int4 attrib, uint2 offset)
{
    int4 value = (int4)0;
    uint ext = g_NvidiaExt.IncrementCounter();
    g_NvidiaExt[ext].opcode = NV_EXTN_OP_VPRS_EVAL_ATTRIB_SNAPPED;
    g_NvidiaExt[ext].src0u.xyzw = asuint(attrib.xyzw);
    g_NvidiaExt[ext].src1u.xy = offset;
    g_NvidiaExt[ext].numOutputsForIncCounter = 4;
    value.x = asint(g_NvidiaExt.IncrementCounter());
    value.y = asint(g_NvidiaExt.IncrementCounter());
    value.z = asint(g_NvidiaExt.IncrementCounter());
    value.w = asint(g_NvidiaExt.IncrementCounter());
    return value;
}
uint NvEvaluateAttributeSnappedForVPRS(uint attrib, uint2 offset)
{
    uint value = (uint)0;
    uint ext = g_NvidiaExt.IncrementCounter();
    g_NvidiaExt[ext].opcode = NV_EXTN_OP_VPRS_EVAL_ATTRIB_SNAPPED;
    g_NvidiaExt[ext].src0u.x = asuint(attrib.x);
    g_NvidiaExt[ext].src1u.xy = offset;
    g_NvidiaExt[ext].numOutputsForIncCounter = 1;
    value.x = asuint(g_NvidiaExt.IncrementCounter());
    return value;
}

uint2 NvEvaluateAttributeSnappedForVPRS(uint2 attrib, uint2 offset)
{
    uint2 value = (uint2)0;
    uint ext = g_NvidiaExt.IncrementCounter();
    g_NvidiaExt[ext].opcode = NV_EXTN_OP_VPRS_EVAL_ATTRIB_SNAPPED;
    g_NvidiaExt[ext].src0u.xy = asuint(attrib.xy);
    g_NvidiaExt[ext].src1u.xy = offset;
    g_NvidiaExt[ext].numOutputsForIncCounter = 2;
    value.x = asuint(g_NvidiaExt.IncrementCounter());
    value.y = asuint(g_NvidiaExt.IncrementCounter());
    return value;
}

uint3 NvEvaluateAttributeSnappedForVPRS(uint3 attrib, uint2 offset)
{
    uint3 value = (uint3)0;
    uint ext = g_NvidiaExt.IncrementCounter();
    g_NvidiaExt[ext].opcode = NV_EXTN_OP_VPRS_EVAL_ATTRIB_SNAPPED;
    g_NvidiaExt[ext].src0u.xyz = asuint(attrib.xyz);
    g_NvidiaExt[ext].src1u.xy = offset;
    g_NvidiaExt[ext].numOutputsForIncCounter = 3;
    value.x = asuint(g_NvidiaExt.IncrementCounter());
    value.y = asuint(g_NvidiaExt.IncrementCounter());
    value.z = asuint(g_NvidiaExt.IncrementCounter());
    return value;
}

uint4 NvEvaluateAttributeSnappedForVPRS(uint4 attrib, uint2 offset)
{
    uint4 value = (uint4)0;
    uint ext = g_NvidiaExt.IncrementCounter();
    g_NvidiaExt[ext].opcode = NV_EXTN_OP_VPRS_EVAL_ATTRIB_SNAPPED;
    g_NvidiaExt[ext].src0u.xyzw = asuint(attrib.xyzw);
    g_NvidiaExt[ext].src1u.xy = offset;
    g_NvidiaExt[ext].numOutputsForIncCounter = 4;
    value.x = asuint(g_NvidiaExt.IncrementCounter());
    value.y = asuint(g_NvidiaExt.IncrementCounter());
    value.z = asuint(g_NvidiaExt.IncrementCounter());
    value.w = asuint(g_NvidiaExt.IncrementCounter());
    return value;
}
//----------------------------------------------------------------------------//
//---------------------------------- WaveMatch -------------------------------//
//----------------------------------------------------------------------------//

// Returns a mask of the lanes in the warp whose 'value' matches the value of
// the calling lane.
uint NvWaveMatch(uint value)
{
    uint index = g_NvidiaExt.IncrementCounter();
    g_NvidiaExt[index].src0u.x = value;
    g_NvidiaExt[index].src1u.x = 1;
    g_NvidiaExt[index].opcode = NV_EXTN_OP_MATCH_ANY;
    // result is returned as the return value of IncrementCounter on the fake UAV slot
    return g_NvidiaExt.IncrementCounter();
}

uint NvWaveMatch(uint2 value)
{
    uint index = g_NvidiaExt.IncrementCounter();
    g_NvidiaExt[index].src0u.xy = value.xy;
    g_NvidiaExt[index].src1u.x = 2;
    g_NvidiaExt[index].opcode = NV_EXTN_OP_MATCH_ANY;
    return g_NvidiaExt.IncrementCounter();
}

uint NvWaveMatch(uint4 value)
{
    uint index = g_NvidiaExt.IncrementCounter();
    g_NvidiaExt[index].src0u = value;
    g_NvidiaExt[index].src1u.x = 4;
    g_NvidiaExt[index].opcode = NV_EXTN_OP_MATCH_ANY;
    return g_NvidiaExt.IncrementCounter();
}

uint NvWaveMatch(float value)
{
    uint index = g_NvidiaExt.IncrementCounter();
    g_NvidiaExt[index].src0u.x = asuint(value);
    g_NvidiaExt[index].src1u.x = 1;
    g_NvidiaExt[index].opcode = NV_EXTN_OP_MATCH_ANY;
    return g_NvidiaExt.IncrementCounter();
}

uint NvWaveMatch(float2 value)
{
    uint index = g_NvidiaExt.IncrementCounter();
    g_NvidiaExt[index].src0u.xy = asuint(value);
    g_NvidiaExt[index].src1u.x = 2;
    g_NvidiaExt[index].opcode = NV_EXTN_OP_MATCH_ANY;
    return g_NvidiaExt.IncrementCounter();
}

uint NvWaveMatch(float4 value)
{
    uint index = g_NvidiaExt.IncrementCounter();
    g_NvidiaExt[index].src0u = asuint(value);
    g_NvidiaExt[index].src1u.x = 4;
    g_NvidiaExt[index].opcode = NV_EXTN_OP_MATCH_ANY;
    return g_NvidiaExt.IncrementCounter();
}
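// Example (illustrative sketch, not part of the original header): partitioning
// the warp by an arbitrary 32-bit key and electing a leader per partition.
//
//   uint peers = NvWaveMatch(materialId);            // lanes with the same key
//   bool isLeader = (NvGetLaneId() == firstbitlow(peers));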
//----------------------------------------------------------------------------//
//------------------------------ Texture Footprint ---------------------------//
//----------------------------------------------------------------------------//

// Queries the texture footprint (the set of texels that a filtered sample would
// touch) for the given texture/sampler pair and location. The footprint is
// returned as a uint4 blob; use the NvFootprintExtract* helpers below to decode it.

uint4 NvFootprintFine(uint texSpace, uint texIndex, uint smpSpace, uint smpIndex, uint texType, float3 location, uint gran, int3 offset = int3(0, 0, 0))
{
    return __NvFootprint(texSpace, texIndex, smpSpace, smpIndex, texType, location, NV_EXTN_FOOTPRINT_MODE_FINE, gran, offset);
}

uint4 NvFootprintCoarse(uint texSpace, uint texIndex, uint smpSpace, uint smpIndex, uint texType, float3 location, uint gran, int3 offset = int3(0, 0, 0))
{
    return __NvFootprint(texSpace, texIndex, smpSpace, smpIndex, texType, location, NV_EXTN_FOOTPRINT_MODE_COARSE, gran, offset);
}

uint4 NvFootprintFineBias(uint texSpace, uint texIndex, uint smpSpace, uint smpIndex, uint texType, float3 location, uint gran,
    float bias, int3 offset = int3(0, 0, 0))
{
    return __NvFootprintBias(texSpace, texIndex, smpSpace, smpIndex, texType, location, NV_EXTN_FOOTPRINT_MODE_FINE, gran, bias, offset);
}

uint4 NvFootprintCoarseBias(uint texSpace, uint texIndex, uint smpSpace, uint smpIndex, uint texType, float3 location, uint gran,
    float bias, int3 offset = int3(0, 0, 0))
{
    return __NvFootprintBias(texSpace, texIndex, smpSpace, smpIndex, texType, location, NV_EXTN_FOOTPRINT_MODE_COARSE, gran, bias, offset);
}

uint4 NvFootprintFineLevel(uint texSpace, uint texIndex, uint smpSpace, uint smpIndex, uint texType, float3 location, uint gran,
    float lodLevel, int3 offset = int3(0, 0, 0))
{
    return __NvFootprintLevel(texSpace, texIndex, smpSpace, smpIndex, texType, location, NV_EXTN_FOOTPRINT_MODE_FINE, gran, lodLevel, offset);
}

uint4 NvFootprintCoarseLevel(uint texSpace, uint texIndex, uint smpSpace, uint smpIndex, uint texType, float3 location, uint gran,
    float lodLevel, int3 offset = int3(0, 0, 0))
{
    return __NvFootprintLevel(texSpace, texIndex, smpSpace, smpIndex, texType, location, NV_EXTN_FOOTPRINT_MODE_COARSE, gran, lodLevel, offset);
}

uint4 NvFootprintFineGrad(uint texSpace, uint texIndex, uint smpSpace, uint smpIndex, uint texType, float3 location, uint gran, float3 ddx, float3 ddy, int3 offset = int3(0, 0, 0))
{
    return __NvFootprintGrad(texSpace, texIndex, smpSpace, smpIndex, texType, location, NV_EXTN_FOOTPRINT_MODE_FINE, gran, ddx, ddy, offset);
}

uint4 NvFootprintCoarseGrad(uint texSpace, uint texIndex, uint smpSpace, uint smpIndex, uint texType, float3 location, uint gran, float3 ddx, float3 ddy, int3 offset = int3(0, 0, 0))
{
    return __NvFootprintGrad(texSpace, texIndex, smpSpace, smpIndex, texType, location, NV_EXTN_FOOTPRINT_MODE_COARSE, gran, ddx, ddy, offset);
}
// helpers to decode the footprint blob returned by the queries above

uint NvFootprintExtractLOD(uint4 blob)
{
    return ((blob.w & 0xF000) >> 12);
}

uint NvFootprintExtractReturnGran(uint4 blob)
{
    return ((blob.z & 0xF000000) >> 24);
}

uint2 NvFootprintExtractAnchorTileLoc2D(uint4 blob)
{
    uint2 loc;
    loc.x = (blob.w & 0xFFF);
    loc.y = (blob.z & 0xFFF);
    return loc;
}

uint3 NvFootprintExtractAnchorTileLoc3D(uint4 blob)
{
    uint3 loc;
    loc.x = (blob.w & 0xFFF);
    loc.y = ((blob.w & 0xFFF0000) >> 16);
    loc.z = (blob.z & 0x1FFF);
    return loc;
}

uint2 NvFootprintExtractOffset2D(uint4 blob)
{
    uint2 loc;
    loc.x = ((blob.z & 0x070000) >> 16);
    loc.y = ((blob.z & 0x380000) >> 19);
    return loc;
}

uint3 NvFootprintExtractOffset3D(uint4 blob)
{
    uint3 loc;
    loc.x = ((blob.z & 0x030000) >> 16);
    loc.y = ((blob.z & 0x0C0000) >> 18);
    loc.z = ((blob.z & 0x300000) >> 20);
    return loc;
}

uint2 NvFootprintExtractBitmask(uint4 blob)
{
    return blob.xy;
}
// Variants that additionally report whether the footprint covers only a single
// mip level (isSingleLod != 0).

uint4 NvFootprintFine(uint texSpace, uint texIndex, uint smpSpace, uint smpIndex, uint texType, float3 location, uint gran, out uint isSingleLod, int3 offset = int3(0, 0, 0))
{
    uint4 res = __NvFootprint(texSpace, texIndex, smpSpace, smpIndex, texType, location, NV_EXTN_FOOTPRINT_MODE_FINE, gran, offset);
    isSingleLod = __NvGetSpecial(NV_SPECIALOP_FOOTPRINT_SINGLELOD_PRED);
    return res;
}

uint4 NvFootprintCoarse(uint texSpace, uint texIndex, uint smpSpace, uint smpIndex, uint texType, float3 location, uint gran, out uint isSingleLod, int3 offset = int3(0, 0, 0))
{
    uint4 res = __NvFootprint(texSpace, texIndex, smpSpace, smpIndex, texType, location, NV_EXTN_FOOTPRINT_MODE_COARSE, gran, offset);
    isSingleLod = __NvGetSpecial(NV_SPECIALOP_FOOTPRINT_SINGLELOD_PRED);
    return res;
}

uint4 NvFootprintFineBias(uint texSpace, uint texIndex, uint smpSpace, uint smpIndex, uint texType, float3 location, uint gran,
    float bias, out uint isSingleLod, int3 offset = int3(0, 0, 0))
{
    uint4 res = __NvFootprintBias(texSpace, texIndex, smpSpace, smpIndex, texType, location, NV_EXTN_FOOTPRINT_MODE_FINE, gran, bias, offset);
    isSingleLod = __NvGetSpecial(NV_SPECIALOP_FOOTPRINT_SINGLELOD_PRED);
    return res;
}

uint4 NvFootprintCoarseBias(uint texSpace, uint texIndex, uint smpSpace, uint smpIndex, uint texType, float3 location, uint gran,
    float bias, out uint isSingleLod, int3 offset = int3(0, 0, 0))
{
    uint4 res = __NvFootprintBias(texSpace, texIndex, smpSpace, smpIndex, texType, location, NV_EXTN_FOOTPRINT_MODE_COARSE, gran, bias, offset);
    isSingleLod = __NvGetSpecial(NV_SPECIALOP_FOOTPRINT_SINGLELOD_PRED);
    return res;
}

uint4 NvFootprintFineLevel(uint texSpace, uint texIndex, uint smpSpace, uint smpIndex, uint texType, float3 location, uint gran,
    float lodLevel, out uint isSingleLod, int3 offset = int3(0, 0, 0))
{
    uint4 res = __NvFootprintLevel(texSpace, texIndex, smpSpace, smpIndex, texType, location, NV_EXTN_FOOTPRINT_MODE_FINE, gran, lodLevel, offset);
    isSingleLod = __NvGetSpecial(NV_SPECIALOP_FOOTPRINT_SINGLELOD_PRED);
    return res;
}

uint4 NvFootprintCoarseLevel(uint texSpace, uint texIndex, uint smpSpace, uint smpIndex, uint texType, float3 location, uint gran,
    float lodLevel, out uint isSingleLod, int3 offset = int3(0, 0, 0))
{
    uint4 res = __NvFootprintLevel(texSpace, texIndex, smpSpace, smpIndex, texType, location, NV_EXTN_FOOTPRINT_MODE_COARSE, gran, lodLevel, offset);
    isSingleLod = __NvGetSpecial(NV_SPECIALOP_FOOTPRINT_SINGLELOD_PRED);
    return res;
}

uint4 NvFootprintFineGrad(uint texSpace, uint texIndex, uint smpSpace, uint smpIndex, uint texType, float3 location, uint gran, float3 ddx, float3 ddy, out uint isSingleLod, int3 offset = int3(0, 0, 0))
{
    uint4 res = __NvFootprintGrad(texSpace, texIndex, smpSpace, smpIndex, texType, location, NV_EXTN_FOOTPRINT_MODE_FINE, gran, ddx, ddy, offset);
    isSingleLod = __NvGetSpecial(NV_SPECIALOP_FOOTPRINT_SINGLELOD_PRED);
    return res;
}

uint4 NvFootprintCoarseGrad(uint texSpace, uint texIndex, uint smpSpace, uint smpIndex, uint texType, float3 location, uint gran, float3 ddx, float3 ddy, out uint isSingleLod, int3 offset = int3(0, 0, 0))
{
    uint4 res = __NvFootprintGrad(texSpace, texIndex, smpSpace, smpIndex, texType, location, NV_EXTN_FOOTPRINT_MODE_COARSE, gran, ddx, ddy, offset);
    isSingleLod = __NvGetSpecial(NV_SPECIALOP_FOOTPRINT_SINGLELOD_PRED);
    return res;
}
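// Example (illustrative sketch, not part of the original header): taking a
// footprint and decoding it with the extract helpers above. texSpace/smpSpace,
// texIndex/smpIndex, texType and gran follow the conventions of
// nvShaderExtnEnums.h; all names here are placeholders.
//
//   uint isSingleLod;
//   uint4 blob = NvFootprintFine(texSpace, texIndex, smpSpace, smpIndex,
//                                texType, float3(uv, 0), gran, isSingleLod);
//   uint lod      = NvFootprintExtractLOD(blob);
//   uint2 anchor  = NvFootprintExtractAnchorTileLoc2D(blob);
//   uint2 bitmask = NvFootprintExtractBitmask(blob);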
// returns a 32-bit mask of the threads currently active in the warp
uint NvActiveThreads()
{
    return NvBallot(1);
}
//----------------------------------------------------------------------------//
//------------------------- Multi-Prefix Wave Operations ---------------------//
//----------------------------------------------------------------------------//

// Computes, for each lane, the inclusive prefix sum over the lanes selected by
// 'mask', in ascending lane order.
uint NvWaveMultiPrefixInclusiveAdd(uint val, uint mask)
{
    uint temp;
    uint a = NvActiveThreads();
    uint remainingThreads = a & __NvGetSpecial(NV_SPECIALOP_THREADLTMASK) & mask;
    uint nextLane = firstbithigh(remainingThreads);
    for (uint i = 0; i < NV_WARP_SIZE_LOG2; i++)
    {
        temp = NvShfl(val, nextLane);
        uint laneValid;
        uint newLane = asuint(__NvShflGeneric(nextLane, nextLane, 30, laneValid));
        if (laneValid) // if nextLane's nextLane is valid
        {
            val = val + temp;
            nextLane = newLane;
        }
    }
    return val;
}

uint NvWaveMultiPrefixExclusiveAdd(uint val, uint mask)
{
    uint temp;
    uint a = NvActiveThreads();
    uint remainingThreads = a & __NvGetSpecial(NV_SPECIALOP_THREADLTMASK) & mask;
    uint lane = firstbithigh(remainingThreads);
    temp = NvShfl(val, lane);
    val = remainingThreads != 0 ? temp : 0;
    return NvWaveMultiPrefixInclusiveAdd(val, mask);
}
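// Example (illustrative sketch, not part of the original header): compacting
// items per partition by combining NvWaveMatch with a multi-prefix sum. Lanes
// that share 'key' receive consecutive slot indices starting at 0.
//
//   uint peers = NvWaveMatch(key);
//   uint slot  = NvWaveMultiPrefixExclusiveAdd(1u, peers);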
uint2 NvWaveMultiPrefixInclusiveAdd(uint2 val, uint mask)
{
    uint2 temp;
    uint a = NvActiveThreads();
    uint remainingThreads = a & __NvGetSpecial(NV_SPECIALOP_THREADLTMASK) & mask;
    uint nextLane = firstbithigh(remainingThreads);
    for (uint i = 0; i < NV_WARP_SIZE_LOG2; i++)
    {
        temp = NvShfl(val, nextLane);
        uint laneValid;
        uint newLane = asuint(__NvShflGeneric(nextLane, nextLane, 30, laneValid));
        if (laneValid) // if nextLane's nextLane is valid
        {
            val = val + temp;
            nextLane = newLane;
        }
    }
    return val;
}

uint2 NvWaveMultiPrefixExclusiveAdd(uint2 val, uint mask)
{
    uint2 temp;
    uint a = NvActiveThreads();
    uint remainingThreads = a & __NvGetSpecial(NV_SPECIALOP_THREADLTMASK) & mask;
    uint lane = firstbithigh(remainingThreads);
    temp = NvShfl(val, lane);
    val = remainingThreads != 0 ? temp : uint2(0, 0);
    return NvWaveMultiPrefixInclusiveAdd(val, mask);
}
uint4 NvWaveMultiPrefixInclusiveAdd(uint4 val, uint mask)
{
    uint4 temp;
    uint a = NvActiveThreads();
    uint remainingThreads = a & __NvGetSpecial(NV_SPECIALOP_THREADLTMASK) & mask;
    uint nextLane = firstbithigh(remainingThreads);
    for (uint i = 0; i < NV_WARP_SIZE_LOG2; i++)
    {
        temp = NvShfl(val, nextLane);
        uint laneValid;
        uint newLane = asuint(__NvShflGeneric(nextLane, nextLane, 30, laneValid));
        if (laneValid) // if nextLane's nextLane is valid
        {
            val = val + temp;
            nextLane = newLane;
        }
    }
    return val;
}

uint4 NvWaveMultiPrefixExclusiveAdd(uint4 val, uint mask)
{
    uint4 temp;
    uint a = NvActiveThreads();
    uint remainingThreads = a & __NvGetSpecial(NV_SPECIALOP_THREADLTMASK) & mask;
    uint lane = firstbithigh(remainingThreads);
    temp = NvShfl(val, lane);
    val = remainingThreads != 0 ? temp : uint4(0, 0, 0, 0);
    return NvWaveMultiPrefixInclusiveAdd(val, mask);
}
uint NvWaveMultiPrefixInclusiveAnd(uint val, uint mask)
{
    uint temp;
    uint a = NvActiveThreads();
    uint remainingThreads = a & __NvGetSpecial(NV_SPECIALOP_THREADLTMASK) & mask;
    uint nextLane = firstbithigh(remainingThreads);
    for (uint i = 0; i < NV_WARP_SIZE_LOG2; i++)
    {
        temp = NvShfl(val, nextLane);
        uint laneValid;
        uint newLane = asuint(__NvShflGeneric(nextLane, nextLane, 30, laneValid));
        if (laneValid) // if nextLane's nextLane is valid
        {
            val = val & temp;
            nextLane = newLane;
        }
    }
    return val;
}

uint NvWaveMultiPrefixExclusiveAnd(uint val, uint mask)
{
    uint temp;
    uint a = NvActiveThreads();
    uint remainingThreads = a & __NvGetSpecial(NV_SPECIALOP_THREADLTMASK) & mask;
    uint lane = firstbithigh(remainingThreads);
    temp = NvShfl(val, lane);
    val = remainingThreads != 0 ? temp : ~0;
    return NvWaveMultiPrefixInclusiveAnd(val, mask);
}
uint2 NvWaveMultiPrefixInclusiveAnd(uint2 val, uint mask)
{
    uint2 temp;
    uint a = NvActiveThreads();
    uint remainingThreads = a & __NvGetSpecial(NV_SPECIALOP_THREADLTMASK) & mask;
    uint nextLane = firstbithigh(remainingThreads);
    for (uint i = 0; i < NV_WARP_SIZE_LOG2; i++)
    {
        temp = NvShfl(val, nextLane);
        uint laneValid;
        uint newLane = asuint(__NvShflGeneric(nextLane, nextLane, 30, laneValid));
        if (laneValid) // if nextLane's nextLane is valid
        {
            val = val & temp;
            nextLane = newLane;
        }
    }
    return val;
}

uint2 NvWaveMultiPrefixExclusiveAnd(uint2 val, uint mask)
{
    uint2 temp;
    uint a = NvActiveThreads();
    uint remainingThreads = a & __NvGetSpecial(NV_SPECIALOP_THREADLTMASK) & mask;
    uint lane = firstbithigh(remainingThreads);
    temp = NvShfl(val, lane);
    val = remainingThreads != 0 ? temp : uint2(~0, ~0);
    return NvWaveMultiPrefixInclusiveAnd(val, mask);
}
uint4 NvWaveMultiPrefixInclusiveAnd(uint4 val, uint mask)
{
    uint4 temp;
    uint a = NvActiveThreads();
    uint remainingThreads = a & __NvGetSpecial(NV_SPECIALOP_THREADLTMASK) & mask;
    uint nextLane = firstbithigh(remainingThreads);
    for (uint i = 0; i < NV_WARP_SIZE_LOG2; i++)
    {
        temp = NvShfl(val, nextLane);
        uint laneValid;
        uint newLane = asuint(__NvShflGeneric(nextLane, nextLane, 30, laneValid));
        if (laneValid) // if nextLane's nextLane is valid
        {
            val = val & temp;
            nextLane = newLane;
        }
    }
    return val;
}

uint4 NvWaveMultiPrefixExclusiveAnd(uint4 val, uint mask)
{
    uint4 temp;
    uint a = NvActiveThreads();
    uint remainingThreads = a & __NvGetSpecial(NV_SPECIALOP_THREADLTMASK) & mask;
    uint lane = firstbithigh(remainingThreads);
    temp = NvShfl(val, lane);
    val = remainingThreads != 0 ? temp : uint4(~0, ~0, ~0, ~0);
    return NvWaveMultiPrefixInclusiveAnd(val, mask);
}
uint NvWaveMultiPrefixInclusiveOr(uint val, uint mask)
{
    uint temp;
    uint a = NvActiveThreads();
    uint remainingThreads = a & __NvGetSpecial(NV_SPECIALOP_THREADLTMASK) & mask;
    uint nextLane = firstbithigh(remainingThreads);
    for (uint i = 0; i < NV_WARP_SIZE_LOG2; i++)
    {
        temp = NvShfl(val, nextLane);
        uint laneValid;
        uint newLane = asuint(__NvShflGeneric(nextLane, nextLane, 30, laneValid));
        if (laneValid) // if nextLane's nextLane is valid
        {
            val = val | temp;
            nextLane = newLane;
        }
    }
    return val;
}

uint NvWaveMultiPrefixExclusiveOr(uint val, uint mask)
{
    uint temp;
    uint a = NvActiveThreads();
    uint remainingThreads = a & __NvGetSpecial(NV_SPECIALOP_THREADLTMASK) & mask;
    uint lane = firstbithigh(remainingThreads);
    temp = NvShfl(val, lane);
    val = remainingThreads != 0 ? temp : 0;
    return NvWaveMultiPrefixInclusiveOr(val, mask);
}
uint2 NvWaveMultiPrefixInclusiveOr(uint2 val, uint mask)
{
    uint2 temp;
    uint a = NvActiveThreads();
    uint remainingThreads = a & __NvGetSpecial(NV_SPECIALOP_THREADLTMASK) & mask;
    uint nextLane = firstbithigh(remainingThreads);
    for (uint i = 0; i < NV_WARP_SIZE_LOG2; i++)
    {
        temp = NvShfl(val, nextLane);
        uint laneValid;
        uint newLane = asuint(__NvShflGeneric(nextLane, nextLane, 30, laneValid));
        if (laneValid) // if nextLane's nextLane is valid
        {
            val = val | temp;
            nextLane = newLane;
        }
    }
    return val;
}

uint2 NvWaveMultiPrefixExclusiveOr(uint2 val, uint mask)
{
    uint2 temp;
    uint a = NvActiveThreads();
    uint remainingThreads = a & __NvGetSpecial(NV_SPECIALOP_THREADLTMASK) & mask;
    uint lane = firstbithigh(remainingThreads);
    temp = NvShfl(val, lane);
    val = remainingThreads != 0 ? temp : uint2(0, 0);
    return NvWaveMultiPrefixInclusiveOr(val, mask);
}
uint4 NvWaveMultiPrefixInclusiveOr(uint4 val, uint mask)
{
    uint4 temp;
    uint a = NvActiveThreads();
    uint remainingThreads = a & __NvGetSpecial(NV_SPECIALOP_THREADLTMASK) & mask;
    uint nextLane = firstbithigh(remainingThreads);
    for (uint i = 0; i < NV_WARP_SIZE_LOG2; i++)
    {
        temp = NvShfl(val, nextLane);
        uint laneValid;
        uint newLane = asuint(__NvShflGeneric(nextLane, nextLane, 30, laneValid));
        if (laneValid) // if nextLane's nextLane is valid
        {
            val = val | temp;
            nextLane = newLane;
        }
    }
    return val;
}

uint4 NvWaveMultiPrefixExclusiveOr(uint4 val, uint mask)
{
    uint4 temp;
    uint a = NvActiveThreads();
    uint remainingThreads = a & __NvGetSpecial(NV_SPECIALOP_THREADLTMASK) & mask;
    uint lane = firstbithigh(remainingThreads);
    temp = NvShfl(val, lane);
    val = remainingThreads != 0 ? temp : uint4(0, 0, 0, 0);
    return NvWaveMultiPrefixInclusiveOr(val, mask);
}
uint NvWaveMultiPrefixInclusiveXOr(uint val, uint mask)
{
    uint temp;
    uint a = NvActiveThreads();
    uint remainingThreads = a & __NvGetSpecial(NV_SPECIALOP_THREADLTMASK) & mask;
    uint nextLane = firstbithigh(remainingThreads);
    for (uint i = 0; i < NV_WARP_SIZE_LOG2; i++)
    {
        temp = NvShfl(val, nextLane);
        uint laneValid;
        uint newLane = asuint(__NvShflGeneric(nextLane, nextLane, 30, laneValid));
        if (laneValid) // if nextLane's nextLane is valid
        {
            val = val ^ temp;
            nextLane = newLane;
        }
    }
    return val;
}

uint NvWaveMultiPrefixExclusiveXOr(uint val, uint mask)
{
    uint temp;
    uint a = NvActiveThreads();
    uint remainingThreads = a & __NvGetSpecial(NV_SPECIALOP_THREADLTMASK) & mask;
    uint lane = firstbithigh(remainingThreads);
    temp = NvShfl(val, lane);
    val = remainingThreads != 0 ? temp : 0;
    return NvWaveMultiPrefixInclusiveXOr(val, mask);
}
uint2 NvWaveMultiPrefixInclusiveXOr(uint2 val, uint mask)
{
    uint2 temp;
    uint a = NvActiveThreads();
    uint remainingThreads = a & __NvGetSpecial(NV_SPECIALOP_THREADLTMASK) & mask;
    uint nextLane = firstbithigh(remainingThreads);
    for (uint i = 0; i < NV_WARP_SIZE_LOG2; i++)
    {
        temp = NvShfl(val, nextLane);
        uint laneValid;
        uint newLane = asuint(__NvShflGeneric(nextLane, nextLane, 30, laneValid));
        if (laneValid) // if nextLane's nextLane is valid
        {
            val = val ^ temp;
            nextLane = newLane;
        }
    }
    return val;
}

uint2 NvWaveMultiPrefixExclusiveXOr(uint2 val, uint mask)
{
    uint2 temp;
    uint a = NvActiveThreads();
    uint remainingThreads = a & __NvGetSpecial(NV_SPECIALOP_THREADLTMASK) & mask;
    uint lane = firstbithigh(remainingThreads);
    temp = NvShfl(val, lane);
    val = remainingThreads != 0 ? temp : uint2(0, 0);
    return NvWaveMultiPrefixInclusiveXOr(val, mask);
}
uint4 NvWaveMultiPrefixInclusiveXOr(uint4 val, uint mask)
{
    uint4 temp;
    uint a = NvActiveThreads();
    uint remainingThreads = a & __NvGetSpecial(NV_SPECIALOP_THREADLTMASK) & mask;
    uint nextLane = firstbithigh(remainingThreads);
    for (uint i = 0; i < NV_WARP_SIZE_LOG2; i++)
    {
        temp = NvShfl(val, nextLane);
        uint laneValid;
        uint newLane = asuint(__NvShflGeneric(nextLane, nextLane, 30, laneValid));
        if (laneValid) // if nextLane's nextLane is valid
        {
            val = val ^ temp;
            nextLane = newLane;
        }
    }
    return val;
}

uint4 NvWaveMultiPrefixExclusiveXOr(uint4 val, uint mask)
{
    uint4 temp;
    uint a = NvActiveThreads();
    uint remainingThreads = a & __NvGetSpecial(NV_SPECIALOP_THREADLTMASK) & mask;
    uint lane = firstbithigh(remainingThreads);
    temp = NvShfl(val, lane);
    val = remainingThreads != 0 ? temp : uint4(0, 0, 0, 0);
    return NvWaveMultiPrefixInclusiveXOr(val, mask);
}
//----------------------------------------------------------------------------//
//------------------------- Ray Tracing Utilities ----------------------------//
//----------------------------------------------------------------------------//

// Returns the object-space vertex positions of the triangle for the current hit;
// each row of the returned matrix is one vertex.
float3x3 NvRtTriangleObjectPositions()
{
    uint index = g_NvidiaExt.IncrementCounter();
    g_NvidiaExt[index].opcode = NV_EXTN_OP_RT_TRIANGLE_OBJECT_POSITIONS;

    float3x3 ret;
    ret[0][0] = asfloat(g_NvidiaExt.IncrementCounter());
    ret[0][1] = asfloat(g_NvidiaExt.IncrementCounter());
    ret[0][2] = asfloat(g_NvidiaExt.IncrementCounter());
    ret[1][0] = asfloat(g_NvidiaExt.IncrementCounter());
    ret[1][1] = asfloat(g_NvidiaExt.IncrementCounter());
    ret[1][2] = asfloat(g_NvidiaExt.IncrementCounter());
    ret[2][0] = asfloat(g_NvidiaExt.IncrementCounter());
    ret[2][1] = asfloat(g_NvidiaExt.IncrementCounter());
    ret[2][2] = asfloat(g_NvidiaExt.IncrementCounter());
    return ret;
}
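// Example (illustrative sketch, not part of the original header): recovering
// the object-space hit position in a closest-hit shader, where 'attr' is the
// BuiltInTriangleIntersectionAttributes parameter.
//
//   float3x3 v = NvRtTriangleObjectPositions();
//   float3 bary = float3(1 - attr.barycentrics.x - attr.barycentrics.y,
//                        attr.barycentrics.x, attr.barycentrics.y);
//   float3 objPos = v[0] * bary.x + v[1] * bary.y + v[2] * bary.z;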
// Returns the object-space vertex positions of the micro-triangle for the current hit
float3x3 NvRtMicroTriangleObjectPositions()
{
    uint index = g_NvidiaExt.IncrementCounter();
    g_NvidiaExt[index].opcode = NV_EXTN_OP_RT_MICRO_TRIANGLE_OBJECT_POSITIONS;

    float3x3 ret;
    ret[0][0] = asfloat(g_NvidiaExt.IncrementCounter());
    ret[0][1] = asfloat(g_NvidiaExt.IncrementCounter());
    ret[0][2] = asfloat(g_NvidiaExt.IncrementCounter());
    ret[1][0] = asfloat(g_NvidiaExt.IncrementCounter());
    ret[1][1] = asfloat(g_NvidiaExt.IncrementCounter());
    ret[1][2] = asfloat(g_NvidiaExt.IncrementCounter());
    ret[2][0] = asfloat(g_NvidiaExt.IncrementCounter());
    ret[2][1] = asfloat(g_NvidiaExt.IncrementCounter());
    ret[2][2] = asfloat(g_NvidiaExt.IncrementCounter());
    return ret;
}
// Returns the barycentric coordinates of the micro-triangle vertices within the
// base triangle; each row holds one vertex's (u, v).
float3x2 NvRtMicroTriangleBarycentrics()
{
    uint index = g_NvidiaExt.IncrementCounter();
    g_NvidiaExt[index].opcode = NV_EXTN_OP_RT_MICRO_TRIANGLE_BARYCENTRICS;

    float3x2 ret;
    ret[0][0] = asfloat(g_NvidiaExt.IncrementCounter());
    ret[0][1] = asfloat(g_NvidiaExt.IncrementCounter());
    ret[1][0] = asfloat(g_NvidiaExt.IncrementCounter());
    ret[1][1] = asfloat(g_NvidiaExt.IncrementCounter());
    ret[2][0] = asfloat(g_NvidiaExt.IncrementCounter());
    ret[2][1] = asfloat(g_NvidiaExt.IncrementCounter());
    return ret;
}
bool NvRtIsMicroTriangleHit()
{
    uint index = g_NvidiaExt.IncrementCounter();
    g_NvidiaExt[index].opcode = NV_EXTN_OP_RT_IS_MICRO_TRIANGLE_HIT;
    uint ret = g_NvidiaExt.IncrementCounter();
    return ret != 0;
}

bool NvRtIsBackFacing()
{
    uint index = g_NvidiaExt.IncrementCounter();
    g_NvidiaExt[index].opcode = NV_EXTN_OP_RT_IS_BACK_FACING;
    uint ret = g_NvidiaExt.IncrementCounter();
    return ret != 0;
}
#if __SHADER_TARGET_MAJOR > 6 || (__SHADER_TARGET_MAJOR == 6 && __SHADER_TARGET_MINOR >= 5)

float3 NvRtMicroVertexObjectPosition(RaytracingAccelerationStructure AccelerationStructure, uint InstanceIndex, uint GeometryIndex, uint PrimitiveIndex, uint2 UV)
{
    uint index = g_NvidiaExt.IncrementCounter();
    g_NvidiaExt[index].opcode = NV_EXTN_OP_RT_MICRO_VERTEX_OBJECT_POSITION;
    g_NvidiaExt[index].src0u.x = InstanceIndex;
    g_NvidiaExt[index].src0u.y = GeometryIndex;
    g_NvidiaExt[index].src0u.z = PrimitiveIndex;
    g_NvidiaExt[index].src0u.w = UV.x;
    g_NvidiaExt[index].src1u.x = UV.y;
    uint handle = g_NvidiaExt.IncrementCounter();

    float3 ret;
    ret.x = asfloat(g_NvidiaExt.IncrementCounter());
    ret.y = asfloat(g_NvidiaExt.IncrementCounter());
    ret.z = asfloat(g_NvidiaExt.IncrementCounter());

    // the inline ray query is a marker consumed by the driver, not a real trace
    RayQuery<RAY_FLAG_FORCE_OPAQUE> rq;
    rq.TraceRayInline(AccelerationStructure, 0, handle, (RayDesc)0);

    return ret;
}
float2 NvRtMicroVertexBarycentrics(RaytracingAccelerationStructure AccelerationStructure, uint InstanceIndex, uint GeometryIndex, uint PrimitiveIndex, uint2 UV)
{
    uint index = g_NvidiaExt.IncrementCounter();
    g_NvidiaExt[index].opcode = NV_EXTN_OP_RT_MICRO_VERTEX_BARYCENTRICS;
    g_NvidiaExt[index].src0u.x = InstanceIndex;
    g_NvidiaExt[index].src0u.y = GeometryIndex;
    g_NvidiaExt[index].src0u.z = PrimitiveIndex;
    g_NvidiaExt[index].src0u.w = UV.x;
    g_NvidiaExt[index].src1u.x = UV.y;
    uint handle = g_NvidiaExt.IncrementCounter();

    float2 ret;
    ret.x = asfloat(g_NvidiaExt.IncrementCounter());
    ret.y = asfloat(g_NvidiaExt.IncrementCounter());

    // the inline ray query is a marker consumed by the driver, not a real trace
    RayQuery<RAY_FLAG_FORCE_OPAQUE> rq;
    rq.TraceRayInline(AccelerationStructure, 0, handle, (RayDesc)0);

    return ret;
}

#endif
//----------------------------------------------------------------------------//
//---------------------- Shader Execution Reordering -------------------------//
//----------------------------------------------------------------------------//

#if defined(__HLSL_VERSION) && (__HLSL_VERSION >= 2021) && !defined(NV_HITOBJECT_USE_MACRO_API)

struct NvHitObject {
    uint _handle;

    bool IsMiss()
    {
        uint index = g_NvidiaExt.IncrementCounter();
        g_NvidiaExt[index].opcode = NV_EXTN_OP_HIT_OBJECT_IS_MISS;
        g_NvidiaExt[index].src0u.x = _handle;
        uint ret = g_NvidiaExt.IncrementCounter();
        return ret != 0;
    }

    bool IsHit()
    {
        uint index = g_NvidiaExt.IncrementCounter();
        g_NvidiaExt[index].opcode = NV_EXTN_OP_HIT_OBJECT_IS_HIT;
        g_NvidiaExt[index].src0u.x = _handle;
        uint ret = g_NvidiaExt.IncrementCounter();
        return ret != 0;
    }

    bool IsNop()
    {
        uint index = g_NvidiaExt.IncrementCounter();
        g_NvidiaExt[index].opcode = NV_EXTN_OP_HIT_OBJECT_IS_NOP;
        g_NvidiaExt[index].src0u.x = _handle;
        uint ret = g_NvidiaExt.IncrementCounter();
        return ret != 0;
    }
    uint GetInstanceID()
    {
        uint index = g_NvidiaExt.IncrementCounter();
        g_NvidiaExt[index].opcode = NV_EXTN_OP_HIT_OBJECT_GET_INSTANCE_ID;
        g_NvidiaExt[index].src0u.x = _handle;
        return g_NvidiaExt.IncrementCounter();
    }

    uint GetInstanceIndex()
    {
        uint index = g_NvidiaExt.IncrementCounter();
        g_NvidiaExt[index].opcode = NV_EXTN_OP_HIT_OBJECT_GET_INSTANCE_INDEX;
        g_NvidiaExt[index].src0u.x = _handle;
        return g_NvidiaExt.IncrementCounter();
    }

    uint GetPrimitiveIndex()
    {
        uint index = g_NvidiaExt.IncrementCounter();
        g_NvidiaExt[index].opcode = NV_EXTN_OP_HIT_OBJECT_GET_PRIMITIVE_INDEX;
        g_NvidiaExt[index].src0u.x = _handle;
        return g_NvidiaExt.IncrementCounter();
    }

    uint GetGeometryIndex()
    {
        uint index = g_NvidiaExt.IncrementCounter();
        g_NvidiaExt[index].opcode = NV_EXTN_OP_HIT_OBJECT_GET_GEOMETRY_INDEX;
        g_NvidiaExt[index].src0u.x = _handle;
        return g_NvidiaExt.IncrementCounter();
    }

    uint GetHitKind()
    {
        uint index = g_NvidiaExt.IncrementCounter();
        g_NvidiaExt[index].opcode = NV_EXTN_OP_HIT_OBJECT_GET_HIT_KIND;
        g_NvidiaExt[index].src0u.x = _handle;
        return g_NvidiaExt.IncrementCounter();
    }
    RayDesc GetRayDesc()
    {
        uint index = g_NvidiaExt.IncrementCounter();
        g_NvidiaExt[index].opcode = NV_EXTN_OP_HIT_OBJECT_GET_RAY_DESC;
        g_NvidiaExt[index].src0u.x = _handle;

        uint tmin = g_NvidiaExt.IncrementCounter();
        uint tmax = g_NvidiaExt.IncrementCounter();
        uint rayOrgX = g_NvidiaExt.IncrementCounter();
        uint rayOrgY = g_NvidiaExt.IncrementCounter();
        uint rayOrgZ = g_NvidiaExt.IncrementCounter();
        uint rayDirX = g_NvidiaExt.IncrementCounter();
        uint rayDirY = g_NvidiaExt.IncrementCounter();
        uint rayDirZ = g_NvidiaExt.IncrementCounter();

        RayDesc ray;
        ray.TMin = asfloat(tmin);
        ray.TMax = asfloat(tmax);
        ray.Origin.x = asfloat(rayOrgX);
        ray.Origin.y = asfloat(rayOrgY);
        ray.Origin.z = asfloat(rayOrgZ);
        ray.Direction.x = asfloat(rayDirX);
        ray.Direction.y = asfloat(rayDirY);
        ray.Direction.z = asfloat(rayDirZ);

        return ray;
    }
    template <typename T>
    T GetAttributes()
    {
        uint index = g_NvidiaExt.IncrementCounter();
        g_NvidiaExt[index].opcode = NV_EXTN_OP_HIT_OBJECT_GET_ATTRIBUTES;
        g_NvidiaExt[index].src0u.x = _handle;
        uint callHandle = g_NvidiaExt.IncrementCounter();

        T attrs;
        CallShader(callHandle, attrs);
        return attrs;
    }
    uint GetShaderTableIndex()
    {
        uint index = g_NvidiaExt.IncrementCounter();
        g_NvidiaExt[index].opcode = NV_EXTN_OP_HIT_OBJECT_GET_SHADER_TABLE_INDEX;
        g_NvidiaExt[index].src0u.x = _handle;
        return g_NvidiaExt.IncrementCounter();
    }

    uint LoadLocalRootTableConstant(uint RootConstantOffsetInBytes)
    {
        uint index = g_NvidiaExt.IncrementCounter();
        g_NvidiaExt[index].opcode = NV_EXTN_OP_HIT_OBJECT_LOAD_LOCAL_ROOT_TABLE_CONSTANT;
        g_NvidiaExt[index].src0u.x = _handle;
        g_NvidiaExt[index].src0u.y = RootConstantOffsetInBytes;
        return g_NvidiaExt.IncrementCounter();
    }
};
// Traces a ray and returns the resulting hit object without invoking hit
// shading; shading can be invoked later with NvInvokeHitObject().
template <typename T>
NvHitObject NvTraceRayHitObject(
    RaytracingAccelerationStructure AccelerationStructure,
    uint RayFlags,
    uint InstanceInclusionMask,
    uint RayContributionToHitGroupIndex,
    uint MultiplierForGeometryContributionToHitGroupIndex,
    uint MissShaderIndex,
    RayDesc Ray,
    inout T Payload)
{
    uint index = g_NvidiaExt.IncrementCounter();
    g_NvidiaExt[index].opcode = NV_EXTN_OP_HIT_OBJECT_TRACE_RAY;
    g_NvidiaExt[index].numOutputsForIncCounter = 2;
    g_NvidiaExt[index].src0u.x = MissShaderIndex;
    uint hitHandle = g_NvidiaExt.IncrementCounter();
    uint traceHandle = g_NvidiaExt.IncrementCounter();

    TraceRay(AccelerationStructure, RayFlags, InstanceInclusionMask, RayContributionToHitGroupIndex, MultiplierForGeometryContributionToHitGroupIndex, traceHandle, Ray, Payload);

    NvHitObject hitObj;
    hitObj._handle = hitHandle;
    return hitObj;
}
// Constructs a hit object for the given instance/geometry/primitive and
// attributes without tracing a ray.
template <typename T>
NvHitObject NvMakeHit(
    RaytracingAccelerationStructure AccelerationStructure,
    uint InstanceIndex,
    uint GeometryIndex,
    uint PrimitiveIndex,
    uint HitKind,
    uint RayContributionToHitGroupIndex,
    uint MultiplierForGeometryContributionToHitGroupIndex,
    RayDesc Ray,
    T Attributes)
{
    uint index = g_NvidiaExt.IncrementCounter();
    g_NvidiaExt[index].opcode = NV_EXTN_OP_HIT_OBJECT_MAKE_HIT;
    g_NvidiaExt[index].numOutputsForIncCounter = 2;
    g_NvidiaExt[index].src0u.x = InstanceIndex;
    g_NvidiaExt[index].src0u.y = GeometryIndex;
    g_NvidiaExt[index].src0u.z = PrimitiveIndex;
    g_NvidiaExt[index].src0u.w = HitKind;
    g_NvidiaExt[index].src1u.x = RayContributionToHitGroupIndex;
    g_NvidiaExt[index].src1u.y = MultiplierForGeometryContributionToHitGroupIndex;
    uint hitHandle = g_NvidiaExt.IncrementCounter();
    uint traceHandle = g_NvidiaExt.IncrementCounter();

    struct AttrWrapper { T Attrs; };
    AttrWrapper wrapper;
    wrapper.Attrs = Attributes;
    CallShader(traceHandle, wrapper);

    struct DummyPayload { int a; };
    DummyPayload payload;
    TraceRay(AccelerationStructure, 0, 0, 0, 0, traceHandle, Ray, payload);

    NvHitObject hitObj;
    hitObj._handle = hitHandle;
    return hitObj;
}
// Variant of NvMakeHit that addresses the shader table record directly.
template <typename T>
NvHitObject NvMakeHitWithRecordIndex(
    uint HitGroupRecordIndex,
    RaytracingAccelerationStructure AccelerationStructure,
    uint InstanceIndex,
    uint GeometryIndex,
    uint PrimitiveIndex,
    uint HitKind,
    RayDesc Ray,
    T Attributes)
{
    uint index = g_NvidiaExt.IncrementCounter();
    g_NvidiaExt[index].opcode = NV_EXTN_OP_HIT_OBJECT_MAKE_HIT_WITH_RECORD_INDEX;
    g_NvidiaExt[index].numOutputsForIncCounter = 2;
    g_NvidiaExt[index].src0u.x = InstanceIndex;
    g_NvidiaExt[index].src0u.y = GeometryIndex;
    g_NvidiaExt[index].src0u.z = PrimitiveIndex;
    g_NvidiaExt[index].src0u.w = HitKind;
    g_NvidiaExt[index].src1u.x = HitGroupRecordIndex;
    uint hitHandle = g_NvidiaExt.IncrementCounter();
    uint traceHandle = g_NvidiaExt.IncrementCounter();

    struct AttrWrapper { T Attrs; };
    AttrWrapper wrapper;
    wrapper.Attrs = Attributes;
    CallShader(traceHandle, wrapper);

    struct DummyPayload { int a; };
    DummyPayload payload;
    TraceRay(AccelerationStructure, 0, 0, 0, 0, traceHandle, Ray, payload);

    NvHitObject hitObj;
    hitObj._handle = hitHandle;
    return hitObj;
}
// Constructs a hit object representing a miss, storing the ray and the index
// of the miss shader to run when the hit object is invoked.
NvHitObject NvMakeMiss(
    uint MissShaderIndex,
    RayDesc Ray)
{
    uint index = g_NvidiaExt.IncrementCounter();
    g_NvidiaExt[index].opcode = NV_EXTN_OP_HIT_OBJECT_MAKE_MISS;
    g_NvidiaExt[index].src0u.x = MissShaderIndex;
    g_NvidiaExt[index].src0u.y = asuint(Ray.TMin);
    g_NvidiaExt[index].src0u.z = asuint(Ray.TMax);
    g_NvidiaExt[index].src1u.x = asuint(Ray.Origin.x);
    g_NvidiaExt[index].src1u.y = asuint(Ray.Origin.y);
    g_NvidiaExt[index].src1u.z = asuint(Ray.Origin.z);
    g_NvidiaExt[index].src2u.x = asuint(Ray.Direction.x);
    g_NvidiaExt[index].src2u.y = asuint(Ray.Direction.y);
    g_NvidiaExt[index].src2u.z = asuint(Ray.Direction.z);
    uint hitHandle = g_NvidiaExt.IncrementCounter();

    NvHitObject hitObj;
    hitObj._handle = hitHandle;
    return hitObj;
}
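
// Illustrative sketch (not part of the original header): encoding a miss that
// will run miss shader 0 when invoked ("ray" is an assumed RayDesc).
//
//   NvHitObject missObj = NvMakeMiss(0, ray);
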
// Constructs a NOP (empty) hit object; invoking it is a no-op.
NvHitObject NvMakeNop()
{
    uint index = g_NvidiaExt.IncrementCounter();
    g_NvidiaExt[index].opcode = NV_EXTN_OP_HIT_OBJECT_MAKE_NOP;
    uint hitHandle = g_NvidiaExt.IncrementCounter();
    NvHitObject hitObj;
    hitObj._handle = hitHandle;
    return hitObj;
}

// Hints that threads should be regrouped for shading coherence using only the
// user-provided hint. NumCoherenceHintBits gives the number of
// least-significant bits of CoherenceHint to consider.
void NvReorderThread(uint CoherenceHint, uint NumCoherenceHintBits)
{
    uint index = g_NvidiaExt.IncrementCounter();
    g_NvidiaExt[index].opcode = NV_EXTN_OP_HIT_OBJECT_REORDER_THREAD;
    g_NvidiaExt[index].src0u.x = 0;
    g_NvidiaExt[index].src0u.y = 0;
    g_NvidiaExt[index].src0u.z = CoherenceHint;
    g_NvidiaExt[index].src0u.w = NumCoherenceHintBits;
    g_NvidiaExt.IncrementCounter();
}

// Reorders threads by hit object state, refined by optional user hint bits.
void NvReorderThread(NvHitObject HitObj, uint CoherenceHint, uint NumCoherenceHintBits)
{
    uint index = g_NvidiaExt.IncrementCounter();
    g_NvidiaExt[index].opcode = NV_EXTN_OP_HIT_OBJECT_REORDER_THREAD;
    g_NvidiaExt[index].src0u.x = 1;
    g_NvidiaExt[index].src0u.y = HitObj._handle;
    g_NvidiaExt[index].src0u.z = CoherenceHint;
    g_NvidiaExt[index].src0u.w = NumCoherenceHintBits;
    g_NvidiaExt.IncrementCounter();
}

// Reorders threads by hit object state alone.
void NvReorderThread(NvHitObject HitObj)
{
    NvReorderThread(HitObj, 0, 0);
}
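
// Illustrative sketch (not part of the original header): reordering primarily
// by hit object state, refined by one user bit ("isShadowRay" is an assumed
// application flag).
//
//   NvReorderThread(hitObj, isShadowRay ? 1u : 0u, 1);
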
// Executes the closesthit or miss shading recorded in the hit object, reading
// and writing the payload. Invoking a NOP hit object is a no-op.
template <typename T>
void NvInvokeHitObject(
    RaytracingAccelerationStructure AccelerationStructure,
    NvHitObject HitObj,
    inout T Payload)
{
    uint index = g_NvidiaExt.IncrementCounter();
    g_NvidiaExt[index].opcode = NV_EXTN_OP_HIT_OBJECT_INVOKE;
    g_NvidiaExt[index].src0u.x = HitObj._handle;
    uint handle = g_NvidiaExt.IncrementCounter();
    TraceRay(AccelerationStructure, 0, 0, 0, 0, handle, (RayDesc)0, Payload);
}
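
// Illustrative end-to-end sketch (not part of the original header): the
// intended trace / reorder / invoke pattern in a raygeneration shader.
// "SceneBVH", "MyPayload", and the ray setup are assumptions for the example.
//
//   RaytracingAccelerationStructure SceneBVH : register(t0);
//
//   struct MyPayload { float4 color; };
//
//   [shader("raygeneration")]
//   void RayGen()
//   {
//       RayDesc ray;
//       ray.Origin    = float3(0, 0, 0);
//       ray.Direction = float3(0, 0, 1);
//       ray.TMin      = 0.0f;
//       ray.TMax      = 1e30f;
//
//       MyPayload payload = (MyPayload)0;
//       NvHitObject hitObj = NvTraceRayHitObject(
//           SceneBVH, RAY_FLAG_NONE, 0xFF, 0, 1, 0, ray, payload);
//       NvReorderThread(hitObj);                      // regroup by hit state
//       NvInvokeHitObject(SceneBVH, hitObj, payload); // deferred closesthit/miss
//   }
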
#elif defined(NV_HITOBJECT_USE_MACRO_API)

// Macro variant of the hit object API, for toolchains without HLSL template
// support. The NvHitObject struct and its accessors match the template API.
struct NvHitObject {
    uint _handle;

    bool IsMiss()
    {
        uint index = g_NvidiaExt.IncrementCounter();
        g_NvidiaExt[index].opcode = NV_EXTN_OP_HIT_OBJECT_IS_MISS;
        g_NvidiaExt[index].src0u.x = _handle;
        uint ret = g_NvidiaExt.IncrementCounter();
        return ret != 0;
    }

    bool IsHit()
    {
        uint index = g_NvidiaExt.IncrementCounter();
        g_NvidiaExt[index].opcode = NV_EXTN_OP_HIT_OBJECT_IS_HIT;
        g_NvidiaExt[index].src0u.x = _handle;
        uint ret = g_NvidiaExt.IncrementCounter();
        return ret != 0;
    }

    bool IsNop()
    {
        uint index = g_NvidiaExt.IncrementCounter();
        g_NvidiaExt[index].opcode = NV_EXTN_OP_HIT_OBJECT_IS_NOP;
        g_NvidiaExt[index].src0u.x = _handle;
        uint ret = g_NvidiaExt.IncrementCounter();
        return ret != 0;
    }

    uint GetInstanceID()
    {
        uint index = g_NvidiaExt.IncrementCounter();
        g_NvidiaExt[index].opcode = NV_EXTN_OP_HIT_OBJECT_GET_INSTANCE_ID;
        g_NvidiaExt[index].src0u.x = _handle;
        return g_NvidiaExt.IncrementCounter();
    }

    uint GetInstanceIndex()
    {
        uint index = g_NvidiaExt.IncrementCounter();
        g_NvidiaExt[index].opcode = NV_EXTN_OP_HIT_OBJECT_GET_INSTANCE_INDEX;
        g_NvidiaExt[index].src0u.x = _handle;
        return g_NvidiaExt.IncrementCounter();
    }

    uint GetPrimitiveIndex()
    {
        uint index = g_NvidiaExt.IncrementCounter();
        g_NvidiaExt[index].opcode = NV_EXTN_OP_HIT_OBJECT_GET_PRIMITIVE_INDEX;
        g_NvidiaExt[index].src0u.x = _handle;
        return g_NvidiaExt.IncrementCounter();
    }

    uint GetGeometryIndex()
    {
        uint index = g_NvidiaExt.IncrementCounter();
        g_NvidiaExt[index].opcode = NV_EXTN_OP_HIT_OBJECT_GET_GEOMETRY_INDEX;
        g_NvidiaExt[index].src0u.x = _handle;
        return g_NvidiaExt.IncrementCounter();
    }

    uint GetHitKind()
    {
        uint index = g_NvidiaExt.IncrementCounter();
        g_NvidiaExt[index].opcode = NV_EXTN_OP_HIT_OBJECT_GET_HIT_KIND;
        g_NvidiaExt[index].src0u.x = _handle;
        return g_NvidiaExt.IncrementCounter();
    }

    RayDesc GetRayDesc()
    {
        uint index = g_NvidiaExt.IncrementCounter();
        g_NvidiaExt[index].opcode = NV_EXTN_OP_HIT_OBJECT_GET_RAY_DESC;
        g_NvidiaExt[index].src0u.x = _handle;
        g_NvidiaExt[index].numOutputsForIncCounter = 8;
        uint tmin = g_NvidiaExt.IncrementCounter();
        uint tmax = g_NvidiaExt.IncrementCounter();
        uint rayOrgX = g_NvidiaExt.IncrementCounter();
        uint rayOrgY = g_NvidiaExt.IncrementCounter();
        uint rayOrgZ = g_NvidiaExt.IncrementCounter();
        uint rayDirX = g_NvidiaExt.IncrementCounter();
        uint rayDirY = g_NvidiaExt.IncrementCounter();
        uint rayDirZ = g_NvidiaExt.IncrementCounter();

        RayDesc ray;
        ray.TMin = asfloat(tmin);
        ray.TMax = asfloat(tmax);
        ray.Origin.x = asfloat(rayOrgX);
        ray.Origin.y = asfloat(rayOrgY);
        ray.Origin.z = asfloat(rayOrgZ);
        ray.Direction.x = asfloat(rayDirX);
        ray.Direction.y = asfloat(rayDirY);
        ray.Direction.z = asfloat(rayDirZ);

        return ray;
    }

    uint GetShaderTableIndex()
    {
        uint index = g_NvidiaExt.IncrementCounter();
        g_NvidiaExt[index].opcode = NV_EXTN_OP_HIT_OBJECT_GET_SHADER_TABLE_INDEX;
        g_NvidiaExt[index].src0u.x = _handle;
        return g_NvidiaExt.IncrementCounter();
    }

    uint LoadLocalRootTableConstant(uint RootConstantOffsetInBytes)
    {
        uint index = g_NvidiaExt.IncrementCounter();
        g_NvidiaExt[index].opcode = NV_EXTN_OP_HIT_OBJECT_LOAD_LOCAL_ROOT_TABLE_CONSTANT;
        g_NvidiaExt[index].src0u.x = _handle;
        g_NvidiaExt[index].src0u.y = RootConstantOffsetInBytes;
        return g_NvidiaExt.IncrementCounter();
    }
};
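
// Illustrative sketch (not part of the original header): inspecting a hit
// object before invoking it ("hitObj" is an assumed NvHitObject produced by
// one of the constructors above).
//
//   if (hitObj.IsHit())
//   {
//       uint primitive = hitObj.GetPrimitiveIndex();
//       RayDesc hitRay = hitObj.GetRayDesc();
//   }
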
#define NvTraceRayHitObject(AccelerationStructure,RayFlags,InstanceInclusionMask,RayContributionToHitGroupIndex,MultiplierForGeometryContributionToHitGroupIndex,MissShaderIndex,Ray,Payload,ResultHitObj) \
do { \
    uint _rayFlags = RayFlags; \
    uint _instanceInclusionMask = InstanceInclusionMask; \
    uint _rayContributionToHitGroupIndex = RayContributionToHitGroupIndex; \
    uint _multiplierForGeometryContributionToHitGroupIndex = MultiplierForGeometryContributionToHitGroupIndex; \
    uint _missShaderIndex = MissShaderIndex; \
    RayDesc _ray = Ray; \
    uint _index = g_NvidiaExt.IncrementCounter(); \
    g_NvidiaExt[_index].opcode = NV_EXTN_OP_HIT_OBJECT_TRACE_RAY; \
    g_NvidiaExt[_index].numOutputsForIncCounter = 2; \
    g_NvidiaExt[_index].src0u.x = _missShaderIndex; \
    uint _hitHandle = g_NvidiaExt.IncrementCounter(); \
    uint _traceHandle = g_NvidiaExt.IncrementCounter(); \
    TraceRay(AccelerationStructure, _rayFlags, _instanceInclusionMask, _rayContributionToHitGroupIndex, _multiplierForGeometryContributionToHitGroupIndex, _traceHandle, _ray, Payload); \
    ResultHitObj._handle = _hitHandle; \
} while(0)
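
// Illustrative sketch (not part of the original header): unlike the template
// API, the macro assigns its result into an existing variable ("SceneBVH",
// "ray", and "payload" are assumptions).
//
//   NvHitObject hitObj;
//   NvTraceRayHitObject(SceneBVH, RAY_FLAG_NONE, 0xFF, 0, 1, 0,
//                       ray, payload, hitObj);
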
struct NvHitObjectMacroDummyPayloadType { int a; };

#define NvMakeHit(AccelerationStructure,InstanceIndex,GeometryIndex,PrimitiveIndex,HitKind,RayContributionToHitGroupIndex,MultiplierForGeometryContributionToHitGroupIndex,Ray,Attributes,ResultHitObj) \
do { \
    uint _instanceIndex = InstanceIndex; \
    uint _geometryIndex = GeometryIndex; \
    uint _primitiveIndex = PrimitiveIndex; \
    uint _hitKind = HitKind; \
    uint _rayContributionToHitGroupIndex = RayContributionToHitGroupIndex; \
    uint _multiplierForGeometryContributionToHitGroupIndex = MultiplierForGeometryContributionToHitGroupIndex; \
    RayDesc _ray = Ray; \
    uint _index = g_NvidiaExt.IncrementCounter(); \
    g_NvidiaExt[_index].opcode = NV_EXTN_OP_HIT_OBJECT_MAKE_HIT; \
    g_NvidiaExt[_index].numOutputsForIncCounter = 2; \
    g_NvidiaExt[_index].src0u.x = _instanceIndex; \
    g_NvidiaExt[_index].src0u.y = _geometryIndex; \
    g_NvidiaExt[_index].src0u.z = _primitiveIndex; \
    g_NvidiaExt[_index].src0u.w = _hitKind; \
    g_NvidiaExt[_index].src1u.x = _rayContributionToHitGroupIndex; \
    g_NvidiaExt[_index].src1u.y = _multiplierForGeometryContributionToHitGroupIndex; \
    uint _hitHandle = g_NvidiaExt.IncrementCounter(); \
    uint _traceHandle = g_NvidiaExt.IncrementCounter(); \
    CallShader(_traceHandle, Attributes); \
    NvHitObjectMacroDummyPayloadType _payload; \
    TraceRay(AccelerationStructure, 0, 0, 0, 0, _traceHandle, _ray, _payload); \
    ResultHitObj._handle = _hitHandle; \
} while(0)

#define NvMakeHitWithRecordIndex(HitGroupRecordIndex,AccelerationStructure,InstanceIndex,GeometryIndex,PrimitiveIndex,HitKind,Ray,Attributes,ResultHitObj) \
do { \
    uint _hitGroupRecordIndex = HitGroupRecordIndex; \
    uint _instanceIndex = InstanceIndex; \
    uint _geometryIndex = GeometryIndex; \
    uint _primitiveIndex = PrimitiveIndex; \
    uint _hitKind = HitKind; \
    RayDesc _ray = Ray; \
    uint _index = g_NvidiaExt.IncrementCounter(); \
    g_NvidiaExt[_index].opcode = NV_EXTN_OP_HIT_OBJECT_MAKE_HIT_WITH_RECORD_INDEX; \
    g_NvidiaExt[_index].numOutputsForIncCounter = 2; \
    g_NvidiaExt[_index].src0u.x = _instanceIndex; \
    g_NvidiaExt[_index].src0u.y = _geometryIndex; \
    g_NvidiaExt[_index].src0u.z = _primitiveIndex; \
    g_NvidiaExt[_index].src0u.w = _hitKind; \
    g_NvidiaExt[_index].src1u.x = _hitGroupRecordIndex; \
    uint _hitHandle = g_NvidiaExt.IncrementCounter(); \
    uint _traceHandle = g_NvidiaExt.IncrementCounter(); \
    CallShader(_traceHandle, Attributes); \
    NvHitObjectMacroDummyPayloadType _payload; \
    TraceRay(AccelerationStructure, 0, 0, 0, 0, _traceHandle, _ray, _payload); \
    ResultHitObj._handle = _hitHandle; \
} while(0)

NvHitObject NvMakeMiss(
    uint MissShaderIndex,
    RayDesc Ray)
{
    uint index = g_NvidiaExt.IncrementCounter();
    g_NvidiaExt[index].opcode = NV_EXTN_OP_HIT_OBJECT_MAKE_MISS;
    g_NvidiaExt[index].src0u.x = MissShaderIndex;
    g_NvidiaExt[index].src0u.y = asuint(Ray.TMin);
    g_NvidiaExt[index].src0u.z = asuint(Ray.TMax);
    g_NvidiaExt[index].src1u.x = asuint(Ray.Origin.x);
    g_NvidiaExt[index].src1u.y = asuint(Ray.Origin.y);
    g_NvidiaExt[index].src1u.z = asuint(Ray.Origin.z);
    g_NvidiaExt[index].src2u.x = asuint(Ray.Direction.x);
    g_NvidiaExt[index].src2u.y = asuint(Ray.Direction.y);
    g_NvidiaExt[index].src2u.z = asuint(Ray.Direction.z);
    uint hitHandle = g_NvidiaExt.IncrementCounter();

    NvHitObject hitObj;
    hitObj._handle = hitHandle;
    return hitObj;
}

NvHitObject NvMakeNop()
{
    uint index = g_NvidiaExt.IncrementCounter();
    g_NvidiaExt[index].opcode = NV_EXTN_OP_HIT_OBJECT_MAKE_NOP;
    uint hitHandle = g_NvidiaExt.IncrementCounter();
    NvHitObject hitObj;
    hitObj._handle = hitHandle;
    return hitObj;
}

#define NvGetAttributesFromHitObject(HitObj,ResultAttributes) \
do { \
    uint _index = g_NvidiaExt.IncrementCounter(); \
    g_NvidiaExt[_index].opcode = NV_EXTN_OP_HIT_OBJECT_GET_ATTRIBUTES; \
    g_NvidiaExt[_index].src0u.x = HitObj._handle; \
    uint _callHandle = g_NvidiaExt.IncrementCounter(); \
    CallShader(_callHandle, ResultAttributes); \
} while(0)
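
// Illustrative sketch (not part of the original header): reading the
// attributes stored in a hit object into a matching attribute struct (the
// built-in triangle attributes are assumed here).
//
//   BuiltInTriangleIntersectionAttributes attrs;
//   NvGetAttributesFromHitObject(hitObj, attrs);
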
void NvReorderThread(uint CoherenceHint, uint NumCoherenceHintBits)
{
    uint index = g_NvidiaExt.IncrementCounter();
    g_NvidiaExt[index].opcode = NV_EXTN_OP_HIT_OBJECT_REORDER_THREAD;
    g_NvidiaExt[index].src0u.x = 0;
    g_NvidiaExt[index].src0u.y = 0;
    g_NvidiaExt[index].src0u.z = CoherenceHint;
    g_NvidiaExt[index].src0u.w = NumCoherenceHintBits;
    g_NvidiaExt.IncrementCounter();
}

void NvReorderThread(NvHitObject HitObj, uint CoherenceHint, uint NumCoherenceHintBits)
{
    uint index = g_NvidiaExt.IncrementCounter();
    g_NvidiaExt[index].opcode = NV_EXTN_OP_HIT_OBJECT_REORDER_THREAD;
    g_NvidiaExt[index].src0u.x = 1;
    g_NvidiaExt[index].src0u.y = HitObj._handle;
    g_NvidiaExt[index].src0u.z = CoherenceHint;
    g_NvidiaExt[index].src0u.w = NumCoherenceHintBits;
    g_NvidiaExt.IncrementCounter();
}

void NvReorderThread(NvHitObject HitObj)
{
    NvReorderThread(HitObj, 0, 0);
}

#define NvInvokeHitObject(AccelerationStructure,HitObj,Payload) \
do { \
    uint _index = g_NvidiaExt.IncrementCounter(); \
    g_NvidiaExt[_index].opcode = NV_EXTN_OP_HIT_OBJECT_INVOKE; \
    g_NvidiaExt[_index].src0u.x = HitObj._handle; \
    uint _handle = g_NvidiaExt.IncrementCounter(); \
    TraceRay(AccelerationStructure, 0, 0, 0, 0, _handle, (RayDesc)0, Payload); \
} while(0)