22 #ifndef __optix_optix_internal_h__
23 #define __optix_optix_internal_h__
27 #include "../optix_sizet.h"
33 #if (defined(__CUDACC__) && (__CUDA_ARCH__ > 0))
34 #if ((CUDA_VERSION >= 4010) || (CUDART_VERSION >= 4010)) && __CUDA_ARCH__ >= 200
44 void rt_undefined_use(
int);
47 #if (defined(__CUDACC__) && (__CUDA_ARCH__ > 0))
48 #if ((CUDA_VERSION >= 4010) || (CUDART_VERSION >= 4010)) && __CUDA_ARCH__ >= 200
54 rt_undefined_use64(
unsigned long long)
58 void rt_undefined_use64(
int);
/* Query the dimensions of the texture sampler with id 'tex'.
   Returns (width, height, depth) packed into a uint3.
   NOTE(review): braces and the asm terminator were lost in extraction and
   have been reconstructed; verify against the shipped optix_internal.h. */
static __forceinline__ __device__ uint3 rt_texture_get_size_id(int tex)
{
  unsigned int u0, u1, u2;
  asm volatile("call (%0, %1, %2), _rt_texture_get_size_id, (%3);" :
               "=r"(u0), "=r"(u1), "=r"(u2) :
               "r"(tex) :
               );
  /* rt_undefined_use on each asm result — appears to be a workaround that
     keeps the compiler from mis-optimizing the outputs; preserved as-is. */
  rt_undefined_use((int)u0);
  rt_undefined_use((int)u1);
  rt_undefined_use((int)u2);
  return make_uint3(u0, u1, u2);
}
/* Texture gather on sampler id 'tex': fetches component 'comp' of the four
   texels surrounding (x, y) and returns them as a float4.
   NOTE(review): the local declarations (f0..f3, dim) were lost in extraction.
   The asm passes a 'dim' operand, and gather is a 2D operation, so dim is
   reconstructed as 2 — confirm against the shipped header. */
static __forceinline__ __device__ float4 rt_texture_get_gather_id(int tex, float x, float y, int comp)
{
  float f0, f1, f2, f3;
  int dim = 2;  /* gather operates on 2D textures */
  asm volatile("call (%0, %1, %2, %3), _rt_texture_get_gather_id, (%4, %5, %6, %7, %8);" :
               "=f"(f0), "=f"(f1), "=f"(f2), "=f"(f3) :
               "r"(tex), "r"(dim), "f"(x), "f"(y), "r"(comp) :
               );
  /* keep asm results from being mis-optimized (see rt_undefined_use) */
  rt_undefined_use((int)f0);
  rt_undefined_use((int)f1);
  rt_undefined_use((int)f2);
  rt_undefined_use((int)f3);
  return make_float4(f0, f1, f2, f3);
}
/* Sample the texture with sampler id 'tex' at (x, y, z) / array 'layer'
   using the sampler's base (default) filtering.  'dim' selects the texture
   dimensionality.  Returns the texel as float4. */
static __forceinline__ __device__ float4 rt_texture_get_base_id(int tex, int dim, float x, float y, float z, int layer)
{
  float f0, f1, f2, f3;
  asm volatile("call (%0, %1, %2, %3), _rt_texture_get_base_id, (%4, %5, %6, %7, %8, %9);" :
               "=f"(f0), "=f"(f1), "=f"(f2), "=f"(f3) :
               "r"(tex), "r"(dim), "f"(x), "f"(y), "f"(z), "r"(layer) :
               );
  /* keep asm results from being mis-optimized (see rt_undefined_use) */
  rt_undefined_use((int)f0);
  rt_undefined_use((int)f1);
  rt_undefined_use((int)f2);
  rt_undefined_use((int)f3);
  return make_float4(f0, f1, f2, f3);
}
/* Sample the texture with sampler id 'tex' at (x, y, z) / array 'layer'
   at an explicit mip 'level'.  Returns the texel as float4. */
static __forceinline__ __device__ float4 rt_texture_get_level_id(int tex, int dim, float x, float y, float z, int layer, float level)
{
  float f0, f1, f2, f3;
  asm volatile("call (%0, %1, %2, %3), _rt_texture_get_level_id, (%4, %5, %6, %7, %8, %9, %10);" :
               "=f"(f0), "=f"(f1), "=f"(f2), "=f"(f3) :
               "r"(tex), "r"(dim), "f"(x), "f"(y), "f"(z), "r"(layer), "f"(level) :
               );
  /* keep asm results from being mis-optimized (see rt_undefined_use) */
  rt_undefined_use((int)f0);
  rt_undefined_use((int)f1);
  rt_undefined_use((int)f2);
  rt_undefined_use((int)f3);
  return make_float4(f0, f1, f2, f3);
}
/* Sample the texture with sampler id 'tex' at (x, y, z) / array 'layer'
   with explicit screen-space gradients dPdx/dPdy (used for LOD selection).
   Returns the texel as float4. */
static __forceinline__ __device__ float4 rt_texture_get_grad_id(int tex, int dim, float x, float y, float z, int layer,
                                                                float dPdx_x, float dPdx_y, float dPdx_z,
                                                                float dPdy_x, float dPdy_y, float dPdy_z)
{
  float f0, f1, f2, f3;
  asm volatile("call (%0, %1, %2, %3), _rt_texture_get_grad_id, (%4, %5, %6, %7, %8, %9, %10, %11, %12, %13, %14, %15);" :
               "=f"(f0), "=f"(f1), "=f"(f2), "=f"(f3) :
               "r"(tex), "r"(dim), "f"(x), "f"(y), "f"(z), "r"(layer),
               "f"(dPdx_x), "f"(dPdx_y), "f"(dPdx_z),
               "f"(dPdy_x), "f"(dPdy_y), "f"(dPdy_z) :
               );
  /* keep asm results from being mis-optimized (see rt_undefined_use) */
  rt_undefined_use((int)f0);
  rt_undefined_use((int)f1);
  rt_undefined_use((int)f2);
  rt_undefined_use((int)f3);
  return make_float4(f0, f1, f2, f3);
}
/* Sample the texture with sampler id 'tex' at (x, y, z, w), returning a
   float4 result (float-typed texture read). */
static __forceinline__ __device__ float4 rt_texture_get_f_id(int tex, int dim, float x, float y, float z, float w)
{
  float f0, f1, f2, f3;
  asm volatile("call (%0, %1, %2, %3), _rt_texture_get_f_id, (%4, %5, %6, %7, %8, %9);" :
               "=f"(f0), "=f"(f1), "=f"(f2), "=f"(f3) :
               "r"(tex), "r"(dim), "f"(x), "f"(y), "f"(z), "f"(w) :
               );
  /* keep asm results from being mis-optimized (see rt_undefined_use) */
  rt_undefined_use((int)f0);
  rt_undefined_use((int)f1);
  rt_undefined_use((int)f2);
  rt_undefined_use((int)f3);
  return make_float4(f0, f1, f2, f3);
}
/* Sample the texture with sampler id 'tex' at (x, y, z, w), returning an
   int4 result (signed-integer-typed texture read). */
static __forceinline__ __device__ int4 rt_texture_get_i_id(int tex, int dim, float x, float y, float z, float w)
{
  int i0, i1, i2, i3;
  asm volatile("call (%0, %1, %2, %3), _rt_texture_get_i_id, (%4, %5, %6, %7, %8, %9);" :
               "=r"(i0), "=r"(i1), "=r"(i2), "=r"(i3) :
               "r"(tex), "r"(dim), "f"(x), "f"(y), "f"(z), "f"(w) :
               );
  /* keep asm results from being mis-optimized (see rt_undefined_use) */
  rt_undefined_use((int)i0);
  rt_undefined_use((int)i1);
  rt_undefined_use((int)i2);
  rt_undefined_use((int)i3);
  return make_int4(i0, i1, i2, i3);
}
/* Sample the texture with sampler id 'tex' at (x, y, z, w), returning a
   uint4 result (unsigned-integer-typed texture read). */
static __forceinline__ __device__ uint4 rt_texture_get_u_id(int tex, int dim, float x, float y, float z, float w)
{
  unsigned int u0, u1, u2, u3;
  asm volatile("call (%0, %1, %2, %3), _rt_texture_get_u_id, (%4, %5, %6, %7, %8, %9);" :
               "=r"(u0), "=r"(u1), "=r"(u2), "=r"(u3) :
               "r"(tex), "r"(dim), "f"(x), "f"(y), "f"(z), "f"(w) :
               );
  /* keep asm results from being mis-optimized (see rt_undefined_use) */
  rt_undefined_use((int)u0);
  rt_undefined_use((int)u1);
  rt_undefined_use((int)u2);
  rt_undefined_use((int)u3);
  return make_uint4(u0, u1, u2, u3);
}
/* Unfiltered texel fetch from the texture with sampler id 'tex' at integer
   coordinates (x, y, z, w).  Returns the texel as float4. */
static __forceinline__ __device__ float4 rt_texture_get_fetch_id(int tex, int dim, int x, int y, int z, int w)
{
  float f0, f1, f2, f3;
  asm volatile("call (%0, %1, %2, %3), _rt_texture_get_fetch_id, (%4, %5, %6, %7, %8, %9);" :
               "=f"(f0), "=f"(f1), "=f"(f2), "=f"(f3) :
               "r"(tex), "r"(dim), "r"(x), "r"(y), "r"(z), "r"(w) :
               );
  /* keep asm results from being mis-optimized (see rt_undefined_use) */
  rt_undefined_use((int)f0);
  rt_undefined_use((int)f1);
  rt_undefined_use((int)f2);
  rt_undefined_use((int)f3);
  return make_float4(f0, f1, f2, f3);
}
/* Return the address of element (i0_in, i1_in, i2_in, i3_in) of the buffer
   at 'buffer' with dimensionality 'dim' and element size 'element_size'.
   NOTE(review): the temporary declaration, the size_t->optix_size_t index
   conversions, the #else/#endif and the return were lost in extraction and
   are reconstructed — verify against the shipped header. */
static __forceinline__ __device__ void* rt_buffer_get(void* buffer, unsigned int dim, unsigned int element_size,
                                                      size_t i0_in, size_t i1_in, size_t i2_in, size_t i3_in)
{
  void* tmp;
  /* convert indices to the ABI width used by the _rt intrinsics */
  optix::optix_size_t i0 = i0_in, i1 = i1_in, i2 = i2_in, i3 = i3_in;
  asm volatile("call (%0), _rt_buffer_get" OPTIX_BITNESS_SUFFIX ", (%1, %2, %3, %4, %5, %6, %7);" :
               "=" OPTIX_ASM_PTR(tmp) :
               OPTIX_ASM_PTR(buffer), "r"(dim), "r"(element_size),
               OPTIX_ASM_SIZE_T(i0), OPTIX_ASM_SIZE_T(i1), OPTIX_ASM_SIZE_T(i2), OPTIX_ASM_SIZE_T(i3) :
               );
#if defined(__x86_64) || defined(AMD64) || defined(_M_AMD64)
  rt_undefined_use64((unsigned long long)tmp);
#else
  rt_undefined_use((int)tmp);
#endif
  return tmp;
}
/* Return the address of element (i0_in, i1_in, i2_in, i3_in) of the buffer
   identified by buffer id 'id' (bindless form of rt_buffer_get).
   NOTE(review): temporary, index conversions, #else/#endif and return were
   lost in extraction and are reconstructed. */
static __forceinline__ __device__ void* rt_buffer_get_id(int id, unsigned int dim, unsigned int element_size,
                                                         size_t i0_in, size_t i1_in, size_t i2_in, size_t i3_in)
{
  void* tmp;
  /* convert indices to the ABI width used by the _rt intrinsics */
  optix::optix_size_t i0 = i0_in, i1 = i1_in, i2 = i2_in, i3 = i3_in;
  asm volatile("call (%0), _rt_buffer_get_id" OPTIX_BITNESS_SUFFIX ", (%1, %2, %3, %4, %5, %6, %7);" :
               "=" OPTIX_ASM_PTR(tmp) :
               "r"(id), "r"(dim), "r"(element_size),
               OPTIX_ASM_SIZE_T(i0), OPTIX_ASM_SIZE_T(i1), OPTIX_ASM_SIZE_T(i2), OPTIX_ASM_SIZE_T(i3) :
               );
#if defined(__x86_64) || defined(AMD64) || defined(_M_AMD64)
  rt_undefined_use64((unsigned long long)tmp);
#else
  rt_undefined_use((int)tmp);
#endif
  return tmp;
}
/* Query the per-dimension element counts of the buffer at 'buffer'.
   Returns them packed into a size_t4. */
static __forceinline__ __device__ size_t4 rt_buffer_get_size(const void* buffer, unsigned int dim, unsigned int element_size)
{
  optix::optix_size_t d0, d1, d2, d3;
  asm volatile("call (%0, %1, %2, %3), _rt_buffer_get_size" OPTIX_BITNESS_SUFFIX ", (%4, %5, %6);" :
               "=" OPTIX_ASM_SIZE_T(d0), "=" OPTIX_ASM_SIZE_T(d1),
               "=" OPTIX_ASM_SIZE_T(d2), "=" OPTIX_ASM_SIZE_T(d3) :
               OPTIX_ASM_PTR(buffer), "r"(dim), "r"(element_size) :
               );
  return make_size_t4(d0, d1, d2, d3);
}
/* Query the per-dimension element counts of the buffer identified by buffer
   id 'id' (bindless form of rt_buffer_get_size).  Note the intrinsic name is
   _rt_buffer_get_id_size, as in the original source. */
static __forceinline__ __device__ size_t4 rt_buffer_get_size_id(int id, unsigned int dim, unsigned int element_size)
{
  optix::optix_size_t d0, d1, d2, d3;
  asm volatile("call (%0, %1, %2, %3), _rt_buffer_get_id_size" OPTIX_BITNESS_SUFFIX ", (%4, %5, %6);" :
               "=" OPTIX_ASM_SIZE_T(d0), "=" OPTIX_ASM_SIZE_T(d1),
               "=" OPTIX_ASM_SIZE_T(d2), "=" OPTIX_ASM_SIZE_T(d3) :
               "r"(id), "r"(dim), "r"(element_size) :
               );
  return make_size_t4(d0, d1, d2, d3);
}
/* Resolve the callable-program id 'id' to a callable device pointer.
   NOTE(review): the temporary declaration, the input operand, the
   #else/#endif and the return were lost in extraction and are
   reconstructed. */
static __forceinline__ __device__ void* rt_callable_program_from_id(int id)
{
  void* tmp;
  asm volatile("call (%0), _rt_callable_program_from_id" OPTIX_BITNESS_SUFFIX ", (%1);" :
               "=" OPTIX_ASM_PTR(tmp) :
               "r"(id) :
               );
#if defined(__x86_64) || defined(AMD64) || defined(_M_AMD64)
  rt_undefined_use64((unsigned long long)tmp);
#else
  rt_undefined_use((int)tmp);
#endif
  return tmp;
}
/* Trace a ray from 'origin' along 'direction' against node 'group' with the
   given ray type and [tmin, tmax] interval.  'prd' points to the per-ray
   payload of 'prd_size' bytes, read and written by the programs invoked
   during traversal. */
static __forceinline__ __device__ void rt_trace(unsigned int group, float3 origin, float3 direction, unsigned int ray_type,
                                                float tmin, float tmax, void* prd, unsigned int prd_size)
{
  /* scalarize the vectors so each component can be an asm operand */
  float ox = origin.x, oy = origin.y, oz = origin.z;
  float dx = direction.x, dy = direction.y, dz = direction.z;
#if defined(__x86_64) || defined(AMD64) || defined(_M_AMD64)
  rt_undefined_use64((unsigned long long)prd);
#else
  rt_undefined_use((int)prd);
#endif
  asm volatile("call _rt_trace" OPTIX_BITNESS_SUFFIX ", (%0, %1, %2, %3, %4, %5, %6, %7, %8, %9, %10, %11);" :
               /* no outputs */ :
               "r"(group), "f"(ox), "f"(oy), "f"(oz), "f"(dx), "f"(dy), "f"(dz),
               "r"(ray_type), "f"(tmin), "f"(tmax), OPTIX_ASM_PTR(prd), "r"(prd_size) :
               );
}
/* Convert the device pointer 'p' into an opaque 32-bit handle.
   NOTE(review): output operand, input operand and return were lost in
   extraction and are reconstructed — verify against the shipped header. */
static __forceinline__ __device__ unsigned int rt_pickle_pointer(void *p)
{
  unsigned int ret;
  asm volatile("call (%0), _rt_pickle_pointer" OPTIX_BITNESS_SUFFIX ", (%1);" :
               "=r"(ret) :
               OPTIX_ASM_PTR(p) :
               );
  return ret;
}
/* Inverse of rt_pickle_pointer: convert the opaque 32-bit handle 'p' back
   into a device pointer.
   NOTE(review): input operand and return were lost in extraction and are
   reconstructed. */
static __forceinline__ __device__ void * rt_unpickle_pointer(unsigned int p)
{
  void* ret;
  asm volatile("call (%0), _rt_unpickle_pointer" OPTIX_BITNESS_SUFFIX ", (%1);" :
               "=" OPTIX_ASM_PTR(ret) :
               "r"(p) :
               );
  return ret;
}
/* Report a candidate intersection at ray parameter 't' from an intersection
   program.  Returns true if the runtime accepts the candidate (t inside the
   current valid interval), in which case attribute writes and
   rt_report_intersection should follow. */
static __forceinline__ __device__ bool rt_potential_intersection(float t)
{
  int ret;
  asm volatile("call (%0), _rt_potential_intersection, (%1);" :
               "=r"(ret) :
               "f"(t) :
               );
  return ret;
}
/* Commit the previously reported candidate intersection using the material
   at index 'matlIndex'.  Returns true if the intersection was accepted. */
static __forceinline__ __device__ bool rt_report_intersection(unsigned int matlIndex)
{
  int ret;
  asm volatile("call (%0), _rt_report_intersection, (%1);" :
               "=r"(ret) :
               "r"(matlIndex) :
               );
  return ret;
}
/* From an any-hit program: reject the current intersection and continue
   traversal. */
static __forceinline__ __device__ void rt_ignore_intersection()
{
  asm volatile("call _rt_ignore_intersection, ();");
}
/* From an any-hit program: accept the current hit and stop traversing this
   ray. */
static __forceinline__ __device__ void rt_terminate_ray()
{
  asm volatile("call _rt_terminate_ray, ();");
}
/* From a selector visit program: traverse the child node at 'index'. */
static __forceinline__ __device__ void rt_intersect_child(unsigned int index)
{
  asm volatile("call _rt_intersect_child, (%0);" :
               /* no outputs */ :
               "r"(index) :
               );
}
/* Transform the point 'p' by the transform selected by 'kind'.  The tuple is
   passed with w = 1.0f and the result is divided by the returned w component
   (homogeneous divide). */
static __forceinline__ __device__ float3 rt_transform_point( RTtransformkind kind, const float3& p )
{
  float f0, f1, f2, f3;
  asm volatile("call (%0, %1, %2, %3), _rt_transform_tuple, (%4, %5, %6, %7, %8);" :
               "=f"(f0), "=f"(f1), "=f"(f2), "=f"(f3) :
               "r"(kind), "f"(p.x), "f"(p.y), "f"(p.z), "f"(1.0f) :
               );
  /* keep asm results from being mis-optimized (see rt_undefined_use) */
  rt_undefined_use((int)f0);
  rt_undefined_use((int)f1);
  rt_undefined_use((int)f2);
  rt_undefined_use((int)f3);
  return make_float3( f0/f3, f1/f3, f2/f3 );
}
/* Transform the direction vector 'v' by the transform selected by 'kind'.
   The tuple is passed with w = 0.0f, so translation does not apply and no
   homogeneous divide is performed. */
static __forceinline__ __device__ float3 rt_transform_vector( RTtransformkind kind, const float3& v )
{
  float f0, f1, f2, f3;
  asm volatile("call (%0, %1, %2, %3), _rt_transform_tuple, (%4, %5, %6, %7, %8);" :
               "=f"(f0), "=f"(f1), "=f"(f2), "=f"(f3) :
               "r"(kind), "f"(v.x), "f"(v.y), "f"(v.z), "f"(0.0f) :
               );
  /* keep asm results from being mis-optimized (see rt_undefined_use) */
  rt_undefined_use((int)f0);
  rt_undefined_use((int)f1);
  rt_undefined_use((int)f2);
  rt_undefined_use((int)f3);
  return make_float3( f0, f1, f2 );
}
/* Transform the normal 'n' by the transform selected by 'kind'.
   NOTE(review): the asm input operands were lost in extraction.  Normals
   must be transformed by the inverse transpose, so the inputs are
   reconstructed as kind | RT_INTERNAL_INVERSE_TRANSPOSE with w = 0.0f —
   verify against the shipped optix_internal.h / optix_defines.h. */
static __forceinline__ __device__ float3 rt_transform_normal( RTtransformkind kind, const float3& n )
{
  float f0, f1, f2, f3;
  asm volatile("call (%0, %1, %2, %3), _rt_transform_tuple, (%4, %5, %6, %7, %8);" :
               "=f"(f0), "=f"(f1), "=f"(f2), "=f"(f3) :
               "r"( kind | RT_INTERNAL_INVERSE_TRANSPOSE ), "f"(n.x), "f"(n.y), "f"(n.z), "f"(0.0f) :
               );
  /* keep asm results from being mis-optimized (see rt_undefined_use) */
  rt_undefined_use((int)f0);
  rt_undefined_use((int)f1);
  rt_undefined_use((int)f2);
  rt_undefined_use((int)f3);
  return make_float3( f0, f1, f2 );
}
/* Fetch the 16 floats of the transform selected by 'kind' into 'matrix'. */
static __forceinline__ __device__ void rt_get_transform( RTtransformkind kind, float matrix[16] )
{
  asm volatile( "call (%0, %1, %2, %3, %4, %5, %6, %7, %8, %9, %10, %11, %12, %13, %14, %15), _rt_get_transform, (%16);" :
                "=f"(matrix[ 0]), "=f"(matrix[ 1]), "=f"(matrix[ 2]), "=f"(matrix[ 3]),
                "=f"(matrix[ 4]), "=f"(matrix[ 5]), "=f"(matrix[ 6]), "=f"(matrix[ 7]),
                "=f"(matrix[ 8]), "=f"(matrix[ 9]), "=f"(matrix[10]), "=f"(matrix[11]),
                "=f"(matrix[12]), "=f"(matrix[13]), "=f"(matrix[14]), "=f"(matrix[15]) :
                "r"( kind ) :
                );
}
/* Raise a user exception with the given 'code'; handled by the context's
   exception program. */
static __forceinline__ __device__ void rt_throw( unsigned int code )
{
  asm volatile("call _rt_throw, (%0);" :
               /* no outputs */ :
               "r"(code) :
               );
}
/* Return the code of the exception currently being handled (valid inside an
   exception program). */
static __forceinline__ __device__ unsigned int rt_get_exception_code()
{
  unsigned int ret;
  asm volatile("call (%0), _rt_get_exception_code, ();" :
               "=r"(ret) :
               );
  return ret;
}
/* Compile-time map from an rtPrintf argument type to the 4-byte type
   descriptor written ahead of the argument's payload by rt_print_arg:
   0 for the default (32-bit) case, 1 for 64-bit integers, 2 for float,
   3 for double. */
template<typename T> struct rt_print_t { static const int desc = 0; };
template<> struct rt_print_t<long long> { static const int desc = 1; };
template<> struct rt_print_t<unsigned long long> { static const int desc = 1; };
template<> struct rt_print_t<float> { static const int desc = 2; };
template<> struct rt_print_t<double> { static const int desc = 3; };
/* Device-side strlen: number of characters in 's' before the terminating
   NUL.  NOTE(review): the body was lost entirely in extraction; this is a
   standard pointer-walk reconstruction — verify against the shipped
   header. */
static __forceinline__ __device__ int rt_print_strlen( const char* s )
{
  const char* p = s;
  while( *p )
    ++p;
  return static_cast<int>( p - s );
}
/* Serialize one rtPrintf argument into the device print stream at byte
   offset 'off': first a 4-byte type descriptor (see rt_print_t), then the
   argument payload padded/widened to 32-bit words.  Returns the number of
   bytes consumed.
   NOTE(review): the template line, braces, local declarations and return
   were lost in extraction and are reconstructed — verify against the
   shipped header. */
template<typename T>
static __forceinline__ __device__ int rt_print_arg( T arg, int off )
{
  const int sz = max( 4, (int)sizeof( arg ) );
  const int typedesc = rt_print_t<T>::desc;

  const unsigned int* p;
  unsigned int iarg;
  if( sizeof(arg) < 4 )
  {
    /* widen sub-word arguments to a full 32-bit word */
    iarg = (unsigned int)arg;
    p = &iarg;
  }
  else
  {
    p = (unsigned int*)&arg;
  }

  /* write the type descriptor word */
  asm volatile("call (), _rt_print_write32, (%0, %1);" :
               /* no outputs */ :
               "r"(typedesc), "r"(off) :
               );
  /* write the payload, one 32-bit word at a time, after the descriptor */
  for( int i = 0; i < sz/4; ++i )
  {
    asm volatile("call (), _rt_print_write32, (%0, %1);" :
                 /* no outputs */ :
                 "r"(p[i]), "r"( off + (i+1)*4 ) :
                 );
  }
  return 4 + sz;
}
/* Returns nonzero when printing is enabled for the current launch index. */
static __forceinline__ __device__ int rt_print_active()
{
  int ret;
  asm volatile("call (%0), _rt_print_active, ();" :
               "=r"(ret) :
               );
  return ret;
}
/* Begin a print record for format string 'fmt' reserving 'sz' bytes in the
   print stream.  Returns the starting offset for the arguments, or 0 when
   no space could be allocated. */
static __forceinline__ __device__ int rt_print_start( const char* fmt, int sz )
{
  int ret;
  asm volatile("call (%0), _rt_print_start" OPTIX_BITNESS_SUFFIX ", (%1, %2);" :
               "=r"(ret) :
               OPTIX_ASM_PTR(fmt), "r"(sz) :
               );
  return ret;
}
/* Helper macros expanded inside the generated rtPrintf overloads (which
   declare 'fmt' and use 'sz'/'off'): _1 computes the record size, _2 starts
   the record, _ARG_1 accounts for one argument's size, _ARG_2 serializes it.
   NOTE(review): the line continuations and several lines of these macros
   were lost in extraction; reconstructed — verify against the shipped
   header. */
#define _RT_PRINTF_1()                                        \
  if( !optix::rt_print_active() )                             \
    return;                                                   \
  int off;                                                    \
  const int fmtlen = optix::rt_print_strlen( fmt );           \
  int sz = 4 + fmtlen + 1;                                    \
  sz = ( sz + 3 ) & ~3;

#define _RT_PRINTF_2()                                        \
  if( !(off = optix::rt_print_start( fmt, sz )) )             \
    return;

#define _RT_PRINTF_ARG_1( a )                                 \
  sz += max( 4, static_cast<unsigned int>(sizeof( a )) );

#define _RT_PRINTF_ARG_2( a )                                 \
  off += optix::rt_print_arg( a, off );
/* The following lines are documentation-generator artifacts (doxygen
   cross-reference tooltips captured during extraction), not source code:
     "Opaque handle to an OptiX object." (optix_device.h:179)
     optix_defines.h:52
     optix_internal.h:466
     RTtransformkind (optix_defines.h:45) */