/* OptiX 3.9 — NVIDIA OptiX Acceleration Engine
 * Doxygen source listing for optix_internal.h
 * (navigation: All Classes | Files | Functions | Variables | Typedefs |
 *  Enumerations | Enumerator | Macros | Groups)
 */
1 
2 /*
3  * Copyright (c) 2008 - 2009 NVIDIA Corporation. All rights reserved.
4  *
5  * NVIDIA Corporation and its licensors retain all intellectual property and proprietary
6  * rights in and to this software, related documentation and any modifications thereto.
7  * Any use, reproduction, disclosure or distribution of this software and related
8  * documentation without an express license agreement from NVIDIA Corporation is strictly
9  * prohibited.
10  *
11  * TO THE MAXIMUM EXTENT PERMITTED BY APPLICABLE LAW, THIS SOFTWARE IS PROVIDED *AS IS*
12  * AND NVIDIA AND ITS SUPPLIERS DISCLAIM ALL WARRANTIES, EITHER EXPRESS OR IMPLIED,
13  * INCLUDING, BUT NOT LIMITED TO, IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
14  * PARTICULAR PURPOSE. IN NO EVENT SHALL NVIDIA OR ITS SUPPLIERS BE LIABLE FOR ANY
15  * SPECIAL, INCIDENTAL, INDIRECT, OR CONSEQUENTIAL DAMAGES WHATSOEVER (INCLUDING, WITHOUT
16  * LIMITATION, DAMAGES FOR LOSS OF BUSINESS PROFITS, BUSINESS INTERRUPTION, LOSS OF
17  * BUSINESS INFORMATION, OR ANY OTHER PECUNIARY LOSS) ARISING OUT OF THE USE OF OR
18  * INABILITY TO USE THIS SOFTWARE, EVEN IF NVIDIA HAS BEEN ADVISED OF THE POSSIBILITY OF
19  * SUCH DAMAGES
20  */
21 
22 #ifndef __optix_optix_internal_h__
23 #define __optix_optix_internal_h__
24 
25 #include "optix_datatypes.h"
26 #include "optix_defines.h"
27 #include "../optix_sizet.h"
28 
29 struct rtObject;
30 
31 namespace optix {
32 
/* Dummy "use" of a 32-bit value.
 *
 * Results of the inline PTX "call" instructions below are not visible to
 * the compiler as used, so the wrappers feed each result through this
 * function to keep the values (and the calls producing them) from being
 * optimized away.  On CUDA 4.1+ targeting sm_20+ a force-inlined empty
 * body is sufficient; older toolchains appear to need a real, non-inlined
 * call to act as the barrier (hence __noinline__ there).
 */
#if (defined(__CUDACC__) && (__CUDA_ARCH__ > 0))
#if ((CUDA_VERSION >= 4010) || (CUDART_VERSION >= 4010)) && __CUDA_ARCH__ >= 200
  __forceinline__
#else
  __noinline__
#endif
  __device__ void rt_undefined_use(int)
  {
  }
#else
  /* Host compilation path: declaration only. */
  void rt_undefined_use(int);
#endif
46 
/* Dummy "use" of a 64-bit value; see rt_undefined_use above for why. */
#if (defined(__CUDACC__) && (__CUDA_ARCH__ > 0))
#if ((CUDA_VERSION >= 4010) || (CUDART_VERSION >= 4010)) && __CUDA_ARCH__ >= 200
  __forceinline__
#else
  __noinline__
#endif
  __device__ void rt_undefined_use64(unsigned long long)
  {
  }
#else
  /* Host compilation path: declaration only.  The parameter type is
     unsigned long long to match the device definition above and the
     call sites, which all pass (unsigned long long) values; the former
     int parameter silently truncated 64-bit pointers on this path. */
  void rt_undefined_use64(unsigned long long);
#endif
60 
61  static __forceinline__ __device__ uint3 rt_texture_get_size_id(int tex)
62  {
63  unsigned int u0, u1, u2;
64 
65  asm volatile("call (%0, %1, %2), _rt_texture_get_size_id, (%3);" :
66  "=r"(u0), "=r"(u1), "=r"(u2) : "r"(tex) : );
67 
68  rt_undefined_use((int)u0);
69  rt_undefined_use((int)u1);
70  rt_undefined_use((int)u2);
71 
72  return make_uint3(u0, u1, u2);
73  }
74 
75  static __forceinline__ __device__ float4 rt_texture_get_gather_id(int tex, float x, float y, int comp)
76  {
77  float f0, f1, f2, f3;
78  int dim = 2;
79 
80  asm volatile("call (%0, %1, %2, %3), _rt_texture_get_gather_id, (%4, %5, %6, %7, %8);" :
81  "=f"(f0), "=f"(f1), "=f"(f2), "=f"(f3) :
82  "r"(tex), "r"(dim), "f"(x), "f"(y), "r"(comp) :
83  );
84 
85  rt_undefined_use((int)f0);
86  rt_undefined_use((int)f1);
87  rt_undefined_use((int)f2);
88  rt_undefined_use((int)f3);
89 
90  return make_float4(f0, f1, f2, f3);
91  }
92 
93  static __forceinline__ __device__ float4 rt_texture_get_base_id(int tex, int dim, float x, float y, float z, int layer)
94  {
95  float f0, f1, f2, f3;
96 
97  asm volatile("call (%0, %1, %2, %3), _rt_texture_get_base_id, (%4, %5, %6, %7, %8, %9);" :
98  "=f"(f0), "=f"(f1), "=f"(f2), "=f"(f3) :
99  "r"(tex), "r"(dim), "f"(x), "f"(y), "f"(z), "r"(layer) :
100  );
101 
102  rt_undefined_use((int)f0);
103  rt_undefined_use((int)f1);
104  rt_undefined_use((int)f2);
105  rt_undefined_use((int)f3);
106 
107  return make_float4(f0, f1, f2, f3);
108  }
109 
110  static __forceinline__ __device__ float4
111  rt_texture_get_level_id(int tex, int dim, float x, float y, float z, int layer, float level)
112  {
113  float f0, f1, f2, f3;
114 
115  asm volatile("call (%0, %1, %2, %3), _rt_texture_get_level_id, (%4, %5, %6, %7, %8, %9, %10);" :
116  "=f"(f0), "=f"(f1), "=f"(f2), "=f"(f3) :
117  "r"(tex), "r"(dim), "f"(x), "f"(y), "f"(z), "r"(layer), "f"(level) :
118  );
119 
120  rt_undefined_use((int)f0);
121  rt_undefined_use((int)f1);
122  rt_undefined_use((int)f2);
123  rt_undefined_use((int)f3);
124 
125  return make_float4(f0, f1, f2, f3);
126  }
127 
128  static __forceinline__ __device__ float4 rt_texture_get_grad_id(int tex, int dim, float x, float y, float z, int layer,
129  float dPdx_x, float dPdx_y, float dPdx_z, float dPdy_x, float dPdy_y, float dPdy_z)
130  {
131  float f0, f1, f2, f3;
132 
133  asm volatile("call (%0, %1, %2, %3), _rt_texture_get_grad_id, (%4, %5, %6, %7, %8, %9, %10, %11, %12, %13, %14, %15);" :
134  "=f"(f0), "=f"(f1), "=f"(f2), "=f"(f3) :
135  "r"(tex), "r"(dim), "f"(x), "f"(y), "f"(z), "r"(layer), "f"(dPdx_x), "f"(dPdx_y), "f"(dPdx_z), "f"(dPdy_x), "f"(dPdy_y), "f"(dPdy_z) :
136  );
137 
138  rt_undefined_use((int)f0);
139  rt_undefined_use((int)f1);
140  rt_undefined_use((int)f2);
141  rt_undefined_use((int)f3);
142 
143  return make_float4(f0, f1, f2, f3);
144  }
145 
146  static __forceinline__ __device__ float4 rt_texture_get_f_id(int tex, int dim, float x, float y, float z, float w)
147  {
148  float f0, f1, f2, f3;
149 
150  asm volatile("call (%0, %1, %2, %3), _rt_texture_get_f_id, (%4, %5, %6, %7, %8, %9);" :
151  "=f"(f0), "=f"(f1), "=f"(f2), "=f"(f3) :
152  "r"(tex), "r"(dim), "f"(x), "f"(y), "f"(z), "f"(w) :
153  );
154 
155  rt_undefined_use((int)f0);
156  rt_undefined_use((int)f1);
157  rt_undefined_use((int)f2);
158  rt_undefined_use((int)f3);
159 
160  return make_float4(f0, f1, f2, f3);
161  }
162 
163  static __forceinline__ __device__ int4 rt_texture_get_i_id(int tex, int dim, float x, float y, float z, float w)
164  {
165  int i0, i1, i2, i3;
166 
167  asm volatile("call (%0, %1, %2, %3), _rt_texture_get_i_id, (%4, %5, %6, %7, %8, %9);" :
168  "=r"(i0), "=r"(i1), "=r"(i2), "=r"(i3) :
169  "r"(tex), "r"(dim), "f"(x), "f"(y), "f"(z), "f"(w) :
170  );
171 
172  rt_undefined_use((int)i0);
173  rt_undefined_use((int)i1);
174  rt_undefined_use((int)i2);
175  rt_undefined_use((int)i3);
176 
177  return make_int4(i0, i1, i2, i3);
178  }
179 
180  static __forceinline__ __device__ uint4 rt_texture_get_u_id(int tex, int dim, float x, float y, float z, float w)
181  {
182  unsigned int u0, u1, u2, u3;
183 
184  asm volatile("call (%0, %1, %2, %3), _rt_texture_get_u_id, (%4, %5, %6, %7, %8, %9);" :
185  "=r"(u0), "=r"(u1), "=r"(u2), "=r"(u3) :
186  "r"(tex), "r"(dim), "f"(x), "f"(y), "f"(z), "f"(w) :
187  );
188 
189  rt_undefined_use((int)u0);
190  rt_undefined_use((int)u1);
191  rt_undefined_use((int)u2);
192  rt_undefined_use((int)u3);
193 
194  return make_uint4(u0, u1, u2, u3);
195  }
196 
197  static __forceinline__ __device__ float4 rt_texture_get_fetch_id(int tex, int dim, int x, int y, int z, int w)
198  {
199  float f0, f1, f2, f3;
200 
201  asm volatile("call (%0, %1, %2, %3), _rt_texture_get_fetch_id, (%4, %5, %6, %7, %8, %9);" :
202  "=f"(f0), "=f"(f1), "=f"(f2), "=f"(f3) :
203  "r"(tex), "r"(dim), "r"(x), "r"(y), "r"(z), "r"(w) :
204  );
205 
206  rt_undefined_use((int)f0);
207  rt_undefined_use((int)f1);
208  rt_undefined_use((int)f2);
209  rt_undefined_use((int)f3);
210 
211  return make_float4(f0, f1, f2, f3);
212  }
213 
214  static __forceinline__ __device__ void* rt_buffer_get(void* buffer, unsigned int dim, unsigned int element_size,
215  size_t i0_in, size_t i1_in, size_t i2_in, size_t i3_in)
216  {
217  optix::optix_size_t i0, i1, i2, i3;
218  i0 = i0_in;
219  i1 = i1_in;
220  i2 = i2_in;
221  i3 = i3_in;
222  void* tmp;
223  asm volatile("call (%0), _rt_buffer_get" OPTIX_BITNESS_SUFFIX ", (%1, %2, %3, %4, %5, %6, %7);" :
224  "=" OPTIX_ASM_PTR(tmp) :
225  OPTIX_ASM_PTR(buffer), "r"(dim), "r"(element_size),
226  OPTIX_ASM_SIZE_T(i0), OPTIX_ASM_SIZE_T(i1), OPTIX_ASM_SIZE_T(i2), OPTIX_ASM_SIZE_T(i3) :
227  );
228 
229 #if defined(__x86_64) || defined(AMD64) || defined(_M_AMD64)
230  rt_undefined_use64((unsigned long long)tmp);
231 #else
232  rt_undefined_use((int)tmp);
233 #endif
234  return tmp;
235  }
236 
237  static __forceinline__ __device__ void* rt_buffer_get_id(int id, unsigned int dim, unsigned int element_size,
238  size_t i0_in, size_t i1_in, size_t i2_in, size_t i3_in)
239  {
240  optix::optix_size_t i0, i1, i2, i3;
241  i0 = i0_in;
242  i1 = i1_in;
243  i2 = i2_in;
244  i3 = i3_in;
245  void* tmp;
246  asm volatile("call (%0), _rt_buffer_get_id" OPTIX_BITNESS_SUFFIX ", (%1, %2, %3, %4, %5, %6, %7);" :
247  "=" OPTIX_ASM_PTR(tmp) :
248  "r"(id), "r"(dim), "r"(element_size),
249  OPTIX_ASM_SIZE_T(i0), OPTIX_ASM_SIZE_T(i1), OPTIX_ASM_SIZE_T(i2), OPTIX_ASM_SIZE_T(i3) :
250  );
251 
252 #if defined(__x86_64) || defined(AMD64) || defined(_M_AMD64)
253  rt_undefined_use64((unsigned long long)tmp);
254 #else
255  rt_undefined_use((int)tmp);
256 #endif
257  return tmp;
258  }
259 
260  static __forceinline__ __device__ size_t4 rt_buffer_get_size(const void* buffer, unsigned int dim, unsigned int element_size)
261  {
262  optix::optix_size_t d0, d1, d2, d3;
263  asm volatile("call (%0, %1, %2, %3), _rt_buffer_get_size" OPTIX_BITNESS_SUFFIX ", (%4, %5, %6);" :
264  "=" OPTIX_ASM_SIZE_T(d0), "=" OPTIX_ASM_SIZE_T(d1), "=" OPTIX_ASM_SIZE_T(d2), "=" OPTIX_ASM_SIZE_T(d3) :
265  OPTIX_ASM_PTR(buffer), "r"(dim), "r"(element_size) :
266  );
267 
268  return make_size_t4(d0, d1, d2, d3);
269  }
270 
271  static __forceinline__ __device__ size_t4 rt_buffer_get_size_id(int id, unsigned int dim, unsigned int element_size)
272  {
273  optix::optix_size_t d0, d1, d2, d3;
274  asm volatile("call (%0, %1, %2, %3), _rt_buffer_get_id_size" OPTIX_BITNESS_SUFFIX ", (%4, %5, %6);" :
275  "=" OPTIX_ASM_SIZE_T(d0), "=" OPTIX_ASM_SIZE_T(d1), "=" OPTIX_ASM_SIZE_T(d2), "=" OPTIX_ASM_SIZE_T(d3) :
276  "r"(id), "r"(dim), "r"(element_size) :
277  );
278 
279  return make_size_t4(d0, d1, d2, d3);
280  }
281 
282  static __forceinline__ __device__ void* rt_callable_program_from_id(int id)
283  {
284  void* tmp;
285  asm volatile("call (%0), _rt_callable_program_from_id" OPTIX_BITNESS_SUFFIX ", (%1);" :
286  "=" OPTIX_ASM_PTR(tmp) :
287  "r"(id):
288  );
289 
290 #if defined(__x86_64) || defined(AMD64) || defined(_M_AMD64)
291  rt_undefined_use64((unsigned long long)tmp);
292 #else
293  rt_undefined_use((int)tmp);
294 #endif
295  return tmp;
296  }
297 
298  static __forceinline__ __device__ void rt_trace(unsigned int group, float3 origin, float3 direction, unsigned int ray_type,
299  float tmin, float tmax, void* prd, unsigned int prd_size)
300  {
301  float ox = origin.x, oy = origin.y, oz = origin.z;
302  float dx = direction.x, dy = direction.y, dz = direction.z;
303 #if defined(__x86_64) || defined(AMD64) || defined(_M_AMD64)
304  rt_undefined_use64((unsigned long long)prd);
305 #else
306  rt_undefined_use((int)prd);
307 #endif
308  asm volatile("call _rt_trace" OPTIX_BITNESS_SUFFIX ", (%0, %1, %2, %3, %4, %5, %6, %7, %8, %9, %10, %11);" :
309  /* no return value */ :
310  "r"(group), "f"(ox), "f"(oy), "f"(oz), "f"(dx), "f"(dy), "f"(dz),
311  "r"(ray_type), "f"(tmin), "f"(tmax), OPTIX_ASM_PTR(prd), "r"(prd_size) :
312  );
313  }
314 
315  static __forceinline__ __device__ unsigned int rt_pickle_pointer(void *p)
316  {
317  unsigned int ret;
318  asm volatile("call (%0), _rt_pickle_pointer" OPTIX_BITNESS_SUFFIX ", (%1);" :
319  "=r"(ret) :
320  OPTIX_ASM_PTR(p):
321  );
322 
323  return ret;
324  }
325 
326  static __forceinline__ __device__ void * rt_unpickle_pointer(unsigned int p)
327  {
328  void * ret;
329  asm volatile("call (%0), _rt_unpickle_pointer" OPTIX_BITNESS_SUFFIX ", (%1);" :
330  "=" OPTIX_ASM_PTR(ret) :
331  "r"(p):
332  );
333 
334  return ret;
335  }
336 
337  static __forceinline__ __device__ bool rt_potential_intersection(float t)
338  {
339  int ret;
340  asm volatile("call (%0), _rt_potential_intersection, (%1);" :
341  "=r"(ret) :
342  "f"(t):
343  );
344 
345  return ret;
346  }
347 
348  static __forceinline__ __device__ bool rt_report_intersection(unsigned int matlIndex)
349  {
350  int ret;
351  asm volatile("call (%0), _rt_report_intersection, (%1);" :
352  "=r"(ret) :
353  "r"(matlIndex) :
354  );
355 
356  return ret;
357  }
358 
359  static __forceinline__ __device__ void rt_ignore_intersection()
360  {
361  asm volatile("call _rt_ignore_intersection, ();");
362  }
363 
364  static __forceinline__ __device__ void rt_terminate_ray()
365  {
366  asm volatile("call _rt_terminate_ray, ();");
367  }
368 
369  static __forceinline__ __device__ void rt_intersect_child(unsigned int index)
370  {
371  asm volatile("call _rt_intersect_child, (%0);" :
372  /* no return value */ :
373  "r"(index) :
374  );
375  }
376 
377  static __forceinline__ __device__ float3 rt_transform_point( RTtransformkind kind, const float3& p )
378  {
379 
380  float f0, f1, f2, f3;
381  asm volatile("call (%0, %1, %2, %3), _rt_transform_tuple, (%4, %5, %6, %7, %8);" :
382  "=f"(f0), "=f"(f1), "=f"(f2), "=f"(f3) :
383  "r"(kind), "f"(p.x), "f"(p.y), "f"(p.z), "f"(1.0f) :
384  );
385 
386  rt_undefined_use((int)f0);
387  rt_undefined_use((int)f1);
388  rt_undefined_use((int)f2);
389  rt_undefined_use((int)f3);
390  return make_float3( f0/f3, f1/f3, f2/f3 );
391 
392  }
393 
394  static __forceinline__ __device__ float3 rt_transform_vector( RTtransformkind kind, const float3& v )
395  {
396  float f0, f1, f2, f3;
397  asm volatile("call (%0, %1, %2, %3), _rt_transform_tuple, (%4, %5, %6, %7, %8);" :
398  "=f"(f0), "=f"(f1), "=f"(f2), "=f"(f3) :
399  "r"(kind), "f"(v.x), "f"(v.y), "f"(v.z), "f"(0.0f) :
400  );
401 
402  rt_undefined_use((int)f0);
403  rt_undefined_use((int)f1);
404  rt_undefined_use((int)f2);
405  rt_undefined_use((int)f3);
406  f3 = f3;
407  return make_float3( f0, f1, f2 );
408  }
409 
410  static __forceinline__ __device__ float3 rt_transform_normal( RTtransformkind kind, const float3& n )
411  {
412  float f0, f1, f2, f3;
413  asm volatile("call (%0, %1, %2, %3), _rt_transform_tuple, (%4, %5, %6, %7, %8);" :
414  "=f"(f0), "=f"(f1), "=f"(f2), "=f"(f3) :
415  "r"(kind | RT_INTERNAL_INVERSE_TRANSPOSE ), "f"(n.x), "f"(n.y), "f"(n.z), "f"(0.0f) :
416  );
417 
418  rt_undefined_use((int)f0);
419  rt_undefined_use((int)f1);
420  rt_undefined_use((int)f2);
421  rt_undefined_use((int)f3);
422  f3 = f3;
423  return make_float3( f0, f1, f2 );
424  }
425 
426  static __forceinline__ __device__ void rt_get_transform( RTtransformkind kind, float matrix[16] )
427  {
428  asm volatile("call (%0, %1, %2, %3, %4, %5, %6, %7, %8, %9, %10, %11, %12, %13, %14, %15), _rt_get_transform, (%16);" :
429  "=f"(matrix[ 0]), "=f"(matrix[ 1]), "=f"(matrix[ 2]), "=f"(matrix[ 3]),
430  "=f"(matrix[ 4]), "=f"(matrix[ 5]), "=f"(matrix[ 6]), "=f"(matrix[ 7]),
431  "=f"(matrix[ 8]), "=f"(matrix[ 9]), "=f"(matrix[10]), "=f"(matrix[11]),
432  "=f"(matrix[12]), "=f"(matrix[13]), "=f"(matrix[14]), "=f"(matrix[15]) :
433  "r"( kind ) :
434  );
435  }
436 
437  static __forceinline__ __device__ void rt_throw( unsigned int code )
438  {
439  asm volatile("call _rt_throw, (%0);" :
440  /* no return value */ :
441  "r"(code) :
442  );
443  }
444 
445  static __forceinline__ __device__ unsigned int rt_get_exception_code()
446  {
447  unsigned int result;
448  asm volatile("call (%0), _rt_get_exception_code, ();" :
449  "=r"(result) :
450  );
451 
452  return result;
453  }
454 
455  /*
456  Printing
457  */
458 
459  /*
460  Type descriptors for printf arguments:
461  0 = 32 bit integer value
462  1 = 64 bit integer value
463  2 = 32 bit float value
464  3 = 64 bit double value
465  */
466  template<typename T> struct rt_print_t { static const int desc = 0; };
467  template<> struct rt_print_t<long long> { static const int desc = 1; };
468  template<> struct rt_print_t<unsigned long long> { static const int desc = 1; };
469  template<> struct rt_print_t<float> { static const int desc = 2; };
470  template<> struct rt_print_t<double> { static const int desc = 3; };
471 
472  static __forceinline__ __device__ int rt_print_strlen( const char* s )
473  {
474  const char* p = s;
475  while( *p ) ++p;
476  return p - s;
477  }
478 
479  template<typename T>
480  static __forceinline__ __device__ int rt_print_arg( T arg, int off )
481  {
482  const int sz = max( 4, (int)sizeof( arg ) );
483  const int typedesc = rt_print_t<T>::desc;
484 
485  const unsigned int* p;
486 
487  /* Get a pointer to a (at least) 32 bit value. */
488  unsigned int iarg;
489  if( sizeof(arg) < 4 )
490  {
491  iarg = (unsigned int)arg;
492  p = &iarg;
493  }
494  else
495  {
496  p = (unsigned int*)&arg;
497  }
498 
499  /* Write type descriptor. */
500  asm volatile("call (), _rt_print_write32, (%0, %1);" :
501  :
502  "r"(typedesc), "r"(off) :
503  );
504 
505  /* Write argument. */
506  for( int i=0; i<sz/4; ++i )
507  {
508  asm volatile("call (), _rt_print_write32, (%0, %1);" :
509  :
510  "r"(p[i]), "r"( off + (i+1)*4 ) :
511  );
512  }
513 
514  return sz;
515  }
516 
517  static __forceinline__ __device__ int rt_print_active()
518  {
519  int ret;
520  asm volatile("call (%0), _rt_print_active, ();" :
521  "=r"(ret) :
522  :
523  );
524  return ret;
525  }
526 
527  static __forceinline__ __device__ int rt_print_start( const char* fmt, int sz )
528  {
529  int ret;
530  asm volatile("call (%0), _rt_print_start" OPTIX_BITNESS_SUFFIX ", (%1, %2);" :
531  "=r"(ret) :
532  OPTIX_ASM_PTR(fmt), "r"(sz) :
533  );
534  return ret;
535  }
536 
/* Building blocks for the rtPrintf overloads (expanded elsewhere):
 * _RT_PRINTF_1     - bail out if printing is off; start the size tally
 *                    ('sz') with header + format string, 4-byte aligned.
 * _RT_PRINTF_ARG_1 - add one argument's descriptor + payload to 'sz'.
 * _RT_PRINTF_2     - reserve 'sz' bytes; 'off' is the write offset.
 * _RT_PRINTF_ARG_2 - write one argument at 'off' and advance it.
 * These expand into the caller's scope and rely on locals 'fmt', 'sz'
 * and 'off' introduced by the earlier macros. */

#define _RT_PRINTF_1() \
  if( !optix::rt_print_active() ) \
    return; \
  /* Compute length of header (=batchsize) plus format string. */ \
  const int fmtlen = optix::rt_print_strlen( fmt ); \
  int sz = 4 + fmtlen + 1; \
  sz = (sz+3) & ~3; /* align */

#define _RT_PRINTF_2() \
  int off; /* offset where to start writing args */ \
  if( !(off=optix::rt_print_start(fmt,sz)) ) \
    return; /* print buffer is full */

#define _RT_PRINTF_ARG_1( a ) \
  /* Sum up argument sizes. */ \
  sz += 4; /* type descriptor */ \
  sz += max( 4, static_cast<unsigned int>(sizeof( a )) );

#define _RT_PRINTF_ARG_2( a ) \
  /* Write out argument. */ \
  off += optix::rt_print_arg( a, off ); \
  off += 4; /* space for type desc */
559 
560 } /* end namespace optix */
561 
562 #endif /* __optix_optix_internal_h__ */
/* Doxygen cross-reference footnotes from the original listing:
 * - rtObject: opaque handle to an OptiX object (optix_device.h:179)
 * - OptiX public API (optix_defines.h:52)
 * - OptiX public API (optix_internal.h:466)
 * - RTtransformkind (optix_defines.h:45)
 */