clang 20.0.0git
__clang_cuda_cmath.h
Go to the documentation of this file.
1/*===---- __clang_cuda_cmath.h - Device-side CUDA cmath support ------------===
2 *
3 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 * See https://llvm.org/LICENSE.txt for license information.
5 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 *
7 *===-----------------------------------------------------------------------===
8 */
9#ifndef __CLANG_CUDA_CMATH_H__
10#define __CLANG_CUDA_CMATH_H__
11#ifndef __CUDA__
12#error "This file is for CUDA compilation only."
13#endif
14
15#ifndef __OPENMP_NVPTX__
16#include <limits>
17#endif
18
19// CUDA lets us use various std math functions on the device side. This file
20// works in concert with __clang_cuda_math_forward_declares.h to make this work.
21//
22// Specifically, the forward-declares header declares __device__ overloads for
23// these functions in the global namespace, then pulls them into namespace std
24// with 'using' statements. Then this file implements those functions, after
25// their implementations have been pulled in.
26//
27// It's important that we declare the functions in the global namespace and pull
28// them into namespace std with using statements, as opposed to simply declaring
29// these functions in namespace std, because our device functions need to
30// overload the standard library functions, which may be declared in the global
31// namespace or in std, depending on the degree of conformance of the stdlib
32// implementation. Declaring in the global namespace and pulling into namespace
33// std covers all of the known knowns.
34
35#ifdef __OPENMP_NVPTX__
36#define __DEVICE__ static constexpr __attribute__((always_inline, nothrow))
37#else
38#define __DEVICE__ static __device__ __inline__ __attribute__((always_inline))
39#endif
40
41__DEVICE__ long long abs(long long __n) { return ::llabs(__n); }
42__DEVICE__ long abs(long __n) { return ::labs(__n); }
43__DEVICE__ float abs(float __x) { return ::fabsf(__x); }
44__DEVICE__ double abs(double __x) { return ::fabs(__x); }
45__DEVICE__ float acos(float __x) { return ::acosf(__x); }
46__DEVICE__ float asin(float __x) { return ::asinf(__x); }
47__DEVICE__ float atan(float __x) { return ::atanf(__x); }
48__DEVICE__ float atan2(float __x, float __y) { return ::atan2f(__x, __y); }
49__DEVICE__ float ceil(float __x) { return ::ceilf(__x); }
50__DEVICE__ float cos(float __x) { return ::cosf(__x); }
51__DEVICE__ float cosh(float __x) { return ::coshf(__x); }
52__DEVICE__ float exp(float __x) { return ::expf(__x); }
53__DEVICE__ float fabs(float __x) { return ::fabsf(__x); }
54__DEVICE__ float floor(float __x) { return ::floorf(__x); }
55__DEVICE__ float fmod(float __x, float __y) { return ::fmodf(__x, __y); }
56__DEVICE__ int fpclassify(float __x) {
57 return __builtin_fpclassify(FP_NAN, FP_INFINITE, FP_NORMAL, FP_SUBNORMAL,
58 FP_ZERO, __x);
59}
60__DEVICE__ int fpclassify(double __x) {
61 return __builtin_fpclassify(FP_NAN, FP_INFINITE, FP_NORMAL, FP_SUBNORMAL,
62 FP_ZERO, __x);
63}
64__DEVICE__ float frexp(float __arg, int *__exp) {
65 return ::frexpf(__arg, __exp);
66}
67
68// For inscrutable reasons, the CUDA headers define these functions for us on
69// Windows.
70#if !defined(_MSC_VER) || defined(__OPENMP_NVPTX__)
71
72// For OpenMP we work around some old system headers that have non-conforming
73// `isinf(float)` and `isnan(float)` implementations that return an `int`. We do
74// this by providing two versions of these functions, differing only in the
75// return type. To avoid conflicting definitions we disable implicit base
76// function generation. That means we will end up with two specializations, one
77// per type, but only one has a base function defined by the system header.
78#if defined(__OPENMP_NVPTX__)
79#pragma omp begin declare variant match( \
80 implementation = {extension(disable_implicit_base)})
81
82// FIXME: We lack an extension to customize the mangling of the variants, e.g.,
83// add a suffix. This means we would clash with the names of the variants
84// (note that we do not create implicit base functions here). To avoid
85// this clash we add a new trait to some of them that is always true
86// (this is LLVM after all ;)). It will only influence the mangled name
87// of the variants inside the inner region and avoid the clash.
88#pragma omp begin declare variant match(implementation = {vendor(llvm)})
89
90__DEVICE__ int isinf(float __x) { return ::__isinff(__x); }
91__DEVICE__ int isinf(double __x) { return ::__isinf(__x); }
92__DEVICE__ int isfinite(float __x) { return ::__finitef(__x); }
93__DEVICE__ int isfinite(double __x) { return ::__isfinited(__x); }
94__DEVICE__ int isnan(float __x) { return ::__isnanf(__x); }
95__DEVICE__ int isnan(double __x) { return ::__isnan(__x); }
96
97#pragma omp end declare variant
98
99#endif
100
101__DEVICE__ bool isinf(float __x) { return ::__isinff(__x); }
102__DEVICE__ bool isinf(double __x) { return ::__isinf(__x); }
103__DEVICE__ bool isfinite(float __x) { return ::__finitef(__x); }
104// For inscrutable reasons, __finite(), the double-precision version of
105// __finitef, does not exist when compiling for MacOS. __isfinited is available
106// everywhere and is just as good.
107__DEVICE__ bool isfinite(double __x) { return ::__isfinited(__x); }
108__DEVICE__ bool isnan(float __x) { return ::__isnanf(__x); }
109__DEVICE__ bool isnan(double __x) { return ::__isnan(__x); }
110
111#if defined(__OPENMP_NVPTX__)
112#pragma omp end declare variant
113#endif
114
115#endif
116
117__DEVICE__ bool isgreater(float __x, float __y) {
118 return __builtin_isgreater(__x, __y);
119}
120__DEVICE__ bool isgreater(double __x, double __y) {
121 return __builtin_isgreater(__x, __y);
122}
123__DEVICE__ bool isgreaterequal(float __x, float __y) {
124 return __builtin_isgreaterequal(__x, __y);
125}
126__DEVICE__ bool isgreaterequal(double __x, double __y) {
127 return __builtin_isgreaterequal(__x, __y);
128}
129__DEVICE__ bool isless(float __x, float __y) {
130 return __builtin_isless(__x, __y);
131}
132__DEVICE__ bool isless(double __x, double __y) {
133 return __builtin_isless(__x, __y);
134}
135__DEVICE__ bool islessequal(float __x, float __y) {
136 return __builtin_islessequal(__x, __y);
137}
138__DEVICE__ bool islessequal(double __x, double __y) {
139 return __builtin_islessequal(__x, __y);
140}
141__DEVICE__ bool islessgreater(float __x, float __y) {
142 return __builtin_islessgreater(__x, __y);
143}
144__DEVICE__ bool islessgreater(double __x, double __y) {
145 return __builtin_islessgreater(__x, __y);
146}
147__DEVICE__ bool isnormal(float __x) { return __builtin_isnormal(__x); }
148__DEVICE__ bool isnormal(double __x) { return __builtin_isnormal(__x); }
149__DEVICE__ bool isunordered(float __x, float __y) {
150 return __builtin_isunordered(__x, __y);
151}
152__DEVICE__ bool isunordered(double __x, double __y) {
153 return __builtin_isunordered(__x, __y);
154}
155__DEVICE__ float ldexp(float __arg, int __exp) {
156 return ::ldexpf(__arg, __exp);
157}
158__DEVICE__ float log(float __x) { return ::logf(__x); }
159__DEVICE__ float log10(float __x) { return ::log10f(__x); }
160__DEVICE__ float modf(float __x, float *__iptr) { return ::modff(__x, __iptr); }
161__DEVICE__ float pow(float __base, float __exp) {
162 return ::powf(__base, __exp);
163}
164__DEVICE__ float pow(float __base, int __iexp) {
165 return ::powif(__base, __iexp);
166}
167__DEVICE__ double pow(double __base, int __iexp) {
168 return ::powi(__base, __iexp);
169}
170__DEVICE__ bool signbit(float __x) { return ::__signbitf(__x); }
171__DEVICE__ bool signbit(double __x) { return ::__signbitd(__x); }
172__DEVICE__ float sin(float __x) { return ::sinf(__x); }
173__DEVICE__ float sinh(float __x) { return ::sinhf(__x); }
174__DEVICE__ float sqrt(float __x) { return ::sqrtf(__x); }
175__DEVICE__ float tan(float __x) { return ::tanf(__x); }
176__DEVICE__ float tanh(float __x) { return ::tanhf(__x); }
177
178// There was a redefinition error for this this overload in CUDA mode.
179// We restrict it to OpenMP mode for now, that is where it is actually needed
180// anyway.
181#ifdef __OPENMP_NVPTX__
182__DEVICE__ float remquo(float __n, float __d, int *__q) {
183 return ::remquof(__n, __d, __q);
184}
185#endif
186
187// Notably missing above is nexttoward. We omit it because
188// libdevice doesn't provide an implementation, and we don't want to be in the
189// business of implementing tricky libm functions in this header.
190
191#ifndef __OPENMP_NVPTX__
192
193// Now we've defined everything we promised we'd define in
194// __clang_cuda_math_forward_declares.h. We need to do two additional things to
195// fix up our math functions.
196//
197// 1) Define __device__ overloads for e.g. sin(int). The CUDA headers define
198// only sin(float) and sin(double), which means that e.g. sin(0) is
199// ambiguous.
200//
201// 2) Pull the __device__ overloads of "foobarf" math functions into namespace
202// std. These are defined in the CUDA headers in the global namespace,
203// independent of everything else we've done here.
204
205// We can't use std::enable_if, because we want to be pre-C++11 compatible. But
206// we go ahead and unconditionally define functions that are only available when
207// compiling for C++11 to match the behavior of the CUDA headers.
208template<bool __B, class __T = void>
210
211template <class __T> struct __clang_cuda_enable_if<true, __T> {
212 typedef __T type;
213};
214
215// Defines an overload of __fn that accepts one integral argument, calls
216// __fn((double)x), and returns __retty.
217#define __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(__retty, __fn) \
218 template <typename __T> \
219 __DEVICE__ \
220 typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer, \
221 __retty>::type \
222 __fn(__T __x) { \
223 return ::__fn((double)__x); \
224 }
225
226// Defines an overload of __fn that accepts one two arithmetic arguments, calls
227// __fn((double)x, (double)y), and returns a double.
228//
229// Note this is different from OVERLOAD_1, which generates an overload that
230// accepts only *integral* arguments.
231#define __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(__retty, __fn) \
232 template <typename __T1, typename __T2> \
233 __DEVICE__ typename __clang_cuda_enable_if< \
234 std::numeric_limits<__T1>::is_specialized && \
235 std::numeric_limits<__T2>::is_specialized, \
236 __retty>::type \
237 __fn(__T1 __x, __T2 __y) { \
238 return __fn((double)__x, (double)__y); \
239 }
240
301
302#undef __CUDA_CLANG_FN_INTEGER_OVERLOAD_1
303#undef __CUDA_CLANG_FN_INTEGER_OVERLOAD_2
304
305// Overloads for functions that don't match the patterns expected by
306// __CUDA_CLANG_FN_INTEGER_OVERLOAD_{1,2}.
307template <typename __T1, typename __T2, typename __T3>
309 std::numeric_limits<__T1>::is_specialized &&
310 std::numeric_limits<__T2>::is_specialized &&
311 std::numeric_limits<__T3>::is_specialized,
312 double>::type
313fma(__T1 __x, __T2 __y, __T3 __z) {
314 return std::fma((double)__x, (double)__y, (double)__z);
315}
316
317template <typename __T>
319 double>::type
320frexp(__T __x, int *__exp) {
321 return std::frexp((double)__x, __exp);
322}
323
324template <typename __T>
326 double>::type
327ldexp(__T __x, int __exp) {
328 return std::ldexp((double)__x, __exp);
329}
330
331template <typename __T1, typename __T2>
333 std::numeric_limits<__T1>::is_specialized &&
334 std::numeric_limits<__T2>::is_specialized,
335 double>::type
336remquo(__T1 __x, __T2 __y, int *__quo) {
337 return std::remquo((double)__x, (double)__y, __quo);
338}
339
340template <typename __T>
342 double>::type
343scalbln(__T __x, long __exp) {
344 return std::scalbln((double)__x, __exp);
345}
346
347template <typename __T>
349 double>::type
350scalbn(__T __x, int __exp) {
351 return std::scalbn((double)__x, __exp);
352}
353
354// We need to define these overloads in exactly the namespace our standard
355// library uses (including the right inline namespace), otherwise they won't be
356// picked up by other functions in the standard library (e.g. functions in
357// <complex>). Thus the ugliness below.
358#ifdef _LIBCPP_BEGIN_NAMESPACE_STD
359_LIBCPP_BEGIN_NAMESPACE_STD
360#else
361namespace std {
362#ifdef _GLIBCXX_BEGIN_NAMESPACE_VERSION
363_GLIBCXX_BEGIN_NAMESPACE_VERSION
364#endif
365#endif
366
367// Pull the new overloads we defined above into namespace std.
368using ::acos;
369using ::acosh;
370using ::asin;
371using ::asinh;
372using ::atan;
373using ::atan2;
374using ::atanh;
375using ::cbrt;
376using ::ceil;
377using ::copysign;
378using ::cos;
379using ::cosh;
380using ::erf;
381using ::erfc;
382using ::exp;
383using ::exp2;
384using ::expm1;
385using ::fabs;
386using ::fdim;
387using ::floor;
388using ::fma;
389using ::fmax;
390using ::fmin;
391using ::fmod;
392using ::fpclassify;
393using ::frexp;
394using ::hypot;
395using ::ilogb;
396using ::isfinite;
397using ::isgreater;
398using ::isgreaterequal;
399using ::isless;
400using ::islessequal;
401using ::islessgreater;
402using ::isnormal;
403using ::isunordered;
404using ::ldexp;
405using ::lgamma;
406using ::llrint;
407using ::llround;
408using ::log;
409using ::log10;
410using ::log1p;
411using ::log2;
412using ::logb;
413using ::lrint;
414using ::lround;
415using ::nearbyint;
416using ::nextafter;
417using ::pow;
418using ::remainder;
419using ::remquo;
420using ::rint;
421using ::round;
422using ::scalbln;
423using ::scalbn;
424using ::signbit;
425using ::sin;
426using ::sinh;
427using ::sqrt;
428using ::tan;
429using ::tanh;
430using ::tgamma;
431using ::trunc;
432
433// Well this is fun: We need to pull these symbols in for libc++, but we can't
434// pull them in with libstdc++, because its ::isinf and ::isnan are different
435// than its std::isinf and std::isnan.
436#ifndef __GLIBCXX__
437using ::isinf;
438using ::isnan;
439#endif
440
441// Finally, pull the "foobarf" functions that CUDA defines in its headers into
442// namespace std.
443using ::acosf;
444using ::acoshf;
445using ::asinf;
446using ::asinhf;
447using ::atan2f;
448using ::atanf;
449using ::atanhf;
450using ::cbrtf;
451using ::ceilf;
452using ::copysignf;
453using ::cosf;
454using ::coshf;
455using ::erfcf;
456using ::erff;
457using ::exp2f;
458using ::expf;
459using ::expm1f;
460using ::fabsf;
461using ::fdimf;
462using ::floorf;
463using ::fmaf;
464using ::fmaxf;
465using ::fminf;
466using ::fmodf;
467using ::frexpf;
468using ::hypotf;
469using ::ilogbf;
470using ::ldexpf;
471using ::lgammaf;
472using ::llrintf;
473using ::llroundf;
474using ::log10f;
475using ::log1pf;
476using ::log2f;
477using ::logbf;
478using ::logf;
479using ::lrintf;
480using ::lroundf;
481using ::modff;
482using ::nearbyintf;
483using ::nextafterf;
484using ::powf;
485using ::remainderf;
486using ::remquof;
487using ::rintf;
488using ::roundf;
489using ::scalblnf;
490using ::scalbnf;
491using ::sinf;
492using ::sinhf;
493using ::sqrtf;
494using ::tanf;
495using ::tanhf;
496using ::tgammaf;
497using ::truncf;
498
499#ifdef _LIBCPP_END_NAMESPACE_STD
500_LIBCPP_END_NAMESPACE_STD
501#else
502#ifdef _GLIBCXX_BEGIN_NAMESPACE_VERSION
503_GLIBCXX_END_NAMESPACE_VERSION
504#endif
505} // namespace std
506#endif
507
508#endif // __OPENMP_NVPTX__
509
510#undef __DEVICE__
511
512#endif
__DEVICE__ bool isunordered(float __x, float __y)
Test if arguments are unordered.
__DEVICE__ bool isgreater(float __x, float __y)
Returns the component-wise compare of x > y.
__DEVICE__ bool islessgreater(float __x, float __y)
Returns the component-wise compare of (x < y) || (x > y) .
__DEVICE__ bool isnan(float __x)
Test for a NaN.
__DEVICE__ int fpclassify(float __x)
__DEVICE__ bool isfinite(float __x)
Test for finite value.
__DEVICE__ bool signbit(float __x)
Test for sign bit.
__DEVICE__ bool isinf(float __x)
Test for infinity value (+ve or -ve) .
#define __DEVICE__
__DEVICE__ float modf(float __x, float *__iptr)
#define __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(__retty, __fn)
__DEVICE__ bool islessequal(float __x, float __y)
Returns the component-wise compare of x <= y.
#define __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(__retty, __fn)
__DEVICE__ long long abs(long long __n)
__DEVICE__ bool isless(float __x, float __y)
Returns the component-wise compare of x < y.
__DEVICE__ bool isnormal(float __x)
Test for a normal value.
__DEVICE__ bool isgreaterequal(float __x, float __y)
Returns the component-wise compare of x >= y.
static __inline__ uint32_t uint32_t __y
Definition: arm_acle.h:130
#define true
Definition: stdbool.h:25
#define sinh(__x)
Definition: tgmath.h:373
#define asin(__x)
Definition: tgmath.h:112
#define scalbln(__x, __y)
Definition: tgmath.h:1182
#define sqrt(__x)
Definition: tgmath.h:520
#define acos(__x)
Definition: tgmath.h:83
#define fmin(__x, __y)
Definition: tgmath.h:780
#define exp(__x)
Definition: tgmath.h:431
#define ilogb(__x)
Definition: tgmath.h:851
#define copysign(__x, __y)
Definition: tgmath.h:618
#define erf(__x)
Definition: tgmath.h:636
#define atanh(__x)
Definition: tgmath.h:228
#define remquo(__x, __y, __z)
Definition: tgmath.h:1111
#define nextafter(__x, __y)
Definition: tgmath.h:1055
#define frexp(__x, __y)
Definition: tgmath.h:816
#define asinh(__x)
Definition: tgmath.h:199
#define erfc(__x)
Definition: tgmath.h:653
#define atan2(__x, __y)
Definition: tgmath.h:566
#define hypot(__x, __y)
Definition: tgmath.h:833
#define exp2(__x)
Definition: tgmath.h:670
#define sin(__x)
Definition: tgmath.h:286
#define cbrt(__x)
Definition: tgmath.h:584
#define log2(__x)
Definition: tgmath.h:970
#define llround(__x)
Definition: tgmath.h:919
#define cosh(__x)
Definition: tgmath.h:344
#define trunc(__x)
Definition: tgmath.h:1216
#define fmax(__x, __y)
Definition: tgmath.h:762
#define ldexp(__x, __y)
Definition: tgmath.h:868
#define acosh(__x)
Definition: tgmath.h:170
#define tgamma(__x)
Definition: tgmath.h:1199
#define scalbn(__x, __y)
Definition: tgmath.h:1165
#define round(__x)
Definition: tgmath.h:1148
#define fmod(__x, __y)
Definition: tgmath.h:798
#define llrint(__x)
Definition: tgmath.h:902
#define tan(__x)
Definition: tgmath.h:315
#define cos(__x)
Definition: tgmath.h:257
#define log10(__x)
Definition: tgmath.h:936
#define fabs(__x)
Definition: tgmath.h:549
#define pow(__x, __y)
Definition: tgmath.h:490
#define log1p(__x)
Definition: tgmath.h:953
#define rint(__x)
Definition: tgmath.h:1131
#define expm1(__x)
Definition: tgmath.h:687
#define remainder(__x, __y)
Definition: tgmath.h:1090
#define fdim(__x, __y)
Definition: tgmath.h:704
#define lgamma(__x)
Definition: tgmath.h:885
#define tanh(__x)
Definition: tgmath.h:402
#define lrint(__x)
Definition: tgmath.h:1004
#define atan(__x)
Definition: tgmath.h:141
#define floor(__x)
Definition: tgmath.h:722
#define ceil(__x)
Definition: tgmath.h:601
#define log(__x)
Definition: tgmath.h:460
#define logb(__x)
Definition: tgmath.h:987
#define nearbyint(__x)
Definition: tgmath.h:1038
#define lround(__x)
Definition: tgmath.h:1021
#define fma(__x, __y, __z)
Definition: tgmath.h:742