clang 22.0.0git
__clang_cuda_complex_builtins.h
Go to the documentation of this file.
1/*===-- __clang_cuda_complex_builtins - CUDA impls of runtime complex fns ---===
2 *
3 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 * See https://llvm.org/LICENSE.txt for license information.
5 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 *
7 *===-----------------------------------------------------------------------===
8 */
9
10#ifndef __CLANG_CUDA_COMPLEX_BUILTINS
11#define __CLANG_CUDA_COMPLEX_BUILTINS
12
13// This header defines __muldc3, __mulsc3, __divdc3, and __divsc3. These are
14// libgcc functions that clang assumes are available when compiling c99 complex
15// operations. (These implementations come from libc++, and have been modified
16// to work with CUDA and OpenMP target offloading [in C and C++ mode].)
17
// Save any existing __DEVICE__ definition; the matching pop_macro at the end
// of this header restores it.
18#pragma push_macro("__DEVICE__")
// OpenMP offloading (NVPTX or AMDGCN): open a declare-target region and mark
// the helpers noinline, nothrow, cold and weak.
19#if defined(__OPENMP_NVPTX__) || defined(__OPENMP_AMDGCN__)
20#pragma omp declare target
21#define __DEVICE__ __attribute__((noinline, nothrow, cold, weak))
22#else
// Otherwise the helpers are ordinary inline __device__ functions.
23#define __DEVICE__ __device__ inline
24#endif
25
// _SCALBNd/_SCALBNf and _LOGBd/_LOGBf name the scalbn/logb implementations
// used by the division helpers below: the __nv_* functions when __NVPTX__ is
// defined, the generic __builtin_* forms otherwise. All four macros are
// #undef'd again at the end of this header.
26#ifdef __NVPTX__
27// FIXME: NVPTX should use generic builtins.
28#define _SCALBNd __nv_scalbn
29#define _SCALBNf __nv_scalbnf
30#define _LOGBd __nv_logb
31#define _LOGBf __nv_logbf
32#else
33#define _SCALBNd __builtin_scalbn
34#define _SCALBNf __builtin_scalbnf
35#define _LOGBd __builtin_logb
36#define _LOGBf __builtin_logbf
37#endif
38
39#if defined(__cplusplus)
40extern "C" {
41#endif
42
43__DEVICE__ double _Complex __muldc3(double __a, double __b, double __c,
44 double __d) {
45 double __ac = __a * __c;
46 double __bd = __b * __d;
47 double __ad = __a * __d;
48 double __bc = __b * __c;
49 double _Complex z;
50 __real__(z) = __ac - __bd;
51 __imag__(z) = __ad + __bc;
52 if (__builtin_isnan(__real__(z)) && __builtin_isnan(__imag__(z))) {
53 int __recalc = 0;
54 if (__builtin_isinf(__a) || __builtin_isinf(__b)) {
55 __a = __builtin_copysign(__builtin_isinf(__a) ? 1 : 0, __a);
56 __b = __builtin_copysign(__builtin_isinf(__b) ? 1 : 0, __b);
57 if (__builtin_isnan(__c))
58 __c = __builtin_copysign(0, __c);
59 if (__builtin_isnan(__d))
60 __d = __builtin_copysign(0, __d);
61 __recalc = 1;
62 }
63 if (__builtin_isinf(__c) || __builtin_isinf(__d)) {
64 __c = __builtin_copysign(__builtin_isinf(__c) ? 1 : 0, __c);
65 __d = __builtin_copysign(__builtin_isinf(__d) ? 1 : 0, __d);
66 if (__builtin_isnan(__a))
67 __a = __builtin_copysign(0, __a);
68 if (__builtin_isnan(__b))
69 __b = __builtin_copysign(0, __b);
70 __recalc = 1;
71 }
72 if (!__recalc && (__builtin_isinf(__ac) || __builtin_isinf(__bd) ||
73 __builtin_isinf(__ad) || __builtin_isinf(__bc))) {
74 if (__builtin_isnan(__a))
75 __a = __builtin_copysign(0, __a);
76 if (__builtin_isnan(__b))
77 __b = __builtin_copysign(0, __b);
78 if (__builtin_isnan(__c))
79 __c = __builtin_copysign(0, __c);
80 if (__builtin_isnan(__d))
81 __d = __builtin_copysign(0, __d);
82 __recalc = 1;
83 }
84 if (__recalc) {
85 // Can't use std::numeric_limits<double>::infinity() -- that doesn't have
86 // a device overload (and isn't constexpr before C++11, naturally).
87 __real__(z) = __builtin_huge_val() * (__a * __c - __b * __d);
88 __imag__(z) = __builtin_huge_val() * (__a * __d + __b * __c);
89 }
90 }
91 return z;
92}
93
94__DEVICE__ float _Complex __mulsc3(float __a, float __b, float __c, float __d) {
95 float __ac = __a * __c;
96 float __bd = __b * __d;
97 float __ad = __a * __d;
98 float __bc = __b * __c;
99 float _Complex z;
100 __real__(z) = __ac - __bd;
101 __imag__(z) = __ad + __bc;
102 if (__builtin_isnan(__real__(z)) && __builtin_isnan(__imag__(z))) {
103 int __recalc = 0;
104 if (__builtin_isinf(__a) || __builtin_isinf(__b)) {
105 __a = __builtin_copysignf(__builtin_isinf(__a) ? 1 : 0, __a);
106 __b = __builtin_copysignf(__builtin_isinf(__b) ? 1 : 0, __b);
107 if (__builtin_isnan(__c))
108 __c = __builtin_copysignf(0, __c);
109 if (__builtin_isnan(__d))
110 __d = __builtin_copysignf(0, __d);
111 __recalc = 1;
112 }
113 if (__builtin_isinf(__c) || __builtin_isinf(__d)) {
114 __c = __builtin_copysignf(__builtin_isinf(__c) ? 1 : 0, __c);
115 __d = __builtin_copysignf(__builtin_isinf(__d) ? 1 : 0, __d);
116 if (__builtin_isnan(__a))
117 __a = __builtin_copysignf(0, __a);
118 if (__builtin_isnan(__b))
119 __b = __builtin_copysignf(0, __b);
120 __recalc = 1;
121 }
122 if (!__recalc && (__builtin_isinf(__ac) || __builtin_isinf(__bd) ||
123 __builtin_isinf(__ad) || __builtin_isinf(__bc))) {
124 if (__builtin_isnan(__a))
125 __a = __builtin_copysignf(0, __a);
126 if (__builtin_isnan(__b))
127 __b = __builtin_copysignf(0, __b);
128 if (__builtin_isnan(__c))
129 __c = __builtin_copysignf(0, __c);
130 if (__builtin_isnan(__d))
131 __d = __builtin_copysignf(0, __d);
132 __recalc = 1;
133 }
134 if (__recalc) {
135 __real__(z) = __builtin_huge_valf() * (__a * __c - __b * __d);
136 __imag__(z) = __builtin_huge_valf() * (__a * __d + __b * __c);
137 }
138 }
139 return z;
140}
141
142__DEVICE__ double _Complex __divdc3(double __a, double __b, double __c,
143 double __d) {
144 int __ilogbw = 0;
145 // Can't use std::max, because that's defined in <algorithm>, and we don't
146 // want to pull that in for every compile. The CUDA headers define
147 // ::max(float, float) and ::max(double, double), which is sufficient for us.
148 double __logbw =
149 _LOGBd(__builtin_fmax(__builtin_fabs(__c), __builtin_fabs(__d)));
150 if (__builtin_isfinite(__logbw)) {
151 __ilogbw = (int)__logbw;
152 __c = _SCALBNd(__c, -__ilogbw);
153 __d = _SCALBNd(__d, -__ilogbw);
154 }
155 double __denom = __c * __c + __d * __d;
156 double _Complex z;
157 __real__(z) = _SCALBNd((__a * __c + __b * __d) / __denom, -__ilogbw);
158 __imag__(z) = _SCALBNd((__b * __c - __a * __d) / __denom, -__ilogbw);
159 if (__builtin_isnan(__real__(z)) && __builtin_isnan(__imag__(z))) {
160 if ((__denom == 0.0) && (!__builtin_isnan(__a) || !__builtin_isnan(__b))) {
161 __real__(z) = __builtin_copysign(__builtin_huge_val(), __c) * __a;
162 __imag__(z) = __builtin_copysign(__builtin_huge_val(), __c) * __b;
163 } else if ((__builtin_isinf(__a) || __builtin_isinf(__b)) &&
164 __builtin_isfinite(__c) && __builtin_isfinite(__d)) {
165 __a = __builtin_copysign(__builtin_isinf(__a) ? 1.0 : 0.0, __a);
166 __b = __builtin_copysign(__builtin_isinf(__b) ? 1.0 : 0.0, __b);
167 __real__(z) = __builtin_huge_val() * (__a * __c + __b * __d);
168 __imag__(z) = __builtin_huge_val() * (__b * __c - __a * __d);
169 } else if (__builtin_isinf(__logbw) && __logbw > 0.0 &&
170 __builtin_isfinite(__a) && __builtin_isfinite(__b)) {
171 __c = __builtin_copysign(__builtin_isinf(__c) ? 1.0 : 0.0, __c);
172 __d = __builtin_copysign(__builtin_isinf(__d) ? 1.0 : 0.0, __d);
173 __real__(z) = 0.0 * (__a * __c + __b * __d);
174 __imag__(z) = 0.0 * (__b * __c - __a * __d);
175 }
176 }
177 return z;
178}
179
180__DEVICE__ float _Complex __divsc3(float __a, float __b, float __c, float __d) {
181 int __ilogbw = 0;
182 float __logbw =
183 _LOGBf(__builtin_fmaxf(__builtin_fabsf(__c), __builtin_fabsf(__d)));
184 if (__builtin_isfinite(__logbw)) {
185 __ilogbw = (int)__logbw;
186 __c = _SCALBNf(__c, -__ilogbw);
187 __d = _SCALBNf(__d, -__ilogbw);
188 }
189 float __denom = __c * __c + __d * __d;
190 float _Complex z;
191 __real__(z) = _SCALBNf((__a * __c + __b * __d) / __denom, -__ilogbw);
192 __imag__(z) = _SCALBNf((__b * __c - __a * __d) / __denom, -__ilogbw);
193 if (__builtin_isnan(__real__(z)) && __builtin_isnan(__imag__(z))) {
194 if ((__denom == 0) && (!__builtin_isnan(__a) || !__builtin_isnan(__b))) {
195 __real__(z) = __builtin_copysignf(__builtin_huge_valf(), __c) * __a;
196 __imag__(z) = __builtin_copysignf(__builtin_huge_valf(), __c) * __b;
197 } else if ((__builtin_isinf(__a) || __builtin_isinf(__b)) &&
198 __builtin_isfinite(__c) && __builtin_isfinite(__d)) {
199 __a = __builtin_copysignf(__builtin_isinf(__a) ? 1 : 0, __a);
200 __b = __builtin_copysignf(__builtin_isinf(__b) ? 1 : 0, __b);
201 __real__(z) = __builtin_huge_valf() * (__a * __c + __b * __d);
202 __imag__(z) = __builtin_huge_valf() * (__b * __c - __a * __d);
203 } else if (__builtin_isinf(__logbw) && __logbw > 0 &&
204 __builtin_isfinite(__a) && __builtin_isfinite(__b)) {
205 __c = __builtin_copysignf(__builtin_isinf(__c) ? 1 : 0, __c);
206 __d = __builtin_copysignf(__builtin_isinf(__d) ? 1 : 0, __d);
207 __real__(z) = 0 * (__a * __c + __b * __d);
208 __imag__(z) = 0 * (__b * __c - __a * __d);
209 }
210 }
211 return z;
212}
213
214#if defined(__cplusplus)
215} // extern "C"
216#endif
217
218#undef _SCALBNd
219#undef _SCALBNf
220#undef _LOGBd
221#undef _LOGBf
222
223#if defined(__OPENMP_NVPTX__) || defined(__OPENMP_AMDGCN__)
224#pragma omp end declare target
225#endif
226
227#pragma pop_macro("__DEVICE__")
228
229#endif // __CLANG_CUDA_COMPLEX_BUILTINS
#define __DEVICE__
__DEVICE__ double _Complex __divdc3(double __a, double __b, double __c, double __d)
__DEVICE__ double _Complex __muldc3(double __a, double __b, double __c, double __d)
__DEVICE__ float _Complex __mulsc3(float __a, float __b, float __c, float __d)
__DEVICE__ float _Complex __divsc3(float __a, float __b, float __c, float __d)
static __inline__ vector float vector float vector float __c
Definition altivec.h:4800
static __inline__ vector float vector float __b
Definition altivec.h:578
static __inline__ void int __a
Definition emmintrin.h:4077