clang 23.0.0git
__clang_cuda_complex_builtins.h
Go to the documentation of this file.
1/*===-- __clang_cuda_complex_builtins - CUDA impls of runtime complex fns ---===
2 *
3 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 * See https://llvm.org/LICENSE.txt for license information.
5 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 *
7 *===-----------------------------------------------------------------------===
8 */
9
10#ifndef __CLANG_CUDA_COMPLEX_BUILTINS
11#define __CLANG_CUDA_COMPLEX_BUILTINS
12
13// This header defines __muldc3, __mulsc3, __divdc3, and __divsc3. These are
14// libgcc functions that clang assumes are available when compiling c99 complex
15// operations. (These implementations come from libc++, and have been modified
16// to work with CUDA and OpenMP target offloading [in C and C++ mode].)
17
18#pragma push_macro("__DEVICE__")
19#if defined(__OPENMP_NVPTX__) || defined(__OPENMP_AMDGCN__) || \
20 defined(__OPENMP_SPIRV__)
21#pragma omp declare target
22#define __DEVICE__ __attribute__((noinline, nothrow, cold, weak))
23#else
24#define __DEVICE__ __device__ inline
25#endif
26
27#if defined(__NVPTX__)
28// FIXME: NVPTX should use generic builtins.
29#define _SCALBNd __nv_scalbn
30#define _SCALBNf __nv_scalbnf
31#define _LOGBd __nv_logb
32#define _LOGBf __nv_logbf
33#elif defined(__OPENMP_SPIRV__)
34#define _SCALBNd __spirv_ocl_ldexp
35#define _SCALBNf __spirv_ocl_ldexp
36#define _LOGBd __spirv_ocl_logb
37#define _LOGBf __spirv_ocl_logb
38#else
39#define _SCALBNd __builtin_scalbn
40#define _SCALBNf __builtin_scalbnf
41#define _LOGBd __builtin_logb
42#define _LOGBf __builtin_logbf
43#endif
44
45#if defined(__cplusplus)
46extern "C" {
47#endif
48
49__DEVICE__ double _Complex __muldc3(double __a, double __b, double __c,
50 double __d) {
51 double __ac = __a * __c;
52 double __bd = __b * __d;
53 double __ad = __a * __d;
54 double __bc = __b * __c;
55 double _Complex z;
56 __real__(z) = __ac - __bd;
57 __imag__(z) = __ad + __bc;
58 if (__builtin_isnan(__real__(z)) && __builtin_isnan(__imag__(z))) {
59 int __recalc = 0;
60 if (__builtin_isinf(__a) || __builtin_isinf(__b)) {
61 __a = __builtin_copysign(__builtin_isinf(__a) ? 1 : 0, __a);
62 __b = __builtin_copysign(__builtin_isinf(__b) ? 1 : 0, __b);
63 if (__builtin_isnan(__c))
64 __c = __builtin_copysign(0, __c);
65 if (__builtin_isnan(__d))
66 __d = __builtin_copysign(0, __d);
67 __recalc = 1;
68 }
69 if (__builtin_isinf(__c) || __builtin_isinf(__d)) {
70 __c = __builtin_copysign(__builtin_isinf(__c) ? 1 : 0, __c);
71 __d = __builtin_copysign(__builtin_isinf(__d) ? 1 : 0, __d);
72 if (__builtin_isnan(__a))
73 __a = __builtin_copysign(0, __a);
74 if (__builtin_isnan(__b))
75 __b = __builtin_copysign(0, __b);
76 __recalc = 1;
77 }
78 if (!__recalc && (__builtin_isinf(__ac) || __builtin_isinf(__bd) ||
79 __builtin_isinf(__ad) || __builtin_isinf(__bc))) {
80 if (__builtin_isnan(__a))
81 __a = __builtin_copysign(0, __a);
82 if (__builtin_isnan(__b))
83 __b = __builtin_copysign(0, __b);
84 if (__builtin_isnan(__c))
85 __c = __builtin_copysign(0, __c);
86 if (__builtin_isnan(__d))
87 __d = __builtin_copysign(0, __d);
88 __recalc = 1;
89 }
90 if (__recalc) {
91 // Can't use std::numeric_limits<double>::infinity() -- that doesn't have
92 // a device overload (and isn't constexpr before C++11, naturally).
93 __real__(z) = __builtin_huge_val() * (__a * __c - __b * __d);
94 __imag__(z) = __builtin_huge_val() * (__a * __d + __b * __c);
95 }
96 }
97 return z;
98}
99
100__DEVICE__ float _Complex __mulsc3(float __a, float __b, float __c, float __d) {
101 float __ac = __a * __c;
102 float __bd = __b * __d;
103 float __ad = __a * __d;
104 float __bc = __b * __c;
105 float _Complex z;
106 __real__(z) = __ac - __bd;
107 __imag__(z) = __ad + __bc;
108 if (__builtin_isnan(__real__(z)) && __builtin_isnan(__imag__(z))) {
109 int __recalc = 0;
110 if (__builtin_isinf(__a) || __builtin_isinf(__b)) {
111 __a = __builtin_copysignf(__builtin_isinf(__a) ? 1 : 0, __a);
112 __b = __builtin_copysignf(__builtin_isinf(__b) ? 1 : 0, __b);
113 if (__builtin_isnan(__c))
114 __c = __builtin_copysignf(0, __c);
115 if (__builtin_isnan(__d))
116 __d = __builtin_copysignf(0, __d);
117 __recalc = 1;
118 }
119 if (__builtin_isinf(__c) || __builtin_isinf(__d)) {
120 __c = __builtin_copysignf(__builtin_isinf(__c) ? 1 : 0, __c);
121 __d = __builtin_copysignf(__builtin_isinf(__d) ? 1 : 0, __d);
122 if (__builtin_isnan(__a))
123 __a = __builtin_copysignf(0, __a);
124 if (__builtin_isnan(__b))
125 __b = __builtin_copysignf(0, __b);
126 __recalc = 1;
127 }
128 if (!__recalc && (__builtin_isinf(__ac) || __builtin_isinf(__bd) ||
129 __builtin_isinf(__ad) || __builtin_isinf(__bc))) {
130 if (__builtin_isnan(__a))
131 __a = __builtin_copysignf(0, __a);
132 if (__builtin_isnan(__b))
133 __b = __builtin_copysignf(0, __b);
134 if (__builtin_isnan(__c))
135 __c = __builtin_copysignf(0, __c);
136 if (__builtin_isnan(__d))
137 __d = __builtin_copysignf(0, __d);
138 __recalc = 1;
139 }
140 if (__recalc) {
141 __real__(z) = __builtin_huge_valf() * (__a * __c - __b * __d);
142 __imag__(z) = __builtin_huge_valf() * (__a * __d + __b * __c);
143 }
144 }
145 return z;
146}
147
148__DEVICE__ double _Complex __divdc3(double __a, double __b, double __c,
149 double __d) {
150 int __ilogbw = 0;
151 // Can't use std::max, because that's defined in <algorithm>, and we don't
152 // want to pull that in for every compile. The CUDA headers define
153 // ::max(float, float) and ::max(double, double), which is sufficient for us.
154 double __logbw =
155 _LOGBd(__builtin_fmax(__builtin_fabs(__c), __builtin_fabs(__d)));
156 if (__builtin_isfinite(__logbw)) {
157 __ilogbw = (int)__logbw;
158 __c = _SCALBNd(__c, -__ilogbw);
159 __d = _SCALBNd(__d, -__ilogbw);
160 }
161 double __denom = __c * __c + __d * __d;
162 double _Complex z;
163 __real__(z) = _SCALBNd((__a * __c + __b * __d) / __denom, -__ilogbw);
164 __imag__(z) = _SCALBNd((__b * __c - __a * __d) / __denom, -__ilogbw);
165 if (__builtin_isnan(__real__(z)) && __builtin_isnan(__imag__(z))) {
166 if ((__denom == 0.0) && (!__builtin_isnan(__a) || !__builtin_isnan(__b))) {
167 __real__(z) = __builtin_copysign(__builtin_huge_val(), __c) * __a;
168 __imag__(z) = __builtin_copysign(__builtin_huge_val(), __c) * __b;
169 } else if ((__builtin_isinf(__a) || __builtin_isinf(__b)) &&
170 __builtin_isfinite(__c) && __builtin_isfinite(__d)) {
171 __a = __builtin_copysign(__builtin_isinf(__a) ? 1.0 : 0.0, __a);
172 __b = __builtin_copysign(__builtin_isinf(__b) ? 1.0 : 0.0, __b);
173 __real__(z) = __builtin_huge_val() * (__a * __c + __b * __d);
174 __imag__(z) = __builtin_huge_val() * (__b * __c - __a * __d);
175 } else if (__builtin_isinf(__logbw) && __logbw > 0.0 &&
176 __builtin_isfinite(__a) && __builtin_isfinite(__b)) {
177 __c = __builtin_copysign(__builtin_isinf(__c) ? 1.0 : 0.0, __c);
178 __d = __builtin_copysign(__builtin_isinf(__d) ? 1.0 : 0.0, __d);
179 __real__(z) = 0.0 * (__a * __c + __b * __d);
180 __imag__(z) = 0.0 * (__b * __c - __a * __d);
181 }
182 }
183 return z;
184}
185
186__DEVICE__ float _Complex __divsc3(float __a, float __b, float __c, float __d) {
187 int __ilogbw = 0;
188 float __logbw =
189 _LOGBf(__builtin_fmaxf(__builtin_fabsf(__c), __builtin_fabsf(__d)));
190 if (__builtin_isfinite(__logbw)) {
191 __ilogbw = (int)__logbw;
192 __c = _SCALBNf(__c, -__ilogbw);
193 __d = _SCALBNf(__d, -__ilogbw);
194 }
195 float __denom = __c * __c + __d * __d;
196 float _Complex z;
197 __real__(z) = _SCALBNf((__a * __c + __b * __d) / __denom, -__ilogbw);
198 __imag__(z) = _SCALBNf((__b * __c - __a * __d) / __denom, -__ilogbw);
199 if (__builtin_isnan(__real__(z)) && __builtin_isnan(__imag__(z))) {
200 if ((__denom == 0) && (!__builtin_isnan(__a) || !__builtin_isnan(__b))) {
201 __real__(z) = __builtin_copysignf(__builtin_huge_valf(), __c) * __a;
202 __imag__(z) = __builtin_copysignf(__builtin_huge_valf(), __c) * __b;
203 } else if ((__builtin_isinf(__a) || __builtin_isinf(__b)) &&
204 __builtin_isfinite(__c) && __builtin_isfinite(__d)) {
205 __a = __builtin_copysignf(__builtin_isinf(__a) ? 1 : 0, __a);
206 __b = __builtin_copysignf(__builtin_isinf(__b) ? 1 : 0, __b);
207 __real__(z) = __builtin_huge_valf() * (__a * __c + __b * __d);
208 __imag__(z) = __builtin_huge_valf() * (__b * __c - __a * __d);
209 } else if (__builtin_isinf(__logbw) && __logbw > 0 &&
210 __builtin_isfinite(__a) && __builtin_isfinite(__b)) {
211 __c = __builtin_copysignf(__builtin_isinf(__c) ? 1 : 0, __c);
212 __d = __builtin_copysignf(__builtin_isinf(__d) ? 1 : 0, __d);
213 __real__(z) = 0 * (__a * __c + __b * __d);
214 __imag__(z) = 0 * (__b * __c - __a * __d);
215 }
216 }
217 return z;
218}
219
220#if defined(__cplusplus)
221} // extern "C"
222#endif
223
224#undef _SCALBNd
225#undef _SCALBNf
226#undef _LOGBd
227#undef _LOGBf
228
229#if defined(__OPENMP_NVPTX__) || defined(__OPENMP_AMDGCN__) || \
230 defined(__OPENMP_SPIRV__)
231#pragma omp end declare target
232#endif
233
234#pragma pop_macro("__DEVICE__")
235
236#endif // __CLANG_CUDA_COMPLEX_BUILTINS
#define __DEVICE__
__DEVICE__ double _Complex __divdc3(double __a, double __b, double __c, double __d)
__DEVICE__ double _Complex __muldc3(double __a, double __b, double __c, double __d)
__DEVICE__ float _Complex __mulsc3(float __a, float __b, float __c, float __d)
__DEVICE__ float _Complex __divsc3(float __a, float __b, float __c, float __d)
static __inline__ vector float vector float vector float __c
Definition altivec.h:4800
static __inline__ vector float vector float __b
Definition altivec.h:578
static __inline__ void int __a
Definition emmintrin.h:4077