clang  14.0.0git
__clang_cuda_complex_builtins.h
Go to the documentation of this file.
1 /*===-- __clang_cuda_complex_builtins - CUDA impls of runtime complex fns ---===
2  *
3  * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4  * See https://llvm.org/LICENSE.txt for license information.
5  * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6  *
7  *===-----------------------------------------------------------------------===
8  */
9 
10 #ifndef __CLANG_CUDA_COMPLEX_BUILTINS
11 #define __CLANG_CUDA_COMPLEX_BUILTINS
12 
13 // This header defines __muldc3, __mulsc3, __divdc3, and __divsc3. These are
14 // libgcc functions that clang assumes are available when compiling c99 complex
15 // operations. (These implementations come from libc++, and have been modified
16 // to work with CUDA and OpenMP target offloading [in C and C++ mode].)
17 
18 #pragma push_macro("__DEVICE__")
19 #if defined(__OPENMP_NVPTX__) || defined(__OPENMP_AMDGCN__)
20 #pragma omp declare target
21 #define __DEVICE__ __attribute__((noinline, nothrow, cold, weak))
22 #else
23 #define __DEVICE__ __device__ inline
24 #endif
25 
26 // To make the algorithms available for C and C++ in CUDA and OpenMP we select
27 // different but equivalent function versions. TODO: For OpenMP we currently
28 // select the native builtins as the overload support for templates is lacking.
29 #if !defined(__OPENMP_NVPTX__) && !defined(__OPENMP_AMDGCN__)
30 #define _ISNANd std::isnan
31 #define _ISNANf std::isnan
32 #define _ISINFd std::isinf
33 #define _ISINFf std::isinf
34 #define _ISFINITEd std::isfinite
35 #define _ISFINITEf std::isfinite
36 #define _COPYSIGNd std::copysign
37 #define _COPYSIGNf std::copysign
38 #define _SCALBNd std::scalbn
39 #define _SCALBNf std::scalbn
40 #define _ABSd std::abs
41 #define _ABSf std::abs
42 #define _LOGBd std::logb
43 #define _LOGBf std::logb
44 // Rather than pulling in std::max from algorithm everytime, use available ::max.
45 #define _fmaxd max
46 #define _fmaxf max
47 #else
48 #ifdef __AMDGCN__
49 #define _ISNANd __ocml_isnan_f64
50 #define _ISNANf __ocml_isnan_f32
51 #define _ISINFd __ocml_isinf_f64
52 #define _ISINFf __ocml_isinf_f32
53 #define _ISFINITEd __ocml_isfinite_f64
54 #define _ISFINITEf __ocml_isfinite_f32
55 #define _COPYSIGNd __ocml_copysign_f64
56 #define _COPYSIGNf __ocml_copysign_f32
57 #define _SCALBNd __ocml_scalbn_f64
58 #define _SCALBNf __ocml_scalbn_f32
59 #define _ABSd __ocml_fabs_f64
60 #define _ABSf __ocml_fabs_f32
61 #define _LOGBd __ocml_logb_f64
62 #define _LOGBf __ocml_logb_f32
63 #define _fmaxd __ocml_fmax_f64
64 #define _fmaxf __ocml_fmax_f32
65 #else
66 #define _ISNANd __nv_isnand
67 #define _ISNANf __nv_isnanf
68 #define _ISINFd __nv_isinfd
69 #define _ISINFf __nv_isinff
70 #define _ISFINITEd __nv_isfinited
71 #define _ISFINITEf __nv_finitef
72 #define _COPYSIGNd __nv_copysign
73 #define _COPYSIGNf __nv_copysignf
74 #define _SCALBNd __nv_scalbn
75 #define _SCALBNf __nv_scalbnf
76 #define _ABSd __nv_fabs
77 #define _ABSf __nv_fabsf
78 #define _LOGBd __nv_logb
79 #define _LOGBf __nv_logbf
80 #define _fmaxd __nv_fmax
81 #define _fmaxf __nv_fmaxf
82 #endif
83 #endif
84 
85 #if defined(__cplusplus)
86 extern "C" {
87 #endif
88 
89 __DEVICE__ double _Complex __muldc3(double __a, double __b, double __c,
90  double __d) {
91  double __ac = __a * __c;
92  double __bd = __b * __d;
93  double __ad = __a * __d;
94  double __bc = __b * __c;
95  double _Complex z;
96  __real__(z) = __ac - __bd;
97  __imag__(z) = __ad + __bc;
98  if (_ISNANd(__real__(z)) && _ISNANd(__imag__(z))) {
99  int __recalc = 0;
100  if (_ISINFd(__a) || _ISINFd(__b)) {
101  __a = _COPYSIGNd(_ISINFd(__a) ? 1 : 0, __a);
102  __b = _COPYSIGNd(_ISINFd(__b) ? 1 : 0, __b);
103  if (_ISNANd(__c))
104  __c = _COPYSIGNd(0, __c);
105  if (_ISNANd(__d))
106  __d = _COPYSIGNd(0, __d);
107  __recalc = 1;
108  }
109  if (_ISINFd(__c) || _ISINFd(__d)) {
110  __c = _COPYSIGNd(_ISINFd(__c) ? 1 : 0, __c);
111  __d = _COPYSIGNd(_ISINFd(__d) ? 1 : 0, __d);
112  if (_ISNANd(__a))
113  __a = _COPYSIGNd(0, __a);
114  if (_ISNANd(__b))
115  __b = _COPYSIGNd(0, __b);
116  __recalc = 1;
117  }
118  if (!__recalc &&
119  (_ISINFd(__ac) || _ISINFd(__bd) || _ISINFd(__ad) || _ISINFd(__bc))) {
120  if (_ISNANd(__a))
121  __a = _COPYSIGNd(0, __a);
122  if (_ISNANd(__b))
123  __b = _COPYSIGNd(0, __b);
124  if (_ISNANd(__c))
125  __c = _COPYSIGNd(0, __c);
126  if (_ISNANd(__d))
127  __d = _COPYSIGNd(0, __d);
128  __recalc = 1;
129  }
130  if (__recalc) {
131  // Can't use std::numeric_limits<double>::infinity() -- that doesn't have
132  // a device overload (and isn't constexpr before C++11, naturally).
133  __real__(z) = __builtin_huge_val() * (__a * __c - __b * __d);
134  __imag__(z) = __builtin_huge_val() * (__a * __d + __b * __c);
135  }
136  }
137  return z;
138 }
139 
140 __DEVICE__ float _Complex __mulsc3(float __a, float __b, float __c, float __d) {
141  float __ac = __a * __c;
142  float __bd = __b * __d;
143  float __ad = __a * __d;
144  float __bc = __b * __c;
145  float _Complex z;
146  __real__(z) = __ac - __bd;
147  __imag__(z) = __ad + __bc;
148  if (_ISNANf(__real__(z)) && _ISNANf(__imag__(z))) {
149  int __recalc = 0;
150  if (_ISINFf(__a) || _ISINFf(__b)) {
151  __a = _COPYSIGNf(_ISINFf(__a) ? 1 : 0, __a);
152  __b = _COPYSIGNf(_ISINFf(__b) ? 1 : 0, __b);
153  if (_ISNANf(__c))
154  __c = _COPYSIGNf(0, __c);
155  if (_ISNANf(__d))
156  __d = _COPYSIGNf(0, __d);
157  __recalc = 1;
158  }
159  if (_ISINFf(__c) || _ISINFf(__d)) {
160  __c = _COPYSIGNf(_ISINFf(__c) ? 1 : 0, __c);
161  __d = _COPYSIGNf(_ISINFf(__d) ? 1 : 0, __d);
162  if (_ISNANf(__a))
163  __a = _COPYSIGNf(0, __a);
164  if (_ISNANf(__b))
165  __b = _COPYSIGNf(0, __b);
166  __recalc = 1;
167  }
168  if (!__recalc &&
169  (_ISINFf(__ac) || _ISINFf(__bd) || _ISINFf(__ad) || _ISINFf(__bc))) {
170  if (_ISNANf(__a))
171  __a = _COPYSIGNf(0, __a);
172  if (_ISNANf(__b))
173  __b = _COPYSIGNf(0, __b);
174  if (_ISNANf(__c))
175  __c = _COPYSIGNf(0, __c);
176  if (_ISNANf(__d))
177  __d = _COPYSIGNf(0, __d);
178  __recalc = 1;
179  }
180  if (__recalc) {
181  __real__(z) = __builtin_huge_valf() * (__a * __c - __b * __d);
182  __imag__(z) = __builtin_huge_valf() * (__a * __d + __b * __c);
183  }
184  }
185  return z;
186 }
187 
188 __DEVICE__ double _Complex __divdc3(double __a, double __b, double __c,
189  double __d) {
190  int __ilogbw = 0;
191  // Can't use std::max, because that's defined in <algorithm>, and we don't
192  // want to pull that in for every compile. The CUDA headers define
193  // ::max(float, float) and ::max(double, double), which is sufficient for us.
194  double __logbw = _LOGBd(_fmaxd(_ABSd(__c), _ABSd(__d)));
195  if (_ISFINITEd(__logbw)) {
196  __ilogbw = (int)__logbw;
197  __c = _SCALBNd(__c, -__ilogbw);
198  __d = _SCALBNd(__d, -__ilogbw);
199  }
200  double __denom = __c * __c + __d * __d;
201  double _Complex z;
202  __real__(z) = _SCALBNd((__a * __c + __b * __d) / __denom, -__ilogbw);
203  __imag__(z) = _SCALBNd((__b * __c - __a * __d) / __denom, -__ilogbw);
204  if (_ISNANd(__real__(z)) && _ISNANd(__imag__(z))) {
205  if ((__denom == 0.0) && (!_ISNANd(__a) || !_ISNANd(__b))) {
206  __real__(z) = _COPYSIGNd(__builtin_huge_val(), __c) * __a;
207  __imag__(z) = _COPYSIGNd(__builtin_huge_val(), __c) * __b;
208  } else if ((_ISINFd(__a) || _ISINFd(__b)) && _ISFINITEd(__c) &&
209  _ISFINITEd(__d)) {
210  __a = _COPYSIGNd(_ISINFd(__a) ? 1.0 : 0.0, __a);
211  __b = _COPYSIGNd(_ISINFd(__b) ? 1.0 : 0.0, __b);
212  __real__(z) = __builtin_huge_val() * (__a * __c + __b * __d);
213  __imag__(z) = __builtin_huge_val() * (__b * __c - __a * __d);
214  } else if (_ISINFd(__logbw) && __logbw > 0.0 && _ISFINITEd(__a) &&
215  _ISFINITEd(__b)) {
216  __c = _COPYSIGNd(_ISINFd(__c) ? 1.0 : 0.0, __c);
217  __d = _COPYSIGNd(_ISINFd(__d) ? 1.0 : 0.0, __d);
218  __real__(z) = 0.0 * (__a * __c + __b * __d);
219  __imag__(z) = 0.0 * (__b * __c - __a * __d);
220  }
221  }
222  return z;
223 }
224 
225 __DEVICE__ float _Complex __divsc3(float __a, float __b, float __c, float __d) {
226  int __ilogbw = 0;
227  float __logbw = _LOGBf(_fmaxf(_ABSf(__c), _ABSf(__d)));
228  if (_ISFINITEf(__logbw)) {
229  __ilogbw = (int)__logbw;
230  __c = _SCALBNf(__c, -__ilogbw);
231  __d = _SCALBNf(__d, -__ilogbw);
232  }
233  float __denom = __c * __c + __d * __d;
234  float _Complex z;
235  __real__(z) = _SCALBNf((__a * __c + __b * __d) / __denom, -__ilogbw);
236  __imag__(z) = _SCALBNf((__b * __c - __a * __d) / __denom, -__ilogbw);
237  if (_ISNANf(__real__(z)) && _ISNANf(__imag__(z))) {
238  if ((__denom == 0) && (!_ISNANf(__a) || !_ISNANf(__b))) {
239  __real__(z) = _COPYSIGNf(__builtin_huge_valf(), __c) * __a;
240  __imag__(z) = _COPYSIGNf(__builtin_huge_valf(), __c) * __b;
241  } else if ((_ISINFf(__a) || _ISINFf(__b)) && _ISFINITEf(__c) &&
242  _ISFINITEf(__d)) {
243  __a = _COPYSIGNf(_ISINFf(__a) ? 1 : 0, __a);
244  __b = _COPYSIGNf(_ISINFf(__b) ? 1 : 0, __b);
245  __real__(z) = __builtin_huge_valf() * (__a * __c + __b * __d);
246  __imag__(z) = __builtin_huge_valf() * (__b * __c - __a * __d);
247  } else if (_ISINFf(__logbw) && __logbw > 0 && _ISFINITEf(__a) &&
248  _ISFINITEf(__b)) {
249  __c = _COPYSIGNf(_ISINFf(__c) ? 1 : 0, __c);
250  __d = _COPYSIGNf(_ISINFf(__d) ? 1 : 0, __d);
251  __real__(z) = 0 * (__a * __c + __b * __d);
252  __imag__(z) = 0 * (__b * __c - __a * __d);
253  }
254  }
255  return z;
256 }
257 
258 #if defined(__cplusplus)
259 } // extern "C"
260 #endif
261 
262 #undef _ISNANd
263 #undef _ISNANf
264 #undef _ISINFd
265 #undef _ISINFf
266 #undef _COPYSIGNd
267 #undef _COPYSIGNf
268 #undef _ISFINITEd
269 #undef _ISFINITEf
270 #undef _SCALBNd
271 #undef _SCALBNf
272 #undef _ABSd
273 #undef _ABSf
274 #undef _LOGBd
275 #undef _LOGBf
276 #undef _fmaxd
277 #undef _fmaxf
278 
279 #if defined(__OPENMP_NVPTX__) || defined(__OPENMP_AMDGCN__)
280 #pragma omp end declare target
281 #endif
282 
283 #pragma pop_macro("__DEVICE__")
284 
285 #endif // __CLANG_CUDA_COMPLEX_BUILTINS
_COPYSIGNf
#define _COPYSIGNf
Definition: __clang_cuda_complex_builtins.h:37
_SCALBNf
#define _SCALBNf
Definition: __clang_cuda_complex_builtins.h:39
_ISINFd
#define _ISINFd
Definition: __clang_cuda_complex_builtins.h:32
int
__device__ int
Definition: __clang_hip_libdevice_declares.h:63
_ISINFf
#define _ISINFf
Definition: __clang_cuda_complex_builtins.h:33
__a
static __inline__ void int __a
Definition: emmintrin.h:4189
__DEVICE__
#define __DEVICE__
Definition: __clang_cuda_complex_builtins.h:23
__divsc3
__DEVICE__ float _Complex __divsc3(float __a, float __b, float __c, float __d)
Definition: __clang_cuda_complex_builtins.h:225
_ABSd
#define _ABSd
Definition: __clang_cuda_complex_builtins.h:40
_ISNANf
#define _ISNANf
Definition: __clang_cuda_complex_builtins.h:31
__divdc3
__DEVICE__ double _Complex __divdc3(double __a, double __b, double __c, double __d)
Definition: __clang_cuda_complex_builtins.h:188
_ISFINITEd
#define _ISFINITEd
Definition: __clang_cuda_complex_builtins.h:34
__muldc3
__DEVICE__ double _Complex __muldc3(double __a, double __b, double __c, double __d)
Definition: __clang_cuda_complex_builtins.h:89
__mulsc3
__DEVICE__ float _Complex __mulsc3(float __a, float __b, float __c, float __d)
Definition: __clang_cuda_complex_builtins.h:140
_LOGBd
#define _LOGBd
Definition: __clang_cuda_complex_builtins.h:42
_ISNANd
#define _ISNANd
Definition: __clang_cuda_complex_builtins.h:30
_ABSf
#define _ABSf
Definition: __clang_cuda_complex_builtins.h:41
_SCALBNd
#define _SCALBNd
Definition: __clang_cuda_complex_builtins.h:38
_fmaxd
#define _fmaxd
Definition: __clang_cuda_complex_builtins.h:45
_COPYSIGNd
#define _COPYSIGNd
Definition: __clang_cuda_complex_builtins.h:36
__b
static __inline__ vector float vector float __b
Definition: altivec.h:566
_ISFINITEf
#define _ISFINITEf
Definition: __clang_cuda_complex_builtins.h:35
_LOGBf
#define _LOGBf
Definition: __clang_cuda_complex_builtins.h:43
__c
static __inline__ vector float vector float vector float __c
Definition: altivec.h:4751
_fmaxf
#define _fmaxf
Definition: __clang_cuda_complex_builtins.h:46