clang 23.0.0git
SemaAMDGPU.cpp
Go to the documentation of this file.
1//===------ SemaAMDGPU.cpp ------- AMDGPU target-specific routines --------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements semantic analysis functions specific to AMDGPU.
10//
11//===----------------------------------------------------------------------===//
12
14#include "clang/AST/Decl.h"
16#include "clang/AST/Expr.h"
22#include "clang/Sema/Scope.h"
23#include "clang/Sema/Sema.h"
24#include "llvm/ADT/SmallVector.h"
25#include "llvm/ADT/StringExtras.h"
26#include "llvm/ADT/StringMap.h"
27#include "llvm/Support/AMDGPUAddrSpace.h"
28#include "llvm/Support/AtomicOrdering.h"
29#include "llvm/TargetParser/AMDGPUTargetParser.h"
30#include <cstdint>
31#include <utility>
32
33namespace clang {
34
36
38 CallExpr *TheCall) {
39 // position of memory order and scope arguments in the builtin
40 unsigned OrderIndex, ScopeIndex;
41
42 const auto *FD = SemaRef.getCurFunctionDecl(/*AllowLambda=*/true);
43 assert(FD && "AMDGPU builtins should not be used outside of a function");
44 llvm::StringMap<bool> CallerFeatureMap;
45 getASTContext().getFunctionFeatureMap(CallerFeatureMap, FD);
46 bool HasGFX950Insts =
47 Builtin::evaluateRequiredTargetFeatures("gfx950-insts", CallerFeatureMap);
48
49 switch (BuiltinID) {
50 case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_load_lds:
51 case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_load_async_lds:
52 case AMDGPU::BI__builtin_amdgcn_struct_ptr_buffer_load_lds:
53 case AMDGPU::BI__builtin_amdgcn_struct_ptr_buffer_load_async_lds:
54 case AMDGPU::BI__builtin_amdgcn_load_to_lds:
55 case AMDGPU::BI__builtin_amdgcn_load_async_to_lds:
56 case AMDGPU::BI__builtin_amdgcn_global_load_lds:
57 case AMDGPU::BI__builtin_amdgcn_global_load_async_lds: {
58 constexpr const int SizeIdx = 2;
59 llvm::APSInt Size;
60 Expr *ArgExpr = TheCall->getArg(SizeIdx);
61 // Check for instantiation-dependent expressions (e.g., involving template
62 // parameters). These will be checked again during template instantiation.
63 if (ArgExpr->isInstantiationDependent())
64 return false;
65 [[maybe_unused]] ExprResult R =
66 SemaRef.VerifyIntegerConstantExpression(ArgExpr, &Size);
67 assert(!R.isInvalid());
68 switch (Size.getSExtValue()) {
69 case 1:
70 case 2:
71 case 4:
72 return false;
73 case 12:
74 case 16: {
75 if (HasGFX950Insts)
76 return false;
77 [[fallthrough]];
78 }
79 default:
80 SemaRef.targetDiag(ArgExpr->getExprLoc(),
81 diag::err_amdgcn_load_lds_size_invalid_value)
82 << ArgExpr->getSourceRange();
83 SemaRef.targetDiag(ArgExpr->getExprLoc(),
84 diag::note_amdgcn_load_lds_size_valid_value)
85 << HasGFX950Insts << ArgExpr->getSourceRange();
86 return true;
87 }
88 }
89 case AMDGPU::BI__builtin_amdgcn_get_fpenv:
90 case AMDGPU::BI__builtin_amdgcn_set_fpenv:
91 return false;
92 case AMDGPU::BI__builtin_amdgcn_atomic_inc32:
93 case AMDGPU::BI__builtin_amdgcn_atomic_inc64:
94 case AMDGPU::BI__builtin_amdgcn_atomic_dec32:
95 case AMDGPU::BI__builtin_amdgcn_atomic_dec64:
96 OrderIndex = 2;
97 ScopeIndex = 3;
98 break;
99 case AMDGPU::BI__builtin_amdgcn_fence:
100 OrderIndex = 0;
101 ScopeIndex = 1;
102 break;
103 case AMDGPU::BI__builtin_amdgcn_s_setreg:
104 return SemaRef.BuiltinConstantArgRange(TheCall, /*ArgNum=*/0, /*Low=*/0,
105 /*High=*/UINT16_MAX);
106 case AMDGPU::BI__builtin_amdgcn_s_wait_event: {
107 llvm::APSInt Result;
108 if (SemaRef.BuiltinConstantArg(TheCall, 0, Result))
109 return true;
110
112 "gfx12-insts", CallerFeatureMap);
113
114 // gfx11 -> gfx12 changed the interpretation of the bitmask. gfx12 inverted
115 // the interpretation for export_ready, but shifted the used bit by 1. Thus
116 // waiting for the export_ready event can use a value of 2 universally.
117 if (((IsGFX12Plus && !Result[1]) || (!IsGFX12Plus && Result[0])) ||
118 Result.getZExtValue() > 2) {
119 Expr *ArgExpr = TheCall->getArg(0);
120 SemaRef.targetDiag(ArgExpr->getExprLoc(),
121 diag::warn_amdgpu_s_wait_event_mask_no_effect_target)
122 << ArgExpr->getSourceRange();
123 SemaRef.targetDiag(ArgExpr->getExprLoc(),
124 diag::note_amdgpu_s_wait_event_suggested_value)
125 << ArgExpr->getSourceRange();
126 }
127
128 return false;
129 }
130 case AMDGPU::BI__builtin_amdgcn_mov_dpp:
131 return checkMovDPPFunctionCall(TheCall, 5, 1);
132 case AMDGPU::BI__builtin_amdgcn_mov_dpp8:
133 return checkMovDPPFunctionCall(TheCall, 2, 1);
134 case AMDGPU::BI__builtin_amdgcn_update_dpp:
135 return checkMovDPPFunctionCall(TheCall, 6, 2);
136 case AMDGPU::BI__builtin_amdgcn_cvt_scale_pk8_f16_fp8:
137 case AMDGPU::BI__builtin_amdgcn_cvt_scale_pk8_bf16_fp8:
138 case AMDGPU::BI__builtin_amdgcn_cvt_scale_pk8_f16_bf8:
139 case AMDGPU::BI__builtin_amdgcn_cvt_scale_pk8_bf16_bf8:
140 case AMDGPU::BI__builtin_amdgcn_cvt_scale_pk8_f16_fp4:
141 case AMDGPU::BI__builtin_amdgcn_cvt_scale_pk8_bf16_fp4:
142 case AMDGPU::BI__builtin_amdgcn_cvt_scale_pk8_f32_fp8:
143 case AMDGPU::BI__builtin_amdgcn_cvt_scale_pk8_f32_bf8:
144 case AMDGPU::BI__builtin_amdgcn_cvt_scale_pk8_f32_fp4:
145 case AMDGPU::BI__builtin_amdgcn_cvt_scale_pk16_f16_fp6:
146 case AMDGPU::BI__builtin_amdgcn_cvt_scale_pk16_bf16_fp6:
147 case AMDGPU::BI__builtin_amdgcn_cvt_scale_pk16_f16_bf6:
148 case AMDGPU::BI__builtin_amdgcn_cvt_scale_pk16_bf16_bf6:
149 case AMDGPU::BI__builtin_amdgcn_cvt_scale_pk16_f32_fp6:
150 case AMDGPU::BI__builtin_amdgcn_cvt_scale_pk16_f32_bf6:
151 return SemaRef.BuiltinConstantArgRange(TheCall, 2, 0, 15);
152 case AMDGPU::BI__builtin_amdgcn_av_load_b128:
153 return checkAVLoadStore(TheCall, /*IsStore=*/false);
154 case AMDGPU::BI__builtin_amdgcn_av_store_b128:
155 return checkAVLoadStore(TheCall, /*IsStore=*/true);
156 case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_load_32x4B:
157 case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_load_16x8B:
158 case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_load_8x16B:
159 return checkCoopAtomicFunctionCall(TheCall, /*IsStore=*/false);
160 case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_store_32x4B:
161 case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_store_16x8B:
162 case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_store_8x16B:
163 return checkCoopAtomicFunctionCall(TheCall, /*IsStore=*/true);
164 case AMDGPU::BI__builtin_amdgcn_flat_load_monitor_b32:
165 case AMDGPU::BI__builtin_amdgcn_flat_load_monitor_b64:
166 case AMDGPU::BI__builtin_amdgcn_flat_load_monitor_b128:
167 case AMDGPU::BI__builtin_amdgcn_global_load_monitor_b32:
168 case AMDGPU::BI__builtin_amdgcn_global_load_monitor_b64:
169 case AMDGPU::BI__builtin_amdgcn_global_load_monitor_b128:
170 return checkAtomicMonitorLoad(TheCall);
171 case AMDGPU::BI__builtin_amdgcn_image_load_1d_v4f32_i32:
172 case AMDGPU::BI__builtin_amdgcn_image_load_1darray_v4f32_i32:
173 case AMDGPU::BI__builtin_amdgcn_image_load_1d_v4f16_i32:
174 case AMDGPU::BI__builtin_amdgcn_image_load_1darray_v4f16_i32:
175 case AMDGPU::BI__builtin_amdgcn_image_load_2d_f32_i32:
176 case AMDGPU::BI__builtin_amdgcn_image_load_2d_v4f32_i32:
177 case AMDGPU::BI__builtin_amdgcn_image_load_2d_v4f16_i32:
178 case AMDGPU::BI__builtin_amdgcn_image_load_2darray_f32_i32:
179 case AMDGPU::BI__builtin_amdgcn_image_load_2darray_v4f32_i32:
180 case AMDGPU::BI__builtin_amdgcn_image_load_2darray_v4f16_i32:
181 case AMDGPU::BI__builtin_amdgcn_image_load_3d_v4f32_i32:
182 case AMDGPU::BI__builtin_amdgcn_image_load_3d_v4f16_i32:
183 case AMDGPU::BI__builtin_amdgcn_image_load_cube_v4f32_i32:
184 case AMDGPU::BI__builtin_amdgcn_image_load_cube_v4f16_i32:
185 case AMDGPU::BI__builtin_amdgcn_image_load_mip_1d_v4f32_i32:
186 case AMDGPU::BI__builtin_amdgcn_image_load_mip_1d_v4f16_i32:
187 case AMDGPU::BI__builtin_amdgcn_image_load_mip_1darray_v4f32_i32:
188 case AMDGPU::BI__builtin_amdgcn_image_load_mip_1darray_v4f16_i32:
189 case AMDGPU::BI__builtin_amdgcn_image_load_mip_2d_f32_i32:
190 case AMDGPU::BI__builtin_amdgcn_image_load_mip_2d_v4f32_i32:
191 case AMDGPU::BI__builtin_amdgcn_image_load_mip_2d_v4f16_i32:
192 case AMDGPU::BI__builtin_amdgcn_image_load_mip_2darray_f32_i32:
193 case AMDGPU::BI__builtin_amdgcn_image_load_mip_2darray_v4f32_i32:
194 case AMDGPU::BI__builtin_amdgcn_image_load_mip_2darray_v4f16_i32:
195 case AMDGPU::BI__builtin_amdgcn_image_load_mip_3d_v4f32_i32:
196 case AMDGPU::BI__builtin_amdgcn_image_load_mip_3d_v4f16_i32:
197 case AMDGPU::BI__builtin_amdgcn_image_load_mip_cube_v4f32_i32:
198 case AMDGPU::BI__builtin_amdgcn_image_load_mip_cube_v4f16_i32:
199 case AMDGPU::BI__builtin_amdgcn_image_sample_1d_v4f32_f32:
200 case AMDGPU::BI__builtin_amdgcn_image_sample_1darray_v4f32_f32:
201 case AMDGPU::BI__builtin_amdgcn_image_sample_1d_v4f16_f32:
202 case AMDGPU::BI__builtin_amdgcn_image_sample_1darray_v4f16_f32:
203 case AMDGPU::BI__builtin_amdgcn_image_sample_2d_f32_f32:
204 case AMDGPU::BI__builtin_amdgcn_image_sample_2d_v4f32_f32:
205 case AMDGPU::BI__builtin_amdgcn_image_sample_2d_v4f16_f32:
206 case AMDGPU::BI__builtin_amdgcn_image_sample_2darray_f32_f32:
207 case AMDGPU::BI__builtin_amdgcn_image_sample_2darray_v4f32_f32:
208 case AMDGPU::BI__builtin_amdgcn_image_sample_2darray_v4f16_f32:
209 case AMDGPU::BI__builtin_amdgcn_image_sample_3d_v4f32_f32:
210 case AMDGPU::BI__builtin_amdgcn_image_sample_3d_v4f16_f32:
211 case AMDGPU::BI__builtin_amdgcn_image_sample_cube_v4f32_f32:
212 case AMDGPU::BI__builtin_amdgcn_image_sample_cube_v4f16_f32:
213 case AMDGPU::BI__builtin_amdgcn_image_sample_lz_1d_v4f32_f32:
214 case AMDGPU::BI__builtin_amdgcn_image_sample_lz_1d_v4f16_f32:
215 case AMDGPU::BI__builtin_amdgcn_image_sample_lz_1darray_v4f32_f32:
216 case AMDGPU::BI__builtin_amdgcn_image_sample_lz_1darray_v4f16_f32:
217 case AMDGPU::BI__builtin_amdgcn_image_sample_lz_2d_f32_f32:
218 case AMDGPU::BI__builtin_amdgcn_image_sample_lz_2d_v4f32_f32:
219 case AMDGPU::BI__builtin_amdgcn_image_sample_lz_2d_v4f16_f32:
220 case AMDGPU::BI__builtin_amdgcn_image_sample_lz_2darray_f32_f32:
221 case AMDGPU::BI__builtin_amdgcn_image_sample_lz_2darray_v4f32_f32:
222 case AMDGPU::BI__builtin_amdgcn_image_sample_lz_2darray_v4f16_f32:
223 case AMDGPU::BI__builtin_amdgcn_image_sample_lz_3d_v4f32_f32:
224 case AMDGPU::BI__builtin_amdgcn_image_sample_lz_3d_v4f16_f32:
225 case AMDGPU::BI__builtin_amdgcn_image_sample_lz_cube_v4f32_f32:
226 case AMDGPU::BI__builtin_amdgcn_image_sample_lz_cube_v4f16_f32:
227 case AMDGPU::BI__builtin_amdgcn_image_sample_l_1d_v4f32_f32:
228 case AMDGPU::BI__builtin_amdgcn_image_sample_l_1d_v4f16_f32:
229 case AMDGPU::BI__builtin_amdgcn_image_sample_l_1darray_v4f32_f32:
230 case AMDGPU::BI__builtin_amdgcn_image_sample_l_1darray_v4f16_f32:
231 case AMDGPU::BI__builtin_amdgcn_image_sample_l_2d_f32_f32:
232 case AMDGPU::BI__builtin_amdgcn_image_sample_l_2d_v4f16_f32:
233 case AMDGPU::BI__builtin_amdgcn_image_sample_l_2d_v4f32_f32:
234 case AMDGPU::BI__builtin_amdgcn_image_sample_l_2darray_f32_f32:
235 case AMDGPU::BI__builtin_amdgcn_image_sample_l_2darray_v4f32_f32:
236 case AMDGPU::BI__builtin_amdgcn_image_sample_l_2darray_v4f16_f32:
237 case AMDGPU::BI__builtin_amdgcn_image_sample_l_3d_v4f32_f32:
238 case AMDGPU::BI__builtin_amdgcn_image_sample_l_3d_v4f16_f32:
239 case AMDGPU::BI__builtin_amdgcn_image_sample_l_cube_v4f32_f32:
240 case AMDGPU::BI__builtin_amdgcn_image_sample_l_cube_v4f16_f32:
241 case AMDGPU::BI__builtin_amdgcn_image_sample_d_1d_v4f32_f32:
242 case AMDGPU::BI__builtin_amdgcn_image_sample_d_1d_v4f16_f32:
243 case AMDGPU::BI__builtin_amdgcn_image_sample_d_1darray_v4f32_f32:
244 case AMDGPU::BI__builtin_amdgcn_image_sample_d_1darray_v4f16_f32:
245 case AMDGPU::BI__builtin_amdgcn_image_sample_d_2d_f32_f32:
246 case AMDGPU::BI__builtin_amdgcn_image_sample_d_2d_v4f32_f32:
247 case AMDGPU::BI__builtin_amdgcn_image_sample_d_2d_v4f16_f32:
248 case AMDGPU::BI__builtin_amdgcn_image_sample_d_2darray_f32_f32:
249 case AMDGPU::BI__builtin_amdgcn_image_sample_d_2darray_v4f32_f32:
250 case AMDGPU::BI__builtin_amdgcn_image_sample_d_2darray_v4f16_f32:
251 case AMDGPU::BI__builtin_amdgcn_image_sample_d_3d_v4f32_f32:
252 case AMDGPU::BI__builtin_amdgcn_image_sample_d_3d_v4f16_f32:
253 case AMDGPU::BI__builtin_amdgcn_image_gather4_lz_2d_v4f32_f32: {
254 StringRef FeatureList(
255 getASTContext().BuiltinInfo.getRequiredFeatures(BuiltinID));
257 CallerFeatureMap)) {
258 Diag(TheCall->getBeginLoc(), diag::err_builtin_needs_feature)
259 << FD->getDeclName() << FeatureList;
260 return false;
261 }
262
263 unsigned ArgCount = TheCall->getNumArgs() - 1;
264 llvm::APSInt Result;
265
266 // Complain about dmask values which are too huge to fully fit into 4 bits
267 // (which is the actual size of the dmask in corresponding HW instructions).
268 constexpr unsigned DMaskArgNo = 0;
269 constexpr int Low = 0;
270 constexpr int High = 15;
271 if (SemaRef.BuiltinConstantArg(TheCall, DMaskArgNo, Result) ||
272 SemaRef.BuiltinConstantArgRange(TheCall, DMaskArgNo, Low, High,
273 /* RangeIsError = */ true))
274 return true;
275
276 // Dmask indicates which elements should be returned and it is not possible
277 // to return more values than there are elements in return type.
278 int NumElementsInRetTy = 1;
279 const Type *RetTy = TheCall->getType().getTypePtr();
280 if (auto *VTy = dyn_cast<VectorType>(RetTy))
281 NumElementsInRetTy = VTy->getNumElements();
282 int NumActiveBitsInDMask =
283 llvm::popcount(static_cast<uint8_t>(Result.getExtValue()));
284 if (NumActiveBitsInDMask > NumElementsInRetTy) {
285 Diag(TheCall->getBeginLoc(),
286 diag::err_amdgcn_dmask_has_too_many_bits_set);
287 return true;
288 }
289
290 // For gather, only one bit can be set indicating which exact component to
291 // return.
292 bool ExtraGatherChecks =
293 BuiltinID == AMDGPU::BI__builtin_amdgcn_image_gather4_lz_2d_v4f32_f32 &&
294 SemaRef.BuiltinConstantArgPower2(TheCall, 0);
295
296 return ExtraGatherChecks ||
297 (SemaRef.BuiltinConstantArg(TheCall, ArgCount, Result)) ||
298 (SemaRef.BuiltinConstantArg(TheCall, (ArgCount - 1), Result));
299 }
300 case AMDGPU::BI__builtin_amdgcn_image_store_1d_v4f32_i32:
301 case AMDGPU::BI__builtin_amdgcn_image_store_1darray_v4f32_i32:
302 case AMDGPU::BI__builtin_amdgcn_image_store_1d_v4f16_i32:
303 case AMDGPU::BI__builtin_amdgcn_image_store_1darray_v4f16_i32:
304 case AMDGPU::BI__builtin_amdgcn_image_store_2d_f32_i32:
305 case AMDGPU::BI__builtin_amdgcn_image_store_2d_v4f32_i32:
306 case AMDGPU::BI__builtin_amdgcn_image_store_2d_v4f16_i32:
307 case AMDGPU::BI__builtin_amdgcn_image_store_2darray_f32_i32:
308 case AMDGPU::BI__builtin_amdgcn_image_store_2darray_v4f32_i32:
309 case AMDGPU::BI__builtin_amdgcn_image_store_2darray_v4f16_i32:
310 case AMDGPU::BI__builtin_amdgcn_image_store_3d_v4f32_i32:
311 case AMDGPU::BI__builtin_amdgcn_image_store_3d_v4f16_i32:
312 case AMDGPU::BI__builtin_amdgcn_image_store_cube_v4f32_i32:
313 case AMDGPU::BI__builtin_amdgcn_image_store_cube_v4f16_i32:
314 case AMDGPU::BI__builtin_amdgcn_image_store_mip_1d_v4f32_i32:
315 case AMDGPU::BI__builtin_amdgcn_image_store_mip_1d_v4f16_i32:
316 case AMDGPU::BI__builtin_amdgcn_image_store_mip_1darray_v4f32_i32:
317 case AMDGPU::BI__builtin_amdgcn_image_store_mip_1darray_v4f16_i32:
318 case AMDGPU::BI__builtin_amdgcn_image_store_mip_2d_f32_i32:
319 case AMDGPU::BI__builtin_amdgcn_image_store_mip_2d_v4f32_i32:
320 case AMDGPU::BI__builtin_amdgcn_image_store_mip_2d_v4f16_i32:
321 case AMDGPU::BI__builtin_amdgcn_image_store_mip_2darray_f32_i32:
322 case AMDGPU::BI__builtin_amdgcn_image_store_mip_2darray_v4f32_i32:
323 case AMDGPU::BI__builtin_amdgcn_image_store_mip_2darray_v4f16_i32:
324 case AMDGPU::BI__builtin_amdgcn_image_store_mip_3d_v4f32_i32:
325 case AMDGPU::BI__builtin_amdgcn_image_store_mip_3d_v4f16_i32:
326 case AMDGPU::BI__builtin_amdgcn_image_store_mip_cube_v4f32_i32:
327 case AMDGPU::BI__builtin_amdgcn_image_store_mip_cube_v4f16_i32: {
328 StringRef FeatureList(
329 getASTContext().BuiltinInfo.getRequiredFeatures(BuiltinID));
331 CallerFeatureMap)) {
332 Diag(TheCall->getBeginLoc(), diag::err_builtin_needs_feature)
333 << FD->getDeclName() << FeatureList;
334 return false;
335 }
336
337 unsigned ArgCount = TheCall->getNumArgs() - 1;
338 llvm::APSInt Result;
339
340 // Complain about dmask values which are too huge to fully fit into 4 bits
341 // (which is the actual size of the dmask in corresponding HW instructions).
342 constexpr unsigned DMaskArgNo = 1;
343 return SemaRef.BuiltinConstantArgRange(TheCall, DMaskArgNo, /*Low=*/0,
344 /*High=*/15,
345 /*RangeIsError=*/true) ||
346 SemaRef.BuiltinConstantArg(TheCall, ArgCount, Result) ||
347 SemaRef.BuiltinConstantArg(TheCall, (ArgCount - 1), Result);
348 }
349 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x64_iu8:
350 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x128_iu8: {
351 if (BuiltinID == AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x64_iu8) {
352 if (SemaRef.checkArgCountRange(TheCall, 7, 8))
353 return true;
354 if (TheCall->getNumArgs() == 7)
355 return false;
356 } else if (BuiltinID ==
357 AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x128_iu8) {
358 if (SemaRef.checkArgCountRange(TheCall, 8, 9))
359 return true;
360 if (TheCall->getNumArgs() == 8)
361 return false;
362 }
363 // Check if the last argument (clamp operand) is a constant and is
364 // convertible to bool.
365 Expr *ClampArg = TheCall->getArg(TheCall->getNumArgs() - 1);
366 // 1) Ensure clamp argument is a constant expression
367 llvm::APSInt ClampValue;
368 if (!SemaRef.VerifyIntegerConstantExpression(ClampArg, &ClampValue)
369 .isUsable())
370 return true;
371 // 2) Check if the argument can be converted to bool type
372 if (!SemaRef.Context.hasSameType(ClampArg->getType(),
373 SemaRef.Context.BoolTy)) {
374 // Try to convert to bool
375 QualType BoolTy = SemaRef.Context.BoolTy;
376 ExprResult ClampExpr(ClampArg);
377 SemaRef.CheckSingleAssignmentConstraints(BoolTy, ClampExpr);
378 if (ClampExpr.isInvalid())
379 return true;
380 }
381 return false;
382 }
383 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x32_bf16:
384 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x4_f32:
385 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x32_f16:
386 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x32_f16:
387 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x32_bf16:
388 case AMDGPU::BI__builtin_amdgcn_wmma_bf16f32_16x16x32_bf16:
389 return SemaRef.BuiltinConstantArgRange(TheCall, /*ArgNum=*/0, /*Low=*/0,
390 /*High=*/0) ||
391 SemaRef.BuiltinConstantArgRange(TheCall, /*ArgNum=*/2, /*Low=*/0,
392 /*High=*/0);
393 default:
394 return false;
395 }
396
397 ExprResult Arg = TheCall->getArg(OrderIndex);
398 auto ArgExpr = Arg.get();
399 Expr::EvalResult ArgResult;
400
401 if (!ArgExpr->EvaluateAsInt(ArgResult, getASTContext()))
402 return Diag(ArgExpr->getExprLoc(), diag::err_typecheck_expect_int)
403 << ArgExpr->getType();
404 auto Ord = ArgResult.Val.getInt().getZExtValue();
405
406 // Check validity of memory ordering as per C11 / C++11's memory model.
407 // Only fence needs check. Atomic dec/inc allow all memory orders.
408 if (!llvm::isValidAtomicOrderingCABI(Ord))
409 return Diag(ArgExpr->getBeginLoc(),
410 diag::warn_atomic_op_has_invalid_memory_order)
411 << 0 << ArgExpr->getSourceRange();
412 switch (static_cast<llvm::AtomicOrderingCABI>(Ord)) {
413 case llvm::AtomicOrderingCABI::relaxed:
414 case llvm::AtomicOrderingCABI::consume:
415 if (BuiltinID == AMDGPU::BI__builtin_amdgcn_fence)
416 return Diag(ArgExpr->getBeginLoc(),
417 diag::warn_atomic_op_has_invalid_memory_order)
418 << 0 << ArgExpr->getSourceRange();
419 break;
420 case llvm::AtomicOrderingCABI::acquire:
421 case llvm::AtomicOrderingCABI::release:
422 case llvm::AtomicOrderingCABI::acq_rel:
423 case llvm::AtomicOrderingCABI::seq_cst:
424 break;
425 }
426
427 Arg = TheCall->getArg(ScopeIndex);
428 ArgExpr = Arg.get();
429 Expr::EvalResult ArgResult1;
430 // Check that sync scope is a constant literal
431 if (!ArgExpr->EvaluateAsConstantExpr(ArgResult1, getASTContext()))
432 return Diag(ArgExpr->getExprLoc(), diag::err_expr_not_string_literal)
433 << ArgExpr->getType();
434
435 return false;
436}
437
439 bool MayStore) {
// Validates the atomic-ordering argument E of a builtin call. MayLoad and
// MayStore state whether the builtin may read and/or write memory. A rejected
// ordering is reported with warn_atomic_op_has_invalid_memory_order and the
// value of that Diag expression is returned (NOTE(review): presumably true,
// matching how callers accumulate it into a failure flag - confirm);
// otherwise the result is false.
440 Expr::EvalResult AtomicOrdArgRes;
// A non-integer ordering is considered unreachable here rather than being
// reported as a user-facing diagnostic.
441 if (!E->EvaluateAsInt(AtomicOrdArgRes, getASTContext()))
442 llvm_unreachable("Intrinsic requires imm for atomic ordering argument!");
443 auto Ord =
444 llvm::AtomicOrderingCABI(AtomicOrdArgRes.Val.getInt().getZExtValue());
445
446 // Reject values that are not valid C ABI orderings, acq_rel unless the
447 // operation may both load and store, acquire when it cannot load, and
// release when it cannot store.
448 if (!llvm::isValidAtomicOrderingCABI((unsigned)Ord) ||
449 (!(MayLoad && MayStore) && (Ord == llvm::AtomicOrderingCABI::acq_rel)) ||
450 (!MayLoad && Ord == llvm::AtomicOrderingCABI::acquire) ||
451 (!MayStore && Ord == llvm::AtomicOrderingCABI::release)) {
452 return Diag(E->getBeginLoc(), diag::warn_atomic_op_has_invalid_memory_order)
453 << 0 << E->getSourceRange();
454 }
455
456 return false;
457}
458
459// Check that the first argument to TheCall is a pointer whose pointee lives in
// the target address space FLAT_ADDRESS or GLOBAL_ADDRESS. Any other address
// space is diagnosed with err_amdgcn_global_or_flat_pointer_required at the
// start of the call; false is returned when the pointer is acceptable.
461 Expr *PtrArg = TheCall->getArg(0);
// Despite its name, PtrTy holds the pointee type of the argument.
462 QualType PtrTy = PtrArg->getType()->getPointeeType();
// Translate the pointee's language-level address space to the target's value.
463 unsigned AS =
464 S.getASTContext().getTargetAddressSpace(PtrTy.getAddressSpace());
465 if (AS != llvm::AMDGPUAS::FLAT_ADDRESS &&
466 AS != llvm::AMDGPUAS::GLOBAL_ADDRESS) {
467 return S.Diag(TheCall->getBeginLoc(),
468 diag::err_amdgcn_global_or_flat_pointer_required)
469 << PtrArg->getSourceRange();
470 }
471 return false;
472}
473
// Validates a synchronization-scope argument supplied as an integer
// expression. Value-dependent expressions are accepted without further
// checks.
475 if (Scope->isValueDependent())
476 return false;
// NOTE(review): the declaration of ScopeModel is not visible in this listing;
// it is only known to expose isValid(value) - confirm which scope model is
// used. A scope that does not fold to an integer constant is not diagnosed
// here and falls through to the final return.
478 if (std::optional<llvm::APSInt> Result =
479 Scope->getIntegerConstantExpr(S.SemaRef.Context)) {
480 if (!ScopeModel->isValid(Result->getZExtValue())) {
481 return S.Diag(Scope->getBeginLoc(),
482 diag::err_atomic_op_has_invalid_sync_scope)
483 << Scope->getSourceRange();
484 }
485 }
486 return false;
487}
488
489bool SemaAMDGPU::checkAVLoadStore(CallExpr *TheCall, bool IsStore) {
490 if (checkGlobalOrFlatPointerArg(*this, TheCall))
491 return true;
492
493 Expr *Scope = TheCall->getArg(TheCall->getNumArgs() - 1);
494 return checkScopeAsInt(*this, Scope);
495}
496
// Checks a cooperative atomic load/store builtin call: the pointer operand,
// the atomic ordering operand (argument 2 for stores, 1 for loads) and the
// trailing sync-scope operand. The result is true if any check failed.
498 bool Fail = checkGlobalOrFlatPointerArg(*this, TheCall);
499
500 Expr *AO = TheCall->getArg(IsStore ? 2 : 1);
501 Expr *Scope = TheCall->getArg(TheCall->getNumArgs() - 1);
502
// NOTE(review): this early return reports success even when the pointer check
// above has already set Fail (its diagnostic is still emitted). Confirm
// whether `return Fail;` was intended for value-dependent operands.
503 if (AO->isValueDependent() || Scope->isValueDependent())
504 return false;
505
506 // Check atomic ordering: a store may not load, and a load may not store.
507 Fail |=
508 checkAtomicOrderingCABIArg(TheCall->getArg(IsStore ? 2 : 1),
509 /*MayLoad=*/!IsStore, /*MayStore=*/IsStore);
510
511 // Last argument is the syncscope as a string literal (parentheses and
// implicit casts around the literal are ignored).
512 if (!isa<StringLiteral>(Scope->IgnoreParenImpCasts())) {
513 Diag(TheCall->getBeginLoc(), diag::err_expr_not_string_literal)
514 << Scope->getSourceRange();
515 Fail = true;
516 }
517
518 return Fail;
519}
520
// Checks the flat/global load_monitor builtins: argument 1 is the atomic
// ordering and the last argument is the sync scope, given as an integer.
522 Expr *AO = TheCall->getArg(1);
523 Expr *Scope = TheCall->getArg(TheCall->getNumArgs() - 1);
524
// Value-dependent operands are accepted here without being checked.
525 if (AO->isValueDependent() || Scope->isValueDependent())
526 return false;
527
// The operation is a pure load, so orderings that require a store are
// rejected. Both checks always run so that each can emit its diagnostic.
528 bool Fail = checkAtomicOrderingCABIArg(AO, /*MayLoad=*/true,
529 /*MayStore=*/false);
530 Fail |= checkScopeAsInt(*this, Scope);
531 return Fail;
532}
533
534bool SemaAMDGPU::checkMovDPPFunctionCall(CallExpr *TheCall, unsigned NumArgs,
535 unsigned NumDataArgs) {
536 assert(NumDataArgs <= 2);
537 if (SemaRef.checkArgCountRange(TheCall, NumArgs, NumArgs))
538 return true;
539 Expr *Args[2];
540 QualType ArgTys[2];
541 for (unsigned I = 0; I != NumDataArgs; ++I) {
542 Args[I] = TheCall->getArg(I);
543 ArgTys[I] = Args[I]->getType();
544 // TODO: Vectors can also be supported.
545 if (!ArgTys[I]->isArithmeticType() || ArgTys[I]->isAnyComplexType()) {
546 SemaRef.Diag(Args[I]->getBeginLoc(),
547 diag::err_typecheck_cond_expect_int_float)
548 << ArgTys[I] << Args[I]->getSourceRange();
549 return true;
550 }
551 }
552 if (NumDataArgs < 2)
553 return false;
554
555 if (getASTContext().hasSameUnqualifiedType(ArgTys[0], ArgTys[1]))
556 return false;
557
558 if (((ArgTys[0]->isUnsignedIntegerType() &&
559 ArgTys[1]->isSignedIntegerType()) ||
560 (ArgTys[0]->isSignedIntegerType() &&
561 ArgTys[1]->isUnsignedIntegerType())) &&
562 getASTContext().getTypeSize(ArgTys[0]) ==
563 getASTContext().getTypeSize(ArgTys[1]))
564 return false;
565
566 SemaRef.Diag(Args[1]->getBeginLoc(),
567 diag::err_typecheck_call_different_arg_types)
568 << ArgTys[0] << ArgTys[1];
569 return true;
570}
571
// Validates the (min, max) arguments of a flat-work-group-size attribute.
// Returns true when the arguments are rejected and false when they are
// accepted or cannot be checked yet.
572static bool
574 const AMDGPUFlatWorkGroupSizeAttr &Attr) {
575 // Accept template arguments for now as they depend on something else.
576 // We'll get to check them when they eventually get instantiated.
577 if (MinExpr->isValueDependent() || MaxExpr->isValueDependent())
578 return false;
579
// Each argument is read as a uint32; a false result from checkUInt32Argument
// is treated as failure. The last parameter is the argument's index.
580 uint32_t Min = 0;
581 if (!S.checkUInt32Argument(Attr, MinExpr, Min, 0))
582 return true;
583
584 uint32_t Max = 0;
585 if (!S.checkUInt32Argument(Attr, MaxExpr, Max, 1))
586 return true;
587
// A zero minimum paired with a non-zero maximum is reported against argument 0.
588 if (Min == 0 && Max != 0) {
589 S.Diag(Attr.getLocation(), diag::err_attribute_argument_invalid)
590 << &Attr << 0;
591 return true;
592 }
// A minimum above the maximum is reported against argument 1. Unlike the
// waves-per-EU check, a zero maximum is not exempt from this comparison.
593 if (Min > Max) {
594 S.Diag(Attr.getLocation(), diag::err_attribute_argument_invalid)
595 << &Attr << 1;
596 return true;
597 }
598
599 return false;
600}
601
// Validates MinExpr/MaxExpr and, when they are acceptable, returns a new
// AMDGPUFlatWorkGroupSizeAttr placement-allocated on the ASTContext. Returns
// nullptr when validation fails.
602AMDGPUFlatWorkGroupSizeAttr *
604 Expr *MinExpr, Expr *MaxExpr) {
605 ASTContext &Context = getASTContext();
// The stack-allocated attribute is handed to the argument checker, which uses
// it when emitting diagnostics.
606 AMDGPUFlatWorkGroupSizeAttr TmpAttr(Context, CI, MinExpr, MaxExpr);
607
608 if (checkAMDGPUFlatWorkGroupSizeArguments(SemaRef, MinExpr, MaxExpr, TmpAttr))
609 return nullptr;
610 return ::new (Context)
611 AMDGPUFlatWorkGroupSizeAttr(Context, CI, MinExpr, MaxExpr);
612}
613
615 const AttributeCommonInfo &CI,
616 Expr *MinExpr, Expr *MaxExpr) {
// Attach the attribute to D only if it could be created; a null result from
// CreateAMDGPUFlatWorkGroupSizeAttr leaves D unchanged.
617 if (auto *Attr = CreateAMDGPUFlatWorkGroupSizeAttr(CI, MinExpr, MaxExpr))
618 D->addAttr(Attr);
619}
620
622 const ParsedAttr &AL) {
// Both arguments are read unconditionally: argument 0 is the minimum and
// argument 1 the maximum.
623 Expr *MinExpr = AL.getArgAsExpr(0);
624 Expr *MaxExpr = AL.getArgAsExpr(1);
625
// Validation and attachment to D are delegated to the add helper.
626 addAMDGPUFlatWorkGroupSizeAttr(D, AL, MinExpr, MaxExpr);
627}
628
629static bool checkAMDGPUWavesPerEUArguments(Sema &S, Expr *MinExpr,
630 Expr *MaxExpr,
631 const AMDGPUWavesPerEUAttr &Attr) {
632 if (S.DiagnoseUnexpandedParameterPack(MinExpr) ||
633 (MaxExpr && S.DiagnoseUnexpandedParameterPack(MaxExpr)))
634 return true;
635
636 // Accept template arguments for now as they depend on something else.
637 // We'll get to check them when they eventually get instantiated.
638 if (MinExpr->isValueDependent() || (MaxExpr && MaxExpr->isValueDependent()))
639 return false;
640
641 uint32_t Min = 0;
642 if (!S.checkUInt32Argument(Attr, MinExpr, Min, 0))
643 return true;
644
645 uint32_t Max = 0;
646 if (MaxExpr && !S.checkUInt32Argument(Attr, MaxExpr, Max, 1))
647 return true;
648
649 if (Min == 0 && Max != 0) {
650 S.Diag(Attr.getLocation(), diag::err_attribute_argument_invalid)
651 << &Attr << 0;
652 return true;
653 }
654 if (Max != 0 && Min > Max) {
655 S.Diag(Attr.getLocation(), diag::err_attribute_argument_invalid)
656 << &Attr << 1;
657 return true;
658 }
659
660 return false;
661}
662
// Validates MinExpr/MaxExpr and, when they are acceptable, returns a new
// AMDGPUWavesPerEUAttr placement-allocated on the ASTContext. Returns nullptr
// when validation fails.
663AMDGPUWavesPerEUAttr *
665 Expr *MinExpr, Expr *MaxExpr) {
666 ASTContext &Context = getASTContext();
// The stack-allocated attribute is handed to the argument checker, which uses
// it when emitting diagnostics.
667 AMDGPUWavesPerEUAttr TmpAttr(Context, CI, MinExpr, MaxExpr);
668
669 if (checkAMDGPUWavesPerEUArguments(SemaRef, MinExpr, MaxExpr, TmpAttr))
670 return nullptr;
671
672 return ::new (Context) AMDGPUWavesPerEUAttr(Context, CI, MinExpr, MaxExpr);
673}
674
676 Expr *MinExpr, Expr *MaxExpr) {
// Attach the attribute to D only if it could be created; a null result from
// CreateAMDGPUWavesPerEUAttr leaves D unchanged.
677 if (auto *Attr = CreateAMDGPUWavesPerEUAttr(CI, MinExpr, MaxExpr))
678 D->addAttr(Attr);
679}
680
683 return;
684
685 Expr *MinExpr = AL.getArgAsExpr(0);
686 Expr *MaxExpr = (AL.getNumArgs() > 1) ? AL.getArgAsExpr(1) : nullptr;
687
688 addAMDGPUWavesPerEUAttr(D, AL, MinExpr, MaxExpr);
689}
690
// The deprecation warning is emitted unconditionally, before the argument is
// validated.
692 Diag(AL.getLoc(), diag::warn_amdgpu_num_reg_attr_deprecated) << AL;
693
// Argument 0 must be accepted by checkUInt32Argument; otherwise no attribute
// is added.
694 uint32_t NumSGPR = 0;
695 Expr *NumSGPRExpr = AL.getArgAsExpr(0);
696 if (!SemaRef.checkUInt32Argument(AL, NumSGPRExpr, NumSGPR))
697 return;
698
// The attribute stores the evaluated integer rather than the expression.
699 D->addAttr(::new (getASTContext())
700 AMDGPUNumSGPRAttr(getASTContext(), AL, NumSGPR));
701}
702
// The deprecation warning is emitted unconditionally, before the argument is
// validated.
704 Diag(AL.getLoc(), diag::warn_amdgpu_num_reg_attr_deprecated) << AL;
705
// Argument 0 must be accepted by checkUInt32Argument; otherwise no attribute
// is added.
706 uint32_t NumVGPR = 0;
707 Expr *NumVGPRExpr = AL.getArgAsExpr(0);
708 if (!SemaRef.checkUInt32Argument(AL, NumVGPRExpr, NumVGPR))
709 return;
710
// The attribute stores the evaluated integer rather than the expression.
711 D->addAttr(::new (getASTContext())
712 AMDGPUNumVGPRAttr(getASTContext(), AL, NumVGPR));
713}
714
// Validates the X/Y/Z arguments of a max-num-work-groups attribute. XExpr is
// required; YExpr and ZExpr may be null. Returns true when the arguments are
// rejected and false when they are accepted or cannot be checked yet.
715static bool
717 Expr *ZExpr,
718 const AMDGPUMaxNumWorkGroupsAttr &Attr) {
719 if (S.DiagnoseUnexpandedParameterPack(XExpr) ||
720 (YExpr && S.DiagnoseUnexpandedParameterPack(YExpr)) ||
721 (ZExpr && S.DiagnoseUnexpandedParameterPack(ZExpr)))
722 return true;
723
724 // Accept template arguments for now as they depend on something else.
725 // We'll get to check them when they eventually get instantiated.
726 if (XExpr->isValueDependent() || (YExpr && YExpr->isValueDependent()) ||
727 (ZExpr && ZExpr->isValueDependent()))
728 return false;
729
// Every argument that is present must be accepted by checkUInt32Argument
// (called with StrictlyUnsigned set) and must be non-zero. Checking stops at
// the first offending argument.
730 uint32_t NumWG = 0;
731 Expr *Exprs[3] = {XExpr, YExpr, ZExpr};
732 for (int i = 0; i < 3; i++) {
733 if (Exprs[i]) {
734 if (!S.checkUInt32Argument(Attr, Exprs[i], NumWG, i,
735 /*StrictlyUnsigned=*/true))
736 return true;
737 if (NumWG == 0) {
738 S.Diag(Attr.getLoc(), diag::err_attribute_argument_is_zero)
739 << &Attr << Exprs[i]->getSourceRange();
740 return true;
741 }
742 }
743 }
744
745 return false;
746}
747
749 const AttributeCommonInfo &CI, Expr *XExpr, Expr *YExpr, Expr *ZExpr) {
// Validates the three expressions and, when they are acceptable, returns a
// new AMDGPUMaxNumWorkGroupsAttr placement-allocated on the ASTContext.
// Returns nullptr when validation fails.
750 ASTContext &Context = getASTContext();
// The stack-allocated attribute is handed to the argument checker, which uses
// it when emitting diagnostics; per the assertion message below, that is
// only acceptable outside a SFINAE context.
751 AMDGPUMaxNumWorkGroupsAttr TmpAttr(Context, CI, XExpr, YExpr, ZExpr);
752 assert(!SemaRef.isSFINAEContext() &&
753 "Can't produce SFINAE diagnostic pointing to temporary attribute");
754
755 if (checkAMDGPUMaxNumWorkGroupsArguments(SemaRef, XExpr, YExpr, ZExpr,
756 TmpAttr))
757 return nullptr;
758
759 return ::new (Context)
760 AMDGPUMaxNumWorkGroupsAttr(Context, CI, XExpr, YExpr, ZExpr);
761}
762
764 const AttributeCommonInfo &CI,
765 Expr *XExpr, Expr *YExpr,
766 Expr *ZExpr) {
// Attach the attribute to D only if it could be created; a null result from
// CreateAMDGPUMaxNumWorkGroupsAttr leaves D unchanged.
767 if (auto *Attr = CreateAMDGPUMaxNumWorkGroupsAttr(CI, XExpr, YExpr, ZExpr))
768 D->addAttr(Attr);
769}
770
772 const ParsedAttr &AL) {
// Arguments 1 and 2 (Y and Z) are optional and become nullptr when absent;
// argument 0 (X) is always read.
773 Expr *YExpr = (AL.getNumArgs() > 1) ? AL.getArgAsExpr(1) : nullptr;
774 Expr *ZExpr = (AL.getNumArgs() > 2) ? AL.getArgAsExpr(2) : nullptr;
775 addAMDGPUMaxNumWorkGroupsAttr(D, AL, AL.getArgAsExpr(0), YExpr, ZExpr);
776}
777
780 ASTContext &Ctx = getASTContext();
781 QualType BoolTy = Ctx.getLogicalOperationType();
782 SourceLocation Loc = CE->getExprLoc();
783
784 if (!CE->getBuiltinCallee())
785 return *ExpandedPredicates
786 .insert(SemaRef.BuildBoolLiteral(Loc, false).get())
787 .first;
788
789 bool P = false;
790 unsigned BI = CE->getBuiltinCallee();
791 if (Ctx.BuiltinInfo.isAuxBuiltinID(BI))
792 BI = Ctx.BuiltinInfo.getAuxBuiltinID(BI);
793
794 if (BI == AMDGPU::BI__builtin_amdgcn_processor_is) {
795 auto *GFX = dyn_cast<StringLiteral>(CE->getArg(0)->IgnoreParenCasts());
796 if (!GFX) {
797 Diag(Loc, diag::err_amdgcn_processor_is_arg_not_literal);
798 return nullptr;
799 }
800
801 StringRef N = GFX->getString();
802 const TargetInfo &TI = Ctx.getTargetInfo();
803 if (llvm::AMDGPU::parseArchAMDGCN(N) == llvm::AMDGPU::GK_NONE) {
804 Diag(Loc, diag::err_amdgcn_processor_is_arg_invalid_value) << N;
806 llvm::AMDGPU::fillValidArchListAMDGCN(ValidList);
807 if (!ValidList.empty())
808 Diag(Loc, diag::note_amdgcn_processor_is_valid_options)
809 << llvm::join(ValidList, ", ");
810 return nullptr;
811 }
812 if (TI.getTriple().isSPIRV()) {
813 CE->setType(BoolTy);
814 return *ExpandedPredicates.insert(CE).first;
815 }
816
817 P = TI.isProcessorName(N);
818 } else {
819 Expr *Arg = CE->getArg(0);
820 if (!Arg || Arg->getType() != Ctx.BuiltinFnTy) {
821 Diag(Loc, diag::err_amdgcn_is_invocable_arg_invalid_value) << Arg;
822 return nullptr;
823 }
824
825 if (Ctx.getTargetInfo().getTriple().isSPIRV()) {
826 CE->setType(BoolTy);
827 return *ExpandedPredicates.insert(CE).first;
828 }
829
831
832 StringRef RF = Ctx.BuiltinInfo.getRequiredFeatures(FD->getBuiltinID());
833 llvm::StringMap<bool> CF;
834 Ctx.getFunctionFeatureMap(CF, FD);
835
837 }
838
839 return *ExpandedPredicates.insert(SemaRef.BuildBoolLiteral(Loc, P).get())
840 .first;
841}
842
844 return ExpandedPredicates.contains(E);
845}
846
848 PotentiallyUnguardedBuiltinUsers.insert(FD);
849}
850
852 return PotentiallyUnguardedBuiltinUsers.contains(FD);
853}
854
855namespace {
856/// This class implements -Wamdgpu-unguarded-builtin-usage.
857///
858/// This is done with a traversal of the AST of a function that includes a
859/// call to a target specific builtin. Whenever we encounter an \c if of the
860/// form: \c if(__builtin_amdgcn_is_invocable), we consider the then statement
861/// guarded.
862class DiagnoseUnguardedBuiltins : public DynamicRecursiveASTVisitor {
863 // TODO: this could eventually be extended to consider what happens when there
864 // are multiple target architectures specified via target("arch=gfxXXX")
865 // target("arch=gfxyyy") etc., as well as feature disabling via "-XXX".
866 Sema &SemaRef;
867
868 SmallVector<StringRef> TargetFeatures;
870 SmallVector<unsigned> GuardedBuiltins;
871
872 static Expr *FindPredicate(Expr *Cond) {
873 if (auto *CE = dyn_cast<CallExpr>(Cond)) {
874 if (CE->getBuiltinCallee() == AMDGPU::BI__builtin_amdgcn_is_invocable ||
875 CE->getBuiltinCallee() == AMDGPU::BI__builtin_amdgcn_processor_is)
876 return Cond;
877 } else if (auto *UO = dyn_cast<UnaryOperator>(Cond)) {
878 return FindPredicate(UO->getSubExpr());
879 } else if (auto *BO = dyn_cast<BinaryOperator>(Cond)) {
880 if ((Cond = FindPredicate(BO->getLHS())))
881 return Cond;
882 return FindPredicate(BO->getRHS());
883 }
884 return nullptr;
885 }
886
887 bool EnterPredicateGuardedContext(CallExpr *P);
888 void ExitPredicateGuardedContext(bool WasProcessorCheck);
889 bool TraverseGuardedStmt(Stmt *S, CallExpr *P);
890
891public:
892 DiagnoseUnguardedBuiltins(Sema &SemaRef) : SemaRef(SemaRef) {
893 if (auto *TAT = SemaRef.getCurFunctionDecl(true)->getAttr<TargetAttr>()) {
894 // We use the somewhat misnamed x86 accessors because they provide exactly
895 // what we require.
896 TAT->getX86AddedFeatures(TargetFeatures);
897 if (auto GFXIP = TAT->getX86Architecture())
898 CurrentGFXIP.emplace_back(TAT->getLocation(), *GFXIP);
899 }
900 }
901
902 bool TraverseLambdaExpr(LambdaExpr *LE) override {
903 if (SemaRef.AMDGPU().HasPotentiallyUnguardedBuiltinUsage(
904 LE->getCallOperator()))
905 return true; // We have already handled this.
906 return DynamicRecursiveASTVisitor::TraverseLambdaExpr(LE);
907 }
908
909 bool TraverseStmt(Stmt *S) override {
910 if (!S)
911 return true;
913 }
914
915 void IssueDiagnostics(Stmt *S) { TraverseStmt(S); }
916
917 bool TraverseIfStmt(IfStmt *If) override {
918 if (auto *CE = dyn_cast_or_null<CallExpr>(FindPredicate(If->getCond())))
919 return TraverseGuardedStmt(If, CE);
920 return DynamicRecursiveASTVisitor::TraverseIfStmt(If);
921 }
922
923 bool TraverseCaseStmt(CaseStmt *CS) override {
924 return TraverseStmt(CS->getSubStmt());
925 }
926
927 bool TraverseConditionalOperator(ConditionalOperator *CO) override {
928 if (auto *CE = dyn_cast_or_null<CallExpr>(FindPredicate(CO->getCond())))
929 return TraverseGuardedStmt(CO, CE);
930 return DynamicRecursiveASTVisitor::TraverseConditionalOperator(CO);
931 }
932
933 bool VisitAsmStmt(AsmStmt *ASM) override;
934 bool VisitCallExpr(CallExpr *CE) override;
935};
936
937bool DiagnoseUnguardedBuiltins::EnterPredicateGuardedContext(CallExpr *P) {
938 bool IsProcessorCheck =
939 P->getBuiltinCallee() == AMDGPU::BI__builtin_amdgcn_processor_is;
940
941 if (IsProcessorCheck) {
942 StringRef G = cast<clang::StringLiteral>(P->getArg(0))->getString();
943 // TODO: handle generic ISAs.
944 if (!CurrentGFXIP.empty() && G != CurrentGFXIP.back().second) {
945 SemaRef.Diag(P->getExprLoc(),
946 diag::err_amdgcn_conflicting_is_processor_options)
947 << P;
948 SemaRef.Diag(CurrentGFXIP.back().first,
949 diag::note_amdgcn_previous_is_processor_guard);
950 }
951 CurrentGFXIP.emplace_back(P->getExprLoc(), G);
952 } else {
953 auto *FD = cast<FunctionDecl>(
954 cast<DeclRefExpr>(P->getArg(0))->getReferencedDeclOfCallee());
955 GuardedBuiltins.push_back(FD->getBuiltinID());
956 }
957
958 return IsProcessorCheck;
959}
960
961void DiagnoseUnguardedBuiltins::ExitPredicateGuardedContext(bool WasProcCheck) {
962 if (WasProcCheck)
963 CurrentGFXIP.pop_back();
964 else
965 GuardedBuiltins.pop_back();
966}
967
968inline std::pair<Stmt *, Stmt *> GetTraversalOrder(Stmt *S) {
969 std::pair<Stmt *, Stmt *> Ordered;
970 Expr *Condition = nullptr;
971
972 if (auto *CO = dyn_cast<ConditionalOperator>(S)) {
973 Condition = CO->getCond();
974 Ordered = {CO->getTrueExpr(), CO->getFalseExpr()};
975 } else if (auto *If = dyn_cast<IfStmt>(S)) {
976 Condition = If->getCond();
977 Ordered = {If->getThen(), If->getElse()};
978 }
979
980 if (auto *UO = dyn_cast<UnaryOperator>(Condition))
981 if (UO->getOpcode() == UnaryOperatorKind::UO_LNot)
982 std::swap(Ordered.first, Ordered.second);
983
984 return Ordered;
985}
986
987bool DiagnoseUnguardedBuiltins::TraverseGuardedStmt(Stmt *S, CallExpr *P) {
988 assert(S && "Unexpected missing Statement!");
989 assert(P && "Unexpected missing Predicate!");
990
991 auto [Guarded, Unguarded] = GetTraversalOrder(S);
992
993 bool WasProcessorCheck = EnterPredicateGuardedContext(P);
994
995 bool Continue = TraverseStmt(Guarded);
996
997 ExitPredicateGuardedContext(WasProcessorCheck);
998
999 return Continue && TraverseStmt(Unguarded);
1000}
1001
1002bool DiagnoseUnguardedBuiltins::VisitAsmStmt(AsmStmt *ASM) {
1003 // TODO: should we check if the ASM is valid for the target? Can we?
1004 if (!CurrentGFXIP.empty())
1005 return true;
1006
1007 std::string S = ASM->generateAsmString(SemaRef.getASTContext());
1008 SemaRef.Diag(ASM->getAsmLoc(), diag::warn_amdgcn_unguarded_asm_stmt) << S;
1009 SemaRef.Diag(ASM->getAsmLoc(), diag::note_amdgcn_unguarded_asm_silence) << S;
1010
1011 return true;
1012}
1013
1014bool DiagnoseUnguardedBuiltins::VisitCallExpr(CallExpr *CE) {
1015 unsigned ID = CE->getBuiltinCallee();
1016 Builtin::Context &BInfo = SemaRef.getASTContext().BuiltinInfo;
1017
1018 if (!ID)
1019 return true;
1020 if (!BInfo.isTSBuiltin(ID))
1021 return true;
1022 if (ID == AMDGPU::BI__builtin_amdgcn_processor_is ||
1023 ID == AMDGPU::BI__builtin_amdgcn_is_invocable)
1024 return true;
1025 if (llvm::find(GuardedBuiltins, ID) != GuardedBuiltins.end())
1026 return true;
1027
1028 StringRef FL(BInfo.getRequiredFeatures(ID));
1029 llvm::StringMap<bool> FeatureMap;
1030 if (CurrentGFXIP.empty()) {
1031 for (auto &&F : TargetFeatures)
1032 FeatureMap[F] = true;
1033 for (auto &&GID : GuardedBuiltins)
1034 for (auto &&F : llvm::split(BInfo.getRequiredFeatures(GID), ','))
1035 FeatureMap[F] = true;
1036 } else {
1037 static const llvm::Triple AMDGCN(llvm::Triple::amdgcn,
1038 llvm::Triple::NoSubArch, llvm::Triple::AMD,
1039 llvm::Triple::AMDHSA);
1040 llvm::AMDGPU::fillAMDGPUFeatureMap(CurrentGFXIP.back().second, AMDGCN,
1041 FeatureMap);
1042 }
1043
1044 FunctionDecl *BI = CE->getDirectCallee();
1045 SourceLocation BICallLoc = CE->getExprLoc();
1046 if (Builtin::evaluateRequiredTargetFeatures(FL, FeatureMap)) {
1047 SemaRef.Diag(BICallLoc, diag::warn_amdgcn_unguarded_builtin) << BI;
1048 SemaRef.Diag(BICallLoc, diag::note_amdgcn_unguarded_builtin_silence) << BI;
1049 } else {
1050 StringRef GFXIP = CurrentGFXIP.empty() ? "" : CurrentGFXIP.back().second;
1051 SemaRef.Diag(BICallLoc, diag::err_amdgcn_incompatible_builtin)
1052 << BI << FL << !CurrentGFXIP.empty() << GFXIP;
1053 if (!CurrentGFXIP.empty())
1054 SemaRef.Diag(CurrentGFXIP.back().first,
1055 diag::note_amdgcn_previous_is_processor_guard);
1056 }
1057
1058 return true;
1059}
1060} // Unnamed namespace
1061
1063 DiagnoseUnguardedBuiltins(SemaRef).IssueDiagnostics(FD->getBody());
1064}
1065} // namespace clang
#define GFX(gpu)
This file declares semantic analysis functions specific to AMDGPU.
Enumerates target-specific builtins in their own namespaces within namespace clang.
APSInt & getInt()
Definition APValue.h:508
Holds long-lived AST nodes (such as types and decls) that can be referred to throughout the semantic ...
Definition ASTContext.h:223
Builtin::Context & BuiltinInfo
Definition ASTContext.h:807
CanQualType getLogicalOperationType() const
The result type of logical operations, '<', '>', '!=', etc.
CanQualType BuiltinFnTy
const TargetInfo & getTargetInfo() const
Definition ASTContext.h:924
void getFunctionFeatureMap(llvm::StringMap< bool > &FeatureMap, const FunctionDecl *) const
unsigned getTargetAddressSpace(LangAS AS) const
PtrTy get() const
Definition Ownership.h:171
bool isInvalid() const
Definition Ownership.h:167
static std::unique_ptr< AtomicScopeModel > create(AtomicScopeModelKind K)
Create an atomic scope model by AtomicScopeModelKind.
Definition SyncScope.h:298
Attr - This represents one attribute.
Definition Attr.h:46
SourceLocation getLocation() const
Definition Attr.h:99
SourceLocation getLoc() const
bool isAuxBuiltinID(unsigned ID) const
Return true if the builtin ID belongs exclusively to the AuxTarget, and false if it belongs to both p...
Definition Builtins.h:443
unsigned getAuxBuiltinID(unsigned ID) const
Return real builtin ID (i.e.
Definition Builtins.h:449
const char * getRequiredFeatures(unsigned ID) const
Definition Builtins.cpp:116
CallExpr - Represents a function call (C99 6.5.2.2, C++ [expr.call]).
Definition Expr.h:2949
Expr * getArg(unsigned Arg)
getArg - Return the specified argument.
Definition Expr.h:3153
SourceLocation getBeginLoc() const
Definition Expr.h:3283
unsigned getBuiltinCallee() const
getBuiltinCallee - If this is a call to a builtin, return the builtin ID of the callee.
Definition Expr.cpp:1598
unsigned getNumArgs() const
getNumArgs - Return the number of actual arguments to this call.
Definition Expr.h:3140
Decl - This represents one declaration (or definition), e.g.
Definition DeclBase.h:86
void addAttr(Attr *A)
virtual bool TraverseStmt(MaybeConst< Stmt > *S)
This represents one expression.
Definition Expr.h:112
bool EvaluateAsInt(EvalResult &Result, const ASTContext &Ctx, SideEffectsKind AllowSideEffects=SE_NoSideEffects, bool InConstantContext=false) const
EvaluateAsInt - Return true if this is a constant which we can fold and convert to an integer,...
Expr * IgnoreParenCasts() LLVM_READONLY
Skip past any parentheses and casts which might surround this expression until reaching a fixed point...
Definition Expr.cpp:3104
void setType(QualType t)
Definition Expr.h:145
bool isValueDependent() const
Determines whether the value of this expression depends on.
Definition Expr.h:177
Expr * IgnoreParens() LLVM_READONLY
Skip past any parentheses which might surround this expression until reaching a fixed point.
Definition Expr.cpp:3095
Decl * getReferencedDeclOfCallee()
Definition Expr.cpp:1552
bool isInstantiationDependent() const
Whether this expression is instantiation-dependent, meaning that it depends in some way on.
Definition Expr.h:223
SourceLocation getExprLoc() const LLVM_READONLY
getExprLoc - Return the preferred location for the arrow when diagnosing a problem with a generic exp...
Definition Expr.cpp:283
QualType getType() const
Definition Expr.h:144
Represents a function declaration or definition.
Definition Decl.h:2027
Stmt * getBody(const FunctionDecl *&Definition) const
Retrieve the body (definition) of the function.
Definition Decl.cpp:3256
ParsedAttr - Represents a syntactic attribute.
Definition ParsedAttr.h:119
unsigned getNumArgs() const
getNumArgs - Return the number of actual arguments to this attribute.
Definition ParsedAttr.h:371
Expr * getArgAsExpr(unsigned Arg) const
Definition ParsedAttr.h:383
bool checkAtLeastNumArgs(class Sema &S, unsigned Num) const
Check if the attribute has at least as many args as Num.
bool checkAtMostNumArgs(class Sema &S, unsigned Num) const
Check if the attribute has at most as many args as Num.
A (possibly-)qualified type.
Definition TypeBase.h:937
const Type * getTypePtr() const
Retrieves a pointer to the underlying (unqualified) type.
Definition TypeBase.h:8447
Scope - A scope is a transient data structure that is used while parsing the program.
Definition Scope.h:41
void handleAMDGPUMaxNumWorkGroupsAttr(Decl *D, const ParsedAttr &AL)
void addAMDGPUFlatWorkGroupSizeAttr(Decl *D, const AttributeCommonInfo &CI, Expr *Min, Expr *Max)
addAMDGPUFlatWorkGroupSizeAttr - Adds an amdgpu_flat_work_group_size attribute to a particular declar...
bool checkCoopAtomicFunctionCall(CallExpr *TheCall, bool IsStore)
bool HasPotentiallyUnguardedBuiltinUsage(FunctionDecl *FD) const
void handleAMDGPUFlatWorkGroupSizeAttr(Decl *D, const ParsedAttr &AL)
bool checkAVLoadStore(CallExpr *TheCall, bool IsStore)
bool checkAtomicMonitorLoad(CallExpr *TheCall)
bool checkAtomicOrderingCABIArg(Expr *E, bool MayLoad, bool MayStore)
Emits a diagnostic if the E is not an atomic ordering encoded in the C ABI format,...
void handleAMDGPUNumSGPRAttr(Decl *D, const ParsedAttr &AL)
AMDGPUMaxNumWorkGroupsAttr * CreateAMDGPUMaxNumWorkGroupsAttr(const AttributeCommonInfo &CI, Expr *XExpr, Expr *YExpr, Expr *ZExpr)
Create an AMDGPUMaxNumWorkGroupsAttr attribute.
Expr * ExpandAMDGPUPredicateBuiltIn(Expr *CE)
Expand a valid use of the feature identification builtins into its corresponding sequence of instruct...
AMDGPUWavesPerEUAttr * CreateAMDGPUWavesPerEUAttr(const AttributeCommonInfo &CI, Expr *Min, Expr *Max)
Create an AMDGPUWavesPerEUAttr attribute.
void DiagnoseUnguardedBuiltinUsage(FunctionDecl *FD)
void handleAMDGPUNumVGPRAttr(Decl *D, const ParsedAttr &AL)
AMDGPUFlatWorkGroupSizeAttr * CreateAMDGPUFlatWorkGroupSizeAttr(const AttributeCommonInfo &CI, Expr *Min, Expr *Max)
Create an AMDGPUFlatWorkGroupSizeAttr attribute.
void AddPotentiallyUnguardedBuiltinUser(FunctionDecl *FD)
Diagnose unguarded usages of AMDGPU builtins and recommend guarding with __builtin_amdgcn_is_invocabl...
bool checkMovDPPFunctionCall(CallExpr *TheCall, unsigned NumArgs, unsigned NumDataArgs)
void handleAMDGPUWavesPerEUAttr(Decl *D, const ParsedAttr &AL)
bool IsPredicate(Expr *E) const
bool CheckAMDGCNBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall)
void addAMDGPUWavesPerEUAttr(Decl *D, const AttributeCommonInfo &CI, Expr *Min, Expr *Max)
addAMDGPUWavesPerEUAttr - Adds an amdgpu_waves_per_eu attribute to a particular declaration.
void addAMDGPUMaxNumWorkGroupsAttr(Decl *D, const AttributeCommonInfo &CI, Expr *XExpr, Expr *YExpr, Expr *ZExpr)
addAMDGPUMaxNumWorkGroupsAttr - Adds an amdgpu_max_num_work_groups attribute to a particular declarat...
SemaBase(Sema &S)
Definition SemaBase.cpp:7
ASTContext & getASTContext() const
Definition SemaBase.cpp:9
Sema & SemaRef
Definition SemaBase.h:40
SemaDiagnosticBuilder Diag(SourceLocation Loc, unsigned DiagID)
Emit a diagnostic.
Definition SemaBase.cpp:61
Sema - This implements semantic analysis and AST building for C.
Definition Sema.h:869
ASTContext & Context
Definition Sema.h:1309
bool DiagnoseUnexpandedParameterPack(SourceLocation Loc, TypeSourceInfo *T, UnexpandedParameterPackContext UPPC)
If the given type contains an unexpanded parameter pack, diagnose the error.
bool checkUInt32Argument(const AttrInfo &AI, const Expr *Expr, uint32_t &Val, unsigned Idx=UINT_MAX, bool StrictlyUnsigned=false)
If Expr is a valid integer constant, get the value of the integer expression and return success or fa...
Definition Sema.h:4911
Encodes a location in the source.
SourceRange getSourceRange() const LLVM_READONLY
SourceLocation tokens are not useful in isolation - they are low level value objects created/interpre...
Definition Stmt.cpp:343
SourceLocation getBeginLoc() const LLVM_READONLY
Definition Stmt.cpp:355
Exposes information about the current target.
Definition TargetInfo.h:227
const llvm::Triple & getTriple() const
Returns the target triple of the primary target.
virtual bool isProcessorName(StringRef Name) const
Returns true if the target's processor is compatible with the processor named by Name,...
The base class of the type hierarchy.
Definition TypeBase.h:1875
QualType getPointeeType() const
If this is a pointer, ObjC object pointer, or block pointer, this returns the respective pointee.
Definition Type.cpp:789
Defines the clang::TargetInfo interface.
bool evaluateRequiredTargetFeatures(llvm::StringRef RequiredFatures, const llvm::StringMap< bool > &TargetFetureMap)
Returns true if the required target features of a builtin function are enabled.
bool LE(InterpState &S, CodePtr OpPC)
Definition Interp.h:1514
The JSON file list parser is used to communicate input to InstallAPI.
bool isa(CodeGen::Address addr)
Definition Address.h:330
static bool checkScopeAsInt(SemaAMDGPU &S, Expr *Scope)
@ If
'if' clause, allowed on all the Compute Constructs, Data Constructs, Executable Constructs,...
Expr * Cond
};
@ Result
The result type of a method or function.
Definition TypeBase.h:905
static bool checkAMDGPUMaxNumWorkGroupsArguments(Sema &S, Expr *XExpr, Expr *YExpr, Expr *ZExpr, const AMDGPUMaxNumWorkGroupsAttr &Attr)
DynamicRecursiveASTVisitorBase< false > DynamicRecursiveASTVisitor
U cast(CodeGen::Address addr)
Definition Address.h:327
static bool checkAMDGPUFlatWorkGroupSizeArguments(Sema &S, Expr *MinExpr, Expr *MaxExpr, const AMDGPUFlatWorkGroupSizeAttr &Attr)
static bool checkGlobalOrFlatPointerArg(SemaAMDGPU &S, CallExpr *TheCall)
ActionResult< Expr * > ExprResult
Definition Ownership.h:249
static bool checkAMDGPUWavesPerEUArguments(Sema &S, Expr *MinExpr, Expr *MaxExpr, const AMDGPUWavesPerEUAttr &Attr)
__packed_splat4 __packed_splat2 __packed_splat8 __packed_splat4 __packed_splat2 uint8_t
__packed_splat4 __packed_splat2 __packed_splat8 __packed_splat4 __packed_splat2 __packed_splat4 __packed_splat2 __packed_splat8 __packed_splat4 uint32_t
EvalResult is a struct with detailed info about an evaluated expression.
Definition Expr.h:652
APValue Val
Val - This is the value the expression can be folded to.
Definition Expr.h:654