clang 23.0.0git
SemaAMDGPU.cpp
Go to the documentation of this file.
1//===------ SemaAMDGPU.cpp ------- AMDGPU target-specific routines --------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements semantic analysis functions specific to AMDGPU.
10//
11//===----------------------------------------------------------------------===//
12
14#include "clang/AST/Decl.h"
16#include "clang/AST/Expr.h"
22#include "clang/Sema/Scope.h"
23#include "clang/Sema/Sema.h"
24#include "llvm/ADT/SmallVector.h"
25#include "llvm/ADT/StringExtras.h"
26#include "llvm/ADT/StringMap.h"
27#include "llvm/Support/AMDGPUAddrSpace.h"
28#include "llvm/Support/AtomicOrdering.h"
29#include "llvm/TargetParser/AMDGPUTargetParser.h"
30#include <cstdint>
31#include <utility>
32
33namespace clang {
34
36
38 CallExpr *TheCall) {
39 // position of memory order and scope arguments in the builtin
40 unsigned OrderIndex, ScopeIndex;
41
42 const auto *FD = SemaRef.getCurFunctionDecl(/*AllowLambda=*/true);
43 assert(FD && "AMDGPU builtins should not be used outside of a function");
44 llvm::StringMap<bool> CallerFeatureMap;
45 getASTContext().getFunctionFeatureMap(CallerFeatureMap, FD);
46 bool HasGFX950Insts =
47 Builtin::evaluateRequiredTargetFeatures("gfx950-insts", CallerFeatureMap);
48
49 switch (BuiltinID) {
50 case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_load_lds:
51 case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_load_async_lds:
52 case AMDGPU::BI__builtin_amdgcn_struct_ptr_buffer_load_lds:
53 case AMDGPU::BI__builtin_amdgcn_struct_ptr_buffer_load_async_lds:
54 case AMDGPU::BI__builtin_amdgcn_load_to_lds:
55 case AMDGPU::BI__builtin_amdgcn_load_async_to_lds:
56 case AMDGPU::BI__builtin_amdgcn_global_load_lds:
57 case AMDGPU::BI__builtin_amdgcn_global_load_async_lds: {
58 constexpr const int SizeIdx = 2;
59 llvm::APSInt Size;
60 Expr *ArgExpr = TheCall->getArg(SizeIdx);
61 // Check for instantiation-dependent expressions (e.g., involving template
62 // parameters). These will be checked again during template instantiation.
63 if (ArgExpr->isInstantiationDependent())
64 return false;
65 [[maybe_unused]] ExprResult R =
66 SemaRef.VerifyIntegerConstantExpression(ArgExpr, &Size);
67 assert(!R.isInvalid());
68 switch (Size.getSExtValue()) {
69 case 1:
70 case 2:
71 case 4:
72 return false;
73 case 12:
74 case 16: {
75 if (HasGFX950Insts)
76 return false;
77 [[fallthrough]];
78 }
79 default:
80 SemaRef.targetDiag(ArgExpr->getExprLoc(),
81 diag::err_amdgcn_load_lds_size_invalid_value)
82 << ArgExpr->getSourceRange();
83 SemaRef.targetDiag(ArgExpr->getExprLoc(),
84 diag::note_amdgcn_load_lds_size_valid_value)
85 << HasGFX950Insts << ArgExpr->getSourceRange();
86 return true;
87 }
88 }
89 case AMDGPU::BI__builtin_amdgcn_get_fpenv:
90 case AMDGPU::BI__builtin_amdgcn_set_fpenv:
91 return false;
92 case AMDGPU::BI__builtin_amdgcn_atomic_inc32:
93 case AMDGPU::BI__builtin_amdgcn_atomic_inc64:
94 case AMDGPU::BI__builtin_amdgcn_atomic_dec32:
95 case AMDGPU::BI__builtin_amdgcn_atomic_dec64:
96 OrderIndex = 2;
97 ScopeIndex = 3;
98 break;
99 case AMDGPU::BI__builtin_amdgcn_fence:
100 OrderIndex = 0;
101 ScopeIndex = 1;
102 break;
103 case AMDGPU::BI__builtin_amdgcn_s_setreg:
104 return SemaRef.BuiltinConstantArgRange(TheCall, /*ArgNum=*/0, /*Low=*/0,
105 /*High=*/UINT16_MAX);
106 case AMDGPU::BI__builtin_amdgcn_s_wait_event: {
107 llvm::APSInt Result;
108 if (SemaRef.BuiltinConstantArg(TheCall, 0, Result))
109 return true;
110
112 "gfx12-insts", CallerFeatureMap);
113
114 // gfx11 -> gfx12 changed the interpretation of the bitmask. gfx12 inverted
115 // the interpretation for export_ready, but shifted the used bit by 1. Thus
116 // waiting for the export_ready event can use a value of 2 universally.
117 if (((IsGFX12Plus && !Result[1]) || (!IsGFX12Plus && Result[0])) ||
118 Result.getZExtValue() > 2) {
119 Expr *ArgExpr = TheCall->getArg(0);
120 SemaRef.targetDiag(ArgExpr->getExprLoc(),
121 diag::warn_amdgpu_s_wait_event_mask_no_effect_target)
122 << ArgExpr->getSourceRange();
123 SemaRef.targetDiag(ArgExpr->getExprLoc(),
124 diag::note_amdgpu_s_wait_event_suggested_value)
125 << ArgExpr->getSourceRange();
126 }
127
128 return false;
129 }
130 case AMDGPU::BI__builtin_amdgcn_mov_dpp:
131 return checkMovDPPFunctionCall(TheCall, 5, 1);
132 case AMDGPU::BI__builtin_amdgcn_mov_dpp8:
133 return checkMovDPPFunctionCall(TheCall, 2, 1);
134 case AMDGPU::BI__builtin_amdgcn_update_dpp:
135 return checkMovDPPFunctionCall(TheCall, 6, 2);
136 case AMDGPU::BI__builtin_amdgcn_cvt_scale_pk8_f16_fp8:
137 case AMDGPU::BI__builtin_amdgcn_cvt_scale_pk8_bf16_fp8:
138 case AMDGPU::BI__builtin_amdgcn_cvt_scale_pk8_f16_bf8:
139 case AMDGPU::BI__builtin_amdgcn_cvt_scale_pk8_bf16_bf8:
140 case AMDGPU::BI__builtin_amdgcn_cvt_scale_pk8_f16_fp4:
141 case AMDGPU::BI__builtin_amdgcn_cvt_scale_pk8_bf16_fp4:
142 case AMDGPU::BI__builtin_amdgcn_cvt_scale_pk8_f32_fp8:
143 case AMDGPU::BI__builtin_amdgcn_cvt_scale_pk8_f32_bf8:
144 case AMDGPU::BI__builtin_amdgcn_cvt_scale_pk8_f32_fp4:
145 case AMDGPU::BI__builtin_amdgcn_cvt_scale_pk16_f16_fp6:
146 case AMDGPU::BI__builtin_amdgcn_cvt_scale_pk16_bf16_fp6:
147 case AMDGPU::BI__builtin_amdgcn_cvt_scale_pk16_f16_bf6:
148 case AMDGPU::BI__builtin_amdgcn_cvt_scale_pk16_bf16_bf6:
149 case AMDGPU::BI__builtin_amdgcn_cvt_scale_pk16_f32_fp6:
150 case AMDGPU::BI__builtin_amdgcn_cvt_scale_pk16_f32_bf6:
151 return SemaRef.BuiltinConstantArgRange(TheCall, 2, 0, 15);
152 case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_load_32x4B:
153 case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_load_16x8B:
154 case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_load_8x16B:
155 return checkCoopAtomicFunctionCall(TheCall, /*IsStore=*/false);
156 case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_store_32x4B:
157 case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_store_16x8B:
158 case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_store_8x16B:
159 return checkCoopAtomicFunctionCall(TheCall, /*IsStore=*/true);
160 case AMDGPU::BI__builtin_amdgcn_flat_load_monitor_b32:
161 case AMDGPU::BI__builtin_amdgcn_flat_load_monitor_b64:
162 case AMDGPU::BI__builtin_amdgcn_flat_load_monitor_b128:
163 case AMDGPU::BI__builtin_amdgcn_global_load_monitor_b32:
164 case AMDGPU::BI__builtin_amdgcn_global_load_monitor_b64:
165 case AMDGPU::BI__builtin_amdgcn_global_load_monitor_b128:
166 return checkAtomicMonitorLoad(TheCall);
167 case AMDGPU::BI__builtin_amdgcn_image_load_1d_v4f32_i32:
168 case AMDGPU::BI__builtin_amdgcn_image_load_1darray_v4f32_i32:
169 case AMDGPU::BI__builtin_amdgcn_image_load_1d_v4f16_i32:
170 case AMDGPU::BI__builtin_amdgcn_image_load_1darray_v4f16_i32:
171 case AMDGPU::BI__builtin_amdgcn_image_load_2d_f32_i32:
172 case AMDGPU::BI__builtin_amdgcn_image_load_2d_v4f32_i32:
173 case AMDGPU::BI__builtin_amdgcn_image_load_2d_v4f16_i32:
174 case AMDGPU::BI__builtin_amdgcn_image_load_2darray_f32_i32:
175 case AMDGPU::BI__builtin_amdgcn_image_load_2darray_v4f32_i32:
176 case AMDGPU::BI__builtin_amdgcn_image_load_2darray_v4f16_i32:
177 case AMDGPU::BI__builtin_amdgcn_image_load_3d_v4f32_i32:
178 case AMDGPU::BI__builtin_amdgcn_image_load_3d_v4f16_i32:
179 case AMDGPU::BI__builtin_amdgcn_image_load_cube_v4f32_i32:
180 case AMDGPU::BI__builtin_amdgcn_image_load_cube_v4f16_i32:
181 case AMDGPU::BI__builtin_amdgcn_image_load_mip_1d_v4f32_i32:
182 case AMDGPU::BI__builtin_amdgcn_image_load_mip_1d_v4f16_i32:
183 case AMDGPU::BI__builtin_amdgcn_image_load_mip_1darray_v4f32_i32:
184 case AMDGPU::BI__builtin_amdgcn_image_load_mip_1darray_v4f16_i32:
185 case AMDGPU::BI__builtin_amdgcn_image_load_mip_2d_f32_i32:
186 case AMDGPU::BI__builtin_amdgcn_image_load_mip_2d_v4f32_i32:
187 case AMDGPU::BI__builtin_amdgcn_image_load_mip_2d_v4f16_i32:
188 case AMDGPU::BI__builtin_amdgcn_image_load_mip_2darray_f32_i32:
189 case AMDGPU::BI__builtin_amdgcn_image_load_mip_2darray_v4f32_i32:
190 case AMDGPU::BI__builtin_amdgcn_image_load_mip_2darray_v4f16_i32:
191 case AMDGPU::BI__builtin_amdgcn_image_load_mip_3d_v4f32_i32:
192 case AMDGPU::BI__builtin_amdgcn_image_load_mip_3d_v4f16_i32:
193 case AMDGPU::BI__builtin_amdgcn_image_load_mip_cube_v4f32_i32:
194 case AMDGPU::BI__builtin_amdgcn_image_load_mip_cube_v4f16_i32:
195 case AMDGPU::BI__builtin_amdgcn_image_sample_1d_v4f32_f32:
196 case AMDGPU::BI__builtin_amdgcn_image_sample_1darray_v4f32_f32:
197 case AMDGPU::BI__builtin_amdgcn_image_sample_1d_v4f16_f32:
198 case AMDGPU::BI__builtin_amdgcn_image_sample_1darray_v4f16_f32:
199 case AMDGPU::BI__builtin_amdgcn_image_sample_2d_f32_f32:
200 case AMDGPU::BI__builtin_amdgcn_image_sample_2d_v4f32_f32:
201 case AMDGPU::BI__builtin_amdgcn_image_sample_2d_v4f16_f32:
202 case AMDGPU::BI__builtin_amdgcn_image_sample_2darray_f32_f32:
203 case AMDGPU::BI__builtin_amdgcn_image_sample_2darray_v4f32_f32:
204 case AMDGPU::BI__builtin_amdgcn_image_sample_2darray_v4f16_f32:
205 case AMDGPU::BI__builtin_amdgcn_image_sample_3d_v4f32_f32:
206 case AMDGPU::BI__builtin_amdgcn_image_sample_3d_v4f16_f32:
207 case AMDGPU::BI__builtin_amdgcn_image_sample_cube_v4f32_f32:
208 case AMDGPU::BI__builtin_amdgcn_image_sample_cube_v4f16_f32:
209 case AMDGPU::BI__builtin_amdgcn_image_sample_lz_1d_v4f32_f32:
210 case AMDGPU::BI__builtin_amdgcn_image_sample_lz_1d_v4f16_f32:
211 case AMDGPU::BI__builtin_amdgcn_image_sample_lz_1darray_v4f32_f32:
212 case AMDGPU::BI__builtin_amdgcn_image_sample_lz_1darray_v4f16_f32:
213 case AMDGPU::BI__builtin_amdgcn_image_sample_lz_2d_f32_f32:
214 case AMDGPU::BI__builtin_amdgcn_image_sample_lz_2d_v4f32_f32:
215 case AMDGPU::BI__builtin_amdgcn_image_sample_lz_2d_v4f16_f32:
216 case AMDGPU::BI__builtin_amdgcn_image_sample_lz_2darray_f32_f32:
217 case AMDGPU::BI__builtin_amdgcn_image_sample_lz_2darray_v4f32_f32:
218 case AMDGPU::BI__builtin_amdgcn_image_sample_lz_2darray_v4f16_f32:
219 case AMDGPU::BI__builtin_amdgcn_image_sample_lz_3d_v4f32_f32:
220 case AMDGPU::BI__builtin_amdgcn_image_sample_lz_3d_v4f16_f32:
221 case AMDGPU::BI__builtin_amdgcn_image_sample_lz_cube_v4f32_f32:
222 case AMDGPU::BI__builtin_amdgcn_image_sample_lz_cube_v4f16_f32:
223 case AMDGPU::BI__builtin_amdgcn_image_sample_l_1d_v4f32_f32:
224 case AMDGPU::BI__builtin_amdgcn_image_sample_l_1d_v4f16_f32:
225 case AMDGPU::BI__builtin_amdgcn_image_sample_l_1darray_v4f32_f32:
226 case AMDGPU::BI__builtin_amdgcn_image_sample_l_1darray_v4f16_f32:
227 case AMDGPU::BI__builtin_amdgcn_image_sample_l_2d_f32_f32:
228 case AMDGPU::BI__builtin_amdgcn_image_sample_l_2d_v4f16_f32:
229 case AMDGPU::BI__builtin_amdgcn_image_sample_l_2d_v4f32_f32:
230 case AMDGPU::BI__builtin_amdgcn_image_sample_l_2darray_f32_f32:
231 case AMDGPU::BI__builtin_amdgcn_image_sample_l_2darray_v4f32_f32:
232 case AMDGPU::BI__builtin_amdgcn_image_sample_l_2darray_v4f16_f32:
233 case AMDGPU::BI__builtin_amdgcn_image_sample_l_3d_v4f32_f32:
234 case AMDGPU::BI__builtin_amdgcn_image_sample_l_3d_v4f16_f32:
235 case AMDGPU::BI__builtin_amdgcn_image_sample_l_cube_v4f32_f32:
236 case AMDGPU::BI__builtin_amdgcn_image_sample_l_cube_v4f16_f32:
237 case AMDGPU::BI__builtin_amdgcn_image_sample_d_1d_v4f32_f32:
238 case AMDGPU::BI__builtin_amdgcn_image_sample_d_1d_v4f16_f32:
239 case AMDGPU::BI__builtin_amdgcn_image_sample_d_1darray_v4f32_f32:
240 case AMDGPU::BI__builtin_amdgcn_image_sample_d_1darray_v4f16_f32:
241 case AMDGPU::BI__builtin_amdgcn_image_sample_d_2d_f32_f32:
242 case AMDGPU::BI__builtin_amdgcn_image_sample_d_2d_v4f32_f32:
243 case AMDGPU::BI__builtin_amdgcn_image_sample_d_2d_v4f16_f32:
244 case AMDGPU::BI__builtin_amdgcn_image_sample_d_2darray_f32_f32:
245 case AMDGPU::BI__builtin_amdgcn_image_sample_d_2darray_v4f32_f32:
246 case AMDGPU::BI__builtin_amdgcn_image_sample_d_2darray_v4f16_f32:
247 case AMDGPU::BI__builtin_amdgcn_image_sample_d_3d_v4f32_f32:
248 case AMDGPU::BI__builtin_amdgcn_image_sample_d_3d_v4f16_f32:
249 case AMDGPU::BI__builtin_amdgcn_image_gather4_lz_2d_v4f32_f32: {
250 StringRef FeatureList(
251 getASTContext().BuiltinInfo.getRequiredFeatures(BuiltinID));
253 CallerFeatureMap)) {
254 Diag(TheCall->getBeginLoc(), diag::err_builtin_needs_feature)
255 << FD->getDeclName() << FeatureList;
256 return false;
257 }
258
259 unsigned ArgCount = TheCall->getNumArgs() - 1;
260 llvm::APSInt Result;
261
262 // Complain about dmask values which are too huge to fully fit into 4 bits
263 // (which is the actual size of the dmask in corresponding HW instructions).
264 constexpr unsigned DMaskArgNo = 0;
265 constexpr int Low = 0;
266 constexpr int High = 15;
267 if (SemaRef.BuiltinConstantArg(TheCall, DMaskArgNo, Result) ||
268 SemaRef.BuiltinConstantArgRange(TheCall, DMaskArgNo, Low, High,
269 /* RangeIsError = */ true))
270 return true;
271
272 // Dmask indicates which elements should be returned and it is not possible
273 // to return more values than there are elements in return type.
274 int NumElementsInRetTy = 1;
275 const Type *RetTy = TheCall->getType().getTypePtr();
276 if (auto *VTy = dyn_cast<VectorType>(RetTy))
277 NumElementsInRetTy = VTy->getNumElements();
278 int NumActiveBitsInDMask =
279 llvm::popcount(static_cast<uint8_t>(Result.getExtValue()));
280 if (NumActiveBitsInDMask > NumElementsInRetTy) {
281 Diag(TheCall->getBeginLoc(),
282 diag::err_amdgcn_dmask_has_too_many_bits_set);
283 return true;
284 }
285
286 // For gather, only one bit can be set indicating which exact component to
287 // return.
288 bool ExtraGatherChecks =
289 BuiltinID == AMDGPU::BI__builtin_amdgcn_image_gather4_lz_2d_v4f32_f32 &&
290 SemaRef.BuiltinConstantArgPower2(TheCall, 0);
291
292 return ExtraGatherChecks ||
293 (SemaRef.BuiltinConstantArg(TheCall, ArgCount, Result)) ||
294 (SemaRef.BuiltinConstantArg(TheCall, (ArgCount - 1), Result));
295 }
296 case AMDGPU::BI__builtin_amdgcn_image_store_1d_v4f32_i32:
297 case AMDGPU::BI__builtin_amdgcn_image_store_1darray_v4f32_i32:
298 case AMDGPU::BI__builtin_amdgcn_image_store_1d_v4f16_i32:
299 case AMDGPU::BI__builtin_amdgcn_image_store_1darray_v4f16_i32:
300 case AMDGPU::BI__builtin_amdgcn_image_store_2d_f32_i32:
301 case AMDGPU::BI__builtin_amdgcn_image_store_2d_v4f32_i32:
302 case AMDGPU::BI__builtin_amdgcn_image_store_2d_v4f16_i32:
303 case AMDGPU::BI__builtin_amdgcn_image_store_2darray_f32_i32:
304 case AMDGPU::BI__builtin_amdgcn_image_store_2darray_v4f32_i32:
305 case AMDGPU::BI__builtin_amdgcn_image_store_2darray_v4f16_i32:
306 case AMDGPU::BI__builtin_amdgcn_image_store_3d_v4f32_i32:
307 case AMDGPU::BI__builtin_amdgcn_image_store_3d_v4f16_i32:
308 case AMDGPU::BI__builtin_amdgcn_image_store_cube_v4f32_i32:
309 case AMDGPU::BI__builtin_amdgcn_image_store_cube_v4f16_i32:
310 case AMDGPU::BI__builtin_amdgcn_image_store_mip_1d_v4f32_i32:
311 case AMDGPU::BI__builtin_amdgcn_image_store_mip_1d_v4f16_i32:
312 case AMDGPU::BI__builtin_amdgcn_image_store_mip_1darray_v4f32_i32:
313 case AMDGPU::BI__builtin_amdgcn_image_store_mip_1darray_v4f16_i32:
314 case AMDGPU::BI__builtin_amdgcn_image_store_mip_2d_f32_i32:
315 case AMDGPU::BI__builtin_amdgcn_image_store_mip_2d_v4f32_i32:
316 case AMDGPU::BI__builtin_amdgcn_image_store_mip_2d_v4f16_i32:
317 case AMDGPU::BI__builtin_amdgcn_image_store_mip_2darray_f32_i32:
318 case AMDGPU::BI__builtin_amdgcn_image_store_mip_2darray_v4f32_i32:
319 case AMDGPU::BI__builtin_amdgcn_image_store_mip_2darray_v4f16_i32:
320 case AMDGPU::BI__builtin_amdgcn_image_store_mip_3d_v4f32_i32:
321 case AMDGPU::BI__builtin_amdgcn_image_store_mip_3d_v4f16_i32:
322 case AMDGPU::BI__builtin_amdgcn_image_store_mip_cube_v4f32_i32:
323 case AMDGPU::BI__builtin_amdgcn_image_store_mip_cube_v4f16_i32: {
324 StringRef FeatureList(
325 getASTContext().BuiltinInfo.getRequiredFeatures(BuiltinID));
327 CallerFeatureMap)) {
328 Diag(TheCall->getBeginLoc(), diag::err_builtin_needs_feature)
329 << FD->getDeclName() << FeatureList;
330 return false;
331 }
332
333 unsigned ArgCount = TheCall->getNumArgs() - 1;
334 llvm::APSInt Result;
335
336 // Complain about dmask values which are too huge to fully fit into 4 bits
337 // (which is the actual size of the dmask in corresponding HW instructions).
338 constexpr unsigned DMaskArgNo = 1;
339 return SemaRef.BuiltinConstantArgRange(TheCall, DMaskArgNo, /*Low=*/0,
340 /*High=*/15,
341 /*RangeIsError=*/true) ||
342 SemaRef.BuiltinConstantArg(TheCall, ArgCount, Result) ||
343 SemaRef.BuiltinConstantArg(TheCall, (ArgCount - 1), Result);
344 }
345 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x64_iu8:
346 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x128_iu8: {
347 if (BuiltinID == AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x64_iu8) {
348 if (SemaRef.checkArgCountRange(TheCall, 7, 8))
349 return true;
350 if (TheCall->getNumArgs() == 7)
351 return false;
352 } else if (BuiltinID ==
353 AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x128_iu8) {
354 if (SemaRef.checkArgCountRange(TheCall, 8, 9))
355 return true;
356 if (TheCall->getNumArgs() == 8)
357 return false;
358 }
359 // Check if the last argument (clamp operand) is a constant and is
360 // convertible to bool.
361 Expr *ClampArg = TheCall->getArg(TheCall->getNumArgs() - 1);
362 // 1) Ensure clamp argument is a constant expression
363 llvm::APSInt ClampValue;
364 if (!SemaRef.VerifyIntegerConstantExpression(ClampArg, &ClampValue)
365 .isUsable())
366 return true;
367 // 2) Check if the argument can be converted to bool type
368 if (!SemaRef.Context.hasSameType(ClampArg->getType(),
369 SemaRef.Context.BoolTy)) {
370 // Try to convert to bool
371 QualType BoolTy = SemaRef.Context.BoolTy;
372 ExprResult ClampExpr(ClampArg);
373 SemaRef.CheckSingleAssignmentConstraints(BoolTy, ClampExpr);
374 if (ClampExpr.isInvalid())
375 return true;
376 }
377 return false;
378 }
379 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x32_bf16:
380 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x4_f32:
381 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x32_f16:
382 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x32_f16:
383 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x32_bf16:
384 case AMDGPU::BI__builtin_amdgcn_wmma_bf16f32_16x16x32_bf16:
385 return SemaRef.BuiltinConstantArgRange(TheCall, /*ArgNum=*/0, /*Low=*/0,
386 /*High=*/0) ||
387 SemaRef.BuiltinConstantArgRange(TheCall, /*ArgNum=*/2, /*Low=*/0,
388 /*High=*/0);
389 default:
390 return false;
391 }
392
393 ExprResult Arg = TheCall->getArg(OrderIndex);
394 auto ArgExpr = Arg.get();
395 Expr::EvalResult ArgResult;
396
397 if (!ArgExpr->EvaluateAsInt(ArgResult, getASTContext()))
398 return Diag(ArgExpr->getExprLoc(), diag::err_typecheck_expect_int)
399 << ArgExpr->getType();
400 auto Ord = ArgResult.Val.getInt().getZExtValue();
401
402 // Check validity of memory ordering as per C11 / C++11's memory model.
403 // Only fence needs check. Atomic dec/inc allow all memory orders.
404 if (!llvm::isValidAtomicOrderingCABI(Ord))
405 return Diag(ArgExpr->getBeginLoc(),
406 diag::warn_atomic_op_has_invalid_memory_order)
407 << 0 << ArgExpr->getSourceRange();
408 switch (static_cast<llvm::AtomicOrderingCABI>(Ord)) {
409 case llvm::AtomicOrderingCABI::relaxed:
410 case llvm::AtomicOrderingCABI::consume:
411 if (BuiltinID == AMDGPU::BI__builtin_amdgcn_fence)
412 return Diag(ArgExpr->getBeginLoc(),
413 diag::warn_atomic_op_has_invalid_memory_order)
414 << 0 << ArgExpr->getSourceRange();
415 break;
416 case llvm::AtomicOrderingCABI::acquire:
417 case llvm::AtomicOrderingCABI::release:
418 case llvm::AtomicOrderingCABI::acq_rel:
419 case llvm::AtomicOrderingCABI::seq_cst:
420 break;
421 }
422
423 Arg = TheCall->getArg(ScopeIndex);
424 ArgExpr = Arg.get();
425 Expr::EvalResult ArgResult1;
426 // Check that sync scope is a constant literal
427 if (!ArgExpr->EvaluateAsConstantExpr(ArgResult1, getASTContext()))
428 return Diag(ArgExpr->getExprLoc(), diag::err_expr_not_string_literal)
429 << ArgExpr->getType();
430
431 return false;
432}
433
435 bool MayStore) {
436 Expr::EvalResult AtomicOrdArgRes;
437 if (!E->EvaluateAsInt(AtomicOrdArgRes, getASTContext()))
438 llvm_unreachable("Intrinsic requires imm for atomic ordering argument!");
439 auto Ord =
440 llvm::AtomicOrderingCABI(AtomicOrdArgRes.Val.getInt().getZExtValue());
441
442 // Atomic ordering cannot be acq_rel in any case, acquire for stores or
443 // release for loads.
444 if (!llvm::isValidAtomicOrderingCABI((unsigned)Ord) ||
445 (!(MayLoad && MayStore) && (Ord == llvm::AtomicOrderingCABI::acq_rel)) ||
446 (!MayLoad && Ord == llvm::AtomicOrderingCABI::acquire) ||
447 (!MayStore && Ord == llvm::AtomicOrderingCABI::release)) {
448 return Diag(E->getBeginLoc(), diag::warn_atomic_op_has_invalid_memory_order)
449 << 0 << E->getSourceRange();
450 }
451
452 return false;
453}
454
455// Check that the first argument to TheCall is a global or generic pointer.
457 Expr *PtrArg = TheCall->getArg(0);
458 QualType PtrTy = PtrArg->getType()->getPointeeType();
459 unsigned AS =
460 S.getASTContext().getTargetAddressSpace(PtrTy.getAddressSpace());
461 if (AS != llvm::AMDGPUAS::FLAT_ADDRESS &&
462 AS != llvm::AMDGPUAS::GLOBAL_ADDRESS) {
463 return S.Diag(TheCall->getBeginLoc(),
464 diag::err_amdgcn_global_or_flat_pointer_required)
465 << PtrArg->getSourceRange();
466 }
467 return false;
468}
469
471 if (Scope->isValueDependent())
472 return false;
474 if (std::optional<llvm::APSInt> Result =
475 Scope->getIntegerConstantExpr(S.SemaRef.Context)) {
476 if (!ScopeModel->isValid(Result->getZExtValue())) {
477 return S.Diag(Scope->getBeginLoc(),
478 diag::err_atomic_op_has_invalid_sync_scope)
479 << Scope->getSourceRange();
480 }
481 }
482 return false;
483}
484
486 bool Fail = checkGlobalOrFlatPointerArg(*this, TheCall);
487
488 Expr *AO = TheCall->getArg(IsStore ? 2 : 1);
489 Expr *Scope = TheCall->getArg(TheCall->getNumArgs() - 1);
490
491 if (AO->isValueDependent() || Scope->isValueDependent())
492 return false;
493
494 // Check atomic ordering
495 Fail |=
496 checkAtomicOrderingCABIArg(TheCall->getArg(IsStore ? 2 : 1),
497 /*MayLoad=*/!IsStore, /*MayStore=*/IsStore);
498
499 // Last argument is the syncscope as a string literal.
500 if (!isa<StringLiteral>(Scope->IgnoreParenImpCasts())) {
501 Diag(TheCall->getBeginLoc(), diag::err_expr_not_string_literal)
502 << Scope->getSourceRange();
503 Fail = true;
504 }
505
506 return Fail;
507}
508
510 Expr *AO = TheCall->getArg(1);
511 Expr *Scope = TheCall->getArg(TheCall->getNumArgs() - 1);
512
513 if (AO->isValueDependent() || Scope->isValueDependent())
514 return false;
515
516 bool Fail = checkAtomicOrderingCABIArg(AO, /*MayLoad=*/true,
517 /*MayStore=*/false);
518 Fail |= checkScopeAsInt(*this, Scope);
519 return Fail;
520}
521
522bool SemaAMDGPU::checkMovDPPFunctionCall(CallExpr *TheCall, unsigned NumArgs,
523 unsigned NumDataArgs) {
524 assert(NumDataArgs <= 2);
525 if (SemaRef.checkArgCountRange(TheCall, NumArgs, NumArgs))
526 return true;
527 Expr *Args[2];
528 QualType ArgTys[2];
529 for (unsigned I = 0; I != NumDataArgs; ++I) {
530 Args[I] = TheCall->getArg(I);
531 ArgTys[I] = Args[I]->getType();
532 // TODO: Vectors can also be supported.
533 if (!ArgTys[I]->isArithmeticType() || ArgTys[I]->isAnyComplexType()) {
534 SemaRef.Diag(Args[I]->getBeginLoc(),
535 diag::err_typecheck_cond_expect_int_float)
536 << ArgTys[I] << Args[I]->getSourceRange();
537 return true;
538 }
539 }
540 if (NumDataArgs < 2)
541 return false;
542
543 if (getASTContext().hasSameUnqualifiedType(ArgTys[0], ArgTys[1]))
544 return false;
545
546 if (((ArgTys[0]->isUnsignedIntegerType() &&
547 ArgTys[1]->isSignedIntegerType()) ||
548 (ArgTys[0]->isSignedIntegerType() &&
549 ArgTys[1]->isUnsignedIntegerType())) &&
550 getASTContext().getTypeSize(ArgTys[0]) ==
551 getASTContext().getTypeSize(ArgTys[1]))
552 return false;
553
554 SemaRef.Diag(Args[1]->getBeginLoc(),
555 diag::err_typecheck_call_different_arg_types)
556 << ArgTys[0] << ArgTys[1];
557 return true;
558}
559
560static bool
562 const AMDGPUFlatWorkGroupSizeAttr &Attr) {
563 // Accept template arguments for now as they depend on something else.
564 // We'll get to check them when they eventually get instantiated.
565 if (MinExpr->isValueDependent() || MaxExpr->isValueDependent())
566 return false;
567
568 uint32_t Min = 0;
569 if (!S.checkUInt32Argument(Attr, MinExpr, Min, 0))
570 return true;
571
572 uint32_t Max = 0;
573 if (!S.checkUInt32Argument(Attr, MaxExpr, Max, 1))
574 return true;
575
576 if (Min == 0 && Max != 0) {
577 S.Diag(Attr.getLocation(), diag::err_attribute_argument_invalid)
578 << &Attr << 0;
579 return true;
580 }
581 if (Min > Max) {
582 S.Diag(Attr.getLocation(), diag::err_attribute_argument_invalid)
583 << &Attr << 1;
584 return true;
585 }
586
587 return false;
588}
589
590AMDGPUFlatWorkGroupSizeAttr *
592 Expr *MinExpr, Expr *MaxExpr) {
593 ASTContext &Context = getASTContext();
594 AMDGPUFlatWorkGroupSizeAttr TmpAttr(Context, CI, MinExpr, MaxExpr);
595
596 if (checkAMDGPUFlatWorkGroupSizeArguments(SemaRef, MinExpr, MaxExpr, TmpAttr))
597 return nullptr;
598 return ::new (Context)
599 AMDGPUFlatWorkGroupSizeAttr(Context, CI, MinExpr, MaxExpr);
600}
601
603 const AttributeCommonInfo &CI,
604 Expr *MinExpr, Expr *MaxExpr) {
605 if (auto *Attr = CreateAMDGPUFlatWorkGroupSizeAttr(CI, MinExpr, MaxExpr))
606 D->addAttr(Attr);
607}
608
610 const ParsedAttr &AL) {
611 Expr *MinExpr = AL.getArgAsExpr(0);
612 Expr *MaxExpr = AL.getArgAsExpr(1);
613
614 addAMDGPUFlatWorkGroupSizeAttr(D, AL, MinExpr, MaxExpr);
615}
616
617static bool checkAMDGPUWavesPerEUArguments(Sema &S, Expr *MinExpr,
618 Expr *MaxExpr,
619 const AMDGPUWavesPerEUAttr &Attr) {
620 if (S.DiagnoseUnexpandedParameterPack(MinExpr) ||
621 (MaxExpr && S.DiagnoseUnexpandedParameterPack(MaxExpr)))
622 return true;
623
624 // Accept template arguments for now as they depend on something else.
625 // We'll get to check them when they eventually get instantiated.
626 if (MinExpr->isValueDependent() || (MaxExpr && MaxExpr->isValueDependent()))
627 return false;
628
629 uint32_t Min = 0;
630 if (!S.checkUInt32Argument(Attr, MinExpr, Min, 0))
631 return true;
632
633 uint32_t Max = 0;
634 if (MaxExpr && !S.checkUInt32Argument(Attr, MaxExpr, Max, 1))
635 return true;
636
637 if (Min == 0 && Max != 0) {
638 S.Diag(Attr.getLocation(), diag::err_attribute_argument_invalid)
639 << &Attr << 0;
640 return true;
641 }
642 if (Max != 0 && Min > Max) {
643 S.Diag(Attr.getLocation(), diag::err_attribute_argument_invalid)
644 << &Attr << 1;
645 return true;
646 }
647
648 return false;
649}
650
651AMDGPUWavesPerEUAttr *
653 Expr *MinExpr, Expr *MaxExpr) {
654 ASTContext &Context = getASTContext();
655 AMDGPUWavesPerEUAttr TmpAttr(Context, CI, MinExpr, MaxExpr);
656
657 if (checkAMDGPUWavesPerEUArguments(SemaRef, MinExpr, MaxExpr, TmpAttr))
658 return nullptr;
659
660 return ::new (Context) AMDGPUWavesPerEUAttr(Context, CI, MinExpr, MaxExpr);
661}
662
664 Expr *MinExpr, Expr *MaxExpr) {
665 if (auto *Attr = CreateAMDGPUWavesPerEUAttr(CI, MinExpr, MaxExpr))
666 D->addAttr(Attr);
667}
668
671 return;
672
673 Expr *MinExpr = AL.getArgAsExpr(0);
674 Expr *MaxExpr = (AL.getNumArgs() > 1) ? AL.getArgAsExpr(1) : nullptr;
675
676 addAMDGPUWavesPerEUAttr(D, AL, MinExpr, MaxExpr);
677}
678
680 Diag(AL.getLoc(), diag::warn_amdgpu_num_reg_attr_deprecated) << AL;
681
682 uint32_t NumSGPR = 0;
683 Expr *NumSGPRExpr = AL.getArgAsExpr(0);
684 if (!SemaRef.checkUInt32Argument(AL, NumSGPRExpr, NumSGPR))
685 return;
686
687 D->addAttr(::new (getASTContext())
688 AMDGPUNumSGPRAttr(getASTContext(), AL, NumSGPR));
689}
690
692 Diag(AL.getLoc(), diag::warn_amdgpu_num_reg_attr_deprecated) << AL;
693
694 uint32_t NumVGPR = 0;
695 Expr *NumVGPRExpr = AL.getArgAsExpr(0);
696 if (!SemaRef.checkUInt32Argument(AL, NumVGPRExpr, NumVGPR))
697 return;
698
699 D->addAttr(::new (getASTContext())
700 AMDGPUNumVGPRAttr(getASTContext(), AL, NumVGPR));
701}
702
703static bool
705 Expr *ZExpr,
706 const AMDGPUMaxNumWorkGroupsAttr &Attr) {
707 if (S.DiagnoseUnexpandedParameterPack(XExpr) ||
708 (YExpr && S.DiagnoseUnexpandedParameterPack(YExpr)) ||
709 (ZExpr && S.DiagnoseUnexpandedParameterPack(ZExpr)))
710 return true;
711
712 // Accept template arguments for now as they depend on something else.
713 // We'll get to check them when they eventually get instantiated.
714 if (XExpr->isValueDependent() || (YExpr && YExpr->isValueDependent()) ||
715 (ZExpr && ZExpr->isValueDependent()))
716 return false;
717
718 uint32_t NumWG = 0;
719 Expr *Exprs[3] = {XExpr, YExpr, ZExpr};
720 for (int i = 0; i < 3; i++) {
721 if (Exprs[i]) {
722 if (!S.checkUInt32Argument(Attr, Exprs[i], NumWG, i,
723 /*StrictlyUnsigned=*/true))
724 return true;
725 if (NumWG == 0) {
726 S.Diag(Attr.getLoc(), diag::err_attribute_argument_is_zero)
727 << &Attr << Exprs[i]->getSourceRange();
728 return true;
729 }
730 }
731 }
732
733 return false;
734}
735
737 const AttributeCommonInfo &CI, Expr *XExpr, Expr *YExpr, Expr *ZExpr) {
738 ASTContext &Context = getASTContext();
739 AMDGPUMaxNumWorkGroupsAttr TmpAttr(Context, CI, XExpr, YExpr, ZExpr);
740 assert(!SemaRef.isSFINAEContext() &&
741 "Can't produce SFINAE diagnostic pointing to temporary attribute");
742
743 if (checkAMDGPUMaxNumWorkGroupsArguments(SemaRef, XExpr, YExpr, ZExpr,
744 TmpAttr))
745 return nullptr;
746
747 return ::new (Context)
748 AMDGPUMaxNumWorkGroupsAttr(Context, CI, XExpr, YExpr, ZExpr);
749}
750
752 const AttributeCommonInfo &CI,
753 Expr *XExpr, Expr *YExpr,
754 Expr *ZExpr) {
755 if (auto *Attr = CreateAMDGPUMaxNumWorkGroupsAttr(CI, XExpr, YExpr, ZExpr))
756 D->addAttr(Attr);
757}
758
760 const ParsedAttr &AL) {
761 Expr *YExpr = (AL.getNumArgs() > 1) ? AL.getArgAsExpr(1) : nullptr;
762 Expr *ZExpr = (AL.getNumArgs() > 2) ? AL.getArgAsExpr(2) : nullptr;
763 addAMDGPUMaxNumWorkGroupsAttr(D, AL, AL.getArgAsExpr(0), YExpr, ZExpr);
764}
765
768 ASTContext &Ctx = getASTContext();
769 QualType BoolTy = Ctx.getLogicalOperationType();
770 SourceLocation Loc = CE->getExprLoc();
771
772 if (!CE->getBuiltinCallee())
773 return *ExpandedPredicates
774 .insert(SemaRef.BuildBoolLiteral(Loc, false).get())
775 .first;
776
777 bool P = false;
778 unsigned BI = CE->getBuiltinCallee();
779 if (Ctx.BuiltinInfo.isAuxBuiltinID(BI))
780 BI = Ctx.BuiltinInfo.getAuxBuiltinID(BI);
781
782 if (BI == AMDGPU::BI__builtin_amdgcn_processor_is) {
783 auto *GFX = dyn_cast<StringLiteral>(CE->getArg(0)->IgnoreParenCasts());
784 if (!GFX) {
785 Diag(Loc, diag::err_amdgcn_processor_is_arg_not_literal);
786 return nullptr;
787 }
788
789 StringRef N = GFX->getString();
790 const TargetInfo &TI = Ctx.getTargetInfo();
791 const TargetInfo *AuxTI = Ctx.getAuxTargetInfo();
792 if (!TI.isValidCPUName(N) && (!AuxTI || !AuxTI->isValidCPUName(N))) {
793 Diag(Loc, diag::err_amdgcn_processor_is_arg_invalid_value) << N;
795 if (TI.getTriple().getVendor() == llvm::Triple::VendorType::AMD)
796 TI.fillValidCPUList(ValidList);
797 else if (AuxTI) // Since the BI is present it must be an AMDGPU triple.
798 AuxTI->fillValidCPUList(ValidList);
799 if (!ValidList.empty())
800 Diag(Loc, diag::note_amdgcn_processor_is_valid_options)
801 << llvm::join(ValidList, ", ");
802 return nullptr;
803 }
804 if (Ctx.getTargetInfo().getTriple().isSPIRV()) {
805 CE->setType(BoolTy);
806 return *ExpandedPredicates.insert(CE).first;
807 }
808
809 if (auto TID = Ctx.getTargetInfo().getTargetID())
810 P = TID->find(N) == 0;
811 } else {
812 Expr *Arg = CE->getArg(0);
813 if (!Arg || Arg->getType() != Ctx.BuiltinFnTy) {
814 Diag(Loc, diag::err_amdgcn_is_invocable_arg_invalid_value) << Arg;
815 return nullptr;
816 }
817
818 if (Ctx.getTargetInfo().getTriple().isSPIRV()) {
819 CE->setType(BoolTy);
820 return *ExpandedPredicates.insert(CE).first;
821 }
822
824
825 StringRef RF = Ctx.BuiltinInfo.getRequiredFeatures(FD->getBuiltinID());
826 llvm::StringMap<bool> CF;
827 Ctx.getFunctionFeatureMap(CF, FD);
828
830 }
831
832 return *ExpandedPredicates.insert(SemaRef.BuildBoolLiteral(Loc, P).get())
833 .first;
834}
835
837 return ExpandedPredicates.contains(E);
838}
839
841 PotentiallyUnguardedBuiltinUsers.insert(FD);
842}
843
845 return PotentiallyUnguardedBuiltinUsers.contains(FD);
846}
847
848namespace {
849/// This class implements -Wamdgpu-unguarded-builtin-usage.
850///
851/// This is done with a traversal of the AST of a function that includes a
852/// call to a target specific builtin. Whenever we encounter an \c if of the
853/// form: \c if(__builtin_amdgcn_is_invocable), we consider the then statement
854/// guarded.
855class DiagnoseUnguardedBuiltins : public DynamicRecursiveASTVisitor {
856 // TODO: this could eventually be extended to consider what happens when there
857 // are multiple target architectures specified via target("arch=gfxXXX")
858 // target("arch=gfxyyy") etc., as well as feature disabling via "-XXX".
859 Sema &SemaRef;
860
861 SmallVector<StringRef> TargetFeatures;
863 SmallVector<unsigned> GuardedBuiltins;
864
865 static Expr *FindPredicate(Expr *Cond) {
866 if (auto *CE = dyn_cast<CallExpr>(Cond)) {
867 if (CE->getBuiltinCallee() == AMDGPU::BI__builtin_amdgcn_is_invocable ||
868 CE->getBuiltinCallee() == AMDGPU::BI__builtin_amdgcn_processor_is)
869 return Cond;
870 } else if (auto *UO = dyn_cast<UnaryOperator>(Cond)) {
871 return FindPredicate(UO->getSubExpr());
872 } else if (auto *BO = dyn_cast<BinaryOperator>(Cond)) {
873 if ((Cond = FindPredicate(BO->getLHS())))
874 return Cond;
875 return FindPredicate(BO->getRHS());
876 }
877 return nullptr;
878 }
879
880 bool EnterPredicateGuardedContext(CallExpr *P);
881 void ExitPredicateGuardedContext(bool WasProcessorCheck);
882 bool TraverseGuardedStmt(Stmt *S, CallExpr *P);
883
884public:
885 DiagnoseUnguardedBuiltins(Sema &SemaRef) : SemaRef(SemaRef) {
886 if (auto *TAT = SemaRef.getCurFunctionDecl(true)->getAttr<TargetAttr>()) {
887 // We use the somewhat misnamed x86 accessors because they provide exactly
888 // what we require.
889 TAT->getX86AddedFeatures(TargetFeatures);
890 if (auto GFXIP = TAT->getX86Architecture())
891 CurrentGFXIP.emplace_back(TAT->getLocation(), *GFXIP);
892 }
893 }
894
895 bool TraverseLambdaExpr(LambdaExpr *LE) override {
896 if (SemaRef.AMDGPU().HasPotentiallyUnguardedBuiltinUsage(
897 LE->getCallOperator()))
898 return true; // We have already handled this.
899 return DynamicRecursiveASTVisitor::TraverseLambdaExpr(LE);
900 }
901
902 bool TraverseStmt(Stmt *S) override {
903 if (!S)
904 return true;
906 }
907
908 void IssueDiagnostics(Stmt *S) { TraverseStmt(S); }
909
910 bool TraverseIfStmt(IfStmt *If) override {
911 if (auto *CE = dyn_cast_or_null<CallExpr>(FindPredicate(If->getCond())))
912 return TraverseGuardedStmt(If, CE);
913 return DynamicRecursiveASTVisitor::TraverseIfStmt(If);
914 }
915
916 bool TraverseCaseStmt(CaseStmt *CS) override {
917 return TraverseStmt(CS->getSubStmt());
918 }
919
920 bool TraverseConditionalOperator(ConditionalOperator *CO) override {
921 if (auto *CE = dyn_cast_or_null<CallExpr>(FindPredicate(CO->getCond())))
922 return TraverseGuardedStmt(CO, CE);
923 return DynamicRecursiveASTVisitor::TraverseConditionalOperator(CO);
924 }
925
926 bool VisitAsmStmt(AsmStmt *ASM) override;
927 bool VisitCallExpr(CallExpr *CE) override;
928};
929
930bool DiagnoseUnguardedBuiltins::EnterPredicateGuardedContext(CallExpr *P) {
931 bool IsProcessorCheck =
932 P->getBuiltinCallee() == AMDGPU::BI__builtin_amdgcn_processor_is;
933
934 if (IsProcessorCheck) {
935 StringRef G = cast<clang::StringLiteral>(P->getArg(0))->getString();
936 // TODO: handle generic ISAs.
937 if (!CurrentGFXIP.empty() && G != CurrentGFXIP.back().second) {
938 SemaRef.Diag(P->getExprLoc(),
939 diag::err_amdgcn_conflicting_is_processor_options)
940 << P;
941 SemaRef.Diag(CurrentGFXIP.back().first,
942 diag::note_amdgcn_previous_is_processor_guard);
943 }
944 CurrentGFXIP.emplace_back(P->getExprLoc(), G);
945 } else {
946 auto *FD = cast<FunctionDecl>(
947 cast<DeclRefExpr>(P->getArg(0))->getReferencedDeclOfCallee());
948 GuardedBuiltins.push_back(FD->getBuiltinID());
949 }
950
951 return IsProcessorCheck;
952}
953
954void DiagnoseUnguardedBuiltins::ExitPredicateGuardedContext(bool WasProcCheck) {
955 if (WasProcCheck)
956 CurrentGFXIP.pop_back();
957 else
958 GuardedBuiltins.pop_back();
959}
960
961inline std::pair<Stmt *, Stmt *> GetTraversalOrder(Stmt *S) {
962 std::pair<Stmt *, Stmt *> Ordered;
963 Expr *Condition = nullptr;
964
965 if (auto *CO = dyn_cast<ConditionalOperator>(S)) {
966 Condition = CO->getCond();
967 Ordered = {CO->getTrueExpr(), CO->getFalseExpr()};
968 } else if (auto *If = dyn_cast<IfStmt>(S)) {
969 Condition = If->getCond();
970 Ordered = {If->getThen(), If->getElse()};
971 }
972
973 if (auto *UO = dyn_cast<UnaryOperator>(Condition))
974 if (UO->getOpcode() == UnaryOperatorKind::UO_LNot)
975 std::swap(Ordered.first, Ordered.second);
976
977 return Ordered;
978}
979
980bool DiagnoseUnguardedBuiltins::TraverseGuardedStmt(Stmt *S, CallExpr *P) {
981 assert(S && "Unexpected missing Statement!");
982 assert(P && "Unexpected missing Predicate!");
983
984 auto [Guarded, Unguarded] = GetTraversalOrder(S);
985
986 bool WasProcessorCheck = EnterPredicateGuardedContext(P);
987
988 bool Continue = TraverseStmt(Guarded);
989
990 ExitPredicateGuardedContext(WasProcessorCheck);
991
992 return Continue && TraverseStmt(Unguarded);
993}
994
995bool DiagnoseUnguardedBuiltins::VisitAsmStmt(AsmStmt *ASM) {
996 // TODO: should we check if the ASM is valid for the target? Can we?
997 if (!CurrentGFXIP.empty())
998 return true;
999
1000 std::string S = ASM->generateAsmString(SemaRef.getASTContext());
1001 SemaRef.Diag(ASM->getAsmLoc(), diag::warn_amdgcn_unguarded_asm_stmt) << S;
1002 SemaRef.Diag(ASM->getAsmLoc(), diag::note_amdgcn_unguarded_asm_silence) << S;
1003
1004 return true;
1005}
1006
1007bool DiagnoseUnguardedBuiltins::VisitCallExpr(CallExpr *CE) {
1008 unsigned ID = CE->getBuiltinCallee();
1009 Builtin::Context &BInfo = SemaRef.getASTContext().BuiltinInfo;
1010
1011 if (!ID)
1012 return true;
1013 if (!BInfo.isTSBuiltin(ID))
1014 return true;
1015 if (ID == AMDGPU::BI__builtin_amdgcn_processor_is ||
1016 ID == AMDGPU::BI__builtin_amdgcn_is_invocable)
1017 return true;
1018 if (llvm::find(GuardedBuiltins, ID) != GuardedBuiltins.end())
1019 return true;
1020
1021 StringRef FL(BInfo.getRequiredFeatures(ID));
1022 llvm::StringMap<bool> FeatureMap;
1023 if (CurrentGFXIP.empty()) {
1024 for (auto &&F : TargetFeatures)
1025 FeatureMap[F] = true;
1026 for (auto &&GID : GuardedBuiltins)
1027 for (auto &&F : llvm::split(BInfo.getRequiredFeatures(GID), ','))
1028 FeatureMap[F] = true;
1029 } else {
1030 static const llvm::Triple AMDGCN(llvm::Triple::amdgcn,
1031 llvm::Triple::NoSubArch, llvm::Triple::AMD,
1032 llvm::Triple::AMDHSA);
1033 llvm::AMDGPU::fillAMDGPUFeatureMap(CurrentGFXIP.back().second, AMDGCN,
1034 FeatureMap);
1035 }
1036
1037 FunctionDecl *BI = CE->getDirectCallee();
1038 SourceLocation BICallLoc = CE->getExprLoc();
1039 if (Builtin::evaluateRequiredTargetFeatures(FL, FeatureMap)) {
1040 SemaRef.Diag(BICallLoc, diag::warn_amdgcn_unguarded_builtin) << BI;
1041 SemaRef.Diag(BICallLoc, diag::note_amdgcn_unguarded_builtin_silence) << BI;
1042 } else {
1043 StringRef GFXIP = CurrentGFXIP.empty() ? "" : CurrentGFXIP.back().second;
1044 SemaRef.Diag(BICallLoc, diag::err_amdgcn_incompatible_builtin)
1045 << BI << FL << !CurrentGFXIP.empty() << GFXIP;
1046 if (!CurrentGFXIP.empty())
1047 SemaRef.Diag(CurrentGFXIP.back().first,
1048 diag::note_amdgcn_previous_is_processor_guard);
1049 }
1050
1051 return true;
1052}
1053} // Unnamed namespace
1054
1056 DiagnoseUnguardedBuiltins(SemaRef).IssueDiagnostics(FD->getBody());
1057}
1058} // namespace clang
#define GFX(gpu)
This file declares semantic analysis functions specific to AMDGPU.
Enumerates target-specific builtins in their own namespaces within namespace clang.
APSInt & getInt()
Definition APValue.h:508
Holds long-lived AST nodes (such as types and decls) that can be referred to throughout the semantic ...
Definition ASTContext.h:226
Builtin::Context & BuiltinInfo
Definition ASTContext.h:807
CanQualType getLogicalOperationType() const
The result type of logical operations, '<', '>', '!=', etc.
const TargetInfo * getAuxTargetInfo() const
Definition ASTContext.h:925
CanQualType BuiltinFnTy
const TargetInfo & getTargetInfo() const
Definition ASTContext.h:924
void getFunctionFeatureMap(llvm::StringMap< bool > &FeatureMap, const FunctionDecl *) const
unsigned getTargetAddressSpace(LangAS AS) const
PtrTy get() const
Definition Ownership.h:171
bool isInvalid() const
Definition Ownership.h:167
static std::unique_ptr< AtomicScopeModel > create(AtomicScopeModelKind K)
Create an atomic scope model by AtomicScopeModelKind.
Definition SyncScope.h:298
Attr - This represents one attribute.
Definition Attr.h:46
SourceLocation getLocation() const
Definition Attr.h:99
SourceLocation getLoc() const
bool isAuxBuiltinID(unsigned ID) const
Return true if the builtin ID belongs exclusively to the AuxTarget, and false if it belongs to both p...
Definition Builtins.h:443
unsigned getAuxBuiltinID(unsigned ID) const
Return real builtin ID (i.e.
Definition Builtins.h:449
const char * getRequiredFeatures(unsigned ID) const
Definition Builtins.cpp:102
CallExpr - Represents a function call (C99 6.5.2.2, C++ [expr.call]).
Definition Expr.h:2946
Expr * getArg(unsigned Arg)
getArg - Return the specified argument.
Definition Expr.h:3150
SourceLocation getBeginLoc() const
Definition Expr.h:3280
unsigned getBuiltinCallee() const
getBuiltinCallee - If this is a call to a builtin, return the builtin ID of the callee.
Definition Expr.cpp:1597
unsigned getNumArgs() const
getNumArgs - Return the number of actual arguments to this call.
Definition Expr.h:3137
Decl - This represents one declaration (or definition), e.g.
Definition DeclBase.h:86
void addAttr(Attr *A)
virtual bool TraverseStmt(MaybeConst< Stmt > *S)
This represents one expression.
Definition Expr.h:112
bool EvaluateAsInt(EvalResult &Result, const ASTContext &Ctx, SideEffectsKind AllowSideEffects=SE_NoSideEffects, bool InConstantContext=false) const
EvaluateAsInt - Return true if this is a constant which we can fold and convert to an integer,...
Expr * IgnoreParenCasts() LLVM_READONLY
Skip past any parentheses and casts which might surround this expression until reaching a fixed point...
Definition Expr.cpp:3102
void setType(QualType t)
Definition Expr.h:145
bool isValueDependent() const
Determines whether the value of this expression depends on.
Definition Expr.h:177
Expr * IgnoreParens() LLVM_READONLY
Skip past any parentheses which might surround this expression until reaching a fixed point.
Definition Expr.cpp:3093
Decl * getReferencedDeclOfCallee()
Definition Expr.cpp:1551
bool isInstantiationDependent() const
Whether this expression is instantiation-dependent, meaning that it depends in some way on.
Definition Expr.h:223
SourceLocation getExprLoc() const LLVM_READONLY
getExprLoc - Return the preferred location for the arrow when diagnosing a problem with a generic exp...
Definition Expr.cpp:282
QualType getType() const
Definition Expr.h:144
Represents a function declaration or definition.
Definition Decl.h:2018
Stmt * getBody(const FunctionDecl *&Definition) const
Retrieve the body (definition) of the function.
Definition Decl.cpp:3253
ParsedAttr - Represents a syntactic attribute.
Definition ParsedAttr.h:119
unsigned getNumArgs() const
getNumArgs - Return the number of actual arguments to this attribute.
Definition ParsedAttr.h:371
Expr * getArgAsExpr(unsigned Arg) const
Definition ParsedAttr.h:383
bool checkAtLeastNumArgs(class Sema &S, unsigned Num) const
Check if the attribute has at least as many args as Num.
bool checkAtMostNumArgs(class Sema &S, unsigned Num) const
Check if the attribute has at most as many args as Num.
A (possibly-)qualified type.
Definition TypeBase.h:937
const Type * getTypePtr() const
Retrieves a pointer to the underlying (unqualified) type.
Definition TypeBase.h:8440
Scope - A scope is a transient data structure that is used while parsing the program.
Definition Scope.h:41
void handleAMDGPUMaxNumWorkGroupsAttr(Decl *D, const ParsedAttr &AL)
void addAMDGPUFlatWorkGroupSizeAttr(Decl *D, const AttributeCommonInfo &CI, Expr *Min, Expr *Max)
addAMDGPUFlatWorkGroupSizeAttr - Adds an amdgpu_flat_work_group_size attribute to a particular declar...
bool checkCoopAtomicFunctionCall(CallExpr *TheCall, bool IsStore)
bool HasPotentiallyUnguardedBuiltinUsage(FunctionDecl *FD) const
void handleAMDGPUFlatWorkGroupSizeAttr(Decl *D, const ParsedAttr &AL)
bool checkAtomicMonitorLoad(CallExpr *TheCall)
bool checkAtomicOrderingCABIArg(Expr *E, bool MayLoad, bool MayStore)
Emits a diagnostic if the E is not an atomic ordering encoded in the C ABI format,...
void handleAMDGPUNumSGPRAttr(Decl *D, const ParsedAttr &AL)
AMDGPUMaxNumWorkGroupsAttr * CreateAMDGPUMaxNumWorkGroupsAttr(const AttributeCommonInfo &CI, Expr *XExpr, Expr *YExpr, Expr *ZExpr)
Create an AMDGPUMaxNumWorkGroupsAttr attribute.
Expr * ExpandAMDGPUPredicateBuiltIn(Expr *CE)
Expand a valid use of the feature identification builtins into its corresponding sequence of instruct...
AMDGPUWavesPerEUAttr * CreateAMDGPUWavesPerEUAttr(const AttributeCommonInfo &CI, Expr *Min, Expr *Max)
Create an AMDGPUWavesPerEUAttr attribute.
void DiagnoseUnguardedBuiltinUsage(FunctionDecl *FD)
void handleAMDGPUNumVGPRAttr(Decl *D, const ParsedAttr &AL)
AMDGPUFlatWorkGroupSizeAttr * CreateAMDGPUFlatWorkGroupSizeAttr(const AttributeCommonInfo &CI, Expr *Min, Expr *Max)
Create an AMDGPUFlatWorkGroupSizeAttr attribute.
void AddPotentiallyUnguardedBuiltinUser(FunctionDecl *FD)
Diagnose unguarded usages of AMDGPU builtins and recommend guarding with __builtin_amdgcn_is_invocabl...
bool checkMovDPPFunctionCall(CallExpr *TheCall, unsigned NumArgs, unsigned NumDataArgs)
void handleAMDGPUWavesPerEUAttr(Decl *D, const ParsedAttr &AL)
bool IsPredicate(Expr *E) const
bool CheckAMDGCNBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall)
void addAMDGPUWavesPerEUAttr(Decl *D, const AttributeCommonInfo &CI, Expr *Min, Expr *Max)
addAMDGPUWavesPerEUAttr - Adds an amdgpu_waves_per_eu attribute to a particular declaration.
void addAMDGPUMaxNumWorkGroupsAttr(Decl *D, const AttributeCommonInfo &CI, Expr *XExpr, Expr *YExpr, Expr *ZExpr)
addAMDGPUMaxNumWorkGroupsAttr - Adds an amdgpu_max_num_work_groups attribute to a particular declarat...
SemaBase(Sema &S)
Definition SemaBase.cpp:7
ASTContext & getASTContext() const
Definition SemaBase.cpp:9
Sema & SemaRef
Definition SemaBase.h:40
SemaDiagnosticBuilder Diag(SourceLocation Loc, unsigned DiagID)
Emit a diagnostic.
Definition SemaBase.cpp:61
Sema - This implements semantic analysis and AST building for C.
Definition Sema.h:868
ASTContext & Context
Definition Sema.h:1308
bool DiagnoseUnexpandedParameterPack(SourceLocation Loc, TypeSourceInfo *T, UnexpandedParameterPackContext UPPC)
If the given type contains an unexpanded parameter pack, diagnose the error.
bool checkUInt32Argument(const AttrInfo &AI, const Expr *Expr, uint32_t &Val, unsigned Idx=UINT_MAX, bool StrictlyUnsigned=false)
If Expr is a valid integer constant, get the value of the integer expression and return success or fa...
Definition Sema.h:4906
Encodes a location in the source.
SourceRange getSourceRange() const LLVM_READONLY
SourceLocation tokens are not useful in isolation - they are low level value objects created/interpre...
Definition Stmt.cpp:343
SourceLocation getBeginLoc() const LLVM_READONLY
Definition Stmt.cpp:355
Exposes information about the current target.
Definition TargetInfo.h:227
virtual std::optional< std::string > getTargetID() const
Returns the target ID if supported.
const llvm::Triple & getTriple() const
Returns the target triple of the primary target.
virtual void fillValidCPUList(SmallVectorImpl< StringRef > &Values) const
Fill a SmallVectorImpl with the valid values to setCPU.
virtual bool isValidCPUName(StringRef Name) const
Determine whether this TargetInfo supports the given CPU name.
The base class of the type hierarchy.
Definition TypeBase.h:1875
QualType getPointeeType() const
If this is a pointer, ObjC object pointer, or block pointer, this returns the respective pointee.
Definition Type.cpp:790
Defines the clang::TargetInfo interface.
bool evaluateRequiredTargetFeatures(llvm::StringRef RequiredFatures, const llvm::StringMap< bool > &TargetFetureMap)
Returns true if the required target features of a builtin function are enabled.
bool LE(InterpState &S, CodePtr OpPC)
Definition Interp.h:1489
The JSON file list parser is used to communicate input to InstallAPI.
bool isa(CodeGen::Address addr)
Definition Address.h:330
static bool checkScopeAsInt(SemaAMDGPU &S, Expr *Scope)
@ If
'if' clause, allowed on all the Compute Constructs, Data Constructs, Executable Constructs,...
Expr * Cond
};
@ Result
The result type of a method or function.
Definition TypeBase.h:905
static bool checkAMDGPUMaxNumWorkGroupsArguments(Sema &S, Expr *XExpr, Expr *YExpr, Expr *ZExpr, const AMDGPUMaxNumWorkGroupsAttr &Attr)
DynamicRecursiveASTVisitorBase< false > DynamicRecursiveASTVisitor
U cast(CodeGen::Address addr)
Definition Address.h:327
static bool checkAMDGPUFlatWorkGroupSizeArguments(Sema &S, Expr *MinExpr, Expr *MaxExpr, const AMDGPUFlatWorkGroupSizeAttr &Attr)
static bool checkGlobalOrFlatPointerArg(SemaAMDGPU &S, CallExpr *TheCall)
ActionResult< Expr * > ExprResult
Definition Ownership.h:249
static bool checkAMDGPUWavesPerEUArguments(Sema &S, Expr *MinExpr, Expr *MaxExpr, const AMDGPUWavesPerEUAttr &Attr)
EvalResult is a struct with detailed info about an evaluated expression.
Definition Expr.h:648
APValue Val
Val - This is the value the expression can be folded to.
Definition Expr.h:650