clang 23.0.0git
CIRGenOpenACCRecipe.cpp
Go to the documentation of this file.
1//===----------------------------------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// Helperes to emit OpenACC clause recipes as CIR code.
10//
11//===----------------------------------------------------------------------===//
12
13#include <numeric>
14
15#include "CIRGenOpenACCRecipe.h"
16
17namespace clang::CIRGen {
18mlir::Block *OpenACCRecipeBuilderBase::createRecipeBlock(mlir::Region &region,
19 mlir::Type opTy,
20 mlir::Location loc,
21 size_t numBounds,
22 bool isInit) {
24 types.reserve(numBounds + 2);
25 types.push_back(opTy);
26 // The init section is the only one that doesn't have TWO copies of the
27 // operation-type. Copy has a to/from, and destroy has a
28 // 'reference'/'privatized' copy version.
29 if (!isInit)
30 types.push_back(opTy);
31
32 auto boundsTy = mlir::acc::DataBoundsType::get(&cgf.getMLIRContext());
33 for (size_t i = 0; i < numBounds; ++i)
34 types.push_back(boundsTy);
35
36 llvm::SmallVector<mlir::Location> locs{types.size(), loc};
37 return builder.createBlock(&region, region.end(), types, locs);
38}
39void OpenACCRecipeBuilderBase::makeAllocaCopy(mlir::Location loc,
40 mlir::Type copyType,
41 mlir::Value numEltsToCopy,
42 mlir::Value offsetPerSubarray,
43 mlir::Value destAlloca,
44 mlir::Value srcAlloca) {
45 mlir::OpBuilder::InsertionGuard guardCase(builder);
46
47 mlir::Type itrTy = cgf.cgm.convertType(cgf.getContext().UnsignedLongLongTy);
48 auto itrPtrTy = cir::PointerType::get(itrTy);
49 mlir::IntegerAttr itrAlign =
52
53 auto loopBuilder = [&]() {
54 auto itr =
55 cir::AllocaOp::create(builder, loc, itrPtrTy, itrTy, "itr", itrAlign);
56 cir::ConstantOp constZero = builder.getConstInt(loc, itrTy, 0);
57 builder.CIRBaseBuilderTy::createStore(loc, constZero, itr);
59 loc,
60 /*condBuilder=*/
61 [&](mlir::OpBuilder &b, mlir::Location loc) {
62 // itr < numEltsToCopy
63 // Enforce a trip count of 1 if there wasn't any element count, this
64 // way we can just use this loop with a constant bounds instead of a
65 // separate code path.
66 if (!numEltsToCopy)
67 numEltsToCopy = builder.getConstInt(loc, itrTy, 1);
68
69 auto loadCur = cir::LoadOp::create(builder, loc, {itr});
70 auto cmp = builder.createCompare(loc, cir::CmpOpKind::lt, loadCur,
71 numEltsToCopy);
73 },
74 /*bodyBuilder=*/
75 [&](mlir::OpBuilder &b, mlir::Location loc) {
76 // destAlloca[itr] = srcAlloca[offsetPerSubArray * itr];
77 auto loadCur = cir::LoadOp::create(builder, loc, {itr});
78 auto srcOffset = builder.createMul(loc, offsetPerSubarray, loadCur);
79
80 auto ptrToOffsetIntoSrc = cir::PtrStrideOp::create(
81 builder, loc, copyType, srcAlloca, srcOffset);
82
83 auto offsetIntoDecayDest = cir::PtrStrideOp::create(
84 builder, loc, builder.getPointerTo(copyType), destAlloca,
85 loadCur);
86
87 builder.CIRBaseBuilderTy::createStore(loc, ptrToOffsetIntoSrc,
88 offsetIntoDecayDest);
89 builder.createYield(loc);
90 },
91 /*stepBuilder=*/
92 [&](mlir::OpBuilder &b, mlir::Location loc) {
93 // Simple increment of the iterator.
94 auto load = cir::LoadOp::create(builder, loc, {itr});
95 auto inc = builder.createInc(loc, load);
96 builder.CIRBaseBuilderTy::createStore(loc, inc, itr);
97 builder.createYield(loc);
98 });
99 };
100
101 cir::ScopeOp::create(builder, loc,
102 [&](mlir::OpBuilder &b, mlir::Location loc) {
103 loopBuilder();
104 builder.createYield(loc);
105 });
106}
107
108mlir::Value OpenACCRecipeBuilderBase::makeBoundsAlloca(
109 mlir::Block *block, SourceRange exprRange, mlir::Location loc,
110 std::string_view allocaName, size_t numBounds,
111 llvm::ArrayRef<QualType> boundTypes) {
112 mlir::OpBuilder::InsertionGuard guardCase(builder);
113
114 // Get the range of bounds arguments, which are all but the 1st arg.
115 llvm::ArrayRef<mlir::BlockArgument> boundsRange =
116 block->getArguments().drop_front(1);
117
118 // boundTypes contains the before and after of each bounds, so it ends up
119 // having 1 extra. Assert this is the case to ensure we don't call this in the
120 // wrong 'block'.
121 assert(boundsRange.size() + 1 == boundTypes.size());
122
123 mlir::Type itrTy = cgf.cgm.convertType(cgf.getContext().UnsignedLongLongTy);
124 auto idxType = mlir::IndexType::get(&cgf.getMLIRContext());
125
126 auto getUpperBound = [&](mlir::Value bound) {
127 auto upperBoundVal =
128 mlir::acc::GetUpperboundOp::create(builder, loc, idxType, bound);
129 return mlir::UnrealizedConversionCastOp::create(builder, loc, itrTy,
130 upperBoundVal.getResult())
131 .getResult(0);
132 };
133
134 auto isArrayTy = [&](QualType ty) {
135 if (ty->isArrayType() && !ty->isConstantArrayType())
136 cgf.cgm.errorNYI(exprRange, "OpenACC recipe init for VLAs");
137 return ty->isConstantArrayType();
138 };
139
140 mlir::Type topLevelTy = cgf.convertType(boundTypes.back());
141 cir::PointerType topLevelTyPtr = builder.getPointerTo(topLevelTy);
142 // Do an alloca for the 'top' level type without bounds.
143 mlir::Value initialAlloca = builder.createAlloca(
144 loc, topLevelTyPtr, topLevelTy, allocaName,
145 cgf.getContext().getTypeAlignInChars(boundTypes.back()));
146
147 bool lastBoundWasArray = isArrayTy(boundTypes.back());
148
149 // Make sure we track a moving version of this so we can get our
150 // 'copying' back to correct.
151 mlir::Value lastAlloca = initialAlloca;
152
153 // Since we're iterating the types in reverse, this sets up for each index
154 // corresponding to the boundsRange to be the 'after application of the
155 // bounds.
156 llvm::ArrayRef<QualType> boundResults = boundTypes.drop_back(1);
157
158 // Collect the 'do we have any allocas needed after this type' list.
159 llvm::SmallVector<bool> allocasLeftArr;
160 llvm::ArrayRef<QualType> resultTypes = boundTypes.drop_front();
161 std::transform_inclusive_scan(
162 resultTypes.begin(), resultTypes.end(),
163 std::back_inserter(allocasLeftArr), std::plus<bool>{},
164 [](QualType ty) { return !ty->isConstantArrayType(); }, false);
165
166 // Keep track of the number of 'elements' that we're allocating. Individual
167 // allocas should multiply this by the size of its current allocation.
168 mlir::Value cumulativeElts;
169 for (auto [bound, resultType, allocasLeft] : llvm::reverse(
170 llvm::zip_equal(boundsRange, boundResults, allocasLeftArr))) {
171
172 // if there is no further 'alloca' operation we need to do, we can skip
173 // creating the UB/multiplications/etc.
174 if (!allocasLeft)
175 break;
176
177 // First: figure out the number of elements in the current 'bound' list.
178 mlir::Value eltsPerSubArray = getUpperBound(bound);
179 mlir::Value eltsToAlloca;
180
181 // IF we are in a sub-bounds, the total number of elements to alloca is
182 // the product of that one and the current 'bounds' size. That is,
183 // arr[5][5], we would need 25 elements, not just 5. Else it is just the
184 // current number of elements.
185 if (cumulativeElts)
186 eltsToAlloca = builder.createMul(loc, eltsPerSubArray, cumulativeElts);
187 else
188 eltsToAlloca = eltsPerSubArray;
189
190 if (!lastBoundWasArray) {
191 // If we have to do an allocation, figure out the size of the
192 // allocation. alloca takes the number of bytes, not elements.
193 TypeInfoChars eltInfo = cgf.getContext().getTypeInfoInChars(resultType);
194 cir::ConstantOp eltSize = builder.getConstInt(
195 loc, itrTy, eltInfo.Width.alignTo(eltInfo.Align).getQuantity());
196 mlir::Value curSize = builder.createMul(loc, eltsToAlloca, eltSize);
197
198 mlir::Type eltTy = cgf.convertType(resultType);
199 cir::PointerType ptrTy = builder.getPointerTo(eltTy);
200 mlir::Value curAlloca = builder.createAlloca(
201 loc, ptrTy, eltTy, "openacc.init.bounds",
202 cgf.getContext().getTypeAlignInChars(resultType), curSize);
203
204 makeAllocaCopy(loc, ptrTy, cumulativeElts, eltsPerSubArray, lastAlloca,
205 curAlloca);
206 lastAlloca = curAlloca;
207 } else {
208 // In the case of an array, we just need to decay the pointer, so just do
209 // a zero-offset stride on the last alloca to decay it down an array
210 // level.
211 cir::ConstantOp constZero = builder.getConstInt(loc, itrTy, 0);
212 lastAlloca = builder.getArrayElement(loc, loc, lastAlloca,
213 cgf.convertType(resultType),
214 constZero, /*shouldDecay=*/true);
215 }
216
217 cumulativeElts = eltsToAlloca;
218 lastBoundWasArray = isArrayTy(resultType);
219 }
220 return initialAlloca;
221}
222
223std::pair<mlir::Value, mlir::Value> OpenACCRecipeBuilderBase::createBoundsLoop(
224 mlir::Value subscriptedValue, mlir::Value subscriptedValue2,
225 mlir::Value bound, mlir::Location loc, bool inverse) {
226 mlir::Operation *bodyInsertLoc;
227
228 mlir::Type itrTy = cgf.cgm.convertType(cgf.getContext().UnsignedLongLongTy);
229 auto itrPtrTy = cir::PointerType::get(itrTy);
230 mlir::IntegerAttr itrAlign =
231 cgf.cgm.getSize(cgf.getContext().getTypeAlignInChars(
232 cgf.getContext().UnsignedLongLongTy));
233 auto idxType = mlir::IndexType::get(&cgf.getMLIRContext());
234
235 auto doSubscriptOp = [&](mlir::Value subVal,
236 cir::LoadOp idxLoad) -> mlir::Value {
237 auto eltTy = cast<cir::PointerType>(subVal.getType()).getPointee();
238
239 if (auto arrayTy = dyn_cast<cir::ArrayType>(eltTy))
240 return builder.getArrayElement(loc, loc, subVal, arrayTy.getElementType(),
241 idxLoad,
242 /*shouldDecay=*/true);
243
244 assert(isa<cir::PointerType>(eltTy));
245
246 auto eltLoad = cir::LoadOp::create(builder, loc, {subVal});
247
248 return cir::PtrStrideOp::create(builder, loc, eltLoad.getType(), eltLoad,
249 idxLoad);
250 };
251
252 auto forStmtBuilder = [&]() {
253 // get the lower and upper bound for iterating over.
254 auto lowerBoundVal =
255 mlir::acc::GetLowerboundOp::create(builder, loc, idxType, bound);
256 auto lbConversion = mlir::UnrealizedConversionCastOp::create(
257 builder, loc, itrTy, lowerBoundVal.getResult());
258 auto upperBoundVal =
259 mlir::acc::GetUpperboundOp::create(builder, loc, idxType, bound);
260 auto ubConversion = mlir::UnrealizedConversionCastOp::create(
261 builder, loc, itrTy, upperBoundVal.getResult());
262
263 // Create a memory location for the iterator.
264 auto itr =
265 cir::AllocaOp::create(builder, loc, itrPtrTy, itrTy, "iter", itrAlign);
266 // Store to the iterator: either lower bound, or if inverse loop, upper
267 // bound.
268 if (inverse) {
269 cir::ConstantOp constOne = builder.getConstInt(loc, itrTy, 1);
270
271 auto sub =
272 cir::SubOp::create(builder, loc, ubConversion.getResult(0), constOne);
273
274 // Upperbound is exclusive, so subtract 1.
275 builder.CIRBaseBuilderTy::createStore(loc, sub, itr);
276 } else {
277 // Lowerbound is inclusive, so we can include it.
278 builder.CIRBaseBuilderTy::createStore(loc, lbConversion.getResult(0),
279 itr);
280 }
281 // Save the 'end' iterator based on whether we are inverted or not. This
282 // end iterator never changes, so we can just get it and convert it, so no
283 // need to store/load/etc.
284 auto endItr = inverse ? lbConversion : ubConversion;
285
286 builder.createFor(
287 loc,
288 /*condBuilder=*/
289 [&](mlir::OpBuilder &b, mlir::Location loc) {
290 auto loadCur = cir::LoadOp::create(builder, loc, {itr});
291 // Use 'not equal' since we are just doing an increment/decrement.
292 auto cmp = builder.createCompare(
293 loc, inverse ? cir::CmpOpKind::ge : cir::CmpOpKind::lt, loadCur,
294 endItr.getResult(0));
295 builder.createCondition(cmp);
296 },
297 /*bodyBuilder=*/
298 [&](mlir::OpBuilder &b, mlir::Location loc) {
299 auto load = cir::LoadOp::create(builder, loc, {itr});
300
301 if (subscriptedValue)
302 subscriptedValue = doSubscriptOp(subscriptedValue, load);
303 if (subscriptedValue2)
304 subscriptedValue2 = doSubscriptOp(subscriptedValue2, load);
305 bodyInsertLoc = builder.createYield(loc);
306 },
307 /*stepBuilder=*/
308 [&](mlir::OpBuilder &b, mlir::Location loc) {
309 auto load = cir::LoadOp::create(builder, loc, {itr});
310 auto unary = inverse ? builder.createDec(loc, load)
311 : builder.createInc(loc, load);
312 builder.CIRBaseBuilderTy::createStore(loc, unary, itr);
313 builder.createYield(loc);
314 });
315 };
316
317 cir::ScopeOp::create(builder, loc,
318 [&](mlir::OpBuilder &b, mlir::Location loc) {
319 forStmtBuilder();
320 builder.createYield(loc);
321 });
322
323 // Leave the insertion point to be inside the body, so we can loop over
324 // these things.
325 builder.setInsertionPoint(bodyInsertLoc);
326 return {subscriptedValue, subscriptedValue2};
327}
328
329mlir::acc::ReductionOperator
331 switch (op) {
333 return mlir::acc::ReductionOperator::AccAdd;
335 return mlir::acc::ReductionOperator::AccMul;
337 return mlir::acc::ReductionOperator::AccMax;
339 return mlir::acc::ReductionOperator::AccMin;
341 return mlir::acc::ReductionOperator::AccIand;
343 return mlir::acc::ReductionOperator::AccIor;
345 return mlir::acc::ReductionOperator::AccXor;
347 return mlir::acc::ReductionOperator::AccLand;
349 return mlir::acc::ReductionOperator::AccLor;
351 llvm_unreachable("invalid reduction operator");
352 }
353
354 llvm_unreachable("invalid reduction operator");
355}
356
357// This function generates the 'destroy' section for a recipe. Note
358// that this function is not 'insertion point' clean, in that it alters the
359// insertion point to be inside of the 'destroy' section of the recipe, but
360// doesn't restore it aftewards.
362 mlir::Location loc, mlir::Location locEnd, mlir::Value mainOp,
363 CharUnits alignment, QualType origType, size_t numBounds, QualType baseType,
364 mlir::Region &destroyRegion) {
365 mlir::Block *block = createRecipeBlock(destroyRegion, mainOp.getType(), loc,
366 numBounds, /*isInit=*/false);
367 builder.setInsertionPointToEnd(&destroyRegion.back());
368 CIRGenFunction::LexicalScope ls(cgf, loc, block);
369
370 mlir::Type elementTy =
371 mlir::cast<cir::PointerType>(mainOp.getType()).getPointee();
372 auto emitDestroy = [&](mlir::Value var, mlir::Type ty) {
373 Address addr{var, ty, alignment};
374 cgf.emitDestroy(addr, origType,
375 cgf.getDestroyer(QualType::DK_cxx_destructor));
376 };
377
378 if (numBounds) {
379 mlir::OpBuilder::InsertionGuard guardCase(builder);
380 // Get the range of bounds arguments, which are all but the 1st 2. 1st is
381 // a 'reference', 2nd is the 'private' variant we need to destroy from.
383 block->getArguments().drop_front(2);
384
385 mlir::Value subscriptedValue = block->getArgument(1);
386 for (mlir::BlockArgument boundArg : llvm::reverse(boundsRange))
387 subscriptedValue = createBoundsLoop(subscriptedValue, boundArg, loc,
388 /*inverse=*/true);
389
390 emitDestroy(subscriptedValue, cgf.cgm.convertType(origType));
391 } else {
392 // If we don't have any bounds, we can just destroy the variable directly.
393 // The destroy region has a signature of "original item, privatized item".
394 // So the 2nd item is the one that needs destroying, the former is just
395 // for reference and we don't really have a need for it at the moment.
396 emitDestroy(block->getArgument(1), elementTy);
397 }
398
399 ls.forceCleanup();
400 mlir::acc::YieldOp::create(builder, locEnd);
401}
402void OpenACCRecipeBuilderBase::makeBoundsInit(
403 mlir::Value alloca, mlir::Location loc, mlir::Block *block,
404 const VarDecl *allocaDecl, QualType origType, bool isInitSection) {
405 mlir::OpBuilder::InsertionGuard guardCase(builder);
406 builder.setInsertionPointToEnd(block);
407 CIRGenFunction::LexicalScope ls(cgf, loc, block);
408
409 CIRGenFunction::AutoVarEmission tempDeclEmission{*allocaDecl};
410 tempDeclEmission.emittedAsOffload = true;
411
412 // The init section is the only one of the handful that only has a single
413 // argument for the 'type', so we have to drop 1 for init, and future calls
414 // to this will need to drop 2.
416 block->getArguments().drop_front(isInitSection ? 1 : 2);
417
418 mlir::Value subscriptedValue = alloca;
419 for (mlir::BlockArgument boundArg : llvm::reverse(boundsRange))
420 subscriptedValue = createBoundsLoop(subscriptedValue, boundArg, loc,
421 /*inverse=*/false);
422
423 tempDeclEmission.setAllocatedAddress(
424 Address{subscriptedValue, cgf.convertType(origType),
425 cgf.getContext().getDeclAlign(allocaDecl)});
426 cgf.emitAutoVarInit(tempDeclEmission);
427}
428
429// TODO: OpenACC: when we start doing firstprivate for array/vlas/etc, we
430// probably need to do a little work about the 'init' calls to put it in 'copy'
431// region instead.
433 mlir::Location loc, mlir::Location locEnd, SourceRange exprRange,
434 mlir::Value mainOp, mlir::Region &recipeInitRegion, size_t numBounds,
435 llvm::ArrayRef<QualType> boundTypes, const VarDecl *allocaDecl,
436 QualType origType, bool emitInitExpr) {
437 assert(allocaDecl && "Required recipe variable not set?");
438 CIRGenFunction::DeclMapRevertingRAII declMapRAII{cgf, allocaDecl};
439
440 mlir::Block *block = createRecipeBlock(recipeInitRegion, mainOp.getType(),
441 loc, numBounds, /*isInit=*/true);
442 builder.setInsertionPointToEnd(&recipeInitRegion.back());
443 CIRGenFunction::LexicalScope ls(cgf, loc, block);
444
445 const Type *allocaPointeeType =
446 allocaDecl->getType()->getPointeeOrArrayElementType();
447 // We are OK with no init for builtins, arrays of builtins, or pointers,
448 // else we should NYI so we know to go look for these.
449 if (cgf.getContext().getLangOpts().CPlusPlus && !allocaDecl->getInit() &&
450 !allocaDecl->getType()->isPointerType() &&
451 !allocaPointeeType->isBuiltinType() &&
452 !allocaPointeeType->isPointerType()) {
453 // If we don't have any initialization recipe, we failed during Sema to
454 // initialize this correctly. If we disable the
455 // Sema::TentativeAnalysisScopes in SemaOpenACC::CreateInitRecipe, it'll
456 // emit an error to tell us. However, emitting those errors during
457 // production is a violation of the standard, so we cannot do them.
458 cgf.cgm.errorNYI(exprRange, "private/reduction default-init recipe");
459 }
460
461 if (!numBounds) {
462 // This is an 'easy' case, we just have to use the builtin init stuff to
463 // initialize this variable correctly.
464 CIRGenFunction::AutoVarEmission tempDeclEmission =
465 cgf.emitAutoVarAlloca(*allocaDecl, builder.saveInsertionPoint());
466 if (emitInitExpr)
467 cgf.emitAutoVarInit(tempDeclEmission);
468 } else {
469 mlir::Value alloca = makeBoundsAlloca(
470 block, exprRange, loc, allocaDecl->getName(), numBounds, boundTypes);
471
472 // If the initializer is trivial, there is nothing to do here, so save
473 // ourselves some effort.
474 if (emitInitExpr && allocaDecl->getInit() &&
475 (!cgf.isTrivialInitializer(allocaDecl->getInit()) ||
476 cgf.getContext().getLangOpts().getTrivialAutoVarInit() !=
478 makeBoundsInit(alloca, loc, block, allocaDecl, origType,
479 /*isInitSection=*/true);
480 }
481
482 ls.forceCleanup();
483 mlir::acc::YieldOp::create(builder, locEnd);
484}
485
487 mlir::Location loc, mlir::Location locEnd, mlir::Value mainOp,
488 const VarDecl *allocaDecl, const VarDecl *temporary,
489 mlir::Region &copyRegion, size_t numBounds) {
490 mlir::Block *block = createRecipeBlock(copyRegion, mainOp.getType(), loc,
491 numBounds, /*isInit=*/false);
492 builder.setInsertionPointToEnd(&copyRegion.back());
493 CIRGenFunction::LexicalScope ls(cgf, loc, block);
494
495 mlir::Value fromArg = block->getArgument(0);
496 mlir::Value toArg = block->getArgument(1);
497
499 block->getArguments().drop_front(2);
500
501 for (mlir::BlockArgument boundArg : llvm::reverse(boundsRange))
502 std::tie(fromArg, toArg) =
503 createBoundsLoop(fromArg, toArg, boundArg, loc, /*inverse=*/false);
504
505 // Set up the 'to' address.
506 mlir::Type elementTy =
507 mlir::cast<cir::PointerType>(toArg.getType()).getPointee();
508 CIRGenFunction::AutoVarEmission tempDeclEmission(*allocaDecl);
509 tempDeclEmission.emittedAsOffload = true;
510 tempDeclEmission.setAllocatedAddress(
511 Address{toArg, elementTy, cgf.getContext().getDeclAlign(allocaDecl)});
512
513 // Set up the 'from' address from the temporary.
514 CIRGenFunction::DeclMapRevertingRAII declMapRAII{cgf, temporary};
515 cgf.setAddrOfLocalVar(
516 temporary,
517 Address{fromArg, elementTy, cgf.getContext().getDeclAlign(allocaDecl)});
518 cgf.emitAutoVarInit(tempDeclEmission);
519
520 builder.setInsertionPointToEnd(&copyRegion.back());
521 ls.forceCleanup();
522 mlir::acc::YieldOp::create(builder, locEnd);
523}
524
525// This function generates the 'combiner' section for a reduction recipe. Note
526// that this function is not 'insertion point' clean, in that it alters the
527// insertion point to be inside of the 'combiner' section of the recipe, but
528// doesn't restore it aftewards.
530 mlir::Location loc, mlir::Location locEnd, mlir::Value mainOp,
531 mlir::acc::ReductionRecipeOp recipe, size_t numBounds, QualType origType,
533 mlir::Block *block =
534 createRecipeBlock(recipe.getCombinerRegion(), mainOp.getType(), loc,
535 numBounds, /*isInit=*/false);
536 builder.setInsertionPointToEnd(&recipe.getCombinerRegion().back());
537 CIRGenFunction::LexicalScope ls(cgf, loc, block);
538
539 mlir::Value lhsArg = block->getArgument(0);
540 mlir::Value rhsArg = block->getArgument(1);
542 block->getArguments().drop_front(2);
543
544 if (llvm::any_of(combinerRecipes, [](auto &r) { return r.Op == nullptr; })) {
545 cgf.cgm.errorNYI(loc, "OpenACC Reduction combiner not generated");
546 mlir::acc::YieldOp::create(builder, locEnd, block->getArgument(0));
547 return;
548 }
549
550 // apply the bounds so that we can get our bounds emitted correctly.
551 for (mlir::BlockArgument boundArg : llvm::reverse(boundsRange))
552 std::tie(lhsArg, rhsArg) =
553 createBoundsLoop(lhsArg, rhsArg, boundArg, loc, /*inverse=*/false);
554
555 // Emitter for when we know this isn't a struct or array we have to loop
556 // through. This should work for the 'field' once the get-element call has
557 // been made.
558 auto emitSingleCombiner =
559 [&](mlir::Value lhsArg, mlir::Value rhsArg,
561 mlir::Type elementTy =
562 mlir::cast<cir::PointerType>(lhsArg.getType()).getPointee();
563 CIRGenFunction::DeclMapRevertingRAII declMapRAIILhs{cgf, combiner.LHS};
564 cgf.setAddrOfLocalVar(
565 combiner.LHS, Address{lhsArg, elementTy,
566 cgf.getContext().getDeclAlign(combiner.LHS)});
567 CIRGenFunction::DeclMapRevertingRAII declMapRAIIRhs{cgf, combiner.RHS};
568 cgf.setAddrOfLocalVar(
569 combiner.RHS, Address{rhsArg, elementTy,
570 cgf.getContext().getDeclAlign(combiner.RHS)});
571
572 [[maybe_unused]] mlir::LogicalResult stmtRes =
573 cgf.emitStmt(combiner.Op, /*useCurrentScope=*/true);
574 };
575
576 // Emitter for when we know this is either a non-array or element of an array
577 // (which also shouldn't be an array type?). This function should generate the
578 // initialization code for an entire 'array-element'/non-array, including
579 // diving into each element of a struct (if necessary).
580 auto emitCombiner = [&](mlir::Value lhsArg, mlir::Value rhsArg, QualType ty) {
581 assert(!ty->isArrayType() && "Array type shouldn't get here");
582 if (const auto *rd = ty->getAsRecordDecl()) {
583 if (combinerRecipes.size() == 1 &&
584 cgf.getContext().hasSameType(ty, combinerRecipes[0].LHS->getType())) {
585 // If this is a 'top level' operator on the type we can just emit this
586 // as a simple one.
587 emitSingleCombiner(lhsArg, rhsArg, combinerRecipes[0]);
588 } else {
589 // else we have to handle each individual field after after a
590 // get-element.
591 const CIRGenRecordLayout &layout =
592 cgf.cgm.getTypes().getCIRGenRecordLayout(rd);
593 for (const auto &[field, combiner] :
594 llvm::zip_equal(rd->fields(), combinerRecipes)) {
595 mlir::Type fieldType = cgf.convertType(field->getType());
596 auto fieldPtr = cir::PointerType::get(fieldType);
597 unsigned fieldIndex = layout.getCIRFieldNo(field);
598
599 mlir::Value lhsField = builder.createGetMember(
600 loc, fieldPtr, lhsArg, field->getName(), fieldIndex);
601 mlir::Value rhsField = builder.createGetMember(
602 loc, fieldPtr, rhsArg, field->getName(), fieldIndex);
603
604 emitSingleCombiner(lhsField, rhsField, combiner);
605 }
606 }
607
608 } else {
609 // if this is a single-thing (because we should know this isn't an array,
610 // as Sema wouldn't let us get here), we can just do a normal emit call.
611 emitSingleCombiner(lhsArg, rhsArg, combinerRecipes[0]);
612 }
613 };
614
615 if (const auto *cat = cgf.getContext().getAsConstantArrayType(origType)) {
616 // If we're in an array, we have to emit the combiner for each element of
617 // the array.
618 auto itrTy = mlir::cast<cir::IntType>(cgf.ptrDiffTy);
619 auto itrPtrTy = cir::PointerType::get(itrTy);
620
621 mlir::Value zero =
622 builder.getConstInt(loc, mlir::cast<cir::IntType>(cgf.ptrDiffTy), 0);
623 mlir::Value itr =
624 cir::AllocaOp::create(builder, loc, itrPtrTy, itrTy, "itr",
625 cgf.cgm.getSize(cgf.getPointerAlign()));
626 builder.CIRBaseBuilderTy::createStore(loc, zero, itr);
627
628 builder.setInsertionPointAfter(builder.createFor(
629 loc,
630 /*condBuilder=*/
631 [&](mlir::OpBuilder &b, mlir::Location loc) {
632 auto loadItr = cir::LoadOp::create(builder, loc, {itr});
633 mlir::Value arraySize = builder.getConstInt(
634 loc, mlir::cast<cir::IntType>(cgf.ptrDiffTy), cat->getZExtSize());
635 auto cmp = builder.createCompare(loc, cir::CmpOpKind::lt, loadItr,
636 arraySize);
637 builder.createCondition(cmp);
638 },
639 /*bodyBuilder=*/
640 [&](mlir::OpBuilder &b, mlir::Location loc) {
641 auto loadItr = cir::LoadOp::create(builder, loc, {itr});
642 auto lhsElt = builder.getArrayElement(
643 loc, loc, lhsArg, cgf.convertType(cat->getElementType()), loadItr,
644 /*shouldDecay=*/true);
645 auto rhsElt = builder.getArrayElement(
646 loc, loc, rhsArg, cgf.convertType(cat->getElementType()), loadItr,
647 /*shouldDecay=*/true);
648
649 emitCombiner(lhsElt, rhsElt, cat->getElementType());
650 builder.createYield(loc);
651 },
652 /*stepBuilder=*/
653 [&](mlir::OpBuilder &b, mlir::Location loc) {
654 auto loadItr = cir::LoadOp::create(builder, loc, {itr});
655 auto inc = builder.createInc(loc, loadItr);
656 builder.CIRBaseBuilderTy::createStore(loc, inc, itr);
657 builder.createYield(loc);
658 }));
659
660 } else if (origType->isArrayType()) {
661 cgf.cgm.errorNYI(loc,
662 "OpenACC Reduction combiner non-constant array recipe");
663 } else {
664 emitCombiner(lhsArg, rhsArg, origType);
665 }
666
667 builder.setInsertionPointToEnd(&recipe.getCombinerRegion().back());
668 ls.forceCleanup();
669 mlir::acc::YieldOp::create(builder, locEnd, block->getArgument(0));
670}
671
672} // namespace clang::CIRGen
__device__ __2f16 b
cir::ConditionOp createCondition(mlir::Value condition)
Create a loop condition.
cir::ForOp createFor(mlir::Location loc, llvm::function_ref< void(mlir::OpBuilder &, mlir::Location)> condBuilder, llvm::function_ref< void(mlir::OpBuilder &, mlir::Location)> bodyBuilder, llvm::function_ref< void(mlir::OpBuilder &, mlir::Location)> stepBuilder)
Create a for operation.
cir::CmpOp createCompare(mlir::Location loc, cir::CmpOpKind kind, mlir::Value lhs, mlir::Value rhs)
CharUnits getTypeAlignInChars(QualType T) const
Return the ABI-specified alignment of a (complete) type T, in characters.
CharUnits getDeclAlign(const Decl *D, bool ForAlignof=false) const
Return a conservative estimate of the alignment of the specified decl D.
CanQualType UnsignedLongLongTy
cir::ConstantOp getConstInt(mlir::Location loc, llvm::APSInt intVal)
void forceCleanup(ArrayRef< mlir::Value * > valuesToReload={})
Force the emission of cleanups now, instead of waiting until this object is destroyed.
mlir::Type convertType(clang::QualType t)
void emitAutoVarInit(const AutoVarEmission &emission)
Emit the initializer for an allocated variable.
clang::ASTContext & getContext() const
mlir::Type convertType(clang::QualType type)
mlir::IntegerAttr getSize(CharUnits size)
This class handles record and union layout info while lowering AST types to CIR types.
unsigned getCIRFieldNo(const clang::FieldDecl *fd) const
Return cir::RecordType element number that corresponds to the field FD.
void createReductionRecipeCombiner(mlir::Location loc, mlir::Location locEnd, mlir::Value mainOp, mlir::acc::ReductionRecipeOp recipe, size_t numBounds, QualType origType, llvm::ArrayRef< OpenACCReductionRecipe::CombinerRecipe > combinerRecipes)
void createInitRecipe(mlir::Location loc, mlir::Location locEnd, SourceRange exprRange, mlir::Value mainOp, mlir::Region &recipeInitRegion, size_t numBounds, llvm::ArrayRef< QualType > boundTypes, const VarDecl *allocaDecl, QualType origType, bool emitInitExpr)
void createFirstprivateRecipeCopy(mlir::Location loc, mlir::Location locEnd, mlir::Value mainOp, const VarDecl *allocaDecl, const VarDecl *temporary, mlir::Region &copyRegion, size_t numBounds)
mlir::acc::ReductionOperator convertReductionOp(OpenACCReductionOperator op)
std::pair< mlir::Value, mlir::Value > createBoundsLoop(mlir::Value subscriptedValue, mlir::Value subscriptedValue2, mlir::Value bound, mlir::Location loc, bool inverse)
void createRecipeDestroySection(mlir::Location loc, mlir::Location locEnd, mlir::Value mainOp, CharUnits alignment, QualType origType, size_t numBounds, QualType baseType, mlir::Region &destroyRegion)
mlir::Block * createRecipeBlock(mlir::Region &region, mlir::Type opTy, mlir::Location loc, size_t numBounds, bool isInit)
CharUnits - This is an opaque type for sizes expressed in character units.
Definition CharUnits.h:38
StringRef getName() const
Get the name of identifier for this declaration as a StringRef.
Definition Decl.h:301
A (possibly-)qualified type.
Definition TypeBase.h:937
A trivial tuple used to represent a source range.
const Type * getPointeeOrArrayElementType() const
If this is a pointer type, return the pointee type.
Definition TypeBase.h:9221
bool isArrayType() const
Definition TypeBase.h:8767
bool isPointerType() const
Definition TypeBase.h:8668
bool isBuiltinType() const
Helper methods to distinguish type categories.
Definition TypeBase.h:8791
QualType getType() const
Definition Decl.h:723
Represents a variable declaration or definition.
Definition Decl.h:926
const Expr * getInit() const
Definition Decl.h:1383
@ Type
The l-value was considered opaque, so the alignment was determined from a type.
OpenACCReductionOperator
@ Invalid
Invalid Reduction Clause Kind.
bool isa(CodeGen::Address addr)
Definition Address.h:330
U cast(CodeGen::Address addr)
Definition Address.h:327
bool emittedAsOffload
True if the variable was emitted as an offload recipe, and thus doesn't have the same sort of alloca ...
Represents a scope, including function bodies, compound statements, and the substatements of if/while...