clang 23.0.0git
LoweringPrepare.cpp
Go to the documentation of this file.
1//===- LoweringPrepare.cpp - pareparation work for LLVM lowering ----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "PassDetail.h"
10#include "mlir/IR/Attributes.h"
11#include "mlir/IR/BuiltinAttributeInterfaces.h"
12#include "mlir/IR/IRMapping.h"
13#include "mlir/IR/Location.h"
14#include "mlir/IR/Value.h"
16#include "clang/AST/Mangle.h"
17#include "clang/Basic/Cuda.h"
18#include "clang/Basic/Module.h"
32#include "llvm/ADT/StringRef.h"
33#include "llvm/ADT/TypeSwitch.h"
34#include "llvm/IR/Instructions.h"
35#include "llvm/Support/ErrorHandling.h"
36#include "llvm/Support/MemoryBuffer.h"
37#include "llvm/Support/Path.h"
38#include "llvm/Support/VirtualFileSystem.h"
39
40#include <memory>
41#include <optional>
42
43using namespace mlir;
44using namespace cir;
45
46namespace mlir {
47#define GEN_PASS_DEF_LOWERINGPREPARE
48#include "clang/CIR/Dialect/Passes.h.inc"
49} // namespace mlir
50
51static SmallString<128> getTransformedFileName(mlir::ModuleOp mlirModule) {
52 SmallString<128> fileName;
53
54 if (mlirModule.getSymName())
55 fileName = llvm::sys::path::filename(mlirModule.getSymName()->str());
56
57 if (fileName.empty())
58 fileName = "<null>";
59
60 for (size_t i = 0; i < fileName.size(); ++i) {
61 // Replace everything that's not [a-zA-Z0-9._] with a _. This set happens
62 // to be the set of C preprocessing numbers.
63 if (!clang::isPreprocessingNumberBody(fileName[i]))
64 fileName[i] = '_';
65 }
66
67 return fileName;
68}
69
70namespace {
71struct LoweringPreparePass
72 : public impl::LoweringPrepareBase<LoweringPreparePass> {
73 LoweringPreparePass() = default;
74
75 // `mlir::SymbolTableCollection` is move-only (it owns lazily-created
76 // `unique_ptr<SymbolTable>` entries), which makes the implicit copy
77 // constructor ill-formed. MLIR's `clonePass()` requires copy
78 // construction, so define one explicitly. Per-run state members
79 // (dynamic initializers, guard maps, symbol-table cache, etc.) all
80 // start fresh in the cloned pass, which matches MLIR convention for
81 // pass clones and is more correct than the previous default-generated
82 // behavior that silently copied them.
83 LoweringPreparePass(const LoweringPreparePass &other)
84 : impl::LoweringPrepareBase<LoweringPreparePass>(other) {}
85
86 void runOnOperation() override;
87
88 void runOnOp(mlir::Operation *op);
89 void lowerCastOp(cir::CastOp op);
90 void lowerComplexDivOp(cir::ComplexDivOp op);
91 void lowerComplexMulOp(cir::ComplexMulOp op);
92 void lowerUnaryOp(cir::UnaryOpInterface op);
93 void lowerGetGlobalOp(cir::GetGlobalOp op);
94 void lowerGlobalOp(cir::GlobalOp op);
95 void lowerThreeWayCmpOp(cir::CmpThreeWayOp op);
96 void lowerArrayDtor(cir::ArrayDtor op);
97 void lowerArrayCtor(cir::ArrayCtor op);
98 void lowerTrivialCopyCall(cir::CallOp op);
99 void lowerStoreOfConstAggregate(cir::StoreOp op);
100 void lowerLocalInitOp(cir::LocalInitOp op);
101
102 /// Return the FuncOp called by `callOp`. Uses the cached `symbolTables`
103 /// member to avoid the O(M) module-wide scan that the static
104 /// `mlir::SymbolTable::lookupNearestSymbolFrom` would do per call.
105 cir::FuncOp getCalledFunction(cir::CallOp callOp);
106
107 /// Return a private constant cir::GlobalOp with the given type and initial
108 /// value, suitable for backing a memcpy-initialized local aggregate.
109 ///
110 /// If a global with `baseName` (or one of its `.<n>` versioned siblings)
111 /// already has a matching type and initial value, that global is reused.
112 /// Otherwise a new global is created with the next available `.<n>` suffix
113 /// (matching CIRGenBuilder::createVersionedGlobal and OGCG behavior).
114 cir::GlobalOp getOrCreateConstAggregateGlobal(CIRBaseBuilderTy &builder,
115 mlir::Location loc,
116 llvm::StringRef baseName,
117 mlir::Type ty,
118 mlir::TypedAttr constant);
119
120 /// Build the function that initializes the specified global
121 cir::FuncOp buildCXXGlobalVarDeclInitFunc(cir::GlobalOp op);
122
123 /// When looking at the 'global' op, create the wrapper function.
124 void defineGlobalThreadLocalWrapper(cir::GlobalOp op, cir::FuncOp initAlias,
125 bool isVarDefinition);
126 /// Create an initialization alias for a thread-local variable.
127 cir::FuncOp defineGlobalThreadLocalInitAlias(cir::GlobalOp op,
128 cir::FuncOp aliasee);
129 /// Get the declaration for the 'wrapper' function for a global-TLS variable.
130 cir::FuncOp getOrCreateThreadLocalWrapper(CIRBaseBuilderTy &builder,
131 cir::GlobalOp op);
132 // Function that generates the guard global variable, get-global, and 'if'
133 // condition for global TLS init function generation. This inserts an 'if'
134 // with the store at the beginning of the 'then' region, so inserts into the
135 // body should happen after that.
136 cir::IfOp buildGlobalTlsGuardCheck(CIRBaseBuilderTy &builder,
137 mlir::Location loc, cir::GlobalOp guard);
138 /// Handle the dtor region by registering destructor with __cxa_atexit
139 cir::FuncOp getOrCreateDtorFunc(CIRBaseBuilderTy &builder, cir::GlobalOp op,
140 mlir::Region &dtorRegion,
141 cir::CallOp &dtorCall);
142
143 /// Build a module init function that calls all the dynamic initializers.
144 void buildCXXGlobalInitFunc();
145 // Build an init function for all of the ordered global thread local storage
146 // variables.
147 void buildCXXGlobalTlsFunc();
148
149 /// Materialize global ctor/dtor list
150 void buildGlobalCtorDtorList();
151
152 cir::FuncOp buildRuntimeFunction(
153 mlir::OpBuilder &builder, llvm::StringRef name, mlir::Location loc,
154 cir::FuncType type,
155 cir::GlobalLinkageKind linkage = cir::GlobalLinkageKind::ExternalLinkage);
156
157 cir::GlobalOp getOrCreateRuntimeVariable(
158 mlir::OpBuilder &builder, llvm::StringRef name, mlir::Location loc,
159 mlir::Type type,
160 cir::GlobalLinkageKind linkage = cir::GlobalLinkageKind::ExternalLinkage,
161 cir::VisibilityKind visibility = cir::VisibilityKind::Default);
162
163 /// ------------
164 /// CUDA registration related
165 /// ------------
166
167 llvm::StringMap<FuncOp> cudaKernelMap;
168
169 /// Build the CUDA module constructor that registers the fat binary
170 /// with the CUDA runtime.
171 void buildCUDAModuleCtor();
172 std::optional<FuncOp> buildCUDAModuleDtor();
173 std::optional<FuncOp> buildHIPModuleDtor();
174 std::optional<FuncOp> buildCUDARegisterGlobals();
175 void buildCUDARegisterGlobalFunctions(cir::CIRBaseBuilderTy &builder,
176 FuncOp regGlobalFunc);
177
178 /// Handle static local variable initialization with guard variables.
179 void handleStaticLocal(cir::GlobalOp globalOp, cir::LocalInitOp localInitOp);
180
181 /// Get or create __cxa_guard_acquire function.
182 cir::FuncOp getGuardAcquireFn(cir::PointerType guardPtrTy);
183
184 /// Get or create __cxa_guard_release function.
185 cir::FuncOp getGuardReleaseFn(cir::PointerType guardPtrTy);
186
187 /// Get or create the __init_tls function.
188 cir::FuncOp getTlsInitFn();
189
190 // Create the __tls_guard variable.
191 cir::GlobalOp createGlobalThreadLocalGuard(CIRBaseBuilderTy &builder,
192 mlir::Location loc);
193
194 /// Create a guard global variable for a static local.
195 cir::GlobalOp createGuardGlobalOp(CIRBaseBuilderTy &builder,
196 mlir::Location loc, llvm::StringRef name,
197 cir::IntType guardTy,
198 cir::GlobalLinkageKind linkage);
199
200 /// Get the guard variable for a static local declaration.
201 cir::GlobalOp getStaticLocalDeclGuardAddress(llvm::StringRef globalSymName) {
202 auto it = staticLocalDeclGuardMap.find(globalSymName);
203 if (it != staticLocalDeclGuardMap.end())
204 return it->second;
205 return nullptr;
206 }
207
208 /// Set the guard variable for a static local declaration.
209 void setStaticLocalDeclGuardAddress(llvm::StringRef globalSymName,
210 cir::GlobalOp guard) {
211 staticLocalDeclGuardMap[globalSymName] = guard;
212 }
213
214 /// Get or create the guard variable for a static local declaration.
215 cir::GlobalOp getOrCreateStaticLocalDeclGuardAddress(
216 CIRBaseBuilderTy &builder, cir::GlobalOp globalOp, StringRef guardName,
217 bool isLocalVarDecl, bool useInt8GuardVariable) {
218
219 cir::CIRDataLayout dataLayout(mlirModule);
220 cir::IntType guardTy;
221 clang::CharUnits guardAlignment;
222 // Guard variables are 64 bits in the generic ABI and size width on ARM
223 // (i.e. 32-bit on AArch32, 64-bit on AArch64).
224 if (useInt8GuardVariable) {
225 guardTy = cir::IntType::get(&getContext(), 8, /*isSigned=*/true);
226 guardAlignment = clang::CharUnits::One();
227 } else if (useARMGuardVarABI()) {
228 // Guard variables are size width on ARM (32-bit AArch32, 64-bit AArch64).
229 const unsigned sizeTypeSize =
230 astCtx->getTypeSize(astCtx->getSignedSizeType());
231 guardTy =
232 cir::IntType::get(&getContext(), sizeTypeSize, /*isSigned=*/true);
233 guardAlignment =
234 clang::CharUnits::fromQuantity(dataLayout.getABITypeAlign(guardTy));
235 } else {
236 guardTy = cir::IntType::get(&getContext(), 64, /*isSigned=*/true);
237 guardAlignment =
238 clang::CharUnits::fromQuantity(dataLayout.getABITypeAlign(guardTy));
239 }
240 assert(guardTy && guardAlignment.getQuantity() != 0);
241
242 llvm::StringRef globalSymName = globalOp.getSymName();
243 cir::GlobalOp guard = getStaticLocalDeclGuardAddress(globalSymName);
244 if (!guard) {
245 // Create the guard variable with a zero-initializer.
246 guard = createGuardGlobalOp(builder, globalOp->getLoc(), guardName,
247 guardTy, globalOp.getLinkage());
248 guard.setInitialValueAttr(cir::IntAttr::get(guardTy, 0));
249 guard.setDSOLocal(globalOp.getDsoLocal());
250 guard.setAlignment(guardAlignment.getAsAlign().value());
251 guard.setTlsModel(globalOp.getTlsModel());
252
253 // The ABI says: "It is suggested that it be emitted in the same COMDAT
254 // group as the associated data object." In practice, this doesn't work
255 // for non-ELF and non-Wasm object formats, so only do it for ELF and
256 // Wasm.
257 bool hasComdat = globalOp.getComdat();
258 const llvm::Triple &triple = astCtx->getTargetInfo().getTriple();
259 // TODO(cir): for now, we're just setting comdat to true, but it should
260 // contain a comdat reference name here instead.
261 if (!isLocalVarDecl && hasComdat &&
262 (triple.isOSBinFormatELF() || triple.isOSBinFormatWasm())) {
263 // This should be a comdat for the variable.
264 guard.setComdat(true);
265 } else if (hasComdat && globalOp.isWeakForLinker()) {
266 guard.setComdat(true);
267 }
268
269 setStaticLocalDeclGuardAddress(globalSymName, guard);
270 }
271 return guard;
272 }
273
274 ///
275 /// AST related
276 /// -----------
277
278 clang::ASTContext *astCtx;
279
280 /// Tracks current module.
281 mlir::ModuleOp mlirModule;
282
283 /// Cached symbol tables used to avoid repeated O(M) module-wide scans
284 /// during per-call/per-global symbol lookups. Lazily populated on first
285 /// use. Pass methods access this directly rather than threading it
286 /// through helper signatures (see PR feedback on #195919).
287 ///
288 /// Invariant: every site that mutates the module's symbol table either
289 /// (a) keeps `symbolTables` in sync via
290 /// `symbolTables.getSymbolTable(mlirModule).insert(...)` (as
291 /// `getOrCreateConstAggregateGlobal` does), or (b) creates a symbol
292 /// that is never resolved through the cache later. Today
293 /// `buildRuntimeFunction` and `getOrCreateRuntimeVariable` fall in the
294 /// (b) bucket: their callers either use a separate map
295 /// (`cudaKernelMap`, `staticLocalDeclGuardMap`, `dynamicInitializers`)
296 /// or the static `mlir::SymbolTable::lookupNearestSymbolFrom`, never
297 /// the cached path. If a future change adds a cached lookup of a
298 /// freshly created symbol, the corresponding create site MUST move
299 /// to bucket (a) (insert into the cache or call
300 /// `invalidateSymbolTable`).
301 mlir::SymbolTableCollection symbolTables;
302
303 /// Tracks existing dynamic initializers.
304 llvm::StringMap<uint32_t> dynamicInitializerNames;
305 llvm::SmallVector<cir::FuncOp> dynamicInitializers;
306 llvm::SmallVector<cir::FuncOp> globalThreadLocalInitializers;
307 llvm::StringMap<cir::FuncOp> threadLocalWrappers;
308 llvm::StringMap<cir::FuncOp> threadLocalInitAliases;
309
310 /// Tracks guard variables for static locals (keyed by global symbol name).
311 llvm::StringMap<cir::GlobalOp> staticLocalDeclGuardMap;
312
313 llvm::StringMap<llvm::SmallVector<cir::GlobalOp, 1>> constAggregateGlobals;
314
315 /// List of ctors and their priorities to be called before main()
316 llvm::SmallVector<std::pair<std::string, uint32_t>, 4> globalCtorList;
317 /// List of dtors and their priorities to be called when unloading module.
318 llvm::SmallVector<std::pair<std::string, uint32_t>, 4> globalDtorList;
319
320 /// Returns true if the target uses ARM-style guard variables for static
321 /// local initialization (32-bit guard, check bit 0 only).
322 bool useARMGuardVarABI() const {
323 switch (astCtx->getCXXABIKind()) {
324 case clang::TargetCXXABI::GenericARM:
325 case clang::TargetCXXABI::iOS:
326 case clang::TargetCXXABI::WatchOS:
327 case clang::TargetCXXABI::GenericAArch64:
328 case clang::TargetCXXABI::WebAssembly:
329 return true;
330 default:
331 return false;
332 }
333 }
334
335 void emitGlobalGuardedDtorRegion(CIRBaseBuilderTy &builder,
336 cir::GlobalOp global,
337 mlir::Region &dtorRegion, bool tls,
338 mlir::Block &entryBB) {
339 // Create a variable that binds the atexit to this shared object.
340 builder.setInsertionPointToStart(&mlirModule.getBodyRegion().front());
341 cir::GlobalOp handle = getOrCreateRuntimeVariable(
342 builder, "__dso_handle", global.getLoc(), builder.getI8Type(),
343 cir::GlobalLinkageKind::ExternalLinkage, cir::VisibilityKind::Hidden);
344
345 // If this is a simple call to a destructor, get the called function.
346 // Otherwise, create a helper function for the entire dtor region,
347 // replacing the current dtor region body with a call to the helper
348 // function.
349 cir::CallOp dtorCall;
350 cir::FuncOp dtorFunc =
351 getOrCreateDtorFunc(builder, global, dtorRegion, dtorCall);
352
353 // Create a runtime helper function:
354 // extern "C" int __cxa_atexit(void (*f)(void *), void *p, void *d);
355 cir::PointerType voidPtrTy = builder.getVoidPtrTy();
356 cir::PointerType voidFnPtrTy = builder.getVoidFnPtrTy({voidPtrTy});
357 cir::PointerType handlePtrTy = builder.getPointerTo(handle.getSymType());
358 auto fnAtExitType =
359 builder.getVoidFnTy({voidFnPtrTy, voidPtrTy, handlePtrTy});
360
361 llvm::StringLiteral nameAtExit = "__cxa_atexit";
362 if (tls)
363 nameAtExit = astCtx->getTargetInfo().getTriple().isOSDarwin()
364 ? llvm::StringLiteral("_tlv_atexit")
365 : llvm::StringLiteral("__cxa_thread_atexit");
366
367 cir::FuncOp fnAtExit = buildRuntimeFunction(builder, nameAtExit,
368 global.getLoc(), fnAtExitType);
369
370 // Replace the dtor (or helper) call with a call to
371 // __cxa_atexit(&dtor, &var, &__dso_handle)
372 builder.setInsertionPointAfter(dtorCall);
373 mlir::Value args[3];
374 auto dtorPtrTy = cir::PointerType::get(dtorFunc.getFunctionType());
375 args[0] = cir::GetGlobalOp::create(builder, dtorCall.getLoc(), dtorPtrTy,
376 dtorFunc.getSymName());
377 args[0] = cir::CastOp::create(builder, dtorCall.getLoc(), voidFnPtrTy,
378 cir::CastKind::bitcast, args[0]);
379 args[1] =
380 cir::CastOp::create(builder, dtorCall.getLoc(), voidPtrTy,
381 cir::CastKind::bitcast, dtorCall.getArgOperand(0));
382 args[2] = cir::GetGlobalOp::create(builder, handle.getLoc(), handlePtrTy,
383 handle.getSymName());
384 builder.createCallOp(dtorCall.getLoc(), fnAtExit, args);
385 dtorCall->erase();
386 mlir::Block &dtorBlock = dtorRegion.front();
387 entryBB.getOperations().splice(entryBB.end(), dtorBlock.getOperations(),
388 dtorBlock.begin(),
389 std::prev(dtorBlock.end()));
390 // make sure we leave the insert location after the operations we just
391 // inserted.
392 builder.setInsertionPointToEnd(&entryBB);
393 }
394
395 /// Emit the guarded initialization for a static local variable.
396 /// This handles the if/else structure after the guard byte check,
397 /// following OG's ItaniumCXXABI::EmitGuardedInit skeleton.
398 void emitCXXGuardedInitIf(CIRBaseBuilderTy &builder, cir::GlobalOp globalOp,
399 mlir::Region &ctorRegion, mlir::Region &dtorRegion,
400 cir::ASTVarDeclInterface varDecl,
401 mlir::Value guardPtr, cir::PointerType guardPtrTy,
402 bool threadsafe) {
403 auto loc = globalOp->getLoc();
404
405 // The semantics of dynamic initialization of variables with static or
406 // thread storage duration depends on whether they are declared at
407 // block-scope. The initialization of such variables at block-scope can be
408 // aborted with an exception and later retried (per C++20 [stmt.dcl]p4),
409 // and recursive entry to their initialization has undefined behavior (also
410 // per C++20 [stmt.dcl]p4). For such variables declared at non-block scope,
411 // exceptions lead to termination (per C++20 [except.terminate]p1), and
412 // recursive references to the variables are governed only by the lifetime
413 // rules (per C++20 [class.cdtor]p2), which means such references are
414 // perfectly fine as long as they avoid touching memory. As a result,
415 // block-scope variables must not be marked as initialized until after
416 // initialization completes (unless the mark is reverted following an
417 // exception), but non-block-scope variables must be marked prior to
418 // initialization so that recursive accesses during initialization do not
419 // restart initialization.
420
421 auto emitBody = [&]() {
422 // Emit the initializer and add a global destructor if appropriate.
423 mlir::Block *insertBlock = builder.getInsertionBlock();
424 if (!ctorRegion.empty()) {
425 assert(ctorRegion.hasOneBlock() && "Enforced by MaxSizedRegion<1>");
426
427 mlir::Block &block = ctorRegion.front();
428 insertBlock->getOperations().splice(
429 insertBlock->end(), block.getOperations(), block.begin(),
430 std::prev(block.end()));
431 }
432
433 if (!dtorRegion.empty()) {
434 assert(dtorRegion.hasOneBlock() && "Enforced by MaxSizedRegion<1>");
435
436 emitGlobalGuardedDtorRegion(builder, globalOp, dtorRegion, !threadsafe,
437 *insertBlock);
438 }
439 builder.setInsertionPointToEnd(insertBlock);
440 ctorRegion.getBlocks().clear();
441 };
442
443 // Variables used when coping with thread-safe statics and exceptions.
444 if (threadsafe) {
445 // Call __cxa_guard_acquire.
446 cir::CallOp acquireCall = builder.createCallOp(
447 loc, getGuardAcquireFn(guardPtrTy), mlir::ValueRange{guardPtr});
448 mlir::Value acquireResult = acquireCall.getResult();
449
450 auto acquireZero = builder.getConstantInt(
451 loc, mlir::cast<cir::IntType>(acquireResult.getType()), 0);
452 auto shouldInit = builder.createCompare(loc, cir::CmpOpKind::ne,
453 acquireResult, acquireZero);
454
455 // Create the IfOp for the shouldInit check.
456 // Pass an empty callback to avoid auto-creating a yield terminator.
457 auto ifOp =
458 cir::IfOp::create(builder, loc, shouldInit, /*withElseRegion=*/false,
459 [](mlir::OpBuilder &, mlir::Location) {});
460 mlir::OpBuilder::InsertionGuard insertGuard(builder);
461 builder.setInsertionPointToStart(&ifOp.getThenRegion().front());
462
463 // Call __cxa_guard_abort along the exceptional edge.
464 // OG: CGF.EHStack.pushCleanup<CallGuardAbort>(EHCleanup, guard);
466
467 emitBody();
468
469 // Pop the guard-abort cleanup if we pushed one.
470 // OG: CGF.PopCleanupBlock();
472
473 // Call __cxa_guard_release. This cannot throw.
474 builder.createCallOp(loc, getGuardReleaseFn(guardPtrTy),
475 mlir::ValueRange{guardPtr});
476
477 builder.createYield(loc);
478 } else if (!varDecl.isLocalVarDecl()) {
479 // For non-local variables, store 1 into the first byte of the guard
480 // variable before the object initialization begins so that references
481 // to the variable during initialization don't restart initialization.
482 // OG: Builder.CreateStore(llvm::ConstantInt::get(CGM.Int8Ty, 1), ...);
483 // Then: CGF.EmitCXXGlobalVarDeclInit(D, var, shouldPerformInit);
484 globalOp->emitError("NYI: non-threadsafe init for non-local variables");
485 return;
486 } else {
487 emitBody();
488 // For local variables, store 1 into the first byte of the guard variable
489 // after the object initialization completes so that initialization is
490 // retried if initialization is interrupted by an exception.
491 builder.createStore(
492 loc, builder.getConstantInt(loc, guardPtrTy.getPointee(), 1),
493 guardPtr);
494 }
495
496 builder.createYield(loc); // Outermost IfOp
497 }
498
499 void setASTContext(clang::ASTContext *c) { astCtx = c; }
500};
501
502} // namespace
503
504cir::GlobalOp LoweringPreparePass::getOrCreateRuntimeVariable(
505 mlir::OpBuilder &builder, llvm::StringRef name, mlir::Location loc,
506 mlir::Type type, cir::GlobalLinkageKind linkage,
507 cir::VisibilityKind visibility) {
508 cir::GlobalOp g = dyn_cast_or_null<cir::GlobalOp>(
509 mlir::SymbolTable::lookupNearestSymbolFrom(
510 mlirModule, mlir::StringAttr::get(mlirModule->getContext(), name)));
511 if (!g) {
512 g = cir::GlobalOp::create(builder, loc, name, type);
513 g.setLinkageAttr(
514 cir::GlobalLinkageKindAttr::get(builder.getContext(), linkage));
515 mlir::SymbolTable::setSymbolVisibility(
516 g, mlir::SymbolTable::Visibility::Private);
517 g.setGlobalVisibility(visibility);
518 }
519 return g;
520}
521
522cir::FuncOp LoweringPreparePass::buildRuntimeFunction(
523 mlir::OpBuilder &builder, llvm::StringRef name, mlir::Location loc,
524 cir::FuncType type, cir::GlobalLinkageKind linkage) {
525 cir::FuncOp f = dyn_cast_or_null<FuncOp>(SymbolTable::lookupNearestSymbolFrom(
526 mlirModule, StringAttr::get(mlirModule->getContext(), name)));
527 if (!f) {
528 f = cir::FuncOp::create(builder, loc, name, type);
529 f.setLinkageAttr(
530 cir::GlobalLinkageKindAttr::get(builder.getContext(), linkage));
531 mlir::SymbolTable::setSymbolVisibility(
532 f, mlir::SymbolTable::Visibility::Private);
533
535 }
536 return f;
537}
538
539static mlir::Value lowerScalarToComplexCast(mlir::MLIRContext &ctx,
540 cir::CastOp op) {
541 cir::CIRBaseBuilderTy builder(ctx);
542 builder.setInsertionPoint(op);
543
544 mlir::Value src = op.getSrc();
545 mlir::Value imag = builder.getNullValue(src.getType(), op.getLoc());
546 return builder.createComplexCreate(op.getLoc(), src, imag);
547}
548
549static mlir::Value lowerComplexToScalarCast(mlir::MLIRContext &ctx,
550 cir::CastOp op,
551 cir::CastKind elemToBoolKind) {
552 cir::CIRBaseBuilderTy builder(ctx);
553 builder.setInsertionPoint(op);
554
555 mlir::Value src = op.getSrc();
556 if (!mlir::isa<cir::BoolType>(op.getType()))
557 return builder.createComplexReal(op.getLoc(), src);
558
559 // Complex cast to bool: (bool)(a+bi) => (bool)a || (bool)b
560 mlir::Value srcReal = builder.createComplexReal(op.getLoc(), src);
561 mlir::Value srcImag = builder.createComplexImag(op.getLoc(), src);
562
563 cir::BoolType boolTy = builder.getBoolTy();
564 mlir::Value srcRealToBool =
565 builder.createCast(op.getLoc(), elemToBoolKind, srcReal, boolTy);
566 mlir::Value srcImagToBool =
567 builder.createCast(op.getLoc(), elemToBoolKind, srcImag, boolTy);
568 return builder.createLogicalOr(op.getLoc(), srcRealToBool, srcImagToBool);
569}
570
571static mlir::Value lowerComplexToComplexCast(mlir::MLIRContext &ctx,
572 cir::CastOp op,
573 cir::CastKind scalarCastKind) {
574 CIRBaseBuilderTy builder(ctx);
575 builder.setInsertionPoint(op);
576
577 mlir::Value src = op.getSrc();
578 auto dstComplexElemTy =
579 mlir::cast<cir::ComplexType>(op.getType()).getElementType();
580
581 mlir::Value srcReal = builder.createComplexReal(op.getLoc(), src);
582 mlir::Value srcImag = builder.createComplexImag(op.getLoc(), src);
583
584 mlir::Value dstReal = builder.createCast(op.getLoc(), scalarCastKind, srcReal,
585 dstComplexElemTy);
586 mlir::Value dstImag = builder.createCast(op.getLoc(), scalarCastKind, srcImag,
587 dstComplexElemTy);
588 return builder.createComplexCreate(op.getLoc(), dstReal, dstImag);
589}
590
591void LoweringPreparePass::lowerCastOp(cir::CastOp op) {
592 mlir::MLIRContext &ctx = getContext();
593 mlir::Value loweredValue = [&]() -> mlir::Value {
594 switch (op.getKind()) {
595 case cir::CastKind::float_to_complex:
596 case cir::CastKind::int_to_complex:
597 return lowerScalarToComplexCast(ctx, op);
598 case cir::CastKind::float_complex_to_real:
599 case cir::CastKind::int_complex_to_real:
600 return lowerComplexToScalarCast(ctx, op, op.getKind());
601 case cir::CastKind::float_complex_to_bool:
602 return lowerComplexToScalarCast(ctx, op, cir::CastKind::float_to_bool);
603 case cir::CastKind::int_complex_to_bool:
604 return lowerComplexToScalarCast(ctx, op, cir::CastKind::int_to_bool);
605 case cir::CastKind::float_complex:
606 return lowerComplexToComplexCast(ctx, op, cir::CastKind::floating);
607 case cir::CastKind::float_complex_to_int_complex:
608 return lowerComplexToComplexCast(ctx, op, cir::CastKind::float_to_int);
609 case cir::CastKind::int_complex:
610 return lowerComplexToComplexCast(ctx, op, cir::CastKind::integral);
611 case cir::CastKind::int_complex_to_float_complex:
612 return lowerComplexToComplexCast(ctx, op, cir::CastKind::int_to_float);
613 default:
614 return nullptr;
615 }
616 }();
617
618 if (loweredValue) {
619 op.replaceAllUsesWith(loweredValue);
620 op.erase();
621 }
622}
623
624static mlir::Value buildComplexBinOpLibCall(
625 LoweringPreparePass &pass, CIRBaseBuilderTy &builder,
626 llvm::StringRef (*libFuncNameGetter)(llvm::APFloat::Semantics),
627 mlir::Location loc, cir::ComplexType ty, mlir::Value lhsReal,
628 mlir::Value lhsImag, mlir::Value rhsReal, mlir::Value rhsImag) {
629 cir::FPTypeInterface elementTy =
630 mlir::cast<cir::FPTypeInterface>(ty.getElementType());
631
632 llvm::StringRef libFuncName = libFuncNameGetter(
633 llvm::APFloat::SemanticsToEnum(elementTy.getFloatSemantics()));
634 llvm::SmallVector<mlir::Type, 4> libFuncInputTypes(4, elementTy);
635
636 cir::FuncType libFuncTy = cir::FuncType::get(libFuncInputTypes, ty);
637
638 // Insert a declaration for the runtime function to be used in Complex
639 // multiplication and division when needed
640 cir::FuncOp libFunc;
641 {
642 mlir::OpBuilder::InsertionGuard ipGuard{builder};
643 builder.setInsertionPointToStart(pass.mlirModule.getBody());
644 libFunc = pass.buildRuntimeFunction(builder, libFuncName, loc, libFuncTy);
645 }
646
647 cir::CallOp call =
648 builder.createCallOp(loc, libFunc, {lhsReal, lhsImag, rhsReal, rhsImag});
649 return call.getResult();
650}
651
652static llvm::StringRef
653getComplexDivLibCallName(llvm::APFloat::Semantics semantics) {
654 switch (semantics) {
655 case llvm::APFloat::S_IEEEhalf:
656 return "__divhc3";
657 case llvm::APFloat::S_IEEEsingle:
658 return "__divsc3";
659 case llvm::APFloat::S_IEEEdouble:
660 return "__divdc3";
661 case llvm::APFloat::S_PPCDoubleDouble:
662 return "__divtc3";
663 case llvm::APFloat::S_x87DoubleExtended:
664 return "__divxc3";
665 case llvm::APFloat::S_IEEEquad:
666 return "__divtc3";
667 default:
668 llvm_unreachable("unsupported floating point type");
669 }
670}
671
672static mlir::Value
673buildAlgebraicComplexDiv(CIRBaseBuilderTy &builder, mlir::Location loc,
674 mlir::Value lhsReal, mlir::Value lhsImag,
675 mlir::Value rhsReal, mlir::Value rhsImag) {
676 // (a+bi) / (c+di) = ((ac+bd)/(cc+dd)) + ((bc-ad)/(cc+dd))i
677 mlir::Value &a = lhsReal;
678 mlir::Value &b = lhsImag;
679 mlir::Value &c = rhsReal;
680 mlir::Value &d = rhsImag;
681
682 mlir::Value ac = builder.createMul(loc, a, c); // a*c
683 mlir::Value bd = builder.createMul(loc, b, d); // b*d
684 mlir::Value cc = builder.createMul(loc, c, c); // c*c
685 mlir::Value dd = builder.createMul(loc, d, d); // d*d
686 mlir::Value acbd = builder.createAdd(loc, ac, bd); // ac+bd
687 mlir::Value ccdd = builder.createAdd(loc, cc, dd); // cc+dd
688 mlir::Value resultReal = builder.createDiv(loc, acbd, ccdd);
689
690 mlir::Value bc = builder.createMul(loc, b, c); // b*c
691 mlir::Value ad = builder.createMul(loc, a, d); // a*d
692 mlir::Value bcad = builder.createSub(loc, bc, ad); // bc-ad
693 mlir::Value resultImag = builder.createDiv(loc, bcad, ccdd);
694 return builder.createComplexCreate(loc, resultReal, resultImag);
695}
696
697static mlir::Value
699 mlir::Value lhsReal, mlir::Value lhsImag,
700 mlir::Value rhsReal, mlir::Value rhsImag) {
701 // Implements Smith's algorithm for complex division.
702 // SMITH, R. L. Algorithm 116: Complex division. Commun. ACM 5, 8 (1962).
703
704 // Let:
705 // - lhs := a+bi
706 // - rhs := c+di
707 // - result := lhs / rhs = e+fi
708 //
709 // The algorithm pseudocode looks like follows:
710 // if fabs(c) >= fabs(d):
711 // r := d / c
712 // tmp := c + r*d
713 // e = (a + b*r) / tmp
714 // f = (b - a*r) / tmp
715 // else:
716 // r := c / d
717 // tmp := d + r*c
718 // e = (a*r + b) / tmp
719 // f = (b*r - a) / tmp
720
721 mlir::Value &a = lhsReal;
722 mlir::Value &b = lhsImag;
723 mlir::Value &c = rhsReal;
724 mlir::Value &d = rhsImag;
725
726 auto trueBranchBuilder = [&](mlir::OpBuilder &, mlir::Location) {
727 mlir::Value r = builder.createDiv(loc, d, c); // r := d / c
728 mlir::Value rd = builder.createMul(loc, r, d); // r*d
729 mlir::Value tmp = builder.createAdd(loc, c, rd); // tmp := c + r*d
730
731 mlir::Value br = builder.createMul(loc, b, r); // b*r
732 mlir::Value abr = builder.createAdd(loc, a, br); // a + b*r
733 mlir::Value e = builder.createDiv(loc, abr, tmp);
734
735 mlir::Value ar = builder.createMul(loc, a, r); // a*r
736 mlir::Value bar = builder.createSub(loc, b, ar); // b - a*r
737 mlir::Value f = builder.createDiv(loc, bar, tmp);
738
739 mlir::Value result = builder.createComplexCreate(loc, e, f);
740 builder.createYield(loc, result);
741 };
742
743 auto falseBranchBuilder = [&](mlir::OpBuilder &, mlir::Location) {
744 mlir::Value r = builder.createDiv(loc, c, d); // r := c / d
745 mlir::Value rc = builder.createMul(loc, r, c); // r*c
746 mlir::Value tmp = builder.createAdd(loc, d, rc); // tmp := d + r*c
747
748 mlir::Value ar = builder.createMul(loc, a, r); // a*r
749 mlir::Value arb = builder.createAdd(loc, ar, b); // a*r + b
750 mlir::Value e = builder.createDiv(loc, arb, tmp);
751
752 mlir::Value br = builder.createMul(loc, b, r); // b*r
753 mlir::Value bra = builder.createSub(loc, br, a); // b*r - a
754 mlir::Value f = builder.createDiv(loc, bra, tmp);
755
756 mlir::Value result = builder.createComplexCreate(loc, e, f);
757 builder.createYield(loc, result);
758 };
759
760 auto cFabs = cir::FAbsOp::create(builder, loc, c);
761 auto dFabs = cir::FAbsOp::create(builder, loc, d);
762 cir::CmpOp cmpResult =
763 builder.createCompare(loc, cir::CmpOpKind::ge, cFabs, dFabs);
764 auto ternary = cir::TernaryOp::create(builder, loc, cmpResult,
765 trueBranchBuilder, falseBranchBuilder);
766
767 return ternary.getResult();
768}
769
771 mlir::MLIRContext &context, clang::ASTContext &cc,
772 CIRBaseBuilderTy &builder, mlir::Type elementType) {
773
774 auto getHigherPrecisionFPType = [&context](mlir::Type type) -> mlir::Type {
775 if (mlir::isa<cir::FP16Type>(type))
776 return cir::SingleType::get(&context);
777
778 if (mlir::isa<cir::SingleType>(type) || mlir::isa<cir::BF16Type>(type))
779 return cir::DoubleType::get(&context);
780
781 if (mlir::isa<cir::DoubleType>(type))
782 return cir::LongDoubleType::get(&context, type);
783
784 return type;
785 };
786
787 auto getFloatTypeSemantics =
788 [&cc](mlir::Type type) -> const llvm::fltSemantics & {
789 const clang::TargetInfo &info = cc.getTargetInfo();
790 if (mlir::isa<cir::FP16Type>(type))
791 return info.getHalfFormat();
792
793 if (mlir::isa<cir::BF16Type>(type))
794 return info.getBFloat16Format();
795
796 if (mlir::isa<cir::SingleType>(type))
797 return info.getFloatFormat();
798
799 if (mlir::isa<cir::DoubleType>(type))
800 return info.getDoubleFormat();
801
802 if (mlir::isa<cir::LongDoubleType>(type)) {
803 if (cc.getLangOpts().OpenMP && cc.getLangOpts().OpenMPIsTargetDevice)
804 llvm_unreachable("NYI Float type semantics with OpenMP");
805 return info.getLongDoubleFormat();
806 }
807
808 if (mlir::isa<cir::FP128Type>(type)) {
809 if (cc.getLangOpts().OpenMP && cc.getLangOpts().OpenMPIsTargetDevice)
810 llvm_unreachable("NYI Float type semantics with OpenMP");
811 return info.getFloat128Format();
812 }
813
814 llvm_unreachable("Unsupported float type semantics");
815 };
816
817 const mlir::Type higherElementType = getHigherPrecisionFPType(elementType);
818 const llvm::fltSemantics &elementTypeSemantics =
819 getFloatTypeSemantics(elementType);
820 const llvm::fltSemantics &higherElementTypeSemantics =
821 getFloatTypeSemantics(higherElementType);
822
823 // Check that the promoted type can handle the intermediate values without
824 // overflowing. This can be interpreted as:
825 // (SmallerType.LargestFiniteVal * SmallerType.LargestFiniteVal) * 2 <=
826 // LargerType.LargestFiniteVal.
827 // In terms of exponent it gives this formula:
828 // (SmallerType.LargestFiniteVal * SmallerType.LargestFiniteVal
829 // doubles the exponent of SmallerType.LargestFiniteVal)
830 if (llvm::APFloat::semanticsMaxExponent(elementTypeSemantics) * 2 + 1 <=
831 llvm::APFloat::semanticsMaxExponent(higherElementTypeSemantics)) {
832 return higherElementType;
833 }
834
835 // The intermediate values can't be represented in the promoted type
836 // without overflowing.
837 return {};
838}
839
840static mlir::Value
841lowerComplexDiv(LoweringPreparePass &pass, CIRBaseBuilderTy &builder,
842 mlir::Location loc, cir::ComplexDivOp op, mlir::Value lhsReal,
843 mlir::Value lhsImag, mlir::Value rhsReal, mlir::Value rhsImag,
844 mlir::MLIRContext &mlirCx, clang::ASTContext &cc) {
845 cir::ComplexType complexTy = op.getType();
846 if (mlir::isa<cir::FPTypeInterface>(complexTy.getElementType())) {
847 cir::ComplexRangeKind range = op.getRange();
848 if (range == cir::ComplexRangeKind::Improved)
849 return buildRangeReductionComplexDiv(builder, loc, lhsReal, lhsImag,
850 rhsReal, rhsImag);
851
852 if (range == cir::ComplexRangeKind::Full)
854 loc, complexTy, lhsReal, lhsImag, rhsReal,
855 rhsImag);
856
857 if (range == cir::ComplexRangeKind::Promoted) {
858 mlir::Type originalElementType = complexTy.getElementType();
859 mlir::Type higherPrecisionElementType =
861 originalElementType);
862
863 if (!higherPrecisionElementType)
864 return buildRangeReductionComplexDiv(builder, loc, lhsReal, lhsImag,
865 rhsReal, rhsImag);
866
867 cir::CastKind floatingCastKind = cir::CastKind::floating;
868 lhsReal = builder.createCast(floatingCastKind, lhsReal,
869 higherPrecisionElementType);
870 lhsImag = builder.createCast(floatingCastKind, lhsImag,
871 higherPrecisionElementType);
872 rhsReal = builder.createCast(floatingCastKind, rhsReal,
873 higherPrecisionElementType);
874 rhsImag = builder.createCast(floatingCastKind, rhsImag,
875 higherPrecisionElementType);
876
877 mlir::Value algebraicResult = buildAlgebraicComplexDiv(
878 builder, loc, lhsReal, lhsImag, rhsReal, rhsImag);
879
880 mlir::Value resultReal = builder.createComplexReal(loc, algebraicResult);
881 mlir::Value resultImag = builder.createComplexImag(loc, algebraicResult);
882
883 mlir::Value finalReal =
884 builder.createCast(floatingCastKind, resultReal, originalElementType);
885 mlir::Value finalImag =
886 builder.createCast(floatingCastKind, resultImag, originalElementType);
887 return builder.createComplexCreate(loc, finalReal, finalImag);
888 }
889 }
890
891 return buildAlgebraicComplexDiv(builder, loc, lhsReal, lhsImag, rhsReal,
892 rhsImag);
893}
894
895void LoweringPreparePass::lowerComplexDivOp(cir::ComplexDivOp op) {
896 cir::CIRBaseBuilderTy builder(getContext());
897 builder.setInsertionPointAfter(op);
898 mlir::Location loc = op.getLoc();
899 mlir::TypedValue<cir::ComplexType> lhs = op.getLhs();
900 mlir::TypedValue<cir::ComplexType> rhs = op.getRhs();
901 mlir::Value lhsReal = builder.createComplexReal(loc, lhs);
902 mlir::Value lhsImag = builder.createComplexImag(loc, lhs);
903 mlir::Value rhsReal = builder.createComplexReal(loc, rhs);
904 mlir::Value rhsImag = builder.createComplexImag(loc, rhs);
905
906 mlir::Value loweredResult =
907 lowerComplexDiv(*this, builder, loc, op, lhsReal, lhsImag, rhsReal,
908 rhsImag, getContext(), *astCtx);
909 op.replaceAllUsesWith(loweredResult);
910 op.erase();
911}
912
913static llvm::StringRef
914getComplexMulLibCallName(llvm::APFloat::Semantics semantics) {
915 switch (semantics) {
916 case llvm::APFloat::S_IEEEhalf:
917 return "__mulhc3";
918 case llvm::APFloat::S_IEEEsingle:
919 return "__mulsc3";
920 case llvm::APFloat::S_IEEEdouble:
921 return "__muldc3";
922 case llvm::APFloat::S_PPCDoubleDouble:
923 return "__multc3";
924 case llvm::APFloat::S_x87DoubleExtended:
925 return "__mulxc3";
926 case llvm::APFloat::S_IEEEquad:
927 return "__multc3";
928 default:
929 llvm_unreachable("unsupported floating point type");
930 }
931}
932
933static mlir::Value lowerComplexMul(LoweringPreparePass &pass,
934 CIRBaseBuilderTy &builder,
935 mlir::Location loc, cir::ComplexMulOp op,
936 mlir::Value lhsReal, mlir::Value lhsImag,
937 mlir::Value rhsReal, mlir::Value rhsImag) {
938 // (a+bi) * (c+di) = (ac-bd) + (ad+bc)i
939 mlir::Value resultRealLhs = builder.createMul(loc, lhsReal, rhsReal); // ac
940 mlir::Value resultRealRhs = builder.createMul(loc, lhsImag, rhsImag); // bd
941 mlir::Value resultImagLhs = builder.createMul(loc, lhsReal, rhsImag); // ad
942 mlir::Value resultImagRhs = builder.createMul(loc, lhsImag, rhsReal); // bc
943 mlir::Value resultReal = builder.createSub(loc, resultRealLhs, resultRealRhs);
944 mlir::Value resultImag = builder.createAdd(loc, resultImagLhs, resultImagRhs);
945 mlir::Value algebraicResult =
946 builder.createComplexCreate(loc, resultReal, resultImag);
947
948 cir::ComplexType complexTy = op.getType();
949 cir::ComplexRangeKind rangeKind = op.getRange();
950 if (mlir::isa<cir::IntType>(complexTy.getElementType()) ||
951 rangeKind == cir::ComplexRangeKind::Basic ||
952 rangeKind == cir::ComplexRangeKind::Improved ||
953 rangeKind == cir::ComplexRangeKind::Promoted)
954 return algebraicResult;
955
957
958 // Check whether the real part and the imaginary part of the result are both
959 // NaN. If so, emit a library call to compute the multiplication instead.
960 // We check a value against NaN by comparing the value against itself.
961 mlir::Value resultRealIsNaN = builder.createIsNaN(loc, resultReal);
962 mlir::Value resultImagIsNaN = builder.createIsNaN(loc, resultImag);
963 mlir::Value resultRealAndImagAreNaN =
964 builder.createLogicalAnd(loc, resultRealIsNaN, resultImagIsNaN);
965
966 return cir::TernaryOp::create(
967 builder, loc, resultRealAndImagAreNaN,
968 [&](mlir::OpBuilder &, mlir::Location) {
969 mlir::Value libCallResult = buildComplexBinOpLibCall(
970 pass, builder, &getComplexMulLibCallName, loc, complexTy,
971 lhsReal, lhsImag, rhsReal, rhsImag);
972 builder.createYield(loc, libCallResult);
973 },
974 [&](mlir::OpBuilder &, mlir::Location) {
975 builder.createYield(loc, algebraicResult);
976 })
977 .getResult();
978}
979
980void LoweringPreparePass::lowerComplexMulOp(cir::ComplexMulOp op) {
981 cir::CIRBaseBuilderTy builder(getContext());
982 builder.setInsertionPointAfter(op);
983 mlir::Location loc = op.getLoc();
984 mlir::TypedValue<cir::ComplexType> lhs = op.getLhs();
985 mlir::TypedValue<cir::ComplexType> rhs = op.getRhs();
986 mlir::Value lhsReal = builder.createComplexReal(loc, lhs);
987 mlir::Value lhsImag = builder.createComplexImag(loc, lhs);
988 mlir::Value rhsReal = builder.createComplexReal(loc, rhs);
989 mlir::Value rhsImag = builder.createComplexImag(loc, rhs);
990 mlir::Value loweredResult = lowerComplexMul(*this, builder, loc, op, lhsReal,
991 lhsImag, rhsReal, rhsImag);
992 op.replaceAllUsesWith(loweredResult);
993 op.erase();
994}
995
996void LoweringPreparePass::lowerUnaryOp(cir::UnaryOpInterface op) {
997 if (!mlir::isa<cir::ComplexType>(op.getResult().getType()))
998 return;
999
1000 mlir::Location loc = op->getLoc();
1001 CIRBaseBuilderTy builder(getContext());
1002 builder.setInsertionPointAfter(op);
1003
1004 mlir::Value operand = op.getInput();
1005 mlir::Value operandReal = builder.createComplexReal(loc, operand);
1006 mlir::Value operandImag = builder.createComplexImag(loc, operand);
1007
1008 mlir::Value resultReal = operandReal;
1009 mlir::Value resultImag = operandImag;
1010
1011 llvm::TypeSwitch<mlir::Operation *>(op)
1012 .Case<cir::IncOp>(
1013 [&](auto) { resultReal = builder.createInc(loc, operandReal); })
1014 .Case<cir::DecOp>(
1015 [&](auto) { resultReal = builder.createDec(loc, operandReal); })
1016 .Case<cir::MinusOp>([&](auto) {
1017 resultReal = builder.createMinus(loc, operandReal);
1018 resultImag = builder.createMinus(loc, operandImag);
1019 })
1020 .Case<cir::NotOp>(
1021 [&](auto) { resultImag = builder.createMinus(loc, operandImag); })
1022 .Default([](auto) { llvm_unreachable("unhandled unary complex op"); });
1023
1024 mlir::Value result = builder.createComplexCreate(loc, resultReal, resultImag);
1025 op->replaceAllUsesWith(mlir::ValueRange{result});
1026 op->erase();
1027}
1028
1029cir::FuncOp LoweringPreparePass::getOrCreateDtorFunc(CIRBaseBuilderTy &builder,
1030 cir::GlobalOp op,
1031 mlir::Region &dtorRegion,
1032 cir::CallOp &dtorCall) {
1033 mlir::OpBuilder::InsertionGuard guard(builder);
1035
1036 cir::VoidType voidTy = builder.getVoidTy();
1037 auto voidPtrTy = cir::PointerType::get(voidTy);
1038
1039 // Look for operations in dtorBlock
1040 mlir::Block &dtorBlock = dtorRegion.front();
1041
1042 // The first operation should be a get_global to retrieve the address
1043 // of the global variable we're destroying.
1044 auto opIt = dtorBlock.getOperations().begin();
1045 cir::GetGlobalOp ggop = mlir::cast<cir::GetGlobalOp>(*opIt);
1046
1047 // The simple case is just a call to a destructor, like this:
1048 //
1049 // %0 = cir.get_global %globalS : !cir.ptr<!rec_S>
1050 // cir.call %_ZN1SD1Ev(%0) : (!cir.ptr<!rec_S>) -> ()
1051 // (implicit cir.yield)
1052 //
1053 // That is, if the second operation is a call that takes the get_global result
1054 // as its only operand, and the only other operation is a yield, then we can
1055 // just return the called function.
1056 if (dtorBlock.getOperations().size() == 3) {
1057 auto callOp = mlir::dyn_cast<cir::CallOp>(&*(++opIt));
1058 auto yieldOp = mlir::dyn_cast<cir::YieldOp>(&*(++opIt));
1059 if (yieldOp && callOp && callOp.getNumOperands() == 1 &&
1060 callOp.getArgOperand(0) == ggop) {
1061 dtorCall = callOp;
1062 return getCalledFunction(callOp);
1063 }
1064 }
1065
1066 // Otherwise, we need to create a helper function to replace the dtor region.
1067 // This name is kind of arbitrary, but it matches the name that classic
1068 // codegen uses, based on the expected case that gets us here.
1069 builder.setInsertionPointAfter(op);
1070 SmallString<256> fnName("__cxx_global_array_dtor");
1071 uint32_t cnt = dynamicInitializerNames[fnName]++;
1072 if (cnt)
1073 fnName += "." + std::to_string(cnt);
1074
1075 // Create the helper function.
1076 auto fnType = cir::FuncType::get({voidPtrTy}, voidTy);
1077 cir::FuncOp dtorFunc =
1078 buildRuntimeFunction(builder, fnName, op.getLoc(), fnType,
1079 cir::GlobalLinkageKind::InternalLinkage);
1080
1081 SmallVector<mlir::NamedAttribute> paramAttrs;
1082 paramAttrs.push_back(
1083 builder.getNamedAttr("llvm.noundef", builder.getUnitAttr()));
1084 SmallVector<mlir::Attribute> argAttrDicts;
1085 argAttrDicts.push_back(
1086 mlir::DictionaryAttr::get(builder.getContext(), paramAttrs));
1087 dtorFunc.setArgAttrsAttr(
1088 mlir::ArrayAttr::get(builder.getContext(), argAttrDicts));
1089
1090 mlir::Block *entryBB = dtorFunc.addEntryBlock();
1091
1092 // Move everything from the dtor region into the helper function.
1093 entryBB->getOperations().splice(entryBB->begin(), dtorBlock.getOperations(),
1094 dtorBlock.begin(), dtorBlock.end());
1095
1096 // Before erasing this, clone it back into the dtor region
1097 cir::GetGlobalOp dtorGGop =
1098 mlir::cast<cir::GetGlobalOp>(entryBB->getOperations().front());
1099 builder.setInsertionPointToStart(&dtorBlock);
1100 builder.clone(*dtorGGop.getOperation());
1101
1102 // Replace all uses of the help function's get_global with the function
1103 // argument.
1104 mlir::Value dtorArg = entryBB->getArgument(0);
1105 dtorGGop.replaceAllUsesWith(dtorArg);
1106 dtorGGop.erase();
1107
1108 // Replace the yield in the final block with a return
1109 mlir::Block &finalBlock = dtorFunc.getBody().back();
1110 auto yieldOp = cast<cir::YieldOp>(finalBlock.getTerminator());
1111 builder.setInsertionPoint(yieldOp);
1112 cir::ReturnOp::create(builder, yieldOp->getLoc());
1113 yieldOp->erase();
1114
1115 // Create a call to the helper function, passing the original get_global op
1116 // as the argument.
1117 cir::GetGlobalOp origGGop =
1118 mlir::cast<cir::GetGlobalOp>(dtorBlock.getOperations().front());
1119 builder.setInsertionPointAfter(origGGop);
1120 mlir::Value ggopResult = origGGop.getResult();
1121 dtorCall = builder.createCallOp(op.getLoc(), dtorFunc, ggopResult);
1122
1123 // Add a yield after the call.
1124 auto finalYield = cir::YieldOp::create(builder, op.getLoc());
1125
1126 // Erase everything after the yield.
1127 dtorBlock.getOperations().erase(std::next(mlir::Block::iterator(finalYield)),
1128 dtorBlock.end());
1129 dtorRegion.getBlocks().erase(std::next(dtorRegion.begin()), dtorRegion.end());
1130
1131 return dtorFunc;
1132}
1133
1134cir::FuncOp
1135LoweringPreparePass::buildCXXGlobalVarDeclInitFunc(cir::GlobalOp op) {
1136 // TODO(cir): Store this in the GlobalOp.
1137 // This should come from the MangleContext, but for now I'm hardcoding it.
1138 SmallString<256> fnName("__cxx_global_var_init");
1139 // Get a unique name
1140 uint32_t cnt = dynamicInitializerNames[fnName]++;
1141 if (cnt)
1142 fnName += "." + std::to_string(cnt);
1143
1144 // Create a variable initialization function.
1145 CIRBaseBuilderTy builder(getContext());
1146 builder.setInsertionPointAfter(op);
1147 cir::VoidType voidTy = builder.getVoidTy();
1148 auto fnType = cir::FuncType::get({}, voidTy);
1149 FuncOp f = buildRuntimeFunction(builder, fnName, op.getLoc(), fnType,
1150 cir::GlobalLinkageKind::InternalLinkage);
1151
1152 // Move over the initialization code of the ctor region.
1153 // The ctor region may have multiple blocks when exception handling
1154 // scaffolding creates extra blocks (e.g., unreachable/trap blocks).
1155 // We move all operations from the first block (minus the yield) into
1156 // the function entry, and discard extra blocks (which contain only
1157 // unreachable terminators from EH cleanup paths).
1158 mlir::Block *entryBB = f.addEntryBlock();
1159 builder.setInsertionPointToStart(entryBB);
1160
1161 // If this is a global TLS variable (that is, declared at namespace scope), we
1162 // have to emit the guard variable here.
1163 bool needsTlsGuard = op.getDynTlsRefs() && op.getDynTlsRefs()->getGuardName();
1164 cir::IfOp guardIf;
1165 if (needsTlsGuard) {
1166 guardIf = buildGlobalTlsGuardCheck(
1167 builder, op.getLoc(),
1168 getOrCreateStaticLocalDeclGuardAddress(
1169 builder, op, op.getDynTlsRefs()->getGuardName().getValue(),
1170 /*isLocalVarDecl=*/false,
1171 /*useInt8GuardVariable=*/op.hasInternalLinkage()));
1172 builder.setInsertionPointToEnd(&guardIf.getThenRegion().front());
1173 }
1174
1175 if (!op.getCtorRegion().empty()) {
1176 mlir::Block &block = op.getCtorRegion().front();
1177 mlir::Block *insertBlock = builder.getBlock();
1178 insertBlock->getOperations().splice(insertBlock->end(),
1179 block.getOperations(), block.begin(),
1180 std::prev(block.end()));
1181 }
1182
1183 // Register the destructor call with __cxa_atexit
1184 mlir::Region &dtorRegion = op.getDtorRegion();
1185 if (!dtorRegion.empty()) {
1187
1188 emitGlobalGuardedDtorRegion(builder, op, dtorRegion,
1189 op.getTlsModel().has_value(),
1190 *builder.getBlock());
1191 }
1192
1193 // If we're actually in the 'if' above, create a yield.
1194 if (needsTlsGuard) {
1195 builder.setInsertionPointToEnd(&guardIf.getThenRegion().back());
1196 cir::YieldOp::create(builder, op.getLoc());
1197 }
1198
1199 // Replace cir.yield with cir.return
1200 builder.setInsertionPointToEnd(entryBB);
1201 mlir::Operation *yieldOp = nullptr;
1202 if (!op.getCtorRegion().empty()) {
1203 mlir::Block &block = op.getCtorRegion().front();
1204 yieldOp = &block.getOperations().back();
1205 } else {
1206 assert(!dtorRegion.empty());
1207 mlir::Block &block = dtorRegion.front();
1208 yieldOp = &block.getOperations().back();
1209 }
1210
1211 assert(isa<cir::YieldOp>(*yieldOp));
1212 cir::ReturnOp::create(builder, yieldOp->getLoc());
1213 return f;
1214}
1215
1216cir::FuncOp
1217LoweringPreparePass::getGuardAcquireFn(cir::PointerType guardPtrTy) {
1218 // int __cxa_guard_acquire(__guard *guard_object);
1219 CIRBaseBuilderTy builder(getContext());
1220 mlir::OpBuilder::InsertionGuard ipGuard{builder};
1221 builder.setInsertionPointToStart(mlirModule.getBody());
1222 mlir::Location loc = mlirModule.getLoc();
1223 cir::IntType intTy = cir::IntType::get(&getContext(), 32, /*isSigned=*/true);
1224 auto fnType = cir::FuncType::get({guardPtrTy}, intTy);
1225 return buildRuntimeFunction(builder, "__cxa_guard_acquire", loc, fnType);
1226}
1227
1228cir::FuncOp
1229LoweringPreparePass::getGuardReleaseFn(cir::PointerType guardPtrTy) {
1230 // void __cxa_guard_release(__guard *guard_object);
1231 CIRBaseBuilderTy builder(getContext());
1232 mlir::OpBuilder::InsertionGuard ipGuard{builder};
1233 builder.setInsertionPointToStart(mlirModule.getBody());
1234 mlir::Location loc = mlirModule.getLoc();
1235 cir::VoidType voidTy = cir::VoidType::get(&getContext());
1236 auto fnType = cir::FuncType::get({guardPtrTy}, voidTy);
1237 return buildRuntimeFunction(builder, "__cxa_guard_release", loc, fnType);
1238}
1239
1240cir::FuncOp LoweringPreparePass::getTlsInitFn() {
1241 // void __tls_init(void);
1242 CIRBaseBuilderTy builder(getContext());
1243 mlir::OpBuilder::InsertionGuard _{builder};
1244 builder.setInsertionPointToStart(mlirModule.getBody());
1245 mlir::Location loc = mlirModule.getLoc();
1246 auto fnType = builder.getVoidFnTy();
1247 return buildRuntimeFunction(builder, "__tls_init", loc, fnType,
1248 cir::GlobalLinkageKind::InternalLinkage);
1249}
1250
1251cir::GlobalOp LoweringPreparePass::createGuardGlobalOp(
1252 CIRBaseBuilderTy &builder, mlir::Location loc, llvm::StringRef name,
1253 cir::IntType guardTy, cir::GlobalLinkageKind linkage) {
1254 mlir::OpBuilder::InsertionGuard guard(builder);
1255 builder.setInsertionPointToStart(mlirModule.getBody());
1256 cir::GlobalOp g = cir::GlobalOp::create(builder, loc, name, guardTy);
1257 g.setLinkageAttr(
1258 cir::GlobalLinkageKindAttr::get(builder.getContext(), linkage));
1259 mlir::SymbolTable::setSymbolVisibility(
1260 g, mlir::SymbolTable::Visibility::Private);
1261 return g;
1262}
1263
1264void LoweringPreparePass::handleStaticLocal(cir::GlobalOp globalOp,
1265 cir::LocalInitOp localInitOp) {
1266 CIRBaseBuilderTy builder(getContext());
1267
1268 std::optional<cir::ASTVarDeclInterface> astOption = globalOp.getAst();
1269 assert(astOption.has_value());
1270 cir::ASTVarDeclInterface varDecl = astOption.value();
1271
1272 builder.setInsertionPointAfter(localInitOp);
1273 mlir::Block *localInitBlock = builder.getInsertionBlock();
1274
1275 // Remove the terminator temporarily - we'll add it back at the end.
1276 mlir::Operation *ret = localInitBlock->getTerminator();
1277 ret->remove();
1278 // Note: These two insert-point-after sets are necessary, as the 'trailing'
1279 // operation has changed thanks to the terminator removal.
1280 builder.setInsertionPointAfter(localInitOp);
1281
1282 // Inline variables that weren't instantiated from variable templates have
1283 // partially-ordered initialization within their translation unit.
1284 bool nonTemplateInline =
1285 varDecl.isInline() &&
1286 !clang::isTemplateInstantiation(varDecl.getTemplateSpecializationKind());
1287
1288 // Inline namespace-scope variables require guarded initialization in a
1289 // __cxx_global_var_init function. This is not yet implemented.
1290 if (nonTemplateInline) {
1291 globalOp->emitError(
1292 "NYI: guarded initialization for inline namespace-scope variables");
1293 return;
1294 }
1295
1296 // We only need to use thread-safe statics for local non-TLS variables and
1297 // inline variables; other global initialization is always single-threaded
1298 // or (through lazy dynamic loading in multiple threads) unsequenced.
1299 bool threadsafe = astCtx->getLangOpts().ThreadsafeStatics &&
1300 (varDecl.isLocalVarDecl() || nonTemplateInline) &&
1301 !varDecl.getTLSKind();
1302
1303 // If we have a global variable with internal linkage and thread-safe statics
1304 // are disabled, we can just let the guard variable be of type i8.
1305 bool useInt8GuardVariable = !threadsafe && globalOp.hasInternalLinkage();
1306
1307 // Create the guard variable if we don't already have it.
1308 cir::GlobalOp guard = getOrCreateStaticLocalDeclGuardAddress(
1309 builder, globalOp, globalOp.getStaticLocalGuard()->getName().getValue(),
1310 varDecl.isLocalVarDecl(), useInt8GuardVariable);
1311 if (!guard) {
1312 // Error was already emitted, just restore the terminator and return.
1313 localInitBlock->push_back(ret);
1314 return;
1315 }
1316
1317 mlir::Value guardPtr = builder.createGetGlobal(guard, localInitOp.getTls());
1318
1319 // Test whether the variable has completed initialization.
1320 //
1321 // Itanium C++ ABI 3.3.2:
1322 // The following is pseudo-code showing how these functions can be used:
1323 // if (obj_guard.first_byte == 0) {
1324 // if ( __cxa_guard_acquire (&obj_guard) ) {
1325 // try {
1326 // ... initialize the object ...;
1327 // } catch (...) {
1328 // __cxa_guard_abort (&obj_guard);
1329 // throw;
1330 // }
1331 // ... queue object destructor with __cxa_atexit() ...;
1332 // __cxa_guard_release (&obj_guard);
1333 // }
1334 // }
1335 //
1336 // If threadsafe statics are enabled, but we don't have inline atomics, just
1337 // call __cxa_guard_acquire unconditionally. The "inline" check isn't
1338 // actually inline, and the user might not expect calls to __atomic libcalls.
1339 unsigned maxInlineWidthInBits =
1341
1342 if (!threadsafe || maxInlineWidthInBits) {
1343 // Load the first byte of the guard variable.
1344 auto bytePtrTy = cir::PointerType::get(builder.getSIntNTy(8));
1345 mlir::Value bytePtr = builder.createBitcast(guardPtr, bytePtrTy);
1346 mlir::Value guardLoad = builder.createAlignedLoad(
1347 localInitOp.getLoc(), bytePtr, *guard.getAlignment());
1348
1349 // Itanium ABI:
1350 // An implementation supporting thread-safety on multiprocessor
1351 // systems must also guarantee that references to the initialized
1352 // object do not occur before the load of the initialization flag.
1353 //
1354 // In LLVM, we do this by marking the load Acquire.
1355 if (threadsafe) {
1356 auto loadOp = mlir::cast<cir::LoadOp>(guardLoad.getDefiningOp());
1357 loadOp.setMemOrder(cir::MemOrder::Acquire);
1358 loadOp.setSyncScope(cir::SyncScopeKind::System);
1359 }
1360
1361 // For ARM, we should only check the first bit, rather than the entire byte:
1362 //
1363 // ARM C++ ABI 3.2.3.1:
1364 // To support the potential use of initialization guard variables
1365 // as semaphores that are the target of ARM SWP and LDREX/STREX
1366 // synchronizing instructions we define a static initialization
1367 // guard variable to be a 4-byte aligned, 4-byte word with the
1368 // following inline access protocol.
1369 // #define INITIALIZED 1
1370 // if ((obj_guard & INITIALIZED) != INITIALIZED) {
1371 // if (__cxa_guard_acquire(&obj_guard))
1372 // ...
1373 // }
1374 //
1375 // and similarly for ARM64:
1376 //
1377 // ARM64 C++ ABI 3.2.2:
1378 // This ABI instead only specifies the value bit 0 of the static guard
1379 // variable; all other bits are platform defined. Bit 0 shall be 0 when
1380 // the variable is not initialized and 1 when it is.
1381 if (useARMGuardVarABI() && !useInt8GuardVariable) {
1382 auto one = builder.getConstantInt(
1383 localInitOp.getLoc(), mlir::cast<cir::IntType>(guardLoad.getType()),
1384 1);
1385 guardLoad = builder.createAnd(localInitOp.getLoc(), guardLoad, one);
1386 }
1387
1388 // Check if the first byte of the guard variable is zero.
1389 auto zero = builder.getConstantInt(
1390 localInitOp.getLoc(), mlir::cast<cir::IntType>(guardLoad.getType()), 0);
1391 auto needsInit = builder.createCompare(localInitOp.getLoc(),
1392 cir::CmpOpKind::eq, guardLoad, zero);
1393
1394 // Build the guarded initialization inside an if block.
1395 cir::IfOp::create(
1396 builder, globalOp.getLoc(), needsInit,
1397 /*withElseRegion=*/false, [&](mlir::OpBuilder &, mlir::Location) {
1398 emitCXXGuardedInitIf(builder, globalOp, localInitOp.getCtorRegion(),
1399 localInitOp.getDtorRegion(), varDecl, guardPtr,
1400 builder.getPointerTo(guard.getSymType()),
1401 threadsafe);
1402 });
1403 } else {
1404 // Threadsafe statics without inline atomics - call __cxa_guard_acquire
1405 // unconditionally without the initial guard byte check.
1406 globalOp->emitError("NYI: guarded init without inline atomics support");
1407 return;
1408 }
1409
1410 // Insert the removed terminator back.
1411 builder.getInsertionBlock()->push_back(ret);
1412}
1413
1414void LoweringPreparePass::lowerLocalInitOp(cir::LocalInitOp initOp) {
1415
1416 // If we don't actually need to initialize anything anymore, we're done here.
1417 if (initOp.getCtorRegion().empty() && initOp.getDtorRegion().empty()) {
1418 initOp.erase();
1419 return;
1420 }
1421
1422 cir::GlobalOp globalOp = initOp.getReferencedGlobal(symbolTables);
1423 assert(globalOp && "No global-op found");
1424
1425 handleStaticLocal(globalOp, initOp);
1426
1427 // Remove the init local op, now that we've done everything we need with it.
1428 initOp.erase();
1429}
1430static bool isThreadWrapperReplaceable(cir::TLS_Model tls,
1431 clang::ASTContext &astCtx) {
1432 return tls == cir::TLS_Model::GeneralDynamic &&
1433 astCtx.getTargetInfo().getTriple().isOSDarwin();
1434}
1435
1436static cir::GlobalLinkageKind
1438 if (isLocalLinkage(op.getLinkage()))
1439 return op.getLinkage();
1440
1441 if (isThreadWrapperReplaceable(*op.getTlsModel(), astCtx))
1442 if (!isLinkOnceLinkage(op.getLinkage()) &&
1443 !isWeakODRLinkage(op.getLinkage()))
1444 return op.getLinkage();
1445
1446 // If this isn't a TU in which this variable is defined, the thread wrapper is
1447 // discardable.
1448 if (op.isDeclaration())
1449 return cir::GlobalLinkageKind::LinkOnceODRLinkage;
1450 return cir::GlobalLinkageKind::WeakODRLinkage;
1451}
1452
1453cir::FuncOp
1454LoweringPreparePass::getOrCreateThreadLocalWrapper(CIRBaseBuilderTy &builder,
1455 GlobalOp op) {
1456 mlir::OpBuilder::InsertionGuard insertGuard(builder);
1457 builder.setInsertionPointToStart(&mlirModule.getBodyRegion().front());
1458
1459 mlir::StringAttr wrapperName = op.getDynTlsRefs()->getWrapperName();
1460
1461 auto existingWrapperIter = threadLocalWrappers.find(wrapperName.getValue());
1462 if (existingWrapperIter != threadLocalWrappers.end())
1463 return existingWrapperIter->second;
1464
1465 // type is ptr-to-global-type(void);
1466 auto funcType = cir::FuncType::get({}, builder.getPointerTo(op.getSymType()));
1467 cir::FuncOp func =
1468 cir::FuncOp::create(builder, op.getLoc(), wrapperName, funcType);
1469
1470 cir::GlobalLinkageKind linkageKind =
1471 getThreadLocalWrapperLinkage(op, *astCtx);
1472 func.setLinkageAttr(
1473 cir::GlobalLinkageKindAttr::get(&getContext(), linkageKind));
1474
1475 // TODO(cir): This is supposed to refer to the comdat of the global symbol,
1476 // but that isn't in CIR yet.
1477 if (astCtx->getTargetInfo().getTriple().supportsCOMDAT() &&
1478 func.isWeakForLinker())
1479 func.setComdat(true);
1480
1481 mlir::SymbolTable::setSymbolVisibility(
1482 func, mlir::SymbolTable::Visibility::Private);
1483
1484 if (!isLocalLinkage(linkageKind)) {
1485 if (!isThreadWrapperReplaceable(*op.getTlsModel(), *astCtx) ||
1486 isLinkOnceLinkage(linkageKind) || isWeakODRLinkage(linkageKind) ||
1487 op.getGlobalVisibility() == cir::VisibilityKind::Hidden)
1488 func.setGlobalVisibility(cir::VisibilityKind::Hidden);
1489 }
1490 if (isThreadWrapperReplaceable(*op.getTlsModel(), *astCtx))
1491 op->emitError("Unhandled thread wrapper attributes for CC and Nounwind");
1492
1493 threadLocalWrappers.insert({wrapperName.getValue(), func});
1494 return func;
1495}
1496
1497void LoweringPreparePass::defineGlobalThreadLocalWrapper(cir::GlobalOp op,
1498 cir::FuncOp initAlias,
1499 bool isVarDefinition) {
1500 CIRBaseBuilderTy builder(getContext());
1501 cir::FuncOp wrapper = getOrCreateThreadLocalWrapper(builder, op);
1502 mlir::Block *entryBB = wrapper.addEntryBlock();
1503 builder.setInsertionPointToStart(entryBB);
1504 // If we are a situation where we have/need one, emit a call to the init
1505 // function.
1506 if (initAlias) {
1507 mlir::Location aliasLoc = initAlias.getLoc();
1508 if (!isVarDefinition) {
1509 // If this isn't a definition, we have to check that the alias exists.
1510 mlir::Value funcLoad = cir::GetGlobalOp::create(
1511 builder, aliasLoc, cir::PointerType::get(initAlias.getFunctionType()),
1512 initAlias.getSymName());
1513 mlir::Value nullCheck =
1514 builder.getNullValue(funcLoad.getType(), aliasLoc);
1515 mlir::Value cmp = cir::CmpOp::create(
1516 builder, aliasLoc, cir::CmpOpKind::ne, funcLoad, nullCheck);
1517 cir::IfOp::create(builder, aliasLoc, cmp, /*withElseRegion=*/false,
1518 [&](mlir::OpBuilder &, mlir::Location loc) {
1519 builder.createCallOp(aliasLoc, initAlias, {});
1520 cir::YieldOp::create(builder, aliasLoc);
1521 });
1522 } else {
1523 // If this IS a definition, we know the alias exists, so we can just emit
1524 // a call to it.
1525 builder.createCallOp(aliasLoc, initAlias, {});
1526 }
1527 }
1528 cir::GetGlobalOp get = builder.createGetGlobal(op, /*tls=*/true);
1529 cir::ReturnOp::create(builder, op.getLoc(), {get});
1530}
1531
1532cir::FuncOp
1533LoweringPreparePass::defineGlobalThreadLocalInitAlias(cir::GlobalOp op,
1534 cir::FuncOp aliasee) {
1535 CIRBaseBuilderTy builder(getContext());
1536 mlir::OpBuilder::InsertionGuard insertGuard(builder);
1537 builder.setInsertionPointToStart(&mlirModule.getBodyRegion().front());
1538 mlir::StringAttr aliasName = op.getDynTlsRefs()->getInitName();
1539 auto existingAliasIter = threadLocalInitAliases.find(aliasName.getValue());
1540
1541 if (existingAliasIter != threadLocalInitAliases.end())
1542 return existingAliasIter->second;
1543
1544 auto funcType = builder.getVoidFnTy();
1545 cir::FuncOp alias =
1546 cir::FuncOp::create(builder, op.getLoc(), aliasName, funcType);
1547 alias.setLinkage(op.getLinkage());
1548
1549 if (aliasee) {
1550 alias.setAliasee(aliasee.getSymName());
1551 } else {
1552 // If we don't have anything to alias (because this isn't a variable
1553 // definition!), we set this as just a function definition with no alias,
1554 // and extern-weak.
1555 alias.setLinkage(cir::GlobalLinkageKind::ExternalWeakLinkage);
1556 mlir::SymbolTable::setSymbolVisibility(
1557 alias, mlir::SymbolTable::Visibility::Private);
1558 }
1559
1560 threadLocalInitAliases.insert({aliasName.getValue(), alias});
1561 return alias;
1562}
1563
1564void LoweringPreparePass::lowerGlobalOp(GlobalOp op) {
1565 // Static locals are handled separately via guard variables.
1566 if (op.getStaticLocalGuard())
1567 return;
1568
1569 mlir::Region &ctorRegion = op.getCtorRegion();
1570 mlir::Region &dtorRegion = op.getDtorRegion();
1571 cir::FuncOp initAlias;
1572
1573 if (!ctorRegion.empty() || !dtorRegion.empty()) {
1574 // Build a variable initialization function and move the initialzation code
1575 // in the ctor region over.
1576 cir::FuncOp f = buildCXXGlobalVarDeclInitFunc(op);
1577
1578 // Clear the ctor and dtor region
1579 ctorRegion.getBlocks().clear();
1580 dtorRegion.getBlocks().clear();
1581
1583 if (op.getTlsModel() == TLS_Model::GeneralDynamic &&
1584 !op.getStaticLocalGuard().has_value()) {
1585 // There are two types of global TLS variables: 'ordered' and 'unordered'.
1586 // 'ordered' are the common case. A call to any of them causes all of the
1587 // initializers for all other 'ordered' ones to be called, via a
1588 // `__tls_init` function. So the 'init alias' that gets called in the
1589 // wrapper for these goes directly to `__tls_init`.
1590
1591 // 'Unordered' values are the case for variable templates. In this case,
1592 // their init alias goes directly to their init function. The FE generates
1593 // a guard variable for them (since they cannot use the global guard), so
1594 // we differentiate them that way.
1595
1596 if (op.getDynTlsRefs()->getGuardName()) {
1597 // Unordered: the alias is the function we just generated.
1598 initAlias = defineGlobalThreadLocalInitAlias(op, f);
1599 } else {
1600 // Ordered: Get the __tls_init, and make the alias to that.
1601 initAlias = defineGlobalThreadLocalInitAlias(op, getTlsInitFn());
1602 // Ordered inits also need to get called from the __tls_init function,
1603 // so we add the init function to the list, so that we can add them to
1604 // it later.
1605 globalThreadLocalInitializers.push_back(f);
1606 }
1607 } else {
1608 dynamicInitializers.push_back(f);
1609 }
1610 } else if (op.getTlsModel() == TLS_Model::GeneralDynamic &&
1611 op.getDynTlsRefs() && op.isDeclaration()) {
1612 // If this is a declaration and has no init function, we probably DO have to
1613 // create an alias that needs checking, so create it as extern-weak.
1614 initAlias = defineGlobalThreadLocalInitAlias(op, {});
1615 }
1616
1617 // We need a wrapper for TLS globals that MIGHT have a non-constant
1618 // initialization. The FE will have generated the DynTlsRefs for any with
1619 // known dynamic init, or unknown (extern) init.
1620 if (op.getTlsModel() == TLS_Model::GeneralDynamic && op.getDynTlsRefs())
1621 defineGlobalThreadLocalWrapper(op, initAlias, !op.isDeclaration());
1622
1624}
1625
1626void LoweringPreparePass::lowerGetGlobalOp(GetGlobalOp op) {
1627 if (!op.getTls())
1628 return;
1629 auto globalOp = mlir::cast<cir::GlobalOp>(
1630 symbolTables.lookupNearestSymbolFrom(op, op.getNameAttr()));
1631
1632 // Only global/namespace scope thread local variables need to have their
1633 // get-global operations rewritten to be calls to a wrapper function. If
1634 // we're not in a dynamic TLS (or one without the TLS markers), we can leave
1635 // this one as a get-global and return early.
1636 if (globalOp.getTlsModel() != TLS_Model::GeneralDynamic ||
1637 !globalOp.getDynTlsRefs())
1638 return;
1639
1640 // If this is a global TLS, we need to replace the call to 'get_global' with a
1641 // call to the wrapper function. Classic codegen figures out some cases where
1642 // we can omit this, but for now we're going to always put it in, as it is
1643 // effectively a no-op.
1644
1645 // The first 'GetGlobalOp' at the beginning of a ctor/dtor region on one of
1646 // these is for the purpose of creating/destroying. We want to skip replacing
1647 // THAT one, but leave all other get-global-ops in place, else
1648 // self-referential ops won't work right.
1649
1650 // Note that ctors/dtors are removed during this pass. We get away with these
1651 // checks because the only time that these situations can actually be true
1652 // (that is, the ctor/dtor region exist) is if we're in the process of
1653 // converting the ctor/dtor for this. If we're NOT doing that, the ctor/dtor
1654 // will have already disappeared.
1655 mlir::Operation *parentOp = op->getParentOp();
1656 if (parentOp == globalOp) {
1657 mlir::Region *ctorRegion = &globalOp.getCtorRegion();
1658 mlir::Region *dtorRegion = &globalOp.getDtorRegion();
1659
1660 if (!ctorRegion->empty() && &*ctorRegion->op_begin() == op.getOperation())
1661 return;
1662 if (!dtorRegion->empty() && &*dtorRegion->op_begin() == op.getOperation())
1663 return;
1664 }
1665
1666 CIRBaseBuilderTy builder(getContext());
1667 cir::FuncOp wrapperFunc = getOrCreateThreadLocalWrapper(builder, globalOp);
1668
1669 builder.setInsertionPoint(op);
1670 cir::CallOp call = builder.createCallOp(
1671 wrapperFunc.getLoc(),
1672 mlir::FlatSymbolRefAttr::get(wrapperFunc.getSymNameAttr()),
1673 wrapperFunc.getFunctionType().getReturnType(), {});
1674 op->replaceAllUsesWith(call);
1675 op.erase();
1676}
1677
1678void LoweringPreparePass::lowerThreeWayCmpOp(CmpThreeWayOp op) {
1679 CIRBaseBuilderTy builder(getContext());
1680 builder.setInsertionPointAfter(op);
1681
1682 mlir::Location loc = op->getLoc();
1683 cir::CmpThreeWayInfoAttr cmpInfo = op.getInfo();
1684
1685 mlir::Value ltRes =
1686 builder.getConstantInt(loc, op.getType(), cmpInfo.getLt());
1687 mlir::Value eqRes =
1688 builder.getConstantInt(loc, op.getType(), cmpInfo.getEq());
1689 mlir::Value gtRes =
1690 builder.getConstantInt(loc, op.getType(), cmpInfo.getGt());
1691
1692 mlir::Value transformedResult;
1693 if (cmpInfo.getOrdering() != CmpOrdering::Partial) {
1694 // Total ordering
1695 mlir::Value lt =
1696 builder.createCompare(loc, CmpOpKind::lt, op.getLhs(), op.getRhs());
1697 mlir::Value selectOnLt = builder.createSelect(loc, lt, ltRes, gtRes);
1698 mlir::Value eq =
1699 builder.createCompare(loc, CmpOpKind::eq, op.getLhs(), op.getRhs());
1700 transformedResult = builder.createSelect(loc, eq, eqRes, selectOnLt);
1701 } else {
1702 // Partial ordering
1703 cir::ConstantOp unorderedRes = builder.getConstantInt(
1704 loc, op.getType(), cmpInfo.getUnordered().value());
1705
1706 mlir::Value eq =
1707 builder.createCompare(loc, CmpOpKind::eq, op.getLhs(), op.getRhs());
1708 mlir::Value selectOnEq = builder.createSelect(loc, eq, eqRes, unorderedRes);
1709 mlir::Value gt =
1710 builder.createCompare(loc, CmpOpKind::gt, op.getLhs(), op.getRhs());
1711 mlir::Value selectOnGt = builder.createSelect(loc, gt, gtRes, selectOnEq);
1712 mlir::Value lt =
1713 builder.createCompare(loc, CmpOpKind::lt, op.getLhs(), op.getRhs());
1714 transformedResult = builder.createSelect(loc, lt, ltRes, selectOnGt);
1715 }
1716
1717 op.replaceAllUsesWith(transformedResult);
1718 op.erase();
1719}
1720
1721template <typename AttributeTy>
1722static llvm::SmallVector<mlir::Attribute>
1723prepareCtorDtorAttrList(mlir::MLIRContext *context,
1724 llvm::ArrayRef<std::pair<std::string, uint32_t>> list) {
1726 for (const auto &[name, priority] : list)
1727 attrs.push_back(AttributeTy::get(context, name, priority));
1728 return attrs;
1729}
1730
1731void LoweringPreparePass::buildGlobalCtorDtorList() {
1732 if (!globalCtorList.empty()) {
1733 llvm::SmallVector<mlir::Attribute> globalCtors =
1735 globalCtorList);
1736
1737 mlirModule->setAttr(cir::CIRDialect::getGlobalCtorsAttrName(),
1738 mlir::ArrayAttr::get(&getContext(), globalCtors));
1739 }
1740
1741 if (!globalDtorList.empty()) {
1742 llvm::SmallVector<mlir::Attribute> globalDtors =
1744 globalDtorList);
1745 mlirModule->setAttr(cir::CIRDialect::getGlobalDtorsAttrName(),
1746 mlir::ArrayAttr::get(&getContext(), globalDtors));
1747 }
1748}
1749
1750cir::GlobalOp
1751LoweringPreparePass::createGlobalThreadLocalGuard(CIRBaseBuilderTy &builder,
1752 mlir::Location loc) {
1753 mlir::OpBuilder::InsertionGuard guard(builder);
1754 builder.setInsertionPointToStart(mlirModule.getBody());
1755
1756 // The TLS Guard is always an Int8Ty.
1757 cir::IntType guardTy = builder.getSIntNTy(8);
1758 auto g = cir::GlobalOp::create(builder, loc, "__tls_guard", guardTy);
1759 g.setLinkageAttr(cir::GlobalLinkageKindAttr::get(
1760 builder.getContext(), cir::GlobalLinkageKind::InternalLinkage));
1761 g.setAlignment(clang::CharUnits::One().getAsAlign().value());
1762 // At the moment, we only have implementation for this mode, as it is the
1763 // default. At one point we might need to load this mode from the module.
1764 g.setTlsModel(TLS_Model::GeneralDynamic);
1765 g.setInitialValueAttr(cir::IntAttr::get(guardTy, 0));
1766 return g;
1767}
1768
1769cir::IfOp LoweringPreparePass::buildGlobalTlsGuardCheck(
1770 CIRBaseBuilderTy &builder, mlir::Location loc, cir::GlobalOp guard) {
1771 cir::GetGlobalOp getGuard = builder.createGetGlobal(guard, /*tls=*/true);
1772 mlir::Value getGuardValue = getGuard;
1773
1774 // Classic codegen always just loads the first byte of the guard instead of
1775 // the whole thing. __tls_guard is already only 8 bits, but for the case of
1776 // unordered TLS, it gets created as 64 bits.
1777 if (guard.getSymType() != builder.getSIntNTy(8))
1778 getGuardValue = builder.createBitcast(
1779 getGuard, cir::PointerType::get(builder.getSIntNTy(8)));
1780
1781 mlir::Value guardLoad =
1782 builder.createAlignedLoad(loc, getGuardValue, *guard.getAlignment());
1783 auto zero = builder.getConstantInt(loc, builder.getSIntNTy(8), 0);
1784 cir::CmpOp compare =
1785 builder.createCompare(loc, cir::CmpOpKind::eq, guardLoad, zero);
1786 return cir::IfOp::create(
1787 builder, loc, compare,
1788 /*withElseRegion=*/false, [&](mlir::OpBuilder &, mlir::Location loc) {
1789 // Classic codegen still does this store as a i8, but it doesn't seem
1790 // reasonable to do an i8 store into a 64 bit value?
1791 builder.createStore(
1792 loc, builder.getConstantInt(loc, guard.getSymType(), 1), getGuard);
1793 });
1794}
1795
1796void LoweringPreparePass::buildCXXGlobalTlsFunc() {
1797 if (globalThreadLocalInitializers.empty())
1798 return;
1799
1800 // The global-ordered-init function for TLS variables just calls each of the
1801 // init-functions in order after doing a guard.
1802
1803 cir::FuncOp tlsInit = getTlsInitFn();
1804 mlir::Location loc = tlsInit.getLoc();
1805 CIRBaseBuilderTy builder(getContext());
1806 mlir::Block *entryBB = tlsInit.addEntryBlock();
1807 builder.setInsertionPointToStart(entryBB);
1808
1809 cir::IfOp ifOperation = buildGlobalTlsGuardCheck(
1810 builder, loc, createGlobalThreadLocalGuard(builder, loc));
1811
1812 // Emit the body of the guarded spot.
1813 builder.setInsertionPointToEnd(&ifOperation.getThenRegion().front());
1814 for (cir::FuncOp initFunc : globalThreadLocalInitializers)
1815 builder.createCallOp(loc, initFunc, {});
1816 cir::YieldOp::create(builder, loc);
1817
1818 builder.setInsertionPointAfter(ifOperation);
1819 cir::ReturnOp::create(builder, loc);
1820}
1821
1822void LoweringPreparePass::buildCXXGlobalInitFunc() {
1823 if (dynamicInitializers.empty())
1824 return;
1825
1826 // TODO: handle globals with a user-specified initialzation priority.
1827 // TODO: handle default priority more nicely.
1829
1830 SmallString<256> fnName;
1831 // Include the filename in the symbol name. Including "sub_" matches gcc
1832 // and makes sure these symbols appear lexicographically behind the symbols
1833 // with priority (TBD). Module implementation units behave the same
1834 // way as a non-modular TU with imports.
1835 // TODO: check CXX20ModuleInits
1836 if (astCtx->getCurrentNamedModule() &&
1838 llvm::raw_svector_ostream out(fnName);
1839 std::unique_ptr<clang::MangleContext> mangleCtx(
1840 astCtx->createMangleContext());
1841 cast<clang::ItaniumMangleContext>(*mangleCtx)
1842 .mangleModuleInitializer(astCtx->getCurrentNamedModule(), out);
1843 } else {
1844 fnName += "_GLOBAL__sub_I_";
1845 fnName += getTransformedFileName(mlirModule);
1846 }
1847
1848 CIRBaseBuilderTy builder(getContext());
1849 builder.setInsertionPointToEnd(&mlirModule.getBodyRegion().back());
1850 auto fnType = cir::FuncType::get({}, builder.getVoidTy());
1851 cir::FuncOp f =
1852 buildRuntimeFunction(builder, fnName, mlirModule.getLoc(), fnType,
1853 cir::GlobalLinkageKind::ExternalLinkage);
1854 builder.setInsertionPointToStart(f.addEntryBlock());
1855 for (cir::FuncOp &f : dynamicInitializers)
1856 builder.createCallOp(f.getLoc(), f, {});
1857 // Add the global init function (not the individual ctor functions) to the
1858 // global ctor list.
1859 globalCtorList.emplace_back(fnName,
1860 cir::GlobalCtorAttr::getDefaultPriority());
1861
1862 cir::ReturnOp::create(builder, f.getLoc());
1863}
1864
1865/// Lower a cir.array.ctor or cir.array.dtor into a do-while loop that
1866/// iterates over every element. For cir.array.ctor ops whose partial_dtor
1867/// region is non-empty, the ctor loop is wrapped in a cir.cleanup.scope whose
1868/// EH cleanup performs a reverse destruction loop using the partial dtor body.
1870 clang::ASTContext *astCtx,
1871 mlir::Operation *op, mlir::Type eltTy,
1872 mlir::Value addr,
1873 mlir::Value numElements,
1874 uint64_t arrayLen, bool isCtor) {
1875 mlir::Location loc = op->getLoc();
1876 bool isDynamic = numElements != nullptr;
1877
1878 // TODO: instead of getting the size from the AST context, create alias for
1879 // PtrDiffTy and unify with CIRGen stuff.
1880 const unsigned sizeTypeSize =
1881 astCtx->getTypeSize(astCtx->getSignedSizeType());
1882
1883 // Both constructors and destructors use end = begin + numElements.
1884 // Constructors iterate forward [begin, end). Destructors iterate backward
1885 // from end, decrementing before calling the destructor on each element.
1886 mlir::Value begin, end;
1887 if (isDynamic) {
1888 begin = addr;
1889 end = cir::PtrStrideOp::create(builder, loc, eltTy, begin, numElements);
1890 } else {
1891 mlir::Value endOffsetVal =
1892 builder.getUnsignedInt(loc, arrayLen, sizeTypeSize);
1893 begin = cir::CastOp::create(builder, loc, eltTy,
1894 cir::CastKind::array_to_ptrdecay, addr);
1895 end = cir::PtrStrideOp::create(builder, loc, eltTy, begin, endOffsetVal);
1896 }
1897
1898 mlir::Value start = isCtor ? begin : end;
1899 mlir::Value stop = isCtor ? end : begin;
1900
1901 // For dynamic destructors, guard against zero elements.
1902 // This places the destructor loop emitted below inside the if block.
1903 cir::IfOp ifOp;
1904 if (isDynamic) {
1905 mlir::Value guardCond;
1906 if (isCtor) {
1907 mlir::Value zero = builder.getUnsignedInt(loc, 0, sizeTypeSize);
1908 guardCond = cir::CmpOp::create(builder, loc, cir::CmpOpKind::ne,
1909 numElements, zero);
1910 } else {
1911 // We could check for numElements != 0 in this case too, but this matches
1912 // what classic codegen does.
1913 guardCond =
1914 cir::CmpOp::create(builder, loc, cir::CmpOpKind::ne, start, stop);
1915 }
1916 ifOp = cir::IfOp::create(builder, loc, guardCond,
1917 /*withElseRegion=*/false,
1918 [&](mlir::OpBuilder &, mlir::Location) {});
1919 builder.setInsertionPointToStart(&ifOp.getThenRegion().front());
1920 }
1921
1922 mlir::Value tmpAddr = builder.createAlloca(
1923 loc, /*addr type*/ builder.getPointerTo(eltTy),
1924 /*var type*/ eltTy, "__array_idx", builder.getAlignmentAttr(1));
1925 builder.createStore(loc, start, tmpAddr);
1926
1927 mlir::Block *bodyBlock = &op->getRegion(0).front();
1928
1929 // Clone the region body (ctor/dtor call and any setup ops like per-element
1930 // zero-init) into the loop, remapping the block argument to the current
1931 // element pointer.
1932 auto cloneRegionBodyInto = [&](mlir::Block *srcBlock,
1933 mlir::Value replacement) {
1934 mlir::IRMapping map;
1935 map.map(srcBlock->getArgument(0), replacement);
1936 for (mlir::Operation &regionOp : *srcBlock) {
1937 if (!mlir::isa<cir::YieldOp>(&regionOp))
1938 builder.clone(regionOp, map);
1939 }
1940 };
1941
1942 mlir::Block *partialDtorBlock = nullptr;
1943 if (auto arrayCtor = mlir::dyn_cast<cir::ArrayCtor>(op)) {
1944 mlir::Region &partialDtor = arrayCtor.getPartialDtor();
1945 if (!partialDtor.empty())
1946 partialDtorBlock = &partialDtor.front();
1947 } else if (auto arrayDtor = mlir::dyn_cast<cir::ArrayDtor>(op)) {
1948 // When the element destructor may throw, reuse the body block as the
1949 // partial-dtor block so that an exception thrown by an element's dtor
1950 // continues the reverse-destruction loop in the EH cleanup region. The
1951 // body block already stores the next element pointer to `tmpAddr`
1952 // before invoking the dtor, so when an exception unwinds from the
1953 // dtor call `tmpAddr` already points at the element that threw, and
1954 // the cleanup loop picks up from `tmpAddr - 1` and walks back to
1955 // `begin`.
1956 if (arrayDtor.getDtorMayThrow())
1957 partialDtorBlock = bodyBlock;
1958 }
1959
1960 auto emitCtorDtorLoop = [&]() {
1961 builder.createDoWhile(
1962 loc,
1963 /*condBuilder=*/
1964 [&](mlir::OpBuilder &b, mlir::Location loc) {
1965 auto currentElement = cir::LoadOp::create(b, loc, eltTy, tmpAddr);
1966 auto cmp = cir::CmpOp::create(builder, loc, cir::CmpOpKind::ne,
1967 currentElement, stop);
1968 builder.createCondition(cmp);
1969 },
1970 /*bodyBuilder=*/
1971 [&](mlir::OpBuilder &b, mlir::Location loc) {
1972 auto currentElement = cir::LoadOp::create(b, loc, eltTy, tmpAddr);
1973 if (isCtor) {
1974 cloneRegionBodyInto(bodyBlock, currentElement);
1975 mlir::Value stride = builder.getUnsignedInt(loc, 1, sizeTypeSize);
1976 auto nextElement = cir::PtrStrideOp::create(builder, loc, eltTy,
1977 currentElement, stride);
1978 builder.createStore(loc, nextElement, tmpAddr);
1979 } else {
1980 mlir::Value stride = builder.getSignedInt(loc, -1, sizeTypeSize);
1981 auto prevElement = cir::PtrStrideOp::create(builder, loc, eltTy,
1982 currentElement, stride);
1983 builder.createStore(loc, prevElement, tmpAddr);
1984 cloneRegionBodyInto(bodyBlock, prevElement);
1985 }
1986
1987 cir::YieldOp::create(b, loc);
1988 });
1989 };
1990
1991 if (partialDtorBlock) {
1992 cir::CleanupScopeOp::create(
1993 builder, loc, cir::CleanupKind::EH,
1994 /*bodyBuilder=*/
1995 [&](mlir::OpBuilder &b, mlir::Location loc) {
1996 emitCtorDtorLoop();
1997 cir::YieldOp::create(b, loc);
1998 },
1999 /*cleanupBuilder=*/
2000 [&](mlir::OpBuilder &b, mlir::Location loc) {
2001 auto cur = cir::LoadOp::create(b, loc, eltTy, tmpAddr);
2002 auto cmp =
2003 cir::CmpOp::create(builder, loc, cir::CmpOpKind::ne, cur, begin);
2004 cir::IfOp::create(
2005 builder, loc, cmp, /*withElseRegion=*/false,
2006 [&](mlir::OpBuilder &b, mlir::Location loc) {
2007 builder.createDoWhile(
2008 loc,
2009 /*condBuilder=*/
2010 [&](mlir::OpBuilder &b, mlir::Location loc) {
2011 auto el = cir::LoadOp::create(b, loc, eltTy, tmpAddr);
2012 auto neq = cir::CmpOp::create(
2013 builder, loc, cir::CmpOpKind::ne, el, begin);
2014 builder.createCondition(neq);
2015 },
2016 /*bodyBuilder=*/
2017 [&](mlir::OpBuilder &b, mlir::Location loc) {
2018 auto el = cir::LoadOp::create(b, loc, eltTy, tmpAddr);
2019 mlir::Value negOne =
2020 builder.getSignedInt(loc, -1, sizeTypeSize);
2021 auto prev = cir::PtrStrideOp::create(builder, loc, eltTy,
2022 el, negOne);
2023 builder.createStore(loc, prev, tmpAddr);
2024 cloneRegionBodyInto(partialDtorBlock, prev);
2025 builder.createYield(loc);
2026 });
2027 cir::YieldOp::create(builder, loc);
2028 });
2029 cir::YieldOp::create(b, loc);
2030 });
2031 } else {
2032 emitCtorDtorLoop();
2033 }
2034
2035 if (ifOp)
2036 cir::YieldOp::create(builder, loc);
2037
2038 op->erase();
2039}
2040
2041void LoweringPreparePass::lowerArrayDtor(cir::ArrayDtor op) {
2042 CIRBaseBuilderTy builder(getContext());
2043 builder.setInsertionPointAfter(op.getOperation());
2044
2045 mlir::Type eltTy = op->getRegion(0).getArgument(0).getType();
2046
2047 if (op.getNumElements()) {
2048 lowerArrayDtorCtorIntoLoop(builder, astCtx, op, eltTy, op.getAddr(),
2049 op.getNumElements(), /*arrayLen=*/0,
2050 /*isCtor=*/false);
2051 return;
2052 }
2053
2054 auto arrayLen =
2055 mlir::cast<cir::ArrayType>(op.getAddr().getType().getPointee()).getSize();
2056 lowerArrayDtorCtorIntoLoop(builder, astCtx, op, eltTy, op.getAddr(),
2057 /*numElements=*/nullptr, arrayLen,
2058 /*isCtor=*/false);
2059}
2060
2061void LoweringPreparePass::lowerArrayCtor(cir::ArrayCtor op) {
2062 cir::CIRBaseBuilderTy builder(getContext());
2063 builder.setInsertionPointAfter(op.getOperation());
2064
2065 mlir::Type eltTy = op->getRegion(0).getArgument(0).getType();
2066
2067 if (op.getNumElements()) {
2068 lowerArrayDtorCtorIntoLoop(builder, astCtx, op, eltTy, op.getAddr(),
2069 op.getNumElements(), /*arrayLen=*/0,
2070 /*isCtor=*/true);
2071 return;
2072 }
2073
2074 auto arrayLen =
2075 mlir::cast<cir::ArrayType>(op.getAddr().getType().getPointee()).getSize();
2076 lowerArrayDtorCtorIntoLoop(builder, astCtx, op, eltTy, op.getAddr(),
2077 /*numElements=*/nullptr, arrayLen,
2078 /*isCtor=*/true);
2079}
2080
2081cir::FuncOp LoweringPreparePass::getCalledFunction(cir::CallOp callOp) {
2082 mlir::SymbolRefAttr sym = llvm::dyn_cast_if_present<mlir::SymbolRefAttr>(
2083 callOp.getCallableForCallee());
2084 if (!sym)
2085 return nullptr;
2086 return symbolTables.lookupNearestSymbolFrom<cir::FuncOp>(callOp, sym);
2087}
2088
2089void LoweringPreparePass::lowerTrivialCopyCall(cir::CallOp op) {
2090 cir::FuncOp funcOp = getCalledFunction(op);
2091 if (!funcOp)
2092 return;
2093
2094 std::optional<cir::CtorKind> ctorKind = funcOp.getCxxConstructorKind();
2095 if (ctorKind && *ctorKind == cir::CtorKind::Copy &&
2096 funcOp.isCxxTrivialMemberFunction()) {
2097 // Replace the trivial copy constructor call with a `CopyOp`
2098 CIRBaseBuilderTy builder(getContext());
2099 mlir::ValueRange operands = op.getOperands();
2100 mlir::Value dest = operands[0];
2101 mlir::Value src = operands[1];
2102 builder.setInsertionPoint(op);
2103 builder.createCopy(dest, src);
2104 op.erase();
2105 }
2106}
2107
2108cir::GlobalOp LoweringPreparePass::getOrCreateConstAggregateGlobal(
2109 CIRBaseBuilderTy &builder, mlir::Location loc, llvm::StringRef baseName,
2110 mlir::Type ty, mlir::TypedAttr constant) {
2111 // Look up (and lazily populate) the per-base-name cache.
2112 llvm::SmallVector<cir::GlobalOp, 1> &versions =
2113 constAggregateGlobals[baseName];
2114
2115 // First, check globals we've already discovered for this base name.
2116 for (cir::GlobalOp gv : versions) {
2117 if (gv.getSymType() == ty && gv.getInitialValue() == constant)
2118 return gv;
2119 }
2120
2121 // No cached match. Scan the module's symbol table starting from the next
2122 // unscanned version. In practice this should usually exit on the first
2123 // iteration, but it's possible that some other pass or a previous
2124 // invocation of this pass created globals using this same logic.
2125 llvm::SmallString<128> name(baseName);
2126 size_t baseLen = name.size();
2127 unsigned version = versions.size();
2128 while (true) {
2129 name.resize(baseLen);
2130 if (version != 0) {
2131 name.push_back('.');
2132 llvm::Twine(version).toVector(name);
2133 }
2134 auto existingGv = symbolTables.lookupSymbolIn<cir::GlobalOp>(
2135 mlirModule, mlir::StringAttr::get(&getContext(), name));
2136 if (!existingGv)
2137 break;
2138 versions.push_back(existingGv);
2139 if (existingGv.getSymType() == ty &&
2140 existingGv.getInitialValue() == constant)
2141 return existingGv;
2142 ++version;
2143 }
2144
2145 // No match found, create a new global. The loop above found an unused name.
2146 mlir::OpBuilder::InsertionGuard guard(builder);
2147 builder.setInsertionPointToStart(mlirModule.getBody());
2148 auto gv =
2149 cir::GlobalOp::create(builder, loc, name, ty,
2150 /*isConstant=*/true,
2151 cir::LangAddressSpaceAttr::get(
2152 &getContext(), cir::LangAddressSpace::Default),
2153 cir::GlobalLinkageKind::PrivateLinkage);
2154 mlir::SymbolTable::setSymbolVisibility(
2155 gv, mlir::SymbolTable::Visibility::Private);
2156 gv.setInitialValueAttr(constant);
2157
2158 // Keep the cached symbol table in sync with the new global so subsequent
2159 // lookups for other base names find it.
2160 symbolTables.getSymbolTable(mlirModule).insert(gv);
2161
2162 versions.push_back(gv);
2163 return gv;
2164}
2165
2166void LoweringPreparePass::lowerStoreOfConstAggregate(cir::StoreOp op) {
2167 // Check if the value operand is a cir.const with aggregate type.
2168 auto constOp = op.getValue().getDefiningOp<cir::ConstantOp>();
2169 if (!constOp)
2170 return;
2171
2172 mlir::Type ty = constOp.getType();
2173 if (!mlir::isa<cir::ArrayType, cir::RecordType>(ty))
2174 return;
2175
2176 // Only transform stores to local variables (backed by cir.alloca).
2177 // Stores to other addresses (e.g. base_class_addr) should not be
2178 // transformed as they may be partial initializations.
2179 auto alloca = op.getAddr().getDefiningOp<cir::AllocaOp>();
2180 if (!alloca)
2181 return;
2182
2183 mlir::TypedAttr constant = constOp.getValue();
2184
2185 // OG implements several optimization tiers for constant aggregate
2186 // initialization. For now we always create a global constant + memcpy
2187 // (shouldCreateMemCpyFromGlobal). Future work can add the intermediate
2188 // tiers.
2192
2193 // Get function name from parent cir.func.
2194 auto func = op->getParentOfType<cir::FuncOp>();
2195 if (!func)
2196 return;
2197 llvm::StringRef funcName = func.getSymName();
2198
2199 // Get variable name from the alloca.
2200 llvm::StringRef varName = alloca.getName();
2201
2202 // Build base name: __const.<func>.<var>
2203 std::string baseName = ("__const." + funcName + "." + varName).str();
2204 CIRBaseBuilderTy builder(getContext());
2205
2206 // Check for existing globals and create a new global with a unique name
2207 // if no match is found.
2208 cir::GlobalOp gv = getOrCreateConstAggregateGlobal(builder, op.getLoc(),
2209 baseName, ty, constant);
2210
2211 // Now replace the store with get_global + copy.
2212 builder.setInsertionPoint(op);
2213
2214 auto ptrTy = cir::PointerType::get(ty);
2215 mlir::Value globalPtr =
2216 cir::GetGlobalOp::create(builder, op.getLoc(), ptrTy, gv.getSymName());
2217
2218 // Replace store with copy.
2219 builder.createCopy(op.getAddr(), globalPtr);
2220
2221 // Erase the original store.
2222 op.erase();
2223
2224 // Erase the cir.const if it has no remaining users.
2225 if (constOp.use_empty())
2226 constOp.erase();
2227}
2228
2229void LoweringPreparePass::runOnOp(mlir::Operation *op) {
2230 if (auto arrayCtor = dyn_cast<cir::ArrayCtor>(op)) {
2231 lowerArrayCtor(arrayCtor);
2232 } else if (auto arrayDtor = dyn_cast<cir::ArrayDtor>(op)) {
2233 lowerArrayDtor(arrayDtor);
2234 } else if (auto cast = mlir::dyn_cast<cir::CastOp>(op)) {
2235 lowerCastOp(cast);
2236 } else if (auto complexDiv = mlir::dyn_cast<cir::ComplexDivOp>(op)) {
2237 lowerComplexDivOp(complexDiv);
2238 } else if (auto complexMul = mlir::dyn_cast<cir::ComplexMulOp>(op)) {
2239 lowerComplexMulOp(complexMul);
2240 } else if (auto glob = mlir::dyn_cast<cir::GlobalOp>(op)) {
2241 lowerGlobalOp(glob);
2242 } else if (auto getGlob = mlir::dyn_cast<cir::GetGlobalOp>(op)) {
2243 lowerGetGlobalOp(getGlob);
2244 } else if (auto unaryOp = mlir::dyn_cast<cir::UnaryOpInterface>(op)) {
2245 lowerUnaryOp(unaryOp);
2246 } else if (auto callOp = dyn_cast<cir::CallOp>(op)) {
2247 lowerTrivialCopyCall(callOp);
2248 } else if (auto storeOp = dyn_cast<cir::StoreOp>(op)) {
2249 lowerStoreOfConstAggregate(storeOp);
2250 } else if (auto fnOp = dyn_cast<cir::FuncOp>(op)) {
2251 if (auto globalCtor = fnOp.getGlobalCtorPriority())
2252 globalCtorList.emplace_back(fnOp.getName(), globalCtor.value());
2253 else if (auto globalDtor = fnOp.getGlobalDtorPriority())
2254 globalDtorList.emplace_back(fnOp.getName(), globalDtor.value());
2255
2256 if (mlir::Attribute attr =
2257 fnOp->getAttr(cir::CUDAKernelNameAttr::getMnemonic())) {
2258 auto kernelNameAttr = dyn_cast<CUDAKernelNameAttr>(attr);
2259 llvm::StringRef kernelName = kernelNameAttr.getKernelName();
2260 cudaKernelMap[kernelName] = fnOp;
2261 }
2262 } else if (auto threeWayCmp = dyn_cast<cir::CmpThreeWayOp>(op)) {
2263 lowerThreeWayCmpOp(threeWayCmp);
2264 } else if (auto initOp = dyn_cast<cir::LocalInitOp>(op)) {
2265 lowerLocalInitOp(initOp);
2266 }
2267}
2268
2269static llvm::StringRef getCUDAPrefix(clang::ASTContext *astCtx) {
2270 if (astCtx->getLangOpts().HIP)
2271 return "hip";
2272 return "cuda";
2273}
2274
2275static std::string addUnderscoredPrefix(llvm::StringRef prefix,
2276 llvm::StringRef name) {
2277 return ("__" + prefix + name).str();
2278}
2279
2280/// Creates a global constructor function for the module:
2281///
2282/// For CUDA:
2283/// \code
2284/// void __cuda_module_ctor() {
2285/// Handle = __cudaRegisterFatBinary(GpuBinaryBlob);
2286/// __cuda_register_globals(Handle);
2287/// }
2288/// \endcode
2289///
2290/// For HIP:
2291/// \code
2292/// void __hip_module_ctor() {
2293/// if (__hip_gpubin_handle == 0) {
2294/// __hip_gpubin_handle = __hipRegisterFatBinary(GpuBinaryBlob);
2295/// __hip_register_globals(__hip_gpubin_handle);
2296/// }
2297/// }
2298/// \endcode
2299void LoweringPreparePass::buildCUDAModuleCtor() {
2300 bool isHIP = astCtx->getLangOpts().HIP;
2301
2302 if (astCtx->getLangOpts().GPURelocatableDeviceCode)
2303 llvm_unreachable("GPU RDC NYI");
2304
2305 // For CUDA without -fgpu-rdc, it's safe to stop generating ctor
2306 // if there's nothing to register.
2307 if (cudaKernelMap.empty())
2308 return;
2309
2310 // There's no device-side binary, so no need to proceed for CUDA.
2311 // HIP has to create an external symbol in this case, which is NYI.
2312 mlir::Attribute cudaBinaryHandleAttr =
2313 mlirModule->getAttr(CIRDialect::getCUDABinaryHandleAttrName());
2314 if (!cudaBinaryHandleAttr) {
2315 if (isHIP)
2317 return;
2318 }
2319
2320 llvm::StringRef cudaGPUBinaryName =
2321 mlir::cast<CUDABinaryHandleAttr>(cudaBinaryHandleAttr)
2322 .getName()
2323 .getValue();
2324
2325 llvm::vfs::FileSystem &vfs =
2327 llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> gpuBinaryOrErr =
2328 vfs.getBufferForFile(cudaGPUBinaryName);
2329 if (std::error_code ec = gpuBinaryOrErr.getError()) {
2330 mlirModule->emitError("cannot open GPU binary file: " + cudaGPUBinaryName +
2331 ": " + ec.message());
2332 return;
2333 }
2334 std::unique_ptr<llvm::MemoryBuffer> gpuBinary =
2335 std::move(gpuBinaryOrErr.get());
2336
2337 // Set up common types and builder.
2338 llvm::StringRef cudaPrefix = getCUDAPrefix(astCtx);
2339 mlir::Location loc = mlirModule->getLoc();
2340 CIRBaseBuilderTy builder(getContext());
2341 builder.setInsertionPointToStart(mlirModule.getBody());
2342
2343 Type voidTy = builder.getVoidTy();
2344 PointerType voidPtrTy = builder.getVoidPtrTy();
2345 PointerType voidPtrPtrTy = builder.getPointerTo(voidPtrTy);
2346 IntType intTy = builder.getSIntNTy(32);
2347 IntType charTy = cir::IntType::get(&getContext(), astCtx->getCharWidth(),
2348 /*isSigned=*/false);
2349
2350 // --- Create fatbin globals ---
2351
2352 // The section names are different for MAC OS X.
2353 llvm::StringRef fatbinConstName =
2354 astCtx->getLangOpts().HIP ? ".hip_fatbin" : ".nv_fatbin";
2355
2356 llvm::StringRef fatbinSectionName =
2357 astCtx->getLangOpts().HIP ? ".hipFatBinSegment" : ".nvFatBinSegment";
2358
2359 // Create the fatbin string constant with GPU binary contents.
2360 auto fatbinType =
2361 ArrayType::get(&getContext(), charTy, gpuBinary->getBuffer().size());
2362 std::string fatbinStrName = addUnderscoredPrefix(cudaPrefix, "_fatbin_str");
2363 GlobalOp fatbinStr = GlobalOp::create(builder, loc, fatbinStrName, fatbinType,
2364 /*isConstant=*/true, {},
2365 GlobalLinkageKind::PrivateLinkage);
2366 fatbinStr.setAlignment(8);
2367 fatbinStr.setInitialValueAttr(cir::ConstArrayAttr::get(
2368 fatbinType, StringAttr::get(gpuBinary->getBuffer(), fatbinType)));
2369 fatbinStr.setSection(fatbinConstName);
2370 fatbinStr.setPrivate();
2371
2372 // Create the fatbin wrapper struct:
2373 // struct { int magic; int version; void *fatbin; void *unused; };
2374 auto fatbinWrapperType = RecordType::get(
2375 &getContext(), {intTy, intTy, voidPtrTy, voidPtrTy},
2376 /*packed=*/false, /*padded=*/false, RecordType::RecordKind::Struct);
2377 std::string fatbinWrapperName =
2378 addUnderscoredPrefix(cudaPrefix, "_fatbin_wrapper");
2379 GlobalOp fatbinWrapper = GlobalOp::create(
2380 builder, loc, fatbinWrapperName, fatbinWrapperType,
2381 /*isConstant=*/true, {}, GlobalLinkageKind::PrivateLinkage);
2382 fatbinWrapper.setSection(fatbinSectionName);
2383
2384 constexpr unsigned cudaFatMagic = 0x466243b1;
2385 constexpr unsigned hipFatMagic = 0x48495046;
2386 unsigned fatMagic = isHIP ? hipFatMagic : cudaFatMagic;
2387
2388 auto magicInit = IntAttr::get(intTy, fatMagic);
2389 auto versionInit = IntAttr::get(intTy, 1);
2390 auto fatbinStrSymbol =
2391 mlir::FlatSymbolRefAttr::get(fatbinStr.getSymNameAttr());
2392 auto fatbinInit = GlobalViewAttr::get(voidPtrTy, fatbinStrSymbol);
2393 mlir::TypedAttr unusedInit = builder.getConstNullPtrAttr(voidPtrTy);
2394 fatbinWrapper.setInitialValueAttr(cir::ConstRecordAttr::get(
2395 fatbinWrapperType,
2396 mlir::ArrayAttr::get(&getContext(),
2397 {magicInit, versionInit, fatbinInit, unusedInit})));
2398
2399 // Create the GPU binary handle global variable.
2400 std::string gpubinHandleName =
2401 addUnderscoredPrefix(cudaPrefix, "_gpubin_handle");
2402
2403 GlobalOp gpuBinHandle = GlobalOp::create(
2404 builder, loc, gpubinHandleName, voidPtrPtrTy,
2405 /*isConstant=*/false, {}, cir::GlobalLinkageKind::InternalLinkage);
2406 gpuBinHandle.setInitialValueAttr(builder.getConstNullPtrAttr(voidPtrPtrTy));
2407 gpuBinHandle.setPrivate();
2408
2409 // Declare this function:
2410 // void **__{cuda|hip}RegisterFatBinary(void *);
2411
2412 std::string regFuncName =
2413 addUnderscoredPrefix(cudaPrefix, "RegisterFatBinary");
2414 FuncType regFuncType = FuncType::get({voidPtrTy}, voidPtrPtrTy);
2415 cir::FuncOp regFunc =
2416 buildRuntimeFunction(builder, regFuncName, loc, regFuncType);
2417
2418 std::string moduleCtorName = addUnderscoredPrefix(cudaPrefix, "_module_ctor");
2419 cir::FuncOp moduleCtor = buildRuntimeFunction(
2420 builder, moduleCtorName, loc, FuncType::get({}, voidTy),
2421 GlobalLinkageKind::InternalLinkage);
2422
2423 globalCtorList.emplace_back(moduleCtorName,
2424 cir::GlobalCtorAttr::getDefaultPriority());
2425 builder.setInsertionPointToStart(moduleCtor.addEntryBlock());
2427 if (isHIP) {
2428 // --- Create HIP CTOR ---
2429 // if (__hip_gpubin_handle == nullptr)
2430 // __hip_gpubin_handle = __hipRegisterFatBinary(&fatbinWrapper);
2431 // __hip_register_globals(__hip_gpubin_handle);
2432 // atexit(__hip_module_dtor);
2433 mlir::Block *entryBlock = builder.getInsertionBlock();
2434 mlir::Region *parent = entryBlock->getParent();
2435 mlir::Block *ifBlock = builder.createBlock(parent);
2436 mlir::Block *exitBlock = builder.createBlock(parent);
2437 {
2438 mlir::OpBuilder::InsertionGuard guard(builder);
2439 builder.setInsertionPointToEnd(entryBlock);
2440 mlir::Value handle =
2441 builder.createLoad(loc, builder.createGetGlobal(gpuBinHandle));
2442 auto handlePtrTy = mlir::cast<cir::PointerType>(handle.getType());
2443 mlir::Value nullPtr = builder.getNullPtr(handlePtrTy, loc);
2444 mlir::Value isNull =
2445 builder.createCompare(loc, cir::CmpOpKind::eq, handle, nullPtr);
2446 cir::BrCondOp::create(builder, loc, isNull, ifBlock, exitBlock);
2447 }
2448 {
2449 // Handle is null: load the fatbin and register it.
2450 mlir::OpBuilder::InsertionGuard guard(builder);
2451 builder.setInsertionPointToStart(ifBlock);
2452 mlir::Value wrapper = builder.createGetGlobal(fatbinWrapper);
2453 mlir::Value fatbinVoidPtr = builder.createBitcast(wrapper, voidPtrTy);
2454 cir::CallOp gpuBinaryHandleCall =
2455 builder.createCallOp(loc, regFunc, fatbinVoidPtr);
2456 mlir::Value gpuBinaryHandle = gpuBinaryHandleCall.getResult();
2457 // Store the value back to the global `__hip_gpubin_handle`.
2458 mlir::Value gpuBinaryHandleGlobal = builder.createGetGlobal(gpuBinHandle);
2459 builder.createStore(loc, gpuBinaryHandle, gpuBinaryHandleGlobal);
2460 cir::BrOp::create(builder, loc, exitBlock);
2461 }
2462 {
2463 // Exit block: load the (possibly newly-registered) handle, call
2464 // __hip_register_globals, and register the module dtor with atexit().
2465 mlir::OpBuilder::InsertionGuard guard(builder);
2466 builder.setInsertionPointToStart(exitBlock);
2467 mlir::Value gHandle =
2468 builder.createLoad(loc, builder.createGetGlobal(gpuBinHandle));
2469
2470 if (std::optional<FuncOp> regGlobal = buildCUDARegisterGlobals())
2471 builder.createCallOp(loc, *regGlobal, gHandle);
2472
2473 if (std::optional<FuncOp> dtor = buildHIPModuleDtor()) {
2474 cir::CIRBaseBuilderTy globalBuilder(getContext());
2475 globalBuilder.setInsertionPointToStart(mlirModule.getBody());
2476 FuncOp atexit = buildRuntimeFunction(
2477 globalBuilder, "atexit", loc,
2478 FuncType::get(PointerType::get(dtor->getFunctionType()), intTy));
2479 mlir::Value dtorFunc = GetGlobalOp::create(
2480 builder, loc, PointerType::get(dtor->getFunctionType()),
2481 mlir::FlatSymbolRefAttr::get(dtor->getSymNameAttr()));
2482 builder.createCallOp(loc, atexit, dtorFunc);
2483 }
2484 cir::ReturnOp::create(builder, loc);
2485 }
2486 return;
2487 }
2488 if (!astCtx->getLangOpts().GPURelocatableDeviceCode) {
2489
2490 // --- Create CUDA CTOR-DTOR ---
2491 // Register binary with CUDA runtime. This is substantially different in
2492 // default mode vs. separate compilation.
2493 // Corresponding code:
2494 // gpuBinaryHandle = __cudaRegisterFatBinary(&fatbinWrapper);
2495 mlir::Value wrapper = builder.createGetGlobal(fatbinWrapper);
2496 mlir::Value fatbinVoidPtr = builder.createBitcast(wrapper, voidPtrTy);
2497 cir::CallOp gpuBinaryHandleCall =
2498 builder.createCallOp(loc, regFunc, fatbinVoidPtr);
2499 mlir::Value gpuBinaryHandle = gpuBinaryHandleCall.getResult();
2500 // Store the value back to the global `__cuda_gpubin_handle`.
2501 mlir::Value gpuBinaryHandleGlobal = builder.createGetGlobal(gpuBinHandle);
2502 builder.createStore(loc, gpuBinaryHandle, gpuBinaryHandleGlobal);
2503
2504 // --- Generate __cuda_register_globals and call it ---
2505 if (std::optional<FuncOp> regGlobal = buildCUDARegisterGlobals()) {
2506 builder.createCallOp(loc, *regGlobal, gpuBinaryHandle);
2507 }
2508
2509 // From CUDA 10.1 onwards, we must call this function to end registration:
2510 // void __cudaRegisterFatBinaryEnd(void **fatbinHandle);
2511 // This is CUDA-specific, so no need to use `addUnderscoredPrefix`.
2513 astCtx->getTargetInfo().getSDKVersion(),
2515 cir::CIRBaseBuilderTy globalBuilder(getContext());
2516 globalBuilder.setInsertionPointToStart(mlirModule.getBody());
2517 FuncOp endFunc =
2518 buildRuntimeFunction(globalBuilder, "__cudaRegisterFatBinaryEnd", loc,
2519 FuncType::get({voidPtrPtrTy}, voidTy));
2520 builder.createCallOp(loc, endFunc, gpuBinaryHandle);
2521 }
2522 } else
2523 llvm_unreachable("GPU RDC NYI");
2524
2525 // Create destructor and register it with atexit() the way NVCC does it. Doing
2526 // it during regular destructor phase worked in CUDA before 9.2 but results in
2527 // double-free in 9.2.
2528 if (std::optional<FuncOp> dtor = buildCUDAModuleDtor()) {
2529
2530 // extern "C" int atexit(void (*f)(void));
2531 cir::CIRBaseBuilderTy globalBuilder(getContext());
2532 globalBuilder.setInsertionPointToStart(mlirModule.getBody());
2533 FuncOp atexit = buildRuntimeFunction(
2534 globalBuilder, "atexit", loc,
2535 FuncType::get(PointerType::get(dtor->getFunctionType()), intTy));
2536 mlir::Value dtorFunc = GetGlobalOp::create(
2537 builder, loc, PointerType::get(dtor->getFunctionType()),
2538 mlir::FlatSymbolRefAttr::get(dtor->getSymNameAttr()));
2539 builder.createCallOp(loc, atexit, dtorFunc);
2540 }
2541 cir::ReturnOp::create(builder, loc);
2542}
2543
2544std::optional<FuncOp> LoweringPreparePass::buildCUDAModuleDtor() {
2545 if (!mlirModule->getAttr(CIRDialect::getCUDABinaryHandleAttrName()))
2546 return {};
2547
2548 llvm::StringRef prefix = getCUDAPrefix(astCtx);
2549
2550 VoidType voidTy = VoidType::get(&getContext());
2551 PointerType voidPtrPtrTy = PointerType::get(PointerType::get(voidTy));
2552
2553 mlir::Location loc = mlirModule.getLoc();
2554
2555 cir::CIRBaseBuilderTy builder(getContext());
2556 builder.setInsertionPointToStart(mlirModule.getBody());
2557
2558 // define: void __cudaUnregisterFatBinary(void ** handle);
2559 std::string unregisterFuncName =
2560 addUnderscoredPrefix(prefix, "UnregisterFatBinary");
2561 FuncOp unregisterFunc = buildRuntimeFunction(
2562 builder, unregisterFuncName, loc, FuncType::get({voidPtrPtrTy}, voidTy));
2563
2564 // void __cuda_module_dtor();
2565 // Despite the name, OG doesn't treat it as a destructor, so it shouldn't be
2566 // put into globalDtorList. If it were a real dtor, then it would cause
2567 // double free above CUDA 9.2. The way to use it is to manually call
2568 // atexit() at end of module ctor.
2569 std::string dtorName = addUnderscoredPrefix(prefix, "_module_dtor");
2570 FuncOp dtor =
2571 buildRuntimeFunction(builder, dtorName, loc, FuncType::get({}, voidTy),
2572 GlobalLinkageKind::InternalLinkage);
2573
2574 builder.setInsertionPointToStart(dtor.addEntryBlock());
2575
2576 // For dtor, we only need to call:
2577 // __cudaUnregisterFatBinary(__cuda_gpubin_handle);
2578
2579 std::string gpubinName = addUnderscoredPrefix(prefix, "_gpubin_handle");
2580 GlobalOp gpubinGlobal = cast<GlobalOp>(mlirModule.lookupSymbol(gpubinName));
2581 mlir::Value gpubinAddress = builder.createGetGlobal(gpubinGlobal);
2582 mlir::Value gpubin = builder.createLoad(loc, gpubinAddress);
2583 builder.createCallOp(loc, unregisterFunc, gpubin);
2584 ReturnOp::create(builder, loc);
2585
2586 return dtor;
2587}
2588
2589/// Build the HIP module dtor:
2590///
2591/// void __hip_module_dtor() {
2592/// if (__hip_gpubin_handle != nullptr) {
2593/// __hipUnregisterFatBinary(__hip_gpubin_handle);
2594/// __hip_gpubin_handle = nullptr;
2595/// }
2596/// }
2597///
2598/// Despite the name, OG doesn't treat this as a real destructor: putting it on
2599/// the dtor list would cause a double-free. It is meant to be registered via
2600/// atexit() at the end of the module ctor.
2601std::optional<FuncOp> LoweringPreparePass::buildHIPModuleDtor() {
2602 if (!mlirModule->getAttr(CIRDialect::getCUDABinaryHandleAttrName()))
2603 return {};
2604
2605 llvm::StringRef prefix = getCUDAPrefix(astCtx);
2606
2607 VoidType voidTy = VoidType::get(&getContext());
2608 PointerType voidPtrPtrTy = PointerType::get(PointerType::get(voidTy));
2609
2610 mlir::Location loc = mlirModule.getLoc();
2611
2612 cir::CIRBaseBuilderTy builder(getContext());
2613 builder.setInsertionPointToStart(mlirModule.getBody());
2614
2615 // void __hipUnregisterFatBinary(void ** handle);
2616 std::string unregisterFuncName =
2617 addUnderscoredPrefix(prefix, "UnregisterFatBinary");
2618 FuncOp unregisterFunc = buildRuntimeFunction(
2619 builder, unregisterFuncName, loc, FuncType::get({voidPtrPtrTy}, voidTy));
2620
2621 std::string dtorName = addUnderscoredPrefix(prefix, "_module_dtor");
2622 FuncOp dtor =
2623 buildRuntimeFunction(builder, dtorName, loc, FuncType::get({}, voidTy),
2624 GlobalLinkageKind::InternalLinkage);
2625
2626 std::string gpubinName = addUnderscoredPrefix(prefix, "_gpubin_handle");
2627 GlobalOp gpuBinGlobal = cast<GlobalOp>(mlirModule.lookupSymbol(gpubinName));
2628
2629 mlir::Block *entryBlock = dtor.addEntryBlock();
2630 mlir::Block *ifBlock = builder.createBlock(&dtor.getBody());
2631 mlir::Block *exitBlock = builder.createBlock(&dtor.getBody());
2632
2633 mlir::OpBuilder::InsertionGuard guard(builder);
2634 builder.setInsertionPointToEnd(entryBlock);
2635 mlir::Value handle =
2636 builder.createLoad(loc, builder.createGetGlobal(gpuBinGlobal));
2637 auto handlePtrTy = mlir::cast<cir::PointerType>(handle.getType());
2638 mlir::Value nullPtr = builder.getNullPtr(handlePtrTy, loc);
2639 mlir::Value isNotNull =
2640 builder.createCompare(loc, cir::CmpOpKind::ne, handle, nullPtr);
2641 cir::BrCondOp::create(builder, loc, isNotNull, ifBlock, exitBlock);
2642
2643 {
2644 // Handle is non-null: unregister and clear it.
2645 mlir::OpBuilder::InsertionGuard ifGuard(builder);
2646 builder.setInsertionPointToStart(ifBlock);
2647 builder.createCallOp(loc, unregisterFunc, handle);
2648 builder.createStore(loc, nullPtr, builder.createGetGlobal(gpuBinGlobal));
2649 cir::BrOp::create(builder, loc, exitBlock);
2650 }
2651 {
2652 mlir::OpBuilder::InsertionGuard exitGuard(builder);
2653 builder.setInsertionPointToStart(exitBlock);
2654 cir::ReturnOp::create(builder, loc);
2655 }
2656
2657 return dtor;
2658}
2659
2660std::optional<FuncOp> LoweringPreparePass::buildCUDARegisterGlobals() {
2661 // There is nothing to register.
2662 if (cudaKernelMap.empty())
2663 return {};
2664
2665 cir::CIRBaseBuilderTy builder(getContext());
2666 builder.setInsertionPointToStart(mlirModule.getBody());
2667
2668 mlir::Location loc = mlirModule.getLoc();
2669 llvm::StringRef cudaPrefix = getCUDAPrefix(astCtx);
2670
2671 auto voidTy = VoidType::get(&getContext());
2672 auto voidPtrTy = PointerType::get(voidTy);
2673 auto voidPtrPtrTy = PointerType::get(voidPtrTy);
2674
2675 // Create the function:
2676 // void __cuda_register_globals(void **fatbinHandle)
2677 std::string regGlobalFuncName =
2678 addUnderscoredPrefix(cudaPrefix, "_register_globals");
2679 auto regGlobalFuncTy = FuncType::get({voidPtrPtrTy}, voidTy);
2680 FuncOp regGlobalFunc =
2681 buildRuntimeFunction(builder, regGlobalFuncName, loc, regGlobalFuncTy,
2682 /*linkage=*/GlobalLinkageKind::InternalLinkage);
2683 builder.setInsertionPointToStart(regGlobalFunc.addEntryBlock());
2684
2685 buildCUDARegisterGlobalFunctions(builder, regGlobalFunc);
2686 // TODO: Handle shadow registration
2688
2689 ReturnOp::create(builder, loc);
2690 return regGlobalFunc;
2691}
2692
2693void LoweringPreparePass::buildCUDARegisterGlobalFunctions(
2694 cir::CIRBaseBuilderTy &builder, FuncOp regGlobalFunc) {
2695 mlir::Location loc = mlirModule.getLoc();
2696 llvm::StringRef cudaPrefix = getCUDAPrefix(astCtx);
2697 cir::CIRDataLayout dataLayout(mlirModule);
2698
2699 auto voidTy = VoidType::get(&getContext());
2700 auto voidPtrTy = PointerType::get(voidTy);
2701 auto voidPtrPtrTy = PointerType::get(voidPtrTy);
2702 IntType intTy = builder.getSIntNTy(32);
2703 IntType charTy = cir::IntType::get(&getContext(), astCtx->getCharWidth(),
2704 /*isSigned=*/false);
2705
2706 // Extract the GPU binary handle argument.
2707 mlir::Value fatbinHandle = *regGlobalFunc.args_begin();
2708
2709 cir::CIRBaseBuilderTy globalBuilder(getContext());
2710 globalBuilder.setInsertionPointToStart(mlirModule.getBody());
2711
2712 // Declare CUDA internal functions:
2713 // int __cudaRegisterFunction(
2714 // void **fatbinHandle,
2715 // const char *hostFunc,
2716 // char *deviceFunc,
2717 // const char *deviceName,
2718 // int threadLimit,
2719 // uint3 *tid, uint3 *bid, dim3 *bDim, dim3 *gDim,
2720 // int *wsize
2721 // )
2722 // OG doesn't care about the types at all. They're treated as void*.
2723
2724 FuncOp cudaRegisterFunction = buildRuntimeFunction(
2725 globalBuilder, addUnderscoredPrefix(cudaPrefix, "RegisterFunction"), loc,
2726 FuncType::get({voidPtrPtrTy, voidPtrTy, voidPtrTy, voidPtrTy, intTy,
2727 voidPtrTy, voidPtrTy, voidPtrTy, voidPtrTy, voidPtrTy},
2728 intTy));
2729
2730 auto makeConstantString = [&](llvm::StringRef str) -> GlobalOp {
2731 auto strType = ArrayType::get(&getContext(), charTy, 1 + str.size());
2732 auto tmpString = cir::GlobalOp::create(
2733 globalBuilder, loc, (".str" + str).str(), strType,
2734 /*isConstant=*/true, {},
2735 /*linkage=*/cir::GlobalLinkageKind::PrivateLinkage);
2736
2737 // We must make the string zero-terminated.
2738 tmpString.setInitialValueAttr(
2739 ConstArrayAttr::get(strType, StringAttr::get(str + "\0", strType)));
2740 tmpString.setPrivate();
2741 return tmpString;
2742 };
2743
2744 cir::ConstantOp cirNullPtr = builder.getNullPtr(voidPtrTy, loc);
2745 bool isHIP = astCtx->getLangOpts().HIP;
2746 for (auto kernelName : cudaKernelMap.keys()) {
2747 FuncOp deviceStub = cudaKernelMap[kernelName];
2748 GlobalOp deviceFuncStr = makeConstantString(kernelName);
2749 mlir::Value deviceFunc = builder.createBitcast(
2750 builder.createGetGlobal(deviceFuncStr), voidPtrTy);
2751
2752 mlir::Value hostFunc;
2753 if (isHIP) {
2754 // Under HIP, the kernel-handle is a GlobalOp shadow created by CIR
2755 // codegen and named with the kernel-reference mangled name (e.g.
2756 // `@_Z2fnv` pointing at the device-stub function
2757 // `_Z17__device_stub__fnv`). The CUDAKernelNameAttr on the device-stub
2758 // uses the same name, so we can resolve the shadow by symbol lookup.
2759 auto funcHandle = cast<GlobalOp>(mlirModule.lookupSymbol(kernelName));
2760 hostFunc =
2761 builder.createBitcast(builder.createGetGlobal(funcHandle), voidPtrTy);
2762 } else {
2763 hostFunc = builder.createBitcast(
2764 GetGlobalOp::create(
2765 builder, loc, PointerType::get(deviceStub.getFunctionType()),
2766 mlir::FlatSymbolRefAttr::get(deviceStub.getSymNameAttr())),
2767 voidPtrTy);
2768 }
2769 builder.createCallOp(
2770 loc, cudaRegisterFunction,
2771 {fatbinHandle, hostFunc, deviceFunc, deviceFunc,
2772 ConstantOp::create(builder, loc, IntAttr::get(intTy, -1)), cirNullPtr,
2773 cirNullPtr, cirNullPtr, cirNullPtr, cirNullPtr});
2774 }
2775}
2776
2777void LoweringPreparePass::runOnOperation() {
2778 mlir::Operation *op = getOperation();
2779 if (isa<::mlir::ModuleOp>(op))
2780 mlirModule = cast<::mlir::ModuleOp>(op);
2781
2782 llvm::SmallVector<mlir::Operation *> opsToTransform;
2783
2784 op->walk([&](mlir::Operation *op) {
2785 if (mlir::isa<cir::ArrayCtor, cir::ArrayDtor, cir::CastOp,
2786 cir::ComplexMulOp, cir::ComplexDivOp, cir::DynamicCastOp,
2787 cir::FuncOp, cir::CallOp, cir::GetGlobalOp, cir::GlobalOp,
2788 cir::StoreOp, cir::CmpThreeWayOp, cir::IncOp, cir::DecOp,
2789 cir::MinusOp, cir::NotOp, cir::LocalInitOp>(op))
2790 opsToTransform.push_back(op);
2791 });
2792
2793 for (mlir::Operation *o : opsToTransform)
2794 runOnOp(o);
2795
2796 buildCXXGlobalInitFunc();
2797 buildCXXGlobalTlsFunc();
2798 if (astCtx->getLangOpts().CUDA && !astCtx->getLangOpts().CUDAIsDevice)
2799 buildCUDAModuleCtor();
2800
2801 buildGlobalCtorDtorList();
2802}
2803
2804std::unique_ptr<Pass> mlir::createLoweringPreparePass() {
2805 return std::make_unique<LoweringPreparePass>();
2806}
2807
2808std::unique_ptr<Pass>
2810 auto pass = std::make_unique<LoweringPreparePass>();
2811 pass->setASTContext(astCtx);
2812 return std::move(pass);
2813}
Defines the clang::ASTContext interface.
static void emitBody(CodeGenFunction &CGF, const Stmt *S, const Stmt *NextLoop, int MaxLevel, int Level=0)
static llvm::FunctionCallee getGuardReleaseFn(CodeGenModule &CGM, llvm::PointerType *GuardPtrTy)
static llvm::FunctionCallee getGuardAcquireFn(CodeGenModule &CGM, llvm::PointerType *GuardPtrTy)
static mlir::Value buildRangeReductionComplexDiv(CIRBaseBuilderTy &builder, mlir::Location loc, mlir::Value lhsReal, mlir::Value lhsImag, mlir::Value rhsReal, mlir::Value rhsImag)
static llvm::StringRef getComplexDivLibCallName(llvm::APFloat::Semantics semantics)
static llvm::SmallVector< mlir::Attribute > prepareCtorDtorAttrList(mlir::MLIRContext *context, llvm::ArrayRef< std::pair< std::string, uint32_t > > list)
static llvm::StringRef getComplexMulLibCallName(llvm::APFloat::Semantics semantics)
static cir::GlobalLinkageKind getThreadLocalWrapperLinkage(GlobalOp op, clang::ASTContext &astCtx)
static mlir::Value buildComplexBinOpLibCall(LoweringPreparePass &pass, CIRBaseBuilderTy &builder, llvm::StringRef(*libFuncNameGetter)(llvm::APFloat::Semantics), mlir::Location loc, cir::ComplexType ty, mlir::Value lhsReal, mlir::Value lhsImag, mlir::Value rhsReal, mlir::Value rhsImag)
static mlir::Value lowerComplexMul(LoweringPreparePass &pass, CIRBaseBuilderTy &builder, mlir::Location loc, cir::ComplexMulOp op, mlir::Value lhsReal, mlir::Value lhsImag, mlir::Value rhsReal, mlir::Value rhsImag)
static std::string addUnderscoredPrefix(llvm::StringRef prefix, llvm::StringRef name)
static SmallString< 128 > getTransformedFileName(mlir::ModuleOp mlirModule)
static mlir::Value lowerComplexToComplexCast(mlir::MLIRContext &ctx, cir::CastOp op, cir::CastKind scalarCastKind)
static void lowerArrayDtorCtorIntoLoop(cir::CIRBaseBuilderTy &builder, clang::ASTContext *astCtx, mlir::Operation *op, mlir::Type eltTy, mlir::Value addr, mlir::Value numElements, uint64_t arrayLen, bool isCtor)
Lower a cir.array.ctor or cir.array.dtor into a do-while loop that iterates over every element.
static bool isThreadWrapperReplaceable(cir::TLS_Model tls, clang::ASTContext &astCtx)
static mlir::Value lowerComplexToScalarCast(mlir::MLIRContext &ctx, cir::CastOp op, cir::CastKind elemToBoolKind)
static mlir::Value buildAlgebraicComplexDiv(CIRBaseBuilderTy &builder, mlir::Location loc, mlir::Value lhsReal, mlir::Value lhsImag, mlir::Value rhsReal, mlir::Value rhsImag)
static llvm::StringRef getCUDAPrefix(clang::ASTContext *astCtx)
static mlir::Type higherPrecisionElementTypeForComplexArithmetic(mlir::MLIRContext &context, clang::ASTContext &cc, CIRBaseBuilderTy &builder, mlir::Type elementType)
static mlir::Value lowerScalarToComplexCast(mlir::MLIRContext &ctx, cir::CastOp op)
static mlir::Value lowerComplexDiv(LoweringPreparePass &pass, CIRBaseBuilderTy &builder, mlir::Location loc, cir::ComplexDivOp op, mlir::Value lhsReal, mlir::Value lhsImag, mlir::Value rhsReal, mlir::Value rhsImag, mlir::MLIRContext &mlirCx, clang::ASTContext &cc)
Defines the clang::Module class, which describes a module in the source code.
static bool compare(const PathDiagnostic &X, const PathDiagnostic &Y)
Defines the SourceManager interface.
Defines various enumerations that describe declaration and type specifiers.
Defines the TargetCXXABI class, which abstracts details of the C++ ABI that we're targeting.
__device__ __2f16 b
__device__ __2f16 float c
mlir::Value createDiv(mlir::Location loc, mlir::Value lhs, mlir::Value rhs)
mlir::TypedAttr getConstNullPtrAttr(mlir::Type t)
mlir::Value createDec(mlir::Location loc, mlir::Value input, bool nsw=false)
mlir::Value createLogicalOr(mlir::Location loc, mlir::Value lhs, mlir::Value rhs)
mlir::Value createSub(mlir::Location loc, mlir::Value lhs, mlir::Value rhs, OverflowBehavior ob=OverflowBehavior::None)
cir::ConditionOp createCondition(mlir::Value condition)
Create a loop condition.
mlir::Value createInc(mlir::Location loc, mlir::Value input, bool nsw=false)
cir::CopyOp createCopy(mlir::Value dst, mlir::Value src, bool isVolatile=false, bool skipTailPadding=false)
Create a copy with inferred length.
cir::VoidType getVoidTy()
cir::ConstantOp getNullValue(mlir::Type ty, mlir::Location loc)
mlir::Value createCast(mlir::Location loc, cir::CastKind kind, mlir::Value src, mlir::Type newTy)
cir::PointerType getVoidFnPtrTy(mlir::TypeRange argTypes={})
Returns void (*)(T...) as a cir::PointerType.
mlir::Value createAdd(mlir::Location loc, mlir::Value lhs, mlir::Value rhs, OverflowBehavior ob=OverflowBehavior::None)
cir::PointerType getPointerTo(mlir::Type ty)
mlir::Value createComplexImag(mlir::Location loc, mlir::Value operand)
cir::ConstantOp getNullPtr(mlir::Type ty, mlir::Location loc)
cir::DoWhileOp createDoWhile(mlir::Location loc, llvm::function_ref< void(mlir::OpBuilder &, mlir::Location)> condBuilder, llvm::function_ref< void(mlir::OpBuilder &, mlir::Location)> bodyBuilder)
Create a do-while operation.
cir::GetGlobalOp createGetGlobal(mlir::Location loc, cir::GlobalOp global, bool threadLocal=false)
mlir::Value getSignedInt(mlir::Location loc, int64_t val, unsigned numBits)
mlir::Value createAnd(mlir::Location loc, mlir::Value lhs, mlir::Value rhs)
mlir::Value createBitcast(mlir::Value src, mlir::Type newTy)
cir::FuncType getVoidFnTy(mlir::TypeRange argTypes={})
Returns void (T...) as a cir::FuncType.
cir::CmpOp createCompare(mlir::Location loc, cir::CmpOpKind kind, mlir::Value lhs, mlir::Value rhs)
mlir::IntegerAttr getAlignmentAttr(clang::CharUnits alignment)
mlir::Value createSelect(mlir::Location loc, mlir::Value condition, mlir::Value trueValue, mlir::Value falseValue)
mlir::Value createMul(mlir::Location loc, mlir::Value lhs, mlir::Value rhs, OverflowBehavior ob=OverflowBehavior::None)
cir::LoadOp createLoad(mlir::Location loc, mlir::Value ptr, bool isVolatile=false, uint64_t alignment=0)
mlir::Value createMinus(mlir::Location loc, mlir::Value input, bool nsw=false)
cir::ConstantOp getConstantInt(mlir::Location loc, mlir::Type ty, int64_t value)
mlir::Value createComplexCreate(mlir::Location loc, mlir::Value real, mlir::Value imag)
cir::PointerType getVoidPtrTy(clang::LangAS langAS=clang::LangAS::Default)
mlir::Value createIsNaN(mlir::Location loc, mlir::Value operand)
cir::IntType getSIntNTy(int n)
mlir::Value createAlignedLoad(mlir::Location loc, mlir::Value ptr, uint64_t alignment)
cir::CallOp createCallOp(mlir::Location loc, mlir::SymbolRefAttr callee, mlir::Type returnType, mlir::ValueRange operands, llvm::ArrayRef< mlir::NamedAttribute > attrs={}, llvm::ArrayRef< mlir::NamedAttrList > argAttrs={}, llvm::ArrayRef< mlir::NamedAttribute > resAttrs={})
cir::StoreOp createStore(mlir::Location loc, mlir::Value val, mlir::Value dst, bool isVolatile=false, mlir::IntegerAttr align={}, cir::SyncScopeKindAttr scope={}, cir::MemOrderAttr order={})
cir::YieldOp createYield(mlir::Location loc, mlir::ValueRange value={})
Create a yield operation.
mlir::Value createLogicalAnd(mlir::Location loc, mlir::Value lhs, mlir::Value rhs)
mlir::Value createAlloca(mlir::Location loc, cir::PointerType addrType, mlir::Type type, llvm::StringRef name, mlir::IntegerAttr alignment, mlir::Value dynAllocSize)
cir::BoolType getBoolTy()
mlir::Value getUnsignedInt(mlir::Location loc, uint64_t val, unsigned numBits)
mlir::Value createComplexReal(mlir::Location loc, mlir::Value operand)
Holds long-lived AST nodes (such as types and decls) that can be referred to throughout the semantic ...
Definition ASTContext.h:227
SourceManager & getSourceManager()
Definition ASTContext.h:866
MangleContext * createMangleContext(const TargetInfo *T=nullptr)
If T is null pointer, assume the target in ASTContext.
const LangOptions & getLangOpts() const
Definition ASTContext.h:959
uint64_t getTypeSize(QualType T) const
Return the size of the specified (complete) type T, in bits.
const TargetInfo & getTargetInfo() const
Definition ASTContext.h:924
QualType getSignedSizeType() const
Return the unique signed counterpart of the integer type corresponding to size_t.
Module * getCurrentNamedModule() const
Get module under construction, nullptr if this is not a C++20 module.
uint64_t getCharWidth() const
Return the size of the character type, in bits.
llvm::Align getAsAlign() const
getAsAlign - Returns Quantity as a valid llvm::Align, Beware llvm::Align assumes power of two 8-bit b...
Definition CharUnits.h:189
QuantityType getQuantity() const
getQuantity - Get the raw integer representation of this quantity.
Definition CharUnits.h:185
static CharUnits One()
One - Construct a CharUnits quantity of one.
Definition CharUnits.h:58
static CharUnits fromQuantity(QuantityType Quantity)
fromQuantity - Construct a CharUnits quantity from a raw integer type.
Definition CharUnits.h:63
llvm::vfs::FileSystem & getVirtualFileSystem() const
bool isModuleImplementation() const
Is this a module implementation.
Definition Module.h:843
FileManager & getFileManager() const
Exposes information about the current target.
Definition TargetInfo.h:227
const llvm::Triple & getTriple() const
Returns the target triple of the primary target.
unsigned getMaxAtomicInlineWidth() const
Return the maximum width lock-free atomic operation which can be inlined given the supported features...
Definition TargetInfo.h:859
const llvm::fltSemantics & getDoubleFormat() const
Definition TargetInfo.h:804
const llvm::fltSemantics & getHalfFormat() const
Definition TargetInfo.h:789
const llvm::fltSemantics & getBFloat16Format() const
Definition TargetInfo.h:799
const llvm::fltSemantics & getLongDoubleFormat() const
Definition TargetInfo.h:810
const llvm::fltSemantics & getFloatFormat() const
Definition TargetInfo.h:794
const llvm::fltSemantics & getFloat128Format() const
Definition TargetInfo.h:818
const llvm::VersionTuple & getSDKVersion() const
Defines the clang::TargetInfo interface.
static bool isLocalLinkage(GlobalLinkageKind linkage)
Definition CIROpsEnums.h:51
static bool isWeakODRLinkage(GlobalLinkageKind linkage)
Definition CIROpsEnums.h:39
static bool isLinkOnceLinkage(GlobalLinkageKind linkage)
Definition CIROpsEnums.h:33
const internal::VariadicDynCastAllOfMatcher< Decl, VarDecl > varDecl
Matches variable declarations.
bool isHIP(ID Id)
isHIP - Is this a HIP input.
Definition Types.cpp:291
RangeSelector name(std::string ID)
Given a node with a "name", (like NamedDecl, DeclRefExpr, CxxCtorInitializer, and TypeLoc) selects th...
bool isTemplateInstantiation(TemplateSpecializationKind Kind)
Determine whether this template specialization kind refers to an instantiation of an entity (as oppos...
Definition Specifiers.h:213
bool CudaFeatureEnabled(llvm::VersionTuple, CudaFeature)
Definition Cuda.cpp:163
LLVM_READONLY bool isPreprocessingNumberBody(unsigned char c)
Return true if this is the body character of a C preprocessing number, which is [a-zA-Z0-9_.
Definition CharInfo.h:168
@ CUDA_USES_FATBIN_REGISTER_END
Definition Cuda.h:80
unsigned int uint32_t
std::unique_ptr< Pass > createLoweringPreparePass()
static bool hipModuleCtor()
static bool guardAbortOnException()
static bool opGlobalAnnotations()
static bool opGlobalCtorPriority()
static bool shouldSplitConstantStore()
static bool shouldUseMemSetToInitialize()
static bool opFuncExtraAttrs()
static bool shouldUseBZeroPlusStoresToInitialize()
static bool globalRegistration()
static bool fastMathFlags()
static bool astVarDeclInterface()