clang 23.0.0git
CIRABIRewriteContext.cpp
Go to the documentation of this file.
1//===- CIRABIRewriteContext.cpp - CIR ABI rewrite context ----------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
10#include "mlir/IR/Builders.h"
11#include "mlir/IR/Dominance.h"
14
15using namespace cir;
16using namespace mlir;
17using namespace mlir::abi;
18
19// This rewrite context supports the Direct (with or without coercion),
20// Extend, Ignore, and Indirect-return (sret) classifications. Indirect
21// arguments (byval) and Expand still emit an errorNYI here rather than
22// silently passing through, because the IR they would produce is wrong
23// (e.g. Expand should flatten an aggregate into multiple primitives, not
24// pass it through as a single value). byval and struct coercion are not
25// yet handled here; they need the signature-shaping that goes with them
26// (byval inserts an extra pointer argument, struct coercion replaces one
27// argument with several).
28
29namespace {
30
31bool needsRewrite(const FunctionClassification &fc) {
32 // Direct without coercion is a true pass-through; any other kind (or a
33 // coerced Direct) means the rewriter must touch the IR. Extend is
34 // technically attribute-only at the IR level but still counts because the
35 // attribute attachment changes observable behavior.
36 if ((fc.returnInfo.kind != ArgKind::Direct) || fc.returnInfo.coercedType)
37 return true;
38 for (const ArgClassification &ac : fc.argInfos)
39 if ((ac.kind != ArgKind::Direct) || ac.coercedType)
40 return true;
41 return false;
42}
43
44/// Build the new argument-type list for a function whose ABI classification
45/// is \p fc. Handles Direct (with or without coercion), Extend, and Ignore.
46/// Indirect (byval) arguments and Expand emit an error. The sret return
47/// pointer, when present, is prepended by rewriteFunctionDefinition rather
48/// than here.
49mlir::LogicalResult
50buildNewArgTypes(ArrayRef<mlir::Type> oldArgTypes,
51 const FunctionClassification &fc,
52 SmallVectorImpl<mlir::Type> &newArgTypes,
53 function_ref<mlir::InFlightDiagnostic()> emitError) {
54 assert(newArgTypes.empty() && "expected an empty output vector");
55 newArgTypes.reserve(oldArgTypes.size());
56 for (auto [idx, ac] : llvm::enumerate(fc.argInfos)) {
57 mlir::Type origTy = oldArgTypes[idx];
58 switch (ac.kind) {
59 case ArgKind::Direct:
60 // Direct with a coerced type means the wire signature uses the
61 // coerced type; the body still expects origTy and we'll insert a
62 // coercion at the entry block. Direct without a coerced type is a
63 // true pass-through.
64 newArgTypes.push_back(ac.coercedType ? ac.coercedType : origTy);
65 break;
66 case ArgKind::Ignore:
67 break;
68 case ArgKind::Expand:
69 emitError() << "Expand at arg " << idx
70 << " not yet implemented in CallConvLowering";
71 return mlir::failure();
72 case ArgKind::Extend:
73 // Extend keeps the original (narrow) type in the signature; the
74 // sign/zero extension is communicated to LLVM via the llvm.signext /
75 // llvm.zeroext arg attribute, attached separately below. Any
76 // coercedType the classifier set on the Extend ArgClassification is
77 // informational (typically the register-width type the value gets
78 // extended to in registers) but does not change the CIR signature.
79 newArgTypes.push_back(origTy);
80 break;
81 case ArgKind::Indirect:
82 emitError() << "Indirect at arg " << idx
83 << " not yet implemented in CallConvLowering";
84 return mlir::failure();
85 }
86 }
87 return mlir::success();
88}
89
90/// Compute the new return type for a function whose return classification
91/// is \p retInfo. Direct returns keep (or coerce to) their type, Ignore and
92/// Indirect (sret) returns become void, Extend keeps its type; Expand emits
93/// an error.
94mlir::Type
95computeNewReturnType(mlir::Type origRetTy, const ArgClassification &retInfo,
96 mlir::MLIRContext *ctx,
97 function_ref<mlir::InFlightDiagnostic()> emitError) {
98 switch (retInfo.kind) {
99 case ArgKind::Direct:
100 // Direct return with a coerced type uses the coerced type on the wire;
101 // the rewriter inserts a coercion before each cir.return.
102 return retInfo.coercedType ? retInfo.coercedType : origRetTy;
103 case ArgKind::Ignore:
104 return cir::VoidType::get(ctx);
105 case ArgKind::Expand:
106 emitError() << "Expand return is not allowed (classic codegen rejects "
107 << "it in EmitFunctionEpilog)";
108 return nullptr;
109 case ArgKind::Extend:
110 // Same convention as Extend args: keep the original return type in the
111 // signature; the sign/zero extension is communicated via the
112 // llvm.signext / llvm.zeroext res attribute attached separately below.
113 return origRetTy;
114 case ArgKind::Indirect:
115 // sret: the value is returned through a pointer argument that the ABI
116 // synthesizes (rewriteFunctionDefinition prepends it to the argument
117 // list); it is not part of the source-level signature, so the wire
118 // return type becomes void.
119 return cir::VoidType::get(ctx);
120 }
121 llvm_unreachable("all ArgKind cases handled");
122}
123
124/// Create a typed poison constant to stand in for a value the body of a
125/// function (or the result of a call) still references but whose ABI
126/// classification is Ignore. Using poison is honest -- the value is
127/// genuinely unused at the ABI boundary -- and avoids a fake alloca+load
128/// pattern that would suggest we have a value when we don't.
129mlir::Value createIgnoredValue(mlir::OpBuilder &builder, mlir::Location loc,
130 mlir::Type ty) {
131 return cir::ConstantOp::create(builder, loc, ty, cir::PoisonAttr::get(ty));
132}
133
134/// Build an updated arg_attrs ArrayAttr that drops Ignore'd args and adds
135/// llvm.signext / llvm.zeroext on Extend args. Preserves any existing arg
136/// attributes on retained arg slots.
137mlir::ArrayAttr updateArgAttrs(mlir::MLIRContext *ctx,
138 mlir::ArrayAttr existingArgAttrs,
139 const FunctionClassification &fc) {
141 newArgAttrs.reserve(fc.argInfos.size());
142 for (auto [oldIdx, ac] : llvm::enumerate(fc.argInfos)) {
143 if (ac.kind == ArgKind::Ignore)
144 continue;
145 mlir::DictionaryAttr existing = mlir::DictionaryAttr::get(ctx);
146 if (existingArgAttrs && oldIdx < existingArgAttrs.size())
147 existing = mlir::cast<mlir::DictionaryAttr>(existingArgAttrs[oldIdx]);
148 if (ac.kind == ArgKind::Extend) {
149 StringRef attrName = ac.signExtend ? "llvm.signext" : "llvm.zeroext";
150 mlir::NamedAttribute extAttr(mlir::StringAttr::get(ctx, attrName),
151 mlir::UnitAttr::get(ctx));
152 if (existing.empty()) {
153 newArgAttrs.push_back(mlir::DictionaryAttr::get(ctx, {extAttr}));
154 } else {
155 SmallVector<mlir::NamedAttribute> attrs(existing.begin(),
156 existing.end());
157 attrs.push_back(extAttr);
158 newArgAttrs.push_back(mlir::DictionaryAttr::get(ctx, attrs));
159 }
160 } else {
161 newArgAttrs.push_back(existing);
162 }
163 }
164 return mlir::ArrayAttr::get(ctx, newArgAttrs);
165}
166
167/// Build an updated res_attrs ArrayAttr (single entry, since CIR funcs have
168/// at most one result) that adds llvm.signext / llvm.zeroext on an Extend
169/// return. Preserves any existing res attributes.
170mlir::ArrayAttr updateResAttrs(mlir::MLIRContext *ctx,
171 mlir::ArrayAttr existingResAttrs,
172 const ArgClassification &retInfo) {
173 if (retInfo.kind != ArgKind::Extend)
174 return existingResAttrs;
175
177 if (existingResAttrs && !existingResAttrs.empty())
178 for (mlir::NamedAttribute na :
179 mlir::cast<mlir::DictionaryAttr>(existingResAttrs[0]))
180 attrs.push_back(na);
181 StringRef attrName = retInfo.signExtend ? "llvm.signext" : "llvm.zeroext";
182 attrs.push_back(mlir::NamedAttribute(mlir::StringAttr::get(ctx, attrName),
183 mlir::UnitAttr::get(ctx)));
184 return mlir::ArrayAttr::get(ctx, {mlir::DictionaryAttr::get(ctx, attrs)});
185}
186
187/// Coerce \p src to type \p dstTy at the current builder insertion point by
188/// going through memory: allocate a slot, store the source, then load the
189/// destination type back out. Lowers uniformly for scalar, vector, and
190/// record types.
191///
192/// The slot is sized to the larger of the two types so that neither the
193/// store nor the load ever runs past it: the coerced ABI type can be larger
194/// than the original (e.g. a 12-byte aggregate returned as `{i64, i64}`), so
195/// loading the destination out of a source-sized slot would over-read.
196/// Alignment is max(srcAlign, dstAlign) to satisfy both accesses. The slot
197/// is accessed through a source-typed view for the store and a
198/// destination-typed view for the load.
199///
200/// The temporary alloca is placed at the start of the enclosing function's
201/// entry block so that it composes correctly with the HoistAllocas pass
202/// regardless of pipeline ordering.
203///
204/// Any operations the helper creates are appended to \p createdOps so the
205/// caller can pass them to replaceAllUsesExcept and avoid clobbering the
206/// store's value operand when later rewiring the source value.
207mlir::Value emitCoercion(mlir::OpBuilder &builder, mlir::Location loc,
208 mlir::Type dstTy, mlir::Value src,
209 mlir::FunctionOpInterface funcOp,
210 const mlir::DataLayout &dl,
211 SmallPtrSetImpl<mlir::Operation *> &createdOps) {
212 mlir::Type srcTy = src.getType();
213 assert(srcTy != dstTy &&
214 "emitCoercion callers must pre-check that the types differ");
215
216 uint64_t srcAlign = dl.getTypeABIAlignment(srcTy);
217 uint64_t dstAlign = dl.getTypeABIAlignment(dstTy);
218 uint64_t allocaAlign = std::max(srcAlign, dstAlign);
219 mlir::Type slotTy =
220 dl.getTypeSize(srcTy) >= dl.getTypeSize(dstTy) ? srcTy : dstTy;
221
222 auto slotPtrTy = cir::PointerType::get(slotTy);
223 auto srcPtrTy = cir::PointerType::get(srcTy);
224 auto dstPtrTy = cir::PointerType::get(dstTy);
225
226 cir::AllocaOp alloca;
227 {
228 mlir::OpBuilder::InsertionGuard guard(builder);
229 mlir::Block &entry = funcOp->getRegion(0).front();
230 builder.setInsertionPointToStart(&entry);
231 alloca = cir::AllocaOp::create(builder, loc, slotPtrTy,
232 builder.getStringAttr("coerce"),
233 builder.getI64IntegerAttr(allocaAlign));
234 }
235 createdOps.insert(alloca);
236
237 // Store through a source-typed view of the slot.
238 mlir::Value srcSlot = alloca;
239 if (slotTy != srcTy) {
240 auto srcCast = cir::CastOp::create(builder, loc, srcPtrTy,
241 cir::CastKind::bitcast, alloca);
242 createdOps.insert(srcCast);
243 srcSlot = srcCast;
244 }
245 auto store = cir::StoreOp::create(builder, loc, src, srcSlot);
246 createdOps.insert(store);
247
248 // Load through a destination-typed view of the slot.
249 mlir::Value dstSlot = alloca;
250 if (slotTy != dstTy) {
251 auto dstCast = cir::CastOp::create(builder, loc, dstPtrTy,
252 cir::CastKind::bitcast, alloca);
253 createdOps.insert(dstCast);
254 dstSlot = dstCast;
255 }
256 auto load = cir::LoadOp::create(builder, loc, dstSlot);
257 createdOps.insert(load);
258 return load;
259}
260
261/// Convenience overload for callers that don't need the createdOps set
262/// (e.g. call-site coercion where we don't replaceAllUsesExcept).
263mlir::Value emitCoercion(mlir::OpBuilder &builder, mlir::Location loc,
264 mlir::Type dstTy, mlir::Value src,
265 mlir::FunctionOpInterface funcOp,
266 const mlir::DataLayout &dl) {
267 SmallPtrSet<mlir::Operation *, 4> ignored;
268 return emitCoercion(builder, loc, dstTy, src, funcOp, dl, ignored);
269}
270
271/// Insert coercion before each cir.return so the returned value matches the
272/// new (coerced) return type.
273void insertReturnCoercion(mlir::FunctionOpInterface funcOp,
274 mlir::Type origRetTy, mlir::Type coercedRetTy,
275 mlir::OpBuilder &builder,
276 const mlir::DataLayout &dl) {
278 funcOp.walk([&](cir::ReturnOp r) { returns.push_back(r); });
279 for (cir::ReturnOp r : returns) {
280 if (r.getInput().empty())
281 continue;
282 mlir::Value origVal = r.getInput()[0];
283 if (origVal.getType() == coercedRetTy)
284 continue;
285 builder.setInsertionPoint(r);
286 mlir::Value coerced =
287 emitCoercion(builder, r.getLoc(), coercedRetTy, origVal, funcOp, dl);
288 r->setOperand(0, coerced);
289 }
290}
291
292/// For each Direct arg with a coerced type, change the block argument's type
293/// to the coerced type and insert a coercion at function entry that maps it
294/// back to the original type for body uses.
295///
296/// The entry block arguments mirror the function's ABI signature: argument
297/// \p hasSRetArg shifts the classification index by one because a hidden
298/// sret pointer occupies block argument 0 when the function returns by
299/// reference. So fc.argInfos[i] corresponds to block argument
300/// i + hasSRetArg.
301void insertArgCoercion(mlir::FunctionOpInterface funcOp,
302 const FunctionClassification &fc,
303 mlir::OpBuilder &builder, const mlir::DataLayout &dl,
304 bool hasSRetArg) {
305 mlir::Region &body = funcOp->getRegion(0);
306 if (body.empty())
307 return;
308 mlir::Block &entry = body.front();
309
310 for (auto [idx, ac] : llvm::enumerate(fc.argInfos)) {
311 if (ac.kind != ArgKind::Direct || !ac.coercedType)
312 continue;
313 unsigned blockIdx = idx + hasSRetArg;
314 if (blockIdx >= entry.getNumArguments())
315 continue;
316
317 mlir::BlockArgument blockArg = entry.getArgument(blockIdx);
318 mlir::Type oldArgTy = blockArg.getType();
319 mlir::Type newArgTy = ac.coercedType;
320 if (oldArgTy == newArgTy)
321 continue;
322
323 blockArg.setType(newArgTy);
324
325 builder.setInsertionPointToStart(&entry);
326 SmallPtrSet<mlir::Operation *, 4> coercionOps;
327 mlir::Value adapted = emitCoercion(builder, funcOp.getLoc(), oldArgTy,
328 blockArg, funcOp, dl, coercionOps);
329
330 // Replace blockArg uses with the adapted value, except inside the helper
331 // ops we just created. This is critical: the StoreOp's value operand is
332 // blockArg, and if we naively replaceAllUses it gets swapped to adapted
333 // (now of the original type != the alloca's pointee type).
334 blockArg.replaceAllUsesExcept(adapted, coercionOps);
335 }
336}
337
338/// Rewrite each cir.return so the return value flows through the sret
339/// pointer (the prepended first block argument) and the function returns
340/// void.
341///
342/// CIRGen emits a local `__retval` alloca and emits `cir.return %loaded`
343/// where `%loaded = cir.load __retval`. The naive lowering -- store the
344/// loaded SSA value through the sret pointer -- byte-copies the record,
345/// which is wrong for non-trivially-copyable types: e.g. libstdc++'s SSO
346/// `std::string` has a `_M_p` pointer that aliases the source's internal
347/// `_M_local_buf`, so a byte-copy leaves the destination pointing at the
348/// source's (now-dying) stack storage and the destination's destructor
349/// later `free()`s a stack pointer.
350///
351/// Instead, route construction directly into the sret slot: find the
352/// `__retval` alloca, replace its uses with the sret pointer, and drop the
353/// trailing `cir.load __retval` so the rewritten return has no operand.
354/// The CIRGen-emitted constructor / store-into-`__retval` then targets the
355/// sret slot uniformly, matching classic CodeGen's "construct directly into
356/// `%agg.result`" pattern.
357///
358/// CIRGen emits one `%v = cir.load %__retval` / `cir.return %v` pair per
359/// return statement, and every such load reads the single `__retval`
360/// alloca (CIR does not merge returns into a shared epilogue block). The
361/// alloca is therefore rewired to the sret pointer once; each cir.return is
362/// then collapsed to a bare return and its now-dead load erased. This
363/// `cir.return (cir.load <alloca>)` shape is an invariant guaranteed by
364/// CIRGen, so it is asserted via `cast<>` rather than guarded with a
365/// fallback.
366void insertSRetStores(mlir::FunctionOpInterface funcOp, mlir::Type origRetTy,
367 mlir::OpBuilder &builder) {
368 mlir::Value sretPtr = funcOp.getArguments()[0];
369
371 funcOp->walk([&](cir::ReturnOp retOp) { returnOps.push_back(retOp); });
372
373 cir::AllocaOp retAlloca = nullptr;
374 for (cir::ReturnOp retOp : returnOps) {
375 // Every cir.return in an sret function must carry the loaded return
376 // value -- a bare return would mean the sret slot was never written.
377 assert(!retOp.getInput().empty() &&
378 "cir.return in sret function must have an operand");
379
380 cir::LoadOp retLoad =
381 mlir::cast<cir::LoadOp>(retOp.getInput()[0].getDefiningOp());
382
383 // Rewire the shared `__retval` alloca to the sret pointer once.
384 // replaceAllUsesWith updates every load of the alloca (including those
385 // feeding the other cir.return ops) to read from sretPtr instead, so
386 // all returns are covered by this single rewiring. Only then is the
387 // now-unused alloca safe to erase.
388 if (!retAlloca) {
389 retAlloca = mlir::cast<cir::AllocaOp>(retLoad.getAddr().getDefiningOp());
390 retAlloca.getResult().replaceAllUsesWith(sretPtr);
391 retAlloca->erase();
392 }
393
394 // The sret slot now holds the return value directly; replace the
395 // value-carrying return with a void return (no operand).
396 builder.setInsertionPoint(retOp);
397 cir::ReturnOp::create(builder, retOp.getLoc());
398 retOp->erase();
399 if (retLoad.use_empty())
400 retLoad->erase();
401 }
402}
403
404/// Build the attribute dictionary for the sret slot (slot 0 of an
405/// sret-returning function or call). Matches classic CodeGen's
406/// `sret(T) align A [noalias] writable dead_on_unwind`. noalias is only
407/// valid on the callee's parameter, not at the call site, so it is gated by
408/// \p withNoalias. Key order is irrelevant: DictionaryAttr sorts by name.
409SmallVector<mlir::NamedAttribute> buildSretSlotAttrs(mlir::OpBuilder &builder,
410 mlir::Type retTy,
411 uint64_t align,
412 bool withNoalias) {
414 // The sret type must be carried explicitly: LLVM's sret attribute requires
415 // it, and once the CIR `!cir.ptr<retTy>` lowers to an opaque LLVM `ptr` the
416 // pointee type can no longer be recovered from the pointer.
417 attrs.push_back(
418 builder.getNamedAttr("llvm.sret", mlir::TypeAttr::get(retTy)));
419 attrs.push_back(
420 builder.getNamedAttr("llvm.align", builder.getI64IntegerAttr(align)));
421 if (withNoalias)
422 attrs.push_back(
423 builder.getNamedAttr("llvm.noalias", builder.getUnitAttr()));
424 attrs.push_back(builder.getNamedAttr("llvm.writable", builder.getUnitAttr()));
425 attrs.push_back(
426 builder.getNamedAttr("llvm.dead_on_unwind", builder.getUnitAttr()));
427 return attrs;
428}
429
430/// Prepend the sret slot's attrs at position 0 of newCall's arg_attrs.
431/// Called after the call has been rewritten with the sret pointer at
432/// operand 0, so the operand count now includes the sret slot. \p argAttrs
433/// must already be shaped for the rewritten argument list (Extend slots
434/// carry signext/zeroext, Ignore slots dropped); it is shifted to slots
435/// 1..N behind the sret slot.
436void applySretSlotAttrs(cir::CallOp newCall, mlir::ArrayAttr argAttrs,
437 mlir::Type retTy, uint64_t align,
438 mlir::OpBuilder &builder) {
439 mlir::MLIRContext *ctx = newCall->getContext();
441 buildSretSlotAttrs(builder, retTy, align, /*withNoalias=*/false);
442
444 newArgAttrs.reserve(newCall.getArgOperands().size());
445 newArgAttrs.push_back(mlir::DictionaryAttr::get(ctx, sretAttrs));
446 if (argAttrs)
447 llvm::append_range(newArgAttrs, argAttrs);
448 assert(newArgAttrs.size() <= newCall.getArgOperands().size() &&
449 "arg_attrs wider than the rewritten call's operand list");
450 newArgAttrs.resize(newCall.getArgOperands().size(),
451 mlir::DictionaryAttr::get(ctx));
452 newCall->setAttr("arg_attrs", mlir::ArrayAttr::get(ctx, newArgAttrs));
453}
454
455/// Rewrite an indirect-return (sret) call site: prepend a return-slot
456/// pointer as operand 0, make the call return void, and either reuse a
457/// dominating single-use store destination as the slot (so construction
458/// flows directly into it) or allocate a fresh slot and load the result
459/// back out. \p newArgs is the already-shaped (Ignore-dropped,
460/// coercion-applied) non-sret argument list. The caller guarantees the
461/// call has a result and an indirect-return classification.
462void rewriteIndirectReturnCall(cir::CallOp call,
463 const FunctionClassification &fc,
464 ArrayRef<mlir::Value> newArgs,
465 mlir::Type origRetTy, mlir::OpBuilder &builder) {
466 mlir::MLIRContext *ctx = call->getContext();
467 auto ptrTy = cir::PointerType::get(origRetTy);
468 builder.setInsertionPoint(call);
469 uint64_t sretAlign = fc.returnInfo.indirectAlign.value();
470
471 // CIRGen emits `cir.store %callResult, %dest` when the call's result is
472 // bound to a local (e.g. `T s = make();`). Allocating a fresh sret slot
473 // and copying into %dest would byte-copy the record, which is wrong for
474 // non-trivially-copyable types (the libstdc++ SSO `_M_p` pointer
475 // survives a byte-copy but ends up pointing at the dying temp's local
476 // buffer, so the destination's destructor later `free()`s a stack
477 // pointer). When the result has a single store-into-%dest use, use
478 // %dest as the sret slot directly so construction flows into it,
479 // matching classic CodeGen's "pass %s as sret" pattern. %dest must
480 // dominate the call so the rewritten call (which takes it as operand 0)
481 // does not use a value before its definition.
482 mlir::Value sretSlot = nullptr;
483 cir::StoreOp reuseStore = nullptr;
484 if (call.getResult().hasOneUse()) {
485 mlir::Operation *user = *call.getResult().getUsers().begin();
486 if (auto store = mlir::dyn_cast<cir::StoreOp>(user))
487 if (store.getValue() == call.getResult() &&
488 store.getAddr().getType() == ptrTy &&
489 mlir::DominanceInfo().properlyDominates(store.getAddr(), call)) {
490 sretSlot = store.getAddr();
491 reuseStore = store;
492 }
493 }
494 if (!sretSlot) {
495 auto alloca = cir::AllocaOp::create(
496 builder, call.getLoc(), ptrTy,
497 /*name=*/builder.getStringAttr("sret"),
498 /*alignment=*/builder.getI64IntegerAttr(sretAlign));
499 sretSlot = alloca;
500 }
501
503 sretArgs.push_back(sretSlot);
504 sretArgs.append(newArgs.begin(), newArgs.end());
505
506 mlir::Type sretVoidTy = cir::VoidType::get(ctx);
507 auto newCall = cir::CallOp::create(
508 builder, call.getLoc(), call.getCalleeAttr(), sretVoidTy, sretArgs);
509 for (mlir::NamedAttribute attr : call->getAttrs())
510 if (!newCall->hasAttr(attr.getName()))
511 newCall->setAttr(attr.getName(), attr.getValue());
512
513 // Shape the per-argument attrs exactly as the non-sret path does
514 // (signext / zeroext for Extend, drop Ignore slots) before prepending
515 // the sret slot, so sret composes correctly with Extend / Ignore args.
516 mlir::ArrayAttr argAttrs = call->getAttrOfType<mlir::ArrayAttr>("arg_attrs");
517 bool needsArgAttrUpdate =
518 llvm::any_of(fc.argInfos, [](const ArgClassification &ac) {
519 return ac.kind == ArgKind::Ignore || ac.kind == ArgKind::Extend;
520 });
521 if (needsArgAttrUpdate)
522 argAttrs = updateArgAttrs(ctx, argAttrs, fc);
523 applySretSlotAttrs(newCall, argAttrs, origRetTy, sretAlign, builder);
524
525 if (reuseStore) {
526 // The callee now constructs directly into the destination slot, so the
527 // original store-from-result is redundant; dropping it avoids a
528 // byte-copy of the record.
529 reuseStore->erase();
530 } else {
531 builder.setInsertionPointAfter(newCall);
532 auto load = cir::LoadOp::create(builder, call.getLoc(), origRetTy, sretSlot,
533 /*isDeref=*/mlir::UnitAttr(),
534 /*isVolatile=*/mlir::UnitAttr(),
535 /*alignment=*/mlir::IntegerAttr(),
536 /*sync_scope=*/cir::SyncScopeKindAttr(),
537 /*mem_order=*/cir::MemOrderAttr());
538 call.getResult().replaceAllUsesWith(load);
539 }
540 call->erase();
541}
542
543} // namespace
544
// Rewrites a cir.func in place so that its signature, entry-block arguments,
// cir.return ops, and arg_attrs / res_attrs agree with the ABI classification
// `fc`. Returns success when nothing needs rewriting or the rewrite
// completes, and failure after a diagnostic for unsupported classifications.
//
// NOTE(review): the first line of this definition (listing line 545: return
// type and qualified function name) is absent from this listing. Sibling
// comments refer to this function as rewriteFunctionDefinition, and the body
// returns mlir::success() / mlir::failure() -- confirm against the real file.
// NOTE(review): `dl` is not declared in this function; presumably a
// DataLayout member of the enclosing class -- confirm.
546 mlir::FunctionOpInterface funcOpInterface, const FunctionClassification &fc,
547 mlir::OpBuilder &builder) {
548 // The pass driver (CallConvLoweringPass) only ever hands us cir.func ops.
549 // Cast once at the top so the rest of the function reads in CIR's own
550 // vocabulary, and so we can dispatch to the CIRGlobalValueInterface for
551 // isDefinition() (FunctionOpInterface alone does not inherit from
552 // CIRGlobalValueInterface).
553 cir::FuncOp funcOp = mlir::cast<cir::FuncOp>(funcOpInterface);
554
555 if (!needsRewrite(fc))
556 return mlir::success();
557
558 ArrayRef<mlir::Type> oldArgTypes = funcOp.getArgumentTypes();
559 ArrayRef<mlir::Type> oldResultTypes = funcOp.getResultTypes();
560 mlir::MLIRContext *ctx = funcOp->getContext();
561
562 // CIR follows LLVM IR's single-result rule: a function returns either
563 // zero or one value. Document the invariant so a future multi-result
564 // change forces us to revisit the return-handling below.
565 assert(oldResultTypes.size() <= 1 &&
566 "CIR functions return zero or one value");
567
// Step 1: compute the new signature (argument types, then return type).
// Unsupported kinds are diagnosed on funcOp and abort the rewrite.
568 SmallVector<mlir::Type> newArgTypes;
569 if (mlir::failed(buildNewArgTypes(oldArgTypes, fc, newArgTypes,
570 [&]() { return funcOp.emitOpError(); })))
571 return mlir::failure();
572
573 mlir::Type voidTy = cir::VoidType::get(ctx);
574 mlir::Type origRetTy = oldResultTypes.empty() ? voidTy : oldResultTypes[0];
575 mlir::Type newRetTy = computeNewReturnType(
576 origRetTy, fc.returnInfo, ctx, [&]() { return funcOp.emitOpError(); });
577 if (!newRetTy)
578 return mlir::failure();
579 SmallVector<mlir::Type> newResultTypes = {newRetTy};
580
581 // sret return: the value is returned through a pointer the ABI inserts as
582 // argument 0. This pointer is not part of the function's source-level
583 // signature -- it is synthesized here -- and the wire return type was
584 // already set to void by computeNewReturnType. Every classification index
585 // therefore maps to a block argument shifted by one in the body handling
586 // below.
587 bool hasSRet =
588 fc.returnInfo.kind == ArgKind::Indirect && !oldResultTypes.empty();
589 if (hasSRet)
590 newArgTypes.insert(newArgTypes.begin(), cir::PointerType::get(origRetTy));
591
// Step 2: body rewriting, only for definitions. The order matters: sret
// insertion first, then argument coercion, then return coercion, then the
// Ignore-argument drop.
592 if (funcOp.isDefinition()) {
593 mlir::Region &body = funcOp->getRegion(0);
594 if (!body.empty()) {
595 // Prepend the sret pointer block argument and route every cir.return
596 // through it before any index-based argument handling below (which
597 // then accounts for the +1 offset).
598 if (hasSRet) {
599 body.front().insertArgument(0u, cir::PointerType::get(origRetTy),
600 funcOp.getLoc());
601 insertSRetStores(funcOp, origRetTy, builder);
602 }
603
// insertArgCoercion only touches Direct args that carry a coercedType;
// every other kind (Extend included) is skipped there.
604 // In-body coercion for Direct-with-coerce args only: change
605 // block-arg types to the coerced types and insert a memory roundtrip
606 // at the top of the entry block that converts each coerced value back
607 // to its original type, then route existing body uses (including
608 // in-body cir.call operands) through the recovered value. Done before
609 // the Ignore-drop below so the entry block argument indices used here
610 // still refer to the original positions.
611 insertArgCoercion(funcOp, fc, builder, dl, hasSRet);
612
613 // Direct return with coerced type: insert a coercion at every
614 // cir.return so the returned value matches the (coerced) return
615 // type in the new function signature set below.
616 if (fc.returnInfo.kind == ArgKind::Direct && fc.returnInfo.coercedType &&
617 !oldResultTypes.empty() && fc.returnInfo.coercedType != origRetTy)
618 insertReturnCoercion(funcOp, origRetTy, fc.returnInfo.coercedType,
619 builder, dl);
620
621 mlir::Block &entry = body.front();
622
623 // For each Ignored argument: drop the block argument and, if the
624 // body still references it, replace those uses with a poison
625 // constant. Ignore classifications mean the value is empty / not
626 // passed at the ABI level, so any remaining uses are vacuous;
627 // poison says exactly that. Iterate in reverse so that earlier
628 // indices stay stable as later ones are erased.
629 for (int argInfoIdx = static_cast<int>(fc.argInfos.size()) - 1;
630 argInfoIdx >= 0; --argInfoIdx) {
631 if (fc.argInfos[argInfoIdx].kind != ArgKind::Ignore)
632 continue;
// Shift by one when the sret pointer occupies block argument 0.
633 unsigned blockIdx = static_cast<unsigned>(argInfoIdx) + hasSRet;
634 if (blockIdx >= entry.getNumArguments())
635 continue;
636 mlir::BlockArgument arg = entry.getArgument(blockIdx);
637 if (!arg.use_empty()) {
638 builder.setInsertionPointToStart(&entry);
639 mlir::Value poison =
640 createIgnoredValue(builder, funcOp.getLoc(), arg.getType());
641 arg.replaceAllUsesWith(poison);
642 }
643 entry.eraseArgument(blockIdx);
644 }
645 }
646
647 // When the return is classified Ignore but the original function had
648 // a non-void return type, every cir.return becomes a naked return.
649 // This relies on the invariant that computeNewReturnType has set
650 // newRetTy = void for Ignore above, and that the function type is
651 // updated below to match. Asserting this keeps the dependency
652 // explicit.
653 if (fc.returnInfo.kind == ArgKind::Ignore && !oldResultTypes.empty()) {
654 assert(mlir::isa<cir::VoidType>(newRetTy) &&
655 "Ignore-return path requires the new return type to be void");
// NOTE(review): the declaration of `returns` (listing line 656) is absent
// from this listing; presumably `SmallVector<cir::ReturnOp> returns;` --
// confirm. The returns are collected first so the body is not mutated while
// it is being walked.
657 funcOp.walk([&](cir::ReturnOp r) { returns.push_back(r); });
658 for (cir::ReturnOp r : returns) {
659 if (r.getNumOperands() == 0)
660 continue;
661 builder.setInsertionPoint(r);
662 cir::ReturnOp::create(builder, r.getLoc());
663 r.erase();
664 }
665 }
666 }
667
// Step 3: install the new function type (applies to declarations too).
668 mlir::Type newFnTy = funcOp.cloneTypeWith(newArgTypes, newResultTypes);
669 funcOp.setFunctionTypeAttr(mlir::TypeAttr::get(newFnTy));
670
671 // Rebuild arg_attrs when the function has an sret slot (slot 0 needs the
672 // sret attribute set) or any arg is Ignore (dropped from the output array)
673 // or Extend (needs llvm.signext / llvm.zeroext layered on).
674 bool needsArgAttrUpdate =
675 hasSRet || llvm::any_of(fc.argInfos, [](const ArgClassification &ac) {
676 return ac.kind == ArgKind::Ignore || ac.kind == ArgKind::Extend;
677 });
678 if (needsArgAttrUpdate) {
679 auto existing = funcOp->getAttrOfType<mlir::ArrayAttr>("arg_attrs");
680 mlir::ArrayAttr updated = updateArgAttrs(ctx, existing, fc);
681 if (hasSRet) {
682 // Prepend the sret slot's attribute dict (slot 0); the per-argument
683 // dicts shift to slots 1..N. noalias is valid only on the callee's
684 // parameter, so it is added only for definitions.
685 SmallVector<mlir::NamedAttribute> sretAttrs = buildSretSlotAttrs(
686 builder, origRetTy, fc.returnInfo.indirectAlign.value(),
687 /*withNoalias=*/funcOp.isDefinition());
// NOTE(review): the declaration of `withSret` (listing line 688) is absent
// from this listing; presumably `SmallVector<mlir::Attribute> withSret;`,
// since it is passed to ArrayAttr::get below -- confirm.
689 withSret.push_back(mlir::DictionaryAttr::get(ctx, sretAttrs));
690 llvm::append_range(withSret, updated);
691 funcOp->setAttr("arg_attrs", mlir::ArrayAttr::get(ctx, withSret));
692 } else {
693 funcOp->setAttr("arg_attrs", updated);
694 }
695 }
696
697 // Rebuild res_attrs: layer llvm.signext / llvm.zeroext onto an Extend
698 // return.
699 if (fc.returnInfo.kind == ArgKind::Extend) {
700 auto existing = funcOp->getAttrOfType<mlir::ArrayAttr>("res_attrs");
701 funcOp->setAttr("res_attrs", updateResAttrs(ctx, existing, fc.returnInfo));
702 }
703
704 return mlir::success();
705}
706
707mlir::LogicalResult
709 const FunctionClassification &fc,
710 mlir::OpBuilder &builder) {
711 if (!needsRewrite(fc))
712 return mlir::success();
713
714 if (mlir::isa<cir::TryCallOp>(callOp))
715 return callOp->emitOpError()
716 << "TryCallOp not yet implemented in CallConvLowering";
717
718 auto call = mlir::cast<cir::CallOp>(callOp);
719 if (call.isIndirect())
720 return call.emitOpError()
721 << "indirect call not yet implemented in CallConvLowering";
722
723 mlir::MLIRContext *ctx = callOp->getContext();
724 auto enclosingFunc = call->getParentOfType<mlir::FunctionOpInterface>();
725
726 for (auto [idx, ac] : llvm::enumerate(fc.argInfos)) {
727 switch (ac.kind) {
728 case ArgKind::Direct:
729 case ArgKind::Ignore:
730 break;
731 case ArgKind::Expand:
732 return call.emitOpError() << "Expand at call-site arg " << idx
733 << " not yet implemented in CallConvLowering";
734 case ArgKind::Extend:
735 // Direct (with or without coercion), Ignore, Expand, and Extend are
736 // all handled below. Extend is attribute-only at the IR level.
737 break;
738 case ArgKind::Indirect:
739 return call.emitOpError() << "Indirect at call-site arg " << idx
740 << " not yet implemented in CallConvLowering";
741 }
742 }
743
744 builder.setInsertionPoint(call);
745
747 mlir::ValueRange argOperands = call.getArgOperands();
748 newArgs.reserve(argOperands.size());
749 if (argOperands.size() > fc.argInfos.size())
750 return call.emitOpError()
751 << "variadic arguments not yet implemented in CallConvLowering";
752 assert(fc.argInfos.size() == argOperands.size() &&
753 "call operand count must match classified arg count");
754 for (auto [idx, ac] : llvm::enumerate(fc.argInfos)) {
755 if (ac.kind == ArgKind::Ignore)
756 continue;
757 mlir::Value arg = argOperands[idx];
758 if (ac.kind == ArgKind::Direct && ac.coercedType &&
759 arg.getType() != ac.coercedType)
760 arg = emitCoercion(builder, call.getLoc(), ac.coercedType, arg,
761 enclosingFunc, dl);
762 newArgs.push_back(arg);
763 }
764
765 bool hasResult = call.getNumResults() > 0;
766 mlir::Type origRetTy =
767 hasResult ? call.getResult().getType() : cir::VoidType::get(ctx);
768
769 // An indirect (sret) return has a different call shape than the coerce /
770 // extend / ignore return handling further down (the value is returned
771 // through a prepended pointer slot, not as a result), so dispatch to a
772 // dedicated helper for it; everything below handles the by-value returns.
773 if (fc.returnInfo.kind == ArgKind::Indirect && hasResult) {
774 rewriteIndirectReturnCall(call, fc, newArgs, origRetTy, builder);
775 return mlir::success();
776 }
777
778 mlir::Type callRetTy = origRetTy;
779 if (fc.returnInfo.kind == ArgKind::Ignore && hasResult)
780 callRetTy = cir::VoidType::get(ctx);
781 bool returnNeedsCoercion =
782 hasResult && fc.returnInfo.kind == ArgKind::Direct &&
783 fc.returnInfo.coercedType && fc.returnInfo.coercedType != origRetTy;
784 if (returnNeedsCoercion)
785 callRetTy = fc.returnInfo.coercedType;
786
787 builder.setInsertionPoint(call);
788 auto newCall = cir::CallOp::create(builder, call.getLoc(),
789 call.getCalleeAttr(), callRetTy, newArgs);
790 for (mlir::NamedAttribute attr : call->getAttrs())
791 if (!newCall->hasAttr(attr.getName()))
792 newCall->setAttr(attr.getName(), attr.getValue());
793
794 // Direct return with coercion: the new call returns the coerced type;
795 // emit a coercion back to the original type for the call's existing uses.
796 if (returnNeedsCoercion) {
797 builder.setInsertionPointAfter(newCall);
798 mlir::Value coercedBack =
799 emitCoercion(builder, call.getLoc(), origRetTy, newCall.getResult(),
800 enclosingFunc, dl);
801 call.getResult().replaceAllUsesWith(coercedBack);
802 }
803
804 // Layer llvm.signext / llvm.zeroext onto the new call's arg_attrs and
805 // res_attrs for Extend args/return. Ignore args also require a rebuild
806 // because their slots are dropped from the output array.
807 bool needsArgAttrUpdate =
808 llvm::any_of(fc.argInfos, [](const ArgClassification &ac) {
809 return ac.kind == ArgKind::Ignore || ac.kind == ArgKind::Extend;
810 });
811 if (needsArgAttrUpdate) {
812 auto existing = call->getAttrOfType<mlir::ArrayAttr>("arg_attrs");
813 newCall->setAttr("arg_attrs", updateArgAttrs(ctx, existing, fc));
814 }
815 if (fc.returnInfo.kind == ArgKind::Extend) {
816 auto existing = call->getAttrOfType<mlir::ArrayAttr>("res_attrs");
817 newCall->setAttr("res_attrs", updateResAttrs(ctx, existing, fc.returnInfo));
818 }
819
820 if (hasResult && fc.returnInfo.kind == ArgKind::Ignore) {
821 // The new call returns void, but the original call's result may still
822 // have uses. Substitute a poison constant of the original type so
823 // those uses remain well-formed without pretending we have a real
824 // value at the ABI boundary.
825 if (!call.getResult().use_empty()) {
826 builder.setInsertionPointAfter(newCall);
827 mlir::Value poison =
828 createIgnoredValue(builder, call.getLoc(), origRetTy);
829 call.getResult().replaceAllUsesWith(poison);
830 }
831 } else if (hasResult && !returnNeedsCoercion) {
832 // returnNeedsCoercion already wired up the coerced result above.
833 call.getResult().replaceAllUsesWith(newCall.getResult());
834 }
835
836 call->erase();
837 return mlir::success();
838}
__CUDA_BUILTIN_VAR __cuda_builtin_blockIdx_t blockIdx
mlir::LogicalResult rewriteFunctionDefinition(mlir::FunctionOpInterface funcOp, const mlir::abi::FunctionClassification &fc, mlir::OpBuilder &builder) override
mlir::LogicalResult rewriteCallSite(mlir::Operation *callOp, const mlir::abi::FunctionClassification &fc, mlir::OpBuilder &builder) override
const internal::VariadicAllOfMatcher< Attr > attr