clang 23.0.0git
CIRABIRewriteContext.cpp
Go to the documentation of this file.
1//===- CIRABIRewriteContext.cpp - CIR ABI rewrite context ----------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
10#include "mlir/IR/Builders.h"
11#include "mlir/IR/Dominance.h"
14
15using namespace cir;
16using namespace mlir;
17using namespace mlir::abi;
18
19// This rewrite context supports the Direct (with or without coercion),
20// Extend, Ignore, Indirect-return (sret), Indirect-argument (byval and
21// byref), and Expand (struct flattening) classifications.
22//
23// For byval (ArgClassification::byVal == true) the callee gets
24// llvm.byval + llvm.noalias + llvm.noundef; for byref (byVal == false)
25// the callee gets llvm.byref without the ownership attrs. Both pass
26// through an alloca+store at the call site.
27//
28// For Expand, the single struct argument is replaced by N scalar arguments
29// (one per field). At the callee, the N field block arguments are stored
30// directly into the parameter's own alloca (the CIRGen spill slot). At the
31// call site, the struct operand is decomposed into its fields by reading
32// each member from the source alloca (get_member + load) when the operand is
33// a load of an alloca, or via cir.extract_member otherwise.
34//
35// For Direct + canFlatten (where the coerced type is a multi-field struct),
36// the coerced struct is similarly flattened into N individual wire arguments.
37// The callee reassembles the N scalar block args into the coerced struct,
38// then coerces to the original argument type if the two types differ. The
39// call site coerces the original type to the coerced struct, then extracts
40// each field as a separate call argument.
41
42namespace {
43
44bool needsRewrite(const FunctionClassification &fc) {
45 // Direct without coercion is a true pass-through; any other kind (or a
46 // coerced Direct) means the rewriter must touch the IR. Extend is
47 // technically attribute-only at the IR level but still counts because the
48 // attribute attachment changes observable behavior.
49 if ((fc.returnInfo.kind != ArgKind::Direct) || fc.returnInfo.coercedType)
50 return true;
51 for (const ArgClassification &ac : fc.argInfos)
52 if ((ac.kind != ArgKind::Direct) || ac.coercedType)
53 return true;
54 return false;
55}
56
57/// Return the coerced RecordType for a Direct classification that should be
58/// flattened into individual scalar arguments, or a null type if the
59/// classification does not call for flattening.
60///
61/// Flattening applies when all four conditions hold:
62/// 1. The classification is Direct with a non-null coercedType.
63/// 2. canFlatten is set.
64/// 3. The coercedType is a struct (not a union).
65/// 4. The struct has more than one field (single-field structs are already
66/// scalar; flattening them produces no benefit and classic CodeGen skips
67/// them for the same reason).
68cir::RecordType getFlattenedCoercedType(const ArgClassification &ac) {
69 if (ac.kind != ArgKind::Direct || !ac.coercedType || !ac.canFlatten)
70 return {};
71 auto recTy = dyn_cast<cir::RecordType>(ac.coercedType);
72 if (!recTy || !recTy.isStruct() || recTy.getNumElements() <= 1)
73 return {};
74 return recTy;
75}
76
77/// Build the new argument-type list for a function whose ABI classification
78/// is \p fc. Handles Direct (with or without coercion), Extend, Ignore,
79/// Indirect (byval and byref), and Expand (struct flattening) arguments.
80/// The sret return pointer, when present, is prepended by
81/// rewriteFunctionDefinition rather than here.
82mlir::LogicalResult
83buildNewArgTypes(ArrayRef<mlir::Type> oldArgTypes,
84 const FunctionClassification &fc,
85 SmallVectorImpl<mlir::Type> &newArgTypes,
86 function_ref<mlir::InFlightDiagnostic()> emitError) {
87 assert(newArgTypes.empty() && "expected an empty output vector");
88 newArgTypes.reserve(oldArgTypes.size());
89 for (auto [idx, ac] : llvm::enumerate(fc.argInfos)) {
90 mlir::Type origTy = oldArgTypes[idx];
91 switch (ac.kind) {
92 case ArgKind::Direct:
93 // Direct with canFlatten and a struct coerced type: push one wire type
94 // per field of the coerced struct rather than the struct itself.
95 // Single-field coerced structs fall through to the non-flatten path —
96 // the struct is already scalar-sized and flattening adds no value.
97 if (cir::RecordType flatTy = getFlattenedCoercedType(ac)) {
98 llvm::append_range(newArgTypes, flatTy.getMembers());
99 } else {
100 // Direct with a coerced type: the wire signature uses the coerced
101 // type; the body still expects origTy and insertArgCoercion recovers
102 // it via a memory round-trip. Direct without coercion is a
103 // pass-through.
104 newArgTypes.push_back(ac.coercedType ? ac.coercedType : origTy);
105 }
106 break;
107 case ArgKind::Ignore:
108 break;
109 case ArgKind::Expand: {
110 // Flatten the struct into one wire argument per field. The
111 // reassembly in the callee body and the decomposition at the call
112 // site are handled by insertArgCoercion and rewriteCallSite.
113 auto recTy = cast<cir::RecordType>(origTy);
114 assert(recTy.isStruct() &&
115 "Expand classification requires a struct type, not a union");
116 assert(!recTy.getMembers().empty() &&
117 "Expand classification requires at least one struct field");
118 llvm::append_range(newArgTypes, recTy.getMembers());
119 break;
120 }
121 case ArgKind::Extend:
122 // Extend keeps the original (narrow) type in the signature; the
123 // sign/zero extension is communicated to LLVM via the llvm.signext /
124 // llvm.zeroext arg attribute, attached separately below. Any
125 // coercedType the classifier set on the Extend ArgClassification is
126 // informational (typically the register-width type the value gets
127 // extended to in registers) but does not change the CIR signature.
128 newArgTypes.push_back(origTy);
129 break;
130 case ArgKind::Indirect:
131 // byval and byref both use a pointer wire type. The attribute
132 // distinction (llvm.byval vs llvm.byref) is applied in updateArgAttrs;
133 // the call-site rewrite guards against byref separately because passing
134 // a byref pointer from a CIR value requires the original alloca address,
135 // which the rewriter does not yet track.
136 newArgTypes.push_back(cir::PointerType::get(origTy));
137 break;
138 }
139 }
140 return mlir::success();
141}
142
143/// Compute the new return type for a function whose return classification
144/// is \p retInfo. Direct returns keep (or coerce to) their type, Ignore and
145/// Indirect (sret) returns become void, Extend keeps its type; Expand emits
146/// an error.
147mlir::Type
148computeNewReturnType(mlir::Type origRetTy, const ArgClassification &retInfo,
149 mlir::MLIRContext *ctx,
150 function_ref<mlir::InFlightDiagnostic()> emitError) {
151 switch (retInfo.kind) {
152 case ArgKind::Direct:
153 // Direct return with a coerced type uses the coerced type on the wire;
154 // the rewriter inserts a coercion before each cir.return.
155 return retInfo.coercedType ? retInfo.coercedType : origRetTy;
156 case ArgKind::Ignore:
157 return cir::VoidType::get(ctx);
158 case ArgKind::Expand:
159 emitError() << "Expand return is not allowed (classic codegen rejects "
160 << "it in EmitFunctionEpilog)";
161 return nullptr;
162 case ArgKind::Extend:
163 // Same convention as Extend args: keep the original return type in the
164 // signature; the sign/zero extension is communicated via the
165 // llvm.signext / llvm.zeroext res attribute attached separately below.
166 return origRetTy;
167 case ArgKind::Indirect:
168 // sret: the value is returned through a pointer argument that the ABI
169 // synthesizes (rewriteFunctionDefinition prepends it to the argument
170 // list); it is not part of the source-level signature, so the wire
171 // return type becomes void.
172 return cir::VoidType::get(ctx);
173 }
174 llvm_unreachable("all ArgKind cases handled");
175}
176
177/// Create a typed poison constant to stand in for a value the body of a
178/// function (or the result of a call) still references but whose ABI
179/// classification is Ignore. Using poison is honest -- the value is
180/// genuinely unused at the ABI boundary -- and avoids a fake alloca+load
181/// pattern that would suggest we have a value when we don't.
182mlir::Value createIgnoredValue(mlir::OpBuilder &builder, mlir::Location loc,
183 mlir::Type ty) {
184 return cir::ConstantOp::create(builder, loc, ty, cir::PoisonAttr::get(ty));
185}
186
187/// Build an updated arg_attrs ArrayAttr that drops Ignore'd args, adds
188/// llvm.signext / llvm.zeroext on Extend args, and adds llvm.byval /
189/// llvm.align on Indirect args. Preserves any existing arg attributes on
190/// retained arg slots. \p origArgTypes provides the pre-rewrite type for
191/// each arg slot (needed to compute the llvm.byval pointee type).
192mlir::ArrayAttr updateArgAttrs(mlir::MLIRContext *ctx,
193 ArrayRef<mlir::Type> origArgTypes,
194 mlir::ArrayAttr existingArgAttrs,
195 const FunctionClassification &fc) {
196 mlir::Builder builder(ctx);
198 newArgAttrs.reserve(fc.argInfos.size());
199 for (auto [oldIdx, ac] : llvm::enumerate(fc.argInfos)) {
200 if (ac.kind == ArgKind::Ignore)
201 continue;
202 mlir::DictionaryAttr existing = builder.getDictionaryAttr({});
203 if (existingArgAttrs && oldIdx < existingArgAttrs.size())
204 existing = mlir::cast<mlir::DictionaryAttr>(existingArgAttrs[oldIdx]);
205 if (cir::RecordType flatTy = getFlattenedCoercedType(ac)) {
206 // Direct + canFlatten: one empty attribute dict per flattened field; the
207 // flattened scalar arguments carry no special ABI attributes.
208 newArgAttrs.append(flatTy.getNumElements(),
209 builder.getDictionaryAttr({}));
210 } else if (ac.kind == ArgKind::Expand) {
211 // Push one empty attribute dict per expanded field; the flattened
212 // scalar arguments carry no special ABI attributes.
213 auto recTy = cast<cir::RecordType>(origArgTypes[oldIdx]);
214 newArgAttrs.append(recTy.getNumElements(), builder.getDictionaryAttr({}));
215 } else if (ac.kind == ArgKind::Extend) {
216 StringRef attrName = ac.signExtend ? "llvm.signext" : "llvm.zeroext";
217 SmallVector<mlir::NamedAttribute> attrs(existing.begin(), existing.end());
218 attrs.push_back(builder.getNamedAttr(attrName, builder.getUnitAttr()));
219 newArgAttrs.push_back(builder.getDictionaryAttr(attrs));
220 } else if (ac.kind == ArgKind::Indirect) {
221 // byval: caller-allocated copy; callee receives pointer to copy.
222 // byref: callee receives pointer to the caller's original storage.
223 // Both use llvm.align(A). The ownership flag differs: llvm.byval(T)
224 // vs llvm.byref(T). Both are typed attributes carrying the pointee
225 // type T (the pre-rewrite arg type); T is recorded explicitly because
226 // it cannot be recovered from the opaque LLVM pointer after lowering.
227 //
228 // For byval, two additional attributes match classic CodeGen:
229 // llvm.noundef -- the copy is always fully defined (the caller's
230 // original must be defined or UB has already occurred, and the
231 // copy inherits that property).
232 // llvm.noalias -- the copy is a fresh caller-allocated alloca that
233 // no other pointer in the function can alias. Classic CodeGen
234 // emits this when -fpass-by-value-is-noalias is set; here we
235 // emit it unconditionally because our call-site rewrite always
236 // produces a fresh alloca+store.
237 mlir::Type pointeeTy = origArgTypes[oldIdx];
238 StringRef ownershipAttr = ac.byVal ? "llvm.byval" : "llvm.byref";
239 SmallVector<mlir::NamedAttribute> attrs(existing.begin(), existing.end());
240 attrs.push_back(builder.getNamedAttr(
241 "llvm.align", builder.getI64IntegerAttr(ac.indirectAlign.value())));
242 attrs.push_back(
243 builder.getNamedAttr(ownershipAttr, mlir::TypeAttr::get(pointeeTy)));
244 if (ac.byVal) {
245 attrs.push_back(
246 builder.getNamedAttr("llvm.noalias", builder.getUnitAttr()));
247 attrs.push_back(
248 builder.getNamedAttr("llvm.noundef", builder.getUnitAttr()));
249 }
250 newArgAttrs.push_back(builder.getDictionaryAttr(attrs));
251 } else {
252 newArgAttrs.push_back(existing);
253 }
254 }
255 return builder.getArrayAttr(newArgAttrs);
256}
257
258/// Build an updated res_attrs ArrayAttr (single entry, since CIR funcs have
259/// at most one result) that adds llvm.signext / llvm.zeroext on an Extend
260/// return. Preserves any existing res attributes.
261mlir::ArrayAttr updateResAttrs(mlir::MLIRContext *ctx,
262 mlir::ArrayAttr existingResAttrs,
263 const ArgClassification &retInfo) {
264 if (retInfo.kind != ArgKind::Extend)
265 return existingResAttrs;
266
268 if (existingResAttrs && !existingResAttrs.empty())
269 for (mlir::NamedAttribute na :
270 mlir::cast<mlir::DictionaryAttr>(existingResAttrs[0]))
271 attrs.push_back(na);
272 StringRef attrName = retInfo.signExtend ? "llvm.signext" : "llvm.zeroext";
273 attrs.push_back(mlir::NamedAttribute(mlir::StringAttr::get(ctx, attrName),
274 mlir::UnitAttr::get(ctx)));
275 return mlir::ArrayAttr::get(ctx, {mlir::DictionaryAttr::get(ctx, attrs)});
276}
277
278/// Coerce \p src into a temporary memory slot typed for \p dstTy at the
279/// current builder insertion point, and return the destination-typed pointer
280/// to that slot without loading the value back out. This is the shared
281/// memory half of emitCoercion: callers that want the whole coerced value use
282/// emitCoercion (below); callers that want to read individual members of a
283/// coerced struct (the call-site struct flattening) take the returned pointer
284/// and emit their own cir.get_member + cir.load per field. Lowers uniformly
285/// for scalar, vector, and record types.
286///
287/// The slot is sized to the larger of the two types so that neither the store
288/// nor a later load ever runs past it: the coerced ABI type can be larger
289/// than the original (e.g. a 12-byte aggregate passed as `{i64, i64}`), so
290/// accessing the destination through a source-sized slot would over-read.
291/// Alignment is max(srcAlign, dstAlign) to satisfy both accesses. The slot
292/// is written through a source-typed view and returned as a destination-typed
293/// view.
294///
295/// The temporary alloca is placed at the start of the enclosing function's
296/// entry block so that it composes correctly with the HoistAllocas pass
297/// regardless of pipeline ordering.
298///
299/// Any operations the helper creates are appended to \p createdOps so the
300/// caller can pass them to replaceAllUsesExcept and avoid clobbering the
301/// store's value operand when later rewiring the source value.
302mlir::Value
303emitCoercionToMemory(mlir::OpBuilder &builder, mlir::Location loc,
304 mlir::Type dstTy, mlir::Value src,
305 mlir::FunctionOpInterface funcOp,
306 const mlir::DataLayout &dl,
307 SmallPtrSetImpl<mlir::Operation *> &createdOps) {
308 mlir::Type srcTy = src.getType();
309 assert(srcTy != dstTy &&
310 "emitCoercion callers must pre-check that the types differ");
311
312 uint64_t srcAlign = dl.getTypeABIAlignment(srcTy);
313 uint64_t dstAlign = dl.getTypeABIAlignment(dstTy);
314 uint64_t allocaAlign = std::max(srcAlign, dstAlign);
315 mlir::Type slotTy =
316 dl.getTypeSize(srcTy) >= dl.getTypeSize(dstTy) ? srcTy : dstTy;
317
318 auto slotPtrTy = cir::PointerType::get(slotTy);
319 auto srcPtrTy = cir::PointerType::get(srcTy);
320 auto dstPtrTy = cir::PointerType::get(dstTy);
321
322 cir::AllocaOp alloca;
323 {
324 mlir::OpBuilder::InsertionGuard guard(builder);
325 mlir::Block &entry = funcOp->getRegion(0).front();
326 builder.setInsertionPointToStart(&entry);
327 alloca = cir::AllocaOp::create(builder, loc, slotPtrTy,
328 builder.getStringAttr("coerce"),
329 builder.getI64IntegerAttr(allocaAlign));
330 }
331 createdOps.insert(alloca);
332
333 // Store through a source-typed view of the slot.
334 mlir::Value srcSlot = alloca;
335 if (slotTy != srcTy) {
336 auto srcCast = cir::CastOp::create(builder, loc, srcPtrTy,
337 cir::CastKind::bitcast, alloca);
338 createdOps.insert(srcCast);
339 srcSlot = srcCast;
340 }
341 auto store = cir::StoreOp::create(builder, loc, src, srcSlot);
342 createdOps.insert(store);
343
344 // Return a destination-typed view of the slot.
345 if (slotTy != dstTy) {
346 auto dstCast = cir::CastOp::create(builder, loc, dstPtrTy,
347 cir::CastKind::bitcast, alloca);
348 createdOps.insert(dstCast);
349 return dstCast;
350 }
351 return alloca;
352}
353
354/// Coerce \p src to type \p dstTy by going through memory and load the whole
355/// coerced value back out. Builds on emitCoercionToMemory, adding the final
356/// load of the destination-typed view.
357mlir::Value emitCoercion(mlir::OpBuilder &builder, mlir::Location loc,
358 mlir::Type dstTy, mlir::Value src,
359 mlir::FunctionOpInterface funcOp,
360 const mlir::DataLayout &dl,
361 SmallPtrSetImpl<mlir::Operation *> &createdOps) {
362 mlir::Value dstSlot =
363 emitCoercionToMemory(builder, loc, dstTy, src, funcOp, dl, createdOps);
364 auto load = cir::LoadOp::create(builder, loc, dstSlot);
365 createdOps.insert(load);
366 return load;
367}
368
369/// Convenience overload for callers that don't need the createdOps set
370/// (e.g. call-site coercion where we don't replaceAllUsesExcept).
371mlir::Value emitCoercion(mlir::OpBuilder &builder, mlir::Location loc,
372 mlir::Type dstTy, mlir::Value src,
373 mlir::FunctionOpInterface funcOp,
374 const mlir::DataLayout &dl) {
375 SmallPtrSet<mlir::Operation *, 4> ignored;
376 return emitCoercion(builder, loc, dstTy, src, funcOp, dl, ignored);
377}
378
379/// Insert coercion before each cir.return so the returned value matches the
380/// new (coerced) return type.
381void insertReturnCoercion(mlir::FunctionOpInterface funcOp,
382 mlir::Type origRetTy, mlir::Type coercedRetTy,
383 mlir::OpBuilder &builder,
384 const mlir::DataLayout &dl) {
386 funcOp.walk([&](cir::ReturnOp r) { returns.push_back(r); });
387 for (cir::ReturnOp r : returns) {
388 if (r.getInput().empty())
389 continue;
390 mlir::Value origVal = r.getInput()[0];
391 if (origVal.getType() == coercedRetTy)
392 continue;
393 builder.setInsertionPoint(r);
394 mlir::Value coerced =
395 emitCoercion(builder, r.getLoc(), coercedRetTy, origVal, funcOp, dl);
396 r->setOperand(0, coerced);
397 }
398}
399
400/// Decompose a struct value into one scalar call argument per field of \p
401/// recTy, appending the field values to \p newArgs. When \p structVal is a
402/// plain (non-volatile, non-atomic) load straight from an alloca, read each
403/// field with cir.get_member + cir.load from that alloca, emitted at the
404/// original load's position so they observe the same memory state, and record
405/// the now-dead whole-struct load in \p replacedWholeLoads for later erasure.
406/// Otherwise (a call result, compound literal, or qualified load) extract each
407/// field from the value with cir.extract_member. Loading the members from the
408/// alloca rather than extracting from a whole-struct value keeps the result in
409/// a form SROA can promote (it does not reason about extractvalue). Shared by
410/// the Expand and Direct+canFlatten argument paths.
411static void
412emitStructFieldArgs(mlir::OpBuilder &builder, mlir::Location loc,
413 mlir::Value structVal, cir::RecordType recTy,
414 SmallVectorImpl<mlir::Value> &newArgs,
415 SmallVectorImpl<cir::LoadOp> &replacedWholeLoads) {
416 cir::LoadOp wholeLoad = structVal.getDefiningOp<cir::LoadOp>();
417 cir::AllocaOp srcAlloca;
418 if (wholeLoad && !wholeLoad.getIsVolatile() && !wholeLoad.getMemOrder())
419 srcAlloca = wholeLoad.getAddr().getDefiningOp<cir::AllocaOp>();
420
421 if (srcAlloca) {
422 mlir::OpBuilder::InsertionGuard guard(builder);
423 builder.setInsertionPoint(wholeLoad);
424 for (auto [f, fieldTy] : llvm::enumerate(recTy.getMembers())) {
425 mlir::Type fieldPtrTy = cir::PointerType::get(fieldTy);
426 mlir::Value fieldPtr = cir::GetMemberOp::create(
427 builder, loc, fieldPtrTy, srcAlloca, /*name=*/"", /*index=*/f);
428 newArgs.push_back(cir::LoadOp::create(builder, loc, fieldPtr));
429 }
430 replacedWholeLoads.push_back(wholeLoad);
431 } else {
432 for (unsigned f = 0; f < recTy.getNumElements(); ++f)
433 newArgs.push_back(
434 cir::ExtractMemberOp::create(builder, loc, structVal, f));
435 }
436}
437
438/// For each Direct arg with a coerced type, change the block argument's type
439/// to the coerced type and insert a coercion at function entry that maps it
440/// back to the original type for body uses. For each Indirect (byval/byref)
441/// arg, change the block argument's type to a pointer and insert a load at
442/// entry so the body sees the original value type. For each Expand arg,
443/// replace the single struct block argument with N scalar block arguments (one
444/// per field) and store each field directly into the parameter's own alloca
445/// (the CIRGen spill slot), erasing the original whole-struct store.
446///
447/// \p hasSRetArg is true when the function has an sret return (a hidden return
448/// pointer is prepended as block argument 0). Expand arguments expand the
449/// block argument count, so a running index tracks the current block argument
450/// position rather than computing the classification index + \p hasSRetArg
451/// directly.
452void insertArgCoercion(mlir::FunctionOpInterface funcOp,
453 const FunctionClassification &fc,
454 mlir::OpBuilder &builder, const mlir::DataLayout &dl,
455 bool hasSRetArg) {
456 mlir::Region &body = funcOp->getRegion(0);
457 if (body.empty())
458 return;
459 mlir::Block &entry = body.front();
460
461 // Running block argument index. Each non-Expand classification occupies
462 // one block argument slot; each Expand classification occupies N slots
463 // (one per struct field), so the running index must be incremented by N
464 // rather than 1 after processing an Expand arg.
465 unsigned blockArgIdx = hasSRetArg ? 1 : 0;
466
467 for (const ArgClassification &ac : fc.argInfos) {
468 assert(blockArgIdx < entry.getNumArguments() &&
469 "classification count must not exceed entry block arguments");
470
471 if (ac.kind == ArgKind::Expand) {
472 // The block arg at blockArgIdx currently has the original struct type.
473 // Replace it with N scalar args (one per field) and store each field
474 // directly into the parameter's own alloca.
475 mlir::BlockArgument origArg = entry.getArgument(blockArgIdx);
476 auto recTy = cast<cir::RecordType>(origArg.getType());
477 assert(recTy.isStruct() &&
478 "Expand classification requires a struct type, not a union");
479 unsigned numFields = recTy.getNumElements();
480 assert(numFields > 0 &&
481 "Expand classification requires at least one struct field");
482 mlir::Location loc = funcOp.getLoc();
483
484 // CIRGen spills every by-value struct parameter into its local alloca
485 // with a single store before any other use, so the struct block arg's
486 // only use is that spill. Capture it and the destination alloca so the
487 // expanded fields can be stored straight into that alloca, preserving
488 // the alloca's variable name and `init` flag and avoiding a
489 // reassemble-then-reload roundtrip. DCE may have run earlier and
490 // removed the spill (leaving the block arg unused); tolerate that by
491 // only flattening the signature and emitting no field stores.
492 cir::StoreOp paramStore;
493 cir::AllocaOp destAlloca;
494 if (!origArg.use_empty()) {
495 assert(origArg.hasOneUse() &&
496 "Expand arg must have exactly one use (the CIRGen param spill)");
497 paramStore = cast<cir::StoreOp>(*origArg.user_begin());
498 assert(paramStore.getValue() == origArg &&
499 "Expand arg's use must be the value operand of its store");
500 destAlloca = cast<cir::AllocaOp>(paramStore.getAddr().getDefiningOp());
501 }
502
503 // Erase the original whole-struct spill before retyping the block
504 // argument, so the store is never left feeding a type-mismatched value.
505 // The field stores take its place, just before the following operation
506 // (the spill always precedes the entry block's terminator).
507 mlir::Operation *fieldStoreInsertPt = nullptr;
508 if (paramStore) {
509 fieldStoreInsertPt = paramStore->getNextNode();
510 assert(fieldStoreInsertPt &&
511 "param spill must be followed by a block terminator");
512 paramStore->erase();
513 }
514
515 // Split the single struct block arg into N scalar field block args (slot
516 // 0 reuses the original; slots 1..N-1 are inserted after it). The
517 // reshape needs no insertion point. The field stores are gated on the
518 // same destAlloca condition: when the spill survived we set the insert
519 // point to its old slot (which sits after the CIRGen allocas) and store
520 // each field there; when DCE removed the spill the parameter is dead, so
521 // we only reshape the signature and emit no stores.
522 if (destAlloca)
523 builder.setInsertionPoint(fieldStoreInsertPt);
524 for (auto [f, fieldTy] : llvm::enumerate(recTy.getMembers())) {
525 if (f == 0)
526 origArg.setType(fieldTy);
527 else
528 entry.insertArgument(blockArgIdx + f, fieldTy, loc);
529 if (!destAlloca)
530 continue;
531 mlir::Type fieldPtrTy = cir::PointerType::get(fieldTy);
532 auto fieldPtr = cir::GetMemberOp::create(builder, loc, fieldPtrTy,
533 destAlloca, /*name=*/"",
534 /*index=*/f);
535 cir::StoreOp::create(builder, loc, entry.getArgument(blockArgIdx + f),
536 fieldPtr);
537 }
538
539 blockArgIdx += numFields;
540 continue;
541 }
542
543 mlir::BlockArgument blockArg = entry.getArgument(blockArgIdx);
544
545 if (cir::RecordType flatTy = getFlattenedCoercedType(ac)) {
546 // Direct + canFlatten: the coerced type is a struct whose fields become
547 // individual wire arguments. The reconstruction mirrors the Expand path
548 // — replace the single block arg with N scalar block args, store them
549 // into an alloca of the coerced struct type, reload — but then applies
550 // an additional coercion from the coerced struct type to the original
551 // argument type if the two differ in layout.
552 unsigned numFields = flatTy.getNumElements();
553 assert(numFields >= 2 && "getFlattenedCoercedType guarantees >1 fields");
554 Type origTy = blockArg.getType();
555 Location loc = funcOp.getLoc();
556
557 // Change slot 0 to field 0's type; insert slots 1..N-1 after it.
558 blockArg.setType(flatTy.getElementType(0));
559 for (unsigned f = 1; f < numFields; ++f)
560 entry.insertArgument(blockArgIdx + f, flatTy.getElementType(f), loc);
561
562 // setInsertionPointToStart: see comment in the Expand arm above.
563 builder.setInsertionPointToStart(&entry);
564 auto flatPtrTy = cir::PointerType::get(flatTy);
565 uint64_t flatAlign = dl.getTypeABIAlignment(flatTy);
566 auto flatSlot = cir::AllocaOp::create(
567 builder, loc, flatPtrTy, builder.getStringAttr("coerce"),
568 builder.getI64IntegerAttr(flatAlign));
569 SmallPtrSet<Operation *, 8> flattenOps = {flatSlot};
570 for (auto [f, fieldTy] : llvm::enumerate(flatTy.getMembers())) {
571 Type fieldPtrTy = cir::PointerType::get(fieldTy);
572 auto fieldPtr = cir::GetMemberOp::create(builder, loc, fieldPtrTy,
573 flatSlot, /*name=*/"",
574 /*index=*/f);
575 flattenOps.insert(fieldPtr);
576 auto storeOp = cir::StoreOp::create(
577 builder, loc, entry.getArgument(blockArgIdx + f), fieldPtr);
578 flattenOps.insert(storeOp);
579 }
580 auto flatLoaded =
581 cir::LoadOp::create(builder, loc, flatTy, flatSlot.getResult());
582 flattenOps.insert(flatLoaded);
583
584 // If the coerced struct type differs from the original argument type,
585 // insert a memory round-trip to recover the original type for body uses.
586 Value finalVal = flatLoaded;
587 if (origTy != flatTy) {
588 SmallPtrSet<Operation *, 4> coercionOps;
589 finalVal = emitCoercion(builder, loc, origTy, flatLoaded, funcOp, dl,
590 coercionOps);
591 flattenOps.insert(coercionOps.begin(), coercionOps.end());
592 }
593
594 // Replace all original body uses of the struct block arg (now field 0)
595 // with the recovered original-type value.
596 blockArg.replaceAllUsesExcept(finalVal, flattenOps);
597
598 blockArgIdx += numFields;
599 continue;
600 }
601
602 if (ac.kind == ArgKind::Direct && ac.coercedType) {
603 mlir::Type oldArgTy = blockArg.getType();
604 mlir::Type newArgTy = ac.coercedType;
605 if (oldArgTy == newArgTy) {
606 ++blockArgIdx;
607 continue;
608 }
609 blockArg.setType(newArgTy);
610
611 builder.setInsertionPointToStart(&entry);
612 SmallPtrSet<mlir::Operation *, 4> coercionOps;
613 mlir::Value adapted = emitCoercion(builder, funcOp.getLoc(), oldArgTy,
614 blockArg, funcOp, dl, coercionOps);
615
616 // Replace blockArg uses with the adapted value, except inside the
617 // helper ops we just created. This is critical: the StoreOp's value
618 // operand is blockArg, and if we naively replaceAllUses it gets swapped
619 // to adapted (now of the original type != the alloca's pointee type).
620 blockArg.replaceAllUsesExcept(adapted, coercionOps);
621 } else if (ac.kind == ArgKind::Indirect) {
622 // byval and byref: the wire type is !cir.ptr<T>. Change the block arg
623 // to the pointer type and insert a load so the body sees the original
624 // T. The body transformation is the same for both; the distinction
625 // between byval (llvm.byval) and byref (llvm.byref) is in the arg
626 // attributes applied by updateArgAttrs.
627 mlir::Type origTy = blockArg.getType();
628 auto ptrTy = cir::PointerType::get(origTy);
629 blockArg.setType(ptrTy);
630
631 builder.setInsertionPointToStart(&entry);
632 auto loadOp = cir::LoadOp::create(builder, funcOp.getLoc(), blockArg);
633 SmallPtrSet<mlir::Operation *, 1> loadOps = {loadOp};
634 blockArg.replaceAllUsesExcept(loadOp.getResult(), loadOps);
635 }
636 // Ignore, Extend, and Direct-without-coerce need no block-level changes.
637
638 ++blockArgIdx;
639 }
640}
641
642/// Rewrite each cir.return so the return value flows through the sret
643/// pointer (the prepended first block argument) and the function returns
644/// void.
645///
646/// CIRGen emits a local `__retval` alloca and emits `cir.return %loaded`
647/// where `%loaded = cir.load __retval`. The naive lowering -- store the
648/// loaded SSA value through the sret pointer -- byte-copies the record,
649/// which is wrong for non-trivially-copyable types: e.g. libstdc++'s SSO
650/// `std::string` has a `_M_p` pointer that aliases the source's internal
651/// `_M_local_buf`, so a byte-copy leaves the destination pointing at the
652/// source's (now-dying) stack storage and the destination's destructor
653/// later `free()`s a stack pointer.
654///
655/// Instead, route construction directly into the sret slot: find the
656/// `__retval` alloca, replace its uses with the sret pointer, and drop the
657/// trailing `cir.load __retval` so the rewritten return has no operand.
658/// The CIRGen-emitted constructor / store-into-`__retval` then targets the
659/// sret slot uniformly, matching classic CodeGen's "construct directly into
660/// `%agg.result`" pattern.
661///
662/// CIRGen emits one `%v = cir.load %__retval` / `cir.return %v` pair per
663/// return statement, and every such load reads the single `__retval`
664/// alloca (CIR does not merge returns into a shared epilogue block). The
665/// alloca is therefore rewired to the sret pointer once; each cir.return is
666/// then collapsed to a bare return and its now-dead load erased. This
667/// `cir.return (cir.load <alloca>)` shape is an invariant guaranteed by
668/// CIRGen, so it is asserted via `cast<>` rather than guarded with a
669/// fallback.
670void insertSRetStores(mlir::FunctionOpInterface funcOp, mlir::Type origRetTy,
671 mlir::OpBuilder &builder) {
672 mlir::Value sretPtr = funcOp.getArguments()[0];
673
675 funcOp->walk([&](cir::ReturnOp retOp) { returnOps.push_back(retOp); });
676
677 cir::AllocaOp retAlloca = nullptr;
678 for (cir::ReturnOp retOp : returnOps) {
679 // Every cir.return in an sret function must carry the loaded return
680 // value -- a bare return would mean the sret slot was never written.
681 assert(!retOp.getInput().empty() &&
682 "cir.return in sret function must have an operand");
683
684 cir::LoadOp retLoad =
685 mlir::cast<cir::LoadOp>(retOp.getInput()[0].getDefiningOp());
686
687 // Rewire the shared `__retval` alloca to the sret pointer once.
688 // replaceAllUsesWith updates every load of the alloca (including those
689 // feeding the other cir.return ops) to read from sretPtr instead, so
690 // all returns are covered by this single rewiring. Only then is the
691 // now-unused alloca safe to erase.
692 if (!retAlloca) {
693 retAlloca = mlir::cast<cir::AllocaOp>(retLoad.getAddr().getDefiningOp());
694 retAlloca.getResult().replaceAllUsesWith(sretPtr);
695 retAlloca->erase();
696 }
697
698 // The sret slot now holds the return value directly; replace the
699 // value-carrying return with a void return (no operand).
700 builder.setInsertionPoint(retOp);
701 cir::ReturnOp::create(builder, retOp.getLoc());
702 retOp->erase();
703 if (retLoad.use_empty())
704 retLoad->erase();
705 }
706}
707
708/// Build the attribute dictionary for the sret slot (slot 0 of an
709/// sret-returning function or call). Matches classic CodeGen's
710/// `sret(T) align A [noalias] writable dead_on_unwind`. noalias is only
711/// valid on the callee's parameter, not at the call site, so it is gated by
712/// \p withNoalias. Key order is irrelevant: DictionaryAttr sorts by name.
713SmallVector<mlir::NamedAttribute> buildSretSlotAttrs(mlir::OpBuilder &builder,
714 mlir::Type retTy,
715 uint64_t align,
716 bool withNoalias) {
718 // The sret type must be carried explicitly: LLVM's sret attribute requires
719 // it, and once the CIR `!cir.ptr<retTy>` lowers to an opaque LLVM `ptr` the
720 // pointee type can no longer be recovered from the pointer.
721 attrs.push_back(
722 builder.getNamedAttr("llvm.sret", mlir::TypeAttr::get(retTy)));
723 attrs.push_back(
724 builder.getNamedAttr("llvm.align", builder.getI64IntegerAttr(align)));
725 if (withNoalias)
726 attrs.push_back(
727 builder.getNamedAttr("llvm.noalias", builder.getUnitAttr()));
728 attrs.push_back(builder.getNamedAttr("llvm.writable", builder.getUnitAttr()));
729 attrs.push_back(
730 builder.getNamedAttr("llvm.dead_on_unwind", builder.getUnitAttr()));
731 return attrs;
732}
733
734/// Prepend the sret slot's attrs at position 0 of newCall's arg_attrs.
735/// Called after the call has been rewritten with the sret pointer at
736/// operand 0, so the operand count now includes the sret slot. \p argAttrs
737/// must already be shaped for the rewritten argument list (Extend slots
738/// carry signext/zeroext, Ignore slots dropped); it is shifted to slots
739/// 1..N behind the sret slot.
740void applySretSlotAttrs(cir::CallOp newCall, mlir::ArrayAttr argAttrs,
741 mlir::Type retTy, uint64_t align,
742 mlir::OpBuilder &builder) {
743 mlir::MLIRContext *ctx = newCall->getContext();
745 buildSretSlotAttrs(builder, retTy, align, /*withNoalias=*/false);
746
748 newArgAttrs.reserve(newCall.getArgOperands().size());
749 newArgAttrs.push_back(mlir::DictionaryAttr::get(ctx, sretAttrs));
750 if (argAttrs)
751 llvm::append_range(newArgAttrs, argAttrs);
752 assert(newArgAttrs.size() <= newCall.getArgOperands().size() &&
753 "arg_attrs wider than the rewritten call's operand list");
754 newArgAttrs.resize(newCall.getArgOperands().size(),
755 mlir::DictionaryAttr::get(ctx));
756 newCall->setAttr("arg_attrs", mlir::ArrayAttr::get(ctx, newArgAttrs));
757}
758
759/// Rewrite an indirect-return (sret) call site: prepend a return-slot
760/// pointer as operand 0, make the call return void, and either reuse a
761/// dominating single-use store destination as the slot (so construction
762/// flows directly into it) or allocate a fresh slot and load the result
763/// back out. \p newArgs is the already-shaped (Ignore-dropped,
764/// coercion-applied) non-sret argument list. The caller guarantees the
765/// call has a result and an indirect-return classification.
766void rewriteIndirectReturnCall(cir::CallOp call,
767 const FunctionClassification &fc,
768 ArrayRef<mlir::Value> newArgs,
769 mlir::Type origRetTy,
770 ArrayRef<mlir::Type> origCallArgTypes,
771 mlir::OpBuilder &builder) {
772 mlir::MLIRContext *ctx = call->getContext();
773 auto ptrTy = cir::PointerType::get(origRetTy);
774 builder.setInsertionPoint(call);
775 uint64_t sretAlign = fc.returnInfo.indirectAlign.value();
776
777 // CIRGen emits `cir.store %callResult, %dest` when the call's result is
778 // bound to a local (e.g. `T s = make();`). Allocating a fresh sret slot
779 // and copying into %dest would byte-copy the record, which is wrong for
780 // non-trivially-copyable types (the libstdc++ SSO `_M_p` pointer
781 // survives a byte-copy but ends up pointing at the dying temp's local
782 // buffer, so the destination's destructor later `free()`s a stack
783 // pointer). When the result has a single store-into-%dest use, use
784 // %dest as the sret slot directly so construction flows into it,
785 // matching classic CodeGen's "pass %s as sret" pattern. %dest must
786 // dominate the call so the rewritten call (which takes it as operand 0)
787 // does not use a value before its definition.
788 mlir::Value sretSlot = nullptr;
789 cir::StoreOp reuseStore = nullptr;
790 if (call.getResult().hasOneUse()) {
791 mlir::Operation *user = *call.getResult().getUsers().begin();
792 if (auto store = mlir::dyn_cast<cir::StoreOp>(user))
793 if (store.getValue() == call.getResult() &&
794 store.getAddr().getType() == ptrTy &&
795 mlir::DominanceInfo().properlyDominates(store.getAddr(), call)) {
796 sretSlot = store.getAddr();
797 reuseStore = store;
798 }
799 }
800 if (!sretSlot) {
801 auto alloca = cir::AllocaOp::create(
802 builder, call.getLoc(), ptrTy,
803 /*name=*/builder.getStringAttr("sret"),
804 /*alignment=*/builder.getI64IntegerAttr(sretAlign));
805 sretSlot = alloca;
806 }
807
809 sretArgs.push_back(sretSlot);
810 sretArgs.append(newArgs.begin(), newArgs.end());
811
812 mlir::Type sretVoidTy = cir::VoidType::get(ctx);
813 auto newCall = cir::CallOp::create(
814 builder, call.getLoc(), call.getCalleeAttr(), sretVoidTy, sretArgs);
815 for (mlir::NamedAttribute attr : call->getAttrs())
816 if (!newCall->hasAttr(attr.getName()))
817 newCall->setAttr(attr.getName(), attr.getValue());
818
819 // Shape the per-argument attrs exactly as the non-sret path does
820 // (signext / zeroext for Extend, drop Ignore slots, byval / align for
821 // Indirect, flatten for Expand and Direct+canFlatten) before prepending the
822 // sret slot, so sret composes correctly with Extend / Ignore / Indirect /
823 // Expand / Direct+canFlatten args.
824 mlir::ArrayAttr argAttrs = call->getAttrOfType<mlir::ArrayAttr>("arg_attrs");
825 bool needsArgAttrUpdate =
826 llvm::any_of(fc.argInfos, [](const ArgClassification &ac) {
827 return ac.kind == ArgKind::Ignore || ac.kind == ArgKind::Extend ||
828 ac.kind == ArgKind::Indirect || ac.kind == ArgKind::Expand ||
829 getFlattenedCoercedType(ac);
830 });
831 if (needsArgAttrUpdate)
832 argAttrs = updateArgAttrs(ctx, origCallArgTypes, argAttrs, fc);
833 applySretSlotAttrs(newCall, argAttrs, origRetTy, sretAlign, builder);
834
835 if (reuseStore) {
836 // The callee now constructs directly into the destination slot, so the
837 // original store-from-result is redundant; dropping it avoids a
838 // byte-copy of the record.
839 reuseStore->erase();
840 } else {
841 builder.setInsertionPointAfter(newCall);
842 auto load = cir::LoadOp::create(builder, call.getLoc(), origRetTy, sretSlot,
843 /*isDeref=*/mlir::UnitAttr(),
844 /*isVolatile=*/mlir::UnitAttr(),
845 /*is_nontemporal=*/mlir::UnitAttr(),
846 /*alignment=*/mlir::IntegerAttr(),
847 /*sync_scope=*/cir::SyncScopeKindAttr(),
848 /*mem_order=*/cir::MemOrderAttr(),
849 /*invariant=*/mlir::UnitAttr());
850 call.getResult().replaceAllUsesWith(load);
851 }
852 call->erase();
853}
854
855} // namespace
856
858 mlir::FunctionOpInterface funcOpInterface, const FunctionClassification &fc,
859 mlir::OpBuilder &builder) {
860 // The pass driver (CallConvLoweringPass) only ever hands us cir.func ops.
861 // Cast once at the top so the rest of the function reads in CIR's own
862 // vocabulary, and so we can dispatch to the CIRGlobalValueInterface for
863 // isDefinition() (FunctionOpInterface alone does not inherit from
864 // CIRGlobalValueInterface).
865 cir::FuncOp funcOp = mlir::cast<cir::FuncOp>(funcOpInterface);
866
867 if (!needsRewrite(fc))
868 return mlir::success();
869
870 ArrayRef<mlir::Type> oldArgTypes = funcOp.getArgumentTypes();
871 ArrayRef<mlir::Type> oldResultTypes = funcOp.getResultTypes();
872 mlir::MLIRContext *ctx = funcOp->getContext();
873
874 // CIR follows LLVM IR's single-result rule: a function returns either
875 // zero or one value. Document the invariant so a future multi-result
876 // change forces us to revisit the return-handling below.
877 assert(oldResultTypes.size() <= 1 &&
878 "CIR functions return zero or one value");
879
880 SmallVector<mlir::Type> newArgTypes;
881 if (mlir::failed(buildNewArgTypes(oldArgTypes, fc, newArgTypes,
882 [&]() { return funcOp.emitOpError(); })))
883 return mlir::failure();
884
885 mlir::Type voidTy = cir::VoidType::get(ctx);
886 mlir::Type origRetTy = oldResultTypes.empty() ? voidTy : oldResultTypes[0];
887 mlir::Type newRetTy = computeNewReturnType(
888 origRetTy, fc.returnInfo, ctx, [&]() { return funcOp.emitOpError(); });
889 if (!newRetTy)
890 return mlir::failure();
891 SmallVector<mlir::Type> newResultTypes = {newRetTy};
892
893 // sret return: the value is returned through a pointer the ABI inserts as
894 // argument 0. This pointer is not part of the function's source-level
895 // signature -- it is synthesized here -- and the wire return type was
896 // already set to void by computeNewReturnType. Every classification index
897 // therefore maps to a block argument shifted by one in the body handling
898 // below.
899 bool hasSRet =
900 fc.returnInfo.kind == ArgKind::Indirect && !oldResultTypes.empty();
901 if (hasSRet)
902 newArgTypes.insert(newArgTypes.begin(), cir::PointerType::get(origRetTy));
903
904 if (funcOp.isDefinition()) {
905 mlir::Region &body = funcOp->getRegion(0);
906 if (!body.empty()) {
907 // Prepend the sret pointer block argument and route every cir.return
908 // through it before any index-based argument handling below (which
909 // then accounts for the +1 offset).
910 if (hasSRet) {
911 body.front().insertArgument(0u, cir::PointerType::get(origRetTy),
912 funcOp.getLoc());
913 insertSRetStores(funcOp, origRetTy, builder);
914 }
915
916 // In-body coercion for Direct-with-coerce / Extend args: change
917 // block-arg types to the coerced types and insert a memory roundtrip
918 // at the top of the entry block that converts each coerced value back
919 // to its original type, then route existing body uses (including
920 // in-body cir.call operands) through the recovered value. Done before
921 // the Ignore-drop below so the entry block argument indices used here
922 // still refer to the original positions.
923 insertArgCoercion(funcOp, fc, builder, dl, hasSRet);
924
925 // Direct return with coerced type: insert a coercion at every
926 // cir.return so the returned value matches the (coerced) return
927 // type in the new function signature set below.
928 if (fc.returnInfo.kind == ArgKind::Direct && fc.returnInfo.coercedType &&
929 !oldResultTypes.empty() && fc.returnInfo.coercedType != origRetTy)
930 insertReturnCoercion(funcOp, origRetTy, fc.returnInfo.coercedType,
931 builder, dl);
932
933 mlir::Block &entry = body.front();
934
935 // Drop each Ignored argument's block argument, replacing any remaining
936 // body uses with a poison constant (an Ignore arg is not passed at the
937 // ABI level, so any use is vacuous; poison says exactly that). Walk
938 // forward with a running block-argument index that mirrors
939 // insertArgCoercion: an Expand arg or a Direct+canFlatten arg occupies N
940 // slots, every other kept kind one. On erase, do not advance the index
941 // -- the next block argument shifts into the vacated slot.
942 unsigned blockArgIdx = hasSRet ? 1 : 0;
943 for (auto [i, ac] : llvm::enumerate(fc.argInfos)) {
944 if (blockArgIdx >= entry.getNumArguments())
945 break;
946 if (ac.kind == ArgKind::Ignore) {
947 mlir::BlockArgument arg = entry.getArgument(blockArgIdx);
948 if (!arg.use_empty()) {
949 builder.setInsertionPointToStart(&entry);
950 mlir::Value poison =
951 createIgnoredValue(builder, funcOp.getLoc(), arg.getType());
952 arg.replaceAllUsesWith(poison);
953 }
954 entry.eraseArgument(blockArgIdx);
955 continue;
956 }
957 if (cir::RecordType flatTy = getFlattenedCoercedType(ac))
958 blockArgIdx += flatTy.getNumElements();
959 else if (ac.kind == ArgKind::Expand)
960 blockArgIdx += cast<cir::RecordType>(oldArgTypes[i]).getNumElements();
961 else
962 ++blockArgIdx;
963 }
964 }
965
966 // When the return is classified Ignore but the original function had
967 // a non-void return type, every cir.return becomes a naked return.
968 // This relies on the invariant that computeNewReturnType has set
969 // newRetTy = void for Ignore above, and that the function type is
970 // updated below to match. Asserting this keeps the dependency
971 // explicit.
972 if (fc.returnInfo.kind == ArgKind::Ignore && !oldResultTypes.empty()) {
973 assert(mlir::isa<cir::VoidType>(newRetTy) &&
974 "Ignore-return path requires the new return type to be void");
976 funcOp.walk([&](cir::ReturnOp r) { returns.push_back(r); });
977 for (cir::ReturnOp r : returns) {
978 if (r.getNumOperands() == 0)
979 continue;
980 builder.setInsertionPoint(r);
981 cir::ReturnOp::create(builder, r.getLoc());
982 r.erase();
983 }
984 }
985 }
986
987 mlir::Type newFnTy = funcOp.cloneTypeWith(newArgTypes, newResultTypes);
988 funcOp.setFunctionTypeAttr(mlir::TypeAttr::get(newFnTy));
989
990 // Rebuild arg_attrs when the function has an sret slot (slot 0 needs the
991 // sret attribute set) or any arg is Ignore (dropped from the output array),
992 // Extend (needs llvm.signext / llvm.zeroext), Indirect (needs
993 // llvm.byval / llvm.align), Expand or Direct+canFlatten (both change the
994 // argument count).
995 bool needsArgAttrUpdate =
996 hasSRet || llvm::any_of(fc.argInfos, [](const ArgClassification &ac) {
997 return ac.kind == ArgKind::Ignore || ac.kind == ArgKind::Extend ||
998 ac.kind == ArgKind::Indirect || ac.kind == ArgKind::Expand ||
999 getFlattenedCoercedType(ac);
1000 });
1001 if (needsArgAttrUpdate) {
1002 auto existing = funcOp->getAttrOfType<mlir::ArrayAttr>("arg_attrs");
1003 mlir::ArrayAttr updated = updateArgAttrs(ctx, oldArgTypes, existing, fc);
1004 if (hasSRet) {
1005 // Prepend the sret slot's attribute dict (slot 0); the per-argument
1006 // dicts shift to slots 1..N. noalias is valid only on the callee's
1007 // parameter, so it is added only for definitions.
1008 SmallVector<mlir::NamedAttribute> sretAttrs = buildSretSlotAttrs(
1009 builder, origRetTy, fc.returnInfo.indirectAlign.value(),
1010 /*withNoalias=*/funcOp.isDefinition());
1012 withSret.push_back(mlir::DictionaryAttr::get(ctx, sretAttrs));
1013 llvm::append_range(withSret, updated);
1014 funcOp->setAttr("arg_attrs", mlir::ArrayAttr::get(ctx, withSret));
1015 } else {
1016 funcOp->setAttr("arg_attrs", updated);
1017 }
1018 }
1019
1020 // Rebuild res_attrs: layer llvm.signext / llvm.zeroext onto an Extend
1021 // return.
1022 if (fc.returnInfo.kind == ArgKind::Extend) {
1023 auto existing = funcOp->getAttrOfType<mlir::ArrayAttr>("res_attrs");
1024 funcOp->setAttr("res_attrs", updateResAttrs(ctx, existing, fc.returnInfo));
1025 }
1026
1027 return mlir::success();
1028}
1029
1030mlir::LogicalResult
1032 const FunctionClassification &fc,
1033 mlir::OpBuilder &builder) {
1034 if (!needsRewrite(fc))
1035 return mlir::success();
1036
1037 if (mlir::isa<cir::TryCallOp>(callOp))
1038 return callOp->emitOpError()
1039 << "TryCallOp not yet implemented in CallConvLowering";
1040
1041 auto call = mlir::cast<cir::CallOp>(callOp);
1042 if (call.isIndirect())
1043 return call.emitOpError()
1044 << "indirect call not yet implemented in CallConvLowering";
1045
1046 mlir::MLIRContext *ctx = callOp->getContext();
1047 auto enclosingFunc = call->getParentOfType<mlir::FunctionOpInterface>();
1048
1049 builder.setInsertionPoint(call);
1050
1052 mlir::ValueRange argOperands = call.getArgOperands();
1053 newArgs.reserve(argOperands.size());
1054
1055 // Whole-struct loads replaced by direct member loads for Expand operands.
1056 // They can only be erased once the original call (their remaining user) is
1057 // gone, so collect them and erase the dead ones at the end.
1058 SmallVector<cir::LoadOp> replacedWholeLoads;
1059
1060 // Capture original arg types before building newArgs (byval slots change
1061 // the wire argument from T to !cir.ptr<T>, so we save the pre-rewrite
1062 // types here for use in updateArgAttrs).
1063 SmallVector<mlir::Type> origCallArgTypes;
1064 llvm::append_range(origCallArgTypes, argOperands.getTypes());
1065 if (argOperands.size() > fc.argInfos.size())
1066 return call.emitOpError()
1067 << "variadic arguments not yet implemented in CallConvLowering";
1068 assert(fc.argInfos.size() == argOperands.size() &&
1069 "call operand count must match classified arg count");
1070 for (auto [idx, ac] : llvm::enumerate(fc.argInfos)) {
1071 if (ac.kind == ArgKind::Ignore)
1072 continue;
1073 mlir::Value arg = argOperands[idx];
1074 if (cir::RecordType flatTy = getFlattenedCoercedType(ac)) {
1075 // Direct + canFlatten: pass one scalar call argument per field of the
1076 // ABI-coerced struct. When the original and coerced types differ in
1077 // layout, coerce through a memory slot and read each field with
1078 // cir.get_member + cir.load from that slot. When the types already
1079 // match, decompose the struct value directly (reading from its source
1080 // alloca when possible).
1081 if (arg.getType() != flatTy) {
1082 SmallPtrSet<mlir::Operation *, 4> coercionOps;
1083 mlir::Value coercedPtr =
1084 emitCoercionToMemory(builder, call.getLoc(), flatTy, arg,
1085 enclosingFunc, dl, coercionOps);
1086 for (auto [f, fieldTy] : llvm::enumerate(flatTy.getMembers())) {
1087 mlir::Type fieldPtrTy = cir::PointerType::get(fieldTy);
1088 auto fieldPtr =
1089 cir::GetMemberOp::create(builder, call.getLoc(), fieldPtrTy,
1090 coercedPtr, /*name=*/"", /*index=*/f);
1091 newArgs.push_back(cir::LoadOp::create(builder, call.getLoc(), fieldTy,
1092 fieldPtr.getResult()));
1093 }
1094 } else {
1095 emitStructFieldArgs(builder, call.getLoc(), arg, flatTy, newArgs,
1096 replacedWholeLoads);
1097 }
1098 } else if (ac.kind == ArgKind::Expand) {
1099 // Decompose the struct value into its constituent scalar fields and
1100 // pass each as a separate argument.
1101 auto recTy = cast<cir::RecordType>(arg.getType());
1102 assert(recTy.isStruct() &&
1103 "Expand classification requires a struct type, not a union");
1104 emitStructFieldArgs(builder, call.getLoc(), arg, recTy, newArgs,
1105 replacedWholeLoads);
1106 } else if (ac.kind == ArgKind::Direct && ac.coercedType &&
1107 arg.getType() != ac.coercedType) {
1108 arg = emitCoercion(builder, call.getLoc(), ac.coercedType, arg,
1109 enclosingFunc, dl);
1110 newArgs.push_back(arg);
1111 } else if (ac.kind == ArgKind::Indirect) {
1112 // byval and byref: allocate a stack slot, copy the value in, and pass
1113 // the pointer. The alloca+store pattern is identical for both; the
1114 // attribute distinction (llvm.byval vs llvm.byref) is applied by
1115 // updateArgAttrs. byref does not receive llvm.noalias or llvm.noundef
1116 // because it does not assert exclusive ownership of the storage.
1117 mlir::Type argTy = arg.getType();
1118 auto ptrTy = cir::PointerType::get(argTy);
1119 uint64_t align = ac.indirectAlign.value();
1120 StringRef slotName = ac.byVal ? "byval" : "byref";
1121 auto slot = cir::AllocaOp::create(builder, call.getLoc(), ptrTy,
1122 builder.getStringAttr(slotName),
1123 builder.getI64IntegerAttr(align));
1124 cir::StoreOp::create(builder, call.getLoc(), arg, slot);
1125 arg = slot;
1126 newArgs.push_back(arg);
1127 } else {
1128 newArgs.push_back(arg);
1129 }
1130 }
1131
1132 bool hasResult = call.getNumResults() > 0;
1133 mlir::Type origRetTy =
1134 hasResult ? call.getResult().getType() : cir::VoidType::get(ctx);
1135
1136 // An indirect (sret) return has a different call shape than the coerce /
1137 // extend / ignore return handling further down (the value is returned
1138 // through a prepended pointer slot, not as a result), so dispatch to a
1139 // dedicated helper for it; everything below handles the by-value returns.
1140 if (fc.returnInfo.kind == ArgKind::Indirect && hasResult) {
1141 rewriteIndirectReturnCall(call, fc, newArgs, origRetTy, origCallArgTypes,
1142 builder);
1143 return mlir::success();
1144 }
1145
1146 mlir::Type callRetTy = origRetTy;
1147 if (fc.returnInfo.kind == ArgKind::Ignore && hasResult)
1148 callRetTy = cir::VoidType::get(ctx);
1149 bool returnNeedsCoercion =
1150 hasResult && fc.returnInfo.kind == ArgKind::Direct &&
1151 fc.returnInfo.coercedType && fc.returnInfo.coercedType != origRetTy;
1152 if (returnNeedsCoercion)
1153 callRetTy = fc.returnInfo.coercedType;
1154
1155 builder.setInsertionPoint(call);
1156 auto newCall = cir::CallOp::create(builder, call.getLoc(),
1157 call.getCalleeAttr(), callRetTy, newArgs);
1158 for (mlir::NamedAttribute attr : call->getAttrs())
1159 if (!newCall->hasAttr(attr.getName()))
1160 newCall->setAttr(attr.getName(), attr.getValue());
1161
1162 // Direct return with coercion: the new call returns the coerced type;
1163 // emit a coercion back to the original type for the call's existing uses.
1164 if (returnNeedsCoercion) {
1165 builder.setInsertionPointAfter(newCall);
1166 mlir::Value coercedBack =
1167 emitCoercion(builder, call.getLoc(), origRetTy, newCall.getResult(),
1168 enclosingFunc, dl);
1169 call.getResult().replaceAllUsesWith(coercedBack);
1170 }
1171
1172 // Layer llvm.signext / llvm.zeroext onto the new call's arg_attrs and
1173 // res_attrs for Extend args/return. Ignore args require a rebuild because
1174 // their slots are dropped; Indirect args need llvm.byval / llvm.align;
1175 // Expand and Direct+canFlatten args change the argument count.
1176 bool needsArgAttrUpdate =
1177 llvm::any_of(fc.argInfos, [](const ArgClassification &ac) {
1178 return ac.kind == ArgKind::Ignore || ac.kind == ArgKind::Extend ||
1179 ac.kind == ArgKind::Indirect || ac.kind == ArgKind::Expand ||
1180 getFlattenedCoercedType(ac);
1181 });
1182 if (needsArgAttrUpdate) {
1183 auto existing = call->getAttrOfType<mlir::ArrayAttr>("arg_attrs");
1184 newCall->setAttr("arg_attrs",
1185 updateArgAttrs(ctx, origCallArgTypes, existing, fc));
1186 }
1187 if (fc.returnInfo.kind == ArgKind::Extend) {
1188 auto existing = call->getAttrOfType<mlir::ArrayAttr>("res_attrs");
1189 newCall->setAttr("res_attrs", updateResAttrs(ctx, existing, fc.returnInfo));
1190 }
1191
1192 if (hasResult && fc.returnInfo.kind == ArgKind::Ignore) {
1193 // The new call returns void, but the original call's result may still
1194 // have uses. Substitute a poison constant of the original type so
1195 // those uses remain well-formed without pretending we have a real
1196 // value at the ABI boundary.
1197 if (!call.getResult().use_empty()) {
1198 builder.setInsertionPointAfter(newCall);
1199 mlir::Value poison =
1200 createIgnoredValue(builder, call.getLoc(), origRetTy);
1201 call.getResult().replaceAllUsesWith(poison);
1202 }
1203 } else if (hasResult && !returnNeedsCoercion) {
1204 // returnNeedsCoercion already wired up the coerced result above.
1205 call.getResult().replaceAllUsesWith(newCall.getResult());
1206 }
1207
1208 call->erase();
1209
1210 // Now that the original call is gone, drop any whole-struct loads whose
1211 // members we read directly from the source alloca, if nothing else uses
1212 // them. A single load can feed several Expand operands (e.g. after CSE
1213 // merges identical loads), so dedupe before erasing to avoid touching a
1214 // freed op twice.
1215 SmallPtrSet<mlir::Operation *, 4> erased;
1216 for (cir::LoadOp wholeLoad : replacedWholeLoads)
1217 if (erased.insert(wholeLoad).second && wholeLoad.use_empty())
1218 wholeLoad->erase();
1219
1220 return mlir::success();
1221}
mlir::LogicalResult rewriteFunctionDefinition(mlir::FunctionOpInterface funcOp, const mlir::abi::FunctionClassification &fc, mlir::OpBuilder &builder) override
mlir::LogicalResult rewriteCallSite(mlir::Operation *callOp, const mlir::abi::FunctionClassification &fc, mlir::OpBuilder &builder) override
C++ view class that accepts both !cir.struct and !cir.union types.
Definition CIRTypes.h:93
llvm::ArrayRef< mlir::Type > getMembers() const
Definition CIRTypes.cpp:492
bool isStruct() const
Definition CIRTypes.cpp:522
size_t getNumElements() const
Definition CIRTypes.h:120
const internal::VariadicAllOfMatcher< Attr > attr