doxygen/CGOpenMPRuntimeGPU_8cpp_source.html

//===---- CGOpenMPRuntimeGPU.cpp - Interface to OpenMP GPU Runtimes ----===//

//

// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.

// See https://llvm.org/LICENSE.txt for license information.

// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

//

//===----------------------------------------------------------------------===//

//

// This provides a generalized class for OpenMP runtime code generation

// specialized by GPU targets NVPTX, AMDGCN and SPIR-V.

//

//===----------------------------------------------------------------------===//


#include "CGOpenMPRuntimeGPU.h"

#include "CGDebugInfo.h"

#include "CodeGenFunction.h"

#include "clang/AST/Attr.h"

#include "clang/AST/DeclOpenMP.h"

#include "clang/AST/OpenMPClause.h"

#include "clang/AST/StmtOpenMP.h"

#include "clang/AST/StmtVisitor.h"

#include "clang/Basic/Cuda.h"

#include "llvm/ADT/SmallPtrSet.h"

#include "llvm/Frontend/OpenMP/OMPDeviceConstants.h"

#include "llvm/Frontend/OpenMP/OMPGridValues.h"


using namespace clang;

using namespace CodeGen;

using namespace llvm::omp;


namespace {

/// Pre(post)-action for different OpenMP constructs specialized for NVPTX.

class NVPTXActionTy final : public PrePostActionTy {

  llvm::FunctionCallee EnterCallee = nullptr;

  ArrayRef<llvm::Value *> EnterArgs;

  llvm::FunctionCallee ExitCallee = nullptr;

  ArrayRef<llvm::Value *> ExitArgs;

  bool Conditional = false;

  llvm::BasicBlock *ContBlock = nullptr;


public:

  NVPTXActionTy(llvm::FunctionCallee EnterCallee,

                ArrayRef<llvm::Value *> EnterArgs,

                llvm::FunctionCallee ExitCallee,

                ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false)

      : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),

        ExitArgs(ExitArgs), Conditional(Conditional) {}

  void Enter(CodeGenFunction &CGF) override {

    llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);

    if (Conditional) {

      llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);

      auto *ThenBlock = CGF.createBasicBlock("omp_if.then");

      ContBlock = CGF.createBasicBlock("omp_if.end");

      // Generate the branch (If-stmt)

      CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);

      CGF.EmitBlock(ThenBlock);

    }

  }

  void Done(CodeGenFunction &CGF) {

    // Emit the rest of blocks/branches

    CGF.EmitBranch(ContBlock);

    CGF.EmitBlock(ContBlock, true);

  }

  void Exit(CodeGenFunction &CGF) override {

    CGF.EmitRuntimeCall(ExitCallee, ExitArgs);

  }

};


/// A class to track the execution mode when codegening directives within

/// a target region. The appropriate mode (SPMD|NON-SPMD) is set on entry

/// to the target region and used by containing directives such as 'parallel'

/// to emit optimized code.

class ExecutionRuntimeModesRAII {

private:

  CGOpenMPRuntimeGPU::ExecutionMode SavedExecMode =

      CGOpenMPRuntimeGPU::EM_Unknown;

  CGOpenMPRuntimeGPU::ExecutionMode &ExecMode;


public:

  ExecutionRuntimeModesRAII(CGOpenMPRuntimeGPU::ExecutionMode &ExecMode,

                            CGOpenMPRuntimeGPU::ExecutionMode EntryMode)

      : ExecMode(ExecMode) {

    SavedExecMode = ExecMode;

    ExecMode = EntryMode;

  }

  ~ExecutionRuntimeModesRAII() { ExecMode = SavedExecMode; }

};


static const ValueDecl *getPrivateItem(const Expr *RefExpr) {

  RefExpr = RefExpr->IgnoreParens();

  if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(RefExpr)) {

    const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();

    while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))

      Base = TempASE->getBase()->IgnoreParenImpCasts();

    RefExpr = Base;

  } else if (auto *OASE = dyn_cast<ArraySectionExpr>(RefExpr)) {

    const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();

    while (const auto *TempOASE = dyn_cast<ArraySectionExpr>(Base))

      Base = TempOASE->getBase()->IgnoreParenImpCasts();

    while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))

      Base = TempASE->getBase()->IgnoreParenImpCasts();

    RefExpr = Base;

  }

  RefExpr = RefExpr->IgnoreParenImpCasts();

  if (const auto *DE = dyn_cast<DeclRefExpr>(RefExpr))

    return cast<ValueDecl>(DE->getDecl()->getCanonicalDecl());

  const auto *ME = cast<MemberExpr>(RefExpr);

  return cast<ValueDecl>(ME->getMemberDecl()->getCanonicalDecl());

}


static RecordDecl *buildRecordForGlobalizedVars(

    ASTContext &C, ArrayRef<const ValueDecl *> EscapedDecls,

    ArrayRef<const ValueDecl *> EscapedDeclsForTeams,

    llvm::SmallDenseMap<const ValueDecl *, const FieldDecl *>

        &MappedDeclsFields,

    int BufSize) {

  using VarsDataTy = std::pair<CharUnits /*Align*/, const ValueDecl *>;

  if (EscapedDecls.empty() && EscapedDeclsForTeams.empty())

    return nullptr;

  SmallVector<VarsDataTy, 4> GlobalizedVars;

  for (const ValueDecl *D : EscapedDecls)

    GlobalizedVars.emplace_back(C.getDeclAlign(D), D);

  for (const ValueDecl *D : EscapedDeclsForTeams)

    GlobalizedVars.emplace_back(C.getDeclAlign(D), D);


  // Build struct _globalized_locals_ty {

  //         /*  globalized vars  */[WarSize] align (decl_align)

  //         /*  globalized vars  */ for EscapedDeclsForTeams

  //       };

  RecordDecl *GlobalizedRD = C.buildImplicitRecord("_globalized_locals_ty");

  GlobalizedRD->startDefinition();

  llvm::SmallPtrSet<const ValueDecl *, 16> SingleEscaped(llvm::from_range,

                                                         EscapedDeclsForTeams);

  for (const auto &Pair : GlobalizedVars) {

    const ValueDecl *VD = Pair.second;

    QualType Type = VD->getType();

    if (Type->isLValueReferenceType())

      Type = C.getPointerType(Type.getNonReferenceType());

    else

      Type = Type.getNonReferenceType();

    SourceLocation Loc = VD->getLocation();

    FieldDecl *Field;

    if (SingleEscaped.count(VD)) {

      Field = FieldDecl::Create(

          C, GlobalizedRD, Loc, Loc, VD->getIdentifier(), Type,

          C.getTrivialTypeSourceInfo(Type, SourceLocation()),

          /*BW=*/nullptr, /*Mutable=*/false,

          /*InitStyle=*/ICIS_NoInit);

      Field->setAccess(AS_public);

      if (VD->hasAttrs()) {

        for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),

             E(VD->getAttrs().end());

             I != E; ++I)

          Field->addAttr(*I);

      }

    } else {

      if (BufSize > 1) {

        llvm::APInt ArraySize(32, BufSize);

        Type = C.getConstantArrayType(Type, ArraySize, nullptr,

                                      ArraySizeModifier::Normal, 0);

      }

      Field = FieldDecl::Create(

          C, GlobalizedRD, Loc, Loc, VD->getIdentifier(), Type,

          C.getTrivialTypeSourceInfo(Type, SourceLocation()),

          /*BW=*/nullptr, /*Mutable=*/false,

          /*InitStyle=*/ICIS_NoInit);

      Field->setAccess(AS_public);

      llvm::APInt Align(32, Pair.first.getQuantity());

      Field->addAttr(AlignedAttr::CreateImplicit(

          C, /*IsAlignmentExpr=*/true,

          IntegerLiteral::Create(C, Align,

                                 C.getIntTypeForBitwidth(32, /*Signed=*/0),

                                 SourceLocation()),

          {}, AlignedAttr::GNU_aligned));

    }

    GlobalizedRD->addDecl(Field);

    MappedDeclsFields.try_emplace(VD, Field);

  }

  GlobalizedRD->completeDefinition();

  return GlobalizedRD;

}


/// Get the list of variables that can escape their declaration context.

class CheckVarsEscapingDeclContext final

    : public ConstStmtVisitor<CheckVarsEscapingDeclContext> {

  CodeGenFunction &CGF;

  llvm::SetVector<const ValueDecl *> EscapedDecls;

  llvm::SetVector<const ValueDecl *> EscapedVariableLengthDecls;

  llvm::SetVector<const ValueDecl *> DelayedVariableLengthDecls;

  llvm::SmallPtrSet<const Decl *, 4> EscapedParameters;

  RecordDecl *GlobalizedRD = nullptr;

  llvm::SmallDenseMap<const ValueDecl *, const FieldDecl *> MappedDeclsFields;

  bool AllEscaped = false;

  bool IsForCombinedParallelRegion = false;


  void markAsEscaped(const ValueDecl *VD) {

    // Do not globalize declare target variables.

    if (!isa<VarDecl>(VD) ||

        OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD))

      return;

    VD = cast<ValueDecl>(VD->getCanonicalDecl());

    // Use user-specified allocation.

    if (VD->hasAttrs() && VD->hasAttr<OMPAllocateDeclAttr>())

      return;

    // Variables captured by value must be globalized.

    bool IsCaptured = false;

    if (auto *CSI = CGF.CapturedStmtInfo) {

      if (const FieldDecl *FD = CSI->lookup(cast<VarDecl>(VD))) {

        // Check if need to capture the variable that was already captured by

        // value in the outer region.

        IsCaptured = true;

        if (!IsForCombinedParallelRegion) {

          if (!FD->hasAttrs())

            return;

          const auto *Attr = FD->getAttr<OMPCaptureKindAttr>();

          if (!Attr)

            return;

          if (((Attr->getCaptureKind() != OMPC_map) &&

               !isOpenMPPrivate(Attr->getCaptureKind())) ||

              ((Attr->getCaptureKind() == OMPC_map) &&

               !FD->getType()->isAnyPointerType()))

            return;

        }

        if (!FD->getType()->isReferenceType()) {

          assert(!VD->getType()->isVariablyModifiedType() &&

                 "Parameter captured by value with variably modified type");

          EscapedParameters.insert(VD);

        } else if (!IsForCombinedParallelRegion) {

          return;

        }

      }

    }

    if ((!CGF.CapturedStmtInfo ||

         (IsForCombinedParallelRegion && CGF.CapturedStmtInfo)) &&

        VD->getType()->isReferenceType())

      // Do not globalize variables with reference type.

      return;

    if (VD->getType()->isVariablyModifiedType()) {

      // If not captured at the target region level then mark the escaped

      // variable as delayed.

      if (IsCaptured)

        EscapedVariableLengthDecls.insert(VD);

      else

        DelayedVariableLengthDecls.insert(VD);

    } else

      EscapedDecls.insert(VD);

  }


  void VisitValueDecl(const ValueDecl *VD) {

    if (VD->getType()->isLValueReferenceType())

      markAsEscaped(VD);

    if (const auto *VarD = dyn_cast<VarDecl>(VD)) {

      if (!isa<ParmVarDecl>(VarD) && VarD->hasInit()) {

        const bool SavedAllEscaped = AllEscaped;

        AllEscaped = VD->getType()->isLValueReferenceType();

        Visit(VarD->getInit());

        AllEscaped = SavedAllEscaped;

      }

    }

  }

  void VisitOpenMPCapturedStmt(const CapturedStmt *S,

                               ArrayRef<OMPClause *> Clauses,

                               bool IsCombinedParallelRegion) {

    if (!S)

      return;

    for (const CapturedStmt::Capture &C : S->captures()) {

      if (C.capturesVariable() && !C.capturesVariableByCopy()) {

        const ValueDecl *VD = C.getCapturedVar();

        bool SavedIsForCombinedParallelRegion = IsForCombinedParallelRegion;

        if (IsCombinedParallelRegion) {

          // Check if the variable is privatized in the combined construct and

          // those private copies must be shared in the inner parallel

          // directive.

          IsForCombinedParallelRegion = false;

          for (const OMPClause *C : Clauses) {

            if (!isOpenMPPrivate(C->getClauseKind()) ||

                C->getClauseKind() == OMPC_reduction ||

                C->getClauseKind() == OMPC_linear ||

                C->getClauseKind() == OMPC_private)

              continue;

            ArrayRef<const Expr *> Vars;

            if (const auto *PC = dyn_cast<OMPFirstprivateClause>(C))

              Vars = PC->getVarRefs();

            else if (const auto *PC = dyn_cast<OMPLastprivateClause>(C))

              Vars = PC->getVarRefs();

            else

              llvm_unreachable("Unexpected clause.");

            for (const auto *E : Vars) {

              const Decl *D =

                  cast<DeclRefExpr>(E)->getDecl()->getCanonicalDecl();

              if (D == VD->getCanonicalDecl()) {

                IsForCombinedParallelRegion = true;

                break;

              }

            }

            if (IsForCombinedParallelRegion)

              break;

          }

        }

        markAsEscaped(VD);

        if (isa<OMPCapturedExprDecl>(VD))

          VisitValueDecl(VD);

        IsForCombinedParallelRegion = SavedIsForCombinedParallelRegion;

      }

    }

  }


  void buildRecordForGlobalizedVars(bool IsInTTDRegion) {

    assert(!GlobalizedRD &&

           "Record for globalized variables is built already.");

    ArrayRef<const ValueDecl *> EscapedDeclsForParallel, EscapedDeclsForTeams;

    unsigned WarpSize = CGF.getTarget().getGridValue().GV_Warp_Size;

    if (IsInTTDRegion)

      EscapedDeclsForTeams = EscapedDecls.getArrayRef();

    else

      EscapedDeclsForParallel = EscapedDecls.getArrayRef();

    GlobalizedRD = ::buildRecordForGlobalizedVars(

        CGF.getContext(), EscapedDeclsForParallel, EscapedDeclsForTeams,

        MappedDeclsFields, WarpSize);

  }


public:

  CheckVarsEscapingDeclContext(CodeGenFunction &CGF,

                               ArrayRef<const ValueDecl *> TeamsReductions)

      : CGF(CGF), EscapedDecls(llvm::from_range, TeamsReductions) {}

  ~CheckVarsEscapingDeclContext() = default;

  void VisitDeclStmt(const DeclStmt *S) {

    if (!S)

      return;

    for (const Decl *D : S->decls())

      if (const auto *VD = dyn_cast_or_null<ValueDecl>(D))

        VisitValueDecl(VD);

  }

  void VisitOMPExecutableDirective(const OMPExecutableDirective *D) {

    if (!D)

      return;

    if (!D->hasAssociatedStmt())

      return;

    if (const auto *S =

            dyn_cast_or_null<CapturedStmt>(D->getAssociatedStmt())) {

      // Do not analyze directives that do not actually require capturing,

      // like `omp for` or `omp simd` directives.

      llvm::SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;

      getOpenMPCaptureRegions(CaptureRegions, D->getDirectiveKind());

      if (CaptureRegions.size() == 1 && CaptureRegions.back() == OMPD_unknown) {

        VisitStmt(S->getCapturedStmt());

        return;

      }

      VisitOpenMPCapturedStmt(

          S, D->clauses(),

          CaptureRegions.back() == OMPD_parallel &&

              isOpenMPDistributeDirective(D->getDirectiveKind()));

    }

  }

  void VisitCapturedStmt(const CapturedStmt *S) {

    if (!S)

      return;

    for (const CapturedStmt::Capture &C : S->captures()) {

      if (C.capturesVariable() && !C.capturesVariableByCopy()) {

        const ValueDecl *VD = C.getCapturedVar();

        markAsEscaped(VD);

        if (isa<OMPCapturedExprDecl>(VD))

          VisitValueDecl(VD);

      }

    }

  }

  void VisitLambdaExpr(const LambdaExpr *E) {

    if (!E)

      return;

    for (const LambdaCapture &C : E->captures()) {

      if (C.capturesVariable()) {

        if (C.getCaptureKind() == LCK_ByRef) {

          const ValueDecl *VD = C.getCapturedVar();

          markAsEscaped(VD);

          if (E->isInitCapture(&C) || isa<OMPCapturedExprDecl>(VD))

            VisitValueDecl(VD);

        }

      }

    }

  }

  void VisitBlockExpr(const BlockExpr *E) {

    if (!E)

      return;

    for (const BlockDecl::Capture &C : E->getBlockDecl()->captures()) {

      if (C.isByRef()) {

        const VarDecl *VD = C.getVariable();

        markAsEscaped(VD);

        if (isa<OMPCapturedExprDecl>(VD) || VD->isInitCapture())

          VisitValueDecl(VD);

      }

    }

  }

  void VisitCallExpr(const CallExpr *E) {

    if (!E)

      return;

    for (const Expr *Arg : E->arguments()) {

      if (!Arg)

        continue;

      if (Arg->isLValue()) {

        const bool SavedAllEscaped = AllEscaped;

        AllEscaped = true;

        Visit(Arg);

        AllEscaped = SavedAllEscaped;

      } else {

        Visit(Arg);

      }

    }

    Visit(E->getCallee());

  }

  void VisitDeclRefExpr(const DeclRefExpr *E) {

    if (!E)

      return;

    const ValueDecl *VD = E->getDecl();

    if (AllEscaped)

      markAsEscaped(VD);

    if (isa<OMPCapturedExprDecl>(VD))

      VisitValueDecl(VD);

    else if (VD->isInitCapture())

      VisitValueDecl(VD);

  }

  void VisitUnaryOperator(const UnaryOperator *E) {

    if (!E)

      return;

    if (E->getOpcode() == UO_AddrOf) {

      const bool SavedAllEscaped = AllEscaped;

      AllEscaped = true;

      Visit(E->getSubExpr());

      AllEscaped = SavedAllEscaped;

    } else {

      Visit(E->getSubExpr());

    }

  }

  void VisitImplicitCastExpr(const ImplicitCastExpr *E) {

    if (!E)

      return;

    if (E->getCastKind() == CK_ArrayToPointerDecay) {

      const bool SavedAllEscaped = AllEscaped;

      AllEscaped = true;

      Visit(E->getSubExpr());

      AllEscaped = SavedAllEscaped;

    } else {

      Visit(E->getSubExpr());

    }

  }

  void VisitExpr(const Expr *E) {

    if (!E)

      return;

    bool SavedAllEscaped = AllEscaped;

    if (!E->isLValue())

      AllEscaped = false;

    for (const Stmt *Child : E->children())

      if (Child)

        Visit(Child);

    AllEscaped = SavedAllEscaped;

  }

  void VisitStmt(const Stmt *S) {

    if (!S)

      return;

    for (const Stmt *Child : S->children())

      if (Child)

        Visit(Child);

  }


  /// Returns the record that handles all the escaped local variables and used

  /// instead of their original storage.

  const RecordDecl *getGlobalizedRecord(bool IsInTTDRegion) {

    if (!GlobalizedRD)

      buildRecordForGlobalizedVars(IsInTTDRegion);

    return GlobalizedRD;

  }


  /// Returns the field in the globalized record for the escaped variable.

  const FieldDecl *getFieldForGlobalizedVar(const ValueDecl *VD) const {

    assert(GlobalizedRD &&

           "Record for globalized variables must be generated already.");

    return MappedDeclsFields.lookup(VD);

  }


  /// Returns the list of the escaped local variables/parameters.

  ArrayRef<const ValueDecl *> getEscapedDecls() const {

    return EscapedDecls.getArrayRef();

  }


  /// Checks if the escaped local variable is actually a parameter passed by

  /// value.

  const llvm::SmallPtrSetImpl<const Decl *> &getEscapedParameters() const {

    return EscapedParameters;

  }


  /// Returns the list of the escaped variables with the variably modified

  /// types.

  ArrayRef<const ValueDecl *> getEscapedVariableLengthDecls() const {

    return EscapedVariableLengthDecls.getArrayRef();

  }


  /// Returns the list of the delayed variables with the variably modified

  /// types.

  ArrayRef<const ValueDecl *> getDelayedVariableLengthDecls() const {

    return DelayedVariableLengthDecls.getArrayRef();

  }

};

} // anonymous namespace


CGOpenMPRuntimeGPU::ExecutionMode

CGOpenMPRuntimeGPU::getExecutionMode() const {

  return CurrentExecutionMode;

}


CGOpenMPRuntimeGPU::DataSharingMode

CGOpenMPRuntimeGPU::getDataSharingMode() const {

  return CurrentDataSharingMode;

}


/// Check for inner (nested) SPMD construct, if any


static bool hasNestedSPMDDirective(ASTContext &Ctx,

                                   const OMPExecutableDirective &D) {

  const auto *CS = D.getInnermostCapturedStmt();

  const auto *Body =

      CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);

  const Stmt *ChildStmt = CGOpenMPRuntime::getSingleCompoundChild(Ctx, Body);


  if (const auto *NestedDir =

          dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {

    OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind();

    switch (D.getDirectiveKind()) {

    case OMPD_target:

      if (isOpenMPParallelDirective(DKind))

        return true;

      if (DKind == OMPD_teams) {

        Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(

            /*IgnoreCaptured=*/true);

        if (!Body)

          return false;

        ChildStmt = CGOpenMPRuntime::getSingleCompoundChild(Ctx, Body);

        if (const auto *NND =

                dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {

          DKind = NND->getDirectiveKind();

          if (isOpenMPParallelDirective(DKind))

            return true;

        }

      }

      return false;

    case OMPD_target_teams:

      return isOpenMPParallelDirective(DKind);

    case OMPD_target_simd:

    case OMPD_target_parallel:

    case OMPD_target_parallel_for:

    case OMPD_target_parallel_for_simd:

    case OMPD_target_teams_distribute:

    case OMPD_target_teams_distribute_simd:

    case OMPD_target_teams_distribute_parallel_for:

    case OMPD_target_teams_distribute_parallel_for_simd:

    case OMPD_parallel:

    case OMPD_for:

    case OMPD_parallel_for:

    case OMPD_parallel_master:

    case OMPD_parallel_sections:

    case OMPD_for_simd:

    case OMPD_parallel_for_simd:

    case OMPD_cancel:

    case OMPD_cancellation_point:

    case OMPD_ordered:

    case OMPD_threadprivate:

    case OMPD_allocate:

    case OMPD_task:

    case OMPD_simd:

    case OMPD_sections:

    case OMPD_section:

    case OMPD_single:

    case OMPD_master:

    case OMPD_critical:

    case OMPD_taskyield:

    case OMPD_barrier:

    case OMPD_taskwait:

    case OMPD_taskgroup:

    case OMPD_atomic:

    case OMPD_flush:

    case OMPD_depobj:

    case OMPD_scan:

    case OMPD_teams:

    case OMPD_target_data:

    case OMPD_target_exit_data:

    case OMPD_target_enter_data:

    case OMPD_distribute:

    case OMPD_distribute_simd:

    case OMPD_distribute_parallel_for:

    case OMPD_distribute_parallel_for_simd:

    case OMPD_teams_distribute:

    case OMPD_teams_distribute_simd:

    case OMPD_teams_distribute_parallel_for:

    case OMPD_teams_distribute_parallel_for_simd:

    case OMPD_target_update:

    case OMPD_declare_simd:

    case OMPD_declare_variant:

    case OMPD_begin_declare_variant:

    case OMPD_end_declare_variant:

    case OMPD_declare_target:

    case OMPD_end_declare_target:

    case OMPD_declare_reduction:

    case OMPD_declare_mapper:

    case OMPD_taskloop:

    case OMPD_taskloop_simd:

    case OMPD_master_taskloop:

    case OMPD_master_taskloop_simd:

    case OMPD_parallel_master_taskloop:

    case OMPD_parallel_master_taskloop_simd:

    case OMPD_requires:

    case OMPD_unknown:

    default:

      llvm_unreachable("Unexpected directive.");

    }

  }


  return false;

}


static bool supportsSPMDExecutionMode(ASTContext &Ctx,

                                      const OMPExecutableDirective &D) {

  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();

  switch (DirectiveKind) {

  case OMPD_target:

  case OMPD_target_teams:

    return hasNestedSPMDDirective(Ctx, D);

  case OMPD_target_parallel_loop:

  case OMPD_target_parallel:

  case OMPD_target_parallel_for:

  case OMPD_target_parallel_for_simd:

  case OMPD_target_teams_distribute_parallel_for:

  case OMPD_target_teams_distribute_parallel_for_simd:

  case OMPD_target_simd:

  case OMPD_target_teams_distribute_simd:

    return true;

  case OMPD_target_teams_distribute:

    return false;

  case OMPD_target_teams_loop:

    // Whether this is true or not depends on how the directive will

    // eventually be emitted.

    if (auto *TTLD = dyn_cast<OMPTargetTeamsGenericLoopDirective>(&D))

      return TTLD->canBeParallelFor();

    return false;

  case OMPD_parallel:

  case OMPD_for:

  case OMPD_parallel_for:

  case OMPD_parallel_master:

  case OMPD_parallel_sections:

  case OMPD_for_simd:

  case OMPD_parallel_for_simd:

  case OMPD_cancel:

  case OMPD_cancellation_point:

  case OMPD_ordered:

  case OMPD_threadprivate:

  case OMPD_allocate:

  case OMPD_task:

  case OMPD_simd:

  case OMPD_sections:

  case OMPD_section:

  case OMPD_single:

  case OMPD_master:

  case OMPD_critical:

  case OMPD_taskyield:

  case OMPD_barrier:

  case OMPD_taskwait:

  case OMPD_taskgroup:

  case OMPD_atomic:

  case OMPD_flush:

  case OMPD_depobj:

  case OMPD_scan:

  case OMPD_teams:

  case OMPD_target_data:

  case OMPD_target_exit_data:

  case OMPD_target_enter_data:

  case OMPD_distribute:

  case OMPD_distribute_simd:

  case OMPD_distribute_parallel_for:

  case OMPD_distribute_parallel_for_simd:

  case OMPD_teams_distribute:

  case OMPD_teams_distribute_simd:

  case OMPD_teams_distribute_parallel_for:

  case OMPD_teams_distribute_parallel_for_simd:

  case OMPD_target_update:

  case OMPD_declare_simd:

  case OMPD_declare_variant:

  case OMPD_begin_declare_variant:

  case OMPD_end_declare_variant:

  case OMPD_declare_target:

  case OMPD_end_declare_target:

  case OMPD_declare_reduction:

  case OMPD_declare_mapper:

  case OMPD_taskloop:

  case OMPD_taskloop_simd:

  case OMPD_master_taskloop:

  case OMPD_master_taskloop_simd:

  case OMPD_parallel_master_taskloop:

  case OMPD_parallel_master_taskloop_simd:

  case OMPD_requires:

  case OMPD_unknown:

  default:

    break;

  }

  llvm_unreachable(

      "Unknown programming model for OpenMP directive on NVPTX target.");

}


void CGOpenMPRuntimeGPU::emitNonSPMDKernel(const OMPExecutableDirective &D,

                                             StringRef ParentName,

                                             llvm::Function *&OutlinedFn,

                                             llvm::Constant *&OutlinedFnID,

                                             bool IsOffloadEntry,

                                             const RegionCodeGenTy &CodeGen) {

  ExecutionRuntimeModesRAII ModeRAII(CurrentExecutionMode, EM_NonSPMD);

  EntryFunctionState EST;

  WrapperFunctionsMap.clear();


  [[maybe_unused]] bool IsBareKernel = D.getSingleClause<OMPXBareClause>();

  assert(!IsBareKernel && "bare kernel should not be at generic mode");


  // Emit target region as a standalone region.

  class NVPTXPrePostActionTy : public PrePostActionTy {

    CGOpenMPRuntimeGPU::EntryFunctionState &EST;

    const OMPExecutableDirective &D;


  public:

    NVPTXPrePostActionTy(CGOpenMPRuntimeGPU::EntryFunctionState &EST,

                         const OMPExecutableDirective &D)

        : EST(EST), D(D) {}

    void Enter(CodeGenFunction &CGF) override {

      auto &RT = static_cast<CGOpenMPRuntimeGPU &>(CGF.CGM.getOpenMPRuntime());

      RT.emitKernelInit(D, CGF, EST, /* IsSPMD */ false);

      // Skip target region initialization.

      RT.setLocThreadIdInsertPt(CGF, /*AtCurrentPoint=*/true);

    }

    void Exit(CodeGenFunction &CGF) override {

      auto &RT = static_cast<CGOpenMPRuntimeGPU &>(CGF.CGM.getOpenMPRuntime());

      RT.clearLocThreadIdInsertPt(CGF);

      RT.emitKernelDeinit(CGF, EST, /* IsSPMD */ false);

    }

  } Action(EST, D);

  CodeGen.setAction(Action);

  IsInTTDRegion = true;

  emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,

                                   IsOffloadEntry, CodeGen);

  IsInTTDRegion = false;

}


void CGOpenMPRuntimeGPU::emitKernelInit(const OMPExecutableDirective &D,

                                        CodeGenFunction &CGF,

                                        EntryFunctionState &EST, bool IsSPMD) {

  llvm::OpenMPIRBuilder::TargetKernelDefaultAttrs Attrs;

  Attrs.ExecFlags =

      IsSPMD ? llvm::omp::OMPTgtExecModeFlags::OMP_TGT_EXEC_MODE_SPMD

             : llvm::omp::OMPTgtExecModeFlags::OMP_TGT_EXEC_MODE_GENERIC;

  computeMinAndMaxThreadsAndTeams(D, CGF, Attrs);


  CGBuilderTy &Bld = CGF.Builder;

  Bld.restoreIP(OMPBuilder.createTargetInit(Bld, Attrs));

  if (!IsSPMD)

    emitGenericVarsProlog(CGF, EST.Loc);

}


void CGOpenMPRuntimeGPU::emitKernelDeinit(CodeGenFunction &CGF,

                                          EntryFunctionState &EST,

                                          bool IsSPMD) {

  if (!IsSPMD)

    emitGenericVarsEpilog(CGF);


  // This is temporary until we remove the fixed sized buffer.

  ASTContext &C = CGM.getContext();

  RecordDecl *StaticRD = C.buildImplicitRecord(

      "_openmp_teams_reduction_type_$_", RecordDecl::TagKind::Union);

  StaticRD->startDefinition();

  for (const RecordDecl *TeamReductionRec : TeamsReductions) {

    CanQualType RecTy = C.getCanonicalTagType(TeamReductionRec);

    auto *Field = FieldDecl::Create(

        C, StaticRD, SourceLocation(), SourceLocation(), nullptr, RecTy,

        C.getTrivialTypeSourceInfo(RecTy, SourceLocation()),

        /*BW=*/nullptr, /*Mutable=*/false,

        /*InitStyle=*/ICIS_NoInit);

    Field->setAccess(AS_public);

    StaticRD->addDecl(Field);

  }

  StaticRD->completeDefinition();

  CanQualType StaticTy = C.getCanonicalTagType(StaticRD);

  llvm::Type *LLVMReductionsBufferTy =

      CGM.getTypes().ConvertTypeForMem(StaticTy);

  const auto &DL = CGM.getModule().getDataLayout();

  uint64_t ReductionDataSize =

      TeamsReductions.empty()

          ? 0

          : DL.getTypeAllocSize(LLVMReductionsBufferTy).getFixedValue();

  CGBuilderTy &Bld = CGF.Builder;

  OMPBuilder.createTargetDeinit(Bld, ReductionDataSize,

                                C.getLangOpts().OpenMPCUDAReductionBufNum);

  TeamsReductions.clear();

}


void CGOpenMPRuntimeGPU::emitSPMDKernel(const OMPExecutableDirective &D,

                                          StringRef ParentName,

                                          llvm::Function *&OutlinedFn,

                                          llvm::Constant *&OutlinedFnID,

                                          bool IsOffloadEntry,

                                          const RegionCodeGenTy &CodeGen) {

  ExecutionRuntimeModesRAII ModeRAII(CurrentExecutionMode, EM_SPMD);

  EntryFunctionState EST;


  bool IsBareKernel = D.getSingleClause<OMPXBareClause>();


  // Emit target region as a standalone region.

  class NVPTXPrePostActionTy : public PrePostActionTy {

    CGOpenMPRuntimeGPU &RT;

    CGOpenMPRuntimeGPU::EntryFunctionState &EST;

    bool IsBareKernel;

    DataSharingMode Mode;

    const OMPExecutableDirective &D;


  public:

    NVPTXPrePostActionTy(CGOpenMPRuntimeGPU &RT,

                         CGOpenMPRuntimeGPU::EntryFunctionState &EST,

                         bool IsBareKernel, const OMPExecutableDirective &D)

        : RT(RT), EST(EST), IsBareKernel(IsBareKernel),

          Mode(RT.CurrentDataSharingMode), D(D) {}

    void Enter(CodeGenFunction &CGF) override {

      if (IsBareKernel) {

        RT.CurrentDataSharingMode = DataSharingMode::DS_CUDA;

        return;

      }

      RT.emitKernelInit(D, CGF, EST, /* IsSPMD */ true);

      // Skip target region initialization.

      RT.setLocThreadIdInsertPt(CGF, /*AtCurrentPoint=*/true);

    }

    void Exit(CodeGenFunction &CGF) override {

      if (IsBareKernel) {

        RT.CurrentDataSharingMode = Mode;

        return;

      }

      RT.clearLocThreadIdInsertPt(CGF);

      RT.emitKernelDeinit(CGF, EST, /* IsSPMD */ true);

    }

  } Action(*this, EST, IsBareKernel, D);

  CodeGen.setAction(Action);

  IsInTTDRegion = true;

  emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,

                                   IsOffloadEntry, CodeGen);

  IsInTTDRegion = false;

}


void CGOpenMPRuntimeGPU::emitTargetOutlinedFunction(

    const OMPExecutableDirective &D, StringRef ParentName,

    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,

    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {

  if (!IsOffloadEntry) // Nothing to do.

    return;


  assert(!ParentName.empty() && "Invalid target region parent name!");


  bool Mode = supportsSPMDExecutionMode(CGM.getContext(), D);

  bool IsBareKernel = D.getSingleClause<OMPXBareClause>();

  if (Mode || IsBareKernel)

    emitSPMDKernel(D, ParentName, OutlinedFn, OutlinedFnID, IsOffloadEntry,

                   CodeGen);

  else

    emitNonSPMDKernel(D, ParentName, OutlinedFn, OutlinedFnID, IsOffloadEntry,

                      CodeGen);

}


CGOpenMPRuntimeGPU::CGOpenMPRuntimeGPU(CodeGenModule &CGM)

    : CGOpenMPRuntime(CGM) {

  llvm::OpenMPIRBuilderConfig Config(

      CGM.getLangOpts().OpenMPIsTargetDevice, isGPU(),

      CGM.getLangOpts().OpenMPOffloadMandatory,

      /*HasRequiresReverseOffload*/ false, /*HasRequiresUnifiedAddress*/ false,

      hasRequiresUnifiedSharedMemory(), /*HasRequiresDynamicAllocators*/ false);

  Config.setDefaultTargetAS(

      CGM.getContext().getTargetInfo().getTargetAddressSpace(LangAS::Default));

  OMPBuilder.setConfig(Config);


  if (!CGM.getLangOpts().OpenMPIsTargetDevice)

    llvm_unreachable("OpenMP can only handle device code.");


  if (CGM.getLangOpts().OpenMPCUDAMode)

    CurrentDataSharingMode = CGOpenMPRuntimeGPU::DS_CUDA;


  llvm::OpenMPIRBuilder &OMPBuilder = getOMPBuilder();

  if (CGM.getLangOpts().NoGPULib || CGM.getLangOpts().OMPHostIRFile.empty())

    return;


  OMPBuilder.createGlobalFlag(CGM.getLangOpts().OpenMPTargetDebug,

                              "__omp_rtl_debug_kind");

  OMPBuilder.createGlobalFlag(CGM.getLangOpts().OpenMPTeamSubscription,

                              "__omp_rtl_assume_teams_oversubscription");

  OMPBuilder.createGlobalFlag(CGM.getLangOpts().OpenMPThreadSubscription,

                              "__omp_rtl_assume_threads_oversubscription");

  OMPBuilder.createGlobalFlag(CGM.getLangOpts().OpenMPNoThreadState,

                              "__omp_rtl_assume_no_thread_state");

  OMPBuilder.createGlobalFlag(CGM.getLangOpts().OpenMPNoNestedParallelism,

                              "__omp_rtl_assume_no_nested_parallelism");

}


void CGOpenMPRuntimeGPU::emitProcBindClause(CodeGenFunction &CGF,

                                              ProcBindKind ProcBind,

                                              SourceLocation Loc) {

  // Nothing to do.

}


llvm::Value *CGOpenMPRuntimeGPU::emitMessageClause(CodeGenFunction &CGF,

                                                   const Expr *Message,

                                                   SourceLocation Loc) {

  CGM.getDiags().Report(Loc, diag::warn_omp_gpu_unsupported_clause)

      << getOpenMPClauseName(OMPC_message);

  return nullptr;

}


llvm::Value *


CGOpenMPRuntimeGPU::emitSeverityClause(OpenMPSeverityClauseKind Severity,

                                       SourceLocation Loc) {

  CGM.getDiags().Report(Loc, diag::warn_omp_gpu_unsupported_clause)

      << getOpenMPClauseName(OMPC_severity);

  return nullptr;

}


void CGOpenMPRuntimeGPU::emitNumThreadsClause(

    CodeGenFunction &CGF, llvm::Value *NumThreads, SourceLocation Loc,

    OpenMPNumThreadsClauseModifier Modifier, OpenMPSeverityClauseKind Severity,

    SourceLocation SeverityLoc, const Expr *Message,

    SourceLocation MessageLoc) {

  if (Modifier == OMPC_NUMTHREADS_strict) {

    CGM.getDiags().Report(Loc,

                          diag::warn_omp_gpu_unsupported_modifier_for_clause)

        << "strict" << getOpenMPClauseName(OMPC_num_threads);

    return;

  }


  // Nothing to do.

}


void CGOpenMPRuntimeGPU::emitNumTeamsClause(CodeGenFunction &CGF,

                                              const Expr *NumTeams,

                                              const Expr *ThreadLimit,

                                              SourceLocation Loc) {}


llvm::Function *CGOpenMPRuntimeGPU::emitParallelOutlinedFunction(

    CodeGenFunction &CGF, const OMPExecutableDirective &D,

    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,

    const RegionCodeGenTy &CodeGen) {

  // Emit target region as a standalone region.

  bool PrevIsInTTDRegion = IsInTTDRegion;

  IsInTTDRegion = false;

  auto *OutlinedFun =

      cast<llvm::Function>(CGOpenMPRuntime::emitParallelOutlinedFunction(

          CGF, D, ThreadIDVar, InnermostKind, CodeGen));

  IsInTTDRegion = PrevIsInTTDRegion;

  if (getExecutionMode() != CGOpenMPRuntimeGPU::EM_SPMD) {

    llvm::Function *WrapperFun =

        createParallelDataSharingWrapper(OutlinedFun, D);

    WrapperFunctionsMap[OutlinedFun] = WrapperFun;

  }


  return OutlinedFun;

}


/// Get list of lastprivate variables from the teams distribute ... or

/// teams {distribute ...} directives.

static void


getDistributeLastprivateVars(ASTContext &Ctx, const OMPExecutableDirective &D,

                             llvm::SmallVectorImpl<const ValueDecl *> &Vars) {

  assert(isOpenMPTeamsDirective(D.getDirectiveKind()) &&

         "expected teams directive.");

  const OMPExecutableDirective *Dir = &D;

  if (!isOpenMPDistributeDirective(D.getDirectiveKind())) {

    if (const Stmt *S = CGOpenMPRuntime::getSingleCompoundChild(

            Ctx,

            D.getInnermostCapturedStmt()->getCapturedStmt()->IgnoreContainers(

                /*IgnoreCaptured=*/true))) {

      Dir = dyn_cast_or_null<OMPExecutableDirective>(S);

      if (Dir && !isOpenMPDistributeDirective(Dir->getDirectiveKind()))

        Dir = nullptr;

    }

  }

  if (!Dir)

    return;

  for (const auto *C : Dir->getClausesOfKind<OMPLastprivateClause>()) {

    for (const Expr *E : C->getVarRefs())

      Vars.push_back(getPrivateItem(E));

  }

}


/// Get list of reduction variables from the teams ... directives.

static void


getTeamsReductionVars(ASTContext &Ctx, const OMPExecutableDirective &D,

                      llvm::SmallVectorImpl<const ValueDecl *> &Vars) {

  assert(isOpenMPTeamsDirective(D.getDirectiveKind()) &&

         "expected teams directive.");

  for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {

    for (const Expr *E : C->privates())

      Vars.push_back(getPrivateItem(E));

  }

}


llvm::Function *CGOpenMPRuntimeGPU::emitTeamsOutlinedFunction(

    CodeGenFunction &CGF, const OMPExecutableDirective &D,

    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,

    const RegionCodeGenTy &CodeGen) {

  SourceLocation Loc = D.getBeginLoc();


  const RecordDecl *GlobalizedRD = nullptr;

  llvm::SmallVector<const ValueDecl *, 4> LastPrivatesReductions;

  llvm::SmallDenseMap<const ValueDecl *, const FieldDecl *> MappedDeclsFields;

  unsigned WarpSize = CGM.getTarget().getGridValue().GV_Warp_Size;

  // Globalize team reductions variable unconditionally in all modes.

  if (getExecutionMode() != CGOpenMPRuntimeGPU::EM_SPMD)

    getTeamsReductionVars(CGM.getContext(), D, LastPrivatesReductions);

  if (getExecutionMode() == CGOpenMPRuntimeGPU::EM_SPMD) {

    getDistributeLastprivateVars(CGM.getContext(), D, LastPrivatesReductions);

    if (!LastPrivatesReductions.empty()) {

      GlobalizedRD = ::buildRecordForGlobalizedVars(

          CGM.getContext(), {}, LastPrivatesReductions, MappedDeclsFields,

          WarpSize);

    }

  } else if (!LastPrivatesReductions.empty()) {

    assert(!TeamAndReductions.first &&

           "Previous team declaration is not expected.");

    TeamAndReductions.first = D.getCapturedStmt(OMPD_teams)->getCapturedDecl();

    std::swap(TeamAndReductions.second, LastPrivatesReductions);

  }


  // Emit target region as a standalone region.

  class NVPTXPrePostActionTy : public PrePostActionTy {

    SourceLocation &Loc;

    const RecordDecl *GlobalizedRD;

    llvm::SmallDenseMap<const ValueDecl *, const FieldDecl *>

        &MappedDeclsFields;


  public:

    NVPTXPrePostActionTy(

        SourceLocation &Loc, const RecordDecl *GlobalizedRD,

        llvm::SmallDenseMap<const ValueDecl *, const FieldDecl *>

            &MappedDeclsFields)

        : Loc(Loc), GlobalizedRD(GlobalizedRD),

          MappedDeclsFields(MappedDeclsFields) {}

    void Enter(CodeGenFunction &CGF) override {

      auto &Rt =

          static_cast<CGOpenMPRuntimeGPU &>(CGF.CGM.getOpenMPRuntime());

      if (GlobalizedRD) {

        auto I = Rt.FunctionGlobalizedDecls.try_emplace(CGF.CurFn).first;

        I->getSecond().MappedParams =

            std::make_unique<CodeGenFunction::OMPMapVars>();

        DeclToAddrMapTy &Data = I->getSecond().LocalVarData;

        for (const auto &Pair : MappedDeclsFields) {

          assert(Pair.getFirst()->isCanonicalDecl() &&

                 "Expected canonical declaration");

          Data.try_emplace(Pair.getFirst());

        }

      }

      Rt.emitGenericVarsProlog(CGF, Loc);

    }

    void Exit(CodeGenFunction &CGF) override {

      static_cast<CGOpenMPRuntimeGPU &>(CGF.CGM.getOpenMPRuntime())

          .emitGenericVarsEpilog(CGF);

    }

  } Action(Loc, GlobalizedRD, MappedDeclsFields);

  CodeGen.setAction(Action);

  llvm::Function *OutlinedFun = CGOpenMPRuntime::emitTeamsOutlinedFunction(

      CGF, D, ThreadIDVar, InnermostKind, CodeGen);


  return OutlinedFun;

}


void CGOpenMPRuntimeGPU::emitGenericVarsProlog(CodeGenFunction &CGF,

                                               SourceLocation Loc) {

  if (getDataSharingMode() != CGOpenMPRuntimeGPU::DS_Generic)

    return;


  CGBuilderTy &Bld = CGF.Builder;


  const auto I = FunctionGlobalizedDecls.find(CGF.CurFn);

  if (I == FunctionGlobalizedDecls.end())

    return;


  for (auto &Rec : I->getSecond().LocalVarData) {

    const auto *VD = cast<VarDecl>(Rec.first);

    bool EscapedParam = I->getSecond().EscapedParameters.count(Rec.first);

    QualType VarTy = VD->getType();


    // Get the local allocation of a firstprivate variable before sharing

    llvm::Value *ParValue;

    if (EscapedParam) {

      LValue ParLVal =

          CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());

      ParValue = CGF.EmitLoadOfScalar(ParLVal, Loc);

    }


    // Allocate space for the variable to be globalized

    llvm::Value *AllocArgs[] = {CGF.getTypeSize(VD->getType())};

    llvm::CallBase *VoidPtr =

        CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(

                                CGM.getModule(), OMPRTL___kmpc_alloc_shared),

                            AllocArgs, VD->getName());

    // FIXME: We should use the variables actual alignment as an argument.

    VoidPtr->addRetAttr(llvm::Attribute::get(

        CGM.getLLVMContext(), llvm::Attribute::Alignment,

        CGM.getContext().getTargetInfo().getNewAlign() / 8));


    // Cast the void pointer and get the address of the globalized variable.

    llvm::Value *CastedVoidPtr = Bld.CreatePointerBitCastOrAddrSpaceCast(

        VoidPtr, Bld.getPtrTy(0), VD->getName() + "_on_stack");

    LValue VarAddr =

        CGF.MakeNaturalAlignPointeeRawAddrLValue(CastedVoidPtr, VarTy);

    Rec.second.PrivateAddr = VarAddr.getAddress();

    Rec.second.GlobalizedVal = VoidPtr;


    // Assign the local allocation to the newly globalized location.

    if (EscapedParam) {

      CGF.EmitStoreOfScalar(ParValue, VarAddr);

      I->getSecond().MappedParams->setVarAddr(CGF, VD, VarAddr.getAddress());

    }

    if (auto *DI = CGF.getDebugInfo())

      VoidPtr->setDebugLoc(DI->SourceLocToDebugLoc(VD->getLocation()));

  }


  for (const auto *ValueD : I->getSecond().EscapedVariableLengthDecls) {

    const auto *VD = cast<VarDecl>(ValueD);

    std::pair<llvm::Value *, llvm::Value *> AddrSizePair =

        getKmpcAllocShared(CGF, VD);

    I->getSecond().EscapedVariableLengthDeclsAddrs.emplace_back(AddrSizePair);

    LValue Base = CGF.MakeAddrLValue(AddrSizePair.first, VD->getType(),

                                     CGM.getContext().getDeclAlign(VD),

                                     AlignmentSource::Decl);

    I->getSecond().MappedParams->setVarAddr(CGF, VD, Base.getAddress());

  }

  I->getSecond().MappedParams->apply(CGF);

}


bool CGOpenMPRuntimeGPU::isDelayedVariableLengthDecl(CodeGenFunction &CGF,

                                                     const VarDecl *VD) const {

  const auto I = FunctionGlobalizedDecls.find(CGF.CurFn);

  if (I == FunctionGlobalizedDecls.end())

    return false;


  // Check variable declaration is delayed:

  return llvm::is_contained(I->getSecond().DelayedVariableLengthDecls, VD);

}


std::pair<llvm::Value *, llvm::Value *>


CGOpenMPRuntimeGPU::getKmpcAllocShared(CodeGenFunction &CGF,

                                       const VarDecl *VD) {

  CGBuilderTy &Bld = CGF.Builder;


  // Compute size and alignment.

  llvm::Value *Size = CGF.getTypeSize(VD->getType());

  CharUnits Align = CGM.getContext().getDeclAlign(VD);

  Size = Bld.CreateNUWAdd(

      Size, llvm::ConstantInt::get(CGF.SizeTy, Align.getQuantity() - 1));

  llvm::Value *AlignVal =

      llvm::ConstantInt::get(CGF.SizeTy, Align.getQuantity());

  Size = Bld.CreateUDiv(Size, AlignVal);

  Size = Bld.CreateNUWMul(Size, AlignVal);


  // Allocate space for this VLA object to be globalized.

  llvm::Value *AllocArgs[] = {Size};

  llvm::CallBase *VoidPtr =

      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(

                              CGM.getModule(), OMPRTL___kmpc_alloc_shared),

                          AllocArgs, VD->getName());

  VoidPtr->addRetAttr(llvm::Attribute::get(

      CGM.getLLVMContext(), llvm::Attribute::Alignment, Align.getQuantity()));


  return std::make_pair(VoidPtr, Size);

}


void CGOpenMPRuntimeGPU::getKmpcFreeShared(

    CodeGenFunction &CGF,

    const std::pair<llvm::Value *, llvm::Value *> &AddrSizePair) {

  // Deallocate the memory for each globalized VLA object

  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(

                          CGM.getModule(), OMPRTL___kmpc_free_shared),

                      {AddrSizePair.first, AddrSizePair.second});

}


void CGOpenMPRuntimeGPU::emitGenericVarsEpilog(CodeGenFunction &CGF) {

  if (getDataSharingMode() != CGOpenMPRuntimeGPU::DS_Generic)

    return;


  const auto I = FunctionGlobalizedDecls.find(CGF.CurFn);

  if (I != FunctionGlobalizedDecls.end()) {

    // Deallocate the memory for each globalized VLA object that was

    // globalized in the prolog (i.e. emitGenericVarsProlog).

    for (const auto &AddrSizePair :

         llvm::reverse(I->getSecond().EscapedVariableLengthDeclsAddrs)) {

      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(

                              CGM.getModule(), OMPRTL___kmpc_free_shared),

                          {AddrSizePair.first, AddrSizePair.second});

    }

    // Deallocate the memory for each globalized value

    for (auto &Rec : llvm::reverse(I->getSecond().LocalVarData)) {

      const auto *VD = cast<VarDecl>(Rec.first);

      I->getSecond().MappedParams->restore(CGF);


      llvm::Value *FreeArgs[] = {Rec.second.GlobalizedVal,

                                 CGF.getTypeSize(VD->getType())};

      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(

                              CGM.getModule(), OMPRTL___kmpc_free_shared),

                          FreeArgs);

    }

  }

}


void CGOpenMPRuntimeGPU::emitTeamsCall(CodeGenFunction &CGF,

                                         const OMPExecutableDirective &D,

                                         SourceLocation Loc,

                                         llvm::Function *OutlinedFn,

                                         ArrayRef<llvm::Value *> CapturedVars) {

  if (!CGF.HaveInsertPoint())

    return;


  bool IsBareKernel = D.getSingleClause<OMPXBareClause>();


  RawAddress ZeroAddr = CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,

                                                         /*Name=*/".zero.addr");

  CGF.Builder.CreateStore(CGF.Builder.getInt32(/*C*/ 0), ZeroAddr);

  llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;

  // We don't emit any thread id function call in bare kernel, but because the

  // outlined function has a pointer argument, we emit a nullptr here.

  if (IsBareKernel)

    OutlinedFnArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy));

  else

    OutlinedFnArgs.push_back(emitThreadIDAddress(CGF, Loc).emitRawPointer(CGF));

  OutlinedFnArgs.push_back(ZeroAddr.getPointer());

  OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());

  emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);

}


void CGOpenMPRuntimeGPU::emitParallelCall(

    CodeGenFunction &CGF, SourceLocation Loc, llvm::Function *OutlinedFn,

    ArrayRef<llvm::Value *> CapturedVars, const Expr *IfCond,

    llvm::Value *NumThreads, OpenMPNumThreadsClauseModifier NumThreadsModifier,

    OpenMPSeverityClauseKind Severity, const Expr *Message) {

  if (!CGF.HaveInsertPoint())

    return;


  auto &&ParallelGen = [this, Loc, OutlinedFn, CapturedVars, IfCond,

                        NumThreads](CodeGenFunction &CGF,

                                    PrePostActionTy &Action) {

    CGBuilderTy &Bld = CGF.Builder;

    llvm::Value *NumThreadsVal = NumThreads;

    llvm::Function *WFn = WrapperFunctionsMap[OutlinedFn];

    llvm::PointerType *FnPtrTy = llvm::PointerType::get(

        CGF.getLLVMContext(), CGM.getDataLayout().getProgramAddressSpace());


    llvm::Value *ID = llvm::ConstantPointerNull::get(FnPtrTy);

    if (WFn)

      ID = Bld.CreateBitOrPointerCast(WFn, FnPtrTy);


    llvm::Value *FnPtr = Bld.CreateBitOrPointerCast(OutlinedFn, FnPtrTy);


    // Create a private scope that will globalize the arguments

    // passed from the outside of the target region.

    // TODO: Is that needed?

    CodeGenFunction::OMPPrivateScope PrivateArgScope(CGF);


    Address CapturedVarsAddrs = CGF.CreateDefaultAlignTempAlloca(

        llvm::ArrayType::get(CGM.VoidPtrTy, CapturedVars.size()),

        "captured_vars_addrs");

    // There's something to share.

    if (!CapturedVars.empty()) {

      // Prepare for parallel region. Indicate the outlined function.

      ASTContext &Ctx = CGF.getContext();

      unsigned Idx = 0;

      for (llvm::Value *V : CapturedVars) {

        Address Dst = Bld.CreateConstArrayGEP(CapturedVarsAddrs, Idx);

        llvm::Value *PtrV;

        if (V->getType()->isIntegerTy())

          PtrV = Bld.CreateIntToPtr(V, CGF.VoidPtrTy);

        else

          PtrV = Bld.CreatePointerBitCastOrAddrSpaceCast(V, CGF.VoidPtrTy);

        CGF.EmitStoreOfScalar(PtrV, Dst, /*Volatile=*/false,

                              Ctx.getPointerType(Ctx.VoidPtrTy));

        ++Idx;

      }

    }


    llvm::Value *IfCondVal = nullptr;

    if (IfCond)

      IfCondVal = Bld.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.Int32Ty,

                                    /* isSigned */ false);

    else

      IfCondVal = llvm::ConstantInt::get(CGF.Int32Ty, 1);


    if (!NumThreadsVal)

      NumThreadsVal = llvm::ConstantInt::get(CGF.Int32Ty, -1);

    else

      NumThreadsVal = Bld.CreateZExtOrTrunc(NumThreadsVal, CGF.Int32Ty);


    assert(IfCondVal && "Expected a value");

    llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);

    llvm::Value *Args[] = {

        RTLoc,

        getThreadID(CGF, Loc),

        IfCondVal,

        NumThreadsVal,

        llvm::ConstantInt::get(CGF.Int32Ty, -1),

        FnPtr,

        ID,

        Bld.CreateBitOrPointerCast(CapturedVarsAddrs.emitRawPointer(CGF),

                                   CGF.VoidPtrPtrTy),

        llvm::ConstantInt::get(CGM.SizeTy, CapturedVars.size())};

    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(

                            CGM.getModule(), OMPRTL___kmpc_parallel_51),

                        Args);

  };


  RegionCodeGenTy RCG(ParallelGen);

  RCG(CGF);

}


void CGOpenMPRuntimeGPU::syncCTAThreads(CodeGenFunction &CGF) {

  // Always emit simple barriers!

  if (!CGF.HaveInsertPoint())

    return;

  // Build call __kmpc_barrier_simple_spmd(nullptr, 0);

  // This function does not use parameters, so we can emit just default values.

  llvm::Value *Args[] = {

      llvm::ConstantPointerNull::get(

          cast<llvm::PointerType>(getIdentTyPointerTy())),

      llvm::ConstantInt::get(CGF.Int32Ty, /*V=*/0, /*isSigned=*/true)};

  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(

                          CGM.getModule(), OMPRTL___kmpc_barrier_simple_spmd),

                      Args);

}


void CGOpenMPRuntimeGPU::emitBarrierCall(CodeGenFunction &CGF,

                                           SourceLocation Loc,

                                           OpenMPDirectiveKind Kind, bool,

                                           bool) {

  // Always emit simple barriers!

  if (!CGF.HaveInsertPoint())

    return;

  // Build call __kmpc_cancel_barrier(loc, thread_id);

  unsigned Flags = getDefaultFlagsForBarriers(Kind);

  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),

                         getThreadID(CGF, Loc)};


  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(

                          CGM.getModule(), OMPRTL___kmpc_barrier),

                      Args);

}


void CGOpenMPRuntimeGPU::emitCriticalRegion(

    CodeGenFunction &CGF, StringRef CriticalName,

    const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc,

    const Expr *Hint) {

  llvm::BasicBlock *LoopBB = CGF.createBasicBlock("omp.critical.loop");

  llvm::BasicBlock *TestBB = CGF.createBasicBlock("omp.critical.test");

  llvm::BasicBlock *SyncBB = CGF.createBasicBlock("omp.critical.sync");

  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.critical.body");

  llvm::BasicBlock *ExitBB = CGF.createBasicBlock("omp.critical.exit");


  auto &RT = static_cast<CGOpenMPRuntimeGPU &>(CGF.CGM.getOpenMPRuntime());


  // Get the mask of active threads in the warp.

  llvm::Value *Mask = CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(

      CGM.getModule(), OMPRTL___kmpc_warp_active_thread_mask));

  // Fetch team-local id of the thread.

  llvm::Value *ThreadID = RT.getGPUThreadID(CGF);


  // Get the width of the team.

  llvm::Value *TeamWidth = RT.getGPUNumThreads(CGF);


  // Initialize the counter variable for the loop.

  QualType Int32Ty =

      CGF.getContext().getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/0);

  Address Counter = CGF.CreateMemTemp(Int32Ty, "critical_counter");

  LValue CounterLVal = CGF.MakeAddrLValue(Counter, Int32Ty);

  CGF.EmitStoreOfScalar(llvm::Constant::getNullValue(CGM.Int32Ty), CounterLVal,

                        /*isInit=*/true);


  // Block checks if loop counter exceeds upper bound.

  CGF.EmitBlock(LoopBB);

  llvm::Value *CounterVal = CGF.EmitLoadOfScalar(CounterLVal, Loc);

  llvm::Value *CmpLoopBound = CGF.Builder.CreateICmpSLT(CounterVal, TeamWidth);

  CGF.Builder.CreateCondBr(CmpLoopBound, TestBB, ExitBB);


  // Block tests which single thread should execute region, and which threads

  // should go straight to synchronisation point.

  CGF.EmitBlock(TestBB);

  CounterVal = CGF.EmitLoadOfScalar(CounterLVal, Loc);

  llvm::Value *CmpThreadToCounter =

      CGF.Builder.CreateICmpEQ(ThreadID, CounterVal);

  CGF.Builder.CreateCondBr(CmpThreadToCounter, BodyBB, SyncBB);


  // Block emits the body of the critical region.

  CGF.EmitBlock(BodyBB);


  // Output the critical statement.

  CGOpenMPRuntime::emitCriticalRegion(CGF, CriticalName, CriticalOpGen, Loc,

                                      Hint);


  // After the body surrounded by the critical region, the single executing

  // thread will jump to the synchronisation point.

  // Block waits for all threads in current team to finish then increments the

  // counter variable and returns to the loop.

  CGF.EmitBlock(SyncBB);

  // Reconverge active threads in the warp.

  (void)CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(

                                CGM.getModule(), OMPRTL___kmpc_syncwarp),

                            Mask);


  llvm::Value *IncCounterVal =

      CGF.Builder.CreateNSWAdd(CounterVal, CGF.Builder.getInt32(1));

  CGF.EmitStoreOfScalar(IncCounterVal, CounterLVal);

  CGF.EmitBranch(LoopBB);


  // Block that is reached when  all threads in the team complete the region.

  CGF.EmitBlock(ExitBB, /*IsFinished=*/true);

}


/// Cast value to the specified type.


static llvm::Value *castValueToType(CodeGenFunction &CGF, llvm::Value *Val,

                                    QualType ValTy, QualType CastTy,

                                    SourceLocation Loc) {

  assert(!CGF.getContext().getTypeSizeInChars(CastTy).isZero() &&

         "Cast type must sized.");

  assert(!CGF.getContext().getTypeSizeInChars(ValTy).isZero() &&

         "Val type must sized.");

  llvm::Type *LLVMCastTy = CGF.ConvertTypeForMem(CastTy);

  if (ValTy == CastTy)

    return Val;

  if (CGF.getContext().getTypeSizeInChars(ValTy) ==

      CGF.getContext().getTypeSizeInChars(CastTy))

    return CGF.Builder.CreateBitCast(Val, LLVMCastTy);

  if (CastTy->isIntegerType() && ValTy->isIntegerType())

    return CGF.Builder.CreateIntCast(Val, LLVMCastTy,

                                     CastTy->hasSignedIntegerRepresentation());

  Address CastItem = CGF.CreateMemTemp(CastTy);

  Address ValCastItem = CastItem.withElementType(Val->getType());

  CGF.EmitStoreOfScalar(Val, ValCastItem, /*Volatile=*/false, ValTy,

                        LValueBaseInfo(AlignmentSource::Type),

                        TBAAAccessInfo());

  return CGF.EmitLoadOfScalar(CastItem, /*Volatile=*/false, CastTy, Loc,

                              LValueBaseInfo(AlignmentSource::Type),

                              TBAAAccessInfo());

}


///

/// Design of OpenMP reductions on the GPU

///

/// Consider a typical OpenMP program with one or more reduction

/// clauses:

///

/// float foo;

/// double bar;

/// #pragma omp target teams distribute parallel for \

///             reduction(+:foo) reduction(*:bar)

/// for (int i = 0; i < N; i++) {

///   foo += A[i]; bar *= B[i];

/// }

///

/// where 'foo' and 'bar' are reduced across all OpenMP threads in

/// all teams.  In our OpenMP implementation on the NVPTX device an

/// OpenMP team is mapped to a CUDA threadblock and OpenMP threads

/// within a team are mapped to CUDA threads within a threadblock.

/// Our goal is to efficiently aggregate values across all OpenMP

/// threads such that:

///

///   - the compiler and runtime are logically concise, and

///   - the reduction is performed efficiently in a hierarchical

///     manner as follows: within OpenMP threads in the same warp,

///     across warps in a threadblock, and finally across teams on

///     the NVPTX device.

///

/// Introduction to Decoupling

///

/// We would like to decouple the compiler and the runtime so that the

/// latter is ignorant of the reduction variables (number, data types)

/// and the reduction operators.  This allows a simpler interface

/// and implementation while still attaining good performance.

///

/// Pseudocode for the aforementioned OpenMP program generated by the

/// compiler is as follows:

///

/// 1. Create private copies of reduction variables on each OpenMP

///    thread: 'foo_private', 'bar_private'

/// 2. Each OpenMP thread reduces the chunk of 'A' and 'B' assigned

///    to it and writes the result in 'foo_private' and 'bar_private'

///    respectively.

/// 3. Call the OpenMP runtime on the GPU to reduce within a team

///    and store the result on the team master:

///

///     __kmpc_nvptx_parallel_reduce_nowait_v2(...,

///        reduceData, shuffleReduceFn, interWarpCpyFn)

///

///     where:

///       struct ReduceData {

///         double *foo;

///         double *bar;

///       } reduceData

///       reduceData.foo = &foo_private

///       reduceData.bar = &bar_private

///

///     'shuffleReduceFn' and 'interWarpCpyFn' are pointers to two

///     auxiliary functions generated by the compiler that operate on

///     variables of type 'ReduceData'.  They aid the runtime perform

///     algorithmic steps in a data agnostic manner.

///

///     'shuffleReduceFn' is a pointer to a function that reduces data

///     of type 'ReduceData' across two OpenMP threads (lanes) in the

///     same warp.  It takes the following arguments as input:

///

///     a. variable of type 'ReduceData' on the calling lane,

///     b. its lane_id,

///     c. an offset relative to the current lane_id to generate a

///        remote_lane_id.  The remote lane contains the second

///        variable of type 'ReduceData' that is to be reduced.

///     d. an algorithm version parameter determining which reduction

///        algorithm to use.

///

///     'shuffleReduceFn' retrieves data from the remote lane using

///     efficient GPU shuffle intrinsics and reduces, using the

///     algorithm specified by the 4th parameter, the two operands

///     element-wise.  The result is written to the first operand.

///

///     Different reduction algorithms are implemented in different

///     runtime functions, all calling 'shuffleReduceFn' to perform

///     the essential reduction step.  Therefore, based on the 4th

///     parameter, this function behaves slightly differently to

///     cooperate with the runtime to ensure correctness under

///     different circumstances.

///

///     'InterWarpCpyFn' is a pointer to a function that transfers

///     reduced variables across warps.  It tunnels, through CUDA

///     shared memory, the thread-private data of type 'ReduceData'

///     from lane 0 of each warp to a lane in the first warp.

/// 4. Call the OpenMP runtime on the GPU to reduce across teams.

///    The last team writes the global reduced value to memory.

///

///     ret = __kmpc_nvptx_teams_reduce_nowait(...,

///             reduceData, shuffleReduceFn, interWarpCpyFn,

///             scratchpadCopyFn, loadAndReduceFn)

///

///     'scratchpadCopyFn' is a helper that stores reduced

///     data from the team master to a scratchpad array in

///     global memory.

///

///     'loadAndReduceFn' is a helper that loads data from

///     the scratchpad array and reduces it with the input

///     operand.

///

///     These compiler generated functions hide address

///     calculation and alignment information from the runtime.

/// 5. if ret == 1:

///     The team master of the last team stores the reduced

///     result to the globals in memory.

///     foo += reduceData.foo; bar *= reduceData.bar

///

///

/// Warp Reduction Algorithms

///

/// On the warp level, we have three algorithms implemented in the

/// OpenMP runtime depending on the number of active lanes:

///

/// Full Warp Reduction

///

/// The reduce algorithm within a warp where all lanes are active

/// is implemented in the runtime as follows:

///

/// full_warp_reduce(void *reduce_data,

///                  kmp_ShuffleReductFctPtr ShuffleReduceFn) {

///   for (int offset = WARPSIZE/2; offset > 0; offset /= 2)

///     ShuffleReduceFn(reduce_data, 0, offset, 0);

/// }

///

/// The algorithm completes in log(2, WARPSIZE) steps.

///

/// 'ShuffleReduceFn' is used here with lane_id set to 0 because it is

/// not used therefore we save instructions by not retrieving lane_id

/// from the corresponding special registers.  The 4th parameter, which

/// represents the version of the algorithm being used, is set to 0 to

/// signify full warp reduction.

///

/// In this version, 'ShuffleReduceFn' behaves, per element, as follows:

///

/// #reduce_elem refers to an element in the local lane's data structure

/// #remote_elem is retrieved from a remote lane

/// remote_elem = shuffle_down(reduce_elem, offset, WARPSIZE);

/// reduce_elem = reduce_elem REDUCE_OP remote_elem;

///

/// Contiguous Partial Warp Reduction

///

/// This reduce algorithm is used within a warp where only the first

/// 'n' (n <= WARPSIZE) lanes are active.  It is typically used when the

/// number of OpenMP threads in a parallel region is not a multiple of

/// WARPSIZE.  The algorithm is implemented in the runtime as follows:

///

/// void

/// contiguous_partial_reduce(void *reduce_data,

///                           kmp_ShuffleReductFctPtr ShuffleReduceFn,

///                           int size, int lane_id) {

///   int curr_size;

///   int offset;

///   curr_size = size;

///   mask = curr_size/2;

///   while (offset>0) {

///     ShuffleReduceFn(reduce_data, lane_id, offset, 1);

///     curr_size = (curr_size+1)/2;

///     offset = curr_size/2;

///   }

/// }

///

/// In this version, 'ShuffleReduceFn' behaves, per element, as follows:

///

/// remote_elem = shuffle_down(reduce_elem, offset, WARPSIZE);

/// if (lane_id < offset)

///     reduce_elem = reduce_elem REDUCE_OP remote_elem

/// else

///     reduce_elem = remote_elem

///

/// This algorithm assumes that the data to be reduced are located in a

/// contiguous subset of lanes starting from the first.  When there is

/// an odd number of active lanes, the data in the last lane is not

/// aggregated with any other lane's dat but is instead copied over.

///

/// Dispersed Partial Warp Reduction

///

/// This algorithm is used within a warp when any discontiguous subset of

/// lanes are active.  It is used to implement the reduction operation

/// across lanes in an OpenMP simd region or in a nested parallel region.

///

/// void

/// dispersed_partial_reduce(void *reduce_data,

///                          kmp_ShuffleReductFctPtr ShuffleReduceFn) {

///   int size, remote_id;

///   int logical_lane_id = number_of_active_lanes_before_me() * 2;

///   do {

///       remote_id = next_active_lane_id_right_after_me();

///       # the above function returns 0 of no active lane

///       # is present right after the current lane.

///       size = number_of_active_lanes_in_this_warp();

///       logical_lane_id /= 2;

///       ShuffleReduceFn(reduce_data, logical_lane_id,

///                       remote_id-1-threadIdx.x, 2);

///   } while (logical_lane_id % 2 == 0 && size > 1);

/// }

///

/// There is no assumption made about the initial state of the reduction.

/// Any number of lanes (>=1) could be active at any position.  The reduction

/// result is returned in the first active lane.

///

/// In this version, 'ShuffleReduceFn' behaves, per element, as follows:

///

/// remote_elem = shuffle_down(reduce_elem, offset, WARPSIZE);

/// if (lane_id % 2 == 0 && offset > 0)

///     reduce_elem = reduce_elem REDUCE_OP remote_elem

/// else

///     reduce_elem = remote_elem

///

///

/// Intra-Team Reduction

///

/// This function, as implemented in the runtime call

/// '__kmpc_nvptx_parallel_reduce_nowait_v2', aggregates data across OpenMP

/// threads in a team.  It first reduces within a warp using the

/// aforementioned algorithms.  We then proceed to gather all such

/// reduced values at the first warp.

///

/// The runtime makes use of the function 'InterWarpCpyFn', which copies

/// data from each of the "warp master" (zeroth lane of each warp, where

/// warp-reduced data is held) to the zeroth warp.  This step reduces (in

/// a mathematical sense) the problem of reduction across warp masters in

/// a block to the problem of warp reduction.

///

///

/// Inter-Team Reduction

///

/// Once a team has reduced its data to a single value, it is stored in

/// a global scratchpad array.  Since each team has a distinct slot, this

/// can be done without locking.

///

/// The last team to write to the scratchpad array proceeds to reduce the

/// scratchpad array.  One or more workers in the last team use the helper

/// 'loadAndReduceDataFn' to load and reduce values from the array, i.e.,

/// the k'th worker reduces every k'th element.

///

/// Finally, a call is made to '__kmpc_nvptx_parallel_reduce_nowait_v2' to

/// reduce across workers and compute a globally reduced value.

///


void CGOpenMPRuntimeGPU::emitReduction(

    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates,

    ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,

    ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) {

  if (!CGF.HaveInsertPoint())

    return;


  bool ParallelReduction = isOpenMPParallelDirective(Options.ReductionKind);

  bool TeamsReduction = isOpenMPTeamsDirective(Options.ReductionKind);


  ASTContext &C = CGM.getContext();


  if (Options.SimpleReduction) {

    assert(!TeamsReduction && !ParallelReduction &&

           "Invalid reduction selection in emitReduction.");

    (void)ParallelReduction;

    CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs,

                                   ReductionOps, Options);

    return;

  }


  llvm::SmallDenseMap<const ValueDecl *, const FieldDecl *> VarFieldMap;

  llvm::SmallVector<const ValueDecl *, 4> PrivatesReductions(Privates.size());

  int Cnt = 0;

  for (const Expr *DRE : Privates) {

    PrivatesReductions[Cnt] = cast<DeclRefExpr>(DRE)->getDecl();

    ++Cnt;

  }

  const RecordDecl *ReductionRec = ::buildRecordForGlobalizedVars(

      CGM.getContext(), PrivatesReductions, {}, VarFieldMap, 1);


  if (TeamsReduction)

    TeamsReductions.push_back(ReductionRec);


  // Source location for the ident struct

  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);


  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;

  InsertPointTy AllocaIP(CGF.AllocaInsertPt->getParent(),

                         CGF.AllocaInsertPt->getIterator());

  InsertPointTy CodeGenIP(CGF.Builder.GetInsertBlock(),

                          CGF.Builder.GetInsertPoint());

  llvm::OpenMPIRBuilder::LocationDescription OmpLoc(

      CodeGenIP, CGF.SourceLocToDebugLoc(Loc));

  llvm::SmallVector<llvm::OpenMPIRBuilder::ReductionInfo> ReductionInfos;


  CodeGenFunction::OMPPrivateScope Scope(CGF);

  unsigned Idx = 0;

  for (const Expr *Private : Privates) {

    llvm::Type *ElementType;

    llvm::Value *Variable;

    llvm::Value *PrivateVariable;

    llvm::OpenMPIRBuilder::ReductionGenAtomicCBTy AtomicReductionGen = nullptr;

    ElementType = CGF.ConvertTypeForMem(Private->getType());

    const auto *RHSVar =

        cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[Idx])->getDecl());

    PrivateVariable = CGF.GetAddrOfLocalVar(RHSVar).emitRawPointer(CGF);

    const auto *LHSVar =

        cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[Idx])->getDecl());

    Variable = CGF.GetAddrOfLocalVar(LHSVar).emitRawPointer(CGF);

    llvm::OpenMPIRBuilder::EvalKind EvalKind;

    switch (CGF.getEvaluationKind(Private->getType())) {

    case TEK_Scalar:

      EvalKind = llvm::OpenMPIRBuilder::EvalKind::Scalar;

      break;

    case TEK_Complex:

      EvalKind = llvm::OpenMPIRBuilder::EvalKind::Complex;

      break;

    case TEK_Aggregate:

      EvalKind = llvm::OpenMPIRBuilder::EvalKind::Aggregate;

      break;

    }

    auto ReductionGen = [&](InsertPointTy CodeGenIP, unsigned I,

                            llvm::Value **LHSPtr, llvm::Value **RHSPtr,

                            llvm::Function *NewFunc) {

      CGF.Builder.restoreIP(CodeGenIP);

      auto *CurFn = CGF.CurFn;

      CGF.CurFn = NewFunc;


      *LHSPtr = CGF.GetAddrOfLocalVar(

                       cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl()))

                    .emitRawPointer(CGF);

      *RHSPtr = CGF.GetAddrOfLocalVar(

                       cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl()))

                    .emitRawPointer(CGF);


      emitSingleReductionCombiner(CGF, ReductionOps[I], Privates[I],

                                  cast<DeclRefExpr>(LHSExprs[I]),

                                  cast<DeclRefExpr>(RHSExprs[I]));


      CGF.CurFn = CurFn;


      return InsertPointTy(CGF.Builder.GetInsertBlock(),

                           CGF.Builder.GetInsertPoint());

    };

    ReductionInfos.emplace_back(llvm::OpenMPIRBuilder::ReductionInfo(

        ElementType, Variable, PrivateVariable, EvalKind,

        /*ReductionGen=*/nullptr, ReductionGen, AtomicReductionGen));

    Idx++;

  }


  llvm::OpenMPIRBuilder::InsertPointTy AfterIP =

      cantFail(OMPBuilder.createReductionsGPU(

          OmpLoc, AllocaIP, CodeGenIP, ReductionInfos, false, TeamsReduction,

          llvm::OpenMPIRBuilder::ReductionGenCBKind::Clang,

          CGF.getTarget().getGridValue(),

          C.getLangOpts().OpenMPCUDAReductionBufNum, RTLoc));

  CGF.Builder.restoreIP(AfterIP);

}


const VarDecl *


CGOpenMPRuntimeGPU::translateParameter(const FieldDecl *FD,

                                       const VarDecl *NativeParam) const {

  if (!NativeParam->getType()->isReferenceType())

    return NativeParam;

  QualType ArgType = NativeParam->getType();

  QualifierCollector QC;

  const Type *NonQualTy = QC.strip(ArgType);

  QualType PointeeTy = cast<ReferenceType>(NonQualTy)->getPointeeType();

  if (const auto *Attr = FD->getAttr<OMPCaptureKindAttr>()) {

    if (Attr->getCaptureKind() == OMPC_map) {

      PointeeTy = CGM.getContext().getAddrSpaceQualType(PointeeTy,

                                                        LangAS::opencl_global);

    }

  }

  ArgType = CGM.getContext().getPointerType(PointeeTy);

  QC.addRestrict();

  enum { NVPTX_local_addr = 5 };

  QC.addAddressSpace(getLangASFromTargetAS(NVPTX_local_addr));

  ArgType = QC.apply(CGM.getContext(), ArgType);

  if (isa<ImplicitParamDecl>(NativeParam))

    return ImplicitParamDecl::Create(

        CGM.getContext(), /*DC=*/nullptr, NativeParam->getLocation(),

        NativeParam->getIdentifier(), ArgType, ImplicitParamKind::Other);

  return ParmVarDecl::Create(

      CGM.getContext(),

      const_cast<DeclContext *>(NativeParam->getDeclContext()),

      NativeParam->getBeginLoc(), NativeParam->getLocation(),

      NativeParam->getIdentifier(), ArgType,

      /*TInfo=*/nullptr, SC_None, /*DefArg=*/nullptr);

}


Address


CGOpenMPRuntimeGPU::getParameterAddress(CodeGenFunction &CGF,

                                          const VarDecl *NativeParam,

                                          const VarDecl *TargetParam) const {

  assert(NativeParam != TargetParam &&

         NativeParam->getType()->isReferenceType() &&

         "Native arg must not be the same as target arg.");

  Address LocalAddr = CGF.GetAddrOfLocalVar(TargetParam);

  QualType NativeParamType = NativeParam->getType();

  QualifierCollector QC;

  const Type *NonQualTy = QC.strip(NativeParamType);

  QualType NativePointeeTy = cast<ReferenceType>(NonQualTy)->getPointeeType();

  unsigned NativePointeeAddrSpace =

      CGF.getTypes().getTargetAddressSpace(NativePointeeTy);

  QualType TargetTy = TargetParam->getType();

  llvm::Value *TargetAddr = CGF.EmitLoadOfScalar(LocalAddr, /*Volatile=*/false,

                                                 TargetTy, SourceLocation());

  // Cast to native address space.

  TargetAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(

      TargetAddr,

      llvm::PointerType::get(CGF.getLLVMContext(), NativePointeeAddrSpace));

  Address NativeParamAddr = CGF.CreateMemTemp(NativeParamType);

  CGF.EmitStoreOfScalar(TargetAddr, NativeParamAddr, /*Volatile=*/false,

                        NativeParamType);

  return NativeParamAddr;

}


void CGOpenMPRuntimeGPU::emitOutlinedFunctionCall(

    CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn,

    ArrayRef<llvm::Value *> Args) const {

  SmallVector<llvm::Value *, 4> TargetArgs;

  TargetArgs.reserve(Args.size());

  auto *FnType = OutlinedFn.getFunctionType();

  for (unsigned I = 0, E = Args.size(); I < E; ++I) {

    if (FnType->isVarArg() && FnType->getNumParams() <= I) {

      TargetArgs.append(std::next(Args.begin(), I), Args.end());

      break;

    }

    llvm::Type *TargetType = FnType->getParamType(I);

    llvm::Value *NativeArg = Args[I];

    if (!TargetType->isPointerTy()) {

      TargetArgs.emplace_back(NativeArg);

      continue;

    }

    TargetArgs.emplace_back(

        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(NativeArg, TargetType));

  }

  CGOpenMPRuntime::emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, TargetArgs);

}


/// Emit function which wraps the outline parallel region

/// and controls the arguments which are passed to this function.

/// The wrapper ensures that the outlined function is called

/// with the correct arguments when data is shared.

llvm::Function *CGOpenMPRuntimeGPU::createParallelDataSharingWrapper(

    llvm::Function *OutlinedParallelFn, const OMPExecutableDirective &D) {

  ASTContext &Ctx = CGM.getContext();

  const auto &CS = *D.getCapturedStmt(OMPD_parallel);


  // Create a function that takes as argument the source thread.

  FunctionArgList WrapperArgs;

  QualType Int16QTy =

      Ctx.getIntTypeForBitwidth(/*DestWidth=*/16, /*Signed=*/false);

  QualType Int32QTy =

      Ctx.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false);

  ImplicitParamDecl ParallelLevelArg(Ctx, /*DC=*/nullptr, D.getBeginLoc(),

                                     /*Id=*/nullptr, Int16QTy,

                                     ImplicitParamKind::Other);

  ImplicitParamDecl WrapperArg(Ctx, /*DC=*/nullptr, D.getBeginLoc(),

                               /*Id=*/nullptr, Int32QTy,

                               ImplicitParamKind::Other);

  WrapperArgs.emplace_back(&ParallelLevelArg);

  WrapperArgs.emplace_back(&WrapperArg);


  const CGFunctionInfo &CGFI =

      CGM.getTypes().arrangeBuiltinFunctionDeclaration(Ctx.VoidTy, WrapperArgs);


  auto *Fn = llvm::Function::Create(

      CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage,

      Twine(OutlinedParallelFn->getName(), "_wrapper"), &CGM.getModule());


  // Ensure we do not inline the function. This is trivially true for the ones

  // passed to __kmpc_fork_call but the ones calles in serialized regions

  // could be inlined. This is not a perfect but it is closer to the invariant

  // we want, namely, every data environment starts with a new function.

  // TODO: We should pass the if condition to the runtime function and do the

  //       handling there. Much cleaner code.

  Fn->addFnAttr(llvm::Attribute::NoInline);


  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);

  Fn->setLinkage(llvm::GlobalValue::InternalLinkage);

  Fn->setDoesNotRecurse();


  CodeGenFunction CGF(CGM, /*suppressNewContext=*/true);

  CGF.StartFunction(GlobalDecl(), Ctx.VoidTy, Fn, CGFI, WrapperArgs,

                    D.getBeginLoc(), D.getBeginLoc());


  const auto *RD = CS.getCapturedRecordDecl();

  auto CurField = RD->field_begin();


  Address ZeroAddr = CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,

                                                      /*Name=*/".zero.addr");

  CGF.Builder.CreateStore(CGF.Builder.getInt32(/*C*/ 0), ZeroAddr);

  // Get the array of arguments.

  SmallVector<llvm::Value *, 8> Args;


  Args.emplace_back(CGF.GetAddrOfLocalVar(&WrapperArg).emitRawPointer(CGF));

  Args.emplace_back(ZeroAddr.emitRawPointer(CGF));


  CGBuilderTy &Bld = CGF.Builder;

  auto CI = CS.capture_begin();


  // Use global memory for data sharing.

  // Handle passing of global args to workers.

  RawAddress GlobalArgs =

      CGF.CreateDefaultAlignTempAlloca(CGF.VoidPtrPtrTy, "global_args");

  llvm::Value *GlobalArgsPtr = GlobalArgs.getPointer();

  llvm::Value *DataSharingArgs[] = {GlobalArgsPtr};

  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(

                          CGM.getModule(), OMPRTL___kmpc_get_shared_variables),

                      DataSharingArgs);


  // Retrieve the shared variables from the list of references returned

  // by the runtime. Pass the variables to the outlined function.

  Address SharedArgListAddress = Address::invalid();

  if (CS.capture_size() > 0 ||

      isOpenMPLoopBoundSharingDirective(D.getDirectiveKind())) {

    SharedArgListAddress = CGF.EmitLoadOfPointer(

        GlobalArgs, CGF.getContext()

                        .getPointerType(CGF.getContext().VoidPtrTy)

                        .castAs<PointerType>());

  }

  unsigned Idx = 0;

  if (isOpenMPLoopBoundSharingDirective(D.getDirectiveKind())) {

    Address Src = Bld.CreateConstInBoundsGEP(SharedArgListAddress, Idx);

    Address TypedAddress = Bld.CreatePointerBitCastOrAddrSpaceCast(

        Src, Bld.getPtrTy(0), CGF.SizeTy);

    llvm::Value *LB = CGF.EmitLoadOfScalar(

        TypedAddress,

        /*Volatile=*/false,

        CGF.getContext().getPointerType(CGF.getContext().getSizeType()),

        cast<OMPLoopDirective>(D).getLowerBoundVariable()->getExprLoc());

    Args.emplace_back(LB);

    ++Idx;

    Src = Bld.CreateConstInBoundsGEP(SharedArgListAddress, Idx);

    TypedAddress = Bld.CreatePointerBitCastOrAddrSpaceCast(Src, Bld.getPtrTy(0),

                                                           CGF.SizeTy);

    llvm::Value *UB = CGF.EmitLoadOfScalar(

        TypedAddress,

        /*Volatile=*/false,

        CGF.getContext().getPointerType(CGF.getContext().getSizeType()),

        cast<OMPLoopDirective>(D).getUpperBoundVariable()->getExprLoc());

    Args.emplace_back(UB);

    ++Idx;

  }

  if (CS.capture_size() > 0) {

    ASTContext &CGFContext = CGF.getContext();

    for (unsigned I = 0, E = CS.capture_size(); I < E; ++I, ++CI, ++CurField) {

      QualType ElemTy = CurField->getType();

      Address Src = Bld.CreateConstInBoundsGEP(SharedArgListAddress, I + Idx);

      Address TypedAddress = Bld.CreatePointerBitCastOrAddrSpaceCast(

          Src, CGF.ConvertTypeForMem(CGFContext.getPointerType(ElemTy)),

          CGF.ConvertTypeForMem(ElemTy));

      llvm::Value *Arg = CGF.EmitLoadOfScalar(TypedAddress,

                                              /*Volatile=*/false,

                                              CGFContext.getPointerType(ElemTy),

                                              CI->getLocation());

      if (CI->capturesVariableByCopy() &&

          !CI->getCapturedVar()->getType()->isAnyPointerType()) {

        Arg = castValueToType(CGF, Arg, ElemTy, CGFContext.getUIntPtrType(),

                              CI->getLocation());

      }

      Args.emplace_back(Arg);

    }

  }


  emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedParallelFn, Args);

  CGF.FinishFunction();

  return Fn;

}


void CGOpenMPRuntimeGPU::emitFunctionProlog(CodeGenFunction &CGF,

                                              const Decl *D) {

  if (getDataSharingMode() != CGOpenMPRuntimeGPU::DS_Generic)

    return;


  assert(D && "Expected function or captured|block decl.");

  assert(FunctionGlobalizedDecls.count(CGF.CurFn) == 0 &&

         "Function is registered already.");

  assert((!TeamAndReductions.first || TeamAndReductions.first == D) &&

         "Team is set but not processed.");

  const Stmt *Body = nullptr;

  bool NeedToDelayGlobalization = false;

  if (const auto *FD = dyn_cast<FunctionDecl>(D)) {

    Body = FD->getBody();

  } else if (const auto *BD = dyn_cast<BlockDecl>(D)) {

    Body = BD->getBody();

  } else if (const auto *CD = dyn_cast<CapturedDecl>(D)) {

    Body = CD->getBody();

    NeedToDelayGlobalization = CGF.CapturedStmtInfo->getKind() == CR_OpenMP;

    if (NeedToDelayGlobalization &&

        getExecutionMode() == CGOpenMPRuntimeGPU::EM_SPMD)

      return;

  }

  if (!Body)

    return;

  CheckVarsEscapingDeclContext VarChecker(CGF, TeamAndReductions.second);

  VarChecker.Visit(Body);

  const RecordDecl *GlobalizedVarsRecord =

      VarChecker.getGlobalizedRecord(IsInTTDRegion);

  TeamAndReductions.first = nullptr;

  TeamAndReductions.second.clear();

  ArrayRef<const ValueDecl *> EscapedVariableLengthDecls =

      VarChecker.getEscapedVariableLengthDecls();

  ArrayRef<const ValueDecl *> DelayedVariableLengthDecls =

      VarChecker.getDelayedVariableLengthDecls();

  if (!GlobalizedVarsRecord && EscapedVariableLengthDecls.empty() &&

      DelayedVariableLengthDecls.empty())

    return;

  auto I = FunctionGlobalizedDecls.try_emplace(CGF.CurFn).first;

  I->getSecond().MappedParams =

      std::make_unique<CodeGenFunction::OMPMapVars>();

  I->getSecond().EscapedParameters.insert(

      VarChecker.getEscapedParameters().begin(),

      VarChecker.getEscapedParameters().end());

  I->getSecond().EscapedVariableLengthDecls.append(

      EscapedVariableLengthDecls.begin(), EscapedVariableLengthDecls.end());

  I->getSecond().DelayedVariableLengthDecls.append(

      DelayedVariableLengthDecls.begin(), DelayedVariableLengthDecls.end());

  DeclToAddrMapTy &Data = I->getSecond().LocalVarData;

  for (const ValueDecl *VD : VarChecker.getEscapedDecls()) {

    assert(VD->isCanonicalDecl() && "Expected canonical declaration");

    Data.try_emplace(VD);

  }

  if (!NeedToDelayGlobalization) {

    emitGenericVarsProlog(CGF, D->getBeginLoc());

    struct GlobalizationScope final : EHScopeStack::Cleanup {

      GlobalizationScope() = default;


      void Emit(CodeGenFunction &CGF, Flags flags) override {

        static_cast<CGOpenMPRuntimeGPU &>(CGF.CGM.getOpenMPRuntime())

            .emitGenericVarsEpilog(CGF);

      }

    };

    CGF.EHStack.pushCleanup<GlobalizationScope>(NormalAndEHCleanup);

  }

}


Address CGOpenMPRuntimeGPU::getAddressOfLocalVariable(CodeGenFunction &CGF,

                                                        const VarDecl *VD) {

  if (VD && VD->hasAttr<OMPAllocateDeclAttr>()) {

    const auto *A = VD->getAttr<OMPAllocateDeclAttr>();

    auto AS = LangAS::Default;

    switch (A->getAllocatorType()) {

    case OMPAllocateDeclAttr::OMPNullMemAlloc:

    case OMPAllocateDeclAttr::OMPDefaultMemAlloc:

    case OMPAllocateDeclAttr::OMPHighBWMemAlloc:

    case OMPAllocateDeclAttr::OMPLowLatMemAlloc:

      break;

    case OMPAllocateDeclAttr::OMPThreadMemAlloc:

      return Address::invalid();

    case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:

      // TODO: implement aupport for user-defined allocators.

      return Address::invalid();

    case OMPAllocateDeclAttr::OMPConstMemAlloc:

      AS = LangAS::cuda_constant;

      break;

    case OMPAllocateDeclAttr::OMPPTeamMemAlloc:

      AS = LangAS::cuda_shared;

      break;

    case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:

    case OMPAllocateDeclAttr::OMPCGroupMemAlloc:

      break;

    }

    llvm::Type *VarTy = CGF.ConvertTypeForMem(VD->getType());

    auto *GV = new llvm::GlobalVariable(

        CGM.getModule(), VarTy, /*isConstant=*/false,

        llvm::GlobalValue::InternalLinkage, llvm::PoisonValue::get(VarTy),

        VD->getName(),

        /*InsertBefore=*/nullptr, llvm::GlobalValue::NotThreadLocal,

        CGM.getContext().getTargetAddressSpace(AS));

    CharUnits Align = CGM.getContext().getDeclAlign(VD);

    GV->setAlignment(Align.getAsAlign());

    return Address(

        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(

            GV, CGF.Builder.getPtrTy(CGM.getContext().getTargetAddressSpace(

                    VD->getType().getAddressSpace()))),

        VarTy, Align);

  }


  if (getDataSharingMode() != CGOpenMPRuntimeGPU::DS_Generic)

    return Address::invalid();


  VD = VD->getCanonicalDecl();

  auto I = FunctionGlobalizedDecls.find(CGF.CurFn);

  if (I == FunctionGlobalizedDecls.end())

    return Address::invalid();

  auto VDI = I->getSecond().LocalVarData.find(VD);

  if (VDI != I->getSecond().LocalVarData.end())

    return VDI->second.PrivateAddr;

  if (VD->hasAttrs()) {

    for (specific_attr_iterator<OMPReferencedVarAttr> IT(VD->attr_begin()),

         E(VD->attr_end());

         IT != E; ++IT) {

      auto VDI = I->getSecond().LocalVarData.find(

          cast<VarDecl>(cast<DeclRefExpr>(IT->getRef())->getDecl())

              ->getCanonicalDecl());

      if (VDI != I->getSecond().LocalVarData.end())

        return VDI->second.PrivateAddr;

    }

  }


  return Address::invalid();

}


void CGOpenMPRuntimeGPU::functionFinished(CodeGenFunction &CGF) {

  FunctionGlobalizedDecls.erase(CGF.CurFn);

  CGOpenMPRuntime::functionFinished(CGF);

}


void CGOpenMPRuntimeGPU::getDefaultDistScheduleAndChunk(

    CodeGenFunction &CGF, const OMPLoopDirective &S,

    OpenMPDistScheduleClauseKind &ScheduleKind,

    llvm::Value *&Chunk) const {

  auto &RT = static_cast<CGOpenMPRuntimeGPU &>(CGF.CGM.getOpenMPRuntime());

  if (getExecutionMode() == CGOpenMPRuntimeGPU::EM_SPMD) {

    ScheduleKind = OMPC_DIST_SCHEDULE_static;

    Chunk = CGF.EmitScalarConversion(

        RT.getGPUNumThreads(CGF),

        CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0),

        S.getIterationVariable()->getType(), S.getBeginLoc());

    return;

  }

  CGOpenMPRuntime::getDefaultDistScheduleAndChunk(

      CGF, S, ScheduleKind, Chunk);

}


void CGOpenMPRuntimeGPU::getDefaultScheduleAndChunk(

    CodeGenFunction &CGF, const OMPLoopDirective &S,

    OpenMPScheduleClauseKind &ScheduleKind,

    const Expr *&ChunkExpr) const {

  ScheduleKind = OMPC_SCHEDULE_static;

  // Chunk size is 1 in this case.

  llvm::APInt ChunkSize(32, 1);

  ChunkExpr = IntegerLiteral::Create(CGF.getContext(), ChunkSize,

      CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0),

      SourceLocation());

}


void CGOpenMPRuntimeGPU::adjustTargetSpecificDataForLambdas(

    CodeGenFunction &CGF, const OMPExecutableDirective &D) const {

  assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) &&

         " Expected target-based directive.");

  const CapturedStmt *CS = D.getCapturedStmt(OMPD_target);

  for (const CapturedStmt::Capture &C : CS->captures()) {

    // Capture variables captured by reference in lambdas for target-based

    // directives.

    if (!C.capturesVariable())

      continue;

    const VarDecl *VD = C.getCapturedVar();

    const auto *RD = VD->getType()

                         .getCanonicalType()

                         .getNonReferenceType()

                         ->getAsCXXRecordDecl();

    if (!RD || !RD->isLambda())

      continue;

    Address VDAddr = CGF.GetAddrOfLocalVar(VD);

    LValue VDLVal;

    if (VD->getType().getCanonicalType()->isReferenceType())

      VDLVal = CGF.EmitLoadOfReferenceLValue(VDAddr, VD->getType());

    else

      VDLVal = CGF.MakeAddrLValue(

          VDAddr, VD->getType().getCanonicalType().getNonReferenceType());

    llvm::DenseMap<const ValueDecl *, FieldDecl *> Captures;

    FieldDecl *ThisCapture = nullptr;

    RD->getCaptureFields(Captures, ThisCapture);

    if (ThisCapture && CGF.CapturedStmtInfo->isCXXThisExprCaptured()) {

      LValue ThisLVal =

          CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture);

      llvm::Value *CXXThis = CGF.LoadCXXThis();

      CGF.EmitStoreOfScalar(CXXThis, ThisLVal);

    }

    for (const LambdaCapture &LC : RD->captures()) {

      if (LC.getCaptureKind() != LCK_ByRef)

        continue;

      const ValueDecl *VD = LC.getCapturedVar();

      // FIXME: For now VD is always a VarDecl because OpenMP does not support

      //  capturing structured bindings in lambdas yet.

      if (!CS->capturesVariable(cast<VarDecl>(VD)))

        continue;

      auto It = Captures.find(VD);

      assert(It != Captures.end() && "Found lambda capture without field.");

      LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second);

      Address VDAddr = CGF.GetAddrOfLocalVar(cast<VarDecl>(VD));

      if (VD->getType().getCanonicalType()->isReferenceType())

        VDAddr = CGF.EmitLoadOfReferenceLValue(VDAddr,

                                               VD->getType().getCanonicalType())

                     .getAddress();

      CGF.EmitStoreOfScalar(VDAddr.emitRawPointer(CGF), VarLVal);

    }

  }

}


bool CGOpenMPRuntimeGPU::hasAllocateAttributeForGlobalVar(const VarDecl *VD,

                                                            LangAS &AS) {

  if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())

    return false;

  const auto *A = VD->getAttr<OMPAllocateDeclAttr>();

  switch(A->getAllocatorType()) {

  case OMPAllocateDeclAttr::OMPNullMemAlloc:

  case OMPAllocateDeclAttr::OMPDefaultMemAlloc:

  // Not supported, fallback to the default mem space.

  case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:

  case OMPAllocateDeclAttr::OMPCGroupMemAlloc:

  case OMPAllocateDeclAttr::OMPHighBWMemAlloc:

  case OMPAllocateDeclAttr::OMPLowLatMemAlloc:

  case OMPAllocateDeclAttr::OMPThreadMemAlloc:

    AS = LangAS::Default;

    return true;

  case OMPAllocateDeclAttr::OMPConstMemAlloc:

    AS = LangAS::cuda_constant;

    return true;

  case OMPAllocateDeclAttr::OMPPTeamMemAlloc:

    AS = LangAS::cuda_shared;

    return true;

  case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:

    llvm_unreachable("Expected predefined allocator for the variables with the "

                     "static storage.");

  }

  return false;

}


// Get current OffloadArch and ignore any unknown values


static OffloadArch getOffloadArch(CodeGenModule &CGM) {

  if (!CGM.getTarget().hasFeature("ptx"))

    return OffloadArch::UNKNOWN;

  for (const auto &Feature : CGM.getTarget().getTargetOpts().FeatureMap) {

    if (Feature.getValue()) {

      OffloadArch Arch = StringToOffloadArch(Feature.getKey());

      if (Arch != OffloadArch::UNKNOWN)

        return Arch;

    }

  }

  return OffloadArch::UNKNOWN;

}


/// Check to see if target architecture supports unified addressing which is

/// a restriction for OpenMP requires clause "unified_shared_memory".


void CGOpenMPRuntimeGPU::processRequiresDirective(const OMPRequiresDecl *D) {

  for (const OMPClause *Clause : D->clauselists()) {

    if (Clause->getClauseKind() == OMPC_unified_shared_memory) {

      OffloadArch Arch = getOffloadArch(CGM);

      switch (Arch) {

      case OffloadArch::SM_20:

      case OffloadArch::SM_21:

      case OffloadArch::SM_30:

      case OffloadArch::SM_32_:

      case OffloadArch::SM_35:

      case OffloadArch::SM_37:

      case OffloadArch::SM_50:

      case OffloadArch::SM_52:

      case OffloadArch::SM_53: {

        SmallString<256> Buffer;

        llvm::raw_svector_ostream Out(Buffer);

        Out << "Target architecture " << OffloadArchToString(Arch)

            << " does not support unified addressing";

        CGM.Error(Clause->getBeginLoc(), Out.str());

        return;

      }

      case OffloadArch::SM_60:

      case OffloadArch::SM_61:

      case OffloadArch::SM_62:

      case OffloadArch::SM_70:

      case OffloadArch::SM_72:

      case OffloadArch::SM_75:

      case OffloadArch::SM_80:

      case OffloadArch::SM_86:

      case OffloadArch::SM_87:

      case OffloadArch::SM_89:

      case OffloadArch::SM_90:

      case OffloadArch::SM_90a:

      case OffloadArch::SM_100:

      case OffloadArch::SM_100a:

      case OffloadArch::SM_101:

      case OffloadArch::SM_101a:

      case OffloadArch::SM_103:

      case OffloadArch::SM_103a:

      case OffloadArch::SM_120:

      case OffloadArch::SM_120a:

      case OffloadArch::SM_121:

      case OffloadArch::SM_121a:

      case OffloadArch::GFX600:

      case OffloadArch::GFX601:

      case OffloadArch::GFX602:

      case OffloadArch::GFX700:

      case OffloadArch::GFX701:

      case OffloadArch::GFX702:

      case OffloadArch::GFX703:

      case OffloadArch::GFX704:

      case OffloadArch::GFX705:

      case OffloadArch::GFX801:

      case OffloadArch::GFX802:

      case OffloadArch::GFX803:

      case OffloadArch::GFX805:

      case OffloadArch::GFX810:

      case OffloadArch::GFX9_GENERIC:

      case OffloadArch::GFX900:

      case OffloadArch::GFX902:

      case OffloadArch::GFX904:

      case OffloadArch::GFX906:

      case OffloadArch::GFX908:

      case OffloadArch::GFX909:

      case OffloadArch::GFX90a:

      case OffloadArch::GFX90c:

      case OffloadArch::GFX9_4_GENERIC:

      case OffloadArch::GFX942:

      case OffloadArch::GFX950:

      case OffloadArch::GFX10_1_GENERIC:

      case OffloadArch::GFX1010:

      case OffloadArch::GFX1011:

      case OffloadArch::GFX1012:

      case OffloadArch::GFX1013:

      case OffloadArch::GFX10_3_GENERIC:

      case OffloadArch::GFX1030:

      case OffloadArch::GFX1031:

      case OffloadArch::GFX1032:

      case OffloadArch::GFX1033:

      case OffloadArch::GFX1034:

      case OffloadArch::GFX1035:

      case OffloadArch::GFX1036:

      case OffloadArch::GFX11_GENERIC:

      case OffloadArch::GFX1100:

      case OffloadArch::GFX1101:

      case OffloadArch::GFX1102:

      case OffloadArch::GFX1103:

      case OffloadArch::GFX1150:

      case OffloadArch::GFX1151:

      case OffloadArch::GFX1152:

      case OffloadArch::GFX1153:

      case OffloadArch::GFX12_GENERIC:

      case OffloadArch::GFX1200:

      case OffloadArch::GFX1201:

      case OffloadArch::GFX1250:

      case OffloadArch::GFX1251:

      case OffloadArch::AMDGCNSPIRV:

      case OffloadArch::Generic:

      case OffloadArch::GRANITERAPIDS:

      case OffloadArch::BMG_G21:

      case OffloadArch::UNUSED:

      case OffloadArch::UNKNOWN:

        break;

      case OffloadArch::LAST:

        llvm_unreachable("Unexpected GPU arch.");

      }

    }

  }

  CGOpenMPRuntime::processRequiresDirective(D);

}


llvm::Value *CGOpenMPRuntimeGPU::getGPUNumThreads(CodeGenFunction &CGF) {

  CGBuilderTy &Bld = CGF.Builder;

  llvm::Module *M = &CGF.CGM.getModule();

  const char *LocSize = "__kmpc_get_hardware_num_threads_in_block";

  llvm::Function *F = M->getFunction(LocSize);

  if (!F) {

    F = llvm::Function::Create(llvm::FunctionType::get(CGF.Int32Ty, {}, false),

                               llvm::GlobalVariable::ExternalLinkage, LocSize,

                               &CGF.CGM.getModule());

  }

  return Bld.CreateCall(F, {}, "nvptx_num_threads");

}


llvm::Value *CGOpenMPRuntimeGPU::getGPUThreadID(CodeGenFunction &CGF) {

  ArrayRef<llvm::Value *> Args{};

  return CGF.EmitRuntimeCall(

      OMPBuilder.getOrCreateRuntimeFunction(

          CGM.getModule(), OMPRTL___kmpc_get_hardware_thread_id_in_block),

      Args);

}


V
#define V(N, I)
Definition ASTContext.h:3619

Attr.h

AttrFeatureKind::Feature
@ Feature
Definition LoongArch.cpp:403

AttrFeatureKind::Arch
@ Arch
Definition LoongArch.cpp:403

CGDebugInfo.h

getTeamsReductionVars
static void getTeamsReductionVars(ASTContext &Ctx, const OMPExecutableDirective &D, llvm::SmallVectorImpl< const ValueDecl * > &Vars)
Get list of reduction variables from the teams ... directives.
Definition CGOpenMPRuntimeGPU.cpp:988

castValueToType
static llvm::Value * castValueToType(CodeGenFunction &CGF, llvm::Value *Val, QualType ValTy, QualType CastTy, SourceLocation Loc)
Cast value to the specified type.
Definition CGOpenMPRuntimeGPU.cpp:1416

getDistributeLastprivateVars
static void getDistributeLastprivateVars(ASTContext &Ctx, const OMPExecutableDirective &D, llvm::SmallVectorImpl< const ValueDecl * > &Vars)
Get list of lastprivate variables from the teams distribute ... or teams {distribute ....
Definition CGOpenMPRuntimeGPU.cpp:963

hasNestedSPMDDirective
static bool hasNestedSPMDDirective(ASTContext &Ctx, const OMPExecutableDirective &D)
Check for inner (nested) SPMD construct, if any.
Definition CGOpenMPRuntimeGPU.cpp:515

supportsSPMDExecutionMode
static bool supportsSPMDExecutionMode(ASTContext &Ctx, const OMPExecutableDirective &D)
Definition CGOpenMPRuntimeGPU.cpp:617

getOffloadArch
static OffloadArch getOffloadArch(CodeGenModule &CGM)
Definition CGOpenMPRuntimeGPU.cpp:2259

CGOpenMPRuntimeGPU.h

CodeGenFunction.h

DeclOpenMP.h
This file defines OpenMP nodes for declarative directives.

OpenMPClause.h
This file defines OpenMP AST classes for clauses.

getPrivateItem
static std::pair< ValueDecl *, bool > getPrivateItem(Sema &S, Expr *&RefExpr, SourceLocation &ELoc, SourceRange &ERange, bool AllowArraySection=false, bool AllowAssumedSizeArray=false, StringRef DiagType="")
Definition SemaOpenMP.cpp:5319

StmtOpenMP.h
This file defines OpenMP AST classes for executable directives and clauses.

StmtVisitor.h

ArgType
Definition FormatString.h:265

Base

FieldDecl::Create
static FieldDecl * Create(const ASTContext &C, DeclContext *DC, SourceLocation StartLoc, SourceLocation IdLoc, const IdentifierInfo *Id, QualType T, TypeSourceInfo *TInfo, Expr *BW, bool Mutable, InClassInitStyle InitStyle)
Definition Decl.cpp:4692

OMPExecutableDirective

OMPLoopDirective

clang::ASTContext
Holds long-lived AST nodes (such as types and decls) that can be referred to throughout the semantic ...
Definition ASTContext.h:220

clang::ASTContext::getPointerType
QualType getPointerType(QualType T) const
Return the uniqued reference to the type for a pointer to the specified type.
Definition ASTContext.cpp:3832

clang::ASTContext::VoidPtrTy
CanQualType VoidPtrTy
Definition ASTContext.h:1281

clang::ASTContext::getUIntPtrType
QualType getUIntPtrType() const
Return a type compatible with "uintptr_t" (C99 7.18.1.4), as defined by the target.
Definition ASTContext.cpp:6932

clang::ASTContext::getIntTypeForBitwidth
QualType getIntTypeForBitwidth(unsigned DestWidth, unsigned Signed) const
getIntTypeForBitwidth - sets integer QualTy according to specified details: bitwidth,...
Definition ASTContext.cpp:13228

clang::ASTContext::getTypeSizeInChars
CharUnits getTypeSizeInChars(QualType T) const
Return the size of the specified (complete) type T, in characters.
Definition ASTContext.cpp:2565

clang::ASTContext::VoidTy
CanQualType VoidTy
Definition ASTContext.h:1254

clang::ASTContext::getSizeType
QualType getSizeType() const
Return the unique type for "size_t" (C99 7.17), defined in <stddef.h>.
Definition ASTContext.cpp:6877

clang::ASTContext::getTargetInfo
const TargetInfo & getTargetInfo() const
Definition ASTContext.h:891

clang::Attr
Attr - This represents one attribute.
Definition Attr.h:44

clang::BlockDecl::captures
ArrayRef< Capture > captures() const
Definition Decl.h:4781

clang::BlockExpr::getBlockDecl
const BlockDecl * getBlockDecl() const
Definition Expr.h:6570

clang::CallExpr::getCallee
Expr * getCallee()
Definition Expr.h:3024

clang::CallExpr::arguments
arg_range arguments()
Definition Expr.h:3129

clang::CapturedStmt::Capture
Describes the capture of either a variable, or 'this', or variable-length array type.
Definition Stmt.h:3899

clang::CapturedStmt
This captures a statement into a function.
Definition Stmt.h:3886

clang::CapturedStmt::capturesVariable
bool capturesVariable(const VarDecl *Var) const
True if this variable has been captured.
Definition Stmt.cpp:1475

clang::CapturedStmt::captures
capture_range captures()
Definition Stmt.h:4024

clang::CastExpr::getCastKind
CastKind getCastKind() const
Definition Expr.h:3654

clang::CastExpr::getSubExpr
Expr * getSubExpr()
Definition Expr.h:3660

clang::CharUnits
CharUnits - This is an opaque type for sizes expressed in character units.
Definition CharUnits.h:38

clang::CharUnits::isZero
bool isZero() const
isZero - Test whether the quantity equals zero.
Definition CharUnits.h:122

clang::CharUnits::getAsAlign
llvm::Align getAsAlign() const
getAsAlign - Returns Quantity as a valid llvm::Align, Beware llvm::Align assumes power of two 8-bit b...
Definition CharUnits.h:189

clang::CharUnits::getQuantity
QuantityType getQuantity() const
getQuantity - Get the raw integer representation of this quantity.
Definition CharUnits.h:185

clang::CodeGen::Address
Like RawAddress, an abstract representation of an aligned address, but the pointer contained in this ...
Definition Address.h:128

clang::CodeGen::Address::invalid
static Address invalid()
Definition Address.h:176

clang::CodeGen::Address::emitRawPointer
llvm::Value * emitRawPointer(CodeGenFunction &CGF) const
Return the pointer contained in this class after authenticating it and adding offset to it if necessa...
Definition Address.h:253

clang::CodeGen::Address::withElementType
Address withElementType(llvm::Type *ElemTy) const
Return address with different element type, but same pointer and alignment.
Definition Address.h:276

clang::CodeGen::CGBuilderTy
Definition CGBuilder.h:50

clang::CodeGen::CGBuilderTy::CreateStore
llvm::StoreInst * CreateStore(llvm::Value *Val, Address Addr, bool IsVolatile=false)
Definition CGBuilder.h:140

clang::CodeGen::CGBuilderTy::CreatePointerBitCastOrAddrSpaceCast
Address CreatePointerBitCastOrAddrSpaceCast(Address Addr, llvm::Type *Ty, llvm::Type *ElementTy, const llvm::Twine &Name="")
Definition CGBuilder.h:207

clang::CodeGen::CGBuilderTy::CreateConstArrayGEP
Address CreateConstArrayGEP(Address Addr, uint64_t Index, const llvm::Twine &Name="")
Given addr = [n x T]* ... produce name = getelementptr inbounds addr, i64 0, i64 index where i64 is a...
Definition CGBuilder.h:245

clang::CodeGen::CGBuilderTy::CreateConstInBoundsGEP
Address CreateConstInBoundsGEP(Address Addr, uint64_t Index, const llvm::Twine &Name="")
Given addr = T* ... produce name = getelementptr inbounds addr, i64 index where i64 is actually the t...
Definition CGBuilder.h:265

clang::CodeGen::CGFunctionInfo
CGFunctionInfo - Class to encapsulate the information about a function definition.
Definition CGFunctionInfo.h:599

clang::CodeGen::CGOpenMPRuntimeGPU::emitTeamsOutlinedFunction
llvm::Function * emitTeamsOutlinedFunction(CodeGenFunction &CGF, const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) override
Emits inlined function for the specified OpenMP teams.
Definition CGOpenMPRuntimeGPU.cpp:998

clang::CodeGen::CGOpenMPRuntimeGPU::emitProcBindClause
void emitProcBindClause(CodeGenFunction &CGF, llvm::omp::ProcBindKind ProcBind, SourceLocation Loc) override
Emit call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32global_tid, int proc_bind) to generate...
Definition CGOpenMPRuntimeGPU.cpp:898

clang::CodeGen::CGOpenMPRuntimeGPU::emitReduction
void emitReduction(CodeGenFunction &CGF, SourceLocation Loc, ArrayRef< const Expr * > Privates, ArrayRef< const Expr * > LHSExprs, ArrayRef< const Expr * > RHSExprs, ArrayRef< const Expr * > ReductionOps, ReductionOptionsTy Options) override
Emit a code for reduction clause.
Definition CGOpenMPRuntimeGPU.cpp:1684

clang::CodeGen::CGOpenMPRuntimeGPU::DataSharingMode
DataSharingMode
Target codegen is specialized based on two data-sharing modes: CUDA, in which the local variables are...
Definition CGOpenMPRuntimeGPU.h:40

clang::CodeGen::CGOpenMPRuntimeGPU::DS_CUDA
@ DS_CUDA
CUDA data sharing mode.
Definition CGOpenMPRuntimeGPU.h:42

clang::CodeGen::CGOpenMPRuntimeGPU::DS_Generic
@ DS_Generic
Generic data-sharing mode.
Definition CGOpenMPRuntimeGPU.h:44

clang::CodeGen::CGOpenMPRuntimeGPU::getDefaultDistScheduleAndChunk
void getDefaultDistScheduleAndChunk(CodeGenFunction &CGF, const OMPLoopDirective &S, OpenMPDistScheduleClauseKind &ScheduleKind, llvm::Value *&Chunk) const override
Choose a default value for the dist_schedule clause.
Definition CGOpenMPRuntimeGPU.cpp:2146

clang::CodeGen::CGOpenMPRuntimeGPU::getAddressOfLocalVariable
Address getAddressOfLocalVariable(CodeGenFunction &CGF, const VarDecl *VD) override
Gets the OpenMP-specific address of the local variable.
Definition CGOpenMPRuntimeGPU.cpp:2074

clang::CodeGen::CGOpenMPRuntimeGPU::emitFunctionProlog
void emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) override
Emits OpenMP-specific function prolog.
Definition CGOpenMPRuntimeGPU.cpp:2007

clang::CodeGen::CGOpenMPRuntimeGPU::getDefaultScheduleAndChunk
void getDefaultScheduleAndChunk(CodeGenFunction &CGF, const OMPLoopDirective &S, OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const override
Choose a default value for the schedule clause.
Definition CGOpenMPRuntimeGPU.cpp:2163

clang::CodeGen::CGOpenMPRuntimeGPU::emitNumTeamsClause
void emitNumTeamsClause(CodeGenFunction &CGF, const Expr *NumTeams, const Expr *ThreadLimit, SourceLocation Loc) override
This function ought to emit, in the general case, a call to.
Definition CGOpenMPRuntimeGPU.cpp:935

clang::CodeGen::CGOpenMPRuntimeGPU::emitCriticalRegion
void emitCriticalRegion(CodeGenFunction &CGF, StringRef CriticalName, const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc, const Expr *Hint=nullptr) override
Emits a critical region.
Definition CGOpenMPRuntimeGPU.cpp:1346

clang::CodeGen::CGOpenMPRuntimeGPU::emitTeamsCall
void emitTeamsCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, SourceLocation Loc, llvm::Function *OutlinedFn, ArrayRef< llvm::Value * > CapturedVars) override
Emits code for teams call of the OutlinedFn with variables captured in a record which address is stor...
Definition CGOpenMPRuntimeGPU.cpp:1206

clang::CodeGen::CGOpenMPRuntimeGPU::hasAllocateAttributeForGlobalVar
bool hasAllocateAttributeForGlobalVar(const VarDecl *VD, LangAS &AS) override
Checks if the variable has associated OMPAllocateDeclAttr attribute with the predefined allocator and...
Definition CGOpenMPRuntimeGPU.cpp:2229

clang::CodeGen::CGOpenMPRuntimeGPU::getKmpcFreeShared
void getKmpcFreeShared(CodeGenFunction &CGF, const std::pair< llvm::Value *, llvm::Value * > &AddrSizePair) override
Get call to __kmpc_free_shared.
Definition CGOpenMPRuntimeGPU.cpp:1169

clang::CodeGen::CGOpenMPRuntimeGPU::CGOpenMPRuntimeGPU
CGOpenMPRuntimeGPU(CodeGenModule &CGM)
Definition CGOpenMPRuntimeGPU.cpp:865

clang::CodeGen::CGOpenMPRuntimeGPU::emitParallelOutlinedFunction
llvm::Function * emitParallelOutlinedFunction(CodeGenFunction &CGF, const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) override
Emits inlined function for the specified OpenMP parallel.
Definition CGOpenMPRuntimeGPU.cpp:940

clang::CodeGen::CGOpenMPRuntimeGPU::functionFinished
void functionFinished(CodeGenFunction &CGF) override
Cleans up references to the objects in finished function.
Definition CGOpenMPRuntimeGPU.cpp:2141

clang::CodeGen::CGOpenMPRuntimeGPU::getGPUThreadID
llvm::Value * getGPUThreadID(CodeGenFunction &CGF)
Get the id of the current thread on the GPU.
Definition CGOpenMPRuntimeGPU.cpp:2398

clang::CodeGen::CGOpenMPRuntimeGPU::processRequiresDirective
void processRequiresDirective(const OMPRequiresDecl *D) override
Perform check on requires decl to ensure that target architecture supports unified addressing.
Definition CGOpenMPRuntimeGPU.cpp:2274

clang::CodeGen::CGOpenMPRuntimeGPU::isDelayedVariableLengthDecl
bool isDelayedVariableLengthDecl(CodeGenFunction &CGF, const VarDecl *VD) const override
Declare generalized virtual functions which need to be defined by all specializations of OpenMPGPURun...
Definition CGOpenMPRuntimeGPU.cpp:1132

clang::CodeGen::CGOpenMPRuntimeGPU::emitOutlinedFunctionCall
void emitOutlinedFunctionCall(CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn, ArrayRef< llvm::Value * > Args={}) const override
Emits call of the outlined function with the provided arguments, translating these arguments to corre...
Definition CGOpenMPRuntimeGPU.cpp:1853

clang::CodeGen::CGOpenMPRuntimeGPU::getParameterAddress
Address getParameterAddress(CodeGenFunction &CGF, const VarDecl *NativeParam, const VarDecl *TargetParam) const override
Gets the address of the native argument basing on the address of the target-specific parameter.
Definition CGOpenMPRuntimeGPU.cpp:1827

clang::CodeGen::CGOpenMPRuntimeGPU::ExecutionMode
ExecutionMode
Defines the execution mode.
Definition CGOpenMPRuntimeGPU.h:27

clang::CodeGen::CGOpenMPRuntimeGPU::EM_NonSPMD
@ EM_NonSPMD
Non-SPMD execution mode (1 master thread, others are workers).
Definition CGOpenMPRuntimeGPU.h:31

clang::CodeGen::CGOpenMPRuntimeGPU::EM_Unknown
@ EM_Unknown
Unknown execution mode (orphaned directive).
Definition CGOpenMPRuntimeGPU.h:33

clang::CodeGen::CGOpenMPRuntimeGPU::EM_SPMD
@ EM_SPMD
SPMD execution mode (all threads are worker threads).
Definition CGOpenMPRuntimeGPU.h:29

clang::CodeGen::CGOpenMPRuntimeGPU::emitBarrierCall
void emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind Kind, bool EmitChecks=true, bool ForceSimpleCall=false) override
Emit an implicit/explicit barrier for OpenMP threads.
Definition CGOpenMPRuntimeGPU.cpp:1329

clang::CodeGen::CGOpenMPRuntimeGPU::getGPUNumThreads
llvm::Value * getGPUNumThreads(CodeGenFunction &CGF)
Get the maximum number of threads in a block of the GPU.
Definition CGOpenMPRuntimeGPU.cpp:2385

clang::CodeGen::CGOpenMPRuntimeGPU::translateParameter
const VarDecl * translateParameter(const FieldDecl *FD, const VarDecl *NativeParam) const override
Translates the native parameter of outlined function if this is required for target.
Definition CGOpenMPRuntimeGPU.cpp:1795

clang::CodeGen::CGOpenMPRuntimeGPU::getKmpcAllocShared
std::pair< llvm::Value *, llvm::Value * > getKmpcAllocShared(CodeGenFunction &CGF, const VarDecl *VD) override
Get call to __kmpc_alloc_shared.
Definition CGOpenMPRuntimeGPU.cpp:1143

clang::CodeGen::CGOpenMPRuntimeGPU::isGPU
bool isGPU() const override
Returns true if the current target is a GPU.
Definition CGOpenMPRuntimeGPU.h:134

clang::CodeGen::CGOpenMPRuntimeGPU::emitSeverityClause
llvm::Value * emitSeverityClause(OpenMPSeverityClauseKind Severity, SourceLocation Loc) override
Definition CGOpenMPRuntimeGPU.cpp:913

clang::CodeGen::CGOpenMPRuntimeGPU::emitMessageClause
llvm::Value * emitMessageClause(CodeGenFunction &CGF, const Expr *Message, SourceLocation Loc) override
Definition CGOpenMPRuntimeGPU.cpp:904

clang::CodeGen::CGOpenMPRuntimeGPU::emitParallelCall
void emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc, llvm::Function *OutlinedFn, ArrayRef< llvm::Value * > CapturedVars, const Expr *IfCond, llvm::Value *NumThreads, OpenMPNumThreadsClauseModifier NumThreadsModifier=OMPC_NUMTHREADS_unknown, OpenMPSeverityClauseKind Severity=OMPC_SEVERITY_fatal, const Expr *Message=nullptr) override
Emits code for parallel or serial call of the OutlinedFn with variables captured in a record which ad...
Definition CGOpenMPRuntimeGPU.cpp:1231

clang::CodeGen::CGOpenMPRuntimeGPU::emitNumThreadsClause
void emitNumThreadsClause(CodeGenFunction &CGF, llvm::Value *NumThreads, SourceLocation Loc, OpenMPNumThreadsClauseModifier Modifier=OMPC_NUMTHREADS_unknown, OpenMPSeverityClauseKind Severity=OMPC_SEVERITY_fatal, SourceLocation SeverityLoc=SourceLocation(), const Expr *Message=nullptr, SourceLocation MessageLoc=SourceLocation()) override
Emits call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32global_tid, kmp_int32 num_threads) ...
Definition CGOpenMPRuntimeGPU.cpp:920

clang::CodeGen::CGOpenMPRuntimeGPU::adjustTargetSpecificDataForLambdas
void adjustTargetSpecificDataForLambdas(CodeGenFunction &CGF, const OMPExecutableDirective &D) const override
Adjust some parameters for the target-based directives, like addresses of the variables captured by r...
Definition CGOpenMPRuntimeGPU.cpp:2175

clang::CodeGen::CGOpenMPRuntime::emitThreadIDAddress
virtual Address emitThreadIDAddress(CodeGenFunction &CGF, SourceLocation Loc)
Emits address of the word in a memory where current thread id is stored.
Definition CGOpenMPRuntime.cpp:1931

clang::CodeGen::CGOpenMPRuntime::CGOpenMPRuntime
CGOpenMPRuntime(CodeGenModule &CGM)
Definition CGOpenMPRuntime.cpp:1032

clang::CodeGen::CGOpenMPRuntime::CGM
CodeGenModule & CGM
Definition CGOpenMPRuntime.h:310

clang::CodeGen::CGOpenMPRuntime::getSingleCompoundChild
static const Stmt * getSingleCompoundChild(ASTContext &Ctx, const Stmt *Body)
Checks if the Body is the CompoundStmt and returns its child statement iff there is only one that is ...
Definition CGOpenMPRuntime.cpp:6271

clang::CodeGen::CGOpenMPRuntime::emitUpdateLocation
llvm::Value * emitUpdateLocation(CodeGenFunction &CGF, SourceLocation Loc, unsigned Flags=0, bool EmitLoc=false)
Emits object of ident_t type with info for source location.
Definition CGOpenMPRuntime.cpp:1371

clang::CodeGen::CGOpenMPRuntime::functionFinished
virtual void functionFinished(CodeGenFunction &CGF)
Cleans up references to the objects in finished function.
Definition CGOpenMPRuntime.cpp:1468

clang::CodeGen::CGOpenMPRuntime::emitTeamsOutlinedFunction
virtual llvm::Function * emitTeamsOutlinedFunction(CodeGenFunction &CGF, const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen)
Emits outlined function for the specified OpenMP teams directive D.
Definition CGOpenMPRuntime.cpp:1272

clang::CodeGen::CGOpenMPRuntime::OMPBuilder
llvm::OpenMPIRBuilder OMPBuilder
An OpenMP-IR-Builder instance.
Definition CGOpenMPRuntime.h:313

clang::CodeGen::CGOpenMPRuntime::emitTargetOutlinedFunctionHelper
virtual void emitTargetOutlinedFunctionHelper(const OMPExecutableDirective &D, StringRef ParentName, llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, bool IsOffloadEntry, const RegionCodeGenTy &CodeGen)
Helper to emit outlined function for 'target' directive.
Definition CGOpenMPRuntime.cpp:6226

clang::CodeGen::CGOpenMPRuntime::hasRequiresUnifiedSharedMemory
bool hasRequiresUnifiedSharedMemory() const
Return whether the unified_shared_memory has been specified.
Definition CGOpenMPRuntime.cpp:10726

clang::CodeGen::CGOpenMPRuntime::processRequiresDirective
virtual void processRequiresDirective(const OMPRequiresDecl *D)
Perform check on requires decl to ensure that target architecture supports unified addressing.
Definition CGOpenMPRuntime.cpp:10673

clang::CodeGen::CGOpenMPRuntime::getThreadID
llvm::Value * getThreadID(CodeGenFunction &CGF, SourceLocation Loc)
Gets thread id value for the current thread.
Definition CGOpenMPRuntime.cpp:1400

clang::CodeGen::CGOpenMPRuntime::computeMinAndMaxThreadsAndTeams
void computeMinAndMaxThreadsAndTeams(const OMPExecutableDirective &D, CodeGenFunction &CGF, llvm::OpenMPIRBuilder::TargetKernelDefaultAttrs &Attrs)
Helper to determine the min/max number of threads/teams for D.
Definition CGOpenMPRuntime.cpp:6187

clang::CodeGen::CGOpenMPRuntime::getDefaultFlagsForBarriers
static unsigned getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind)
Returns default flags for the barriers depending on the directive, for which this barier is going to ...
Definition CGOpenMPRuntime.cpp:2294

clang::CodeGen::CGOpenMPRuntime::emitParallelOutlinedFunction
virtual llvm::Function * emitParallelOutlinedFunction(CodeGenFunction &CGF, const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen)
Emits outlined function for the specified OpenMP parallel directive D.
Definition CGOpenMPRuntime.cpp:1262

clang::CodeGen::CGOpenMPRuntime::getDefaultDistScheduleAndChunk
virtual void getDefaultDistScheduleAndChunk(CodeGenFunction &CGF, const OMPLoopDirective &S, OpenMPDistScheduleClauseKind &ScheduleKind, llvm::Value *&Chunk) const
Choose default schedule type and chunk value for the dist_schedule clause.
Definition CGOpenMPRuntime.h:1555

clang::CodeGen::CGOpenMPRuntime::getIdentTyPointerTy
llvm::Type * getIdentTyPointerTy()
Returns pointer to ident_t type.
Definition CGOpenMPRuntime.cpp:1488

clang::CodeGen::CGOpenMPRuntime::emitSingleReductionCombiner
void emitSingleReductionCombiner(CodeGenFunction &CGF, const Expr *ReductionOp, const Expr *PrivateRef, const DeclRefExpr *LHS, const DeclRefExpr *RHS)
Emits single reduction combiner.
Definition CGOpenMPRuntime.cpp:4926

clang::CodeGen::CGOpenMPRuntime::getOMPBuilder
llvm::OpenMPIRBuilder & getOMPBuilder()
Definition CGOpenMPRuntime.h:307

clang::CodeGen::CGOpenMPRuntime::emitCriticalRegion
virtual void emitCriticalRegion(CodeGenFunction &CGF, StringRef CriticalName, const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc, const Expr *Hint=nullptr)
Emits a critical region.
Definition CGOpenMPRuntime.cpp:1993

clang::CodeGen::CGOpenMPRuntime::emitOutlinedFunctionCall
virtual void emitOutlinedFunctionCall(CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn, ArrayRef< llvm::Value * > Args={}) const
Emits call of the outlined function with the provided arguments, translating these arguments to corre...
Definition CGOpenMPRuntime.cpp:11866

clang::CodeGen::CGOpenMPRuntime::emitReduction
virtual void emitReduction(CodeGenFunction &CGF, SourceLocation Loc, ArrayRef< const Expr * > Privates, ArrayRef< const Expr * > LHSExprs, ArrayRef< const Expr * > RHSExprs, ArrayRef< const Expr * > ReductionOps, ReductionOptionsTy Options)
Emit a code for reduction clause.
Definition CGOpenMPRuntime.cpp:5190

clang::CodeGen::CodeGenFunction::CGCapturedStmtInfo::getKind
CapturedRegionKind getKind() const
Definition CodeGenFunction.h:478

clang::CodeGen::CodeGenFunction::CGCapturedStmtInfo::isCXXThisExprCaptured
bool isCXXThisExprCaptured() const
Definition CodeGenFunction.h:489

clang::CodeGen::CodeGenFunction::OMPPrivateScope
The scope used to remap some variables as private in the OpenMP loop body (or other captured region e...
Definition CodeGenFunction.h:1201

clang::CodeGen::CodeGenFunction
CodeGenFunction - This class organizes the per-function state that is used while generating LLVM code...
Definition CodeGenFunction.h:247

clang::CodeGen::CodeGenFunction::EmitLoadOfReferenceLValue
LValue EmitLoadOfReferenceLValue(LValue RefLVal)
Definition CGExpr.cpp:3221

clang::CodeGen::CodeGenFunction::CapturedStmtInfo
CGCapturedStmtInfo * CapturedStmtInfo
Definition CodeGenFunction.h:522

clang::CodeGen::CodeGenFunction::createBasicBlock
llvm::BasicBlock * createBasicBlock(const Twine &name="", llvm::Function *parent=nullptr, llvm::BasicBlock *before=nullptr)
createBasicBlock - Create an LLVM basic block.
Definition CodeGenFunction.h:2617

clang::CodeGen::CodeGenFunction::EmitLoadOfPointer
Address EmitLoadOfPointer(Address Ptr, const PointerType *PtrTy, LValueBaseInfo *BaseInfo=nullptr, TBAAAccessInfo *TBAAInfo=nullptr)
Load a pointer with type PtrTy stored at address Ptr.
Definition CGExpr.cpp:3230

clang::CodeGen::CodeGenFunction::MakeNaturalAlignPointeeRawAddrLValue
LValue MakeNaturalAlignPointeeRawAddrLValue(llvm::Value *V, QualType T)
Same as MakeNaturalAlignPointeeAddrLValue except that the pointer is known to be unsigned.
Definition CodeGenFunction.cpp:230

clang::CodeGen::CodeGenFunction::AllocaInsertPt
llvm::AssertingVH< llvm::Instruction > AllocaInsertPt
AllocaInsertPoint - This is an instruction in the entry block before which we prefer to insert alloca...
Definition CodeGenFunction.h:423

clang::CodeGen::CodeGenFunction::SourceLocToDebugLoc
llvm::DebugLoc SourceLocToDebugLoc(SourceLocation Location)
Converts Location to a DebugLoc, if debug information is enabled.
Definition CodeGenFunction.cpp:3241

clang::CodeGen::CodeGenFunction::CreateDefaultAlignTempAlloca
RawAddress CreateDefaultAlignTempAlloca(llvm::Type *Ty, const Twine &Name="tmp")
CreateDefaultAlignedTempAlloca - This creates an alloca with the default ABI alignment of the given L...
Definition CGExpr.cpp:175

clang::CodeGen::CodeGenFunction::getTarget
const TargetInfo & getTarget() const
Definition CodeGenFunction.h:2192

clang::CodeGen::CodeGenFunction::StartFunction
void StartFunction(GlobalDecl GD, QualType RetTy, llvm::Function *Fn, const CGFunctionInfo &FnInfo, const FunctionArgList &Args, SourceLocation Loc=SourceLocation(), SourceLocation StartLoc=SourceLocation())
Emit code for the start of a function.
Definition CodeGenFunction.cpp:753

clang::CodeGen::CodeGenFunction::EvaluateExprAsBool
llvm::Value * EvaluateExprAsBool(const Expr *E)
EvaluateExprAsBool - Perform the usual unary conversions on the specified expression and compare the ...
Definition CGExpr.cpp:224

clang::CodeGen::CodeGenFunction::HaveInsertPoint
bool HaveInsertPoint() const
HaveInsertPoint - True if an insertion point is defined.
Definition CodeGenFunction.h:2658

clang::CodeGen::CodeGenFunction::getDebugInfo
CGDebugInfo * getDebugInfo()
Definition CodeGenFunction.h:2150

clang::CodeGen::CodeGenFunction::getTypeSize
llvm::Value * getTypeSize(QualType Ty)
Returns calculated size of the specified type.
Definition CGStmtOpenMP.cpp:384

clang::CodeGen::CodeGenFunction::EmitLValueForFieldInitialization
LValue EmitLValueForFieldInitialization(LValue Base, const FieldDecl *Field)
EmitLValueForFieldInitialization - Like EmitLValueForField, except that if the Field is a reference,...
Definition CGExpr.cpp:5617

clang::CodeGen::CodeGenFunction::Builder
CGBuilderTy Builder
Definition CodeGenFunction.h:286

clang::CodeGen::CodeGenFunction::EHStack
EHScopeStack EHStack
Definition CodeGenFunction.h:653

clang::CodeGen::CodeGenFunction::getContext
ASTContext & getContext() const
Definition CodeGenFunction.h:2149

clang::CodeGen::CodeGenFunction::EmitLoadOfScalar
llvm::Value * EmitLoadOfScalar(Address Addr, bool Volatile, QualType Ty, SourceLocation Loc, AlignmentSource Source=AlignmentSource::Type, bool isNontemporal=false)
EmitLoadOfScalar - Load a scalar value from an address, taking care to appropriately convert from the...
Definition CodeGenFunction.h:4316

clang::CodeGen::CodeGenFunction::EmitRuntimeCall
llvm::CallInst * EmitRuntimeCall(llvm::FunctionCallee callee, const Twine &name="")

clang::CodeGen::CodeGenFunction::ConvertTypeForMem
llvm::Type * ConvertTypeForMem(QualType T)
Definition CodeGenFunction.cpp:236

clang::CodeGen::CodeGenFunction::getTypes
CodeGenTypes & getTypes() const
Definition CodeGenFunction.h:2148

clang::CodeGen::CodeGenFunction::getEvaluationKind
static TypeEvaluationKind getEvaluationKind(QualType T)
getEvaluationKind - Return the TypeEvaluationKind of QualType T.
Definition CodeGenFunction.cpp:249

clang::CodeGen::CodeGenFunction::CGM
CodeGenModule & CGM
Definition CodeGenFunction.h:278

clang::CodeGen::CodeGenFunction::EmitBranch
void EmitBranch(llvm::BasicBlock *Block)
EmitBranch - Emit a branch to the specified basic block from the current insert block,...
Definition CGStmt.cpp:675

clang::CodeGen::CodeGenFunction::CreateMemTemp
RawAddress CreateMemTemp(QualType T, const Twine &Name="tmp", RawAddress *Alloca=nullptr)
CreateMemTemp - Create a temporary memory object of the given type, with appropriate alignmen and cas...
Definition CGExpr.cpp:187

clang::CodeGen::CodeGenFunction::CurFn
llvm::Function * CurFn
Definition CodeGenFunction.h:347

clang::CodeGen::CodeGenFunction::MakeAddrLValue
LValue MakeAddrLValue(Address Addr, QualType T, AlignmentSource Source=AlignmentSource::Type)
Definition CodeGenFunction.h:2708

clang::CodeGen::CodeGenFunction::FinishFunction
void FinishFunction(SourceLocation EndLoc=SourceLocation())
FinishFunction - Complete IR generation of the current function.
Definition CodeGenFunction.cpp:363

clang::CodeGen::CodeGenFunction::LoadCXXThis
llvm::Value * LoadCXXThis()
LoadCXXThis - Load the value of 'this'.
Definition CodeGenFunction.h:3122

clang::CodeGen::CodeGenFunction::GetAddrOfLocalVar
Address GetAddrOfLocalVar(const VarDecl *VD)
GetAddrOfLocalVar - Return the address of a local variable.
Definition CodeGenFunction.h:3034

clang::CodeGen::CodeGenFunction::getLLVMContext
llvm::LLVMContext & getLLVMContext()
Definition CodeGenFunction.h:2193

clang::CodeGen::CodeGenFunction::EmitScalarConversion
llvm::Value * EmitScalarConversion(llvm::Value *Src, QualType SrcTy, QualType DstTy, SourceLocation Loc)
Emit a conversion from the specified type to the specified destination type, both of which are LLVM s...
Definition CGExprScalar.cpp:5895

clang::CodeGen::CodeGenFunction::EmitStoreOfScalar
void EmitStoreOfScalar(llvm::Value *Value, Address Addr, bool Volatile, QualType Ty, AlignmentSource Source=AlignmentSource::Type, bool isInit=false, bool isNontemporal=false)
EmitStoreOfScalar - Store a scalar value to an address, taking care to appropriately convert from the...
Definition CodeGenFunction.h:4338

clang::CodeGen::CodeGenFunction::EmitBlock
void EmitBlock(llvm::BasicBlock *BB, bool IsFinished=false)
EmitBlock - Emit the given block.
Definition CGStmt.cpp:655

clang::CodeGen::CodeGenModule
This class organizes the cross-function state that is used while generating LLVM code.
Definition CodeGenModule.h:326

clang::CodeGen::CodeGenModule::SetInternalFunctionAttributes
void SetInternalFunctionAttributes(GlobalDecl GD, llvm::Function *F, const CGFunctionInfo &FI)
Set the attributes on the LLVM function for the given decl and function info.
Definition CodeGenModule.cpp:3034

clang::CodeGen::CodeGenModule::getModule
llvm::Module & getModule() const
Definition CodeGenModule.h:831

clang::CodeGen::CodeGenModule::getTypes
CodeGenTypes & getTypes()
Definition CodeGenModule.h:849

clang::CodeGen::CodeGenModule::getTarget
const TargetInfo & getTarget() const
Definition CodeGenModule.h:836

clang::CodeGen::CodeGenModule::getOpenMPRuntime
CGOpenMPRuntime & getOpenMPRuntime()
Return a reference to the configured OpenMP runtime.
Definition CodeGenModule.h:729

clang::CodeGen::CodeGenModule::getContext
ASTContext & getContext() const
Definition CodeGenModule.h:821

clang::CodeGen::CodeGenModule::getLLVMContext
llvm::LLVMContext & getLLVMContext()
Definition CodeGenModule.h:843

clang::CodeGen::CodeGenTypes::GetFunctionType
llvm::FunctionType * GetFunctionType(const CGFunctionInfo &Info)
GetFunctionType - Get the LLVM function type for.
Definition CGCall.cpp:1701

clang::CodeGen::CodeGenTypes::arrangeBuiltinFunctionDeclaration
const CGFunctionInfo & arrangeBuiltinFunctionDeclaration(QualType resultType, const FunctionArgList &args)
A builtin function is a freestanding function using the default C conventions.
Definition CGCall.cpp:739

clang::CodeGen::CodeGenTypes::getTargetAddressSpace
unsigned getTargetAddressSpace(QualType T) const
Definition CodeGenTypes.cpp:893

clang::CodeGen::FunctionArgList
FunctionArgList - Type for representing both the decl and type of parameters to a function.
Definition CGCall.h:375

clang::CodeGen::LValueBaseInfo
Definition CGValue.h:165

clang::CodeGen::LValue
LValue - This represents an lvalue references.
Definition CGValue.h:182

clang::CodeGen::LValue::getAddress
Address getAddress() const
Definition CGValue.h:361

clang::CodeGen::PrePostActionTy
A basic class for pre|post-action for advanced codegen sequence for OpenMP region.
Definition CGOpenMPRuntime.h:58

clang::CodeGen::RawAddress
An abstract representation of an aligned address.
Definition Address.h:42

clang::CodeGen::RawAddress::getPointer
llvm::Value * getPointer() const
Definition Address.h:66

clang::CodeGen::RegionCodeGenTy
Class provides a way to call simple version of codegen for OpenMP region, or an advanced with possibl...
Definition CGOpenMPRuntime.h:68

clang::CodeGen::RegionCodeGenTy::setAction
void setAction(PrePostActionTy &Action) const
Definition CGOpenMPRuntime.h:89

clang::ConstStmtVisitor
ConstStmtVisitor - This class implements a simple visitor for Stmt subclasses.
Definition StmtVisitor.h:196

clang::DeclContext
DeclContext - This is used only as base class of specific decl types that can act as declaration cont...
Definition DeclBase.h:1449

clang::DeclContext::addDecl
void addDecl(Decl *D)
Add the declaration D into this context.
Definition DeclBase.cpp:1816

clang::DeclRefExpr::getDecl
ValueDecl * getDecl()
Definition Expr.h:1338

clang::DeclStmt::decls
decl_range decls()
Definition Stmt.h:1659

clang::Decl::getAttr
T * getAttr() const
Definition DeclBase.h:573

clang::Decl::hasAttrs
bool hasAttrs() const
Definition DeclBase.h:518

clang::Decl::attr_end
attr_iterator attr_end() const
Definition DeclBase.h:542

clang::Decl::isCanonicalDecl
bool isCanonicalDecl() const
Whether this particular Decl is a canonical one.
Definition DeclBase.h:984

clang::Decl::attr_begin
attr_iterator attr_begin() const
Definition DeclBase.h:539

clang::Decl::getLocation
SourceLocation getLocation() const
Definition DeclBase.h:439

clang::Decl::getDeclContext
DeclContext * getDeclContext()
Definition DeclBase.h:448

clang::Decl::getBeginLoc
SourceLocation getBeginLoc() const LLVM_READONLY
Definition DeclBase.h:431

clang::Decl::getAttrs
AttrVec & getAttrs()
Definition DeclBase.h:524

clang::Decl::hasAttr
bool hasAttr() const
Definition DeclBase.h:577

clang::Decl::getCanonicalDecl
virtual Decl * getCanonicalDecl()
Retrieves the "canonical" declaration of the given declaration.
Definition DeclBase.h:978

clang::DeclaratorDecl::getBeginLoc
SourceLocation getBeginLoc() const LLVM_READONLY
Definition Decl.h:831

clang::Expr
This represents one expression.
Definition Expr.h:112

clang::Expr::IgnoreParenImpCasts
Expr * IgnoreParenImpCasts() LLVM_READONLY
Skip past any parentheses and implicit casts which might surround this expression until reaching a fi...
Definition Expr.cpp:3085

clang::Expr::IgnoreParens
Expr * IgnoreParens() LLVM_READONLY
Skip past any parentheses which might surround this expression until reaching a fixed point.
Definition Expr.cpp:3081

clang::Expr::isLValue
bool isLValue() const
isLValue - True if this expression is an "l-value" according to the rules of the current language.
Definition Expr.h:284

clang::FieldDecl
Represents a member of a struct/union/class.
Definition Decl.h:3160

clang::FieldDecl::Create
static FieldDecl * Create(const ASTContext &C, DeclContext *DC, SourceLocation StartLoc, SourceLocation IdLoc, const IdentifierInfo *Id, QualType T, TypeSourceInfo *TInfo, Expr *BW, bool Mutable, InClassInitStyle InitStyle)
Definition Decl.cpp:4692

clang::GlobalDecl
GlobalDecl - represents a global declaration.
Definition GlobalDecl.h:57

clang::ImplicitParamDecl
Definition Decl.h:1749

clang::ImplicitParamDecl::Create
static ImplicitParamDecl * Create(ASTContext &C, DeclContext *DC, SourceLocation IdLoc, IdentifierInfo *Id, QualType T, ImplicitParamKind ParamKind)
Create implicit parameter.
Definition Decl.cpp:5525

clang::IntegerLiteral::Create
static IntegerLiteral * Create(const ASTContext &C, const llvm::APInt &V, QualType type, SourceLocation l)
Returns a new integer literal with value 'V' and type 'type'.
Definition Expr.cpp:971

clang::LambdaCapture
Describes the capture of a variable or of this, or of a C++1y init-capture.
Definition LambdaCapture.h:25

clang::LambdaExpr::isInitCapture
bool isInitCapture(const LambdaCapture *Capture) const
Determine whether one of this lambda's captures is an init-capture.
Definition ExprCXX.cpp:1358

clang::LambdaExpr::captures
capture_range captures() const
Retrieve this lambda's captures.
Definition ExprCXX.cpp:1371

clang::NamedDecl::getIdentifier
IdentifierInfo * getIdentifier() const
Get the identifier that names this declaration, if there is one.
Definition Decl.h:295

clang::NamedDecl::getName
StringRef getName() const
Get the name of identifier for this declaration as a StringRef.
Definition Decl.h:301

clang::OMPClause
This is a basic class for representing single OpenMP clause.
Definition OpenMPClause.h:55

clang::OMPLastprivateClause
This represents clause 'lastprivate' in the 'pragma omp ...' directives.
Definition OpenMPClause.h:3518

clang::OMPReductionClause
This represents clause 'reduction' in the 'pragma omp ...' directives.
Definition OpenMPClause.h:3829

clang::OMPRequiresDecl
This represents 'pragma omp requires...' directive.
Definition DeclOpenMP.h:479

clang::OMPRequiresDecl::clauselists
clauselist_range clauselists()
Definition DeclOpenMP.h:504

clang::OMPXBareClause
This represents 'ompx_bare' clause in the 'pragma omp target teams ...' directive.
Definition OpenMPClause.h:10173

clang::ParmVarDecl::Create
static ParmVarDecl * Create(ASTContext &C, DeclContext *DC, SourceLocation StartLoc, SourceLocation IdLoc, const IdentifierInfo *Id, QualType T, TypeSourceInfo *TInfo, StorageClass S, Expr *DefArg)
Definition Decl.cpp:2946

clang::PointerType
PointerType - C99 6.7.5.1 - Pointer Declarators.
Definition TypeBase.h:3328

clang::QualType
A (possibly-)qualified type.
Definition TypeBase.h:937

clang::QualType::getAddressSpace
LangAS getAddressSpace() const
Return the address space of this type.
Definition TypeBase.h:8416

clang::QualType::getNonReferenceType
QualType getNonReferenceType() const
If Type is a reference type (e.g., const int&), returns the type that the reference refers to ("const...
Definition TypeBase.h:8475

clang::QualType::getCanonicalType
QualType getCanonicalType() const
Definition TypeBase.h:8342

clang::QualifierCollector
A qualifier set is used to build a set of qualifiers.
Definition TypeBase.h:8230

clang::QualifierCollector::strip
const Type * strip(QualType type)
Collect any qualifiers on the given type and return an unqualified type.
Definition TypeBase.h:8237

clang::QualifierCollector::apply
QualType apply(const ASTContext &Context, QualType QT) const
Apply the collected qualifiers to the given type.
Definition Type.cpp:4660

clang::Qualifiers::addAddressSpace
void addAddressSpace(LangAS space)
Definition TypeBase.h:597

clang::Qualifiers::addRestrict
void addRestrict()
Definition TypeBase.h:480

clang::RecordDecl
Represents a struct/union/class.
Definition Decl.h:4312

clang::RecordDecl::completeDefinition
virtual void completeDefinition()
Note that the definition of this type is now complete.
Definition Decl.cpp:5221

clang::Scope
Scope - A scope is a transient data structure that is used while parsing the program.
Definition Scope.h:41

clang::SourceLocation
Encodes a location in the source.
Definition SourceLocation.h:90

clang::Stmt
Stmt - This represents one statement.
Definition Stmt.h:85

clang::Stmt::children
child_range children()
Definition Stmt.cpp:295

clang::TagDecl::startDefinition
void startDefinition()
Starts the definition of this tag declaration.
Definition Decl.cpp:4898

clang::TargetInfo::getNewAlign
unsigned getNewAlign() const
Return the largest alignment for which a suitably-sized allocation with 'operator new(size_t)' is gua...
Definition TargetInfo.h:761

clang::TargetInfo::getTargetOpts
TargetOptions & getTargetOpts() const
Retrieve the target options.
Definition TargetInfo.h:323

clang::TargetInfo::getGridValue
virtual const llvm::omp::GV & getGridValue() const
Definition TargetInfo.h:1716

clang::TargetInfo::hasFeature
virtual bool hasFeature(StringRef Feature) const
Determine whether the given target has the given feature.
Definition TargetInfo.h:1549

clang::TargetOptions::FeatureMap
llvm::StringMap< bool > FeatureMap
The map of which features have been enabled disabled based on the command line.
Definition TargetOptions.h:62

clang::Type
The base class of the type hierarchy.
Definition TypeBase.h:1833

clang::Type::getAsCXXRecordDecl
CXXRecordDecl * getAsCXXRecordDecl() const
Retrieves the CXXRecordDecl that this type refers to, either because the type is a RecordType or beca...
Definition Type.h:26

clang::Type::isIntegerType
bool isIntegerType() const
isIntegerType() does not include complex integers (a GCC extension).
Definition TypeBase.h:8927

clang::Type::isReferenceType
bool isReferenceType() const
Definition TypeBase.h:8551

clang::Type::isLValueReferenceType
bool isLValueReferenceType() const
Definition TypeBase.h:8555

clang::Type::hasSignedIntegerRepresentation
bool hasSignedIntegerRepresentation() const
Determine whether this type has an signed integer representation of some sort, e.g....
Definition Type.cpp:2243

clang::Type::isVariablyModifiedType
bool isVariablyModifiedType() const
Whether this type is a variably-modified type (C99 6.7.5).
Definition TypeBase.h:2800

clang::UnaryOperator::getSubExpr
Expr * getSubExpr() const
Definition Expr.h:2285

clang::UnaryOperator::getOpcode
Opcode getOpcode() const
Definition Expr.h:2280

clang::ValueDecl
Represent the declaration of a variable (in which case it is an lvalue) a function (in which case it ...
Definition Decl.h:712

clang::ValueDecl::getType
QualType getType() const
Definition Decl.h:723

clang::ValueDecl::isInitCapture
bool isInitCapture() const
Whether this variable is the implicit variable for a lambda init-capture.
Definition Decl.cpp:5510

clang::VarDecl
Represents a variable declaration or definition.
Definition Decl.h:926

clang::VarDecl::getCanonicalDecl
VarDecl * getCanonicalDecl() override
Retrieves the "canonical" declaration of the given declaration.
Definition Decl.cpp:2257

clang::VarDecl::isInitCapture
bool isInitCapture() const
Whether this variable is the implicit variable for a lambda init-capture.
Definition Decl.h:1578

clang::specific_attr_iterator
specific_attr_iterator - Iterates over a subrange of an AttrVec, only providing attributes that are o...
Definition AttrIterator.h:36

llvm::ArrayRef
Definition LLVM.h:31

llvm::SmallPtrSet
Definition ASTContext.h:54

llvm::SmallString
Definition LLVM.h:34

llvm::SmallVectorImpl
Definition Randstruct.h:18

llvm::SmallVector
Definition LLVM.h:35

Cuda.h

clang::CodeGen
Definition CGFunctionInfo.h:28

clang::CodeGen::AlignmentSource::Type
@ Type
The l-value was considered opaque, so the alignment was determined from a type.
Definition CGValue.h:154

clang::CodeGen::AlignmentSource::Decl
@ Decl
The l-value was an access to a declared entity or something equivalently strong, like the address of ...
Definition CGValue.h:145

clang::CodeGen::TEK_Aggregate
@ TEK_Aggregate
Definition CodeGenFunction.h:115

clang::CodeGen::TEK_Scalar
@ TEK_Scalar
Definition CodeGenFunction.h:113

clang::CodeGen::TEK_Complex
@ TEK_Complex
Definition CodeGenFunction.h:114

clang::CodeGen::NormalAndEHCleanup
@ NormalAndEHCleanup
Definition EHScopeStack.h:86

clang::format::Base
Base
Definition IntegerLiteralSeparatorFixer.cpp:20

clang::index::SymbolKind::Field
@ Field
Definition IndexSymbol.h:42

clang::interp::Storage::Fn
@ Fn
Definition Pointer.h:59

clang
The JSON file list parser is used to communicate input to InstallAPI.
Definition CalledOnceCheck.h:17

clang::CanQualType
CanQual< Type > CanQualType
Represents a canonical, potentially-qualified type.
Definition CanonicalType.h:213

clang::OpenACCDirectiveKind::Data
@ Data
Definition OpenACCKinds.h:37

clang::isa
bool isa(CodeGen::Address addr)
Definition Address.h:330

clang::LinkageSpecLanguageIDs::C
@ C
Definition DeclCXX.h:3007

clang::ICIS_NoInit
@ ICIS_NoInit
No in-class initializer.
Definition Specifiers.h:272

clang::OffloadArch
OffloadArch
Definition OffloadArch.h:18

clang::OffloadArch::GFX810
@ GFX810
Definition OffloadArch.h:67

clang::OffloadArch::GFX600
@ GFX600
Definition OffloadArch.h:54

clang::OffloadArch::GFX1031
@ GFX1031
Definition OffloadArch.h:87

clang::OffloadArch::GFX701
@ GFX701
Definition OffloadArch.h:58

clang::OffloadArch::GFX1035
@ GFX1035
Definition OffloadArch.h:91

clang::OffloadArch::GFX904
@ GFX904
Definition OffloadArch.h:71

clang::OffloadArch::GFX1012
@ GFX1012
Definition OffloadArch.h:83

clang::OffloadArch::GFX702
@ GFX702
Definition OffloadArch.h:59

clang::OffloadArch::GFX1250
@ GFX1250
Definition OffloadArch.h:105

clang::OffloadArch::GFX1152
@ GFX1152
Definition OffloadArch.h:100

clang::OffloadArch::GFX10_1_GENERIC
@ GFX10_1_GENERIC
Definition OffloadArch.h:80

clang::OffloadArch::GFX11_GENERIC
@ GFX11_GENERIC
Definition OffloadArch.h:93

clang::OffloadArch::GFX805
@ GFX805
Definition OffloadArch.h:66

clang::OffloadArch::GFX602
@ GFX602
Definition OffloadArch.h:56

clang::OffloadArch::SM_120
@ SM_120
Definition OffloadArch.h:50

clang::OffloadArch::GRANITERAPIDS
@ GRANITERAPIDS
Definition OffloadArch.h:111

clang::OffloadArch::SM_21
@ SM_21
Definition OffloadArch.h:23

clang::OffloadArch::AMDGCNSPIRV
@ AMDGCNSPIRV
Definition OffloadArch.h:107

clang::OffloadArch::GFX1010
@ GFX1010
Definition OffloadArch.h:81

clang::OffloadArch::SM_72
@ SM_72
Definition OffloadArch.h:36

clang::OffloadArch::GFX1034
@ GFX1034
Definition OffloadArch.h:90

clang::OffloadArch::GFX90a
@ GFX90a
Definition OffloadArch.h:75

clang::OffloadArch::UNUSED
@ UNUSED
Definition OffloadArch.h:19

clang::OffloadArch::SM_61
@ SM_61
Definition OffloadArch.h:33

clang::OffloadArch::GFX1036
@ GFX1036
Definition OffloadArch.h:92

clang::OffloadArch::GFX1201
@ GFX1201
Definition OffloadArch.h:104

clang::OffloadArch::GFX1011
@ GFX1011
Definition OffloadArch.h:82

clang::OffloadArch::GFX703
@ GFX703
Definition OffloadArch.h:60

clang::OffloadArch::GFX9_4_GENERIC
@ GFX9_4_GENERIC
Definition OffloadArch.h:77

clang::OffloadArch::SM_101
@ SM_101
Definition OffloadArch.h:46

clang::OffloadArch::UNKNOWN
@ UNKNOWN
Definition OffloadArch.h:20

clang::OffloadArch::SM_52
@ SM_52
Definition OffloadArch.h:30

clang::OffloadArch::SM_50
@ SM_50
Definition OffloadArch.h:29

clang::OffloadArch::SM_75
@ SM_75
Definition OffloadArch.h:37

clang::OffloadArch::SM_89
@ SM_89
Definition OffloadArch.h:41

clang::OffloadArch::GFX1100
@ GFX1100
Definition OffloadArch.h:94

clang::OffloadArch::SM_53
@ SM_53
Definition OffloadArch.h:31

clang::OffloadArch::GFX1200
@ GFX1200
Definition OffloadArch.h:103

clang::OffloadArch::GFX1032
@ GFX1032
Definition OffloadArch.h:88

clang::OffloadArch::SM_101a
@ SM_101a
Definition OffloadArch.h:47

clang::OffloadArch::SM_35
@ SM_35
Definition OffloadArch.h:27

clang::OffloadArch::SM_60
@ SM_60
Definition OffloadArch.h:32

clang::OffloadArch::GFX12_GENERIC
@ GFX12_GENERIC
Definition OffloadArch.h:102

clang::OffloadArch::Generic
@ Generic
Definition OffloadArch.h:108

clang::OffloadArch::GFX942
@ GFX942
Definition OffloadArch.h:78

clang::OffloadArch::GFX1151
@ GFX1151
Definition OffloadArch.h:99

clang::OffloadArch::SM_20
@ SM_20
Definition OffloadArch.h:22

clang::OffloadArch::BMG_G21
@ BMG_G21
Definition OffloadArch.h:113

clang::OffloadArch::SM_121a
@ SM_121a
Definition OffloadArch.h:53

clang::OffloadArch::SM_86
@ SM_86
Definition OffloadArch.h:39

clang::OffloadArch::GFX1103
@ GFX1103
Definition OffloadArch.h:97

clang::OffloadArch::SM_87
@ SM_87
Definition OffloadArch.h:40

clang::OffloadArch::GFX909
@ GFX909
Definition OffloadArch.h:74

clang::OffloadArch::SM_62
@ SM_62
Definition OffloadArch.h:34

clang::OffloadArch::GFX704
@ GFX704
Definition OffloadArch.h:61

clang::OffloadArch::GFX1251
@ GFX1251
Definition OffloadArch.h:106

clang::OffloadArch::SM_80
@ SM_80
Definition OffloadArch.h:38

clang::OffloadArch::SM_100
@ SM_100
Definition OffloadArch.h:44

clang::OffloadArch::GFX803
@ GFX803
Definition OffloadArch.h:65

clang::OffloadArch::GFX902
@ GFX902
Definition OffloadArch.h:70

clang::OffloadArch::SM_100a
@ SM_100a
Definition OffloadArch.h:45

clang::OffloadArch::GFX700
@ GFX700
Definition OffloadArch.h:57

clang::OffloadArch::GFX950
@ GFX950
Definition OffloadArch.h:79

clang::OffloadArch::GFX1033
@ GFX1033
Definition OffloadArch.h:89

clang::OffloadArch::GFX10_3_GENERIC
@ GFX10_3_GENERIC
Definition OffloadArch.h:85

clang::OffloadArch::GFX601
@ GFX601
Definition OffloadArch.h:55

clang::OffloadArch::SM_103
@ SM_103
Definition OffloadArch.h:48

clang::OffloadArch::SM_90
@ SM_90
Definition OffloadArch.h:42

clang::OffloadArch::SM_30
@ SM_30
Definition OffloadArch.h:24

clang::OffloadArch::SM_70
@ SM_70
Definition OffloadArch.h:35

clang::OffloadArch::GFX1150
@ GFX1150
Definition OffloadArch.h:98

clang::OffloadArch::GFX1153
@ GFX1153
Definition OffloadArch.h:101

clang::OffloadArch::SM_90a
@ SM_90a
Definition OffloadArch.h:43

clang::OffloadArch::GFX1013
@ GFX1013
Definition OffloadArch.h:84

clang::OffloadArch::GFX900
@ GFX900
Definition OffloadArch.h:69

clang::OffloadArch::GFX801
@ GFX801
Definition OffloadArch.h:63

clang::OffloadArch::SM_103a
@ SM_103a
Definition OffloadArch.h:49

clang::OffloadArch::SM_121
@ SM_121
Definition OffloadArch.h:52

clang::OffloadArch::SM_37
@ SM_37
Definition OffloadArch.h:28

clang::OffloadArch::GFX705
@ GFX705
Definition OffloadArch.h:62

clang::OffloadArch::GFX1101
@ GFX1101
Definition OffloadArch.h:95

clang::OffloadArch::GFX9_GENERIC
@ GFX9_GENERIC
Definition OffloadArch.h:68

clang::OffloadArch::GFX90c
@ GFX90c
Definition OffloadArch.h:76

clang::OffloadArch::GFX906
@ GFX906
Definition OffloadArch.h:72

clang::OffloadArch::SM_120a
@ SM_120a
Definition OffloadArch.h:51

clang::OffloadArch::LAST
@ LAST
Definition OffloadArch.h:114

clang::OffloadArch::SM_32_
@ SM_32_
Definition OffloadArch.h:26

clang::OffloadArch::GFX1030
@ GFX1030
Definition OffloadArch.h:86

clang::OffloadArch::GFX802
@ GFX802
Definition OffloadArch.h:64

clang::OffloadArch::GFX1102
@ GFX1102
Definition OffloadArch.h:96

clang::OffloadArch::GFX908
@ GFX908
Definition OffloadArch.h:73

clang::isOpenMPDistributeDirective
bool isOpenMPDistributeDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a distribute directive.
Definition OpenMPKinds.cpp:717

clang::LCK_ByRef
@ LCK_ByRef
Capturing by reference.
Definition Lambda.h:37

clang::OpenACCClauseKind::Private
@ Private
'private' clause, allowed on 'parallel', 'serial', 'loop', 'parallel loop', and 'serial loop' constru...
Definition OpenACCKinds.h:309

clang::AS_public
@ AS_public
Definition Specifiers.h:124

clang::CR_OpenMP
@ CR_OpenMP
Definition CapturedStmt.h:19

clang::isOpenMPParallelDirective
bool isOpenMPParallelDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a parallel-kind directive.
Definition OpenMPKinds.cpp:665

clang::isOpenMPPrivate
bool isOpenMPPrivate(OpenMPClauseKind Kind)
Checks if the specified clause is one of private clauses like 'private', 'firstprivate',...
Definition OpenMPKinds.cpp:729

clang::SC_None
@ SC_None
Definition Specifiers.h:250

clang::OpenMPDistScheduleClauseKind
OpenMPDistScheduleClauseKind
OpenMP attributes for 'dist_schedule' clause.
Definition OpenMPKinds.h:104

clang::isOpenMPTargetExecutionDirective
bool isOpenMPTargetExecutionDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a target code offload directive.
Definition OpenMPKinds.cpp:672

clang::DeclaratorContext::LambdaExpr
@ LambdaExpr
Definition DeclSpec.h:1842

clang::isOpenMPTeamsDirective
bool isOpenMPTeamsDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a teams-kind directive.
Definition OpenMPKinds.cpp:694

clang::ArraySizeModifier::Normal
@ Normal
Definition TypeBase.h:3719

clang::StringToOffloadArch
OffloadArch StringToOffloadArch(llvm::StringRef S)
Definition OffloadArch.cpp:126

clang::OpenMPSeverityClauseKind
OpenMPSeverityClauseKind
OpenMP attributes for 'severity' clause.
Definition OpenMPKinds.h:150

clang::isOpenMPLoopBoundSharingDirective
bool isOpenMPLoopBoundSharingDirective(OpenMPDirectiveKind Kind)
Checks if the specified directive kind is one of the composite or combined directives that need loop ...
Definition OpenMPKinds.cpp:744

clang::LangAS
LangAS
Defines the address space values used by the address space qualifier of QualType.
Definition AddressSpaces.h:25

clang::LangAS::opencl_global
@ opencl_global
Definition AddressSpaces.h:34

clang::LangAS::cuda_shared
@ cuda_shared
Definition AddressSpaces.h:45

clang::LangAS::cuda_constant
@ cuda_constant
Definition AddressSpaces.h:44

clang::LangAS::Default
@ Default
Definition AddressSpaces.h:28

clang::OffloadArchToString
const char * OffloadArchToString(OffloadArch A)
Definition OffloadArch.cpp:106

clang::OpenMPDirectiveKind
llvm::omp::Directive OpenMPDirectiveKind
OpenMP directives.
Definition OpenMPKinds.h:25

clang::getOpenMPCaptureRegions
void getOpenMPCaptureRegions(llvm::SmallVectorImpl< OpenMPDirectiveKind > &CaptureRegions, OpenMPDirectiveKind DKind)
Return the captured regions of an OpenMP directive.
Definition OpenMPKinds.cpp:849

clang::OpenMPNumThreadsClauseModifier
OpenMPNumThreadsClauseModifier
Definition OpenMPKinds.h:233

clang::cast
U cast(CodeGen::Address addr)
Definition Address.h:327

clang::getLangASFromTargetAS
LangAS getLangASFromTargetAS(unsigned TargetAS)
Definition AddressSpaces.h:90

clang::ImplicitParamKind::CXXThis
@ CXXThis
Parameter for C++ 'this' argument.
Definition Decl.h:1734

clang::ImplicitParamKind::Other
@ Other
Other implicit parameter.
Definition Decl.h:1746

clang::OpenMPScheduleClauseKind
OpenMPScheduleClauseKind
OpenMP attributes for 'schedule' clause.
Definition OpenMPKinds.h:31

hlsl::uint64_t
unsigned long uint64_t
Definition hlsl_basic_types.h:42

clang::CodeGen::CGOpenMPRuntime::ReductionOptionsTy
Definition CGOpenMPRuntime.h:1231

clang::CodeGen::CGOpenMPRuntime::ReductionOptionsTy::ReductionKind
OpenMPDirectiveKind ReductionKind
Definition CGOpenMPRuntime.h:1235

clang::CodeGen::CGOpenMPRuntime::ReductionOptionsTy::SimpleReduction
bool SimpleReduction
Definition CGOpenMPRuntime.h:1233

clang::CodeGen::CodeGenTypeCache::VoidPtrTy
llvm::PointerType * VoidPtrTy
Definition CodeGenTypeCache.h:57

clang::CodeGen::CodeGenTypeCache::SizeTy
llvm::IntegerType * SizeTy
Definition CodeGenTypeCache.h:50

clang::CodeGen::CodeGenTypeCache::VoidPtrPtrTy
llvm::PointerType * VoidPtrPtrTy
Definition CodeGenTypeCache.h:59

clang::CodeGen::CodeGenTypeCache::Int32Ty
llvm::IntegerType * Int32Ty
Definition CodeGenTypeCache.h:37

clang::CodeGen::TBAAAccessInfo
Definition CodeGenTBAA.h:42