//===- MapInfoFinalization.cpp -----------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
/// \file
/// An OpenMP dialect related pass for FIR/HLFIR which performs some
/// pre-processing of MapInfoOp's after the module has been lowered to
/// finalize them.
///
/// For example, it expands MapInfoOp's containing descriptor related
/// types (fir::BoxType's) into multiple MapInfoOp's containing the parent
/// descriptor and pointer member components for individual mapping,
/// treating the descriptor type as a record type for later lowering in the
/// OpenMP dialect.
///
/// The pass also adds MapInfoOp's that are members of a parent object but are
/// not directly used in the body of a target region to its BlockArgument list
/// to maintain consistency across all MapInfoOp's tied to a region directly or
/// indirectly via a parent object.
//===----------------------------------------------------------------------===//

#include "flang/Optimizer/Builder/DirectivesCommon.h"
#include "flang/Optimizer/Builder/FIRBuilder.h"
#include "flang/Optimizer/Builder/HLFIRTools.h"
#include "flang/Optimizer/Dialect/FIRType.h"
#include "flang/Optimizer/Dialect/Support/KindMapping.h"
#include "flang/Optimizer/HLFIR/HLFIROps.h"
#include "flang/Optimizer/OpenMP/Passes.h"
#include "mlir/Analysis/SliceAnalysis.h"
#include "mlir/Dialect/Func/IR/FuncOps.h"
#include "mlir/Dialect/OpenMP/OpenMPDialect.h"
#include "mlir/IR/BuiltinDialect.h"
#include "mlir/IR/BuiltinOps.h"
#include "mlir/IR/Operation.h"
#include "mlir/IR/SymbolTable.h"
#include "mlir/Pass/Pass.h"
#include "mlir/Support/LLVM.h"
#include "llvm/ADT/BitmaskEnum.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/StringSet.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cstddef>
#include <iterator>
#include <map>

#define DEBUG_TYPE "omp-map-info-finalization"

namespace flangomp {
#define GEN_PASS_DEF_MAPINFOFINALIZATIONPASS
#include "flang/Optimizer/OpenMP/Passes.h.inc"
} // namespace flangomp

namespace {
class MapInfoFinalizationPass
    : public flangomp::impl::MapInfoFinalizationPassBase<
          MapInfoFinalizationPass> {
  /// Helper class tracking a member's parent and its
  /// placement in the parent's member list.
  struct ParentAndPlacement {
    mlir::omp::MapInfoOp parent;
    size_t index;
  };

  /// Tracks any intermediate function/subroutine local allocations we
  /// generate for the descriptors of box type dummy arguments, so that
  /// we can retrieve them for subsequent reuses within the function's
  /// scope.
  ///
  ///      descriptor defining op
  ///      |                 corresponding local alloca
  ///      |                 |
  std::map<mlir::Operation *, mlir::Value> localBoxAllocas;

  // List of deferrable descriptors to process at the end of
  // the pass.
  llvm::SmallVector<mlir::Operation *> deferrableDesc;

  /// Return true if the given path exists in a list of paths.
  static bool
  containsPath(const llvm::SmallVectorImpl<llvm::SmallVector<int64_t>> &paths,
               llvm::ArrayRef<int64_t> path) {
    return llvm::any_of(paths, [&](const llvm::SmallVector<int64_t> &p) {
      return p.size() == path.size() &&
             std::equal(p.begin(), p.end(), path.begin());
    });
  }

  /// Return true if the given path is already present in
  /// op.getMembersIndexAttr().
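  ///
  /// For example (a sketch of the attribute layout this assumes), an op
  /// carrying members_index = [[1], [1, 0]] already contains the paths {1}
  /// and {1, 0}, so both would return true below, while {0} would not.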
  static bool mappedIndexPathExists(mlir::omp::MapInfoOp op,
                                    llvm::ArrayRef<int64_t> indexPath) {
    if (mlir::ArrayAttr attr = op.getMembersIndexAttr()) {
      for (mlir::Attribute list : attr) {
        auto listAttr = mlir::cast<mlir::ArrayAttr>(list);
        if (listAttr.size() != indexPath.size())
          continue;
        bool allEq = true;
        for (auto [i, val] : llvm::enumerate(listAttr)) {
          if (mlir::cast<mlir::IntegerAttr>(val).getInt() != indexPath[i]) {
            allEq = false;
            break;
          }
        }
        if (allEq)
          return true;
      }
    }
    return false;
  }

  /// Build a compact string key for an index path for set-based
  /// deduplication. Format: "N:v0,v1,..." where N is the length.
  static void buildPathKey(llvm::ArrayRef<int64_t> path,
                           llvm::SmallString<64> &outKey) {
    outKey.clear();
    llvm::raw_svector_ostream os(outKey);
    os << path.size() << ':';
    for (size_t i = 0; i < path.size(); ++i) {
      if (i)
        os << ',';
      os << path[i];
    }
  }

  /// Return true if the module has an OpenMP requires clause that includes
  /// unified_shared_memory.
  static bool moduleRequiresUSM(mlir::ModuleOp module) {
    assert(module && "invalid module");
    if (auto req = module->getAttrOfType<mlir::omp::ClauseRequiresAttr>(
            "omp.requires"))
      return mlir::omp::bitEnumContainsAll(
          req.getValue(), mlir::omp::ClauseRequires::unified_shared_memory);
    return false;
  }

  /// Create the member map for coordRef and append it (and its index
  /// path) to the provided new* vectors, if it is not already present.
  void appendMemberMapIfNew(
      mlir::omp::MapInfoOp op, fir::FirOpBuilder &builder, mlir::Location loc,
      mlir::Value coordRef, llvm::ArrayRef<int64_t> indexPath,
      llvm::StringRef memberName,
      llvm::SmallVectorImpl<mlir::Value> &newMapOpsForFields,
      llvm::SmallVectorImpl<llvm::SmallVector<int64_t>> &newMemberIndexPaths) {
    // Local de-dup within this op invocation.
    if (containsPath(newMemberIndexPaths, indexPath))
      return;
    // Global de-dup against already present member indices.
    if (mappedIndexPathExists(op, indexPath))
      return;

    if (op.getMapperId()) {
      mlir::omp::DeclareMapperOp symbol =
          mlir::SymbolTable::lookupNearestSymbolFrom<
              mlir::omp::DeclareMapperOp>(op, op.getMapperIdAttr());
      assert(symbol && "missing symbol for declare mapper identifier");
      mlir::omp::DeclareMapperInfoOp mapperInfo = symbol.getDeclareMapperInfo();
      // TODO: There is probably a way to cache these keys in some way so we
      // don't constantly go through the process of rebuilding them on every
      // check, to save some cycles, but it can wait for a subsequent patch.
      for (auto v : mapperInfo.getMapVars()) {
        mlir::omp::MapInfoOp map =
            mlir::cast<mlir::omp::MapInfoOp>(v.getDefiningOp());
        if (!map.getMembers().empty() && mappedIndexPathExists(map, indexPath))
          return;
      }
    }

    builder.setInsertionPoint(op);
    fir::factory::AddrAndBoundsInfo info = fir::factory::getDataOperandBaseAddr(
        builder, coordRef, /*isOptional=*/false, loc);
    llvm::SmallVector<mlir::Value> bounds = fir::factory::genImplicitBoundsOps<
        mlir::omp::MapBoundsOp, mlir::omp::MapBoundsType>(
        builder, info,
        hlfir::translateToExtendedValue(loc, builder, hlfir::Entity{coordRef})
            .first,
        /*dataExvIsAssumedSize=*/false, loc);

    mlir::omp::MapInfoOp fieldMapOp = mlir::omp::MapInfoOp::create(
        builder, loc, coordRef.getType(), coordRef,
        mlir::TypeAttr::get(fir::unwrapRefType(coordRef.getType())),
        op.getMapTypeAttr(),
        builder.getAttr<mlir::omp::VariableCaptureKindAttr>(
            mlir::omp::VariableCaptureKind::ByRef),
        /*varPtrPtr=*/mlir::Value{}, /*members=*/mlir::ValueRange{},
        /*members_index=*/mlir::ArrayAttr{}, bounds,
        /*mapperId=*/mlir::FlatSymbolRefAttr(),
        builder.getStringAttr(op.getNameAttr().strref() + "." +
                              memberName + ".implicit_map"),
        /*partial_map=*/builder.getBoolAttr(false));
    newMapOpsForFields.emplace_back(fieldMapOp);
    newMemberIndexPaths.emplace_back(indexPath.begin(), indexPath.end());
  }

  // Check if the declaration operation we have refers to a dummy
  // function argument.
  bool isDummyArgument(mlir::Value mappedValue) {
    if (auto declareOp = mlir::dyn_cast_if_present<hlfir::DeclareOp>(
            mappedValue.getDefiningOp()))
      if (auto dummyScope = declareOp.getDummyScope())
        return true;
    return false;
  }

  // Relevant for OpenMP < 5.2, where attach semantics and rules don't exist.
  // As descriptors were an unspoken implementation detail in these versions,
  // there are certain cases where the user (and the compiler implementation)
  // can create data mapping errors by having temporary descriptors stuck in
  // memory. The main example is calling a 'target enter data' map without a
  // corresponding exit on an assumed-shape or assumed-size dummy argument: a
  // local stack descriptor is generated, gets mapped, and is then left on
  // device. A user doesn't realize what they've done, as the OpenMP
  // specification isn't explicit on descriptor handling in earlier versions,
  // and as far as Fortran is concerned this is something hidden from a user.
  // To avoid this we can defer the descriptor mapping in these cases until
  // target or target data regions, when we can be sure they have a clear,
  // limited scope on device.
  bool canDeferDescriptorMapping(mlir::Value descriptor) {
    if (fir::isAllocatableType(descriptor.getType()) ||
        fir::isPointerType(descriptor.getType()))
      return false;
    if (isDummyArgument(descriptor) &&
        (fir::isAssumedType(descriptor.getType()) ||
         fir::isAssumedShape(descriptor.getType())))
      return true;
    return false;
  }

  /// getMemberUserList gathers all users of a particular MapInfoOp that are
  /// other MapInfoOp's and places them into the mapMemberUsers list, which
  /// records the map that the current argument MapInfoOp "op" is part of
  /// alongside the placement of "op" in the recorded user's member list. The
  /// intent of the generated list is to find all MapInfoOp's that may be
  /// considered parents of the passed-in "op", in whose member lists it shows
  /// up, alongside collecting the placement information of "op" in each
  /// parent's member list.
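  ///
  /// For example, in IR shaped roughly like the following (types, flags and
  /// most operands elided):
  ///
  ///   %member = omp.map.info ...
  ///   %parent = omp.map.info ... members(%member : [0] : ...)
  ///
  /// calling this with "op" = %member records {parent = %parent, index = 0}.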
  void getMemberUserList(
      mlir::omp::MapInfoOp op,
      llvm::SmallVectorImpl<ParentAndPlacement> &mapMemberUsers) {
    for (auto *user : op->getUsers())
      if (auto map = mlir::dyn_cast_if_present<mlir::omp::MapInfoOp>(user))
        for (auto [i, mapMember] : llvm::enumerate(map.getMembers()))
          if (mapMember.getDefiningOp() == op)
            mapMemberUsers.push_back({map, i});
  }

  void getAsIntegers(llvm::ArrayRef<mlir::Attribute> values,
                     llvm::SmallVectorImpl<int64_t> &ints) {
    ints.reserve(values.size());
    llvm::transform(values, std::back_inserter(ints),
                    [](mlir::Attribute value) {
                      return mlir::cast<mlir::IntegerAttr>(value).getInt();
                    });
  }

  /// This function will expand a MapInfoOp's member indices back into a
  /// vector so that they can be trivially modified, as unfortunately the
  /// attribute type that is used does not have modifiable fields at the
  /// moment (it is generally awkward to work with).
  void getMemberIndicesAsVectors(
      mlir::omp::MapInfoOp mapInfo,
      llvm::SmallVectorImpl<llvm::SmallVector<int64_t>> &indices) {
    indices.reserve(mapInfo.getMembersIndexAttr().getValue().size());
    llvm::transform(mapInfo.getMembersIndexAttr().getValue(),
                    std::back_inserter(indices), [this](mlir::Attribute value) {
                      auto memberIndex = mlir::cast<mlir::ArrayAttr>(value);
                      llvm::SmallVector<int64_t> indexes;
                      getAsIntegers(memberIndex.getValue(), indexes);
                      return indexes;
                    });
  }

  /// When provided a MapInfoOp containing a descriptor type that
  /// we must expand into multiple maps, this function will extract
  /// the value from it and return it. In certain cases we must
  /// generate a new allocation to store into, so that the
  /// fir::BoxOffsetOp we utilise to access the descriptor data's
  /// base address can be utilised.
  mlir::Value getDescriptorFromBoxMap(mlir::omp::MapInfoOp boxMap,
                                      fir::FirOpBuilder &builder,
                                      bool &canDescBeDeferred) {
    mlir::Value descriptor = boxMap.getVarPtr();
    if (!fir::isTypeWithDescriptor(boxMap.getVarType()))
      if (auto addrOp = mlir::dyn_cast_if_present<fir::BoxAddrOp>(
              boxMap.getVarPtr().getDefiningOp()))
        descriptor = addrOp.getVal();

    canDescBeDeferred = canDeferDescriptorMapping(descriptor);

    if (!mlir::isa<fir::BaseBoxType>(descriptor.getType()) &&
        !fir::factory::isOptionalArgument(descriptor.getDefiningOp()))
      return descriptor;

    mlir::Value &alloca = localBoxAllocas[descriptor.getDefiningOp()];
    mlir::Location loc = boxMap->getLoc();

    if (!alloca) {
      // The fir::BoxOffsetOp only works with !fir.ref<!fir.box<...>> types,
      // as allowing it to access non-reference box operations can cause some
      // problematic SSA IR. However, in the case of assumed shapes the type
      // is not a !fir.ref; in these cases, to retrieve the appropriate
      // !fir.ref<!fir.box<...>> needed to access the data we want to map, we
      // must perform an alloca, store the box into it, and then retrieve the
      // data from the new alloca.
      mlir::OpBuilder::InsertPoint insPt = builder.saveInsertionPoint();
      mlir::Block *allocaBlock = builder.getAllocaBlock();
      assert(allocaBlock && "No alloca block found for this top level op");
      builder.setInsertionPointToStart(allocaBlock);

      mlir::Type allocaType = descriptor.getType();
      if (fir::isBoxAddress(allocaType))
        allocaType = fir::unwrapRefType(allocaType);
      alloca = fir::AllocaOp::create(builder, loc, allocaType);
      builder.restoreInsertionPoint(insPt);
    }

    // We should only emit a store if the passed-in data is present; it is
    // possible a user passes no argument to an optional parameter, in which
    // case we cannot store or we'll segfault on the emitted memcpy.
    // TODO: We currently emit a present -> load/store every time we use a
    // mapped value that requires a local allocation; this isn't the most
    // efficient, although it is more correct in a lot of situations.
    // One such situation is emitting this series of instructions in separate
    // segments of a branch (e.g. two target regions in separate else/if
    // branches mapping the same function argument). It would be nice to be
    // able to optimize these situations, e.g. by raising the load/store out
    // of the branch if possible. But perhaps this is best left to lower
    // level optimisation passes.
    auto isPresent =
        fir::IsPresentOp::create(builder, loc, builder.getI1Type(), descriptor);
    builder.genIfOp(loc, {}, isPresent, false)
        .genThen([&]() {
          descriptor = builder.loadIfRef(loc, descriptor);
          fir::StoreOp::create(builder, loc, descriptor, alloca);
        })
        .end();
    return alloca;
  }

  /// Function that generates a FIR operation accessing the descriptor's
  /// base address (BoxOffsetOp) and a MapInfoOp for it. The most
  /// important thing to note is that we normally move the bounds from
  /// the descriptor map onto the base address map.
  mlir::omp::MapInfoOp
  genBaseAddrMap(mlir::Value descriptor, mlir::OperandRange bounds,
                 mlir::omp::ClauseMapFlags mapType, fir::FirOpBuilder &builder,
                 mlir::FlatSymbolRefAttr mapperId = mlir::FlatSymbolRefAttr()) {
    mlir::Location loc = descriptor.getLoc();
    mlir::Value baseAddrAddr = fir::BoxOffsetOp::create(
        builder, loc, descriptor, fir::BoxFieldAttr::base_addr);

    mlir::Type underlyingVarType =
        llvm::cast<mlir::omp::PointerLikeType>(
            fir::unwrapRefType(baseAddrAddr.getType()))
            .getElementType();
    if (auto seqType = llvm::dyn_cast<fir::SequenceType>(underlyingVarType))
      if (seqType.hasDynamicExtents())
        underlyingVarType = seqType.getEleTy();

    // Member of the descriptor pointing at the allocated data
    return mlir::omp::MapInfoOp::create(
        builder, loc, baseAddrAddr.getType(), descriptor,
        mlir::TypeAttr::get(underlyingVarType),
        builder.getAttr<mlir::omp::ClauseMapFlagsAttr>(mapType),
        builder.getAttr<mlir::omp::VariableCaptureKindAttr>(
            mlir::omp::VariableCaptureKind::ByRef),
        baseAddrAddr, /*members=*/mlir::SmallVector<mlir::Value>{},
        /*membersIndex=*/mlir::ArrayAttr{}, bounds,
        /*mapperId=*/mapperId, /*name=*/builder.getStringAttr(""),
        /*partial_map=*/builder.getBoolAttr(false));
  }

  /// This function adjusts the member indices vector to include a new
  /// base address member. We take the position of the descriptor in
  /// the member indices list, which is the index data that the base
  /// address's index will be based off of, as the base address is
  /// a member of the descriptor. We must also alter other members
  /// that are members of this descriptor to account for the addition
  /// of the base address index.
  void adjustMemberIndices(
      llvm::SmallVectorImpl<llvm::SmallVector<int64_t>> &memberIndices,
      size_t memberIndex) {
    llvm::SmallVector<int64_t> baseAddrIndex = memberIndices[memberIndex];

    // If we find another member that is "derived/a member of" the descriptor
    // that is not the descriptor itself, we must insert a 0 for the new base
    // address we have just added for the descriptor into the list at the
    // appropriate position, to maintain correctness of the positional/index
    // data for that member.
    for (llvm::SmallVector<int64_t> &member : memberIndices)
      if (member.size() > baseAddrIndex.size() &&
          std::equal(baseAddrIndex.begin(), baseAddrIndex.end(),
                     member.begin()))
        member.insert(std::next(member.begin(), baseAddrIndex.size()), 0);

    // Add the base address index to the main base address member data
    baseAddrIndex.push_back(0);

    // Insert our newly created baseAddrIndex into the larger list of indices
    // at the correct location.
    memberIndices.insert(std::next(memberIndices.begin(), memberIndex + 1),
                         baseAddrIndex);
  }

  /// Adjusts the descriptor's map type. The main alteration that is done
  /// currently is transforming the map type to `OMP_MAP_TO` where possible.
  /// This is because we will always need to map the descriptor to device
  /// (or at the very least it seems to be the case currently with the
  /// current lowered kernel IR), as without the appropriate descriptor
  /// information on the device there is a risk of the kernel IR requesting
  /// various data that will not have been copied, to perform things like
  /// indexing. This can cause segfaults and memory access errors. However,
  /// we do not need this data mapped back to the host from the device, as
  /// per the OpenMP spec we cannot alter the data via resizing or deletion
  /// on the device. Discarding any descriptor alterations via no map back is
  /// reasonable (and required for certain segments of descriptor data like
  /// the type descriptor that are global constants). This alteration is only
  /// inapplicable to `target exit` and `target update` currently, and that's
  /// due to `target exit` not allowing `to` mappings, and `target update`
  /// not allowing both `to` and `from` simultaneously. We currently try to
  /// maintain the `implicit` flag where necessary, although it does not seem
  /// strictly required.
  mlir::omp::ClauseMapFlags
  getDescriptorMapType(mlir::omp::ClauseMapFlags mapTypeFlag,
                       mlir::Operation *target) {
    using mapFlags = mlir::omp::ClauseMapFlags;
    if (llvm::isa_and_nonnull<mlir::omp::TargetExitDataOp,
                              mlir::omp::TargetUpdateOp>(target))
      return mapTypeFlag;

    mapFlags flags =
        mapFlags::to | (mapTypeFlag & (mapFlags::implicit | mapFlags::always));
    // For unified_shared_memory, we additionally add `CLOSE` on the
    // descriptor to ensure device-local placement where required by tests
    // relying on USM + close semantics.
    if (moduleRequiresUSM(target->getParentOfType<mlir::ModuleOp>()))
      flags |= mapFlags::close;
    return flags;
  }

  /// Check if the mapOp is present in the HasDeviceAddr clause on
  /// the userOp. Only applies to TargetOp.
  bool isHasDeviceAddr(mlir::omp::MapInfoOp mapOp, mlir::Operation &userOp) {
    if (auto targetOp = llvm::dyn_cast<mlir::omp::TargetOp>(userOp)) {
      for (mlir::Value hda : targetOp.getHasDeviceAddrVars()) {
        if (hda.getDefiningOp() == mapOp)
          return true;
      }
    }
    return false;
  }

  bool isUseDeviceAddr(mlir::omp::MapInfoOp mapOp, mlir::Operation &userOp) {
    if (auto targetDataOp = llvm::dyn_cast<mlir::omp::TargetDataOp>(userOp)) {
      for (mlir::Value uda : targetDataOp.getUseDeviceAddrVars()) {
        if (uda.getDefiningOp() == mapOp)
          return true;
      }
    }
    return false;
  }

  bool isUseDevicePtr(mlir::omp::MapInfoOp mapOp, mlir::Operation &userOp) {
    if (auto targetDataOp = llvm::dyn_cast<mlir::omp::TargetDataOp>(userOp)) {
      for (mlir::Value udp : targetDataOp.getUseDevicePtrVars()) {
        if (udp.getDefiningOp() == mapOp)
          return true;
      }
    }
    return false;
  }

  // Expand mappings of type(C_PTR) to map their `__address` field explicitly
  // as a single pointer-sized member (USM-gated at the callsite). This helps
  // in USM scenarios to ensure the pointer-sized mapping is used.
  mlir::omp::MapInfoOp genCptrMemberMap(mlir::omp::MapInfoOp op,
                                        fir::FirOpBuilder &builder) {
    if (!op.getMembers().empty())
      return op;

    mlir::Type varTy = fir::unwrapRefType(op.getVarPtr().getType());
    if (!mlir::isa<fir::RecordType>(varTy))
      return op;
    auto recTy = mlir::cast<fir::RecordType>(varTy);

    // If not a builtin C_PTR record, skip.
    if (!recTy.getName().ends_with("__builtin_c_ptr"))
      return op;

    // Find the index of the c_ptr address component named "__address".
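    // As a sketch of the assumed lowering, a type(C_PTR) value reaches this
    // point as a record along the lines of:
    //   !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>
    // i.e. "__address" is the single integer field holding the raw pointer.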
    int32_t fieldIdx = recTy.getFieldIndex("__address");
    if (fieldIdx < 0)
      return op;
    mlir::Location loc = op.getVarPtr().getLoc();
    mlir::Type memTy = recTy.getType(fieldIdx);
    fir::IntOrValue idxConst =
        mlir::IntegerAttr::get(builder.getI32Type(), fieldIdx);
    mlir::Value coord = fir::CoordinateOp::create(
        builder, loc, builder.getRefType(memTy), op.getVarPtr(),
        llvm::SmallVector<fir::IntOrValue, 1>{idxConst});

    // Child for the `__address` member.
    llvm::SmallVector<llvm::SmallVector<int64_t>> memberIdx = {{0}};
    mlir::ArrayAttr newMembersAttr = builder.create2DI64ArrayAttr(memberIdx);

    // Force CLOSE in USM paths so the pointer gets device-local placement
    // when required by tests relying on USM + close semantics.
    mlir::omp::ClauseMapFlagsAttr mapTypeAttr =
        builder.getAttr<mlir::omp::ClauseMapFlagsAttr>(
            op.getMapType() | mlir::omp::ClauseMapFlags::close);

    mlir::omp::MapInfoOp memberMap = mlir::omp::MapInfoOp::create(
        builder, loc, coord.getType(), coord,
        mlir::TypeAttr::get(fir::unwrapRefType(coord.getType())), mapTypeAttr,
        builder.getAttr<mlir::omp::VariableCaptureKindAttr>(
            mlir::omp::VariableCaptureKind::ByRef),
        /*varPtrPtr=*/mlir::Value{},
        /*members=*/llvm::SmallVector<mlir::Value>{},
        /*member_index=*/mlir::ArrayAttr{},
        /*bounds=*/op.getBounds(),
        /*mapperId=*/mlir::FlatSymbolRefAttr(),
        /*name=*/op.getNameAttr(),
        /*partial_map=*/builder.getBoolAttr(false));

    // Rebuild the parent as a container with the `__address` member.
    mlir::omp::MapInfoOp newParent = mlir::omp::MapInfoOp::create(
        builder, op.getLoc(), op.getResult().getType(), op.getVarPtr(),
        op.getVarTypeAttr(), mapTypeAttr, op.getMapCaptureTypeAttr(),
        /*varPtrPtr=*/mlir::Value{},
        /*members=*/llvm::SmallVector<mlir::Value>{memberMap},
        /*member_index=*/newMembersAttr,
        /*bounds=*/llvm::SmallVector<mlir::Value>{},
        /*mapperId=*/mlir::FlatSymbolRefAttr(), op.getNameAttr(),
        /*partial_map=*/builder.getBoolAttr(false));
    op.replaceAllUsesWith(newParent.getResult());
    op->erase();
    return newParent;
  }

  mlir::omp::MapInfoOp genDescriptorMemberMaps(mlir::omp::MapInfoOp op,
                                               fir::FirOpBuilder &builder,
                                               mlir::Operation *target) {
    llvm::SmallVector<ParentAndPlacement> mapMemberUsers;
    getMemberUserList(op, mapMemberUsers);

    // TODO: map the addendum segment of the descriptor, similarly to the
    // base address/data pointer member.
    bool descCanBeDeferred = false;
    mlir::Value descriptor =
        getDescriptorFromBoxMap(op, builder, descCanBeDeferred);

    mlir::ArrayAttr newMembersAttr;
    mlir::SmallVector<mlir::Value> newMembers;
    llvm::SmallVector<llvm::SmallVector<int64_t>> memberIndices;
    bool isHasDeviceAddrFlag = isHasDeviceAddr(op, *target);

    if (!mapMemberUsers.empty() || !op.getMembers().empty())
      getMemberIndicesAsVectors(
          !mapMemberUsers.empty() ? mapMemberUsers[0].parent : op,
          memberIndices);

    // If the operation that we are expanding with a descriptor has a user
    // (parent), then we have to expand the parent's member indices to
    // reflect the adjusted member indices for the base address insertion.
    // However, if it does not, then we are either expanding a MapInfoOp
    // without any pre-existing member information to now have one new member
    // for the base address, or we are expanding a parent that is a
    // descriptor and we have to adjust all of its members to reflect the
    // insertion of the base address.
    //
    // If we're expanding a top-level descriptor for a map operation that
    // resulted from a "has_device_addr" clause, then we want the base
    // pointer from the descriptor to be used verbatim, i.e. without
    // additional remapping. To avoid this remapping, simply don't generate
    // any map information for the descriptor members.
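    //
    // For reference, in the common no-parent case the expansion below is
    // shaped roughly like (types, bounds and map flags elided):
    //   %baddr  = fir.box_offset %desc base_addr : ...
    //   %member = omp.map.info var_ptr(%desc) var_ptr_ptr(%baddr) ...
    //   %new    = omp.map.info var_ptr(%desc) members(%member : [0] : ...)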
    mlir::FlatSymbolRefAttr mapperId = op.getMapperIdAttr();
    if (!mapMemberUsers.empty()) {
      // Currently, there should only be one user per map when this pass
      // is executed. This is either a parent map, holding the current map in
      // its member list, or a target operation that holds a map clause. This
      // may change in the future if we aim to refactor the MLIR for map
      // clauses to allow sharing of duplicate maps across target operations.
      assert(mapMemberUsers.size() == 1 &&
             "OMPMapInfoFinalization currently only supports single users of a "
             "MapInfoOp");
      auto baseAddr = genBaseAddrMap(descriptor, op.getBounds(),
                                     op.getMapType(), builder, mapperId);
      ParentAndPlacement mapUser = mapMemberUsers[0];
      adjustMemberIndices(memberIndices, mapUser.index);
      llvm::SmallVector<mlir::Value> newMemberOps;
      for (auto v : mapUser.parent.getMembers()) {
        newMemberOps.push_back(v);
        if (v == op)
          newMemberOps.push_back(baseAddr);
      }
      mapUser.parent.getMembersMutable().assign(newMemberOps);
      mapUser.parent.setMembersIndexAttr(
          builder.create2DI64ArrayAttr(memberIndices));
    } else if (!isHasDeviceAddrFlag) {
      auto baseAddr = genBaseAddrMap(descriptor, op.getBounds(),
                                     op.getMapType(), builder, mapperId);
      newMembers.push_back(baseAddr);
      if (!op.getMembers().empty()) {
        for (auto &indices : memberIndices)
          indices.insert(indices.begin(), 0);
        memberIndices.insert(memberIndices.begin(), {0});
        newMembersAttr = builder.create2DI64ArrayAttr(memberIndices);
        newMembers.append(op.getMembers().begin(), op.getMembers().end());
      } else {
        llvm::SmallVector<llvm::SmallVector<int64_t>> memberIdx = {{0}};
        newMembersAttr = builder.create2DI64ArrayAttr(memberIdx);
      }
    }

    // Descriptors for objects listed in a `has_device_addr` clause will
    // always be copied. This is because the descriptor can be rematerialized
    // by the compiler, and so the address of the descriptor for a given
    // object at one place in the code may differ from that address in
    // another place. The contents of the descriptor (the base address in
    // particular) will remain unchanged though.
    mlir::omp::ClauseMapFlags mapType = op.getMapType();
    if (isHasDeviceAddrFlag) {
      mapType |= mlir::omp::ClauseMapFlags::always;
    }

    mlir::omp::MapInfoOp newDescParentMapOp = mlir::omp::MapInfoOp::create(
        builder, op->getLoc(), op.getResult().getType(), descriptor,
        mlir::TypeAttr::get(fir::unwrapRefType(descriptor.getType())),
        builder.getAttr<mlir::omp::ClauseMapFlagsAttr>(
            getDescriptorMapType(mapType, target)),
        op.getMapCaptureTypeAttr(), /*varPtrPtr=*/mlir::Value{}, newMembers,
        newMembersAttr,
        /*bounds=*/mlir::SmallVector<mlir::Value>{},
        /*mapperId=*/mlir::FlatSymbolRefAttr(), op.getNameAttr(),
        /*partial_map=*/builder.getBoolAttr(false));
    op.replaceAllUsesWith(newDescParentMapOp.getResult());
    op->erase();
    if (descCanBeDeferred)
      deferrableDesc.push_back(newDescParentMapOp);
    return newDescParentMapOp;
  }

  // We add all mapped record members not directly used in the target region
  // to the block arguments in front of their parent, and we place them into
  // the map operands list for consistency.
  //
  // These indirect uses (via accesses to their parent) will still be
  // mapped individually in most cases, and a parent mapping doesn't
  // guarantee the parent will be mapped in its totality; partial
  // mapping is common.
  //
  // For example:
  //    map(tofrom: x%y)
  //
  // Will generate a mapping for "x" (the parent) and "y" (the member).
  // The parent "x" will not be mapped, but the member "y" will.
  // However, we must have the parent as a BlockArg and MapOperand
  // in these cases, to maintain the correct uses within the region and
  // to help track that the member is part of a larger object.
  //
  // In the case of:
  //    map(tofrom: x%y, x%z)
  //
  // The parent member becomes more critical, as we perform a partial
  // structure mapping where we link the mapping of the members y
  // and z together via the parent x. We do this at a kernel argument
  // level in LLVM IR and not just MLIR, which is important to maintain
  // similarity to Clang and for the runtime to do the correct thing.
  // However, we still do not map the structure in its totality; rather,
  // we generate an un-sized "binding" map entry for it.
  //
  // In the case of:
  //    map(tofrom: x, x%y, x%z)
  //
  // We do actually map the entirety of "x", so the explicit mapping of
  // x%y, x%z becomes unnecessary. It is redundant to write this from a
  // Fortran OpenMP perspective (although it is legal), as even if the
  // members were allocatables or pointers, we are mandated by the
  // specification to map these (and any recursive components) in their
  // entirety. This is different to the C++ equivalent, which requires
  // explicit mapping of these segments.
  void addImplicitMembersToTarget(mlir::omp::MapInfoOp op,
                                  fir::FirOpBuilder &builder,
                                  mlir::Operation *target) {
    auto mapClauseOwner =
        llvm::dyn_cast_if_present<mlir::omp::MapClauseOwningOpInterface>(
            target);
    // TargetDataOp is technically a MapClauseOwningOpInterface, so we
    // do not need to explicitly check for the extra cases here for
    // use_device_addr/ptr.
    if (!mapClauseOwner)
      return;

    auto addOperands = [&](mlir::MutableOperandRange &mutableOpRange,
                           mlir::Operation *directiveOp,
                           unsigned blockArgInsertIndex = 0) {
      if (!llvm::is_contained(mutableOpRange.getAsOperandRange(),
                              op.getResult()))
        return;

      // There doesn't appear to be a simple way to convert
      // MutableOperandRange to a vector currently, so we instead use a
      // for_each to populate our vector.
      llvm::SmallVector<mlir::Value> newMapOps;
      newMapOps.reserve(mutableOpRange.size());
      llvm::for_each(
          mutableOpRange.getAsOperandRange(),
          [&newMapOps](mlir::Value oper) { newMapOps.push_back(oper); });

      for (auto mapMember : op.getMembers()) {
        if (llvm::is_contained(mutableOpRange.getAsOperandRange(), mapMember))
          continue;
        newMapOps.push_back(mapMember);
        if (directiveOp) {
          directiveOp->getRegion(0).insertArgument(
              blockArgInsertIndex, mapMember.getType(), mapMember.getLoc());
          blockArgInsertIndex++;
        }
      }

      mutableOpRange.assign(newMapOps);
    };

    auto argIface =
        llvm::dyn_cast<mlir::omp::BlockArgOpenMPOpInterface>(target);

    if (auto mapClauseOwner =
            llvm::dyn_cast<mlir::omp::MapClauseOwningOpInterface>(target)) {
      mlir::MutableOperandRange mapMutableOpRange =
          mapClauseOwner.getMapVarsMutable();
      unsigned blockArgInsertIndex =
          argIface
              ? argIface.getMapBlockArgsStart() + argIface.numMapBlockArgs()
              : 0;
      addOperands(mapMutableOpRange,
                  llvm::dyn_cast_if_present<mlir::omp::TargetOp>(
                      argIface.getOperation()),
                  blockArgInsertIndex);
    }

    if (auto targetDataOp = llvm::dyn_cast<mlir::omp::TargetDataOp>(target)) {
      mlir::MutableOperandRange useDevAddrMutableOpRange =
          targetDataOp.getUseDeviceAddrVarsMutable();
      addOperands(useDevAddrMutableOpRange, target,
                  argIface.getUseDeviceAddrBlockArgsStart() +
                      argIface.numUseDeviceAddrBlockArgs());
      mlir::MutableOperandRange useDevPtrMutableOpRange =
          targetDataOp.getUseDevicePtrVarsMutable();
      addOperands(useDevPtrMutableOpRange, target,
                  argIface.getUseDevicePtrBlockArgsStart() +
                      argIface.numUseDevicePtrBlockArgs());
    } else if (auto targetOp = llvm::dyn_cast<mlir::omp::TargetOp>(target)) {
      mlir::MutableOperandRange hasDevAddrMutableOpRange =
          targetOp.getHasDeviceAddrVarsMutable();
      addOperands(hasDevAddrMutableOpRange, target,
                  argIface.getHasDeviceAddrBlockArgsStart() +
                      argIface.numHasDeviceAddrBlockArgs());
    }
  }

  // We retrieve the first user that is a Target operation, of which
  // there should only be one currently.
  // Every MapInfoOp can be tied to at most one Target operation and, at
  // minimum, to none.
  // This may change in the future with IR cleanups/modifications,
  // in which case this pass will need updating to support cases
  // where a map can have more than one user and more than one of
  // those users can be a Target operation. For now, we simply
  // return the first target operation encountered, which may
  // be on the parent MapInfoOp in the case of a member mapping.
  // In that case, we traverse the MapInfoOp chain until we
  // find the first TargetOp user.
  mlir::Operation *getFirstTargetUser(mlir::omp::MapInfoOp mapOp) {
    for (auto *user : mapOp->getUsers()) {
      if (llvm::isa<mlir::omp::TargetOp, mlir::omp::TargetDataOp,
                    mlir::omp::TargetUpdateOp, mlir::omp::TargetExitDataOp,
                    mlir::omp::TargetEnterDataOp,
                    mlir::omp::DeclareMapperInfoOp>(user))
        return user;
      if (auto mapUser = llvm::dyn_cast<mlir::omp::MapInfoOp>(user))
        return getFirstTargetUser(mapUser);
    }
    return nullptr;
  }

  void addImplicitDescriptorMapToTargetDataOp(mlir::omp::MapInfoOp op,
                                              fir::FirOpBuilder &builder,
                                              mlir::Operation &target) {
    // Check if the map is already present as an explicit map on the target
    // data directive, and not just present on a use_device_addr/ptr clause;
    // if that's the case, we should not need to add an implicit map for the
    // descriptor.
    auto explicitMappingPresent = [](mlir::omp::MapInfoOp op,
                                     mlir::omp::TargetDataOp tarData) {
      // Verify that a top-level descriptor mapping with the same varPtr is
      // present. The map type should always be To for a descriptor, which is
      // all we really care about for this mapping, as we aim to make sure
      // the descriptor is always present on device if we're expecting to
      // access the underlying data.
      if (tarData.getMapVars().empty())
        return false;

      for (mlir::Value mapVar : tarData.getMapVars()) {
        auto mapOp = llvm::cast<mlir::omp::MapInfoOp>(mapVar.getDefiningOp());
        if (mapOp.getVarPtr() == op.getVarPtr() &&
            mapOp.getVarPtrPtr() == op.getVarPtrPtr()) {
          return true;
        }
      }

      return false;
    };

    // If we're not a top-level descriptor with members (e.g. we are a member
    // of a derived type), we do not want to perform this step.
    if (!llvm::isa<mlir::omp::TargetDataOp>(target) || op.getMembers().empty())
      return;

    if (!isUseDeviceAddr(op, target) && !isUseDevicePtr(op, target))
      return;

    auto targetDataOp = llvm::cast<mlir::omp::TargetDataOp>(target);
    if (explicitMappingPresent(op, targetDataOp))
      return;

    mlir::omp::MapInfoOp newDescParentMapOp = mlir::omp::MapInfoOp::create(
        builder, op->getLoc(), op.getResult().getType(), op.getVarPtr(),
        op.getVarTypeAttr(),
        builder.getAttr<mlir::omp::ClauseMapFlagsAttr>(
            mlir::omp::ClauseMapFlags::to | mlir::omp::ClauseMapFlags::always),
        op.getMapCaptureTypeAttr(),
        /*varPtrPtr=*/mlir::Value{},
        /*members=*/mlir::SmallVector<mlir::Value>{},
        /*members_index=*/mlir::ArrayAttr{},
        /*bounds=*/mlir::SmallVector<mlir::Value>{},
        /*mapperId=*/mlir::FlatSymbolRefAttr(), op.getNameAttr(),
        /*partial_map=*/builder.getBoolAttr(false));
    targetDataOp.getMapVarsMutable().append({newDescParentMapOp});
  }

  void removeTopLevelDescriptor(mlir::omp::MapInfoOp op,
                                fir::FirOpBuilder &builder,
                                mlir::Operation *target) {
    if (llvm::isa<mlir::omp::TargetOp, mlir::omp::TargetDataOp>(target))
      return;

    // If we're not a top-level descriptor with members (e.g. we are a member
    // of a derived type), we do not want to perform this step.
    if (op.getMembers().empty())
      return;

    mlir::SmallVector<mlir::Value> members(op.getMembers().begin(),
                                           op.getMembers().end());
    mlir::omp::MapInfoOp baseAddr =
        mlir::dyn_cast_or_null<mlir::omp::MapInfoOp>(
            members.front().getDefiningOp());
    assert(baseAddr && "Expected member to be MapInfoOp");
    members.erase(members.begin());
    llvm::SmallVector<llvm::SmallVector<int64_t>> memberIndices;
    getMemberIndicesAsVectors(op, memberIndices);

    // We can skip the extra processing if there's only one member, as it'd
    // be the base address, which we're promoting to the parent.
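    // In that single-member case, membersAttr stays null below and the
    // promoted base-address map is emitted with no members list at all.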
    mlir::ArrayAttr membersAttr;
    if (memberIndices.size() > 1) {
      memberIndices.erase(memberIndices.begin());
      membersAttr = builder.create2DI64ArrayAttr(memberIndices);
    }

    // VarPtrPtr is currently tied to detecting whether something is a
    // pointer in the later lowering; at the moment this comes tied with
    // OMP_MAP_PTR_AND_OBJ being applied, which breaks the problem this tries
    // to solve by emitting an 8-byte mapping tied to the descriptor address
    // (even if we only emit a single map). So we circumvent this by removing
    // the varPtrPtr mapping. However, a side effect of this is that we lose
    // the additional load in the backend tied to it, which is required for
    // correctness and for getting the correct address of the data to perform
    // our mapping. So we perform our load at this stage.
    // TODO/FIXME: Tidy up OMP_MAP_PTR_AND_OBJ and varPtrPtr being tied to
    // whether something is a pointer, to try and tidy up the implementation
    // a bit. This is an unfortunate complexity from push-back from upstream.
    // We could also emit a load at this level for all base addresses as
    // well, which in turn would simplify the later lowering a bit. But first
    // we need to see how well this alteration works.
    auto loadBaseAddr =
        builder.loadIfRef(op->getLoc(), baseAddr.getVarPtrPtr());
    mlir::omp::MapInfoOp newBaseAddrMapOp = mlir::omp::MapInfoOp::create(
        builder, op->getLoc(), loadBaseAddr.getType(), loadBaseAddr,
        baseAddr.getVarTypeAttr(), baseAddr.getMapTypeAttr(),
        baseAddr.getMapCaptureTypeAttr(), mlir::Value{}, members, membersAttr,
        baseAddr.getBounds(),
        /*mapperId=*/mlir::FlatSymbolRefAttr(), op.getNameAttr(),
        /*partial_map=*/builder.getBoolAttr(false));
    op.replaceAllUsesWith(newBaseAddrMapOp.getResult());
    op->erase();
    baseAddr.erase();
  }

  static bool hasADescriptor(mlir::Operation *varOp, mlir::Type varType) {
    if (fir::isTypeWithDescriptor(varType) ||
        mlir::isa<fir::BaseBoxType>(varType) ||
        mlir::isa_and_present<fir::BoxAddrOp>(varOp))
      return true;
    return false;
  }

  // This pass executes on omp::MapInfoOp's containing descriptor-based types
  // (allocatables, pointers, assumed shape etc.), expanding them into
  // multiple omp::MapInfoOp's for each pointer member contained within the
  // descriptor.
  //
  // From the perspective of the MLIR pass manager this runs on the top level
  // operation (usually a function) containing the MapInfoOp, because this
  // pass will mutate siblings of MapInfoOp.
  void runOnOperation() override {
    mlir::ModuleOp module =
        mlir::dyn_cast_or_null<mlir::ModuleOp>(getOperation());
    if (!module)
      module = getOperation()->getParentOfType<mlir::ModuleOp>();
    fir::KindMapping kindMap = fir::getKindMapping(module);
    fir::FirOpBuilder builder{module, std::move(kindMap)};

    // We wish to maintain some function level scope (currently
    // just local function scope variables used to load and store box
    // variables into so we can access their base address, a
    // quirk of box_offset requiring us to have an in-memory box, which
    // Fortran in certain cases does not provide) whilst not subjecting
    // ourselves to the possibility of race conditions while this pass
    // undergoes frequent re-iteration for the near future. So we loop
    // over functions in the module and then over map.info ops inside of
    // those.
    getOperation()->walk([&](mlir::Operation *func) {
      if (!mlir::isa<mlir::func::FuncOp, mlir::omp::DeclareMapperOp>(func))
        return;
      // Clear all local allocations we made for any boxes in any prior
      // iterations from previous function scopes.
      localBoxAllocas.clear();
      deferrableDesc.clear();

      // Next, walk `omp.map.info` ops to see if any record members should be
      // implicitly mapped.
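      //
      // For example (a sketch, assuming the usual lowering of allocatable
      // components), given:
      //
      //   type t
      //     integer, allocatable :: a(:)
      //   end type
      //   type(t) :: x
      //   !$omp target map(tofrom: x)
      //     x%a(1) = 1
      //   !$omp end target
      //
      // the walk below appends an explicit member map for x%a to the map of
      // x, because the component is referenced inside the target region via
      // an hlfir.designate rooted at the region's block argument.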
      func->walk([&](mlir::omp::MapInfoOp op) {
        mlir::Type underlyingType =
            fir::unwrapRefType(op.getVarPtr().getType());

        // TODO Test with and support more complicated cases; like arrays for
        // records, for example.
        if (!fir::isRecordWithAllocatableMember(underlyingType))
          return mlir::WalkResult::advance();

        // TODO For now, only consider `omp.target` ops. Other ops that
        // support `map` clauses will follow later.
        mlir::omp::TargetOp target =
            mlir::dyn_cast_if_present<mlir::omp::TargetOp>(
                getFirstTargetUser(op));
        if (!target)
          return mlir::WalkResult::advance();

        auto mapClauseOwner =
            llvm::dyn_cast<mlir::omp::MapClauseOwningOpInterface>(*target);
        int64_t mapVarIdx = mapClauseOwner.getOperandIndexForMap(op);
        assert(mapVarIdx >= 0 &&
               mapVarIdx <
                   static_cast<int64_t>(mapClauseOwner.getMapVars().size()));

        auto argIface =
            llvm::dyn_cast<mlir::omp::BlockArgOpenMPOpInterface>(*target);
        // TODO How should `map` block arguments that correspond to
        // `private`, `use_device_addr`, `use_device_ptr` be handled?
        mlir::BlockArgument opBlockArg = argIface.getMapBlockArgs()[mapVarIdx];
        llvm::SetVector<mlir::Operation *> mapVarForwardSlice;
        mlir::getForwardSlice(opBlockArg, &mapVarForwardSlice);

        mapVarForwardSlice.remove_if([&](mlir::Operation *sliceOp) {
          // TODO Support coordinate_of ops.
          //
          // TODO Support call ops by recursively examining the forward slice
          // of the corresponding parameter to the field in the called
          // function.
          return !mlir::isa<hlfir::DesignateOp>(sliceOp);
        });

        auto recordType = mlir::cast<fir::RecordType>(underlyingType);
        llvm::SmallVector<mlir::Value> newMapOpsForFields;
        llvm::SmallVector<llvm::SmallVector<int64_t>> newMemberIndexPaths;

        // 1) Handle direct top-level allocatable fields.
        for (auto fieldMemTyPair : recordType.getTypeList()) {
          auto &field = fieldMemTyPair.first;
          auto memTy = fieldMemTyPair.second;
          if (!fir::isAllocatableType(memTy))
            continue;

          bool referenced = llvm::any_of(mapVarForwardSlice, [&](auto *opv) {
            auto designateOp = mlir::dyn_cast<hlfir::DesignateOp>(opv);
            return designateOp && designateOp.getComponent() &&
                   designateOp.getComponent()->strref() == field;
          });
          if (!referenced)
            continue;

          int32_t fieldIdx = recordType.getFieldIndex(field);
          builder.setInsertionPoint(op);
          fir::IntOrValue idxConst =
              mlir::IntegerAttr::get(builder.getI32Type(), fieldIdx);
          auto fieldCoord = fir::CoordinateOp::create(
              builder, op.getLoc(), builder.getRefType(memTy), op.getVarPtr(),
              llvm::SmallVector<fir::IntOrValue, 1>{idxConst});
          int64_t fieldIdx64 = static_cast<int64_t>(fieldIdx);
          llvm::SmallVector<int64_t> idxPath{fieldIdx64};
          appendMemberMapIfNew(op, builder, op.getLoc(), fieldCoord, idxPath,
                               field, newMapOpsForFields, newMemberIndexPaths);
        }

        // 2) Handle nested allocatable fields along any component chain
        // referenced in the region via HLFIR designate ops.
        llvm::SmallVector<llvm::SmallVector<int64_t>> seenIndexPaths;
        for (mlir::Operation *sliceOp : mapVarForwardSlice) {
          auto designateOp = mlir::dyn_cast<hlfir::DesignateOp>(sliceOp);
          if (!designateOp || !designateOp.getComponent())
            continue;
          llvm::SmallVector<llvm::StringRef> compPathReversed;
          compPathReversed.push_back(designateOp.getComponent()->strref());
          mlir::Value curBase = designateOp.getMemref();
          bool rootedAtMapArg = false;
          while (true) {
            if (auto parentDes = curBase.getDefiningOp<hlfir::DesignateOp>()) {
              if (!parentDes.getComponent())
                break;
              compPathReversed.push_back(parentDes.getComponent()->strref());
              curBase = parentDes.getMemref();
              continue;
            }
            if (auto decl = curBase.getDefiningOp<hlfir::DeclareOp>()) {
              if (auto barg =
                      mlir::dyn_cast<mlir::BlockArgument>(decl.getMemref()))
                rootedAtMapArg = (barg == opBlockArg);
            } else if (auto blockArg =
                           mlir::dyn_cast_or_null<mlir::BlockArgument>(
                               curBase)) {
              rootedAtMapArg = (blockArg == opBlockArg);
            }
            break;
          }
          // Only process nested paths (2+ components). Single-component
          // paths for direct fields are handled above.
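          // For instance, for a chain like x%inner%payload rooted at the map
          // block argument (with "inner" at field index 1 and "payload" at
          // field index 0 in this hypothetical layout), compPathReversed
          // holds {"payload", "inner"} and the loop below rebuilds it into
          // indexPath = {1, 0}.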
          if (!rootedAtMapArg || compPathReversed.size() < 2)
            continue;

          builder.setInsertionPoint(op);
          llvm::SmallVector<int64_t> indexPath;
          mlir::Type curTy = underlyingType;
          mlir::Value coordRef = op.getVarPtr();
          bool validPath = true;
          for (llvm::StringRef compName : llvm::reverse(compPathReversed)) {
            auto recTy = mlir::dyn_cast<fir::RecordType>(curTy);
            if (!recTy) {
              validPath = false;
              break;
            }
            int32_t idx = recTy.getFieldIndex(compName);
            if (idx < 0) {
              validPath = false;
              break;
            }
            indexPath.push_back(idx);
            mlir::Type memTy = recTy.getType(idx);
            fir::IntOrValue idxConst =
                mlir::IntegerAttr::get(builder.getI32Type(), idx);
            coordRef = fir::CoordinateOp::create(
                builder, op.getLoc(), builder.getRefType(memTy), coordRef,
                llvm::SmallVector<fir::IntOrValue, 1>{idxConst});
            curTy = memTy;
          }
          if (!validPath)
            continue;

          if (auto finalRefTy =
                  mlir::dyn_cast<fir::ReferenceType>(coordRef.getType())) {
            mlir::Type eleTy = finalRefTy.getElementType();
            if (fir::isAllocatableType(eleTy)) {
              if (!containsPath(seenIndexPaths, indexPath)) {
                seenIndexPaths.emplace_back(indexPath.begin(), indexPath.end());
                appendMemberMapIfNew(op, builder, op.getLoc(), coordRef,
                                     indexPath, compPathReversed.front(),
                                     newMapOpsForFields, newMemberIndexPaths);
              }
            }
          }
        }

        if (newMapOpsForFields.empty())
          return mlir::WalkResult::advance();

        // Deduplicate by index path to avoid emitting duplicate members for
        // the same component. Use a set-based key to keep this near O(n).
        llvm::SmallVector<mlir::Value> dedupMapOps;
        llvm::SmallVector<llvm::SmallVector<int64_t>> dedupIndexPaths;
        llvm::StringSet<> seenKeys;
        for (auto [i, mapOp] : llvm::enumerate(newMapOpsForFields)) {
          const auto &path = newMemberIndexPaths[i];
          llvm::SmallString<64> key;
          buildPathKey(path, key);
          if (seenKeys.contains(key))
            continue;
          seenKeys.insert(key);
          dedupMapOps.push_back(mapOp);
          dedupIndexPaths.emplace_back(path.begin(), path.end());
        }
        op.getMembersMutable().append(dedupMapOps);

        llvm::SmallVector<llvm::SmallVector<int64_t>> newMemberIndices;
        if (mlir::ArrayAttr oldAttr = op.getMembersIndexAttr())
          for (mlir::Attribute indexList : oldAttr) {
            llvm::SmallVector<int64_t> listVec;
            for (mlir::Attribute index : mlir::cast<mlir::ArrayAttr>(indexList))
              listVec.push_back(mlir::cast<mlir::IntegerAttr>(index).getInt());
            newMemberIndices.emplace_back(std::move(listVec));
          }
        for (auto &path : dedupIndexPaths)
          newMemberIndices.emplace_back(path);

        op.setMembersIndexAttr(builder.create2DI64ArrayAttr(newMemberIndices));
        op.setPartialMap(true);

        return mlir::WalkResult::advance();
      });

      // Expand type(C_PTR) only when unified_shared_memory is required,
      // to ensure device-visible pointer size/behavior in USM scenarios
      // without changing default expectations elsewhere.
      func->walk([&](mlir::omp::MapInfoOp op) {
        // Only expand C_PTR members when unified_shared_memory is required.
        if (!moduleRequiresUSM(func->getParentOfType<mlir::ModuleOp>()))
          return;
        builder.setInsertionPoint(op);
        genCptrMemberMap(op, builder);
      });

      func->walk([&](mlir::omp::MapInfoOp op) {
        // TODO: Currently only supports a single user for the MapInfoOp.
        // This is fine for the moment, as the Fortran frontend will generate
        // a new MapInfoOp with at most one user currently. In the case of
        // members of other objects, like derived types, the user would be
        // the parent. In cases where it's a regular non-member map, the user
        // would be the target operation it is being mapped by.
        //
        // However, when/if we optimise/cleanup the IR, we will have to
        // extend this pass to support multiple users, as we may wish to have
        // a map be re-used by multiple users (e.g. across multiple targets
        // that map the variable and have identical map properties).
        assert(llvm::hasSingleElement(op->getUsers()) &&
               "OMPMapInfoFinalization currently only supports single users "
               "of a MapInfoOp");

        if (hasADescriptor(op.getVarPtr().getDefiningOp(),
                           fir::unwrapRefType(op.getVarType()))) {
          builder.setInsertionPoint(op);
          mlir::Operation *targetUser = getFirstTargetUser(op);
          assert(targetUser && "expected user of map operation was not found");
          genDescriptorMemberMaps(op, builder, targetUser);
        }
      });

      // Now that we've expanded all of our boxes into a descriptor and base
      // address map where necessary, we check if the map owner is an
      // enter/exit/target data directive, and if it is we drop the initial
      // descriptor (top-level parent) and replace it with the
      // base_address/data.
      //
      // This circumvents issues with stack-allocated descriptors bound to
      // device colliding, which in Flang is rather trivial for a user to
      // cause by accident, due to the rather pervasive local intermediate
      // descriptor generation that occurs whenever you pass boxes around
      // different scopes. In OpenMP 6+, mapping these would be a user error,
      // as the tools required to circumvent these issues are provided by the
      // spec (ref_ptr/ptee map types), but in prior specifications these
      // tools are not available and it becomes an implementation issue for
      // us to solve.
      //
      // We do this by dropping the top-level descriptor, which will be the
      // stack descriptor when we perform enter/exit maps, as we don't want
      // these to be bound until necessary, which is when we utilise the
      // descriptor type within a target region. At that point we map the
      // relevant descriptor data, and the runtime should correctly associate
      // the data with the descriptor, bind them together, and allow clean
      // mapping and execution.
      for (auto *op : deferrableDesc) {
        auto mapOp = llvm::dyn_cast<mlir::omp::MapInfoOp>(op);
        mlir::Operation *targetUser = getFirstTargetUser(mapOp);
        assert(targetUser && "expected user of map operation was not found");
        builder.setInsertionPoint(mapOp);
        removeTopLevelDescriptor(mapOp, builder, targetUser);
        addImplicitDescriptorMapToTargetDataOp(mapOp, builder, *targetUser);
      }

      // Wait until after we have generated all of our maps to add them onto
      // the target's block arguments, simplifying the process as there would
      // be no need to avoid accidental duplicate additions.
      func->walk([&](mlir::omp::MapInfoOp op) {
        mlir::Operation *targetUser = getFirstTargetUser(op);
        assert(targetUser && "expected user of map operation was not found");
        addImplicitMembersToTarget(op, builder, targetUser);
      });
    });
  }
};
} // namespace