mirror of
https://github.com/intel/llvm.git
synced 2026-01-24 08:30:34 +08:00
[mlir][acc] Add ACCImplicitData pass for implicit data attributes (#166472)
This change adds the ACCImplicitData pass which implements the OpenACC specification for "Variables with Implicitly Determined Data Attributes" (OpenACC 3.4 spec, section 2.6.2). The pass automatically generates data clause operations (copyin, copyout, present, firstprivate, etc.) for variables used within OpenACC compute constructs (parallel, kernels, serial) that do not already have explicit data clauses. Key features: - Respects default(none) and default(present) clauses - Handles scalar vs. aggregate variables with different semantics: * Aggregates: present clause (if default(present)) or copy clause * Scalars: copy clause (kernels) or firstprivate (parallel/serial) - Generates privatization recipes when needed for firstprivate clauses - Performs alias analysis to avoid redundant data mappings - Ensures proper data clause ordering for partial entity access - Generates exit operations (copyout, delete) to match entry operations Requirements: - Types must implement acc::MappableType and/or acc::PointerLikeType interfaces to be considered candidates. - Operations accessing partial entities or creating subviews should implement acc::PartialEntityAccess and/or mlir::ViewLikeOpInterface for proper clause ordering. - Optionally pre-register acc::OpenACCSupport and mlir::AliasAnalysis if custom alias analysis, variable name determination, or error reporting is needed.
This commit is contained in:
@@ -9,6 +9,9 @@
|
||||
#ifndef MLIR_DIALECT_OPENACC_TRANSFORMS_PASSES_H
|
||||
#define MLIR_DIALECT_OPENACC_TRANSFORMS_PASSES_H
|
||||
|
||||
#include "mlir/Dialect/Arith/IR/Arith.h"
|
||||
#include "mlir/Dialect/MemRef/IR/MemRef.h"
|
||||
#include "mlir/Dialect/OpenACC/OpenACC.h"
|
||||
#include "mlir/Pass/Pass.h"
|
||||
|
||||
namespace mlir {
|
||||
|
||||
@@ -27,4 +27,40 @@ def LegalizeDataValuesInRegion : Pass<"openacc-legalize-data-values", "mlir::fun
|
||||
];
|
||||
}
|
||||
|
||||
def ACCImplicitData : Pass<"acc-implicit-data", "mlir::ModuleOp"> {
|
||||
let summary = "Generate implicit data attributes for OpenACC compute constructs";
|
||||
let description = [{
|
||||
This pass implements the OpenACC specification for "Variables with
|
||||
Implicitly Determined Data Attributes" (OpenACC 3.4 spec, section 2.6.2).
|
||||
|
||||
The pass automatically generates data clause operations for variables used
|
||||
within OpenACC compute constructs (parallel, kernels, serial) that do not
|
||||
already have explicit data clauses. The semantics follow these rules:
|
||||
|
||||
1. If there is a default(none) clause visible, no implicit data actions
|
||||
apply.
|
||||
|
||||
2. An aggregate variable (arrays, derived types, etc.) will be treated as:
|
||||
- In a present clause when default(present) is visible.
|
||||
- In a copy clause otherwise.
|
||||
|
||||
3. A scalar variable will be treated as if it appears in:
|
||||
- A copy clause if the compute construct is a kernels construct.
|
||||
- A firstprivate clause otherwise (parallel, serial).
|
||||
}];
|
||||
let dependentDialects = ["mlir::acc::OpenACCDialect",
|
||||
"mlir::memref::MemRefDialect",
|
||||
"mlir::arith::ArithDialect"];
|
||||
let options = [
|
||||
Option<"enableImplicitReductionCopy", "enable-implicit-reduction-copy",
|
||||
"bool", "true",
|
||||
"Enable applying implicit copy in lieu of implicit firstprivate for "
|
||||
"reduction variables. This allows uniform treatment of reduction "
|
||||
"variables between combined constructs (e.g., 'parallel loop') and "
|
||||
"separate constructs (e.g., 'parallel' followed by 'loop'), where "
|
||||
"the OpenACC spec requires copy semantics for the former but "
|
||||
"firstprivate would normally apply for the latter.">
|
||||
];
|
||||
}
|
||||
|
||||
#endif // MLIR_DIALECT_OPENACC_TRANSFORMS_PASSES
|
||||
|
||||
880
mlir/lib/Dialect/OpenACC/Transforms/ACCImplicitData.cpp
Normal file
880
mlir/lib/Dialect/OpenACC/Transforms/ACCImplicitData.cpp
Normal file
@@ -0,0 +1,880 @@
|
||||
//===- ACCImplicitData.cpp ------------------------------------------------===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This pass implements the OpenACC specification for "Variables with
|
||||
// Implicitly Determined Data Attributes" (OpenACC 3.4 spec, section 2.6.2).
|
||||
//
|
||||
// Overview:
|
||||
// ---------
|
||||
// The pass automatically generates data clause operations for variables used
|
||||
// within OpenACC compute constructs (parallel, kernels, serial) that do not
|
||||
// already have explicit data clauses. The semantics follow these rules:
|
||||
//
|
||||
// 1. If there is a default(none) clause visible, no implicit data actions
|
||||
// apply.
|
||||
//
|
||||
// 2. An aggregate variable (arrays, derived types, etc.) will be treated as:
|
||||
// - In a present clause when default(present) is visible.
|
||||
// - In a copy clause otherwise.
|
||||
//
|
||||
// 3. A scalar variable will be treated as if it appears in:
|
||||
// - A copy clause if the compute construct is a kernels construct.
|
||||
// - A firstprivate clause otherwise (parallel, serial).
|
||||
//
|
||||
// Requirements:
|
||||
// -------------
|
||||
// To use this pass in a pipeline, the following requirements must be met:
|
||||
//
|
||||
// 1. Type Interface Implementation: Variables from the dialect being used
|
||||
// must implement one or both of the following MLIR interfaces:
|
||||
// `acc::MappableType` and/or `acc::PointerLikeType`
|
||||
//
|
||||
// These interfaces provide the necessary methods for the pass to:
|
||||
// - Determine variable type categories (scalar vs. aggregate)
|
||||
// - Generate appropriate bounds information
|
||||
// - Generate privatization recipes
|
||||
//
|
||||
// 2. Operation Interface Implementation: Operations that access partial
|
||||
// entities or create views should implement the following MLIR
|
||||
// interfaces: `acc::PartialEntityAccess` and/or
|
||||
// `mlir::ViewLikeOpInterface`
|
||||
//
|
||||
// These interfaces are used for proper data clause ordering, ensuring
|
||||
// that base entities are mapped before derived entities (e.g., a
|
||||
// struct is mapped before its fields, an array is mapped before
|
||||
// subarray views).
|
||||
//
|
||||
// 3. Analysis Registration (Optional): If custom behavior is needed for
|
||||
// variable name extraction or alias analysis, the dialect should
|
||||
// pre-register the `acc::OpenACCSupport` and `mlir::AliasAnalysis` analyses.
|
||||
//
|
||||
// If not registered, default behavior will be used.
|
||||
//
|
||||
// Implementation Details:
|
||||
// -----------------------
|
||||
// The pass performs the following operations:
|
||||
//
|
||||
// 1. Finds candidate variables which are live-in to the compute region and
|
||||
// are not already in a data clause or private clause.
|
||||
//
|
||||
// 2. Generates both data "entry" and "exit" clause operations that match
|
||||
// the intended action depending on variable type:
|
||||
// - copy -> acc.copyin (entry) + acc.copyout (exit)
|
||||
// - present -> acc.present (entry) + acc.delete (exit)
|
||||
// - firstprivate -> acc.firstprivate (entry only, no exit)
|
||||
//
|
||||
// 3. Ensures that default clause is taken into consideration by looking
|
||||
// through current construct and parent constructs to find the "visible
|
||||
// default clause".
|
||||
//
|
||||
// 4. Fixes up SSA value links so that uses in the acc region reference the
|
||||
// result of the newly created data clause operations.
|
||||
//
|
||||
// 5. When generating implicit data clause operations, it also adds variable
|
||||
// name information and marks them with the implicit flag.
|
||||
//
|
||||
// 6. Recipes are generated by calling the appropriate entrypoints in the
|
||||
// MappableType and PointerLikeType interfaces.
|
||||
//
|
||||
// 7. AliasAnalysis is used to determine if a variable is already covered by
|
||||
// an existing data clause (e.g., an interior pointer covered by its parent).
|
||||
//
|
||||
// Examples:
|
||||
// ---------
|
||||
//
|
||||
// Example 1: Scalar in parallel construct (implicit firstprivate)
|
||||
//
|
||||
// Before:
|
||||
// func.func @test() {
|
||||
// %scalar = memref.alloca() {acc.var_name = "x"} : memref<f32>
|
||||
// acc.parallel {
|
||||
// %val = memref.load %scalar[] : memref<f32>
|
||||
// acc.yield
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// After:
|
||||
// func.func @test() {
|
||||
// %scalar = memref.alloca() {acc.var_name = "x"} : memref<f32>
|
||||
// %firstpriv = acc.firstprivate varPtr(%scalar : memref<f32>)
|
||||
// -> memref<f32> {implicit = true, name = "x"}
|
||||
// acc.parallel firstprivate(@recipe -> %firstpriv : memref<f32>) {
|
||||
// %val = memref.load %firstpriv[] : memref<f32>
|
||||
// acc.yield
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// Example 2: Scalar in kernels construct (implicit copy)
|
||||
//
|
||||
// Before:
|
||||
// func.func @test() {
|
||||
// %scalar = memref.alloca() {acc.var_name = "n"} : memref<i32>
|
||||
// acc.kernels {
|
||||
// %val = memref.load %scalar[] : memref<i32>
|
||||
// acc.terminator
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// After:
|
||||
// func.func @test() {
|
||||
// %scalar = memref.alloca() {acc.var_name = "n"} : memref<i32>
|
||||
// %copyin = acc.copyin varPtr(%scalar : memref<i32>) -> memref<i32>
|
||||
// {dataClause = #acc<data_clause acc_copy>,
|
||||
// implicit = true, name = "n"}
|
||||
// acc.kernels dataOperands(%copyin : memref<i32>) {
|
||||
// %val = memref.load %copyin[] : memref<i32>
|
||||
// acc.terminator
|
||||
// }
|
||||
// acc.copyout accPtr(%copyin : memref<i32>)
|
||||
// to varPtr(%scalar : memref<i32>)
|
||||
// {dataClause = #acc<data_clause acc_copy>,
|
||||
// implicit = true, name = "n"}
|
||||
// }
|
||||
//
|
||||
// Example 3: Array (aggregate) in parallel (implicit copy)
|
||||
//
|
||||
// Before:
|
||||
// func.func @test() {
|
||||
// %array = memref.alloca() {acc.var_name = "arr"} : memref<100xf32>
|
||||
// acc.parallel {
|
||||
// %c0 = arith.constant 0 : index
|
||||
// %val = memref.load %array[%c0] : memref<100xf32>
|
||||
// acc.yield
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// After:
|
||||
// func.func @test() {
|
||||
// %array = memref.alloca() {acc.var_name = "arr"} : memref<100xf32>
|
||||
// %copyin = acc.copyin varPtr(%array : memref<100xf32>)
|
||||
// -> memref<100xf32>
|
||||
// {dataClause = #acc<data_clause acc_copy>,
|
||||
// implicit = true, name = "arr"}
|
||||
// acc.parallel dataOperands(%copyin : memref<100xf32>) {
|
||||
// %c0 = arith.constant 0 : index
|
||||
// %val = memref.load %copyin[%c0] : memref<100xf32>
|
||||
// acc.yield
|
||||
// }
|
||||
// acc.copyout accPtr(%copyin : memref<100xf32>)
|
||||
// to varPtr(%array : memref<100xf32>)
|
||||
// {dataClause = #acc<data_clause acc_copy>,
|
||||
// implicit = true, name = "arr"}
|
||||
// }
|
||||
//
|
||||
// Example 4: Array with default(present)
|
||||
//
|
||||
// Before:
|
||||
// func.func @test() {
|
||||
// %array = memref.alloca() {acc.var_name = "arr"} : memref<100xf32>
|
||||
// acc.parallel {
|
||||
// %c0 = arith.constant 0 : index
|
||||
// %val = memref.load %array[%c0] : memref<100xf32>
|
||||
// acc.yield
|
||||
// } attributes {defaultAttr = #acc<defaultvalue present>}
|
||||
// }
|
||||
//
|
||||
// After:
|
||||
// func.func @test() {
|
||||
// %array = memref.alloca() {acc.var_name = "arr"} : memref<100xf32>
|
||||
// %present = acc.present varPtr(%array : memref<100xf32>)
|
||||
// -> memref<100xf32>
|
||||
// {implicit = true, name = "arr"}
|
||||
// acc.parallel dataOperands(%present : memref<100xf32>)
|
||||
// attributes {defaultAttr = #acc<defaultvalue present>} {
|
||||
// %c0 = arith.constant 0 : index
|
||||
// %val = memref.load %present[%c0] : memref<100xf32>
|
||||
// acc.yield
|
||||
// }
|
||||
// acc.delete accPtr(%present : memref<100xf32>)
|
||||
// {dataClause = #acc<data_clause acc_present>,
|
||||
// implicit = true, name = "arr"}
|
||||
// }
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "mlir/Dialect/OpenACC/Transforms/Passes.h"
|
||||
|
||||
#include "mlir/Analysis/AliasAnalysis.h"
|
||||
#include "mlir/Dialect/OpenACC/Analysis/OpenACCSupport.h"
|
||||
#include "mlir/Dialect/OpenACC/OpenACC.h"
|
||||
#include "mlir/Dialect/OpenACC/OpenACCUtils.h"
|
||||
#include "mlir/IR/Builders.h"
|
||||
#include "mlir/IR/BuiltinOps.h"
|
||||
#include "mlir/IR/Dominance.h"
|
||||
#include "mlir/IR/Operation.h"
|
||||
#include "mlir/IR/Value.h"
|
||||
#include "mlir/Interfaces/FunctionInterfaces.h"
|
||||
#include "mlir/Interfaces/ViewLikeInterface.h"
|
||||
#include "mlir/Transforms/RegionUtils.h"
|
||||
#include "llvm/ADT/STLExtras.h"
|
||||
#include "llvm/ADT/SmallVector.h"
|
||||
#include "llvm/ADT/TypeSwitch.h"
|
||||
#include "llvm/Support/ErrorHandling.h"
|
||||
#include <type_traits>
|
||||
|
||||
namespace mlir {
|
||||
namespace acc {
|
||||
#define GEN_PASS_DEF_ACCIMPLICITDATA
|
||||
#include "mlir/Dialect/OpenACC/Transforms/Passes.h.inc"
|
||||
} // namespace acc
|
||||
} // namespace mlir
|
||||
|
||||
#define DEBUG_TYPE "acc-implicit-data"
|
||||
|
||||
using namespace mlir;
|
||||
|
||||
namespace {
|
||||
|
||||
class ACCImplicitData : public acc::impl::ACCImplicitDataBase<ACCImplicitData> {
|
||||
public:
|
||||
using acc::impl::ACCImplicitDataBase<ACCImplicitData>::ACCImplicitDataBase;
|
||||
|
||||
void runOnOperation() override;
|
||||
|
||||
private:
|
||||
/// Collects all data clauses that dominate the compute construct.
|
||||
/// Needed to determine if a variable is already covered by an existing data
|
||||
/// clause.
|
||||
SmallVector<Value> getDominatingDataClauses(Operation *computeConstructOp);
|
||||
|
||||
/// Looks through the `dominatingDataClauses` to find the original data clause
|
||||
/// op for an alias. Returns nullptr if no original data clause op is found.
|
||||
template <typename OpT>
|
||||
Operation *getOriginalDataClauseOpForAlias(
|
||||
Value var, OpBuilder &builder, OpT computeConstructOp,
|
||||
const SmallVector<Value> &dominatingDataClauses);
|
||||
|
||||
/// Generates the appropriate `acc.copyin`, `acc.present`,`acc.firstprivate`,
|
||||
/// etc. data clause op for a candidate variable.
|
||||
template <typename OpT>
|
||||
Operation *generateDataClauseOpForCandidate(
|
||||
Value var, ModuleOp &module, OpBuilder &builder, OpT computeConstructOp,
|
||||
const SmallVector<Value> &dominatingDataClauses,
|
||||
const std::optional<acc::ClauseDefaultValue> &defaultClause);
|
||||
|
||||
/// Generates the implicit data ops for a compute construct.
|
||||
template <typename OpT>
|
||||
void generateImplicitDataOps(
|
||||
ModuleOp &module, OpT computeConstructOp,
|
||||
std::optional<acc::ClauseDefaultValue> &defaultClause);
|
||||
|
||||
/// Generates a private recipe for a variable.
|
||||
acc::PrivateRecipeOp generatePrivateRecipe(ModuleOp &module, Value var,
|
||||
Location loc, OpBuilder &builder,
|
||||
acc::OpenACCSupport &accSupport);
|
||||
|
||||
/// Generates a firstprivate recipe for a variable.
|
||||
acc::FirstprivateRecipeOp
|
||||
generateFirstprivateRecipe(ModuleOp &module, Value var, Location loc,
|
||||
OpBuilder &builder,
|
||||
acc::OpenACCSupport &accSupport);
|
||||
|
||||
/// Generates recipes for a list of variables.
|
||||
void generateRecipes(ModuleOp &module, OpBuilder &builder,
|
||||
Operation *computeConstructOp,
|
||||
const SmallVector<Value> &newOperands,
|
||||
SmallVector<Attribute> &newRecipeSyms);
|
||||
};
|
||||
|
||||
/// Determines if a variable is a candidate for implicit data mapping.
|
||||
/// Returns true if the variable is a candidate, false otherwise.
|
||||
static bool isCandidateForImplicitData(Value val, Region &accRegion) {
|
||||
// Ensure the variable is an allowed type for data clause.
|
||||
if (!acc::isPointerLikeType(val.getType()) &&
|
||||
!acc::isMappableType(val.getType()))
|
||||
return false;
|
||||
|
||||
// If this is already coming from a data clause, we do not need to generate
|
||||
// another.
|
||||
if (isa_and_nonnull<ACC_DATA_ENTRY_OPS>(val.getDefiningOp()))
|
||||
return false;
|
||||
|
||||
// If this is only used by private clauses, it is not a real live-in.
|
||||
if (acc::isOnlyUsedByPrivateClauses(val, accRegion))
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
SmallVector<Value>
|
||||
ACCImplicitData::getDominatingDataClauses(Operation *computeConstructOp) {
|
||||
llvm::SmallSetVector<Value, 8> dominatingDataClauses;
|
||||
|
||||
llvm::TypeSwitch<Operation *>(computeConstructOp)
|
||||
.Case<acc::ParallelOp, acc::KernelsOp, acc::SerialOp>([&](auto op) {
|
||||
for (auto dataClause : op.getDataClauseOperands()) {
|
||||
dominatingDataClauses.insert(dataClause);
|
||||
}
|
||||
})
|
||||
.Default([](Operation *) {});
|
||||
|
||||
// Collect the data clauses from enclosing data constructs.
|
||||
Operation *currParentOp = computeConstructOp->getParentOp();
|
||||
while (currParentOp) {
|
||||
if (isa<acc::DataOp>(currParentOp)) {
|
||||
for (auto dataClause :
|
||||
dyn_cast<acc::DataOp>(currParentOp).getDataClauseOperands()) {
|
||||
dominatingDataClauses.insert(dataClause);
|
||||
}
|
||||
}
|
||||
currParentOp = currParentOp->getParentOp();
|
||||
}
|
||||
|
||||
// Find the enclosing function/subroutine
|
||||
auto funcOp = computeConstructOp->getParentOfType<FunctionOpInterface>();
|
||||
if (!funcOp)
|
||||
return dominatingDataClauses.takeVector();
|
||||
|
||||
// Walk the function to find `acc.declare_enter`/`acc.declare_exit` pairs that
|
||||
// dominate and post-dominate the compute construct and add their data
|
||||
// clauses to the list.
|
||||
auto &domInfo = this->getAnalysis<DominanceInfo>();
|
||||
auto &postDomInfo = this->getAnalysis<PostDominanceInfo>();
|
||||
funcOp->walk([&](acc::DeclareEnterOp declareEnterOp) {
|
||||
if (domInfo.dominates(declareEnterOp.getOperation(), computeConstructOp)) {
|
||||
// Collect all `acc.declare_exit` ops for this token.
|
||||
SmallVector<acc::DeclareExitOp> exits;
|
||||
for (auto *user : declareEnterOp.getToken().getUsers())
|
||||
if (auto declareExit = dyn_cast<acc::DeclareExitOp>(user))
|
||||
exits.push_back(declareExit);
|
||||
|
||||
// Only add clauses if every `acc.declare_exit` op post-dominates the
|
||||
// compute construct.
|
||||
if (!exits.empty() && llvm::all_of(exits, [&](acc::DeclareExitOp exitOp) {
|
||||
return postDomInfo.postDominates(exitOp, computeConstructOp);
|
||||
})) {
|
||||
for (auto dataClause : declareEnterOp.getDataClauseOperands())
|
||||
dominatingDataClauses.insert(dataClause);
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
return dominatingDataClauses.takeVector();
|
||||
}
|
||||
|
||||
template <typename OpT>
|
||||
Operation *ACCImplicitData::getOriginalDataClauseOpForAlias(
|
||||
Value var, OpBuilder &builder, OpT computeConstructOp,
|
||||
const SmallVector<Value> &dominatingDataClauses) {
|
||||
auto &aliasAnalysis = this->getAnalysis<AliasAnalysis>();
|
||||
for (auto dataClause : dominatingDataClauses) {
|
||||
if (auto *dataClauseOp = dataClause.getDefiningOp()) {
|
||||
// Only accept clauses that guarantee that the alias is present.
|
||||
if (isa<acc::CopyinOp, acc::CreateOp, acc::PresentOp, acc::NoCreateOp,
|
||||
acc::DevicePtrOp>(dataClauseOp))
|
||||
if (aliasAnalysis.alias(acc::getVar(dataClauseOp), var).isMust())
|
||||
return dataClauseOp;
|
||||
}
|
||||
}
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
// Generates bounds for variables that have unknown dimensions
|
||||
static void fillInBoundsForUnknownDimensions(Operation *dataClauseOp,
|
||||
OpBuilder &builder) {
|
||||
|
||||
if (!acc::getBounds(dataClauseOp).empty())
|
||||
// If bounds are already present, do not overwrite them.
|
||||
return;
|
||||
|
||||
// For types that have unknown dimensions, attempt to generate bounds by
|
||||
// relying on MappableType being able to extract it from the IR.
|
||||
auto var = acc::getVar(dataClauseOp);
|
||||
auto type = var.getType();
|
||||
if (auto mappableTy = dyn_cast<acc::MappableType>(type)) {
|
||||
if (mappableTy.hasUnknownDimensions()) {
|
||||
TypeSwitch<Operation *>(dataClauseOp)
|
||||
.Case<ACC_DATA_ENTRY_OPS, ACC_DATA_EXIT_OPS>([&](auto dataClauseOp) {
|
||||
if (std::is_same_v<decltype(dataClauseOp), acc::DevicePtrOp>)
|
||||
return;
|
||||
OpBuilder::InsertionGuard guard(builder);
|
||||
builder.setInsertionPoint(dataClauseOp);
|
||||
auto bounds = mappableTy.generateAccBounds(var, builder);
|
||||
if (!bounds.empty())
|
||||
dataClauseOp.getBoundsMutable().assign(bounds);
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
acc::PrivateRecipeOp
|
||||
ACCImplicitData::generatePrivateRecipe(ModuleOp &module, Value var,
|
||||
Location loc, OpBuilder &builder,
|
||||
acc::OpenACCSupport &accSupport) {
|
||||
auto type = var.getType();
|
||||
std::string recipeName =
|
||||
accSupport.getRecipeName(acc::RecipeKind::private_recipe, type, var);
|
||||
|
||||
// Check if recipe already exists
|
||||
auto existingRecipe = module.lookupSymbol<acc::PrivateRecipeOp>(recipeName);
|
||||
if (existingRecipe)
|
||||
return existingRecipe;
|
||||
|
||||
// Set insertion point to module body in a scoped way
|
||||
OpBuilder::InsertionGuard guard(builder);
|
||||
builder.setInsertionPointToStart(module.getBody());
|
||||
|
||||
auto recipe =
|
||||
acc::PrivateRecipeOp::createAndPopulate(builder, loc, recipeName, type);
|
||||
if (!recipe.has_value())
|
||||
return accSupport.emitNYI(loc, "implicit private"), nullptr;
|
||||
return recipe.value();
|
||||
}
|
||||
|
||||
acc::FirstprivateRecipeOp
|
||||
ACCImplicitData::generateFirstprivateRecipe(ModuleOp &module, Value var,
|
||||
Location loc, OpBuilder &builder,
|
||||
acc::OpenACCSupport &accSupport) {
|
||||
auto type = var.getType();
|
||||
std::string recipeName =
|
||||
accSupport.getRecipeName(acc::RecipeKind::firstprivate_recipe, type, var);
|
||||
|
||||
// Check if recipe already exists
|
||||
auto existingRecipe =
|
||||
module.lookupSymbol<acc::FirstprivateRecipeOp>(recipeName);
|
||||
if (existingRecipe)
|
||||
return existingRecipe;
|
||||
|
||||
// Set insertion point to module body in a scoped way
|
||||
OpBuilder::InsertionGuard guard(builder);
|
||||
builder.setInsertionPointToStart(module.getBody());
|
||||
|
||||
auto recipe = acc::FirstprivateRecipeOp::createAndPopulate(builder, loc,
|
||||
recipeName, type);
|
||||
if (!recipe.has_value())
|
||||
return accSupport.emitNYI(loc, "implicit firstprivate"), nullptr;
|
||||
return recipe.value();
|
||||
}
|
||||
|
||||
void ACCImplicitData::generateRecipes(ModuleOp &module, OpBuilder &builder,
|
||||
Operation *computeConstructOp,
|
||||
const SmallVector<Value> &newOperands,
|
||||
SmallVector<Attribute> &newRecipeSyms) {
|
||||
auto &accSupport = this->getAnalysis<acc::OpenACCSupport>();
|
||||
for (auto var : newOperands) {
|
||||
auto loc{var.getLoc()};
|
||||
if (isa<acc::PrivateOp>(var.getDefiningOp())) {
|
||||
auto recipe = generatePrivateRecipe(
|
||||
module, acc::getVar(var.getDefiningOp()), loc, builder, accSupport);
|
||||
if (recipe)
|
||||
newRecipeSyms.push_back(SymbolRefAttr::get(module->getContext(),
|
||||
recipe.getSymName().str()));
|
||||
} else if (isa<acc::FirstprivateOp>(var.getDefiningOp())) {
|
||||
auto recipe = generateFirstprivateRecipe(
|
||||
module, acc::getVar(var.getDefiningOp()), loc, builder, accSupport);
|
||||
if (recipe)
|
||||
newRecipeSyms.push_back(SymbolRefAttr::get(module->getContext(),
|
||||
recipe.getSymName().str()));
|
||||
} else {
|
||||
accSupport.emitNYI(var.getLoc(), "implicit reduction");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Generates the data entry data op clause so that it adheres to OpenACC
|
||||
// rules as follows (line numbers and specification from OpenACC 3.4):
|
||||
// 1388 An aggregate variable will be treated as if it appears either:
|
||||
// 1389 - In a present clause if there is a default(present) clause visible at
|
||||
// the compute construct.
|
||||
// 1391 - In a copy clause otherwise.
|
||||
// 1392 A scalar variable will be treated as if it appears either:
|
||||
// 1393 - In a copy clause if the compute construct is a kernels construct.
|
||||
// 1394 - In a firstprivate clause otherwise.
|
||||
template <typename OpT>
|
||||
Operation *ACCImplicitData::generateDataClauseOpForCandidate(
|
||||
Value var, ModuleOp &module, OpBuilder &builder, OpT computeConstructOp,
|
||||
const SmallVector<Value> &dominatingDataClauses,
|
||||
const std::optional<acc::ClauseDefaultValue> &defaultClause) {
|
||||
auto &accSupport = this->getAnalysis<acc::OpenACCSupport>();
|
||||
acc::VariableTypeCategory typeCategory =
|
||||
acc::VariableTypeCategory::uncategorized;
|
||||
if (auto mappableTy = dyn_cast<acc::MappableType>(var.getType())) {
|
||||
typeCategory = mappableTy.getTypeCategory(var);
|
||||
} else if (auto pointerLikeTy =
|
||||
dyn_cast<acc::PointerLikeType>(var.getType())) {
|
||||
typeCategory = pointerLikeTy.getPointeeTypeCategory(
|
||||
cast<TypedValue<acc::PointerLikeType>>(var),
|
||||
pointerLikeTy.getElementType());
|
||||
}
|
||||
|
||||
bool isScalar =
|
||||
acc::bitEnumContainsAny(typeCategory, acc::VariableTypeCategory::scalar);
|
||||
bool isAnyAggregate = acc::bitEnumContainsAny(
|
||||
typeCategory, acc::VariableTypeCategory::aggregate);
|
||||
Location loc = computeConstructOp->getLoc();
|
||||
|
||||
Operation *op = nullptr;
|
||||
op = getOriginalDataClauseOpForAlias(var, builder, computeConstructOp,
|
||||
dominatingDataClauses);
|
||||
if (op) {
|
||||
if (isa<acc::NoCreateOp>(op))
|
||||
return acc::NoCreateOp::create(builder, loc, var,
|
||||
/*structured=*/true, /*implicit=*/true,
|
||||
accSupport.getVariableName(var),
|
||||
acc::getBounds(op));
|
||||
|
||||
if (isa<acc::DevicePtrOp>(op))
|
||||
return acc::DevicePtrOp::create(builder, loc, var,
|
||||
/*structured=*/true, /*implicit=*/true,
|
||||
accSupport.getVariableName(var),
|
||||
acc::getBounds(op));
|
||||
|
||||
// The original data clause op is a PresentOp, CopyinOp, or CreateOp,
|
||||
// hence guaranteed to be present.
|
||||
return acc::PresentOp::create(builder, loc, var,
|
||||
/*structured=*/true, /*implicit=*/true,
|
||||
accSupport.getVariableName(var),
|
||||
acc::getBounds(op));
|
||||
} else if (isScalar) {
|
||||
if (enableImplicitReductionCopy &&
|
||||
acc::isOnlyUsedByReductionClauses(var,
|
||||
computeConstructOp->getRegion(0))) {
|
||||
auto copyinOp =
|
||||
acc::CopyinOp::create(builder, loc, var,
|
||||
/*structured=*/true, /*implicit=*/true,
|
||||
accSupport.getVariableName(var));
|
||||
copyinOp.setDataClause(acc::DataClause::acc_reduction);
|
||||
return copyinOp.getOperation();
|
||||
}
|
||||
if constexpr (std::is_same_v<OpT, acc::KernelsOp> ||
|
||||
std::is_same_v<OpT, acc::KernelEnvironmentOp>) {
|
||||
// Scalars are implicit copyin in kernels construct.
|
||||
// We also do the same for acc.kernel_environment because semantics
|
||||
// of user variable mappings should be applied while ACC construct exists
|
||||
// and at this point we should only be dealing with unmapped variables
|
||||
// that were made live-in by the compiler.
|
||||
// TODO: This may be revisited.
|
||||
auto copyinOp =
|
||||
acc::CopyinOp::create(builder, loc, var,
|
||||
/*structured=*/true, /*implicit=*/true,
|
||||
accSupport.getVariableName(var));
|
||||
copyinOp.setDataClause(acc::DataClause::acc_copy);
|
||||
return copyinOp.getOperation();
|
||||
} else {
|
||||
// Scalars are implicit firstprivate in parallel and serial construct.
|
||||
return acc::FirstprivateOp::create(builder, loc, var,
|
||||
/*structured=*/true, /*implicit=*/true,
|
||||
accSupport.getVariableName(var));
|
||||
}
|
||||
} else if (isAnyAggregate) {
|
||||
Operation *newDataOp = nullptr;
|
||||
|
||||
// When default(present) is true, the implicit behavior is present.
|
||||
if (defaultClause.has_value() &&
|
||||
defaultClause.value() == acc::ClauseDefaultValue::Present) {
|
||||
newDataOp = acc::PresentOp::create(builder, loc, var,
|
||||
/*structured=*/true, /*implicit=*/true,
|
||||
accSupport.getVariableName(var));
|
||||
} else {
|
||||
SmallVector<Value> bounds;
|
||||
auto copyinOp =
|
||||
acc::CopyinOp::create(builder, loc, var,
|
||||
/*structured=*/true, /*implicit=*/true,
|
||||
accSupport.getVariableName(var));
|
||||
copyinOp.setDataClause(acc::DataClause::acc_copy);
|
||||
newDataOp = copyinOp.getOperation();
|
||||
}
|
||||
|
||||
return newDataOp;
|
||||
} else {
|
||||
// This is not a fatal error - for example when the element type is
|
||||
// pointer type (aka we have a pointer of pointer), it is potentially a
|
||||
// deep copy scenario which is not being handled here.
|
||||
// Other types need to be canonicalized. Thus just log unhandled cases.
|
||||
LLVM_DEBUG(llvm::dbgs()
|
||||
<< "Unhandled case for implicit data mapping " << var << "\n");
|
||||
}
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
// Ensures that result values from the acc data clause ops are used inside the
|
||||
// acc region. ie:
|
||||
// acc.kernels {
|
||||
// use %val
|
||||
// }
|
||||
// =>
|
||||
// %dev = acc.dataop %val
|
||||
// acc.kernels {
|
||||
// use %dev
|
||||
// }
|
||||
static void legalizeValuesInRegion(Region &accRegion,
|
||||
SmallVector<Value> &newPrivateOperands,
|
||||
SmallVector<Value> &newDataClauseOperands) {
|
||||
for (Value dataClause :
|
||||
llvm::concat<Value>(newDataClauseOperands, newPrivateOperands)) {
|
||||
Value var = acc::getVar(dataClause.getDefiningOp());
|
||||
replaceAllUsesInRegionWith(var, dataClause, accRegion);
|
||||
}
|
||||
}
|
||||
|
||||
// Adds the private operands and private recipes to the data construct
|
||||
// operation in a valid way (ensures that the index in the privatizationRecipes
|
||||
// array matches the position of the private operand).
|
||||
template <typename OpT>
|
||||
static void
|
||||
addNewPrivateOperands(OpT &accOp, const SmallVector<Value> &privateOperands,
|
||||
const SmallVector<Attribute> &privateRecipeSyms) {
|
||||
assert(privateOperands.size() == privateRecipeSyms.size());
|
||||
if (privateOperands.empty())
|
||||
return;
|
||||
|
||||
SmallVector<Attribute> completePrivateRecipesSyms;
|
||||
SmallVector<Attribute> completeFirstprivateRecipesSyms;
|
||||
SmallVector<Value> newPrivateOperands;
|
||||
SmallVector<Value> newFirstprivateOperands;
|
||||
|
||||
// Collect all of the existing recipes since they are held in an attribute.
|
||||
// To add to it, we need to create a brand new one.
|
||||
if (accOp.getPrivatizationRecipes().has_value())
|
||||
for (auto privatization : accOp.getPrivatizationRecipesAttr())
|
||||
completePrivateRecipesSyms.push_back(privatization);
|
||||
if (accOp.getFirstprivatizationRecipes().has_value())
|
||||
for (auto privatization : accOp.getFirstprivatizationRecipesAttr())
|
||||
completeFirstprivateRecipesSyms.push_back(privatization);
|
||||
|
||||
// Now separate between private and firstprivate operands.
|
||||
for (auto [priv, privateRecipeSym] :
|
||||
llvm::zip(privateOperands, privateRecipeSyms)) {
|
||||
if (isa<acc::PrivateOp>(priv.getDefiningOp())) {
|
||||
newPrivateOperands.push_back(priv);
|
||||
completePrivateRecipesSyms.push_back(privateRecipeSym);
|
||||
} else if (isa<acc::FirstprivateOp>(priv.getDefiningOp())) {
|
||||
newFirstprivateOperands.push_back(priv);
|
||||
completeFirstprivateRecipesSyms.push_back(privateRecipeSym);
|
||||
} else {
|
||||
llvm_unreachable("unhandled private operand");
|
||||
}
|
||||
}
|
||||
|
||||
// Append all of the new private operands to their appropriate list.
|
||||
accOp.getPrivateOperandsMutable().append(newPrivateOperands);
|
||||
accOp.getFirstprivateOperandsMutable().append(newFirstprivateOperands);
|
||||
|
||||
// Update the privatizationRecipes attributes to hold all of the new recipes.
|
||||
if (!completePrivateRecipesSyms.empty())
|
||||
accOp.setPrivatizationRecipesAttr(
|
||||
ArrayAttr::get(accOp.getContext(), completePrivateRecipesSyms));
|
||||
if (!completeFirstprivateRecipesSyms.empty())
|
||||
accOp.setFirstprivatizationRecipesAttr(
|
||||
ArrayAttr::get(accOp.getContext(), completeFirstprivateRecipesSyms));
|
||||
}
|
||||
|
||||
static Operation *findDataExitOp(Operation *dataEntryOp) {
|
||||
auto res = acc::getAccVar(dataEntryOp);
|
||||
for (auto *user : res.getUsers())
|
||||
if (isa<ACC_DATA_EXIT_OPS>(user))
|
||||
return user;
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
// Generates matching data exit operation as described in the acc dialect
|
||||
// for how data clauses are decomposed:
|
||||
// https://mlir.llvm.org/docs/Dialects/OpenACCDialect/#operation-categories
|
||||
// Key ones used here:
|
||||
// * acc {construct} copy -> acc.copyin (before region) + acc.copyout (after
|
||||
// region)
|
||||
// * acc {construct} present -> acc.present (before region) + acc.delete
|
||||
// (after region)
|
||||
static void
|
||||
generateDataExitOperations(OpBuilder &builder, Operation *accOp,
|
||||
const SmallVector<Value> &newDataClauseOperands,
|
||||
const SmallVector<Value> &sortedDataClauseOperands) {
|
||||
builder.setInsertionPointAfter(accOp);
|
||||
Value lastDataClause = nullptr;
|
||||
for (auto dataEntry : llvm::reverse(sortedDataClauseOperands)) {
|
||||
if (llvm::find(newDataClauseOperands, dataEntry) ==
|
||||
newDataClauseOperands.end()) {
|
||||
// If this is not a new data clause operand, we should not generate an
|
||||
// exit operation for it.
|
||||
lastDataClause = dataEntry;
|
||||
continue;
|
||||
}
|
||||
if (lastDataClause)
|
||||
if (auto *dataExitOp = findDataExitOp(lastDataClause.getDefiningOp()))
|
||||
builder.setInsertionPointAfter(dataExitOp);
|
||||
Operation *dataEntryOp = dataEntry.getDefiningOp();
|
||||
if (isa<acc::CopyinOp>(dataEntryOp)) {
|
||||
auto copyoutOp = acc::CopyoutOp::create(
|
||||
builder, dataEntryOp->getLoc(), dataEntry, acc::getVar(dataEntryOp),
|
||||
/*structured=*/true, /*implicit=*/true,
|
||||
acc::getVarName(dataEntryOp).value(), acc::getBounds(dataEntryOp));
|
||||
copyoutOp.setDataClause(acc::DataClause::acc_copy);
|
||||
} else if (isa<acc::PresentOp, acc::NoCreateOp>(dataEntryOp)) {
|
||||
auto deleteOp = acc::DeleteOp::create(
|
||||
builder, dataEntryOp->getLoc(), dataEntry,
|
||||
/*structured=*/true, /*implicit=*/true,
|
||||
acc::getVarName(dataEntryOp).value(), acc::getBounds(dataEntryOp));
|
||||
deleteOp.setDataClause(acc::getDataClause(dataEntryOp).value());
|
||||
} else if (isa<acc::DevicePtrOp>(dataEntryOp)) {
|
||||
// Do nothing.
|
||||
} else {
|
||||
llvm_unreachable("unhandled data exit");
|
||||
}
|
||||
lastDataClause = dataEntry;
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns all base references of a value in order.
|
||||
/// So for example, if we have a reference to a struct field like
|
||||
/// s.f1.f2.f3, this will return <s, s.f1, s.f1.f2, s.f1.f2.f3>.
|
||||
/// Any intermediate casts/view-like operations are included in the
|
||||
/// chain as well.
|
||||
static SmallVector<Value> getBaseRefsChain(Value val) {
|
||||
SmallVector<Value> baseRefs;
|
||||
baseRefs.push_back(val);
|
||||
while (true) {
|
||||
Value prevVal = val;
|
||||
|
||||
val = acc::getBaseEntity(val);
|
||||
if (val != baseRefs.front())
|
||||
baseRefs.insert(baseRefs.begin(), val);
|
||||
|
||||
// If this is a view-like operation, it is effectively another
|
||||
// view of the same entity so we should add it to the chain also.
|
||||
if (auto viewLikeOp = val.getDefiningOp<ViewLikeOpInterface>()) {
|
||||
val = viewLikeOp.getViewSource();
|
||||
baseRefs.insert(baseRefs.begin(), val);
|
||||
}
|
||||
|
||||
// Continue loop if we made any progress
|
||||
if (val == prevVal)
|
||||
break;
|
||||
}
|
||||
|
||||
return baseRefs;
|
||||
}
|
||||
|
||||
static void insertInSortedOrder(SmallVector<Value> &sortedDataClauseOperands,
|
||||
Operation *newClause) {
|
||||
auto *insertPos =
|
||||
std::find_if(sortedDataClauseOperands.begin(),
|
||||
sortedDataClauseOperands.end(), [&](Value dataClauseVal) {
|
||||
// Get the base refs for the current clause we are looking
|
||||
// at.
|
||||
auto var = acc::getVar(dataClauseVal.getDefiningOp());
|
||||
auto baseRefs = getBaseRefsChain(var);
|
||||
|
||||
// If the newClause is of a base ref of an existing clause,
|
||||
// we should insert it right before the current clause.
|
||||
// Thus return true to stop iteration when this is the
|
||||
// case.
|
||||
return std::find(baseRefs.begin(), baseRefs.end(),
|
||||
acc::getVar(newClause)) != baseRefs.end();
|
||||
});
|
||||
|
||||
if (insertPos != sortedDataClauseOperands.end()) {
|
||||
newClause->moveBefore(insertPos->getDefiningOp());
|
||||
sortedDataClauseOperands.insert(insertPos, acc::getAccVar(newClause));
|
||||
} else {
|
||||
sortedDataClauseOperands.push_back(acc::getAccVar(newClause));
|
||||
}
|
||||
}
|
||||
|
||||
template <typename OpT>
|
||||
void ACCImplicitData::generateImplicitDataOps(
|
||||
ModuleOp &module, OpT computeConstructOp,
|
||||
std::optional<acc::ClauseDefaultValue> &defaultClause) {
|
||||
// Implicit data attributes are only applied if "[t]here is no default(none)
|
||||
// clause visible at the compute construct."
|
||||
if (defaultClause.has_value() &&
|
||||
defaultClause.value() == acc::ClauseDefaultValue::None)
|
||||
return;
|
||||
assert(!defaultClause.has_value() ||
|
||||
defaultClause.value() == acc::ClauseDefaultValue::Present);
|
||||
|
||||
// 1) Collect live-in values.
|
||||
Region &accRegion = computeConstructOp->getRegion(0);
|
||||
SetVector<Value> liveInValues;
|
||||
getUsedValuesDefinedAbove(accRegion, liveInValues);
|
||||
|
||||
// 2) Run the filtering to find relevant pointers that need copied.
|
||||
auto isCandidate{[&](Value val) -> bool {
|
||||
return isCandidateForImplicitData(val, accRegion);
|
||||
}};
|
||||
auto candidateVars(
|
||||
llvm::to_vector(llvm::make_filter_range(liveInValues, isCandidate)));
|
||||
if (candidateVars.empty())
|
||||
return;
|
||||
|
||||
// 3) Generate data clauses for the variables.
|
||||
SmallVector<Value> newPrivateOperands;
|
||||
SmallVector<Value> newDataClauseOperands;
|
||||
OpBuilder builder(computeConstructOp);
|
||||
if (!candidateVars.empty()) {
|
||||
LLVM_DEBUG(llvm::dbgs() << "== Generating clauses for ==\n"
|
||||
<< computeConstructOp << "\n");
|
||||
}
|
||||
auto dominatingDataClauses = getDominatingDataClauses(computeConstructOp);
|
||||
for (auto var : candidateVars) {
|
||||
auto newDataClauseOp = generateDataClauseOpForCandidate(
|
||||
var, module, builder, computeConstructOp, dominatingDataClauses,
|
||||
defaultClause);
|
||||
fillInBoundsForUnknownDimensions(newDataClauseOp, builder);
|
||||
LLVM_DEBUG(llvm::dbgs() << "Generated data clause for " << var << ":\n"
|
||||
<< "\t" << *newDataClauseOp << "\n");
|
||||
if (isa_and_nonnull<acc::PrivateOp, acc::FirstprivateOp, acc::ReductionOp>(
|
||||
newDataClauseOp)) {
|
||||
newPrivateOperands.push_back(acc::getAccVar(newDataClauseOp));
|
||||
} else if (isa_and_nonnull<ACC_DATA_CLAUSE_OPS>(newDataClauseOp)) {
|
||||
newDataClauseOperands.push_back(acc::getAccVar(newDataClauseOp));
|
||||
dominatingDataClauses.push_back(acc::getAccVar(newDataClauseOp));
|
||||
}
|
||||
}
|
||||
|
||||
// 4) Legalize values in region (aka the uses in the region are the result
|
||||
// of the data clause ops)
|
||||
legalizeValuesInRegion(accRegion, newPrivateOperands, newDataClauseOperands);
|
||||
|
||||
SmallVector<Attribute> newPrivateRecipeSyms;
|
||||
// 5) Generate private recipes which are required for properly attaching
|
||||
// private operands.
|
||||
if constexpr (!std::is_same_v<OpT, acc::KernelsOp> &&
|
||||
!std::is_same_v<OpT, acc::KernelEnvironmentOp>)
|
||||
generateRecipes(module, builder, computeConstructOp, newPrivateOperands,
|
||||
newPrivateRecipeSyms);
|
||||
|
||||
// 6) Figure out insertion order for the new data clause operands.
|
||||
SmallVector<Value> sortedDataClauseOperands(
|
||||
computeConstructOp.getDataClauseOperands());
|
||||
for (auto newClause : newDataClauseOperands)
|
||||
insertInSortedOrder(sortedDataClauseOperands, newClause.getDefiningOp());
|
||||
|
||||
// 7) Generate the data exit operations.
|
||||
generateDataExitOperations(builder, computeConstructOp, newDataClauseOperands,
|
||||
sortedDataClauseOperands);
|
||||
|
||||
// 8) Add all of the new operands to the compute construct op.
|
||||
assert(newPrivateOperands.size() == newPrivateRecipeSyms.size() &&
|
||||
"sizes must match");
|
||||
if constexpr (!std::is_same_v<OpT, acc::KernelsOp> &&
|
||||
!std::is_same_v<OpT, acc::KernelEnvironmentOp>)
|
||||
addNewPrivateOperands(computeConstructOp, newPrivateOperands,
|
||||
newPrivateRecipeSyms);
|
||||
|
||||
computeConstructOp.getDataClauseOperandsMutable().assign(
|
||||
sortedDataClauseOperands);
|
||||
}
|
||||
|
||||
void ACCImplicitData::runOnOperation() {
|
||||
ModuleOp module = this->getOperation();
|
||||
module.walk([&](Operation *op) {
|
||||
if (isa<ACC_COMPUTE_CONSTRUCT_OPS, acc::KernelEnvironmentOp>(op)) {
|
||||
assert(op->getNumRegions() == 1 && "must have 1 region");
|
||||
|
||||
auto defaultClause = acc::getDefaultAttr(op);
|
||||
llvm::TypeSwitch<Operation *, void>(op)
|
||||
.Case<ACC_COMPUTE_CONSTRUCT_OPS, acc::KernelEnvironmentOp>(
|
||||
[&](auto op) {
|
||||
generateImplicitDataOps(module, op, defaultClause);
|
||||
})
|
||||
.Default([&](Operation *) {});
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
} // namespace
|
||||
@@ -1,4 +1,5 @@
|
||||
add_mlir_dialect_library(MLIROpenACCTransforms
|
||||
ACCImplicitData.cpp
|
||||
LegalizeDataValues.cpp
|
||||
|
||||
ADDITIONAL_HEADER_DIRS
|
||||
@@ -14,7 +15,10 @@ add_mlir_dialect_library(MLIROpenACCTransforms
|
||||
MLIROpenACCTypeInterfacesIncGen
|
||||
|
||||
LINK_LIBS PUBLIC
|
||||
MLIRAnalysis
|
||||
MLIROpenACCAnalysis
|
||||
MLIROpenACCDialect
|
||||
MLIROpenACCUtils
|
||||
MLIRFuncDialect
|
||||
MLIRIR
|
||||
MLIRPass
|
||||
|
||||
109
mlir/test/Dialect/OpenACC/acc-implicit-data-reduction.mlir
Normal file
109
mlir/test/Dialect/OpenACC/acc-implicit-data-reduction.mlir
Normal file
@@ -0,0 +1,109 @@
|
||||
// RUN: mlir-opt %s -acc-implicit-data=enable-implicit-reduction-copy=true -split-input-file | FileCheck %s --check-prefix=COPY
|
||||
// RUN: mlir-opt %s -acc-implicit-data=enable-implicit-reduction-copy=false -split-input-file | FileCheck %s --check-prefix=FIRSTPRIVATE
|
||||
|
||||
// Test case: scalar reduction variable in parallel loop
|
||||
// When enable-implicit-reduction-copy=true: expect copyin/copyout for reduction variable
|
||||
// When enable-implicit-reduction-copy=false: expect firstprivate for reduction variable
|
||||
|
||||
acc.reduction.recipe @reduction_add_memref_i32 : memref<i32> reduction_operator <add> init {
|
||||
^bb0(%arg0: memref<i32>):
|
||||
%c0_i32 = arith.constant 0 : i32
|
||||
%alloc = memref.alloca() : memref<i32>
|
||||
memref.store %c0_i32, %alloc[] : memref<i32>
|
||||
acc.yield %alloc : memref<i32>
|
||||
} combiner {
|
||||
^bb0(%arg0: memref<i32>, %arg1: memref<i32>):
|
||||
%0 = memref.load %arg0[] : memref<i32>
|
||||
%1 = memref.load %arg1[] : memref<i32>
|
||||
%2 = arith.addi %0, %1 : i32
|
||||
memref.store %2, %arg0[] : memref<i32>
|
||||
acc.yield %arg0 : memref<i32>
|
||||
}
|
||||
|
||||
func.func @test_reduction_implicit_copy() {
|
||||
%c1_i32 = arith.constant 1 : i32
|
||||
%c100_i32 = arith.constant 100 : i32
|
||||
%c0_i32 = arith.constant 0 : i32
|
||||
%r = memref.alloca() : memref<i32>
|
||||
memref.store %c0_i32, %r[] : memref<i32>
|
||||
|
||||
acc.parallel {
|
||||
%red_var = acc.reduction varPtr(%r : memref<i32>) -> memref<i32> {name = "r"}
|
||||
acc.loop reduction(@reduction_add_memref_i32 -> %red_var : memref<i32>) control(%iv : i32) = (%c1_i32 : i32) to (%c100_i32 : i32) step (%c1_i32 : i32) {
|
||||
%load = memref.load %red_var[] : memref<i32>
|
||||
%add = arith.addi %load, %c1_i32 : i32
|
||||
memref.store %add, %red_var[] : memref<i32>
|
||||
acc.yield
|
||||
} attributes {independent = [#acc.device_type<none>]}
|
||||
acc.yield
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// When enable-implicit-reduction-copy=true: expect copyin/copyout for reduction variable
|
||||
// COPY-LABEL: func.func @test_reduction_implicit_copy
|
||||
// COPY: %[[COPYIN:.*]] = acc.copyin varPtr({{.*}} : memref<i32>) -> memref<i32> {dataClause = #acc<data_clause acc_reduction>, implicit = true, name = ""}
|
||||
// COPY: acc.copyout accPtr(%[[COPYIN]] : memref<i32>) to varPtr({{.*}} : memref<i32>) {dataClause = #acc<data_clause acc_copy>, implicit = true, name = ""}
|
||||
|
||||
// When enable-implicit-reduction-copy=false: expect firstprivate for reduction variable
|
||||
// FIRSTPRIVATE-LABEL: func.func @test_reduction_implicit_copy
|
||||
// FIRSTPRIVATE: acc.firstprivate varPtr({{.*}} : memref<i32>) -> memref<i32> {implicit = true, name = ""}
|
||||
// FIRSTPRIVATE-NOT: acc.copyin
|
||||
// FIRSTPRIVATE-NOT: acc.copyout
|
||||
|
||||
// -----
|
||||
|
||||
// Test case: reduction variable used both in loop and outside
|
||||
// Should be firstprivate regardless of the flag setting
|
||||
|
||||
acc.reduction.recipe @reduction_add_memref_i32_2 : memref<i32> reduction_operator <add> init {
|
||||
^bb0(%arg0: memref<i32>):
|
||||
%c0_i32 = arith.constant 0 : i32
|
||||
%alloc = memref.alloca() : memref<i32>
|
||||
memref.store %c0_i32, %alloc[] : memref<i32>
|
||||
acc.yield %alloc : memref<i32>
|
||||
} combiner {
|
||||
^bb0(%arg0: memref<i32>, %arg1: memref<i32>):
|
||||
%0 = memref.load %arg0[] : memref<i32>
|
||||
%1 = memref.load %arg1[] : memref<i32>
|
||||
%2 = arith.addi %0, %1 : i32
|
||||
memref.store %2, %arg0[] : memref<i32>
|
||||
acc.yield %arg0 : memref<i32>
|
||||
}
|
||||
|
||||
func.func @test_reduction_with_usage_outside_loop() {
|
||||
%c1_i32 = arith.constant 1 : i32
|
||||
%c100_i32 = arith.constant 100 : i32
|
||||
%c0_i32 = arith.constant 0 : i32
|
||||
%r = memref.alloca() : memref<i32>
|
||||
%out = memref.alloca() : memref<i32>
|
||||
memref.store %c0_i32, %r[] : memref<i32>
|
||||
|
||||
%out_create = acc.create varPtr(%out : memref<i32>) -> memref<i32> {dataClause = #acc<data_clause acc_copyout>, name = "out"}
|
||||
acc.parallel dataOperands(%out_create : memref<i32>) {
|
||||
%red_var = acc.reduction varPtr(%r : memref<i32>) -> memref<i32> {name = "r"}
|
||||
acc.loop reduction(@reduction_add_memref_i32_2 -> %red_var : memref<i32>) control(%iv : i32) = (%c1_i32 : i32) to (%c100_i32 : i32) step (%c1_i32 : i32) {
|
||||
%load = memref.load %red_var[] : memref<i32>
|
||||
%add = arith.addi %load, %c1_i32 : i32
|
||||
memref.store %add, %red_var[] : memref<i32>
|
||||
acc.yield
|
||||
} attributes {independent = [#acc.device_type<none>]}
|
||||
// out = r (usage of r outside the loop)
|
||||
%final_r = memref.load %r[] : memref<i32>
|
||||
memref.store %final_r, %out_create[] : memref<i32>
|
||||
acc.yield
|
||||
}
|
||||
acc.copyout accPtr(%out_create : memref<i32>) to varPtr(%out : memref<i32>) {dataClause = #acc<data_clause acc_copyout>, name = "out"}
|
||||
return
|
||||
}
|
||||
|
||||
// In this case, r should be firstprivate regardless of the flag setting
|
||||
// because it's used outside the reduction context
|
||||
// COPY-LABEL: func.func @test_reduction_with_usage_outside_loop
|
||||
// COPY: acc.firstprivate varPtr({{.*}} : memref<i32>) -> memref<i32> {implicit = true, name = ""}
|
||||
// COPY-NOT: acc.copyin varPtr({{.*}} : memref<i32>) -> memref<i32> {{.*}} name = ""
|
||||
|
||||
// FIRSTPRIVATE-LABEL: func.func @test_reduction_with_usage_outside_loop
|
||||
// FIRSTPRIVATE: acc.firstprivate varPtr({{.*}} : memref<i32>) -> memref<i32> {implicit = true, name = ""}
|
||||
// FIRSTPRIVATE-NOT: acc.copyin varPtr({{.*}} : memref<i32>) -> memref<i32> {{.*}} name = ""
|
||||
|
||||
224
mlir/test/Dialect/OpenACC/acc-implicit-data.mlir
Normal file
224
mlir/test/Dialect/OpenACC/acc-implicit-data.mlir
Normal file
@@ -0,0 +1,224 @@
|
||||
// RUN: mlir-opt %s -acc-implicit-data -split-input-file | FileCheck %s
|
||||
|
||||
// -----
|
||||
|
||||
// Test scalar in serial construct - should generate firstprivate
|
||||
func.func @test_scalar_in_serial() {
|
||||
%alloc = memref.alloca() : memref<i64>
|
||||
acc.serial {
|
||||
%load = memref.load %alloc[] : memref<i64>
|
||||
acc.yield
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// CHECK-LABEL: func.func @test_scalar_in_serial
|
||||
// CHECK: acc.firstprivate varPtr({{.*}} : memref<i64>) -> memref<i64> {implicit = true, name = ""}
|
||||
|
||||
// -----
|
||||
|
||||
// Test scalar in parallel construct - should generate firstprivate
|
||||
func.func @test_scalar_in_parallel() {
|
||||
%alloc = memref.alloca() : memref<f32>
|
||||
acc.parallel {
|
||||
%load = memref.load %alloc[] : memref<f32>
|
||||
acc.yield
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// CHECK-LABEL: func.func @test_scalar_in_parallel
|
||||
// CHECK: acc.firstprivate varPtr({{.*}} : memref<f32>) -> memref<f32> {implicit = true, name = ""}
|
||||
|
||||
// -----
|
||||
|
||||
// Test scalar in kernels construct - should generate copyin/copyout
|
||||
func.func @test_scalar_in_kernels() {
|
||||
%alloc = memref.alloca() : memref<f64>
|
||||
acc.kernels {
|
||||
%load = memref.load %alloc[] : memref<f64>
|
||||
acc.terminator
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// CHECK-LABEL: func.func @test_scalar_in_kernels
|
||||
// CHECK: %[[COPYIN:.*]] = acc.copyin varPtr({{.*}} : memref<f64>) -> memref<f64> {dataClause = #acc<data_clause acc_copy>, implicit = true, name = ""}
|
||||
// CHECK: acc.copyout accPtr(%[[COPYIN]] : memref<f64>) to varPtr({{.*}} : memref<f64>) {dataClause = #acc<data_clause acc_copy>, implicit = true, name = ""}
|
||||
|
||||
// -----
|
||||
|
||||
// Test scalar in parallel with default(none) - should NOT generate implicit data
|
||||
func.func @test_scalar_parallel_defaultnone() {
|
||||
%alloc = memref.alloca() : memref<f32>
|
||||
acc.parallel {
|
||||
%load = memref.load %alloc[] : memref<f32>
|
||||
acc.yield
|
||||
} attributes {defaultAttr = #acc<defaultvalue none>}
|
||||
return
|
||||
}
|
||||
|
||||
// CHECK-LABEL: func.func @test_scalar_parallel_defaultnone
|
||||
// CHECK-NOT: acc.firstprivate
|
||||
// CHECK-NOT: acc.copyin
|
||||
|
||||
// -----
|
||||
|
||||
// Test array in parallel - should generate copyin/copyout
|
||||
func.func @test_array_in_parallel() {
|
||||
%alloc = memref.alloca() : memref<10xf32>
|
||||
acc.parallel {
|
||||
%c0 = arith.constant 0 : index
|
||||
%load = memref.load %alloc[%c0] : memref<10xf32>
|
||||
acc.yield
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// CHECK-LABEL: func.func @test_array_in_parallel
|
||||
// CHECK: %[[COPYIN:.*]] = acc.copyin varPtr({{.*}} : memref<10xf32>) -> memref<10xf32> {dataClause = #acc<data_clause acc_copy>, implicit = true, name = ""}
|
||||
// CHECK: acc.copyout accPtr(%[[COPYIN]] : memref<10xf32>) to varPtr({{.*}} : memref<10xf32>) {dataClause = #acc<data_clause acc_copy>, implicit = true, name = ""}
|
||||
|
||||
// -----
|
||||
|
||||
// Test array in kernels - should generate copyin/copyout
|
||||
func.func @test_array_in_kernels() {
|
||||
%alloc = memref.alloca() : memref<20xi32>
|
||||
acc.kernels {
|
||||
%c0 = arith.constant 0 : index
|
||||
%load = memref.load %alloc[%c0] : memref<20xi32>
|
||||
acc.terminator
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// CHECK-LABEL: func.func @test_array_in_kernels
|
||||
// CHECK: %[[COPYIN:.*]] = acc.copyin varPtr({{.*}} : memref<20xi32>) -> memref<20xi32> {dataClause = #acc<data_clause acc_copy>, implicit = true, name = ""}
|
||||
// CHECK: acc.copyout accPtr(%[[COPYIN]] : memref<20xi32>) to varPtr({{.*}} : memref<20xi32>) {dataClause = #acc<data_clause acc_copy>, implicit = true, name = ""}
|
||||
|
||||
// -----
|
||||
|
||||
// Test array with default(present) - should generate present
|
||||
func.func @test_array_parallel_defaultpresent() {
|
||||
%alloc = memref.alloca() : memref<10xf32>
|
||||
acc.parallel {
|
||||
%c0 = arith.constant 0 : index
|
||||
%load = memref.load %alloc[%c0] : memref<10xf32>
|
||||
acc.yield
|
||||
} attributes {defaultAttr = #acc<defaultvalue present>}
|
||||
return
|
||||
}
|
||||
|
||||
// CHECK-LABEL: func.func @test_array_parallel_defaultpresent
|
||||
// CHECK: %[[PRESENT:.*]] = acc.present varPtr({{.*}} : memref<10xf32>) -> memref<10xf32> {implicit = true, name = ""}
|
||||
// CHECK: acc.delete accPtr(%[[PRESENT]] : memref<10xf32>) {dataClause = #acc<data_clause acc_present>, implicit = true, name = ""}
|
||||
|
||||
// -----
|
||||
|
||||
// Test scalar with default(present) - should still generate firstprivate (scalars ignore default(present))
|
||||
func.func @test_scalar_parallel_defaultpresent() {
|
||||
%alloc = memref.alloca() : memref<f32>
|
||||
acc.parallel {
|
||||
%load = memref.load %alloc[] : memref<f32>
|
||||
acc.yield
|
||||
} attributes {defaultAttr = #acc<defaultvalue present>}
|
||||
return
|
||||
}
|
||||
|
||||
// CHECK-LABEL: func.func @test_scalar_parallel_defaultpresent
|
||||
// CHECK: acc.firstprivate varPtr({{.*}} : memref<f32>) -> memref<f32> {implicit = true, name = ""}
|
||||
|
||||
// -----
|
||||
|
||||
// Test multidimensional array
|
||||
func.func @test_multidim_array_in_parallel() {
|
||||
%alloc = memref.alloca() : memref<8x16xf32>
|
||||
acc.parallel {
|
||||
%c0 = arith.constant 0 : index
|
||||
%c1 = arith.constant 1 : index
|
||||
%load = memref.load %alloc[%c0, %c1] : memref<8x16xf32>
|
||||
acc.yield
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// CHECK-LABEL: func.func @test_multidim_array_in_parallel
|
||||
// CHECK: %[[COPYIN:.*]] = acc.copyin varPtr({{.*}} : memref<8x16xf32>) -> memref<8x16xf32> {dataClause = #acc<data_clause acc_copy>, implicit = true, name = ""}
|
||||
// CHECK: acc.copyout accPtr(%[[COPYIN]] : memref<8x16xf32>) to varPtr({{.*}} : memref<8x16xf32>) {dataClause = #acc<data_clause acc_copy>, implicit = true, name = ""}
|
||||
|
||||
// -----
|
||||
|
||||
// Test dynamic size array
|
||||
func.func @test_dynamic_array(%size: index) {
|
||||
%alloc = memref.alloca(%size) : memref<?xf64>
|
||||
acc.parallel {
|
||||
%c0 = arith.constant 0 : index
|
||||
%load = memref.load %alloc[%c0] : memref<?xf64>
|
||||
acc.yield
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// CHECK-LABEL: func.func @test_dynamic_array
|
||||
// CHECK: %[[COPYIN:.*]] = acc.copyin varPtr({{.*}} : memref<?xf64>) -> memref<?xf64> {dataClause = #acc<data_clause acc_copy>, implicit = true, name = ""}
|
||||
// CHECK: acc.copyout accPtr(%[[COPYIN]] : memref<?xf64>) to varPtr({{.*}} : memref<?xf64>) {dataClause = #acc<data_clause acc_copy>, implicit = true, name = ""}
|
||||
|
||||
// -----
|
||||
|
||||
// Test variable with explicit data clause - implicit should recognize it
|
||||
func.func @test_with_explicit_copyin() {
|
||||
%alloc = memref.alloca() : memref<100xf32>
|
||||
%copyin = acc.copyin varPtr(%alloc : memref<100xf32>) -> memref<100xf32> {name = "explicit"}
|
||||
acc.parallel dataOperands(%copyin : memref<100xf32>) {
|
||||
%c0 = arith.constant 0 : index
|
||||
%load = memref.load %alloc[%c0] : memref<100xf32>
|
||||
acc.yield
|
||||
}
|
||||
acc.copyout accPtr(%copyin : memref<100xf32>) to varPtr(%alloc : memref<100xf32>) {name = "explicit"}
|
||||
return
|
||||
}
|
||||
|
||||
// CHECK-LABEL: func.func @test_with_explicit_copyin
|
||||
// CHECK: acc.present varPtr({{.*}} : memref<100xf32>) -> memref<100xf32> {implicit = true, name = ""}
|
||||
|
||||
// -----
|
||||
|
||||
// Test multiple variables
|
||||
func.func @test_multiple_variables() {
|
||||
%alloc1 = memref.alloca() : memref<f32>
|
||||
%alloc2 = memref.alloca() : memref<10xi32>
|
||||
acc.parallel {
|
||||
%load1 = memref.load %alloc1[] : memref<f32>
|
||||
%c0 = arith.constant 0 : index
|
||||
%load2 = memref.load %alloc2[%c0] : memref<10xi32>
|
||||
acc.yield
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// CHECK-LABEL: func.func @test_multiple_variables
|
||||
// CHECK: acc.firstprivate varPtr({{.*}} : memref<f32>) -> memref<f32> {implicit = true, name = ""}
|
||||
// CHECK: %[[COPYIN:.*]] = acc.copyin varPtr({{.*}} : memref<10xi32>) -> memref<10xi32> {dataClause = #acc<data_clause acc_copy>, implicit = true, name = ""}
|
||||
// CHECK: acc.copyout accPtr(%[[COPYIN]] : memref<10xi32>) to varPtr({{.*}} : memref<10xi32>) {dataClause = #acc<data_clause acc_copy>, implicit = true, name = ""}
|
||||
|
||||
// -----
|
||||
|
||||
// Test memref.view aliasing - view of explicitly copied buffer should generate present
|
||||
func.func @test_memref_view(%size: index) {
|
||||
%c0 = arith.constant 0 : index
|
||||
%buffer = memref.alloca(%size) : memref<?xi8>
|
||||
%copyin = acc.copyin varPtr(%buffer : memref<?xi8>) -> memref<?xi8> {name = "buffer"}
|
||||
%view = memref.view %buffer[%c0][] : memref<?xi8> to memref<8x64xf32>
|
||||
acc.kernels dataOperands(%copyin : memref<?xi8>) {
|
||||
%c0_0 = arith.constant 0 : index
|
||||
%c0_1 = arith.constant 0 : index
|
||||
%load = memref.load %view[%c0_0, %c0_1] : memref<8x64xf32>
|
||||
acc.terminator
|
||||
}
|
||||
acc.copyout accPtr(%copyin : memref<?xi8>) to varPtr(%buffer : memref<?xi8>) {name = "buffer"}
|
||||
return
|
||||
}
|
||||
|
||||
// CHECK-LABEL: func.func @test_memref_view
|
||||
// CHECK: acc.present varPtr({{.*}} : memref<8x64xf32>) -> memref<8x64xf32> {implicit = true, name = ""}
|
||||
|
||||
Reference in New Issue
Block a user