mirror of
https://github.com/intel/llvm.git
synced 2026-01-23 16:06:39 +08:00
[MLIR][NVGPU] Fix the cga_cluster.mlir test (#112191)
This patch fixes the sm90 cluster test by:
* Fixing a typo in LowerGpuOpsToNVVMOps, where one of the ClusterDim Op conversion patterns should actually be for the ClusterDimBlocks Op. This addresses the compilation error for this test.
* Changing the grid size to (4,4,1) instead of (2,2,1). This passes the scf-if check against the threshold of 3 below and actually generates the required prints from the GPU.

Signed-off-by: Durgadoss R <durgadossr@nvidia.com>
This commit is contained in:
@@ -373,8 +373,9 @@ void mlir::populateGpuToNVVMConversionPatterns(
|
||||
NVVM::BlockInClusterIdYOp, NVVM::BlockInClusterIdZOp>>(
|
||||
converter, IndexKind::Other, IntrType::Id);
|
||||
patterns.add<gpu::index_lowering::OpLowering<
|
||||
- gpu::ClusterDimOp, NVVM::ClusterDimXOp, NVVM::ClusterDimYOp,
|
||||
- NVVM::ClusterDimZOp>>(converter, IndexKind::Other, IntrType::Dim);
|
||||
+ gpu::ClusterDimBlocksOp, NVVM::ClusterDimBlocksXOp,
|
||||
+ NVVM::ClusterDimBlocksYOp, NVVM::ClusterDimBlocksZOp>>(
|
||||
+ converter, IndexKind::Other, IntrType::Dim);
|
||||
patterns.add<gpu::index_lowering::OpLowering<
|
||||
gpu::BlockIdOp, NVVM::BlockIdXOp, NVVM::BlockIdYOp, NVVM::BlockIdZOp>>(
|
||||
converter, IndexKind::Grid, IntrType::Id);
|
||||
|
||||
@@ -18,7 +18,7 @@ module attributes {gpu.container_module} {
|
||||
return
|
||||
}
|
||||
gpu.module @gpumodule {
|
||||
- gpu.func @kernel_cluster() kernel attributes {gpu.known_block_size = array<i32: 1, 1, 1>, gpu.known_grid_size = array<i32: 2, 2, 1>} {
|
||||
+ gpu.func @kernel_cluster() kernel attributes {gpu.known_block_size = array<i32: 1, 1, 1>, gpu.known_grid_size = array<i32: 4, 4, 1>} {
|
||||
%cidX = gpu.cluster_id x
|
||||
%cidY = gpu.cluster_id y
|
||||
%cidZ = gpu.cluster_id z
|
||||
|
||||
Reference in New Issue
Block a user