[mlir][Linalg] Add 3D pooling named ops to Linalg.

Reviewed By: gysit, hanchung

Differential Revision: https://reviews.llvm.org/D105329
This commit is contained in:
Hanhan Wang
2021-07-12 17:25:55 -07:00
parent 189c552518
commit 50529affcd
3 changed files with 380 additions and 0 deletions

View File

@@ -736,6 +736,228 @@ structured_op: !LinalgStructuredOpConfig
- !ScalarExpression
scalar_arg: I
--- !LinalgOpConfig
# Op config for `pooling_ndhwc_sum` (3D sum pooling).
# NOTE(review): the `doc` text and operand layout match the Python definition
# of the same name, so this looks like generated output of that definition;
# if so, regenerate instead of hand-editing -- confirm.
metadata: !LinalgOpMetadata
name: pooling_ndhwc_sum
cpp_class_name: PoolingNdhwcSumOp
doc: |-
Performs 3D sum pooling.
Numeric casting is performed on the input operand, promoting it to the same
data type as the accumulator/output.
structured_op: !LinalgStructuredOpConfig
# Every map in this op shares the symbols s0..s16. Per the maps below:
#   I shape         = (s0, s1, s2, s3, s4)
#   K shape         = (s5, s6, s7)
#   O shape         = (s0, s8, s9, s10, s4)
#   strides attr    = (s11, s12, s13)
#   dilations attr  = (s14, s15, s16)
args:
- !LinalgOperandDefConfig
name: I
usage: InputOperand
type_var: T1
shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12,
s13, s14, s15, s16] -> (s0, s1, s2, s3, s4)>
- !LinalgOperandDefConfig
name: K
usage: InputOperand
type_var: T2
shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12,
s13, s14, s15, s16] -> (s5, s6, s7)>
- !LinalgOperandDefConfig
name: O
usage: OutputOperand
type_var: U
shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12,
s13, s14, s15, s16] -> (s0, s8, s9, s10, s4)>
- !LinalgOperandDefConfig
name: strides
usage: IndexAttribute
type_var: I64
attribute_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11,
s12, s13, s14, s15, s16] -> (s11, s12, s13)>
- !LinalgOperandDefConfig
name: dilations
usage: IndexAttribute
type_var: I64
attribute_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11,
s12, s13, s14, s15, s16] -> (s14, s15, s16)>
# Three indexing maps over the 8-D iteration space (d0..d7).
# NOTE(review): assumed to be in operand order I, K, O -- confirm.
#   1st: (d0, d1*s11 + d4*s14, d2*s12 + d5*s15, d3*s13 + d6*s16, d7),
#        i.e. each spatial index is d{1,2,3} * stride + d{4,5,6} * dilation.
#   2nd: (d4, d5, d6)          -- 3 results, same rank as K's shape.
#   3rd: (d0, d1, d2, d3, d7)  -- 5 results, same rank as O's shape.
indexing_maps: !LinalgIndexingMapsConfig
static_indexing_maps:
- affine_map<(d0, d1, d2, d3, d4, d5, d6, d7)[s0, s1, s2, s3, s4, s5, s6, s7,
s8, s9, s10, s11, s12, s13, s14, s15, s16] -> (d0, d1 * s11 + d4 * s14, d2 *
s12 + d5 * s15, d3 * s13 + d6 * s16, d7)>
- affine_map<(d0, d1, d2, d3, d4, d5, d6, d7)[s0, s1, s2, s3, s4, s5, s6, s7,
s8, s9, s10, s11, s12, s13, s14, s15, s16] -> (d4, d5, d6)>
- affine_map<(d0, d1, d2, d3, d4, d5, d6, d7)[s0, s1, s2, s3, s4, s5, s6, s7,
s8, s9, s10, s11, s12, s13, s14, s15, s16] -> (d0, d1, d2, d3, d7)>
# One entry per iteration dim, d0 first: d0..d3 and d7 are parallel,
# d4..d6 (the dims used by the 3-result map) are reductions.
iterator_types:
- parallel
- parallel
- parallel
- parallel
- reduction
- reduction
- reduction
- parallel
# Scalar body: O = add(O, cast<U>(I)). K does not appear in the expression;
# it participates only through its shape and indexing map.
assignments:
- !ScalarAssign
arg: O
value: !ScalarExpression
scalar_apply:
fn_name: add
operands:
- !ScalarExpression
scalar_arg: O
- !ScalarExpression
symbolic_cast:
type_var: U
operands:
- !ScalarExpression
scalar_arg: I
--- !LinalgOpConfig
# Op config for `pooling_ndhwc_max` (3D max pooling).
# NOTE(review): the `doc` text and operand layout match the Python definition
# of the same name, so this looks like generated output of that definition;
# if so, regenerate instead of hand-editing -- confirm.
metadata: !LinalgOpMetadata
name: pooling_ndhwc_max
cpp_class_name: PoolingNdhwcMaxOp
doc: |-
Performs 3D max pooling.
Numeric casting is performed on the input operand, promoting it to the same
data type as the accumulator/output.
structured_op: !LinalgStructuredOpConfig
# Every map in this op shares the symbols s0..s16. Per the maps below:
#   I shape         = (s0, s1, s2, s3, s4)
#   K shape         = (s5, s6, s7)
#   O shape         = (s0, s8, s9, s10, s4)
#   strides attr    = (s11, s12, s13)
#   dilations attr  = (s14, s15, s16)
args:
- !LinalgOperandDefConfig
name: I
usage: InputOperand
type_var: T1
shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12,
s13, s14, s15, s16] -> (s0, s1, s2, s3, s4)>
- !LinalgOperandDefConfig
name: K
usage: InputOperand
type_var: T2
shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12,
s13, s14, s15, s16] -> (s5, s6, s7)>
- !LinalgOperandDefConfig
name: O
usage: OutputOperand
type_var: U
shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12,
s13, s14, s15, s16] -> (s0, s8, s9, s10, s4)>
- !LinalgOperandDefConfig
name: strides
usage: IndexAttribute
type_var: I64
attribute_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11,
s12, s13, s14, s15, s16] -> (s11, s12, s13)>
- !LinalgOperandDefConfig
name: dilations
usage: IndexAttribute
type_var: I64
attribute_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11,
s12, s13, s14, s15, s16] -> (s14, s15, s16)>
# Three indexing maps over the 8-D iteration space (d0..d7).
# NOTE(review): assumed to be in operand order I, K, O -- confirm.
#   1st: (d0, d1*s11 + d4*s14, d2*s12 + d5*s15, d3*s13 + d6*s16, d7),
#        i.e. each spatial index is d{1,2,3} * stride + d{4,5,6} * dilation.
#   2nd: (d4, d5, d6)          -- 3 results, same rank as K's shape.
#   3rd: (d0, d1, d2, d3, d7)  -- 5 results, same rank as O's shape.
indexing_maps: !LinalgIndexingMapsConfig
static_indexing_maps:
- affine_map<(d0, d1, d2, d3, d4, d5, d6, d7)[s0, s1, s2, s3, s4, s5, s6, s7,
s8, s9, s10, s11, s12, s13, s14, s15, s16] -> (d0, d1 * s11 + d4 * s14, d2 *
s12 + d5 * s15, d3 * s13 + d6 * s16, d7)>
- affine_map<(d0, d1, d2, d3, d4, d5, d6, d7)[s0, s1, s2, s3, s4, s5, s6, s7,
s8, s9, s10, s11, s12, s13, s14, s15, s16] -> (d4, d5, d6)>
- affine_map<(d0, d1, d2, d3, d4, d5, d6, d7)[s0, s1, s2, s3, s4, s5, s6, s7,
s8, s9, s10, s11, s12, s13, s14, s15, s16] -> (d0, d1, d2, d3, d7)>
# One entry per iteration dim, d0 first: d0..d3 and d7 are parallel,
# d4..d6 (the dims used by the 3-result map) are reductions.
iterator_types:
- parallel
- parallel
- parallel
- parallel
- reduction
- reduction
- reduction
- parallel
# Scalar body: O = max(O, cast<U>(I)). K does not appear in the expression;
# it participates only through its shape and indexing map.
assignments:
- !ScalarAssign
arg: O
value: !ScalarExpression
scalar_apply:
fn_name: max
operands:
- !ScalarExpression
scalar_arg: O
- !ScalarExpression
symbolic_cast:
type_var: U
operands:
- !ScalarExpression
scalar_arg: I
--- !LinalgOpConfig
# Op config for `pooling_ndhwc_min` (3D min pooling).
# NOTE(review): the `doc` text and operand layout match the Python definition
# of the same name, so this looks like generated output of that definition;
# if so, regenerate instead of hand-editing -- confirm.
metadata: !LinalgOpMetadata
name: pooling_ndhwc_min
cpp_class_name: PoolingNdhwcMinOp
doc: |-
Performs 3D min pooling.
Numeric casting is performed on the input operand, promoting it to the same
data type as the accumulator/output.
structured_op: !LinalgStructuredOpConfig
# Every map in this op shares the symbols s0..s16. Per the maps below:
#   I shape         = (s0, s1, s2, s3, s4)
#   K shape         = (s5, s6, s7)
#   O shape         = (s0, s8, s9, s10, s4)
#   strides attr    = (s11, s12, s13)
#   dilations attr  = (s14, s15, s16)
args:
- !LinalgOperandDefConfig
name: I
usage: InputOperand
type_var: T1
shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12,
s13, s14, s15, s16] -> (s0, s1, s2, s3, s4)>
- !LinalgOperandDefConfig
name: K
usage: InputOperand
type_var: T2
shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12,
s13, s14, s15, s16] -> (s5, s6, s7)>
- !LinalgOperandDefConfig
name: O
usage: OutputOperand
type_var: U
shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12,
s13, s14, s15, s16] -> (s0, s8, s9, s10, s4)>
- !LinalgOperandDefConfig
name: strides
usage: IndexAttribute
type_var: I64
attribute_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11,
s12, s13, s14, s15, s16] -> (s11, s12, s13)>
- !LinalgOperandDefConfig
name: dilations
usage: IndexAttribute
type_var: I64
attribute_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11,
s12, s13, s14, s15, s16] -> (s14, s15, s16)>
# Three indexing maps over the 8-D iteration space (d0..d7).
# NOTE(review): assumed to be in operand order I, K, O -- confirm.
#   1st: (d0, d1*s11 + d4*s14, d2*s12 + d5*s15, d3*s13 + d6*s16, d7),
#        i.e. each spatial index is d{1,2,3} * stride + d{4,5,6} * dilation.
#   2nd: (d4, d5, d6)          -- 3 results, same rank as K's shape.
#   3rd: (d0, d1, d2, d3, d7)  -- 5 results, same rank as O's shape.
indexing_maps: !LinalgIndexingMapsConfig
static_indexing_maps:
- affine_map<(d0, d1, d2, d3, d4, d5, d6, d7)[s0, s1, s2, s3, s4, s5, s6, s7,
s8, s9, s10, s11, s12, s13, s14, s15, s16] -> (d0, d1 * s11 + d4 * s14, d2 *
s12 + d5 * s15, d3 * s13 + d6 * s16, d7)>
- affine_map<(d0, d1, d2, d3, d4, d5, d6, d7)[s0, s1, s2, s3, s4, s5, s6, s7,
s8, s9, s10, s11, s12, s13, s14, s15, s16] -> (d4, d5, d6)>
- affine_map<(d0, d1, d2, d3, d4, d5, d6, d7)[s0, s1, s2, s3, s4, s5, s6, s7,
s8, s9, s10, s11, s12, s13, s14, s15, s16] -> (d0, d1, d2, d3, d7)>
# One entry per iteration dim, d0 first: d0..d3 and d7 are parallel,
# d4..d6 (the dims used by the 3-result map) are reductions.
iterator_types:
- parallel
- parallel
- parallel
- parallel
- reduction
- reduction
- reduction
- parallel
# Scalar body: O = min(O, cast<U>(I)). K does not appear in the expression;
# it participates only through its shape and indexing map.
assignments:
- !ScalarAssign
arg: O
value: !ScalarExpression
scalar_apply:
fn_name: min
operands:
- !ScalarExpression
scalar_arg: O
- !ScalarExpression
symbolic_cast:
type_var: U
operands:
- !ScalarExpression
scalar_arg: I
--- !LinalgOpConfig
metadata: !LinalgOpMetadata
name: fill_rng_2d
cpp_class_name: FillRng2DOp

View File

@@ -184,6 +184,62 @@ def pooling_nhwc_min_poly(
D.c]))
# 3D sum pooling: accumulates a strided, dilated window of I into O.
# NOTE(review): the docstring text is identical to the `doc:` field of the
# YAML op config of the same name, so it appears to be emitted into that
# config -- keep the two in sync; confirm.
@linalg_structured_op
def pooling_ndhwc_sum(
# Input tensor, shape (N, D, H, W, C).
I=TensorDef(T1, S.N, S.D, S.H, S.W, S.C),
# Window operand, shape (KD, KH, KW), tied to dims kd/kh/kw. It is never read
# in the expression below; only its shape is declared here.
K=TensorDef(T2, S.KD, S.KH, S.KW, index_dims=[D.kd, D.kh, D.kw]),
# Output/accumulator tensor, shape (N, OD, OH, OW, C).
O=TensorDef(U, S.N, S.OD, S.OH, S.OW, S.C, output=True),
# Per-axis attributes used in the input index arithmetic below.
strides=AttributeDef(S.SD, S.SH, S.SW),
dilations=AttributeDef(S.DD, S.DH, S.DW)):
"""Performs 3D sum pooling.
Numeric casting is performed on the input operand, promoting it to the same
data type as the accumulator/output.
"""
# Iteration dims in order: batch, three output positions, three window
# offsets, channel.
domain(D.n, D.od, D.oh, D.ow, D.kd, D.kh, D.kw, D.c)
# `+=` accumulates into O. kd/kh/kw appear only on the right-hand side; the
# YAML config of the same name lists those three dims as reductions.
# Each input spatial index is output_pos * stride + window_offset * dilation.
O[D.n, D.od, D.oh, D.ow, D.c] += cast(
U, I[D.n, D.od * S.SD + D.kd * S.DD, D.oh * S.SH + D.kh * S.DH,
D.ow * S.SW + D.kw * S.DW, D.c])
# 3D max pooling: takes the max over a strided, dilated window of I into O.
# NOTE(review): the docstring text is identical to the `doc:` field of the
# YAML op config of the same name, so it appears to be emitted into that
# config -- keep the two in sync; confirm.
@linalg_structured_op
def pooling_ndhwc_max(
# Input tensor, shape (N, D, H, W, C).
I=TensorDef(T1, S.N, S.D, S.H, S.W, S.C),
# Window operand, shape (KD, KH, KW), tied to dims kd/kh/kw. It is never read
# in the expression below; only its shape is declared here.
K=TensorDef(T2, S.KD, S.KH, S.KW, index_dims=[D.kd, D.kh, D.kw]),
# Output/accumulator tensor, shape (N, OD, OH, OW, C).
O=TensorDef(U, S.N, S.OD, S.OH, S.OW, S.C, output=True),
# Per-axis attributes used in the input index arithmetic below.
strides=AttributeDef(S.SD, S.SH, S.SW),
dilations=AttributeDef(S.DD, S.DH, S.DW)):
"""Performs 3D max pooling.
Numeric casting is performed on the input operand, promoting it to the same
data type as the accumulator/output.
"""
# Iteration dims in order: batch, three output positions, three window
# offsets, channel.
domain(D.n, D.od, D.oh, D.ow, D.kd, D.kh, D.kw, D.c)
# Max-reduce over the three window dims, named explicitly in ReduceFn.max.
# Each input spatial index is output_pos * stride + window_offset * dilation.
O[D.n, D.od, D.oh, D.ow, D.c] = ReduceFn.max(D.kd, D.kh, D.kw)(
cast(
U, I[D.n, D.od * S.SD + D.kd * S.DD, D.oh * S.SH + D.kh * S.DH,
D.ow * S.SW + D.kw * S.DW, D.c]))
# 3D min pooling: takes the min over a strided, dilated window of I into O.
# NOTE(review): the docstring text is identical to the `doc:` field of the
# YAML op config of the same name, so it appears to be emitted into that
# config -- keep the two in sync; confirm.
@linalg_structured_op
def pooling_ndhwc_min(
# Input tensor, shape (N, D, H, W, C).
I=TensorDef(T1, S.N, S.D, S.H, S.W, S.C),
# Window operand, shape (KD, KH, KW), tied to dims kd/kh/kw. It is never read
# in the expression below; only its shape is declared here.
K=TensorDef(T2, S.KD, S.KH, S.KW, index_dims=[D.kd, D.kh, D.kw]),
# Output/accumulator tensor, shape (N, OD, OH, OW, C).
O=TensorDef(U, S.N, S.OD, S.OH, S.OW, S.C, output=True),
# Per-axis attributes used in the input index arithmetic below.
strides=AttributeDef(S.SD, S.SH, S.SW),
dilations=AttributeDef(S.DD, S.DH, S.DW)):
"""Performs 3D min pooling.
Numeric casting is performed on the input operand, promoting it to the same
data type as the accumulator/output.
"""
# Iteration dims in order: batch, three output positions, three window
# offsets, channel.
domain(D.n, D.od, D.oh, D.ow, D.kd, D.kh, D.kw, D.c)
# Min-reduce over the three window dims, named explicitly in ReduceFn.min.
# Each input spatial index is output_pos * stride + window_offset * dilation.
O[D.n, D.od, D.oh, D.ow, D.c] = ReduceFn.min(D.kd, D.kh, D.kw)(
cast(
U, I[D.n, D.od * S.SD + D.kd * S.DD, D.oh * S.SH + D.kh * S.DH,
D.ow * S.SW + D.kw * S.DW, D.c]))
@linalg_structured_op
def fill_rng_2d(
min=ScalarDef(F64),

View File

@@ -518,3 +518,105 @@ func @pooling_nhwc_min(%input: memref<1x4x4x1xf32>, %fake: memref<3x3xf32>, %out
outs(%output: memref<1x2x2x1xf32>)
return
}
// -----
// Tensor form of 3D sum pooling: a 1x4x4x4x1 input pooled with a 3x3x3 window,
// unit strides and dilations, into a zero-filled 1x2x2x2x1 accumulator.
// The window tensor is created uninitialized; it is only passed to the op.
// CHECK-LABEL: func @pooling_ndhwc_sum_tensor
// CHECK: %{{.+}} = linalg.pooling_ndhwc_sum
// CHECK-SAME: dilations = dense<1> : tensor<3xi64>
// CHECK-SAME: strides = dense<1> : tensor<3xi64>
// CHECK-SAME: ins(%{{.+}}, %{{.+}} : tensor<1x4x4x4x1xf32>, tensor<3x3x3xf32>)
// CHECK-SAME: outs(%{{.+}} : tensor<1x2x2x2x1xf32>) -> tensor<1x2x2x2x1xf32>
func @pooling_ndhwc_sum_tensor(%src: tensor<1x4x4x4x1xf32>) -> tensor<1x2x2x2x1xf32> {
  %window = linalg.init_tensor [3, 3, 3] : tensor<3x3x3xf32>
  %empty = linalg.init_tensor [1, 2, 2, 2, 1] : tensor<1x2x2x2x1xf32>
  %zero = constant 0.000000e+00 : f32
  %acc = linalg.fill(%zero, %empty) : f32, tensor<1x2x2x2x1xf32> -> tensor<1x2x2x2x1xf32>
  %pooled = linalg.pooling_ndhwc_sum {dilations = dense<1> : tensor<3xi64>, strides = dense<1> : tensor<3xi64>}
    ins(%src, %window : tensor<1x4x4x4x1xf32>, tensor<3x3x3xf32>)
    outs(%acc : tensor<1x2x2x2x1xf32>) -> tensor<1x2x2x2x1xf32>
  return %pooled : tensor<1x2x2x2x1xf32>
}
// -----
// Memref form of 3D sum pooling: a 1x4x4x4x1 input pooled with a 3x3x3 window,
// unit strides and dilations, accumulating in place into a 1x2x2x2x1 buffer.
// CHECK-LABEL: func @pooling_ndhwc_sum
// CHECK: linalg.pooling_ndhwc_sum
// CHECK-SAME: dilations = dense<1> : tensor<3xi64>
// CHECK-SAME: strides = dense<1> : tensor<3xi64>
// CHECK-SAME: ins(%{{.+}}, %{{.+}} : memref<1x4x4x4x1xf32>, memref<3x3x3xf32>)
// CHECK-SAME: outs(%{{.+}} : memref<1x2x2x2x1xf32>)
func @pooling_ndhwc_sum(%src: memref<1x4x4x4x1xf32>, %window: memref<3x3x3xf32>, %dst: memref<1x2x2x2x1xf32>) {
  linalg.pooling_ndhwc_sum {dilations = dense<1> : tensor<3xi64>, strides = dense<1> : tensor<3xi64>}
    ins(%src, %window : memref<1x4x4x4x1xf32>, memref<3x3x3xf32>)
    outs(%dst : memref<1x2x2x2x1xf32>)
  return
}
// -----
// Tensor form of 3D max pooling: a 1x4x4x4x1 input pooled with a 3x3x3 window,
// unit strides and dilations, into a 1x2x2x2x1 accumulator filled with 0.0.
// NOTE(review): 0.0 is not a neutral start value for a max reduction; that is
// harmless if this test only verifies how the op is printed -- confirm.
// CHECK-LABEL: func @pooling_ndhwc_max_tensor
// CHECK: %{{.+}} = linalg.pooling_ndhwc_max
// CHECK-SAME: dilations = dense<1> : tensor<3xi64>
// CHECK-SAME: strides = dense<1> : tensor<3xi64>
// CHECK-SAME: ins(%{{.+}}, %{{.+}} : tensor<1x4x4x4x1xf32>, tensor<3x3x3xf32>)
// CHECK-SAME: outs(%{{.+}} : tensor<1x2x2x2x1xf32>) -> tensor<1x2x2x2x1xf32>
func @pooling_ndhwc_max_tensor(%src: tensor<1x4x4x4x1xf32>) -> tensor<1x2x2x2x1xf32> {
  %window = linalg.init_tensor [3, 3, 3] : tensor<3x3x3xf32>
  %empty = linalg.init_tensor [1, 2, 2, 2, 1] : tensor<1x2x2x2x1xf32>
  %zero = constant 0.000000e+00 : f32
  %acc = linalg.fill(%zero, %empty) : f32, tensor<1x2x2x2x1xf32> -> tensor<1x2x2x2x1xf32>
  %pooled = linalg.pooling_ndhwc_max {dilations = dense<1> : tensor<3xi64>, strides = dense<1> : tensor<3xi64>}
    ins(%src, %window : tensor<1x4x4x4x1xf32>, tensor<3x3x3xf32>)
    outs(%acc : tensor<1x2x2x2x1xf32>) -> tensor<1x2x2x2x1xf32>
  return %pooled : tensor<1x2x2x2x1xf32>
}
// -----
// Memref form of 3D max pooling: a 1x4x4x4x1 input pooled with a 3x3x3 window,
// unit strides and dilations, writing in place into a 1x2x2x2x1 buffer.
// CHECK-LABEL: func @pooling_ndhwc_max
// CHECK: linalg.pooling_ndhwc_max
// CHECK-SAME: dilations = dense<1> : tensor<3xi64>
// CHECK-SAME: strides = dense<1> : tensor<3xi64>
// CHECK-SAME: ins(%{{.+}}, %{{.+}} : memref<1x4x4x4x1xf32>, memref<3x3x3xf32>)
// CHECK-SAME: outs(%{{.+}} : memref<1x2x2x2x1xf32>)
func @pooling_ndhwc_max(%src: memref<1x4x4x4x1xf32>, %window: memref<3x3x3xf32>, %dst: memref<1x2x2x2x1xf32>) {
  linalg.pooling_ndhwc_max {dilations = dense<1> : tensor<3xi64>, strides = dense<1> : tensor<3xi64>}
    ins(%src, %window : memref<1x4x4x4x1xf32>, memref<3x3x3xf32>)
    outs(%dst : memref<1x2x2x2x1xf32>)
  return
}
// -----
// Tensor form of 3D min pooling: a 1x4x4x4x1 input pooled with a 3x3x3 window,
// unit strides and dilations, into a 1x2x2x2x1 accumulator filled with 0.0.
// NOTE(review): 0.0 is not a neutral start value for a min reduction; that is
// harmless if this test only verifies how the op is printed -- confirm.
// CHECK-LABEL: func @pooling_ndhwc_min_tensor
// CHECK: %{{.+}} = linalg.pooling_ndhwc_min
// CHECK-SAME: dilations = dense<1> : tensor<3xi64>
// CHECK-SAME: strides = dense<1> : tensor<3xi64>
// CHECK-SAME: ins(%{{.+}}, %{{.+}} : tensor<1x4x4x4x1xf32>, tensor<3x3x3xf32>)
// CHECK-SAME: outs(%{{.+}} : tensor<1x2x2x2x1xf32>) -> tensor<1x2x2x2x1xf32>
func @pooling_ndhwc_min_tensor(%src: tensor<1x4x4x4x1xf32>) -> tensor<1x2x2x2x1xf32> {
  %window = linalg.init_tensor [3, 3, 3] : tensor<3x3x3xf32>
  %empty = linalg.init_tensor [1, 2, 2, 2, 1] : tensor<1x2x2x2x1xf32>
  %zero = constant 0.000000e+00 : f32
  %acc = linalg.fill(%zero, %empty) : f32, tensor<1x2x2x2x1xf32> -> tensor<1x2x2x2x1xf32>
  %pooled = linalg.pooling_ndhwc_min {dilations = dense<1> : tensor<3xi64>, strides = dense<1> : tensor<3xi64>}
    ins(%src, %window : tensor<1x4x4x4x1xf32>, tensor<3x3x3xf32>)
    outs(%acc : tensor<1x2x2x2x1xf32>) -> tensor<1x2x2x2x1xf32>
  return %pooled : tensor<1x2x2x2x1xf32>
}
// -----
// Memref form of 3D min pooling: a 1x4x4x4x1 input pooled with a 3x3x3 window,
// unit strides and dilations, writing in place into a 1x2x2x2x1 buffer.
// CHECK-LABEL: func @pooling_ndhwc_min
// CHECK: linalg.pooling_ndhwc_min
// CHECK-SAME: dilations = dense<1> : tensor<3xi64>
// CHECK-SAME: strides = dense<1> : tensor<3xi64>
// CHECK-SAME: ins(%{{.+}}, %{{.+}} : memref<1x4x4x4x1xf32>, memref<3x3x3xf32>)
// CHECK-SAME: outs(%{{.+}} : memref<1x2x2x2x1xf32>)
func @pooling_ndhwc_min(%src: memref<1x4x4x4x1xf32>, %window: memref<3x3x3xf32>, %dst: memref<1x2x2x2x1xf32>) {
  linalg.pooling_ndhwc_min {dilations = dense<1> : tensor<3xi64>, strides = dense<1> : tensor<3xi64>}
    ins(%src, %window : memref<1x4x4x4x1xf32>, memref<3x3x3xf32>)
    outs(%dst : memref<1x2x2x2x1xf32>)
  return
}