mirror of
https://github.com/intel/llvm.git
synced 2026-02-05 04:46:27 +08:00
Exploit non-negative numerators
isl marks known non-negative numerators in modulo (and soon also division) operations. We now exploit this by generating unsigned operations. This is beneficial as unsigned operations with power-of-two denominators will be translated by isl to fast bitshift or bitwise and operations. llvm-svn: 238577
This commit is contained in:
@@ -295,8 +295,10 @@ Value *IslExprBuilder::createOpBin(__isl_take isl_ast_expr *Expr) {
|
||||
Res = Builder.CreateNSWMul(LHS, RHS);
|
||||
break;
|
||||
case isl_ast_op_div:
|
||||
Res = Builder.CreateSDiv(LHS, RHS, "pexp.div");
|
||||
break;
|
||||
case isl_ast_op_pdiv_q: // Dividend is non-negative
|
||||
Res = Builder.CreateSDiv(LHS, RHS);
|
||||
Res = Builder.CreateUDiv(LHS, RHS, "pexp.p_div_q");
|
||||
break;
|
||||
case isl_ast_op_fdiv_q: { // Round towards -infty
|
||||
// TODO: Review code and check that this calculation does not yield
|
||||
@@ -305,16 +307,20 @@ Value *IslExprBuilder::createOpBin(__isl_take isl_ast_expr *Expr) {
|
||||
// floord(n,d) ((n < 0) ? (n - d + 1) : n) / d
|
||||
Value *One = ConstantInt::get(MaxType, 1);
|
||||
Value *Zero = ConstantInt::get(MaxType, 0);
|
||||
Value *Sum1 = Builder.CreateSub(LHS, RHS);
|
||||
Value *Sum2 = Builder.CreateAdd(Sum1, One);
|
||||
Value *isNegative = Builder.CreateICmpSLT(LHS, Zero);
|
||||
Value *Dividend = Builder.CreateSelect(isNegative, Sum2, LHS);
|
||||
Res = Builder.CreateSDiv(Dividend, RHS);
|
||||
Value *Sum1 = Builder.CreateSub(LHS, RHS, "pexp.fdiv_q.0");
|
||||
Value *Sum2 = Builder.CreateAdd(Sum1, One, "pexp.fdiv_q.1");
|
||||
Value *isNegative = Builder.CreateICmpSLT(LHS, Zero, "pexp.fdiv_q.2");
|
||||
Value *Dividend =
|
||||
Builder.CreateSelect(isNegative, Sum2, LHS, "pexp.fdiv_q.3");
|
||||
Res = Builder.CreateSDiv(Dividend, RHS, "pexp.fdiv_q.4");
|
||||
break;
|
||||
}
|
||||
case isl_ast_op_pdiv_r: // Dividend is non-negative
|
||||
Res = Builder.CreateURem(LHS, RHS, "pexp.pdiv_r");
|
||||
break;
|
||||
|
||||
case isl_ast_op_zdiv_r: // Result only compared against zero
|
||||
Res = Builder.CreateSRem(LHS, RHS);
|
||||
Res = Builder.CreateURem(LHS, RHS, "pexp.zdiv_r");
|
||||
break;
|
||||
}
|
||||
|
||||
|
||||
78
polly/test/Isl/CodeGen/exprModDiv.ll
Normal file
78
polly/test/Isl/CodeGen/exprModDiv.ll
Normal file
@@ -0,0 +1,78 @@
|
||||
; RUN: opt %loadPolly -polly-import-jscop -polly-import-jscop-dir=%S -polly-codegen -S < %s | FileCheck %s
|
||||
;
|
||||
; void exprModDiv(float *A, float *B, float *C, long N, long p) {
|
||||
; for (long i = 0; i < N; i++)
|
||||
; C[i] += A[i] + B[i] + A[p] + B[p];
|
||||
; }
|
||||
;
|
||||
;
|
||||
; This test case changes the access functions such that the resulting index
|
||||
; expressions are modulo or division operations. We test that the code we
|
||||
; generate takes advantage of knowledge about unsigned numerators. This is
|
||||
; useful as LLVM will translate urem and udiv operations with power-of-two
|
||||
; denominators to fast bitwise and or shift operations.
|
||||
|
||||
; A[i % 128]
|
||||
; CHECK: %pexp.pdiv_r = urem i64 %polly.indvar, 128
|
||||
; CHECK: %polly.access.A6 = getelementptr float, float* %A, i64 %pexp.pdiv_r
|
||||
|
||||
; A[i / 128]
|
||||
; CHECK: %pexp.div = sdiv i64 %polly.indvar, 128
|
||||
; CHECK: %polly.access.B8 = getelementptr float, float* %B, i64 %pexp.div
|
||||
;
|
||||
; FIXME: Make isl mark this as an udiv expression.
|
||||
|
||||
; #define floord(n,d) ((n < 0) ? (n - d + 1) : n) / d
|
||||
; A[p + 128 * floord(-p - 1, 128) + 128]
|
||||
; CHECK: %20 = sub nsw i64 0, %p
|
||||
; CHECK: %21 = sub nsw i64 %20, 1
|
||||
; CHECK: %pexp.fdiv_q.0 = sub i64 %21, 128
|
||||
; CHECK: %pexp.fdiv_q.1 = add i64 %pexp.fdiv_q.0, 1
|
||||
; CHECK: %pexp.fdiv_q.2 = icmp slt i64 %21, 0
|
||||
; CHECK: %pexp.fdiv_q.3 = select i1 %pexp.fdiv_q.2, i64 %pexp.fdiv_q.1, i64 %21
|
||||
; CHECK: %pexp.fdiv_q.4 = sdiv i64 %pexp.fdiv_q.3, 128
|
||||
; CHECK: %22 = mul nsw i64 128, %pexp.fdiv_q.4
|
||||
; CHECK: %23 = add nsw i64 %p, %22
|
||||
; CHECK: %24 = add nsw i64 %23, 128
|
||||
; CHECK: %polly.access.A10 = getelementptr float, float* %A, i64 %24
|
||||
|
||||
; A[p / 128]
|
||||
; CHECK: %pexp.div12 = sdiv i64 %p, 128
|
||||
; CHECK: %polly.access.B13 = getelementptr float, float* %B, i64 %pexp.div12
|
||||
|
||||
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
|
||||
|
||||
define void @exprModDiv(float* %A, float* %B, float* %C, i64 %N, i64 %p) {
|
||||
entry:
|
||||
br label %for.cond
|
||||
|
||||
for.cond: ; preds = %for.inc, %entry
|
||||
%i.0 = phi i64 [ 0, %entry ], [ %inc, %for.inc ]
|
||||
%cmp = icmp slt i64 %i.0, %N
|
||||
br i1 %cmp, label %for.body, label %for.end
|
||||
|
||||
for.body: ; preds = %for.cond
|
||||
%arrayidx = getelementptr inbounds float, float* %A, i64 %i.0
|
||||
%tmp = load float, float* %arrayidx, align 4
|
||||
%arrayidx1 = getelementptr inbounds float, float* %B, i64 %i.0
|
||||
%tmp1 = load float, float* %arrayidx1, align 4
|
||||
%add = fadd float %tmp, %tmp1
|
||||
%arrayidx2 = getelementptr inbounds float, float* %A, i64 %p
|
||||
%tmp2 = load float, float* %arrayidx2, align 4
|
||||
%add3 = fadd float %add, %tmp2
|
||||
%arrayidx4 = getelementptr inbounds float, float* %B, i64 %p
|
||||
%tmp3 = load float, float* %arrayidx4, align 4
|
||||
%add5 = fadd float %add3, %tmp3
|
||||
%arrayidx6 = getelementptr inbounds float, float* %C, i64 %i.0
|
||||
%tmp4 = load float, float* %arrayidx6, align 4
|
||||
%add7 = fadd float %tmp4, %add5
|
||||
store float %add7, float* %arrayidx6, align 4
|
||||
br label %for.inc
|
||||
|
||||
for.inc: ; preds = %for.body
|
||||
%inc = add nuw nsw i64 %i.0, 1
|
||||
br label %for.cond
|
||||
|
||||
for.end: ; preds = %for.cond
|
||||
ret void
|
||||
}
|
||||
@@ -0,0 +1,37 @@
|
||||
{
|
||||
"context" : "[N, p] -> { : N >= -9223372036854775808 and N <= 9223372036854775807 and p >= -9223372036854775808 and p <= 9223372036854775807 }",
|
||||
"name" : "for.cond => for.end",
|
||||
"statements" : [
|
||||
{
|
||||
"accesses" : [
|
||||
{
|
||||
"kind" : "read",
|
||||
"relation" : "[N, p] -> { Stmt_for_body[i0] -> MemRef_A[i0 % 128] }"
|
||||
},
|
||||
{
|
||||
"kind" : "read",
|
||||
"relation" : "[N, p] -> { Stmt_for_body[i0] -> MemRef_B[i0 / 128] }"
|
||||
},
|
||||
{
|
||||
"kind" : "read",
|
||||
"relation" : "[N, p] -> { Stmt_for_body[i0] -> MemRef_A[p % 128] }"
|
||||
},
|
||||
{
|
||||
"kind" : "read",
|
||||
"relation" : "[N, p] -> { Stmt_for_body[i0] -> MemRef_B[p / 128] }"
|
||||
},
|
||||
{
|
||||
"kind" : "read",
|
||||
"relation" : "[N, p] -> { Stmt_for_body[i0] -> MemRef_C[i0] }"
|
||||
},
|
||||
{
|
||||
"kind" : "write",
|
||||
"relation" : "[N, p] -> { Stmt_for_body[i0] -> MemRef_C[i0] }"
|
||||
}
|
||||
],
|
||||
"domain" : "[N, p] -> { Stmt_for_body[i0] : i0 >= 0 and N >= 1 and i0 <= -1 + N }",
|
||||
"name" : "Stmt_for_body",
|
||||
"schedule" : "[N, p] -> { Stmt_for_body[i0] -> [i0] }"
|
||||
}
|
||||
]
|
||||
}
|
||||
Reference in New Issue
Block a user