Skip to content

Commit 8608ce7

Browse files
committed
Use ptradd_commutative PatFrags to make ptradd commutable in some DAG patterns
1 parent ee55fac commit 8608ce7

File tree

1 file changed

+21
-16
lines changed

1 file changed

+21
-16
lines changed

llvm/lib/Target/AMDGPU/VOP3Instructions.td

Lines changed: 21 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,12 @@
99
def BITOP3_32 : ComplexPattern<i32, 4, "SelectBITOP3", [and, or, xor]>;
1010
def BITOP3_16 : ComplexPattern<i16, 4, "SelectBITOP3", [and, or, xor]>;
1111

12+
// Matches PTRADD as a commutative operation. Patterns using this PatFrag must
13+
// set GISelShouldIgnore = 1 as commuting the corresponding G_PTR_ADD is
14+
// invalid.
15+
def ptradd_commutative : PatFrags<(ops node:$src0, node:$src1),
16+
[(ptradd node:$src0, node:$src1), (ptradd node:$src1, node:$src0)]>;
17+
1218
// Special case for v_div_fmas_{f32|f64}, since it seems to be the
1319
// only VOP instruction that implicitly reads VCC.
1420
let Asm64 = " $vdst, $src0_modifiers, $src1_modifiers, $src2_modifiers$clamp$omod" in {
@@ -554,13 +560,12 @@ let OtherPredicates = [isGFX10Plus, Has16BitInsts], True16Predicate = NotHasTrue
554560
defm: Ternary_i16_Pats_gfx9<mul, add, V_MAD_U16_gfx9_e64>;
555561
} // End OtherPredicates = [isGFX10Plus, Has16BitInsts], True16Predicate = NotHasTrue16BitInsts
556562

557-
class ThreeOpFragSDAG<SDPatternOperator op1, SDPatternOperator op2, bit op1IsRight = 0> : PatFrag<
563+
class ThreeOpFragSDAG<SDPatternOperator op1, SDPatternOperator op2> : PatFrag<
558564
(ops node:$x, node:$y, node:$z),
559565
// When the inner operation is used multiple times, selecting 3-op
560566
// instructions may still be beneficial -- if the other users can be
561567
// combined similarly. Let's be conservative for now.
562-
!if(op1IsRight, (op2 node:$z, (HasOneUseBinOp<op1> node:$x, node:$y)),
563-
(op2 (HasOneUseBinOp<op1> node:$x, node:$y), node:$z)),
568+
(op2 (HasOneUseBinOp<op1> node:$x, node:$y), node:$z),
564569
[{
565570
// Only use VALU ops when the result is divergent.
566571
if (!N->isDivergent())
@@ -587,10 +592,7 @@ class ThreeOpFragSDAG<SDPatternOperator op1, SDPatternOperator op2, bit op1IsRig
587592
let PredicateCodeUsesOperands = 1;
588593
}
589594

590-
// Matches (op2 (op1 x, y), z) if op1IsRight = 0 and
591-
// matches (op2 z, (op1, x, y)) if op1IsRight = 1.
592-
class ThreeOpFrag<SDPatternOperator op1, SDPatternOperator op2,
593-
bit op1IsRight = 0> : ThreeOpFragSDAG<op1, op2, op1IsRight> {
595+
class ThreeOpFrag<SDPatternOperator op1, SDPatternOperator op2> : ThreeOpFragSDAG<op1, op2> {
594596
// The divergence predicate is irrelevant in GlobalISel, as we have
595597
// proper register bank checks. We just need to verify the constant
596598
// bus restriction when all the sources are considered.
@@ -949,10 +951,11 @@ def : GCNPat<
949951
>;
950952

951953
def : GCNPat <
952-
// (ptradd z, (shl x, y)) -> ((x << y) + z)
953-
(ThreeOpFrag<shl_0_to_4, ptradd, /*op1IsRight=*/1> i64:$src0, i32:$src1, i64:$src2),
954-
(V_LSHL_ADD_U64_e64 VSrc_b64:$src0, VSrc_b32:$src1, VSrc_b64:$src2)
955-
>;
954+
// (ptradd z, (shl x, y)) or (ptradd (shl x, y), z) -> ((x << y) + z)
955+
(ThreeOpFrag<shl_0_to_4, ptradd_commutative> i64:$src0, i32:$src1, i64:$src2),
956+
(V_LSHL_ADD_U64_e64 VSrc_b64:$src0, VSrc_b32:$src1, VSrc_b64:$src2)> {
957+
let GISelShouldIgnore = 1;
958+
}
956959
} // End SubtargetPredicate = HasLshlAddU64Inst
957960

958961
let SubtargetPredicate = HasAddMinMaxInsts in {
@@ -1030,14 +1033,16 @@ multiclass IMAD32_Pats <VOP3_Pseudo inst> {
10301033

10311034
// Handle cases where amdgpu-codegenprepare-mul24 made a mul24 instead of a normal mul.
10321035
// We need to separate this because otherwise OtherPredicates would be overridden.
1033-
class IMAD32_Mul24_Pats_Impl<VOP3_Pseudo inst, SDPatternOperator AddOp, bit mulIsRight = 0> : GCNPat <
1034-
!if(mulIsRight, (i64 (AddOp i64:$src2, (i64 (AMDGPUmul_u24 i32:$src0, i32:$src1)))),
1035-
(i64 (AddOp (i64 (AMDGPUmul_u24 i32:$src0, i32:$src1)), i64:$src2))),
1036-
(inst $src0, $src1, $src2, 0 /* clamp */)>;
1036+
class IMAD32_Mul24_Pats_Impl<VOP3_Pseudo inst, SDPatternOperator AddOp> : GCNPat <
1037+
(i64 (AddOp (i64 (AMDGPUmul_u24 i32:$src0, i32:$src1)), i64:$src2)),
1038+
(inst $src0, $src1, $src2, 0 /* clamp */)
1039+
>;
10371040

10381041
multiclass IMAD32_Mul24_Pats<VOP3_Pseudo inst> {
10391042
def : IMAD32_Mul24_Pats_Impl<inst, add>;
1040-
def : IMAD32_Mul24_Pats_Impl<inst, ptradd, /*mulIsRight=*/1>;
1043+
def : IMAD32_Mul24_Pats_Impl<inst, ptradd_commutative> {
1044+
let GISelShouldIgnore = 1;
1045+
}
10411046
}
10421047

10431048
// exclude pre-GFX9 where it was slow

0 commit comments

Comments
 (0)