From 0a751530a2546d51125b841e1ca3e2131b168ef1 Mon Sep 17 00:00:00 2001 From: Matt Devereau Date: Mon, 22 Apr 2024 13:28:44 +0000 Subject: [PATCH 1/4] [LoopUnswitch] Allow i1 truncs in loop unswitch With the addition of #84628, truncs to i1 are being emitted as conditions to branch instructions. This caused significant regressions in cases which were previously improved by loop unswitch. Adding truncs to i1 restores the previous performance seen. --- llvm/lib/Transforms/Utils/LoopUtils.cpp | 10 +- .../SimpleLoopUnswitch/endless-unswitch.ll | 93 +++++++++++++ .../SimpleLoopUnswitch/partial-unswitch.ll | 130 ++++++++++++++++++ 3 files changed, 232 insertions(+), 1 deletion(-) diff --git a/llvm/lib/Transforms/Utils/LoopUtils.cpp b/llvm/lib/Transforms/Utils/LoopUtils.cpp index 73c5d63678229..e10c5dcbd218a 100644 --- a/llvm/lib/Transforms/Utils/LoopUtils.cpp +++ b/llvm/lib/Transforms/Utils/LoopUtils.cpp @@ -1930,7 +1930,15 @@ llvm::hasPartialIVCondition(const Loop &L, unsigned MSSAThreshold, if (!TI || !TI->isConditional()) return {}; - auto *CondI = dyn_cast<CmpInst>(TI->getCondition()); + Instruction *CondI = nullptr; + CondI = dyn_cast<CmpInst>(TI->getCondition()); + + if (!CondI) { + CondI = dyn_cast<TruncInst>(TI->getCondition()); + if (CondI && CondI->getType() != Type::getInt1Ty(TI->getContext())) { + return {}; + } + } // The case with the condition outside the loop should already be handled // earlier. 
if (!CondI || !L.contains(CondI)) diff --git a/llvm/test/Transforms/SimpleLoopUnswitch/endless-unswitch.ll b/llvm/test/Transforms/SimpleLoopUnswitch/endless-unswitch.ll index 0d3aa8b243109..a5ad182ad0b3e 100644 --- a/llvm/test/Transforms/SimpleLoopUnswitch/endless-unswitch.ll +++ b/llvm/test/Transforms/SimpleLoopUnswitch/endless-unswitch.ll @@ -106,3 +106,96 @@ for.inc: ; preds = %for.cond5 store i8 0, ptr @b, align 1 br label %for.cond5 } + +define void @e() { +; CHECK-LABEL: @e( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[FOR_COND:%.*]] +; CHECK: for.cond: +; CHECK-NEXT: br i1 false, label [[FOR_END:%.*]], label [[FOR_COND]] +; CHECK: for.end: +; CHECK-NEXT: [[TMP0:%.*]] = load i16, ptr null, align 2 +; CHECK-NEXT: [[TMP1:%.*]] = trunc i16 [[TMP0]] to i1 +; CHECK-NEXT: br i1 [[TMP1]], label [[FOR_END_SPLIT:%.*]], label [[FOR_END_SPLIT_US:%.*]] +; CHECK: for.end.split.us: +; CHECK-NEXT: br label [[G_US:%.*]] +; CHECK: g.us: +; CHECK-NEXT: br label [[G_SPLIT_US6:%.*]] +; CHECK: for.cond1.us1: +; CHECK-NEXT: [[TMP2:%.*]] = load i16, ptr null, align 2 +; CHECK-NEXT: [[TOBOOL4_NOT_US:%.*]] = trunc i16 [[TMP2]] to i1 +; CHECK-NEXT: br i1 [[TOBOOL4_NOT_US]], label [[FOR_COND5_PREHEADER_US4:%.*]], label [[G_LOOPEXIT_US:%.*]] +; CHECK: for.cond5.us2: +; CHECK-NEXT: br i1 false, label [[FOR_COND1_LOOPEXIT_US5:%.*]], label [[FOR_INC_US3:%.*]] +; CHECK: for.inc.us3: +; CHECK-NEXT: store i8 0, ptr @b, align 1 +; CHECK-NEXT: br label [[FOR_COND5_US2:%.*]] +; CHECK: for.cond5.preheader.us4: +; CHECK-NEXT: br label [[FOR_COND5_US2]] +; CHECK: for.cond1.loopexit.us5: +; CHECK-NEXT: br label [[FOR_COND1_US1:%.*]], !llvm.loop [[LOOP3:![0-9]+]] +; CHECK: g.loopexit.us: +; CHECK-NEXT: br label [[G_US]] +; CHECK: g.split.us6: +; CHECK-NEXT: br label [[FOR_COND1_US1]] +; CHECK: for.end.split: +; CHECK-NEXT: br label [[G:%.*]] +; CHECK: g.loopexit: +; CHECK-NEXT: br label [[G]], !llvm.loop [[LOOP4:![0-9]+]] +; CHECK: g: +; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr null, align 2 +; 
CHECK-NEXT: [[TMP4:%.*]] = trunc i16 [[TMP3]] to i1 +; CHECK-NEXT: br i1 [[TMP4]], label [[G_SPLIT_US:%.*]], label [[G_SPLIT:%.*]] +; CHECK: g.split.us: +; CHECK-NEXT: br label [[FOR_COND1_US:%.*]] +; CHECK: for.cond1.us: +; CHECK-NEXT: br label [[FOR_COND5_PREHEADER_US:%.*]] +; CHECK: for.cond5.us: +; CHECK-NEXT: br i1 false, label [[FOR_COND1_LOOPEXIT_US:%.*]], label [[FOR_INC_US:%.*]] +; CHECK: for.inc.us: +; CHECK-NEXT: store i8 0, ptr @b, align 1 +; CHECK-NEXT: br label [[FOR_COND5_US:%.*]] +; CHECK: for.cond5.preheader.us: +; CHECK-NEXT: br label [[FOR_COND5_US]] +; CHECK: for.cond1.loopexit.us: +; CHECK-NEXT: br label [[FOR_COND1_US]] +; CHECK: g.split: +; CHECK-NEXT: br label [[FOR_COND1:%.*]] +; CHECK: for.cond1.loopexit: +; CHECK-NEXT: br label [[FOR_COND1]], !llvm.loop [[LOOP3]] +; CHECK: for.cond1: +; CHECK-NEXT: [[TMP5:%.*]] = load i16, ptr null, align 2 +; CHECK-NEXT: [[TOBOOL4_NOT:%.*]] = trunc i16 [[TMP5]] to i1 +; CHECK-NEXT: br i1 [[TOBOOL4_NOT]], label [[FOR_COND5_PREHEADER:%.*]], label [[G_LOOPEXIT:%.*]] +; CHECK: for.cond5.preheader: +; CHECK-NEXT: br label [[FOR_COND5:%.*]] +; CHECK: for.cond5: +; CHECK-NEXT: br i1 false, label [[FOR_COND1_LOOPEXIT:%.*]], label [[FOR_INC:%.*]] +; CHECK: for.inc: +; CHECK-NEXT: store i8 0, ptr @b, align 1 +; CHECK-NEXT: br label [[FOR_COND5]] +; +entry: + br label %for.cond + +for.cond: ; preds = %for.cond, %entry + br i1 false, label %for.end, label %for.cond + +for.end: ; preds = %for.cond + br label %g + +g: ; preds = %for.cond1, %for.end + br label %for.cond1 + +for.cond1: ; preds = %for.cond5, %g + %0 = load i16, ptr null, align 2 + %tobool4.not = trunc i16 %0 to i1 + br i1 %tobool4.not, label %for.cond5, label %g + +for.cond5: ; preds = %for.inc, %for.cond1 + br i1 false, label %for.cond1, label %for.inc + +for.inc: ; preds = %for.cond5 + store i8 0, ptr @b, align 1 + br label %for.cond5 +} diff --git a/llvm/test/Transforms/SimpleLoopUnswitch/partial-unswitch.ll 
b/llvm/test/Transforms/SimpleLoopUnswitch/partial-unswitch.ll index f97e5c3eec9d4..1d8942079ffd8 100644 --- a/llvm/test/Transforms/SimpleLoopUnswitch/partial-unswitch.ll +++ b/llvm/test/Transforms/SimpleLoopUnswitch/partial-unswitch.ll @@ -1326,6 +1326,136 @@ exit: ret i32 10 } +define i32 @partial_unswitch_true_successor_trunc(ptr %ptr, i32 %N) { +; CHECK-LABEL: @partial_unswitch_true_successor_trunc( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[PTR:%.*]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[TMP0]] to i1 +; CHECK-NEXT: br i1 [[TMP1]], label [[ENTRY_SPLIT_US:%.*]], label [[ENTRY_SPLIT:%.*]] +; CHECK: entry.split.us: +; CHECK-NEXT: br label [[LOOP_HEADER_US:%.*]] +; CHECK: loop.header.us: +; CHECK-NEXT: [[IV_US:%.*]] = phi i32 [ 0, [[ENTRY_SPLIT_US]] ], [ [[IV_NEXT_US:%.*]], [[LOOP_LATCH_US:%.*]] ] +; CHECK-NEXT: br label [[NOCLOBBER_US:%.*]] +; CHECK: noclobber.us: +; CHECK-NEXT: br label [[LOOP_LATCH_US]] +; CHECK: loop.latch.us: +; CHECK-NEXT: [[C_US:%.*]] = icmp ult i32 [[IV_US]], [[N:%.*]] +; CHECK-NEXT: [[IV_NEXT_US]] = add i32 [[IV_US]], 1 +; CHECK-NEXT: br i1 [[C_US]], label [[LOOP_HEADER_US]], label [[EXIT_SPLIT_US:%.*]] +; CHECK: exit.split.us: +; CHECK-NEXT: br label [[EXIT:%.*]] +; CHECK: entry.split: +; CHECK-NEXT: br label [[LOOP_HEADER:%.*]] +; CHECK: loop.header: +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY_SPLIT]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ] +; CHECK-NEXT: [[LV:%.*]] = load i32, ptr [[PTR]], align 4 +; CHECK-NEXT: [[SC:%.*]] = trunc i32 [[LV]] to i1 +; CHECK-NEXT: br i1 [[SC]], label [[NOCLOBBER:%.*]], label [[CLOBBER:%.*]] +; CHECK: noclobber: +; CHECK-NEXT: br label [[LOOP_LATCH]] +; CHECK: clobber: +; CHECK-NEXT: call void @clobber() +; CHECK-NEXT: br label [[LOOP_LATCH]] +; CHECK: loop.latch: +; CHECK-NEXT: [[C:%.*]] = icmp ult i32 [[IV]], [[N]] +; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1 +; CHECK-NEXT: br i1 [[C]], label [[LOOP_HEADER]], label [[EXIT_SPLIT:%.*]], !llvm.loop 
[[LOOP12:![0-9]+]] +; CHECK: exit.split: +; CHECK-NEXT: br label [[EXIT]] +; CHECK: exit: +; CHECK-NEXT: ret i32 10 +; +entry: + br label %loop.header + +loop.header: + %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop.latch ] + %lv = load i32, ptr %ptr + %sc = trunc i32 %lv to i1 + br i1 %sc, label %noclobber, label %clobber + +noclobber: + br label %loop.latch + +clobber: + call void @clobber() + br label %loop.latch + +loop.latch: + %c = icmp ult i32 %iv, %N + %iv.next = add i32 %iv, 1 + br i1 %c, label %loop.header, label %exit + +exit: + ret i32 10 +} + +define i32 @partial_unswitch_false_successor_trunc(ptr %ptr, i32 %N) { +; CHECK-LABEL: @partial_unswitch_false_successor_trunc( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[PTR:%.*]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[TMP0]] to i1 +; CHECK-NEXT: br i1 [[TMP1]], label [[ENTRY_SPLIT:%.*]], label [[ENTRY_SPLIT_US:%.*]] +; CHECK: entry.split.us: +; CHECK-NEXT: br label [[LOOP_HEADER_US:%.*]] +; CHECK: loop.header.us: +; CHECK-NEXT: [[IV_US:%.*]] = phi i32 [ 0, [[ENTRY_SPLIT_US]] ], [ [[IV_NEXT_US:%.*]], [[LOOP_LATCH_US:%.*]] ] +; CHECK-NEXT: br label [[NOCLOBBER_US:%.*]] +; CHECK: noclobber.us: +; CHECK-NEXT: br label [[LOOP_LATCH_US]] +; CHECK: loop.latch.us: +; CHECK-NEXT: [[C_US:%.*]] = icmp ult i32 [[IV_US]], [[N:%.*]] +; CHECK-NEXT: [[IV_NEXT_US]] = add i32 [[IV_US]], 1 +; CHECK-NEXT: br i1 [[C_US]], label [[LOOP_HEADER_US]], label [[EXIT_SPLIT_US:%.*]] +; CHECK: exit.split.us: +; CHECK-NEXT: br label [[EXIT:%.*]] +; CHECK: entry.split: +; CHECK-NEXT: br label [[LOOP_HEADER:%.*]] +; CHECK: loop.header: +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY_SPLIT]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ] +; CHECK-NEXT: [[LV:%.*]] = load i32, ptr [[PTR]], align 4 +; CHECK-NEXT: [[SC:%.*]] = trunc i32 [[LV]] to i1 +; CHECK-NEXT: br i1 [[SC]], label [[CLOBBER:%.*]], label [[NOCLOBBER:%.*]] +; CHECK: clobber: +; CHECK-NEXT: call void @clobber() +; CHECK-NEXT: br label 
[[LOOP_LATCH]] +; CHECK: noclobber: +; CHECK-NEXT: br label [[LOOP_LATCH]] +; CHECK: loop.latch: +; CHECK-NEXT: [[C:%.*]] = icmp ult i32 [[IV]], [[N]] +; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1 +; CHECK-NEXT: br i1 [[C]], label [[LOOP_HEADER]], label [[EXIT_SPLIT:%.*]], !llvm.loop [[LOOP13:![0-9]+]] +; CHECK: exit.split: +; CHECK-NEXT: br label [[EXIT]] +; CHECK: exit: +; CHECK-NEXT: ret i32 10 +; +entry: + br label %loop.header + +loop.header: + %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop.latch ] + %lv = load i32, ptr %ptr + %sc = trunc i32 %lv to i1 + br i1 %sc, label %clobber, label %noclobber + +clobber: + call void @clobber() + br label %loop.latch + +noclobber: + br label %loop.latch + +loop.latch: + %c = icmp ult i32 %iv, %N + %iv.next = add i32 %iv, 1 + br i1 %c, label %loop.header, label %exit + +exit: + ret i32 10 +} + ; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[UNSWITCH_PARTIAL_DISABLE:![0-9]+]]} ; CHECK: [[UNSWITCH_PARTIAL_DISABLE]] = !{!"llvm.loop.unswitch.partial.disable"} ; CHECK: [[LOOP2]] = distinct !{[[LOOP2]], [[UNSWITCH_PARTIAL_DISABLE]]} From 3763f4f2af1072bab25ff1d9c906e28271b3c8b1 Mon Sep 17 00:00:00 2001 From: Matt Devereau Date: Tue, 23 Apr 2024 12:58:52 +0000 Subject: [PATCH 2/4] Format assignment and if statement --- llvm/lib/Transforms/Utils/LoopUtils.cpp | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/llvm/lib/Transforms/Utils/LoopUtils.cpp b/llvm/lib/Transforms/Utils/LoopUtils.cpp index e10c5dcbd218a..6799e3973fbf0 100644 --- a/llvm/lib/Transforms/Utils/LoopUtils.cpp +++ b/llvm/lib/Transforms/Utils/LoopUtils.cpp @@ -1930,15 +1930,13 @@ llvm::hasPartialIVCondition(const Loop &L, unsigned MSSAThreshold, if (!TI || !TI->isConditional()) return {}; - Instruction *CondI = nullptr; - CondI = dyn_cast<CmpInst>(TI->getCondition()); - - if (!CondI) { + Instruction *CondI = dyn_cast<CmpInst>(TI->getCondition()); + if (!CondI){ CondI = dyn_cast<TruncInst>(TI->getCondition()); - if (CondI && CondI->getType() != 
Type::getInt1Ty(TI->getContext())) { + if (CondI && CondI->getType() != Type::getInt1Ty(TI->getContext())) return {}; - } } + // The case with the condition outside the loop should already be handled // earlier. if (!CondI || !L.contains(CondI)) From efb55e2ae9444b4d8ea09833ae3ff2b51341ed91 Mon Sep 17 00:00:00 2001 From: Matt Devereau Date: Tue, 23 Apr 2024 14:20:11 +0000 Subject: [PATCH 3/4] Re-run clang-format, don't load from null --- llvm/lib/Transforms/Utils/LoopUtils.cpp | 2 +- .../SimpleLoopUnswitch/endless-unswitch.ll | 12 ++++++------ 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/llvm/lib/Transforms/Utils/LoopUtils.cpp b/llvm/lib/Transforms/Utils/LoopUtils.cpp index 6799e3973fbf0..59bee4ce0d929 100644 --- a/llvm/lib/Transforms/Utils/LoopUtils.cpp +++ b/llvm/lib/Transforms/Utils/LoopUtils.cpp @@ -1931,7 +1931,7 @@ llvm::hasPartialIVCondition(const Loop &L, unsigned MSSAThreshold, return {}; Instruction *CondI = dyn_cast<CmpInst>(TI->getCondition()); - if (!CondI){ + if (!CondI) { CondI = dyn_cast<TruncInst>(TI->getCondition()); if (CondI && CondI->getType() != Type::getInt1Ty(TI->getContext())) return {}; diff --git a/llvm/test/Transforms/SimpleLoopUnswitch/endless-unswitch.ll b/llvm/test/Transforms/SimpleLoopUnswitch/endless-unswitch.ll index a5ad182ad0b3e..e70bea2d2f7a3 100644 --- a/llvm/test/Transforms/SimpleLoopUnswitch/endless-unswitch.ll +++ b/llvm/test/Transforms/SimpleLoopUnswitch/endless-unswitch.ll @@ -107,14 +107,14 @@ for.inc: ; preds = %for.cond5 br label %for.cond5 } -define void @e() { +define void @e(ptr %p) { ; CHECK-LABEL: @e( ; CHECK-NEXT: entry: ; CHECK-NEXT: br label [[FOR_COND:%.*]] ; CHECK: for.cond: ; CHECK-NEXT: br i1 false, label [[FOR_END:%.*]], label [[FOR_COND]] ; CHECK: for.end: -; CHECK-NEXT: [[TMP0:%.*]] = load i16, ptr null, align 2 +; CHECK-NEXT: [[TMP0:%.*]] = load i16, ptr [[P:%.*]], align 2 ; CHECK-NEXT: [[TMP1:%.*]] = trunc i16 [[TMP0]] to i1 ; CHECK-NEXT: br i1 [[TMP1]], label [[FOR_END_SPLIT:%.*]], label 
[[FOR_END_SPLIT_US:%.*]] ; CHECK: for.end.split.us: @@ -122,7 +122,7 @@ define void @e() { ; CHECK: g.us: ; CHECK-NEXT: br label [[G_SPLIT_US6:%.*]] ; CHECK: for.cond1.us1: -; CHECK-NEXT: [[TMP2:%.*]] = load i16, ptr null, align 2 +; CHECK-NEXT: [[TMP2:%.*]] = load i16, ptr [[P]], align 2 ; CHECK-NEXT: [[TOBOOL4_NOT_US:%.*]] = trunc i16 [[TMP2]] to i1 ; CHECK-NEXT: br i1 [[TOBOOL4_NOT_US]], label [[FOR_COND5_PREHEADER_US4:%.*]], label [[G_LOOPEXIT_US:%.*]] ; CHECK: for.cond5.us2: @@ -143,7 +143,7 @@ define void @e() { ; CHECK: g.loopexit: ; CHECK-NEXT: br label [[G]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: g: -; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr null, align 2 +; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr [[P]], align 2 ; CHECK-NEXT: [[TMP4:%.*]] = trunc i16 [[TMP3]] to i1 ; CHECK-NEXT: br i1 [[TMP4]], label [[G_SPLIT_US:%.*]], label [[G_SPLIT:%.*]] ; CHECK: g.split.us: @@ -164,7 +164,7 @@ define void @e() { ; CHECK: for.cond1.loopexit: ; CHECK-NEXT: br label [[FOR_COND1]], !llvm.loop [[LOOP3]] ; CHECK: for.cond1: -; CHECK-NEXT: [[TMP5:%.*]] = load i16, ptr null, align 2 +; CHECK-NEXT: [[TMP5:%.*]] = load i16, ptr [[P]], align 2 ; CHECK-NEXT: [[TOBOOL4_NOT:%.*]] = trunc i16 [[TMP5]] to i1 ; CHECK-NEXT: br i1 [[TOBOOL4_NOT]], label [[FOR_COND5_PREHEADER:%.*]], label [[G_LOOPEXIT:%.*]] ; CHECK: for.cond5.preheader: @@ -188,7 +188,7 @@ g: ; preds = %for.cond1, %for.end br label %for.cond1 for.cond1: ; preds = %for.cond5, %g - %0 = load i16, ptr null, align 2 + %0 = load i16, ptr %p, align 2 %tobool4.not = trunc i16 %0 to i1 br i1 %tobool4.not, label %for.cond5, label %g From ebe309b46a6cbacfab01fdad2a2e28404e6f08be Mon Sep 17 00:00:00 2001 From: Matt Devereau Date: Thu, 25 Apr 2024 10:39:58 +0000 Subject: [PATCH 4/4] Remove i1 check --- llvm/lib/Transforms/Utils/LoopUtils.cpp | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/llvm/lib/Transforms/Utils/LoopUtils.cpp b/llvm/lib/Transforms/Utils/LoopUtils.cpp index 
59bee4ce0d929..e3e09d11ba8c8 100644 --- a/llvm/lib/Transforms/Utils/LoopUtils.cpp +++ b/llvm/lib/Transforms/Utils/LoopUtils.cpp @@ -1930,16 +1930,12 @@ llvm::hasPartialIVCondition(const Loop &L, unsigned MSSAThreshold, if (!TI || !TI->isConditional()) return {}; - Instruction *CondI = dyn_cast<CmpInst>(TI->getCondition()); - if (!CondI) { - CondI = dyn_cast<TruncInst>(TI->getCondition()); - if (CondI && CondI->getType() != Type::getInt1Ty(TI->getContext())) - return {}; - } - + auto *CondI = dyn_cast<Instruction>(TI->getCondition()); // The case with the condition outside the loop should already be handled // earlier. - if (!CondI || !L.contains(CondI)) + // Allow CmpInst and TruncInsts as they may be users of load instructions + // and have potential for partial unswitching + if (!CondI || !isa<CmpInst, TruncInst>(CondI) || !L.contains(CondI)) return {}; SmallVector<Instruction *> InstToDuplicate;