[RISCV] AddEdge between mask producer and user of V0 #146855
Merged: 1 commit, Jul 11, 2025
20 changes: 19 additions & 1 deletion llvm/lib/Target/RISCV/RISCVVectorMaskDAGMutation.cpp
@@ -10,6 +10,10 @@
// instructions and masked instructions, so that we can reduce the live range
// overlaps of mask registers.
//
// If there are multiple mask producers followed by multiple masked
// instructions, then at each masked instruction add dependency edges between
// every producer and the masked instruction.
//
// The reason why we need to do this:
// 1. When tracking register pressure, we don't track physical registers.
// 2. We have a RegisterClass for mask register (which is `VMV0`), but we don't
@@ -68,11 +72,25 @@ class RISCVVectorMaskDAGMutation : public ScheduleDAGMutation {

void apply(ScheduleDAGInstrs *DAG) override {
SUnit *NearestUseV0SU = nullptr;
SmallVector<SUnit *, 2> DefMask;
for (SUnit &SU : DAG->SUnits) {
const MachineInstr *MI = SU.getInstr();
if (MI->findRegisterUseOperand(RISCV::V0, TRI))
if (isSoleUseCopyToV0(SU))
DefMask.push_back(&SU);

if (MI->findRegisterUseOperand(RISCV::V0, TRI)) {
NearestUseV0SU = &SU;

// Copy may not be a real use, so skip it here.
if (DefMask.size() > 1 && !MI->isCopy())
for (SUnit *Def : DefMask)
if (DAG->canAddEdge(Def, &SU))
DAG->addEdge(Def, SDep(&SU, SDep::Artificial));

if (!DefMask.empty())
DefMask.erase(DefMask.begin());
}

if (NearestUseV0SU && NearestUseV0SU != &SU && isSoleUseCopyToV0(SU) &&
// For LMUL=8 cases, there will be more possibilities to spill.
// FIXME: We should use RegPressureTracker to do fine-grained
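For readers unfamiliar with scheduler mutations, the following is a minimal, hypothetical sketch of the general pattern the pass above uses: a ScheduleDAGMutation subclass that walks DAG->SUnits and inserts artificial ordering edges. The class name SerializeMaskProducers and the predicate isMaskProducer are placeholders invented for illustration; only DAG->SUnits, canAddEdge, addEdge, and SDep::Artificial are the interfaces the PR's code itself relies on.

// Hypothetical sketch (illustration only, not the PR's code): the general
// shape of a ScheduleDAGMutation that chains producers together with
// artificial edges. isMaskProducer is a stand-in predicate.
#include "llvm/CodeGen/ScheduleDAGInstrs.h"
#include "llvm/CodeGen/ScheduleDAGMutation.h"

namespace {

class SerializeMaskProducers : public llvm::ScheduleDAGMutation {
  // Placeholder predicate; a real mutation would inspect SU.getInstr()
  // with target-specific checks (e.g. "does this define a mask register?").
  static bool isMaskProducer(const llvm::SUnit &SU) { return false; }

  void apply(llvm::ScheduleDAGInstrs *DAG) override {
    llvm::SUnit *PrevProducer = nullptr;
    for (llvm::SUnit &SU : DAG->SUnits) {
      if (!isMaskProducer(SU))
        continue;
      // Artificial edges carry no latency; they only constrain ordering.
      // canAddEdge rejects edges that would create a cycle.
      if (PrevProducer && DAG->canAddEdge(&SU, PrevProducer))
        DAG->addEdge(&SU, llvm::SDep(PrevProducer, llvm::SDep::Artificial));
      PrevProducer = &SU;
    }
  }
};

} // end anonymous namespace

The PR's actual apply() above is a refinement of this pattern: it collects copy-to-V0 producers in DefMask and attaches the artificial edges at each masked instruction rather than chaining producers directly.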
18 changes: 10 additions & 8 deletions llvm/test/CodeGen/RISCV/rvv/combine-reduce-add-to-vcpop.ll
@@ -313,12 +313,12 @@ define i32 @test_nxv128i1(<vscale x 128 x i1> %x) {
; CHECK-NEXT: vslidedown.vx v0, v6, a0
; CHECK-NEXT: vsetvli a2, zero, e8, m1, ta, ma
; CHECK-NEXT: vslidedown.vx v6, v7, a1
; CHECK-NEXT: vsetvli a1, zero, e32, m8, ta, ma
; CHECK-NEXT: vmerge.vim v8, v8, 1, v0
; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma
; CHECK-NEXT: vslidedown.vx v0, v7, a0
; CHECK-NEXT: vslidedown.vx v5, v6, a0
; CHECK-NEXT: vslidedown.vx v4, v7, a0
; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, mu
; CHECK-NEXT: vmerge.vim v8, v8, 1, v0
; CHECK-NEXT: vmv1r.v v0, v4
; CHECK-NEXT: vadd.vi v8, v8, 1, v0.t
; CHECK-NEXT: vmv1r.v v0, v5
; CHECK-NEXT: vadd.vi v16, v16, 1, v0.t
@@ -425,13 +425,15 @@ define i32 @test_nxv256i1(<vscale x 256 x i1> %x) {
; CHECK-NEXT: vmerge.vim v16, v8, 1, v0
; CHECK-NEXT: vsetvli a2, zero, e8, mf2, ta, ma
; CHECK-NEXT: vslidedown.vx v0, v5, a1
; CHECK-NEXT: vslidedown.vx v5, v7, a1
; CHECK-NEXT: vslidedown.vx v4, v6, a1
; CHECK-NEXT: vsetvli a2, zero, e32, m8, ta, mu
; CHECK-NEXT: vsetvli a2, zero, e32, m8, ta, ma
; CHECK-NEXT: vmerge.vim v8, v8, 1, v0
; CHECK-NEXT: vmv1r.v v0, v4
; CHECK-NEXT: vsetvli a2, zero, e8, mf2, ta, ma
; CHECK-NEXT: vslidedown.vx v0, v6, a1
; CHECK-NEXT: vsetvli a2, zero, e32, m8, ta, mu
; CHECK-NEXT: vadd.vi v8, v8, 1, v0.t
; CHECK-NEXT: vmv1r.v v0, v5
; CHECK-NEXT: vsetvli a2, zero, e8, mf2, ta, ma
A reviewer (Contributor) commented on this hunk:
This removes a vmv but adds two more vtype toggles. But it may not matter.

; CHECK-NEXT: vslidedown.vx v0, v7, a1
; CHECK-NEXT: vsetvli a2, zero, e32, m8, ta, mu
; CHECK-NEXT: vadd.vi v16, v16, 1, v0.t
; CHECK-NEXT: vadd.vv v8, v16, v8
; CHECK-NEXT: addi a2, sp, 16
38 changes: 18 additions & 20 deletions llvm/test/CodeGen/RISCV/rvv/extractelt-i1.ll
@@ -139,21 +139,20 @@ define i1 @extractelt_nxv128i1(ptr %x, i64 %idx) nounwind {
; RV32-NEXT: slli a3, a3, 4
; RV32-NEXT: sub sp, sp, a3
; RV32-NEXT: andi sp, sp, -64
; RV32-NEXT: addi a3, sp, 64
; RV32-NEXT: vl8r.v v8, (a0)
; RV32-NEXT: slli a2, a2, 3
; RV32-NEXT: add a0, a0, a2
; RV32-NEXT: vl8r.v v24, (a0)
; RV32-NEXT: vl8r.v v16, (a0)
; RV32-NEXT: vsetvli a0, zero, e8, m8, ta, ma
; RV32-NEXT: vmseq.vi v0, v8, 0
; RV32-NEXT: vmv.v.i v16, 0
; RV32-NEXT: add a1, a3, a1
; RV32-NEXT: add a2, a3, a2
; RV32-NEXT: vmseq.vi v8, v24, 0
; RV32-NEXT: vmerge.vim v24, v16, 1, v0
; RV32-NEXT: vs8r.v v24, (a3)
; RV32-NEXT: vmv1r.v v0, v8
; RV32-NEXT: vmerge.vim v8, v16, 1, v0
; RV32-NEXT: vmv.v.i v8, 0
; RV32-NEXT: vmerge.vim v24, v8, 1, v0
; RV32-NEXT: vmseq.vi v0, v16, 0
; RV32-NEXT: addi a0, sp, 64
; RV32-NEXT: add a1, a0, a1
; RV32-NEXT: add a2, a0, a2
; RV32-NEXT: vs8r.v v24, (a0)
; RV32-NEXT: vmerge.vim v8, v8, 1, v0
; RV32-NEXT: vs8r.v v8, (a2)
; RV32-NEXT: lbu a0, 0(a1)
; RV32-NEXT: addi sp, s0, -80
@@ -179,21 +178,20 @@ define i1 @extractelt_nxv128i1(ptr %x, i64 %idx) nounwind {
; RV64-NEXT: slli a3, a3, 4
; RV64-NEXT: sub sp, sp, a3
; RV64-NEXT: andi sp, sp, -64
; RV64-NEXT: addi a3, sp, 64
; RV64-NEXT: vl8r.v v8, (a0)
; RV64-NEXT: slli a2, a2, 3
; RV64-NEXT: add a0, a0, a2
; RV64-NEXT: vl8r.v v24, (a0)
; RV64-NEXT: vl8r.v v16, (a0)
; RV64-NEXT: vsetvli a0, zero, e8, m8, ta, ma
; RV64-NEXT: vmseq.vi v0, v8, 0
; RV64-NEXT: vmv.v.i v16, 0
; RV64-NEXT: add a1, a3, a1
; RV64-NEXT: add a2, a3, a2
; RV64-NEXT: vmseq.vi v8, v24, 0
; RV64-NEXT: vmerge.vim v24, v16, 1, v0
; RV64-NEXT: vs8r.v v24, (a3)
; RV64-NEXT: vmv1r.v v0, v8
; RV64-NEXT: vmerge.vim v8, v16, 1, v0
; RV64-NEXT: vmv.v.i v8, 0
; RV64-NEXT: vmerge.vim v24, v8, 1, v0
; RV64-NEXT: vmseq.vi v0, v16, 0
; RV64-NEXT: addi a0, sp, 64
; RV64-NEXT: add a1, a0, a1
; RV64-NEXT: add a2, a0, a2
; RV64-NEXT: vs8r.v v24, (a0)
; RV64-NEXT: vmerge.vim v8, v8, 1, v0
; RV64-NEXT: vs8r.v v8, (a2)
; RV64-NEXT: lbu a0, 0(a1)
; RV64-NEXT: addi sp, s0, -80
108 changes: 52 additions & 56 deletions llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract-i1.ll
@@ -324,24 +324,23 @@ define i1 @extractelt_v256i1(ptr %x, i64 %idx) nounwind {
; RV32-NEXT: sw s0, 376(sp) # 4-byte Folded Spill
; RV32-NEXT: addi s0, sp, 384
; RV32-NEXT: andi sp, sp, -128
; RV32-NEXT: zext.b a1, a1
; RV32-NEXT: mv a2, sp
; RV32-NEXT: li a3, 128
; RV32-NEXT: vsetvli zero, a3, e8, m8, ta, ma
; RV32-NEXT: li a2, 128
; RV32-NEXT: vsetvli zero, a2, e8, m8, ta, ma
; RV32-NEXT: vle8.v v8, (a0)
; RV32-NEXT: addi a0, a0, 128
; RV32-NEXT: vle8.v v16, (a0)
; RV32-NEXT: add a1, a2, a1
; RV32-NEXT: vmseq.vi v0, v8, 0
; RV32-NEXT: vmv.v.i v24, 0
; RV32-NEXT: vmseq.vi v8, v16, 0
; RV32-NEXT: vmerge.vim v16, v24, 1, v0
; RV32-NEXT: vse8.v v16, (a2)
; RV32-NEXT: vmv1r.v v0, v8
; RV32-NEXT: vmerge.vim v8, v24, 1, v0
; RV32-NEXT: addi a0, sp, 128
; RV32-NEXT: vse8.v v8, (a0)
; RV32-NEXT: lbu a0, 0(a1)
; RV32-NEXT: vmv.v.i v8, 0
; RV32-NEXT: vmerge.vim v24, v8, 1, v0
; RV32-NEXT: vmseq.vi v0, v16, 0
; RV32-NEXT: zext.b a0, a1
; RV32-NEXT: mv a1, sp
; RV32-NEXT: add a0, a1, a0
; RV32-NEXT: vse8.v v24, (a1)
; RV32-NEXT: vmerge.vim v8, v8, 1, v0
; RV32-NEXT: addi a1, sp, 128
; RV32-NEXT: vse8.v v8, (a1)
; RV32-NEXT: lbu a0, 0(a0)
; RV32-NEXT: addi sp, s0, -384
; RV32-NEXT: lw ra, 380(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s0, 376(sp) # 4-byte Folded Reload
@@ -355,24 +354,23 @@ define i1 @extractelt_v256i1(ptr %x, i64 %idx) nounwind {
; RV64-NEXT: sd s0, 368(sp) # 8-byte Folded Spill
; RV64-NEXT: addi s0, sp, 384
; RV64-NEXT: andi sp, sp, -128
; RV64-NEXT: zext.b a1, a1
; RV64-NEXT: mv a2, sp
; RV64-NEXT: li a3, 128
; RV64-NEXT: vsetvli zero, a3, e8, m8, ta, ma
; RV64-NEXT: li a2, 128
; RV64-NEXT: vsetvli zero, a2, e8, m8, ta, ma
; RV64-NEXT: vle8.v v8, (a0)
; RV64-NEXT: addi a0, a0, 128
; RV64-NEXT: vle8.v v16, (a0)
; RV64-NEXT: add a1, a2, a1
; RV64-NEXT: vmseq.vi v0, v8, 0
; RV64-NEXT: vmv.v.i v24, 0
; RV64-NEXT: vmseq.vi v8, v16, 0
; RV64-NEXT: vmerge.vim v16, v24, 1, v0
; RV64-NEXT: vse8.v v16, (a2)
; RV64-NEXT: vmv1r.v v0, v8
; RV64-NEXT: vmerge.vim v8, v24, 1, v0
; RV64-NEXT: addi a0, sp, 128
; RV64-NEXT: vse8.v v8, (a0)
; RV64-NEXT: lbu a0, 0(a1)
; RV64-NEXT: vmv.v.i v8, 0
; RV64-NEXT: vmerge.vim v24, v8, 1, v0
; RV64-NEXT: vmseq.vi v0, v16, 0
; RV64-NEXT: zext.b a0, a1
; RV64-NEXT: mv a1, sp
; RV64-NEXT: add a0, a1, a0
; RV64-NEXT: vse8.v v24, (a1)
; RV64-NEXT: vmerge.vim v8, v8, 1, v0
; RV64-NEXT: addi a1, sp, 128
; RV64-NEXT: vse8.v v8, (a1)
; RV64-NEXT: lbu a0, 0(a0)
; RV64-NEXT: addi sp, s0, -384
; RV64-NEXT: ld ra, 376(sp) # 8-byte Folded Reload
; RV64-NEXT: ld s0, 368(sp) # 8-byte Folded Reload
@@ -386,24 +384,23 @@ define i1 @extractelt_v256i1(ptr %x, i64 %idx) nounwind {
; RV32ZBS-NEXT: sw s0, 376(sp) # 4-byte Folded Spill
; RV32ZBS-NEXT: addi s0, sp, 384
; RV32ZBS-NEXT: andi sp, sp, -128
; RV32ZBS-NEXT: zext.b a1, a1
; RV32ZBS-NEXT: mv a2, sp
; RV32ZBS-NEXT: li a3, 128
; RV32ZBS-NEXT: vsetvli zero, a3, e8, m8, ta, ma
; RV32ZBS-NEXT: li a2, 128
; RV32ZBS-NEXT: vsetvli zero, a2, e8, m8, ta, ma
; RV32ZBS-NEXT: vle8.v v8, (a0)
; RV32ZBS-NEXT: addi a0, a0, 128
; RV32ZBS-NEXT: vle8.v v16, (a0)
; RV32ZBS-NEXT: add a1, a2, a1
; RV32ZBS-NEXT: vmseq.vi v0, v8, 0
; RV32ZBS-NEXT: vmv.v.i v24, 0
; RV32ZBS-NEXT: vmseq.vi v8, v16, 0
; RV32ZBS-NEXT: vmerge.vim v16, v24, 1, v0
; RV32ZBS-NEXT: vse8.v v16, (a2)
; RV32ZBS-NEXT: vmv1r.v v0, v8
; RV32ZBS-NEXT: vmerge.vim v8, v24, 1, v0
; RV32ZBS-NEXT: addi a0, sp, 128
; RV32ZBS-NEXT: vse8.v v8, (a0)
; RV32ZBS-NEXT: lbu a0, 0(a1)
; RV32ZBS-NEXT: vmv.v.i v8, 0
; RV32ZBS-NEXT: vmerge.vim v24, v8, 1, v0
; RV32ZBS-NEXT: vmseq.vi v0, v16, 0
; RV32ZBS-NEXT: zext.b a0, a1
; RV32ZBS-NEXT: mv a1, sp
; RV32ZBS-NEXT: add a0, a1, a0
; RV32ZBS-NEXT: vse8.v v24, (a1)
; RV32ZBS-NEXT: vmerge.vim v8, v8, 1, v0
; RV32ZBS-NEXT: addi a1, sp, 128
; RV32ZBS-NEXT: vse8.v v8, (a1)
; RV32ZBS-NEXT: lbu a0, 0(a0)
; RV32ZBS-NEXT: addi sp, s0, -384
; RV32ZBS-NEXT: lw ra, 380(sp) # 4-byte Folded Reload
; RV32ZBS-NEXT: lw s0, 376(sp) # 4-byte Folded Reload
@@ -417,24 +414,23 @@ define i1 @extractelt_v256i1(ptr %x, i64 %idx) nounwind {
; RV64ZBS-NEXT: sd s0, 368(sp) # 8-byte Folded Spill
; RV64ZBS-NEXT: addi s0, sp, 384
; RV64ZBS-NEXT: andi sp, sp, -128
; RV64ZBS-NEXT: zext.b a1, a1
; RV64ZBS-NEXT: mv a2, sp
; RV64ZBS-NEXT: li a3, 128
; RV64ZBS-NEXT: vsetvli zero, a3, e8, m8, ta, ma
; RV64ZBS-NEXT: li a2, 128
; RV64ZBS-NEXT: vsetvli zero, a2, e8, m8, ta, ma
; RV64ZBS-NEXT: vle8.v v8, (a0)
; RV64ZBS-NEXT: addi a0, a0, 128
; RV64ZBS-NEXT: vle8.v v16, (a0)
; RV64ZBS-NEXT: add a1, a2, a1
; RV64ZBS-NEXT: vmseq.vi v0, v8, 0
; RV64ZBS-NEXT: vmv.v.i v24, 0
; RV64ZBS-NEXT: vmseq.vi v8, v16, 0
; RV64ZBS-NEXT: vmerge.vim v16, v24, 1, v0
; RV64ZBS-NEXT: vse8.v v16, (a2)
; RV64ZBS-NEXT: vmv1r.v v0, v8
; RV64ZBS-NEXT: vmerge.vim v8, v24, 1, v0
; RV64ZBS-NEXT: addi a0, sp, 128
; RV64ZBS-NEXT: vse8.v v8, (a0)
; RV64ZBS-NEXT: lbu a0, 0(a1)
; RV64ZBS-NEXT: vmv.v.i v8, 0
; RV64ZBS-NEXT: vmerge.vim v24, v8, 1, v0
; RV64ZBS-NEXT: vmseq.vi v0, v16, 0
; RV64ZBS-NEXT: zext.b a0, a1
; RV64ZBS-NEXT: mv a1, sp
; RV64ZBS-NEXT: add a0, a1, a0
; RV64ZBS-NEXT: vse8.v v24, (a1)
; RV64ZBS-NEXT: vmerge.vim v8, v8, 1, v0
; RV64ZBS-NEXT: addi a1, sp, 128
; RV64ZBS-NEXT: vse8.v v8, (a1)
; RV64ZBS-NEXT: lbu a0, 0(a0)
; RV64ZBS-NEXT: addi sp, s0, -384
; RV64ZBS-NEXT: ld ra, 376(sp) # 8-byte Folded Reload
; RV64ZBS-NEXT: ld s0, 368(sp) # 8-byte Folded Reload
26 changes: 12 additions & 14 deletions llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-deinterleave.ll
@@ -143,16 +143,15 @@ define void @deinterleave6_0_i8(ptr %in, ptr %out) {
; CHECK-LABEL: deinterleave6_0_i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT: vle8.v v9, (a0)
; CHECK-NEXT: vle8.v v8, (a0)
; CHECK-NEXT: vmv.v.i v0, 2
; CHECK-NEXT: vmv.v.i v8, 4
; CHECK-NEXT: vsetivli zero, 8, e8, m1, ta, ma
; CHECK-NEXT: vslidedown.vi v10, v9, 8
; CHECK-NEXT: vslidedown.vi v9, v8, 8
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
; CHECK-NEXT: vslidedown.vi v9, v9, 5, v0.t
; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: vrgather.vi v9, v10, 4, v0.t
; CHECK-NEXT: vse8.v v9, (a1)
; CHECK-NEXT: vslidedown.vi v8, v8, 5, v0.t
; CHECK-NEXT: vmv.v.i v0, 4
; CHECK-NEXT: vrgather.vi v8, v9, 4, v0.t
; CHECK-NEXT: vse8.v v8, (a1)
; CHECK-NEXT: ret
entry:
%0 = load <16 x i8>, ptr %in, align 1
@@ -188,16 +187,15 @@ define void @deinterleave7_0_i8(ptr %in, ptr %out) {
; CHECK-LABEL: deinterleave7_0_i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT: vle8.v v9, (a0)
; CHECK-NEXT: vle8.v v8, (a0)
; CHECK-NEXT: vmv.v.i v0, 2
; CHECK-NEXT: vmv.v.i v8, 4
; CHECK-NEXT: vsetivli zero, 8, e8, m1, ta, ma
; CHECK-NEXT: vslidedown.vi v10, v9, 8
; CHECK-NEXT: vslidedown.vi v9, v8, 8
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
; CHECK-NEXT: vslidedown.vi v9, v9, 6, v0.t
; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: vrgather.vi v9, v10, 6, v0.t
; CHECK-NEXT: vse8.v v9, (a1)
; CHECK-NEXT: vslidedown.vi v8, v8, 6, v0.t
; CHECK-NEXT: vmv.v.i v0, 4
; CHECK-NEXT: vrgather.vi v8, v9, 6, v0.t
; CHECK-NEXT: vse8.v v8, (a1)
; CHECK-NEXT: ret
entry:
%0 = load <16 x i8>, ptr %in, align 1