Skip to content

Revert "[AMDGPU][MC] Allow op_sel in v_alignbit_b32 etc in GFX9 and GFX10 (#142188)" #149138

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into llvm:main
Jul 16, 2025

Conversation

jwanggit86
Copy link
Contributor

This reverts commit ce7851f.

@llvmbot
Copy link
Member

llvmbot commented Jul 16, 2025

@llvm/pr-subscribers-mc

@llvm/pr-subscribers-backend-amdgpu

Author: Jun Wang (jwanggit86)

Changes

This reverts commit ce7851f.


Patch is 24.63 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/149138.diff

11 Files Affected:

  • (modified) llvm/lib/Target/AMDGPU/SIInstructions.td (+5-42)
  • (modified) llvm/lib/Target/AMDGPU/VOP3Instructions.td (+2-27)
  • (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-bswap.mir (-19)
  • (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fshr.mir (+2-20)
  • (modified) llvm/test/CodeGen/AMDGPU/branch-folding-implicit-def-subreg.ll (+3-3)
  • (modified) llvm/test/MC/AMDGPU/gfx10_asm_vop3.s (-24)
  • (modified) llvm/test/MC/AMDGPU/gfx7_err_pos.s (-13)
  • (modified) llvm/test/MC/AMDGPU/gfx8_err_pos.s (-10)
  • (modified) llvm/test/MC/AMDGPU/gfx9_asm_vop3_e64.s (-24)
  • (modified) llvm/test/MC/Disassembler/AMDGPU/gfx10_vop3.txt (-24)
  • (modified) llvm/test/MC/Disassembler/AMDGPU/gfx9_vop3.txt (-24)
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index d48eb52d2faae..2a6fcadd4c49c 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -2473,7 +2473,6 @@ def : AMDGPUPat <
 >;
 
 let True16Predicate = NotHasTrue16BitInsts in {
-let SubtargetPredicate = isNotGFX9Plus in {
 def : ROTRPattern <V_ALIGNBIT_B32_e64>;
 
 def : GCNPat<(i32 (trunc (srl i64:$src0, (and i32:$src1, (i32 31))))),
@@ -2483,35 +2482,6 @@ def : GCNPat<(i32 (trunc (srl i64:$src0, (and i32:$src1, (i32 31))))),
 def : GCNPat<(i32 (trunc (srl i64:$src0, (i32 ShiftAmt32Imm:$src1)))),
           (V_ALIGNBIT_B32_e64 (i32 (EXTRACT_SUBREG (i64 $src0), sub1)),
                           (i32 (EXTRACT_SUBREG (i64 $src0), sub0)), $src1)>;
-} // isNotGFX9Plus
-
-let SubtargetPredicate = isGFX9GFX10 in {
-def : GCNPat <
-        (rotr i32:$src0, i32:$src1),
-        (V_ALIGNBIT_B32_opsel_e64 /* src0_modifiers */ 0, $src0,
-                                  /* src1_modifiers */ 0, $src0,
-                                  /* src2_modifiers */ 0,
-                                  $src1, /* clamp */ 0, /* op_sel */ 0)
->;
-
-foreach pat = [(i32 (trunc (srl i64:$src0, (and i32:$src1, (i32 31))))),
-               (i32 (trunc (srl i64:$src0, (i32 ShiftAmt32Imm:$src1))))] in
-def : GCNPat<pat,
-        (V_ALIGNBIT_B32_opsel_e64 0, /* src0_modifiers */
-                                  (i32 (EXTRACT_SUBREG (i64 $src0), sub1)),
-                                  0, /* src1_modifiers */
-                                  (i32 (EXTRACT_SUBREG (i64 $src0), sub0)),
-                                  0, /* src2_modifiers */
-                                  $src1, /* clamp */ 0, /* op_sel */ 0)
->;
-
-def : GCNPat<(fshr i32:$src0, i32:$src1, i32:$src2),
-        (V_ALIGNBIT_B32_opsel_e64 /* src0_modifiers */ 0, $src0,
-                                  /* src1_modifiers */ 0, $src1,
-                                  /* src2_modifiers */ 0,
-                                  $src2, /* clamp */ 0, /* op_sel */ 0)
->;
-} // isGFX9GFX10
 } // end True16Predicate = NotHasTrue16BitInsts
 
 let True16Predicate = UseRealTrue16Insts in {
@@ -3112,8 +3082,6 @@ def : GCNPat <
                     (i32 (EXTRACT_SUBREG $a, sub0))), (i32 1))
 >;
 
-// This pattern for bswap is used for pre-GFX8. For GFX8+, bswap is mapped
-// to V_PERM_B32.
 let True16Predicate = NotHasTrue16BitInsts in
 def : GCNPat <
   (i32 (bswap i32:$a)),
@@ -3589,20 +3557,15 @@ def : GCNPat <
 
 // Take the upper 16 bits from V[0] and the lower 16 bits from V[1]
 // Special case, can use V_ALIGNBIT (always uses encoded literal)
-let True16Predicate = NotHasTrue16BitInsts in {
-defvar BuildVectorToAlignBitPat =
+let True16Predicate = NotHasTrue16BitInsts in
+def : GCNPat <
   (vecTy (DivergentBinFrag<build_vector>
     (Ty !if(!eq(Ty, i16),
       (Ty (trunc (srl VGPR_32:$a, (i32 16)))),
       (Ty (bitconvert (i16 (trunc (srl VGPR_32:$a, (i32 16)))))))),
-    (Ty VGPR_32:$b)));
-
-let SubtargetPredicate = isNotGFX9Plus in
-def : GCNPat<BuildVectorToAlignBitPat, (V_ALIGNBIT_B32_e64 VGPR_32:$b, VGPR_32:$a, (i32 16))>;
-
-let SubtargetPredicate = isGFX9GFX10 in
-def : GCNPat<BuildVectorToAlignBitPat, (V_ALIGNBIT_B32_opsel_e64 0, VGPR_32:$b, 0, VGPR_32:$a, 0, (i32 16), 0, 0)>;
-} //True16Predicate = NotHasTrue16BitInsts
+    (Ty VGPR_32:$b))),
+    (V_ALIGNBIT_B32_e64 VGPR_32:$b, VGPR_32:$a, (i32 16))
+>;
 
 let True16Predicate = UseFakeTrue16Insts in
 def : GCNPat <
diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
index 75c531913ded1..2e7f25b67fb63 100644
--- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
@@ -224,12 +224,6 @@ defm V_ALIGNBIT_B32 : VOP3Inst_t16_with_profiles <"v_alignbit_b32",
                                                    fshr, null_frag>;
 
 defm V_ALIGNBYTE_B32 : VOP3Inst <"v_alignbyte_b32", VOP3_Profile<VOP_I32_I32_I32_I32>, int_amdgcn_alignbyte>;
-
-// In gfx9 and 10, opsel is allowed for V_ALIGNBIT_B32 and V_ALIGNBYTE_B32.
-// Hardware uses opsel[1:0] to byte-select src2. Other opsel bits are ignored.
-defm V_ALIGNBIT_B32_opsel : VOP3Inst <"v_alignbit_b32_opsel", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_OPSEL>>;
-defm V_ALIGNBYTE_B32_opsel : VOP3Inst <"v_alignbyte_b32_opsel", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_OPSEL>>;
-
 let True16Predicate = UseRealTrue16Insts in
 defm V_ALIGNBYTE_B32_t16 : VOP3Inst <"v_alignbyte_b32_t16", VOP3_Profile_True16<VOP_I32_I32_I32_I16, VOP3_OPSEL>>;
 let True16Predicate = UseFakeTrue16Insts in
@@ -1960,9 +1954,6 @@ let AssemblerPredicate = isGFX10Only, DecoderNamespace = "GFX10" in {
   }
 } // End AssemblerPredicate = isGFX10Only, DecoderNamespace = "GFX10"
 
-defm V_ALIGNBIT_B32_opsel  : VOP3OpSel_Real_gfx10_with_name<0x14e, "V_ALIGNBIT_B32_opsel", "v_alignbit_b32">;
-defm V_ALIGNBYTE_B32_opsel  : VOP3OpSel_Real_gfx10_with_name<0x14f, "V_ALIGNBYTE_B32_opsel", "v_alignbyte_b32">;
-
 defm V_READLANE_B32  : VOP3_Real_No_Suffix_gfx10<0x360>;
 
 let InOperandList = (ins SSrcOrLds_b32:$src0, SCSrc_b32:$src1, VGPR_32:$vdst_in) in {
@@ -2113,8 +2104,8 @@ defm V_BFI_B32         : VOP3_Real_gfx6_gfx7_gfx10<0x14a>;
 defm V_FMA_F32         : VOP3_Real_gfx6_gfx7_gfx10<0x14b>;
 defm V_FMA_F64         : VOP3_Real_gfx6_gfx7_gfx10<0x14c>;
 defm V_LERP_U8         : VOP3_Real_gfx6_gfx7_gfx10<0x14d>;
-defm V_ALIGNBIT_B32    : VOP3_Real_gfx6_gfx7<0x14e>;
-defm V_ALIGNBYTE_B32   : VOP3_Real_gfx6_gfx7<0x14f>;
+defm V_ALIGNBIT_B32    : VOP3_Real_gfx6_gfx7_gfx10<0x14e>;
+defm V_ALIGNBYTE_B32   : VOP3_Real_gfx6_gfx7_gfx10<0x14f>;
 defm V_MULLIT_F32      : VOP3_Real_gfx6_gfx7_gfx10<0x150>;
 defm V_MIN3_F32        : VOP3_Real_gfx6_gfx7_gfx10<0x151>;
 defm V_MIN3_I32        : VOP3_Real_gfx6_gfx7_gfx10<0x152>;
@@ -2257,17 +2248,6 @@ multiclass VOP3_Real_BITOP3_gfx9<bits<10> op, string AsmName, bit isSingle = 0>
   }
 }
 
-// Instructions such as v_alignbyte_b32 allows op_sel in gfx9, but not in vi.
-// The following is created to support that.
-multiclass VOP3OpSel_Real_gfx9_with_name<bits<10> op, string opName, string AsmName> {
-  defvar psName = opName#"_e64";
-  def _gfx9 : VOP3_Real<!cast<VOP3_Pseudo>(psName), SIEncodingFamily.VI>, // note: encoding family is VI
-            VOP3OpSel_gfx9 <op, !cast<VOP3_Pseudo>(psName).Pfl> {
-              VOP3_Pseudo ps = !cast<VOP3_Pseudo>(psName);
-              let AsmString = AsmName # ps.AsmOperands;
-            }
-}
-
 } // End AssemblerPredicate = isGFX9Only, DecoderNamespace = "GFX9"
 
 defm V_MAD_U64_U32      : VOP3be_Real_vi <0x1E8>;
@@ -2287,10 +2267,8 @@ defm V_BFI_B32          : VOP3_Real_vi <0x1ca>;
 defm V_FMA_F32          : VOP3_Real_vi <0x1cb>;
 defm V_FMA_F64          : VOP3_Real_vi <0x1cc>;
 defm V_LERP_U8          : VOP3_Real_vi <0x1cd>;
-let SubtargetPredicate = isGFX8Only in {
 defm V_ALIGNBIT_B32     : VOP3_Real_vi <0x1ce>;
 defm V_ALIGNBYTE_B32    : VOP3_Real_vi <0x1cf>;
-}
 defm V_MIN3_F32         : VOP3_Real_vi <0x1d0>;
 defm V_MIN3_I32         : VOP3_Real_vi <0x1d1>;
 defm V_MIN3_U32         : VOP3_Real_vi <0x1d2>;
@@ -2335,9 +2313,6 @@ defm V_INTERP_P2_LEGACY_F16 : VOP3Interp_F16_Real_gfx9 <0x276, "V_INTERP_P2_F16"
 defm V_MAD_LEGACY_U16       : VOP3_F16_Real_gfx9 <0x1eb, "V_MAD_U16",       "v_mad_legacy_u16">;
 defm V_MAD_LEGACY_I16       : VOP3_F16_Real_gfx9 <0x1ec, "V_MAD_I16",       "v_mad_legacy_i16">;
 
-defm V_ALIGNBIT_B32_opsel   : VOP3OpSel_Real_gfx9_with_name <0x1ce, "V_ALIGNBIT_B32_opsel", "v_alignbit_b32">;
-defm V_ALIGNBYTE_B32_opsel  : VOP3OpSel_Real_gfx9_with_name <0x1cf, "V_ALIGNBYTE_B32_opsel", "v_alignbyte_b32">;
-
 defm V_MAD_F16_gfx9         : VOP3OpSel_F16_Real_gfx9 <0x203, "v_mad_f16">;
 defm V_MAD_U16_gfx9         : VOP3OpSel_F16_Real_gfx9 <0x204, "v_mad_u16">;
 defm V_MAD_I16_gfx9         : VOP3OpSel_F16_Real_gfx9 <0x205, "v_mad_i16">;
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-bswap.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-bswap.mir
index dde566d9643d8..5b8c2840b0156 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-bswap.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-bswap.mir
@@ -1,8 +1,6 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
 # RUN: llc -mtriple=amdgcn -mcpu=hawaii -run-pass=instruction-select -verify-machineinstrs -o - %s  | FileCheck -check-prefix=GFX7 %s
 # RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs -o - %s  | FileCheck -check-prefix=GFX8 %s
-# RUN: llc -mtriple=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -o - %s  | FileCheck -check-prefix=GFX9 %s
-# RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs -o - %s  | FileCheck -check-prefix=GFX10 %s
 
 ---
 name: bswap_i32_vv
@@ -21,7 +19,6 @@ body: |
     ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 16711935
     ; GFX7-NEXT: [[V_BFI_B32_e64_:%[0-9]+]]:vgpr_32 = V_BFI_B32_e64 [[S_MOV_B32_]], [[V_ALIGNBIT_B32_e64_1]], [[V_ALIGNBIT_B32_e64_]], implicit $exec
     ; GFX7-NEXT: S_ENDPGM 0, implicit [[V_BFI_B32_e64_]]
-    ;
     ; GFX8-LABEL: name: bswap_i32_vv
     ; GFX8: liveins: $vgpr0
     ; GFX8-NEXT: {{  $}}
@@ -29,22 +26,6 @@ body: |
     ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 66051
     ; GFX8-NEXT: [[V_PERM_B32_e64_:%[0-9]+]]:vgpr_32 = V_PERM_B32_e64 0, [[COPY]], [[S_MOV_B32_]], implicit $exec
     ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_PERM_B32_e64_]]
-    ;
-    ; GFX9-LABEL: name: bswap_i32_vv
-    ; GFX9: liveins: $vgpr0
-    ; GFX9-NEXT: {{  $}}
-    ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-    ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 66051
-    ; GFX9-NEXT: [[V_PERM_B32_e64_:%[0-9]+]]:vgpr_32 = V_PERM_B32_e64 0, [[COPY]], [[S_MOV_B32_]], implicit $exec
-    ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_PERM_B32_e64_]]
-    ;
-    ; GFX10-LABEL: name: bswap_i32_vv
-    ; GFX10: liveins: $vgpr0
-    ; GFX10-NEXT: {{  $}}
-    ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-    ; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 66051
-    ; GFX10-NEXT: [[V_PERM_B32_e64_:%[0-9]+]]:vgpr_32 = V_PERM_B32_e64 0, [[COPY]], [[S_MOV_B32_]], implicit $exec
-    ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_PERM_B32_e64_]]
     %0:vgpr(s32) = COPY $vgpr0
     %1:vgpr(s32) = G_BSWAP %0
     S_ENDPGM 0, implicit %1
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fshr.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fshr.mir
index fa95f33909b76..0a4cb3ccf2957 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fshr.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fshr.mir
@@ -1,8 +1,8 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
 # RUN: llc -mtriple=amdgcn -mcpu=tahiti -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GCN %s
 # RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GCN %s
-# RUN: llc -mtriple=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX9 %s
-# RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX10 %s
+# RUN: llc -mtriple=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GCN %s
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GCN %s
 # RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefixes=GFX11 %s
 
 ---
@@ -24,24 +24,6 @@ body: |
     ; GCN-NEXT: [[V_ALIGNBIT_B32_e64_:%[0-9]+]]:vgpr_32 = V_ALIGNBIT_B32_e64 [[COPY]], [[COPY1]], [[COPY2]], implicit $exec
     ; GCN-NEXT: S_ENDPGM 0, implicit [[V_ALIGNBIT_B32_e64_]]
     ;
-    ; GFX9-LABEL: name: fshr_s32
-    ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2
-    ; GFX9-NEXT: {{  $}}
-    ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-    ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
-    ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
-    ; GFX9-NEXT: [[V_ALIGNBIT_B32_opsel_e64_:%[0-9]+]]:vgpr_32 = V_ALIGNBIT_B32_opsel_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec
-    ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_ALIGNBIT_B32_opsel_e64_]]
-    ;
-    ; GFX10-LABEL: name: fshr_s32
-    ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2
-    ; GFX10-NEXT: {{  $}}
-    ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-    ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
-    ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
-    ; GFX10-NEXT: [[V_ALIGNBIT_B32_opsel_e64_:%[0-9]+]]:vgpr_32 = V_ALIGNBIT_B32_opsel_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec
-    ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_ALIGNBIT_B32_opsel_e64_]]
-    ;
     ; GFX11-LABEL: name: fshr_s32
     ; GFX11: liveins: $vgpr0, $vgpr1, $vgpr2
     ; GFX11-NEXT: {{  $}}
diff --git a/llvm/test/CodeGen/AMDGPU/branch-folding-implicit-def-subreg.ll b/llvm/test/CodeGen/AMDGPU/branch-folding-implicit-def-subreg.ll
index 92c63fead15ac..ae90cfb631e8d 100644
--- a/llvm/test/CodeGen/AMDGPU/branch-folding-implicit-def-subreg.ll
+++ b/llvm/test/CodeGen/AMDGPU/branch-folding-implicit-def-subreg.ll
@@ -766,10 +766,10 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
   ; GFX90A-NEXT:   renamable $vgpr10 = COPY renamable $sgpr22, implicit $exec
   ; GFX90A-NEXT:   renamable $vgpr12_vgpr13 = DS_READ_B64_gfx9 killed renamable $vgpr10, 0, 0, implicit $exec :: (load (s64) from %ir.8, addrspace 3)
   ; GFX90A-NEXT:   renamable $vgpr10 = COPY renamable $sgpr46, implicit $exec
-  ; GFX90A-NEXT:   renamable $vgpr11 = V_ALIGNBIT_B32_opsel_e64 0, killed $sgpr47, 0, killed $vgpr10, 0, 1, 0, 0, implicit $exec
-  ; GFX90A-NEXT:   renamable $vgpr52 = V_ALIGNBIT_B32_opsel_e64 0, $vgpr17, 0, $vgpr16, 0, 1, 0, 0, implicit $exec
+  ; GFX90A-NEXT:   renamable $vgpr11 = V_ALIGNBIT_B32_e64 killed $sgpr47, killed $vgpr10, 1, implicit $exec
+  ; GFX90A-NEXT:   renamable $vgpr52 = V_ALIGNBIT_B32_e64 $vgpr17, $vgpr16, 1, implicit $exec
   ; GFX90A-NEXT:   renamable $vgpr17 = V_CNDMASK_B32_e64 0, 0, 0, 1, $sgpr12_sgpr13, implicit $exec
-  ; GFX90A-NEXT:   renamable $vgpr15 = V_ALIGNBIT_B32_opsel_e64 0, $vgpr15, 0, $vgpr14, 0, 1, 0, 0, implicit $exec
+  ; GFX90A-NEXT:   renamable $vgpr15 = V_ALIGNBIT_B32_e64 $vgpr15, $vgpr14, 1, implicit $exec
   ; GFX90A-NEXT:   renamable $sgpr52_sgpr53 = S_XOR_B64 $exec, -1, implicit-def dead $scc
   ; GFX90A-NEXT:   renamable $sgpr62_sgpr63 = S_OR_B64 renamable $sgpr36_sgpr37, $exec, implicit-def dead $scc
   ; GFX90A-NEXT:   renamable $vgpr10 = COPY renamable $vgpr14, implicit $exec
diff --git a/llvm/test/MC/AMDGPU/gfx10_asm_vop3.s b/llvm/test/MC/AMDGPU/gfx10_asm_vop3.s
index 3d6af6ba6dbf8..6bb0f4b1dff2d 100644
--- a/llvm/test/MC/AMDGPU/gfx10_asm_vop3.s
+++ b/llvm/test/MC/AMDGPU/gfx10_asm_vop3.s
@@ -3628,18 +3628,6 @@ v_alignbit_b32 v5, v1, v2, exec_lo
 v_alignbit_b32 v5, v1, v2, exec_hi
 // GFX10: encoding: [0x05,0x00,0x4e,0xd5,0x01,0x05,0xfe,0x01]
 
-v_alignbit_b32 v5, v1, v2, v3 op_sel:[1]
-// GFX10: v_alignbit_b32 v5, v1, v2, v3 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x4e,0xd5,0x01,0x05,0x0e,0x04]
-
-v_alignbit_b32 v5, v1, v2, v3 op_sel:[1,1]
-// GFX10: v_alignbit_b32 v5, v1, v2, v3 op_sel:[1,1,0,0] ; encoding: [0x05,0x18,0x4e,0xd5,0x01,0x05,0x0e,0x04]
-
-v_alignbit_b32 v5, v1, v2, v3 op_sel:[1,1,1]
-// GFX10: v_alignbit_b32 v5, v1, v2, v3 op_sel:[1,1,1,0] ; encoding: [0x05,0x38,0x4e,0xd5,0x01,0x05,0x0e,0x04]
-
-v_alignbit_b32 v5, v1, v2, v3 op_sel:[1,1,1,1]
-// GFX10: v_alignbit_b32 v5, v1, v2, v3 op_sel:[1,1,1,1] ; encoding: [0x05,0x78,0x4e,0xd5,0x01,0x05,0x0e,0x04]
-
 v_alignbyte_b32 v5, v1, v2, v3
 // GFX10: encoding: [0x05,0x00,0x4f,0xd5,0x01,0x05,0x0e,0x04]
 
@@ -3727,18 +3715,6 @@ v_alignbyte_b32 v5, v1, v2, exec_lo
 v_alignbyte_b32 v5, v1, v2, exec_hi
 // GFX10: encoding: [0x05,0x00,0x4f,0xd5,0x01,0x05,0xfe,0x01]
 
-v_alignbyte_b32 v5, v1, v2, v3 op_sel:[1]
-// GFX10: v_alignbyte_b32 v5, v1, v2, v3 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x4f,0xd5,0x01,0x05,0x0e,0x04]
-
-v_alignbyte_b32 v5, v1, v2, v3 op_sel:[1,1]
-// GFX10: v_alignbyte_b32 v5, v1, v2, v3 op_sel:[1,1,0,0] ; encoding: [0x05,0x18,0x4f,0xd5,0x01,0x05,0x0e,0x04]
-
-v_alignbyte_b32 v5, v1, v2, v3 op_sel:[1,1,1]
-// GFX10: v_alignbyte_b32 v5, v1, v2, v3 op_sel:[1,1,1,0] ; encoding: [0x05,0x38,0x4f,0xd5,0x01,0x05,0x0e,0x04]
-
-v_alignbyte_b32 v5, v1, v2, v3 op_sel:[1,1,1,1]
-// GFX10: v_alignbyte_b32 v5, v1, v2, v3 op_sel:[1,1,1,1] ; encoding: [0x05,0x78,0x4f,0xd5,0x01,0x05,0x0e,0x04]
-
 v_mullit_f32 v5, v1, v2, v3
 // GFX10: encoding: [0x05,0x00,0x50,0xd5,0x01,0x05,0x0e,0x04]
 
diff --git a/llvm/test/MC/AMDGPU/gfx7_err_pos.s b/llvm/test/MC/AMDGPU/gfx7_err_pos.s
index 7b6b241e04707..9dcbd4a4074af 100644
--- a/llvm/test/MC/AMDGPU/gfx7_err_pos.s
+++ b/llvm/test/MC/AMDGPU/gfx7_err_pos.s
@@ -44,16 +44,3 @@ s_load_dword s5, s[2:3], glc
 // CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: cache policy is not supported for SMRD instructions
 // CHECK-NEXT:{{^}}s_load_dword s5, s[2:3], glc
 // CHECK-NEXT:{{^}}                         ^
-
-//==============================================================================
-// not a valid operand
-
-v_alignbit_b32 v5, v1, v2, v3 op_sel:[1,1,1,1]
-// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
-// CHECK-NEXT:{{^}}v_alignbit_b32 v5, v1, v2, v3 op_sel:[1,1,1,1]
-// CHECK-NEXT:{{^}}                              ^
-
-v_alignbyte_b32 v5, v1, v2, v3 op_sel:[1,1,1,1]
-// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
-// CHECK-NEXT:{{^}}v_alignbyte_b32 v5, v1, v2, v3 op_sel:[1,1,1,1]
-// CHECK-NEXT:{{^}}                               ^
diff --git a/llvm/test/MC/AMDGPU/gfx8_err_pos.s b/llvm/test/MC/AMDGPU/gfx8_err_pos.s
index a475c739e690d..1e8457d54049a 100644
--- a/llvm/test/MC/AMDGPU/gfx8_err_pos.s
+++ b/llvm/test/MC/AMDGPU/gfx8_err_pos.s
@@ -49,13 +49,3 @@ v_cndmask_b32_sdwa v5, v1, sext(v2), vcc dst_sel:DWORD dst_unused:UNUSED_PRESERV
 // CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
 // CHECK-NEXT:{{^}}v_cndmask_b32_sdwa v5, v1, sext(v2), vcc dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:BYTE_0 src1_sel:WORD_0
 // CHECK-NEXT:{{^}}                           ^
-
-v_alignbit_b32 v5, v1, v2, v3 op_sel:[1,1,1,1]
-// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
-// CHECK-NEXT:{{^}}v_alignbit_b32 v5, v1, v2, v3 op_sel:[1,1,1,1]
-// CHECK-NEXT:{{^}}                              ^
-
-v_alignbyte_b32 v5, v1, v2, v3 op_sel:[1,1,1,1]
-// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
-// CHECK-NEXT:{{^}}v_alignbyte_b32 v5, v1, v2, v3 op_sel:[1,1,1,1]
-// CHECK-NEXT:{{^}}                               ^
diff --git a/llvm/test/MC/AMDGPU/gfx9_asm_vop3_e64.s b/llvm/test/MC/AMDGPU/gfx9_asm_vop3_e64.s
index a1cd9ce8ef18e..f3f4cae22538a 100644
--- a/llvm/test/MC/AMDGPU/gfx9_asm_vop3_e64.s
+++ b/llvm/test/MC/AMDGPU/gfx9_asm_vop3_e64.s
@@ -2829,18 +2829,6 @@ v_alignbit_b32 v5, v1, v2, src_execz
 v_alignbit_b32 v5, v1, v2, src_scc
 // CHECK: [0x05,0x00,0xce,0xd1,0x01,0x05,0xf6,0x03]
 
-v_alignbit_b32 v5, v1, v2, v3 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0xce,0xd1,0x01,0x05,0x0e,0x04]
-// CHECK: [0x05,0x08,0xce,0xd1,0x01,0x05,0x0e,0x04]
-
-v_alignbit_b32 v5, v1, v2, v3 op_sel:[1,1,0,0] ; encoding: [0x05,0x18,0xce,0xd1,0x01,0x05,0x0e,0x04]
-// CHECK: [0x05,0x18,0xce,0xd1,0x01,0x05,0x0e,0x04]
-
-v_alignbit_b32 v5, v1, v2, v3 op_sel:[1,1,1,0] ; encoding: [0x05,0x38,0xce,0xd1,0x01,0x05,0x0e,0x04]
-// CHECK: [0x05,0x38,0xce,0xd1,0x01,0x05,0x0e,0x04]
-
-v_alignbit_b32 v5, v1, v2, v3 op_sel:[1,1,1,1] ; encoding: [0x05,0x78,0xce,0xd1,0x01,0x05,0x0e,0x04]
-// CHECK: [0x05,0x78,0xce,0xd1,0x01,0x05,0x0e,0x04]
-
 v_alignbyte_b32 v5, v1, v2, v3
 // CHECK: [0x05,0x00,0xcf,0xd1,0x01,0x05,0x0e,0x04]
 
@@ -3012,18 +3000,6 @@ v_alignbyte_b32 v5, v1, v2, src_execz
 v_alignbyte_b32 v5, v1, v2, src_scc
 // CHECK: [0x05,0x00,0xcf,0xd1,0x01,0x05,0xf6,0x03]
 
-v_alignbyte_b32 v5, v1, v2, v3 op_sel:[1]
-// CHECK: v_alignbyte_b32 v5, v1, v2, v3 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0xcf,0xd1,0x01,0x05,0x0e,0x04]
-
-v_alignbyte_b32 v5, v1, v2, v3 op_sel:[1,1]
-// CHECK: v_alignbyte_b32 v5, v1, v2, v3 op_s...
[truncated]

Copy link
Contributor

@shiltian shiltian left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You don't need a PR for a revert. Just revert it locally and push it directly to the main branch. Be sure to include the reason for the revert.

@jwanggit86 jwanggit86 merged commit ba271cc into llvm:main Jul 16, 2025
13 checks passed
@arsenm
Copy link
Contributor

arsenm commented Jul 16, 2025

Reason not included?

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

Successfully merging this pull request may close these issues.

5 participants