Skip to content

Commit 987c7c4

Browse files
committed
[AMDGPU][MC] Allow op_sel in v_alignbit_b32 etc in GFX9 and GFX10
In GFX9 and GFX10, the op_sel modifier should be allowed in the instructions v_alignbit_b32 and v_alignbyte_b32.
1 parent ce4d214 commit 987c7c4

File tree

5 files changed

+93
-0
lines changed

5 files changed

+93
-0
lines changed

llvm/lib/Target/AMDGPU/VOP3Instructions.td

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -218,6 +218,11 @@ defm V_ALIGNBIT_B32 : VOP3Inst_t16_with_profiles <"v_alignbit_b32",
218218
fshr, null_frag>;
219219

220220
defm V_ALIGNBYTE_B32 : VOP3Inst <"v_alignbyte_b32", VOP3_Profile<VOP_I32_I32_I32_I32>, int_amdgcn_alignbyte>;
221+
222+
// In gfx9 and 10, opsel is allowed for V_ALIGNBIT_B32 and V_ALIGNBYTE_B32
223+
defm V_ALIGNBIT_B32_opsel : VOP3Inst <"v_alignbit_b32_opsel", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_OPSEL>>;
224+
defm V_ALIGNBYTE_B32_opsel : VOP3Inst <"v_alignbyte_b32_opsel", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_OPSEL>>;
225+
221226
let True16Predicate = UseRealTrue16Insts in
222227
defm V_ALIGNBYTE_B32_t16 : VOP3Inst <"v_alignbyte_b32_t16", VOP3_Profile_True16<VOP_I32_I32_I32_I16, VOP3_OPSEL>>;
223228
let True16Predicate = UseFakeTrue16Insts in
@@ -1940,6 +1945,9 @@ let AssemblerPredicate = isGFX10Only, DecoderNamespace = "GFX10" in {
19401945
}
19411946
} // End AssemblerPredicate = isGFX10Only, DecoderNamespace = "GFX10"
19421947

1948+
defm V_ALIGNBIT_B32_opsel : VOP3OpSel_Real_gfx10_with_name<0x14e, "V_ALIGNBIT_B32_opsel", "v_alignbit_b32">;
1949+
defm V_ALIGNBYTE_B32_opsel : VOP3OpSel_Real_gfx10_with_name<0x14f, "V_ALIGNBYTE_B32_opsel", "v_alignbyte_b32">;
1950+
19431951
defm V_READLANE_B32 : VOP3_Real_No_Suffix_gfx10<0x360>;
19441952

19451953
let InOperandList = (ins SSrcOrLds_b32:$src0, SCSrc_b32:$src1, VGPR_32:$vdst_in) in {
@@ -2236,6 +2244,17 @@ multiclass VOP3_Real_BITOP3_gfx9<bits<10> op, string AsmName, bit isSingle = 0>
22362244
}
22372245
}
22382246

2247+
// Instructions such as v_alignbyte_b32 allow op_sel in gfx9, but not in vi.
2248+
// The following multiclass is created to support that: it defines the "_gfx9"
// real instruction for an op_sel-enabled VOP3 pseudo.
//   op      - value forwarded to VOP3OpSel_gfx9 together with the pseudo's profile.
//   opName  - name of the pseudo without its "_e64" suffix.
//   AsmName - mnemonic placed in front of the pseudo's operand string.
2249+
multiclass VOP3OpSel_Real_gfx9_with_names<bits<10> op, string opName, string AsmName> {
2250+
// The pseudo is looked up by name as opName with "_e64" appended.
defvar psName = opName#"_e64";
2251+
def _gfx9 : VOP3_Real<!cast<VOP3_Pseudo>(psName), SIEncodingFamily.VI>, // note: encoding family is VI
2252+
VOP3OpSel_gfx9 <op, !cast<VOP3_Pseudo>(psName).Pfl> {
2253+
VOP3_Pseudo ps = !cast<VOP3_Pseudo>(psName);
2254+
// Build the assembly string from AsmName rather than from opName, so the
// mnemonic can differ from the pseudo's name (callers pass e.g.
// "V_ALIGNBIT_B32_opsel" with "v_alignbit_b32").
let AsmString = AsmName # ps.AsmOperands;
2255+
}
2256+
}
2257+
22392258
} // End AssemblerPredicate = isGFX9Only, DecoderNamespace = "GFX9"
22402259

22412260
defm V_MAD_U64_U32 : VOP3be_Real_vi <0x1E8>;
@@ -2301,6 +2320,9 @@ defm V_INTERP_P2_LEGACY_F16 : VOP3Interp_F16_Real_gfx9 <0x276, "V_INTERP_P2_F16"
23012320
defm V_MAD_LEGACY_U16 : VOP3_F16_Real_gfx9 <0x1eb, "V_MAD_U16", "v_mad_legacy_u16">;
23022321
defm V_MAD_LEGACY_I16 : VOP3_F16_Real_gfx9 <0x1ec, "V_MAD_I16", "v_mad_legacy_i16">;
23032322

2323+
defm V_ALIGNBIT_B32_opsel : VOP3OpSel_Real_gfx9_with_names <0x1ce, "V_ALIGNBIT_B32_opsel", "v_alignbit_b32">;
2324+
defm V_ALIGNBYTE_B32_opsel : VOP3OpSel_Real_gfx9_with_names <0x1cf, "V_ALIGNBYTE_B32_opsel", "v_alignbyte_b32">;
2325+
23042326
defm V_MAD_F16_gfx9 : VOP3OpSel_F16_Real_gfx9 <0x203, "v_mad_f16">;
23052327
defm V_MAD_U16_gfx9 : VOP3OpSel_F16_Real_gfx9 <0x204, "v_mad_u16">;
23062328
defm V_MAD_I16_gfx9 : VOP3OpSel_F16_Real_gfx9 <0x205, "v_mad_i16">;

llvm/test/MC/AMDGPU/gfx10_asm_vop3.s

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3628,6 +3628,18 @@ v_alignbit_b32 v5, v1, v2, exec_lo
36283628
v_alignbit_b32 v5, v1, v2, exec_hi
36293629
// GFX10: encoding: [0x05,0x00,0x4e,0xd5,0x01,0x05,0xfe,0x01]
36303630

3631+
v_alignbit_b32 v5, v1, v2, v3 op_sel:[1]
3632+
// GFX10: v_alignbit_b32 v5, v1, v2, v3 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x4e,0xd5,0x01,0x05,0x0e,0x04]
3633+
3634+
v_alignbit_b32 v5, v1, v2, v3 op_sel:[1,1]
3635+
// GFX10: v_alignbit_b32 v5, v1, v2, v3 op_sel:[1,1,0,0] ; encoding: [0x05,0x18,0x4e,0xd5,0x01,0x05,0x0e,0x04]
3636+
3637+
v_alignbit_b32 v5, v1, v2, v3 op_sel:[1,1,1]
3638+
// GFX10: v_alignbit_b32 v5, v1, v2, v3 op_sel:[1,1,1,0] ; encoding: [0x05,0x38,0x4e,0xd5,0x01,0x05,0x0e,0x04]
3639+
3640+
v_alignbit_b32 v5, v1, v2, v3 op_sel:[1,1,1,1]
3641+
// GFX10: v_alignbit_b32 v5, v1, v2, v3 op_sel:[1,1,1,1] ; encoding: [0x05,0x78,0x4e,0xd5,0x01,0x05,0x0e,0x04]
3642+
36313643
v_alignbyte_b32 v5, v1, v2, v3
36323644
// GFX10: encoding: [0x05,0x00,0x4f,0xd5,0x01,0x05,0x0e,0x04]
36333645

@@ -3715,6 +3727,18 @@ v_alignbyte_b32 v5, v1, v2, exec_lo
37153727
v_alignbyte_b32 v5, v1, v2, exec_hi
37163728
// GFX10: encoding: [0x05,0x00,0x4f,0xd5,0x01,0x05,0xfe,0x01]
37173729

3730+
v_alignbyte_b32 v5, v1, v2, v3 op_sel:[1]
3731+
// GFX10: v_alignbyte_b32 v5, v1, v2, v3 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x4f,0xd5,0x01,0x05,0x0e,0x04]
3732+
3733+
v_alignbyte_b32 v5, v1, v2, v3 op_sel:[1,1]
3734+
// GFX10: v_alignbyte_b32 v5, v1, v2, v3 op_sel:[1,1,0,0] ; encoding: [0x05,0x18,0x4f,0xd5,0x01,0x05,0x0e,0x04]
3735+
3736+
v_alignbyte_b32 v5, v1, v2, v3 op_sel:[1,1,1]
3737+
// GFX10: v_alignbyte_b32 v5, v1, v2, v3 op_sel:[1,1,1,0] ; encoding: [0x05,0x38,0x4f,0xd5,0x01,0x05,0x0e,0x04]
3738+
3739+
v_alignbyte_b32 v5, v1, v2, v3 op_sel:[1,1,1,1]
3740+
// GFX10: v_alignbyte_b32 v5, v1, v2, v3 op_sel:[1,1,1,1] ; encoding: [0x05,0x78,0x4f,0xd5,0x01,0x05,0x0e,0x04]
3741+
37183742
v_mullit_f32 v5, v1, v2, v3
37193743
// GFX10: encoding: [0x05,0x00,0x50,0xd5,0x01,0x05,0x0e,0x04]
37203744

llvm/test/MC/AMDGPU/gfx7_err_pos.s

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,3 +44,16 @@ s_load_dword s5, s[2:3], glc
4444
// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: cache policy is not supported for SMRD instructions
4545
// CHECK-NEXT:{{^}}s_load_dword s5, s[2:3], glc
4646
// CHECK-NEXT:{{^}} ^
47+
48+
//==============================================================================
49+
// not a valid operand
50+
51+
v_alignbit_b32 v5, v1, v2, v3 op_sel:[1,1,1,1]
52+
// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
53+
// CHECK-NEXT:{{^}}v_alignbit_b32 v5, v1, v2, v3 op_sel:[1,1,1,1]
54+
// CHECK-NEXT:{{^}} ^
55+
56+
v_alignbyte_b32 v5, v1, v2, v3 op_sel:[1,1,1,1]
57+
// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
58+
// CHECK-NEXT:{{^}}v_alignbyte_b32 v5, v1, v2, v3 op_sel:[1,1,1,1]
59+
// CHECK-NEXT:{{^}} ^

llvm/test/MC/AMDGPU/gfx8_err_pos.s

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,3 +49,13 @@ v_cndmask_b32_sdwa v5, v1, sext(v2), vcc dst_sel:DWORD dst_unused:UNUSED_PRESERV
4949
// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
5050
// CHECK-NEXT:{{^}}v_cndmask_b32_sdwa v5, v1, sext(v2), vcc dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:BYTE_0 src1_sel:WORD_0
5151
// CHECK-NEXT:{{^}} ^
52+
53+
v_alignbit_b32 v5, v1, v2, v3 op_sel:[1,1,1,1]
54+
// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
55+
// CHECK-NEXT:{{^}}v_alignbit_b32 v5, v1, v2, v3 op_sel:[1,1,1,1]
56+
// CHECK-NEXT:{{^}} ^
57+
58+
v_alignbyte_b32 v5, v1, v2, v3 op_sel:[1,1,1,1]
59+
// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
60+
// CHECK-NEXT:{{^}}v_alignbyte_b32 v5, v1, v2, v3 op_sel:[1,1,1,1]
61+
// CHECK-NEXT:{{^}} ^

llvm/test/MC/AMDGPU/gfx9_asm_vop3_e64.s

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2829,6 +2829,18 @@ v_alignbit_b32 v5, v1, v2, src_execz
28292829
v_alignbit_b32 v5, v1, v2, src_scc
28302830
// CHECK: [0x05,0x00,0xce,0xd1,0x01,0x05,0xf6,0x03]
28312831

2832+
// op_sel on v_alignbit_b32: abbreviated op_sel lists are expected to be printed
// in their full four-element form, matching the v_alignbyte_b32 tests in this file.
v_alignbit_b32 v5, v1, v2, v3 op_sel:[1]
2833+
// CHECK: v_alignbit_b32 v5, v1, v2, v3 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0xce,0xd1,0x01,0x05,0x0e,0x04]
2834+
2835+
v_alignbit_b32 v5, v1, v2, v3 op_sel:[1,1]
2836+
// CHECK: v_alignbit_b32 v5, v1, v2, v3 op_sel:[1,1,0,0] ; encoding: [0x05,0x18,0xce,0xd1,0x01,0x05,0x0e,0x04]
2837+
2838+
v_alignbit_b32 v5, v1, v2, v3 op_sel:[1,1,1]
2839+
// CHECK: v_alignbit_b32 v5, v1, v2, v3 op_sel:[1,1,1,0] ; encoding: [0x05,0x38,0xce,0xd1,0x01,0x05,0x0e,0x04]
2840+
2841+
v_alignbit_b32 v5, v1, v2, v3 op_sel:[1,1,1,1]
2842+
// CHECK: v_alignbit_b32 v5, v1, v2, v3 op_sel:[1,1,1,1] ; encoding: [0x05,0x78,0xce,0xd1,0x01,0x05,0x0e,0x04]
2843+
28322844
v_alignbyte_b32 v5, v1, v2, v3
28332845
// CHECK: [0x05,0x00,0xcf,0xd1,0x01,0x05,0x0e,0x04]
28342846

@@ -3000,6 +3012,18 @@ v_alignbyte_b32 v5, v1, v2, src_execz
30003012
v_alignbyte_b32 v5, v1, v2, src_scc
30013013
// CHECK: [0x05,0x00,0xcf,0xd1,0x01,0x05,0xf6,0x03]
30023014

3015+
v_alignbyte_b32 v5, v1, v2, v3 op_sel:[1]
3016+
// CHECK: v_alignbyte_b32 v5, v1, v2, v3 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0xcf,0xd1,0x01,0x05,0x0e,0x04]
3017+
3018+
v_alignbyte_b32 v5, v1, v2, v3 op_sel:[1,1]
3019+
// CHECK: v_alignbyte_b32 v5, v1, v2, v3 op_sel:[1,1,0,0] ; encoding: [0x05,0x18,0xcf,0xd1,0x01,0x05,0x0e,0x04]
3020+
3021+
v_alignbyte_b32 v5, v1, v2, v3 op_sel:[1,1,1]
3022+
// CHECK: v_alignbyte_b32 v5, v1, v2, v3 op_sel:[1,1,1,0] ; encoding: [0x05,0x38,0xcf,0xd1,0x01,0x05,0x0e,0x04]
3023+
3024+
v_alignbyte_b32 v5, v1, v2, v3 op_sel:[1,1,1,1]
3025+
// CHECK: v_alignbyte_b32 v5, v1, v2, v3 op_sel:[1,1,1,1] ; encoding: [0x05,0x78,0xcf,0xd1,0x01,0x05,0x0e,0x04]
3026+
30033027
v_min3_f32 v5, v1, v2, v3
30043028
// CHECK: [0x05,0x00,0xd0,0xd1,0x01,0x05,0x0e,0x04]
30053029

0 commit comments

Comments
 (0)