Skip to content

[AMDGPU] Support nv memory instructions modifier on gfx1250 #149582

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 25 additions & 1 deletion llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5280,6 +5280,15 @@ bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst,

unsigned CPol = Inst.getOperand(CPolPos).getImm();

if (!isGFX1250()) {
if (CPol & CPol::NV) {
SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
StringRef CStr(S.getPointer());
S = SMLoc::getFromPointer(&CStr.data()[CStr.find("nv")]);
Error(S, "nv is not supported on this GPU");
}
}

if (isGFX12Plus())
return validateTHAndScopeBits(Inst, Operands, CPol);

Expand Down Expand Up @@ -6916,6 +6925,7 @@ ParseStatus AMDGPUAsmParser::parseCPol(OperandVector &Operands) {
int64_t CPolVal = 0;
ParseStatus ResTH = ParseStatus::NoMatch;
ParseStatus ResScope = ParseStatus::NoMatch;
ParseStatus ResNV = ParseStatus::NoMatch;

for (;;) {
if (ResTH.isNoMatch()) {
Expand All @@ -6940,10 +6950,24 @@ ParseStatus AMDGPUAsmParser::parseCPol(OperandVector &Operands) {
}
}

// The NV bit exists in the encoding on all GFX12+ targets, but it only has
// an effect starting with GFX1250. Accept it when parsing on every GFX12
// target and reject it during validation, which gives better diagnostics.
if (ResNV.isNoMatch()) {
if (trySkipId("nv")) {
ResNV = ParseStatus::Success;
CPolVal |= CPol::NV;
continue;
} else if (trySkipId("no", "nv")) {
ResNV = ParseStatus::Success;
continue;
}
}

break;
}

if (ResTH.isNoMatch() && ResScope.isNoMatch())
if (ResTH.isNoMatch() && ResScope.isNoMatch() && ResNV.isNoMatch())
return ParseStatus::NoMatch;

Operands.push_back(AMDGPUOperand::CreateImm(this, CPolVal, StringLoc,
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/Target/AMDGPU/BUFInstructions.td
Original file line number Diff line number Diff line change
Expand Up @@ -2451,6 +2451,7 @@ class VBUFFER_Real <bits<8> op, BUF_Pseudo ps, string real_name> :
let Inst{62} = ps.offen;
let Inst{63} = ps.idxen;

let Inst{7} = cpol{5}; // nv
let Inst{54-53} = cpol{2-1}; // th{2-1}
let Inst{52} = !if(ps.IsAtomicRet, 1, cpol{0}); // th{0}
let Inst{51-50} = cpol{4-3}; // scope
Expand Down
3 changes: 2 additions & 1 deletion llvm/lib/Target/AMDGPU/FLATInstructions.td
Original file line number Diff line number Diff line change
Expand Up @@ -183,7 +183,7 @@ class VFLAT_Real <bits<8> op, FLAT_Pseudo ps, string opName = ps.Mnemonic> :

bits<7> saddr;
bits<8> vdst;
bits<6> cpol;
bits<12> cpol;
bits<8> vdata; // vsrc
bits<8> vaddr;
bits<24> offset;
Expand All @@ -193,6 +193,7 @@ class VFLAT_Real <bits<8> op, FLAT_Pseudo ps, string opName = ps.Mnemonic> :
let Inst{31-26} = 0x3b;
let Inst{39-32} = !if(ps.has_vdst, vdst, ?);
let Inst{49} = ps.sve;
let Inst{7} = cpol{5}; // nv
let Inst{54-53} = cpol{2-1}; // th{2-1}
let Inst{52} = !if(ps.IsAtomicRet, 1, cpol{0}); // th{0}
let Inst{51-50} = cpol{4-3}; // scope
Expand Down
3 changes: 3 additions & 0 deletions llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -160,6 +160,9 @@ void AMDGPUInstPrinter::printCPol(const MCInst *MI, unsigned OpNo,
printTH(MI, TH, Scope, O);
printScope(Scope, O);

if (Imm & CPol::NV)
O << " nv";

return;
}

Expand Down
2 changes: 2 additions & 0 deletions llvm/lib/Target/AMDGPU/SIDefines.h
Original file line number Diff line number Diff line change
Expand Up @@ -398,6 +398,8 @@ enum CPol {
SCOPE_DEV = 2 << 3,
SCOPE_SYS = 3 << 3,

NV = 1 << 5, // Non-volatile bit

SWZ = 1 << 6, // Swizzle bit

ALL = TH | SCOPE,
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/Target/AMDGPU/SIInstrFormats.td
Original file line number Diff line number Diff line change
Expand Up @@ -317,6 +317,7 @@ def CPolBit {
int SLC = 1;
int DLC = 2;
int SCC = 4;
int NV = 5;
}

class VOPDstOperand <RegisterClass rc> : RegisterOperand <rc, "printVOPDst">;
Expand Down
3 changes: 2 additions & 1 deletion llvm/lib/Target/AMDGPU/SMInstructions.td
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ class SM_Real <SM_Pseudo ps, string opName = ps.Mnemonic>
bits<7> sdst;
bits<32> offset;
bits<8> soffset;
bits<5> cpol;
bits<12> cpol;
}

class OffsetMode<bit hasOffset, bit hasSOffset, string variant,
Expand Down Expand Up @@ -1485,6 +1485,7 @@ class SMEM_Real_Load_gfx12<bits<6> op, string ps, string opName, OffsetMode offs
RegisterClass BaseClass = !cast<SM_Load_Pseudo>(ps # offsets.Variant).BaseClass;
let InOperandList = !con((ins BaseClass:$sbase), offsets.Ins, (ins CPol:$cpol));

let Inst{20} = cpol{CPolBit.NV}; // non-volatile
let Inst{22-21} = cpol{4-3}; // scope
let Inst{24-23} = cpol{1-0}; // th - only lower 2 bits are supported
}
Expand Down
14 changes: 14 additions & 0 deletions llvm/test/MC/AMDGPU/gfx1250_asm_smem.s
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -show-encoding %s | FileCheck --check-prefix=GFX1250 %s
// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -show-encoding %s 2>&1 | FileCheck --check-prefix=GFX12-ERR --implicit-check-not=error: --strict-whitespace %s

s_load_b32 s4, s[2:3], 10 nv
// GFX1250: s_load_b32 s4, s[2:3], 0xa nv ; encoding: [0x01,0x01,0x10,0xf4,0x0a,0x00,0x00,0xf8]
// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: nv is not supported on this GPU
// GFX12-ERR-NEXT:{{^}}s_load_b32 s4, s[2:3], 10 nv
// GFX12-ERR-NEXT:{{^}} ^

s_buffer_load_i8 s5, s[4:7], s0 nv
// GFX1250: s_buffer_load_i8 s5, s[4:7], s0 offset:0x0 nv ; encoding: [0x42,0x01,0x13,0xf4,0x00,0x00,0x00,0x00]
// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: nv is not supported on this GPU
// GFX12-ERR-NEXT:{{^}}s_buffer_load_i8 s5, s[4:7], s0 nv
// GFX12-ERR-NEXT:{{^}} ^
20 changes: 20 additions & 0 deletions llvm/test/MC/AMDGPU/gfx1250_asm_vbuffer_mubuf.s
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -show-encoding %s | FileCheck --check-prefix=GFX1250 %s
// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -show-encoding %s 2>&1 | FileCheck --check-prefix=GFX12-ERR --implicit-check-not=error: --strict-whitespace %s

buffer_load_b32 v5, v1, s[8:11], s3 offen offset:4095 nv
// GFX1250: buffer_load_b32 v5, v1, s[8:11], s3 offen offset:4095 nv ; encoding: [0x83,0x00,0x05,0xc4,0x05,0x10,0x80,0x40,0x01,0xff,0x0f,0x00]
// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: nv is not supported on this GPU
// GFX12-ERR-NEXT:{{^}}buffer_load_b32 v5, v1, s[8:11], s3 offen offset:4095 nv
// GFX12-ERR-NEXT:{{^}} ^

buffer_store_b128 v[2:5], v0, s[12:15], s4 idxen offset:4095 nv
// GFX1250: buffer_store_b128 v[2:5], v0, s[12:15], s4 idxen offset:4095 nv ; encoding: [0x84,0x40,0x07,0xc4,0x02,0x18,0x80,0x80,0x00,0xff,0x0f,0x00]
// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: nv is not supported on this GPU
// GFX12-ERR-NEXT:{{^}}buffer_store_b128 v[2:5], v0, s[12:15], s4 idxen offset:4095 nv
// GFX12-ERR-NEXT:{{^}} ^

buffer_atomic_and_b32 v5, v1, s[8:11], s3 offen offset:4095 nv
// GFX1250: buffer_atomic_and_b32 v5, v1, s[8:11], s3 offen offset:4095 nv ; encoding: [0x83,0x00,0x0f,0xc4,0x05,0x10,0x80,0x40,0x01,0xff,0x0f,0x00]
// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: nv is not supported on this GPU
// GFX12-ERR-NEXT:{{^}}buffer_atomic_and_b32 v5, v1, s[8:11], s3 offen offset:4095 nv
// GFX12-ERR-NEXT:{{^}} ^
60 changes: 60 additions & 0 deletions llvm/test/MC/AMDGPU/gfx1250_asm_vflat.s
Original file line number Diff line number Diff line change
@@ -1,6 +1,66 @@
// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -show-encoding %s | FileCheck --check-prefix=GFX1250 %s
// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -show-encoding %s 2>&1 | FileCheck --check-prefix=GFX12-ERR --implicit-check-not=error: --strict-whitespace %s

global_load_b32 v0, v[2:3], off nv
// GFX1250: global_load_b32 v0, v[2:3], off nv ; encoding: [0xfc,0x00,0x05,0xee,0x00,0x00,0x00,0x00,0x02,0x00,0x00,0x00]
// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: nv is not supported on this GPU
// GFX12-ERR-NEXT:{{^}}global_load_b32 v0, v[2:3], off nv
// GFX12-ERR-NEXT:{{^}} ^

global_store_b32 v[2:3], v0, off nv
// GFX1250: global_store_b32 v[2:3], v0, off nv ; encoding: [0xfc,0x80,0x06,0xee,0x00,0x00,0x00,0x00,0x02,0x00,0x00,0x00]
// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: nv is not supported on this GPU
// GFX12-ERR-NEXT:{{^}}global_store_b32 v[2:3], v0, off nv
// GFX12-ERR-NEXT:{{^}} ^

global_atomic_add v[2:3], v2, off nv
// GFX1250: global_atomic_add_u32 v[2:3], v2, off nv ; encoding: [0xfc,0x40,0x0d,0xee,0x00,0x00,0x00,0x01,0x02,0x00,0x00,0x00]
// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: nv is not supported on this GPU
// GFX12-ERR-NEXT:{{^}}global_atomic_add v[2:3], v2, off nv
// GFX12-ERR-NEXT:{{^}} ^

global_load_addtid_b32 v5, s[2:3] nv
// GFX1250: global_load_addtid_b32 v5, s[2:3] nv ; encoding: [0x82,0x00,0x0a,0xee,0x05,0x00,0x00,0x00,0x00,0x00,0x00,0x00]
// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: nv is not supported on this GPU
// GFX12-ERR-NEXT:{{^}}global_load_addtid_b32 v5, s[2:3] nv
// GFX12-ERR-NEXT:{{^}} ^

scratch_load_b32 v0, v2, off nv
// GFX1250: scratch_load_b32 v0, v2, off nv ; encoding: [0xfc,0x00,0x05,0xed,0x00,0x00,0x02,0x00,0x02,0x00,0x00,0x00]
// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: nv is not supported on this GPU
// GFX12-ERR-NEXT:{{^}}scratch_load_b32 v0, v2, off nv
// GFX12-ERR-NEXT:{{^}} ^

scratch_store_b32 v2, v0, off nv
// GFX1250: scratch_store_b32 v2, v0, off nv ; encoding: [0xfc,0x80,0x06,0xed,0x00,0x00,0x02,0x00,0x02,0x00,0x00,0x00]
// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: nv is not supported on this GPU
// GFX12-ERR-NEXT:{{^}}scratch_store_b32 v2, v0, off nv
// GFX12-ERR-NEXT:{{^}} ^

flat_load_b32 v0, v[2:3] nv
// GFX1250: flat_load_b32 v0, v[2:3] nv ; encoding: [0xfc,0x00,0x05,0xec,0x00,0x00,0x00,0x00,0x02,0x00,0x00,0x00]
// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: nv is not supported on this GPU
// GFX12-ERR-NEXT:{{^}}flat_load_b32 v0, v[2:3] nv
// GFX12-ERR-NEXT:{{^}} ^

flat_store_b32 v[2:3], v0 nv
// GFX1250: flat_store_b32 v[2:3], v0 nv ; encoding: [0xfc,0x80,0x06,0xec,0x00,0x00,0x00,0x00,0x02,0x00,0x00,0x00]
// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: nv is not supported on this GPU
// GFX12-ERR-NEXT:{{^}}flat_store_b32 v[2:3], v0 nv
// GFX12-ERR-NEXT:{{^}} ^

flat_atomic_add v[2:3], v2 nv
// GFX1250: flat_atomic_add_u32 v[2:3], v2 nv ; encoding: [0xfc,0x40,0x0d,0xec,0x00,0x00,0x00,0x01,0x02,0x00,0x00,0x00]
// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: nv is not supported on this GPU
// GFX12-ERR-NEXT:{{^}}flat_atomic_add v[2:3], v2 nv
// GFX12-ERR-NEXT:{{^}} ^

scratch_load_b32 v5, v2, off nv
// GFX1250: scratch_load_b32 v5, v2, off nv ; encoding: [0xfc,0x00,0x05,0xed,0x05,0x00,0x02,0x00,0x02,0x00,0x00,0x00]
// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: nv is not supported on this GPU
// GFX12-ERR-NEXT:{{^}}scratch_load_b32 v5, v2, off nv
// GFX12-ERR-NEXT:{{^}} ^

tensor_save s[0:1]
// GFX1250: tensor_save s[0:1] ; encoding: [0x00,0x80,0x1b,0xee,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00]
// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
Expand Down
7 changes: 7 additions & 0 deletions llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_smem.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX1250 %s

# GFX1250: s_buffer_load_i8 s5, s[4:7], s0 offset:0x0 nv ; encoding: [0x42,0x01,0x13,0xf4,0x00,0x00,0x00,0x00]
0x42,0x01,0x13,0xf4,0x00,0x00,0x00,0x00

# GFX1250: s_load_b32 s4, s[2:3], 0xa nv ; encoding: [0x01,0x01,0x10,0xf4,0x0a,0x00,0x00,0xf8]
0x01,0x01,0x10,0xf4,0x0a,0x00,0x00,0xf8
10 changes: 10 additions & 0 deletions llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vbuffer_mubuf.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX1250 %s

# GFX1250: buffer_atomic_and_b32 v5, v1, s[8:11], s3 offen offset:4095 nv ; encoding: [0x83,0x00,0x0f,0xc4,0x05,0x10,0x80,0x40,0x01,0xff,0x0f,0x00]
0x83,0x00,0x0f,0xc4,0x05,0x10,0x80,0x40,0x01,0xff,0x0f,0x00

# GFX1250: buffer_load_b32 v5, v1, s[8:11], s3 offen offset:4095 nv ; encoding: [0x83,0x00,0x05,0xc4,0x05,0x10,0x80,0x40,0x01,0xff,0x0f,0x00]
0x83,0x00,0x05,0xc4,0x05,0x10,0x80,0x40,0x01,0xff,0x0f,0x00

# GFX1250: buffer_store_b128 v[2:5], v0, s[12:15], s4 idxen offset:4095 nv ; encoding: [0x84,0x40,0x07,0xc4,0x02,0x18,0x80,0x80,0x00,0xff,0x0f,0x00]
0x84,0x40,0x07,0xc4,0x02,0x18,0x80,0x80,0x00,0xff,0x0f,0x00
30 changes: 30 additions & 0 deletions llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vflat.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2826,6 +2826,36 @@
# GFX1250: scratch_store_d16_hi_b8 v1, v2, s3 ; encoding: [0x03,0x00,0x09,0xed,0x00,0x00,0x02,0x01,0x01,0x00,0x00,0x00]
0x03,0x00,0x09,0xed,0x00,0x00,0x02,0x01,0x01,0x00,0x00,0x00

# GFX1250: flat_atomic_add_u32 v[2:3], v2 nv ; encoding: [0xfc,0x40,0x0d,0xec,0x00,0x00,0x00,0x01,0x02,0x00,0x00,0x00]
0xfc,0x40,0x0d,0xec,0x00,0x00,0x00,0x01,0x02,0x00,0x00,0x00

# GFX1250: flat_load_b32 v0, v[2:3] nv ; encoding: [0xfc,0x00,0x05,0xec,0x00,0x00,0x00,0x00,0x02,0x00,0x00,0x00]
0xfc,0x00,0x05,0xec,0x00,0x00,0x00,0x00,0x02,0x00,0x00,0x00

# GFX1250: flat_store_b32 v[2:3], v0 nv ; encoding: [0xfc,0x80,0x06,0xec,0x00,0x00,0x00,0x00,0x02,0x00,0x00,0x00]
0xfc,0x80,0x06,0xec,0x00,0x00,0x00,0x00,0x02,0x00,0x00,0x00

# GFX1250: global_atomic_add_u32 v[2:3], v2, off nv ; encoding: [0xfc,0x40,0x0d,0xee,0x00,0x00,0x00,0x01,0x02,0x00,0x00,0x00]
0xfc,0x40,0x0d,0xee,0x00,0x00,0x00,0x01,0x02,0x00,0x00,0x00

# GFX1250: global_load_addtid_b32 v5, s[2:3] nv ; encoding: [0x82,0x00,0x0a,0xee,0x05,0x00,0x00,0x00,0x00,0x00,0x00,0x00]
0x82,0x00,0x0a,0xee,0x05,0x00,0x00,0x00,0x00,0x00,0x00,0x00

# GFX1250: global_load_b32 v0, v[2:3], off nv ; encoding: [0xfc,0x00,0x05,0xee,0x00,0x00,0x00,0x00,0x02,0x00,0x00,0x00]
0xfc,0x00,0x05,0xee,0x00,0x00,0x00,0x00,0x02,0x00,0x00,0x00

# GFX1250: global_store_b32 v[2:3], v0, off nv ; encoding: [0xfc,0x80,0x06,0xee,0x00,0x00,0x00,0x00,0x02,0x00,0x00,0x00]
0xfc,0x80,0x06,0xee,0x00,0x00,0x00,0x00,0x02,0x00,0x00,0x00

# GFX1250: scratch_load_b32 v0, v2, off nv ; encoding: [0xfc,0x00,0x05,0xed,0x00,0x00,0x02,0x00,0x02,0x00,0x00,0x00]
0xfc,0x00,0x05,0xed,0x00,0x00,0x02,0x00,0x02,0x00,0x00,0x00

# GFX1250: scratch_store_b32 v2, v0, off nv ; encoding: [0xfc,0x80,0x06,0xed,0x00,0x00,0x02,0x00,0x02,0x00,0x00,0x00]
0xfc,0x80,0x06,0xed,0x00,0x00,0x02,0x00,0x02,0x00,0x00,0x00

# GFX1250: scratch_load_b32 v5, v2, off nv ; encoding: [0xfc,0x00,0x05,0xed,0x05,0x00,0x02,0x00,0x02,0x00,0x00,0x00]
0xfc,0x00,0x05,0xed,0x05,0x00,0x02,0x00,0x02,0x00,0x00,0x00

# GFX1250: tensor_save s[0:1] ; encoding: [0x00,0x80,0x1b,0xee,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00]
0x00,0x80,0x1b,0xee,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00

Expand Down
Loading