Skip to content

[AMDGPU] Support nv memory instructions modifier on gfx1250 #149582

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Conversation

rampitec
Copy link
Collaborator

No description provided.

Copy link
Collaborator Author

This stack of pull requests is managed by Graphite. Learn more about stacking.

@rampitec rampitec requested a review from changpeng July 18, 2025 20:12
@rampitec rampitec marked this pull request as ready for review July 18, 2025 20:12
@llvmbot llvmbot added backend:AMDGPU llvm:mc Machine (object) code labels Jul 18, 2025
@llvmbot
Copy link
Member

llvmbot commented Jul 18, 2025

@llvm/pr-subscribers-backend-amdgpu

Author: Stanislav Mekhanoshin (rampitec)

Changes

Full diff: https://github.com/llvm/llvm-project/pull/149582.diff

13 Files Affected:

  • (modified) llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp (+25-1)
  • (modified) llvm/lib/Target/AMDGPU/BUFInstructions.td (+1)
  • (modified) llvm/lib/Target/AMDGPU/FLATInstructions.td (+2-1)
  • (modified) llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp (+3)
  • (modified) llvm/lib/Target/AMDGPU/SIDefines.h (+2)
  • (modified) llvm/lib/Target/AMDGPU/SIInstrFormats.td (+1)
  • (modified) llvm/lib/Target/AMDGPU/SMInstructions.td (+2-1)
  • (added) llvm/test/MC/AMDGPU/gfx1250_asm_smem.s (+14)
  • (added) llvm/test/MC/AMDGPU/gfx1250_asm_vbuffer_mubuf.s (+20)
  • (modified) llvm/test/MC/AMDGPU/gfx1250_asm_vflat.s (+60)
  • (added) llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_smem.txt (+7)
  • (added) llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vbuffer_mubuf.txt (+10)
  • (modified) llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vflat.txt (+30)
diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index 43d4e8db791b0..de17fccdda902 100644
--- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -5280,6 +5280,15 @@ bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst,
 
   unsigned CPol = Inst.getOperand(CPolPos).getImm();
 
+  if (!isGFX1250()) {
+    if (CPol & CPol::NV) {
+      SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
+      StringRef CStr(S.getPointer());
+      S = SMLoc::getFromPointer(&CStr.data()[CStr.find("nv")]);
+      Error(S, "nv is not supported on this GPU");
+    }
+  }
+
   if (isGFX12Plus())
     return validateTHAndScopeBits(Inst, Operands, CPol);
 
@@ -6916,6 +6925,7 @@ ParseStatus AMDGPUAsmParser::parseCPol(OperandVector &Operands) {
     int64_t CPolVal = 0;
     ParseStatus ResTH = ParseStatus::NoMatch;
     ParseStatus ResScope = ParseStatus::NoMatch;
+    ParseStatus ResNV = ParseStatus::NoMatch;
 
     for (;;) {
       if (ResTH.isNoMatch()) {
@@ -6940,10 +6950,24 @@ ParseStatus AMDGPUAsmParser::parseCPol(OperandVector &Operands) {
         }
       }
 
+      // NV bit exists on GFX12+, but does something starting from GFX1250.
+      // Allow parsing on all GFX12 and fail on validation for better
+      // diagnostics.
+      if (ResNV.isNoMatch()) {
+        if (trySkipId("nv")) {
+          ResNV = ParseStatus::Success;
+          CPolVal |= CPol::NV;
+          continue;
+        } else if (trySkipId("no", "nv")) {
+          ResNV = ParseStatus::Success;
+          continue;
+        }
+      }
+
       break;
     }
 
-    if (ResTH.isNoMatch() && ResScope.isNoMatch())
+    if (ResTH.isNoMatch() && ResScope.isNoMatch() && ResNV.isNoMatch())
       return ParseStatus::NoMatch;
 
     Operands.push_back(AMDGPUOperand::CreateImm(this, CPolVal, StringLoc,
diff --git a/llvm/lib/Target/AMDGPU/BUFInstructions.td b/llvm/lib/Target/AMDGPU/BUFInstructions.td
index 0caabe41e9b79..e994aeeb82251 100644
--- a/llvm/lib/Target/AMDGPU/BUFInstructions.td
+++ b/llvm/lib/Target/AMDGPU/BUFInstructions.td
@@ -2451,6 +2451,7 @@ class VBUFFER_Real <bits<8> op, BUF_Pseudo ps, string real_name> :
   let Inst{62}    = ps.offen;
   let Inst{63}    = ps.idxen;
 
+  let Inst{7}     = cpol{5};   // nv
   let Inst{54-53} = cpol{2-1}; // th{2-1}
   let Inst{52}    = !if(ps.IsAtomicRet, 1, cpol{0}); // th{0}
   let Inst{51-50} = cpol{4-3}; // scope
diff --git a/llvm/lib/Target/AMDGPU/FLATInstructions.td b/llvm/lib/Target/AMDGPU/FLATInstructions.td
index 1432b5940f3f0..f7f29f17f9d0e 100644
--- a/llvm/lib/Target/AMDGPU/FLATInstructions.td
+++ b/llvm/lib/Target/AMDGPU/FLATInstructions.td
@@ -183,7 +183,7 @@ class VFLAT_Real <bits<8> op, FLAT_Pseudo ps, string opName = ps.Mnemonic> :
 
   bits<7> saddr;
   bits<8> vdst;
-  bits<6> cpol;
+  bits<12> cpol;
   bits<8> vdata; // vsrc
   bits<8> vaddr;
   bits<24> offset;
@@ -193,6 +193,7 @@ class VFLAT_Real <bits<8> op, FLAT_Pseudo ps, string opName = ps.Mnemonic> :
   let Inst{31-26} = 0x3b;
   let Inst{39-32} = !if(ps.has_vdst, vdst, ?);
   let Inst{49} = ps.sve;
+  let Inst{7} = cpol{5}; // nv
   let Inst{54-53} = cpol{2-1}; // th{2-1}
   let Inst{52} = !if(ps.IsAtomicRet, 1, cpol{0}); // th{0}
   let Inst{51-50} = cpol{4-3}; // scope
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
index ec9248b972ec4..44d2f947ec9c2 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
@@ -160,6 +160,9 @@ void AMDGPUInstPrinter::printCPol(const MCInst *MI, unsigned OpNo,
     printTH(MI, TH, Scope, O);
     printScope(Scope, O);
 
+    if (Imm & CPol::NV)
+      O << " nv";
+
     return;
   }
 
diff --git a/llvm/lib/Target/AMDGPU/SIDefines.h b/llvm/lib/Target/AMDGPU/SIDefines.h
index a8649970aa825..edc74605ab241 100644
--- a/llvm/lib/Target/AMDGPU/SIDefines.h
+++ b/llvm/lib/Target/AMDGPU/SIDefines.h
@@ -398,6 +398,8 @@ enum CPol {
   SCOPE_DEV = 2 << 3,
   SCOPE_SYS = 3 << 3,
 
+  NV = 1 << 5, // Non-volatile bit
+
   SWZ = 1 << 6, // Swizzle bit
 
   ALL = TH | SCOPE,
diff --git a/llvm/lib/Target/AMDGPU/SIInstrFormats.td b/llvm/lib/Target/AMDGPU/SIInstrFormats.td
index a368bc5d0b1a1..6b419347c01d9 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrFormats.td
+++ b/llvm/lib/Target/AMDGPU/SIInstrFormats.td
@@ -317,6 +317,7 @@ def CPolBit {
   int SLC = 1;
   int DLC = 2;
   int SCC = 4;
+  int NV = 5;
 }
 
 class VOPDstOperand <RegisterClass rc> : RegisterOperand <rc, "printVOPDst">;
diff --git a/llvm/lib/Target/AMDGPU/SMInstructions.td b/llvm/lib/Target/AMDGPU/SMInstructions.td
index 37dcc10086257..d8b52d271a964 100644
--- a/llvm/lib/Target/AMDGPU/SMInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SMInstructions.td
@@ -87,7 +87,7 @@ class SM_Real <SM_Pseudo ps, string opName = ps.Mnemonic>
   bits<7>  sdst;
   bits<32> offset;
   bits<8>  soffset;
-  bits<5>  cpol;
+  bits<12> cpol;
 }
 
 class OffsetMode<bit hasOffset, bit hasSOffset, string variant,
@@ -1485,6 +1485,7 @@ class SMEM_Real_Load_gfx12<bits<6> op, string ps, string opName, OffsetMode offs
   RegisterClass BaseClass = !cast<SM_Load_Pseudo>(ps # offsets.Variant).BaseClass;
   let InOperandList = !con((ins BaseClass:$sbase), offsets.Ins, (ins CPol:$cpol));
 
+  let Inst{20} = cpol{CPolBit.NV}; // non-volatile
   let Inst{22-21} = cpol{4-3}; // scope
   let Inst{24-23} = cpol{1-0}; // th - only lower 2 bits are supported
 }
diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_smem.s b/llvm/test/MC/AMDGPU/gfx1250_asm_smem.s
new file mode 100644
index 0000000000000..899c4c7aca0ba
--- /dev/null
+++ b/llvm/test/MC/AMDGPU/gfx1250_asm_smem.s
@@ -0,0 +1,14 @@
+// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -show-encoding %s | FileCheck --check-prefix=GFX1250 %s
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -show-encoding %s 2>&1 | FileCheck --check-prefix=GFX12-ERR --implicit-check-not=error: --strict-whitespace %s
+
+s_load_b32 s4, s[2:3], 10 nv
+// GFX1250: s_load_b32 s4, s[2:3], 0xa nv           ; encoding: [0x01,0x01,0x10,0xf4,0x0a,0x00,0x00,0xf8]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: nv is not supported on this GPU
+// GFX12-ERR-NEXT:{{^}}s_load_b32 s4, s[2:3], 10 nv
+// GFX12-ERR-NEXT:{{^}}                          ^
+
+s_buffer_load_i8 s5, s[4:7], s0 nv
+// GFX1250: s_buffer_load_i8 s5, s[4:7], s0 offset:0x0 nv ; encoding: [0x42,0x01,0x13,0xf4,0x00,0x00,0x00,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: nv is not supported on this GPU
+// GFX12-ERR-NEXT:{{^}}s_buffer_load_i8 s5, s[4:7], s0 nv
+// GFX12-ERR-NEXT:{{^}}                                ^
diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vbuffer_mubuf.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vbuffer_mubuf.s
new file mode 100644
index 0000000000000..1d14bd91a7569
--- /dev/null
+++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vbuffer_mubuf.s
@@ -0,0 +1,20 @@
+// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -show-encoding %s | FileCheck --check-prefix=GFX1250 %s
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -show-encoding %s 2>&1 | FileCheck --check-prefix=GFX12-ERR --implicit-check-not=error: --strict-whitespace %s
+
+buffer_load_b32 v5, v1, s[8:11], s3 offen offset:4095 nv
+// GFX1250: buffer_load_b32 v5, v1, s[8:11], s3 offen offset:4095 nv ; encoding: [0x83,0x00,0x05,0xc4,0x05,0x10,0x80,0x40,0x01,0xff,0x0f,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: nv is not supported on this GPU
+// GFX12-ERR-NEXT:{{^}}buffer_load_b32 v5, v1, s[8:11], s3 offen offset:4095 nv
+// GFX12-ERR-NEXT:{{^}}                                                      ^
+
+buffer_store_b128 v[2:5], v0, s[12:15], s4 idxen offset:4095 nv
+// GFX1250: buffer_store_b128 v[2:5], v0, s[12:15], s4 idxen offset:4095 nv ; encoding: [0x84,0x40,0x07,0xc4,0x02,0x18,0x80,0x80,0x00,0xff,0x0f,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: nv is not supported on this GPU
+// GFX12-ERR-NEXT:{{^}}buffer_store_b128 v[2:5], v0, s[12:15], s4 idxen offset:4095 nv
+// GFX12-ERR-NEXT:{{^}}                                                             ^
+
+buffer_atomic_and_b32 v5, v1, s[8:11], s3 offen offset:4095 nv
+// GFX1250: buffer_atomic_and_b32 v5, v1, s[8:11], s3 offen offset:4095 nv ; encoding: [0x83,0x00,0x0f,0xc4,0x05,0x10,0x80,0x40,0x01,0xff,0x0f,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: nv is not supported on this GPU
+// GFX12-ERR-NEXT:{{^}}buffer_atomic_and_b32 v5, v1, s[8:11], s3 offen offset:4095 nv
+// GFX12-ERR-NEXT:{{^}}                                                            ^
diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vflat.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vflat.s
index 737d7b3de4e92..488040e1b5390 100644
--- a/llvm/test/MC/AMDGPU/gfx1250_asm_vflat.s
+++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vflat.s
@@ -1,6 +1,66 @@
 // RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -show-encoding %s | FileCheck --check-prefix=GFX1250 %s
 // RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -show-encoding %s 2>&1 | FileCheck --check-prefix=GFX12-ERR --implicit-check-not=error: --strict-whitespace %s
 
+global_load_b32 v0, v[2:3], off nv
+// GFX1250: global_load_b32 v0, v[2:3], off nv      ; encoding: [0xfc,0x00,0x05,0xee,0x00,0x00,0x00,0x00,0x02,0x00,0x00,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: nv is not supported on this GPU
+// GFX12-ERR-NEXT:{{^}}global_load_b32 v0, v[2:3], off nv
+// GFX12-ERR-NEXT:{{^}}                                ^
+
+global_store_b32 v[2:3], v0, off nv
+// GFX1250: global_store_b32 v[2:3], v0, off nv     ; encoding: [0xfc,0x80,0x06,0xee,0x00,0x00,0x00,0x00,0x02,0x00,0x00,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: nv is not supported on this GPU
+// GFX12-ERR-NEXT:{{^}}global_store_b32 v[2:3], v0, off nv
+// GFX12-ERR-NEXT:{{^}}                                 ^
+
+global_atomic_add v[2:3], v2, off nv
+// GFX1250: global_atomic_add_u32 v[2:3], v2, off nv ; encoding: [0xfc,0x40,0x0d,0xee,0x00,0x00,0x00,0x01,0x02,0x00,0x00,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: nv is not supported on this GPU
+// GFX12-ERR-NEXT:{{^}}global_atomic_add v[2:3], v2, off nv
+// GFX12-ERR-NEXT:{{^}}                                  ^
+
+global_load_addtid_b32 v5, s[2:3] nv
+// GFX1250: global_load_addtid_b32 v5, s[2:3] nv    ; encoding: [0x82,0x00,0x0a,0xee,0x05,0x00,0x00,0x00,0x00,0x00,0x00,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: nv is not supported on this GPU
+// GFX12-ERR-NEXT:{{^}}global_load_addtid_b32 v5, s[2:3] nv
+// GFX12-ERR-NEXT:{{^}}                                  ^
+
+scratch_load_b32 v0, v2, off nv
+// GFX1250: scratch_load_b32 v0, v2, off nv         ; encoding: [0xfc,0x00,0x05,0xed,0x00,0x00,0x02,0x00,0x02,0x00,0x00,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: nv is not supported on this GPU
+// GFX12-ERR-NEXT:{{^}}scratch_load_b32 v0, v2, off nv
+// GFX12-ERR-NEXT:{{^}}                             ^
+
+scratch_store_b32 v2, v0, off nv
+// GFX1250: scratch_store_b32 v2, v0, off nv        ; encoding: [0xfc,0x80,0x06,0xed,0x00,0x00,0x02,0x00,0x02,0x00,0x00,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: nv is not supported on this GPU
+// GFX12-ERR-NEXT:{{^}}scratch_store_b32 v2, v0, off nv
+// GFX12-ERR-NEXT:{{^}}                              ^
+
+flat_load_b32 v0, v[2:3] nv
+// GFX1250: flat_load_b32 v0, v[2:3] nv             ; encoding: [0xfc,0x00,0x05,0xec,0x00,0x00,0x00,0x00,0x02,0x00,0x00,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: nv is not supported on this GPU
+// GFX12-ERR-NEXT:{{^}}flat_load_b32 v0, v[2:3] nv
+// GFX12-ERR-NEXT:{{^}}                         ^
+
+flat_store_b32 v[2:3], v0 nv
+// GFX1250: flat_store_b32 v[2:3], v0 nv            ; encoding: [0xfc,0x80,0x06,0xec,0x00,0x00,0x00,0x00,0x02,0x00,0x00,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: nv is not supported on this GPU
+// GFX12-ERR-NEXT:{{^}}flat_store_b32 v[2:3], v0 nv
+// GFX12-ERR-NEXT:{{^}}                          ^
+
+flat_atomic_add v[2:3], v2 nv
+// GFX1250: flat_atomic_add_u32 v[2:3], v2 nv       ; encoding: [0xfc,0x40,0x0d,0xec,0x00,0x00,0x00,0x01,0x02,0x00,0x00,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: nv is not supported on this GPU
+// GFX12-ERR-NEXT:{{^}}flat_atomic_add v[2:3], v2 nv
+// GFX12-ERR-NEXT:{{^}}                           ^
+
+scratch_load_b32 v5, v2, off nv
+// GFX1250: scratch_load_b32 v5, v2, off nv         ; encoding: [0xfc,0x00,0x05,0xed,0x05,0x00,0x02,0x00,0x02,0x00,0x00,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: nv is not supported on this GPU
+// GFX12-ERR-NEXT:{{^}}scratch_load_b32 v5, v2, off nv
+// GFX12-ERR-NEXT:{{^}}                             ^
+
 tensor_save s[0:1]
 // GFX1250: tensor_save s[0:1] ; encoding: [0x00,0x80,0x1b,0xee,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00]
 // GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_smem.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_smem.txt
new file mode 100644
index 0000000000000..4bd9ab45af6bd
--- /dev/null
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_smem.txt
@@ -0,0 +1,7 @@
+# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX1250 %s
+
+# GFX1250: s_buffer_load_i8 s5, s[4:7], s0 offset:0x0 nv ; encoding: [0x42,0x01,0x13,0xf4,0x00,0x00,0x00,0x00]
+0x42,0x01,0x13,0xf4,0x00,0x00,0x00,0x00
+
+# GFX1250: s_load_b32 s4, s[2:3], 0xa nv           ; encoding: [0x01,0x01,0x10,0xf4,0x0a,0x00,0x00,0xf8]
+0x01,0x01,0x10,0xf4,0x0a,0x00,0x00,0xf8
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vbuffer_mubuf.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vbuffer_mubuf.txt
new file mode 100644
index 0000000000000..a2f12115bb64b
--- /dev/null
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vbuffer_mubuf.txt
@@ -0,0 +1,10 @@
+# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX1250 %s
+
+# GFX1250: buffer_atomic_and_b32 v5, v1, s[8:11], s3 offen offset:4095 nv ; encoding: [0x83,0x00,0x0f,0xc4,0x05,0x10,0x80,0x40,0x01,0xff,0x0f,0x00]
+0x83,0x00,0x0f,0xc4,0x05,0x10,0x80,0x40,0x01,0xff,0x0f,0x00
+
+# GFX1250: buffer_load_b32 v5, v1, s[8:11], s3 offen offset:4095 nv ; encoding: [0x83,0x00,0x05,0xc4,0x05,0x10,0x80,0x40,0x01,0xff,0x0f,0x00]
+0x83,0x00,0x05,0xc4,0x05,0x10,0x80,0x40,0x01,0xff,0x0f,0x00
+
+# GFX1250: buffer_store_b128 v[2:5], v0, s[12:15], s4 idxen offset:4095 nv ; encoding: [0x84,0x40,0x07,0xc4,0x02,0x18,0x80,0x80,0x00,0xff,0x0f,0x00]
+0x84,0x40,0x07,0xc4,0x02,0x18,0x80,0x80,0x00,0xff,0x0f,0x00
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vflat.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vflat.txt
index 55bc3e7a5746c..fcbb58bf7865e 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vflat.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vflat.txt
@@ -2826,6 +2826,36 @@
 # GFX1250: scratch_store_d16_hi_b8 v1, v2, s3      ; encoding: [0x03,0x00,0x09,0xed,0x00,0x00,0x02,0x01,0x01,0x00,0x00,0x00]
 0x03,0x00,0x09,0xed,0x00,0x00,0x02,0x01,0x01,0x00,0x00,0x00
 
+# GFX1250: flat_atomic_add_u32 v[2:3], v2 nv       ; encoding: [0xfc,0x40,0x0d,0xec,0x00,0x00,0x00,0x01,0x02,0x00,0x00,0x00]
+0xfc,0x40,0x0d,0xec,0x00,0x00,0x00,0x01,0x02,0x00,0x00,0x00
+
+# GFX1250: flat_load_b32 v0, v[2:3] nv             ; encoding: [0xfc,0x00,0x05,0xec,0x00,0x00,0x00,0x00,0x02,0x00,0x00,0x00]
+0xfc,0x00,0x05,0xec,0x00,0x00,0x00,0x00,0x02,0x00,0x00,0x00
+
+# GFX1250: flat_store_b32 v[2:3], v0 nv            ; encoding: [0xfc,0x80,0x06,0xec,0x00,0x00,0x00,0x00,0x02,0x00,0x00,0x00]
+0xfc,0x80,0x06,0xec,0x00,0x00,0x00,0x00,0x02,0x00,0x00,0x00
+
+# GFX1250: global_atomic_add_u32 v[2:3], v2, off nv ; encoding: [0xfc,0x40,0x0d,0xee,0x00,0x00,0x00,0x01,0x02,0x00,0x00,0x00]
+0xfc,0x40,0x0d,0xee,0x00,0x00,0x00,0x01,0x02,0x00,0x00,0x00
+
+# GFX1250: global_load_addtid_b32 v5, s[2:3] nv    ; encoding: [0x82,0x00,0x0a,0xee,0x05,0x00,0x00,0x00,0x00,0x00,0x00,0x00]
+0x82,0x00,0x0a,0xee,0x05,0x00,0x00,0x00,0x00,0x00,0x00,0x00
+
+# GFX1250: global_load_b32 v0, v[2:3], off nv      ; encoding: [0xfc,0x00,0x05,0xee,0x00,0x00,0x00,0x00,0x02,0x00,0x00,0x00]
+0xfc,0x00,0x05,0xee,0x00,0x00,0x00,0x00,0x02,0x00,0x00,0x00
+
+# GFX1250: global_store_b32 v[2:3], v0, off nv     ; encoding: [0xfc,0x80,0x06,0xee,0x00,0x00,0x00,0x00,0x02,0x00,0x00,0x00]
+0xfc,0x80,0x06,0xee,0x00,0x00,0x00,0x00,0x02,0x00,0x00,0x00
+
+# GFX1250: scratch_load_b32 v0, v2, off nv         ; encoding: [0xfc,0x00,0x05,0xed,0x00,0x00,0x02,0x00,0x02,0x00,0x00,0x00]
+0xfc,0x00,0x05,0xed,0x00,0x00,0x02,0x00,0x02,0x00,0x00,0x00
+
+# GFX1250: scratch_store_b32 v2, v0, off nv        ; encoding: [0xfc,0x80,0x06,0xed,0x00,0x00,0x02,0x00,0x02,0x00,0x00,0x00]
+0xfc,0x80,0x06,0xed,0x00,0x00,0x02,0x00,0x02,0x00,0x00,0x00
+
+# GFX1250: scratch_load_b32 v5, v2, off nv         ; encoding: [0xfc,0x00,0x05,0xed,0x05,0x00,0x02,0x00,0x02,0x00,0x00,0x00]
+0xfc,0x00,0x05,0xed,0x05,0x00,0x02,0x00,0x02,0x00,0x00,0x00
+
 # GFX1250: tensor_save s[0:1] ; encoding: [0x00,0x80,0x1b,0xee,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00]
 0x00,0x80,0x1b,0xee,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
 

@llvmbot
Copy link
Member

llvmbot commented Jul 18, 2025

@llvm/pr-subscribers-mc

Author: Stanislav Mekhanoshin (rampitec)

Changes

Full diff: https://github.com/llvm/llvm-project/pull/149582.diff

13 Files Affected:

  • (modified) llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp (+25-1)
  • (modified) llvm/lib/Target/AMDGPU/BUFInstructions.td (+1)
  • (modified) llvm/lib/Target/AMDGPU/FLATInstructions.td (+2-1)
  • (modified) llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp (+3)
  • (modified) llvm/lib/Target/AMDGPU/SIDefines.h (+2)
  • (modified) llvm/lib/Target/AMDGPU/SIInstrFormats.td (+1)
  • (modified) llvm/lib/Target/AMDGPU/SMInstructions.td (+2-1)
  • (added) llvm/test/MC/AMDGPU/gfx1250_asm_smem.s (+14)
  • (added) llvm/test/MC/AMDGPU/gfx1250_asm_vbuffer_mubuf.s (+20)
  • (modified) llvm/test/MC/AMDGPU/gfx1250_asm_vflat.s (+60)
  • (added) llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_smem.txt (+7)
  • (added) llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vbuffer_mubuf.txt (+10)
  • (modified) llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vflat.txt (+30)
diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index 43d4e8db791b0..de17fccdda902 100644
--- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -5280,6 +5280,15 @@ bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst,
 
   unsigned CPol = Inst.getOperand(CPolPos).getImm();
 
+  if (!isGFX1250()) {
+    if (CPol & CPol::NV) {
+      SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
+      StringRef CStr(S.getPointer());
+      S = SMLoc::getFromPointer(&CStr.data()[CStr.find("nv")]);
+      Error(S, "nv is not supported on this GPU");
+    }
+  }
+
   if (isGFX12Plus())
     return validateTHAndScopeBits(Inst, Operands, CPol);
 
@@ -6916,6 +6925,7 @@ ParseStatus AMDGPUAsmParser::parseCPol(OperandVector &Operands) {
     int64_t CPolVal = 0;
     ParseStatus ResTH = ParseStatus::NoMatch;
     ParseStatus ResScope = ParseStatus::NoMatch;
+    ParseStatus ResNV = ParseStatus::NoMatch;
 
     for (;;) {
       if (ResTH.isNoMatch()) {
@@ -6940,10 +6950,24 @@ ParseStatus AMDGPUAsmParser::parseCPol(OperandVector &Operands) {
         }
       }
 
+      // NV bit exists on GFX12+, but does something starting from GFX1250.
+      // Allow parsing on all GFX12 and fail on validation for better
+      // diagnostics.
+      if (ResNV.isNoMatch()) {
+        if (trySkipId("nv")) {
+          ResNV = ParseStatus::Success;
+          CPolVal |= CPol::NV;
+          continue;
+        } else if (trySkipId("no", "nv")) {
+          ResNV = ParseStatus::Success;
+          continue;
+        }
+      }
+
       break;
     }
 
-    if (ResTH.isNoMatch() && ResScope.isNoMatch())
+    if (ResTH.isNoMatch() && ResScope.isNoMatch() && ResNV.isNoMatch())
       return ParseStatus::NoMatch;
 
     Operands.push_back(AMDGPUOperand::CreateImm(this, CPolVal, StringLoc,
diff --git a/llvm/lib/Target/AMDGPU/BUFInstructions.td b/llvm/lib/Target/AMDGPU/BUFInstructions.td
index 0caabe41e9b79..e994aeeb82251 100644
--- a/llvm/lib/Target/AMDGPU/BUFInstructions.td
+++ b/llvm/lib/Target/AMDGPU/BUFInstructions.td
@@ -2451,6 +2451,7 @@ class VBUFFER_Real <bits<8> op, BUF_Pseudo ps, string real_name> :
   let Inst{62}    = ps.offen;
   let Inst{63}    = ps.idxen;
 
+  let Inst{7}     = cpol{5};   // nv
   let Inst{54-53} = cpol{2-1}; // th{2-1}
   let Inst{52}    = !if(ps.IsAtomicRet, 1, cpol{0}); // th{0}
   let Inst{51-50} = cpol{4-3}; // scope
diff --git a/llvm/lib/Target/AMDGPU/FLATInstructions.td b/llvm/lib/Target/AMDGPU/FLATInstructions.td
index 1432b5940f3f0..f7f29f17f9d0e 100644
--- a/llvm/lib/Target/AMDGPU/FLATInstructions.td
+++ b/llvm/lib/Target/AMDGPU/FLATInstructions.td
@@ -183,7 +183,7 @@ class VFLAT_Real <bits<8> op, FLAT_Pseudo ps, string opName = ps.Mnemonic> :
 
   bits<7> saddr;
   bits<8> vdst;
-  bits<6> cpol;
+  bits<12> cpol;
   bits<8> vdata; // vsrc
   bits<8> vaddr;
   bits<24> offset;
@@ -193,6 +193,7 @@ class VFLAT_Real <bits<8> op, FLAT_Pseudo ps, string opName = ps.Mnemonic> :
   let Inst{31-26} = 0x3b;
   let Inst{39-32} = !if(ps.has_vdst, vdst, ?);
   let Inst{49} = ps.sve;
+  let Inst{7} = cpol{5}; // nv
   let Inst{54-53} = cpol{2-1}; // th{2-1}
   let Inst{52} = !if(ps.IsAtomicRet, 1, cpol{0}); // th{0}
   let Inst{51-50} = cpol{4-3}; // scope
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
index ec9248b972ec4..44d2f947ec9c2 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
@@ -160,6 +160,9 @@ void AMDGPUInstPrinter::printCPol(const MCInst *MI, unsigned OpNo,
     printTH(MI, TH, Scope, O);
     printScope(Scope, O);
 
+    if (Imm & CPol::NV)
+      O << " nv";
+
     return;
   }
 
diff --git a/llvm/lib/Target/AMDGPU/SIDefines.h b/llvm/lib/Target/AMDGPU/SIDefines.h
index a8649970aa825..edc74605ab241 100644
--- a/llvm/lib/Target/AMDGPU/SIDefines.h
+++ b/llvm/lib/Target/AMDGPU/SIDefines.h
@@ -398,6 +398,8 @@ enum CPol {
   SCOPE_DEV = 2 << 3,
   SCOPE_SYS = 3 << 3,
 
+  NV = 1 << 5, // Non-volatile bit
+
   SWZ = 1 << 6, // Swizzle bit
 
   ALL = TH | SCOPE,
diff --git a/llvm/lib/Target/AMDGPU/SIInstrFormats.td b/llvm/lib/Target/AMDGPU/SIInstrFormats.td
index a368bc5d0b1a1..6b419347c01d9 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrFormats.td
+++ b/llvm/lib/Target/AMDGPU/SIInstrFormats.td
@@ -317,6 +317,7 @@ def CPolBit {
   int SLC = 1;
   int DLC = 2;
   int SCC = 4;
+  int NV = 5;
 }
 
 class VOPDstOperand <RegisterClass rc> : RegisterOperand <rc, "printVOPDst">;
diff --git a/llvm/lib/Target/AMDGPU/SMInstructions.td b/llvm/lib/Target/AMDGPU/SMInstructions.td
index 37dcc10086257..d8b52d271a964 100644
--- a/llvm/lib/Target/AMDGPU/SMInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SMInstructions.td
@@ -87,7 +87,7 @@ class SM_Real <SM_Pseudo ps, string opName = ps.Mnemonic>
   bits<7>  sdst;
   bits<32> offset;
   bits<8>  soffset;
-  bits<5>  cpol;
+  bits<12> cpol;
 }
 
 class OffsetMode<bit hasOffset, bit hasSOffset, string variant,
@@ -1485,6 +1485,7 @@ class SMEM_Real_Load_gfx12<bits<6> op, string ps, string opName, OffsetMode offs
   RegisterClass BaseClass = !cast<SM_Load_Pseudo>(ps # offsets.Variant).BaseClass;
   let InOperandList = !con((ins BaseClass:$sbase), offsets.Ins, (ins CPol:$cpol));
 
+  let Inst{20} = cpol{CPolBit.NV}; // non-volatile
   let Inst{22-21} = cpol{4-3}; // scope
   let Inst{24-23} = cpol{1-0}; // th - only lower 2 bits are supported
 }
diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_smem.s b/llvm/test/MC/AMDGPU/gfx1250_asm_smem.s
new file mode 100644
index 0000000000000..899c4c7aca0ba
--- /dev/null
+++ b/llvm/test/MC/AMDGPU/gfx1250_asm_smem.s
@@ -0,0 +1,14 @@
+// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -show-encoding %s | FileCheck --check-prefix=GFX1250 %s
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -show-encoding %s 2>&1 | FileCheck --check-prefix=GFX12-ERR --implicit-check-not=error: --strict-whitespace %s
+
+s_load_b32 s4, s[2:3], 10 nv
+// GFX1250: s_load_b32 s4, s[2:3], 0xa nv           ; encoding: [0x01,0x01,0x10,0xf4,0x0a,0x00,0x00,0xf8]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: nv is not supported on this GPU
+// GFX12-ERR-NEXT:{{^}}s_load_b32 s4, s[2:3], 10 nv
+// GFX12-ERR-NEXT:{{^}}                          ^
+
+s_buffer_load_i8 s5, s[4:7], s0 nv
+// GFX1250: s_buffer_load_i8 s5, s[4:7], s0 offset:0x0 nv ; encoding: [0x42,0x01,0x13,0xf4,0x00,0x00,0x00,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: nv is not supported on this GPU
+// GFX12-ERR-NEXT:{{^}}s_buffer_load_i8 s5, s[4:7], s0 nv
+// GFX12-ERR-NEXT:{{^}}                                ^
diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vbuffer_mubuf.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vbuffer_mubuf.s
new file mode 100644
index 0000000000000..1d14bd91a7569
--- /dev/null
+++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vbuffer_mubuf.s
@@ -0,0 +1,20 @@
+// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -show-encoding %s | FileCheck --check-prefix=GFX1250 %s
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -show-encoding %s 2>&1 | FileCheck --check-prefix=GFX12-ERR --implicit-check-not=error: --strict-whitespace %s
+
+buffer_load_b32 v5, v1, s[8:11], s3 offen offset:4095 nv
+// GFX1250: buffer_load_b32 v5, v1, s[8:11], s3 offen offset:4095 nv ; encoding: [0x83,0x00,0x05,0xc4,0x05,0x10,0x80,0x40,0x01,0xff,0x0f,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: nv is not supported on this GPU
+// GFX12-ERR-NEXT:{{^}}buffer_load_b32 v5, v1, s[8:11], s3 offen offset:4095 nv
+// GFX12-ERR-NEXT:{{^}}                                                      ^
+
+buffer_store_b128 v[2:5], v0, s[12:15], s4 idxen offset:4095 nv
+// GFX1250: buffer_store_b128 v[2:5], v0, s[12:15], s4 idxen offset:4095 nv ; encoding: [0x84,0x40,0x07,0xc4,0x02,0x18,0x80,0x80,0x00,0xff,0x0f,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: nv is not supported on this GPU
+// GFX12-ERR-NEXT:{{^}}buffer_store_b128 v[2:5], v0, s[12:15], s4 idxen offset:4095 nv
+// GFX12-ERR-NEXT:{{^}}                                                             ^
+
+buffer_atomic_and_b32 v5, v1, s[8:11], s3 offen offset:4095 nv
+// GFX1250: buffer_atomic_and_b32 v5, v1, s[8:11], s3 offen offset:4095 nv ; encoding: [0x83,0x00,0x0f,0xc4,0x05,0x10,0x80,0x40,0x01,0xff,0x0f,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: nv is not supported on this GPU
+// GFX12-ERR-NEXT:{{^}}buffer_atomic_and_b32 v5, v1, s[8:11], s3 offen offset:4095 nv
+// GFX12-ERR-NEXT:{{^}}                                                            ^
diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vflat.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vflat.s
index 737d7b3de4e92..488040e1b5390 100644
--- a/llvm/test/MC/AMDGPU/gfx1250_asm_vflat.s
+++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vflat.s
@@ -1,6 +1,66 @@
 // RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -show-encoding %s | FileCheck --check-prefix=GFX1250 %s
 // RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -show-encoding %s 2>&1 | FileCheck --check-prefix=GFX12-ERR --implicit-check-not=error: --strict-whitespace %s
 
+global_load_b32 v0, v[2:3], off nv
+// GFX1250: global_load_b32 v0, v[2:3], off nv      ; encoding: [0xfc,0x00,0x05,0xee,0x00,0x00,0x00,0x00,0x02,0x00,0x00,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: nv is not supported on this GPU
+// GFX12-ERR-NEXT:{{^}}global_load_b32 v0, v[2:3], off nv
+// GFX12-ERR-NEXT:{{^}}                                ^
+
+global_store_b32 v[2:3], v0, off nv
+// GFX1250: global_store_b32 v[2:3], v0, off nv     ; encoding: [0xfc,0x80,0x06,0xee,0x00,0x00,0x00,0x00,0x02,0x00,0x00,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: nv is not supported on this GPU
+// GFX12-ERR-NEXT:{{^}}global_store_b32 v[2:3], v0, off nv
+// GFX12-ERR-NEXT:{{^}}                                 ^
+
+global_atomic_add v[2:3], v2, off nv
+// GFX1250: global_atomic_add_u32 v[2:3], v2, off nv ; encoding: [0xfc,0x40,0x0d,0xee,0x00,0x00,0x00,0x01,0x02,0x00,0x00,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: nv is not supported on this GPU
+// GFX12-ERR-NEXT:{{^}}global_atomic_add v[2:3], v2, off nv
+// GFX12-ERR-NEXT:{{^}}                                  ^
+
+global_load_addtid_b32 v5, s[2:3] nv
+// GFX1250: global_load_addtid_b32 v5, s[2:3] nv    ; encoding: [0x82,0x00,0x0a,0xee,0x05,0x00,0x00,0x00,0x00,0x00,0x00,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: nv is not supported on this GPU
+// GFX12-ERR-NEXT:{{^}}global_load_addtid_b32 v5, s[2:3] nv
+// GFX12-ERR-NEXT:{{^}}                                  ^
+
+scratch_load_b32 v0, v2, off nv
+// GFX1250: scratch_load_b32 v0, v2, off nv         ; encoding: [0xfc,0x00,0x05,0xed,0x00,0x00,0x02,0x00,0x02,0x00,0x00,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: nv is not supported on this GPU
+// GFX12-ERR-NEXT:{{^}}scratch_load_b32 v0, v2, off nv
+// GFX12-ERR-NEXT:{{^}}                             ^
+
+scratch_store_b32 v2, v0, off nv
+// GFX1250: scratch_store_b32 v2, v0, off nv        ; encoding: [0xfc,0x80,0x06,0xed,0x00,0x00,0x02,0x00,0x02,0x00,0x00,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: nv is not supported on this GPU
+// GFX12-ERR-NEXT:{{^}}scratch_store_b32 v2, v0, off nv
+// GFX12-ERR-NEXT:{{^}}                              ^
+
+flat_load_b32 v0, v[2:3] nv
+// GFX1250: flat_load_b32 v0, v[2:3] nv             ; encoding: [0xfc,0x00,0x05,0xec,0x00,0x00,0x00,0x00,0x02,0x00,0x00,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: nv is not supported on this GPU
+// GFX12-ERR-NEXT:{{^}}flat_load_b32 v0, v[2:3] nv
+// GFX12-ERR-NEXT:{{^}}                         ^
+
+flat_store_b32 v[2:3], v0 nv
+// GFX1250: flat_store_b32 v[2:3], v0 nv            ; encoding: [0xfc,0x80,0x06,0xec,0x00,0x00,0x00,0x00,0x02,0x00,0x00,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: nv is not supported on this GPU
+// GFX12-ERR-NEXT:{{^}}flat_store_b32 v[2:3], v0 nv
+// GFX12-ERR-NEXT:{{^}}                          ^
+
+flat_atomic_add v[2:3], v2 nv
+// GFX1250: flat_atomic_add_u32 v[2:3], v2 nv       ; encoding: [0xfc,0x40,0x0d,0xec,0x00,0x00,0x00,0x01,0x02,0x00,0x00,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: nv is not supported on this GPU
+// GFX12-ERR-NEXT:{{^}}flat_atomic_add v[2:3], v2 nv
+// GFX12-ERR-NEXT:{{^}}                           ^
+
+scratch_load_b32 v5, v2, off nv
+// GFX1250: scratch_load_b32 v5, v2, off nv         ; encoding: [0xfc,0x00,0x05,0xed,0x05,0x00,0x02,0x00,0x02,0x00,0x00,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: nv is not supported on this GPU
+// GFX12-ERR-NEXT:{{^}}scratch_load_b32 v5, v2, off nv
+// GFX12-ERR-NEXT:{{^}}                             ^
+
 tensor_save s[0:1]
 // GFX1250: tensor_save s[0:1] ; encoding: [0x00,0x80,0x1b,0xee,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00]
 // GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_smem.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_smem.txt
new file mode 100644
index 0000000000000..4bd9ab45af6bd
--- /dev/null
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_smem.txt
@@ -0,0 +1,7 @@
+# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX1250 %s
+
+# GFX1250: s_buffer_load_i8 s5, s[4:7], s0 offset:0x0 nv ; encoding: [0x42,0x01,0x13,0xf4,0x00,0x00,0x00,0x00]
+0x42,0x01,0x13,0xf4,0x00,0x00,0x00,0x00
+
+# GFX1250: s_load_b32 s4, s[2:3], 0xa nv           ; encoding: [0x01,0x01,0x10,0xf4,0x0a,0x00,0x00,0xf8]
+0x01,0x01,0x10,0xf4,0x0a,0x00,0x00,0xf8
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vbuffer_mubuf.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vbuffer_mubuf.txt
new file mode 100644
index 0000000000000..a2f12115bb64b
--- /dev/null
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vbuffer_mubuf.txt
@@ -0,0 +1,10 @@
+# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX1250 %s
+
+# GFX1250: buffer_atomic_and_b32 v5, v1, s[8:11], s3 offen offset:4095 nv ; encoding: [0x83,0x00,0x0f,0xc4,0x05,0x10,0x80,0x40,0x01,0xff,0x0f,0x00]
+0x83,0x00,0x0f,0xc4,0x05,0x10,0x80,0x40,0x01,0xff,0x0f,0x00
+
+# GFX1250: buffer_load_b32 v5, v1, s[8:11], s3 offen offset:4095 nv ; encoding: [0x83,0x00,0x05,0xc4,0x05,0x10,0x80,0x40,0x01,0xff,0x0f,0x00]
+0x83,0x00,0x05,0xc4,0x05,0x10,0x80,0x40,0x01,0xff,0x0f,0x00
+
+# GFX1250: buffer_store_b128 v[2:5], v0, s[12:15], s4 idxen offset:4095 nv ; encoding: [0x84,0x40,0x07,0xc4,0x02,0x18,0x80,0x80,0x00,0xff,0x0f,0x00]
+0x84,0x40,0x07,0xc4,0x02,0x18,0x80,0x80,0x00,0xff,0x0f,0x00
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vflat.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vflat.txt
index 55bc3e7a5746c..fcbb58bf7865e 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vflat.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vflat.txt
@@ -2826,6 +2826,36 @@
 # GFX1250: scratch_store_d16_hi_b8 v1, v2, s3      ; encoding: [0x03,0x00,0x09,0xed,0x00,0x00,0x02,0x01,0x01,0x00,0x00,0x00]
 0x03,0x00,0x09,0xed,0x00,0x00,0x02,0x01,0x01,0x00,0x00,0x00
 
+# GFX1250: flat_atomic_add_u32 v[2:3], v2 nv       ; encoding: [0xfc,0x40,0x0d,0xec,0x00,0x00,0x00,0x01,0x02,0x00,0x00,0x00]
+0xfc,0x40,0x0d,0xec,0x00,0x00,0x00,0x01,0x02,0x00,0x00,0x00
+
+# GFX1250: flat_load_b32 v0, v[2:3] nv             ; encoding: [0xfc,0x00,0x05,0xec,0x00,0x00,0x00,0x00,0x02,0x00,0x00,0x00]
+0xfc,0x00,0x05,0xec,0x00,0x00,0x00,0x00,0x02,0x00,0x00,0x00
+
+# GFX1250: flat_store_b32 v[2:3], v0 nv            ; encoding: [0xfc,0x80,0x06,0xec,0x00,0x00,0x00,0x00,0x02,0x00,0x00,0x00]
+0xfc,0x80,0x06,0xec,0x00,0x00,0x00,0x00,0x02,0x00,0x00,0x00
+
+# GFX1250: global_atomic_add_u32 v[2:3], v2, off nv ; encoding: [0xfc,0x40,0x0d,0xee,0x00,0x00,0x00,0x01,0x02,0x00,0x00,0x00]
+0xfc,0x40,0x0d,0xee,0x00,0x00,0x00,0x01,0x02,0x00,0x00,0x00
+
+# GFX1250: global_load_addtid_b32 v5, s[2:3] nv    ; encoding: [0x82,0x00,0x0a,0xee,0x05,0x00,0x00,0x00,0x00,0x00,0x00,0x00]
+0x82,0x00,0x0a,0xee,0x05,0x00,0x00,0x00,0x00,0x00,0x00,0x00
+
+# GFX1250: global_load_b32 v0, v[2:3], off nv      ; encoding: [0xfc,0x00,0x05,0xee,0x00,0x00,0x00,0x00,0x02,0x00,0x00,0x00]
+0xfc,0x00,0x05,0xee,0x00,0x00,0x00,0x00,0x02,0x00,0x00,0x00
+
+# GFX1250: global_store_b32 v[2:3], v0, off nv     ; encoding: [0xfc,0x80,0x06,0xee,0x00,0x00,0x00,0x00,0x02,0x00,0x00,0x00]
+0xfc,0x80,0x06,0xee,0x00,0x00,0x00,0x00,0x02,0x00,0x00,0x00
+
+# GFX1250: scratch_load_b32 v0, v2, off nv         ; encoding: [0xfc,0x00,0x05,0xed,0x00,0x00,0x02,0x00,0x02,0x00,0x00,0x00]
+0xfc,0x00,0x05,0xed,0x00,0x00,0x02,0x00,0x02,0x00,0x00,0x00
+
+# GFX1250: scratch_store_b32 v2, v0, off nv        ; encoding: [0xfc,0x80,0x06,0xed,0x00,0x00,0x02,0x00,0x02,0x00,0x00,0x00]
+0xfc,0x80,0x06,0xed,0x00,0x00,0x02,0x00,0x02,0x00,0x00,0x00
+
+# GFX1250: scratch_load_b32 v5, v2, off nv         ; encoding: [0xfc,0x00,0x05,0xed,0x05,0x00,0x02,0x00,0x02,0x00,0x00,0x00]
+0xfc,0x00,0x05,0xed,0x05,0x00,0x02,0x00,0x02,0x00,0x00,0x00
+
 # GFX1250: tensor_save s[0:1] ; encoding: [0x00,0x80,0x1b,0xee,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00]
 0x00,0x80,0x1b,0xee,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
 

@rampitec rampitec requested a review from shiltian July 18, 2025 21:06
@rampitec rampitec merged commit 6d8e53d into main Jul 18, 2025
14 checks passed
@rampitec rampitec deleted the users/rampitec/07-18-_amdgpu_support_nv_memory_instructions_modifier_on_gfx1250 branch July 18, 2025 21:38
@llvm-ci
Copy link
Collaborator

llvm-ci commented Jul 18, 2025

LLVM Buildbot has detected a new failure on builder ml-opt-devrel-x86-64 running on ml-opt-devrel-x86-64-b2 while building llvm at step 6 "test-build-unified-tree-check-all".

Full details are available at: https://lab.llvm.org/buildbot/#/builders/175/builds/22572

Here is the relevant piece of the build log for the reference
Step 6 (test-build-unified-tree-check-all) failure: test (failure)
******************** TEST 'LLVM :: CodeGen/NVPTX/f16x2-instructions.ll' FAILED ********************
Exit Code: 1

Command Output (stderr):
--
/b/ml-opt-devrel-x86-64-b1/build/bin/llc < /b/ml-opt-devrel-x86-64-b1/llvm-project/llvm/test/CodeGen/NVPTX/f16x2-instructions.ll -mtriple=nvptx64-nvidia-cuda -mcpu=sm_53           -O0 -disable-post-ra -frame-pointer=all -verify-machineinstrs  | /b/ml-opt-devrel-x86-64-b1/build/bin/FileCheck -allow-deprecated-dag-overlap -check-prefixes CHECK,CHECK-F16 /b/ml-opt-devrel-x86-64-b1/llvm-project/llvm/test/CodeGen/NVPTX/f16x2-instructions.ll # RUN: at line 3
+ /b/ml-opt-devrel-x86-64-b1/build/bin/llc -mtriple=nvptx64-nvidia-cuda -mcpu=sm_53 -O0 -disable-post-ra -frame-pointer=all -verify-machineinstrs
+ /b/ml-opt-devrel-x86-64-b1/build/bin/FileCheck -allow-deprecated-dag-overlap -check-prefixes CHECK,CHECK-F16 /b/ml-opt-devrel-x86-64-b1/llvm-project/llvm/test/CodeGen/NVPTX/f16x2-instructions.ll
/b/ml-opt-devrel-x86-64-b1/llvm-project/llvm/test/CodeGen/NVPTX/f16x2-instructions.ll:1507:15: error: CHECK-NEXT: expected string not found in input
; CHECK-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_fptrunc_2xfloat_param_0];
              ^
<stdin>:960:10: note: scanning from here
// %bb.0:
         ^
<stdin>:961:2: note: possible intended match here
 ld.param.b64 %rd1, [test_fptrunc_2xfloat_param_0];
 ^

Input file: <stdin>
Check file: /b/ml-opt-devrel-x86-64-b1/llvm-project/llvm/test/CodeGen/NVPTX/f16x2-instructions.ll

-dump-input=help explains the following input dump.

Input was:
<<<<<<
             .
             .
             .
           955: { 
           956:  .reg .b16 %rs<3>; 
           957:  .reg .b32 %r<4>; 
           958:  .reg .b64 %rd<2>; 
           959:  
           960: // %bb.0: 
next:1507'0              X error: no match found
           961:  ld.param.b64 %rd1, [test_fptrunc_2xfloat_param_0]; 
next:1507'0     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
next:1507'1      ?                                                   possible intended match
           962:  mov.b64 {%r1, %r2}, %rd1; 
next:1507'0     ~~~~~~~~~~~~~~~~~~~~~~~~~~~
           963:  cvt.rn.f16.f32 %rs1, %r2; 
next:1507'0     ~~~~~~~~~~~~~~~~~~~~~~~~~~~
           964:  cvt.rn.f16.f32 %rs2, %r1; 
next:1507'0     ~~~~~~~~~~~~~~~~~~~~~~~~~~~
           965:  mov.b32 %r3, {%rs2, %rs1}; 
next:1507'0     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
           966:  st.param.b32 [func_retval0], %r3; 
next:1507'0     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
             .
             .
...

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
backend:AMDGPU llvm:mc Machine (object) code
Projects
None yet
Development

Successfully merging this pull request may close these issues.

4 participants