diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp index 7aa06f9079b09..4e7002feea215 100644 --- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp @@ -731,6 +731,8 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM, setTruncStoreAction(MVT::f32, MVT::bf16, Expand); setTruncStoreAction(MVT::f64, MVT::bf16, Expand); setTruncStoreAction(MVT::f64, MVT::f32, Expand); + setTruncStoreAction(MVT::v2f32, MVT::v2f16, Expand); + setTruncStoreAction(MVT::v2f32, MVT::v2bf16, Expand); // PTX does not support load / store predicate registers setOperationAction(ISD::LOAD, MVT::i1, Custom); diff --git a/llvm/test/CodeGen/NVPTX/bf16x2-instructions.ll b/llvm/test/CodeGen/NVPTX/bf16x2-instructions.ll index e2a914d8cfc36..ba5813c869236 100644 --- a/llvm/test/CodeGen/NVPTX/bf16x2-instructions.ll +++ b/llvm/test/CodeGen/NVPTX/bf16x2-instructions.ll @@ -359,11 +359,12 @@ define <2 x bfloat> @test_select_cc_bf16_f32(<2 x bfloat> %a, <2 x bfloat> %b, define <2 x bfloat> @test_fptrunc_2xfloat(<2 x float> %a) #0 { ; CHECK-LABEL: test_fptrunc_2xfloat( ; CHECK: { -; CHECK-NEXT: .reg .b64 %rd<2>; +; CHECK-NEXT: .reg .b32 %r<4>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.b64 %rd1, [test_fptrunc_2xfloat_param_0]; -; CHECK-NEXT: st.param.b32 [func_retval0], %rd1; +; CHECK-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_fptrunc_2xfloat_param_0]; +; CHECK-NEXT: cvt.rn.bf16x2.f32 %r3, %r2, %r1; +; CHECK-NEXT: st.param.b32 [func_retval0], %r3; ; CHECK-NEXT: ret; %r = fptrunc <2 x float> %a to <2 x bfloat> ret <2 x bfloat> %r diff --git a/llvm/test/CodeGen/NVPTX/f16x2-instructions.ll b/llvm/test/CodeGen/NVPTX/f16x2-instructions.ll index d0e2c1817f696..c765e34f3153f 100644 --- a/llvm/test/CodeGen/NVPTX/f16x2-instructions.ll +++ b/llvm/test/CodeGen/NVPTX/f16x2-instructions.ll @@ -1433,11 +1433,16 @@ define <2 x half> @test_sitofp_2xi32_fadd(<2 x i32> %a, <2 x half> %b) #0 { define <2 x half> @test_fptrunc_2xfloat(<2 x float> %a) #0 { ; CHECK-LABEL: test_fptrunc_2xfloat( ; CHECK: { +; CHECK-NEXT: .reg .b16 %rs<3>; +; CHECK-NEXT: .reg .b32 %r<4>; ; CHECK-NEXT: .reg .b64 %rd<2>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.b64 %rd1, [test_fptrunc_2xfloat_param_0]; -; CHECK-NEXT: st.param.b32 [func_retval0], %rd1; +; CHECK-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_fptrunc_2xfloat_param_0]; +; CHECK-NEXT: cvt.rn.f16.f32 %rs1, %r2; +; CHECK-NEXT: cvt.rn.f16.f32 %rs2, %r1; +; CHECK-NEXT: mov.b32 %r3, {%rs2, %rs1}; +; CHECK-NEXT: st.param.b32 [func_retval0], %r3; ; CHECK-NEXT: ret; %r = fptrunc <2 x float> %a to <2 x half> ret <2 x half> %r diff --git a/llvm/test/CodeGen/NVPTX/f32x2-instructions.ll b/llvm/test/CodeGen/NVPTX/f32x2-instructions.ll index af3cb63082e78..f24428ebcfb8c 100644 --- a/llvm/test/CodeGen/NVPTX/f32x2-instructions.ll +++ b/llvm/test/CodeGen/NVPTX/f32x2-instructions.ll @@ -1957,6 +1957,41 @@ define <2 x float> @test_uitofp_2xi32_to_2xfloat(<2 x i32> %a) #0 { ret <2 x float> %r } +define void @test_trunc_to_v2bf16(<2 x float> %a, ptr %p) { +; CHECK-LABEL: test_trunc_to_v2bf16( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<4>; +; CHECK-NEXT: .reg .b64 %rd<3>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b64 %rd2, [test_trunc_to_v2bf16_param_1]; +; CHECK-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_trunc_to_v2bf16_param_0]; +; CHECK-NEXT: cvt.rn.bf16x2.f32 %r3, %r2, %r1; +; CHECK-NEXT: st.b32 [%rd2], %r3; +; CHECK-NEXT: ret; + %trunc = fptrunc <2 x float> %a to <2 x bfloat> + store <2 x bfloat> %trunc, ptr %p + ret void +} + +define void @test_trunc_to_v2f16(<2 x float> %a, ptr %p) { +; CHECK-LABEL: test_trunc_to_v2f16( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<4>; +; CHECK-NEXT: .reg .b64 %rd<3>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b64 %rd2, [test_trunc_to_v2f16_param_1]; +; CHECK-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_trunc_to_v2f16_param_0]; +; CHECK-NEXT: cvt.rn.f16x2.f32 %r3, %r2, %r1; +; CHECK-NEXT: st.b32 [%rd2], %r3; +; CHECK-NEXT: ret; + %trunc = fptrunc <2 x float> %a to <2 x half> + store <2 x half> %trunc, ptr %p + ret void +} + + attributes #0 = { nounwind } attributes #1 = { "unsafe-fp-math" = "true" } attributes #2 = { "denormal-fp-math"="preserve-sign" }