diff --git a/llvm/test/CodeGen/AArch64/replace-with-veclib-armpl.ll b/llvm/test/CodeGen/AArch64/replace-with-veclib-armpl.ll index 4480a90a2728d..758df0493cc50 100644 --- a/llvm/test/CodeGen/AArch64/replace-with-veclib-armpl.ll +++ b/llvm/test/CodeGen/AArch64/replace-with-veclib-armpl.ll @@ -15,7 +15,7 @@ declare @llvm.cos.nxv2f64() declare @llvm.cos.nxv4f32() ;. -; CHECK: @llvm.compiler.used = appending global [36 x ptr] [ptr @armpl_vcosq_f64, ptr @armpl_vcosq_f32, ptr @armpl_svcos_f64_x, ptr @armpl_svcos_f32_x, ptr @armpl_vsinq_f64, ptr @armpl_vsinq_f32, ptr @armpl_svsin_f64_x, ptr @armpl_svsin_f32_x, ptr @armpl_vexpq_f64, ptr @armpl_vexpq_f32, ptr @armpl_svexp_f64_x, ptr @armpl_svexp_f32_x, ptr @armpl_vexp2q_f64, ptr @armpl_vexp2q_f32, ptr @armpl_svexp2_f64_x, ptr @armpl_svexp2_f32_x, ptr @armpl_vexp10q_f64, ptr @armpl_vexp10q_f32, ptr @armpl_svexp10_f64_x, ptr @armpl_svexp10_f32_x, ptr @armpl_vlogq_f64, ptr @armpl_vlogq_f32, ptr @armpl_svlog_f64_x, ptr @armpl_svlog_f32_x, ptr @armpl_vlog2q_f64, ptr @armpl_vlog2q_f32, ptr @armpl_svlog2_f64_x, ptr @armpl_svlog2_f32_x, ptr @armpl_vlog10q_f64, ptr @armpl_vlog10q_f32, ptr @armpl_svlog10_f64_x, ptr @armpl_svlog10_f32_x, ptr @armpl_vfmodq_f64, ptr @armpl_vfmodq_f32, ptr @armpl_svfmod_f64_x, ptr @armpl_svfmod_f32_x], section "llvm.metadata" +; CHECK: @llvm.compiler.used = appending global [36 x ptr] [ptr @armpl_vcosq_f64, ptr @armpl_vcosq_f32, ptr @armpl_svcos_f64_x, ptr @armpl_svcos_f32_x, ptr @armpl_vexpq_f64, ptr @armpl_vexpq_f32, ptr @armpl_svexp_f64_x, ptr @armpl_svexp_f32_x, ptr @armpl_vexp10q_f64, ptr @armpl_vexp10q_f32, ptr @armpl_svexp10_f64_x, ptr @armpl_svexp10_f32_x, ptr @armpl_vexp2q_f64, ptr @armpl_vexp2q_f32, ptr @armpl_svexp2_f64_x, ptr @armpl_svexp2_f32_x, ptr @armpl_vlogq_f64, ptr @armpl_vlogq_f32, ptr @armpl_svlog_f64_x, ptr @armpl_svlog_f32_x, ptr @armpl_vlog10q_f64, ptr @armpl_vlog10q_f32, ptr @armpl_svlog10_f64_x, ptr @armpl_svlog10_f32_x, ptr @armpl_vlog2q_f64, ptr @armpl_vlog2q_f32, ptr @armpl_svlog2_f64_x, ptr @armpl_svlog2_f32_x, ptr @armpl_vsinq_f64, ptr @armpl_vsinq_f32, ptr @armpl_svsin_f64_x, ptr @armpl_svsin_f32_x, ptr @armpl_vfmodq_f64, ptr @armpl_vfmodq_f32, ptr @armpl_svfmod_f64_x, ptr @armpl_svfmod_f32_x], section "llvm.metadata" ;. define <2 x double> @llvm_cos_f64(<2 x double> %in) { ; CHECK-LABEL: define <2 x double> @llvm_cos_f64 @@ -57,51 +57,6 @@ define @llvm_cos_vscale_f32( %in) #0 { ret %1 } -declare <2 x double> @llvm.sin.v2f64(<2 x double>) -declare <4 x float> @llvm.sin.v4f32(<4 x float>) -declare @llvm.sin.nxv2f64() -declare @llvm.sin.nxv4f32() - -define <2 x double> @llvm_sin_f64(<2 x double> %in) { -; CHECK-LABEL: define <2 x double> @llvm_sin_f64 -; CHECK-SAME: (<2 x double> [[IN:%.*]]) { -; CHECK-NEXT: [[TMP1:%.*]] = call fast <2 x double> @armpl_vsinq_f64(<2 x double> [[IN]]) -; CHECK-NEXT: ret <2 x double> [[TMP1]] -; - %1 = call fast <2 x double> @llvm.sin.v2f64(<2 x double> %in) - ret <2 x double> %1 -} - -define <4 x float> @llvm_sin_f32(<4 x float> %in) { -; CHECK-LABEL: define <4 x float> @llvm_sin_f32 -; CHECK-SAME: (<4 x float> [[IN:%.*]]) { -; CHECK-NEXT: [[TMP1:%.*]] = call fast <4 x float> @armpl_vsinq_f32(<4 x float> [[IN]]) -; CHECK-NEXT: ret <4 x float> [[TMP1]] -; - %1 = call fast <4 x float> @llvm.sin.v4f32(<4 x float> %in) - ret <4 x float> %1 -} - -define @llvm_sin_vscale_f64( %in) #0 { -; CHECK-LABEL: define @llvm_sin_vscale_f64 -; CHECK-SAME: ( [[IN:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = call fast @armpl_svsin_f64_x( [[IN]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) -; CHECK-NEXT: ret [[TMP1]] -; - %1 = call fast @llvm.sin.nxv2f64( %in) - ret %1 -} - -define @llvm_sin_vscale_f32( %in) #0 { -; CHECK-LABEL: define @llvm_sin_vscale_f32 -; CHECK-SAME: ( [[IN:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = call fast @armpl_svsin_f32_x( [[IN]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) -; CHECK-NEXT: ret [[TMP1]] -; - %1 = call fast @llvm.sin.nxv4f32( %in) - ret %1 -} - declare <2 x double> @llvm.exp.v2f64(<2 x double>) declare <4 x float> @llvm.exp.v4f32(<4 x float>) declare @llvm.exp.nxv2f64() @@ -147,93 +102,93 @@ define @llvm_exp_vscale_f32( %in) #0 { ret %1 } -declare <2 x double> @llvm.exp2.v2f64(<2 x double>) -declare <4 x float> @llvm.exp2.v4f32(<4 x float>) -declare @llvm.exp2.nxv2f64() -declare @llvm.exp2.nxv4f32() +declare <2 x double> @llvm.exp10.v2f64(<2 x double>) +declare <4 x float> @llvm.exp10.v4f32(<4 x float>) +declare @llvm.exp10.nxv2f64() +declare @llvm.exp10.nxv4f32() -define <2 x double> @llvm_exp2_f64(<2 x double> %in) { -; CHECK-LABEL: define <2 x double> @llvm_exp2_f64 +define <2 x double> @llvm_exp10_f64(<2 x double> %in) { +; CHECK-LABEL: define <2 x double> @llvm_exp10_f64 ; CHECK-SAME: (<2 x double> [[IN:%.*]]) { -; CHECK-NEXT: [[TMP1:%.*]] = call fast <2 x double> @armpl_vexp2q_f64(<2 x double> [[IN]]) +; CHECK-NEXT: [[TMP1:%.*]] = call fast <2 x double> @armpl_vexp10q_f64(<2 x double> [[IN]]) ; CHECK-NEXT: ret <2 x double> [[TMP1]] ; - %1 = call fast <2 x double> @llvm.exp2.v2f64(<2 x double> %in) + %1 = call fast <2 x double> @llvm.exp10.v2f64(<2 x double> %in) ret <2 x double> %1 } -define <4 x float> @llvm_exp2_f32(<4 x float> %in) { -; CHECK-LABEL: define <4 x float> @llvm_exp2_f32 +define <4 x float> @llvm_exp10_f32(<4 x float> %in) { +; CHECK-LABEL: define <4 x float> @llvm_exp10_f32 ; CHECK-SAME: (<4 x float> [[IN:%.*]]) { -; CHECK-NEXT: [[TMP1:%.*]] = call fast <4 x float> @armpl_vexp2q_f32(<4 x float> [[IN]]) +; CHECK-NEXT: [[TMP1:%.*]] = call fast <4 x float> @armpl_vexp10q_f32(<4 x float> [[IN]]) ; CHECK-NEXT: ret <4 x float> [[TMP1]] ; - %1 = call fast <4 x float> @llvm.exp2.v4f32(<4 x float> %in) + %1 = call fast <4 x float> @llvm.exp10.v4f32(<4 x float> %in) ret <4 x float> %1 } -define @llvm_exp2_vscale_f64( %in) #0 { -; CHECK-LABEL: define @llvm_exp2_vscale_f64 +define @llvm_exp10_vscale_f64( %in) #0 { +; CHECK-LABEL: define @llvm_exp10_vscale_f64 ; CHECK-SAME: ( [[IN:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = call fast @armpl_svexp2_f64_x( [[IN]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) +; CHECK-NEXT: [[TMP1:%.*]] = call fast @armpl_svexp10_f64_x( [[IN]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) ; CHECK-NEXT: ret [[TMP1]] ; - %1 = call fast @llvm.exp2.nxv2f64( %in) + %1 = call fast @llvm.exp10.nxv2f64( %in) ret %1 } -define @llvm_exp2_vscale_f32( %in) #0 { -; CHECK-LABEL: define @llvm_exp2_vscale_f32 +define @llvm_exp10_vscale_f32( %in) #0 { +; CHECK-LABEL: define @llvm_exp10_vscale_f32 ; CHECK-SAME: ( [[IN:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = call fast @armpl_svexp2_f32_x( [[IN]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) +; CHECK-NEXT: [[TMP1:%.*]] = call fast @armpl_svexp10_f32_x( [[IN]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) ; CHECK-NEXT: ret [[TMP1]] ; - %1 = call fast @llvm.exp2.nxv4f32( %in) + %1 = call fast @llvm.exp10.nxv4f32( %in) ret %1 } -declare <2 x double> @llvm.exp10.v2f64(<2 x double>) -declare <4 x float> @llvm.exp10.v4f32(<4 x float>) -declare @llvm.exp10.nxv2f64() -declare @llvm.exp10.nxv4f32() +declare <2 x double> @llvm.exp2.v2f64(<2 x double>) +declare <4 x float> @llvm.exp2.v4f32(<4 x float>) +declare @llvm.exp2.nxv2f64() +declare @llvm.exp2.nxv4f32() -define <2 x double> @llvm_exp10_f64(<2 x double> %in) { -; CHECK-LABEL: define <2 x double> @llvm_exp10_f64 +define <2 x double> @llvm_exp2_f64(<2 x double> %in) { +; CHECK-LABEL: define <2 x double> @llvm_exp2_f64 ; CHECK-SAME: (<2 x double> [[IN:%.*]]) { -; CHECK-NEXT: [[TMP1:%.*]] = call fast <2 x double> @armpl_vexp10q_f64(<2 x double> [[IN]]) +; CHECK-NEXT: [[TMP1:%.*]] = call fast <2 x double> @armpl_vexp2q_f64(<2 x double> [[IN]]) ; CHECK-NEXT: ret <2 x double> [[TMP1]] ; - %1 = call fast <2 x double> @llvm.exp10.v2f64(<2 x double> %in) + %1 = call fast <2 x double> @llvm.exp2.v2f64(<2 x double> %in) ret <2 x double> %1 } -define <4 x float> @llvm_exp10_f32(<4 x float> %in) { -; CHECK-LABEL: define <4 x float> @llvm_exp10_f32 +define <4 x float> @llvm_exp2_f32(<4 x float> %in) { +; CHECK-LABEL: define <4 x float> @llvm_exp2_f32 ; CHECK-SAME: (<4 x float> [[IN:%.*]]) { -; CHECK-NEXT: [[TMP1:%.*]] = call fast <4 x float> @armpl_vexp10q_f32(<4 x float> [[IN]]) +; CHECK-NEXT: [[TMP1:%.*]] = call fast <4 x float> @armpl_vexp2q_f32(<4 x float> [[IN]]) ; CHECK-NEXT: ret <4 x float> [[TMP1]] ; - %1 = call fast <4 x float> @llvm.exp10.v4f32(<4 x float> %in) + %1 = call fast <4 x float> @llvm.exp2.v4f32(<4 x float> %in) ret <4 x float> %1 } -define @llvm_exp10_vscale_f64( %in) #0 { -; CHECK-LABEL: define @llvm_exp10_vscale_f64 +define @llvm_exp2_vscale_f64( %in) #0 { +; CHECK-LABEL: define @llvm_exp2_vscale_f64 ; CHECK-SAME: ( [[IN:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = call fast @armpl_svexp10_f64_x( [[IN]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) +; CHECK-NEXT: [[TMP1:%.*]] = call fast @armpl_svexp2_f64_x( [[IN]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) ; CHECK-NEXT: ret [[TMP1]] ; - %1 = call fast @llvm.exp10.nxv2f64( %in) + %1 = call fast @llvm.exp2.nxv2f64( %in) ret %1 } -define @llvm_exp10_vscale_f32( %in) #0 { -; CHECK-LABEL: define @llvm_exp10_vscale_f32 +define @llvm_exp2_vscale_f32( %in) #0 { +; CHECK-LABEL: define @llvm_exp2_vscale_f32 ; CHECK-SAME: ( [[IN:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = call fast @armpl_svexp10_f32_x( [[IN]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) +; CHECK-NEXT: [[TMP1:%.*]] = call fast @armpl_svexp2_f32_x( [[IN]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) ; CHECK-NEXT: ret [[TMP1]] ; - %1 = call fast @llvm.exp10.nxv4f32( %in) + %1 = call fast @llvm.exp2.nxv4f32( %in) ret %1 } @@ -282,93 +237,93 @@ define @llvm_log_vscale_f32( %in) #0 { ret %1 } -declare <2 x double> @llvm.log2.v2f64(<2 x double>) -declare <4 x float> @llvm.log2.v4f32(<4 x float>) -declare @llvm.log2.nxv2f64() -declare @llvm.log2.nxv4f32() +declare <2 x double> @llvm.log10.v2f64(<2 x double>) +declare <4 x float> @llvm.log10.v4f32(<4 x float>) +declare @llvm.log10.nxv2f64() +declare @llvm.log10.nxv4f32() -define <2 x double> @llvm_log2_f64(<2 x double> %in) { -; CHECK-LABEL: define <2 x double> @llvm_log2_f64 +define <2 x double> @llvm_log10_f64(<2 x double> %in) { +; CHECK-LABEL: define <2 x double> @llvm_log10_f64 ; CHECK-SAME: (<2 x double> [[IN:%.*]]) { -; CHECK-NEXT: [[TMP1:%.*]] = call fast <2 x double> @armpl_vlog2q_f64(<2 x double> [[IN]]) +; CHECK-NEXT: [[TMP1:%.*]] = call fast <2 x double> @armpl_vlog10q_f64(<2 x double> [[IN]]) ; CHECK-NEXT: ret <2 x double> [[TMP1]] ; - %1 = call fast <2 x double> @llvm.log2.v2f64(<2 x double> %in) + %1 = call fast <2 x double> @llvm.log10.v2f64(<2 x double> %in) ret <2 x double> %1 } -define <4 x float> @llvm_log2_f32(<4 x float> %in) { -; CHECK-LABEL: define <4 x float> @llvm_log2_f32 +define <4 x float> @llvm_log10_f32(<4 x float> %in) { +; CHECK-LABEL: define <4 x float> @llvm_log10_f32 ; CHECK-SAME: (<4 x float> [[IN:%.*]]) { -; CHECK-NEXT: [[TMP1:%.*]] = call fast <4 x float> @armpl_vlog2q_f32(<4 x float> [[IN]]) +; CHECK-NEXT: [[TMP1:%.*]] = call fast <4 x float> @armpl_vlog10q_f32(<4 x float> [[IN]]) ; CHECK-NEXT: ret <4 x float> [[TMP1]] ; - %1 = call fast <4 x float> @llvm.log2.v4f32(<4 x float> %in) + %1 = call fast <4 x float> @llvm.log10.v4f32(<4 x float> %in) ret <4 x float> %1 } -define @llvm_log2_vscale_f64( %in) #0 { -; CHECK-LABEL: define @llvm_log2_vscale_f64 +define @llvm_log10_vscale_f64( %in) #0 { +; CHECK-LABEL: define @llvm_log10_vscale_f64 ; CHECK-SAME: ( [[IN:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = call fast @armpl_svlog2_f64_x( [[IN]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) +; CHECK-NEXT: [[TMP1:%.*]] = call fast @armpl_svlog10_f64_x( [[IN]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) ; CHECK-NEXT: ret [[TMP1]] ; - %1 = call fast @llvm.log2.nxv2f64( %in) + %1 = call fast @llvm.log10.nxv2f64( %in) ret %1 } -define @llvm_log2_vscale_f32( %in) #0 { -; CHECK-LABEL: define @llvm_log2_vscale_f32 +define @llvm_log10_vscale_f32( %in) #0 { +; CHECK-LABEL: define @llvm_log10_vscale_f32 ; CHECK-SAME: ( [[IN:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = call fast @armpl_svlog2_f32_x( [[IN]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) +; CHECK-NEXT: [[TMP1:%.*]] = call fast @armpl_svlog10_f32_x( [[IN]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) ; CHECK-NEXT: ret [[TMP1]] ; - %1 = call fast @llvm.log2.nxv4f32( %in) + %1 = call fast @llvm.log10.nxv4f32( %in) ret %1 } -declare <2 x double> @llvm.log10.v2f64(<2 x double>) -declare <4 x float> @llvm.log10.v4f32(<4 x float>) -declare @llvm.log10.nxv2f64() -declare @llvm.log10.nxv4f32() +declare <2 x double> @llvm.log2.v2f64(<2 x double>) +declare <4 x float> @llvm.log2.v4f32(<4 x float>) +declare @llvm.log2.nxv2f64() +declare @llvm.log2.nxv4f32() -define <2 x double> @llvm_log10_f64(<2 x double> %in) { -; CHECK-LABEL: define <2 x double> @llvm_log10_f64 +define <2 x double> @llvm_log2_f64(<2 x double> %in) { +; CHECK-LABEL: define <2 x double> @llvm_log2_f64 ; CHECK-SAME: (<2 x double> [[IN:%.*]]) { -; CHECK-NEXT: [[TMP1:%.*]] = call fast <2 x double> @armpl_vlog10q_f64(<2 x double> [[IN]]) +; CHECK-NEXT: [[TMP1:%.*]] = call fast <2 x double> @armpl_vlog2q_f64(<2 x double> [[IN]]) ; CHECK-NEXT: ret <2 x double> [[TMP1]] ; - %1 = call fast <2 x double> @llvm.log10.v2f64(<2 x double> %in) + %1 = call fast <2 x double> @llvm.log2.v2f64(<2 x double> %in) ret <2 x double> %1 } -define <4 x float> @llvm_log10_f32(<4 x float> %in) { -; CHECK-LABEL: define <4 x float> @llvm_log10_f32 +define <4 x float> @llvm_log2_f32(<4 x float> %in) { +; CHECK-LABEL: define <4 x float> @llvm_log2_f32 ; CHECK-SAME: (<4 x float> [[IN:%.*]]) { -; CHECK-NEXT: [[TMP1:%.*]] = call fast <4 x float> @armpl_vlog10q_f32(<4 x float> [[IN]]) +; CHECK-NEXT: [[TMP1:%.*]] = call fast <4 x float> @armpl_vlog2q_f32(<4 x float> [[IN]]) ; CHECK-NEXT: ret <4 x float> [[TMP1]] ; - %1 = call fast <4 x float> @llvm.log10.v4f32(<4 x float> %in) + %1 = call fast <4 x float> @llvm.log2.v4f32(<4 x float> %in) ret <4 x float> %1 } -define @llvm_log10_vscale_f64( %in) #0 { -; CHECK-LABEL: define @llvm_log10_vscale_f64 +define @llvm_log2_vscale_f64( %in) #0 { +; CHECK-LABEL: define @llvm_log2_vscale_f64 ; CHECK-SAME: ( [[IN:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = call fast @armpl_svlog10_f64_x( [[IN]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) +; CHECK-NEXT: [[TMP1:%.*]] = call fast @armpl_svlog2_f64_x( [[IN]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) ; CHECK-NEXT: ret [[TMP1]] ; - %1 = call fast @llvm.log10.nxv2f64( %in) + %1 = call fast @llvm.log2.nxv2f64( %in) ret %1 } -define @llvm_log10_vscale_f32( %in) #0 { -; CHECK-LABEL: define @llvm_log10_vscale_f32 +define @llvm_log2_vscale_f32( %in) #0 { +; CHECK-LABEL: define @llvm_log2_vscale_f32 ; CHECK-SAME: ( [[IN:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = call fast @armpl_svlog10_f32_x( [[IN]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) +; CHECK-NEXT: [[TMP1:%.*]] = call fast @armpl_svlog2_f32_x( [[IN]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) ; CHECK-NEXT: ret [[TMP1]] ; - %1 = call fast @llvm.log10.nxv4f32( %in) + %1 = call fast @llvm.log2.nxv4f32( %in) ret %1 } @@ -424,6 +379,52 @@ define @llvm_pow_vscale_f32( %in, %1 } +declare <2 x double> @llvm.sin.v2f64(<2 x double>) +declare <4 x float> @llvm.sin.v4f32(<4 x float>) +declare @llvm.sin.nxv2f64() +declare @llvm.sin.nxv4f32() + +define <2 x double> @llvm_sin_f64(<2 x double> %in) { +; CHECK-LABEL: define <2 x double> @llvm_sin_f64 +; CHECK-SAME: (<2 x double> [[IN:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = call fast <2 x double> @armpl_vsinq_f64(<2 x double> [[IN]]) +; CHECK-NEXT: ret <2 x double> [[TMP1]] +; + %1 = call fast <2 x double> @llvm.sin.v2f64(<2 x double> %in) + ret <2 x double> %1 +} + +define <4 x float> @llvm_sin_f32(<4 x float> %in) { +; CHECK-LABEL: define <4 x float> @llvm_sin_f32 +; CHECK-SAME: (<4 x float> [[IN:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = call fast <4 x float> @armpl_vsinq_f32(<4 x float> [[IN]]) +; CHECK-NEXT: ret <4 x float> [[TMP1]] +; + %1 = call fast <4 x float> @llvm.sin.v4f32(<4 x float> %in) + ret <4 x float> %1 +} + +define @llvm_sin_vscale_f64( %in) #0 { +; CHECK-LABEL: define @llvm_sin_vscale_f64 +; CHECK-SAME: ( [[IN:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = call fast @armpl_svsin_f64_x( [[IN]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = call fast @llvm.sin.nxv2f64( %in) + ret %1 +} + +define @llvm_sin_vscale_f32( %in) #0 { +; CHECK-LABEL: define @llvm_sin_vscale_f32 +; CHECK-SAME: ( [[IN:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = call fast @armpl_svsin_f32_x( [[IN]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = call fast @llvm.sin.nxv4f32( %in) + ret %1 +} + + define <2 x double> @frem_f64(<2 x double> %in) { ; CHECK-LABEL: define <2 x double> @frem_f64 ; CHECK-SAME: (<2 x double> [[IN:%.*]]) { diff --git a/llvm/test/CodeGen/AArch64/replace-with-veclib-sleef-scalable.ll b/llvm/test/CodeGen/AArch64/replace-with-veclib-sleef-scalable.ll index 590dd9effac0e..9e214655e413a 100644 --- a/llvm/test/CodeGen/AArch64/replace-with-veclib-sleef-scalable.ll +++ b/llvm/test/CodeGen/AArch64/replace-with-veclib-sleef-scalable.ll @@ -4,7 +4,7 @@ target triple = "aarch64-unknown-linux-gnu" ;. -; CHECK: @llvm.compiler.used = appending global [18 x ptr] [ptr @_ZGVsMxv_cos, ptr @_ZGVsMxv_cosf, ptr @_ZGVsMxv_exp, ptr @_ZGVsMxv_expf, ptr @_ZGVsMxv_exp2, ptr @_ZGVsMxv_exp2f, ptr @_ZGVsMxv_exp10, ptr @_ZGVsMxv_exp10f, ptr @_ZGVsMxv_log, ptr @_ZGVsMxv_logf, ptr @_ZGVsMxv_log10, ptr @_ZGVsMxv_log10f, ptr @_ZGVsMxv_log2, ptr @_ZGVsMxv_log2f, ptr @_ZGVsMxv_sin, ptr @_ZGVsMxv_sinf, ptr @_ZGVsMxvv_fmod, ptr @_ZGVsMxvv_fmodf], section "llvm.metadata" +; CHECK: @llvm.compiler.used = appending global [18 x ptr] [ptr @_ZGVsMxv_cos, ptr @_ZGVsMxv_cosf, ptr @_ZGVsMxv_exp, ptr @_ZGVsMxv_expf, ptr @_ZGVsMxv_exp10, ptr @_ZGVsMxv_exp10f, ptr @_ZGVsMxv_exp2, ptr @_ZGVsMxv_exp2f, ptr @_ZGVsMxv_log, ptr @_ZGVsMxv_logf, ptr @_ZGVsMxv_log10, ptr @_ZGVsMxv_log10f, ptr @_ZGVsMxv_log2, ptr @_ZGVsMxv_log2f, ptr @_ZGVsMxv_sin, ptr @_ZGVsMxv_sinf, ptr @_ZGVsMxvv_fmod, ptr @_ZGVsMxvv_fmodf], section "llvm.metadata" ;. define @llvm_ceil_vscale_f64( %in) { ; CHECK-LABEL: @llvm_ceil_vscale_f64( @@ -78,39 +78,39 @@ define @llvm_exp_vscale_f32( %in) { ret %1 } -define @llvm_exp2_vscale_f64( %in) { -; CHECK-LABEL: @llvm_exp2_vscale_f64( -; CHECK-NEXT: [[TMP1:%.*]] = call fast @_ZGVsMxv_exp2( [[IN:%.*]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) +define @llvm_exp10_vscale_f64( %in) { +; CHECK-LABEL: @llvm_exp10_vscale_f64( +; CHECK-NEXT: [[TMP1:%.*]] = call fast @_ZGVsMxv_exp10( [[IN:%.*]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) ; CHECK-NEXT: ret [[TMP1]] ; - %1 = call fast @llvm.exp2.nxv2f64( %in) + %1 = call fast @llvm.exp10.nxv2f64( %in) ret %1 } -define @llvm_exp2_vscale_f32( %in) { -; CHECK-LABEL: @llvm_exp2_vscale_f32( -; CHECK-NEXT: [[TMP1:%.*]] = call fast @_ZGVsMxv_exp2f( [[IN:%.*]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) +define @llvm_exp10_vscale_f32( %in) { +; CHECK-LABEL: @llvm_exp10_vscale_f32( +; CHECK-NEXT: [[TMP1:%.*]] = call fast @_ZGVsMxv_exp10f( [[IN:%.*]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) ; CHECK-NEXT: ret [[TMP1]] ; - %1 = call fast @llvm.exp2.nxv4f32( %in) + %1 = call fast @llvm.exp10.nxv4f32( %in) ret %1 } -define @llvm_exp10_vscale_f64( %in) { -; CHECK-LABEL: @llvm_exp10_vscale_f64( -; CHECK-NEXT: [[TMP1:%.*]] = call fast @_ZGVsMxv_exp10( [[IN:%.*]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) +define @llvm_exp2_vscale_f64( %in) { +; CHECK-LABEL: @llvm_exp2_vscale_f64( +; CHECK-NEXT: [[TMP1:%.*]] = call fast @_ZGVsMxv_exp2( [[IN:%.*]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) ; CHECK-NEXT: ret [[TMP1]] ; - %1 = call fast @llvm.exp10.nxv2f64( %in) + %1 = call fast @llvm.exp2.nxv2f64( %in) ret %1 } -define @llvm_exp10_vscale_f32( %in) { -; CHECK-LABEL: @llvm_exp10_vscale_f32( -; CHECK-NEXT: [[TMP1:%.*]] = call fast @_ZGVsMxv_exp10f( [[IN:%.*]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) +define @llvm_exp2_vscale_f32( %in) { +; CHECK-LABEL: @llvm_exp2_vscale_f32( +; CHECK-NEXT: [[TMP1:%.*]] = call fast @_ZGVsMxv_exp2f( [[IN:%.*]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) ; CHECK-NEXT: ret [[TMP1]] ; - %1 = call fast @llvm.exp10.nxv4f32( %in) + %1 = call fast @llvm.exp2.nxv4f32( %in) ret %1 } diff --git a/llvm/test/CodeGen/AArch64/replace-with-veclib-sleef.ll b/llvm/test/CodeGen/AArch64/replace-with-veclib-sleef.ll index 865a46009b205..f408df570fdc0 100644 --- a/llvm/test/CodeGen/AArch64/replace-with-veclib-sleef.ll +++ b/llvm/test/CodeGen/AArch64/replace-with-veclib-sleef.ll @@ -4,7 +4,7 @@ target triple = "aarch64-unknown-linux-gnu" ;. -; CHECK: @llvm.compiler.used = appending global [18 x ptr] [ptr @_ZGVnN2v_cos, ptr @_ZGVnN4v_cosf, ptr @_ZGVnN2v_exp, ptr @_ZGVnN4v_expf, ptr @_ZGVnN2v_exp2, ptr @_ZGVnN4v_exp2f, ptr @_ZGVnN2v_exp10, ptr @_ZGVnN4v_exp10f, ptr @_ZGVnN2v_log, ptr @_ZGVnN4v_logf, ptr @_ZGVnN2v_log10, ptr @_ZGVnN4v_log10f, ptr @_ZGVnN2v_log2, ptr @_ZGVnN4v_log2f, ptr @_ZGVnN2v_sin, ptr @_ZGVnN4v_sinf, ptr @_ZGVnN2vv_fmod, ptr @_ZGVnN4vv_fmodf], section "llvm.metadata" +; CHECK: @llvm.compiler.used = appending global [18 x ptr] [ptr @_ZGVnN2v_cos, ptr @_ZGVnN4v_cosf, ptr @_ZGVnN2v_exp, ptr @_ZGVnN4v_expf, ptr @_ZGVnN2v_exp10, ptr @_ZGVnN4v_exp10f, ptr @_ZGVnN2v_exp2, ptr @_ZGVnN4v_exp2f, ptr @_ZGVnN2v_log, ptr @_ZGVnN4v_logf, ptr @_ZGVnN2v_log10, ptr @_ZGVnN4v_log10f, ptr @_ZGVnN2v_log2, ptr @_ZGVnN4v_log2f, ptr @_ZGVnN2v_sin, ptr @_ZGVnN4v_sinf, ptr @_ZGVnN2vv_fmod, ptr @_ZGVnN4vv_fmodf], section "llvm.metadata" ;. define <2 x double> @llvm_ceil_f64(<2 x double> %in) { ; CHECK-LABEL: @llvm_ceil_f64( @@ -78,39 +78,39 @@ define <4 x float> @llvm_exp_f32(<4 x float> %in) { ret <4 x float> %1 } -define <2 x double> @llvm_exp2_f64(<2 x double> %in) { -; CHECK-LABEL: @llvm_exp2_f64( -; CHECK-NEXT: [[TMP1:%.*]] = call fast <2 x double> @_ZGVnN2v_exp2(<2 x double> [[IN:%.*]]) +define <2 x double> @llvm_exp10_f64(<2 x double> %in) { +; CHECK-LABEL: @llvm_exp10_f64( +; CHECK-NEXT: [[TMP1:%.*]] = call fast <2 x double> @_ZGVnN2v_exp10(<2 x double> [[IN:%.*]]) ; CHECK-NEXT: ret <2 x double> [[TMP1]] ; - %1 = call fast <2 x double> @llvm.exp2.v2f64(<2 x double> %in) + %1 = call fast <2 x double> @llvm.exp10.v2f64(<2 x double> %in) ret <2 x double> %1 } -define <4 x float> @llvm_exp2_f32(<4 x float> %in) { -; CHECK-LABEL: @llvm_exp2_f32( -; CHECK-NEXT: [[TMP1:%.*]] = call fast <4 x float> @_ZGVnN4v_exp2f(<4 x float> [[IN:%.*]]) +define <4 x float> @llvm_exp10_f32(<4 x float> %in) { +; CHECK-LABEL: @llvm_exp10_f32( +; CHECK-NEXT: [[TMP1:%.*]] = call fast <4 x float> @_ZGVnN4v_exp10f(<4 x float> [[IN:%.*]]) ; CHECK-NEXT: ret <4 x float> [[TMP1]] ; - %1 = call fast <4 x float> @llvm.exp2.v4f32(<4 x float> %in) + %1 = call fast <4 x float> @llvm.exp10.v4f32(<4 x float> %in) ret <4 x float> %1 } -define <2 x double> @llvm_exp10_f64(<2 x double> %in) { -; CHECK-LABEL: @llvm_exp10_f64( -; CHECK-NEXT: [[TMP1:%.*]] = call fast <2 x double> @_ZGVnN2v_exp10(<2 x double> [[IN:%.*]]) +define <2 x double> @llvm_exp2_f64(<2 x double> %in) { +; CHECK-LABEL: @llvm_exp2_f64( +; CHECK-NEXT: [[TMP1:%.*]] = call fast <2 x double> @_ZGVnN2v_exp2(<2 x double> [[IN:%.*]]) ; CHECK-NEXT: ret <2 x double> [[TMP1]] ; - %1 = call fast <2 x double> @llvm.exp10.v2f64(<2 x double> %in) + %1 = call fast <2 x double> @llvm.exp2.v2f64(<2 x double> %in) ret <2 x double> %1 } -define <4 x float> @llvm_exp10_f32(<4 x float> %in) { -; CHECK-LABEL: @llvm_exp10_f32( -; CHECK-NEXT: [[TMP1:%.*]] = call fast <4 x float> @_ZGVnN4v_exp10f(<4 x float> [[IN:%.*]]) +define <4 x float> @llvm_exp2_f32(<4 x float> %in) { +; CHECK-LABEL: @llvm_exp2_f32( +; CHECK-NEXT: [[TMP1:%.*]] = call fast <4 x float> @_ZGVnN4v_exp2f(<4 x float> [[IN:%.*]]) ; CHECK-NEXT: ret <4 x float> [[TMP1]] ; - %1 = call fast <4 x float> @llvm.exp10.v4f32(<4 x float> %in) + %1 = call fast <4 x float> @llvm.exp2.v4f32(<4 x float> %in) ret <4 x float> %1 } diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/veclib-function-calls.ll b/llvm/test/Transforms/LoopVectorize/AArch64/veclib-function-calls.ll index e30173a588818..2eb7151ffe31a 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/veclib-function-calls.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/veclib-function-calls.ll @@ -376,6 +376,79 @@ define void @atan_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { ret void } +declare double @atan2(double, double) +declare float @atan2f(float, float) + +define void @atan2_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { +; SLEEF-NEON-LABEL: define void @atan2_f64 +; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; SLEEF-NEON: [[TMP3:%.*]] = call <2 x double> @_ZGVnN2vv_atan2(<2 x double> [[WIDE_LOAD:%.*]], <2 x double> [[WIDE_LOAD]]) +; +; SLEEF-SVE-LABEL: define void @atan2_f64 +; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; SLEEF-SVE: [[TMP15:%.*]] = call @_ZGVsMxvv_atan2( [[WIDE_MASKED_LOAD:%.*]], [[WIDE_MASKED_LOAD]], [[ACTIVE_LANE_MASK:%.*]]) +; +; ARMPL-NEON-LABEL: define void @atan2_f64 +; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; ARMPL-NEON: [[TMP3:%.*]] = call <2 x double> @armpl_vatan2q_f64(<2 x double> [[WIDE_LOAD:%.*]], <2 x double> [[WIDE_LOAD]]) +; +; ARMPL-SVE-LABEL: define void @atan2_f64 +; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; ARMPL-SVE: [[TMP15:%.*]] = call @armpl_svatan2_f64_x( [[WIDE_MASKED_LOAD:%.*]], [[WIDE_MASKED_LOAD]], [[ACTIVE_LANE_MASK:%.*]]) +; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %in.gep = getelementptr inbounds double, ptr %in.ptr, i64 %iv + %in = load double, ptr %in.gep, align 8 + %call = tail call double @atan2(double %in, double %in) + %out.gep = getelementptr inbounds double, ptr %out.ptr, i64 %iv + store double %call, ptr %out.gep, align 8 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + +define void @atan2_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { +; SLEEF-NEON-LABEL: define void @atan2_f32 +; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; SLEEF-NEON: [[TMP3:%.*]] = call <4 x float> @_ZGVnN4vv_atan2f(<4 x float> [[WIDE_LOAD:%.*]], <4 x float> [[WIDE_LOAD]]) +; +; SLEEF-SVE-LABEL: define void @atan2_f32 +; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; SLEEF-SVE: [[TMP15:%.*]] = call @_ZGVsMxvv_atan2f( [[WIDE_MASKED_LOAD:%.*]], [[WIDE_MASKED_LOAD]], [[ACTIVE_LANE_MASK:%.*]]) +; +; ARMPL-NEON-LABEL: define void @atan2_f32 +; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; ARMPL-NEON: [[TMP3:%.*]] = call <4 x float> @armpl_vatan2q_f32(<4 x float> [[WIDE_LOAD:%.*]], <4 x float> [[WIDE_LOAD]]) +; +; ARMPL-SVE-LABEL: define void @atan2_f32 +; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; ARMPL-SVE: [[TMP15:%.*]] = call @armpl_svatan2_f32_x( [[WIDE_MASKED_LOAD:%.*]], [[WIDE_MASKED_LOAD]], [[ACTIVE_LANE_MASK:%.*]]) +; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv + %in = load float, ptr %in.gep, align 8 + %call = tail call float @atan2f(float %in, float %in) + %out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv + store float %call, ptr %out.gep, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + declare double @atanh(double) declare float @atanhf(float) @@ -522,6 +595,79 @@ define void @cbrt_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { ret void } +declare double @copysign(double, double) +declare float @copysignf(float, float) + +define void @copysign_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { +; SLEEF-NEON-LABEL: define void @copysign_f64 +; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; SLEEF-NEON: [[CALL:%.*]] = tail call double @copysign(double [[IN:%.*]], double [[IN]]) +; +; SLEEF-SVE-LABEL: define void @copysign_f64 +; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; SLEEF-SVE: [[CALL:%.*]] = tail call double @copysign(double [[IN:%.*]], double [[IN]]) +; +; ARMPL-NEON-LABEL: define void @copysign_f64 +; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; ARMPL-NEON: [[TMP3:%.*]] = call <2 x double> @armpl_vcopysignq_f64(<2 x double> [[WIDE_LOAD:%.*]], <2 x double> [[WIDE_LOAD]]) +; +; ARMPL-SVE-LABEL: define void @copysign_f64 +; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; ARMPL-SVE: [[TMP15:%.*]] = call @armpl_svcopysign_f64_x( [[WIDE_MASKED_LOAD:%.*]], [[WIDE_MASKED_LOAD]], [[ACTIVE_LANE_MASK:%.*]]) +; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %in.gep = getelementptr inbounds double, ptr %in.ptr, i64 %iv + %in = load double, ptr %in.gep, align 8 + %call = tail call double @copysign(double %in, double %in) + %out.gep = getelementptr inbounds double, ptr %out.ptr, i64 %iv + store double %call, ptr %out.gep, align 8 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + +define void @copysign_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { +; SLEEF-NEON-LABEL: define void @copysign_f32 +; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; SLEEF-NEON: [[CALL:%.*]] = tail call float @copysignf(float [[IN:%.*]], float [[IN]]) +; +; SLEEF-SVE-LABEL: define void @copysign_f32 +; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; SLEEF-SVE: [[CALL:%.*]] = tail call float @copysignf(float [[IN:%.*]], float [[IN]]) +; +; ARMPL-NEON-LABEL: define void @copysign_f32 +; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; ARMPL-NEON: [[TMP3:%.*]] = call <4 x float> @armpl_vcopysignq_f32(<4 x float> [[WIDE_LOAD:%.*]], <4 x float> [[WIDE_LOAD]]) +; +; ARMPL-SVE-LABEL: define void @copysign_f32 +; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; ARMPL-SVE: [[TMP15:%.*]] = call @armpl_svcopysign_f32_x( [[WIDE_MASKED_LOAD:%.*]], [[WIDE_MASKED_LOAD]], [[ACTIVE_LANE_MASK:%.*]]) +; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv + %in = load float, ptr %in.gep, align 8 + %call = tail call float @copysignf(float %in, float %in) + %out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv + store float %call, ptr %out.gep, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + declare double @cos(double) declare float @cosf(float) @@ -887,25 +1033,25 @@ define void @exp_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { ret void } -declare double @exp2(double) -declare float @exp2f(float) +declare double @exp10(double) +declare float @exp10f(float) -define void @exp2_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { -; SLEEF-NEON-LABEL: define void @exp2_f64 +define void @exp10_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { +; SLEEF-NEON-LABEL: define void @exp10_f64 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-NEON: [[TMP3:%.*]] = call <2 x double> @_ZGVnN2v_exp2(<2 x double> [[WIDE_LOAD:%.*]]) +; SLEEF-NEON: [[TMP3:%.*]] = call <2 x double> @_ZGVnN2v_exp10(<2 x double> [[WIDE_LOAD:%.*]]) ; -; SLEEF-SVE-LABEL: define void @exp2_f64 +; SLEEF-SVE-LABEL: define void @exp10_f64 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-SVE: [[TMP15:%.*]] = call @_ZGVsMxv_exp2( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) +; SLEEF-SVE: [[TMP15:%.*]] = call @_ZGVsMxv_exp10( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) ; -; ARMPL-NEON-LABEL: define void @exp2_f64 +; ARMPL-NEON-LABEL: define void @exp10_f64 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; ARMPL-NEON: [[TMP3:%.*]] = call <2 x double> @armpl_vexp2q_f64(<2 x double> [[WIDE_LOAD:%.*]]) +; ARMPL-NEON: [[TMP3:%.*]] = call <2 x double> @armpl_vexp10q_f64(<2 x double> [[WIDE_LOAD:%.*]]) ; -; ARMPL-SVE-LABEL: define void @exp2_f64 +; ARMPL-SVE-LABEL: define void @exp10_f64 ; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; ARMPL-SVE: [[TMP15:%.*]] = call @armpl_svexp2_f64_x( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) +; ARMPL-SVE: [[TMP15:%.*]] = call @armpl_svexp10_f64_x( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) ; entry: br label %for.body @@ -914,7 +1060,7 @@ define void @exp2_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] %in.gep = getelementptr inbounds double, ptr %in.ptr, i64 %iv %in = load double, ptr %in.gep, align 8 - %call = tail call double @exp2(double %in) + %call = tail call double @exp10(double %in) %out.gep = getelementptr inbounds double, ptr %out.ptr, i64 %iv store double %call, ptr %out.gep, align 8 %iv.next = add nuw nsw i64 %iv, 1 @@ -925,22 +1071,22 @@ define void @exp2_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { ret void } -define void @exp2_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { -; SLEEF-NEON-LABEL: define void @exp2_f32 +define void @exp10_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { +; SLEEF-NEON-LABEL: define void @exp10_f32 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-NEON: [[TMP3:%.*]] = call <4 x float> @_ZGVnN4v_exp2f(<4 x float> [[WIDE_LOAD:%.*]]) +; SLEEF-NEON: [[TMP3:%.*]] = call <4 x float> @_ZGVnN4v_exp10f(<4 x float> [[WIDE_LOAD:%.*]]) ; -; SLEEF-SVE-LABEL: define void @exp2_f32 +; SLEEF-SVE-LABEL: define void @exp10_f32 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-SVE: [[TMP15:%.*]] = call @_ZGVsMxv_exp2f( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) +; SLEEF-SVE: [[TMP15:%.*]] = call @_ZGVsMxv_exp10f( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) ; -; ARMPL-NEON-LABEL: define void @exp2_f32 +; ARMPL-NEON-LABEL: define void @exp10_f32 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; ARMPL-NEON: [[TMP3:%.*]] = call <4 x float> @armpl_vexp2q_f32(<4 x float> [[WIDE_LOAD:%.*]]) +; ARMPL-NEON: [[TMP3:%.*]] = call <4 x float> @armpl_vexp10q_f32(<4 x float> [[WIDE_LOAD:%.*]]) ; -; ARMPL-SVE-LABEL: define void @exp2_f32 +; ARMPL-SVE-LABEL: define void @exp10_f32 ; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; ARMPL-SVE: [[TMP15:%.*]] = call @armpl_svexp2_f32_x( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) +; ARMPL-SVE: [[TMP15:%.*]] = call @armpl_svexp10_f32_x( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) ; entry: br label %for.body @@ -949,7 +1095,7 @@ define void @exp2_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] %in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv %in = load float, ptr %in.gep, align 8 - %call = tail call float @exp2f(float %in) + %call = tail call float @exp10f(float %in) %out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv store float %call, ptr %out.gep, align 4 %iv.next = add nuw nsw i64 %iv, 1 @@ -960,25 +1106,25 @@ define void @exp2_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { ret void } -declare double @exp10(double) -declare float @exp10f(float) +declare double @exp2(double) +declare float @exp2f(float) -define void @exp10_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { -; SLEEF-NEON-LABEL: define void @exp10_f64 +define void @exp2_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { +; SLEEF-NEON-LABEL: define void @exp2_f64 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-NEON: [[TMP3:%.*]] = call <2 x double> @_ZGVnN2v_exp10(<2 x double> [[WIDE_LOAD:%.*]]) +; SLEEF-NEON: [[TMP3:%.*]] = call <2 x double> @_ZGVnN2v_exp2(<2 x double> [[WIDE_LOAD:%.*]]) ; -; SLEEF-SVE-LABEL: define void @exp10_f64 +; SLEEF-SVE-LABEL: define void @exp2_f64 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-SVE: [[TMP15:%.*]] = call @_ZGVsMxv_exp10( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) +; SLEEF-SVE: [[TMP15:%.*]] = call @_ZGVsMxv_exp2( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) ; -; ARMPL-NEON-LABEL: define void @exp10_f64 +; ARMPL-NEON-LABEL: define void @exp2_f64 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; ARMPL-NEON: [[TMP3:%.*]] = call <2 x double> @armpl_vexp10q_f64(<2 x double> [[WIDE_LOAD:%.*]]) +; ARMPL-NEON: [[TMP3:%.*]] = call <2 x double> @armpl_vexp2q_f64(<2 x double> [[WIDE_LOAD:%.*]]) ; -; ARMPL-SVE-LABEL: define void @exp10_f64 +; ARMPL-SVE-LABEL: define void @exp2_f64 ; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; ARMPL-SVE: [[TMP15:%.*]] = call @armpl_svexp10_f64_x( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) +; ARMPL-SVE: [[TMP15:%.*]] = call @armpl_svexp2_f64_x( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) ; entry: br label %for.body @@ -987,7 +1133,7 @@ define void @exp10_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] %in.gep = getelementptr inbounds double, ptr %in.ptr, i64 %iv %in = load double, ptr %in.gep, align 8 - %call = tail call double @exp10(double %in) + %call = tail call double @exp2(double %in) %out.gep = getelementptr inbounds double, ptr %out.ptr, i64 %iv store double %call, ptr %out.gep, align 8 %iv.next = add nuw nsw i64 %iv, 1 @@ -998,22 +1144,22 @@ define void @exp10_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { ret void } -define void @exp10_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { -; SLEEF-NEON-LABEL: define void @exp10_f32 +define void @exp2_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { +; SLEEF-NEON-LABEL: define void @exp2_f32 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-NEON: [[TMP3:%.*]] = call <4 x float> @_ZGVnN4v_exp10f(<4 x float> [[WIDE_LOAD:%.*]]) +; SLEEF-NEON: [[TMP3:%.*]] = call <4 x float> @_ZGVnN4v_exp2f(<4 x float> [[WIDE_LOAD:%.*]]) ; -; SLEEF-SVE-LABEL: define void @exp10_f32 +; SLEEF-SVE-LABEL: define void @exp2_f32 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-SVE: [[TMP15:%.*]] = call @_ZGVsMxv_exp10f( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) +; SLEEF-SVE: [[TMP15:%.*]] = call @_ZGVsMxv_exp2f( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) ; -; ARMPL-NEON-LABEL: define void @exp10_f32 +; ARMPL-NEON-LABEL: define void @exp2_f32 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; ARMPL-NEON: [[TMP3:%.*]] = call <4 x float> @armpl_vexp10q_f32(<4 x float> [[WIDE_LOAD:%.*]]) +; ARMPL-NEON: [[TMP3:%.*]] = call <4 x float> @armpl_vexp2q_f32(<4 x float> [[WIDE_LOAD:%.*]]) ; -; ARMPL-SVE-LABEL: define void @exp10_f32 +; ARMPL-SVE-LABEL: define void @exp2_f32 ; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; ARMPL-SVE: [[TMP15:%.*]] = call @armpl_svexp10_f32_x( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) +; ARMPL-SVE: [[TMP15:%.*]] = call @armpl_svexp2_f32_x( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) ; entry: br label %for.body @@ -1022,7 +1168,7 @@ define void @exp10_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] %in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv %in = load float, ptr %in.gep, align 8 - %call = tail call float @exp10f(float %in) + %call = tail call float @exp2f(float %in) %out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv store float %call, ptr %out.gep, align 4 %iv.next = add nuw nsw i64 %iv, 1 @@ -1106,25 +1252,25 @@ define void @expm1_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { ret void } -declare double @lgamma(double) -declare float @lgammaf(float) +declare double @fdim(double, double) +declare float @fdimf(float, float) -define void @lgamma_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { -; SLEEF-NEON-LABEL: define void @lgamma_f64 +define void @fdim_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { +; SLEEF-NEON-LABEL: define void @fdim_f64 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-NEON: [[TMP3:%.*]] = call <2 x double> @_ZGVnN2v_lgamma(<2 x double> [[WIDE_LOAD:%.*]]) +; SLEEF-NEON: [[CALL:%.*]] = tail call double @fdim(double [[IN:%.*]], double [[IN]]) ; -; SLEEF-SVE-LABEL: define void @lgamma_f64 +; SLEEF-SVE-LABEL: define void @fdim_f64 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-SVE: [[TMP15:%.*]] = call @_ZGVsMxv_lgamma( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) +; SLEEF-SVE: [[CALL:%.*]] = tail call double @fdim(double [[IN:%.*]], double [[IN]]) ; -; ARMPL-NEON-LABEL: define void @lgamma_f64 +; ARMPL-NEON-LABEL: define void @fdim_f64 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; ARMPL-NEON: [[TMP3:%.*]] = call <2 x double> @armpl_vlgammaq_f64(<2 x double> [[WIDE_LOAD:%.*]]) +; ARMPL-NEON: [[TMP3:%.*]] = call <2 x double> @armpl_vfdimq_f64(<2 x double> [[WIDE_LOAD:%.*]], <2 x double> [[WIDE_LOAD]]) ; -; ARMPL-SVE-LABEL: define void @lgamma_f64 +; ARMPL-SVE-LABEL: define void @fdim_f64 ; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; ARMPL-SVE: [[TMP15:%.*]] = call @armpl_svlgamma_f64_x( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) +; ARMPL-SVE: [[TMP15:%.*]] = call @armpl_svfdim_f64_x( [[WIDE_MASKED_LOAD:%.*]], [[WIDE_MASKED_LOAD]], [[ACTIVE_LANE_MASK:%.*]]) ; entry: br label %for.body @@ -1133,7 +1279,7 @@ define void @lgamma_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] %in.gep = getelementptr inbounds double, ptr %in.ptr, i64 %iv %in = load double, ptr %in.gep, align 8 - %call = tail call double @lgamma(double %in) + %call = tail call double @fdim(double %in, double %in) %out.gep = getelementptr inbounds double, ptr %out.ptr, i64 %iv store double %call, ptr %out.gep, align 8 %iv.next = add nuw nsw i64 %iv, 1 @@ -1144,22 +1290,22 @@ define void @lgamma_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { ret void } -define void @lgamma_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { -; SLEEF-NEON-LABEL: define void @lgamma_f32 +define void @fdim_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { +; SLEEF-NEON-LABEL: define void @fdim_f32 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-NEON: [[TMP3:%.*]] = call <4 x float> @_ZGVnN4v_lgammaf(<4 x float> [[WIDE_LOAD:%.*]]) +; SLEEF-NEON: [[CALL:%.*]] = tail call float @fdimf(float [[IN:%.*]], float [[IN]]) ; -; SLEEF-SVE-LABEL: define void @lgamma_f32 +; SLEEF-SVE-LABEL: define void @fdim_f32 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-SVE: [[TMP15:%.*]] = call @_ZGVsMxv_lgammaf( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) +; SLEEF-SVE: [[CALL:%.*]] = tail call float @fdimf(float [[IN:%.*]], float [[IN]]) ; -; ARMPL-NEON-LABEL: define void @lgamma_f32 +; ARMPL-NEON-LABEL: define void @fdim_f32 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; ARMPL-NEON: [[TMP3:%.*]] = call <4 x float> @armpl_vlgammaq_f32(<4 x float> [[WIDE_LOAD:%.*]]) +; ARMPL-NEON: [[TMP3:%.*]] = call <4 x float> @armpl_vfdimq_f32(<4 x float> [[WIDE_LOAD:%.*]], <4 x float> [[WIDE_LOAD]]) ; -; ARMPL-SVE-LABEL: define void @lgamma_f32 +; ARMPL-SVE-LABEL: define void @fdim_f32 ; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; ARMPL-SVE: [[TMP15:%.*]] = call @armpl_svlgamma_f32_x( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) +; ARMPL-SVE: [[TMP15:%.*]] = call @armpl_svfdim_f32_x( [[WIDE_MASKED_LOAD:%.*]], [[WIDE_MASKED_LOAD]], [[ACTIVE_LANE_MASK:%.*]]) ; entry: br label %for.body @@ -1168,7 +1314,7 @@ define void @lgamma_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] %in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv %in = load float, ptr %in.gep, align 8 - %call = tail call float @lgammaf(float %in) + %call = tail call float @fdimf(float %in, float %in) %out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv store float %call, ptr %out.gep, align 4 %iv.next = add nuw nsw i64 %iv, 1 @@ -1179,25 +1325,25 @@ define void @lgamma_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { ret void } -declare double @log(double) -declare float @logf(float) +declare double @fma(double, double, double) +declare float @fmaf(float, float, float) -define void @log_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { -; SLEEF-NEON-LABEL: define void @log_f64 +define void @fma_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { +; SLEEF-NEON-LABEL: define void @fma_f64 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-NEON: [[TMP3:%.*]] = call <2 x double> @_ZGVnN2v_log(<2 x double> [[WIDE_LOAD:%.*]]) +; SLEEF-NEON: [[CALL:%.*]] = tail call double @fma(double [[IN:%.*]], double [[IN]], double [[IN]]) ; -; SLEEF-SVE-LABEL: define void @log_f64 +; SLEEF-SVE-LABEL: define void @fma_f64 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-SVE: [[TMP15:%.*]] = call @_ZGVsMxv_log( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) +; SLEEF-SVE: [[CALL:%.*]] = tail call double @fma(double [[IN:%.*]], double [[IN]], double [[IN]]) ; -; ARMPL-NEON-LABEL: define void @log_f64 +; ARMPL-NEON-LABEL: define void @fma_f64 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; ARMPL-NEON: [[TMP3:%.*]] = call <2 x double> @armpl_vlogq_f64(<2 x double> [[WIDE_LOAD:%.*]]) +; ARMPL-NEON: [[TMP3:%.*]] = call <2 x double> @armpl_vfmaq_f64(<2 x double> [[WIDE_LOAD:%.*]], <2 x double> [[WIDE_LOAD]], <2 x double> [[WIDE_LOAD]]) ; -; ARMPL-SVE-LABEL: define void @log_f64 +; ARMPL-SVE-LABEL: define void @fma_f64 ; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; ARMPL-SVE: [[TMP15:%.*]] = call @armpl_svlog_f64_x( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) +; ARMPL-SVE: [[TMP15:%.*]] = call @armpl_svfma_f64_x( [[WIDE_MASKED_LOAD:%.*]], [[WIDE_MASKED_LOAD]], [[WIDE_MASKED_LOAD]], [[ACTIVE_LANE_MASK:%.*]]) ; entry: br label %for.body @@ -1206,7 +1352,7 @@ define void @log_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] %in.gep = getelementptr inbounds double, ptr %in.ptr, i64 %iv %in = load double, ptr %in.gep, align 8 - %call = tail call double @log(double %in) + %call = tail call double @fma(double %in, double %in, double %in) %out.gep = getelementptr inbounds double, ptr %out.ptr, i64 %iv store double %call, ptr %out.gep, align 8 %iv.next = add nuw nsw i64 %iv, 1 @@ -1217,22 +1363,22 @@ define void @log_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { ret void } -define void @log_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { -; SLEEF-NEON-LABEL: define void @log_f32 +define void @fma_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { +; SLEEF-NEON-LABEL: define void @fma_f32 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-NEON: [[TMP3:%.*]] = call <4 x float> @_ZGVnN4v_logf(<4 x float> [[WIDE_LOAD:%.*]]) +; SLEEF-NEON: [[CALL:%.*]] = tail call float @fmaf(float [[IN:%.*]], float [[IN]], float [[IN]]) ; -; SLEEF-SVE-LABEL: define void @log_f32 +; SLEEF-SVE-LABEL: define void @fma_f32 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-SVE: [[TMP15:%.*]] = call @_ZGVsMxv_logf( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) +; SLEEF-SVE: [[CALL:%.*]] = tail call float @fmaf(float [[IN:%.*]], float [[IN]], float [[IN]]) ; -; ARMPL-NEON-LABEL: define void @log_f32 +; ARMPL-NEON-LABEL: define void @fma_f32 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; ARMPL-NEON: [[TMP3:%.*]] = call <4 x float> @armpl_vlogq_f32(<4 x float> [[WIDE_LOAD:%.*]]) +; ARMPL-NEON: [[TMP3:%.*]] = call <4 x float> @armpl_vfmaq_f32(<4 x float> [[WIDE_LOAD:%.*]], <4 x float> [[WIDE_LOAD]], <4 x float> [[WIDE_LOAD]]) ; -; ARMPL-SVE-LABEL: define void @log_f32 +; ARMPL-SVE-LABEL: define void @fma_f32 ; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; ARMPL-SVE: [[TMP15:%.*]] = call @armpl_svlog_f32_x( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) +; ARMPL-SVE: [[TMP15:%.*]] = call @armpl_svfma_f32_x( [[WIDE_MASKED_LOAD:%.*]], [[WIDE_MASKED_LOAD]], [[WIDE_MASKED_LOAD]], [[ACTIVE_LANE_MASK:%.*]]) ; entry: br label %for.body @@ -1241,7 +1387,7 @@ define void @log_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] %in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv %in = load float, ptr %in.gep, align 8 - %call = tail call float @logf(float %in) + %call = tail call float @fmaf(float %in, float %in, float %in) %out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv store float %call, ptr %out.gep, align 4 %iv.next = add nuw nsw i64 %iv, 1 @@ -1252,25 +1398,25 @@ define void @log_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { ret void } -declare double @log1p(double) -declare float @log1pf(float) +declare double @fmin(double, double) +declare float @fminf(float, float) -define void @log1p_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { -; SLEEF-NEON-LABEL: define void @log1p_f64 +define void @fmin_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { +; SLEEF-NEON-LABEL: define void @fmin_f64 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-NEON: [[CALL:%.*]] = tail call double @log1p(double [[IN:%.*]]) +; SLEEF-NEON: [[CALL:%.*]] = tail call double @fmin(double [[IN:%.*]], double [[IN]]) ; -; SLEEF-SVE-LABEL: define void @log1p_f64 +; SLEEF-SVE-LABEL: define void @fmin_f64 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-SVE: [[CALL:%.*]] = tail call double @log1p(double [[IN:%.*]]) +; SLEEF-SVE: [[CALL:%.*]] = tail call double @fmin(double [[IN:%.*]], double [[IN]]) ; -; ARMPL-NEON-LABEL: define void @log1p_f64 +; ARMPL-NEON-LABEL: define void @fmin_f64 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; ARMPL-NEON: [[TMP3:%.*]] = call <2 x double> @armpl_vlog1pq_f64(<2 x double> [[WIDE_LOAD:%.*]]) +; ARMPL-NEON: [[TMP3:%.*]] = call <2 x double> @armpl_vfminq_f64(<2 x double> [[WIDE_LOAD:%.*]], <2 x double> [[WIDE_LOAD]]) ; -; ARMPL-SVE-LABEL: define void @log1p_f64 +; ARMPL-SVE-LABEL: define void @fmin_f64 ; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; ARMPL-SVE: [[TMP15:%.*]] = call @armpl_svlog1p_f64_x( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) +; ARMPL-SVE: [[TMP15:%.*]] = call @armpl_svfmin_f64_x( [[WIDE_MASKED_LOAD:%.*]], [[WIDE_MASKED_LOAD]], [[ACTIVE_LANE_MASK:%.*]]) ; entry: br label %for.body @@ -1279,7 +1425,7 @@ define void @log1p_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] %in.gep = getelementptr inbounds double, ptr %in.ptr, i64 %iv %in = load double, ptr %in.gep, align 8 - %call = tail call double @log1p(double %in) + %call = tail call double @fmin(double %in, double %in) %out.gep = getelementptr inbounds double, ptr %out.ptr, i64 %iv store double %call, ptr %out.gep, align 8 %iv.next = add nuw nsw i64 %iv, 1 @@ -1290,22 +1436,22 @@ define void @log1p_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { ret void } -define void @log1p_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { -; SLEEF-NEON-LABEL: define void @log1p_f32 +define void @fmin_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { +; SLEEF-NEON-LABEL: define void @fmin_f32 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-NEON: [[CALL:%.*]] = tail call float @log1pf(float [[IN:%.*]]) +; SLEEF-NEON: [[CALL:%.*]] = tail call float @fminf(float [[IN:%.*]], float [[IN]]) ; -; SLEEF-SVE-LABEL: define void @log1p_f32 +; SLEEF-SVE-LABEL: define void @fmin_f32 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-SVE: [[CALL:%.*]] = tail call float @log1pf(float [[IN:%.*]]) +; SLEEF-SVE: [[CALL:%.*]] = tail call float @fminf(float [[IN:%.*]], float [[IN]]) ; -; ARMPL-NEON-LABEL: define void @log1p_f32 +; ARMPL-NEON-LABEL: define void @fmin_f32 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; ARMPL-NEON: [[TMP3:%.*]] = call <4 x float> @armpl_vlog1pq_f32(<4 x float> [[WIDE_LOAD:%.*]]) +; ARMPL-NEON: [[TMP3:%.*]] = call <4 x float> @armpl_vfminq_f32(<4 x float> [[WIDE_LOAD:%.*]], <4 x float> [[WIDE_LOAD]]) ; -; ARMPL-SVE-LABEL: define void @log1p_f32 +; ARMPL-SVE-LABEL: define void @fmin_f32 ; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; ARMPL-SVE: [[TMP15:%.*]] = call @armpl_svlog1p_f32_x( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) +; ARMPL-SVE: [[TMP15:%.*]] = call @armpl_svfmin_f32_x( [[WIDE_MASKED_LOAD:%.*]], [[WIDE_MASKED_LOAD]], [[ACTIVE_LANE_MASK:%.*]]) ; entry: br label %for.body @@ -1314,7 +1460,7 @@ define void @log1p_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] %in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv %in = load float, ptr %in.gep, align 8 - %call = tail call float @log1pf(float %in) + %call = tail call float @fminf(float %in, float %in) %out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv store float %call, ptr %out.gep, align 4 %iv.next = add nuw nsw i64 %iv, 1 @@ -1325,25 +1471,25 @@ define void @log1p_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { ret void } -declare double @log2(double) -declare float @log2f(float) +declare double @fmod(double, double) +declare float @fmodf(float, float) -define void @log2_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { -; SLEEF-NEON-LABEL: define void @log2_f64 +define void @fmod_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { +; SLEEF-NEON-LABEL: define void @fmod_f64 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-NEON: [[TMP3:%.*]] = call <2 x double> @_ZGVnN2v_log2(<2 x double> [[WIDE_LOAD:%.*]]) +; SLEEF-NEON: [[TMP3:%.*]] = call <2 x double> @_ZGVnN2vv_fmod(<2 x double> [[WIDE_LOAD:%.*]], <2 x double> [[WIDE_LOAD]]) ; -; SLEEF-SVE-LABEL: define void @log2_f64 +; SLEEF-SVE-LABEL: define void @fmod_f64 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-SVE: [[TMP15:%.*]] = call @_ZGVsMxv_log2( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) +; SLEEF-SVE: [[TMP15:%.*]] = call @_ZGVsMxvv_fmod( [[WIDE_MASKED_LOAD:%.*]], [[WIDE_MASKED_LOAD]], [[ACTIVE_LANE_MASK:%.*]]) ; -; ARMPL-NEON-LABEL: define void @log2_f64 +; ARMPL-NEON-LABEL: define void @fmod_f64 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; ARMPL-NEON: [[TMP3:%.*]] = call <2 x double> @armpl_vlog2q_f64(<2 x double> [[WIDE_LOAD:%.*]]) +; ARMPL-NEON: [[TMP3:%.*]] = call <2 x double> @armpl_vfmodq_f64(<2 x double> [[WIDE_LOAD:%.*]], <2 x double> [[WIDE_LOAD]]) ; -; ARMPL-SVE-LABEL: define void @log2_f64 +; ARMPL-SVE-LABEL: define void @fmod_f64 ; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; ARMPL-SVE: [[TMP15:%.*]] = call @armpl_svlog2_f64_x( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) +; ARMPL-SVE: [[TMP15:%.*]] = call @armpl_svfmod_f64_x( [[WIDE_MASKED_LOAD:%.*]], [[WIDE_MASKED_LOAD]], [[ACTIVE_LANE_MASK:%.*]]) ; entry: br label %for.body @@ -1352,7 +1498,7 @@ define void @log2_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] %in.gep = getelementptr inbounds double, ptr %in.ptr, i64 %iv %in = load double, ptr %in.gep, align 8 - %call = tail call double @log2(double %in) + %call = tail call double @fmod(double %in, double %in) %out.gep = getelementptr inbounds double, ptr %out.ptr, i64 %iv store double %call, ptr %out.gep, align 8 %iv.next = add nuw nsw i64 %iv, 1 @@ -1363,22 +1509,22 @@ define void @log2_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { ret void } -define void @log2_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { -; SLEEF-NEON-LABEL: define void @log2_f32 +define void @fmod_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { +; SLEEF-NEON-LABEL: define void @fmod_f32 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-NEON: [[TMP3:%.*]] = call <4 x float> @_ZGVnN4v_log2f(<4 x float> [[WIDE_LOAD:%.*]]) +; SLEEF-NEON: [[TMP3:%.*]] = call <4 x float> @_ZGVnN4vv_fmodf(<4 x float> [[WIDE_LOAD:%.*]], <4 x float> [[WIDE_LOAD]]) ; -; SLEEF-SVE-LABEL: define void @log2_f32 +; SLEEF-SVE-LABEL: define void @fmod_f32 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-SVE: [[TMP15:%.*]] = call @_ZGVsMxv_log2f( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) +; SLEEF-SVE: [[TMP15:%.*]] = call @_ZGVsMxvv_fmodf( [[WIDE_MASKED_LOAD:%.*]], [[WIDE_MASKED_LOAD]], [[ACTIVE_LANE_MASK:%.*]]) ; -; ARMPL-NEON-LABEL: define void @log2_f32 +; ARMPL-NEON-LABEL: define void @fmod_f32 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; ARMPL-NEON: [[TMP3:%.*]] = call <4 x float> @armpl_vlog2q_f32(<4 x float> [[WIDE_LOAD:%.*]]) +; ARMPL-NEON: [[TMP3:%.*]] = call <4 x float> @armpl_vfmodq_f32(<4 x float> [[WIDE_LOAD:%.*]], <4 x float> [[WIDE_LOAD]]) ; -; ARMPL-SVE-LABEL: define void @log2_f32 +; ARMPL-SVE-LABEL: define void @fmod_f32 ; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; ARMPL-SVE: [[TMP15:%.*]] = call @armpl_svlog2_f32_x( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) +; ARMPL-SVE: [[TMP15:%.*]] = call @armpl_svfmod_f32_x( [[WIDE_MASKED_LOAD:%.*]], [[WIDE_MASKED_LOAD]], [[ACTIVE_LANE_MASK:%.*]]) ; entry: br label %for.body @@ -1387,7 +1533,7 @@ define void @log2_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] %in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv %in = load float, ptr %in.gep, align 8 - %call = tail call float @log2f(float %in) + %call = tail call float @fmodf(float %in, float %in) %out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv store float %call, ptr %out.gep, align 4 %iv.next = add nuw nsw i64 %iv, 1 @@ -1398,25 +1544,25 @@ define void @log2_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { ret void } -declare double @log10(double) -declare float @log10f(float) +declare double @hypot(double, double) +declare float @hypotf(float, float) -define void @log10_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { -; SLEEF-NEON-LABEL: define void @log10_f64 +define void @hypot_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { +; SLEEF-NEON-LABEL: define void @hypot_f64 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-NEON: [[TMP3:%.*]] = call <2 x double> @_ZGVnN2v_log10(<2 x double> [[WIDE_LOAD:%.*]]) +; SLEEF-NEON: [[CALL:%.*]] = tail call double @hypot(double [[IN:%.*]], double [[IN]]) ; -; SLEEF-SVE-LABEL: define void @log10_f64 +; SLEEF-SVE-LABEL: define void @hypot_f64 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-SVE: [[TMP15:%.*]] = call @_ZGVsMxv_log10( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) +; SLEEF-SVE: [[CALL:%.*]] = tail call double @hypot(double [[IN:%.*]], double [[IN]]) ; -; ARMPL-NEON-LABEL: define void @log10_f64 +; ARMPL-NEON-LABEL: define void @hypot_f64 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; ARMPL-NEON: [[TMP3:%.*]] = call <2 x double> @armpl_vlog10q_f64(<2 x double> [[WIDE_LOAD:%.*]]) +; ARMPL-NEON: [[TMP3:%.*]] = call <2 x double> @armpl_vhypotq_f64(<2 x double> [[WIDE_LOAD:%.*]], <2 x double> [[WIDE_LOAD]]) ; -; ARMPL-SVE-LABEL: define void @log10_f64 +; ARMPL-SVE-LABEL: define void @hypot_f64 ; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; ARMPL-SVE: [[TMP15:%.*]] = call @armpl_svlog10_f64_x( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) +; ARMPL-SVE: [[TMP15:%.*]] = call @armpl_svhypot_f64_x( [[WIDE_MASKED_LOAD:%.*]], [[WIDE_MASKED_LOAD]], [[ACTIVE_LANE_MASK:%.*]]) ; entry: br label %for.body @@ -1425,7 +1571,7 @@ define void @log10_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] %in.gep = getelementptr inbounds double, ptr %in.ptr, i64 %iv %in = load double, ptr %in.gep, align 8 - %call = tail call double @log10(double %in) + %call = tail call double @hypot(double %in, double %in) %out.gep = getelementptr inbounds double, ptr %out.ptr, i64 %iv store double %call, ptr %out.gep, align 8 %iv.next = add nuw nsw i64 %iv, 1 @@ -1436,22 +1582,22 @@ define void @log10_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { ret void } -define void @log10_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { -; SLEEF-NEON-LABEL: define void @log10_f32 +define void @hypot_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { +; SLEEF-NEON-LABEL: define void @hypot_f32 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-NEON: [[TMP3:%.*]] = call <4 x float> @_ZGVnN4v_log10f(<4 x float> [[WIDE_LOAD:%.*]]) +; SLEEF-NEON: [[CALL:%.*]] = tail call float @hypotf(float [[IN:%.*]], float [[IN]]) ; -; SLEEF-SVE-LABEL: define void @log10_f32 +; SLEEF-SVE-LABEL: define void @hypot_f32 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-SVE: [[TMP15:%.*]] = call @_ZGVsMxv_log10f( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) +; SLEEF-SVE: [[CALL:%.*]] = tail call float @hypotf(float [[IN:%.*]], float [[IN]]) ; -; ARMPL-NEON-LABEL: define void @log10_f32 +; ARMPL-NEON-LABEL: define void @hypot_f32 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; ARMPL-NEON: [[TMP3:%.*]] = call <4 x float> @armpl_vlog10q_f32(<4 x float> [[WIDE_LOAD:%.*]]) +; ARMPL-NEON: [[TMP3:%.*]] = call <4 x float> @armpl_vhypotq_f32(<4 x float> [[WIDE_LOAD:%.*]], <4 x float> [[WIDE_LOAD]]) ; -; ARMPL-SVE-LABEL: define void @log10_f32 +; ARMPL-SVE-LABEL: define void @hypot_f32 ; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; ARMPL-SVE: [[TMP15:%.*]] = call @armpl_svlog10_f32_x( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) +; ARMPL-SVE: [[TMP15:%.*]] = call @armpl_svhypot_f32_x( [[WIDE_MASKED_LOAD:%.*]], [[WIDE_MASKED_LOAD]], [[ACTIVE_LANE_MASK:%.*]]) ; entry: br label %for.body @@ -1460,7 +1606,7 @@ define void @log10_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] %in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv %in = load float, ptr %in.gep, align 8 - %call = tail call float @log10f(float %in) + %call = tail call float @hypotf(float %in, float %in) %out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv store float %call, ptr %out.gep, align 4 %iv.next = add nuw nsw i64 %iv, 1 @@ -1471,25 +1617,25 @@ define void @log10_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { ret void } -declare double @sin(double) -declare float @sinf(float) +declare double @lgamma(double) +declare float @lgammaf(float) -define void @sin_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { -; SLEEF-NEON-LABEL: define void @sin_f64 +define void @lgamma_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { +; SLEEF-NEON-LABEL: define void @lgamma_f64 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-NEON: [[TMP3:%.*]] = call <2 x double> @_ZGVnN2v_sin(<2 x double> [[WIDE_LOAD:%.*]]) +; SLEEF-NEON: [[TMP3:%.*]] = call <2 x double> @_ZGVnN2v_lgamma(<2 x double> [[WIDE_LOAD:%.*]]) ; -; SLEEF-SVE-LABEL: define void @sin_f64 +; SLEEF-SVE-LABEL: define void @lgamma_f64 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-SVE: [[TMP15:%.*]] = call @_ZGVsMxv_sin( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) +; SLEEF-SVE: [[TMP15:%.*]] = call @_ZGVsMxv_lgamma( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) ; -; ARMPL-NEON-LABEL: define void @sin_f64 +; ARMPL-NEON-LABEL: define void @lgamma_f64 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; ARMPL-NEON: [[TMP3:%.*]] = call <2 x double> @armpl_vsinq_f64(<2 x double> [[WIDE_LOAD:%.*]]) +; ARMPL-NEON: [[TMP3:%.*]] = call <2 x double> @armpl_vlgammaq_f64(<2 x double> [[WIDE_LOAD:%.*]]) ; -; ARMPL-SVE-LABEL: define void @sin_f64 +; ARMPL-SVE-LABEL: define void @lgamma_f64 ; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; ARMPL-SVE: [[TMP15:%.*]] = call @armpl_svsin_f64_x( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) +; ARMPL-SVE: [[TMP15:%.*]] = call @armpl_svlgamma_f64_x( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) ; entry: br label %for.body @@ -1498,7 +1644,7 @@ define void @sin_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] %in.gep = getelementptr inbounds double, ptr %in.ptr, i64 %iv %in = load double, ptr %in.gep, align 8 - %call = tail call double @sin(double %in) + %call = tail call double @lgamma(double %in) %out.gep = getelementptr inbounds double, ptr %out.ptr, i64 %iv store double %call, ptr %out.gep, align 8 %iv.next = add nuw nsw i64 %iv, 1 @@ -1509,22 +1655,22 @@ define void @sin_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { ret void } -define void @sin_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { -; SLEEF-NEON-LABEL: define void @sin_f32 +define void @lgamma_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { +; SLEEF-NEON-LABEL: define void @lgamma_f32 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-NEON: [[TMP3:%.*]] = call <4 x float> @_ZGVnN4v_sinf(<4 x float> [[WIDE_LOAD:%.*]]) +; SLEEF-NEON: [[TMP3:%.*]] = call <4 x float> @_ZGVnN4v_lgammaf(<4 x float> [[WIDE_LOAD:%.*]]) ; -; SLEEF-SVE-LABEL: define void @sin_f32 +; SLEEF-SVE-LABEL: define void @lgamma_f32 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-SVE: [[TMP15:%.*]] = call @_ZGVsMxv_sinf( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) +; SLEEF-SVE: [[TMP15:%.*]] = call @_ZGVsMxv_lgammaf( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) ; -; ARMPL-NEON-LABEL: define void @sin_f32 +; ARMPL-NEON-LABEL: define void @lgamma_f32 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; ARMPL-NEON: [[TMP3:%.*]] = call <4 x float> @armpl_vsinq_f32(<4 x float> [[WIDE_LOAD:%.*]]) +; ARMPL-NEON: [[TMP3:%.*]] = call <4 x float> @armpl_vlgammaq_f32(<4 x float> [[WIDE_LOAD:%.*]]) ; -; ARMPL-SVE-LABEL: define void @sin_f32 +; ARMPL-SVE-LABEL: define void @lgamma_f32 ; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; ARMPL-SVE: [[TMP15:%.*]] = call @armpl_svsin_f32_x( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) +; ARMPL-SVE: [[TMP15:%.*]] = call @armpl_svlgamma_f32_x( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) ; entry: br label %for.body @@ -1533,7 +1679,7 @@ define void @sin_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] %in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv %in = load float, ptr %in.gep, align 8 - %call = tail call float @sinf(float %in) + %call = tail call float @lgammaf(float %in) %out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv store float %call, ptr %out.gep, align 4 %iv.next = add nuw nsw i64 %iv, 1 @@ -1544,25 +1690,25 @@ define void @sin_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { ret void } -declare double @sinh(double) -declare float @sinhf(float) +declare double @log(double) +declare float @logf(float) -define void @sinh_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { -; SLEEF-NEON-LABEL: define void @sinh_f64 +define void @log_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { +; SLEEF-NEON-LABEL: define void @log_f64 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-NEON: [[TMP3:%.*]] = call <2 x double> @_ZGVnN2v_sinh(<2 x double> [[WIDE_LOAD:%.*]]) +; SLEEF-NEON: [[TMP3:%.*]] = call <2 x double> @_ZGVnN2v_log(<2 x double> [[WIDE_LOAD:%.*]]) ; -; SLEEF-SVE-LABEL: define void @sinh_f64 +; SLEEF-SVE-LABEL: define void @log_f64 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-SVE: [[TMP15:%.*]] = call @_ZGVsMxv_sinh( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) +; SLEEF-SVE: [[TMP15:%.*]] = call @_ZGVsMxv_log( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) ; -; ARMPL-NEON-LABEL: define void @sinh_f64 +; ARMPL-NEON-LABEL: define void @log_f64 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; ARMPL-NEON: [[TMP3:%.*]] = call <2 x double> @armpl_vsinhq_f64(<2 x double> [[WIDE_LOAD:%.*]]) +; ARMPL-NEON: [[TMP3:%.*]] = call <2 x double> @armpl_vlogq_f64(<2 x double> [[WIDE_LOAD:%.*]]) ; -; ARMPL-SVE-LABEL: define void @sinh_f64 +; ARMPL-SVE-LABEL: define void @log_f64 ; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; ARMPL-SVE: [[TMP15:%.*]] = call @armpl_svsinh_f64_x( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) +; ARMPL-SVE: [[TMP15:%.*]] = call @armpl_svlog_f64_x( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) ; entry: br label %for.body @@ -1571,7 +1717,7 @@ define void @sinh_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] %in.gep = getelementptr inbounds double, ptr %in.ptr, i64 %iv %in = load double, ptr %in.gep, align 8 - %call = tail call double @sinh(double %in) + %call = tail call double @log(double %in) %out.gep = getelementptr inbounds double, ptr %out.ptr, i64 %iv store double %call, ptr %out.gep, align 8 %iv.next = add nuw nsw i64 %iv, 1 @@ -1582,22 +1728,22 @@ define void @sinh_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { ret void } -define void @sinh_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { -; SLEEF-NEON-LABEL: define void @sinh_f32 +define void @log_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { +; SLEEF-NEON-LABEL: define void @log_f32 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-NEON: [[TMP3:%.*]] = call <4 x float> @_ZGVnN4v_sinhf(<4 x float> [[WIDE_LOAD:%.*]]) +; SLEEF-NEON: [[TMP3:%.*]] = call <4 x float> @_ZGVnN4v_logf(<4 x float> [[WIDE_LOAD:%.*]]) ; -; SLEEF-SVE-LABEL: define void @sinh_f32 +; SLEEF-SVE-LABEL: define void @log_f32 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-SVE: [[TMP15:%.*]] = call @_ZGVsMxv_sinhf( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) +; SLEEF-SVE: [[TMP15:%.*]] = call @_ZGVsMxv_logf( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) ; -; ARMPL-NEON-LABEL: define void @sinh_f32 +; ARMPL-NEON-LABEL: define void @log_f32 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; ARMPL-NEON: [[TMP3:%.*]] = call <4 x float> @armpl_vsinhq_f32(<4 x float> [[WIDE_LOAD:%.*]]) +; ARMPL-NEON: [[TMP3:%.*]] = call <4 x float> @armpl_vlogq_f32(<4 x float> [[WIDE_LOAD:%.*]]) ; -; ARMPL-SVE-LABEL: define void @sinh_f32 +; ARMPL-SVE-LABEL: define void @log_f32 ; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; ARMPL-SVE: [[TMP15:%.*]] = call @armpl_svsinh_f32_x( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) +; ARMPL-SVE: [[TMP15:%.*]] = call @armpl_svlog_f32_x( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) ; entry: br label %for.body @@ -1606,7 +1752,7 @@ define void @sinh_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] %in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv %in = load float, ptr %in.gep, align 8 - %call = tail call float @sinhf(float %in) + %call = tail call float @logf(float %in) %out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv store float %call, ptr %out.gep, align 4 %iv.next = add nuw nsw i64 %iv, 1 @@ -1617,25 +1763,25 @@ define void @sinh_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { ret void } -declare double @sinpi(double) -declare float @sinpif(float) +declare double @log10(double) +declare float @log10f(float) -define void @sinpi_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { -; SLEEF-NEON-LABEL: define void @sinpi_f64 +define void @log10_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { +; SLEEF-NEON-LABEL: define void @log10_f64 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-NEON: [[CALL:%.*]] = tail call double @sinpi(double [[IN:%.*]]) +; SLEEF-NEON: [[TMP3:%.*]] = call <2 x double> @_ZGVnN2v_log10(<2 x double> [[WIDE_LOAD:%.*]]) ; -; SLEEF-SVE-LABEL: define void @sinpi_f64 +; SLEEF-SVE-LABEL: define void @log10_f64 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-SVE: [[CALL:%.*]] = tail call double @sinpi(double [[IN:%.*]]) +; SLEEF-SVE: [[TMP15:%.*]] = call @_ZGVsMxv_log10( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) ; -; ARMPL-NEON-LABEL: define void @sinpi_f64 +; ARMPL-NEON-LABEL: define void @log10_f64 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; ARMPL-NEON: [[TMP3:%.*]] = call <2 x double> @armpl_vsinpiq_f64(<2 x double> [[WIDE_LOAD:%.*]]) +; ARMPL-NEON: [[TMP3:%.*]] = call <2 x double> @armpl_vlog10q_f64(<2 x double> [[WIDE_LOAD:%.*]]) ; -; ARMPL-SVE-LABEL: define void @sinpi_f64 +; ARMPL-SVE-LABEL: define void @log10_f64 ; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; ARMPL-SVE: [[TMP15:%.*]] = call @armpl_svsinpi_f64_x( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) +; ARMPL-SVE: [[TMP15:%.*]] = call @armpl_svlog10_f64_x( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) ; entry: br label %for.body @@ -1644,7 +1790,7 @@ define void @sinpi_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] %in.gep = getelementptr inbounds double, ptr %in.ptr, i64 %iv %in = load double, ptr %in.gep, align 8 - %call = tail call double @sinpi(double %in) + %call = tail call double @log10(double %in) %out.gep = getelementptr inbounds double, ptr %out.ptr, i64 %iv store double %call, ptr %out.gep, align 8 %iv.next = add nuw nsw i64 %iv, 1 @@ -1655,22 +1801,22 @@ define void @sinpi_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { ret void } -define void @sinpi_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { -; SLEEF-NEON-LABEL: define void @sinpi_f32 +define void @log10_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { +; SLEEF-NEON-LABEL: define void @log10_f32 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-NEON: [[CALL:%.*]] = tail call float @sinpif(float [[IN:%.*]]) +; SLEEF-NEON: [[TMP3:%.*]] = call <4 x float> @_ZGVnN4v_log10f(<4 x float> [[WIDE_LOAD:%.*]]) ; -; SLEEF-SVE-LABEL: define void @sinpi_f32 +; SLEEF-SVE-LABEL: define void @log10_f32 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-SVE: [[CALL:%.*]] = tail call float @sinpif(float [[IN:%.*]]) +; SLEEF-SVE: [[TMP15:%.*]] = call @_ZGVsMxv_log10f( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) ; -; ARMPL-NEON-LABEL: define void @sinpi_f32 +; ARMPL-NEON-LABEL: define void @log10_f32 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; ARMPL-NEON: [[TMP3:%.*]] = call <4 x float> @armpl_vsinpiq_f32(<4 x float> [[WIDE_LOAD:%.*]]) +; ARMPL-NEON: [[TMP3:%.*]] = call <4 x float> @armpl_vlog10q_f32(<4 x float> [[WIDE_LOAD:%.*]]) ; -; ARMPL-SVE-LABEL: define void @sinpi_f32 +; ARMPL-SVE-LABEL: define void @log10_f32 ; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; ARMPL-SVE: [[TMP15:%.*]] = call @armpl_svsinpi_f32_x( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) +; ARMPL-SVE: [[TMP15:%.*]] = call @armpl_svlog10_f32_x( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) ; entry: br label %for.body @@ -1679,7 +1825,7 @@ define void @sinpi_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] %in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv %in = load float, ptr %in.gep, align 8 - %call = tail call float @sinpif(float %in) + %call = tail call float @log10f(float %in) %out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv store float %call, ptr %out.gep, align 4 %iv.next = add nuw nsw i64 %iv, 1 @@ -1690,25 +1836,25 @@ define void @sinpi_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { ret void } -declare double @sqrt(double) -declare float @sqrtf(float) +declare double @log1p(double) +declare float @log1pf(float) -define void @sqrt_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { -; SLEEF-NEON-LABEL: define void @sqrt_f64 +define void @log1p_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { +; SLEEF-NEON-LABEL: define void @log1p_f64 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-NEON: [[TMP3:%.*]] = call <2 x double> @_ZGVnN2v_sqrt(<2 x double> [[WIDE_LOAD:%.*]]) +; SLEEF-NEON: [[CALL:%.*]] = tail call double @log1p(double [[IN:%.*]]) ; -; SLEEF-SVE-LABEL: define void @sqrt_f64 +; SLEEF-SVE-LABEL: define void @log1p_f64 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-SVE: [[TMP15:%.*]] = call @_ZGVsMxv_sqrt( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) +; SLEEF-SVE: [[CALL:%.*]] = tail call double @log1p(double [[IN:%.*]]) ; -; ARMPL-NEON-LABEL: define void @sqrt_f64 +; ARMPL-NEON-LABEL: define void @log1p_f64 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; ARMPL-NEON: [[TMP3:%.*]] = call <2 x double> @armpl_vsqrtq_f64(<2 x double> [[WIDE_LOAD:%.*]]) +; ARMPL-NEON: [[TMP3:%.*]] = call <2 x double> @armpl_vlog1pq_f64(<2 x double> [[WIDE_LOAD:%.*]]) ; -; ARMPL-SVE-LABEL: define void @sqrt_f64 +; ARMPL-SVE-LABEL: define void @log1p_f64 ; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; ARMPL-SVE: [[TMP15:%.*]] = call @armpl_svsqrt_f64_x( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) +; ARMPL-SVE: [[TMP15:%.*]] = call @armpl_svlog1p_f64_x( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) ; entry: br label %for.body @@ -1717,7 +1863,7 @@ define void @sqrt_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] %in.gep = getelementptr inbounds double, ptr %in.ptr, i64 %iv %in = load double, ptr %in.gep, align 8 - %call = tail call double @sqrt(double %in) + %call = tail call double @log1p(double %in) %out.gep = getelementptr inbounds double, ptr %out.ptr, i64 %iv store double %call, ptr %out.gep, align 8 %iv.next = add nuw nsw i64 %iv, 1 @@ -1728,22 +1874,22 @@ define void @sqrt_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { ret void } -define void @sqrt_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { -; SLEEF-NEON-LABEL: define void @sqrt_f32 +define void @log1p_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { +; SLEEF-NEON-LABEL: define void @log1p_f32 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-NEON: [[TMP3:%.*]] = call <4 x float> @_ZGVnN4v_sqrtf(<4 x float> [[WIDE_LOAD:%.*]]) +; SLEEF-NEON: [[CALL:%.*]] = tail call float @log1pf(float [[IN:%.*]]) ; -; SLEEF-SVE-LABEL: define void @sqrt_f32 +; SLEEF-SVE-LABEL: define void @log1p_f32 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-SVE: [[TMP15:%.*]] = call @_ZGVsMxv_sqrtf( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) +; SLEEF-SVE: [[CALL:%.*]] = tail call float @log1pf(float [[IN:%.*]]) ; -; ARMPL-NEON-LABEL: define void @sqrt_f32 +; ARMPL-NEON-LABEL: define void @log1p_f32 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; ARMPL-NEON: [[TMP3:%.*]] = call <4 x float> @armpl_vsqrtq_f32(<4 x float> [[WIDE_LOAD:%.*]]) +; ARMPL-NEON: [[TMP3:%.*]] = call <4 x float> @armpl_vlog1pq_f32(<4 x float> [[WIDE_LOAD:%.*]]) ; -; ARMPL-SVE-LABEL: define void @sqrt_f32 +; ARMPL-SVE-LABEL: define void @log1p_f32 ; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; ARMPL-SVE: [[TMP15:%.*]] = call @armpl_svsqrt_f32_x( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) +; ARMPL-SVE: [[TMP15:%.*]] = call @armpl_svlog1p_f32_x( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) ; entry: br label %for.body @@ -1752,7 +1898,7 @@ define void @sqrt_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] %in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv %in = load float, ptr %in.gep, align 8 - %call = tail call float @sqrtf(float %in) + %call = tail call float @log1pf(float %in) %out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv store float %call, ptr %out.gep, align 4 %iv.next = add nuw nsw i64 %iv, 1 @@ -1763,25 +1909,25 @@ define void @sqrt_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { ret void } -declare double @tan(double) -declare float @tanf(float) +declare double @log2(double) +declare float @log2f(float) -define void @tan_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { -; SLEEF-NEON-LABEL: define void @tan_f64 +define void @log2_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { +; SLEEF-NEON-LABEL: define void @log2_f64 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-NEON: [[TMP3:%.*]] = call <2 x double> @_ZGVnN2v_tan(<2 x double> [[WIDE_LOAD:%.*]]) +; SLEEF-NEON: [[TMP3:%.*]] = call <2 x double> @_ZGVnN2v_log2(<2 x double> [[WIDE_LOAD:%.*]]) ; -; SLEEF-SVE-LABEL: define void @tan_f64 +; SLEEF-SVE-LABEL: define void @log2_f64 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-SVE: [[TMP15:%.*]] = call @_ZGVsMxv_tan( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) +; SLEEF-SVE: [[TMP15:%.*]] = call @_ZGVsMxv_log2( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) ; -; ARMPL-NEON-LABEL: define void @tan_f64 +; ARMPL-NEON-LABEL: define void @log2_f64 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; ARMPL-NEON: [[TMP3:%.*]] = call <2 x double> @armpl_vtanq_f64(<2 x double> [[WIDE_LOAD:%.*]]) +; ARMPL-NEON: [[TMP3:%.*]] = call <2 x double> @armpl_vlog2q_f64(<2 x double> [[WIDE_LOAD:%.*]]) ; -; ARMPL-SVE-LABEL: define void @tan_f64 +; ARMPL-SVE-LABEL: define void @log2_f64 ; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; ARMPL-SVE: [[TMP15:%.*]] = call @armpl_svtan_f64_x( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) +; ARMPL-SVE: [[TMP15:%.*]] = call @armpl_svlog2_f64_x( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) ; entry: br label %for.body @@ -1790,7 +1936,7 @@ define void @tan_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] %in.gep = getelementptr inbounds double, ptr %in.ptr, i64 %iv %in = load double, ptr %in.gep, align 8 - %call = tail call double @tan(double %in) + %call = tail call double @log2(double %in) %out.gep = getelementptr inbounds double, ptr %out.ptr, i64 %iv store double %call, ptr %out.gep, align 8 %iv.next = add nuw nsw i64 %iv, 1 @@ -1801,22 +1947,22 @@ define void @tan_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { ret void } -define void @tan_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { -; SLEEF-NEON-LABEL: define void @tan_f32 +define void @log2_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { +; SLEEF-NEON-LABEL: define void @log2_f32 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-NEON: [[TMP3:%.*]] = call <4 x float> @_ZGVnN4v_tanf(<4 x float> [[WIDE_LOAD:%.*]]) +; SLEEF-NEON: [[TMP3:%.*]] = call <4 x float> @_ZGVnN4v_log2f(<4 x float> [[WIDE_LOAD:%.*]]) ; -; SLEEF-SVE-LABEL: define void @tan_f32 +; SLEEF-SVE-LABEL: define void @log2_f32 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-SVE: [[TMP15:%.*]] = call @_ZGVsMxv_tanf( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) +; SLEEF-SVE: [[TMP15:%.*]] = call @_ZGVsMxv_log2f( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) ; -; ARMPL-NEON-LABEL: define void @tan_f32 +; ARMPL-NEON-LABEL: define void @log2_f32 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; ARMPL-NEON: [[TMP3:%.*]] = call <4 x float> @armpl_vtanq_f32(<4 x float> [[WIDE_LOAD:%.*]]) +; ARMPL-NEON: [[TMP3:%.*]] = call <4 x float> @armpl_vlog2q_f32(<4 x float> [[WIDE_LOAD:%.*]]) ; -; ARMPL-SVE-LABEL: define void @tan_f32 +; ARMPL-SVE-LABEL: define void @log2_f32 ; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; ARMPL-SVE: [[TMP15:%.*]] = call @armpl_svtan_f32_x( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) +; ARMPL-SVE: [[TMP15:%.*]] = call @armpl_svlog2_f32_x( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) ; entry: br label %for.body @@ -1825,7 +1971,7 @@ define void @tan_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] %in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv %in = load float, ptr %in.gep, align 8 - %call = tail call float @tanf(float %in) + %call = tail call float @log2f(float %in) %out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv store float %call, ptr %out.gep, align 4 %iv.next = add nuw nsw i64 %iv, 1 @@ -1836,25 +1982,100 @@ define void @tan_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { ret void } -declare double @tanh(double) -declare float @tanhf(float) +declare double @modf(double, ptr) +declare float @modff(float, ptr) -define void @tanh_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { -; SLEEF-NEON-LABEL: define void @tanh_f64 +define void @modf_f64(ptr noalias %a, ptr noalias %b, ptr noalias %c) { +; SLEEF-NEON-LABEL: define void @modf_f64 +; SLEEF-NEON-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] { +; SLEEF-NEON: [[TMP5:%.*]] = call <2 x double> @_ZGVnN2vl8_modf(<2 x double> [[WIDE_LOAD:%.*]], ptr [[TMP4:%.*]]) +; +; SLEEF-SVE-LABEL: define void @modf_f64 +; SLEEF-SVE-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] { +; SLEEF-SVE: [[TMP23:%.*]] = call @_ZGVsMxvl8_modf( [[WIDE_MASKED_LOAD:%.*]], ptr [[TMP22:%.*]], [[ACTIVE_LANE_MASK:%.*]]) +; +; ARMPL-NEON-LABEL: define void @modf_f64 +; ARMPL-NEON-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] { +; ARMPL-NEON: [[TMP5:%.*]] = call <2 x double> @armpl_vmodfq_f64(<2 x double> [[WIDE_LOAD:%.*]], ptr [[TMP4:%.*]]) +; +; ARMPL-SVE-LABEL: define void @modf_f64 +; ARMPL-SVE-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] { +; ARMPL-SVE: [[TMP23:%.*]] = call @armpl_svmodf_f64_x( [[WIDE_MASKED_LOAD:%.*]], ptr [[TMP22:%.*]], [[ACTIVE_LANE_MASK:%.*]]) +; +entry: + br label %for.body + +for.body: + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %gepa = getelementptr double, ptr %a, i64 %indvars.iv + %num = load double, ptr %gepa, align 8 + %gepb = getelementptr double, ptr %b, i64 %indvars.iv + %data = call double @modf(double %num, ptr %gepb) + %gepc = getelementptr inbounds double, ptr %c, i64 %indvars.iv + store double %data, ptr %gepc, align 8 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, 1000 + br i1 %exitcond, label %for.cond.cleanup, label %for.body + +for.cond.cleanup: + ret void +} + +define void @modf_f32(ptr noalias %a, ptr noalias %b, ptr noalias %c) { +; SLEEF-NEON-LABEL: define void @modf_f32 +; SLEEF-NEON-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] { +; SLEEF-NEON: [[TMP5:%.*]] = call <4 x float> @_ZGVnN4vl4_modff(<4 x float> [[WIDE_LOAD:%.*]], ptr [[TMP4:%.*]]) +; +; SLEEF-SVE-LABEL: define void @modf_f32 +; SLEEF-SVE-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] { +; SLEEF-SVE: [[TMP23:%.*]] = call @_ZGVsMxvl4_modff( [[WIDE_MASKED_LOAD:%.*]], ptr [[TMP22:%.*]], [[ACTIVE_LANE_MASK:%.*]]) +; +; ARMPL-NEON-LABEL: define void @modf_f32 +; ARMPL-NEON-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] { +; ARMPL-NEON: [[TMP5:%.*]] = call <4 x float> @armpl_vmodfq_f32(<4 x float> [[WIDE_LOAD:%.*]], ptr [[TMP4:%.*]]) +; +; ARMPL-SVE-LABEL: define void @modf_f32 +; ARMPL-SVE-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] { +; ARMPL-SVE: [[TMP23:%.*]] = call @armpl_svmodf_f32_x( [[WIDE_MASKED_LOAD:%.*]], ptr [[TMP22:%.*]], [[ACTIVE_LANE_MASK:%.*]]) +; +entry: + br label %for.body + +for.body: + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %gepa = getelementptr float, ptr %a, i64 %indvars.iv + %num = load float, ptr %gepa, align 8 + %gepb = getelementptr float, ptr %b, i64 %indvars.iv + %data = call float @modff(float %num, ptr %gepb) + %gepc = getelementptr inbounds float, ptr %c, i64 %indvars.iv + store float %data, ptr %gepc, align 8 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, 1000 + br i1 %exitcond, label %for.cond.cleanup, label %for.body + +for.cond.cleanup: + ret void +} + +declare double @nextafter(double, double) +declare float @nextafterf(float, float) + +define void @nextafter_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { +; SLEEF-NEON-LABEL: define void @nextafter_f64 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-NEON: [[TMP3:%.*]] = call <2 x double> @_ZGVnN2v_tanh(<2 x double> [[WIDE_LOAD:%.*]]) +; SLEEF-NEON: [[CALL:%.*]] = tail call double @nextafter(double [[IN:%.*]], double [[IN]]) ; -; SLEEF-SVE-LABEL: define void @tanh_f64 +; SLEEF-SVE-LABEL: define void @nextafter_f64 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-SVE: [[TMP15:%.*]] = call @_ZGVsMxv_tanh( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) +; SLEEF-SVE: [[CALL:%.*]] = tail call double @nextafter(double [[IN:%.*]], double [[IN]]) ; -; ARMPL-NEON-LABEL: define void @tanh_f64 +; ARMPL-NEON-LABEL: define void @nextafter_f64 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; ARMPL-NEON: [[TMP3:%.*]] = call <2 x double> @armpl_vtanhq_f64(<2 x double> [[WIDE_LOAD:%.*]]) +; ARMPL-NEON: [[TMP3:%.*]] = call <2 x double> @armpl_vnextafterq_f64(<2 x double> [[WIDE_LOAD:%.*]], <2 x double> [[WIDE_LOAD]]) ; -; ARMPL-SVE-LABEL: define void @tanh_f64 +; ARMPL-SVE-LABEL: define void @nextafter_f64 ; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; ARMPL-SVE: [[TMP15:%.*]] = call @armpl_svtanh_f64_x( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) +; ARMPL-SVE: [[TMP15:%.*]] = call @armpl_svnextafter_f64_x( [[WIDE_MASKED_LOAD:%.*]], [[WIDE_MASKED_LOAD]], [[ACTIVE_LANE_MASK:%.*]]) ; entry: br label %for.body @@ -1863,7 +2084,7 @@ define void @tanh_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] %in.gep = getelementptr inbounds double, ptr %in.ptr, i64 %iv %in = load double, ptr %in.gep, align 8 - %call = tail call double @tanh(double %in) + %call = tail call double @nextafter(double %in, double %in) %out.gep = getelementptr inbounds double, ptr %out.ptr, i64 %iv store double %call, ptr %out.gep, align 8 %iv.next = add nuw nsw i64 %iv, 1 @@ -1874,22 +2095,22 @@ define void @tanh_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { ret void } -define void @tanh_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { -; SLEEF-NEON-LABEL: define void @tanh_f32 +define void @nextafter_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { +; SLEEF-NEON-LABEL: define void @nextafter_f32 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-NEON: [[TMP3:%.*]] = call <4 x float> @_ZGVnN4v_tanhf(<4 x float> [[WIDE_LOAD:%.*]]) +; SLEEF-NEON: [[CALL:%.*]] = tail call float @nextafterf(float [[IN:%.*]], float [[IN]]) ; -; SLEEF-SVE-LABEL: define void @tanh_f32 +; SLEEF-SVE-LABEL: define void @nextafter_f32 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-SVE: [[TMP15:%.*]] = call @_ZGVsMxv_tanhf( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) +; SLEEF-SVE: [[CALL:%.*]] = tail call float @nextafterf(float [[IN:%.*]], float [[IN]]) ; -; ARMPL-NEON-LABEL: define void @tanh_f32 +; ARMPL-NEON-LABEL: define void @nextafter_f32 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; ARMPL-NEON: [[TMP3:%.*]] = call <4 x float> @armpl_vtanhq_f32(<4 x float> [[WIDE_LOAD:%.*]]) +; ARMPL-NEON: [[TMP3:%.*]] = call <4 x float> @armpl_vnextafterq_f32(<4 x float> [[WIDE_LOAD:%.*]], <4 x float> [[WIDE_LOAD]]) ; -; ARMPL-SVE-LABEL: define void @tanh_f32 +; ARMPL-SVE-LABEL: define void @nextafter_f32 ; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; ARMPL-SVE: [[TMP15:%.*]] = call @armpl_svtanh_f32_x( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) +; ARMPL-SVE: [[TMP15:%.*]] = call @armpl_svnextafter_f32_x( [[WIDE_MASKED_LOAD:%.*]], [[WIDE_MASKED_LOAD]], [[ACTIVE_LANE_MASK:%.*]]) ; entry: br label %for.body @@ -1898,7 +2119,7 @@ define void @tanh_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] %in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv %in = load float, ptr %in.gep, align 8 - %call = tail call float @tanhf(float %in) + %call = tail call float @nextafterf(float %in, float %in) %out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv store float %call, ptr %out.gep, align 4 %iv.next = add nuw nsw i64 %iv, 1 @@ -1909,25 +2130,25 @@ define void @tanh_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { ret void } -declare double @tgamma(double) -declare float @tgammaf(float) +declare double @pow(double, double) +declare float @powf(float, float) -define void @tgamma_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { -; SLEEF-NEON-LABEL: define void @tgamma_f64 +define void @pow_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { +; SLEEF-NEON-LABEL: define void @pow_f64 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-NEON: [[TMP3:%.*]] = call <2 x double> @_ZGVnN2v_tgamma(<2 x double> [[WIDE_LOAD:%.*]]) +; SLEEF-NEON: [[TMP3:%.*]] = call <2 x double> @_ZGVnN2vv_pow(<2 x double> [[WIDE_LOAD:%.*]], <2 x double> [[WIDE_LOAD]]) ; -; SLEEF-SVE-LABEL: define void @tgamma_f64 +; SLEEF-SVE-LABEL: define void @pow_f64 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-SVE: [[TMP15:%.*]] = call @_ZGVsMxv_tgamma( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) +; SLEEF-SVE: [[TMP15:%.*]] = call @_ZGVsMxvv_pow( [[WIDE_MASKED_LOAD:%.*]], [[WIDE_MASKED_LOAD]], [[ACTIVE_LANE_MASK:%.*]]) ; -; ARMPL-NEON-LABEL: define void @tgamma_f64 +; ARMPL-NEON-LABEL: define void @pow_f64 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; ARMPL-NEON: [[TMP3:%.*]] = call <2 x double> @armpl_vtgammaq_f64(<2 x double> [[WIDE_LOAD:%.*]]) +; ARMPL-NEON: [[TMP3:%.*]] = call <2 x double> @armpl_vpowq_f64(<2 x double> [[WIDE_LOAD:%.*]], <2 x double> [[WIDE_LOAD]]) ; -; ARMPL-SVE-LABEL: define void @tgamma_f64 +; ARMPL-SVE-LABEL: define void @pow_f64 ; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; ARMPL-SVE: [[TMP15:%.*]] = call @armpl_svtgamma_f64_x( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) +; ARMPL-SVE: [[TMP15:%.*]] = call @armpl_svpow_f64_x( [[WIDE_MASKED_LOAD:%.*]], [[WIDE_MASKED_LOAD]], [[ACTIVE_LANE_MASK:%.*]]) ; entry: br label %for.body @@ -1936,7 +2157,7 @@ define void @tgamma_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] %in.gep = getelementptr inbounds double, ptr %in.ptr, i64 %iv %in = load double, ptr %in.gep, align 8 - %call = tail call double @tgamma(double %in) + %call = tail call double @pow(double %in, double %in) %out.gep = getelementptr inbounds double, ptr %out.ptr, i64 %iv store double %call, ptr %out.gep, align 8 %iv.next = add nuw nsw i64 %iv, 1 @@ -1947,22 +2168,22 @@ define void @tgamma_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { ret void } -define void @tgamma_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { -; SLEEF-NEON-LABEL: define void @tgamma_f32 +define void @pow_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { +; SLEEF-NEON-LABEL: define void @pow_f32 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-NEON: [[TMP3:%.*]] = call <4 x float> @_ZGVnN4v_tgammaf(<4 x float> [[WIDE_LOAD:%.*]]) +; SLEEF-NEON: [[TMP3:%.*]] = call <4 x float> @_ZGVnN4vv_powf(<4 x float> [[WIDE_LOAD:%.*]], <4 x float> [[WIDE_LOAD]]) ; -; SLEEF-SVE-LABEL: define void @tgamma_f32 +; SLEEF-SVE-LABEL: define void @pow_f32 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-SVE: [[TMP15:%.*]] = call @_ZGVsMxv_tgammaf( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) +; SLEEF-SVE: [[TMP15:%.*]] = call @_ZGVsMxvv_powf( [[WIDE_MASKED_LOAD:%.*]], [[WIDE_MASKED_LOAD]], [[ACTIVE_LANE_MASK:%.*]]) ; -; ARMPL-NEON-LABEL: define void @tgamma_f32 +; ARMPL-NEON-LABEL: define void @pow_f32 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; ARMPL-NEON: [[TMP3:%.*]] = call <4 x float> @armpl_vtgammaq_f32(<4 x float> [[WIDE_LOAD:%.*]]) +; ARMPL-NEON: [[TMP3:%.*]] = call <4 x float> @armpl_vpowq_f32(<4 x float> [[WIDE_LOAD:%.*]], <4 x float> [[WIDE_LOAD]]) ; -; ARMPL-SVE-LABEL: define void @tgamma_f32 +; ARMPL-SVE-LABEL: define void @pow_f32 ; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; ARMPL-SVE: [[TMP15:%.*]] = call @armpl_svtgamma_f32_x( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) +; ARMPL-SVE: [[TMP15:%.*]] = call @armpl_svpow_f32_x( [[WIDE_MASKED_LOAD:%.*]], [[WIDE_MASKED_LOAD]], [[ACTIVE_LANE_MASK:%.*]]) ; entry: br label %for.body @@ -1971,7 +2192,7 @@ define void @tgamma_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] %in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv %in = load float, ptr %in.gep, align 8 - %call = tail call float @tgammaf(float %in) + %call = tail call float @powf(float %in, float %in) %out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv store float %call, ptr %out.gep, align 4 %iv.next = add nuw nsw i64 %iv, 1 @@ -1982,25 +2203,25 @@ define void @tgamma_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { ret void } -declare double @atan2(double, double) -declare float @atan2f(float, float) +declare double @sin(double) +declare float @sinf(float) -define void @atan2_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { -; SLEEF-NEON-LABEL: define void @atan2_f64 +define void @sin_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { +; SLEEF-NEON-LABEL: define void @sin_f64 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-NEON: [[TMP3:%.*]] = call <2 x double> @_ZGVnN2vv_atan2(<2 x double> [[WIDE_LOAD:%.*]], <2 x double> [[WIDE_LOAD]]) +; SLEEF-NEON: [[TMP3:%.*]] = call <2 x double> @_ZGVnN2v_sin(<2 x double> [[WIDE_LOAD:%.*]]) ; -; SLEEF-SVE-LABEL: define void @atan2_f64 +; SLEEF-SVE-LABEL: define void @sin_f64 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-SVE: [[TMP15:%.*]] = call @_ZGVsMxvv_atan2( [[WIDE_MASKED_LOAD:%.*]], [[WIDE_MASKED_LOAD]], [[ACTIVE_LANE_MASK:%.*]]) +; SLEEF-SVE: [[TMP15:%.*]] = call @_ZGVsMxv_sin( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) ; -; ARMPL-NEON-LABEL: define void @atan2_f64 +; ARMPL-NEON-LABEL: define void @sin_f64 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; ARMPL-NEON: [[TMP3:%.*]] = call <2 x double> @armpl_vatan2q_f64(<2 x double> [[WIDE_LOAD:%.*]], <2 x double> [[WIDE_LOAD]]) +; ARMPL-NEON: [[TMP3:%.*]] = call <2 x double> @armpl_vsinq_f64(<2 x double> [[WIDE_LOAD:%.*]]) ; -; ARMPL-SVE-LABEL: define void @atan2_f64 +; ARMPL-SVE-LABEL: define void @sin_f64 ; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; ARMPL-SVE: [[TMP15:%.*]] = call @armpl_svatan2_f64_x( [[WIDE_MASKED_LOAD:%.*]], [[WIDE_MASKED_LOAD]], [[ACTIVE_LANE_MASK:%.*]]) +; ARMPL-SVE: [[TMP15:%.*]] = call @armpl_svsin_f64_x( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) ; entry: br label %for.body @@ -2009,7 +2230,7 @@ define void @atan2_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] %in.gep = getelementptr inbounds double, ptr %in.ptr, i64 %iv %in = load double, ptr %in.gep, align 8 - %call = tail call double @atan2(double %in, double %in) + %call = tail call double @sin(double %in) %out.gep = getelementptr inbounds double, ptr %out.ptr, i64 %iv store double %call, ptr %out.gep, align 8 %iv.next = add nuw nsw i64 %iv, 1 @@ -2020,22 +2241,22 @@ define void @atan2_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { ret void } -define void @atan2_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { -; SLEEF-NEON-LABEL: define void @atan2_f32 +define void @sin_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { +; SLEEF-NEON-LABEL: define void @sin_f32 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-NEON: [[TMP3:%.*]] = call <4 x float> @_ZGVnN4vv_atan2f(<4 x float> [[WIDE_LOAD:%.*]], <4 x float> [[WIDE_LOAD]]) +; SLEEF-NEON: [[TMP3:%.*]] = call <4 x float> @_ZGVnN4v_sinf(<4 x float> [[WIDE_LOAD:%.*]]) ; -; SLEEF-SVE-LABEL: define void @atan2_f32 +; SLEEF-SVE-LABEL: define void @sin_f32 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-SVE: [[TMP15:%.*]] = call @_ZGVsMxvv_atan2f( [[WIDE_MASKED_LOAD:%.*]], [[WIDE_MASKED_LOAD]], [[ACTIVE_LANE_MASK:%.*]]) +; SLEEF-SVE: [[TMP15:%.*]] = call @_ZGVsMxv_sinf( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) ; -; ARMPL-NEON-LABEL: define void @atan2_f32 +; ARMPL-NEON-LABEL: define void @sin_f32 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; ARMPL-NEON: [[TMP3:%.*]] = call <4 x float> @armpl_vatan2q_f32(<4 x float> [[WIDE_LOAD:%.*]], <4 x float> [[WIDE_LOAD]]) +; ARMPL-NEON: [[TMP3:%.*]] = call <4 x float> @armpl_vsinq_f32(<4 x float> [[WIDE_LOAD:%.*]]) ; -; ARMPL-SVE-LABEL: define void @atan2_f32 +; ARMPL-SVE-LABEL: define void @sin_f32 ; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; ARMPL-SVE: [[TMP15:%.*]] = call @armpl_svatan2_f32_x( [[WIDE_MASKED_LOAD:%.*]], [[WIDE_MASKED_LOAD]], [[ACTIVE_LANE_MASK:%.*]]) +; ARMPL-SVE: [[TMP15:%.*]] = call @armpl_svsin_f32_x( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) ; entry: br label %for.body @@ -2044,7 +2265,7 @@ define void @atan2_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] %in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv %in = load float, ptr %in.gep, align 8 - %call = tail call float @atan2f(float %in, float %in) + %call = tail call float @sinf(float %in) %out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv store float %call, ptr %out.gep, align 4 %iv.next = add nuw nsw i64 %iv, 1 @@ -2055,25 +2276,98 @@ define void @atan2_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { ret void } -declare double @copysign(double, double) -declare float @copysignf(float, float) +declare void @sincos(double, ptr, ptr) +declare void @sincosf(float, ptr, ptr) -define void @copysign_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { -; SLEEF-NEON-LABEL: define void @copysign_f64 +define void @sincos_f64(ptr noalias %a, ptr noalias %b, ptr noalias %c) { +; SLEEF-NEON-LABEL: define void @sincos_f64 +; SLEEF-NEON-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] { +; SLEEF-NEON: call void @_ZGVnN2vl8l8_sincos(<2 x double> [[WIDE_LOAD:%.*]], ptr [[TMP5:%.*]], ptr [[TMP6:%.*]]) +; +; SLEEF-SVE-LABEL: define void @sincos_f64 +; SLEEF-SVE-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] { +; SLEEF-SVE: call void @_ZGVsMxvl8l8_sincos( [[WIDE_MASKED_LOAD:%.*]], ptr [[TMP23:%.*]], ptr [[TMP24:%.*]], [[ACTIVE_LANE_MASK:%.*]]) +; +; ARMPL-NEON-LABEL: define void @sincos_f64 +; ARMPL-NEON-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] { +; ARMPL-NEON: call void @armpl_vsincosq_f64(<2 x double> [[WIDE_LOAD:%.*]], ptr [[TMP5:%.*]], ptr [[TMP6:%.*]]) +; +; ARMPL-SVE-LABEL: define void @sincos_f64 +; ARMPL-SVE-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] { +; ARMPL-SVE: call void @armpl_svsincos_f64_x( [[WIDE_MASKED_LOAD:%.*]], ptr [[TMP23:%.*]], ptr [[TMP24:%.*]], [[ACTIVE_LANE_MASK:%.*]]) +; +entry: + br label %for.body + +for.body: + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %gepa = getelementptr double, ptr %a, i64 %indvars.iv + %num = load double, ptr %gepa, align 8 + %gepb = getelementptr double, ptr %b, i64 %indvars.iv + %gepc = getelementptr double, ptr %c, i64 %indvars.iv + call void @sincos(double %num, ptr %gepb, ptr %gepc) + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, 1000 + br i1 %exitcond, label %for.cond.cleanup, label %for.body + +for.cond.cleanup: + ret void +} + +define void @sincos_f32(ptr noalias %a, ptr noalias %b, ptr noalias %c) { +; SLEEF-NEON-LABEL: define void @sincos_f32 +; SLEEF-NEON-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] { +; SLEEF-NEON: call void @_ZGVnN4vl4l4_sincosf(<4 x float> [[WIDE_LOAD:%.*]], ptr [[TMP5:%.*]], ptr [[TMP6:%.*]]) +; +; SLEEF-SVE-LABEL: define void @sincos_f32 +; SLEEF-SVE-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] { +; SLEEF-SVE: call void @_ZGVsMxvl4l4_sincosf( [[WIDE_MASKED_LOAD:%.*]], ptr [[TMP23:%.*]], ptr [[TMP24:%.*]], [[ACTIVE_LANE_MASK:%.*]]) +; +; ARMPL-NEON-LABEL: define void @sincos_f32 +; ARMPL-NEON-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] { +; ARMPL-NEON: call void @armpl_vsincosq_f32(<4 x float> [[WIDE_LOAD:%.*]], ptr [[TMP5:%.*]], ptr [[TMP6:%.*]]) +; +; ARMPL-SVE-LABEL: define void @sincos_f32 +; ARMPL-SVE-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] { +; ARMPL-SVE: call void @armpl_svsincos_f32_x( [[WIDE_MASKED_LOAD:%.*]], ptr [[TMP23:%.*]], ptr [[TMP24:%.*]], [[ACTIVE_LANE_MASK:%.*]]) +; +entry: + br label %for.body + +for.body: + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %gepa = getelementptr float, ptr %a, i64 %indvars.iv + %num = load float, ptr %gepa, align 8 + %gepb = getelementptr float, ptr %b, i64 %indvars.iv + %gepc = getelementptr float, ptr %c, i64 %indvars.iv + call void @sincosf(float %num, ptr %gepb, ptr %gepc) + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, 1000 + br i1 %exitcond, label %for.cond.cleanup, label %for.body + +for.cond.cleanup: + ret void +} + +declare double @sinh(double) +declare float @sinhf(float) + +define void @sinh_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { +; SLEEF-NEON-LABEL: define void @sinh_f64 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-NEON: [[CALL:%.*]] = tail call double @copysign(double [[IN:%.*]], double [[IN]]) +; SLEEF-NEON: [[TMP3:%.*]] = call <2 x double> @_ZGVnN2v_sinh(<2 x double> [[WIDE_LOAD:%.*]]) ; -; SLEEF-SVE-LABEL: define void @copysign_f64 +; SLEEF-SVE-LABEL: define void @sinh_f64 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-SVE: [[CALL:%.*]] = tail call double @copysign(double [[IN:%.*]], double [[IN]]) +; SLEEF-SVE: [[TMP15:%.*]] = call @_ZGVsMxv_sinh( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) ; -; ARMPL-NEON-LABEL: define void @copysign_f64 +; ARMPL-NEON-LABEL: define void @sinh_f64 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; ARMPL-NEON: [[TMP3:%.*]] = call <2 x double> @armpl_vcopysignq_f64(<2 x double> [[WIDE_LOAD:%.*]], <2 x double> [[WIDE_LOAD]]) +; ARMPL-NEON: [[TMP3:%.*]] = call <2 x double> @armpl_vsinhq_f64(<2 x double> [[WIDE_LOAD:%.*]]) ; -; ARMPL-SVE-LABEL: define void @copysign_f64 +; ARMPL-SVE-LABEL: define void @sinh_f64 ; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; ARMPL-SVE: [[TMP15:%.*]] = call @armpl_svcopysign_f64_x( [[WIDE_MASKED_LOAD:%.*]], [[WIDE_MASKED_LOAD]], [[ACTIVE_LANE_MASK:%.*]]) +; ARMPL-SVE: [[TMP15:%.*]] = call @armpl_svsinh_f64_x( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) ; entry: br label %for.body @@ -2082,7 +2376,7 @@ define void @copysign_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] %in.gep = getelementptr inbounds double, ptr %in.ptr, i64 %iv %in = load double, ptr %in.gep, align 8 - %call = tail call double @copysign(double %in, double %in) + %call = tail call double @sinh(double %in) %out.gep = getelementptr inbounds double, ptr %out.ptr, i64 %iv store double %call, ptr %out.gep, align 8 %iv.next = add nuw nsw i64 %iv, 1 @@ -2093,22 +2387,22 @@ define void @copysign_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { ret void } -define void @copysign_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { -; SLEEF-NEON-LABEL: define void @copysign_f32 +define void @sinh_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { +; SLEEF-NEON-LABEL: define void @sinh_f32 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-NEON: [[CALL:%.*]] = tail call float @copysignf(float [[IN:%.*]], float [[IN]]) +; SLEEF-NEON: [[TMP3:%.*]] = call <4 x float> @_ZGVnN4v_sinhf(<4 x float> [[WIDE_LOAD:%.*]]) ; -; SLEEF-SVE-LABEL: define void @copysign_f32 +; SLEEF-SVE-LABEL: define void @sinh_f32 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-SVE: [[CALL:%.*]] = tail call float @copysignf(float [[IN:%.*]], float [[IN]]) +; SLEEF-SVE: [[TMP15:%.*]] = call @_ZGVsMxv_sinhf( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) ; -; ARMPL-NEON-LABEL: define void @copysign_f32 +; ARMPL-NEON-LABEL: define void @sinh_f32 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; ARMPL-NEON: [[TMP3:%.*]] = call <4 x float> @armpl_vcopysignq_f32(<4 x float> [[WIDE_LOAD:%.*]], <4 x float> [[WIDE_LOAD]]) +; ARMPL-NEON: [[TMP3:%.*]] = call <4 x float> @armpl_vsinhq_f32(<4 x float> [[WIDE_LOAD:%.*]]) ; -; ARMPL-SVE-LABEL: define void @copysign_f32 +; ARMPL-SVE-LABEL: define void @sinh_f32 ; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; ARMPL-SVE: [[TMP15:%.*]] = call @armpl_svcopysign_f32_x( [[WIDE_MASKED_LOAD:%.*]], [[WIDE_MASKED_LOAD]], [[ACTIVE_LANE_MASK:%.*]]) +; ARMPL-SVE: [[TMP15:%.*]] = call @armpl_svsinh_f32_x( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) ; entry: br label %for.body @@ -2117,7 +2411,7 @@ define void @copysign_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] %in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv %in = load float, ptr %in.gep, align 8 - %call = tail call float @copysignf(float %in, float %in) + %call = tail call float @sinhf(float %in) %out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv store float %call, ptr %out.gep, align 4 %iv.next = add nuw nsw i64 %iv, 1 @@ -2128,25 +2422,25 @@ define void @copysign_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { ret void } -declare double @fdim(double, double) -declare float @fdimf(float, float) +declare double @sinpi(double) +declare float @sinpif(float) -define void @fdim_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { -; SLEEF-NEON-LABEL: define void @fdim_f64 +define void @sinpi_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { +; SLEEF-NEON-LABEL: define void @sinpi_f64 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-NEON: [[CALL:%.*]] = tail call double @fdim(double [[IN:%.*]], double [[IN]]) +; SLEEF-NEON: [[CALL:%.*]] = tail call double @sinpi(double [[IN:%.*]]) ; -; SLEEF-SVE-LABEL: define void @fdim_f64 +; SLEEF-SVE-LABEL: define void @sinpi_f64 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-SVE: [[CALL:%.*]] = tail call double @fdim(double [[IN:%.*]], double [[IN]]) +; SLEEF-SVE: [[CALL:%.*]] = tail call double @sinpi(double [[IN:%.*]]) ; -; ARMPL-NEON-LABEL: define void @fdim_f64 +; ARMPL-NEON-LABEL: define void @sinpi_f64 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; ARMPL-NEON: [[TMP3:%.*]] = call <2 x double> @armpl_vfdimq_f64(<2 x double> [[WIDE_LOAD:%.*]], <2 x double> [[WIDE_LOAD]]) +; ARMPL-NEON: [[TMP3:%.*]] = call <2 x double> @armpl_vsinpiq_f64(<2 x double> [[WIDE_LOAD:%.*]]) ; -; ARMPL-SVE-LABEL: define void @fdim_f64 +; ARMPL-SVE-LABEL: define void @sinpi_f64 ; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; ARMPL-SVE: [[TMP15:%.*]] = call @armpl_svfdim_f64_x( [[WIDE_MASKED_LOAD:%.*]], [[WIDE_MASKED_LOAD]], [[ACTIVE_LANE_MASK:%.*]]) +; ARMPL-SVE: [[TMP15:%.*]] = call @armpl_svsinpi_f64_x( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) ; entry: br label %for.body @@ -2155,7 +2449,7 @@ define void @fdim_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] %in.gep = getelementptr inbounds double, ptr %in.ptr, i64 %iv %in = load double, ptr %in.gep, align 8 - %call = tail call double @fdim(double %in, double %in) + %call = tail call double @sinpi(double %in) %out.gep = getelementptr inbounds double, ptr %out.ptr, i64 %iv store double %call, ptr %out.gep, align 8 %iv.next = add nuw nsw i64 %iv, 1 @@ -2166,22 +2460,22 @@ define void @fdim_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { ret void } -define void @fdim_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { -; SLEEF-NEON-LABEL: define void @fdim_f32 +define void @sinpi_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { +; SLEEF-NEON-LABEL: define void @sinpi_f32 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-NEON: [[CALL:%.*]] = tail call float @fdimf(float [[IN:%.*]], float [[IN]]) +; SLEEF-NEON: [[CALL:%.*]] = tail call float @sinpif(float [[IN:%.*]]) ; -; SLEEF-SVE-LABEL: define void @fdim_f32 +; SLEEF-SVE-LABEL: define void @sinpi_f32 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-SVE: [[CALL:%.*]] = tail call float @fdimf(float [[IN:%.*]], float [[IN]]) +; SLEEF-SVE: [[CALL:%.*]] = tail call float @sinpif(float [[IN:%.*]]) ; -; ARMPL-NEON-LABEL: define void @fdim_f32 +; ARMPL-NEON-LABEL: define void @sinpi_f32 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; ARMPL-NEON: [[TMP3:%.*]] = call <4 x float> @armpl_vfdimq_f32(<4 x float> [[WIDE_LOAD:%.*]], <4 x float> [[WIDE_LOAD]]) +; ARMPL-NEON: [[TMP3:%.*]] = call <4 x float> @armpl_vsinpiq_f32(<4 x float> [[WIDE_LOAD:%.*]]) ; -; ARMPL-SVE-LABEL: define void @fdim_f32 +; ARMPL-SVE-LABEL: define void @sinpi_f32 ; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; ARMPL-SVE: [[TMP15:%.*]] = call @armpl_svfdim_f32_x( [[WIDE_MASKED_LOAD:%.*]], [[WIDE_MASKED_LOAD]], [[ACTIVE_LANE_MASK:%.*]]) +; ARMPL-SVE: [[TMP15:%.*]] = call @armpl_svsinpi_f32_x( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) ; entry: br label %for.body @@ -2190,7 +2484,7 @@ define void @fdim_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] %in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv %in = load float, ptr %in.gep, align 8 - %call = tail call float @fdimf(float %in, float %in) + %call = tail call float @sinpif(float %in) %out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv store float %call, ptr %out.gep, align 4 %iv.next = add nuw nsw i64 %iv, 1 @@ -2201,171 +2495,25 @@ define void @fdim_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { ret void } -declare double @fmin(double, double) -declare float @fminf(float, float) - -define void @fmin_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { -; SLEEF-NEON-LABEL: define void @fmin_f64 -; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-NEON: [[CALL:%.*]] = tail call double @fmin(double [[IN:%.*]], double [[IN]]) -; -; SLEEF-SVE-LABEL: define void @fmin_f64 -; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-SVE: [[CALL:%.*]] = tail call double @fmin(double [[IN:%.*]], double [[IN]]) -; -; ARMPL-NEON-LABEL: define void @fmin_f64 -; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; ARMPL-NEON: [[TMP3:%.*]] = call <2 x double> @armpl_vfminq_f64(<2 x double> [[WIDE_LOAD:%.*]], <2 x double> [[WIDE_LOAD]]) -; -; ARMPL-SVE-LABEL: define void @fmin_f64 -; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; ARMPL-SVE: [[TMP15:%.*]] = call @armpl_svfmin_f64_x( [[WIDE_MASKED_LOAD:%.*]], [[WIDE_MASKED_LOAD]], [[ACTIVE_LANE_MASK:%.*]]) -; - entry: - br label %for.body - - for.body: - %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] - %in.gep = getelementptr inbounds double, ptr %in.ptr, i64 %iv - %in = load double, ptr %in.gep, align 8 - %call = tail call double @fmin(double %in, double %in) - %out.gep = getelementptr inbounds double, ptr %out.ptr, i64 %iv - store double %call, ptr %out.gep, align 8 - %iv.next = add nuw nsw i64 %iv, 1 - %exitcond = icmp eq i64 %iv.next, 1000 - br i1 %exitcond, label %for.end, label %for.body - - for.end: - ret void -} - -define void @fmin_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { -; SLEEF-NEON-LABEL: define void @fmin_f32 -; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-NEON: [[CALL:%.*]] = tail call float @fminf(float [[IN:%.*]], float [[IN]]) -; -; SLEEF-SVE-LABEL: define void @fmin_f32 -; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-SVE: [[CALL:%.*]] = tail call float @fminf(float [[IN:%.*]], float [[IN]]) -; -; ARMPL-NEON-LABEL: define void @fmin_f32 -; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; ARMPL-NEON: [[TMP3:%.*]] = call <4 x float> @armpl_vfminq_f32(<4 x float> [[WIDE_LOAD:%.*]], <4 x float> [[WIDE_LOAD]]) -; -; ARMPL-SVE-LABEL: define void @fmin_f32 -; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; ARMPL-SVE: [[TMP15:%.*]] = call @armpl_svfmin_f32_x( [[WIDE_MASKED_LOAD:%.*]], [[WIDE_MASKED_LOAD]], [[ACTIVE_LANE_MASK:%.*]]) -; - entry: - br label %for.body - - for.body: - %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] - %in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv - %in = load float, ptr %in.gep, align 8 - %call = tail call float @fminf(float %in, float %in) - %out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv - store float %call, ptr %out.gep, align 4 - %iv.next = add nuw nsw i64 %iv, 1 - %exitcond = icmp eq i64 %iv.next, 1000 - br i1 %exitcond, label %for.end, label %for.body - - for.end: - ret void -} - -declare double @fmod(double, double) -declare float @fmodf(float, float) - -define void @fmod_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { -; SLEEF-NEON-LABEL: define void @fmod_f64 -; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-NEON: [[TMP3:%.*]] = call <2 x double> @_ZGVnN2vv_fmod(<2 x double> [[WIDE_LOAD:%.*]], <2 x double> [[WIDE_LOAD]]) -; -; SLEEF-SVE-LABEL: define void @fmod_f64 -; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-SVE: [[TMP15:%.*]] = call @_ZGVsMxvv_fmod( [[WIDE_MASKED_LOAD:%.*]], [[WIDE_MASKED_LOAD]], [[ACTIVE_LANE_MASK:%.*]]) -; -; ARMPL-NEON-LABEL: define void @fmod_f64 -; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; ARMPL-NEON: [[TMP3:%.*]] = call <2 x double> @armpl_vfmodq_f64(<2 x double> [[WIDE_LOAD:%.*]], <2 x double> [[WIDE_LOAD]]) -; -; ARMPL-SVE-LABEL: define void @fmod_f64 -; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; ARMPL-SVE: [[TMP15:%.*]] = call @armpl_svfmod_f64_x( [[WIDE_MASKED_LOAD:%.*]], [[WIDE_MASKED_LOAD]], [[ACTIVE_LANE_MASK:%.*]]) -; - entry: - br label %for.body - - for.body: - %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] - %in.gep = getelementptr inbounds double, ptr %in.ptr, i64 %iv - %in = load double, ptr %in.gep, align 8 - %call = tail call double @fmod(double %in, double %in) - %out.gep = getelementptr inbounds double, ptr %out.ptr, i64 %iv - store double %call, ptr %out.gep, align 8 - %iv.next = add nuw nsw i64 %iv, 1 - %exitcond = icmp eq i64 %iv.next, 1000 - br i1 %exitcond, label %for.end, label %for.body - - for.end: - ret void -} - -define void @fmod_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { -; SLEEF-NEON-LABEL: define void @fmod_f32 -; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-NEON: [[TMP3:%.*]] = call <4 x float> @_ZGVnN4vv_fmodf(<4 x float> [[WIDE_LOAD:%.*]], <4 x float> [[WIDE_LOAD]]) -; -; SLEEF-SVE-LABEL: define void @fmod_f32 -; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-SVE: [[TMP15:%.*]] = call @_ZGVsMxvv_fmodf( [[WIDE_MASKED_LOAD:%.*]], [[WIDE_MASKED_LOAD]], [[ACTIVE_LANE_MASK:%.*]]) -; -; ARMPL-NEON-LABEL: define void @fmod_f32 -; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; ARMPL-NEON: [[TMP3:%.*]] = call <4 x float> @armpl_vfmodq_f32(<4 x float> [[WIDE_LOAD:%.*]], <4 x float> [[WIDE_LOAD]]) -; -; ARMPL-SVE-LABEL: define void @fmod_f32 -; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; ARMPL-SVE: [[TMP15:%.*]] = call @armpl_svfmod_f32_x( [[WIDE_MASKED_LOAD:%.*]], [[WIDE_MASKED_LOAD]], [[ACTIVE_LANE_MASK:%.*]]) -; - entry: - br label %for.body - - for.body: - %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] - %in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv - %in = load float, ptr %in.gep, align 8 - %call = tail call float @fmodf(float %in, float %in) - %out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv - store float %call, ptr %out.gep, align 4 - %iv.next = add nuw nsw i64 %iv, 1 - %exitcond = icmp eq i64 %iv.next, 1000 - br i1 %exitcond, label %for.end, label %for.body - - for.end: - ret void -} - -declare double @hypot(double, double) -declare float @hypotf(float, float) +declare double @sqrt(double) +declare float @sqrtf(float) -define void @hypot_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { -; SLEEF-NEON-LABEL: define void @hypot_f64 +define void @sqrt_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { +; SLEEF-NEON-LABEL: define void @sqrt_f64 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-NEON: [[CALL:%.*]] = tail call double @hypot(double [[IN:%.*]], double [[IN]]) +; SLEEF-NEON: [[TMP3:%.*]] = call <2 x double> @_ZGVnN2v_sqrt(<2 x double> [[WIDE_LOAD:%.*]]) ; -; SLEEF-SVE-LABEL: define void @hypot_f64 +; SLEEF-SVE-LABEL: define void @sqrt_f64 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-SVE: [[CALL:%.*]] = tail call double @hypot(double [[IN:%.*]], double [[IN]]) +; SLEEF-SVE: [[TMP15:%.*]] = call @_ZGVsMxv_sqrt( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) ; -; ARMPL-NEON-LABEL: define void @hypot_f64 +; ARMPL-NEON-LABEL: define void @sqrt_f64 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; ARMPL-NEON: [[TMP3:%.*]] = call <2 x double> @armpl_vhypotq_f64(<2 x double> [[WIDE_LOAD:%.*]], <2 x double> [[WIDE_LOAD]]) +; ARMPL-NEON: [[TMP3:%.*]] = call <2 x double> @armpl_vsqrtq_f64(<2 x double> [[WIDE_LOAD:%.*]]) ; -; ARMPL-SVE-LABEL: define void @hypot_f64 +; ARMPL-SVE-LABEL: define void @sqrt_f64 ; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; ARMPL-SVE: [[TMP15:%.*]] = call @armpl_svhypot_f64_x( [[WIDE_MASKED_LOAD:%.*]], [[WIDE_MASKED_LOAD]], [[ACTIVE_LANE_MASK:%.*]]) +; ARMPL-SVE: [[TMP15:%.*]] = call @armpl_svsqrt_f64_x( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) ; entry: br label %for.body @@ -2374,7 +2522,7 @@ define void @hypot_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] %in.gep = getelementptr inbounds double, ptr %in.ptr, i64 %iv %in = load double, ptr %in.gep, align 8 - %call = tail call double @hypot(double %in, double %in) + %call = tail call double @sqrt(double %in) %out.gep = getelementptr inbounds double, ptr %out.ptr, i64 %iv store double %call, ptr %out.gep, align 8 %iv.next = add nuw nsw i64 %iv, 1 @@ -2385,22 +2533,22 @@ define void @hypot_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { ret void } -define void @hypot_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { -; SLEEF-NEON-LABEL: define void @hypot_f32 +define void @sqrt_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { +; SLEEF-NEON-LABEL: define void @sqrt_f32 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-NEON: [[CALL:%.*]] = tail call float @hypotf(float [[IN:%.*]], float [[IN]]) +; SLEEF-NEON: [[TMP3:%.*]] = call <4 x float> @_ZGVnN4v_sqrtf(<4 x float> [[WIDE_LOAD:%.*]]) ; -; SLEEF-SVE-LABEL: define void @hypot_f32 +; SLEEF-SVE-LABEL: define void @sqrt_f32 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-SVE: [[CALL:%.*]] = tail call float @hypotf(float [[IN:%.*]], float [[IN]]) +; SLEEF-SVE: [[TMP15:%.*]] = call @_ZGVsMxv_sqrtf( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) ; -; ARMPL-NEON-LABEL: define void @hypot_f32 +; ARMPL-NEON-LABEL: define void @sqrt_f32 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; ARMPL-NEON: [[TMP3:%.*]] = call <4 x float> @armpl_vhypotq_f32(<4 x float> [[WIDE_LOAD:%.*]], <4 x float> [[WIDE_LOAD]]) +; ARMPL-NEON: [[TMP3:%.*]] = call <4 x float> @armpl_vsqrtq_f32(<4 x float> [[WIDE_LOAD:%.*]]) ; -; ARMPL-SVE-LABEL: define void @hypot_f32 +; ARMPL-SVE-LABEL: define void @sqrt_f32 ; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; ARMPL-SVE: [[TMP15:%.*]] = call @armpl_svhypot_f32_x( [[WIDE_MASKED_LOAD:%.*]], [[WIDE_MASKED_LOAD]], [[ACTIVE_LANE_MASK:%.*]]) +; ARMPL-SVE: [[TMP15:%.*]] = call @armpl_svsqrt_f32_x( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) ; entry: br label %for.body @@ -2409,7 +2557,7 @@ define void @hypot_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] %in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv %in = load float, ptr %in.gep, align 8 - %call = tail call float @hypotf(float %in, float %in) + %call = tail call float @sqrtf(float %in) %out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv store float %call, ptr %out.gep, align 4 %iv.next = add nuw nsw i64 %iv, 1 @@ -2420,25 +2568,25 @@ define void @hypot_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { ret void } -declare double @nextafter(double, double) -declare float @nextafterf(float, float) +declare double @tan(double) +declare float @tanf(float) -define void @nextafter_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { -; SLEEF-NEON-LABEL: define void @nextafter_f64 +define void @tan_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { +; SLEEF-NEON-LABEL: define void @tan_f64 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-NEON: [[CALL:%.*]] = tail call double @nextafter(double [[IN:%.*]], double [[IN]]) +; SLEEF-NEON: [[TMP3:%.*]] = call <2 x double> @_ZGVnN2v_tan(<2 x double> [[WIDE_LOAD:%.*]]) ; -; SLEEF-SVE-LABEL: define void @nextafter_f64 +; SLEEF-SVE-LABEL: define void @tan_f64 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-SVE: [[CALL:%.*]] = tail call double @nextafter(double [[IN:%.*]], double [[IN]]) +; SLEEF-SVE: [[TMP15:%.*]] = call @_ZGVsMxv_tan( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) ; -; ARMPL-NEON-LABEL: define void @nextafter_f64 +; ARMPL-NEON-LABEL: define void @tan_f64 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; ARMPL-NEON: [[TMP3:%.*]] = call <2 x double> @armpl_vnextafterq_f64(<2 x double> [[WIDE_LOAD:%.*]], <2 x double> [[WIDE_LOAD]]) +; ARMPL-NEON: [[TMP3:%.*]] = call <2 x double> @armpl_vtanq_f64(<2 x double> [[WIDE_LOAD:%.*]]) ; -; ARMPL-SVE-LABEL: define void @nextafter_f64 +; ARMPL-SVE-LABEL: define void @tan_f64 ; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; ARMPL-SVE: [[TMP15:%.*]] = call @armpl_svnextafter_f64_x( [[WIDE_MASKED_LOAD:%.*]], [[WIDE_MASKED_LOAD]], [[ACTIVE_LANE_MASK:%.*]]) +; ARMPL-SVE: [[TMP15:%.*]] = call @armpl_svtan_f64_x( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) ; entry: br label %for.body @@ -2447,7 +2595,7 @@ define void @nextafter_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] %in.gep = getelementptr inbounds double, ptr %in.ptr, i64 %iv %in = load double, ptr %in.gep, align 8 - %call = tail call double @nextafter(double %in, double %in) + %call = tail call double @tan(double %in) %out.gep = getelementptr inbounds double, ptr %out.ptr, i64 %iv store double %call, ptr %out.gep, align 8 %iv.next = add nuw nsw i64 %iv, 1 @@ -2458,22 +2606,22 @@ define void @nextafter_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { ret void } -define void @nextafter_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { -; SLEEF-NEON-LABEL: define void @nextafter_f32 +define void @tan_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { +; SLEEF-NEON-LABEL: define void @tan_f32 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-NEON: [[CALL:%.*]] = tail call float @nextafterf(float [[IN:%.*]], float [[IN]]) +; SLEEF-NEON: [[TMP3:%.*]] = call <4 x float> @_ZGVnN4v_tanf(<4 x float> [[WIDE_LOAD:%.*]]) ; -; SLEEF-SVE-LABEL: define void @nextafter_f32 +; SLEEF-SVE-LABEL: define void @tan_f32 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-SVE: [[CALL:%.*]] = tail call float @nextafterf(float [[IN:%.*]], float [[IN]]) +; SLEEF-SVE: [[TMP15:%.*]] = call @_ZGVsMxv_tanf( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) ; -; ARMPL-NEON-LABEL: define void @nextafter_f32 +; ARMPL-NEON-LABEL: define void @tan_f32 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; ARMPL-NEON: [[TMP3:%.*]] = call <4 x float> @armpl_vnextafterq_f32(<4 x float> [[WIDE_LOAD:%.*]], <4 x float> [[WIDE_LOAD]]) +; ARMPL-NEON: [[TMP3:%.*]] = call <4 x float> @armpl_vtanq_f32(<4 x float> [[WIDE_LOAD:%.*]]) ; -; ARMPL-SVE-LABEL: define void @nextafter_f32 +; ARMPL-SVE-LABEL: define void @tan_f32 ; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; ARMPL-SVE: [[TMP15:%.*]] = call @armpl_svnextafter_f32_x( [[WIDE_MASKED_LOAD:%.*]], [[WIDE_MASKED_LOAD]], [[ACTIVE_LANE_MASK:%.*]]) +; ARMPL-SVE: [[TMP15:%.*]] = call @armpl_svtan_f32_x( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) ; entry: br label %for.body @@ -2482,7 +2630,7 @@ define void @nextafter_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] %in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv %in = load float, ptr %in.gep, align 8 - %call = tail call float @nextafterf(float %in, float %in) + %call = tail call float @tanf(float %in) %out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv store float %call, ptr %out.gep, align 4 %iv.next = add nuw nsw i64 %iv, 1 @@ -2493,25 +2641,25 @@ define void @nextafter_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { ret void } -declare double @pow(double, double) -declare float @powf(float, float) +declare double @tanh(double) +declare float @tanhf(float) -define void @pow_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { -; SLEEF-NEON-LABEL: define void @pow_f64 +define void @tanh_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { +; SLEEF-NEON-LABEL: define void @tanh_f64 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-NEON: [[TMP3:%.*]] = call <2 x double> @_ZGVnN2vv_pow(<2 x double> [[WIDE_LOAD:%.*]], <2 x double> [[WIDE_LOAD]]) +; SLEEF-NEON: [[TMP3:%.*]] = call <2 x double> @_ZGVnN2v_tanh(<2 x double> [[WIDE_LOAD:%.*]]) ; -; SLEEF-SVE-LABEL: define void @pow_f64 +; SLEEF-SVE-LABEL: define void @tanh_f64 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-SVE: [[TMP15:%.*]] = call @_ZGVsMxvv_pow( [[WIDE_MASKED_LOAD:%.*]], [[WIDE_MASKED_LOAD]], [[ACTIVE_LANE_MASK:%.*]]) +; SLEEF-SVE: [[TMP15:%.*]] = call @_ZGVsMxv_tanh( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) ; -; ARMPL-NEON-LABEL: define void @pow_f64 +; ARMPL-NEON-LABEL: define void @tanh_f64 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; ARMPL-NEON: [[TMP3:%.*]] = call <2 x double> @armpl_vpowq_f64(<2 x double> [[WIDE_LOAD:%.*]], <2 x double> [[WIDE_LOAD]]) +; ARMPL-NEON: [[TMP3:%.*]] = call <2 x double> @armpl_vtanhq_f64(<2 x double> [[WIDE_LOAD:%.*]]) ; -; ARMPL-SVE-LABEL: define void @pow_f64 +; ARMPL-SVE-LABEL: define void @tanh_f64 ; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; ARMPL-SVE: [[TMP15:%.*]] = call @armpl_svpow_f64_x( [[WIDE_MASKED_LOAD:%.*]], [[WIDE_MASKED_LOAD]], [[ACTIVE_LANE_MASK:%.*]]) +; ARMPL-SVE: [[TMP15:%.*]] = call @armpl_svtanh_f64_x( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) ; entry: br label %for.body @@ -2520,7 +2668,7 @@ define void @pow_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] %in.gep = getelementptr inbounds double, ptr %in.ptr, i64 %iv %in = load double, ptr %in.gep, align 8 - %call = tail call double @pow(double %in, double %in) + %call = tail call double @tanh(double %in) %out.gep = getelementptr inbounds double, ptr %out.ptr, i64 %iv store double %call, ptr %out.gep, align 8 %iv.next = add nuw nsw i64 %iv, 1 @@ -2531,22 +2679,22 @@ define void @pow_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { ret void } -define void @pow_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { -; SLEEF-NEON-LABEL: define void @pow_f32 +define void @tanh_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { +; SLEEF-NEON-LABEL: define void @tanh_f32 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-NEON: [[TMP3:%.*]] = call <4 x float> @_ZGVnN4vv_powf(<4 x float> [[WIDE_LOAD:%.*]], <4 x float> [[WIDE_LOAD]]) +; SLEEF-NEON: [[TMP3:%.*]] = call <4 x float> @_ZGVnN4v_tanhf(<4 x float> [[WIDE_LOAD:%.*]]) ; -; SLEEF-SVE-LABEL: define void @pow_f32 +; SLEEF-SVE-LABEL: define void @tanh_f32 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-SVE: [[TMP15:%.*]] = call @_ZGVsMxvv_powf( [[WIDE_MASKED_LOAD:%.*]], [[WIDE_MASKED_LOAD]], [[ACTIVE_LANE_MASK:%.*]]) +; SLEEF-SVE: [[TMP15:%.*]] = call @_ZGVsMxv_tanhf( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) ; -; ARMPL-NEON-LABEL: define void @pow_f32 +; ARMPL-NEON-LABEL: define void @tanh_f32 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; ARMPL-NEON: [[TMP3:%.*]] = call <4 x float> @armpl_vpowq_f32(<4 x float> [[WIDE_LOAD:%.*]], <4 x float> [[WIDE_LOAD]]) +; ARMPL-NEON: [[TMP3:%.*]] = call <4 x float> @armpl_vtanhq_f32(<4 x float> [[WIDE_LOAD:%.*]]) ; -; ARMPL-SVE-LABEL: define void @pow_f32 +; ARMPL-SVE-LABEL: define void @tanh_f32 ; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; ARMPL-SVE: [[TMP15:%.*]] = call @armpl_svpow_f32_x( [[WIDE_MASKED_LOAD:%.*]], [[WIDE_MASKED_LOAD]], [[ACTIVE_LANE_MASK:%.*]]) +; ARMPL-SVE: [[TMP15:%.*]] = call @armpl_svtanh_f32_x( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) ; entry: br label %for.body @@ -2555,7 +2703,7 @@ define void @pow_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] %in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv %in = load float, ptr %in.gep, align 8 - %call = tail call float @powf(float %in, float %in) + %call = tail call float @tanhf(float %in) %out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv store float %call, ptr %out.gep, align 4 %iv.next = add nuw nsw i64 %iv, 1 @@ -2566,25 +2714,25 @@ define void @pow_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { ret void } -declare double @fma(double, double, double) -declare float @fmaf(float, float, float) +declare double @tgamma(double) +declare float @tgammaf(float) -define void @fma_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { -; SLEEF-NEON-LABEL: define void @fma_f64 +define void @tgamma_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { +; SLEEF-NEON-LABEL: define void @tgamma_f64 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-NEON: [[CALL:%.*]] = tail call double @fma(double [[IN:%.*]], double [[IN]], double [[IN]]) +; SLEEF-NEON: [[TMP3:%.*]] = call <2 x double> @_ZGVnN2v_tgamma(<2 x double> [[WIDE_LOAD:%.*]]) ; -; SLEEF-SVE-LABEL: define void @fma_f64 +; SLEEF-SVE-LABEL: define void @tgamma_f64 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-SVE: [[CALL:%.*]] = tail call double @fma(double [[IN:%.*]], double [[IN]], double [[IN]]) +; SLEEF-SVE: [[TMP15:%.*]] = call @_ZGVsMxv_tgamma( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) ; -; ARMPL-NEON-LABEL: define void @fma_f64 +; ARMPL-NEON-LABEL: define void @tgamma_f64 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; ARMPL-NEON: [[TMP3:%.*]] = call <2 x double> @armpl_vfmaq_f64(<2 x double> [[WIDE_LOAD:%.*]], <2 x double> [[WIDE_LOAD]], <2 x double> [[WIDE_LOAD]]) +; ARMPL-NEON: [[TMP3:%.*]] = call <2 x double> @armpl_vtgammaq_f64(<2 x double> [[WIDE_LOAD:%.*]]) ; -; ARMPL-SVE-LABEL: define void @fma_f64 +; ARMPL-SVE-LABEL: define void @tgamma_f64 ; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; ARMPL-SVE: [[TMP15:%.*]] = call @armpl_svfma_f64_x( [[WIDE_MASKED_LOAD:%.*]], [[WIDE_MASKED_LOAD]], [[WIDE_MASKED_LOAD]], [[ACTIVE_LANE_MASK:%.*]]) +; ARMPL-SVE: [[TMP15:%.*]] = call @armpl_svtgamma_f64_x( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) ; entry: br label %for.body @@ -2593,7 +2741,7 @@ define void @fma_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] %in.gep = getelementptr inbounds double, ptr %in.ptr, i64 %iv %in = load double, ptr %in.gep, align 8 - %call = tail call double @fma(double %in, double %in, double %in) + %call = tail call double @tgamma(double %in) %out.gep = getelementptr inbounds double, ptr %out.ptr, i64 %iv store double %call, ptr %out.gep, align 8 %iv.next = add nuw nsw i64 %iv, 1 @@ -2604,22 +2752,22 @@ define void @fma_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { ret void } -define void @fma_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { -; SLEEF-NEON-LABEL: define void @fma_f32 +define void @tgamma_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { +; SLEEF-NEON-LABEL: define void @tgamma_f32 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-NEON: [[CALL:%.*]] = tail call float @fmaf(float [[IN:%.*]], float [[IN]], float [[IN]]) +; SLEEF-NEON: [[TMP3:%.*]] = call <4 x float> @_ZGVnN4v_tgammaf(<4 x float> [[WIDE_LOAD:%.*]]) ; -; SLEEF-SVE-LABEL: define void @fma_f32 +; SLEEF-SVE-LABEL: define void @tgamma_f32 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-SVE: [[CALL:%.*]] = tail call float @fmaf(float [[IN:%.*]], float [[IN]], float [[IN]]) +; SLEEF-SVE: [[TMP15:%.*]] = call @_ZGVsMxv_tgammaf( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) ; -; ARMPL-NEON-LABEL: define void @fma_f32 +; ARMPL-NEON-LABEL: define void @tgamma_f32 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; ARMPL-NEON: [[TMP3:%.*]] = call <4 x float> @armpl_vfmaq_f32(<4 x float> [[WIDE_LOAD:%.*]], <4 x float> [[WIDE_LOAD]], <4 x float> [[WIDE_LOAD]]) +; ARMPL-NEON: [[TMP3:%.*]] = call <4 x float> @armpl_vtgammaq_f32(<4 x float> [[WIDE_LOAD:%.*]]) ; -; ARMPL-SVE-LABEL: define void @fma_f32 +; ARMPL-SVE-LABEL: define void @tgamma_f32 ; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; ARMPL-SVE: [[TMP15:%.*]] = call @armpl_svfma_f32_x( [[WIDE_MASKED_LOAD:%.*]], [[WIDE_MASKED_LOAD]], [[WIDE_MASKED_LOAD]], [[ACTIVE_LANE_MASK:%.*]]) +; ARMPL-SVE: [[TMP15:%.*]] = call @armpl_svtgamma_f32_x( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) ; entry: br label %for.body @@ -2628,7 +2776,7 @@ define void @fma_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] %in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv %in = load float, ptr %in.gep, align 8 - %call = tail call float @fmaf(float %in, float %in, float %in) + %call = tail call float @tgammaf(float %in) %out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv store float %call, ptr %out.gep, align 4 %iv.next = add nuw nsw i64 %iv, 1 @@ -2638,224 +2786,3 @@ define void @fma_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { for.end: ret void } - -declare double @modf(double, ptr) -declare float @modff(float, ptr) - -define void @test_modf(ptr noalias %a, ptr noalias %b, ptr noalias %c) { -; SLEEF-NEON-LABEL: define void @test_modf -; SLEEF-NEON-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] { -; SLEEF-NEON: [[TMP5:%.*]] = call <2 x double> @_ZGVnN2vl8_modf(<2 x double> [[WIDE_LOAD:%.*]], ptr [[TMP4:%.*]]) -; -; SLEEF-SVE-LABEL: define void @test_modf -; SLEEF-SVE-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] { -; SLEEF-SVE: [[TMP23:%.*]] = call @_ZGVsMxvl8_modf( [[WIDE_MASKED_LOAD:%.*]], ptr [[TMP22:%.*]], [[ACTIVE_LANE_MASK:%.*]]) -; -; ARMPL-NEON-LABEL: define void @test_modf -; ARMPL-NEON-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] { -; ARMPL-NEON: [[TMP5:%.*]] = call <2 x double> @armpl_vmodfq_f64(<2 x double> [[WIDE_LOAD:%.*]], ptr [[TMP4:%.*]]) -; -; ARMPL-SVE-LABEL: define void @test_modf -; ARMPL-SVE-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] { -; ARMPL-SVE: [[TMP23:%.*]] = call @armpl_svmodf_f64_x( [[WIDE_MASKED_LOAD:%.*]], ptr [[TMP22:%.*]], [[ACTIVE_LANE_MASK:%.*]]) -; -entry: - br label %for.body - -for.body: - %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] - %gepa = getelementptr double, ptr %a, i64 %indvars.iv - %num = load double, ptr %gepa, align 8 - %gepb = getelementptr double, ptr %b, i64 %indvars.iv - %data = call double @modf(double %num, ptr %gepb) - %gepc = getelementptr inbounds double, ptr %c, i64 %indvars.iv - store double %data, ptr %gepc, align 8 - %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 - %exitcond = icmp eq i64 %indvars.iv.next, 1000 - br i1 %exitcond, label %for.cond.cleanup, label %for.body - -for.cond.cleanup: - ret void -} - -define void @test_modff(ptr noalias %a, ptr noalias %b, ptr noalias %c) { -; SLEEF-NEON-LABEL: define void @test_modff -; SLEEF-NEON-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] { -; SLEEF-NEON: [[TMP5:%.*]] = call <4 x float> @_ZGVnN4vl4_modff(<4 x float> [[WIDE_LOAD:%.*]], ptr [[TMP4:%.*]]) -; -; SLEEF-SVE-LABEL: define void @test_modff -; SLEEF-SVE-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] { -; SLEEF-SVE: [[TMP23:%.*]] = call @_ZGVsMxvl4_modff( [[WIDE_MASKED_LOAD:%.*]], ptr [[TMP22:%.*]], [[ACTIVE_LANE_MASK:%.*]]) -; -; ARMPL-NEON-LABEL: define void @test_modff -; ARMPL-NEON-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] { -; ARMPL-NEON: [[TMP5:%.*]] = call <4 x float> @armpl_vmodfq_f32(<4 x float> [[WIDE_LOAD:%.*]], ptr [[TMP4:%.*]]) -; -; ARMPL-SVE-LABEL: define void @test_modff -; ARMPL-SVE-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] { -; ARMPL-SVE: [[TMP23:%.*]] = call @armpl_svmodf_f32_x( [[WIDE_MASKED_LOAD:%.*]], ptr [[TMP22:%.*]], [[ACTIVE_LANE_MASK:%.*]]) -; -entry: - br label %for.body - -for.body: - %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] - %gepa = getelementptr float, ptr %a, i64 %indvars.iv - %num = load float, ptr %gepa, align 8 - %gepb = getelementptr float, ptr %b, i64 %indvars.iv - %data = call float @modff(float %num, ptr %gepb) - %gepc = getelementptr inbounds float, ptr %c, i64 %indvars.iv - store float %data, ptr %gepc, align 8 - %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 - %exitcond = icmp eq i64 %indvars.iv.next, 1000 - br i1 %exitcond, label %for.cond.cleanup, label %for.body - -for.cond.cleanup: - ret void -} - -declare void @sincos(double, ptr, ptr) -declare void @sincosf(float, ptr, ptr) - -define void @test_sincos(ptr noalias %a, ptr noalias %b, ptr noalias %c) { -; SLEEF-NEON-LABEL: define void @test_sincos -; SLEEF-NEON-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] { -; SLEEF-NEON: call void @_ZGVnN2vl8l8_sincos(<2 x double> [[WIDE_LOAD:%.*]], ptr [[TMP5:%.*]], ptr [[TMP6:%.*]]) -; -; SLEEF-SVE-LABEL: define void @test_sincos -; SLEEF-SVE-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] { -; SLEEF-SVE: call void @_ZGVsMxvl8l8_sincos( [[WIDE_MASKED_LOAD:%.*]], ptr [[TMP23:%.*]], ptr [[TMP24:%.*]], [[ACTIVE_LANE_MASK:%.*]]) -; -; ARMPL-NEON-LABEL: define void @test_sincos -; ARMPL-NEON-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] { -; ARMPL-NEON: call void @armpl_vsincosq_f64(<2 x double> [[WIDE_LOAD:%.*]], ptr [[TMP5:%.*]], ptr [[TMP6:%.*]]) -; -; ARMPL-SVE-LABEL: define void @test_sincos -; ARMPL-SVE-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] { -; ARMPL-SVE: call void @armpl_svsincos_f64_x( [[WIDE_MASKED_LOAD:%.*]], ptr [[TMP23:%.*]], ptr [[TMP24:%.*]], [[ACTIVE_LANE_MASK:%.*]]) -; -entry: - br label %for.body - -for.body: - %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] - %gepa = getelementptr double, ptr %a, i64 %indvars.iv - %num = load double, ptr %gepa, align 8 - %gepb = getelementptr double, ptr %b, i64 %indvars.iv - %gepc = getelementptr double, ptr %c, i64 %indvars.iv - call void @sincos(double %num, ptr %gepb, ptr %gepc) - %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 - %exitcond = icmp eq i64 %indvars.iv.next, 1000 - br i1 %exitcond, label %for.cond.cleanup, label %for.body - -for.cond.cleanup: - ret void -} - -define void @test_sincosf(ptr noalias %a, ptr noalias %b, ptr noalias %c) { -; SLEEF-NEON-LABEL: define void @test_sincosf -; SLEEF-NEON-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] { -; SLEEF-NEON: call void @_ZGVnN4vl4l4_sincosf(<4 x float> [[WIDE_LOAD:%.*]], ptr [[TMP5:%.*]], ptr [[TMP6:%.*]]) -; -; SLEEF-SVE-LABEL: define void @test_sincosf -; SLEEF-SVE-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] { -; SLEEF-SVE: call void @_ZGVsMxvl4l4_sincosf( [[WIDE_MASKED_LOAD:%.*]], ptr [[TMP23:%.*]], ptr [[TMP24:%.*]], [[ACTIVE_LANE_MASK:%.*]]) -; -; ARMPL-NEON-LABEL: define void @test_sincosf -; ARMPL-NEON-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] { -; ARMPL-NEON: call void @armpl_vsincosq_f32(<4 x float> [[WIDE_LOAD:%.*]], ptr [[TMP5:%.*]], ptr [[TMP6:%.*]]) -; -; ARMPL-SVE-LABEL: define void @test_sincosf -; ARMPL-SVE-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] { -; ARMPL-SVE: call void @armpl_svsincos_f32_x( [[WIDE_MASKED_LOAD:%.*]], ptr [[TMP23:%.*]], ptr [[TMP24:%.*]], [[ACTIVE_LANE_MASK:%.*]]) -; -entry: - br label %for.body - -for.body: - %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] - %gepa = getelementptr float, ptr %a, i64 %indvars.iv - %num = load float, ptr %gepa, align 8 - %gepb = getelementptr float, ptr %b, i64 %indvars.iv - %gepc = getelementptr float, ptr %c, i64 %indvars.iv - call void @sincosf(float %num, ptr %gepb, ptr %gepc) - %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 - %exitcond = icmp eq i64 %indvars.iv.next, 1000 - br i1 %exitcond, label %for.cond.cleanup, label %for.body - -for.cond.cleanup: - ret void -} - -declare void @sincospi(double, ptr, ptr) -declare void @sincospif(float, ptr, ptr) - -define void @test_sincospi(ptr noalias %a, ptr noalias %b, ptr noalias %c) { -; SLEEF-NEON-LABEL: define void @test_sincospi -; SLEEF-NEON-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] { -; SLEEF-NEON: call void @_ZGVnN2vl8l8_sincospi(<2 x double> [[WIDE_LOAD:%.*]], ptr [[TMP5:%.*]], ptr [[TMP6:%.*]]) -; -; SLEEF-SVE-LABEL: define void @test_sincospi -; SLEEF-SVE-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] { -; SLEEF-SVE: call void @_ZGVsMxvl8l8_sincospi( [[WIDE_MASKED_LOAD:%.*]], ptr [[TMP23:%.*]], ptr [[TMP24:%.*]], [[ACTIVE_LANE_MASK:%.*]]) -; -; ARMPL-NEON-LABEL: define void @test_sincospi -; ARMPL-NEON-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] { -; ARMPL-NEON: call void @armpl_vsincospiq_f64(<2 x double> [[WIDE_LOAD:%.*]], ptr [[TMP5:%.*]], ptr [[TMP6:%.*]]) -; -; ARMPL-SVE-LABEL: define void @test_sincospi -; ARMPL-SVE-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] { -; ARMPL-SVE: call void @armpl_svsincospi_f64_x( [[WIDE_MASKED_LOAD:%.*]], ptr [[TMP23:%.*]], ptr [[TMP24:%.*]], [[ACTIVE_LANE_MASK:%.*]]) -; -entry: - br label %for.body - -for.body: - %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] - %gepa = getelementptr double, ptr %a, i64 %indvars.iv - %num = load double, ptr %gepa, align 8 - %gepb = getelementptr double, ptr %b, i64 %indvars.iv - %gepc = getelementptr double, ptr %c, i64 %indvars.iv - call void @sincospi(double %num, ptr %gepb, ptr %gepc) - %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 - %exitcond = icmp eq i64 %indvars.iv.next, 1000 - br i1 %exitcond, label %for.cond.cleanup, label %for.body - -for.cond.cleanup: - ret void -} - -define void @test_sincospif(ptr noalias %a, ptr noalias %b, ptr noalias %c) { -; SLEEF-NEON-LABEL: define void @test_sincospif -; SLEEF-NEON-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] { -; SLEEF-NEON: call void @_ZGVnN4vl4l4_sincospif(<4 x float> [[WIDE_LOAD:%.*]], ptr [[TMP5:%.*]], ptr [[TMP6:%.*]]) -; -; SLEEF-SVE-LABEL: define void @test_sincospif -; SLEEF-SVE-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] { -; SLEEF-SVE: call void @_ZGVsMxvl4l4_sincospif( [[WIDE_MASKED_LOAD:%.*]], ptr [[TMP23:%.*]], ptr [[TMP24:%.*]], [[ACTIVE_LANE_MASK:%.*]]) -; -; ARMPL-NEON-LABEL: define void @test_sincospif -; ARMPL-NEON-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] { -; ARMPL-NEON: call void @armpl_vsincospiq_f32(<4 x float> [[WIDE_LOAD:%.*]], ptr [[TMP5:%.*]], ptr [[TMP6:%.*]]) -; -; ARMPL-SVE-LABEL: define void @test_sincospif -; ARMPL-SVE-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] { -; ARMPL-SVE: call void @armpl_svsincospi_f32_x( [[WIDE_MASKED_LOAD:%.*]], ptr [[TMP23:%.*]], ptr [[TMP24:%.*]], [[ACTIVE_LANE_MASK:%.*]]) -; -entry: - br label %for.body - -for.body: - %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] - %gepa = getelementptr float, ptr %a, i64 %indvars.iv - %num = load float, ptr %gepa, align 8 - %gepb = getelementptr float, ptr %b, i64 %indvars.iv - %gepc = getelementptr float, ptr %c, i64 %indvars.iv - call void @sincospif(float %num, ptr %gepb, ptr %gepc) - %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 - %exitcond = icmp eq i64 %indvars.iv.next, 1000 - br i1 %exitcond, label %for.cond.cleanup, label %for.body - -for.cond.cleanup: - ret void -} diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/veclib-intrinsic-calls.ll b/llvm/test/Transforms/LoopVectorize/AArch64/veclib-intrinsic-calls.ll index d59c28849bfb5..2a552077a42b6 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/veclib-intrinsic-calls.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/veclib-intrinsic-calls.ll @@ -12,171 +12,25 @@ target triple = "aarch64-unknown-linux-gnu" ; are checking fixed width vectorization with NEON and scalable vectorization ; with SVE. -declare double @llvm.cos.f64(double) -declare float @llvm.cos.f32(float) +declare double @llvm.ceil.f64(double) +declare float @llvm.ceil.f32(float) -define void @cos_f64(ptr noalias %in.ptr, ptr %out.ptr) { -; SLEEF-NEON-LABEL: define void @cos_f64 +define void @ceil_f64(ptr noalias %in.ptr, ptr %out.ptr) { +; SLEEF-NEON-LABEL: define void @ceil_f64 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1:[0-9]+]] { -; SLEEF-NEON: [[TMP3:%.*]] = call <2 x double> @_ZGVnN2v_cos(<2 x double> [[WIDE_LOAD:%.*]]) +; SLEEF-NEON: [[TMP3:%.*]] = call <2 x double> @llvm.ceil.v2f64(<2 x double> [[WIDE_LOAD:%.*]]) ; -; SLEEF-SVE-LABEL: define void @cos_f64 +; SLEEF-SVE-LABEL: define void @ceil_f64 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1:[0-9]+]] { -; SLEEF-SVE: [[TMP15:%.*]] = call @_ZGVsMxv_cos( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) +; SLEEF-SVE: [[TMP15:%.*]] = call @llvm.ceil.nxv2f64( [[WIDE_MASKED_LOAD:%.*]]) ; -; ARMPL-NEON-LABEL: define void @cos_f64 +; ARMPL-NEON-LABEL: define void @ceil_f64 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1:[0-9]+]] { -; ARMPL-NEON: [[TMP3:%.*]] = call <2 x double> @armpl_vcosq_f64(<2 x double> [[WIDE_LOAD:%.*]]) +; ARMPL-NEON: [[TMP3:%.*]] = call <2 x double> @llvm.ceil.v2f64(<2 x double> [[WIDE_LOAD:%.*]]) ; -; ARMPL-SVE-LABEL: define void @cos_f64 +; ARMPL-SVE-LABEL: define void @ceil_f64 ; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1:[0-9]+]] { -; ARMPL-SVE: [[TMP15:%.*]] = call @armpl_svcos_f64_x( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) -; - entry: - br label %for.body - - for.body: - %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] - %in.gep = getelementptr inbounds double, ptr %in.ptr, i64 %iv - %in = load double, ptr %in.gep, align 8 - %call = tail call double @llvm.cos.f64(double %in) - %out.gep = getelementptr inbounds double, ptr %out.ptr, i64 %iv - store double %call, ptr %out.gep, align 8 - %iv.next = add nuw nsw i64 %iv, 1 - %exitcond = icmp eq i64 %iv.next, 1000 - br i1 %exitcond, label %for.end, label %for.body - - for.end: - ret void -} - -define void @cos_f32(ptr noalias %in.ptr, ptr %out.ptr) { -; SLEEF-NEON-LABEL: define void @cos_f32 -; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { -; SLEEF-NEON: [[TMP3:%.*]] = call <4 x float> @_ZGVnN4v_cosf(<4 x float> [[WIDE_LOAD:%.*]]) -; -; SLEEF-SVE-LABEL: define void @cos_f32 -; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { -; SLEEF-SVE: [[TMP15:%.*]] = call @_ZGVsMxv_cosf( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) -; -; ARMPL-NEON-LABEL: define void @cos_f32 -; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { -; ARMPL-NEON: [[TMP3:%.*]] = call <4 x float> @armpl_vcosq_f32(<4 x float> [[WIDE_LOAD:%.*]]) -; -; ARMPL-SVE-LABEL: define void @cos_f32 -; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { -; ARMPL-SVE: [[TMP15:%.*]] = call @armpl_svcos_f32_x( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) -; - entry: - br label %for.body - - for.body: - %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] - %in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv - %in = load float, ptr %in.gep, align 8 - %call = tail call float @llvm.cos.f32(float %in) - %out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv - store float %call, ptr %out.gep, align 4 - %iv.next = add nuw nsw i64 %iv, 1 - %exitcond = icmp eq i64 %iv.next, 1000 - br i1 %exitcond, label %for.end, label %for.body - - for.end: - ret void -} - -declare double @llvm.exp.f64(double) -declare float @llvm.exp.f32(float) - -define void @exp_f64(ptr noalias %in.ptr, ptr %out.ptr) { -; SLEEF-NEON-LABEL: define void @exp_f64 -; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { -; SLEEF-NEON: [[TMP3:%.*]] = call <2 x double> @_ZGVnN2v_exp(<2 x double> [[WIDE_LOAD:%.*]]) -; -; SLEEF-SVE-LABEL: define void @exp_f64 -; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { -; SLEEF-SVE: [[TMP15:%.*]] = call @_ZGVsMxv_exp( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) -; -; ARMPL-NEON-LABEL: define void @exp_f64 -; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { -; ARMPL-NEON: [[TMP3:%.*]] = call <2 x double> @armpl_vexpq_f64(<2 x double> [[WIDE_LOAD:%.*]]) -; -; ARMPL-SVE-LABEL: define void @exp_f64 -; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { -; ARMPL-SVE: [[TMP15:%.*]] = call @armpl_svexp_f64_x( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) -; - entry: - br label %for.body - - for.body: - %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] - %in.gep = getelementptr inbounds double, ptr %in.ptr, i64 %iv - %in = load double, ptr %in.gep, align 8 - %call = tail call double @llvm.exp.f64(double %in) - %out.gep = getelementptr inbounds double, ptr %out.ptr, i64 %iv - store double %call, ptr %out.gep, align 8 - %iv.next = add nuw nsw i64 %iv, 1 - %exitcond = icmp eq i64 %iv.next, 1000 - br i1 %exitcond, label %for.end, label %for.body - - for.end: - ret void -} - -define void @exp_f32(ptr noalias %in.ptr, ptr %out.ptr) { -; SLEEF-NEON-LABEL: define void @exp_f32 -; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { -; SLEEF-NEON: [[TMP3:%.*]] = call <4 x float> @_ZGVnN4v_expf(<4 x float> [[WIDE_LOAD:%.*]]) -; -; SLEEF-SVE-LABEL: define void @exp_f32 -; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { -; SLEEF-SVE: [[TMP15:%.*]] = call @_ZGVsMxv_expf( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) -; -; ARMPL-NEON-LABEL: define void @exp_f32 -; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { -; ARMPL-NEON: [[TMP3:%.*]] = call <4 x float> @armpl_vexpq_f32(<4 x float> [[WIDE_LOAD:%.*]]) -; -; ARMPL-SVE-LABEL: define void @exp_f32 -; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { -; ARMPL-SVE: [[TMP15:%.*]] = call @armpl_svexp_f32_x( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) -; - entry: - br label %for.body - - for.body: - %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] - %in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv - %in = load float, ptr %in.gep, align 8 - %call = tail call float @llvm.exp.f32(float %in) - %out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv - store float %call, ptr %out.gep, align 4 - %iv.next = add nuw nsw i64 %iv, 1 - %exitcond = icmp eq i64 %iv.next, 1000 - br i1 %exitcond, label %for.end, label %for.body - - for.end: - ret void -} - -declare double @llvm.exp2.f64(double) -declare float @llvm.exp2.f32(float) - -define void @exp2_f64(ptr noalias %in.ptr, ptr %out.ptr) { -; SLEEF-NEON-LABEL: define void @exp2_f64 -; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { -; SLEEF-NEON: [[TMP3:%.*]] = call <2 x double> @_ZGVnN2v_exp2(<2 x double> [[WIDE_LOAD:%.*]]) -; -; SLEEF-SVE-LABEL: define void @exp2_f64 -; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { -; SLEEF-SVE: [[TMP15:%.*]] = call @_ZGVsMxv_exp2( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) -; -; ARMPL-NEON-LABEL: define void @exp2_f64 -; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { -; ARMPL-NEON: [[TMP3:%.*]] = call <2 x double> @armpl_vexp2q_f64(<2 x double> [[WIDE_LOAD:%.*]]) -; -; ARMPL-SVE-LABEL: define void @exp2_f64 -; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { -; ARMPL-SVE: [[TMP15:%.*]] = call @armpl_svexp2_f64_x( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) +; ARMPL-SVE: [[TMP15:%.*]] = call @llvm.ceil.nxv2f64( [[WIDE_MASKED_LOAD:%.*]]) ; entry: br label %for.body @@ -185,7 +39,7 @@ define void @exp2_f64(ptr noalias %in.ptr, ptr %out.ptr) { %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] %in.gep = getelementptr inbounds double, ptr %in.ptr, i64 %iv %in = load double, ptr %in.gep, align 8 - %call = tail call double @llvm.exp2.f64(double %in) + %call = tail call double @llvm.ceil.f64(double %in) %out.gep = getelementptr inbounds double, ptr %out.ptr, i64 %iv store double %call, ptr %out.gep, align 8 %iv.next = add nuw nsw i64 %iv, 1 @@ -196,22 +50,22 @@ define void @exp2_f64(ptr noalias %in.ptr, ptr %out.ptr) { ret void } -define void @exp2_f32(ptr noalias %in.ptr, ptr %out.ptr) { -; SLEEF-NEON-LABEL: define void @exp2_f32 +define void @ceil_f32(ptr noalias %in.ptr, ptr %out.ptr) { +; SLEEF-NEON-LABEL: define void @ceil_f32 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { -; SLEEF-NEON: [[TMP3:%.*]] = call <4 x float> @_ZGVnN4v_exp2f(<4 x float> [[WIDE_LOAD:%.*]]) +; SLEEF-NEON: [[TMP3:%.*]] = call <4 x float> @llvm.ceil.v4f32(<4 x float> [[WIDE_LOAD:%.*]]) ; -; SLEEF-SVE-LABEL: define void @exp2_f32 +; SLEEF-SVE-LABEL: define void @ceil_f32 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { -; SLEEF-SVE: [[TMP15:%.*]] = call @_ZGVsMxv_exp2f( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) +; SLEEF-SVE: [[TMP15:%.*]] = call @llvm.ceil.nxv4f32( [[WIDE_MASKED_LOAD:%.*]]) ; -; ARMPL-NEON-LABEL: define void @exp2_f32 +; ARMPL-NEON-LABEL: define void @ceil_f32 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { -; ARMPL-NEON: [[TMP3:%.*]] = call <4 x float> @armpl_vexp2q_f32(<4 x float> [[WIDE_LOAD:%.*]]) +; ARMPL-NEON: [[TMP3:%.*]] = call <4 x float> @llvm.ceil.v4f32(<4 x float> [[WIDE_LOAD:%.*]]) ; -; ARMPL-SVE-LABEL: define void @exp2_f32 +; ARMPL-SVE-LABEL: define void @ceil_f32 ; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { -; ARMPL-SVE: [[TMP15:%.*]] = call @armpl_svexp2_f32_x( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) +; ARMPL-SVE: [[TMP15:%.*]] = call @llvm.ceil.nxv4f32( [[WIDE_MASKED_LOAD:%.*]]) ; entry: br label %for.body @@ -220,7 +74,7 @@ define void @exp2_f32(ptr noalias %in.ptr, ptr %out.ptr) { %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] %in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv %in = load float, ptr %in.gep, align 8 - %call = tail call float @llvm.exp2.f32(float %in) + %call = tail call float @llvm.ceil.f32(float %in) %out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv store float %call, ptr %out.gep, align 4 %iv.next = add nuw nsw i64 %iv, 1 @@ -231,25 +85,25 @@ define void @exp2_f32(ptr noalias %in.ptr, ptr %out.ptr) { ret void } -declare double @llvm.exp10.f64(double) -declare float @llvm.exp10.f32(float) +declare double @llvm.copysign.f64(double, double) +declare float @llvm.copysign.f32(float, float) -define void @exp10_f64(ptr noalias %in.ptr, ptr %out.ptr) { -; SLEEF-NEON-LABEL: define void @exp10_f64 +define void @copysign_f64(ptr noalias %in.ptr, ptr %out.ptr) { +; SLEEF-NEON-LABEL: define void @copysign_f64 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { -; SLEEF-NEON: [[TMP3:%.*]] = call <2 x double> @_ZGVnN2v_exp10(<2 x double> [[WIDE_LOAD:%.*]]) +; SLEEF-NEON: [[TMP3:%.*]] = call <2 x double> @llvm.copysign.v2f64(<2 x double> [[WIDE_LOAD:%.*]], <2 x double> [[WIDE_LOAD]]) ; -; SLEEF-SVE-LABEL: define void @exp10_f64 +; SLEEF-SVE-LABEL: define void @copysign_f64 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { -; SLEEF-SVE: [[TMP15:%.*]] = call @_ZGVsMxv_exp10( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) +; SLEEF-SVE: [[TMP15:%.*]] = call @llvm.copysign.nxv2f64( [[WIDE_MASKED_LOAD:%.*]], [[WIDE_MASKED_LOAD]]) ; -; ARMPL-NEON-LABEL: define void @exp10_f64 +; ARMPL-NEON-LABEL: define void @copysign_f64 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { -; ARMPL-NEON: [[TMP3:%.*]] = call <2 x double> @armpl_vexp10q_f64(<2 x double> [[WIDE_LOAD:%.*]]) +; ARMPL-NEON: [[TMP3:%.*]] = call <2 x double> @llvm.copysign.v2f64(<2 x double> [[WIDE_LOAD:%.*]], <2 x double> [[WIDE_LOAD]]) ; -; ARMPL-SVE-LABEL: define void @exp10_f64 +; ARMPL-SVE-LABEL: define void @copysign_f64 ; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { -; ARMPL-SVE: [[TMP15:%.*]] = call @armpl_svexp10_f64_x( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) +; ARMPL-SVE: [[TMP15:%.*]] = call @llvm.copysign.nxv2f64( [[WIDE_MASKED_LOAD:%.*]], [[WIDE_MASKED_LOAD]]) ; entry: br label %for.body @@ -258,7 +112,7 @@ define void @exp10_f64(ptr noalias %in.ptr, ptr %out.ptr) { %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] %in.gep = getelementptr inbounds double, ptr %in.ptr, i64 %iv %in = load double, ptr %in.gep, align 8 - %call = tail call double @llvm.exp10.f64(double %in) + %call = tail call double @llvm.copysign.f64(double %in, double %in) %out.gep = getelementptr inbounds double, ptr %out.ptr, i64 %iv store double %call, ptr %out.gep, align 8 %iv.next = add nuw nsw i64 %iv, 1 @@ -269,22 +123,22 @@ define void @exp10_f64(ptr noalias %in.ptr, ptr %out.ptr) { ret void } -define void @exp10_f32(ptr noalias %in.ptr, ptr %out.ptr) { -; SLEEF-NEON-LABEL: define void @exp10_f32 +define void @copysign_f32(ptr noalias %in.ptr, ptr %out.ptr) { +; SLEEF-NEON-LABEL: define void @copysign_f32 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { -; SLEEF-NEON: [[TMP3:%.*]] = call <4 x float> @_ZGVnN4v_exp10f(<4 x float> [[WIDE_LOAD:%.*]]) +; SLEEF-NEON: [[TMP3:%.*]] = call <4 x float> @llvm.copysign.v4f32(<4 x float> [[WIDE_LOAD:%.*]], <4 x float> [[WIDE_LOAD]]) ; -; SLEEF-SVE-LABEL: define void @exp10_f32 +; SLEEF-SVE-LABEL: define void @copysign_f32 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { -; SLEEF-SVE: [[TMP15:%.*]] = call @_ZGVsMxv_exp10f( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) +; SLEEF-SVE: [[TMP15:%.*]] = call @llvm.copysign.nxv4f32( [[WIDE_MASKED_LOAD:%.*]], [[WIDE_MASKED_LOAD]]) ; -; ARMPL-NEON-LABEL: define void @exp10_f32 +; ARMPL-NEON-LABEL: define void @copysign_f32 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { -; ARMPL-NEON: [[TMP3:%.*]] = call <4 x float> @armpl_vexp10q_f32(<4 x float> [[WIDE_LOAD:%.*]]) +; ARMPL-NEON: [[TMP3:%.*]] = call <4 x float> @llvm.copysign.v4f32(<4 x float> [[WIDE_LOAD:%.*]], <4 x float> [[WIDE_LOAD]]) ; -; ARMPL-SVE-LABEL: define void @exp10_f32 +; ARMPL-SVE-LABEL: define void @copysign_f32 ; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { -; ARMPL-SVE: [[TMP15:%.*]] = call @armpl_svexp10_f32_x( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) +; ARMPL-SVE: [[TMP15:%.*]] = call @llvm.copysign.nxv4f32( [[WIDE_MASKED_LOAD:%.*]], [[WIDE_MASKED_LOAD]]) ; entry: br label %for.body @@ -293,7 +147,7 @@ define void @exp10_f32(ptr noalias %in.ptr, ptr %out.ptr) { %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] %in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv %in = load float, ptr %in.gep, align 8 - %call = tail call float @llvm.exp10.f32(float %in) + %call = tail call float @llvm.copysign.f32(float %in, float %in) %out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv store float %call, ptr %out.gep, align 4 %iv.next = add nuw nsw i64 %iv, 1 @@ -304,25 +158,25 @@ define void @exp10_f32(ptr noalias %in.ptr, ptr %out.ptr) { ret void } -declare double @llvm.log.f64(double) -declare float @llvm.log.f32(float) +declare double @llvm.cos.f64(double) +declare float @llvm.cos.f32(float) -define void @log_f64(ptr noalias %in.ptr, ptr %out.ptr) { -; SLEEF-NEON-LABEL: define void @log_f64 +define void @cos_f64(ptr noalias %in.ptr, ptr %out.ptr) { +; SLEEF-NEON-LABEL: define void @cos_f64 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { -; SLEEF-NEON: [[TMP3:%.*]] = call <2 x double> @_ZGVnN2v_log(<2 x double> [[WIDE_LOAD:%.*]]) +; SLEEF-NEON: [[TMP3:%.*]] = call <2 x double> @_ZGVnN2v_cos(<2 x double> [[WIDE_LOAD:%.*]]) ; -; SLEEF-SVE-LABEL: define void @log_f64 +; SLEEF-SVE-LABEL: define void @cos_f64 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { -; SLEEF-SVE: [[TMP15:%.*]] = call @_ZGVsMxv_log( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) +; SLEEF-SVE: [[TMP15:%.*]] = call @_ZGVsMxv_cos( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) ; -; ARMPL-NEON-LABEL: define void @log_f64 +; ARMPL-NEON-LABEL: define void @cos_f64 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { -; ARMPL-NEON: [[TMP3:%.*]] = call <2 x double> @armpl_vlogq_f64(<2 x double> [[WIDE_LOAD:%.*]]) +; ARMPL-NEON: [[TMP3:%.*]] = call <2 x double> @armpl_vcosq_f64(<2 x double> [[WIDE_LOAD:%.*]]) ; -; ARMPL-SVE-LABEL: define void @log_f64 +; ARMPL-SVE-LABEL: define void @cos_f64 ; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { -; ARMPL-SVE: [[TMP15:%.*]] = call @armpl_svlog_f64_x( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) +; ARMPL-SVE: [[TMP15:%.*]] = call @armpl_svcos_f64_x( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) ; entry: br label %for.body @@ -331,7 +185,7 @@ define void @log_f64(ptr noalias %in.ptr, ptr %out.ptr) { %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] %in.gep = getelementptr inbounds double, ptr %in.ptr, i64 %iv %in = load double, ptr %in.gep, align 8 - %call = tail call double @llvm.log.f64(double %in) + %call = tail call double @llvm.cos.f64(double %in) %out.gep = getelementptr inbounds double, ptr %out.ptr, i64 %iv store double %call, ptr %out.gep, align 8 %iv.next = add nuw nsw i64 %iv, 1 @@ -342,22 +196,22 @@ define void @log_f64(ptr noalias %in.ptr, ptr %out.ptr) { ret void } -define void @log_f32(ptr noalias %in.ptr, ptr %out.ptr) { -; SLEEF-NEON-LABEL: define void @log_f32 +define void @cos_f32(ptr noalias %in.ptr, ptr %out.ptr) { +; SLEEF-NEON-LABEL: define void @cos_f32 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { -; SLEEF-NEON: [[TMP3:%.*]] = call <4 x float> @_ZGVnN4v_logf(<4 x float> [[WIDE_LOAD:%.*]]) +; SLEEF-NEON: [[TMP3:%.*]] = call <4 x float> @_ZGVnN4v_cosf(<4 x float> [[WIDE_LOAD:%.*]]) ; -; SLEEF-SVE-LABEL: define void @log_f32 +; SLEEF-SVE-LABEL: define void @cos_f32 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { -; SLEEF-SVE: [[TMP15:%.*]] = call @_ZGVsMxv_logf( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) +; SLEEF-SVE: [[TMP15:%.*]] = call @_ZGVsMxv_cosf( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) ; -; ARMPL-NEON-LABEL: define void @log_f32 +; ARMPL-NEON-LABEL: define void @cos_f32 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { -; ARMPL-NEON: [[TMP3:%.*]] = call <4 x float> @armpl_vlogq_f32(<4 x float> [[WIDE_LOAD:%.*]]) +; ARMPL-NEON: [[TMP3:%.*]] = call <4 x float> @armpl_vcosq_f32(<4 x float> [[WIDE_LOAD:%.*]]) ; -; ARMPL-SVE-LABEL: define void @log_f32 +; ARMPL-SVE-LABEL: define void @cos_f32 ; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { -; ARMPL-SVE: [[TMP15:%.*]] = call @armpl_svlog_f32_x( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) +; ARMPL-SVE: [[TMP15:%.*]] = call @armpl_svcos_f32_x( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) ; entry: br label %for.body @@ -366,7 +220,7 @@ define void @log_f32(ptr noalias %in.ptr, ptr %out.ptr) { %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] %in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv %in = load float, ptr %in.gep, align 8 - %call = tail call float @llvm.log.f32(float %in) + %call = tail call float @llvm.cos.f32(float %in) %out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv store float %call, ptr %out.gep, align 4 %iv.next = add nuw nsw i64 %iv, 1 @@ -377,25 +231,25 @@ define void @log_f32(ptr noalias %in.ptr, ptr %out.ptr) { ret void } -declare double @llvm.log2.f64(double) -declare float @llvm.log2.f32(float) +declare double @llvm.exp.f64(double) +declare float @llvm.exp.f32(float) -define void @log2_f64(ptr noalias %in.ptr, ptr %out.ptr) { -; SLEEF-NEON-LABEL: define void @log2_f64 +define void @exp_f64(ptr noalias %in.ptr, ptr %out.ptr) { +; SLEEF-NEON-LABEL: define void @exp_f64 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { -; SLEEF-NEON: [[TMP3:%.*]] = call <2 x double> @_ZGVnN2v_log2(<2 x double> [[WIDE_LOAD:%.*]]) +; SLEEF-NEON: [[TMP3:%.*]] = call <2 x double> @_ZGVnN2v_exp(<2 x double> [[WIDE_LOAD:%.*]]) ; -; SLEEF-SVE-LABEL: define void @log2_f64 +; SLEEF-SVE-LABEL: define void @exp_f64 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { -; SLEEF-SVE: [[TMP15:%.*]] = call @_ZGVsMxv_log2( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) +; SLEEF-SVE: [[TMP15:%.*]] = call @_ZGVsMxv_exp( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) ; -; ARMPL-NEON-LABEL: define void @log2_f64 +; ARMPL-NEON-LABEL: define void @exp_f64 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { -; ARMPL-NEON: [[TMP3:%.*]] = call <2 x double> @armpl_vlog2q_f64(<2 x double> [[WIDE_LOAD:%.*]]) +; ARMPL-NEON: [[TMP3:%.*]] = call <2 x double> @armpl_vexpq_f64(<2 x double> [[WIDE_LOAD:%.*]]) ; -; ARMPL-SVE-LABEL: define void @log2_f64 +; ARMPL-SVE-LABEL: define void @exp_f64 ; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { -; ARMPL-SVE: [[TMP15:%.*]] = call @armpl_svlog2_f64_x( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) +; ARMPL-SVE: [[TMP15:%.*]] = call @armpl_svexp_f64_x( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) ; entry: br label %for.body @@ -404,7 +258,7 @@ define void @log2_f64(ptr noalias %in.ptr, ptr %out.ptr) { %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] %in.gep = getelementptr inbounds double, ptr %in.ptr, i64 %iv %in = load double, ptr %in.gep, align 8 - %call = tail call double @llvm.log2.f64(double %in) + %call = tail call double @llvm.exp.f64(double %in) %out.gep = getelementptr inbounds double, ptr %out.ptr, i64 %iv store double %call, ptr %out.gep, align 8 %iv.next = add nuw nsw i64 %iv, 1 @@ -415,22 +269,22 @@ define void @log2_f64(ptr noalias %in.ptr, ptr %out.ptr) { ret void } -define void @log2_f32(ptr noalias %in.ptr, ptr %out.ptr) { -; SLEEF-NEON-LABEL: define void @log2_f32 +define void @exp_f32(ptr noalias %in.ptr, ptr %out.ptr) { +; SLEEF-NEON-LABEL: define void @exp_f32 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { -; SLEEF-NEON: [[TMP3:%.*]] = call <4 x float> @_ZGVnN4v_log2f(<4 x float> [[WIDE_LOAD:%.*]]) +; SLEEF-NEON: [[TMP3:%.*]] = call <4 x float> @_ZGVnN4v_expf(<4 x float> [[WIDE_LOAD:%.*]]) ; -; SLEEF-SVE-LABEL: define void @log2_f32 +; SLEEF-SVE-LABEL: define void @exp_f32 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { -; SLEEF-SVE: [[TMP15:%.*]] = call @_ZGVsMxv_log2f( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) +; SLEEF-SVE: [[TMP15:%.*]] = call @_ZGVsMxv_expf( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) ; -; ARMPL-NEON-LABEL: define void @log2_f32 +; ARMPL-NEON-LABEL: define void @exp_f32 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { -; ARMPL-NEON: [[TMP3:%.*]] = call <4 x float> @armpl_vlog2q_f32(<4 x float> [[WIDE_LOAD:%.*]]) +; ARMPL-NEON: [[TMP3:%.*]] = call <4 x float> @armpl_vexpq_f32(<4 x float> [[WIDE_LOAD:%.*]]) ; -; ARMPL-SVE-LABEL: define void @log2_f32 +; ARMPL-SVE-LABEL: define void @exp_f32 ; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { -; ARMPL-SVE: [[TMP15:%.*]] = call @armpl_svlog2_f32_x( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) +; ARMPL-SVE: [[TMP15:%.*]] = call @armpl_svexp_f32_x( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) ; entry: br label %for.body @@ -439,7 +293,7 @@ define void @log2_f32(ptr noalias %in.ptr, ptr %out.ptr) { %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] %in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv %in = load float, ptr %in.gep, align 8 - %call = tail call float @llvm.log2.f32(float %in) + %call = tail call float @llvm.exp.f32(float %in) %out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv store float %call, ptr %out.gep, align 4 %iv.next = add nuw nsw i64 %iv, 1 @@ -450,25 +304,25 @@ define void @log2_f32(ptr noalias %in.ptr, ptr %out.ptr) { ret void } -declare double @llvm.log10.f64(double) -declare float @llvm.log10.f32(float) +declare double @llvm.exp10.f64(double) +declare float @llvm.exp10.f32(float) -define void @log10_f64(ptr noalias %in.ptr, ptr %out.ptr) { -; SLEEF-NEON-LABEL: define void @log10_f64 +define void @exp10_f64(ptr noalias %in.ptr, ptr %out.ptr) { +; SLEEF-NEON-LABEL: define void @exp10_f64 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { -; SLEEF-NEON: [[TMP3:%.*]] = call <2 x double> @_ZGVnN2v_log10(<2 x double> [[WIDE_LOAD:%.*]]) +; SLEEF-NEON: [[TMP3:%.*]] = call <2 x double> @_ZGVnN2v_exp10(<2 x double> [[WIDE_LOAD:%.*]]) ; -; SLEEF-SVE-LABEL: define void @log10_f64 +; SLEEF-SVE-LABEL: define void @exp10_f64 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { -; SLEEF-SVE: [[TMP15:%.*]] = call @_ZGVsMxv_log10( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) +; SLEEF-SVE: [[TMP15:%.*]] = call @_ZGVsMxv_exp10( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) ; -; ARMPL-NEON-LABEL: define void @log10_f64 +; ARMPL-NEON-LABEL: define void @exp10_f64 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { -; ARMPL-NEON: [[TMP3:%.*]] = call <2 x double> @armpl_vlog10q_f64(<2 x double> [[WIDE_LOAD:%.*]]) +; ARMPL-NEON: [[TMP3:%.*]] = call <2 x double> @armpl_vexp10q_f64(<2 x double> [[WIDE_LOAD:%.*]]) ; -; ARMPL-SVE-LABEL: define void @log10_f64 +; ARMPL-SVE-LABEL: define void @exp10_f64 ; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { -; ARMPL-SVE: [[TMP15:%.*]] = call @armpl_svlog10_f64_x( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) +; ARMPL-SVE: [[TMP15:%.*]] = call @armpl_svexp10_f64_x( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) ; entry: br label %for.body @@ -477,7 +331,7 @@ define void @log10_f64(ptr noalias %in.ptr, ptr %out.ptr) { %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] %in.gep = getelementptr inbounds double, ptr %in.ptr, i64 %iv %in = load double, ptr %in.gep, align 8 - %call = tail call double @llvm.log10.f64(double %in) + %call = tail call double @llvm.exp10.f64(double %in) %out.gep = getelementptr inbounds double, ptr %out.ptr, i64 %iv store double %call, ptr %out.gep, align 8 %iv.next = add nuw nsw i64 %iv, 1 @@ -488,22 +342,22 @@ define void @log10_f64(ptr noalias %in.ptr, ptr %out.ptr) { ret void } -define void @log10_f32(ptr noalias %in.ptr, ptr %out.ptr) { -; SLEEF-NEON-LABEL: define void @log10_f32 +define void @exp10_f32(ptr noalias %in.ptr, ptr %out.ptr) { +; SLEEF-NEON-LABEL: define void @exp10_f32 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { -; SLEEF-NEON: [[TMP3:%.*]] = call <4 x float> @_ZGVnN4v_log10f(<4 x float> [[WIDE_LOAD:%.*]]) +; SLEEF-NEON: [[TMP3:%.*]] = call <4 x float> @_ZGVnN4v_exp10f(<4 x float> [[WIDE_LOAD:%.*]]) ; -; SLEEF-SVE-LABEL: define void @log10_f32 +; SLEEF-SVE-LABEL: define void @exp10_f32 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { -; SLEEF-SVE: [[TMP15:%.*]] = call @_ZGVsMxv_log10f( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) +; SLEEF-SVE: [[TMP15:%.*]] = call @_ZGVsMxv_exp10f( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) ; -; ARMPL-NEON-LABEL: define void @log10_f32 +; ARMPL-NEON-LABEL: define void @exp10_f32 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { -; ARMPL-NEON: [[TMP3:%.*]] = call <4 x float> @armpl_vlog10q_f32(<4 x float> [[WIDE_LOAD:%.*]]) +; ARMPL-NEON: [[TMP3:%.*]] = call <4 x float> @armpl_vexp10q_f32(<4 x float> [[WIDE_LOAD:%.*]]) ; -; ARMPL-SVE-LABEL: define void @log10_f32 +; ARMPL-SVE-LABEL: define void @exp10_f32 ; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { -; ARMPL-SVE: [[TMP15:%.*]] = call @armpl_svlog10_f32_x( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) +; ARMPL-SVE: [[TMP15:%.*]] = call @armpl_svexp10_f32_x( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) ; entry: br label %for.body @@ -512,7 +366,7 @@ define void @log10_f32(ptr noalias %in.ptr, ptr %out.ptr) { %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] %in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv %in = load float, ptr %in.gep, align 8 - %call = tail call float @llvm.log10.f32(float %in) + %call = tail call float @llvm.exp10.f32(float %in) %out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv store float %call, ptr %out.gep, align 4 %iv.next = add nuw nsw i64 %iv, 1 @@ -523,25 +377,25 @@ define void @log10_f32(ptr noalias %in.ptr, ptr %out.ptr) { ret void } -declare double @llvm.sin.f64(double) -declare float @llvm.sin.f32(float) +declare double @llvm.exp2.f64(double) +declare float @llvm.exp2.f32(float) -define void @sin_f64(ptr noalias %in.ptr, ptr %out.ptr) { -; SLEEF-NEON-LABEL: define void @sin_f64 +define void @exp2_f64(ptr noalias %in.ptr, ptr %out.ptr) { +; SLEEF-NEON-LABEL: define void @exp2_f64 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { -; SLEEF-NEON: [[TMP3:%.*]] = call <2 x double> @_ZGVnN2v_sin(<2 x double> [[WIDE_LOAD:%.*]]) +; SLEEF-NEON: [[TMP3:%.*]] = call <2 x double> @_ZGVnN2v_exp2(<2 x double> [[WIDE_LOAD:%.*]]) ; -; SLEEF-SVE-LABEL: define void @sin_f64 +; SLEEF-SVE-LABEL: define void @exp2_f64 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { -; SLEEF-SVE: [[TMP15:%.*]] = call @_ZGVsMxv_sin( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) +; SLEEF-SVE: [[TMP15:%.*]] = call @_ZGVsMxv_exp2( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) ; -; ARMPL-NEON-LABEL: define void @sin_f64 +; ARMPL-NEON-LABEL: define void @exp2_f64 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { -; ARMPL-NEON: [[TMP3:%.*]] = call <2 x double> @armpl_vsinq_f64(<2 x double> [[WIDE_LOAD:%.*]]) +; ARMPL-NEON: [[TMP3:%.*]] = call <2 x double> @armpl_vexp2q_f64(<2 x double> [[WIDE_LOAD:%.*]]) ; -; ARMPL-SVE-LABEL: define void @sin_f64 +; ARMPL-SVE-LABEL: define void @exp2_f64 ; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { -; ARMPL-SVE: [[TMP15:%.*]] = call @armpl_svsin_f64_x( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) +; ARMPL-SVE: [[TMP15:%.*]] = call @armpl_svexp2_f64_x( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) ; entry: br label %for.body @@ -550,7 +404,7 @@ define void @sin_f64(ptr noalias %in.ptr, ptr %out.ptr) { %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] %in.gep = getelementptr inbounds double, ptr %in.ptr, i64 %iv %in = load double, ptr %in.gep, align 8 - %call = tail call double @llvm.sin.f64(double %in) + %call = tail call double @llvm.exp2.f64(double %in) %out.gep = getelementptr inbounds double, ptr %out.ptr, i64 %iv store double %call, ptr %out.gep, align 8 %iv.next = add nuw nsw i64 %iv, 1 @@ -561,22 +415,22 @@ define void @sin_f64(ptr noalias %in.ptr, ptr %out.ptr) { ret void } -define void @sin_f32(ptr noalias %in.ptr, ptr %out.ptr) { -; SLEEF-NEON-LABEL: define void @sin_f32 +define void @exp2_f32(ptr noalias %in.ptr, ptr %out.ptr) { +; SLEEF-NEON-LABEL: define void @exp2_f32 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { -; SLEEF-NEON: [[TMP3:%.*]] = call <4 x float> @_ZGVnN4v_sinf(<4 x float> [[WIDE_LOAD:%.*]]) +; SLEEF-NEON: [[TMP3:%.*]] = call <4 x float> @_ZGVnN4v_exp2f(<4 x float> [[WIDE_LOAD:%.*]]) ; -; SLEEF-SVE-LABEL: define void @sin_f32 +; SLEEF-SVE-LABEL: define void @exp2_f32 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { -; SLEEF-SVE: [[TMP15:%.*]] = call @_ZGVsMxv_sinf( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) +; SLEEF-SVE: [[TMP15:%.*]] = call @_ZGVsMxv_exp2f( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) ; -; ARMPL-NEON-LABEL: define void @sin_f32 +; ARMPL-NEON-LABEL: define void @exp2_f32 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { -; ARMPL-NEON: [[TMP3:%.*]] = call <4 x float> @armpl_vsinq_f32(<4 x float> [[WIDE_LOAD:%.*]]) +; ARMPL-NEON: [[TMP3:%.*]] = call <4 x float> @armpl_vexp2q_f32(<4 x float> [[WIDE_LOAD:%.*]]) ; -; ARMPL-SVE-LABEL: define void @sin_f32 +; ARMPL-SVE-LABEL: define void @exp2_f32 ; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { -; ARMPL-SVE: [[TMP15:%.*]] = call @armpl_svsin_f32_x( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) +; ARMPL-SVE: [[TMP15:%.*]] = call @armpl_svexp2_f32_x( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) ; entry: br label %for.body @@ -585,7 +439,7 @@ define void @sin_f32(ptr noalias %in.ptr, ptr %out.ptr) { %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] %in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv %in = load float, ptr %in.gep, align 8 - %call = tail call float @llvm.sin.f32(float %in) + %call = tail call float @llvm.exp2.f32(float %in) %out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv store float %call, ptr %out.gep, align 4 %iv.next = add nuw nsw i64 %iv, 1 @@ -596,25 +450,25 @@ define void @sin_f32(ptr noalias %in.ptr, ptr %out.ptr) { ret void } -declare double @llvm.pow.f64(double, double) -declare float @llvm.pow.f32(float, float) +declare double @llvm.fabs.f64(double) +declare float @llvm.fabs.f32(float) -define void @pow_f64(ptr noalias %in.ptr, ptr %out.ptr) { -; SLEEF-NEON-LABEL: define void @pow_f64 +define void @fabs_f64(ptr noalias %in.ptr, ptr %out.ptr) { +; SLEEF-NEON-LABEL: define void @fabs_f64 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { -; SLEEF-NEON: [[TMP3:%.*]] = call <2 x double> @_ZGVnN2vv_pow(<2 x double> [[WIDE_LOAD:%.*]], <2 x double> [[WIDE_LOAD]]) +; SLEEF-NEON: [[TMP3:%.*]] = call <2 x double> @llvm.fabs.v2f64(<2 x double> [[WIDE_LOAD:%.*]]) ; -; SLEEF-SVE-LABEL: define void @pow_f64 +; SLEEF-SVE-LABEL: define void @fabs_f64 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { -; SLEEF-SVE: [[TMP15:%.*]] = call @_ZGVsMxvv_pow( [[WIDE_MASKED_LOAD:%.*]], [[WIDE_MASKED_LOAD]], [[ACTIVE_LANE_MASK:%.*]]) +; SLEEF-SVE: [[TMP15:%.*]] = call @llvm.fabs.nxv2f64( [[WIDE_MASKED_LOAD:%.*]]) ; -; ARMPL-NEON-LABEL: define void @pow_f64 +; ARMPL-NEON-LABEL: define void @fabs_f64 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { -; ARMPL-NEON: [[TMP3:%.*]] = call <2 x double> @armpl_vpowq_f64(<2 x double> [[WIDE_LOAD:%.*]], <2 x double> [[WIDE_LOAD]]) +; ARMPL-NEON: [[TMP3:%.*]] = call <2 x double> @llvm.fabs.v2f64(<2 x double> [[WIDE_LOAD:%.*]]) ; -; ARMPL-SVE-LABEL: define void @pow_f64 +; ARMPL-SVE-LABEL: define void @fabs_f64 ; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { -; ARMPL-SVE: [[TMP15:%.*]] = call @armpl_svpow_f64_x( [[WIDE_MASKED_LOAD:%.*]], [[WIDE_MASKED_LOAD]], [[ACTIVE_LANE_MASK:%.*]]) +; ARMPL-SVE: [[TMP15:%.*]] = call @llvm.fabs.nxv2f64( [[WIDE_MASKED_LOAD:%.*]]) ; entry: br label %for.body @@ -623,7 +477,7 @@ define void @pow_f64(ptr noalias %in.ptr, ptr %out.ptr) { %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] %in.gep = getelementptr inbounds double, ptr %in.ptr, i64 %iv %in = load double, ptr %in.gep, align 8 - %call = tail call double @llvm.pow.f64(double %in, double %in) + %call = tail call double @llvm.fabs.f64(double %in) %out.gep = getelementptr inbounds double, ptr %out.ptr, i64 %iv store double %call, ptr %out.gep, align 8 %iv.next = add nuw nsw i64 %iv, 1 @@ -634,22 +488,22 @@ define void @pow_f64(ptr noalias %in.ptr, ptr %out.ptr) { ret void } -define void @pow_f32(ptr noalias %in.ptr, ptr %out.ptr) { -; SLEEF-NEON-LABEL: define void @pow_f32 +define void @fabs_f32(ptr noalias %in.ptr, ptr %out.ptr) { +; SLEEF-NEON-LABEL: define void @fabs_f32 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { -; SLEEF-NEON: [[TMP3:%.*]] = call <4 x float> @_ZGVnN4vv_powf(<4 x float> [[WIDE_LOAD:%.*]], <4 x float> [[WIDE_LOAD]]) +; SLEEF-NEON: [[TMP3:%.*]] = call <4 x float> @llvm.fabs.v4f32(<4 x float> [[WIDE_LOAD:%.*]]) ; -; SLEEF-SVE-LABEL: define void @pow_f32 +; SLEEF-SVE-LABEL: define void @fabs_f32 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { -; SLEEF-SVE: [[TMP15:%.*]] = call @_ZGVsMxvv_powf( [[WIDE_MASKED_LOAD:%.*]], [[WIDE_MASKED_LOAD]], [[ACTIVE_LANE_MASK:%.*]]) +; SLEEF-SVE: [[TMP15:%.*]] = call @llvm.fabs.nxv4f32( [[WIDE_MASKED_LOAD:%.*]]) ; -; ARMPL-NEON-LABEL: define void @pow_f32 +; ARMPL-NEON-LABEL: define void @fabs_f32 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { -; ARMPL-NEON: [[TMP3:%.*]] = call <4 x float> @armpl_vpowq_f32(<4 x float> [[WIDE_LOAD:%.*]], <4 x float> [[WIDE_LOAD]]) +; ARMPL-NEON: [[TMP3:%.*]] = call <4 x float> @llvm.fabs.v4f32(<4 x float> [[WIDE_LOAD:%.*]]) ; -; ARMPL-SVE-LABEL: define void @pow_f32 +; ARMPL-SVE-LABEL: define void @fabs_f32 ; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { -; ARMPL-SVE: [[TMP15:%.*]] = call @armpl_svpow_f32_x( [[WIDE_MASKED_LOAD:%.*]], [[WIDE_MASKED_LOAD]], [[ACTIVE_LANE_MASK:%.*]]) +; ARMPL-SVE: [[TMP15:%.*]] = call @llvm.fabs.nxv4f32( [[WIDE_MASKED_LOAD:%.*]]) ; entry: br label %for.body @@ -658,7 +512,7 @@ define void @pow_f32(ptr noalias %in.ptr, ptr %out.ptr) { %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] %in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv %in = load float, ptr %in.gep, align 8 - %call = tail call float @llvm.pow.f32(float %in, float %in) + %call = tail call float @llvm.fabs.f32(float %in) %out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv store float %call, ptr %out.gep, align 4 %iv.next = add nuw nsw i64 %iv, 1 @@ -669,25 +523,25 @@ define void @pow_f32(ptr noalias %in.ptr, ptr %out.ptr) { ret void } -declare double @llvm.ceil.f64(double) -declare float @llvm.ceil.f32(float) +declare double @llvm.floor.f64(double) +declare float @llvm.floor.f32(float) -define void @ceil_f64(ptr noalias %in.ptr, ptr %out.ptr) { -; SLEEF-NEON-LABEL: define void @ceil_f64 +define void @floor_f64(ptr noalias %in.ptr, ptr %out.ptr) { +; SLEEF-NEON-LABEL: define void @floor_f64 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { -; SLEEF-NEON: [[TMP3:%.*]] = call <2 x double> @llvm.ceil.v2f64(<2 x double> [[WIDE_LOAD:%.*]]) +; SLEEF-NEON: [[TMP3:%.*]] = call <2 x double> @llvm.floor.v2f64(<2 x double> [[WIDE_LOAD:%.*]]) ; -; SLEEF-SVE-LABEL: define void @ceil_f64 +; SLEEF-SVE-LABEL: define void @floor_f64 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { -; SLEEF-SVE: [[TMP15:%.*]] = call @llvm.ceil.nxv2f64( [[WIDE_MASKED_LOAD:%.*]]) +; SLEEF-SVE: [[TMP15:%.*]] = call @llvm.floor.nxv2f64( [[WIDE_MASKED_LOAD:%.*]]) ; -; ARMPL-NEON-LABEL: define void @ceil_f64 +; ARMPL-NEON-LABEL: define void @floor_f64 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { -; ARMPL-NEON: [[TMP3:%.*]] = call <2 x double> @llvm.ceil.v2f64(<2 x double> [[WIDE_LOAD:%.*]]) +; ARMPL-NEON: [[TMP3:%.*]] = call <2 x double> @llvm.floor.v2f64(<2 x double> [[WIDE_LOAD:%.*]]) ; -; ARMPL-SVE-LABEL: define void @ceil_f64 +; ARMPL-SVE-LABEL: define void @floor_f64 ; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { -; ARMPL-SVE: [[TMP15:%.*]] = call @llvm.ceil.nxv2f64( [[WIDE_MASKED_LOAD:%.*]]) +; ARMPL-SVE: [[TMP15:%.*]] = call @llvm.floor.nxv2f64( [[WIDE_MASKED_LOAD:%.*]]) ; entry: br label %for.body @@ -696,7 +550,7 @@ define void @ceil_f64(ptr noalias %in.ptr, ptr %out.ptr) { %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] %in.gep = getelementptr inbounds double, ptr %in.ptr, i64 %iv %in = load double, ptr %in.gep, align 8 - %call = tail call double @llvm.ceil.f64(double %in) + %call = tail call double @llvm.floor.f64(double %in) %out.gep = getelementptr inbounds double, ptr %out.ptr, i64 %iv store double %call, ptr %out.gep, align 8 %iv.next = add nuw nsw i64 %iv, 1 @@ -707,22 +561,22 @@ define void @ceil_f64(ptr noalias %in.ptr, ptr %out.ptr) { ret void } -define void @ceil_f32(ptr noalias %in.ptr, ptr %out.ptr) { -; SLEEF-NEON-LABEL: define void @ceil_f32 +define void @floor_f32(ptr noalias %in.ptr, ptr %out.ptr) { +; SLEEF-NEON-LABEL: define void @floor_f32 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { -; SLEEF-NEON: [[TMP3:%.*]] = call <4 x float> @llvm.ceil.v4f32(<4 x float> [[WIDE_LOAD:%.*]]) +; SLEEF-NEON: [[TMP3:%.*]] = call <4 x float> @llvm.floor.v4f32(<4 x float> [[WIDE_LOAD:%.*]]) ; -; SLEEF-SVE-LABEL: define void @ceil_f32 +; SLEEF-SVE-LABEL: define void @floor_f32 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { -; SLEEF-SVE: [[TMP15:%.*]] = call @llvm.ceil.nxv4f32( [[WIDE_MASKED_LOAD:%.*]]) +; SLEEF-SVE: [[TMP15:%.*]] = call @llvm.floor.nxv4f32( [[WIDE_MASKED_LOAD:%.*]]) ; -; ARMPL-NEON-LABEL: define void @ceil_f32 +; ARMPL-NEON-LABEL: define void @floor_f32 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { -; ARMPL-NEON: [[TMP3:%.*]] = call <4 x float> @llvm.ceil.v4f32(<4 x float> [[WIDE_LOAD:%.*]]) +; ARMPL-NEON: [[TMP3:%.*]] = call <4 x float> @llvm.floor.v4f32(<4 x float> [[WIDE_LOAD:%.*]]) ; -; ARMPL-SVE-LABEL: define void @ceil_f32 +; ARMPL-SVE-LABEL: define void @floor_f32 ; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { -; ARMPL-SVE: [[TMP15:%.*]] = call @llvm.ceil.nxv4f32( [[WIDE_MASKED_LOAD:%.*]]) +; ARMPL-SVE: [[TMP15:%.*]] = call @llvm.floor.nxv4f32( [[WIDE_MASKED_LOAD:%.*]]) ; entry: br label %for.body @@ -731,7 +585,7 @@ define void @ceil_f32(ptr noalias %in.ptr, ptr %out.ptr) { %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] %in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv %in = load float, ptr %in.gep, align 8 - %call = tail call float @llvm.ceil.f32(float %in) + %call = tail call float @llvm.floor.f32(float %in) %out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv store float %call, ptr %out.gep, align 4 %iv.next = add nuw nsw i64 %iv, 1 @@ -742,25 +596,25 @@ define void @ceil_f32(ptr noalias %in.ptr, ptr %out.ptr) { ret void } -declare double @llvm.copysign.f64(double, double) -declare float @llvm.copysign.f32(float, float) +declare double @llvm.fma.f64(double, double, double) +declare float @llvm.fma.f32(float, float, float) -define void @copysign_f64(ptr noalias %in.ptr, ptr %out.ptr) { -; SLEEF-NEON-LABEL: define void @copysign_f64 +define void @fma_f64(ptr noalias %in.ptr, ptr %out.ptr) { +; SLEEF-NEON-LABEL: define void @fma_f64 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { -; SLEEF-NEON: [[TMP3:%.*]] = call <2 x double> @llvm.copysign.v2f64(<2 x double> [[WIDE_LOAD:%.*]], <2 x double> [[WIDE_LOAD]]) +; SLEEF-NEON: [[TMP3:%.*]] = call <2 x double> @llvm.fma.v2f64(<2 x double> [[WIDE_LOAD:%.*]], <2 x double> [[WIDE_LOAD]], <2 x double> [[WIDE_LOAD]]) ; -; SLEEF-SVE-LABEL: define void @copysign_f64 +; SLEEF-SVE-LABEL: define void @fma_f64 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { -; SLEEF-SVE: [[TMP15:%.*]] = call @llvm.copysign.nxv2f64( [[WIDE_MASKED_LOAD:%.*]], [[WIDE_MASKED_LOAD]]) +; SLEEF-SVE: [[TMP15:%.*]] = call @llvm.fma.nxv2f64( [[WIDE_MASKED_LOAD:%.*]], [[WIDE_MASKED_LOAD]], [[WIDE_MASKED_LOAD]]) ; -; ARMPL-NEON-LABEL: define void @copysign_f64 +; ARMPL-NEON-LABEL: define void @fma_f64 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { -; ARMPL-NEON: [[TMP3:%.*]] = call <2 x double> @llvm.copysign.v2f64(<2 x double> [[WIDE_LOAD:%.*]], <2 x double> [[WIDE_LOAD]]) +; ARMPL-NEON: [[TMP3:%.*]] = call <2 x double> @llvm.fma.v2f64(<2 x double> [[WIDE_LOAD:%.*]], <2 x double> [[WIDE_LOAD]], <2 x double> [[WIDE_LOAD]]) ; -; ARMPL-SVE-LABEL: define void @copysign_f64 +; ARMPL-SVE-LABEL: define void @fma_f64 ; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { -; ARMPL-SVE: [[TMP15:%.*]] = call @llvm.copysign.nxv2f64( [[WIDE_MASKED_LOAD:%.*]], [[WIDE_MASKED_LOAD]]) +; ARMPL-SVE: [[TMP15:%.*]] = call @llvm.fma.nxv2f64( [[WIDE_MASKED_LOAD:%.*]], [[WIDE_MASKED_LOAD]], [[WIDE_MASKED_LOAD]]) ; entry: br label %for.body @@ -769,7 +623,7 @@ define void @copysign_f64(ptr noalias %in.ptr, ptr %out.ptr) { %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] %in.gep = getelementptr inbounds double, ptr %in.ptr, i64 %iv %in = load double, ptr %in.gep, align 8 - %call = tail call double @llvm.copysign.f64(double %in, double %in) + %call = tail call double @llvm.fma.f64(double %in, double %in, double %in) %out.gep = getelementptr inbounds double, ptr %out.ptr, i64 %iv store double %call, ptr %out.gep, align 8 %iv.next = add nuw nsw i64 %iv, 1 @@ -780,22 +634,22 @@ define void @copysign_f64(ptr noalias %in.ptr, ptr %out.ptr) { ret void } -define void @copysign_f32(ptr noalias %in.ptr, ptr %out.ptr) { -; SLEEF-NEON-LABEL: define void @copysign_f32 +define void @fma_f32(ptr noalias %in.ptr, ptr %out.ptr) { +; SLEEF-NEON-LABEL: define void @fma_f32 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { -; SLEEF-NEON: [[TMP3:%.*]] = call <4 x float> @llvm.copysign.v4f32(<4 x float> [[WIDE_LOAD:%.*]], <4 x float> [[WIDE_LOAD]]) +; SLEEF-NEON: [[TMP3:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> [[WIDE_LOAD:%.*]], <4 x float> [[WIDE_LOAD]], <4 x float> [[WIDE_LOAD]]) ; -; SLEEF-SVE-LABEL: define void @copysign_f32 +; SLEEF-SVE-LABEL: define void @fma_f32 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { -; SLEEF-SVE: [[TMP15:%.*]] = call @llvm.copysign.nxv4f32( [[WIDE_MASKED_LOAD:%.*]], [[WIDE_MASKED_LOAD]]) +; SLEEF-SVE: [[TMP15:%.*]] = call @llvm.fma.nxv4f32( [[WIDE_MASKED_LOAD:%.*]], [[WIDE_MASKED_LOAD]], [[WIDE_MASKED_LOAD]]) ; -; ARMPL-NEON-LABEL: define void @copysign_f32 +; ARMPL-NEON-LABEL: define void @fma_f32 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { -; ARMPL-NEON: [[TMP3:%.*]] = call <4 x float> @llvm.copysign.v4f32(<4 x float> [[WIDE_LOAD:%.*]], <4 x float> [[WIDE_LOAD]]) +; ARMPL-NEON: [[TMP3:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> [[WIDE_LOAD:%.*]], <4 x float> [[WIDE_LOAD]], <4 x float> [[WIDE_LOAD]]) ; -; ARMPL-SVE-LABEL: define void @copysign_f32 +; ARMPL-SVE-LABEL: define void @fma_f32 ; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { -; ARMPL-SVE: [[TMP15:%.*]] = call @llvm.copysign.nxv4f32( [[WIDE_MASKED_LOAD:%.*]], [[WIDE_MASKED_LOAD]]) +; ARMPL-SVE: [[TMP15:%.*]] = call @llvm.fma.nxv4f32( [[WIDE_MASKED_LOAD:%.*]], [[WIDE_MASKED_LOAD]], [[WIDE_MASKED_LOAD]]) ; entry: br label %for.body @@ -804,7 +658,7 @@ define void @copysign_f32(ptr noalias %in.ptr, ptr %out.ptr) { %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] %in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv %in = load float, ptr %in.gep, align 8 - %call = tail call float @llvm.copysign.f32(float %in, float %in) + %call = tail call float @llvm.fma.f32(float %in, float %in, float %in) %out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv store float %call, ptr %out.gep, align 4 %iv.next = add nuw nsw i64 %iv, 1 @@ -815,25 +669,25 @@ define void @copysign_f32(ptr noalias %in.ptr, ptr %out.ptr) { ret void } -declare double @llvm.fabs.f64(double) -declare float @llvm.fabs.f32(float) +declare double @llvm.log.f64(double) +declare float @llvm.log.f32(float) -define void @fabs_f64(ptr noalias %in.ptr, ptr %out.ptr) { -; SLEEF-NEON-LABEL: define void @fabs_f64 +define void @log_f64(ptr noalias %in.ptr, ptr %out.ptr) { +; SLEEF-NEON-LABEL: define void @log_f64 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { -; SLEEF-NEON: [[TMP3:%.*]] = call <2 x double> @llvm.fabs.v2f64(<2 x double> [[WIDE_LOAD:%.*]]) +; SLEEF-NEON: [[TMP3:%.*]] = call <2 x double> @_ZGVnN2v_log(<2 x double> [[WIDE_LOAD:%.*]]) ; -; SLEEF-SVE-LABEL: define void @fabs_f64 +; SLEEF-SVE-LABEL: define void @log_f64 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { -; SLEEF-SVE: [[TMP15:%.*]] = call @llvm.fabs.nxv2f64( [[WIDE_MASKED_LOAD:%.*]]) +; SLEEF-SVE: [[TMP15:%.*]] = call @_ZGVsMxv_log( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) ; -; ARMPL-NEON-LABEL: define void @fabs_f64 +; ARMPL-NEON-LABEL: define void @log_f64 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { -; ARMPL-NEON: [[TMP3:%.*]] = call <2 x double> @llvm.fabs.v2f64(<2 x double> [[WIDE_LOAD:%.*]]) +; ARMPL-NEON: [[TMP3:%.*]] = call <2 x double> @armpl_vlogq_f64(<2 x double> [[WIDE_LOAD:%.*]]) ; -; ARMPL-SVE-LABEL: define void @fabs_f64 +; ARMPL-SVE-LABEL: define void @log_f64 ; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { -; ARMPL-SVE: [[TMP15:%.*]] = call @llvm.fabs.nxv2f64( [[WIDE_MASKED_LOAD:%.*]]) +; ARMPL-SVE: [[TMP15:%.*]] = call @armpl_svlog_f64_x( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) ; entry: br label %for.body @@ -842,7 +696,7 @@ define void @fabs_f64(ptr noalias %in.ptr, ptr %out.ptr) { %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] %in.gep = getelementptr inbounds double, ptr %in.ptr, i64 %iv %in = load double, ptr %in.gep, align 8 - %call = tail call double @llvm.fabs.f64(double %in) + %call = tail call double @llvm.log.f64(double %in) %out.gep = getelementptr inbounds double, ptr %out.ptr, i64 %iv store double %call, ptr %out.gep, align 8 %iv.next = add nuw nsw i64 %iv, 1 @@ -853,22 +707,22 @@ define void @fabs_f64(ptr noalias %in.ptr, ptr %out.ptr) { ret void } -define void @fabs_f32(ptr noalias %in.ptr, ptr %out.ptr) { -; SLEEF-NEON-LABEL: define void @fabs_f32 +define void @log_f32(ptr noalias %in.ptr, ptr %out.ptr) { +; SLEEF-NEON-LABEL: define void @log_f32 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { -; SLEEF-NEON: [[TMP3:%.*]] = call <4 x float> @llvm.fabs.v4f32(<4 x float> [[WIDE_LOAD:%.*]]) +; SLEEF-NEON: [[TMP3:%.*]] = call <4 x float> @_ZGVnN4v_logf(<4 x float> [[WIDE_LOAD:%.*]]) ; -; SLEEF-SVE-LABEL: define void @fabs_f32 +; SLEEF-SVE-LABEL: define void @log_f32 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { -; SLEEF-SVE: [[TMP15:%.*]] = call @llvm.fabs.nxv4f32( [[WIDE_MASKED_LOAD:%.*]]) +; SLEEF-SVE: [[TMP15:%.*]] = call @_ZGVsMxv_logf( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) ; -; ARMPL-NEON-LABEL: define void @fabs_f32 +; ARMPL-NEON-LABEL: define void @log_f32 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { -; ARMPL-NEON: [[TMP3:%.*]] = call <4 x float> @llvm.fabs.v4f32(<4 x float> [[WIDE_LOAD:%.*]]) +; ARMPL-NEON: [[TMP3:%.*]] = call <4 x float> @armpl_vlogq_f32(<4 x float> [[WIDE_LOAD:%.*]]) ; -; ARMPL-SVE-LABEL: define void @fabs_f32 +; ARMPL-SVE-LABEL: define void @log_f32 ; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { -; ARMPL-SVE: [[TMP15:%.*]] = call @llvm.fabs.nxv4f32( [[WIDE_MASKED_LOAD:%.*]]) +; ARMPL-SVE: [[TMP15:%.*]] = call @armpl_svlog_f32_x( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) ; entry: br label %for.body @@ -877,7 +731,7 @@ define void @fabs_f32(ptr noalias %in.ptr, ptr %out.ptr) { %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] %in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv %in = load float, ptr %in.gep, align 8 - %call = tail call float @llvm.fabs.f32(float %in) + %call = tail call float @llvm.log.f32(float %in) %out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv store float %call, ptr %out.gep, align 4 %iv.next = add nuw nsw i64 %iv, 1 @@ -888,25 +742,25 @@ define void @fabs_f32(ptr noalias %in.ptr, ptr %out.ptr) { ret void } -declare double @llvm.floor.f64(double) -declare float @llvm.floor.f32(float) +declare double @llvm.log10.f64(double) +declare float @llvm.log10.f32(float) -define void @floor_f64(ptr noalias %in.ptr, ptr %out.ptr) { -; SLEEF-NEON-LABEL: define void @floor_f64 +define void @log10_f64(ptr noalias %in.ptr, ptr %out.ptr) { +; SLEEF-NEON-LABEL: define void @log10_f64 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { -; SLEEF-NEON: [[TMP3:%.*]] = call <2 x double> @llvm.floor.v2f64(<2 x double> [[WIDE_LOAD:%.*]]) +; SLEEF-NEON: [[TMP3:%.*]] = call <2 x double> @_ZGVnN2v_log10(<2 x double> [[WIDE_LOAD:%.*]]) ; -; SLEEF-SVE-LABEL: define void @floor_f64 +; SLEEF-SVE-LABEL: define void @log10_f64 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { -; SLEEF-SVE: [[TMP15:%.*]] = call @llvm.floor.nxv2f64( [[WIDE_MASKED_LOAD:%.*]]) +; SLEEF-SVE: [[TMP15:%.*]] = call @_ZGVsMxv_log10( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) ; -; ARMPL-NEON-LABEL: define void @floor_f64 +; ARMPL-NEON-LABEL: define void @log10_f64 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { -; ARMPL-NEON: [[TMP3:%.*]] = call <2 x double> @llvm.floor.v2f64(<2 x double> [[WIDE_LOAD:%.*]]) +; ARMPL-NEON: [[TMP3:%.*]] = call <2 x double> @armpl_vlog10q_f64(<2 x double> [[WIDE_LOAD:%.*]]) ; -; ARMPL-SVE-LABEL: define void @floor_f64 +; ARMPL-SVE-LABEL: define void @log10_f64 ; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { -; ARMPL-SVE: [[TMP15:%.*]] = call @llvm.floor.nxv2f64( [[WIDE_MASKED_LOAD:%.*]]) +; ARMPL-SVE: [[TMP15:%.*]] = call @armpl_svlog10_f64_x( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) ; entry: br label %for.body @@ -915,7 +769,7 @@ define void @floor_f64(ptr noalias %in.ptr, ptr %out.ptr) { %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] %in.gep = getelementptr inbounds double, ptr %in.ptr, i64 %iv %in = load double, ptr %in.gep, align 8 - %call = tail call double @llvm.floor.f64(double %in) + %call = tail call double @llvm.log10.f64(double %in) %out.gep = getelementptr inbounds double, ptr %out.ptr, i64 %iv store double %call, ptr %out.gep, align 8 %iv.next = add nuw nsw i64 %iv, 1 @@ -926,22 +780,22 @@ define void @floor_f64(ptr noalias %in.ptr, ptr %out.ptr) { ret void } -define void @floor_f32(ptr noalias %in.ptr, ptr %out.ptr) { -; SLEEF-NEON-LABEL: define void @floor_f32 +define void @log10_f32(ptr noalias %in.ptr, ptr %out.ptr) { +; SLEEF-NEON-LABEL: define void @log10_f32 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { -; SLEEF-NEON: [[TMP3:%.*]] = call <4 x float> @llvm.floor.v4f32(<4 x float> [[WIDE_LOAD:%.*]]) +; SLEEF-NEON: [[TMP3:%.*]] = call <4 x float> @_ZGVnN4v_log10f(<4 x float> [[WIDE_LOAD:%.*]]) ; -; SLEEF-SVE-LABEL: define void @floor_f32 +; SLEEF-SVE-LABEL: define void @log10_f32 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { -; SLEEF-SVE: [[TMP15:%.*]] = call @llvm.floor.nxv4f32( [[WIDE_MASKED_LOAD:%.*]]) +; SLEEF-SVE: [[TMP15:%.*]] = call @_ZGVsMxv_log10f( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) ; -; ARMPL-NEON-LABEL: define void @floor_f32 +; ARMPL-NEON-LABEL: define void @log10_f32 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { -; ARMPL-NEON: [[TMP3:%.*]] = call <4 x float> @llvm.floor.v4f32(<4 x float> [[WIDE_LOAD:%.*]]) +; ARMPL-NEON: [[TMP3:%.*]] = call <4 x float> @armpl_vlog10q_f32(<4 x float> [[WIDE_LOAD:%.*]]) ; -; ARMPL-SVE-LABEL: define void @floor_f32 +; ARMPL-SVE-LABEL: define void @log10_f32 ; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { -; ARMPL-SVE: [[TMP15:%.*]] = call @llvm.floor.nxv4f32( [[WIDE_MASKED_LOAD:%.*]]) +; ARMPL-SVE: [[TMP15:%.*]] = call @armpl_svlog10_f32_x( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) ; entry: br label %for.body @@ -950,7 +804,7 @@ define void @floor_f32(ptr noalias %in.ptr, ptr %out.ptr) { %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] %in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv %in = load float, ptr %in.gep, align 8 - %call = tail call float @llvm.floor.f32(float %in) + %call = tail call float @llvm.log10.f32(float %in) %out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv store float %call, ptr %out.gep, align 4 %iv.next = add nuw nsw i64 %iv, 1 @@ -961,25 +815,25 @@ define void @floor_f32(ptr noalias %in.ptr, ptr %out.ptr) { ret void } -declare double @llvm.fma.f64(double, double, double) -declare float @llvm.fma.f32(float, float, float) +declare double @llvm.log2.f64(double) +declare float @llvm.log2.f32(float) -define void @fma_f64(ptr noalias %in.ptr, ptr %out.ptr) { -; SLEEF-NEON-LABEL: define void @fma_f64 +define void @log2_f64(ptr noalias %in.ptr, ptr %out.ptr) { +; SLEEF-NEON-LABEL: define void @log2_f64 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { -; SLEEF-NEON: [[TMP3:%.*]] = call <2 x double> @llvm.fma.v2f64(<2 x double> [[WIDE_LOAD:%.*]], <2 x double> [[WIDE_LOAD]], <2 x double> [[WIDE_LOAD]]) +; SLEEF-NEON: [[TMP3:%.*]] = call <2 x double> @_ZGVnN2v_log2(<2 x double> [[WIDE_LOAD:%.*]]) ; -; SLEEF-SVE-LABEL: define void @fma_f64 +; SLEEF-SVE-LABEL: define void @log2_f64 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { -; SLEEF-SVE: [[TMP15:%.*]] = call @llvm.fma.nxv2f64( [[WIDE_MASKED_LOAD:%.*]], [[WIDE_MASKED_LOAD]], [[WIDE_MASKED_LOAD]]) +; SLEEF-SVE: [[TMP15:%.*]] = call @_ZGVsMxv_log2( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) ; -; ARMPL-NEON-LABEL: define void @fma_f64 +; ARMPL-NEON-LABEL: define void @log2_f64 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { -; ARMPL-NEON: [[TMP3:%.*]] = call <2 x double> @llvm.fma.v2f64(<2 x double> [[WIDE_LOAD:%.*]], <2 x double> [[WIDE_LOAD]], <2 x double> [[WIDE_LOAD]]) +; ARMPL-NEON: [[TMP3:%.*]] = call <2 x double> @armpl_vlog2q_f64(<2 x double> [[WIDE_LOAD:%.*]]) ; -; ARMPL-SVE-LABEL: define void @fma_f64 +; ARMPL-SVE-LABEL: define void @log2_f64 ; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { -; ARMPL-SVE: [[TMP15:%.*]] = call @llvm.fma.nxv2f64( [[WIDE_MASKED_LOAD:%.*]], [[WIDE_MASKED_LOAD]], [[WIDE_MASKED_LOAD]]) +; ARMPL-SVE: [[TMP15:%.*]] = call @armpl_svlog2_f64_x( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) ; entry: br label %for.body @@ -988,7 +842,7 @@ define void @fma_f64(ptr noalias %in.ptr, ptr %out.ptr) { %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] %in.gep = getelementptr inbounds double, ptr %in.ptr, i64 %iv %in = load double, ptr %in.gep, align 8 - %call = tail call double @llvm.fma.f64(double %in, double %in, double %in) + %call = tail call double @llvm.log2.f64(double %in) %out.gep = getelementptr inbounds double, ptr %out.ptr, i64 %iv store double %call, ptr %out.gep, align 8 %iv.next = add nuw nsw i64 %iv, 1 @@ -999,22 +853,22 @@ define void @fma_f64(ptr noalias %in.ptr, ptr %out.ptr) { ret void } -define void @fma_f32(ptr noalias %in.ptr, ptr %out.ptr) { -; SLEEF-NEON-LABEL: define void @fma_f32 +define void @log2_f32(ptr noalias %in.ptr, ptr %out.ptr) { +; SLEEF-NEON-LABEL: define void @log2_f32 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { -; SLEEF-NEON: [[TMP3:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> [[WIDE_LOAD:%.*]], <4 x float> [[WIDE_LOAD]], <4 x float> [[WIDE_LOAD]]) +; SLEEF-NEON: [[TMP3:%.*]] = call <4 x float> @_ZGVnN4v_log2f(<4 x float> [[WIDE_LOAD:%.*]]) ; -; SLEEF-SVE-LABEL: define void @fma_f32 +; SLEEF-SVE-LABEL: define void @log2_f32 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { -; SLEEF-SVE: [[TMP15:%.*]] = call @llvm.fma.nxv4f32( [[WIDE_MASKED_LOAD:%.*]], [[WIDE_MASKED_LOAD]], [[WIDE_MASKED_LOAD]]) +; SLEEF-SVE: [[TMP15:%.*]] = call @_ZGVsMxv_log2f( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) ; -; ARMPL-NEON-LABEL: define void @fma_f32 +; ARMPL-NEON-LABEL: define void @log2_f32 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { -; ARMPL-NEON: [[TMP3:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> [[WIDE_LOAD:%.*]], <4 x float> [[WIDE_LOAD]], <4 x float> [[WIDE_LOAD]]) +; ARMPL-NEON: [[TMP3:%.*]] = call <4 x float> @armpl_vlog2q_f32(<4 x float> [[WIDE_LOAD:%.*]]) ; -; ARMPL-SVE-LABEL: define void @fma_f32 +; ARMPL-SVE-LABEL: define void @log2_f32 ; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { -; ARMPL-SVE: [[TMP15:%.*]] = call @llvm.fma.nxv4f32( [[WIDE_MASKED_LOAD:%.*]], [[WIDE_MASKED_LOAD]], [[WIDE_MASKED_LOAD]]) +; ARMPL-SVE: [[TMP15:%.*]] = call @armpl_svlog2_f32_x( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) ; entry: br label %for.body @@ -1023,7 +877,7 @@ define void @fma_f32(ptr noalias %in.ptr, ptr %out.ptr) { %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] %in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv %in = load float, ptr %in.gep, align 8 - %call = tail call float @llvm.fma.f32(float %in, float %in, float %in) + %call = tail call float @llvm.log2.f32(float %in) %out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv store float %call, ptr %out.gep, align 4 %iv.next = add nuw nsw i64 %iv, 1 @@ -1253,6 +1107,79 @@ define void @nearbyint_f32(ptr noalias %in.ptr, ptr %out.ptr) { ret void } +declare double @llvm.pow.f64(double, double) +declare float @llvm.pow.f32(float, float) + +define void @pow_f64(ptr noalias %in.ptr, ptr %out.ptr) { +; SLEEF-NEON-LABEL: define void @pow_f64 +; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { +; SLEEF-NEON: [[TMP3:%.*]] = call <2 x double> @_ZGVnN2vv_pow(<2 x double> [[WIDE_LOAD:%.*]], <2 x double> [[WIDE_LOAD]]) +; +; SLEEF-SVE-LABEL: define void @pow_f64 +; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { +; SLEEF-SVE: [[TMP15:%.*]] = call @_ZGVsMxvv_pow( [[WIDE_MASKED_LOAD:%.*]], [[WIDE_MASKED_LOAD]], [[ACTIVE_LANE_MASK:%.*]]) +; +; ARMPL-NEON-LABEL: define void @pow_f64 +; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { +; ARMPL-NEON: [[TMP3:%.*]] = call <2 x double> @armpl_vpowq_f64(<2 x double> [[WIDE_LOAD:%.*]], <2 x double> [[WIDE_LOAD]]) +; +; ARMPL-SVE-LABEL: define void @pow_f64 +; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { +; ARMPL-SVE: [[TMP15:%.*]] = call @armpl_svpow_f64_x( [[WIDE_MASKED_LOAD:%.*]], [[WIDE_MASKED_LOAD]], [[ACTIVE_LANE_MASK:%.*]]) +; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %in.gep = getelementptr inbounds double, ptr %in.ptr, i64 %iv + %in = load double, ptr %in.gep, align 8 + %call = tail call double @llvm.pow.f64(double %in, double %in) + %out.gep = getelementptr inbounds double, ptr %out.ptr, i64 %iv + store double %call, ptr %out.gep, align 8 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + +define void @pow_f32(ptr noalias %in.ptr, ptr %out.ptr) { +; SLEEF-NEON-LABEL: define void @pow_f32 +; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { +; SLEEF-NEON: [[TMP3:%.*]] = call <4 x float> @_ZGVnN4vv_powf(<4 x float> [[WIDE_LOAD:%.*]], <4 x float> [[WIDE_LOAD]]) +; +; SLEEF-SVE-LABEL: define void @pow_f32 +; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { +; SLEEF-SVE: [[TMP15:%.*]] = call @_ZGVsMxvv_powf( [[WIDE_MASKED_LOAD:%.*]], [[WIDE_MASKED_LOAD]], [[ACTIVE_LANE_MASK:%.*]]) +; +; ARMPL-NEON-LABEL: define void @pow_f32 +; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { +; ARMPL-NEON: [[TMP3:%.*]] = call <4 x float> @armpl_vpowq_f32(<4 x float> [[WIDE_LOAD:%.*]], <4 x float> [[WIDE_LOAD]]) +; +; ARMPL-SVE-LABEL: define void @pow_f32 +; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { +; ARMPL-SVE: [[TMP15:%.*]] = call @armpl_svpow_f32_x( [[WIDE_MASKED_LOAD:%.*]], [[WIDE_MASKED_LOAD]], [[ACTIVE_LANE_MASK:%.*]]) +; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv + %in = load float, ptr %in.gep, align 8 + %call = tail call float @llvm.pow.f32(float %in, float %in) + %out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv + store float %call, ptr %out.gep, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + declare double @llvm.rint.f64(double) declare float @llvm.rint.f32(float) @@ -1399,6 +1326,79 @@ define void @round_f32(ptr noalias %in.ptr, ptr %out.ptr) { ret void } +declare double @llvm.sin.f64(double) +declare float @llvm.sin.f32(float) + +define void @sin_f64(ptr noalias %in.ptr, ptr %out.ptr) { +; SLEEF-NEON-LABEL: define void @sin_f64 +; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { +; SLEEF-NEON: [[TMP3:%.*]] = call <2 x double> @_ZGVnN2v_sin(<2 x double> [[WIDE_LOAD:%.*]]) +; +; SLEEF-SVE-LABEL: define void @sin_f64 +; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { +; SLEEF-SVE: [[TMP15:%.*]] = call @_ZGVsMxv_sin( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) +; +; ARMPL-NEON-LABEL: define void @sin_f64 +; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { +; ARMPL-NEON: [[TMP3:%.*]] = call <2 x double> @armpl_vsinq_f64(<2 x double> [[WIDE_LOAD:%.*]]) +; +; ARMPL-SVE-LABEL: define void @sin_f64 +; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { +; ARMPL-SVE: [[TMP15:%.*]] = call @armpl_svsin_f64_x( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) +; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %in.gep = getelementptr inbounds double, ptr %in.ptr, i64 %iv + %in = load double, ptr %in.gep, align 8 + %call = tail call double @llvm.sin.f64(double %in) + %out.gep = getelementptr inbounds double, ptr %out.ptr, i64 %iv + store double %call, ptr %out.gep, align 8 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + +define void @sin_f32(ptr noalias %in.ptr, ptr %out.ptr) { +; SLEEF-NEON-LABEL: define void @sin_f32 +; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { +; SLEEF-NEON: [[TMP3:%.*]] = call <4 x float> @_ZGVnN4v_sinf(<4 x float> [[WIDE_LOAD:%.*]]) +; +; SLEEF-SVE-LABEL: define void @sin_f32 +; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { +; SLEEF-SVE: [[TMP15:%.*]] = call @_ZGVsMxv_sinf( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) +; +; ARMPL-NEON-LABEL: define void @sin_f32 +; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { +; ARMPL-NEON: [[TMP3:%.*]] = call <4 x float> @armpl_vsinq_f32(<4 x float> [[WIDE_LOAD:%.*]]) +; +; ARMPL-SVE-LABEL: define void @sin_f32 +; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { +; ARMPL-SVE: [[TMP15:%.*]] = call @armpl_svsin_f32_x( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) +; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv + %in = load float, ptr %in.gep, align 8 + %call = tail call float @llvm.sin.f32(float %in) + %out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv + store float %call, ptr %out.gep, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + declare double @llvm.sqrt.f64(double) declare float @llvm.sqrt.f32(float) @@ -1544,4 +1544,3 @@ define void @trunc_f32(ptr noalias %in.ptr, ptr %out.ptr) { for.end: ret void } -