Skip to content

Commit 07eb824

Browse files
sdesmalen-arm authored and agozillon committed
[AArch64] Replace LLVM IR function attributes for PSTATE.ZA. (llvm#79166)
Since ARM-software/acle#276 the ACLE defines attributes to better describe the use of a given SME state. Previously the attributes merely described the possibility of it being 'shared' or 'preserved', whereas the new attributes have more semantics and also describe how the data flows through the program.

For ZT0 we already had to add new LLVM IR attributes:
* aarch64_new_zt0
* aarch64_in_zt0
* aarch64_out_zt0
* aarch64_inout_zt0
* aarch64_preserves_zt0

We have now done the same for ZA, such that we add:
* aarch64_new_za (previously `aarch64_pstate_za_new`)
* aarch64_in_za (more specific variation of `aarch64_pstate_za_shared`)
* aarch64_out_za (more specific variation of `aarch64_pstate_za_shared`)
* aarch64_inout_za (more specific variation of `aarch64_pstate_za_shared`)
* aarch64_preserves_za (previously `aarch64_pstate_za_shared, aarch64_pstate_za_preserved`)

This explicitly removes 'pstate' from the name, because with SME2 and the new ACLE attributes there is a difference between "sharing ZA" (sharing the ZA matrix register with the caller) and "sharing PSTATE.ZA" (sharing either the ZA or ZT0 register, both part of PSTATE.ZA, with the caller).
1 parent 2002d69 commit 07eb824

File tree

28 files changed

+350
-211
lines changed

28 files changed

+350
-211
lines changed

clang/lib/CodeGen/CGBuiltin.cpp

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -10676,10 +10676,8 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
1067610676
llvm::FunctionType::get(StructType::get(CGM.Int64Ty, CGM.Int64Ty), {},
1067710677
false),
1067810678
"__arm_sme_state"));
10679-
auto Attrs =
10680-
AttributeList()
10681-
.addFnAttribute(getLLVMContext(), "aarch64_pstate_sm_compatible")
10682-
.addFnAttribute(getLLVMContext(), "aarch64_pstate_za_preserved");
10679+
auto Attrs = AttributeList().addFnAttribute(getLLVMContext(),
10680+
"aarch64_pstate_sm_compatible");
1068310681
CI->setAttributes(Attrs);
1068410682
CI->setCallingConv(
1068510683
llvm::CallingConv::

clang/lib/CodeGen/CGCall.cpp

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1774,14 +1774,14 @@ static void AddAttributesFromFunctionProtoType(ASTContext &Ctx,
17741774
FuncAttrs.addAttribute("aarch64_pstate_sm_compatible");
17751775

17761776
// ZA
1777-
if (FunctionType::getArmZAState(SMEBits) == FunctionType::ARM_Out ||
1778-
FunctionType::getArmZAState(SMEBits) == FunctionType::ARM_InOut)
1779-
FuncAttrs.addAttribute("aarch64_pstate_za_shared");
1780-
if (FunctionType::getArmZAState(SMEBits) == FunctionType::ARM_Preserves ||
1781-
FunctionType::getArmZAState(SMEBits) == FunctionType::ARM_In) {
1782-
FuncAttrs.addAttribute("aarch64_pstate_za_shared");
1783-
FuncAttrs.addAttribute("aarch64_pstate_za_preserved");
1784-
}
1777+
if (FunctionType::getArmZAState(SMEBits) == FunctionType::ARM_Preserves)
1778+
FuncAttrs.addAttribute("aarch64_preserves_za");
1779+
if (FunctionType::getArmZAState(SMEBits) == FunctionType::ARM_In)
1780+
FuncAttrs.addAttribute("aarch64_in_za");
1781+
if (FunctionType::getArmZAState(SMEBits) == FunctionType::ARM_Out)
1782+
FuncAttrs.addAttribute("aarch64_out_za");
1783+
if (FunctionType::getArmZAState(SMEBits) == FunctionType::ARM_InOut)
1784+
FuncAttrs.addAttribute("aarch64_inout_za");
17851785

17861786
// ZT0
17871787
if (FunctionType::getArmZT0State(SMEBits) == FunctionType::ARM_Preserves)

clang/lib/CodeGen/CodeGenModule.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2414,7 +2414,7 @@ void CodeGenModule::SetLLVMFunctionAttributesForDefinition(const Decl *D,
24142414

24152415
if (auto *Attr = D->getAttr<ArmNewAttr>()) {
24162416
if (Attr->isNewZA())
2417-
B.addAttribute("aarch64_pstate_za_new");
2417+
B.addAttribute("aarch64_new_za");
24182418
if (Attr->isNewZT0())
24192419
B.addAttribute("aarch64_new_zt0");
24202420
}

clang/test/CodeGen/aarch64-sme-intrinsics/aarch64-sme-attrs.cpp

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -284,20 +284,20 @@ int test_variadic_template() __arm_inout("za") {
284284
// CHECK: attributes #[[SM_COMPATIBLE]] = { mustprogress noinline nounwind "aarch64_pstate_sm_compatible" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+bf16,+sme" }
285285
// CHECK: attributes #[[SM_COMPATIBLE_DECL]] = { "aarch64_pstate_sm_compatible" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+bf16,+sme" }
286286
// CHECK: attributes #[[SM_BODY]] = { mustprogress noinline nounwind "aarch64_pstate_sm_body" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+bf16,+sme" }
287-
// CHECK: attributes #[[ZA_SHARED]] = { mustprogress noinline nounwind "aarch64_pstate_za_shared" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+bf16,+sme" }
288-
// CHECK: attributes #[[ZA_SHARED_DECL]] = { "aarch64_pstate_za_shared" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+bf16,+sme" }
289-
// CHECK: attributes #[[ZA_PRESERVED]] = { mustprogress noinline nounwind "aarch64_pstate_za_preserved" "aarch64_pstate_za_shared" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+bf16,+sme" }
290-
// CHECK: attributes #[[ZA_PRESERVED_DECL]] = { "aarch64_pstate_za_preserved" "aarch64_pstate_za_shared" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+bf16,+sme" }
291-
// CHECK: attributes #[[ZA_NEW]] = { mustprogress noinline nounwind "aarch64_pstate_za_new" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+bf16,+sme" }
287+
// CHECK: attributes #[[ZA_SHARED]] = { mustprogress noinline nounwind "aarch64_inout_za" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+bf16,+sme" }
288+
// CHECK: attributes #[[ZA_SHARED_DECL]] = { "aarch64_inout_za" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+bf16,+sme" }
289+
// CHECK: attributes #[[ZA_PRESERVED]] = { mustprogress noinline nounwind "aarch64_preserves_za" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+bf16,+sme" }
290+
// CHECK: attributes #[[ZA_PRESERVED_DECL]] = { "aarch64_preserves_za" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+bf16,+sme" }
291+
// CHECK: attributes #[[ZA_NEW]] = { mustprogress noinline nounwind "aarch64_new_za" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+bf16,+sme" }
292292
// CHECK: attributes #[[NORMAL_DEF]] = { mustprogress noinline nounwind "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+bf16,+sme" }
293293
// CHECK: attributes #[[SM_ENABLED_CALL]] = { "aarch64_pstate_sm_enabled" }
294294
// CHECK: attributes #[[SM_COMPATIBLE_CALL]] = { "aarch64_pstate_sm_compatible" }
295295
// CHECK: attributes #[[SM_BODY_CALL]] = { "aarch64_pstate_sm_body" }
296-
// CHECK: attributes #[[ZA_SHARED_CALL]] = { "aarch64_pstate_za_shared" }
297-
// CHECK: attributes #[[ZA_PRESERVED_CALL]] = { "aarch64_pstate_za_preserved" "aarch64_pstate_za_shared" }
296+
// CHECK: attributes #[[ZA_SHARED_CALL]] = { "aarch64_inout_za" }
297+
// CHECK: attributes #[[ZA_PRESERVED_CALL]] = { "aarch64_preserves_za" }
298298
// CHECK: attributes #[[NOUNWIND_CALL]] = { nounwind }
299299
// CHECK: attributes #[[NOUNWIND_SM_ENABLED_CALL]] = { nounwind "aarch64_pstate_sm_enabled" }
300300
// CHECK: attributes #[[NOUNWIND_SM_COMPATIBLE_CALL]] = { nounwind "aarch64_pstate_sm_compatible" }
301-
// CHECK: attributes #[[NOUNWIND_ZA_SHARED_CALL]] = { nounwind "aarch64_pstate_za_shared" }
302-
// CHECK: attributes #[[NOUNWIND_ZA_PRESERVED_CALL]] = { nounwind "aarch64_pstate_za_preserved" "aarch64_pstate_za_shared" }
301+
// CHECK: attributes #[[NOUNWIND_ZA_SHARED_CALL]] = { nounwind "aarch64_inout_za" }
302+
// CHECK: attributes #[[NOUNWIND_ZA_PRESERVED_CALL]] = { nounwind "aarch64_preserves_za" }
303303

clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_zero.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -55,13 +55,13 @@ void test_svzero_mask_za_2(void) __arm_inout("za") {
5555
}
5656

5757
// CHECK-C-LABEL: define dso_local void @test_svzero_za(
58-
// CHECK-C-SAME: ) local_unnamed_addr #[[ATTR0]] {
58+
// CHECK-C-SAME: ) local_unnamed_addr #[[ATTR2:[0-9]+]] {
5959
// CHECK-C-NEXT: entry:
6060
// CHECK-C-NEXT: tail call void @llvm.aarch64.sme.zero(i32 255)
6161
// CHECK-C-NEXT: ret void
6262
//
6363
// CHECK-CXX-LABEL: define dso_local void @_Z14test_svzero_zav(
64-
// CHECK-CXX-SAME: ) local_unnamed_addr #[[ATTR0]] {
64+
// CHECK-CXX-SAME: ) local_unnamed_addr #[[ATTR2:[0-9]+]] {
6565
// CHECK-CXX-NEXT: entry:
6666
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.zero(i32 255)
6767
// CHECK-CXX-NEXT: ret void

clang/test/Modules/aarch64-sme-keywords.cppm

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -43,14 +43,14 @@ import A;
4343
//
4444
// CHECK:declare void @_ZW1A22f_streaming_compatiblev() #[[STREAMING_COMPATIBLE_DECL:[0-9]+]]
4545
//
46-
// CHECK-DAG: attributes #[[SHARED_ZA_DEF]] = {{{.*}} "aarch64_pstate_za_shared" {{.*}}}
47-
// CHECK-DAG: attributes #[[SHARED_ZA_DECL]] = {{{.*}} "aarch64_pstate_za_shared" {{.*}}}
48-
// CHECK-DAG: attributes #[[PRESERVES_ZA_DECL]] = {{{.*}} "aarch64_pstate_za_preserved" {{.*}}}
46+
// CHECK-DAG: attributes #[[SHARED_ZA_DEF]] = {{{.*}} "aarch64_inout_za" {{.*}}}
47+
// CHECK-DAG: attributes #[[SHARED_ZA_DECL]] = {{{.*}} "aarch64_inout_za" {{.*}}}
48+
// CHECK-DAG: attributes #[[PRESERVES_ZA_DECL]] = {{{.*}} "aarch64_preserves_za" {{.*}}}
4949
// CHECK-DAG: attributes #[[NORMAL_DEF]] = {{{.*}}}
5050
// CHECK-DAG: attributes #[[STREAMING_DECL]] = {{{.*}} "aarch64_pstate_sm_enabled" {{.*}}}
5151
// CHECK-DAG: attributes #[[STREAMING_COMPATIBLE_DECL]] = {{{.*}} "aarch64_pstate_sm_compatible" {{.*}}}
52-
// CHECK-DAG: attributes #[[SHARED_ZA_USE]] = { "aarch64_pstate_za_shared" }
53-
// CHECK-DAG: attributes #[[PRESERVES_ZA_USE]] = { "aarch64_pstate_za_preserved" "aarch64_pstate_za_shared" }
52+
// CHECK-DAG: attributes #[[SHARED_ZA_USE]] = { "aarch64_inout_za" }
53+
// CHECK-DAG: attributes #[[PRESERVES_ZA_USE]] = { "aarch64_preserves_za" }
5454
// CHECK-DAG: attributes #[[STREAMING_USE]] = { "aarch64_pstate_sm_enabled" }
5555
// CHECK-DAG: attributes #[[STREAMING_COMPATIBLE_USE]] = { "aarch64_pstate_sm_compatible" }
5656

llvm/docs/AArch64SME.rst

Lines changed: 21 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -22,26 +22,32 @@ Below we describe the LLVM IR attributes and their relation to the C/C++
2222
level ACLE attributes:
2323

2424
``aarch64_pstate_sm_enabled``
25-
is used for functions with ``__attribute__((arm_streaming))``
25+
is used for functions with ``__arm_streaming``
2626

2727
``aarch64_pstate_sm_compatible``
28-
is used for functions with ``__attribute__((arm_streaming_compatible))``
28+
is used for functions with ``__arm_streaming_compatible``
2929

3030
``aarch64_pstate_sm_body``
31-
is used for functions with ``__attribute__((arm_locally_streaming))`` and is
31+
is used for functions with ``__arm_locally_streaming`` and is
3232
only valid on function definitions (not declarations)
3333

34-
``aarch64_pstate_za_new``
35-
is used for functions with ``__attribute__((arm_new_za))``
34+
``aarch64_new_za``
35+
is used for functions with ``__arm_new("za")``
3636

37-
``aarch64_pstate_za_shared``
38-
is used for functions with ``__attribute__((arm_shared_za))``
37+
``aarch64_in_za``
38+
is used for functions with ``__arm_in("za")``
3939

40-
``aarch64_pstate_za_preserved``
41-
is used for functions with ``__attribute__((arm_preserves_za))``
40+
``aarch64_out_za``
41+
is used for functions with ``__arm_out("za")``
42+
43+
``aarch64_inout_za``
44+
is used for functions with ``__arm_inout("za")``
45+
46+
``aarch64_preserves_za``
47+
is used for functions with ``__arm_preserves("za")``
4248

4349
``aarch64_expanded_pstate_za``
44-
is used for functions with ``__attribute__((arm_new_za))``
50+
is used for functions with ``__arm_new("za")``
4551

4652
Clang must ensure that the above attributes are added both to the
4753
function's declaration/definition as well as to their call-sites. This is
@@ -89,11 +95,10 @@ Restrictions on attributes
8995
* It is not allowed for a function to be decorated with both
9096
``aarch64_pstate_sm_compatible`` and ``aarch64_pstate_sm_enabled``.
9197

92-
* It is not allowed for a function to be decorated with both
93-
``aarch64_pstate_za_new`` and ``aarch64_pstate_za_preserved``.
94-
95-
* It is not allowed for a function to be decorated with both
96-
``aarch64_pstate_za_new`` and ``aarch64_pstate_za_shared``.
98+
* It is not allowed for a function to be decorated with more than one of the
99+
following attributes:
100+
``aarch64_new_za``, ``aarch64_in_za``, ``aarch64_out_za``, ``aarch64_inout_za``,
101+
``aarch64_preserves_za``.
97102

98103
These restrictions also apply in the higher level SME ACLE, which means we can
99104
emit diagnostics in Clang to signal users about incorrect behaviour.
@@ -426,7 +431,7 @@ to toggle PSTATE.ZA using intrinsics. This also makes it simpler to setup a
426431
lazy-save mechanism for calls to private-ZA functions (i.e. functions that may
427432
either directly or indirectly clobber ZA state).
428433

429-
For the purpose of handling functions marked with ``aarch64_pstate_za_new``,
434+
For the purpose of handling functions marked with ``aarch64_new_za``,
430435
we have introduced a new LLVM IR pass (SMEABIPass) that is run just before
431436
SelectionDAG. Any such functions dealt with by this pass are marked with
432437
``aarch64_expanded_pstate_za``.

llvm/lib/IR/Verifier.cpp

Lines changed: 7 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -2155,17 +2155,13 @@ void Verifier::verifyFunctionAttrs(FunctionType *FT, AttributeList Attrs,
21552155
V);
21562156
}
21572157

2158-
if (Attrs.hasFnAttr("aarch64_pstate_za_new")) {
2159-
Check(!Attrs.hasFnAttr("aarch64_pstate_za_preserved"),
2160-
"Attributes 'aarch64_pstate_za_new and aarch64_pstate_za_preserved' "
2161-
"are incompatible!",
2162-
V);
2163-
2164-
Check(!Attrs.hasFnAttr("aarch64_pstate_za_shared"),
2165-
"Attributes 'aarch64_pstate_za_new and aarch64_pstate_za_shared' "
2166-
"are incompatible!",
2167-
V);
2168-
}
2158+
Check((Attrs.hasFnAttr("aarch64_new_za") + Attrs.hasFnAttr("aarch64_in_za") +
2159+
Attrs.hasFnAttr("aarch64_inout_za") +
2160+
Attrs.hasFnAttr("aarch64_out_za") +
2161+
Attrs.hasFnAttr("aarch64_preserves_za")) <= 1,
2162+
"Attributes 'aarch64_new_za', 'aarch64_in_za', 'aarch64_out_za', "
2163+
"'aarch64_inout_za' and 'aarch64_preserves_za' are mutually exclusive",
2164+
V);
21692165

21702166
Check(
21712167
(Attrs.hasFnAttr("aarch64_new_zt0") + Attrs.hasFnAttr("aarch64_in_zt0") +

llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -242,7 +242,7 @@ bool AArch64TTIImpl::areInlineCompatible(const Function *Caller,
242242
CalleeAttrs.set(SMEAttrs::SM_Enabled, true);
243243
}
244244

245-
if (CalleeAttrs.hasNewZABody())
245+
if (CalleeAttrs.isNewZA())
246246
return false;
247247

248248
if (CallerAttrs.requiresLazySave(CalleeAttrs) ||

llvm/lib/Target/AArch64/SMEABIPass.cpp

Lines changed: 5 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -60,10 +60,8 @@ FunctionPass *llvm::createSMEABIPass() { return new SMEABI(); }
6060
void emitTPIDR2Save(Module *M, IRBuilder<> &Builder) {
6161
auto *TPIDR2SaveTy =
6262
FunctionType::get(Builder.getVoidTy(), {}, /*IsVarArgs=*/false);
63-
auto Attrs =
64-
AttributeList()
65-
.addFnAttribute(M->getContext(), "aarch64_pstate_sm_compatible")
66-
.addFnAttribute(M->getContext(), "aarch64_pstate_za_preserved");
63+
auto Attrs = AttributeList().addFnAttribute(M->getContext(),
64+
"aarch64_pstate_sm_compatible");
6765
FunctionCallee Callee =
6866
M->getOrInsertFunction("__arm_tpidr2_save", TPIDR2SaveTy, Attrs);
6967
CallInst *Call = Builder.CreateCall(Callee);
@@ -78,7 +76,7 @@ void emitTPIDR2Save(Module *M, IRBuilder<> &Builder) {
7876
}
7977

8078
/// This function generates code at the beginning and end of a function marked
81-
/// with either `aarch64_pstate_za_new` or `aarch64_new_zt0`.
79+
/// with either `aarch64_new_za` or `aarch64_new_zt0`.
8280
/// At the beginning of the function, the following code is generated:
8381
/// - Commit lazy-save if active [Private-ZA Interface*]
8482
/// - Enable PSTATE.ZA [Private-ZA Interface]
@@ -133,7 +131,7 @@ bool SMEABI::updateNewStateFunctions(Module *M, Function *F,
133131
Builder.CreateCall(EnableZAIntr->getFunctionType(), EnableZAIntr);
134132
}
135133

136-
if (FnAttrs.hasNewZABody()) {
134+
if (FnAttrs.isNewZA()) {
137135
Function *ZeroIntr =
138136
Intrinsic::getDeclaration(M, Intrinsic::aarch64_sme_zero);
139137
Builder.CreateCall(ZeroIntr->getFunctionType(), ZeroIntr,
@@ -174,7 +172,7 @@ bool SMEABI::runOnFunction(Function &F) {
174172

175173
bool Changed = false;
176174
SMEAttrs FnAttrs(F);
177-
if (FnAttrs.hasNewZABody() || FnAttrs.isNewZT0())
175+
if (FnAttrs.isNewZA() || FnAttrs.isNewZT0())
178176
Changed |= updateNewStateFunctions(M, &F, Builder, FnAttrs);
179177

180178
return Changed;

0 commit comments

Comments
 (0)