Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[AArch64] Set the default streaming hazard size to 1024 for +sme,+sve #123753

Merged
merged 3 commits into from
Jan 22, 2025
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion llvm/lib/Target/AArch64/AArch64Subtarget.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -358,7 +358,10 @@ AArch64Subtarget::AArch64Subtarget(const Triple &TT, StringRef CPU,
CustomCallSavedXRegs(AArch64::GPR64commonRegClass.getNumRegs()),
IsLittle(LittleEndian), IsStreaming(IsStreaming),
IsStreamingCompatible(IsStreamingCompatible),
StreamingHazardSize(AArch64StreamingHazardSize),
StreamingHazardSize(
AArch64StreamingHazardSize.getNumOccurrences() > 0
? std::optional<unsigned>(AArch64StreamingHazardSize)
: std::nullopt),
MinSVEVectorSizeInBits(MinSVEVectorSizeInBitsOverride),
MaxSVEVectorSizeInBits(MaxSVEVectorSizeInBitsOverride), TargetTriple(TT),
InstrInfo(initializeSubtargetDependencies(FS, CPU, TuneCPU, HasMinSize)),
Expand Down
11 changes: 9 additions & 2 deletions llvm/lib/Target/AArch64/AArch64Subtarget.h
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ class AArch64Subtarget final : public AArch64GenSubtargetInfo {

bool IsStreaming;
bool IsStreamingCompatible;
unsigned StreamingHazardSize;
std::optional<unsigned> StreamingHazardSize;
unsigned MinSVEVectorSizeInBits;
unsigned MaxSVEVectorSizeInBits;
unsigned VScaleForTuning = 1;
Expand Down Expand Up @@ -179,7 +179,14 @@ class AArch64Subtarget final : public AArch64GenSubtargetInfo {

/// Returns the size of memory region that if accessed by both the CPU and
/// the SME unit could result in a hazard. 0 = disabled.
unsigned getStreamingHazardSize() const { return StreamingHazardSize; }
unsigned getStreamingHazardSize() const {
// If StreamingHazardSize has been explicitly set to a value, use that.
// Otherwise, default to 1024 bytes when both SME and SVE are available
// (without FA64), for all other configurations default to no streaming
// hazards.
MacDue marked this conversation as resolved.
Show resolved Hide resolved
MacDue marked this conversation as resolved.
Show resolved Hide resolved
return StreamingHazardSize.value_or(
!hasSMEFA64() && hasSME() && hasSVE() ? 1024 : 0);
}

/// Returns true if the target has NEON and the function at runtime is known
/// to have NEON enabled (e.g. the function is known not to be in streaming-SVE
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve -mattr=+sme2 -enable-machine-outliner -verify-machineinstrs < %s | FileCheck %s
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve -mattr=+sme2 -enable-machine-outliner -verify-machineinstrs < %s | FileCheck %s -check-prefix=OUTLINER
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve -mattr=+sme2 -enable-machine-outliner -aarch64-streaming-hazard-size=0 -verify-machineinstrs < %s | FileCheck %s
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve -mattr=+sme2 -enable-machine-outliner -aarch64-streaming-hazard-size=0 -verify-machineinstrs < %s | FileCheck %s -check-prefix=OUTLINER

declare void @callee();

Expand Down
6 changes: 3 additions & 3 deletions llvm/test/CodeGen/AArch64/sme-callee-save-restore-pairs.ll
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme2 -mattr=+sve -aarch64-disable-multivector-spill-fill -verify-machineinstrs < %s | FileCheck %s --check-prefixes=NOPAIR
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme -mattr=+sve -verify-machineinstrs < %s | FileCheck %s --check-prefixes=NOPAIR
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme2 -mattr=+sve -verify-machineinstrs < %s | FileCheck %s --check-prefixes=PAIR
; RUN: llc -mtriple=aarch64-linux-gnu -aarch64-streaming-hazard-size=0 -mattr=+sme2 -mattr=+sve -aarch64-disable-multivector-spill-fill -verify-machineinstrs < %s | FileCheck %s --check-prefixes=NOPAIR
; RUN: llc -mtriple=aarch64-linux-gnu -aarch64-streaming-hazard-size=0 -mattr=+sme -mattr=+sve -verify-machineinstrs < %s | FileCheck %s --check-prefixes=NOPAIR
; RUN: llc -mtriple=aarch64-linux-gnu -aarch64-streaming-hazard-size=0 -mattr=+sme2 -mattr=+sve -verify-machineinstrs < %s | FileCheck %s --check-prefixes=PAIR

declare void @my_func()
declare void @my_func2(<vscale x 16 x i8> %v)
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/AArch64/sme-darwin-sve-vg.ll
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
; RUN: llc -mtriple=aarch64-darwin -mattr=+sve -mattr=+sme -enable-aarch64-sme-peephole-opt=false -verify-machineinstrs < %s | FileCheck %s
; RUN: llc -mtriple=aarch64-darwin -aarch64-streaming-hazard-size=0 -mattr=+sve -mattr=+sme -enable-aarch64-sme-peephole-opt=false -verify-machineinstrs < %s | FileCheck %s

declare void @normal_callee();

Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/AArch64/sme-disable-gisel-fisel.ll
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -fast-isel=true -global-isel=false -fast-isel-abort=0 -mtriple=aarch64-linux-gnu -mattr=+sve -mattr=+sme2 < %s \
; RUN: llc -fast-isel=true -aarch64-streaming-hazard-size=0 -global-isel=false -fast-isel-abort=0 -mtriple=aarch64-linux-gnu -mattr=+sve -mattr=+sme2 < %s \
; RUN: | FileCheck %s --check-prefixes=CHECK-COMMON,CHECK-FISEL
; RUN: llc -fast-isel=false -global-isel=true -global-isel-abort=0 -mtriple=aarch64-linux-gnu -mattr=+sve -mattr=+sme2 < %s \
; RUN: llc -fast-isel=false -aarch64-streaming-hazard-size=0 -global-isel=true -global-isel-abort=0 -mtriple=aarch64-linux-gnu -mattr=+sve -mattr=+sme2 < %s \
; RUN: | FileCheck %s --check-prefixes=CHECK-COMMON,CHECK-GISEL


Expand Down
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/AArch64/sme-lazy-save-call.ll
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64 -mattr=+sve -mattr=+sme < %s | FileCheck %s
; RUN: llc -mtriple=aarch64 -aarch64-streaming-hazard-size=0 -mattr=+sve -mattr=+sme < %s | FileCheck %s

declare void @private_za_callee()
declare float @llvm.cos.f32(float)
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/AArch64/sme-peephole-opts.ll
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve,+sme2 < %s | FileCheck %s
; RUN: llc -mtriple=aarch64-linux-gnu -aarch64-streaming-hazard-size=0 -mattr=+sve,+sme2 < %s | FileCheck %s

declare void @callee()
declare void @callee_farg(float)
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
; RUN: llc < %s | FileCheck %s
; RUN: llc -aarch64-streaming-hazard-size=0 < %s | FileCheck %s

target triple = "aarch64-unknown-unknown-eabi-elf"

Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve -mattr=+sme -start-after=simplifycfg -enable-tail-merge=false -verify-machineinstrs < %s | FileCheck %s
; RUN: llc -mtriple=aarch64-linux-gnu -aarch64-streaming-hazard-size=0 -mattr=+sve -mattr=+sme -start-after=simplifycfg -enable-tail-merge=false -verify-machineinstrs < %s | FileCheck %s

declare void @normal_callee();
declare void @streaming_callee() "aarch64_pstate_sm_enabled";
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/AArch64/sme-streaming-body.ll
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve -mattr=+sme -start-after=simplifycfg -enable-tail-merge=false -verify-machineinstrs < %s | FileCheck %s
; RUN: llc -mtriple=aarch64-linux-gnu -aarch64-streaming-hazard-size=0 -mattr=+sve -mattr=+sme -start-after=simplifycfg -enable-tail-merge=false -verify-machineinstrs < %s | FileCheck %s

declare void @normal_callee();
declare void @streaming_callee() "aarch64_pstate_sm_enabled";
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64-linux-gnu -verify-machineinstrs -mattr=+sve -mattr=+sme < %s | FileCheck %s
; RUN: llc -mtriple=aarch64-linux-gnu -aarch64-streaming-hazard-size=0 -verify-machineinstrs -mattr=+sve -mattr=+sme < %s | FileCheck %s

; This file tests the following combinations related to streaming-enabled functions:
; [ ] N -> SC (Normal -> Streaming-compatible)
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/AArch64/sme-streaming-interface.ll
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve -mattr=+sme -verify-machineinstrs < %s | FileCheck %s
; RUN: llc -mtriple=aarch64-linux-gnu -aarch64-streaming-hazard-size=0 -mattr=+sve -mattr=+sme -verify-machineinstrs < %s | FileCheck %s

; This file tests the following combinations related to streaming-enabled functions:
; [ ] N -> S (Normal -> Streaming)
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
; RUN: llc < %s | FileCheck %s
; RUN: llc -aarch64-streaming-hazard-size=0 < %s | FileCheck %s

target triple = "aarch64"

Expand Down
6 changes: 3 additions & 3 deletions llvm/test/CodeGen/AArch64/sme-vg-to-stack.ll
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve -mattr=+sme2 -verify-machineinstrs < %s | FileCheck %s
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve -mattr=+sme2 -frame-pointer=non-leaf -verify-machineinstrs < %s | FileCheck %s --check-prefix=FP-CHECK
; RUN: llc -mtriple=aarch64-linux-gnu -aarch64-streaming-hazard-size=0 -mattr=+sve -mattr=+sme2 -verify-machineinstrs < %s | FileCheck %s
; RUN: llc -mtriple=aarch64-linux-gnu -aarch64-streaming-hazard-size=0 -mattr=+sve -mattr=+sme2 -frame-pointer=non-leaf -verify-machineinstrs < %s | FileCheck %s --check-prefix=FP-CHECK
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme2 -frame-pointer=non-leaf -verify-machineinstrs < %s | FileCheck %s --check-prefix=NO-SVE-CHECK
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve -mattr=+sme2 -verify-machineinstrs -enable-machine-outliner < %s | FileCheck %s --check-prefix=OUTLINER-CHECK
; RUN: llc -mtriple=aarch64-linux-gnu -aarch64-streaming-hazard-size=0 -mattr=+sve -mattr=+sme2 -verify-machineinstrs -enable-machine-outliner < %s | FileCheck %s --check-prefix=OUTLINER-CHECK

declare void @callee();
declare void @fixed_callee(<4 x i32>);
Expand Down
57 changes: 57 additions & 0 deletions llvm/test/CodeGen/AArch64/stack-hazard-defaults.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc < %s -mtriple=aarch64 -mattr=+sme -aarch64-stack-hazard-size=0 | FileCheck %s --check-prefix=CHECK0
; RUN: llc < %s -mtriple=aarch64 -mattr=+sme -aarch64-stack-hazard-size=1024 | FileCheck %s --check-prefix=CHECK1024

;; The following run lines check the default values for aarch64-stack-hazard-size/aarch64-streaming-hazard-size.

;; When +sme,+sve is set the hazard size should default to 1024.
; RUN: llc < %s -mtriple=aarch64 -mattr=+sme -mattr=+sve | FileCheck %s --check-prefix=CHECK1024

;; The hazard size can still be overridden/disabled when +sme,+sve is set.
; RUN: llc < %s -mtriple=aarch64 -mattr=+sme -mattr=+sve -aarch64-stack-hazard-size=0 | FileCheck %s --check-prefix=CHECK0

;; When +sme-fa64 is set alongside +sme,+sve the default hazard size should be 0.
; RUN: llc < %s -mtriple=aarch64 -mattr=+sme-fa64 -mattr=+sme -mattr=+sve | FileCheck %s --check-prefix=CHECK0

;; When +sme is set (without +sve) the default hazard size should be 0.
; RUN: llc < %s -mtriple=aarch64 -mattr=+sme | FileCheck %s --check-prefix=CHECK0

define i32 @spill_fpr_with_gpr_stack_object(i64 %d) "aarch64_pstate_sm_compatible" {
; CHECK0-LABEL: spill_fpr_with_gpr_stack_object:
; CHECK0: // %bb.0: // %entry
; CHECK0-NEXT: str d8, [sp, #-16]! // 8-byte Folded Spill
; CHECK0-NEXT: .cfi_def_cfa_offset 16
; CHECK0-NEXT: .cfi_offset b8, -16
; CHECK0-NEXT: mov x8, x0
; CHECK0-NEXT: mov w0, wzr
; CHECK0-NEXT: //APP
; CHECK0-NEXT: //NO_APP
; CHECK0-NEXT: str x8, [sp, #8]
; CHECK0-NEXT: ldr d8, [sp], #16 // 8-byte Folded Reload
; CHECK0-NEXT: ret
;
; CHECK1024-LABEL: spill_fpr_with_gpr_stack_object:
; CHECK1024: // %bb.0: // %entry
; CHECK1024-NEXT: sub sp, sp, #1040
; CHECK1024-NEXT: str d8, [sp] // 8-byte Folded Spill
; CHECK1024-NEXT: str x29, [sp, #1032] // 8-byte Folded Spill
; CHECK1024-NEXT: sub sp, sp, #1040
; CHECK1024-NEXT: .cfi_def_cfa_offset 2080
; CHECK1024-NEXT: .cfi_offset w29, -8
; CHECK1024-NEXT: .cfi_offset b8, -1040
; CHECK1024-NEXT: mov x8, x0
; CHECK1024-NEXT: mov w0, wzr
; CHECK1024-NEXT: //APP
; CHECK1024-NEXT: //NO_APP
; CHECK1024-NEXT: str x8, [sp, #8]
; CHECK1024-NEXT: add sp, sp, #1040
; CHECK1024-NEXT: ldr x29, [sp, #1032] // 8-byte Folded Reload
; CHECK1024-NEXT: ldr d8, [sp] // 8-byte Folded Reload
; CHECK1024-NEXT: add sp, sp, #1040
; CHECK1024-NEXT: ret
entry:
%a = alloca i64
tail call void asm sideeffect "", "~{d8}"() #1
store i64 %d, ptr %a
ret i32 0
}
6 changes: 3 additions & 3 deletions llvm/test/CodeGen/AArch64/streaming-compatible-memory-ops.ll
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve -mattr=+sme2 -verify-machineinstrs < %s | FileCheck %s -check-prefixes=CHECK
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve -mattr=+sme2 -verify-machineinstrs -aarch64-lower-to-sme-routines=false < %s | FileCheck %s -check-prefixes=CHECK-NO-SME-ROUTINES
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve -mattr=+sme2 -mattr=+mops -verify-machineinstrs < %s | FileCheck %s -check-prefixes=CHECK-MOPS
; RUN: llc -mtriple=aarch64-linux-gnu -aarch64-streaming-hazard-size=0 -mattr=+sve -mattr=+sme2 -verify-machineinstrs < %s | FileCheck %s -check-prefixes=CHECK
; RUN: llc -mtriple=aarch64-linux-gnu -aarch64-streaming-hazard-size=0 -mattr=+sve -mattr=+sme2 -verify-machineinstrs -aarch64-lower-to-sme-routines=false < %s | FileCheck %s -check-prefixes=CHECK-NO-SME-ROUTINES
; RUN: llc -mtriple=aarch64-linux-gnu -aarch64-streaming-hazard-size=0 -mattr=+sve -mattr=+sme2 -mattr=+mops -verify-machineinstrs < %s | FileCheck %s -check-prefixes=CHECK-MOPS

@dst = global [512 x i8] zeroinitializer, align 1
@src = global [512 x i8] zeroinitializer, align 1
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/AArch64/sve-stack-frame-layout.ll
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc < %s -mtriple=aarch64 -mattr=+sve2 | FileCheck %s --check-prefixes=CHECK
; RUN: llc < %s -mtriple=aarch64 -mattr=+sve2 -pass-remarks-analysis=stack-frame-layout 2>&1 >/dev/null | FileCheck %s --check-prefixes=CHECK-FRAMELAYOUT
; RUN: llc < %s -mtriple=aarch64 -mattr=+sve2 -aarch64-streaming-hazard-size=0 | FileCheck %s --check-prefixes=CHECK
; RUN: llc < %s -mtriple=aarch64 -mattr=+sve2 -aarch64-streaming-hazard-size=0 -pass-remarks-analysis=stack-frame-layout 2>&1 >/dev/null | FileCheck %s --check-prefixes=CHECK-FRAMELAYOUT

; CHECK-FRAMELAYOUT-LABEL: Function: csr_d8_allocnxv4i32i32f64
; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-8], Type: Spill, Align: 8, Size: 8
Expand Down
Loading