Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[SYCL] Implement SYCL part of sycl_ext_oneapi_prefetch #11458

Merged
merged 26 commits into from
Oct 16, 2023
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions llvm/lib/SYCLLowerIR/CompileTimePropertiesPass.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -619,6 +619,12 @@ bool CompileTimePropertiesPass::transformSYCLPropertiesAnnotation(
if (*Property.first == "sycl-alignment")
continue;

// leave these annotations as is. They will be processed by SPIRVWriter.
KornevNikita marked this conversation as resolved.
Show resolved Hide resolved
if (*Property.first == "sycl-prefetch-hint" ||
KornevNikita marked this conversation as resolved.
Show resolved Hide resolved
*Property.first == "sycl-prefetch-hint-nt") {
return false;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This doesn't feel right to me, but it's outside of SYCL RT...

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Looks like currently it's the only way to transform these properties into spirv decorations for this pointer. I tried to create spirv.Decorations metadata instead, but the compiler eliminates them with optimization flags.
Annotation also may be eliminated, but it's much less likely.

}

auto DecorIt = SpirvDecorMap.find(*Property.first);
if (DecorIt == SpirvDecorMap.end())
continue;
Expand Down
186 changes: 186 additions & 0 deletions sycl/include/sycl/ext/oneapi/prefetch.hpp
KornevNikita marked this conversation as resolved.
Show resolved Hide resolved
Original file line number Diff line number Diff line change
@@ -0,0 +1,186 @@
//==--------------- prefetch.hpp --- SYCL prefetch extension ---------------==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#pragma once

#include <CL/__spirv/spirv_ops.hpp>
#include <sycl/ext/oneapi/properties/properties.hpp>

namespace sycl {
inline namespace _V1 {
namespace ext::oneapi::experimental {

enum class cache_level {
L1,
L2,
L3,
L4,
};

struct nontemporal;

struct prefetch_hint_key {
template <cache_level Level, typename Hint>
using value_t =
property_value<prefetch_hint_key,
std::integral_constant<cache_level, Level>, Hint>;
};

template <cache_level Level, typename Hint>
inline constexpr prefetch_hint_key::value_t<Level, Hint> prefetch_hint;

inline constexpr prefetch_hint_key::value_t<cache_level::L1, void>
prefetch_hint_L1;
inline constexpr prefetch_hint_key::value_t<cache_level::L2, void>
prefetch_hint_L2;
inline constexpr prefetch_hint_key::value_t<cache_level::L3, void>
prefetch_hint_L3;
inline constexpr prefetch_hint_key::value_t<cache_level::L4, void>
prefetch_hint_L4;

inline constexpr prefetch_hint_key::value_t<cache_level::L1, nontemporal>
prefetch_hint_L1_nt;
inline constexpr prefetch_hint_key::value_t<cache_level::L2, nontemporal>
prefetch_hint_L2_nt;
inline constexpr prefetch_hint_key::value_t<cache_level::L3, nontemporal>
prefetch_hint_L3_nt;
inline constexpr prefetch_hint_key::value_t<cache_level::L4, nontemporal>
prefetch_hint_L4_nt;

template <typename T, cache_level Level, typename Hint>
struct is_valid_property<T, prefetch_hint_key::value_t<Level, Hint>>
: std::bool_constant<std::is_pointer<T>::value> {};
KornevNikita marked this conversation as resolved.
Show resolved Hide resolved

namespace detail {
template <> struct IsCompileTimeProperty<prefetch_hint_key> : std::true_type {};

template <cache_level Level, typename Hint>
struct PropertyMetaInfo<prefetch_hint_key::value_t<Level, Hint>> {
static constexpr const char *name = std::is_same_v<Hint, nontemporal>
? "sycl-prefetch-hint-nt"
: "sycl-prefetch-hint";
static constexpr int value = static_cast<int>(Level);
KornevNikita marked this conversation as resolved.
Show resolved Hide resolved
};

template <typename Properties>
void prefetch_impl(void *ptr, size_t bytes, Properties properties) {
#ifdef __SYCL_DEVICE_ONLY__
KornevNikita marked this conversation as resolved.
Show resolved Hide resolved
auto *ptrGlobalAS = __SYCL_GenericCastToPtrExplicit_ToGlobal<char>(ptr);
__attribute__((opencl_global)) char *ptrAnnotated = nullptr;
if constexpr (!properties.template has_property<prefetch_hint_key>()) {
ptrAnnotated = __builtin_intel_sycl_ptr_annotation(
ptrGlobalAS, "sycl-prefetch-hint", static_cast<int>(cache_level::L1));
} else {
auto prop = properties.template get_property<prefetch_hint_key>();
ptrAnnotated = __builtin_intel_sycl_ptr_annotation(
ptrGlobalAS, PropertyMetaInfo<decltype(prop)>::name,
PropertyMetaInfo<decltype(prop)>::value);
}
__spirv_ocl_prefetch(ptrAnnotated, bytes);
#endif
}
} // namespace detail

template <typename Properties = empty_properties_t>
void prefetch(void *ptr, Properties properties = {}) {
detail::prefetch_impl(ptr, 1, properties);
}

template <typename Properties = empty_properties_t>
void prefetch(void *ptr, size_t bytes, Properties properties = {}) {
detail::prefetch_impl(ptr, bytes, properties);
}

template <typename T, typename Properties = empty_properties_t>
void prefetch(T *ptr, Properties properties = {}) {
prefetch((void *)ptr, sizeof(T), properties);
}

template <typename T, typename Properties = empty_properties_t>
void prefetch(T *ptr, size_t count, Properties properties = {}) {
prefetch((void *)ptr, count * sizeof(T), properties);
}

// Only available if AddressSpace == global_space || AddressSpace ==
// generic_space
KornevNikita marked this conversation as resolved.
Show resolved Hide resolved
template <
access::address_space AddressSpace, access::decorated IsDecorated,
typename Properties = empty_properties_t,
std::enable_if_t<AddressSpace == access::address_space::global_space ||
AddressSpace == access::address_space::generic_space>>
KornevNikita marked this conversation as resolved.
Show resolved Hide resolved
void prefetch(multi_ptr<void, AddressSpace, IsDecorated> ptr,
Properties properties = {}) {
prefetch(ptr.get(), properties);
}

// Only available if AddressSpace == global_space || AddressSpace ==
// generic_space
template <
access::address_space AddressSpace, access::decorated IsDecorated,
typename Properties = empty_properties_t,
std::enable_if_t<AddressSpace == access::address_space::global_space ||
AddressSpace == access::address_space::generic_space>>
void prefetch(multi_ptr<void, AddressSpace, IsDecorated> ptr, size_t bytes,
Properties properties = {}) {
prefetch(ptr.get(), bytes, properties);
}

// Only available if AddressSpace == global_space || AddressSpace ==
// generic_space
template <
typename T, access::address_space AddressSpace,
access::decorated IsDecorated, typename Properties = empty_properties_t,
std::enable_if_t<AddressSpace == access::address_space::global_space ||
AddressSpace == access::address_space::generic_space>>
void prefetch(multi_ptr<T, AddressSpace, IsDecorated> ptr,
Properties properties = {}) {
prefetch(ptr.get(), properties);
}

// Only available if AddressSpace == global_space || AddressSpace ==
// generic_space
template <
typename T, access::address_space AddressSpace,
access::decorated IsDecorated, typename Properties = empty_properties_t,
std::enable_if_t<AddressSpace == access::address_space::global_space ||
AddressSpace == access::address_space::generic_space>>
void prefetch(multi_ptr<T, AddressSpace, IsDecorated> ptr, size_t count,
Properties properties = {}) {
prefetch(ptr.get(), count, properties);
}

// Only available if Dimensions > 0 && (AccessMode == read || AccessMode ==
// read_write)
template <typename DataT, int Dimensions, access_mode AccessMode,
access::placeholder IsPlaceholder,
typename Properties = empty_properties_t,
std::enable_if_t<(Dimensions > 0) &&
(AccessMode == access_mode::read ||
AccessMode == access_mode::read_write)>>
void prefetch(
accessor<DataT, Dimensions, AccessMode, target::device, IsPlaceholder> acc,
id<Dimensions> offset, Properties properties = {}) {
prefetch((void *)&acc[offset], sizeof(DataT), properties);
KornevNikita marked this conversation as resolved.
Show resolved Hide resolved
}

// Only available if Dimensions > 0 && (AccessMode == read || AccessMode ==
// read_write)
template <typename DataT, int Dimensions, access_mode AccessMode,
access::placeholder IsPlaceholder,
typename Properties = empty_properties_t,
std::enable_if_t<(Dimensions > 0) &&
(AccessMode == access_mode::read ||
AccessMode == access_mode::read_write)>>
void prefetch(
accessor<DataT, Dimensions, AccessMode, target::device, IsPlaceholder> acc,
size_t offset, size_t count, Properties properties = {}) {
prefetch((void *)&acc[offset], count * sizeof(DataT), properties);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we know for sure that count elements are consecutive in memory?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good point. @Pennycook I'm not sure how it's intended to work in case of N-dim offset. Should we call the __spirv_ocl_prefetch spirv instruction several times for different memory segments in such case or there should be some constraint?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We struggled a bit with multi-dimensional prefetches (see the issues).

Where we landed (for now) is that the block being prefetched is assumed to be contiguous, and only the offset itself is multi-dimensional. It's effectively a shorthand to avoid computing the linear offset from the start of the buffer. Note that the specification says for the multi-dimensional cases:

Effects: Equivalent to prefetch((void*) &acc[offset], sizeof(DataT), properties).
Effects: Equivalent to prefetch((void*) &acc[offset], count, properties).

If somebody requests a multi-dimensional prefetch later, we can describe it with a range parameter in place of a size_t count, and implement it the way you suggested (by calling the instruction multiple times).

}
} // namespace ext::oneapi::experimental
} // namespace _V1
} // namespace sycl
2 changes: 2 additions & 0 deletions sycl/include/sycl/ext/oneapi/properties/properties.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -187,6 +187,8 @@ properties(PropertyValueTs... props)
-> properties<typename detail::Sorted<PropertyValueTs...>::type>;
#endif

using empty_properties_t = decltype(properties{});
KornevNikita marked this conversation as resolved.
Show resolved Hide resolved

// Property list traits
template <typename propertiesT> struct is_property_list : std::false_type {};
template <typename... PropertyValueTs>
Expand Down
1 change: 1 addition & 0 deletions sycl/include/sycl/sycl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,7 @@
#include <sycl/ext/oneapi/kernel_properties/properties.hpp>
#include <sycl/ext/oneapi/matrix/matrix.hpp>
#include <sycl/ext/oneapi/owner_less.hpp>
#include <sycl/ext/oneapi/prefetch.hpp>
#include <sycl/ext/oneapi/properties/properties.hpp>
#include <sycl/ext/oneapi/properties/property_value.hpp>
#include <sycl/ext/oneapi/sub_group.hpp>
Expand Down
1 change: 1 addition & 0 deletions sycl/source/feature_test.hpp.in
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,7 @@ inline namespace _V1 {
#define SYCL_EXT_CODEPLAY_MAX_REGISTERS_PER_WORK_GROUP_QUERY 1
#define SYCL_EXT_ONEAPI_DEVICE_GLOBAL 1
#define SYCL_EXT_INTEL_QUEUE_IMMEDIATE_COMMAND_LIST 1
#define SYCL_EXT_ONEAPI_PREFETCH 1

#ifndef __has_include
#define __has_include(x) 0
Expand Down
38 changes: 38 additions & 0 deletions sycl/test/extensions/prefetch.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
// RUN: %clangxx -fsycl-device-only -S %s -o - | FileCheck %s

#include <sycl/sycl.hpp>

char data[] = {0, 1, 2, 3};

// CHECK: [[PREFETCH_STR:@.*]] = private unnamed_addr addrspace(1) constant [19 x i8] c"sycl-prefetch-hint\00", section "llvm.metadata"
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Are these manually written? If so, do we have an utility script to generate them automatically? If yes, one might need to disable instrumentation/use -O1 to get a more readable IR.

// CHECK: [[PREFETCH_LVL0:@.*]] = private unnamed_addr addrspace(1) constant [2 x i8] c"0\00", section "llvm.metadata"
// CHECK: [[ANNOTATION1:@.*]] = private unnamed_addr addrspace(1) constant { ptr addrspace(1), ptr addrspace(1) } { ptr addrspace(1) [[PREFETCH_STR]], ptr addrspace(1) [[PREFETCH_LVL0]] }, section "llvm.metadata"
// CHECK: [[PREFETCH_LVL1:@.*]] = private unnamed_addr addrspace(1) constant [2 x i8] c"1\00", section "llvm.metadata"
// CHECK: [[ANNOTATION2:@.*]] = private unnamed_addr addrspace(1) constant { ptr addrspace(1), ptr addrspace(1) } { ptr addrspace(1) [[PREFETCH_STR]], ptr addrspace(1) [[PREFETCH_LVL1]] }, section "llvm.metadata"
// CHECK: [[PREFETCH_STR_NT:@.*]] = private unnamed_addr addrspace(1) constant [22 x i8] c"sycl-prefetch-hint-nt\00", section "llvm.metadata"
// CHECK: [[PREFETCH_LVL2:@.*]] = private unnamed_addr addrspace(1) constant [2 x i8] c"2\00", section "llvm.metadata"
// CHECK: [[ANNOTATION3:@.*]] = private unnamed_addr addrspace(1) constant { ptr addrspace(1), ptr addrspace(1) } { ptr addrspace(1) [[PREFETCH_STR_NT]], ptr addrspace(1) [[PREFETCH_LVL2]] }, section "llvm.metadata"

int main() {
namespace syclex = sycl::ext::oneapi::experimental;
sycl::queue q;
void *dataPtr = &data;
q.parallel_for(1, [=](sycl::id<1> idx) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this can be single_task. I'd also want to see an E2E test with this used in non-uniform control flow (I don't think the spec prohibits that).

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Moved to single task - 7afe1c9. I'm going to add E2E tests a bit later when the whole feature (incl. llvm-spirv translator part) is done. Not sure what do you mean by "non-uniform control". Could you please explain a bit?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

if (id.get_global_id(0) % 3 == 0))
  syclex::prefetch(p);

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Now that you've added joint_prefetch that just delegates to the per-WI one, I'm even more concerned about non-uniform control flow scenario...

// CHECK: [[CASTED:%.*]] = tail call spir_func noundef ptr addrspace(1) @_Z41__spirv_GenericCastToPtrExplicit_ToGlobal{{.*}}

// CHECK: [[ANNOTATED1:%.*]] = tail call ptr addrspace(1) @llvm.ptr.annotation.p1.p1(ptr addrspace(1) [[CASTED]], ptr addrspace(1) {{.*}}, ptr addrspace(1) {{.*}}, i32 76, ptr addrspace(1) [[ANNOTATION1]])
// CHECK: tail call spir_func void @_Z20__spirv_ocl_prefetch{{.*}}(ptr addrspace(1) noundef [[ANNOTATED1]], i64 noundef 1)
syclex::prefetch(dataPtr);

// CHECK: [[ANNOTATED2:%.*]] = tail call ptr addrspace(1) @llvm.ptr.annotation.p1.p1(ptr addrspace(1) [[CASTED]], ptr addrspace(1) {{.*}}, ptr addrspace(1) {{.*}}, i32 80, ptr addrspace(1) [[ANNOTATION2]])
// CHECK: tail call spir_func void @_Z20__spirv_ocl_prefetch{{.*}}(ptr addrspace(1) noundef [[ANNOTATED2]], i64 noundef 1)
syclex::prefetch(dataPtr, syclex::properties{syclex::prefetch_hint_L2});

// CHECK: [[ANNOTATED3:%.*]] = tail call ptr addrspace(1) @llvm.ptr.annotation.p1.p1(ptr addrspace(1) [[CASTED]], ptr addrspace(1){{.*}}, ptr addrspace(1) {{.*}}, i32 80, ptr addrspace(1) [[ANNOTATION3]])
// CHECK: tail call spir_func void @_Z20__spirv_ocl_prefetch{{.*}}(ptr addrspace(1) noundef [[ANNOTATED3]], i64 noundef 4)
syclex::prefetch(dataPtr, 4, syclex::properties{syclex::prefetch_hint_L3_nt});
});
q.wait();

return 0;
}