Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[SYCL] Implement SYCL part of sycl_ext_oneapi_prefetch #11458

Merged
merged 26 commits into from
Oct 16, 2023
Merged
Show file tree
Hide file tree
Changes from 11 commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 10 additions & 5 deletions llvm/lib/SYCLLowerIR/CompileTimePropertiesPass.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -613,13 +613,18 @@ bool CompileTimePropertiesPass::transformSYCLPropertiesAnnotation(
// Read the annotation values and create the new annotation string.
std::string NewAnnotString = "";
auto Properties = parseSYCLPropertiesString(M, IntrInst);
for (auto &Property : Properties) {
for (const auto &[first, second] : Properties) {
// sycl-alignment is converted to align on
// previous parseAlignmentAndApply(), dropping here
if (*Property.first == "sycl-alignment")
if (first == "sycl-alignment")
continue;

auto DecorIt = SpirvDecorMap.find(*Property.first);
// Leave these annotations as is. They will be processed by SPIRVWriter.
if (first == "sycl-prefetch-hint" || first == "sycl-prefetch-hint-nt") {
return false;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This doesn't feel right to me, but it's outside of SYCL RT...

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Looks like currently it's the only way to transform these properties into spirv decorations for this pointer. I tried to create spirv.Decorations metadata instead, but the compiler eliminates them with optimization flags.
Annotation also may be eliminated, but it's much less likely.

}

auto DecorIt = SpirvDecorMap.find(*first);
if (DecorIt == SpirvDecorMap.end())
continue;
uint32_t DecorCode = DecorIt->second.Code;
Expand All @@ -629,8 +634,8 @@ bool CompileTimePropertiesPass::transformSYCLPropertiesAnnotation(
// string values are handled correctly. Note that " around values are
// always valid, even if the decoration parameters are not strings.
NewAnnotString += "{" + std::to_string(DecorCode);
if (Property.second)
NewAnnotString += ":\"" + Property.second->str() + "\"";
if (second)
NewAnnotString += ":\"" + second->str() + "\"";
NewAnnotString += "}";
}

Expand Down
301 changes: 301 additions & 0 deletions sycl/include/sycl/ext/oneapi/prefetch.hpp
KornevNikita marked this conversation as resolved.
Show resolved Hide resolved
Original file line number Diff line number Diff line change
@@ -0,0 +1,301 @@
//==--------------- prefetch.hpp --- SYCL prefetch extension ---------------==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#pragma once

#include <CL/__spirv/spirv_ops.hpp>
#include <sycl/ext/oneapi/properties/properties.hpp>

namespace sycl {
inline namespace _V1 {
namespace ext::oneapi::experimental {

enum class cache_level { L1 = 0, L2 = 1, L3 = 2, L4 = 3 };

struct nontemporal;

struct prefetch_hint_key {
template <cache_level Level, typename Hint>
using value_t =
property_value<prefetch_hint_key,
std::integral_constant<cache_level, Level>, Hint>;
};

template <cache_level Level, typename Hint>
inline constexpr prefetch_hint_key::value_t<Level, Hint> prefetch_hint;

inline constexpr prefetch_hint_key::value_t<cache_level::L1, void>
prefetch_hint_L1;
inline constexpr prefetch_hint_key::value_t<cache_level::L2, void>
prefetch_hint_L2;
inline constexpr prefetch_hint_key::value_t<cache_level::L3, void>
prefetch_hint_L3;
inline constexpr prefetch_hint_key::value_t<cache_level::L4, void>
prefetch_hint_L4;

inline constexpr prefetch_hint_key::value_t<cache_level::L1, nontemporal>
prefetch_hint_L1_nt;
inline constexpr prefetch_hint_key::value_t<cache_level::L2, nontemporal>
prefetch_hint_L2_nt;
inline constexpr prefetch_hint_key::value_t<cache_level::L3, nontemporal>
prefetch_hint_L3_nt;
inline constexpr prefetch_hint_key::value_t<cache_level::L4, nontemporal>
prefetch_hint_L4_nt;

namespace detail {
template <> struct IsCompileTimeProperty<prefetch_hint_key> : std::true_type {};

template <cache_level Level, typename Hint>
struct PropertyMetaInfo<prefetch_hint_key::value_t<Level, Hint>> {
static constexpr const char *name = std::is_same_v<Hint, nontemporal>
? "sycl-prefetch-hint-nt"
: "sycl-prefetch-hint";
static constexpr int value = static_cast<int>(Level);
KornevNikita marked this conversation as resolved.
Show resolved Hide resolved
};

template <typename Properties>
void prefetch_impl(void *ptr, size_t bytes, Properties properties) {
#ifdef __SYCL_DEVICE_ONLY__
KornevNikita marked this conversation as resolved.
Show resolved Hide resolved
auto *ptrGlobalAS = __SYCL_GenericCastToPtrExplicit_ToGlobal<char>(ptr);
__attribute__((opencl_global)) char *ptrAnnotated = nullptr;
if constexpr (!properties.template has_property<prefetch_hint_key>()) {
ptrAnnotated = __builtin_intel_sycl_ptr_annotation(
ptrGlobalAS, "sycl-prefetch-hint", static_cast<int>(cache_level::L1));
} else {
auto prop = properties.template get_property<prefetch_hint_key>();
ptrAnnotated = __builtin_intel_sycl_ptr_annotation(
ptrGlobalAS, PropertyMetaInfo<decltype(prop)>::name,
PropertyMetaInfo<decltype(prop)>::value);
}
__spirv_ocl_prefetch(ptrAnnotated, bytes);
#endif
std::ignore = ptr;
std::ignore = bytes;
std::ignore = properties;
}
} // namespace detail

template <typename Properties = empty_properties_t>
void prefetch(void *ptr, Properties properties = {}) {
detail::prefetch_impl(ptr, 1, properties);
}

template <typename Properties = empty_properties_t>
void prefetch(void *ptr, size_t bytes, Properties properties = {}) {
detail::prefetch_impl(ptr, bytes, properties);
}

template <typename T, typename Properties = empty_properties_t>
void prefetch(T *ptr, Properties properties = {}) {
prefetch((void *)ptr, sizeof(T), properties);
}

template <typename T, typename Properties = empty_properties_t>
void prefetch(T *ptr, size_t count, Properties properties = {}) {
prefetch((void *)ptr, count * sizeof(T), properties);
}

// Only available if AddressSpace == global_space || AddressSpace ==
// generic_space
KornevNikita marked this conversation as resolved.
Show resolved Hide resolved
template <access::address_space AddressSpace, access::decorated IsDecorated,
typename Properties = empty_properties_t>
std::enable_if_t<AddressSpace == access::address_space::global_space ||
AddressSpace == access::address_space::generic_space,
void>
KornevNikita marked this conversation as resolved.
Show resolved Hide resolved
prefetch(multi_ptr<void, AddressSpace, IsDecorated> ptr,
Properties properties = {}) {
prefetch(ptr.get(), properties);
}

// Only available if AddressSpace == global_space || AddressSpace ==
// generic_space
template <access::address_space AddressSpace, access::decorated IsDecorated,
typename Properties = empty_properties_t>
std::enable_if_t<AddressSpace == access::address_space::global_space ||
AddressSpace == access::address_space::generic_space,
void>
prefetch(multi_ptr<void, AddressSpace, IsDecorated> ptr, size_t bytes,
Properties properties = {}) {
prefetch(ptr.get(), bytes, properties);
}

// Only available if AddressSpace == global_space || AddressSpace ==
// generic_space
template <typename T, access::address_space AddressSpace,
access::decorated IsDecorated,
typename Properties = empty_properties_t>
std::enable_if_t<AddressSpace == access::address_space::global_space ||
AddressSpace == access::address_space::generic_space,
void>
prefetch(multi_ptr<T, AddressSpace, IsDecorated> ptr,
Properties properties = {}) {
prefetch(ptr.get(), properties);
}

// Only available if AddressSpace == global_space || AddressSpace ==
// generic_space
template <typename T, access::address_space AddressSpace,
access::decorated IsDecorated,
typename Properties = empty_properties_t>
std::enable_if_t<AddressSpace == access::address_space::global_space ||
AddressSpace == access::address_space::generic_space,
void>
prefetch(multi_ptr<T, AddressSpace, IsDecorated> ptr, size_t count,
Properties properties = {}) {
prefetch(ptr.get(), count, properties);
}

// Only available if Dimensions > 0 && (AccessMode == read || AccessMode ==
// read_write)
template <typename DataT, int Dimensions, access_mode AccessMode,
access::placeholder IsPlaceholder,
typename Properties = empty_properties_t>
std::enable_if_t<(Dimensions > 0) && (AccessMode == access_mode::read ||
AccessMode == access_mode::read_write),
void>
prefetch(
accessor<DataT, Dimensions, AccessMode, target::device, IsPlaceholder> acc,
id<Dimensions> offset, Properties properties = {}) {
prefetch((void *)&acc[offset], sizeof(DataT), properties);
KornevNikita marked this conversation as resolved.
Show resolved Hide resolved
}

// Only available if Dimensions > 0 && (AccessMode == read || AccessMode ==
// read_write)
template <typename DataT, int Dimensions, access_mode AccessMode,
access::placeholder IsPlaceholder,
typename Properties = empty_properties_t>
std::enable_if_t<(Dimensions > 0) && (AccessMode == access_mode::read ||
AccessMode == access_mode::read_write),
void>
prefetch(
accessor<DataT, Dimensions, AccessMode, target::device, IsPlaceholder> acc,
size_t offset, size_t count, Properties properties = {}) {
prefetch((void *)&acc[offset], count * sizeof(DataT), properties);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we know for sure that count elements are consecutive in memory?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good point. @Pennycook I'm not sure how it's intended to work in case of N-dim offset. Should we call the __spirv_ocl_prefetch spirv instruction several times for different memory segments in such case or there should be some constraint?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We struggled a bit with multi-dimensional prefetches (see the issues).

Where we landed (for now) is that the block being prefetched is assumed to be contiguous, and only the offset itself is multi-dimensional. It's effectively a shorthand to avoid computing the linear offset from the start of the buffer. Note that the specification says for the multi-dimensional cases:

Effects: Equivalent to prefetch((void*) &acc[offset], sizeof(DataT), properties).
Effects: Equivalent to prefetch((void*) &acc[offset], count, properties).

If somebody requests a multi-dimensional prefetch later, we can describe it with a range parameter in place of a size_t count, and implement it the way you suggested (by calling the instruction multiple times).

}

template <typename Group, typename Properties = empty_properties_t>
typename std::enable_if_t<sycl::is_group_v<std::decay_t<Group>>, void>
joint_prefetch(Group g, void *ptr, Properties properties = {}) {
detail::prefetch_impl(ptr, 1, properties);
}

template <typename Group, typename Properties = empty_properties_t>
typename std::enable_if_t<sycl::is_group_v<std::decay_t<Group>>, void>
joint_prefetch(Group g, void *ptr, size_t bytes, Properties properties = {}) {
detail::prefetch_impl(ptr, bytes, properties);
}

template <typename Group, typename T, typename Properties = empty_properties_t>
typename std::enable_if_t<sycl::is_group_v<std::decay_t<Group>>, void>
joint_prefetch(Group g, T *ptr, Properties properties = {}) {
joint_prefetch((void *)ptr, sizeof(T), properties);
}

template <typename Group, typename T, typename Properties = empty_properties_t>
typename std::enable_if_t<sycl::is_group_v<std::decay_t<Group>>, void>
joint_prefetch(Group g, T *ptr, size_t count, Properties properties = {}) {
joint_prefetch((void *)ptr, count * sizeof(T), properties);
}

// Only available if AddressSpace == global_space || AddressSpace ==
// generic_space
template <typename Group, access::address_space AddressSpace,
access::decorated IsDecorated,
typename Properties = ext::oneapi::experimental::empty_properties_t>
typename std::enable_if_t<
sycl::is_group_v<std::decay_t<Group>> &&
(AddressSpace == access::address_space::global_space ||
AddressSpace == access::address_space::generic_space),
void>
joint_prefetch(Group g, multi_ptr<void, AddressSpace, IsDecorated> ptr,
Properties properties = {}) {
joint_prefetch(g, ptr.get(), properties);
}

// Only available if AddressSpace == global_space || AddressSpace ==
// generic_space
template <typename Group, access::address_space AddressSpace,
access::decorated IsDecorated,
typename Properties = ext::oneapi::experimental::empty_properties_t>
typename std::enable_if_t<
sycl::is_group_v<std::decay_t<Group>> &&
(AddressSpace == access::address_space::global_space ||
AddressSpace == access::address_space::generic_space),
void>
joint_prefetch(Group g, multi_ptr<void, AddressSpace, IsDecorated> ptr,
size_t bytes, Properties properties = {}) {
joint_prefetch(g, ptr.get(), bytes, properties);
}

// Only available if AddressSpace == global_space || AddressSpace ==
// generic_space
template <typename Group, typename T, access::address_space AddressSpace,
access::decorated IsDecorated,
typename Properties = ext::oneapi::experimental::empty_properties_t>
typename std::enable_if_t<
sycl::is_group_v<std::decay_t<Group>> &&
(AddressSpace == access::address_space::global_space ||
AddressSpace == access::address_space::generic_space),
void>
joint_prefetch(Group g, multi_ptr<T, AddressSpace, IsDecorated> ptr,
Properties properties = {}) {
joint_prefetch(g, ptr.get(), properties);
}

// Only available if AddressSpace == global_space || AddressSpace ==
// generic_space
template <typename Group, typename T, access::address_space AddressSpace,
access::decorated IsDecorated,
typename Properties = ext::oneapi::experimental::empty_properties_t>
typename std::enable_if_t<
sycl::is_group_v<std::decay_t<Group>> &&
(AddressSpace == access::address_space::global_space ||
AddressSpace == access::address_space::generic_space),
void>
joint_prefetch(Group g, multi_ptr<T, AddressSpace, IsDecorated> ptr,
size_t count, Properties properties = {}) {
joint_prefetch(g, ptr.get(), count, properties);
}

// Only available if Dimensions > 0 && (AccessMode == read || AccessMode ==
// read_write)
template <typename Group, typename DataT, int Dimensions,
access_mode AccessMode, access::placeholder IsPlaceholder,
typename Properties = ext::oneapi::experimental::empty_properties_t>
typename std::enable_if_t<sycl::is_group_v<std::decay_t<Group>> &&
(Dimensions > 0) &&
(AccessMode == access_mode::read ||
AccessMode == access_mode::read_write),
void>
joint_prefetch(
Group g,
accessor<DataT, Dimensions, AccessMode, target::device, IsPlaceholder> acc,
size_t offset, Properties properties = {}) {
joint_prefetch(g, (void *)&acc[offset], sizeof(DataT), properties);
}

// Only available if Dimensions > 0 && (AccessMode == read || AccessMode ==
// read_write)
template <typename Group, typename DataT, int Dimensions,
access_mode AccessMode, access::placeholder IsPlaceholder,
typename Properties = ext::oneapi::experimental::empty_properties_t>
typename std::enable_if_t<sycl::is_group_v<std::decay_t<Group>> &&
(Dimensions > 0) &&
(AccessMode == access_mode::read ||
AccessMode == access_mode::read_write),
void>
joint_prefetch(
Group g,
accessor<DataT, Dimensions, AccessMode, target::device, IsPlaceholder> acc,
size_t offset, size_t count, Properties properties = {}) {
joint_prefetch(g, (void *)&acc[offset], count * sizeof(DataT), properties);
}

} // namespace ext::oneapi::experimental
} // namespace _V1
} // namespace sycl
2 changes: 2 additions & 0 deletions sycl/include/sycl/ext/oneapi/properties/properties.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -187,6 +187,8 @@ properties(PropertyValueTs... props)
-> properties<typename detail::Sorted<PropertyValueTs...>::type>;
#endif

using empty_properties_t = decltype(properties{});
KornevNikita marked this conversation as resolved.
Show resolved Hide resolved

// Property list traits
template <typename propertiesT> struct is_property_list : std::false_type {};
template <typename... PropertyValueTs>
Expand Down
1 change: 1 addition & 0 deletions sycl/include/sycl/sycl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,7 @@
#include <sycl/ext/oneapi/kernel_properties/properties.hpp>
#include <sycl/ext/oneapi/matrix/matrix.hpp>
#include <sycl/ext/oneapi/owner_less.hpp>
#include <sycl/ext/oneapi/prefetch.hpp>
#include <sycl/ext/oneapi/properties/properties.hpp>
#include <sycl/ext/oneapi/properties/property_value.hpp>
#include <sycl/ext/oneapi/sub_group.hpp>
Expand Down
1 change: 1 addition & 0 deletions sycl/source/feature_test.hpp.in
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,7 @@ inline namespace _V1 {
#define SYCL_EXT_CODEPLAY_MAX_REGISTERS_PER_WORK_GROUP_QUERY 1
#define SYCL_EXT_ONEAPI_DEVICE_GLOBAL 1
#define SYCL_EXT_INTEL_QUEUE_IMMEDIATE_COMMAND_LIST 1
#define SYCL_EXT_ONEAPI_PREFETCH 1

#ifndef __has_include
#define __has_include(x) 0
Expand Down
39 changes: 39 additions & 0 deletions sycl/test/extensions/prefetch.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
// RUN: %clangxx -fsycl-device-only -S %s -o - | FileCheck %s

#include <sycl/sycl.hpp>

char data[] = {0, 1, 2, 3};

// CHECK: [[PREFETCH_STR:@.*]] = private unnamed_addr addrspace(1) constant [19 x i8] c"sycl-prefetch-hint\00", section "llvm.metadata"
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Are these manually written? If so, do we have an utility script to generate them automatically? If yes, one might need to disable instrumentation/use -O1 to get a more readable IR.

// CHECK: [[PREFETCH_LVL0:@.*]] = private unnamed_addr addrspace(1) constant [2 x i8] c"0\00", section "llvm.metadata"
// CHECK: [[ANNOTATION1:@.*]] = private unnamed_addr addrspace(1) constant { ptr addrspace(1), ptr addrspace(1) } { ptr addrspace(1) [[PREFETCH_STR]], ptr addrspace(1) [[PREFETCH_LVL0]] }, section "llvm.metadata"
// CHECK: [[PREFETCH_LVL1:@.*]] = private unnamed_addr addrspace(1) constant [2 x i8] c"1\00", section "llvm.metadata"
// CHECK: [[ANNOTATION2:@.*]] = private unnamed_addr addrspace(1) constant { ptr addrspace(1), ptr addrspace(1) } { ptr addrspace(1) [[PREFETCH_STR]], ptr addrspace(1) [[PREFETCH_LVL1]] }, section "llvm.metadata"
// CHECK: [[PREFETCH_STR_NT:@.*]] = private unnamed_addr addrspace(1) constant [22 x i8] c"sycl-prefetch-hint-nt\00", section "llvm.metadata"
// CHECK: [[PREFETCH_LVL2:@.*]] = private unnamed_addr addrspace(1) constant [2 x i8] c"2\00", section "llvm.metadata"
// CHECK: [[ANNOTATION3:@.*]] = private unnamed_addr addrspace(1) constant { ptr addrspace(1), ptr addrspace(1) } { ptr addrspace(1) [[PREFETCH_STR_NT]], ptr addrspace(1) [[PREFETCH_LVL2]] }, section "llvm.metadata"

int main() {
namespace syclex = sycl::ext::oneapi::experimental;
sycl::queue q;
void *dataPtr = &data;
q.single_task([=]() {
// CHECK: [[CASTED:%.*]] = tail call spir_func noundef ptr addrspace(1) @_Z41__spirv_GenericCastToPtrExplicit_ToGlobal{{.*}}

// CHECK: [[ANNOTATED1:%.*]] = tail call ptr addrspace(1) @llvm.ptr.annotation.p1.p1(ptr addrspace(1) [[CASTED]], {{.*}}, ptr addrspace(1) [[ANNOTATION1]])
// CHECK: tail call spir_func void @_Z20__spirv_ocl_prefetch{{.*}}(ptr addrspace(1) noundef [[ANNOTATED1]], i64 noundef 1)
syclex::prefetch(dataPtr);

// CHECK: [[ANNOTATED2:%.*]] = tail call ptr addrspace(1) @llvm.ptr.annotation.p1.p1(ptr addrspace(1) [[CASTED]], {{.*}}, ptr addrspace(1) [[ANNOTATION2]])
// CHECK: tail call spir_func void @_Z20__spirv_ocl_prefetch{{.*}}(ptr addrspace(1) noundef [[ANNOTATED2]], i64 noundef 1)
syclex::prefetch(dataPtr, syclex::properties{syclex::prefetch_hint_L2});

// CHECK: [[ANNOTATED3:%.*]] = tail call ptr addrspace(1) @llvm.ptr.annotation.p1.p1(ptr addrspace(1) [[CASTED]], {{.*}}, ptr addrspace(1) [[ANNOTATION3]])
// CHECK: tail call spir_func void @_Z20__spirv_ocl_prefetch{{.*}}(ptr addrspace(1) noundef [[ANNOTATED3]], i64 noundef 4)
syclex::prefetch(dataPtr, 4,
syclex::properties{syclex::prefetch_hint_L3_nt});
});
q.wait();

return 0;
}