diff --git a/llvm/include/llvm/Frontend/Offloading/Utility.h b/llvm/include/llvm/Frontend/Offloading/Utility.h index abaea843848b21..f0bde5d81ef6de 100644 --- a/llvm/include/llvm/Frontend/Offloading/Utility.h +++ b/llvm/include/llvm/Frontend/Offloading/Utility.h @@ -21,6 +21,16 @@ namespace llvm { namespace offloading { +/// This is the record of an object that must be registered with the offloading +/// runtime. +struct EntryTy { + void *Address; // Pointer to the offload entry info (function or global). + char *SymbolName; // Name of the function or global. + size_t Size; // Size of the entry info (0 if it is a function). + int32_t Flags; // Flags associated with the entry, e.g. 'link'. + int32_t Data; // Extra data associated with the entry. +}; + /// Offloading entry flags for CUDA / HIP. The first three bits indicate the /// type of entry while the others are a bit field for additional information. enum OffloadEntryKindFlag : uint32_t { @@ -48,15 +58,6 @@ StructType *getEntryTy(Module &M); /// Create an offloading section struct used to register this global at /// runtime. /// -/// Type struct __tgt_offload_entry { -/// void *addr; // Pointer to the offload entry info. -/// // (function or global) -/// char *name; // Name of the function or global. -/// size_t size; // Size of the entry info (0 if it a function). -/// int32_t flags; -/// int32_t data; -/// }; -/// /// \param M The module to be used /// \param Addr The pointer to the global being registered. /// \param Name The symbol name associated with the global. 
diff --git a/offload/docs/declare_target_indirect.md b/offload/docs/declare_target_indirect.md index 443a5ab1d4b964..bd66dcd98154f6 100644 --- a/offload/docs/declare_target_indirect.md +++ b/offload/docs/declare_target_indirect.md @@ -25,7 +25,7 @@ The offload entries table that is created for the host and for each of the devic Compiler will also produce an entry for each procedure listed in **indirect** clause of **declare target** construct: ```C++ -struct __tgt_offload_entry { +struct llvm::offloading::EntryTy { void *addr; // Pointer to the function char *name; // Name of the function size_t size; // 0 for function @@ -82,7 +82,7 @@ struct __omp_offloading_fptr_map_ty { }; ``` -Where `host_ptr` is `__tgt_offload_entry::addr` in a **host** offload entry, and `tgt_ptr` is `__tgt_offload_entry::addr` in the corresponding **device** offload entry (which may be found using the populated `Device.HostDataToTargetMap`). +Where `host_ptr` is `llvm::offloading::EntryTy::addr` in a **host** offload entry, and `tgt_ptr` is `llvm::offloading::EntryTy::addr` in the corresponding **device** offload entry (which may be found using the populated `Device.HostDataToTargetMap`). When all `__omp_offloading_function_ptr_map_ty` entries are collected in a single host array, `libomptarget` sorts the table by `host_ptr` values and passes it to the device plugin for registration, if plugin supports optional `__tgt_rtl_set_function_ptr_map` API. 
diff --git a/offload/include/OffloadEntry.h b/offload/include/OffloadEntry.h index da1de8123be979..551ee698759e20 100644 --- a/offload/include/OffloadEntry.h +++ b/offload/include/OffloadEntry.h @@ -22,24 +22,25 @@ class DeviceImageTy; class OffloadEntryTy { DeviceImageTy &DeviceImage; - __tgt_offload_entry &OffloadEntry; + llvm::offloading::EntryTy &OffloadEntry; public: - OffloadEntryTy(DeviceImageTy &DeviceImage, __tgt_offload_entry &OffloadEntry) + OffloadEntryTy(DeviceImageTy &DeviceImage, + llvm::offloading::EntryTy &OffloadEntry) : DeviceImage(DeviceImage), OffloadEntry(OffloadEntry) {} bool isGlobal() const { return getSize() != 0; } - size_t getSize() const { return OffloadEntry.size; } + size_t getSize() const { return OffloadEntry.Size; } - void *getAddress() const { return OffloadEntry.addr; } - llvm::StringRef getName() const { return OffloadEntry.name; } - const char *getNameAsCStr() const { return OffloadEntry.name; } + void *getAddress() const { return OffloadEntry.Address; } + llvm::StringRef getName() const { return OffloadEntry.SymbolName; } + const char *getNameAsCStr() const { return OffloadEntry.SymbolName; } __tgt_bin_desc *getBinaryDescription() const; bool isLink() const { return hasFlags(OMP_DECLARE_TARGET_LINK); } bool hasFlags(OpenMPOffloadingDeclareTargetFlags Flags) const { - return Flags & OffloadEntry.flags; + return Flags & OffloadEntry.Flags; } }; diff --git a/offload/include/PluginManager.h b/offload/include/PluginManager.h index 94b6d1090b5a80..324dcac7787ea8 100644 --- a/offload/include/PluginManager.h +++ b/offload/include/PluginManager.h @@ -81,7 +81,8 @@ struct PluginManager { HostEntriesBeginToTransTableTy HostEntriesBeginToTransTable; std::mutex TrlTblMtx; ///< For Translation Table /// Host offload entries in order of image registration - llvm::SmallVector<__tgt_offload_entry *> HostEntriesBeginRegistrationOrder; + llvm::SmallVector<llvm::offloading::EntryTy *> + HostEntriesBeginRegistrationOrder; /// Map from ptrs on the host to an entry in the 
Translation Table HostPtrToTableMapTy HostPtrToTableMap; diff --git a/offload/include/Shared/APITypes.h b/offload/include/Shared/APITypes.h index 4c1f7712249a3a..978b53d5d69b9e 100644 --- a/offload/include/Shared/APITypes.h +++ b/offload/include/Shared/APITypes.h @@ -17,28 +17,20 @@ #include "Environment.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/Frontend/Offloading/Utility.h" #include #include extern "C" { -/// This struct is a record of an entry point or global. For a function -/// entry point the size is expected to be zero -struct __tgt_offload_entry { - void *addr; // Pointer to the offload entry info (function or global) - char *name; // Name of the function or global - size_t size; // Size of the entry info (0 if it is a function) - int32_t flags; // Flags associated with the entry, e.g. 'link'. - int32_t data; // Extra data associated with the entry. -}; - /// This struct is a record of the device image information struct __tgt_device_image { - void *ImageStart; // Pointer to the target code start - void *ImageEnd; // Pointer to the target code end - __tgt_offload_entry *EntriesBegin; // Begin of table with all target entries - __tgt_offload_entry *EntriesEnd; // End of table (non inclusive) + void *ImageStart; // Pointer to the target code start + void *ImageEnd; // Pointer to the target code end + llvm::offloading::EntryTy + *EntriesBegin; // Begin of table with all target entries + llvm::offloading::EntryTy *EntriesEnd; // End of table (non inclusive) }; struct __tgt_device_info { @@ -51,14 +43,16 @@ struct __tgt_device_info { struct __tgt_bin_desc { int32_t NumDeviceImages; // Number of device types supported __tgt_device_image *DeviceImages; // Array of device images (1 per dev. 
type) - __tgt_offload_entry *HostEntriesBegin; // Begin of table with all host entries - __tgt_offload_entry *HostEntriesEnd; // End of table (non inclusive) + llvm::offloading::EntryTy + *HostEntriesBegin; // Begin of table with all host entries + llvm::offloading::EntryTy *HostEntriesEnd; // End of table (non inclusive) }; /// This struct contains the offload entries identified by the target runtime struct __tgt_target_table { - __tgt_offload_entry *EntriesBegin; // Begin of the table with all the entries - __tgt_offload_entry + llvm::offloading::EntryTy + *EntriesBegin; // Begin of the table with all the entries + llvm::offloading::EntryTy *EntriesEnd; // End of the table with all the entries (non inclusive) }; @@ -107,9 +101,9 @@ struct KernelArgsTy { } Flags = {0, 0, 0}; // The number of teams (for x,y,z dimension). uint32_t NumTeams[3] = {0, 0, 0}; - // The number of threads (for x,y,z dimension). + // The number of threads (for x,y,z dimension). uint32_t ThreadLimit[3] = {0, 0, 0}; - uint32_t DynCGroupMem = 0; // Amount of dynamic cgroup memory requested. + uint32_t DynCGroupMem = 0; // Amount of dynamic cgroup memory requested. }; static_assert(sizeof(KernelArgsTy().Flags) == sizeof(uint64_t), "Invalid struct size"); diff --git a/offload/include/rtl.h b/offload/include/rtl.h index 5e198bdad43642..38f1dd24011e04 100644 --- a/offload/include/rtl.h +++ b/offload/include/rtl.h @@ -22,7 +22,7 @@ /// Map between the host entry begin and the translation table. Each /// registered library gets one TranslationTable. Use the map from -/// __tgt_offload_entry so that we may quickly determine whether we +/// llvm::offloading::EntryTy so that we may quickly determine whether we /// are trying to (re)register an existing lib or really have a new one. struct TranslationTable { __tgt_target_table HostTable; @@ -33,14 +33,14 @@ struct TranslationTable { TargetsImages; // One image per device ID. // Arrays of entries active on the device. 
- llvm::SmallVector<llvm::SmallVector<__tgt_offload_entry>> + llvm::SmallVector<llvm::SmallVector<llvm::offloading::EntryTy>> TargetsEntries; // One table per device ID. // Table of entry points or NULL if it was not already computed. llvm::SmallVector<__tgt_target_table *> TargetsTable; // One table per device ID. }; -typedef std::map<__tgt_offload_entry *, TranslationTable> +typedef std::map<llvm::offloading::EntryTy *, TranslationTable> HostEntriesBeginToTransTableTy; /// Map between the host ptr and a table index diff --git a/offload/plugins-nextgen/common/src/PluginInterface.cpp b/offload/plugins-nextgen/common/src/PluginInterface.cpp index bd58d1d6e0d96d..a164bfb51d0264 100644 --- a/offload/plugins-nextgen/common/src/PluginInterface.cpp +++ b/offload/plugins-nextgen/common/src/PluginInterface.cpp @@ -376,24 +376,24 @@ setupIndirectCallTable(GenericPluginTy &Plugin, GenericDeviceTy &Device, DeviceImageTy &Image) { GenericGlobalHandlerTy &Handler = Plugin.getGlobalHandler(); - llvm::ArrayRef<__tgt_offload_entry> Entries(Image.getTgtImage()->EntriesBegin, - Image.getTgtImage()->EntriesEnd); + llvm::ArrayRef<llvm::offloading::EntryTy> Entries( + Image.getTgtImage()->EntriesBegin, Image.getTgtImage()->EntriesEnd); llvm::SmallVector<std::pair<void *, void *>> IndirectCallTable; for (const auto &Entry : Entries) { - if (Entry.size == 0 || !(Entry.flags & OMP_DECLARE_TARGET_INDIRECT)) + if (Entry.Size == 0 || !(Entry.Flags & OMP_DECLARE_TARGET_INDIRECT)) continue; - assert(Entry.size == sizeof(void *) && "Global not a function pointer?"); + assert(Entry.Size == sizeof(void *) && "Global not a function pointer?"); auto &[HstPtr, DevPtr] = IndirectCallTable.emplace_back(); - GlobalTy DeviceGlobal(Entry.name, Entry.size); + GlobalTy DeviceGlobal(Entry.SymbolName, Entry.Size); if (auto Err = Handler.getGlobalMetadataFromDevice(Device, Image, DeviceGlobal)) return std::move(Err); - HstPtr = Entry.addr; + HstPtr = Entry.Address; if (auto Err = Device.dataRetrieve(&DevPtr, DeviceGlobal.getPtr(), - Entry.size, nullptr)) + Entry.Size, nullptr)) return std::move(Err); } diff --git a/offload/src/PluginManager.cpp b/offload/src/PluginManager.cpp index 
315b953f9b31ac..96fa0bb170489e 100644 --- a/offload/src/PluginManager.cpp +++ b/offload/src/PluginManager.cpp @@ -128,10 +128,10 @@ void PluginManager::registerLib(__tgt_bin_desc *Desc) { PM->RTLsMtx.lock(); // Add in all the OpenMP requirements associated with this binary. - for (__tgt_offload_entry &Entry : + for (llvm::offloading::EntryTy &Entry : llvm::make_range(Desc->HostEntriesBegin, Desc->HostEntriesEnd)) - if (Entry.flags == OMP_REGISTER_REQUIRES) - PM->addRequirements(Entry.data); + if (Entry.Flags == OMP_REGISTER_REQUIRES) + PM->addRequirements(Entry.Data); // Extract the exectuable image and extra information if availible. for (int32_t i = 0; i < Desc->NumDeviceImages; ++i) @@ -268,9 +268,9 @@ void PluginManager::unregisterLib(__tgt_bin_desc *Desc) { // Remove entries from PM->HostPtrToTableMap PM->TblMapMtx.lock(); - for (__tgt_offload_entry *Cur = Desc->HostEntriesBegin; + for (llvm::offloading::EntryTy *Cur = Desc->HostEntriesBegin; Cur < Desc->HostEntriesEnd; ++Cur) { - PM->HostPtrToTableMap.erase(Cur->addr); + PM->HostPtrToTableMap.erase(Cur->Address); } // Remove translation table for this descriptor. @@ -336,35 +336,36 @@ static int loadImagesOntoDevice(DeviceTy &Device) { } // 3) Create the translation table. 
- llvm::SmallVector<__tgt_offload_entry> &DeviceEntries = + llvm::SmallVector<llvm::offloading::EntryTy> &DeviceEntries = TransTable->TargetsEntries[DeviceId]; - for (__tgt_offload_entry &Entry : + for (llvm::offloading::EntryTy &Entry : llvm::make_range(Img->EntriesBegin, Img->EntriesEnd)) { __tgt_device_binary &Binary = *BinaryOrErr; - __tgt_offload_entry DeviceEntry = Entry; - if (Entry.size) { - if (Device.RTL->get_global(Binary, Entry.size, Entry.name, - &DeviceEntry.addr) != OFFLOAD_SUCCESS) - REPORT("Failed to load symbol %s\n", Entry.name); + llvm::offloading::EntryTy DeviceEntry = Entry; + if (Entry.Size) { + if (Device.RTL->get_global(Binary, Entry.Size, Entry.SymbolName, + &DeviceEntry.Address) != OFFLOAD_SUCCESS) + REPORT("Failed to load symbol %s\n", Entry.SymbolName); // If unified memory is active, the corresponding global is a device // reference to the host global. We need to initialize the pointer on // the device to point to the memory on the host. if ((PM->getRequirements() & OMP_REQ_UNIFIED_SHARED_MEMORY) || (PM->getRequirements() & OMPX_REQ_AUTO_ZERO_COPY)) { - if (Device.RTL->data_submit(DeviceId, DeviceEntry.addr, Entry.addr, - Entry.size) != OFFLOAD_SUCCESS) - REPORT("Failed to write symbol for USM %s\n", Entry.name); + if (Device.RTL->data_submit(DeviceId, DeviceEntry.Address, + Entry.Address, + Entry.Size) != OFFLOAD_SUCCESS) + REPORT("Failed to write symbol for USM %s\n", Entry.SymbolName); } - } else if (Entry.addr) { - if (Device.RTL->get_function(Binary, Entry.name, &DeviceEntry.addr) != - OFFLOAD_SUCCESS) - REPORT("Failed to load kernel %s\n", Entry.name); + } else if (Entry.Address) { + if (Device.RTL->get_function(Binary, Entry.SymbolName, + &DeviceEntry.Address) != OFFLOAD_SUCCESS) + REPORT("Failed to load kernel %s\n", Entry.SymbolName); } DP("Entry point " DPxMOD " maps to%s %s (" DPxMOD ")\n", - DPxPTR(Entry.addr), (Entry.size) ? " global" : "", Entry.name, - DPxPTR(DeviceEntry.addr)); + DPxPTR(Entry.Address), (Entry.Size) ? 
" global" : "", + Entry.SymbolName, DPxPTR(DeviceEntry.Address)); DeviceEntries.emplace_back(DeviceEntry); } @@ -396,30 +397,31 @@ static int loadImagesOntoDevice(DeviceTy &Device) { Device.getMappingInfo().HostDataToTargetMap.getExclusiveAccessor(); __tgt_target_table *HostTable = &TransTable->HostTable; - for (__tgt_offload_entry *CurrDeviceEntry = TargetTable->EntriesBegin, - *CurrHostEntry = HostTable->EntriesBegin, - *EntryDeviceEnd = TargetTable->EntriesEnd; + for (llvm::offloading::EntryTy * + CurrDeviceEntry = TargetTable->EntriesBegin, + *CurrHostEntry = HostTable->EntriesBegin, + *EntryDeviceEnd = TargetTable->EntriesEnd; CurrDeviceEntry != EntryDeviceEnd; CurrDeviceEntry++, CurrHostEntry++) { - if (CurrDeviceEntry->size == 0) + if (CurrDeviceEntry->Size == 0) continue; - assert(CurrDeviceEntry->size == CurrHostEntry->size && + assert(CurrDeviceEntry->Size == CurrHostEntry->Size && "data size mismatch"); // Fortran may use multiple weak declarations for the same symbol, // therefore we must allow for multiple weak symbols to be loaded from // the fat binary. Treat these mappings as any other "regular" // mapping. Add entry to map. - if (Device.getMappingInfo().getTgtPtrBegin(HDTTMap, CurrHostEntry->addr, - CurrHostEntry->size)) + if (Device.getMappingInfo().getTgtPtrBegin( + HDTTMap, CurrHostEntry->Address, CurrHostEntry->Size)) continue; - void *CurrDeviceEntryAddr = CurrDeviceEntry->addr; + void *CurrDeviceEntryAddr = CurrDeviceEntry->Address; // For indirect mapping, follow the indirection and map the actual // target. 
- if (CurrDeviceEntry->flags & OMP_DECLARE_TARGET_INDIRECT) { + if (CurrDeviceEntry->Flags & OMP_DECLARE_TARGET_INDIRECT) { AsyncInfoTy AsyncInfo(Device); void *DevPtr; Device.retrieveData(&DevPtr, CurrDeviceEntryAddr, sizeof(void *), @@ -431,19 +433,21 @@ static int loadImagesOntoDevice(DeviceTy &Device) { DP("Add mapping from host " DPxMOD " to device " DPxMOD " with size %zu" ", name \"%s\"\n", - DPxPTR(CurrHostEntry->addr), DPxPTR(CurrDeviceEntry->addr), - CurrDeviceEntry->size, CurrDeviceEntry->name); + DPxPTR(CurrHostEntry->Address), DPxPTR(CurrDeviceEntry->Address), + CurrDeviceEntry->Size, CurrDeviceEntry->SymbolName); HDTTMap->emplace(new HostDataToTargetTy( - (uintptr_t)CurrHostEntry->addr /*HstPtrBase*/, - (uintptr_t)CurrHostEntry->addr /*HstPtrBegin*/, - (uintptr_t)CurrHostEntry->addr + CurrHostEntry->size /*HstPtrEnd*/, + (uintptr_t)CurrHostEntry->Address /*HstPtrBase*/, + (uintptr_t)CurrHostEntry->Address /*HstPtrBegin*/, + (uintptr_t)CurrHostEntry->Address + + CurrHostEntry->Size /*HstPtrEnd*/, (uintptr_t)CurrDeviceEntryAddr /*TgtAllocBegin*/, (uintptr_t)CurrDeviceEntryAddr /*TgtPtrBegin*/, - false /*UseHoldRefCount*/, CurrHostEntry->name, + false /*UseHoldRefCount*/, CurrHostEntry->SymbolName, true /*IsRefCountINF*/)); // Notify about the new mapping. - if (Device.notifyDataMapped(CurrHostEntry->addr, CurrHostEntry->size)) + if (Device.notifyDataMapped(CurrHostEntry->Address, + CurrHostEntry->Size)) return OFFLOAD_FAIL; } } diff --git a/offload/src/omptarget.cpp b/offload/src/omptarget.cpp index 1a7af5649b9e22..89fa63347babe2 100644 --- a/offload/src/omptarget.cpp +++ b/offload/src/omptarget.cpp @@ -977,9 +977,9 @@ TableMap *getTableMap(void *HostPtr) { TranslationTable *TransTable = &Itr->second; // iterate over all the host table entries to see if we can locate the // host_ptr. 
- __tgt_offload_entry *Cur = TransTable->HostTable.EntriesBegin; + llvm::offloading::EntryTy *Cur = TransTable->HostTable.EntriesBegin; for (uint32_t I = 0; Cur < TransTable->HostTable.EntriesEnd; ++Cur, ++I) { - if (Cur->addr != HostPtr) + if (Cur->Address != HostPtr) continue; // we got a match, now fill the HostPtrToTableMap so that we // may avoid this search next time. @@ -1437,9 +1437,10 @@ int target(ident_t *Loc, DeviceTy &Device, void *HostPtr, } // Launch device execution. - void *TgtEntryPtr = TargetTable->EntriesBegin[TM->Index].addr; + void *TgtEntryPtr = TargetTable->EntriesBegin[TM->Index].Address; DP("Launching target execution %s with pointer " DPxMOD " (index=%d).\n", - TargetTable->EntriesBegin[TM->Index].name, DPxPTR(TgtEntryPtr), TM->Index); + TargetTable->EntriesBegin[TM->Index].SymbolName, DPxPTR(TgtEntryPtr), + TM->Index); { assert(KernelArgs.NumArgs == TgtArgs.size() && "Argument count mismatch!"); @@ -1525,9 +1526,10 @@ int target_replay(ident_t *Loc, DeviceTy &Device, void *HostPtr, // Retrieve the target kernel pointer, allocate and store the recorded device // memory data, and launch device execution. 
- void *TgtEntryPtr = TargetTable->EntriesBegin[TM->Index].addr; + void *TgtEntryPtr = TargetTable->EntriesBegin[TM->Index].Address; DP("Launching target execution %s with pointer " DPxMOD " (index=%d).\n", - TargetTable->EntriesBegin[TM->Index].name, DPxPTR(TgtEntryPtr), TM->Index); + TargetTable->EntriesBegin[TM->Index].SymbolName, DPxPTR(TgtEntryPtr), + TM->Index); void *TgtPtr = Device.allocData(DeviceMemorySize, /*HstPtr=*/nullptr, TARGET_ALLOC_DEFAULT); diff --git a/offload/tools/kernelreplay/llvm-omp-kernel-replay.cpp b/offload/tools/kernelreplay/llvm-omp-kernel-replay.cpp index 1e9a6a84d80583..ff771540332396 100644 --- a/offload/tools/kernelreplay/llvm-omp-kernel-replay.cpp +++ b/offload/tools/kernelreplay/llvm-omp-kernel-replay.cpp @@ -13,9 +13,11 @@ #include "omptarget.h" +#include "llvm/Frontend/Offloading/Utility.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/JSON.h" #include "llvm/Support/MemoryBuffer.h" + #include #include @@ -91,11 +93,11 @@ int main(int argc, char **argv) { void *BAllocStart = reinterpret_cast<void *>( JsonKernelInfo->getAsObject()->getInteger("BumpAllocVAStart").value()); - __tgt_offload_entry KernelEntry = {nullptr, nullptr, 0, 0, 0}; + llvm::offloading::EntryTy KernelEntry = {nullptr, nullptr, 0, 0, 0}; std::string KernelEntryName = KernelFunc.value().str(); - KernelEntry.name = const_cast<char *>(KernelEntryName.c_str()); + KernelEntry.SymbolName = const_cast<char *>(KernelEntryName.c_str()); // Anything non-zero works to uniquely identify the kernel. 
- KernelEntry.addr = (void *)0x1; + KernelEntry.Address = (void *)0x1; ErrorOr<std::unique_ptr<MemoryBuffer>> ImageMB = MemoryBuffer::getFile(KernelEntryName + ".image", /*isText=*/false, @@ -164,7 +166,7 @@ int main(int argc, char **argv) { } __tgt_target_kernel_replay( - /*Loc=*/nullptr, DeviceId, KernelEntry.addr, (char *)recored_data, + /*Loc=*/nullptr, DeviceId, KernelEntry.Address, (char *)recored_data, DeviceMemoryMB.get()->getBufferSize(), TgtArgs.data(), TgtArgOffsets.data(), NumArgs.value(), NumTeams, NumThreads, LoopTripCount.value());