// llvm/offload/libomptarget/device.cpp
//===--------- device.cpp - Target independent OpenMP target RTL ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Functionality for managing devices that are handled by RTL plugins.
//
//===----------------------------------------------------------------------===//
#include "device.h"
#include "OffloadEntry.h"
#include "OpenMP/Mapping.h"
#include "OpenMP/OMPT/Callback.h"
#include "OpenMP/OMPT/Interface.h"
#include "PluginManager.h"
#include "Shared/APITypes.h"
#include "Shared/Debug.h"
#include "omptarget.h"
#include "private.h"
#include "rtl.h"
#include "Shared/EnvironmentVar.h"
#include "llvm/Support/Error.h"
#include <cassert>
#include <climits>
#include <cstdint>
#include <cstdio>
#include <mutex>
#include <string>
#include <thread>
#ifdef OMPT_SUPPORT
using namespace llvm::omp::target::ompt;
#endif
using namespace llvm::omp::target::plugin;
using namespace llvm::omp::target::debug;
/// Ensure an event is recorded for this mapping on \p AsyncInfo, creating a
/// new event on \p Device first if none exists yet. Returns OFFLOAD_SUCCESS /
/// OFFLOAD_FAIL.
int HostDataToTargetTy::addEventIfNecessary(DeviceTy &Device,
                                            AsyncInfoTy &AsyncInfo) const {
  // Respect the user's choice to disable events for atomic map
  // transfer/malloc/dealloc.
  if (!MappingConfig::get().UseEventsForAtomicTransfers)
    return OFFLOAD_SUCCESS;

  void *Ev = getEvent();
  const bool CreatedHere = (Ev == nullptr);
  if (CreatedHere && Device.createEvent(&Ev) != OFFLOAD_SUCCESS) {
    REPORT() << "Failed to create event";
    return OFFLOAD_FAIL;
  }

  // The event may still be nullptr on targets without event support; such
  // targets are expected to make recordEvent a successful no-op.
  if (Device.recordEvent(Ev, AsyncInfo) != OFFLOAD_SUCCESS) {
    REPORT() << "Failed to set dependence on event " << Ev;
    return OFFLOAD_FAIL;
  }

  // Only publish the event if this call created it.
  if (CreatedHere)
    setEvent(Ev);

  return OFFLOAD_SUCCESS;
}
[Reland][Libomptarget] Statically link all plugin runtimes (#87009) This patch overhauls the `libomptarget` and plugin interface. Currently, we define a C API and compile each plugin as a separate shared library. Then, `libomptarget` loads these API functions and forwards its internal calls to them. This was originally designed to allow multiple implementations of a library to be live. However, since then no one has used this functionality and it prevents us from using much nicer interfaces. If the old behavior is desired it should instead be implemented as a separate plugin. This patch replaces the `PluginAdaptorTy` interface with the `GenericPluginTy` that is used by the plugins. Each plugin exports a `createPlugin_<name>` function that is used to get the specific implementation. This code is now shared with `libomptarget`. There are some notable improvements to this. 1. Massively improved lifetimes of life runtime objects 2. The plugins can use a C++ interface 3. Global state does not need to be duplicated for each plugin + libomptarget 4. Easier to use and add features and improve error handling 5. Less function call overhead / Improved LTO performance. Additional changes in this plugin are related to contending with the fact that state is now shared. Initialization and deinitialization is now handled correctly and in phase with the underlying runtime, allowing us to actually know when something is getting deallocated. Depends on https://github.com/llvm/llvm-project/pull/86971 https://github.com/llvm/llvm-project/pull/86875 https://github.com/llvm/llvm-project/pull/86868
2024-05-09 06:35:54 -05:00
DeviceTy::DeviceTy(GenericPluginTy *RTL, int32_t DeviceID, int32_t RTLDeviceID)
: DeviceID(DeviceID), RTL(RTL), RTLDeviceID(RTLDeviceID),
MappingInfo(*this) {}
[OpenMP] Introduce target memory manager Target memory manager is introduced in this patch which aims to manage target memory such that they will not be freed immediately when they are not used because the overhead of memory allocation and free is very large. For CUDA device, cuMemFree even blocks the context switch on device which affects concurrent kernel execution. The memory manager can be taken as a memory pool. It divides the pool into multiple buckets according to the size such that memory allocation/free distributed to different buckets will not affect each other. In this version, we use the exact-equality policy to find a free buffer. This is an open question: will best-fit work better here? IMO, best-fit is not good for target memory management because computation on GPU usually requires GBs of data. Best-fit might lead to a serious waste. For example, there is a free buffer of size 1960MB, and now we need a buffer of size 1200MB. If best-fit, the free buffer will be returned, leading to a 760MB waste. The allocation will happen when there is no free memory left, and the memory free on device will take place in the following two cases: 1. The program ends. Obviously. However, there is a little problem that plugin library is destroyed before the memory manager is destroyed, leading to a fact that the call to target plugin will not succeed. 2. Device is out of memory when we request a new memory. The manager will walk through all free buffers from the bucket with largest base size, pick up one buffer, free it, and try to allocate immediately. If it succeeds, it will return right away rather than freeing all buffers in free list. Update: A threshold (8KB by default) is set such that users could control what size of memory will be managed by the manager. It can also be configured by an environment variable `LIBOMPTARGET_MEMORY_MANAGER_THRESHOLD`. Reviewed By: jdoerfert, ye-luo, JonChesterfield Differential Revision: https://reviews.llvm.org/D81054
2020-08-19 23:12:02 -04:00
DeviceTy::~DeviceTy() {
if (DeviceID == -1 || !(getInfoLevel() & OMP_INFOTYPE_DUMP_TABLE))
return;
ident_t Loc = {0, 0, 0, 0, ";libomptarget;libomptarget;0;0;;"};
dumpTargetPointerMappings(&Loc, *this);
}
[OpenMP] Introduce target memory manager Target memory manager is introduced in this patch which aims to manage target memory such that they will not be freed immediately when they are not used because the overhead of memory allocation and free is very large. For CUDA device, cuMemFree even blocks the context switch on device which affects concurrent kernel execution. The memory manager can be taken as a memory pool. It divides the pool into multiple buckets according to the size such that memory allocation/free distributed to different buckets will not affect each other. In this version, we use the exact-equality policy to find a free buffer. This is an open question: will best-fit work better here? IMO, best-fit is not good for target memory management because computation on GPU usually requires GBs of data. Best-fit might lead to a serious waste. For example, there is a free buffer of size 1960MB, and now we need a buffer of size 1200MB. If best-fit, the free buffer will be returned, leading to a 760MB waste. The allocation will happen when there is no free memory left, and the memory free on device will take place in the following two cases: 1. The program ends. Obviously. However, there is a little problem that plugin library is destroyed before the memory manager is destroyed, leading to a fact that the call to target plugin will not succeed. 2. Device is out of memory when we request a new memory. The manager will walk through all free buffers from the bucket with largest base size, pick up one buffer, free it, and try to allocate immediately. If it succeeds, it will return right away rather than freeing all buffers in free list. Update: A threshold (8KB by default) is set such that users could control what size of memory will be managed by the manager. It can also be configured by an environment variable `LIBOMPTARGET_MEMORY_MANAGER_THRESHOLD`. Reviewed By: jdoerfert, ye-luo, JonChesterfield Differential Revision: https://reviews.llvm.org/D81054
2020-08-19 23:12:02 -04:00
llvm::Error DeviceTy::init() {
int32_t Ret = RTL->init_device(RTLDeviceID);
if (Ret != OFFLOAD_SUCCESS)
return error::createOffloadError(error::ErrorCode::BACKEND_FAILURE,
"failed to initialize device %d\n",
DeviceID);
[OpenMP] Introduce target memory manager Target memory manager is introduced in this patch which aims to manage target memory such that they will not be freed immediately when they are not used because the overhead of memory allocation and free is very large. For CUDA device, cuMemFree even blocks the context switch on device which affects concurrent kernel execution. The memory manager can be taken as a memory pool. It divides the pool into multiple buckets according to the size such that memory allocation/free distributed to different buckets will not affect each other. In this version, we use the exact-equality policy to find a free buffer. This is an open question: will best-fit work better here? IMO, best-fit is not good for target memory management because computation on GPU usually requires GBs of data. Best-fit might lead to a serious waste. For example, there is a free buffer of size 1960MB, and now we need a buffer of size 1200MB. If best-fit, the free buffer will be returned, leading to a 760MB waste. The allocation will happen when there is no free memory left, and the memory free on device will take place in the following two cases: 1. The program ends. Obviously. However, there is a little problem that plugin library is destroyed before the memory manager is destroyed, leading to a fact that the call to target plugin will not succeed. 2. Device is out of memory when we request a new memory. The manager will walk through all free buffers from the bucket with largest base size, pick up one buffer, free it, and try to allocate immediately. If it succeeds, it will return right away rather than freeing all buffers in free list. Update: A threshold (8KB by default) is set such that users could control what size of memory will be managed by the manager. It can also be configured by an environment variable `LIBOMPTARGET_MEMORY_MANAGER_THRESHOLD`. Reviewed By: jdoerfert, ye-luo, JonChesterfield Differential Revision: https://reviews.llvm.org/D81054
2020-08-19 23:12:02 -04:00
// Enables recording kernels if set.
BoolEnvar OMPX_RecordKernel("LIBOMPTARGET_RECORD", false);
if (OMPX_RecordKernel) {
// Enables saving the device memory kernel output post execution if set.
BoolEnvar OMPX_ReplaySaveOutput("LIBOMPTARGET_RR_SAVE_OUTPUT", false);
uint64_t ReqPtrArgOffset;
RTL->initialize_record_replay(RTLDeviceID, 0, nullptr, true,
OMPX_ReplaySaveOutput, ReqPtrArgOffset);
}
return llvm::Error::success();
}
// Extract the mapping of host function pointers to device function pointers
// from the entry table. Functions marked as 'indirect' in OpenMP will have
// offloading entries generated for them which map the host's function pointer
// to a global containing the corresponding function pointer on the device.
//
// Returns {table device pointer, number of entries}, or {nullptr, 0} when the
// image contains no indirect entries.
static llvm::Expected<std::pair<void *, uint64_t>>
setupIndirectCallTable(DeviceTy &Device, __tgt_device_image *Image,
                       __tgt_device_binary Binary) {
  AsyncInfoTy AsyncInfo(Device);
  llvm::ArrayRef<llvm::offloading::EntryTy> Entries(Image->EntriesBegin,
                                                    Image->EntriesEnd);
  llvm::SmallVector<std::pair<void *, void *>> IndirectCallTable;
  for (const auto &Entry : Entries) {
    // Only OpenMP entries with a nonzero size that are flagged indirect
    // (either a lone function pointer or a whole vtable) are relevant.
    if (Entry.Kind != llvm::object::OffloadKind::OFK_OpenMP ||
        Entry.Size == 0 ||
        (!(Entry.Flags & OMP_DECLARE_TARGET_INDIRECT) &&
         !(Entry.Flags & OMP_DECLARE_TARGET_INDIRECT_VTABLE)))
      continue;

    size_t PtrSize = sizeof(void *);
    if (Entry.Flags & OMP_DECLARE_TARGET_INDIRECT_VTABLE) {
      // This is a VTable entry, the current entry is the first index of the
      // VTable and Entry.Size is the total size of the VTable. Unlike the
      // indirect function case below, the Global is not of size Entry.Size and
      // is instead of size PtrSize (sizeof(void*)).
      void *VTable = nullptr;
      void *Res = nullptr;
      if (Device.RTL->get_global(Binary, PtrSize, Entry.SymbolName, &VTable))
        return error::createOffloadError(error::ErrorCode::INVALID_BINARY,
                                         "failed to load %s", Entry.SymbolName);
      if (Device.retrieveData(&Res, VTable, PtrSize, AsyncInfo))
        return error::createOffloadError(error::ErrorCode::INVALID_BINARY,
                                         "failed to load %s", Entry.SymbolName);
      // 'Res' is written asynchronously; synchronize before reading it below.
      if (Device.synchronize(AsyncInfo))
        return error::createOffloadError(
            error::ErrorCode::INVALID_BINARY,
            "failed to synchronize after retrieving %s", Entry.SymbolName);
      // Calculate and emplace entire Vtable from first Vtable byte
      for (uint64_t I = 0; I < Entry.Size / PtrSize; ++I) {
        auto &[HstPtr, DevPtr] = IndirectCallTable.emplace_back();
        HstPtr = reinterpret_cast<void *>(
            reinterpret_cast<uintptr_t>(Entry.Address) + I * PtrSize);
        DevPtr = reinterpret_cast<void *>(reinterpret_cast<uintptr_t>(Res) +
                                          I * PtrSize);
      }
    } else {
      // Indirect function case: Entry.Size should equal PtrSize since we're
      // dealing with a single function pointer (not a VTable)
      assert(Entry.Size == PtrSize && "Global not a function pointer?");
      auto &[HstPtr, DevPtr] = IndirectCallTable.emplace_back();
      void *Ptr = nullptr;
      if (Device.RTL->get_global(Binary, Entry.Size, Entry.SymbolName, &Ptr))
        return error::createOffloadError(error::ErrorCode::INVALID_BINARY,
                                         "failed to load %s", Entry.SymbolName);
      HstPtr = Entry.Address;
      if (Device.retrieveData(&DevPtr, Ptr, Entry.Size, AsyncInfo))
        return error::createOffloadError(error::ErrorCode::INVALID_BINARY,
                                         "failed to load %s", Entry.SymbolName);
    }
    // Ensure the device pointer(s) retrieved for this entry have landed in
    // host memory before the table is read and submitted below.
    if (Device.synchronize(AsyncInfo))
      return error::createOffloadError(
          error::ErrorCode::INVALID_BINARY,
          "failed to synchronize after retrieving %s", Entry.SymbolName);
  }

  // If we do not have any indirect globals we exit early.
  if (IndirectCallTable.empty())
    return std::pair{nullptr, 0};

  // Sort the array to allow for more efficient lookup of device pointers.
  llvm::sort(IndirectCallTable,
             [](const auto &x, const auto &y) { return x.first < y.first; });

  uint64_t TableSize =
      IndirectCallTable.size() * sizeof(std::pair<void *, void *>);
  void *DevicePtr = Device.allocData(TableSize, nullptr, TARGET_ALLOC_DEVICE);
  // Guard against a failed device allocation before submitting the table.
  if (!DevicePtr)
    return error::createOffloadError(error::ErrorCode::BACKEND_FAILURE,
                                     "failed to allocate indirect call table");
  if (Device.submitData(DevicePtr, IndirectCallTable.data(), TableSize,
                        AsyncInfo))
    return error::createOffloadError(error::ErrorCode::INVALID_BINARY,
                                     "failed to copy data");
  return std::pair<void *, uint64_t>(DevicePtr, IndirectCallTable.size());
}
// Load binary to device and perform global initialization if needed.
llvm::Expected<__tgt_device_binary>
DeviceTy::loadBinary(__tgt_device_image *Img) {
  __tgt_device_binary DevBinary;
  if (RTL->load_binary(RTLDeviceID, Img, &DevBinary) != OFFLOAD_SUCCESS)
    return error::createOffloadError(error::ErrorCode::INVALID_BINARY,
                                     "failed to load binary %p", Img);

  // The device environment global is optional; an image without it needs no
  // further initialization.
  void *EnvGlobalPtr;
  if (RTL->get_global(DevBinary, sizeof(DeviceEnvironmentTy),
                      "__omp_rtl_device_environment", &EnvGlobalPtr))
    return DevBinary;

  // Obtain a table mapping host function pointers to device function pointers.
  auto CallTablePairOrErr = setupIndirectCallTable(*this, Img, DevBinary);
  if (!CallTablePairOrErr)
    return CallTablePairOrErr.takeError();

  GenericDeviceTy &GenericDevice = RTL->getDevice(RTLDeviceID);

  // Populate the environment structure consumed by the device-side runtime.
  DeviceEnvironmentTy Env;
  Env.DeviceDebugKind = GenericDevice.getDebugKind();
  Env.NumDevices = RTL->getNumDevices();
  // TODO: The device ID used here is not the real device ID used by OpenMP.
  Env.DeviceNum = RTLDeviceID;
  Env.DynamicMemSize = GenericDevice.getDynamicMemorySize();
  Env.ClockFrequency = GenericDevice.getClockFrequency();
  Env.IndirectCallTable =
      reinterpret_cast<uintptr_t>(CallTablePairOrErr->first);
  Env.IndirectCallTableSize = CallTablePairOrErr->second;
  Env.HardwareParallelism = GenericDevice.getHardwareParallelism();

  // Write the populated environment into the device global.
  AsyncInfoTy AsyncInfo(*this);
  if (submitData(EnvGlobalPtr, &Env, sizeof(Env), AsyncInfo))
    return error::createOffloadError(error::ErrorCode::INVALID_BINARY,
                                     "failed to copy data");
  return DevBinary;
}
/// Allocate \p Size bytes on this device via the plugin; \p HstPtr and
/// \p Kind are forwarded to the plugin allocator. Returns the target pointer
/// (or whatever the plugin returns on failure).
void *DeviceTy::allocData(int64_t Size, void *HstPtr, int32_t Kind) {
  void *DevPtr = nullptr;
  /// RAII to establish tool anchors before and after data allocation
  OMPT_IF_BUILT(InterfaceRAII TargetDataAllocRAII(
                    RegionInterface.getCallbacks<ompt_target_data_alloc>(),
                    DeviceID, HstPtr, &DevPtr, Size,
                    /*CodePtr=*/OMPT_GET_RETURN_ADDRESS);)

  DevPtr = RTL->data_alloc(RTLDeviceID, Size, HstPtr, Kind);
  return DevPtr;
}
int32_t DeviceTy::deleteData(void *TgtAllocBegin, int32_t Kind) {
/// RAII to establish tool anchors before and after data deletion
OMPT_IF_BUILT(InterfaceRAII TargetDataDeleteRAII(
RegionInterface.getCallbacks<ompt_target_data_delete>(),
DeviceID, TgtAllocBegin,
/*CodePtr=*/OMPT_GET_RETURN_ADDRESS);)
return RTL->data_delete(RTLDeviceID, TgtAllocBegin, Kind);
}
// Submit data to device
//
// Copies Size bytes from host HstPtrBegin to device TgtPtrBegin through the
// plugin's async submit entry point. Entry/HDTTMapPtr are only used for the
// optional INFO-level copy report. Returns the plugin's status code.
int32_t DeviceTy::submitData(void *TgtPtrBegin, void *HstPtrBegin, int64_t Size,
                             AsyncInfoTy &AsyncInfo, HostDataToTargetTy *Entry,
                             MappingInfoTy::HDTTMapAccessorTy *HDTTMapPtr) {
  // Emit a copy report when INFO-level data-transfer tracing is enabled.
  if (getInfoLevel() & OMP_INFOTYPE_DATA_TRANSFER)
    MappingInfo.printCopyInfo(TgtPtrBegin, HstPtrBegin, Size, /*H2D=*/true,
                              Entry, HDTTMapPtr);

  /// RAII to establish tool anchors before and after data submit
  OMPT_IF_BUILT(
      InterfaceRAII TargetDataSubmitRAII(
          RegionInterface.getCallbacks<ompt_target_data_transfer_to_device>(),
          omp_get_initial_device(), HstPtrBegin, DeviceID, TgtPtrBegin, Size,
          /*CodePtr=*/OMPT_GET_RETURN_ADDRESS);)

  return RTL->data_submit_async(RTLDeviceID, TgtPtrBegin, HstPtrBegin, Size,
                                AsyncInfo);
}
// Retrieve data from device
//
// Copies Size bytes from device TgtPtrBegin to host HstPtrBegin through the
// plugin's async retrieve entry point. Entry/HDTTMapPtr are only used for the
// optional INFO-level copy report. Returns the plugin's status code.
int32_t DeviceTy::retrieveData(void *HstPtrBegin, void *TgtPtrBegin,
                               int64_t Size, AsyncInfoTy &AsyncInfo,
                               HostDataToTargetTy *Entry,
                               MappingInfoTy::HDTTMapAccessorTy *HDTTMapPtr) {
  // Emit a copy report when INFO-level data-transfer tracing is enabled.
  if (getInfoLevel() & OMP_INFOTYPE_DATA_TRANSFER)
    MappingInfo.printCopyInfo(TgtPtrBegin, HstPtrBegin, Size, /*H2D=*/false,
                              Entry, HDTTMapPtr);

  /// RAII to establish tool anchors before and after data retrieval
  OMPT_IF_BUILT(
      InterfaceRAII TargetDataRetrieveRAII(
          RegionInterface.getCallbacks<ompt_target_data_transfer_from_device>(),
          DeviceID, TgtPtrBegin, omp_get_initial_device(), HstPtrBegin, Size,
          /*CodePtr=*/OMPT_GET_RETURN_ADDRESS);)

  return RTL->data_retrieve_async(RTLDeviceID, HstPtrBegin, TgtPtrBegin, Size,
                                  AsyncInfo);
}
// Copy data from current device to destination device directly
int32_t DeviceTy::dataExchange(void *SrcPtr, DeviceTy &DstDev, void *DstPtr,
int64_t Size, AsyncInfoTy &AsyncInfo) {
/// RAII to establish tool anchors before and after data exchange
/// Note: Despite the fact that this is a data exchange, we use 'from_device'
/// operation enum (w.r.t. ompt_target_data_op_t) as there is currently
/// no better alternative. It is still possible to distinguish this
/// scenario from a real data retrieve by checking if both involved
/// device numbers are less than omp_get_num_devices().
OMPT_IF_BUILT(
InterfaceRAII TargetDataExchangeRAII(
RegionInterface.getCallbacks<ompt_target_data_transfer_from_device>(),
RTLDeviceID, SrcPtr, DstDev.RTLDeviceID, DstPtr, Size,
/*CodePtr=*/OMPT_GET_RETURN_ADDRESS);)
if (!AsyncInfo) {
return RTL->data_exchange(RTLDeviceID, SrcPtr, DstDev.RTLDeviceID, DstPtr,
Size);
}
return RTL->data_exchange_async(RTLDeviceID, SrcPtr, DstDev.RTLDeviceID,
DstPtr, Size, AsyncInfo);
}
[Offload] Introduce ATTACH map-type support for pointer attachment. (#149036) This patch introduces libomptarget support for the ATTACH map-type, which can be used to implement OpenMP conditional compliant pointer attachment, based on whether the pointer/pointee is newly mapped on a given construct. For example, for the following: ```c int *p; #pragma omp target enter data map(p[1:10]) ``` The following maps can be emitted by clang: ``` (A) &p[0], &p[1], 10 * sizeof(p[1]), TO | FROM &p, &p[1], sizeof(p), ATTACH ``` Without this map-type, these two possible maps could be emitted by clang: ``` (B) &p[0], &p[1], 10 * sizeof(p[1]), TO | FROM (C) &p, &p[1], 10 * sizeof(p[1]), TO | FROM | PTR_AND_OBJ ```` (B) does not perform any pointer attachment, while (C) also maps the pointer p, which are both incorrect. In terms of implementation, maps with the ATTACH map-type are handled after all other maps have been processed, as it requires knowledge of which new allocations happened as part of the construct. As per OpenMP 5.0, an attachment should happen only when either the pointer or the pointee was newly mapped while handling the construct. Maps with ATTACH map-type-bit do not increase/decrease the ref-count. With OpenMP 6.1, `attach(always/never)` can be used to force/prevent attachment. For `attach(always)`, the compiler will insert the ALWAYS map-type, which would let libomptarget bypass the check about one of the pointer/pointee being new. With `attach(never)`, the ATTACH map will not be emitted at all. The size argument of the ATTACH map-type can specify values greater than `sizeof(void*)` which can be used to support pointer attachment on Fortran descriptors. Note that this also requires shadow-pointer tracking to also support them. That has not been implemented in this patch. This was worked upon in coordination with Ravi Narayanaswamy, who has since retired. Happy retirement, Ravi! --------- Co-authored-by: Alex Duran <alejandro.duran@intel.com>
2025-08-17 15:17:04 -07:00
// Forward a data-fence request for the given async queue to the plugin;
// returns the plugin's status code.
int32_t DeviceTy::dataFence(AsyncInfoTy &AsyncInfo) {
  return RTL->data_fence(RTLDeviceID, AsyncInfo);
}
/// Notify the plugin that the host range starting at \p HstPtr of \p Size
/// bytes has been mapped. Returns OFFLOAD_SUCCESS / OFFLOAD_FAIL.
int32_t DeviceTy::notifyDataMapped(void *HstPtr, int64_t Size) {
  ODBG(ODT_Mapping) << "Notifying about new mapping: HstPtr=" << HstPtr
                    << ", Size=" << Size;

  int32_t Rc = RTL->data_notify_mapped(RTLDeviceID, HstPtr, Size);
  if (!Rc)
    return OFFLOAD_SUCCESS;

  REPORT() << "Notifying about data mapping failed.";
  return OFFLOAD_FAIL;
}
/// Notify the plugin that the host pointer \p HstPtr has been unmapped.
/// Returns OFFLOAD_SUCCESS / OFFLOAD_FAIL.
int32_t DeviceTy::notifyDataUnmapped(void *HstPtr) {
  ODBG(ODT_Mapping) << "Notifying about an unmapping: HstPtr=" << HstPtr;

  int32_t Rc = RTL->data_notify_unmapped(RTLDeviceID, HstPtr);
  if (!Rc)
    return OFFLOAD_SUCCESS;

  REPORT() << "Notifying about data unmapping failed.";
  return OFFLOAD_FAIL;
}
// Run region on device
//
// Forwards the kernel launch to the plugin; KernelArgs is passed through by
// address. Returns the plugin's status code.
int32_t DeviceTy::launchKernel(void *TgtEntryPtr, void **TgtVarsPtr,
                               ptrdiff_t *TgtOffsets, KernelArgsTy &KernelArgs,
                               AsyncInfoTy &AsyncInfo) {
  return RTL->launch_kernel(RTLDeviceID, TgtEntryPtr, TgtVarsPtr, TgtOffsets,
                            &KernelArgs, AsyncInfo);
}
// Print information about this device via the plugin. Always returns true;
// the plugin call's own status is not propagated.
bool DeviceTy::printDeviceInfo() {
  RTL->print_device_info(RTLDeviceID);
  return true;
}
// Whether data can be copied to DstDevice directly
bool DeviceTy::isDataExchangable(const DeviceTy &DstDevice) {
  // Direct exchange requires both devices to be driven by the same plugin.
  if (RTL != DstDevice.RTL)
    return false;
  return RTL->is_data_exchangable(RTLDeviceID, DstDevice.RTLDeviceID);
}
// Forward synchronization of the given async queue to the plugin; returns the
// plugin's status code.
int32_t DeviceTy::synchronize(AsyncInfoTy &AsyncInfo) {
  return RTL->synchronize(RTLDeviceID, AsyncInfo);
}
// Forward a non-blocking completion query for the given async queue to the
// plugin; returns the plugin's status code.
int32_t DeviceTy::queryAsync(AsyncInfoTy &AsyncInfo) {
  return RTL->query_async(RTLDeviceID, AsyncInfo);
}
// Create a plugin event object in *Event; returns the plugin's status code.
int32_t DeviceTy::createEvent(void **Event) {
  return RTL->create_event(RTLDeviceID, Event);
}
// Record Event on the queue associated with AsyncInfo; returns the plugin's
// status code.
int32_t DeviceTy::recordEvent(void *Event, AsyncInfoTy &AsyncInfo) {
  return RTL->record_event(RTLDeviceID, Event, AsyncInfo);
}
// Make the queue associated with AsyncInfo wait on Event; returns the
// plugin's status code.
int32_t DeviceTy::waitEvent(void *Event, AsyncInfoTy &AsyncInfo) {
  return RTL->wait_event(RTLDeviceID, Event, AsyncInfo);
}
// Forward a blocking synchronization on Event to the plugin; returns the
// plugin's status code.
int32_t DeviceTy::syncEvent(void *Event) {
  return RTL->sync_event(RTLDeviceID, Event);
}
// Release the plugin resources backing Event; returns the plugin's status
// code.
int32_t DeviceTy::destroyEvent(void *Event) {
  return RTL->destroy_event(RTLDeviceID, Event);
}
/// Print every offload entry registered for this device to stderr, labeled by
/// entry kind (kernel / link / global var.).
void DeviceTy::dumpOffloadEntries() {
  fprintf(stderr, "Device %i offload entries:\n", DeviceID);

  // Hold exclusive access to the entry table for the duration of the dump.
  auto Accessor = DeviceOffloadEntries.getExclusiveAccessor();
  for (auto &It : *Accessor) {
    const char *Kind = It.second.isLink()     ? "link"
                       : It.second.isGlobal() ? "global var."
                                              : "kernel";
    fprintf(stderr, " %11s: %s\n", Kind, It.second.getNameAsCStr());
  }
}
/// Whether this device should use automatic zero-copy: never when the program
/// requires unified shared memory, otherwise deferred to the plugin.
bool DeviceTy::useAutoZeroCopy() {
  const bool RequiresUSM =
      PM->getRequirements() & OMP_REQ_UNIFIED_SHARED_MEMORY;
  return !RequiresUSM && RTL->use_auto_zero_copy(RTLDeviceID);
}
// Query the plugin whether the range [Ptr, Ptr+Size) is accessible from this
// device.
bool DeviceTy::isAccessiblePtr(const void *Ptr, size_t Size) {
  return RTL->is_accessible_ptr(RTLDeviceID, Ptr, Size);
}