mirror of
https://github.com/intel/llvm.git
synced 2026-01-26 12:26:52 +08:00
[openmp][amdgpu] Implement target_alloc_host as fine grain HSA memory
The CUDA plugin maps TARGET_ALLOC_HOST onto cuMemAllocHost, which is page-locked host memory. Fine-grain HSA memory is not necessarily page-locked, but it has the same semantics: it can be read and written from either the host or the device. The CUDA plugin makes this allocation per GPU, whereas this patch makes the memory accessible from any GPU; it can be locked down to match the CUDA behaviour if preferred.
Enabling the tests requires an amdgpu equivalent of `// RUN: %libomptarget-compile-run-and-check-nvptx64-nvidia-cuda`, which doesn't seem to be in use yet.
Reviewed By: jhuber6
Differential Revision: https://reviews.llvm.org/D132660
This commit is contained in:
@@ -2620,13 +2620,22 @@ void *__tgt_rtl_data_alloc(int DeviceId, int64_t Size, void *, int32_t Kind) {
|
||||
void *Ptr = NULL;
|
||||
assert(DeviceId < DeviceInfo().NumberOfDevices && "Device ID too large");
|
||||
|
||||
if (Kind != TARGET_ALLOC_DEFAULT) {
|
||||
hsa_amd_memory_pool_t MemoryPool;
|
||||
switch (Kind) {
|
||||
case TARGET_ALLOC_DEFAULT:
|
||||
// GPU memory
|
||||
MemoryPool = DeviceInfo().getDeviceMemoryPool(DeviceId);
|
||||
break;
|
||||
case TARGET_ALLOC_HOST:
|
||||
// non-migratable memory accessible by host and device(s)
|
||||
MemoryPool = DeviceInfo().getHostMemoryPool();
|
||||
break;
|
||||
default:
|
||||
REPORT("Invalid target data allocation kind or requested allocator not "
|
||||
"implemented yet\n");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
hsa_amd_memory_pool_t MemoryPool = DeviceInfo().getDeviceMemoryPool(DeviceId);
|
||||
hsa_status_t Err = hsa_amd_memory_pool_allocate(MemoryPool, Size, 0, &Ptr);
|
||||
DP("Tgt alloc data %ld bytes, (tgt:%016llx).\n", Size,
|
||||
(long long unsigned)(Elf64_Addr)Ptr);
|
||||
@@ -2677,6 +2686,7 @@ int32_t __tgt_rtl_data_retrieve_async(int DeviceId, void *HstPtr, void *TgtPtr,
|
||||
|
||||
int32_t __tgt_rtl_data_delete(int DeviceId, void *TgtPtr) {
|
||||
assert(DeviceId < DeviceInfo().NumberOfDevices && "Device ID too large");
|
||||
// HSA can free pointers allocated from different types of memory pool.
|
||||
hsa_status_t Err;
|
||||
DP("Tgt free data (tgt:%016llx).\n", (long long unsigned)(Elf64_Addr)TgtPtr);
|
||||
Err = core::Runtime::Memfree(TgtPtr);
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
// RUN: %libomptarget-compile-run-and-check-nvptx64-nvidia-cuda
|
||||
// REQUIRES: nvptx64-nvidia-cuda
|
||||
// RUN: %libomptarget-compile-run-and-check-generic
|
||||
|
||||
#include <omp.h>
|
||||
#include <stdio.h>
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
// RUN: %libomptarget-compile-run-and-check-nvptx64-nvidia-cuda
|
||||
// REQUIRES: nvptx64-nvidia-cuda
|
||||
// RUN: %libomptarget-compile-run-and-check-generic
|
||||
|
||||
#include <omp.h>
|
||||
#include <stdio.h>
|
||||
|
||||
Reference in New Issue
Block a user