[openmp][amdgpu] Implement target_alloc_host as fine grain HSA memory

The CUDA plugin maps TARGET_ALLOC_HOST onto cuMemAllocHost,
which allocates page-locked host memory. Fine-grain HSA memory is not
necessarily page-locked, but it has the same semantics: it can be read
and written from either the host or the device.

The CUDA plugin does this per GPU, whereas this patch makes the
allocation accessible from any GPU; it can be locked down to match
the CUDA behaviour if preferred.

Enabling the tests requires an amdgpu equivalent of
// RUN: %libomptarget-compile-run-and-check-nvptx64-nvidia-cuda
which doesn't seem to be in use yet.

Reviewed By: jhuber6

Differential Revision: https://reviews.llvm.org/D132660
This commit is contained in:
Jon Chesterfield
2022-08-25 16:27:51 +01:00
parent 34fe6ddce1
commit ffabe997a5
3 changed files with 14 additions and 6 deletions

View File

@@ -2620,13 +2620,22 @@ void *__tgt_rtl_data_alloc(int DeviceId, int64_t Size, void *, int32_t Kind) {
void *Ptr = NULL;
assert(DeviceId < DeviceInfo().NumberOfDevices && "Device ID too large");
if (Kind != TARGET_ALLOC_DEFAULT) {
hsa_amd_memory_pool_t MemoryPool;
switch (Kind) {
case TARGET_ALLOC_DEFAULT:
// GPU memory
MemoryPool = DeviceInfo().getDeviceMemoryPool(DeviceId);
break;
case TARGET_ALLOC_HOST:
// non-migratable memory accessible by host and device(s)
MemoryPool = DeviceInfo().getHostMemoryPool();
break;
default:
REPORT("Invalid target data allocation kind or requested allocator not "
"implemented yet\n");
return NULL;
}
hsa_amd_memory_pool_t MemoryPool = DeviceInfo().getDeviceMemoryPool(DeviceId);
hsa_status_t Err = hsa_amd_memory_pool_allocate(MemoryPool, Size, 0, &Ptr);
DP("Tgt alloc data %ld bytes, (tgt:%016llx).\n", Size,
(long long unsigned)(Elf64_Addr)Ptr);
@@ -2677,6 +2686,7 @@ int32_t __tgt_rtl_data_retrieve_async(int DeviceId, void *HstPtr, void *TgtPtr,
int32_t __tgt_rtl_data_delete(int DeviceId, void *TgtPtr) {
assert(DeviceId < DeviceInfo().NumberOfDevices && "Device ID too large");
// HSA can free pointers allocated from different types of memory pool.
hsa_status_t Err;
DP("Tgt free data (tgt:%016llx).\n", (long long unsigned)(Elf64_Addr)TgtPtr);
Err = core::Runtime::Memfree(TgtPtr);

View File

@@ -1,5 +1,4 @@
// RUN: %libomptarget-compile-run-and-check-nvptx64-nvidia-cuda
// REQUIRES: nvptx64-nvidia-cuda
// RUN: %libomptarget-compile-run-and-check-generic
#include <omp.h>
#include <stdio.h>

View File

@@ -1,5 +1,4 @@
// RUN: %libomptarget-compile-run-and-check-nvptx64-nvidia-cuda
// REQUIRES: nvptx64-nvidia-cuda
// RUN: %libomptarget-compile-run-and-check-generic
#include <omp.h>
#include <stdio.h>