[openmp][amdgpu] Implement target_alloc_host as fine grain HSA memory

The CUDA plugin maps TARGET_ALLOC_HOST onto cuMemAllocHost, which allocates page-locked host memory. Fine-grain HSA memory is not necessarily page-locked, but it has the same semantics: it can be read and written from both the host and the device. The CUDA plugin allocates this memory per GPU, whereas this patch makes it accessible from any GPU; it can be locked down to match the CUDA behaviour if preferred.

Enabling tests requires an equivalent of `// RUN: %libomptarget-compile-run-and-check-nvptx64-nvidia-cuda` for amdgpu, which doesn't seem to be in use yet.

Reviewed By: jhuber6

Differential Revision: https://reviews.llvm.org/D132660
2026-01-26 12:26:52 +08:00 · 2022-08-25 16:27:51 +01:00
parent 34fe6ddce1
commit ffabe997a5
3 changed files with 14 additions and 6 deletions
--- a/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp
+++ b/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp
@@ -2620,13 +2620,22 @@ void *__tgt_rtl_data_alloc(int DeviceId, int64_t Size, void *, int32_t Kind) {
  void *Ptr = NULL;
  assert(DeviceId < DeviceInfo().NumberOfDevices && "Device ID too large");

-  if (Kind != TARGET_ALLOC_DEFAULT) {
+  hsa_amd_memory_pool_t MemoryPool;
+  switch (Kind) {
+  case TARGET_ALLOC_DEFAULT:
+    // GPU memory
+    MemoryPool = DeviceInfo().getDeviceMemoryPool(DeviceId);
+    break;
+  case TARGET_ALLOC_HOST:
+    // non-migratable memory accessible by host and device(s)
+    MemoryPool = DeviceInfo().getHostMemoryPool();
+    break;
+  default:
    REPORT("Invalid target data allocation kind or requested allocator not "
           "implemented yet\n");
    return NULL;
  }

-  hsa_amd_memory_pool_t MemoryPool = DeviceInfo().getDeviceMemoryPool(DeviceId);
  hsa_status_t Err = hsa_amd_memory_pool_allocate(MemoryPool, Size, 0, &Ptr);
  DP("Tgt alloc data %ld bytes, (tgt:%016llx).\n", Size,
     (long long unsigned)(Elf64_Addr)Ptr);
@@ -2677,6 +2686,7 @@ int32_t __tgt_rtl_data_retrieve_async(int DeviceId, void *HstPtr, void *TgtPtr,

 int32_t __tgt_rtl_data_delete(int DeviceId, void *TgtPtr) {
  assert(DeviceId < DeviceInfo().NumberOfDevices && "Device ID too large");
+  // HSA can free pointers allocated from different types of memory pool.
  hsa_status_t Err;
  DP("Tgt free data (tgt:%016llx).\n", (long long unsigned)(Elf64_Addr)TgtPtr);
  Err = core::Runtime::Memfree(TgtPtr);
--- a/openmp/libomptarget/test/api/omp_host_pinned_memory.c
+++ b/openmp/libomptarget/test/api/omp_host_pinned_memory.c
@@ -1,5 +1,4 @@
-// RUN: %libomptarget-compile-run-and-check-nvptx64-nvidia-cuda
-// REQUIRES: nvptx64-nvidia-cuda
+// RUN: %libomptarget-compile-run-and-check-generic

 #include <omp.h>
 #include <stdio.h>
--- a/openmp/libomptarget/test/api/omp_host_pinned_memory_alloc.c
+++ b/openmp/libomptarget/test/api/omp_host_pinned_memory_alloc.c
@@ -1,5 +1,4 @@
-// RUN: %libomptarget-compile-run-and-check-nvptx64-nvidia-cuda
-// REQUIRES: nvptx64-nvidia-cuda
+// RUN: %libomptarget-compile-run-and-check-generic

 #include <omp.h>
 #include <stdio.h>