mirror of
https://github.com/intel/llvm.git
synced 2026-01-14 03:50:17 +08:00
[libomptarget] Add support for target update non-contiguous
This patch is the runtime support for https://reviews.llvm.org/D84192. In order not to modify the tgt_target_data_update interface but still pass the extra information needed for a non-contiguous map item (offset, count, and stride for each dimension), this patch overloads `arg` when the map type is set as OMP_TGT_MAPTYPE_DESCRIPTOR. The original `arg` passes the pointer information; the overloaded `arg` is instead an array of descriptor_dim: ``` struct descriptor_dim { int64_t offset; int64_t count; int64_t stride; }; ``` where the array length equals the number of dimensions. In addition, since descriptor_dim already carries the count and stride information, the arg_size parameter is overloaded to hold the dimension count. Reviewed By: grokos, tianshilei1992 Differential Revision: https://reviews.llvm.org/D82245
This commit is contained in:
@@ -52,6 +52,8 @@ enum tgt_map_type {
|
||||
OMP_TGT_MAPTYPE_CLOSE = 0x400,
|
||||
// runtime error if not already allocated
|
||||
OMP_TGT_MAPTYPE_PRESENT = 0x1000,
|
||||
// descriptor for non-contiguous target-update
|
||||
OMP_TGT_MAPTYPE_NON_CONTIG = 0x100000000000,
|
||||
// member of struct, member given by [16 MSBs] - 1
|
||||
OMP_TGT_MAPTYPE_MEMBER_OF = 0xffff000000000000
|
||||
};
|
||||
@@ -123,6 +125,13 @@ struct __tgt_async_info {
|
||||
void *Queue = nullptr;
|
||||
};
|
||||
|
||||
/// This struct is a record of non-contiguous information. One instance
/// describes a single dimension of a non-contiguous target-update region:
/// the starting element offset within that dimension, the number of
/// elements to transfer, and the stride (in bytes, judging by the test
/// values) between consecutive elements. An array of these — one entry per
/// dimension — is passed in place of the usual `arg` pointer when
/// OMP_TGT_MAPTYPE_NON_CONTIG is set, with `arg_size` holding the number
/// of dimensions.
struct __tgt_target_non_contig {
  uint64_t Offset;
  uint64_t Count;
  uint64_t Stride;
};
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
@@ -277,7 +277,7 @@ void *DeviceTy::getOrAllocTgtPtr(void *HstPtrBegin, void *HstPtrBase,
|
||||
return rc;
|
||||
}
|
||||
|
||||
// Used by targetDataBegin, targetDataEnd, target_data_update and target.
|
||||
// Used by targetDataBegin, targetDataEnd, targetDataUpdate and target.
|
||||
// Return the target pointer begin (where the data will be moved).
|
||||
// Decrement the reference counter if called from targetDataEnd.
|
||||
void *DeviceTy::getTgtPtrBegin(void *HstPtrBegin, int64_t Size, bool &IsLast,
|
||||
|
||||
@@ -281,8 +281,8 @@ EXTERN void __tgt_target_data_update_mapper(ident_t *loc, int64_t device_id,
|
||||
}
|
||||
|
||||
DeviceTy &Device = PM->Devices[device_id];
|
||||
int rc = target_data_update(Device, arg_num, args_base, args, arg_sizes,
|
||||
arg_types, arg_names, arg_mappers);
|
||||
int rc = targetDataUpdate(Device, arg_num, args_base, args, arg_sizes,
|
||||
arg_types, arg_names, arg_mappers);
|
||||
HandleTargetOutcome(rc == OFFLOAD_SUCCESS, loc);
|
||||
}
|
||||
|
||||
|
||||
@@ -639,33 +639,158 @@ int targetDataEnd(DeviceTy &Device, int32_t ArgNum, void **ArgBases,
|
||||
return OFFLOAD_SUCCESS;
|
||||
}
|
||||
|
||||
// Transfer one contiguous byte range between host and device for a
// target-update. ArgsBase is unused here; the parameter exists so the
// signature parallels the non-contiguous path. Returns OFFLOAD_SUCCESS or
// OFFLOAD_FAIL.
static int targetDataContiguous(DeviceTy &Device, void *ArgsBase,
                                void *HstPtrBegin, int64_t ArgSize,
                                int64_t ArgType) {
  bool IsLast, IsHostPtr;
  // Look up the existing device mapping; /*MustContain=*/true presumably
  // requires the whole [HstPtrBegin, HstPtrBegin+ArgSize) range to lie
  // inside a single mapped entry.
  void *TgtPtrBegin = Device.getTgtPtrBegin(HstPtrBegin, ArgSize, IsLast, false,
                                            IsHostPtr, /*MustContain=*/true);
  if (!TgtPtrBegin) {
    DP("hst data:" DPxMOD " not found, becomes a noop\n", DPxPTR(HstPtrBegin));
    // The 'present' motion modifier turns a missing mapping into a hard
    // error instead of a silent noop.
    if (ArgType & OMP_TGT_MAPTYPE_PRESENT) {
      MESSAGE("device mapping required by 'present' motion modifier does not "
              "exist for host address " DPxMOD " (%" PRId64 " bytes)",
              DPxPTR(HstPtrBegin), ArgSize);
      return OFFLOAD_FAIL;
    }
    return OFFLOAD_SUCCESS;
  }

  // Under unified shared memory, host and device addresses coincide, so
  // there is nothing to copy.
  if (PM->RTLs.RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY &&
      TgtPtrBegin == HstPtrBegin) {
    DP("hst data:" DPxMOD " unified and shared, becomes a noop\n",
       DPxPTR(HstPtrBegin));
    return OFFLOAD_SUCCESS;
  }

  if (ArgType & OMP_TGT_MAPTYPE_FROM) {
    // Device -> host copy of the whole range.
    DP("Moving %" PRId64 " bytes (tgt:" DPxMOD ") -> (hst:" DPxMOD ")\n",
       ArgSize, DPxPTR(TgtPtrBegin), DPxPTR(HstPtrBegin));
    int Ret = Device.retrieveData(HstPtrBegin, TgtPtrBegin, ArgSize, nullptr);
    if (Ret != OFFLOAD_SUCCESS) {
      REPORT("Copying data from device failed.\n");
      return OFFLOAD_FAIL;
    }

    // The copy just overwrote any attached-pointer slots inside the range
    // with device addresses; restore the saved host pointer values.
    // NOTE(review): the early `break` assumes ShadowPtrMap is ordered by
    // shadow address — confirm against its declaration.
    uintptr_t LB = (uintptr_t)HstPtrBegin;
    uintptr_t UB = (uintptr_t)HstPtrBegin + ArgSize;
    Device.ShadowMtx.lock();
    for (ShadowPtrListTy::iterator IT = Device.ShadowPtrMap.begin();
         IT != Device.ShadowPtrMap.end(); ++IT) {
      void **ShadowHstPtrAddr = (void **)IT->first;
      if ((uintptr_t)ShadowHstPtrAddr < LB)
        continue;
      if ((uintptr_t)ShadowHstPtrAddr >= UB)
        break;
      DP("Restoring original host pointer value " DPxMOD
         " for host pointer " DPxMOD "\n",
         DPxPTR(IT->second.HstPtrVal), DPxPTR(ShadowHstPtrAddr));
      *ShadowHstPtrAddr = IT->second.HstPtrVal;
    }
    Device.ShadowMtx.unlock();
  }

  if (ArgType & OMP_TGT_MAPTYPE_TO) {
    // Host -> device copy of the whole range.
    DP("Moving %" PRId64 " bytes (hst:" DPxMOD ") -> (tgt:" DPxMOD ")\n",
       ArgSize, DPxPTR(HstPtrBegin), DPxPTR(TgtPtrBegin));
    int Ret = Device.submitData(TgtPtrBegin, HstPtrBegin, ArgSize, nullptr);
    if (Ret != OFFLOAD_SUCCESS) {
      REPORT("Copying data to device failed.\n");
      return OFFLOAD_FAIL;
    }

    // The copy clobbered attached pointers on the device with raw host
    // addresses; re-submit the saved device pointer values for every
    // shadow entry inside the range (same ordering assumption as above).
    uintptr_t LB = (uintptr_t)HstPtrBegin;
    uintptr_t UB = (uintptr_t)HstPtrBegin + ArgSize;
    Device.ShadowMtx.lock();
    for (ShadowPtrListTy::iterator IT = Device.ShadowPtrMap.begin();
         IT != Device.ShadowPtrMap.end(); ++IT) {
      void **ShadowHstPtrAddr = (void **)IT->first;
      if ((uintptr_t)ShadowHstPtrAddr < LB)
        continue;
      if ((uintptr_t)ShadowHstPtrAddr >= UB)
        break;
      DP("Restoring original target pointer value " DPxMOD " for target "
         "pointer " DPxMOD "\n",
         DPxPTR(IT->second.TgtPtrVal), DPxPTR(IT->second.TgtPtrAddr));
      Ret = Device.submitData(IT->second.TgtPtrAddr, &IT->second.TgtPtrVal,
                              sizeof(void *), nullptr);
      if (Ret != OFFLOAD_SUCCESS) {
        REPORT("Copying data to device failed.\n");
        // Unlock before the error return so the mutex is not leaked.
        Device.ShadowMtx.unlock();
        return OFFLOAD_FAIL;
      }
    }
    Device.ShadowMtx.unlock();
  }
  return OFFLOAD_SUCCESS;
}
|
||||
|
||||
// Recursively walk a non-contiguous transfer descriptor. For every
// dimension before the last we iterate over its Count elements, adding
// (Offset + I) * Stride to the running byte offset; at the last dimension
// the piece starting at ArgsBase + Offset is contiguous and Size bytes
// long, so it is handed to targetDataContiguous. Returns OFFLOAD_SUCCESS
// or propagates the first failure.
static int targetDataNonContiguous(DeviceTy &Device, void *ArgsBase,
                                   __tgt_target_non_contig *NonContig,
                                   uint64_t Size, int64_t ArgType,
                                   int CurrentDim, int DimSize,
                                   uint64_t Offset) {
  int Ret = OFFLOAD_SUCCESS;
  if (CurrentDim < DimSize) {
    // Use a 64-bit index: Count is uint64_t, so an `unsigned int` index
    // could wrap before reaching Count on pathological descriptors.
    for (uint64_t I = 0; I < NonContig[CurrentDim].Count; ++I) {
      uint64_t CurOffset =
          (NonContig[CurrentDim].Offset + I) * NonContig[CurrentDim].Stride;
      // we only need to transfer the first element for the last dimension
      // since we've already got a contiguous piece.
      if (CurrentDim != DimSize - 1 || I == 0) {
        Ret = targetDataNonContiguous(Device, ArgsBase, NonContig, Size,
                                      ArgType, CurrentDim + 1, DimSize,
                                      Offset + CurOffset);
        // Stop the whole process if any contiguous piece returns anything
        // other than OFFLOAD_SUCCESS.
        if (Ret != OFFLOAD_SUCCESS)
          return Ret;
      }
    }
  } else {
    char *Ptr = (char *)ArgsBase + Offset;
    // Fixed format specifiers: the old "%lx"/"%ld" received uint64_t
    // arguments, which is undefined behavior where `long` is 32 bits
    // (e.g. LLP64). Use DPxMOD for the pointer and PRIu64 for the
    // 64-bit offset/length.
    DP("Transfer of non-contiguous : host ptr " DPxMOD " offset %" PRIu64
       " len %" PRIu64 "\n",
       DPxPTR(Ptr), Offset, Size);
    Ret = targetDataContiguous(Device, ArgsBase, Ptr, Size, ArgType);
  }
  return Ret;
}
|
||||
|
||||
// Count how many dimensions of \p NonContig can be folded away because a
// dimension together with the one after it forms one contiguous run: that
// holds when the inner dimension's count * stride equals the outer
// dimension's stride. The caller shrinks the recursion depth by the
// returned amount, so each transferred piece covers the merged tail.
static int getNonContigMergedDimension(__tgt_target_non_contig *NonContig,
                                       int32_t DimSize) {
  int MergeableDims = 0;
  // Walk from the innermost dimension outward (never testing dimension 0,
  // which has no outer neighbor).
  for (int Dim = DimSize - 1; Dim > 0; --Dim) {
    bool SpansOuterStride =
        NonContig[Dim].Count * NonContig[Dim].Stride == NonContig[Dim - 1].Stride;
    if (SpansOuterStride)
      ++MergeableDims;
  }
  return MergeableDims;
}
|
||||
|
||||
/// Internal function to pass data to/from the target.
|
||||
// async_info_ptr is currently unused, added here so target_data_update has the
|
||||
// async_info_ptr is currently unused, added here so targetDataUpdate has the
|
||||
// same signature as targetDataBegin and targetDataEnd.
|
||||
int target_data_update(DeviceTy &Device, int32_t arg_num, void **args_base,
|
||||
void **args, int64_t *arg_sizes, int64_t *arg_types,
|
||||
map_var_info_t *arg_names, void **arg_mappers,
|
||||
__tgt_async_info *async_info_ptr) {
|
||||
int targetDataUpdate(DeviceTy &Device, int32_t ArgNum, void **ArgsBase,
|
||||
void **Args, int64_t *ArgSizes, int64_t *ArgTypes,
|
||||
map_var_info_t *ArgNames, void **ArgMappers,
|
||||
__tgt_async_info *AsyncInfoPtr) {
|
||||
// process each input.
|
||||
for (int32_t i = 0; i < arg_num; ++i) {
|
||||
if ((arg_types[i] & OMP_TGT_MAPTYPE_LITERAL) ||
|
||||
(arg_types[i] & OMP_TGT_MAPTYPE_PRIVATE))
|
||||
for (int32_t I = 0; I < ArgNum; ++I) {
|
||||
if ((ArgTypes[I] & OMP_TGT_MAPTYPE_LITERAL) ||
|
||||
(ArgTypes[I] & OMP_TGT_MAPTYPE_PRIVATE))
|
||||
continue;
|
||||
|
||||
if (arg_mappers && arg_mappers[i]) {
|
||||
// Instead of executing the regular path of target_data_update, call the
|
||||
// targetDataMapper variant which will call target_data_update again
|
||||
if (ArgMappers && ArgMappers[I]) {
|
||||
// Instead of executing the regular path of targetDataUpdate, call the
|
||||
// targetDataMapper variant which will call targetDataUpdate again
|
||||
// with new arguments.
|
||||
DP("Calling targetDataMapper for the %dth argument\n", i);
|
||||
DP("Calling targetDataMapper for the %dth argument\n", I);
|
||||
|
||||
int rc =
|
||||
targetDataMapper(Device, args_base[i], args[i], arg_sizes[i],
|
||||
arg_types[i], arg_mappers[i], target_data_update);
|
||||
int Ret = targetDataMapper(Device, ArgsBase[I], Args[I], ArgSizes[I],
|
||||
ArgTypes[I], ArgMappers[I], targetDataUpdate);
|
||||
|
||||
if (rc != OFFLOAD_SUCCESS) {
|
||||
REPORT(
|
||||
"Call to target_data_update via targetDataMapper for custom mapper"
|
||||
" failed.\n");
|
||||
if (Ret != OFFLOAD_SUCCESS) {
|
||||
REPORT("Call to targetDataUpdate via targetDataMapper for custom mapper"
|
||||
" failed.\n");
|
||||
return OFFLOAD_FAIL;
|
||||
}
|
||||
|
||||
@@ -673,88 +798,23 @@ int target_data_update(DeviceTy &Device, int32_t arg_num, void **args_base,
|
||||
continue;
|
||||
}
|
||||
|
||||
void *HstPtrBegin = args[i];
|
||||
int64_t MapSize = arg_sizes[i];
|
||||
bool IsLast, IsHostPtr;
|
||||
void *TgtPtrBegin = Device.getTgtPtrBegin(
|
||||
HstPtrBegin, MapSize, IsLast, false, IsHostPtr, /*MustContain=*/true);
|
||||
if (!TgtPtrBegin) {
|
||||
DP("hst data:" DPxMOD " not found, becomes a noop\n", DPxPTR(HstPtrBegin));
|
||||
if (arg_types[i] & OMP_TGT_MAPTYPE_PRESENT) {
|
||||
MESSAGE("device mapping required by 'present' motion modifier does not "
|
||||
"exist for host address " DPxMOD " (%" PRId64 " bytes)",
|
||||
DPxPTR(HstPtrBegin), MapSize);
|
||||
return OFFLOAD_FAIL;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
if (PM->RTLs.RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY &&
|
||||
TgtPtrBegin == HstPtrBegin) {
|
||||
DP("hst data:" DPxMOD " unified and shared, becomes a noop\n",
|
||||
DPxPTR(HstPtrBegin));
|
||||
continue;
|
||||
}
|
||||
|
||||
if (arg_types[i] & OMP_TGT_MAPTYPE_FROM) {
|
||||
DP("Moving %" PRId64 " bytes (tgt:" DPxMOD ") -> (hst:" DPxMOD ")\n",
|
||||
arg_sizes[i], DPxPTR(TgtPtrBegin), DPxPTR(HstPtrBegin));
|
||||
int rt = Device.retrieveData(HstPtrBegin, TgtPtrBegin, MapSize, nullptr);
|
||||
if (rt != OFFLOAD_SUCCESS) {
|
||||
REPORT("Copying data from device failed.\n");
|
||||
return OFFLOAD_FAIL;
|
||||
}
|
||||
|
||||
uintptr_t lb = (uintptr_t) HstPtrBegin;
|
||||
uintptr_t ub = (uintptr_t) HstPtrBegin + MapSize;
|
||||
Device.ShadowMtx.lock();
|
||||
for (ShadowPtrListTy::iterator it = Device.ShadowPtrMap.begin();
|
||||
it != Device.ShadowPtrMap.end(); ++it) {
|
||||
void **ShadowHstPtrAddr = (void**) it->first;
|
||||
if ((uintptr_t) ShadowHstPtrAddr < lb)
|
||||
continue;
|
||||
if ((uintptr_t) ShadowHstPtrAddr >= ub)
|
||||
break;
|
||||
DP("Restoring original host pointer value " DPxMOD " for host pointer "
|
||||
DPxMOD "\n", DPxPTR(it->second.HstPtrVal),
|
||||
DPxPTR(ShadowHstPtrAddr));
|
||||
*ShadowHstPtrAddr = it->second.HstPtrVal;
|
||||
}
|
||||
Device.ShadowMtx.unlock();
|
||||
}
|
||||
|
||||
if (arg_types[i] & OMP_TGT_MAPTYPE_TO) {
|
||||
DP("Moving %" PRId64 " bytes (hst:" DPxMOD ") -> (tgt:" DPxMOD ")\n",
|
||||
arg_sizes[i], DPxPTR(HstPtrBegin), DPxPTR(TgtPtrBegin));
|
||||
int rt = Device.submitData(TgtPtrBegin, HstPtrBegin, MapSize, nullptr);
|
||||
if (rt != OFFLOAD_SUCCESS) {
|
||||
REPORT("Copying data to device failed.\n");
|
||||
return OFFLOAD_FAIL;
|
||||
}
|
||||
|
||||
uintptr_t lb = (uintptr_t) HstPtrBegin;
|
||||
uintptr_t ub = (uintptr_t) HstPtrBegin + MapSize;
|
||||
Device.ShadowMtx.lock();
|
||||
for (ShadowPtrListTy::iterator it = Device.ShadowPtrMap.begin();
|
||||
it != Device.ShadowPtrMap.end(); ++it) {
|
||||
void **ShadowHstPtrAddr = (void **)it->first;
|
||||
if ((uintptr_t)ShadowHstPtrAddr < lb)
|
||||
continue;
|
||||
if ((uintptr_t)ShadowHstPtrAddr >= ub)
|
||||
break;
|
||||
DP("Restoring original target pointer value " DPxMOD " for target "
|
||||
"pointer " DPxMOD "\n",
|
||||
DPxPTR(it->second.TgtPtrVal), DPxPTR(it->second.TgtPtrAddr));
|
||||
rt = Device.submitData(it->second.TgtPtrAddr, &it->second.TgtPtrVal,
|
||||
sizeof(void *), nullptr);
|
||||
if (rt != OFFLOAD_SUCCESS) {
|
||||
REPORT("Copying data to device failed.\n");
|
||||
Device.ShadowMtx.unlock();
|
||||
return OFFLOAD_FAIL;
|
||||
}
|
||||
}
|
||||
Device.ShadowMtx.unlock();
|
||||
int Ret = OFFLOAD_SUCCESS;
|
||||
|
||||
if (ArgTypes[I] & OMP_TGT_MAPTYPE_NON_CONTIG) {
|
||||
__tgt_target_non_contig *NonContig = (__tgt_target_non_contig *)Args[I];
|
||||
int32_t DimSize = ArgSizes[I];
|
||||
uint64_t Size =
|
||||
NonContig[DimSize - 1].Count * NonContig[DimSize - 1].Stride;
|
||||
int32_t MergedDim = getNonContigMergedDimension(NonContig, DimSize);
|
||||
Ret = targetDataNonContiguous(
|
||||
Device, ArgsBase[I], NonContig, Size, ArgTypes[I],
|
||||
/*current_dim=*/0, DimSize - MergedDim, /*offset=*/0);
|
||||
} else {
|
||||
Ret = targetDataContiguous(Device, ArgsBase[I], Args[I], ArgSizes[I],
|
||||
ArgTypes[I]);
|
||||
}
|
||||
if (Ret == OFFLOAD_FAIL)
|
||||
return OFFLOAD_FAIL;
|
||||
}
|
||||
return OFFLOAD_SUCCESS;
|
||||
}
|
||||
|
||||
@@ -28,11 +28,10 @@ extern int targetDataEnd(DeviceTy &Device, int32_t ArgNum, void **ArgBases,
|
||||
map_var_info_t *arg_names, void **ArgMappers,
|
||||
__tgt_async_info *AsyncInfo);
|
||||
|
||||
extern int target_data_update(DeviceTy &Device, int32_t arg_num,
|
||||
void **args_base, void **args, int64_t *arg_sizes,
|
||||
int64_t *arg_types, map_var_info_t *arg_names,
|
||||
void **arg_mappers,
|
||||
__tgt_async_info *async_info_ptr = nullptr);
|
||||
extern int targetDataUpdate(DeviceTy &Device, int32_t arg_num, void **args_base,
|
||||
void **args, int64_t *arg_sizes, int64_t *arg_types,
|
||||
map_var_info_t *arg_names, void **arg_mappers,
|
||||
__tgt_async_info *async_info_ptr = nullptr);
|
||||
|
||||
extern int target(int64_t DeviceId, void *HostPtr, int32_t ArgNum,
|
||||
void **ArgBases, void **Args, int64_t *ArgSizes,
|
||||
@@ -68,7 +67,7 @@ struct MapperComponentsTy {
|
||||
typedef void (*MapperFuncPtrTy)(void *, void *, void *, int64_t, int64_t);
|
||||
|
||||
// Function pointer type for target_data_* functions (targetDataBegin,
|
||||
// targetDataEnd and target_data_update).
|
||||
// targetDataEnd and targetDataUpdate).
|
||||
typedef int (*TargetDataFuncPtrTy)(DeviceTy &, int32_t, void **, void **,
|
||||
int64_t *, int64_t *, map_var_info_t *,
|
||||
void **, __tgt_async_info *);
|
||||
|
||||
101
openmp/libomptarget/test/offloading/non_contiguous_update.cpp
Normal file
101
openmp/libomptarget/test/offloading/non_contiguous_update.cpp
Normal file
@@ -0,0 +1,101 @@
|
||||
// RUN: %libomptarget-compile-aarch64-unknown-linux-gnu && env LIBOMPTARGET_DEBUG=1 %libomptarget-run-aarch64-unknown-linux-gnu 2>&1 | %fcheck-aarch64-unknown-linux-gnu -allow-empty -check-prefix=DEBUG
|
||||
// RUN: %libomptarget-compile-powerpc64-ibm-linux-gnu && env LIBOMPTARGET_DEBUG=1 %libomptarget-run-powerpc64-ibm-linux-gnu 2>&1 | %fcheck-powerpc64-ibm-linux-gnu -allow-empty -check-prefix=DEBUG
|
||||
// RUN: %libomptarget-compile-powerpc64le-ibm-linux-gnu && env LIBOMPTARGET_DEBUG=1 %libomptarget-run-powerpc64le-ibm-linux-gnu 2>&1 | %fcheck-powerpc64le-ibm-linux-gnu -allow-empty -check-prefix=DEBUG
|
||||
// RUN: %libomptarget-compile-x86_64-pc-linux-gnu && env LIBOMPTARGET_DEBUG=1 %libomptarget-run-x86_64-pc-linux-gnu 2>&1 | %fcheck-x86_64-pc-linux-gnu -allow-empty -check-prefix=DEBUG
|
||||
// RUN: %libomptarget-compile-nvptx64-nvidia-cuda && env LIBOMPTARGET_DEBUG=1 %libomptarget-run-nvptx64-nvidia-cuda 2>&1 | %fcheck-nvptx64-nvidia-cuda -allow-empty -check-prefix=DEBUG
|
||||
// REQUIRES: libomptarget-debug
|
||||
|
||||
#include <cstdio>
|
||||
#include <cstdlib>
|
||||
#include <cassert>
|
||||
|
||||
// Data structure definitions copied from OpenMP RTL.
// Mirrors the runtime's __tgt_target_non_contig descriptor (one entry per
// dimension). NOTE(review): the runtime names the middle field `Count`;
// here it is `width` — layout-compatible since only the name differs.
struct __tgt_target_non_contig {
  int64_t offset;
  int64_t width;
  int64_t stride;
};
|
||||
|
||||
// Subset of the runtime's tgt_map_type: only the flag this test exercises.
enum tgt_map_type {
  OMP_TGT_MAPTYPE_NON_CONTIG = 0x100000000000
};
|
||||
|
||||
// OpenMP RTL interfaces
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
void __tgt_target_data_update(int64_t device_id, int32_t arg_num,
|
||||
void **args_base, void **args, int64_t *arg_sizes,
|
||||
int64_t *arg_types);
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
// Drives __tgt_target_data_update directly with hand-built non-contiguous
// descriptors (as the compiler would emit for strided `target update`
// clauses) and lets the FileCheck DEBUG lines verify which byte offsets
// the runtime chooses to transfer.
int main() {
  // case 1
  // int arr[3][4][5][6];
  // #pragma omp target update to(arr[0:2][1:3][1:2][:])
  // set up descriptor
  __tgt_target_non_contig non_contig[5] = {
      {0, 2, 480}, {1, 3, 120}, {1, 2, 24}, {0, 6, 4}, {0, 1, 4}};
  int64_t size = 4, type = OMP_TGT_MAPTYPE_NON_CONTIG;

  void *base;
  void *begin = &non_contig;
  int64_t *sizes = &size;
  int64_t *types = &type;

  // The below diagram is the visualization of the non-contiguous transfer
  // after optimization. Each X represents the innermost (merged) unit of
  // 24 bytes, because the stride * count of the innermost used dimension
  // (6 * 4 = 24) equals the stride of the dimension before it, so the two
  // merge into one contiguous piece.
  //
  // OOOOO OOOOO OOOOO
  // OXXOO OXXOO OOOOO
  // OXXOO OXXOO OOOOO
  // OXXOO OXXOO OOOOO
  __tgt_target_data_update(/*device_id*/ -1, /*arg_num*/ 1, &base, &begin,
                           sizes, types);
  // DEBUG: offset 144
  // DEBUG: offset 264
  // DEBUG: offset 384
  // DEBUG: offset 624
  // DEBUG: offset 744
  // DEBUG: offset 864


  // case 2
  // double darr[3][4][5];
  // #pragma omp target update to(darr[0:2:2][2:2][:2:2])
  // set up descriptor
  __tgt_target_non_contig non_contig_2[4] = {
      {0, 2, 320}, {2, 2, 40}, {0, 2, 16}, {0, 1, 8}};
  int64_t size_2 = 4, type_2 = OMP_TGT_MAPTYPE_NON_CONTIG;

  void *base_2;
  void *begin_2 = &non_contig_2;
  int64_t *sizes_2 = &size_2;
  int64_t *types_2 = &type_2;

  // The below diagram is the visualization of this non-contiguous transfer.
  // Unlike case 1, no dimensions merge here: the innermost dimension's
  // stride * count (1 * 8 = 8) does not equal the next dimension's stride
  // (16) because of the [::2] element stride, so each X is a single
  // 8-byte double transferred on its own.
  //
  // OOOOO OOOOO OOOOO
  // OOOOO OOOOO OOOOO
  // XOXOO OOOOO XOXOO
  // XOXOO OOOOO XOXOO
  __tgt_target_data_update(/*device_id*/ -1, /*arg_num*/ 1, &base_2, &begin_2,
                           sizes_2, types_2);
  // DEBUG: offset 80
  // DEBUG: offset 96
  // DEBUG: offset 120
  // DEBUG: offset 136
  // DEBUG: offset 400
  // DEBUG: offset 416
  // DEBUG: offset 440
  // DEBUG: offset 456
  return 0;
}
|
||||
|
||||
Reference in New Issue
Block a user