[OpenMP][libomptarget] New map interface: remove translation code and ensure proper alignment of struct members

This patch removes the translation code since this functionality is now implemented in the compiler.
target_data_begin and target_data_end are also patched to handle some special cases that used to be
handled by the obsolete translation function, namely ensure proper alignment of struct members when
we have partially mapped structs. Mapping a struct from a higher address (i.e. not from its beginning)
can result in distortion of the alignment for some of its member fields. Padding restores the original
(proper) alignment.

Differential revision: https://reviews.llvm.org/D44186

llvm-svn: 337455
This commit is contained in:
George Rokos
2018-07-19 13:41:03 +00:00
parent a67eb91047
commit a0da24683b
3 changed files with 142 additions and 327 deletions

View File

@@ -48,7 +48,7 @@ enum tgt_map_type {
OMP_TGT_MAPTYPE_LITERAL = 0x100,
// mapping is implicit
OMP_TGT_MAPTYPE_IMPLICIT = 0x200,
// member of struct, member given by 16 MSBs - 1
// member of struct, member given by [16 MSBs] - 1
OMP_TGT_MAPTYPE_MEMBER_OF = 0xffff000000000000
};

View File

@@ -33,265 +33,36 @@ EXTERN void __tgt_unregister_lib(__tgt_bin_desc *desc) {
RTLs.UnregisterLib(desc);
}
// Following datatypes and functions (tgt_oldmap_type, combined_entry_t,
// translate_map, cleanup_map) will be removed once the compiler starts using
// the new map types.
// Old map types
enum tgt_oldmap_type {
OMP_TGT_OLDMAPTYPE_TO = 0x001, // copy data from host to device
OMP_TGT_OLDMAPTYPE_FROM = 0x002, // copy data from device to host
OMP_TGT_OLDMAPTYPE_ALWAYS = 0x004, // copy regardless of the ref. count
OMP_TGT_OLDMAPTYPE_DELETE = 0x008, // force unmapping of data
OMP_TGT_OLDMAPTYPE_MAP_PTR = 0x010, // map pointer as well as pointee
OMP_TGT_OLDMAPTYPE_FIRST_MAP = 0x020, // first occurrence of mapped variable
OMP_TGT_OLDMAPTYPE_RETURN_PTR = 0x040, // return TgtBase addr of mapped data
OMP_TGT_OLDMAPTYPE_PRIVATE_PTR = 0x080, // private variable - not mapped
OMP_TGT_OLDMAPTYPE_PRIVATE_VAL = 0x100 // copy by value - not mapped
};
// Temporary functions for map translation and cleanup
struct combined_entry_t {
int num_members; // number of members in combined entry
void *base_addr; // base address of combined entry
void *begin_addr; // begin address of combined entry
void *end_addr; // size of combined entry
};
static void translate_map(int32_t arg_num, void **args_base, void **args,
int64_t *arg_sizes, int64_t *arg_types, int32_t &new_arg_num,
void **&new_args_base, void **&new_args, int64_t *&new_arg_sizes,
int64_t *&new_arg_types, bool is_target_construct) {
if (arg_num <= 0) {
DP("Nothing to translate\n");
new_arg_num = 0;
return;
}
// array of combined entries
combined_entry_t *cmb_entries =
(combined_entry_t *) alloca(arg_num * sizeof(combined_entry_t));
// number of combined entries
long num_combined = 0;
// old entry is MAP_PTR?
bool *is_ptr_old = (bool *) alloca(arg_num * sizeof(bool));
// old entry is member of member_of[old] cmb_entry
int *member_of = (int *) alloca(arg_num * sizeof(int));
// temporary storage for modifications of the original arg_types
int64_t *mod_arg_types = (int64_t *) alloca(arg_num *sizeof(int64_t));
DP("Translating %d map entries\n", arg_num);
for (int i = 0; i < arg_num; ++i) {
member_of[i] = -1;
is_ptr_old[i] = false;
mod_arg_types[i] = arg_types[i];
// Scan previous entries to see whether this entry shares the same base
for (int j = 0; j < i; ++j) {
void *new_begin_addr = NULL;
void *new_end_addr = NULL;
if (mod_arg_types[i] & OMP_TGT_OLDMAPTYPE_MAP_PTR) {
if (args_base[i] == args[j]) {
if (!(mod_arg_types[j] & OMP_TGT_OLDMAPTYPE_MAP_PTR)) {
DP("Entry %d has the same base as entry %d's begin address\n", i,
j);
new_begin_addr = args_base[i];
new_end_addr = (char *)args_base[i] + sizeof(void *);
assert(arg_sizes[j] == sizeof(void *));
is_ptr_old[j] = true;
} else {
DP("Entry %d has the same base as entry %d's begin address, but "
"%d's base was a MAP_PTR too\n", i, j, j);
int32_t to_from_always_delete =
OMP_TGT_OLDMAPTYPE_TO | OMP_TGT_OLDMAPTYPE_FROM |
OMP_TGT_OLDMAPTYPE_ALWAYS | OMP_TGT_OLDMAPTYPE_DELETE;
if (mod_arg_types[j] & to_from_always_delete) {
DP("Resetting to/from/always/delete flags for entry %d because "
"it is only a pointer to pointer\n", j);
mod_arg_types[j] &= ~to_from_always_delete;
}
}
}
} else {
if (!(mod_arg_types[i] & OMP_TGT_OLDMAPTYPE_FIRST_MAP) &&
args_base[i] == args_base[j]) {
DP("Entry %d has the same base address as entry %d\n", i, j);
new_begin_addr = args[i];
new_end_addr = (char *)args[i] + arg_sizes[i];
}
}
// If we have combined the entry with a previous one
if (new_begin_addr) {
int id;
if(member_of[j] == -1) {
// We have a new entry
id = num_combined++;
DP("Creating new combined entry %d for old entry %d\n", id, j);
// Initialize new entry
cmb_entries[id].num_members = 1;
cmb_entries[id].base_addr = args_base[j];
if (mod_arg_types[j] & OMP_TGT_OLDMAPTYPE_MAP_PTR) {
cmb_entries[id].begin_addr = args_base[j];
cmb_entries[id].end_addr = (char *)args_base[j] + arg_sizes[j];
} else {
cmb_entries[id].begin_addr = args[j];
cmb_entries[id].end_addr = (char *)args[j] + arg_sizes[j];
}
member_of[j] = id;
} else {
// Reuse existing combined entry
DP("Reusing existing combined entry %d\n", member_of[j]);
id = member_of[j];
}
// Update combined entry
DP("Adding entry %d to combined entry %d\n", i, id);
cmb_entries[id].num_members++;
// base_addr stays the same
cmb_entries[id].begin_addr =
std::min(cmb_entries[id].begin_addr, new_begin_addr);
cmb_entries[id].end_addr =
std::max(cmb_entries[id].end_addr, new_end_addr);
member_of[i] = id;
break;
}
}
}
DP("New entries: %ld combined + %d original\n", num_combined, arg_num);
new_arg_num = arg_num + num_combined;
new_args_base = (void **) malloc(new_arg_num * sizeof(void *));
new_args = (void **) malloc(new_arg_num * sizeof(void *));
new_arg_sizes = (int64_t *) malloc(new_arg_num * sizeof(int64_t));
new_arg_types = (int64_t *) malloc(new_arg_num * sizeof(int64_t));
const int64_t alignment = 8;
int next_id = 0; // next ID
int next_cid = 0; // next combined ID
int *combined_to_new_id = (int *) alloca(num_combined * sizeof(int));
for (int i = 0; i < arg_num; ++i) {
// It is member_of
if (member_of[i] == next_cid) {
int cid = next_cid++; // ID of this combined entry
int nid = next_id++; // ID of the new (global) entry
combined_to_new_id[cid] = nid;
DP("Combined entry %3d will become new entry %3d\n", cid, nid);
int64_t padding = (int64_t)cmb_entries[cid].begin_addr % alignment;
if (padding) {
DP("Using a padding of %" PRId64 " for begin address " DPxMOD "\n",
padding, DPxPTR(cmb_entries[cid].begin_addr));
cmb_entries[cid].begin_addr =
(char *)cmb_entries[cid].begin_addr - padding;
}
new_args_base[nid] = cmb_entries[cid].base_addr;
new_args[nid] = cmb_entries[cid].begin_addr;
new_arg_sizes[nid] = (int64_t) ((char *)cmb_entries[cid].end_addr -
(char *)cmb_entries[cid].begin_addr);
new_arg_types[nid] = OMP_TGT_MAPTYPE_TARGET_PARAM;
DP("Entry %3d: base_addr " DPxMOD ", begin_addr " DPxMOD ", "
"size %" PRId64 ", type 0x%" PRIx64 "\n", nid,
DPxPTR(new_args_base[nid]), DPxPTR(new_args[nid]), new_arg_sizes[nid],
new_arg_types[nid]);
} else if (member_of[i] != -1) {
DP("Combined entry %3d has been encountered before, do nothing\n",
member_of[i]);
}
// Now that the combined entry (the one the old entry was a member of) has
// been inserted into the new arguments list, proceed with the old entry.
int nid = next_id++;
DP("Old entry %3d will become new entry %3d\n", i, nid);
new_args_base[nid] = args_base[i];
new_args[nid] = args[i];
new_arg_sizes[nid] = arg_sizes[i];
int64_t old_type = mod_arg_types[i];
if (is_ptr_old[i]) {
// Reset TO and FROM flags
old_type &= ~(OMP_TGT_OLDMAPTYPE_TO | OMP_TGT_OLDMAPTYPE_FROM);
}
if (member_of[i] == -1) {
if (!is_target_construct)
old_type &= ~OMP_TGT_MAPTYPE_TARGET_PARAM;
new_arg_types[nid] = old_type;
DP("Entry %3d: base_addr " DPxMOD ", begin_addr " DPxMOD ", size %" PRId64
", type 0x%" PRIx64 " (old entry %d not MEMBER_OF)\n", nid,
DPxPTR(new_args_base[nid]), DPxPTR(new_args[nid]), new_arg_sizes[nid],
new_arg_types[nid], i);
} else {
// Old entry is not FIRST_MAP
old_type &= ~OMP_TGT_OLDMAPTYPE_FIRST_MAP;
// Add MEMBER_OF
int new_member_of = combined_to_new_id[member_of[i]];
old_type |= ((int64_t)new_member_of + 1) << 48;
new_arg_types[nid] = old_type;
DP("Entry %3d: base_addr " DPxMOD ", begin_addr " DPxMOD ", size %" PRId64
", type 0x%" PRIx64 " (old entry %d MEMBER_OF %d)\n", nid,
DPxPTR(new_args_base[nid]), DPxPTR(new_args[nid]), new_arg_sizes[nid],
new_arg_types[nid], i, new_member_of);
}
}
}
static void cleanup_map(int32_t new_arg_num, void **new_args_base,
void **new_args, int64_t *new_arg_sizes, int64_t *new_arg_types,
int32_t arg_num, void **args_base) {
if (new_arg_num > 0) {
int offset = new_arg_num - arg_num;
for (int32_t i = 0; i < arg_num; ++i) {
// Restore old base address
args_base[i] = new_args_base[i+offset];
}
free(new_args_base);
free(new_args);
free(new_arg_sizes);
free(new_arg_types);
}
}
/// creates host-to-target data mapping, stores it in the
/// libomptarget.so internal structure (an entry in a stack of data maps)
/// and passes the data to the device.
EXTERN void __tgt_target_data_begin(int64_t device_id, int32_t arg_num,
void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types) {
DP("Entering data begin region for device %ld with %d mappings\n", device_id,
arg_num);
DP("Entering data begin region for device %" PRId64 " with %d mappings\n",
device_id, arg_num);
// No devices available?
if (device_id == OFFLOAD_DEVICE_DEFAULT) {
device_id = omp_get_default_device();
DP("Use default device id %ld\n", device_id);
DP("Use default device id %" PRId64 "\n", device_id);
}
if (CheckDeviceAndCtors(device_id) != OFFLOAD_SUCCESS) {
DP("Failed to get device %ld ready\n", device_id);
DP("Failed to get device %" PRId64 " ready\n", device_id);
return;
}
DeviceTy& Device = Devices[device_id];
// Translate maps
int32_t new_arg_num;
void **new_args_base;
void **new_args;
int64_t *new_arg_sizes;
int64_t *new_arg_types;
translate_map(arg_num, args_base, args, arg_sizes, arg_types, new_arg_num,
new_args_base, new_args, new_arg_sizes, new_arg_types, false);
#ifdef OMPTARGET_DEBUG
for (int i=0; i<arg_num; ++i) {
DP("Entry %2d: Base=" DPxMOD ", Begin=" DPxMOD ", Size=%" PRId64
", Type=0x%" PRIx64 "\n", i, DPxPTR(args_base[i]), DPxPTR(args[i]),
arg_sizes[i], arg_types[i]);
}
#endif
//target_data_begin(Device, arg_num, args_base, args, arg_sizes, arg_types);
target_data_begin(Device, new_arg_num, new_args_base, new_args, new_arg_sizes,
new_arg_types);
// Cleanup translation memory
cleanup_map(new_arg_num, new_args_base, new_args, new_arg_sizes,
new_arg_types, arg_num, args_base);
target_data_begin(Device, arg_num, args_base, args, arg_sizes, arg_types);
}
EXTERN void __tgt_target_data_begin_nowait(int64_t device_id, int32_t arg_num,
@@ -321,32 +92,25 @@ EXTERN void __tgt_target_data_end(int64_t device_id, int32_t arg_num,
size_t Devices_size = Devices.size();
RTLsMtx.unlock();
if (Devices_size <= (size_t)device_id) {
DP("Device ID %ld does not have a matching RTL.\n", device_id);
DP("Device ID %" PRId64 " does not have a matching RTL.\n", device_id);
return;
}
DeviceTy &Device = Devices[device_id];
if (!Device.IsInit) {
DP("uninit device: ignore");
DP("Uninit device: ignore");
return;
}
// Translate maps
int32_t new_arg_num;
void **new_args_base;
void **new_args;
int64_t *new_arg_sizes;
int64_t *new_arg_types;
translate_map(arg_num, args_base, args, arg_sizes, arg_types, new_arg_num,
new_args_base, new_args, new_arg_sizes, new_arg_types, false);
#ifdef OMPTARGET_DEBUG
for (int i=0; i<arg_num; ++i) {
DP("Entry %2d: Base=" DPxMOD ", Begin=" DPxMOD ", Size=%" PRId64
", Type=0x%" PRIx64 "\n", i, DPxPTR(args_base[i]), DPxPTR(args[i]),
arg_sizes[i], arg_types[i]);
}
#endif
//target_data_end(Device, arg_num, args_base, args, arg_sizes, arg_types);
target_data_end(Device, new_arg_num, new_args_base, new_args, new_arg_sizes,
new_arg_types);
// Cleanup translation memory
cleanup_map(new_arg_num, new_args_base, new_args, new_arg_sizes,
new_arg_types, arg_num, args_base);
target_data_end(Device, arg_num, args_base, args, arg_sizes, arg_types);
}
EXTERN void __tgt_target_data_end_nowait(int64_t device_id, int32_t arg_num,
@@ -370,7 +134,7 @@ EXTERN void __tgt_target_data_update(int64_t device_id, int32_t arg_num,
}
if (CheckDeviceAndCtors(device_id) != OFFLOAD_SUCCESS) {
DP("Failed to get device %ld ready\n", device_id);
DP("Failed to get device %" PRId64 " ready\n", device_id);
return;
}
@@ -391,35 +155,28 @@ EXTERN void __tgt_target_data_update_nowait(
EXTERN int __tgt_target(int64_t device_id, void *host_ptr, int32_t arg_num,
void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types) {
DP("Entering target region with entry point " DPxMOD " and device Id %ld\n",
DPxPTR(host_ptr), device_id);
DP("Entering target region with entry point " DPxMOD " and device Id %"
PRId64 "\n", DPxPTR(host_ptr), device_id);
if (device_id == OFFLOAD_DEVICE_DEFAULT) {
device_id = omp_get_default_device();
}
if (CheckDeviceAndCtors(device_id) != OFFLOAD_SUCCESS) {
DP("Failed to get device %ld ready\n", device_id);
DP("Failed to get device %" PRId64 " ready\n", device_id);
return OFFLOAD_FAIL;
}
// Translate maps
int32_t new_arg_num;
void **new_args_base;
void **new_args;
int64_t *new_arg_sizes;
int64_t *new_arg_types;
translate_map(arg_num, args_base, args, arg_sizes, arg_types, new_arg_num,
new_args_base, new_args, new_arg_sizes, new_arg_types, true);
#ifdef OMPTARGET_DEBUG
for (int i=0; i<arg_num; ++i) {
DP("Entry %2d: Base=" DPxMOD ", Begin=" DPxMOD ", Size=%" PRId64
", Type=0x%" PRIx64 "\n", i, DPxPTR(args_base[i]), DPxPTR(args[i]),
arg_sizes[i], arg_types[i]);
}
#endif
//return target(device_id, host_ptr, arg_num, args_base, args, arg_sizes,
// arg_types, 0, 0, false /*team*/, false /*recursive*/);
int rc = target(device_id, host_ptr, new_arg_num, new_args_base, new_args,
new_arg_sizes, new_arg_types, 0, 0, false /*team*/);
// Cleanup translation memory
cleanup_map(new_arg_num, new_args_base, new_args, new_arg_sizes,
new_arg_types, arg_num, args_base);
int rc = target(device_id, host_ptr, arg_num, args_base, args, arg_sizes,
arg_types, 0, 0, false /*team*/);
return rc;
}
@@ -438,36 +195,28 @@ EXTERN int __tgt_target_nowait(int64_t device_id, void *host_ptr,
EXTERN int __tgt_target_teams(int64_t device_id, void *host_ptr,
int32_t arg_num, void **args_base, void **args, int64_t *arg_sizes,
int64_t *arg_types, int32_t team_num, int32_t thread_limit) {
DP("Entering target region with entry point " DPxMOD " and device Id %ld\n",
DPxPTR(host_ptr), device_id);
DP("Entering target region with entry point " DPxMOD " and device Id %"
PRId64 "\n", DPxPTR(host_ptr), device_id);
if (device_id == OFFLOAD_DEVICE_DEFAULT) {
device_id = omp_get_default_device();
}
if (CheckDeviceAndCtors(device_id) != OFFLOAD_SUCCESS) {
DP("Failed to get device %ld ready\n", device_id);
DP("Failed to get device %" PRId64 " ready\n", device_id);
return OFFLOAD_FAIL;
}
// Translate maps
int32_t new_arg_num;
void **new_args_base;
void **new_args;
int64_t *new_arg_sizes;
int64_t *new_arg_types;
translate_map(arg_num, args_base, args, arg_sizes, arg_types, new_arg_num,
new_args_base, new_args, new_arg_sizes, new_arg_types, true);
#ifdef OMPTARGET_DEBUG
for (int i=0; i<arg_num; ++i) {
DP("Entry %2d: Base=" DPxMOD ", Begin=" DPxMOD ", Size=%" PRId64
", Type=0x%" PRIx64 "\n", i, DPxPTR(args_base[i]), DPxPTR(args[i]),
arg_sizes[i], arg_types[i]);
}
#endif
//return target(device_id, host_ptr, arg_num, args_base, args, arg_sizes,
// arg_types, team_num, thread_limit, true /*team*/,
// false /*recursive*/);
int rc = target(device_id, host_ptr, new_arg_num, new_args_base, new_args,
new_arg_sizes, new_arg_types, team_num, thread_limit, true /*team*/);
// Cleanup translation memory
cleanup_map(new_arg_num, new_args_base, new_args, new_arg_sizes,
new_arg_types, arg_num, args_base);
int rc = target(device_id, host_ptr, arg_num, args_base, args, arg_sizes,
arg_types, team_num, thread_limit, true /*team*/);
return rc;
}
@@ -492,11 +241,11 @@ EXTERN void __kmpc_push_target_tripcount(int64_t device_id,
}
if (CheckDeviceAndCtors(device_id) != OFFLOAD_SUCCESS) {
DP("Failed to get device %ld ready\n", device_id);
DP("Failed to get device %" PRId64 " ready\n", device_id);
return;
}
DP("__kmpc_push_target_tripcount(%ld, %" PRIu64 ")\n", device_id,
DP("__kmpc_push_target_tripcount(%" PRId64 ", %" PRIu64 ")\n", device_id,
loop_tripcount);
Devices[device_id].loopTripCnt = loop_tripcount;
}

View File

@@ -25,6 +25,38 @@
int DebugLevel = 0;
#endif // OMPTARGET_DEBUG
/* All begin addresses for partially mapped structs must be 8-aligned in order
* to ensure proper alignment of members. E.g.
*
* struct S {
* int a; // 4-aligned
* int b; // 4-aligned
* int *p; // 8-aligned
* } s1;
* ...
* #pragma omp target map(tofrom: s1.b, s1.p[0:N])
* {
* s1.b = 5;
* for (int i...) s1.p[i] = ...;
* }
*
* Here we are mapping s1 starting from member b, so BaseAddress=&s1=&s1.a and
* BeginAddress=&s1.b. Let's assume that the struct begins at address 0x100,
* then &s1.a=0x100, &s1.b=0x104, &s1.p=0x108. Each member obeys the alignment
* requirements for its type. Now, when we allocate memory on the device, in
* CUDA's case cuMemAlloc() returns an address which is at least 256-aligned.
* This means that the chunk of the struct on the device will start at a
* 256-aligned address, let's say 0x200. Then the address of b will be 0x200 and
* address of p will be a misaligned 0x204 (on the host there was no need to add
* padding between b and p, so p comes exactly 4 bytes after b). If the device
* kernel tries to access s1.p, a misaligned address error occurs (as reported
* by the CUDA plugin). By padding the begin address down to a multiple of 8 and
* extending the size of the allocated chuck accordingly, the chuck on the
* device will start at 0x200 with the padding (4 bytes), then &s1.b=0x204 and
* &s1.p=0x208, as they should be to satisfy the alignment requirements.
*/
static const int64_t alignment = 8;
/// Map global data and execute pending ctors
static int InitLibrary(DeviceTy& Device) {
/*
@@ -172,7 +204,7 @@ int CheckDeviceAndCtors(int64_t device_id) {
return OFFLOAD_SUCCESS;
}
static short member_of(int64_t type) {
static int32_t member_of(int64_t type) {
return ((type & OMP_TGT_MAPTYPE_MEMBER_OF) >> 48) - 1;
}
@@ -189,10 +221,33 @@ int target_data_begin(DeviceTy &Device, int32_t arg_num,
void *HstPtrBegin = args[i];
void *HstPtrBase = args_base[i];
int64_t data_size = arg_sizes[i];
// Adjust for proper alignment if this is a combined entry (for structs).
// Look at the next argument - if that is MEMBER_OF this one, then this one
// is a combined entry.
int64_t padding = 0;
const int next_i = i+1;
if (member_of(arg_types[i]) < 0 && next_i < arg_num &&
member_of(arg_types[next_i]) == i) {
padding = (int64_t)HstPtrBegin % alignment;
if (padding) {
DP("Using a padding of %" PRId64 " bytes for begin address " DPxMOD
"\n", padding, DPxPTR(HstPtrBegin));
HstPtrBegin = (char *) HstPtrBegin - padding;
data_size += padding;
}
}
// Address of pointer on the host and device, respectively.
void *Pointer_HstPtrBegin, *Pointer_TgtPtrBegin;
bool IsNew, Pointer_IsNew;
bool IsImplicit = arg_types[i] & OMP_TGT_MAPTYPE_IMPLICIT;
// UpdateRef is based on MEMBER_OF instead of TARGET_PARAM because if we
// have reached this point via __tgt_target_data_begin and not __tgt_target
// then no argument is marked as TARGET_PARAM ("omp target data map" is not
// associated with a target region, so there are no target parameters). This
// may be considered a hack, we could revise the scheme in the future.
bool UpdateRef = !(arg_types[i] & OMP_TGT_MAPTYPE_MEMBER_OF);
if (arg_types[i] & OMP_TGT_MAPTYPE_PTR_AND_OBJ) {
DP("Has a pointer entry: \n");
@@ -213,28 +268,22 @@ int target_data_begin(DeviceTy &Device, int32_t arg_num,
}
void *TgtPtrBegin = Device.getOrAllocTgtPtr(HstPtrBegin, HstPtrBase,
arg_sizes[i], IsNew, IsImplicit, UpdateRef);
if (!TgtPtrBegin && arg_sizes[i]) {
// If arg_sizes[i]==0, then the argument is a pointer to NULL, so
// getOrAlloc() returning NULL is not an error.
data_size, IsNew, IsImplicit, UpdateRef);
if (!TgtPtrBegin && data_size) {
// If data_size==0, then the argument could be a zero-length pointer to
// NULL, so getOrAlloc() returning NULL is not an error.
DP("Call to getOrAllocTgtPtr returned null pointer (device failure or "
"illegal mapping).\n");
}
DP("There are %" PRId64 " bytes allocated at target address " DPxMOD
" - is%s new\n", arg_sizes[i], DPxPTR(TgtPtrBegin),
" - is%s new\n", data_size, DPxPTR(TgtPtrBegin),
(IsNew ? "" : " not"));
if (arg_types[i] & OMP_TGT_MAPTYPE_RETURN_PARAM) {
void *ret_ptr;
if (arg_types[i] & OMP_TGT_MAPTYPE_PTR_AND_OBJ)
ret_ptr = Pointer_TgtPtrBegin;
else {
bool IsLast; // not used
ret_ptr = Device.getTgtPtrBegin(HstPtrBegin, 0, IsLast, false);
}
DP("Returning device pointer " DPxMOD "\n", DPxPTR(ret_ptr));
args_base[i] = ret_ptr;
uintptr_t Delta = (uintptr_t)HstPtrBegin - (uintptr_t)HstPtrBase;
void *TgtPtrBase = (void *)((uintptr_t)TgtPtrBegin - Delta);
DP("Returning device pointer " DPxMOD "\n", DPxPTR(TgtPtrBase));
args_base[i] = TgtPtrBase;
}
if (arg_types[i] & OMP_TGT_MAPTYPE_TO) {
@@ -243,7 +292,7 @@ int target_data_begin(DeviceTy &Device, int32_t arg_num,
copy = true;
} else if (arg_types[i] & OMP_TGT_MAPTYPE_MEMBER_OF) {
// Copy data only if the "parent" struct has RefCount==1.
short parent_idx = member_of(arg_types[i]);
int32_t parent_idx = member_of(arg_types[i]);
long parent_rc = Device.getMapEntryRefCnt(args[parent_idx]);
assert(parent_rc > 0 && "parent struct not found");
if (parent_rc == 1) {
@@ -253,8 +302,8 @@ int target_data_begin(DeviceTy &Device, int32_t arg_num,
if (copy) {
DP("Moving %" PRId64 " bytes (hst:" DPxMOD ") -> (tgt:" DPxMOD ")\n",
arg_sizes[i], DPxPTR(HstPtrBegin), DPxPTR(TgtPtrBegin));
int rt = Device.data_submit(TgtPtrBegin, HstPtrBegin, arg_sizes[i]);
data_size, DPxPTR(HstPtrBegin), DPxPTR(TgtPtrBegin));
int rt = Device.data_submit(TgtPtrBegin, HstPtrBegin, data_size);
if (rt != OFFLOAD_SUCCESS) {
DP("Copying data to device failed.\n");
rc = OFFLOAD_FAIL;
@@ -297,16 +346,33 @@ int target_data_end(DeviceTy &Device, int32_t arg_num, void **args_base,
continue;
void *HstPtrBegin = args[i];
int64_t data_size = arg_sizes[i];
// Adjust for proper alignment if this is a combined entry (for structs).
// Look at the next argument - if that is MEMBER_OF this one, then this one
// is a combined entry.
int64_t padding = 0;
const int next_i = i+1;
if (member_of(arg_types[i]) < 0 && next_i < arg_num &&
member_of(arg_types[next_i]) == i) {
padding = (int64_t)HstPtrBegin % alignment;
if (padding) {
DP("Using a padding of %" PRId64 " bytes for begin address " DPxMOD
"\n", padding, DPxPTR(HstPtrBegin));
HstPtrBegin = (char *) HstPtrBegin - padding;
data_size += padding;
}
}
bool IsLast;
bool UpdateRef = !(arg_types[i] & OMP_TGT_MAPTYPE_MEMBER_OF) ||
(arg_types[i] & OMP_TGT_MAPTYPE_PTR_AND_OBJ);
bool ForceDelete = arg_types[i] & OMP_TGT_MAPTYPE_DELETE;
// If PTR_AND_OBJ, HstPtrBegin is address of pointee
void *TgtPtrBegin = Device.getTgtPtrBegin(HstPtrBegin, arg_sizes[i], IsLast,
void *TgtPtrBegin = Device.getTgtPtrBegin(HstPtrBegin, data_size, IsLast,
UpdateRef);
DP("There are %" PRId64 " bytes allocated at target address " DPxMOD
" - is%s last\n", arg_sizes[i], DPxPTR(TgtPtrBegin),
" - is%s last\n", data_size, DPxPTR(TgtPtrBegin),
(IsLast ? "" : " not"));
bool DelEntry = IsLast || ForceDelete;
@@ -324,7 +390,7 @@ int target_data_end(DeviceTy &Device, int32_t arg_num, void **args_base,
if ((arg_types[i] & OMP_TGT_MAPTYPE_MEMBER_OF) &&
!(arg_types[i] & OMP_TGT_MAPTYPE_PTR_AND_OBJ)) {
// Copy data only if the "parent" struct has RefCount==1.
short parent_idx = member_of(arg_types[i]);
int32_t parent_idx = member_of(arg_types[i]);
long parent_rc = Device.getMapEntryRefCnt(args[parent_idx]);
assert(parent_rc > 0 && "parent struct not found");
if (parent_rc == 1) {
@@ -334,8 +400,8 @@ int target_data_end(DeviceTy &Device, int32_t arg_num, void **args_base,
if (DelEntry || Always || CopyMember) {
DP("Moving %" PRId64 " bytes (tgt:" DPxMOD ") -> (hst:" DPxMOD ")\n",
arg_sizes[i], DPxPTR(TgtPtrBegin), DPxPTR(HstPtrBegin));
int rt = Device.data_retrieve(HstPtrBegin, TgtPtrBegin, arg_sizes[i]);
data_size, DPxPTR(TgtPtrBegin), DPxPTR(HstPtrBegin));
int rt = Device.data_retrieve(HstPtrBegin, TgtPtrBegin, data_size);
if (rt != OFFLOAD_SUCCESS) {
DP("Copying data from device failed.\n");
rc = OFFLOAD_FAIL;
@@ -348,7 +414,7 @@ int target_data_end(DeviceTy &Device, int32_t arg_num, void **args_base,
// copies. If the struct is going to be deallocated, remove any remaining
// shadow pointer entries for this struct.
uintptr_t lb = (uintptr_t) HstPtrBegin;
uintptr_t ub = (uintptr_t) HstPtrBegin + arg_sizes[i];
uintptr_t ub = (uintptr_t) HstPtrBegin + data_size;
Device.ShadowMtx.lock();
for (ShadowPtrListTy::iterator it = Device.ShadowPtrMap.begin();
it != Device.ShadowPtrMap.end(); ++it) {
@@ -378,7 +444,7 @@ int target_data_end(DeviceTy &Device, int32_t arg_num, void **args_base,
// Deallocate map
if (DelEntry) {
int rt = Device.deallocTgtPtr(HstPtrBegin, arg_sizes[i], ForceDelete);
int rt = Device.deallocTgtPtr(HstPtrBegin, data_size, ForceDelete);
if (rt != OFFLOAD_SUCCESS) {
DP("Deallocating data from device failed.\n");
rc = OFFLOAD_FAIL;