[OpenMP] add 4 custom APIs supporting MSVC OMP codegen
This check-in adds 4 APIs to support MSVC, specifically:

* 3 APIs (__kmpc_sections_init, __kmpc_next_section, __kmpc_end_sections) to support the dynamic scheduling of OMP sections.
* 1 API (__kmpc_copyprivate_light, a light-weight version of __kmpc_copyprivate) to support the OMP single copyprivate clause.

Differential Revision: https://reviews.llvm.org/D128403
Committed by: Jonathan Peyton
Parent: b97013fd60
Commit: 43d5c4d539
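For orientation, the sketch below shows the call sequence these sections entry points are designed to support: a minimal, hypothetical lowering of "#pragma omp sections" with three section blocks, not the actual MSVC output. The helper name emit_sections_region and the section bodies are made up for illustration; ident_t, kmp_int32 and the __kmpc_* prototypes are the runtime's own, declared in kmp.h.

// Hypothetical outlined code for "#pragma omp sections" with three sections.
// A real compiler would emit this inside the parallel region's outlined body.
static void emit_sections_region(ident_t *loc) {
  kmp_int32 gtid = __kmpc_global_thread_num(loc); // existing entry point
  const kmp_int32 n = 3;                          // number of section blocks

  if (__kmpc_sections_init(loc, gtid)) {
    // Parallel region is active: pull section ids until none are left.
    kmp_int32 idx;
    while ((idx = __kmpc_next_section(loc, gtid, n)) < n) {
      switch (idx) {
      case 0: /* body of section 0 */ break;
      case 1: /* body of section 1 */ break;
      case 2: /* body of section 2 */ break;
      }
    }
  } else {
    // Serialized: this thread executes all three section bodies itself.
  }
  __kmpc_end_sections(loc, gtid);
  // The sections calls contain no implicit barrier; emit one explicitly
  // unless the construct carries a nowait clause.
  __kmpc_barrier(loc, gtid); // existing entry point
}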
@@ -397,6 +397,13 @@ kmpc_set_disp_num_buffers 267
    __kmpc_end_scope                    287
%endif

%ifndef stub
    __kmpc_copyprivate_light            288
    __kmpc_sections_init                289
    __kmpc_next_section                 290
    __kmpc_end_sections                 291
%endif

# User API entry points that have both lower- and upper- case versions for Fortran.
# Number for lowercase version is indicated. Number for uppercase is obtained by adding 1000.
# User API entry points are entry points that start with 'kmp_' or 'omp_'.
@@ -3890,6 +3890,11 @@ KMP_EXPORT kmp_int32 __kmpc_barrier_master_nowait(ident_t *,
KMP_EXPORT kmp_int32 __kmpc_single(ident_t *, kmp_int32 global_tid);
KMP_EXPORT void __kmpc_end_single(ident_t *, kmp_int32 global_tid);

KMP_EXPORT kmp_int32 __kmpc_sections_init(ident_t *loc, kmp_int32 global_tid);
KMP_EXPORT kmp_int32 __kmpc_next_section(ident_t *loc, kmp_int32 global_tid,
                                         kmp_int32 numberOfSections);
KMP_EXPORT void __kmpc_end_sections(ident_t *loc, kmp_int32 global_tid);

KMP_EXPORT void KMPC_FOR_STATIC_INIT(ident_t *loc, kmp_int32 global_tid,
                                     kmp_int32 schedtype, kmp_int32 *plastiter,
                                     kmp_int *plower, kmp_int *pupper,
@@ -3903,6 +3908,9 @@ KMP_EXPORT void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid,
                                   void (*cpy_func)(void *, void *),
                                   kmp_int32 didit);

KMP_EXPORT void *__kmpc_copyprivate_light(ident_t *loc, kmp_int32 gtid,
                                          void *cpy_data);

extern void KMPC_SET_NUM_THREADS(int arg);
extern void KMPC_SET_DYNAMIC(int flag);
extern void KMPC_SET_NESTED(int flag);
@@ -2224,6 +2224,61 @@ void __kmpc_copyprivate(ident_t *loc, kmp_int32 gtid, size_t cpy_size,
  }
}

/* --------------------------------------------------------------------------*/
/*!
@ingroup THREADPRIVATE
@param loc source location information
@param gtid global thread number
@param cpy_data pointer to the data to be saved/copied or 0
@return the saved pointer to the data

__kmpc_copyprivate_light is a lighter version of __kmpc_copyprivate:
it only saves the pointer it is given (if it is non-zero, i.e. it comes from
the thread that executed the single region) and returns that pointer from all
calls (the single thread itself does not need it). This version does not do
any actual data copying; the copying has to be done elsewhere, e.g. inline in
the generated code. Consequently, unlike __kmpc_copyprivate, this function has
no barrier at the end, so the generated code needs a barrier after all of the
data has been copied.
*/
void *__kmpc_copyprivate_light(ident_t *loc, kmp_int32 gtid, void *cpy_data) {
  void **data_ptr;

  KC_TRACE(10, ("__kmpc_copyprivate_light: called T#%d\n", gtid));

  KMP_MB();

  data_ptr = &__kmp_team_from_gtid(gtid)->t.t_copypriv_data;

  if (__kmp_env_consistency_check) {
    if (loc == 0) {
      KMP_WARNING(ConstructIdentInvalid);
    }
  }

  // ToDo: Optimize the following barrier

  if (cpy_data)
    *data_ptr = cpy_data;

#if OMPT_SUPPORT
  ompt_frame_t *ompt_frame;
  if (ompt_enabled.enabled) {
    __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
    if (ompt_frame->enter_frame.ptr == NULL)
      ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
    OMPT_STORE_RETURN_ADDRESS(gtid);
  }
#endif
/* This barrier is not a barrier region boundary */
#if USE_ITT_NOTIFY
  __kmp_threads[gtid]->th.th_ident = loc;
#endif
  __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);

  return *data_ptr;
}

/* -------------------------------------------------------------------------- */

#define INIT_LOCK __kmp_init_user_lock_with_checks
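To show how the contract documented above is meant to be used, here is a minimal sketch of a possible lowering of "#pragma omp single copyprivate(x)". It is an assumption about the generated code, not the actual MSVC output; the helper name single_copyprivate_x and the variable x are hypothetical, while the other __kmpc_* calls shown are existing runtime entry points.

// Hypothetical lowering of:  #pragma omp single copyprivate(x)
// The runtime only publishes the pointer; the copy and the trailing barrier
// are emitted inline by the compiler, per the comment above.
static void single_copyprivate_x(ident_t *loc, kmp_int32 gtid, int *x) {
  void *src = NULL;
  if (__kmpc_single(loc, gtid)) {
    /* single body: initializes *x on the executing thread */
    src = x;                        // only this thread passes a non-zero pointer
    __kmpc_end_single(loc, gtid);
  }
  // All threads call the light entry point; it stores the non-zero pointer and
  // returns it everywhere (its internal plain barrier makes it visible first).
  int *master_x = (int *)__kmpc_copyprivate_light(loc, gtid, src);
  if (master_x != x)
    *x = *master_x;                 // data copy done inline, not by the runtime
  __kmpc_barrier(loc, gtid);        // required after all copies have completed
}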
@@ -4348,7 +4403,7 @@ void *omp_aligned_calloc(size_t align, size_t nmemb, size_t size,
void *omp_realloc(void *ptr, size_t size, omp_allocator_handle_t allocator,
                  omp_allocator_handle_t free_allocator) {
  return __kmp_realloc(__kmp_entry_gtid(), ptr, size, allocator,
                       free_allocator);
}

void omp_free(void *ptr, omp_allocator_handle_t allocator) {
@@ -2285,6 +2285,219 @@ static int __kmp_dispatch_next(ident_t *loc, int gtid, kmp_int32 *p_last,
  return status;
}

/*!
@ingroup WORK_SHARING
@param loc source location information
@param global_tid global thread number
@return Zero if the parallel region is not active and this thread should execute
all sections, non-zero otherwise.

Beginning of sections construct.
There are no implicit barriers in the "sections" calls; rather, the compiler
should introduce an explicit barrier if it is required.

This implementation is based on __kmp_dispatch_init and uses the same constructs
for shared data (sections cannot be nested directly inside an omp for loop; a
parallel region must sit in between).
*/
kmp_int32 __kmpc_sections_init(ident_t *loc, kmp_int32 gtid) {

  int active;
  kmp_info_t *th;
  kmp_team_t *team;
  kmp_uint32 my_buffer_index;
  dispatch_shared_info_template<kmp_int32> volatile *sh;

  KMP_DEBUG_ASSERT(__kmp_init_serial);

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();
  __kmp_resume_if_soft_paused();

  /* setup data */
  th = __kmp_threads[gtid];
  team = th->th.th_team;
  active = !team->t.t_serialized;
  th->th.th_ident = loc;

  KMP_COUNT_BLOCK(OMP_SECTIONS);
  KD_TRACE(10, ("__kmpc_sections: called by T#%d\n", gtid));

  if (active) {
    // Setup sections in the same way as dynamic scheduled loops.
    // We need one piece of shared data: which section is to execute next.
    // (If the parallel region is not active, all sections are executed on the
    // same thread.)
    KMP_DEBUG_ASSERT(th->th.th_dispatch ==
                     &th->th.th_team->t.t_dispatch[th->th.th_info.ds.ds_tid]);

    my_buffer_index = th->th.th_dispatch->th_disp_index++;

    // reuse shared data structures from dynamic sched loops:
    sh = reinterpret_cast<dispatch_shared_info_template<kmp_int32> volatile *>(
        &team->t.t_disp_buffer[my_buffer_index % __kmp_dispatch_num_buffers]);
    KD_TRACE(10, ("__kmpc_sections_init: T#%d my_buffer_index:%d\n", gtid,
                  my_buffer_index));

    th->th.th_dispatch->th_deo_fcn = __kmp_dispatch_deo_error;
    th->th.th_dispatch->th_dxo_fcn = __kmp_dispatch_dxo_error;

    KD_TRACE(100, ("__kmpc_sections_init: T#%d before wait: my_buffer_index:%d "
                   "sh->buffer_index:%d\n",
                   gtid, my_buffer_index, sh->buffer_index));
    __kmp_wait<kmp_uint32>(&sh->buffer_index, my_buffer_index,
                           __kmp_eq<kmp_uint32> USE_ITT_BUILD_ARG(NULL));
    // Note: KMP_WAIT() cannot be used there: buffer index and
    // my_buffer_index are *always* 32-bit integers.
    KMP_MB();
    KD_TRACE(100, ("__kmpc_sections_init: T#%d after wait: my_buffer_index:%d "
                   "sh->buffer_index:%d\n",
                   gtid, my_buffer_index, sh->buffer_index));

    th->th.th_dispatch->th_dispatch_pr_current =
        nullptr; // sections construct doesn't need private data
    th->th.th_dispatch->th_dispatch_sh_current =
        CCAST(dispatch_shared_info_t *, (volatile dispatch_shared_info_t *)sh);
  }

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.ompt_callback_work) {
    ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL);
    ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
    ompt_callbacks.ompt_callback(ompt_callback_work)(
        ompt_work_sections, ompt_scope_begin, &(team_info->parallel_data),
        &(task_info->task_data), 0, OMPT_GET_RETURN_ADDRESS(0));
  }
#endif
  KMP_PUSH_PARTITIONED_TIMER(OMP_sections);

  return active;
}

/*!
@ingroup WORK_SHARING
@param loc source location information
@param global_tid global thread number
@param numberOfSections number of sections in the 'sections' construct
@return unsigned value in [0, numberOfSections) - the id of the section to
execute next on this thread; numberOfSections (or any other value out of that
range) - nothing left to execute on this thread
*/

kmp_int32 __kmpc_next_section(ident_t *loc, kmp_int32 gtid,
                              kmp_int32 numberOfSections) {

  KMP_TIME_PARTITIONED_BLOCK(OMP_sections);

  kmp_info_t *th = __kmp_threads[gtid];
#ifdef KMP_DEBUG
  kmp_team_t *team = th->th.th_team;
#endif

  KD_TRACE(1000, ("__kmp_dispatch_next: T#%d; number of sections:%d\n", gtid,
                  numberOfSections));

  // For the serialized case we should not call this function:
  KMP_DEBUG_ASSERT(!team->t.t_serialized);

  dispatch_shared_info_template<kmp_int32> volatile *sh;

  KMP_DEBUG_ASSERT(th->th.th_dispatch ==
                   &th->th.th_team->t.t_dispatch[th->th.th_info.ds.ds_tid]);

  KMP_DEBUG_ASSERT(!(th->th.th_dispatch->th_dispatch_pr_current));
  sh = reinterpret_cast<dispatch_shared_info_template<kmp_int32> volatile *>(
      th->th.th_dispatch->th_dispatch_sh_current);
  KMP_DEBUG_ASSERT(sh);

  kmp_int32 sectionIndex = 0;
  bool moreSectionsToExecute = true;

  // Find section to execute:
  sectionIndex = test_then_inc<kmp_int32>((kmp_int32 *)&sh->u.s.iteration);
  if (sectionIndex >= numberOfSections) {
    moreSectionsToExecute = false;
  }

  // status == 0: no more sections to execute;
  // OMPTODO: __kmpc_end_sections could be bypassed?
  if (!moreSectionsToExecute) {
    kmp_int32 num_done;

    num_done = test_then_inc<kmp_int32>((kmp_int32 *)(&sh->u.s.num_done));

    if (num_done == th->th.th_team_nproc - 1) {
      /* NOTE: release this buffer to be reused */

      KMP_MB(); /* Flush all pending memory write invalidates. */

      sh->u.s.num_done = 0;
      sh->u.s.iteration = 0;

      KMP_MB(); /* Flush all pending memory write invalidates. */

      sh->buffer_index += __kmp_dispatch_num_buffers;
      KD_TRACE(100, ("__kmpc_next_section: T#%d change buffer_index:%d\n", gtid,
                     sh->buffer_index));

      KMP_MB(); /* Flush all pending memory write invalidates. */

    } // if

    th->th.th_dispatch->th_deo_fcn = NULL;
    th->th.th_dispatch->th_dxo_fcn = NULL;
    th->th.th_dispatch->th_dispatch_sh_current = NULL;
    th->th.th_dispatch->th_dispatch_pr_current = NULL;

#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.ompt_callback_dispatch) {
      ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL);
      ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
      ompt_data_t instance = ompt_data_none;
      instance.ptr = OMPT_GET_RETURN_ADDRESS(0);
      ompt_callbacks.ompt_callback(ompt_callback_dispatch)(
          &(team_info->parallel_data), &(task_info->task_data),
          ompt_dispatch_section, instance);
    }
#endif
    KMP_POP_PARTITIONED_TIMER();
  }

  return sectionIndex;
}

/*!
@ingroup WORK_SHARING
@param loc source location information
@param global_tid global thread number

End of "sections" construct.
There is no need to wait here: a barrier is added separately when needed.
*/
void __kmpc_end_sections(ident_t *loc, kmp_int32 gtid) {

  kmp_info_t *th = __kmp_threads[gtid];
  int active = !th->th.th_team->t.t_serialized;

  KD_TRACE(100, ("__kmpc_end_sections: T#%d called\n", gtid));

  if (!active) {
    // In the active case, finalization is done in __kmpc_next_section.
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.ompt_callback_work) {
      ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL);
      ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
      ompt_callbacks.ompt_callback(ompt_callback_work)(
          ompt_work_sections, ompt_scope_end, &(team_info->parallel_data),
          &(task_info->task_data), 0, OMPT_GET_RETURN_ADDRESS(0));
    }
#endif
    KMP_POP_PARTITIONED_TIMER();
  }

  KD_TRACE(100, ("__kmpc_end_sections: T#%d returned\n", gtid));
}

template <typename T>
static void __kmp_dist_get_bounds(ident_t *loc, kmp_int32 gtid,
                                  kmp_int32 *plastiter, T *plower, T *pupper,
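Side note on the mechanism above: section ids are handed out by an atomic post-increment of the shared iteration counter, and any result at or beyond numberOfSections means the calling thread is done. A stand-alone illustration of that hand-out, using C11 atomics as a stand-in for the runtime's test_then_inc; the names shared_iteration and next_section_id are made up for this sketch and are not runtime code.

#include <stdatomic.h>

// Illustrative model only: the counter plays the role of sh->u.s.iteration
// in __kmpc_next_section.
static atomic_int shared_iteration;

// Each call claims the next section id; callers treat any value >= the
// section count as "nothing left to execute on this thread".
static int next_section_id(void) {
  return atomic_fetch_add(&shared_iteration, 1); // like test_then_inc
}

// Per-thread driver loop over a construct with n sections:
//   int id;
//   while ((id = next_section_id()) < n) { /* run section 'id' */ }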