mirror of
https://github.com/intel/llvm.git
synced 2026-01-14 03:50:17 +08:00
Fix depth field bug and resize() function in hierarchical barrier
This is a follow-up to the hierarchy cleanup patch. It adds some clarifying comments to hierarchy_info, fixes a bug where the depth field was not updated cleanly during a resize, and changes resize() to first check whether the capacity already allowed by maxLevels is sufficient before actually doing the full resize. Differential Revision: http://reviews.llvm.org/D12562 llvm-svn: 247333
This commit is contained in:
@@ -32,15 +32,12 @@ void __kmp_get_hierarchy(kmp_uint32 nproc, kmp_bstate_t *thr_bar) {
|
||||
// The test below is true if affinity is available, but set to "none". Need to init on first use of hierarchical barrier.
|
||||
if (TCR_1(machine_hierarchy.uninitialized))
|
||||
machine_hierarchy.init(NULL, nproc);
|
||||
// Adjust the hierarchy in case num threads exceeds original
|
||||
if (nproc > machine_hierarchy.base_num_threads)
|
||||
machine_hierarchy.resize(nproc);
|
||||
|
||||
depth = machine_hierarchy.depth;
|
||||
KMP_DEBUG_ASSERT(depth > 0);
|
||||
// The loop below adjusts the depth in the case of a resize
|
||||
while (nproc > machine_hierarchy.skipPerLevel[depth-1])
|
||||
depth++;
|
||||
// Adjust the hierarchy in case num threads exceeds original
|
||||
if (nproc > machine_hierarchy.skipPerLevel[depth-1])
|
||||
machine_hierarchy.resize(nproc);
|
||||
|
||||
thr_bar->depth = depth;
|
||||
thr_bar->base_leaf_kids = (kmp_uint8)machine_hierarchy.numPerLevel[0]-1;
|
||||
|
||||
@@ -119,15 +119,21 @@ __kmp_affinity_cmp_Address_child_num(const void *a, const void *b)
|
||||
}
|
||||
|
||||
|
||||
/** A structure for holding machine-specific hierarchy info to be computed once at init. */
|
||||
/** A structure for holding machine-specific hierarchy info to be computed once at init.
|
||||
This structure represents a mapping of threads to the actual machine hierarchy, or to
|
||||
our best guess at what the hierarchy might be, for the purpose of performing an
|
||||
efficient barrier. In the worst case, when there is no machine hierarchy information,
|
||||
it produces a tree suitable for a barrier, similar to the tree used in the hyper barrier. */
|
||||
class hierarchy_info {
|
||||
public:
|
||||
/** Good default values for number of leaves and branching factor, given no affinity information.
|
||||
Behaves a bit like hyper barrier. */
|
||||
static const kmp_uint32 maxLeaves=4;
|
||||
static const kmp_uint32 minBranch=4;
|
||||
/** Typical levels are threads/core, cores/package or socket, packages/node, nodes/machine,
|
||||
etc. We don't want to get specific with nomenclature */
|
||||
/** Number of levels in the hierarchy. Typical levels are threads/core, cores/package
|
||||
or socket, packages/node, nodes/machine, etc. We don't want to get specific with
|
||||
nomenclature. When the machine is oversubscribed we add levels to duplicate the
|
||||
hierarchy, doubling the thread capacity of the hierarchy each time we add a level. */
|
||||
kmp_uint32 maxLevels;
|
||||
|
||||
/** This is specifically the depth of the machine configuration hierarchy, in terms of the
|
||||
@@ -227,6 +233,7 @@ public:
|
||||
|
||||
}
|
||||
|
||||
// Resize the hierarchy if nproc changes to something larger than before
|
||||
void resize(kmp_uint32 nproc)
|
||||
{
|
||||
kmp_int8 bool_result = KMP_COMPARE_AND_STORE_ACQ8(&resizing, 0, 1);
|
||||
@@ -237,13 +244,23 @@ public:
|
||||
KMP_DEBUG_ASSERT(bool_result!=0);
|
||||
KMP_DEBUG_ASSERT(nproc > base_num_threads);
|
||||
|
||||
// Calculate new max_levels
|
||||
// Calculate new maxLevels
|
||||
kmp_uint32 old_sz = skipPerLevel[depth-1];
|
||||
kmp_uint32 incs = 0, old_maxLevels= maxLevels;
|
||||
kmp_uint32 incs = 0, old_maxLevels = maxLevels;
|
||||
// First see if old maxLevels is enough to contain new size
|
||||
for (kmp_uint32 i=depth; i<maxLevels && nproc>old_sz; ++i) {
|
||||
skipPerLevel[i] = 2*skipPerLevel[i-1];
|
||||
old_sz *= 2;
|
||||
depth++;
|
||||
}
|
||||
if (nproc <= old_sz) // enough space already
|
||||
return;
|
||||
// Not enough space, need to expand hierarchy
|
||||
while (nproc > old_sz) {
|
||||
old_sz *=2;
|
||||
incs++;
|
||||
}
|
||||
depth++;
|
||||
}
|
||||
maxLevels += incs;
|
||||
|
||||
// Resize arrays
|
||||
|
||||
Reference in New Issue
Block a user