Fix depth field bug and resize() function in hierarchical barrier

This is a follow-up to the hierarchy cleanup patch.
Added some clarifying comments to hierarchy_info.
Fixed a bug with the depth field not being updated cleanly during a resize.
Fixed resize() to first check whether the existing capacity, as determined by maxLevels, is enough for the new thread count before actually doing the full resize.

Differential Revision: http://reviews.llvm.org/D12562

llvm-svn: 247333
This commit is contained in:
Jonathan Peyton
2015-09-10 20:34:32 +00:00
parent 4bed31b9bf
commit df4d3dd659
2 changed files with 26 additions and 12 deletions

View File

@@ -32,15 +32,12 @@ void __kmp_get_hierarchy(kmp_uint32 nproc, kmp_bstate_t *thr_bar) {
// The test below is true if affinity is available, but set to "none". Need to init on first use of hierarchical barrier.
if (TCR_1(machine_hierarchy.uninitialized))
machine_hierarchy.init(NULL, nproc);
// Adjust the hierarchy in case num threads exceeds original
if (nproc > machine_hierarchy.base_num_threads)
machine_hierarchy.resize(nproc);
depth = machine_hierarchy.depth;
KMP_DEBUG_ASSERT(depth > 0);
// The loop below adjusts the depth in the case of a resize
while (nproc > machine_hierarchy.skipPerLevel[depth-1])
depth++;
// Adjust the hierarchy in case num threads exceeds original
if (nproc > machine_hierarchy.skipPerLevel[depth-1])
machine_hierarchy.resize(nproc);
thr_bar->depth = depth;
thr_bar->base_leaf_kids = (kmp_uint8)machine_hierarchy.numPerLevel[0]-1;

View File

@@ -119,15 +119,21 @@ __kmp_affinity_cmp_Address_child_num(const void *a, const void *b)
}
/** A structure for holding machine-specific hierarchy info to be computed once at init. */
/** A structure for holding machine-specific hierarchy info to be computed once at init.
This structure represents a mapping of threads to the actual machine hierarchy, or to
our best guess at what the hierarchy might be, for the purpose of performing an
efficient barrier. In the worst case, when there is no machine hierarchy information,
it produces a tree suitable for a barrier, similar to the tree used in the hyper barrier. */
class hierarchy_info {
public:
/** Good default values for number of leaves and branching factor, given no affinity information.
Behaves a bit like hyper barrier. */
static const kmp_uint32 maxLeaves=4;
static const kmp_uint32 minBranch=4;
/** Typical levels are threads/core, cores/package or socket, packages/node, nodes/machine,
etc. We don't want to get specific with nomenclature */
/** Number of levels in the hierarchy. Typical levels are threads/core, cores/package
or socket, packages/node, nodes/machine, etc. We don't want to get specific with
nomenclature. When the machine is oversubscribed we add levels to duplicate the
hierarchy, doubling the thread capacity of the hierarchy each time we add a level. */
kmp_uint32 maxLevels;
/** This is specifically the depth of the machine configuration hierarchy, in terms of the
@@ -227,6 +233,7 @@ public:
}
// Resize the hierarchy if nproc changes to something larger than before
void resize(kmp_uint32 nproc)
{
kmp_int8 bool_result = KMP_COMPARE_AND_STORE_ACQ8(&resizing, 0, 1);
@@ -237,13 +244,23 @@ public:
KMP_DEBUG_ASSERT(bool_result!=0);
KMP_DEBUG_ASSERT(nproc > base_num_threads);
// Calculate new max_levels
// Calculate new maxLevels
kmp_uint32 old_sz = skipPerLevel[depth-1];
kmp_uint32 incs = 0, old_maxLevels= maxLevels;
kmp_uint32 incs = 0, old_maxLevels = maxLevels;
// First see if old maxLevels is enough to contain new size
for (kmp_uint32 i=depth; i<maxLevels && nproc>old_sz; ++i) {
skipPerLevel[i] = 2*skipPerLevel[i-1];
old_sz *= 2;
depth++;
}
if (nproc <= old_sz) // enough space already
return;
// Not enough space, need to expand hierarchy
while (nproc > old_sz) {
old_sz *=2;
incs++;
}
depth++;
}
maxLevels += incs;
// Resize arrays