[OpenMP][libomp] Add core attributes to KMP_HW_SUBSET
Allow filtering of resources based on core attributes. Two new attributes are added:

1) Core Type (intel_atom, intel_core)
2) Core Efficiency (integer), where the higher the efficiency, the more performant the core.

On hybrid architectures, e.g., Alder Lake, users can specify KMP_HW_SUBSET=4c:intel_atom,4c:intel_core to select the first four Atom and first four Big cores. They can also use the efficiency syntax, e.g., KMP_HW_SUBSET=2c:eff0,2c:eff1.

Differential Revision: https://reviews.llvm.org/D114901
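As a usage illustration (not part of this commit), the sketch below assumes a POSIX system, a hybrid Intel CPU, and a compiler with OpenMP support (e.g., -fopenmp); it sets KMP_HW_SUBSET programmatically before the runtime initializes, which is equivalent to exporting the variable in the shell.

// Hypothetical sketch: select four big and four little cores on a hybrid CPU.
// Equivalent to: KMP_HW_SUBSET=4c:intel_core,4c:intel_atom KMP_AFFINITY=verbose ./app
#include <cstdio>
#include <cstdlib>
#include <omp.h>

int main() {
  // Must run before the first OpenMP construct so libomp reads the values.
  setenv("KMP_HW_SUBSET", "4c:intel_core,4c:intel_atom", 1);
  setenv("KMP_AFFINITY", "verbose", 1); // prints the filtered topology
#pragma omp parallel
  {
#pragma omp single
    printf("OpenMP sees %d threads after filtering\n", omp_get_num_threads());
  }
  return 0;
}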
@@ -461,13 +461,14 @@ unit starting from top layer downwards. E.g. the number of sockets (top layer
units), cores per socket, and the threads per core, to use with an OpenMP
application, as an alternative to writing complicated explicit affinity settings
or a limiting process affinity mask. You can also specify an offset value to set
which resources to use.
which resources to use. When available, you can specify attributes to select
different subsets of resources.

An extended syntax is available when ``KMP_TOPOLOGY_METHOD=hwloc``. Depending on what
resources are detected, you may be able to specify additional resources, such as
NUMA domains and groups of hardware resources that share certain cache levels.

**Basic syntax:** ``num_unitsID[@offset] [,num_unitsID[@offset]...]``
**Basic syntax:** ``num_unitsID[@offset][:attribute] [,num_unitsID[@offset][:attribute]...]``

Supported unit IDs are not case-insensitive.

@@ -485,6 +486,14 @@ Supported unit IDs are not case-insensitive.

``offset`` - (Optional) The number of units to skip.

``attribute`` - (Optional) An attribute differentiating resources at a particular level. The attributes available to users are:

* **Core type** - On Intel architectures, this can be ``intel_atom`` or ``intel_core``
* **Core efficiency** - This is specified as ``eff``:emphasis:`num` where :emphasis:`num` is a number from 0
  to the number of core efficiencies detected in the machine topology minus one.
  E.g., ``eff0``. The greater the efficiency number, the more performant the core. There may be
  more core efficiencies than core types; they can be viewed by setting ``KMP_AFFINITY=verbose``.

.. note::
    The hardware cache can be specified as a unit, e.g. L2 for L2 cache,
    or LL for last level cache.
@@ -513,7 +522,10 @@ The run-time library prints a warning, and the setting of

* a resource is specified, but detection of that resource is not supported
  by the chosen topology detection method and/or
* a resource is specified twice.
* a resource is specified twice. An exception to this condition is if attributes
  differentiate the resource.
* attributes are used when not detected in the machine topology or conflict with
  each other.

This variable does not work if ``KMP_AFFINITY=disabled``.

@@ -532,6 +544,10 @@ available hardware resources.
* ``1T``: Use all cores on all sockets, 1 thread per core.
* ``1s, 1d, 1n, 1c, 1t``: Use 1 socket, 1 die, 1 NUMA node, 1 core, 1 thread
  - use HW thread as a result.
* ``4c:intel_atom,5c:intel_core``: Use all available sockets and use 4
  Intel Atom(R) processor cores and 5 Intel(R) Core(TM) processor cores per socket.
* ``2c:eff0@1,3c:eff1``: Use all available sockets, skip the first core with efficiency 0
  and use the next 2 cores with efficiency 0 and 3 cores with efficiency 1 per socket.
* ``1s, 1c, 1t``: Use 1 socket, 1 core, 1 thread. This may result in using
  single thread on a 3-layer topology architecture, or multiple threads on
  4-layer or 5-layer architecture. Result may even be different on the same
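To make the ``2c:eff0@1,3c:eff1`` example above concrete, here is a small, hypothetical sketch (not from the patch) that applies the per-attribute offset/count rule to a toy machine with four efficiency-0 and four efficiency-1 cores per socket and prints which cores survive.

// Toy model of the per-attribute offset/count selection used by KMP_HW_SUBSET.
// Hypothetical machine: cores 0-3 have efficiency 0, cores 4-7 have efficiency 1.
#include <cstdio>

int main() {
  const int core_eff[8] = {0, 0, 0, 0, 1, 1, 1, 1};
  // KMP_HW_SUBSET=2c:eff0@1,3c:eff1 -> {num, offset} per efficiency value
  const int num[2] = {2, 3};
  const int offset[2] = {1, 0};
  int seen[2] = {0, 0}; // running count of cores seen per efficiency value
  for (int c = 0; c < 8; ++c) {
    int eff = core_eff[c];
    int sub_id = seen[eff]++; // position of this core among cores with the same eff
    bool kept = (sub_id >= offset[eff] && sub_id < offset[eff] + num[eff]);
    printf("core %d (eff%d): %s\n", c, eff, kept ? "kept" : "filtered");
  }
  return 0;
}

Running this prints that cores 1 and 2 (efficiency 0) and cores 4, 5, and 6 (efficiency 1) are kept, matching the description of the example in the list above.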
@@ -361,6 +361,7 @@ OmpNoAllocator "Allocator %1$s is not available, will use default
TopologyGeneric "%1$s: %2$s (%3$d total cores)"
AffGranularityBad "%1$s: granularity setting: %2$s does not exist in topology. Using granularity=%3$s instead."
TopologyHybrid "%1$s: hybrid core type detected: %2$d %3$s cores."
TopologyHybridCoreEff "%1$s: %2$d with core efficiency %3$d."

# --- OpenMP errors detected at runtime ---
#
@@ -472,6 +473,12 @@ AffEqualTopologyTypes "%1$s: topology layer \"%2$s\" is equivalent to \"%
AffGranTooCoarseProcGroup "%1$s: granularity=%2$s is too coarse, setting granularity=group."
StgDeprecatedValue "%1$s: \"%2$s\" value is deprecated. Please use \"%3$s\" instead."
NumTeamsNotPositive "num_teams value must be positive, it is %1$d, using %2$d instead."
AffHWSubsetIncompat "KMP_HW_SUBSET ignored: %1$s, %2$s: attributes are ambiguous, please only specify one."
AffHWSubsetAttrRepeat "KMP_HW_SUBSET ignored: %1$s: attribute specified more than once."
AffHWSubsetAttrInvalid "KMP_HW_SUBSET ignored: %1$s: attribute value %2$s is invalid."
AffHWSubsetAllFiltered "KMP_HW_SUBSET ignored: all hardware resources would be filtered, please reduce the filter."
AffHWSubsetAttrsNonHybrid "KMP_HW_SUBSET ignored: Too many attributes specified. This machine is not a hybrid architecture."
AffHWSubsetIgnoringAttr "KMP_HW_SUBSET: ignoring %1$s attribute. This machine is not a hybrid architecture."

# --------------------------------------------------------------------------------------------------
-*- HINTS -*-
@@ -530,6 +537,7 @@ BadExeFormat "System error #193 is \"Bad format of EXE or DLL fi
               "Check whether \"%1$s\" is a file for %2$s architecture."
SystemLimitOnThreads "System-related limit on the number of threads."
SetNewBound "Try setting new bounds (preferably less than or equal to %1$d) for num_teams clause."
ValidValuesRange "Valid values are from %1$d to %2$d."


# --------------------------------------------------------------------------------------------------

@@ -618,6 +618,19 @@ enum kmp_hw_t : int {
  KMP_HW_LAST
};

typedef enum kmp_hw_core_type_t {
  KMP_HW_CORE_TYPE_UNKNOWN = 0x0,
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
  KMP_HW_CORE_TYPE_ATOM = 0x20,
  KMP_HW_CORE_TYPE_CORE = 0x40,
  KMP_HW_MAX_NUM_CORE_TYPES = 3,
#else
  KMP_HW_MAX_NUM_CORE_TYPES = 1,
#endif
} kmp_hw_core_type_t;

#define KMP_HW_MAX_NUM_CORE_EFFS 8

#define KMP_DEBUG_ASSERT_VALID_HW_TYPE(type) \
  KMP_DEBUG_ASSERT(type >= (kmp_hw_t)0 && type < KMP_HW_LAST)
#define KMP_ASSERT_VALID_HW_TYPE(type) \
@@ -629,6 +642,7 @@ enum kmp_hw_t : int {

const char *__kmp_hw_get_keyword(kmp_hw_t type, bool plural = false);
const char *__kmp_hw_get_catalog_string(kmp_hw_t type, bool plural = false);
const char *__kmp_hw_get_core_type_string(kmp_hw_core_type_t type);

/* Only Linux* OS and Windows* OS support thread affinity. */
#if KMP_AFFINITY_SUPPORTED

@@ -189,8 +189,11 @@ void kmp_hw_thread_t::print() const {
  for (int i = 0; i < depth; ++i) {
    printf("%4d ", ids[i]);
  }
  if (core_type != KMP_HW_CORE_TYPE_UNKNOWN) {
    printf(" (%s)", __kmp_hw_get_core_type_string(core_type));
  if (attrs) {
    if (attrs.is_core_type_valid())
      printf(" (%s)", __kmp_hw_get_core_type_string(attrs.get_core_type()));
    if (attrs.is_core_eff_valid())
      printf(" (eff=%d)", attrs.get_core_eff());
  }
  printf("\n");
}

@@ -391,12 +394,6 @@ void kmp_topology_t::_gather_enumeration_information() {
    count[i] = 0;
    ratio[i] = 0;
  }
  if (__kmp_is_hybrid_cpu()) {
    for (int i = 0; i < KMP_HW_MAX_NUM_CORE_TYPES; ++i) {
      core_types_count[i] = 0;
      core_types[i] = KMP_HW_CORE_TYPE_UNKNOWN;
    }
  }
  int core_level = get_level(KMP_HW_CORE);
  for (int i = 0; i < num_hw_threads; ++i) {
    kmp_hw_thread_t &hw_thread = hw_threads[i];

@@ -413,9 +410,29 @@ void kmp_topology_t::_gather_enumeration_information() {
        ratio[l] = max[l];
        max[l] = 1;
      }
      // Figure out the number of each core type for hybrid CPUs
      if (__kmp_is_hybrid_cpu() && core_level >= 0 && layer <= core_level)
        _increment_core_type(hw_thread.core_type);
      // Figure out the number of different core types
      // and efficiencies for hybrid CPUs
      if (__kmp_is_hybrid_cpu() && core_level >= 0 && layer <= core_level) {
        if (hw_thread.attrs.is_core_eff_valid() &&
            hw_thread.attrs.core_eff >= num_core_efficiencies) {
          // Because efficiencies can range from 0 to max efficiency - 1,
          // the number of efficiencies is max efficiency + 1
          num_core_efficiencies = hw_thread.attrs.core_eff + 1;
        }
        if (hw_thread.attrs.is_core_type_valid()) {
          bool found = false;
          for (int j = 0; j < num_core_types; ++j) {
            if (hw_thread.attrs.get_core_type() == core_types[j]) {
              found = true;
              break;
            }
          }
          if (!found) {
            KMP_ASSERT(num_core_types < KMP_HW_MAX_NUM_CORE_TYPES);
            core_types[num_core_types++] = hw_thread.attrs.get_core_type();
          }
        }
      }
      break;
    }
  }

@@ -429,6 +446,42 @@ void kmp_topology_t::_gather_enumeration_information() {
  }
}

int kmp_topology_t::_get_ncores_with_attr(const kmp_hw_attr_t &attr,
                                          int above_level,
                                          bool find_all) const {
  int current, current_max;
  int previous_id[KMP_HW_LAST];
  for (int i = 0; i < depth; ++i)
    previous_id[i] = kmp_hw_thread_t::UNKNOWN_ID;
  int core_level = get_level(KMP_HW_CORE);
  if (find_all)
    above_level = -1;
  KMP_ASSERT(above_level < core_level);
  current_max = 0;
  current = 0;
  for (int i = 0; i < num_hw_threads; ++i) {
    kmp_hw_thread_t &hw_thread = hw_threads[i];
    if (!find_all && hw_thread.ids[above_level] != previous_id[above_level]) {
      if (current > current_max)
        current_max = current;
      current = hw_thread.attrs.contains(attr);
    } else {
      for (int level = above_level + 1; level <= core_level; ++level) {
        if (hw_thread.ids[level] != previous_id[level]) {
          if (hw_thread.attrs.contains(attr))
            current++;
          break;
        }
      }
    }
    for (int level = 0; level < depth; ++level)
      previous_id[level] = hw_thread.ids[level];
  }
  if (current > current_max)
    current_max = current;
  return current_max;
}

// Find out if the topology is uniform
void kmp_topology_t::_discover_uniformity() {
  int num = 1;
@@ -517,6 +570,10 @@ kmp_topology_t *kmp_topology_t::allocate(int nproc, int ndepth,
  retval->types = (kmp_hw_t *)arr;
  retval->ratio = arr + (size_t)KMP_HW_LAST;
  retval->count = arr + 2 * (size_t)KMP_HW_LAST;
  retval->num_core_efficiencies = 0;
  retval->num_core_types = 0;
  for (int i = 0; i < KMP_HW_MAX_NUM_CORE_TYPES; ++i)
    retval->core_types[i] = KMP_HW_CORE_TYPE_UNKNOWN;
  KMP_FOREACH_HW_TYPE(type) { retval->equivalent[type] = KMP_HW_UNKNOWN; }
  for (int i = 0; i < ndepth; ++i) {
    retval->types[i] = types[i];

@@ -574,18 +631,12 @@ void kmp_topology_t::dump() const {
  }
  printf("\n");

  printf("* core_types:\n");
  for (int i = 0; i < KMP_HW_MAX_NUM_CORE_TYPES; ++i) {
    if (core_types[i] != KMP_HW_CORE_TYPE_UNKNOWN) {
      printf(" %d %s core%c\n", core_types_count[i],
             __kmp_hw_get_core_type_string(core_types[i]),
             ((core_types_count[i] > 1) ? 's' : ' '));
    } else {
      if (i == 0)
        printf("No hybrid information available\n");
      break;
    }
  }
  printf("* num_core_eff: %d\n", num_core_efficiencies);
  printf("* num_core_types: %d\n", num_core_types);
  printf("* core_types: ");
  for (int i = 0; i < num_core_types; ++i)
    printf("%3d ", core_types[i]);
  printf("\n");

  printf("* equivalent map:\n");
  KMP_FOREACH_HW_TYPE(i) {

@@ -680,12 +731,26 @@ void kmp_topology_t::print(const char *env_var) const {
  }
  KMP_INFORM(TopologyGeneric, env_var, buf.str, ncores);

  // Hybrid topology information
  if (__kmp_is_hybrid_cpu()) {
    for (int i = 0; i < KMP_HW_MAX_NUM_CORE_TYPES; ++i) {
      if (core_types[i] == KMP_HW_CORE_TYPE_UNKNOWN)
        break;
      KMP_INFORM(TopologyHybrid, env_var, core_types_count[i],
                 __kmp_hw_get_core_type_string(core_types[i]));
    for (int i = 0; i < num_core_types; ++i) {
      kmp_hw_core_type_t core_type = core_types[i];
      kmp_hw_attr_t attr;
      attr.clear();
      attr.set_core_type(core_type);
      int ncores = get_ncores_with_attr(attr);
      if (ncores > 0) {
        KMP_INFORM(TopologyHybrid, env_var, ncores,
                   __kmp_hw_get_core_type_string(core_type));
        KMP_ASSERT(num_core_efficiencies <= KMP_HW_MAX_NUM_CORE_EFFS)
        for (int eff = 0; eff < num_core_efficiencies; ++eff) {
          attr.set_core_eff(eff);
          int ncores_with_eff = get_ncores_with_attr(attr);
          if (ncores_with_eff > 0) {
            KMP_INFORM(TopologyHybridCoreEff, env_var, ncores_with_eff, eff);
          }
        }
      }
    }
  }

@@ -705,7 +770,8 @@ void kmp_topology_t::print(const char *env_var) const {
    }
    if (__kmp_is_hybrid_cpu())
      __kmp_str_buf_print(
          &buf, "(%s)", __kmp_hw_get_core_type_string(hw_threads[i].core_type));
          &buf, "(%s)",
          __kmp_hw_get_core_type_string(hw_threads[i].attrs.get_core_type()));
    KMP_INFORM(OSProcMapToPack, env_var, hw_threads[i].os_id, buf.str);
  }

@@ -816,6 +882,56 @@ void kmp_topology_t::canonicalize(int npackages, int ncores_per_pkg,
  _discover_uniformity();
}

// Represents running sub IDs for a single core attribute where
// attribute values have SIZE possibilities.
template <size_t SIZE, typename IndexFunc> struct kmp_sub_ids_t {
  int last_level; // last level in topology to consider for sub_ids
  int sub_id[SIZE]; // The sub ID for a given attribute value
  int prev_sub_id[KMP_HW_LAST];
  IndexFunc indexer;

public:
  kmp_sub_ids_t(int last_level) : last_level(last_level) {
    KMP_ASSERT(last_level < KMP_HW_LAST);
    for (size_t i = 0; i < SIZE; ++i)
      sub_id[i] = -1;
    for (size_t i = 0; i < KMP_HW_LAST; ++i)
      prev_sub_id[i] = -1;
  }
  void update(const kmp_hw_thread_t &hw_thread) {
    int idx = indexer(hw_thread);
    KMP_ASSERT(idx < (int)SIZE);
    for (int level = 0; level <= last_level; ++level) {
      if (hw_thread.sub_ids[level] != prev_sub_id[level]) {
        if (level < last_level)
          sub_id[idx] = -1;
        sub_id[idx]++;
        break;
      }
    }
    for (int level = 0; level <= last_level; ++level)
      prev_sub_id[level] = hw_thread.sub_ids[level];
  }
  int get_sub_id(const kmp_hw_thread_t &hw_thread) const {
    return sub_id[indexer(hw_thread)];
  }
};

static kmp_str_buf_t *
__kmp_hw_get_catalog_core_string(const kmp_hw_attr_t &attr, kmp_str_buf_t *buf,
                                 bool plural) {
  __kmp_str_buf_init(buf);
  if (attr.is_core_type_valid())
    __kmp_str_buf_print(buf, "%s %s",
                        __kmp_hw_get_core_type_string(attr.get_core_type()),
                        __kmp_hw_get_catalog_string(KMP_HW_CORE, plural));
  else
    __kmp_str_buf_print(buf, "%s eff=%d",
                        __kmp_hw_get_catalog_string(KMP_HW_CORE, plural),
                        attr.get_core_eff());
  return buf;
}

// Apply the KMP_HW_SUBSET envirable to the topology
// Returns true if KMP_HW_SUBSET filtered any processors
// otherwise, returns false
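As an aside, here is a minimal standalone sketch of the contract described in the comment above (an illustration, not the runtime's code): mark each hardware thread as kept or filtered, refuse to filter everything, then compact the kept threads in place, mirroring the "Apply the filter" loop further below.

// Minimal model of filter-then-compact over a hardware thread array.
// The keep() predicate is a stand-in for the per-item KMP_HW_SUBSET checks.
#include <cstdio>
#include <vector>

struct HwThread { int os_id; int core_eff; };

static bool keep(const HwThread &t) { return t.core_eff == 1; } // hypothetical rule

int main() {
  std::vector<HwThread> threads = {{0, 1}, {1, 1}, {2, 0}, {3, 0}};
  std::vector<bool> filtered(threads.size());
  size_t num_filtered = 0;
  for (size_t i = 0; i < threads.size(); ++i) {
    filtered[i] = !keep(threads[i]);
    if (filtered[i])
      num_filtered++;
  }
  if (num_filtered == threads.size()) {
    // The runtime warns (AffHWSubsetAllFiltered) and ignores the subset instead.
    printf("refusing to filter every hardware resource\n");
    return 1;
  }
  size_t new_index = 0;
  for (size_t i = 0; i < threads.size(); ++i) {
    if (!filtered[i])
      threads[new_index++] = threads[i]; // compact kept threads to the front
    // else: the runtime also clears os_id from the affinity mask here
  }
  threads.resize(new_index);
  printf("%zu hardware threads remain\n", threads.size());
  return 0;
}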
@@ -828,17 +944,23 @@ bool kmp_topology_t::filter_hw_subset() {
  __kmp_hw_subset->sort();

  // Check to see if KMP_HW_SUBSET is a valid subset of the detected topology
  bool using_core_types = false;
  bool using_core_effs = false;
  int hw_subset_depth = __kmp_hw_subset->get_depth();
  kmp_hw_t specified[KMP_HW_LAST];
  int topology_levels[hw_subset_depth];
  KMP_ASSERT(hw_subset_depth > 0);
  KMP_FOREACH_HW_TYPE(i) { specified[i] = KMP_HW_UNKNOWN; }
  int core_level = get_level(KMP_HW_CORE);
  for (int i = 0; i < hw_subset_depth; ++i) {
    int max_count;
    int num = __kmp_hw_subset->at(i).num;
    int offset = __kmp_hw_subset->at(i).offset;
    kmp_hw_t type = __kmp_hw_subset->at(i).type;
    const kmp_hw_subset_t::item_t &item = __kmp_hw_subset->at(i);
    int num = item.num[0];
    int offset = item.offset[0];
    kmp_hw_t type = item.type;
    kmp_hw_t equivalent_type = equivalent[type];
    int level = get_level(type);
    topology_levels[i] = level;

    // Check to see if current layer is in detected machine topology
    if (equivalent_type != KMP_HW_UNKNOWN) {

@@ -849,8 +971,8 @@ bool kmp_topology_t::filter_hw_subset() {
      return false;
    }

    // Check to see if current layer has already been specified
    // either directly or through an equivalent type
    // Check to see if current layer has already been
    // specified either directly or through an equivalent type
    if (specified[equivalent_type] != KMP_HW_UNKNOWN) {
      KMP_WARNING(AffHWSubsetEqvLayers, __kmp_hw_get_catalog_string(type),
                  __kmp_hw_get_catalog_string(specified[equivalent_type]));

@@ -866,41 +988,233 @@ bool kmp_topology_t::filter_hw_subset() {
                  __kmp_hw_get_catalog_string(type, plural));
      return false;
    }
  }

  // Apply the filtered hardware subset
  int new_index = 0;
  for (int i = 0; i < num_hw_threads; ++i) {
    kmp_hw_thread_t &hw_thread = hw_threads[i];
    // Check to see if this hardware thread should be filtered
    bool should_be_filtered = false;
    for (int level = 0, hw_subset_index = 0;
         level < depth && hw_subset_index < hw_subset_depth; ++level) {
      kmp_hw_t topology_type = types[level];
      auto hw_subset_item = __kmp_hw_subset->at(hw_subset_index);
      kmp_hw_t hw_subset_type = hw_subset_item.type;
      if (topology_type != hw_subset_type)
        continue;
      int num = hw_subset_item.num;
      int offset = hw_subset_item.offset;
      hw_subset_index++;
      if (hw_thread.sub_ids[level] < offset ||
          hw_thread.sub_ids[level] >= offset + num) {
        should_be_filtered = true;
        break;
    // Check to see if core attributes are consistent
    if (core_level == level) {
      // Determine which core attributes are specified
      for (int j = 0; j < item.num_attrs; ++j) {
        if (item.attr[j].is_core_type_valid())
          using_core_types = true;
        if (item.attr[j].is_core_eff_valid())
          using_core_effs = true;
      }

      // Check if using a single core attribute on non-hybrid arch.
      // Do not ignore all of KMP_HW_SUBSET, just ignore the attribute.
      //
      // Check if using multiple core attributes on non-hybrid arch.
      // Ignore all of KMP_HW_SUBSET if this is the case.
      if ((using_core_effs || using_core_types) && !__kmp_is_hybrid_cpu()) {
        if (item.num_attrs == 1) {
          if (using_core_effs) {
            KMP_WARNING(AffHWSubsetIgnoringAttr, "efficiency");
          } else {
            KMP_WARNING(AffHWSubsetIgnoringAttr, "core_type");
          }
          using_core_effs = false;
          using_core_types = false;
        } else {
          KMP_WARNING(AffHWSubsetAttrsNonHybrid);
          return false;
        }
      }

      // Check if using both core types and core efficiencies together
      if (using_core_types && using_core_effs) {
        KMP_WARNING(AffHWSubsetIncompat, "core_type", "efficiency");
        return false;
      }

      // Check that core efficiency values are valid
      if (using_core_effs) {
        for (int j = 0; j < item.num_attrs; ++j) {
          if (item.attr[j].is_core_eff_valid()) {
            int core_eff = item.attr[j].get_core_eff();
            if (core_eff < 0 || core_eff >= num_core_efficiencies) {
              kmp_str_buf_t buf;
              __kmp_str_buf_init(&buf);
              __kmp_str_buf_print(&buf, "%d", item.attr[j].get_core_eff());
              __kmp_msg(kmp_ms_warning,
                        KMP_MSG(AffHWSubsetAttrInvalid, "efficiency", buf.str),
                        KMP_HNT(ValidValuesRange, 0, num_core_efficiencies - 1),
                        __kmp_msg_null);
              __kmp_str_buf_free(&buf);
              return false;
            }
          }
        }
      }

      // Check that the number of requested cores with attributes is valid
      if (using_core_types || using_core_effs) {
        for (int j = 0; j < item.num_attrs; ++j) {
          int num = item.num[j];
          int offset = item.offset[j];
          int level_above = core_level - 1;
          if (level_above >= 0) {
            max_count = get_ncores_with_attr_per(item.attr[j], level_above);
            if (max_count <= 0 || num + offset > max_count) {
              kmp_str_buf_t buf;
              __kmp_hw_get_catalog_core_string(item.attr[j], &buf, num > 0);
              KMP_WARNING(AffHWSubsetManyGeneric, buf.str);
              __kmp_str_buf_free(&buf);
              return false;
            }
          }
        }
      }

      if ((using_core_types || using_core_effs) && item.num_attrs > 1) {
        for (int j = 0; j < item.num_attrs; ++j) {
          // Ambiguous use of specific core attribute + generic core
          // e.g., 4c & 3c:intel_core or 4c & 3c:eff1
          if (!item.attr[j]) {
            kmp_hw_attr_t other_attr;
            for (int k = 0; k < item.num_attrs; ++k) {
              if (item.attr[k] != item.attr[j]) {
                other_attr = item.attr[k];
                break;
              }
            }
            kmp_str_buf_t buf;
            __kmp_hw_get_catalog_core_string(other_attr, &buf, item.num[j] > 0);
            KMP_WARNING(AffHWSubsetIncompat,
                        __kmp_hw_get_catalog_string(KMP_HW_CORE), buf.str);
            __kmp_str_buf_free(&buf);
            return false;
          }
          // Allow specifying a specific core type or core eff exactly once
          for (int k = 0; k < j; ++k) {
            if (!item.attr[j] || !item.attr[k])
              continue;
            if (item.attr[k] == item.attr[j]) {
              kmp_str_buf_t buf;
              __kmp_hw_get_catalog_core_string(item.attr[j], &buf,
                                               item.num[j] > 0);
              KMP_WARNING(AffHWSubsetAttrRepeat, buf.str);
              __kmp_str_buf_free(&buf);
              return false;
            }
          }
        }
      }
    }
  }
    if (!should_be_filtered) {
    }

  struct core_type_indexer {
    int operator()(const kmp_hw_thread_t &t) const {
      switch (t.attrs.get_core_type()) {
      case KMP_HW_CORE_TYPE_ATOM:
        return 1;
      case KMP_HW_CORE_TYPE_CORE:
        return 2;
      case KMP_HW_CORE_TYPE_UNKNOWN:
        return 0;
      }
      KMP_ASSERT(0);
      return 0;
    }
  };
  struct core_eff_indexer {
    int operator()(const kmp_hw_thread_t &t) const {
      return t.attrs.get_core_eff();
    }
  };

  kmp_sub_ids_t<KMP_HW_MAX_NUM_CORE_TYPES, core_type_indexer> core_type_sub_ids(
      core_level);
  kmp_sub_ids_t<KMP_HW_MAX_NUM_CORE_EFFS, core_eff_indexer> core_eff_sub_ids(
      core_level);

  // Determine which hardware threads should be filtered.
  int num_filtered = 0;
  bool *filtered = (bool *)__kmp_allocate(sizeof(bool) * num_hw_threads);
  for (int i = 0; i < num_hw_threads; ++i) {
    kmp_hw_thread_t &hw_thread = hw_threads[i];
    // Update type_sub_id
    if (using_core_types)
      core_type_sub_ids.update(hw_thread);
    if (using_core_effs)
      core_eff_sub_ids.update(hw_thread);

    // Check to see if this hardware thread should be filtered
    bool should_be_filtered = false;
    for (int hw_subset_index = 0; hw_subset_index < hw_subset_depth;
         ++hw_subset_index) {
      const auto &hw_subset_item = __kmp_hw_subset->at(hw_subset_index);
      int level = topology_levels[hw_subset_index];
      if (level == -1)
        continue;
      if ((using_core_effs || using_core_types) && level == core_level) {
        // Look for the core attribute in KMP_HW_SUBSET which corresponds
        // to this hardware thread's core attribute. Use this num,offset plus
        // the running sub_id for the particular core attribute of this hardware
        // thread to determine if the hardware thread should be filtered or not.
        int attr_idx;
        kmp_hw_core_type_t core_type = hw_thread.attrs.get_core_type();
        int core_eff = hw_thread.attrs.get_core_eff();
        for (attr_idx = 0; attr_idx < hw_subset_item.num_attrs; ++attr_idx) {
          if (using_core_types &&
              hw_subset_item.attr[attr_idx].get_core_type() == core_type)
            break;
          if (using_core_effs &&
              hw_subset_item.attr[attr_idx].get_core_eff() == core_eff)
            break;
        }
        // This core attribute isn't in the KMP_HW_SUBSET so always filter it.
        if (attr_idx == hw_subset_item.num_attrs) {
          should_be_filtered = true;
          break;
        }
        int sub_id;
        int num = hw_subset_item.num[attr_idx];
        int offset = hw_subset_item.offset[attr_idx];
        if (using_core_types)
          sub_id = core_type_sub_ids.get_sub_id(hw_thread);
        else
          sub_id = core_eff_sub_ids.get_sub_id(hw_thread);
        if (sub_id < offset || sub_id >= offset + num) {
          should_be_filtered = true;
          break;
        }
      } else {
        int num = hw_subset_item.num[0];
        int offset = hw_subset_item.offset[0];
        if (hw_thread.sub_ids[level] < offset ||
            hw_thread.sub_ids[level] >= offset + num) {
          should_be_filtered = true;
          break;
        }
      }
    }
    // Collect filtering information
    filtered[i] = should_be_filtered;
    if (should_be_filtered)
      num_filtered++;
  }

  // One last check that we shouldn't allow filtering entire machine
  if (num_filtered == num_hw_threads) {
    KMP_WARNING(AffHWSubsetAllFiltered);
    __kmp_free(filtered);
    return false;
  }

  // Apply the filter
  int new_index = 0;
  for (int i = 0; i < num_hw_threads; ++i) {
    if (!filtered[i]) {
      if (i != new_index)
        hw_threads[new_index] = hw_thread;
        hw_threads[new_index] = hw_threads[i];
      new_index++;
    } else {
#if KMP_AFFINITY_SUPPORTED
      KMP_CPU_CLR(hw_thread.os_id, __kmp_affin_fullMask);
      KMP_CPU_CLR(hw_threads[i].os_id, __kmp_affin_fullMask);
#endif
      __kmp_avail_proc--;
    }
  }

  KMP_DEBUG_ASSERT(new_index <= num_hw_threads);
  num_hw_threads = new_index;

@@ -909,6 +1223,7 @@ bool kmp_topology_t::filter_hw_subset() {
  _discover_uniformity();
  _set_globals();
  _set_last_level_cache();
  __kmp_free(filtered);
  return true;
}

@@ -1461,8 +1776,10 @@ static bool __kmp_affinity_create_hwloc_map(kmp_i18n_id_t *const msg_id) {
          break;
        }
      }
      if (cpukind_index >= 0)
        hw_thread.core_type = cpukinds[cpukind_index].core_type;
      if (cpukind_index >= 0) {
        hw_thread.attrs.set_core_type(cpukinds[cpukind_index].core_type);
        hw_thread.attrs.set_core_eff(cpukinds[cpukind_index].efficiency);
      }
    }
    index--;
  }

@@ -2040,11 +2357,21 @@ static bool __kmp_affinity_create_apicid_map(kmp_i18n_id_t *const msg_id) {

// Hybrid cpu detection using CPUID.1A
// Thread should be pinned to processor already
static void __kmp_get_hybrid_info(kmp_hw_core_type_t *type,
static void __kmp_get_hybrid_info(kmp_hw_core_type_t *type, int *efficiency,
                                  unsigned *native_model_id) {
  kmp_cpuid buf;
  __kmp_x86_cpuid(0x1a, 0, &buf);
  *type = (kmp_hw_core_type_t)__kmp_extract_bits<24, 31>(buf.eax);
  switch (*type) {
  case KMP_HW_CORE_TYPE_ATOM:
    *efficiency = 0;
    break;
  case KMP_HW_CORE_TYPE_CORE:
    *efficiency = 1;
    break;
  default:
    *efficiency = 0;
  }
  *native_model_id = __kmp_extract_bits<0, 23>(buf.eax);
}

@@ -2321,8 +2648,10 @@ static bool __kmp_affinity_create_x2apicid_map(kmp_i18n_id_t *const msg_id) {
    if (__kmp_is_hybrid_cpu() && highest_leaf >= 0x1a) {
      kmp_hw_core_type_t type;
      unsigned native_model_id;
      __kmp_get_hybrid_info(&type, &native_model_id);
      hw_thread.core_type = type;
      int efficiency;
      __kmp_get_hybrid_info(&type, &efficiency, &native_model_id);
      hw_thread.attrs.set_core_type(type);
      hw_thread.attrs.set_core_eff(efficiency);
    }
    hw_thread_index++;
  }

@@ -598,16 +598,62 @@ class KMPNativeAffinity : public KMPAffinity {
#endif /* KMP_OS_WINDOWS */
#endif /* KMP_AFFINITY_SUPPORTED */

typedef enum kmp_hw_core_type_t {
  KMP_HW_CORE_TYPE_UNKNOWN = 0x0,
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
  KMP_HW_CORE_TYPE_ATOM = 0x20,
  KMP_HW_CORE_TYPE_CORE = 0x40,
  KMP_HW_MAX_NUM_CORE_TYPES = 3,
#else
  KMP_HW_MAX_NUM_CORE_TYPES = 1,
#endif
} kmp_hw_core_type_t;
// Describe an attribute for a level in the machine topology
struct kmp_hw_attr_t {
  int core_type : 8;
  int core_eff : 8;
  unsigned valid : 1;
  unsigned reserved : 15;

  static const int UNKNOWN_CORE_EFF = -1;

  kmp_hw_attr_t()
      : core_type(KMP_HW_CORE_TYPE_UNKNOWN), core_eff(UNKNOWN_CORE_EFF),
        valid(0), reserved(0) {}
  void set_core_type(kmp_hw_core_type_t type) {
    valid = 1;
    core_type = type;
  }
  void set_core_eff(int eff) {
    valid = 1;
    core_eff = eff;
  }
  kmp_hw_core_type_t get_core_type() const {
    return (kmp_hw_core_type_t)core_type;
  }
  int get_core_eff() const { return core_eff; }
  bool is_core_type_valid() const {
    return core_type != KMP_HW_CORE_TYPE_UNKNOWN;
  }
  bool is_core_eff_valid() const { return core_eff != UNKNOWN_CORE_EFF; }
  operator bool() const { return valid; }
  void clear() {
    core_type = KMP_HW_CORE_TYPE_UNKNOWN;
    core_eff = UNKNOWN_CORE_EFF;
    valid = 0;
  }
  bool contains(const kmp_hw_attr_t &other) const {
    if (!valid && !other.valid)
      return true;
    if (valid && other.valid) {
      if (other.is_core_type_valid()) {
        if (!is_core_type_valid() || (get_core_type() != other.get_core_type()))
          return false;
      }
      if (other.is_core_eff_valid()) {
        if (!is_core_eff_valid() || (get_core_eff() != other.get_core_eff()))
          return false;
      }
      return true;
    }
    return false;
  }
  bool operator==(const kmp_hw_attr_t &rhs) const {
    return (rhs.valid == valid && rhs.core_eff == core_eff &&
            rhs.core_type == core_type);
  }
  bool operator!=(const kmp_hw_attr_t &rhs) const { return !operator==(rhs); }
};
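A small standalone sketch of the contains() semantics above (a simplified restatement with a toy encoding, not the header itself): 'other' plays the role of the KMP_HW_SUBSET filter, and every field the filter specifies must match the hardware thread's attribute.

// Simplified model of kmp_hw_attr_t::contains(): 'other' is the filter taken
// from KMP_HW_SUBSET; fields the filter leaves unspecified are not checked.
#include <cstdio>

struct Attr {
  int core_type = -1; // -1 stands in for "not specified" (toy encoding)
  int core_eff = -1;
  bool contains(const Attr &other) const {
    if (other.core_type != -1 &&
        (core_type == -1 || core_type != other.core_type))
      return false;
    if (other.core_eff != -1 && (core_eff == -1 || core_eff != other.core_eff))
      return false;
    return true;
  }
};

int main() {
  // A hardware thread on a hypothetical P-core: type "intel_core", efficiency 1.
  Attr thread;
  thread.core_type = 1; // 1 ~ intel_core, 0 ~ intel_atom (toy encoding)
  thread.core_eff = 1;

  Attr want_core; want_core.core_type = 1; // filter "Nc:intel_core"
  Attr want_atom; want_atom.core_type = 0; // filter "Nc:intel_atom"
  Attr want_eff0; want_eff0.core_eff = 0;  // filter "Nc:eff0"

  printf("intel_core filter: %d\n", thread.contains(want_core)); // 1
  printf("intel_atom filter: %d\n", thread.contains(want_atom)); // 0
  printf("eff0 filter:       %d\n", thread.contains(want_eff0)); // 0
  return 0;
}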

class kmp_hw_thread_t {
public:
@@ -618,14 +664,14 @@ public:
  int sub_ids[KMP_HW_LAST];
  bool leader;
  int os_id;
  kmp_hw_core_type_t core_type;
  kmp_hw_attr_t attrs;

  void print() const;
  void clear() {
    for (int i = 0; i < (int)KMP_HW_LAST; ++i)
      ids[i] = UNKNOWN_ID;
    leader = false;
    core_type = KMP_HW_CORE_TYPE_UNKNOWN;
    attrs.clear();
  }
};

@@ -653,10 +699,11 @@ class kmp_topology_t {
  // Storage containing the absolute number of each topology layer
  int *count;

  // Storage containing the core types and the number of
  // each core type for hybrid processors
  // The number of core efficiencies. This is only useful for hybrid
  // topologies. Core efficiencies will range from 0 to num efficiencies - 1
  int num_core_efficiencies;
  int num_core_types;
  kmp_hw_core_type_t core_types[KMP_HW_MAX_NUM_CORE_TYPES];
  int core_types_count[KMP_HW_MAX_NUM_CORE_TYPES];

  // The hardware threads array
  // hw_threads is num_hw_threads long

@@ -704,19 +751,11 @@ class kmp_topology_t {
  // Set the last level cache equivalent type
  void _set_last_level_cache();

  // Increments the number of cores of type 'type'
  void _increment_core_type(kmp_hw_core_type_t type) {
    for (int i = 0; i < KMP_HW_MAX_NUM_CORE_TYPES; ++i) {
      if (core_types[i] == KMP_HW_CORE_TYPE_UNKNOWN) {
        core_types[i] = type;
        core_types_count[i] = 1;
        break;
      } else if (core_types[i] == type) {
        core_types_count[i]++;
        break;
      }
    }
  }
  // Return the number of cores with a particular attribute, 'attr'.
  // If 'find_all' is true, then find all cores on the machine, otherwise find
  // all cores per the layer 'above'
  int _get_ncores_with_attr(const kmp_hw_attr_t &attr, int above,
                            bool find_all = false) const;

public:
  // Force use of allocate()/deallocate()
@@ -807,6 +846,16 @@ public:
    KMP_DEBUG_ASSERT(level >= 0 && level < depth);
    return count[level];
  }
  // Return the total number of cores with attribute 'attr'
  int get_ncores_with_attr(const kmp_hw_attr_t &attr) const {
    return _get_ncores_with_attr(attr, -1, true);
  }
  // Return the number of cores with attribute
  // 'attr' per topology level 'above'
  int get_ncores_with_attr_per(const kmp_hw_attr_t &attr, int above) const {
    return _get_ncores_with_attr(attr, above, false);
  }

#if KMP_AFFINITY_SUPPORTED
  void sort_compact() {
    qsort(hw_threads, num_hw_threads, sizeof(kmp_hw_thread_t),
@@ -819,11 +868,16 @@ public:
extern kmp_topology_t *__kmp_topology;

class kmp_hw_subset_t {
  const static size_t MAX_ATTRS = KMP_HW_MAX_NUM_CORE_EFFS;

public:
  // Describe a machine topology item in KMP_HW_SUBSET
  struct item_t {
    int num;
    kmp_hw_t type;
    int offset;
    int num_attrs;
    int num[MAX_ATTRS];
    int offset[MAX_ATTRS];
    kmp_hw_attr_t attr[MAX_ATTRS];
  };

private:
@@ -869,7 +923,20 @@ public:
  }
  void set_absolute() { absolute = true; }
  bool is_absolute() const { return absolute; }
  void push_back(int num, kmp_hw_t type, int offset) {
  void push_back(int num, kmp_hw_t type, int offset, kmp_hw_attr_t attr) {
    for (int i = 0; i < depth; ++i) {
      // Found an existing item for this layer type
      // Add the num, offset, and attr to this item
      if (items[i].type == type) {
        int idx = items[i].num_attrs++;
        if ((size_t)idx >= MAX_ATTRS)
          return;
        items[i].num[idx] = num;
        items[i].offset[idx] = offset;
        items[i].attr[idx] = attr;
        return;
      }
    }
    if (depth == capacity - 1) {
      capacity *= 2;
      item_t *new_items = (item_t *)__kmp_allocate(sizeof(item_t) * capacity);
@@ -878,9 +945,11 @@ public:
      __kmp_free(items);
      items = new_items;
    }
    items[depth].num = num;
    items[depth].num_attrs = 1;
    items[depth].type = type;
    items[depth].offset = offset;
    items[depth].num[0] = num;
    items[depth].offset[0] = offset;
    items[depth].attr[0] = attr;
    depth++;
    set |= (1ull << type);
  }

@@ -912,8 +981,19 @@ public:
    printf("* depth: %d\n", depth);
    printf("* items:\n");
    for (int i = 0; i < depth; ++i) {
      printf("num: %d, type: %s, offset: %d\n", items[i].num,
             __kmp_hw_get_keyword(items[i].type), items[i].offset);
      printf(" type: %s\n", __kmp_hw_get_keyword(items[i].type));
      for (int j = 0; j < items[i].num_attrs; ++j) {
        printf(" num: %d, offset: %d, attr: ", items[i].num[j],
               items[i].offset[j]);
        if (!items[i].attr[j]) {
          printf(" (none)\n");
        } else {
          printf(
              " core_type = %s, core_eff = %d\n",
              __kmp_hw_get_core_type_string(items[i].attr[j].get_core_type()),
              items[i].attr[j].get_core_eff());
        }
      }
    }
    printf("* set: 0x%llx\n", set);
    printf("* absolute: %d\n", absolute);

@@ -4961,28 +4961,76 @@ static void __kmp_stg_parse_hw_subset(char const *name, char const *value,

  // Check each component
  for (int i = 0; i < level; ++i) {
    int offset = 0;
    int num = atoi(components[i]); // each component should start with a number
    if (num <= 0) {
      goto err; // only positive integers are valid for count
    int core_level = 0;
    char *core_components[MAX_T_LEVEL];
    // Split possible core components by '&' delimiter
    pos = components[i];
    core_components[core_level++] = pos;
    while ((pos = strchr(pos, '&'))) {
      if (core_level >= MAX_T_LEVEL)
        goto err; // too many different core types
      *pos = '\0'; // modify input and avoid more copying
      core_components[core_level++] = ++pos; // expect something after '&'
    }
    if ((pos = strchr(components[i], '@'))) {
      offset = atoi(pos + 1); // save offset
      *pos = '\0'; // cut the offset from the component

    for (int j = 0; j < core_level; ++j) {
      char *offset_ptr;
      char *attr_ptr;
      int offset = 0;
      kmp_hw_attr_t attr;
      int num =
          atoi(core_components[j]); // each component should start with a number
      if (num <= 0) {
        goto err; // only positive integers are valid for count
      }

      offset_ptr = strchr(core_components[j], '@');
      attr_ptr = strchr(core_components[j], ':');

      if (offset_ptr) {
        offset = atoi(offset_ptr + 1); // save offset
        *offset_ptr = '\0'; // cut the offset from the component
      }
      if (attr_ptr) {
        attr.clear();
        // save the attribute
        if (__kmp_str_match("intel_core", -1, attr_ptr + 1)) {
          attr.set_core_type(KMP_HW_CORE_TYPE_CORE);
        } else if (__kmp_str_match("intel_atom", -1, attr_ptr + 1)) {
          attr.set_core_type(KMP_HW_CORE_TYPE_ATOM);
        } else if (__kmp_str_match("eff", 3, attr_ptr + 1)) {
          const char *number = attr_ptr + 1;
          // skip the eff[iciency] token
          while (isalpha(*number))
            number++;
          if (!isdigit(*number)) {
            goto err;
          }
          int efficiency = atoi(number);
          attr.set_core_eff(efficiency);
        } else {
          goto err;
        }
        *attr_ptr = '\0'; // cut the attribute from the component
      }
      pos = core_components[j] + strspn(core_components[j], digits);
      if (pos == core_components[j]) {
        goto err;
      }
      // detect the component type
      kmp_hw_t type = __kmp_stg_parse_hw_subset_name(pos);
      if (type == KMP_HW_UNKNOWN) {
        goto err;
      }
      // Only the core type can have attributes
      if (attr && type != KMP_HW_CORE)
        goto err;
      // Must allow core to be specified more than once
      if (type != KMP_HW_CORE && __kmp_hw_subset->specified(type)) {
        goto err;
      }
      __kmp_hw_subset->push_back(num, type, offset, attr);
    }
    pos = components[i] + strspn(components[i], digits);
    if (pos == components[i]) {
      goto err;
    }
    // detect the component type
    kmp_hw_t type = __kmp_stg_parse_hw_subset_name(pos);
    if (type == KMP_HW_UNKNOWN) {
      goto err;
    }
    if (__kmp_hw_subset->specified(type)) {
      goto err;
    }
    __kmp_hw_subset->push_back(num, type, offset);
  }
  return;
err:
@@ -4994,6 +5042,21 @@ err:
  return;
}

static inline const char *
__kmp_hw_get_core_type_keyword(kmp_hw_core_type_t type) {
  switch (type) {
  case KMP_HW_CORE_TYPE_UNKNOWN:
    return "unknown";
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
  case KMP_HW_CORE_TYPE_ATOM:
    return "intel_atom";
  case KMP_HW_CORE_TYPE_CORE:
    return "intel_core";
#endif
  }
  return "unknown";
}

static void __kmp_stg_print_hw_subset(kmp_str_buf_t *buffer, char const *name,
                                      void *data) {
  kmp_str_buf_t buf;
@@ -5009,10 +5072,20 @@ static void __kmp_stg_print_hw_subset(kmp_str_buf_t *buffer, char const *name,
  depth = __kmp_hw_subset->get_depth();
  for (int i = 0; i < depth; ++i) {
    const auto &item = __kmp_hw_subset->at(i);
    __kmp_str_buf_print(&buf, "%s%d%s", (i > 0 ? "," : ""), item.num,
                        __kmp_hw_get_keyword(item.type));
    if (item.offset)
      __kmp_str_buf_print(&buf, "@%d", item.offset);
    if (i > 0)
      __kmp_str_buf_print(&buf, "%c", ',');
    for (int j = 0; j < item.num_attrs; ++j) {
      __kmp_str_buf_print(&buf, "%s%d%s", (j > 0 ? "&" : ""), item.num[j],
                          __kmp_hw_get_keyword(item.type));
      if (item.attr[j].is_core_type_valid())
        __kmp_str_buf_print(
            &buf, ":%s",
            __kmp_hw_get_core_type_keyword(item.attr[j].get_core_type()));
      if (item.attr[j].is_core_eff_valid())
        __kmp_str_buf_print(&buf, ":eff%d", item.attr[j].get_core_eff());
      if (item.offset[j])
        __kmp_str_buf_print(&buf, "@%d", item.offset[j]);
    }
  }
  __kmp_str_buf_print(buffer, "%s'\n", buf.str);
  __kmp_str_buf_free(&buf);
