Fix Metric Activation Procedure

This patch deactivates previously activated metric groups
if they are not part of the current list

Related-To: LOCI-3160

Signed-off-by: Joshua Santosh Ranjan <joshua.santosh.ranjan@intel.com>
This commit is contained in:
Joshua Santosh Ranjan
2022-07-04 11:02:11 +00:00
committed by Compute-Runtime-Automation
parent 6cb44ae0d9
commit 6292a3304a
4 changed files with 56 additions and 36 deletions

View File

@ -918,17 +918,14 @@ ze_result_t DeviceImp::systemBarrier() { return ZE_RESULT_ERROR_UNSUPPORTED_FEAT
// Forwards a deferred metric-group activation request (count, phMetricGroups)
// to the metric device context(s) behind this device.
//
// NOTE(review): this listing appears to interleave the pre-change and
// post-change lines of a patch (no change markers are visible): the same call
// shows up once with its result checked and once unchecked, and two return
// statements follow each other. Confirm against the real source file before
// relying on the exact control flow shown here.
ze_result_t DeviceImp::activateMetricGroupsDeferred(uint32_t count,
zet_metric_group_handle_t *phMetricGroups) {
ze_result_t result = ZE_RESULT_ERROR_UNKNOWN;
// Not a sub-device and implicit-scaling capable: forward the request to the
// metric device context of every sub-device.
if (!this->isSubdevice && this->isImplicitScalingCapable()) {
for (auto subDevice : this->subDevices) {
result = subDevice->getMetricDeviceContext().activateMetricGroupsDeferred(count, phMetricGroups);
// Stop at the first sub-device that does not report success.
if (result != ZE_RESULT_SUCCESS)
break;
subDevice->getMetricDeviceContext().activateMetricGroupsDeferred(count, phMetricGroups);
}
} else {
// Otherwise forward the request to this device's own metric context.
result = metricContext->activateMetricGroupsDeferred(count, phMetricGroups);
metricContext->activateMetricGroupsDeferred(count, phMetricGroups);
}
return result;
// NOTE(review): unreachable as listed, because it follows `return result;`.
return ZE_RESULT_SUCCESS;
}
// Accessor: hands back the execEnvironment pointer stored on this device.
void *DeviceImp::getExecEnvironment() {
    return execEnvironment;
}

View File

@ -83,14 +83,40 @@ ze_result_t MetricDeviceContext::metricGroupGet(uint32_t *pCount, zet_metric_gro
return result;
}
ze_result_t MetricDeviceContext::activateMetricGroupsDeferred(uint32_t count, zet_metric_group_handle_t *phMetricGroups) {
void MetricDeviceContext::activateMetricGroupsDeferred(uint32_t count, zet_metric_group_handle_t *phMetricGroups) {
// Activation: postpone until zetMetricStreamerOpen or zeCommandQueueExecuteCommandLists
// Deactivation: execute immediately.
if (phMetricGroups == nullptr) {
return deActivateAllDomains();
deActivateAllDomains();
return;
}
auto isMetricGroupProvided = [phMetricGroups, count](const zet_metric_group_handle_t hMetricGroup) {
for (auto index = 0u; index < count; index++) {
if (hMetricGroup == phMetricGroups[index]) {
return true;
}
}
return false;
};
// Deactivate existing metric groups which are not provided in phMetricGroups
std::vector<uint32_t> deactivateList = {};
for (const auto &[domainId, metricGroupPair] : domains) {
const auto &hMetricGroup = metricGroupPair.first;
if (isMetricGroupProvided(hMetricGroup) == false) {
deActivateDomain(domainId);
deactivateList.push_back(domainId);
}
}
// Remove deactivated ones from the map
for (const auto &domainId : deactivateList) {
domains.erase(domainId);
}
// Activate-deferred new metric groups if any
for (auto index = 0u; index < count; index++) {
zet_metric_group_handle_t hMetricGroup = MetricGroup::fromHandle(phMetricGroups[index])->getMetricGroupForSubDevice(subDeviceIndex);
@ -102,17 +128,9 @@ ze_result_t MetricDeviceContext::activateMetricGroupsDeferred(uint32_t count, ze
continue;
}
// Domain empty; so create a new deactivated association.
if (domains[domain].first == nullptr) {
domains[domain].first = hMetricGroup;
domains[domain].second = false;
continue;
}
// Attempt to overwrite a previous association is an error.
return ZE_RESULT_ERROR_UNKNOWN;
domains[domain].first = hMetricGroup;
domains[domain].second = false;
}
return ZE_RESULT_SUCCESS;
}
ze_result_t MetricDeviceContext::activateAllDomains() {
@ -125,16 +143,18 @@ ze_result_t MetricDeviceContext::activateAllDomains() {
return ZE_RESULT_SUCCESS;
}
ze_result_t MetricDeviceContext::deActivateAllDomains() {
// Deactivates the metric group associated with `domain`, if that group is
// currently flagged as activated (pair.second == true). The association itself
// stays in `domains`; removing it is left to the caller.
//
// The lookup uses find() instead of operator[] so that a domain with no
// association is a pure no-op. operator[] on std::map would default-insert an
// empty {nullptr, false} entry as a side effect of the query.
void MetricDeviceContext::deActivateDomain(uint32_t domain) {
    const auto entry = domains.find(domain);
    if (entry == domains.end()) {
        // Nothing is associated with this domain; do not touch the map.
        return;
    }

    const auto &[hMetricGroup, isActivated] = entry->second;
    if (isActivated) {
        MetricGroup::fromHandle(hMetricGroup)->deactivate();
    }
}
// Deactivates the metric group of every domain tracked in `domains`, then
// empties the map.
//
// NOTE(review): this listing appears to interleave the pre-change and
// post-change lines of a patch (no change markers are visible): the loop both
// deactivates inline and calls deActivateDomain(), and a value is returned
// from a function declared void. Confirm against the real source file before
// relying on the exact statements shown here.
void MetricDeviceContext::deActivateAllDomains() {
for (auto &entry : domains) {
auto &metricGroup = entry.second;
// Only groups flagged as activated (pair.second == true) are deactivated.
if (metricGroup.second == true) {
MetricGroup::fromHandle(metricGroup.first)->deactivate();
}
// Reset the stored association to a value-initialized pair.
metricGroup = {};
deActivateDomain(entry.first);
}
// Drop all domain associations.
domains.clear();
return ZE_RESULT_SUCCESS;
}
ze_result_t MetricDeviceContext::appendMetricMemoryBarrier(CommandList &commandList) {

View File

@ -42,7 +42,7 @@ class MetricDeviceContext {
public:
MetricDeviceContext(Device &device);
ze_result_t metricGroupGet(uint32_t *pCount, zet_metric_group_handle_t *phMetricGroups);
ze_result_t activateMetricGroupsDeferred(uint32_t count, zet_metric_group_handle_t *phMetricGroups);
void activateMetricGroupsDeferred(uint32_t count, zet_metric_group_handle_t *phMetricGroups);
ze_result_t activateMetricGroups();
ze_result_t appendMetricMemoryBarrier(CommandList &commandList);
bool isMetricGroupActivated(const zet_metric_group_handle_t hMetricGroup) const;
@ -60,7 +60,8 @@ class MetricDeviceContext {
private:
bool enable();
ze_result_t activateAllDomains();
ze_result_t deActivateAllDomains();
void deActivateAllDomains();
void deActivateDomain(uint32_t domain);
struct Device &device;
std::map<uint32_t, std::pair<zet_metric_group_handle_t, bool>> domains;
bool multiDeviceCapable = false;

View File

@ -969,7 +969,7 @@ TEST_F(MultiDeviceMetricEnumerationTest, givenMultipleDevicesAndValidEventBasedM
EXPECT_EQ(zetContextActivateMetricGroups(context->toHandle(), devices[0]->toHandle(), 1, &metricGroupHandle), ZE_RESULT_SUCCESS);
}
TEST_F(MultiDeviceMetricEnumerationTest, givenMultipleDevicesAndTwoMetricGroupsWithTheSameDomainsWhenzetContextActivateMetricGroupsIsCalledThenReturnsFail) {
TEST_F(MultiDeviceMetricEnumerationTest, givenMultipleDevicesAndTwoMetricGroupsWithTheSameDomainsWhenzetContextActivateMetricGroupsIsCalledThenReturnsSuccess) {
auto &deviceImp = *static_cast<DeviceImp *>(devices[0]);
const uint32_t subDeviceCount = static_cast<uint32_t>(deviceImp.subDevices.size());
@ -1084,7 +1084,7 @@ TEST_F(MultiDeviceMetricEnumerationTest, givenMultipleDevicesAndTwoMetricGroupsW
EXPECT_EQ(ZE_RESULT_SUCCESS, zetMetricGroupGetProperties(metricGroupHandles[1], &properties1));
// Activate metric groups.
EXPECT_EQ(zetContextActivateMetricGroups(context->toHandle(), devices[0]->toHandle(), 4, metricGroupHandles.data()), ZE_RESULT_ERROR_UNKNOWN);
EXPECT_EQ(zetContextActivateMetricGroups(context->toHandle(), devices[0]->toHandle(), 2, metricGroupHandles.data()), ZE_RESULT_SUCCESS);
}
TEST_F(MetricEnumerationTest, givenValidTimeBasedMetricGroupWhenzetContextActivateMetricGroupsIsCalledThenReturnsSuccess) {
@ -1381,7 +1381,7 @@ TEST_F(MetricEnumerationTest, givenActivateTwoMetricGroupsWithDifferentDomainsAt
EXPECT_EQ(zetContextActivateMetricGroups(context->toHandle(), device->toHandle(), 2, metricGroupHandle), ZE_RESULT_SUCCESS);
}
TEST_F(MetricEnumerationTest, givenActivateTwoMetricGroupsWithTheSameDomainsWhenzetContextActivateMetricGroupsIsCalledThenReturnsFail) {
TEST_F(MetricEnumerationTest, givenActivateTwoMetricGroupsWithTheSameDomainsWhenzetContextActivateMetricGroupsIsCalledThenReturnsSuccess) {
// Metrics Discovery device.
metricsDeviceParams.ConcurrentGroupsCount = 1;
@ -1447,8 +1447,8 @@ TEST_F(MetricEnumerationTest, givenActivateTwoMetricGroupsWithTheSameDomainsWhen
// Activate two metric groups with different domains.
EXPECT_EQ(zetContextActivateMetricGroups(context->toHandle(), device->toHandle(), 1, &metricGroupHandle[0]), ZE_RESULT_SUCCESS);
EXPECT_EQ(zetContextActivateMetricGroups(context->toHandle(), device->toHandle(), 1, &metricGroupHandle[1]), ZE_RESULT_ERROR_UNKNOWN);
EXPECT_EQ(zetContextActivateMetricGroups(context->toHandle(), device->toHandle(), 2, metricGroupHandle), ZE_RESULT_ERROR_UNKNOWN);
EXPECT_EQ(zetContextActivateMetricGroups(context->toHandle(), device->toHandle(), 1, &metricGroupHandle[1]), ZE_RESULT_SUCCESS);
EXPECT_EQ(zetContextActivateMetricGroups(context->toHandle(), device->toHandle(), 2, metricGroupHandle), ZE_RESULT_SUCCESS);
}
TEST_F(MetricEnumerationTest, givenValidMetricGroupWhenDeactivateIsDoneThenDomainsAreCleared) {
@ -1503,7 +1503,7 @@ TEST_F(MetricEnumerationTest, givenValidMetricGroupWhenDeactivateIsDoneThenDomai
EXPECT_EQ(zetContextActivateMetricGroups(context->toHandle(), device->toHandle(), 0, nullptr), ZE_RESULT_SUCCESS);
}
TEST_F(MetricEnumerationTest, givenDeactivateTestsWhenzetContextActivateMetricGroupsIsCalledThenReturnsApropriateResults) {
TEST_F(MetricEnumerationTest, GivenAlreadyActivatedMetricGroupWhenzetContextActivateMetricGroupsIsCalledThenReturnSuccess) {
// Metrics Discovery device.
metricsDeviceParams.ConcurrentGroupsCount = 1;
@ -1569,14 +1569,16 @@ TEST_F(MetricEnumerationTest, givenDeactivateTestsWhenzetContextActivateMetricGr
// Activate two metric groups with different domains.
EXPECT_EQ(zetContextActivateMetricGroups(context->toHandle(), device->toHandle(), 1, &metricGroupHandle[0]), ZE_RESULT_SUCCESS);
EXPECT_EQ(zetContextActivateMetricGroups(context->toHandle(), device->toHandle(), 1, &metricGroupHandle[1]), ZE_RESULT_ERROR_UNKNOWN);
EXPECT_EQ(zetContextActivateMetricGroups(context->toHandle(), device->toHandle(), 2, metricGroupHandle), ZE_RESULT_ERROR_UNKNOWN);
device->activateMetricGroups();
EXPECT_EQ(zetContextActivateMetricGroups(context->toHandle(), device->toHandle(), 1, &metricGroupHandle[1]), ZE_RESULT_SUCCESS);
EXPECT_EQ(zetContextActivateMetricGroups(context->toHandle(), device->toHandle(), 2, metricGroupHandle), ZE_RESULT_SUCCESS);
// Deactivate all.
EXPECT_EQ(zetContextActivateMetricGroups(context->toHandle(), device->toHandle(), 0, nullptr), ZE_RESULT_SUCCESS);
// Activate two metric groups at once.
EXPECT_EQ(zetContextActivateMetricGroups(context->toHandle(), device->toHandle(), 2, metricGroupHandle), ZE_RESULT_ERROR_UNKNOWN);
EXPECT_EQ(zetContextActivateMetricGroups(context->toHandle(), device->toHandle(), 2, metricGroupHandle), ZE_RESULT_SUCCESS);
// Deactivate all.
EXPECT_EQ(zetContextActivateMetricGroups(context->toHandle(), device->toHandle(), 0, nullptr), ZE_RESULT_SUCCESS);