Fix profiling calculation logic

Signed-off-by: Bartosz Dunajski <bartosz.dunajski@intel.com>
This commit is contained in:
Bartosz Dunajski
2022-02-01 15:48:44 +00:00
committed by Compute-Runtime-Automation
parent 493a96b308
commit 8a4d28ef8b
4 changed files with 44 additions and 39 deletions

View File

@ -1086,8 +1086,8 @@ TEST_F(DeviceTest, givenCallToDevicePropertiesThenTimestampValidBitsAreCorrectly
ze_device_properties_t deviceProps = {ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES};
device->getProperties(&deviceProps);
EXPECT_EQ(36u, deviceProps.timestampValidBits);
EXPECT_EQ(32u, deviceProps.kernelTimestampValidBits);
EXPECT_EQ(device->getHwInfo().capabilityTable.timestampValidBits, deviceProps.timestampValidBits);
EXPECT_EQ(device->getHwInfo().capabilityTable.kernelTimestampValidBits, deviceProps.kernelTimestampValidBits);
}
TEST_F(DeviceTest, givenNullDriverInfowhenPciPropertiesIsCalledThenUninitializedErrorIsReturned) {

View File

@ -101,7 +101,7 @@ Event::~Event() {
}
DBG_LOG(EventsDebugEnable, "~Event()", this);
//no commands should be registred
// no commands should be registered
DEBUG_BREAK_IF(this->cmdToSubmit.load());
submitCommand(true);
@ -154,7 +154,7 @@ cl_int Event::getEventProfilingInfo(cl_profiling_info paramName,
// CL_PROFILING_INFO_NOT_AVAILABLE if event refers to the clEnqueueSVMFree command
if (isUserEvent() != CL_FALSE || // or is a user event object.
!updateStatusAndCheckCompletion() || //if the execution status of the command identified by event is not CL_COMPLETE
!updateStatusAndCheckCompletion() || // if the execution status of the command identified by event is not CL_COMPLETE
!profilingEnabled) // the CL_QUEUE_PROFILING_ENABLE flag is not set for the command-queue,
{
return CL_PROFILING_INFO_NOT_AVAILABLE;
@ -245,7 +245,10 @@ void Event::updateCompletionStamp(uint32_t gpgpuTaskCount, uint32_t bcsTaskCount
cl_ulong Event::getDelta(cl_ulong startTime,
cl_ulong endTime) {
cl_ulong Max = maxNBitValue(OCLRT_NUM_TIMESTAMP_BITS);
auto &hwInfo = cmdQueue->getDevice().getHardwareInfo();
cl_ulong Max = maxNBitValue(hwInfo.capabilityTable.kernelTimestampValidBits);
cl_ulong Delta = 0;
startTime &= Max;
@ -372,7 +375,7 @@ void Event::calculateProfilingDataInternal(uint64_t contextStartTS, uint64_t con
const = CpuTimeQueue - GpuTimeQueue * scalar
*/
//If device enqueue has not updated complete timestamp, assign end timestamp
// If device enqueue has not updated complete timestamp, assign end timestamp
gpuDuration = getDelta(contextStartTS, contextEndTS);
if (*contextCompleteTS == 0) {
*contextCompleteTS = contextEndTS;
@ -495,7 +498,7 @@ void Event::unblockEventsBlockedByThis(int32_t transitionStatus) {
uint32_t taskLevelToPropagate = CompletionStamp::notReady;
if (isStatusCompletedByTermination(transitionStatus) == false) {
//if we are event on top of the tree , obtain taskLevel from CSR
// if this event is at the top of the tree, obtain taskLevel from CSR
if (taskLevel == CompletionStamp::notReady) {
this->taskLevel = getTaskLevel(); // NOLINT(clang-analyzer-optin.cplusplus.VirtualCall)
taskLevelToPropagate = this->taskLevel;
@ -598,7 +601,7 @@ void Event::submitCommand(bool abortTasks) {
updateTaskCount(this->cmdQueue->getGpgpuCommandStreamReceiver().peekTaskCount(), peekBcsTaskCountFromCommandQueue());
}
}
//make sure that task count is synchronized for events with kernels
// make sure that task count is synchronized for events with kernels
if (!this->eventWithoutCommand && !abortTasks) {
this->synchronizeTaskCount();
}
@ -611,7 +614,7 @@ cl_int Event::waitForEvents(cl_uint numEvents,
return CL_SUCCESS;
}
//flush all command queues
// flush all command queues
for (const cl_event *it = eventList, *end = eventList + numEvents; it != end; ++it) {
Event *event = castToObjectOrAbort<Event>(*it);
if (event->cmdQueue) {
@ -677,7 +680,7 @@ inline void Event::unblockEventBy(Event &event, uint32_t taskLevel, int32_t tran
}
setStatus(statusToPropagate);
//event may be completed after this operation, transtition the state to not block others.
// event may be completed after this operation, transition the state to not block others.
this->updateExecutionStatus();
}
@ -753,9 +756,9 @@ void Event::executeCallbacks(int32_t executionStatusIn) {
}
void Event::tryFlushEvent() {
//only if event is not completed, completed event has already been flushed
// only if event is not completed, completed event has already been flushed
if (cmdQueue && updateStatusAndCheckCompletion() == false) {
//flush the command queue only if it is not blocked event
// flush the command queue only if it is not blocked event
if (taskLevel != CompletionStamp::notReady) {
cmdQueue->getGpgpuCommandStreamReceiver().flushBatchedSubmissions();
}

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2018-2021 Intel Corporation
* Copyright (C) 2018-2022 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@ -40,7 +40,7 @@ cl_int ProfilingInfo[] = {
CL_PROFILING_COMMAND_COMPLETE};
TEST_F(clEventProfilingTests, GivenInvalidParamNameWhenGettingEventProfilingInfoThenInvalidValueErrorIsReturned) {
Event *pEvent = new Event(nullptr, 0, 0, 0);
Event *pEvent = new Event(pCommandQueue, 0, 0, 0);
pEvent->setStatus(CL_COMPLETE);
size_t param_value_size = sizeof(cl_ulong);
cl_ulong param_value;
@ -60,7 +60,7 @@ TEST_F(clEventProfilingTests, GivenInvalidParamNameWhenGettingEventProfilingInfo
}
TEST_F(clEventProfilingTests, GivenInvalidParametersWhenGettingEventProfilingInfoThenValueSizeRetIsNotUpdated) {
Event event{nullptr, 0, 0, 0};
Event event{pCommandQueue, 0, 0, 0};
event.setStatus(CL_COMPLETE);
size_t paramValueSize = sizeof(cl_ulong);
cl_ulong paramValue;
@ -78,7 +78,7 @@ TEST_F(clEventProfilingTests, GivenInvalidParametersWhenGettingEventProfilingInf
}
TEST_F(clEventProfilingTests, GivenInvalidParamValueSizeWhenGettingEventProfilingInfoThenInvalidValueErrorIsReturned) {
Event *pEvent = new Event(nullptr, 0, 0, 0);
Event *pEvent = new Event(pCommandQueue, 0, 0, 0);
pEvent->setStatus(CL_COMPLETE);
size_t param_value_size = sizeof(cl_ulong);
cl_ulong param_value;
@ -98,7 +98,7 @@ TEST_F(clEventProfilingTests, GivenInvalidParamValueSizeWhenGettingEventProfilin
}
TEST_F(clEventProfilingTests, GivenValidParametersWhenGettingEventProfilingInfoThenSuccessIsReturned) {
Event *pEvent = new Event(nullptr, 0, 0, 0);
Event *pEvent = new Event(pCommandQueue, 0, 0, 0);
pEvent->setStatus(CL_COMPLETE);
size_t param_value_size = sizeof(cl_ulong);
cl_ulong param_value;
@ -119,7 +119,7 @@ TEST_F(clEventProfilingTests, GivenValidParametersWhenGettingEventProfilingInfoT
}
TEST_F(clEventProfilingTests, GivenNullParamValueSizeRetWhenGettingEventProfilingInfoThenSuccessIsReturned) {
Event *pEvent = new Event(nullptr, 0, 0, 0);
Event *pEvent = new Event(pCommandQueue, 0, 0, 0);
pEvent->setStatus(CL_COMPLETE);
size_t param_value_size = sizeof(cl_ulong);
cl_ulong param_value;
@ -143,8 +143,8 @@ TEST_F(clEventProfilingTests, GivenNullEventWhenGettingEventProfilingInfoThenInv
EXPECT_EQ(CL_INVALID_EVENT, retVal);
}
TEST(clGetEventProfilingInfo, GivenNullParamValueAndZeroParamValueSizeWhenGettingEventProfilingInfoThenSuccessIsReturned) {
Event *pEvent = new Event(nullptr, 0, 0, 0);
TEST_F(clEventProfilingTests, GivenNullParamValueAndZeroParamValueSizeWhenGettingEventProfilingInfoThenSuccessIsReturned) {
Event *pEvent = new Event(pCommandQueue, 0, 0, 0);
size_t param_value_size = 0;
pEvent->setStatus(CL_COMPLETE);
@ -162,8 +162,8 @@ TEST(clGetEventProfilingInfo, GivenNullParamValueAndZeroParamValueSizeWhenGettin
delete pEvent;
}
TEST(clGetEventProfilingInfo, GivenNullParamValueAndCorrectParamValueSizeWhenGettingEventProfilingInfoThenSuccessIsReturned) {
Event *pEvent = new Event(nullptr, 0, 0, 0);
TEST_F(clEventProfilingTests, GivenNullParamValueAndCorrectParamValueSizeWhenGettingEventProfilingInfoThenSuccessIsReturned) {
Event *pEvent = new Event(pCommandQueue, 0, 0, 0);
size_t param_value_size = sizeof(cl_ulong);
pEvent->setStatus(CL_COMPLETE);
@ -181,7 +181,7 @@ TEST(clGetEventProfilingInfo, GivenNullParamValueAndCorrectParamValueSizeWhenGet
delete pEvent;
}
TEST(clGetEventProfilingInfo, GivenUserEventWhenGettingEventProfilingInfoThenProfilingInfoNotAvailableErrorIsReturned) {
TEST_F(clEventProfilingTests, GivenUserEventWhenGettingEventProfilingInfoThenProfilingInfoNotAvailableErrorIsReturned) {
UserEvent *ue = new UserEvent();
size_t param_value_size = sizeof(cl_ulong);
cl_ulong param_value;
@ -199,8 +199,8 @@ TEST(clGetEventProfilingInfo, GivenUserEventWhenGettingEventProfilingInfoThenPro
delete ue;
}
TEST(clGetEventProfilingInfo, GivenStartAndEndTimeWhenGettingDeltaThenCorrectDeltaIsReturned) {
Event *pEvent = new Event(nullptr, 0, 0, 0);
TEST_F(clEventProfilingTests, GivenStartAndEndTimeWhenGettingDeltaThenCorrectDeltaIsReturned) {
Event *pEvent = new Event(pCommandQueue, 0, 0, 0);
cl_ulong startTime = 1;
cl_ulong endTime = 2;
cl_ulong delta = 0;
@ -211,21 +211,21 @@ TEST(clGetEventProfilingInfo, GivenStartAndEndTimeWhenGettingDeltaThenCorrectDel
delete pEvent;
}
TEST(clGetEventProfilingInfo, GivenStartTimeGreaterThenEndTimeWhenGettingDeltaThenCorrectDeltaIsReturned) {
Event *pEvent = new Event(nullptr, 0, 0, 0);
TEST_F(clEventProfilingTests, GivenStartTimeGreaterThenEndTimeWhenGettingDeltaThenCorrectDeltaIsReturned) {
Event *pEvent = new Event(pCommandQueue, 0, 0, 0);
cl_ulong startTime = 2;
cl_ulong endTime = 1;
cl_ulong delta = 0;
cl_ulong timeMax = 0xffffffffULL;
cl_ulong timeMax = maxNBitValue(pDevice->getHardwareInfo().capabilityTable.kernelTimestampValidBits);
delta = pEvent->getDelta(startTime, endTime);
EXPECT_EQ((timeMax + (endTime - startTime)), delta);
delete pEvent;
}
TEST(clGetEventProfilingInfo, givenTimestampThatOverlapWhenGetDeltaIsCalledThenProperDeltaIsComputed) {
Event *pEvent = new Event(nullptr, 0, 0, 0);
cl_ulong TimeMax = 0xffffffffULL;
TEST_F(clEventProfilingTests, givenTimestampThatOverlapWhenGetDeltaIsCalledThenProperDeltaIsComputed) {
Event *pEvent = new Event(pCommandQueue, 0, 0, 0);
cl_ulong TimeMax = maxNBitValue(pDevice->getHardwareInfo().capabilityTable.kernelTimestampValidBits);
cl_ulong realDelta = 10;
cl_ulong startTime = TimeMax - realDelta;
@ -236,14 +236,14 @@ TEST(clGetEventProfilingInfo, givenTimestampThatOverlapWhenGetDeltaIsCalledThenP
delete pEvent;
}
TEST(clGetEventProfilingInfo, GivenProfilingDisabledWhenCalculatingProfilingDataThenFalseIsReturned) {
TEST_F(clEventProfilingTests, GivenProfilingDisabledWhenCalculatingProfilingDataThenFalseIsReturned) {
auto *pEvent = new MockEvent<Event>(nullptr, 0, 0, 0);
EXPECT_FALSE(pEvent->calcProfilingData());
delete pEvent;
}
TEST(clGetEventProfilingInfo, GivenProfilingEnabledWhenCalculatingProfilingDataThenFalseIsNotReturned) {
Event *pEvent = new Event(nullptr, 0, 0, 0);
TEST_F(clEventProfilingTests, GivenProfilingEnabledWhenCalculatingProfilingDataThenFalseIsNotReturned) {
Event *pEvent = new Event(pCommandQueue, 0, 0, 0);
cl_bool Result = pEvent->isProfilingEnabled();
EXPECT_EQ(((cl_bool)CL_FALSE), Result);
pEvent->setProfilingEnabled(true);
@ -252,15 +252,15 @@ TEST(clGetEventProfilingInfo, GivenProfilingEnabledWhenCalculatingProfilingDataT
delete pEvent;
}
TEST(clGetEventProfilingInfo, GivenProfilingEnabledAndUserEventsWhenCalculatingProfilingDataThenFalseIsReturned) {
TEST_F(clEventProfilingTests, GivenProfilingEnabledAndUserEventsWhenCalculatingProfilingDataThenFalseIsReturned) {
Event *pEvent = new UserEvent();
cl_bool Result = pEvent->isProfilingEnabled();
EXPECT_EQ(((cl_bool)CL_FALSE), Result);
delete pEvent;
}
TEST(clGetEventProfilingInfo, GivenPerfCountersEnabledWhenCheckingPerfCountersThenTrueIsReturned) {
Event *pEvent = new Event(nullptr, 0, 0, 0);
TEST_F(clEventProfilingTests, GivenPerfCountersEnabledWhenCheckingPerfCountersThenTrueIsReturned) {
Event *pEvent = new Event(pCommandQueue, 0, 0, 0);
bool Result = pEvent->isPerfCountersEnabled();
EXPECT_FALSE(Result);
pEvent->setPerfCountersEnabled(true);

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2020-2021 Intel Corporation
* Copyright (C) 2020-2022 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@ -19,11 +19,13 @@ constexpr bool is64bit = (sizeof(void *) == 8);
constexpr NEO::DeviceBitfield systemMemoryBitfield(0b0);
// Returns the largest unsigned value that fits in the low `n` bits, i.e. a
// mask with bits [0, n) set.
//
// Shifting a 64-bit operand by 64 or more is undefined behaviour, so widths of
// 64 and above are answered directly with the all-ones value rather than being
// computed with a shift.
//
// n       - number of valid bits
// returns - (2^n - 1) for n < 64, otherwise std::numeric_limits<uint64_t>::max()
constexpr uint64_t maxNBitValue(uint64_t n) {
    return (n >= 64) ? std::numeric_limits<uint64_t>::max()
                     : ((1ULL << n) - 1);
}
static_assert(maxNBitValue(8) == std::numeric_limits<uint8_t>::max(), "");
static_assert(maxNBitValue(16) == std::numeric_limits<uint16_t>::max(), "");
static_assert(maxNBitValue(32) == std::numeric_limits<uint32_t>::max(), "");
static_assert(maxNBitValue(64) == std::numeric_limits<uint64_t>::max(), "");
namespace MemoryConstants {
constexpr uint64_t zoneHigh = ~(uint64_t)0xFFFFFFFF;