mirror of
https://github.com/intel/compute-runtime.git
synced 2025-09-15 13:01:45 +08:00
Fix profiling calculation logic
Signed-off-by: Bartosz Dunajski <bartosz.dunajski@intel.com>
This commit is contained in:

committed by
Compute-Runtime-Automation

parent
493a96b308
commit
8a4d28ef8b
@@ -1086,8 +1086,8 @@ TEST_F(DeviceTest, givenCallToDevicePropertiesThenTimestampValidBitsAreCorrectly
|
||||
ze_device_properties_t deviceProps = {ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES};
|
||||
|
||||
device->getProperties(&deviceProps);
|
||||
EXPECT_EQ(36u, deviceProps.timestampValidBits);
|
||||
EXPECT_EQ(32u, deviceProps.kernelTimestampValidBits);
|
||||
EXPECT_EQ(device->getHwInfo().capabilityTable.timestampValidBits, deviceProps.timestampValidBits);
|
||||
EXPECT_EQ(device->getHwInfo().capabilityTable.kernelTimestampValidBits, deviceProps.kernelTimestampValidBits);
|
||||
}
|
||||
|
||||
TEST_F(DeviceTest, givenNullDriverInfowhenPciPropertiesIsCalledThenUninitializedErrorIsReturned) {
|
||||
|
@@ -101,7 +101,7 @@ Event::~Event() {
|
||||
}
|
||||
|
||||
DBG_LOG(EventsDebugEnable, "~Event()", this);
|
||||
//no commands should be registered
|
||||
// no commands should be registered
|
||||
DEBUG_BREAK_IF(this->cmdToSubmit.load());
|
||||
|
||||
submitCommand(true);
|
||||
@@ -154,7 +154,7 @@ cl_int Event::getEventProfilingInfo(cl_profiling_info paramName,
|
||||
|
||||
// CL_PROFILING_INFO_NOT_AVAILABLE if event refers to the clEnqueueSVMFree command
|
||||
if (isUserEvent() != CL_FALSE || // or is a user event object.
|
||||
!updateStatusAndCheckCompletion() || //if the execution status of the command identified by event is not CL_COMPLETE
|
||||
!updateStatusAndCheckCompletion() || // if the execution status of the command identified by event is not CL_COMPLETE
|
||||
!profilingEnabled) // the CL_QUEUE_PROFILING_ENABLE flag is not set for the command-queue,
|
||||
{
|
||||
return CL_PROFILING_INFO_NOT_AVAILABLE;
|
||||
@@ -245,7 +245,10 @@ void Event::updateCompletionStamp(uint32_t gpgpuTaskCount, uint32_t bcsTaskCount
|
||||
|
||||
cl_ulong Event::getDelta(cl_ulong startTime,
|
||||
cl_ulong endTime) {
|
||||
cl_ulong Max = maxNBitValue(OCLRT_NUM_TIMESTAMP_BITS);
|
||||
|
||||
auto &hwInfo = cmdQueue->getDevice().getHardwareInfo();
|
||||
|
||||
cl_ulong Max = maxNBitValue(hwInfo.capabilityTable.kernelTimestampValidBits);
|
||||
cl_ulong Delta = 0;
|
||||
|
||||
startTime &= Max;
|
||||
@@ -372,7 +375,7 @@ void Event::calculateProfilingDataInternal(uint64_t contextStartTS, uint64_t con
|
||||
const = CpuTimeQueue - GpuTimeQueue * scalar
|
||||
*/
|
||||
|
||||
//If device enqueue has not updated complete timestamp, assign end timestamp
|
||||
// If device enqueue has not updated complete timestamp, assign end timestamp
|
||||
gpuDuration = getDelta(contextStartTS, contextEndTS);
|
||||
if (*contextCompleteTS == 0) {
|
||||
*contextCompleteTS = contextEndTS;
|
||||
@@ -495,7 +498,7 @@ void Event::unblockEventsBlockedByThis(int32_t transitionStatus) {
|
||||
uint32_t taskLevelToPropagate = CompletionStamp::notReady;
|
||||
|
||||
if (isStatusCompletedByTermination(transitionStatus) == false) {
|
||||
//if we are event on top of the tree , obtain taskLevel from CSR
|
||||
// if we are event on top of the tree , obtain taskLevel from CSR
|
||||
if (taskLevel == CompletionStamp::notReady) {
|
||||
this->taskLevel = getTaskLevel(); // NOLINT(clang-analyzer-optin.cplusplus.VirtualCall)
|
||||
taskLevelToPropagate = this->taskLevel;
|
||||
@@ -598,7 +601,7 @@ void Event::submitCommand(bool abortTasks) {
|
||||
updateTaskCount(this->cmdQueue->getGpgpuCommandStreamReceiver().peekTaskCount(), peekBcsTaskCountFromCommandQueue());
|
||||
}
|
||||
}
|
||||
//make sure that task count is synchronized for events with kernels
|
||||
// make sure that task count is synchronized for events with kernels
|
||||
if (!this->eventWithoutCommand && !abortTasks) {
|
||||
this->synchronizeTaskCount();
|
||||
}
|
||||
@@ -611,7 +614,7 @@ cl_int Event::waitForEvents(cl_uint numEvents,
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
|
||||
//flush all command queues
|
||||
// flush all command queues
|
||||
for (const cl_event *it = eventList, *end = eventList + numEvents; it != end; ++it) {
|
||||
Event *event = castToObjectOrAbort<Event>(*it);
|
||||
if (event->cmdQueue) {
|
||||
@@ -677,7 +680,7 @@ inline void Event::unblockEventBy(Event &event, uint32_t taskLevel, int32_t tran
|
||||
}
|
||||
setStatus(statusToPropagate);
|
||||
|
||||
//event may be completed after this operation, transition the state to not block others.
|
||||
// event may be completed after this operation, transition the state to not block others.
|
||||
this->updateExecutionStatus();
|
||||
}
|
||||
|
||||
@@ -753,9 +756,9 @@ void Event::executeCallbacks(int32_t executionStatusIn) {
|
||||
}
|
||||
|
||||
void Event::tryFlushEvent() {
|
||||
//only if event is not completed, completed event has already been flushed
|
||||
// only if event is not completed, completed event has already been flushed
|
||||
if (cmdQueue && updateStatusAndCheckCompletion() == false) {
|
||||
//flush the command queue only if it is not blocked event
|
||||
// flush the command queue only if it is not blocked event
|
||||
if (taskLevel != CompletionStamp::notReady) {
|
||||
cmdQueue->getGpgpuCommandStreamReceiver().flushBatchedSubmissions();
|
||||
}
|
||||
|
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2018-2021 Intel Corporation
|
||||
* Copyright (C) 2018-2022 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -40,7 +40,7 @@ cl_int ProfilingInfo[] = {
|
||||
CL_PROFILING_COMMAND_COMPLETE};
|
||||
|
||||
TEST_F(clEventProfilingTests, GivenInvalidParamNameWhenGettingEventProfilingInfoThenInvalidValueErrorIsReturned) {
|
||||
Event *pEvent = new Event(nullptr, 0, 0, 0);
|
||||
Event *pEvent = new Event(pCommandQueue, 0, 0, 0);
|
||||
pEvent->setStatus(CL_COMPLETE);
|
||||
size_t param_value_size = sizeof(cl_ulong);
|
||||
cl_ulong param_value;
|
||||
@@ -60,7 +60,7 @@ TEST_F(clEventProfilingTests, GivenInvalidParamNameWhenGettingEventProfilingInfo
|
||||
}
|
||||
|
||||
TEST_F(clEventProfilingTests, GivenInvalidParametersWhenGettingEventProfilingInfoThenValueSizeRetIsNotUpdated) {
|
||||
Event event{nullptr, 0, 0, 0};
|
||||
Event event{pCommandQueue, 0, 0, 0};
|
||||
event.setStatus(CL_COMPLETE);
|
||||
size_t paramValueSize = sizeof(cl_ulong);
|
||||
cl_ulong paramValue;
|
||||
@@ -78,7 +78,7 @@ TEST_F(clEventProfilingTests, GivenInvalidParametersWhenGettingEventProfilingInf
|
||||
}
|
||||
|
||||
TEST_F(clEventProfilingTests, GivenInvalidParamValueSizeWhenGettingEventProfilingInfoThenInvalidValueErrorIsReturned) {
|
||||
Event *pEvent = new Event(nullptr, 0, 0, 0);
|
||||
Event *pEvent = new Event(pCommandQueue, 0, 0, 0);
|
||||
pEvent->setStatus(CL_COMPLETE);
|
||||
size_t param_value_size = sizeof(cl_ulong);
|
||||
cl_ulong param_value;
|
||||
@@ -98,7 +98,7 @@ TEST_F(clEventProfilingTests, GivenInvalidParamValueSizeWhenGettingEventProfilin
|
||||
}
|
||||
|
||||
TEST_F(clEventProfilingTests, GivenValidParametersWhenGettingEventProfilingInfoThenSuccessIsReturned) {
|
||||
Event *pEvent = new Event(nullptr, 0, 0, 0);
|
||||
Event *pEvent = new Event(pCommandQueue, 0, 0, 0);
|
||||
pEvent->setStatus(CL_COMPLETE);
|
||||
size_t param_value_size = sizeof(cl_ulong);
|
||||
cl_ulong param_value;
|
||||
@@ -119,7 +119,7 @@ TEST_F(clEventProfilingTests, GivenValidParametersWhenGettingEventProfilingInfoT
|
||||
}
|
||||
|
||||
TEST_F(clEventProfilingTests, GivenNullParamValueSizeRetWhenGettingEventProfilingInfoThenSuccessIsReturned) {
|
||||
Event *pEvent = new Event(nullptr, 0, 0, 0);
|
||||
Event *pEvent = new Event(pCommandQueue, 0, 0, 0);
|
||||
pEvent->setStatus(CL_COMPLETE);
|
||||
size_t param_value_size = sizeof(cl_ulong);
|
||||
cl_ulong param_value;
|
||||
@@ -143,8 +143,8 @@ TEST_F(clEventProfilingTests, GivenNullEventWhenGettingEventProfilingInfoThenInv
|
||||
EXPECT_EQ(CL_INVALID_EVENT, retVal);
|
||||
}
|
||||
|
||||
TEST(clGetEventProfilingInfo, GivenNullParamValueAndZeroParamValueSizeWhenGettingEventProfilingInfoThenSuccessIsReturned) {
|
||||
Event *pEvent = new Event(nullptr, 0, 0, 0);
|
||||
TEST_F(clEventProfilingTests, GivenNullParamValueAndZeroParamValueSizeWhenGettingEventProfilingInfoThenSuccessIsReturned) {
|
||||
Event *pEvent = new Event(pCommandQueue, 0, 0, 0);
|
||||
size_t param_value_size = 0;
|
||||
|
||||
pEvent->setStatus(CL_COMPLETE);
|
||||
@@ -162,8 +162,8 @@ TEST(clGetEventProfilingInfo, GivenNullParamValueAndZeroParamValueSizeWhenGettin
|
||||
delete pEvent;
|
||||
}
|
||||
|
||||
TEST(clGetEventProfilingInfo, GivenNullParamValueAndCorrectParamValueSizeWhenGettingEventProfilingInfoThenSuccessIsReturned) {
|
||||
Event *pEvent = new Event(nullptr, 0, 0, 0);
|
||||
TEST_F(clEventProfilingTests, GivenNullParamValueAndCorrectParamValueSizeWhenGettingEventProfilingInfoThenSuccessIsReturned) {
|
||||
Event *pEvent = new Event(pCommandQueue, 0, 0, 0);
|
||||
size_t param_value_size = sizeof(cl_ulong);
|
||||
|
||||
pEvent->setStatus(CL_COMPLETE);
|
||||
@@ -181,7 +181,7 @@ TEST(clGetEventProfilingInfo, GivenNullParamValueAndCorrectParamValueSizeWhenGet
|
||||
delete pEvent;
|
||||
}
|
||||
|
||||
TEST(clGetEventProfilingInfo, GivenUserEventWhenGettingEventProfilingInfoThenProfilingInfoNotAvailableErrorIsReturned) {
|
||||
TEST_F(clEventProfilingTests, GivenUserEventWhenGettingEventProfilingInfoThenProfilingInfoNotAvailableErrorIsReturned) {
|
||||
UserEvent *ue = new UserEvent();
|
||||
size_t param_value_size = sizeof(cl_ulong);
|
||||
cl_ulong param_value;
|
||||
@@ -199,8 +199,8 @@ TEST(clGetEventProfilingInfo, GivenUserEventWhenGettingEventProfilingInfoThenPro
|
||||
delete ue;
|
||||
}
|
||||
|
||||
TEST(clGetEventProfilingInfo, GivenStartAndEndTimeWhenGettingDeltaThenCorrectDeltaIsReturned) {
|
||||
Event *pEvent = new Event(nullptr, 0, 0, 0);
|
||||
TEST_F(clEventProfilingTests, GivenStartAndEndTimeWhenGettingDeltaThenCorrectDeltaIsReturned) {
|
||||
Event *pEvent = new Event(pCommandQueue, 0, 0, 0);
|
||||
cl_ulong startTime = 1;
|
||||
cl_ulong endTime = 2;
|
||||
cl_ulong delta = 0;
|
||||
@@ -211,21 +211,21 @@ TEST(clGetEventProfilingInfo, GivenStartAndEndTimeWhenGettingDeltaThenCorrectDel
|
||||
delete pEvent;
|
||||
}
|
||||
|
||||
TEST(clGetEventProfilingInfo, GivenStartTimeGreaterThenEndTimeWhenGettingDeltaThenCorrectDeltaIsReturned) {
|
||||
Event *pEvent = new Event(nullptr, 0, 0, 0);
|
||||
TEST_F(clEventProfilingTests, GivenStartTimeGreaterThenEndTimeWhenGettingDeltaThenCorrectDeltaIsReturned) {
|
||||
Event *pEvent = new Event(pCommandQueue, 0, 0, 0);
|
||||
cl_ulong startTime = 2;
|
||||
cl_ulong endTime = 1;
|
||||
cl_ulong delta = 0;
|
||||
cl_ulong timeMax = 0xffffffffULL;
|
||||
cl_ulong timeMax = maxNBitValue(pDevice->getHardwareInfo().capabilityTable.kernelTimestampValidBits);
|
||||
|
||||
delta = pEvent->getDelta(startTime, endTime);
|
||||
EXPECT_EQ((timeMax + (endTime - startTime)), delta);
|
||||
delete pEvent;
|
||||
}
|
||||
|
||||
TEST(clGetEventProfilingInfo, givenTimestampThatOverlapWhenGetDeltaIsCalledThenProperDeltaIsComputed) {
|
||||
Event *pEvent = new Event(nullptr, 0, 0, 0);
|
||||
cl_ulong TimeMax = 0xffffffffULL;
|
||||
TEST_F(clEventProfilingTests, givenTimestampThatOverlapWhenGetDeltaIsCalledThenProperDeltaIsComputed) {
|
||||
Event *pEvent = new Event(pCommandQueue, 0, 0, 0);
|
||||
cl_ulong TimeMax = maxNBitValue(pDevice->getHardwareInfo().capabilityTable.kernelTimestampValidBits);
|
||||
cl_ulong realDelta = 10;
|
||||
|
||||
cl_ulong startTime = TimeMax - realDelta;
|
||||
@@ -236,14 +236,14 @@ TEST(clGetEventProfilingInfo, givenTimestampThatOverlapWhenGetDeltaIsCalledThenP
|
||||
delete pEvent;
|
||||
}
|
||||
|
||||
TEST(clGetEventProfilingInfo, GivenProfilingDisabledWhenCalculatingProfilingDataThenFalseIsReturned) {
|
||||
TEST_F(clEventProfilingTests, GivenProfilingDisabledWhenCalculatingProfilingDataThenFalseIsReturned) {
|
||||
auto *pEvent = new MockEvent<Event>(nullptr, 0, 0, 0);
|
||||
EXPECT_FALSE(pEvent->calcProfilingData());
|
||||
delete pEvent;
|
||||
}
|
||||
|
||||
TEST(clGetEventProfilingInfo, GivenProfilingEnabledWhenCalculatingProfilingDataThenFalseIsNotReturned) {
|
||||
Event *pEvent = new Event(nullptr, 0, 0, 0);
|
||||
TEST_F(clEventProfilingTests, GivenProfilingEnabledWhenCalculatingProfilingDataThenFalseIsNotReturned) {
|
||||
Event *pEvent = new Event(pCommandQueue, 0, 0, 0);
|
||||
cl_bool Result = pEvent->isProfilingEnabled();
|
||||
EXPECT_EQ(((cl_bool)CL_FALSE), Result);
|
||||
pEvent->setProfilingEnabled(true);
|
||||
@@ -252,15 +252,15 @@ TEST(clGetEventProfilingInfo, GivenProfilingEnabledWhenCalculatingProfilingDataT
|
||||
delete pEvent;
|
||||
}
|
||||
|
||||
TEST(clGetEventProfilingInfo, GivenProfilingEnabledAndUserEventsWhenCalculatingProfilingDataThenFalseIsReturned) {
|
||||
TEST_F(clEventProfilingTests, GivenProfilingEnabledAndUserEventsWhenCalculatingProfilingDataThenFalseIsReturned) {
|
||||
Event *pEvent = new UserEvent();
|
||||
cl_bool Result = pEvent->isProfilingEnabled();
|
||||
EXPECT_EQ(((cl_bool)CL_FALSE), Result);
|
||||
delete pEvent;
|
||||
}
|
||||
|
||||
TEST(clGetEventProfilingInfo, GivenPerfCountersEnabledWhenCheckingPerfCountersThenTrueIsReturned) {
|
||||
Event *pEvent = new Event(nullptr, 0, 0, 0);
|
||||
TEST_F(clEventProfilingTests, GivenPerfCountersEnabledWhenCheckingPerfCountersThenTrueIsReturned) {
|
||||
Event *pEvent = new Event(pCommandQueue, 0, 0, 0);
|
||||
bool Result = pEvent->isPerfCountersEnabled();
|
||||
EXPECT_FALSE(Result);
|
||||
pEvent->setPerfCountersEnabled(true);
|
||||
|
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2020-2021 Intel Corporation
|
||||
* Copyright (C) 2020-2022 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -19,11 +19,13 @@ constexpr bool is64bit = (sizeof(void *) == 8);
|
||||
constexpr NEO::DeviceBitfield systemMemoryBitfield(0b0);
|
||||
|
||||
constexpr uint64_t maxNBitValue(uint64_t n) {
|
||||
return ((1ULL << n) - 1);
|
||||
return ((n == 64) ? std::numeric_limits<uint64_t>::max()
|
||||
: ((1ULL << n) - 1));
|
||||
}
|
||||
static_assert(maxNBitValue(8) == std::numeric_limits<uint8_t>::max(), "");
|
||||
static_assert(maxNBitValue(16) == std::numeric_limits<uint16_t>::max(), "");
|
||||
static_assert(maxNBitValue(32) == std::numeric_limits<uint32_t>::max(), "");
|
||||
static_assert(maxNBitValue(64) == std::numeric_limits<uint64_t>::max(), "");
|
||||
|
||||
namespace MemoryConstants {
|
||||
constexpr uint64_t zoneHigh = ~(uint64_t)0xFFFFFFFF;
|
||||
|
Reference in New Issue
Block a user