Revert "Fix event profiling for marker commands"

This reverts commit 5f0167b477993f3ebc804c53a110bc432638615b.

Signed-off-by: Michal Mrozek <michal.mrozek@intel.com>
This commit is contained in:
Michal Mrozek
2021-07-01 07:21:06 +00:00
committed by Compute-Runtime-Automation
parent c04d545eb2
commit ad3855ceb3
10 changed files with 40 additions and 136 deletions

View File

@ -774,12 +774,12 @@ bool CommandQueue::blitEnqueueImageAllowed(const size_t *origin, const size_t *r
return blitEnqueuImageAllowed; return blitEnqueuImageAllowed;
} }
bool CommandQueue::isBlockedCommandStreamRequired(uint32_t commandType, const EventsRequest &eventsRequest, bool blockedQueue, bool isMarkerWithProfiling) const { bool CommandQueue::isBlockedCommandStreamRequired(uint32_t commandType, const EventsRequest &eventsRequest, bool blockedQueue) const {
if (!blockedQueue) { if (!blockedQueue) {
return false; return false;
} }
if (isCacheFlushCommand(commandType) || !isCommandWithoutKernel(commandType) || isMarkerWithProfiling) { if (isCacheFlushCommand(commandType) || !isCommandWithoutKernel(commandType)) {
return true; return true;
} }

View File

@ -343,7 +343,7 @@ class CommandQueue : public BaseObject<_cl_command_queue> {
cl_int enqueueUnmapMemObject(TransferProperties &transferProperties, EventsRequest &eventsRequest); cl_int enqueueUnmapMemObject(TransferProperties &transferProperties, EventsRequest &eventsRequest);
virtual void obtainTaskLevelAndBlockedStatus(unsigned int &taskLevel, cl_uint &numEventsInWaitList, const cl_event *&eventWaitList, bool &blockQueueStatus, unsigned int commandType){}; virtual void obtainTaskLevelAndBlockedStatus(unsigned int &taskLevel, cl_uint &numEventsInWaitList, const cl_event *&eventWaitList, bool &blockQueueStatus, unsigned int commandType){};
bool isBlockedCommandStreamRequired(uint32_t commandType, const EventsRequest &eventsRequest, bool blockedQueue, bool isMarkerWithProfiling) const; bool isBlockedCommandStreamRequired(uint32_t commandType, const EventsRequest &eventsRequest, bool blockedQueue) const;
MOCKABLE_VIRTUAL void obtainNewTimestampPacketNodes(size_t numberOfNodes, TimestampPacketContainer &previousNodes, bool clearAllDependencies, bool blitEnqueue); MOCKABLE_VIRTUAL void obtainNewTimestampPacketNodes(size_t numberOfNodes, TimestampPacketContainer &previousNodes, bool clearAllDependencies, bool blitEnqueue);
void storeProperties(const cl_queue_properties *properties); void storeProperties(const cl_queue_properties *properties);

View File

@ -459,7 +459,7 @@ class CommandQueueHw : public CommandQueue {
bool profilingRequired = (this->isProfilingEnabled() && eventsRequest.outEvent); bool profilingRequired = (this->isProfilingEnabled() && eventsRequest.outEvent);
bool perfCountersRequired = (this->isPerfCountersEnabled() && eventsRequest.outEvent); bool perfCountersRequired = (this->isPerfCountersEnabled() && eventsRequest.outEvent);
if (isBlockedCommandStreamRequired(commandType, eventsRequest, blockedQueue, isMarkerWithProfiling)) { if (isBlockedCommandStreamRequired(commandType, eventsRequest, blockedQueue)) {
constexpr size_t additionalAllocationSize = CSRequirements::csOverfetchSize; constexpr size_t additionalAllocationSize = CSRequirements::csOverfetchSize;
constexpr size_t allocationSize = MemoryConstants::pageSize64k - CSRequirements::csOverfetchSize; constexpr size_t allocationSize = MemoryConstants::pageSize64k - CSRequirements::csOverfetchSize;
commandStream = new LinearStream(); commandStream = new LinearStream();

View File

@ -796,7 +796,7 @@ CompletionStamp CommandQueueHw<GfxFamily>::enqueueNonBlocked(
TimeStampData submitTimeStamp = {}; TimeStampData submitTimeStamp = {};
if (isProfilingEnabled() && eventBuilder.getEvent()) { if (isProfilingEnabled() && eventBuilder.getEvent()) {
this->getDevice().getOSTime()->getCpuGpuTime(&submitTimeStamp); this->getDevice().getOSTime()->getCpuTime(&submitTimeStamp.CPUTimeinNS);
eventBuilder.getEvent()->setSubmitTimeStamp(&submitTimeStamp); eventBuilder.getEvent()->setSubmitTimeStamp(&submitTimeStamp);
auto hwTimestampNode = eventBuilder.getEvent()->getHwTimeStampNode(); auto hwTimestampNode = eventBuilder.getEvent()->getHwTimeStampNode();

View File

@ -160,19 +160,21 @@ cl_int Event::getEventProfilingInfo(cl_profiling_info paramName,
return CL_PROFILING_INFO_NOT_AVAILABLE; return CL_PROFILING_INFO_NOT_AVAILABLE;
} }
uint64_t timestamp;
// if paramValue is NULL, it is ignored // if paramValue is NULL, it is ignored
switch (paramName) { switch (paramName) {
case CL_PROFILING_COMMAND_QUEUED: case CL_PROFILING_COMMAND_QUEUED:
timestamp = getTimeInNSFromTimestampData(queueTimeStamp); src = &queueTimeStamp.CPUTimeinNS;
src = &timestamp; if (DebugManager.flags.ReturnRawGpuTimestamps.get()) {
src = &queueTimeStamp.GPUTimeStamp;
}
srcSize = sizeof(cl_ulong); srcSize = sizeof(cl_ulong);
break; break;
case CL_PROFILING_COMMAND_SUBMIT: case CL_PROFILING_COMMAND_SUBMIT:
timestamp = getTimeInNSFromTimestampData(submitTimeStamp); src = &submitTimeStamp.CPUTimeinNS;
src = &timestamp; if (DebugManager.flags.ReturnRawGpuTimestamps.get()) {
src = &submitTimeStamp.GPUTimeStamp;
}
srcSize = sizeof(cl_ulong); srcSize = sizeof(cl_ulong);
break; break;
@ -247,20 +249,6 @@ cl_ulong Event::getDelta(cl_ulong startTime,
return Delta; return Delta;
} }
uint64_t Event::getTimeInNSFromTimestampData(const TimeStampData &timestamp) const {
if (isCPUProfilingPath()) {
return timestamp.CPUTimeinNS;
}
if (DebugManager.flags.ReturnRawGpuTimestamps.get()) {
return timestamp.GPUTimeStamp;
}
double resolution = cmdQueue ? cmdQueue->getDevice().getDeviceInfo().profilingTimerResolution : 0.0;
return static_cast<uint64_t>(timestamp.GPUTimeStamp * resolution);
}
bool Event::calcProfilingData() { bool Event::calcProfilingData() {
if (!dataCalculated && !profilingCpuPath) { if (!dataCalculated && !profilingCpuPath) {
if (timestampPacketContainer && timestampPacketContainer->peekNodes().size() > 0) { if (timestampPacketContainer && timestampPacketContainer->peekNodes().size() > 0) {
@ -306,22 +294,32 @@ bool Event::calcProfilingData() {
} }
void Event::calculateProfilingDataInternal(uint64_t contextStartTS, uint64_t contextEndTS, uint64_t *contextCompleteTS, uint64_t globalStartTS) { void Event::calculateProfilingDataInternal(uint64_t contextStartTS, uint64_t contextEndTS, uint64_t *contextCompleteTS, uint64_t globalStartTS) {
uint64_t gpuDuration = 0; uint64_t gpuDuration = 0;
uint64_t cpuDuration = 0; uint64_t cpuDuration = 0;
uint64_t gpuCompleteDuration = 0; uint64_t gpuCompleteDuration = 0;
uint64_t cpuCompleteDuration = 0; uint64_t cpuCompleteDuration = 0;
auto &device = this->cmdQueue->getDevice(); auto &hwHelper = HwHelper::get(this->cmdQueue->getDevice().getHardwareInfo().platform.eRenderCoreFamily);
auto &hwHelper = HwHelper::get(device.getHardwareInfo().platform.eRenderCoreFamily); auto frequency = cmdQueue->getDevice().getDeviceInfo().profilingTimerResolution;
auto frequency = device.getDeviceInfo().profilingTimerResolution; auto gpuTimeStamp = queueTimeStamp.GPUTimeStamp;
startTimeStamp = hwHelper.getGpuTimeStampInNS(globalStartTS, frequency); int64_t c0 = queueTimeStamp.CPUTimeinNS - hwHelper.getGpuTimeStampInNS(gpuTimeStamp, frequency);
if (startTimeStamp < queueTimeStamp.GPUTimeStamp) { startTimeStamp = static_cast<uint64_t>(globalStartTS * frequency) + c0;
startTimeStamp += static_cast<uint64_t>((1ULL << (hwHelper.getGlobalTimeStampBits())) * frequency); if (startTimeStamp < queueTimeStamp.CPUTimeinNS) {
c0 += static_cast<uint64_t>((1ULL << (hwHelper.getGlobalTimeStampBits())) * frequency);
startTimeStamp = static_cast<uint64_t>(globalStartTS * frequency) + c0;
} }
/* calculation based on equation
CpuTime = GpuTime * scalar + const( == c0)
scalar = DeltaCpu( == dCpu) / DeltaGpu( == dGpu)
to determine the value of the const we can use one pair of values
const = CpuTimeQueue - GpuTimeQueue * scalar
*/
//If device enqueue has not updated complete timestamp, assign end timestamp //If device enqueue has not updated complete timestamp, assign end timestamp
gpuDuration = getDelta(contextStartTS, contextEndTS); gpuDuration = getDelta(contextStartTS, contextEndTS);
if (*contextCompleteTS == 0) { if (*contextCompleteTS == 0) {

View File

@ -326,7 +326,6 @@ class Event : public BaseObject<_cl_event>, public IDNode<Event> {
} }
} }
uint64_t getTimeInNSFromTimestampData(const TimeStampData &timestamp) const;
bool calcProfilingData(); bool calcProfilingData();
MOCKABLE_VIRTUAL void calculateProfilingDataInternal(uint64_t contextStartTS, uint64_t contextEndTS, uint64_t *contextCompleteTS, uint64_t globalStartTS); MOCKABLE_VIRTUAL void calculateProfilingDataInternal(uint64_t contextStartTS, uint64_t contextEndTS, uint64_t *contextCompleteTS, uint64_t globalStartTS);
MOCKABLE_VIRTUAL void synchronizeTaskCount() { MOCKABLE_VIRTUAL void synchronizeTaskCount() {

View File

@ -380,14 +380,14 @@ CompletionStamp &CommandWithoutKernel::submit(uint32_t taskLevel, bool terminate
false, //useGlobalAtomics false, //useGlobalAtomics
1u); //numDevicesInContext 1u); //numDevicesInContext
UNRECOVERABLE_IF(!kernelOperation->blitEnqueue && !commandStreamReceiver.peekTimestampPacketWriteEnabled() && commandQueue.getContext().getRootDeviceIndices().size() == 1);
if (commandQueue.getContext().getRootDeviceIndices().size() > 1) { if (commandQueue.getContext().getRootDeviceIndices().size() > 1) {
eventsRequest.fillCsrDependenciesForTaskCountContainer(dispatchFlags.csrDependencies, commandStreamReceiver); eventsRequest.fillCsrDependenciesForTaskCountContainer(dispatchFlags.csrDependencies, commandStreamReceiver);
} }
if (commandStreamReceiver.peekTimestampPacketWriteEnabled()) { eventsRequest.fillCsrDependenciesForTimestampPacketContainer(dispatchFlags.csrDependencies, commandStreamReceiver, CsrDependencies::DependenciesType::OutOfCsr);
eventsRequest.fillCsrDependenciesForTimestampPacketContainer(dispatchFlags.csrDependencies, commandStreamReceiver, CsrDependencies::DependenciesType::OutOfCsr); makeTimestampPacketsResident(commandStreamReceiver);
makeTimestampPacketsResident(commandStreamReceiver);
}
gtpinNotifyPreFlushTask(&commandQueue); gtpinNotifyPreFlushTask(&commandQueue);

View File

@ -7,7 +7,6 @@
#include "shared/source/command_stream/command_stream_receiver.h" #include "shared/source/command_stream/command_stream_receiver.h"
#include "shared/test/common/cmd_parse/gen_cmd_parse.h" #include "shared/test/common/cmd_parse/gen_cmd_parse.h"
#include "shared/test/common/helpers/debug_manager_state_restore.h"
#include "opencl/source/event/user_event.h" #include "opencl/source/event/user_event.h"
#include "opencl/test/unit_test/command_queue/command_enqueue_fixture.h" #include "opencl/test/unit_test/command_queue/command_enqueue_fixture.h"
@ -253,31 +252,3 @@ HWTEST_F(MarkerTest, givenMarkerCallFollowingNdrangeCallInBatchedModeWhenWaitFor
clReleaseEvent(eventFromMarker); clReleaseEvent(eventFromMarker);
clReleaseEvent(eventFromNdr); clReleaseEvent(eventFromNdr);
} }
struct MarkerWithProfilingTest : public MarkerTest {
void SetUp() {
dbgRestore = std::make_unique<DebugManagerStateRestore>();
DebugManager.flags.EnableTimestampPacket.set(0);
MarkerTest::SetUp();
}
void TearDown() {
MarkerTest::TearDown();
dbgRestore.reset(nullptr);
}
std::unique_ptr<DebugManagerStateRestore> dbgRestore;
};
struct WhiteBoxCommandQueue : public CommandQueue {
using CommandQueue::isBlockedCommandStreamRequired;
};
HWTEST_F(MarkerWithProfilingTest, givenMarkerWithProfilingAndBlockedEnqueueThenBlockedCommandStreamIsRequired) {
auto cmdQueueWB = static_cast<WhiteBoxCommandQueue *>(pCmdQ);
EventsRequest eventsRequest(0, nullptr, nullptr);
bool ret = cmdQueueWB->isBlockedCommandStreamRequired(CL_COMMAND_MARKER, eventsRequest, true, true);
EXPECT_TRUE(ret);
}

View File

@ -181,7 +181,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, ProfilingTests, GivenCommandQueueWithProfolingWhenWa
clReleaseEvent(event); clReleaseEvent(event);
} }
HWCMDTEST_F(IGFX_GEN8_CORE, ProfilingTests, GivenCommandQueueWithProfilingWhenNonBlockedEnqueueIsExecutedThenSubmittedTimestampHasGPUTime) { HWCMDTEST_F(IGFX_GEN8_CORE, ProfilingTests, GivenCommandQueueWithProfilingWhenNonBlockedEnqueueIsExecutedThenSubmittedTimestampDoesntHaveGPUTime) {
MockKernel kernel(program.get(), kernelInfo, *pClDevice); MockKernel kernel(program.get(), kernelInfo, *pClDevice);
ASSERT_EQ(CL_SUCCESS, kernel.initialize()); ASSERT_EQ(CL_SUCCESS, kernel.initialize());
@ -203,8 +203,8 @@ HWCMDTEST_F(IGFX_GEN8_CORE, ProfilingTests, GivenCommandQueueWithProfilingWhenNo
auto mockEvent = static_cast<MockEvent<Event> *>(event); auto mockEvent = static_cast<MockEvent<Event> *>(event);
EXPECT_NE(0u, mockEvent->queueTimeStamp.GPUTimeStamp); EXPECT_NE(0u, mockEvent->queueTimeStamp.GPUTimeStamp);
EXPECT_NE(0u, mockEvent->queueTimeStamp.CPUTimeinNS); EXPECT_NE(0u, mockEvent->queueTimeStamp.CPUTimeinNS);
EXPECT_LT(mockEvent->queueTimeStamp.GPUTimeStamp, mockEvent->submitTimeStamp.GPUTimeStamp);
EXPECT_LT(mockEvent->queueTimeStamp.CPUTimeinNS, mockEvent->submitTimeStamp.CPUTimeinNS); EXPECT_LT(mockEvent->queueTimeStamp.CPUTimeinNS, mockEvent->submitTimeStamp.CPUTimeinNS);
EXPECT_EQ(0u, mockEvent->submitTimeStamp.GPUTimeStamp);
clReleaseEvent(event); clReleaseEvent(event);
} }
@ -455,71 +455,6 @@ HWTEST_F(ProfilingTests, givenMarkerEnqueueWhenNonBlockedEnqueueThenSetGpuPath)
eventObj->release(); eventObj->release();
} }
HWTEST_F(ProfilingTests, givenMarkerEnqueueWhenBlockedEnqueueThenSetGpuPath) {
cl_event event;
cl_event userEvent = new UserEvent();
pCmdQ->enqueueMarkerWithWaitList(1, &userEvent, &event);
auto eventObj = static_cast<Event *>(event);
EXPECT_FALSE(eventObj->isCPUProfilingPath());
auto userEventObj = static_cast<UserEvent *>(userEvent);
pCmdQ->flush();
userEventObj->setStatus(CL_COMPLETE);
Event::waitForEvents(1, &event);
uint64_t queued, submit;
cl_int retVal;
retVal = eventObj->getEventProfilingInfo(CL_PROFILING_COMMAND_QUEUED, sizeof(uint64_t), &queued, 0);
EXPECT_EQ(CL_SUCCESS, retVal);
retVal = eventObj->getEventProfilingInfo(CL_PROFILING_COMMAND_SUBMIT, sizeof(uint64_t), &submit, 0);
EXPECT_EQ(CL_SUCCESS, retVal);
EXPECT_LT(0u, queued);
EXPECT_LT(queued, submit);
eventObj->release();
userEventObj->release();
}
HWTEST_F(ProfilingTests, givenMarkerEnqueueWhenBlockedEnqueueThenPipeControlsArePresentInCS) {
typedef typename FamilyType::PIPE_CONTROL PIPE_CONTROL;
cl_event event;
cl_event userEvent = new UserEvent();
static_cast<CommandQueueHw<FamilyType> *>(pCmdQ)->enqueueMarkerWithWaitList(1, &userEvent, &event);
auto eventObj = static_cast<Event *>(event);
EXPECT_FALSE(eventObj->isCPUProfilingPath());
auto userEventObj = static_cast<UserEvent *>(userEvent);
pCmdQ->flush();
userEventObj->setStatus(CL_COMPLETE);
Event::waitForEvents(1, &event);
parseCommands<FamilyType>(*pCmdQ);
// Check PIPE_CONTROLs
auto itorFirstPC = find<PIPE_CONTROL *>(cmdList.begin(), cmdList.end());
ASSERT_NE(cmdList.end(), itorFirstPC);
auto pFirstPC = genCmdCast<PIPE_CONTROL *>(*itorFirstPC);
ASSERT_NE(nullptr, pFirstPC);
auto itorSecondPC = find<PIPE_CONTROL *>(itorFirstPC, cmdList.end());
ASSERT_NE(cmdList.end(), itorSecondPC);
auto pSecondPC = genCmdCast<PIPE_CONTROL *>(*itorSecondPC);
ASSERT_NE(nullptr, pSecondPC);
EXPECT_TRUE(static_cast<MockEvent<Event> *>(event)->calcProfilingData());
eventObj->release();
userEventObj->release();
pCmdQ->isQueueBlocked();
}
template <typename TagType> template <typename TagType>
struct MockTagNode : public TagNode<TagType> { struct MockTagNode : public TagNode<TagType> {
public: public:

View File

@ -466,12 +466,13 @@ EngineControl &Device::getEngine(uint32_t index) {
} }
bool Device::getDeviceAndHostTimer(uint64_t *deviceTimestamp, uint64_t *hostTimestamp) const { bool Device::getDeviceAndHostTimer(uint64_t *deviceTimestamp, uint64_t *hostTimestamp) const {
TimeStampData timeStamp; TimeStampData queueTimeStamp;
bool retVal = getOSTime()->getCpuGpuTime(&timeStamp); bool retVal = getOSTime()->getCpuGpuTime(&queueTimeStamp);
if (retVal) { if (retVal) {
auto resolution = getOSTime()->getDynamicDeviceTimerResolution(getHardwareInfo()); uint64_t resolution = (uint64_t)getOSTime()->getDynamicDeviceTimerResolution(getHardwareInfo());
*deviceTimestamp = static_cast<uint64_t>(timeStamp.GPUTimeStamp * resolution); *deviceTimestamp = queueTimeStamp.GPUTimeStamp * resolution;
} }
retVal = getOSTime()->getCpuTime(hostTimestamp); retVal = getOSTime()->getCpuTime(hostTimestamp);
return retVal; return retVal;
} }