From 184ec7d07dcdc6bebb6d1aa2bd238964f5edfe76 Mon Sep 17 00:00:00 2001 From: Bartosz Dunajski Date: Thu, 25 Feb 2021 14:50:02 +0000 Subject: [PATCH] Dont allocate HwTimeStamp when TimestampPacket is used Signed-off-by: Bartosz Dunajski --- opencl/source/command_queue/enqueue_common.h | 7 +++- opencl/source/event/event.cpp | 4 +-- opencl/test/unit_test/event/event_tests.cpp | 33 ++++++++++++++----- .../unit_test/profiling/profiling_tests.cpp | 24 ++++++++++++++ 4 files changed, 57 insertions(+), 11 deletions(-) diff --git a/opencl/source/command_queue/enqueue_common.h b/opencl/source/command_queue/enqueue_common.h index 101c342c74..d19e2bf04b 100644 --- a/opencl/source/command_queue/enqueue_common.h +++ b/opencl/source/command_queue/enqueue_common.h @@ -729,7 +729,12 @@ CompletionStamp CommandQueueHw::enqueueNonBlocked( if (isProfilingEnabled() && eventBuilder.getEvent()) { this->getDevice().getOSTime()->getCpuTime(&submitTimeStamp.CPUTimeinNS); eventBuilder.getEvent()->setSubmitTimeStamp(&submitTimeStamp); - getGpgpuCommandStreamReceiver().makeResident(*eventBuilder.getEvent()->getHwTimeStampNode()->getBaseGraphicsAllocation()); + + auto hwTimestampNode = eventBuilder.getEvent()->getHwTimeStampNode(); + if (hwTimestampNode) { + getGpgpuCommandStreamReceiver().makeResident(*hwTimestampNode->getBaseGraphicsAllocation()); + } + if (isPerfCountersEnabled()) { getGpgpuCommandStreamReceiver().makeResident(*eventBuilder.getEvent()->getHwPerfCounterNode()->getBaseGraphicsAllocation()); } diff --git a/opencl/source/event/event.cpp b/opencl/source/event/event.cpp index 117050fdcf..74a699db64 100644 --- a/opencl/source/event/event.cpp +++ b/opencl/source/event/event.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2017-2020 Intel Corporation + * Copyright (C) 2017-2021 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -737,7 +737,7 @@ void Event::setEndTimeStamp() { } TagNode *Event::getHwTimeStampNode() { - if (!timeStampNode) { + if (!cmdQueue->getTimestampPacketContainer() && !timeStampNode) { timeStampNode = cmdQueue->getGpgpuCommandStreamReceiver().getEventTsAllocator()->getTag(); } return timeStampNode; diff --git a/opencl/test/unit_test/event/event_tests.cpp b/opencl/test/unit_test/event/event_tests.cpp index 405f8fa14c..508b29c62a 100644 --- a/opencl/test/unit_test/event/event_tests.cpp +++ b/opencl/test/unit_test/event/event_tests.cpp @@ -866,8 +866,13 @@ HWTEST_F(InternalsEventWithPerfCountersTest, givenCpuProfilingPerfCountersPathWh event->setCPUProfilingPath(true); HwPerfCounter *perfCounter = event->getHwPerfCounterNode()->tagForCpuAccess; ASSERT_NE(nullptr, perfCounter); - HwTimeStamps *timeStamps = event->getHwTimeStampNode()->tagForCpuAccess; - ASSERT_NE(nullptr, timeStamps); + + auto hwTimeStampNode = event->getHwTimeStampNode(); + if (pCmdQ->getTimestampPacketContainer()) { + EXPECT_EQ(nullptr, hwTimeStampNode); + } else { + ASSERT_NE(nullptr, hwTimeStampNode->tagForCpuAccess); + } event->setCommand(std::unique_ptr(new CommandWithoutKernel(*pCmdQ))); @@ -1109,8 +1114,12 @@ TEST_F(EventTest, GivenNoQueueWhenSettingCpuTimeStampThenTimesIsNotSet) { EXPECT_EQ(0ULL, outCPUtimeStamp); } -TEST_F(EventTest, WhenGettingHwTimeStampsThenValidPointerIsReturned) { - std::unique_ptr event(new Event(this->pCmdQ, CL_COMMAND_COPY_BUFFER, 0, 0)); +HWTEST_F(EventTest, WhenGettingHwTimeStampsThenValidPointerIsReturned) { + pDevice->getUltCommandStreamReceiver().timestampPacketWriteEnabled = false; + + auto myCmdQ = std::make_unique>(pCmdQ->getContextPtr(), pClDevice, nullptr); + + std::unique_ptr event(new Event(myCmdQ.get(), CL_COMMAND_COPY_BUFFER, 0, 0)); ASSERT_NE(nullptr, event); HwTimeStamps *timeStamps = event->getHwTimeStampNode()->tagForCpuAccess; @@ -1130,8 +1139,12 @@ TEST_F(EventTest, WhenGettingHwTimeStampsThenValidPointerIsReturned) { ASSERT_EQ(timeStamps, timeStamps2); } -TEST_F(EventTest, WhenGetHwTimeStampsAllocationThenValidPointerIsReturned) { - std::unique_ptr event(new Event(this->pCmdQ, CL_COMMAND_COPY_BUFFER, 0, 0)); +HWTEST_F(EventTest, WhenGetHwTimeStampsAllocationThenValidPointerIsReturned) { + pDevice->getUltCommandStreamReceiver().timestampPacketWriteEnabled = false; + + auto myCmdQ = std::make_unique>(pCmdQ->getContextPtr(), pClDevice, nullptr); + + std::unique_ptr event(new Event(myCmdQ.get(), CL_COMMAND_COPY_BUFFER, 0, 0)); ASSERT_NE(nullptr, event); GraphicsAllocation *allocation = event->getHwTimeStampNode()->getBaseGraphicsAllocation(); @@ -1144,8 +1157,12 @@ TEST_F(EventTest, WhenGetHwTimeStampsAllocationThenValidPointerIsReturned) { EXPECT_GT(memoryStorageSize, 0u); } -TEST_F(EventTest, WhenEventIsCreatedThenHwTimeStampsMemoryIsPlacedInGraphicsAllocation) { - std::unique_ptr event(new Event(this->pCmdQ, CL_COMMAND_COPY_BUFFER, 0, 0)); +HWTEST_F(EventTest, WhenEventIsCreatedThenHwTimeStampsMemoryIsPlacedInGraphicsAllocation) { + pDevice->getUltCommandStreamReceiver().timestampPacketWriteEnabled = false; + + auto myCmdQ = std::make_unique>(pCmdQ->getContextPtr(), pClDevice, nullptr); + + std::unique_ptr event(new Event(myCmdQ.get(), CL_COMMAND_COPY_BUFFER, 0, 0)); ASSERT_NE(nullptr, event); HwTimeStamps *timeStamps = event->getHwTimeStampNode()->tagForCpuAccess; diff --git a/opencl/test/unit_test/profiling/profiling_tests.cpp b/opencl/test/unit_test/profiling/profiling_tests.cpp index a9e1d03834..7f63676c5c 100644 --- a/opencl/test/unit_test/profiling/profiling_tests.cpp +++ b/opencl/test/unit_test/profiling/profiling_tests.cpp @@ -888,6 +888,8 @@ HWTEST_F(ProfilingWithPerfCountersTests, GivenCommandQueueWithProfilingPerfCount template struct FixedGpuAddressTagAllocator : TagAllocator { + using TagAllocator::usedTags; + using TagAllocator::deferredTags; struct MockTagNode : TagNode { void setGpuAddress(uint64_t value) { this->gpuAddress = value; } @@ -936,6 +938,28 @@ HWCMDTEST_F(IGFX_GEN8_CORE, ProfilingWithPerfCountersTests, GivenCommandQueueWit clReleaseEvent(event); } +HWTEST_F(ProfilingWithPerfCountersTests, givenTimestampPacketsEnabledWhenEnqueueIsCalledThenDontAllocateHwTimeStamps) { + auto &csr = pDevice->getUltCommandStreamReceiver(); + csr.timestampPacketWriteEnabled = true; + + auto mockAllocator = new FixedGpuAddressTagAllocator(csr, 0x123); + csr.profilingTimeStampAllocator.reset(mockAllocator); + + auto myCmdQ = std::make_unique>(pCmdQ->getContextPtr(), pClDevice.get(), nullptr); + myCmdQ->setProfilingEnabled(); + + size_t globalOffsets[3] = {0, 0, 0}; + size_t workItems[3] = {1, 1, 1}; + cl_event event; + + myCmdQ->enqueueKernel(kernel->mockKernel, 1, globalOffsets, workItems, nullptr, 0, nullptr, &event); + + EXPECT_EQ(!!myCmdQ->getTimestampPacketContainer(), mockAllocator->usedTags.peekIsEmpty()); + EXPECT_TRUE(mockAllocator->deferredTags.peekIsEmpty()); + + clReleaseEvent(event); +} + HWCMDTEST_F(IGFX_GEN8_CORE, ProfilingWithPerfCountersOnCCSTests, givenCommandQueueBlockedWithProfilingPerfCountersWhenWalkerIsDispatchedThenPipeControlWithTimeStampIsPresentInCS) { using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; using GPGPU_WALKER = typename FamilyType::GPGPU_WALKER;