From 4598fc61e21d099e277ca3e3ec08f479bde013b6 Mon Sep 17 00:00:00 2001 From: Szymon Morek Date: Fri, 25 Nov 2022 10:44:10 +0000 Subject: [PATCH] Don't access virtual method if not needed Signed-off-by: Szymon Morek --- level_zero/core/source/event/event.h | 5 ++-- level_zero/core/source/event/event_impl.inl | 18 +++++++++---- .../unit_tests/sources/event/test_event.cpp | 26 ++++++++++++++++--- .../libult/ult_command_stream_receiver.h | 5 ++++ .../mocks/mock_command_stream_receiver.h | 3 ++- 5 files changed, 46 insertions(+), 11 deletions(-) diff --git a/level_zero/core/source/event/event.h b/level_zero/core/source/event/event.h index fea60be1fc..41985182b4 100644 --- a/level_zero/core/source/event/event.h +++ b/level_zero/core/source/event/event.h @@ -199,8 +199,8 @@ class KernelEventCompletionData : public NEO::TimestampPackets { template struct EventImp : public Event { - EventImp(EventPool *eventPool, int index, Device *device) - : device(device), index(index), eventPool(eventPool) { + EventImp(EventPool *eventPool, int index, Device *device, bool downloadAllocationRequired) + : device(device), index(index), eventPool(eventPool), downloadAllocationRequired(downloadAllocationRequired) { contextStartOffset = NEO::TimestampPackets::getContextStartOffset(); contextEndOffset = NEO::TimestampPackets::getContextEndOffset(); globalStartOffset = NEO::TimestampPackets::getGlobalStartOffset(); @@ -242,6 +242,7 @@ struct EventImp : public Event { Device *device; int index; EventPool *eventPool; + const bool downloadAllocationRequired = false; protected: ze_result_t calculateProfilingData(); diff --git a/level_zero/core/source/event/event_impl.inl b/level_zero/core/source/event/event_impl.inl index 4ff8baf2d8..dbaef0fbc1 100644 --- a/level_zero/core/source/event/event_impl.inl +++ b/level_zero/core/source/event/event_impl.inl @@ -15,13 +15,17 @@ namespace L0 { template Event *Event::create(EventPool *eventPool, const ze_event_desc_t *desc, Device *device) { - auto event = new EventImp(eventPool, desc->index, device); + auto neoDevice = device->getNEODevice(); + auto csr = neoDevice->getDefaultEngine().commandStreamReceiver; + bool downloadAllocationRequired = (csr->getType() == NEO::CommandStreamReceiverType::CSR_TBX || + csr->getType() == NEO::CommandStreamReceiverType::CSR_TBX_WITH_AUB); + + auto event = new EventImp(eventPool, desc->index, device, downloadAllocationRequired); UNRECOVERABLE_IF(event == nullptr); if (eventPool->isEventPoolTimestampFlagSet()) { event->setEventTimestampFlag(true); } - auto neoDevice = device->getNEODevice(); auto &hwInfo = neoDevice->getHardwareInfo(); auto &l0CoreHelper = neoDevice->getRootDeviceEnvironment().getHelper(); @@ -41,9 +45,10 @@ Event *Event::create(EventPool *eventPool, const ze_event_desc_t *desc, Device * event->hostAddress = reinterpret_cast(baseHostAddr + event->eventPoolOffset); event->signalScope = desc->signal; event->waitScope = desc->wait; - event->csr = neoDevice->getDefaultEngine().commandStreamReceiver; + event->csr = csr; event->maxKernelCount = maxKernels; event->maxPacketCount = eventPool->getEventMaxPackets(); + bool useContextEndOffset = l0CoreHelper.multiTileCapablePlatform(); int32_t overrideUseContextEndOffset = NEO::DebugManager.flags.UseContextEndOffsetForEventCompletion.get(); if (overrideUseContextEndOffset != -1) { @@ -182,8 +187,11 @@ ze_result_t EventImp::queryStatus() { if (metricStreamer != nullptr) { hostEventSetValue(metricStreamer->getNotificationState()); } - this->csr->downloadAllocations(); - this->csr->downloadAllocation(*eventPool->getAllocation().getGraphicsAllocation(device->getNEODevice()->getRootDeviceIndex())); + if (this->downloadAllocationRequired) { + this->csr->downloadAllocations(); + this->csr->downloadAllocation(*eventPool->getAllocation().getGraphicsAllocation(device->getNEODevice()->getRootDeviceIndex())); + } + if (isAlreadyCompleted()) { return ZE_RESULT_SUCCESS; } else { diff --git a/level_zero/core/test/unit_tests/sources/event/test_event.cpp b/level_zero/core/test/unit_tests/sources/event/test_event.cpp index 618bf069bf..8641b3b789 100644 --- a/level_zero/core/test/unit_tests/sources/event/test_event.cpp +++ b/level_zero/core/test/unit_tests/sources/event/test_event.cpp @@ -1536,7 +1536,7 @@ HWTEST_EXCLUDE_PRODUCT(TimestampEventCreate, givenEventTimestampsWhenQueryKernel TEST_F(TimestampEventCreate, givenEventWhenQueryKernelTimestampThenNotReadyReturned) { struct MockEventQuery : public EventImp { - MockEventQuery(L0::EventPool *eventPool, int index, L0::Device *device) : EventImp(eventPool, index, device) {} + MockEventQuery(L0::EventPool *eventPool, int index, L0::Device *device) : EventImp(eventPool, index, device, false) {} ze_result_t queryStatus() override { return ZE_RESULT_NOT_READY; @@ -2112,8 +2112,9 @@ HWTEST_F(EventTests, VariableBackup backupPauseValue(&CpuIntrinsicsTests::pauseValue, Event::STATE_CLEARED); VariableBackup backupPauseOffset(&CpuIntrinsicsTests::pauseOffset); VariableBackup> backupSetupPauseAddress(&CpuIntrinsicsTests::setupPauseAddress); - + neoDevice->getUltCommandStreamReceiver().commandStreamReceiverType = CommandStreamReceiverType::CSR_TBX; auto event = whiteboxCast(Event::create(eventPool.get(), &eventDesc, device)); + ASSERT_NE(event, nullptr); ASSERT_NE(nullptr, event->csr); ASSERT_EQ(device->getNEODevice()->getDefaultEngine().commandStreamReceiver, event->csr); @@ -2153,11 +2154,30 @@ HWTEST_F(EventTests, event->destroy(); } +HWTEST_F(EventTests, WhenDownloadAllocationNotRequiredThenDontDownloadAllocation) { + neoDevice->getUltCommandStreamReceiver().commandStreamReceiverType = CommandStreamReceiverType::CSR_HW; + auto event = whiteboxCast(Event::create(eventPool.get(), &eventDesc, device)); + event->queryStatus(); + EXPECT_FALSE(static_cast *>(event->csr)->downloadAllocationsCalled); + event->destroy(); +} + +HWTEST_F(EventTests, WhenDownloadAllocationRequiredThenDownloadAllocation) { + CommandStreamReceiverType csrTypes[] = {CommandStreamReceiverType::CSR_TBX, CommandStreamReceiverType::CSR_TBX_WITH_AUB}; + for (auto csrType : csrTypes) { + neoDevice->getUltCommandStreamReceiver().commandStreamReceiverType = csrType; + auto event = whiteboxCast(Event::create(eventPool.get(), &eventDesc, device)); + event->queryStatus(); + EXPECT_TRUE(static_cast *>(event->csr)->downloadAllocationsCalled); + event->destroy(); + } +} + struct MockEventCompletion : public EventImp { using EventImp::gpuStartTimestamp; using EventImp::gpuEndTimestamp; - MockEventCompletion(L0::EventPool *eventPool, int index, L0::Device *device) : EventImp(eventPool, index, device) { + MockEventCompletion(L0::EventPool *eventPool, int index, L0::Device *device) : EventImp(eventPool, index, device, false) { auto neoDevice = device->getNEODevice(); auto &hwInfo = neoDevice->getHardwareInfo(); auto &l0HwHelper = L0HwHelper::get(hwInfo.platform.eRenderCoreFamily); diff --git a/shared/test/common/libult/ult_command_stream_receiver.h b/shared/test/common/libult/ult_command_stream_receiver.h index 8c5fbc11c5..e106a4307d 100644 --- a/shared/test/common/libult/ult_command_stream_receiver.h +++ b/shared/test/common/libult/ult_command_stream_receiver.h @@ -354,6 +354,10 @@ class UltCommandStreamReceiver : public CommandStreamReceiverHw, publ BaseClass::ensureCommandBufferAllocation(commandStream, minimumRequiredSize, additionalAllocationSize); } + CommandStreamReceiverType getType() override { + return commandStreamReceiverType; + } + std::vector aubCommentMessages; BatchBuffer latestFlushedBatchBuffer = {}; @@ -398,6 +402,7 @@ class UltCommandStreamReceiver : public CommandStreamReceiverHw, publ bool callBaseFlushBcsTask{true}; uint32_t flushBcsTaskReturnValue{}; std::optional flushReturnValue{}; + CommandStreamReceiverType commandStreamReceiverType = CommandStreamReceiverType::CSR_HW; }; } // namespace NEO diff --git a/shared/test/common/mocks/mock_command_stream_receiver.h b/shared/test/common/mocks/mock_command_stream_receiver.h index 974b4484a2..24006dfecc 100644 --- a/shared/test/common/mocks/mock_command_stream_receiver.h +++ b/shared/test/common/mocks/mock_command_stream_receiver.h @@ -122,7 +122,7 @@ class MockCommandStreamReceiver : public CommandStreamReceiver { uint32_t flushBcsTask(const BlitPropertiesContainer &blitPropertiesContainer, bool blocking, bool profilingEnabled, Device &device) override { return taskCount; }; CommandStreamReceiverType getType() override { - return CommandStreamReceiverType::CSR_HW; + return commandStreamReceiverType; } void downloadAllocations() override { @@ -193,6 +193,7 @@ class MockCommandStreamReceiver : public CommandStreamReceiver { std::optional isGpuHangDetectedReturnValue{}; std::optional testTaskCountReadyReturnValue{}; WaitStatus waitForCompletionWithTimeoutReturnValue{WaitStatus::Ready}; + CommandStreamReceiverType commandStreamReceiverType = CommandStreamReceiverType::CSR_HW; }; class MockCommandStreamReceiverWithFailingSubmitBatch : public MockCommandStreamReceiver {