// Patch summary (preamble placed ahead of the first "diff --git" header; the patch text below is unchanged).
//
// Purpose: the alignment handed to the timestamp packet tag allocator is moved out of
// CommandStreamReceiverHw::getTimestampPacketAllocator() into a new const member,
// getTimestampPacketAllocatorAlignment(), declared MOCKABLE_VIRTUAL in command_stream_receiver_hw.h.
//
// Per-file changes:
//  - command_stream_receiver_hw.h: declares getTimestampPacketAllocatorAlignment() const, returning size_t.
//  - command_stream_receiver_hw_base.inl: the allocator construction now passes
//    getTimestampPacketAllocatorAlignment() where it previously passed the literal
//    MemoryConstants::cacheLineSize * 4; the new method is defined to return that same expression,
//    so the value used by this class is unchanged by the patch.
//  - ult_command_stream_receiver.h: adds a using-declaration for BaseClass::getTimestampPacketAllocatorAlignment
//    so tests can call it on UltCommandStreamReceiver.
//  - command_stream_receiver_tests.cpp: the test
//    givenTimestampPacketAllocatorWhenAskingForTagThenReturnValidObject is replaced by
//    givenCsrWhenAskingForTimestampPacketAlignmentThenReturnFourCachelines, which only compares the
//    getter's result against MemoryConstants::cacheLineSize * 4.
//  - timestamp_packet_tests.cpp: adds givenTagAlignmentWhenCreatingAllocatorThenGpuAddressIsAligned, where a
//    local MyCsr overrides the getter to return 4096 and the GPU addresses of two tags are checked with isAligned().
//
// NOTE(review): the removed test also asserted that repeated getTimestampPacketAllocator() calls return the
//   same allocator and that two tags are non-null and distinct; neither replacement test asserts this.
//   Confirm that coverage exists elsewhere.
// NOTE(review): the rewritten test still checks csr.timestampPacketAllocator.get() == nullptr before calling
//   the getter; that looks like a leftover from the removed test -- confirm it is intentional.
// NOTE(review): the new test dereferences tag1/tag2 without a null check -- presumably getTag() cannot return
//   nullptr here; verify.
// NOTE(review): MyCsr marks the getter `override`, so this presumably relies on MOCKABLE_VIRTUAL expanding to
//   `virtual` in test builds -- confirm against the macro definition.
// NOTE(review): in this copy the line breaks are collapsed and template argument lists appear to be missing
//   (e.g. "static_cast> *>", "std::make_unique>(", "template +size_t"); the patch will not apply as-is and
//   should be regenerated from the original commit.
diff --git a/opencl/test/unit_test/command_stream/command_stream_receiver_tests.cpp b/opencl/test/unit_test/command_stream/command_stream_receiver_tests.cpp index 050f5f1460..267041b598 100644 --- a/opencl/test/unit_test/command_stream/command_stream_receiver_tests.cpp +++ b/opencl/test/unit_test/command_stream/command_stream_receiver_tests.cpp @@ -640,27 +640,13 @@ TEST_F(CommandStreamReceiverTest, whenGettingEventPerfCountAllocatorThenSameTagA EXPECT_EQ(allocator2, allocator); } -HWTEST_F(CommandStreamReceiverTest, givenTimestampPacketAllocatorWhenAskingForTagThenReturnValidObject) { +HWTEST_F(CommandStreamReceiverTest, givenCsrWhenAskingForTimestampPacketAlignmentThenReturnFourCachelines) { auto &csr = pDevice->getUltCommandStreamReceiver(); EXPECT_EQ(nullptr, csr.timestampPacketAllocator.get()); - auto allocator = static_cast> *>(csr.getTimestampPacketAllocator()); - EXPECT_NE(nullptr, csr.timestampPacketAllocator.get()); - EXPECT_EQ(allocator, csr.timestampPacketAllocator.get()); + constexpr auto expectedAlignment = MemoryConstants::cacheLineSize * 4; - auto allocator2 = static_cast> *>(csr.getTimestampPacketAllocator()); - EXPECT_EQ(allocator, allocator2); - - auto node1 = allocator->getTag(); - auto node2 = allocator->getTag(); - EXPECT_NE(nullptr, node1); - EXPECT_NE(nullptr, node2); - EXPECT_NE(node1, node2); - - constexpr auto tagAlignment = MemoryConstants::cacheLineSize * 4; - - EXPECT_TRUE(isAligned(node1->getGpuAddress(), tagAlignment)); - EXPECT_TRUE(isAligned(node2->getGpuAddress(), tagAlignment)); + EXPECT_EQ(expectedAlignment, csr.getTimestampPacketAllocatorAlignment()); } HWTEST_F(CommandStreamReceiverTest, givenUltCommandStreamReceiverWhenAddAubCommentIsCalledThenCallAddAubCommentOnCsr) { diff --git a/opencl/test/unit_test/helpers/timestamp_packet_tests.cpp b/opencl/test/unit_test/helpers/timestamp_packet_tests.cpp index d4389c2473..cd69ed5805 100644 --- a/opencl/test/unit_test/helpers/timestamp_packet_tests.cpp +++ 
b/opencl/test/unit_test/helpers/timestamp_packet_tests.cpp @@ -358,6 +358,30 @@ HWTEST_F(TimestampPacketTests, givenCommandStreamReceiverHwWhenObtainingPreferre EXPECT_EQ(2048u, csr.getPreferredTagPoolSize()); } +HWTEST_F(TimestampPacketTests, givenTagAlignmentWhenCreatingAllocatorThenGpuAddressIsAligned) { + class MyCsr : public CommandStreamReceiverHw { + public: + using CommandStreamReceiverHw::CommandStreamReceiverHw; + size_t getTimestampPacketAllocatorAlignment() const override { + return alignment; + } + + size_t alignment = 4096; + }; + OsContext &osContext = *executionEnvironment->memoryManager->getRegisteredEngines()[0].osContext; + + MyCsr csr(*executionEnvironment, 0, osContext.getDeviceBitfield()); + csr.setupContext(osContext); + + auto allocator = csr.getTimestampPacketAllocator(); + + auto tag1 = allocator->getTag(); + auto tag2 = allocator->getTag(); + + EXPECT_TRUE(isAligned(tag1->getGpuAddress(), csr.alignment)); + EXPECT_TRUE(isAligned(tag2->getGpuAddress(), csr.alignment)); +} + HWTEST_F(TimestampPacketTests, givenDebugFlagSetWhenCreatingTimestampPacketAllocatorThenDisableReusingAndLimitPoolSize) { DebugManagerStateRestore restore; DebugManager.flags.DisableTimestampPacketOptimizations.set(true); diff --git a/opencl/test/unit_test/libult/ult_command_stream_receiver.h b/opencl/test/unit_test/libult/ult_command_stream_receiver.h index c4193aea84..b9054eed80 100644 --- a/opencl/test/unit_test/libult/ult_command_stream_receiver.h +++ b/opencl/test/unit_test/libult/ult_command_stream_receiver.h @@ -37,6 +37,7 @@ class UltCommandStreamReceiver : public CommandStreamReceiverHw, publ using BaseClass::getCmdSizeForPrologue; using BaseClass::getScratchPatchAddress; using BaseClass::getScratchSpaceController; + using BaseClass::getTimestampPacketAllocatorAlignment; using BaseClass::indirectHeap; using BaseClass::iohState; using BaseClass::isBlitterDirectSubmissionEnabled; diff --git a/shared/source/command_stream/command_stream_receiver_hw.h 
b/shared/source/command_stream/command_stream_receiver_hw.h index d2c13bf295..78d63feb2d 100644 --- a/shared/source/command_stream/command_stream_receiver_hw.h +++ b/shared/source/command_stream/command_stream_receiver_hw.h @@ -155,6 +155,8 @@ class CommandStreamReceiverHw : public CommandStreamReceiver { bool checkPlatformSupportsNewResourceImplicitFlush() const; bool checkPlatformSupportsGpuIdleImplicitFlush() const; + MOCKABLE_VIRTUAL size_t getTimestampPacketAllocatorAlignment() const; + HeapDirtyState dshState; HeapDirtyState iohState; HeapDirtyState sshState; diff --git a/shared/source/command_stream/command_stream_receiver_hw_base.inl b/shared/source/command_stream/command_stream_receiver_hw_base.inl index 8ea46d7fae..001379f9ae 100644 --- a/shared/source/command_stream/command_stream_receiver_hw_base.inl +++ b/shared/source/command_stream/command_stream_receiver_hw_base.inl @@ -1282,10 +1282,15 @@ TagAllocatorBase *CommandStreamReceiverHw::getTimestampPacketAllocato using TimestampPacketsT = TimestampPackets; timestampPacketAllocator = std::make_unique>( - rootDeviceIndex, getMemoryManager(), getPreferredTagPoolSize(), MemoryConstants::cacheLineSize * 4, + rootDeviceIndex, getMemoryManager(), getPreferredTagPoolSize(), getTimestampPacketAllocatorAlignment(), sizeof(TimestampPacketsT), doNotReleaseNodes, osContext->getDeviceBitfield()); } return timestampPacketAllocator.get(); } +template +size_t CommandStreamReceiverHw::getTimestampPacketAllocatorAlignment() const { + return MemoryConstants::cacheLineSize * 4; +} + } // namespace NEO