From 3dc3ad36f840d952c99e427a422deac02c74867c Mon Sep 17 00:00:00 2001 From: Bartosz Dunajski Date: Mon, 22 Mar 2021 15:14:49 +0000 Subject: [PATCH] Refactor TimestampPacket class Signed-off-by: Bartosz Dunajski --- level_zero/core/source/event/event.cpp | 40 +++++------ .../unit_tests/sources/event/test_event.cpp | 9 ++- opencl/source/event/event.cpp | 26 ++++---- ...nd_stream_receiver_with_aub_dump_tests.cpp | 11 ++-- .../helpers/timestamp_packet_tests.cpp | 66 ++++++++++--------- opencl/test/unit_test/kernel/kernel_tests.cpp | 26 ++++++-- .../unit_test/profiling/profiling_tests.cpp | 55 ++++++++-------- .../command_stream/command_stream_receiver.h | 2 +- .../command_stream_receiver_hw_base.inl | 8 +-- shared/source/helpers/blit_commands_helper.h | 2 +- shared/source/helpers/timestamp_packet.h | 42 ++++++++++-- 11 files changed, 159 insertions(+), 128 deletions(-) diff --git a/level_zero/core/source/event/event.cpp b/level_zero/core/source/event/event.cpp index 40445cae92..4b9fb1a280 100644 --- a/level_zero/core/source/event/event.cpp +++ b/level_zero/core/source/event/event.cpp @@ -147,24 +147,23 @@ uint64_t Event::getTimestampPacketAddress() { } ze_result_t EventImp::calculateProfilingData() { - globalStartTS = timestampsData->packets[0].globalStart; - globalEndTS = timestampsData->packets[0].globalEnd; - contextStartTS = timestampsData->packets[0].contextStart; - contextEndTS = timestampsData->packets[0].contextEnd; + globalStartTS = timestampsData->getGlobalStartValue(0); + globalEndTS = timestampsData->getGlobalEndValue(0); + contextStartTS = timestampsData->getContextStartValue(0); + contextEndTS = timestampsData->getContextEndValue(0); for (auto i = 1u; i < packetsInUse; i++) { - auto &packet = timestampsData->packets[i]; - if (globalStartTS > packet.globalStart) { - globalStartTS = packet.globalStart; + if (globalStartTS > timestampsData->getGlobalStartValue(i)) { + globalStartTS = timestampsData->getGlobalStartValue(i); } - if (contextStartTS > packet.contextStart) { - contextStartTS = packet.contextStart; + if (contextStartTS > timestampsData->getContextStartValue(i)) { + contextStartTS = timestampsData->getContextStartValue(i); } - if (contextEndTS < packet.contextEnd) { - contextEndTS = packet.contextEnd; + if (contextEndTS < timestampsData->getContextEndValue(i)) { + contextEndTS = timestampsData->getContextEndValue(i); } - if (globalEndTS < packet.globalEnd) { - globalEndTS = packet.globalEnd; + if (globalEndTS < timestampsData->getGlobalEndValue(i)) { + globalEndTS = timestampsData->getGlobalEndValue(i); } } @@ -172,20 +171,11 @@ ze_result_t EventImp::calculateProfilingData() { } void EventImp::assignTimestampData(void *address) { - auto baseAddr = reinterpret_cast(address); uint32_t packetsToCopy = packetsInUse ? packetsInUse : NEO::TimestampPacketSizeControl::preferredPacketCount; - auto copyData = [&](uint32_t ×tampField, auto tsAddr) { - memcpy_s(static_cast(×tampField), sizeof(uint32_t), reinterpret_cast(tsAddr), sizeof(uint32_t)); - }; - for (uint32_t i = 0; i < packetsToCopy; i++) { - auto &packet = timestampsData->packets[i]; - copyData(packet.globalStart, baseAddr + offsetof(TimestampPacketStorage::Packet, globalStart)); - copyData(packet.contextStart, baseAddr + offsetof(TimestampPacketStorage::Packet, contextStart)); - copyData(packet.globalEnd, baseAddr + offsetof(TimestampPacketStorage::Packet, globalEnd)); - copyData(packet.contextEnd, baseAddr + offsetof(TimestampPacketStorage::Packet, contextEnd)); - baseAddr += sizeof(struct TimestampPacketStorage::Packet); + timestampsData->assignDataToAllTimestamps(i, address); + address = ptrOffset(address, sizeof(struct TimestampPacketStorage::Packet)); } } @@ -215,7 +205,7 @@ ze_result_t EventImp::hostEventSetValueTimestamps(uint32_t eventVal) { auto baseAddr = reinterpret_cast(hostAddress); auto signalScopeFlag = this->signalScope; - auto eventTsSetFunc = [&](auto tsAddr) { + auto eventTsSetFunc = [&eventVal, &signalScopeFlag](auto tsAddr) { auto tsptr = reinterpret_cast(tsAddr); memcpy_s(tsptr, sizeof(uint32_t), static_cast(&eventVal), sizeof(uint32_t)); diff --git a/level_zero/core/test/unit_tests/sources/event/test_event.cpp b/level_zero/core/test/unit_tests/sources/event/test_event.cpp index b274f2b6a2..4d86c9d585 100644 --- a/level_zero/core/test/unit_tests/sources/event/test_event.cpp +++ b/level_zero/core/test/unit_tests/sources/event/test_event.cpp @@ -276,11 +276,10 @@ TEST_F(TimestampEventCreate, givenEventTimestampsCreatedWhenResetIsInvokeThenCor EXPECT_NE(nullptr, event->timestampsData); for (auto i = 0u; i < NEO::TimestampPacketSizeControl::preferredPacketCount; i++) { - auto &packet = event->timestampsData->packets[i]; - EXPECT_EQ(Event::State::STATE_INITIAL, packet.contextStart); - EXPECT_EQ(Event::State::STATE_INITIAL, packet.globalStart); - EXPECT_EQ(Event::State::STATE_INITIAL, packet.contextEnd); - EXPECT_EQ(Event::State::STATE_INITIAL, packet.globalEnd); + EXPECT_EQ(static_cast(Event::State::STATE_INITIAL), event->timestampsData->getContextStartValue(i)); + EXPECT_EQ(static_cast(Event::State::STATE_INITIAL), event->timestampsData->getGlobalStartValue(i)); + EXPECT_EQ(static_cast(Event::State::STATE_INITIAL), event->timestampsData->getContextEndValue(i)); + EXPECT_EQ(static_cast(Event::State::STATE_INITIAL), event->timestampsData->getGlobalEndValue(i)); } EXPECT_EQ(0u, event->getPacketsInUse()); diff --git a/opencl/source/event/event.cpp b/opencl/source/event/event.cpp index 74a699db64..585bba9e1d 100644 --- a/opencl/source/event/event.cpp +++ b/opencl/source/event/event.cpp @@ -258,13 +258,12 @@ bool Event::calcProfilingData() { for (auto i = 0u; i < timestamps.size(); i++) { std::cout << "Timestamp " << i << ", " << "profiling capable: " << timestamps[i]->isProfilingCapable() << ", "; - for (auto j = 0u; j < timestamps[i]->tagForCpuAccess->packetsUsed; j++) { - const auto &packet = timestamps[i]->tagForCpuAccess->packets[j]; + for (auto j = 0u; j < timestamps[i]->tagForCpuAccess->getPacketsUsed(); j++) { std::cout << "packet " << j << ": " - << "global start: " << packet.globalStart << ", " - << "global end: " << packet.globalEnd << ", " - << "context start: " << packet.contextStart << ", " - << "context end: " << packet.contextEnd << std::endl; + << "global start: " << timestamps[i]->tagForCpuAccess->getGlobalStartValue(j) << ", " + << "global end: " << timestamps[i]->tagForCpuAccess->getGlobalEndValue(j) << ", " + << "context start: " << timestamps[i]->tagForCpuAccess->getContextStartValue(j) << ", " + << "context end: " << timestamps[i]->tagForCpuAccess->getContextEndValue(j) << std::endl; } } } @@ -347,20 +346,19 @@ void Event::calculateProfilingDataInternal(uint64_t contextStartTS, uint64_t con void Event::getBoundaryTimestampValues(TimestampPacketContainer *timestampContainer, uint64_t &globalStartTS, uint64_t &globalEndTS) { const auto timestamps = timestampContainer->peekNodes(); - globalStartTS = timestamps[0]->tagForCpuAccess->packets[0].globalStart; - globalEndTS = timestamps[0]->tagForCpuAccess->packets[0].globalEnd; + globalStartTS = timestamps[0]->tagForCpuAccess->getGlobalStartValue(0); + globalEndTS = timestamps[0]->tagForCpuAccess->getGlobalEndValue(0); for (const auto ×tamp : timestamps) { if (!timestamp->isProfilingCapable()) { continue; } - for (auto i = 0u; i < timestamp->tagForCpuAccess->packetsUsed; ++i) { - const auto &packet = timestamp->tagForCpuAccess->packets[i]; - if (globalStartTS > packet.globalStart) { - globalStartTS = packet.globalStart; + for (auto i = 0u; i < timestamp->tagForCpuAccess->getPacketsUsed(); ++i) { + if (globalStartTS > timestamp->tagForCpuAccess->getGlobalStartValue(i)) { + globalStartTS = timestamp->tagForCpuAccess->getGlobalStartValue(i); } - if (globalEndTS < packet.globalEnd) { - globalEndTS = packet.globalEnd; + if (globalEndTS < timestamp->tagForCpuAccess->getGlobalEndValue(i)) { + globalEndTS = timestamp->tagForCpuAccess->getGlobalEndValue(i); } } } diff --git a/opencl/test/unit_test/command_stream/command_stream_receiver_with_aub_dump_tests.cpp b/opencl/test/unit_test/command_stream/command_stream_receiver_with_aub_dump_tests.cpp index 1120d7a26b..d7735235b2 100644 --- a/opencl/test/unit_test/command_stream/command_stream_receiver_with_aub_dump_tests.cpp +++ b/opencl/test/unit_test/command_stream/command_stream_receiver_with_aub_dump_tests.cpp @@ -290,12 +290,13 @@ struct CommandStreamReceiverTagTests : public ::testing::Test { auto allocator = csr.getTimestampPacketAllocator(); auto tag = allocator->getTag(); - for (auto &packet : tag->tagForCpuAccess->packets) { - packet.contextStart = 0; - packet.globalStart = 0; - packet.contextEnd = 0; - packet.globalEnd = 0; + + uint32_t zeros[4] = {}; + + for (uint32_t i = 0; i < TimestampPacketSizeControl::preferredPacketCount; i++) { + tag->tagForCpuAccess->assignDataToAllTimestamps(i, zeros); } + EXPECT_TRUE(tag->tagForCpuAccess->isCompleted()); bool canBeReleased = tag->canBeReleased(); diff --git a/opencl/test/unit_test/helpers/timestamp_packet_tests.cpp b/opencl/test/unit_test/helpers/timestamp_packet_tests.cpp index 0e33d02c1c..72ce050702 100644 --- a/opencl/test/unit_test/helpers/timestamp_packet_tests.cpp +++ b/opencl/test/unit_test/helpers/timestamp_packet_tests.cpp @@ -36,17 +36,23 @@ using namespace NEO; struct TimestampPacketSimpleTests : public ::testing::Test { - void setTagToReadyState(TagNode *tagNode) { - auto packetsUsed = tagNode->tagForCpuAccess->packetsUsed; + class MockTimestampPacketStorage : public TimestampPacketStorage { + public: + using TimestampPacketStorage::implicitGpuDependenciesCount; + using TimestampPacketStorage::packets; + }; + + template + void setTagToReadyState(TagNode *tagNode) { + auto packetsUsed = tagNode->tagForCpuAccess->getPacketsUsed(); tagNode->initialize(); - for (auto &packet : tagNode->tagForCpuAccess->packets) { - packet.contextStart = 0u; - packet.globalStart = 0u; - packet.contextEnd = 0u; - packet.globalEnd = 0u; + uint32_t zeros[4] = {}; + + for (uint32_t i = 0; i < TimestampPacketSizeControl::preferredPacketCount; i++) { + tagNode->tagForCpuAccess->assignDataToAllTimestamps(i, zeros); } - tagNode->tagForCpuAccess->packetsUsed = packetsUsed; + tagNode->tagForCpuAccess->setPacketsUsed(packetsUsed); } const size_t gws[3] = {1, 1, 1}; @@ -139,7 +145,7 @@ HWTEST_F(TimestampPacketTests, givenDebugModeWhereAtomicsAreNotEmittedWhenComman using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; using MI_ATOMIC = typename FamilyType::MI_ATOMIC; - TimestampPacketStorage tag; + MockTimestampPacketStorage tag; MockTagNode mockNode; mockNode.tagForCpuAccess = &tag; mockNode.gpuAddress = 0x1230000; @@ -178,7 +184,7 @@ HWTEST_F(TimestampPacketTests, givenTagNodeWithPacketsUsed2WhenSemaphoreAndAtomi using MI_ATOMIC = typename FamilyType::MI_ATOMIC; TimestampPacketStorage tag; - tag.packetsUsed = 2; + tag.setPacketsUsed(2); MockTagNode mockNode; mockNode.tagForCpuAccess = &tag; mockNode.gpuAddress = 0x1230000; @@ -189,7 +195,7 @@ HWTEST_F(TimestampPacketTests, givenTagNodeWithPacketsUsed2WhenSemaphoreAndAtomi HardwareParse hwParser; hwParser.parseCommands(cmdStream, 0); auto it = hwParser.cmdList.begin(); - for (uint32_t packetId = 0; packetId < tag.packetsUsed; packetId++) { + for (uint32_t packetId = 0; packetId < tag.getPacketsUsed(); packetId++) { verifySemaphore(genCmdCast(*it++), &mockNode, packetId); } verifyMiAtomic(genCmdCast(*it++), &mockNode); @@ -201,15 +207,15 @@ TEST_F(TimestampPacketTests, givenTagNodeWhatAskingForGpuAddressesThenReturnCorr mockNode.tagForCpuAccess = &tag; mockNode.gpuAddress = 0x1230000; - auto expectedEndAddress = mockNode.getGpuAddress() + offsetof(TimestampPacketStorage, packets[0].contextEnd); + auto expectedEndAddress = mockNode.getGpuAddress() + (2 * sizeof(uint32_t)); EXPECT_EQ(expectedEndAddress, TimestampPacketHelper::getContextEndGpuAddress(mockNode)); - auto expectedCounterAddress = mockNode.getGpuAddress() + offsetof(TimestampPacketStorage, implicitGpuDependenciesCount); + auto expectedCounterAddress = mockNode.getGpuAddress() + (TimestampPacketSizeControl::preferredPacketCount * 4 * sizeof(uint32_t)); EXPECT_EQ(expectedCounterAddress, TimestampPacketHelper::getGpuDependenciesCountGpuAddress(mockNode)); } TEST_F(TimestampPacketSimpleTests, whenEndTagIsNotOneThenMarkAsCompleted) { - TimestampPacketStorage timestampPacketStorage; + MockTimestampPacketStorage timestampPacketStorage; auto &packet = timestampPacketStorage.packets[0]; timestampPacketStorage.initialize(); @@ -266,7 +272,7 @@ TEST_F(TimestampPacketSimpleTests, givenTimestampPacketContainerWhenMovedThenMov } TEST_F(TimestampPacketSimpleTests, whenIsCompletedIsCalledThenItReturnsProperTimestampPacketStatus) { - TimestampPacketStorage timestampPacketStorage; + MockTimestampPacketStorage timestampPacketStorage; auto &packet = timestampPacketStorage.packets[0]; timestampPacketStorage.initialize(); @@ -278,29 +284,29 @@ TEST_F(TimestampPacketSimpleTests, whenIsCompletedIsCalledThenItReturnsProperTim } TEST_F(TimestampPacketSimpleTests, givenMultiplePacketsInUseWhenCompletionIsCheckedThenVerifyAllUsedNodes) { - TimestampPacketStorage timestampPacketStorage; + MockTimestampPacketStorage timestampPacketStorage; auto &packets = timestampPacketStorage.packets; timestampPacketStorage.initialize(); - timestampPacketStorage.packetsUsed = TimestampPacketSizeControl::preferredPacketCount - 1; + timestampPacketStorage.setPacketsUsed(TimestampPacketSizeControl::preferredPacketCount - 1); - for (uint32_t i = 0; i < timestampPacketStorage.packetsUsed - 1; i++) { + for (uint32_t i = 0; i < timestampPacketStorage.getPacketsUsed() - 1; i++) { packets[i].contextEnd = 0; packets[i].globalEnd = 0; EXPECT_FALSE(timestampPacketStorage.isCompleted()); } - packets[timestampPacketStorage.packetsUsed - 1].contextEnd = 0; + packets[timestampPacketStorage.getPacketsUsed() - 1].contextEnd = 0; EXPECT_FALSE(timestampPacketStorage.isCompleted()); - packets[timestampPacketStorage.packetsUsed - 1].globalEnd = 0; + packets[timestampPacketStorage.getPacketsUsed() - 1].globalEnd = 0; EXPECT_TRUE(timestampPacketStorage.isCompleted()); } TEST_F(TimestampPacketSimpleTests, whenNewTagIsTakenThenReinitialize) { MockExecutionEnvironment executionEnvironment(defaultHwInfo.get()); MockMemoryManager memoryManager(executionEnvironment); - MockTagAllocator allocator(0, &memoryManager, 1); + MockTagAllocator allocator(0, &memoryManager, 1); auto firstNode = allocator.getTag(); auto i = 0u; @@ -328,11 +334,11 @@ TEST_F(TimestampPacketSimpleTests, whenNewTagIsTakenThenReinitialize) { EXPECT_EQ(1u, packet.contextEnd); EXPECT_EQ(1u, packet.globalEnd); } - EXPECT_EQ(1u, firstNode->tagForCpuAccess->packetsUsed); + EXPECT_EQ(1u, firstNode->tagForCpuAccess->getPacketsUsed()); } TEST_F(TimestampPacketSimpleTests, whenObjectIsCreatedThenInitializeAllStamps) { - TimestampPacketStorage timestampPacketStorage; + MockTimestampPacketStorage timestampPacketStorage; EXPECT_EQ(TimestampPacketSizeControl::preferredPacketCount * sizeof(timestampPacketStorage.packets[0]), sizeof(timestampPacketStorage.packets)); for (const auto &packet : timestampPacketStorage.packets) { @@ -341,7 +347,7 @@ TEST_F(TimestampPacketSimpleTests, whenObjectIsCreatedThenInitializeAllStamps) { EXPECT_EQ(1u, packet.contextEnd); EXPECT_EQ(1u, packet.globalEnd); } - EXPECT_EQ(1u, timestampPacketStorage.packetsUsed); + EXPECT_EQ(1u, timestampPacketStorage.getPacketsUsed()); } HWTEST_F(TimestampPacketTests, givenCommandStreamReceiverHwWhenObtainingPreferredTagPoolSizeThenReturnCorrectValue) { @@ -361,12 +367,8 @@ HWTEST_F(TimestampPacketTests, givenDebugFlagSetWhenCreatingTimestampPacketAlloc EXPECT_EQ(1u, csr.getPreferredTagPoolSize()); auto tag = csr.getTimestampPacketAllocator()->getTag(); - for (auto &packet : tag->tagForCpuAccess->packets) { - packet.contextStart = 0; - packet.globalStart = 0; - packet.contextEnd = 0; - packet.globalEnd = 0; - } + setTagToReadyState(tag); + EXPECT_TRUE(tag->tagForCpuAccess->isCompleted()); EXPECT_FALSE(tag->canBeReleased()); } @@ -556,13 +558,13 @@ HWTEST_F(TimestampPacketTests, whenEstimatingSizeForNodeDependencyThenReturnCorr size_t sizeForNodeDependency = 0; sizeForNodeDependency += TimestampPacketHelper::getRequiredCmdStreamSizeForNodeDependency(mockNode); - size_t expectedSize = mockNode.tagForCpuAccess->packetsUsed * sizeof(typename FamilyType::MI_SEMAPHORE_WAIT) + sizeof(typename FamilyType::MI_ATOMIC); + size_t expectedSize = mockNode.tagForCpuAccess->getPacketsUsed() * sizeof(typename FamilyType::MI_SEMAPHORE_WAIT) + sizeof(typename FamilyType::MI_ATOMIC); EXPECT_EQ(expectedSize, sizeForNodeDependency); } HWTEST_F(TimestampPacketTests, givenTagNodeWhenCpuAndGpuDependenciesCountAreEqualThenCanBeReleased) { - TimestampPacketStorage tag; + MockTimestampPacketStorage tag; MockTagNode mockNode; mockNode.tagForCpuAccess = &tag; mockNode.gpuAddress = 0x1230000; diff --git a/opencl/test/unit_test/kernel/kernel_tests.cpp b/opencl/test/unit_test/kernel/kernel_tests.cpp index fc1ada99a7..24ea505c51 100644 --- a/opencl/test/unit_test/kernel/kernel_tests.cpp +++ b/opencl/test/unit_test/kernel/kernel_tests.cpp @@ -2183,8 +2183,12 @@ HWTEST_F(KernelResidencyTest, givenEnableFullKernelTuningWhenPerformTunningThenK EXPECT_EQ(result->second.status, MockKernel::TunningStatus::SUBDEVICE_TUNNING_IN_PROGRESS); EXPECT_FALSE(mockKernel.mockKernel->singleSubdevicePreferedInCurrentEnqueue); - container.getNode(0u)->tagForCpuAccess->packets->globalEnd = 2u; - container.getNode(0u)->tagForCpuAccess->packets->contextEnd = 2u; + uint32_t data[4] = {static_cast(container.getNode(0u)->tagForCpuAccess->getContextStartValue(0)), + static_cast(container.getNode(0u)->tagForCpuAccess->getGlobalStartValue(0)), + 2, 2}; + + container.getNode(0u)->tagForCpuAccess->assignDataToAllTimestamps(0, data); + mockKernel.mockKernel->performKernelTunning(commandStreamReceiver, lws, gws, offsets, &container); result = mockKernel.mockKernel->kernelSubmissionMap.find(config); @@ -2192,8 +2196,13 @@ HWTEST_F(KernelResidencyTest, givenEnableFullKernelTuningWhenPerformTunningThenK EXPECT_EQ(result->second.status, MockKernel::TunningStatus::SUBDEVICE_TUNNING_IN_PROGRESS); EXPECT_FALSE(mockKernel.mockKernel->singleSubdevicePreferedInCurrentEnqueue); - subdeviceContainer.getNode(0u)->tagForCpuAccess->packets->globalEnd = 2u; - subdeviceContainer.getNode(0u)->tagForCpuAccess->packets->contextEnd = 2u; + data[0] = static_cast(subdeviceContainer.getNode(0u)->tagForCpuAccess->getContextStartValue(0)); + data[1] = static_cast(subdeviceContainer.getNode(0u)->tagForCpuAccess->getGlobalStartValue(0)); + data[2] = 2; + data[3] = 2; + + subdeviceContainer.getNode(0u)->tagForCpuAccess->assignDataToAllTimestamps(0, data); + mockKernel.mockKernel->performKernelTunning(commandStreamReceiver, lws, gws, offsets, &container); result = mockKernel.mockKernel->kernelSubmissionMap.find(config); @@ -2203,8 +2212,13 @@ HWTEST_F(KernelResidencyTest, givenEnableFullKernelTuningWhenPerformTunningThenK EXPECT_EQ(result->second.status, MockKernel::TunningStatus::SUBDEVICE_TUNNING_IN_PROGRESS); EXPECT_FALSE(mockKernel.mockKernel->singleSubdevicePreferedInCurrentEnqueue); - subdeviceContainer.getNode(1u)->tagForCpuAccess->packets->globalEnd = 2u; - subdeviceContainer.getNode(1u)->tagForCpuAccess->packets->contextEnd = 2u; + data[0] = static_cast(subdeviceContainer.getNode(1u)->tagForCpuAccess->getContextStartValue(0)); + data[1] = static_cast(subdeviceContainer.getNode(1u)->tagForCpuAccess->getGlobalStartValue(0)); + data[2] = 2; + data[3] = 2; + + subdeviceContainer.getNode(1u)->tagForCpuAccess->assignDataToAllTimestamps(0, data); + mockKernel.mockKernel->performKernelTunning(commandStreamReceiver, lws, gws, offsets, &container); result = mockKernel.mockKernel->kernelSubmissionMap.find(config); diff --git a/opencl/test/unit_test/profiling/profiling_tests.cpp b/opencl/test/unit_test/profiling/profiling_tests.cpp index 4678bc7c4b..4ed3e958c7 100644 --- a/opencl/test/unit_test/profiling/profiling_tests.cpp +++ b/opencl/test/unit_test/profiling/profiling_tests.cpp @@ -1078,36 +1078,33 @@ struct ProfilingTimestampPacketsTest : public ::testing::Test { ev->timestampPacketContainer = std::make_unique(); } - void addTimestampNode(int contextStart, int contextEnd, int globalStart, int globalEnd) { + void addTimestampNode(uint32_t contextStart, uint32_t contextEnd, uint32_t globalStart, uint32_t globalEnd) { auto node = new MockTagNode(); auto timestampPacketStorage = new TimestampPacketStorage(); node->tagForCpuAccess = timestampPacketStorage; - timestampPacketStorage->packets[0].contextStart = contextStart; - timestampPacketStorage->packets[0].contextEnd = contextEnd; - timestampPacketStorage->packets[0].globalStart = globalStart; - timestampPacketStorage->packets[0].globalEnd = globalEnd; + uint32_t values[4] = {contextStart, globalStart, contextEnd, globalEnd}; + timestampPacketStorage->assignDataToAllTimestamps(0, values); ev->timestampPacketContainer->add(node); } - void addTimestampNodeMultiOsContext(int globalStart[16], int globalEnd[16], int contextStart[16], int contextEnd[16], uint32_t size) { + void addTimestampNodeMultiOsContext(uint32_t globalStart[16], uint32_t globalEnd[16], uint32_t contextStart[16], uint32_t contextEnd[16], uint32_t size) { auto node = new MockTagNode(); auto timestampPacketStorage = new TimestampPacketStorage(); - timestampPacketStorage->packetsUsed = size; + timestampPacketStorage->setPacketsUsed(size); - for (uint32_t i = 0u; i < timestampPacketStorage->packetsUsed; ++i) { - timestampPacketStorage->packets[i].globalStart = globalStart[i]; - timestampPacketStorage->packets[i].globalEnd = globalEnd[i]; - timestampPacketStorage->packets[i].contextStart = contextStart[i]; - timestampPacketStorage->packets[i].contextEnd = contextEnd[i]; + for (uint32_t i = 0u; i < timestampPacketStorage->getPacketsUsed(); ++i) { + uint32_t values[4] = {contextStart[i], globalStart[i], contextEnd[i], globalEnd[i]}; + + timestampPacketStorage->assignDataToAllTimestamps(i, values); } node->tagForCpuAccess = timestampPacketStorage; ev->timestampPacketContainer->add(node); } - void initTimestampNodeMultiOsContextData(int globalStart[16], int globalEnd[16], uint32_t size) { + void initTimestampNodeMultiOsContextData(uint32_t globalStart[16], uint32_t globalEnd[16], uint32_t size) { for (uint32_t i = 0u; i < size; ++i) { globalStart[i] = 100; @@ -1149,10 +1146,10 @@ TEST_F(ProfilingTimestampPacketsTest, givenTimestampsPacketContainerWithOneEleme } TEST_F(ProfilingTimestampPacketsTest, givenMultiOsContextCapableSetToTrueWhenCalcProfilingDataIsCalledThenCorrectedValuesAreReturned) { - int globalStart[16] = {0}; - int globalEnd[16] = {0}; - int contextStart[16] = {0}; - int contextEnd[16] = {0}; + uint32_t globalStart[16] = {0}; + uint32_t globalEnd[16] = {0}; + uint32_t contextStart[16] = {0}; + uint32_t contextEnd[16] = {0}; initTimestampNodeMultiOsContextData(globalStart, globalEnd, 16u); addTimestampNodeMultiOsContext(globalStart, globalEnd, contextStart, contextEnd, 16u); auto &device = reinterpret_cast(cmdQ->getDevice()); @@ -1165,15 +1162,15 @@ TEST_F(ProfilingTimestampPacketsTest, givenMultiOsContextCapableSetToTrueWhenCal } TEST_F(ProfilingTimestampPacketsTest, givenTimestampPacketWithoutProfilingDataWhenCalculatingThenDontUseThatPacket) { - int globalStart0 = 20; - int globalEnd0 = 51; - int contextStart0 = 21; - int contextEnd0 = 50; + uint32_t globalStart0 = 20; + uint32_t globalEnd0 = 51; + uint32_t contextStart0 = 21; + uint32_t contextEnd0 = 50; - int globalStart1 = globalStart0 - 1; - int globalEnd1 = globalEnd0 + 1; - int contextStart1 = contextStart0 - 1; - int contextEnd1 = contextEnd0 + 1; + uint32_t globalStart1 = globalStart0 - 1; + uint32_t globalEnd1 = globalEnd0 + 1; + uint32_t contextStart1 = contextStart0 - 1; + uint32_t contextEnd1 = contextEnd0 + 1; addTimestampNodeMultiOsContext(&globalStart0, &globalEnd0, &contextStart0, &contextEnd0, 1); addTimestampNodeMultiOsContext(&globalStart1, &globalEnd1, &contextStart1, &contextEnd1, 1); @@ -1197,10 +1194,10 @@ TEST_F(ProfilingTimestampPacketsTest, givenPrintTimestampPacketContentsSetWhenCa auto &csr = device.getUltCommandStreamReceiver(); csr.multiOsContextCapable = true; - int globalStart[16] = {0}; - int globalEnd[16] = {0}; - int contextStart[16] = {0}; - int contextEnd[16] = {0}; + uint32_t globalStart[16] = {0}; + uint32_t globalEnd[16] = {0}; + uint32_t contextStart[16] = {0}; + uint32_t contextEnd[16] = {0}; for (int i = 0; i < 16; i++) { globalStart[i] = 2 * i; globalEnd[i] = 500 * i; diff --git a/shared/source/command_stream/command_stream_receiver.h b/shared/source/command_stream/command_stream_receiver.h index ed1773d8f8..b4c9c0ffa7 100644 --- a/shared/source/command_stream/command_stream_receiver.h +++ b/shared/source/command_stream/command_stream_receiver.h @@ -47,7 +47,7 @@ struct HwPerfCounter; struct HwTimeStamps; template -struct TimestampPackets; +class TimestampPackets; template class TagAllocator; diff --git a/shared/source/command_stream/command_stream_receiver_hw_base.inl b/shared/source/command_stream/command_stream_receiver_hw_base.inl index a9363bf6ac..1e2006edd6 100644 --- a/shared/source/command_stream/command_stream_receiver_hw_base.inl +++ b/shared/source/command_stream/command_stream_receiver_hw_base.inl @@ -1006,8 +1006,8 @@ uint32_t CommandStreamReceiverHw::blitBuffer(const BlitPropertiesCont TimestampPacketHelper::programCsrDependencies(commandStream, blitProperties.csrDependencies, getOsContext().getNumSupportedDevices()); if (blitProperties.outputTimestampPacket && profilingEnabled) { - auto timestampContextStartGpuAddress = blitProperties.outputTimestampPacket->getGpuAddress() + offsetof(TimestampPacketStorage, packets[0].contextStart); - auto timestampGlobalStartAddress = blitProperties.outputTimestampPacket->getGpuAddress() + offsetof(TimestampPacketStorage, packets[0].globalStart); + auto timestampContextStartGpuAddress = TimestampPacketHelper::getContextStartGpuAddress(*blitProperties.outputTimestampPacket); + auto timestampGlobalStartAddress = TimestampPacketHelper::getGlobalStartGpuAddress(*blitProperties.outputTimestampPacket); EncodeStoreMMIO::encode(commandStream, GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW, timestampContextStartGpuAddress); EncodeStoreMMIO::encode(commandStream, REG_GLOBAL_TIMESTAMP_LDW, timestampGlobalStartAddress); @@ -1017,8 +1017,8 @@ uint32_t CommandStreamReceiverHw::blitBuffer(const BlitPropertiesCont if (blitProperties.outputTimestampPacket) { if (profilingEnabled) { - auto timestampContextEndGpuAddress = blitProperties.outputTimestampPacket->getGpuAddress() + offsetof(TimestampPacketStorage, packets[0].contextEnd); - auto timestampGlobalEndAddress = blitProperties.outputTimestampPacket->getGpuAddress() + offsetof(TimestampPacketStorage, packets[0].globalEnd); + auto timestampContextEndGpuAddress = TimestampPacketHelper::getContextEndGpuAddress(*blitProperties.outputTimestampPacket); + auto timestampGlobalEndAddress = TimestampPacketHelper::getGlobalEndGpuAddress(*blitProperties.outputTimestampPacket); EncodeMiFlushDW::programMiFlushDw(commandStream, 0llu, newTaskCount, false, false); diff --git a/shared/source/helpers/blit_commands_helper.h b/shared/source/helpers/blit_commands_helper.h index 4b93f1da98..617e21ec6b 100644 --- a/shared/source/helpers/blit_commands_helper.h +++ b/shared/source/helpers/blit_commands_helper.h @@ -29,7 +29,7 @@ template struct TagNode; template -struct TimestampPackets; +class TimestampPackets; struct BlitProperties; struct HardwareInfo; diff --git a/shared/source/helpers/timestamp_packet.h b/shared/source/helpers/timestamp_packet.h index 07e37f03ae..724af11825 100644 --- a/shared/source/helpers/timestamp_packet.h +++ b/shared/source/helpers/timestamp_packet.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2018-2020 Intel Corporation + * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -12,6 +12,7 @@ #include "shared/source/helpers/aux_translation.h" #include "shared/source/helpers/hw_helper.h" #include "shared/source/helpers/non_copyable_or_moveable.h" +#include "shared/source/helpers/string.h" #include "shared/source/utilities/tag_allocator.h" #include "pipe_control_args.h" @@ -30,7 +31,8 @@ constexpr uint32_t preferredPacketCount = 16u; #pragma pack(1) template -struct TimestampPackets { +class TimestampPackets { + public: struct Packet { TSize contextStart = 1u; TSize globalStart = 1u; @@ -67,8 +69,27 @@ struct TimestampPackets { implicitGpuDependenciesCount = 0; } + void assignDataToAllTimestamps(uint32_t packetIndex, void *source) { + memcpy_s(&packets[packetIndex], sizeof(Packet), source, sizeof(Packet)); + } + + size_t getGlobalStartOffset() const { return ptrDiff(&packets[0].globalStart, this); } + size_t getContextStartOffset() const { return ptrDiff(&packets[0].contextStart, this); } + size_t getContextEndOffset() const { return ptrDiff(&packets[0].contextEnd, this); } + size_t getGlobalEndOffset() const { return ptrDiff(&packets[0].globalEnd, this); } + size_t getImplicitGpuDependenciesCountOffset() const { return ptrDiff(&implicitGpuDependenciesCount, this); } + + uint64_t getContextStartValue(uint32_t packetIndex) const { return static_cast(packets[packetIndex].contextStart); } + uint64_t getGlobalStartValue(uint32_t packetIndex) const { return static_cast(packets[packetIndex].globalStart); } + uint64_t getContextEndValue(uint32_t packetIndex) const { return static_cast(packets[packetIndex].contextEnd); } + uint64_t getGlobalEndValue(uint32_t packetIndex) const { return static_cast(packets[packetIndex].globalEnd); } + + void setPacketsUsed(uint32_t used) { packetsUsed = used; } + uint32_t getPacketsUsed() const { return packetsUsed; } + uint32_t getImplicitGpuDependenciesCount() const { return implicitGpuDependenciesCount; } + protected: Packet packets[TimestampPacketSizeControl::preferredPacketCount]; uint32_t implicitGpuDependenciesCount = 0; uint32_t packetsUsed = 1; @@ -110,11 +131,20 @@ struct TimestampPacketDependencies : public NonCopyableClass { struct TimestampPacketHelper { static uint64_t getContextEndGpuAddress(const TagNode ×tampPacketNode) { - return timestampPacketNode.getGpuAddress() + offsetof(TimestampPacketStorage, packets[0].contextEnd); + return timestampPacketNode.getGpuAddress() + timestampPacketNode.tagForCpuAccess->getContextEndOffset(); + } + static uint64_t getContextStartGpuAddress(const TagNode ×tampPacketNode) { + return timestampPacketNode.getGpuAddress() + timestampPacketNode.tagForCpuAccess->getContextStartOffset(); + } + static uint64_t getGlobalEndGpuAddress(const TagNode ×tampPacketNode) { + return timestampPacketNode.getGpuAddress() + timestampPacketNode.tagForCpuAccess->getGlobalEndOffset(); + } + static uint64_t getGlobalStartGpuAddress(const TagNode ×tampPacketNode) { + return timestampPacketNode.getGpuAddress() + timestampPacketNode.tagForCpuAccess->getGlobalStartOffset(); } static uint64_t getGpuDependenciesCountGpuAddress(const TagNode ×tampPacketNode) { - return timestampPacketNode.getGpuAddress() + offsetof(TimestampPacketStorage, implicitGpuDependenciesCount); + return timestampPacketNode.getGpuAddress() + timestampPacketNode.tagForCpuAccess->getImplicitGpuDependenciesCountOffset(); } static void overrideSupportedDevicesCount(uint32_t &numSupportedDevices); @@ -128,7 +158,7 @@ struct TimestampPacketHelper { auto compareAddress = getContextEndGpuAddress(timestampPacketNode); auto dependenciesCountAddress = getGpuDependenciesCountGpuAddress(timestampPacketNode); - for (uint32_t packetId = 0; packetId < timestampPacketNode.tagForCpuAccess->packetsUsed; packetId++) { + for (uint32_t packetId = 0; packetId < timestampPacketNode.tagForCpuAccess->getPacketsUsed(); packetId++) { uint64_t compareOffset = packetId * sizeof(TimestampPacketStorage::Packet); EncodeSempahore::addMiSemaphoreWaitCommand(cmdStream, compareAddress + compareOffset, 1, COMPARE_OPERATION::COMPARE_OPERATION_SAD_NOT_EQUAL_SDD); } @@ -202,7 +232,7 @@ struct TimestampPacketHelper { template static size_t getRequiredCmdStreamSizeForNodeDependency(TagNode ×tampPacketNode) { - size_t totalMiSemaphoreWaitSize = timestampPacketNode.tagForCpuAccess->packetsUsed * sizeof(typename GfxFamily::MI_SEMAPHORE_WAIT); + size_t totalMiSemaphoreWaitSize = timestampPacketNode.tagForCpuAccess->getPacketsUsed() * sizeof(typename GfxFamily::MI_SEMAPHORE_WAIT); return totalMiSemaphoreWaitSize + sizeof(typename GfxFamily::MI_ATOMIC); }