From f2bbd63d373593b948f1a37f939cab3980f99b0f Mon Sep 17 00:00:00 2001 From: Bartosz Dunajski Date: Tue, 28 Jun 2022 18:52:33 +0000 Subject: [PATCH] Refactor SBA handling + fix unit tests Signed-off-by: Bartosz Dunajski --- .../core/source/cmdqueue/cmdqueue_hw_base.inl | 4 +-- .../cmdqueue_xe_hp_core_and_later.inl | 9 ++--- .../sources/cmdlist/test_cmdlist_4.cpp | 10 ++++-- .../sources/cmdlist/test_cmdlist_5.cpp | 29 ++++++++++++--- .../sources/cmdlist/test_cmdlist_7.cpp | 15 ++++---- .../cmdlist/test_cmdlist_append_memory.cpp | 15 ++++---- .../test_cmdlist_append_wait_on_events.cpp | 20 +++++++---- .../enqueue_copy_buffer_to_image_tests.cpp | 18 +++++----- .../enqueue_copy_image_tests.cpp | 24 ++++++------- .../enqueue_copy_image_to_buffer_tests.cpp | 18 +++++----- .../enqueue_fill_image_tests.cpp | 16 ++++----- .../command_queue/enqueue_handler_tests.cpp | 4 +-- .../enqueue_read_buffer_rect_tests.cpp | 4 +-- .../enqueue_read_image_tests.cpp | 18 +++++----- .../enqueue_write_buffer_rect_tests.cpp | 6 ++-- .../enqueue_write_image_tests.cpp | 18 +++++----- .../unit_test/profiling/profiling_tests.cpp | 36 ------------------- ...d_write_buffer_scenarios_windows_tests.cpp | 4 +-- .../command_stream_receiver_hw_base.inl | 31 ++++++++-------- shared/source/helpers/state_base_address.h | 2 ++ .../helpers/state_base_address_base.inl | 5 +++ shared/test/common/cmd_parse/hw_parse.h | 23 +----------- shared/test/common/cmd_parse/hw_parse.inl | 26 +++++++++++++- shared/test/common/gen11/cmd_parse_gen11.cpp | 3 +- .../test/common/gen12lp/cmd_parse_gen12lp.cpp | 1 + shared/test/common/gen8/cmd_parse_gen8.cpp | 3 +- shared/test/common/gen9/cmd_parse_gen9.cpp | 3 +- shared/test/common/helpers/unit_test_helper.h | 2 ++ .../test/common/helpers/unit_test_helper.inl | 20 +++++++++++ .../xe_hp_core/cmd_parse_xe_hp_core.cpp | 4 ++- .../xe_hpc_core/cmd_parse_xe_hpc_core.cpp | 4 ++- .../xe_hpg_core/cmd_parse_xe_hpg_core.cpp | 4 ++- 32 files changed, 222 insertions(+), 177 deletions(-) diff --git a/level_zero/core/source/cmdqueue/cmdqueue_hw_base.inl b/level_zero/core/source/cmdqueue/cmdqueue_hw_base.inl index 981edd14c2..d1342ba0a6 100644 --- a/level_zero/core/source/cmdqueue/cmdqueue_hw_base.inl +++ b/level_zero/core/source/cmdqueue/cmdqueue_hw_base.inl @@ -40,7 +40,7 @@ void CommandQueueHw::programStateBaseAddress(uint64_t gsba, bool NEO::EncodeWA::addPipeControlBeforeStateBaseAddress(commandStream, hwInfo, isRcs); NEO::EncodeWA::encodeAdditionalPipelineSelect(commandStream, {}, true, hwInfo, isRcs); - auto pSbaCmd = static_cast(commandStream.getSpace(sizeof(STATE_BASE_ADDRESS))); + auto sbaCmdBuf = static_cast(NEO::StateBaseAddressHelper::getSpaceForSbaCmd(commandStream)); STATE_BASE_ADDRESS sbaCmd; bool useGlobalSshAndDsh = NEO::ApiSpecificConfig::getBindlessConfiguration(); @@ -69,7 +69,7 @@ void CommandQueueHw::programStateBaseAddress(uint64_t gsba, bool NEO::MemoryCompressionState::NotApplicable, false, 1u); - *pSbaCmd = sbaCmd; + *sbaCmdBuf = sbaCmd; csr->setGSBAStateDirty(false); if (NEO::Debugger::isDebugEnabled(internalUsage) && device->getL0Debugger()) { diff --git a/level_zero/core/source/cmdqueue/cmdqueue_xe_hp_core_and_later.inl b/level_zero/core/source/cmdqueue/cmdqueue_xe_hp_core_and_later.inl index cae30d7a1a..fa1c8fa807 100644 --- a/level_zero/core/source/cmdqueue/cmdqueue_xe_hp_core_and_later.inl +++ b/level_zero/core/source/cmdqueue/cmdqueue_xe_hp_core_and_later.inl @@ -31,7 +31,8 @@ void CommandQueueHw::programStateBaseAddress(uint64_t gsba, bool bool isRcs = this->getCsr()->isRcs(); NEO::EncodeWA::addPipeControlBeforeStateBaseAddress(commandStream, hwInfo, isRcs); - auto pSbaCmd = static_cast(commandStream.getSpace(sizeof(STATE_BASE_ADDRESS))); + auto sbaCmdBuf = static_cast(NEO::StateBaseAddressHelper::getSpaceForSbaCmd(commandStream)); + STATE_BASE_ADDRESS sbaCmd; bool multiOsContextCapable = device->isImplicitScalingCapable(); NEO::StateBaseAddressHelper::programStateBaseAddress(&sbaCmd, @@ -51,12 +52,12 @@ void CommandQueueHw::programStateBaseAddress(uint64_t gsba, bool NEO::MemoryCompressionState::NotApplicable, false, 1u); - *pSbaCmd = sbaCmd; + *sbaCmdBuf = sbaCmd; auto &hwInfoConfig = *NEO::HwInfoConfig::get(hwInfo.platform.eProductFamily); if (hwInfoConfig.isAdditionalStateBaseAddressWARequired(hwInfo)) { - pSbaCmd = static_cast(commandStream.getSpace(sizeof(STATE_BASE_ADDRESS))); - *pSbaCmd = sbaCmd; + sbaCmdBuf = static_cast(commandStream.getSpace(sizeof(STATE_BASE_ADDRESS))); + *sbaCmdBuf = sbaCmd; } if (NEO::Debugger::isDebugEnabled(internalUsage) && device->getL0Debugger()) { diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_4.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_4.cpp index 95027c648d..fc6c39aa0f 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_4.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_4.cpp @@ -861,8 +861,14 @@ HWTEST2_F(HostPointerManagerCommandListTest, givenCommandListWhenMemoryFillWithS ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( cmdList, ptrOffset(commandContainer.getCommandStream()->getCpuBase(), 0), commandContainer.getCommandStream()->getUsed())); - auto itor = find(cmdList.begin(), cmdList.end()); - EXPECT_NE(cmdList.end(), itor); + auto pc = genCmdCast(*cmdList.rbegin()); + + if (NEO::MemorySynchronizationCommands::getDcFlushEnable(true, device->getHwInfo())) { + EXPECT_NE(nullptr, pc); + EXPECT_TRUE(pc->getDcFlushEnable()); + } else { + EXPECT_EQ(nullptr, pc); + } } using SupportedPlatformsSklIcllp = IsWithinProducts; diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_5.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_5.cpp index 3bc0442975..82993402f6 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_5.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_5.cpp @@ -594,6 +594,8 @@ HWTEST_F(CommandListCreate, givenCommandListWithCopyOnlyWhenAppendWaitEventsWith HWTEST_F(CommandListCreate, givenCommandListyWhenAppendWaitEventsWithDcFlushThenPipeControlIsProgrammed) { using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; + using SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; + ze_result_t returnValue; std::unique_ptr commandList(CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue)); auto &commandContainer = commandList->commandContainer; @@ -605,9 +607,19 @@ HWTEST_F(CommandListCreate, givenCommandListyWhenAppendWaitEventsWithDcFlushThen GenCmdList cmdList; ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( cmdList, ptrOffset(commandContainer.getCommandStream()->getCpuBase(), 0), commandContainer.getCommandStream()->getUsed())); - auto itor = find(cmdList.begin(), cmdList.end()); + auto itor = find(cmdList.begin(), cmdList.end()); EXPECT_NE(cmdList.end(), itor); + + if (NEO::MemorySynchronizationCommands::getDcFlushEnable(true, device->getHwInfo())) { + itor--; + EXPECT_NE(nullptr, genCmdCast(*itor)); + } else { + if (cmdList.begin() != itor) { + itor--; + EXPECT_EQ(nullptr, genCmdCast(*itor)); + } + } } HWTEST_F(CommandListCreate, givenCommandListWhenAppendWaitEventsWithDcFlushThenPipeControlIsProgrammedOnlyOnce) { @@ -627,11 +639,18 @@ HWTEST_F(CommandListCreate, givenCommandListWhenAppendWaitEventsWithDcFlushThenP ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( cmdList, ptrOffset(commandContainer.getCommandStream()->getCpuBase(), 0), commandContainer.getCommandStream()->getUsed())); - auto itor = find(cmdList.begin(), cmdList.end()); + auto itor = find(cmdList.begin(), cmdList.end()); EXPECT_NE(cmdList.end(), itor); - itor++; - auto itor2 = find(itor, cmdList.end()); - EXPECT_NE(cmdList.end(), itor2); + + if (NEO::MemorySynchronizationCommands::getDcFlushEnable(true, device->getHwInfo())) { + itor--; + EXPECT_NE(nullptr, genCmdCast(*itor)); + } else { + if (cmdList.begin() != itor) { + itor--; + EXPECT_EQ(nullptr, genCmdCast(*itor)); + } + } } HWTEST_F(CommandListCreate, givenAsyncCmdQueueAndImmediateCommandListWhenAppendWaitEventsWithHostScopeThenPipeControlAndSemWaitAreAddedFromCommandList) { diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_7.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_7.cpp index 68ebc3c25d..e0ac071544 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_7.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_7.cpp @@ -452,14 +452,15 @@ HWTEST2_F(CommandListCreate, givenCommandListAndHostPointersWhenMemoryCopyCalled GenCmdList genCmdList; ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( genCmdList, ptrOffset(commandContainer.getCommandStream()->getCpuBase(), 0), commandContainer.getCommandStream()->getUsed())); - auto itor = find(genCmdList.begin(), genCmdList.end()); - ASSERT_NE(genCmdList.end(), itor); - PIPE_CONTROL *cmd = nullptr; - while (itor != genCmdList.end()) { - cmd = genCmdCast(*itor); - itor = find(++itor, genCmdList.end()); + + auto pc = genCmdCast(*genCmdList.rbegin()); + + if (NEO::MemorySynchronizationCommands::getDcFlushEnable(true, device->getHwInfo())) { + EXPECT_NE(nullptr, pc); + EXPECT_TRUE(pc->getDcFlushEnable()); + } else { + EXPECT_EQ(nullptr, pc); } - EXPECT_EQ(MemorySynchronizationCommands::getDcFlushEnable(true, *defaultHwInfo), cmd->getDcFlushEnable()); // NOLINT(clang-analyzer-core.CallAndMessage) } } // namespace ult diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_memory.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_memory.cpp index 0c76495db0..89fc185fdb 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_memory.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_memory.cpp @@ -106,14 +106,15 @@ HWTEST2_F(AppendMemoryCopy, givenCommandListAndHostPointersWhenMemoryCopyRegionC GenCmdList genCmdList; ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( genCmdList, ptrOffset(commandContainer.getCommandStream()->getCpuBase(), 0), commandContainer.getCommandStream()->getUsed())); - auto itor = find(genCmdList.begin(), genCmdList.end()); - ASSERT_NE(genCmdList.end(), itor); - PIPE_CONTROL *cmd = nullptr; - while (itor != genCmdList.end()) { - cmd = genCmdCast(*itor); - itor = find(++itor, genCmdList.end()); + + auto pc = genCmdCast(*genCmdList.rbegin()); + + if (NEO::MemorySynchronizationCommands::getDcFlushEnable(true, device->getHwInfo())) { + EXPECT_NE(nullptr, pc); + EXPECT_TRUE(pc->getDcFlushEnable()); + } else { + EXPECT_EQ(nullptr, pc); } - EXPECT_EQ(MemorySynchronizationCommands::getDcFlushEnable(true, *defaultHwInfo), cmd->getDcFlushEnable()); // NOLINT(clang-analyzer-core.CallAndMessage) } HWTEST2_F(AppendMemoryCopy, givenImmediateCommandListWhenAppendingMemoryCopyThenSuccessIsReturned, IsAtLeastSkl) { diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_wait_on_events.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_wait_on_events.cpp index bdb20d0775..04242e4793 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_wait_on_events.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_wait_on_events.cpp @@ -112,6 +112,7 @@ HWTEST_F(CommandListAppendWaitOnEvent, WhenAppendingWaitOnEventsThenEventGraphic HWTEST_F(CommandListAppendWaitOnEvent, givenEventWithWaitScopeFlagDeviceWhenAppendingWaitOnEventThenPCWithDcFlushIsGenerated) { using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; + using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; auto usedSpaceBefore = commandList->commandContainer.getCommandStream()->getUsed(); const ze_event_desc_t eventDesc = { @@ -134,14 +135,21 @@ HWTEST_F(CommandListAppendWaitOnEvent, givenEventWithWaitScopeFlagDeviceWhenAppe ptrOffset(commandList->commandContainer.getCommandStream()->getCpuBase(), 0), usedSpaceAfter)); - auto itorPC = findAll(cmdList.begin(), cmdList.end()).back(); - ASSERT_NE(cmdList.end(), itorPC); - { - auto cmd = genCmdCast(*itorPC); - ASSERT_NE(cmd, nullptr); + auto itor = find(cmdList.begin(), cmdList.end()); + EXPECT_NE(cmdList.end(), itor); + if (NEO::MemorySynchronizationCommands::getDcFlushEnable(true, device->getHwInfo())) { + itor--; + auto cmd = genCmdCast(*itor); + + ASSERT_NE(nullptr, cmd); EXPECT_TRUE(cmd->getCommandStreamerStallEnable()); - EXPECT_EQ(MemorySynchronizationCommands::getDcFlushEnable(true, *defaultHwInfo), cmd->getDcFlushEnable()); + EXPECT_TRUE(cmd->getDcFlushEnable()); + } else { + if (cmdList.begin() != itor) { + itor--; + EXPECT_EQ(nullptr, genCmdCast(*itor)); + } } } diff --git a/opencl/test/unit_test/command_queue/enqueue_copy_buffer_to_image_tests.cpp b/opencl/test/unit_test/command_queue/enqueue_copy_buffer_to_image_tests.cpp index f96136c350..417c3e0dd7 100644 --- a/opencl/test/unit_test/command_queue/enqueue_copy_buffer_to_image_tests.cpp +++ b/opencl/test/unit_test/command_queue/enqueue_copy_buffer_to_image_tests.cpp @@ -175,14 +175,14 @@ HWTEST_F(EnqueueCopyBufferToImageTest, WhenCopyingBufferToImageThenSurfaceStateI const auto &kernelInfo = mockCmdQ->storedMultiDispatchInfo.begin()->getKernel()->getKernelInfo(); uint32_t index = static_cast(kernelInfo.getArgDescriptorAt(1).template as().bindful) / sizeof(RENDER_SURFACE_STATE); - const auto &surfaceState = getSurfaceState(&pCmdQ->getIndirectHeap(IndirectHeap::Type::SURFACE_STATE, 0), index); + const auto surfaceState = getSurfaceState(&pCmdQ->getIndirectHeap(IndirectHeap::Type::SURFACE_STATE, 0), index); const auto &imageDesc = dstImage->getImageDesc(); // EnqueueReadImage uses multi-byte copies depending on per-pixel-size-in-bytes - EXPECT_EQ(imageDesc.image_width, surfaceState.getWidth()); - EXPECT_EQ(imageDesc.image_height, surfaceState.getHeight()); - EXPECT_NE(0u, surfaceState.getSurfacePitch()); - EXPECT_NE(0u, surfaceState.getSurfaceType()); - auto surfaceFormat = surfaceState.getSurfaceFormat(); + EXPECT_EQ(imageDesc.image_width, surfaceState->getWidth()); + EXPECT_EQ(imageDesc.image_height, surfaceState->getHeight()); + EXPECT_NE(0u, surfaceState->getSurfacePitch()); + EXPECT_NE(0u, surfaceState->getSurfaceType()); + auto surfaceFormat = surfaceState->getSurfaceFormat(); bool isRedescribedFormat = surfaceFormat == RENDER_SURFACE_STATE::SURFACE_FORMAT_R32G32B32A32_UINT || surfaceFormat == RENDER_SURFACE_STATE::SURFACE_FORMAT_R32G32_UINT || @@ -190,9 +190,9 @@ HWTEST_F(EnqueueCopyBufferToImageTest, WhenCopyingBufferToImageThenSurfaceStateI surfaceFormat == RENDER_SURFACE_STATE::SURFACE_FORMAT_R16_UINT || surfaceFormat == RENDER_SURFACE_STATE::SURFACE_FORMAT_R8_UINT; EXPECT_TRUE(isRedescribedFormat); - EXPECT_EQ(MockGmmResourceInfo::getHAlignSurfaceStateResult, surfaceState.getSurfaceHorizontalAlignment()); - EXPECT_EQ(RENDER_SURFACE_STATE::SURFACE_VERTICAL_ALIGNMENT_VALIGN_4, surfaceState.getSurfaceVerticalAlignment()); - EXPECT_EQ(dstImage->getGraphicsAllocation(pClDevice->getRootDeviceIndex())->getGpuAddress(), surfaceState.getSurfaceBaseAddress()); + EXPECT_EQ(MockGmmResourceInfo::getHAlignSurfaceStateResult, surfaceState->getSurfaceHorizontalAlignment()); + EXPECT_EQ(RENDER_SURFACE_STATE::SURFACE_VERTICAL_ALIGNMENT_VALIGN_4, surfaceState->getSurfaceVerticalAlignment()); + EXPECT_EQ(dstImage->getGraphicsAllocation(pClDevice->getRootDeviceIndex())->getGpuAddress(), surfaceState->getSurfaceBaseAddress()); } HWTEST2_F(EnqueueCopyBufferToImageTest, WhenCopyingBufferToImageThenNumberOfPipelineSelectsIsOne, IsAtMostXeHpcCore) { diff --git a/opencl/test/unit_test/command_queue/enqueue_copy_image_tests.cpp b/opencl/test/unit_test/command_queue/enqueue_copy_image_tests.cpp index 6b1bcdc489..fed94163ab 100644 --- a/opencl/test/unit_test/command_queue/enqueue_copy_image_tests.cpp +++ b/opencl/test/unit_test/command_queue/enqueue_copy_image_tests.cpp @@ -193,13 +193,13 @@ HWTEST_F(EnqueueCopyImageTest, WhenCopyingImageThenSurfaceStateIsCorrect) { const auto &kernelInfo = mockCmdQ->storedMultiDispatchInfo.begin()->getKernel()->getKernelInfo(); for (uint32_t i = 0; i < 2; ++i) { uint32_t index = static_cast(kernelInfo.getArgDescriptorAt(i).template as().bindful) / sizeof(RENDER_SURFACE_STATE); - const auto &surfaceState = getSurfaceState(&pCmdQ->getIndirectHeap(IndirectHeap::Type::SURFACE_STATE, 0), index); + const auto surfaceState = getSurfaceState(&pCmdQ->getIndirectHeap(IndirectHeap::Type::SURFACE_STATE, 0), index); const auto &imageDesc = dstImage->getImageDesc(); - EXPECT_EQ(imageDesc.image_width, surfaceState.getWidth()); - EXPECT_EQ(imageDesc.image_height, surfaceState.getHeight()); - EXPECT_NE(0u, surfaceState.getSurfacePitch()); - EXPECT_NE(0u, surfaceState.getSurfaceType()); - auto surfaceFormat = surfaceState.getSurfaceFormat(); + EXPECT_EQ(imageDesc.image_width, surfaceState->getWidth()); + EXPECT_EQ(imageDesc.image_height, surfaceState->getHeight()); + EXPECT_NE(0u, surfaceState->getSurfacePitch()); + EXPECT_NE(0u, surfaceState->getSurfaceType()); + auto surfaceFormat = surfaceState->getSurfaceFormat(); bool isRedescribedFormat = surfaceFormat == RENDER_SURFACE_STATE::SURFACE_FORMAT_R32G32B32A32_UINT || surfaceFormat == RENDER_SURFACE_STATE::SURFACE_FORMAT_R32G32_UINT || @@ -207,17 +207,17 @@ HWTEST_F(EnqueueCopyImageTest, WhenCopyingImageThenSurfaceStateIsCorrect) { surfaceFormat == RENDER_SURFACE_STATE::SURFACE_FORMAT_R16_UINT || surfaceFormat == RENDER_SURFACE_STATE::SURFACE_FORMAT_R8_UINT; EXPECT_TRUE(isRedescribedFormat); - EXPECT_EQ(MockGmmResourceInfo::getHAlignSurfaceStateResult, surfaceState.getSurfaceHorizontalAlignment()); - EXPECT_EQ(RENDER_SURFACE_STATE::SURFACE_VERTICAL_ALIGNMENT_VALIGN_4, surfaceState.getSurfaceVerticalAlignment()); + EXPECT_EQ(MockGmmResourceInfo::getHAlignSurfaceStateResult, surfaceState->getSurfaceHorizontalAlignment()); + EXPECT_EQ(RENDER_SURFACE_STATE::SURFACE_VERTICAL_ALIGNMENT_VALIGN_4, surfaceState->getSurfaceVerticalAlignment()); } uint32_t srcIndex = static_cast(kernelInfo.getArgDescriptorAt(0).template as().bindful) / sizeof(RENDER_SURFACE_STATE); - const auto &srcSurfaceState = getSurfaceState(&pCmdQ->getIndirectHeap(IndirectHeap::Type::SURFACE_STATE, 0), srcIndex); - EXPECT_EQ(srcImage->getGraphicsAllocation(pClDevice->getRootDeviceIndex())->getGpuAddress(), srcSurfaceState.getSurfaceBaseAddress()); + const auto srcSurfaceState = getSurfaceState(&pCmdQ->getIndirectHeap(IndirectHeap::Type::SURFACE_STATE, 0), srcIndex); + EXPECT_EQ(srcImage->getGraphicsAllocation(pClDevice->getRootDeviceIndex())->getGpuAddress(), srcSurfaceState->getSurfaceBaseAddress()); uint32_t dstIndex = static_cast(kernelInfo.getArgDescriptorAt(1).template as().bindful) / sizeof(RENDER_SURFACE_STATE); - const auto &dstSurfaceState = getSurfaceState(&pCmdQ->getIndirectHeap(IndirectHeap::Type::SURFACE_STATE, 0), dstIndex); - EXPECT_EQ(dstImage->getGraphicsAllocation(pClDevice->getRootDeviceIndex())->getGpuAddress(), dstSurfaceState.getSurfaceBaseAddress()); + const auto dstSurfaceState = getSurfaceState(&pCmdQ->getIndirectHeap(IndirectHeap::Type::SURFACE_STATE, 0), dstIndex); + EXPECT_EQ(dstImage->getGraphicsAllocation(pClDevice->getRootDeviceIndex())->getGpuAddress(), dstSurfaceState->getSurfaceBaseAddress()); } HWTEST2_F(EnqueueCopyImageTest, WhenCopyingImageThenNumberOfPipelineSelectsIsOne, IsAtMostXeHpcCore) { diff --git a/opencl/test/unit_test/command_queue/enqueue_copy_image_to_buffer_tests.cpp b/opencl/test/unit_test/command_queue/enqueue_copy_image_to_buffer_tests.cpp index 03a553963f..99e1df1f4d 100644 --- a/opencl/test/unit_test/command_queue/enqueue_copy_image_to_buffer_tests.cpp +++ b/opencl/test/unit_test/command_queue/enqueue_copy_image_to_buffer_tests.cpp @@ -169,14 +169,14 @@ HWTEST_F(EnqueueCopyImageToBufferTest, WhenCopyingImageToBufferThenSurfaceStateI enqueueCopyImageToBuffer(); - const auto &surfaceState = getSurfaceState(&pCmdQ->getIndirectHeap(IndirectHeap::Type::SURFACE_STATE, 0), 0); + const auto surfaceState = getSurfaceState(&pCmdQ->getIndirectHeap(IndirectHeap::Type::SURFACE_STATE, 0), 0); const auto &imageDesc = srcImage->getImageDesc(); // EnqueueReadImage uses multi-byte copies depending on per-pixel-size-in-bytes - EXPECT_EQ(imageDesc.image_width, surfaceState.getWidth()); - EXPECT_EQ(imageDesc.image_height, surfaceState.getHeight()); - EXPECT_NE(0u, surfaceState.getSurfacePitch()); - EXPECT_NE(0u, surfaceState.getSurfaceType()); - auto surfaceFormat = surfaceState.getSurfaceFormat(); + EXPECT_EQ(imageDesc.image_width, surfaceState->getWidth()); + EXPECT_EQ(imageDesc.image_height, surfaceState->getHeight()); + EXPECT_NE(0u, surfaceState->getSurfacePitch()); + EXPECT_NE(0u, surfaceState->getSurfaceType()); + auto surfaceFormat = surfaceState->getSurfaceFormat(); bool isRedescribedFormat = surfaceFormat == RENDER_SURFACE_STATE::SURFACE_FORMAT_R32G32B32A32_UINT || surfaceFormat == RENDER_SURFACE_STATE::SURFACE_FORMAT_R32G32_UINT || @@ -184,9 +184,9 @@ HWTEST_F(EnqueueCopyImageToBufferTest, WhenCopyingImageToBufferThenSurfaceStateI surfaceFormat == RENDER_SURFACE_STATE::SURFACE_FORMAT_R16_UINT || surfaceFormat == RENDER_SURFACE_STATE::SURFACE_FORMAT_R8_UINT; EXPECT_TRUE(isRedescribedFormat); - EXPECT_EQ(MockGmmResourceInfo::getHAlignSurfaceStateResult, surfaceState.getSurfaceHorizontalAlignment()); - EXPECT_EQ(RENDER_SURFACE_STATE::SURFACE_VERTICAL_ALIGNMENT_VALIGN_4, surfaceState.getSurfaceVerticalAlignment()); - EXPECT_EQ(srcImage->getGraphicsAllocation(pClDevice->getRootDeviceIndex())->getGpuAddress(), surfaceState.getSurfaceBaseAddress()); + EXPECT_EQ(MockGmmResourceInfo::getHAlignSurfaceStateResult, surfaceState->getSurfaceHorizontalAlignment()); + EXPECT_EQ(RENDER_SURFACE_STATE::SURFACE_VERTICAL_ALIGNMENT_VALIGN_4, surfaceState->getSurfaceVerticalAlignment()); + EXPECT_EQ(srcImage->getGraphicsAllocation(pClDevice->getRootDeviceIndex())->getGpuAddress(), surfaceState->getSurfaceBaseAddress()); } HWTEST2_F(EnqueueCopyImageToBufferTest, WhenCopyingImageToBufferThenNumberOfPipelineSelectsIsOne, IsAtMostXeHpcCore) { diff --git a/opencl/test/unit_test/command_queue/enqueue_fill_image_tests.cpp b/opencl/test/unit_test/command_queue/enqueue_fill_image_tests.cpp index 579dabb38e..21dae5e6e4 100644 --- a/opencl/test/unit_test/command_queue/enqueue_fill_image_tests.cpp +++ b/opencl/test/unit_test/command_queue/enqueue_fill_image_tests.cpp @@ -200,15 +200,15 @@ HWTEST_F(EnqueueFillImageTest, WhenFillingImageThenSurfaceStateIsCorrect) { const auto &kernelInfo = mockCmdQ->storedMultiDispatchInfo.begin()->getKernel()->getKernelInfo(); uint32_t index = static_cast(kernelInfo.getArgDescriptorAt(0).template as().bindful) / sizeof(RENDER_SURFACE_STATE); - const auto &surfaceState = getSurfaceState(&pCmdQ->getIndirectHeap(IndirectHeap::Type::SURFACE_STATE, 0), index); + const auto surfaceState = getSurfaceState(&pCmdQ->getIndirectHeap(IndirectHeap::Type::SURFACE_STATE, 0), index); const auto &imageDesc = image->getImageDesc(); - EXPECT_EQ(imageDesc.image_width, surfaceState.getWidth()); - EXPECT_EQ(imageDesc.image_height, surfaceState.getHeight()); - EXPECT_NE(0u, surfaceState.getSurfacePitch()); - EXPECT_NE(0u, surfaceState.getSurfaceType()); - EXPECT_EQ(MockGmmResourceInfo::getHAlignSurfaceStateResult, surfaceState.getSurfaceHorizontalAlignment()); - EXPECT_EQ(RENDER_SURFACE_STATE::SURFACE_VERTICAL_ALIGNMENT_VALIGN_4, surfaceState.getSurfaceVerticalAlignment()); - EXPECT_EQ(image->getGraphicsAllocation(pClDevice->getRootDeviceIndex())->getGpuAddress(), surfaceState.getSurfaceBaseAddress()); + EXPECT_EQ(imageDesc.image_width, surfaceState->getWidth()); + EXPECT_EQ(imageDesc.image_height, surfaceState->getHeight()); + EXPECT_NE(0u, surfaceState->getSurfacePitch()); + EXPECT_NE(0u, surfaceState->getSurfaceType()); + EXPECT_EQ(MockGmmResourceInfo::getHAlignSurfaceStateResult, surfaceState->getSurfaceHorizontalAlignment()); + EXPECT_EQ(RENDER_SURFACE_STATE::SURFACE_VERTICAL_ALIGNMENT_VALIGN_4, surfaceState->getSurfaceVerticalAlignment()); + EXPECT_EQ(image->getGraphicsAllocation(pClDevice->getRootDeviceIndex())->getGpuAddress(), surfaceState->getSurfaceBaseAddress()); } HWTEST2_F(EnqueueFillImageTest, WhenFillingImageThenNumberOfPipelineSelectsIsOne, IsAtMostXeHpcCore) { diff --git a/opencl/test/unit_test/command_queue/enqueue_handler_tests.cpp b/opencl/test/unit_test/command_queue/enqueue_handler_tests.cpp index c2e3dba808..6712bc0fa6 100644 --- a/opencl/test/unit_test/command_queue/enqueue_handler_tests.cpp +++ b/opencl/test/unit_test/command_queue/enqueue_handler_tests.cpp @@ -708,9 +708,9 @@ HWTEST_F(EnqueueHandlerTest, givenKernelUsingSyncBufferWhenEnqueuingKernelThenSs ClHardwareParse hwParser; hwParser.parseCommands(*mockCmdQ); - auto &surfaceState = hwParser.getSurfaceState(&surfaceStateHeap, 0); + auto surfaceState = hwParser.getSurfaceState(&surfaceStateHeap, 0); auto pSyncBufferHandler = static_cast(pDevice->syncBufferHandler.get()); - EXPECT_EQ(pSyncBufferHandler->graphicsAllocation->getGpuAddress(), surfaceState.getSurfaceBaseAddress()); + EXPECT_EQ(pSyncBufferHandler->graphicsAllocation->getGpuAddress(), surfaceState->getSurfaceBaseAddress()); } } diff --git a/opencl/test/unit_test/command_queue/enqueue_read_buffer_rect_tests.cpp b/opencl/test/unit_test/command_queue/enqueue_read_buffer_rect_tests.cpp index e8fea96b93..142c31a199 100644 --- a/opencl/test/unit_test/command_queue/enqueue_read_buffer_rect_tests.cpp +++ b/opencl/test/unit_test/command_queue/enqueue_read_buffer_rect_tests.cpp @@ -612,13 +612,13 @@ HWTEST_F(EnqueueReadWriteBufferRectDispatch, givenOffsetResultingInMisalignedPtr auto pKernelArg = (uint64_t *)(kernel->getCrossThreadData() + kernelInfo.getArgDescriptorAt(1).as().stateless); EXPECT_EQ(reinterpret_cast(alignDown(misalignedDstPtr, 4)), *pKernelArg); - EXPECT_EQ(*pKernelArg, surfaceStateDst.getSurfaceBaseAddress()); + EXPECT_EQ(*pKernelArg, surfaceStateDst->getSurfaceBaseAddress()); } else if (kernelInfo.getArgDescriptorAt(1).as().pointerSize == sizeof(uint32_t)) { auto pKernelArg = (uint32_t *)(kernel->getCrossThreadData() + kernelInfo.getArgDescriptorAt(1).as().stateless); EXPECT_EQ(reinterpret_cast(alignDown(misalignedDstPtr, 4)), static_cast(*pKernelArg)); - EXPECT_EQ(static_cast(*pKernelArg), surfaceStateDst.getSurfaceBaseAddress()); + EXPECT_EQ(static_cast(*pKernelArg), surfaceStateDst->getSurfaceBaseAddress()); } } diff --git a/opencl/test/unit_test/command_queue/enqueue_read_image_tests.cpp b/opencl/test/unit_test/command_queue/enqueue_read_image_tests.cpp index 60acbdf7b7..e13b9abb20 100644 --- a/opencl/test/unit_test/command_queue/enqueue_read_image_tests.cpp +++ b/opencl/test/unit_test/command_queue/enqueue_read_image_tests.cpp @@ -640,18 +640,18 @@ HWTEST_F(EnqueueReadImageTest, WhenReadingImageThenSurfaceStateIsCorrect) { // BufferToImage kernel uses BTI=1 for destSurface uint32_t bindingTableIndex = 0; - const auto &surfaceState = getSurfaceState(&pCmdQ->getIndirectHeap(IndirectHeap::Type::SURFACE_STATE, 0), bindingTableIndex); + const auto surfaceState = getSurfaceState(&pCmdQ->getIndirectHeap(IndirectHeap::Type::SURFACE_STATE, 0), bindingTableIndex); // EnqueueReadImage uses multi-byte copies depending on per-pixel-size-in-bytes const auto &imageDesc = srcImage->getImageDesc(); - EXPECT_EQ(imageDesc.image_width, surfaceState.getWidth()); - EXPECT_EQ(imageDesc.image_height, surfaceState.getHeight()); - EXPECT_NE(0u, surfaceState.getSurfacePitch()); - EXPECT_NE(0u, surfaceState.getSurfaceType()); - EXPECT_EQ(RENDER_SURFACE_STATE::SURFACE_FORMAT_R32_UINT, surfaceState.getSurfaceFormat()); - EXPECT_EQ(MockGmmResourceInfo::getHAlignSurfaceStateResult, surfaceState.getSurfaceHorizontalAlignment()); - EXPECT_EQ(RENDER_SURFACE_STATE::SURFACE_VERTICAL_ALIGNMENT_VALIGN_4, surfaceState.getSurfaceVerticalAlignment()); - EXPECT_EQ(srcAllocation->getGpuAddress(), surfaceState.getSurfaceBaseAddress()); + EXPECT_EQ(imageDesc.image_width, surfaceState->getWidth()); + EXPECT_EQ(imageDesc.image_height, surfaceState->getHeight()); + EXPECT_NE(0u, surfaceState->getSurfacePitch()); + EXPECT_NE(0u, surfaceState->getSurfaceType()); + EXPECT_EQ(RENDER_SURFACE_STATE::SURFACE_FORMAT_R32_UINT, surfaceState->getSurfaceFormat()); + EXPECT_EQ(MockGmmResourceInfo::getHAlignSurfaceStateResult, surfaceState->getSurfaceHorizontalAlignment()); + EXPECT_EQ(RENDER_SURFACE_STATE::SURFACE_VERTICAL_ALIGNMENT_VALIGN_4, surfaceState->getSurfaceVerticalAlignment()); + EXPECT_EQ(srcAllocation->getGpuAddress(), surfaceState->getSurfaceBaseAddress()); } HWTEST2_F(EnqueueReadImageTest, WhenReadingImageThenPipelineSelectIsProgrammed, IsAtMostXeHpcCore) { diff --git a/opencl/test/unit_test/command_queue/enqueue_write_buffer_rect_tests.cpp b/opencl/test/unit_test/command_queue/enqueue_write_buffer_rect_tests.cpp index c0d86e7a42..20b9115b56 100644 --- a/opencl/test/unit_test/command_queue/enqueue_write_buffer_rect_tests.cpp +++ b/opencl/test/unit_test/command_queue/enqueue_write_buffer_rect_tests.cpp @@ -605,19 +605,19 @@ HWTEST_F(EnqueueReadWriteBufferRectDispatch, givenOffsetResultingInMisalignedPtr auto &kernelInfo = kernel->getKernelInfo(); if (hwInfo->capabilityTable.gpuAddressSpace == MemoryConstants::max48BitAddress) { - const auto &surfaceState = getSurfaceState(&cmdQ->getIndirectHeap(IndirectHeap::Type::SURFACE_STATE, 0), 0); + const auto surfaceState = getSurfaceState(&cmdQ->getIndirectHeap(IndirectHeap::Type::SURFACE_STATE, 0), 0); if (kernelInfo.getArgDescriptorAt(0).as().pointerSize == sizeof(uint64_t)) { auto pKernelArg = (uint64_t *)(kernel->getCrossThreadData() + kernelInfo.getArgDescriptorAt(0).as().stateless); EXPECT_EQ(reinterpret_cast(alignDown(misalignedHostPtr, 4)), *pKernelArg); - EXPECT_EQ(*pKernelArg, surfaceState.getSurfaceBaseAddress()); + EXPECT_EQ(*pKernelArg, surfaceState->getSurfaceBaseAddress()); } else if (kernelInfo.getArgDescriptorAt(0).as().pointerSize == sizeof(uint32_t)) { auto pKernelArg = (uint32_t *)(kernel->getCrossThreadData() + kernelInfo.getArgDescriptorAt(0).as().stateless); EXPECT_EQ(reinterpret_cast(alignDown(misalignedHostPtr, 4)), static_cast(*pKernelArg)); - EXPECT_EQ(static_cast(*pKernelArg), surfaceState.getSurfaceBaseAddress()); + EXPECT_EQ(static_cast(*pKernelArg), surfaceState->getSurfaceBaseAddress()); } } diff --git a/opencl/test/unit_test/command_queue/enqueue_write_image_tests.cpp b/opencl/test/unit_test/command_queue/enqueue_write_image_tests.cpp index 55fb4a22e8..bb67e74822 100644 --- a/opencl/test/unit_test/command_queue/enqueue_write_image_tests.cpp +++ b/opencl/test/unit_test/command_queue/enqueue_write_image_tests.cpp @@ -198,18 +198,18 @@ HWTEST_F(EnqueueWriteImageTest, WhenWritingImageThenSurfaceStateIsProgrammedCorr auto index = mockCmdQ->storedMultiDispatchInfo.begin()->getKernel()->getKernelInfo().getArgDescriptorAt(1).template as().bindful / sizeof(RENDER_SURFACE_STATE); - const auto &surfaceState = getSurfaceState(&pCmdQ->getIndirectHeap(IndirectHeap::Type::SURFACE_STATE, 0), static_cast(index)); + const auto surfaceState = getSurfaceState(&pCmdQ->getIndirectHeap(IndirectHeap::Type::SURFACE_STATE, 0), static_cast(index)); // EnqueueWriteImage uses multi-byte copies depending on per-pixel-size-in-bytes const auto &imageDesc = dstImage->getImageDesc(); - EXPECT_EQ(imageDesc.image_width, surfaceState.getWidth()); - EXPECT_EQ(imageDesc.image_height, surfaceState.getHeight()); - EXPECT_NE(0u, surfaceState.getSurfacePitch()); - EXPECT_NE(0u, surfaceState.getSurfaceType()); - EXPECT_EQ(RENDER_SURFACE_STATE::SURFACE_FORMAT_R32_UINT, surfaceState.getSurfaceFormat()); - EXPECT_EQ(MockGmmResourceInfo::getHAlignSurfaceStateResult, surfaceState.getSurfaceHorizontalAlignment()); - EXPECT_EQ(RENDER_SURFACE_STATE::SURFACE_VERTICAL_ALIGNMENT_VALIGN_4, surfaceState.getSurfaceVerticalAlignment()); - EXPECT_EQ(dstAllocation->getGpuAddress(), surfaceState.getSurfaceBaseAddress()); + EXPECT_EQ(imageDesc.image_width, surfaceState->getWidth()); + EXPECT_EQ(imageDesc.image_height, surfaceState->getHeight()); + EXPECT_NE(0u, surfaceState->getSurfacePitch()); + EXPECT_NE(0u, surfaceState->getSurfaceType()); + EXPECT_EQ(RENDER_SURFACE_STATE::SURFACE_FORMAT_R32_UINT, surfaceState->getSurfaceFormat()); + EXPECT_EQ(MockGmmResourceInfo::getHAlignSurfaceStateResult, surfaceState->getSurfaceHorizontalAlignment()); + EXPECT_EQ(RENDER_SURFACE_STATE::SURFACE_VERTICAL_ALIGNMENT_VALIGN_4, surfaceState->getSurfaceVerticalAlignment()); + EXPECT_EQ(dstAllocation->getGpuAddress(), surfaceState->getSurfaceBaseAddress()); } HWTEST2_F(EnqueueWriteImageTest, WhenWritingImageThenOnePipelineSelectIsProgrammed, IsAtMostXeHpcCore) { diff --git a/opencl/test/unit_test/profiling/profiling_tests.cpp b/opencl/test/unit_test/profiling/profiling_tests.cpp index a1211843cf..0b8da1ea5b 100644 --- a/opencl/test/unit_test/profiling/profiling_tests.cpp +++ b/opencl/test/unit_test/profiling/profiling_tests.cpp @@ -484,42 +484,6 @@ HWTEST_F(ProfilingTests, givenMarkerEnqueueWhenBlockedEnqueueThenSetGpuPath) { userEventObj->release(); } -HWTEST_F(ProfilingTests, givenMarkerEnqueueWhenBlockedEnqueueThenPipeControlsArePresentInCS) { - typedef typename FamilyType::PIPE_CONTROL PIPE_CONTROL; - - cl_event event = nullptr; - cl_event userEvent = new UserEvent(); - static_cast *>(pCmdQ)->enqueueMarkerWithWaitList(1, &userEvent, &event); - - auto eventObj = static_cast(event); - EXPECT_FALSE(eventObj->isCPUProfilingPath()); - - auto userEventObj = static_cast(userEvent); - - pCmdQ->flush(); - userEventObj->setStatus(CL_COMPLETE); - Event::waitForEvents(1, &event); - - parseCommands(*pCmdQ); - - // Check PIPE_CONTROLs - auto itorFirstPC = find(cmdList.begin(), cmdList.end()); - ASSERT_NE(cmdList.end(), itorFirstPC); - auto pFirstPC = genCmdCast(*itorFirstPC); - ASSERT_NE(nullptr, pFirstPC); - - auto itorSecondPC = find(itorFirstPC, cmdList.end()); - ASSERT_NE(cmdList.end(), itorSecondPC); - auto pSecondPC = genCmdCast(*itorSecondPC); - ASSERT_NE(nullptr, pSecondPC); - - EXPECT_TRUE(static_cast *>(event)->calcProfilingData()); - - eventObj->release(); - userEventObj->release(); - pCmdQ->isQueueBlocked(); -} - template struct MockTagNode : public TagNode { public: diff --git a/opencl/test/unit_test/scenarios/windows/enqueue_read_write_buffer_scenarios_windows_tests.cpp b/opencl/test/unit_test/scenarios/windows/enqueue_read_write_buffer_scenarios_windows_tests.cpp index 992a65c5db..10c39a0e26 100644 --- a/opencl/test/unit_test/scenarios/windows/enqueue_read_write_buffer_scenarios_windows_tests.cpp +++ b/opencl/test/unit_test/scenarios/windows/enqueue_read_write_buffer_scenarios_windows_tests.cpp @@ -118,12 +118,12 @@ HWTEST_F(EnqueueBufferWindowsTest, givenMisalignedHostPtrWhenEnqueueReadBufferCa if (arg1AsPtr.pointerSize == sizeof(uint64_t)) { auto pKernelArg = (uint64_t *)(kernel->getCrossThreadData() + arg1AsPtr.stateless); EXPECT_EQ(alignDown(gpuVa, 4), static_cast(*pKernelArg)); - EXPECT_EQ(*pKernelArg, surfaceStateDst.getSurfaceBaseAddress()); + EXPECT_EQ(*pKernelArg, surfaceStateDst->getSurfaceBaseAddress()); } else if (arg1AsPtr.pointerSize == sizeof(uint32_t)) { auto pKernelArg = (uint32_t *)(kernel->getCrossThreadData() + arg1AsPtr.stateless); EXPECT_EQ(alignDown(gpuVa, 4), static_cast(*pKernelArg)); - EXPECT_EQ(static_cast(*pKernelArg), surfaceStateDst.getSurfaceBaseAddress()); + EXPECT_EQ(static_cast(*pKernelArg), surfaceStateDst->getSurfaceBaseAddress()); } } diff --git a/shared/source/command_stream/command_stream_receiver_hw_base.inl b/shared/source/command_stream/command_stream_receiver_hw_base.inl index faec980478..87485dc170 100644 --- a/shared/source/command_stream/command_stream_receiver_hw_base.inl +++ b/shared/source/command_stream/command_stream_receiver_hw_base.inl @@ -205,10 +205,10 @@ CompletionStamp CommandStreamReceiverHw::flushTask( if (dispatchFlags.blocking || dispatchFlags.dcFlush || dispatchFlags.guardCommandBufferWithPipeControl) { if (this->dispatchMode == DispatchMode::ImmediateDispatch) { - //for ImmediateDispatch we will send this right away, therefore this pipe control will close the level - //for BatchedSubmissions it will be nooped and only last ppc in batch will be emitted. + // for ImmediateDispatch we will send this right away, therefore this pipe control will close the level + // for BatchedSubmissions it will be nooped and only last ppc in batch will be emitted. levelClosed = true; - //if we guard with ppc, flush dc as well to speed up completion latency + // if we guard with ppc, flush dc as well to speed up completion latency if (dispatchFlags.guardCommandBufferWithPipeControl) { const auto &hwInfoConfig = *NEO::HwInfoConfig::get(hwInfo.platform.eProductFamily); if (hwInfoConfig.isDcFlushAllowed()) { @@ -385,7 +385,7 @@ CompletionStamp CommandStreamReceiverHw::flushTask( lastMemoryCompressionState = memoryCompressionState; } - //Reprogram state base address if required + // Reprogram state base address if required if (isStateBaseAddressDirty || sourceLevelDebuggerActive) { EncodeWA::addPipeControlBeforeStateBaseAddress(commandStreamCSR, hwInfo, isRcs()); EncodeWA::encodeAdditionalPipelineSelect(commandStreamCSR, dispatchFlags.pipelineSelectArgs, true, hwInfo, isRcs()); @@ -401,7 +401,7 @@ CompletionStamp CommandStreamReceiverHw::flushTask( } auto stateBaseAddressCmdOffset = commandStreamCSR.getUsed(); - auto pCmd = static_cast(commandStreamCSR.getSpace(sizeof(STATE_BASE_ADDRESS))); + auto pCmd = static_cast(StateBaseAddressHelper::getSpaceForSbaCmd(commandStreamCSR)); STATE_BASE_ADDRESS cmd; auto instructionHeapBaseAddress = getMemoryManager()->getInternalHeapBaseAddress(rootDeviceIndex, getMemoryManager()->isLocalMemoryUsedForIsa(rootDeviceIndex)); StateBaseAddressHelper::programStateBaseAddress( @@ -422,7 +422,10 @@ CompletionStamp CommandStreamReceiverHw::flushTask( memoryCompressionState, dispatchFlags.useGlobalAtomics, dispatchFlags.areMultipleSubDevicesInContext); - *pCmd = cmd; + + if (pCmd) { + *pCmd = cmd; + } programAdditionalStateBaseAddress(commandStreamCSR, cmd, device); @@ -618,7 +621,7 @@ CompletionStamp CommandStreamReceiverHw::flushTask( this->wasSubmittedToSingleSubdevice = dispatchFlags.useSingleSubdevice; - //check if we are not over the budget, if we are do implicit flush + // check if we are not over the budget, if we are do implicit flush if (getMemoryManager()->isMemoryBudgetExhausted()) { if (this->totalMemoryUsed >= device.getDeviceInfo().globalMemSize / 4) { implicitFlush = true; @@ -726,16 +729,16 @@ inline bool CommandStreamReceiverHw::flushBatchedSubmissions() { while (nextCommandBuffer && nextCommandBuffer->inspectionId == primaryCmdBuffer->inspectionId) { - //noop pipe control + // noop pipe control if (currentPipeControlForNooping) { if (DebugManager.flags.AddPatchInfoCommentsForAUBDump.get()) { flatBatchBufferHelper->removePipeControlData(pipeControlLocationSize, currentPipeControlForNooping, hwInfo); } memset(currentPipeControlForNooping, 0, pipeControlLocationSize); } - //obtain next candidate for nooping + // obtain next candidate for nooping currentPipeControlForNooping = nextCommandBuffer->pipeControlThatMayBeErasedLocation; - //track epilogue pipe control + // track epilogue pipe control epiloguePipeControlLocation = nextCommandBuffer->epiloguePipeControlLocation; flushStampUpdateHelper.insert(nextCommandBuffer->flushStamp->getStampReference()); @@ -744,7 +747,7 @@ inline bool CommandStreamReceiverHw::flushBatchedSubmissions() { auto cpuAddressForCommandBufferDestination = ptrOffset(nextCommandBuffer->batchBuffer.commandBufferAllocation->getUnderlyingBuffer(), nextCommandBuffer->batchBuffer.startOffset); auto cpuAddressForCurrentCommandBufferEndingSection = alignUp(ptrOffset(currentBBendLocation, sizeof(MI_BATCH_BUFFER_START)), MemoryConstants::cacheLineSize); - //if we point to exact same command buffer, then batch buffer start is not needed at all + // if we point to exact same command buffer, then batch buffer start is not needed at all if (cpuAddressForCurrentCommandBufferEndingSection == cpuAddressForCommandBufferDestination) { memset(currentBBendLocation, 0u, ptrDiff(cpuAddressForCurrentCommandBufferEndingSection, currentBBendLocation)); } else { @@ -766,7 +769,7 @@ inline bool CommandStreamReceiverHw::flushBatchedSubmissions() { surfacesForSubmit.push_back(surface); } - //make sure we flush DC if needed + // make sure we flush DC if needed if (epiloguePipeControlLocation && MemorySynchronizationCommands::getDcFlushEnable(true, hwInfo)) { auto emitDcFlush = true; @@ -784,7 +787,7 @@ inline bool CommandStreamReceiverHw::flushBatchedSubmissions() { break; } - //after flush task level is closed + // after flush task level is closed this->taskLevel++; flushStampUpdateHelper.updateAll(flushStamp->peekStamp()); @@ -885,7 +888,7 @@ inline WaitStatus CommandStreamReceiverHw::waitForTaskCountWithKmdNot auto status = waitForCompletionWithTimeout(params, taskCountToWait); if (status == WaitStatus::NotReady) { waitForFlushStamp(flushStampToWait); - //now call blocking wait, this is to ensure that task count is reached + // now call blocking wait, this is to ensure that task count is reached status = waitForCompletionWithTimeout(WaitParams{false, false, 0}, taskCountToWait); } diff --git a/shared/source/helpers/state_base_address.h b/shared/source/helpers/state_base_address.h index e58e16344d..424a4a4d99 100644 --- a/shared/source/helpers/state_base_address.h +++ b/shared/source/helpers/state_base_address.h @@ -22,6 +22,8 @@ template struct StateBaseAddressHelper { using STATE_BASE_ADDRESS = typename GfxFamily::STATE_BASE_ADDRESS; + static void *getSpaceForSbaCmd(LinearStream &cmdStream); + static void programStateBaseAddress( STATE_BASE_ADDRESS *stateBaseAddress, const IndirectHeap *dsh, diff --git a/shared/source/helpers/state_base_address_base.inl b/shared/source/helpers/state_base_address_base.inl index 5554db117a..3d325a098d 100644 --- a/shared/source/helpers/state_base_address_base.inl +++ b/shared/source/helpers/state_base_address_base.inl @@ -103,4 +103,9 @@ void StateBaseAddressHelper::programStateBaseAddress( isMultiOsContextCapable, memoryCompressionState, overrideBindlessSurfaceStateBase, useGlobalAtomics, areMultipleSubDevicesInContext); } +template +void *StateBaseAddressHelper::getSpaceForSbaCmd(LinearStream &cmdStream) { + return cmdStream.getSpace(sizeof(STATE_BASE_ADDRESS)); +} + } // namespace NEO diff --git a/shared/test/common/cmd_parse/hw_parse.h b/shared/test/common/cmd_parse/hw_parse.h index 2f78283f75..b5cabed296 100644 --- a/shared/test/common/cmd_parse/hw_parse.h +++ b/shared/test/common/cmd_parse/hw_parse.h @@ -94,28 +94,7 @@ struct HardwareParse { } template - const typename FamilyType::RENDER_SURFACE_STATE &getSurfaceState(IndirectHeap *ssh, uint32_t index) { - typedef typename FamilyType::BINDING_TABLE_STATE BINDING_TABLE_STATE; - typedef typename FamilyType::INTERFACE_DESCRIPTOR_DATA INTERFACE_DESCRIPTOR_DATA; - typedef typename FamilyType::RENDER_SURFACE_STATE RENDER_SURFACE_STATE; - typedef typename FamilyType::STATE_BASE_ADDRESS STATE_BASE_ADDRESS; - - const auto &interfaceDescriptorData = *(INTERFACE_DESCRIPTOR_DATA *)cmdInterfaceDescriptorData; - - auto cmdSBA = (STATE_BASE_ADDRESS *)cmdStateBaseAddress; - auto surfaceStateHeap = cmdSBA->getSurfaceStateBaseAddress(); - if (ssh && (ssh->getHeapGpuBase() == surfaceStateHeap)) { - surfaceStateHeap = reinterpret_cast(ssh->getCpuBase()); - } - EXPECT_NE(0u, surfaceStateHeap); - - auto bindingTablePointer = interfaceDescriptorData.getBindingTablePointer(); - - const auto &bindingTableState = reinterpret_cast(surfaceStateHeap + bindingTablePointer)[index]; - auto surfaceStatePointer = bindingTableState.getSurfaceStatePointer(); - - return *(RENDER_SURFACE_STATE *)(surfaceStateHeap + surfaceStatePointer); - } + const typename FamilyType::RENDER_SURFACE_STATE *getSurfaceState(IndirectHeap *ssh, uint32_t index); template const typename FamilyType::SAMPLER_STATE &getSamplerState(uint32_t index) { diff --git a/shared/test/common/cmd_parse/hw_parse.inl b/shared/test/common/cmd_parse/hw_parse.inl index 0187c9e49e..62ac81c5c3 100644 --- a/shared/test/common/cmd_parse/hw_parse.inl +++ b/shared/test/common/cmd_parse/hw_parse.inl @@ -1,5 +1,5 @@ /* - * Copyright (C) 2018-2021 Intel Corporation + * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -123,4 +123,28 @@ const void *HardwareParse::getStatelessArgumentPointer(const KernelInfo &kernelI return nullptr; } +template +const typename FamilyType::RENDER_SURFACE_STATE *HardwareParse::getSurfaceState(IndirectHeap *ssh, uint32_t index) { + typedef typename FamilyType::BINDING_TABLE_STATE BINDING_TABLE_STATE; + typedef typename FamilyType::INTERFACE_DESCRIPTOR_DATA INTERFACE_DESCRIPTOR_DATA; + typedef typename FamilyType::RENDER_SURFACE_STATE RENDER_SURFACE_STATE; + typedef typename FamilyType::STATE_BASE_ADDRESS STATE_BASE_ADDRESS; + + const auto &interfaceDescriptorData = *(INTERFACE_DESCRIPTOR_DATA *)cmdInterfaceDescriptorData; + + auto cmdSBA = (STATE_BASE_ADDRESS *)cmdStateBaseAddress; + auto surfaceStateHeap = cmdSBA->getSurfaceStateBaseAddress(); + if (ssh && (ssh->getHeapGpuBase() == surfaceStateHeap)) { + surfaceStateHeap = reinterpret_cast(ssh->getCpuBase()); + } + EXPECT_NE(0u, surfaceStateHeap); + + auto bindingTablePointer = interfaceDescriptorData.getBindingTablePointer(); + + const auto &bindingTableState = reinterpret_cast(surfaceStateHeap + bindingTablePointer)[index]; + auto surfaceStatePointer = bindingTableState.getSurfaceStatePointer(); + + return (RENDER_SURFACE_STATE *)(surfaceStateHeap + surfaceStatePointer); +} + } // namespace NEO diff --git a/shared/test/common/gen11/cmd_parse_gen11.cpp b/shared/test/common/gen11/cmd_parse_gen11.cpp index c0b0056027..181b278959 100644 --- a/shared/test/common/gen11/cmd_parse_gen11.cpp +++ b/shared/test/common/gen11/cmd_parse_gen11.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2021 Intel Corporation + * Copyright (C) 2021-2022 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -86,4 +86,5 @@ namespace NEO { template void HardwareParse::findHardwareCommands(); template void HardwareParse::findHardwareCommands(IndirectHeap *); template const void *HardwareParse::getStatelessArgumentPointer(const KernelInfo &kernelInfo, uint32_t indexArg, IndirectHeap &ioh, uint32_t rootDeviceIndex); +template const typename ICLFamily::RENDER_SURFACE_STATE *HardwareParse::getSurfaceState(IndirectHeap *ssh, uint32_t index); } // namespace NEO diff --git a/shared/test/common/gen12lp/cmd_parse_gen12lp.cpp b/shared/test/common/gen12lp/cmd_parse_gen12lp.cpp index bf7cf2c985..f9e9ae78a2 100644 --- a/shared/test/common/gen12lp/cmd_parse_gen12lp.cpp +++ b/shared/test/common/gen12lp/cmd_parse_gen12lp.cpp @@ -129,4 +129,5 @@ namespace NEO { template void HardwareParse::findHardwareCommands(); template void HardwareParse::findHardwareCommands(IndirectHeap *); template const void *HardwareParse::getStatelessArgumentPointer(const KernelInfo &kernelInfo, uint32_t indexArg, IndirectHeap &ioh, uint32_t rootDeviceIndex); +template const typename TGLLPFamily::RENDER_SURFACE_STATE *HardwareParse::getSurfaceState(IndirectHeap *ssh, uint32_t index); } // namespace NEO diff --git a/shared/test/common/gen8/cmd_parse_gen8.cpp b/shared/test/common/gen8/cmd_parse_gen8.cpp index b5cfe86200..56eeb38d26 100644 --- a/shared/test/common/gen8/cmd_parse_gen8.cpp +++ b/shared/test/common/gen8/cmd_parse_gen8.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2021 Intel Corporation + * Copyright (C) 2021-2022 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -68,4 +68,5 @@ namespace NEO { template void HardwareParse::findHardwareCommands(); template void HardwareParse::findHardwareCommands(IndirectHeap *); template const void *HardwareParse::getStatelessArgumentPointer(const KernelInfo &kernelInfo, uint32_t indexArg, IndirectHeap &ioh, uint32_t rootDeviceIndex); +template const typename BDWFamily::RENDER_SURFACE_STATE *HardwareParse::getSurfaceState(IndirectHeap *ssh, uint32_t index); } // namespace NEO diff --git a/shared/test/common/gen9/cmd_parse_gen9.cpp b/shared/test/common/gen9/cmd_parse_gen9.cpp index 2f12d2352e..13ba8dda96 100644 --- a/shared/test/common/gen9/cmd_parse_gen9.cpp +++ b/shared/test/common/gen9/cmd_parse_gen9.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2021 Intel Corporation + * Copyright (C) 2021-2022 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -86,4 +86,5 @@ namespace NEO { template void HardwareParse::findHardwareCommands(); template void HardwareParse::findHardwareCommands(IndirectHeap *); template const void *HardwareParse::getStatelessArgumentPointer(const KernelInfo &kernelInfo, uint32_t indexArg, IndirectHeap &ioh, uint32_t rootDeviceIndex); +template const typename SKLFamily::RENDER_SURFACE_STATE *HardwareParse::getSurfaceState(IndirectHeap *ssh, uint32_t index); } // namespace NEO diff --git a/shared/test/common/helpers/unit_test_helper.h b/shared/test/common/helpers/unit_test_helper.h index e900b226d3..385389e3d6 100644 --- a/shared/test/common/helpers/unit_test_helper.h +++ b/shared/test/common/helpers/unit_test_helper.h @@ -80,6 +80,8 @@ struct UnitTestHelper { static bool getWorkloadPartitionForStoreRegisterMemCmd(typename GfxFamily::MI_STORE_REGISTER_MEM &storeRegisterMem); static bool timestampRegisterHighAddress(); + + static void validateSbaMocs(uint32_t expectedMocs, CommandStreamReceiver &csr); }; } // namespace NEO diff --git a/shared/test/common/helpers/unit_test_helper.inl b/shared/test/common/helpers/unit_test_helper.inl index f440368fa3..f1e83eea54 100644 --- a/shared/test/common/helpers/unit_test_helper.inl +++ b/shared/test/common/helpers/unit_test_helper.inl @@ -6,6 +6,10 @@ */ #include "shared/source/helpers/hw_info.h" +#include "shared/test/common/cmd_parse/gen_cmd_parse.h" +#include "shared/test/common/cmd_parse/hw_parse.h" + +#include "gtest/gtest.h" namespace NEO { @@ -77,4 +81,20 @@ bool UnitTestHelper::timestampRegisterHighAddress() { return false; } +template +void UnitTestHelper::validateSbaMocs(uint32_t expectedMocs, CommandStreamReceiver &csr) { + using STATE_BASE_ADDRESS = typename GfxFamily::STATE_BASE_ADDRESS; + + HardwareParse hwParse; + hwParse.parseCommands(csr.getCS(0), 0); + auto itorCmd = reverseFind(hwParse.cmdList.rbegin(), hwParse.cmdList.rend()); + EXPECT_NE(hwParse.cmdList.rend(), itorCmd); + auto sba = genCmdCast(*itorCmd); + EXPECT_NE(nullptr, sba); + + auto mocs = sba->getStatelessDataPortAccessMemoryObjectControlState(); + + EXPECT_EQ(expectedMocs, mocs); +} + } // namespace NEO diff --git a/shared/test/common/xe_hp_core/cmd_parse_xe_hp_core.cpp b/shared/test/common/xe_hp_core/cmd_parse_xe_hp_core.cpp index 22cdc9f179..725db3ec0f 100644 --- a/shared/test/common/xe_hp_core/cmd_parse_xe_hp_core.cpp +++ b/shared/test/common/xe_hp_core/cmd_parse_xe_hp_core.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2021 Intel Corporation + * Copyright (C) 2021-2022 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -9,3 +9,5 @@ using GenStruct = NEO::XeHpCore; using GenGfxFamily = NEO::XeHpFamily; #include "shared/test/common/cmd_parse/cmd_parse_xehp_and_later.inl" + +template const typename GenGfxFamily::RENDER_SURFACE_STATE *NEO::HardwareParse::getSurfaceState(IndirectHeap *ssh, uint32_t index); diff --git a/shared/test/common/xe_hpc_core/cmd_parse_xe_hpc_core.cpp b/shared/test/common/xe_hpc_core/cmd_parse_xe_hpc_core.cpp index b33d267b3b..ac4327f11a 100644 --- a/shared/test/common/xe_hpc_core/cmd_parse_xe_hpc_core.cpp +++ b/shared/test/common/xe_hpc_core/cmd_parse_xe_hpc_core.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2021 Intel Corporation + * Copyright (C) 2021-2022 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -110,3 +110,5 @@ const char *CmdParse::getAdditionalCommandName(void *cmd) { } #include "shared/test/common/cmd_parse/cmd_parse_pvc_and_later.inl" + +template const typename GenGfxFamily::RENDER_SURFACE_STATE *NEO::HardwareParse::getSurfaceState(IndirectHeap *ssh, uint32_t index); \ No newline at end of file diff --git a/shared/test/common/xe_hpg_core/cmd_parse_xe_hpg_core.cpp b/shared/test/common/xe_hpg_core/cmd_parse_xe_hpg_core.cpp index d2e8cde498..a785d0e517 100644 --- a/shared/test/common/xe_hpg_core/cmd_parse_xe_hpg_core.cpp +++ b/shared/test/common/xe_hpg_core/cmd_parse_xe_hpg_core.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2021 Intel Corporation + * Copyright (C) 2021-2022 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -9,3 +9,5 @@ using GenStruct = NEO::XE_HPG_CORE; using GenGfxFamily = NEO::XE_HPG_COREFamily; #include "shared/test/common/cmd_parse/cmd_parse_xehp_and_later.inl" + +template const typename GenGfxFamily::RENDER_SURFACE_STATE *NEO::HardwareParse::getSurfaceState(IndirectHeap *ssh, uint32_t index); \ No newline at end of file