From 9d8ce7aace31c0f80870ed8fa4d4ad8995b26dd3 Mon Sep 17 00:00:00 2001 From: Maciej Plewka Date: Wed, 12 Jan 2022 16:57:42 +0000 Subject: [PATCH] Command container appends BB_END on cmd buffer allocation end When the linear stream created for a command container does not have enough space for a command and BB_END, it will program BB_END and allocate a new command buffer allocation. The pointer returned from getSpace in this case will point to storage from the new command buffer allocation. Related-To: NEO-5707 Signed-off-by: Maciej Plewka --- level_zero/core/source/cmdlist/cmdlist_hw.h | 3 +- level_zero/core/source/cmdlist/cmdlist_hw.inl | 35 -------- .../core/source/cmdlist/cmdlist_hw_base.inl | 3 - .../cmdlist/cmdlist_hw_xehp_and_later.inl | 13 --- .../xe_hpc_core/cmdlist_xe_hpc_core.cpp | 6 -- .../test_cmdlist_append_launch_kernel_1.cpp | 54 +----------- .../test_cmdlist_append_launch_kernel_2.cpp | 25 +++--- .../unit_test/helpers/hw_helper_tests.cpp | 10 +++ .../source/command_container/cmdcontainer.cpp | 17 +++- .../source/command_container/cmdcontainer.h | 8 +- .../command_container/command_encoder.h | 10 --- .../command_container/command_encoder.inl | 21 ----- .../command_encoder_bdw_and_later.inl | 43 --------- .../command_encoder_xehp_and_later.inl | 39 --------- .../implicit_scaling_xehp_and_later.inl | 15 ++-- .../command_stream/command_stream_receiver.h | 2 +- .../source/command_stream/linear_stream.cpp | 8 +- shared/source/command_stream/linear_stream.h | 12 ++- .../gen12lp/command_encoder_gen12lp.cpp | 7 +- shared/source/helpers/hw_helper.h | 6 +- shared/source/helpers/hw_helper_base.inl | 8 ++ .../source/helpers/preamble_bdw_and_later.inl | 3 +- .../source/utilities/software_tags_manager.h | 3 +- .../gen12lp/command_encoder_tests_gen12lp.cpp | 32 ++++++- .../gen12lp/test_command_encoder_gen12lp.cpp | 16 ---- .../common/gen12lp/test_encode_gen12lp.cpp | 3 +- .../xe_hpg_core/dg2/test_encode_dg2.cpp | 3 +- .../command_container_tests.cpp | 49 ++++++++++- 
.../command_encoder_tests.cpp | 1 + .../command_stream/linear_stream_tests.cpp | 87 ++++++++++++++++++- ..._encode_dispatch_kernel_xehp_and_later.cpp | 26 +----- 31 files changed, 262 insertions(+), 306 deletions(-) diff --git a/level_zero/core/source/cmdlist/cmdlist_hw.h b/level_zero/core/source/cmdlist/cmdlist_hw.h index ebf14c4d1f..4eb08cba23 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw.h +++ b/level_zero/core/source/cmdlist/cmdlist_hw.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2020-2021 Intel Corporation + * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -159,7 +159,6 @@ struct CommandListCoreFamily : CommandListImp { ze_result_t reset() override; ze_result_t executeCommandListImmediate(bool performMigration) override; size_t getReserveSshSize(); - void increaseCommandStreamSpace(size_t commandSize); protected: MOCKABLE_VIRTUAL ze_result_t appendMemoryCopyKernelWithGA(void *dstPtr, NEO::GraphicsAllocation *dstPtrAlloc, diff --git a/level_zero/core/source/cmdlist/cmdlist_hw.inl b/level_zero/core/source/cmdlist/cmdlist_hw.inl index ff2c7a4eed..22a245dff5 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw.inl @@ -332,7 +332,6 @@ ze_result_t CommandListCoreFamily::appendEventReset(ze_event_hand if (this->partitionCount > 1) { estimateSize += estimateBufferSizeMultiTileBarrier(hwInfo); } - increaseCommandStreamSpace(estimateSize); for (uint32_t i = 0u; i < packetsToReset; i++) { NEO::MemorySynchronizationCommands::addPipeControlAndProgramPostSyncOperation( @@ -896,13 +895,6 @@ ze_result_t CommandListCoreFamily::appendMemoryCopyBlit(uintptr_t commandContainer.addToResidencyContainer(clearColorAllocation); NEO::BlitPropertiesContainer blitPropertiesContainer{blitProperties}; - bool blitterDirectSubmission = true; // assume direct submission enabled, since usually MI_BATCH_BUFFER_START is bigger than MI_BATCH_BUFFER_END - size_t estimatedSize = 
NEO::BlitCommandsHelper::template BlitCommandsHelper::estimateBlitCommandsSize(blitPropertiesContainer, - false, - false, - blitterDirectSubmission, - *device->getNEODevice()->getExecutionEnvironment()->rootDeviceEnvironments[device->getRootDeviceIndex()]); - increaseCommandStreamSpace(estimatedSize); NEO::BlitCommandsHelper::dispatchBlitCommandsForBufferPerRow(blitProperties, *commandContainer.getCommandStream(), *device->getNEODevice()->getExecutionEnvironment()->rootDeviceEnvironments[device->getRootDeviceIndex()]); @@ -946,13 +938,6 @@ ze_result_t CommandListCoreFamily::appendMemoryCopyBlitRegion(NEO } NEO::BlitPropertiesContainer blitPropertiesContainer{blitProperties}; - bool blitterDirectSubmission = true; // assume direct submission enabled, since usually MI_BATCH_BUFFER_START is bigger than MI_BATCH_BUFFER_END - size_t estimatedSize = NEO::BlitCommandsHelper::template BlitCommandsHelper::estimateBlitCommandsSize(blitPropertiesContainer, - false, - false, - blitterDirectSubmission, - *device->getNEODevice()->getExecutionEnvironment()->rootDeviceEnvironments[device->getRootDeviceIndex()]); - increaseCommandStreamSpace(estimatedSize); appendEventForProfiling(hSignalEvent, true); bool copyRegionPreferred = NEO::BlitCommandsHelper::isCopyRegionPreferred(copySizeModified, *device->getNEODevice()->getExecutionEnvironment()->rootDeviceEnvironments[device->getRootDeviceIndex()]); @@ -1684,11 +1669,9 @@ void CommandListCoreFamily::appendSignalEventPostWalker(ze_event_ if (isCopyOnly()) { NEO::MiFlushArgs args; args.commandWithPostSync = true; - increaseCommandStreamSpace(NEO::EncodeMiFlushDW::getMiFlushDwCmdSizeForDataWrite()); NEO::EncodeMiFlushDW::programMiFlushDw(*commandContainer.getCommandStream(), baseAddr, Event::STATE_SIGNALED, args, hwInfo); } else { - increaseCommandStreamSpace(NEO::MemorySynchronizationCommands::getSizeForPipeControlWithPostSyncOperation(hwInfo)); NEO::PipeControlArgs args; args.dcFlushEnable = 
NEO::MemorySynchronizationCommands::getDcFlushEnable(event->signalScope, hwInfo); if (this->partitionCount > 1) { @@ -1839,7 +1822,6 @@ ze_result_t CommandListCoreFamily::appendSignalEvent(ze_event_han if (isCopyOnly()) { NEO::MiFlushArgs args; args.commandWithPostSync = true; - increaseCommandStreamSpace(NEO::EncodeMiFlushDW::getMiFlushDwCmdSizeForDataWrite()); NEO::EncodeMiFlushDW::programMiFlushDw(*commandContainer.getCommandStream(), ptrOffset(baseAddr, eventSignalOffset), Event::STATE_SIGNALED, args, hwInfo); } else { @@ -1851,7 +1833,6 @@ ze_result_t CommandListCoreFamily::appendSignalEvent(ze_event_han event->setPacketsInUse(this->partitionCount); } if (applyScope || event->isEventTimestampFlagSet()) { - increaseCommandStreamSpace(NEO::MemorySynchronizationCommands::getSizeForPipeControlWithPostSyncOperation(hwInfo)); NEO::MemorySynchronizationCommands::addPipeControlAndProgramPostSyncOperation( *commandContainer.getCommandStream(), POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA, @@ -1860,7 +1841,6 @@ ze_result_t CommandListCoreFamily::appendSignalEvent(ze_event_han hwInfo, args); } else { - increaseCommandStreamSpace(NEO::EncodeStoreMemory::getStoreDataImmSize()); NEO::EncodeStoreMemory::programStoreDataImm( *commandContainer.getCommandStream(), ptrOffset(baseAddr, eventSignalOffset), @@ -1928,7 +1908,6 @@ ze_result_t CommandListCoreFamily::appendWaitOnEvents(uint32_t nu estimatedBufferSize += NEO::EncodeSempahore::getSizeMiSemaphoreWait(); } } - increaseCommandStreamSpace(estimatedBufferSize); if (dcFlushRequired) { if (isCopyOnly()) { @@ -2204,17 +2183,6 @@ ze_result_t CommandListCoreFamily::reserveSpace(size_t size, void return ZE_RESULT_SUCCESS; } -template -void CommandListCoreFamily::increaseCommandStreamSpace(size_t commandSize) { - using MI_BATCH_BUFFER_END = typename GfxFamily::MI_BATCH_BUFFER_END; - size_t estimatedSizeRequired = commandSize + sizeof(MI_BATCH_BUFFER_END); - if 
(commandContainer.getCommandStream()->getAvailableSpace() < estimatedSizeRequired) { - auto bbEnd = commandContainer.getCommandStream()->template getSpaceForCmd(); - *bbEnd = GfxFamily::cmdInitBatchBufferEnd; - commandContainer.allocateNextCommandBuffer(); - } -} - template ze_result_t CommandListCoreFamily::prepareIndirectParams(const ze_group_count_t *pThreadGroupDimensions) { using GfxFamily = typename NEO::GfxFamilyMapper::GfxFamily; @@ -2353,9 +2321,6 @@ ze_result_t CommandListCoreFamily::appendBarrier(ze_event_handle_ const auto &hwInfo = this->device->getHwInfo(); if (!hSignalEvent) { if (isCopyOnly()) { - size_t estimatedSizeRequired = NEO::EncodeMiFlushDW::getMiFlushDwCmdSizeForDataWrite(); - increaseCommandStreamSpace(estimatedSizeRequired); - NEO::MiFlushArgs args; NEO::EncodeMiFlushDW::programMiFlushDw(*commandContainer.getCommandStream(), 0, 0, args, hwInfo); } else { diff --git a/level_zero/core/source/cmdlist/cmdlist_hw_base.inl b/level_zero/core/source/cmdlist/cmdlist_hw_base.inl index 0a86a2f686..53e3a7ff7a 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw_base.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw_base.inl @@ -183,9 +183,6 @@ void CommandListCoreFamily::appendMultiPartitionEpilogue() {} template void CommandListCoreFamily::appendComputeBarrierCommand() { - size_t estimatedSizeRequired = NEO::MemorySynchronizationCommands::getSizeForSinglePipeControl(); - increaseCommandStreamSpace(estimatedSizeRequired); - NEO::PipeControlArgs args = createBarrierFlags(); NEO::MemorySynchronizationCommands::addPipeControl(*commandContainer.getCommandStream(), args); } diff --git a/level_zero/core/source/cmdlist/cmdlist_hw_xehp_and_later.inl b/level_zero/core/source/cmdlist/cmdlist_hw_xehp_and_later.inl index 1465430707..59458b434b 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw_xehp_and_later.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw_xehp_and_later.inl @@ -134,8 +134,6 @@ ze_result_t 
CommandListCoreFamily::appendLaunchKernelWithParams(z const auto &hwInfo = this->device->getHwInfo(); if (NEO::DebugManager.flags.ForcePipeControlPriorToWalker.get()) { - increaseCommandStreamSpace(NEO::MemorySynchronizationCommands::getSizeForSinglePipeControl()); - NEO::PipeControlArgs args; NEO::MemorySynchronizationCommands::addPipeControl(*commandContainer.getCommandStream(), args); } @@ -245,8 +243,6 @@ ze_result_t CommandListCoreFamily::appendLaunchKernelWithParams(z event->setPacketsInUse(partitionCount); } if (L3FlushEnable) { - size_t estimatedSize = NEO::MemorySynchronizationCommands::getSizeForPipeControlWithPostSyncOperation(hwInfo); - increaseCommandStreamSpace(estimatedSize); programEventL3Flush(hEvent, this->device, partitionCount, commandContainer); } } @@ -302,16 +298,12 @@ ze_result_t CommandListCoreFamily::appendLaunchKernelWithParams(z template void CommandListCoreFamily::appendMultiPartitionPrologue(uint32_t partitionDataSize) { - size_t estimatedSizeRequired = NEO::ImplicitScalingDispatch::getOffsetRegisterSize(); - increaseCommandStreamSpace(estimatedSizeRequired); NEO::ImplicitScalingDispatch::dispatchOffsetRegister(*commandContainer.getCommandStream(), partitionDataSize); } template void CommandListCoreFamily::appendMultiPartitionEpilogue() { - const size_t estimatedSizeRequired = NEO::ImplicitScalingDispatch::getOffsetRegisterSize(); - increaseCommandStreamSpace(estimatedSizeRequired); NEO::ImplicitScalingDispatch::dispatchOffsetRegister(*commandContainer.getCommandStream(), NEO::ImplicitScalingDispatch::getPostSyncOffset()); } @@ -320,14 +312,9 @@ template void CommandListCoreFamily::appendComputeBarrierCommand() { if (this->partitionCount > 1) { auto neoDevice = device->getNEODevice(); - auto &hwInfo = neoDevice->getHardwareInfo(); - - increaseCommandStreamSpace(estimateBufferSizeMultiTileBarrier(hwInfo)); appendMultiTileBarrier(*neoDevice); } else { NEO::PipeControlArgs args = createBarrierFlags(); - size_t estimatedSizeRequired = 
NEO::MemorySynchronizationCommands::getSizeForSinglePipeControl(); - increaseCommandStreamSpace(estimatedSizeRequired); NEO::MemorySynchronizationCommands::addPipeControl(*commandContainer.getCommandStream(), args); } } diff --git a/level_zero/core/source/xe_hpc_core/cmdlist_xe_hpc_core.cpp b/level_zero/core/source/xe_hpc_core/cmdlist_xe_hpc_core.cpp index 695c5479e2..ca8ffebccc 100644 --- a/level_zero/core/source/xe_hpc_core/cmdlist_xe_hpc_core.cpp +++ b/level_zero/core/source/xe_hpc_core/cmdlist_xe_hpc_core.cpp @@ -44,9 +44,6 @@ ze_result_t CommandListCoreFamily::appendMemoryPrefetch(const NEO::LinearStream &cmdStream = *commandContainer.getCommandStream(); - size_t estimatedSizeRequired = NEO::EncodeMemoryPrefetch::getSizeForMemoryPrefetch(size); - increaseCommandStreamSpace(estimatedSizeRequired); - NEO::EncodeMemoryPrefetch::programMemoryPrefetch(cmdStream, *gpuAlloc, static_cast(size), offset, hwInfo); return ZE_RESULT_SUCCESS; @@ -56,9 +53,6 @@ template <> void CommandListCoreFamily::applyMemoryRangesBarrier(uint32_t numRanges, const size_t *pRangeSizes, const void **pRanges) { - - increaseCommandStreamSpace(NEO::MemorySynchronizationCommands::getSizeForSinglePipeControl()); - NEO::PipeControlArgs args; args.hdcPipelineFlush = true; args.unTypedDataPortCacheFlush = true; diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_1.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_1.cpp index b9193433c0..b97472336d 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_1.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_1.cpp @@ -106,10 +106,9 @@ HWTEST_F(CommandListAppendLaunchKernel, givenNotEnoughSpaceInCommandStreamWhenAp const auto streamCpu = stream->getCpuBase(); Vec3 groupCount{1, 1, 1}; - auto requiredSizeEstimate = EncodeDispatchKernel::estimateEncodeDispatchKernelCmdsSize( - 
device->getNEODevice(), {0, 0, 0}, groupCount, false, false, false, kernel.get(), false); + auto sizeLeftInStream = sizeof(MI_BATCH_BUFFER_END); auto available = stream->getAvailableSpace(); - stream->getSpace(available - requiredSizeEstimate + 1); + stream->getSpace(available - sizeLeftInStream); auto bbEndPosition = stream->getSpace(0); const uint32_t threadGroupDimensions[3] = {1, 1, 1}; @@ -236,38 +235,6 @@ HWTEST_F(CommandListAppendLaunchKernel, WhenAppendingMultipleTimesThenSshIsNotDe EXPECT_NE(initialAllocation, reallocatedAllocation); } -HWTEST2_F(CommandListAppendLaunchKernel, WhenAppendingFunctionThenUsedCmdBufferSizeDoesNotExceedEstimate, IsAtLeastSkl) { - createKernel(); - ze_group_count_t groupCount{1, 1, 1}; - - auto commandList = std::make_unique>>(); - ze_result_t ret = commandList->initialize(device, NEO::EngineGroupType::RenderCompute, 0u); - ASSERT_EQ(ZE_RESULT_SUCCESS, ret); - - auto sizeBefore = commandList->commandContainer.getCommandStream()->getUsed(); - - auto result = commandList->appendLaunchKernelWithParams(kernel->toHandle(), &groupCount, nullptr, false, false, false); - ASSERT_EQ(ZE_RESULT_SUCCESS, result); - - auto sizeAfter = commandList->commandContainer.getCommandStream()->getUsed(); - auto estimate = NEO::EncodeDispatchKernel::estimateEncodeDispatchKernelCmdsSize( - device->getNEODevice(), Vec3(0, 0, 0), Vec3(1, 1, 1), false, false, false, kernel.get(), false); - - EXPECT_LE(sizeAfter - sizeBefore, estimate); - - sizeBefore = commandList->commandContainer.getCommandStream()->getUsed(); - - result = commandList->appendLaunchKernelWithParams(kernel->toHandle(), &groupCount, nullptr, true, false, false); - ASSERT_EQ(ZE_RESULT_SUCCESS, result); - - sizeAfter = commandList->commandContainer.getCommandStream()->getUsed(); - estimate = NEO::EncodeDispatchKernel::estimateEncodeDispatchKernelCmdsSize( - device->getNEODevice(), Vec3(0, 0, 0), Vec3(1, 1, 1), false, false, false, kernel.get(), false); - - EXPECT_LE(sizeAfter - sizeBefore, 
estimate); - EXPECT_LE(sizeAfter - sizeBefore, estimate); -} - HWCMDTEST_F(IGFX_GEN8_CORE, CommandListAppendLaunchKernel, givenEventsWhenAppendingKernelThenPostSyncToEventIsGenerated) { using GPGPU_WALKER = typename FamilyType::GPGPU_WALKER; using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; @@ -689,19 +656,10 @@ HWTEST_F(CommandListAppendLaunchKernel, givenIndirectDispatchWithImplicitArgsWhe auto result = context->allocDeviceMem(device->toHandle(), &deviceDesc, 16384u, 4096u, &alloc); ASSERT_EQ(result, ZE_RESULT_SUCCESS); - auto sizeBefore = commandList->commandContainer.getCommandStream()->getUsed(); - result = commandList->appendLaunchKernelIndirect(kernel.toHandle(), static_cast(alloc), nullptr, 0, nullptr); EXPECT_EQ(result, ZE_RESULT_SUCCESS); - - auto sizeAfter = commandList->commandContainer.getCommandStream()->getUsed(); - auto estimate = NEO::EncodeDispatchKernel::estimateEncodeDispatchKernelCmdsSize( - device->getNEODevice(), Vec3(0, 0, 0), Vec3(1, 1, 1), false, false, true, &kernel, false); - - EXPECT_LE(sizeAfter - sizeBefore, estimate); - auto heap = commandList->commandContainer.getIndirectHeap(HeapType::INDIRECT_OBJECT); uint64_t pImplicitArgsGPUVA = heap->getGraphicsAllocation()->getGpuAddress() + kernel.getSizeForImplicitArgsPatching() - sizeof(ImplicitArgs); auto workDimStoreRegisterMemCmd = FamilyType::cmdInitStoreRegisterMem; @@ -869,19 +827,11 @@ HWTEST_F(CommandListAppendLaunchKernel, givenIndirectDispatchWhenAppendingThenWo auto result = context->allocDeviceMem(device->toHandle(), &deviceDesc, 16384u, 4096u, &alloc); ASSERT_EQ(result, ZE_RESULT_SUCCESS); - auto sizeBefore = commandList->commandContainer.getCommandStream()->getUsed(); - result = commandList->appendLaunchKernelIndirect(kernel.toHandle(), static_cast(alloc), nullptr, 0, nullptr); EXPECT_EQ(result, ZE_RESULT_SUCCESS); - auto sizeAfter = commandList->commandContainer.getCommandStream()->getUsed(); - auto estimate = 
NEO::EncodeDispatchKernel::estimateEncodeDispatchKernelCmdsSize( - device->getNEODevice(), Vec3(0, 0, 0), Vec3(1, 1, 1), false, false, true, &kernel, false); - - EXPECT_LE(sizeAfter - sizeBefore, estimate); - kernel.groupSize[2] = 2; result = commandList->appendLaunchKernelIndirect(kernel.toHandle(), static_cast(alloc), diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_2.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_2.cpp index b8a0958589..134cc108eb 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_2.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_2.cpp @@ -1209,20 +1209,19 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, MultiTileCommandListAppendLaunchFunctionXeHpCoreTes HWTEST2_F(MultiTileCommandListAppendLaunchFunctionXeHpCoreTest, givenCooperativeKernelWhenAppendingKernelsThenDoNotUseImplicitScaling, IsAtLeastXeHpCore) { ze_group_count_t groupCount{1, 1, 1}; - auto estimateWithNonCooperativeKernel = NEO::EncodeDispatchKernel::estimateEncodeDispatchKernelCmdsSize( - device->getNEODevice(), Vec3{0, 0, 0}, Vec3{1, 1, 1}, false, false, false, kernel.get(), true); - auto estimateWithCooperativeKernel = NEO::EncodeDispatchKernel::estimateEncodeDispatchKernelCmdsSize( - device->getNEODevice(), Vec3{0, 0, 0}, Vec3{1, 1, 1}, false, true, false, kernel.get(), true); - EXPECT_GT(estimateWithNonCooperativeKernel, estimateWithCooperativeKernel); - auto commandListWithNonCooperativeKernel = std::make_unique>>(); auto result = commandListWithNonCooperativeKernel->initialize(device, NEO::EngineGroupType::RenderCompute, 0u); ASSERT_EQ(ZE_RESULT_SUCCESS, result); auto sizeBefore = commandListWithNonCooperativeKernel->commandContainer.getCommandStream()->getUsed(); result = commandListWithNonCooperativeKernel->appendLaunchKernelWithParams(kernel->toHandle(), &groupCount, nullptr, false, false, false); 
ASSERT_EQ(ZE_RESULT_SUCCESS, result); - auto sizeUsedWithNonCooperativeKernel = commandListWithNonCooperativeKernel->commandContainer.getCommandStream()->getUsed() - sizeBefore; - EXPECT_LE(sizeUsedWithNonCooperativeKernel, estimateWithNonCooperativeKernel); + auto sizeAfter = commandListWithNonCooperativeKernel->commandContainer.getCommandStream()->getUsed(); + GenCmdList cmdList; + ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( + cmdList, ptrOffset(commandListWithNonCooperativeKernel->commandContainer.getCommandStream()->getCpuBase(), sizeBefore), sizeAfter - sizeBefore)); + auto itorWalker = find(cmdList.begin(), cmdList.end()); + auto cmd = genCmdCast(*itorWalker); + EXPECT_TRUE(cmd->getWorkloadPartitionEnable()); auto commandListWithCooperativeKernel = std::make_unique>>(); result = commandListWithCooperativeKernel->initialize(device, NEO::EngineGroupType::RenderCompute, 0u); @@ -1230,8 +1229,14 @@ HWTEST2_F(MultiTileCommandListAppendLaunchFunctionXeHpCoreTest, givenCooperative sizeBefore = commandListWithCooperativeKernel->commandContainer.getCommandStream()->getUsed(); result = commandListWithCooperativeKernel->appendLaunchKernelWithParams(kernel->toHandle(), &groupCount, nullptr, false, false, true); ASSERT_EQ(ZE_RESULT_SUCCESS, result); - auto sizeUsedWithCooperativeKernel = commandListWithCooperativeKernel->commandContainer.getCommandStream()->getUsed() - sizeBefore; - EXPECT_LE(sizeUsedWithCooperativeKernel, estimateWithCooperativeKernel); + sizeAfter = commandListWithCooperativeKernel->commandContainer.getCommandStream()->getUsed(); + cmdList.clear(); + ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( + cmdList, ptrOffset(commandListWithNonCooperativeKernel->commandContainer.getCommandStream()->getCpuBase(), sizeBefore), sizeAfter - sizeBefore)); + + itorWalker = find(cmdList.begin(), cmdList.end()); + cmd = genCmdCast(*itorWalker); + EXPECT_TRUE(cmd->getWorkloadPartitionEnable()); } } // namespace ult diff --git 
a/opencl/test/unit_test/helpers/hw_helper_tests.cpp b/opencl/test/unit_test/helpers/hw_helper_tests.cpp index ffb298a30a..646a3226ab 100644 --- a/opencl/test/unit_test/helpers/hw_helper_tests.cpp +++ b/opencl/test/unit_test/helpers/hw_helper_tests.cpp @@ -1449,3 +1449,13 @@ HWTEST2_F(HwHelperTest, givenHwInfoConfigWhenCheckingForceNonGpuCoherencyWAThenF EXPECT_FALSE(hwHelper.forceNonGpuCoherencyWA(true)); EXPECT_FALSE(hwHelper.forceNonGpuCoherencyWA(false)); } + +HWTEST_F(HwHelperTest, GivenHwInfoWhenGetBatchBufferEndSizeCalledThenCorrectSizeReturned) { + const auto &hwHelper = HwHelper::get(renderCoreFamily); + EXPECT_EQ(hwHelper.getBatchBufferEndSize(), sizeof(typename FamilyType::MI_BATCH_BUFFER_END)); +} + +HWTEST_F(HwHelperTest, GivenHwInfoWhenGetBatchBufferEndReferenceCalledThenCorrectPtrReturned) { + const auto &hwHelper = HwHelper::get(renderCoreFamily); + EXPECT_EQ(hwHelper.getBatchBufferEndReference(), reinterpret_cast(&FamilyType::cmdInitBatchBufferEnd)); +} \ No newline at end of file diff --git a/shared/source/command_container/cmdcontainer.cpp b/shared/source/command_container/cmdcontainer.cpp index 45e9dec80a..af44372761 100644 --- a/shared/source/command_container/cmdcontainer.cpp +++ b/shared/source/command_container/cmdcontainer.cpp @@ -69,8 +69,10 @@ ErrorCode CommandContainer::initialize(Device *device, AllocationsList *reusable cmdBufferAllocations.push_back(cmdBufferAllocation); - commandStream = std::unique_ptr(new LinearStream(cmdBufferAllocation->getUnderlyingBuffer(), - defaultListCmdBufferSize)); + const auto &hardwareInfo = device->getHardwareInfo(); + auto &hwHelper = NEO::HwHelper::get(hardwareInfo.platform.eRenderCoreFamily); + commandStream = std::make_unique(cmdBufferAllocation->getUnderlyingBuffer(), + alignedSize - cmdBufferReservedSize, this, hwHelper.getBatchBufferEndSize()); commandStream->replaceGraphicsAllocation(cmdBufferAllocation); @@ -264,7 +266,8 @@ void CommandContainer::allocateNextCommandBuffer() { 
cmdBufferAllocations.push_back(cmdBufferAllocation); - commandStream->replaceBuffer(cmdBufferAllocation->getUnderlyingBuffer(), defaultListCmdBufferSize); + size_t alignedSize = alignUp(totalCmdBufferSize, MemoryConstants::pageSize64k); + commandStream->replaceBuffer(cmdBufferAllocation->getUnderlyingBuffer(), alignedSize - cmdBufferReservedSize); commandStream->replaceGraphicsAllocation(cmdBufferAllocation); if (!getFlushTaskUsedForImmediate()) { @@ -272,6 +275,14 @@ void CommandContainer::allocateNextCommandBuffer() { } } +void CommandContainer::closeAndAllocateNextCommandBuffer() { + auto &hwHelper = NEO::HwHelper::get(device->getHardwareInfo().platform.eRenderCoreFamily); + auto bbEndSize = hwHelper.getBatchBufferEndSize(); + auto ptr = commandStream->getSpace(0u); + memcpy_s(ptr, bbEndSize, hwHelper.getBatchBufferEndReference(), bbEndSize); + allocateNextCommandBuffer(); +} + void CommandContainer::prepareBindfulSsh() { if (ApiSpecificConfig::getBindlessConfiguration()) { if (allocationIndirectHeaps[IndirectHeap::Type::SURFACE_STATE] == nullptr) { diff --git a/shared/source/command_container/cmdcontainer.h b/shared/source/command_container/cmdcontainer.h index 884f30c49f..db84e26e47 100644 --- a/shared/source/command_container/cmdcontainer.h +++ b/shared/source/command_container/cmdcontainer.h @@ -36,10 +36,9 @@ enum class ErrorCode { class CommandContainer : public NonCopyableOrMovableClass { public: static constexpr size_t defaultListCmdBufferSize = MemoryConstants::kiloByte * 256; - static constexpr size_t totalCmdBufferSize = - defaultListCmdBufferSize + - MemoryConstants::cacheLineSize + - CSRequirements::csOverfetchSize; + static constexpr size_t cmdBufferReservedSize = MemoryConstants::cacheLineSize + + CSRequirements::csOverfetchSize; + static constexpr size_t totalCmdBufferSize = defaultListCmdBufferSize + cmdBufferReservedSize; CommandContainer(); @@ -86,6 +85,7 @@ class CommandContainer : public NonCopyableOrMovableClass { IndirectHeap 
*getHeapWithRequiredSizeAndAlignment(HeapType heapType, size_t sizeRequired, size_t alignment); void allocateNextCommandBuffer(); + void closeAndAllocateNextCommandBuffer(); void handleCmdBufferAllocations(size_t startIndex); GraphicsAllocation *obtainNextCommandBufferAllocation(); diff --git a/shared/source/command_container/command_encoder.h b/shared/source/command_container/command_encoder.h index f6458c5408..9921c0e50c 100644 --- a/shared/source/command_container/command_encoder.h +++ b/shared/source/command_container/command_encoder.h @@ -7,7 +7,6 @@ #pragma once #include "shared/source/command_container/cmdcontainer.h" -#include "shared/source/command_stream/linear_stream.h" #include "shared/source/debugger/debugger.h" #include "shared/source/execution_environment/execution_environment.h" #include "shared/source/helpers/definitions/mi_flush_args.h" @@ -64,10 +63,6 @@ struct EncodeDispatchKernel { static void *getInterfaceDescriptor(CommandContainer &container, uint32_t &iddOffset); - static size_t estimateEncodeDispatchKernelCmdsSize(Device *device, const Vec3 &groupStart, const Vec3 &groupCount, - bool isInternal, bool isCooperative, bool isIndirect, DispatchKernelEncoderI *dispatchInterface, - bool isPartitioned); - static bool isRuntimeLocalIdsGenerationRequired(uint32_t activeChannels, size_t *lws, std::array walkOrder, @@ -116,8 +111,6 @@ struct EncodeStates { const void *fnDynamicStateHeap, BindlessHeapsHelper *bindlessHeapHelper, const HardwareInfo &hwInfo); - - static size_t getAdjustStateComputeModeSize(); }; template @@ -186,9 +179,6 @@ struct EncodeIndirectParams { static void setWorkDimIndirect(CommandContainer &container, const NEO::CrossThreadDataOffset offset, uint64_t crossThreadAddress, const uint32_t *groupSize); static void setGlobalWorkSizeIndirect(CommandContainer &container, const NEO::CrossThreadDataOffset offsets[3], uint64_t crossThreadAddress, const uint32_t *lws); - static size_t getCmdsSizeForIndirectParams(); - static size_t 
getCmdsSizeForSetGroupSizeIndirect(); - static size_t getCmdsSizeForSetGroupCountIndirect(); static size_t getCmdsSizeForSetWorkDimIndirect(const uint32_t *groupSize, bool misalignedPtr); }; diff --git a/shared/source/command_container/command_encoder.inl b/shared/source/command_container/command_encoder.inl index 0f0173e1a8..49baf3513e 100644 --- a/shared/source/command_container/command_encoder.inl +++ b/shared/source/command_container/command_encoder.inl @@ -92,11 +92,6 @@ uint32_t EncodeStates::copySamplerState(IndirectHeap *dsh, return samplerStateOffsetInDsh; } // namespace NEO -template -inline size_t EncodeStates::getAdjustStateComputeModeSize() { - return 0; -} - template void EncodeMathMMIO::encodeMulRegVal(CommandContainer &container, uint32_t offset, uint32_t val, uint64_t dstAddress) { int logLws = 0; @@ -665,22 +660,6 @@ void EncodeIndirectParams::setGlobalWorkSizeIndirect(CommandContainer &c } } -template -inline size_t EncodeIndirectParams::getCmdsSizeForIndirectParams() { - return 3 * sizeof(typename Family::MI_LOAD_REGISTER_MEM); -} - -template -inline size_t EncodeIndirectParams::getCmdsSizeForSetGroupCountIndirect() { - return 3 * (sizeof(MI_STORE_REGISTER_MEM)); -} - -template -inline size_t EncodeIndirectParams::getCmdsSizeForSetGroupSizeIndirect() { - constexpr uint32_t aluCmdSize = sizeof(MI_MATH) + sizeof(MI_MATH_ALU_INST_INLINE) * NUM_ALU_INST_FOR_READ_MODIFY_WRITE; - return 3 * (sizeof(MI_LOAD_REGISTER_REG) + sizeof(MI_LOAD_REGISTER_IMM) + aluCmdSize + sizeof(MI_STORE_REGISTER_MEM)); -} - template inline size_t EncodeIndirectParams::getCmdsSizeForSetWorkDimIndirect(const uint32_t *groupSize, bool misaligedPtr) { constexpr uint32_t aluCmdSize = sizeof(MI_MATH) + sizeof(MI_MATH_ALU_INST_INLINE) * NUM_ALU_INST_FOR_READ_MODIFY_WRITE; diff --git a/shared/source/command_container/command_encoder_bdw_and_later.inl b/shared/source/command_container/command_encoder_bdw_and_later.inl index c35373d441..6c66b1d3f6 100644 --- 
a/shared/source/command_container/command_encoder_bdw_and_later.inl +++ b/shared/source/command_container/command_encoder_bdw_and_later.inl @@ -66,15 +66,6 @@ void EncodeDispatchKernel::encode(CommandContainer &container, if (!args.isIndirect) { threadDimsVec = {threadDims[0], threadDims[1], threadDims[2]}; } - size_t estimatedSizeRequired = estimateEncodeDispatchKernelCmdsSize(args.device, threadStartVec, threadDimsVec, - args.isInternal, args.isCooperative, args.isIndirect, - args.dispatchInterface, false); - if (container.getCommandStream()->getAvailableSpace() < estimatedSizeRequired) { - auto bbEnd = listCmdBufferStream->getSpaceForCmd(); - *bbEnd = Family::cmdInitBatchBufferEnd; - - container.allocateNextCommandBuffer(); - } WALKER_TYPE cmd = Family::cmdInitGpgpuWalker; auto idd = Family::cmdInitInterfaceDescriptorData; @@ -343,40 +334,6 @@ inline void EncodeDispatchKernel::encodeAdditionalWalkerFields(const Har template void EncodeDispatchKernel::appendAdditionalIDDFields(INTERFACE_DESCRIPTOR_DATA *pInterfaceDescriptor, const HardwareInfo &hwInfo, const uint32_t threadsPerThreadGroup, uint32_t slmTotalSize, SlmPolicy slmPolicy) {} -template -size_t EncodeDispatchKernel::estimateEncodeDispatchKernelCmdsSize(Device *device, const Vec3 &groupStart, - const Vec3 &groupCount, bool isInternal, - bool isCooperative, bool isIndirect, DispatchKernelEncoderI *dispatchInterface, - bool isPartitioned) { - using MEDIA_STATE_FLUSH = typename Family::MEDIA_STATE_FLUSH; - using MEDIA_INTERFACE_DESCRIPTOR_LOAD = typename Family::MEDIA_INTERFACE_DESCRIPTOR_LOAD; - using MI_BATCH_BUFFER_END = typename Family::MI_BATCH_BUFFER_END; - - size_t issueMediaInterfaceDescriptorLoad = sizeof(MEDIA_STATE_FLUSH) + sizeof(MEDIA_INTERFACE_DESCRIPTOR_LOAD); - size_t totalSize = sizeof(WALKER_TYPE); - totalSize += PreemptionHelper::getPreemptionWaCsSize(*device); - totalSize += sizeof(MEDIA_STATE_FLUSH); - totalSize += issueMediaInterfaceDescriptorLoad; - totalSize += 
EncodeStates::getAdjustStateComputeModeSize(); - totalSize += EncodeWA::getAdditionalPipelineSelectSize(*device); - totalSize += EncodeIndirectParams::getCmdsSizeForIndirectParams(); - totalSize += EncodeIndirectParams::getCmdsSizeForSetGroupCountIndirect(); - totalSize += EncodeIndirectParams::getCmdsSizeForSetGroupSizeIndirect(); - if (isIndirect) { - UNRECOVERABLE_IF(dispatchInterface == nullptr); - totalSize += EncodeIndirectParams::getCmdsSizeForSetWorkDimIndirect(dispatchInterface->getGroupSize(), false); - if (dispatchInterface->getImplicitArgs()) { - totalSize += EncodeIndirectParams::getCmdsSizeForSetGroupCountIndirect(); - totalSize += EncodeIndirectParams::getCmdsSizeForSetGroupSizeIndirect(); - totalSize += EncodeIndirectParams::getCmdsSizeForSetWorkDimIndirect(dispatchInterface->getGroupSize(), true); - } - } - - totalSize += sizeof(MI_BATCH_BUFFER_END); - - return totalSize; -} - template inline void EncodeComputeMode::programComputeModeCommand(LinearStream &csr, StateComputeModeProperties &properties, const HardwareInfo &hwInfo) { } diff --git a/shared/source/command_container/command_encoder_xehp_and_later.inl b/shared/source/command_container/command_encoder_xehp_and_later.inl index cdae17c5e0..b33a302d4a 100644 --- a/shared/source/command_container/command_encoder_xehp_and_later.inl +++ b/shared/source/command_container/command_encoder_xehp_and_later.inl @@ -63,15 +63,6 @@ void EncodeDispatchKernel::encode(CommandContainer &container, if (!args.isIndirect) { threadDimsVec = {threadDims[0], threadDims[1], threadDims[2]}; } - size_t estimatedSizeRequired = estimateEncodeDispatchKernelCmdsSize(args.device, threadStartVec, threadDimsVec, - args.isInternal, args.isCooperative, args.isIndirect, args.dispatchInterface, - args.partitionCount > 1); - if (container.getCommandStream()->getAvailableSpace() < estimatedSizeRequired) { - auto bbEnd = listCmdBufferStream->getSpaceForCmd(); - *bbEnd = Family::cmdInitBatchBufferEnd; - - 
container.allocateNextCommandBuffer(); - } bool specialModeRequired = kernelDescriptor.kernelAttributes.flags.usesSpecialPipelineSelectMode; if (PreambleHelper::isSpecialPipelineSelectModeChanged(container.lastPipelineSelectModeRequired, specialModeRequired, hwInfo)) { @@ -448,36 +439,6 @@ void EncodeDispatchKernel::encodeThreadData(WALKER_TYPE &walkerCmd, } } -template -size_t EncodeDispatchKernel::estimateEncodeDispatchKernelCmdsSize(Device *device, const Vec3 &groupStart, - const Vec3 &groupCount, bool isInternal, - bool isCooperative, bool isIndirect, DispatchKernelEncoderI *dispatchInterface, - bool isPartitioned) { - size_t totalSize = sizeof(WALKER_TYPE); - totalSize += PreemptionHelper::getPreemptionWaCsSize(*device); - totalSize += EncodeStates::getAdjustStateComputeModeSize(); - totalSize += EncodeIndirectParams::getCmdsSizeForIndirectParams(); - totalSize += EncodeIndirectParams::getCmdsSizeForSetGroupCountIndirect(); - totalSize += EncodeIndirectParams::getCmdsSizeForSetGroupSizeIndirect(); - if (isIndirect) { - UNRECOVERABLE_IF(dispatchInterface == nullptr); - totalSize += EncodeIndirectParams::getCmdsSizeForSetWorkDimIndirect(dispatchInterface->getGroupSize(), false); - if (dispatchInterface->getImplicitArgs()) { - totalSize += EncodeIndirectParams::getCmdsSizeForSetGroupCountIndirect(); - totalSize += EncodeIndirectParams::getCmdsSizeForSetGroupSizeIndirect(); - totalSize += EncodeIndirectParams::getCmdsSizeForSetWorkDimIndirect(dispatchInterface->getGroupSize(), true); - } - } - - if ((isPartitioned && !isCooperative) && - !isInternal) { - const bool staticPartitioning = device->getDefaultEngine().commandStreamReceiver->isStaticWorkPartitioningEnabled(); - totalSize += ImplicitScalingDispatch::getSize(true, staticPartitioning, device->getDeviceBitfield(), groupStart, groupCount); - } - - return totalSize; -} - template void EncodeStateBaseAddress::setIohAddressForDebugger(NEO::Debugger::SbaAddresses &sbaAddress, const STATE_BASE_ADDRESS &sbaCmd) { } 
diff --git a/shared/source/command_container/implicit_scaling_xehp_and_later.inl b/shared/source/command_container/implicit_scaling_xehp_and_later.inl index b00bef2554..86b78b32fa 100644 --- a/shared/source/command_container/implicit_scaling_xehp_and_later.inl +++ b/shared/source/command_container/implicit_scaling_xehp_and_later.inl @@ -100,8 +100,10 @@ void ImplicitScalingDispatch::dispatchCommands(LinearStream &commandS staticPartitioning, useSecondaryBatchBuffer); - uint64_t cmdBufferGpuAddress = commandStream.getGraphicsAllocation()->getGpuAddress() + commandStream.getUsed(); - void *commandBuffer = commandStream.getSpace(0u); + auto dispatchCommandsSize = getSize(apiSelfCleanup, preferStaticPartitioning, devices, {walkerCmd.getThreadGroupIdStartingX(), walkerCmd.getThreadGroupIdStartingY(), walkerCmd.getThreadGroupIdStartingZ()}, {walkerCmd.getThreadGroupIdXDimension(), walkerCmd.getThreadGroupIdYDimension(), walkerCmd.getThreadGroupIdZDimension()}); + void *commandBuffer = commandStream.getSpace(dispatchCommandsSize); + uint64_t cmdBufferGpuAddress = commandStream.getGraphicsAllocation()->getGpuAddress() + commandStream.getUsed() - dispatchCommandsSize; + if (staticPartitioning) { UNRECOVERABLE_IF(tileCount != partitionCount); WalkerPartition::constructStaticallyPartitionedCommandBuffer(commandBuffer, @@ -126,7 +128,7 @@ void ImplicitScalingDispatch::dispatchCommands(LinearStream &commandS args, hwInfo); } - commandStream.getSpace(totalProgrammedSize); + UNRECOVERABLE_IF(totalProgrammedSize != dispatchCommandsSize); } template @@ -166,8 +168,9 @@ void ImplicitScalingDispatch::dispatchBarrierCommands(LinearStream &c args.postSyncGpuAddress = gpuAddress; args.postSyncImmediateValue = immediateData; - uint64_t cmdBufferGpuAddress = commandStream.getGraphicsAllocation()->getGpuAddress() + commandStream.getUsed(); - void *commandBuffer = commandStream.getSpace(0u); + auto barrierCommandsSize = getBarrierSize(hwInfo, apiSelfCleanup, args.usePostSync); + void 
*commandBuffer = commandStream.getSpace(barrierCommandsSize); + uint64_t cmdBufferGpuAddress = commandStream.getGraphicsAllocation()->getGpuAddress() + commandStream.getUsed() - barrierCommandsSize; WalkerPartition::constructBarrierCommandBuffer(commandBuffer, cmdBufferGpuAddress, @@ -175,7 +178,7 @@ void ImplicitScalingDispatch::dispatchBarrierCommands(LinearStream &c args, flushArgs, hwInfo); - commandStream.getSpace(totalProgrammedSize); + UNRECOVERABLE_IF(totalProgrammedSize != barrierCommandsSize); } template diff --git a/shared/source/command_stream/command_stream_receiver.h b/shared/source/command_stream/command_stream_receiver.h index 07abed92e3..ef69a4e252 100644 --- a/shared/source/command_stream/command_stream_receiver.h +++ b/shared/source/command_stream/command_stream_receiver.h @@ -273,7 +273,7 @@ class CommandStreamReceiver { uint64_t getWorkPartitionAllocationGpuAddress() const; - bool isRcs() const; + MOCKABLE_VIRTUAL bool isRcs() const; virtual void initializeDefaultsForInternalEngine(){}; diff --git a/shared/source/command_stream/linear_stream.cpp b/shared/source/command_stream/linear_stream.cpp index 8ae903ddb0..5c2f07e8f6 100644 --- a/shared/source/command_stream/linear_stream.cpp +++ b/shared/source/command_stream/linear_stream.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2018-2021 Intel Corporation + * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -33,4 +33,10 @@ LinearStream::LinearStream(GraphicsAllocation *gfxAllocation) LinearStream::LinearStream() : LinearStream(nullptr) { } + +LinearStream::LinearStream(void *buffer, size_t bufferSize, CommandContainer *cmdContainer, size_t batchBufferEndSize) + : LinearStream(buffer, bufferSize) { + this->cmdContainer = cmdContainer; + this->batchBufferEndSize = batchBufferEndSize; +} } // namespace NEO diff --git a/shared/source/command_stream/linear_stream.h b/shared/source/command_stream/linear_stream.h index ba0169be80..6243e512a3 100644 --- 
a/shared/source/command_stream/linear_stream.h +++ b/shared/source/command_stream/linear_stream.h @@ -1,13 +1,16 @@ /* - * Copyright (C) 2018-2021 Intel Corporation + * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once +#include "shared/source/command_container/cmdcontainer.h" #include "shared/source/helpers/debug_helpers.h" +#include "shared/source/helpers/hw_helper.h" #include "shared/source/helpers/ptr_math.h" +#include "shared/source/helpers/string.h" #include #include @@ -23,6 +26,7 @@ class LinearStream { LinearStream(void *buffer, size_t bufferSize); LinearStream(GraphicsAllocation *buffer); LinearStream(GraphicsAllocation *gfxAllocation, void *buffer, size_t bufferSize); + LinearStream(void *buffer, size_t bufferSize, CommandContainer *cmdContainer, size_t batchBufferEndSize); void *getCpuBase() const; void *getSpace(size_t size); size_t getMaxAvailableSpace() const; @@ -44,6 +48,8 @@ class LinearStream { size_t maxAvailableSpace; void *buffer; GraphicsAllocation *graphicsAllocation; + CommandContainer *cmdContainer = nullptr; + size_t batchBufferEndSize = 0; }; inline void *LinearStream::getCpuBase() const { @@ -51,6 +57,10 @@ inline void *LinearStream::getCpuBase() const { } inline void *LinearStream::getSpace(size_t size) { + if (cmdContainer != nullptr && getAvailableSpace() < batchBufferEndSize + size) { + UNRECOVERABLE_IF(sizeUsed + batchBufferEndSize > maxAvailableSpace); + cmdContainer->closeAndAllocateNextCommandBuffer(); + } UNRECOVERABLE_IF(sizeUsed + size > maxAvailableSpace); auto memory = ptrOffset(buffer, sizeUsed); sizeUsed += size; diff --git a/shared/source/gen12lp/command_encoder_gen12lp.cpp b/shared/source/gen12lp/command_encoder_gen12lp.cpp index f2f547a819..c121a997d5 100644 --- a/shared/source/gen12lp/command_encoder_gen12lp.cpp +++ b/shared/source/gen12lp/command_encoder_gen12lp.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2020-2021 Intel Corporation + * Copyright (C) 2020-2022 Intel Corporation 
* * SPDX-License-Identifier: MIT * * @@ -32,11 +32,6 @@ size_t EncodeWA::getAdditionalPipelineSelectSize(Device &device) { return size; } -template <> -size_t EncodeStates::getAdjustStateComputeModeSize() { - return sizeof(typename Family::STATE_COMPUTE_MODE); -} - template <> void EncodeComputeMode::programComputeModeCommand(LinearStream &csr, StateComputeModeProperties &properties, const HardwareInfo &hwInfo) { using STATE_COMPUTE_MODE = typename Family::STATE_COMPUTE_MODE; diff --git a/shared/source/helpers/hw_helper.h b/shared/source/helpers/hw_helper.h index 18e04914e6..97398174ca 100644 --- a/shared/source/helpers/hw_helper.h +++ b/shared/source/helpers/hw_helper.h @@ -9,7 +9,6 @@ #include "shared/source/aub_mem_dump/aub_mem_dump.h" #include "shared/source/built_ins/sip.h" #include "shared/source/command_container/command_encoder.h" -#include "shared/source/command_stream/linear_stream.h" #include "shared/source/commands/bxml_generator_glue.h" #include "shared/source/helpers/aux_translation.h" #include "shared/source/helpers/definitions/engine_group_types.h" @@ -28,6 +27,7 @@ namespace NEO { class GmmHelper; class GraphicsAllocation; class TagAllocatorBase; +class LinearStream; class Gmm; struct AllocationData; struct AllocationProperties; @@ -155,6 +155,8 @@ class HwHelper { virtual bool forceNonGpuCoherencyWA(bool requiresCoherency) const = 0; virtual bool platformSupportsImplicitScaling(const NEO::HardwareInfo &hwInfo) const = 0; virtual bool isLinuxCompletionFenceSupported() const = 0; + virtual size_t getBatchBufferEndSize() const = 0; + virtual const void *getBatchBufferEndReference() const = 0; protected: HwHelper() = default; @@ -391,6 +393,8 @@ class HwHelperHw : public HwHelper { bool forceNonGpuCoherencyWA(bool requiresCoherency) const override; bool platformSupportsImplicitScaling(const NEO::HardwareInfo &hwInfo) const override; bool isLinuxCompletionFenceSupported() const override; + size_t getBatchBufferEndSize() const override; + const void 
*getBatchBufferEndReference() const override; protected: static const AuxTranslationMode defaultAuxTranslationMode; diff --git a/shared/source/helpers/hw_helper_base.inl b/shared/source/helpers/hw_helper_base.inl index 54a587da58..23ad870298 100644 --- a/shared/source/helpers/hw_helper_base.inl +++ b/shared/source/helpers/hw_helper_base.inl @@ -710,4 +710,12 @@ template bool HwHelperHw::forceNonGpuCoherencyWA(bool requiresCoherency) const { return requiresCoherency; } +template +size_t HwHelperHw::getBatchBufferEndSize() const { + return sizeof(typename GfxFamily::MI_BATCH_BUFFER_END); +} +template +const void *HwHelperHw::getBatchBufferEndReference() const { + return reinterpret_cast(&GfxFamily::cmdInitBatchBufferEnd); +} } // namespace NEO diff --git a/shared/source/helpers/preamble_bdw_and_later.inl b/shared/source/helpers/preamble_bdw_and_later.inl index 28c041d663..a395e5d3f4 100644 --- a/shared/source/helpers/preamble_bdw_and_later.inl +++ b/shared/source/helpers/preamble_bdw_and_later.inl @@ -1,5 +1,5 @@ /* - * Copyright (C) 2019-2021 Intel Corporation + * Copyright (C) 2019-2022 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -7,7 +7,6 @@ #include "shared/source/command_stream/stream_properties.h" #include "shared/source/helpers/flat_batch_buffer_helper.h" -#include "shared/source/helpers/hw_helper.h" #include "shared/source/helpers/preamble_base.inl" #include "shared/source/kernel/kernel_execution_type.h" diff --git a/shared/source/utilities/software_tags_manager.h b/shared/source/utilities/software_tags_manager.h index 8a95243af2..360a8c65ed 100644 --- a/shared/source/utilities/software_tags_manager.h +++ b/shared/source/utilities/software_tags_manager.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2021 Intel Corporation + * Copyright (C) 2021-2022 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -7,6 +7,7 @@ #pragma once #include "shared/source/command_container/command_encoder.h" +#include "shared/source/command_stream/linear_stream.h" #include 
"shared/source/debug_settings/debug_settings_manager.h" #include "shared/source/memory_manager/memory_manager.h" #include "shared/source/utilities/software_tags.h" diff --git a/shared/test/common/gen12lp/command_encoder_tests_gen12lp.cpp b/shared/test/common/gen12lp/command_encoder_tests_gen12lp.cpp index 8c7d8ec2bd..5c7d2e9cdd 100644 --- a/shared/test/common/gen12lp/command_encoder_tests_gen12lp.cpp +++ b/shared/test/common/gen12lp/command_encoder_tests_gen12lp.cpp @@ -1,11 +1,14 @@ /* - * Copyright (C) 2021 Intel Corporation + * Copyright (C) 2021-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_container/command_encoder.h" +#include "shared/source/helpers/preamble.h" +#include "shared/test/common/mocks/mock_command_stream_receiver.h" +#include "shared/test/common/mocks/mock_device.h" #include "shared/test/common/test_macros/test.h" using namespace NEO; @@ -14,3 +17,30 @@ using Gen12LpCommandEncodeTest = testing::Test; GEN12LPTEST_F(Gen12LpCommandEncodeTest, givenGen12LpPlatformWhenDoBindingTablePrefetchIsCalledThenReturnsTrue) { EXPECT_FALSE(EncodeSurfaceState::doBindingTablePrefetch()); } + +template +class MyCommandStreamReceiverMock : public MockCommandStreamReceiver { + public: + MyCommandStreamReceiverMock(ExecutionEnvironment &executionEnvironment, uint32_t rootDeviceIndex, const DeviceBitfield deviceBitfield) : MockCommandStreamReceiver(executionEnvironment, rootDeviceIndex, deviceBitfield) {} + bool isRcs() const override { + return rcs; + } +}; + +GEN12LPTEST_F(Gen12LpCommandEncodeTest, givenGen12LpPlatformWhenDefaultEngineIsRcsThenAdditionalPipelineSelectSizeEqualTwoPipelineSelectSize) { + MockDevice device; + auto csr = std::make_unique>(*device.getExecutionEnvironment(), 0, device.getDeviceBitfield()); + auto oldCsr = device.getDefaultEngine().commandStreamReceiver; + device.getDefaultEngine().commandStreamReceiver = csr.get(); + EXPECT_EQ(2 * 
PreambleHelper::getCmdSizeForPipelineSelect(device.getHardwareInfo()), EncodeWA::getAdditionalPipelineSelectSize(device)); + device.getDefaultEngine().commandStreamReceiver = oldCsr; +} + +GEN12LPTEST_F(Gen12LpCommandEncodeTest, givenGen12LpPlatformWhenDefaultEngineIsNotRcsThenAdditionalPipelineSelectSizeEqualZero) { + MockDevice device; + auto csr = std::make_unique>(*device.getExecutionEnvironment(), 0, device.getDeviceBitfield()); + auto oldCsr = device.getDefaultEngine().commandStreamReceiver; + device.getDefaultEngine().commandStreamReceiver = csr.get(); + EXPECT_EQ(0u, EncodeWA::getAdditionalPipelineSelectSize(device)); + device.getDefaultEngine().commandStreamReceiver = oldCsr; +} \ No newline at end of file diff --git a/shared/test/common/gen12lp/test_command_encoder_gen12lp.cpp b/shared/test/common/gen12lp/test_command_encoder_gen12lp.cpp index 44ae10826c..690029caf8 100644 --- a/shared/test/common/gen12lp/test_command_encoder_gen12lp.cpp +++ b/shared/test/common/gen12lp/test_command_encoder_gen12lp.cpp @@ -103,22 +103,6 @@ GEN12LPTEST_F(CommandEncoderTest, givenVariousEngineTypesWhenEncodeSBAThenAdditi } } -GEN12LPTEST_F(CommandEncoderTest, givenVariousEngineTypesWhenEstimateCommandBufferSizeThenRcsHasAdditionalPipelineSelectWASize) { - using PIPELINE_SELECT = typename FamilyType::PIPELINE_SELECT; - using STATE_COMPUTE_MODE = typename FamilyType::STATE_COMPUTE_MODE; - - auto sizeWA = EncodeDispatchKernel::estimateEncodeDispatchKernelCmdsSize(pDevice, Vec3(0, 0, 0), - Vec3(1, 1, 1), false, false, false, nullptr, false); - static_cast(pDevice->getDefaultEngine().osContext)->engineType = aub_stream::ENGINE_CCS; - auto size = EncodeDispatchKernel::estimateEncodeDispatchKernelCmdsSize(pDevice, Vec3(0, 0, 0), - Vec3(1, 1, 1), false, false, false, nullptr, false); - - auto expectedDiff = 2 * PreambleHelper::getCmdSizeForPipelineSelect(pDevice->getHardwareInfo()); - auto diff = sizeWA - size; - - EXPECT_EQ(expectedDiff, diff); -} - 
GEN12LPTEST_F(CommandEncoderTest, GivenGen12LpWhenProgrammingL3StateOnThenExpectNoCommandsDispatched) { using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM; diff --git a/shared/test/common/gen12lp/test_encode_gen12lp.cpp b/shared/test/common/gen12lp/test_encode_gen12lp.cpp index 3a6c57c292..96272bf93b 100644 --- a/shared/test/common/gen12lp/test_encode_gen12lp.cpp +++ b/shared/test/common/gen12lp/test_encode_gen12lp.cpp @@ -1,11 +1,12 @@ /* - * Copyright (C) 2021 Intel Corporation + * Copyright (C) 2021-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_container/command_encoder.h" +#include "shared/source/command_stream/linear_stream.h" #include "shared/source/command_stream/stream_properties.h" #include "shared/test/common/helpers/default_hw_info.h" #include "shared/test/common/test_macros/test.h" diff --git a/shared/test/common/xe_hpg_core/dg2/test_encode_dg2.cpp b/shared/test/common/xe_hpg_core/dg2/test_encode_dg2.cpp index a63d39925e..17191201b0 100644 --- a/shared/test/common/xe_hpg_core/dg2/test_encode_dg2.cpp +++ b/shared/test/common/xe_hpg_core/dg2/test_encode_dg2.cpp @@ -1,11 +1,12 @@ /* - * Copyright (C) 2021 Intel Corporation + * Copyright (C) 2021-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_container/command_encoder.h" +#include "shared/source/command_stream/linear_stream.h" #include "shared/source/command_stream/stream_properties.h" #include "shared/test/common/helpers/default_hw_info.h" #include "shared/test/common/test_macros/test.h" diff --git a/shared/test/unit_test/command_container/command_container_tests.cpp b/shared/test/unit_test/command_container/command_container_tests.cpp index 8647c2abbe..21fdcf2429 100644 --- a/shared/test/unit_test/command_container/command_container_tests.cpp +++ b/shared/test/unit_test/command_container/command_container_tests.cpp @@ -6,6 +6,7 @@ */ #include "shared/source/command_container/cmdcontainer.h" 
+#include "shared/source/command_stream/linear_stream.h" #include "shared/source/memory_manager/allocations_list.h" #include "shared/test/common/fixtures/device_fixture.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" @@ -475,7 +476,8 @@ TEST_F(CommandContainerTest, whenAllocateNextCmdBufferIsCalledThenNewAllocationI EXPECT_NE(nullptr, nextBuffer); EXPECT_EQ(0u, sizeUsed); EXPECT_NE(initialBuffer, nextBuffer); - const size_t cmdBufSize = CommandContainer::defaultListCmdBufferSize; + size_t alignedSize = alignUp(CommandContainer::totalCmdBufferSize, MemoryConstants::pageSize64k); + const size_t cmdBufSize = alignedSize - CommandContainer::cmdBufferReservedSize; EXPECT_EQ(cmdBufSize, availableSize); ASSERT_EQ(2u, cmdContainer->getCmdBufferAllocations().size()); @@ -682,3 +684,48 @@ TEST_F(CommandContainerTest, givenContainerAllocatesNextCommandBufferWhenResetin } EXPECT_TRUE(firstAllocationFound); } + +class MyLinearStreamMock : public LinearStream { + public: + using LinearStream::cmdContainer; +}; + +TEST_F(CommandContainerTest, givenCmdContainerWhenContainerIsInitializedThenStreamContainsContainerPtr) { + CommandContainer cmdContainer; + cmdContainer.initialize(pDevice, nullptr); + + EXPECT_EQ(reinterpret_cast(cmdContainer.getCommandStream())->cmdContainer, &cmdContainer); +} + +TEST_F(CommandContainerTest, givenCmdContainerWhenContainerIsInitializedThenStreamSizeEqualAlignedTotalCmdBuffSizeDecreasedOfReservedSize) { + CommandContainer cmdContainer; + cmdContainer.initialize(pDevice, nullptr); + size_t alignedSize = alignUp(CommandContainer::totalCmdBufferSize, MemoryConstants::pageSize64k); + EXPECT_EQ(cmdContainer.getCommandStream()->getMaxAvailableSpace(), alignedSize - CommandContainer::cmdBufferReservedSize); +} + +TEST_F(CommandContainerTest, givenCmdContainerWhenAlocatingNextCmdBufferThenStreamSizeEqualAlignedTotalCmdBuffSizeDecreasedOfReservedSize) { + CommandContainer cmdContainer; + cmdContainer.initialize(pDevice, nullptr); + 
cmdContainer.allocateNextCommandBuffer(); + size_t alignedSize = alignUp(CommandContainer::totalCmdBufferSize, MemoryConstants::pageSize64k); + EXPECT_EQ(cmdContainer.getCommandStream()->getMaxAvailableSpace(), alignedSize - CommandContainer::cmdBufferReservedSize); +} + +TEST_F(CommandContainerTest, givenCmdContainerWhenCloseAndAllocateNextCommandBufferCalledThenBBEndPlacedAtEndOfLinearStream) { + CommandContainer cmdContainer; + cmdContainer.initialize(pDevice, nullptr); + auto &hwInfo = pDevice->getHardwareInfo(); + auto &hwHelper = HwHelper::get(hwInfo.platform.eRenderCoreFamily); + auto ptr = cmdContainer.getCommandStream()->getSpace(0u); + cmdContainer.closeAndAllocateNextCommandBuffer(); + EXPECT_EQ(memcmp(ptr, hwHelper.getBatchBufferEndReference(), hwHelper.getBatchBufferEndSize()), 0); +} + +TEST_F(CommandContainerTest, givenCmdContainerWhenCloseAndAllocateNextCommandBufferCalledThenNewCmdBufferAllocationCreated) { + CommandContainer cmdContainer; + cmdContainer.initialize(pDevice, nullptr); + EXPECT_EQ(cmdContainer.getCmdBufferAllocations().size(), 1u); + cmdContainer.closeAndAllocateNextCommandBuffer(); + EXPECT_EQ(cmdContainer.getCmdBufferAllocations().size(), 2u); +} \ No newline at end of file diff --git a/shared/test/unit_test/command_container/command_encoder_tests.cpp b/shared/test/unit_test/command_container/command_encoder_tests.cpp index 9f87cd371d..cd6bd076ae 100644 --- a/shared/test/unit_test/command_container/command_encoder_tests.cpp +++ b/shared/test/unit_test/command_container/command_encoder_tests.cpp @@ -6,6 +6,7 @@ */ #include "shared/source/command_container/command_encoder.h" +#include "shared/source/command_stream/linear_stream.h" #include "shared/source/memory_manager/graphics_allocation.h" #include "shared/test/common/helpers/default_hw_info.h" #include "shared/test/common/helpers/unit_test_helper.h" diff --git a/shared/test/unit_test/command_stream/linear_stream_tests.cpp 
b/shared/test/unit_test/command_stream/linear_stream_tests.cpp index 19967a3448..a65c92b087 100644 --- a/shared/test/unit_test/command_stream/linear_stream_tests.cpp +++ b/shared/test/unit_test/command_stream/linear_stream_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2018-2021 Intel Corporation + * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -7,6 +7,7 @@ #include "shared/source/command_stream/linear_stream.h" #include "shared/source/memory_manager/graphics_allocation.h" +#include "shared/test/common/fixtures/device_fixture.h" #include "shared/test/common/fixtures/linear_stream_fixture.h" #include "shared/test/common/mocks/mock_graphics_allocation.h" @@ -109,3 +110,87 @@ TEST_F(LinearStreamTest, givenNewGraphicsAllocationWhenReplaceIsCalledThenLinear linearStream.replaceGraphicsAllocation(&newGraphicsAllocation); EXPECT_EQ(&newGraphicsAllocation, linearStream.getGraphicsAllocation()); } + +class MyLinearStreamMock : public LinearStream { + public: + using LinearStream::sizeUsed; +}; + +TEST_F(LinearStreamTest, givenLinearStreamWithoutCmdContainerWhenOneByteLeftInStreamThenGetSpaceDontThrowAbort) { + reinterpret_cast(&linearStream)->sizeUsed = linearStream.getMaxAvailableSpace() - 1; + EXPECT_NO_THROW(linearStream.getSpace(1)); +} +using CommandContainerLinearStreamTest = Test; +TEST_F(CommandContainerLinearStreamTest, givenLinearStreamWithCmdContainerWhenOneByteLeftInStreamThenGetSpaceThrowAbort) { + CommandContainer cmdContainer; + cmdContainer.initialize(pDevice, nullptr); + auto stream = reinterpret_cast(cmdContainer.getCommandStream()); + stream->sizeUsed = stream->getMaxAvailableSpace() - 1; + EXPECT_THROW(stream->getSpace(1), std::exception); +} + +TEST_F(CommandContainerLinearStreamTest, givenLinearStreamWithCmdContainerWhenThereIsNoSpaceForCommandAndBBEndThenNewCmdBufferAllocated) { + CommandContainer cmdContainer; + cmdContainer.initialize(pDevice, nullptr); + auto &hwInfo = pDevice->getHardwareInfo(); + auto &hwHelper 
= HwHelper::get(hwInfo.platform.eRenderCoreFamily); + auto stream = reinterpret_cast(cmdContainer.getCommandStream()); + size_t dummyCommandSize = 2; + stream->sizeUsed = stream->getMaxAvailableSpace() - hwHelper.getBatchBufferEndSize() - (dummyCommandSize - 1); + EXPECT_EQ(cmdContainer.getCmdBufferAllocations().size(), 1u); + stream->getSpace(dummyCommandSize); + EXPECT_EQ(cmdContainer.getCmdBufferAllocations().size(), 2u); +} + +TEST_F(CommandContainerLinearStreamTest, givenLinearStreamWithCmdContainerWhenThereIsNoSpaceForCommandAndBBEndThenLinearStreamHasNewAllocation) { + CommandContainer cmdContainer; + cmdContainer.initialize(pDevice, nullptr); + auto &hwInfo = pDevice->getHardwareInfo(); + auto &hwHelper = HwHelper::get(hwInfo.platform.eRenderCoreFamily); + auto stream = reinterpret_cast(cmdContainer.getCommandStream()); + size_t dummyCommandSize = 2; + stream->sizeUsed = stream->getMaxAvailableSpace() - hwHelper.getBatchBufferEndSize() - (dummyCommandSize - 1); + auto oldBuffer = stream->getCpuBase(); + stream->getSpace(dummyCommandSize); + auto newBuffer = stream->getCpuBase(); + EXPECT_NE(newBuffer, oldBuffer); +} + +TEST_F(CommandContainerLinearStreamTest, givenLinearStreamWithCmdContainerWhenThereIsNoSpaceForCommandAndBBEndThenGetSpaceReturnPtrFromNewAllocation) { + CommandContainer cmdContainer; + cmdContainer.initialize(pDevice, nullptr); + auto &hwInfo = pDevice->getHardwareInfo(); + auto &hwHelper = HwHelper::get(hwInfo.platform.eRenderCoreFamily); + auto stream = reinterpret_cast(cmdContainer.getCommandStream()); + size_t dummyCommandSize = 2; + stream->sizeUsed = stream->getMaxAvailableSpace() - hwHelper.getBatchBufferEndSize() - (dummyCommandSize - 1); + auto ptr = stream->getSpace(dummyCommandSize); + auto buffer = stream->getCpuBase(); + EXPECT_EQ(buffer, ptr); +} + +TEST_F(CommandContainerLinearStreamTest, givenLinearStreamWithCmdContainerWhenThereIsSpaceForCommandAndBBEndThenNewCmdBufferIsNotAllocated) { + CommandContainer cmdContainer; + 
cmdContainer.initialize(pDevice, nullptr); + auto &hwInfo = pDevice->getHardwareInfo(); + auto &hwHelper = HwHelper::get(hwInfo.platform.eRenderCoreFamily); + auto stream = reinterpret_cast(cmdContainer.getCommandStream()); + size_t dummyCommandSize = 2; + stream->sizeUsed = stream->getMaxAvailableSpace() - hwHelper.getBatchBufferEndSize() - (dummyCommandSize); + EXPECT_EQ(cmdContainer.getCmdBufferAllocations().size(), 1u); + stream->getSpace(dummyCommandSize); + EXPECT_EQ(cmdContainer.getCmdBufferAllocations().size(), 1u); +} + +TEST_F(CommandContainerLinearStreamTest, givenLinearStreamWithCmdContainerWhenThereIsNoSpaceForCommandAndBBEndThenBBEndAddedAtEndOfStream) { + CommandContainer cmdContainer; + cmdContainer.initialize(pDevice, nullptr); + auto &hwInfo = pDevice->getHardwareInfo(); + auto &hwHelper = HwHelper::get(hwInfo.platform.eRenderCoreFamily); + auto stream = reinterpret_cast(cmdContainer.getCommandStream()); + size_t dummyCommandSize = 2; + stream->sizeUsed = stream->getMaxAvailableSpace() - hwHelper.getBatchBufferEndSize() - (dummyCommandSize - 1); + auto ptr = stream->getSpace(0u); + stream->getSpace(dummyCommandSize); + EXPECT_EQ(memcmp(ptr, hwHelper.getBatchBufferEndReference(), hwHelper.getBatchBufferEndSize()), 0); +} \ No newline at end of file diff --git a/shared/test/unit_test/encoders/test_encode_dispatch_kernel_xehp_and_later.cpp b/shared/test/unit_test/encoders/test_encode_dispatch_kernel_xehp_and_later.cpp index 113b6050ce..220ca03cef 100644 --- a/shared/test/unit_test/encoders/test_encode_dispatch_kernel_xehp_and_later.cpp +++ b/shared/test/unit_test/encoders/test_encode_dispatch_kernel_xehp_and_later.cpp @@ -955,8 +955,6 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandEncodeStatesDynamicImplicitScaling, givenImp bool requiresUncachedMocs = false; bool isInternal = false; - size_t regularEstimateSize = EncodeDispatchKernel::estimateEncodeDispatchKernelCmdsSize( - pDevice, Vec3(0, 0, 0), Vec3(16, 1, 1), isInternal, false, false, nullptr, false); 
EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs); dispatchArgs.isInternal = isInternal; @@ -972,8 +970,6 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandEncodeStatesDynamicImplicitScaling, givenImp EXPECT_EQ(WALKER_TYPE::PARTITION_TYPE::PARTITION_TYPE_DISABLED, baseWalkerCmd->getPartitionType()); EXPECT_EQ(16u, baseWalkerCmd->getThreadGroupIdXDimension()); - size_t partitionEstimateSize = EncodeDispatchKernel::estimateEncodeDispatchKernelCmdsSize( - pDevice, Vec3(0, 0, 0), Vec3(16, 1, 1), isInternal, false, false, nullptr, true); dispatchArgs.partitionCount = 2; EncodeDispatchKernel::encode(*cmdContainer.get(), dispatchArgs); @@ -982,7 +978,6 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandEncodeStatesDynamicImplicitScaling, givenImp size_t expectedPartitionedWalkerSize = ImplicitScalingDispatch::getSize(true, false, pDevice->getDeviceBitfield(), Vec3(0, 0, 0), Vec3(16, 1, 1)); EXPECT_EQ(expectedPartitionedWalkerSize, partitionedWalkerSize); - EXPECT_EQ(partitionEstimateSize, regularEstimateSize + expectedPartitionedWalkerSize); GenCmdList partitionedWalkerList; CmdParse::parseCommandBuffer( @@ -1020,23 +1015,18 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandEncodeStatesDynamicImplicitScaling, givenImp std::unique_ptr dispatchInterface(new MockDispatchKernelEncoder()); bool isInternal = false; - size_t baseEstimateSize = EncodeDispatchKernel::estimateEncodeDispatchKernelCmdsSize( - pDevice, Vec3(0, 0, 0), Vec3(16, 1, 1), isInternal, false, false, dispatchInterface.get(), false); bool requiresUncachedMocs = false; EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs); dispatchArgs.isInternal = isInternal; dispatchArgs.partitionCount = 2; - size_t partitionEstimateSize = EncodeDispatchKernel::estimateEncodeDispatchKernelCmdsSize( - pDevice, Vec3(0, 0, 0), Vec3(16, 1, 1), isInternal, false, false, dispatchInterface.get(), true); 
EncodeDispatchKernel::encode(*cmdContainer.get(), dispatchArgs); EXPECT_EQ(2u, dispatchArgs.partitionCount); size_t partitionedWalkerSize = cmdContainer->getCommandStream()->getUsed(); size_t expectedPartitionedWalkerSize = ImplicitScalingDispatch::getSize(true, false, pDevice->getDeviceBitfield(), Vec3(0, 0, 0), Vec3(16, 1, 1)); - EXPECT_EQ(partitionEstimateSize, baseEstimateSize + expectedPartitionedWalkerSize); EXPECT_EQ(expectedPartitionedWalkerSize, partitionedWalkerSize); GenCmdList partitionedWalkerList; @@ -1124,23 +1114,17 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandEncodeStatesDynamicImplicitScaling, std::unique_ptr dispatchInterface(new MockDispatchKernelEncoder()); bool isInternal = false; - size_t baseEstimateSize = EncodeDispatchKernel::estimateEncodeDispatchKernelCmdsSize( - pDevice, Vec3(0, 0, 0), Vec3(16, 1, 1), isInternal, false, false, dispatchInterface.get(), false); - bool requiresUncachedMocs = false; EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs); dispatchArgs.isInternal = isInternal; dispatchArgs.partitionCount = 2; - size_t partitionEstimateSize = EncodeDispatchKernel::estimateEncodeDispatchKernelCmdsSize( - pDevice, Vec3(0, 0, 0), Vec3(16, 1, 1), isInternal, false, false, dispatchInterface.get(), true); EncodeDispatchKernel::encode(*cmdContainer.get(), dispatchArgs); EXPECT_EQ(2u, dispatchArgs.partitionCount); size_t partitionedWalkerSize = cmdContainer->getCommandStream()->getUsed(); size_t expectedPartitionedWalkerSize = ImplicitScalingDispatch::getSize(true, false, pDevice->getDeviceBitfield(), Vec3(0, 0, 0), Vec3(16, 1, 1)); - EXPECT_EQ(partitionEstimateSize, baseEstimateSize + expectedPartitionedWalkerSize); EXPECT_EQ(expectedPartitionedWalkerSize, partitionedWalkerSize); GenCmdList partitionedWalkerList; @@ -1187,20 +1171,12 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandEncodeStatesDynamicImplicitScaling, givenImp uint32_t dims[] = {16, 1, 1}; std::unique_ptr 
dispatchInterface(new MockDispatchKernelEncoder()); - bool isInternal = false; - size_t baseEstimateSize = EncodeDispatchKernel::estimateEncodeDispatchKernelCmdsSize( - pDevice, Vec3(0, 0, 0), Vec3(16, 1, 1), isInternal, false, false, dispatchInterface.get(), false); - - isInternal = true; + bool isInternal = true; bool requiresUncachedMocs = false; EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs); dispatchArgs.isInternal = isInternal; dispatchArgs.partitionCount = 2; - size_t internalEstimateSize = EncodeDispatchKernel::estimateEncodeDispatchKernelCmdsSize( - pDevice, Vec3(0, 0, 0), Vec3(16, 1, 1), isInternal, false, false, dispatchInterface.get(), true); - EXPECT_EQ(baseEstimateSize, internalEstimateSize); - EncodeDispatchKernel::encode(*cmdContainer.get(), dispatchArgs); size_t internalWalkerSize = cmdContainer->getCommandStream()->getUsed();