From 7e0401d28041b7db592505f5b0b1ab8e88f7ccd4 Mon Sep 17 00:00:00 2001 From: Zbigniew Zdanowicz Date: Thu, 2 Feb 2023 18:57:24 +0000 Subject: [PATCH] Add improvements to heap estimation in level zero command lists - add estimation parameter for interface descriptor data count - add alignment parameter to the heap estimation for dynamic and surface heaps - extend encode interface and implementations to allow child heaps Related-To: NEO-5055 Signed-off-by: Zbigniew Zdanowicz --- .../core/source/cmdlist/cmdlist_hw_base.inl | 5 +- .../cmdlist/cmdlist_hw_xehp_and_later.inl | 5 +- .../sources/cmdlist/test_cmdlist_6.cpp | 7 ++- .../source/command_container/cmdcontainer.cpp | 19 ++++-- .../source/command_container/cmdcontainer.h | 3 +- .../command_container/command_encoder.h | 13 ++-- .../command_container/command_encoder.inl | 33 +++++++---- .../command_encoder_bdw_and_later.inl | 31 ++++++---- .../command_encoder_xehp_and_later.inl | 4 +- .../command_stream_receiver.cpp | 8 +++ .../command_stream/command_stream_receiver.h | 1 + shared/test/common/helpers/unit_test_helper.h | 2 +- .../unit_test_helper_bdw_and_later.inl | 6 +- .../unit_test_helper_xehp_and_later.inl | 4 +- .../command_container_tests.cpp | 59 +++++++++++++++---- .../encoders/test_encode_dispatch_kernel.cpp | 12 ++-- ...test_encode_media_interface_descriptor.cpp | 4 +- ...ia_interface_descriptor_xehp_and_later.cpp | 4 +- 18 files changed, 154 insertions(+), 66 deletions(-) diff --git a/level_zero/core/source/cmdlist/cmdlist_hw_base.inl b/level_zero/core/source/cmdlist/cmdlist_hw_base.inl index 52ba9dd4d2..a6ab5f2557 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw_base.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw_base.inl @@ -52,9 +52,12 @@ ze_result_t CommandListCoreFamily::appendLaunchKernelWithParams(K const auto kernelImmutableData = kernel->getImmutableData(); if (this->immediateCmdListHeapSharing || this->stateBaseAddressTracking) { auto kernelInfo = kernelImmutableData->getKernelInfo(); 
+ commandContainer.ensureHeapSizePrepared( NEO::EncodeDispatchKernel::getSizeRequiredSsh(*kernelInfo), - NEO::EncodeDispatchKernel::getSizeRequiredDsh(kernelDescriptor), true); + NEO::EncodeDispatchKernel::getDefaultSshAlignment(), + NEO::EncodeDispatchKernel::getSizeRequiredDsh(kernelDescriptor, commandContainer.getNumIddPerBlock()), + NEO::EncodeDispatchKernel::getDefaultDshAlignment(), true); ssh = commandContainer.getIndirectHeap(NEO::HeapType::SURFACE_STATE); dsh = commandContainer.getIndirectHeap(NEO::HeapType::DYNAMIC_STATE); diff --git a/level_zero/core/source/cmdlist/cmdlist_hw_xehp_and_later.inl b/level_zero/core/source/cmdlist/cmdlist_hw_xehp_and_later.inl index 20398bcd3d..4781f45d8f 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw_xehp_and_later.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw_xehp_and_later.inl @@ -143,12 +143,13 @@ ze_result_t CommandListCoreFamily::appendLaunchKernelWithParams(K if (this->immediateCmdListHeapSharing || this->stateBaseAddressTracking) { const bool getDsh = NEO::EncodeDispatchKernel::isDshNeeded(device->getDeviceInfo()); - auto kernelInfo = kernelImmutableData->getKernelInfo(); commandContainer.ensureHeapSizePrepared( NEO::EncodeDispatchKernel::getSizeRequiredSsh(*kernelInfo), - NEO::EncodeDispatchKernel::getSizeRequiredDsh(kernelDescriptor), getDsh); + NEO::EncodeDispatchKernel::getDefaultSshAlignment(), + NEO::EncodeDispatchKernel::getSizeRequiredDsh(kernelDescriptor, 0), + NEO::EncodeDispatchKernel::getDefaultDshAlignment(), getDsh); ssh = commandContainer.getIndirectHeap(NEO::HeapType::SURFACE_STATE); if (getDsh) { diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_6.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_6.cpp index a676bf766c..e65b8af855 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_6.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_6.cpp @@ -814,7 +814,9 @@ HWTEST2_F(ImmediateCmdListSharedHeapsTest, 
givenMultipleCommandListsUsingSharedH expectedSbaCount++; } - bool dshPresent = hwInfo.capabilityTable.supportsImages || NEO::UnitTestHelper::getAdditionalDshSize() > 0; + auto &cmdContainer = commandListImmediate->commandContainer; + + bool dshPresent = hwInfo.capabilityTable.supportsImages || NEO::UnitTestHelper::getAdditionalDshSize(cmdContainer.getNumIddPerBlock()) > 0; if (dshPresent) { mockKernelImmData->kernelInfo->kernelDescriptor.payloadMappings.samplerTable.numSamplers = 2; @@ -838,7 +840,6 @@ HWTEST2_F(ImmediateCmdListSharedHeapsTest, givenMultipleCommandListsUsingSharedH EXPECT_TRUE(commandListImmediate->isFlushTaskSubmissionEnabled); EXPECT_TRUE(commandListImmediate->immediateCmdListHeapSharing); - auto &cmdContainer = commandListImmediate->commandContainer; EXPECT_EQ(1u, cmdContainer.getNumIddPerBlock()); EXPECT_TRUE(cmdContainer.immediateCmdListSharedHeap(HeapType::DYNAMIC_STATE)); EXPECT_TRUE(cmdContainer.immediateCmdListSharedHeap(HeapType::SURFACE_STATE)); @@ -899,7 +900,7 @@ HWTEST2_F(ImmediateCmdListSharedHeapsTest, givenMultipleCommandListsUsingSharedH } EXPECT_LT(0u, sshUsed); - size_t dshEstimated = NEO::EncodeDispatchKernel::getSizeRequiredDsh(kernel->getKernelDescriptor()); + size_t dshEstimated = NEO::EncodeDispatchKernel::getSizeRequiredDsh(kernel->getKernelDescriptor(), cmdContainer.getNumIddPerBlock()); size_t sshEstimated = NEO::EncodeDispatchKernel::getSizeRequiredSsh(*kernel->getImmutableData()->getKernelInfo()); EXPECT_GE(dshEstimated, dshUsed); diff --git a/shared/source/command_container/cmdcontainer.cpp b/shared/source/command_container/cmdcontainer.cpp index 96c17725bd..cd369afa1f 100644 --- a/shared/source/command_container/cmdcontainer.cpp +++ b/shared/source/command_container/cmdcontainer.cpp @@ -331,18 +331,27 @@ IndirectHeap *CommandContainer::getIndirectHeap(HeapType heapType) { } } -void CommandContainer::ensureHeapSizePrepared(size_t sshRequiredSize, size_t dshRequiredSize, bool getDsh) { +IndirectHeap 
*CommandContainer::getCsrAlignedSize(HeapType heapType, size_t size, size_t alignment) { + void *ptr = immediateCmdListCsr->getIndirectHeapCurrentPtr(heapType); + size_t totalSize = size + ptrDiff(alignUp(ptr, alignment), ptr); + + auto baseHeap = &immediateCmdListCsr->getIndirectHeap(heapType, totalSize); + + return baseHeap; +} + +void CommandContainer::ensureHeapSizePrepared(size_t sshRequiredSize, size_t sshDefaultAlignment, size_t dshRequiredSize, size_t dshDefaultAlignment, bool getDsh) { if (immediateCmdListCsr) { auto lock = immediateCmdListCsr->obtainUniqueOwnership(); - sharedSshCsrHeap = &immediateCmdListCsr->getIndirectHeap(HeapType::SURFACE_STATE, sshRequiredSize); + sharedSshCsrHeap = getCsrAlignedSize(HeapType::SURFACE_STATE, sshRequiredSize, sshDefaultAlignment); if (getDsh) { - sharedDshCsrHeap = &immediateCmdListCsr->getIndirectHeap(HeapType::DYNAMIC_STATE, dshRequiredSize); + sharedDshCsrHeap = getCsrAlignedSize(HeapType::DYNAMIC_STATE, dshRequiredSize, dshDefaultAlignment); } } else { - this->getHeapWithRequiredSizeAndAlignment(HeapType::SURFACE_STATE, sshRequiredSize, 0); + this->getHeapWithRequiredSizeAndAlignment(HeapType::SURFACE_STATE, sshRequiredSize, sshDefaultAlignment); if (getDsh) { - this->getHeapWithRequiredSizeAndAlignment(HeapType::DYNAMIC_STATE, dshRequiredSize, 0); + this->getHeapWithRequiredSizeAndAlignment(HeapType::DYNAMIC_STATE, dshRequiredSize, dshDefaultAlignment); } } } diff --git a/shared/source/command_container/cmdcontainer.h b/shared/source/command_container/cmdcontainer.h index f1de0e0e38..ea1f619860 100644 --- a/shared/source/command_container/cmdcontainer.h +++ b/shared/source/command_container/cmdcontainer.h @@ -119,7 +119,7 @@ class CommandContainer : public NonCopyableOrMovableClass { bool immediateCmdListSharedHeap(HeapType heapType) { return (heapSharingEnabled && (heapType == HeapType::DYNAMIC_STATE || heapType == HeapType::SURFACE_STATE)); } - void ensureHeapSizePrepared(size_t sshRequiredSize, size_t 
dshRequiredSize, bool getDsh); + void ensureHeapSizePrepared(size_t sshRequiredSize, size_t sshDefaultAlignment, size_t dshRequiredSize, size_t dshDefaultAlignment, bool getDsh); GraphicsAllocation *reuseExistingCmdBuffer(); GraphicsAllocation *allocateCommandBuffer(); @@ -141,6 +141,7 @@ class CommandContainer : public NonCopyableOrMovableClass { size_t getTotalCmdBufferSize(); IndirectHeap *getHeapWithRequiredSize(HeapType heapType, size_t sizeRequired, size_t alignment, bool allowGrow); void createAndAssignNewHeap(HeapType heapType, size_t size); + IndirectHeap *getCsrAlignedSize(HeapType heapType, size_t size, size_t alignment); GraphicsAllocation *allocationIndirectHeaps[HeapType::NUM_TYPES] = {}; std::unique_ptr indirectHeaps[HeapType::NUM_TYPES]; diff --git a/shared/source/command_container/command_encoder.h b/shared/source/command_container/command_encoder.h index 9dd9662a2a..ee07b83384 100644 --- a/shared/source/command_container/command_encoder.h +++ b/shared/source/command_container/command_encoder.h @@ -99,7 +99,7 @@ struct EncodeDispatchKernel { static void setGrfInfo(INTERFACE_DESCRIPTOR_DATA *pInterfaceDescriptor, uint32_t numGrf, const size_t &sizeCrossThreadData, const size_t &sizePerThreadData, const HardwareInfo &hwInfo); - static void *getInterfaceDescriptor(CommandContainer &container, uint32_t &iddOffset); + static void *getInterfaceDescriptor(CommandContainer &container, IndirectHeap *childDsh, uint32_t &iddOffset); static bool isRuntimeLocalIdsGenerationRequired(uint32_t activeChannels, const size_t *lws, @@ -138,10 +138,15 @@ struct EncodeDispatchKernel { static constexpr bool shouldUpdateGlobalAtomics(bool &currentVal, bool refVal, bool updateCurrent); - static size_t getSizeRequiredDsh(const KernelDescriptor &kernelDescriptor); + static size_t getSizeRequiredDsh(const KernelDescriptor &kernelDescriptor, uint32_t iddCount); static size_t getSizeRequiredSsh(const KernelInfo &kernelInfo); - inline static uint32_t additionalSizeRequiredDsh(); + 
inline static size_t additionalSizeRequiredDsh(uint32_t iddCount); static bool isDshNeeded(const DeviceInfo &deviceInfo); + static size_t getDefaultDshAlignment(); + static constexpr size_t getDefaultSshAlignment() { + using BINDING_TABLE_STATE = typename GfxFamily::BINDING_TABLE_STATE; + return BINDING_TABLE_STATE::SURFACESTATEPOINTER_ALIGN_SIZE; + } }; template @@ -298,7 +303,7 @@ template struct EncodeMediaInterfaceDescriptorLoad { using INTERFACE_DESCRIPTOR_DATA = typename GfxFamily::INTERFACE_DESCRIPTOR_DATA; - static void encode(CommandContainer &container); + static void encode(CommandContainer &container, IndirectHeap *childDsh); }; template diff --git a/shared/source/command_container/command_encoder.inl b/shared/source/command_container/command_encoder.inl index 7d308688a1..662ea9a97b 100644 --- a/shared/source/command_container/command_encoder.inl +++ b/shared/source/command_container/command_encoder.inl @@ -546,16 +546,23 @@ template void EncodeSurfaceState::encodeImplicitScalingParams(const EncodeSurfaceStateArgs &args) {} template -void *EncodeDispatchKernel::getInterfaceDescriptor(CommandContainer &container, uint32_t &iddOffset) { +void *EncodeDispatchKernel::getInterfaceDescriptor(CommandContainer &container, IndirectHeap *childDsh, uint32_t &iddOffset) { if (container.nextIddInBlock == container.getNumIddPerBlock()) { if (ApiSpecificConfig::getBindlessConfiguration()) { container.getDevice()->getBindlessHeapsHelper()->getHeap(BindlessHeapsHelper::BindlesHeapType::GLOBAL_DSH)->align(EncodeStates::alignInterfaceDescriptorData); container.setIddBlock(container.getDevice()->getBindlessHeapsHelper()->getSpaceInHeap(sizeof(INTERFACE_DESCRIPTOR_DATA) * container.getNumIddPerBlock(), BindlessHeapsHelper::BindlesHeapType::GLOBAL_DSH)); } else { - container.getIndirectHeap(HeapType::DYNAMIC_STATE)->align(EncodeStates::alignInterfaceDescriptorData); - container.setIddBlock(container.getHeapSpaceAllowGrow(HeapType::DYNAMIC_STATE, - 
sizeof(INTERFACE_DESCRIPTOR_DATA) * container.getNumIddPerBlock())); + void *heapPointer = nullptr; + size_t heapSize = sizeof(INTERFACE_DESCRIPTOR_DATA) * container.getNumIddPerBlock(); + if (childDsh != nullptr) { + childDsh->align(EncodeStates::alignInterfaceDescriptorData); + heapPointer = childDsh->getSpace(heapSize); + } else { + container.getIndirectHeap(HeapType::DYNAMIC_STATE)->align(EncodeStates::alignInterfaceDescriptorData); + heapPointer = container.getHeapSpaceAllowGrow(HeapType::DYNAMIC_STATE, heapSize); + } + container.setIddBlock(heapPointer); } container.nextIddInBlock = 0; } @@ -726,25 +733,25 @@ template constexpr bool EncodeDispatchKernel::shouldUpdateGlobalAtomics(bool &currentVal, bool refVal, bool updateCurrent) { return false; } template -size_t EncodeDispatchKernel::getSizeRequiredDsh(const KernelDescriptor &kernelDescriptor) { +size_t EncodeDispatchKernel::getSizeRequiredDsh(const KernelDescriptor &kernelDescriptor, uint32_t iddCount) { using INTERFACE_DESCRIPTOR_DATA = typename Family::INTERFACE_DESCRIPTOR_DATA; constexpr auto samplerStateSize = sizeof(typename Family::SAMPLER_STATE); const auto numSamplers = kernelDescriptor.payloadMappings.samplerTable.numSamplers; - const auto additionalDshSize = additionalSizeRequiredDsh(); + const auto additionalDshSize = additionalSizeRequiredDsh(iddCount); if (numSamplers == 0U) { - return alignUp(additionalDshSize, EncodeStates::alignInterfaceDescriptorData); + return alignUp(additionalDshSize, EncodeDispatchKernel::getDefaultDshAlignment()); } size_t size = kernelDescriptor.payloadMappings.samplerTable.tableOffset - kernelDescriptor.payloadMappings.samplerTable.borderColor; - size = alignUp(size, EncodeStates::alignIndirectStatePointer); + size = alignUp(size, EncodeDispatchKernel::getDefaultDshAlignment()); size += numSamplers * samplerStateSize; size = alignUp(size, INTERFACE_DESCRIPTOR_DATA::SAMPLERSTATEPOINTER_ALIGN_SIZE); if (additionalDshSize > 0) { size += additionalDshSize; - size = 
alignUp(size, EncodeStates::alignInterfaceDescriptorData); + size = alignUp(size, EncodeDispatchKernel::getDefaultDshAlignment()); } return size; @@ -752,12 +759,16 @@ size_t EncodeDispatchKernel::getSizeRequiredDsh(const KernelDescriptor & template size_t EncodeDispatchKernel::getSizeRequiredSsh(const KernelInfo &kernelInfo) { - using BINDING_TABLE_STATE = typename Family::BINDING_TABLE_STATE; size_t requiredSshSize = kernelInfo.heapInfo.SurfaceStateHeapSize; - requiredSshSize = alignUp(requiredSshSize, BINDING_TABLE_STATE::SURFACESTATEPOINTER_ALIGN_SIZE); + requiredSshSize = alignUp(requiredSshSize, EncodeDispatchKernel::getDefaultSshAlignment()); return requiredSshSize; } +template +size_t EncodeDispatchKernel::getDefaultDshAlignment() { + return EncodeStates::alignIndirectStatePointer; +} + template void EncodeIndirectParams::setGlobalWorkSizeIndirect(CommandContainer &container, const NEO::CrossThreadDataOffset offsets[3], uint64_t crossThreadAddress, const uint32_t *lws) { for (int i = 0; i < 3; ++i) { diff --git a/shared/source/command_container/command_encoder_bdw_and_later.inl b/shared/source/command_container/command_encoder_bdw_and_later.inl index 51d3e94a5b..b25396c8fb 100644 --- a/shared/source/command_container/command_encoder_bdw_and_later.inl +++ b/shared/source/command_container/command_encoder_bdw_and_later.inl @@ -120,14 +120,14 @@ void EncodeDispatchKernel::encode(CommandContainer &container, EncodeDis auto dsHeap = args.dynamicStateHeap; if (dsHeap == nullptr) { if (!ApiSpecificConfig::getBindlessConfiguration()) { - auto dsHeap = container.getIndirectHeap(HeapType::DYNAMIC_STATE); - auto dshSizeRequired = NEO::EncodeDispatchKernel::getSizeRequiredDsh(kernelDescriptor); + dsHeap = container.getIndirectHeap(HeapType::DYNAMIC_STATE); + auto dshSizeRequired = NEO::EncodeDispatchKernel::getSizeRequiredDsh(kernelDescriptor, container.getNumIddPerBlock()); if (dsHeap->getAvailableSpace() <= dshSizeRequired) { - dsHeap = 
container.getHeapWithRequiredSizeAndAlignment(HeapType::DYNAMIC_STATE, dsHeap->getMaxAvailableSpace(), 0); - UNRECOVERABLE_IF(!dsHeap); + dsHeap = container.getHeapWithRequiredSizeAndAlignment(HeapType::DYNAMIC_STATE, dsHeap->getMaxAvailableSpace(), NEO::EncodeDispatchKernel::getDefaultDshAlignment()); } + } else { + dsHeap = args.device->getBindlessHeapsHelper()->getHeap(BindlessHeapsHelper::GLOBAL_DSH); } - dsHeap = ApiSpecificConfig::getBindlessConfiguration() ? args.device->getBindlessHeapsHelper()->getHeap(BindlessHeapsHelper::GLOBAL_DSH) : container.getIndirectHeap(HeapType::DYNAMIC_STATE); } UNRECOVERABLE_IF(!dsHeap); @@ -187,7 +187,7 @@ void EncodeDispatchKernel::encode(CommandContainer &container, EncodeDis } uint32_t numIDD = 0u; - void *iddPtr = getInterfaceDescriptor(container, numIDD); + void *iddPtr = getInterfaceDescriptor(container, args.dynamicStateHeap, numIDD); auto slmSizeNew = args.dispatchInterface->getSlmTotalSize(); bool dirtyHeaps = container.isAnyHeapDirty(); @@ -226,7 +226,7 @@ void EncodeDispatchKernel::encode(CommandContainer &container, EncodeDis } if (numIDD == 0 || flush) { - EncodeMediaInterfaceDescriptorLoad::encode(container); + EncodeMediaInterfaceDescriptorLoad::encode(container, args.dynamicStateHeap); } cmd.setIndirectDataStartAddress(static_cast(offsetThreadData)); @@ -291,10 +291,19 @@ void EncodeDispatchKernel::encode(CommandContainer &container, EncodeDis } template -void EncodeMediaInterfaceDescriptorLoad::encode(CommandContainer &container) { +void EncodeMediaInterfaceDescriptorLoad::encode(CommandContainer &container, IndirectHeap *childDsh) { using MEDIA_STATE_FLUSH = typename Family::MEDIA_STATE_FLUSH; using MEDIA_INTERFACE_DESCRIPTOR_LOAD = typename Family::MEDIA_INTERFACE_DESCRIPTOR_LOAD; - auto heapBase = ApiSpecificConfig::getBindlessConfiguration() ? 
container.getDevice()->getBindlessHeapsHelper()->getHeap(BindlessHeapsHelper::GLOBAL_DSH)->getGraphicsAllocation()->getUnderlyingBuffer() : container.getIndirectHeap(HeapType::DYNAMIC_STATE)->getCpuBase(); + void *heapBase = nullptr; + if (childDsh != nullptr) { + heapBase = childDsh->getCpuBase(); + } else { + if (ApiSpecificConfig::getBindlessConfiguration()) { + heapBase = container.getDevice()->getBindlessHeapsHelper()->getHeap(BindlessHeapsHelper::GLOBAL_DSH)->getGraphicsAllocation()->getUnderlyingBuffer(); + } else { + heapBase = container.getIndirectHeap(HeapType::DYNAMIC_STATE)->getCpuBase(); + } + } auto mediaStateFlush = container.getCommandStream()->getSpaceForCmd(); *mediaStateFlush = Family::cmdInitMediaStateFlush; @@ -570,8 +579,8 @@ template void EncodeDispatchKernel::adjustWalkOrder(WALKER_TYPE &walkerCmd, uint32_t requiredWorkGroupOrder, const RootDeviceEnvironment &rootDeviceEnvironment) {} template -uint32_t EncodeDispatchKernel::additionalSizeRequiredDsh() { - return sizeof(typename Family::INTERFACE_DESCRIPTOR_DATA); +size_t EncodeDispatchKernel::additionalSizeRequiredDsh(uint32_t iddCount) { + return iddCount * sizeof(typename Family::INTERFACE_DESCRIPTOR_DATA); } } // namespace NEO diff --git a/shared/source/command_container/command_encoder_xehp_and_later.inl b/shared/source/command_container/command_encoder_xehp_and_later.inl index 09c668c3f6..feff3ccffd 100644 --- a/shared/source/command_container/command_encoder_xehp_and_later.inl +++ b/shared/source/command_container/command_encoder_xehp_and_later.inl @@ -628,7 +628,7 @@ void EncodeComputeMode::adjustPipelineSelect(CommandContainer &container } template -inline void EncodeMediaInterfaceDescriptorLoad::encode(CommandContainer &container) { +inline void EncodeMediaInterfaceDescriptorLoad::encode(CommandContainer &container, IndirectHeap *childDsh) { } template @@ -801,7 +801,7 @@ template void EncodeDispatchKernel::adjustWalkOrder(WALKER_TYPE &walkerCmd, uint32_t requiredWorkGroupOrder, 
const RootDeviceEnvironment &rootDeviceEnvironment) {} template -uint32_t EncodeDispatchKernel::additionalSizeRequiredDsh() { +size_t EncodeDispatchKernel::additionalSizeRequiredDsh(uint32_t iddCount) { return 0u; } diff --git a/shared/source/command_stream/command_stream_receiver.cpp b/shared/source/command_stream/command_stream_receiver.cpp index fa2603fee2..e6afccffa4 100644 --- a/shared/source/command_stream/command_stream_receiver.cpp +++ b/shared/source/command_stream/command_stream_receiver.cpp @@ -572,6 +572,14 @@ GraphicsAllocation *CommandStreamReceiver::allocateDebugSurface(size_t size) { return debugSurface; } +void *CommandStreamReceiver::getIndirectHeapCurrentPtr(IndirectHeapType heapType) const { + auto heap = indirectHeap[heapType]; + if (heap) { + return heap->getSpace(0); + } + return nullptr; +} + IndirectHeap &CommandStreamReceiver::getIndirectHeap(IndirectHeap::Type heapType, size_t minRequiredSize) { DEBUG_BREAK_IF(static_cast(heapType) >= arrayCount(indirectHeap)); diff --git a/shared/source/command_stream/command_stream_receiver.h b/shared/source/command_stream/command_stream_receiver.h index 2323090f27..f76a8b299c 100644 --- a/shared/source/command_stream/command_stream_receiver.h +++ b/shared/source/command_stream/command_stream_receiver.h @@ -187,6 +187,7 @@ class CommandStreamReceiver { IndirectHeap &getIndirectHeap(IndirectHeapType heapType, size_t minRequiredSize); void allocateHeapMemory(IndirectHeapType heapType, size_t minRequiredSize, IndirectHeap *&indirectHeap); void releaseIndirectHeap(IndirectHeapType heapType); + void *getIndirectHeapCurrentPtr(IndirectHeapType heapType) const; virtual enum CommandStreamReceiverType getType() const = 0; void setExperimentalCmdBuffer(std::unique_ptr &&cmdBuffer); diff --git a/shared/test/common/helpers/unit_test_helper.h b/shared/test/common/helpers/unit_test_helper.h index ec60066b48..067d15b08a 100644 --- a/shared/test/common/helpers/unit_test_helper.h +++ 
b/shared/test/common/helpers/unit_test_helper.h @@ -93,7 +93,7 @@ struct UnitTestHelper { static bool getDisableFusionStateFromFrontEndCommand(const typename GfxFamily::VFE_STATE_TYPE &feCmd); static bool getComputeDispatchAllWalkerFromFrontEndCommand(const typename GfxFamily::VFE_STATE_TYPE &feCmd); static bool getSystolicFlagValueFromPipelineSelectCommand(const typename GfxFamily::PIPELINE_SELECT &pipelineSelectCmd); - static size_t getAdditionalDshSize(); + static size_t getAdditionalDshSize(uint32_t iddCount); static bool expectNullDsh(const DeviceInfo &deviceInfo); }; diff --git a/shared/test/common/helpers/unit_test_helper_bdw_and_later.inl b/shared/test/common/helpers/unit_test_helper_bdw_and_later.inl index ba4000f8e6..5e0e18eaf5 100644 --- a/shared/test/common/helpers/unit_test_helper_bdw_and_later.inl +++ b/shared/test/common/helpers/unit_test_helper_bdw_and_later.inl @@ -1,5 +1,5 @@ /* - * Copyright (C) 2022 Intel Corporation + * Copyright (C) 2022-2023 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -93,8 +93,8 @@ bool UnitTestHelper::getSystolicFlagValueFromPipelineSelectCommand(co } template -size_t UnitTestHelper::getAdditionalDshSize() { - return sizeof(typename GfxFamily::INTERFACE_DESCRIPTOR_DATA); +size_t UnitTestHelper::getAdditionalDshSize(uint32_t iddCount) { + return iddCount * sizeof(typename GfxFamily::INTERFACE_DESCRIPTOR_DATA); } } // namespace NEO diff --git a/shared/test/common/helpers/unit_test_helper_xehp_and_later.inl b/shared/test/common/helpers/unit_test_helper_xehp_and_later.inl index 6b8cc20d22..ec75b69560 100644 --- a/shared/test/common/helpers/unit_test_helper_xehp_and_later.inl +++ b/shared/test/common/helpers/unit_test_helper_xehp_and_later.inl @@ -1,5 +1,5 @@ /* - * Copyright (C) 2021-2022 Intel Corporation + * Copyright (C) 2021-2023 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -122,7 +122,7 @@ bool UnitTestHelper::getSystolicFlagValueFromPipelineSelectCommand(co } template -size_t 
UnitTestHelper::getAdditionalDshSize() { +size_t UnitTestHelper::getAdditionalDshSize(uint32_t iddCount) { return 0; } diff --git a/shared/test/unit_test/command_container/command_container_tests.cpp b/shared/test/unit_test/command_container/command_container_tests.cpp index 75c6ec3bda..b50fa24080 100644 --- a/shared/test/unit_test/command_container/command_container_tests.cpp +++ b/shared/test/unit_test/command_container/command_container_tests.cpp @@ -6,6 +6,7 @@ */ #include "shared/source/command_container/cmdcontainer.h" +#include "shared/source/command_container/command_encoder.h" #include "shared/source/command_stream/linear_stream.h" #include "shared/source/helpers/gfx_core_helper.h" #include "shared/source/helpers/heap_helper.h" @@ -940,6 +941,9 @@ HWTEST_F(CommandContainerTest, givenCmdContainerHasImmediateCsrWhenGettingHeapWi cmdContainer.setImmediateCmdListCsr(pDevice->getDefaultEngine().commandStreamReceiver); cmdContainer.immediateReusableAllocationList = std::make_unique(); + const size_t dshAlign = NEO::EncodeDispatchKernel::getDefaultDshAlignment(); + const size_t sshAlign = NEO::EncodeDispatchKernel::getDefaultSshAlignment(); + cmdContainer.setNumIddPerBlock(1); auto code = cmdContainer.initialize(pDevice, nullptr, true); EXPECT_EQ(CommandContainer::ErrorCode::SUCCESS, code); @@ -956,7 +960,7 @@ HWTEST_F(CommandContainerTest, givenCmdContainerHasImmediateCsrWhenGettingHeapWi auto &ultCsr = pDevice->getUltCommandStreamReceiver(); ultCsr.recursiveLockCounter = 0; - cmdContainer.ensureHeapSizePrepared(0, 0, false); + cmdContainer.ensureHeapSizePrepared(0, sshAlign, 0, dshAlign, false); EXPECT_EQ(1u, ultCsr.recursiveLockCounter); EXPECT_EQ(nullptr, cmdContainer.getIndirectHeap(HeapType::DYNAMIC_STATE)); @@ -968,22 +972,54 @@ HWTEST_F(CommandContainerTest, givenCmdContainerHasImmediateCsrWhenGettingHeapWi EXPECT_NO_THROW(cmdContainer.getHeapSpaceAllowGrow(HeapType::SURFACE_STATE, 0)); 
EXPECT_NO_THROW(cmdContainer.getHeapWithRequiredSizeAndAlignment(HeapType::SURFACE_STATE, 0, 0)); - cmdContainer.ensureHeapSizePrepared(0, 0, true); + cmdContainer.ensureHeapSizePrepared(0, sshAlign, 0, dshAlign, true); EXPECT_EQ(2u, ultCsr.recursiveLockCounter); - EXPECT_NE(nullptr, cmdContainer.getIndirectHeap(HeapType::DYNAMIC_STATE)); - EXPECT_NE(nullptr, cmdContainer.getIndirectHeap(HeapType::SURFACE_STATE)); + ASSERT_NE(nullptr, cmdContainer.getIndirectHeap(HeapType::DYNAMIC_STATE)); + ASSERT_NE(nullptr, cmdContainer.getIndirectHeap(HeapType::SURFACE_STATE)); - cmdContainer.ensureHeapSizePrepared(4 * MemoryConstants::kiloByte, 4 * MemoryConstants::kiloByte, true); + auto sshHeap = cmdContainer.getIndirectHeap(HeapType::SURFACE_STATE); + EXPECT_NE(nullptr, sshHeap); + + size_t sizeUsedDsh = 0; + size_t sizeUsedSsh = sshHeap->getUsed(); + size_t initSshSize = sizeUsedSsh; + + constexpr size_t misAlignedSize = 3; + cmdContainer.ensureHeapSizePrepared(misAlignedSize, sshAlign, misAlignedSize, dshAlign, true); EXPECT_EQ(3u, ultCsr.recursiveLockCounter); auto dshHeap = cmdContainer.getIndirectHeap(HeapType::DYNAMIC_STATE); EXPECT_NE(nullptr, dshHeap); - auto sshHeap = cmdContainer.getIndirectHeap(HeapType::SURFACE_STATE); + + sshHeap->getSpace(misAlignedSize); + dshHeap->getSpace(misAlignedSize); + + cmdContainer.ensureHeapSizePrepared(sshAlign, sshAlign, dshAlign, dshAlign, true); + EXPECT_EQ(4u, ultCsr.recursiveLockCounter); + + sshHeap->align(sshAlign); + sshHeap->getSpace(sshAlign); + + dshHeap->align(dshAlign); + dshHeap->getSpace(dshAlign); + + sizeUsedDsh = dshHeap->getUsed(); + sizeUsedSsh = sshHeap->getUsed(); + + EXPECT_EQ(2 * sshAlign + initSshSize, sizeUsedSsh); + EXPECT_EQ(2 * dshAlign, sizeUsedDsh); + + cmdContainer.ensureHeapSizePrepared(4 * MemoryConstants::kiloByte, sshAlign, 4 * MemoryConstants::kiloByte, dshAlign, true); + EXPECT_EQ(5u, ultCsr.recursiveLockCounter); + + dshHeap = cmdContainer.getIndirectHeap(HeapType::DYNAMIC_STATE); + 
EXPECT_NE(nullptr, dshHeap); + sshHeap = cmdContainer.getIndirectHeap(HeapType::SURFACE_STATE); EXPECT_NE(nullptr, sshHeap); - size_t sizeUsedDsh = dshHeap->getUsed(); - size_t sizeUsedSsh = sshHeap->getUsed(); + sizeUsedDsh = dshHeap->getUsed(); + sizeUsedSsh = sshHeap->getUsed(); void *dshPtr = cmdContainer.getHeapSpaceAllowGrow(HeapType::DYNAMIC_STATE, 64); void *sshPtr = cmdContainer.getHeapSpaceAllowGrow(HeapType::SURFACE_STATE, 64); @@ -1012,12 +1048,15 @@ HWTEST_F(CommandContainerTest, givenCmdContainerUsedInRegularCmdListWhenGettingH GTEST_SKIP(); } + const size_t dshAlign = NEO::EncodeDispatchKernel::getDefaultDshAlignment(); + const size_t sshAlign = NEO::EncodeDispatchKernel::getDefaultSshAlignment(); + MyMockCommandContainer cmdContainer; auto code = cmdContainer.initialize(pDevice, nullptr, true); EXPECT_EQ(CommandContainer::ErrorCode::SUCCESS, code); - cmdContainer.ensureHeapSizePrepared(0, 0, true); + cmdContainer.ensureHeapSizePrepared(0, sshAlign, 0, dshAlign, true); auto dsh = cmdContainer.getIndirectHeap(HeapType::DYNAMIC_STATE); auto ssh = cmdContainer.getIndirectHeap(HeapType::SURFACE_STATE); @@ -1027,7 +1066,7 @@ HWTEST_F(CommandContainerTest, givenCmdContainerUsedInRegularCmdListWhenGettingH dsh->getSpace(dsh->getAvailableSpace() - 64); - cmdContainer.ensureHeapSizePrepared(4 * MemoryConstants::kiloByte, 4 * MemoryConstants::kiloByte, false); + cmdContainer.ensureHeapSizePrepared(4 * MemoryConstants::kiloByte, sshAlign, 4 * MemoryConstants::kiloByte, dshAlign, false); dsh = cmdContainer.getIndirectHeap(HeapType::DYNAMIC_STATE); EXPECT_EQ(64u, dsh->getAvailableSpace()); diff --git a/shared/test/unit_test/encoders/test_encode_dispatch_kernel.cpp b/shared/test/unit_test/encoders/test_encode_dispatch_kernel.cpp index 84cf375c4b..d641962caa 100644 --- a/shared/test/unit_test/encoders/test_encode_dispatch_kernel.cpp +++ b/shared/test/unit_test/encoders/test_encode_dispatch_kernel.cpp @@ -800,7 +800,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, 
CommandEncodeStatesTest, giveNumSamplersOneWhenHeapI auto dshBeforeFlush = cmdContainer->getIndirectHeap(HeapType::DYNAMIC_STATE); auto &kernelDescriptor = dispatchInterface->getKernelDescriptor(); - dshBeforeFlush->getSpace(dshBeforeFlush->getAvailableSpace() - NEO::EncodeDispatchKernel::getSizeRequiredDsh(kernelDescriptor)); + dshBeforeFlush->getSpace(dshBeforeFlush->getAvailableSpace() - NEO::EncodeDispatchKernel::getSizeRequiredDsh(kernelDescriptor, cmdContainer->getNumIddPerBlock())); auto cpuBaseBeforeFlush = dshBeforeFlush->getCpuBase(); EncodeDispatchKernel::encode(*cmdContainer.get(), dispatchArgs, nullptr); @@ -853,7 +853,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, giveNumSamplersOneAndNextID auto dshBeforeFlush = cmdContainer->getIndirectHeap(HeapType::DYNAMIC_STATE); auto &kernelDescriptor = dispatchInterface->getKernelDescriptor(); - auto sizeRequiredMinusIDD = dshBeforeFlush->getAvailableSpace() - NEO::EncodeDispatchKernel::getSizeRequiredDsh(kernelDescriptor) + sizeof(INTERFACE_DESCRIPTOR_DATA); + auto sizeRequiredMinusIDD = dshBeforeFlush->getAvailableSpace() - NEO::EncodeDispatchKernel::getSizeRequiredDsh(kernelDescriptor, cmdContainer->getNumIddPerBlock()) + sizeof(INTERFACE_DESCRIPTOR_DATA); dshBeforeFlush->getSpace(sizeRequiredMinusIDD); auto cpuBaseBeforeFlush = dshBeforeFlush->getCpuBase(); auto usedBefore = cmdContainer->getIndirectHeap(HeapType::SURFACE_STATE)->getUsed(); @@ -1526,12 +1526,12 @@ HWTEST_F(CommandEncodeStatesTest, givenKernelInfoWhenGettingRequiredDshSpaceThen using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA; using SAMPLER_STATE = typename FamilyType::SAMPLER_STATE; - size_t additionalSize = UnitTestHelper::getAdditionalDshSize(); + size_t additionalSize = UnitTestHelper::getAdditionalDshSize(cmdContainer->getNumIddPerBlock()); size_t expectedSize = alignUp(additionalSize, EncodeStates::alignInterfaceDescriptorData); // no samplers 
kernelInfo.kernelDescriptor.payloadMappings.samplerTable.numSamplers = 0; - size_t size = EncodeDispatchKernel::getSizeRequiredDsh(kernelInfo.kernelDescriptor); + size_t size = EncodeDispatchKernel::getSizeRequiredDsh(kernelInfo.kernelDescriptor, cmdContainer->getNumIddPerBlock()); EXPECT_EQ(expectedSize, size); // two samplers, no border color state @@ -1549,7 +1549,7 @@ HWTEST_F(CommandEncodeStatesTest, givenKernelInfoWhenGettingRequiredDshSpaceThen expectedSize = alignedSamplers; } - size = EncodeDispatchKernel::getSizeRequiredDsh(kernelInfo.kernelDescriptor); + size = EncodeDispatchKernel::getSizeRequiredDsh(kernelInfo.kernelDescriptor, cmdContainer->getNumIddPerBlock()); EXPECT_EQ(expectedSize, size); // three samplers, border color state @@ -1565,7 +1565,7 @@ HWTEST_F(CommandEncodeStatesTest, givenKernelInfoWhenGettingRequiredDshSpaceThen } else { expectedSize = alignedSamplers; } - size = EncodeDispatchKernel::getSizeRequiredDsh(kernelInfo.kernelDescriptor); + size = EncodeDispatchKernel::getSizeRequiredDsh(kernelInfo.kernelDescriptor, cmdContainer->getNumIddPerBlock()); EXPECT_EQ(expectedSize, size); } diff --git a/shared/test/unit_test/encoders/test_encode_media_interface_descriptor.cpp b/shared/test/unit_test/encoders/test_encode_media_interface_descriptor.cpp index 34d7949b19..2f21bbe467 100644 --- a/shared/test/unit_test/encoders/test_encode_media_interface_descriptor.cpp +++ b/shared/test/unit_test/encoders/test_encode_media_interface_descriptor.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2020-2022 Intel Corporation + * Copyright (C) 2020-2023 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -17,7 +17,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, WhenProgrammingThenMediaInt using MEDIA_STATE_FLUSH = typename FamilyType::MEDIA_STATE_FLUSH; using MEDIA_INTERFACE_DESCRIPTOR_LOAD = typename FamilyType::MEDIA_INTERFACE_DESCRIPTOR_LOAD; - EncodeMediaInterfaceDescriptorLoad::encode(*cmdContainer.get()); + 
EncodeMediaInterfaceDescriptorLoad::encode(*cmdContainer.get(), nullptr); GenCmdList commands; CmdParse::parseCommandBuffer(commands, ptrOffset(cmdContainer->getCommandStream()->getCpuBase(), 0), cmdContainer->getCommandStream()->getUsed()); diff --git a/shared/test/unit_test/encoders/test_encode_media_interface_descriptor_xehp_and_later.cpp b/shared/test/unit_test/encoders/test_encode_media_interface_descriptor_xehp_and_later.cpp index f47fd55269..3233eda27c 100644 --- a/shared/test/unit_test/encoders/test_encode_media_interface_descriptor_xehp_and_later.cpp +++ b/shared/test/unit_test/encoders/test_encode_media_interface_descriptor_xehp_and_later.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2021-2022 Intel Corporation + * Copyright (C) 2021-2023 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -13,7 +13,7 @@ using CommandEncodeStatesTest = Test; HWTEST2_F(CommandEncodeStatesTest, givenCommandContainerWhenEncodingMediaDescriptorThenUsedSizeDidNotIncreased, IsAtLeastXeHpCore) { auto sizeBefore = cmdContainer->getCommandStream()->getUsed(); - EncodeMediaInterfaceDescriptorLoad::encode(*cmdContainer.get()); + EncodeMediaInterfaceDescriptorLoad::encode(*cmdContainer.get(), nullptr); auto sizeAfter = cmdContainer->getCommandStream()->getUsed(); EXPECT_EQ(sizeBefore, sizeAfter); }