diff --git a/level_zero/core/source/cmdlist/cmdlist.h b/level_zero/core/source/cmdlist/cmdlist.h index 7adb31c6dc..20a42738ef 100644 --- a/level_zero/core/source/cmdlist/cmdlist.h +++ b/level_zero/core/source/cmdlist/cmdlist.h @@ -297,6 +297,7 @@ struct CommandList : _ze_command_list_handle_t { bool commandListSLMEnabled = false; bool requiresQueueUncachedMocs = false; bool isBcsSplitNeeded = false; + bool immediateCmdListHeapSharing = false; protected: NEO::GraphicsAllocation *getAllocationFromHostPtrMap(const void *buffer, uint64_t bufferSize); diff --git a/level_zero/core/source/cmdlist/cmdlist_hw.inl b/level_zero/core/source/cmdlist/cmdlist_hw.inl index 5285efd93a..34361ad040 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw.inl @@ -135,6 +135,11 @@ ze_result_t CommandListCoreFamily::initialize(Device *device, NEO commandContainer.setFlushTaskUsedForImmediate(this->isFlushTaskSubmissionEnabled); } + if (this->immediateCmdListHeapSharing) { + commandContainer.setImmediateCmdListCsr(this->csr); + commandContainer.setNumIddPerBlock(1); + } + commandContainer.setReservedSshSize(getReserveSshSize()); DeviceImp *deviceImp = static_cast(device); auto returnValue = commandContainer.initialize(deviceImp->getActiveDevice(), deviceImp->allocationsForReuse.get(), !isCopyOnly()); diff --git a/level_zero/core/source/cmdlist/cmdlist_hw_base.inl b/level_zero/core/source/cmdlist/cmdlist_hw_base.inl index c12dd210da..eb8090e31d 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw_base.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw_base.inl @@ -43,8 +43,14 @@ ze_result_t CommandListCoreFamily::appendLaunchKernelWithParams(K if (kernelDescriptor.kernelAttributes.flags.isInvalid) { return ZE_RESULT_ERROR_INVALID_ARGUMENT; } - appendEventForProfiling(event, true, false); const auto kernelImmutableData = kernel->getImmutableData(); + if (this->immediateCmdListHeapSharing) { + auto kernelInfo = 
kernelImmutableData->getKernelInfo(); + commandContainer.ensureHeapSizePrepared( + NEO::EncodeDispatchKernel::getSizeRequiredSsh(*kernelInfo), + NEO::EncodeDispatchKernel::getSizeRequiredDsh(*kernelInfo)); + } + appendEventForProfiling(event, true, false); auto perThreadScratchSize = std::max(this->getCommandListPerThreadScratchSize(), kernel->getImmutableData()->getDescriptor().kernelAttributes.perThreadScratchSize[0]); this->setCommandListPerThreadScratchSize(perThreadScratchSize); @@ -147,7 +153,7 @@ ze_result_t CommandListCoreFamily::appendLaunchKernelWithParams(K NEO::EncodeDispatchKernel::encode(commandContainer, dispatchKernelArgs, getLogicalStateHelper()); this->containsStatelessUncachedResource = dispatchKernelArgs.requiresUncachedMocs; - if (neoDevice->getDebugger()) { + if (neoDevice->getDebugger() && !this->immediateCmdListHeapSharing) { auto *ssh = commandContainer.getIndirectHeap(NEO::HeapType::SURFACE_STATE); auto surfaceStateSpace = neoDevice->getDebugger()->getDebugSurfaceReservedSurfaceState(*ssh); auto surfaceState = GfxFamily::cmdInitRenderSurfaceState; diff --git a/level_zero/core/source/cmdlist/cmdlist_hw_immediate.h b/level_zero/core/source/cmdlist/cmdlist_hw_immediate.h index 861c06c8ad..3d47fd2d2c 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw_immediate.h +++ b/level_zero/core/source/cmdlist/cmdlist_hw_immediate.h @@ -21,10 +21,10 @@ constexpr size_t maxImmediateCommandSize = 4 * MemoryConstants::kiloByte; template struct CommandListCoreFamilyImmediate : public CommandListCoreFamily { + using GfxFamily = typename NEO::GfxFamilyMapper::GfxFamily; using BaseClass = CommandListCoreFamily; - using BaseClass::executeCommandListImmediate; - using BaseClass::BaseClass; + using BaseClass::executeCommandListImmediate; ze_result_t appendLaunchKernel(ze_kernel_handle_t kernelHandle, const ze_group_count_t *threadGroupDimensions, diff --git a/level_zero/core/source/cmdlist/cmdlist_hw_immediate.inl 
b/level_zero/core/source/cmdlist/cmdlist_hw_immediate.inl index 7b2cf4e4ee..28a47b8377 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw_immediate.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw_immediate.inl @@ -7,6 +7,8 @@ #pragma once +#include "shared/source/command_container/command_encoder.h" +#include "shared/source/command_stream/command_stream_receiver_hw.h" #include "shared/source/command_stream/wait_status.h" #include "shared/source/helpers/hw_helper.h" #include "shared/source/helpers/hw_info.h" @@ -143,6 +145,32 @@ ze_result_t CommandListCoreFamilyImmediate::executeCommandListImm this->csr->makeResident(*this->device->getDebugSurface()); } + NEO::Device *neoDevice = this->device->getNEODevice(); + if (neoDevice->getDebugger() && this->immediateCmdListHeapSharing) { + auto csrHw = static_cast *>(this->csr); + auto sshStateCopy = csrHw->getSshState(); + bool sshDirty = sshStateCopy.updateAndCheck(ssh); + + if (sshDirty) { + auto surfaceStateSpace = neoDevice->getDebugger()->getDebugSurfaceReservedSurfaceState(*ssh); + auto surfaceState = GfxFamily::cmdInitRenderSurfaceState; + + NEO::EncodeSurfaceStateArgs args; + args.outMemory = &surfaceState; + args.graphicsAddress = this->device->getDebugSurface()->getGpuAddress(); + args.size = this->device->getDebugSurface()->getUnderlyingBufferSize(); + args.mocs = this->device->getMOCS(false, false); + args.numAvailableDevices = neoDevice->getNumGenericSubDevices(); + args.allocation = this->device->getDebugSurface(); + args.gmmHelper = neoDevice->getGmmHelper(); + args.useGlobalAtomics = false; + args.areMultipleSubDevicesInContext = false; + args.isDebuggerActive = true; + NEO::EncodeSurfaceState::encodeBuffer(args); + *reinterpret_cast(surfaceStateSpace) = surfaceState; + } + } + auto completionStamp = this->csr->flushTask( *commandStream, commandStreamStart, diff --git a/level_zero/core/source/cmdlist/cmdlist_hw_xehp_and_later.inl b/level_zero/core/source/cmdlist/cmdlist_hw_xehp_and_later.inl index 
61bdf33210..5be66b9c4b 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw_xehp_and_later.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw_xehp_and_later.inl @@ -139,6 +139,12 @@ ze_result_t CommandListCoreFamily::appendLaunchKernelWithParams(K if (kernelDescriptor.kernelAttributes.flags.isInvalid) { return ZE_RESULT_ERROR_INVALID_ARGUMENT; } + if (this->immediateCmdListHeapSharing) { + auto kernelInfo = kernelImmutableData->getKernelInfo(); + commandContainer.ensureHeapSizePrepared( + NEO::EncodeDispatchKernel::getSizeRequiredSsh(*kernelInfo), + NEO::EncodeDispatchKernel::getSizeRequiredDsh(*kernelInfo)); + } commandListPerThreadScratchSize = std::max(commandListPerThreadScratchSize, kernelDescriptor.kernelAttributes.perThreadScratchSize[0]); commandListPerThreadPrivateScratchSize = std::max(commandListPerThreadPrivateScratchSize, kernelDescriptor.kernelAttributes.perThreadScratchSize[1]); @@ -265,7 +271,7 @@ ze_result_t CommandListCoreFamily::appendLaunchKernelWithParams(K } } - if (neoDevice->getDebugger()) { + if (neoDevice->getDebugger() && !this->immediateCmdListHeapSharing) { auto *ssh = commandContainer.getIndirectHeap(NEO::HeapType::SURFACE_STATE); auto surfaceStateSpace = neoDevice->getDebugger()->getDebugSurfaceReservedSurfaceState(*ssh); auto surfaceState = GfxFamily::cmdInitRenderSurfaceState; diff --git a/level_zero/core/source/cmdlist/cmdlist_imp.cpp b/level_zero/core/source/cmdlist/cmdlist_imp.cpp index 88a2c73c74..55494c181d 100644 --- a/level_zero/core/source/cmdlist/cmdlist_imp.cpp +++ b/level_zero/core/source/cmdlist/cmdlist_imp.cpp @@ -127,6 +127,7 @@ CommandList *CommandList::createImmediate(uint32_t productFamily, Device *device UNRECOVERABLE_IF(nullptr == csr); commandList = static_cast((*allocator)(CommandList::commandListimmediateIddsPerBlock)); + commandList->csr = csr; commandList->internalUsage = internalUsage; commandList->cmdListType = CommandListType::TYPE_IMMEDIATE; commandList->isSyncModeQueue = (desc->mode == 
ZE_COMMAND_QUEUE_MODE_SYNCHRONOUS); @@ -135,6 +136,7 @@ CommandList *CommandList::createImmediate(uint32_t productFamily, Device *device if (NEO::DebugManager.flags.EnableFlushTaskSubmission.get() != -1) { commandList->isFlushTaskSubmissionEnabled = !!NEO::DebugManager.flags.EnableFlushTaskSubmission.get(); } + commandList->immediateCmdListHeapSharing = L0HwHelper::enableImmediateCmdListHeapSharing(commandList->isFlushTaskSubmissionEnabled); } returnValue = commandList->initialize(device, engineGroupType, desc->flags); if (returnValue != ZE_RESULT_SUCCESS) { @@ -151,7 +153,6 @@ CommandList *CommandList::createImmediate(uint32_t productFamily, Device *device } commandList->cmdQImmediate = commandQueue; - commandList->csr = csr; commandList->isTbxMode = (csr->getType() == NEO::CommandStreamReceiverType::CSR_TBX) || (csr->getType() == NEO::CommandStreamReceiverType::CSR_TBX_WITH_AUB); commandList->commandListPreemptionMode = device->getDevicePreemptionMode(); diff --git a/level_zero/core/source/device/device_imp.cpp b/level_zero/core/source/device/device_imp.cpp index c7ba0bf5af..5935b9d550 100644 --- a/level_zero/core/source/device/device_imp.cpp +++ b/level_zero/core/source/device/device_imp.cpp @@ -1092,7 +1092,9 @@ Device *Device::create(DriverHandle *driverHandle, NEO::Device *neoDevice, bool device->getSourceLevelDebugger() ->notifyNewDevice(osInterface ? 
osInterface->getDriverModel()->getDeviceHandle() : 0); } - device->createSysmanHandle(isSubDevice); + if (device->getNEODevice()->getAllEngines()[0].commandStreamReceiver->getType() == NEO::CommandStreamReceiverType::CSR_HW) { + device->createSysmanHandle(isSubDevice); + } device->resourcesReleased = false; device->populateSubDeviceCopyEngineGroups(); diff --git a/level_zero/core/source/hw_helpers/l0_hw_helper.cpp b/level_zero/core/source/hw_helpers/l0_hw_helper.cpp index 2fd34c64ad..d5fb5da7fe 100644 --- a/level_zero/core/source/hw_helpers/l0_hw_helper.cpp +++ b/level_zero/core/source/hw_helpers/l0_hw_helper.cpp @@ -39,4 +39,12 @@ bool L0HwHelper::enableStateComputeModeTracking() { return defaultValue; } +bool L0HwHelper::enableImmediateCmdListHeapSharing(bool cmdlistSupport) { + bool enabled = false; + if (NEO::DebugManager.flags.EnableImmediateCmdListHeapSharing.get() != -1) { + return !!NEO::DebugManager.flags.EnableImmediateCmdListHeapSharing.get(); + } + return enabled; +} + } // namespace L0 diff --git a/level_zero/core/source/hw_helpers/l0_hw_helper.h b/level_zero/core/source/hw_helpers/l0_hw_helper.h index a85529cf67..b851f46bf6 100644 --- a/level_zero/core/source/hw_helpers/l0_hw_helper.h +++ b/level_zero/core/source/hw_helpers/l0_hw_helper.h @@ -33,6 +33,7 @@ class L0HwHelper { static bool enableFrontEndStateTracking(); static bool enablePipelineSelectStateTracking(); static bool enableStateComputeModeTracking(); + static bool enableImmediateCmdListHeapSharing(bool cmdlistSupport); virtual void setAdditionalGroupProperty(ze_command_queue_group_properties_t &groupProperty, NEO::EngineGroupT &group) const = 0; virtual L0::Event *createEvent(L0::EventPool *eventPool, const ze_event_desc_t *desc, L0::Device *device) const = 0; diff --git a/level_zero/core/test/black_box_tests/zello_commandlist_immediate.cpp b/level_zero/core/test/black_box_tests/zello_commandlist_immediate.cpp index 209762d6d7..4b87c1109b 100644 --- 
a/level_zero/core/test/black_box_tests/zello_commandlist_immediate.cpp +++ b/level_zero/core/test/black_box_tests/zello_commandlist_immediate.cpp @@ -385,6 +385,11 @@ int main(int argc, char *argv[]) { verbose = isVerbose(argc, argv); bool useSyncQueue = isSyncQueueEnabled(argc, argv); bool commandListShared = isCommandListShared(argc, argv); + bool commandListCoexist = isParamEnabled(argc, argv, "-o", "--coexists"); + if (commandListCoexist) { + std::cerr << "Command List coexists between tests" << std::endl; + commandListShared = false; + } bool aubMode = isAubMode(argc, argv); ze_context_handle_t context = nullptr; @@ -410,18 +415,43 @@ int main(int argc, char *argv[]) { SUCCESS_OR_TERMINATE(zeCommandListCreateImmediate(context, device0, &cmdQueueDesc, &cmdList)); } + ze_command_list_handle_t cmdListStandardMemoryCopy = nullptr; + ze_command_list_handle_t cmdListMemoryCopyRegion = nullptr; + ze_command_list_handle_t cmdListLaunchGpuKernel = nullptr; + if (commandListCoexist) { + ze_command_queue_desc_t cmdQueueDesc = {ZE_STRUCTURE_TYPE_COMMAND_QUEUE_DESC}; + cmdQueueDesc.pNext = nullptr; + cmdQueueDesc.flags = 0; + cmdQueueDesc.priority = ZE_COMMAND_QUEUE_PRIORITY_NORMAL; + cmdQueueDesc.ordinal = getCommandQueueOrdinal(device0); + cmdQueueDesc.index = 0; + selectQueueMode(cmdQueueDesc, useSyncQueue); + + SUCCESS_OR_TERMINATE(zeCommandListCreateImmediate(context, device0, &cmdQueueDesc, &cmdListStandardMemoryCopy)); + SUCCESS_OR_TERMINATE(zeCommandListCreateImmediate(context, device0, &cmdQueueDesc, &cmdListMemoryCopyRegion)); + SUCCESS_OR_TERMINATE(zeCommandListCreateImmediate(context, device0, &cmdQueueDesc, &cmdListLaunchGpuKernel)); + + cmdList = cmdListStandardMemoryCopy; + } + std::string currentTest; currentTest = "Standard Memory Copy"; testAppendMemoryCopy(context, device0, useSyncQueue, outputValidationSuccessful, cmdList); printResult(aubMode, outputValidationSuccessful, blackBoxName, currentTest); if (outputValidationSuccessful || aubMode) { + if 
(commandListCoexist) { + cmdList = cmdListMemoryCopyRegion; + } currentTest = "Memory Copy Region"; testAppendMemoryCopyRegion(context, device0, useSyncQueue, outputValidationSuccessful, cmdList); printResult(aubMode, outputValidationSuccessful, blackBoxName, currentTest); } if (outputValidationSuccessful || aubMode) { + if (commandListCoexist) { + cmdList = cmdListLaunchGpuKernel; + } currentTest = "Launch GPU Kernel"; testAppendGpuKernel(context, device0, useSyncQueue, outputValidationSuccessful, cmdList); printResult(aubMode, outputValidationSuccessful, blackBoxName, currentTest); @@ -430,6 +460,11 @@ int main(int argc, char *argv[]) { if (commandListShared) { SUCCESS_OR_TERMINATE(zeCommandListDestroy(cmdList)); } + if (commandListCoexist) { + SUCCESS_OR_TERMINATE(zeCommandListDestroy(cmdListStandardMemoryCopy)); + SUCCESS_OR_TERMINATE(zeCommandListDestroy(cmdListMemoryCopyRegion)); + SUCCESS_OR_TERMINATE(zeCommandListDestroy(cmdListLaunchGpuKernel)); + } SUCCESS_OR_TERMINATE(zeContextDestroy(context)); diff --git a/level_zero/core/test/unit_tests/fixtures/cmdlist_fixture.cpp b/level_zero/core/test/unit_tests/fixtures/cmdlist_fixture.cpp index 127e807d6c..2cf6e234e3 100644 --- a/level_zero/core/test/unit_tests/fixtures/cmdlist_fixture.cpp +++ b/level_zero/core/test/unit_tests/fixtures/cmdlist_fixture.cpp @@ -94,7 +94,7 @@ void ModuleMutableCommandListFixture::setUp(uint32_t revision) { false, returnValue)); - NEO::EngineGroupType engineGroupType = NEO::HwHelper::get(device->getHwInfo().platform.eRenderCoreFamily).getEngineGroupType(neoDevice->getDefaultEngine().getEngineType(), neoDevice->getDefaultEngine().getEngineUsage(), device->getHwInfo()); + engineGroupType = NEO::HwHelper::get(device->getHwInfo().platform.eRenderCoreFamily).getEngineGroupType(neoDevice->getDefaultEngine().getEngineType(), neoDevice->getDefaultEngine().getEngineUsage(), device->getHwInfo()); commandList.reset(whiteboxCast(CommandList::create(productFamily, device, engineGroupType, 0u, 
returnValue))); commandListImmediate.reset(whiteboxCast(CommandList::createImmediate(productFamily, device, &queueDesc, false, engineGroupType, returnValue))); @@ -131,5 +131,11 @@ void CmdListStateComputeModeStateFixture::setUp() { ModuleMutableCommandListFixture::setUp(); } +void ImmediateCmdListSharedHeapsFixture::setUp() { + DebugManager.flags.EnableFlushTaskSubmission.set(1); + DebugManager.flags.EnableImmediateCmdListHeapSharing.set(1); + ModuleMutableCommandListFixture::setUp(); +} + } // namespace ult } // namespace L0 diff --git a/level_zero/core/test/unit_tests/fixtures/cmdlist_fixture.h b/level_zero/core/test/unit_tests/fixtures/cmdlist_fixture.h index c5a23a913b..54bd9abe19 100644 --- a/level_zero/core/test/unit_tests/fixtures/cmdlist_fixture.h +++ b/level_zero/core/test/unit_tests/fixtures/cmdlist_fixture.h @@ -78,6 +78,7 @@ struct ModuleMutableCommandListFixture : public ModuleImmutableDataFixture { std::unique_ptr commandListImmediate; std::unique_ptr kernel; L0::ult::CommandQueue *commandQueue; + NEO::EngineGroupType engineGroupType; }; struct MultiReturnCommandListFixture : public ModuleMutableCommandListFixture { @@ -117,5 +118,11 @@ struct CmdListLargeGrfFixture : public CmdListStateComputeModeStateFixture { void testBody(); }; +struct ImmediateCmdListSharedHeapsFixture : public ModuleMutableCommandListFixture { + void setUp(); + + DebugManagerStateRestore restorer; +}; + } // namespace ult } // namespace L0 diff --git a/level_zero/core/test/unit_tests/fixtures/module_fixture.h b/level_zero/core/test/unit_tests/fixtures/module_fixture.h index 5f37e6e227..6b56ddf8d0 100644 --- a/level_zero/core/test/unit_tests/fixtures/module_fixture.h +++ b/level_zero/core/test/unit_tests/fixtures/module_fixture.h @@ -126,11 +126,15 @@ struct ModuleImmutableDataFixture : public DeviceFixture { public: using KernelImp::crossThreadData; using KernelImp::crossThreadDataSize; + using KernelImp::dynamicStateHeapData; + using KernelImp::dynamicStateHeapDataSize; using 
KernelImp::kernelArgHandlers; using KernelImp::kernelHasIndirectAccess; using KernelImp::kernelRequiresGenerationOfLocalIdsByRuntime; using KernelImp::privateMemoryGraphicsAllocation; using KernelImp::requiredWorkgroupOrder; + using KernelImp::surfaceStateHeapData; + using KernelImp::surfaceStateHeapDataSize; MockKernel(MockModule *mockModule) : WhiteBox(mockModule) { } diff --git a/level_zero/core/test/unit_tests/mocks/mock_cmdlist.h b/level_zero/core/test/unit_tests/mocks/mock_cmdlist.h index 4a1124079d..c642f5eca5 100644 --- a/level_zero/core/test/unit_tests/mocks/mock_cmdlist.h +++ b/level_zero/core/test/unit_tests/mocks/mock_cmdlist.h @@ -59,6 +59,7 @@ struct WhiteBox<::L0::CommandListCoreFamily> using BaseClass::getAllocationFromHostPtrMap; using BaseClass::getHostPtrAlloc; using BaseClass::hostPtrMap; + using BaseClass::immediateCmdListHeapSharing; using BaseClass::indirectAllocationsAllowed; using BaseClass::initialize; using BaseClass::partitionCount; @@ -123,6 +124,7 @@ struct WhiteBox> using BaseClass::csr; using BaseClass::finalStreamState; using BaseClass::frontEndStateTracking; + using BaseClass::immediateCmdListHeapSharing; using BaseClass::isFlushTaskSubmissionEnabled; using BaseClass::partitionCount; using BaseClass::pipelineSelectStateTracking; @@ -134,9 +136,11 @@ struct WhiteBox> template struct MockCommandListImmediate : public CommandListCoreFamilyImmediate { - using CommandListCoreFamilyImmediate::requiredStreamState; - using CommandListCoreFamilyImmediate::containsAnyKernel; - using CommandListCoreFamilyImmediate::indirectAllocationsAllowed; + using BaseClass = CommandListCoreFamilyImmediate; + using BaseClass::containsAnyKernel; + using BaseClass::immediateCmdListHeapSharing; + using BaseClass::indirectAllocationsAllowed; + using BaseClass::requiredStreamState; }; template <> @@ -148,6 +152,7 @@ struct WhiteBox<::L0::CommandList> : public ::L0::CommandListImp { using BaseClass::commandListPreemptionMode; using BaseClass::csr; using 
BaseClass::frontEndStateTracking; + using BaseClass::immediateCmdListHeapSharing; using BaseClass::initialize; using BaseClass::isFlushTaskSubmissionEnabled; using BaseClass::nonImmediateLogicalStateHelper; diff --git a/level_zero/core/test/unit_tests/mocks/mock_kernel.h b/level_zero/core/test/unit_tests/mocks/mock_kernel.h index f8c1f52c99..cd466ebf24 100644 --- a/level_zero/core/test/unit_tests/mocks/mock_kernel.h +++ b/level_zero/core/test/unit_tests/mocks/mock_kernel.h @@ -45,6 +45,8 @@ struct WhiteBox<::L0::Kernel> : public ::L0::KernelImp { using ::L0::KernelImp::createPrintfBuffer; using ::L0::KernelImp::crossThreadData; using ::L0::KernelImp::crossThreadDataSize; + using ::L0::KernelImp::dynamicStateHeapData; + using ::L0::KernelImp::dynamicStateHeapDataSize; using ::L0::KernelImp::groupSize; using ::L0::KernelImp::kernelImmData; using ::L0::KernelImp::kernelRequiresGenerationOfLocalIdsByRuntime; @@ -76,6 +78,8 @@ struct WhiteBoxKernelHw : public KernelHw { using ::L0::KernelImp::createPrintfBuffer; using ::L0::KernelImp::crossThreadData; using ::L0::KernelImp::crossThreadDataSize; + using ::L0::KernelImp::dynamicStateHeapData; + using ::L0::KernelImp::dynamicStateHeapDataSize; using ::L0::KernelImp::groupSize; using ::L0::KernelImp::kernelImmData; using ::L0::KernelImp::kernelRequiresGenerationOfLocalIdsByRuntime; diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_6.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_6.cpp index 56786a1557..d73fc05b21 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_6.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_6.cpp @@ -5,6 +5,10 @@ * */ +#include "shared/source/command_container/command_encoder.h" +#include "shared/source/kernel/kernel_descriptor.h" +#include "shared/test/common/helpers/unit_test_helper.h" +#include "shared/test/common/libult/ult_command_stream_receiver.h" #include 
"shared/test/common/mocks/mock_command_stream_receiver.h" #include "shared/test/common/mocks/ult_device_factory.h" #include "shared/test/common/test_macros/hw_test.h" @@ -716,5 +720,165 @@ HWTEST2_F(CommandListTest, givenCmdListWithNoIndirectAccessWhenExecutingCommandL commandList->cmdQImmediate = oldCommandQueue; } +using ImmediateCmdListSharedHeapsTest = Test; +HWTEST2_F(ImmediateCmdListSharedHeapsTest, givenMultipleCommandListsUsingSharedHeapsWhenDispatchingKernelThenExpectSingleSbaCommandAndHeapsReused, IsAtLeastSkl) { + using STATE_BASE_ADDRESS = typename FamilyType::STATE_BASE_ADDRESS; + using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; + using SAMPLER_STATE = typename FamilyType::SAMPLER_STATE; + using SAMPLER_BORDER_COLOR_STATE = typename FamilyType::SAMPLER_BORDER_COLOR_STATE; + auto &hwInfo = device->getHwInfo(); + + uint32_t expectedSbaCount = 1; + auto &hwInfoConfig = *NEO::HwInfoConfig::get(hwInfo.platform.eProductFamily); + if (hwInfoConfig.isAdditionalStateBaseAddressWARequired(hwInfo)) { + expectedSbaCount++; + } + + bool dshPresent = hwInfo.capabilityTable.supportsImages || NEO::UnitTestHelper::getAdditionalDshSize() > 0; + + if (dshPresent) { + mockKernelImmData->kernelInfo->kernelDescriptor.payloadMappings.samplerTable.numSamplers = 2; + mockKernelImmData->kernelInfo->kernelDescriptor.payloadMappings.samplerTable.tableOffset = sizeof(SAMPLER_BORDER_COLOR_STATE); + mockKernelImmData->kernelInfo->kernelDescriptor.payloadMappings.samplerTable.borderColor = 0; + + kernel->dynamicStateHeapDataSize = static_cast(sizeof(SAMPLER_STATE) * 2 + mockKernelImmData->kernelInfo->kernelDescriptor.payloadMappings.samplerTable.tableOffset); + kernel->dynamicStateHeapData.reset(new uint8_t[kernel->dynamicStateHeapDataSize]); + + mockKernelImmData->mockKernelDescriptor->payloadMappings.samplerTable = mockKernelImmData->kernelInfo->kernelDescriptor.payloadMappings.samplerTable; + } + + mockKernelImmData->kernelInfo->heapInfo.SurfaceStateHeapSize 
= static_cast(sizeof(RENDER_SURFACE_STATE) + sizeof(uint32_t)); + mockKernelImmData->mockKernelDescriptor->payloadMappings.bindingTable.numEntries = 1; + mockKernelImmData->mockKernelDescriptor->payloadMappings.bindingTable.tableOffset = 0x40; + mockKernelImmData->mockKernelDescriptor->kernelAttributes.bufferAddressingMode = NEO::KernelDescriptor::BindfulAndStateless; + + kernel->surfaceStateHeapDataSize = mockKernelImmData->kernelInfo->heapInfo.SurfaceStateHeapSize; + kernel->surfaceStateHeapData.reset(new uint8_t[kernel->surfaceStateHeapDataSize]); + + EXPECT_TRUE(commandListImmediate->isFlushTaskSubmissionEnabled); + EXPECT_TRUE(commandListImmediate->immediateCmdListHeapSharing); + + auto &cmdContainer = commandListImmediate->commandContainer; + EXPECT_EQ(1u, cmdContainer.getNumIddPerBlock()); + EXPECT_TRUE(cmdContainer.immediateCmdListSharedHeap(HeapType::DYNAMIC_STATE)); + EXPECT_TRUE(cmdContainer.immediateCmdListSharedHeap(HeapType::SURFACE_STATE)); + + auto &ultCsr = neoDevice->getUltCommandStreamReceiver(); + auto &csrStream = ultCsr.commandStream; + + const ze_group_count_t groupCount{1, 1, 1}; + CmdListKernelLaunchParams launchParams = {}; + auto result = ZE_RESULT_SUCCESS; + + auto csrDshHeap = &ultCsr.getIndirectHeap(HeapType::DYNAMIC_STATE, MemoryConstants::pageSize64k); + auto csrSshHeap = &ultCsr.getIndirectHeap(HeapType::SURFACE_STATE, MemoryConstants::pageSize64k); + + size_t dshUsed = csrDshHeap->getUsed(); + size_t sshUsed = csrSshHeap->getUsed(); + + size_t csrUsedBefore = csrStream.getUsed(); + result = commandListImmediate->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + size_t csrUsedAfter = csrStream.getUsed(); + + NEO::IndirectHeap *containerDshHeap = cmdContainer.getIndirectHeap(HeapType::DYNAMIC_STATE); + NEO::IndirectHeap *containerSshHeap = cmdContainer.getIndirectHeap(HeapType::SURFACE_STATE); + + if (dshPresent) { + EXPECT_EQ(csrDshHeap, 
containerDshHeap); + } else { + EXPECT_EQ(nullptr, containerDshHeap); + } + EXPECT_EQ(csrSshHeap, containerSshHeap); + + GenCmdList cmdList; + ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( + cmdList, + ptrOffset(csrStream.getCpuBase(), csrUsedBefore), + (csrUsedAfter - csrUsedBefore))); + auto sbaCmds = findAll(cmdList.begin(), cmdList.end()); + ASSERT_EQ(expectedSbaCount, sbaCmds.size()); + + auto &sbaCmd = *genCmdCast(*sbaCmds[0]); + if (dshPresent) { + EXPECT_TRUE(sbaCmd.getDynamicStateBaseAddressModifyEnable()); + EXPECT_EQ(csrDshHeap->getHeapGpuBase(), sbaCmd.getDynamicStateBaseAddress()); + } else { + EXPECT_FALSE(sbaCmd.getDynamicStateBaseAddressModifyEnable()); + EXPECT_EQ(0u, sbaCmd.getDynamicStateBaseAddress()); + } + EXPECT_TRUE(sbaCmd.getSurfaceStateBaseAddressModifyEnable()); + EXPECT_EQ(csrSshHeap->getHeapGpuBase(), sbaCmd.getSurfaceStateBaseAddress()); + + dshUsed = csrDshHeap->getUsed() - dshUsed; + sshUsed = csrSshHeap->getUsed() - sshUsed; + if (dshPresent) { + EXPECT_LT(0u, dshUsed); + } else { + EXPECT_EQ(0u, dshUsed); + } + EXPECT_LT(0u, sshUsed); + + size_t dshEstimated = NEO::EncodeDispatchKernel::getSizeRequiredDsh(*kernel->getImmutableData()->getKernelInfo()); + size_t sshEstimated = NEO::EncodeDispatchKernel::getSizeRequiredSsh(*kernel->getImmutableData()->getKernelInfo()); + + EXPECT_GE(dshEstimated, dshUsed); + EXPECT_GE(sshEstimated, sshUsed); + + ze_command_queue_desc_t queueDesc{}; + queueDesc.ordinal = 0u; + queueDesc.index = 0u; + queueDesc.priority = ZE_COMMAND_QUEUE_PRIORITY_NORMAL; + + std::unique_ptr commandListImmediateCoexisting; + commandListImmediateCoexisting.reset(whiteboxCast(CommandList::createImmediate(productFamily, device, &queueDesc, false, engineGroupType, result))); + + auto &cmdContainerCoexisting = commandListImmediateCoexisting->commandContainer; + EXPECT_EQ(1u, cmdContainerCoexisting.getNumIddPerBlock()); + EXPECT_TRUE(cmdContainerCoexisting.immediateCmdListSharedHeap(HeapType::DYNAMIC_STATE)); + 
EXPECT_TRUE(cmdContainerCoexisting.immediateCmdListSharedHeap(HeapType::SURFACE_STATE)); + + dshUsed = csrDshHeap->getUsed(); + sshUsed = csrSshHeap->getUsed(); + + csrUsedBefore = csrStream.getUsed(); + result = commandListImmediateCoexisting->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + csrUsedAfter = csrStream.getUsed(); + + auto containerDshHeapCoexisting = cmdContainerCoexisting.getIndirectHeap(HeapType::DYNAMIC_STATE); + auto containerSshHeapCoexisting = cmdContainerCoexisting.getIndirectHeap(HeapType::SURFACE_STATE); + + if (dshPresent) { + EXPECT_EQ(csrDshHeap, containerDshHeapCoexisting); + } else { + EXPECT_EQ(nullptr, containerDshHeapCoexisting); + } + EXPECT_EQ(csrSshHeap, containerSshHeapCoexisting); + + cmdList.clear(); + sbaCmds.clear(); + + ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( + cmdList, + ptrOffset(csrStream.getCpuBase(), csrUsedBefore), + (csrUsedAfter - csrUsedBefore))); + sbaCmds = findAll(cmdList.begin(), cmdList.end()); + EXPECT_EQ(0u, sbaCmds.size()); + + dshUsed = csrDshHeap->getUsed() - dshUsed; + sshUsed = csrSshHeap->getUsed() - sshUsed; + + if (dshPresent) { + EXPECT_LT(0u, dshUsed); + } else { + EXPECT_EQ(0u, dshUsed); + } + EXPECT_LT(0u, sshUsed); + + EXPECT_GE(dshEstimated, dshUsed); + EXPECT_GE(sshEstimated, sshUsed); +} + } // namespace ult } // namespace L0 diff --git a/level_zero/core/test/unit_tests/sources/debugger/test_l0_debugger_1.cpp b/level_zero/core/test/unit_tests/sources/debugger/test_l0_debugger_1.cpp index 117e67b8a0..64c118b36e 100644 --- a/level_zero/core/test/unit_tests/sources/debugger/test_l0_debugger_1.cpp +++ b/level_zero/core/test/unit_tests/sources/debugger/test_l0_debugger_1.cpp @@ -15,6 +15,7 @@ #include "level_zero/core/source/cmdlist/cmdlist.h" #include "level_zero/core/source/event/event.h" #include "level_zero/core/test/unit_tests/fixtures/device_fixture.h" +#include 
"level_zero/core/test/unit_tests/fixtures/module_fixture.h" #include "level_zero/core/test/unit_tests/mocks/mock_cmdqueue.h" #include "level_zero/core/test/unit_tests/mocks/mock_kernel.h" #include "level_zero/core/test/unit_tests/sources/debugger/l0_debugger_fixture.h" @@ -569,5 +570,64 @@ HWTEST2_F(L0DebuggerTest, givenXeHpOrXeHpgCoreAndDebugIsActiveThenDisableL3Cache INSTANTIATE_TEST_CASE_P(SBAModesForDebugger, L0DebuggerParameterizedTests, ::testing::Values(0, 1)); +struct MockKernelImmutableData : public KernelImmutableData { + using KernelImmutableData::isaGraphicsAllocation; + using KernelImmutableData::kernelDescriptor; + using KernelImmutableData::kernelInfo; + + MockKernelImmutableData(L0::Device *device) : KernelImmutableData(device) {} +}; + +HWTEST2_F(L0DebuggerTest, givenFlushTaskSubmissionAndSharedHeapsEnabledWhenAppendingKernelUsingNewHeapThenDebugSurfaceIsProgrammedOnce, IsAtLeastGen12lp) { + using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; + + DebugManagerStateRestore restorer; + NEO::DebugManager.flags.EnableFlushTaskSubmission.set(true); + NEO::DebugManager.flags.EnableImmediateCmdListHeapSharing.set(1); + + ze_command_queue_desc_t queueDesc = {}; + ze_result_t returnValue = ZE_RESULT_SUCCESS; + auto commandList = CommandList::createImmediate(productFamily, device, &queueDesc, false, NEO::EngineGroupType::RenderCompute, returnValue); + EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue); + + EXPECT_TRUE(commandList->isFlushTaskSubmissionEnabled); + EXPECT_TRUE(commandList->immediateCmdListHeapSharing); + + auto kernelInfo = std::make_unique(); + auto kernelDescriptor = std::make_unique(); + auto kernelImmData = std::make_unique(device); + + kernelImmData->kernelInfo = kernelInfo.get(); + kernelImmData->kernelDescriptor = kernelDescriptor.get(); + kernelImmData->isaGraphicsAllocation.reset(new MockGraphicsAllocation()); + + Mock<::L0::Kernel> kernel; + kernel.kernelImmData = kernelImmData.get(); + + CmdListKernelLaunchParams 
launchParams = {}; + ze_group_count_t groupCount{1, 1, 1}; + returnValue = commandList->appendLaunchKernel(kernel.toHandle(), &groupCount, nullptr, 0, nullptr, launchParams); + EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue); + + auto csrHeap = &commandList->csr->getIndirectHeap(NEO::HeapType::SURFACE_STATE, 0); + ASSERT_NE(nullptr, csrHeap); + + auto debugSurfaceState = reinterpret_cast(csrHeap->getCpuBase()); + ASSERT_NE(debugSurfaceState, nullptr); + auto debugSurface = static_cast<::L0::DeviceImp *>(device)->getDebugSurface(); + ASSERT_NE(debugSurface, nullptr); + ASSERT_EQ(debugSurface->getGpuAddress(), debugSurfaceState->getSurfaceBaseAddress()); + + memset(debugSurfaceState, 0, sizeof(*debugSurfaceState)); + + returnValue = commandList->appendLaunchKernel(kernel.toHandle(), &groupCount, nullptr, 0, nullptr, launchParams); + EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue); + + ASSERT_EQ(0u, debugSurfaceState->getSurfaceBaseAddress()); + + kernelImmData->isaGraphicsAllocation.reset(nullptr); + commandList->destroy(); +} + } // namespace ult } // namespace L0 diff --git a/shared/source/command_container/cmdcontainer.cpp b/shared/source/command_container/cmdcontainer.cpp index 3f3538146b..85a9b05280 100644 --- a/shared/source/command_container/cmdcontainer.cpp +++ b/shared/source/command_container/cmdcontainer.cpp @@ -96,6 +96,9 @@ CommandContainer::ErrorCode CommandContainer::initialize(Device *device, Allocat if (!hardwareInfo.capabilityTable.supportsImages && IndirectHeap::Type::DYNAMIC_STATE == i) { continue; } + if (immediateCmdListSharedHeap(static_cast(i))) { + continue; + } allocationIndirectHeaps[i] = heapHelper->getHeapAllocation(i, heapSize, alignedSize, @@ -185,32 +188,40 @@ void *CommandContainer::getHeapSpaceAllowGrow(HeapType heapType, size_t size) { auto indirectHeap = getIndirectHeap(heapType); - if (indirectHeap->getAvailableSpace() < size) { - size_t newSize = indirectHeap->getUsed() + indirectHeap->getAvailableSpace(); - newSize *= 2; - newSize = 
std::max(newSize, indirectHeap->getAvailableSpace() + size); - newSize = alignUp(newSize, MemoryConstants::pageSize); - auto oldAlloc = getIndirectHeapAllocation(heapType); - auto newAlloc = getHeapHelper()->getHeapAllocation(heapType, newSize, MemoryConstants::pageSize, device->getRootDeviceIndex()); - UNRECOVERABLE_IF(!oldAlloc); - UNRECOVERABLE_IF(!newAlloc); - auto oldBase = indirectHeap->getHeapGpuBase(); - indirectHeap->replaceGraphicsAllocation(newAlloc); - indirectHeap->replaceBuffer(newAlloc->getUnderlyingBuffer(), - newAlloc->getUnderlyingBufferSize()); - auto newBase = indirectHeap->getHeapGpuBase(); - getResidencyContainer().push_back(newAlloc); - getDeallocationContainer().push_back(oldAlloc); - setIndirectHeapAllocation(heapType, newAlloc); - if (oldBase != newBase) { - setHeapDirty(heapType); + if (immediateCmdListSharedHeap(heapType)) { + UNRECOVERABLE_IF(indirectHeap == nullptr); + UNRECOVERABLE_IF(indirectHeap->getAvailableSpace() < size); + getResidencyContainer().push_back(indirectHeap->getGraphicsAllocation()); + } else { + if (indirectHeap->getAvailableSpace() < size) { + size_t newSize = indirectHeap->getUsed() + indirectHeap->getAvailableSpace(); + newSize *= 2; + newSize = std::max(newSize, indirectHeap->getAvailableSpace() + size); + newSize = alignUp(newSize, MemoryConstants::pageSize); + auto oldAlloc = getIndirectHeapAllocation(heapType); + auto newAlloc = getHeapHelper()->getHeapAllocation(heapType, newSize, MemoryConstants::pageSize, device->getRootDeviceIndex()); + UNRECOVERABLE_IF(!oldAlloc); + UNRECOVERABLE_IF(!newAlloc); + auto oldBase = indirectHeap->getHeapGpuBase(); + indirectHeap->replaceGraphicsAllocation(newAlloc); + indirectHeap->replaceBuffer(newAlloc->getUnderlyingBuffer(), + newAlloc->getUnderlyingBufferSize()); + auto newBase = indirectHeap->getHeapGpuBase(); + getResidencyContainer().push_back(newAlloc); + getDeallocationContainer().push_back(oldAlloc); + setIndirectHeapAllocation(heapType, newAlloc); + if (oldBase != 
newBase) { + setHeapDirty(heapType); + } } } + return indirectHeap->getSpace(size); } IndirectHeap *CommandContainer::getHeapWithRequiredSizeAndAlignment(HeapType heapType, size_t sizeRequired, size_t alignment) { auto indirectHeap = getIndirectHeap(heapType); + UNRECOVERABLE_IF(indirectHeap == nullptr); auto sizeRequested = sizeRequired; auto heapBuffer = indirectHeap->getSpace(0); @@ -218,27 +229,32 @@ IndirectHeap *CommandContainer::getHeapWithRequiredSizeAndAlignment(HeapType hea sizeRequested += alignment; } - if (indirectHeap->getAvailableSpace() < sizeRequested) { - size_t newSize = indirectHeap->getUsed() + indirectHeap->getAvailableSpace(); - newSize = alignUp(newSize, MemoryConstants::pageSize); - auto oldAlloc = getIndirectHeapAllocation(heapType); - auto newAlloc = getHeapHelper()->getHeapAllocation(heapType, newSize, MemoryConstants::pageSize, device->getRootDeviceIndex()); - UNRECOVERABLE_IF(!oldAlloc); - UNRECOVERABLE_IF(!newAlloc); - auto oldBase = indirectHeap->getHeapGpuBase(); - indirectHeap->replaceGraphicsAllocation(newAlloc); - indirectHeap->replaceBuffer(newAlloc->getUnderlyingBuffer(), - newAlloc->getUnderlyingBufferSize()); - auto newBase = indirectHeap->getHeapGpuBase(); - getResidencyContainer().push_back(newAlloc); - getDeallocationContainer().push_back(oldAlloc); - setIndirectHeapAllocation(heapType, newAlloc); - if (oldBase != newBase) { - setHeapDirty(heapType); - } - if (heapType == HeapType::SURFACE_STATE) { - indirectHeap->getSpace(reservedSshSize); - sshAllocations.push_back(oldAlloc); + if (immediateCmdListSharedHeap(heapType)) { + UNRECOVERABLE_IF(indirectHeap->getAvailableSpace() < sizeRequested); + getResidencyContainer().push_back(indirectHeap->getGraphicsAllocation()); + } else { + if (indirectHeap->getAvailableSpace() < sizeRequested) { + size_t newSize = indirectHeap->getUsed() + indirectHeap->getAvailableSpace(); + newSize = alignUp(newSize, MemoryConstants::pageSize); + auto oldAlloc = 
getIndirectHeapAllocation(heapType); + auto newAlloc = getHeapHelper()->getHeapAllocation(heapType, newSize, MemoryConstants::pageSize, device->getRootDeviceIndex()); + UNRECOVERABLE_IF(!oldAlloc); + UNRECOVERABLE_IF(!newAlloc); + auto oldBase = indirectHeap->getHeapGpuBase(); + indirectHeap->replaceGraphicsAllocation(newAlloc); + indirectHeap->replaceBuffer(newAlloc->getUnderlyingBuffer(), + newAlloc->getUnderlyingBufferSize()); + auto newBase = indirectHeap->getHeapGpuBase(); + getResidencyContainer().push_back(newAlloc); + getDeallocationContainer().push_back(oldAlloc); + setIndirectHeapAllocation(heapType, newAlloc); + if (oldBase != newBase) { + setHeapDirty(heapType); + } + if (heapType == HeapType::SURFACE_STATE) { + indirectHeap->getSpace(reservedSshSize); + sshAllocations.push_back(oldAlloc); + } } } @@ -329,7 +345,19 @@ void CommandContainer::prepareBindfulSsh() { } IndirectHeap *CommandContainer::getIndirectHeap(HeapType heapType) { - return indirectHeaps[heapType].get(); + if (immediateCmdListSharedHeap(heapType)) { + return heapType == HeapType::SURFACE_STATE ? 
sharedSshCsrHeap : sharedDshCsrHeap; + } else { + return indirectHeaps[heapType].get(); + } +} + +void CommandContainer::ensureHeapSizePrepared(size_t sshRequiredSize, size_t dshRequiredSize) { + sharedSshCsrHeap = &immediateCmdListCsr->getIndirectHeap(HeapType::SURFACE_STATE, sshRequiredSize); + + if (dshRequiredSize > 0) { + sharedDshCsrHeap = &immediateCmdListCsr->getIndirectHeap(HeapType::DYNAMIC_STATE, dshRequiredSize); + } } } // namespace NEO diff --git a/shared/source/command_container/cmdcontainer.h b/shared/source/command_container/cmdcontainer.h index d88ccd45ba..ac83343f6c 100644 --- a/shared/source/command_container/cmdcontainer.h +++ b/shared/source/command_container/cmdcontainer.h @@ -17,6 +17,7 @@ #include namespace NEO { +class CommandStreamReceiver; class Device; class GraphicsAllocation; class LinearStream; @@ -94,13 +95,20 @@ class CommandContainer : public NonCopyableOrMovableClass { void setIddBlock(void *iddBlock) { this->iddBlock = iddBlock; } void *getIddBlock() { return iddBlock; } uint32_t getNumIddPerBlock() const { return numIddsPerBlock; } + void setNumIddPerBlock(uint32_t value) { numIddsPerBlock = value; } void setReservedSshSize(size_t reserveSize) { reservedSshSize = reserveSize; } bool getFlushTaskUsedForImmediate() const { return isFlushTaskUsedForImmediate; } void setFlushTaskUsedForImmediate(bool flushTaskUsedForImmediate) { isFlushTaskUsedForImmediate = flushTaskUsedForImmediate; } - + void setImmediateCmdListCsr(CommandStreamReceiver *newValue) { + this->immediateCmdListCsr = newValue; + } + bool immediateCmdListSharedHeap(HeapType heapType) { + return (this->immediateCmdListCsr != nullptr && (heapType == HeapType::DYNAMIC_STATE || heapType == HeapType::SURFACE_STATE)); + } + void ensureHeapSizePrepared(size_t sshRequiredSize, size_t dshRequiredSize); HeapContainer sshAllocations; uint64_t currentLinearStreamStartOffset = 0u; uint32_t slmSize = std::numeric_limits::max(); @@ -129,6 +137,9 @@ class CommandContainer : public 
NonCopyableOrMovableClass { Device *device = nullptr; AllocationsList *reusableAllocationList = nullptr; size_t reservedSshSize = 0; + CommandStreamReceiver *immediateCmdListCsr = nullptr; + IndirectHeap *sharedSshCsrHeap = nullptr; + IndirectHeap *sharedDshCsrHeap = nullptr; uint32_t dirtyHeaps = std::numeric_limits::max(); uint32_t numIddsPerBlock = 64; diff --git a/shared/source/command_container/command_encoder.h b/shared/source/command_container/command_encoder.h index 26c1a1d28c..dd2769350a 100644 --- a/shared/source/command_container/command_encoder.h +++ b/shared/source/command_container/command_encoder.h @@ -30,6 +30,7 @@ class IndirectHeap; class LogicalStateHelper; class Gmm; struct HardwareInfo; +struct KernelInfo; struct StateComputeModeProperties; struct EncodeDispatchKernelArgs { @@ -112,6 +113,10 @@ struct EncodeDispatchKernel { static void adjustWalkOrder(WALKER_TYPE &walkerCmd, uint32_t requiredWorkGroupOrder, const HardwareInfo &hwInfo); static constexpr bool shouldUpdateGlobalAtomics(bool &currentVal, bool refVal, bool updateCurrent); + + static size_t getSizeRequiredDsh(const KernelInfo &kernelInfo); + static size_t getSizeRequiredSsh(const KernelInfo &kernelInfo); + inline static uint32_t additionalSizeRequiredDsh(); }; template @@ -121,8 +126,8 @@ struct EncodeStates { using SAMPLER_STATE = typename GfxFamily::SAMPLER_STATE; using SAMPLER_BORDER_COLOR_STATE = typename GfxFamily::SAMPLER_BORDER_COLOR_STATE; - static const uint32_t alignIndirectStatePointer = MemoryConstants::cacheLineSize; - static const size_t alignInterfaceDescriptorData = MemoryConstants::cacheLineSize; + static constexpr uint32_t alignIndirectStatePointer = MemoryConstants::cacheLineSize; + static constexpr size_t alignInterfaceDescriptorData = MemoryConstants::cacheLineSize; static uint32_t copySamplerState(IndirectHeap *dsh, uint32_t samplerStateOffset, diff --git a/shared/source/command_container/command_encoder.inl b/shared/source/command_container/command_encoder.inl index
0664c10444..397f19c0d3 100644 --- a/shared/source/command_container/command_encoder.inl +++ b/shared/source/command_container/command_encoder.inl @@ -27,6 +27,7 @@ #include "shared/source/kernel/implicit_args.h" #include "shared/source/kernel/kernel_descriptor.h" #include "shared/source/os_interface/hw_info_config.h" +#include "shared/source/program/kernel_info.h" #include "encode_surface_state.inl" @@ -698,6 +699,39 @@ void EncodeDispatchKernel::adjustInterfaceDescriptorData(INTERFACE_DESCR template constexpr bool EncodeDispatchKernel::shouldUpdateGlobalAtomics(bool &currentVal, bool refVal, bool updateCurrent) { return false; } +template +size_t EncodeDispatchKernel::getSizeRequiredDsh(const KernelInfo &kernelInfo) { + using INTERFACE_DESCRIPTOR_DATA = typename Family::INTERFACE_DESCRIPTOR_DATA; + constexpr auto samplerStateSize = sizeof(typename Family::SAMPLER_STATE); + const auto numSamplers = kernelInfo.kernelDescriptor.payloadMappings.samplerTable.numSamplers; + const auto additionalDshSize = additionalSizeRequiredDsh(); + if (numSamplers == 0U) { + return alignUp(additionalDshSize, EncodeStates::alignInterfaceDescriptorData); + } + + size_t size = kernelInfo.kernelDescriptor.payloadMappings.samplerTable.tableOffset - + kernelInfo.kernelDescriptor.payloadMappings.samplerTable.borderColor; + size = alignUp(size, EncodeStates::alignIndirectStatePointer); + + size += numSamplers * samplerStateSize; + size = alignUp(size, INTERFACE_DESCRIPTOR_DATA::SAMPLERSTATEPOINTER_ALIGN_SIZE); + + if (additionalDshSize > 0) { + size += additionalDshSize; + size = alignUp(size, EncodeStates::alignInterfaceDescriptorData); + } + + return size; +} + +template +size_t EncodeDispatchKernel::getSizeRequiredSsh(const KernelInfo &kernelInfo) { + using BINDING_TABLE_STATE = typename Family::BINDING_TABLE_STATE; + size_t requiredSshSize = kernelInfo.heapInfo.SurfaceStateHeapSize; + requiredSshSize = alignUp(requiredSshSize, BINDING_TABLE_STATE::SURFACESTATEPOINTER_ALIGN_SIZE); + return
requiredSshSize; +} + template void EncodeIndirectParams::setGlobalWorkSizeIndirect(CommandContainer &container, const NEO::CrossThreadDataOffset offsets[3], uint64_t crossThreadAddress, const uint32_t *lws) { for (int i = 0; i < 3; ++i) { diff --git a/shared/source/command_container/command_encoder_bdw_and_later.inl b/shared/source/command_container/command_encoder_bdw_and_later.inl index 1f703b901e..a11208966b 100644 --- a/shared/source/command_container/command_encoder_bdw_and_later.inl +++ b/shared/source/command_container/command_encoder_bdw_and_later.inl @@ -104,13 +104,13 @@ void EncodeDispatchKernel::encode(CommandContainer &container, EncodeDis PreemptionHelper::programInterfaceDescriptorDataPreemption(&idd, args.preemptionMode); - auto heap = ApiSpecificConfig::getBindlessConfiguration() ? args.device->getBindlessHeapsHelper()->getHeap(BindlessHeapsHelper::GLOBAL_DSH) : container.getIndirectHeap(HeapType::DYNAMIC_STATE); - UNRECOVERABLE_IF(!heap); - uint32_t samplerStateOffset = 0; uint32_t samplerCount = 0; if (kernelDescriptor.payloadMappings.samplerTable.numSamplers > 0) { + auto heap = ApiSpecificConfig::getBindlessConfiguration() ? 
args.device->getBindlessHeapsHelper()->getHeap(BindlessHeapsHelper::GLOBAL_DSH) : container.getIndirectHeap(HeapType::DYNAMIC_STATE); + UNRECOVERABLE_IF(!heap); + samplerCount = kernelDescriptor.payloadMappings.samplerTable.numSamplers; samplerStateOffset = EncodeStates::copySamplerState(heap, kernelDescriptor.payloadMappings.samplerTable.tableOffset, kernelDescriptor.payloadMappings.samplerTable.numSamplers, @@ -539,4 +539,9 @@ void EncodeDispatchKernel::setupPostSyncMocs(WALKER_TYPE &walkerCmd, con template void EncodeDispatchKernel::adjustWalkOrder(WALKER_TYPE &walkerCmd, uint32_t requiredWorkGroupOrder, const HardwareInfo &hwInfo) {} +template +uint32_t EncodeDispatchKernel::additionalSizeRequiredDsh() { + return sizeof(typename Family::INTERFACE_DESCRIPTOR_DATA); +} + } // namespace NEO diff --git a/shared/source/command_container/command_encoder_xehp_and_later.inl b/shared/source/command_container/command_encoder_xehp_and_later.inl index 6edab2ab19..850f1541ba 100644 --- a/shared/source/command_container/command_encoder_xehp_and_later.inl +++ b/shared/source/command_container/command_encoder_xehp_and_later.inl @@ -127,13 +127,13 @@ void EncodeDispatchKernel::encode(CommandContainer &container, EncodeDis PreemptionHelper::programInterfaceDescriptorDataPreemption(&idd, args.preemptionMode); if constexpr (Family::supportsSampler) { - auto heap = ApiSpecificConfig::getBindlessConfiguration() ? args.device->getBindlessHeapsHelper()->getHeap(BindlessHeapsHelper::GLOBAL_DSH) : container.getIndirectHeap(HeapType::DYNAMIC_STATE); - UNRECOVERABLE_IF(!heap); - uint32_t samplerStateOffset = 0; uint32_t samplerCount = 0; if (kernelDescriptor.payloadMappings.samplerTable.numSamplers > 0) { + auto heap = ApiSpecificConfig::getBindlessConfiguration() ? 
args.device->getBindlessHeapsHelper()->getHeap(BindlessHeapsHelper::GLOBAL_DSH) : container.getIndirectHeap(HeapType::DYNAMIC_STATE); + UNRECOVERABLE_IF(!heap); + samplerCount = kernelDescriptor.payloadMappings.samplerTable.numSamplers; samplerStateOffset = EncodeStates::copySamplerState( heap, kernelDescriptor.payloadMappings.samplerTable.tableOffset, @@ -768,4 +768,9 @@ inline void EncodeStoreMMIO::appendFlags(MI_STORE_REGISTER_MEM *storeReg template void EncodeDispatchKernel::adjustWalkOrder(WALKER_TYPE &walkerCmd, uint32_t requiredWorkGroupOrder, const HardwareInfo &hwInfo) {} +template +uint32_t EncodeDispatchKernel::additionalSizeRequiredDsh() { + return 0u; +} + } // namespace NEO diff --git a/shared/source/command_stream/command_stream_receiver_hw.h b/shared/source/command_stream/command_stream_receiver_hw.h index ac3bb81c42..afc6dc7df0 100644 --- a/shared/source/command_stream/command_stream_receiver_hw.h +++ b/shared/source/command_stream/command_stream_receiver_hw.h @@ -140,6 +140,10 @@ class CommandStreamReceiverHw : public CommandStreamReceiver { } void initializeDeviceWithFirstSubmission() override; + HeapDirtyState &getSshState() { + return sshState; + } + protected: void programPreemption(LinearStream &csr, DispatchFlags &dispatchFlags); void programL3(LinearStream &csr, uint32_t &newL3Config); diff --git a/shared/source/command_stream/command_stream_receiver_hw_base.inl b/shared/source/command_stream/command_stream_receiver_hw_base.inl index d2a7aeffb8..5bc91b34c3 100644 --- a/shared/source/command_stream/command_stream_receiver_hw_base.inl +++ b/shared/source/command_stream/command_stream_receiver_hw_base.inl @@ -342,7 +342,7 @@ CompletionStamp CommandStreamReceiverHw::flushTask( if (stallingCommandsOnNextFlushRequired) { programStallingCommandsForBarrier(commandStreamCSR, dispatchFlags); } - const bool hasDsh = hwInfo.capabilityTable.supportsImages; + const bool hasDsh = hwInfo.capabilityTable.supportsImages && dsh != nullptr; bool dshDirty = 
hasDsh ? dshState.updateAndCheck(dsh) : false; bool iohDirty = iohState.updateAndCheck(ioh); bool sshDirty = sshState.updateAndCheck(ssh); diff --git a/shared/source/debug_settings/debug_variables_base.inl b/shared/source/debug_settings/debug_variables_base.inl index 3662c1361e..132b419cf6 100644 --- a/shared/source/debug_settings/debug_variables_base.inl +++ b/shared/source/debug_settings/debug_variables_base.inl @@ -407,6 +407,7 @@ DECLARE_DEBUG_VARIABLE(int32_t, EnableDrmCompletionFence, -1, "Enables DRM compl DECLARE_DEBUG_VARIABLE(int32_t, UseDrmCompletionFenceForAllAllocations, -1, "Uses DRM completion fence for all allocations, -1:default (disabled), 0:disable, 1:enable") DECLARE_DEBUG_VARIABLE(int32_t, EnableChipsetUniqueUUID, -1, "Enables retrieving chipset unique UUID using telemetry, -1:default (disabled), 0:disable, 1:enable") DECLARE_DEBUG_VARIABLE(int32_t, EnableFlushTaskSubmission, -1, "Driver uses csr flushTask for immediate commandlist submissions, -1:default (enabled), 0:disabled, 1:enabled") +DECLARE_DEBUG_VARIABLE(int32_t, EnableImmediateCmdListHeapSharing, -1, "Immediate command lists using flush task use current csr heap instead private cmd list heap, -1:default (disabled), 0:disabled, 1:enabled") DECLARE_DEBUG_VARIABLE(int32_t, EnableBcsSwControlWa, -1, "Enable BCS WA via BCSSWCONTROL MMIO. 
-1: default, 0: disabled, 1: if src in system mem, 2: if dst in system mem, 3: if src and dst in system mem, 4: always") /* IMPLICIT SCALING */ diff --git a/shared/test/common/helpers/unit_test_helper.h b/shared/test/common/helpers/unit_test_helper.h index deda301271..9a58f75d87 100644 --- a/shared/test/common/helpers/unit_test_helper.h +++ b/shared/test/common/helpers/unit_test_helper.h @@ -91,6 +91,7 @@ struct UnitTestHelper { static bool getDisableFusionStateFromFrontEndCommand(const typename GfxFamily::VFE_STATE_TYPE &feCmd); static bool getComputeDispatchAllWalkerFromFrontEndCommand(const typename GfxFamily::VFE_STATE_TYPE &feCmd); static bool getSystolicFlagValueFromPipelineSelectCommand(const typename GfxFamily::PIPELINE_SELECT &pipelineSelectCmd); + static size_t getAdditionalDshSize(); }; } // namespace NEO diff --git a/shared/test/common/helpers/unit_test_helper_bdw_and_later.inl b/shared/test/common/helpers/unit_test_helper_bdw_and_later.inl index 2a7d321e40..ba4000f8e6 100644 --- a/shared/test/common/helpers/unit_test_helper_bdw_and_later.inl +++ b/shared/test/common/helpers/unit_test_helper_bdw_and_later.inl @@ -92,4 +92,9 @@ bool UnitTestHelper::getSystolicFlagValueFromPipelineSelectCommand(co return false; } +template +size_t UnitTestHelper::getAdditionalDshSize() { + return sizeof(typename GfxFamily::INTERFACE_DESCRIPTOR_DATA); +} + } // namespace NEO diff --git a/shared/test/common/helpers/unit_test_helper_xehp_and_later.inl b/shared/test/common/helpers/unit_test_helper_xehp_and_later.inl index 24eebbe837..6b8cc20d22 100644 --- a/shared/test/common/helpers/unit_test_helper_xehp_and_later.inl +++ b/shared/test/common/helpers/unit_test_helper_xehp_and_later.inl @@ -121,4 +121,9 @@ bool UnitTestHelper::getSystolicFlagValueFromPipelineSelectCommand(co return pipelineSelectCmd.getSystolicModeEnable(); } +template +size_t UnitTestHelper::getAdditionalDshSize() { + return 0; +} + } // namespace NEO diff --git a/shared/test/common/test_files/igdrcl.config 
b/shared/test/common/test_files/igdrcl.config index 8545ac097a..a08b80a180 100644 --- a/shared/test/common/test_files/igdrcl.config +++ b/shared/test/common/test_files/igdrcl.config @@ -172,6 +172,7 @@ EnableUsmConcurrentAccessSupport = 0 EnableSharedSystemUsmSupport = -1 EnablePassInlineData = -1 ForceFineGrainedSVMSupport = -1 +EnableImmediateCmdListHeapSharing = -1 ForcePipeSupport = -1 ForceSystemMemoryPlacement = 0 ForceNonSystemMemoryPlacement = 0 diff --git a/shared/test/unit_test/command_container/command_container_tests.cpp b/shared/test/unit_test/command_container/command_container_tests.cpp index ae9081e718..689dfb6f76 100644 --- a/shared/test/unit_test/command_container/command_container_tests.cpp +++ b/shared/test/unit_test/command_container/command_container_tests.cpp @@ -18,26 +18,17 @@ using namespace NEO; constexpr uint32_t defaultNumIddsPerBlock = 64; -class CommandContainerTest : public DeviceFixture, - public ::testing::Test { +using CommandContainerFixture = DeviceFixture; +using CommandContainerTest = Test; +class MyMockCommandContainer : public CommandContainer { public: - void SetUp() override { - ::testing::Test::SetUp(); - DeviceFixture::setUp(); - } - void TearDown() override { - DeviceFixture::tearDown(); - ::testing::Test::TearDown(); - } + using CommandContainer::allocationIndirectHeaps; + using CommandContainer::dirtyHeaps; + using CommandContainer::getTotalCmdBufferSize; }; struct CommandContainerHeapStateTests : public ::testing::Test { - class MyMockCommandContainer : public CommandContainer { - public: - using CommandContainer::dirtyHeaps; - }; - MyMockCommandContainer myCommandContainer; }; @@ -795,23 +786,18 @@ TEST_F(CommandContainerTest, givenCmdContainerWhenContainerIsInitializedThenStre TEST_F(CommandContainerTest, GivenCmdContainerAndDebugFlagWhenContainerIsInitializedThenStreamSizeEqualsAlignedTotalCmdBuffSizeDecreasedOfReservedSize) { DebugManagerStateRestore restorer; - class MyCommandContainer : public CommandContainer 
{ - public: - using CommandContainer::getTotalCmdBufferSize; - }; - DebugManager.flags.OverrideCmdListCmdBufferSizeInKb.set(0); - MyCommandContainer cmdContainer; + MyMockCommandContainer cmdContainer; cmdContainer.initialize(pDevice, nullptr, true); size_t alignedSize = alignUp(cmdContainer.getTotalCmdBufferSize(), MemoryConstants::pageSize64k); - EXPECT_EQ(cmdContainer.getCommandStream()->getMaxAvailableSpace(), alignedSize - MyCommandContainer::cmdBufferReservedSize); + EXPECT_EQ(cmdContainer.getCommandStream()->getMaxAvailableSpace(), alignedSize - MyMockCommandContainer::cmdBufferReservedSize); auto newSizeInKB = 512; DebugManager.flags.OverrideCmdListCmdBufferSizeInKb.set(newSizeInKB); - MyCommandContainer cmdContainer2; + MyMockCommandContainer cmdContainer2; cmdContainer2.initialize(pDevice, nullptr, true); alignedSize = alignUp(cmdContainer.getTotalCmdBufferSize(), MemoryConstants::pageSize64k); - EXPECT_EQ(cmdContainer2.getCommandStream()->getMaxAvailableSpace(), alignedSize - MyCommandContainer::cmdBufferReservedSize); + EXPECT_EQ(cmdContainer2.getCommandStream()->getMaxAvailableSpace(), alignedSize - MyMockCommandContainer::cmdBufferReservedSize); } TEST_F(CommandContainerTest, givenCmdContainerWhenAlocatingNextCmdBufferThenStreamSizeEqualAlignedTotalCmdBuffSizeDecreasedOfReservedSize) { @@ -841,15 +827,68 @@ TEST_F(CommandContainerTest, givenCmdContainerWhenCloseAndAllocateNextCommandBuf } TEST_F(CommandContainerTest, GivenCmdContainerWhenContainerIsInitializedThenSurfaceStateIndirectHeapSizeIsCorrect) { - - class MyCommandContainer : public CommandContainer { - public: - using CommandContainer::allocationIndirectHeaps; - }; - - MyCommandContainer cmdContainer; + MyMockCommandContainer cmdContainer; cmdContainer.initialize(pDevice, nullptr, true); auto size = cmdContainer.allocationIndirectHeaps[IndirectHeap::Type::SURFACE_STATE]->getUnderlyingBufferSize(); constexpr size_t expectedHeapSize = MemoryConstants::pageSize64k; EXPECT_EQ(expectedHeapSize, 
size); } + +TEST_F(CommandContainerTest, givenCmdContainerHasImmediateCsrWhenGettingHeapWithoutEnsuringSpaceThenExpectNullptrReturnedOrUnrecoverable) { + CommandContainer cmdContainer; + cmdContainer.setImmediateCmdListCsr(pDevice->getDefaultEngine().commandStreamReceiver); + cmdContainer.setNumIddPerBlock(1); + auto code = cmdContainer.initialize(pDevice, nullptr, true); + EXPECT_EQ(CommandContainer::ErrorCode::SUCCESS, code); + + EXPECT_EQ(nullptr, cmdContainer.getIndirectHeap(HeapType::DYNAMIC_STATE)); + EXPECT_EQ(nullptr, cmdContainer.getIndirectHeap(HeapType::SURFACE_STATE)); + + EXPECT_THROW(cmdContainer.getHeapSpaceAllowGrow(HeapType::DYNAMIC_STATE, 0), std::exception); + EXPECT_THROW(cmdContainer.getHeapWithRequiredSizeAndAlignment(HeapType::DYNAMIC_STATE, 0, 0), std::exception); + + EXPECT_THROW(cmdContainer.getHeapSpaceAllowGrow(HeapType::SURFACE_STATE, 0), std::exception); + EXPECT_THROW(cmdContainer.getHeapWithRequiredSizeAndAlignment(HeapType::SURFACE_STATE, 0, 0), std::exception); + + cmdContainer.ensureHeapSizePrepared(0, 0); + + EXPECT_EQ(nullptr, cmdContainer.getIndirectHeap(HeapType::DYNAMIC_STATE)); + EXPECT_NE(nullptr, cmdContainer.getIndirectHeap(HeapType::SURFACE_STATE)); + + EXPECT_THROW(cmdContainer.getHeapSpaceAllowGrow(HeapType::DYNAMIC_STATE, 0), std::exception); + EXPECT_THROW(cmdContainer.getHeapWithRequiredSizeAndAlignment(HeapType::DYNAMIC_STATE, 0, 0), std::exception); + + EXPECT_NO_THROW(cmdContainer.getHeapSpaceAllowGrow(HeapType::SURFACE_STATE, 0)); + EXPECT_NO_THROW(cmdContainer.getHeapWithRequiredSizeAndAlignment(HeapType::SURFACE_STATE, 0, 0)); + + cmdContainer.ensureHeapSizePrepared(4 * MemoryConstants::kiloByte, 4 * MemoryConstants::kiloByte); + + auto dshHeap = cmdContainer.getIndirectHeap(HeapType::DYNAMIC_STATE); + EXPECT_NE(nullptr, dshHeap); + auto sshHeap = cmdContainer.getIndirectHeap(HeapType::SURFACE_STATE); + EXPECT_NE(nullptr, sshHeap); + + size_t sizeUsedDsh = dshHeap->getUsed(); + size_t sizeUsedSsh = 
sshHeap->getUsed(); + + void *dshPtr = cmdContainer.getHeapSpaceAllowGrow(HeapType::DYNAMIC_STATE, 64); + void *sshPtr = cmdContainer.getHeapSpaceAllowGrow(HeapType::SURFACE_STATE, 64); + + EXPECT_EQ(ptrOffset(dshHeap->getCpuBase(), sizeUsedDsh), dshPtr); + EXPECT_EQ(ptrOffset(sshHeap->getCpuBase(), sizeUsedSsh), sshPtr); + + auto alignedHeapDsh = cmdContainer.getHeapWithRequiredSizeAndAlignment(HeapType::DYNAMIC_STATE, 128, 128); + auto alignedHeapSsh = cmdContainer.getHeapWithRequiredSizeAndAlignment(HeapType::SURFACE_STATE, 128, 128); + + EXPECT_EQ(dshHeap, alignedHeapDsh); + EXPECT_EQ(sshHeap, alignedHeapSsh); + + dshHeap->getSpace(dshHeap->getAvailableSpace() - 32); + sshHeap->getSpace(sshHeap->getAvailableSpace() - 32); + + EXPECT_THROW(cmdContainer.getHeapSpaceAllowGrow(HeapType::DYNAMIC_STATE, 64), std::exception); + EXPECT_THROW(cmdContainer.getHeapWithRequiredSizeAndAlignment(HeapType::DYNAMIC_STATE, 64, 64), std::exception); + + EXPECT_THROW(cmdContainer.getHeapSpaceAllowGrow(HeapType::SURFACE_STATE, 64), std::exception); + EXPECT_THROW(cmdContainer.getHeapWithRequiredSizeAndAlignment(HeapType::SURFACE_STATE, 64, 64), std::exception); +} diff --git a/shared/test/unit_test/command_stream/command_stream_receiver_tests.cpp b/shared/test/unit_test/command_stream/command_stream_receiver_tests.cpp index cd0f60e6b1..b723f11a12 100644 --- a/shared/test/unit_test/command_stream/command_stream_receiver_tests.cpp +++ b/shared/test/unit_test/command_stream/command_stream_receiver_tests.cpp @@ -2124,3 +2124,27 @@ using SystolicSupport = IsAnyProducts(); } + +HWTEST_F(CommandStreamReceiverTest, givenSshDirtyStateWhenUpdatingStateWithNewHeapThenExpectDirtyStateTrue) { + MockGraphicsAllocation allocation{}; + allocation.gpuAddress = 0xABC000; + allocation.size = 0x1000; + + IndirectHeap dummyHeap(&allocation, false); + + auto dirtyStateCopy = static_cast *>(commandStreamReceiver)->getSshState(); + + bool check = dirtyStateCopy.updateAndCheck(&dummyHeap); + 
EXPECT_TRUE(check); + + check = dirtyStateCopy.updateAndCheck(&dummyHeap); + EXPECT_FALSE(check); + + auto dirtyState = static_cast *>(commandStreamReceiver)->getSshState(); + + check = dirtyState.updateAndCheck(&dummyHeap); + EXPECT_TRUE(check); + + check = dirtyState.updateAndCheck(&dummyHeap); + EXPECT_FALSE(check); +} diff --git a/shared/test/unit_test/encoders/test_encode_dispatch_kernel.cpp b/shared/test/unit_test/encoders/test_encode_dispatch_kernel.cpp index c8ce207a16..2c4b14f703 100644 --- a/shared/test/unit_test/encoders/test_encode_dispatch_kernel.cpp +++ b/shared/test/unit_test/encoders/test_encode_dispatch_kernel.cpp @@ -17,6 +17,7 @@ #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/helpers/default_hw_info.h" #include "shared/test/common/helpers/gtest_helpers.h" +#include "shared/test/common/helpers/unit_test_helper.h" #include "shared/test/common/mocks/mock_device.h" #include "shared/test/common/test_macros/test.h" #include "shared/test/unit_test/fixtures/command_container_fixture.h" @@ -1324,3 +1325,67 @@ HWTEST_F(BindlessCommandEncodeStatesTest, givenBindlessModeDisabledelWithSampler EXPECT_EQ(std::find(cmdContainer->getResidencyContainer().begin(), cmdContainer->getResidencyContainer().end(), pDevice->getBindlessHeapsHelper()->getHeap(BindlessHeapsHelper::GLOBAL_DSH)->getGraphicsAllocation()), cmdContainer->getResidencyContainer().end()); } + +HWTEST_F(CommandEncodeStatesTest, givenKernelInfoWhenGettingRequiredDshSpaceThenReturnCorrectValues) { + using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA; + using SAMPLER_STATE = typename FamilyType::SAMPLER_STATE; + + size_t additionalSize = UnitTestHelper::getAdditionalDshSize(); + size_t expectedSize = alignUp(additionalSize, EncodeStates::alignInterfaceDescriptorData); + + // no samplers + kernelInfo.kernelDescriptor.payloadMappings.samplerTable.numSamplers = 0; + size_t size = 
EncodeDispatchKernel::getSizeRequiredDsh(kernelInfo); + EXPECT_EQ(expectedSize, size); + + // two samplers, no border color state + kernelInfo.kernelDescriptor.payloadMappings.samplerTable.numSamplers = 2; + kernelInfo.kernelDescriptor.payloadMappings.samplerTable.tableOffset = 0; + kernelInfo.kernelDescriptor.payloadMappings.samplerTable.borderColor = 0; + + // align samplers + size_t alignedSamplers = alignUp(2 * sizeof(SAMPLER_STATE), INTERFACE_DESCRIPTOR_DATA::SAMPLERSTATEPOINTER_ALIGN_SIZE); + + // additional IDD for requiring platforms + if (additionalSize > 0) { + expectedSize = alignUp(alignedSamplers + additionalSize, EncodeStates::alignInterfaceDescriptorData); + } else { + expectedSize = alignedSamplers; + } + + size = EncodeDispatchKernel::getSizeRequiredDsh(kernelInfo); + EXPECT_EQ(expectedSize, size); + + // three samplers, border color state + kernelInfo.kernelDescriptor.payloadMappings.samplerTable.numSamplers = 3; + kernelInfo.kernelDescriptor.payloadMappings.samplerTable.tableOffset = 32; + + // align border color state and samplers + alignedSamplers = alignUp(alignUp(32, EncodeStates::alignIndirectStatePointer) + 3 * sizeof(SAMPLER_STATE), INTERFACE_DESCRIPTOR_DATA::SAMPLERSTATEPOINTER_ALIGN_SIZE); + + // additional IDD for requiring platforms + if (additionalSize > 0) { + expectedSize = alignUp(alignedSamplers + additionalSize, EncodeStates::alignInterfaceDescriptorData); + } else { + expectedSize = alignedSamplers; + } + size = EncodeDispatchKernel::getSizeRequiredDsh(kernelInfo); + EXPECT_EQ(expectedSize, size); +} + +HWTEST_F(CommandEncodeStatesTest, givenKernelInfoWhenGettingRequiredSshSpaceThenReturnCorrectValues) { + using BINDING_TABLE_STATE = typename FamilyType::BINDING_TABLE_STATE; + using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; + + // no surface states + kernelInfo.heapInfo.SurfaceStateHeapSize = 0; + size_t size = EncodeDispatchKernel::getSizeRequiredSsh(kernelInfo); + EXPECT_EQ(0u, size); + + // two surface 
states and BTI indices + kernelInfo.heapInfo.SurfaceStateHeapSize = 2 * sizeof(RENDER_SURFACE_STATE) + 2 * sizeof(uint32_t); + size_t expectedSize = alignUp(kernelInfo.heapInfo.SurfaceStateHeapSize, BINDING_TABLE_STATE::SURFACESTATEPOINTER_ALIGN_SIZE); + + size = EncodeDispatchKernel::getSizeRequiredSsh(kernelInfo); + EXPECT_EQ(expectedSize, size); +} diff --git a/shared/test/unit_test/fixtures/command_container_fixture.h b/shared/test/unit_test/fixtures/command_container_fixture.h index 0f85de7edd..2319729814 100644 --- a/shared/test/unit_test/fixtures/command_container_fixture.h +++ b/shared/test/unit_test/fixtures/command_container_fixture.h @@ -8,6 +8,7 @@ #pragma once #include "shared/source/command_container/command_encoder.h" #include "shared/source/kernel/kernel_descriptor.h" +#include "shared/source/program/kernel_info.h" #include "shared/test/common/fixtures/device_fixture.h" #include "shared/test/common/test_macros/hw_test.h" @@ -44,6 +45,7 @@ class CommandEncodeStatesFixture : public DeviceFixture { } KernelDescriptor descriptor; + KernelInfo kernelInfo; std::unique_ptr cmdContainer; };