diff --git a/level_zero/core/source/cmdlist/cmdlist.h b/level_zero/core/source/cmdlist/cmdlist.h index 3c88be0a85..3970e38829 100644 --- a/level_zero/core/source/cmdlist/cmdlist.h +++ b/level_zero/core/source/cmdlist/cmdlist.h @@ -351,6 +351,7 @@ struct CommandList : _ze_command_list_handle_t { bool pipelineSelectStateTracking = false; bool stateComputeModeTracking = false; bool signalAllEventPackets = false; + bool stateBaseAddressTracking = false; }; using CommandListAllocatorFn = CommandList *(*)(uint32_t); diff --git a/level_zero/core/source/cmdlist/cmdlist_hw.h b/level_zero/core/source/cmdlist/cmdlist_hw.h index 399c15dd35..35eaaeff63 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw.h +++ b/level_zero/core/source/cmdlist/cmdlist_hw.h @@ -259,6 +259,7 @@ struct CommandListCoreFamily : CommandListImp { ze_result_t prepareIndirectParams(const ze_group_count_t *threadGroupDimensions); void updateStreamProperties(Kernel &kernel, bool isCooperative); + void updateStateBaseAddressStreamProperties(Kernel &kernel, bool updateRequiredState, bool captureBaseAddressState); void clearCommandsToPatch(); size_t getTotalSizeForCopyRegion(const ze_copy_region_t *region, uint32_t pitch, uint32_t slicePitch); @@ -307,7 +308,19 @@ struct CommandListCoreFamily : CommandListImp { void dispatchEventRemainingPacketsPostSyncOperation(Event *event); void dispatchEventPostSyncOperation(Event *event, uint32_t value, bool omitFirstOperation, bool useMax, bool useLastPipeControl); + int64_t currentSurfaceStateBaseAddress = -1; + int64_t currentDynamicStateBaseAddress = -1; + int64_t currentIndirectObjectBaseAddress = -1; + int64_t currentBindingTablePoolBaseAddress = -1; + + size_t currentSurfaceStateSize = std::numeric_limits::max(); + size_t currentDynamicStateSize = std::numeric_limits::max(); + size_t currentIndirectObjectSize = std::numeric_limits::max(); + size_t currentBindingTablePoolSize = std::numeric_limits::max(); size_t cmdListCurrentStartOffset = 0; + + int32_t 
currentMocsState = -1; + bool containsAnyKernel = false; bool pipeControlMultiKernelEventSync = false; bool compactL3FlushEventPacket = false; diff --git a/level_zero/core/source/cmdlist/cmdlist_hw.inl b/level_zero/core/source/cmdlist/cmdlist_hw.inl index 25d77ed296..eb8524ed62 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw.inl @@ -8,7 +8,6 @@ #include "shared/source/built_ins/built_ins.h" #include "shared/source/command_container/encode_surface_state.h" #include "shared/source/command_stream/command_stream_receiver.h" -#include "shared/source/command_stream/linear_stream.h" #include "shared/source/command_stream/preemption.h" #include "shared/source/debugger/debugger_l0.h" #include "shared/source/device/device.h" @@ -60,9 +59,6 @@ namespace L0 { -template -struct EncodeStateBaseAddress; - inline ze_result_t parseErrorCode(NEO::CommandContainer::ErrorCode returnValue) { switch (returnValue) { case NEO::CommandContainer::ErrorCode::OUT_OF_DEVICE_MEMORY: @@ -121,6 +117,17 @@ ze_result_t CommandListCoreFamily::reset() { this->ownedPrivateAllocations.clear(); cmdListCurrentStartOffset = 0; this->returnPoints.clear(); + + currentSurfaceStateBaseAddress = -1; + currentDynamicStateBaseAddress = -1; + currentIndirectObjectBaseAddress = -1; + currentBindingTablePoolBaseAddress = -1; + + currentSurfaceStateSize = std::numeric_limits::max(); + currentDynamicStateSize = std::numeric_limits::max(); + currentIndirectObjectSize = std::numeric_limits::max(); + currentBindingTablePoolSize = std::numeric_limits::max(); + return ZE_RESULT_SUCCESS; } @@ -144,6 +151,7 @@ ze_result_t CommandListCoreFamily::initialize(Device *device, NEO this->stateComputeModeTracking = L0GfxCoreHelper::enableStateComputeModeTracking(rootDeviceEnvironment); this->frontEndStateTracking = L0GfxCoreHelper::enableFrontEndStateTracking(rootDeviceEnvironment); this->pipelineSelectStateTracking = 
L0GfxCoreHelper::enablePipelineSelectStateTracking(rootDeviceEnvironment); + this->stateBaseAddressTracking = L0GfxCoreHelper::enableStateBaseAddressTracking(rootDeviceEnvironment); this->pipeControlMultiKernelEventSync = L0GfxCoreHelper::usePipeControlMultiKernelEventSync(hwInfo); this->compactL3FlushEventPacket = L0GfxCoreHelper::useCompactL3FlushEventPacket(hwInfo); this->signalAllEventPackets = L0GfxCoreHelper::useSignalAllEventPackets(hwInfo); @@ -2323,17 +2331,61 @@ ze_result_t CommandListCoreFamily::prepareIndirectParams(const ze return ZE_RESULT_SUCCESS; } +template +void CommandListCoreFamily::updateStateBaseAddressStreamProperties(Kernel &kernel, bool updateRequiredState, bool captureBaseAddressState) { + KernelImp &kernelImp = static_cast(kernel); + auto &rootDeviceEnvironment = device->getNEODevice()->getRootDeviceEnvironment(); + + if (captureBaseAddressState) { + currentMocsState = static_cast(device->getMOCS(!kernelImp.getKernelRequiresUncachedMocs(), false) >> 1); + + auto ssh = commandContainer.getIndirectHeap(NEO::IndirectHeap::Type::SURFACE_STATE); + currentSurfaceStateBaseAddress = ssh->getHeapGpuBase(); + currentSurfaceStateSize = ssh->getHeapSizeInPages(); + + currentBindingTablePoolBaseAddress = currentSurfaceStateBaseAddress; + currentBindingTablePoolSize = currentSurfaceStateSize; + + auto dsh = commandContainer.getIndirectHeap(NEO::IndirectHeap::Type::DYNAMIC_STATE); + if (dsh != nullptr) { + currentDynamicStateBaseAddress = dsh->getHeapGpuBase(); + currentDynamicStateSize = dsh->getHeapSizeInPages(); + } + + auto ioh = commandContainer.getIndirectHeap(NEO::IndirectHeap::Type::INDIRECT_OBJECT); + currentIndirectObjectBaseAddress = ioh->getHeapGpuBase(); + currentIndirectObjectSize = ioh->getHeapSizeInPages(); + } + + auto sbaStreamState = &finalStreamState.stateBaseAddress; + if (updateRequiredState) { + sbaStreamState = &requiredStreamState.stateBaseAddress; + } + + 
sbaStreamState->setProperties(kernelImp.getKernelDescriptor().kernelAttributes.flags.useGlobalAtomics, currentMocsState, + currentBindingTablePoolBaseAddress, currentBindingTablePoolSize, + currentSurfaceStateBaseAddress, currentSurfaceStateSize, + currentDynamicStateBaseAddress, currentDynamicStateSize, + currentIndirectObjectBaseAddress, currentIndirectObjectSize, + rootDeviceEnvironment); +} + template void CommandListCoreFamily::updateStreamProperties(Kernel &kernel, bool isCooperative) { using VFE_STATE_TYPE = typename GfxFamily::VFE_STATE_TYPE; - auto &hwInfo = device->getHwInfo(); auto &rootDeviceEnvironment = device->getNEODevice()->getRootDeviceEnvironment(); auto &kernelAttributes = kernel.getKernelDescriptor().kernelAttributes; + bool captureBaseAddressState = containsAnyKernel; if (!containsAnyKernel) { requiredStreamState.frontEndState.setProperties(isCooperative, kernelAttributes.flags.requiresDisabledEUFusion, true, -1, rootDeviceEnvironment); requiredStreamState.pipelineSelect.setProperties(true, false, kernelAttributes.flags.usesSystolicPipelineSelectMode, rootDeviceEnvironment); + + if (!this->isFlushTaskSubmissionEnabled) { + updateStateBaseAddressStreamProperties(kernel, true, true); + } + if (this->stateComputeModeTracking) { requiredStreamState.stateComputeMode.setProperties(false, kernelAttributes.numGrfRequired, kernelAttributes.threadArbitrationPolicy, device->getDevicePreemptionMode(), rootDeviceEnvironment); finalStreamState = requiredStreamState; @@ -2342,6 +2394,7 @@ void CommandListCoreFamily::updateStreamProperties(Kernel &kernel requiredStreamState.stateComputeMode.setProperties(false, kernelAttributes.numGrfRequired, kernelAttributes.threadArbitrationPolicy, device->getDevicePreemptionMode(), rootDeviceEnvironment); } containsAnyKernel = true; + captureBaseAddressState = false; } auto logicalStateHelperBlock = !getLogicalStateHelper(); @@ -2361,10 +2414,10 @@ void CommandListCoreFamily::updateStreamProperties(Kernel &kernel bool 
isPatchingVfeStateAllowed = NEO::DebugManager.flags.AllowPatchingVfeStateInCommandLists.get(); if (finalStreamState.frontEndState.isDirty() && logicalStateHelperBlock) { if (isPatchingVfeStateAllowed) { - auto pVfeStateAddress = NEO::PreambleHelper::getSpaceForVfeState(commandContainer.getCommandStream(), hwInfo, engineGroupType); - auto pVfeState = new VFE_STATE_TYPE; - NEO::PreambleHelper::programVfeState(pVfeState, rootDeviceEnvironment, 0, 0, device->getMaxNumHwThreads(), finalStreamState, nullptr); - commandsToPatch.push_back({pVfeStateAddress, pVfeState, CommandToPatch::FrontEndState}); + auto frontEndStateAddress = NEO::PreambleHelper::getSpaceForVfeState(commandContainer.getCommandStream(), device->getHwInfo(), engineGroupType); + auto frontEndStateCmd = new VFE_STATE_TYPE; + NEO::PreambleHelper::programVfeState(frontEndStateCmd, rootDeviceEnvironment, 0, 0, device->getMaxNumHwThreads(), finalStreamState, nullptr); + commandsToPatch.push_back({frontEndStateAddress, frontEndStateCmd, CommandToPatch::FrontEndState}); } if (this->frontEndStateTracking) { auto &stream = *commandContainer.getCommandStream(); @@ -2389,6 +2442,10 @@ void CommandListCoreFamily::updateStreamProperties(Kernel &kernel NEO::EncodeComputeMode::programComputeModeCommandWithSynchronization( *commandContainer.getCommandStream(), finalStreamState.stateComputeMode, pipelineSelectArgs, false, rootDeviceEnvironment, isRcs, this->dcFlushSupport, nullptr); } + + if (!this->isFlushTaskSubmissionEnabled) { + updateStateBaseAddressStreamProperties(kernel, false, captureBaseAddressState); + } } template @@ -2665,9 +2722,6 @@ ze_result_t CommandListCoreFamily::appendWriteToMemory(void *desc UNRECOVERABLE_IF(dstAllocationStruct.alloc == nullptr); commandContainer.addToResidencyContainer(dstAllocationStruct.alloc); - NEO::PipeControlArgs args; - args.dcFlushEnable = NEO::MemorySynchronizationCommands::getDcFlushEnable(descriptor->writeScope, device->getNEODevice()->getRootDeviceEnvironment()); - 
args.dcFlushEnable &= dstAllocationStruct.needsFlush; const uint64_t gpuAddress = static_cast(dstAllocationStruct.alignedAllocationPtr); if (isCopyOnly()) { @@ -2677,6 +2731,10 @@ ze_result_t CommandListCoreFamily::appendWriteToMemory(void *desc NEO::EncodeMiFlushDW::programMiFlushDw(*commandContainer.getCommandStream(), gpuAddress, data, args, productHelper); } else { + NEO::PipeControlArgs args; + args.dcFlushEnable = getDcFlushRequired(!!descriptor->writeScope); + args.dcFlushEnable &= dstAllocationStruct.needsFlush; + NEO::MemorySynchronizationCommands::addBarrierWithPostSyncOperation( *commandContainer.getCommandStream(), NEO::PostSyncMode::ImmediateData, @@ -2756,7 +2814,6 @@ void CommandListCoreFamily::dispatchPostSyncCommands(const CmdLis } if (useLastPipeControl) { - NEO::PipeControlArgs pipeControlArgs; pipeControlArgs.dcFlushEnable = getDcFlushRequired(signalScope); pipeControlArgs.workloadPartitionOffset = eventOperations.workPartitionOperation; @@ -2797,7 +2854,7 @@ void CommandListCoreFamily::dispatchEventRemainingPacketsPostSync uint64_t eventAddress = event->getCompletionFieldGpuAddress(device); eventAddress += event->getSinglePacketSize() * event->getPacketsInUse(); - bool appendLastPipeControl = false; + constexpr bool appendLastPipeControl = false; dispatchPostSyncCommands(remainingPacketsOperation, eventAddress, Event::STATE_SIGNALED, appendLastPipeControl, event->isSignalScope()); } } diff --git a/level_zero/core/source/cmdlist/cmdlist_hw_base.inl b/level_zero/core/source/cmdlist/cmdlist_hw_base.inl index e6461e8a97..2af94b313f 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw_base.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw_base.inl @@ -46,11 +46,11 @@ ze_result_t CommandListCoreFamily::appendLaunchKernelWithParams(K return ZE_RESULT_ERROR_INVALID_ARGUMENT; } const auto kernelImmutableData = kernel->getImmutableData(); - if (this->immediateCmdListHeapSharing) { + if (this->immediateCmdListHeapSharing || 
this->stateBaseAddressTracking) { auto kernelInfo = kernelImmutableData->getKernelInfo(); commandContainer.ensureHeapSizePrepared( NEO::EncodeDispatchKernel::getSizeRequiredSsh(*kernelInfo), - NEO::EncodeDispatchKernel::getSizeRequiredDsh(kernelDescriptor)); + NEO::EncodeDispatchKernel::getSizeRequiredDsh(kernelDescriptor), true); } appendEventForProfiling(event, true); auto perThreadScratchSize = std::max(this->getCommandListPerThreadScratchSize(), diff --git a/level_zero/core/source/cmdlist/cmdlist_hw_xehp_and_later.inl b/level_zero/core/source/cmdlist/cmdlist_hw_xehp_and_later.inl index 9e098faf88..cf1082499c 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw_xehp_and_later.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw_xehp_and_later.inl @@ -32,9 +32,6 @@ namespace L0 { -template -struct EncodeStateBaseAddress; - template size_t CommandListCoreFamily::getReserveSshSize() { return 4 * MemoryConstants::pageSize; @@ -140,15 +137,16 @@ ze_result_t CommandListCoreFamily::appendLaunchKernelWithParams(K if (kernelDescriptor.kernelAttributes.flags.isInvalid) { return ZE_RESULT_ERROR_INVALID_ARGUMENT; } - if (this->immediateCmdListHeapSharing) { + bool getDsh = false; + if constexpr (GfxFamily::supportsSampler) { + getDsh = device->getDeviceInfo().imageSupport; + } + if (this->immediateCmdListHeapSharing || this->stateBaseAddressTracking) { auto kernelInfo = kernelImmutableData->getKernelInfo(); - size_t dshSize = 0; - if constexpr (GfxFamily::supportsSampler) { - dshSize = NEO::EncodeDispatchKernel::getSizeRequiredDsh(kernelDescriptor); - } + commandContainer.ensureHeapSizePrepared( NEO::EncodeDispatchKernel::getSizeRequiredSsh(*kernelInfo), - dshSize); + NEO::EncodeDispatchKernel::getSizeRequiredDsh(kernelDescriptor), getDsh); } commandListPerThreadScratchSize = std::max(commandListPerThreadScratchSize, kernelDescriptor.kernelAttributes.perThreadScratchSize[0]); commandListPerThreadPrivateScratchSize = std::max(commandListPerThreadPrivateScratchSize, 
kernelDescriptor.kernelAttributes.perThreadScratchSize[1]); @@ -245,12 +243,12 @@ ze_result_t CommandListCoreFamily::appendLaunchKernelWithParams(K } } - updateStreamProperties(*kernel, launchParams.isCooperative); - KernelImp *kernelImp = static_cast(kernel); this->containsStatelessUncachedResource |= kernelImp->getKernelRequiresUncachedMocs(); this->requiresQueueUncachedMocs |= kernelImp->getKernelRequiresQueueUncachedMocs(); + updateStreamProperties(*kernel, launchParams.isCooperative); + auto localMemSize = static_cast(neoDevice->getDeviceInfo().localMemSize); auto slmTotalSize = kernelImp->getSlmTotalSize(); if (slmTotalSize > 0 && localMemSize < slmTotalSize) { diff --git a/level_zero/core/source/cmdqueue/cmdqueue.cpp b/level_zero/core/source/cmdqueue/cmdqueue.cpp index 26724c723e..52293a77d2 100644 --- a/level_zero/core/source/cmdqueue/cmdqueue.cpp +++ b/level_zero/core/source/cmdqueue/cmdqueue.cpp @@ -88,6 +88,7 @@ ze_result_t CommandQueueImp::initialize(bool copyOnly, bool isInternal) { this->stateComputeModeTracking = L0GfxCoreHelper::enableStateComputeModeTracking(rootDeviceEnvironment); this->frontEndStateTracking = L0GfxCoreHelper::enableFrontEndStateTracking(rootDeviceEnvironment); this->pipelineSelectStateTracking = L0GfxCoreHelper::enablePipelineSelectStateTracking(rootDeviceEnvironment); + this->stateBaseAddressTracking = L0GfxCoreHelper::enableStateBaseAddressTracking(rootDeviceEnvironment); } return returnValue; } diff --git a/level_zero/core/source/cmdqueue/cmdqueue.h b/level_zero/core/source/cmdqueue/cmdqueue.h index b9aacd5197..80b58d7d3d 100644 --- a/level_zero/core/source/cmdqueue/cmdqueue.h +++ b/level_zero/core/source/cmdqueue/cmdqueue.h @@ -70,6 +70,7 @@ struct CommandQueue : _ze_command_queue_handle_t { bool frontEndStateTracking = false; bool pipelineSelectStateTracking = false; bool stateComputeModeTracking = false; + bool stateBaseAddressTracking = false; }; using CommandQueueAllocatorFn = CommandQueue *(*)(Device *device, 
NEO::CommandStreamReceiver *csr, diff --git a/level_zero/core/source/cmdqueue/cmdqueue_hw.h b/level_zero/core/source/cmdqueue/cmdqueue_hw.h index 099afefc86..40e5623635 100644 --- a/level_zero/core/source/cmdqueue/cmdqueue_hw.h +++ b/level_zero/core/source/cmdqueue/cmdqueue_hw.h @@ -188,7 +188,11 @@ struct CommandQueueHw : public CommandQueueImp { NEO::StreamProperties &csrState, const NEO::StreamProperties &cmdListRequired, const NEO::StreamProperties &cmdListFinal); - + inline void programRequiredStateBaseAddressForCommandList(CommandList *commandList, + NEO::LinearStream &commandStream, + NEO::StreamProperties &csrState, + const NEO::StreamProperties &cmdListRequired, + const NEO::StreamProperties &cmdListFinal); inline void updateBaseAddressState(CommandList *lastCommandList); size_t alignedChildStreamPadding{}; diff --git a/level_zero/core/source/cmdqueue/cmdqueue_hw.inl b/level_zero/core/source/cmdqueue/cmdqueue_hw.inl index db89aa2e1a..fd86c24b16 100644 --- a/level_zero/core/source/cmdqueue/cmdqueue_hw.inl +++ b/level_zero/core/source/cmdqueue/cmdqueue_hw.inl @@ -174,6 +174,7 @@ ze_result_t CommandQueueHw::executeCommandListsRegular( this->programOneCmdListPipelineSelect(commandList, child, csrStateProperties, requiredStreamState, finalStreamState); this->programOneCmdListFrontEndIfDirty(ctx, child, csrStateProperties, requiredStreamState, finalStreamState); this->programRequiredStateComputeModeForCommandList(commandList, child, csrStateProperties, requiredStreamState, finalStreamState); + this->programRequiredStateBaseAddressForCommandList(commandList, child, csrStateProperties, requiredStreamState, finalStreamState); this->patchCommands(*commandList, this->csr->getScratchSpaceController()->getScratchPatchAddress()); this->programOneCmdListBatchBufferStart(commandList, child, ctx); @@ -1212,6 +1213,22 @@ void CommandQueueHw::programRequiredStateComputeModeForCommandLis csrState.stateComputeMode.setProperties(cmdListFinal.stateComputeMode); } +template +void 
CommandQueueHw::programRequiredStateBaseAddressForCommandList(CommandList *commandList, + NEO::LinearStream &commandStream, + NEO::StreamProperties &csrState, + const NEO::StreamProperties &cmdListRequired, + const NEO::StreamProperties &cmdListFinal) { + + if (!this->stateBaseAddressTracking) { + return; + } + + csrState.stateBaseAddress.setProperties(cmdListRequired.stateBaseAddress); + + csrState.stateBaseAddress.setProperties(cmdListFinal.stateBaseAddress); +} + template void CommandQueueHw::updateBaseAddressState(CommandList *lastCommandList) { auto csrHw = static_cast *>(csr); diff --git a/level_zero/core/source/hw_helpers/l0_gfx_core_helper.cpp b/level_zero/core/source/hw_helpers/l0_gfx_core_helper.cpp index 10d1435e4e..8732de0a77 100644 --- a/level_zero/core/source/hw_helpers/l0_gfx_core_helper.cpp +++ b/level_zero/core/source/hw_helpers/l0_gfx_core_helper.cpp @@ -47,6 +47,14 @@ bool L0GfxCoreHelper::enableStateComputeModeTracking(const NEO::RootDeviceEnviro return l0GfxCoreHelper.platformSupportsStateComputeModeTracking(); } +bool L0GfxCoreHelper::enableStateBaseAddressTracking(const NEO::RootDeviceEnvironment &rootDeviceEnvironment) { + if (NEO::DebugManager.flags.EnableStateBaseAddressTracking.get() != -1) { + return !!NEO::DebugManager.flags.EnableStateBaseAddressTracking.get(); + } + auto &l0GfxCoreHelper = rootDeviceEnvironment.getHelper(); + return l0GfxCoreHelper.platformSupportsStateBaseAddressTracking(); +} + bool L0GfxCoreHelper::enableImmediateCmdListHeapSharing(const NEO::RootDeviceEnvironment &rootDeviceEnvironment, bool cmdlistSupport) { if (NEO::DebugManager.flags.EnableImmediateCmdListHeapSharing.get() != -1) { return !!NEO::DebugManager.flags.EnableImmediateCmdListHeapSharing.get(); diff --git a/level_zero/core/source/hw_helpers/l0_gfx_core_helper.h b/level_zero/core/source/hw_helpers/l0_gfx_core_helper.h index b0841fd31d..f63dcfcebd 100644 --- a/level_zero/core/source/hw_helpers/l0_gfx_core_helper.h +++ 
b/level_zero/core/source/hw_helpers/l0_gfx_core_helper.h @@ -42,6 +42,7 @@ class L0GfxCoreHelper : public NEO::ApiGfxCoreHelper { static bool enableFrontEndStateTracking(const NEO::RootDeviceEnvironment &rootDeviceEnvironment); static bool enablePipelineSelectStateTracking(const NEO::RootDeviceEnvironment &rootDeviceEnvironment); static bool enableStateComputeModeTracking(const NEO::RootDeviceEnvironment &rootDeviceEnvironment); + static bool enableStateBaseAddressTracking(const NEO::RootDeviceEnvironment &rootDeviceEnvironment); static bool enableImmediateCmdListHeapSharing(const NEO::RootDeviceEnvironment &rootDeviceEnvironment, bool cmdlistSupport); static bool usePipeControlMultiKernelEventSync(const NEO::HardwareInfo &hwInfo); static bool useCompactL3FlushEventPacket(const NEO::HardwareInfo &hwInfo); @@ -63,6 +64,7 @@ class L0GfxCoreHelper : public NEO::ApiGfxCoreHelper { virtual bool platformSupportsStateComputeModeTracking() const = 0; virtual bool platformSupportsFrontEndTracking() const = 0; virtual bool platformSupportsPipelineSelectTracking() const = 0; + virtual bool platformSupportsStateBaseAddressTracking() const = 0; virtual bool platformSupportsRayTracing() const = 0; virtual bool isZebinAllowed(const NEO::Debugger *debugger) const = 0; virtual uint32_t getEventMaxKernelCount(const NEO::HardwareInfo &hwInfo) const = 0; @@ -95,6 +97,7 @@ class L0GfxCoreHelperHw : public L0GfxCoreHelper { bool platformSupportsStateComputeModeTracking() const override; bool platformSupportsFrontEndTracking() const override; bool platformSupportsPipelineSelectTracking() const override; + bool platformSupportsStateBaseAddressTracking() const override; bool platformSupportsRayTracing() const override; bool isZebinAllowed(const NEO::Debugger *debugger) const override; uint32_t getEventMaxKernelCount(const NEO::HardwareInfo &hwInfo) const override; diff --git a/level_zero/core/source/hw_helpers/l0_gfx_core_helper_skl_to_tgllp.inl 
b/level_zero/core/source/hw_helpers/l0_gfx_core_helper_skl_to_tgllp.inl index f0443b370f..b75f8fbdb8 100644 --- a/level_zero/core/source/hw_helpers/l0_gfx_core_helper_skl_to_tgllp.inl +++ b/level_zero/core/source/hw_helpers/l0_gfx_core_helper_skl_to_tgllp.inl @@ -29,6 +29,11 @@ bool L0GfxCoreHelperHw::platformSupportsPipelineSelectTracking() const { return false; } +template +bool L0GfxCoreHelperHw::platformSupportsStateBaseAddressTracking() const { + return false; +} + template uint32_t L0GfxCoreHelperHw::getEventMaxKernelCount(const NEO::HardwareInfo &hwInfo) const { return 1; diff --git a/level_zero/core/source/hw_helpers/l0_gfx_core_helper_xehp_and_later.inl b/level_zero/core/source/hw_helpers/l0_gfx_core_helper_xehp_and_later.inl index 50fbd8fb59..400c5b1aff 100644 --- a/level_zero/core/source/hw_helpers/l0_gfx_core_helper_xehp_and_later.inl +++ b/level_zero/core/source/hw_helpers/l0_gfx_core_helper_xehp_and_later.inl @@ -32,6 +32,11 @@ bool L0GfxCoreHelperHw::platformSupportsPipelineSelectTracking() const { return true; } +template +bool L0GfxCoreHelperHw::platformSupportsStateBaseAddressTracking() const { + return false; +} + template uint32_t L0GfxCoreHelperHw::getEventMaxKernelCount(const NEO::HardwareInfo &hwInfo) const { uint32_t kernelCount = EventPacketsCount::maxKernelSplit; diff --git a/level_zero/core/source/kernel/kernel_imp.h b/level_zero/core/source/kernel/kernel_imp.h index 49d1a1f5e1..4c7991b774 100644 --- a/level_zero/core/source/kernel/kernel_imp.h +++ b/level_zero/core/source/kernel/kernel_imp.h @@ -217,8 +217,8 @@ struct KernelImp : Kernel { uint32_t requiredWorkgroupOrder = 0u; bool kernelRequiresGenerationOfLocalIdsByRuntime = true; - uint32_t kernelRequiresUncachedMocsCount = false; - uint32_t kernelRequiresQueueUncachedMocsCount = false; + uint32_t kernelRequiresUncachedMocsCount = 0; + uint32_t kernelRequiresQueueUncachedMocsCount = 0; std::vector isArgUncached; uint32_t globalOffsets[3] = {}; diff --git 
a/level_zero/core/test/unit_tests/fixtures/cmdlist_fixture.cpp b/level_zero/core/test/unit_tests/fixtures/cmdlist_fixture.cpp index aa3dde402e..30e41eeb06 100644 --- a/level_zero/core/test/unit_tests/fixtures/cmdlist_fixture.cpp +++ b/level_zero/core/test/unit_tests/fixtures/cmdlist_fixture.cpp @@ -146,6 +146,20 @@ void CmdListStateComputeModeStateFixture::setUp() { ModuleMutableCommandListFixture::setUp(); } +void CommandListStateBaseAddressFixture::setUp() { + DebugManager.flags.EnableStateBaseAddressTracking.set(1); + ModuleMutableCommandListFixture::setUp(); + + mockKernelImmData->kernelDescriptor->payloadMappings.samplerTable.numSamplers = 1; + mockKernelImmData->kernelDescriptor->payloadMappings.samplerTable.tableOffset = 16; + mockKernelImmData->kernelDescriptor->payloadMappings.samplerTable.borderColor = 0; + kernel->dynamicStateHeapData.reset(new uint8_t[512]); +} + +uint32_t CommandListStateBaseAddressFixture::getMocs(bool l3On) { + return device->getMOCS(l3On, false) >> 1; +} + void ImmediateCmdListSharedHeapsFixture::setUp() { DebugManager.flags.EnableFlushTaskSubmission.set(1); DebugManager.flags.EnableImmediateCmdListHeapSharing.set(1); diff --git a/level_zero/core/test/unit_tests/fixtures/cmdlist_fixture.h b/level_zero/core/test/unit_tests/fixtures/cmdlist_fixture.h index 362f2d097a..d796cb6a7b 100644 --- a/level_zero/core/test/unit_tests/fixtures/cmdlist_fixture.h +++ b/level_zero/core/test/unit_tests/fixtures/cmdlist_fixture.h @@ -120,6 +120,13 @@ struct CmdListLargeGrfFixture : public CmdListStateComputeModeStateFixture { void testBody(); }; +struct CommandListStateBaseAddressFixture : public ModuleMutableCommandListFixture { + void setUp(); + uint32_t getMocs(bool l3On); + + DebugManagerStateRestore restorer; +}; + struct ImmediateCmdListSharedHeapsFixture : public ModuleMutableCommandListFixture { void setUp(); diff --git a/level_zero/core/test/unit_tests/gen11/test_l0_gfx_core_helper_gen11.cpp 
b/level_zero/core/test/unit_tests/gen11/test_l0_gfx_core_helper_gen11.cpp index d376197aa6..a57005fd68 100644 --- a/level_zero/core/test/unit_tests/gen11/test_l0_gfx_core_helper_gen11.cpp +++ b/level_zero/core/test/unit_tests/gen11/test_l0_gfx_core_helper_gen11.cpp @@ -36,6 +36,11 @@ GEN11TEST_F(L0GfxCoreHelperTestGen11, GivenGen11WhenCheckingL0HelperForPipelineS EXPECT_FALSE(l0GfxCoreHelper.platformSupportsPipelineSelectTracking()); } +GEN11TEST_F(L0GfxCoreHelperTestGen11, GivenGen11WhenCheckingL0HelperForStateBaseAddressTrackingSupportThenReturnFalse) { + auto &l0GfxCoreHelper = getHelper(); + EXPECT_FALSE(l0GfxCoreHelper.platformSupportsStateBaseAddressTracking()); +} + GEN11TEST_F(L0GfxCoreHelperTestGen11, GivenGen11WhenCheckingL0HelperForRayTracingSupportThenReturnFalse) { auto &l0GfxCoreHelper = getHelper(); EXPECT_FALSE(l0GfxCoreHelper.platformSupportsRayTracing()); diff --git a/level_zero/core/test/unit_tests/gen12lp/test_l0_gfx_core_helper_gen12lp.cpp b/level_zero/core/test/unit_tests/gen12lp/test_l0_gfx_core_helper_gen12lp.cpp index b41f0784ae..411b8379dc 100644 --- a/level_zero/core/test/unit_tests/gen12lp/test_l0_gfx_core_helper_gen12lp.cpp +++ b/level_zero/core/test/unit_tests/gen12lp/test_l0_gfx_core_helper_gen12lp.cpp @@ -51,6 +51,12 @@ GEN12LPTEST_F(L0GfxCoreHelperTestGen12Lp, GivenGen12LpWhenCheckingL0HelperForPip EXPECT_FALSE(l0GfxCoreHelper.platformSupportsPipelineSelectTracking()); } +GEN12LPTEST_F(L0GfxCoreHelperTestGen12Lp, GivenGen12LpWhenCheckingL0HelperForStateBaseAddressTrackingSupportThenReturnFalse) { + MockExecutionEnvironment executionEnvironment; + auto &l0GfxCoreHelper = executionEnvironment.rootDeviceEnvironments[0]->getHelper(); + EXPECT_FALSE(l0GfxCoreHelper.platformSupportsStateBaseAddressTracking()); +} + GEN12LPTEST_F(L0GfxCoreHelperTestGen12Lp, GivenGen12LpWhenCheckingL0HelperForRayTracingSupportThenReturnFalse) { MockExecutionEnvironment executionEnvironment; auto &l0GfxCoreHelper = 
executionEnvironment.rootDeviceEnvironments[0]->getHelper(); diff --git a/level_zero/core/test/unit_tests/gen9/test_l0_gfx_core_helper_gen9.cpp b/level_zero/core/test/unit_tests/gen9/test_l0_gfx_core_helper_gen9.cpp index 1a952b3b49..7c94ba84d3 100644 --- a/level_zero/core/test/unit_tests/gen9/test_l0_gfx_core_helper_gen9.cpp +++ b/level_zero/core/test/unit_tests/gen9/test_l0_gfx_core_helper_gen9.cpp @@ -36,6 +36,11 @@ GEN9TEST_F(L0GfxCoreHelperTestGen9, GivenGen9WhenCheckingL0HelperForPipelineSele EXPECT_FALSE(l0GfxCoreHelper.platformSupportsPipelineSelectTracking()); } +GEN9TEST_F(L0GfxCoreHelperTestGen9, GivenGen9WhenCheckingL0HelperForStateBaseAddressTrackingSupportThenReturnFalse) { + auto &l0GfxCoreHelper = getHelper(); + EXPECT_FALSE(l0GfxCoreHelper.platformSupportsStateBaseAddressTracking()); +} + GEN9TEST_F(L0GfxCoreHelperTestGen9, GivenGen9WhenCheckingL0HelperForRayTracingSupportThenReturnFalse) { auto &l0GfxCoreHelper = getHelper(); EXPECT_FALSE(l0GfxCoreHelper.platformSupportsRayTracing()); diff --git a/level_zero/core/test/unit_tests/mocks/mock_cmdlist.h b/level_zero/core/test/unit_tests/mocks/mock_cmdlist.h index f369632b1d..77dc127bef 100644 --- a/level_zero/core/test/unit_tests/mocks/mock_cmdlist.h +++ b/level_zero/core/test/unit_tests/mocks/mock_cmdlist.h @@ -73,6 +73,7 @@ struct WhiteBox<::L0::CommandListCoreFamily> using BaseClass::requiredStreamState; using BaseClass::setupTimestampEventForMultiTile; using BaseClass::signalAllEventPackets; + using BaseClass::stateBaseAddressTracking; using BaseClass::stateComputeModeTracking; using BaseClass::unifiedMemoryControls; using BaseClass::updateStreamProperties; @@ -145,6 +146,7 @@ struct WhiteBox> using BaseClass::pipelineSelectStateTracking; using BaseClass::requiredStreamState; using BaseClass::signalAllEventPackets; + using BaseClass::stateBaseAddressTracking; using BaseClass::stateComputeModeTracking; WhiteBox() : BaseClass(BaseClass::defaultNumIddsPerBlock) {} @@ -155,6 +157,7 @@ struct 
MockCommandListImmediate : public CommandListCoreFamilyImmediate; using BaseClass::compactL3FlushEventPacket; using BaseClass::containsAnyKernel; + using BaseClass::finalStreamState; using BaseClass::immediateCmdListHeapSharing; using BaseClass::indirectAllocationsAllowed; using BaseClass::pipeControlMultiKernelEventSync; @@ -169,6 +172,7 @@ struct WhiteBox<::L0::CommandList> : public ::L0::CommandListImp { using BaseClass::commandContainer; using BaseClass::commandListPreemptionMode; using BaseClass::csr; + using BaseClass::finalStreamState; using BaseClass::frontEndStateTracking; using BaseClass::getDcFlushRequired; using BaseClass::immediateCmdListHeapSharing; @@ -177,7 +181,9 @@ struct WhiteBox<::L0::CommandList> : public ::L0::CommandListImp { using BaseClass::nonImmediateLogicalStateHelper; using BaseClass::partitionCount; using BaseClass::pipelineSelectStateTracking; + using BaseClass::requiredStreamState; using BaseClass::signalAllEventPackets; + using BaseClass::stateBaseAddressTracking; using BaseClass::stateComputeModeTracking; WhiteBox(Device *device); diff --git a/level_zero/core/test/unit_tests/mocks/mock_cmdqueue.h b/level_zero/core/test/unit_tests/mocks/mock_cmdqueue.h index 5d7bd1b3b5..654074f594 100644 --- a/level_zero/core/test/unit_tests/mocks/mock_cmdqueue.h +++ b/level_zero/core/test/unit_tests/mocks/mock_cmdqueue.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2020-2022 Intel Corporation + * Copyright (C) 2020-2023 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -37,6 +37,7 @@ struct WhiteBox<::L0::CommandQueue> : public ::L0::CommandQueueImp { using CommandQueue::internalUsage; using CommandQueue::partitionCount; using CommandQueue::pipelineSelectStateTracking; + using CommandQueue::stateBaseAddressTracking; using CommandQueue::stateComputeModeTracking; WhiteBox(Device *device, NEO::CommandStreamReceiver *csr, @@ -71,6 +72,7 @@ struct MockCommandQueueHw : public L0::CommandQueueHw { using L0::CommandQueue::partitionCount; using 
L0::CommandQueue::pipelineSelectStateTracking; using L0::CommandQueue::preemptionCmdSyncProgramming; + using L0::CommandQueue::stateBaseAddressTracking; using L0::CommandQueue::stateComputeModeTracking; using L0::CommandQueueImp::csr; using typename BaseClass::CommandListExecutionContext; diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_1.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_1.cpp index 2fef10262a..39b95bcabd 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_1.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_1.cpp @@ -2365,6 +2365,9 @@ TEST_F(CommandListCreate, givenCreatedCommandListWhenGettingTrackingFlagsThenDef bool expectedFrontEndTracking = l0GfxCoreHelper.platformSupportsFrontEndTracking(); EXPECT_EQ(expectedFrontEndTracking, commandList->frontEndStateTracking); + + bool expectedStateBaseAddressTracking = l0GfxCoreHelper.platformSupportsStateBaseAddressTracking(); + EXPECT_EQ(expectedStateBaseAddressTracking, commandList->stateBaseAddressTracking); } } // namespace ult diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_2.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_2.cpp index ffa06cdde5..0f94356107 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_2.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_2.cpp @@ -16,7 +16,6 @@ #include "level_zero/core/source/hw_helpers/l0_gfx_core_helper.h" #include "level_zero/core/source/image/image_hw.h" #include "level_zero/core/test/unit_tests/fixtures/cmdlist_fixture.h" -#include "level_zero/core/test/unit_tests/fixtures/device_fixture.h" #include "level_zero/core/test/unit_tests/mocks/mock_cmdlist.h" #include "level_zero/core/test/unit_tests/mocks/mock_kernel.h" diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_5.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_5.cpp index 
151f244efc..6a556b90b8 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_5.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_5.cpp @@ -7,12 +7,14 @@ #include "shared/source/helpers/aligned_memory.h" #include "shared/source/helpers/hw_helper.h" +#include "shared/source/indirect_heap/indirect_heap.h" #include "shared/source/kernel/implicit_args.h" #include "shared/test/common/cmd_parse/gen_cmd_parse.h" +#include "shared/test/common/helpers/unit_test_helper.h" #include "shared/test/common/test_macros/hw_test.h" #include "level_zero/core/source/kernel/kernel_imp.h" -#include "level_zero/core/test/unit_tests/fixtures/device_fixture.h" +#include "level_zero/core/test/unit_tests/fixtures/cmdlist_fixture.h" #include "level_zero/core/test/unit_tests/mocks/mock_builtin_functions_lib_impl_timestamps.h" #include "level_zero/core/test/unit_tests/mocks/mock_cmdlist.h" #include "level_zero/core/test/unit_tests/mocks/mock_device_for_spirv.h" @@ -901,5 +903,552 @@ HWTEST2_F(CommandListCreate, givenAllValuesTbxAndSyncModeFlagsWhenCheckingWaitli EXPECT_TRUE(cmdList.eventWaitlistSyncRequired()); } +using CommandListStateBaseAddressTest = Test; + +HWTEST2_F(CommandListStateBaseAddressTest, + givenStateBaseAddressTrackingWhenRegularCmdListAppendKernelAndExecuteThenBaseAddressStateIsStoredInCsr, + IsAtLeastSkl) { + NEO::StateBaseAddressPropertiesSupport sbaPropertiesSupport = {}; + auto &productHelper = device->getProductHelper(); + productHelper.fillStateBaseAddressPropertiesSupportStructure(sbaPropertiesSupport); + + EXPECT_TRUE(commandList->stateBaseAddressTracking); + + auto &container = commandList->commandContainer; + + ze_group_count_t groupCount{1, 1, 1}; + CmdListKernelLaunchParams launchParams = {}; + auto result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + + auto sshHeap = 
container.getIndirectHeap(NEO::HeapType::SURFACE_STATE); + auto ssBaseAddress = sshHeap->getHeapGpuBase(); + auto ssSize = sshHeap->getHeapSizeInPages(); + + uint64_t dsBaseAddress = -1; + size_t dsSize = static_cast(-1); + + auto dshHeap = container.getIndirectHeap(NEO::HeapType::DYNAMIC_STATE); + if (NEO::UnitTestHelper::expectNullDsh(device->getDeviceInfo())) { + EXPECT_EQ(nullptr, dshHeap); + } else { + EXPECT_NE(nullptr, dshHeap); + } + if (dshHeap) { + dsBaseAddress = dshHeap->getHeapGpuBase(); + dsSize = dshHeap->getHeapSizeInPages(); + } + + auto ioBaseAddress = container.getIndirectHeap(NEO::HeapType::INDIRECT_OBJECT)->getHeapGpuBase(); + auto ioSize = container.getIndirectHeap(NEO::HeapType::INDIRECT_OBJECT)->getHeapSizeInPages(); + + auto statlessMocs = device->getMOCS(true, false) >> 1; + + auto &requiredState = commandList->requiredStreamState.stateBaseAddress; + auto &finalState = commandList->finalStreamState.stateBaseAddress; + + EXPECT_EQ(static_cast(statlessMocs), requiredState.statelessMocs.value); + + EXPECT_EQ(static_cast(ssBaseAddress), requiredState.surfaceStateBaseAddress.value); + EXPECT_EQ(ssSize, requiredState.surfaceStateSize.value); + EXPECT_EQ(static_cast(dsBaseAddress), requiredState.dynamicStateBaseAddress.value); + EXPECT_EQ(dsSize, requiredState.dynamicStateSize.value); + EXPECT_EQ(static_cast(ioBaseAddress), requiredState.indirectObjectBaseAddress.value); + EXPECT_EQ(ioSize, requiredState.indirectObjectSize.value); + + if (sbaPropertiesSupport.bindingTablePoolBaseAddress) { + EXPECT_EQ(static_cast(ssBaseAddress), requiredState.bindingTablePoolBaseAddress.value); + EXPECT_EQ(ssSize, requiredState.bindingTablePoolSize.value); + } else { + EXPECT_EQ(-1, requiredState.bindingTablePoolBaseAddress.value); + EXPECT_EQ(static_cast(-1), requiredState.bindingTablePoolSize.value); + } + + EXPECT_EQ(finalState.surfaceStateBaseAddress.value, requiredState.surfaceStateBaseAddress.value); + EXPECT_EQ(finalState.surfaceStateSize.value, 
requiredState.surfaceStateSize.value); + + EXPECT_EQ(finalState.dynamicStateBaseAddress.value, requiredState.dynamicStateBaseAddress.value); + EXPECT_EQ(finalState.dynamicStateSize.value, requiredState.dynamicStateSize.value); + + EXPECT_EQ(finalState.indirectObjectBaseAddress.value, requiredState.indirectObjectBaseAddress.value); + EXPECT_EQ(finalState.indirectObjectSize.value, requiredState.indirectObjectSize.value); + + EXPECT_EQ(finalState.bindingTablePoolBaseAddress.value, requiredState.bindingTablePoolBaseAddress.value); + EXPECT_EQ(finalState.bindingTablePoolSize.value, requiredState.bindingTablePoolSize.value); + + EXPECT_EQ(finalState.globalAtomics.value, requiredState.globalAtomics.value); + EXPECT_EQ(finalState.statelessMocs.value, requiredState.statelessMocs.value); + + result = commandList->close(); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + + ze_command_list_handle_t cmdListHandle = commandList->toHandle(); + result = commandQueue->executeCommandLists(1, &cmdListHandle, nullptr, true); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + + auto &csrState = commandQueue->getCsr()->getStreamProperties().stateBaseAddress; + + EXPECT_EQ(csrState.surfaceStateBaseAddress.value, finalState.surfaceStateBaseAddress.value); + EXPECT_EQ(csrState.surfaceStateSize.value, finalState.surfaceStateSize.value); + + EXPECT_EQ(csrState.dynamicStateBaseAddress.value, finalState.dynamicStateBaseAddress.value); + EXPECT_EQ(csrState.dynamicStateSize.value, finalState.dynamicStateSize.value); + + EXPECT_EQ(csrState.indirectObjectBaseAddress.value, finalState.indirectObjectBaseAddress.value); + EXPECT_EQ(csrState.indirectObjectSize.value, finalState.indirectObjectSize.value); + + EXPECT_EQ(csrState.bindingTablePoolBaseAddress.value, finalState.bindingTablePoolBaseAddress.value); + EXPECT_EQ(csrState.bindingTablePoolSize.value, finalState.bindingTablePoolSize.value); + + EXPECT_EQ(csrState.globalAtomics.value, finalState.globalAtomics.value); + EXPECT_EQ(csrState.statelessMocs.value, 
finalState.statelessMocs.value); +} + +HWTEST2_F(CommandListStateBaseAddressTest, + givenStateBaseAddressTrackingWhenRegularCmdListAppendKernelChangesHeapsAndExecuteThenFinalBaseAddressStateIsStoredInCsr, + IsAtLeastSkl) { + NEO::StateBaseAddressPropertiesSupport sbaPropertiesSupport = {}; + auto &productHelper = device->getProductHelper(); + productHelper.fillStateBaseAddressPropertiesSupportStructure(sbaPropertiesSupport); + + EXPECT_TRUE(commandList->stateBaseAddressTracking); + + auto &container = commandList->commandContainer; + + ze_group_count_t groupCount{1, 1, 1}; + CmdListKernelLaunchParams launchParams = {}; + auto result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + + auto sshHeap = container.getIndirectHeap(NEO::HeapType::SURFACE_STATE); + auto ssBaseAddress = sshHeap->getHeapGpuBase(); + auto ssSize = sshHeap->getHeapSizeInPages(); + + uint64_t dsBaseAddress = -1; + size_t dsSize = static_cast(-1); + + auto dshHeap = container.getIndirectHeap(NEO::HeapType::DYNAMIC_STATE); + if (dshHeap) { + dsBaseAddress = dshHeap->getHeapGpuBase(); + dsSize = dshHeap->getHeapSizeInPages(); + } + + auto &requiredState = commandList->requiredStreamState.stateBaseAddress; + auto &finalState = commandList->finalStreamState.stateBaseAddress; + + EXPECT_EQ(static_cast(ssBaseAddress), requiredState.surfaceStateBaseAddress.value); + EXPECT_EQ(ssSize, requiredState.surfaceStateSize.value); + EXPECT_EQ(static_cast(dsBaseAddress), requiredState.dynamicStateBaseAddress.value); + EXPECT_EQ(dsSize, requiredState.dynamicStateSize.value); + + EXPECT_EQ(finalState.surfaceStateBaseAddress.value, requiredState.surfaceStateBaseAddress.value); + EXPECT_EQ(finalState.surfaceStateSize.value, requiredState.surfaceStateSize.value); + + EXPECT_EQ(finalState.dynamicStateBaseAddress.value, requiredState.dynamicStateBaseAddress.value); + EXPECT_EQ(finalState.dynamicStateSize.value, 
requiredState.dynamicStateSize.value); + + sshHeap->getSpace(sshHeap->getAvailableSpace()); + container.getHeapWithRequiredSizeAndAlignment(NEO::HeapType::SURFACE_STATE, sshHeap->getMaxAvailableSpace(), 0); + + if (dshHeap) { + dshHeap->getSpace(dshHeap->getAvailableSpace()); + container.getHeapWithRequiredSizeAndAlignment(NEO::HeapType::DYNAMIC_STATE, dshHeap->getMaxAvailableSpace(), 0); + } + + result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + + ssBaseAddress = sshHeap->getGpuBase(); + if (dshHeap) { + dsBaseAddress = dshHeap->getGpuBase(); + } + + EXPECT_NE(static_cast(ssBaseAddress), requiredState.surfaceStateBaseAddress.value); + if (dshHeap) { + EXPECT_NE(static_cast(dsBaseAddress), requiredState.dynamicStateBaseAddress.value); + } else { + EXPECT_EQ(static_cast(dsBaseAddress), requiredState.dynamicStateBaseAddress.value); + } + + EXPECT_EQ(static_cast(ssBaseAddress), finalState.surfaceStateBaseAddress.value); + EXPECT_EQ(static_cast(dsBaseAddress), finalState.dynamicStateBaseAddress.value); + + result = commandList->close(); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + + ze_command_list_handle_t cmdListHandle = commandList->toHandle(); + result = commandQueue->executeCommandLists(1, &cmdListHandle, nullptr, true); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + + auto &csrState = commandQueue->getCsr()->getStreamProperties().stateBaseAddress; + + EXPECT_EQ(csrState.surfaceStateBaseAddress.value, finalState.surfaceStateBaseAddress.value); + EXPECT_EQ(csrState.surfaceStateSize.value, finalState.surfaceStateSize.value); + + EXPECT_EQ(csrState.dynamicStateBaseAddress.value, finalState.dynamicStateBaseAddress.value); + EXPECT_EQ(csrState.dynamicStateSize.value, finalState.dynamicStateSize.value); +} + +HWTEST2_F(CommandListStateBaseAddressTest, + 
givenStateBaseAddressTrackingWhenImmediateCmdListAppendKernelChangesHeapsAndExecuteThenFinalBaseAddressStateIsStoredInCsr, + IsAtLeastSkl) { + NEO::DebugManager.flags.DisableResourceRecycling.set(true); + + NEO::StateBaseAddressPropertiesSupport sbaPropertiesSupport = {}; + auto &productHelper = device->getProductHelper(); + productHelper.fillStateBaseAddressPropertiesSupportStructure(sbaPropertiesSupport); + + EXPECT_TRUE(commandListImmediate->stateBaseAddressTracking); + + auto &container = commandListImmediate->commandContainer; + + auto csrImmediate = commandListImmediate->csr; + auto &csrState = csrImmediate->getStreamProperties().stateBaseAddress; + + ze_group_count_t groupCount{1, 1, 1}; + CmdListKernelLaunchParams launchParams = {}; + auto result = commandListImmediate->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + + auto sshHeap = container.getIndirectHeap(NEO::HeapType::SURFACE_STATE); + auto ssBaseAddress = sshHeap->getHeapGpuBase(); + auto ssSize = sshHeap->getHeapSizeInPages(); + + uint64_t dsBaseAddress = -1; + size_t dsSize = static_cast(-1); + + auto dshHeap = container.getIndirectHeap(NEO::HeapType::DYNAMIC_STATE); + if (NEO::UnitTestHelper::expectNullDsh(device->getDeviceInfo())) { + EXPECT_EQ(nullptr, dshHeap); + } else { + EXPECT_NE(nullptr, dshHeap); + } + if (dshHeap) { + dsBaseAddress = dshHeap->getHeapGpuBase(); + dsSize = dshHeap->getHeapSizeInPages(); + } + + auto ioBaseAddress = container.getIndirectHeap(NEO::HeapType::INDIRECT_OBJECT)->getHeapGpuBase(); + auto ioSize = container.getIndirectHeap(NEO::HeapType::INDIRECT_OBJECT)->getHeapSizeInPages(); + + auto statlessMocs = device->getMOCS(true, false) >> 1; + + EXPECT_EQ(static_cast(statlessMocs), csrState.statelessMocs.value); + + EXPECT_EQ(static_cast(ssBaseAddress), csrState.surfaceStateBaseAddress.value); + EXPECT_EQ(ssSize, csrState.surfaceStateSize.value); + 
EXPECT_EQ(static_cast(dsBaseAddress), csrState.dynamicStateBaseAddress.value); + EXPECT_EQ(dsSize, csrState.dynamicStateSize.value); + EXPECT_EQ(static_cast(ioBaseAddress), csrState.indirectObjectBaseAddress.value); + EXPECT_EQ(ioSize, csrState.indirectObjectSize.value); + + if (sbaPropertiesSupport.bindingTablePoolBaseAddress) { + EXPECT_EQ(static_cast(ssBaseAddress), csrState.bindingTablePoolBaseAddress.value); + EXPECT_EQ(ssSize, csrState.bindingTablePoolSize.value); + } else { + EXPECT_EQ(-1, csrState.bindingTablePoolBaseAddress.value); + EXPECT_EQ(static_cast(-1), csrState.bindingTablePoolSize.value); + } + + sshHeap->getSpace(sshHeap->getAvailableSpace()); + if (commandListImmediate->immediateCmdListHeapSharing) { + csrImmediate->getIndirectHeap(NEO::HeapType::SURFACE_STATE, sshHeap->getMaxAvailableSpace()); + } else { + container.getHeapWithRequiredSizeAndAlignment(NEO::HeapType::SURFACE_STATE, sshHeap->getMaxAvailableSpace(), 0); + } + + if (dshHeap) { + dshHeap->getSpace(dshHeap->getAvailableSpace()); + if (commandListImmediate->immediateCmdListHeapSharing) { + csrImmediate->getIndirectHeap(NEO::HeapType::DYNAMIC_STATE, dshHeap->getMaxAvailableSpace()); /* size the DSH request from the DSH itself, mirroring the non-shared branch */ + } else { + container.getHeapWithRequiredSizeAndAlignment(NEO::HeapType::DYNAMIC_STATE, dshHeap->getMaxAvailableSpace(), 0); + } + } + + result = commandListImmediate->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + + ssBaseAddress = sshHeap->getGpuBase(); + if (dshHeap) { + dsBaseAddress = dshHeap->getGpuBase(); + } + + EXPECT_EQ(static_cast(ssBaseAddress), csrState.surfaceStateBaseAddress.value); + EXPECT_EQ(static_cast(dsBaseAddress), csrState.dynamicStateBaseAddress.value); +} + +HWTEST2_F(CommandListStateBaseAddressTest, + givenStateBaseAddressTrackingWhenRegularCmdListAppendKernelAndExecuteAndImmediateCmdListAppendKernelSharingCsrThenBaseAddressStateIsUpdatedInCsr, + IsAtLeastSkl) { + 
ASSERT_EQ(commandListImmediate->csr, commandQueue->getCsr()); + + NEO::StateBaseAddressPropertiesSupport sbaPropertiesSupport = {}; + auto &productHelper = device->getProductHelper(); + productHelper.fillStateBaseAddressPropertiesSupportStructure(sbaPropertiesSupport); + + EXPECT_TRUE(commandList->stateBaseAddressTracking); + + auto &container = commandList->commandContainer; + + ze_group_count_t groupCount{1, 1, 1}; + CmdListKernelLaunchParams launchParams = {}; + auto result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + + auto sshHeap = container.getIndirectHeap(NEO::HeapType::SURFACE_STATE); + auto ssBaseAddress = sshHeap->getHeapGpuBase(); + auto ssSize = sshHeap->getHeapSizeInPages(); + + uint64_t dsBaseAddress = -1; + size_t dsSize = static_cast(-1); + + auto dshHeap = container.getIndirectHeap(NEO::HeapType::DYNAMIC_STATE); + if (dshHeap) { + dsBaseAddress = dshHeap->getHeapGpuBase(); + dsSize = dshHeap->getHeapSizeInPages(); + } + + auto ioBaseAddress = container.getIndirectHeap(NEO::HeapType::INDIRECT_OBJECT)->getHeapGpuBase(); + auto ioSize = container.getIndirectHeap(NEO::HeapType::INDIRECT_OBJECT)->getHeapSizeInPages(); + + auto statlessMocs = getMocs(true); + + auto &requiredState = commandList->requiredStreamState.stateBaseAddress; + auto &finalState = commandList->finalStreamState.stateBaseAddress; + + EXPECT_EQ(static_cast(statlessMocs), requiredState.statelessMocs.value); + + EXPECT_EQ(static_cast(ssBaseAddress), requiredState.surfaceStateBaseAddress.value); + EXPECT_EQ(ssSize, requiredState.surfaceStateSize.value); + EXPECT_EQ(static_cast(dsBaseAddress), requiredState.dynamicStateBaseAddress.value); + EXPECT_EQ(dsSize, requiredState.dynamicStateSize.value); + EXPECT_EQ(static_cast(ioBaseAddress), requiredState.indirectObjectBaseAddress.value); + EXPECT_EQ(ioSize, requiredState.indirectObjectSize.value); + + if 
(sbaPropertiesSupport.bindingTablePoolBaseAddress) { + EXPECT_EQ(static_cast(ssBaseAddress), requiredState.bindingTablePoolBaseAddress.value); + EXPECT_EQ(ssSize, requiredState.bindingTablePoolSize.value); + } else { + EXPECT_EQ(-1, requiredState.bindingTablePoolBaseAddress.value); + EXPECT_EQ(static_cast(-1), requiredState.bindingTablePoolSize.value); + } + + EXPECT_EQ(finalState.surfaceStateBaseAddress.value, requiredState.surfaceStateBaseAddress.value); + EXPECT_EQ(finalState.surfaceStateSize.value, requiredState.surfaceStateSize.value); + + EXPECT_EQ(finalState.dynamicStateBaseAddress.value, requiredState.dynamicStateBaseAddress.value); + EXPECT_EQ(finalState.dynamicStateSize.value, requiredState.dynamicStateSize.value); + + EXPECT_EQ(finalState.indirectObjectBaseAddress.value, requiredState.indirectObjectBaseAddress.value); + EXPECT_EQ(finalState.indirectObjectSize.value, requiredState.indirectObjectSize.value); + + EXPECT_EQ(finalState.bindingTablePoolBaseAddress.value, requiredState.bindingTablePoolBaseAddress.value); + EXPECT_EQ(finalState.bindingTablePoolSize.value, requiredState.bindingTablePoolSize.value); + + EXPECT_EQ(finalState.globalAtomics.value, requiredState.globalAtomics.value); + EXPECT_EQ(finalState.statelessMocs.value, requiredState.statelessMocs.value); + + result = commandList->close(); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + + ze_command_list_handle_t cmdListHandle = commandList->toHandle(); + result = commandQueue->executeCommandLists(1, &cmdListHandle, nullptr, true); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + + auto &csrState = commandQueue->getCsr()->getStreamProperties().stateBaseAddress; + + EXPECT_EQ(csrState.surfaceStateBaseAddress.value, finalState.surfaceStateBaseAddress.value); + EXPECT_EQ(csrState.surfaceStateSize.value, finalState.surfaceStateSize.value); + + EXPECT_EQ(csrState.dynamicStateBaseAddress.value, finalState.dynamicStateBaseAddress.value); + EXPECT_EQ(csrState.dynamicStateSize.value, 
finalState.dynamicStateSize.value); + + EXPECT_EQ(csrState.indirectObjectBaseAddress.value, finalState.indirectObjectBaseAddress.value); + EXPECT_EQ(csrState.indirectObjectSize.value, finalState.indirectObjectSize.value); + + EXPECT_EQ(csrState.bindingTablePoolBaseAddress.value, finalState.bindingTablePoolBaseAddress.value); + EXPECT_EQ(csrState.bindingTablePoolSize.value, finalState.bindingTablePoolSize.value); + + EXPECT_EQ(csrState.globalAtomics.value, finalState.globalAtomics.value); + EXPECT_EQ(csrState.statelessMocs.value, finalState.statelessMocs.value); + + auto &containerImmediate = commandListImmediate->commandContainer; + + result = commandListImmediate->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + + auto sshHeapImmediate = containerImmediate.getIndirectHeap(NEO::HeapType::SURFACE_STATE); + auto ssBaseAddressImmediate = sshHeapImmediate->getHeapGpuBase(); + auto ssSizeImmediate = sshHeapImmediate->getHeapSizeInPages(); + + uint64_t dsBaseAddressImmediate = -1; + size_t dsSizeImmediate = static_cast(-1); + + auto dshHeapImmediate = containerImmediate.getIndirectHeap(NEO::HeapType::DYNAMIC_STATE); + if (dshHeapImmediate) { + dsBaseAddressImmediate = dshHeapImmediate->getHeapGpuBase(); + dsSizeImmediate = dshHeapImmediate->getHeapSizeInPages(); + } + + auto ioBaseAddressImmediate = containerImmediate.getIndirectHeap(NEO::HeapType::INDIRECT_OBJECT)->getHeapGpuBase(); + auto ioSizeImmediate = containerImmediate.getIndirectHeap(NEO::HeapType::INDIRECT_OBJECT)->getHeapSizeInPages(); + + auto statlessMocsImmediate = getMocs(true); + + EXPECT_EQ(static_cast(statlessMocsImmediate), csrState.statelessMocs.value); + + EXPECT_EQ(static_cast(ssBaseAddressImmediate), csrState.surfaceStateBaseAddress.value); + EXPECT_EQ(ssSizeImmediate, csrState.surfaceStateSize.value); + EXPECT_EQ(static_cast(dsBaseAddressImmediate), csrState.dynamicStateBaseAddress.value); + 
EXPECT_EQ(dsSizeImmediate, csrState.dynamicStateSize.value); + EXPECT_EQ(static_cast(ioBaseAddressImmediate), csrState.indirectObjectBaseAddress.value); + EXPECT_EQ(ioSizeImmediate, csrState.indirectObjectSize.value); + + if (sbaPropertiesSupport.bindingTablePoolBaseAddress) { + EXPECT_EQ(static_cast(ssBaseAddressImmediate), csrState.bindingTablePoolBaseAddress.value); + EXPECT_EQ(ssSizeImmediate, csrState.bindingTablePoolSize.value); + } else { + EXPECT_EQ(-1, csrState.bindingTablePoolBaseAddress.value); + EXPECT_EQ(static_cast(-1), csrState.bindingTablePoolSize.value); + } +} + +HWTEST2_F(CommandListStateBaseAddressTest, + givenStateBaseAddressTrackingWhenImmediateCmdListAppendKernelAndRegularCmdListAppendKernelAndExecuteSharingCsrThenBaseAddressStateIsUpdatedInCsr, + IsAtLeastSkl) { + ASSERT_EQ(commandListImmediate->csr, commandQueue->getCsr()); + auto &csrState = commandQueue->getCsr()->getStreamProperties().stateBaseAddress; + + NEO::StateBaseAddressPropertiesSupport sbaPropertiesSupport = {}; + auto &productHelper = device->getProductHelper(); + productHelper.fillStateBaseAddressPropertiesSupportStructure(sbaPropertiesSupport); + + ze_group_count_t groupCount{1, 1, 1}; + CmdListKernelLaunchParams launchParams = {}; + + EXPECT_TRUE(commandList->stateBaseAddressTracking); + + auto &containerImmediate = commandListImmediate->commandContainer; + + auto result = commandListImmediate->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + + auto sshHeapImmediate = containerImmediate.getIndirectHeap(NEO::HeapType::SURFACE_STATE); + auto ssBaseAddressImmediate = sshHeapImmediate->getHeapGpuBase(); + auto ssSizeImmediate = sshHeapImmediate->getHeapSizeInPages(); + + uint64_t dsBaseAddressImmediate = -1; + size_t dsSizeImmediate = static_cast(-1); + + auto dshHeapImmediate = containerImmediate.getIndirectHeap(NEO::HeapType::DYNAMIC_STATE); + if (dshHeapImmediate) { + 
dsBaseAddressImmediate = dshHeapImmediate->getHeapGpuBase(); + dsSizeImmediate = dshHeapImmediate->getHeapSizeInPages(); + } + + auto ioBaseAddressImmediate = containerImmediate.getIndirectHeap(NEO::HeapType::INDIRECT_OBJECT)->getHeapGpuBase(); + auto ioSizeImmediate = containerImmediate.getIndirectHeap(NEO::HeapType::INDIRECT_OBJECT)->getHeapSizeInPages(); + + auto statlessMocsImmediate = getMocs(true); + + EXPECT_EQ(static_cast(statlessMocsImmediate), csrState.statelessMocs.value); + + EXPECT_EQ(static_cast(ssBaseAddressImmediate), csrState.surfaceStateBaseAddress.value); + EXPECT_EQ(ssSizeImmediate, csrState.surfaceStateSize.value); + EXPECT_EQ(static_cast(dsBaseAddressImmediate), csrState.dynamicStateBaseAddress.value); + EXPECT_EQ(dsSizeImmediate, csrState.dynamicStateSize.value); + EXPECT_EQ(static_cast(ioBaseAddressImmediate), csrState.indirectObjectBaseAddress.value); + EXPECT_EQ(ioSizeImmediate, csrState.indirectObjectSize.value); + + if (sbaPropertiesSupport.bindingTablePoolBaseAddress) { + EXPECT_EQ(static_cast(ssBaseAddressImmediate), csrState.bindingTablePoolBaseAddress.value); + EXPECT_EQ(ssSizeImmediate, csrState.bindingTablePoolSize.value); + } else { + EXPECT_EQ(-1, csrState.bindingTablePoolBaseAddress.value); + EXPECT_EQ(static_cast(-1), csrState.bindingTablePoolSize.value); + } + + auto &container = commandList->commandContainer; + + result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + + auto sshHeap = container.getIndirectHeap(NEO::HeapType::SURFACE_STATE); + auto ssBaseAddress = sshHeap->getHeapGpuBase(); + auto ssSize = sshHeap->getHeapSizeInPages(); + + uint64_t dsBaseAddress = -1; + size_t dsSize = static_cast(-1); + + auto dshHeap = container.getIndirectHeap(NEO::HeapType::DYNAMIC_STATE); + if (dshHeap) { + dsBaseAddress = dshHeap->getHeapGpuBase(); + dsSize = dshHeap->getHeapSizeInPages(); + } + + auto ioBaseAddress = 
container.getIndirectHeap(NEO::HeapType::INDIRECT_OBJECT)->getHeapGpuBase(); + auto ioSize = container.getIndirectHeap(NEO::HeapType::INDIRECT_OBJECT)->getHeapSizeInPages(); + + auto statlessMocs = getMocs(true); + + auto &requiredState = commandList->requiredStreamState.stateBaseAddress; + auto &finalState = commandList->finalStreamState.stateBaseAddress; + + EXPECT_EQ(static_cast(statlessMocs), requiredState.statelessMocs.value); + + EXPECT_EQ(static_cast(ssBaseAddress), requiredState.surfaceStateBaseAddress.value); + EXPECT_EQ(ssSize, requiredState.surfaceStateSize.value); + EXPECT_EQ(static_cast(dsBaseAddress), requiredState.dynamicStateBaseAddress.value); + EXPECT_EQ(dsSize, requiredState.dynamicStateSize.value); + EXPECT_EQ(static_cast(ioBaseAddress), requiredState.indirectObjectBaseAddress.value); + EXPECT_EQ(ioSize, requiredState.indirectObjectSize.value); + + if (sbaPropertiesSupport.bindingTablePoolBaseAddress) { + EXPECT_EQ(static_cast(ssBaseAddress), requiredState.bindingTablePoolBaseAddress.value); + EXPECT_EQ(ssSize, requiredState.bindingTablePoolSize.value); + } else { + EXPECT_EQ(-1, requiredState.bindingTablePoolBaseAddress.value); + EXPECT_EQ(static_cast(-1), requiredState.bindingTablePoolSize.value); + } + + EXPECT_EQ(finalState.surfaceStateBaseAddress.value, requiredState.surfaceStateBaseAddress.value); + EXPECT_EQ(finalState.surfaceStateSize.value, requiredState.surfaceStateSize.value); + + EXPECT_EQ(finalState.dynamicStateBaseAddress.value, requiredState.dynamicStateBaseAddress.value); + EXPECT_EQ(finalState.dynamicStateSize.value, requiredState.dynamicStateSize.value); + + EXPECT_EQ(finalState.indirectObjectBaseAddress.value, requiredState.indirectObjectBaseAddress.value); + EXPECT_EQ(finalState.indirectObjectSize.value, requiredState.indirectObjectSize.value); + + EXPECT_EQ(finalState.bindingTablePoolBaseAddress.value, requiredState.bindingTablePoolBaseAddress.value); + EXPECT_EQ(finalState.bindingTablePoolSize.value, 
requiredState.bindingTablePoolSize.value); + + EXPECT_EQ(finalState.globalAtomics.value, requiredState.globalAtomics.value); + EXPECT_EQ(finalState.statelessMocs.value, requiredState.statelessMocs.value); + + result = commandList->close(); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + + ze_command_list_handle_t cmdListHandle = commandList->toHandle(); + result = commandQueue->executeCommandLists(1, &cmdListHandle, nullptr, true); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + + EXPECT_EQ(csrState.surfaceStateBaseAddress.value, finalState.surfaceStateBaseAddress.value); + EXPECT_EQ(csrState.surfaceStateSize.value, finalState.surfaceStateSize.value); + + EXPECT_EQ(csrState.dynamicStateBaseAddress.value, finalState.dynamicStateBaseAddress.value); + EXPECT_EQ(csrState.dynamicStateSize.value, finalState.dynamicStateSize.value); + + EXPECT_EQ(csrState.indirectObjectBaseAddress.value, finalState.indirectObjectBaseAddress.value); + EXPECT_EQ(csrState.indirectObjectSize.value, finalState.indirectObjectSize.value); + + EXPECT_EQ(csrState.bindingTablePoolBaseAddress.value, finalState.bindingTablePoolBaseAddress.value); + EXPECT_EQ(csrState.bindingTablePoolSize.value, finalState.bindingTablePoolSize.value); + + EXPECT_EQ(csrState.globalAtomics.value, finalState.globalAtomics.value); + EXPECT_EQ(csrState.statelessMocs.value, finalState.statelessMocs.value); +} + } // namespace ult } // namespace L0 diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_6.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_6.cpp index bf6d9c38cf..bb5df3afa2 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_6.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_6.cpp @@ -17,7 +17,6 @@ #include "level_zero/core/source/builtin/builtin_functions_lib.h" #include "level_zero/core/test/unit_tests/fixtures/cmdlist_fixture.h" -#include "level_zero/core/test/unit_tests/fixtures/device_fixture.h" #include 
"level_zero/core/test/unit_tests/mocks/mock_cmdlist.h" #include "level_zero/core/test/unit_tests/mocks/mock_event.h" #include "level_zero/core/test/unit_tests/mocks/mock_image.h" diff --git a/level_zero/core/test/unit_tests/sources/cmdqueue/test_cmdqueue_1.cpp b/level_zero/core/test/unit_tests/sources/cmdqueue/test_cmdqueue_1.cpp index e2ac9f1139..b686f916b2 100644 --- a/level_zero/core/test/unit_tests/sources/cmdqueue/test_cmdqueue_1.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdqueue/test_cmdqueue_1.cpp @@ -1997,6 +1997,9 @@ TEST_F(CommandQueueCreate, givenCreatedCommandQueueWhenGettingTrackingFlagsThenD bool expectedFrontEndTracking = l0GfxCoreHelper.platformSupportsFrontEndTracking(); EXPECT_EQ(expectedFrontEndTracking, commandQueue->frontEndStateTracking); + bool expectedStateBaseAddressTracking = l0GfxCoreHelper.platformSupportsStateBaseAddressTracking(); + EXPECT_EQ(expectedStateBaseAddressTracking, commandQueue->stateBaseAddressTracking); + commandQueue->destroy(); } diff --git a/level_zero/core/test/unit_tests/xe_hpc_core/test_l0_gfx_core_helper_xe_hpc_core.cpp b/level_zero/core/test/unit_tests/xe_hpc_core/test_l0_gfx_core_helper_xe_hpc_core.cpp index 8909d2cfe0..72e69cdcf2 100644 --- a/level_zero/core/test/unit_tests/xe_hpc_core/test_l0_gfx_core_helper_xe_hpc_core.cpp +++ b/level_zero/core/test/unit_tests/xe_hpc_core/test_l0_gfx_core_helper_xe_hpc_core.cpp @@ -49,6 +49,11 @@ XE_HPC_CORETEST_F(L0GfxCoreHelperTestXeHpc, GivenXeHpcWhenCheckingL0HelperForPip EXPECT_TRUE(l0GfxCoreHelper.platformSupportsPipelineSelectTracking()); } +XE_HPC_CORETEST_F(L0GfxCoreHelperTestXeHpc, GivenXeHpcWhenCheckingL0HelperForStateBaseAddressTrackingSupportThenReturnFalse) { + auto &l0GfxCoreHelper = getHelper(); + EXPECT_FALSE(l0GfxCoreHelper.platformSupportsStateBaseAddressTracking()); +} + XE_HPC_CORETEST_F(L0GfxCoreHelperTestXeHpc, GivenXeHpcWhenCheckingL0HelperForRayTracingSupportThenReturnTrue) { auto &l0GfxCoreHelper = getHelper(); 
EXPECT_TRUE(l0GfxCoreHelper.platformSupportsRayTracing()); diff --git a/level_zero/core/test/unit_tests/xe_hpg_core/test_l0_gfx_core_helper_xe_hpg_core.cpp b/level_zero/core/test/unit_tests/xe_hpg_core/test_l0_gfx_core_helper_xe_hpg_core.cpp index 07b4b7d7db..b110caa373 100644 --- a/level_zero/core/test/unit_tests/xe_hpg_core/test_l0_gfx_core_helper_xe_hpg_core.cpp +++ b/level_zero/core/test/unit_tests/xe_hpg_core/test_l0_gfx_core_helper_xe_hpg_core.cpp @@ -56,6 +56,12 @@ XE_HPG_CORETEST_F(L0GfxCoreHelperTestXeHpg, GivenXeHpgWhenCheckingL0HelperForPip EXPECT_TRUE(l0GfxCoreHelper.platformSupportsPipelineSelectTracking()); } +XE_HPG_CORETEST_F(L0GfxCoreHelperTestXeHpg, GivenXeHpgWhenCheckingL0HelperForStateBaseAddressTrackingSupportThenReturnFalse) { + MockExecutionEnvironment executionEnvironment; + auto &l0GfxCoreHelper = executionEnvironment.rootDeviceEnvironments[0]->getHelper(); + EXPECT_FALSE(l0GfxCoreHelper.platformSupportsStateBaseAddressTracking()); +} + XE_HPG_CORETEST_F(L0GfxCoreHelperTestXeHpg, GivenXeHpgWhenCheckingL0HelperForRayTracingSupportThenReturnTrue) { MockExecutionEnvironment executionEnvironment; auto &l0GfxCoreHelper = executionEnvironment.rootDeviceEnvironments[0]->getHelper(); diff --git a/opencl/test/unit_test/gen9/glk/windows/test_device_caps_glk_windows.cpp b/opencl/test/unit_test/gen9/glk/windows/test_device_caps_glk_windows.cpp index a23eab67fd..132e5365d7 100644 --- a/opencl/test/unit_test/gen9/glk/windows/test_device_caps_glk_windows.cpp +++ b/opencl/test/unit_test/gen9/glk/windows/test_device_caps_glk_windows.cpp @@ -1,11 +1,12 @@ /* - * Copyright (C) 2018-2022 Intel Corporation + * Copyright (C) 2018-2023 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/gen9/hw_cmds_glk.h" +#include "shared/test/common/mocks/mock_device.h" #include "shared/test/common/test_macros/header/per_product_test_definitions.h" #include "shared/test/common/test_macros/test.h" diff --git 
a/shared/source/command_container/cmdcontainer.cpp b/shared/source/command_container/cmdcontainer.cpp index f2e18fb78c..15258f5133 100644 --- a/shared/source/command_container/cmdcontainer.cpp +++ b/shared/source/command_container/cmdcontainer.cpp @@ -331,12 +331,19 @@ IndirectHeap *CommandContainer::getIndirectHeap(HeapType heapType) { } } -void CommandContainer::ensureHeapSizePrepared(size_t sshRequiredSize, size_t dshRequiredSize) { - auto lock = immediateCmdListCsr->obtainUniqueOwnership(); - sharedSshCsrHeap = &immediateCmdListCsr->getIndirectHeap(HeapType::SURFACE_STATE, sshRequiredSize); +void CommandContainer::ensureHeapSizePrepared(size_t sshRequiredSize, size_t dshRequiredSize, bool getDsh) { + if (immediateCmdListCsr) { + auto lock = immediateCmdListCsr->obtainUniqueOwnership(); + sharedSshCsrHeap = &immediateCmdListCsr->getIndirectHeap(HeapType::SURFACE_STATE, sshRequiredSize); - if (dshRequiredSize > 0) { - sharedDshCsrHeap = &immediateCmdListCsr->getIndirectHeap(HeapType::DYNAMIC_STATE, dshRequiredSize); + if (getDsh) { + sharedDshCsrHeap = &immediateCmdListCsr->getIndirectHeap(HeapType::DYNAMIC_STATE, dshRequiredSize); + } + } else { + this->getHeapWithRequiredSizeAndAlignment(HeapType::SURFACE_STATE, sshRequiredSize, 0); + if (getDsh) { + this->getHeapWithRequiredSizeAndAlignment(HeapType::DYNAMIC_STATE, dshRequiredSize, 0); + } } } diff --git a/shared/source/command_container/cmdcontainer.h b/shared/source/command_container/cmdcontainer.h index aa86c603c1..f1de0e0e38 100644 --- a/shared/source/command_container/cmdcontainer.h +++ b/shared/source/command_container/cmdcontainer.h @@ -119,7 +119,7 @@ class CommandContainer : public NonCopyableOrMovableClass { bool immediateCmdListSharedHeap(HeapType heapType) { return (heapSharingEnabled && (heapType == HeapType::DYNAMIC_STATE || heapType == HeapType::SURFACE_STATE)); } - void ensureHeapSizePrepared(size_t sshRequiredSize, size_t dshRequiredSize); + void ensureHeapSizePrepared(size_t sshRequiredSize, 
size_t dshRequiredSize, bool getDsh); GraphicsAllocation *reuseExistingCmdBuffer(); GraphicsAllocation *allocateCommandBuffer(); diff --git a/shared/source/command_container/command_encoder_xehp_and_later.inl b/shared/source/command_container/command_encoder_xehp_and_later.inl index 5bab5d2925..95b17dd350 100644 --- a/shared/source/command_container/command_encoder_xehp_and_later.inl +++ b/shared/source/command_container/command_encoder_xehp_and_later.inl @@ -236,7 +236,6 @@ void EncodeDispatchKernel::encode(CommandContainer &container, EncodeDis args.isRcs}; EncodeStateBaseAddress::encode(encodeStateBaseAddressArgs); container.setDirtyStateForAllHeaps(false); - args.requiresUncachedMocs = false; } if (NEO::PauseOnGpuProperties::pauseModeAllowed(NEO::DebugManager.flags.PauseOnEnqueue.get(), args.device->debugExecutionCounter.load(), NEO::PauseOnGpuProperties::PauseMode::BeforeWorkload)) { diff --git a/shared/source/command_stream/command_stream_receiver_hw_base.inl b/shared/source/command_stream/command_stream_receiver_hw_base.inl index ee2db178de..22a5f31e8c 100644 --- a/shared/source/command_stream/command_stream_receiver_hw_base.inl +++ b/shared/source/command_stream/command_stream_receiver_hw_base.inl @@ -276,6 +276,15 @@ CompletionStamp CommandStreamReceiverHw::flushTask( typedef typename GfxFamily::PIPE_CONTROL PIPE_CONTROL; typedef typename GfxFamily::STATE_BASE_ADDRESS STATE_BASE_ADDRESS; + int64_t bindingTablePoolBaseAddress = -1; + size_t bindingTablePoolSize = std::numeric_limits::max(); + int64_t surfaceStateBaseAddress = -1; + size_t surfaceStateSize = std::numeric_limits::max(); + int64_t dynamicStateBaseAddress = -1; + size_t dynamicStateSize = std::numeric_limits::max(); + int64_t indirectObjectBaseAddress = -1; + size_t indirectObjectSize = std::numeric_limits::max(); + DEBUG_BREAK_IF(&commandStreamTask == &commandStream); DEBUG_BREAK_IF(!(dispatchFlags.preemptionMode == PreemptionMode::Disabled ? 
device.getPreemptionMode() == PreemptionMode::Disabled : true)); DEBUG_BREAK_IF(taskLevel >= CompletionStamp::notReady); @@ -444,6 +453,22 @@ CompletionStamp CommandStreamReceiverHw::flushTask( bool iohDirty = iohState.updateAndCheck(ioh); bool sshDirty = ssh != nullptr ? sshState.updateAndCheck(ssh) : false; + if (dshDirty) { + dynamicStateBaseAddress = dsh->getHeapGpuBase(); + dynamicStateSize = dsh->getHeapSizeInPages(); + } + if (iohDirty) { + indirectObjectBaseAddress = ioh->getHeapGpuBase(); + indirectObjectSize = ioh->getHeapSizeInPages(); + } + if (sshDirty) { + surfaceStateBaseAddress = ssh->getHeapGpuBase(); + surfaceStateSize = ssh->getHeapSizeInPages(); + + bindingTablePoolBaseAddress = surfaceStateBaseAddress; + bindingTablePoolSize = surfaceStateSize; + } + auto isStateBaseAddressDirty = dshDirty || iohDirty || sshDirty || stateBaseAddressDirty; auto mocsIndex = latestSentStatelessMocsConfig; @@ -464,6 +489,12 @@ CompletionStamp CommandStreamReceiverHw::flushTask( lastSentUseGlobalAtomics = dispatchFlags.useGlobalAtomics; } + this->streamProperties.stateBaseAddress.setProperties(dispatchFlags.useGlobalAtomics, mocsIndex, + bindingTablePoolBaseAddress, bindingTablePoolSize, + surfaceStateBaseAddress, surfaceStateSize, + dynamicStateBaseAddress, dynamicStateSize, + indirectObjectBaseAddress, indirectObjectSize, this->peekRootDeviceEnvironment()); + bool debuggingEnabled = device.getDebugger() != nullptr; bool sourceLevelDebuggerActive = device.getSourceLevelDebugger() != nullptr ? 
true : false; @@ -478,6 +509,7 @@ CompletionStamp CommandStreamReceiverHw::flushTask( // Reprogram state base address if required if (isStateBaseAddressDirty || sourceLevelDebuggerActive) { + this->latestSentStatelessMocsConfig = static_cast(this->streamProperties.stateBaseAddress.statelessMocs.value); EncodeWA::addPipeControlBeforeStateBaseAddress(commandStreamCSR, this->peekRootDeviceEnvironment(), isRcs(), this->dcFlushSupport); EncodeWA::encodeAdditionalPipelineSelect(commandStreamCSR, dispatchFlags.pipelineSelectArgs, true, peekRootDeviceEnvironment(), isRcs()); diff --git a/shared/source/debug_settings/debug_variables_base.inl b/shared/source/debug_settings/debug_variables_base.inl index 2469e3cbce..29e08404c2 100644 --- a/shared/source/debug_settings/debug_variables_base.inl +++ b/shared/source/debug_settings/debug_variables_base.inl @@ -512,6 +512,7 @@ DECLARE_DEBUG_VARIABLE(int32_t, OverrideDrmRegion, -1, "-1: disable, 0+: overrid DECLARE_DEBUG_VARIABLE(int32_t, EnableFrontEndTracking, -1, "-1: default: disabled, 0: disabled, 1: enabled. This flag creates multiple return point from List to Queue for Front End reconfiguration on Queue buffer for single List") DECLARE_DEBUG_VARIABLE(int32_t, EnablePipelineSelectTracking, -1, "-1: default: disabled, 0: disabled, 1: enabled. This flag enables optimization that limits number of pipeline select dispatched by command lists") DECLARE_DEBUG_VARIABLE(int32_t, EnableStateComputeModeTracking, -1, "-1: default: disabled, 0: disabled, 1: enabled. This flag enables tracking state compute mode changes in command lists") +DECLARE_DEBUG_VARIABLE(int32_t, EnableStateBaseAddressTracking, -1, "-1: default: disabled, 0: disabled, 1: enabled. 
This flag enables tracking state base address changes in command lists") DECLARE_DEBUG_VARIABLE(int32_t, EnableSetPair, -1, "Use SET_PAIR to pair two buffer objects behind the same file descriptor, -1: default, 0: disabled, 1: enabled") /* Binary Cache */ DECLARE_DEBUG_VARIABLE(bool, BinaryCacheTrace, false, "enable cl_cache to produce .trace files with information about hash computation") diff --git a/shared/source/helpers/hw_helper_bdw_and_later.inl b/shared/source/helpers/hw_helper_bdw_and_later.inl index 7a9b6d5a09..79b5441330 100644 --- a/shared/source/helpers/hw_helper_bdw_and_later.inl +++ b/shared/source/helpers/hw_helper_bdw_and_later.inl @@ -5,6 +5,7 @@ * */ +#include "shared/source/helpers/engine_node_helper.h" #include "shared/source/helpers/hw_helper.h" #include "shared/source/helpers/hw_info.h" diff --git a/shared/test/common/helpers/unit_test_helper.h b/shared/test/common/helpers/unit_test_helper.h index 49f8742005..ec60066b48 100644 --- a/shared/test/common/helpers/unit_test_helper.h +++ b/shared/test/common/helpers/unit_test_helper.h @@ -16,6 +16,7 @@ namespace NEO { class CommandStreamReceiver; class LinearStream; +struct DeviceInfo; struct KernelDescriptor; struct HardwareInfo; struct RootDeviceEnvironment; @@ -93,6 +94,7 @@ struct UnitTestHelper { static bool getComputeDispatchAllWalkerFromFrontEndCommand(const typename GfxFamily::VFE_STATE_TYPE &feCmd); static bool getSystolicFlagValueFromPipelineSelectCommand(const typename GfxFamily::PIPELINE_SELECT &pipelineSelectCmd); static size_t getAdditionalDshSize(); + static bool expectNullDsh(const DeviceInfo &deviceInfo); }; } // namespace NEO diff --git a/shared/test/common/helpers/unit_test_helper.inl b/shared/test/common/helpers/unit_test_helper.inl index de4c879e95..3e20305cc3 100644 --- a/shared/test/common/helpers/unit_test_helper.inl +++ b/shared/test/common/helpers/unit_test_helper.inl @@ -5,6 +5,7 @@ * */ +#include "shared/source/device/device_info.h" #include 
"shared/source/helpers/hw_info.h" #include "shared/test/common/cmd_parse/gen_cmd_parse.h" #include "shared/test/common/cmd_parse/hw_parse.h" @@ -107,4 +108,12 @@ bool UnitTestHelper::getComputeDispatchAllWalkerFromFrontEndCommand(c return false; } +template +bool UnitTestHelper::expectNullDsh(const DeviceInfo &deviceInfo) { + if constexpr (GfxFamily::supportsSampler) { + return !deviceInfo.imageSupport; + } + return true; +} + } // namespace NEO diff --git a/shared/test/common/test_files/igdrcl.config b/shared/test/common/test_files/igdrcl.config index d921fe1660..982efa50ce 100644 --- a/shared/test/common/test_files/igdrcl.config +++ b/shared/test/common/test_files/igdrcl.config @@ -144,6 +144,7 @@ AllocateBuffersInLocalMemoryForMultiRootDeviceContexts = 0 EnableComputeWorkSizeSquared = 0 EnableVaLibCalls = -1 EnableExtendedVaFormats = 0 +EnableStateBaseAddressTracking = -1 AddClGlSharing = -1 EnableFormatQuery = 1 EnableFreeMemory = 0 diff --git a/shared/test/unit_test/command_container/command_container_tests.cpp b/shared/test/unit_test/command_container/command_container_tests.cpp index bb8ea9a31d..2fb1a4e086 100644 --- a/shared/test/unit_test/command_container/command_container_tests.cpp +++ b/shared/test/unit_test/command_container/command_container_tests.cpp @@ -956,7 +956,7 @@ HWTEST_F(CommandContainerTest, givenCmdContainerHasImmediateCsrWhenGettingHeapWi auto &ultCsr = pDevice->getUltCommandStreamReceiver(); ultCsr.recursiveLockCounter = 0; - cmdContainer.ensureHeapSizePrepared(0, 0); + cmdContainer.ensureHeapSizePrepared(0, 0, false); EXPECT_EQ(1u, ultCsr.recursiveLockCounter); EXPECT_EQ(nullptr, cmdContainer.getIndirectHeap(HeapType::DYNAMIC_STATE)); @@ -968,9 +968,15 @@ HWTEST_F(CommandContainerTest, givenCmdContainerHasImmediateCsrWhenGettingHeapWi EXPECT_NO_THROW(cmdContainer.getHeapSpaceAllowGrow(HeapType::SURFACE_STATE, 0)); EXPECT_NO_THROW(cmdContainer.getHeapWithRequiredSizeAndAlignment(HeapType::SURFACE_STATE, 0, 0)); - 
cmdContainer.ensureHeapSizePrepared(4 * MemoryConstants::kiloByte, 4 * MemoryConstants::kiloByte); + cmdContainer.ensureHeapSizePrepared(0, 0, true); EXPECT_EQ(2u, ultCsr.recursiveLockCounter); + EXPECT_NE(nullptr, cmdContainer.getIndirectHeap(HeapType::DYNAMIC_STATE)); + EXPECT_NE(nullptr, cmdContainer.getIndirectHeap(HeapType::SURFACE_STATE)); + + cmdContainer.ensureHeapSizePrepared(4 * MemoryConstants::kiloByte, 4 * MemoryConstants::kiloByte, true); + EXPECT_EQ(3u, ultCsr.recursiveLockCounter); + auto dshHeap = cmdContainer.getIndirectHeap(HeapType::DYNAMIC_STATE); EXPECT_NE(nullptr, dshHeap); auto sshHeap = cmdContainer.getIndirectHeap(HeapType::SURFACE_STATE); @@ -1001,6 +1007,32 @@ HWTEST_F(CommandContainerTest, givenCmdContainerHasImmediateCsrWhenGettingHeapWi EXPECT_THROW(cmdContainer.getHeapWithRequiredSizeAndAlignment(HeapType::SURFACE_STATE, 64, 64), std::exception); } +HWTEST_F(CommandContainerTest, givenCmdContainerUsedInRegularCmdListWhenGettingHeapWithEnsuringSpaceThenExpectCorrectHeap) { + if (!pDevice->getDeviceInfo().imageSupport) { + GTEST_SKIP(); + } + + MyMockCommandContainer cmdContainer; + + auto code = cmdContainer.initialize(pDevice, nullptr, true); + EXPECT_EQ(CommandContainer::ErrorCode::SUCCESS, code); + + cmdContainer.ensureHeapSizePrepared(0, 0, true); + + auto dsh = cmdContainer.getIndirectHeap(HeapType::DYNAMIC_STATE); + auto ssh = cmdContainer.getIndirectHeap(HeapType::SURFACE_STATE); + + EXPECT_NE(nullptr, dsh); + EXPECT_NE(nullptr, ssh); + + dsh->getSpace(dsh->getAvailableSpace() - 64); + + cmdContainer.ensureHeapSizePrepared(4 * MemoryConstants::kiloByte, 4 * MemoryConstants::kiloByte, false); + + dsh = cmdContainer.getIndirectHeap(HeapType::DYNAMIC_STATE); + EXPECT_EQ(64u, dsh->getAvailableSpace()); +} + struct MockHeapHelper : public HeapHelper { public: using HeapHelper::storageForReuse; diff --git a/shared/test/unit_test/encoders/test_encode_dispatch_kernel.cpp b/shared/test/unit_test/encoders/test_encode_dispatch_kernel.cpp 
index cd2b0975df..c1cd7df1ed 100644 --- a/shared/test/unit_test/encoders/test_encode_dispatch_kernel.cpp +++ b/shared/test/unit_test/encoders/test_encode_dispatch_kernel.cpp @@ -96,7 +96,6 @@ HWTEST_F(CommandEncodeStatesUncachedMocsTests, whenEncodingDispatchKernelWithUnc EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs); EncodeDispatchKernel::encode(*cmdContainer.get(), dispatchArgs, nullptr); - EXPECT_FALSE(dispatchArgs.requiresUncachedMocs); GenCmdList commands; CmdParse::parseCommandBuffer(commands, @@ -124,7 +123,6 @@ HWTEST_F(CommandEncodeStatesUncachedMocsTests, whenEncodingDispatchKernelWithUnc EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs); EncodeDispatchKernel::encode(*cmdContainer.get(), dispatchArgs, nullptr); - EXPECT_FALSE(dispatchArgs.requiresUncachedMocs); GenCmdList commands; CmdParse::parseCommandBuffer(commands,