diff --git a/opencl/source/command_queue/enqueue_common.h b/opencl/source/command_queue/enqueue_common.h index f0fce2e648..70de197510 100644 --- a/opencl/source/command_queue/enqueue_common.h +++ b/opencl/source/command_queue/enqueue_common.h @@ -550,6 +550,8 @@ void CommandQueueHw::processDispatchForKernels(const MultiDispatchInf dispatchWalkerArgs.event = event; dispatchWalkerArgs.relaxedOrderingEnabled = relaxedOrderingEnabled; + getGpgpuCommandStreamReceiver().setRequiredScratchSizes(multiDispatchInfo.getRequiredScratchSize(0u), multiDispatchInfo.getRequiredScratchSize(1u)); + HardwareInterface::dispatchWalkerCommon(*this, multiDispatchInfo, csrDeps, dispatchWalkerArgs); if (debugManager.flags.AddPatchInfoCommentsForAUBDump.get()) { @@ -559,8 +561,6 @@ void CommandQueueHw::processDispatchForKernels(const MultiDispatchInf } } } - - getGpgpuCommandStreamReceiver().setRequiredScratchSizes(multiDispatchInfo.getRequiredScratchSize(0u), multiDispatchInfo.getRequiredScratchSize(1u)); } template diff --git a/opencl/source/command_queue/hardware_interface_xehp_and_later.inl b/opencl/source/command_queue/hardware_interface_xehp_and_later.inl index c954e21828..e723af7652 100644 --- a/opencl/source/command_queue/hardware_interface_xehp_and_later.inl +++ b/opencl/source/command_queue/hardware_interface_xehp_and_later.inl @@ -1,5 +1,5 @@ /* - * Copyright (C) 2021-2023 Intel Corporation + * Copyright (C) 2021-2024 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -9,6 +9,7 @@ #include "shared/source/command_container/command_encoder.h" #include "shared/source/command_container/implicit_scaling.h" #include "shared/source/command_stream/command_stream_receiver.h" +#include "shared/source/command_stream/scratch_space_controller.h" #include "shared/source/debug_settings/debug_settings_manager.h" #include "shared/source/device/device.h" #include "shared/source/helpers/definitions/command_encoder_args.h" @@ -111,8 +112,34 @@ inline void HardwareInterface::programWalker( if constexpr (heaplessModeEnabled) { auto scratchAllocation = queueCsr.getScratchAllocation(); + auto scratchSpaceController = queueCsr.getScratchSpaceController(); if (scratchAllocation) { - scratchAddress = scratchAllocation->getGpuAddress(); + scratchAddress = ssh.getGpuBase() + scratchSpaceController->getScratchPatchAddress(); + } else { + auto requiredScratchSlot0Size = queueCsr.getRequiredScratchSlot0Size(); + auto requiredScratchSlot1Size = queueCsr.getRequiredScratchSlot1Size(); + bool stateBaseAddressDirty = false; + bool checkVfeStateDirty = false; + + if (requiredScratchSlot0Size || requiredScratchSlot1Size) { + + scratchSpaceController->setRequiredScratchSpace(ssh.getCpuBase(), + 0u, + requiredScratchSlot0Size, + requiredScratchSlot1Size, + queueCsr.peekTaskCount(), queueCsr.getOsContext(), + stateBaseAddressDirty, + checkVfeStateDirty); + + if (scratchSpaceController->getScratchSpaceSlot0Allocation()) { + queueCsr.makeResident(*scratchSpaceController->getScratchSpaceSlot0Allocation()); + } + if (scratchSpaceController->getScratchSpaceSlot1Allocation()) { + queueCsr.makeResident(*scratchSpaceController->getScratchSpaceSlot1Allocation()); + } + + scratchAddress = ssh.getGpuBase() + scratchSpaceController->getScratchPatchAddress(); + } } } diff --git a/shared/source/command_stream/command_stream_receiver.h b/shared/source/command_stream/command_stream_receiver.h index eb7aaf60f7..6017271693 100644 --- a/shared/source/command_stream/command_stream_receiver.h +++ b/shared/source/command_stream/command_stream_receiver.h @@ -447,6 +447,9 @@ class CommandStreamReceiver { return this->resourcesInitialized; } + uint32_t getRequiredScratchSlot0Size() { return requiredScratchSlot0Size; } + uint32_t getRequiredScratchSlot1Size() { return requiredScratchSlot1Size; } + protected: void cleanupResources(); void printDeviceIndex(); diff --git a/shared/source/command_stream/command_stream_receiver_hw_base.inl b/shared/source/command_stream/command_stream_receiver_hw_base.inl index d4f7dc3ad8..2e8d371145 100644 --- a/shared/source/command_stream/command_stream_receiver_hw_base.inl +++ b/shared/source/command_stream/command_stream_receiver_hw_base.inl @@ -446,23 +446,25 @@ CompletionStamp CommandStreamReceiverHw::flushTask( bool stateBaseAddressDirty = false; bool checkVfeStateDirty = false; - if (ssh && (requiredScratchSlot0Size || requiredScratchSlot1Size)) { - scratchSpaceController->setRequiredScratchSpace(ssh->getCpuBase(), - 0u, - requiredScratchSlot0Size, - requiredScratchSlot1Size, - this->taskCount, - *this->osContext, - stateBaseAddressDirty, - checkVfeStateDirty); - if (checkVfeStateDirty) { - setMediaVFEStateDirty(true); - } - if (scratchSpaceController->getScratchSpaceSlot0Allocation()) { - makeResident(*scratchSpaceController->getScratchSpaceSlot0Allocation()); - } - if (scratchSpaceController->getScratchSpaceSlot1Allocation()) { - makeResident(*scratchSpaceController->getScratchSpaceSlot1Allocation()); + if (heaplessModeEnabled == false) { + if (ssh && (requiredScratchSlot0Size || requiredScratchSlot1Size)) { + scratchSpaceController->setRequiredScratchSpace(ssh->getCpuBase(), + 0u, + requiredScratchSlot0Size, + requiredScratchSlot1Size, + this->taskCount, + *this->osContext, + stateBaseAddressDirty, + checkVfeStateDirty); + if (checkVfeStateDirty) { + setMediaVFEStateDirty(true); + } + if (scratchSpaceController->getScratchSpaceSlot0Allocation()) { + makeResident(*scratchSpaceController->getScratchSpaceSlot0Allocation()); + } + if (scratchSpaceController->getScratchSpaceSlot1Allocation()) { + makeResident(*scratchSpaceController->getScratchSpaceSlot1Allocation()); + } } }