fix: fix scratch programming in heapless mode

Related-To: NEO-10107

Signed-off-by: Kamil Kopryk <kamil.kopryk@intel.com>
This commit is contained in:
Kamil Kopryk 2024-01-23 14:32:18 +00:00 committed by Compute-Runtime-Automation
parent 7bbe57c671
commit 87d13fcb6e
4 changed files with 53 additions and 21 deletions

View File

@ -550,6 +550,8 @@ void CommandQueueHw<GfxFamily>::processDispatchForKernels(const MultiDispatchInf
dispatchWalkerArgs.event = event;
dispatchWalkerArgs.relaxedOrderingEnabled = relaxedOrderingEnabled;
getGpgpuCommandStreamReceiver().setRequiredScratchSizes(multiDispatchInfo.getRequiredScratchSize(0u), multiDispatchInfo.getRequiredScratchSize(1u));
HardwareInterface<GfxFamily>::dispatchWalkerCommon(*this, multiDispatchInfo, csrDeps, dispatchWalkerArgs);
if (debugManager.flags.AddPatchInfoCommentsForAUBDump.get()) {
@ -559,8 +561,6 @@ void CommandQueueHw<GfxFamily>::processDispatchForKernels(const MultiDispatchInf
}
}
}
getGpgpuCommandStreamReceiver().setRequiredScratchSizes(multiDispatchInfo.getRequiredScratchSize(0u), multiDispatchInfo.getRequiredScratchSize(1u));
}
template <typename GfxFamily>

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2021-2023 Intel Corporation
* Copyright (C) 2021-2024 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@ -9,6 +9,7 @@
#include "shared/source/command_container/command_encoder.h"
#include "shared/source/command_container/implicit_scaling.h"
#include "shared/source/command_stream/command_stream_receiver.h"
#include "shared/source/command_stream/scratch_space_controller.h"
#include "shared/source/debug_settings/debug_settings_manager.h"
#include "shared/source/device/device.h"
#include "shared/source/helpers/definitions/command_encoder_args.h"
@ -111,8 +112,34 @@ inline void HardwareInterface<GfxFamily>::programWalker(
if constexpr (heaplessModeEnabled) {
auto scratchAllocation = queueCsr.getScratchAllocation();
auto scratchSpaceController = queueCsr.getScratchSpaceController();
if (scratchAllocation) {
scratchAddress = scratchAllocation->getGpuAddress();
scratchAddress = ssh.getGpuBase() + scratchSpaceController->getScratchPatchAddress();
} else {
auto requiredScratchSlot0Size = queueCsr.getRequiredScratchSlot0Size();
auto requiredScratchSlot1Size = queueCsr.getRequiredScratchSlot1Size();
bool stateBaseAddressDirty = false;
bool checkVfeStateDirty = false;
if (requiredScratchSlot0Size || requiredScratchSlot1Size) {
scratchSpaceController->setRequiredScratchSpace(ssh.getCpuBase(),
0u,
requiredScratchSlot0Size,
requiredScratchSlot1Size,
queueCsr.peekTaskCount(), queueCsr.getOsContext(),
stateBaseAddressDirty,
checkVfeStateDirty);
if (scratchSpaceController->getScratchSpaceSlot0Allocation()) {
queueCsr.makeResident(*scratchSpaceController->getScratchSpaceSlot0Allocation());
}
if (scratchSpaceController->getScratchSpaceSlot1Allocation()) {
queueCsr.makeResident(*scratchSpaceController->getScratchSpaceSlot1Allocation());
}
scratchAddress = ssh.getGpuBase() + scratchSpaceController->getScratchPatchAddress();
}
}
}

View File

@ -447,6 +447,9 @@ class CommandStreamReceiver {
return this->resourcesInitialized;
}
// Accessor: hands back the current value of the requiredScratchSlot0Size member.
// NOTE(review): presumably this is the slot 0 size recorded via setRequiredScratchSizes() - confirm against the setter.
// NOTE(review): reads state only; could likely be const-qualified - confirm no caller depends on the non-const form.
uint32_t getRequiredScratchSlot0Size() {
    return requiredScratchSlot0Size;
}
// Accessor: hands back the current value of the requiredScratchSlot1Size member.
// NOTE(review): presumably this is the slot 1 size recorded via setRequiredScratchSizes() - confirm against the setter.
// NOTE(review): reads state only; could likely be const-qualified - confirm no caller depends on the non-const form.
uint32_t getRequiredScratchSlot1Size() {
    return requiredScratchSlot1Size;
}
protected:
void cleanupResources();
void printDeviceIndex();

View File

@ -446,6 +446,7 @@ CompletionStamp CommandStreamReceiverHw<GfxFamily>::flushTask(
bool stateBaseAddressDirty = false;
bool checkVfeStateDirty = false;
if (heaplessModeEnabled == false) {
if (ssh && (requiredScratchSlot0Size || requiredScratchSlot1Size)) {
scratchSpaceController->setRequiredScratchSpace(ssh->getCpuBase(),
0u,
@ -465,6 +466,7 @@ CompletionStamp CommandStreamReceiverHw<GfxFamily>::flushTask(
makeResident(*scratchSpaceController->getScratchSpaceSlot1Allocation());
}
}
}
if (dispatchFlags.usePerDssBackedBuffer) {
if (!perDssBackedBuffer) {