Add state base address properties tracking for command lists

Related-To: NEO-5055

Signed-off-by: Zbigniew Zdanowicz <zbigniew.zdanowicz@intel.com>
This commit is contained in:
Zbigniew Zdanowicz 2023-01-26 14:33:36 +00:00 committed by Compute-Runtime-Automation
parent 2351b0f97d
commit 34b8f08fc6
40 changed files with 851 additions and 46 deletions

View File

@ -351,6 +351,7 @@ struct CommandList : _ze_command_list_handle_t {
bool pipelineSelectStateTracking = false;
bool stateComputeModeTracking = false;
bool signalAllEventPackets = false;
bool stateBaseAddressTracking = false;
};
using CommandListAllocatorFn = CommandList *(*)(uint32_t);

View File

@ -259,6 +259,7 @@ struct CommandListCoreFamily : CommandListImp {
ze_result_t prepareIndirectParams(const ze_group_count_t *threadGroupDimensions);
void updateStreamProperties(Kernel &kernel, bool isCooperative);
void updateStateBaseAddressStreamProperties(Kernel &kernel, bool updateRequiredState, bool captureBaseAddressState);
void clearCommandsToPatch();
size_t getTotalSizeForCopyRegion(const ze_copy_region_t *region, uint32_t pitch, uint32_t slicePitch);
@ -307,7 +308,19 @@ struct CommandListCoreFamily : CommandListImp {
void dispatchEventRemainingPacketsPostSyncOperation(Event *event);
void dispatchEventPostSyncOperation(Event *event, uint32_t value, bool omitFirstOperation, bool useMax, bool useLastPipeControl);
int64_t currentSurfaceStateBaseAddress = -1;
int64_t currentDynamicStateBaseAddress = -1;
int64_t currentIndirectObjectBaseAddress = -1;
int64_t currentBindingTablePoolBaseAddress = -1;
size_t currentSurfaceStateSize = std::numeric_limits<size_t>::max();
size_t currentDynamicStateSize = std::numeric_limits<size_t>::max();
size_t currentIndirectObjectSize = std::numeric_limits<size_t>::max();
size_t currentBindingTablePoolSize = std::numeric_limits<size_t>::max();
size_t cmdListCurrentStartOffset = 0;
int32_t currentMocsState = -1;
bool containsAnyKernel = false;
bool pipeControlMultiKernelEventSync = false;
bool compactL3FlushEventPacket = false;

View File

@ -8,7 +8,6 @@
#include "shared/source/built_ins/built_ins.h"
#include "shared/source/command_container/encode_surface_state.h"
#include "shared/source/command_stream/command_stream_receiver.h"
#include "shared/source/command_stream/linear_stream.h"
#include "shared/source/command_stream/preemption.h"
#include "shared/source/debugger/debugger_l0.h"
#include "shared/source/device/device.h"
@ -60,9 +59,6 @@
namespace L0 {
template <GFXCORE_FAMILY gfxCoreFamily>
struct EncodeStateBaseAddress;
inline ze_result_t parseErrorCode(NEO::CommandContainer::ErrorCode returnValue) {
switch (returnValue) {
case NEO::CommandContainer::ErrorCode::OUT_OF_DEVICE_MEMORY:
@ -121,6 +117,17 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::reset() {
this->ownedPrivateAllocations.clear();
cmdListCurrentStartOffset = 0;
this->returnPoints.clear();
currentSurfaceStateBaseAddress = -1;
currentDynamicStateBaseAddress = -1;
currentIndirectObjectBaseAddress = -1;
currentBindingTablePoolBaseAddress = -1;
currentSurfaceStateSize = std::numeric_limits<size_t>::max();
currentDynamicStateSize = std::numeric_limits<size_t>::max();
currentIndirectObjectSize = std::numeric_limits<size_t>::max();
currentBindingTablePoolSize = std::numeric_limits<size_t>::max();
return ZE_RESULT_SUCCESS;
}
@ -144,6 +151,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::initialize(Device *device, NEO
this->stateComputeModeTracking = L0GfxCoreHelper::enableStateComputeModeTracking(rootDeviceEnvironment);
this->frontEndStateTracking = L0GfxCoreHelper::enableFrontEndStateTracking(rootDeviceEnvironment);
this->pipelineSelectStateTracking = L0GfxCoreHelper::enablePipelineSelectStateTracking(rootDeviceEnvironment);
this->stateBaseAddressTracking = L0GfxCoreHelper::enableStateBaseAddressTracking(rootDeviceEnvironment);
this->pipeControlMultiKernelEventSync = L0GfxCoreHelper::usePipeControlMultiKernelEventSync(hwInfo);
this->compactL3FlushEventPacket = L0GfxCoreHelper::useCompactL3FlushEventPacket(hwInfo);
this->signalAllEventPackets = L0GfxCoreHelper::useSignalAllEventPackets(hwInfo);
@ -2323,17 +2331,61 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::prepareIndirectParams(const ze
return ZE_RESULT_SUCCESS;
}
// Updates the state-base-address stream properties tracked by this command list.
//
// kernel                  - kernel being appended; supplies the uncached-MOCS requirement and
//                           the global-atomics flag.
// updateRequiredState     - true: write into requiredStreamState; false: write into finalStreamState.
// captureBaseAddressState - true: re-read the MOCS value and the heap base addresses/sizes from
//                           the command container before the properties are set; false: reuse
//                           the values captured earlier.
template <GFXCORE_FAMILY gfxCoreFamily>
void CommandListCoreFamily<gfxCoreFamily>::updateStateBaseAddressStreamProperties(Kernel &kernel, bool updateRequiredState, bool captureBaseAddressState) {
    auto &kernelImplementation = static_cast<KernelImp &>(kernel);
    auto &rootDeviceEnvironment = device->getNEODevice()->getRootDeviceEnvironment();

    if (captureBaseAddressState) {
        const bool kernelAllowsCachedMocs = !kernelImplementation.getKernelRequiresUncachedMocs();
        currentMocsState = static_cast<int32_t>(device->getMOCS(kernelAllowsCachedMocs, false) >> 1);

        auto surfaceStateHeap = commandContainer.getIndirectHeap(NEO::IndirectHeap::Type::SURFACE_STATE);
        currentSurfaceStateBaseAddress = surfaceStateHeap->getHeapGpuBase();
        currentSurfaceStateSize = surfaceStateHeap->getHeapSizeInPages();

        // The binding table pool is tracked with the same base and size as the surface state heap.
        currentBindingTablePoolBaseAddress = currentSurfaceStateBaseAddress;
        currentBindingTablePoolSize = currentSurfaceStateSize;

        // The dynamic state heap can be missing; the previously tracked values stay untouched then.
        if (auto dynamicStateHeap = commandContainer.getIndirectHeap(NEO::IndirectHeap::Type::DYNAMIC_STATE)) {
            currentDynamicStateBaseAddress = dynamicStateHeap->getHeapGpuBase();
            currentDynamicStateSize = dynamicStateHeap->getHeapSizeInPages();
        }

        auto indirectObjectHeap = commandContainer.getIndirectHeap(NEO::IndirectHeap::Type::INDIRECT_OBJECT);
        currentIndirectObjectBaseAddress = indirectObjectHeap->getHeapGpuBase();
        currentIndirectObjectSize = indirectObjectHeap->getHeapSizeInPages();
    }

    auto &targetBaseAddressState = updateRequiredState ? requiredStreamState.stateBaseAddress
                                                       : finalStreamState.stateBaseAddress;

    targetBaseAddressState.setProperties(kernelImplementation.getKernelDescriptor().kernelAttributes.flags.useGlobalAtomics, currentMocsState,
                                         currentBindingTablePoolBaseAddress, currentBindingTablePoolSize,
                                         currentSurfaceStateBaseAddress, currentSurfaceStateSize,
                                         currentDynamicStateBaseAddress, currentDynamicStateSize,
                                         currentIndirectObjectBaseAddress, currentIndirectObjectSize,
                                         rootDeviceEnvironment);
}
template <GFXCORE_FAMILY gfxCoreFamily>
void CommandListCoreFamily<gfxCoreFamily>::updateStreamProperties(Kernel &kernel, bool isCooperative) {
using VFE_STATE_TYPE = typename GfxFamily::VFE_STATE_TYPE;
auto &hwInfo = device->getHwInfo();
auto &rootDeviceEnvironment = device->getNEODevice()->getRootDeviceEnvironment();
auto &kernelAttributes = kernel.getKernelDescriptor().kernelAttributes;
bool captureBaseAddressState = containsAnyKernel;
if (!containsAnyKernel) {
requiredStreamState.frontEndState.setProperties(isCooperative, kernelAttributes.flags.requiresDisabledEUFusion, true, -1, rootDeviceEnvironment);
requiredStreamState.pipelineSelect.setProperties(true, false, kernelAttributes.flags.usesSystolicPipelineSelectMode, rootDeviceEnvironment);
if (!this->isFlushTaskSubmissionEnabled) {
updateStateBaseAddressStreamProperties(kernel, true, true);
}
if (this->stateComputeModeTracking) {
requiredStreamState.stateComputeMode.setProperties(false, kernelAttributes.numGrfRequired, kernelAttributes.threadArbitrationPolicy, device->getDevicePreemptionMode(), rootDeviceEnvironment);
finalStreamState = requiredStreamState;
@ -2342,6 +2394,7 @@ void CommandListCoreFamily<gfxCoreFamily>::updateStreamProperties(Kernel &kernel
requiredStreamState.stateComputeMode.setProperties(false, kernelAttributes.numGrfRequired, kernelAttributes.threadArbitrationPolicy, device->getDevicePreemptionMode(), rootDeviceEnvironment);
}
containsAnyKernel = true;
captureBaseAddressState = false;
}
auto logicalStateHelperBlock = !getLogicalStateHelper();
@ -2361,10 +2414,10 @@ void CommandListCoreFamily<gfxCoreFamily>::updateStreamProperties(Kernel &kernel
bool isPatchingVfeStateAllowed = NEO::DebugManager.flags.AllowPatchingVfeStateInCommandLists.get();
if (finalStreamState.frontEndState.isDirty() && logicalStateHelperBlock) {
if (isPatchingVfeStateAllowed) {
auto pVfeStateAddress = NEO::PreambleHelper<GfxFamily>::getSpaceForVfeState(commandContainer.getCommandStream(), hwInfo, engineGroupType);
auto pVfeState = new VFE_STATE_TYPE;
NEO::PreambleHelper<GfxFamily>::programVfeState(pVfeState, rootDeviceEnvironment, 0, 0, device->getMaxNumHwThreads(), finalStreamState, nullptr);
commandsToPatch.push_back({pVfeStateAddress, pVfeState, CommandToPatch::FrontEndState});
auto frontEndStateAddress = NEO::PreambleHelper<GfxFamily>::getSpaceForVfeState(commandContainer.getCommandStream(), device->getHwInfo(), engineGroupType);
auto frontEndStateCmd = new VFE_STATE_TYPE;
NEO::PreambleHelper<GfxFamily>::programVfeState(frontEndStateCmd, rootDeviceEnvironment, 0, 0, device->getMaxNumHwThreads(), finalStreamState, nullptr);
commandsToPatch.push_back({frontEndStateAddress, frontEndStateCmd, CommandToPatch::FrontEndState});
}
if (this->frontEndStateTracking) {
auto &stream = *commandContainer.getCommandStream();
@ -2389,6 +2442,10 @@ void CommandListCoreFamily<gfxCoreFamily>::updateStreamProperties(Kernel &kernel
NEO::EncodeComputeMode<GfxFamily>::programComputeModeCommandWithSynchronization(
*commandContainer.getCommandStream(), finalStreamState.stateComputeMode, pipelineSelectArgs, false, rootDeviceEnvironment, isRcs, this->dcFlushSupport, nullptr);
}
if (!this->isFlushTaskSubmissionEnabled) {
updateStateBaseAddressStreamProperties(kernel, false, captureBaseAddressState);
}
}
template <GFXCORE_FAMILY gfxCoreFamily>
@ -2665,9 +2722,6 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendWriteToMemory(void *desc
UNRECOVERABLE_IF(dstAllocationStruct.alloc == nullptr);
commandContainer.addToResidencyContainer(dstAllocationStruct.alloc);
NEO::PipeControlArgs args;
args.dcFlushEnable = NEO::MemorySynchronizationCommands<GfxFamily>::getDcFlushEnable(descriptor->writeScope, device->getNEODevice()->getRootDeviceEnvironment());
args.dcFlushEnable &= dstAllocationStruct.needsFlush;
const uint64_t gpuAddress = static_cast<uint64_t>(dstAllocationStruct.alignedAllocationPtr);
if (isCopyOnly()) {
@ -2677,6 +2731,10 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendWriteToMemory(void *desc
NEO::EncodeMiFlushDW<GfxFamily>::programMiFlushDw(*commandContainer.getCommandStream(), gpuAddress,
data, args, productHelper);
} else {
NEO::PipeControlArgs args;
args.dcFlushEnable = getDcFlushRequired(!!descriptor->writeScope);
args.dcFlushEnable &= dstAllocationStruct.needsFlush;
NEO::MemorySynchronizationCommands<GfxFamily>::addBarrierWithPostSyncOperation(
*commandContainer.getCommandStream(),
NEO::PostSyncMode::ImmediateData,
@ -2756,7 +2814,6 @@ void CommandListCoreFamily<gfxCoreFamily>::dispatchPostSyncCommands(const CmdLis
}
if (useLastPipeControl) {
NEO::PipeControlArgs pipeControlArgs;
pipeControlArgs.dcFlushEnable = getDcFlushRequired(signalScope);
pipeControlArgs.workloadPartitionOffset = eventOperations.workPartitionOperation;
@ -2797,7 +2854,7 @@ void CommandListCoreFamily<gfxCoreFamily>::dispatchEventRemainingPacketsPostSync
uint64_t eventAddress = event->getCompletionFieldGpuAddress(device);
eventAddress += event->getSinglePacketSize() * event->getPacketsInUse();
bool appendLastPipeControl = false;
constexpr bool appendLastPipeControl = false;
dispatchPostSyncCommands(remainingPacketsOperation, eventAddress, Event::STATE_SIGNALED, appendLastPipeControl, event->isSignalScope());
}
}

View File

@ -46,11 +46,11 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(K
return ZE_RESULT_ERROR_INVALID_ARGUMENT;
}
const auto kernelImmutableData = kernel->getImmutableData();
if (this->immediateCmdListHeapSharing) {
if (this->immediateCmdListHeapSharing || this->stateBaseAddressTracking) {
auto kernelInfo = kernelImmutableData->getKernelInfo();
commandContainer.ensureHeapSizePrepared(
NEO::EncodeDispatchKernel<GfxFamily>::getSizeRequiredSsh(*kernelInfo),
NEO::EncodeDispatchKernel<GfxFamily>::getSizeRequiredDsh(kernelDescriptor));
NEO::EncodeDispatchKernel<GfxFamily>::getSizeRequiredDsh(kernelDescriptor), true);
}
appendEventForProfiling(event, true);
auto perThreadScratchSize = std::max<std::uint32_t>(this->getCommandListPerThreadScratchSize(),

View File

@ -32,9 +32,6 @@
namespace L0 {
template <GFXCORE_FAMILY gfxCoreFamily>
struct EncodeStateBaseAddress;
template <GFXCORE_FAMILY gfxCoreFamily>
size_t CommandListCoreFamily<gfxCoreFamily>::getReserveSshSize() {
return 4 * MemoryConstants::pageSize;
@ -140,15 +137,16 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(K
if (kernelDescriptor.kernelAttributes.flags.isInvalid) {
return ZE_RESULT_ERROR_INVALID_ARGUMENT;
}
if (this->immediateCmdListHeapSharing) {
bool getDsh = false;
if constexpr (GfxFamily::supportsSampler) {
getDsh = device->getDeviceInfo().imageSupport;
}
if (this->immediateCmdListHeapSharing || this->stateBaseAddressTracking) {
auto kernelInfo = kernelImmutableData->getKernelInfo();
size_t dshSize = 0;
if constexpr (GfxFamily::supportsSampler) {
dshSize = NEO::EncodeDispatchKernel<GfxFamily>::getSizeRequiredDsh(kernelDescriptor);
}
commandContainer.ensureHeapSizePrepared(
NEO::EncodeDispatchKernel<GfxFamily>::getSizeRequiredSsh(*kernelInfo),
dshSize);
NEO::EncodeDispatchKernel<GfxFamily>::getSizeRequiredDsh(kernelDescriptor), getDsh);
}
commandListPerThreadScratchSize = std::max<uint32_t>(commandListPerThreadScratchSize, kernelDescriptor.kernelAttributes.perThreadScratchSize[0]);
commandListPerThreadPrivateScratchSize = std::max<uint32_t>(commandListPerThreadPrivateScratchSize, kernelDescriptor.kernelAttributes.perThreadScratchSize[1]);
@ -245,12 +243,12 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(K
}
}
updateStreamProperties(*kernel, launchParams.isCooperative);
KernelImp *kernelImp = static_cast<KernelImp *>(kernel);
this->containsStatelessUncachedResource |= kernelImp->getKernelRequiresUncachedMocs();
this->requiresQueueUncachedMocs |= kernelImp->getKernelRequiresQueueUncachedMocs();
updateStreamProperties(*kernel, launchParams.isCooperative);
auto localMemSize = static_cast<uint32_t>(neoDevice->getDeviceInfo().localMemSize);
auto slmTotalSize = kernelImp->getSlmTotalSize();
if (slmTotalSize > 0 && localMemSize < slmTotalSize) {

View File

@ -88,6 +88,7 @@ ze_result_t CommandQueueImp::initialize(bool copyOnly, bool isInternal) {
this->stateComputeModeTracking = L0GfxCoreHelper::enableStateComputeModeTracking(rootDeviceEnvironment);
this->frontEndStateTracking = L0GfxCoreHelper::enableFrontEndStateTracking(rootDeviceEnvironment);
this->pipelineSelectStateTracking = L0GfxCoreHelper::enablePipelineSelectStateTracking(rootDeviceEnvironment);
this->stateBaseAddressTracking = L0GfxCoreHelper::enableStateBaseAddressTracking(rootDeviceEnvironment);
}
return returnValue;
}

View File

@ -70,6 +70,7 @@ struct CommandQueue : _ze_command_queue_handle_t {
bool frontEndStateTracking = false;
bool pipelineSelectStateTracking = false;
bool stateComputeModeTracking = false;
bool stateBaseAddressTracking = false;
};
using CommandQueueAllocatorFn = CommandQueue *(*)(Device *device, NEO::CommandStreamReceiver *csr,

View File

@ -188,7 +188,11 @@ struct CommandQueueHw : public CommandQueueImp {
NEO::StreamProperties &csrState,
const NEO::StreamProperties &cmdListRequired,
const NEO::StreamProperties &cmdListFinal);
inline void programRequiredStateBaseAddressForCommandList(CommandList *commandList,
NEO::LinearStream &commandStream,
NEO::StreamProperties &csrState,
const NEO::StreamProperties &cmdListRequired,
const NEO::StreamProperties &cmdListFinal);
inline void updateBaseAddressState(CommandList *lastCommandList);
size_t alignedChildStreamPadding{};

View File

@ -174,6 +174,7 @@ ze_result_t CommandQueueHw<gfxCoreFamily>::executeCommandListsRegular(
this->programOneCmdListPipelineSelect(commandList, child, csrStateProperties, requiredStreamState, finalStreamState);
this->programOneCmdListFrontEndIfDirty(ctx, child, csrStateProperties, requiredStreamState, finalStreamState);
this->programRequiredStateComputeModeForCommandList(commandList, child, csrStateProperties, requiredStreamState, finalStreamState);
this->programRequiredStateBaseAddressForCommandList(commandList, child, csrStateProperties, requiredStreamState, finalStreamState);
this->patchCommands(*commandList, this->csr->getScratchSpaceController()->getScratchPatchAddress());
this->programOneCmdListBatchBufferStart(commandList, child, ctx);
@ -1212,6 +1213,22 @@ void CommandQueueHw<gfxCoreFamily>::programRequiredStateComputeModeForCommandLis
csrState.stateComputeMode.setProperties(cmdListFinal.stateComputeMode);
}
// Folds one command list's state-base-address properties into the CSR stream state:
// first the list's required state, then its final state. Nothing happens unless state
// base address tracking is enabled on this queue. commandList and commandStream are
// not used by this function.
template <GFXCORE_FAMILY gfxCoreFamily>
void CommandQueueHw<gfxCoreFamily>::programRequiredStateBaseAddressForCommandList(CommandList *commandList,
                                                                                  NEO::LinearStream &commandStream,
                                                                                  NEO::StreamProperties &csrState,
                                                                                  const NEO::StreamProperties &cmdListRequired,
                                                                                  const NEO::StreamProperties &cmdListFinal) {
    if (this->stateBaseAddressTracking) {
        auto &csrBaseAddressState = csrState.stateBaseAddress;
        csrBaseAddressState.setProperties(cmdListRequired.stateBaseAddress);
        csrBaseAddressState.setProperties(cmdListFinal.stateBaseAddress);
    }
}
template <GFXCORE_FAMILY gfxCoreFamily>
void CommandQueueHw<gfxCoreFamily>::updateBaseAddressState(CommandList *lastCommandList) {
auto csrHw = static_cast<NEO::CommandStreamReceiverHw<GfxFamily> *>(csr);

View File

@ -47,6 +47,14 @@ bool L0GfxCoreHelper::enableStateComputeModeTracking(const NEO::RootDeviceEnviro
return l0GfxCoreHelper.platformSupportsStateComputeModeTracking();
}
// Decides whether state base address tracking is enabled.
// The EnableStateBaseAddressTracking debug flag wins whenever it is not -1 (any non-zero
// value enables tracking); otherwise the platform's L0 gfx core helper decides.
bool L0GfxCoreHelper::enableStateBaseAddressTracking(const NEO::RootDeviceEnvironment &rootDeviceEnvironment) {
    const auto debugOverride = NEO::DebugManager.flags.EnableStateBaseAddressTracking.get();
    const bool overrideIsSet = (debugOverride != -1);
    if (!overrideIsSet) {
        return rootDeviceEnvironment.getHelper<L0GfxCoreHelper>().platformSupportsStateBaseAddressTracking();
    }
    return debugOverride != 0;
}
bool L0GfxCoreHelper::enableImmediateCmdListHeapSharing(const NEO::RootDeviceEnvironment &rootDeviceEnvironment, bool cmdlistSupport) {
if (NEO::DebugManager.flags.EnableImmediateCmdListHeapSharing.get() != -1) {
return !!NEO::DebugManager.flags.EnableImmediateCmdListHeapSharing.get();

View File

@ -42,6 +42,7 @@ class L0GfxCoreHelper : public NEO::ApiGfxCoreHelper {
static bool enableFrontEndStateTracking(const NEO::RootDeviceEnvironment &rootDeviceEnvironment);
static bool enablePipelineSelectStateTracking(const NEO::RootDeviceEnvironment &rootDeviceEnvironment);
static bool enableStateComputeModeTracking(const NEO::RootDeviceEnvironment &rootDeviceEnvironment);
static bool enableStateBaseAddressTracking(const NEO::RootDeviceEnvironment &rootDeviceEnvironment);
static bool enableImmediateCmdListHeapSharing(const NEO::RootDeviceEnvironment &rootDeviceEnvironment, bool cmdlistSupport);
static bool usePipeControlMultiKernelEventSync(const NEO::HardwareInfo &hwInfo);
static bool useCompactL3FlushEventPacket(const NEO::HardwareInfo &hwInfo);
@ -63,6 +64,7 @@ class L0GfxCoreHelper : public NEO::ApiGfxCoreHelper {
virtual bool platformSupportsStateComputeModeTracking() const = 0;
virtual bool platformSupportsFrontEndTracking() const = 0;
virtual bool platformSupportsPipelineSelectTracking() const = 0;
virtual bool platformSupportsStateBaseAddressTracking() const = 0;
virtual bool platformSupportsRayTracing() const = 0;
virtual bool isZebinAllowed(const NEO::Debugger *debugger) const = 0;
virtual uint32_t getEventMaxKernelCount(const NEO::HardwareInfo &hwInfo) const = 0;
@ -95,6 +97,7 @@ class L0GfxCoreHelperHw : public L0GfxCoreHelper {
bool platformSupportsStateComputeModeTracking() const override;
bool platformSupportsFrontEndTracking() const override;
bool platformSupportsPipelineSelectTracking() const override;
bool platformSupportsStateBaseAddressTracking() const override;
bool platformSupportsRayTracing() const override;
bool isZebinAllowed(const NEO::Debugger *debugger) const override;
uint32_t getEventMaxKernelCount(const NEO::HardwareInfo &hwInfo) const override;

View File

@ -29,6 +29,11 @@ bool L0GfxCoreHelperHw<Family>::platformSupportsPipelineSelectTracking() const {
return false;
}
// Platform default for state base address tracking in this helper implementation: not supported.
template <typename Family>
bool L0GfxCoreHelperHw<Family>::platformSupportsStateBaseAddressTracking() const {
    constexpr bool trackingSupported = false;
    return trackingSupported;
}
template <typename Family>
uint32_t L0GfxCoreHelperHw<Family>::getEventMaxKernelCount(const NEO::HardwareInfo &hwInfo) const {
return 1;

View File

@ -32,6 +32,11 @@ bool L0GfxCoreHelperHw<Family>::platformSupportsPipelineSelectTracking() const {
return true;
}
// State base address tracking is reported as unsupported by this helper implementation.
template <typename Family>
bool L0GfxCoreHelperHw<Family>::platformSupportsStateBaseAddressTracking() const {
    constexpr bool stateBaseAddressTrackingSupported = false;
    return stateBaseAddressTrackingSupported;
}
template <typename Family>
uint32_t L0GfxCoreHelperHw<Family>::getEventMaxKernelCount(const NEO::HardwareInfo &hwInfo) const {
uint32_t kernelCount = EventPacketsCount::maxKernelSplit;

View File

@ -217,8 +217,8 @@ struct KernelImp : Kernel {
uint32_t requiredWorkgroupOrder = 0u;
bool kernelRequiresGenerationOfLocalIdsByRuntime = true;
uint32_t kernelRequiresUncachedMocsCount = false;
uint32_t kernelRequiresQueueUncachedMocsCount = false;
uint32_t kernelRequiresUncachedMocsCount = 0;
uint32_t kernelRequiresQueueUncachedMocsCount = 0;
std::vector<bool> isArgUncached;
uint32_t globalOffsets[3] = {};

View File

@ -146,6 +146,20 @@ void CmdListStateComputeModeStateFixture::setUp() {
ModuleMutableCommandListFixture::setUp();
}
// Prepares the fixture for state base address tracking tests:
// forces the EnableStateBaseAddressTracking debug flag on before the base fixture is set up,
// then configures the mock kernel with a single sampler and a 512-byte dynamic state heap buffer.
void CommandListStateBaseAddressFixture::setUp() {
    // The flag must be set before the base setUp() runs.
    DebugManager.flags.EnableStateBaseAddressTracking.set(1);
    ModuleMutableCommandListFixture::setUp();

    auto &samplerTable = mockKernelImmData->kernelDescriptor->payloadMappings.samplerTable;
    samplerTable.numSamplers = 1;
    samplerTable.tableOffset = 16;
    samplerTable.borderColor = 0;

    // Value-initialize the buffer ("()" zero-fills the array) so the kernel's dynamic state
    // heap data is deterministic instead of holding indeterminate bytes.
    kernel->dynamicStateHeapData.reset(new uint8_t[512]());
}
// Returns the device MOCS value for the requested l3On setting, shifted right by one bit.
uint32_t CommandListStateBaseAddressFixture::getMocs(bool l3On) {
    const auto rawMocs = device->getMOCS(l3On, false);
    return rawMocs >> 1;
}
void ImmediateCmdListSharedHeapsFixture::setUp() {
DebugManager.flags.EnableFlushTaskSubmission.set(1);
DebugManager.flags.EnableImmediateCmdListHeapSharing.set(1);

View File

@ -120,6 +120,13 @@ struct CmdListLargeGrfFixture : public CmdListStateComputeModeStateFixture {
void testBody();
};
// Test fixture for state base address tracking tests; extends ModuleMutableCommandListFixture.
struct CommandListStateBaseAddressFixture : public ModuleMutableCommandListFixture {
// Fixture setup hook; defined out of line.
void setUp();
// Returns a MOCS value for the given l3On setting; defined out of line.
uint32_t getMocs(bool l3On);
// NOTE(review): presumably restores the debug flags modified by this fixture on destruction - confirm against DebugManagerStateRestore.
DebugManagerStateRestore restorer;
};
struct ImmediateCmdListSharedHeapsFixture : public ModuleMutableCommandListFixture {
void setUp();

View File

@ -36,6 +36,11 @@ GEN11TEST_F(L0GfxCoreHelperTestGen11, GivenGen11WhenCheckingL0HelperForPipelineS
EXPECT_FALSE(l0GfxCoreHelper.platformSupportsPipelineSelectTracking());
}
// Gen11: the L0 gfx core helper must report state base address tracking as unsupported.
GEN11TEST_F(L0GfxCoreHelperTestGen11, GivenGen11WhenCheckingL0HelperForStateBaseAddressTrackingSupportThenReturnFalse) {
    const bool trackingSupported = getHelper<L0GfxCoreHelper>().platformSupportsStateBaseAddressTracking();
    EXPECT_FALSE(trackingSupported);
}
GEN11TEST_F(L0GfxCoreHelperTestGen11, GivenGen11WhenCheckingL0HelperForRayTracingSupportThenReturnFalse) {
auto &l0GfxCoreHelper = getHelper<L0GfxCoreHelper>();
EXPECT_FALSE(l0GfxCoreHelper.platformSupportsRayTracing());

View File

@ -51,6 +51,12 @@ GEN12LPTEST_F(L0GfxCoreHelperTestGen12Lp, GivenGen12LpWhenCheckingL0HelperForPip
EXPECT_FALSE(l0GfxCoreHelper.platformSupportsPipelineSelectTracking());
}
// Gen12LP: the L0 gfx core helper obtained from a mock execution environment must report
// state base address tracking as unsupported.
GEN12LPTEST_F(L0GfxCoreHelperTestGen12Lp, GivenGen12LpWhenCheckingL0HelperForStateBaseAddressTrackingSupportThenReturnFalse) {
    MockExecutionEnvironment mockExecutionEnvironment;
    auto &rootDeviceEnvironment = *mockExecutionEnvironment.rootDeviceEnvironments[0];
    const bool trackingSupported = rootDeviceEnvironment.getHelper<L0GfxCoreHelper>().platformSupportsStateBaseAddressTracking();
    EXPECT_FALSE(trackingSupported);
}
GEN12LPTEST_F(L0GfxCoreHelperTestGen12Lp, GivenGen12LpWhenCheckingL0HelperForRayTracingSupportThenReturnFalse) {
MockExecutionEnvironment executionEnvironment;
auto &l0GfxCoreHelper = executionEnvironment.rootDeviceEnvironments[0]->getHelper<L0GfxCoreHelper>();

View File

@ -36,6 +36,11 @@ GEN9TEST_F(L0GfxCoreHelperTestGen9, GivenGen9WhenCheckingL0HelperForPipelineSele
EXPECT_FALSE(l0GfxCoreHelper.platformSupportsPipelineSelectTracking());
}
// Gen9: the L0 gfx core helper must report state base address tracking as unsupported.
GEN9TEST_F(L0GfxCoreHelperTestGen9, GivenGen9WhenCheckingL0HelperForStateBaseAddressTrackingSupportThenReturnFalse) {
    const bool trackingSupported = getHelper<L0GfxCoreHelper>().platformSupportsStateBaseAddressTracking();
    EXPECT_FALSE(trackingSupported);
}
GEN9TEST_F(L0GfxCoreHelperTestGen9, GivenGen9WhenCheckingL0HelperForRayTracingSupportThenReturnFalse) {
auto &l0GfxCoreHelper = getHelper<L0GfxCoreHelper>();
EXPECT_FALSE(l0GfxCoreHelper.platformSupportsRayTracing());

View File

@ -73,6 +73,7 @@ struct WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>
using BaseClass::requiredStreamState;
using BaseClass::setupTimestampEventForMultiTile;
using BaseClass::signalAllEventPackets;
using BaseClass::stateBaseAddressTracking;
using BaseClass::stateComputeModeTracking;
using BaseClass::unifiedMemoryControls;
using BaseClass::updateStreamProperties;
@ -145,6 +146,7 @@ struct WhiteBox<L0::CommandListCoreFamilyImmediate<gfxCoreFamily>>
using BaseClass::pipelineSelectStateTracking;
using BaseClass::requiredStreamState;
using BaseClass::signalAllEventPackets;
using BaseClass::stateBaseAddressTracking;
using BaseClass::stateComputeModeTracking;
WhiteBox() : BaseClass(BaseClass::defaultNumIddsPerBlock) {}
@ -155,6 +157,7 @@ struct MockCommandListImmediate : public CommandListCoreFamilyImmediate<gfxCoreF
using BaseClass = CommandListCoreFamilyImmediate<gfxCoreFamily>;
using BaseClass::compactL3FlushEventPacket;
using BaseClass::containsAnyKernel;
using BaseClass::finalStreamState;
using BaseClass::immediateCmdListHeapSharing;
using BaseClass::indirectAllocationsAllowed;
using BaseClass::pipeControlMultiKernelEventSync;
@ -169,6 +172,7 @@ struct WhiteBox<::L0::CommandList> : public ::L0::CommandListImp {
using BaseClass::commandContainer;
using BaseClass::commandListPreemptionMode;
using BaseClass::csr;
using BaseClass::finalStreamState;
using BaseClass::frontEndStateTracking;
using BaseClass::getDcFlushRequired;
using BaseClass::immediateCmdListHeapSharing;
@ -177,7 +181,9 @@ struct WhiteBox<::L0::CommandList> : public ::L0::CommandListImp {
using BaseClass::nonImmediateLogicalStateHelper;
using BaseClass::partitionCount;
using BaseClass::pipelineSelectStateTracking;
using BaseClass::requiredStreamState;
using BaseClass::signalAllEventPackets;
using BaseClass::stateBaseAddressTracking;
using BaseClass::stateComputeModeTracking;
WhiteBox(Device *device);

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2020-2022 Intel Corporation
* Copyright (C) 2020-2023 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@ -37,6 +37,7 @@ struct WhiteBox<::L0::CommandQueue> : public ::L0::CommandQueueImp {
using CommandQueue::internalUsage;
using CommandQueue::partitionCount;
using CommandQueue::pipelineSelectStateTracking;
using CommandQueue::stateBaseAddressTracking;
using CommandQueue::stateComputeModeTracking;
WhiteBox(Device *device, NEO::CommandStreamReceiver *csr,
@ -71,6 +72,7 @@ struct MockCommandQueueHw : public L0::CommandQueueHw<gfxCoreFamily> {
using L0::CommandQueue::partitionCount;
using L0::CommandQueue::pipelineSelectStateTracking;
using L0::CommandQueue::preemptionCmdSyncProgramming;
using L0::CommandQueue::stateBaseAddressTracking;
using L0::CommandQueue::stateComputeModeTracking;
using L0::CommandQueueImp::csr;
using typename BaseClass::CommandListExecutionContext;

View File

@ -2365,6 +2365,9 @@ TEST_F(CommandListCreate, givenCreatedCommandListWhenGettingTrackingFlagsThenDef
bool expectedFrontEndTracking = l0GfxCoreHelper.platformSupportsFrontEndTracking();
EXPECT_EQ(expectedFrontEndTracking, commandList->frontEndStateTracking);
bool expectedStateBaseAddressTracking = l0GfxCoreHelper.platformSupportsStateBaseAddressTracking();
EXPECT_EQ(expectedStateBaseAddressTracking, commandList->stateBaseAddressTracking);
}
} // namespace ult

View File

@ -16,7 +16,6 @@
#include "level_zero/core/source/hw_helpers/l0_gfx_core_helper.h"
#include "level_zero/core/source/image/image_hw.h"
#include "level_zero/core/test/unit_tests/fixtures/cmdlist_fixture.h"
#include "level_zero/core/test/unit_tests/fixtures/device_fixture.h"
#include "level_zero/core/test/unit_tests/mocks/mock_cmdlist.h"
#include "level_zero/core/test/unit_tests/mocks/mock_kernel.h"

View File

@ -7,12 +7,14 @@
#include "shared/source/helpers/aligned_memory.h"
#include "shared/source/helpers/hw_helper.h"
#include "shared/source/indirect_heap/indirect_heap.h"
#include "shared/source/kernel/implicit_args.h"
#include "shared/test/common/cmd_parse/gen_cmd_parse.h"
#include "shared/test/common/helpers/unit_test_helper.h"
#include "shared/test/common/test_macros/hw_test.h"
#include "level_zero/core/source/kernel/kernel_imp.h"
#include "level_zero/core/test/unit_tests/fixtures/device_fixture.h"
#include "level_zero/core/test/unit_tests/fixtures/cmdlist_fixture.h"
#include "level_zero/core/test/unit_tests/mocks/mock_builtin_functions_lib_impl_timestamps.h"
#include "level_zero/core/test/unit_tests/mocks/mock_cmdlist.h"
#include "level_zero/core/test/unit_tests/mocks/mock_device_for_spirv.h"
@ -901,5 +903,552 @@ HWTEST2_F(CommandListCreate, givenAllValuesTbxAndSyncModeFlagsWhenCheckingWaitli
EXPECT_TRUE(cmdList.eventWaitlistSyncRequired());
}
using CommandListStateBaseAddressTest = Test<CommandListStateBaseAddressFixture>;
// Verifies that, with state base address tracking enabled, appending a single kernel to a
// regular command list records the heap base addresses/sizes and MOCS in both the required
// and final stream states, and that executing the list leaves the same values in the CSR
// stream properties.
HWTEST2_F(CommandListStateBaseAddressTest,
givenStateBaseAddressTrackingWhenRegularCmdListAppendKernelAndExecuteThenBaseAddressStateIsStoredInCsr,
IsAtLeastSkl) {
// Query which state base address properties the product supports; used below to pick the
// binding table pool expectations.
NEO::StateBaseAddressPropertiesSupport sbaPropertiesSupport = {};
auto &productHelper = device->getProductHelper();
productHelper.fillStateBaseAddressPropertiesSupportStructure(sbaPropertiesSupport);
EXPECT_TRUE(commandList->stateBaseAddressTracking);
auto &container = commandList->commandContainer;
// Append one kernel so the command list updates its stream properties.
ze_group_count_t groupCount{1, 1, 1};
CmdListKernelLaunchParams launchParams = {};
auto result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
// Read the expected values directly from the command container heaps.
auto sshHeap = container.getIndirectHeap(NEO::HeapType::SURFACE_STATE);
auto ssBaseAddress = sshHeap->getHeapGpuBase();
auto ssSize = sshHeap->getHeapSizeInPages();
// Defaults (-1 / max size_t) apply when no dynamic state heap is present.
uint64_t dsBaseAddress = -1;
size_t dsSize = static_cast<size_t>(-1);
auto dshHeap = container.getIndirectHeap(NEO::HeapType::DYNAMIC_STATE);
if (NEO::UnitTestHelper<FamilyType>::expectNullDsh(device->getDeviceInfo())) {
EXPECT_EQ(nullptr, dshHeap);
} else {
EXPECT_NE(nullptr, dshHeap);
}
if (dshHeap) {
dsBaseAddress = dshHeap->getHeapGpuBase();
dsSize = dshHeap->getHeapSizeInPages();
}
auto ioBaseAddress = container.getIndirectHeap(NEO::HeapType::INDIRECT_OBJECT)->getHeapGpuBase();
auto ioSize = container.getIndirectHeap(NEO::HeapType::INDIRECT_OBJECT)->getHeapSizeInPages();
auto statlessMocs = device->getMOCS(true, false) >> 1;
auto &requiredState = commandList->requiredStreamState.stateBaseAddress;
auto &finalState = commandList->finalStreamState.stateBaseAddress;
// Required state must match the values read from the heaps above.
EXPECT_EQ(static_cast<int32_t>(statlessMocs), requiredState.statelessMocs.value);
EXPECT_EQ(static_cast<int64_t>(ssBaseAddress), requiredState.surfaceStateBaseAddress.value);
EXPECT_EQ(ssSize, requiredState.surfaceStateSize.value);
EXPECT_EQ(static_cast<int64_t>(dsBaseAddress), requiredState.dynamicStateBaseAddress.value);
EXPECT_EQ(dsSize, requiredState.dynamicStateSize.value);
EXPECT_EQ(static_cast<int64_t>(ioBaseAddress), requiredState.indirectObjectBaseAddress.value);
EXPECT_EQ(ioSize, requiredState.indirectObjectSize.value);
// Binding table pool follows the surface state heap when the product supports it;
// otherwise the properties are expected to keep their unset values.
if (sbaPropertiesSupport.bindingTablePoolBaseAddress) {
EXPECT_EQ(static_cast<int64_t>(ssBaseAddress), requiredState.bindingTablePoolBaseAddress.value);
EXPECT_EQ(ssSize, requiredState.bindingTablePoolSize.value);
} else {
EXPECT_EQ(-1, requiredState.bindingTablePoolBaseAddress.value);
EXPECT_EQ(static_cast<size_t>(-1), requiredState.bindingTablePoolSize.value);
}
// With a single kernel appended, the final state is expected to equal the required state.
EXPECT_EQ(finalState.surfaceStateBaseAddress.value, requiredState.surfaceStateBaseAddress.value);
EXPECT_EQ(finalState.surfaceStateSize.value, requiredState.surfaceStateSize.value);
EXPECT_EQ(finalState.dynamicStateBaseAddress.value, requiredState.dynamicStateBaseAddress.value);
EXPECT_EQ(finalState.dynamicStateSize.value, requiredState.dynamicStateSize.value);
EXPECT_EQ(finalState.indirectObjectBaseAddress.value, requiredState.indirectObjectBaseAddress.value);
EXPECT_EQ(finalState.indirectObjectSize.value, requiredState.indirectObjectSize.value);
EXPECT_EQ(finalState.bindingTablePoolBaseAddress.value, requiredState.bindingTablePoolBaseAddress.value);
EXPECT_EQ(finalState.bindingTablePoolSize.value, requiredState.bindingTablePoolSize.value);
EXPECT_EQ(finalState.globalAtomics.value, requiredState.globalAtomics.value);
EXPECT_EQ(finalState.statelessMocs.value, requiredState.statelessMocs.value);
// Close and execute the list; afterwards the CSR stream properties must equal the list's final state.
result = commandList->close();
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
ze_command_list_handle_t cmdListHandle = commandList->toHandle();
result = commandQueue->executeCommandLists(1, &cmdListHandle, nullptr, true);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
auto &csrState = commandQueue->getCsr()->getStreamProperties().stateBaseAddress;
EXPECT_EQ(csrState.surfaceStateBaseAddress.value, finalState.surfaceStateBaseAddress.value);
EXPECT_EQ(csrState.surfaceStateSize.value, finalState.surfaceStateSize.value);
EXPECT_EQ(csrState.dynamicStateBaseAddress.value, finalState.dynamicStateBaseAddress.value);
EXPECT_EQ(csrState.dynamicStateSize.value, finalState.dynamicStateSize.value);
EXPECT_EQ(csrState.indirectObjectBaseAddress.value, finalState.indirectObjectBaseAddress.value);
EXPECT_EQ(csrState.indirectObjectSize.value, finalState.indirectObjectSize.value);
EXPECT_EQ(csrState.bindingTablePoolBaseAddress.value, finalState.bindingTablePoolBaseAddress.value);
EXPECT_EQ(csrState.bindingTablePoolSize.value, finalState.bindingTablePoolSize.value);
EXPECT_EQ(csrState.globalAtomics.value, finalState.globalAtomics.value);
EXPECT_EQ(csrState.statelessMocs.value, finalState.statelessMocs.value);
}
HWTEST2_F(CommandListStateBaseAddressTest,
          givenStateBaseAddressTrackingWhenRegularCmdListAppendKernelChangesHeapsAndExecuteThenFinalBaseAddressStateIsStoredInCsr,
          IsAtLeastSkl) {
    // Scenario: a regular command list launches a kernel, its surface/dynamic state heaps are
    // exhausted and re-requested at full size, and a second kernel is launched. The required state
    // must still describe the first heaps, the final state must describe the current heaps, and the
    // CSR must hold the final state once the list has been executed.
    NEO::StateBaseAddressPropertiesSupport sbaPropertiesSupport = {};
    device->getProductHelper().fillStateBaseAddressPropertiesSupportStructure(sbaPropertiesSupport);

    EXPECT_TRUE(commandList->stateBaseAddressTracking);

    auto &cmdContainer = commandList->commandContainer;
    ze_group_count_t groupCount{1, 1, 1};
    CmdListKernelLaunchParams launchParams = {};

    // First kernel launch: captures the initial heap parameters.
    auto result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false);
    EXPECT_EQ(ZE_RESULT_SUCCESS, result);

    auto *surfaceHeap = cmdContainer.getIndirectHeap(NEO::HeapType::SURFACE_STATE);
    auto surfaceBase = surfaceHeap->getHeapGpuBase();
    auto surfaceSize = surfaceHeap->getHeapSizeInPages();

    // When the container has no dynamic state heap, the expected values stay at -1.
    auto *dynamicHeap = cmdContainer.getIndirectHeap(NEO::HeapType::DYNAMIC_STATE);
    uint64_t dynamicBase = dynamicHeap ? dynamicHeap->getHeapGpuBase() : static_cast<uint64_t>(-1);
    size_t dynamicSize = dynamicHeap ? dynamicHeap->getHeapSizeInPages() : static_cast<size_t>(-1);

    auto &requiredProps = commandList->requiredStreamState.stateBaseAddress;
    auto &finalProps = commandList->finalStreamState.stateBaseAddress;

    EXPECT_EQ(static_cast<int64_t>(surfaceBase), requiredProps.surfaceStateBaseAddress.value);
    EXPECT_EQ(surfaceSize, requiredProps.surfaceStateSize.value);
    EXPECT_EQ(static_cast<int64_t>(dynamicBase), requiredProps.dynamicStateBaseAddress.value);
    EXPECT_EQ(dynamicSize, requiredProps.dynamicStateSize.value);

    // After a single kernel the final state equals the required state.
    EXPECT_EQ(finalProps.surfaceStateBaseAddress.value, requiredProps.surfaceStateBaseAddress.value);
    EXPECT_EQ(finalProps.surfaceStateSize.value, requiredProps.surfaceStateSize.value);
    EXPECT_EQ(finalProps.dynamicStateBaseAddress.value, requiredProps.dynamicStateBaseAddress.value);
    EXPECT_EQ(finalProps.dynamicStateSize.value, requiredProps.dynamicStateSize.value);

    // Use up the heaps and ask the container for full-size heaps again; the checks below expect
    // the heap base addresses to differ from the ones recorded in the required state.
    surfaceHeap->getSpace(surfaceHeap->getAvailableSpace());
    cmdContainer.getHeapWithRequiredSizeAndAlignment(NEO::HeapType::SURFACE_STATE, surfaceHeap->getMaxAvailableSpace(), 0);
    if (dynamicHeap) {
        dynamicHeap->getSpace(dynamicHeap->getAvailableSpace());
        cmdContainer.getHeapWithRequiredSizeAndAlignment(NEO::HeapType::DYNAMIC_STATE, dynamicHeap->getMaxAvailableSpace(), 0);
    }

    // Second kernel launch: only the final state is expected to move.
    result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false);
    EXPECT_EQ(ZE_RESULT_SUCCESS, result);

    surfaceBase = surfaceHeap->getGpuBase();
    if (dynamicHeap) {
        dynamicBase = dynamicHeap->getGpuBase();
    }

    EXPECT_NE(static_cast<int64_t>(surfaceBase), requiredProps.surfaceStateBaseAddress.value);
    if (dynamicHeap) {
        EXPECT_NE(static_cast<int64_t>(dynamicBase), requiredProps.dynamicStateBaseAddress.value);
    } else {
        EXPECT_EQ(static_cast<int64_t>(dynamicBase), requiredProps.dynamicStateBaseAddress.value);
    }

    EXPECT_EQ(static_cast<int64_t>(surfaceBase), finalProps.surfaceStateBaseAddress.value);
    EXPECT_EQ(static_cast<int64_t>(dynamicBase), finalProps.dynamicStateBaseAddress.value);

    // Execute the closed list and verify the CSR picked up the command list's final state.
    result = commandList->close();
    EXPECT_EQ(ZE_RESULT_SUCCESS, result);

    ze_command_list_handle_t listHandle = commandList->toHandle();
    result = commandQueue->executeCommandLists(1, &listHandle, nullptr, true);
    EXPECT_EQ(ZE_RESULT_SUCCESS, result);

    auto &csrProps = commandQueue->getCsr()->getStreamProperties().stateBaseAddress;
    EXPECT_EQ(csrProps.surfaceStateBaseAddress.value, finalProps.surfaceStateBaseAddress.value);
    EXPECT_EQ(csrProps.surfaceStateSize.value, finalProps.surfaceStateSize.value);
    EXPECT_EQ(csrProps.dynamicStateBaseAddress.value, finalProps.dynamicStateBaseAddress.value);
    EXPECT_EQ(csrProps.dynamicStateSize.value, finalProps.dynamicStateSize.value);
}
HWTEST2_F(CommandListStateBaseAddressTest,
          givenStateBaseAddressTrackingWhenImmediateCmdListAppendKernelChangesHeapsAndExecuteThenFinalBaseAddressStateIsStoredInCsr,
          IsAtLeastSkl) {
    // Scenario: an immediate command list launches a kernel and the CSR state must describe the
    // heaps in use. The surface/dynamic state heaps are then exhausted and re-requested, a second
    // kernel is launched, and the CSR state must follow the current heap base addresses.
    // NOTE(review): the debug flag is not restored in this body; presumably the fixture owns a
    // debug-state restorer - confirm.
    NEO::DebugManager.flags.DisableResourceRecycling.set(true);

    NEO::StateBaseAddressPropertiesSupport sbaPropertiesSupport = {};
    auto &productHelper = device->getProductHelper();
    productHelper.fillStateBaseAddressPropertiesSupportStructure(sbaPropertiesSupport);

    EXPECT_TRUE(commandListImmediate->stateBaseAddressTracking);

    auto &container = commandListImmediate->commandContainer;
    auto csrImmediate = commandListImmediate->csr;
    auto &csrState = csrImmediate->getStreamProperties().stateBaseAddress;

    ze_group_count_t groupCount{1, 1, 1};
    CmdListKernelLaunchParams launchParams = {};
    auto result = commandListImmediate->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false);
    EXPECT_EQ(ZE_RESULT_SUCCESS, result);

    auto sshHeap = container.getIndirectHeap(NEO::HeapType::SURFACE_STATE);
    auto ssBaseAddress = sshHeap->getHeapGpuBase();
    auto ssSize = sshHeap->getHeapSizeInPages();

    // Dynamic state values default to -1 and are only filled in when a DSH exists.
    uint64_t dsBaseAddress = -1;
    size_t dsSize = static_cast<size_t>(-1);
    auto dshHeap = container.getIndirectHeap(NEO::HeapType::DYNAMIC_STATE);
    if (NEO::UnitTestHelper<FamilyType>::expectNullDsh(device->getDeviceInfo())) {
        EXPECT_EQ(nullptr, dshHeap);
    } else {
        EXPECT_NE(nullptr, dshHeap);
    }
    if (dshHeap) {
        dsBaseAddress = dshHeap->getHeapGpuBase();
        dsSize = dshHeap->getHeapSizeInPages();
    }

    auto ioBaseAddress = container.getIndirectHeap(NEO::HeapType::INDIRECT_OBJECT)->getHeapGpuBase();
    auto ioSize = container.getIndirectHeap(NEO::HeapType::INDIRECT_OBJECT)->getHeapSizeInPages();

    auto statelessMocs = device->getMOCS(true, false) >> 1;

    // The immediate list flushes through the CSR, so the CSR state is checked directly.
    EXPECT_EQ(static_cast<int32_t>(statelessMocs), csrState.statelessMocs.value);

    EXPECT_EQ(static_cast<int64_t>(ssBaseAddress), csrState.surfaceStateBaseAddress.value);
    EXPECT_EQ(ssSize, csrState.surfaceStateSize.value);
    EXPECT_EQ(static_cast<int64_t>(dsBaseAddress), csrState.dynamicStateBaseAddress.value);
    EXPECT_EQ(dsSize, csrState.dynamicStateSize.value);
    EXPECT_EQ(static_cast<int64_t>(ioBaseAddress), csrState.indirectObjectBaseAddress.value);
    EXPECT_EQ(ioSize, csrState.indirectObjectSize.value);

    // The binding table pool mirrors the surface state heap when the product reports support;
    // otherwise it is expected to remain at -1.
    if (sbaPropertiesSupport.bindingTablePoolBaseAddress) {
        EXPECT_EQ(static_cast<int64_t>(ssBaseAddress), csrState.bindingTablePoolBaseAddress.value);
        EXPECT_EQ(ssSize, csrState.bindingTablePoolSize.value);
    } else {
        EXPECT_EQ(-1, csrState.bindingTablePoolBaseAddress.value);
        EXPECT_EQ(static_cast<size_t>(-1), csrState.bindingTablePoolSize.value);
    }

    // Exhaust the surface state heap and request a full-size one from whichever object owns it
    // (the CSR when heaps are shared by immediate command lists, the container otherwise).
    sshHeap->getSpace(sshHeap->getAvailableSpace());
    if (commandListImmediate->immediateCmdListHeapSharing) {
        csrImmediate->getIndirectHeap(NEO::HeapType::SURFACE_STATE, sshHeap->getMaxAvailableSpace());
    } else {
        container.getHeapWithRequiredSizeAndAlignment(NEO::HeapType::SURFACE_STATE, sshHeap->getMaxAvailableSpace(), 0);
    }

    if (dshHeap) {
        dshHeap->getSpace(dshHeap->getAvailableSpace());
        if (commandListImmediate->immediateCmdListHeapSharing) {
            // Size the request from the dynamic state heap itself, matching the non-shared path below.
            csrImmediate->getIndirectHeap(NEO::HeapType::DYNAMIC_STATE, dshHeap->getMaxAvailableSpace());
        } else {
            container.getHeapWithRequiredSizeAndAlignment(NEO::HeapType::DYNAMIC_STATE, dshHeap->getMaxAvailableSpace(), 0);
        }
    }

    // Second launch: the CSR state must now reflect the current heap base addresses.
    result = commandListImmediate->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false);
    EXPECT_EQ(ZE_RESULT_SUCCESS, result);

    ssBaseAddress = sshHeap->getGpuBase();
    if (dshHeap) {
        dsBaseAddress = dshHeap->getGpuBase();
    }

    EXPECT_EQ(static_cast<int64_t>(ssBaseAddress), csrState.surfaceStateBaseAddress.value);
    EXPECT_EQ(static_cast<int64_t>(dsBaseAddress), csrState.dynamicStateBaseAddress.value);
}
HWTEST2_F(CommandListStateBaseAddressTest,
          givenStateBaseAddressTrackingWhenRegularCmdListAppendKernelAndExecuteAndImmediateCmdListAppendKernelSharingCsrThenBaseAddressStateIsUpdatedInCsr,
          IsAtLeastSkl) {
    // Scenario: a regular command list is recorded and executed, then an immediate command list
    // using the same CSR launches a kernel. After each step the CSR state must describe the heaps
    // of the command list that ran last.
    ASSERT_EQ(commandListImmediate->csr, commandQueue->getCsr());

    NEO::StateBaseAddressPropertiesSupport sbaPropertiesSupport = {};
    device->getProductHelper().fillStateBaseAddressPropertiesSupportStructure(sbaPropertiesSupport);

    EXPECT_TRUE(commandList->stateBaseAddressTracking);

    // Checks that `state` carries the given MOCS and heap base addresses / sizes. The binding
    // table pool is expected to mirror the surface state heap when the product reports support,
    // and to stay at -1 otherwise.
    auto expectStateMatchesHeaps = [&sbaPropertiesSupport](const auto &state, auto mocs,
                                                           auto ssBase, auto ssSize,
                                                           auto dsBase, auto dsSize,
                                                           auto ioBase, auto ioSize) {
        EXPECT_EQ(static_cast<int32_t>(mocs), state.statelessMocs.value);

        EXPECT_EQ(static_cast<int64_t>(ssBase), state.surfaceStateBaseAddress.value);
        EXPECT_EQ(ssSize, state.surfaceStateSize.value);
        EXPECT_EQ(static_cast<int64_t>(dsBase), state.dynamicStateBaseAddress.value);
        EXPECT_EQ(dsSize, state.dynamicStateSize.value);
        EXPECT_EQ(static_cast<int64_t>(ioBase), state.indirectObjectBaseAddress.value);
        EXPECT_EQ(ioSize, state.indirectObjectSize.value);

        if (sbaPropertiesSupport.bindingTablePoolBaseAddress) {
            EXPECT_EQ(static_cast<int64_t>(ssBase), state.bindingTablePoolBaseAddress.value);
            EXPECT_EQ(ssSize, state.bindingTablePoolSize.value);
        } else {
            EXPECT_EQ(-1, state.bindingTablePoolBaseAddress.value);
            EXPECT_EQ(static_cast<size_t>(-1), state.bindingTablePoolSize.value);
        }
    };

    // Checks that two state-base-address property sets hold the same values.
    auto expectSameState = [](const auto &lhs, const auto &rhs) {
        EXPECT_EQ(lhs.surfaceStateBaseAddress.value, rhs.surfaceStateBaseAddress.value);
        EXPECT_EQ(lhs.surfaceStateSize.value, rhs.surfaceStateSize.value);
        EXPECT_EQ(lhs.dynamicStateBaseAddress.value, rhs.dynamicStateBaseAddress.value);
        EXPECT_EQ(lhs.dynamicStateSize.value, rhs.dynamicStateSize.value);
        EXPECT_EQ(lhs.indirectObjectBaseAddress.value, rhs.indirectObjectBaseAddress.value);
        EXPECT_EQ(lhs.indirectObjectSize.value, rhs.indirectObjectSize.value);
        EXPECT_EQ(lhs.bindingTablePoolBaseAddress.value, rhs.bindingTablePoolBaseAddress.value);
        EXPECT_EQ(lhs.bindingTablePoolSize.value, rhs.bindingTablePoolSize.value);
        EXPECT_EQ(lhs.globalAtomics.value, rhs.globalAtomics.value);
        EXPECT_EQ(lhs.statelessMocs.value, rhs.statelessMocs.value);
    };

    ze_group_count_t groupCount{1, 1, 1};
    CmdListKernelLaunchParams launchParams = {};

    // Step 1: regular command list - append a kernel and inspect required/final state.
    auto &regularContainer = commandList->commandContainer;
    auto result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false);
    EXPECT_EQ(ZE_RESULT_SUCCESS, result);

    auto *regularSsh = regularContainer.getIndirectHeap(NEO::HeapType::SURFACE_STATE);
    auto regularSsBase = regularSsh->getHeapGpuBase();
    auto regularSsSize = regularSsh->getHeapSizeInPages();

    // Dynamic state values stay at -1 when the container has no DSH.
    auto *regularDsh = regularContainer.getIndirectHeap(NEO::HeapType::DYNAMIC_STATE);
    uint64_t regularDsBase = regularDsh ? regularDsh->getHeapGpuBase() : static_cast<uint64_t>(-1);
    size_t regularDsSize = regularDsh ? regularDsh->getHeapSizeInPages() : static_cast<size_t>(-1);

    auto regularIoBase = regularContainer.getIndirectHeap(NEO::HeapType::INDIRECT_OBJECT)->getHeapGpuBase();
    auto regularIoSize = regularContainer.getIndirectHeap(NEO::HeapType::INDIRECT_OBJECT)->getHeapSizeInPages();

    auto regularMocs = getMocs(true);

    auto &requiredProps = commandList->requiredStreamState.stateBaseAddress;
    auto &finalProps = commandList->finalStreamState.stateBaseAddress;

    expectStateMatchesHeaps(requiredProps, regularMocs,
                            regularSsBase, regularSsSize,
                            regularDsBase, regularDsSize,
                            regularIoBase, regularIoSize);
    expectSameState(finalProps, requiredProps);

    // Step 2: execute the regular list; the CSR must take over its final state.
    result = commandList->close();
    EXPECT_EQ(ZE_RESULT_SUCCESS, result);

    ze_command_list_handle_t listHandle = commandList->toHandle();
    result = commandQueue->executeCommandLists(1, &listHandle, nullptr, true);
    EXPECT_EQ(ZE_RESULT_SUCCESS, result);

    auto &csrProps = commandQueue->getCsr()->getStreamProperties().stateBaseAddress;
    expectSameState(csrProps, finalProps);

    // Step 3: immediate command list on the same CSR; the CSR state must now describe its heaps.
    auto &immediateContainer = commandListImmediate->commandContainer;
    result = commandListImmediate->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false);
    EXPECT_EQ(ZE_RESULT_SUCCESS, result);

    auto *immediateSsh = immediateContainer.getIndirectHeap(NEO::HeapType::SURFACE_STATE);
    auto immediateSsBase = immediateSsh->getHeapGpuBase();
    auto immediateSsSize = immediateSsh->getHeapSizeInPages();

    auto *immediateDsh = immediateContainer.getIndirectHeap(NEO::HeapType::DYNAMIC_STATE);
    uint64_t immediateDsBase = immediateDsh ? immediateDsh->getHeapGpuBase() : static_cast<uint64_t>(-1);
    size_t immediateDsSize = immediateDsh ? immediateDsh->getHeapSizeInPages() : static_cast<size_t>(-1);

    auto immediateIoBase = immediateContainer.getIndirectHeap(NEO::HeapType::INDIRECT_OBJECT)->getHeapGpuBase();
    auto immediateIoSize = immediateContainer.getIndirectHeap(NEO::HeapType::INDIRECT_OBJECT)->getHeapSizeInPages();

    auto immediateMocs = getMocs(true);

    expectStateMatchesHeaps(csrProps, immediateMocs,
                            immediateSsBase, immediateSsSize,
                            immediateDsBase, immediateDsSize,
                            immediateIoBase, immediateIoSize);
}
HWTEST2_F(CommandListStateBaseAddressTest,
          givenStateBaseAddressTrackingWhenImmediateCmdListAppendKernelAndRegularCmdListAppendKernelAndExecuteSharingCsrThenBaseAddressStateIsUpdatedInCsr,
          IsAtLeastSkl) {
    // Scenario: an immediate command list launches a kernel first, then a regular command list
    // using the same CSR is recorded and executed. After each step the CSR state must describe
    // the heaps of the command list that ran last.
    ASSERT_EQ(commandListImmediate->csr, commandQueue->getCsr());
    auto &csrProps = commandQueue->getCsr()->getStreamProperties().stateBaseAddress;

    NEO::StateBaseAddressPropertiesSupport sbaPropertiesSupport = {};
    device->getProductHelper().fillStateBaseAddressPropertiesSupportStructure(sbaPropertiesSupport);

    // Checks that `state` carries the given MOCS and heap base addresses / sizes. The binding
    // table pool is expected to mirror the surface state heap when the product reports support,
    // and to stay at -1 otherwise.
    auto expectStateMatchesHeaps = [&sbaPropertiesSupport](const auto &state, auto mocs,
                                                           auto ssBase, auto ssSize,
                                                           auto dsBase, auto dsSize,
                                                           auto ioBase, auto ioSize) {
        EXPECT_EQ(static_cast<int32_t>(mocs), state.statelessMocs.value);

        EXPECT_EQ(static_cast<int64_t>(ssBase), state.surfaceStateBaseAddress.value);
        EXPECT_EQ(ssSize, state.surfaceStateSize.value);
        EXPECT_EQ(static_cast<int64_t>(dsBase), state.dynamicStateBaseAddress.value);
        EXPECT_EQ(dsSize, state.dynamicStateSize.value);
        EXPECT_EQ(static_cast<int64_t>(ioBase), state.indirectObjectBaseAddress.value);
        EXPECT_EQ(ioSize, state.indirectObjectSize.value);

        if (sbaPropertiesSupport.bindingTablePoolBaseAddress) {
            EXPECT_EQ(static_cast<int64_t>(ssBase), state.bindingTablePoolBaseAddress.value);
            EXPECT_EQ(ssSize, state.bindingTablePoolSize.value);
        } else {
            EXPECT_EQ(-1, state.bindingTablePoolBaseAddress.value);
            EXPECT_EQ(static_cast<size_t>(-1), state.bindingTablePoolSize.value);
        }
    };

    // Checks that two state-base-address property sets hold the same values.
    auto expectSameState = [](const auto &lhs, const auto &rhs) {
        EXPECT_EQ(lhs.surfaceStateBaseAddress.value, rhs.surfaceStateBaseAddress.value);
        EXPECT_EQ(lhs.surfaceStateSize.value, rhs.surfaceStateSize.value);
        EXPECT_EQ(lhs.dynamicStateBaseAddress.value, rhs.dynamicStateBaseAddress.value);
        EXPECT_EQ(lhs.dynamicStateSize.value, rhs.dynamicStateSize.value);
        EXPECT_EQ(lhs.indirectObjectBaseAddress.value, rhs.indirectObjectBaseAddress.value);
        EXPECT_EQ(lhs.indirectObjectSize.value, rhs.indirectObjectSize.value);
        EXPECT_EQ(lhs.bindingTablePoolBaseAddress.value, rhs.bindingTablePoolBaseAddress.value);
        EXPECT_EQ(lhs.bindingTablePoolSize.value, rhs.bindingTablePoolSize.value);
        EXPECT_EQ(lhs.globalAtomics.value, rhs.globalAtomics.value);
        EXPECT_EQ(lhs.statelessMocs.value, rhs.statelessMocs.value);
    };

    ze_group_count_t groupCount{1, 1, 1};
    CmdListKernelLaunchParams launchParams = {};

    EXPECT_TRUE(commandList->stateBaseAddressTracking);

    // Step 1: immediate command list - the CSR state must describe its heaps right away.
    auto &immediateContainer = commandListImmediate->commandContainer;
    auto result = commandListImmediate->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false);
    EXPECT_EQ(ZE_RESULT_SUCCESS, result);

    auto *immediateSsh = immediateContainer.getIndirectHeap(NEO::HeapType::SURFACE_STATE);
    auto immediateSsBase = immediateSsh->getHeapGpuBase();
    auto immediateSsSize = immediateSsh->getHeapSizeInPages();

    // Dynamic state values stay at -1 when the container has no DSH.
    auto *immediateDsh = immediateContainer.getIndirectHeap(NEO::HeapType::DYNAMIC_STATE);
    uint64_t immediateDsBase = immediateDsh ? immediateDsh->getHeapGpuBase() : static_cast<uint64_t>(-1);
    size_t immediateDsSize = immediateDsh ? immediateDsh->getHeapSizeInPages() : static_cast<size_t>(-1);

    auto immediateIoBase = immediateContainer.getIndirectHeap(NEO::HeapType::INDIRECT_OBJECT)->getHeapGpuBase();
    auto immediateIoSize = immediateContainer.getIndirectHeap(NEO::HeapType::INDIRECT_OBJECT)->getHeapSizeInPages();

    auto immediateMocs = getMocs(true);

    expectStateMatchesHeaps(csrProps, immediateMocs,
                            immediateSsBase, immediateSsSize,
                            immediateDsBase, immediateDsSize,
                            immediateIoBase, immediateIoSize);

    // Step 2: regular command list - append a kernel and inspect required/final state.
    auto &regularContainer = commandList->commandContainer;
    result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false);
    EXPECT_EQ(ZE_RESULT_SUCCESS, result);

    auto *regularSsh = regularContainer.getIndirectHeap(NEO::HeapType::SURFACE_STATE);
    auto regularSsBase = regularSsh->getHeapGpuBase();
    auto regularSsSize = regularSsh->getHeapSizeInPages();

    auto *regularDsh = regularContainer.getIndirectHeap(NEO::HeapType::DYNAMIC_STATE);
    uint64_t regularDsBase = regularDsh ? regularDsh->getHeapGpuBase() : static_cast<uint64_t>(-1);
    size_t regularDsSize = regularDsh ? regularDsh->getHeapSizeInPages() : static_cast<size_t>(-1);

    auto regularIoBase = regularContainer.getIndirectHeap(NEO::HeapType::INDIRECT_OBJECT)->getHeapGpuBase();
    auto regularIoSize = regularContainer.getIndirectHeap(NEO::HeapType::INDIRECT_OBJECT)->getHeapSizeInPages();

    auto regularMocs = getMocs(true);

    auto &requiredProps = commandList->requiredStreamState.stateBaseAddress;
    auto &finalProps = commandList->finalStreamState.stateBaseAddress;

    expectStateMatchesHeaps(requiredProps, regularMocs,
                            regularSsBase, regularSsSize,
                            regularDsBase, regularDsSize,
                            regularIoBase, regularIoSize);
    expectSameState(finalProps, requiredProps);

    // Step 3: execute the regular list; the CSR must take over its final state.
    result = commandList->close();
    EXPECT_EQ(ZE_RESULT_SUCCESS, result);

    ze_command_list_handle_t listHandle = commandList->toHandle();
    result = commandQueue->executeCommandLists(1, &listHandle, nullptr, true);
    EXPECT_EQ(ZE_RESULT_SUCCESS, result);

    expectSameState(csrProps, finalProps);
}
} // namespace ult
} // namespace L0

View File

@ -17,7 +17,6 @@
#include "level_zero/core/source/builtin/builtin_functions_lib.h"
#include "level_zero/core/test/unit_tests/fixtures/cmdlist_fixture.h"
#include "level_zero/core/test/unit_tests/fixtures/device_fixture.h"
#include "level_zero/core/test/unit_tests/mocks/mock_cmdlist.h"
#include "level_zero/core/test/unit_tests/mocks/mock_event.h"
#include "level_zero/core/test/unit_tests/mocks/mock_image.h"

View File

@ -1997,6 +1997,9 @@ TEST_F(CommandQueueCreate, givenCreatedCommandQueueWhenGettingTrackingFlagsThenD
bool expectedFrontEndTracking = l0GfxCoreHelper.platformSupportsFrontEndTracking();
EXPECT_EQ(expectedFrontEndTracking, commandQueue->frontEndStateTracking);
bool expectedStateBaseAddressTracking = l0GfxCoreHelper.platformSupportsStateBaseAddressTracking();
EXPECT_EQ(expectedStateBaseAddressTracking, commandQueue->stateBaseAddressTracking);
commandQueue->destroy();
}

View File

@ -49,6 +49,11 @@ XE_HPC_CORETEST_F(L0GfxCoreHelperTestXeHpc, GivenXeHpcWhenCheckingL0HelperForPip
EXPECT_TRUE(l0GfxCoreHelper.platformSupportsPipelineSelectTracking());
}
XE_HPC_CORETEST_F(L0GfxCoreHelperTestXeHpc, GivenXeHpcWhenCheckingL0HelperForStateBaseAddressTrackingSupportThenReturnFalse) {
    // The XeHpc L0 helper is expected to report state base address tracking as unsupported.
    const bool trackingSupported = getHelper<L0GfxCoreHelper>().platformSupportsStateBaseAddressTracking();
    EXPECT_FALSE(trackingSupported);
}
XE_HPC_CORETEST_F(L0GfxCoreHelperTestXeHpc, GivenXeHpcWhenCheckingL0HelperForRayTracingSupportThenReturnTrue) {
auto &l0GfxCoreHelper = getHelper<L0GfxCoreHelper>();
EXPECT_TRUE(l0GfxCoreHelper.platformSupportsRayTracing());

View File

@ -56,6 +56,12 @@ XE_HPG_CORETEST_F(L0GfxCoreHelperTestXeHpg, GivenXeHpgWhenCheckingL0HelperForPip
EXPECT_TRUE(l0GfxCoreHelper.platformSupportsPipelineSelectTracking());
}
XE_HPG_CORETEST_F(L0GfxCoreHelperTestXeHpg, GivenXeHpgWhenCheckingL0HelperForStateBaseAddressTrackingSupportThenReturnFalse) {
    // The helper is taken from the first root device environment of a mock execution environment;
    // on XeHpg it is expected to report state base address tracking as unsupported.
    MockExecutionEnvironment mockEnvironment;
    auto &rootDeviceEnvironment = *mockEnvironment.rootDeviceEnvironments[0];
    const bool trackingSupported = rootDeviceEnvironment.getHelper<L0GfxCoreHelper>().platformSupportsStateBaseAddressTracking();
    EXPECT_FALSE(trackingSupported);
}
XE_HPG_CORETEST_F(L0GfxCoreHelperTestXeHpg, GivenXeHpgWhenCheckingL0HelperForRayTracingSupportThenReturnTrue) {
MockExecutionEnvironment executionEnvironment;
auto &l0GfxCoreHelper = executionEnvironment.rootDeviceEnvironments[0]->getHelper<L0GfxCoreHelper>();

View File

@ -1,11 +1,12 @@
/*
* Copyright (C) 2018-2022 Intel Corporation
* Copyright (C) 2018-2023 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/source/gen9/hw_cmds_glk.h"
#include "shared/test/common/mocks/mock_device.h"
#include "shared/test/common/test_macros/header/per_product_test_definitions.h"
#include "shared/test/common/test_macros/test.h"

View File

@ -331,12 +331,19 @@ IndirectHeap *CommandContainer::getIndirectHeap(HeapType heapType) {
}
}
void CommandContainer::ensureHeapSizePrepared(size_t sshRequiredSize, size_t dshRequiredSize) {
auto lock = immediateCmdListCsr->obtainUniqueOwnership();
sharedSshCsrHeap = &immediateCmdListCsr->getIndirectHeap(HeapType::SURFACE_STATE, sshRequiredSize);
void CommandContainer::ensureHeapSizePrepared(size_t sshRequiredSize, size_t dshRequiredSize, bool getDsh) {
if (immediateCmdListCsr) {
auto lock = immediateCmdListCsr->obtainUniqueOwnership();
sharedSshCsrHeap = &immediateCmdListCsr->getIndirectHeap(HeapType::SURFACE_STATE, sshRequiredSize);
if (dshRequiredSize > 0) {
sharedDshCsrHeap = &immediateCmdListCsr->getIndirectHeap(HeapType::DYNAMIC_STATE, dshRequiredSize);
if (getDsh) {
sharedDshCsrHeap = &immediateCmdListCsr->getIndirectHeap(HeapType::DYNAMIC_STATE, dshRequiredSize);
}
} else {
this->getHeapWithRequiredSizeAndAlignment(HeapType::SURFACE_STATE, sshRequiredSize, 0);
if (getDsh) {
this->getHeapWithRequiredSizeAndAlignment(HeapType::DYNAMIC_STATE, dshRequiredSize, 0);
}
}
}

View File

@ -119,7 +119,7 @@ class CommandContainer : public NonCopyableOrMovableClass {
bool immediateCmdListSharedHeap(HeapType heapType) {
return (heapSharingEnabled && (heapType == HeapType::DYNAMIC_STATE || heapType == HeapType::SURFACE_STATE));
}
void ensureHeapSizePrepared(size_t sshRequiredSize, size_t dshRequiredSize);
void ensureHeapSizePrepared(size_t sshRequiredSize, size_t dshRequiredSize, bool getDsh);
GraphicsAllocation *reuseExistingCmdBuffer();
GraphicsAllocation *allocateCommandBuffer();

View File

@ -236,7 +236,6 @@ void EncodeDispatchKernel<Family>::encode(CommandContainer &container, EncodeDis
args.isRcs};
EncodeStateBaseAddress<Family>::encode(encodeStateBaseAddressArgs);
container.setDirtyStateForAllHeaps(false);
args.requiresUncachedMocs = false;
}
if (NEO::PauseOnGpuProperties::pauseModeAllowed(NEO::DebugManager.flags.PauseOnEnqueue.get(), args.device->debugExecutionCounter.load(), NEO::PauseOnGpuProperties::PauseMode::BeforeWorkload)) {

View File

@ -276,6 +276,15 @@ CompletionStamp CommandStreamReceiverHw<GfxFamily>::flushTask(
typedef typename GfxFamily::PIPE_CONTROL PIPE_CONTROL;
typedef typename GfxFamily::STATE_BASE_ADDRESS STATE_BASE_ADDRESS;
int64_t bindingTablePoolBaseAddress = -1;
size_t bindingTablePoolSize = std::numeric_limits<size_t>::max();
int64_t surfaceStateBaseAddress = -1;
size_t surfaceStateSize = std::numeric_limits<size_t>::max();
int64_t dynamicStateBaseAddress = -1;
size_t dynamicStateSize = std::numeric_limits<size_t>::max();
int64_t indirectObjectBaseAddress = -1;
size_t indirectObjectSize = std::numeric_limits<size_t>::max();
DEBUG_BREAK_IF(&commandStreamTask == &commandStream);
DEBUG_BREAK_IF(!(dispatchFlags.preemptionMode == PreemptionMode::Disabled ? device.getPreemptionMode() == PreemptionMode::Disabled : true));
DEBUG_BREAK_IF(taskLevel >= CompletionStamp::notReady);
@ -444,6 +453,22 @@ CompletionStamp CommandStreamReceiverHw<GfxFamily>::flushTask(
bool iohDirty = iohState.updateAndCheck(ioh);
bool sshDirty = ssh != nullptr ? sshState.updateAndCheck(ssh) : false;
if (dshDirty) {
dynamicStateBaseAddress = dsh->getHeapGpuBase();
dynamicStateSize = dsh->getHeapSizeInPages();
}
if (iohDirty) {
indirectObjectBaseAddress = ioh->getHeapGpuBase();
indirectObjectSize = ioh->getHeapSizeInPages();
}
if (sshDirty) {
surfaceStateBaseAddress = ssh->getHeapGpuBase();
surfaceStateSize = ssh->getHeapSizeInPages();
bindingTablePoolBaseAddress = surfaceStateBaseAddress;
bindingTablePoolSize = surfaceStateSize;
}
auto isStateBaseAddressDirty = dshDirty || iohDirty || sshDirty || stateBaseAddressDirty;
auto mocsIndex = latestSentStatelessMocsConfig;
@ -464,6 +489,12 @@ CompletionStamp CommandStreamReceiverHw<GfxFamily>::flushTask(
lastSentUseGlobalAtomics = dispatchFlags.useGlobalAtomics;
}
this->streamProperties.stateBaseAddress.setProperties(dispatchFlags.useGlobalAtomics, mocsIndex,
bindingTablePoolBaseAddress, bindingTablePoolSize,
surfaceStateBaseAddress, surfaceStateSize,
dynamicStateBaseAddress, dynamicStateSize,
indirectObjectBaseAddress, indirectObjectSize, this->peekRootDeviceEnvironment());
bool debuggingEnabled = device.getDebugger() != nullptr;
bool sourceLevelDebuggerActive = device.getSourceLevelDebugger() != nullptr ? true : false;
@ -478,6 +509,7 @@ CompletionStamp CommandStreamReceiverHw<GfxFamily>::flushTask(
// Reprogram state base address if required
if (isStateBaseAddressDirty || sourceLevelDebuggerActive) {
this->latestSentStatelessMocsConfig = static_cast<uint32_t>(this->streamProperties.stateBaseAddress.statelessMocs.value);
EncodeWA<GfxFamily>::addPipeControlBeforeStateBaseAddress(commandStreamCSR, this->peekRootDeviceEnvironment(), isRcs(), this->dcFlushSupport);
EncodeWA<GfxFamily>::encodeAdditionalPipelineSelect(commandStreamCSR, dispatchFlags.pipelineSelectArgs, true, peekRootDeviceEnvironment(), isRcs());

View File

@ -512,6 +512,7 @@ DECLARE_DEBUG_VARIABLE(int32_t, OverrideDrmRegion, -1, "-1: disable, 0+: overrid
DECLARE_DEBUG_VARIABLE(int32_t, EnableFrontEndTracking, -1, "-1: default: disabled, 0: disabled, 1: enabled. This flag creates multiple return point from List to Queue for Front End reconfiguration on Queue buffer for single List")
DECLARE_DEBUG_VARIABLE(int32_t, EnablePipelineSelectTracking, -1, "-1: default: disabled, 0: disabled, 1: enabled. This flag enables optimization that limits number of pipeline select dispatched by command lists")
DECLARE_DEBUG_VARIABLE(int32_t, EnableStateComputeModeTracking, -1, "-1: default: disabled, 0: disabled, 1: enabled. This flag enables tracking state compute mode changes in command lists")
DECLARE_DEBUG_VARIABLE(int32_t, EnableStateBaseAddressTracking, -1, "-1: default: disabled, 0: disabled, 1: enabled. This flag enables tracking state base address changes in command lists")
DECLARE_DEBUG_VARIABLE(int32_t, EnableSetPair, -1, "Use SET_PAIR to pair two buffer objects behind the same file descriptor, -1: default, 0: disabled, 1: enabled")
/* Binary Cache */
DECLARE_DEBUG_VARIABLE(bool, BinaryCacheTrace, false, "enable cl_cache to produce .trace files with information about hash computation")

View File

@ -5,6 +5,7 @@
*
*/
#include "shared/source/helpers/engine_node_helper.h"
#include "shared/source/helpers/hw_helper.h"
#include "shared/source/helpers/hw_info.h"

View File

@ -16,6 +16,7 @@ namespace NEO {
class CommandStreamReceiver;
class LinearStream;
struct DeviceInfo;
struct KernelDescriptor;
struct HardwareInfo;
struct RootDeviceEnvironment;
@ -93,6 +94,7 @@ struct UnitTestHelper {
static bool getComputeDispatchAllWalkerFromFrontEndCommand(const typename GfxFamily::VFE_STATE_TYPE &feCmd);
static bool getSystolicFlagValueFromPipelineSelectCommand(const typename GfxFamily::PIPELINE_SELECT &pipelineSelectCmd);
static size_t getAdditionalDshSize();
static bool expectNullDsh(const DeviceInfo &deviceInfo);
};
} // namespace NEO

View File

@ -5,6 +5,7 @@
*
*/
#include "shared/source/device/device_info.h"
#include "shared/source/helpers/hw_info.h"
#include "shared/test/common/cmd_parse/gen_cmd_parse.h"
#include "shared/test/common/cmd_parse/hw_parse.h"
@ -107,4 +108,12 @@ bool UnitTestHelper<GfxFamily>::getComputeDispatchAllWalkerFromFrontEndCommand(c
return false;
}
// Tells a test whether it should expect no dynamic state heap for the given device:
// always true for families without sampler support; for families with sampler support,
// true only when the device reports no image support.
template <typename GfxFamily>
bool UnitTestHelper<GfxFamily>::expectNullDsh(const DeviceInfo &deviceInfo) {
    bool nullDshExpected = true;
    if constexpr (GfxFamily::supportsSampler) {
        nullDshExpected = !deviceInfo.imageSupport;
    }
    return nullDshExpected;
}
} // namespace NEO

View File

@ -144,6 +144,7 @@ AllocateBuffersInLocalMemoryForMultiRootDeviceContexts = 0
EnableComputeWorkSizeSquared = 0
EnableVaLibCalls = -1
EnableExtendedVaFormats = 0
EnableStateBaseAddressTracking = -1
AddClGlSharing = -1
EnableFormatQuery = 1
EnableFreeMemory = 0

View File

@ -956,7 +956,7 @@ HWTEST_F(CommandContainerTest, givenCmdContainerHasImmediateCsrWhenGettingHeapWi
auto &ultCsr = pDevice->getUltCommandStreamReceiver<FamilyType>();
ultCsr.recursiveLockCounter = 0;
cmdContainer.ensureHeapSizePrepared(0, 0);
cmdContainer.ensureHeapSizePrepared(0, 0, false);
EXPECT_EQ(1u, ultCsr.recursiveLockCounter);
EXPECT_EQ(nullptr, cmdContainer.getIndirectHeap(HeapType::DYNAMIC_STATE));
@ -968,9 +968,15 @@ HWTEST_F(CommandContainerTest, givenCmdContainerHasImmediateCsrWhenGettingHeapWi
EXPECT_NO_THROW(cmdContainer.getHeapSpaceAllowGrow(HeapType::SURFACE_STATE, 0));
EXPECT_NO_THROW(cmdContainer.getHeapWithRequiredSizeAndAlignment(HeapType::SURFACE_STATE, 0, 0));
cmdContainer.ensureHeapSizePrepared(4 * MemoryConstants::kiloByte, 4 * MemoryConstants::kiloByte);
cmdContainer.ensureHeapSizePrepared(0, 0, true);
EXPECT_EQ(2u, ultCsr.recursiveLockCounter);
EXPECT_NE(nullptr, cmdContainer.getIndirectHeap(HeapType::DYNAMIC_STATE));
EXPECT_NE(nullptr, cmdContainer.getIndirectHeap(HeapType::SURFACE_STATE));
cmdContainer.ensureHeapSizePrepared(4 * MemoryConstants::kiloByte, 4 * MemoryConstants::kiloByte, true);
EXPECT_EQ(3u, ultCsr.recursiveLockCounter);
auto dshHeap = cmdContainer.getIndirectHeap(HeapType::DYNAMIC_STATE);
EXPECT_NE(nullptr, dshHeap);
auto sshHeap = cmdContainer.getIndirectHeap(HeapType::SURFACE_STATE);
@ -1001,6 +1007,32 @@ HWTEST_F(CommandContainerTest, givenCmdContainerHasImmediateCsrWhenGettingHeapWi
EXPECT_THROW(cmdContainer.getHeapWithRequiredSizeAndAlignment(HeapType::SURFACE_STATE, 64, 64), std::exception);
}
// Verifies that, for a container initialized with a null second argument to
// initialize(), ensureHeapSizePrepared(..., false) leaves a nearly exhausted
// dynamic state heap as it is: the 64 bytes left free are still reported
// afterwards even though 4KB was requested.
// NOTE(review): the null argument presumably means "no immediate CSR"
// (regular command list, per the test name) - confirm against
// CommandContainer::initialize.
HWTEST_F(CommandContainerTest, givenCmdContainerUsedInRegularCmdListWhenGettingHeapWithEnsuringSpaceThenExpectCorrectHeap) {
    // The scenario relies on a dynamic state heap being present; skip when
    // the device reports no image support.
    if (!pDevice->getDeviceInfo().imageSupport) {
        GTEST_SKIP();
    }

    MyMockCommandContainer cmdContainer;
    auto code = cmdContainer.initialize(pDevice, nullptr, true);
    // Fatal check: every step below depends on a successfully initialized container.
    ASSERT_EQ(CommandContainer::ErrorCode::SUCCESS, code);

    cmdContainer.ensureHeapSizePrepared(0, 0, true);

    auto dsh = cmdContainer.getIndirectHeap(HeapType::DYNAMIC_STATE);
    auto ssh = cmdContainer.getIndirectHeap(HeapType::SURFACE_STATE);
    // dsh is dereferenced right below, so a null heap must stop the test
    // here rather than crash the whole test binary.
    ASSERT_NE(nullptr, dsh);
    EXPECT_NE(nullptr, ssh);

    // Consume the heap until only 64 bytes remain available.
    dsh->getSpace(dsh->getAvailableSpace() - 64);

    // Ask for more room than is left, with the last argument set to false.
    cmdContainer.ensureHeapSizePrepared(4 * MemoryConstants::kiloByte, 4 * MemoryConstants::kiloByte, false);

    dsh = cmdContainer.getIndirectHeap(HeapType::DYNAMIC_STATE);
    // Guard the second dereference as well.
    ASSERT_NE(nullptr, dsh);
    EXPECT_EQ(64u, dsh->getAvailableSpace());
}
struct MockHeapHelper : public HeapHelper {
public:
using HeapHelper::storageForReuse;

View File

@ -96,7 +96,6 @@ HWTEST_F(CommandEncodeStatesUncachedMocsTests, whenEncodingDispatchKernelWithUnc
EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs);
EncodeDispatchKernel<FamilyType>::encode(*cmdContainer.get(), dispatchArgs, nullptr);
EXPECT_FALSE(dispatchArgs.requiresUncachedMocs);
GenCmdList commands;
CmdParse<FamilyType>::parseCommandBuffer(commands,
@ -124,7 +123,6 @@ HWTEST_F(CommandEncodeStatesUncachedMocsTests, whenEncodingDispatchKernelWithUnc
EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs);
EncodeDispatchKernel<FamilyType>::encode(*cmdContainer.get(), dispatchArgs, nullptr);
EXPECT_FALSE(dispatchArgs.requiresUncachedMocs);
GenCmdList commands;
CmdParse<FamilyType>::parseCommandBuffer(commands,