mirror of
https://github.com/intel/compute-runtime.git
synced 2026-01-03 14:55:24 +08:00
feature: add heapless and global stateless scratch address patching
Related-To: NEO-10381 Signed-off-by: Zbigniew Zdanowicz <zbigniew.zdanowicz@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
78885ae1fe
commit
73d558058c
@@ -360,6 +360,10 @@ struct CommandList : _ze_command_list_handle_t {
|
||||
return stateBaseAddressTracking;
|
||||
}
|
||||
|
||||
bool getCmdListScratchAddressPatchingEnabled() const {
|
||||
return scratchAddressPatchingEnabled;
|
||||
}
|
||||
|
||||
protected:
|
||||
NEO::GraphicsAllocation *getAllocationFromHostPtrMap(const void *buffer, uint64_t bufferSize);
|
||||
NEO::GraphicsAllocation *getHostPtrAlloc(const void *buffer, uint64_t bufferSize, bool hostCopyAllowed);
|
||||
@@ -441,6 +445,7 @@ struct CommandList : _ze_command_list_handle_t {
|
||||
bool useOnlyGlobalTimestamps = false;
|
||||
bool heaplessModeEnabled = false;
|
||||
bool heaplessStateInitEnabled = false;
|
||||
bool scratchAddressPatchingEnabled = false;
|
||||
};
|
||||
|
||||
using CommandListAllocatorFn = CommandList *(*)(uint32_t);
|
||||
|
||||
@@ -3265,6 +3265,8 @@ void CommandListCoreFamily<gfxCoreFamily>::clearCommandsToPatch() {
|
||||
case CommandToPatch::PauseOnEnqueuePipeControlEnd:
|
||||
UNRECOVERABLE_IF(commandToPatch.pCommand == nullptr);
|
||||
break;
|
||||
case CommandToPatch::ComputeWalkerInlineDataScratch:
|
||||
break;
|
||||
default:
|
||||
UNRECOVERABLE_IF(true);
|
||||
}
|
||||
|
||||
@@ -219,6 +219,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(K
|
||||
this->dcFlushSupport, // dcFlushEnable
|
||||
this->heaplessModeEnabled, // isHeaplessModeEnabled
|
||||
false, // interruptEvent
|
||||
!this->scratchAddressPatchingEnabled, // immediateScratchAddressPatching
|
||||
};
|
||||
|
||||
NEO::EncodeDispatchKernel<GfxFamily>::encodeCommon(commandContainer, dispatchKernelArgs);
|
||||
|
||||
@@ -127,11 +127,15 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(K
|
||||
", SIMD: ", kernelInfo->getMaxSimdSize());
|
||||
|
||||
bool needScratchSpace = false;
|
||||
bool kernelNeedsScratchSpace = false;
|
||||
for (uint32_t slotId = 0u; slotId < 2; slotId++) {
|
||||
commandListPerThreadScratchSize[slotId] = std::max<uint32_t>(commandListPerThreadScratchSize[slotId], kernelDescriptor.kernelAttributes.perThreadScratchSize[slotId]);
|
||||
if (commandListPerThreadScratchSize[slotId] > 0) {
|
||||
needScratchSpace = true;
|
||||
}
|
||||
if (kernelDescriptor.kernelAttributes.perThreadScratchSize[slotId] > 0) {
|
||||
kernelNeedsScratchSpace = true;
|
||||
}
|
||||
}
|
||||
|
||||
if ((this->cmdListHeapAddressModel == NEO::HeapAddressModel::privateHeaps) && needScratchSpace) {
|
||||
@@ -342,11 +346,27 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(K
|
||||
this->dcFlushSupport, // dcFlushEnable
|
||||
this->heaplessModeEnabled, // isHeaplessModeEnabled
|
||||
interruptEvent, // interruptEvent
|
||||
!this->scratchAddressPatchingEnabled, // immediateScratchAddressPatching
|
||||
};
|
||||
|
||||
NEO::EncodeDispatchKernel<GfxFamily>::encodeCommon(commandContainer, dispatchKernelArgs);
|
||||
launchParams.outWalker = dispatchKernelArgs.outWalkerPtr;
|
||||
|
||||
if (this->heaplessModeEnabled && this->scratchAddressPatchingEnabled && kernelNeedsScratchSpace) {
|
||||
CommandToPatch scratchInlineData;
|
||||
scratchInlineData.pDestination = dispatchKernelArgs.outWalkerPtr;
|
||||
scratchInlineData.pCommand = nullptr;
|
||||
scratchInlineData.type = CommandToPatch::CommandType::ComputeWalkerInlineDataScratch;
|
||||
scratchInlineData.offset = NEO::EncodeDispatchKernel<GfxFamily>::getInlineDataOffset(dispatchKernelArgs) +
|
||||
kernelDescriptor.payloadMappings.implicitArgs.scratchPointerAddress.offset;
|
||||
scratchInlineData.patchSize = kernelDescriptor.payloadMappings.implicitArgs.scratchPointerAddress.pointerSize;
|
||||
auto ssh = commandContainer.getIndirectHeap(NEO::HeapType::surfaceState);
|
||||
if (ssh != nullptr) {
|
||||
scratchInlineData.baseAddress = ssh->getGpuBase();
|
||||
}
|
||||
commandsToPatch.push_back(scratchInlineData);
|
||||
}
|
||||
|
||||
if (!this->isFlushTaskSubmissionEnabled) {
|
||||
this->containsStatelessUncachedResource = dispatchKernelArgs.requiresUncachedMocs;
|
||||
}
|
||||
|
||||
@@ -30,6 +30,7 @@ struct CommandToPatch {
|
||||
CbEventTimestampClearStoreDataImm,
|
||||
CbWaitEventSemaphoreWait,
|
||||
CbWaitEventLoadRegisterImm,
|
||||
ComputeWalkerInlineDataScratch,
|
||||
Invalid
|
||||
};
|
||||
void *pDestination = nullptr;
|
||||
@@ -37,6 +38,8 @@ struct CommandToPatch {
|
||||
size_t offset = 0;
|
||||
CommandType type = Invalid;
|
||||
size_t inOrderPatchListIndex = 0;
|
||||
size_t patchSize = 0;
|
||||
uint64_t baseAddress = 0;
|
||||
};
|
||||
|
||||
using CommandToPatchContainer = std::vector<CommandToPatch>;
|
||||
|
||||
@@ -114,6 +114,7 @@ struct CommandQueueHw : public CommandQueueImp {
|
||||
bool rtDispatchRequired = false;
|
||||
bool globalInit = false;
|
||||
bool lockScratchController = false;
|
||||
bool cmdListScratchAddressPatchingEnabled = false;
|
||||
};
|
||||
|
||||
ze_result_t executeCommandListsRegularHeapless(CommandListExecutionContext &ctx,
|
||||
@@ -251,6 +252,7 @@ struct CommandQueueHw : public CommandQueueImp {
|
||||
CommandListRequiredStateChange &cmdListRequired);
|
||||
inline void updateBaseAddressState(CommandList *lastCommandList);
|
||||
inline void updateDebugSurfaceState(CommandListExecutionContext &ctx);
|
||||
inline void patchCommands(CommandList &commandList, CommandListExecutionContext &ctx);
|
||||
|
||||
size_t alignedChildStreamPadding{};
|
||||
};
|
||||
|
||||
@@ -146,6 +146,10 @@ ze_result_t CommandQueueHw<gfxCoreFamily>::executeCommandListsRegularHeapless(
|
||||
|
||||
this->csr->getResidencyAllocations().reserve(ctx.spaceForResidency);
|
||||
|
||||
if (ctx.cmdListScratchAddressPatchingEnabled == true) {
|
||||
this->handleScratchSpaceAndUpdateGSBAStateDirtyFlag(ctx);
|
||||
}
|
||||
|
||||
NEO::LinearStream child(nullptr);
|
||||
if (const auto ret = this->makeAlignedChildStreamAndSetGpuBase(child, linearStreamSizeEstimate); ret != ZE_RESULT_SUCCESS) {
|
||||
return ret;
|
||||
@@ -168,7 +172,7 @@ ze_result_t CommandQueueHw<gfxCoreFamily>::executeCommandListsRegularHeapless(
|
||||
|
||||
ctx.childGpuAddressPositionBeforeDynamicPreamble = child.getCurrentGpuAddressPosition();
|
||||
|
||||
this->patchCommands(*commandList, this->csr->getScratchSpaceController()->getScratchPatchAddress());
|
||||
this->patchCommands(*commandList, ctx);
|
||||
this->programOneCmdListBatchBufferStart(commandList, child, ctx);
|
||||
|
||||
this->prefetchMemoryToDeviceAssociatedWithCmdList(commandList);
|
||||
@@ -254,9 +258,10 @@ ze_result_t CommandQueueHw<gfxCoreFamily>::executeCommandListsRegular(
|
||||
|
||||
size_t linearStreamSizeEstimate = this->estimateLinearStreamSizeInitial(ctx);
|
||||
|
||||
if (this->heaplessModeEnabled == false) {
|
||||
if (this->heaplessModeEnabled == false || ctx.cmdListScratchAddressPatchingEnabled == true) {
|
||||
this->handleScratchSpaceAndUpdateGSBAStateDirtyFlag(ctx);
|
||||
}
|
||||
|
||||
this->setFrontEndStateProperties(ctx);
|
||||
|
||||
auto neoDevice = this->device->getNEODevice();
|
||||
@@ -360,7 +365,7 @@ ze_result_t CommandQueueHw<gfxCoreFamily>::executeCommandListsRegular(
|
||||
}
|
||||
}
|
||||
|
||||
this->patchCommands(*commandList, this->csr->getScratchSpaceController()->getScratchPatchAddress());
|
||||
this->patchCommands(*commandList, ctx);
|
||||
this->programOneCmdListBatchBufferStart(commandList, child, ctx);
|
||||
|
||||
this->prefetchMemoryToDeviceAssociatedWithCmdList(commandList);
|
||||
@@ -487,9 +492,8 @@ void CommandQueueHw<gfxCoreFamily>::programFrontEndAndClearDirtyFlag(
|
||||
if (!shouldFrontEndBeProgrammed) {
|
||||
return;
|
||||
}
|
||||
auto scratchSpaceController = this->csr->getScratchSpaceController();
|
||||
programFrontEnd(scratchSpaceController->getScratchPatchAddress(),
|
||||
scratchSpaceController->getPerThreadScratchSpaceSizeSlot0(),
|
||||
programFrontEnd(ctx.scratchSpaceController->getScratchPatchAddress(),
|
||||
ctx.scratchSpaceController->getPerThreadScratchSpaceSizeSlot0(),
|
||||
cmdStream,
|
||||
csrState);
|
||||
ctx.frontEndStateDirty = false;
|
||||
@@ -740,6 +744,8 @@ void CommandQueueHw<gfxCoreFamily>::setupCmdListsAndContextParams(
|
||||
}
|
||||
|
||||
this->partitionCount = std::max(this->partitionCount, commandList->getPartitionCount());
|
||||
|
||||
ctx.cmdListScratchAddressPatchingEnabled |= commandList->getCmdListScratchAddressPatchingEnabled();
|
||||
}
|
||||
|
||||
makeResidentAndMigrate(ctx.isMigrationRequested, commandContainer.getResidencyContainer());
|
||||
@@ -828,15 +834,23 @@ void CommandQueueHw<gfxCoreFamily>::handleScratchSpaceAndUpdateGSBAStateDirtyFla
|
||||
if (ctx.lockScratchController) {
|
||||
defaultCsrLock = device->getNEODevice()->getDefaultEngine().commandStreamReceiver->obtainUniqueOwnership();
|
||||
}
|
||||
|
||||
bool localGsbaDirty = false;
|
||||
bool localFrontEndDirty = false;
|
||||
handleScratchSpace(this->heapContainer,
|
||||
ctx.scratchSpaceController,
|
||||
ctx.globalStatelessAllocation,
|
||||
ctx.gsbaStateDirty, ctx.frontEndStateDirty,
|
||||
localGsbaDirty, localFrontEndDirty,
|
||||
ctx.perThreadScratchSpaceSlot0Size, ctx.perThreadScratchSpaceSlot1Size);
|
||||
ctx.gsbaStateDirty |= this->csr->getGSBAStateDirty();
|
||||
ctx.scratchGsba = ctx.scratchSpaceController->calculateNewGSH();
|
||||
|
||||
ctx.globalInit |= ctx.gsbaStateDirty;
|
||||
if (this->heaplessModeEnabled == false) {
|
||||
ctx.gsbaStateDirty |= localGsbaDirty;
|
||||
ctx.frontEndStateDirty |= localFrontEndDirty;
|
||||
|
||||
ctx.gsbaStateDirty |= this->csr->getGSBAStateDirty();
|
||||
ctx.globalInit |= ctx.gsbaStateDirty;
|
||||
}
|
||||
ctx.scratchGsba = ctx.scratchSpaceController->calculateNewGSH();
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
@@ -1172,10 +1186,9 @@ void CommandQueueHw<gfxCoreFamily>::programOneCmdListBatchBufferStartSecondaryBa
|
||||
}) != returnPoints.end();
|
||||
if (cmdBufferHasRestarts) {
|
||||
while (returnPointIdx < returnPointsSize && allocation == returnPoints[returnPointIdx].currentCmdBuffer) {
|
||||
auto scratchSpaceController = this->csr->getScratchSpaceController();
|
||||
ctx.cmdListBeginState.frontEndState.copyPropertiesComputeDispatchAllWalkerEnableDisableEuFusion(returnPoints[returnPointIdx].configSnapshot.frontEndState);
|
||||
programFrontEnd(scratchSpaceController->getScratchPatchAddress(),
|
||||
scratchSpaceController->getPerThreadScratchSpaceSizeSlot0(),
|
||||
programFrontEnd(ctx.scratchSpaceController->getScratchPatchAddress(),
|
||||
ctx.scratchSpaceController->getPerThreadScratchSpaceSizeSlot0(),
|
||||
commandStream,
|
||||
ctx.cmdListBeginState);
|
||||
NEO::EncodeBatchBufferStartOrEnd<GfxFamily>::programBatchBufferStart(&commandStream,
|
||||
@@ -1686,4 +1699,13 @@ size_t CommandQueueHw<gfxCoreFamily>::estimateStateBaseAddressDebugTracking() {
|
||||
return size;
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
void CommandQueueHw<gfxCoreFamily>::patchCommands(CommandList &commandList, CommandListExecutionContext &ctx) {
|
||||
uint64_t scratchAddress = ctx.scratchSpaceController->getScratchPatchAddress();
|
||||
if (this->heaplessModeEnabled && this->cmdListHeapAddressModel == NEO::HeapAddressModel::globalStateless) {
|
||||
scratchAddress += ctx.globalStatelessAllocation->getGpuAddress();
|
||||
}
|
||||
patchCommands(commandList, scratchAddress);
|
||||
}
|
||||
|
||||
} // namespace L0
|
||||
|
||||
@@ -228,6 +228,12 @@ void CommandQueueHw<gfxCoreFamily>::patchCommands(CommandList &commandList, uint
|
||||
args);
|
||||
break;
|
||||
}
|
||||
case CommandToPatch::ComputeWalkerInlineDataScratch: {
|
||||
uint64_t fullScratchAddress = scratchAddress + commandToPatch.baseAddress;
|
||||
void *scratchAddressPatch = ptrOffset(commandToPatch.pDestination, commandToPatch.offset);
|
||||
std::memcpy(scratchAddressPatch, &fullScratchAddress, commandToPatch.patchSize);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
UNRECOVERABLE_IF(true);
|
||||
}
|
||||
|
||||
@@ -8,9 +8,11 @@
|
||||
#include "level_zero/core/test/unit_tests/fixtures/cmdlist_fixture.h"
|
||||
|
||||
#include "shared/source/built_ins/sip.h"
|
||||
#include "shared/source/command_container/cmdcontainer.h"
|
||||
#include "shared/source/command_container/implicit_scaling.h"
|
||||
#include "shared/source/helpers/gfx_core_helper.h"
|
||||
#include "shared/source/helpers/ray_tracing_helper.h"
|
||||
#include "shared/source/indirect_heap/indirect_heap.h"
|
||||
#include "shared/source/memory_manager/internal_allocation_storage.h"
|
||||
#include "shared/source/memory_manager/memory_manager.h"
|
||||
#include "shared/source/os_interface/os_interface.h"
|
||||
@@ -579,5 +581,42 @@ void CommandQueueThreadArbitrationPolicyFixture::tearDown() {
|
||||
L0::globalDriver = nullptr;
|
||||
}
|
||||
|
||||
// Prepares the fixture for scratch patching tests.
// globalStatelessMode is remembered and written to the SelectCmdListHeapAddressModel debug flag
// before the base fixture is set up; heaplessStateInitEnabled (non-zero == enabled) is applied
// to both command lists and the command queue after the base setUp.
void CommandListScratchPatchFixtureInit::setUpParams(int32_t globalStatelessMode, int32_t heaplessStateInitEnabled) {
    fixtureGlobalStatelessMode = globalStatelessMode;
    debugManager.flags.SelectCmdListHeapAddressModel.set(globalStatelessMode);

    ModuleMutableCommandListFixture::setUp();

    const bool stateInitEnabled = (heaplessStateInitEnabled != 0);

    // Only the regular command list gets deferred scratch address patching turned on.
    commandList->scratchAddressPatchingEnabled = true;
    commandList->heaplessModeEnabled = true;
    commandList->heaplessStateInitEnabled = stateInitEnabled;

    commandListImmediate->heaplessModeEnabled = true;
    commandListImmediate->heaplessStateInitEnabled = stateInitEnabled;

    commandQueue->heaplessModeEnabled = true;
    commandQueue->heaplessStateInitEnabled = stateInitEnabled;

    // Make the mock kernel require scratch in slot 0 and describe where its scratch pointer lives.
    auto &kernelDescriptor = *mockKernelImmData->kernelDescriptor;
    kernelDescriptor.kernelAttributes.perThreadScratchSize[0] = 0x40;

    auto &scratchPointer = kernelDescriptor.payloadMappings.implicitArgs.scratchPointerAddress;
    scratchPointer.pointerSize = 0x8;
    scratchPointer.offset = scratchInlineOffset;
}
|
||||
|
||||
void CommandListScratchPatchFixtureInit::tearDown() {
|
||||
ModuleMutableCommandListFixture::tearDown();
|
||||
}
|
||||
|
||||
uint64_t CommandListScratchPatchFixtureInit::getSurfStateGpuBase(bool useImmediate) {
|
||||
if (fixtureGlobalStatelessMode == 1) {
|
||||
return device->getNEODevice()->getDefaultEngine().commandStreamReceiver->getGlobalStatelessHeapAllocation()->getGpuAddress();
|
||||
} else {
|
||||
if (useImmediate) {
|
||||
return device->getNEODevice()->getDefaultEngine().commandStreamReceiver->getIndirectHeap(NEO::surfaceState, 0).getGpuBase();
|
||||
} else {
|
||||
return commandList->commandContainer.getIndirectHeap(NEO::surfaceState)->getGpuBase();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace ult
|
||||
} // namespace L0
|
||||
|
||||
@@ -362,5 +362,25 @@ struct CommandQueueThreadArbitrationPolicyFixture {
|
||||
L0::Device *device = nullptr;
|
||||
};
|
||||
|
||||
// Non-template part of the scratch patching fixture: holds the shared setup,
// the helper that resolves the expected surface state base, and the common test body.
struct CommandListScratchPatchFixtureInit : ModuleMutableCommandListFixture {
    // Applies the heap address model and heapless state-init selection, then prepares the mock kernel.
    void setUpParams(int32_t globalStatelessMode, int32_t heaplessStateInitEnabled);
    void tearDown();

    // Base address expected to be combined with the scratch patch address.
    uint64_t getSurfStateGpuBase(bool useImmediate);

    // Shared test body; useImmediate selects the immediate command list instead of the regular one.
    template <typename FamilyType>
    void testScratchInline(bool useImmediate);

    // Value last passed to setUpParams as globalStatelessMode.
    int32_t fixtureGlobalStatelessMode{0};
    // Offset assigned to the mock kernel's scratch pointer implicit argument.
    uint32_t scratchInlineOffset{8};
};
|
||||
|
||||
template <int32_t globalStatelessMode, int32_t heaplessStateInitEnabled>
|
||||
struct CommandListScratchPatchFixture : public CommandListScratchPatchFixtureInit {
|
||||
void setUp() {
|
||||
CommandListScratchPatchFixtureInit::setUpParams(globalStatelessMode, heaplessStateInitEnabled);
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace ult
|
||||
} // namespace L0
|
||||
|
||||
@@ -6,6 +6,9 @@
|
||||
*/
|
||||
|
||||
#include "shared/source/command_container/cmdcontainer.h"
|
||||
#include "shared/source/command_container/command_encoder.h"
|
||||
#include "shared/source/command_stream/command_stream_receiver.h"
|
||||
#include "shared/source/command_stream/scratch_space_controller.h"
|
||||
#include "shared/source/command_stream/thread_arbitration_policy.h"
|
||||
#include "shared/source/helpers/register_offsets.h"
|
||||
#include "shared/source/indirect_heap/indirect_heap.h"
|
||||
@@ -1405,5 +1408,90 @@ void ImmediateCmdListSharedHeapsFlushTaskFixtureInit::testBody(NonKernelOperatio
|
||||
validateDispatchFlags(true, ultCsr.recordedImmediateDispatchFlags, ultCsr.recordedSsh);
|
||||
}
|
||||
|
||||
template <typename FamilyType>
|
||||
void CommandListScratchPatchFixtureInit::testScratchInline(bool useImmediate) {
|
||||
auto csr = device->getNEODevice()->getDefaultEngine().commandStreamReceiver;
|
||||
auto scratchController = csr->getScratchSpaceController();
|
||||
|
||||
auto ultCsr = static_cast<UltCommandStreamReceiver<FamilyType> *>(commandQueue->csr);
|
||||
ultCsr->storeMakeResidentAllocations = true;
|
||||
|
||||
NEO::EncodeDispatchKernelArgs dispatchKernelArgs = {};
|
||||
dispatchKernelArgs.isHeaplessModeEnabled = true;
|
||||
|
||||
size_t inlineOffset = NEO::EncodeDispatchKernel<FamilyType>::getInlineDataOffset(dispatchKernelArgs);
|
||||
|
||||
uint64_t surfaceHeapGpuBase = getSurfStateGpuBase(useImmediate);
|
||||
|
||||
auto scratchCmdList = static_cast<L0::CommandList *>(commandList.get());
|
||||
auto cmdListStream = commandList->commandContainer.getCommandStream();
|
||||
if (useImmediate) {
|
||||
scratchCmdList = static_cast<L0::CommandList *>(commandListImmediate.get());
|
||||
cmdListStream = commandListImmediate->commandContainer.getCommandStream();
|
||||
}
|
||||
|
||||
const ze_group_count_t groupCount{1, 1, 1};
|
||||
CmdListKernelLaunchParams launchParams = {};
|
||||
|
||||
auto result = ZE_RESULT_SUCCESS;
|
||||
size_t usedBefore = cmdListStream->getUsed();
|
||||
result = scratchCmdList->appendLaunchKernel(kernel->toHandle(), groupCount, nullptr, 0, nullptr, launchParams, false);
|
||||
size_t usedAfter = cmdListStream->getUsed();
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
|
||||
GenCmdList cmdList;
|
||||
ASSERT_TRUE(FamilyType::Parse::parseCommandBuffer(
|
||||
cmdList,
|
||||
ptrOffset(cmdListStream->getCpuBase(), usedBefore),
|
||||
usedAfter - usedBefore));
|
||||
|
||||
auto walkerIterator = NEO::UnitTestHelper<FamilyType>::findWalkerCmd(cmdList.begin(), cmdList.end(), true);
|
||||
ASSERT_NE(cmdList.end(), walkerIterator);
|
||||
void *walkerPtrWithScratch = *walkerIterator;
|
||||
|
||||
mockKernelImmData->kernelDescriptor->kernelAttributes.perThreadScratchSize[0] = 0x0;
|
||||
|
||||
usedBefore = cmdListStream->getUsed();
|
||||
result = scratchCmdList->appendLaunchKernel(kernel->toHandle(), groupCount, nullptr, 0, nullptr, launchParams, false);
|
||||
usedAfter = cmdListStream->getUsed();
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
|
||||
cmdList.clear();
|
||||
ASSERT_TRUE(FamilyType::Parse::parseCommandBuffer(
|
||||
cmdList,
|
||||
ptrOffset(cmdListStream->getCpuBase(), usedBefore),
|
||||
usedAfter - usedBefore));
|
||||
|
||||
walkerIterator = NEO::UnitTestHelper<FamilyType>::findWalkerCmd(cmdList.begin(), cmdList.end(), true);
|
||||
ASSERT_NE(cmdList.end(), walkerIterator);
|
||||
void *walkerPtrWithoutScratch = *walkerIterator;
|
||||
|
||||
if (!useImmediate) {
|
||||
result = commandList->close();
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
|
||||
auto commandListHandle = commandList->toHandle();
|
||||
result = commandQueue->executeCommandLists(1, &commandListHandle, nullptr, false, nullptr, 0, nullptr);
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
}
|
||||
|
||||
auto scratchAddress = scratchController->getScratchPatchAddress();
|
||||
auto fullScratchAddress = surfaceHeapGpuBase + scratchAddress;
|
||||
|
||||
uint64_t scratchInlineValue = 0;
|
||||
|
||||
void *scratchInlinePtr = ptrOffset(walkerPtrWithScratch, (inlineOffset + scratchInlineOffset));
|
||||
std::memcpy(&scratchInlineValue, scratchInlinePtr, sizeof(scratchInlineValue));
|
||||
EXPECT_EQ(fullScratchAddress, scratchInlineValue);
|
||||
|
||||
scratchInlinePtr = ptrOffset(walkerPtrWithoutScratch, (inlineOffset + scratchInlineOffset));
|
||||
std::memcpy(&scratchInlineValue, scratchInlinePtr, sizeof(scratchInlineValue));
|
||||
EXPECT_EQ(0u, scratchInlineValue);
|
||||
|
||||
auto scratch0Allocation = scratchController->getScratchSpaceSlot0Allocation();
|
||||
bool scratchInResidency = ultCsr->isMadeResident(scratch0Allocation);
|
||||
EXPECT_TRUE(scratchInResidency);
|
||||
}
|
||||
|
||||
} // namespace ult
|
||||
} // namespace L0
|
||||
|
||||
@@ -80,6 +80,8 @@ struct WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>
|
||||
using BaseClass::getDcFlushRequired;
|
||||
using BaseClass::getHostPtrAlloc;
|
||||
using BaseClass::getInOrderIncrementValue;
|
||||
using BaseClass::heaplessModeEnabled;
|
||||
using BaseClass::heaplessStateInitEnabled;
|
||||
using BaseClass::hostPtrMap;
|
||||
using BaseClass::immediateCmdListHeapSharing;
|
||||
using BaseClass::indirectAllocationsAllowed;
|
||||
@@ -102,6 +104,7 @@ struct WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>
|
||||
using BaseClass::pipelineSelectStateTracking;
|
||||
using BaseClass::requiredStreamState;
|
||||
using BaseClass::requiresQueueUncachedMocs;
|
||||
using BaseClass::scratchAddressPatchingEnabled;
|
||||
using BaseClass::setupTimestampEventForMultiTile;
|
||||
using BaseClass::signalAllEventPackets;
|
||||
using BaseClass::stateBaseAddressTracking;
|
||||
@@ -273,6 +276,8 @@ struct WhiteBox<::L0::CommandListImp> : public ::L0::CommandListImp {
|
||||
using BaseClass::finalStreamState;
|
||||
using BaseClass::frontEndStateTracking;
|
||||
using BaseClass::getDcFlushRequired;
|
||||
using BaseClass::heaplessModeEnabled;
|
||||
using BaseClass::heaplessStateInitEnabled;
|
||||
using BaseClass::immediateCmdListHeapSharing;
|
||||
using BaseClass::initialize;
|
||||
using BaseClass::isFlushTaskSubmissionEnabled;
|
||||
@@ -283,6 +288,7 @@ struct WhiteBox<::L0::CommandListImp> : public ::L0::CommandListImp {
|
||||
using BaseClass::pipelineSelectStateTracking;
|
||||
using BaseClass::requiredStreamState;
|
||||
using BaseClass::requiresQueueUncachedMocs;
|
||||
using BaseClass::scratchAddressPatchingEnabled;
|
||||
using BaseClass::signalAllEventPackets;
|
||||
using BaseClass::stateBaseAddressTracking;
|
||||
using BaseClass::stateComputeModeTracking;
|
||||
|
||||
@@ -39,6 +39,8 @@ struct WhiteBox<::L0::CommandQueue> : public ::L0::CommandQueueImp {
|
||||
using CommandQueue::dispatchCmdListBatchBufferAsPrimary;
|
||||
using CommandQueue::doubleSbaWa;
|
||||
using CommandQueue::frontEndStateTracking;
|
||||
using CommandQueue::heaplessModeEnabled;
|
||||
using CommandQueue::heaplessStateInitEnabled;
|
||||
using CommandQueue::internalQueueForImmediateCommandList;
|
||||
using CommandQueue::internalUsage;
|
||||
using CommandQueue::partitionCount;
|
||||
|
||||
@@ -3000,6 +3000,8 @@ TEST_F(CommandListCreate, givenCreatedCommandListWhenGettingTrackingFlagsThenDef
|
||||
|
||||
auto expectedDispatchCmdListBatchBufferAsPrimary = L0GfxCoreHelper::dispatchCmdListBatchBufferAsPrimary(rootDeviceEnvironment, true);
|
||||
EXPECT_EQ(expectedDispatchCmdListBatchBufferAsPrimary, commandList->getCmdListBatchBufferFlag());
|
||||
|
||||
EXPECT_FALSE(commandList->scratchAddressPatchingEnabled);
|
||||
}
|
||||
|
||||
TEST(BuiltinTypeHelperTest, givenNonStatelessAndNonHeaplessWhenAdjustBuiltinTypeIsCalledThenCorrectBuiltinTypeIsReturned) {
|
||||
|
||||
@@ -1552,5 +1552,32 @@ HWTEST_F(CommandListCreate, givenDeviceWhenCreatingCommandListForNotInternalUsag
|
||||
EXPECT_FALSE(whiteboxCommandList->internalUsage);
|
||||
whiteboxCommandList->destroy();
|
||||
}
|
||||
|
||||
using CommandListScratchPatchPrivateHeapsTest = Test<CommandListScratchPatchFixture<0, 0>>;
|
||||
using CommandListScratchPatchGlobalStatelessHeapsTest = Test<CommandListScratchPatchFixture<1, 0>>;
|
||||
|
||||
using CommandListScratchPatchPrivateHeapsStateInitTest = Test<CommandListScratchPatchFixture<0, 1>>;
|
||||
using CommandListScratchPatchGlobalStatelessHeapsStateInitTest = Test<CommandListScratchPatchFixture<1, 1>>;
|
||||
|
||||
HWTEST2_F(CommandListScratchPatchPrivateHeapsTest,
|
||||
givenHeaplessWithScratchPatchEnabledOnRegularCmdListWhenAppendingAndExecutingKernelWithScratchThenExpectCorrectAddressPatched, IsAtLeastXeHpcCore) {
|
||||
testScratchInline<FamilyType>(false);
|
||||
}
|
||||
|
||||
HWTEST2_F(CommandListScratchPatchGlobalStatelessHeapsTest,
|
||||
givenHeaplessWithScratchPatchEnabledOnRegularCmdListWhenAppendingAndExecutingKernelWithScratchThenExpectCorrectAddressPatched, IsAtLeastXeHpcCore) {
|
||||
testScratchInline<FamilyType>(false);
|
||||
}
|
||||
|
||||
HWTEST2_F(CommandListScratchPatchPrivateHeapsStateInitTest,
|
||||
givenHeaplessWithScratchPatchEnabledOnRegularCmdListWhenAppendingAndExecutingKernelWithScratchThenExpectCorrectAddressPatched, IsAtLeastXeHpcCore) {
|
||||
testScratchInline<FamilyType>(false);
|
||||
}
|
||||
|
||||
HWTEST2_F(CommandListScratchPatchGlobalStatelessHeapsStateInitTest,
|
||||
givenHeaplessWithScratchPatchEnabledOnRegularCmdListWhenAppendingAndExecutingKernelWithScratchThenExpectCorrectAddressPatched, IsAtLeastXeHpcCore) {
|
||||
testScratchInline<FamilyType>(false);
|
||||
}
|
||||
|
||||
} // namespace ult
|
||||
} // namespace L0
|
||||
|
||||
@@ -214,6 +214,7 @@ HWTEST2_F(CommandListAppendLaunchKernel, givenNotEnoughSpaceInCommandStreamWhenA
|
||||
commandList->getDcFlushRequired(true), // dcFlushEnable
|
||||
false, // isHeaplessModeEnabled
|
||||
false, // interruptEvent
|
||||
false, // immediateScratchAddressPatching
|
||||
};
|
||||
NEO::EncodeDispatchKernel<FamilyType>::template encode<DefaultWalkerType>(commandContainer, dispatchKernelArgs);
|
||||
|
||||
|
||||
@@ -711,6 +711,7 @@ HWTEST2_F(CommandListAppendLaunchKernel, givenNotEnoughSpaceInCommandStreamWhenA
|
||||
commandList->getDcFlushRequired(true), // dcFlushEnable
|
||||
false, // isHeaplessModeEnabled
|
||||
false, // interruptEvent
|
||||
false, // immediateScratchAddressPatching
|
||||
};
|
||||
EXPECT_THROW(NEO::EncodeDispatchKernel<FamilyType>::template encode<DefaultWalkerType>(commandContainer, dispatchKernelArgs), std::exception);
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user