feature: enable scratch address patching on regular command lists

Related-To: NEO-10381

Signed-off-by: Zbigniew Zdanowicz <zbigniew.zdanowicz@intel.com>
This commit is contained in:
Zbigniew Zdanowicz
2024-04-05 12:36:43 +00:00
committed by Compute-Runtime-Automation
parent a468827c12
commit 1f35eaf7d8
6 changed files with 69 additions and 10 deletions

View File

@@ -242,6 +242,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::initialize(Device *device, NEO
this->finalStreamState.initSupport(rootDeviceEnvironment);
this->duplicatedInOrderCounterStorageEnabled = gfxCoreHelper.duplicatedInOrderCounterStorageEnabled(rootDeviceEnvironment);
this->inOrderAtomicSignalingEnabled = gfxCoreHelper.inOrderAtomicSignallingEnabled(rootDeviceEnvironment);
this->scratchAddressPatchingEnabled = (this->heaplessModeEnabled && !isImmediateType());
this->commandContainer.doubleSbaWaRef() = this->doubleSbaWa;
this->commandContainer.l1CachePolicyDataRef() = &this->l1CachePolicyData;

View File

@@ -581,13 +581,13 @@ void CommandQueueThreadArbitrationPolicyFixture::tearDown() {
L0::globalDriver = nullptr;
}
void CommandListScratchPatchFixtureInit::setUpParams(int32_t globalStatelessMode, int32_t heaplessStateInitEnabled) {
void CommandListScratchPatchFixtureInit::setUpParams(int32_t globalStatelessMode, int32_t heaplessStateInitEnabled, bool scratchAddressPatchingEnabled) {
fixtureGlobalStatelessMode = globalStatelessMode;
debugManager.flags.SelectCmdListHeapAddressModel.set(globalStatelessMode);
ModuleMutableCommandListFixture::setUp();
commandList->scratchAddressPatchingEnabled = true;
commandList->scratchAddressPatchingEnabled = scratchAddressPatchingEnabled;
commandList->heaplessModeEnabled = true;
commandList->heaplessStateInitEnabled = !!heaplessStateInitEnabled;

View File

@@ -363,7 +363,7 @@ struct CommandQueueThreadArbitrationPolicyFixture {
};
struct CommandListScratchPatchFixtureInit : public ModuleMutableCommandListFixture {
void setUpParams(int32_t globalStatelessMode, int32_t heaplessStateInitEnabled);
void setUpParams(int32_t globalStatelessMode, int32_t heaplessStateInitEnabled, bool scratchAddressPatchingEnabled);
void tearDown();
uint64_t getSurfStateGpuBase(bool useImmediate);
@@ -377,15 +377,18 @@ struct CommandListScratchPatchFixtureInit : public ModuleMutableCommandListFixtu
template <typename FamilyType>
void testScratchSameNotPatching();
template <typename FamilyType>
void testScratchImmediatePatching();
int32_t fixtureGlobalStatelessMode = 0;
uint32_t scratchInlineOffset = 8;
uint32_t scratchInlinePointerSize = sizeof(uint64_t);
};
// Fixture parameterized at compile time: setUp() forwards the template arguments unchanged to
// CommandListScratchPatchFixtureInit::setUpParams().
// NOTE(review): this text is a rendered diff without +/- markers; the two-parameter and
// three-parameter variants of the template header and of the setUpParams call appear to be the
// removed and added versions of the same line - confirm against the original patch.
template <int32_t globalStatelessMode, int32_t heaplessStateInitEnabled>
template <int32_t globalStatelessMode, int32_t heaplessStateInitEnabled, bool scratchAddressPatchingEnabled>
struct CommandListScratchPatchFixture : public CommandListScratchPatchFixtureInit {
// Runs the shared fixture initialization with the values selected by the template arguments.
void setUp() {
CommandListScratchPatchFixtureInit::setUpParams(globalStatelessMode, heaplessStateInitEnabled);
CommandListScratchPatchFixtureInit::setUpParams(globalStatelessMode, heaplessStateInitEnabled, scratchAddressPatchingEnabled);
}
};

View File

@@ -1640,5 +1640,60 @@ void CommandListScratchPatchFixtureInit::testScratchSameNotPatching() {
EXPECT_EQ(0u, scratchInlineValue);
}
template <typename FamilyType>
void CommandListScratchPatchFixtureInit::testScratchImmediatePatching() {
auto csr = device->getNEODevice()->getDefaultEngine().commandStreamReceiver;
auto scratchController = csr->getScratchSpaceController();
NEO::EncodeDispatchKernelArgs dispatchKernelArgs = {};
dispatchKernelArgs.isHeaplessModeEnabled = true;
size_t inlineOffset = NEO::EncodeDispatchKernel<FamilyType>::getInlineDataOffset(dispatchKernelArgs);
uint64_t surfaceHeapGpuBase = getSurfStateGpuBase(false);
auto cmdListStream = commandList->commandContainer.getCommandStream();
const ze_group_count_t groupCount{1, 1, 1};
CmdListKernelLaunchParams launchParams = {};
auto result = ZE_RESULT_SUCCESS;
size_t usedBefore = cmdListStream->getUsed();
result = commandList->appendLaunchKernel(kernel->toHandle(), groupCount, nullptr, 0, nullptr, launchParams, false);
size_t usedAfter = cmdListStream->getUsed();
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
GenCmdList cmdList;
ASSERT_TRUE(FamilyType::Parse::parseCommandBuffer(
cmdList,
ptrOffset(cmdListStream->getCpuBase(), usedBefore),
usedAfter - usedBefore));
auto walkerIterator = NEO::UnitTestHelper<FamilyType>::findWalkerCmd(cmdList.begin(), cmdList.end(), true);
ASSERT_NE(cmdList.end(), walkerIterator);
void *walkerPtrWithScratch = *walkerIterator;
result = commandList->close();
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
auto scratchAddress = scratchController->getScratchPatchAddress();
auto fullScratchAddress = surfaceHeapGpuBase + scratchAddress;
uint64_t scratchInlineValue = 0;
void *scratchInlinePtr = ptrOffset(walkerPtrWithScratch, (inlineOffset + scratchInlineOffset));
std::memcpy(&scratchInlineValue, scratchInlinePtr, sizeof(scratchInlineValue));
EXPECT_EQ(fullScratchAddress, scratchInlineValue);
memset(scratchInlinePtr, 0, scratchInlinePointerSize);
auto commandListHandle = commandList->toHandle();
result = commandQueue->executeCommandLists(1, &commandListHandle, nullptr, false, nullptr, 0, nullptr);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
std::memcpy(&scratchInlineValue, scratchInlinePtr, sizeof(scratchInlineValue));
EXPECT_EQ(0u, scratchInlineValue);
}
} // namespace ult
} // namespace L0

View File

@@ -3001,7 +3001,7 @@ TEST_F(CommandListCreate, givenCreatedCommandListWhenGettingTrackingFlagsThenDef
auto expectedDispatchCmdListBatchBufferAsPrimary = L0GfxCoreHelper::dispatchCmdListBatchBufferAsPrimary(rootDeviceEnvironment, true);
EXPECT_EQ(expectedDispatchCmdListBatchBufferAsPrimary, commandList->getCmdListBatchBufferFlag());
EXPECT_FALSE(commandList->scratchAddressPatchingEnabled);
EXPECT_EQ(commandList->heaplessModeEnabled, commandList->scratchAddressPatchingEnabled);
}
TEST(BuiltinTypeHelperTest, givenNonStatelessAndNonHeaplessWhenAdjustBuiltinTypeIsCalledThenCorrectBuiltinTypeIsReturned) {

View File

@@ -1553,11 +1553,11 @@ HWTEST_F(CommandListCreate, givenDeviceWhenCreatingCommandListForNotInternalUsag
whiteboxCommandList->destroy();
}
// Test fixture aliases over CommandListScratchPatchFixture. Template arguments are, in order:
// globalStatelessMode, heaplessStateInitEnabled and (three-argument form) scratchAddressPatchingEnabled.
// NOTE(review): this text is a rendered diff without +/- markers; aliases that share a name in
// two-argument and three-argument form appear to be the removed and added versions of the same
// line - confirm against the original patch.
using CommandListScratchPatchPrivateHeapsTest = Test<CommandListScratchPatchFixture<0, 0>>;
using CommandListScratchPatchGlobalStatelessHeapsTest = Test<CommandListScratchPatchFixture<1, 0>>;
using CommandListScratchPatchPrivateHeapsTest = Test<CommandListScratchPatchFixture<0, 0, true>>;
using CommandListScratchPatchGlobalStatelessHeapsTest = Test<CommandListScratchPatchFixture<1, 0, true>>;
using CommandListScratchPatchPrivateHeapsStateInitTest = Test<CommandListScratchPatchFixture<0, 1>>;
using CommandListScratchPatchGlobalStatelessHeapsStateInitTest = Test<CommandListScratchPatchFixture<1, 1>>;
using CommandListScratchPatchPrivateHeapsStateInitTest = Test<CommandListScratchPatchFixture<0, 1, true>>;
using CommandListScratchPatchGlobalStatelessHeapsStateInitTest = Test<CommandListScratchPatchFixture<1, 1, true>>;
HWTEST2_F(CommandListScratchPatchPrivateHeapsTest,
givenHeaplessWithScratchPatchEnabledOnRegularCmdListWhenAppendingAndExecutingKernelWithScratchThenExpectCorrectAddressPatched, IsAtLeastXeHpcCore) {