Add initial support for KernelArgsBuffer allocation

Signed-off-by: Dunajski, Bartosz <bartosz.dunajski@intel.com>
This commit is contained in:
Dunajski, Bartosz
2022-08-03 11:54:08 +00:00
committed by Compute-Runtime-Automation
parent d3796b2b2d
commit 98d776867f
42 changed files with 147 additions and 40 deletions

View File

@@ -282,6 +282,8 @@ ze_result_t CommandQueueHw<gfxCoreFamily>::executeCommandLists(
: NEO::MemorySynchronizationCommands<GfxFamily>::getSizeForBarrierWithPostSyncOperation(hwInfo);
}
linearStreamSizeEstimate += NEO::EncodeKernelArgsBuffer<GfxFamily>::getKernelArgsBufferCmdsSize(csr->getKernelArgsBufferAllocation(), csr->getLogicalStateHelper());
size_t alignedSize = alignUp<size_t>(linearStreamSizeEstimate, minCmdBufferPtrAlign);
size_t padding = alignedSize - linearStreamSizeEstimate;
@@ -370,6 +372,12 @@ ze_result_t CommandQueueHw<gfxCoreFamily>::executeCommandLists(
csrHw->programActivePartitionConfig(child);
}
NEO::EncodeKernelArgsBuffer<GfxFamily>::encodeKernelArgsBufferCmds(csr->getKernelArgsBufferAllocation(), csr->getLogicalStateHelper());
if (csr->getKernelArgsBufferAllocation()) {
csr->makeResident(*csr->getKernelArgsBufferAllocation());
}
if (csr->getLogicalStateHelper()) {
if (frontEndStateDirty && !isCopyOnlyCommandQueue) {
programFrontEnd(scratchSpaceController->getScratchPatchAddress(), scratchSpaceController->getPerThreadScratchSpaceSize(), child);

View File

@@ -68,8 +68,7 @@ void CommandQueueHw<gfxCoreFamily>::programStateBaseAddress(uint64_t gsba, bool
false,
NEO::MemoryCompressionState::NotApplicable,
false,
1u,
nullptr);
1u);
*sbaCmdBuf = sbaCmd;
csr->setGSBAStateDirty(false);

View File

@@ -51,8 +51,7 @@ void CommandQueueHw<gfxCoreFamily>::programStateBaseAddress(uint64_t gsba, bool
multiOsContextCapable,
NEO::MemoryCompressionState::NotApplicable,
false,
1u,
nullptr);
1u);
*sbaCmdBuf = sbaCmd;
auto &hwInfoConfig = *NEO::HwInfoConfig::get(hwInfo.platform.eProductFamily);

View File

@@ -251,6 +251,7 @@ using CommandQueueCommandsMultiTile = CommandQueueCommands<true>;
HWTEST_F(CommandQueueCommandsSingleTile, givenCommandQueueWhenExecutingCommandListsThenHardwareContextIsProgrammedAndGlobalAllocationResident) {
MockCsrHw2<FamilyType> csr(*neoDevice->getExecutionEnvironment(), 0, neoDevice->getDeviceBitfield());
csr.createKernelArgsBufferAllocation();
csr.initializeTagAllocation();
csr.setupContext(*neoDevice->getDefaultEngine().osContext);
@@ -277,6 +278,34 @@ HWTEST_F(CommandQueueCommandsSingleTile, givenCommandQueueWhenExecutingCommandLi
commandQueue->destroy();
}
// Executes a single Copy engine-group command list on a queue backed by a mock CSR and
// checks that the CSR's kernel-args-buffer allocation, when one exists, ends up in the
// residency container.
HWTEST_F(CommandQueueCommandsSingleTile, givenCommandQueueWhenExecutingCommandListsThenKernelArgBufferAllocationIsResident) {
    MockCsrHw2<FamilyType> csr(*neoDevice->getExecutionEnvironment(), 0, neoDevice->getDeviceBitfield());
    csr.createKernelArgsBufferAllocation();
    csr.initializeTagAllocation();
    csr.setupContext(*neoDevice->getDefaultEngine().osContext);

    ze_result_t returnValue;
    L0::CommandQueue *queue = CommandQueue::create(productFamily, device, &csr, &desc, true, false, returnValue);
    ASSERT_NE(nullptr, queue);

    std::unique_ptr<L0::CommandList> list(CommandList::create(productFamily, device, NEO::EngineGroupType::Copy, 0u, returnValue));
    auto listHandle = list->toHandle();
    auto executeResult = queue->executeCommandLists(1, &listHandle, nullptr, false);

    // The getter may return null; residency is only verified when an allocation exists.
    if (auto argsBuffer = csr.getKernelArgsBufferAllocation()) {
        EXPECT_TRUE(isAllocationInResidencyContainer(csr, argsBuffer));
    }
    EXPECT_EQ(executeResult, ZE_RESULT_SUCCESS);

    queue->destroy();
}
HWTEST2_F(CommandQueueCommandsMultiTile, givenCommandQueueOnMultiTileWhenExecutingCommandListsThenWorkPartitionAllocationIsMadeResident, IsAtLeastXeHpCore) {
DebugManagerStateRestore restorer;
DebugManager.flags.EnableWalkerPartition.set(1);
@@ -296,6 +325,7 @@ HWTEST2_F(CommandQueueCommandsMultiTile, givenCommandQueueOnMultiTileWhenExecuti
MyCsrMock csr(*neoDevice->getExecutionEnvironment(), 0, neoDevice->getDeviceBitfield());
EXPECT_EQ(2u, csr.activePartitions);
csr.initializeTagAllocation();
csr.createKernelArgsBufferAllocation();
csr.createWorkPartitionAllocation(*neoDevice);
csr.setupContext(*neoDevice->getDefaultEngine().osContext);
@@ -352,6 +382,7 @@ HWTEST_F(CommandQueueIndirectAllocations, givenCommandQueueWhenExecutingCommandL
MockCsrHw2<FamilyType> csr(*neoDevice->getExecutionEnvironment(), 0, neoDevice->getDeviceBitfield());
csr.initializeTagAllocation();
csr.createKernelArgsBufferAllocation();
csr.setupContext(*neoDevice->getDefaultEngine().osContext);
if (device->getNEODevice()->getPreemptionMode() == PreemptionMode::MidThread || device->getNEODevice()->isDebuggerActive()) {
csr.createPreemptionAllocation();
@@ -416,6 +447,7 @@ HWTEST_F(CommandQueueIndirectAllocations, givenDebugModeToTreatIndirectAllocatio
MockCsrHw2<FamilyType> csr(*neoDevice->getExecutionEnvironment(), 0, neoDevice->getDeviceBitfield());
csr.initializeTagAllocation();
csr.createKernelArgsBufferAllocation();
csr.setupContext(*neoDevice->getDefaultEngine().osContext);
if (device->getNEODevice()->getPreemptionMode() == PreemptionMode::MidThread || device->getNEODevice()->isDebuggerActive()) {
csr.createPreemptionAllocation();
@@ -479,6 +511,7 @@ HWTEST_F(CommandQueueIndirectAllocations, givenDeviceThatSupportsSubmittingIndir
MockCsrHw2<FamilyType> csr(*neoDevice->getExecutionEnvironment(), 0, neoDevice->getDeviceBitfield());
csr.initializeTagAllocation();
csr.createKernelArgsBufferAllocation();
csr.setupContext(*neoDevice->getDefaultEngine().osContext);
if (device->getNEODevice()->getPreemptionMode() == PreemptionMode::MidThread || device->getNEODevice()->isDebuggerActive()) {
csr.createPreemptionAllocation();

View File

@@ -520,6 +520,7 @@ HWTEST_F(ContextMakeMemoryResidentAndMigrationTests,
const ze_command_queue_desc_t desc = {};
MockCsrHw2<FamilyType> csr(*neoDevice->getExecutionEnvironment(), 0, neoDevice->getDeviceBitfield());
csr.initializeTagAllocation();
csr.createKernelArgsBufferAllocation();
csr.setupContext(*neoDevice->getDefaultEngine().osContext);
ze_result_t returnValue;
@@ -570,6 +571,7 @@ HWTEST2_F(ContextMakeMemoryResidentAndMigrationTests,
const ze_command_queue_desc_t desc = {};
MockCsrHw2<FamilyType> csr(*neoDevice->getExecutionEnvironment(), 0, neoDevice->getDeviceBitfield());
csr.initializeTagAllocation();
csr.createKernelArgsBufferAllocation();
csr.setupContext(*neoDevice->getDefaultEngine().osContext);
ze_result_t returnValue;
@@ -618,6 +620,7 @@ HWTEST_F(ContextMakeMemoryResidentAndMigrationTests,
const ze_command_queue_desc_t desc = {};
MockCsrHw2<FamilyType> csr(*neoDevice->getExecutionEnvironment(), 0, neoDevice->getDeviceBitfield());
csr.initializeTagAllocation();
csr.createKernelArgsBufferAllocation();
csr.setupContext(*neoDevice->getDefaultEngine().osContext);
ze_result_t returnValue;