mirror of
https://github.com/intel/compute-runtime.git
synced 2026-01-09 22:43:00 +08:00
feature: prework to enable memory prefetch
Related-To: NEO-14703
Signed-off-by: Bartosz Dunajski <bartosz.dunajski@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
5714d9170c
commit
ff7d0a76cb
@@ -233,6 +233,7 @@ struct CommandListCoreFamily : public CommandListImp {
|
||||
void patchInOrderCmds() override;
|
||||
MOCKABLE_VIRTUAL bool handleCounterBasedEventOperations(Event *signalEvent, bool skipAddingEventToResidency);
|
||||
bool isCbEventBoundToCmdList(Event *event) const;
|
||||
// Reports whether kernel memory prefetch is enabled for this command list.
// Declared with `override`, so it implements a virtual inherited from a base class;
// the definition is provided out of line.
bool kernelMemoryPrefetchEnabled() const override;
|
||||
|
||||
protected:
|
||||
MOCKABLE_VIRTUAL ze_result_t appendMemoryCopyKernelWithGA(void *dstPtr, NEO::GraphicsAllocation *dstPtrAlloc,
|
||||
@@ -420,7 +421,6 @@ struct CommandListCoreFamily : public CommandListImp {
|
||||
// Default implementation returns the largest value representable in uint64_t
// (presumably a "no id" sentinel - confirm against the overriding classes).
// Virtual so derived classes may provide their own value.
virtual uint64_t getPrefetchCmdId() const {
    constexpr uint64_t defaultPrefetchCmdId = std::numeric_limits<uint64_t>::max();
    return defaultPrefetchCmdId;
}
|
||||
virtual uint32_t getIohSizeForPrefetch(const Kernel &kernel, uint32_t reserveExtraSpace) const;
|
||||
// Default implementation is a no-op; virtual so derived classes may override it.
virtual void ensureCmdBufferSpaceForPrefetch() {}
|
||||
// Kernel memory prefetch counts as enabled only when the EnableMemoryPrefetch
// debug flag reads exactly 1; any other value yields false.
bool kernelMemoryPrefetchEnabled() const {
    const auto prefetchFlag = NEO::debugManager.flags.EnableMemoryPrefetch.get();
    return prefetchFlag == 1;
}
|
||||
|
||||
NEO::InOrderPatchCommandsContainer<GfxFamily> inOrderPatchCmds;
|
||||
|
||||
|
||||
@@ -1575,6 +1575,9 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopyBlit(uintptr_t
|
||||
return ZE_RESULT_SUCCESS;
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
// Returns true only when the EnableMemoryPrefetch debug flag reads exactly 1;
// any other flag value reports prefetch as disabled.
bool CommandListCoreFamily<gfxCoreFamily>::kernelMemoryPrefetchEnabled() const {
    const auto prefetchFlag = NEO::debugManager.flags.EnableMemoryPrefetch.get();
    return prefetchFlag == 1;
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopyBlitRegion(AlignedAllocationData *srcAllocationData,
|
||||
AlignedAllocationData *dstAllocationData,
|
||||
|
||||
@@ -48,6 +48,7 @@ struct CommandListImp : public CommandList {
|
||||
// Read-only accessor: hands back the current synchronizedDispatchMode value by copy.
NEO::SynchronizedDispatchMode getSynchronizedDispatchMode() const {
    return synchronizedDispatchMode;
}
|
||||
void enableCopyOperationOffload();
|
||||
void setInterruptEventsCsr(NEO::CommandStreamReceiver &csr);
|
||||
// Pure virtual query for whether kernel memory prefetch is enabled.
// No implementation is given here (`= 0`), so every concrete derived type must supply one.
virtual bool kernelMemoryPrefetchEnabled() const = 0;
|
||||
|
||||
protected:
|
||||
std::shared_ptr<NEO::InOrderExecInfo> inOrderExecInfo;
|
||||
|
||||
@@ -355,6 +355,8 @@ struct MockCommandList : public CommandList {
|
||||
ADDMETHOD_NOBASE(destroy, ze_result_t, ZE_RESULT_SUCCESS, ());
|
||||
ADDMETHOD_NOBASE_VOIDRETURN(patchInOrderCmds, (void));
|
||||
|
||||
ADDMETHOD_CONST_NOBASE(kernelMemoryPrefetchEnabled, bool, false, (void));
|
||||
|
||||
ADDMETHOD_NOBASE(appendLaunchKernel, ze_result_t, ZE_RESULT_SUCCESS,
|
||||
(ze_kernel_handle_t kernelHandle,
|
||||
const ze_group_count_t &threadGroupDimensions,
|
||||
|
||||
@@ -2143,7 +2143,10 @@ HWTEST2_F(CommandListCreateTests, givenInOrderExecutionWhenDispatchingRelaxedOrd
|
||||
cmdStream->getUsed() - offset));
|
||||
|
||||
// init registers
|
||||
auto lrrCmd = genCmdCast<MI_LOAD_REGISTER_REG *>(*genCmdList.begin());
|
||||
auto iter = genCmdList.begin();
|
||||
UnitTestHelper<FamilyType>::skipStatePrefetch(iter);
|
||||
|
||||
auto lrrCmd = genCmdCast<MI_LOAD_REGISTER_REG *>(*iter);
|
||||
ASSERT_NE(nullptr, lrrCmd);
|
||||
lrrCmd++;
|
||||
lrrCmd++;
|
||||
|
||||
@@ -3446,7 +3446,14 @@ HWTEST2_F(CommandListStateBaseAddressPrivateHeapTest,
|
||||
if (ssh) {
|
||||
EXPECT_EQ(oldGfxHeapAllocation, ssh->getGraphicsAllocation());
|
||||
}
|
||||
EXPECT_EQ(usedBefore, cmdListStream.getUsed());
|
||||
|
||||
size_t prefetchSize = 0;
|
||||
if (commandList->kernelMemoryPrefetchEnabled()) {
|
||||
prefetchSize = NEO::EncodeMemoryPrefetch<FamilyType>::getSizeForMemoryPrefetch(kernel->getIndirectSize(), device->getNEODevice()->getRootDeviceEnvironment()) +
|
||||
NEO::EncodeMemoryPrefetch<FamilyType>::getSizeForMemoryPrefetch(kernel->getImmutableData()->getIsaSize(), device->getNEODevice()->getRootDeviceEnvironment());
|
||||
}
|
||||
|
||||
EXPECT_EQ(usedBefore + prefetchSize, cmdListStream.getUsed());
|
||||
}
|
||||
|
||||
} // namespace ult
|
||||
|
||||
@@ -882,6 +882,7 @@ HWTEST2_F(FrontEndMultiReturnCommandListTest, givenFrontEndTrackingIsUsedWhenPro
|
||||
}
|
||||
|
||||
HWTEST2_F(FrontEndMultiReturnCommandListTest, givenFrontEndTrackingIsUsedWhenPropertyComputeDispatchAllWalkerSupportedThenExpectReturnPointsAndBbEndProgramming, FrontEndMultiReturnMatcher) {
|
||||
debugManager.flags.EnableMemoryPrefetch.set(0);
|
||||
using MI_BATCH_BUFFER_END = typename FamilyType::MI_BATCH_BUFFER_END;
|
||||
NEO::FrontEndPropertiesSupport fePropertiesSupport = {};
|
||||
auto &productHelper = device->getProductHelper();
|
||||
@@ -1296,6 +1297,7 @@ HWTEST2_F(FrontEndMultiReturnCommandListTest,
|
||||
|
||||
HWTEST2_F(FrontEndMultiReturnCommandListTest,
|
||||
givenFrontEndTrackingCmdListIsExecutedWhenPropertyComputeDispatchAllWalkerSupportedThenExpectFrontEndProgrammingInCmdQueue, FrontEndMultiReturnMatcher) {
|
||||
debugManager.flags.EnableMemoryPrefetch.set(0);
|
||||
using FrontEndStateCommand = typename FamilyType::FrontEndStateCommand;
|
||||
using MI_BATCH_BUFFER_START = typename FamilyType::MI_BATCH_BUFFER_START;
|
||||
using MI_BATCH_BUFFER_END = typename FamilyType::MI_BATCH_BUFFER_END;
|
||||
|
||||
@@ -2741,12 +2741,14 @@ HWTEST2_F(CommandListAppendLaunchKernel,
|
||||
ASSERT_NE(0u, storeRegMemList.size());
|
||||
ASSERT_NE(0u, outStoreRegMemCmdList.size());
|
||||
|
||||
ASSERT_EQ(storeRegMemList.size(), outStoreRegMemCmdList.size());
|
||||
size_t additionalPatchCmdsSize = commandList->kernelMemoryPrefetchEnabled() ? 1 : 0;
|
||||
|
||||
ASSERT_EQ(storeRegMemList.size(), outStoreRegMemCmdList.size() - additionalPatchCmdsSize);
|
||||
|
||||
for (size_t i = 0; i < storeRegMemList.size(); i++) {
|
||||
MI_STORE_REGISTER_MEM *storeRegMem = genCmdCast<MI_STORE_REGISTER_MEM *>(*storeRegMemList[i]);
|
||||
|
||||
auto &cmdToPatch = outStoreRegMemCmdList[i];
|
||||
auto &cmdToPatch = outStoreRegMemCmdList[i + additionalPatchCmdsSize];
|
||||
EXPECT_EQ(CommandToPatch::TimestampEventPostSyncStoreRegMem, cmdToPatch.type);
|
||||
MI_STORE_REGISTER_MEM *outStoreRegMem = genCmdCast<MI_STORE_REGISTER_MEM *>(cmdToPatch.pDestination);
|
||||
ASSERT_NE(nullptr, outStoreRegMem);
|
||||
@@ -2804,7 +2806,9 @@ HWTEST2_F(CommandListAppendLaunchKernel,
|
||||
ptrOffset(cmdStream->getCpuBase(), commandStreamOffset),
|
||||
cmdStream->getUsed() - commandStreamOffset));
|
||||
|
||||
ASSERT_EQ(0u, outCbEventCmds.size());
|
||||
size_t additionalPatchCmdsSize = commandList->kernelMemoryPrefetchEnabled() ? 1 : 0;
|
||||
|
||||
ASSERT_EQ(additionalPatchCmdsSize, outCbEventCmds.size());
|
||||
auto eventBaseAddress = event->getGpuAddress(device);
|
||||
|
||||
WalkerVariant walkerVariant = NEO::UnitTestHelper<FamilyType>::getWalkerVariant(launchParams.outWalker);
|
||||
@@ -2878,8 +2882,9 @@ HWTEST2_F(CommandListAppendLaunchKernel,
|
||||
auto inOrderAllocation = event->getInOrderExecInfo()->getDeviceCounterAllocation();
|
||||
|
||||
size_t expectedLoadRegImmCount = FamilyType::isQwordInOrderCounter ? 2 : 0;
|
||||
size_t additionalPatchCmdsSize = commandList->kernelMemoryPrefetchEnabled() ? 1 : 0;
|
||||
|
||||
size_t expectedWaitCmds = 1 + expectedLoadRegImmCount;
|
||||
size_t expectedWaitCmds = 1 + expectedLoadRegImmCount + additionalPatchCmdsSize;
|
||||
ASSERT_EQ(expectedWaitCmds, outCbWaitEventCmds.size());
|
||||
|
||||
auto loadRegImmList = findAll<MI_LOAD_REGISTER_IMM *>(cmdList.begin(), cmdList.end());
|
||||
@@ -2889,27 +2894,31 @@ HWTEST2_F(CommandListAppendLaunchKernel,
|
||||
|
||||
size_t outCbWaitEventCmdsIndex = 0;
|
||||
for (; outCbWaitEventCmdsIndex < expectedLoadRegImmCount; outCbWaitEventCmdsIndex++) {
|
||||
EXPECT_EQ(CommandToPatch::CbWaitEventLoadRegisterImm, outCbWaitEventCmds[outCbWaitEventCmdsIndex].type);
|
||||
ASSERT_NE(nullptr, outCbWaitEventCmds[outCbWaitEventCmdsIndex].pDestination);
|
||||
ASSERT_EQ(*loadRegImmList[outCbWaitEventCmdsIndex], outCbWaitEventCmds[outCbWaitEventCmdsIndex].pDestination);
|
||||
auto loadRegImmCmd = genCmdCast<MI_LOAD_REGISTER_IMM *>(outCbWaitEventCmds[outCbWaitEventCmdsIndex].pDestination);
|
||||
auto &cmd = outCbWaitEventCmds[outCbWaitEventCmdsIndex + additionalPatchCmdsSize];
|
||||
|
||||
EXPECT_EQ(CommandToPatch::CbWaitEventLoadRegisterImm, cmd.type);
|
||||
ASSERT_NE(nullptr, cmd.pDestination);
|
||||
ASSERT_EQ(*loadRegImmList[outCbWaitEventCmdsIndex], cmd.pDestination);
|
||||
auto loadRegImmCmd = genCmdCast<MI_LOAD_REGISTER_IMM *>(cmd.pDestination);
|
||||
ASSERT_NE(nullptr, loadRegImmCmd);
|
||||
EXPECT_EQ(0u, outCbWaitEventCmds[outCbWaitEventCmdsIndex].inOrderPatchListIndex);
|
||||
EXPECT_EQ(0u, cmd.inOrderPatchListIndex);
|
||||
auto registerNumber = 0x2600 + (4 * outCbWaitEventCmdsIndex);
|
||||
EXPECT_EQ(registerNumber, outCbWaitEventCmds[outCbWaitEventCmdsIndex].offset);
|
||||
EXPECT_EQ(registerNumber, cmd.offset);
|
||||
}
|
||||
|
||||
EXPECT_EQ(CommandToPatch::CbWaitEventSemaphoreWait, outCbWaitEventCmds[outCbWaitEventCmdsIndex].type);
|
||||
ASSERT_NE(nullptr, outCbWaitEventCmds[outCbWaitEventCmdsIndex].pDestination);
|
||||
ASSERT_EQ(*semaphoreWaitList[0], outCbWaitEventCmds[outCbWaitEventCmdsIndex].pDestination);
|
||||
auto semaphoreWaitCmd = genCmdCast<MI_SEMAPHORE_WAIT *>(outCbWaitEventCmds[outCbWaitEventCmdsIndex].pDestination);
|
||||
auto &cmd = outCbWaitEventCmds[outCbWaitEventCmdsIndex + additionalPatchCmdsSize];
|
||||
|
||||
EXPECT_EQ(CommandToPatch::CbWaitEventSemaphoreWait, cmd.type);
|
||||
ASSERT_NE(nullptr, cmd.pDestination);
|
||||
ASSERT_EQ(*semaphoreWaitList[0], cmd.pDestination);
|
||||
auto semaphoreWaitCmd = genCmdCast<MI_SEMAPHORE_WAIT *>(cmd.pDestination);
|
||||
ASSERT_NE(nullptr, semaphoreWaitCmd);
|
||||
EXPECT_EQ(eventCompletionAddress + outCbWaitEventCmds[outCbWaitEventCmdsIndex].offset, semaphoreWaitCmd->getSemaphoreGraphicsAddress());
|
||||
EXPECT_EQ(eventCompletionAddress + cmd.offset, semaphoreWaitCmd->getSemaphoreGraphicsAddress());
|
||||
|
||||
if (FamilyType::isQwordInOrderCounter) {
|
||||
EXPECT_EQ(std::numeric_limits<size_t>::max(), outCbWaitEventCmds[outCbWaitEventCmdsIndex].inOrderPatchListIndex);
|
||||
EXPECT_EQ(std::numeric_limits<size_t>::max(), cmd.inOrderPatchListIndex);
|
||||
} else {
|
||||
EXPECT_EQ(0u, outCbWaitEventCmds[outCbWaitEventCmdsIndex].inOrderPatchListIndex);
|
||||
EXPECT_EQ(0u, cmd.inOrderPatchListIndex);
|
||||
}
|
||||
|
||||
auto &residencyContainer = commandContainer.getResidencyContainer();
|
||||
|
||||
@@ -6330,24 +6330,29 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, InOrderCmdListTests, givenInOrderModeAndNoopWaitEve
|
||||
|
||||
size_t expectedLoadRegImmCount = FamilyType::isQwordInOrderCounter ? 2 : 0;
|
||||
|
||||
size_t expectedWaitCmds = 1 + expectedLoadRegImmCount;
|
||||
size_t additionalPatchCmdsSize = regularCmdList->kernelMemoryPrefetchEnabled() ? 1 : 0;
|
||||
size_t expectedWaitCmds = 1 + expectedLoadRegImmCount + additionalPatchCmdsSize;
|
||||
ASSERT_EQ(expectedWaitCmds, outCbWaitEventCmds.size());
|
||||
|
||||
size_t outCbWaitEventCmdsIndex = 0;
|
||||
for (; outCbWaitEventCmdsIndex < expectedLoadRegImmCount; outCbWaitEventCmdsIndex++) {
|
||||
EXPECT_EQ(CommandToPatch::CbWaitEventLoadRegisterImm, outCbWaitEventCmds[outCbWaitEventCmdsIndex].type);
|
||||
auto registerNumber = 0x2600 + (4 * outCbWaitEventCmdsIndex);
|
||||
EXPECT_EQ(registerNumber, outCbWaitEventCmds[outCbWaitEventCmdsIndex].offset);
|
||||
auto &cmd = outCbWaitEventCmds[outCbWaitEventCmdsIndex + additionalPatchCmdsSize];
|
||||
|
||||
ASSERT_NE(nullptr, outCbWaitEventCmds[outCbWaitEventCmdsIndex].pDestination);
|
||||
auto memCmpRet = memcmp(outCbWaitEventCmds[outCbWaitEventCmdsIndex].pDestination, noopedLriBuffer, sizeof(MI_LOAD_REGISTER_IMM));
|
||||
EXPECT_EQ(CommandToPatch::CbWaitEventLoadRegisterImm, cmd.type);
|
||||
auto registerNumber = 0x2600 + (4 * outCbWaitEventCmdsIndex);
|
||||
EXPECT_EQ(registerNumber, cmd.offset);
|
||||
|
||||
ASSERT_NE(nullptr, cmd.pDestination);
|
||||
auto memCmpRet = memcmp(cmd.pDestination, noopedLriBuffer, sizeof(MI_LOAD_REGISTER_IMM));
|
||||
EXPECT_EQ(0, memCmpRet);
|
||||
}
|
||||
|
||||
EXPECT_EQ(CommandToPatch::CbWaitEventSemaphoreWait, outCbWaitEventCmds[outCbWaitEventCmdsIndex].type);
|
||||
auto &cmd = outCbWaitEventCmds[outCbWaitEventCmdsIndex + additionalPatchCmdsSize];
|
||||
|
||||
ASSERT_NE(nullptr, outCbWaitEventCmds[outCbWaitEventCmdsIndex].pDestination);
|
||||
auto memCmpRet = memcmp(outCbWaitEventCmds[outCbWaitEventCmdsIndex].pDestination, noopedSemWaitBuffer, sizeof(MI_SEMAPHORE_WAIT));
|
||||
EXPECT_EQ(CommandToPatch::CbWaitEventSemaphoreWait, cmd.type);
|
||||
|
||||
ASSERT_NE(nullptr, cmd.pDestination);
|
||||
auto memCmpRet = memcmp(cmd.pDestination, noopedSemWaitBuffer, sizeof(MI_SEMAPHORE_WAIT));
|
||||
EXPECT_EQ(0, memCmpRet);
|
||||
}
|
||||
|
||||
|
||||
@@ -2908,6 +2908,9 @@ HWTEST2_F(MultiTileSynchronizedDispatchTests, givenFullSyncDispatchWhenAppending
|
||||
}
|
||||
|
||||
auto itor = cmdList.begin();
|
||||
|
||||
UnitTestHelper<FamilyType>::skipStatePrefetch(itor);
|
||||
|
||||
if (hasDependencySemaphore) {
|
||||
auto nPartition = std::min(immCmdList->inOrderExecInfo->getNumDevicePartitionsToWait(), partitionCount);
|
||||
for (uint32_t i = 0; i < nPartition; i++) {
|
||||
@@ -3527,6 +3530,7 @@ HWTEST2_F(MultiTileInOrderCmdListTests, givenAtomicSignallingEnabledWhenWaitingF
|
||||
ASSERT_EQ(2u + (ImplicitScalingDispatch<FamilyType>::getPipeControlStallRequired() ? 1 : 0), semaphores.size());
|
||||
|
||||
auto itor = cmdList.begin();
|
||||
UnitTestHelper<FamilyType>::skipStatePrefetch(itor);
|
||||
|
||||
// implicit dependency
|
||||
auto gpuAddress = immCmdList2->inOrderExecInfo->getBaseDeviceAddress();
|
||||
@@ -3582,6 +3586,8 @@ HWTEST2_F(MultiTileInOrderCmdListTests, givenMultiTileInOrderModeWhenProgramming
|
||||
(cmdStream->getUsed() - offset)));
|
||||
|
||||
auto itor = cmdList.begin();
|
||||
UnitTestHelper<FamilyType>::skipStatePrefetch(itor);
|
||||
|
||||
if (immCmdList->isQwordInOrderCounter()) {
|
||||
std::advance(itor, 2);
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user