mirror of
https://github.com/intel/compute-runtime.git
synced 2026-01-03 23:03:02 +08:00
refactor: Enable CSR heap sharing on Older Generation platforms
Related-To: LOCI-4312 Signed-off-by: Jitendra Sharma <jitendra.sharma@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
5e4ea627f7
commit
8a01619310
@@ -16,9 +16,9 @@ namespace ult {
|
||||
|
||||
using L0GfxCoreHelperTestGen11 = Test<DeviceFixture>;
|
||||
|
||||
GEN11TEST_F(L0GfxCoreHelperTestGen11, GivenGen11WhenCheckingL0HelperForCmdListHeapSharingSupportThenReturnFalse) {
|
||||
GEN11TEST_F(L0GfxCoreHelperTestGen11, GivenGen11WhenCheckingL0HelperForCmdListHeapSharingSupportThenReturnTrue) {
|
||||
auto &l0GfxCoreHelper = getHelper<L0GfxCoreHelper>();
|
||||
EXPECT_FALSE(l0GfxCoreHelper.platformSupportsCmdListHeapSharing());
|
||||
EXPECT_TRUE(l0GfxCoreHelper.platformSupportsCmdListHeapSharing());
|
||||
}
|
||||
|
||||
GEN11TEST_F(L0GfxCoreHelperTestGen11, GivenGen11WhenCheckingL0HelperForStateComputeModeTrackingSupportThenReturnFalse) {
|
||||
|
||||
@@ -28,10 +28,10 @@ GEN12LPTEST_F(L0GfxCoreHelperTestGen12Lp, GivenGen12LpWhenGetRegsetTypeForLargeG
|
||||
EXPECT_EQ(ZET_DEBUG_REGSET_TYPE_INVALID_INTEL_GPU, l0GfxCoreHelper.getRegsetTypeForLargeGrfDetection());
|
||||
}
|
||||
|
||||
GEN12LPTEST_F(L0GfxCoreHelperTestGen12Lp, GivenGen12LpWhenCheckingL0HelperForCmdListHeapSharingSupportThenReturnFalse) {
|
||||
GEN12LPTEST_F(L0GfxCoreHelperTestGen12Lp, GivenGen12LpWhenCheckingL0HelperForCmdListHeapSharingSupportThenReturnTrue) {
|
||||
auto &l0GfxCoreHelper = getHelper<L0GfxCoreHelper>();
|
||||
|
||||
EXPECT_FALSE(l0GfxCoreHelper.platformSupportsCmdListHeapSharing());
|
||||
EXPECT_TRUE(l0GfxCoreHelper.platformSupportsCmdListHeapSharing());
|
||||
}
|
||||
|
||||
GEN12LPTEST_F(L0GfxCoreHelperTestGen12Lp, GivenGen12LpWhenCheckingL0HelperForStateComputeModeTrackingSupportThenReturnFalse) {
|
||||
|
||||
@@ -16,9 +16,9 @@ namespace ult {
|
||||
|
||||
using L0GfxCoreHelperTestGen9 = Test<DeviceFixture>;
|
||||
|
||||
GEN9TEST_F(L0GfxCoreHelperTestGen9, GivenGen9WhenCheckingL0HelperForCmdListHeapSharingSupportThenReturnFalse) {
|
||||
GEN9TEST_F(L0GfxCoreHelperTestGen9, GivenGen9WhenCheckingL0HelperForCmdListHeapSharingSupportThenReturnTrue) {
|
||||
auto &l0GfxCoreHelper = getHelper<L0GfxCoreHelper>();
|
||||
EXPECT_FALSE(l0GfxCoreHelper.platformSupportsCmdListHeapSharing());
|
||||
EXPECT_TRUE(l0GfxCoreHelper.platformSupportsCmdListHeapSharing());
|
||||
}
|
||||
|
||||
GEN9TEST_F(L0GfxCoreHelperTestGen9, GivenGen9WhenCheckingL0HelperForStateComputeModeTrackingSupportThenReturnFalse) {
|
||||
|
||||
@@ -108,6 +108,9 @@ struct WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>
|
||||
const CmdListKernelLaunchParams &launchParams) override {
|
||||
|
||||
usedKernelLaunchParams = launchParams;
|
||||
if (launchParams.isKernelSplitOperation && (launchParams.numKernelsExecutedInSplitLaunch == 0)) {
|
||||
firstKernelInSplitOperation = kernel;
|
||||
}
|
||||
appendKernelEventValue = event;
|
||||
return BaseClass::appendLaunchKernelWithParams(kernel, threadGroupDimensions,
|
||||
event, launchParams);
|
||||
@@ -140,6 +143,7 @@ struct WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>
|
||||
|
||||
CmdListKernelLaunchParams usedKernelLaunchParams;
|
||||
::L0::Event *appendKernelEventValue = nullptr;
|
||||
::L0::Kernel *firstKernelInSplitOperation = nullptr;
|
||||
ze_event_handle_t appendEventMultipleKernelIndirectEventHandleValue = nullptr;
|
||||
ze_event_handle_t appendEventKernelIndirectEventHandleValue = nullptr;
|
||||
};
|
||||
|
||||
@@ -998,6 +998,130 @@ HWTEST2_F(CommandListTest, givenComputeCommandListWhenMemoryFillRequiresMultiKer
|
||||
context->freeMem(dstBuffer);
|
||||
}
|
||||
|
||||
using IsPlatformSklToDg1 = IsWithinProducts<IGFX_SKYLAKE, IGFX_DG1>;
|
||||
HWTEST2_F(CommandListTest, givenComputeCommandListWhenMemoryCopyInUsmDeviceAllocationThenSplitFlagIsSetAndHeapsEstimationIsProper, IsPlatformSklToDg1) {
|
||||
auto commandList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>>();
|
||||
commandList->isFlushTaskSubmissionEnabled = true;
|
||||
commandList->immediateCmdListHeapSharing = true;
|
||||
commandList->initialize(device, NEO::EngineGroupType::RenderCompute, 0u);
|
||||
commandList->commandContainer.setImmediateCmdListCsr(device->getNEODevice()->getDefaultEngine().commandStreamReceiver);
|
||||
|
||||
constexpr size_t size = 4096u;
|
||||
constexpr size_t alignment = 0;
|
||||
void *dstBuffer = nullptr;
|
||||
|
||||
ze_device_mem_alloc_desc_t deviceDesc = {};
|
||||
auto result = context->allocDeviceMem(device->toHandle(),
|
||||
&deviceDesc,
|
||||
size, alignment, &dstBuffer);
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
|
||||
void *srcPtr = reinterpret_cast<void *>(0x1234);
|
||||
|
||||
auto &cmdContainer = commandList->commandContainer;
|
||||
auto csrDshHeap = &device->getNEODevice()->getDefaultEngine().commandStreamReceiver->getIndirectHeap(HeapType::DYNAMIC_STATE, MemoryConstants::pageSize64k);
|
||||
auto csrSshHeap = &device->getNEODevice()->getDefaultEngine().commandStreamReceiver->getIndirectHeap(HeapType::SURFACE_STATE, MemoryConstants::pageSize64k);
|
||||
|
||||
size_t dshUsed = csrDshHeap->getUsed();
|
||||
size_t sshUsed = csrSshHeap->getUsed();
|
||||
|
||||
commandList->appendMemoryCopy(dstBuffer, srcPtr, 0x101, nullptr, 0, nullptr, false, false);
|
||||
EXPECT_TRUE(commandList->usedKernelLaunchParams.isBuiltInKernel);
|
||||
EXPECT_TRUE(commandList->usedKernelLaunchParams.isKernelSplitOperation);
|
||||
EXPECT_FALSE(commandList->usedKernelLaunchParams.isDestinationAllocationInSystemMemory);
|
||||
|
||||
// As numKernelsExecutedInSplitLaunch is incremented after split kernel launch. But we are storing usedKernelLaunchParams before actual split kernel launch.
|
||||
// Hence below comparison tells that actually (usedKernelLaunchParams.numKernelsExecutedInSplitLaunch + 1) split kernels are launched
|
||||
EXPECT_EQ(commandList->usedKernelLaunchParams.numKernelsInSplitLaunch, commandList->usedKernelLaunchParams.numKernelsExecutedInSplitLaunch + 1);
|
||||
|
||||
size_t dshEstimated = NEO::EncodeDispatchKernel<FamilyType>::getSizeRequiredDsh(
|
||||
commandList->firstKernelInSplitOperation->getKernelDescriptor(),
|
||||
cmdContainer.getNumIddPerBlock());
|
||||
size_t sshEstimated = NEO::EncodeDispatchKernel<FamilyType>::getSizeRequiredSsh(*commandList->firstKernelInSplitOperation->getImmutableData()->getKernelInfo());
|
||||
|
||||
auto expectedDshToBeConsumed = dshEstimated * commandList->usedKernelLaunchParams.numKernelsInSplitLaunch;
|
||||
auto expectedSshToBeConsumed = sshEstimated * commandList->usedKernelLaunchParams.numKernelsInSplitLaunch;
|
||||
auto consumedDsh1 = csrDshHeap->getUsed();
|
||||
auto consumedSsh1 = csrSshHeap->getUsed();
|
||||
|
||||
EXPECT_EQ(expectedDshToBeConsumed, (consumedDsh1 - dshUsed));
|
||||
EXPECT_EQ(expectedSshToBeConsumed, (consumedSsh1 - sshUsed));
|
||||
|
||||
context->freeMem(dstBuffer);
|
||||
}
|
||||
|
||||
// Checks that memory fills which need more than one kernel, issued on a command
// list with flush-task submission and immediate heap sharing turned on, are
// launched as built-in split-kernel operations, and that each fill consumes
// DSH/SSH space from the CSR heaps equal to the per-kernel estimate multiplied
// by the number of kernels in the split launch. Two fills are exercised: one
// with an 8-byte pattern and one with a 1-byte pattern.
HWTEST2_F(CommandListTest, givenComputeCommandListWhenMemoryFillRequiresMultiKernelsThenSplitFlagIsSetAndHeapsEstimationIsProper, IsPlatformSklToDg1) {
    auto csr = device->getNEODevice()->getDefaultEngine().commandStreamReceiver;

    auto commandList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>>();
    commandList->isFlushTaskSubmissionEnabled = true;
    commandList->immediateCmdListHeapSharing = true;
    commandList->initialize(device, NEO::EngineGroupType::RenderCompute, 0u);
    commandList->commandContainer.setImmediateCmdListCsr(csr);

    constexpr size_t patternSize = 8;
    uint8_t pattern[patternSize] = {1, 2, 3, 4};

    constexpr size_t size = 4096u;
    constexpr size_t alignment = 4096u;
    void *dstBuffer = nullptr;

    ze_device_mem_alloc_desc_t deviceDesc = {};
    auto result = context->allocDeviceMem(device->toHandle(),
                                          &deviceDesc,
                                          size, alignment, &dstBuffer);
    // Fatal assertions: every later step needs a valid destination allocation.
    ASSERT_EQ(ZE_RESULT_SUCCESS, result);
    ASSERT_NE(nullptr, dstBuffer);

    constexpr size_t fillSize = size - 1;

    auto &cmdContainer = commandList->commandContainer;
    auto csrDshHeap = &csr->getIndirectHeap(HeapType::DYNAMIC_STATE, MemoryConstants::pageSize64k);
    auto csrSshHeap = &csr->getIndirectHeap(HeapType::SURFACE_STATE, MemoryConstants::pageSize64k);

    // Snapshot heap usage so that only the consumption of the fills is measured.
    const size_t dshUsed = csrDshHeap->getUsed();
    const size_t sshUsed = csrSshHeap->getUsed();

    // First fill: 8-byte pattern.
    commandList->appendMemoryFill(dstBuffer, pattern, patternSize, fillSize, nullptr, 0, nullptr, false);
    EXPECT_TRUE(commandList->usedKernelLaunchParams.isBuiltInKernel);
    EXPECT_TRUE(commandList->usedKernelLaunchParams.isKernelSplitOperation);
    EXPECT_FALSE(commandList->usedKernelLaunchParams.isDestinationAllocationInSystemMemory);

    // usedKernelLaunchParams is captured before each split kernel is launched,
    // while numKernelsExecutedInSplitLaunch is incremented after the launch.
    // The last captured value is therefore one less than the number of kernels
    // actually launched.
    EXPECT_EQ(commandList->usedKernelLaunchParams.numKernelsInSplitLaunch, commandList->usedKernelLaunchParams.numKernelsExecutedInSplitLaunch + 1);

    // The first split kernel is dereferenced below; stop here if it was never captured.
    auto firstKernel = commandList->firstKernelInSplitOperation;
    ASSERT_NE(nullptr, firstKernel);

    size_t dshEstimated = NEO::EncodeDispatchKernel<FamilyType>::getSizeRequiredDsh(
        firstKernel->getKernelDescriptor(),
        cmdContainer.getNumIddPerBlock());
    size_t sshEstimated = NEO::EncodeDispatchKernel<FamilyType>::getSizeRequiredSsh(*firstKernel->getImmutableData()->getKernelInfo());

    auto expectedDshToBeConsumed = dshEstimated * commandList->usedKernelLaunchParams.numKernelsInSplitLaunch;
    auto expectedSshToBeConsumed = sshEstimated * commandList->usedKernelLaunchParams.numKernelsInSplitLaunch;
    const auto consumedDsh1 = csrDshHeap->getUsed();
    const auto consumedSsh1 = csrSshHeap->getUsed();

    EXPECT_EQ(expectedDshToBeConsumed, (consumedDsh1 - dshUsed));
    EXPECT_EQ(expectedSshToBeConsumed, (consumedSsh1 - sshUsed));

    // Second fill: 1-byte pattern. Clear the captured kernel first so a stale
    // pointer from the first fill cannot hide a missing capture.
    commandList->firstKernelInSplitOperation = nullptr;
    commandList->appendMemoryFill(dstBuffer, pattern, 1, fillSize, nullptr, 0, nullptr, false);
    EXPECT_TRUE(commandList->usedKernelLaunchParams.isBuiltInKernel);
    EXPECT_TRUE(commandList->usedKernelLaunchParams.isKernelSplitOperation);
    EXPECT_FALSE(commandList->usedKernelLaunchParams.isDestinationAllocationInSystemMemory);
    EXPECT_EQ(commandList->usedKernelLaunchParams.numKernelsInSplitLaunch, commandList->usedKernelLaunchParams.numKernelsExecutedInSplitLaunch + 1);

    firstKernel = commandList->firstKernelInSplitOperation;
    ASSERT_NE(nullptr, firstKernel);

    dshEstimated = NEO::EncodeDispatchKernel<FamilyType>::getSizeRequiredDsh(
        firstKernel->getKernelDescriptor(),
        cmdContainer.getNumIddPerBlock());
    sshEstimated = NEO::EncodeDispatchKernel<FamilyType>::getSizeRequiredSsh(*firstKernel->getImmutableData()->getKernelInfo());

    expectedDshToBeConsumed = dshEstimated * commandList->usedKernelLaunchParams.numKernelsInSplitLaunch;
    expectedSshToBeConsumed = sshEstimated * commandList->usedKernelLaunchParams.numKernelsInSplitLaunch;
    const auto consumedDsh2 = csrDshHeap->getUsed();
    const auto consumedSsh2 = csrSshHeap->getUsed();

    // Measured against the usage recorded after the first fill.
    EXPECT_EQ(expectedDshToBeConsumed, (consumedDsh2 - consumedDsh1));
    EXPECT_EQ(expectedSshToBeConsumed, (consumedSsh2 - consumedSsh1));

    context->freeMem(dstBuffer);
}
|
||||
|
||||
TEST(CommandList, whenAsMutableIsCalledNullptrIsReturned) {
|
||||
MockCommandList cmdList;
|
||||
EXPECT_EQ(nullptr, cmdList.asMutable());
|
||||
|
||||
@@ -156,6 +156,51 @@ HWTEST2_F(singleAddressSpaceModeTest, givenImmediateCommandListWhenExecutingWith
|
||||
commandList->destroy();
|
||||
}
|
||||
|
||||
// Checks that, with EnableFlushTaskSubmission set and both
// EnableImmediateCmdListHeapSharing and UseImmediateFlushTask cleared,
// appending a kernel launch to an immediate command list flushes a command
// stream, and that the CSR command stream contains an MI_LOAD_REGISTER_IMM
// whose register offset is CS_GPR_R15.
HWTEST2_F(singleAddressSpaceModeTest, givenUseCsrImmediateSubmissionEnabledAndSharedHeapsDisbledForImmediateCommandListWhenExecutingWithFlushTaskThenGPR15isProgrammed, Gen12Plus) {
    using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM;
    Mock<::L0::KernelImp> kernel;
    DebugManagerStateRestore restorer;
    NEO::DebugManager.flags.EnableFlushTaskSubmission.set(true);
    NEO::DebugManager.flags.EnableImmediateCmdListHeapSharing.set(0);
    NEO::DebugManager.flags.UseImmediateFlushTask.set(0);

    ze_command_queue_desc_t queueDesc = {};
    ze_result_t returnValue = ZE_RESULT_SUCCESS;
    ze_group_count_t groupCount{1, 1, 1};

    auto &csr = neoDevice->getUltCommandStreamReceiver<FamilyType>();
    csr.storeMakeResidentAllocations = true;

    auto commandList = whiteboxCast(CommandList::createImmediate(productFamily, device, &queueDesc, false, NEO::EngineGroupType::RenderCompute, returnValue));
    // The command list is dereferenced immediately; stop here if creation failed.
    ASSERT_NE(nullptr, commandList);

    EXPECT_TRUE(commandList->isFlushTaskSubmissionEnabled);
    EXPECT_EQ(&csr, commandList->csr);

    // Clear the marker so the check below proves that this append caused a flush.
    csr.lastFlushedCommandStream = nullptr;
    CmdListKernelLaunchParams launchParams = {};
    auto result = commandList->appendLaunchKernel(kernel.toHandle(), groupCount, nullptr, 0, nullptr, launchParams, false);
    ASSERT_EQ(ZE_RESULT_SUCCESS, result);

    EXPECT_NE(nullptr, csr.lastFlushedCommandStream);

    GenCmdList cmdList;
    ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
        cmdList, commandList->csr->getCS().getCpuBase(), commandList->csr->getCS().getUsed()));

    // Scan every MI_LOAD_REGISTER_IMM in the CSR stream for one targeting CS_GPR_R15.
    bool gpr15Found = false;
    auto miLoadImm = findAll<MI_LOAD_REGISTER_IMM *>(cmdList.begin(), cmdList.end());
    for (auto &cmdIt : miLoadImm) {
        MI_LOAD_REGISTER_IMM *miLoad = genCmdCast<MI_LOAD_REGISTER_IMM *>(*cmdIt);
        ASSERT_NE(nullptr, miLoad);

        if (miLoad->getRegisterOffset() == CS_GPR_R15) {
            gpr15Found = true;
            break;
        }
    }
    EXPECT_TRUE(gpr15Found);
    commandList->destroy();
}
|
||||
|
||||
HWTEST2_P(L0DebuggerWithBlitterTest, givenImmediateCommandListWhenExecutingWithFlushTaskThenSipIsInstalledAndDebuggerAllocationsAreResident, Gen12Plus) {
|
||||
using STATE_SIP = typename FamilyType::STATE_SIP;
|
||||
using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM;
|
||||
|
||||
Reference in New Issue
Block a user