diff --git a/level_zero/core/source/cmdlist/cmdlist.h b/level_zero/core/source/cmdlist/cmdlist.h index d7d943b5f6..3e21c86b12 100644 --- a/level_zero/core/source/cmdlist/cmdlist.h +++ b/level_zero/core/source/cmdlist/cmdlist.h @@ -43,8 +43,6 @@ struct CmdListKernelLaunchParams { bool isDestinationAllocationInSystemMemory = false; bool isHostSignalScopeEvent = false; bool skipInOrderNonWalkerSignaling = false; - uint32_t numKernelsInSplitLaunch = 0; - uint32_t numKernelsExecutedInSplitLaunch = 0; }; struct CmdListReturnPoint { diff --git a/level_zero/core/source/cmdlist/cmdlist_hw.inl b/level_zero/core/source/cmdlist/cmdlist_hw.inl index 9002b8a752..4eb0016661 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw.inl @@ -1280,8 +1280,7 @@ ze_result_t CommandListCoreFamily::appendPageFaultCopy(NEO::Graph size); } else { CmdListKernelLaunchParams launchParams = {}; - launchParams.isKernelSplitOperation = rightSize > 0; - launchParams.numKernelsInSplitLaunch = 2; + launchParams.isKernelSplitOperation = rightSize > 1; ret = appendMemoryCopyKernelWithGA(reinterpret_cast(&dstAddress), dstAllocation, 0, reinterpret_cast(&srcAddress), @@ -1292,7 +1291,6 @@ ze_result_t CommandListCoreFamily::appendPageFaultCopy(NEO::Graph nullptr, isStateless, launchParams); - launchParams.numKernelsExecutedInSplitLaunch++; if (ret == ZE_RESULT_SUCCESS && rightSize) { ret = appendMemoryCopyKernelWithGA(reinterpret_cast(&dstAddress), dstAllocation, size - rightSize, @@ -1303,7 +1301,6 @@ ze_result_t CommandListCoreFamily::appendPageFaultCopy(NEO::Graph nullptr, isStateless, launchParams); - launchParams.numKernelsExecutedInSplitLaunch++; } if (this->dcFlushSupport) { @@ -1399,7 +1396,6 @@ ze_result_t CommandListCoreFamily::appendMemoryCopy(void *dstptr, dcFlush = getDcFlushRequired(signalEvent->isSignalScope()); } - launchParams.numKernelsInSplitLaunch = kernelCounter; launchParams.isKernelSplitOperation = kernelCounter > 1; bool singlePipeControlPacket = eventSignalPipeControl(launchParams.isKernelSplitOperation, dcFlush); @@ -1426,7 +1422,6 @@ ze_result_t CommandListCoreFamily::appendMemoryCopy(void *dstptr, signalEvent, isStateless, launchParams); - launchParams.numKernelsExecutedInSplitLaunch++; } if (ret == ZE_RESULT_SUCCESS && middleSizeBytes) { @@ -1445,7 +1440,6 @@ ze_result_t CommandListCoreFamily::appendMemoryCopy(void *dstptr, signalEvent, isStateless, launchParams); - launchParams.numKernelsExecutedInSplitLaunch++; } if (ret == ZE_RESULT_SUCCESS && rightSize) { @@ -1463,7 +1457,6 @@ ze_result_t CommandListCoreFamily::appendMemoryCopy(void *dstptr, signalEvent, isStateless, launchParams); - launchParams.numKernelsExecutedInSplitLaunch++; } } @@ -1862,21 +1855,12 @@ ze_result_t CommandListCoreFamily::appendMemoryFill(void *ptr, appendEventForProfilingAllWalkers(signalEvent, true, singlePipeControlPacket); - if (fillArguments.leftRemainingBytes > 0) { - launchParams.numKernelsInSplitLaunch++; - } - if (fillArguments.rightRemainingBytes > 0) { - launchParams.numKernelsInSplitLaunch++; - } - if (patternSize == 1) { - launchParams.numKernelsInSplitLaunch++; if (fillArguments.leftRemainingBytes > 0) { res = appendUnalignedFillKernel(isStateless, fillArguments.leftRemainingBytes, dstAllocation, pattern, signalEvent, launchParams); if (res) { return res; } - launchParams.numKernelsExecutedInSplitLaunch++; } ze_result_t ret = builtinKernel->setGroupSize(static_cast(fillArguments.mainGroupSize), 1u, 1u); @@ -1897,7 +1881,6 @@ ze_result_t CommandListCoreFamily::appendMemoryFill(void *ptr, if (res) { return res; } - launchParams.numKernelsExecutedInSplitLaunch++; if (fillArguments.rightRemainingBytes > 0) { dstAllocation.offset = fillArguments.rightOffset; @@ -1905,7 +1888,6 @@ ze_result_t CommandListCoreFamily::appendMemoryFill(void *ptr, if (res) { return res; } - launchParams.numKernelsExecutedInSplitLaunch++; } } else { builtinKernel->setGroupSize(static_cast(fillArguments.mainGroupSize), 1, 1); @@ -1940,12 +1922,10 @@ ze_result_t CommandListCoreFamily::appendMemoryFill(void *ptr, builtinKernel->setArgumentValue(3, sizeof(fillArguments.patternSizeInEls), &fillArguments.patternSizeInEls); ze_group_count_t dispatchKernelArgs{static_cast(fillArguments.groups), 1u, 1u}; - launchParams.numKernelsInSplitLaunch++; res = appendLaunchKernelSplit(builtinKernel, dispatchKernelArgs, signalEvent, launchParams); if (res) { return res; } - launchParams.numKernelsExecutedInSplitLaunch++; } else { uint32_t dstOffsetRemainder = static_cast(dstAllocation.offset); @@ -1974,7 +1954,6 @@ ze_result_t CommandListCoreFamily::appendMemoryFill(void *ptr, if (res) { return res; } - launchParams.numKernelsExecutedInSplitLaunch++; } if (fillArguments.rightRemainingBytes > 0) { @@ -2006,7 +1985,6 @@ ze_result_t CommandListCoreFamily::appendMemoryFill(void *ptr, if (res) { return res; } - launchParams.numKernelsExecutedInSplitLaunch++; } } diff --git a/level_zero/core/source/cmdlist/cmdlist_hw_skl_to_tgllp.inl b/level_zero/core/source/cmdlist/cmdlist_hw_skl_to_tgllp.inl index 939711b7d7..d62599748a 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw_skl_to_tgllp.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw_skl_to_tgllp.inl @@ -87,23 +87,10 @@ ze_result_t CommandListCoreFamily::appendLaunchKernelWithParams(K NEO::EncodeDispatchKernel::getSizeRequiredDsh(kernelDescriptor, commandContainer.getNumIddPerBlock()), NEO::EncodeDispatchKernel::getDefaultDshAlignment()}; - if (launchParams.isKernelSplitOperation) { - // when appendLaunchKernel is called during an operation with kernel split is true, - // then reserve sufficient ssh and dsh heaps during first kernel split, by multiplying, individual - // dsh and ssh heap size retrieved above with number of kernels in split operation. - // And after first kernel split, for remainder kernel split calls, dont estimate heap size. - if (launchParams.numKernelsExecutedInSplitLaunch == 0) { - dshReserveArgs.size = launchParams.numKernelsInSplitLaunch * dshReserveArgs.size; - sshReserveArgs.size = launchParams.numKernelsInSplitLaunch * sshReserveArgs.size; - commandContainer.reserveSpaceForDispatch( - sshReserveArgs, - dshReserveArgs, true); - } - } else { - commandContainer.reserveSpaceForDispatch( - sshReserveArgs, - dshReserveArgs, true); - } + commandContainer.reserveSpaceForDispatch( + sshReserveArgs, + dshReserveArgs, true); + ssh = sshReserveArgs.indirectHeapReservation; dsh = dshReserveArgs.indirectHeapReservation; } diff --git a/level_zero/core/source/gfx_core_helpers/l0_gfx_core_helper_skl_to_tgllp.inl b/level_zero/core/source/gfx_core_helpers/l0_gfx_core_helper_skl_to_tgllp.inl index f8e5e24ad4..7b5d729e20 100644 --- a/level_zero/core/source/gfx_core_helpers/l0_gfx_core_helper_skl_to_tgllp.inl +++ b/level_zero/core/source/gfx_core_helpers/l0_gfx_core_helper_skl_to_tgllp.inl @@ -11,7 +11,7 @@ namespace L0 { template bool L0GfxCoreHelperHw::platformSupportsCmdListHeapSharing() const { - return true; + return false; } template diff --git a/level_zero/core/source/kernel/kernel_imp.cpp b/level_zero/core/source/kernel/kernel_imp.cpp index 51fe42fafb..9a01a415fd 100644 --- a/level_zero/core/source/kernel/kernel_imp.cpp +++ b/level_zero/core/source/kernel/kernel_imp.cpp @@ -98,7 +98,13 @@ ze_result_t KernelImmutableData::initialize(NEO::KernelInfo *kernelInfo, Device kernelDescriptor->payloadMappings.implicitArgs.simdSize, kernelDescriptor->kernelAttributes.simdSize); } - if (NEO::KernelDescriptor::isBindlessAddressingKernel(kernelInfo->kernelDescriptor)) { + if (kernelInfo->heapInfo.surfaceStateHeapSize != 0) { + this->surfaceStateHeapSize = kernelInfo->heapInfo.surfaceStateHeapSize; + surfaceStateHeapTemplate.reset(new uint8_t[surfaceStateHeapSize]); + + memcpy_s(surfaceStateHeapTemplate.get(), surfaceStateHeapSize, + kernelInfo->heapInfo.pSsh, surfaceStateHeapSize); + } else if (NEO::KernelDescriptor::isBindlessAddressingKernel(kernelInfo->kernelDescriptor)) { auto &gfxCoreHelper = deviceImp->getNEODevice()->getGfxCoreHelper(); auto surfaceStateSize = static_cast(gfxCoreHelper.getRenderSurfaceStateSize()); @@ -106,12 +112,6 @@ ze_result_t KernelImmutableData::initialize(NEO::KernelInfo *kernelInfo, Device UNRECOVERABLE_IF(kernelInfo->kernelDescriptor.kernelAttributes.numArgsStateful != kernelInfo->kernelDescriptor.getBindlessOffsetToSurfaceState().size()); surfaceStateHeapTemplate.reset(new uint8_t[surfaceStateHeapSize]); - } else if (kernelInfo->heapInfo.surfaceStateHeapSize != 0) { - this->surfaceStateHeapSize = kernelInfo->heapInfo.surfaceStateHeapSize; - surfaceStateHeapTemplate.reset(new uint8_t[surfaceStateHeapSize]); - - memcpy_s(surfaceStateHeapTemplate.get(), surfaceStateHeapSize, - kernelInfo->heapInfo.pSsh, surfaceStateHeapSize); } if (kernelInfo->heapInfo.dynamicStateHeapSize != 0) { diff --git a/level_zero/core/test/unit_tests/gen11/test_l0_gfx_core_helper_gen11.cpp b/level_zero/core/test/unit_tests/gen11/test_l0_gfx_core_helper_gen11.cpp index c0f68b0bae..3563e4dc6e 100644 --- a/level_zero/core/test/unit_tests/gen11/test_l0_gfx_core_helper_gen11.cpp +++ b/level_zero/core/test/unit_tests/gen11/test_l0_gfx_core_helper_gen11.cpp @@ -16,9 +16,9 @@ namespace ult { using L0GfxCoreHelperTestGen11 = Test; -GEN11TEST_F(L0GfxCoreHelperTestGen11, GivenGen11WhenCheckingL0HelperForCmdListHeapSharingSupportThenReturnTrue) { +GEN11TEST_F(L0GfxCoreHelperTestGen11, GivenGen11WhenCheckingL0HelperForCmdListHeapSharingSupportThenReturnFalse) { auto &l0GfxCoreHelper = getHelper(); - EXPECT_TRUE(l0GfxCoreHelper.platformSupportsCmdListHeapSharing()); + EXPECT_FALSE(l0GfxCoreHelper.platformSupportsCmdListHeapSharing()); } GEN11TEST_F(L0GfxCoreHelperTestGen11, GivenGen11WhenCheckingL0HelperForStateComputeModeTrackingSupportThenReturnFalse) { diff --git a/level_zero/core/test/unit_tests/gen12lp/test_l0_gfx_core_helper_gen12lp.cpp b/level_zero/core/test/unit_tests/gen12lp/test_l0_gfx_core_helper_gen12lp.cpp index 536770cf1c..523afe812f 100644 --- a/level_zero/core/test/unit_tests/gen12lp/test_l0_gfx_core_helper_gen12lp.cpp +++ b/level_zero/core/test/unit_tests/gen12lp/test_l0_gfx_core_helper_gen12lp.cpp @@ -28,10 +28,10 @@ GEN12LPTEST_F(L0GfxCoreHelperTestGen12Lp, GivenGen12LpWhenGetRegsetTypeForLargeG EXPECT_EQ(ZET_DEBUG_REGSET_TYPE_INVALID_INTEL_GPU, l0GfxCoreHelper.getRegsetTypeForLargeGrfDetection()); } -GEN12LPTEST_F(L0GfxCoreHelperTestGen12Lp, GivenGen12LpWhenCheckingL0HelperForCmdListHeapSharingSupportThenReturnTrue) { +GEN12LPTEST_F(L0GfxCoreHelperTestGen12Lp, GivenGen12LpWhenCheckingL0HelperForCmdListHeapSharingSupportThenReturnFalse) { auto &l0GfxCoreHelper = getHelper(); - EXPECT_TRUE(l0GfxCoreHelper.platformSupportsCmdListHeapSharing()); + EXPECT_FALSE(l0GfxCoreHelper.platformSupportsCmdListHeapSharing()); } GEN12LPTEST_F(L0GfxCoreHelperTestGen12Lp, GivenGen12LpWhenCheckingL0HelperForStateComputeModeTrackingSupportThenReturnFalse) { diff --git a/level_zero/core/test/unit_tests/gen9/test_l0_gfx_core_helper_gen9.cpp b/level_zero/core/test/unit_tests/gen9/test_l0_gfx_core_helper_gen9.cpp index 007099e465..d06f049a3b 100644 --- a/level_zero/core/test/unit_tests/gen9/test_l0_gfx_core_helper_gen9.cpp +++ b/level_zero/core/test/unit_tests/gen9/test_l0_gfx_core_helper_gen9.cpp @@ -16,9 +16,9 @@ namespace ult { using L0GfxCoreHelperTestGen9 = Test; -GEN9TEST_F(L0GfxCoreHelperTestGen9, GivenGen9WhenCheckingL0HelperForCmdListHeapSharingSupportThenReturnTrue) { +GEN9TEST_F(L0GfxCoreHelperTestGen9, GivenGen9WhenCheckingL0HelperForCmdListHeapSharingSupportThenReturnFalse) { auto &l0GfxCoreHelper = getHelper(); - EXPECT_TRUE(l0GfxCoreHelper.platformSupportsCmdListHeapSharing()); + EXPECT_FALSE(l0GfxCoreHelper.platformSupportsCmdListHeapSharing()); } GEN9TEST_F(L0GfxCoreHelperTestGen9, GivenGen9WhenCheckingL0HelperForStateComputeModeTrackingSupportThenReturnFalse) { diff --git a/level_zero/core/test/unit_tests/mocks/mock_cmdlist.h b/level_zero/core/test/unit_tests/mocks/mock_cmdlist.h index 6ab8d714aa..9c3f4efb73 100644 --- a/level_zero/core/test/unit_tests/mocks/mock_cmdlist.h +++ b/level_zero/core/test/unit_tests/mocks/mock_cmdlist.h @@ -109,9 +109,6 @@ struct WhiteBox<::L0::CommandListCoreFamily> const CmdListKernelLaunchParams &launchParams) override { usedKernelLaunchParams = launchParams; - if (launchParams.isKernelSplitOperation && (launchParams.numKernelsExecutedInSplitLaunch == 0)) { - firstKernelInSplitOperation = kernel; - } appendKernelEventValue = event; return BaseClass::appendLaunchKernelWithParams(kernel, threadGroupDimensions, event, launchParams); @@ -144,7 +141,6 @@ struct WhiteBox<::L0::CommandListCoreFamily> CmdListKernelLaunchParams usedKernelLaunchParams; ::L0::Event *appendKernelEventValue = nullptr; - ::L0::Kernel *firstKernelInSplitOperation = nullptr; ze_event_handle_t appendEventMultipleKernelIndirectEventHandleValue = nullptr; ze_event_handle_t appendEventKernelIndirectEventHandleValue = nullptr; }; diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_6.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_6.cpp index 295d2d19e0..b3eec8fe98 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_6.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_6.cpp @@ -998,130 +998,6 @@ HWTEST2_F(CommandListTest, givenComputeCommandListWhenMemoryFillRequiresMultiKer context->freeMem(dstBuffer); } -using IsPlatformSklToDg1 = IsWithinProducts; -HWTEST2_F(CommandListTest, givenComputeCommandListWhenMemoryCopyInUsmDeviceAllocationThenSplitFlagIsSetAndHeapsEstimationIsProper, IsPlatformSklToDg1) { - auto commandList = std::make_unique>>(); - commandList->isFlushTaskSubmissionEnabled = true; - commandList->immediateCmdListHeapSharing = true; - commandList->initialize(device, NEO::EngineGroupType::RenderCompute, 0u); - commandList->commandContainer.setImmediateCmdListCsr(device->getNEODevice()->getDefaultEngine().commandStreamReceiver); - - constexpr size_t size = 4096u; - constexpr size_t alignment = 0; - void *dstBuffer = nullptr; - - ze_device_mem_alloc_desc_t deviceDesc = {}; - auto result = context->allocDeviceMem(device->toHandle(), - &deviceDesc, - size, alignment, &dstBuffer); - EXPECT_EQ(ZE_RESULT_SUCCESS, result); - - void *srcPtr = reinterpret_cast(0x1234); - - auto &cmdContainer = commandList->commandContainer; - auto csrDshHeap = &device->getNEODevice()->getDefaultEngine().commandStreamReceiver->getIndirectHeap(HeapType::DYNAMIC_STATE, MemoryConstants::pageSize64k); - auto csrSshHeap = &device->getNEODevice()->getDefaultEngine().commandStreamReceiver->getIndirectHeap(HeapType::SURFACE_STATE, MemoryConstants::pageSize64k); - - size_t dshUsed = csrDshHeap->getUsed(); - size_t sshUsed = csrSshHeap->getUsed(); - - commandList->appendMemoryCopy(dstBuffer, srcPtr, 0x101, nullptr, 0, nullptr, false, false); - EXPECT_TRUE(commandList->usedKernelLaunchParams.isBuiltInKernel); - EXPECT_TRUE(commandList->usedKernelLaunchParams.isKernelSplitOperation); - EXPECT_FALSE(commandList->usedKernelLaunchParams.isDestinationAllocationInSystemMemory); - - // As numKernelsExecutedInSplitLaunch is incremented after split kernel launch. But we are storing usedKernelLaunchParams before actual split kernel launch. - // Hence below comparison tells that actually (usedKernelLaunchParams.numKernelsExecutedInSplitLaunch + 1) split kernels are launched - EXPECT_EQ(commandList->usedKernelLaunchParams.numKernelsInSplitLaunch, commandList->usedKernelLaunchParams.numKernelsExecutedInSplitLaunch + 1); - - size_t dshEstimated = NEO::EncodeDispatchKernel::getSizeRequiredDsh( - commandList->firstKernelInSplitOperation->getKernelDescriptor(), - cmdContainer.getNumIddPerBlock()); - size_t sshEstimated = NEO::EncodeDispatchKernel::getSizeRequiredSsh(*commandList->firstKernelInSplitOperation->getImmutableData()->getKernelInfo()); - - auto expectedDshToBeConsumed = dshEstimated * commandList->usedKernelLaunchParams.numKernelsInSplitLaunch; - auto expectedSshToBeConsumed = sshEstimated * commandList->usedKernelLaunchParams.numKernelsInSplitLaunch; - auto consumedDsh1 = csrDshHeap->getUsed(); - auto consumedSsh1 = csrSshHeap->getUsed(); - - EXPECT_EQ(expectedDshToBeConsumed, (consumedDsh1 - dshUsed)); - EXPECT_EQ(expectedSshToBeConsumed, (consumedSsh1 - sshUsed)); - - context->freeMem(dstBuffer); -} - -HWTEST2_F(CommandListTest, givenComputeCommandListWhenMemoryFillRequiresMultiKernelsThenSplitFlagIsSetAndHeapsEstimationIsProper, IsPlatformSklToDg1) { - auto commandList = std::make_unique>>(); - commandList->isFlushTaskSubmissionEnabled = true; - commandList->immediateCmdListHeapSharing = true; - commandList->initialize(device, NEO::EngineGroupType::RenderCompute, 0u); - commandList->commandContainer.setImmediateCmdListCsr(device->getNEODevice()->getDefaultEngine().commandStreamReceiver); - - constexpr size_t patternSize = 8; - uint8_t pattern[patternSize] = {1, 2, 3, 4}; - - constexpr size_t size = 4096u; - constexpr size_t alignment = 4096u; - void *dstBuffer = nullptr; - - ze_device_mem_alloc_desc_t deviceDesc = {}; - auto result = context->allocDeviceMem(device->toHandle(), - &deviceDesc, - size, alignment, &dstBuffer); - EXPECT_EQ(ZE_RESULT_SUCCESS, result); - - constexpr size_t fillSize = size - 1; - - auto &cmdContainer = commandList->commandContainer; - auto csrDshHeap = &device->getNEODevice()->getDefaultEngine().commandStreamReceiver->getIndirectHeap(HeapType::DYNAMIC_STATE, MemoryConstants::pageSize64k); - auto csrSshHeap = &device->getNEODevice()->getDefaultEngine().commandStreamReceiver->getIndirectHeap(HeapType::SURFACE_STATE, MemoryConstants::pageSize64k); - - size_t dshUsed = csrDshHeap->getUsed(); - size_t sshUsed = csrSshHeap->getUsed(); - - commandList->appendMemoryFill(dstBuffer, pattern, patternSize, fillSize, nullptr, 0, nullptr, false); - EXPECT_TRUE(commandList->usedKernelLaunchParams.isBuiltInKernel); - EXPECT_TRUE(commandList->usedKernelLaunchParams.isKernelSplitOperation); - EXPECT_FALSE(commandList->usedKernelLaunchParams.isDestinationAllocationInSystemMemory); - - // As numKernelsExecutedInSplitLaunch is incremented after split kernel launch. But we are storing usedKernelLaunchParams before actual split kernel launch. - // Hence below comparison tells that actually (usedKernelLaunchParams.numKernelsExecutedInSplitLaunch + 1) split kernels are launched - EXPECT_EQ(commandList->usedKernelLaunchParams.numKernelsInSplitLaunch, commandList->usedKernelLaunchParams.numKernelsExecutedInSplitLaunch + 1); - - size_t dshEstimated = NEO::EncodeDispatchKernel::getSizeRequiredDsh( - commandList->firstKernelInSplitOperation->getKernelDescriptor(), - cmdContainer.getNumIddPerBlock()); - size_t sshEstimated = NEO::EncodeDispatchKernel::getSizeRequiredSsh(*commandList->firstKernelInSplitOperation->getImmutableData()->getKernelInfo()); - - auto expectedDshToBeConsumed = dshEstimated * commandList->usedKernelLaunchParams.numKernelsInSplitLaunch; - auto expectedSshToBeConsumed = sshEstimated * commandList->usedKernelLaunchParams.numKernelsInSplitLaunch; - auto consumedDsh1 = csrDshHeap->getUsed(); - auto consumedSsh1 = csrSshHeap->getUsed(); - - EXPECT_EQ(expectedDshToBeConsumed, (consumedDsh1 - dshUsed)); - EXPECT_EQ(expectedSshToBeConsumed, (consumedSsh1 - sshUsed)); - - commandList->appendMemoryFill(dstBuffer, pattern, 1, fillSize, nullptr, 0, nullptr, false); - EXPECT_TRUE(commandList->usedKernelLaunchParams.isBuiltInKernel); - EXPECT_TRUE(commandList->usedKernelLaunchParams.isKernelSplitOperation); - EXPECT_FALSE(commandList->usedKernelLaunchParams.isDestinationAllocationInSystemMemory); - EXPECT_EQ(commandList->usedKernelLaunchParams.numKernelsInSplitLaunch, commandList->usedKernelLaunchParams.numKernelsExecutedInSplitLaunch + 1); - - dshEstimated = NEO::EncodeDispatchKernel::getSizeRequiredDsh( - commandList->firstKernelInSplitOperation->getKernelDescriptor(), - cmdContainer.getNumIddPerBlock()); - sshEstimated = NEO::EncodeDispatchKernel::getSizeRequiredSsh(*commandList->firstKernelInSplitOperation->getImmutableData()->getKernelInfo()); - - expectedDshToBeConsumed = dshEstimated * commandList->usedKernelLaunchParams.numKernelsInSplitLaunch; - expectedSshToBeConsumed = sshEstimated * commandList->usedKernelLaunchParams.numKernelsInSplitLaunch; - auto consumedDsh2 = csrDshHeap->getUsed(); - auto consumedSsh2 = csrSshHeap->getUsed(); - EXPECT_EQ(expectedDshToBeConsumed, (consumedDsh2 - consumedDsh1)); - EXPECT_EQ(expectedSshToBeConsumed, (consumedSsh2 - consumedSsh1)); - - context->freeMem(dstBuffer); -} - TEST(CommandList, whenAsMutableIsCalledNullptrIsReturned) { MockCommandList cmdList; EXPECT_EQ(nullptr, cmdList.asMutable()); diff --git a/level_zero/core/test/unit_tests/sources/debugger/test_l0_debugger_2.cpp b/level_zero/core/test/unit_tests/sources/debugger/test_l0_debugger_2.cpp index 30c28de379..5dec77cfe8 100644 --- a/level_zero/core/test/unit_tests/sources/debugger/test_l0_debugger_2.cpp +++ b/level_zero/core/test/unit_tests/sources/debugger/test_l0_debugger_2.cpp @@ -156,51 +156,6 @@ HWTEST2_F(singleAddressSpaceModeTest, givenImmediateCommandListWhenExecutingWith commandList->destroy(); } -HWTEST2_F(singleAddressSpaceModeTest, givenUseCsrImmediateSubmissionEnabledAndSharedHeapsDisbledForImmediateCommandListWhenExecutingWithFlushTaskThenGPR15isProgrammed, Gen12Plus) { - using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM; - Mock<::L0::KernelImp> kernel; - DebugManagerStateRestore restorer; - NEO::DebugManager.flags.EnableFlushTaskSubmission.set(true); - NEO::DebugManager.flags.EnableImmediateCmdListHeapSharing.set(0); - NEO::DebugManager.flags.UseImmediateFlushTask.set(0); - - ze_command_queue_desc_t queueDesc = {}; - ze_result_t returnValue = ZE_RESULT_SUCCESS; - ze_group_count_t groupCount{1, 1, 1}; - - auto &csr = neoDevice->getUltCommandStreamReceiver(); - csr.storeMakeResidentAllocations = true; - - auto commandList = whiteboxCast(CommandList::createImmediate(productFamily, device, &queueDesc, false, NEO::EngineGroupType::RenderCompute, returnValue)); - - EXPECT_TRUE(commandList->isFlushTaskSubmissionEnabled); - EXPECT_EQ(&csr, commandList->csr); - - csr.lastFlushedCommandStream = nullptr; - CmdListKernelLaunchParams launchParams = {}; - auto result = commandList->appendLaunchKernel(kernel.toHandle(), groupCount, nullptr, 0, nullptr, launchParams, false); - ASSERT_EQ(ZE_RESULT_SUCCESS, result); - - EXPECT_NE(nullptr, csr.lastFlushedCommandStream); - - GenCmdList cmdList; - ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( - cmdList, commandList->csr->getCS().getCpuBase(), commandList->csr->getCS().getUsed())); - bool gpr15Found = false; - auto miLoadImm = findAll(cmdList.begin(), cmdList.end()); - for (size_t i = 0; i < miLoadImm.size(); i++) { - MI_LOAD_REGISTER_IMM *miLoad = genCmdCast(*miLoadImm[i]); - ASSERT_NE(nullptr, miLoad); - - if (miLoad->getRegisterOffset() == CS_GPR_R15) { - gpr15Found = true; - break; - } - } - EXPECT_TRUE(gpr15Found); - commandList->destroy(); -} - HWTEST2_P(L0DebuggerWithBlitterTest, givenImmediateCommandListWhenExecutingWithFlushTaskThenSipIsInstalledAndDebuggerAllocationsAreResident, Gen12Plus) { using STATE_SIP = typename FamilyType::STATE_SIP; using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM; diff --git a/shared/source/command_container/command_encoder.inl b/shared/source/command_container/command_encoder.inl index f73a3da57b..8386974464 100644 --- a/shared/source/command_container/command_encoder.inl +++ b/shared/source/command_container/command_encoder.inl @@ -736,7 +736,6 @@ size_t EncodeDispatchKernel::getSizeRequiredDsh(const KernelDescriptor & size = alignUp(size, INTERFACE_DESCRIPTOR_DATA::SAMPLERSTATEPOINTER_ALIGN_SIZE); if (additionalDshSize > 0) { - size = alignUp(size, EncodeStates::alignInterfaceDescriptorData); size += additionalDshSize; size = alignUp(size, EncodeDispatchKernel::getDefaultDshAlignment()); }