mirror of
https://github.com/intel/compute-runtime.git
synced 2025-12-21 09:14:47 +08:00
Revert "refactor: Enable CSR heap sharing on Older Generation platforms"
This reverts commit 58ff9c6d94.
Signed-off-by: Compute-Runtime-Validation <compute-runtime-validation@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
c60d105cad
commit
1ac37d4a49
@@ -43,8 +43,6 @@ struct CmdListKernelLaunchParams {
|
|||||||
bool isDestinationAllocationInSystemMemory = false;
|
bool isDestinationAllocationInSystemMemory = false;
|
||||||
bool isHostSignalScopeEvent = false;
|
bool isHostSignalScopeEvent = false;
|
||||||
bool skipInOrderNonWalkerSignaling = false;
|
bool skipInOrderNonWalkerSignaling = false;
|
||||||
uint32_t numKernelsInSplitLaunch = 0;
|
|
||||||
uint32_t numKernelsExecutedInSplitLaunch = 0;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
struct CmdListReturnPoint {
|
struct CmdListReturnPoint {
|
||||||
|
|||||||
@@ -1280,8 +1280,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendPageFaultCopy(NEO::Graph
|
|||||||
size);
|
size);
|
||||||
} else {
|
} else {
|
||||||
CmdListKernelLaunchParams launchParams = {};
|
CmdListKernelLaunchParams launchParams = {};
|
||||||
launchParams.isKernelSplitOperation = rightSize > 0;
|
launchParams.isKernelSplitOperation = rightSize > 1;
|
||||||
launchParams.numKernelsInSplitLaunch = 2;
|
|
||||||
ret = appendMemoryCopyKernelWithGA(reinterpret_cast<void *>(&dstAddress),
|
ret = appendMemoryCopyKernelWithGA(reinterpret_cast<void *>(&dstAddress),
|
||||||
dstAllocation, 0,
|
dstAllocation, 0,
|
||||||
reinterpret_cast<void *>(&srcAddress),
|
reinterpret_cast<void *>(&srcAddress),
|
||||||
@@ -1292,7 +1291,6 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendPageFaultCopy(NEO::Graph
|
|||||||
nullptr,
|
nullptr,
|
||||||
isStateless,
|
isStateless,
|
||||||
launchParams);
|
launchParams);
|
||||||
launchParams.numKernelsExecutedInSplitLaunch++;
|
|
||||||
if (ret == ZE_RESULT_SUCCESS && rightSize) {
|
if (ret == ZE_RESULT_SUCCESS && rightSize) {
|
||||||
ret = appendMemoryCopyKernelWithGA(reinterpret_cast<void *>(&dstAddress),
|
ret = appendMemoryCopyKernelWithGA(reinterpret_cast<void *>(&dstAddress),
|
||||||
dstAllocation, size - rightSize,
|
dstAllocation, size - rightSize,
|
||||||
@@ -1303,7 +1301,6 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendPageFaultCopy(NEO::Graph
|
|||||||
nullptr,
|
nullptr,
|
||||||
isStateless,
|
isStateless,
|
||||||
launchParams);
|
launchParams);
|
||||||
launchParams.numKernelsExecutedInSplitLaunch++;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (this->dcFlushSupport) {
|
if (this->dcFlushSupport) {
|
||||||
@@ -1399,7 +1396,6 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopy(void *dstptr,
|
|||||||
dcFlush = getDcFlushRequired(signalEvent->isSignalScope());
|
dcFlush = getDcFlushRequired(signalEvent->isSignalScope());
|
||||||
}
|
}
|
||||||
|
|
||||||
launchParams.numKernelsInSplitLaunch = kernelCounter;
|
|
||||||
launchParams.isKernelSplitOperation = kernelCounter > 1;
|
launchParams.isKernelSplitOperation = kernelCounter > 1;
|
||||||
bool singlePipeControlPacket = eventSignalPipeControl(launchParams.isKernelSplitOperation, dcFlush);
|
bool singlePipeControlPacket = eventSignalPipeControl(launchParams.isKernelSplitOperation, dcFlush);
|
||||||
|
|
||||||
@@ -1426,7 +1422,6 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopy(void *dstptr,
|
|||||||
signalEvent,
|
signalEvent,
|
||||||
isStateless,
|
isStateless,
|
||||||
launchParams);
|
launchParams);
|
||||||
launchParams.numKernelsExecutedInSplitLaunch++;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (ret == ZE_RESULT_SUCCESS && middleSizeBytes) {
|
if (ret == ZE_RESULT_SUCCESS && middleSizeBytes) {
|
||||||
@@ -1445,7 +1440,6 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopy(void *dstptr,
|
|||||||
signalEvent,
|
signalEvent,
|
||||||
isStateless,
|
isStateless,
|
||||||
launchParams);
|
launchParams);
|
||||||
launchParams.numKernelsExecutedInSplitLaunch++;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (ret == ZE_RESULT_SUCCESS && rightSize) {
|
if (ret == ZE_RESULT_SUCCESS && rightSize) {
|
||||||
@@ -1463,7 +1457,6 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopy(void *dstptr,
|
|||||||
signalEvent,
|
signalEvent,
|
||||||
isStateless,
|
isStateless,
|
||||||
launchParams);
|
launchParams);
|
||||||
launchParams.numKernelsExecutedInSplitLaunch++;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1862,21 +1855,12 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryFill(void *ptr,
|
|||||||
|
|
||||||
appendEventForProfilingAllWalkers(signalEvent, true, singlePipeControlPacket);
|
appendEventForProfilingAllWalkers(signalEvent, true, singlePipeControlPacket);
|
||||||
|
|
||||||
if (fillArguments.leftRemainingBytes > 0) {
|
|
||||||
launchParams.numKernelsInSplitLaunch++;
|
|
||||||
}
|
|
||||||
if (fillArguments.rightRemainingBytes > 0) {
|
|
||||||
launchParams.numKernelsInSplitLaunch++;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (patternSize == 1) {
|
if (patternSize == 1) {
|
||||||
launchParams.numKernelsInSplitLaunch++;
|
|
||||||
if (fillArguments.leftRemainingBytes > 0) {
|
if (fillArguments.leftRemainingBytes > 0) {
|
||||||
res = appendUnalignedFillKernel(isStateless, fillArguments.leftRemainingBytes, dstAllocation, pattern, signalEvent, launchParams);
|
res = appendUnalignedFillKernel(isStateless, fillArguments.leftRemainingBytes, dstAllocation, pattern, signalEvent, launchParams);
|
||||||
if (res) {
|
if (res) {
|
||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
launchParams.numKernelsExecutedInSplitLaunch++;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
ze_result_t ret = builtinKernel->setGroupSize(static_cast<uint32_t>(fillArguments.mainGroupSize), 1u, 1u);
|
ze_result_t ret = builtinKernel->setGroupSize(static_cast<uint32_t>(fillArguments.mainGroupSize), 1u, 1u);
|
||||||
@@ -1897,7 +1881,6 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryFill(void *ptr,
|
|||||||
if (res) {
|
if (res) {
|
||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
launchParams.numKernelsExecutedInSplitLaunch++;
|
|
||||||
|
|
||||||
if (fillArguments.rightRemainingBytes > 0) {
|
if (fillArguments.rightRemainingBytes > 0) {
|
||||||
dstAllocation.offset = fillArguments.rightOffset;
|
dstAllocation.offset = fillArguments.rightOffset;
|
||||||
@@ -1905,7 +1888,6 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryFill(void *ptr,
|
|||||||
if (res) {
|
if (res) {
|
||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
launchParams.numKernelsExecutedInSplitLaunch++;
|
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
builtinKernel->setGroupSize(static_cast<uint32_t>(fillArguments.mainGroupSize), 1, 1);
|
builtinKernel->setGroupSize(static_cast<uint32_t>(fillArguments.mainGroupSize), 1, 1);
|
||||||
@@ -1940,12 +1922,10 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryFill(void *ptr,
|
|||||||
builtinKernel->setArgumentValue(3, sizeof(fillArguments.patternSizeInEls), &fillArguments.patternSizeInEls);
|
builtinKernel->setArgumentValue(3, sizeof(fillArguments.patternSizeInEls), &fillArguments.patternSizeInEls);
|
||||||
|
|
||||||
ze_group_count_t dispatchKernelArgs{static_cast<uint32_t>(fillArguments.groups), 1u, 1u};
|
ze_group_count_t dispatchKernelArgs{static_cast<uint32_t>(fillArguments.groups), 1u, 1u};
|
||||||
launchParams.numKernelsInSplitLaunch++;
|
|
||||||
res = appendLaunchKernelSplit(builtinKernel, dispatchKernelArgs, signalEvent, launchParams);
|
res = appendLaunchKernelSplit(builtinKernel, dispatchKernelArgs, signalEvent, launchParams);
|
||||||
if (res) {
|
if (res) {
|
||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
launchParams.numKernelsExecutedInSplitLaunch++;
|
|
||||||
} else {
|
} else {
|
||||||
uint32_t dstOffsetRemainder = static_cast<uint32_t>(dstAllocation.offset);
|
uint32_t dstOffsetRemainder = static_cast<uint32_t>(dstAllocation.offset);
|
||||||
|
|
||||||
@@ -1974,7 +1954,6 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryFill(void *ptr,
|
|||||||
if (res) {
|
if (res) {
|
||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
launchParams.numKernelsExecutedInSplitLaunch++;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (fillArguments.rightRemainingBytes > 0) {
|
if (fillArguments.rightRemainingBytes > 0) {
|
||||||
@@ -2006,7 +1985,6 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryFill(void *ptr,
|
|||||||
if (res) {
|
if (res) {
|
||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
launchParams.numKernelsExecutedInSplitLaunch++;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -87,23 +87,10 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(K
|
|||||||
NEO::EncodeDispatchKernel<GfxFamily>::getSizeRequiredDsh(kernelDescriptor, commandContainer.getNumIddPerBlock()),
|
NEO::EncodeDispatchKernel<GfxFamily>::getSizeRequiredDsh(kernelDescriptor, commandContainer.getNumIddPerBlock()),
|
||||||
NEO::EncodeDispatchKernel<GfxFamily>::getDefaultDshAlignment()};
|
NEO::EncodeDispatchKernel<GfxFamily>::getDefaultDshAlignment()};
|
||||||
|
|
||||||
if (launchParams.isKernelSplitOperation) {
|
commandContainer.reserveSpaceForDispatch(
|
||||||
// When appendLaunchKernel is called during an operation where kernel split is true,
|
sshReserveArgs,
|
||||||
// then reserve sufficient ssh and dsh heaps during the first kernel split by multiplying the individual
|
dshReserveArgs, true);
|
||||||
// dsh and ssh heap sizes retrieved above by the number of kernels in the split operation.
|
|
||||||
// After the first kernel split, do not estimate the heap size again for the remaining kernel split calls.
|
|
||||||
if (launchParams.numKernelsExecutedInSplitLaunch == 0) {
|
|
||||||
dshReserveArgs.size = launchParams.numKernelsInSplitLaunch * dshReserveArgs.size;
|
|
||||||
sshReserveArgs.size = launchParams.numKernelsInSplitLaunch * sshReserveArgs.size;
|
|
||||||
commandContainer.reserveSpaceForDispatch(
|
|
||||||
sshReserveArgs,
|
|
||||||
dshReserveArgs, true);
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
commandContainer.reserveSpaceForDispatch(
|
|
||||||
sshReserveArgs,
|
|
||||||
dshReserveArgs, true);
|
|
||||||
}
|
|
||||||
ssh = sshReserveArgs.indirectHeapReservation;
|
ssh = sshReserveArgs.indirectHeapReservation;
|
||||||
dsh = dshReserveArgs.indirectHeapReservation;
|
dsh = dshReserveArgs.indirectHeapReservation;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -11,7 +11,7 @@ namespace L0 {
|
|||||||
|
|
||||||
template <typename Family>
|
template <typename Family>
|
||||||
bool L0GfxCoreHelperHw<Family>::platformSupportsCmdListHeapSharing() const {
|
bool L0GfxCoreHelperHw<Family>::platformSupportsCmdListHeapSharing() const {
|
||||||
return true;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename Family>
|
template <typename Family>
|
||||||
|
|||||||
@@ -98,7 +98,13 @@ ze_result_t KernelImmutableData::initialize(NEO::KernelInfo *kernelInfo, Device
|
|||||||
kernelDescriptor->payloadMappings.implicitArgs.simdSize, kernelDescriptor->kernelAttributes.simdSize);
|
kernelDescriptor->payloadMappings.implicitArgs.simdSize, kernelDescriptor->kernelAttributes.simdSize);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (NEO::KernelDescriptor::isBindlessAddressingKernel(kernelInfo->kernelDescriptor)) {
|
if (kernelInfo->heapInfo.surfaceStateHeapSize != 0) {
|
||||||
|
this->surfaceStateHeapSize = kernelInfo->heapInfo.surfaceStateHeapSize;
|
||||||
|
surfaceStateHeapTemplate.reset(new uint8_t[surfaceStateHeapSize]);
|
||||||
|
|
||||||
|
memcpy_s(surfaceStateHeapTemplate.get(), surfaceStateHeapSize,
|
||||||
|
kernelInfo->heapInfo.pSsh, surfaceStateHeapSize);
|
||||||
|
} else if (NEO::KernelDescriptor::isBindlessAddressingKernel(kernelInfo->kernelDescriptor)) {
|
||||||
auto &gfxCoreHelper = deviceImp->getNEODevice()->getGfxCoreHelper();
|
auto &gfxCoreHelper = deviceImp->getNEODevice()->getGfxCoreHelper();
|
||||||
auto surfaceStateSize = static_cast<uint32_t>(gfxCoreHelper.getRenderSurfaceStateSize());
|
auto surfaceStateSize = static_cast<uint32_t>(gfxCoreHelper.getRenderSurfaceStateSize());
|
||||||
|
|
||||||
@@ -106,12 +112,6 @@ ze_result_t KernelImmutableData::initialize(NEO::KernelInfo *kernelInfo, Device
|
|||||||
UNRECOVERABLE_IF(kernelInfo->kernelDescriptor.kernelAttributes.numArgsStateful != kernelInfo->kernelDescriptor.getBindlessOffsetToSurfaceState().size());
|
UNRECOVERABLE_IF(kernelInfo->kernelDescriptor.kernelAttributes.numArgsStateful != kernelInfo->kernelDescriptor.getBindlessOffsetToSurfaceState().size());
|
||||||
|
|
||||||
surfaceStateHeapTemplate.reset(new uint8_t[surfaceStateHeapSize]);
|
surfaceStateHeapTemplate.reset(new uint8_t[surfaceStateHeapSize]);
|
||||||
} else if (kernelInfo->heapInfo.surfaceStateHeapSize != 0) {
|
|
||||||
this->surfaceStateHeapSize = kernelInfo->heapInfo.surfaceStateHeapSize;
|
|
||||||
surfaceStateHeapTemplate.reset(new uint8_t[surfaceStateHeapSize]);
|
|
||||||
|
|
||||||
memcpy_s(surfaceStateHeapTemplate.get(), surfaceStateHeapSize,
|
|
||||||
kernelInfo->heapInfo.pSsh, surfaceStateHeapSize);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (kernelInfo->heapInfo.dynamicStateHeapSize != 0) {
|
if (kernelInfo->heapInfo.dynamicStateHeapSize != 0) {
|
||||||
|
|||||||
@@ -16,9 +16,9 @@ namespace ult {
|
|||||||
|
|
||||||
using L0GfxCoreHelperTestGen11 = Test<DeviceFixture>;
|
using L0GfxCoreHelperTestGen11 = Test<DeviceFixture>;
|
||||||
|
|
||||||
GEN11TEST_F(L0GfxCoreHelperTestGen11, GivenGen11WhenCheckingL0HelperForCmdListHeapSharingSupportThenReturnTrue) {
|
GEN11TEST_F(L0GfxCoreHelperTestGen11, GivenGen11WhenCheckingL0HelperForCmdListHeapSharingSupportThenReturnFalse) {
|
||||||
auto &l0GfxCoreHelper = getHelper<L0GfxCoreHelper>();
|
auto &l0GfxCoreHelper = getHelper<L0GfxCoreHelper>();
|
||||||
EXPECT_TRUE(l0GfxCoreHelper.platformSupportsCmdListHeapSharing());
|
EXPECT_FALSE(l0GfxCoreHelper.platformSupportsCmdListHeapSharing());
|
||||||
}
|
}
|
||||||
|
|
||||||
GEN11TEST_F(L0GfxCoreHelperTestGen11, GivenGen11WhenCheckingL0HelperForStateComputeModeTrackingSupportThenReturnFalse) {
|
GEN11TEST_F(L0GfxCoreHelperTestGen11, GivenGen11WhenCheckingL0HelperForStateComputeModeTrackingSupportThenReturnFalse) {
|
||||||
|
|||||||
@@ -28,10 +28,10 @@ GEN12LPTEST_F(L0GfxCoreHelperTestGen12Lp, GivenGen12LpWhenGetRegsetTypeForLargeG
|
|||||||
EXPECT_EQ(ZET_DEBUG_REGSET_TYPE_INVALID_INTEL_GPU, l0GfxCoreHelper.getRegsetTypeForLargeGrfDetection());
|
EXPECT_EQ(ZET_DEBUG_REGSET_TYPE_INVALID_INTEL_GPU, l0GfxCoreHelper.getRegsetTypeForLargeGrfDetection());
|
||||||
}
|
}
|
||||||
|
|
||||||
GEN12LPTEST_F(L0GfxCoreHelperTestGen12Lp, GivenGen12LpWhenCheckingL0HelperForCmdListHeapSharingSupportThenReturnTrue) {
|
GEN12LPTEST_F(L0GfxCoreHelperTestGen12Lp, GivenGen12LpWhenCheckingL0HelperForCmdListHeapSharingSupportThenReturnFalse) {
|
||||||
auto &l0GfxCoreHelper = getHelper<L0GfxCoreHelper>();
|
auto &l0GfxCoreHelper = getHelper<L0GfxCoreHelper>();
|
||||||
|
|
||||||
EXPECT_TRUE(l0GfxCoreHelper.platformSupportsCmdListHeapSharing());
|
EXPECT_FALSE(l0GfxCoreHelper.platformSupportsCmdListHeapSharing());
|
||||||
}
|
}
|
||||||
|
|
||||||
GEN12LPTEST_F(L0GfxCoreHelperTestGen12Lp, GivenGen12LpWhenCheckingL0HelperForStateComputeModeTrackingSupportThenReturnFalse) {
|
GEN12LPTEST_F(L0GfxCoreHelperTestGen12Lp, GivenGen12LpWhenCheckingL0HelperForStateComputeModeTrackingSupportThenReturnFalse) {
|
||||||
|
|||||||
@@ -16,9 +16,9 @@ namespace ult {
|
|||||||
|
|
||||||
using L0GfxCoreHelperTestGen9 = Test<DeviceFixture>;
|
using L0GfxCoreHelperTestGen9 = Test<DeviceFixture>;
|
||||||
|
|
||||||
GEN9TEST_F(L0GfxCoreHelperTestGen9, GivenGen9WhenCheckingL0HelperForCmdListHeapSharingSupportThenReturnTrue) {
|
GEN9TEST_F(L0GfxCoreHelperTestGen9, GivenGen9WhenCheckingL0HelperForCmdListHeapSharingSupportThenReturnFalse) {
|
||||||
auto &l0GfxCoreHelper = getHelper<L0GfxCoreHelper>();
|
auto &l0GfxCoreHelper = getHelper<L0GfxCoreHelper>();
|
||||||
EXPECT_TRUE(l0GfxCoreHelper.platformSupportsCmdListHeapSharing());
|
EXPECT_FALSE(l0GfxCoreHelper.platformSupportsCmdListHeapSharing());
|
||||||
}
|
}
|
||||||
|
|
||||||
GEN9TEST_F(L0GfxCoreHelperTestGen9, GivenGen9WhenCheckingL0HelperForStateComputeModeTrackingSupportThenReturnFalse) {
|
GEN9TEST_F(L0GfxCoreHelperTestGen9, GivenGen9WhenCheckingL0HelperForStateComputeModeTrackingSupportThenReturnFalse) {
|
||||||
|
|||||||
@@ -109,9 +109,6 @@ struct WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>
|
|||||||
const CmdListKernelLaunchParams &launchParams) override {
|
const CmdListKernelLaunchParams &launchParams) override {
|
||||||
|
|
||||||
usedKernelLaunchParams = launchParams;
|
usedKernelLaunchParams = launchParams;
|
||||||
if (launchParams.isKernelSplitOperation && (launchParams.numKernelsExecutedInSplitLaunch == 0)) {
|
|
||||||
firstKernelInSplitOperation = kernel;
|
|
||||||
}
|
|
||||||
appendKernelEventValue = event;
|
appendKernelEventValue = event;
|
||||||
return BaseClass::appendLaunchKernelWithParams(kernel, threadGroupDimensions,
|
return BaseClass::appendLaunchKernelWithParams(kernel, threadGroupDimensions,
|
||||||
event, launchParams);
|
event, launchParams);
|
||||||
@@ -144,7 +141,6 @@ struct WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>
|
|||||||
|
|
||||||
CmdListKernelLaunchParams usedKernelLaunchParams;
|
CmdListKernelLaunchParams usedKernelLaunchParams;
|
||||||
::L0::Event *appendKernelEventValue = nullptr;
|
::L0::Event *appendKernelEventValue = nullptr;
|
||||||
::L0::Kernel *firstKernelInSplitOperation = nullptr;
|
|
||||||
ze_event_handle_t appendEventMultipleKernelIndirectEventHandleValue = nullptr;
|
ze_event_handle_t appendEventMultipleKernelIndirectEventHandleValue = nullptr;
|
||||||
ze_event_handle_t appendEventKernelIndirectEventHandleValue = nullptr;
|
ze_event_handle_t appendEventKernelIndirectEventHandleValue = nullptr;
|
||||||
};
|
};
|
||||||
|
|||||||
@@ -998,130 +998,6 @@ HWTEST2_F(CommandListTest, givenComputeCommandListWhenMemoryFillRequiresMultiKer
|
|||||||
context->freeMem(dstBuffer);
|
context->freeMem(dstBuffer);
|
||||||
}
|
}
|
||||||
|
|
||||||
using IsPlatformSklToDg1 = IsWithinProducts<IGFX_SKYLAKE, IGFX_DG1>;
|
|
||||||
HWTEST2_F(CommandListTest, givenComputeCommandListWhenMemoryCopyInUsmDeviceAllocationThenSplitFlagIsSetAndHeapsEstimationIsProper, IsPlatformSklToDg1) {
|
|
||||||
auto commandList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>>();
|
|
||||||
commandList->isFlushTaskSubmissionEnabled = true;
|
|
||||||
commandList->immediateCmdListHeapSharing = true;
|
|
||||||
commandList->initialize(device, NEO::EngineGroupType::RenderCompute, 0u);
|
|
||||||
commandList->commandContainer.setImmediateCmdListCsr(device->getNEODevice()->getDefaultEngine().commandStreamReceiver);
|
|
||||||
|
|
||||||
constexpr size_t size = 4096u;
|
|
||||||
constexpr size_t alignment = 0;
|
|
||||||
void *dstBuffer = nullptr;
|
|
||||||
|
|
||||||
ze_device_mem_alloc_desc_t deviceDesc = {};
|
|
||||||
auto result = context->allocDeviceMem(device->toHandle(),
|
|
||||||
&deviceDesc,
|
|
||||||
size, alignment, &dstBuffer);
|
|
||||||
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
|
|
||||||
|
|
||||||
void *srcPtr = reinterpret_cast<void *>(0x1234);
|
|
||||||
|
|
||||||
auto &cmdContainer = commandList->commandContainer;
|
|
||||||
auto csrDshHeap = &device->getNEODevice()->getDefaultEngine().commandStreamReceiver->getIndirectHeap(HeapType::DYNAMIC_STATE, MemoryConstants::pageSize64k);
|
|
||||||
auto csrSshHeap = &device->getNEODevice()->getDefaultEngine().commandStreamReceiver->getIndirectHeap(HeapType::SURFACE_STATE, MemoryConstants::pageSize64k);
|
|
||||||
|
|
||||||
size_t dshUsed = csrDshHeap->getUsed();
|
|
||||||
size_t sshUsed = csrSshHeap->getUsed();
|
|
||||||
|
|
||||||
commandList->appendMemoryCopy(dstBuffer, srcPtr, 0x101, nullptr, 0, nullptr, false, false);
|
|
||||||
EXPECT_TRUE(commandList->usedKernelLaunchParams.isBuiltInKernel);
|
|
||||||
EXPECT_TRUE(commandList->usedKernelLaunchParams.isKernelSplitOperation);
|
|
||||||
EXPECT_FALSE(commandList->usedKernelLaunchParams.isDestinationAllocationInSystemMemory);
|
|
||||||
|
|
||||||
// numKernelsExecutedInSplitLaunch is incremented after each split kernel launch, but usedKernelLaunchParams is stored before the actual split kernel launch.
|
|
||||||
// Hence the comparison below shows that (usedKernelLaunchParams.numKernelsExecutedInSplitLaunch + 1) split kernels were actually launched.
|
|
||||||
EXPECT_EQ(commandList->usedKernelLaunchParams.numKernelsInSplitLaunch, commandList->usedKernelLaunchParams.numKernelsExecutedInSplitLaunch + 1);
|
|
||||||
|
|
||||||
size_t dshEstimated = NEO::EncodeDispatchKernel<FamilyType>::getSizeRequiredDsh(
|
|
||||||
commandList->firstKernelInSplitOperation->getKernelDescriptor(),
|
|
||||||
cmdContainer.getNumIddPerBlock());
|
|
||||||
size_t sshEstimated = NEO::EncodeDispatchKernel<FamilyType>::getSizeRequiredSsh(*commandList->firstKernelInSplitOperation->getImmutableData()->getKernelInfo());
|
|
||||||
|
|
||||||
auto expectedDshToBeConsumed = dshEstimated * commandList->usedKernelLaunchParams.numKernelsInSplitLaunch;
|
|
||||||
auto expectedSshToBeConsumed = sshEstimated * commandList->usedKernelLaunchParams.numKernelsInSplitLaunch;
|
|
||||||
auto consumedDsh1 = csrDshHeap->getUsed();
|
|
||||||
auto consumedSsh1 = csrSshHeap->getUsed();
|
|
||||||
|
|
||||||
EXPECT_EQ(expectedDshToBeConsumed, (consumedDsh1 - dshUsed));
|
|
||||||
EXPECT_EQ(expectedSshToBeConsumed, (consumedSsh1 - sshUsed));
|
|
||||||
|
|
||||||
context->freeMem(dstBuffer);
|
|
||||||
}
|
|
||||||
|
|
||||||
HWTEST2_F(CommandListTest, givenComputeCommandListWhenMemoryFillRequiresMultiKernelsThenSplitFlagIsSetAndHeapsEstimationIsProper, IsPlatformSklToDg1) {
|
|
||||||
auto commandList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>>();
|
|
||||||
commandList->isFlushTaskSubmissionEnabled = true;
|
|
||||||
commandList->immediateCmdListHeapSharing = true;
|
|
||||||
commandList->initialize(device, NEO::EngineGroupType::RenderCompute, 0u);
|
|
||||||
commandList->commandContainer.setImmediateCmdListCsr(device->getNEODevice()->getDefaultEngine().commandStreamReceiver);
|
|
||||||
|
|
||||||
constexpr size_t patternSize = 8;
|
|
||||||
uint8_t pattern[patternSize] = {1, 2, 3, 4};
|
|
||||||
|
|
||||||
constexpr size_t size = 4096u;
|
|
||||||
constexpr size_t alignment = 4096u;
|
|
||||||
void *dstBuffer = nullptr;
|
|
||||||
|
|
||||||
ze_device_mem_alloc_desc_t deviceDesc = {};
|
|
||||||
auto result = context->allocDeviceMem(device->toHandle(),
|
|
||||||
&deviceDesc,
|
|
||||||
size, alignment, &dstBuffer);
|
|
||||||
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
|
|
||||||
|
|
||||||
constexpr size_t fillSize = size - 1;
|
|
||||||
|
|
||||||
auto &cmdContainer = commandList->commandContainer;
|
|
||||||
auto csrDshHeap = &device->getNEODevice()->getDefaultEngine().commandStreamReceiver->getIndirectHeap(HeapType::DYNAMIC_STATE, MemoryConstants::pageSize64k);
|
|
||||||
auto csrSshHeap = &device->getNEODevice()->getDefaultEngine().commandStreamReceiver->getIndirectHeap(HeapType::SURFACE_STATE, MemoryConstants::pageSize64k);
|
|
||||||
|
|
||||||
size_t dshUsed = csrDshHeap->getUsed();
|
|
||||||
size_t sshUsed = csrSshHeap->getUsed();
|
|
||||||
|
|
||||||
commandList->appendMemoryFill(dstBuffer, pattern, patternSize, fillSize, nullptr, 0, nullptr, false);
|
|
||||||
EXPECT_TRUE(commandList->usedKernelLaunchParams.isBuiltInKernel);
|
|
||||||
EXPECT_TRUE(commandList->usedKernelLaunchParams.isKernelSplitOperation);
|
|
||||||
EXPECT_FALSE(commandList->usedKernelLaunchParams.isDestinationAllocationInSystemMemory);
|
|
||||||
|
|
||||||
// numKernelsExecutedInSplitLaunch is incremented after each split kernel launch, but usedKernelLaunchParams is stored before the actual split kernel launch.
|
|
||||||
// Hence the comparison below shows that (usedKernelLaunchParams.numKernelsExecutedInSplitLaunch + 1) split kernels were actually launched.
|
|
||||||
EXPECT_EQ(commandList->usedKernelLaunchParams.numKernelsInSplitLaunch, commandList->usedKernelLaunchParams.numKernelsExecutedInSplitLaunch + 1);
|
|
||||||
|
|
||||||
size_t dshEstimated = NEO::EncodeDispatchKernel<FamilyType>::getSizeRequiredDsh(
|
|
||||||
commandList->firstKernelInSplitOperation->getKernelDescriptor(),
|
|
||||||
cmdContainer.getNumIddPerBlock());
|
|
||||||
size_t sshEstimated = NEO::EncodeDispatchKernel<FamilyType>::getSizeRequiredSsh(*commandList->firstKernelInSplitOperation->getImmutableData()->getKernelInfo());
|
|
||||||
|
|
||||||
auto expectedDshToBeConsumed = dshEstimated * commandList->usedKernelLaunchParams.numKernelsInSplitLaunch;
|
|
||||||
auto expectedSshToBeConsumed = sshEstimated * commandList->usedKernelLaunchParams.numKernelsInSplitLaunch;
|
|
||||||
auto consumedDsh1 = csrDshHeap->getUsed();
|
|
||||||
auto consumedSsh1 = csrSshHeap->getUsed();
|
|
||||||
|
|
||||||
EXPECT_EQ(expectedDshToBeConsumed, (consumedDsh1 - dshUsed));
|
|
||||||
EXPECT_EQ(expectedSshToBeConsumed, (consumedSsh1 - sshUsed));
|
|
||||||
|
|
||||||
commandList->appendMemoryFill(dstBuffer, pattern, 1, fillSize, nullptr, 0, nullptr, false);
|
|
||||||
EXPECT_TRUE(commandList->usedKernelLaunchParams.isBuiltInKernel);
|
|
||||||
EXPECT_TRUE(commandList->usedKernelLaunchParams.isKernelSplitOperation);
|
|
||||||
EXPECT_FALSE(commandList->usedKernelLaunchParams.isDestinationAllocationInSystemMemory);
|
|
||||||
EXPECT_EQ(commandList->usedKernelLaunchParams.numKernelsInSplitLaunch, commandList->usedKernelLaunchParams.numKernelsExecutedInSplitLaunch + 1);
|
|
||||||
|
|
||||||
dshEstimated = NEO::EncodeDispatchKernel<FamilyType>::getSizeRequiredDsh(
|
|
||||||
commandList->firstKernelInSplitOperation->getKernelDescriptor(),
|
|
||||||
cmdContainer.getNumIddPerBlock());
|
|
||||||
sshEstimated = NEO::EncodeDispatchKernel<FamilyType>::getSizeRequiredSsh(*commandList->firstKernelInSplitOperation->getImmutableData()->getKernelInfo());
|
|
||||||
|
|
||||||
expectedDshToBeConsumed = dshEstimated * commandList->usedKernelLaunchParams.numKernelsInSplitLaunch;
|
|
||||||
expectedSshToBeConsumed = sshEstimated * commandList->usedKernelLaunchParams.numKernelsInSplitLaunch;
|
|
||||||
auto consumedDsh2 = csrDshHeap->getUsed();
|
|
||||||
auto consumedSsh2 = csrSshHeap->getUsed();
|
|
||||||
EXPECT_EQ(expectedDshToBeConsumed, (consumedDsh2 - consumedDsh1));
|
|
||||||
EXPECT_EQ(expectedSshToBeConsumed, (consumedSsh2 - consumedSsh1));
|
|
||||||
|
|
||||||
context->freeMem(dstBuffer);
|
|
||||||
}
|
|
||||||
|
|
||||||
TEST(CommandList, whenAsMutableIsCalledNullptrIsReturned) {
|
TEST(CommandList, whenAsMutableIsCalledNullptrIsReturned) {
|
||||||
MockCommandList cmdList;
|
MockCommandList cmdList;
|
||||||
EXPECT_EQ(nullptr, cmdList.asMutable());
|
EXPECT_EQ(nullptr, cmdList.asMutable());
|
||||||
|
|||||||
@@ -156,51 +156,6 @@ HWTEST2_F(singleAddressSpaceModeTest, givenImmediateCommandListWhenExecutingWith
|
|||||||
commandList->destroy();
|
commandList->destroy();
|
||||||
}
|
}
|
||||||
|
|
||||||
HWTEST2_F(singleAddressSpaceModeTest, givenUseCsrImmediateSubmissionEnabledAndSharedHeapsDisbledForImmediateCommandListWhenExecutingWithFlushTaskThenGPR15isProgrammed, Gen12Plus) {
|
|
||||||
using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM;
|
|
||||||
Mock<::L0::KernelImp> kernel;
|
|
||||||
DebugManagerStateRestore restorer;
|
|
||||||
NEO::DebugManager.flags.EnableFlushTaskSubmission.set(true);
|
|
||||||
NEO::DebugManager.flags.EnableImmediateCmdListHeapSharing.set(0);
|
|
||||||
NEO::DebugManager.flags.UseImmediateFlushTask.set(0);
|
|
||||||
|
|
||||||
ze_command_queue_desc_t queueDesc = {};
|
|
||||||
ze_result_t returnValue = ZE_RESULT_SUCCESS;
|
|
||||||
ze_group_count_t groupCount{1, 1, 1};
|
|
||||||
|
|
||||||
auto &csr = neoDevice->getUltCommandStreamReceiver<FamilyType>();
|
|
||||||
csr.storeMakeResidentAllocations = true;
|
|
||||||
|
|
||||||
auto commandList = whiteboxCast(CommandList::createImmediate(productFamily, device, &queueDesc, false, NEO::EngineGroupType::RenderCompute, returnValue));
|
|
||||||
|
|
||||||
EXPECT_TRUE(commandList->isFlushTaskSubmissionEnabled);
|
|
||||||
EXPECT_EQ(&csr, commandList->csr);
|
|
||||||
|
|
||||||
csr.lastFlushedCommandStream = nullptr;
|
|
||||||
CmdListKernelLaunchParams launchParams = {};
|
|
||||||
auto result = commandList->appendLaunchKernel(kernel.toHandle(), groupCount, nullptr, 0, nullptr, launchParams, false);
|
|
||||||
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
|
|
||||||
|
|
||||||
EXPECT_NE(nullptr, csr.lastFlushedCommandStream);
|
|
||||||
|
|
||||||
GenCmdList cmdList;
|
|
||||||
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
|
|
||||||
cmdList, commandList->csr->getCS().getCpuBase(), commandList->csr->getCS().getUsed()));
|
|
||||||
bool gpr15Found = false;
|
|
||||||
auto miLoadImm = findAll<MI_LOAD_REGISTER_IMM *>(cmdList.begin(), cmdList.end());
|
|
||||||
for (size_t i = 0; i < miLoadImm.size(); i++) {
|
|
||||||
MI_LOAD_REGISTER_IMM *miLoad = genCmdCast<MI_LOAD_REGISTER_IMM *>(*miLoadImm[i]);
|
|
||||||
ASSERT_NE(nullptr, miLoad);
|
|
||||||
|
|
||||||
if (miLoad->getRegisterOffset() == CS_GPR_R15) {
|
|
||||||
gpr15Found = true;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
EXPECT_TRUE(gpr15Found);
|
|
||||||
commandList->destroy();
|
|
||||||
}
|
|
||||||
|
|
||||||
HWTEST2_P(L0DebuggerWithBlitterTest, givenImmediateCommandListWhenExecutingWithFlushTaskThenSipIsInstalledAndDebuggerAllocationsAreResident, Gen12Plus) {
|
HWTEST2_P(L0DebuggerWithBlitterTest, givenImmediateCommandListWhenExecutingWithFlushTaskThenSipIsInstalledAndDebuggerAllocationsAreResident, Gen12Plus) {
|
||||||
using STATE_SIP = typename FamilyType::STATE_SIP;
|
using STATE_SIP = typename FamilyType::STATE_SIP;
|
||||||
using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM;
|
using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM;
|
||||||
|
|||||||
@@ -736,7 +736,6 @@ size_t EncodeDispatchKernel<Family>::getSizeRequiredDsh(const KernelDescriptor &
|
|||||||
size = alignUp(size, INTERFACE_DESCRIPTOR_DATA::SAMPLERSTATEPOINTER_ALIGN_SIZE);
|
size = alignUp(size, INTERFACE_DESCRIPTOR_DATA::SAMPLERSTATEPOINTER_ALIGN_SIZE);
|
||||||
|
|
||||||
if (additionalDshSize > 0) {
|
if (additionalDshSize > 0) {
|
||||||
size = alignUp(size, EncodeStates<Family>::alignInterfaceDescriptorData);
|
|
||||||
size += additionalDshSize;
|
size += additionalDshSize;
|
||||||
size = alignUp(size, EncodeDispatchKernel<Family>::getDefaultDshAlignment());
|
size = alignUp(size, EncodeDispatchKernel<Family>::getDefaultDshAlignment());
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user