mirror of
https://github.com/intel/compute-runtime.git
synced 2025-09-10 12:53:42 +08:00
Pass DispatchInfo to estimation functions
Related-To: NEO-5546 Signed-off-by: Maciej Dziuban <maciej.dziuban@intel.com>
This commit is contained in:

committed by
Compute-Runtime-Automation

parent
bfd9aba350
commit
1350aa52fb
@ -204,7 +204,7 @@ HWTEST2_F(CommandListAppendLaunchKernel, WhenAppendingFunctionThenUsedCmdBufferS
|
||||
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
|
||||
auto sizeAfter = commandList->commandContainer.getCommandStream()->getUsed();
|
||||
auto estimate = NEO::EncodeDispatchKernel<FamilyType>::estimateEncodeDispatchKernelCmdsSize(device->getNEODevice());
|
||||
auto estimate = NEO::EncodeDispatchKernel<FamilyType>::estimateEncodeDispatchKernelCmdsSize(device->getNEODevice(), Vec3<size_t>(0, 0, 0), Vec3<size_t>(1, 1, 1));
|
||||
|
||||
EXPECT_LE(sizeAfter - sizeBefore, estimate);
|
||||
|
||||
@ -214,7 +214,7 @@ HWTEST2_F(CommandListAppendLaunchKernel, WhenAppendingFunctionThenUsedCmdBufferS
|
||||
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
|
||||
sizeAfter = commandList->commandContainer.getCommandStream()->getUsed();
|
||||
estimate = NEO::EncodeDispatchKernel<FamilyType>::estimateEncodeDispatchKernelCmdsSize(device->getNEODevice());
|
||||
estimate = NEO::EncodeDispatchKernel<FamilyType>::estimateEncodeDispatchKernelCmdsSize(device->getNEODevice(), Vec3<size_t>(0, 0, 0), Vec3<size_t>(1, 1, 1));
|
||||
|
||||
EXPECT_LE(sizeAfter - sizeBefore, estimate);
|
||||
EXPECT_LE(sizeAfter - sizeBefore, estimate);
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2018-2020 Intel Corporation
|
||||
* Copyright (C) 2018-2021 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@ -90,7 +90,8 @@ void provideLocalWorkGroupSizeHints(Context *context, DispatchInfo dispatchInfo)
|
||||
void setSpecialWorkgroupSize(size_t workgroupSize[3]);
|
||||
|
||||
inline cl_uint computeDimensions(const size_t workItems[3]) {
|
||||
return (workItems[2] > 1) ? 3 : (workItems[1] > 1) ? 2 : 1;
|
||||
return (workItems[2] > 1) ? 3 : (workItems[1] > 1) ? 2
|
||||
: 1;
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
@ -167,12 +168,12 @@ template <typename GfxFamily>
|
||||
struct EnqueueOperation {
|
||||
using PIPE_CONTROL = typename GfxFamily::PIPE_CONTROL;
|
||||
static size_t getTotalSizeRequiredCS(uint32_t eventType, const CsrDependencies &csrDeps, bool reserveProfilingCmdsSpace, bool reservePerfCounters, bool blitEnqueue, CommandQueue &commandQueue, const MultiDispatchInfo &multiDispatchInfo);
|
||||
static size_t getSizeRequiredCS(uint32_t cmdType, bool reserveProfilingCmdsSpace, bool reservePerfCounters, CommandQueue &commandQueue, const Kernel *pKernel);
|
||||
static size_t getSizeRequiredCS(uint32_t cmdType, bool reserveProfilingCmdsSpace, bool reservePerfCounters, CommandQueue &commandQueue, const Kernel *pKernel, const DispatchInfo &dispatchInfo);
|
||||
static size_t getSizeRequiredForTimestampPacketWrite();
|
||||
static size_t getSizeForCacheFlushAfterWalkerCommands(const Kernel &kernel, const CommandQueue &commandQueue);
|
||||
|
||||
private:
|
||||
static size_t getSizeRequiredCSKernel(bool reserveProfilingCmdsSpace, bool reservePerfCounters, CommandQueue &commandQueue, const Kernel *pKernel);
|
||||
static size_t getSizeRequiredCSKernel(bool reserveProfilingCmdsSpace, bool reservePerfCounters, CommandQueue &commandQueue, const Kernel *pKernel, const DispatchInfo &dispatchInfo);
|
||||
static size_t getSizeRequiredCSNonKernel(bool reserveProfilingCmdsSpace, bool reservePerfCounters, CommandQueue &commandQueue);
|
||||
};
|
||||
|
||||
|
@ -193,14 +193,14 @@ size_t EnqueueOperation<GfxFamily>::getTotalSizeRequiredCS(uint32_t eventType, c
|
||||
|
||||
Kernel *parentKernel = multiDispatchInfo.peekParentKernel();
|
||||
for (auto &dispatchInfo : multiDispatchInfo) {
|
||||
expectedSizeCS += EnqueueOperation<GfxFamily>::getSizeRequiredCS(eventType, reserveProfilingCmdsSpace, reservePerfCounters, commandQueue, dispatchInfo.getKernel());
|
||||
expectedSizeCS += EnqueueOperation<GfxFamily>::getSizeRequiredCS(eventType, reserveProfilingCmdsSpace, reservePerfCounters, commandQueue, dispatchInfo.getKernel(), dispatchInfo);
|
||||
size_t kernelObjAuxCount = multiDispatchInfo.getKernelObjsForAuxTranslation() != nullptr ? multiDispatchInfo.getKernelObjsForAuxTranslation()->size() : 0;
|
||||
expectedSizeCS += dispatchInfo.dispatchInitCommands.estimateCommandsSize(kernelObjAuxCount, hwInfo, commandQueueHw.isCacheFlushForBcsRequired());
|
||||
expectedSizeCS += dispatchInfo.dispatchEpilogueCommands.estimateCommandsSize(kernelObjAuxCount, hwInfo, commandQueueHw.isCacheFlushForBcsRequired());
|
||||
}
|
||||
if (parentKernel) {
|
||||
SchedulerKernel &scheduler = commandQueue.getContext().getSchedulerKernel();
|
||||
expectedSizeCS += EnqueueOperation<GfxFamily>::getSizeRequiredCS(eventType, reserveProfilingCmdsSpace, reservePerfCounters, commandQueue, &scheduler);
|
||||
expectedSizeCS += EnqueueOperation<GfxFamily>::getSizeRequiredCS(eventType, reserveProfilingCmdsSpace, reservePerfCounters, commandQueue, &scheduler, DispatchInfo{});
|
||||
}
|
||||
if (commandQueue.getGpgpuCommandStreamReceiver().peekTimestampPacketWriteEnabled()) {
|
||||
expectedSizeCS += TimestampPacketHelper::getRequiredCmdStreamSize<GfxFamily>(csrDeps);
|
||||
@ -223,11 +223,11 @@ size_t EnqueueOperation<GfxFamily>::getTotalSizeRequiredCS(uint32_t eventType, c
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
size_t EnqueueOperation<GfxFamily>::getSizeRequiredCS(uint32_t cmdType, bool reserveProfilingCmdsSpace, bool reservePerfCounters, CommandQueue &commandQueue, const Kernel *pKernel) {
|
||||
size_t EnqueueOperation<GfxFamily>::getSizeRequiredCS(uint32_t cmdType, bool reserveProfilingCmdsSpace, bool reservePerfCounters, CommandQueue &commandQueue, const Kernel *pKernel, const DispatchInfo &dispatchInfo) {
|
||||
if (isCommandWithoutKernel(cmdType)) {
|
||||
return EnqueueOperation<GfxFamily>::getSizeRequiredCSNonKernel(reserveProfilingCmdsSpace, reservePerfCounters, commandQueue);
|
||||
} else {
|
||||
return EnqueueOperation<GfxFamily>::getSizeRequiredCSKernel(reserveProfilingCmdsSpace, reservePerfCounters, commandQueue, pKernel);
|
||||
return EnqueueOperation<GfxFamily>::getSizeRequiredCSKernel(reserveProfilingCmdsSpace, reservePerfCounters, commandQueue, pKernel, dispatchInfo);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2018-2020 Intel Corporation
|
||||
* Copyright (C) 2018-2021 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@ -186,7 +186,7 @@ void GpgpuWalkerHelper<GfxFamily>::setupTimestampPacket(
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
size_t EnqueueOperation<GfxFamily>::getSizeRequiredCSKernel(bool reserveProfilingCmdsSpace, bool reservePerfCounters, CommandQueue &commandQueue, const Kernel *pKernel) {
|
||||
size_t EnqueueOperation<GfxFamily>::getSizeRequiredCSKernel(bool reserveProfilingCmdsSpace, bool reservePerfCounters, CommandQueue &commandQueue, const Kernel *pKernel, const DispatchInfo &dispatchInfo) {
|
||||
size_t size = sizeof(typename GfxFamily::GPGPU_WALKER) + HardwareCommandsHelper<GfxFamily>::getSizeRequiredCS(pKernel) +
|
||||
sizeof(PIPE_CONTROL) * (MemorySynchronizationCommands<GfxFamily>::isPipeControlWArequired(commandQueue.getDevice().getHardwareInfo()) ? 2 : 1);
|
||||
size += HardwareCommandsHelper<GfxFamily>::getSizeRequiredForCacheFlush(commandQueue, pKernel, 0U);
|
||||
|
@ -268,7 +268,7 @@ HWTEST_F(BarrierTest, givenEmptyCommandStreamAndBlockedBarrierCommandWhenUserEve
|
||||
EXPECT_EQ(CL_SUCCESS, retVal);
|
||||
|
||||
// Consume all memory except what is needed for this enqueue
|
||||
size_t barrierCmdStreamSize = NEO::EnqueueOperation<FamilyType>::getSizeRequiredCS(CL_COMMAND_BARRIER, false, false, *pCmdQ, nullptr);
|
||||
size_t barrierCmdStreamSize = NEO::EnqueueOperation<FamilyType>::getSizeRequiredCS(CL_COMMAND_BARRIER, false, false, *pCmdQ, nullptr, {});
|
||||
commandStream.getSpace(commandStream.getMaxAvailableSpace() - barrierCmdStreamSize);
|
||||
|
||||
//now trigger event
|
||||
|
@ -436,7 +436,7 @@ HWTEST_F(GetSizeRequiredBufferTest, GivenHelloWorldKernelWhenEnqueingKernelThenH
|
||||
auto iohAfter = pIOH->getUsed();
|
||||
auto sshAfter = pSSH->getUsed();
|
||||
|
||||
auto expectedSizeCS = EnqueueOperation<FamilyType>::getSizeRequiredCS(CL_COMMAND_NDRANGE_KERNEL, false, false, *pCmdQ, KernelFixture::pKernel);
|
||||
auto expectedSizeCS = EnqueueOperation<FamilyType>::getSizeRequiredCS(CL_COMMAND_NDRANGE_KERNEL, false, false, *pCmdQ, KernelFixture::pKernel, {});
|
||||
auto expectedSizeDSH = HardwareCommandsHelper<FamilyType>::getSizeRequiredDSH(rootDeviceIndex, *KernelFixture::pKernel);
|
||||
auto expectedSizeIOH = HardwareCommandsHelper<FamilyType>::getSizeRequiredIOH(rootDeviceIndex, *KernelFixture::pKernel, workSize[0]);
|
||||
auto expectedSizeSSH = HardwareCommandsHelper<FamilyType>::getSizeRequiredSSH(*KernelFixture::pKernel, rootDeviceIndex);
|
||||
@ -475,7 +475,7 @@ HWTEST_F(GetSizeRequiredBufferTest, GivenKernelWithSimpleArgWhenEnqueingKernelTh
|
||||
auto iohAfter = pIOH->getUsed();
|
||||
auto sshAfter = pSSH->getUsed();
|
||||
|
||||
auto expectedSizeCS = EnqueueOperation<FamilyType>::getSizeRequiredCS(CL_COMMAND_NDRANGE_KERNEL, false, false, *pCmdQ, KernelFixture::pKernel);
|
||||
auto expectedSizeCS = EnqueueOperation<FamilyType>::getSizeRequiredCS(CL_COMMAND_NDRANGE_KERNEL, false, false, *pCmdQ, KernelFixture::pKernel, {});
|
||||
auto expectedSizeDSH = HardwareCommandsHelper<FamilyType>::getSizeRequiredDSH(rootDeviceIndex, *KernelFixture::pKernel);
|
||||
auto expectedSizeIOH = HardwareCommandsHelper<FamilyType>::getSizeRequiredIOH(rootDeviceIndex, *KernelFixture::pKernel, workSize[0]);
|
||||
auto expectedSizeSSH = HardwareCommandsHelper<FamilyType>::getSizeRequiredSSH(*KernelFixture::pKernel, rootDeviceIndex);
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2017-2020 Intel Corporation
|
||||
* Copyright (C) 2017-2021 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@ -94,7 +94,7 @@ HWTEST_F(GetSizeRequiredImageTest, WhenCopyingImageThenHeapsAndCommandBufferCons
|
||||
auto usedAfterIOH = ioh.getUsed();
|
||||
auto usedAfterSSH = ssh.getUsed();
|
||||
|
||||
auto expectedSizeCS = EnqueueOperation<FamilyType>::getSizeRequiredCS(CL_COMMAND_COPY_IMAGE, false, false, *pCmdQ, kernel);
|
||||
auto expectedSizeCS = EnqueueOperation<FamilyType>::getSizeRequiredCS(CL_COMMAND_COPY_IMAGE, false, false, *pCmdQ, kernel, {});
|
||||
auto expectedSizeDSH = HardwareCommandsHelper<FamilyType>::getSizeRequiredDSH(rootDeviceIndex, *kernel);
|
||||
auto expectedSizeIOH = HardwareCommandsHelper<FamilyType>::getSizeRequiredIOH(rootDeviceIndex, *kernel);
|
||||
auto expectedSizeSSH = HardwareCommandsHelper<FamilyType>::getSizeRequiredSSH(*kernel, rootDeviceIndex);
|
||||
@ -139,7 +139,7 @@ HWTEST_F(GetSizeRequiredImageTest, WhenCopyingReadWriteImageThenHeapsAndCommandB
|
||||
auto usedAfterIOH = ioh.getUsed();
|
||||
auto usedAfterSSH = ssh.getUsed();
|
||||
|
||||
auto expectedSizeCS = EnqueueOperation<FamilyType>::getSizeRequiredCS(CL_COMMAND_COPY_IMAGE, false, false, *pCmdQ, kernel.get());
|
||||
auto expectedSizeCS = EnqueueOperation<FamilyType>::getSizeRequiredCS(CL_COMMAND_COPY_IMAGE, false, false, *pCmdQ, kernel.get(), {});
|
||||
auto expectedSizeDSH = HardwareCommandsHelper<FamilyType>::getSizeRequiredDSH(rootDeviceIndex, *kernel.get());
|
||||
auto expectedSizeIOH = HardwareCommandsHelper<FamilyType>::getSizeRequiredIOH(rootDeviceIndex, *kernel.get());
|
||||
auto expectedSizeSSH = HardwareCommandsHelper<FamilyType>::getSizeRequiredSSH(*kernel.get(), rootDeviceIndex);
|
||||
@ -196,7 +196,7 @@ HWTEST_F(GetSizeRequiredImageTest, WhenReadingImageNonBlockingThenHeapsAndComman
|
||||
auto usedAfterIOH = ioh.getUsed();
|
||||
auto usedAfterSSH = ssh.getUsed();
|
||||
|
||||
auto expectedSizeCS = EnqueueOperation<FamilyType>::getSizeRequiredCS(CL_COMMAND_READ_IMAGE, false, false, *pCmdQ, kernel);
|
||||
auto expectedSizeCS = EnqueueOperation<FamilyType>::getSizeRequiredCS(CL_COMMAND_READ_IMAGE, false, false, *pCmdQ, kernel, {});
|
||||
auto expectedSizeDSH = HardwareCommandsHelper<FamilyType>::getSizeRequiredDSH(rootDeviceIndex, *kernel);
|
||||
auto expectedSizeIOH = HardwareCommandsHelper<FamilyType>::getSizeRequiredIOH(rootDeviceIndex, *kernel);
|
||||
auto expectedSizeSSH = HardwareCommandsHelper<FamilyType>::getSizeRequiredSSH(*kernel, rootDeviceIndex);
|
||||
@ -251,7 +251,7 @@ HWTEST_F(GetSizeRequiredImageTest, WhenReadingImageBlockingThenHeapsAndCommandBu
|
||||
auto usedAfterIOH = ioh.getUsed();
|
||||
auto usedAfterSSH = ssh.getUsed();
|
||||
|
||||
auto expectedSizeCS = EnqueueOperation<FamilyType>::getSizeRequiredCS(CL_COMMAND_READ_IMAGE, false, false, *pCmdQ, kernel);
|
||||
auto expectedSizeCS = EnqueueOperation<FamilyType>::getSizeRequiredCS(CL_COMMAND_READ_IMAGE, false, false, *pCmdQ, kernel, {});
|
||||
auto expectedSizeDSH = HardwareCommandsHelper<FamilyType>::getSizeRequiredDSH(rootDeviceIndex, *kernel);
|
||||
auto expectedSizeIOH = HardwareCommandsHelper<FamilyType>::getSizeRequiredIOH(rootDeviceIndex, *kernel);
|
||||
auto expectedSizeSSH = HardwareCommandsHelper<FamilyType>::getSizeRequiredSSH(*kernel, rootDeviceIndex);
|
||||
@ -306,7 +306,7 @@ HWTEST_F(GetSizeRequiredImageTest, WhenWritingImageNonBlockingThenHeapsAndComman
|
||||
auto usedAfterIOH = ioh.getUsed();
|
||||
auto usedAfterSSH = ssh.getUsed();
|
||||
|
||||
auto expectedSizeCS = EnqueueOperation<FamilyType>::getSizeRequiredCS(CL_COMMAND_WRITE_IMAGE, false, false, *pCmdQ, kernel);
|
||||
auto expectedSizeCS = EnqueueOperation<FamilyType>::getSizeRequiredCS(CL_COMMAND_WRITE_IMAGE, false, false, *pCmdQ, kernel, {});
|
||||
auto expectedSizeDSH = HardwareCommandsHelper<FamilyType>::getSizeRequiredDSH(rootDeviceIndex, *kernel);
|
||||
auto expectedSizeIOH = HardwareCommandsHelper<FamilyType>::getSizeRequiredIOH(rootDeviceIndex, *kernel);
|
||||
auto expectedSizeSSH = HardwareCommandsHelper<FamilyType>::getSizeRequiredSSH(*kernel, rootDeviceIndex);
|
||||
@ -361,7 +361,7 @@ HWTEST_F(GetSizeRequiredImageTest, WhenWritingImageBlockingThenHeapsAndCommandBu
|
||||
auto usedAfterIOH = ioh.getUsed();
|
||||
auto usedAfterSSH = ssh.getUsed();
|
||||
|
||||
auto expectedSizeCS = EnqueueOperation<FamilyType>::getSizeRequiredCS(CL_COMMAND_WRITE_IMAGE, false, false, *pCmdQ, kernel);
|
||||
auto expectedSizeCS = EnqueueOperation<FamilyType>::getSizeRequiredCS(CL_COMMAND_WRITE_IMAGE, false, false, *pCmdQ, kernel, {});
|
||||
auto expectedSizeDSH = HardwareCommandsHelper<FamilyType>::getSizeRequiredDSH(rootDeviceIndex, *kernel);
|
||||
auto expectedSizeIOH = HardwareCommandsHelper<FamilyType>::getSizeRequiredIOH(rootDeviceIndex, *kernel);
|
||||
auto expectedSizeSSH = HardwareCommandsHelper<FamilyType>::getSizeRequiredSSH(*kernel, rootDeviceIndex);
|
||||
|
@ -195,13 +195,13 @@ HWCMDTEST_F(IGFX_GEN8_CORE, ParentKernelCommandStreamFixture, GivenDispatchInfoW
|
||||
DispatchInfo dispatchInfo(device, mockParentKernel, 1, Vec3<size_t>{24, 1, 1}, Vec3<size_t>{24, 1, 1}, Vec3<size_t>{0, 0, 0});
|
||||
MultiDispatchInfo multiDispatchInfo(mockParentKernel);
|
||||
|
||||
size_t size = EnqueueOperation<FamilyType>::getSizeRequiredCS(CL_COMMAND_NDRANGE_KERNEL, false, false, *pCmdQ, mockParentKernel);
|
||||
size_t size = EnqueueOperation<FamilyType>::getSizeRequiredCS(CL_COMMAND_NDRANGE_KERNEL, false, false, *pCmdQ, mockParentKernel, {});
|
||||
size_t numOfKernels = MemoryConstants::pageSize / size;
|
||||
|
||||
size_t rest = MemoryConstants::pageSize - (numOfKernels * size);
|
||||
|
||||
SchedulerKernel &scheduler = pCmdQ->getContext().getSchedulerKernel();
|
||||
size_t schedulerSize = EnqueueOperation<FamilyType>::getSizeRequiredCS(CL_COMMAND_NDRANGE_KERNEL, false, false, *pCmdQ, &scheduler);
|
||||
size_t schedulerSize = EnqueueOperation<FamilyType>::getSizeRequiredCS(CL_COMMAND_NDRANGE_KERNEL, false, false, *pCmdQ, &scheduler, {});
|
||||
|
||||
while (rest >= schedulerSize) {
|
||||
numOfKernels++;
|
||||
|
@ -80,13 +80,13 @@ HWCMDTEST_F(IGFX_GEN8_CORE, ProfilingTests, GivenCommandQueueWithProfilingAndFor
|
||||
MultiDispatchInfo multiDispatchInfo(&kernel);
|
||||
auto &commandStreamNDRangeKernel = getCommandStream<FamilyType, CL_COMMAND_NDRANGE_KERNEL>(*pCmdQ, CsrDependencies(), true, false, false,
|
||||
multiDispatchInfo, nullptr, 0);
|
||||
auto expectedSizeCS = EnqueueOperation<FamilyType>::getSizeRequiredCS(CL_COMMAND_NDRANGE_KERNEL, true, false, *pCmdQ, &kernel);
|
||||
auto expectedSizeCS = EnqueueOperation<FamilyType>::getSizeRequiredCS(CL_COMMAND_NDRANGE_KERNEL, true, false, *pCmdQ, &kernel, {});
|
||||
EXPECT_GE(expectedSizeCS, requiredSize);
|
||||
EXPECT_GE(commandStreamNDRangeKernel.getAvailableSpace(), requiredSize);
|
||||
|
||||
auto &commandStreamTask = getCommandStream<FamilyType, CL_COMMAND_TASK>(*pCmdQ, CsrDependencies(), true, false, false,
|
||||
multiDispatchInfo, nullptr, 0);
|
||||
expectedSizeCS = EnqueueOperation<FamilyType>::getSizeRequiredCS(CL_COMMAND_TASK, true, false, *pCmdQ, &kernel);
|
||||
expectedSizeCS = EnqueueOperation<FamilyType>::getSizeRequiredCS(CL_COMMAND_TASK, true, false, *pCmdQ, &kernel, {});
|
||||
EXPECT_GE(expectedSizeCS, requiredSize);
|
||||
EXPECT_GE(commandStreamTask.getAvailableSpace(), requiredSize);
|
||||
}
|
||||
@ -102,13 +102,13 @@ HWTEST_F(ProfilingTests, GivenCommandQueueWithProfilingAndForWorkloadWithNoKerne
|
||||
auto &commandStreamMigrateMemObjects = getCommandStream<FamilyType, CL_COMMAND_MIGRATE_MEM_OBJECTS>(*pCmdQ, CsrDependencies(),
|
||||
true, false, false,
|
||||
multiDispatchInfo, nullptr, 0);
|
||||
auto expectedSizeCS = EnqueueOperation<FamilyType>::getSizeRequiredCS(CL_COMMAND_MIGRATE_MEM_OBJECTS, true, false, *pCmdQ, nullptr);
|
||||
auto expectedSizeCS = EnqueueOperation<FamilyType>::getSizeRequiredCS(CL_COMMAND_MIGRATE_MEM_OBJECTS, true, false, *pCmdQ, nullptr, {});
|
||||
EXPECT_GE(expectedSizeCS, requiredSize);
|
||||
EXPECT_GE(commandStreamMigrateMemObjects.getAvailableSpace(), requiredSize);
|
||||
|
||||
auto &commandStreamMarker = getCommandStream<FamilyType, CL_COMMAND_MARKER>(*pCmdQ, CsrDependencies(), true,
|
||||
false, false, multiDispatchInfo, nullptr, 0);
|
||||
expectedSizeCS = EnqueueOperation<FamilyType>::getSizeRequiredCS(CL_COMMAND_MARKER, true, false, *pCmdQ, nullptr);
|
||||
expectedSizeCS = EnqueueOperation<FamilyType>::getSizeRequiredCS(CL_COMMAND_MARKER, true, false, *pCmdQ, nullptr, {});
|
||||
EXPECT_GE(expectedSizeCS, requiredSize);
|
||||
EXPECT_GE(commandStreamMarker.getAvailableSpace(), requiredSize);
|
||||
}
|
||||
@ -665,13 +665,13 @@ HWTEST_F(ProfilingWithPerfCountersTests, GivenCommandQueueWithProfilingPerfCount
|
||||
auto &commandStreamMigrateMemObjects = getCommandStream<FamilyType, CL_COMMAND_MIGRATE_MEM_OBJECTS>(*pCmdQ, CsrDependencies(),
|
||||
true, true, false, multiDispatchInfo,
|
||||
nullptr, 0);
|
||||
auto expectedSizeCS = EnqueueOperation<FamilyType>::getSizeRequiredCS(CL_COMMAND_MIGRATE_MEM_OBJECTS, true, true, *pCmdQ, nullptr);
|
||||
auto expectedSizeCS = EnqueueOperation<FamilyType>::getSizeRequiredCS(CL_COMMAND_MIGRATE_MEM_OBJECTS, true, true, *pCmdQ, nullptr, {});
|
||||
EXPECT_GE(expectedSizeCS, requiredSize);
|
||||
EXPECT_GE(commandStreamMigrateMemObjects.getAvailableSpace(), requiredSize);
|
||||
|
||||
auto &commandStreamMarker = getCommandStream<FamilyType, CL_COMMAND_MARKER>(*pCmdQ, CsrDependencies(), true, true, false,
|
||||
multiDispatchInfo, nullptr, 0);
|
||||
expectedSizeCS = EnqueueOperation<FamilyType>::getSizeRequiredCS(CL_COMMAND_MARKER, true, true, *pCmdQ, nullptr);
|
||||
expectedSizeCS = EnqueueOperation<FamilyType>::getSizeRequiredCS(CL_COMMAND_MARKER, true, true, *pCmdQ, nullptr, {});
|
||||
EXPECT_GE(expectedSizeCS, requiredSize);
|
||||
EXPECT_GE(commandStreamMarker.getAvailableSpace(), requiredSize);
|
||||
}
|
||||
|
@ -46,7 +46,7 @@ struct EncodeDispatchKernel {
|
||||
|
||||
static void *getInterfaceDescriptor(CommandContainer &container, uint32_t &iddOffset);
|
||||
|
||||
static size_t estimateEncodeDispatchKernelCmdsSize(Device *device);
|
||||
static size_t estimateEncodeDispatchKernelCmdsSize(Device *device, Vec3<size_t> groupStart, Vec3<size_t> groupCount);
|
||||
|
||||
static bool isRuntimeLocalIdsGenerationRequired(uint32_t activeChannels,
|
||||
size_t *lws,
|
||||
|
@ -43,7 +43,13 @@ void EncodeDispatchKernel<Family>::encode(CommandContainer &container,
|
||||
LinearStream *listCmdBufferStream = container.getCommandStream();
|
||||
size_t sshOffset = 0;
|
||||
|
||||
size_t estimatedSizeRequired = estimateEncodeDispatchKernelCmdsSize(device);
|
||||
auto threadDims = static_cast<const uint32_t *>(pThreadGroupDimensions);
|
||||
const Vec3<size_t> threadStartVec{0, 0, 0};
|
||||
Vec3<size_t> threadDimsVec{0, 0, 0};
|
||||
if (threadDims != nullptr) {
|
||||
threadDimsVec = {threadDims[0], threadDims[1], threadDims[2]};
|
||||
}
|
||||
size_t estimatedSizeRequired = estimateEncodeDispatchKernelCmdsSize(device, threadStartVec, threadDimsVec);
|
||||
if (container.getCommandStream()->getAvailableSpace() < estimatedSizeRequired) {
|
||||
auto bbEnd = listCmdBufferStream->getSpaceForCmd<MI_BATCH_BUFFER_END>();
|
||||
*bbEnd = Family::cmdInitBatchBufferEnd;
|
||||
@ -191,7 +197,7 @@ void EncodeDispatchKernel<Family>::encode(CommandContainer &container,
|
||||
|
||||
EncodeDispatchKernel<Family>::encodeThreadData(cmd,
|
||||
nullptr,
|
||||
static_cast<const uint32_t *>(pThreadGroupDimensions),
|
||||
threadDims,
|
||||
dispatchInterface->getGroupSize(),
|
||||
kernelDescriptor.kernelAttributes.simdSize,
|
||||
kernelDescriptor.kernelAttributes.numLocalIdChannels,
|
||||
@ -316,7 +322,7 @@ template <typename Family>
|
||||
void EncodeDispatchKernel<Family>::appendAdditionalIDDFields(INTERFACE_DESCRIPTOR_DATA *pInterfaceDescriptor, const HardwareInfo &hwInfo, const uint32_t threadsPerThreadGroup, uint32_t slmTotalSize, SlmPolicy slmPolicy) {}
|
||||
|
||||
template <typename Family>
|
||||
size_t EncodeDispatchKernel<Family>::estimateEncodeDispatchKernelCmdsSize(Device *device) {
|
||||
size_t EncodeDispatchKernel<Family>::estimateEncodeDispatchKernelCmdsSize(Device *device, Vec3<size_t> groupStart, Vec3<size_t> groupCount) {
|
||||
using MEDIA_STATE_FLUSH = typename Family::MEDIA_STATE_FLUSH;
|
||||
using MEDIA_INTERFACE_DESCRIPTOR_LOAD = typename Family::MEDIA_INTERFACE_DESCRIPTOR_LOAD;
|
||||
using MI_BATCH_BUFFER_END = typename Family::MI_BATCH_BUFFER_END;
|
||||
|
@ -105,9 +105,9 @@ GEN12LPTEST_F(CommandEncoderTest, givenVariousEngineTypesWhenEstimateCommandBuff
|
||||
using PIPELINE_SELECT = typename FamilyType::PIPELINE_SELECT;
|
||||
using STATE_COMPUTE_MODE = typename FamilyType::STATE_COMPUTE_MODE;
|
||||
|
||||
auto sizeWA = EncodeDispatchKernel<FamilyType>::estimateEncodeDispatchKernelCmdsSize(pDevice);
|
||||
auto sizeWA = EncodeDispatchKernel<FamilyType>::estimateEncodeDispatchKernelCmdsSize(pDevice, Vec3<size_t>(0, 0, 0), Vec3<size_t>(1, 1, 1));
|
||||
static_cast<MockOsContext *>(pDevice->getDefaultEngine().osContext)->engineType = aub_stream::ENGINE_CCS;
|
||||
auto size = EncodeDispatchKernel<FamilyType>::estimateEncodeDispatchKernelCmdsSize(pDevice);
|
||||
auto size = EncodeDispatchKernel<FamilyType>::estimateEncodeDispatchKernelCmdsSize(pDevice, Vec3<size_t>(0, 0, 0), Vec3<size_t>(1, 1, 1));
|
||||
|
||||
auto expectedDiff = 2 * PreambleHelper<FamilyType>::getCmdSizeForPipelineSelect(pDevice->getHardwareInfo());
|
||||
auto diff = sizeWA - size;
|
||||
|
Reference in New Issue
Block a user