mirror of
https://github.com/intel/compute-runtime.git
synced 2025-12-21 01:04:57 +08:00
Command container appends BB_END on cmd buffer allocation end
When the linear stream created for a command container does not have enough space for both a command and BB_END, it programs BB_END and allocates a new command buffer allocation. In this case, the pointer returned from getSpace points to storage in the new command buffer allocation. Related-To: NEO-5707 Signed-off-by: Maciej Plewka <maciej.plewka@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
92316c48f2
commit
9d8ce7aace
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2020-2021 Intel Corporation
|
||||
* Copyright (C) 2020-2022 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -159,7 +159,6 @@ struct CommandListCoreFamily : CommandListImp {
|
||||
ze_result_t reset() override;
|
||||
ze_result_t executeCommandListImmediate(bool performMigration) override;
|
||||
size_t getReserveSshSize();
|
||||
void increaseCommandStreamSpace(size_t commandSize);
|
||||
|
||||
protected:
|
||||
MOCKABLE_VIRTUAL ze_result_t appendMemoryCopyKernelWithGA(void *dstPtr, NEO::GraphicsAllocation *dstPtrAlloc,
|
||||
|
||||
@@ -332,7 +332,6 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendEventReset(ze_event_hand
|
||||
if (this->partitionCount > 1) {
|
||||
estimateSize += estimateBufferSizeMultiTileBarrier(hwInfo);
|
||||
}
|
||||
increaseCommandStreamSpace(estimateSize);
|
||||
|
||||
for (uint32_t i = 0u; i < packetsToReset; i++) {
|
||||
NEO::MemorySynchronizationCommands<GfxFamily>::addPipeControlAndProgramPostSyncOperation(
|
||||
@@ -896,13 +895,6 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopyBlit(uintptr_t
|
||||
commandContainer.addToResidencyContainer(clearColorAllocation);
|
||||
|
||||
NEO::BlitPropertiesContainer blitPropertiesContainer{blitProperties};
|
||||
bool blitterDirectSubmission = true; // assume direct submission enabled, since usually MI_BATCH_BUFFER_START is bigger than MI_BATCH_BUFFER_END
|
||||
size_t estimatedSize = NEO::BlitCommandsHelper<GfxFamily>::template BlitCommandsHelper<GfxFamily>::estimateBlitCommandsSize(blitPropertiesContainer,
|
||||
false,
|
||||
false,
|
||||
blitterDirectSubmission,
|
||||
*device->getNEODevice()->getExecutionEnvironment()->rootDeviceEnvironments[device->getRootDeviceIndex()]);
|
||||
increaseCommandStreamSpace(estimatedSize);
|
||||
|
||||
NEO::BlitCommandsHelper<GfxFamily>::dispatchBlitCommandsForBufferPerRow(blitProperties, *commandContainer.getCommandStream(), *device->getNEODevice()->getExecutionEnvironment()->rootDeviceEnvironments[device->getRootDeviceIndex()]);
|
||||
|
||||
@@ -946,13 +938,6 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopyBlitRegion(NEO
|
||||
}
|
||||
|
||||
NEO::BlitPropertiesContainer blitPropertiesContainer{blitProperties};
|
||||
bool blitterDirectSubmission = true; // assume direct submission enabled, since usually MI_BATCH_BUFFER_START is bigger than MI_BATCH_BUFFER_END
|
||||
size_t estimatedSize = NEO::BlitCommandsHelper<GfxFamily>::template BlitCommandsHelper<GfxFamily>::estimateBlitCommandsSize(blitPropertiesContainer,
|
||||
false,
|
||||
false,
|
||||
blitterDirectSubmission,
|
||||
*device->getNEODevice()->getExecutionEnvironment()->rootDeviceEnvironments[device->getRootDeviceIndex()]);
|
||||
increaseCommandStreamSpace(estimatedSize);
|
||||
|
||||
appendEventForProfiling(hSignalEvent, true);
|
||||
bool copyRegionPreferred = NEO::BlitCommandsHelper<GfxFamily>::isCopyRegionPreferred(copySizeModified, *device->getNEODevice()->getExecutionEnvironment()->rootDeviceEnvironments[device->getRootDeviceIndex()]);
|
||||
@@ -1684,11 +1669,9 @@ void CommandListCoreFamily<gfxCoreFamily>::appendSignalEventPostWalker(ze_event_
|
||||
if (isCopyOnly()) {
|
||||
NEO::MiFlushArgs args;
|
||||
args.commandWithPostSync = true;
|
||||
increaseCommandStreamSpace(NEO::EncodeMiFlushDW<GfxFamily>::getMiFlushDwCmdSizeForDataWrite());
|
||||
NEO::EncodeMiFlushDW<GfxFamily>::programMiFlushDw(*commandContainer.getCommandStream(), baseAddr, Event::STATE_SIGNALED,
|
||||
args, hwInfo);
|
||||
} else {
|
||||
increaseCommandStreamSpace(NEO::MemorySynchronizationCommands<GfxFamily>::getSizeForPipeControlWithPostSyncOperation(hwInfo));
|
||||
NEO::PipeControlArgs args;
|
||||
args.dcFlushEnable = NEO::MemorySynchronizationCommands<GfxFamily>::getDcFlushEnable(event->signalScope, hwInfo);
|
||||
if (this->partitionCount > 1) {
|
||||
@@ -1839,7 +1822,6 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendSignalEvent(ze_event_han
|
||||
if (isCopyOnly()) {
|
||||
NEO::MiFlushArgs args;
|
||||
args.commandWithPostSync = true;
|
||||
increaseCommandStreamSpace(NEO::EncodeMiFlushDW<GfxFamily>::getMiFlushDwCmdSizeForDataWrite());
|
||||
NEO::EncodeMiFlushDW<GfxFamily>::programMiFlushDw(*commandContainer.getCommandStream(), ptrOffset(baseAddr, eventSignalOffset),
|
||||
Event::STATE_SIGNALED, args, hwInfo);
|
||||
} else {
|
||||
@@ -1851,7 +1833,6 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendSignalEvent(ze_event_han
|
||||
event->setPacketsInUse(this->partitionCount);
|
||||
}
|
||||
if (applyScope || event->isEventTimestampFlagSet()) {
|
||||
increaseCommandStreamSpace(NEO::MemorySynchronizationCommands<GfxFamily>::getSizeForPipeControlWithPostSyncOperation(hwInfo));
|
||||
NEO::MemorySynchronizationCommands<GfxFamily>::addPipeControlAndProgramPostSyncOperation(
|
||||
*commandContainer.getCommandStream(),
|
||||
POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA,
|
||||
@@ -1860,7 +1841,6 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendSignalEvent(ze_event_han
|
||||
hwInfo,
|
||||
args);
|
||||
} else {
|
||||
increaseCommandStreamSpace(NEO::EncodeStoreMemory<GfxFamily>::getStoreDataImmSize());
|
||||
NEO::EncodeStoreMemory<GfxFamily>::programStoreDataImm(
|
||||
*commandContainer.getCommandStream(),
|
||||
ptrOffset(baseAddr, eventSignalOffset),
|
||||
@@ -1928,7 +1908,6 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendWaitOnEvents(uint32_t nu
|
||||
estimatedBufferSize += NEO::EncodeSempahore<GfxFamily>::getSizeMiSemaphoreWait();
|
||||
}
|
||||
}
|
||||
increaseCommandStreamSpace(estimatedBufferSize);
|
||||
|
||||
if (dcFlushRequired) {
|
||||
if (isCopyOnly()) {
|
||||
@@ -2204,17 +2183,6 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::reserveSpace(size_t size, void
|
||||
return ZE_RESULT_SUCCESS;
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
void CommandListCoreFamily<gfxCoreFamily>::increaseCommandStreamSpace(size_t commandSize) {
|
||||
using MI_BATCH_BUFFER_END = typename GfxFamily::MI_BATCH_BUFFER_END;
|
||||
size_t estimatedSizeRequired = commandSize + sizeof(MI_BATCH_BUFFER_END);
|
||||
if (commandContainer.getCommandStream()->getAvailableSpace() < estimatedSizeRequired) {
|
||||
auto bbEnd = commandContainer.getCommandStream()->template getSpaceForCmd<MI_BATCH_BUFFER_END>();
|
||||
*bbEnd = GfxFamily::cmdInitBatchBufferEnd;
|
||||
commandContainer.allocateNextCommandBuffer();
|
||||
}
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
ze_result_t CommandListCoreFamily<gfxCoreFamily>::prepareIndirectParams(const ze_group_count_t *pThreadGroupDimensions) {
|
||||
using GfxFamily = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;
|
||||
@@ -2353,9 +2321,6 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendBarrier(ze_event_handle_
|
||||
const auto &hwInfo = this->device->getHwInfo();
|
||||
if (!hSignalEvent) {
|
||||
if (isCopyOnly()) {
|
||||
size_t estimatedSizeRequired = NEO::EncodeMiFlushDW<GfxFamily>::getMiFlushDwCmdSizeForDataWrite();
|
||||
increaseCommandStreamSpace(estimatedSizeRequired);
|
||||
|
||||
NEO::MiFlushArgs args;
|
||||
NEO::EncodeMiFlushDW<GfxFamily>::programMiFlushDw(*commandContainer.getCommandStream(), 0, 0, args, hwInfo);
|
||||
} else {
|
||||
|
||||
@@ -183,9 +183,6 @@ void CommandListCoreFamily<gfxCoreFamily>::appendMultiPartitionEpilogue() {}
|
||||
|
||||
// Appends a compute barrier as a single pipe control. The size of one pipe
// control is passed to increaseCommandStreamSpace() first, then the pipe
// control is added to the container's command stream with the flags returned
// by createBarrierFlags().
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
void CommandListCoreFamily<gfxCoreFamily>::appendComputeBarrierCommand() {
|
||||
size_t estimatedSizeRequired = NEO::MemorySynchronizationCommands<GfxFamily>::getSizeForSinglePipeControl();
|
||||
increaseCommandStreamSpace(estimatedSizeRequired);
|
||||
|
||||
NEO::PipeControlArgs args = createBarrierFlags();
|
||||
NEO::MemorySynchronizationCommands<GfxFamily>::addPipeControl(*commandContainer.getCommandStream(), args);
|
||||
}
|
||||
|
||||
@@ -134,8 +134,6 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(z
|
||||
|
||||
const auto &hwInfo = this->device->getHwInfo();
|
||||
if (NEO::DebugManager.flags.ForcePipeControlPriorToWalker.get()) {
|
||||
increaseCommandStreamSpace(NEO::MemorySynchronizationCommands<GfxFamily>::getSizeForSinglePipeControl());
|
||||
|
||||
NEO::PipeControlArgs args;
|
||||
NEO::MemorySynchronizationCommands<GfxFamily>::addPipeControl(*commandContainer.getCommandStream(), args);
|
||||
}
|
||||
@@ -245,8 +243,6 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(z
|
||||
event->setPacketsInUse(partitionCount);
|
||||
}
|
||||
if (L3FlushEnable) {
|
||||
size_t estimatedSize = NEO::MemorySynchronizationCommands<GfxFamily>::getSizeForPipeControlWithPostSyncOperation(hwInfo);
|
||||
increaseCommandStreamSpace(estimatedSize);
|
||||
programEventL3Flush<gfxCoreFamily>(hEvent, this->device, partitionCount, commandContainer);
|
||||
}
|
||||
}
|
||||
@@ -302,16 +298,12 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(z
|
||||
|
||||
// Dispatches the implicit-scaling offset register into the command stream
// with the caller-supplied value, after passing the size reported by
// getOffsetRegisterSize() to increaseCommandStreamSpace().
//
// partitionDataSize: value forwarded to dispatchOffsetRegister().
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
void CommandListCoreFamily<gfxCoreFamily>::appendMultiPartitionPrologue(uint32_t partitionDataSize) {
|
||||
size_t estimatedSizeRequired = NEO::ImplicitScalingDispatch<GfxFamily>::getOffsetRegisterSize();
|
||||
increaseCommandStreamSpace(estimatedSizeRequired);
|
||||
NEO::ImplicitScalingDispatch<GfxFamily>::dispatchOffsetRegister(*commandContainer.getCommandStream(),
|
||||
partitionDataSize);
|
||||
}
|
||||
|
||||
// Dispatches the implicit-scaling offset register into the command stream
// with the value returned by ImplicitScalingDispatch::getPostSyncOffset(),
// after passing the size reported by getOffsetRegisterSize() to
// increaseCommandStreamSpace().
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
void CommandListCoreFamily<gfxCoreFamily>::appendMultiPartitionEpilogue() {
|
||||
const size_t estimatedSizeRequired = NEO::ImplicitScalingDispatch<GfxFamily>::getOffsetRegisterSize();
|
||||
increaseCommandStreamSpace(estimatedSizeRequired);
|
||||
NEO::ImplicitScalingDispatch<GfxFamily>::dispatchOffsetRegister(*commandContainer.getCommandStream(),
|
||||
NEO::ImplicitScalingDispatch<GfxFamily>::getPostSyncOffset());
|
||||
}
|
||||
@@ -320,14 +312,9 @@ template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
void CommandListCoreFamily<gfxCoreFamily>::appendComputeBarrierCommand() {
// Appends a compute barrier. With more than one partition the multi-tile
// barrier is appended; otherwise a single pipe control built from
// createBarrierFlags() is added to the command stream. In both branches the
// estimated size is passed to increaseCommandStreamSpace() beforehand.
|
||||
if (this->partitionCount > 1) {
|
||||
auto neoDevice = device->getNEODevice();
|
||||
auto &hwInfo = neoDevice->getHardwareInfo();
|
||||
|
||||
increaseCommandStreamSpace(estimateBufferSizeMultiTileBarrier(hwInfo));
|
||||
appendMultiTileBarrier(*neoDevice);
|
||||
} else {
|
||||
NEO::PipeControlArgs args = createBarrierFlags();
|
||||
size_t estimatedSizeRequired = NEO::MemorySynchronizationCommands<GfxFamily>::getSizeForSinglePipeControl();
|
||||
increaseCommandStreamSpace(estimatedSizeRequired);
|
||||
NEO::MemorySynchronizationCommands<GfxFamily>::addPipeControl(*commandContainer.getCommandStream(), args);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -44,9 +44,6 @@ ze_result_t CommandListCoreFamily<IGFX_XE_HPC_CORE>::appendMemoryPrefetch(const
|
||||
|
||||
NEO::LinearStream &cmdStream = *commandContainer.getCommandStream();
|
||||
|
||||
size_t estimatedSizeRequired = NEO::EncodeMemoryPrefetch<GfxFamily>::getSizeForMemoryPrefetch(size);
|
||||
increaseCommandStreamSpace(estimatedSizeRequired);
|
||||
|
||||
NEO::EncodeMemoryPrefetch<GfxFamily>::programMemoryPrefetch(cmdStream, *gpuAlloc, static_cast<uint32_t>(size), offset, hwInfo);
|
||||
|
||||
return ZE_RESULT_SUCCESS;
|
||||
@@ -56,9 +53,6 @@ template <>
|
||||
void CommandListCoreFamily<IGFX_XE_HPC_CORE>::applyMemoryRangesBarrier(uint32_t numRanges,
|
||||
const size_t *pRangeSizes,
|
||||
const void **pRanges) {
|
||||
|
||||
increaseCommandStreamSpace(NEO::MemorySynchronizationCommands<GfxFamily>::getSizeForSinglePipeControl());
|
||||
|
||||
NEO::PipeControlArgs args;
|
||||
args.hdcPipelineFlush = true;
|
||||
args.unTypedDataPortCacheFlush = true;
|
||||
|
||||
@@ -106,10 +106,9 @@ HWTEST_F(CommandListAppendLaunchKernel, givenNotEnoughSpaceInCommandStreamWhenAp
|
||||
const auto streamCpu = stream->getCpuBase();
|
||||
|
||||
Vec3<size_t> groupCount{1, 1, 1};
|
||||
auto requiredSizeEstimate = EncodeDispatchKernel<FamilyType>::estimateEncodeDispatchKernelCmdsSize(
|
||||
device->getNEODevice(), {0, 0, 0}, groupCount, false, false, false, kernel.get(), false);
|
||||
auto sizeLeftInStream = sizeof(MI_BATCH_BUFFER_END);
|
||||
auto available = stream->getAvailableSpace();
|
||||
stream->getSpace(available - requiredSizeEstimate + 1);
|
||||
stream->getSpace(available - sizeLeftInStream);
|
||||
auto bbEndPosition = stream->getSpace(0);
|
||||
|
||||
const uint32_t threadGroupDimensions[3] = {1, 1, 1};
|
||||
@@ -236,38 +235,6 @@ HWTEST_F(CommandListAppendLaunchKernel, WhenAppendingMultipleTimesThenSshIsNotDe
|
||||
EXPECT_NE(initialAllocation, reallocatedAllocation);
|
||||
}
|
||||
|
||||
// Verifies that the number of bytes consumed in the command stream by
// appendLaunchKernelWithParams() does not exceed the value returned by
// EncodeDispatchKernel::estimateEncodeDispatchKernelCmdsSize(). The check is
// performed for two consecutive appends on the same command list.
HWTEST2_F(CommandListAppendLaunchKernel, WhenAppendingFunctionThenUsedCmdBufferSizeDoesNotExceedEstimate, IsAtLeastSkl) {
|
||||
createKernel();
|
||||
ze_group_count_t groupCount{1, 1, 1};
|
||||
|
||||
auto commandList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>>();
|
||||
ze_result_t ret = commandList->initialize(device, NEO::EngineGroupType::RenderCompute, 0u);
|
||||
ASSERT_EQ(ZE_RESULT_SUCCESS, ret);
|
||||
|
||||
// First append: measure stream usage before and after.
auto sizeBefore = commandList->commandContainer.getCommandStream()->getUsed();
|
||||
|
||||
auto result = commandList->appendLaunchKernelWithParams(kernel->toHandle(), &groupCount, nullptr, false, false, false);
|
||||
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
|
||||
auto sizeAfter = commandList->commandContainer.getCommandStream()->getUsed();
|
||||
auto estimate = NEO::EncodeDispatchKernel<FamilyType>::estimateEncodeDispatchKernelCmdsSize(
|
||||
device->getNEODevice(), Vec3<size_t>(0, 0, 0), Vec3<size_t>(1, 1, 1), false, false, false, kernel.get(), false);
|
||||
|
||||
EXPECT_LE(sizeAfter - sizeBefore, estimate);
|
||||
|
||||
// Second append: same measurement; the fourth argument of
// appendLaunchKernelWithParams is true here while the estimate is computed
// with the same arguments as before.
sizeBefore = commandList->commandContainer.getCommandStream()->getUsed();
|
||||
|
||||
result = commandList->appendLaunchKernelWithParams(kernel->toHandle(), &groupCount, nullptr, true, false, false);
|
||||
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
|
||||
sizeAfter = commandList->commandContainer.getCommandStream()->getUsed();
|
||||
estimate = NEO::EncodeDispatchKernel<FamilyType>::estimateEncodeDispatchKernelCmdsSize(
|
||||
device->getNEODevice(), Vec3<size_t>(0, 0, 0), Vec3<size_t>(1, 1, 1), false, false, false, kernel.get(), false);
|
||||
|
||||
// NOTE(review): the same expectation is stated twice below; the second one is redundant.
EXPECT_LE(sizeAfter - sizeBefore, estimate);
|
||||
EXPECT_LE(sizeAfter - sizeBefore, estimate);
|
||||
}
|
||||
|
||||
HWCMDTEST_F(IGFX_GEN8_CORE, CommandListAppendLaunchKernel, givenEventsWhenAppendingKernelThenPostSyncToEventIsGenerated) {
|
||||
using GPGPU_WALKER = typename FamilyType::GPGPU_WALKER;
|
||||
using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
|
||||
@@ -689,19 +656,10 @@ HWTEST_F(CommandListAppendLaunchKernel, givenIndirectDispatchWithImplicitArgsWhe
|
||||
auto result = context->allocDeviceMem(device->toHandle(), &deviceDesc, 16384u, 4096u, &alloc);
|
||||
ASSERT_EQ(result, ZE_RESULT_SUCCESS);
|
||||
|
||||
auto sizeBefore = commandList->commandContainer.getCommandStream()->getUsed();
|
||||
|
||||
result = commandList->appendLaunchKernelIndirect(kernel.toHandle(),
|
||||
static_cast<ze_group_count_t *>(alloc),
|
||||
nullptr, 0, nullptr);
|
||||
EXPECT_EQ(result, ZE_RESULT_SUCCESS);
|
||||
|
||||
auto sizeAfter = commandList->commandContainer.getCommandStream()->getUsed();
|
||||
auto estimate = NEO::EncodeDispatchKernel<FamilyType>::estimateEncodeDispatchKernelCmdsSize(
|
||||
device->getNEODevice(), Vec3<size_t>(0, 0, 0), Vec3<size_t>(1, 1, 1), false, false, true, &kernel, false);
|
||||
|
||||
EXPECT_LE(sizeAfter - sizeBefore, estimate);
|
||||
|
||||
auto heap = commandList->commandContainer.getIndirectHeap(HeapType::INDIRECT_OBJECT);
|
||||
uint64_t pImplicitArgsGPUVA = heap->getGraphicsAllocation()->getGpuAddress() + kernel.getSizeForImplicitArgsPatching() - sizeof(ImplicitArgs);
|
||||
auto workDimStoreRegisterMemCmd = FamilyType::cmdInitStoreRegisterMem;
|
||||
@@ -869,19 +827,11 @@ HWTEST_F(CommandListAppendLaunchKernel, givenIndirectDispatchWhenAppendingThenWo
|
||||
auto result = context->allocDeviceMem(device->toHandle(), &deviceDesc, 16384u, 4096u, &alloc);
|
||||
ASSERT_EQ(result, ZE_RESULT_SUCCESS);
|
||||
|
||||
auto sizeBefore = commandList->commandContainer.getCommandStream()->getUsed();
|
||||
|
||||
result = commandList->appendLaunchKernelIndirect(kernel.toHandle(),
|
||||
static_cast<ze_group_count_t *>(alloc),
|
||||
nullptr, 0, nullptr);
|
||||
EXPECT_EQ(result, ZE_RESULT_SUCCESS);
|
||||
|
||||
auto sizeAfter = commandList->commandContainer.getCommandStream()->getUsed();
|
||||
auto estimate = NEO::EncodeDispatchKernel<FamilyType>::estimateEncodeDispatchKernelCmdsSize(
|
||||
device->getNEODevice(), Vec3<size_t>(0, 0, 0), Vec3<size_t>(1, 1, 1), false, false, true, &kernel, false);
|
||||
|
||||
EXPECT_LE(sizeAfter - sizeBefore, estimate);
|
||||
|
||||
kernel.groupSize[2] = 2;
|
||||
result = commandList->appendLaunchKernelIndirect(kernel.toHandle(),
|
||||
static_cast<ze_group_count_t *>(alloc),
|
||||
|
||||
@@ -1209,20 +1209,19 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, MultiTileCommandListAppendLaunchFunctionXeHpCoreTes
|
||||
HWTEST2_F(MultiTileCommandListAppendLaunchFunctionXeHpCoreTest, givenCooperativeKernelWhenAppendingKernelsThenDoNotUseImplicitScaling, IsAtLeastXeHpCore) {
|
||||
ze_group_count_t groupCount{1, 1, 1};
|
||||
|
||||
auto estimateWithNonCooperativeKernel = NEO::EncodeDispatchKernel<FamilyType>::estimateEncodeDispatchKernelCmdsSize(
|
||||
device->getNEODevice(), Vec3<size_t>{0, 0, 0}, Vec3<size_t>{1, 1, 1}, false, false, false, kernel.get(), true);
|
||||
auto estimateWithCooperativeKernel = NEO::EncodeDispatchKernel<FamilyType>::estimateEncodeDispatchKernelCmdsSize(
|
||||
device->getNEODevice(), Vec3<size_t>{0, 0, 0}, Vec3<size_t>{1, 1, 1}, false, true, false, kernel.get(), true);
|
||||
EXPECT_GT(estimateWithNonCooperativeKernel, estimateWithCooperativeKernel);
|
||||
|
||||
auto commandListWithNonCooperativeKernel = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>>();
|
||||
auto result = commandListWithNonCooperativeKernel->initialize(device, NEO::EngineGroupType::RenderCompute, 0u);
|
||||
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
auto sizeBefore = commandListWithNonCooperativeKernel->commandContainer.getCommandStream()->getUsed();
|
||||
result = commandListWithNonCooperativeKernel->appendLaunchKernelWithParams(kernel->toHandle(), &groupCount, nullptr, false, false, false);
|
||||
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
auto sizeUsedWithNonCooperativeKernel = commandListWithNonCooperativeKernel->commandContainer.getCommandStream()->getUsed() - sizeBefore;
|
||||
EXPECT_LE(sizeUsedWithNonCooperativeKernel, estimateWithNonCooperativeKernel);
|
||||
auto sizeAfter = commandListWithNonCooperativeKernel->commandContainer.getCommandStream()->getUsed();
|
||||
GenCmdList cmdList;
|
||||
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
|
||||
cmdList, ptrOffset(commandListWithNonCooperativeKernel->commandContainer.getCommandStream()->getCpuBase(), sizeBefore), sizeAfter - sizeBefore));
|
||||
auto itorWalker = find<typename FamilyType::WALKER_TYPE *>(cmdList.begin(), cmdList.end());
|
||||
auto cmd = genCmdCast<typename FamilyType::WALKER_TYPE *>(*itorWalker);
|
||||
EXPECT_TRUE(cmd->getWorkloadPartitionEnable());
|
||||
|
||||
auto commandListWithCooperativeKernel = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>>();
|
||||
result = commandListWithCooperativeKernel->initialize(device, NEO::EngineGroupType::RenderCompute, 0u);
|
||||
@@ -1230,8 +1229,14 @@ HWTEST2_F(MultiTileCommandListAppendLaunchFunctionXeHpCoreTest, givenCooperative
|
||||
sizeBefore = commandListWithCooperativeKernel->commandContainer.getCommandStream()->getUsed();
|
||||
result = commandListWithCooperativeKernel->appendLaunchKernelWithParams(kernel->toHandle(), &groupCount, nullptr, false, false, true);
|
||||
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
auto sizeUsedWithCooperativeKernel = commandListWithCooperativeKernel->commandContainer.getCommandStream()->getUsed() - sizeBefore;
|
||||
EXPECT_LE(sizeUsedWithCooperativeKernel, estimateWithCooperativeKernel);
|
||||
sizeAfter = commandListWithCooperativeKernel->commandContainer.getCommandStream()->getUsed();
|
||||
cmdList.clear();
|
||||
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
|
||||
cmdList, ptrOffset(commandListWithNonCooperativeKernel->commandContainer.getCommandStream()->getCpuBase(), sizeBefore), sizeAfter - sizeBefore));
|
||||
|
||||
itorWalker = find<typename FamilyType::WALKER_TYPE *>(cmdList.begin(), cmdList.end());
|
||||
cmd = genCmdCast<typename FamilyType::WALKER_TYPE *>(*itorWalker);
|
||||
EXPECT_TRUE(cmd->getWorkloadPartitionEnable());
|
||||
}
|
||||
|
||||
} // namespace ult
|
||||
|
||||
@@ -1449,3 +1449,13 @@ HWTEST2_F(HwHelperTest, givenHwInfoConfigWhenCheckingForceNonGpuCoherencyWAThenF
|
||||
EXPECT_FALSE(hwHelper.forceNonGpuCoherencyWA(true));
|
||||
EXPECT_FALSE(hwHelper.forceNonGpuCoherencyWA(false));
|
||||
}
|
||||
|
||||
// Verifies that HwHelper::getBatchBufferEndSize() equals
// sizeof(MI_BATCH_BUFFER_END) of the family under test.
HWTEST_F(HwHelperTest, GivenHwInfoWhenGetBatchBufferEndSizeCalledThenCorrectSizeReturned) {
|
||||
const auto &hwHelper = HwHelper::get(renderCoreFamily);
|
||||
EXPECT_EQ(hwHelper.getBatchBufferEndSize(), sizeof(typename FamilyType::MI_BATCH_BUFFER_END));
|
||||
}
|
||||
|
||||
// Verifies that HwHelper::getBatchBufferEndReference() returns the address of
// the family's cmdInitBatchBufferEnd object.
HWTEST_F(HwHelperTest, GivenHwInfoWhenGetBatchBufferEndReferenceCalledThenCorrectPtrReturned) {
|
||||
const auto &hwHelper = HwHelper::get(renderCoreFamily);
|
||||
EXPECT_EQ(hwHelper.getBatchBufferEndReference(), reinterpret_cast<const void *>(&FamilyType::cmdInitBatchBufferEnd));
|
||||
}
|
||||
@@ -69,8 +69,10 @@ ErrorCode CommandContainer::initialize(Device *device, AllocationsList *reusable
|
||||
|
||||
cmdBufferAllocations.push_back(cmdBufferAllocation);
|
||||
|
||||
commandStream = std::unique_ptr<LinearStream>(new LinearStream(cmdBufferAllocation->getUnderlyingBuffer(),
|
||||
defaultListCmdBufferSize));
|
||||
const auto &hardwareInfo = device->getHardwareInfo();
|
||||
auto &hwHelper = NEO::HwHelper::get(hardwareInfo.platform.eRenderCoreFamily);
|
||||
commandStream = std::make_unique<LinearStream>(cmdBufferAllocation->getUnderlyingBuffer(),
|
||||
alignedSize - cmdBufferReservedSize, this, hwHelper.getBatchBufferEndSize());
|
||||
|
||||
commandStream->replaceGraphicsAllocation(cmdBufferAllocation);
|
||||
|
||||
@@ -264,7 +266,8 @@ void CommandContainer::allocateNextCommandBuffer() {
|
||||
|
||||
cmdBufferAllocations.push_back(cmdBufferAllocation);
|
||||
|
||||
commandStream->replaceBuffer(cmdBufferAllocation->getUnderlyingBuffer(), defaultListCmdBufferSize);
|
||||
size_t alignedSize = alignUp<size_t>(totalCmdBufferSize, MemoryConstants::pageSize64k);
|
||||
commandStream->replaceBuffer(cmdBufferAllocation->getUnderlyingBuffer(), alignedSize - cmdBufferReservedSize);
|
||||
commandStream->replaceGraphicsAllocation(cmdBufferAllocation);
|
||||
|
||||
if (!getFlushTaskUsedForImmediate()) {
|
||||
@@ -272,6 +275,14 @@ void CommandContainer::allocateNextCommandBuffer() {
|
||||
}
|
||||
}
|
||||
|
||||
// Terminates the current command buffer and switches to a new one: copies the
// batch-buffer-end command provided by HwHelper (getBatchBufferEndReference(),
// getBatchBufferEndSize() bytes) to the pointer obtained from the command
// stream, then calls allocateNextCommandBuffer().
void CommandContainer::closeAndAllocateNextCommandBuffer() {
|
||||
auto &hwHelper = NEO::HwHelper::get(device->getHardwareInfo().platform.eRenderCoreFamily);
|
||||
auto bbEndSize = hwHelper.getBatchBufferEndSize();
|
||||
// A zero-byte request: the returned pointer is only used as the copy destination.
// NOTE(review): presumably this is the current write position and the stream
// keeps bbEndSize bytes in reserve for this write - confirm against LinearStream.
auto ptr = commandStream->getSpace(0u);
|
||||
memcpy_s(ptr, bbEndSize, hwHelper.getBatchBufferEndReference(), bbEndSize);
|
||||
allocateNextCommandBuffer();
|
||||
}
|
||||
|
||||
void CommandContainer::prepareBindfulSsh() {
|
||||
if (ApiSpecificConfig::getBindlessConfiguration()) {
|
||||
if (allocationIndirectHeaps[IndirectHeap::Type::SURFACE_STATE] == nullptr) {
|
||||
|
||||
@@ -36,10 +36,9 @@ enum class ErrorCode {
|
||||
class CommandContainer : public NonCopyableOrMovableClass {
|
||||
public:
|
||||
static constexpr size_t defaultListCmdBufferSize = MemoryConstants::kiloByte * 256;
|
||||
static constexpr size_t totalCmdBufferSize =
|
||||
defaultListCmdBufferSize +
|
||||
MemoryConstants::cacheLineSize +
|
||||
CSRequirements::csOverfetchSize;
|
||||
static constexpr size_t cmdBufferReservedSize = MemoryConstants::cacheLineSize +
|
||||
CSRequirements::csOverfetchSize;
|
||||
static constexpr size_t totalCmdBufferSize = defaultListCmdBufferSize + cmdBufferReservedSize;
|
||||
|
||||
CommandContainer();
|
||||
|
||||
@@ -86,6 +85,7 @@ class CommandContainer : public NonCopyableOrMovableClass {
|
||||
|
||||
IndirectHeap *getHeapWithRequiredSizeAndAlignment(HeapType heapType, size_t sizeRequired, size_t alignment);
|
||||
void allocateNextCommandBuffer();
|
||||
void closeAndAllocateNextCommandBuffer();
|
||||
|
||||
void handleCmdBufferAllocations(size_t startIndex);
|
||||
GraphicsAllocation *obtainNextCommandBufferAllocation();
|
||||
|
||||
@@ -7,7 +7,6 @@
|
||||
|
||||
#pragma once
|
||||
#include "shared/source/command_container/cmdcontainer.h"
|
||||
#include "shared/source/command_stream/linear_stream.h"
|
||||
#include "shared/source/debugger/debugger.h"
|
||||
#include "shared/source/execution_environment/execution_environment.h"
|
||||
#include "shared/source/helpers/definitions/mi_flush_args.h"
|
||||
@@ -64,10 +63,6 @@ struct EncodeDispatchKernel {
|
||||
|
||||
static void *getInterfaceDescriptor(CommandContainer &container, uint32_t &iddOffset);
|
||||
|
||||
static size_t estimateEncodeDispatchKernelCmdsSize(Device *device, const Vec3<size_t> &groupStart, const Vec3<size_t> &groupCount,
|
||||
bool isInternal, bool isCooperative, bool isIndirect, DispatchKernelEncoderI *dispatchInterface,
|
||||
bool isPartitioned);
|
||||
|
||||
static bool isRuntimeLocalIdsGenerationRequired(uint32_t activeChannels,
|
||||
size_t *lws,
|
||||
std::array<uint8_t, 3> walkOrder,
|
||||
@@ -116,8 +111,6 @@ struct EncodeStates {
|
||||
const void *fnDynamicStateHeap,
|
||||
BindlessHeapsHelper *bindlessHeapHelper,
|
||||
const HardwareInfo &hwInfo);
|
||||
|
||||
static size_t getAdjustStateComputeModeSize();
|
||||
};
|
||||
|
||||
template <typename GfxFamily>
|
||||
@@ -186,9 +179,6 @@ struct EncodeIndirectParams {
|
||||
static void setWorkDimIndirect(CommandContainer &container, const NEO::CrossThreadDataOffset offset, uint64_t crossThreadAddress, const uint32_t *groupSize);
|
||||
static void setGlobalWorkSizeIndirect(CommandContainer &container, const NEO::CrossThreadDataOffset offsets[3], uint64_t crossThreadAddress, const uint32_t *lws);
|
||||
|
||||
static size_t getCmdsSizeForIndirectParams();
|
||||
static size_t getCmdsSizeForSetGroupSizeIndirect();
|
||||
static size_t getCmdsSizeForSetGroupCountIndirect();
|
||||
static size_t getCmdsSizeForSetWorkDimIndirect(const uint32_t *groupSize, bool misalignedPtr);
|
||||
};
|
||||
|
||||
|
||||
@@ -92,11 +92,6 @@ uint32_t EncodeStates<Family>::copySamplerState(IndirectHeap *dsh,
|
||||
return samplerStateOffsetInDsh;
|
||||
} // namespace NEO
|
||||
|
||||
// Returns the command size contributed by the state-compute-mode adjustment;
// this implementation always returns 0.
template <typename Family>
|
||||
inline size_t EncodeStates<Family>::getAdjustStateComputeModeSize() {
|
||||
return 0;
|
||||
}
|
||||
|
||||
template <typename Family>
|
||||
void EncodeMathMMIO<Family>::encodeMulRegVal(CommandContainer &container, uint32_t offset, uint32_t val, uint64_t dstAddress) {
|
||||
int logLws = 0;
|
||||
@@ -665,22 +660,6 @@ void EncodeIndirectParams<Family>::setGlobalWorkSizeIndirect(CommandContainer &c
|
||||
}
|
||||
}
|
||||
|
||||
// Returns the size in bytes of three MI_LOAD_REGISTER_MEM commands.
template <typename Family>
|
||||
inline size_t EncodeIndirectParams<Family>::getCmdsSizeForIndirectParams() {
|
||||
return 3 * sizeof(typename Family::MI_LOAD_REGISTER_MEM);
|
||||
}
|
||||
|
||||
// Returns the size in bytes of three MI_STORE_REGISTER_MEM commands.
template <typename Family>
|
||||
inline size_t EncodeIndirectParams<Family>::getCmdsSizeForSetGroupCountIndirect() {
|
||||
return 3 * (sizeof(MI_STORE_REGISTER_MEM));
|
||||
}
|
||||
|
||||
// Returns three times the combined size of one MI_LOAD_REGISTER_REG, one
// MI_LOAD_REGISTER_IMM, one ALU sequence and one MI_STORE_REGISTER_MEM.
template <typename Family>
|
||||
inline size_t EncodeIndirectParams<Family>::getCmdsSizeForSetGroupSizeIndirect() {
|
||||
// One ALU sequence: an MI_MATH header followed by
// NUM_ALU_INST_FOR_READ_MODIFY_WRITE inline ALU instructions.
constexpr uint32_t aluCmdSize = sizeof(MI_MATH) + sizeof(MI_MATH_ALU_INST_INLINE) * NUM_ALU_INST_FOR_READ_MODIFY_WRITE;
|
||||
return 3 * (sizeof(MI_LOAD_REGISTER_REG) + sizeof(MI_LOAD_REGISTER_IMM) + aluCmdSize + sizeof(MI_STORE_REGISTER_MEM));
|
||||
}
|
||||
|
||||
template <typename Family>
|
||||
inline size_t EncodeIndirectParams<Family>::getCmdsSizeForSetWorkDimIndirect(const uint32_t *groupSize, bool misaligedPtr) {
|
||||
constexpr uint32_t aluCmdSize = sizeof(MI_MATH) + sizeof(MI_MATH_ALU_INST_INLINE) * NUM_ALU_INST_FOR_READ_MODIFY_WRITE;
|
||||
|
||||
@@ -66,15 +66,6 @@ void EncodeDispatchKernel<Family>::encode(CommandContainer &container,
|
||||
if (!args.isIndirect) {
|
||||
threadDimsVec = {threadDims[0], threadDims[1], threadDims[2]};
|
||||
}
|
||||
size_t estimatedSizeRequired = estimateEncodeDispatchKernelCmdsSize(args.device, threadStartVec, threadDimsVec,
|
||||
args.isInternal, args.isCooperative, args.isIndirect,
|
||||
args.dispatchInterface, false);
|
||||
if (container.getCommandStream()->getAvailableSpace() < estimatedSizeRequired) {
|
||||
auto bbEnd = listCmdBufferStream->getSpaceForCmd<MI_BATCH_BUFFER_END>();
|
||||
*bbEnd = Family::cmdInitBatchBufferEnd;
|
||||
|
||||
container.allocateNextCommandBuffer();
|
||||
}
|
||||
|
||||
WALKER_TYPE cmd = Family::cmdInitGpgpuWalker;
|
||||
auto idd = Family::cmdInitInterfaceDescriptorData;
|
||||
@@ -343,40 +334,6 @@ inline void EncodeDispatchKernel<Family>::encodeAdditionalWalkerFields(const Har
|
||||
// Intentionally empty in this implementation: no additional interface
// descriptor fields are programmed and all arguments are ignored.
template <typename Family>
|
||||
void EncodeDispatchKernel<Family>::appendAdditionalIDDFields(INTERFACE_DESCRIPTOR_DATA *pInterfaceDescriptor, const HardwareInfo &hwInfo, const uint32_t threadsPerThreadGroup, uint32_t slmTotalSize, SlmPolicy slmPolicy) {}
|
||||
|
||||
// Computes an estimate, in bytes, of the command-stream space used to encode
// one kernel dispatch, including a trailing MI_BATCH_BUFFER_END.
//
// device:            used to query preemption workaround and additional
//                    pipeline-select sizes.
// isIndirect:        when true, sizes of the indirect work-dim commands are
//                    added; dispatchInterface must then be non-null.
// dispatchInterface: queried for group size and implicit args when isIndirect.
// groupStart, groupCount, isInternal, isCooperative, isPartitioned: not
//                    referenced in this body.
//
// Returns the accumulated size.
template <typename Family>
|
||||
size_t EncodeDispatchKernel<Family>::estimateEncodeDispatchKernelCmdsSize(Device *device, const Vec3<size_t> &groupStart,
|
||||
const Vec3<size_t> &groupCount, bool isInternal,
|
||||
bool isCooperative, bool isIndirect, DispatchKernelEncoderI *dispatchInterface,
|
||||
bool isPartitioned) {
|
||||
using MEDIA_STATE_FLUSH = typename Family::MEDIA_STATE_FLUSH;
|
||||
using MEDIA_INTERFACE_DESCRIPTOR_LOAD = typename Family::MEDIA_INTERFACE_DESCRIPTOR_LOAD;
|
||||
using MI_BATCH_BUFFER_END = typename Family::MI_BATCH_BUFFER_END;
|
||||
|
||||
size_t issueMediaInterfaceDescriptorLoad = sizeof(MEDIA_STATE_FLUSH) + sizeof(MEDIA_INTERFACE_DESCRIPTOR_LOAD);
|
||||
size_t totalSize = sizeof(WALKER_TYPE);
|
||||
totalSize += PreemptionHelper::getPreemptionWaCsSize<Family>(*device);
|
||||
// A MEDIA_STATE_FLUSH is counted here in addition to the one inside
// issueMediaInterfaceDescriptorLoad.
totalSize += sizeof(MEDIA_STATE_FLUSH);
|
||||
totalSize += issueMediaInterfaceDescriptorLoad;
|
||||
totalSize += EncodeStates<Family>::getAdjustStateComputeModeSize();
|
||||
totalSize += EncodeWA<Family>::getAdditionalPipelineSelectSize(*device);
|
||||
// The three indirect-parameter sizes below are added regardless of isIndirect.
totalSize += EncodeIndirectParams<Family>::getCmdsSizeForIndirectParams();
|
||||
totalSize += EncodeIndirectParams<Family>::getCmdsSizeForSetGroupCountIndirect();
|
||||
totalSize += EncodeIndirectParams<Family>::getCmdsSizeForSetGroupSizeIndirect();
|
||||
if (isIndirect) {
|
||||
UNRECOVERABLE_IF(dispatchInterface == nullptr);
|
||||
totalSize += EncodeIndirectParams<Family>::getCmdsSizeForSetWorkDimIndirect(dispatchInterface->getGroupSize(), false);
|
||||
// With implicit args present, group count, group size and work dim
// sizes are added a second time (work dim with the misaligned flag set).
if (dispatchInterface->getImplicitArgs()) {
|
||||
totalSize += EncodeIndirectParams<Family>::getCmdsSizeForSetGroupCountIndirect();
|
||||
totalSize += EncodeIndirectParams<Family>::getCmdsSizeForSetGroupSizeIndirect();
|
||||
totalSize += EncodeIndirectParams<Family>::getCmdsSizeForSetWorkDimIndirect(dispatchInterface->getGroupSize(), true);
|
||||
}
|
||||
}
|
||||
|
||||
totalSize += sizeof(MI_BATCH_BUFFER_END);
|
||||
|
||||
return totalSize;
|
||||
}
|
||||
|
||||
template <typename Family>
|
||||
inline void EncodeComputeMode<Family>::programComputeModeCommand(LinearStream &csr, StateComputeModeProperties &properties, const HardwareInfo &hwInfo) {
|
||||
}
|
||||
|
||||
@@ -63,15 +63,6 @@ void EncodeDispatchKernel<Family>::encode(CommandContainer &container,
|
||||
if (!args.isIndirect) {
|
||||
threadDimsVec = {threadDims[0], threadDims[1], threadDims[2]};
|
||||
}
|
||||
size_t estimatedSizeRequired = estimateEncodeDispatchKernelCmdsSize(args.device, threadStartVec, threadDimsVec,
|
||||
args.isInternal, args.isCooperative, args.isIndirect, args.dispatchInterface,
|
||||
args.partitionCount > 1);
|
||||
if (container.getCommandStream()->getAvailableSpace() < estimatedSizeRequired) {
|
||||
auto bbEnd = listCmdBufferStream->getSpaceForCmd<MI_BATCH_BUFFER_END>();
|
||||
*bbEnd = Family::cmdInitBatchBufferEnd;
|
||||
|
||||
container.allocateNextCommandBuffer();
|
||||
}
|
||||
|
||||
bool specialModeRequired = kernelDescriptor.kernelAttributes.flags.usesSpecialPipelineSelectMode;
|
||||
if (PreambleHelper<Family>::isSpecialPipelineSelectModeChanged(container.lastPipelineSelectModeRequired, specialModeRequired, hwInfo)) {
|
||||
@@ -448,36 +439,6 @@ void EncodeDispatchKernel<Family>::encodeThreadData(WALKER_TYPE &walkerCmd,
|
||||
}
|
||||
}
|
||||
|
||||
template <typename Family>
|
||||
size_t EncodeDispatchKernel<Family>::estimateEncodeDispatchKernelCmdsSize(Device *device, const Vec3<size_t> &groupStart,
|
||||
const Vec3<size_t> &groupCount, bool isInternal,
|
||||
bool isCooperative, bool isIndirect, DispatchKernelEncoderI *dispatchInterface,
|
||||
bool isPartitioned) {
|
||||
size_t totalSize = sizeof(WALKER_TYPE);
|
||||
totalSize += PreemptionHelper::getPreemptionWaCsSize<Family>(*device);
|
||||
totalSize += EncodeStates<Family>::getAdjustStateComputeModeSize();
|
||||
totalSize += EncodeIndirectParams<Family>::getCmdsSizeForIndirectParams();
|
||||
totalSize += EncodeIndirectParams<Family>::getCmdsSizeForSetGroupCountIndirect();
|
||||
totalSize += EncodeIndirectParams<Family>::getCmdsSizeForSetGroupSizeIndirect();
|
||||
if (isIndirect) {
|
||||
UNRECOVERABLE_IF(dispatchInterface == nullptr);
|
||||
totalSize += EncodeIndirectParams<Family>::getCmdsSizeForSetWorkDimIndirect(dispatchInterface->getGroupSize(), false);
|
||||
if (dispatchInterface->getImplicitArgs()) {
|
||||
totalSize += EncodeIndirectParams<Family>::getCmdsSizeForSetGroupCountIndirect();
|
||||
totalSize += EncodeIndirectParams<Family>::getCmdsSizeForSetGroupSizeIndirect();
|
||||
totalSize += EncodeIndirectParams<Family>::getCmdsSizeForSetWorkDimIndirect(dispatchInterface->getGroupSize(), true);
|
||||
}
|
||||
}
|
||||
|
||||
if ((isPartitioned && !isCooperative) &&
|
||||
!isInternal) {
|
||||
const bool staticPartitioning = device->getDefaultEngine().commandStreamReceiver->isStaticWorkPartitioningEnabled();
|
||||
totalSize += ImplicitScalingDispatch<Family>::getSize(true, staticPartitioning, device->getDeviceBitfield(), groupStart, groupCount);
|
||||
}
|
||||
|
||||
return totalSize;
|
||||
}
|
||||
|
||||
template <typename Family>
|
||||
void EncodeStateBaseAddress<Family>::setIohAddressForDebugger(NEO::Debugger::SbaAddresses &sbaAddress, const STATE_BASE_ADDRESS &sbaCmd) {
|
||||
}
|
||||
|
||||
@@ -100,8 +100,10 @@ void ImplicitScalingDispatch<GfxFamily>::dispatchCommands(LinearStream &commandS
|
||||
staticPartitioning,
|
||||
useSecondaryBatchBuffer);
|
||||
|
||||
uint64_t cmdBufferGpuAddress = commandStream.getGraphicsAllocation()->getGpuAddress() + commandStream.getUsed();
|
||||
void *commandBuffer = commandStream.getSpace(0u);
|
||||
auto dispatchCommandsSize = getSize(apiSelfCleanup, preferStaticPartitioning, devices, {walkerCmd.getThreadGroupIdStartingX(), walkerCmd.getThreadGroupIdStartingY(), walkerCmd.getThreadGroupIdStartingZ()}, {walkerCmd.getThreadGroupIdXDimension(), walkerCmd.getThreadGroupIdYDimension(), walkerCmd.getThreadGroupIdZDimension()});
|
||||
void *commandBuffer = commandStream.getSpace(dispatchCommandsSize);
|
||||
uint64_t cmdBufferGpuAddress = commandStream.getGraphicsAllocation()->getGpuAddress() + commandStream.getUsed() - dispatchCommandsSize;
|
||||
|
||||
if (staticPartitioning) {
|
||||
UNRECOVERABLE_IF(tileCount != partitionCount);
|
||||
WalkerPartition::constructStaticallyPartitionedCommandBuffer<GfxFamily>(commandBuffer,
|
||||
@@ -126,7 +128,7 @@ void ImplicitScalingDispatch<GfxFamily>::dispatchCommands(LinearStream &commandS
|
||||
args,
|
||||
hwInfo);
|
||||
}
|
||||
commandStream.getSpace(totalProgrammedSize);
|
||||
UNRECOVERABLE_IF(totalProgrammedSize != dispatchCommandsSize);
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
@@ -166,8 +168,9 @@ void ImplicitScalingDispatch<GfxFamily>::dispatchBarrierCommands(LinearStream &c
|
||||
args.postSyncGpuAddress = gpuAddress;
|
||||
args.postSyncImmediateValue = immediateData;
|
||||
|
||||
uint64_t cmdBufferGpuAddress = commandStream.getGraphicsAllocation()->getGpuAddress() + commandStream.getUsed();
|
||||
void *commandBuffer = commandStream.getSpace(0u);
|
||||
auto barrierCommandsSize = getBarrierSize(hwInfo, apiSelfCleanup, args.usePostSync);
|
||||
void *commandBuffer = commandStream.getSpace(barrierCommandsSize);
|
||||
uint64_t cmdBufferGpuAddress = commandStream.getGraphicsAllocation()->getGpuAddress() + commandStream.getUsed() - barrierCommandsSize;
|
||||
|
||||
WalkerPartition::constructBarrierCommandBuffer<GfxFamily>(commandBuffer,
|
||||
cmdBufferGpuAddress,
|
||||
@@ -175,7 +178,7 @@ void ImplicitScalingDispatch<GfxFamily>::dispatchBarrierCommands(LinearStream &c
|
||||
args,
|
||||
flushArgs,
|
||||
hwInfo);
|
||||
commandStream.getSpace(totalProgrammedSize);
|
||||
UNRECOVERABLE_IF(totalProgrammedSize != barrierCommandsSize);
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
|
||||
@@ -273,7 +273,7 @@ class CommandStreamReceiver {
|
||||
|
||||
uint64_t getWorkPartitionAllocationGpuAddress() const;
|
||||
|
||||
bool isRcs() const;
|
||||
MOCKABLE_VIRTUAL bool isRcs() const;
|
||||
|
||||
virtual void initializeDefaultsForInternalEngine(){};
|
||||
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2018-2021 Intel Corporation
|
||||
* Copyright (C) 2018-2022 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -33,4 +33,10 @@ LinearStream::LinearStream(GraphicsAllocation *gfxAllocation)
|
||||
// Default construction delegates to the single-pointer constructor with a null argument.
LinearStream::LinearStream() : LinearStream(nullptr) {}
|
||||
|
||||
// Builds a stream over a raw buffer (delegating to the buffer/size constructor) and additionally
// records the owning command container and the size of a batch buffer end command.
// getSpace() consults both values when deciding whether to close the current command buffer.
LinearStream::LinearStream(void *buffer, size_t bufferSize, CommandContainer *cmdContainer, size_t batchBufferEndSize)
    : LinearStream(buffer, bufferSize) {
    // A delegating constructor cannot carry further member initializers, so assign in the body.
    this->batchBufferEndSize = batchBufferEndSize;
    this->cmdContainer = cmdContainer;
}
|
||||
} // namespace NEO
|
||||
|
||||
@@ -1,13 +1,16 @@
|
||||
/*
|
||||
* Copyright (C) 2018-2021 Intel Corporation
|
||||
* Copyright (C) 2018-2022 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
#include "shared/source/command_container/cmdcontainer.h"
|
||||
#include "shared/source/helpers/debug_helpers.h"
|
||||
#include "shared/source/helpers/hw_helper.h"
|
||||
#include "shared/source/helpers/ptr_math.h"
|
||||
#include "shared/source/helpers/string.h"
|
||||
|
||||
#include <atomic>
|
||||
#include <cstddef>
|
||||
@@ -23,6 +26,7 @@ class LinearStream {
|
||||
LinearStream(void *buffer, size_t bufferSize);
|
||||
LinearStream(GraphicsAllocation *buffer);
|
||||
LinearStream(GraphicsAllocation *gfxAllocation, void *buffer, size_t bufferSize);
|
||||
LinearStream(void *buffer, size_t bufferSize, CommandContainer *cmdContainer, size_t batchBufferEndSize);
|
||||
void *getCpuBase() const;
|
||||
void *getSpace(size_t size);
|
||||
size_t getMaxAvailableSpace() const;
|
||||
@@ -44,6 +48,8 @@ class LinearStream {
|
||||
size_t maxAvailableSpace;
|
||||
void *buffer;
|
||||
GraphicsAllocation *graphicsAllocation;
|
||||
CommandContainer *cmdContainer = nullptr;
|
||||
size_t batchBufferEndSize = 0;
|
||||
};
|
||||
|
||||
inline void *LinearStream::getCpuBase() const {
|
||||
@@ -51,6 +57,10 @@ inline void *LinearStream::getCpuBase() const {
|
||||
}
|
||||
|
||||
inline void *LinearStream::getSpace(size_t size) {
|
||||
if (cmdContainer != nullptr && getAvailableSpace() < batchBufferEndSize + size) {
|
||||
UNRECOVERABLE_IF(sizeUsed + batchBufferEndSize > maxAvailableSpace);
|
||||
cmdContainer->closeAndAllocateNextCommandBuffer();
|
||||
}
|
||||
UNRECOVERABLE_IF(sizeUsed + size > maxAvailableSpace);
|
||||
auto memory = ptrOffset(buffer, sizeUsed);
|
||||
sizeUsed += size;
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2020-2021 Intel Corporation
|
||||
* Copyright (C) 2020-2022 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -32,11 +32,6 @@ size_t EncodeWA<Family>::getAdditionalPipelineSelectSize(Device &device) {
|
||||
return size;
|
||||
}
|
||||
|
||||
template <>
|
||||
size_t EncodeStates<Family>::getAdjustStateComputeModeSize() {
|
||||
return sizeof(typename Family::STATE_COMPUTE_MODE);
|
||||
}
|
||||
|
||||
template <>
|
||||
void EncodeComputeMode<Family>::programComputeModeCommand(LinearStream &csr, StateComputeModeProperties &properties, const HardwareInfo &hwInfo) {
|
||||
using STATE_COMPUTE_MODE = typename Family::STATE_COMPUTE_MODE;
|
||||
|
||||
@@ -9,7 +9,6 @@
|
||||
#include "shared/source/aub_mem_dump/aub_mem_dump.h"
|
||||
#include "shared/source/built_ins/sip.h"
|
||||
#include "shared/source/command_container/command_encoder.h"
|
||||
#include "shared/source/command_stream/linear_stream.h"
|
||||
#include "shared/source/commands/bxml_generator_glue.h"
|
||||
#include "shared/source/helpers/aux_translation.h"
|
||||
#include "shared/source/helpers/definitions/engine_group_types.h"
|
||||
@@ -28,6 +27,7 @@ namespace NEO {
|
||||
class GmmHelper;
|
||||
class GraphicsAllocation;
|
||||
class TagAllocatorBase;
|
||||
class LinearStream;
|
||||
class Gmm;
|
||||
struct AllocationData;
|
||||
struct AllocationProperties;
|
||||
@@ -155,6 +155,8 @@ class HwHelper {
|
||||
virtual bool forceNonGpuCoherencyWA(bool requiresCoherency) const = 0;
|
||||
virtual bool platformSupportsImplicitScaling(const NEO::HardwareInfo &hwInfo) const = 0;
|
||||
virtual bool isLinuxCompletionFenceSupported() const = 0;
|
||||
virtual size_t getBatchBufferEndSize() const = 0;
|
||||
virtual const void *getBatchBufferEndReference() const = 0;
|
||||
|
||||
protected:
|
||||
HwHelper() = default;
|
||||
@@ -391,6 +393,8 @@ class HwHelperHw : public HwHelper {
|
||||
bool forceNonGpuCoherencyWA(bool requiresCoherency) const override;
|
||||
bool platformSupportsImplicitScaling(const NEO::HardwareInfo &hwInfo) const override;
|
||||
bool isLinuxCompletionFenceSupported() const override;
|
||||
size_t getBatchBufferEndSize() const override;
|
||||
const void *getBatchBufferEndReference() const override;
|
||||
|
||||
protected:
|
||||
static const AuxTranslationMode defaultAuxTranslationMode;
|
||||
|
||||
@@ -710,4 +710,12 @@ template <typename GfxFamily>
|
||||
bool HwHelperHw<GfxFamily>::forceNonGpuCoherencyWA(bool requiresCoherency) const {
|
||||
return requiresCoherency;
|
||||
}
|
||||
template <typename GfxFamily>
|
||||
size_t HwHelperHw<GfxFamily>::getBatchBufferEndSize() const {
|
||||
return sizeof(typename GfxFamily::MI_BATCH_BUFFER_END);
|
||||
}
|
||||
template <typename GfxFamily>
|
||||
const void *HwHelperHw<GfxFamily>::getBatchBufferEndReference() const {
|
||||
return reinterpret_cast<const void *>(&GfxFamily::cmdInitBatchBufferEnd);
|
||||
}
|
||||
} // namespace NEO
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2019-2021 Intel Corporation
|
||||
* Copyright (C) 2019-2022 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -7,7 +7,6 @@
|
||||
|
||||
#include "shared/source/command_stream/stream_properties.h"
|
||||
#include "shared/source/helpers/flat_batch_buffer_helper.h"
|
||||
#include "shared/source/helpers/hw_helper.h"
|
||||
#include "shared/source/helpers/preamble_base.inl"
|
||||
#include "shared/source/kernel/kernel_execution_type.h"
|
||||
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2021 Intel Corporation
|
||||
* Copyright (C) 2021-2022 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -7,6 +7,7 @@
|
||||
|
||||
#pragma once
|
||||
#include "shared/source/command_container/command_encoder.h"
|
||||
#include "shared/source/command_stream/linear_stream.h"
|
||||
#include "shared/source/debug_settings/debug_settings_manager.h"
|
||||
#include "shared/source/memory_manager/memory_manager.h"
|
||||
#include "shared/source/utilities/software_tags.h"
|
||||
|
||||
@@ -1,11 +1,14 @@
|
||||
/*
|
||||
* Copyright (C) 2021 Intel Corporation
|
||||
* Copyright (C) 2021-2022 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "shared/source/command_container/command_encoder.h"
|
||||
#include "shared/source/helpers/preamble.h"
|
||||
#include "shared/test/common/mocks/mock_command_stream_receiver.h"
|
||||
#include "shared/test/common/mocks/mock_device.h"
|
||||
#include "shared/test/common/test_macros/test.h"
|
||||
|
||||
using namespace NEO;
|
||||
@@ -14,3 +17,30 @@ using Gen12LpCommandEncodeTest = testing::Test;
|
||||
// NOTE(review): the test name ends in "ThenReturnsTrue" while the check below expects false —
// confirm which of the two is intended.
GEN12LPTEST_F(Gen12LpCommandEncodeTest, givenGen12LpPlatformWhenDoBindingTablePrefetchIsCalledThenReturnsTrue) {
    const auto prefetchEnabled = EncodeSurfaceState<FamilyType>::doBindingTablePrefetch();
    EXPECT_FALSE(prefetchEnabled);
}
|
||||
|
||||
template <bool rcs>
|
||||
class MyCommandStreamReceiverMock : public MockCommandStreamReceiver {
|
||||
public:
|
||||
MyCommandStreamReceiverMock(ExecutionEnvironment &executionEnvironment, uint32_t rootDeviceIndex, const DeviceBitfield deviceBitfield) : MockCommandStreamReceiver(executionEnvironment, rootDeviceIndex, deviceBitfield) {}
|
||||
bool isRcs() const override {
|
||||
return rcs;
|
||||
}
|
||||
};
|
||||
|
||||
// With a default-engine receiver that reports isRcs() == true, the additional pipeline select
// size must equal the size of two pipeline select commands.
GEN12LPTEST_F(Gen12LpCommandEncodeTest, givenGen12LpPlatformWhenDefaultEngineIsRcsThenAdditionalPipelineSelectSizeEqualTwoPipelineSelectSize) {
    MockDevice device;
    auto rcsCsr = std::make_unique<MyCommandStreamReceiverMock<true>>(*device.getExecutionEnvironment(), 0, device.getDeviceBitfield());

    // Swap the mock in for the duration of the check and put the original receiver back afterwards.
    auto &defaultEngine = device.getDefaultEngine();
    auto originalCsr = defaultEngine.commandStreamReceiver;
    defaultEngine.commandStreamReceiver = rcsCsr.get();

    const auto expectedSize = 2 * PreambleHelper<FamilyType>::getCmdSizeForPipelineSelect(device.getHardwareInfo());
    EXPECT_EQ(expectedSize, EncodeWA<FamilyType>::getAdditionalPipelineSelectSize(device));

    defaultEngine.commandStreamReceiver = originalCsr;
}
|
||||
|
||||
// With a default-engine receiver that reports isRcs() == false, no additional pipeline select
// size is expected.
GEN12LPTEST_F(Gen12LpCommandEncodeTest, givenGen12LpPlatformWhenDefaultEngineIsNotRcsThenAdditionalPipelineSelectSizeEqualZero) {
    MockDevice device;
    auto nonRcsCsr = std::make_unique<MyCommandStreamReceiverMock<false>>(*device.getExecutionEnvironment(), 0, device.getDeviceBitfield());

    // Swap the mock in for the duration of the check and put the original receiver back afterwards.
    auto &defaultEngine = device.getDefaultEngine();
    auto originalCsr = defaultEngine.commandStreamReceiver;
    defaultEngine.commandStreamReceiver = nonRcsCsr.get();

    EXPECT_EQ(0u, EncodeWA<FamilyType>::getAdditionalPipelineSelectSize(device));

    defaultEngine.commandStreamReceiver = originalCsr;
}
|
||||
@@ -103,22 +103,6 @@ GEN12LPTEST_F(CommandEncoderTest, givenVariousEngineTypesWhenEncodeSBAThenAdditi
|
||||
}
|
||||
}
|
||||
|
||||
// Takes the dispatch size estimate twice — before and after forcing the default engine's OS context
// to ENGINE_CCS — and expects the two to differ by the size of two pipeline select commands.
GEN12LPTEST_F(CommandEncoderTest, givenVariousEngineTypesWhenEstimateCommandBufferSizeThenRcsHasAdditionalPipelineSelectWASize) {
    using PIPELINE_SELECT = typename FamilyType::PIPELINE_SELECT;
    using STATE_COMPUTE_MODE = typename FamilyType::STATE_COMPUTE_MODE;

    // Same arguments for both measurements; only the engine type changes in between.
    auto estimateSize = [&]() {
        return EncodeDispatchKernel<FamilyType>::estimateEncodeDispatchKernelCmdsSize(pDevice, Vec3<size_t>(0, 0, 0),
                                                                                      Vec3<size_t>(1, 1, 1), false, false, false, nullptr, false);
    };

    const auto sizeWithWa = estimateSize();
    static_cast<MockOsContext *>(pDevice->getDefaultEngine().osContext)->engineType = aub_stream::ENGINE_CCS;
    const auto sizeOnCcs = estimateSize();

    const auto expectedDiff = 2 * PreambleHelper<FamilyType>::getCmdSizeForPipelineSelect(pDevice->getHardwareInfo());
    EXPECT_EQ(expectedDiff, sizeWithWa - sizeOnCcs);
}
|
||||
|
||||
GEN12LPTEST_F(CommandEncoderTest, GivenGen12LpWhenProgrammingL3StateOnThenExpectNoCommandsDispatched) {
|
||||
using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM;
|
||||
|
||||
|
||||
@@ -1,11 +1,12 @@
|
||||
/*
|
||||
* Copyright (C) 2021 Intel Corporation
|
||||
* Copyright (C) 2021-2022 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "shared/source/command_container/command_encoder.h"
|
||||
#include "shared/source/command_stream/linear_stream.h"
|
||||
#include "shared/source/command_stream/stream_properties.h"
|
||||
#include "shared/test/common/helpers/default_hw_info.h"
|
||||
#include "shared/test/common/test_macros/test.h"
|
||||
|
||||
@@ -1,11 +1,12 @@
|
||||
/*
|
||||
* Copyright (C) 2021 Intel Corporation
|
||||
* Copyright (C) 2021-2022 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "shared/source/command_container/command_encoder.h"
|
||||
#include "shared/source/command_stream/linear_stream.h"
|
||||
#include "shared/source/command_stream/stream_properties.h"
|
||||
#include "shared/test/common/helpers/default_hw_info.h"
|
||||
#include "shared/test/common/test_macros/test.h"
|
||||
|
||||
@@ -6,6 +6,7 @@
|
||||
*/
|
||||
|
||||
#include "shared/source/command_container/cmdcontainer.h"
|
||||
#include "shared/source/command_stream/linear_stream.h"
|
||||
#include "shared/source/memory_manager/allocations_list.h"
|
||||
#include "shared/test/common/fixtures/device_fixture.h"
|
||||
#include "shared/test/common/helpers/debug_manager_state_restore.h"
|
||||
@@ -475,7 +476,8 @@ TEST_F(CommandContainerTest, whenAllocateNextCmdBufferIsCalledThenNewAllocationI
|
||||
EXPECT_NE(nullptr, nextBuffer);
|
||||
EXPECT_EQ(0u, sizeUsed);
|
||||
EXPECT_NE(initialBuffer, nextBuffer);
|
||||
const size_t cmdBufSize = CommandContainer::defaultListCmdBufferSize;
|
||||
size_t alignedSize = alignUp<size_t>(CommandContainer::totalCmdBufferSize, MemoryConstants::pageSize64k);
|
||||
const size_t cmdBufSize = alignedSize - CommandContainer::cmdBufferReservedSize;
|
||||
EXPECT_EQ(cmdBufSize, availableSize);
|
||||
|
||||
ASSERT_EQ(2u, cmdContainer->getCmdBufferAllocations().size());
|
||||
@@ -682,3 +684,48 @@ TEST_F(CommandContainerTest, givenContainerAllocatesNextCommandBufferWhenResetin
|
||||
}
|
||||
EXPECT_TRUE(firstAllocationFound);
|
||||
}
|
||||
|
||||
class MyLinearStreamMock : public LinearStream {
|
||||
public:
|
||||
using LinearStream::cmdContainer;
|
||||
};
|
||||
|
||||
// After initialize(), the container's command stream must point back at the container itself.
TEST_F(CommandContainerTest, givenCmdContainerWhenContainerIsInitializedThenStreamContainsContainerPtr) {
    CommandContainer cmdContainer;
    cmdContainer.initialize(pDevice, nullptr);

    // The cast only serves to reach the otherwise inaccessible cmdContainer member.
    auto stream = reinterpret_cast<MyLinearStreamMock *>(cmdContainer.getCommandStream());
    EXPECT_EQ(&cmdContainer, stream->cmdContainer);
}
|
||||
|
||||
// After initialize(), the stream's capacity must be the 64k-aligned total command buffer size
// minus the reserved size.
TEST_F(CommandContainerTest, givenCmdContainerWhenContainerIsInitializedThenStreamSizeEqualAlignedTotalCmdBuffSizeDecreasedOfReservedSize) {
    CommandContainer cmdContainer;
    cmdContainer.initialize(pDevice, nullptr);

    const size_t alignedSize = alignUp<size_t>(CommandContainer::totalCmdBufferSize, MemoryConstants::pageSize64k);
    const size_t expectedStreamSize = alignedSize - CommandContainer::cmdBufferReservedSize;
    EXPECT_EQ(expectedStreamSize, cmdContainer.getCommandStream()->getMaxAvailableSpace());
}
|
||||
|
||||
// After allocateNextCommandBuffer(), the stream's capacity must still be the 64k-aligned total
// command buffer size minus the reserved size.
TEST_F(CommandContainerTest, givenCmdContainerWhenAlocatingNextCmdBufferThenStreamSizeEqualAlignedTotalCmdBuffSizeDecreasedOfReservedSize) {
    CommandContainer cmdContainer;
    cmdContainer.initialize(pDevice, nullptr);
    cmdContainer.allocateNextCommandBuffer();

    const size_t alignedSize = alignUp<size_t>(CommandContainer::totalCmdBufferSize, MemoryConstants::pageSize64k);
    const size_t expectedStreamSize = alignedSize - CommandContainer::cmdBufferReservedSize;
    EXPECT_EQ(expectedStreamSize, cmdContainer.getCommandStream()->getMaxAvailableSpace());
}
|
||||
|
||||
// closeAndAllocateNextCommandBuffer() must leave a batch buffer end command at the position
// the stream had reached before the call.
TEST_F(CommandContainerTest, givenCmdContainerWhenCloseAndAllocateNextCommandBufferCalledThenBBEndPlacedAtEndOfLinearStream) {
    CommandContainer cmdContainer;
    cmdContainer.initialize(pDevice, nullptr);
    auto &hwHelper = HwHelper::get(pDevice->getHardwareInfo().platform.eRenderCoreFamily);

    // Zero-sized request: used here only to capture the stream's current position.
    auto streamPosition = cmdContainer.getCommandStream()->getSpace(0u);
    cmdContainer.closeAndAllocateNextCommandBuffer();

    EXPECT_EQ(0, memcmp(streamPosition, hwHelper.getBatchBufferEndReference(), hwHelper.getBatchBufferEndSize()));
}
|
||||
|
||||
// closeAndAllocateNextCommandBuffer() must grow the list of command buffer allocations from one to two.
TEST_F(CommandContainerTest, givenCmdContainerWhenCloseAndAllocateNextCommandBufferCalledThenNewCmdBufferAllocationCreated) {
    CommandContainer cmdContainer;
    cmdContainer.initialize(pDevice, nullptr);
    EXPECT_EQ(1u, cmdContainer.getCmdBufferAllocations().size());

    cmdContainer.closeAndAllocateNextCommandBuffer();
    EXPECT_EQ(2u, cmdContainer.getCmdBufferAllocations().size());
}
|
||||
@@ -6,6 +6,7 @@
|
||||
*/
|
||||
|
||||
#include "shared/source/command_container/command_encoder.h"
|
||||
#include "shared/source/command_stream/linear_stream.h"
|
||||
#include "shared/source/memory_manager/graphics_allocation.h"
|
||||
#include "shared/test/common/helpers/default_hw_info.h"
|
||||
#include "shared/test/common/helpers/unit_test_helper.h"
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2018-2021 Intel Corporation
|
||||
* Copyright (C) 2018-2022 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -7,6 +7,7 @@
|
||||
|
||||
#include "shared/source/command_stream/linear_stream.h"
|
||||
#include "shared/source/memory_manager/graphics_allocation.h"
|
||||
#include "shared/test/common/fixtures/device_fixture.h"
|
||||
#include "shared/test/common/fixtures/linear_stream_fixture.h"
|
||||
#include "shared/test/common/mocks/mock_graphics_allocation.h"
|
||||
|
||||
@@ -109,3 +110,87 @@ TEST_F(LinearStreamTest, givenNewGraphicsAllocationWhenReplaceIsCalledThenLinear
|
||||
linearStream.replaceGraphicsAllocation(&newGraphicsAllocation);
|
||||
EXPECT_EQ(&newGraphicsAllocation, linearStream.getGraphicsAllocation());
|
||||
}
|
||||
|
||||
class MyLinearStreamMock : public LinearStream {
|
||||
public:
|
||||
using LinearStream::sizeUsed;
|
||||
};
|
||||
|
||||
// With exactly one byte left in the fixture's stream, a one-byte request must not throw.
TEST_F(LinearStreamTest, givenLinearStreamWithoutCmdContainerWhenOneByteLeftInStreamThenGetSpaceDontThrowAbort) {
    // The cast only serves to reach the otherwise inaccessible sizeUsed member.
    auto streamMock = reinterpret_cast<MyLinearStreamMock *>(&linearStream);
    streamMock->sizeUsed = linearStream.getMaxAvailableSpace() - 1;

    EXPECT_NO_THROW(linearStream.getSpace(1));
}
|
||||
using CommandContainerLinearStreamTest = Test<DeviceFixture>;
|
||||
// With a container-backed stream and only one byte left, a one-byte request must throw.
TEST_F(CommandContainerLinearStreamTest, givenLinearStreamWithCmdContainerWhenOneByteLeftInStreamThenGetSpaceThrowAbort) {
    CommandContainer cmdContainer;
    cmdContainer.initialize(pDevice, nullptr);

    // The cast only serves to reach the otherwise inaccessible sizeUsed member.
    auto streamMock = reinterpret_cast<MyLinearStreamMock *>(cmdContainer.getCommandStream());
    streamMock->sizeUsed = streamMock->getMaxAvailableSpace() - 1;

    EXPECT_THROW(streamMock->getSpace(1), std::exception);
}
|
||||
|
||||
// When the remaining space is one byte short of command + batch buffer end, getSpace() must make
// the container allocate a second command buffer.
TEST_F(CommandContainerLinearStreamTest, givenLinearStreamWithCmdContainerWhenThereIsNoSpaceForCommandAndBBEndThenNewCmdBufferAllocated) {
    CommandContainer cmdContainer;
    cmdContainer.initialize(pDevice, nullptr);
    auto &hwHelper = HwHelper::get(pDevice->getHardwareInfo().platform.eRenderCoreFamily);
    auto streamMock = reinterpret_cast<MyLinearStreamMock *>(cmdContainer.getCommandStream());

    const size_t dummyCommandSize = 2;
    const size_t bbEndSize = hwHelper.getBatchBufferEndSize();
    // Leaves bbEndSize + dummyCommandSize - 1 bytes available.
    streamMock->sizeUsed = streamMock->getMaxAvailableSpace() - bbEndSize - (dummyCommandSize - 1);

    EXPECT_EQ(1u, cmdContainer.getCmdBufferAllocations().size());
    streamMock->getSpace(dummyCommandSize);
    EXPECT_EQ(2u, cmdContainer.getCmdBufferAllocations().size());
}
|
||||
|
||||
// When the remaining space is one byte short of command + batch buffer end, getSpace() must leave
// the stream backed by a different CPU buffer than before.
TEST_F(CommandContainerLinearStreamTest, givenLinearStreamWithCmdContainerWhenThereIsNoSpaceForCommandAndBBEndThenLinearStreamHasNewAllocation) {
    CommandContainer cmdContainer;
    cmdContainer.initialize(pDevice, nullptr);
    auto &hwHelper = HwHelper::get(pDevice->getHardwareInfo().platform.eRenderCoreFamily);
    auto streamMock = reinterpret_cast<MyLinearStreamMock *>(cmdContainer.getCommandStream());

    const size_t dummyCommandSize = 2;
    const size_t bbEndSize = hwHelper.getBatchBufferEndSize();
    // Leaves bbEndSize + dummyCommandSize - 1 bytes available.
    streamMock->sizeUsed = streamMock->getMaxAvailableSpace() - bbEndSize - (dummyCommandSize - 1);

    auto bufferBefore = streamMock->getCpuBase();
    streamMock->getSpace(dummyCommandSize);
    auto bufferAfter = streamMock->getCpuBase();

    EXPECT_NE(bufferAfter, bufferBefore);
}
|
||||
|
||||
// When the remaining space is one byte short of command + batch buffer end, the pointer returned by
// getSpace() must equal the stream's CPU base as observed after the call.
TEST_F(CommandContainerLinearStreamTest, givenLinearStreamWithCmdContainerWhenThereIsNoSpaceForCommandAndBBEndThenGetSpaceReturnPtrFromNewAllocation) {
    CommandContainer cmdContainer;
    cmdContainer.initialize(pDevice, nullptr);
    auto &hwHelper = HwHelper::get(pDevice->getHardwareInfo().platform.eRenderCoreFamily);
    auto streamMock = reinterpret_cast<MyLinearStreamMock *>(cmdContainer.getCommandStream());

    const size_t dummyCommandSize = 2;
    const size_t bbEndSize = hwHelper.getBatchBufferEndSize();
    // Leaves bbEndSize + dummyCommandSize - 1 bytes available.
    streamMock->sizeUsed = streamMock->getMaxAvailableSpace() - bbEndSize - (dummyCommandSize - 1);

    auto commandSpace = streamMock->getSpace(dummyCommandSize);
    auto bufferBase = streamMock->getCpuBase();

    EXPECT_EQ(bufferBase, commandSpace);
}
|
||||
|
||||
// When the remaining space fits command + batch buffer end exactly, getSpace() must not make the
// container allocate another command buffer.
TEST_F(CommandContainerLinearStreamTest, givenLinearStreamWithCmdContainerWhenThereIsSpaceForCommandAndBBEndThenNewCmdBufferIsNotAllocated) {
    CommandContainer cmdContainer;
    cmdContainer.initialize(pDevice, nullptr);
    auto &hwHelper = HwHelper::get(pDevice->getHardwareInfo().platform.eRenderCoreFamily);
    auto streamMock = reinterpret_cast<MyLinearStreamMock *>(cmdContainer.getCommandStream());

    const size_t dummyCommandSize = 2;
    const size_t bbEndSize = hwHelper.getBatchBufferEndSize();
    // Leaves exactly bbEndSize + dummyCommandSize bytes available.
    streamMock->sizeUsed = streamMock->getMaxAvailableSpace() - bbEndSize - (dummyCommandSize);

    EXPECT_EQ(1u, cmdContainer.getCmdBufferAllocations().size());
    streamMock->getSpace(dummyCommandSize);
    EXPECT_EQ(1u, cmdContainer.getCmdBufferAllocations().size());
}
|
||||
|
||||
// When the remaining space is one byte short of command + batch buffer end, getSpace() must leave a
// batch buffer end command at the position the stream had reached before the request.
TEST_F(CommandContainerLinearStreamTest, givenLinearStreamWithCmdContainerWhenThereIsNoSpaceForCommandAndBBEndThenBBEndAddedAtEndOfStream) {
    CommandContainer cmdContainer;
    cmdContainer.initialize(pDevice, nullptr);
    auto &hwHelper = HwHelper::get(pDevice->getHardwareInfo().platform.eRenderCoreFamily);
    auto streamMock = reinterpret_cast<MyLinearStreamMock *>(cmdContainer.getCommandStream());

    const size_t dummyCommandSize = 2;
    const size_t bbEndSize = hwHelper.getBatchBufferEndSize();
    // Leaves bbEndSize + dummyCommandSize - 1 bytes available.
    streamMock->sizeUsed = streamMock->getMaxAvailableSpace() - bbEndSize - (dummyCommandSize - 1);

    // Zero-sized request: used here only to capture the stream's current position.
    auto streamPosition = streamMock->getSpace(0u);
    streamMock->getSpace(dummyCommandSize);

    EXPECT_EQ(0, memcmp(streamPosition, hwHelper.getBatchBufferEndReference(), hwHelper.getBatchBufferEndSize()));
}
|
||||
@@ -955,8 +955,6 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandEncodeStatesDynamicImplicitScaling, givenImp
|
||||
|
||||
bool requiresUncachedMocs = false;
|
||||
bool isInternal = false;
|
||||
size_t regularEstimateSize = EncodeDispatchKernel<FamilyType>::estimateEncodeDispatchKernelCmdsSize(
|
||||
pDevice, Vec3<size_t>(0, 0, 0), Vec3<size_t>(16, 1, 1), isInternal, false, false, nullptr, false);
|
||||
|
||||
EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs);
|
||||
dispatchArgs.isInternal = isInternal;
|
||||
@@ -972,8 +970,6 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandEncodeStatesDynamicImplicitScaling, givenImp
|
||||
EXPECT_EQ(WALKER_TYPE::PARTITION_TYPE::PARTITION_TYPE_DISABLED, baseWalkerCmd->getPartitionType());
|
||||
EXPECT_EQ(16u, baseWalkerCmd->getThreadGroupIdXDimension());
|
||||
|
||||
size_t partitionEstimateSize = EncodeDispatchKernel<FamilyType>::estimateEncodeDispatchKernelCmdsSize(
|
||||
pDevice, Vec3<size_t>(0, 0, 0), Vec3<size_t>(16, 1, 1), isInternal, false, false, nullptr, true);
|
||||
dispatchArgs.partitionCount = 2;
|
||||
EncodeDispatchKernel<FamilyType>::encode(*cmdContainer.get(), dispatchArgs);
|
||||
|
||||
@@ -982,7 +978,6 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandEncodeStatesDynamicImplicitScaling, givenImp
|
||||
|
||||
size_t expectedPartitionedWalkerSize = ImplicitScalingDispatch<FamilyType>::getSize(true, false, pDevice->getDeviceBitfield(), Vec3<size_t>(0, 0, 0), Vec3<size_t>(16, 1, 1));
|
||||
EXPECT_EQ(expectedPartitionedWalkerSize, partitionedWalkerSize);
|
||||
EXPECT_EQ(partitionEstimateSize, regularEstimateSize + expectedPartitionedWalkerSize);
|
||||
|
||||
GenCmdList partitionedWalkerList;
|
||||
CmdParse<FamilyType>::parseCommandBuffer(
|
||||
@@ -1020,23 +1015,18 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandEncodeStatesDynamicImplicitScaling, givenImp
|
||||
std::unique_ptr<MockDispatchKernelEncoder> dispatchInterface(new MockDispatchKernelEncoder());
|
||||
|
||||
bool isInternal = false;
|
||||
size_t baseEstimateSize = EncodeDispatchKernel<FamilyType>::estimateEncodeDispatchKernelCmdsSize(
|
||||
pDevice, Vec3<size_t>(0, 0, 0), Vec3<size_t>(16, 1, 1), isInternal, false, false, dispatchInterface.get(), false);
|
||||
|
||||
bool requiresUncachedMocs = false;
|
||||
EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs);
|
||||
dispatchArgs.isInternal = isInternal;
|
||||
dispatchArgs.partitionCount = 2;
|
||||
|
||||
size_t partitionEstimateSize = EncodeDispatchKernel<FamilyType>::estimateEncodeDispatchKernelCmdsSize(
|
||||
pDevice, Vec3<size_t>(0, 0, 0), Vec3<size_t>(16, 1, 1), isInternal, false, false, dispatchInterface.get(), true);
|
||||
EncodeDispatchKernel<FamilyType>::encode(*cmdContainer.get(), dispatchArgs);
|
||||
|
||||
EXPECT_EQ(2u, dispatchArgs.partitionCount);
|
||||
size_t partitionedWalkerSize = cmdContainer->getCommandStream()->getUsed();
|
||||
|
||||
size_t expectedPartitionedWalkerSize = ImplicitScalingDispatch<FamilyType>::getSize(true, false, pDevice->getDeviceBitfield(), Vec3<size_t>(0, 0, 0), Vec3<size_t>(16, 1, 1));
|
||||
EXPECT_EQ(partitionEstimateSize, baseEstimateSize + expectedPartitionedWalkerSize);
|
||||
EXPECT_EQ(expectedPartitionedWalkerSize, partitionedWalkerSize);
|
||||
|
||||
GenCmdList partitionedWalkerList;
|
||||
@@ -1124,23 +1114,17 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandEncodeStatesDynamicImplicitScaling,
|
||||
std::unique_ptr<MockDispatchKernelEncoder> dispatchInterface(new MockDispatchKernelEncoder());
|
||||
|
||||
bool isInternal = false;
|
||||
size_t baseEstimateSize = EncodeDispatchKernel<FamilyType>::estimateEncodeDispatchKernelCmdsSize(
|
||||
pDevice, Vec3<size_t>(0, 0, 0), Vec3<size_t>(16, 1, 1), isInternal, false, false, dispatchInterface.get(), false);
|
||||
|
||||
bool requiresUncachedMocs = false;
|
||||
EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs);
|
||||
dispatchArgs.isInternal = isInternal;
|
||||
dispatchArgs.partitionCount = 2;
|
||||
|
||||
size_t partitionEstimateSize = EncodeDispatchKernel<FamilyType>::estimateEncodeDispatchKernelCmdsSize(
|
||||
pDevice, Vec3<size_t>(0, 0, 0), Vec3<size_t>(16, 1, 1), isInternal, false, false, dispatchInterface.get(), true);
|
||||
EncodeDispatchKernel<FamilyType>::encode(*cmdContainer.get(), dispatchArgs);
|
||||
|
||||
EXPECT_EQ(2u, dispatchArgs.partitionCount);
|
||||
size_t partitionedWalkerSize = cmdContainer->getCommandStream()->getUsed();
|
||||
|
||||
size_t expectedPartitionedWalkerSize = ImplicitScalingDispatch<FamilyType>::getSize(true, false, pDevice->getDeviceBitfield(), Vec3<size_t>(0, 0, 0), Vec3<size_t>(16, 1, 1));
|
||||
EXPECT_EQ(partitionEstimateSize, baseEstimateSize + expectedPartitionedWalkerSize);
|
||||
EXPECT_EQ(expectedPartitionedWalkerSize, partitionedWalkerSize);
|
||||
|
||||
GenCmdList partitionedWalkerList;
|
||||
@@ -1187,20 +1171,12 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandEncodeStatesDynamicImplicitScaling, givenImp
|
||||
uint32_t dims[] = {16, 1, 1};
|
||||
std::unique_ptr<MockDispatchKernelEncoder> dispatchInterface(new MockDispatchKernelEncoder());
|
||||
|
||||
bool isInternal = false;
|
||||
size_t baseEstimateSize = EncodeDispatchKernel<FamilyType>::estimateEncodeDispatchKernelCmdsSize(
|
||||
pDevice, Vec3<size_t>(0, 0, 0), Vec3<size_t>(16, 1, 1), isInternal, false, false, dispatchInterface.get(), false);
|
||||
|
||||
isInternal = true;
|
||||
bool isInternal = true;
|
||||
bool requiresUncachedMocs = false;
|
||||
EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs);
|
||||
dispatchArgs.isInternal = isInternal;
|
||||
dispatchArgs.partitionCount = 2;
|
||||
|
||||
size_t internalEstimateSize = EncodeDispatchKernel<FamilyType>::estimateEncodeDispatchKernelCmdsSize(
|
||||
pDevice, Vec3<size_t>(0, 0, 0), Vec3<size_t>(16, 1, 1), isInternal, false, false, dispatchInterface.get(), true);
|
||||
EXPECT_EQ(baseEstimateSize, internalEstimateSize);
|
||||
|
||||
EncodeDispatchKernel<FamilyType>::encode(*cmdContainer.get(), dispatchArgs);
|
||||
|
||||
size_t internalWalkerSize = cmdContainer->getCommandStream()->getUsed();
|
||||
|
||||
Reference in New Issue
Block a user