Command container appends BB_END on cmd buffer allocation end

When the linear stream created for the command container does not have
enough space for a command plus BB_END, it programs BB_END and allocates
a new command buffer. In this case, the pointer returned from getSpace
points to storage in the new command buffer allocation.

Related-To: NEO-5707

Signed-off-by: Maciej Plewka <maciej.plewka@intel.com>
This commit is contained in:
Maciej Plewka
2022-01-12 16:57:42 +00:00
committed by Compute-Runtime-Automation
parent 92316c48f2
commit 9d8ce7aace
31 changed files with 262 additions and 306 deletions

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2020-2021 Intel Corporation
* Copyright (C) 2020-2022 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -159,7 +159,6 @@ struct CommandListCoreFamily : CommandListImp {
ze_result_t reset() override;
ze_result_t executeCommandListImmediate(bool performMigration) override;
size_t getReserveSshSize();
void increaseCommandStreamSpace(size_t commandSize);
protected:
MOCKABLE_VIRTUAL ze_result_t appendMemoryCopyKernelWithGA(void *dstPtr, NEO::GraphicsAllocation *dstPtrAlloc,

View File

@@ -332,7 +332,6 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendEventReset(ze_event_hand
if (this->partitionCount > 1) {
estimateSize += estimateBufferSizeMultiTileBarrier(hwInfo);
}
increaseCommandStreamSpace(estimateSize);
for (uint32_t i = 0u; i < packetsToReset; i++) {
NEO::MemorySynchronizationCommands<GfxFamily>::addPipeControlAndProgramPostSyncOperation(
@@ -896,13 +895,6 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopyBlit(uintptr_t
commandContainer.addToResidencyContainer(clearColorAllocation);
NEO::BlitPropertiesContainer blitPropertiesContainer{blitProperties};
bool blitterDirectSubmission = true; // assume direct submission enabled, since usually MI_BATCH_BUFFER_START is bigger than MI_BATCH_BUFFER_END
size_t estimatedSize = NEO::BlitCommandsHelper<GfxFamily>::template BlitCommandsHelper<GfxFamily>::estimateBlitCommandsSize(blitPropertiesContainer,
false,
false,
blitterDirectSubmission,
*device->getNEODevice()->getExecutionEnvironment()->rootDeviceEnvironments[device->getRootDeviceIndex()]);
increaseCommandStreamSpace(estimatedSize);
NEO::BlitCommandsHelper<GfxFamily>::dispatchBlitCommandsForBufferPerRow(blitProperties, *commandContainer.getCommandStream(), *device->getNEODevice()->getExecutionEnvironment()->rootDeviceEnvironments[device->getRootDeviceIndex()]);
@@ -946,13 +938,6 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopyBlitRegion(NEO
}
NEO::BlitPropertiesContainer blitPropertiesContainer{blitProperties};
bool blitterDirectSubmission = true; // assume direct submission enabled, since usually MI_BATCH_BUFFER_START is bigger than MI_BATCH_BUFFER_END
size_t estimatedSize = NEO::BlitCommandsHelper<GfxFamily>::template BlitCommandsHelper<GfxFamily>::estimateBlitCommandsSize(blitPropertiesContainer,
false,
false,
blitterDirectSubmission,
*device->getNEODevice()->getExecutionEnvironment()->rootDeviceEnvironments[device->getRootDeviceIndex()]);
increaseCommandStreamSpace(estimatedSize);
appendEventForProfiling(hSignalEvent, true);
bool copyRegionPreferred = NEO::BlitCommandsHelper<GfxFamily>::isCopyRegionPreferred(copySizeModified, *device->getNEODevice()->getExecutionEnvironment()->rootDeviceEnvironments[device->getRootDeviceIndex()]);
@@ -1684,11 +1669,9 @@ void CommandListCoreFamily<gfxCoreFamily>::appendSignalEventPostWalker(ze_event_
if (isCopyOnly()) {
NEO::MiFlushArgs args;
args.commandWithPostSync = true;
increaseCommandStreamSpace(NEO::EncodeMiFlushDW<GfxFamily>::getMiFlushDwCmdSizeForDataWrite());
NEO::EncodeMiFlushDW<GfxFamily>::programMiFlushDw(*commandContainer.getCommandStream(), baseAddr, Event::STATE_SIGNALED,
args, hwInfo);
} else {
increaseCommandStreamSpace(NEO::MemorySynchronizationCommands<GfxFamily>::getSizeForPipeControlWithPostSyncOperation(hwInfo));
NEO::PipeControlArgs args;
args.dcFlushEnable = NEO::MemorySynchronizationCommands<GfxFamily>::getDcFlushEnable(event->signalScope, hwInfo);
if (this->partitionCount > 1) {
@@ -1839,7 +1822,6 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendSignalEvent(ze_event_han
if (isCopyOnly()) {
NEO::MiFlushArgs args;
args.commandWithPostSync = true;
increaseCommandStreamSpace(NEO::EncodeMiFlushDW<GfxFamily>::getMiFlushDwCmdSizeForDataWrite());
NEO::EncodeMiFlushDW<GfxFamily>::programMiFlushDw(*commandContainer.getCommandStream(), ptrOffset(baseAddr, eventSignalOffset),
Event::STATE_SIGNALED, args, hwInfo);
} else {
@@ -1851,7 +1833,6 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendSignalEvent(ze_event_han
event->setPacketsInUse(this->partitionCount);
}
if (applyScope || event->isEventTimestampFlagSet()) {
increaseCommandStreamSpace(NEO::MemorySynchronizationCommands<GfxFamily>::getSizeForPipeControlWithPostSyncOperation(hwInfo));
NEO::MemorySynchronizationCommands<GfxFamily>::addPipeControlAndProgramPostSyncOperation(
*commandContainer.getCommandStream(),
POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA,
@@ -1860,7 +1841,6 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendSignalEvent(ze_event_han
hwInfo,
args);
} else {
increaseCommandStreamSpace(NEO::EncodeStoreMemory<GfxFamily>::getStoreDataImmSize());
NEO::EncodeStoreMemory<GfxFamily>::programStoreDataImm(
*commandContainer.getCommandStream(),
ptrOffset(baseAddr, eventSignalOffset),
@@ -1928,7 +1908,6 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendWaitOnEvents(uint32_t nu
estimatedBufferSize += NEO::EncodeSempahore<GfxFamily>::getSizeMiSemaphoreWait();
}
}
increaseCommandStreamSpace(estimatedBufferSize);
if (dcFlushRequired) {
if (isCopyOnly()) {
@@ -2204,17 +2183,6 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::reserveSpace(size_t size, void
return ZE_RESULT_SUCCESS;
}
template <GFXCORE_FAMILY gfxCoreFamily>
void CommandListCoreFamily<gfxCoreFamily>::increaseCommandStreamSpace(size_t commandSize) {
using MI_BATCH_BUFFER_END = typename GfxFamily::MI_BATCH_BUFFER_END;
size_t estimatedSizeRequired = commandSize + sizeof(MI_BATCH_BUFFER_END);
if (commandContainer.getCommandStream()->getAvailableSpace() < estimatedSizeRequired) {
auto bbEnd = commandContainer.getCommandStream()->template getSpaceForCmd<MI_BATCH_BUFFER_END>();
*bbEnd = GfxFamily::cmdInitBatchBufferEnd;
commandContainer.allocateNextCommandBuffer();
}
}
template <GFXCORE_FAMILY gfxCoreFamily>
ze_result_t CommandListCoreFamily<gfxCoreFamily>::prepareIndirectParams(const ze_group_count_t *pThreadGroupDimensions) {
using GfxFamily = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;
@@ -2353,9 +2321,6 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendBarrier(ze_event_handle_
const auto &hwInfo = this->device->getHwInfo();
if (!hSignalEvent) {
if (isCopyOnly()) {
size_t estimatedSizeRequired = NEO::EncodeMiFlushDW<GfxFamily>::getMiFlushDwCmdSizeForDataWrite();
increaseCommandStreamSpace(estimatedSizeRequired);
NEO::MiFlushArgs args;
NEO::EncodeMiFlushDW<GfxFamily>::programMiFlushDw(*commandContainer.getCommandStream(), 0, 0, args, hwInfo);
} else {

View File

@@ -183,9 +183,6 @@ void CommandListCoreFamily<gfxCoreFamily>::appendMultiPartitionEpilogue() {}
template <GFXCORE_FAMILY gfxCoreFamily>
void CommandListCoreFamily<gfxCoreFamily>::appendComputeBarrierCommand() {
size_t estimatedSizeRequired = NEO::MemorySynchronizationCommands<GfxFamily>::getSizeForSinglePipeControl();
increaseCommandStreamSpace(estimatedSizeRequired);
NEO::PipeControlArgs args = createBarrierFlags();
NEO::MemorySynchronizationCommands<GfxFamily>::addPipeControl(*commandContainer.getCommandStream(), args);
}

View File

@@ -134,8 +134,6 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(z
const auto &hwInfo = this->device->getHwInfo();
if (NEO::DebugManager.flags.ForcePipeControlPriorToWalker.get()) {
increaseCommandStreamSpace(NEO::MemorySynchronizationCommands<GfxFamily>::getSizeForSinglePipeControl());
NEO::PipeControlArgs args;
NEO::MemorySynchronizationCommands<GfxFamily>::addPipeControl(*commandContainer.getCommandStream(), args);
}
@@ -245,8 +243,6 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(z
event->setPacketsInUse(partitionCount);
}
if (L3FlushEnable) {
size_t estimatedSize = NEO::MemorySynchronizationCommands<GfxFamily>::getSizeForPipeControlWithPostSyncOperation(hwInfo);
increaseCommandStreamSpace(estimatedSize);
programEventL3Flush<gfxCoreFamily>(hEvent, this->device, partitionCount, commandContainer);
}
}
@@ -302,16 +298,12 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(z
template <GFXCORE_FAMILY gfxCoreFamily>
void CommandListCoreFamily<gfxCoreFamily>::appendMultiPartitionPrologue(uint32_t partitionDataSize) {
size_t estimatedSizeRequired = NEO::ImplicitScalingDispatch<GfxFamily>::getOffsetRegisterSize();
increaseCommandStreamSpace(estimatedSizeRequired);
NEO::ImplicitScalingDispatch<GfxFamily>::dispatchOffsetRegister(*commandContainer.getCommandStream(),
partitionDataSize);
}
template <GFXCORE_FAMILY gfxCoreFamily>
void CommandListCoreFamily<gfxCoreFamily>::appendMultiPartitionEpilogue() {
const size_t estimatedSizeRequired = NEO::ImplicitScalingDispatch<GfxFamily>::getOffsetRegisterSize();
increaseCommandStreamSpace(estimatedSizeRequired);
NEO::ImplicitScalingDispatch<GfxFamily>::dispatchOffsetRegister(*commandContainer.getCommandStream(),
NEO::ImplicitScalingDispatch<GfxFamily>::getPostSyncOffset());
}
@@ -320,14 +312,9 @@ template <GFXCORE_FAMILY gfxCoreFamily>
void CommandListCoreFamily<gfxCoreFamily>::appendComputeBarrierCommand() {
if (this->partitionCount > 1) {
auto neoDevice = device->getNEODevice();
auto &hwInfo = neoDevice->getHardwareInfo();
increaseCommandStreamSpace(estimateBufferSizeMultiTileBarrier(hwInfo));
appendMultiTileBarrier(*neoDevice);
} else {
NEO::PipeControlArgs args = createBarrierFlags();
size_t estimatedSizeRequired = NEO::MemorySynchronizationCommands<GfxFamily>::getSizeForSinglePipeControl();
increaseCommandStreamSpace(estimatedSizeRequired);
NEO::MemorySynchronizationCommands<GfxFamily>::addPipeControl(*commandContainer.getCommandStream(), args);
}
}

View File

@@ -44,9 +44,6 @@ ze_result_t CommandListCoreFamily<IGFX_XE_HPC_CORE>::appendMemoryPrefetch(const
NEO::LinearStream &cmdStream = *commandContainer.getCommandStream();
size_t estimatedSizeRequired = NEO::EncodeMemoryPrefetch<GfxFamily>::getSizeForMemoryPrefetch(size);
increaseCommandStreamSpace(estimatedSizeRequired);
NEO::EncodeMemoryPrefetch<GfxFamily>::programMemoryPrefetch(cmdStream, *gpuAlloc, static_cast<uint32_t>(size), offset, hwInfo);
return ZE_RESULT_SUCCESS;
@@ -56,9 +53,6 @@ template <>
void CommandListCoreFamily<IGFX_XE_HPC_CORE>::applyMemoryRangesBarrier(uint32_t numRanges,
const size_t *pRangeSizes,
const void **pRanges) {
increaseCommandStreamSpace(NEO::MemorySynchronizationCommands<GfxFamily>::getSizeForSinglePipeControl());
NEO::PipeControlArgs args;
args.hdcPipelineFlush = true;
args.unTypedDataPortCacheFlush = true;