Update command_encoder; fix bug when the dynamic state heap memory changes

Add ULTs for the encode and command container changes
Refactor getHeapSpaceAllowGrow and getHeapWithRequiredSizeAndAlignment

Signed-off-by: Tratnack, Geoffrey <geoffrey.tratnack@intel.com>
Related-To: LOCI-3365
This commit is contained in:
Tratnack, Geoffrey
2022-09-23 23:39:38 +00:00
committed by Compute-Runtime-Automation
parent 2eb47bd73e
commit 1b9c510614
6 changed files with 85 additions and 22 deletions

View File

@@ -86,7 +86,10 @@ CommandContainer::ErrorCode CommandContainer::initialize(Device *device, Allocat
addToResidencyContainer(cmdBufferAllocation);
}
if (requireHeaps) {
constexpr size_t heapSize = 65536u;
size_t heapSize = 65536u;
if (DebugManager.flags.ForceDefaultHeapSize.get() != -1) {
heapSize = DebugManager.flags.ForceDefaultHeapSize.get() * MemoryConstants::kiloByte;
}
heapHelper = std::unique_ptr<HeapHelper>(new HeapHelper(device->getMemoryManager(), device->getDefaultEngine().commandStreamReceiver->getInternalAllocationStorage(), device->getNumGenericSubDevices() > 1u));
for (uint32_t i = 0; i < IndirectHeap::Type::NUM_TYPES; i++) {
@@ -186,22 +189,7 @@ size_t CommandContainer::getTotalCmdBufferSize() {
void *CommandContainer::getHeapSpaceAllowGrow(HeapType heapType,
size_t size) {
auto indirectHeap = getIndirectHeap(heapType);
if (immediateCmdListSharedHeap(heapType)) {
UNRECOVERABLE_IF(indirectHeap == nullptr);
UNRECOVERABLE_IF(indirectHeap->getAvailableSpace() < size);
} else {
if (indirectHeap->getAvailableSpace() < size) {
size_t newSize = indirectHeap->getUsed() + indirectHeap->getAvailableSpace();
newSize *= 2;
newSize = std::max(newSize, indirectHeap->getAvailableSpace() + size);
newSize = alignUp(newSize, MemoryConstants::pageSize);
this->createAndAssignNewHeap(heapType, newSize);
}
}
return indirectHeap->getSpace(size);
return getHeapWithRequiredSizeAndAlignment(heapType, size, 0)->getSpace(size);
}
IndirectHeap *CommandContainer::getHeapWithRequiredSizeAndAlignment(HeapType heapType, size_t sizeRequired, size_t alignment) {
@@ -219,6 +207,7 @@ IndirectHeap *CommandContainer::getHeapWithRequiredSizeAndAlignment(HeapType hea
} else {
if (indirectHeap->getAvailableSpace() < sizeRequested) {
size_t newSize = indirectHeap->getUsed() + indirectHeap->getAvailableSpace();
newSize = std::max(newSize, indirectHeap->getAvailableSpace() + sizeRequested);
newSize = alignUp(newSize, MemoryConstants::pageSize);
auto oldAlloc = getIndirectHeapAllocation(heapType);
this->createAndAssignNewHeap(heapType, newSize);

View File

@@ -75,7 +75,7 @@ struct EncodeDispatchKernel {
static void setGrfInfo(INTERFACE_DESCRIPTOR_DATA *pInterfaceDescriptor, uint32_t numGrf, const size_t &sizeCrossThreadData,
const size_t &sizePerThreadData, const HardwareInfo &hwInfo);
static void *getInterfaceDescriptor(CommandContainer &container, uint32_t &iddOffset);
static void *getInterfaceDescriptor(CommandContainer &container, uint32_t &iddOffset, const HardwareInfo &hwInfo);
static bool isRuntimeLocalIdsGenerationRequired(uint32_t activeChannels,
const size_t *lws,

View File

@@ -518,7 +518,9 @@ template <typename Family>
void EncodeSurfaceState<Family>::encodeImplicitScalingParams(const EncodeSurfaceStateArgs &args) {}
template <typename Family>
void *EncodeDispatchKernel<Family>::getInterfaceDescriptor(CommandContainer &container, uint32_t &iddOffset) {
void *EncodeDispatchKernel<Family>::getInterfaceDescriptor(CommandContainer &container, uint32_t &iddOffset, const HardwareInfo &hwInfo) {
using STATE_BASE_ADDRESS = typename Family::STATE_BASE_ADDRESS;
if (container.nextIddInBlock == container.getNumIddPerBlock()) {
if (ApiSpecificConfig::getBindlessConfiguration()) {
@@ -531,6 +533,24 @@ void *EncodeDispatchKernel<Family>::getInterfaceDescriptor(CommandContainer &con
}
container.nextIddInBlock = 0;
if (container.isHeapDirty(HeapType::DYNAMIC_STATE)) {
PipeControlArgs syncArgs;
syncArgs.dcFlushEnable = MemorySynchronizationCommands<Family>::getDcFlushEnable(true, hwInfo);
syncArgs.hdcPipelineFlush = true;
MemorySynchronizationCommands<Family>::addSingleBarrier(*container.getCommandStream(), syncArgs);
STATE_BASE_ADDRESS sba;
EncodeStateBaseAddressArgs<Family> encodeStateBaseAddressArgs = {
&container,
sba,
0,
false,
false,
false};
EncodeStateBaseAddress<Family>::encode(encodeStateBaseAddressArgs);
container.setDirtyStateForAllHeaps(false);
}
EncodeMediaInterfaceDescriptorLoad<Family>::encode(container);
}

View File

@@ -203,8 +203,7 @@ void EncodeDispatchKernel<Family>::encode(CommandContainer &container, EncodeDis
}
uint32_t numIDD = 0u;
void *ptr = getInterfaceDescriptor(container, numIDD);
memcpy_s(ptr, sizeof(idd), &idd, sizeof(idd));
void *iddPtr = getInterfaceDescriptor(container, numIDD, hwInfo);
cmd.setIndirectDataStartAddress(static_cast<uint32_t>(offsetThreadData));
cmd.setIndirectDataLength(sizeThreadData);
@@ -233,6 +232,8 @@ void EncodeDispatchKernel<Family>::encode(CommandContainer &container, EncodeDis
auto threadGroupCount = cmd.getThreadGroupIdXDimension() * cmd.getThreadGroupIdYDimension() * cmd.getThreadGroupIdZDimension();
EncodeDispatchKernel<Family>::adjustInterfaceDescriptorData(idd, hwInfo, threadGroupCount, kernelDescriptor.kernelAttributes.numGrfRequired);
memcpy_s(iddPtr, sizeof(idd), &idd, sizeof(idd));
if (NEO::PauseOnGpuProperties::pauseModeAllowed(NEO::DebugManager.flags.PauseOnEnqueue.get(), args.device->debugExecutionCounter.load(), NEO::PauseOnGpuProperties::PauseMode::BeforeWorkload)) {
void *commandBuffer = listCmdBufferStream->getSpace(MemorySynchronizationCommands<Family>::getSizeForBarrierWithPostSyncOperation(hwInfo, false));
args.additionalCommands->push_back(commandBuffer);