mirror of
https://github.com/intel/compute-runtime.git
synced 2026-01-03 14:55:24 +08:00
Update to command_encoder, fix bug changing dynamic state memory
Adding ULT for encode and command container changes.
Refactor getHeapSpaceAllowGrow and getHeapWithRequiredSizeAndAlignment.
Signed-off-by: Tratnack, Geoffrey <geoffrey.tratnack@intel.com>
Related-To: LOCI-3365
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
2eb47bd73e
commit
1b9c510614
@@ -86,7 +86,10 @@ CommandContainer::ErrorCode CommandContainer::initialize(Device *device, Allocat
|
||||
addToResidencyContainer(cmdBufferAllocation);
|
||||
}
|
||||
if (requireHeaps) {
|
||||
constexpr size_t heapSize = 65536u;
|
||||
size_t heapSize = 65536u;
|
||||
if (DebugManager.flags.ForceDefaultHeapSize.get() != -1) {
|
||||
heapSize = DebugManager.flags.ForceDefaultHeapSize.get() * MemoryConstants::kiloByte;
|
||||
}
|
||||
heapHelper = std::unique_ptr<HeapHelper>(new HeapHelper(device->getMemoryManager(), device->getDefaultEngine().commandStreamReceiver->getInternalAllocationStorage(), device->getNumGenericSubDevices() > 1u));
|
||||
|
||||
for (uint32_t i = 0; i < IndirectHeap::Type::NUM_TYPES; i++) {
|
||||
@@ -186,22 +189,7 @@ size_t CommandContainer::getTotalCmdBufferSize() {
|
||||
|
||||
void *CommandContainer::getHeapSpaceAllowGrow(HeapType heapType,
|
||||
size_t size) {
|
||||
auto indirectHeap = getIndirectHeap(heapType);
|
||||
|
||||
if (immediateCmdListSharedHeap(heapType)) {
|
||||
UNRECOVERABLE_IF(indirectHeap == nullptr);
|
||||
UNRECOVERABLE_IF(indirectHeap->getAvailableSpace() < size);
|
||||
} else {
|
||||
if (indirectHeap->getAvailableSpace() < size) {
|
||||
size_t newSize = indirectHeap->getUsed() + indirectHeap->getAvailableSpace();
|
||||
newSize *= 2;
|
||||
newSize = std::max(newSize, indirectHeap->getAvailableSpace() + size);
|
||||
newSize = alignUp(newSize, MemoryConstants::pageSize);
|
||||
this->createAndAssignNewHeap(heapType, newSize);
|
||||
}
|
||||
}
|
||||
|
||||
return indirectHeap->getSpace(size);
|
||||
return getHeapWithRequiredSizeAndAlignment(heapType, size, 0)->getSpace(size);
|
||||
}
|
||||
|
||||
IndirectHeap *CommandContainer::getHeapWithRequiredSizeAndAlignment(HeapType heapType, size_t sizeRequired, size_t alignment) {
|
||||
@@ -219,6 +207,7 @@ IndirectHeap *CommandContainer::getHeapWithRequiredSizeAndAlignment(HeapType hea
|
||||
} else {
|
||||
if (indirectHeap->getAvailableSpace() < sizeRequested) {
|
||||
size_t newSize = indirectHeap->getUsed() + indirectHeap->getAvailableSpace();
|
||||
newSize = std::max(newSize, indirectHeap->getAvailableSpace() + sizeRequested);
|
||||
newSize = alignUp(newSize, MemoryConstants::pageSize);
|
||||
auto oldAlloc = getIndirectHeapAllocation(heapType);
|
||||
this->createAndAssignNewHeap(heapType, newSize);
|
||||
|
||||
@@ -75,7 +75,7 @@ struct EncodeDispatchKernel {
|
||||
static void setGrfInfo(INTERFACE_DESCRIPTOR_DATA *pInterfaceDescriptor, uint32_t numGrf, const size_t &sizeCrossThreadData,
|
||||
const size_t &sizePerThreadData, const HardwareInfo &hwInfo);
|
||||
|
||||
static void *getInterfaceDescriptor(CommandContainer &container, uint32_t &iddOffset);
|
||||
static void *getInterfaceDescriptor(CommandContainer &container, uint32_t &iddOffset, const HardwareInfo &hwInfo);
|
||||
|
||||
static bool isRuntimeLocalIdsGenerationRequired(uint32_t activeChannels,
|
||||
const size_t *lws,
|
||||
|
||||
@@ -518,7 +518,9 @@ template <typename Family>
|
||||
void EncodeSurfaceState<Family>::encodeImplicitScalingParams(const EncodeSurfaceStateArgs &args) {}
|
||||
|
||||
template <typename Family>
|
||||
void *EncodeDispatchKernel<Family>::getInterfaceDescriptor(CommandContainer &container, uint32_t &iddOffset) {
|
||||
void *EncodeDispatchKernel<Family>::getInterfaceDescriptor(CommandContainer &container, uint32_t &iddOffset, const HardwareInfo &hwInfo) {
|
||||
|
||||
using STATE_BASE_ADDRESS = typename Family::STATE_BASE_ADDRESS;
|
||||
|
||||
if (container.nextIddInBlock == container.getNumIddPerBlock()) {
|
||||
if (ApiSpecificConfig::getBindlessConfiguration()) {
|
||||
@@ -531,6 +533,24 @@ void *EncodeDispatchKernel<Family>::getInterfaceDescriptor(CommandContainer &con
|
||||
}
|
||||
container.nextIddInBlock = 0;
|
||||
|
||||
if (container.isHeapDirty(HeapType::DYNAMIC_STATE)) {
|
||||
PipeControlArgs syncArgs;
|
||||
syncArgs.dcFlushEnable = MemorySynchronizationCommands<Family>::getDcFlushEnable(true, hwInfo);
|
||||
syncArgs.hdcPipelineFlush = true;
|
||||
MemorySynchronizationCommands<Family>::addSingleBarrier(*container.getCommandStream(), syncArgs);
|
||||
|
||||
STATE_BASE_ADDRESS sba;
|
||||
EncodeStateBaseAddressArgs<Family> encodeStateBaseAddressArgs = {
|
||||
&container,
|
||||
sba,
|
||||
0,
|
||||
false,
|
||||
false,
|
||||
false};
|
||||
EncodeStateBaseAddress<Family>::encode(encodeStateBaseAddressArgs);
|
||||
container.setDirtyStateForAllHeaps(false);
|
||||
}
|
||||
|
||||
EncodeMediaInterfaceDescriptorLoad<Family>::encode(container);
|
||||
}
|
||||
|
||||
|
||||
@@ -203,8 +203,7 @@ void EncodeDispatchKernel<Family>::encode(CommandContainer &container, EncodeDis
|
||||
}
|
||||
|
||||
uint32_t numIDD = 0u;
|
||||
void *ptr = getInterfaceDescriptor(container, numIDD);
|
||||
memcpy_s(ptr, sizeof(idd), &idd, sizeof(idd));
|
||||
void *iddPtr = getInterfaceDescriptor(container, numIDD, hwInfo);
|
||||
|
||||
cmd.setIndirectDataStartAddress(static_cast<uint32_t>(offsetThreadData));
|
||||
cmd.setIndirectDataLength(sizeThreadData);
|
||||
@@ -233,6 +232,8 @@ void EncodeDispatchKernel<Family>::encode(CommandContainer &container, EncodeDis
|
||||
auto threadGroupCount = cmd.getThreadGroupIdXDimension() * cmd.getThreadGroupIdYDimension() * cmd.getThreadGroupIdZDimension();
|
||||
EncodeDispatchKernel<Family>::adjustInterfaceDescriptorData(idd, hwInfo, threadGroupCount, kernelDescriptor.kernelAttributes.numGrfRequired);
|
||||
|
||||
memcpy_s(iddPtr, sizeof(idd), &idd, sizeof(idd));
|
||||
|
||||
if (NEO::PauseOnGpuProperties::pauseModeAllowed(NEO::DebugManager.flags.PauseOnEnqueue.get(), args.device->debugExecutionCounter.load(), NEO::PauseOnGpuProperties::PauseMode::BeforeWorkload)) {
|
||||
void *commandBuffer = listCmdBufferStream->getSpace(MemorySynchronizationCommands<Family>::getSizeForBarrierWithPostSyncOperation(hwInfo, false));
|
||||
args.additionalCommands->push_back(commandBuffer);
|
||||
|
||||
Reference in New Issue
Block a user