Estimate command stream size for marker profiling

Signed-off-by: Maciej Plewka <maciej.plewka@intel.com>
This commit is contained in:
Maciej Plewka
2021-06-14 16:35:48 +00:00
committed by Compute-Runtime-Automation
parent 9c181df6e6
commit 9fe2dddcd3
13 changed files with 48 additions and 40 deletions

View File

@@ -452,7 +452,7 @@ class CommandQueueHw : public CommandQueue {
LinearStream *obtainCommandStream(const CsrDependencies &csrDependencies, bool blitEnqueue, bool blockedQueue,
const MultiDispatchInfo &multiDispatchInfo, const EventsRequest &eventsRequest,
std::unique_ptr<KernelOperation> &blockedCommandsData,
Surface **surfaces, size_t numSurfaces) {
Surface **surfaces, size_t numSurfaces, bool isMarkerWithProfiling) {
LinearStream *commandStream = nullptr;
bool profilingRequired = (this->isProfilingEnabled() && eventsRequest.outEvent);
@@ -469,7 +469,7 @@ class CommandQueueHw : public CommandQueue {
blockedCommandsData = std::make_unique<KernelOperation>(commandStream, *gpgpuCsr.getInternalAllocationStorage());
} else {
commandStream = &getCommandStream<GfxFamily, commandType>(*this, csrDependencies, profilingRequired, perfCountersRequired,
blitEnqueue, multiDispatchInfo, surfaces, numSurfaces);
blitEnqueue, multiDispatchInfo, surfaces, numSurfaces, isMarkerWithProfiling);
}
return commandStream;
}

View File

@@ -228,7 +228,7 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
}
auto &commandStream = *obtainCommandStream<commandType>(csrDeps, false, blockQueue, multiDispatchInfo, eventsRequest,
blockedCommandsData, surfacesForResidency, numSurfaceForResidency);
blockedCommandsData, surfacesForResidency, numSurfaceForResidency, isMarkerWithProfiling);
auto commandStreamStart = commandStream.getUsed();
if (this->context->getRootDeviceIndices().size() > 1) {
@@ -1177,7 +1177,7 @@ void CommandQueueHw<GfxFamily>::enqueueBlit(const MultiDispatchInfo &multiDispat
LinearStream *gpgpuCommandStream = {};
size_t gpgpuCommandStreamStart = {};
if (isGpgpuSubmissionForBcsRequired(blockQueue)) {
gpgpuCommandStream = obtainCommandStream<cmdType>(csrDeps, true, blockQueue, multiDispatchInfo, eventsRequest, blockedCommandsData, nullptr, 0);
gpgpuCommandStream = obtainCommandStream<cmdType>(csrDeps, true, blockQueue, multiDispatchInfo, eventsRequest, blockedCommandsData, nullptr, 0, false);
gpgpuCommandStreamStart = gpgpuCommandStream->getUsed();
}

View File

@@ -167,7 +167,7 @@ class GpgpuWalkerHelper {
template <typename GfxFamily>
struct EnqueueOperation {
using PIPE_CONTROL = typename GfxFamily::PIPE_CONTROL;
static size_t getTotalSizeRequiredCS(uint32_t eventType, const CsrDependencies &csrDeps, bool reserveProfilingCmdsSpace, bool reservePerfCounters, bool blitEnqueue, CommandQueue &commandQueue, const MultiDispatchInfo &multiDispatchInfo);
static size_t getTotalSizeRequiredCS(uint32_t eventType, const CsrDependencies &csrDeps, bool reserveProfilingCmdsSpace, bool reservePerfCounters, bool blitEnqueue, CommandQueue &commandQueue, const MultiDispatchInfo &multiDispatchInfo, bool isMarkerWithProfiling);
static size_t getSizeRequiredCS(uint32_t cmdType, bool reserveProfilingCmdsSpace, bool reservePerfCounters, CommandQueue &commandQueue, const Kernel *pKernel, const DispatchInfo &dispatchInfo);
static size_t getSizeRequiredForTimestampPacketWrite();
static size_t getSizeForCacheFlushAfterWalkerCommands(const Kernel &kernel, const CommandQueue &commandQueue);
@@ -180,8 +180,8 @@ struct EnqueueOperation {
template <typename GfxFamily, uint32_t eventType>
LinearStream &getCommandStream(CommandQueue &commandQueue, const CsrDependencies &csrDeps, bool reserveProfilingCmdsSpace,
bool reservePerfCounterCmdsSpace, bool blitEnqueue, const MultiDispatchInfo &multiDispatchInfo,
Surface **surfaces, size_t numSurfaces) {
size_t expectedSizeCS = EnqueueOperation<GfxFamily>::getTotalSizeRequiredCS(eventType, csrDeps, reserveProfilingCmdsSpace, reservePerfCounterCmdsSpace, blitEnqueue, commandQueue, multiDispatchInfo);
Surface **surfaces, size_t numSurfaces, bool isMarkerWithProfiling) {
size_t expectedSizeCS = EnqueueOperation<GfxFamily>::getTotalSizeRequiredCS(eventType, csrDeps, reserveProfilingCmdsSpace, reservePerfCounterCmdsSpace, blitEnqueue, commandQueue, multiDispatchInfo, isMarkerWithProfiling);
return commandQueue.getCS(expectedSizeCS);
}

View File

@@ -6,6 +6,7 @@
*/
#pragma once
#include "shared/source/command_container/command_encoder.h"
#include "shared/source/command_stream/command_stream_receiver.h"
#include "shared/source/helpers/aligned_memory.h"
#include "shared/source/helpers/debug_helpers.h"
@@ -177,7 +178,7 @@ size_t GpgpuWalkerHelper<GfxFamily>::getSizeForWaDisableRccRhwoOptimization(cons
}
template <typename GfxFamily>
size_t EnqueueOperation<GfxFamily>::getTotalSizeRequiredCS(uint32_t eventType, const CsrDependencies &csrDeps, bool reserveProfilingCmdsSpace, bool reservePerfCounters, bool blitEnqueue, CommandQueue &commandQueue, const MultiDispatchInfo &multiDispatchInfo) {
size_t EnqueueOperation<GfxFamily>::getTotalSizeRequiredCS(uint32_t eventType, const CsrDependencies &csrDeps, bool reserveProfilingCmdsSpace, bool reservePerfCounters, bool blitEnqueue, CommandQueue &commandQueue, const MultiDispatchInfo &multiDispatchInfo, bool isMarkerWithProfiling) {
size_t expectedSizeCS = 0;
auto &hwInfo = commandQueue.getDevice().getHardwareInfo();
auto &commandQueueHw = static_cast<CommandQueueHw<GfxFamily> &>(commandQueue);
@@ -205,8 +206,15 @@ size_t EnqueueOperation<GfxFamily>::getTotalSizeRequiredCS(uint32_t eventType, c
if (commandQueue.getGpgpuCommandStreamReceiver().peekTimestampPacketWriteEnabled()) {
expectedSizeCS += TimestampPacketHelper::getRequiredCmdStreamSize<GfxFamily>(csrDeps);
expectedSizeCS += EnqueueOperation<GfxFamily>::getSizeRequiredForTimestampPacketWrite();
if (isMarkerWithProfiling) {
expectedSizeCS += 4 * EncodeStoreMMIO<GfxFamily>::size;
}
} else if (isMarkerWithProfiling) {
expectedSizeCS += 2 * MemorySynchronizationCommands<GfxFamily>::getSizeForSinglePipeControl();
if (!HwHelper::get(hwInfo.platform.eRenderCoreFamily).useOnlyGlobalTimestamps()) {
expectedSizeCS += 2 * EncodeStoreMMIO<GfxFamily>::size;
}
}
if (multiDispatchInfo.peekMainKernel()) {
expectedSizeCS += EnqueueOperation<GfxFamily>::getSizeForCacheFlushAfterWalkerCommands(*multiDispatchInfo.peekMainKernel(), commandQueue);
}