Implement PauseOnEnqueue for L0

Allow pausing execution before and after enqueuing a kernel
using the PauseOnEnqueue and PauseOnGpuMode debug flags.

Related-To: NEO-6570
Signed-off-by: Naklicki, Mateusz <mateusz.naklicki@intel.com>
This commit is contained in:
Naklicki, Mateusz
2022-07-21 14:51:09 +00:00
committed by Compute-Runtime-Automation
parent a820e73dd7
commit 54042a191e
15 changed files with 620 additions and 24 deletions

View File

@@ -19,6 +19,8 @@
#include "encode_surface_state_args.h"
#include <list>
namespace NEO {
class BindlessHeapsHelper;
@@ -48,6 +50,7 @@ struct EncodeDispatchKernelArgs {
bool isKernelUsingSystemAllocation = false;
bool isKernelDispatchedFromImmediateCmdList = false;
bool isRcs = false;
std::list<void *> *additionalCommands = nullptr;
};
struct EncodeWalkerArgs {

View File

@@ -13,6 +13,7 @@
#include "shared/source/gmm_helper/gmm_helper.h"
#include "shared/source/helpers/api_specific_config.h"
#include "shared/source/helpers/hw_helper.h"
#include "shared/source/helpers/pause_on_gpu_properties.h"
#include "shared/source/helpers/pipe_control_args.h"
#include "shared/source/helpers/simd_helper.h"
#include "shared/source/helpers/state_base_address.h"
@@ -232,6 +233,15 @@ void EncodeDispatchKernel<Family>::encode(CommandContainer &container, EncodeDis
auto threadGroupCount = cmd.getThreadGroupIdXDimension() * cmd.getThreadGroupIdYDimension() * cmd.getThreadGroupIdZDimension();
EncodeDispatchKernel<Family>::adjustInterfaceDescriptorData(idd, hwInfo, threadGroupCount, kernelDescriptor.kernelAttributes.numGrfRequired);
if (NEO::PauseOnGpuProperties::pauseModeAllowed(NEO::DebugManager.flags.PauseOnEnqueue.get(), args.device->debugExecutionCounter.load(), NEO::PauseOnGpuProperties::PauseMode::BeforeWorkload)) {
void *commandBuffer = listCmdBufferStream->getSpace(MemorySynchronizationCommands<Family>::getSizeForBarrierWithPostSyncOperation(hwInfo, false));
args.additionalCommands->push_back(commandBuffer);
using MI_SEMAPHORE_WAIT = typename Family::MI_SEMAPHORE_WAIT;
MI_SEMAPHORE_WAIT *semaphoreCommand = listCmdBufferStream->getSpaceForCmd<MI_SEMAPHORE_WAIT>();
args.additionalCommands->push_back(semaphoreCommand);
}
PreemptionHelper::applyPreemptionWaCmdsBegin<Family>(listCmdBufferStream, *args.device);
auto buffer = listCmdBufferStream->getSpace(sizeof(cmd));
@@ -244,6 +254,15 @@ void EncodeDispatchKernel<Family>::encode(CommandContainer &container, EncodeDis
}
args.partitionCount = 1;
if (NEO::PauseOnGpuProperties::pauseModeAllowed(NEO::DebugManager.flags.PauseOnEnqueue.get(), args.device->debugExecutionCounter.load(), NEO::PauseOnGpuProperties::PauseMode::AfterWorkload)) {
void *commandBuffer = listCmdBufferStream->getSpace(MemorySynchronizationCommands<Family>::getSizeForBarrierWithPostSyncOperation(hwInfo, false));
args.additionalCommands->push_back(commandBuffer);
using MI_SEMAPHORE_WAIT = typename Family::MI_SEMAPHORE_WAIT;
MI_SEMAPHORE_WAIT *semaphoreCommand = listCmdBufferStream->getSpaceForCmd<MI_SEMAPHORE_WAIT>();
args.additionalCommands->push_back(semaphoreCommand);
}
}
template <typename Family>

View File

@@ -20,6 +20,7 @@
#include "shared/source/helpers/constants.h"
#include "shared/source/helpers/hw_helper.h"
#include "shared/source/helpers/hw_walk_order.h"
#include "shared/source/helpers/pause_on_gpu_properties.h"
#include "shared/source/helpers/pipe_control_args.h"
#include "shared/source/helpers/pipeline_select_helper.h"
#include "shared/source/helpers/ray_tracing_helper.h"
@@ -228,6 +229,15 @@ void EncodeDispatchKernel<Family>::encode(CommandContainer &container, EncodeDis
args.requiresUncachedMocs = false;
}
if (NEO::PauseOnGpuProperties::pauseModeAllowed(NEO::DebugManager.flags.PauseOnEnqueue.get(), args.device->debugExecutionCounter.load(), NEO::PauseOnGpuProperties::PauseMode::BeforeWorkload)) {
void *commandBuffer = listCmdBufferStream->getSpace(MemorySynchronizationCommands<Family>::getSizeForBarrierWithPostSyncOperation(hwInfo, false));
args.additionalCommands->push_back(commandBuffer);
using MI_SEMAPHORE_WAIT = typename Family::MI_SEMAPHORE_WAIT;
MI_SEMAPHORE_WAIT *semaphoreCommand = listCmdBufferStream->getSpaceForCmd<MI_SEMAPHORE_WAIT>();
args.additionalCommands->push_back(reinterpret_cast<void *>(semaphoreCommand));
}
walkerCmd.setIndirectDataStartAddress(static_cast<uint32_t>(offsetThreadData));
walkerCmd.setIndirectDataLength(sizeThreadData);
@@ -301,6 +311,15 @@ void EncodeDispatchKernel<Family>::encode(CommandContainer &container, EncodeDis
}
PreemptionHelper::applyPreemptionWaCmdsEnd<Family>(listCmdBufferStream, *args.device);
if (NEO::PauseOnGpuProperties::pauseModeAllowed(NEO::DebugManager.flags.PauseOnEnqueue.get(), args.device->debugExecutionCounter.load(), NEO::PauseOnGpuProperties::PauseMode::AfterWorkload)) {
void *commandBuffer = listCmdBufferStream->getSpace(MemorySynchronizationCommands<Family>::getSizeForBarrierWithPostSyncOperation(hwInfo, false));
args.additionalCommands->push_back(commandBuffer);
using MI_SEMAPHORE_WAIT = typename Family::MI_SEMAPHORE_WAIT;
MI_SEMAPHORE_WAIT *semaphoreCommand = listCmdBufferStream->getSpaceForCmd<MI_SEMAPHORE_WAIT>();
args.additionalCommands->push_back(semaphoreCommand);
}
}
template <typename Family>