mirror of
https://github.com/intel/compute-runtime.git
synced 2025-12-29 09:03:14 +08:00
refactor: rename global debug manager to debugManager
Signed-off-by: Mateusz Jablonski <mateusz.jablonski@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
f2ede40d2e
commit
c9664e6bad
@@ -25,7 +25,7 @@ Vec3<size_t> computeWorkgroupSize(const DispatchInfo &dispatchInfo) {
|
||||
auto kernel = dispatchInfo.getKernel();
|
||||
|
||||
if (kernel != nullptr) {
|
||||
if (DebugManager.flags.EnableComputeWorkSizeND.get()) {
|
||||
if (debugManager.flags.EnableComputeWorkSizeND.get()) {
|
||||
WorkSizeInfo wsInfo = createWorkSizeInfoFromDispatchInfo(dispatchInfo);
|
||||
size_t workItems[3] = {dispatchInfo.getGWS().x, dispatchInfo.getGWS().y, dispatchInfo.getGWS().z};
|
||||
computeWorkgroupSizeND(wsInfo, workGroupSize, workItems, dispatchInfo.getDim());
|
||||
@@ -35,7 +35,7 @@ Vec3<size_t> computeWorkgroupSize(const DispatchInfo &dispatchInfo) {
|
||||
size_t workItems[3] = {dispatchInfo.getGWS().x, dispatchInfo.getGWS().y, dispatchInfo.getGWS().z};
|
||||
if (dispatchInfo.getDim() == 1) {
|
||||
computeWorkgroupSize1D(maxWorkGroupSize, workGroupSize, workItems, simd);
|
||||
} else if (DebugManager.flags.EnableComputeWorkSizeSquared.get() && dispatchInfo.getDim() == 2) {
|
||||
} else if (debugManager.flags.EnableComputeWorkSizeSquared.get() && dispatchInfo.getDim() == 2) {
|
||||
computeWorkgroupSizeSquared(maxWorkGroupSize, workGroupSize, workItems, simd, dispatchInfo.getDim());
|
||||
} else {
|
||||
computeWorkgroupSize2D(maxWorkGroupSize, workGroupSize, workItems, simd);
|
||||
|
||||
@@ -118,8 +118,8 @@ CommandQueue::CommandQueue(Context *context, ClDevice *device, const cl_queue_pr
|
||||
}
|
||||
auto deferCmdQBcsInitialization = hwInfo.featureTable.ftrBcsInfo.count() > 1u;
|
||||
|
||||
if (DebugManager.flags.DeferCmdQBcsInitialization.get() != -1) {
|
||||
deferCmdQBcsInitialization = DebugManager.flags.DeferCmdQBcsInitialization.get();
|
||||
if (debugManager.flags.DeferCmdQBcsInitialization.get() != -1) {
|
||||
deferCmdQBcsInitialization = debugManager.flags.DeferCmdQBcsInitialization.get();
|
||||
}
|
||||
|
||||
if (!deferCmdQBcsInitialization) {
|
||||
@@ -179,8 +179,8 @@ void CommandQueue::initializeGpgpu() const {
|
||||
auto engineRoundRobinAvailable = productHelper.isAssignEngineRoundRobinSupported() &&
|
||||
this->isAssignEngineRoundRobinEnabled();
|
||||
|
||||
if (DebugManager.flags.EnableCmdQRoundRobindEngineAssign.get() != -1) {
|
||||
engineRoundRobinAvailable = DebugManager.flags.EnableCmdQRoundRobindEngineAssign.get();
|
||||
if (debugManager.flags.EnableCmdQRoundRobindEngineAssign.get() != -1) {
|
||||
engineRoundRobinAvailable = debugManager.flags.EnableCmdQRoundRobindEngineAssign.get();
|
||||
}
|
||||
|
||||
auto assignEngineRoundRobin =
|
||||
@@ -227,8 +227,8 @@ void CommandQueue::initializeGpgpuInternals() const {
|
||||
|
||||
if (getCmdQueueProperties<cl_queue_properties>(propertiesVector.data(), CL_QUEUE_PROPERTIES) & static_cast<cl_queue_properties>(CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE) && !this->gpgpuEngine->commandStreamReceiver->isUpdateTagFromWaitEnabled()) {
|
||||
this->gpgpuEngine->commandStreamReceiver->overrideDispatchPolicy(DispatchMode::BatchedDispatch);
|
||||
if (DebugManager.flags.CsrDispatchMode.get() != 0) {
|
||||
this->gpgpuEngine->commandStreamReceiver->overrideDispatchPolicy(static_cast<DispatchMode>(DebugManager.flags.CsrDispatchMode.get()));
|
||||
if (debugManager.flags.CsrDispatchMode.get() != 0) {
|
||||
this->gpgpuEngine->commandStreamReceiver->overrideDispatchPolicy(static_cast<DispatchMode>(debugManager.flags.CsrDispatchMode.get()));
|
||||
}
|
||||
this->gpgpuEngine->commandStreamReceiver->enableNTo1SubmissionModel();
|
||||
}
|
||||
@@ -275,7 +275,7 @@ CommandStreamReceiver &CommandQueue::selectCsrForBuiltinOperation(const CsrSelec
|
||||
case TransferDirection::LocalToLocal: {
|
||||
const auto &clGfxCoreHelper = device->getRootDeviceEnvironment().getHelper<ClGfxCoreHelper>();
|
||||
preferBcs = clGfxCoreHelper.preferBlitterForLocalToLocalTransfers();
|
||||
if (auto flag = DebugManager.flags.PreferCopyEngineForCopyBufferToBuffer.get(); flag != -1) {
|
||||
if (auto flag = debugManager.flags.PreferCopyEngineForCopyBufferToBuffer.get(); flag != -1) {
|
||||
preferBcs = static_cast<bool>(flag);
|
||||
}
|
||||
if (preferBcs) {
|
||||
@@ -290,8 +290,8 @@ CommandStreamReceiver &CommandQueue::selectCsrForBuiltinOperation(const CsrSelec
|
||||
|
||||
auto preferredBCSType = true;
|
||||
|
||||
if (DebugManager.flags.AssignBCSAtEnqueue.get() != -1) {
|
||||
preferredBCSType = DebugManager.flags.AssignBCSAtEnqueue.get();
|
||||
if (debugManager.flags.AssignBCSAtEnqueue.get() != -1) {
|
||||
preferredBCSType = debugManager.flags.AssignBCSAtEnqueue.get();
|
||||
}
|
||||
|
||||
if (preferredBCSType) {
|
||||
@@ -308,8 +308,8 @@ CommandStreamReceiver &CommandQueue::selectCsrForBuiltinOperation(const CsrSelec
|
||||
if (preferBcs) {
|
||||
auto assignBCS = true;
|
||||
|
||||
if (DebugManager.flags.AssignBCSAtEnqueue.get() != -1) {
|
||||
assignBCS = DebugManager.flags.AssignBCSAtEnqueue.get();
|
||||
if (debugManager.flags.AssignBCSAtEnqueue.get() != -1) {
|
||||
assignBCS = debugManager.flags.AssignBCSAtEnqueue.get();
|
||||
}
|
||||
|
||||
if (assignBCS) {
|
||||
@@ -361,8 +361,8 @@ void CommandQueue::constructBcsEnginesForSplit() {
|
||||
return;
|
||||
}
|
||||
|
||||
if (DebugManager.flags.SplitBcsMask.get() > 0) {
|
||||
this->splitEngines = DebugManager.flags.SplitBcsMask.get();
|
||||
if (debugManager.flags.SplitBcsMask.get() > 0) {
|
||||
this->splitEngines = debugManager.flags.SplitBcsMask.get();
|
||||
}
|
||||
|
||||
for (uint32_t i = 0; i < bcsInfoMaskSize; i++) {
|
||||
@@ -379,11 +379,11 @@ void CommandQueue::constructBcsEnginesForSplit() {
|
||||
}
|
||||
}
|
||||
|
||||
if (DebugManager.flags.SplitBcsMaskD2H.get() > 0) {
|
||||
this->d2hEngines = DebugManager.flags.SplitBcsMaskD2H.get();
|
||||
if (debugManager.flags.SplitBcsMaskD2H.get() > 0) {
|
||||
this->d2hEngines = debugManager.flags.SplitBcsMaskD2H.get();
|
||||
}
|
||||
if (DebugManager.flags.SplitBcsMaskH2D.get() > 0) {
|
||||
this->h2dEngines = DebugManager.flags.SplitBcsMaskH2D.get();
|
||||
if (debugManager.flags.SplitBcsMaskH2D.get() > 0) {
|
||||
this->h2dEngines = debugManager.flags.SplitBcsMaskH2D.get();
|
||||
}
|
||||
|
||||
this->bcsSplitInitialized = true;
|
||||
@@ -525,7 +525,7 @@ bool CommandQueue::isQueueBlocked() {
|
||||
taskLevel = getGpgpuCommandStreamReceiver().peekTaskLevel();
|
||||
}
|
||||
|
||||
fileLoggerInstance().log(DebugManager.flags.EventsDebugEnable.get(), "isQueueBlocked taskLevel change from", taskLevel, "to new from virtualEvent", this->virtualEvent, "new tasklevel", this->virtualEvent->taskLevel.load());
|
||||
fileLoggerInstance().log(debugManager.flags.EventsDebugEnable.get(), "isQueueBlocked taskLevel change from", taskLevel, "to new from virtualEvent", this->virtualEvent, "new tasklevel", this->virtualEvent->taskLevel.load());
|
||||
|
||||
// close the access to virtual event, driver added only 1 ref count.
|
||||
this->virtualEvent->decRefInternal();
|
||||
@@ -634,7 +634,7 @@ void CommandQueue::updateFromCompletionStamp(const CompletionStamp &completionSt
|
||||
|
||||
if (outEvent) {
|
||||
outEvent->updateCompletionStamp(completionStamp.taskCount, outEvent->peekBcsTaskCountFromCommandQueue(), completionStamp.taskLevel, completionStamp.flushStamp);
|
||||
fileLoggerInstance().log(DebugManager.flags.EventsDebugEnable.get(), "updateCompletionStamp Event", outEvent, "taskLevel", outEvent->taskLevel.load());
|
||||
fileLoggerInstance().log(debugManager.flags.EventsDebugEnable.get(), "updateCompletionStamp Event", outEvent, "taskLevel", outEvent->taskLevel.load());
|
||||
}
|
||||
}
|
||||
|
||||
@@ -974,14 +974,14 @@ bool CommandQueue::bufferCpuCopyAllowed(Buffer *buffer, cl_command_type commandT
|
||||
|
||||
auto debugVariableSet = false;
|
||||
// Requested by debug variable or allowed by Buffer
|
||||
if (CL_COMMAND_READ_BUFFER == commandType && DebugManager.flags.DoCpuCopyOnReadBuffer.get() != -1) {
|
||||
if (DebugManager.flags.DoCpuCopyOnReadBuffer.get() == 0) {
|
||||
if (CL_COMMAND_READ_BUFFER == commandType && debugManager.flags.DoCpuCopyOnReadBuffer.get() != -1) {
|
||||
if (debugManager.flags.DoCpuCopyOnReadBuffer.get() == 0) {
|
||||
return false;
|
||||
}
|
||||
debugVariableSet = true;
|
||||
}
|
||||
if (CL_COMMAND_WRITE_BUFFER == commandType && DebugManager.flags.DoCpuCopyOnWriteBuffer.get() != -1) {
|
||||
if (DebugManager.flags.DoCpuCopyOnWriteBuffer.get() == 0) {
|
||||
if (CL_COMMAND_WRITE_BUFFER == commandType && debugManager.flags.DoCpuCopyOnWriteBuffer.get() != -1) {
|
||||
if (debugManager.flags.DoCpuCopyOnWriteBuffer.get() == 0) {
|
||||
return false;
|
||||
}
|
||||
debugVariableSet = true;
|
||||
@@ -1025,13 +1025,13 @@ bool CommandQueue::bufferCpuCopyAllowed(Buffer *buffer, cl_command_type commandT
|
||||
}
|
||||
|
||||
bool CommandQueue::queueDependenciesClearRequired() const {
|
||||
return isOOQEnabled() || DebugManager.flags.OmitTimestampPacketDependencies.get();
|
||||
return isOOQEnabled() || debugManager.flags.OmitTimestampPacketDependencies.get();
|
||||
}
|
||||
|
||||
bool CommandQueue::blitEnqueueAllowed(const CsrSelectionArgs &args) const {
|
||||
bool blitEnqueueAllowed = ((device->getRootDeviceEnvironment().isWddmOnLinux() || device->getRootDeviceEnvironment().getProductHelper().blitEnqueueAllowed()) && getGpgpuCommandStreamReceiver().peekTimestampPacketWriteEnabled()) || this->isCopyOnly;
|
||||
if (DebugManager.flags.EnableBlitterForEnqueueOperations.get() != -1) {
|
||||
blitEnqueueAllowed = DebugManager.flags.EnableBlitterForEnqueueOperations.get();
|
||||
if (debugManager.flags.EnableBlitterForEnqueueOperations.get() != -1) {
|
||||
blitEnqueueAllowed = debugManager.flags.EnableBlitterForEnqueueOperations.get();
|
||||
}
|
||||
if (!blitEnqueueAllowed) {
|
||||
return false;
|
||||
@@ -1067,8 +1067,8 @@ bool CommandQueue::blitEnqueueImageAllowed(const size_t *origin, const size_t *r
|
||||
auto &productHelper = device->getProductHelper();
|
||||
auto blitEnqueueImageAllowed = productHelper.isBlitterForImagesSupported();
|
||||
|
||||
if (DebugManager.flags.EnableBlitterForEnqueueImageOperations.get() != -1) {
|
||||
blitEnqueueImageAllowed = DebugManager.flags.EnableBlitterForEnqueueImageOperations.get();
|
||||
if (debugManager.flags.EnableBlitterForEnqueueImageOperations.get() != -1) {
|
||||
blitEnqueueImageAllowed = debugManager.flags.EnableBlitterForEnqueueImageOperations.get();
|
||||
}
|
||||
|
||||
blitEnqueueImageAllowed &= !isMipMapped(image.getImageDesc());
|
||||
@@ -1137,7 +1137,7 @@ void CommandQueue::processProperties(const cl_queue_properties *properties) {
|
||||
break;
|
||||
case CL_QUEUE_INDEX_INTEL:
|
||||
selectedQueueIndex = static_cast<cl_uint>(*(currentProperties + 1));
|
||||
auto nodeOrdinal = DebugManager.flags.NodeOrdinal.get();
|
||||
auto nodeOrdinal = debugManager.flags.NodeOrdinal.get();
|
||||
if (nodeOrdinal != -1) {
|
||||
int currentEngineIndex = 0;
|
||||
const HardwareInfo &hwInfo = getDevice().getHardwareInfo();
|
||||
@@ -1165,9 +1165,9 @@ void CommandQueue::processProperties(const cl_queue_properties *properties) {
|
||||
const auto &engine = getDevice().getRegularEngineGroups()[selectedQueueFamilyIndex].engines[selectedQueueIndex];
|
||||
auto engineType = engine.getEngineType();
|
||||
auto engineUsage = engine.getEngineUsage();
|
||||
if ((DebugManager.flags.EngineUsageHint.get() != -1) &&
|
||||
(getDevice().tryGetEngine(engineType, static_cast<EngineUsage>(DebugManager.flags.EngineUsageHint.get())) != nullptr)) {
|
||||
engineUsage = static_cast<EngineUsage>(DebugManager.flags.EngineUsageHint.get());
|
||||
if ((debugManager.flags.EngineUsageHint.get() != -1) &&
|
||||
(getDevice().tryGetEngine(engineType, static_cast<EngineUsage>(debugManager.flags.EngineUsageHint.get())) != nullptr)) {
|
||||
engineUsage = static_cast<EngineUsage>(debugManager.flags.EngineUsageHint.get());
|
||||
}
|
||||
this->overrideEngine(engineType, engineUsage);
|
||||
this->queueCapabilities = getClDevice().getDeviceInfo().queueFamilyProperties[selectedQueueFamilyIndex].capabilities;
|
||||
@@ -1213,7 +1213,7 @@ void CommandQueue::overrideEngine(aub_stream::EngineType engineType, EngineUsage
|
||||
}
|
||||
|
||||
void CommandQueue::aubCaptureHook(bool &blocking, bool &clearAllDependencies, const MultiDispatchInfo &multiDispatchInfo) {
|
||||
if (DebugManager.flags.AUBDumpSubCaptureMode.get()) {
|
||||
if (debugManager.flags.AUBDumpSubCaptureMode.get()) {
|
||||
auto status = getGpgpuCommandStreamReceiver().checkAndActivateAubSubCapture(multiDispatchInfo.empty() ? "" : multiDispatchInfo.peekMainKernel()->getDescriptor().kernelMetadata.kernelName);
|
||||
if (!status.isActive) {
|
||||
// make each enqueue blocking when subcapture is not active to split batch buffer
|
||||
@@ -1249,7 +1249,7 @@ bool CommandQueue::isWaitForTimestampsEnabled() const {
|
||||
enabled &= gfxCoreHelper.isTimestampWaitSupportedForQueues();
|
||||
enabled &= !productHelper.isDcFlushAllowed();
|
||||
|
||||
switch (DebugManager.flags.EnableTimestampWaitForQueues.get()) {
|
||||
switch (debugManager.flags.EnableTimestampWaitForQueues.get()) {
|
||||
case 0:
|
||||
enabled = false;
|
||||
break;
|
||||
|
||||
@@ -39,8 +39,8 @@ class CommandQueueHw : public CommandQueue {
|
||||
ClDevice *device,
|
||||
const cl_queue_properties *properties,
|
||||
bool internalUsage) : BaseClass(context, device, properties, internalUsage) {
|
||||
if (DebugManager.flags.SplitBcsSize.get() != -1) {
|
||||
this->minimalSizeForBcsSplit = DebugManager.flags.SplitBcsSize.get() * MemoryConstants::kiloByte;
|
||||
if (debugManager.flags.SplitBcsSize.get() != -1) {
|
||||
this->minimalSizeForBcsSplit = debugManager.flags.SplitBcsSize.get() * MemoryConstants::kiloByte;
|
||||
}
|
||||
|
||||
auto clPriority = getCmdQueueProperties<cl_queue_priority_khr>(properties, CL_QUEUE_PRIORITY_KHR);
|
||||
@@ -79,8 +79,8 @@ class CommandQueueHw : public CommandQueue {
|
||||
|
||||
auto initializeGpgpu = false;
|
||||
|
||||
if (DebugManager.flags.DeferCmdQGpgpuInitialization.get() != -1) {
|
||||
initializeGpgpu = !DebugManager.flags.DeferCmdQGpgpuInitialization.get();
|
||||
if (debugManager.flags.DeferCmdQGpgpuInitialization.get() != -1) {
|
||||
initializeGpgpu = !debugManager.flags.DeferCmdQGpgpuInitialization.get();
|
||||
}
|
||||
|
||||
if (initializeGpgpu) {
|
||||
|
||||
@@ -34,21 +34,21 @@
|
||||
namespace NEO {
|
||||
template <typename Family>
|
||||
void CommandQueueHw<Family>::notifyEnqueueReadBuffer(Buffer *buffer, bool blockingRead, bool notifyBcsCsr) {
|
||||
if (DebugManager.flags.AUBDumpAllocsOnEnqueueReadOnly.get()) {
|
||||
if (debugManager.flags.AUBDumpAllocsOnEnqueueReadOnly.get()) {
|
||||
buffer->getGraphicsAllocation(getDevice().getRootDeviceIndex())->setAllocDumpable(blockingRead, notifyBcsCsr);
|
||||
buffer->forceDisallowCPUCopy = blockingRead;
|
||||
}
|
||||
}
|
||||
template <typename Family>
|
||||
void CommandQueueHw<Family>::notifyEnqueueReadImage(Image *image, bool blockingRead, bool notifyBcsCsr) {
|
||||
if (DebugManager.flags.AUBDumpAllocsOnEnqueueReadOnly.get()) {
|
||||
if (debugManager.flags.AUBDumpAllocsOnEnqueueReadOnly.get()) {
|
||||
image->getGraphicsAllocation(getDevice().getRootDeviceIndex())->setAllocDumpable(blockingRead, notifyBcsCsr);
|
||||
}
|
||||
}
|
||||
|
||||
template <typename Family>
|
||||
void CommandQueueHw<Family>::notifyEnqueueSVMMemcpy(GraphicsAllocation *gfxAllocation, bool blockingCopy, bool notifyBcsCsr) {
|
||||
if (DebugManager.flags.AUBDumpAllocsOnEnqueueSVMMemcpyOnly.get()) {
|
||||
if (debugManager.flags.AUBDumpAllocsOnEnqueueSVMMemcpyOnly.get()) {
|
||||
gfxAllocation->setAllocDumpable(blockingCopy, notifyBcsCsr);
|
||||
}
|
||||
}
|
||||
@@ -134,8 +134,8 @@ bool CommandQueueHw<Family>::forceStateless(size_t size) {
|
||||
|
||||
template <typename Family>
|
||||
bool CommandQueueHw<Family>::isCacheFlushForBcsRequired() const {
|
||||
if (DebugManager.flags.ForceCacheFlushForBcs.get() != -1) {
|
||||
return !!DebugManager.flags.ForceCacheFlushForBcs.get();
|
||||
if (debugManager.flags.ForceCacheFlushForBcs.get() != -1) {
|
||||
return !!debugManager.flags.ForceCacheFlushForBcs.get();
|
||||
}
|
||||
return true;
|
||||
}
|
||||
@@ -212,7 +212,7 @@ bool CommandQueueHw<Family>::isGpgpuSubmissionForBcsRequired(bool queueBlocked,
|
||||
(latestSentEnqueueType != EnqueueProperties::Operation::None) &&
|
||||
(isCacheFlushForBcsRequired() || !(getGpgpuCommandStreamReceiver().getDispatchMode() == DispatchMode::ImmediateDispatch || getGpgpuCommandStreamReceiver().isLatestTaskCountFlushed()));
|
||||
|
||||
if (DebugManager.flags.ForceGpgpuSubmissionForBcsEnqueue.get() == 1) {
|
||||
if (debugManager.flags.ForceGpgpuSubmissionForBcsEnqueue.get() == 1) {
|
||||
required = true;
|
||||
}
|
||||
|
||||
|
||||
@@ -148,7 +148,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
|
||||
TagNodeBase *hwTimeStamps = nullptr;
|
||||
CommandStreamReceiver &computeCommandStreamReceiver = getGpgpuCommandStreamReceiver();
|
||||
|
||||
if (NEO::DebugManager.flags.ForceMemoryPrefetchForKmdMigratedSharedAllocations.get()) {
|
||||
if (NEO::debugManager.flags.ForceMemoryPrefetchForKmdMigratedSharedAllocations.get()) {
|
||||
auto pSvmAllocMgr = this->context->getSVMAllocsManager();
|
||||
pSvmAllocMgr->prefetchSVMAllocs(this->getDevice(), computeCommandStreamReceiver);
|
||||
}
|
||||
@@ -176,7 +176,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
|
||||
|
||||
const bool clearAllDependencies = (queueDependenciesClearRequired() || clearDependenciesForSubCapture);
|
||||
|
||||
if (DebugManager.flags.MakeEachEnqueueBlocking.get()) {
|
||||
if (debugManager.flags.MakeEachEnqueueBlocking.get()) {
|
||||
blocking = true;
|
||||
}
|
||||
|
||||
@@ -193,7 +193,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
|
||||
const auto &hwInfo = this->getDevice().getHardwareInfo();
|
||||
auto &productHelper = getDevice().getProductHelper();
|
||||
bool canUsePipeControlInsteadOfSemaphoresForOnCsrDependencies = false;
|
||||
bool isNonStallingIoqBarrier = (CL_COMMAND_BARRIER == commandType) && !isOOQEnabled() && (DebugManager.flags.OptimizeIoqBarriersHandling.get() != 0);
|
||||
bool isNonStallingIoqBarrier = (CL_COMMAND_BARRIER == commandType) && !isOOQEnabled() && (debugManager.flags.OptimizeIoqBarriersHandling.get() != 0);
|
||||
const bool isNonStallingIoqBarrierWithDependencies = isNonStallingIoqBarrier && (eventsRequest.numEventsInWaitList > 0);
|
||||
|
||||
if (computeCommandStreamReceiver.peekTimestampPacketWriteEnabled()) {
|
||||
@@ -285,7 +285,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
|
||||
} else if (computeCommandStreamReceiver.peekTimestampPacketWriteEnabled()) {
|
||||
if (CL_COMMAND_BARRIER == commandType && !isNonStallingIoqBarrier) {
|
||||
setStallingCommandsOnNextFlush(true);
|
||||
const bool isDcFlushRequiredOnBarrier = NEO::DebugManager.flags.SkipDcFlushOnBarrierWithoutEvents.get() == 0 || event;
|
||||
const bool isDcFlushRequiredOnBarrier = NEO::debugManager.flags.SkipDcFlushOnBarrierWithoutEvents.get() == 0 || event;
|
||||
setDcFlushRequiredOnStallingCommandsOnNextFlush(isDcFlushRequiredOnBarrier);
|
||||
this->splitBarrierRequired = true;
|
||||
}
|
||||
@@ -552,7 +552,7 @@ void CommandQueueHw<GfxFamily>::processDispatchForKernels(const MultiDispatchInf
|
||||
|
||||
HardwareInterface<GfxFamily>::dispatchWalkerCommon(*this, multiDispatchInfo, csrDeps, dispatchWalkerArgs);
|
||||
|
||||
if (DebugManager.flags.AddPatchInfoCommentsForAUBDump.get()) {
|
||||
if (debugManager.flags.AddPatchInfoCommentsForAUBDump.get()) {
|
||||
for (auto &dispatchInfo : multiDispatchInfo) {
|
||||
for (auto &patchInfoData : dispatchInfo.getKernel()->getPatchInfoDataList()) {
|
||||
getGpgpuCommandStreamReceiver().getFlatBatchBufferHelper().setPatchInfoData(patchInfoData);
|
||||
@@ -953,7 +953,7 @@ CompletionStamp CommandQueueHw<GfxFamily>::enqueueNonBlocked(
|
||||
dispatchFlags.implicitFlush = true;
|
||||
}
|
||||
|
||||
PRINT_DEBUG_STRING(DebugManager.flags.PrintDebugMessages.get(), stdout, "preemption = %d.\n", static_cast<int>(dispatchFlags.preemptionMode));
|
||||
PRINT_DEBUG_STRING(debugManager.flags.PrintDebugMessages.get(), stdout, "preemption = %d.\n", static_cast<int>(dispatchFlags.preemptionMode));
|
||||
CompletionStamp completionStamp = getGpgpuCommandStreamReceiver().flushTask(
|
||||
commandStream,
|
||||
commandStreamStart,
|
||||
@@ -1363,7 +1363,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueBlit(const MultiDispatchInfo &multiDisp
|
||||
|
||||
std::unique_ptr<KernelOperation> blockedCommandsData;
|
||||
TakeOwnershipWrapper<CommandQueueHw<GfxFamily>> queueOwnership(*this);
|
||||
if (DebugManager.flags.ForceCsrLockInBcsEnqueueOnlyForGpgpuSubmission.get() != 1) {
|
||||
if (debugManager.flags.ForceCsrLockInBcsEnqueueOnlyForGpgpuSubmission.get() != 1) {
|
||||
commandStreamReceiverOwnership = getGpgpuCommandStreamReceiver().obtainUniqueOwnership();
|
||||
}
|
||||
|
||||
@@ -1376,7 +1376,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueBlit(const MultiDispatchInfo &multiDisp
|
||||
enqueueHandlerHook(cmdType, multiDispatchInfo);
|
||||
aubCaptureHook(blocking, clearAllDependencies, multiDispatchInfo);
|
||||
|
||||
if (DebugManager.flags.MakeEachEnqueueBlocking.get()) {
|
||||
if (debugManager.flags.MakeEachEnqueueBlocking.get()) {
|
||||
blocking = true;
|
||||
}
|
||||
|
||||
@@ -1429,7 +1429,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueBlit(const MultiDispatchInfo &multiDisp
|
||||
if (gpgpuSubmission) {
|
||||
registerGpgpuCsrClient();
|
||||
|
||||
if (DebugManager.flags.ForceCsrLockInBcsEnqueueOnlyForGpgpuSubmission.get() == 1) {
|
||||
if (debugManager.flags.ForceCsrLockInBcsEnqueueOnlyForGpgpuSubmission.get() == 1) {
|
||||
commandStreamReceiverOwnership = getGpgpuCommandStreamReceiver().obtainUniqueOwnership();
|
||||
}
|
||||
gpgpuCommandStream = obtainCommandStream<cmdType>(csrDeps, true, blockQueue, multiDispatchInfo, eventsRequest, blockedCommandsData, nullptr, 0, false, false);
|
||||
@@ -1448,7 +1448,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueBlit(const MultiDispatchInfo &multiDisp
|
||||
}
|
||||
|
||||
if (gpgpuSubmission) {
|
||||
if (DebugManager.flags.ForceCsrLockInBcsEnqueueOnlyForGpgpuSubmission.get() == 1) {
|
||||
if (debugManager.flags.ForceCsrLockInBcsEnqueueOnlyForGpgpuSubmission.get() == 1) {
|
||||
commandStreamReceiverOwnership.unlock();
|
||||
}
|
||||
}
|
||||
@@ -1467,7 +1467,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueBlit(const MultiDispatchInfo &multiDisp
|
||||
enqueueBlocked(cmdType, nullptr, 0, multiDispatchInfo, timestampPacketDependencies, blockedCommandsData, enqueueProperties, eventsRequest, eventBuilder, nullptr, &bcsCsr, multiRootEventSyncStamp);
|
||||
|
||||
if (gpgpuSubmission) {
|
||||
if (DebugManager.flags.ForceCsrLockInBcsEnqueueOnlyForGpgpuSubmission.get() == 1) {
|
||||
if (debugManager.flags.ForceCsrLockInBcsEnqueueOnlyForGpgpuSubmission.get() == 1) {
|
||||
commandStreamReceiverOwnership.unlock();
|
||||
}
|
||||
}
|
||||
@@ -1478,7 +1478,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueBlit(const MultiDispatchInfo &multiDisp
|
||||
if (deferredMultiRootSyncNodes.get()) {
|
||||
csrDeps.copyRootDeviceSyncNodesToNewContainer(*deferredMultiRootSyncNodes);
|
||||
}
|
||||
if (DebugManager.flags.ForceCsrLockInBcsEnqueueOnlyForGpgpuSubmission.get() != 1) {
|
||||
if (debugManager.flags.ForceCsrLockInBcsEnqueueOnlyForGpgpuSubmission.get() != 1) {
|
||||
commandStreamReceiverOwnership.unlock();
|
||||
}
|
||||
queueOwnership.unlock();
|
||||
@@ -1542,7 +1542,7 @@ template <typename GfxFamily>
|
||||
bool CommandQueueHw<GfxFamily>::relaxedOrderingForGpgpuAllowed(uint32_t numWaitEvents) const {
|
||||
auto &gpgpuCsr = getGpgpuCommandStreamReceiver();
|
||||
|
||||
if ((DebugManager.flags.DirectSubmissionRelaxedOrdering.get() == 0) || gpgpuCsr.isRecyclingTagForHeapStorageRequired()) {
|
||||
if ((debugManager.flags.DirectSubmissionRelaxedOrdering.get() == 0) || gpgpuCsr.isRecyclingTagForHeapStorageRequired()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
@@ -218,12 +218,12 @@ size_t EnqueueOperation<GfxFamily>::getTotalSizeRequiredCS(uint32_t eventType, c
|
||||
expectedSizeCS += EnqueueOperation<GfxFamily>::getSizeForCacheFlushAfterWalkerCommands(*multiDispatchInfo.peekMainKernel(), commandQueue);
|
||||
}
|
||||
|
||||
if (DebugManager.flags.PauseOnEnqueue.get() != -1) {
|
||||
if (debugManager.flags.PauseOnEnqueue.get() != -1) {
|
||||
expectedSizeCS += MemorySynchronizationCommands<GfxFamily>::getSizeForSingleBarrier(false) * 2;
|
||||
expectedSizeCS += NEO::EncodeSemaphore<GfxFamily>::getSizeMiSemaphoreWait() * 2;
|
||||
}
|
||||
|
||||
if (DebugManager.flags.GpuScratchRegWriteAfterWalker.get() != -1) {
|
||||
if (debugManager.flags.GpuScratchRegWriteAfterWalker.get() != -1) {
|
||||
expectedSizeCS += sizeof(typename GfxFamily::MI_LOAD_REGISTER_IMM);
|
||||
}
|
||||
expectedSizeCS += TimestampPacketHelper::getRequiredCmdStreamSizeForMultiRootDeviceSyncNodesContainer<GfxFamily>(csrDeps);
|
||||
|
||||
@@ -54,8 +54,8 @@ size_t GpgpuWalkerHelper<GfxFamily>::setGpgpuWalkerThreadData(
|
||||
walkerCmd->setSimdSize(getSimdConfig<WalkerType>(simd));
|
||||
walkerCmd->setMessageSimd(walkerCmd->getSimdSize());
|
||||
|
||||
if (DebugManager.flags.ForceSimdMessageSizeInWalker.get() != -1) {
|
||||
walkerCmd->setMessageSimd(DebugManager.flags.ForceSimdMessageSizeInWalker.get());
|
||||
if (debugManager.flags.ForceSimdMessageSizeInWalker.get() != -1) {
|
||||
walkerCmd->setMessageSimd(debugManager.flags.ForceSimdMessageSizeInWalker.get());
|
||||
}
|
||||
|
||||
walkerCmd->setThreadGroupIdStartingX(static_cast<uint32_t>(startWorkGroups[0]));
|
||||
@@ -112,7 +112,7 @@ void GpgpuWalkerHelper<GfxFamily>::setupTimestampPacket(LinearStream *cmdStream,
|
||||
|
||||
EncodeDispatchKernel<GfxFamily>::template adjustTimestampPacket<WalkerType>(*walkerCmd, hwInfo);
|
||||
|
||||
if (DebugManager.flags.UseImmDataWriteModeOnPostSyncOperation.get()) {
|
||||
if (debugManager.flags.UseImmDataWriteModeOnPostSyncOperation.get()) {
|
||||
postSyncData.setOperation(GfxFamily::POSTSYNC_DATA::OPERATION::OPERATION_WRITE_IMMEDIATE_DATA);
|
||||
auto contextEndAddress = TimestampPacketHelper::getContextEndGpuAddress(*timestampPacketNode);
|
||||
postSyncData.setDestinationAddress(contextEndAddress);
|
||||
@@ -124,8 +124,8 @@ void GpgpuWalkerHelper<GfxFamily>::setupTimestampPacket(LinearStream *cmdStream,
|
||||
}
|
||||
|
||||
if constexpr (std::is_same_v<WalkerType, typename GfxFamily::COMPUTE_WALKER>) {
|
||||
if (DebugManager.flags.OverrideSystolicInComputeWalker.get() != -1) {
|
||||
walkerCmd->setSystolicModeEnable((DebugManager.flags.OverrideSystolicInComputeWalker.get()));
|
||||
if (debugManager.flags.OverrideSystolicInComputeWalker.get() != -1) {
|
||||
walkerCmd->setSystolicModeEnable((debugManager.flags.OverrideSystolicInComputeWalker.get()));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -134,7 +134,7 @@ void HardwareInterface<GfxFamily>::dispatchWalker(
|
||||
dispatchProfilingPerfStartCommands(walkerArgs.hwTimeStamps, walkerArgs.hwPerfCounter, commandStream, commandQueue);
|
||||
|
||||
const auto &hwInfo = commandQueue.getDevice().getHardwareInfo();
|
||||
if (PauseOnGpuProperties::pauseModeAllowed(DebugManager.flags.PauseOnEnqueue.get(), commandQueue.getGpgpuCommandStreamReceiver().peekTaskCount(), PauseOnGpuProperties::PauseMode::BeforeWorkload)) {
|
||||
if (PauseOnGpuProperties::pauseModeAllowed(debugManager.flags.PauseOnEnqueue.get(), commandQueue.getGpgpuCommandStreamReceiver().peekTaskCount(), PauseOnGpuProperties::PauseMode::BeforeWorkload)) {
|
||||
dispatchDebugPauseCommands(commandStream, commandQueue, DebugPauseState::waitingForUserStartConfirmation,
|
||||
DebugPauseState::hasUserStartConfirmation, hwInfo);
|
||||
}
|
||||
@@ -157,13 +157,13 @@ void HardwareInterface<GfxFamily>::dispatchWalker(
|
||||
dispatchInfo.dispatchEpilogueCommands(*commandStream, walkerArgs.timestampPacketDependencies, commandQueue.getDevice().getRootDeviceEnvironment());
|
||||
}
|
||||
|
||||
if (PauseOnGpuProperties::gpuScratchRegWriteAllowed(DebugManager.flags.GpuScratchRegWriteAfterWalker.get(), commandQueue.getGpgpuCommandStreamReceiver().peekTaskCount())) {
|
||||
uint32_t registerOffset = DebugManager.flags.GpuScratchRegWriteRegisterOffset.get();
|
||||
uint32_t registerData = DebugManager.flags.GpuScratchRegWriteRegisterData.get();
|
||||
if (PauseOnGpuProperties::gpuScratchRegWriteAllowed(debugManager.flags.GpuScratchRegWriteAfterWalker.get(), commandQueue.getGpgpuCommandStreamReceiver().peekTaskCount())) {
|
||||
uint32_t registerOffset = debugManager.flags.GpuScratchRegWriteRegisterOffset.get();
|
||||
uint32_t registerData = debugManager.flags.GpuScratchRegWriteRegisterData.get();
|
||||
LriHelper<GfxFamily>::program(commandStream, registerOffset, registerData, EncodeSetMMIO<GfxFamily>::isRemapApplicable(registerOffset));
|
||||
}
|
||||
|
||||
if (PauseOnGpuProperties::pauseModeAllowed(DebugManager.flags.PauseOnEnqueue.get(), commandQueue.getGpgpuCommandStreamReceiver().peekTaskCount(), PauseOnGpuProperties::PauseMode::AfterWorkload)) {
|
||||
if (PauseOnGpuProperties::pauseModeAllowed(debugManager.flags.PauseOnEnqueue.get(), commandQueue.getGpgpuCommandStreamReceiver().peekTaskCount(), PauseOnGpuProperties::PauseMode::AfterWorkload)) {
|
||||
dispatchDebugPauseCommands(commandStream, commandQueue, DebugPauseState::waitingForUserEndConfirmation,
|
||||
DebugPauseState::hasUserEndConfirmation, hwInfo);
|
||||
}
|
||||
|
||||
@@ -147,7 +147,7 @@ inline void HardwareInterface<GfxFamily>::programWalker(
|
||||
auto devices = queueCsr.getOsContext().getDeviceBitfield();
|
||||
auto partitionWalker = ImplicitScalingHelper::isImplicitScalingEnabled(devices, true);
|
||||
|
||||
if (timestampPacketNode && DebugManager.flags.PrintTimestampPacketUsage.get() == 1) {
|
||||
if (timestampPacketNode && debugManager.flags.PrintTimestampPacketUsage.get() == 1) {
|
||||
auto gpuVa = walkerArgs.currentTimestampPacketNodes->peekNodes()[walkerArgs.currentDispatchIndex]->getGpuAddress();
|
||||
printf("\nPID:%u, TSP used for Walker: 0x%" PRIX64 ", cmdBuffer pos: 0x%" PRIX64, SysCalls::getProcessId(), gpuVa, commandStream.getCurrentGpuAddressPosition());
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user