Add debug key to print direct submission ring buffer

Signed-off-by: Lukasz Jobczyk <lukasz.jobczyk@intel.com>
This commit is contained in:
Lukasz Jobczyk
2021-11-23 11:16:29 +00:00
committed by Compute-Runtime-Automation
parent d1059518a8
commit cc13d60f1d
4 changed files with 60 additions and 0 deletions

View File

@@ -97,6 +97,7 @@ DirectSubmissionDiagnosticExecutionCount = 30
DirectSubmissionNewResourceTlbFlush = -1
DirectSubmissionDisableCacheFlush = -1
DirectSubmissionDisableMonitorFence = -1
DirectSubmissionPrintBuffers = 0
USMEvictAfterMigration = 1
EnableDirectSubmissionController = -1
DirectSubmissionControllerTimeout = -1

View File

@@ -224,6 +224,7 @@ DECLARE_DEBUG_VARIABLE(int32_t, OverrideKernelSizeLimitForSmallDispatch, -1, "-1
DECLARE_DEBUG_VARIABLE(int32_t, OverrideUseKmdWaitFunction, -1, "-1: default (L0: disabled), 0: disabled, 1: enabled. It uses only busy loop to wait or busy loop with KMD wait function, when KMD fallback is enabled")
/*DIRECT SUBMISSION FLAGS*/
// DirectSubmissionPrintBuffers: when true, direct submission printf()s the GPU/CPU address
// ranges and sizes of both ring buffers during resource allocation, and of every client
// command buffer (whole allocation plus the start-offset/used-size window) at workload dispatch.
DECLARE_DEBUG_VARIABLE(bool, DirectSubmissionPrintBuffers, false, "Print address of submitted command buffers")
DECLARE_DEBUG_VARIABLE(int32_t, EnableDirectSubmission, -1, "-1: default (disabled), 0: disable, 1:enable. Enables direct submission of command buffers bypassing KMD")
DECLARE_DEBUG_VARIABLE(int32_t, DirectSubmissionBufferPlacement, -1, "-1: do not override, 0: non-system, 1: system")
DECLARE_DEBUG_VARIABLE(int32_t, DirectSubmissionSemaphorePlacement, -1, "-1: do not override, 0: non-system, 1: system")

View File

@@ -86,6 +86,22 @@ bool DirectSubmissionHw<GfxFamily, Dispatcher>::allocateResources() {
allocations.push_back(workPartitionAllocation);
}
if (DebugManager.flags.DirectSubmissionPrintBuffers.get()) {
printf("Ring buffer 1 - gpu address: %" PRIx64 " - %" PRIx64 ", cpu address: %p - %p, size: %zu \n",
ringBuffer->getGpuAddress(),
ptrOffset(ringBuffer->getGpuAddress(), ringBuffer->getUnderlyingBufferSize()),
ringBuffer->getUnderlyingBuffer(),
ptrOffset(ringBuffer->getUnderlyingBuffer(), ringBuffer->getUnderlyingBufferSize()),
ringBuffer->getUnderlyingBufferSize());
printf("Ring buffer 2 - gpu address: %" PRIx64 " - %" PRIx64 ", cpu address: %p - %p, size: %zu \n",
ringBuffer2->getGpuAddress(),
ptrOffset(ringBuffer2->getGpuAddress(), ringBuffer2->getUnderlyingBufferSize()),
ringBuffer2->getUnderlyingBuffer(),
ptrOffset(ringBuffer2->getUnderlyingBuffer(), ringBuffer2->getUnderlyingBufferSize()),
ringBuffer2->getUnderlyingBufferSize());
}
handleResidency();
ringCommandStream.replaceBuffer(ringBuffer->getUnderlyingBuffer(), minimumRequiredSize);
ringCommandStream.replaceGraphicsAllocation(ringBuffer);
@@ -320,6 +336,23 @@ template <typename GfxFamily, typename Dispatcher>
void *DirectSubmissionHw<GfxFamily, Dispatcher>::dispatchWorkloadSection(BatchBuffer &batchBuffer) {
void *currentPosition = ringCommandStream.getSpace(0);
if (DebugManager.flags.DirectSubmissionPrintBuffers.get()) {
printf("Client buffer:\n");
printf("Command buffer allocation - gpu address: %" PRIx64 " - %" PRIx64 ", cpu address: %p - %p, size: %zu \n",
batchBuffer.commandBufferAllocation->getGpuAddress(),
ptrOffset(batchBuffer.commandBufferAllocation->getGpuAddress(), batchBuffer.commandBufferAllocation->getUnderlyingBufferSize()),
batchBuffer.commandBufferAllocation->getUnderlyingBuffer(),
ptrOffset(batchBuffer.commandBufferAllocation->getUnderlyingBuffer(), batchBuffer.commandBufferAllocation->getUnderlyingBufferSize()),
batchBuffer.commandBufferAllocation->getUnderlyingBufferSize());
printf("Command buffer - start gpu address: %" PRIx64 " - %" PRIx64 ", start cpu address: %p - %p, start offset: %zu, used size: %zu \n",
ptrOffset(batchBuffer.commandBufferAllocation->getGpuAddress(), batchBuffer.startOffset),
ptrOffset(ptrOffset(batchBuffer.commandBufferAllocation->getGpuAddress(), batchBuffer.startOffset), batchBuffer.usedSize),
ptrOffset(batchBuffer.commandBufferAllocation->getUnderlyingBuffer(), batchBuffer.startOffset),
ptrOffset(ptrOffset(batchBuffer.commandBufferAllocation->getUnderlyingBuffer(), batchBuffer.startOffset), batchBuffer.usedSize),
batchBuffer.startOffset,
batchBuffer.usedSize);
}
if (workloadMode == 0) {
auto commandStreamAddress = ptrOffset(batchBuffer.commandBufferAllocation->getGpuAddress(), batchBuffer.startOffset);
void *returnCmd = batchBuffer.endCmdPtr;

View File

@@ -892,6 +892,31 @@ HWTEST_F(DirectSubmissionDispatchBufferTest,
EXPECT_TRUE(directSubmission.ringStart);
}
// With the DirectSubmissionPrintBuffers debug flag set, initializing direct submission and
// dispatching one command buffer must write the ring buffer and client buffer descriptions
// to stdout.
HWTEST_F(DirectSubmissionDispatchBufferTest, givenDirectSubmissionPrintBuffersWhenInitializeAndDispatchBufferThenCommandBufferArePrinted) {
    // Restores the debug flags to their previous values when the test scope ends.
    DebugManagerStateRestore stateRestore;
    DebugManager.flags.DirectSubmissionPrintBuffers.set(true);

    FlushStampTracker flushStampTracker(true);
    MockDirectSubmissionHw<FamilyType, RenderDispatcher<FamilyType>> directSubmission(*pDevice, *osContext.get());

    // Capture everything printed from initialization through the dispatch.
    testing::internal::CaptureStdout();
    const bool initialized = directSubmission.initialize(false);
    EXPECT_TRUE(initialized);
    const bool dispatched = directSubmission.dispatchCommandBuffer(batchBuffer, flushStampTracker);
    EXPECT_TRUE(dispatched);
    const std::string capturedOutput = testing::internal::GetCapturedStdout();

    // Each label below introduces one of the printed buffer descriptions.
    const char *const expectedLabels[] = {"Ring buffer 1", "Ring buffer 2", "Client buffer"};
    for (const char *label : expectedLabels) {
        EXPECT_TRUE(capturedOutput.find(label) != std::string::npos);
    }
}
HWTEST_F(DirectSubmissionTest, givenSuperBaseCsrWhenCheckingDirectSubmissionAvailableThenReturnFalse) {
VariableBackup<UltHwConfig> backup(&ultHwConfig);
ultHwConfig.csrSuperBaseCallDirectSubmissionAvailable = true;