Add multi-tile support for completion diagnostics

Related-To: NEO-6871

Signed-off-by: Zbigniew Zdanowicz <zbigniew.zdanowicz@intel.com>
This commit is contained in:
Zbigniew Zdanowicz 2022-04-21 17:41:33 +00:00 committed by Compute-Runtime-Automation
parent db178a9604
commit 910871a706
4 changed files with 71 additions and 10 deletions

View File

@ -323,6 +323,11 @@ void CommandStreamReceiver::cleanupResources() {
}
WaitStatus CommandStreamReceiver::waitForCompletionWithTimeout(const WaitParams &params, uint32_t taskCountToWait) {
bool printWaitForCompletion = DebugManager.flags.LogWaitingForCompletion.get();
if (printWaitForCompletion) {
printTagAddressContent(taskCountToWait, params.waitTimeout, true);
}
uint32_t latestSentTaskCount = this->latestFlushedTaskCount;
if (latestSentTaskCount < taskCountToWait) {
if (!this->flushBatchedSubmissions()) {
@ -331,7 +336,11 @@ WaitStatus CommandStreamReceiver::waitForCompletionWithTimeout(const WaitParams
}
}
return baseWaitFunction(getTagAddress(), params, taskCountToWait);
auto retCode = baseWaitFunction(getTagAddress(), params, taskCountToWait);
if (printWaitForCompletion) {
printTagAddressContent(taskCountToWait, params.waitTimeout, false);
}
return retCode;
}
WaitStatus CommandStreamReceiver::baseWaitFunction(volatile uint32_t *pollAddress, const WaitParams &params, uint32_t taskCountToWait) {
@ -865,4 +874,21 @@ bool CommandStreamReceiver::createPerDssBackedBuffer(Device &device) {
return perDssBackedBuffer != nullptr;
}
/// Prints the current tag value of every active partition to stdout.
///
/// One value is printed per partition: the first is read from getTagAddress(),
/// each following one from the address advanced by postSyncWriteOffset.
///
/// @param taskCountToWait task count being waited for; printed only when @p start is true.
/// @param waitTimeout     wait timeout; printed in hexadecimal only when @p start is true.
/// @param start           true prints the "Waiting for task count ..." header,
///                        false prints the "Waiting completed." header.
void CommandStreamReceiver::printTagAddressContent(uint32_t taskCountToWait, int64_t waitTimeout, bool start) {
    auto postSyncAddress = getTagAddress();
    if (start) {
        // The format string is printf-style, so argument types are not checked:
        // %p expects a void pointer and %llx an unsigned long long. Convert
        // explicitly instead of relying on int64_t / uint32_t * happening to
        // share a representation with those types on the current platform.
        PRINT_DEBUG_STRING(true, stdout,
                           "\nWaiting for task count %u at location %p with timeout %llx. Current value:",
                           taskCountToWait,
                           const_cast<void *>(static_cast<volatile void *>(postSyncAddress)),
                           static_cast<unsigned long long>(waitTimeout));
    } else {
        PRINT_DEBUG_STRING(true, stdout,
                           "%s", "\nWaiting completed. Current value:");
    }
    // Walk the per-partition tag slots, printing each value on the same line.
    for (uint32_t i = 0; i < activePartitions; i++) {
        PRINT_DEBUG_STRING(true, stdout, " %u", *postSyncAddress);
        postSyncAddress = ptrOffset(postSyncAddress, this->postSyncWriteOffset);
    }
    PRINT_DEBUG_STRING(true, stdout, "%s", "\n");
}
} // namespace NEO

View File

@ -342,6 +342,7 @@ class CommandStreamReceiver {
void checkForNewResources(uint32_t submittedTaskCount, uint32_t allocationTaskCount, GraphicsAllocation &gfxAllocation);
bool checkImplicitFlushForGpuIdle();
void downloadTagAllocation(uint32_t taskCountToWait);
void printTagAddressContent(uint32_t taskCountToWait, int64_t waitTimeout, bool start);
MOCKABLE_VIRTUAL std::unique_lock<MutexType> obtainHostPtrSurfaceCreationLock();
std::unique_ptr<FlushStampTracker> flushStamp;

View File

@ -871,10 +871,6 @@ inline WaitStatus CommandStreamReceiverHw<GfxFamily>::waitForTaskCountWithKmdNot
const auto params = kmdNotifyHelper->obtainTimeoutParams(useQuickKmdSleep, *getTagAddress(), taskCountToWait, flushStampToWait, throttle, this->isKmdWaitModeActive(),
this->isAnyDirectSubmissionEnabled());
PRINT_DEBUG_STRING(DebugManager.flags.LogWaitingForCompletion.get(), stdout,
"\nWaiting for task count %u at location %p. Current value: %u\n",
taskCountToWait, getTagAddress(), *getTagAddress());
auto status = waitForCompletionWithTimeout(params, taskCountToWait);
if (status == WaitStatus::NotReady) {
waitForFlushStamp(flushStampToWait);
@ -887,15 +883,13 @@ inline WaitStatus CommandStreamReceiverHw<GfxFamily>::waitForTaskCountWithKmdNot
return status;
}
UNRECOVERABLE_IF(*getTagAddress() < taskCountToWait);
for (uint32_t i = 0; i < this->activePartitions; i++) {
UNRECOVERABLE_IF(*(ptrOffset(getTagAddress(), (i * this->postSyncWriteOffset))) < taskCountToWait);
}
if (kmdNotifyHelper->quickKmdSleepForSporadicWaitsEnabled()) {
kmdNotifyHelper->updateLastWaitForCompletionTimestamp();
}
PRINT_DEBUG_STRING(DebugManager.flags.LogWaitingForCompletion.get(), stdout,
"\nWaiting completed. Current value: %u\n", *getTagAddress());
return WaitStatus::Ready;
}

View File

@ -2001,3 +2001,43 @@ TEST(CreateWorkPartitionAllocationTest, givenEnabledBlitterWhenInitializingWorkP
EXPECT_TRUE(retVal);
EXPECT_EQ(0u, memoryManager->copyMemoryToAllocationBanksCalled);
}
HWTEST_F(CommandStreamReceiverTest, givenMultipleActivePartitionsWhenWaitLogIsEnabledThenPrintTagValueForAllPartitions) {
    // Enable the wait log for this test only; the restorer resets the flag afterwards.
    DebugManagerStateRestore stateRestore;
    DebugManager.flags.LogWaitingForCompletion.set(true);

    auto &ultCsr = pDevice->getUltCommandStreamReceiver<FamilyType>();
    ultCsr.activePartitions = 2;

    constexpr uint32_t storedTagValue = 2;
    constexpr uint32_t requestedTaskCount = 1;

    // Store the same tag value in the slots of both partitions.
    volatile uint32_t *firstPartitionTag = ultCsr.tagAddress;
    volatile uint32_t *secondPartitionTag = ptrOffset(firstPartitionTag, ultCsr.postSyncWriteOffset);
    *firstPartitionTag = storedTagValue;
    *secondPartitionTag = storedTagValue;

    WaitParams params;
    params.waitTimeout = std::numeric_limits<int64_t>::max();

    testing::internal::CaptureStdout();
    const WaitStatus waitResult = ultCsr.waitForCompletionWithTimeout(params, requestedTaskCount);
    EXPECT_EQ(WaitStatus::Ready, waitResult);
    const std::string capturedLog = testing::internal::GetCapturedStdout();

    // Expected log: a "waiting" line and a "completed" line, each listing both tag values.
    std::stringstream expectedLog;
    expectedLog << "\n";
    expectedLog << "Waiting for task count " << requestedTaskCount;
    expectedLog << " at location " << const_cast<uint32_t *>(ultCsr.tagAddress);
    expectedLog << " with timeout " << std::hex << params.waitTimeout;
    expectedLog << ". Current value: " << std::dec << storedTagValue;
    expectedLog << " " << storedTagValue;
    expectedLog << "\n";
    expectedLog << "\n";
    expectedLog << "Waiting completed. Current value: " << storedTagValue;
    expectedLog << " " << storedTagValue << "\n";

    EXPECT_STREQ(expectedLog.str().c_str(), capturedLog.c_str());
}