refactor: add debug prints for timestamp waits

Signed-off-by: Bartosz Dunajski <bartosz.dunajski@intel.com>
This commit is contained in:
Bartosz Dunajski
2024-08-30 17:33:28 +00:00
committed by Compute-Runtime-Automation
parent 8a80673ca7
commit 847180dc9a
4 changed files with 122 additions and 0 deletions

View File

@@ -142,6 +142,7 @@ bool CommandQueueHw<Family>::isCacheFlushForBcsRequired() const {
template <typename TSPacketType>
inline bool waitForTimestampsWithinContainer(TimestampPacketContainer *container, CommandStreamReceiver &csr, WaitStatus &status) {
bool printWaitForCompletion = debugManager.flags.LogWaitingForCompletion.get();
bool waited = false;
status = WaitStatus::notReady;
@@ -149,14 +150,23 @@ inline bool waitForTimestampsWithinContainer(TimestampPacketContainer *container
auto lastHangCheckTime = std::chrono::high_resolution_clock::now();
for (const auto &timestamp : container->peekNodes()) {
for (uint32_t i = 0; i < timestamp->getPacketsUsed(); i++) {
if (printWaitForCompletion) {
printf("\nWaiting for TS 0x%" PRIx64, timestamp->getGpuAddress() + (i * timestamp->getSinglePacketSize()));
}
while (timestamp->getContextEndValue(i) == 1) {
csr.downloadAllocation(*timestamp->getBaseGraphicsAllocation()->getGraphicsAllocation(csr.getRootDeviceIndex()));
WaitUtils::waitFunctionWithPredicate<const TSPacketType>(static_cast<TSPacketType const *>(timestamp->getContextEndAddress(i)), 1u, std::not_equal_to<TSPacketType>());
if (csr.checkGpuHangDetected(std::chrono::high_resolution_clock::now(), lastHangCheckTime)) {
status = WaitStatus::gpuHang;
if (printWaitForCompletion) {
printf("\nWaiting for TS failed");
}
return false;
}
}
if (printWaitForCompletion) {
printf("\nWaiting for TS completed");
}
status = WaitStatus::ready;
waited = true;
}

View File

@@ -824,14 +824,25 @@ bool Event::isWaitForTimestampsEnabled() const {
bool Event::areTimestampsCompleted() {
if (this->timestampPacketContainer.get()) {
if (this->isWaitForTimestampsEnabled()) {
bool printWaitForCompletion = debugManager.flags.LogWaitingForCompletion.get();
for (const auto &timestamp : this->timestampPacketContainer->peekNodes()) {
for (uint32_t i = 0; i < timestamp->getPacketsUsed(); i++) {
if (printWaitForCompletion) {
printf("\nChecking TS 0x%" PRIx64, timestamp->getGpuAddress() + (i * timestamp->getSinglePacketSize()));
}
this->cmdQueue->getGpgpuCommandStreamReceiver().downloadAllocation(*timestamp->getBaseGraphicsAllocation()->getGraphicsAllocation(this->cmdQueue->getGpgpuCommandStreamReceiver().getRootDeviceIndex()));
if (timestamp->getContextEndValue(i) == 1) {
if (printWaitForCompletion) {
printf("\nTS not ready");
}
return false;
}
}
}
if (printWaitForCompletion) {
printf("\nTS ready");
}
this->cmdQueue->getGpgpuCommandStreamReceiver().downloadAllocations(true);
const auto &bcsStates = this->cmdQueue->peekActiveBcsStates();
for (auto currentBcsIndex = 0u; currentBcsIndex < bcsStates.size(); currentBcsIndex++) {

View File

@@ -35,6 +35,7 @@ struct MockEvent : public BaseEventType {
FORWARD_FUNC(submitCommand, BaseEventType);
using BaseEventType::timeStampNode;
using Event::areTimestampsCompleted;
using Event::calcProfilingData;
using Event::cmdToSubmit;
using Event::isWaitForTimestampsEnabled;

View File

@@ -579,6 +579,106 @@ HWTEST_F(ProfilingTests, givenNonKernelEnqueueWhenNonBlockedEnqueueThenSetCpuPat
eventObj->release();
}
HWTEST_F(ProfilingTests, givenDebugFlagSetWhenWaitingForTimestampThenPrint) {
DebugManagerStateRestore restorer;
debugManager.flags.LogWaitingForCompletion.set(true);
debugManager.flags.EnableTimestampWaitForQueues.set(4);
debugManager.flags.EnableTimestampWaitForEvents.set(4);
using TimestampPacketType = typename FamilyType::TimestampPacketType;
using TimestampPacketsT = TimestampPackets<TimestampPacketType, FamilyType::timestampPacketCount>;
MockGraphicsAllocation alloc;
MultiGraphicsAllocation multiAlloc(1);
multiAlloc.addAllocation(&alloc);
struct MyMockTagNode : public TagNode<TimestampPacketsT> {
public:
using TagNode<TimestampPacketsT>::gfxAllocation;
MyMockTagNode(MultiGraphicsAllocation *alloc) {
this->gfxAllocation = alloc;
}
uint64_t getContextEndValue(uint32_t packetIndex) const override {
EXPECT_NE(0u, failCountdown);
failCountdown--;
if (failCountdown == 0) {
storage = 123;
}
return storage;
}
void const *getContextEndAddress(uint32_t packetIndex) const override {
return &storage;
}
void returnTag() override {}
mutable uint32_t failCountdown = 2;
mutable uint32_t storage = 1;
};
auto node = std::make_unique<MyMockTagNode>(&multiAlloc);
auto queue = std::make_unique<MockCommandQueueHw<FamilyType>>(pCmdQ->getContextPtr(), pClDevice, nullptr);
queue->timestampPacketContainer = std::make_unique<TimestampPacketContainer>();
auto container = queue->timestampPacketContainer.get();
testing::internal::CaptureStdout();
Range<CopyEngineState> copyEngineStates;
WaitStatus status;
container->add(node.get());
queue->waitForTimestamps(copyEngineStates, status, container, nullptr);
std::string output = testing::internal::GetCapturedStdout();
EXPECT_NE(output.npos, output.find("Waiting for TS 0x"));
EXPECT_NE(output.npos, output.find("Waiting for TS completed"));
EXPECT_EQ(output.npos, output.find("Waiting for TS failed"));
testing::internal::CaptureStdout();
auto &csr = static_cast<UltCommandStreamReceiver<FamilyType> &>(queue->getGpgpuCommandStreamReceiver());
csr.forceReturnGpuHang = true;
node->failCountdown = 2;
node->storage = 1;
queue->waitForTimestamps(copyEngineStates, status, container, nullptr);
output = testing::internal::GetCapturedStdout();
EXPECT_NE(output.npos, output.find("Waiting for TS 0x"));
EXPECT_EQ(output.npos, output.find("Waiting for TS completed"));
EXPECT_NE(output.npos, output.find("Waiting for TS failed"));
MockEvent<Event> event(queue.get(), CL_COMMAND_READ_BUFFER, 0, 0);
event.timestampPacketContainer = std::make_unique<TimestampPacketContainer>();
event.timestampPacketContainer->add(node.get());
testing::internal::CaptureStdout();
node->failCountdown = 2;
node->storage = 1;
event.areTimestampsCompleted();
output = testing::internal::GetCapturedStdout();
EXPECT_NE(output.npos, output.find("Checking TS 0x"));
EXPECT_EQ(output.npos, output.find("TS ready"));
EXPECT_NE(output.npos, output.find("TS not ready"));
testing::internal::CaptureStdout();
event.areTimestampsCompleted();
output = testing::internal::GetCapturedStdout();
EXPECT_NE(output.npos, output.find("Checking TS 0x"));
EXPECT_NE(output.npos, output.find("TS ready"));
EXPECT_EQ(output.npos, output.find("TS not ready"));
}
using EventProfilingTest = ProfilingTests;
HWCMDTEST_F(IGFX_GEN8_CORE, EventProfilingTest, givenEventWhenCompleteIsZeroThenCalcProfilingDataSetsEndTimestampInCompleteTimestampAndDoesntCallOsTimeMethods) {