mirror of
https://github.com/intel/compute-runtime.git
synced 2026-01-11 08:07:19 +08:00
refactor: add debug prints for timestamp waits
Signed-off-by: Bartosz Dunajski <bartosz.dunajski@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
8a80673ca7
commit
847180dc9a
@@ -142,6 +142,7 @@ bool CommandQueueHw<Family>::isCacheFlushForBcsRequired() const {
|
||||
|
||||
template <typename TSPacketType>
|
||||
inline bool waitForTimestampsWithinContainer(TimestampPacketContainer *container, CommandStreamReceiver &csr, WaitStatus &status) {
|
||||
bool printWaitForCompletion = debugManager.flags.LogWaitingForCompletion.get();
|
||||
bool waited = false;
|
||||
status = WaitStatus::notReady;
|
||||
|
||||
@@ -149,14 +150,23 @@ inline bool waitForTimestampsWithinContainer(TimestampPacketContainer *container
|
||||
auto lastHangCheckTime = std::chrono::high_resolution_clock::now();
|
||||
for (const auto ×tamp : container->peekNodes()) {
|
||||
for (uint32_t i = 0; i < timestamp->getPacketsUsed(); i++) {
|
||||
if (printWaitForCompletion) {
|
||||
printf("\nWaiting for TS 0x%" PRIx64, timestamp->getGpuAddress() + (i * timestamp->getSinglePacketSize()));
|
||||
}
|
||||
while (timestamp->getContextEndValue(i) == 1) {
|
||||
csr.downloadAllocation(*timestamp->getBaseGraphicsAllocation()->getGraphicsAllocation(csr.getRootDeviceIndex()));
|
||||
WaitUtils::waitFunctionWithPredicate<const TSPacketType>(static_cast<TSPacketType const *>(timestamp->getContextEndAddress(i)), 1u, std::not_equal_to<TSPacketType>());
|
||||
if (csr.checkGpuHangDetected(std::chrono::high_resolution_clock::now(), lastHangCheckTime)) {
|
||||
status = WaitStatus::gpuHang;
|
||||
if (printWaitForCompletion) {
|
||||
printf("\nWaiting for TS failed");
|
||||
}
|
||||
return false;
|
||||
}
|
||||
}
|
||||
if (printWaitForCompletion) {
|
||||
printf("\nWaiting for TS completed");
|
||||
}
|
||||
status = WaitStatus::ready;
|
||||
waited = true;
|
||||
}
|
||||
|
||||
@@ -824,14 +824,25 @@ bool Event::isWaitForTimestampsEnabled() const {
|
||||
bool Event::areTimestampsCompleted() {
|
||||
if (this->timestampPacketContainer.get()) {
|
||||
if (this->isWaitForTimestampsEnabled()) {
|
||||
bool printWaitForCompletion = debugManager.flags.LogWaitingForCompletion.get();
|
||||
|
||||
for (const auto ×tamp : this->timestampPacketContainer->peekNodes()) {
|
||||
for (uint32_t i = 0; i < timestamp->getPacketsUsed(); i++) {
|
||||
if (printWaitForCompletion) {
|
||||
printf("\nChecking TS 0x%" PRIx64, timestamp->getGpuAddress() + (i * timestamp->getSinglePacketSize()));
|
||||
}
|
||||
this->cmdQueue->getGpgpuCommandStreamReceiver().downloadAllocation(*timestamp->getBaseGraphicsAllocation()->getGraphicsAllocation(this->cmdQueue->getGpgpuCommandStreamReceiver().getRootDeviceIndex()));
|
||||
if (timestamp->getContextEndValue(i) == 1) {
|
||||
if (printWaitForCompletion) {
|
||||
printf("\nTS not ready");
|
||||
}
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (printWaitForCompletion) {
|
||||
printf("\nTS ready");
|
||||
}
|
||||
this->cmdQueue->getGpgpuCommandStreamReceiver().downloadAllocations(true);
|
||||
const auto &bcsStates = this->cmdQueue->peekActiveBcsStates();
|
||||
for (auto currentBcsIndex = 0u; currentBcsIndex < bcsStates.size(); currentBcsIndex++) {
|
||||
|
||||
@@ -35,6 +35,7 @@ struct MockEvent : public BaseEventType {
|
||||
FORWARD_FUNC(submitCommand, BaseEventType);
|
||||
|
||||
using BaseEventType::timeStampNode;
|
||||
using Event::areTimestampsCompleted;
|
||||
using Event::calcProfilingData;
|
||||
using Event::cmdToSubmit;
|
||||
using Event::isWaitForTimestampsEnabled;
|
||||
|
||||
@@ -579,6 +579,106 @@ HWTEST_F(ProfilingTests, givenNonKernelEnqueueWhenNonBlockedEnqueueThenSetCpuPat
|
||||
eventObj->release();
|
||||
}
|
||||
|
||||
HWTEST_F(ProfilingTests, givenDebugFlagSetWhenWaitingForTimestampThenPrint) {
|
||||
DebugManagerStateRestore restorer;
|
||||
debugManager.flags.LogWaitingForCompletion.set(true);
|
||||
debugManager.flags.EnableTimestampWaitForQueues.set(4);
|
||||
debugManager.flags.EnableTimestampWaitForEvents.set(4);
|
||||
|
||||
using TimestampPacketType = typename FamilyType::TimestampPacketType;
|
||||
using TimestampPacketsT = TimestampPackets<TimestampPacketType, FamilyType::timestampPacketCount>;
|
||||
|
||||
MockGraphicsAllocation alloc;
|
||||
MultiGraphicsAllocation multiAlloc(1);
|
||||
multiAlloc.addAllocation(&alloc);
|
||||
|
||||
struct MyMockTagNode : public TagNode<TimestampPacketsT> {
|
||||
public:
|
||||
using TagNode<TimestampPacketsT>::gfxAllocation;
|
||||
MyMockTagNode(MultiGraphicsAllocation *alloc) {
|
||||
this->gfxAllocation = alloc;
|
||||
}
|
||||
uint64_t getContextEndValue(uint32_t packetIndex) const override {
|
||||
EXPECT_NE(0u, failCountdown);
|
||||
failCountdown--;
|
||||
|
||||
if (failCountdown == 0) {
|
||||
storage = 123;
|
||||
}
|
||||
|
||||
return storage;
|
||||
}
|
||||
|
||||
void const *getContextEndAddress(uint32_t packetIndex) const override {
|
||||
return &storage;
|
||||
}
|
||||
|
||||
void returnTag() override {}
|
||||
|
||||
mutable uint32_t failCountdown = 2;
|
||||
mutable uint32_t storage = 1;
|
||||
};
|
||||
|
||||
auto node = std::make_unique<MyMockTagNode>(&multiAlloc);
|
||||
|
||||
auto queue = std::make_unique<MockCommandQueueHw<FamilyType>>(pCmdQ->getContextPtr(), pClDevice, nullptr);
|
||||
queue->timestampPacketContainer = std::make_unique<TimestampPacketContainer>();
|
||||
auto container = queue->timestampPacketContainer.get();
|
||||
|
||||
testing::internal::CaptureStdout();
|
||||
|
||||
Range<CopyEngineState> copyEngineStates;
|
||||
WaitStatus status;
|
||||
|
||||
container->add(node.get());
|
||||
|
||||
queue->waitForTimestamps(copyEngineStates, status, container, nullptr);
|
||||
|
||||
std::string output = testing::internal::GetCapturedStdout();
|
||||
|
||||
EXPECT_NE(output.npos, output.find("Waiting for TS 0x"));
|
||||
EXPECT_NE(output.npos, output.find("Waiting for TS completed"));
|
||||
EXPECT_EQ(output.npos, output.find("Waiting for TS failed"));
|
||||
|
||||
testing::internal::CaptureStdout();
|
||||
|
||||
auto &csr = static_cast<UltCommandStreamReceiver<FamilyType> &>(queue->getGpgpuCommandStreamReceiver());
|
||||
csr.forceReturnGpuHang = true;
|
||||
node->failCountdown = 2;
|
||||
node->storage = 1;
|
||||
queue->waitForTimestamps(copyEngineStates, status, container, nullptr);
|
||||
|
||||
output = testing::internal::GetCapturedStdout();
|
||||
|
||||
EXPECT_NE(output.npos, output.find("Waiting for TS 0x"));
|
||||
EXPECT_EQ(output.npos, output.find("Waiting for TS completed"));
|
||||
EXPECT_NE(output.npos, output.find("Waiting for TS failed"));
|
||||
|
||||
MockEvent<Event> event(queue.get(), CL_COMMAND_READ_BUFFER, 0, 0);
|
||||
event.timestampPacketContainer = std::make_unique<TimestampPacketContainer>();
|
||||
|
||||
event.timestampPacketContainer->add(node.get());
|
||||
|
||||
testing::internal::CaptureStdout();
|
||||
node->failCountdown = 2;
|
||||
node->storage = 1;
|
||||
event.areTimestampsCompleted();
|
||||
|
||||
output = testing::internal::GetCapturedStdout();
|
||||
|
||||
EXPECT_NE(output.npos, output.find("Checking TS 0x"));
|
||||
EXPECT_EQ(output.npos, output.find("TS ready"));
|
||||
EXPECT_NE(output.npos, output.find("TS not ready"));
|
||||
|
||||
testing::internal::CaptureStdout();
|
||||
event.areTimestampsCompleted();
|
||||
output = testing::internal::GetCapturedStdout();
|
||||
|
||||
EXPECT_NE(output.npos, output.find("Checking TS 0x"));
|
||||
EXPECT_NE(output.npos, output.find("TS ready"));
|
||||
EXPECT_EQ(output.npos, output.find("TS not ready"));
|
||||
}
|
||||
|
||||
using EventProfilingTest = ProfilingTests;
|
||||
|
||||
HWCMDTEST_F(IGFX_GEN8_CORE, EventProfilingTest, givenEventWhenCompleteIsZeroThenCalcProfilingDataSetsEndTimestampInCompleteTimestampAndDoesntCallOsTimeMethods) {
|
||||
|
||||
Reference in New Issue
Block a user