Don't use packets without profiling data to calculate kernel duration

Change-Id: I710348835f8884a3b244502f53ff4e4980441654
Signed-off-by: Bartosz Dunajski <bartosz.dunajski@intel.com>
This commit is contained in:
Bartosz Dunajski
2020-09-29 14:35:23 +02:00
committed by sys_ocldev
parent 77f88f9a96
commit 44a6d70ced
6 changed files with 43 additions and 2 deletions

View File

@@ -519,6 +519,7 @@ void CommandQueueHw<GfxFamily>::processDispatchForCacheFlush(Surface **surfaces,
uint64_t postSyncAddress = 0;
if (getGpgpuCommandStreamReceiver().peekTimestampPacketWriteEnabled()) {
auto timestampPacketNodeForPostSync = timestampPacketContainer->peekNodes().at(0);
timestampPacketNodeForPostSync->setProfilingCapable(false);
postSyncAddress = TimestampPacketHelper::getContextEndGpuAddress(*timestampPacketNodeForPostSync);
}

View File

@@ -143,6 +143,7 @@ void HardwareInterface<GfxFamily>::dispatchWalker(
uint64_t postSyncAddress = 0;
if (commandQueue.getGpgpuCommandStreamReceiver().peekTimestampPacketWriteEnabled()) {
auto timestampPacketNodeForPostSync = currentTimestampPacketNodes->peekNodes().at(currentDispatchIndex);
timestampPacketNodeForPostSync->setProfilingCapable(false);
postSyncAddress = TimestampPacketHelper::getContextEndGpuAddress(*timestampPacketNodeForPostSync);
}
HardwareCommandsHelper<GfxFamily>::programCacheFlushAfterWalkerCommand(commandStream, commandQueue, mainKernel, postSyncAddress);

View File

@@ -256,9 +256,11 @@ bool Event::calcProfilingData() {
if (DebugManager.flags.PrintTimestampPacketContents.get()) {
for (auto i = 0u; i < timestamps.size(); i++) {
std::cout << "Timestamp " << i << ", "
<< "profiling capable: " << timestamps[i]->isProfilingCapable() << ", ";
for (auto j = 0u; j < timestamps[i]->tagForCpuAccess->packetsUsed; j++) {
const auto &packet = timestamps[i]->tagForCpuAccess->packets[j];
std::cout << "Timestamp " << i << ", packet " << j << ": "
std::cout << "packet " << j << ": "
<< "global start: " << packet.globalStart << ", "
<< "global end: " << packet.globalEnd << ", "
<< "context start: " << packet.contextStart << ", "
@@ -271,6 +273,9 @@ bool Event::calcProfilingData() {
uint64_t globalEndTS = timestamps[0]->tagForCpuAccess->packets[0].globalEnd;
for (const auto &timestamp : timestamps) {
if (!timestamp->isProfilingCapable()) {
continue;
}
for (auto i = 0u; i < timestamp->tagForCpuAccess->packetsUsed; ++i) {
const auto &packet = timestamp->tagForCpuAccess->packets[i];
if (globalStartTS > packet.globalStart) {

View File

@@ -1123,6 +1123,30 @@ TEST_F(ProfilingTimestampPacketsTest, givenMultiOsContextCapableSetToTrueWhenCal
EXPECT_EQ(350u, ev->getEndTimeStamp());
}
// Checks that a timestamp node marked as not profiling-capable is left out when the
// event computes its start/end timestamps.
TEST_F(ProfilingTimestampPacketsTest, givenTimestampPacketWithoutProfilingDataWhenCalculatingThenDontUseThatPacket) {
    // Values for node 0: these are the ones the event is expected to report.
    int usedGlobalStart = 20;
    int usedGlobalEnd = 51;
    int usedContextStart = 21;
    int usedContextEnd = 50;

    // Values for node 1: it starts one tick earlier and ends one tick later than node 0,
    // so the asserted results would differ if this node were taken into account.
    int skippedGlobalStart = usedGlobalStart - 1;
    int skippedGlobalEnd = usedGlobalEnd + 1;
    int skippedContextStart = usedContextStart - 1;
    int skippedContextEnd = usedContextEnd + 1;

    addTimestampNodeMultiOsContext(&usedGlobalStart, &usedGlobalEnd, &usedContextStart, &usedContextEnd, 1);
    addTimestampNodeMultiOsContext(&skippedGlobalStart, &skippedGlobalEnd, &skippedContextStart, &skippedContextEnd, 1);

    auto &mockDevice = reinterpret_cast<MockDevice &>(cmdQ->getDevice());
    auto &ultCsr = mockDevice.getUltCommandStreamReceiver<DEFAULT_TEST_FAMILY_NAME>();
    ultCsr.multiOsContextCapable = true;

    // Flag the second node so that the calculation has to skip it.
    ev->timestampPacketContainer->peekNodes()[1]->setProfilingCapable(false);

    ev->calcProfilingData();

    const auto expectedStart = static_cast<uint64_t>(usedGlobalStart);
    const auto expectedEnd = static_cast<uint64_t>(usedGlobalEnd);
    EXPECT_EQ(expectedStart, ev->getStartTimeStamp());
    EXPECT_EQ(expectedEnd, ev->getEndTimeStamp());
}
TEST_F(ProfilingTimestampPacketsTest, givenPrintTimestampPacketContentsSetWhenCalcProfilingDataThenTimeStampsArePrinted) {
DebugManagerStateRestore restorer;
DebugManager.flags.PrintTimestampPacketContents.set(true);
@@ -1148,8 +1172,10 @@ TEST_F(ProfilingTimestampPacketsTest, givenPrintTimestampPacketContentsSetWhenCa
std::string output = testing::internal::GetCapturedStdout();
std::stringstream expected;
expected << "Timestamp 0, profiling capable: " << ev->timestampPacketContainer->peekNodes()[0]->isProfilingCapable() << ", ";
for (int i = 0; i < 16; i++) {
expected << "Timestamp 0, packet " << i << ": "
expected << "packet " << i << ": "
<< "global start: " << globalStart[i] << ", "
<< "global end: " << globalEnd[i] << ", "
<< "context start: " << contextStart[i] << ", "

View File

@@ -310,10 +310,12 @@ TEST_F(TagAllocatorTest, whenNewTagIsTakenThenItIsInitialized) {
MockTagAllocator<TimeStamps> tagAllocator(memoryManager, 1, 2, deviceBitfield);
tagAllocator.getFreeTagsHead()->tagForCpuAccess->start = 3;
tagAllocator.getFreeTagsHead()->tagForCpuAccess->end = 4;
tagAllocator.getFreeTagsHead()->setProfilingCapable(false);
auto node = tagAllocator.getTag();
EXPECT_EQ(1u, node->tagForCpuAccess->start);
EXPECT_EQ(2u, node->tagForCpuAccess->end);
EXPECT_TRUE(node->isProfilingCapable());
}
TEST_F(TagAllocatorTest, givenMultipleReferencesOnTagWhenReleasingThenReturnWhenAllRefCountsAreReleased) {

View File

@@ -46,11 +46,16 @@ struct TagNode : public IDNode<TagNode<TagType>>, NonCopyableOrMovableClass {
doNotReleaseNodes = doNotRelease;
}
// Sets the profilingCapable flag of this node to the given value.
void setProfilingCapable(bool capable) { profilingCapable = capable; }
// Returns the current value of the profilingCapable flag of this node.
bool isProfilingCapable() const { return profilingCapable; }
// Increments the atomic implicitCpuDependenciesCount counter by one.
void incImplicitCpuDependenciesCount() { implicitCpuDependenciesCount++; }
void initialize() {
tagForCpuAccess->initialize();
implicitCpuDependenciesCount.store(0);
setProfilingCapable(true);
}
// Returns the value currently held by the atomic implicitCpuDependenciesCount counter.
uint32_t getImplicitCpuDependenciesCount() const { return implicitCpuDependenciesCount.load(); }
@@ -64,6 +69,7 @@ struct TagNode : public IDNode<TagNode<TagType>>, NonCopyableOrMovableClass {
std::atomic<uint32_t> refCount{0};
std::atomic<uint32_t> implicitCpuDependenciesCount{0};
bool doNotReleaseNodes = false;
bool profilingCapable = true;
template <typename TagType2>
friend class TagAllocator;