Change event algorithm to limit cache flush operation to single kernel packet

Related-To: NEO-6871

Signed-off-by: Zbigniew Zdanowicz <zbigniew.zdanowicz@intel.com>
This commit is contained in:
Zbigniew Zdanowicz 2022-05-21 18:53:47 +00:00 committed by Compute-Runtime-Automation
parent 66c1727398
commit 7ca20ceb4b
6 changed files with 141 additions and 18 deletions

View File

@ -105,7 +105,7 @@ void programEventL3Flush(ze_event_handle_t hEvent,
event->setPacketsInUse(event->getPacketsUsedInLastKernel() + 1);
}
event->l3FlushWaApplied = true;
event->setL3FlushForCurrentKernel();
auto &cmdListStream = *commandContainer.getCommandStream();
NEO::PipeControlArgs args;

View File

@ -15,6 +15,7 @@
#include "level_zero/core/source/driver/driver_handle.h"
#include <level_zero/ze_api.h>
#include <bitset>
#include <chrono>
#include <limits>
@ -106,6 +107,12 @@ struct Event : _ze_event_handle_t {
// Sets the event's kernel counter back to zero.
// NOTE(review): setL3FlushForCurrentKernel() indexes its bitset with kernelCount - 1,
// so presumably the counter is increased again before that call - confirm with callers.
void zeroKernelCount() {
kernelCount = 0;
}
bool getL3FlushForCurrenKernel() {
return l3FlushAppliedOnKernel.test(kernelCount - 1);
}
// Records that an L3 flush was applied for the current kernel by setting
// bit (kernelCount - 1) of l3FlushAppliedOnKernel.
// NOTE(review): requires kernelCount >= 1; with kernelCount == 0 the index is out of
// range and std::bitset::set(pos) throws std::out_of_range - confirm callers never
// invoke this directly after zeroKernelCount().
void setL3FlushForCurrentKernel() {
l3FlushAppliedOnKernel.set(kernelCount - 1);
}
uint64_t globalStartTS;
uint64_t globalEndTS;
@ -121,9 +128,9 @@ struct Event : _ze_event_handle_t {
ze_event_scope_flags_t signalScope = 0u;
ze_event_scope_flags_t waitScope = 0u;
bool l3FlushWaApplied = false;
protected:
std::bitset<EventPacketsCount::maxKernelSplit> l3FlushAppliedOnKernel;
size_t contextStartOffset = 0u;
size_t contextEndOffset = 0u;
size_t globalStartOffset = 0u;

View File

@ -68,22 +68,22 @@ ze_result_t EventImp<TagSizeT>::calculateProfilingData() {
contextStartTS = kernelEventCompletionData[0].getContextStartValue(0);
contextEndTS = kernelEventCompletionData[0].getContextEndValue(0);
for (uint32_t i = 0; i < kernelCount; i++) {
for (auto packetId = 0u; packetId < kernelEventCompletionData[i].getPacketsUsed(); packetId++) {
if (this->l3FlushWaApplied && ((packetId % skipL3EventPacketIndex) != 0)) {
for (uint32_t kernelId = 0; kernelId < kernelCount; kernelId++) {
for (auto packetId = 0u; packetId < kernelEventCompletionData[kernelId].getPacketsUsed(); packetId++) {
if (this->l3FlushAppliedOnKernel.test(kernelId) && ((packetId % skipL3EventPacketIndex) != 0)) {
continue;
}
if (globalStartTS > kernelEventCompletionData[i].getGlobalStartValue(packetId)) {
globalStartTS = kernelEventCompletionData[i].getGlobalStartValue(packetId);
if (globalStartTS > kernelEventCompletionData[kernelId].getGlobalStartValue(packetId)) {
globalStartTS = kernelEventCompletionData[kernelId].getGlobalStartValue(packetId);
}
if (contextStartTS > kernelEventCompletionData[i].getContextStartValue(packetId)) {
contextStartTS = kernelEventCompletionData[i].getContextStartValue(packetId);
if (contextStartTS > kernelEventCompletionData[kernelId].getContextStartValue(packetId)) {
contextStartTS = kernelEventCompletionData[kernelId].getContextStartValue(packetId);
}
if (contextEndTS < kernelEventCompletionData[i].getContextEndValue(packetId)) {
contextEndTS = kernelEventCompletionData[i].getContextEndValue(packetId);
if (contextEndTS < kernelEventCompletionData[kernelId].getContextEndValue(packetId)) {
contextEndTS = kernelEventCompletionData[kernelId].getContextEndValue(packetId);
}
if (globalEndTS < kernelEventCompletionData[i].getGlobalEndValue(packetId)) {
globalEndTS = kernelEventCompletionData[i].getGlobalEndValue(packetId);
if (globalEndTS < kernelEventCompletionData[kernelId].getGlobalEndValue(packetId)) {
globalEndTS = kernelEventCompletionData[kernelId].getGlobalEndValue(packetId);
}
}
}
@ -256,6 +256,7 @@ ze_result_t EventImp<TagSizeT>::reset() {
}
hostEventSetValue(Event::STATE_INITIAL);
resetPackets();
this->l3FlushAppliedOnKernel.reset();
return ZE_RESULT_SUCCESS;
}

View File

@ -19,6 +19,8 @@ namespace ult {
// Test-only specialization of WhiteBox for ::L0::Event. It derives from the real
// event type and re-declares selected base members with using-declarations so that
// unit tests can reach them through the WhiteBox type.
template <>
struct WhiteBox<::L0::Event> : public ::L0::Event {
using BaseClass = ::L0::Event;
using BaseClass::hostAddress;
// Per-kernel L3-flush bitset; it sits under `protected:` in Event, and tests
// inspect it directly (e.g. via count()).
using BaseClass::l3FlushAppliedOnKernel;
};
using Event = WhiteBox<::L0::Event>;
@ -64,6 +66,7 @@ struct Mock<EventPool> : public EventPool {
class MockEvent : public ::L0::Event {
public:
using ::L0::Event::l3FlushAppliedOnKernel;
MockEvent() {
mockAllocation.reset(new NEO::MockGraphicsAllocation(0, NEO::AllocationType::INTERNAL_HOST_MEMORY,
reinterpret_cast<void *>(0x1234), 0x1000, 0, sizeof(uint32_t),

View File

@ -1334,6 +1334,92 @@ TEST_F(TimestampEventCreate, givenEventWhenQueryingTimestampExpThenCorrectDataSe
}
}
// One timestamp event is used by two kernels; only the FIRST kernel carries the extra
// L3-flush packet. The flush packet holds a wider range (2..15) than the kernel
// packets (5..10), so the query reports 5/10 only if the flush packet is skipped.
TEST_F(TimestampEventCreate, givenTimeStampEventUsedOnTwoKernelsWhenL3FlushSetOnFirstKernelThenDoNotUseSecondPacketOfFirstKernel) {
    // Value-initialize so the entry that is never written does not hold indeterminate data.
    typename MockTimestampPackets32::Packet packetData[4] = {};
    event->hostAddress = packetData;

    constexpr uint32_t kernelStartValue = 5u;
    constexpr uint32_t kernelEndValue = 10u;
    constexpr uint32_t waStartValue = 2u;
    constexpr uint32_t waEndValue = 15u;

    // 1st kernel, 1st packet
    packetData[0].contextStart = kernelStartValue;
    packetData[0].contextEnd = kernelEndValue;
    packetData[0].globalStart = kernelStartValue;
    packetData[0].globalEnd = kernelEndValue;

    // 1st kernel, 2nd packet used for the L3 flush
    packetData[1].contextStart = waStartValue;
    packetData[1].contextEnd = waEndValue;
    packetData[1].globalStart = waStartValue;
    packetData[1].globalEnd = waEndValue;

    // 2nd kernel, 1st packet
    packetData[2].contextStart = kernelStartValue;
    packetData[2].contextEnd = kernelEndValue;
    packetData[2].globalStart = kernelStartValue;
    packetData[2].globalEnd = kernelEndValue;

    // Mark the flush on the first kernel, then move on to the second kernel.
    event->setPacketsInUse(2u);
    event->setL3FlushForCurrentKernel();
    event->increaseKernelCount();
    EXPECT_EQ(1u, event->getPacketsUsedInLastKernel());

    // The expectations below are only meaningful if the query itself succeeded,
    // so stop the test right here when it did not.
    ze_kernel_timestamp_result_t results = {};
    ASSERT_EQ(ZE_RESULT_SUCCESS, event->queryKernelTimestamp(&results));

    EXPECT_EQ(static_cast<uint64_t>(kernelStartValue), results.context.kernelStart);
    EXPECT_EQ(static_cast<uint64_t>(kernelStartValue), results.global.kernelStart);
    EXPECT_EQ(static_cast<uint64_t>(kernelEndValue), results.context.kernelEnd);
    EXPECT_EQ(static_cast<uint64_t>(kernelEndValue), results.global.kernelEnd);
}
// One timestamp event is used by two kernels; only the SECOND kernel carries the extra
// L3-flush packet. The flush packet holds a wider range (2..15) than the kernel
// packets (5..10), so the query reports 5/10 only if the flush packet is skipped.
TEST_F(TimestampEventCreate, givenTimeStampEventUsedOnTwoKernelsWhenL3FlushSetOnSecondKernelThenDoNotUseSecondPacketOfSecondKernel) {
    // Value-initialize so the entry that is never written does not hold indeterminate data.
    typename MockTimestampPackets32::Packet packetData[4] = {};
    event->hostAddress = packetData;

    constexpr uint32_t kernelStartValue = 5u;
    constexpr uint32_t kernelEndValue = 10u;
    constexpr uint32_t waStartValue = 2u;
    constexpr uint32_t waEndValue = 15u;

    // 1st kernel, 1st packet
    packetData[0].contextStart = kernelStartValue;
    packetData[0].contextEnd = kernelEndValue;
    packetData[0].globalStart = kernelStartValue;
    packetData[0].globalEnd = kernelEndValue;

    // 2nd kernel, 1st packet
    packetData[1].contextStart = kernelStartValue;
    packetData[1].contextEnd = kernelEndValue;
    packetData[1].globalStart = kernelStartValue;
    packetData[1].globalEnd = kernelEndValue;

    // 2nd kernel, 2nd packet used for the L3 flush
    packetData[2].contextStart = waStartValue;
    packetData[2].contextEnd = waEndValue;
    packetData[2].globalStart = waStartValue;
    packetData[2].globalEnd = waEndValue;

    // The first kernel uses a single packet; the flush is marked after moving to the second kernel.
    EXPECT_EQ(1u, event->getPacketsUsedInLastKernel());
    event->increaseKernelCount();
    event->setPacketsInUse(2u);
    event->setL3FlushForCurrentKernel();

    // The expectations below are only meaningful if the query itself succeeded,
    // so stop the test right here when it did not.
    ze_kernel_timestamp_result_t results = {};
    ASSERT_EQ(ZE_RESULT_SUCCESS, event->queryKernelTimestamp(&results));

    EXPECT_EQ(static_cast<uint64_t>(kernelStartValue), results.context.kernelStart);
    EXPECT_EQ(static_cast<uint64_t>(kernelStartValue), results.global.kernelStart);
    EXPECT_EQ(static_cast<uint64_t>(kernelEndValue), results.context.kernelEnd);
    EXPECT_EQ(static_cast<uint64_t>(kernelEndValue), results.global.kernelEnd);
}
HWTEST_EXCLUDE_PRODUCT(TimestampEventCreate, givenEventTimestampsWhenQueryKernelTimestampThenCorrectDataAreSet, IGFX_GEN12LP_CORE);
TEST_F(TimestampEventCreate, givenEventWhenQueryKernelTimestampThenNotReadyReturned) {
@ -1755,6 +1841,32 @@ TEST_F(EventTests, givenEventUseMultiplePacketsWhenHostSignalThenExpectAllPacket
}
}
// Checks that the L3-flush marker is tracked per kernel: setting it affects only the
// current kernel, a newly counted kernel starts unmarked, and reset() clears every marker.
TEST_F(EventTests, WhenSettingL3FlushOnEventThenSetOnParticularKernel) {
    auto event = whiteboxCast(Event::create<uint32_t>(eventPool, &eventDesc, device));
    ASSERT_NE(event, nullptr);

    // First kernel: unmarked until explicitly set.
    EXPECT_FALSE(event->getL3FlushForCurrenKernel());
    event->setL3FlushForCurrentKernel();
    EXPECT_TRUE(event->getL3FlushForCurrenKernel());

    // Second kernel: starts unmarked even though the first one is marked.
    event->increaseKernelCount();
    EXPECT_EQ(2u, event->getKernelCount());
    EXPECT_FALSE(event->getL3FlushForCurrenKernel());
    event->setL3FlushForCurrentKernel();
    EXPECT_TRUE(event->getL3FlushForCurrenKernel());

    // Verify the reset itself succeeded before asserting on the post-reset state.
    EXPECT_EQ(ZE_RESULT_SUCCESS, event->reset());
    EXPECT_FALSE(event->getL3FlushForCurrenKernel());
    constexpr size_t expectedL3FlushOnKernelCount = 0;
    EXPECT_EQ(expectedL3FlushOnKernelCount, event->l3FlushAppliedOnKernel.count());

    event->destroy();
}
struct EventSizeFixture : public DeviceFixture {
void SetUp() {
DeviceFixture::SetUp();

View File

@ -329,10 +329,10 @@ HWTEST2_F(CommandListAppendLaunchKernelL3Flush, givenKernelWithEventHostScopeWit
result = pCommandList->appendLaunchKernelWithParams(kernel.toHandle(), &groupCount, event->toHandle(), launchParams);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
EXPECT_EQ(true, event->l3FlushWaApplied);
EXPECT_EQ(true, event->getL3FlushForCurrenKernel());
}
HWTEST2_F(CommandListAppendLaunchKernelL3Flush, givenKernelWithEventZeroScopeWithoutWalkerPartitionThenEventL3FlushWaNotSet, IsXeHpCore) {
HWTEST2_F(CommandListAppendLaunchKernelL3Flush, givenKernelWithEventZeroScopeWithoutWalkerPartitionThenEventL3FlushNotSet, IsXeHpCore) {
using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM;
Mock<::L0::Kernel> kernel;
@ -360,7 +360,7 @@ HWTEST2_F(CommandListAppendLaunchKernelL3Flush, givenKernelWithEventZeroScopeWit
result = pCommandList->appendLaunchKernelWithParams(kernel.toHandle(), &groupCount, event->toHandle(), launchParams);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
EXPECT_EQ(false, event->l3FlushWaApplied);
EXPECT_EQ(false, event->getL3FlushForCurrenKernel());
}
HWTEST2_F(CommandListAppendLaunchKernelL3Flush, givenKernelWithEventHostScopeWithoutWalkerPartitionThenSkipOddPacketsDuringQuery, IsXeHpCore) {
@ -392,7 +392,7 @@ HWTEST2_F(CommandListAppendLaunchKernelL3Flush, givenKernelWithEventHostScopeWit
result = pCommandList->appendLaunchKernelWithParams(kernel.toHandle(), &groupCount, event->toHandle(), launchParams);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
EXPECT_EQ(true, event->l3FlushWaApplied);
EXPECT_EQ(true, event->getL3FlushForCurrenKernel());
EXPECT_EQ(2u, event->getPacketsInUse());
typename MockTimestampPackets32::Packet data[3] = {};