Change event algorithm to limit cache flush operation to single kernel packet
Related-To: NEO-6871
Signed-off-by: Zbigniew Zdanowicz <zbigniew.zdanowicz@intel.com>
This commit is contained in:
parent
66c1727398
commit
7ca20ceb4b
|
@ -105,7 +105,7 @@ void programEventL3Flush(ze_event_handle_t hEvent,
|
|||
event->setPacketsInUse(event->getPacketsUsedInLastKernel() + 1);
|
||||
}
|
||||
|
||||
event->l3FlushWaApplied = true;
|
||||
event->setL3FlushForCurrentKernel();
|
||||
|
||||
auto &cmdListStream = *commandContainer.getCommandStream();
|
||||
NEO::PipeControlArgs args;
|
||||
|
|
|
@ -15,6 +15,7 @@
|
|||
#include "level_zero/core/source/driver/driver_handle.h"
|
||||
#include <level_zero/ze_api.h>
|
||||
|
||||
#include <bitset>
|
||||
#include <chrono>
|
||||
#include <limits>
|
||||
|
||||
|
@ -106,6 +107,12 @@ struct Event : _ze_event_handle_t {
|
|||
void zeroKernelCount() {
|
||||
kernelCount = 0;
|
||||
}
|
||||
bool getL3FlushForCurrenKernel() {
|
||||
return l3FlushAppliedOnKernel.test(kernelCount - 1);
|
||||
}
|
||||
// Sets the L3-flush marker for the current kernel, i.e. the bit at index
// (kernelCount - 1) of l3FlushAppliedOnKernel.
// Does nothing when that index is outside the bitset - in particular when
// kernelCount is 0 (for example right after zeroKernelCount()) - because
// std::bitset::set would throw std::out_of_range for such an index.
void setL3FlushForCurrentKernel() {
    // With kernelCount == 0 the subtraction wraps to the maximum size_t value,
    // so the single range check below covers both "no kernel" and "too many kernels".
    const size_t kernelIndex = static_cast<size_t>(kernelCount) - 1u;
    if (kernelIndex < l3FlushAppliedOnKernel.size()) {
        l3FlushAppliedOnKernel.set(kernelIndex);
    }
}
|
||||
|
||||
uint64_t globalStartTS;
|
||||
uint64_t globalEndTS;
|
||||
|
@ -121,9 +128,9 @@ struct Event : _ze_event_handle_t {
|
|||
ze_event_scope_flags_t signalScope = 0u;
|
||||
ze_event_scope_flags_t waitScope = 0u;
|
||||
|
||||
bool l3FlushWaApplied = false;
|
||||
|
||||
protected:
|
||||
std::bitset<EventPacketsCount::maxKernelSplit> l3FlushAppliedOnKernel;
|
||||
|
||||
size_t contextStartOffset = 0u;
|
||||
size_t contextEndOffset = 0u;
|
||||
size_t globalStartOffset = 0u;
|
||||
|
|
|
@ -68,22 +68,22 @@ ze_result_t EventImp<TagSizeT>::calculateProfilingData() {
|
|||
contextStartTS = kernelEventCompletionData[0].getContextStartValue(0);
|
||||
contextEndTS = kernelEventCompletionData[0].getContextEndValue(0);
|
||||
|
||||
for (uint32_t i = 0; i < kernelCount; i++) {
|
||||
for (auto packetId = 0u; packetId < kernelEventCompletionData[i].getPacketsUsed(); packetId++) {
|
||||
if (this->l3FlushWaApplied && ((packetId % skipL3EventPacketIndex) != 0)) {
|
||||
for (uint32_t kernelId = 0; kernelId < kernelCount; kernelId++) {
|
||||
for (auto packetId = 0u; packetId < kernelEventCompletionData[kernelId].getPacketsUsed(); packetId++) {
|
||||
if (this->l3FlushAppliedOnKernel.test(kernelId) && ((packetId % skipL3EventPacketIndex) != 0)) {
|
||||
continue;
|
||||
}
|
||||
if (globalStartTS > kernelEventCompletionData[i].getGlobalStartValue(packetId)) {
|
||||
globalStartTS = kernelEventCompletionData[i].getGlobalStartValue(packetId);
|
||||
if (globalStartTS > kernelEventCompletionData[kernelId].getGlobalStartValue(packetId)) {
|
||||
globalStartTS = kernelEventCompletionData[kernelId].getGlobalStartValue(packetId);
|
||||
}
|
||||
if (contextStartTS > kernelEventCompletionData[i].getContextStartValue(packetId)) {
|
||||
contextStartTS = kernelEventCompletionData[i].getContextStartValue(packetId);
|
||||
if (contextStartTS > kernelEventCompletionData[kernelId].getContextStartValue(packetId)) {
|
||||
contextStartTS = kernelEventCompletionData[kernelId].getContextStartValue(packetId);
|
||||
}
|
||||
if (contextEndTS < kernelEventCompletionData[i].getContextEndValue(packetId)) {
|
||||
contextEndTS = kernelEventCompletionData[i].getContextEndValue(packetId);
|
||||
if (contextEndTS < kernelEventCompletionData[kernelId].getContextEndValue(packetId)) {
|
||||
contextEndTS = kernelEventCompletionData[kernelId].getContextEndValue(packetId);
|
||||
}
|
||||
if (globalEndTS < kernelEventCompletionData[i].getGlobalEndValue(packetId)) {
|
||||
globalEndTS = kernelEventCompletionData[i].getGlobalEndValue(packetId);
|
||||
if (globalEndTS < kernelEventCompletionData[kernelId].getGlobalEndValue(packetId)) {
|
||||
globalEndTS = kernelEventCompletionData[kernelId].getGlobalEndValue(packetId);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -256,6 +256,7 @@ ze_result_t EventImp<TagSizeT>::reset() {
|
|||
}
|
||||
hostEventSetValue(Event::STATE_INITIAL);
|
||||
resetPackets();
|
||||
this->l3FlushAppliedOnKernel.reset();
|
||||
return ZE_RESULT_SUCCESS;
|
||||
}
|
||||
|
||||
|
|
|
@ -19,6 +19,8 @@ namespace ult {
|
|||
template <>
|
||||
struct WhiteBox<::L0::Event> : public ::L0::Event {
|
||||
using BaseClass = ::L0::Event;
|
||||
using BaseClass::hostAddress;
|
||||
using BaseClass::l3FlushAppliedOnKernel;
|
||||
};
|
||||
|
||||
using Event = WhiteBox<::L0::Event>;
|
||||
|
@ -64,6 +66,7 @@ struct Mock<EventPool> : public EventPool {
|
|||
|
||||
class MockEvent : public ::L0::Event {
|
||||
public:
|
||||
using ::L0::Event::l3FlushAppliedOnKernel;
|
||||
MockEvent() {
|
||||
mockAllocation.reset(new NEO::MockGraphicsAllocation(0, NEO::AllocationType::INTERNAL_HOST_MEMORY,
|
||||
reinterpret_cast<void *>(0x1234), 0x1000, 0, sizeof(uint32_t),
|
||||
|
|
|
@ -1334,6 +1334,92 @@ TEST_F(TimestampEventCreate, givenEventWhenQueryingTimestampExpThenCorrectDataSe
|
|||
}
|
||||
}
|
||||
|
||||
// Two kernels share one timestamp event and the first kernel carries an extra packet
// for the L3 flush. That packet holds the widest timestamps (2..15), so a query that
// returns the kernel values (5..10) shows the flush packet was left out of the result.
TEST_F(TimestampEventCreate, givenTimeStampEventUsedOnTwoKernelsWhenL3FlushSetOnFirstKernelThenDoNotUseSecondPacketOfFirstKernel) {
    using Packet = typename MockTimestampPackets32::Packet;

    constexpr uint32_t kernelStartValue = 5u;
    constexpr uint32_t kernelEndValue = 10u;

    constexpr uint32_t waStartValue = 2u;
    constexpr uint32_t waEndValue = 15u;

    Packet packetData[4];
    event->hostAddress = packetData;

    // Writes the same start/end pair into both the context and the global timestamps.
    auto writeTimestamps = [](Packet &packet, uint32_t startValue, uint32_t endValue) {
        packet.contextStart = startValue;
        packet.contextEnd = endValue;
        packet.globalStart = startValue;
        packet.globalEnd = endValue;
    };

    // 1st kernel, 1st packet
    writeTimestamps(packetData[0], kernelStartValue, kernelEndValue);
    // 1st kernel, 2nd packet for L3 flush
    writeTimestamps(packetData[1], waStartValue, waEndValue);
    // 2nd kernel, 1st packet
    writeTimestamps(packetData[2], kernelStartValue, kernelEndValue);

    // The first kernel uses two packets and is marked with L3 flush.
    event->setPacketsInUse(2u);
    event->setL3FlushForCurrentKernel();

    // Switch to the second kernel, which uses a single packet.
    event->increaseKernelCount();
    EXPECT_EQ(1u, event->getPacketsUsedInLastKernel());

    ze_kernel_timestamp_result_t results;
    event->queryKernelTimestamp(&results);

    EXPECT_EQ(static_cast<uint64_t>(kernelStartValue), results.context.kernelStart);
    EXPECT_EQ(static_cast<uint64_t>(kernelStartValue), results.global.kernelStart);
    EXPECT_EQ(static_cast<uint64_t>(kernelEndValue), results.context.kernelEnd);
    EXPECT_EQ(static_cast<uint64_t>(kernelEndValue), results.global.kernelEnd);
}
|
||||
|
||||
// Two kernels share one timestamp event and the second kernel carries an extra packet
// for the L3 flush. That packet holds the widest timestamps (2..15), so a query that
// returns the kernel values (5..10) shows the flush packet was left out of the result.
TEST_F(TimestampEventCreate, givenTimeStampEventUsedOnTwoKernelsWhenL3FlushSetOnSecondKernelThenDoNotUseSecondPacketOfSecondKernel) {
    using Packet = typename MockTimestampPackets32::Packet;

    constexpr uint32_t kernelStartValue = 5u;
    constexpr uint32_t kernelEndValue = 10u;

    constexpr uint32_t waStartValue = 2u;
    constexpr uint32_t waEndValue = 15u;

    Packet packetData[4];
    event->hostAddress = packetData;

    // Writes the same start/end pair into both the context and the global timestamps.
    auto writeTimestamps = [](Packet &packet, uint32_t startValue, uint32_t endValue) {
        packet.contextStart = startValue;
        packet.contextEnd = endValue;
        packet.globalStart = startValue;
        packet.globalEnd = endValue;
    };

    // 1st kernel, 1st packet
    writeTimestamps(packetData[0], kernelStartValue, kernelEndValue);
    // 2nd kernel, 1st packet
    writeTimestamps(packetData[1], kernelStartValue, kernelEndValue);
    // 2nd kernel, 2nd packet for L3 flush
    writeTimestamps(packetData[2], waStartValue, waEndValue);

    // The first kernel uses a single packet.
    EXPECT_EQ(1u, event->getPacketsUsedInLastKernel());

    // The second kernel uses two packets and is marked with L3 flush.
    event->increaseKernelCount();
    event->setPacketsInUse(2u);
    event->setL3FlushForCurrentKernel();

    ze_kernel_timestamp_result_t results;
    event->queryKernelTimestamp(&results);

    EXPECT_EQ(static_cast<uint64_t>(kernelStartValue), results.context.kernelStart);
    EXPECT_EQ(static_cast<uint64_t>(kernelStartValue), results.global.kernelStart);
    EXPECT_EQ(static_cast<uint64_t>(kernelEndValue), results.context.kernelEnd);
    EXPECT_EQ(static_cast<uint64_t>(kernelEndValue), results.global.kernelEnd);
}
|
||||
|
||||
HWTEST_EXCLUDE_PRODUCT(TimestampEventCreate, givenEventTimestampsWhenQueryKernelTimestampThenCorrectDataAreSet, IGFX_GEN12LP_CORE);
|
||||
|
||||
TEST_F(TimestampEventCreate, givenEventWhenQueryKernelTimestampThenNotReadyReturned) {
|
||||
|
@ -1755,6 +1841,32 @@ TEST_F(EventTests, givenEventUseMultiplePacketsWhenHostSignalThenExpectAllPacket
|
|||
}
|
||||
}
|
||||
|
||||
// Checks that the L3-flush marker is kept per kernel: setting it for one kernel does not
// mark the next kernel, and reset() clears every marker.
TEST_F(EventTests, WhenSettingL3FlushOnEventThenSetOnParticularKernel) {
    auto l3FlushEvent = whiteboxCast(Event::create<uint32_t>(eventPool, &eventDesc, device));
    ASSERT_NE(nullptr, l3FlushEvent);

    // First kernel: the marker starts cleared and can be set.
    EXPECT_FALSE(l3FlushEvent->getL3FlushForCurrenKernel());
    l3FlushEvent->setL3FlushForCurrentKernel();
    EXPECT_TRUE(l3FlushEvent->getL3FlushForCurrenKernel());

    // Second kernel: it has its own marker, which is still cleared until set.
    l3FlushEvent->increaseKernelCount();
    EXPECT_EQ(2u, l3FlushEvent->getKernelCount());
    EXPECT_FALSE(l3FlushEvent->getL3FlushForCurrenKernel());
    l3FlushEvent->setL3FlushForCurrentKernel();
    EXPECT_TRUE(l3FlushEvent->getL3FlushForCurrenKernel());

    // After reset no kernel may remain marked.
    l3FlushEvent->reset();
    EXPECT_FALSE(l3FlushEvent->getL3FlushForCurrenKernel());
    EXPECT_TRUE(l3FlushEvent->l3FlushAppliedOnKernel.none());

    l3FlushEvent->destroy();
}
|
||||
|
||||
struct EventSizeFixture : public DeviceFixture {
|
||||
void SetUp() {
|
||||
DeviceFixture::SetUp();
|
||||
|
|
|
@ -329,10 +329,10 @@ HWTEST2_F(CommandListAppendLaunchKernelL3Flush, givenKernelWithEventHostScopeWit
|
|||
result = pCommandList->appendLaunchKernelWithParams(kernel.toHandle(), &groupCount, event->toHandle(), launchParams);
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
|
||||
EXPECT_EQ(true, event->l3FlushWaApplied);
|
||||
EXPECT_EQ(true, event->getL3FlushForCurrenKernel());
|
||||
}
|
||||
|
||||
HWTEST2_F(CommandListAppendLaunchKernelL3Flush, givenKernelWithEventZeroScopeWithoutWalkerPartitionThenEventL3FlushWaNotSet, IsXeHpCore) {
|
||||
HWTEST2_F(CommandListAppendLaunchKernelL3Flush, givenKernelWithEventZeroScopeWithoutWalkerPartitionThenEventL3FlushNotSet, IsXeHpCore) {
|
||||
using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM;
|
||||
|
||||
Mock<::L0::Kernel> kernel;
|
||||
|
@ -360,7 +360,7 @@ HWTEST2_F(CommandListAppendLaunchKernelL3Flush, givenKernelWithEventZeroScopeWit
|
|||
result = pCommandList->appendLaunchKernelWithParams(kernel.toHandle(), &groupCount, event->toHandle(), launchParams);
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
|
||||
EXPECT_EQ(false, event->l3FlushWaApplied);
|
||||
EXPECT_EQ(false, event->getL3FlushForCurrenKernel());
|
||||
}
|
||||
|
||||
HWTEST2_F(CommandListAppendLaunchKernelL3Flush, givenKernelWithEventHostScopeWithoutWalkerPartitionThenSkipOddPacketsDuringQuery, IsXeHpCore) {
|
||||
|
@ -392,7 +392,7 @@ HWTEST2_F(CommandListAppendLaunchKernelL3Flush, givenKernelWithEventHostScopeWit
|
|||
result = pCommandList->appendLaunchKernelWithParams(kernel.toHandle(), &groupCount, event->toHandle(), launchParams);
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
|
||||
EXPECT_EQ(true, event->l3FlushWaApplied);
|
||||
EXPECT_EQ(true, event->getL3FlushForCurrenKernel());
|
||||
EXPECT_EQ(2u, event->getPacketsInUse());
|
||||
|
||||
typename MockTimestampPackets32::Packet data[3] = {};
|
||||
|
|
Loading…
Reference in New Issue