feature: experimental Counter Based Event support

Related-To: NEO-8145

Signed-off-by: Dunajski, Bartosz <bartosz.dunajski@intel.com>
This commit is contained in:
Dunajski, Bartosz 2023-10-31 17:56:27 +00:00 committed by Compute-Runtime-Automation
parent c9376c9829
commit a8c79e0ba1
10 changed files with 80 additions and 48 deletions

View File

@ -172,7 +172,7 @@ void CommandListCoreFamily<gfxCoreFamily>::handleInOrderDependencyCounter(Event
this->commandContainer.addToResidencyContainer(&inOrderExecInfo->inOrderDependencyCounterAllocation);
if (signalEvent && signalEvent->isInOrderExecEvent()) {
if (signalEvent && signalEvent->isCounterBased()) {
signalEvent->updateInOrderExecState(inOrderExecInfo, inOrderExecInfo->inOrderDependencyCounter, this->inOrderAllocationOffset);
}
@ -495,7 +495,7 @@ template <GFXCORE_FAMILY gfxCoreFamily>
ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendEventReset(ze_event_handle_t hEvent) {
auto event = Event::fromHandle(hEvent);
if (event->isInOrderExecEvent()) {
if (event->isCounterBased()) {
return ZE_RESULT_ERROR_INVALID_ARGUMENT;
}
@ -2430,7 +2430,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendWaitOnEvents(uint32_t nu
continue;
}
if (event->isInOrderExecEvent()) {
if (event->isCounterBased()) {
if (!event->getInOrderExecDataAllocation()) {
return ZE_RESULT_ERROR_INVALID_ARGUMENT; // in-order event not signaled yet
}

View File

@ -507,7 +507,7 @@ bool CommandListCoreFamilyImmediate<gfxCoreFamily>::isSkippingInOrderBarrierAllo
auto signalEvent = Event::fromHandle(hSignalEvent);
return !(signalEvent && (signalEvent->isEventTimestampFlagSet() || !signalEvent->isInOrderExecEvent()));
return !(signalEvent && (signalEvent->isEventTimestampFlagSet() || !signalEvent->isCounterBased()));
}
template <GFXCORE_FAMILY gfxCoreFamily>

View File

@ -76,7 +76,7 @@ void programEventL3Flush(Event *event,
// Returns true only when `event` is non-null and at least one of these holds:
//  - the event uses the context-end offset,
//  - the event is not an in-order (counter-based) event,
//  - compactL3FlushEvent() accepts the DC-flush requirement computed from the event's signal scope.
// A null event always yields false.
template <GFXCORE_FAMILY gfxCoreFamily>
bool CommandListCoreFamily<gfxCoreFamily>::isInOrderNonWalkerSignalingRequired(const Event *event) const {
return (event && (event->isUsingContextEndOffset() || !event->isInOrderExecEvent() || compactL3FlushEvent(getDcFlushRequired(event->isSignalScope()))));
return (event && (event->isUsingContextEndOffset() || !event->isCounterBased() || compactL3FlushEvent(getDcFlushRequired(event->isSignalScope()))));
}
template <GFXCORE_FAMILY gfxCoreFamily>

View File

@ -194,6 +194,20 @@ EventPool *EventPool::create(DriverHandle *driver, Context *context, uint32_t nu
return eventPool.release();
}
void EventPool::setupDescriptorFlags(const ze_event_pool_desc_t *desc) {
eventPoolFlags = desc->flags;
if (eventPoolFlags & ZE_EVENT_POOL_FLAG_KERNEL_MAPPED_TIMESTAMP) {
eventPoolFlags |= ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP;
}
auto pNext = reinterpret_cast<const ze_base_desc_t *>(desc->pNext);
if (pNext && pNext->stype == ZE_STRUCTURE_TYPE_COUNTER_BASED_EVENT_POOL_EXP_DESC) {
counterBased = true;
}
}
bool EventPool::isEventPoolTimestampFlagSet() const {
if (NEO::DebugManager.flags.OverrideTimestampEvents.get() != -1) {
auto timestampOverride = !!NEO::DebugManager.flags.OverrideTimestampEvents.get();

View File

@ -217,8 +217,8 @@ struct Event : _ze_event_handle_t {
this->metricStreamer = metricStreamer;
}
void updateInOrderExecState(std::shared_ptr<InOrderExecInfo> &newInOrderExecInfo, uint64_t signalValue, uint32_t allocationOffset);
bool isInOrderExecEvent() const { return inOrderExecEvent; }
void enableInOrderMode() { this->inOrderExecEvent = true; }
bool isCounterBased() const { return counterBased; }
void enableCounterBasedMode() { this->counterBased = true; }
NEO::GraphicsAllocation *getInOrderExecDataAllocation() const;
uint64_t getInOrderExecSignalValueWithSubmissionCounter() const;
uint64_t getInOrderExecBaseSignalValue() const { return inOrderExecSignalValue; }
@ -287,7 +287,7 @@ struct Event : _ze_event_handle_t {
bool usingContextEndOffset = false;
bool signalAllEventPackets = false;
bool isFromIpcPool = false;
bool inOrderExecEvent = false;
bool counterBased = false;
uint64_t timestampRefreshIntervalInNanoSec = 0;
};
@ -296,10 +296,7 @@ struct EventPool : _ze_event_pool_handle_t {
static ze_result_t openEventPoolIpcHandle(const ze_ipc_event_pool_handle_t &ipcEventPoolHandle, ze_event_pool_handle_t *eventPoolHandle,
DriverHandleImp *driver, ContextImp *context, uint32_t numDevices, ze_device_handle_t *deviceHandles);
EventPool(const ze_event_pool_desc_t *desc) : EventPool(desc->count) {
eventPoolFlags = desc->flags;
if (eventPoolFlags & ZE_EVENT_POOL_FLAG_KERNEL_MAPPED_TIMESTAMP) {
eventPoolFlags |= ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP;
}
setupDescriptorFlags(desc);
}
virtual ~EventPool();
MOCKABLE_VIRTUAL ze_result_t destroy();
@ -356,9 +353,12 @@ struct EventPool : _ze_event_pool_handle_t {
return isImplicitScalingCapable;
}
bool isCounterBased() const { return counterBased; }
protected:
EventPool() = default;
EventPool(size_t numEvents) : numEvents(numEvents) {}
void setupDescriptorFlags(const ze_event_pool_desc_t *desc);
std::vector<Device *> devices;
@ -381,6 +381,7 @@ struct EventPool : _ze_event_pool_handle_t {
bool isImportedIpcPool = false;
bool isShareableEventMemory = false;
bool isImplicitScalingCapable = false;
bool counterBased = false;
};
} // namespace L0

View File

@ -73,7 +73,7 @@ struct EventImp : public Event {
ze_result_t calculateProfilingData();
ze_result_t queryStatusEventPackets();
ze_result_t queryInOrderEventStatus();
ze_result_t queryCounterBasedEventStatus();
void handleSuccessfulHostSynchronization();
MOCKABLE_VIRTUAL ze_result_t hostEventSetValue(TagSizeT eventValue);
ze_result_t hostEventSetValueTimestamps(TagSizeT eventVal);

View File

@ -80,8 +80,8 @@ Event *Event::create(EventPool *eventPool, const ze_event_desc_t *desc, Device *
event->timestampRefreshIntervalInNanoSec = refreshTime * milliSecondsToNanoSeconds;
}
if (NEO::DebugManager.flags.ForceInOrderEvents.get() == 1) {
event->enableInOrderMode();
if (eventPool->isCounterBased() || NEO::DebugManager.flags.ForceInOrderEvents.get() == 1) {
event->enableCounterBasedMode();
}
return event;
@ -150,7 +150,7 @@ void EventImp<TagSizeT>::assignKernelEventCompletionData(void *address) {
}
template <typename TagSizeT>
ze_result_t EventImp<TagSizeT>::queryInOrderEventStatus() {
ze_result_t EventImp<TagSizeT>::queryCounterBasedEventStatus() {
if (!this->inOrderExecInfo.get()) {
return ZE_RESULT_NOT_READY;
}
@ -281,8 +281,8 @@ ze_result_t EventImp<TagSizeT>::queryStatus() {
return ZE_RESULT_SUCCESS;
}
if (this->inOrderExecEvent) {
return queryInOrderEventStatus();
if (this->counterBased) {
return queryCounterBasedEventStatus();
} else {
return queryStatusEventPackets();
}
@ -396,7 +396,7 @@ ze_result_t EventImp<TagSizeT>::hostEventSetValue(TagSizeT eventVal) {
template <typename TagSizeT>
ze_result_t EventImp<TagSizeT>::hostSignal() {
if (this->isInOrderExecEvent()) {
if (this->isCounterBased()) {
return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE;
}
@ -447,7 +447,7 @@ ze_result_t EventImp<TagSizeT>::hostSynchronize(uint64_t timeout) {
waitStartTime = std::chrono::high_resolution_clock::now();
lastHangCheckTime = waitStartTime;
do {
if (NEO::DebugManager.flags.WaitForUserFenceOnEventHostSynchronize.get() == 1 && this->inOrderExecEvent) {
if (NEO::DebugManager.flags.WaitForUserFenceOnEventHostSynchronize.get() == 1 && this->counterBased) {
ret = waitForUserFence(timeout);
} else {
ret = queryStatus();
@ -494,7 +494,7 @@ ze_result_t EventImp<TagSizeT>::hostSynchronize(uint64_t timeout) {
template <typename TagSizeT>
ze_result_t EventImp<TagSizeT>::reset() {
if (this->isInOrderExecEvent()) {
if (this->isCounterBased()) {
return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE;
}

View File

@ -19,11 +19,11 @@ namespace ult {
template <>
struct WhiteBox<::L0::Event> : public ::L0::Event {
using BaseClass = ::L0::Event;
using BaseClass::counterBased;
using BaseClass::csrs;
using BaseClass::Event;
using BaseClass::gpuHangCheckPeriod;
using BaseClass::hostAddress;
using BaseClass::inOrderExecEvent;
using BaseClass::isFromIpcPool;
using BaseClass::l3FlushAppliedOnKernel;
using BaseClass::maxKernelCount;

View File

@ -670,7 +670,7 @@ HWTEST_F(CommandListAppendLaunchKernel, givenInvalidKernelWhenAppendingThenRetur
struct InOrderCmdListTests : public CommandListAppendLaunchKernel {
struct MockEvent : public EventImp<uint32_t> {
using EventImp<uint32_t>::inOrderExecEvent;
using EventImp<uint32_t>::counterBased;
using EventImp<uint32_t>::maxPacketCount;
using EventImp<uint32_t>::inOrderExecInfo;
using EventImp<uint32_t>::inOrderExecSignalValue;
@ -700,6 +700,9 @@ struct InOrderCmdListTests : public CommandListAppendLaunchKernel {
eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_HOST_VISIBLE;
eventPoolDesc.count = numEvents;
ze_event_pool_counter_based_exp_desc_t counterBasedExtension = {ZE_STRUCTURE_TYPE_COUNTER_BASED_EVENT_POOL_EXP_DESC};
eventPoolDesc.pNext = &counterBasedExtension;
if (timestampEvent) {
eventPoolDesc.flags |= ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP;
}
@ -712,8 +715,7 @@ struct InOrderCmdListTests : public CommandListAppendLaunchKernel {
for (uint32_t i = 0; i < numEvents; i++) {
eventDesc.index = i;
events.emplace_back(DestroyableZeUniquePtr<MockEvent>(static_cast<MockEvent *>(Event::create<typename GfxFamily::TimestampPacketType>(eventPool.get(), &eventDesc, device))));
EXPECT_FALSE(events.back()->inOrderExecEvent);
events.back()->inOrderExecEvent = true;
EXPECT_TRUE(events.back()->counterBased);
}
return eventPool;
@ -847,7 +849,7 @@ HWTEST2_F(InOrderCmdListTests, givenNotSignaledInOrderEventWhenAddedToWaitListTh
eventDesc.index = 0;
auto event = std::unique_ptr<MockEvent>(static_cast<MockEvent *>(Event::create<typename FamilyType::TimestampPacketType>(eventPool.get(), &eventDesc, device)));
EXPECT_TRUE(event->inOrderExecEvent);
EXPECT_TRUE(event->counterBased);
auto handle = event->toHandle();
@ -858,7 +860,7 @@ HWTEST2_F(InOrderCmdListTests, givenNotSignaledInOrderEventWhenAddedToWaitListTh
// The event is only created here and never handed to any append call,
// so queryStatus() is expected to report ZE_RESULT_NOT_READY.
HWTEST2_F(InOrderCmdListTests, givenNotSignaledInOrderWhenWhenCallingQueryStatusThenReturnNotReady, IsAtLeastSkl) {
auto eventPool = createEvents<FamilyType>(1, false);
events[0]->inOrderExecEvent = true;
events[0]->counterBased = true;
EXPECT_EQ(ZE_RESULT_NOT_READY, events[0]->queryStatus());
}
@ -964,7 +966,7 @@ HWTEST2_F(InOrderCmdListTests, givenDebugFlagSetWhenEventHostSyncCalledThenCallW
EXPECT_EQ(2u, ultCsr->waitUserFenecParams.callCount);
// non in-order event
events[1]->inOrderExecEvent = false;
events[1]->counterBased = false;
events[1]->hostSynchronize(2);
EXPECT_EQ(2u, ultCsr->waitUserFenecParams.callCount);
}
@ -978,7 +980,7 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenHostResetOrSignalEventCalledT
EXPECT_EQ(MemoryConstants::pageSize64k, immCmdList->inOrderExecInfo->inOrderDependencyCounterAllocation.getUnderlyingBufferSize());
EXPECT_TRUE(events[0]->inOrderExecEvent);
EXPECT_TRUE(events[0]->counterBased);
EXPECT_EQ(events[0]->inOrderExecSignalValue, immCmdList->inOrderExecInfo->inOrderDependencyCounter);
EXPECT_EQ(&events[0]->inOrderExecInfo->inOrderDependencyCounterAllocation, &immCmdList->inOrderExecInfo->inOrderDependencyCounterAllocation);
EXPECT_EQ(events[0]->inOrderAllocationOffset, 0u);
@ -1005,11 +1007,11 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWheUsingRegularEventThenDontSetIn
auto immCmdList = createImmCmdList<gfxCoreFamily>();
auto eventPool = createEvents<FamilyType>(1, false);
events[0]->inOrderExecEvent = false;
events[0]->counterBased = false;
immCmdList->appendLaunchKernel(kernel->toHandle(), groupCount, events[0]->toHandle(), 0, nullptr, launchParams, false);
EXPECT_FALSE(events[0]->inOrderExecEvent);
EXPECT_FALSE(events[0]->counterBased);
EXPECT_EQ(events[0]->inOrderExecSignalValue, 0u);
EXPECT_EQ(events[0]->inOrderExecInfo.get(), nullptr);
EXPECT_EQ(events[0]->inOrderAllocationOffset, 0u);
@ -1056,7 +1058,7 @@ HWTEST2_F(InOrderCmdListTests, givenDebugFlagSetWhenDispatchingSemaphoreThenProg
auto eventPool = createEvents<FamilyType>(1, false);
auto eventHandle = events[0]->toHandle();
events[0]->inOrderExecEvent = false;
events[0]->counterBased = false;
auto immCmdList = createImmCmdList<gfxCoreFamily>();
@ -1120,7 +1122,7 @@ HWTEST2_F(InOrderCmdListTests, givenDebugFlagSetWhenDispatchingStoreDataImmThenP
auto eventPool = createEvents<FamilyType>(1, false);
auto eventHandle = events[0]->toHandle();
events[0]->inOrderExecEvent = false;
events[0]->counterBased = false;
auto immCmdList = createImmCmdList<gfxCoreFamily>();
@ -1164,7 +1166,7 @@ HWTEST2_F(InOrderCmdListTests, givenDebugFlagSetAsMaskWhenDispatchingStoreDataIm
auto eventPool = createEvents<FamilyType>(1, false);
auto eventHandle = events[0]->toHandle();
events[0]->inOrderExecEvent = false;
events[0]->counterBased = false;
auto immCmdList = createImmCmdList<gfxCoreFamily>();
@ -1278,7 +1280,7 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenWaitingForRegularEventFromPre
auto immCmdList = createCopyOnlyImmCmdList<gfxCoreFamily>();
auto eventPool = createEvents<FamilyType>(1, false);
events[0]->inOrderExecEvent = false;
events[0]->counterBased = false;
auto eventHandle = events[0]->toHandle();
auto cmdStream = immCmdList->getCmdContainer().getCommandStream();
@ -1370,7 +1372,7 @@ HWTEST2_F(InOrderCmdListTests, givenCmdsChainingWhenDispatchingKernelThenProgram
auto immCmdList = createImmCmdList<gfxCoreFamily>();
auto eventPool = createEvents<FamilyType>(1, false);
events[0]->inOrderExecEvent = false;
events[0]->counterBased = false;
auto cmdStream = immCmdList->getCmdContainer().getCommandStream();
@ -1450,7 +1452,7 @@ HWTEST2_F(InOrderCmdListTests, givenCmdsChainingFromAppendCopyWhenDispatchingKer
auto immCmdList = createImmCmdList<gfxCoreFamily>();
auto eventPool = createEvents<FamilyType>(1, false);
events[0]->inOrderExecEvent = false;
events[0]->counterBased = false;
auto cmdStream = immCmdList->getCmdContainer().getCommandStream();
@ -1548,7 +1550,7 @@ HWTEST2_F(InOrderCmdListTests, givenCmdsChainingWhenDispatchingKernelWithRelaxed
auto immCmdList = createImmCmdList<gfxCoreFamily>();
auto eventPool = createEvents<FamilyType>(1, false);
events[0]->inOrderExecEvent = false;
events[0]->counterBased = false;
auto cmdStream = immCmdList->getCmdContainer().getCommandStream();
@ -2042,7 +2044,7 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenProgrammingRegularEventThenCl
auto eventPool = createEvents<FamilyType>(1, false);
events[0]->signalScope = 0;
events[0]->inOrderExecEvent = false;
events[0]->counterBased = false;
immCmdList->appendLaunchKernel(kernel->toHandle(), groupCount, events[0]->toHandle(), 0, nullptr, launchParams, false);
@ -2192,7 +2194,7 @@ HWTEST2_F(InOrderCmdListTests, givenNonPostSyncWalkerWhenAskingForNonWalkerSigna
auto eventPool1 = createEvents<FamilyType>(1, true);
auto eventPool2 = createEvents<FamilyType>(1, false);
auto eventPool3 = createEvents<FamilyType>(1, false);
events[2]->inOrderExecEvent = false;
events[2]->counterBased = false;
EXPECT_FALSE(immCmdList->isInOrderNonWalkerSignalingRequired(events[0].get()));
EXPECT_FALSE(immCmdList->isInOrderNonWalkerSignalingRequired(events[1].get()));
@ -2351,7 +2353,7 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenProgrammingNonKernelAppendThe
auto cmdStream = immCmdList->getCmdContainer().getCommandStream();
auto eventPool = createEvents<FamilyType>(1, true);
events[0]->inOrderExecEvent = false;
events[0]->counterBased = false;
uint64_t inOrderSyncVa = immCmdList->inOrderExecInfo->inOrderDependencyCounterAllocation.getGpuAddress();
@ -2457,7 +2459,7 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderRegularCmdListWhenProgrammingNonKerne
auto cmdStream = regularCmdList->getCmdContainer().getCommandStream();
auto eventPool = createEvents<FamilyType>(1, true);
events[0]->inOrderExecEvent = false;
events[0]->counterBased = false;
uint8_t ptr[64] = {};
@ -3036,7 +3038,7 @@ HWTEST2_F(InOrderCmdListTests, givenRegularInOrderCmdListWhenProgrammingAppendWa
auto cmdStream = regularCmdList->getCmdContainer().getCommandStream();
auto eventPool = createEvents<FamilyType>(1, false);
events[0]->inOrderExecEvent = false;
events[0]->counterBased = false;
auto eventHandle = events[0]->toHandle();
@ -3345,7 +3347,7 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenProgrammingAppendBarrierWitho
auto offset = cmdStream->getUsed();
auto eventPool = createEvents<FamilyType>(1, false);
events[0]->inOrderExecEvent = false;
events[0]->counterBased = false;
auto eventHandle = events[0]->toHandle();
@ -4203,7 +4205,7 @@ HWTEST2_F(BcsSplitInOrderCmdListTests, givenBcsSplitEnabledWhenAppendingMemoryCo
constexpr size_t copySize = 8 * MemoryConstants::megaByte;
auto eventPool = createEvents<FamilyType>(1, false);
events[0]->inOrderExecEvent = false;
events[0]->counterBased = false;
auto eventHandle = events[0]->toHandle();
immCmdList->appendMemoryCopy(&copyData, &copyData, copySize, nullptr, 0, nullptr, false, false);
@ -4666,7 +4668,7 @@ HWTEST2_F(InOrderRegularCmdListTests, givenInOrderModeWhenDispatchingRegularCmdL
auto eventPool = createEvents<FamilyType>(1, true);
auto eventHandle = events[0]->toHandle();
events[0]->inOrderExecEvent = false;
events[0]->counterBased = false;
auto regularCmdList = createRegularCmdList<gfxCoreFamily>(false);
auto regularCopyOnlyCmdList = createRegularCmdList<gfxCoreFamily>(true);

View File

@ -172,6 +172,21 @@ TEST_F(EventPoolCreate, GivenEventPoolThenAllocationContainsAtLeast16Bytes) {
minAllocationSize);
}
// An extension structure with an unrecognized stype must neither fail pool creation
// nor switch the pool into counter-based mode.
TEST_F(EventPoolCreate, givenInvalidPNextWhenCreatingPoolThenIgnore) {
    ze_base_desc_t baseDesc = {ZE_STRUCTURE_TYPE_FORCE_UINT32};
    ze_event_pool_desc_t eventPoolDesc = {
        ZE_STRUCTURE_TYPE_EVENT_POOL_DESC,
        &baseDesc,
        ZE_EVENT_POOL_FLAG_HOST_VISIBLE,
        1};

    ze_result_t result = ZE_RESULT_SUCCESS;
    std::unique_ptr<L0::EventPool> eventPool(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result));

    EXPECT_EQ(ZE_RESULT_SUCCESS, result);
    ASSERT_NE(nullptr, eventPool);
    // "Ignore" means the unknown descriptor leaves the pool in its default, non counter-based state.
    EXPECT_FALSE(eventPool->isCounterBased());
}
HWTEST_F(EventPoolCreate, givenTimestampEventsThenEventSizeSufficientForAllKernelTimestamps) {
ze_event_pool_desc_t eventPoolDesc = {};
eventPoolDesc.count = 1;
@ -3261,7 +3276,7 @@ HWTEST_F(EventTests, givenInOrderEventWhenHostSynchronizeIsCalledThenAllocationI
auto inOrderExecInfo = std::make_shared<InOrderExecInfo>(*syncAllocation, *neoDevice->getMemoryManager(), false);
event->inOrderExecEvent = true;
event->counterBased = true;
event->updateInOrderExecState(inOrderExecInfo, 1, 0);
constexpr uint64_t timeout = std::numeric_limits<std::uint64_t>::max();
@ -3273,7 +3288,7 @@ HWTEST_F(EventTests, givenInOrderEventWhenHostSynchronizeIsCalledThenAllocationI
auto event2 = zeUniquePtr(whiteboxCast(getHelper<L0GfxCoreHelper>().createEvent(eventPool.get(), &eventDesc, device)));
event2->inOrderExecEvent = true;
event2->counterBased = true;
event2->updateInOrderExecState(inOrderExecInfo, 1, 0);
syncAllocation->updateTaskCount(0u, ultCsr->getOsContext().getContextId());
ultCsr->downloadAllocationsCalledCount = 0;