Fix issues in signal all event packets 6/n

This fix unifies the setting of event properties in the event pool, so they are
shared between all event pool devices.

Related-To: NEO-7490

Signed-off-by: Zbigniew Zdanowicz <zbigniew.zdanowicz@intel.com>
This commit is contained in:
Zbigniew Zdanowicz
2022-12-13 18:50:36 +00:00
committed by Compute-Runtime-Automation
parent 148ee669e6
commit 595aa2f626
12 changed files with 56 additions and 19 deletions

View File

@@ -139,6 +139,7 @@ struct Device : _ze_device_handle_t {
virtual void storeReusableAllocation(NEO::GraphicsAllocation &alloc) = 0;
virtual ze_result_t getFabricVertex(ze_fabric_vertex_handle_t *phVertex) = 0;
virtual uint32_t getEventMaxPacketCount() const = 0;
virtual uint32_t getEventMaxKernelCount() const = 0;
protected:
NEO::Device *neoDevice = nullptr;

View File

@@ -1615,4 +1615,12 @@ uint32_t DeviceImp::getEventMaxPacketCount() const {
}
return basePackets;
}
// Returns the value reported by L0GfxCoreHelper::getEventMaxKernelCount for
// this device's hardware info; the helper is selected by the device's
// render core family.
uint32_t DeviceImp::getEventMaxKernelCount() const {
    const auto &hwInfo = this->getHwInfo();
    return L0GfxCoreHelper::get(hwInfo.platform.eRenderCoreFamily).getEventMaxKernelCount(hwInfo);
}
} // namespace L0

View File

@@ -146,6 +146,7 @@ struct DeviceImp : public Device {
ze_result_t queryDeviceLuid(ze_device_luid_ext_properties_t *deviceLuidProperties);
ze_result_t setDeviceLuid(ze_device_luid_ext_properties_t *deviceLuidProperties);
uint32_t getEventMaxPacketCount() const override;
uint32_t getEventMaxKernelCount() const override;
uint32_t queryDeviceNodeMask();
protected:

View File

@@ -67,6 +67,7 @@ struct DriverHandle : _ze_driver_handle_t {
uintptr_t *gpuAddress) = 0;
virtual ze_result_t fabricVertexGetExp(uint32_t *pCount, ze_fabric_vertex_handle_t *phDevices) = 0;
virtual uint32_t getEventMaxPacketCount(uint32_t numDevices, ze_device_handle_t *deviceHandles) const = 0;
virtual uint32_t getEventMaxKernelCount(uint32_t numDevices, ze_device_handle_t *deviceHandles) const = 0;
static DriverHandle *fromHandle(ze_driver_handle_t handle) { return static_cast<DriverHandle *>(handle); }
inline ze_driver_handle_t toHandle() { return this; }

View File

@@ -764,4 +764,22 @@ uint32_t DriverHandleImp::getEventMaxPacketCount(uint32_t numDevices, ze_device_
return maxCount;
}
// Computes the largest getEventMaxKernelCount() value across a set of devices.
//
// numDevices    - number of entries in deviceHandles; when 0, every device
//                 held in this->devices is queried instead.
// deviceHandles - handles to query when numDevices is non-zero; each entry is
//                 converted with Device::fromHandle and dereferenced, so the
//                 first numDevices entries are read unconditionally.
//
// Returns the maximum reported count, or 0 when no device was queried.
uint32_t DriverHandleImp::getEventMaxKernelCount(uint32_t numDevices, ze_device_handle_t *deviceHandles) const {
    uint32_t highestCount = 0;

    if (numDevices != 0) {
        // Caller supplied an explicit device list: only those devices count.
        for (uint32_t idx = 0; idx < numDevices; idx++) {
            highestCount = std::max(highestCount, Device::fromHandle(deviceHandles[idx])->getEventMaxKernelCount());
        }
        return highestCount;
    }

    // No explicit list: consider every device owned by this driver handle.
    for (auto driverDevice : this->devices) {
        highestCount = std::max(highestCount, driverDevice->getEventMaxKernelCount());
    }
    return highestCount;
}
} // namespace L0

View File

@@ -83,6 +83,7 @@ struct DriverHandleImp : public DriverHandle {
ze_result_t fabricEdgeGetExp(ze_fabric_vertex_handle_t hVertexA, ze_fabric_vertex_handle_t hVertexB,
uint32_t *pCount, ze_fabric_edge_handle_t *phEdges);
uint32_t getEventMaxPacketCount(uint32_t numDevices, ze_device_handle_t *deviceHandles) const override;
uint32_t getEventMaxKernelCount(uint32_t numDevices, ze_device_handle_t *deviceHandles) const override;
std::unique_ptr<HostPointerManager> hostPointerManager;
// Experimental functions

View File

@@ -168,8 +168,10 @@ void EventPoolImp::initializeSizeParameters(uint32_t numDevices, ze_device_handl
bool useDynamicEventPackets = l0GfxCoreHelper.useDynamicEventPacketsCount(hwInfo);
eventPackets = EventPacketsCount::eventPackets;
maxKernelCount = EventPacketsCount::maxKernelSplit;
if (useDynamicEventPackets) {
eventPackets = driver.getEventMaxPacketCount(numDevices, deviceHandles);
maxKernelCount = driver.getEventMaxKernelCount(numDevices, deviceHandles);
}
setEventSize(static_cast<uint32_t>(alignUp(eventPackets * gfxCoreHelper.getSingleTimestampPacketSize(), eventAlignment)));

View File

@@ -302,6 +302,10 @@ struct EventPool : _ze_event_pool_handle_t {
return false;
}
uint32_t getMaxKernelCount() const {
return maxKernelCount;
}
std::unique_ptr<NEO::MultiGraphicsAllocation> eventPoolAllocations;
ze_event_pool_flags_t eventPoolFlags;
@@ -314,6 +318,7 @@ struct EventPool : _ze_event_pool_handle_t {
uint32_t eventAlignment = 0;
uint32_t eventSize = 0;
uint32_t eventPackets = 0;
uint32_t maxKernelCount = 0;
};
struct EventPoolImp : public EventPool {

View File

@@ -29,13 +29,7 @@ Event *Event::create(EventPool *eventPool, const ze_event_desc_t *desc, Device *
auto &hwInfo = neoDevice->getHardwareInfo();
auto &l0GfxCoreHelper = neoDevice->getRootDeviceEnvironment().getHelper<L0GfxCoreHelper>();
uint32_t maxKernels = EventPacketsCount::maxKernelSplit;
if (l0GfxCoreHelper.useDynamicEventPacketsCount(hwInfo)) {
maxKernels = l0GfxCoreHelper.getEventMaxKernelCount(hwInfo);
}
event->signalAllEventPackets = L0GfxCoreHelper::useSignalAllEventPackets(hwInfo);
event->kernelEventCompletionData = std::make_unique<KernelEventCompletionData<TagSizeT>[]>(maxKernels);
auto alloc = eventPool->getAllocation().getGraphicsAllocation(neoDevice->getRootDeviceIndex());
@@ -46,9 +40,12 @@ Event *Event::create(EventPool *eventPool, const ze_event_desc_t *desc, Device *
event->signalScope = desc->signal;
event->waitScope = desc->wait;
event->csr = csr;
event->maxKernelCount = maxKernels;
event->maxKernelCount = eventPool->getMaxKernelCount();
event->maxPacketCount = eventPool->getEventMaxPackets();
event->kernelEventCompletionData =
std::make_unique<KernelEventCompletionData<TagSizeT>[]>(event->maxKernelCount);
bool useContextEndOffset = l0GfxCoreHelper.multiTileCapablePlatform();
int32_t overrideUseContextEndOffset = NEO::DebugManager.flags.UseContextEndOffsetForEventCompletion.get();
if (overrideUseContextEndOffset != -1) {

View File

@@ -84,6 +84,7 @@ struct Mock<Device> : public Device {
ADDMETHOD_NOBASE_VOIDRETURN(storeReusableAllocation, (NEO::GraphicsAllocation & alloc));
ADDMETHOD_NOBASE(getFabricVertex, ze_result_t, ZE_RESULT_SUCCESS, (ze_fabric_vertex_handle_t * phVertex));
ADDMETHOD_CONST_NOBASE(getEventMaxPacketCount, uint32_t, 8, ())
ADDMETHOD_CONST_NOBASE(getEventMaxKernelCount, uint32_t, 3, ())
DebugSession *createDebugSession(const zet_debug_config_t &config, ze_result_t &result, bool isRootAttach) override {
result = ZE_RESULT_ERROR_UNSUPPORTED_FEATURE;

View File

@@ -36,6 +36,7 @@ struct Mock<DriverHandle> : public DriverHandleImp {
ADDMETHOD_NOBASE(getHostPointerBaseAddress, ze_result_t, ZE_RESULT_ERROR_UNSUPPORTED_FEATURE, (void *ptr, void **baseAddress))
ADDMETHOD_NOBASE(findHostPointerAllocation, NEO::GraphicsAllocation *, nullptr, (void *ptr, size_t size, uint32_t rootDeviceIndex))
ADDMETHOD_CONST_NOBASE(getEventMaxPacketCount, uint32_t, 8, (uint32_t, ze_device_handle_t *))
ADDMETHOD_CONST_NOBASE(getEventMaxKernelCount, uint32_t, 3, (uint32_t, ze_device_handle_t *))
void setupDevices(std::vector<std::unique_ptr<NEO::Device>> devices);

View File

@@ -1928,10 +1928,8 @@ TEST_F(EventUsedPacketSignalTests, givenEventUseMultiplePacketsWhenHostSignalThe
}
}
HWTEST2_F(EventUsedPacketSignalTests, WhenSettingL3FlushOnEventThenSetOnParticularKernel, IsAtLeastXeHpCore) {
DebugManagerStateRestore restorer;
DebugManager.flags.UsePipeControlMultiKernelEventSync.set(0);
using EventUsedPacketSignalNoCompactionTests = Test<EventUsedPacketSignalFixture<1, 0, 0, 0>>;
HWTEST2_F(EventUsedPacketSignalNoCompactionTests, WhenSettingL3FlushOnEventThenSetOnParticularKernel, IsAtLeastXeHpCore) {
auto event = whiteboxCast(Event::create<uint32_t>(eventPool.get(), &eventDesc, device));
ASSERT_NE(event, nullptr);
EXPECT_FALSE(event->getL3FlushForCurrenKernel());
@@ -2279,16 +2277,9 @@ struct MockEventCompletion : public EventImp<uint32_t> {
MockEventCompletion(L0::EventPool *eventPool, int index, L0::Device *device) : EventImp(eventPool, index, device, false) {
auto neoDevice = device->getNEODevice();
auto &hwInfo = neoDevice->getHardwareInfo();
auto &l0GfxCoreHelper = L0GfxCoreHelper::get(hwInfo.platform.eRenderCoreFamily);
signalAllEventPackets = L0GfxCoreHelper::useSignalAllEventPackets(hwInfo);
uint32_t maxKernels = EventPacketsCount::maxKernelSplit;
if (l0GfxCoreHelper.useDynamicEventPacketsCount(hwInfo)) {
maxKernels = l0GfxCoreHelper.getEventMaxKernelCount(hwInfo);
}
kernelEventCompletionData = std::make_unique<KernelEventCompletionData<uint32_t>[]>(maxKernels);
auto alloc = eventPool->getAllocation().getGraphicsAllocation(neoDevice->getRootDeviceIndex());
uint64_t baseHostAddr = reinterpret_cast<uint64_t>(alloc->getUnderlyingBuffer());
@@ -2297,8 +2288,10 @@ struct MockEventCompletion : public EventImp<uint32_t> {
hostAddress = reinterpret_cast<void *>(baseHostAddr + eventPoolOffset);
csr = neoDevice->getDefaultEngine().commandStreamReceiver;
maxKernelCount = maxKernels;
maxKernelCount = eventPool->getMaxKernelCount();
maxPacketCount = eventPool->getEventMaxPackets();
kernelEventCompletionData = std::make_unique<KernelEventCompletionData<uint32_t>[]>(maxKernelCount);
}
void assignKernelEventCompletionData(void *address) override {
@@ -2442,6 +2435,9 @@ struct EventDynamicPacketUseFixture : public DeviceFixture {
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
ASSERT_NE(nullptr, eventPool);
auto expectedMaxKernelCount = driverHandle->getEventMaxKernelCount(0, nullptr);
EXPECT_EQ(expectedMaxKernelCount, eventPool->getMaxKernelCount());
auto eventPoolMaxPackets = eventPool->getEventMaxPackets();
auto expectedPoolMaxPackets = l0GfxCoreHelper.getEventBaseMaxPacketCount(hwInfo);
if constexpr (multiTile == 1) {
@@ -2465,6 +2461,7 @@ struct EventDynamicPacketUseFixture : public DeviceFixture {
EXPECT_EQ(expectedPoolMaxPackets, event->getMaxPacketsCount());
uint32_t maxKernels = l0GfxCoreHelper.getEventMaxKernelCount(hwInfo);
EXPECT_EQ(expectedMaxKernelCount, maxKernels);
EXPECT_EQ(maxKernels, event->getMaxKernelCount());
}
@@ -2499,6 +2496,9 @@ struct EventDynamicPacketUseFixture : public DeviceFixture {
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
ASSERT_NE(nullptr, eventPool);
auto expectedMaxKernelCount = driverHandle->getEventMaxKernelCount(1, deviceHandles.data());
EXPECT_EQ(expectedMaxKernelCount, eventPool->getMaxKernelCount());
auto eventPoolMaxPackets = eventPool->getEventMaxPackets();
auto expectedPoolMaxPackets = l0GfxCoreHelper.getEventBaseMaxPacketCount(hwInfo);
@@ -2520,6 +2520,7 @@ struct EventDynamicPacketUseFixture : public DeviceFixture {
EXPECT_EQ(expectedPoolMaxPackets, event->getMaxPacketsCount());
uint32_t maxKernels = l0GfxCoreHelper.getEventMaxKernelCount(hwInfo);
EXPECT_EQ(expectedMaxKernelCount, maxKernels);
EXPECT_EQ(maxKernels, event->getMaxKernelCount());
}