mirror of
https://github.com/intel/compute-runtime.git
synced 2025-12-26 23:33:20 +08:00
Fix issues in signal all event packets 6/n
This fix unifies setting event properties in the event pool, so they are shared between all event pool devices. Related-To: NEO-7490 Signed-off-by: Zbigniew Zdanowicz <zbigniew.zdanowicz@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
148ee669e6
commit
595aa2f626
@@ -139,6 +139,7 @@ struct Device : _ze_device_handle_t {
|
||||
virtual void storeReusableAllocation(NEO::GraphicsAllocation &alloc) = 0;
|
||||
virtual ze_result_t getFabricVertex(ze_fabric_vertex_handle_t *phVertex) = 0;
|
||||
virtual uint32_t getEventMaxPacketCount() const = 0;
|
||||
virtual uint32_t getEventMaxKernelCount() const = 0;
|
||||
|
||||
protected:
|
||||
NEO::Device *neoDevice = nullptr;
|
||||
|
||||
@@ -1615,4 +1615,12 @@ uint32_t DeviceImp::getEventMaxPacketCount() const {
|
||||
}
|
||||
return basePackets;
|
||||
}
|
||||
|
||||
uint32_t DeviceImp::getEventMaxKernelCount() const {
|
||||
const auto &hardwareInfo = this->getHwInfo();
|
||||
auto &l0GfxCoreHelper = L0GfxCoreHelper::get(hardwareInfo.platform.eRenderCoreFamily);
|
||||
|
||||
return l0GfxCoreHelper.getEventMaxKernelCount(hardwareInfo);
|
||||
}
|
||||
|
||||
} // namespace L0
|
||||
|
||||
@@ -146,6 +146,7 @@ struct DeviceImp : public Device {
|
||||
ze_result_t queryDeviceLuid(ze_device_luid_ext_properties_t *deviceLuidProperties);
|
||||
ze_result_t setDeviceLuid(ze_device_luid_ext_properties_t *deviceLuidProperties);
|
||||
uint32_t getEventMaxPacketCount() const override;
|
||||
uint32_t getEventMaxKernelCount() const override;
|
||||
uint32_t queryDeviceNodeMask();
|
||||
|
||||
protected:
|
||||
|
||||
@@ -67,6 +67,7 @@ struct DriverHandle : _ze_driver_handle_t {
|
||||
uintptr_t *gpuAddress) = 0;
|
||||
virtual ze_result_t fabricVertexGetExp(uint32_t *pCount, ze_fabric_vertex_handle_t *phDevices) = 0;
|
||||
virtual uint32_t getEventMaxPacketCount(uint32_t numDevices, ze_device_handle_t *deviceHandles) const = 0;
|
||||
virtual uint32_t getEventMaxKernelCount(uint32_t numDevices, ze_device_handle_t *deviceHandles) const = 0;
|
||||
|
||||
static DriverHandle *fromHandle(ze_driver_handle_t handle) { return static_cast<DriverHandle *>(handle); }
|
||||
inline ze_driver_handle_t toHandle() { return this; }
|
||||
|
||||
@@ -764,4 +764,22 @@ uint32_t DriverHandleImp::getEventMaxPacketCount(uint32_t numDevices, ze_device_
|
||||
return maxCount;
|
||||
}
|
||||
|
||||
uint32_t DriverHandleImp::getEventMaxKernelCount(uint32_t numDevices, ze_device_handle_t *deviceHandles) const {
|
||||
uint32_t maxCount = 0;
|
||||
|
||||
if (numDevices == 0) {
|
||||
for (auto device : this->devices) {
|
||||
auto deviceMaxCount = device->getEventMaxKernelCount();
|
||||
maxCount = std::max(maxCount, deviceMaxCount);
|
||||
}
|
||||
} else {
|
||||
for (uint32_t i = 0; i < numDevices; i++) {
|
||||
auto deviceMaxCount = Device::fromHandle(deviceHandles[i])->getEventMaxKernelCount();
|
||||
maxCount = std::max(maxCount, deviceMaxCount);
|
||||
}
|
||||
}
|
||||
|
||||
return maxCount;
|
||||
}
|
||||
|
||||
} // namespace L0
|
||||
|
||||
@@ -83,6 +83,7 @@ struct DriverHandleImp : public DriverHandle {
|
||||
ze_result_t fabricEdgeGetExp(ze_fabric_vertex_handle_t hVertexA, ze_fabric_vertex_handle_t hVertexB,
|
||||
uint32_t *pCount, ze_fabric_edge_handle_t *phEdges);
|
||||
uint32_t getEventMaxPacketCount(uint32_t numDevices, ze_device_handle_t *deviceHandles) const override;
|
||||
uint32_t getEventMaxKernelCount(uint32_t numDevices, ze_device_handle_t *deviceHandles) const override;
|
||||
|
||||
std::unique_ptr<HostPointerManager> hostPointerManager;
|
||||
// Experimental functions
|
||||
|
||||
@@ -168,8 +168,10 @@ void EventPoolImp::initializeSizeParameters(uint32_t numDevices, ze_device_handl
|
||||
|
||||
bool useDynamicEventPackets = l0GfxCoreHelper.useDynamicEventPacketsCount(hwInfo);
|
||||
eventPackets = EventPacketsCount::eventPackets;
|
||||
maxKernelCount = EventPacketsCount::maxKernelSplit;
|
||||
if (useDynamicEventPackets) {
|
||||
eventPackets = driver.getEventMaxPacketCount(numDevices, deviceHandles);
|
||||
maxKernelCount = driver.getEventMaxKernelCount(numDevices, deviceHandles);
|
||||
}
|
||||
setEventSize(static_cast<uint32_t>(alignUp(eventPackets * gfxCoreHelper.getSingleTimestampPacketSize(), eventAlignment)));
|
||||
|
||||
|
||||
@@ -302,6 +302,10 @@ struct EventPool : _ze_event_pool_handle_t {
|
||||
return false;
|
||||
}
|
||||
|
||||
uint32_t getMaxKernelCount() const {
|
||||
return maxKernelCount;
|
||||
}
|
||||
|
||||
std::unique_ptr<NEO::MultiGraphicsAllocation> eventPoolAllocations;
|
||||
ze_event_pool_flags_t eventPoolFlags;
|
||||
|
||||
@@ -314,6 +318,7 @@ struct EventPool : _ze_event_pool_handle_t {
|
||||
uint32_t eventAlignment = 0;
|
||||
uint32_t eventSize = 0;
|
||||
uint32_t eventPackets = 0;
|
||||
uint32_t maxKernelCount = 0;
|
||||
};
|
||||
|
||||
struct EventPoolImp : public EventPool {
|
||||
|
||||
@@ -29,13 +29,7 @@ Event *Event::create(EventPool *eventPool, const ze_event_desc_t *desc, Device *
|
||||
auto &hwInfo = neoDevice->getHardwareInfo();
|
||||
auto &l0GfxCoreHelper = neoDevice->getRootDeviceEnvironment().getHelper<L0GfxCoreHelper>();
|
||||
|
||||
uint32_t maxKernels = EventPacketsCount::maxKernelSplit;
|
||||
if (l0GfxCoreHelper.useDynamicEventPacketsCount(hwInfo)) {
|
||||
maxKernels = l0GfxCoreHelper.getEventMaxKernelCount(hwInfo);
|
||||
}
|
||||
|
||||
event->signalAllEventPackets = L0GfxCoreHelper::useSignalAllEventPackets(hwInfo);
|
||||
event->kernelEventCompletionData = std::make_unique<KernelEventCompletionData<TagSizeT>[]>(maxKernels);
|
||||
|
||||
auto alloc = eventPool->getAllocation().getGraphicsAllocation(neoDevice->getRootDeviceIndex());
|
||||
|
||||
@@ -46,9 +40,12 @@ Event *Event::create(EventPool *eventPool, const ze_event_desc_t *desc, Device *
|
||||
event->signalScope = desc->signal;
|
||||
event->waitScope = desc->wait;
|
||||
event->csr = csr;
|
||||
event->maxKernelCount = maxKernels;
|
||||
event->maxKernelCount = eventPool->getMaxKernelCount();
|
||||
event->maxPacketCount = eventPool->getEventMaxPackets();
|
||||
|
||||
event->kernelEventCompletionData =
|
||||
std::make_unique<KernelEventCompletionData<TagSizeT>[]>(event->maxKernelCount);
|
||||
|
||||
bool useContextEndOffset = l0GfxCoreHelper.multiTileCapablePlatform();
|
||||
int32_t overrideUseContextEndOffset = NEO::DebugManager.flags.UseContextEndOffsetForEventCompletion.get();
|
||||
if (overrideUseContextEndOffset != -1) {
|
||||
|
||||
@@ -84,6 +84,7 @@ struct Mock<Device> : public Device {
|
||||
ADDMETHOD_NOBASE_VOIDRETURN(storeReusableAllocation, (NEO::GraphicsAllocation & alloc));
|
||||
ADDMETHOD_NOBASE(getFabricVertex, ze_result_t, ZE_RESULT_SUCCESS, (ze_fabric_vertex_handle_t * phVertex));
|
||||
ADDMETHOD_CONST_NOBASE(getEventMaxPacketCount, uint32_t, 8, ())
|
||||
ADDMETHOD_CONST_NOBASE(getEventMaxKernelCount, uint32_t, 3, ())
|
||||
|
||||
DebugSession *createDebugSession(const zet_debug_config_t &config, ze_result_t &result, bool isRootAttach) override {
|
||||
result = ZE_RESULT_ERROR_UNSUPPORTED_FEATURE;
|
||||
|
||||
@@ -36,6 +36,7 @@ struct Mock<DriverHandle> : public DriverHandleImp {
|
||||
ADDMETHOD_NOBASE(getHostPointerBaseAddress, ze_result_t, ZE_RESULT_ERROR_UNSUPPORTED_FEATURE, (void *ptr, void **baseAddress))
|
||||
ADDMETHOD_NOBASE(findHostPointerAllocation, NEO::GraphicsAllocation *, nullptr, (void *ptr, size_t size, uint32_t rootDeviceIndex))
|
||||
ADDMETHOD_CONST_NOBASE(getEventMaxPacketCount, uint32_t, 8, (uint32_t, ze_device_handle_t *))
|
||||
ADDMETHOD_CONST_NOBASE(getEventMaxKernelCount, uint32_t, 3, (uint32_t, ze_device_handle_t *))
|
||||
|
||||
void setupDevices(std::vector<std::unique_ptr<NEO::Device>> devices);
|
||||
|
||||
|
||||
@@ -1928,10 +1928,8 @@ TEST_F(EventUsedPacketSignalTests, givenEventUseMultiplePacketsWhenHostSignalThe
|
||||
}
|
||||
}
|
||||
|
||||
HWTEST2_F(EventUsedPacketSignalTests, WhenSettingL3FlushOnEventThenSetOnParticularKernel, IsAtLeastXeHpCore) {
|
||||
DebugManagerStateRestore restorer;
|
||||
DebugManager.flags.UsePipeControlMultiKernelEventSync.set(0);
|
||||
|
||||
using EventUsedPacketSignalNoCompactionTests = Test<EventUsedPacketSignalFixture<1, 0, 0, 0>>;
|
||||
HWTEST2_F(EventUsedPacketSignalNoCompactionTests, WhenSettingL3FlushOnEventThenSetOnParticularKernel, IsAtLeastXeHpCore) {
|
||||
auto event = whiteboxCast(Event::create<uint32_t>(eventPool.get(), &eventDesc, device));
|
||||
ASSERT_NE(event, nullptr);
|
||||
EXPECT_FALSE(event->getL3FlushForCurrenKernel());
|
||||
@@ -2279,16 +2277,9 @@ struct MockEventCompletion : public EventImp<uint32_t> {
|
||||
MockEventCompletion(L0::EventPool *eventPool, int index, L0::Device *device) : EventImp(eventPool, index, device, false) {
|
||||
auto neoDevice = device->getNEODevice();
|
||||
auto &hwInfo = neoDevice->getHardwareInfo();
|
||||
auto &l0GfxCoreHelper = L0GfxCoreHelper::get(hwInfo.platform.eRenderCoreFamily);
|
||||
|
||||
signalAllEventPackets = L0GfxCoreHelper::useSignalAllEventPackets(hwInfo);
|
||||
|
||||
uint32_t maxKernels = EventPacketsCount::maxKernelSplit;
|
||||
if (l0GfxCoreHelper.useDynamicEventPacketsCount(hwInfo)) {
|
||||
maxKernels = l0GfxCoreHelper.getEventMaxKernelCount(hwInfo);
|
||||
}
|
||||
kernelEventCompletionData = std::make_unique<KernelEventCompletionData<uint32_t>[]>(maxKernels);
|
||||
|
||||
auto alloc = eventPool->getAllocation().getGraphicsAllocation(neoDevice->getRootDeviceIndex());
|
||||
|
||||
uint64_t baseHostAddr = reinterpret_cast<uint64_t>(alloc->getUnderlyingBuffer());
|
||||
@@ -2297,8 +2288,10 @@ struct MockEventCompletion : public EventImp<uint32_t> {
|
||||
hostAddress = reinterpret_cast<void *>(baseHostAddr + eventPoolOffset);
|
||||
csr = neoDevice->getDefaultEngine().commandStreamReceiver;
|
||||
|
||||
maxKernelCount = maxKernels;
|
||||
maxKernelCount = eventPool->getMaxKernelCount();
|
||||
maxPacketCount = eventPool->getEventMaxPackets();
|
||||
|
||||
kernelEventCompletionData = std::make_unique<KernelEventCompletionData<uint32_t>[]>(maxKernelCount);
|
||||
}
|
||||
|
||||
void assignKernelEventCompletionData(void *address) override {
|
||||
@@ -2442,6 +2435,9 @@ struct EventDynamicPacketUseFixture : public DeviceFixture {
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
ASSERT_NE(nullptr, eventPool);
|
||||
|
||||
auto expectedMaxKernelCount = driverHandle->getEventMaxKernelCount(0, nullptr);
|
||||
EXPECT_EQ(expectedMaxKernelCount, eventPool->getMaxKernelCount());
|
||||
|
||||
auto eventPoolMaxPackets = eventPool->getEventMaxPackets();
|
||||
auto expectedPoolMaxPackets = l0GfxCoreHelper.getEventBaseMaxPacketCount(hwInfo);
|
||||
if constexpr (multiTile == 1) {
|
||||
@@ -2465,6 +2461,7 @@ struct EventDynamicPacketUseFixture : public DeviceFixture {
|
||||
EXPECT_EQ(expectedPoolMaxPackets, event->getMaxPacketsCount());
|
||||
|
||||
uint32_t maxKernels = l0GfxCoreHelper.getEventMaxKernelCount(hwInfo);
|
||||
EXPECT_EQ(expectedMaxKernelCount, maxKernels);
|
||||
EXPECT_EQ(maxKernels, event->getMaxKernelCount());
|
||||
}
|
||||
|
||||
@@ -2499,6 +2496,9 @@ struct EventDynamicPacketUseFixture : public DeviceFixture {
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
ASSERT_NE(nullptr, eventPool);
|
||||
|
||||
auto expectedMaxKernelCount = driverHandle->getEventMaxKernelCount(1, deviceHandles.data());
|
||||
EXPECT_EQ(expectedMaxKernelCount, eventPool->getMaxKernelCount());
|
||||
|
||||
auto eventPoolMaxPackets = eventPool->getEventMaxPackets();
|
||||
auto expectedPoolMaxPackets = l0GfxCoreHelper.getEventBaseMaxPacketCount(hwInfo);
|
||||
|
||||
@@ -2520,6 +2520,7 @@ struct EventDynamicPacketUseFixture : public DeviceFixture {
|
||||
EXPECT_EQ(expectedPoolMaxPackets, event->getMaxPacketsCount());
|
||||
|
||||
uint32_t maxKernels = l0GfxCoreHelper.getEventMaxKernelCount(hwInfo);
|
||||
EXPECT_EQ(expectedMaxKernelCount, maxKernels);
|
||||
EXPECT_EQ(maxKernels, event->getMaxKernelCount());
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user