Add feature to estimate number of event packets

Related-To: NEO-7469

Signed-off-by: Zbigniew Zdanowicz <zbigniew.zdanowicz@intel.com>
This commit is contained in:
Zbigniew Zdanowicz
2022-10-27 11:40:44 +00:00
committed by Compute-Runtime-Automation
parent 23bb199a15
commit 27d042107a
32 changed files with 545 additions and 76 deletions

View File

@@ -99,6 +99,7 @@ if(SUPPORT_XEHP_AND_LATER)
list(APPEND L0_RUNTIME_SOURCES
${CMAKE_CURRENT_SOURCE_DIR}/cmdlist/cmdlist_hw_xehp_and_later.inl
${CMAKE_CURRENT_SOURCE_DIR}/cmdqueue/cmdqueue_xe_hp_core_and_later.inl
${CMAKE_CURRENT_SOURCE_DIR}/hw_helpers/l0_hw_helper_xehp_and_later.inl
)
endif()

View File

@@ -390,7 +390,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendEventReset(ze_event_hand
}
if (event->isEventTimestampFlagSet()) {
packetsToReset = EventPacketsCount::eventPackets;
packetsToReset = event->getMaxPacketsCount();
}
event->resetPackets();
event->resetCompletion();

View File

@@ -577,12 +577,11 @@ ze_result_t ContextImp::openEventPoolIpcHandle(const ze_ipc_event_pool_handle_t
auto device = Device::fromHandle(this->devices.begin()->second);
auto neoDevice = device->getNEODevice();
NEO::osHandle osHandle = static_cast<NEO::osHandle>(handle);
auto &hwHelper = device->getHwHelper();
const uint32_t eventAlignment = static_cast<uint32_t>(hwHelper.getTimestampPacketAllocatorAlignment());
uint32_t eventSize = static_cast<uint32_t>(alignUp(EventPacketsCount::eventPackets * hwHelper.getSingleTimestampPacketSize(), eventAlignment));
size_t alignedSize = alignUp<size_t>(numEvents * eventSize, MemoryConstants::pageSize64k);
eventPool->initializeSizeParameters(this->numDevices, this->deviceHandles.data(), *this->driverHandle, device->getHwInfo());
NEO::AllocationProperties unifiedMemoryProperties{rootDeviceIndex,
alignedSize,
eventPool->getEventPoolSize(),
NEO::AllocationType::BUFFER_HOST_MEMORY,
systemMemoryBitfield};
@@ -605,8 +604,6 @@ ze_result_t ContextImp::openEventPoolIpcHandle(const ze_ipc_event_pool_handle_t
eventPool->eventPoolPtr = reinterpret_cast<void *>(alloc->getUnderlyingBuffer());
eventPool->devices.push_back(device);
eventPool->isImportedIpcPool = true;
eventPool->setEventSize(eventSize);
eventPool->setEventAlignment(eventAlignment);
for (auto currDeviceIndex : this->rootDeviceIndices) {
if (currDeviceIndex == rootDeviceIndex) {

View File

@@ -149,11 +149,20 @@ struct ContextImp : Context {
bool isShareableMemory(const void *exportDesc, bool exportableMemory, NEO::Device *neoDevice) override;
void *getMemHandlePtr(ze_device_handle_t hDevice, uint64_t handle, ze_ipc_memory_flags_t flags) override;
void initDeviceHandles(uint32_t numDevices, ze_device_handle_t *deviceHandles) {
this->numDevices = numDevices;
if (numDevices > 0) {
this->deviceHandles.assign(deviceHandles, deviceHandles + numDevices);
}
}
protected:
bool isAllocationSuitableForCompression(const StructuresLookupTable &structuresLookupTable, Device &device, size_t allocSize);
std::map<uint32_t, ze_device_handle_t> devices;
std::vector<ze_device_handle_t> deviceHandles;
DriverHandleImp *driverHandle = nullptr;
uint32_t numDevices = 0;
};
} // namespace L0

View File

@@ -136,6 +136,7 @@ struct Device : _ze_device_handle_t {
virtual NEO::GraphicsAllocation *obtainReusableAllocation(size_t requiredSize, NEO::AllocationType type) = 0;
virtual void storeReusableAllocation(NEO::GraphicsAllocation &alloc) = 0;
virtual ze_result_t getFabricVertex(ze_fabric_vertex_handle_t *phVertex) = 0;
virtual uint32_t getEventMaxPacketCount() const = 0;
protected:
NEO::Device *neoDevice = nullptr;

View File

@@ -1515,4 +1515,14 @@ ze_result_t DeviceImp::getFabricVertex(ze_fabric_vertex_handle_t *phVertex) {
return ZE_RESULT_SUCCESS;
}
// Returns the largest number of event packets this device may need.
// The base count comes from the L0 hardware helper selected by the device's
// render core family; when the device is implicit-scaling capable, that count
// is multiplied by neoDevice->getDeviceBitfield().count().
uint32_t DeviceImp::getEventMaxPacketCount() const {
    const auto &hwInfo = this->getHwInfo();
    const uint32_t packetsPerTile = L0HwHelper::get(hwInfo.platform.eRenderCoreFamily).getEventBaseMaxPacketCount(hwInfo);

    if (!this->isImplicitScalingCapable()) {
        return packetsPerTile;
    }

    const auto tileCount = static_cast<uint32_t>(neoDevice->getDeviceBitfield().count());
    return packetsPerTile * tileCount;
}
} // namespace L0

View File

@@ -141,6 +141,7 @@ struct DeviceImp : public Device {
ze_result_t queryDeviceLuid(ze_device_luid_ext_properties_t *deviceLuidProperties);
ze_result_t setDeviceLuid(ze_device_luid_ext_properties_t *deviceLuidProperties);
uint32_t getEventMaxPacketCount() const override;
protected:
void adjustCommandQueueDesc(uint32_t &ordinal, uint32_t &index);

View File

@@ -66,6 +66,7 @@ struct DriverHandle : _ze_driver_handle_t {
uint32_t rootDeviceIndex,
uintptr_t *gpuAddress) = 0;
virtual ze_result_t fabricVertexGetExp(uint32_t *pCount, ze_fabric_vertex_handle_t *phDevices) = 0;
virtual uint32_t getEventMaxPacketCount(uint32_t numDevices, ze_device_handle_t *deviceHandles) const = 0;
static DriverHandle *fromHandle(ze_driver_handle_t handle) { return static_cast<DriverHandle *>(handle); }
inline ze_driver_handle_t toHandle() { return this; }

View File

@@ -58,7 +58,7 @@ ze_result_t DriverHandleImp::createContext(const ze_context_desc_t *desc,
}
*phContext = context->toHandle();
context->initDeviceHandles(numDevices, phDevices);
if (numDevices == 0) {
for (auto device : this->devices) {
auto neoDevice = device->getNEODevice();
@@ -704,4 +704,22 @@ ze_result_t DriverHandleImp::fabricEdgeGetExp(ze_fabric_vertex_handle_t hVertexA
return ZE_RESULT_SUCCESS;
}
// Returns the maximum of Device::getEventMaxPacketCount() over a set of devices.
// When numDevices is non-zero the set is deviceHandles[0..numDevices);
// when it is zero, every device owned by this driver handle is considered.
// Yields 0 if the selected set is empty.
uint32_t DriverHandleImp::getEventMaxPacketCount(uint32_t numDevices, ze_device_handle_t *deviceHandles) const {
    uint32_t largestPacketCount = 0;

    if (numDevices != 0) {
        for (uint32_t idx = 0; idx < numDevices; idx++) {
            auto *selectedDevice = Device::fromHandle(deviceHandles[idx]);
            largestPacketCount = std::max(largestPacketCount, selectedDevice->getEventMaxPacketCount());
        }
        return largestPacketCount;
    }

    // No explicit list supplied: fall back to all devices of the driver.
    for (auto driverDevice : this->devices) {
        largestPacketCount = std::max(largestPacketCount, driverDevice->getEventMaxPacketCount());
    }
    return largestPacketCount;
}
} // namespace L0

View File

@@ -82,6 +82,7 @@ struct DriverHandleImp : public DriverHandle {
Device *device);
ze_result_t fabricEdgeGetExp(ze_fabric_vertex_handle_t hVertexA, ze_fabric_vertex_handle_t hVertexB,
uint32_t *pCount, ze_fabric_edge_handle_t *phEdges);
uint32_t getEventMaxPacketCount(uint32_t numDevices, ze_device_handle_t *deviceHandles) const override;
std::unique_ptr<HostPointerManager> hostPointerManager;
// Experimental functions

View File

@@ -43,17 +43,18 @@ ze_result_t EventPoolImp::initialize(DriverHandle *driver, Context *context, uin
RootDeviceIndicesContainer rootDeviceIndices;
uint32_t maxRootDeviceIndex = 0u;
uint32_t currentNumDevices = numDevices;
DriverHandleImp *driverHandleImp = static_cast<DriverHandleImp *>(driver);
bool useDevicesFromApi = true;
bool useDeviceAlloc = isEventPoolDeviceAllocationFlagSet();
if (numDevices == 0) {
numDevices = static_cast<uint32_t>(driverHandleImp->devices.size());
currentNumDevices = static_cast<uint32_t>(driverHandleImp->devices.size());
useDevicesFromApi = false;
}
for (uint32_t i = 0u; i < numDevices; i++) {
for (uint32_t i = 0u; i < currentNumDevices; i++) {
Device *eventDevice = nullptr;
if (useDevicesFromApi) {
@@ -74,14 +75,11 @@ ze_result_t EventPoolImp::initialize(DriverHandle *driver, Context *context, uin
}
rootDeviceIndices.remove_duplicates();
auto &hwHelper = devices[0]->getHwHelper();
auto &hwInfo = getDevice()->getHwInfo();
useDeviceAlloc |= L0HwHelper::get(hwInfo.platform.eRenderCoreFamily).alwaysAllocateEventInLocalMem();
useDeviceAlloc |= L0HwHelper::get(getDevice()->getHwInfo().platform.eRenderCoreFamily).alwaysAllocateEventInLocalMem();
initializeSizeParameters(numDevices, phDevices, *driverHandleImp, hwInfo);
eventAlignment = static_cast<uint32_t>(hwHelper.getTimestampPacketAllocatorAlignment());
eventSize = static_cast<uint32_t>(alignUp(EventPacketsCount::eventPackets * hwHelper.getSingleTimestampPacketSize(), eventAlignment));
size_t alignedSize = alignUp<size_t>(numEvents * eventSize, MemoryConstants::pageSize64k);
NEO::AllocationType allocationType = isEventPoolTimestampFlagSet() ? NEO::AllocationType::TIMESTAMP_PACKET_TAG_BUFFER
: NEO::AllocationType::BUFFER_HOST_MEMORY;
if (this->devices.size() > 1) {
@@ -97,7 +95,7 @@ ze_result_t EventPoolImp::initialize(DriverHandle *driver, Context *context, uin
bool allocatedMemory = false;
if (useDeviceAlloc) {
NEO::AllocationProperties allocationProperties{*rootDeviceIndices.begin(), alignedSize, allocationType, devices[0]->getNEODevice()->getDeviceBitfield()};
NEO::AllocationProperties allocationProperties{*rootDeviceIndices.begin(), this->eventPoolSize, allocationType, devices[0]->getNEODevice()->getDeviceBitfield()};
allocationProperties.alignment = eventAlignment;
if (eventPoolFlags & ZE_EVENT_POOL_FLAG_IPC) {
this->isShareableEventMemory = true;
@@ -110,7 +108,7 @@ ze_result_t EventPoolImp::initialize(DriverHandle *driver, Context *context, uin
}
} else {
NEO::AllocationProperties allocationProperties{*rootDeviceIndices.begin(), alignedSize, allocationType, systemMemoryBitfield};
NEO::AllocationProperties allocationProperties{*rootDeviceIndices.begin(), this->eventPoolSize, allocationType, systemMemoryBitfield};
allocationProperties.alignment = eventAlignment;
eventPoolPtr = driver->getMemoryManager()->createMultiGraphicsAllocationInSystemMemoryPool(rootDeviceIndices,
@@ -158,6 +156,22 @@ ze_result_t EventPoolImp::createEvent(const ze_event_desc_t *desc, ze_event_hand
return ZE_RESULT_SUCCESS;
}
// Computes and stores the sizing parameters of this event pool:
//  - event alignment, taken from the NEO hardware helper,
//  - packets per event: EventPacketsCount::eventPackets, or the driver-reported
//    maximum for the given devices when dynamic packet counts are enabled,
//  - per-event size, aligned up to the event alignment,
//  - total pool size for numEvents events, aligned up to a 64 KB page.
void EventPoolImp::initializeSizeParameters(uint32_t numDevices, ze_device_handle_t *deviceHandles, DriverHandleImp &driver, const NEO::HardwareInfo &hwInfo) {
    const auto coreFamily = hwInfo.platform.eRenderCoreFamily;
    auto &l0HwHelper = L0HwHelper::get(coreFamily);
    auto &hwHelper = NEO::HwHelper::get(coreFamily);

    setEventAlignment(static_cast<uint32_t>(hwHelper.getTimestampPacketAllocatorAlignment()));

    if (l0HwHelper.useDynamicEventPacketsCount(hwInfo)) {
        eventPackets = driver.getEventMaxPacketCount(numDevices, deviceHandles);
    } else {
        eventPackets = EventPacketsCount::eventPackets;
    }

    auto unalignedEventSize = eventPackets * hwHelper.getSingleTimestampPacketSize();
    setEventSize(static_cast<uint32_t>(alignUp(unalignedEventSize, eventAlignment)));

    // Uses the eventSize just stored by setEventSize().
    eventPoolSize = alignUp<size_t>(this->numEvents * eventSize, MemoryConstants::pageSize64k);
}
ze_result_t Event::destroy() {
delete this;
return ZE_RESULT_SUCCESS;

View File

@@ -26,6 +26,7 @@ struct MetricStreamer;
struct ContextImp;
struct Context;
struct DriverHandle;
struct DriverHandleImp;
struct Device;
namespace EventPacketsCount {
@@ -104,7 +105,7 @@ struct Event : _ze_event_handle_t {
// Advances the event to its next kernel slot.
// Triggers UNRECOVERABLE_IF when the new count exceeds this event's own
// maxKernelCount limit.
void increaseKernelCount() {
    kernelCount++;
    // Only the per-event limit is checked. The former additional check against
    // the compile-time EventPacketsCount::maxKernelSplit was superseded by it:
    // it was redundant whenever maxKernelCount <= maxKernelSplit and would
    // reject valid counts if the per-event limit were ever raised above it.
    UNRECOVERABLE_IF(kernelCount > maxKernelCount);
}
uint32_t getKernelCount() const {
return kernelCount;
@@ -123,6 +124,16 @@ struct Event : _ze_event_handle_t {
this->isCompleted = false;
}
// Returns the packet-count limit stored in maxPacketCount for this event.
uint32_t getMaxPacketsCount() const {
return maxPacketCount;
}
// Overwrites the kernel-count limit stored in maxKernelCount.
// NOTE(review): only the limit is changed here; no other member is resized or
// reset — callers raising the limit should confirm dependent storage is large enough.
void setMaxKernelCount(uint32_t value) {
maxKernelCount = value;
}
// Returns the kernel-count limit stored in maxKernelCount for this event.
uint32_t getMaxKernelCount() const {
return maxKernelCount;
}
uint64_t globalStartTS;
uint64_t globalEndTS;
uint64_t contextStartTS;
@@ -152,7 +163,9 @@ struct Event : _ze_event_handle_t {
size_t gpuStartTimestamp = 0u;
size_t gpuEndTimestamp = 0u;
uint32_t maxKernelCount = 0;
uint32_t kernelCount = 1u;
uint32_t maxPacketCount = 0;
bool isTimestampEvent = false;
bool usingContextEndOffset = false;
@@ -286,6 +299,9 @@ struct EventPoolImp : public EventPool {
void setEventSize(uint32_t size) override { eventSize = size; }
void setEventAlignment(uint32_t alignment) override { eventAlignment = alignment; }
size_t getNumEvents() { return numEvents; }
uint32_t getEventMaxPackets() { return eventPackets; }
size_t getEventPoolSize() const { return eventPoolSize; }
void initializeSizeParameters(uint32_t numDevices, ze_device_handle_t *deviceHandles, DriverHandleImp &driver, const NEO::HardwareInfo &hwInfo);
Device *getDevice() override { return devices[0]; }
@@ -297,8 +313,10 @@ struct EventPoolImp : public EventPool {
bool isShareableEventMemory = false;
protected:
size_t eventPoolSize = 0;
uint32_t eventAlignment = 0;
uint32_t eventSize = 0;
uint32_t eventPackets = 0;
};
} // namespace L0

View File

@@ -22,7 +22,15 @@ Event *Event::create(EventPool *eventPool, const ze_event_desc_t *desc, Device *
event->setEventTimestampFlag(true);
}
auto neoDevice = device->getNEODevice();
event->kernelEventCompletionData = std::make_unique<KernelEventCompletionData<TagSizeT>[]>(EventPacketsCount::maxKernelSplit);
auto &hwInfo = neoDevice->getHardwareInfo();
auto &l0HwHelper = L0HwHelper::get(hwInfo.platform.eRenderCoreFamily);
uint32_t maxKernels = EventPacketsCount::maxKernelSplit;
if (l0HwHelper.useDynamicEventPacketsCount(hwInfo)) {
maxKernels = l0HwHelper.getEventMaxKernelCount(hwInfo);
}
event->kernelEventCompletionData = std::make_unique<KernelEventCompletionData<TagSizeT>[]>(maxKernels);
auto alloc = eventPool->getAllocation().getGraphicsAllocation(neoDevice->getRootDeviceIndex());
@@ -32,7 +40,9 @@ Event *Event::create(EventPool *eventPool, const ze_event_desc_t *desc, Device *
event->signalScope = desc->signal;
event->waitScope = desc->wait;
event->csr = neoDevice->getDefaultEngine().commandStreamReceiver;
bool useContextEndOffset = L0HwHelper::get(neoDevice->getHardwareInfo().platform.eRenderCoreFamily).multiTileCapablePlatform();
event->maxKernelCount = maxKernels;
event->maxPacketCount = static_cast<EventPoolImp *>(eventPool)->getEventMaxPackets();
bool useContextEndOffset = l0HwHelper.multiTileCapablePlatform();
int32_t overrideUseContextEndOffset = NEO::DebugManager.flags.UseContextEndOffsetForEventCompletion.get();
if (overrideUseContextEndOffset != -1) {
useContextEndOffset = !!overrideUseContextEndOffset;
@@ -293,7 +303,7 @@ ze_result_t EventImp<TagSizeT>::reset() {
template <typename TagSizeT>
void EventImp<TagSizeT>::resetDeviceCompletionData() {
this->kernelCount = EventPacketsCount::maxKernelSplit;
this->kernelCount = this->maxKernelCount;
for (uint32_t i = 0; i < kernelCount; i++) {
this->kernelEventCompletionData[i].setPacketsUsed(NEO::TimestampPacketSizeControl::preferredPacketCount);
}

View File

@@ -60,4 +60,11 @@ bool L0HwHelper::useCompactL3FlushEventPacket(const NEO::HardwareInfo &hwInfo) {
return false;
}
// Tells whether event packet counts are computed dynamically.
// Controlled solely by the UseDynamicEventPacketsCount debug flag:
// -1 means "no override" and yields false; any other value is converted to
// bool (0 -> false, non-zero -> true). hwInfo is not consulted.
bool L0HwHelper::useDynamicEventPacketsCount(const NEO::HardwareInfo &hwInfo) {
    const auto debugOverride = NEO::DebugManager.flags.UseDynamicEventPacketsCount.get();
    const bool overrideRequested = (debugOverride != -1);
    return overrideRequested && (debugOverride != 0);
}
} // namespace L0

View File

@@ -36,6 +36,7 @@ class L0HwHelper {
static bool enableImmediateCmdListHeapSharing(const NEO::HardwareInfo &hwInfo, bool cmdlistSupport);
static bool usePipeControlMultiKernelEventSync(const NEO::HardwareInfo &hwInfo);
static bool useCompactL3FlushEventPacket(const NEO::HardwareInfo &hwInfo);
static bool useDynamicEventPacketsCount(const NEO::HardwareInfo &hwInfo);
virtual void setAdditionalGroupProperty(ze_command_queue_group_properties_t &groupProperty, NEO::EngineGroupT &group) const = 0;
virtual L0::Event *createEvent(L0::EventPool *eventPool, const ze_event_desc_t *desc, L0::Device *device) const = 0;
@@ -53,6 +54,9 @@ class L0HwHelper {
virtual bool platformSupportsFrontEndTracking(const NEO::HardwareInfo &hwInfo) const = 0;
virtual bool platformSupportsPipelineSelectTracking(const NEO::HardwareInfo &hwInfo) const = 0;
virtual uint32_t getEventMaxKernelCount(const NEO::HardwareInfo &hwInfo) const = 0;
virtual uint32_t getEventBaseMaxPacketCount(const NEO::HardwareInfo &hwInfo) const = 0;
protected:
L0HwHelper() = default;
};
@@ -80,6 +84,9 @@ class L0HwHelperHw : public L0HwHelper {
bool platformSupportsStateComputeModeTracking(const NEO::HardwareInfo &hwInfo) const override;
bool platformSupportsFrontEndTracking(const NEO::HardwareInfo &hwInfo) const override;
bool platformSupportsPipelineSelectTracking(const NEO::HardwareInfo &hwInfo) const override;
uint32_t getEventMaxKernelCount(const NEO::HardwareInfo &hwInfo) const override;
uint32_t getEventBaseMaxPacketCount(const NEO::HardwareInfo &hwInfo) const override;
};
} // namespace L0

View File

@@ -38,24 +38,4 @@ void L0HwHelperHw<Family>::setAdditionalGroupProperty(ze_command_queue_group_pro
}
}
// Generic implementation: always returns false; hwInfo is not consulted.
template <typename Family>
bool L0HwHelperHw<Family>::platformSupportsCmdListHeapSharing(const NEO::HardwareInfo &hwInfo) const {
return false;
}
// Generic implementation: always returns false; hwInfo is not consulted.
template <typename Family>
bool L0HwHelperHw<Family>::platformSupportsStateComputeModeTracking(const NEO::HardwareInfo &hwInfo) const {
return false;
}
// Generic implementation: always returns false; hwInfo is not consulted.
template <typename Family>
bool L0HwHelperHw<Family>::platformSupportsFrontEndTracking(const NEO::HardwareInfo &hwInfo) const {
return false;
}
// Generic implementation: always returns false; hwInfo is not consulted.
template <typename Family>
bool L0HwHelperHw<Family>::platformSupportsPipelineSelectTracking(const NEO::HardwareInfo &hwInfo) const {
return false;
}
} // namespace L0

View File

@@ -38,4 +38,14 @@ bool L0HwHelperHw<Family>::platformSupportsPipelineSelectTracking(const NEO::Har
return false;
}
// Returns the maximum kernel count per event for this implementation:
// a constant 1, independent of hwInfo.
template <typename Family>
uint32_t L0HwHelperHw<Family>::getEventMaxKernelCount(const NEO::HardwareInfo &hwInfo) const {
return 1;
}
// Returns the base maximum packet count per event for this implementation:
// a constant 1, independent of hwInfo.
template <typename Family>
uint32_t L0HwHelperHw<Family>::getEventBaseMaxPacketCount(const NEO::HardwareInfo &hwInfo) const {
return 1u;
}
} // namespace L0

View File

@@ -0,0 +1,58 @@
/*
* Copyright (C) 2022 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/source/helpers/hw_helper.h"
#include "level_zero/core/source/hw_helpers/l0_hw_helper.h"
namespace L0 {
// Generic implementation: always reports the platform as not multi-tile capable.
template <typename Family>
bool L0HwHelperHw<Family>::multiTileCapablePlatform() const {
return false;
}
// Generic implementation: always returns false; hwInfo is not consulted.
template <typename Family>
bool L0HwHelperHw<Family>::platformSupportsCmdListHeapSharing(const NEO::HardwareInfo &hwInfo) const {
return false;
}
// Generic implementation: always returns false; hwInfo is not consulted.
template <typename Family>
bool L0HwHelperHw<Family>::platformSupportsStateComputeModeTracking(const NEO::HardwareInfo &hwInfo) const {
return false;
}
// Generic implementation: always returns false; hwInfo is not consulted.
template <typename Family>
bool L0HwHelperHw<Family>::platformSupportsFrontEndTracking(const NEO::HardwareInfo &hwInfo) const {
return false;
}
// Generic implementation: always returns false; hwInfo is not consulted.
template <typename Family>
bool L0HwHelperHw<Family>::platformSupportsPipelineSelectTracking(const NEO::HardwareInfo &hwInfo) const {
return false;
}
// Returns the maximum number of kernels a single event may track.
// When pipe-control multi-kernel event sync is in use the answer is 1;
// otherwise it is EventPacketsCount::maxKernelSplit.
template <typename Family>
uint32_t L0HwHelperHw<Family>::getEventMaxKernelCount(const NEO::HardwareInfo &hwInfo) const {
    if (L0HwHelper::usePipeControlMultiKernelEventSync(hwInfo)) {
        return 1;
    }
    return EventPacketsCount::maxKernelSplit;
}
// Returns the base maximum number of packets for one event.
// Starts from getEventMaxKernelCount(hwInfo) and adds one extra packet when
// DC flush is enabled and the compact L3-flush event packet is not used.
template <typename Family>
uint32_t L0HwHelperHw<Family>::getEventBaseMaxPacketCount(const NEO::HardwareInfo &hwInfo) const {
    uint32_t packetCount = getEventMaxKernelCount(hwInfo);

    const bool dcFlushEnabled = NEO::MemorySynchronizationCommands<Family>::getDcFlushEnable(true, hwInfo);
    // The compact-packet query is evaluated only when DC flush is enabled.
    if (dcFlushEnabled && !L0HwHelper::useCompactL3FlushEventPacket(hwInfo)) {
        ++packetCount;
    }
    return packetCount;
}
} // namespace L0

View File

@@ -9,7 +9,7 @@
#include "level_zero/core/source/helpers/l0_populate_factory.h"
#include "level_zero/core/source/hw_helpers/l0_hw_helper_base.inl"
#include "level_zero/core/source/hw_helpers/l0_hw_helper_skl_and_later.inl"
#include "level_zero/core/source/hw_helpers/l0_hw_helper_xehp_and_later.inl"
namespace L0 {
@@ -32,6 +32,10 @@ bool L0HwHelperHw<Family>::multiTileCapablePlatform() const {
return true;
}
// Explicit specialization with an empty body: groupProperty and group are
// left untouched for this Family.
template <>
void L0HwHelperHw<Family>::setAdditionalGroupProperty(ze_command_queue_group_properties_t &groupProperty, NEO::EngineGroupT &group) const {
}
template <>
bool L0HwHelperHw<Family>::platformSupportsPipelineSelectTracking(const NEO::HardwareInfo &hwInfo) const {
return true;

View File

@@ -10,6 +10,7 @@
#include "level_zero/core/source/helpers/l0_populate_factory.h"
#include "level_zero/core/source/hw_helpers/l0_hw_helper_base.inl"
#include "level_zero/core/source/hw_helpers/l0_hw_helper_pvc_and_later.inl"
#include "level_zero/core/source/hw_helpers/l0_hw_helper_xehp_and_later.inl"
namespace L0 {

View File

@@ -9,7 +9,7 @@
#include "level_zero/core/source/helpers/l0_populate_factory.h"
#include "level_zero/core/source/hw_helpers/l0_hw_helper_base.inl"
#include "level_zero/core/source/hw_helpers/l0_hw_helper_skl_and_later.inl"
#include "level_zero/core/source/hw_helpers/l0_hw_helper_xehp_and_later.inl"
namespace L0 {
@@ -27,6 +27,10 @@ bool L0HwHelperHw<Family>::isResumeWARequired() {
return true;
}
// Explicit specialization with an empty body: groupProperty and group are
// left untouched for this Family.
template <>
void L0HwHelperHw<Family>::setAdditionalGroupProperty(ze_command_queue_group_properties_t &groupProperty, NEO::EngineGroupT &group) const {
}
template <>
bool L0HwHelperHw<Family>::platformSupportsCmdListHeapSharing(const NEO::HardwareInfo &hwInfo) const {
return true;

View File

@@ -83,6 +83,7 @@ struct Mock<Device> : public Device {
ADDMETHOD_NOBASE(obtainReusableAllocation, NEO::GraphicsAllocation *, nullptr, (size_t requiredSize, NEO::AllocationType type))
ADDMETHOD_NOBASE_VOIDRETURN(storeReusableAllocation, (NEO::GraphicsAllocation & alloc));
ADDMETHOD_NOBASE(getFabricVertex, ze_result_t, ZE_RESULT_SUCCESS, (ze_fabric_vertex_handle_t * phVertex));
ADDMETHOD_CONST_NOBASE(getEventMaxPacketCount, uint32_t, 8, ())
DebugSession *createDebugSession(const zet_debug_config_t &config, ze_result_t &result, bool isRootAttach) override {
result = ZE_RESULT_ERROR_UNSUPPORTED_FEATURE;

View File

@@ -35,6 +35,7 @@ struct Mock<DriverHandle> : public DriverHandleImp {
ADDMETHOD_NOBASE(releaseImportedPointer, ze_result_t, ZE_RESULT_ERROR_UNSUPPORTED_FEATURE, (void *ptr))
ADDMETHOD_NOBASE(getHostPointerBaseAddress, ze_result_t, ZE_RESULT_ERROR_UNSUPPORTED_FEATURE, (void *ptr, void **baseAddress))
ADDMETHOD_NOBASE(findHostPointerAllocation, NEO::GraphicsAllocation *, nullptr, (void *ptr, size_t size, uint32_t rootDeviceIndex))
ADDMETHOD_CONST_NOBASE(getEventMaxPacketCount, uint32_t, 8, (uint32_t, ze_device_handle_t *))
void setupDevices(std::vector<std::unique_ptr<NEO::Device>> devices);

View File

@@ -22,6 +22,7 @@ struct WhiteBox<::L0::Event> : public ::L0::Event {
using BaseClass::csr;
using BaseClass::hostAddress;
using BaseClass::l3FlushAppliedOnKernel;
using BaseClass::maxKernelCount;
};
using Event = WhiteBox<::L0::Event>;
@@ -71,6 +72,7 @@ class MockEvent : public ::L0::Event {
using ::L0::Event::gpuStartTimestamp;
using ::L0::Event::isCompleted;
using ::L0::Event::l3FlushAppliedOnKernel;
using ::L0::Event::maxKernelCount;
MockEvent() {
mockAllocation.reset(new NEO::MockGraphicsAllocation(0,

View File

@@ -11,6 +11,7 @@
#include "shared/test/common/test_macros/hw_test.h"
#include "level_zero/core/source/cmdlist/cmdlist_hw_immediate.h"
#include "level_zero/core/source/hw_helpers/l0_hw_helper.h"
#include "level_zero/core/test/unit_tests/fixtures/cmdlist_fixture.h"
#include "level_zero/core/test/unit_tests/fixtures/device_fixture.h"
#include "level_zero/core/test/unit_tests/mocks/mock_cmdlist.h"
@@ -91,16 +92,29 @@ HWTEST_F(CommandListAppendEventReset, givenCmdlistWhenResetEventWithTimeStampIsA
gpuAddress += event->getContextEndOffset();
}
auto itorSdi = findAll<MI_STORE_DATA_IMM *>(cmdList.begin(), cmdList.end());
uint32_t sdiFound = 0;
ASSERT_NE(0u, itorSdi.size());
for (auto it : itorSdi) {
auto cmd = genCmdCast<MI_STORE_DATA_IMM *>(*it);
EXPECT_EQ(gpuAddress, cmd->getAddress());
gpuAddress += event->getSinglePacketSize();
sdiFound++;
auto &hwInfo = device->getHwInfo();
auto &l0HwHelper = L0HwHelper::get(hwInfo.platform.eRenderCoreFamily);
uint32_t maxPackets = EventPacketsCount::eventPackets;
if (l0HwHelper.useDynamicEventPacketsCount(hwInfo)) {
maxPackets = l0HwHelper.getEventBaseMaxPacketCount(hwInfo);
}
auto itorSdi = findAll<MI_STORE_DATA_IMM *>(cmdList.begin(), cmdList.end());
if (maxPackets == 1) {
EXPECT_EQ(0u, itorSdi.size());
} else {
uint32_t sdiFound = 0;
ASSERT_NE(0u, itorSdi.size());
for (auto it : itorSdi) {
auto cmd = genCmdCast<MI_STORE_DATA_IMM *>(*it);
EXPECT_EQ(gpuAddress, cmd->getAddress());
gpuAddress += event->getSinglePacketSize();
sdiFound++;
}
EXPECT_EQ(EventPacketsCount::eventPackets - 1, sdiFound);
}
EXPECT_EQ(EventPacketsCount::eventPackets - 1, sdiFound);
uint32_t postSyncFound = 0;
for (auto it : itorPC) {
@@ -216,6 +230,9 @@ HWTEST2_F(CommandListAppendEventReset, givenTimestampEventUsedInResetThenPipeCon
using POST_SYNC_OPERATION = typename PIPE_CONTROL::POST_SYNC_OPERATION;
auto &commandContainer = commandList->commandContainer;
auto &hwInfo = device->getHwInfo();
auto &l0HwHelper = L0HwHelper::get(hwInfo.platform.eRenderCoreFamily);
ze_event_pool_desc_t eventPoolDesc = {};
eventPoolDesc.count = 1;
eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP;
@@ -234,7 +251,12 @@ HWTEST2_F(CommandListAppendEventReset, givenTimestampEventUsedInResetThenPipeCon
auto contextOffset = event->getContextEndOffset();
auto baseAddr = event->getGpuAddress(device);
auto gpuAddress = ptrOffset(baseAddr, contextOffset);
gpuAddress += ((EventPacketsCount::eventPackets - 1) * event->getSinglePacketSize());
uint32_t maxPackets = EventPacketsCount::eventPackets;
if (l0HwHelper.useDynamicEventPacketsCount(hwInfo)) {
maxPackets = l0HwHelper.getEventBaseMaxPacketCount(hwInfo);
}
gpuAddress += ((maxPackets - 1) * event->getSinglePacketSize());
GenCmdList cmdList;
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(

View File

@@ -217,6 +217,8 @@ HWTEST_F(CommandListAppendWaitOnEvent, WhenAppendingWaitOnTimestampEventWithThre
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
auto event = std::unique_ptr<L0::Event>(L0::Event::create<uint32_t>(eventPool.get(), &eventDesc, device));
event->setMaxKernelCount(3u);
event->setPacketsInUse(3u);
event->increaseKernelCount();
event->setPacketsInUse(3u);

View File

@@ -158,10 +158,18 @@ HWTEST_F(EventPoolCreate, givenTimestampEventsThenEventSizeSufficientForAllKerne
std::unique_ptr<L0::EventPool> eventPool(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result));
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
ASSERT_NE(nullptr, eventPool);
uint32_t maxKernelSplit = 3;
uint32_t packetsSize = maxKernelSplit * NEO::TimestampPacketSizeControl::preferredPacketCount *
auto &hwInfo = device->getHwInfo();
auto &l0HwHelper = L0HwHelper::get(hwInfo.platform.eRenderCoreFamily);
auto &hwHelper = NEO::HwHelper::get(hwInfo.platform.eRenderCoreFamily);
uint32_t maxPacketCount = EventPacketsCount::maxKernelSplit * NEO::TimestampPacketSizeControl::preferredPacketCount;
if (l0HwHelper.useDynamicEventPacketsCount(hwInfo)) {
maxPacketCount = l0HwHelper.getEventBaseMaxPacketCount(hwInfo);
}
uint32_t packetsSize = maxPacketCount *
static_cast<uint32_t>(NEO::TimestampPackets<typename FamilyType::TimestampPacketType>::getSinglePacketSize());
uint32_t kernelTimestampsSize = static_cast<uint32_t>(alignUp(packetsSize, 4 * MemoryConstants::cacheLineSize));
uint32_t kernelTimestampsSize = static_cast<uint32_t>(alignUp(packetsSize, hwHelper.getTimestampPacketAllocatorAlignment()));
EXPECT_EQ(kernelTimestampsSize, eventPool->getEventSize());
}
@@ -661,7 +669,8 @@ TEST_F(EventCreate, givenEventWhenSignaledAndResetFromTheHostThenCorrectDataAndO
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
ASSERT_NE(nullptr, eventPool);
auto &l0HwHelper = L0HwHelper::get(device->getHwInfo().platform.eRenderCoreFamily);
auto &hwInfo = device->getHwInfo();
auto &l0HwHelper = L0HwHelper::get(hwInfo.platform.eRenderCoreFamily);
auto event = std::unique_ptr<L0::Event>(l0HwHelper.createEvent(eventPool.get(), &eventDesc, device));
ASSERT_NE(nullptr, event);
@@ -676,6 +685,10 @@ TEST_F(EventCreate, givenEventWhenSignaledAndResetFromTheHostThenCorrectDataAndO
eventCompletionMemory = ptrOffset(eventCompletionMemory, event->getContextEndOffset());
}
uint32_t maxPacketsCount = EventPacketsCount::maxKernelSplit * NEO::TimestampPacketSizeControl::preferredPacketCount;
if (l0HwHelper.useDynamicEventPacketsCount(hwInfo)) {
maxPacketsCount = l0HwHelper.getEventBaseMaxPacketCount(hwInfo);
}
for (uint32_t i = 0; i < maxPacketsCount; i++) {
EXPECT_EQ(Event::STATE_INITIAL, *eventCompletionMemory);
eventCompletionMemory = ptrOffset(eventCompletionMemory, event->getSinglePacketSize());
@@ -1142,9 +1155,9 @@ struct EventCreateAllocationResidencyTest : public ::testing::Test {
L0::Device *device = nullptr;
};
class TimestampEventCreate : public Test<DeviceFixture> {
class TimestampEventCreateFixture : public DeviceFixture {
public:
void SetUp() override {
void setUp() {
DeviceFixture::setUp();
ze_event_pool_desc_t eventPoolDesc = {};
eventPoolDesc.count = 1;
@@ -1163,7 +1176,7 @@ class TimestampEventCreate : public Test<DeviceFixture> {
ASSERT_NE(nullptr, event);
}
void TearDown() override {
void tearDown() {
event.reset(nullptr);
eventPool.reset(nullptr);
DeviceFixture::tearDown();
@@ -1173,13 +1186,33 @@ class TimestampEventCreate : public Test<DeviceFixture> {
std::unique_ptr<L0::EventImp<uint32_t>> event;
};
// Variant of TimestampEventCreateFixture that sets the
// UsePipeControlMultiKernelEventSync debug flag to 0 before the base fixture
// performs its own setUp.
struct TimestampEventCreateMultiKernelFixture : public TimestampEventCreateFixture {
    // Constructed with the fixture, i.e. before setUp() modifies the flag.
    // NOTE(review): presumably restores debug flags on destruction — confirm
    // against DebugManagerStateRestore.
    DebugManagerStateRestore restorer;

    void setUp() {
        DebugManager.flags.UsePipeControlMultiKernelEventSync.set(0);
        TimestampEventCreateFixture::setUp();
    }
};
using TimestampEventCreate = Test<TimestampEventCreateFixture>;
using TimestampEventCreateMultiKernel = Test<TimestampEventCreateMultiKernelFixture>;
// Verifies that the event provided by the fixture reports
// isEventTimestampFlagSet() as true.
TEST_F(TimestampEventCreate, givenEventCreatedWithTimestampThenIsTimestampEventFlagSet) {
EXPECT_TRUE(event->isEventTimestampFlagSet());
}
TEST_F(TimestampEventCreate, givenEventTimestampsCreatedWhenResetIsInvokeThenCorrectDataAreSet) {
auto &hwInfo = device->getHwInfo();
auto &l0HwHelper = L0HwHelper::get(hwInfo.platform.eRenderCoreFamily);
uint32_t maxKernelCount = EventPacketsCount::maxKernelSplit;
if (l0HwHelper.useDynamicEventPacketsCount(hwInfo)) {
maxKernelCount = l0HwHelper.getEventMaxKernelCount(hwInfo);
}
EXPECT_NE(nullptr, event->kernelEventCompletionData);
for (auto j = 0u; j < EventPacketsCount::maxKernelSplit; j++) {
for (auto j = 0u; j < maxKernelCount; j++) {
for (auto i = 0u; i < NEO::TimestampPacketSizeControl::preferredPacketCount; i++) {
EXPECT_EQ(static_cast<uint64_t>(Event::State::STATE_INITIAL), event->kernelEventCompletionData[j].getContextStartValue(i));
EXPECT_EQ(static_cast<uint64_t>(Event::State::STATE_INITIAL), event->kernelEventCompletionData[j].getGlobalStartValue(i));
@@ -1212,7 +1245,7 @@ TEST_F(TimestampEventCreate, givenTimestampEventThenAllocationsIsDependentIfAllo
}
}
TEST_F(TimestampEventCreate, givenEventTimestampWhenPacketCountIsSetThenCorrectOffsetIsReturned) {
HWTEST2_F(TimestampEventCreateMultiKernel, givenEventTimestampWhenPacketCountIsSetThenCorrectOffsetIsReturned, IsAtLeastXeHpCore) {
EXPECT_EQ(1u, event->getPacketsInUse());
auto gpuAddr = event->getGpuAddress(device);
EXPECT_EQ(gpuAddr, event->getPacketAddress(device));
@@ -1242,7 +1275,7 @@ TEST_F(TimestampEventCreate, givenEventWhenSignaledAndResetFromTheHostThenCorrec
event->reset();
result = event->queryStatus();
EXPECT_EQ(ZE_RESULT_NOT_READY, result);
for (auto j = 0u; j < EventPacketsCount::maxKernelSplit; j++) {
for (auto j = 0u; j < event->getKernelCount(); j++) {
for (auto i = 0u; i < NEO::TimestampPacketSizeControl::preferredPacketCount; i++) {
EXPECT_EQ(Event::State::STATE_INITIAL, event->kernelEventCompletionData[j].getContextStartValue(i));
EXPECT_EQ(Event::State::STATE_INITIAL, event->kernelEventCompletionData[j].getGlobalStartValue(i));
@@ -1446,8 +1479,9 @@ TEST_F(TimestampEventCreate, givenEventWhenQueryingTimestampExpThenCorrectDataSe
}
}
TEST_F(TimestampEventCreate, givenTimeStampEventUsedOnTwoKernelsWhenL3FlushSetOnFirstKernelThenDoNotUseSecondPacketOfFirstKernel) {
HWTEST2_F(TimestampEventCreateMultiKernel, givenTimeStampEventUsedOnTwoKernelsWhenL3FlushSetOnFirstKernelThenDoNotUseSecondPacketOfFirstKernel, IsAtLeastXeHpCore) {
typename MockTimestampPackets32::Packet packetData[4];
event->hostAddress = packetData;
constexpr uint32_t kernelStartValue = 5u;
@@ -1489,8 +1523,9 @@ TEST_F(TimestampEventCreate, givenTimeStampEventUsedOnTwoKernelsWhenL3FlushSetOn
EXPECT_EQ(static_cast<uint64_t>(kernelEndValue), results.global.kernelEnd);
}
TEST_F(TimestampEventCreate, givenTimeStampEventUsedOnTwoKernelsWhenL3FlushSetOnSecondKernelThenDoNotUseSecondPacketOfSecondKernel) {
HWTEST2_F(TimestampEventCreateMultiKernel, givenTimeStampEventUsedOnTwoKernelsWhenL3FlushSetOnSecondKernelThenDoNotUseSecondPacketOfSecondKernel, IsAtLeastXeHpCore) {
typename MockTimestampPackets32::Packet packetData[4];
event->hostAddress = packetData;
constexpr uint32_t kernelStartValue = 5u;
@@ -1532,7 +1567,7 @@ TEST_F(TimestampEventCreate, givenTimeStampEventUsedOnTwoKernelsWhenL3FlushSetOn
EXPECT_EQ(static_cast<uint64_t>(kernelEndValue), results.global.kernelEnd);
}
TEST_F(TimestampEventCreate, givenOverflowingTimeStampDataOnTwoKernelsWhenQueryKernelTimestampIsCalledOverflowIsObserved) {
HWTEST2_F(TimestampEventCreateMultiKernel, givenOverflowingTimeStampDataOnTwoKernelsWhenQueryKernelTimestampIsCalledOverflowIsObserved, IsAtLeastXeHpCore) {
typename MockTimestampPackets32::Packet packetData[4] = {};
event->hostAddress = packetData;
@@ -1993,10 +2028,12 @@ TEST_F(EventTests, givenEventUseMultiplePacketsWhenHostSignalThenExpectAllPacket
}
}
TEST_F(EventTests, WhenSettingL3FlushOnEventThenSetOnParticularKernel) {
HWTEST2_F(EventTests, WhenSettingL3FlushOnEventThenSetOnParticularKernel, IsAtLeastXeHpCore) {
DebugManagerStateRestore restorer;
DebugManager.flags.UsePipeControlMultiKernelEventSync.set(0);
auto event = whiteboxCast(Event::create<uint32_t>(eventPool, &eventDesc, device));
ASSERT_NE(event, nullptr);
EXPECT_FALSE(event->getL3FlushForCurrenKernel());
event->setL3FlushForCurrentKernel();
@@ -2071,10 +2108,18 @@ HWTEST_F(EventSizeTests, whenCreatingEventPoolThenUseCorrectSizeAndAlignment) {
eventPool.reset(static_cast<EventPoolImp *>(EventPool::create(device->getDriverHandle(), context, 1, &hDevice, &eventPoolDesc, result)));
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
auto &hwHelper = device->getHwHelper();
auto &hwInfo = device->getHwInfo();
auto &l0HwHelper = L0HwHelper::get(hwInfo.platform.eRenderCoreFamily);
uint32_t packetCount = EventPacketsCount::eventPackets;
if (l0HwHelper.useDynamicEventPacketsCount(hwInfo)) {
packetCount = l0HwHelper.getEventBaseMaxPacketCount(hwInfo);
}
auto expectedAlignment = static_cast<uint32_t>(hwHelper.getTimestampPacketAllocatorAlignment());
auto singlePacketSize = TimestampPackets<typename FamilyType::TimestampPacketType>::getSinglePacketSize();
auto expectedSize = static_cast<uint32_t>(alignUp(EventPacketsCount::eventPackets * singlePacketSize, expectedAlignment));
auto expectedSize = static_cast<uint32_t>(alignUp(packetCount * singlePacketSize, expectedAlignment));
EXPECT_EQ(expectedSize, eventPool->getEventSize());
@@ -2097,10 +2142,17 @@ HWTEST_F(EventSizeTests, whenCreatingEventPoolThenUseCorrectSizeAndAlignment) {
}
HWTEST_F(EventSizeTests, givenDebugFlagwhenCreatingEventPoolThenUseCorrectSizeAndAlignment) {
auto &hwHelper = device->getHwHelper();
auto &hwInfo = device->getHwInfo();
auto expectedAlignment = static_cast<uint32_t>(hwHelper.getTimestampPacketAllocatorAlignment());
auto &l0HwHelper = L0HwHelper::get(hwInfo.platform.eRenderCoreFamily);
uint32_t packetCount = EventPacketsCount::eventPackets;
if (l0HwHelper.useDynamicEventPacketsCount(hwInfo)) {
packetCount = l0HwHelper.getEventBaseMaxPacketCount(hwInfo);
}
{
DebugManager.flags.OverrideTimestampPacketSize.set(4);
@@ -2109,7 +2161,7 @@ HWTEST_F(EventSizeTests, givenDebugFlagwhenCreatingEventPoolThenUseCorrectSizeAn
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
auto singlePacketSize = TimestampPackets<uint32_t>::getSinglePacketSize();
auto expectedSize = static_cast<uint32_t>(alignUp(EventPacketsCount::eventPackets * singlePacketSize, expectedAlignment));
auto expectedSize = static_cast<uint32_t>(alignUp(packetCount * singlePacketSize, expectedAlignment));
EXPECT_EQ(expectedSize, eventPool->getEventSize());
@@ -2130,7 +2182,7 @@ HWTEST_F(EventSizeTests, givenDebugFlagwhenCreatingEventPoolThenUseCorrectSizeAn
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
auto singlePacketSize = TimestampPackets<uint64_t>::getSinglePacketSize();
auto expectedSize = static_cast<uint32_t>(alignUp(EventPacketsCount::eventPackets * singlePacketSize, expectedAlignment));
auto expectedSize = static_cast<uint32_t>(alignUp(packetCount * singlePacketSize, expectedAlignment));
EXPECT_EQ(expectedSize, eventPool->getEventSize());
@@ -2285,5 +2337,134 @@ TEST_F(EventSynchronizeTest, whenEventSetCsrThenCorrectCsrSet) {
EXPECT_EQ(event->csr, defaultCsr);
}
// Fixture exercising event pools and events while the UseDynamicEventPacketsCount
// debug flag is set to 1. When the multiTile template argument equals 1, the
// CreateMultipleSubDevices (2) and EnableImplicitScaling (1) flags are set as well.
// NOTE(review): the restorer member is presumably what reverts the debug flags
// once the fixture is destroyed - confirm against DebugManagerStateRestore.
template <int32_t multiTile>
struct EventDynamicPacketUseFixture : public DeviceFixture {
    // Debug flags are written before DeviceFixture::setUp() so that they are
    // already in place when the base fixture runs.
    void setUp() {
        DebugManager.flags.UseDynamicEventPacketsCount.set(1);
        if (multiTile == 1) {
            DebugManager.flags.CreateMultipleSubDevices.set(2);
            DebugManager.flags.EnableImplicitScaling.set(1);
        }
        DeviceFixture::setUp();
    }

    // Creates an event pool without an explicit device list (0 devices, nullptr)
    // and checks the pool's max packet count, the event size and the limits
    // reported by an event created from that pool.
    void testAllDevices() {
        auto &hwInfo = device->getHwInfo();
        auto &l0HwHelper = L0HwHelper::get(hwInfo.platform.eRenderCoreFamily);
        auto &hwHelper = NEO::HwHelper::get(hwInfo.platform.eRenderCoreFamily);

        ze_event_pool_desc_t eventPoolDesc = {
            ZE_STRUCTURE_TYPE_EVENT_POOL_DESC,
            nullptr,
            0,
            1};

        ze_result_t result = ZE_RESULT_SUCCESS;
        std::unique_ptr<L0::EventPool> eventPool(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result));
        EXPECT_EQ(ZE_RESULT_SUCCESS, result);
        ASSERT_NE(nullptr, eventPool);

        auto eventPoolMaxPackets = static_cast<L0::EventPoolImp *>(eventPool.get())->getEventMaxPackets();
        auto expectedPoolMaxPackets = l0HwHelper.getEventBaseMaxPacketCount(hwInfo);
        if (multiTile == 1) {
            // Matches the two sub-devices requested through CreateMultipleSubDevices in setUp().
            expectedPoolMaxPackets *= 2;
        }
        EXPECT_EQ(expectedPoolMaxPackets, eventPoolMaxPackets);

        auto eventSize = eventPool->getEventSize();
        auto expectedEventSize = static_cast<uint32_t>(alignUp(expectedPoolMaxPackets * hwHelper.getSingleTimestampPacketSize(), hwHelper.getTimestampPacketAllocatorAlignment()));
        EXPECT_EQ(expectedEventSize, eventSize);

        ze_event_desc_t eventDesc = {
            ZE_STRUCTURE_TYPE_EVENT_DESC,
            nullptr,
            0,
            ZE_EVENT_SCOPE_FLAG_DEVICE,
            ZE_EVENT_SCOPE_FLAG_DEVICE};

        std::unique_ptr<L0::Event> event(Event::create<uint32_t>(eventPool.get(), &eventDesc, device));
        // Stop here instead of dereferencing a null event below.
        ASSERT_NE(nullptr, event);

        EXPECT_EQ(expectedPoolMaxPackets, event->getMaxPacketsCount());

        uint32_t maxKernels = l0HwHelper.getEventMaxKernelCount(hwInfo);
        EXPECT_EQ(maxKernels, event->getMaxKernelCount());
    }

    // Creates an event pool for exactly one device handle - the first sub-device
    // when multiTile equals 1, the fixture device otherwise - and expects the
    // unscaled base packet count for both the pool and the created event.
    void testSingleDevice() {
        ze_result_t result = ZE_RESULT_SUCCESS;

        auto &hwInfo = device->getHwInfo();
        auto &l0HwHelper = L0HwHelper::get(hwInfo.platform.eRenderCoreFamily);
        auto &hwHelper = NEO::HwHelper::get(hwInfo.platform.eRenderCoreFamily);

        ze_event_pool_desc_t eventPoolDesc = {
            ZE_STRUCTURE_TYPE_EVENT_POOL_DESC,
            nullptr,
            0,
            1};

        std::vector<ze_device_handle_t> deviceHandles;
        L0::Device *eventDevice = device;

        if (multiTile == 1) {
            uint32_t count = 2;
            // Zero-initialized so no indeterminate handle is ever read.
            ze_device_handle_t subDevices[2] = {};
            result = device->getSubDevices(&count, subDevices);
            ASSERT_EQ(ZE_RESULT_SUCCESS, result);
            deviceHandles.push_back(subDevices[0]);
            eventDevice = Device::fromHandle(subDevices[0]);
        } else {
            deviceHandles.push_back(device->toHandle());
        }

        std::unique_ptr<L0::EventPool> eventPool(EventPool::create(driverHandle.get(), context, 1, deviceHandles.data(), &eventPoolDesc, result));
        EXPECT_EQ(ZE_RESULT_SUCCESS, result);
        ASSERT_NE(nullptr, eventPool);

        auto eventPoolMaxPackets = static_cast<L0::EventPoolImp *>(eventPool.get())->getEventMaxPackets();
        auto expectedPoolMaxPackets = l0HwHelper.getEventBaseMaxPacketCount(hwInfo);
        EXPECT_EQ(expectedPoolMaxPackets, eventPoolMaxPackets);

        auto eventSize = eventPool->getEventSize();
        auto expectedEventSize = static_cast<uint32_t>(alignUp(expectedPoolMaxPackets * hwHelper.getSingleTimestampPacketSize(), hwHelper.getTimestampPacketAllocatorAlignment()));
        EXPECT_EQ(expectedEventSize, eventSize);

        ze_event_desc_t eventDesc = {
            ZE_STRUCTURE_TYPE_EVENT_DESC,
            nullptr,
            0,
            ZE_EVENT_SCOPE_FLAG_DEVICE,
            ZE_EVENT_SCOPE_FLAG_DEVICE};

        std::unique_ptr<L0::Event> event(Event::create<uint32_t>(eventPool.get(), &eventDesc, eventDevice));
        // Stop here instead of dereferencing a null event below.
        ASSERT_NE(nullptr, event);

        EXPECT_EQ(expectedPoolMaxPackets, event->getMaxPacketsCount());

        uint32_t maxKernels = l0HwHelper.getEventMaxKernelCount(hwInfo);
        EXPECT_EQ(maxKernels, event->getMaxKernelCount());
    }

    DebugManagerStateRestore restorer;
};
// Fixture instantiation with multiTile == 0, i.e. without the sub-device flags.
using EventDynamicPacketUseTest = Test<EventDynamicPacketUseFixture<0>>;

// Delegates to the fixture scenario that creates the pool without a device list.
HWTEST2_F(EventDynamicPacketUseTest, testAllDevices, IsAtLeastSkl) {
    this->testAllDevices();
}

// Delegates to the fixture scenario that creates the pool for one device handle.
HWTEST2_F(EventDynamicPacketUseTest, testSingleDevice, IsAtLeastSkl) {
    this->testSingleDevice();
}
// Fixture instantiation with multiTile == 1, i.e. with the sub-device flags set.
using EventMultiTileDynamicPacketUseTest = Test<EventDynamicPacketUseFixture<1>>;

// Delegates to the fixture scenario that creates the pool without a device list.
HWTEST2_F(EventMultiTileDynamicPacketUseTest, testAllDevices, IsAtLeastXeHpCore) {
    this->testAllDevices();
}

// Delegates to the fixture scenario that creates the pool for one device handle.
HWTEST2_F(EventMultiTileDynamicPacketUseTest, testSingleDevice, IsAtLeastXeHpCore) {
    this->testSingleDevice();
}
} // namespace ult
} // namespace L0

View File

@@ -6,6 +6,7 @@
*/
#include "shared/source/helpers/aligned_memory.h"
#include "shared/source/helpers/hw_helper.h"
#include "shared/source/helpers/ptr_math.h"
#include "shared/test/common/helpers/debug_manager_state_restore.h"
#include "shared/test/common/helpers/default_hw_info.h"
@@ -641,5 +642,84 @@ TEST_F(L0HwHelperTest, givenL0HelperWhenGettingDefaultValueForCompactL3FlushEven
EXPECT_FALSE(defaultValue);
}
// With no debug override set in this test, the dynamic event packet count
// query is expected to report false for the default hardware info.
TEST_F(L0HwHelperTest, givenL0HelperWhenGettingDefaultValueForDynamicEventPacketCountThenReturnFalse) {
    auto defaultInfo = *NEO::defaultHwInfo;
    EXPECT_FALSE(L0::L0HwHelper::useDynamicEventPacketsCount(defaultInfo));
}
// On platforms selected by NonMultiTilePlatforms both the max kernel count and
// the base max packet count are expected to be one.
HWTEST2_F(L0HwHelperTest, givenL0HelperWhenGettingMaxKernelAndMaxPacketThenExpectBothReturnOne, NonMultiTilePlatforms) {
    auto defaultInfo = *NEO::defaultHwInfo;
    auto &&l0Helper = L0::L0HwHelperHw<FamilyType>::get();

    const auto maxKernelCount = l0Helper.getEventMaxKernelCount(defaultInfo);
    EXPECT_EQ(1u, maxKernelCount);

    const auto baseMaxPacketCount = l0Helper.getEventBaseMaxPacketCount(defaultInfo);
    EXPECT_EQ(1u, baseMaxPacketCount);
}
// Fixture whose template arguments are written to the
// UsePipeControlMultiKernelEventSync and CompactL3FlushEventPacket debug flags
// during setUp().
template <int32_t usePipeControlMultiPacketEventSync, int32_t compactL3FlushEventPacket>
struct L0HwHelperMultiPacketEventFixture {
    DebugManagerStateRestore restorer;

    void setUp() {
        auto &debugFlags = DebugManager.flags;
        debugFlags.UsePipeControlMultiKernelEventSync.set(usePipeControlMultiPacketEventSync);
        debugFlags.CompactL3FlushEventPacket.set(compactL3FlushEventPacket);
    }

    // Nothing to undo explicitly here.
    void tearDown() {}
};
// UsePipeControlMultiKernelEventSync = 0, CompactL3FlushEventPacket = 0.
using L0HwHelperEventMultiKernelEnabledL3FlushCompactDisabledTest = Test<L0HwHelperMultiPacketEventFixture<0, 0>>;

HWTEST2_F(L0HwHelperEventMultiKernelEnabledL3FlushCompactDisabledTest,
          givenL0HelperWhenGettingMaxKernelAndMaxPacketThenExpectKernelThreeAndPacketThreeWithL3PacketWhenApplicable,
          IsAtLeastXeHpCore) {
    auto defaultInfo = *NEO::defaultHwInfo;

    // Three packets, plus one more when getDcFlushEnable reports true.
    const bool dcFlushEnabled = NEO::MemorySynchronizationCommands<FamilyType>::getDcFlushEnable(true, defaultInfo);
    const uint32_t expectedPacket = dcFlushEnabled ? 4u : 3u;

    auto &&l0Helper = L0::L0HwHelperHw<FamilyType>::get();
    EXPECT_EQ(3u, l0Helper.getEventMaxKernelCount(defaultInfo));
    EXPECT_EQ(expectedPacket, l0Helper.getEventBaseMaxPacketCount(defaultInfo));
}
// UsePipeControlMultiKernelEventSync = 0, CompactL3FlushEventPacket = 1.
using L0HwHelperEventMultiKernelEnabledL3FlushCompactEnabledTest = Test<L0HwHelperMultiPacketEventFixture<0, 1>>;

HWTEST2_F(L0HwHelperEventMultiKernelEnabledL3FlushCompactEnabledTest,
          givenL0HelperWhenGettingMaxKernelAndMaxPacketThenExpectKernelThreeAndPacketThree,
          IsAtLeastXeHpCore) {
    auto defaultInfo = *NEO::defaultHwInfo;
    const uint32_t expectedPacket = 3u;

    auto &&l0Helper = L0::L0HwHelperHw<FamilyType>::get();
    EXPECT_EQ(3u, l0Helper.getEventMaxKernelCount(defaultInfo));
    EXPECT_EQ(expectedPacket, l0Helper.getEventBaseMaxPacketCount(defaultInfo));
}
// UsePipeControlMultiKernelEventSync = 1, CompactL3FlushEventPacket = 0.
using L0HwHelperEventMultiKernelDisabledL3FlushCompactDisabledTest = Test<L0HwHelperMultiPacketEventFixture<1, 0>>;

HWTEST2_F(L0HwHelperEventMultiKernelDisabledL3FlushCompactDisabledTest,
          givenL0HelperWhenGettingMaxKernelAndMaxPacketThenExpectKernelOneAndPacketOneWithL3PacketWhenApplicable,
          IsAtLeastXeHpCore) {
    auto defaultInfo = *NEO::defaultHwInfo;

    // One packet, plus one more when getDcFlushEnable reports true.
    const bool dcFlushEnabled = NEO::MemorySynchronizationCommands<FamilyType>::getDcFlushEnable(true, defaultInfo);
    const uint32_t expectedPacket = dcFlushEnabled ? 2u : 1u;

    auto &&l0Helper = L0::L0HwHelperHw<FamilyType>::get();
    EXPECT_EQ(1u, l0Helper.getEventMaxKernelCount(defaultInfo));
    EXPECT_EQ(expectedPacket, l0Helper.getEventBaseMaxPacketCount(defaultInfo));
}
// UsePipeControlMultiKernelEventSync = 1, CompactL3FlushEventPacket = 1.
using L0HwHelperEventMultiKernelDisabledL3FlushCompactEnabledTest = Test<L0HwHelperMultiPacketEventFixture<1, 1>>;

HWTEST2_F(L0HwHelperEventMultiKernelDisabledL3FlushCompactEnabledTest,
          givenL0HelperWhenGettingMaxKernelAndMaxPacketThenExpectKernelOneAndPacketOne,
          IsAtLeastXeHpCore) {
    auto defaultInfo = *NEO::defaultHwInfo;
    const uint32_t expectedPacket = 1u;

    auto &&l0Helper = L0::L0HwHelperHw<FamilyType>::get();
    EXPECT_EQ(1u, l0Helper.getEventMaxKernelCount(defaultInfo));
    EXPECT_EQ(expectedPacket, l0Helper.getEventBaseMaxPacketCount(defaultInfo));
}
} // namespace ult
} // namespace L0

View File

@@ -144,7 +144,17 @@ HWTEST2_F(CommandListAppendLaunchKernelWithAtomics, givenKernelWithGlobalAtomics
EXPECT_FALSE(pCommandList->commandContainer.lastSentUseGlobalAtomics);
}
using MultTileCommandListAppendLaunchKernelL3Flush = Test<MultiTileCommandListFixture<false, false, false>>;
// Multi-tile command list fixture that sets the CompactL3FlushEventPacket debug
// flag to 0 before running the base fixture set-up.
struct MultTileCommandListAppendLaunchKernelL3FlushFixture : public MultiTileCommandListFixture<false, false, false> {
    using BaseClass = MultiTileCommandListFixture<false, false, false>;

    DebugManagerStateRestore restorer;

    void setUp() {
        auto &compactL3Flush = DebugManager.flags.CompactL3FlushEventPacket;
        compactL3Flush.set(0);
        BaseClass::setUp();
    }
};

using MultTileCommandListAppendLaunchKernelL3Flush = Test<MultTileCommandListAppendLaunchKernelL3FlushFixture>;
HWTEST2_F(MultTileCommandListAppendLaunchKernelL3Flush, givenKernelWithRegularEventAndWithWalkerPartitionThenProperCommandsEncoded, IsXeHpCore) {
using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM;
@@ -262,7 +272,17 @@ HWTEST2_F(MultTileCommandListAppendLaunchKernelL3Flush, givenKernelWithTimestamp
ASSERT_LE(1u, postSyncCount);
}
using CommandListAppendLaunchKernelL3Flush = Test<ModuleFixture>;
// Module fixture that sets the CompactL3FlushEventPacket debug flag to 0 before
// running the base fixture set-up.
struct CommandListAppendLaunchKernelL3FlushFixture : public ModuleFixture {
    DebugManagerStateRestore restorer;

    void setUp() {
        auto &compactL3Flush = DebugManager.flags.CompactL3FlushEventPacket;
        compactL3Flush.set(0);
        ModuleFixture::setUp();
    }
};

using CommandListAppendLaunchKernelL3Flush = Test<CommandListAppendLaunchKernelL3FlushFixture>;
HWTEST2_F(CommandListAppendLaunchKernelL3Flush, givenKernelWithEventAndWithoutWalkerPartitionThenProperCommandsEncoded, IsXeHpCore) {
using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM;
@@ -709,6 +729,9 @@ HWTEST2_F(CommandListAppendLaunchKernel, givenEventWhenInvokingAppendLaunchKerne
using POSTSYNC_DATA = typename FamilyType::POSTSYNC_DATA;
using WALKER_TYPE = typename FamilyType::WALKER_TYPE;
DebugManagerStateRestore restorer;
DebugManager.flags.CompactL3FlushEventPacket.set(0);
createKernel();
ze_result_t returnValue;
std::unique_ptr<L0::CommandList> commandList(L0::CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue));

View File

@@ -408,6 +408,9 @@ HWTEST2_F(CommandListAppendLaunchKernelXeHpgCore, givenEventWhenAppendKernelIsCa
using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
using POST_SYNC_OPERATION = typename PIPE_CONTROL::POST_SYNC_OPERATION;
DebugManagerStateRestore restorer;
DebugManager.flags.CompactL3FlushEventPacket.set(0);
Mock<::L0::Kernel> kernel;
auto pMockModule = std::unique_ptr<Module>(new Mock<Module>(device, nullptr));
kernel.module = pMockModule.get();