reposition implementation of level zero hardware helper class

Signed-off-by: Zbigniew Zdanowicz <zbigniew.zdanowicz@intel.com>
This commit is contained in:
Zbigniew Zdanowicz
2022-11-23 14:09:13 +00:00
committed by Compute-Runtime-Automation
parent 8a2a12393d
commit 7bdc99d9b2
12 changed files with 188 additions and 112 deletions

View File

@ -54,7 +54,6 @@ set(L0_RUNTIME_SOURCES
${CMAKE_CURRENT_SOURCE_DIR}/fence/fence.cpp
${CMAKE_CURRENT_SOURCE_DIR}/fence/fence.h
${CMAKE_CURRENT_SOURCE_DIR}/hw_helpers/l0_hw_helper_base.inl
${CMAKE_CURRENT_SOURCE_DIR}/hw_helpers/l0_hw_helper_skl_and_later.inl
${CMAKE_CURRENT_SOURCE_DIR}/hw_helpers/l0_hw_helper.cpp
${CMAKE_CURRENT_SOURCE_DIR}/hw_helpers/l0_hw_helper.h
${CMAKE_CURRENT_SOURCE_DIR}/kernel/kernel.cpp
@ -99,6 +98,18 @@ else()
)
endif()
# The SKL-to-ICLLP helper is appended only when Gen9 or Gen11 support is enabled.
if(SUPPORT_GEN9 OR SUPPORT_GEN11)
list(APPEND L0_RUNTIME_SOURCES
${CMAKE_CURRENT_SOURCE_DIR}/hw_helpers/l0_hw_helper_skl_to_icllp.inl
)
endif()
# The SKL-to-TGLLP helper is appended when any of Gen9, Gen11 or Gen12LP support is enabled.
if(SUPPORT_GEN9 OR SUPPORT_GEN11 OR SUPPORT_GEN12LP)
list(APPEND L0_RUNTIME_SOURCES
${CMAKE_CURRENT_SOURCE_DIR}/hw_helpers/l0_hw_helper_skl_to_tgllp.inl
)
endif()
if(SUPPORT_XEHP_AND_LATER)
list(APPEND L0_RUNTIME_SOURCES
${CMAKE_CURRENT_SOURCE_DIR}/cmdlist/cmdlist_hw_xehp_and_later.inl
@ -109,7 +120,7 @@ endif()
if(SUPPORT_GEN12LP OR SUPPORT_XE_HP_CORE OR SUPPORT_XE_HPG_CORE)
list(APPEND L0_RUNTIME_SOURCES
${CMAKE_CURRENT_SOURCE_DIR}/hw_helpers/l0_hw_helper_tgllp_plus.inl
${CMAKE_CURRENT_SOURCE_DIR}/hw_helpers/l0_hw_helper_tgllp_to_dg2.inl
)
endif()

View File

@ -9,7 +9,8 @@
#include "level_zero/core/source/helpers/l0_populate_factory.h"
#include "level_zero/core/source/hw_helpers/l0_hw_helper_base.inl"
#include "level_zero/core/source/hw_helpers/l0_hw_helper_skl_and_later.inl"
#include "level_zero/core/source/hw_helpers/l0_hw_helper_skl_to_icllp.inl"
#include "level_zero/core/source/hw_helpers/l0_hw_helper_skl_to_tgllp.inl"
namespace L0 {

View File

@ -9,7 +9,8 @@
#include "level_zero/core/source/helpers/l0_populate_factory.h"
#include "level_zero/core/source/hw_helpers/l0_hw_helper_base.inl"
#include "level_zero/core/source/hw_helpers/l0_hw_helper_skl_and_later.inl"
#include "level_zero/core/source/hw_helpers/l0_hw_helper_skl_to_tgllp.inl"
#include "level_zero/core/source/hw_helpers/l0_hw_helper_tgllp_to_dg2.inl"
#include "level_zero/tools/source/debug/eu_thread.h"
namespace L0 {
@ -28,10 +29,6 @@ bool L0HwHelperHw<Family>::isResumeWARequired() {
return true;
}
// clang-format off
#include "level_zero/core/source/hw_helpers/l0_hw_helper_tgllp_plus.inl"
// clang-format on
template class L0HwHelperHw<Family>;
} // namespace L0

View File

@ -9,7 +9,8 @@
#include "level_zero/core/source/helpers/l0_populate_factory.h"
#include "level_zero/core/source/hw_helpers/l0_hw_helper_base.inl"
#include "level_zero/core/source/hw_helpers/l0_hw_helper_skl_and_later.inl"
#include "level_zero/core/source/hw_helpers/l0_hw_helper_skl_to_icllp.inl"
#include "level_zero/core/source/hw_helpers/l0_hw_helper_skl_to_tgllp.inl"
namespace L0 {

View File

@ -14,8 +14,8 @@
namespace L0 {
template <typename GfxFamily>
L0::Event *L0HwHelperHw<GfxFamily>::createEvent(L0::EventPool *eventPool, const ze_event_desc_t *desc, L0::Device *device) const {
template <typename Family>
L0::Event *L0HwHelperHw<Family>::createEvent(L0::EventPool *eventPool, const ze_event_desc_t *desc, L0::Device *device) const {
if (NEO::DebugManager.flags.OverrideTimestampPacketSize.get() != -1) {
if (NEO::DebugManager.flags.OverrideTimestampPacketSize.get() == 4) {
return Event::create<uint32_t>(eventPool, desc, device);
@ -26,74 +26,16 @@ L0::Event *L0HwHelperHw<GfxFamily>::createEvent(L0::EventPool *eventPool, const
}
}
return Event::create<typename GfxFamily::TimestampPacketType>(eventPool, desc, device);
return Event::create<typename Family::TimestampPacketType>(eventPool, desc, device);
}
template <typename GfxFamily>
bool L0HwHelperHw<GfxFamily>::isResumeWARequired() {
// Generic template definition: always returns false.
template <typename Family>
bool L0HwHelperHw<Family>::isResumeWARequired() {
return false;
}
// Allocates a zeroed attention bitmask and sets one bit for every entry in `threads`.
// Byte layout, as established by the offsets computed below: slice-major, then
// subslice, then EU, with bytesPerEu bytes per EU and one bit per thread.
// Outputs: `bitmask` is replaced with the new buffer, `bitmaskSize` receives its size in bytes.
template <typename GfxFamily>
void L0HwHelperHw<GfxFamily>::getAttentionBitmaskForSingleThreads(const std::vector<EuThread::ThreadId> &threads, const NEO::HardwareInfo &hwInfo, std::unique_ptr<uint8_t[]> &bitmask, size_t &bitmaskSize) const {
const uint32_t numSubslicesPerSlice = hwInfo.gtSystemInfo.MaxSubSlicesSupported / hwInfo.gtSystemInfo.MaxSlicesSupported;
const uint32_t numEuPerSubslice = hwInfo.gtSystemInfo.MaxEuPerSubSlice;
const uint32_t numThreadsPerEu = (hwInfo.gtSystemInfo.ThreadCount / hwInfo.gtSystemInfo.EUCount);
// One bit per thread, rounded up to whole bytes.
const uint32_t bytesPerEu = alignUp(numThreadsPerEu, 8) / 8;
const uint32_t threadsSizePerSlice = numSubslicesPerSlice * numEuPerSubslice * bytesPerEu;
const uint32_t highestEnabledSlice = NEO::HwHelper::getHighestEnabledSlice(hwInfo);
// The buffer covers the larger of the two slice counts.
bitmaskSize = std::max(highestEnabledSlice, hwInfo.gtSystemInfo.MaxSlicesSupported) * numSubslicesPerSlice * numEuPerSubslice * bytesPerEu;
// NOTE(review): std::make_unique<T[]> already value-initialises the array, so the memset re-zeroes it.
bitmask = std::make_unique<uint8_t[]>(bitmaskSize);
memset(bitmask.get(), 0, bitmaskSize);
for (auto &thread : threads) {
// NOTE(review): slice/subslice/eu are not range-checked against bitmaskSize here.
uint8_t *sliceData = ptrOffset(bitmask.get(), threadsSizePerSlice * thread.slice);
uint8_t *subsliceData = ptrOffset(sliceData, numEuPerSubslice * bytesPerEu * thread.subslice);
uint8_t *euData = ptrOffset(subsliceData, bytesPerEu * thread.eu);
// Only the first byte of the EU is written, so bit positions above 7 are rejected.
UNRECOVERABLE_IF(thread.thread > 7);
*euData |= (1 << thread.thread);
}
}
// Decodes an attention bitmask into the list of thread ids whose bits are set.
// Walks slices, subslices and EUs in that order, reads one byte per EU and emits
// a ThreadId(tile, slice, subslice, eu, bit) for every set bit.
// Decoding stops, returning what was collected so far, once the computed byte
// offset reaches `bitmaskSize`.
template <typename GfxFamily>
std::vector<EuThread::ThreadId> L0HwHelperHw<GfxFamily>::getThreadsFromAttentionBitmask(const NEO::HardwareInfo &hwInfo, uint32_t tile, const uint8_t *bitmask, const size_t bitmaskSize) const {
const uint32_t numSubslicesPerSlice = hwInfo.gtSystemInfo.MaxSubSlicesSupported / hwInfo.gtSystemInfo.MaxSlicesSupported;
const uint32_t numEuPerSubslice = hwInfo.gtSystemInfo.MaxEuPerSubSlice;
const uint32_t numThreadsPerEu = (hwInfo.gtSystemInfo.ThreadCount / hwInfo.gtSystemInfo.EUCount);
// One bit per thread, rounded up to whole bytes.
const uint32_t bytesPerEu = alignUp(numThreadsPerEu, 8) / 8;
const uint32_t threadsSizePerSlice = numSubslicesPerSlice * numEuPerSubslice * bytesPerEu;
const uint32_t threadsSizePerSubSlice = numEuPerSubslice * bytesPerEu;
const uint32_t highestEnabledSlice = NEO::HwHelper::getHighestEnabledSlice(hwInfo);
// The decoder below reads exactly one byte per EU, so any other width is treated as fatal.
UNRECOVERABLE_IF(bytesPerEu != 1);
std::vector<EuThread::ThreadId> threads;
for (uint32_t slice = 0; slice < std::max(highestEnabledSlice, hwInfo.gtSystemInfo.MaxSlicesSupported); slice++) {
for (uint32_t subslice = 0; subslice < numSubslicesPerSlice; subslice++) {
for (uint32_t eu = 0; eu < hwInfo.gtSystemInfo.MaxEuPerSubSlice; eu++) {
size_t offset = slice * threadsSizePerSlice + subslice * threadsSizePerSubSlice + eu * bytesPerEu;
// Never read past the caller-supplied buffer.
if (offset >= bitmaskSize) {
return threads;
}
std::bitset<8> bits(bitmask[offset]);
for (uint32_t i = 0; i < 8; i++) {
if (bits.test(i)) {
threads.emplace_back(tile, slice, subslice, eu, i);
}
}
}
}
}
return threads;
}
template <typename GfxFamily>
bool L0HwHelperHw<GfxFamily>::imageCompressionSupported(const NEO::HardwareInfo &hwInfo) const {
template <typename Family>
bool L0HwHelperHw<Family>::imageCompressionSupported(const NEO::HardwareInfo &hwInfo) const {
if (NEO::DebugManager.flags.RenderCompressedImagesEnabled.get() != -1) {
return !!NEO::DebugManager.flags.RenderCompressedImagesEnabled.get();
}
@ -101,8 +43,8 @@ bool L0HwHelperHw<GfxFamily>::imageCompressionSupported(const NEO::HardwareInfo
return false;
}
template <typename GfxFamily>
bool L0HwHelperHw<GfxFamily>::usmCompressionSupported(const NEO::HardwareInfo &hwInfo) const {
template <typename Family>
bool L0HwHelperHw<Family>::usmCompressionSupported(const NEO::HardwareInfo &hwInfo) const {
if (NEO::DebugManager.flags.RenderCompressedBuffersEnabled.get() != -1) {
return !!NEO::DebugManager.flags.RenderCompressedBuffersEnabled.get();
}
@ -110,13 +52,18 @@ bool L0HwHelperHw<GfxFamily>::usmCompressionSupported(const NEO::HardwareInfo &h
return false;
}
template <typename GfxFamily>
bool L0HwHelperHw<GfxFamily>::forceDefaultUsmCompressionSupport() const {
// Generic template definition: always returns false.
template <typename Family>
bool L0HwHelperHw<Family>::forceDefaultUsmCompressionSupport() const {
return false;
}
template <typename gfxProduct>
bool L0HwHelperHw<gfxProduct>::alwaysAllocateEventInLocalMem() const {
// Generic template definition: always returns false.
template <typename Family>
bool L0HwHelperHw<Family>::alwaysAllocateEventInLocalMem() const {
return false;
}
// Generic template definition: always returns false.
template <typename Family>
bool L0HwHelperHw<Family>::multiTileCapablePlatform() const {
return false;
}

View File

@ -16,6 +16,64 @@
namespace L0 {
// Builds an attention bitmask in which exactly the bits for `threads` are set.
//
// Byte layout, as established by the offsets computed below: slice-major, then
// subslice, then EU; every EU owns bytesPerEu bytes with one bit per thread.
//
// threads      Threads whose bits are set.
// hwInfo       Supplies the gtSystemInfo counts used to size and index the mask.
// bitmask      [out] Replaced with a newly allocated, zero-filled buffer.
// bitmaskSize  [out] Size of `bitmask` in bytes.
//
// Aborts via UNRECOVERABLE_IF when a thread id addresses a byte outside the
// allocated mask or a bit position above 7.
template <typename Family>
void L0HwHelperHw<Family>::getAttentionBitmaskForSingleThreads(const std::vector<EuThread::ThreadId> &threads, const NEO::HardwareInfo &hwInfo, std::unique_ptr<uint8_t[]> &bitmask, size_t &bitmaskSize) const {
    const uint32_t numSubslicesPerSlice = hwInfo.gtSystemInfo.MaxSubSlicesSupported / hwInfo.gtSystemInfo.MaxSlicesSupported;
    const uint32_t numEuPerSubslice = hwInfo.gtSystemInfo.MaxEuPerSubSlice;
    const uint32_t numThreadsPerEu = (hwInfo.gtSystemInfo.ThreadCount / hwInfo.gtSystemInfo.EUCount);
    // One bit per thread, rounded up to whole bytes.
    const uint32_t bytesPerEu = alignUp(numThreadsPerEu, 8) / 8;
    const uint32_t threadsSizePerSubSlice = numEuPerSubslice * bytesPerEu;
    const uint32_t threadsSizePerSlice = numSubslicesPerSlice * threadsSizePerSubSlice;
    const uint32_t highestEnabledSlice = NEO::HwHelper::getHighestEnabledSlice(hwInfo);
    // The buffer covers the larger of the two slice counts.
    const uint32_t sliceCount = std::max(highestEnabledSlice, hwInfo.gtSystemInfo.MaxSlicesSupported);

    bitmaskSize = sliceCount * threadsSizePerSlice;
    // std::make_unique<T[]> value-initialises the array, so the buffer starts
    // zeroed and no separate memset is needed.
    bitmask = std::make_unique<uint8_t[]>(bitmaskSize);

    for (const auto &thread : threads) {
        const size_t offset = static_cast<size_t>(threadsSizePerSlice) * thread.slice +
                              static_cast<size_t>(threadsSizePerSubSlice) * thread.subslice +
                              static_cast<size_t>(bytesPerEu) * thread.eu;
        // A thread id outside the sized mask would otherwise write past the allocation.
        UNRECOVERABLE_IF(offset >= bitmaskSize);
        // Only the first byte of the EU is written, so bit positions above 7 are rejected.
        UNRECOVERABLE_IF(thread.thread > 7);
        bitmask[offset] |= static_cast<uint8_t>(1u << thread.thread);
    }
}
// Decodes an attention bitmask into the list of thread ids whose bits are set.
//
// Slices, subslices and EUs are visited in that order; one byte is read per EU
// and a ThreadId(tile, slice, subslice, eu, bit) is emitted for every set bit.
// Decoding ends early, returning what was collected so far, once the computed
// byte offset reaches `bitmaskSize`.
template <typename Family>
std::vector<EuThread::ThreadId> L0HwHelperHw<Family>::getThreadsFromAttentionBitmask(const NEO::HardwareInfo &hwInfo, uint32_t tile, const uint8_t *bitmask, const size_t bitmaskSize) const {
    const auto &gtInfo = hwInfo.gtSystemInfo;
    const uint32_t subslicesPerSlice = gtInfo.MaxSubSlicesSupported / gtInfo.MaxSlicesSupported;
    const uint32_t eusPerSubslice = gtInfo.MaxEuPerSubSlice;
    const uint32_t threadsPerEu = gtInfo.ThreadCount / gtInfo.EUCount;
    // One bit per thread, rounded up to whole bytes.
    const uint32_t bytesPerEu = alignUp(threadsPerEu, 8) / 8;
    const uint32_t sliceStride = subslicesPerSlice * eusPerSubslice * bytesPerEu;
    const uint32_t subsliceStride = eusPerSubslice * bytesPerEu;
    const uint32_t highestSlice = NEO::HwHelper::getHighestEnabledSlice(hwInfo);

    // The decoder reads exactly one byte per EU; any other width is fatal.
    UNRECOVERABLE_IF(bytesPerEu != 1);

    const uint32_t sliceCount = std::max(highestSlice, gtInfo.MaxSlicesSupported);

    std::vector<EuThread::ThreadId> result;
    for (uint32_t slice = 0; slice < sliceCount; ++slice) {
        for (uint32_t subslice = 0; subslice < subslicesPerSlice; ++subslice) {
            for (uint32_t eu = 0; eu < eusPerSubslice; ++eu) {
                const size_t offset = slice * sliceStride + subslice * subsliceStride + eu * bytesPerEu;
                // Never read past the caller-supplied buffer.
                if (offset >= bitmaskSize) {
                    return result;
                }
                const uint8_t euBits = bitmask[offset];
                for (uint32_t bit = 0; bit < 8; ++bit) {
                    if ((euBits >> bit) & 1u) {
                        result.emplace_back(tile, slice, subslice, eu, bit);
                    }
                }
            }
        }
    }
    return result;
}
template <typename Family>
void L0HwHelperHw<Family>::setAdditionalGroupProperty(ze_command_queue_group_properties_t &groupProperty, NEO::EngineGroupT &group) const {
if (group.engineGroupType == NEO::EngineGroupType::LinkedCopy) {

View File

@ -0,0 +1,79 @@
/*
* Copyright (C) 2020-2022 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/source/debug_settings/debug_settings_manager.h"
#include "shared/source/helpers/hw_helper.h"
#include "level_zero/core/source/device/device.h"
#include "level_zero/core/source/event/event.h"
#include "level_zero/core/source/hw_helpers/l0_hw_helper.h"
namespace L0 {
// No-op definition: the body is empty, so neither groupProperty nor group is modified.
template <typename Family>
void L0HwHelperHw<Family>::setAdditionalGroupProperty(ze_command_queue_group_properties_t &groupProperty, NEO::EngineGroupT &group) const {
}
// Builds an attention bitmask in which exactly the bits for `threads` are set.
//
// Byte layout, as established by the offsets computed below: slice-major, then
// subslice, then EU; every EU owns bytesPerEu bytes with one bit per thread.
//
// threads      Threads whose bits are set.
// hwInfo       Supplies the gtSystemInfo counts used to size and index the mask.
// bitmask      [out] Replaced with a newly allocated, zero-filled buffer.
// bitmaskSize  [out] Size of `bitmask` in bytes.
//
// Aborts via UNRECOVERABLE_IF when a thread id addresses a byte outside the
// allocated mask or a bit position above 7.
template <typename Family>
void L0HwHelperHw<Family>::getAttentionBitmaskForSingleThreads(const std::vector<EuThread::ThreadId> &threads, const NEO::HardwareInfo &hwInfo, std::unique_ptr<uint8_t[]> &bitmask, size_t &bitmaskSize) const {
    const uint32_t numSubslicesPerSlice = hwInfo.gtSystemInfo.MaxSubSlicesSupported / hwInfo.gtSystemInfo.MaxSlicesSupported;
    const uint32_t numEuPerSubslice = hwInfo.gtSystemInfo.MaxEuPerSubSlice;
    const uint32_t numThreadsPerEu = (hwInfo.gtSystemInfo.ThreadCount / hwInfo.gtSystemInfo.EUCount);
    // One bit per thread, rounded up to whole bytes.
    const uint32_t bytesPerEu = alignUp(numThreadsPerEu, 8) / 8;
    const uint32_t threadsSizePerSubSlice = numEuPerSubslice * bytesPerEu;
    const uint32_t threadsSizePerSlice = numSubslicesPerSlice * threadsSizePerSubSlice;
    const uint32_t highestEnabledSlice = NEO::HwHelper::getHighestEnabledSlice(hwInfo);
    // The buffer covers the larger of the two slice counts.
    const uint32_t sliceCount = std::max(highestEnabledSlice, hwInfo.gtSystemInfo.MaxSlicesSupported);

    bitmaskSize = sliceCount * threadsSizePerSlice;
    // std::make_unique<T[]> value-initialises the array, so the buffer starts
    // zeroed and no separate memset is needed.
    bitmask = std::make_unique<uint8_t[]>(bitmaskSize);

    for (const auto &thread : threads) {
        const size_t offset = static_cast<size_t>(threadsSizePerSlice) * thread.slice +
                              static_cast<size_t>(threadsSizePerSubSlice) * thread.subslice +
                              static_cast<size_t>(bytesPerEu) * thread.eu;
        // A thread id outside the sized mask would otherwise write past the allocation.
        UNRECOVERABLE_IF(offset >= bitmaskSize);
        // Only the first byte of the EU is written, so bit positions above 7 are rejected.
        UNRECOVERABLE_IF(thread.thread > 7);
        bitmask[offset] |= static_cast<uint8_t>(1u << thread.thread);
    }
}
// Decodes an attention bitmask into the list of thread ids whose bits are set.
//
// Slices, subslices and EUs are visited in that order; one byte is read per EU
// and a ThreadId(tile, slice, subslice, eu, bit) is emitted for every set bit.
// Decoding ends early, returning what was collected so far, once the computed
// byte offset reaches `bitmaskSize`.
template <typename Family>
std::vector<EuThread::ThreadId> L0HwHelperHw<Family>::getThreadsFromAttentionBitmask(const NEO::HardwareInfo &hwInfo, uint32_t tile, const uint8_t *bitmask, const size_t bitmaskSize) const {
    const auto &gtInfo = hwInfo.gtSystemInfo;
    const uint32_t subslicesPerSlice = gtInfo.MaxSubSlicesSupported / gtInfo.MaxSlicesSupported;
    const uint32_t eusPerSubslice = gtInfo.MaxEuPerSubSlice;
    const uint32_t threadsPerEu = gtInfo.ThreadCount / gtInfo.EUCount;
    // One bit per thread, rounded up to whole bytes.
    const uint32_t bytesPerEu = alignUp(threadsPerEu, 8) / 8;
    const uint32_t sliceStride = subslicesPerSlice * eusPerSubslice * bytesPerEu;
    const uint32_t subsliceStride = eusPerSubslice * bytesPerEu;
    const uint32_t highestSlice = NEO::HwHelper::getHighestEnabledSlice(hwInfo);

    // The decoder reads exactly one byte per EU; any other width is fatal.
    UNRECOVERABLE_IF(bytesPerEu != 1);

    const uint32_t sliceCount = std::max(highestSlice, gtInfo.MaxSlicesSupported);

    std::vector<EuThread::ThreadId> result;
    for (uint32_t slice = 0; slice < sliceCount; ++slice) {
        for (uint32_t subslice = 0; subslice < subslicesPerSlice; ++subslice) {
            for (uint32_t eu = 0; eu < eusPerSubslice; ++eu) {
                const size_t offset = slice * sliceStride + subslice * subsliceStride + eu * bytesPerEu;
                // Never read past the caller-supplied buffer.
                if (offset >= bitmaskSize) {
                    return result;
                }
                const uint8_t euBits = bitmask[offset];
                for (uint32_t bit = 0; bit < 8; ++bit) {
                    if ((euBits >> bit) & 1u) {
                        result.emplace_back(tile, slice, subslice, eu, bit);
                    }
                }
            }
        }
    }
    return result;
}
} // namespace L0

View File

@ -9,15 +9,6 @@
namespace L0 {
// No-op definition: the body is empty, so neither groupProperty nor group is modified.
template <typename GfxFamily>
void L0HwHelperHw<GfxFamily>::setAdditionalGroupProperty(ze_command_queue_group_properties_t &groupProperty, NEO::EngineGroupT &group) const {
}
// Generic template definition: always returns false.
template <typename Family>
bool L0HwHelperHw<Family>::multiTileCapablePlatform() const {
return false;
}
template <typename Family>
bool L0HwHelperHw<Family>::platformSupportsCmdListHeapSharing() const {
return false;

View File

@ -5,7 +5,15 @@
*
*/
template <>
#include "level_zero/core/source/hw_helpers/l0_hw_helper.h"
namespace L0 {
// No-op definition: the body is empty, so neither groupProperty nor group is modified.
template <typename Family>
void L0HwHelperHw<Family>::setAdditionalGroupProperty(ze_command_queue_group_properties_t &groupProperty, NEO::EngineGroupT &group) const {
}
template <typename Family>
void L0HwHelperHw<Family>::getAttentionBitmaskForSingleThreads(const std::vector<EuThread::ThreadId> &threads, const NEO::HardwareInfo &hwInfo, std::unique_ptr<uint8_t[]> &bitmask, size_t &bitmaskSize) const {
const uint32_t numSubslicesPerSlice = (hwInfo.gtSystemInfo.MaxEuPerSubSlice == 8) ? hwInfo.gtSystemInfo.MaxDualSubSlicesSupported / hwInfo.gtSystemInfo.MaxSlicesSupported : hwInfo.gtSystemInfo.MaxSubSlicesSupported / hwInfo.gtSystemInfo.MaxSlicesSupported;
@ -44,7 +52,7 @@ void L0HwHelperHw<Family>::getAttentionBitmaskForSingleThreads(const std::vector
}
}
template <>
template <typename Family>
std::vector<EuThread::ThreadId> L0HwHelperHw<Family>::getThreadsFromAttentionBitmask(const NEO::HardwareInfo &hwInfo, uint32_t tile, const uint8_t *bitmask, const size_t bitmaskSize) const {
const uint32_t numSubslicesPerSlice = (hwInfo.gtSystemInfo.MaxEuPerSubSlice == 8) ? hwInfo.gtSystemInfo.MaxDualSubSlicesSupported / hwInfo.gtSystemInfo.MaxSlicesSupported : hwInfo.gtSystemInfo.MaxSubSlicesSupported / hwInfo.gtSystemInfo.MaxSlicesSupported;
@ -93,3 +101,5 @@ std::vector<EuThread::ThreadId> L0HwHelperHw<Family>::getThreadsFromAttentionBit
return threads;
}
} // namespace L0

View File

@ -11,11 +11,6 @@
namespace L0 {
// Generic template definition: always returns false.
template <typename Family>
bool L0HwHelperHw<Family>::multiTileCapablePlatform() const {
return false;
}
template <typename Family>
bool L0HwHelperHw<Family>::platformSupportsCmdListHeapSharing() const {
return true;

View File

@ -9,6 +9,7 @@
#include "level_zero/core/source/helpers/l0_populate_factory.h"
#include "level_zero/core/source/hw_helpers/l0_hw_helper_base.inl"
#include "level_zero/core/source/hw_helpers/l0_hw_helper_tgllp_to_dg2.inl"
#include "level_zero/core/source/hw_helpers/l0_hw_helper_xehp_and_later.inl"
namespace L0 {
@ -32,14 +33,6 @@ bool L0HwHelperHw<Family>::multiTileCapablePlatform() const {
return true;
}
// Explicit specialization with an empty body: neither groupProperty nor group is modified.
template <>
void L0HwHelperHw<Family>::setAdditionalGroupProperty(ze_command_queue_group_properties_t &groupProperty, NEO::EngineGroupT &group) const {
}
// clang-format off
#include "level_zero/core/source/hw_helpers/l0_hw_helper_tgllp_plus.inl"
// clang-format on
template class L0HwHelperHw<Family>;
} // namespace L0

View File

@ -9,6 +9,7 @@
#include "level_zero/core/source/helpers/l0_populate_factory.h"
#include "level_zero/core/source/hw_helpers/l0_hw_helper_base.inl"
#include "level_zero/core/source/hw_helpers/l0_hw_helper_tgllp_to_dg2.inl"
#include "level_zero/core/source/hw_helpers/l0_hw_helper_xehp_and_later.inl"
namespace L0 {
@ -27,14 +28,6 @@ bool L0HwHelperHw<Family>::isResumeWARequired() {
return true;
}
// Explicit specialization with an empty body: neither groupProperty nor group is modified.
template <>
void L0HwHelperHw<Family>::setAdditionalGroupProperty(ze_command_queue_group_properties_t &groupProperty, NEO::EngineGroupT &group) const {
}
// clang-format off
#include "level_zero/core/source/hw_helpers/l0_hw_helper_tgllp_plus.inl"
// clang-format on
template class L0HwHelperHw<Family>;
} // namespace L0