Add ClHwHelper

Related-To: NEO-3964

Change-Id: Ib2660e8f7d92fc970172517b3e2ddfd607e09ec1
Signed-off-by: Filip Hazubski <filip.hazubski@intel.com>
This commit is contained in:
Filip Hazubski
2020-10-21 15:10:25 +02:00
committed by sys_ocldev
parent b8858ee0d6
commit 0738ad5a82
19 changed files with 241 additions and 48 deletions

View File

@ -17,6 +17,7 @@ set(RUNTIME_SRCS_GENX_CPP_BASE
aub_command_stream_receiver
aub_mem_dump
buffer
cl_hw_helper
command_queue
command_stream_receiver_simulated_common_hw
experimental_command_buffer

View File

@ -0,0 +1,27 @@
/*
* Copyright (C) 2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/source/helpers/populate_factory.h"
#include "opencl/source/helpers/cl_hw_helper_base.inl"
#include "hw_cmds.h"
namespace NEO {

// Family alias and core id used by the Gen11 registration below.
using Family = ICLFamily;
static auto gfxCore = IGFX_GEN11_CORE;

// Stores the address of the ClHwHelperHw<Family> instance in the
// clHwHelperFactory slot indexed by gfxCore.
template <>
void populateFactoryTable<ClHwHelperHw<Family>>() {
    // Block-scope declaration only; the table is defined outside this function.
    extern ClHwHelper *clHwHelperFactory[IGFX_MAX_CORE];

    ClHwHelper &familyHelper = ClHwHelperHw<Family>::get();
    clHwHelperFactory[gfxCore] = &familyHelper;
}

// Explicit instantiation of the helper for this family.
template class ClHwHelperHw<Family>;

} // namespace NEO

View File

@ -12,6 +12,7 @@
#include "opencl/source/command_stream/aub_command_stream_receiver_hw.h"
#include "opencl/source/command_stream/tbx_command_stream_receiver_hw.h"
#include "opencl/source/device_queue/device_queue_hw.h"
#include "opencl/source/helpers/cl_hw_helper.h"
#include "opencl/source/mem_obj/buffer.h"
#include "opencl/source/mem_obj/image.h"
#include "opencl/source/sampler/sampler.h"
@ -23,13 +24,14 @@ typedef ICLFamily Family;
// Constructing an EnableOCLGen11 calls populateFactoryTable<T>() once per listed
// hardware-specific class, each parameterized with Family (ICLFamily here).
// NOTE(review): CommandQueueHw, CommandStreamReceiverHw, DeviceQueueHw and
// TbxCommandStreamReceiverHw each appear twice below; this looks like the previous and
// the new call order being shown together in this view - confirm against the real file.
struct EnableOCLGen11 {
EnableOCLGen11() {
populateFactoryTable<AUBCommandStreamReceiverHw<Family>>();
populateFactoryTable<TbxCommandStreamReceiverHw<Family>>();
populateFactoryTable<CommandQueueHw<Family>>();
populateFactoryTable<DeviceQueueHw<Family>>();
populateFactoryTable<CommandStreamReceiverHw<Family>>();
populateFactoryTable<BufferHw<Family>>();
// Registration of the OpenCL hardware helper for this family.
populateFactoryTable<ClHwHelperHw<Family>>();
populateFactoryTable<CommandQueueHw<Family>>();
populateFactoryTable<CommandStreamReceiverHw<Family>>();
populateFactoryTable<DeviceQueueHw<Family>>();
populateFactoryTable<ImageHw<Family>>();
populateFactoryTable<SamplerHw<Family>>();
populateFactoryTable<TbxCommandStreamReceiverHw<Family>>();
}
};

View File

@ -0,0 +1,27 @@
/*
* Copyright (C) 2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/source/helpers/populate_factory.h"
#include "opencl/source/helpers/cl_hw_helper_base.inl"
#include "hw_cmds.h"
namespace NEO {

// Family alias and core id used by the Gen12LP registration below.
using Family = TGLLPFamily;
static auto gfxCore = IGFX_GEN12LP_CORE;

// Stores the address of the ClHwHelperHw<Family> instance in the
// clHwHelperFactory slot indexed by gfxCore.
template <>
void populateFactoryTable<ClHwHelperHw<Family>>() {
    // Block-scope declaration only; the table is defined outside this function.
    extern ClHwHelper *clHwHelperFactory[IGFX_MAX_CORE];

    ClHwHelper &familyHelper = ClHwHelperHw<Family>::get();
    clHwHelperFactory[gfxCore] = &familyHelper;
}

// Explicit instantiation of the helper for this family.
template class ClHwHelperHw<Family>;

} // namespace NEO

View File

@ -12,6 +12,7 @@
#include "opencl/source/command_stream/aub_command_stream_receiver_hw.h"
#include "opencl/source/command_stream/tbx_command_stream_receiver_hw.h"
#include "opencl/source/device_queue/device_queue_hw.h"
#include "opencl/source/helpers/cl_hw_helper.h"
#include "opencl/source/mem_obj/buffer.h"
#include "opencl/source/mem_obj/image.h"
#include "opencl/source/sampler/sampler.h"
@ -23,13 +24,14 @@ typedef TGLLPFamily Family;
// Constructing an EnableOCLGen12LP calls populateFactoryTable<T>() once per listed
// hardware-specific class, each parameterized with Family (TGLLPFamily here).
// NOTE(review): CommandQueueHw, CommandStreamReceiverHw, DeviceQueueHw and
// TbxCommandStreamReceiverHw each appear twice below; this looks like the previous and
// the new call order being shown together in this view - confirm against the real file.
struct EnableOCLGen12LP {
EnableOCLGen12LP() {
populateFactoryTable<AUBCommandStreamReceiverHw<Family>>();
populateFactoryTable<TbxCommandStreamReceiverHw<Family>>();
populateFactoryTable<CommandQueueHw<Family>>();
populateFactoryTable<DeviceQueueHw<Family>>();
populateFactoryTable<CommandStreamReceiverHw<Family>>();
populateFactoryTable<BufferHw<Family>>();
// Registration of the OpenCL hardware helper for this family.
populateFactoryTable<ClHwHelperHw<Family>>();
populateFactoryTable<CommandQueueHw<Family>>();
populateFactoryTable<CommandStreamReceiverHw<Family>>();
populateFactoryTable<DeviceQueueHw<Family>>();
populateFactoryTable<ImageHw<Family>>();
populateFactoryTable<SamplerHw<Family>>();
populateFactoryTable<TbxCommandStreamReceiverHw<Family>>();
}
};

View File

@ -0,0 +1,27 @@
/*
* Copyright (C) 2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/source/helpers/populate_factory.h"
#include "opencl/source/helpers/cl_hw_helper_base.inl"
#include "hw_cmds.h"
namespace NEO {

// Family alias and core id used by the Gen8 registration below.
using Family = BDWFamily;
static auto gfxCore = IGFX_GEN8_CORE;

// Stores the address of the ClHwHelperHw<Family> instance in the
// clHwHelperFactory slot indexed by gfxCore.
template <>
void populateFactoryTable<ClHwHelperHw<Family>>() {
    // Block-scope declaration only; the table is defined outside this function.
    extern ClHwHelper *clHwHelperFactory[IGFX_MAX_CORE];

    ClHwHelper &familyHelper = ClHwHelperHw<Family>::get();
    clHwHelperFactory[gfxCore] = &familyHelper;
}

// Explicit instantiation of the helper for this family.
template class ClHwHelperHw<Family>;

} // namespace NEO

View File

@ -13,6 +13,7 @@
#include "opencl/source/command_stream/tbx_command_stream_receiver_hw.h"
#include "opencl/source/device_queue/device_queue_hw.h"
#include "opencl/source/event/perf_counter.h"
#include "opencl/source/helpers/cl_hw_helper.h"
#include "opencl/source/mem_obj/buffer.h"
#include "opencl/source/mem_obj/image.h"
#include "opencl/source/sampler/sampler.h"
@ -26,13 +27,14 @@ typedef BDWFamily Family;
// Constructing an EnableOCLGen8 calls populateFactoryTable<T>() once per listed
// hardware-specific class, each parameterized with Family (BDWFamily here).
// NOTE(review): CommandQueueHw, CommandStreamReceiverHw, DeviceQueueHw and
// TbxCommandStreamReceiverHw each appear twice below; this looks like the previous and
// the new call order being shown together in this view - confirm against the real file.
struct EnableOCLGen8 {
EnableOCLGen8() {
populateFactoryTable<AUBCommandStreamReceiverHw<Family>>();
populateFactoryTable<TbxCommandStreamReceiverHw<Family>>();
populateFactoryTable<CommandQueueHw<Family>>();
populateFactoryTable<DeviceQueueHw<Family>>();
populateFactoryTable<CommandStreamReceiverHw<Family>>();
populateFactoryTable<BufferHw<Family>>();
// Registration of the OpenCL hardware helper for this family.
populateFactoryTable<ClHwHelperHw<Family>>();
populateFactoryTable<CommandQueueHw<Family>>();
populateFactoryTable<CommandStreamReceiverHw<Family>>();
populateFactoryTable<DeviceQueueHw<Family>>();
populateFactoryTable<ImageHw<Family>>();
populateFactoryTable<SamplerHw<Family>>();
populateFactoryTable<TbxCommandStreamReceiverHw<Family>>();
}
};

View File

@ -0,0 +1,27 @@
/*
* Copyright (C) 2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/source/helpers/populate_factory.h"
#include "opencl/source/helpers/cl_hw_helper_base.inl"
#include "hw_cmds.h"
namespace NEO {

// Family alias and core id used by the Gen9 registration below.
using Family = SKLFamily;
static auto gfxCore = IGFX_GEN9_CORE;

// Stores the address of the ClHwHelperHw<Family> instance in the
// clHwHelperFactory slot indexed by gfxCore.
template <>
void populateFactoryTable<ClHwHelperHw<Family>>() {
    // Block-scope declaration only; the table is defined outside this function.
    extern ClHwHelper *clHwHelperFactory[IGFX_MAX_CORE];

    ClHwHelper &familyHelper = ClHwHelperHw<Family>::get();
    clHwHelperFactory[gfxCore] = &familyHelper;
}

// Explicit instantiation of the helper for this family.
template class ClHwHelperHw<Family>;

} // namespace NEO

View File

@ -13,6 +13,7 @@
#include "opencl/source/command_stream/tbx_command_stream_receiver_hw.h"
#include "opencl/source/device_queue/device_queue_hw.h"
#include "opencl/source/event/perf_counter.h"
#include "opencl/source/helpers/cl_hw_helper.h"
#include "opencl/source/mem_obj/buffer.h"
#include "opencl/source/mem_obj/image.h"
#include "opencl/source/sampler/sampler.h"
@ -26,13 +27,14 @@ typedef SKLFamily Family;
// Constructing an EnableOCLGen9 calls populateFactoryTable<T>() once per listed
// hardware-specific class, each parameterized with Family (SKLFamily here).
// NOTE(review): CommandQueueHw, CommandStreamReceiverHw, DeviceQueueHw and
// TbxCommandStreamReceiverHw each appear twice below; this looks like the previous and
// the new call order being shown together in this view - confirm against the real file.
struct EnableOCLGen9 {
EnableOCLGen9() {
populateFactoryTable<AUBCommandStreamReceiverHw<Family>>();
populateFactoryTable<TbxCommandStreamReceiverHw<Family>>();
populateFactoryTable<CommandQueueHw<Family>>();
populateFactoryTable<DeviceQueueHw<Family>>();
populateFactoryTable<CommandStreamReceiverHw<Family>>();
populateFactoryTable<BufferHw<Family>>();
// Registration of the OpenCL hardware helper for this family.
populateFactoryTable<ClHwHelperHw<Family>>();
populateFactoryTable<CommandQueueHw<Family>>();
populateFactoryTable<CommandStreamReceiverHw<Family>>();
populateFactoryTable<DeviceQueueHw<Family>>();
populateFactoryTable<ImageHw<Family>>();
populateFactoryTable<SamplerHw<Family>>();
populateFactoryTable<TbxCommandStreamReceiverHw<Family>>();
}
};

View File

@ -13,6 +13,9 @@ set(RUNTIME_SRCS_HELPERS_BASE
${CMAKE_CURRENT_SOURCE_DIR}${BRANCH_DIR_SUFFIX}/cl_device_helpers.cpp
${CMAKE_CURRENT_SOURCE_DIR}/cl_device_helpers.h
${CMAKE_CURRENT_SOURCE_DIR}/cl_helper.h
${CMAKE_CURRENT_SOURCE_DIR}/cl_hw_helper.cpp
${CMAKE_CURRENT_SOURCE_DIR}/cl_hw_helper.h
${CMAKE_CURRENT_SOURCE_DIR}/cl_hw_helper_base.inl
${CMAKE_CURRENT_SOURCE_DIR}/convert_color.h
${CMAKE_CURRENT_SOURCE_DIR}/destructor_callback.h
${CMAKE_CURRENT_SOURCE_DIR}/dispatch_info.cpp

View File

@ -0,0 +1,18 @@
/*
* Copyright (C) 2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "opencl/source/helpers/cl_hw_helper.h"
namespace NEO {

// Table of OpenCL hardware helpers indexed by core family; every slot starts
// out as a null pointer.
ClHwHelper *clHwHelperFactory[IGFX_MAX_CORE] = {};

// Returns the helper stored in the table slot for gfxCore.
// The slot is dereferenced without a null or range check, so the entry must
// have been filled in before this is called.
ClHwHelper &ClHwHelper::get(GFXCORE_FAMILY gfxCore) {
    ClHwHelper *const coreHelper = clHwHelperFactory[gfxCore];
    return *coreHelper;
}

} // namespace NEO

View File

@ -0,0 +1,44 @@
/*
* Copyright (C) 2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
#include "igfxfmid.h"
namespace NEO {
struct KernelInfo;
// Abstract interface for hardware-specific OpenCL helper queries.
// Instances are not constructed directly (the constructor is protected); callers
// obtain a reference through the static get(gfxCore) accessor.
class ClHwHelper {
public:
// Returns the helper associated with the given core family.
static ClHwHelper &get(GFXCORE_FAMILY gfxCore);
// Reports whether the kernel described by kernelInfo requires aux resolves.
virtual bool requiresAuxResolves(const KernelInfo &kernelInfo) const = 0;
protected:
// Reports whether the kernel described by kernelInfo has stateless access to a buffer.
virtual bool hasStatelessAccessToBuffer(const KernelInfo &kernelInfo) const = 0;
ClHwHelper() = default;
};
// Per-family implementation of ClHwHelper, parameterized on the graphics family type.
// The overriding member functions are only declared here; their definitions live
// outside this header.
template <typename GfxFamily>
class ClHwHelperHw : public ClHwHelper {
public:
// Returns a reference to a function-local static ClHwHelperHw<GfxFamily>,
// so every call for the same GfxFamily yields the same object.
static ClHwHelper &get() {
static ClHwHelperHw<GfxFamily> clHwHelper;
return clHwHelper;
}
bool requiresAuxResolves(const KernelInfo &kernelInfo) const override;
protected:
bool hasStatelessAccessToBuffer(const KernelInfo &kernelInfo) const override;
// Protected so that instances are created only via get() or by derived classes.
ClHwHelperHw() = default;
};
} // namespace NEO

View File

@ -0,0 +1,29 @@
/*
* Copyright (C) 2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "opencl/source/helpers/cl_hw_helper.h"
#include "opencl/source/program/kernel_info.h"
namespace NEO {

// Aux resolves are required exactly when hasStatelessAccessToBuffer() reports
// true for the given kernel.
template <typename GfxFamily>
inline bool ClHwHelperHw<GfxFamily>::requiresAuxResolves(const KernelInfo &kernelInfo) const {
    const bool statelessBufferAccess = hasStatelessAccessToBuffer(kernelInfo);
    return statelessBufferAccess;
}

// Returns true if at least one kernel argument is a buffer whose access is not
// purely stateful; returns false otherwise, including when there are no arguments.
template <typename GfxFamily>
inline bool ClHwHelperHw<GfxFamily>::hasStatelessAccessToBuffer(const KernelInfo &kernelInfo) const {
    for (const auto &argInfo : kernelInfo.kernelArgInfo) {
        // Non-buffer arguments never contribute to the result.
        if (argInfo.isBuffer && !argInfo.pureStatefulBufferAccess) {
            return true;
        }
    }
    return false;
}

} // namespace NEO

View File

@ -32,6 +32,7 @@
#include "opencl/source/device_queue/device_queue.h"
#include "opencl/source/execution_model/device_enqueue.h"
#include "opencl/source/gtpin/gtpin_notify.h"
#include "opencl/source/helpers/cl_hw_helper.h"
#include "opencl/source/helpers/dispatch_info.h"
#include "opencl/source/helpers/get_info_status_mapper.h"
#include "opencl/source/helpers/per_thread_data.h"
@ -378,7 +379,8 @@ cl_int Kernel::initialize() {
}
}
auxTranslationRequired = HwHelper::renderCompressedBuffersSupported(hwInfo) && hwHelper.requiresAuxResolves(kernelInfo);
auto &clHwHelper = ClHwHelper::get(hwInfo.platform.eRenderCoreFamily);
auxTranslationRequired = HwHelper::renderCompressedBuffersSupported(hwInfo) && clHwHelper.requiresAuxResolves(kernelInfo);
if (DebugManager.flags.ForceAuxTranslationEnabled.get() != -1) {
auxTranslationRequired &= !!DebugManager.flags.ForceAuxTranslationEnabled.get();
}

View File

@ -8,6 +8,7 @@
#include "shared/source/os_interface/hw_info_config.h"
#include "shared/test/unit_test/helpers/debug_manager_state_restore.h"
#include "opencl/source/helpers/cl_hw_helper.h"
#include "opencl/test/unit_test/gen12lp/special_ult_helper_gen12lp.h"
#include "opencl/test/unit_test/helpers/hw_helper_tests.h"
#include "opencl/test/unit_test/mocks/mock_context.h"
@ -18,7 +19,7 @@
using HwHelperTestGen12Lp = HwHelperTest;
GEN12LPTEST_F(HwHelperTestGen12Lp, givenTglLpThenAuxTranslationIsRequired) {
auto &helper = HwHelper::get(renderCoreFamily);
auto &clHwHelper = ClHwHelper::get(renderCoreFamily);
for (auto isPureStateful : {false, true}) {
KernelInfo kernelInfo{};
@ -27,7 +28,7 @@ GEN12LPTEST_F(HwHelperTestGen12Lp, givenTglLpThenAuxTranslationIsRequired) {
argInfo.pureStatefulBufferAccess = isPureStateful;
kernelInfo.kernelArgInfo.push_back(std::move(argInfo));
EXPECT_EQ(!isPureStateful, helper.requiresAuxResolves(kernelInfo));
EXPECT_EQ(!isPureStateful, clHwHelper.requiresAuxResolves(kernelInfo));
}
}

View File

@ -18,6 +18,7 @@
#include "shared/test/unit_test/utilities/base_object_utils.h"
#include "opencl/source/built_ins/builtins_dispatch_builder.h"
#include "opencl/source/helpers/cl_hw_helper.h"
#include "opencl/source/helpers/memory_properties_helpers.h"
#include "opencl/source/helpers/surface_formats.h"
#include "opencl/source/kernel/kernel.h"
@ -2732,11 +2733,11 @@ TEST(KernelTest, givenFtrRenderCompressedBuffersWhenInitializingArgsWithNonState
capabilityTable.ftrRenderCompressedBuffers = true;
kernel.mockKernel->initialize();
EXPECT_EQ(HwHelper::get(hwInfo->platform.eRenderCoreFamily).requiresAuxResolves(kernel.kernelInfo), kernel.mockKernel->isAuxTranslationRequired());
EXPECT_EQ(ClHwHelper::get(hwInfo->platform.eRenderCoreFamily).requiresAuxResolves(kernel.kernelInfo), kernel.mockKernel->isAuxTranslationRequired());
DebugManager.flags.ForceAuxTranslationEnabled.set(-1);
kernel.mockKernel->initialize();
EXPECT_EQ(HwHelper::get(hwInfo->platform.eRenderCoreFamily).requiresAuxResolves(kernel.kernelInfo), kernel.mockKernel->isAuxTranslationRequired());
EXPECT_EQ(ClHwHelper::get(hwInfo->platform.eRenderCoreFamily).requiresAuxResolves(kernel.kernelInfo), kernel.mockKernel->isAuxTranslationRequired());
DebugManager.flags.ForceAuxTranslationEnabled.set(0);
kernel.mockKernel->initialize();
@ -2761,7 +2762,7 @@ TEST(KernelTest, WhenAuxTranslationIsRequiredThenKernelSetsRequiredResolvesInCon
kernel.mockKernel->initialize();
if (HwHelper::get(device->getHardwareInfo().platform.eRenderCoreFamily).requiresAuxResolves(kernel.kernelInfo)) {
if (ClHwHelper::get(device->getHardwareInfo().platform.eRenderCoreFamily).requiresAuxResolves(kernel.kernelInfo)) {
EXPECT_TRUE(context->getResolvesRequiredInKernels());
} else {
EXPECT_FALSE(context->getResolvesRequiredInKernels());
@ -2807,7 +2808,7 @@ TEST(KernelTest, givenDebugVariableSetWhenKernelHasStatefulBufferAccessThenMarkK
kernel.mockKernel->initialize();
if (HwHelper::get(localHwInfo.platform.eRenderCoreFamily).requiresAuxResolves(kernel.kernelInfo)) {
if (ClHwHelper::get(localHwInfo.platform.eRenderCoreFamily).requiresAuxResolves(kernel.kernelInfo)) {
EXPECT_TRUE(kernel.mockKernel->isAuxTranslationRequired());
} else {
EXPECT_FALSE(kernel.mockKernel->isAuxTranslationRequired());

View File

@ -9,11 +9,11 @@
#include "shared/source/device/device_info.h"
#include "shared/source/helpers/hw_helper.h"
#include "shared/source/helpers/hw_info.h"
#include "shared/test/unit_test/helpers/default_hw_info.h"
#include "opencl/source/cl_device/cl_device.h"
#include "opencl/source/context/context.h"
#include "opencl/source/helpers/cl_hw_helper.h"
#include "opencl/source/kernel/kernel.h"
using namespace NEO;
@ -51,6 +51,6 @@ bool TestChecks::supportsAuxResolves() {
argInfo.pureStatefulBufferAccess = false;
kernelInfo.kernelArgInfo.push_back(std::move(argInfo));
auto &hwHelper = HwHelper::get(defaultHwInfo->platform.eRenderCoreFamily);
return hwHelper.requiresAuxResolves(kernelInfo);
auto &clHwHelper = ClHwHelper::get(defaultHwInfo->platform.eRenderCoreFamily);
return clHwHelper.requiresAuxResolves(kernelInfo);
}

View File

@ -27,7 +27,6 @@ class GraphicsAllocation;
struct AllocationData;
struct AllocationProperties;
struct HardwareCapabilities;
struct KernelInfo;
struct RootDeviceEnvironment;
struct PipeControlArgs;
@ -100,7 +99,6 @@ class HwHelper {
virtual uint32_t getMaxThreadsForWorkgroup(const HardwareInfo &hwInfo, uint32_t maxNumEUsPerSubSlice) const;
virtual uint32_t getMetricsLibraryGenId() const = 0;
virtual uint32_t getMocsIndex(const GmmHelper &gmmHelper, bool l3enabled, bool l1enabled) const = 0;
virtual bool requiresAuxResolves(const KernelInfo &kernelInfo) const = 0;
virtual bool tilingAllowed(bool isSharedContext, bool isImage1d, bool forceLinearStorage) = 0;
virtual uint32_t getBarriersCountFromHasBarriers(uint32_t hasBarriers) = 0;
virtual uint32_t calculateAvailableThreadCount(PRODUCT_FAMILY family, uint32_t grfCount, uint32_t euCount,
@ -135,7 +133,6 @@ class HwHelper {
protected:
virtual LocalMemoryAccessMode getDefaultLocalMemoryAccessMode(const HardwareInfo &hwInfo) const = 0;
virtual bool hasStatelessAccessToBuffer(const KernelInfo &kernelInfo) const = 0;
HwHelper() = default;
};
@ -253,8 +250,6 @@ class HwHelperHw : public HwHelper {
uint32_t getMocsIndex(const GmmHelper &gmmHelper, bool l3enabled, bool l1enabled) const override;
bool requiresAuxResolves(const KernelInfo &kernelInfo) const override;
bool tilingAllowed(bool isSharedContext, bool isImage1d, bool forceLinearStorage) override;
uint32_t getBarriersCountFromHasBarriers(uint32_t hasBarriers) override;
@ -317,7 +312,6 @@ class HwHelperHw : public HwHelper {
protected:
LocalMemoryAccessMode getDefaultLocalMemoryAccessMode(const HardwareInfo &hwInfo) const override;
bool hasStatelessAccessToBuffer(const KernelInfo &kernelInfo) const override;
static const AuxTranslationMode defaultAuxTranslationMode;
HwHelperHw() = default;

View File

@ -315,11 +315,6 @@ uint32_t HwHelperHw<GfxFamily>::getMetricsLibraryGenId() const {
return static_cast<uint32_t>(MetricsLibraryApi::ClientGen::Gen9);
}
template <typename GfxFamily>
inline bool HwHelperHw<GfxFamily>::requiresAuxResolves(const KernelInfo &kernelInfo) const {
return hasStatelessAccessToBuffer(kernelInfo);
}
template <typename GfxFamily>
bool HwHelperHw<GfxFamily>::tilingAllowed(bool isSharedContext, bool isImage1d, bool forceLinearStorage) {
if (DebugManager.flags.ForceLinearImages.get() || forceLinearStorage || isSharedContext) {
@ -452,17 +447,6 @@ inline LocalMemoryAccessMode HwHelperHw<GfxFamily>::getDefaultLocalMemoryAccessM
return LocalMemoryAccessMode::Default;
}
template <typename GfxFamily>
inline bool HwHelperHw<GfxFamily>::hasStatelessAccessToBuffer(const KernelInfo &kernelInfo) const {
bool hasStatelessAccessToBuffer = false;
for (uint32_t i = 0; i < kernelInfo.kernelArgInfo.size(); ++i) {
if (kernelInfo.kernelArgInfo[i].isBuffer) {
hasStatelessAccessToBuffer |= !kernelInfo.kernelArgInfo[i].pureStatefulBufferAccess;
}
}
return hasStatelessAccessToBuffer;
}
template <typename GfxFamily>
size_t MemorySynchronizationCommands<GfxFamily>::getSizeForFullCacheFlush() {
return sizeof(typename GfxFamily::PIPE_CONTROL);