Add helper function to enable stateless compression

Related-To: NEO-5107

Signed-off-by: Milczarek, Slawomir <slawomir.milczarek@intel.com>
This commit is contained in:
Milczarek, Slawomir
2021-08-02 10:44:48 +00:00
committed by Compute-Runtime-Automation
parent d99f20d400
commit eb14d8458b
38 changed files with 133 additions and 76 deletions

View File

@@ -847,7 +847,7 @@ CompletionStamp CommandQueueHw<GfxFamily>::enqueueNonBlocked(
}
}
auto memoryCompressionState = getGpgpuCommandStreamReceiver().getMemoryCompressionState(auxTranslationRequired);
auto memoryCompressionState = getGpgpuCommandStreamReceiver().getMemoryCompressionState(auxTranslationRequired, device->getHardwareInfo());
DispatchFlags dispatchFlags(
{}, //csrDependencies

View File

@@ -28,8 +28,8 @@ class ClHwHelper {
public:
static ClHwHelper &get(GFXCORE_FAMILY gfxCore);
virtual bool requiresNonAuxMode(const ArgDescPointer &argAsPtr) const = 0;
virtual bool requiresAuxResolves(const KernelInfo &kernelInfo) const = 0;
virtual bool requiresNonAuxMode(const ArgDescPointer &argAsPtr, const HardwareInfo &hwInfo) const = 0;
virtual bool requiresAuxResolves(const KernelInfo &kernelInfo, const HardwareInfo &hwInfo) const = 0;
virtual bool allowRenderCompressionForContext(const ClDevice &clDevice, const Context &context) const = 0;
virtual cl_command_queue_capabilities_intel getAdditionalDisabledQueueFamilyCapabilities(EngineGroupType type) const = 0;
virtual bool getQueueFamilyName(std::string &name, EngineGroupType type) const = 0;
@@ -57,8 +57,8 @@ class ClHwHelperHw : public ClHwHelper {
return clHwHelper;
}
bool requiresNonAuxMode(const ArgDescPointer &argAsPtr) const override;
bool requiresAuxResolves(const KernelInfo &kernelInfo) const override;
bool requiresNonAuxMode(const ArgDescPointer &argAsPtr, const HardwareInfo &hwInfo) const override;
bool requiresAuxResolves(const KernelInfo &kernelInfo, const HardwareInfo &hwInfo) const override;
bool allowRenderCompressionForContext(const ClDevice &clDevice, const Context &context) const override;
cl_command_queue_capabilities_intel getAdditionalDisabledQueueFamilyCapabilities(EngineGroupType type) const override;
bool getQueueFamilyName(std::string &name, EngineGroupType type) const override;

View File

@@ -14,12 +14,12 @@
namespace NEO {
template <typename GfxFamily>
inline bool ClHwHelperHw<GfxFamily>::requiresNonAuxMode(const ArgDescPointer &argAsPtr) const {
inline bool ClHwHelperHw<GfxFamily>::requiresNonAuxMode(const ArgDescPointer &argAsPtr, const HardwareInfo &hwInfo) const {
return !argAsPtr.isPureStateful();
}
template <typename GfxFamily>
inline bool ClHwHelperHw<GfxFamily>::requiresAuxResolves(const KernelInfo &kernelInfo) const {
inline bool ClHwHelperHw<GfxFamily>::requiresAuxResolves(const KernelInfo &kernelInfo, const HardwareInfo &hwInfo) const {
return hasStatelessAccessToBuffer(kernelInfo);
}

View File

@@ -215,7 +215,7 @@ CompletionStamp &CommandComputeKernel::submit(uint32_t taskLevel, bool terminate
const auto &kernelDescriptor = kernel->getKernelInfo().kernelDescriptor;
auto memoryCompressionState = commandStreamReceiver.getMemoryCompressionState(kernel->isAuxTranslationRequired());
auto memoryCompressionState = commandStreamReceiver.getMemoryCompressionState(kernel->isAuxTranslationRequired(), commandQueue.getDevice().getHardwareInfo());
DispatchFlags dispatchFlags(
{}, //csrDependencies

View File

@@ -261,7 +261,7 @@ cl_int Kernel::initialize() {
patchBlocksSimdSize();
auto &clHwHelper = ClHwHelper::get(hwInfo.platform.eRenderCoreFamily);
auxTranslationRequired = HwHelper::renderCompressedBuffersSupported(hwInfo) && clHwHelper.requiresAuxResolves(kernelInfo);
auxTranslationRequired = HwHelper::renderCompressedBuffersSupported(hwInfo) && clHwHelper.requiresAuxResolves(kernelInfo, hwInfo);
if (DebugManager.flags.ForceAuxTranslationEnabled.get() != -1) {
auxTranslationRequired &= !!DebugManager.flags.ForceAuxTranslationEnabled.get();
}
@@ -895,7 +895,7 @@ cl_int Kernel::setArgSvmAlloc(uint32_t argIndex, void *svmPtr, GraphicsAllocatio
}
disableL3 = (argIndex == 0);
} else if (svmAlloc && svmAlloc->getAllocationType() == GraphicsAllocation::AllocationType::BUFFER_COMPRESSED &&
clHwHelper.requiresNonAuxMode(argAsPtr)) {
clHwHelper.requiresNonAuxMode(argAsPtr, hwInfo)) {
forceNonAuxMode = true;
}
@@ -1401,7 +1401,7 @@ cl_int Kernel::setArgBuffer(uint32_t argIndex,
}
disableL3 = (argIndex == 0);
} else if (graphicsAllocation->getAllocationType() == GraphicsAllocation::AllocationType::BUFFER_COMPRESSED &&
clHwHelper.requiresNonAuxMode(argAsPtr)) {
clHwHelper.requiresNonAuxMode(argAsPtr, hwInfo)) {
forceNonAuxMode = true;
}
@@ -2468,7 +2468,8 @@ void Kernel::fillWithKernelObjsForAuxTranslation(KernelObjsForAuxTranslation &ke
}
}
}
if (DebugManager.flags.EnableStatelessCompression.get()) {
auto &hwHelper = HwHelper::get(getDevice().getHardwareInfo().platform.eRenderCoreFamily);
if (hwHelper.allowStatelessCompression(getDevice().getHardwareInfo())) {
for (auto gfxAllocation : kernelUnifiedMemoryGfxAllocations) {
if ((gfxAllocation->getAllocationType() == GraphicsAllocation::AllocationType::BUFFER_COMPRESSED) ||
(gfxAllocation->getAllocationType() == GraphicsAllocation::AllocationType::SVM_GPU)) {
@@ -2788,7 +2789,8 @@ bool Kernel::requiresLimitedWorkgroupSize() const {
}
void Kernel::updateAuxTranslationRequired() {
if (DebugManager.flags.EnableStatelessCompression.get()) {
auto &hwHelper = HwHelper::get(getDevice().getHardwareInfo().platform.eRenderCoreFamily);
if (hwHelper.allowStatelessCompression(getDevice().getHardwareInfo())) {
if (hasDirectStatelessAccessToHostMemory() || hasIndirectStatelessAccessToHostMemory()) {
setAuxTranslationRequired(true);
}

View File

@@ -190,7 +190,7 @@ Buffer *Buffer::create(Context *context,
*context,
HwHelper::renderCompressedBuffersSupported(*hwInfo),
memoryManager->isLocalMemorySupported(rootDeviceIndex),
HwHelper::get(hwInfo->platform.eRenderCoreFamily).isBufferSizeSuitableForRenderCompression(size));
HwHelper::get(hwInfo->platform.eRenderCoreFamily).isBufferSizeSuitableForRenderCompression(size, *hwInfo));
if (ptr) {
if (!memoryProperties.flags.useHostPtr) {

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2020 Intel Corporation
* Copyright (C) 2020-2021 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -8,7 +8,7 @@
#include "shared/source/memory_manager/compression_selector.h"
namespace NEO {
bool CompressionSelector::preferRenderCompressedBuffer(const AllocationProperties &properties) {
bool CompressionSelector::preferRenderCompressedBuffer(const AllocationProperties &properties, const HardwareInfo &hwInfo) {
return GraphicsAllocation::AllocationType::BUFFER_COMPRESSED == properties.allocationType;
}

View File

@@ -76,7 +76,8 @@ void PrintfHandler::makeResident(CommandStreamReceiver &commandStreamReceiver) {
}
void PrintfHandler::printEnqueueOutput() {
if (DebugManager.flags.EnableStatelessCompression.get()) {
auto &helper = HwHelper::get(device.getHardwareInfo().platform.eRenderCoreFamily);
if (helper.allowStatelessCompression(device.getHardwareInfo())) {
auto &bcsEngine = device.getEngine(EngineHelpers::getBcsEngineType(device.getHardwareInfo(), device.getSelectorCopyEngine()), EngineUsage::Regular);
BlitPropertiesContainer blitPropertiesContainer;
blitPropertiesContainer.push_back(

View File

@@ -26,8 +26,8 @@ void populateFactoryTable<ClHwHelperHw<Family>>() {
}
template <>
bool ClHwHelperHw<Family>::requiresNonAuxMode(const ArgDescPointer &argAsPtr) const {
if (DebugManager.flags.EnableStatelessCompression.get()) {
bool ClHwHelperHw<Family>::requiresNonAuxMode(const ArgDescPointer &argAsPtr, const HardwareInfo &hwInfo) const {
if (HwHelperHw<Family>::get().allowStatelessCompression(hwInfo)) {
return false;
} else {
return !argAsPtr.isPureStateful();
@@ -35,8 +35,8 @@ bool ClHwHelperHw<Family>::requiresNonAuxMode(const ArgDescPointer &argAsPtr) co
}
template <>
bool ClHwHelperHw<Family>::requiresAuxResolves(const KernelInfo &kernelInfo) const {
if (DebugManager.flags.EnableStatelessCompression.get()) {
bool ClHwHelperHw<Family>::requiresAuxResolves(const KernelInfo &kernelInfo, const HardwareInfo &hwInfo) const {
if (HwHelperHw<Family>::get().allowStatelessCompression(hwInfo)) {
return false;
} else {
return hasStatelessAccessToBuffer(kernelInfo);