Add debug flag to enable compression in L0 USM allocations

Related-To: NEO-5069

Change-Id: Icbfeb8d72cd764bb3c90d5c699998455f81dd3ee
Signed-off-by: Zbigniew Zdanowicz <zbigniew.zdanowicz@intel.com>
This commit is contained in:
Zbigniew Zdanowicz
2020-09-11 15:44:09 +02:00
committed by sys_ocldev
parent cbf838e5ab
commit 7d506e3608
18 changed files with 165 additions and 46 deletions

View File

@ -64,6 +64,7 @@ set(L0_RUNTIME_SOURCES
${CMAKE_CURRENT_SOURCE_DIR}/memory/memory.cpp
${CMAKE_CURRENT_SOURCE_DIR}/memory/memory_operations_helper.h
${CMAKE_CURRENT_SOURCE_DIR}/memory/cpu_page_fault_memory_manager.cpp
${CMAKE_CURRENT_SOURCE_DIR}/memory_manager/compression_selector_l0.cpp
${CMAKE_CURRENT_SOURCE_DIR}/module/module.h
${CMAKE_CURRENT_SOURCE_DIR}/module/module_build_log.cpp
${CMAKE_CURRENT_SOURCE_DIR}/module/module_build_log.h

View File

@ -0,0 +1,23 @@
/*
* Copyright (C) 2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/source/debug_settings/debug_settings_manager.h"
#include "shared/source/memory_manager/compression_selector.h"
namespace NEO {
bool CompressionSelector::preferRenderCompressedBuffer(const AllocationProperties &properties) {
bool preferredCompression = false;
int32_t compressionEnabled = DebugManager.flags.EnableUsmCompression.get();
if (compressionEnabled == 1) {
if ((properties.allocationType == GraphicsAllocation::AllocationType::SVM_GPU) ||
(properties.flags.isUSMDeviceAllocation)) {
preferredCompression = true;
}
}
return preferredCompression;
}
} // namespace NEO

View File

@ -0,0 +1,11 @@
#
# Copyright (C) 2020 Intel Corporation
#
# SPDX-License-Identifier: MIT
#
# Attach this directory's build script and its compression-selector test source
# to the target named by TARGET_NAME.
target_sources(${TARGET_NAME} PRIVATE
${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt
${CMAKE_CURRENT_SOURCE_DIR}/compression_selector_l0_tests.cpp
)
# NOTE(review): add_subdirectories() is not a built-in CMake command; presumably a
# project macro that descends into child directories - confirm.
add_subdirectories()

View File

@ -0,0 +1,77 @@
/*
* Copyright (C) 2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/source/memory_manager/compression_selector.h"
#include "shared/test/unit_test/helpers/debug_manager_state_restore.h"
#include "test.h"
namespace L0 {
namespace ult {
// The debug flag is left untouched here: a request marked as a USM device
// allocation must not ask for compression.
TEST(CompressionSelectorL0Tests, GivenDefaultDebugFlagWhenProvidingUsmAllocationThenExpectCompressionDisabled) {
    DeviceBitfield subDeviceMask{0x0};
    AllocationProperties allocProps(0, MemoryConstants::pageSize, GraphicsAllocation::AllocationType::BUFFER, subDeviceMask);
    allocProps.flags.isUSMDeviceAllocation = 1u;

    const bool compressionPreferred = NEO::CompressionSelector::preferRenderCompressedBuffer(allocProps);

    EXPECT_FALSE(compressionPreferred);
}
// EnableUsmCompression explicitly set to 0: a USM device allocation must not
// ask for compression.
TEST(CompressionSelectorL0Tests, GivenDisabledDebugFlagWhenProvidingUsmAllocationThenExpectCompressionDisabled) {
    // Declared before the flag is modified (presumably restores debug flags on scope exit).
    DebugManagerStateRestore flagsGuard;
    DebugManager.flags.EnableUsmCompression.set(0);

    DeviceBitfield subDeviceMask{0x0};
    AllocationProperties allocProps(0, MemoryConstants::pageSize, GraphicsAllocation::AllocationType::BUFFER, subDeviceMask);
    allocProps.flags.isUSMDeviceAllocation = 1u;

    const bool compressionPreferred = NEO::CompressionSelector::preferRenderCompressedBuffer(allocProps);

    EXPECT_FALSE(compressionPreferred);
}
// EnableUsmCompression set to 1: a BUFFER request flagged as a USM device
// allocation must ask for compression.
TEST(CompressionSelectorL0Tests, GivenEnabledDebugFlagWhenProvidingUsmAllocationThenExpectCompressionEnabled) {
    // Declared before the flag is modified (presumably restores debug flags on scope exit).
    DebugManagerStateRestore flagsGuard;
    DebugManager.flags.EnableUsmCompression.set(1);

    DeviceBitfield subDeviceMask{0x0};
    AllocationProperties allocProps(0, MemoryConstants::pageSize, GraphicsAllocation::AllocationType::BUFFER, subDeviceMask);
    allocProps.flags.isUSMDeviceAllocation = 1u;

    const bool compressionPreferred = NEO::CompressionSelector::preferRenderCompressedBuffer(allocProps);

    EXPECT_TRUE(compressionPreferred);
}
// EnableUsmCompression set to 1: an SVM_GPU request must ask for compression
// without the USM device flag being set by the test.
TEST(CompressionSelectorL0Tests, GivenEnabledDebugFlagWhenProvidingSvmGpuAllocationThenExpectCompressionEnabled) {
    // Declared before the flag is modified (presumably restores debug flags on scope exit).
    DebugManagerStateRestore flagsGuard;
    DebugManager.flags.EnableUsmCompression.set(1);

    DeviceBitfield subDeviceMask{0x0};
    AllocationProperties allocProps(0, MemoryConstants::pageSize, GraphicsAllocation::AllocationType::SVM_GPU, subDeviceMask);

    const bool compressionPreferred = NEO::CompressionSelector::preferRenderCompressedBuffer(allocProps);

    EXPECT_TRUE(compressionPreferred);
}
// EnableUsmCompression set to 1: a BUFFER_HOST_MEMORY request on which the test
// does not set the USM device flag must not ask for compression.
TEST(CompressionSelectorL0Tests, GivenEnabledDebugFlagWhenProvidingOtherAllocationThenExpectCompressionDisabled) {
    // Declared before the flag is modified (presumably restores debug flags on scope exit).
    DebugManagerStateRestore flagsGuard;
    DebugManager.flags.EnableUsmCompression.set(1);

    DeviceBitfield subDeviceMask{0x0};
    AllocationProperties allocProps(0, MemoryConstants::pageSize, GraphicsAllocation::AllocationType::BUFFER_HOST_MEMORY, subDeviceMask);

    const bool compressionPreferred = NEO::CompressionSelector::preferRenderCompressedBuffer(allocProps);

    EXPECT_FALSE(compressionPreferred);
}
} // namespace ult
} // namespace L0

View File

@ -8,6 +8,7 @@ set(RUNTIME_SRCS_MEMORY_MANAGER
${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt
${CMAKE_CURRENT_SOURCE_DIR}/address_mapper.cpp
${CMAKE_CURRENT_SOURCE_DIR}/address_mapper.h
${CMAKE_CURRENT_SOURCE_DIR}/compression_selector_ocl.cpp
${CMAKE_CURRENT_SOURCE_DIR}/cpu_page_fault_manager_memory_sync.cpp
${CMAKE_CURRENT_SOURCE_DIR}/mem_obj_surface.h
${CMAKE_CURRENT_SOURCE_DIR}/memory_banks.h

View File

@ -0,0 +1,15 @@
/*
* Copyright (C) 2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/source/memory_manager/compression_selector.h"
namespace NEO {
// Compression policy of this runtime flavour: compression is preferred exactly
// when the request's allocation type is BUFFER_COMPRESSED.
bool CompressionSelector::preferRenderCompressedBuffer(const AllocationProperties &properties) {
    const auto requestedType = properties.allocationType;
    return requestedType == GraphicsAllocation::AllocationType::BUFFER_COMPRESSED;
}
} // namespace NEO

View File

@ -460,38 +460,6 @@ HWTEST_F(HwHelperTest, givenCreatedSurfaceStateBufferWhenGmmAndAllocationCompres
alignedFree(stateBuffer);
}
// Programs a buffer surface state for an allocation whose Gmm is marked render
// compressed while the allocation type itself is UNKNOWN, and expects the state
// to report IA coherency with no auxiliary surface mode.
// NOTE(review): the expectation presumably relies on setRenderSurfaceStateForBuffer
// also checking the allocation type - confirm it still holds for the current helper.
HWTEST_F(HwHelperTest, givenCreatedSurfaceStateBufferWhenGmmCompressionEnabledAndAllocationDisabledAnNonAuxDisabledThenSetCoherencyToIaAndAuxModeToNone) {
using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE;
using SURFACE_TYPE = typename RENDER_SURFACE_STATE::SURFACE_TYPE;
using AUXILIARY_SURFACE_MODE = typename RENDER_SURFACE_STATE::AUXILIARY_SURFACE_MODE;
auto &rootDeviceEnvironment = pDevice->getRootDeviceEnvironment();
// Raw storage for the surface state, sized and aligned to the state structure.
void *stateBuffer = alignedMalloc(sizeof(RENDER_SURFACE_STATE), sizeof(RENDER_SURFACE_STATE));
ASSERT_NE(nullptr, stateBuffer);
RENDER_SURFACE_STATE *state = reinterpret_cast<RENDER_SURFACE_STATE *>(stateBuffer);
// Start from an all-zero state so only values written by the helper are observed.
memset(stateBuffer, 0, sizeof(RENDER_SURFACE_STATE));
auto &helper = HwHelper::get(renderCoreFamily);
size_t size = 0x1000;
uint64_t addr = 0x2000;
uint32_t pitch = 0;
void *cpuAddr = reinterpret_cast<void *>(0x4000);
uint64_t gpuAddr = 0x4000u;
size_t allocSize = size;
// Allocation type is deliberately UNKNOWN (i.e. not BUFFER_COMPRESSED).
GraphicsAllocation allocation(0, GraphicsAllocation::AllocationType::UNKNOWN, cpuAddr, gpuAddr, 0u, allocSize, MemoryPool::MemoryNull, 1);
// The Gmm is heap-allocated by the test and released manually at the end.
allocation.setDefaultGmm(new Gmm(rootDeviceEnvironment.getGmmClientContext(), allocation.getUnderlyingBuffer(), allocation.getUnderlyingBufferSize(), false));
// Compression is switched on at the Gmm level only.
allocation.getDefaultGmm()->isRenderCompressed = true;
SURFACE_TYPE type = RENDER_SURFACE_STATE::SURFACE_TYPE_SURFTYPE_BUFFER;
helper.setRenderSurfaceStateForBuffer(rootDeviceEnvironment, stateBuffer, size, addr, 0, pitch, &allocation, false, type, false, false);
EXPECT_EQ(RENDER_SURFACE_STATE::COHERENCY_TYPE_IA_COHERENT, state->getCoherencyType());
EXPECT_EQ(AUXILIARY_SURFACE_MODE::AUXILIARY_SURFACE_MODE_AUX_NONE, state->getAuxiliarySurfaceMode());
// Release the Gmm created above, then the surface-state storage.
delete allocation.getDefaultGmm();
alignedFree(stateBuffer);
}
HWTEST_F(HwHelperTest, givenCreatedSurfaceStateBufferWhenGmmCompressionDisabledAndAllocationEnabledAnNonAuxDisabledThenSetCoherencyToIaAndAuxModeToNone) {
using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE;
using SURFACE_TYPE = typename RENDER_SURFACE_STATE::SURFACE_TYPE;

View File

@ -1551,10 +1551,6 @@ HWTEST_F(BufferSetSurfaceTests, givenRenderCompressedGmmResourceWhenSurfaceState
EXPECT_EQ(0u, surfaceState.getAuxiliarySurfaceBaseAddress());
EXPECT_TRUE(AUXILIARY_SURFACE_MODE::AUXILIARY_SURFACE_MODE_AUX_CCS_E == surfaceState.getAuxiliarySurfaceMode());
EXPECT_TRUE(RENDER_SURFACE_STATE::COHERENCY_TYPE_GPU_COHERENT == surfaceState.getCoherencyType());
graphicsAllocation->setAllocationType(GraphicsAllocation::AllocationType::BUFFER);
buffer->setArgStateful(&surfaceState, false, false, false, false, context.getDevice(0)->getDevice());
EXPECT_TRUE(AUXILIARY_SURFACE_MODE::AUXILIARY_SURFACE_MODE_AUX_NONE == surfaceState.getAuxiliarySurfaceMode());
}
HWTEST_F(BufferSetSurfaceTests, givenNonRenderCompressedGmmResourceWhenSurfaceStateIsProgrammedThenDontSetAuxParams) {

View File

@ -188,4 +188,5 @@ ForceUserptrAlignment = -1
UseExternalAllocatorForSshAndDsh = 0
DirectSubmissionOverrideBlitterSupport = -1
DirectSubmissionOverrideRenderSupport = -1
DirectSubmissionOverrideComputeSupport = -1
DirectSubmissionOverrideComputeSupport = -1
EnableUsmCompression = -1

View File

@ -294,6 +294,7 @@ void EncodeSurfaceState<Family>::encodeBuffer(void *dst, uint64_t address, size_
: R_SURFACE_STATE::COHERENCY_TYPE_GPU_COHERENT);
ss->setAuxiliarySurfaceMode(AUXILIARY_SURFACE_MODE::AUXILIARY_SURFACE_MODE_AUX_NONE);
}
template <typename Family>
void EncodeSurfaceState<Family>::encodeExtraBufferParams(GraphicsAllocation *allocation, GmmHelper *gmmHelper, void *memory, bool forceNonAuxMode, bool isReadOnlyArgument) {
using RENDER_SURFACE_STATE = typename Family::RENDER_SURFACE_STATE;
@ -302,8 +303,7 @@ void EncodeSurfaceState<Family>::encodeExtraBufferParams(GraphicsAllocation *all
auto surfaceState = reinterpret_cast<RENDER_SURFACE_STATE *>(memory);
Gmm *gmm = allocation ? allocation->getDefaultGmm() : nullptr;
if (gmm && gmm->isRenderCompressed && !forceNonAuxMode &&
GraphicsAllocation::AllocationType::BUFFER_COMPRESSED == allocation->getAllocationType()) {
if (gmm && gmm->isRenderCompressed && !forceNonAuxMode) {
// Its expected to not program pitch/qpitch/baseAddress for Aux surface in CCS scenarios
surfaceState->setCoherencyType(RENDER_SURFACE_STATE::COHERENCY_TYPE_GPU_COHERENT);
surfaceState->setAuxiliarySurfaceMode(AUXILIARY_SURFACE_MODE::AUXILIARY_SURFACE_MODE_AUX_CCS_E);

View File

@ -77,6 +77,7 @@ DECLARE_DEBUG_VARIABLE(int32_t, OverrideLeastOccupiedBank, -1, "-1: default, >=
DECLARE_DEBUG_VARIABLE(int32_t, OverrideRevision, -1, "-1: default, >=0: Revision id")
DECLARE_DEBUG_VARIABLE(int32_t, ForceCacheFlushForBcs, -1, "Force cache flush from gpgpu engine before dispatching BCS copy. -1: default, 1: enabled, 0: disabled")
DECLARE_DEBUG_VARIABLE(int32_t, ForceGpgpuSubmissionForBcsEnqueue, -1, "-1: Default, 1: Submit gpgpu command buffer with cache flushing and completion synchronization, 0: Do nothing, if possible")
DECLARE_DEBUG_VARIABLE(int32_t, EnableUsmCompression, -1, "enable compression support for L0 USM Device and Shared Device side: -1 default, 0: disable, 1: enable")
/*LOGGING FLAGS*/
DECLARE_DEBUG_VARIABLE(int32_t, PrintDriverDiagnostics, -1, "prints driver diagnostics messages to standard output, value corresponds to hint level")

View File

@ -151,8 +151,7 @@ void HwHelperHw<Family>::setRenderSurfaceStateForBuffer(const RootDeviceEnvironm
state.setSurfaceBaseAddress(bufferStateAddress);
Gmm *gmm = gfxAlloc ? gfxAlloc->getDefaultGmm() : nullptr;
if (gmm && gmm->isRenderCompressed && !forceNonAuxMode &&
GraphicsAllocation::AllocationType::BUFFER_COMPRESSED == gfxAlloc->getAllocationType()) {
if (gmm && gmm->isRenderCompressed && !forceNonAuxMode) {
// Its expected to not program pitch/qpitch/baseAddress for Aux surface in CCS scenarios
state.setCoherencyType(RENDER_SURFACE_STATE::COHERENCY_TYPE_GPU_COHERENT);
state.setAuxiliarySurfaceMode(AUXILIARY_SURFACE_MODE::AUXILIARY_SURFACE_MODE_AUX_CCS_E);

View File

@ -8,6 +8,7 @@ set(NEO_CORE_MEMORY_MANAGER
${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt
${CMAKE_CURRENT_SOURCE_DIR}/allocations_list.h
${CMAKE_CURRENT_SOURCE_DIR}/allocation_properties.h
${CMAKE_CURRENT_SOURCE_DIR}/compression_selector.h
${CMAKE_CURRENT_SOURCE_DIR}/deferrable_allocation_deletion.h
${CMAKE_CURRENT_SOURCE_DIR}/deferrable_allocation_deletion.cpp
${CMAKE_CURRENT_SOURCE_DIR}/deferrable_deletion.h

View File

@ -24,7 +24,8 @@ struct AllocationProperties {
uint32_t shareable : 1;
uint32_t resource48Bit : 1;
uint32_t isUSMHostAllocation : 1;
uint32_t reserved : 22;
uint32_t isUSMDeviceAllocation : 1;
uint32_t reserved : 21;
} flags;
uint32_t allFlags = 0;
};

View File

@ -0,0 +1,18 @@
/*
* Copyright (C) 2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
#include "shared/source/memory_manager/allocation_properties.h"
#include "shared/source/memory_manager/graphics_allocation.h"
namespace NEO {
// Policy entry point deciding whether an allocation request should prefer a
// render-compressed buffer. Only the declaration lives in this header; the
// definition is provided by a separate source file.
class CompressionSelector {
public:
// Returns true when the allocation described by `properties` should prefer
// render compression, false otherwise.
static bool preferRenderCompressedBuffer(const AllocationProperties &properties);
};
} // namespace NEO

View File

@ -21,6 +21,7 @@
#include "shared/source/helpers/hw_info.h"
#include "shared/source/helpers/string.h"
#include "shared/source/helpers/surface_format_info.h"
#include "shared/source/memory_manager/compression_selector.h"
#include "shared/source/memory_manager/deferrable_allocation_deletion.h"
#include "shared/source/memory_manager/deferred_deleter.h"
#include "shared/source/memory_manager/host_ptr_manager.h"
@ -369,7 +370,7 @@ bool MemoryManager::getAllocationData(AllocationData &allocationData, const Allo
allocationData.flags.uncacheable = properties.flags.uncacheable;
allocationData.flags.flushL3 =
(mayRequireL3Flush ? properties.flags.flushL3RequiredForRead | properties.flags.flushL3RequiredForWrite : 0u);
allocationData.flags.preferRenderCompressed = GraphicsAllocation::AllocationType::BUFFER_COMPRESSED == properties.allocationType;
allocationData.flags.preferRenderCompressed = CompressionSelector::preferRenderCompressedBuffer(properties);
allocationData.flags.multiOsContextCapable = properties.flags.multiOsContextCapable;
allocationData.hostPtr = hostPtr;

View File

@ -180,6 +180,7 @@ void *SVMAllocsManager::createUnifiedMemoryAllocation(uint32_t rootDeviceIndex,
memoryProperties.subdeviceBitfield.count() > 1,
memoryProperties.subdeviceBitfield};
unifiedMemoryProperties.flags.shareable = memoryProperties.allocationFlags.flags.shareable;
unifiedMemoryProperties.flags.isUSMDeviceAllocation = true;
GraphicsAllocation *unifiedMemoryAllocation = memoryManager->allocateGraphicsMemoryWithProperties(unifiedMemoryProperties);
if (!unifiedMemoryAllocation) {

View File

@ -133,14 +133,18 @@ GraphicsAllocation *DrmMemoryManager::allocateGraphicsMemoryInDevicePool(const A
gmm = std::make_unique<Gmm>(executionEnvironment.rootDeviceEnvironments[allocationData.rootDeviceIndex]->getGmmClientContext(), *allocationData.imgInfo, allocationData.storageInfo);
sizeAligned = alignUp(allocationData.imgInfo->size, MemoryConstants::pageSize64k);
} else {
bool preferRenderCompressed = (allocationData.type == GraphicsAllocation::AllocationType::BUFFER_COMPRESSED);
if (allocationData.type == GraphicsAllocation::AllocationType::WRITE_COMBINED) {
sizeAligned = alignUp(allocationData.size + MemoryConstants::pageSize64k, 2 * MemoryConstants::megaByte) + 2 * MemoryConstants::megaByte;
} else {
sizeAligned = alignUp(allocationData.size, MemoryConstants::pageSize64k);
}
gmm = std::make_unique<Gmm>(executionEnvironment.rootDeviceEnvironments[allocationData.rootDeviceIndex]->getGmmClientContext(), nullptr, sizeAligned, allocationData.flags.uncacheable,
preferRenderCompressed, false, allocationData.storageInfo);
gmm = std::make_unique<Gmm>(executionEnvironment.rootDeviceEnvironments[allocationData.rootDeviceIndex]->getGmmClientContext(),
nullptr,
sizeAligned,
allocationData.flags.uncacheable,
allocationData.flags.preferRenderCompressed,
false,
allocationData.storageInfo);
}
auto sizeAllocated = sizeAligned;