Support for bindless mode in L0 - improvements

Related-To: NEO-6448

- add new IGC compilation flag when bindless mode is used
- fix SBA programming of BindlessSurfaceStateSize:
  always set the maximum surface state count
- fix residency of the global DSH heap on gen9 - gen12lp
  in bindless mode
- add L0 AUB test with a bindless kernel (disabled)
- partial fixes in OCL AUB tests


Signed-off-by: Mateusz Hoppe <mateusz.hoppe@intel.com>
This commit is contained in:
Mateusz Hoppe 2021-11-30 22:59:19 +00:00 committed by Compute-Runtime-Automation
parent 55959d4d1d
commit 8b233f7f45
21 changed files with 352 additions and 19 deletions

View File

@ -0,0 +1,10 @@
#
# Copyright (C) 2021 Intel Corporation
#
# SPDX-License-Identifier: MIT
#
# Add this directory's CMakeLists.txt and the bindless kernel AUB test source
# to the ze_intel_gpu_aub_tests target.
target_sources(ze_intel_gpu_aub_tests PRIVATE
${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt
${CMAKE_CURRENT_SOURCE_DIR}/bindless_kernel_aub_tests.cpp
)

View File

@ -0,0 +1,115 @@
/*
* Copyright (C) 2021 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/source/gmm_helper/gmm_helper.h"
#include "shared/source/helpers/array_count.h"
#include "shared/source/helpers/file_io.h"
#include "shared/test/common/helpers/debug_manager_state_restore.h"
#include "shared/test/common/helpers/test_files.h"
#include "test.h"
#include "level_zero/core/source/module/module_imp.h"
#include "level_zero/core/test/aub_tests/fixtures/aub_fixture.h"
#include "level_zero/core/test/unit_tests/mocks/mock_driver_handle.h"
namespace L0 {
namespace ult {
// AUB test fixture that switches on the bindless debug flags before the base
// AUBFixtureL0 is set up, and owns the module built by createModuleFromFile().
struct L0BindlessAub : Test<AUBFixtureL0> {
    // Sets UseBindlessMode and UseExternalAllocatorForSshAndDsh, then runs the
    // base fixture set-up. The flags are set first so that they are already in
    // effect while AUBFixtureL0::SetUp() executes.
    void SetUp() override {
        DebugManager.flags.UseBindlessMode.set(1);
        DebugManager.flags.UseExternalAllocatorForSshAndDsh.set(1);
        AUBFixtureL0::SetUp();
    }

    // Destroys the module (if one was created) and tears down the base fixture.
    void TearDown() override {
        // createModuleFromFile() may never have been called, or may have stopped
        // at one of its fatal assertions before the module was allocated; in
        // both cases `module` is still nullptr and must not be dereferenced.
        if (module != nullptr) {
            module->destroy();
            module = nullptr;
        }
        AUBFixtureL0::TearDown();
    }

    // Loads the precompiled kernel binary whose name is derived from `fileName`
    // and initializes `module` from it as a native-format user module.
    //
    // fileName - base name of the kernel binary; "_" and the ".bin" extension
    //            are handed to retrieveBinaryKernelFilenameNoRevision()
    // context  - not used by this helper
    // device   - device the module is created for
    //
    // Uses fatal gtest assertions; on failure they return from this helper only,
    // so callers have to check for a fatal failure themselves.
    void createModuleFromFile(const std::string &fileName, ze_context_handle_t context, L0::Device *device) {
        std::string testFile;
        retrieveBinaryKernelFilenameNoRevision(testFile, fileName + "_", ".bin");

        size_t size = 0;
        auto src = loadDataFromFile(
            testFile.c_str(),
            size);

        ASSERT_NE(0u, size);
        ASSERT_NE(nullptr, src);

        ze_module_desc_t moduleDesc = {ZE_STRUCTURE_TYPE_MODULE_DESC};
        moduleDesc.format = ZE_MODULE_FORMAT_NATIVE;
        moduleDesc.pInputModule = reinterpret_cast<const uint8_t *>(src.get());
        moduleDesc.inputSize = size;
        moduleDesc.pBuildFlags = "";

        module = new ModuleImp(device, nullptr, ModuleType::User);
        bool success = module->initialize(&moduleDesc, device->getNEODevice());
        ASSERT_TRUE(success);
    }

    // Declared as a member so it is constructed before SetUp() changes any flag.
    // NOTE(review): presumably restores the debug flags on destruction - confirm.
    DebugManagerStateRestore restorer;
    // Module created by createModuleFromFile(); released in TearDown().
    ModuleImp *module = nullptr;
};
// Runs the bindless StatefulCopyBuffer kernel over a page-sized host USM buffer
// and expects the destination allocation to match the source contents.
HWTEST_F(L0BindlessAub, DISABLED_GivenBindlessKernelWhenExecutedThenOutputIsCorrect) {
    constexpr size_t bufferSize = MemoryConstants::pageSize;
    const uint32_t groupSize[] = {32, 1, 1};
    // One work-item per byte: bufferSize / 32 groups of 32 work-items.
    const uint32_t groupCount[] = {bufferSize / 32, 1, 1};

    NEO::SVMAllocsManager::UnifiedMemoryProperties unifiedMemoryProperties(InternalMemoryType::HOST_UNIFIED_MEMORY,
                                                                           context->rootDeviceIndices,
                                                                           context->deviceBitfields);

    auto bufferSrc = driverHandle->svmAllocsManager->createHostUnifiedMemoryAllocation(bufferSize, unifiedMemoryProperties);
    // The pointers are written to right away, so a failed allocation must stop the test.
    ASSERT_NE(nullptr, bufferSrc);
    memset(bufferSrc, 55, bufferSize);

    auto bufferDst = driverHandle->svmAllocsManager->createHostUnifiedMemoryAllocation(bufferSize, unifiedMemoryProperties);
    ASSERT_NE(nullptr, bufferDst);
    memset(bufferDst, 0, bufferSize);

    auto simulatedCsr = AUBFixtureL0::getSimulatedCsr<FamilyType>();
    simulatedCsr->initializeEngine();

    simulatedCsr->writeMemory(*driverHandle->svmAllocsManager->getSVMAlloc(bufferSrc)->gpuAllocations.getDefaultGraphicsAllocation());
    simulatedCsr->writeMemory(*driverHandle->svmAllocsManager->getSVMAlloc(bufferDst)->gpuAllocations.getDefaultGraphicsAllocation());

    ze_group_count_t dispatchTraits;
    dispatchTraits.groupCountX = groupCount[0];
    dispatchTraits.groupCountY = groupCount[1];
    dispatchTraits.groupCountZ = groupCount[2];

    // Fatal assertions inside the helper only return from the helper itself;
    // this wrapper stops the test as well when module creation failed.
    ASSERT_NO_FATAL_FAILURE(createModuleFromFile("bindless_stateful_copy_buffer", context, device));

    ze_kernel_handle_t kernel = nullptr;
    ze_kernel_desc_t kernelDesc = {ZE_STRUCTURE_TYPE_KERNEL_DESC};
    kernelDesc.pKernelName = "StatefulCopyBuffer";
    // Every later call uses the handle, so kernel creation has to succeed.
    ASSERT_EQ(ZE_RESULT_SUCCESS, zeKernelCreate(module->toHandle(), &kernelDesc, &kernel));

    EXPECT_EQ(ZE_RESULT_SUCCESS, zeKernelSetArgumentValue(kernel, 0, sizeof(void *), &bufferSrc));
    EXPECT_EQ(ZE_RESULT_SUCCESS, zeKernelSetArgumentValue(kernel, 1, sizeof(void *), &bufferDst));
    EXPECT_EQ(ZE_RESULT_SUCCESS, zeKernelSetGroupSize(kernel, groupSize[0], groupSize[1], groupSize[2]));

    ze_command_list_handle_t cmdListHandle = commandList->toHandle();
    EXPECT_EQ(ZE_RESULT_SUCCESS, zeCommandListAppendLaunchKernel(cmdListHandle, kernel, &dispatchTraits, nullptr, 0, nullptr));
    commandList->close();

    pCmdq->executeCommandLists(1, &cmdListHandle, nullptr, false);
    pCmdq->synchronize(std::numeric_limits<uint32_t>::max());

    // Compare the destination allocation (by GPU address) with the source pattern.
    expectMemory<FamilyType>(reinterpret_cast<void *>(driverHandle->svmAllocsManager->getSVMAlloc(bufferDst)->gpuAllocations.getDefaultGraphicsAllocation()->getGpuAddress()),
                             bufferSrc, bufferSize);

    EXPECT_EQ(ZE_RESULT_SUCCESS, zeKernelDestroy(kernel));
    driverHandle->svmAllocsManager->freeSVMAlloc(bufferSrc);
    driverHandle->svmAllocsManager->freeSVMAlloc(bufferDst);
}
} // namespace ult
} // namespace L0

View File

@ -5,6 +5,12 @@
*
*/
#include "shared/source/command_stream/command_stream_receiver_simulated_common_hw.h"
#include "shared/source/command_stream/command_stream_receiver_with_aub_dump.h"
#include "shared/source/command_stream/tbx_command_stream_receiver_hw.h"
#include "test_mode.h"
#include <cstdint>
#include <memory>
#include <string>
@ -39,6 +45,43 @@ class AUBFixtureL0 {
void TearDown();
static void prepareCopyEngines(NEO::MockDevice &device, const std::string &filename);
// Returns the fixture's `csr` pointer cast to the simulated command stream
// receiver type for FamilyType. The cast is a static_cast, i.e. unchecked.
template <typename FamilyType>
NEO::CommandStreamReceiverSimulatedCommonHw<FamilyType> *getSimulatedCsr() const {
    using SimulatedCsr = NEO::CommandStreamReceiverSimulatedCommonHw<FamilyType>;
    return static_cast<SimulatedCsr *>(csr);
}
template <typename FamilyType>
void expectMemory(void *gfxAddress, const void *srcAddress, size_t length) {
NEO::CommandStreamReceiverSimulatedCommonHw<FamilyType> *csrSimulated = getSimulatedCsr<FamilyType>();
if (NEO::testMode == NEO::TestMode::AubTestsWithTbx) {
auto tbxCsr = csrSimulated;
EXPECT_TRUE(tbxCsr->expectMemoryEqual(gfxAddress, srcAddress, length));
csrSimulated = static_cast<NEO::CommandStreamReceiverSimulatedCommonHw<FamilyType> *>(
static_cast<NEO::CommandStreamReceiverWithAUBDump<NEO::TbxCommandStreamReceiverHw<FamilyType>> *>(csr)->aubCSR.get());
}
if (csrSimulated) {
csrSimulated->expectMemoryEqual(gfxAddress, srcAddress, length);
}
}
template <typename FamilyType>
void expectNotEqualMemory(void *gfxAddress, const void *srcAddress, size_t length) {
NEO::CommandStreamReceiverSimulatedCommonHw<FamilyType> *csrSimulated = getSimulatedCsr<FamilyType>();
if (NEO::testMode == NEO::TestMode::AubTestsWithTbx) {
auto tbxCsr = csrSimulated;
EXPECT_TRUE(tbxCsr->expectMemoryNotEqual(gfxAddress, srcAddress, length));
csrSimulated = static_cast<NEO::CommandStreamReceiverSimulatedCommonHw<FamilyType> *>(
static_cast<NEO::CommandStreamReceiverWithAUBDump<NEO::TbxCommandStreamReceiverHw<FamilyType>> *>(csr)->aubCSR.get());
}
if (csrSimulated) {
csrSimulated->expectMemoryNotEqual(gfxAddress, srcAddress, length);
}
}
const uint32_t rootDeviceIndex = 0;
NEO::ExecutionEnvironment *executionEnvironment;
NEO::MemoryManager *memoryManager = nullptr;

View File

@ -13,6 +13,14 @@ set(TEST_MODULES
${CMAKE_CURRENT_SOURCE_DIR}/test_modules/test_kernel.cl
)
# Internal compiler options used when generating the bindless test kernels;
# kept as one quoted string so it is passed on as a single argument.
set(TEST_KERNEL_BINDLESS_internal_options
"-cl-intel-use-bindless-mode -cl-intel-use-bindless-advanced-mode"
)
# OpenCL C sources that are built as bindless test kernels.
set(TEST_KERNEL_BINDLESS
${CMAKE_CURRENT_SOURCE_DIR}/test_modules/stateful_copy_buffer.cl
)
set(l0_test_kernels_outputs)
macro(macro_for_each_core_type)
foreach(PLATFORM_TYPE ${PLATFORM_TYPES})
@ -20,12 +28,18 @@ macro(macro_for_each_core_type)
get_family_name_with_type(${CORE_TYPE} ${PLATFORM_TYPE})
string(TOLOWER ${PLATFORM_TYPE} PLATFORM_TYPE_LOWER)
set(PLATFORM_LOWER ${DEFAULT_SUPPORTED_${CORE_TYPE}_${PLATFORM_TYPE}_PLATFORM})
string(TOLOWER ${CORE_TYPE} CORE_TYPE_LOWER)
level_zero_generate_kernels(l0_test_kernel_outputs ${PLATFORM_LOWER} ${family_name_with_type} "-g" ${TEST_MODULES})
#skip Gen8 bindless kernel generation
if(NOT ("${CORE_TYPE_LOWER}" STREQUAL "gen8"))
level_zero_generate_kernels_with_internal_options(l0_bindless_test_kernel_outputs ${PLATFORM_LOWER} ${family_name_with_type} "bindless" "-g" ${TEST_KERNEL_BINDLESS_internal_options} ${TEST_KERNEL_BINDLESS})
endif()
endif()
endforeach()
endmacro()
apply_macro_for_each_core_type("TESTED")
add_custom_target(l0_common_test_kernels DEPENDS ${l0_test_kernel_outputs} copy_compiler_files)
add_custom_target(l0_common_test_kernels DEPENDS ${l0_test_kernel_outputs} ${l0_bindless_test_kernel_outputs} copy_compiler_files)
set_target_properties(l0_common_test_kernels PROPERTIES FOLDER ${TARGET_NAME_L0})
add_dependencies(prepare_test_kernels_for_l0 l0_common_test_kernels)

View File

@ -49,3 +49,54 @@ function(level_zero_generate_kernels target_list platform_name suffix options)
set(${target_list} ${${target_list}} PARENT_SCOPE)
endfunction()
# Generates test kernels for one platform with ocloc, passing both regular and
# internal compiler options, or copies prebuilt kernels when
# NEO_DISABLE_BUILTINS_COMPILATION is set.
#
#   target_list      - name of the caller's list variable; the paths of all
#                      produced files are appended and written back with PARENT_SCOPE
#   platform_name    - value passed to ocloc as -device
#   suffix           - used in the output directory and in the output file names
#   prefix           - prepended to the kernel base name in the output file names
#   options          - value passed to ocloc as -options
#   internal_options - value passed to ocloc as -internal_options
#   ARGN             - kernel source files to process
function(level_zero_generate_kernels_with_internal_options target_list platform_name suffix prefix options internal_options)
  set(outputdir "${TargetDir}/level_zero/${suffix}/test_files/${NEO_ARCH}/")

  # Wrap the option strings in quotes exactly once. Doing this on the function
  # arguments inside the loop would add another pair of quotes for every
  # additional source file.
  string(CONCAT quoted_options \" ${options} \")
  string(CONCAT quoted_internal_options \" ${internal_options} \")

  foreach(filepath ${ARGN})
    get_filename_component(filename ${filepath} NAME)
    get_filename_component(basename ${filepath} NAME_WE)
    get_filename_component(workdir ${filepath} DIRECTORY)

    set(outputpath_base "${outputdir}${prefix}_${basename}_${suffix}")

    if(NOT NEO_DISABLE_BUILTINS_COMPILATION)
      set(output_files
          ${outputpath_base}.bin
          ${outputpath_base}.gen
          ${outputpath_base}.spv
          ${outputpath_base}.dbg
      )
      set(output_name "-output" "${prefix}_${basename}")

      # The first command only echoes the ocloc invocation; the second runs it
      # from the directory that contains the kernel source.
      add_custom_command(
                         COMMAND echo generate ${ocloc_cmd_prefix} -q -file ${filename} -device ${platform_name} -out_dir ${outputdir} ${output_name} -options ${quoted_options} -internal_options ${quoted_internal_options} , workdir is ${workdir}
                         OUTPUT ${output_files}
                         COMMAND ${ocloc_cmd_prefix} -q -file ${filename} -device ${platform_name} -out_dir ${outputdir} ${output_name} -options ${quoted_options} -internal_options ${quoted_internal_options}
                         WORKING_DIRECTORY ${workdir}
                         DEPENDS ${filepath} ocloc
      )

      list(APPEND ${target_list} ${output_files})
    else()
      # Compilation disabled: copy each prebuilt artifact into the output directory.
      foreach(_file_name "bin" "gen" "spv" "dbg")
        set(_file_prebuilt "${NEO_SOURCE_DIR}/../neo_test_kernels/level_zero/${suffix}/test_files/${NEO_ARCH}/${prefix}_${basename}_${suffix}.${_file_name}")
        add_custom_command(
                           OUTPUT ${outputpath_base}.${_file_name}
                           COMMAND ${CMAKE_COMMAND} -E make_directory ${outputdir}
                           COMMAND ${CMAKE_COMMAND} -E copy_if_different ${_file_prebuilt} ${outputdir}
        )

        list(APPEND ${target_list} ${outputpath_base}.${_file_name})
      endforeach()
    endif()
  endforeach()

  set(${target_list} ${${target_list}} PARENT_SCOPE)
endfunction()

View File

@ -0,0 +1,14 @@
/*
* Copyright (C) 2021 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
/*
 * Byte-wise buffer copy: each work-item copies the single byte selected by
 * its global id in dimension 0 from src to dst.
 */
__kernel void StatefulCopyBuffer(
    const __global uchar *src,
    __global uchar *dst) {
    const uint index = get_global_id(0);
    dst[index] = src[index];
}

View File

@ -492,7 +492,7 @@ HWTEST2_F(CommandQueueProgramSBATest,
using BindlessCommandQueueSBASupport = IsAtLeastProduct<IGFX_SKYLAKE>;
HWTEST2_F(CommandQueueProgramSBATest,
givenBindlessModeEnabledWhenProgrammingStateBaseAddressThenBindlessBaseAddressIsPassed, BindlessCommandQueueSBASupport) {
givenBindlessModeEnabledWhenProgrammingStateBaseAddressThenBindlessBaseAddressAndSizeAreSet, BindlessCommandQueueSBASupport) {
using STATE_BASE_ADDRESS = typename FamilyType::STATE_BASE_ADDRESS;
DebugManagerStateRestore dbgRestorer;
DebugManager.flags.UseBindlessMode.set(1);
@ -525,7 +525,9 @@ HWTEST2_F(CommandQueueProgramSBATest,
auto cmdSba = genCmdCast<STATE_BASE_ADDRESS *>(*itor);
EXPECT_EQ(cmdSba->getBindlessSurfaceStateBaseAddressModifyEnable(), true);
EXPECT_EQ(cmdSba->getBindlessSurfaceStateBaseAddress(), neoDevice->getExecutionEnvironment()->rootDeviceEnvironments[neoDevice->getRootDeviceIndex()]->getBindlessHeapsHelper()->getGlobalHeapsBase());
EXPECT_EQ(cmdSba->getBindlessSurfaceStateSize(), MemoryConstants::sizeOf4GBinPageEntities);
auto surfaceStateCount = StateBaseAddressHelper<FamilyType>::getMaxBindlessSurfaceStates();
EXPECT_EQ(surfaceStateCount, cmdSba->getBindlessSurfaceStateSize());
commandQueue->destroy();
}

View File

@ -432,7 +432,7 @@ set(TEST_KERNEL_SIP_DEBUG_LOCAL_options
)
set(TEST_KERNEL_BINDLESS_internal_options
"-cl-intel-use-bindless-mode"
"-cl-intel-use-bindless-mode -cl-intel-use-bindless-advanced-mode"
)
set(TEST_KERNEL_BINDLESS

View File

@ -868,11 +868,27 @@ HWTEST_F(AUBSimpleArgNonUniformTest, givenOpenCL20SupportWhenProvidingWork3DimNo
expectMemory<FamilyType>(remainderBufferGpuAddress, this->expectedRemainderMemory, sizeRemainderMemory);
}
using AUBBindlessKernel = Test<KernelAUBFixture<BindlessKernelFixture>>;
using IsSklAndLater = IsAtLeastProduct<IGFX_SKYLAKE>;
// Fixture for bindless kernel AUB tests: switches on the bindless debug flags
// and then defers to KernelAUBFixture<BindlessKernelFixture> for set-up and
// tear-down.
struct AUBBindlessKernel : public KernelAUBFixture<BindlessKernelFixture>,
                           public ::testing::Test {
    // Constructed together with the fixture, i.e. before SetUp() touches any flag.
    // NOTE(review): presumably restores the debug flags on destruction - confirm.
    DebugManagerStateRestore restorer;

    void SetUp() override {
        // The flags are set before the base fixture set-up runs.
        auto &debugFlags = DebugManager.flags;
        debugFlags.UseBindlessMode.set(1);
        debugFlags.UseExternalAllocatorForSshAndDsh.set(1);

        KernelAUBFixture<BindlessKernelFixture>::SetUp();
    }

    void TearDown() override {
        KernelAUBFixture<BindlessKernelFixture>::TearDown();
    }
};
HWTEST2_F(AUBBindlessKernel, DISABLED_givenBindlessCopyKernelWhenEnqueuedThenResultsValidate, IsSklAndLater) {
constexpr size_t bufferSize = MemoryConstants::pageSize;
auto simulatedCsr = AUBFixture::getSimulatedCsr<FamilyType>();
simulatedCsr->initializeEngine();
createKernel(std::string("bindless_stateful_copy_buffer"), std::string("StatefulCopyBuffer"));
@ -904,8 +920,6 @@ HWTEST2_F(AUBBindlessKernel, DISABLED_givenBindlessCopyKernelWhenEnqueuedThenRes
retVal));
ASSERT_NE(nullptr, pBufferDst);
auto simulatedCsr = AUBFixture::getSimulatedCsr<FamilyType>();
memcpy(pBufferSrc->getGraphicsAllocation(device->getRootDeviceIndex())->getUnderlyingBuffer(), bufferDataSrc, bufferSize);
memcpy(pBufferDst->getGraphicsAllocation(device->getRootDeviceIndex())->getUnderlyingBuffer(), bufferDataDst, bufferSize);
@ -953,6 +967,8 @@ HWTEST2_F(AUBBindlessKernel, DISABLED_givenBindlessCopyImageKernelWhenEnqueuedTh
constexpr unsigned int testWidth = 5;
constexpr unsigned int testHeight = 1;
constexpr unsigned int testDepth = 1;
auto simulatedCsr = AUBFixture::getSimulatedCsr<FamilyType>();
simulatedCsr->initializeEngine();
createKernel(std::string("bindless_copy_buffer_to_image"), std::string("CopyBufferToImage3d"));
@ -1012,8 +1028,6 @@ HWTEST2_F(AUBBindlessKernel, DISABLED_givenBindlessCopyImageKernelWhenEnqueuedTh
memcpy(image->getGraphicsAllocation(device->getRootDeviceIndex())->getUnderlyingBuffer(), imageDataDst, imageSize);
memcpy(bufferSrc->getGraphicsAllocation(device->getRootDeviceIndex())->getUnderlyingBuffer(), imageDataSrc, imageSize);
auto simulatedCsr = AUBFixture::getSimulatedCsr<FamilyType>();
simulatedCsr->writeMemory(*bufferSrc->getGraphicsAllocation(device->getRootDeviceIndex()));
simulatedCsr->writeMemory(*image->getGraphicsAllocation(device->getRootDeviceIndex()));

View File

@ -234,7 +234,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverFlushTaskXeHPAndLaterTests, wh
EXPECT_NE(nullptr, bindingTablePoolAlloc);
}
HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverFlushTaskXeHPAndLaterTests, givenSbaProgrammingWhenHeapsAreNotProvidedThenDontProgram) {
HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverFlushTaskXeHPAndLaterTests, givenNoHeapsProvidedWhenSBAIsProgrammedThenBaseAddressesAreNotSetAndBindlessSurfaceStateSizeSetToMax) {
using STATE_BASE_ADDRESS = typename FamilyType::STATE_BASE_ADDRESS;
DispatchFlags dispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags();
@ -284,7 +284,9 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverFlushTaskXeHPAndLaterTests, gi
EXPECT_EQ(0u, sbaCmd.getBindlessSurfaceStateBaseAddress());
EXPECT_FALSE(sbaCmd.getBindlessSurfaceStateBaseAddressModifyEnable());
EXPECT_EQ(0u, sbaCmd.getBindlessSurfaceStateSize());
auto surfaceStateCount = StateBaseAddressHelper<FamilyType>::getMaxBindlessSurfaceStates();
EXPECT_EQ(surfaceStateCount, sbaCmd.getBindlessSurfaceStateSize());
}
using isXeHPOrAbove = IsAtLeastProduct<IGFX_XE_HP_SDV>;

View File

@ -26,7 +26,7 @@ set(BUILTIN_OPTIONS_STATELESS
)
set(bindless_OPTIONS
-internal_options "-cl-intel-use-bindless-mode"
-internal_options "-cl-intel-use-bindless-mode -cl-intel-use-bindless-advanced-mode"
)
set(bindful_OPTIONS

View File

@ -113,9 +113,6 @@ void EncodeDispatchKernel<Family>::encode(CommandContainer &container,
kernelDescriptor.payloadMappings.samplerTable.borderColor,
dispatchInterface->getDynamicStateHeapData(),
device->getBindlessHeapsHelper(), device->getHardwareInfo());
if (ApiSpecificConfig::getBindlessConfiguration()) {
container.getResidencyContainer().push_back(device->getBindlessHeapsHelper()->getHeap(NEO::BindlessHeapsHelper::BindlesHeapType::GLOBAL_DSH)->getGraphicsAllocation());
}
}
idd.setSamplerStatePointer(samplerStateOffset);
@ -220,6 +217,10 @@ void EncodeDispatchKernel<Family>::encode(CommandContainer &container,
cmd.setPredicateEnable(isPredicate);
if (ApiSpecificConfig::getBindlessConfiguration()) {
container.getResidencyContainer().push_back(device->getBindlessHeapsHelper()->getHeap(NEO::BindlessHeapsHelper::BindlesHeapType::GLOBAL_DSH)->getGraphicsAllocation());
}
EncodeDispatchKernel<Family>::adjustInterfaceDescriptorData(idd, hwInfo);
PreemptionHelper::applyPreemptionWaCmdsBegin<Family>(listCmdBufferStream, *device);

View File

@ -26,7 +26,7 @@ static constexpr ConstStringRef fastRelaxedMath = "-cl-fast-relaxed-math";
static constexpr ConstStringRef preserveVec3Type = "-fpreserve-vec3-type";
static constexpr ConstStringRef createLibrary = "-create-library";
static constexpr ConstStringRef generateDebugInfo = "-g";
static constexpr ConstStringRef bindlessMode = "-cl-intel-use-bindless-mode";
static constexpr ConstStringRef bindlessMode = "-cl-intel-use-bindless-mode -cl-intel-use-bindless-advanced-mode";
static constexpr ConstStringRef uniformWorkgroupSize = "-cl-uniform-work-group-size";
static constexpr ConstStringRef forceEmuInt32DivRem = "-cl-intel-force-emu-int32divrem";
static constexpr ConstStringRef forceEmuInt32DivRemSP = "-cl-intel-force-emu-sp-int32divrem";

View File

@ -57,5 +57,7 @@ struct StateBaseAddressHelper {
static void appendExtraCacheSettings(STATE_BASE_ADDRESS *stateBaseAddress, GmmHelper *gmmHelper);
static void programBindingTableBaseAddress(LinearStream &commandStream, const IndirectHeap &ssh, GmmHelper *gmmHelper);
static uint32_t getMaxBindlessSurfaceStates();
};
} // namespace NEO

View File

@ -37,6 +37,10 @@ void StateBaseAddressHelper<GfxFamily>::programStateBaseAddress(
*stateBaseAddress = GfxFamily::cmdInitStateBaseAddress;
bool overrideBindlessSurfaceStateBase = true;
const auto surfaceStateCount = getMaxBindlessSurfaceStates();
stateBaseAddress->setBindlessSurfaceStateSize(surfaceStateCount);
if (useGlobalHeapsBaseAddress) {
stateBaseAddress->setDynamicStateBaseAddressModifyEnable(true);
stateBaseAddress->setDynamicStateBufferSizeModifyEnable(true);
@ -48,7 +52,6 @@ void StateBaseAddressHelper<GfxFamily>::programStateBaseAddress(
stateBaseAddress->setBindlessSurfaceStateBaseAddressModifyEnable(true);
stateBaseAddress->setBindlessSurfaceStateBaseAddress(globalHeapsBaseAddress);
stateBaseAddress->setBindlessSurfaceStateSize(MemoryConstants::sizeOf4GBinPageEntities);
overrideBindlessSurfaceStateBase = false;
} else {

View File

@ -24,4 +24,9 @@ void StateBaseAddressHelper<GfxFamily>::appendStateBaseAddressParameters(
appendExtraCacheSettings(stateBaseAddress, gmmHelper);
}
// Maximum bindless surface state count reported by this implementation: zero.
template <typename GfxFamily>
uint32_t StateBaseAddressHelper<GfxFamily>::getMaxBindlessSurfaceStates() {
    constexpr uint32_t maxSurfaceStates = 0u;
    return maxSurfaceStates;
}
} // namespace NEO

View File

@ -40,4 +40,10 @@ void StateBaseAddressHelper<GfxFamily>::appendStateBaseAddressParameters(
appendExtraCacheSettings(stateBaseAddress, gmmHelper);
}
// Maximum bindless surface state count reported by this implementation:
// 2^20 - 1 (1048575).
template <typename GfxFamily>
uint32_t StateBaseAddressHelper<GfxFamily>::getMaxBindlessSurfaceStates() {
    constexpr uint32_t maxSurfaceStates = (1u << 20) - 1u;
    return maxSurfaceStates;
}
} // namespace NEO

View File

@ -32,4 +32,8 @@ void StateBaseAddressHelper<GfxFamily>::appendStateBaseAddressParameters(
appendExtraCacheSettings(stateBaseAddress, gmmHelper);
}
// Maximum bindless surface state count reported by this implementation:
// 2^20 - 1 (1048575).
template <typename GfxFamily>
uint32_t StateBaseAddressHelper<GfxFamily>::getMaxBindlessSurfaceStates() {
    constexpr uint32_t surfaceStateCountBits = 20u;
    return (1u << surfaceStateCountBits) - 1u;
}
} // namespace NEO

View File

@ -114,4 +114,8 @@ template <typename GfxFamily>
void StateBaseAddressHelper<GfxFamily>::appendIohParameters(STATE_BASE_ADDRESS *stateBaseAddress, const IndirectHeap *ioh, bool useGlobalHeapsBaseAddress, uint64_t indirectObjectHeapBaseAddress) {
}
// Maximum bindless surface state count reported by this implementation:
// the largest value a uint32_t can hold.
template <typename GfxFamily>
uint32_t StateBaseAddressHelper<GfxFamily>::getMaxBindlessSurfaceStates() {
    constexpr uint32_t maxSurfaceStates = std::numeric_limits<uint32_t>::max();
    return maxSurfaceStates;
}
} // namespace NEO

View File

@ -107,7 +107,9 @@ HWTEST2_F(SbaForBindlessTests, givenGlobalBindlessBaseAddressWhenProgramStateBas
1u);
EXPECT_TRUE(cmd->getBindlessSurfaceStateBaseAddressModifyEnable());
EXPECT_EQ(cmd->getBindlessSurfaceStateBaseAddress(), globalBindlessHeapsBaseAddress);
EXPECT_EQ(cmd->getBindlessSurfaceStateSize(), MemoryConstants::sizeOf4GBinPageEntities);
auto surfaceStateCount = StateBaseAddressHelper<FamilyType>::getMaxBindlessSurfaceStates();
EXPECT_EQ(surfaceStateCount, cmd->getBindlessSurfaceStateSize());
}
using IohSupported = IsWithinGfxCore<GFXCORE_FAMILY::IGFX_GEN9_CORE, GFXCORE_FAMILY::IGFX_GEN12LP_CORE>;

View File

@ -1146,7 +1146,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, InterfaceDescriptorDataTests, givenVariousValuesWhen
using BindlessCommandEncodeStatesTest = Test<MemManagerFixture>;
using BindlessCommandEncodeStatesTesttt = Test<DeviceFixture>;
HWTEST_F(BindlessCommandEncodeStatesTesttt, givenBindlessKernelWhenBindlessModeEnabledThenCmdContainerDoesNotHaveSsh) {
HWTEST_F(BindlessCommandEncodeStatesTesttt, givenBindlessKernelAndBindlessModeEnabledWhenEncodingKernelThenCmdContainerHasNullptrSSH) {
using BINDING_TABLE_STATE = typename FamilyType::BINDING_TABLE_STATE;
using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA;
using WALKER = typename FamilyType::WALKER_TYPE;
@ -1184,6 +1184,47 @@ HWTEST_F(BindlessCommandEncodeStatesTesttt, givenBindlessKernelWhenBindlessModeE
EXPECT_EQ(commandContainer->getIndirectHeap(HeapType::SURFACE_STATE), nullptr);
}
// With bindless mode enabled and a bindless-and-stateless kernel, encoding the
// dispatch must leave the global DSH allocation in the command container's
// residency container.
HWTEST2_F(BindlessCommandEncodeStatesTesttt, givenBindlessKernelAndBindlessModeEnabledWhenEncodingKernelThenCmdContainerResidencyContainsGlobalDSH, IsAtMostGen12lp) {
    using BINDING_TABLE_STATE = typename FamilyType::BINDING_TABLE_STATE;
    using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA;
    using WALKER = typename FamilyType::WALKER_TYPE;

    DebugManagerStateRestore dbgRestorer;
    DebugManager.flags.UseBindlessMode.set(1);

    // Command container without dirty heaps.
    auto commandContainer = std::make_unique<CommandContainer>();
    commandContainer->initialize(pDevice);
    commandContainer->setDirtyStateForAllHeaps(false);

    // Create the bindless heaps helper on the device's root device environment.
    auto &rootDeviceEnvironment = pDevice->getExecutionEnvironment()->rootDeviceEnvironments[pDevice->getRootDeviceIndex()];
    rootDeviceEnvironment->createBindlessHeapsHelper(pDevice->getMemoryManager(),
                                                     pDevice->getNumGenericSubDevices() > 1,
                                                     pDevice->getRootDeviceIndex(),
                                                     pDevice->getDeviceBitfield());

    // Kernel description: a single binding table entry, bindless + stateless addressing.
    uint32_t numBindingTable = 1;
    BINDING_TABLE_STATE bindingTableState = FamilyType::cmdInitBindingTableState;
    uint32_t dims[] = {1, 1, 1};

    auto dispatchInterface = std::make_unique<MockDispatchKernelEncoder>();
    auto &descriptor = dispatchInterface->kernelDescriptor;
    descriptor.payloadMappings.bindingTable.numEntries = numBindingTable;
    descriptor.payloadMappings.bindingTable.tableOffset = 0U;
    descriptor.kernelAttributes.bufferAddressingMode = KernelDescriptor::BindlessAndStateless;

    // The mock hands out the binding table state as its surface state heap data.
    const uint8_t *sshData = reinterpret_cast<uint8_t *>(&bindingTableState);
    const auto sshDataSize = static_cast<uint32_t>(sizeof(BINDING_TABLE_STATE));
    EXPECT_CALL(*dispatchInterface, getSurfaceStateHeapData()).WillRepeatedly(::testing::Return(sshData));
    EXPECT_CALL(*dispatchInterface, getSurfaceStateHeapDataSize()).WillRepeatedly(::testing::Return(sshDataSize));

    bool requiresUncachedMocs = false;
    EXPECT_EQ(commandContainer->getIndirectHeap(HeapType::SURFACE_STATE), nullptr);

    uint32_t partitionCount = 0;
    EncodeDispatchKernel<FamilyType>::encode(*commandContainer, dims, false, false, dispatchInterface.get(), 0, false, false,
                                             pDevice, NEO::PreemptionMode::Disabled, requiresUncachedMocs, false, partitionCount,
                                             false, false);

    // Look the global DSH allocation up in the residency container.
    auto &residency = commandContainer->getResidencyContainer();
    auto globalDSHIterator = std::find(residency.begin(), residency.end(),
                                       pDevice->getBindlessHeapsHelper()->getHeap(BindlessHeapsHelper::GLOBAL_DSH)->getGraphicsAllocation());
    EXPECT_NE(residency.end(), globalDSHIterator);
}
HWTEST_F(BindlessCommandEncodeStatesTesttt, givenBindfulKernelWhenBindlessModeEnabledThenCmdContainerHaveSsh) {
using BINDING_TABLE_STATE = typename FamilyType::BINDING_TABLE_STATE;
using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA;