Support for bindless mode in L0 - improvements
Related-To: NEO-6448 - add new IGC compilation flag when bindless mode used - fix SBA programming of BindlessSurfaceStateSize - always set maximum surface state count - fix residency of global DSH heap on gen9 - gen12lp in bindless mode - add L0 aub test with bindless kernel - disabled - partial fixes in OCL aub tests Signed-off-by: Mateusz Hoppe <mateusz.hoppe@intel.com>
This commit is contained in:
parent
55959d4d1d
commit
8b233f7f45
|
@ -0,0 +1,10 @@
|
|||
#
|
||||
# Copyright (C) 2021 Intel Corporation
|
||||
#
|
||||
# SPDX-License-Identifier: MIT
|
||||
#
|
||||
|
||||
target_sources(ze_intel_gpu_aub_tests PRIVATE
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/bindless_kernel_aub_tests.cpp
|
||||
)
|
|
@ -0,0 +1,115 @@
|
|||
/*
|
||||
* Copyright (C) 2021 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "shared/source/gmm_helper/gmm_helper.h"
|
||||
#include "shared/source/helpers/array_count.h"
|
||||
#include "shared/source/helpers/file_io.h"
|
||||
#include "shared/test/common/helpers/debug_manager_state_restore.h"
|
||||
#include "shared/test/common/helpers/test_files.h"
|
||||
|
||||
#include "test.h"
|
||||
|
||||
#include "level_zero/core/source/module/module_imp.h"
|
||||
#include "level_zero/core/test/aub_tests/fixtures/aub_fixture.h"
|
||||
#include "level_zero/core/test/unit_tests/mocks/mock_driver_handle.h"
|
||||
|
||||
namespace L0 {
|
||||
namespace ult {
|
||||
|
||||
struct L0BindlessAub : Test<AUBFixtureL0> {
|
||||
|
||||
void SetUp() {
|
||||
DebugManager.flags.UseBindlessMode.set(1);
|
||||
DebugManager.flags.UseExternalAllocatorForSshAndDsh.set(1);
|
||||
AUBFixtureL0::SetUp();
|
||||
}
|
||||
void TearDown() {
|
||||
|
||||
module->destroy();
|
||||
AUBFixtureL0::TearDown();
|
||||
}
|
||||
|
||||
void createModuleFromFile(const std::string &fileName, ze_context_handle_t context, L0::Device *device) {
|
||||
std::string testFile;
|
||||
retrieveBinaryKernelFilenameNoRevision(testFile, fileName + "_", ".bin");
|
||||
|
||||
size_t size = 0;
|
||||
auto src = loadDataFromFile(
|
||||
testFile.c_str(),
|
||||
size);
|
||||
|
||||
ASSERT_NE(0u, size);
|
||||
ASSERT_NE(nullptr, src);
|
||||
|
||||
ze_module_desc_t moduleDesc = {ZE_STRUCTURE_TYPE_MODULE_DESC};
|
||||
moduleDesc.format = ZE_MODULE_FORMAT_NATIVE;
|
||||
moduleDesc.pInputModule = reinterpret_cast<const uint8_t *>(src.get());
|
||||
moduleDesc.inputSize = size;
|
||||
moduleDesc.pBuildFlags = "";
|
||||
|
||||
module = new ModuleImp(device, nullptr, ModuleType::User);
|
||||
bool success = module->initialize(&moduleDesc, device->getNEODevice());
|
||||
ASSERT_TRUE(success);
|
||||
}
|
||||
DebugManagerStateRestore restorer;
|
||||
ModuleImp *module = nullptr;
|
||||
};
|
||||
|
||||
HWTEST_F(L0BindlessAub, DISABLED_GivenBindlessKernelWhenExecutedThenOutputIsCorrect) {
|
||||
constexpr size_t bufferSize = MemoryConstants::pageSize;
|
||||
const uint32_t groupSize[] = {32, 1, 1};
|
||||
const uint32_t groupCount[] = {bufferSize / 32, 1, 1};
|
||||
|
||||
NEO::SVMAllocsManager::UnifiedMemoryProperties unifiedMemoryProperties(InternalMemoryType::HOST_UNIFIED_MEMORY,
|
||||
context->rootDeviceIndices,
|
||||
context->deviceBitfields);
|
||||
|
||||
auto bufferSrc = driverHandle->svmAllocsManager->createHostUnifiedMemoryAllocation(bufferSize, unifiedMemoryProperties);
|
||||
memset(bufferSrc, 55, bufferSize);
|
||||
|
||||
auto bufferDst = driverHandle->svmAllocsManager->createHostUnifiedMemoryAllocation(bufferSize, unifiedMemoryProperties);
|
||||
memset(bufferDst, 0, bufferSize);
|
||||
|
||||
auto simulatedCsr = AUBFixtureL0::getSimulatedCsr<FamilyType>();
|
||||
simulatedCsr->initializeEngine();
|
||||
|
||||
simulatedCsr->writeMemory(*driverHandle->svmAllocsManager->getSVMAlloc(bufferSrc)->gpuAllocations.getDefaultGraphicsAllocation());
|
||||
simulatedCsr->writeMemory(*driverHandle->svmAllocsManager->getSVMAlloc(bufferDst)->gpuAllocations.getDefaultGraphicsAllocation());
|
||||
|
||||
ze_group_count_t dispatchTraits;
|
||||
dispatchTraits.groupCountX = groupCount[0];
|
||||
dispatchTraits.groupCountY = groupCount[1];
|
||||
dispatchTraits.groupCountZ = groupCount[2];
|
||||
|
||||
createModuleFromFile("bindless_stateful_copy_buffer", context, device);
|
||||
|
||||
ze_kernel_handle_t kernel;
|
||||
ze_kernel_desc_t kernelDesc = {ZE_STRUCTURE_TYPE_KERNEL_DESC};
|
||||
kernelDesc.pKernelName = "StatefulCopyBuffer";
|
||||
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, zeKernelCreate(module->toHandle(), &kernelDesc, &kernel));
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, zeKernelSetArgumentValue(kernel, 0, sizeof(void *), &bufferSrc));
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, zeKernelSetArgumentValue(kernel, 1, sizeof(void *), &bufferDst));
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, zeKernelSetGroupSize(kernel, groupSize[0], groupSize[1], groupSize[2]));
|
||||
|
||||
ze_command_list_handle_t cmdListHandle = commandList->toHandle();
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, zeCommandListAppendLaunchKernel(cmdListHandle, kernel, &dispatchTraits, nullptr, 0, nullptr));
|
||||
commandList->close();
|
||||
|
||||
pCmdq->executeCommandLists(1, &cmdListHandle, nullptr, false);
|
||||
pCmdq->synchronize(std::numeric_limits<uint32_t>::max());
|
||||
|
||||
expectMemory<FamilyType>(reinterpret_cast<void *>(driverHandle->svmAllocsManager->getSVMAlloc(bufferDst)->gpuAllocations.getDefaultGraphicsAllocation()->getGpuAddress()),
|
||||
bufferSrc, bufferSize);
|
||||
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, zeKernelDestroy(kernel));
|
||||
driverHandle->svmAllocsManager->freeSVMAlloc(bufferSrc);
|
||||
driverHandle->svmAllocsManager->freeSVMAlloc(bufferDst);
|
||||
}
|
||||
|
||||
} // namespace ult
|
||||
} // namespace L0
|
|
@ -5,6 +5,12 @@
|
|||
*
|
||||
*/
|
||||
|
||||
#include "shared/source/command_stream/command_stream_receiver_simulated_common_hw.h"
|
||||
#include "shared/source/command_stream/command_stream_receiver_with_aub_dump.h"
|
||||
#include "shared/source/command_stream/tbx_command_stream_receiver_hw.h"
|
||||
|
||||
#include "test_mode.h"
|
||||
|
||||
#include <cstdint>
|
||||
#include <memory>
|
||||
#include <string>
|
||||
|
@ -39,6 +45,43 @@ class AUBFixtureL0 {
|
|||
void TearDown();
|
||||
static void prepareCopyEngines(NEO::MockDevice &device, const std::string &filename);
|
||||
|
||||
template <typename FamilyType>
|
||||
NEO::CommandStreamReceiverSimulatedCommonHw<FamilyType> *getSimulatedCsr() const {
|
||||
return static_cast<NEO::CommandStreamReceiverSimulatedCommonHw<FamilyType> *>(csr);
|
||||
}
|
||||
|
||||
template <typename FamilyType>
|
||||
void expectMemory(void *gfxAddress, const void *srcAddress, size_t length) {
|
||||
NEO::CommandStreamReceiverSimulatedCommonHw<FamilyType> *csrSimulated = getSimulatedCsr<FamilyType>();
|
||||
|
||||
if (NEO::testMode == NEO::TestMode::AubTestsWithTbx) {
|
||||
auto tbxCsr = csrSimulated;
|
||||
EXPECT_TRUE(tbxCsr->expectMemoryEqual(gfxAddress, srcAddress, length));
|
||||
csrSimulated = static_cast<NEO::CommandStreamReceiverSimulatedCommonHw<FamilyType> *>(
|
||||
static_cast<NEO::CommandStreamReceiverWithAUBDump<NEO::TbxCommandStreamReceiverHw<FamilyType>> *>(csr)->aubCSR.get());
|
||||
}
|
||||
|
||||
if (csrSimulated) {
|
||||
csrSimulated->expectMemoryEqual(gfxAddress, srcAddress, length);
|
||||
}
|
||||
}
|
||||
|
||||
template <typename FamilyType>
|
||||
void expectNotEqualMemory(void *gfxAddress, const void *srcAddress, size_t length) {
|
||||
NEO::CommandStreamReceiverSimulatedCommonHw<FamilyType> *csrSimulated = getSimulatedCsr<FamilyType>();
|
||||
|
||||
if (NEO::testMode == NEO::TestMode::AubTestsWithTbx) {
|
||||
auto tbxCsr = csrSimulated;
|
||||
EXPECT_TRUE(tbxCsr->expectMemoryNotEqual(gfxAddress, srcAddress, length));
|
||||
csrSimulated = static_cast<NEO::CommandStreamReceiverSimulatedCommonHw<FamilyType> *>(
|
||||
static_cast<NEO::CommandStreamReceiverWithAUBDump<NEO::TbxCommandStreamReceiverHw<FamilyType>> *>(csr)->aubCSR.get());
|
||||
}
|
||||
|
||||
if (csrSimulated) {
|
||||
csrSimulated->expectMemoryNotEqual(gfxAddress, srcAddress, length);
|
||||
}
|
||||
}
|
||||
|
||||
const uint32_t rootDeviceIndex = 0;
|
||||
NEO::ExecutionEnvironment *executionEnvironment;
|
||||
NEO::MemoryManager *memoryManager = nullptr;
|
||||
|
|
|
@ -13,6 +13,14 @@ set(TEST_MODULES
|
|||
${CMAKE_CURRENT_SOURCE_DIR}/test_modules/test_kernel.cl
|
||||
)
|
||||
|
||||
set(TEST_KERNEL_BINDLESS_internal_options
|
||||
"-cl-intel-use-bindless-mode -cl-intel-use-bindless-advanced-mode"
|
||||
)
|
||||
|
||||
set(TEST_KERNEL_BINDLESS
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/test_modules/stateful_copy_buffer.cl
|
||||
)
|
||||
|
||||
set(l0_test_kernels_outputs)
|
||||
macro(macro_for_each_core_type)
|
||||
foreach(PLATFORM_TYPE ${PLATFORM_TYPES})
|
||||
|
@ -20,12 +28,18 @@ macro(macro_for_each_core_type)
|
|||
get_family_name_with_type(${CORE_TYPE} ${PLATFORM_TYPE})
|
||||
string(TOLOWER ${PLATFORM_TYPE} PLATFORM_TYPE_LOWER)
|
||||
set(PLATFORM_LOWER ${DEFAULT_SUPPORTED_${CORE_TYPE}_${PLATFORM_TYPE}_PLATFORM})
|
||||
string(TOLOWER ${CORE_TYPE} CORE_TYPE_LOWER)
|
||||
level_zero_generate_kernels(l0_test_kernel_outputs ${PLATFORM_LOWER} ${family_name_with_type} "-g" ${TEST_MODULES})
|
||||
|
||||
#skip Gen8 bindless kernel generation
|
||||
if(NOT ("${CORE_TYPE_LOWER}" STREQUAL "gen8"))
|
||||
level_zero_generate_kernels_with_internal_options(l0_bindless_test_kernel_outputs ${PLATFORM_LOWER} ${family_name_with_type} "bindless" "-g" ${TEST_KERNEL_BINDLESS_internal_options} ${TEST_KERNEL_BINDLESS})
|
||||
endif()
|
||||
endif()
|
||||
endforeach()
|
||||
endmacro()
|
||||
apply_macro_for_each_core_type("TESTED")
|
||||
add_custom_target(l0_common_test_kernels DEPENDS ${l0_test_kernel_outputs} copy_compiler_files)
|
||||
add_custom_target(l0_common_test_kernels DEPENDS ${l0_test_kernel_outputs} ${l0_bindless_test_kernel_outputs} copy_compiler_files)
|
||||
set_target_properties(l0_common_test_kernels PROPERTIES FOLDER ${TARGET_NAME_L0})
|
||||
add_dependencies(prepare_test_kernels_for_l0 l0_common_test_kernels)
|
||||
|
||||
|
|
|
@ -49,3 +49,54 @@ function(level_zero_generate_kernels target_list platform_name suffix options)
|
|||
|
||||
set(${target_list} ${${target_list}} PARENT_SCOPE)
|
||||
endfunction()
|
||||
|
||||
function(level_zero_generate_kernels_with_internal_options target_list platform_name suffix prefix options internal_options)
|
||||
|
||||
list(APPEND results copy_compiler_files)
|
||||
|
||||
set(outputdir "${TargetDir}/level_zero/${suffix}/test_files/${NEO_ARCH}/")
|
||||
|
||||
foreach(filepath ${ARGN})
|
||||
get_filename_component(filename ${filepath} NAME)
|
||||
get_filename_component(basename ${filepath} NAME_WE)
|
||||
get_filename_component(workdir ${filepath} DIRECTORY)
|
||||
|
||||
set(outputpath_base "${outputdir}${prefix}_${basename}_${suffix}")
|
||||
|
||||
if(NOT NEO_DISABLE_BUILTINS_COMPILATION)
|
||||
set(output_files
|
||||
${outputpath_base}.bin
|
||||
${outputpath_base}.gen
|
||||
${outputpath_base}.spv
|
||||
${outputpath_base}.dbg
|
||||
)
|
||||
|
||||
set(output_name "-output" "${prefix}_${basename}")
|
||||
string(CONCAT options \" ${options} \" )
|
||||
string(CONCAT internal_options \" ${internal_options} \" )
|
||||
|
||||
add_custom_command(
|
||||
COMMAND echo generate ${ocloc_cmd_prefix} -q -file ${filename} -device ${platform_name} -out_dir ${outputdir} ${output_name} -options ${options} -internal_options ${internal_options} , workdir is ${workdir}
|
||||
OUTPUT ${output_files}
|
||||
COMMAND ${ocloc_cmd_prefix} -q -file ${filename} -device ${platform_name} -out_dir ${outputdir} ${output_name} -options ${options} -internal_options ${internal_options}
|
||||
WORKING_DIRECTORY ${workdir}
|
||||
DEPENDS ${filepath} ocloc
|
||||
)
|
||||
|
||||
list(APPEND ${target_list} ${output_files})
|
||||
else()
|
||||
foreach(_file_name "bin" "gen" "spv" "dbg")
|
||||
set(_file_prebuilt "${NEO_SOURCE_DIR}/../neo_test_kernels/level_zero/${suffix}/test_files/${NEO_ARCH}/${prefix}_${basename}_${suffix}.${_file_name}")
|
||||
add_custom_command(
|
||||
OUTPUT ${outputpath_base}.${_file_name}
|
||||
COMMAND ${CMAKE_COMMAND} -E make_directory ${outputdir}
|
||||
COMMAND ${CMAKE_COMMAND} -E copy_if_different ${_file_prebuilt} ${outputdir}
|
||||
)
|
||||
|
||||
list(APPEND ${target_list} ${outputpath_base}.${_file_name})
|
||||
endforeach()
|
||||
endif()
|
||||
endforeach()
|
||||
|
||||
set(${target_list} ${${target_list}} PARENT_SCOPE)
|
||||
endfunction()
|
||||
|
|
|
@ -0,0 +1,14 @@
|
|||
/*
|
||||
* Copyright (C) 2021 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
__kernel void StatefulCopyBuffer(
|
||||
const __global uchar* src,
|
||||
__global uchar* dst)
|
||||
{
|
||||
uint id = get_global_id(0);
|
||||
dst[id] = src[id];
|
||||
}
|
|
@ -492,7 +492,7 @@ HWTEST2_F(CommandQueueProgramSBATest,
|
|||
using BindlessCommandQueueSBASupport = IsAtLeastProduct<IGFX_SKYLAKE>;
|
||||
|
||||
HWTEST2_F(CommandQueueProgramSBATest,
|
||||
givenBindlessModeEnabledWhenProgrammingStateBaseAddressThenBindlessBaseAddressIsPassed, BindlessCommandQueueSBASupport) {
|
||||
givenBindlessModeEnabledWhenProgrammingStateBaseAddressThenBindlessBaseAddressAndSizeAreSet, BindlessCommandQueueSBASupport) {
|
||||
using STATE_BASE_ADDRESS = typename FamilyType::STATE_BASE_ADDRESS;
|
||||
DebugManagerStateRestore dbgRestorer;
|
||||
DebugManager.flags.UseBindlessMode.set(1);
|
||||
|
@ -525,7 +525,9 @@ HWTEST2_F(CommandQueueProgramSBATest,
|
|||
auto cmdSba = genCmdCast<STATE_BASE_ADDRESS *>(*itor);
|
||||
EXPECT_EQ(cmdSba->getBindlessSurfaceStateBaseAddressModifyEnable(), true);
|
||||
EXPECT_EQ(cmdSba->getBindlessSurfaceStateBaseAddress(), neoDevice->getExecutionEnvironment()->rootDeviceEnvironments[neoDevice->getRootDeviceIndex()]->getBindlessHeapsHelper()->getGlobalHeapsBase());
|
||||
EXPECT_EQ(cmdSba->getBindlessSurfaceStateSize(), MemoryConstants::sizeOf4GBinPageEntities);
|
||||
|
||||
auto surfaceStateCount = StateBaseAddressHelper<FamilyType>::getMaxBindlessSurfaceStates();
|
||||
EXPECT_EQ(surfaceStateCount, cmdSba->getBindlessSurfaceStateSize());
|
||||
|
||||
commandQueue->destroy();
|
||||
}
|
||||
|
|
|
@ -432,7 +432,7 @@ set(TEST_KERNEL_SIP_DEBUG_LOCAL_options
|
|||
)
|
||||
|
||||
set(TEST_KERNEL_BINDLESS_internal_options
|
||||
"-cl-intel-use-bindless-mode"
|
||||
"-cl-intel-use-bindless-mode -cl-intel-use-bindless-advanced-mode"
|
||||
)
|
||||
|
||||
set(TEST_KERNEL_BINDLESS
|
||||
|
|
|
@ -868,11 +868,27 @@ HWTEST_F(AUBSimpleArgNonUniformTest, givenOpenCL20SupportWhenProvidingWork3DimNo
|
|||
expectMemory<FamilyType>(remainderBufferGpuAddress, this->expectedRemainderMemory, sizeRemainderMemory);
|
||||
}
|
||||
|
||||
using AUBBindlessKernel = Test<KernelAUBFixture<BindlessKernelFixture>>;
|
||||
using IsSklAndLater = IsAtLeastProduct<IGFX_SKYLAKE>;
|
||||
|
||||
struct AUBBindlessKernel : public KernelAUBFixture<BindlessKernelFixture>,
|
||||
public ::testing::Test {
|
||||
|
||||
void SetUp() override {
|
||||
DebugManager.flags.UseBindlessMode.set(1);
|
||||
DebugManager.flags.UseExternalAllocatorForSshAndDsh.set(1);
|
||||
KernelAUBFixture<BindlessKernelFixture>::SetUp();
|
||||
}
|
||||
|
||||
void TearDown() override {
|
||||
KernelAUBFixture<BindlessKernelFixture>::TearDown();
|
||||
}
|
||||
DebugManagerStateRestore restorer;
|
||||
};
|
||||
|
||||
HWTEST2_F(AUBBindlessKernel, DISABLED_givenBindlessCopyKernelWhenEnqueuedThenResultsValidate, IsSklAndLater) {
|
||||
constexpr size_t bufferSize = MemoryConstants::pageSize;
|
||||
auto simulatedCsr = AUBFixture::getSimulatedCsr<FamilyType>();
|
||||
simulatedCsr->initializeEngine();
|
||||
|
||||
createKernel(std::string("bindless_stateful_copy_buffer"), std::string("StatefulCopyBuffer"));
|
||||
|
||||
|
@ -904,8 +920,6 @@ HWTEST2_F(AUBBindlessKernel, DISABLED_givenBindlessCopyKernelWhenEnqueuedThenRes
|
|||
retVal));
|
||||
ASSERT_NE(nullptr, pBufferDst);
|
||||
|
||||
auto simulatedCsr = AUBFixture::getSimulatedCsr<FamilyType>();
|
||||
|
||||
memcpy(pBufferSrc->getGraphicsAllocation(device->getRootDeviceIndex())->getUnderlyingBuffer(), bufferDataSrc, bufferSize);
|
||||
memcpy(pBufferDst->getGraphicsAllocation(device->getRootDeviceIndex())->getUnderlyingBuffer(), bufferDataDst, bufferSize);
|
||||
|
||||
|
@ -953,6 +967,8 @@ HWTEST2_F(AUBBindlessKernel, DISABLED_givenBindlessCopyImageKernelWhenEnqueuedTh
|
|||
constexpr unsigned int testWidth = 5;
|
||||
constexpr unsigned int testHeight = 1;
|
||||
constexpr unsigned int testDepth = 1;
|
||||
auto simulatedCsr = AUBFixture::getSimulatedCsr<FamilyType>();
|
||||
simulatedCsr->initializeEngine();
|
||||
|
||||
createKernel(std::string("bindless_copy_buffer_to_image"), std::string("CopyBufferToImage3d"));
|
||||
|
||||
|
@ -1012,8 +1028,6 @@ HWTEST2_F(AUBBindlessKernel, DISABLED_givenBindlessCopyImageKernelWhenEnqueuedTh
|
|||
memcpy(image->getGraphicsAllocation(device->getRootDeviceIndex())->getUnderlyingBuffer(), imageDataDst, imageSize);
|
||||
memcpy(bufferSrc->getGraphicsAllocation(device->getRootDeviceIndex())->getUnderlyingBuffer(), imageDataSrc, imageSize);
|
||||
|
||||
auto simulatedCsr = AUBFixture::getSimulatedCsr<FamilyType>();
|
||||
|
||||
simulatedCsr->writeMemory(*bufferSrc->getGraphicsAllocation(device->getRootDeviceIndex()));
|
||||
simulatedCsr->writeMemory(*image->getGraphicsAllocation(device->getRootDeviceIndex()));
|
||||
|
||||
|
|
|
@ -234,7 +234,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverFlushTaskXeHPAndLaterTests, wh
|
|||
EXPECT_NE(nullptr, bindingTablePoolAlloc);
|
||||
}
|
||||
|
||||
HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverFlushTaskXeHPAndLaterTests, givenSbaProgrammingWhenHeapsAreNotProvidedThenDontProgram) {
|
||||
HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverFlushTaskXeHPAndLaterTests, givenNoHeapsProvidedWhenSBAIsProgrammedThenBaseAddressesAreNotSetAndBindlessSurfaceStateSizeSetToMax) {
|
||||
using STATE_BASE_ADDRESS = typename FamilyType::STATE_BASE_ADDRESS;
|
||||
DispatchFlags dispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags();
|
||||
|
||||
|
@ -284,7 +284,9 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverFlushTaskXeHPAndLaterTests, gi
|
|||
|
||||
EXPECT_EQ(0u, sbaCmd.getBindlessSurfaceStateBaseAddress());
|
||||
EXPECT_FALSE(sbaCmd.getBindlessSurfaceStateBaseAddressModifyEnable());
|
||||
EXPECT_EQ(0u, sbaCmd.getBindlessSurfaceStateSize());
|
||||
|
||||
auto surfaceStateCount = StateBaseAddressHelper<FamilyType>::getMaxBindlessSurfaceStates();
|
||||
EXPECT_EQ(surfaceStateCount, sbaCmd.getBindlessSurfaceStateSize());
|
||||
}
|
||||
|
||||
using isXeHPOrAbove = IsAtLeastProduct<IGFX_XE_HP_SDV>;
|
||||
|
|
|
@ -26,7 +26,7 @@ set(BUILTIN_OPTIONS_STATELESS
|
|||
)
|
||||
|
||||
set(bindless_OPTIONS
|
||||
-internal_options "-cl-intel-use-bindless-mode"
|
||||
-internal_options "-cl-intel-use-bindless-mode -cl-intel-use-bindless-advanced-mode"
|
||||
)
|
||||
|
||||
set(bindful_OPTIONS
|
||||
|
|
|
@ -113,9 +113,6 @@ void EncodeDispatchKernel<Family>::encode(CommandContainer &container,
|
|||
kernelDescriptor.payloadMappings.samplerTable.borderColor,
|
||||
dispatchInterface->getDynamicStateHeapData(),
|
||||
device->getBindlessHeapsHelper(), device->getHardwareInfo());
|
||||
if (ApiSpecificConfig::getBindlessConfiguration()) {
|
||||
container.getResidencyContainer().push_back(device->getBindlessHeapsHelper()->getHeap(NEO::BindlessHeapsHelper::BindlesHeapType::GLOBAL_DSH)->getGraphicsAllocation());
|
||||
}
|
||||
}
|
||||
|
||||
idd.setSamplerStatePointer(samplerStateOffset);
|
||||
|
@ -220,6 +217,10 @@ void EncodeDispatchKernel<Family>::encode(CommandContainer &container,
|
|||
|
||||
cmd.setPredicateEnable(isPredicate);
|
||||
|
||||
if (ApiSpecificConfig::getBindlessConfiguration()) {
|
||||
container.getResidencyContainer().push_back(device->getBindlessHeapsHelper()->getHeap(NEO::BindlessHeapsHelper::BindlesHeapType::GLOBAL_DSH)->getGraphicsAllocation());
|
||||
}
|
||||
|
||||
EncodeDispatchKernel<Family>::adjustInterfaceDescriptorData(idd, hwInfo);
|
||||
|
||||
PreemptionHelper::applyPreemptionWaCmdsBegin<Family>(listCmdBufferStream, *device);
|
||||
|
|
|
@ -26,7 +26,7 @@ static constexpr ConstStringRef fastRelaxedMath = "-cl-fast-relaxed-math";
|
|||
static constexpr ConstStringRef preserveVec3Type = "-fpreserve-vec3-type";
|
||||
static constexpr ConstStringRef createLibrary = "-create-library";
|
||||
static constexpr ConstStringRef generateDebugInfo = "-g";
|
||||
static constexpr ConstStringRef bindlessMode = "-cl-intel-use-bindless-mode";
|
||||
static constexpr ConstStringRef bindlessMode = "-cl-intel-use-bindless-mode -cl-intel-use-bindless-advanced-mode";
|
||||
static constexpr ConstStringRef uniformWorkgroupSize = "-cl-uniform-work-group-size";
|
||||
static constexpr ConstStringRef forceEmuInt32DivRem = "-cl-intel-force-emu-int32divrem";
|
||||
static constexpr ConstStringRef forceEmuInt32DivRemSP = "-cl-intel-force-emu-sp-int32divrem";
|
||||
|
|
|
@ -57,5 +57,7 @@ struct StateBaseAddressHelper {
|
|||
static void appendExtraCacheSettings(STATE_BASE_ADDRESS *stateBaseAddress, GmmHelper *gmmHelper);
|
||||
|
||||
static void programBindingTableBaseAddress(LinearStream &commandStream, const IndirectHeap &ssh, GmmHelper *gmmHelper);
|
||||
|
||||
static uint32_t getMaxBindlessSurfaceStates();
|
||||
};
|
||||
} // namespace NEO
|
||||
|
|
|
@ -37,6 +37,10 @@ void StateBaseAddressHelper<GfxFamily>::programStateBaseAddress(
|
|||
|
||||
*stateBaseAddress = GfxFamily::cmdInitStateBaseAddress;
|
||||
bool overrideBindlessSurfaceStateBase = true;
|
||||
|
||||
const auto surfaceStateCount = getMaxBindlessSurfaceStates();
|
||||
stateBaseAddress->setBindlessSurfaceStateSize(surfaceStateCount);
|
||||
|
||||
if (useGlobalHeapsBaseAddress) {
|
||||
stateBaseAddress->setDynamicStateBaseAddressModifyEnable(true);
|
||||
stateBaseAddress->setDynamicStateBufferSizeModifyEnable(true);
|
||||
|
@ -48,7 +52,6 @@ void StateBaseAddressHelper<GfxFamily>::programStateBaseAddress(
|
|||
|
||||
stateBaseAddress->setBindlessSurfaceStateBaseAddressModifyEnable(true);
|
||||
stateBaseAddress->setBindlessSurfaceStateBaseAddress(globalHeapsBaseAddress);
|
||||
stateBaseAddress->setBindlessSurfaceStateSize(MemoryConstants::sizeOf4GBinPageEntities);
|
||||
|
||||
overrideBindlessSurfaceStateBase = false;
|
||||
} else {
|
||||
|
|
|
@ -24,4 +24,9 @@ void StateBaseAddressHelper<GfxFamily>::appendStateBaseAddressParameters(
|
|||
appendExtraCacheSettings(stateBaseAddress, gmmHelper);
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
uint32_t StateBaseAddressHelper<GfxFamily>::getMaxBindlessSurfaceStates() {
|
||||
return 0;
|
||||
}
|
||||
|
||||
} // namespace NEO
|
||||
|
|
|
@ -40,4 +40,10 @@ void StateBaseAddressHelper<GfxFamily>::appendStateBaseAddressParameters(
|
|||
|
||||
appendExtraCacheSettings(stateBaseAddress, gmmHelper);
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
uint32_t StateBaseAddressHelper<GfxFamily>::getMaxBindlessSurfaceStates() {
|
||||
return (1 << 20) - 1;
|
||||
}
|
||||
|
||||
} // namespace NEO
|
||||
|
|
|
@ -32,4 +32,8 @@ void StateBaseAddressHelper<GfxFamily>::appendStateBaseAddressParameters(
|
|||
appendExtraCacheSettings(stateBaseAddress, gmmHelper);
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
uint32_t StateBaseAddressHelper<GfxFamily>::getMaxBindlessSurfaceStates() {
|
||||
return (1 << 20) - 1;
|
||||
}
|
||||
} // namespace NEO
|
||||
|
|
|
@ -114,4 +114,8 @@ template <typename GfxFamily>
|
|||
void StateBaseAddressHelper<GfxFamily>::appendIohParameters(STATE_BASE_ADDRESS *stateBaseAddress, const IndirectHeap *ioh, bool useGlobalHeapsBaseAddress, uint64_t indirectObjectHeapBaseAddress) {
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
uint32_t StateBaseAddressHelper<GfxFamily>::getMaxBindlessSurfaceStates() {
|
||||
return std::numeric_limits<uint32_t>::max();
|
||||
}
|
||||
} // namespace NEO
|
||||
|
|
|
@ -107,7 +107,9 @@ HWTEST2_F(SbaForBindlessTests, givenGlobalBindlessBaseAddressWhenProgramStateBas
|
|||
1u);
|
||||
EXPECT_TRUE(cmd->getBindlessSurfaceStateBaseAddressModifyEnable());
|
||||
EXPECT_EQ(cmd->getBindlessSurfaceStateBaseAddress(), globalBindlessHeapsBaseAddress);
|
||||
EXPECT_EQ(cmd->getBindlessSurfaceStateSize(), MemoryConstants::sizeOf4GBinPageEntities);
|
||||
|
||||
auto surfaceStateCount = StateBaseAddressHelper<FamilyType>::getMaxBindlessSurfaceStates();
|
||||
EXPECT_EQ(surfaceStateCount, cmd->getBindlessSurfaceStateSize());
|
||||
}
|
||||
|
||||
using IohSupported = IsWithinGfxCore<GFXCORE_FAMILY::IGFX_GEN9_CORE, GFXCORE_FAMILY::IGFX_GEN12LP_CORE>;
|
||||
|
|
|
@ -1146,7 +1146,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, InterfaceDescriptorDataTests, givenVariousValuesWhen
|
|||
using BindlessCommandEncodeStatesTest = Test<MemManagerFixture>;
|
||||
using BindlessCommandEncodeStatesTesttt = Test<DeviceFixture>;
|
||||
|
||||
HWTEST_F(BindlessCommandEncodeStatesTesttt, givenBindlessKernelWhenBindlessModeEnabledThenCmdContainerDoesNotHaveSsh) {
|
||||
HWTEST_F(BindlessCommandEncodeStatesTesttt, givenBindlessKernelAndBindlessModeEnabledWhenEncodingKernelThenCmdContainerHasNullptrSSH) {
|
||||
using BINDING_TABLE_STATE = typename FamilyType::BINDING_TABLE_STATE;
|
||||
using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA;
|
||||
using WALKER = typename FamilyType::WALKER_TYPE;
|
||||
|
@ -1184,6 +1184,47 @@ HWTEST_F(BindlessCommandEncodeStatesTesttt, givenBindlessKernelWhenBindlessModeE
|
|||
EXPECT_EQ(commandContainer->getIndirectHeap(HeapType::SURFACE_STATE), nullptr);
|
||||
}
|
||||
|
||||
HWTEST2_F(BindlessCommandEncodeStatesTesttt, givenBindlessKernelAndBindlessModeEnabledWhenEncodingKernelThenCmdContainerResidencyContainsGlobalDSH, IsAtMostGen12lp) {
|
||||
using BINDING_TABLE_STATE = typename FamilyType::BINDING_TABLE_STATE;
|
||||
using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA;
|
||||
using WALKER = typename FamilyType::WALKER_TYPE;
|
||||
DebugManagerStateRestore dbgRestorer;
|
||||
DebugManager.flags.UseBindlessMode.set(1);
|
||||
auto commandContainer = std::make_unique<CommandContainer>();
|
||||
commandContainer->initialize(pDevice);
|
||||
commandContainer->setDirtyStateForAllHeaps(false);
|
||||
pDevice->getExecutionEnvironment()->rootDeviceEnvironments[pDevice->getRootDeviceIndex()]->createBindlessHeapsHelper(pDevice->getMemoryManager(),
|
||||
pDevice->getNumGenericSubDevices() > 1,
|
||||
pDevice->getRootDeviceIndex(),
|
||||
pDevice->getDeviceBitfield());
|
||||
uint32_t numBindingTable = 1;
|
||||
BINDING_TABLE_STATE bindingTableState = FamilyType::cmdInitBindingTableState;
|
||||
|
||||
uint32_t dims[] = {1, 1, 1};
|
||||
std::unique_ptr<MockDispatchKernelEncoder> dispatchInterface(new MockDispatchKernelEncoder());
|
||||
|
||||
dispatchInterface->kernelDescriptor.payloadMappings.bindingTable.numEntries = numBindingTable;
|
||||
dispatchInterface->kernelDescriptor.payloadMappings.bindingTable.tableOffset = 0U;
|
||||
dispatchInterface->kernelDescriptor.kernelAttributes.bufferAddressingMode = KernelDescriptor::BindlessAndStateless;
|
||||
|
||||
const uint8_t *sshData = reinterpret_cast<uint8_t *>(&bindingTableState);
|
||||
EXPECT_CALL(*dispatchInterface.get(), getSurfaceStateHeapData()).WillRepeatedly(::testing::Return(sshData));
|
||||
EXPECT_CALL(*dispatchInterface.get(), getSurfaceStateHeapDataSize()).WillRepeatedly(::testing::Return(static_cast<uint32_t>(sizeof(BINDING_TABLE_STATE))));
|
||||
|
||||
bool requiresUncachedMocs = false;
|
||||
EXPECT_EQ(commandContainer->getIndirectHeap(HeapType::SURFACE_STATE), nullptr);
|
||||
uint32_t partitionCount = 0;
|
||||
|
||||
EncodeDispatchKernel<FamilyType>::encode(*commandContainer.get(), dims, false, false, dispatchInterface.get(), 0, false, false,
|
||||
pDevice, NEO::PreemptionMode::Disabled, requiresUncachedMocs, false, partitionCount,
|
||||
false, false);
|
||||
|
||||
auto globalDSHIterator = std::find(commandContainer->getResidencyContainer().begin(), commandContainer->getResidencyContainer().end(),
|
||||
pDevice->getBindlessHeapsHelper()->getHeap(BindlessHeapsHelper::GLOBAL_DSH)->getGraphicsAllocation());
|
||||
|
||||
EXPECT_NE(commandContainer->getResidencyContainer().end(), globalDSHIterator);
|
||||
}
|
||||
|
||||
HWTEST_F(BindlessCommandEncodeStatesTesttt, givenBindfulKernelWhenBindlessModeEnabledThenCmdContainerHaveSsh) {
|
||||
using BINDING_TABLE_STATE = typename FamilyType::BINDING_TABLE_STATE;
|
||||
using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA;
|
||||
|
|
Loading…
Reference in New Issue