diff --git a/level_zero/core/test/aub_tests/bindless/CMakeLists.txt b/level_zero/core/test/aub_tests/bindless/CMakeLists.txt new file mode 100644 index 0000000000..cb4765a495 --- /dev/null +++ b/level_zero/core/test/aub_tests/bindless/CMakeLists.txt @@ -0,0 +1,10 @@ +# +# Copyright (C) 2021 Intel Corporation +# +# SPDX-License-Identifier: MIT +# + +target_sources(ze_intel_gpu_aub_tests PRIVATE + ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt + ${CMAKE_CURRENT_SOURCE_DIR}/bindless_kernel_aub_tests.cpp +) diff --git a/level_zero/core/test/aub_tests/bindless/bindless_kernel_aub_tests.cpp b/level_zero/core/test/aub_tests/bindless/bindless_kernel_aub_tests.cpp new file mode 100644 index 0000000000..9a7b071414 --- /dev/null +++ b/level_zero/core/test/aub_tests/bindless/bindless_kernel_aub_tests.cpp @@ -0,0 +1,115 @@ +/* + * Copyright (C) 2021 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#include "shared/source/gmm_helper/gmm_helper.h" +#include "shared/source/helpers/array_count.h" +#include "shared/source/helpers/file_io.h" +#include "shared/test/common/helpers/debug_manager_state_restore.h" +#include "shared/test/common/helpers/test_files.h" + +#include "test.h" + +#include "level_zero/core/source/module/module_imp.h" +#include "level_zero/core/test/aub_tests/fixtures/aub_fixture.h" +#include "level_zero/core/test/unit_tests/mocks/mock_driver_handle.h" + +namespace L0 { +namespace ult { + +struct L0BindlessAub : Test { + + void SetUp() { + DebugManager.flags.UseBindlessMode.set(1); + DebugManager.flags.UseExternalAllocatorForSshAndDsh.set(1); + AUBFixtureL0::SetUp(); + } + void TearDown() { + + module->destroy(); + AUBFixtureL0::TearDown(); + } + + void createModuleFromFile(const std::string &fileName, ze_context_handle_t context, L0::Device *device) { + std::string testFile; + retrieveBinaryKernelFilenameNoRevision(testFile, fileName + "_", ".bin"); + + size_t size = 0; + auto src = loadDataFromFile( + testFile.c_str(), + size); + + ASSERT_NE(0u, size); + ASSERT_NE(nullptr, src); + + ze_module_desc_t moduleDesc = {ZE_STRUCTURE_TYPE_MODULE_DESC}; + moduleDesc.format = ZE_MODULE_FORMAT_NATIVE; + moduleDesc.pInputModule = reinterpret_cast(src.get()); + moduleDesc.inputSize = size; + moduleDesc.pBuildFlags = ""; + + module = new ModuleImp(device, nullptr, ModuleType::User); + bool success = module->initialize(&moduleDesc, device->getNEODevice()); + ASSERT_TRUE(success); + } + DebugManagerStateRestore restorer; + ModuleImp *module = nullptr; +}; + +HWTEST_F(L0BindlessAub, DISABLED_GivenBindlessKernelWhenExecutedThenOutputIsCorrect) { + constexpr size_t bufferSize = MemoryConstants::pageSize; + const uint32_t groupSize[] = {32, 1, 1}; + const uint32_t groupCount[] = {bufferSize / 32, 1, 1}; + + NEO::SVMAllocsManager::UnifiedMemoryProperties unifiedMemoryProperties(InternalMemoryType::HOST_UNIFIED_MEMORY, + context->rootDeviceIndices, + context->deviceBitfields); + + auto bufferSrc = driverHandle->svmAllocsManager->createHostUnifiedMemoryAllocation(bufferSize, unifiedMemoryProperties); + memset(bufferSrc, 55, bufferSize); + + auto bufferDst = driverHandle->svmAllocsManager->createHostUnifiedMemoryAllocation(bufferSize, unifiedMemoryProperties); + memset(bufferDst, 0, bufferSize); + + auto simulatedCsr = AUBFixtureL0::getSimulatedCsr(); + simulatedCsr->initializeEngine(); + + simulatedCsr->writeMemory(*driverHandle->svmAllocsManager->getSVMAlloc(bufferSrc)->gpuAllocations.getDefaultGraphicsAllocation()); + simulatedCsr->writeMemory(*driverHandle->svmAllocsManager->getSVMAlloc(bufferDst)->gpuAllocations.getDefaultGraphicsAllocation()); + + ze_group_count_t dispatchTraits; + dispatchTraits.groupCountX = groupCount[0]; + dispatchTraits.groupCountY = groupCount[1]; + dispatchTraits.groupCountZ = groupCount[2]; + + createModuleFromFile("bindless_stateful_copy_buffer", context, device); + + ze_kernel_handle_t kernel; + ze_kernel_desc_t kernelDesc = {ZE_STRUCTURE_TYPE_KERNEL_DESC}; + kernelDesc.pKernelName = "StatefulCopyBuffer"; + + EXPECT_EQ(ZE_RESULT_SUCCESS, zeKernelCreate(module->toHandle(), &kernelDesc, &kernel)); + EXPECT_EQ(ZE_RESULT_SUCCESS, zeKernelSetArgumentValue(kernel, 0, sizeof(void *), &bufferSrc)); + EXPECT_EQ(ZE_RESULT_SUCCESS, zeKernelSetArgumentValue(kernel, 1, sizeof(void *), &bufferDst)); + EXPECT_EQ(ZE_RESULT_SUCCESS, zeKernelSetGroupSize(kernel, groupSize[0], groupSize[1], groupSize[2])); + + ze_command_list_handle_t cmdListHandle = commandList->toHandle(); + EXPECT_EQ(ZE_RESULT_SUCCESS, zeCommandListAppendLaunchKernel(cmdListHandle, kernel, &dispatchTraits, nullptr, 0, nullptr)); + commandList->close(); + + pCmdq->executeCommandLists(1, &cmdListHandle, nullptr, false); + pCmdq->synchronize(std::numeric_limits::max()); + + expectMemory(reinterpret_cast(driverHandle->svmAllocsManager->getSVMAlloc(bufferDst)->gpuAllocations.getDefaultGraphicsAllocation()->getGpuAddress()), + bufferSrc, bufferSize); + + EXPECT_EQ(ZE_RESULT_SUCCESS, zeKernelDestroy(kernel)); + driverHandle->svmAllocsManager->freeSVMAlloc(bufferSrc); + driverHandle->svmAllocsManager->freeSVMAlloc(bufferDst); +} + +} // namespace ult +} // namespace L0 \ No newline at end of file diff --git a/level_zero/core/test/aub_tests/fixtures/aub_fixture.h b/level_zero/core/test/aub_tests/fixtures/aub_fixture.h index 01a7b7353b..9f2e1d142c 100644 --- a/level_zero/core/test/aub_tests/fixtures/aub_fixture.h +++ b/level_zero/core/test/aub_tests/fixtures/aub_fixture.h @@ -5,6 +5,12 @@ * */ +#include "shared/source/command_stream/command_stream_receiver_simulated_common_hw.h" +#include "shared/source/command_stream/command_stream_receiver_with_aub_dump.h" +#include "shared/source/command_stream/tbx_command_stream_receiver_hw.h" + +#include "test_mode.h" + #include #include #include @@ -39,6 +45,43 @@ class AUBFixtureL0 { void TearDown(); static void prepareCopyEngines(NEO::MockDevice &device, const std::string &filename); + template + NEO::CommandStreamReceiverSimulatedCommonHw *getSimulatedCsr() const { + return static_cast *>(csr); + } + + template + void expectMemory(void *gfxAddress, const void *srcAddress, size_t length) { + NEO::CommandStreamReceiverSimulatedCommonHw *csrSimulated = getSimulatedCsr(); + + if (NEO::testMode == NEO::TestMode::AubTestsWithTbx) { + auto tbxCsr = csrSimulated; + EXPECT_TRUE(tbxCsr->expectMemoryEqual(gfxAddress, srcAddress, length)); + csrSimulated = static_cast *>( + static_cast> *>(csr)->aubCSR.get()); + } + + if (csrSimulated) { + csrSimulated->expectMemoryEqual(gfxAddress, srcAddress, length); + } + } + + template + void expectNotEqualMemory(void *gfxAddress, const void *srcAddress, size_t length) { + NEO::CommandStreamReceiverSimulatedCommonHw *csrSimulated = getSimulatedCsr(); + + if (NEO::testMode == NEO::TestMode::AubTestsWithTbx) { + auto tbxCsr = csrSimulated; + EXPECT_TRUE(tbxCsr->expectMemoryNotEqual(gfxAddress, srcAddress, length)); + csrSimulated = static_cast *>( + static_cast> *>(csr)->aubCSR.get()); + } + + if (csrSimulated) { + csrSimulated->expectMemoryNotEqual(gfxAddress, srcAddress, length); + } + } + const uint32_t rootDeviceIndex = 0; NEO::ExecutionEnvironment *executionEnvironment; NEO::MemoryManager *memoryManager = nullptr; diff --git a/level_zero/core/test/common/CMakeLists.txt b/level_zero/core/test/common/CMakeLists.txt index 6be5b3f169..498bdbe05d 100644 --- a/level_zero/core/test/common/CMakeLists.txt +++ b/level_zero/core/test/common/CMakeLists.txt @@ -13,6 +13,14 @@ set(TEST_MODULES ${CMAKE_CURRENT_SOURCE_DIR}/test_modules/test_kernel.cl ) +set(TEST_KERNEL_BINDLESS_internal_options + "-cl-intel-use-bindless-mode -cl-intel-use-bindless-advanced-mode" +) + +set(TEST_KERNEL_BINDLESS + ${CMAKE_CURRENT_SOURCE_DIR}/test_modules/stateful_copy_buffer.cl +) + set(l0_test_kernels_outputs) macro(macro_for_each_core_type) foreach(PLATFORM_TYPE ${PLATFORM_TYPES}) @@ -20,12 +28,18 @@ macro(macro_for_each_core_type) get_family_name_with_type(${CORE_TYPE} ${PLATFORM_TYPE}) string(TOLOWER ${PLATFORM_TYPE} PLATFORM_TYPE_LOWER) set(PLATFORM_LOWER ${DEFAULT_SUPPORTED_${CORE_TYPE}_${PLATFORM_TYPE}_PLATFORM}) + string(TOLOWER ${CORE_TYPE} CORE_TYPE_LOWER) level_zero_generate_kernels(l0_test_kernel_outputs ${PLATFORM_LOWER} ${family_name_with_type} "-g" ${TEST_MODULES}) + + #skip Gen8 bindless kernel generation + if(NOT ("${CORE_TYPE_LOWER}" STREQUAL "gen8")) + level_zero_generate_kernels_with_internal_options(l0_bindless_test_kernel_outputs ${PLATFORM_LOWER} ${family_name_with_type} "bindless" "-g" ${TEST_KERNEL_BINDLESS_internal_options} ${TEST_KERNEL_BINDLESS}) + endif() endif() endforeach() endmacro() apply_macro_for_each_core_type("TESTED") -add_custom_target(l0_common_test_kernels DEPENDS ${l0_test_kernel_outputs} copy_compiler_files) +add_custom_target(l0_common_test_kernels DEPENDS ${l0_test_kernel_outputs} ${l0_bindless_test_kernel_outputs} copy_compiler_files) set_target_properties(l0_common_test_kernels PROPERTIES FOLDER ${TARGET_NAME_L0}) add_dependencies(prepare_test_kernels_for_l0 l0_common_test_kernels) diff --git a/level_zero/core/test/common/gen_kernel.cmake b/level_zero/core/test/common/gen_kernel.cmake index cfc7347a25..b2e44ee519 100644 --- a/level_zero/core/test/common/gen_kernel.cmake +++ b/level_zero/core/test/common/gen_kernel.cmake @@ -49,3 +49,54 @@ function(level_zero_generate_kernels target_list platform_name suffix options) set(${target_list} ${${target_list}} PARENT_SCOPE) endfunction() + +function(level_zero_generate_kernels_with_internal_options target_list platform_name suffix prefix options internal_options) + + list(APPEND results copy_compiler_files) + + set(outputdir "${TargetDir}/level_zero/${suffix}/test_files/${NEO_ARCH}/") + + foreach(filepath ${ARGN}) + get_filename_component(filename ${filepath} NAME) + get_filename_component(basename ${filepath} NAME_WE) + get_filename_component(workdir ${filepath} DIRECTORY) + + set(outputpath_base "${outputdir}${prefix}_${basename}_${suffix}") + + if(NOT NEO_DISABLE_BUILTINS_COMPILATION) + set(output_files + ${outputpath_base}.bin + ${outputpath_base}.gen + ${outputpath_base}.spv + ${outputpath_base}.dbg + ) + + set(output_name "-output" "${prefix}_${basename}") + string(CONCAT options \" ${options} \" ) + string(CONCAT internal_options \" ${internal_options} \" ) + + add_custom_command( + COMMAND echo generate ${ocloc_cmd_prefix} -q -file ${filename} -device ${platform_name} -out_dir ${outputdir} ${output_name} -options ${options} -internal_options ${internal_options} , workdir is ${workdir} + OUTPUT ${output_files} + COMMAND ${ocloc_cmd_prefix} -q -file ${filename} -device ${platform_name} -out_dir ${outputdir} ${output_name} -options ${options} -internal_options ${internal_options} + WORKING_DIRECTORY ${workdir} + DEPENDS ${filepath} ocloc + ) + + list(APPEND ${target_list} ${output_files}) + else() + foreach(_file_name "bin" "gen" "spv" "dbg") + set(_file_prebuilt "${NEO_SOURCE_DIR}/../neo_test_kernels/level_zero/${suffix}/test_files/${NEO_ARCH}/${prefix}_${basename}_${suffix}.${_file_name}") + add_custom_command( + OUTPUT ${outputpath_base}.${_file_name} + COMMAND ${CMAKE_COMMAND} -E make_directory ${outputdir} + COMMAND ${CMAKE_COMMAND} -E copy_if_different ${_file_prebuilt} ${outputdir} + ) + + list(APPEND ${target_list} ${outputpath_base}.${_file_name}) + endforeach() + endif() + endforeach() + + set(${target_list} ${${target_list}} PARENT_SCOPE) +endfunction() diff --git a/level_zero/core/test/common/test_modules/stateful_copy_buffer.cl b/level_zero/core/test/common/test_modules/stateful_copy_buffer.cl new file mode 100644 index 0000000000..8dc57d70c0 --- /dev/null +++ b/level_zero/core/test/common/test_modules/stateful_copy_buffer.cl @@ -0,0 +1,14 @@ +/* + * Copyright (C) 2021 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +__kernel void StatefulCopyBuffer( + const __global uchar* src, + __global uchar* dst) +{ + uint id = get_global_id(0); + dst[id] = src[id]; +} diff --git a/level_zero/core/test/unit_tests/sources/cmdqueue/test_cmdqueue.cpp b/level_zero/core/test/unit_tests/sources/cmdqueue/test_cmdqueue.cpp index 6bfd322051..bd11b11a48 100644 --- a/level_zero/core/test/unit_tests/sources/cmdqueue/test_cmdqueue.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdqueue/test_cmdqueue.cpp @@ -492,7 +492,7 @@ HWTEST2_F(CommandQueueProgramSBATest, using BindlessCommandQueueSBASupport = IsAtLeastProduct; HWTEST2_F(CommandQueueProgramSBATest, - givenBindlessModeEnabledWhenProgrammingStateBaseAddressThenBindlessBaseAddressIsPassed, BindlessCommandQueueSBASupport) { + givenBindlessModeEnabledWhenProgrammingStateBaseAddressThenBindlessBaseAddressAndSizeAreSet, BindlessCommandQueueSBASupport) { using STATE_BASE_ADDRESS = typename FamilyType::STATE_BASE_ADDRESS; DebugManagerStateRestore dbgRestorer; DebugManager.flags.UseBindlessMode.set(1); @@ -525,7 +525,9 @@ HWTEST2_F(CommandQueueProgramSBATest, auto cmdSba = genCmdCast(*itor); EXPECT_EQ(cmdSba->getBindlessSurfaceStateBaseAddressModifyEnable(), true); EXPECT_EQ(cmdSba->getBindlessSurfaceStateBaseAddress(), neoDevice->getExecutionEnvironment()->rootDeviceEnvironments[neoDevice->getRootDeviceIndex()]->getBindlessHeapsHelper()->getGlobalHeapsBase()); - EXPECT_EQ(cmdSba->getBindlessSurfaceStateSize(), MemoryConstants::sizeOf4GBinPageEntities); + + auto surfaceStateCount = StateBaseAddressHelper::getMaxBindlessSurfaceStates(); + EXPECT_EQ(surfaceStateCount, cmdSba->getBindlessSurfaceStateSize()); commandQueue->destroy(); } diff --git a/opencl/test/unit_test/CMakeLists.txt b/opencl/test/unit_test/CMakeLists.txt index ce912af724..973c451c14 100644 --- a/opencl/test/unit_test/CMakeLists.txt +++ b/opencl/test/unit_test/CMakeLists.txt @@ -432,7 +432,7 @@ set(TEST_KERNEL_SIP_DEBUG_LOCAL_options ) set(TEST_KERNEL_BINDLESS_internal_options - "-cl-intel-use-bindless-mode" + "-cl-intel-use-bindless-mode -cl-intel-use-bindless-advanced-mode" ) set(TEST_KERNEL_BINDLESS diff --git a/opencl/test/unit_test/aub_tests/command_queue/enqueue_kernel_aub_tests.cpp b/opencl/test/unit_test/aub_tests/command_queue/enqueue_kernel_aub_tests.cpp index 18c7465cec..f0f8dfe05e 100644 --- a/opencl/test/unit_test/aub_tests/command_queue/enqueue_kernel_aub_tests.cpp +++ b/opencl/test/unit_test/aub_tests/command_queue/enqueue_kernel_aub_tests.cpp @@ -868,11 +868,27 @@ HWTEST_F(AUBSimpleArgNonUniformTest, givenOpenCL20SupportWhenProvidingWork3DimNo expectMemory(remainderBufferGpuAddress, this->expectedRemainderMemory, sizeRemainderMemory); } -using AUBBindlessKernel = Test>; using IsSklAndLater = IsAtLeastProduct; +struct AUBBindlessKernel : public KernelAUBFixture, + public ::testing::Test { + + void SetUp() override { + DebugManager.flags.UseBindlessMode.set(1); + DebugManager.flags.UseExternalAllocatorForSshAndDsh.set(1); + KernelAUBFixture::SetUp(); + } + + void TearDown() override { + KernelAUBFixture::TearDown(); + } + DebugManagerStateRestore restorer; +}; + HWTEST2_F(AUBBindlessKernel, DISABLED_givenBindlessCopyKernelWhenEnqueuedThenResultsValidate, IsSklAndLater) { constexpr size_t bufferSize = MemoryConstants::pageSize; + auto simulatedCsr = AUBFixture::getSimulatedCsr(); + simulatedCsr->initializeEngine(); createKernel(std::string("bindless_stateful_copy_buffer"), std::string("StatefulCopyBuffer")); @@ -904,8 +920,6 @@ HWTEST2_F(AUBBindlessKernel, DISABLED_givenBindlessCopyKernelWhenEnqueuedThenRes retVal)); ASSERT_NE(nullptr, pBufferDst); - auto simulatedCsr = AUBFixture::getSimulatedCsr(); - memcpy(pBufferSrc->getGraphicsAllocation(device->getRootDeviceIndex())->getUnderlyingBuffer(), bufferDataSrc, bufferSize); memcpy(pBufferDst->getGraphicsAllocation(device->getRootDeviceIndex())->getUnderlyingBuffer(), bufferDataDst, bufferSize); @@ -953,6 +967,8 @@ HWTEST2_F(AUBBindlessKernel, DISABLED_givenBindlessCopyImageKernelWhenEnqueuedTh constexpr unsigned int testWidth = 5; constexpr unsigned int testHeight = 1; constexpr unsigned int testDepth = 1; + auto simulatedCsr = AUBFixture::getSimulatedCsr(); + simulatedCsr->initializeEngine(); createKernel(std::string("bindless_copy_buffer_to_image"), std::string("CopyBufferToImage3d")); @@ -1012,8 +1028,6 @@ HWTEST2_F(AUBBindlessKernel, DISABLED_givenBindlessCopyImageKernelWhenEnqueuedTh memcpy(image->getGraphicsAllocation(device->getRootDeviceIndex())->getUnderlyingBuffer(), imageDataDst, imageSize); memcpy(bufferSrc->getGraphicsAllocation(device->getRootDeviceIndex())->getUnderlyingBuffer(), imageDataSrc, imageSize); - auto simulatedCsr = AUBFixture::getSimulatedCsr(); - simulatedCsr->writeMemory(*bufferSrc->getGraphicsAllocation(device->getRootDeviceIndex())); simulatedCsr->writeMemory(*image->getGraphicsAllocation(device->getRootDeviceIndex())); diff --git a/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_tests_xehp_and_later.cpp b/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_tests_xehp_and_later.cpp index 021d98aca9..f66079af0f 100644 --- a/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_tests_xehp_and_later.cpp +++ b/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_tests_xehp_and_later.cpp @@ -234,7 +234,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverFlushTaskXeHPAndLaterTests, wh EXPECT_NE(nullptr, bindingTablePoolAlloc); } -HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverFlushTaskXeHPAndLaterTests, givenSbaProgrammingWhenHeapsAreNotProvidedThenDontProgram) { +HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverFlushTaskXeHPAndLaterTests, givenNoHeapsProvidedWhenSBAIsProgrammedThenBaseAddressesAreNotSetAndBindlessSurfaceStateSizeSetToMax) { using STATE_BASE_ADDRESS = typename FamilyType::STATE_BASE_ADDRESS; DispatchFlags dispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags(); @@ -284,7 +284,9 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverFlushTaskXeHPAndLaterTests, gi EXPECT_EQ(0u, sbaCmd.getBindlessSurfaceStateBaseAddress()); EXPECT_FALSE(sbaCmd.getBindlessSurfaceStateBaseAddressModifyEnable()); - EXPECT_EQ(0u, sbaCmd.getBindlessSurfaceStateSize()); + + auto surfaceStateCount = StateBaseAddressHelper::getMaxBindlessSurfaceStates(); + EXPECT_EQ(surfaceStateCount, sbaCmd.getBindlessSurfaceStateSize()); } using isXeHPOrAbove = IsAtLeastProduct; diff --git a/shared/source/built_ins/kernels/CMakeLists.txt b/shared/source/built_ins/kernels/CMakeLists.txt index 6de2071b59..9e0cd0e8dc 100644 --- a/shared/source/built_ins/kernels/CMakeLists.txt +++ b/shared/source/built_ins/kernels/CMakeLists.txt @@ -26,7 +26,7 @@ set(BUILTIN_OPTIONS_STATELESS ) set(bindless_OPTIONS - -internal_options "-cl-intel-use-bindless-mode" + -internal_options "-cl-intel-use-bindless-mode -cl-intel-use-bindless-advanced-mode" ) set(bindful_OPTIONS diff --git a/shared/source/command_container/command_encoder_bdw_and_later.inl b/shared/source/command_container/command_encoder_bdw_and_later.inl index 3accf9e357..af5650befc 100644 --- a/shared/source/command_container/command_encoder_bdw_and_later.inl +++ b/shared/source/command_container/command_encoder_bdw_and_later.inl @@ -113,9 +113,6 @@ void EncodeDispatchKernel::encode(CommandContainer &container, kernelDescriptor.payloadMappings.samplerTable.borderColor, dispatchInterface->getDynamicStateHeapData(), device->getBindlessHeapsHelper(), device->getHardwareInfo()); - if (ApiSpecificConfig::getBindlessConfiguration()) { - container.getResidencyContainer().push_back(device->getBindlessHeapsHelper()->getHeap(NEO::BindlessHeapsHelper::BindlesHeapType::GLOBAL_DSH)->getGraphicsAllocation()); - } } idd.setSamplerStatePointer(samplerStateOffset); @@ -220,6 +217,10 @@ void EncodeDispatchKernel::encode(CommandContainer &container, cmd.setPredicateEnable(isPredicate); + if (ApiSpecificConfig::getBindlessConfiguration()) { + container.getResidencyContainer().push_back(device->getBindlessHeapsHelper()->getHeap(NEO::BindlessHeapsHelper::BindlesHeapType::GLOBAL_DSH)->getGraphicsAllocation()); + } + EncodeDispatchKernel::adjustInterfaceDescriptorData(idd, hwInfo); PreemptionHelper::applyPreemptionWaCmdsBegin(listCmdBufferStream, *device); diff --git a/shared/source/compiler_interface/compiler_options/compiler_options_base.h b/shared/source/compiler_interface/compiler_options/compiler_options_base.h index a92bd9a99d..cd973a0a2a 100644 --- a/shared/source/compiler_interface/compiler_options/compiler_options_base.h +++ b/shared/source/compiler_interface/compiler_options/compiler_options_base.h @@ -26,7 +26,7 @@ static constexpr ConstStringRef fastRelaxedMath = "-cl-fast-relaxed-math"; static constexpr ConstStringRef preserveVec3Type = "-fpreserve-vec3-type"; static constexpr ConstStringRef createLibrary = "-create-library"; static constexpr ConstStringRef generateDebugInfo = "-g"; -static constexpr ConstStringRef bindlessMode = "-cl-intel-use-bindless-mode"; +static constexpr ConstStringRef bindlessMode = "-cl-intel-use-bindless-mode -cl-intel-use-bindless-advanced-mode"; static constexpr ConstStringRef uniformWorkgroupSize = "-cl-uniform-work-group-size"; static constexpr ConstStringRef forceEmuInt32DivRem = "-cl-intel-force-emu-int32divrem"; static constexpr ConstStringRef forceEmuInt32DivRemSP = "-cl-intel-force-emu-sp-int32divrem"; diff --git a/shared/source/helpers/state_base_address.h b/shared/source/helpers/state_base_address.h index 73e9feba23..20b5cdb0f3 100644 --- a/shared/source/helpers/state_base_address.h +++ b/shared/source/helpers/state_base_address.h @@ -57,5 +57,7 @@ struct StateBaseAddressHelper { static void appendExtraCacheSettings(STATE_BASE_ADDRESS *stateBaseAddress, GmmHelper *gmmHelper); static void programBindingTableBaseAddress(LinearStream &commandStream, const IndirectHeap &ssh, GmmHelper *gmmHelper); + + static uint32_t getMaxBindlessSurfaceStates(); }; } // namespace NEO diff --git a/shared/source/helpers/state_base_address_base.inl b/shared/source/helpers/state_base_address_base.inl index c005a4da97..ca78647b45 100644 --- a/shared/source/helpers/state_base_address_base.inl +++ b/shared/source/helpers/state_base_address_base.inl @@ -37,6 +37,10 @@ void StateBaseAddressHelper::programStateBaseAddress( *stateBaseAddress = GfxFamily::cmdInitStateBaseAddress; bool overrideBindlessSurfaceStateBase = true; + + const auto surfaceStateCount = getMaxBindlessSurfaceStates(); + stateBaseAddress->setBindlessSurfaceStateSize(surfaceStateCount); + if (useGlobalHeapsBaseAddress) { stateBaseAddress->setDynamicStateBaseAddressModifyEnable(true); stateBaseAddress->setDynamicStateBufferSizeModifyEnable(true); @@ -48,7 +52,6 @@ void StateBaseAddressHelper::programStateBaseAddress( stateBaseAddress->setBindlessSurfaceStateBaseAddressModifyEnable(true); stateBaseAddress->setBindlessSurfaceStateBaseAddress(globalHeapsBaseAddress); - stateBaseAddress->setBindlessSurfaceStateSize(MemoryConstants::sizeOf4GBinPageEntities); overrideBindlessSurfaceStateBase = false; } else { diff --git a/shared/source/helpers/state_base_address_bdw.inl b/shared/source/helpers/state_base_address_bdw.inl index cafa4e348e..1ebad248b0 100644 --- a/shared/source/helpers/state_base_address_bdw.inl +++ b/shared/source/helpers/state_base_address_bdw.inl @@ -24,4 +24,9 @@ void StateBaseAddressHelper::appendStateBaseAddressParameters( appendExtraCacheSettings(stateBaseAddress, gmmHelper); } +template +uint32_t StateBaseAddressHelper::getMaxBindlessSurfaceStates() { + return 0; +} + } // namespace NEO diff --git a/shared/source/helpers/state_base_address_icllp_and_later.inl b/shared/source/helpers/state_base_address_icllp_and_later.inl index 16385a256a..f3496fcf97 100644 --- a/shared/source/helpers/state_base_address_icllp_and_later.inl +++ b/shared/source/helpers/state_base_address_icllp_and_later.inl @@ -40,4 +40,10 @@ void StateBaseAddressHelper::appendStateBaseAddressParameters( appendExtraCacheSettings(stateBaseAddress, gmmHelper); } + +template +uint32_t StateBaseAddressHelper::getMaxBindlessSurfaceStates() { + return (1 << 20) - 1; +} + } // namespace NEO diff --git a/shared/source/helpers/state_base_address_skl.inl b/shared/source/helpers/state_base_address_skl.inl index 813537ef3b..90cf20e0ad 100644 --- a/shared/source/helpers/state_base_address_skl.inl +++ b/shared/source/helpers/state_base_address_skl.inl @@ -32,4 +32,8 @@ void StateBaseAddressHelper::appendStateBaseAddressParameters( appendExtraCacheSettings(stateBaseAddress, gmmHelper); } +template +uint32_t StateBaseAddressHelper::getMaxBindlessSurfaceStates() { + return (1 << 20) - 1; +} } // namespace NEO diff --git a/shared/source/helpers/state_base_address_xehp_and_later.inl b/shared/source/helpers/state_base_address_xehp_and_later.inl index 09db0be2a9..a1ab13b7f3 100644 --- a/shared/source/helpers/state_base_address_xehp_and_later.inl +++ b/shared/source/helpers/state_base_address_xehp_and_later.inl @@ -114,4 +114,8 @@ template void StateBaseAddressHelper::appendIohParameters(STATE_BASE_ADDRESS *stateBaseAddress, const IndirectHeap *ioh, bool useGlobalHeapsBaseAddress, uint64_t indirectObjectHeapBaseAddress) { } +template +uint32_t StateBaseAddressHelper::getMaxBindlessSurfaceStates() { + return std::numeric_limits::max(); +} } // namespace NEO diff --git a/shared/test/common/helpers/state_base_address_tests.cpp b/shared/test/common/helpers/state_base_address_tests.cpp index 5ec863c379..f040762c6a 100644 --- a/shared/test/common/helpers/state_base_address_tests.cpp +++ b/shared/test/common/helpers/state_base_address_tests.cpp @@ -107,7 +107,9 @@ HWTEST2_F(SbaForBindlessTests, givenGlobalBindlessBaseAddressWhenProgramStateBas 1u); EXPECT_TRUE(cmd->getBindlessSurfaceStateBaseAddressModifyEnable()); EXPECT_EQ(cmd->getBindlessSurfaceStateBaseAddress(), globalBindlessHeapsBaseAddress); - EXPECT_EQ(cmd->getBindlessSurfaceStateSize(), MemoryConstants::sizeOf4GBinPageEntities); + + auto surfaceStateCount = StateBaseAddressHelper::getMaxBindlessSurfaceStates(); + EXPECT_EQ(surfaceStateCount, cmd->getBindlessSurfaceStateSize()); } using IohSupported = IsWithinGfxCore; diff --git a/shared/test/unit_test/encoders/test_encode_dispatch_kernel.cpp b/shared/test/unit_test/encoders/test_encode_dispatch_kernel.cpp index 5d29beeac4..6e7ff72dc9 100644 --- a/shared/test/unit_test/encoders/test_encode_dispatch_kernel.cpp +++ b/shared/test/unit_test/encoders/test_encode_dispatch_kernel.cpp @@ -1146,7 +1146,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, InterfaceDescriptorDataTests, givenVariousValuesWhen using BindlessCommandEncodeStatesTest = Test; using BindlessCommandEncodeStatesTesttt = Test; -HWTEST_F(BindlessCommandEncodeStatesTesttt, givenBindlessKernelWhenBindlessModeEnabledThenCmdContainerDoesNotHaveSsh) { +HWTEST_F(BindlessCommandEncodeStatesTesttt, givenBindlessKernelAndBindlessModeEnabledWhenEncodingKernelThenCmdContainerHasNullptrSSH) { using BINDING_TABLE_STATE = typename FamilyType::BINDING_TABLE_STATE; using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA; using WALKER = typename FamilyType::WALKER_TYPE; @@ -1184,6 +1184,47 @@ HWTEST_F(BindlessCommandEncodeStatesTesttt, givenBindlessKernelWhenBindlessModeE EXPECT_EQ(commandContainer->getIndirectHeap(HeapType::SURFACE_STATE), nullptr); } +HWTEST2_F(BindlessCommandEncodeStatesTesttt, givenBindlessKernelAndBindlessModeEnabledWhenEncodingKernelThenCmdContainerResidencyContainsGlobalDSH, IsAtMostGen12lp) { + using BINDING_TABLE_STATE = typename FamilyType::BINDING_TABLE_STATE; + using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA; + using WALKER = typename FamilyType::WALKER_TYPE; + DebugManagerStateRestore dbgRestorer; + DebugManager.flags.UseBindlessMode.set(1); + auto commandContainer = std::make_unique(); + commandContainer->initialize(pDevice); + commandContainer->setDirtyStateForAllHeaps(false); + pDevice->getExecutionEnvironment()->rootDeviceEnvironments[pDevice->getRootDeviceIndex()]->createBindlessHeapsHelper(pDevice->getMemoryManager(), + pDevice->getNumGenericSubDevices() > 1, + pDevice->getRootDeviceIndex(), + pDevice->getDeviceBitfield()); + uint32_t numBindingTable = 1; + BINDING_TABLE_STATE bindingTableState = FamilyType::cmdInitBindingTableState; + + uint32_t dims[] = {1, 1, 1}; + std::unique_ptr dispatchInterface(new MockDispatchKernelEncoder()); + + dispatchInterface->kernelDescriptor.payloadMappings.bindingTable.numEntries = numBindingTable; + dispatchInterface->kernelDescriptor.payloadMappings.bindingTable.tableOffset = 0U; + dispatchInterface->kernelDescriptor.kernelAttributes.bufferAddressingMode = KernelDescriptor::BindlessAndStateless; + + const uint8_t *sshData = reinterpret_cast(&bindingTableState); + EXPECT_CALL(*dispatchInterface.get(), getSurfaceStateHeapData()).WillRepeatedly(::testing::Return(sshData)); + EXPECT_CALL(*dispatchInterface.get(), getSurfaceStateHeapDataSize()).WillRepeatedly(::testing::Return(static_cast(sizeof(BINDING_TABLE_STATE)))); + + bool requiresUncachedMocs = false; + EXPECT_EQ(commandContainer->getIndirectHeap(HeapType::SURFACE_STATE), nullptr); + uint32_t partitionCount = 0; + + EncodeDispatchKernel::encode(*commandContainer.get(), dims, false, false, dispatchInterface.get(), 0, false, false, + pDevice, NEO::PreemptionMode::Disabled, requiresUncachedMocs, false, partitionCount, + false, false); + + auto globalDSHIterator = std::find(commandContainer->getResidencyContainer().begin(), commandContainer->getResidencyContainer().end(), + pDevice->getBindlessHeapsHelper()->getHeap(BindlessHeapsHelper::GLOBAL_DSH)->getGraphicsAllocation()); + + EXPECT_NE(commandContainer->getResidencyContainer().end(), globalDSHIterator); +} + HWTEST_F(BindlessCommandEncodeStatesTesttt, givenBindfulKernelWhenBindlessModeEnabledThenCmdContainerHaveSsh) { using BINDING_TABLE_STATE = typename FamilyType::BINDING_TABLE_STATE; using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA;