mirror of
https://github.com/intel/compute-runtime.git
synced 2026-01-03 14:55:24 +08:00
Support for bindless mode in L0 - improvements
Related-To: NEO-6448 - add new IGC compilation flag when bindless mode used - fix SBA programming of BindlessSurfaceStateSize - always set maximum surface state count - fix residency of global DSH heap on gen9 - gen12lp in bindless mode - add L0 aub test with bindless kernel - disabled - partial fixes in OCL aub tests Signed-off-by: Mateusz Hoppe <mateusz.hoppe@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
55959d4d1d
commit
8b233f7f45
10
level_zero/core/test/aub_tests/bindless/CMakeLists.txt
Normal file
10
level_zero/core/test/aub_tests/bindless/CMakeLists.txt
Normal file
@@ -0,0 +1,10 @@
|
||||
#
|
||||
# Copyright (C) 2021 Intel Corporation
|
||||
#
|
||||
# SPDX-License-Identifier: MIT
|
||||
#
|
||||
|
||||
target_sources(ze_intel_gpu_aub_tests PRIVATE
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/bindless_kernel_aub_tests.cpp
|
||||
)
|
||||
@@ -0,0 +1,115 @@
|
||||
/*
|
||||
* Copyright (C) 2021 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "shared/source/gmm_helper/gmm_helper.h"
|
||||
#include "shared/source/helpers/array_count.h"
|
||||
#include "shared/source/helpers/file_io.h"
|
||||
#include "shared/test/common/helpers/debug_manager_state_restore.h"
|
||||
#include "shared/test/common/helpers/test_files.h"
|
||||
|
||||
#include "test.h"
|
||||
|
||||
#include "level_zero/core/source/module/module_imp.h"
|
||||
#include "level_zero/core/test/aub_tests/fixtures/aub_fixture.h"
|
||||
#include "level_zero/core/test/unit_tests/mocks/mock_driver_handle.h"
|
||||
|
||||
namespace L0 {
|
||||
namespace ult {
|
||||
|
||||
struct L0BindlessAub : Test<AUBFixtureL0> {
|
||||
|
||||
void SetUp() {
|
||||
DebugManager.flags.UseBindlessMode.set(1);
|
||||
DebugManager.flags.UseExternalAllocatorForSshAndDsh.set(1);
|
||||
AUBFixtureL0::SetUp();
|
||||
}
|
||||
void TearDown() {
|
||||
|
||||
module->destroy();
|
||||
AUBFixtureL0::TearDown();
|
||||
}
|
||||
|
||||
void createModuleFromFile(const std::string &fileName, ze_context_handle_t context, L0::Device *device) {
|
||||
std::string testFile;
|
||||
retrieveBinaryKernelFilenameNoRevision(testFile, fileName + "_", ".bin");
|
||||
|
||||
size_t size = 0;
|
||||
auto src = loadDataFromFile(
|
||||
testFile.c_str(),
|
||||
size);
|
||||
|
||||
ASSERT_NE(0u, size);
|
||||
ASSERT_NE(nullptr, src);
|
||||
|
||||
ze_module_desc_t moduleDesc = {ZE_STRUCTURE_TYPE_MODULE_DESC};
|
||||
moduleDesc.format = ZE_MODULE_FORMAT_NATIVE;
|
||||
moduleDesc.pInputModule = reinterpret_cast<const uint8_t *>(src.get());
|
||||
moduleDesc.inputSize = size;
|
||||
moduleDesc.pBuildFlags = "";
|
||||
|
||||
module = new ModuleImp(device, nullptr, ModuleType::User);
|
||||
bool success = module->initialize(&moduleDesc, device->getNEODevice());
|
||||
ASSERT_TRUE(success);
|
||||
}
|
||||
DebugManagerStateRestore restorer;
|
||||
ModuleImp *module = nullptr;
|
||||
};
|
||||
|
||||
HWTEST_F(L0BindlessAub, DISABLED_GivenBindlessKernelWhenExecutedThenOutputIsCorrect) {
|
||||
constexpr size_t bufferSize = MemoryConstants::pageSize;
|
||||
const uint32_t groupSize[] = {32, 1, 1};
|
||||
const uint32_t groupCount[] = {bufferSize / 32, 1, 1};
|
||||
|
||||
NEO::SVMAllocsManager::UnifiedMemoryProperties unifiedMemoryProperties(InternalMemoryType::HOST_UNIFIED_MEMORY,
|
||||
context->rootDeviceIndices,
|
||||
context->deviceBitfields);
|
||||
|
||||
auto bufferSrc = driverHandle->svmAllocsManager->createHostUnifiedMemoryAllocation(bufferSize, unifiedMemoryProperties);
|
||||
memset(bufferSrc, 55, bufferSize);
|
||||
|
||||
auto bufferDst = driverHandle->svmAllocsManager->createHostUnifiedMemoryAllocation(bufferSize, unifiedMemoryProperties);
|
||||
memset(bufferDst, 0, bufferSize);
|
||||
|
||||
auto simulatedCsr = AUBFixtureL0::getSimulatedCsr<FamilyType>();
|
||||
simulatedCsr->initializeEngine();
|
||||
|
||||
simulatedCsr->writeMemory(*driverHandle->svmAllocsManager->getSVMAlloc(bufferSrc)->gpuAllocations.getDefaultGraphicsAllocation());
|
||||
simulatedCsr->writeMemory(*driverHandle->svmAllocsManager->getSVMAlloc(bufferDst)->gpuAllocations.getDefaultGraphicsAllocation());
|
||||
|
||||
ze_group_count_t dispatchTraits;
|
||||
dispatchTraits.groupCountX = groupCount[0];
|
||||
dispatchTraits.groupCountY = groupCount[1];
|
||||
dispatchTraits.groupCountZ = groupCount[2];
|
||||
|
||||
createModuleFromFile("bindless_stateful_copy_buffer", context, device);
|
||||
|
||||
ze_kernel_handle_t kernel;
|
||||
ze_kernel_desc_t kernelDesc = {ZE_STRUCTURE_TYPE_KERNEL_DESC};
|
||||
kernelDesc.pKernelName = "StatefulCopyBuffer";
|
||||
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, zeKernelCreate(module->toHandle(), &kernelDesc, &kernel));
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, zeKernelSetArgumentValue(kernel, 0, sizeof(void *), &bufferSrc));
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, zeKernelSetArgumentValue(kernel, 1, sizeof(void *), &bufferDst));
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, zeKernelSetGroupSize(kernel, groupSize[0], groupSize[1], groupSize[2]));
|
||||
|
||||
ze_command_list_handle_t cmdListHandle = commandList->toHandle();
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, zeCommandListAppendLaunchKernel(cmdListHandle, kernel, &dispatchTraits, nullptr, 0, nullptr));
|
||||
commandList->close();
|
||||
|
||||
pCmdq->executeCommandLists(1, &cmdListHandle, nullptr, false);
|
||||
pCmdq->synchronize(std::numeric_limits<uint32_t>::max());
|
||||
|
||||
expectMemory<FamilyType>(reinterpret_cast<void *>(driverHandle->svmAllocsManager->getSVMAlloc(bufferDst)->gpuAllocations.getDefaultGraphicsAllocation()->getGpuAddress()),
|
||||
bufferSrc, bufferSize);
|
||||
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, zeKernelDestroy(kernel));
|
||||
driverHandle->svmAllocsManager->freeSVMAlloc(bufferSrc);
|
||||
driverHandle->svmAllocsManager->freeSVMAlloc(bufferDst);
|
||||
}
|
||||
|
||||
} // namespace ult
|
||||
} // namespace L0
|
||||
@@ -5,6 +5,12 @@
|
||||
*
|
||||
*/
|
||||
|
||||
#include "shared/source/command_stream/command_stream_receiver_simulated_common_hw.h"
|
||||
#include "shared/source/command_stream/command_stream_receiver_with_aub_dump.h"
|
||||
#include "shared/source/command_stream/tbx_command_stream_receiver_hw.h"
|
||||
|
||||
#include "test_mode.h"
|
||||
|
||||
#include <cstdint>
|
||||
#include <memory>
|
||||
#include <string>
|
||||
@@ -39,6 +45,43 @@ class AUBFixtureL0 {
|
||||
void TearDown();
|
||||
static void prepareCopyEngines(NEO::MockDevice &device, const std::string &filename);
|
||||
|
||||
template <typename FamilyType>
|
||||
NEO::CommandStreamReceiverSimulatedCommonHw<FamilyType> *getSimulatedCsr() const {
|
||||
return static_cast<NEO::CommandStreamReceiverSimulatedCommonHw<FamilyType> *>(csr);
|
||||
}
|
||||
|
||||
template <typename FamilyType>
|
||||
void expectMemory(void *gfxAddress, const void *srcAddress, size_t length) {
|
||||
NEO::CommandStreamReceiverSimulatedCommonHw<FamilyType> *csrSimulated = getSimulatedCsr<FamilyType>();
|
||||
|
||||
if (NEO::testMode == NEO::TestMode::AubTestsWithTbx) {
|
||||
auto tbxCsr = csrSimulated;
|
||||
EXPECT_TRUE(tbxCsr->expectMemoryEqual(gfxAddress, srcAddress, length));
|
||||
csrSimulated = static_cast<NEO::CommandStreamReceiverSimulatedCommonHw<FamilyType> *>(
|
||||
static_cast<NEO::CommandStreamReceiverWithAUBDump<NEO::TbxCommandStreamReceiverHw<FamilyType>> *>(csr)->aubCSR.get());
|
||||
}
|
||||
|
||||
if (csrSimulated) {
|
||||
csrSimulated->expectMemoryEqual(gfxAddress, srcAddress, length);
|
||||
}
|
||||
}
|
||||
|
||||
template <typename FamilyType>
|
||||
void expectNotEqualMemory(void *gfxAddress, const void *srcAddress, size_t length) {
|
||||
NEO::CommandStreamReceiverSimulatedCommonHw<FamilyType> *csrSimulated = getSimulatedCsr<FamilyType>();
|
||||
|
||||
if (NEO::testMode == NEO::TestMode::AubTestsWithTbx) {
|
||||
auto tbxCsr = csrSimulated;
|
||||
EXPECT_TRUE(tbxCsr->expectMemoryNotEqual(gfxAddress, srcAddress, length));
|
||||
csrSimulated = static_cast<NEO::CommandStreamReceiverSimulatedCommonHw<FamilyType> *>(
|
||||
static_cast<NEO::CommandStreamReceiverWithAUBDump<NEO::TbxCommandStreamReceiverHw<FamilyType>> *>(csr)->aubCSR.get());
|
||||
}
|
||||
|
||||
if (csrSimulated) {
|
||||
csrSimulated->expectMemoryNotEqual(gfxAddress, srcAddress, length);
|
||||
}
|
||||
}
|
||||
|
||||
const uint32_t rootDeviceIndex = 0;
|
||||
NEO::ExecutionEnvironment *executionEnvironment;
|
||||
NEO::MemoryManager *memoryManager = nullptr;
|
||||
|
||||
@@ -13,6 +13,14 @@ set(TEST_MODULES
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/test_modules/test_kernel.cl
|
||||
)
|
||||
|
||||
set(TEST_KERNEL_BINDLESS_internal_options
|
||||
"-cl-intel-use-bindless-mode -cl-intel-use-bindless-advanced-mode"
|
||||
)
|
||||
|
||||
set(TEST_KERNEL_BINDLESS
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/test_modules/stateful_copy_buffer.cl
|
||||
)
|
||||
|
||||
set(l0_test_kernels_outputs)
|
||||
macro(macro_for_each_core_type)
|
||||
foreach(PLATFORM_TYPE ${PLATFORM_TYPES})
|
||||
@@ -20,12 +28,18 @@ macro(macro_for_each_core_type)
|
||||
get_family_name_with_type(${CORE_TYPE} ${PLATFORM_TYPE})
|
||||
string(TOLOWER ${PLATFORM_TYPE} PLATFORM_TYPE_LOWER)
|
||||
set(PLATFORM_LOWER ${DEFAULT_SUPPORTED_${CORE_TYPE}_${PLATFORM_TYPE}_PLATFORM})
|
||||
string(TOLOWER ${CORE_TYPE} CORE_TYPE_LOWER)
|
||||
level_zero_generate_kernels(l0_test_kernel_outputs ${PLATFORM_LOWER} ${family_name_with_type} "-g" ${TEST_MODULES})
|
||||
|
||||
#skip Gen8 bindless kernel generation
|
||||
if(NOT ("${CORE_TYPE_LOWER}" STREQUAL "gen8"))
|
||||
level_zero_generate_kernels_with_internal_options(l0_bindless_test_kernel_outputs ${PLATFORM_LOWER} ${family_name_with_type} "bindless" "-g" ${TEST_KERNEL_BINDLESS_internal_options} ${TEST_KERNEL_BINDLESS})
|
||||
endif()
|
||||
endif()
|
||||
endforeach()
|
||||
endmacro()
|
||||
apply_macro_for_each_core_type("TESTED")
|
||||
add_custom_target(l0_common_test_kernels DEPENDS ${l0_test_kernel_outputs} copy_compiler_files)
|
||||
add_custom_target(l0_common_test_kernels DEPENDS ${l0_test_kernel_outputs} ${l0_bindless_test_kernel_outputs} copy_compiler_files)
|
||||
set_target_properties(l0_common_test_kernels PROPERTIES FOLDER ${TARGET_NAME_L0})
|
||||
add_dependencies(prepare_test_kernels_for_l0 l0_common_test_kernels)
|
||||
|
||||
|
||||
@@ -49,3 +49,54 @@ function(level_zero_generate_kernels target_list platform_name suffix options)
|
||||
|
||||
set(${target_list} ${${target_list}} PARENT_SCOPE)
|
||||
endfunction()
|
||||
|
||||
function(level_zero_generate_kernels_with_internal_options target_list platform_name suffix prefix options internal_options)
|
||||
|
||||
list(APPEND results copy_compiler_files)
|
||||
|
||||
set(outputdir "${TargetDir}/level_zero/${suffix}/test_files/${NEO_ARCH}/")
|
||||
|
||||
foreach(filepath ${ARGN})
|
||||
get_filename_component(filename ${filepath} NAME)
|
||||
get_filename_component(basename ${filepath} NAME_WE)
|
||||
get_filename_component(workdir ${filepath} DIRECTORY)
|
||||
|
||||
set(outputpath_base "${outputdir}${prefix}_${basename}_${suffix}")
|
||||
|
||||
if(NOT NEO_DISABLE_BUILTINS_COMPILATION)
|
||||
set(output_files
|
||||
${outputpath_base}.bin
|
||||
${outputpath_base}.gen
|
||||
${outputpath_base}.spv
|
||||
${outputpath_base}.dbg
|
||||
)
|
||||
|
||||
set(output_name "-output" "${prefix}_${basename}")
|
||||
string(CONCAT options \" ${options} \" )
|
||||
string(CONCAT internal_options \" ${internal_options} \" )
|
||||
|
||||
add_custom_command(
|
||||
COMMAND echo generate ${ocloc_cmd_prefix} -q -file ${filename} -device ${platform_name} -out_dir ${outputdir} ${output_name} -options ${options} -internal_options ${internal_options} , workdir is ${workdir}
|
||||
OUTPUT ${output_files}
|
||||
COMMAND ${ocloc_cmd_prefix} -q -file ${filename} -device ${platform_name} -out_dir ${outputdir} ${output_name} -options ${options} -internal_options ${internal_options}
|
||||
WORKING_DIRECTORY ${workdir}
|
||||
DEPENDS ${filepath} ocloc
|
||||
)
|
||||
|
||||
list(APPEND ${target_list} ${output_files})
|
||||
else()
|
||||
foreach(_file_name "bin" "gen" "spv" "dbg")
|
||||
set(_file_prebuilt "${NEO_SOURCE_DIR}/../neo_test_kernels/level_zero/${suffix}/test_files/${NEO_ARCH}/${prefix}_${basename}_${suffix}.${_file_name}")
|
||||
add_custom_command(
|
||||
OUTPUT ${outputpath_base}.${_file_name}
|
||||
COMMAND ${CMAKE_COMMAND} -E make_directory ${outputdir}
|
||||
COMMAND ${CMAKE_COMMAND} -E copy_if_different ${_file_prebuilt} ${outputdir}
|
||||
)
|
||||
|
||||
list(APPEND ${target_list} ${outputpath_base}.${_file_name})
|
||||
endforeach()
|
||||
endif()
|
||||
endforeach()
|
||||
|
||||
set(${target_list} ${${target_list}} PARENT_SCOPE)
|
||||
endfunction()
|
||||
|
||||
@@ -0,0 +1,14 @@
|
||||
/*
|
||||
* Copyright (C) 2021 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
__kernel void StatefulCopyBuffer(
|
||||
const __global uchar* src,
|
||||
__global uchar* dst)
|
||||
{
|
||||
uint id = get_global_id(0);
|
||||
dst[id] = src[id];
|
||||
}
|
||||
@@ -492,7 +492,7 @@ HWTEST2_F(CommandQueueProgramSBATest,
|
||||
using BindlessCommandQueueSBASupport = IsAtLeastProduct<IGFX_SKYLAKE>;
|
||||
|
||||
HWTEST2_F(CommandQueueProgramSBATest,
|
||||
givenBindlessModeEnabledWhenProgrammingStateBaseAddressThenBindlessBaseAddressIsPassed, BindlessCommandQueueSBASupport) {
|
||||
givenBindlessModeEnabledWhenProgrammingStateBaseAddressThenBindlessBaseAddressAndSizeAreSet, BindlessCommandQueueSBASupport) {
|
||||
using STATE_BASE_ADDRESS = typename FamilyType::STATE_BASE_ADDRESS;
|
||||
DebugManagerStateRestore dbgRestorer;
|
||||
DebugManager.flags.UseBindlessMode.set(1);
|
||||
@@ -525,7 +525,9 @@ HWTEST2_F(CommandQueueProgramSBATest,
|
||||
auto cmdSba = genCmdCast<STATE_BASE_ADDRESS *>(*itor);
|
||||
EXPECT_EQ(cmdSba->getBindlessSurfaceStateBaseAddressModifyEnable(), true);
|
||||
EXPECT_EQ(cmdSba->getBindlessSurfaceStateBaseAddress(), neoDevice->getExecutionEnvironment()->rootDeviceEnvironments[neoDevice->getRootDeviceIndex()]->getBindlessHeapsHelper()->getGlobalHeapsBase());
|
||||
EXPECT_EQ(cmdSba->getBindlessSurfaceStateSize(), MemoryConstants::sizeOf4GBinPageEntities);
|
||||
|
||||
auto surfaceStateCount = StateBaseAddressHelper<FamilyType>::getMaxBindlessSurfaceStates();
|
||||
EXPECT_EQ(surfaceStateCount, cmdSba->getBindlessSurfaceStateSize());
|
||||
|
||||
commandQueue->destroy();
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user