Fail build program in shared system USM + stateful access case

Signed-off-by: Kamil Kopryk <kamil.kopryk@intel.com>
Related-To: NEO-6075

After this change driver will fail clBuildProgram/zeModuleCreate api calls
whenever stateful access is discovered and device has shared system usm caps
enabled.This is required since in this case allocation greater than 4GB
will not work.
If user still wants to use stateful addressing mode,
-cl-opt-smaller-than-4GB-buffers-only / -ze-opt-smaller-than-4GB-buffers-only
build option should be passed as build option, but then user can not use
buffers greater than 4GB.
This commit is contained in:
Kamil Kopryk 2021-11-09 14:12:08 +00:00 committed by Compute-Runtime-Automation
parent d497392b55
commit 0ad8afc0b3
13 changed files with 371 additions and 6 deletions

View File

@ -15,6 +15,7 @@
#include "shared/source/device_binary_format/elf/elf.h"
#include "shared/source/device_binary_format/elf/elf_encoder.h"
#include "shared/source/device_binary_format/elf/ocl_elf.h"
#include "shared/source/helpers/addressing_mode_helper.h"
#include "shared/source/helpers/api_specific_config.h"
#include "shared/source/helpers/constants.h"
#include "shared/source/helpers/kernel_helpers.h"
@ -41,6 +42,7 @@ namespace BuildOptions {
NEO::ConstStringRef optDisable = "-ze-opt-disable";
NEO::ConstStringRef optLevel = "-ze-opt-level";
NEO::ConstStringRef greaterThan4GbRequired = "-ze-opt-greater-than-4GB-buffer-required";
NEO::ConstStringRef smallerThan4GbBuffersOnly = "-ze-opt-smaller-than-4GB-buffers-only";
NEO::ConstStringRef hasBufferOffsetArg = "-ze-intel-has-buffer-offset-arg";
NEO::ConstStringRef debugKernelEnable = "-ze-kernel-debug-enable";
} // namespace BuildOptions
@ -122,8 +124,8 @@ std::string ModuleTranslationUnit::generateCompilerOptions(const char *buildOpti
internalOptions = NEO::CompilerOptions::concatenate(internalOptions, BuildOptions::debugKernelEnable);
}
if (NEO::DebugManager.flags.DisableStatelessToStatefulOptimization.get() ||
device->getNEODevice()->areSharedSystemAllocationsAllowed()) {
auto sharedSystemAllocationsAllowed = device->getNEODevice()->areSharedSystemAllocationsAllowed();
if (NEO::DebugManager.flags.DisableStatelessToStatefulOptimization.get() || NEO::AddressingModeHelper::forceToStatelessNeeded(options, BuildOptions::smallerThan4GbBuffersOnly.str(), sharedSystemAllocationsAllowed)) {
internalOptions = NEO::CompilerOptions::concatenate(internalOptions, NEO::CompilerOptions::greaterThan4gbBuffersRequired);
}
@ -529,6 +531,12 @@ bool ModuleImp::initialize(const ze_module_desc_t *desc, NEO::Device *neoDevice)
this->updateBuildLog(neoDevice);
verifyDebugCapabilities();
auto isUserKernel = (this->type == ModuleType::User);
auto sharedSystemAllocationsAllowed = device->getNEODevice()->areSharedSystemAllocationsAllowed();
if (NEO::AddressingModeHelper::containsStatefulAccess(this->translationUnit->programInfo.kernelInfos) && NEO::AddressingModeHelper::forceToStatelessNeeded(this->translationUnit->options, BuildOptions::smallerThan4GbBuffersOnly.str(), sharedSystemAllocationsAllowed) && isUserKernel) {
success = false;
}
if (false == success) {
return false;
}

View File

@ -8,6 +8,7 @@
#include "shared/source/device_binary_format/debug_zebin.h"
#include "shared/source/gmm_helper/gmm.h"
#include "shared/source/gmm_helper/gmm_helper.h"
#include "shared/source/helpers/addressing_mode_helper.h"
#include "shared/source/kernel/implicit_args.h"
#include "shared/source/program/kernel_info.h"
#include "shared/test/common/helpers/debug_manager_state_restore.h"
@ -1815,6 +1816,150 @@ TEST_F(ModuleTest, GivenInjectInternalBuildOptionsWhenBuildingBuiltinModuleThenI
EXPECT_FALSE(CompilerOptions::contains(cip->buildInternalOptions, "-abc"));
};
TEST_F(ModuleTest, givenSharedSystemAllocationsSupportWhenGenerateCompilerOptionsThenOptionsAreCorrect) {
auto areSharedSystemAllocationsSupported = device->getNEODevice()->areSharedSystemAllocationsAllowed();
if (!areSharedSystemAllocationsSupported) {
GTEST_SKIP();
}
DebugManagerStateRestore restorer;
auto module = std::make_unique<ModuleImp>(device, nullptr, ModuleType::User);
ASSERT_NE(nullptr, module);
auto moduleTranslationUnit = module->getTranslationUnit();
ASSERT_NE(nullptr, moduleTranslationUnit);
std::string buildOptions;
std::string internalBuildOptions;
{
NEO::DebugManager.flags.UseSmallerThan4gbBuffersOnly.set(-1);
buildOptions = "";
auto internalOptions = moduleTranslationUnit->generateCompilerOptions(buildOptions.c_str(), internalBuildOptions.c_str());
EXPECT_THAT(internalOptions, testing::HasSubstr("-cl-intel-greater-than-4GB-buffer-required"));
}
{
NEO::DebugManager.flags.UseSmallerThan4gbBuffersOnly.set(-1);
buildOptions = "-ze-opt-smaller-than-4GB-buffers-only";
auto internalOptions = moduleTranslationUnit->generateCompilerOptions(buildOptions.c_str(), internalBuildOptions.c_str());
EXPECT_THAT(internalOptions, testing::Not(testing::HasSubstr("-cl-intel-greater-than-4GB-buffer-required")));
}
{
NEO::DebugManager.flags.UseSmallerThan4gbBuffersOnly.set(0);
buildOptions = "";
auto internalOptions = moduleTranslationUnit->generateCompilerOptions(buildOptions.c_str(), internalBuildOptions.c_str());
EXPECT_THAT(internalOptions, testing::HasSubstr("-cl-intel-greater-than-4GB-buffer-required"));
}
{
NEO::DebugManager.flags.UseSmallerThan4gbBuffersOnly.set(0);
buildOptions = "-ze-opt-smaller-than-4GB-buffers-only";
auto internalOptions = moduleTranslationUnit->generateCompilerOptions(buildOptions.c_str(), internalBuildOptions.c_str());
EXPECT_THAT(internalOptions, testing::HasSubstr("-cl-intel-greater-than-4GB-buffer-required"));
}
{
NEO::DebugManager.flags.UseSmallerThan4gbBuffersOnly.set(1);
buildOptions = "";
auto internalOptions = moduleTranslationUnit->generateCompilerOptions(buildOptions.c_str(), internalBuildOptions.c_str());
EXPECT_THAT(internalOptions, testing::Not(testing::HasSubstr("-cl-intel-greater-than-4GB-buffer-required")));
}
{
NEO::DebugManager.flags.UseSmallerThan4gbBuffersOnly.set(1);
buildOptions = "-ze-opt-smaller-than-4GB-buffers-only";
auto internalOptions = moduleTranslationUnit->generateCompilerOptions(buildOptions.c_str(), internalBuildOptions.c_str());
EXPECT_THAT(internalOptions, testing::Not(testing::HasSubstr("-cl-intel-greater-than-4GB-buffer-required")));
}
}
TEST_F(ModuleTest, whenContainsStatefulAccessIsCalledThenResultIsCorrect) {
class MyModuleImpl : public ModuleImp {
public:
using ModuleImp::ModuleImp;
};
std::vector<std::tuple<bool, SurfaceStateHeapOffset, CrossThreadDataOffset>> testParams = {
{false, undefined<SurfaceStateHeapOffset>, undefined<CrossThreadDataOffset>},
{true, 0x40, undefined<CrossThreadDataOffset>},
{true, undefined<SurfaceStateHeapOffset>, 0x40},
{true, 0x40, 0x40},
};
for (auto &[expectedResult, surfaceStateHeapOffset, crossThreadDataOffset] : testParams) {
auto module = std::make_unique<MyModuleImpl>(device, nullptr, ModuleType::User);
ASSERT_NE(nullptr, module);
auto moduleTranslationUnit = module->getTranslationUnit();
ASSERT_NE(nullptr, moduleTranslationUnit);
auto kernelInfo = std::make_unique<KernelInfo>();
kernelInfo->kernelDescriptor.payloadMappings.explicitArgs.clear();
auto argDescriptor = ArgDescriptor(ArgDescriptor::ArgTPointer);
argDescriptor.as<ArgDescPointer>().bindful = surfaceStateHeapOffset;
argDescriptor.as<ArgDescPointer>().bindless = crossThreadDataOffset;
kernelInfo->kernelDescriptor.payloadMappings.explicitArgs.push_back(argDescriptor);
moduleTranslationUnit->programInfo.kernelInfos.clear();
moduleTranslationUnit->programInfo.kernelInfos.push_back(kernelInfo.release());
EXPECT_EQ(expectedResult, NEO::AddressingModeHelper::containsStatefulAccess(moduleTranslationUnit->programInfo.kernelInfos));
}
}
using ModuleInitializeTest = Test<DeviceFixture>;
TEST_F(ModuleInitializeTest, whenModuleInitializeIsCalledThenCorrectResultIsReturned) {
DebugManagerStateRestore restorer;
class MockModuleImp : public ModuleImp {
public:
using ModuleImp::ModuleImp;
using ModuleImp::translationUnit;
void setAddressingMode(bool isStateful) {
auto kernelInfo = std::make_unique<KernelInfo>();
kernelInfo->kernelDescriptor.payloadMappings.explicitArgs.clear();
auto argDescriptor = ArgDescriptor(ArgDescriptor::ArgTPointer);
if (isStateful) {
argDescriptor.as<ArgDescPointer>().bindful = 0x40;
argDescriptor.as<ArgDescPointer>().bindless = 0x40;
} else {
argDescriptor.as<ArgDescPointer>().bindful = undefined<SurfaceStateHeapOffset>;
argDescriptor.as<ArgDescPointer>().bindless = undefined<CrossThreadDataOffset>;
}
kernelInfo->kernelDescriptor.payloadMappings.explicitArgs.push_back(argDescriptor);
kernelInfo->heapInfo.KernelHeapSize = 0x1;
kernelInfo->heapInfo.pKernelHeap = reinterpret_cast<void *>(0xffff);
this->translationUnit->programInfo.kernelInfos.clear();
this->translationUnit->programInfo.kernelInfos.push_back(kernelInfo.release());
}
};
class MyMockModuleTU : public MockModuleTU {
public:
using MockModuleTU::MockModuleTU;
bool createFromNativeBinary(const char *input, size_t inputSize) { return true; }
};
std::string testFile;
retrieveBinaryKernelFilenameNoRevision(testFile, "test_kernel_", ".bin");
size_t size = 0;
auto src = loadDataFromFile(testFile.c_str(), size);
ASSERT_NE(0u, size);
ASSERT_NE(nullptr, src);
ze_module_desc_t moduleDesc = {};
moduleDesc.format = ZE_MODULE_FORMAT_NATIVE;
moduleDesc.pInputModule = reinterpret_cast<const uint8_t *>(src.get());
moduleDesc.inputSize = size;
device->getNEODevice()->getRootDeviceEnvironment().getMutableHardwareInfo()->capabilityTable.sharedSystemMemCapabilities = 1;
std::array<std::tuple<bool, bool, ModuleType, int32_t>, 4> testParams = {{{true, false, ModuleType::Builtin, -1},
{true, true, ModuleType::Builtin, 1},
{true, true, ModuleType::Builtin, 0},
{false, true, ModuleType::User, 0}}};
for (auto &[expectedResult, isStateful, moduleType, debugKey] : testParams) {
MockModuleImp module(device, nullptr, moduleType);
module.translationUnit = std::make_unique<MyMockModuleTU>(device);
DebugManager.flags.UseSmallerThan4gbBuffersOnly.set(debugKey);
module.setAddressingMode(isStateful);
EXPECT_EQ(expectedResult, module.initialize(&moduleDesc, device->getNEODevice()));
}
}
using ModuleDebugDataTest = Test<DeviceFixture>;
TEST_F(ModuleDebugDataTest, GivenDebugDataWithRelocationsWhenCreatingRelocatedDebugDataThenRelocationsAreApplied) {
auto cip = new NEO::MockCompilerInterfaceCaptureBuildOptions();

View File

@ -9,6 +9,7 @@
#include "shared/source/device/device.h"
#include "shared/source/device_binary_format/device_binary_formats.h"
#include "shared/source/execution_environment/execution_environment.h"
#include "shared/source/helpers/addressing_mode_helper.h"
#include "shared/source/helpers/compiler_options_parser.h"
#include "shared/source/program/kernel_info.h"
#include "shared/source/source_level_debugger/source_level_debugger.h"
@ -34,8 +35,7 @@ cl_int Program::build(
const char *buildOptions,
bool enableCaching) {
cl_int retVal = CL_SUCCESS;
std::string internalOptions;
initInternalOptions(internalOptions);
auto defaultClDevice = deviceVector[0];
UNRECOVERABLE_IF(defaultClDevice == nullptr);
auto &defaultDevice = defaultClDevice->getDevice();
@ -69,6 +69,9 @@ cl_int Program::build(
} else if (this->createdFrom != CreatedFrom::BINARY) {
options = "";
}
std::string internalOptions;
initInternalOptions(internalOptions);
extractInternalOptions(options, internalOptions);
applyAdditionalOptions(internalOptions);
@ -170,6 +173,12 @@ cl_int Program::build(
phaseReached[clDevice->getRootDeviceIndex()] = BuildPhase::BinaryProcessing;
}
const auto &kernelInfoArray = buildInfos[clDevices[0]->getRootDeviceIndex()].kernelInfoArray;
auto sharedSystemAllocationsAllowed = clDevices[0]->areSharedSystemAllocationsAllowed();
if (AddressingModeHelper::containsStatefulAccess(kernelInfoArray) && AddressingModeHelper::forceToStatelessNeeded(options, CompilerOptions::smallerThan4gbBuffersOnly.str(), sharedSystemAllocationsAllowed) && !isBuiltIn) {
retVal = CL_BUILD_PROGRAM_FAILURE;
}
if (retVal != CL_SUCCESS) {
break;
}

View File

@ -15,6 +15,7 @@
#include "shared/source/device_binary_format/elf/elf_encoder.h"
#include "shared/source/device_binary_format/elf/ocl_elf.h"
#include "shared/source/device_binary_format/patchtokens_decoder.h"
#include "shared/source/helpers/addressing_mode_helper.h"
#include "shared/source/helpers/api_specific_config.h"
#include "shared/source/helpers/compiler_options_parser.h"
#include "shared/source/helpers/debug_helpers.h"
@ -74,7 +75,8 @@ void Program::initInternalOptions(std::string &internalOptions) const {
CompilerOptions::concatenateAppend(internalOptions, CompilerOptions::arch32bit);
}
if ((isBuiltIn && is32bit) || pClDevice->areSharedSystemAllocationsAllowed() ||
auto sharedSystemAllocationsAllowed = clDevices[0]->areSharedSystemAllocationsAllowed();
if ((isBuiltIn && is32bit) || AddressingModeHelper::forceToStatelessNeeded(options, CompilerOptions::smallerThan4gbBuffersOnly.str(), sharedSystemAllocationsAllowed) ||
DebugManager.flags.DisableStatelessToStatefulOptimization.get()) {
CompilerOptions::concatenateAppend(internalOptions, CompilerOptions::greaterThan4gbBuffersRequired);
}

View File

@ -13,6 +13,7 @@
#include "shared/source/device_binary_format/elf/ocl_elf.h"
#include "shared/source/device_binary_format/patchtokens_decoder.h"
#include "shared/source/gmm_helper/gmm_helper.h"
#include "shared/source/helpers/addressing_mode_helper.h"
#include "shared/source/helpers/aligned_memory.h"
#include "shared/source/helpers/hash.h"
#include "shared/source/helpers/hw_helper.h"
@ -1653,6 +1654,137 @@ TEST_F(ProgramTests, WhenProgramIsCreatedThenCorrectOclVersionIsInOptions) {
}
}
TEST_F(ProgramTests, whenForceToStatelessNeededIsCalledThenCorrectResultIsReturned) {
DebugManagerStateRestore restorer;
class MyMockProgram : public Program {
public:
using Program::options;
using Program::Program;
};
MyMockProgram program(pContext, false, toClDeviceVector(*pClDevice));
auto sharedSystemAllocationsAllowed = pClDevice->areSharedSystemAllocationsAllowed();
{
DebugManager.flags.UseSmallerThan4gbBuffersOnly.set(-1);
program.options = "";
EXPECT_EQ(AddressingModeHelper::forceToStatelessNeeded(program.options, NEO::CompilerOptions::smallerThan4gbBuffersOnly.str(), sharedSystemAllocationsAllowed), sharedSystemAllocationsAllowed);
}
{
DebugManager.flags.UseSmallerThan4gbBuffersOnly.set(-1);
program.options = "-cl-opt-smaller-than-4GB-buffers-only";
EXPECT_FALSE(AddressingModeHelper::forceToStatelessNeeded(program.options, NEO::CompilerOptions::smallerThan4gbBuffersOnly.str(), sharedSystemAllocationsAllowed));
}
{
DebugManager.flags.UseSmallerThan4gbBuffersOnly.set(0);
program.options = "";
EXPECT_EQ(AddressingModeHelper::forceToStatelessNeeded(program.options, NEO::CompilerOptions::smallerThan4gbBuffersOnly.str(), sharedSystemAllocationsAllowed), sharedSystemAllocationsAllowed);
}
{
DebugManager.flags.UseSmallerThan4gbBuffersOnly.set(0);
program.options = "-cl-opt-smaller-than-4GB-buffers-only";
EXPECT_EQ(AddressingModeHelper::forceToStatelessNeeded(program.options, NEO::CompilerOptions::smallerThan4gbBuffersOnly.str(), sharedSystemAllocationsAllowed), sharedSystemAllocationsAllowed);
}
{
DebugManager.flags.UseSmallerThan4gbBuffersOnly.set(1);
program.options = "";
EXPECT_FALSE(AddressingModeHelper::forceToStatelessNeeded(program.options, NEO::CompilerOptions::smallerThan4gbBuffersOnly.str(), sharedSystemAllocationsAllowed));
}
{
DebugManager.flags.UseSmallerThan4gbBuffersOnly.set(1);
program.options = "-cl-opt-smaller-than-4GB-buffers-only";
EXPECT_FALSE(AddressingModeHelper::forceToStatelessNeeded(program.options, NEO::CompilerOptions::smallerThan4gbBuffersOnly.str(), sharedSystemAllocationsAllowed));
}
}
TEST_F(ProgramTests, whenContainsStatefulAccessIsCalledThenReturnCorrectResult) {
std::vector<std::tuple<bool, SurfaceStateHeapOffset, CrossThreadDataOffset>> testParams = {
{false, undefined<SurfaceStateHeapOffset>, undefined<CrossThreadDataOffset>},
{true, 0x40, undefined<CrossThreadDataOffset>},
{true, undefined<SurfaceStateHeapOffset>, 0x40},
{true, 0x40, 0x40},
};
for (auto &[expectedResult, surfaceStateHeapOffset, crossThreadDataOffset] : testParams) {
MockProgram program(pContext, false, toClDeviceVector(*pClDevice));
auto kernelInfo = std::make_unique<KernelInfo>();
kernelInfo->kernelDescriptor.payloadMappings.explicitArgs.clear();
auto argDescriptor = ArgDescriptor(ArgDescriptor::ArgTPointer);
argDescriptor.as<ArgDescPointer>().bindful = surfaceStateHeapOffset;
argDescriptor.as<ArgDescPointer>().bindless = crossThreadDataOffset;
kernelInfo->kernelDescriptor.payloadMappings.explicitArgs.push_back(argDescriptor);
program.addKernelInfo(kernelInfo.release(), 0);
EXPECT_EQ(expectedResult, AddressingModeHelper::containsStatefulAccess(program.buildInfos[0].kernelInfoArray));
}
}
TEST_F(ProgramTests, givenStatefulAndStatelessAccessesWhenProgramBuildIsCalledThenCorrectResultIsReturned) {
DebugManagerStateRestore restorer;
class MyMockProgram : public Program {
public:
using Program::buildInfos;
using Program::createdFrom;
using Program::irBinary;
using Program::irBinarySize;
using Program::isBuiltIn;
using Program::options;
using Program::Program;
using Program::sourceCode;
void setAddressingMode(bool isStateful) {
auto kernelInfo = std::make_unique<KernelInfo>();
kernelInfo->kernelDescriptor.payloadMappings.explicitArgs.clear();
auto argDescriptor = ArgDescriptor(ArgDescriptor::ArgTPointer);
if (isStateful) {
argDescriptor.as<ArgDescPointer>().bindful = 0x40;
argDescriptor.as<ArgDescPointer>().bindless = 0x40;
} else {
argDescriptor.as<ArgDescPointer>().bindful = undefined<SurfaceStateHeapOffset>;
argDescriptor.as<ArgDescPointer>().bindless = undefined<CrossThreadDataOffset>;
}
kernelInfo->kernelDescriptor.payloadMappings.explicitArgs.push_back(argDescriptor);
this->buildInfos[0].kernelInfoArray.clear();
this->buildInfos[0].kernelInfoArray.push_back(kernelInfo.release());
}
cl_int processGenBinary(const ClDevice &clDevice) override {
return CL_SUCCESS;
}
};
pClDevice->getRootDeviceEnvironment().getMutableHardwareInfo()->capabilityTable.sharedSystemMemCapabilities = 1;
std::array<std::tuple<int, bool, int32_t>, 3> testParams = {{{CL_SUCCESS, false, -1},
{CL_SUCCESS, true, 1},
{CL_BUILD_PROGRAM_FAILURE, true, 0}}};
for (auto &[result, isStatefulAccess, debuyKey] : testParams) {
MyMockProgram program(pContext, false, toClDeviceVector(*pClDevice));
program.isBuiltIn = false;
program.sourceCode = "test_kernel";
program.createdFrom = Program::CreatedFrom::SOURCE;
program.setAddressingMode(isStatefulAccess);
DebugManager.flags.UseSmallerThan4gbBuffersOnly.set(debuyKey);
EXPECT_EQ(result, program.build(toClDeviceVector(*pClDevice), nullptr, false));
}
{
MyMockProgram programWithBuiltIn(pContext, true, toClDeviceVector(*pClDevice));
programWithBuiltIn.irBinary.reset(new char[16]);
programWithBuiltIn.irBinarySize = 16;
programWithBuiltIn.isBuiltIn = true;
programWithBuiltIn.setAddressingMode(true);
DebugManager.flags.UseSmallerThan4gbBuffersOnly.set(0);
pClDevice->getRootDeviceEnvironment().getMutableHardwareInfo()->capabilityTable.sharedSystemMemCapabilities = 1u;
EXPECT_EQ(CL_SUCCESS, programWithBuiltIn.build(toClDeviceVector(*pClDevice), nullptr, false));
}
}
TEST_F(ProgramTests, GivenForcedClVersionWhenProgramIsCreatedThenCorrectOclOptionIsPresent) {
std::pair<unsigned int, std::string> testedValues[] = {
{0, "-ocl-version=120"},

View File

@ -112,6 +112,7 @@ ForceCsrFlushing = 0
ForceCsrReprogramming = 0
OmitTimestampPacketDependencies = 0
DisableStatelessToStatefulOptimization = 0
UseSmallerThan4gbBuffersOnly = -1
DisableConcurrentBlockExecution = 0
UseNoRingFlushesKmdMode = 1
DisableZeroCopyForUseHostPtr = 0

View File

@ -6,4 +6,5 @@
*/
__kernel void test(__global float *argGlobal, __read_only image3d_t argImg3D, __constant float *argConst) {
argGlobal[0] = argConst[0];
}

View File

@ -10,6 +10,6 @@ __constant uint constant_a[2] = {0xabcd5432u, 0xaabb5533u};
__kernel void test(__global uint *in, __global uint *out) {
int i = get_global_id(0);
int j = get_global_id(0) % (sizeof(constant_a) / sizeof(constant_a[0]));
in[0] = 0;
out[i] = constant_a[j];
}

View File

@ -13,6 +13,7 @@
namespace NEO {
namespace CompilerOptions {
static constexpr ConstStringRef greaterThan4gbBuffersRequired = "-cl-intel-greater-than-4GB-buffer-required";
static constexpr ConstStringRef smallerThan4gbBuffersOnly = "-cl-opt-smaller-than-4GB-buffers-only";
static constexpr ConstStringRef hasBufferOffsetArg = "-cl-intel-has-buffer-offset-arg";
static constexpr ConstStringRef kernelDebugEnable = "-cl-kernel-debug-enable";
static constexpr ConstStringRef arch32bit = "-m32";

View File

@ -215,6 +215,7 @@ DECLARE_DEBUG_VARIABLE(bool, DisableStatelessToStatefulOptimization, false, "Dis
DECLARE_DEBUG_VARIABLE(bool, DisableConcurrentBlockExecution, false, "disables concurrent block kernel execution")
DECLARE_DEBUG_VARIABLE(bool, UseNoRingFlushesKmdMode, true, "Windows only, passes flag to KMD that informs KMD to not emit any ring buffer flushes.")
DECLARE_DEBUG_VARIABLE(bool, DisableZeroCopyForUseHostPtr, false, "When active all buffer allocations created with CL_MEM_USE_HOST_PTR flag will not share memory with CPU.")
DECLARE_DEBUG_VARIABLE(int32_t, UseSmallerThan4gbBuffersOnly, -1, " -1: default, 0: disabled, 1: enabled. When enabled driver will not force stateless accesses when shared system USM is active")
DECLARE_DEBUG_VARIABLE(int32_t, EnableHostPtrTracking, -1, "Enable host ptr tracking: -1 - default platform setting, 0 - disabled, 1 - enabled")
DECLARE_DEBUG_VARIABLE(int32_t, MaxHwThreadsPercent, 0, "If not zero then maximum number of used HW threads is capped to max * MaxHwThreadsPercent / 100")
DECLARE_DEBUG_VARIABLE(int32_t, MinHwThreadsUnoccupied, 0, "If not zero then maximum number of used HW threads is reduced by MinHwThreadsUnoccupied")

View File

@ -7,6 +7,8 @@
set(NEO_CORE_HELPERS
${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt
${CMAKE_CURRENT_SOURCE_DIR}/abort.h
${CMAKE_CURRENT_SOURCE_DIR}/addressing_mode_helper.h
${CMAKE_CURRENT_SOURCE_DIR}/addressing_mode_helper.cpp
${CMAKE_CURRENT_SOURCE_DIR}/address_patch.h
${CMAKE_CURRENT_SOURCE_DIR}/affinity_mask.h
${CMAKE_CURRENT_SOURCE_DIR}/aligned_memory.h

View File

@ -0,0 +1,42 @@
/*
* Copyright (C) 2021 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/source/helpers/addressing_mode_helper.h"
#include "shared/source/compiler_interface/compiler_options/compiler_options_base.h"
#include "shared/source/debug_settings/debug_settings_manager.h"
#include "shared/source/program/kernel_info.h"
namespace NEO::AddressingModeHelper {
bool forceToStatelessNeeded(const std::string &options, const std::string &smallerThan4GbBuffersOnlyOption, bool sharedSystemAllocationsAllowed) {
auto preferStateful = false;
if (NEO::CompilerOptions::contains(options, smallerThan4GbBuffersOnlyOption)) {
preferStateful = true;
}
if (NEO::DebugManager.flags.UseSmallerThan4gbBuffersOnly.get() != -1) {
preferStateful = static_cast<bool>(NEO::DebugManager.flags.UseSmallerThan4gbBuffersOnly.get());
}
auto forceStateless = !preferStateful && sharedSystemAllocationsAllowed;
return forceStateless;
}
bool containsStatefulAccess(const std::vector<KernelInfo *> &kernelInfos) {
for (const auto &kernelInfo : kernelInfos) {
for (const auto &arg : kernelInfo->kernelDescriptor.payloadMappings.explicitArgs) {
auto isStatefulAccess = arg.is<NEO::ArgDescriptor::ArgTPointer>() &&
(NEO::isValidOffset(arg.as<NEO::ArgDescPointer>().bindless) ||
NEO::isValidOffset(arg.as<NEO::ArgDescPointer>().bindful));
if (isStatefulAccess) {
return true;
}
}
}
return false;
}
} // namespace NEO::AddressingModeHelper

View File

@ -0,0 +1,21 @@
/*
* Copyright (C) 2021 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
#include <string>
#include <vector>
namespace NEO {
struct KernelInfo;
namespace AddressingModeHelper {
bool forceToStatelessNeeded(const std::string &options, const std::string &smallerThan4GbBuffersOnly, bool sharedSystemAllocationsAllowed);
bool containsStatefulAccess(const std::vector<KernelInfo *> &kernelInfos);
} // namespace AddressingModeHelper
} // namespace NEO