Fail build program on PVC with stateful accesses

Related-To: NEO-6075

After this change, the driver fails clBuildProgram/zeModuleCreate API calls
whenever a stateful access is discovered on PVC.
This is required because, in this case, allocations greater than 4GB
will not work.
If the user still wants to use the stateful addressing mode, the
-cl-opt-smaller-than-4GB-buffers-only / -ze-opt-smaller-than-4GB-buffers-only
build option should be passed, but then the user cannot use
buffers greater than 4GB.


Signed-off-by: Kamil Kopryk <kamil.kopryk@intel.com>
This commit is contained in:
Kamil Kopryk
2022-02-02 00:21:00 +00:00
committed by Compute-Runtime-Automation
parent 4e31612c31
commit 9466113cef
13 changed files with 419 additions and 40 deletions

View File

@@ -16,6 +16,7 @@
#include "shared/source/device_binary_format/elf/elf.h"
#include "shared/source/device_binary_format/elf/elf_encoder.h"
#include "shared/source/device_binary_format/elf/ocl_elf.h"
#include "shared/source/helpers/addressing_mode_helper.h"
#include "shared/source/helpers/api_specific_config.h"
#include "shared/source/helpers/constants.h"
#include "shared/source/helpers/kernel_helpers.h"
@@ -43,6 +44,7 @@ namespace BuildOptions {
NEO::ConstStringRef optDisable = "-ze-opt-disable";
NEO::ConstStringRef optLevel = "-ze-opt-level";
NEO::ConstStringRef greaterThan4GbRequired = "-ze-opt-greater-than-4GB-buffer-required";
NEO::ConstStringRef smallerThan4GbBuffersOnly = "-ze-opt-smaller-than-4GB-buffers-only";
NEO::ConstStringRef hasBufferOffsetArg = "-ze-intel-has-buffer-offset-arg";
NEO::ConstStringRef debugKernelEnable = "-ze-kernel-debug-enable";
} // namespace BuildOptions
@@ -124,8 +126,10 @@ std::string ModuleTranslationUnit::generateCompilerOptions(const char *buildOpti
internalOptions = NEO::CompilerOptions::concatenate(internalOptions, BuildOptions::debugKernelEnable);
}
if (NEO::DebugManager.flags.DisableStatelessToStatefulOptimization.get() ||
device->getNEODevice()->areSharedSystemAllocationsAllowed()) {
auto disableStatelessToStatefulOptimization = NEO::DebugManager.flags.DisableStatelessToStatefulOptimization.get();
auto isForceToStatelessNeeded = NEO::AddressingModeHelper::forceToStatelessNeeded(options, BuildOptions::smallerThan4GbBuffersOnly.str(), device->getHwInfo());
if (disableStatelessToStatefulOptimization || isForceToStatelessNeeded) {
internalOptions = NEO::CompilerOptions::concatenate(internalOptions, NEO::CompilerOptions::greaterThan4gbBuffersRequired);
}
@@ -523,6 +527,14 @@ bool ModuleImp::initialize(const ze_module_desc_t *desc, NEO::Device *neoDevice)
this->updateBuildLog(neoDevice);
verifyDebugCapabilities();
auto containsStatefulAccess = NEO::AddressingModeHelper::containsStatefulAccess(translationUnit->programInfo.kernelInfos);
auto isForceToStatelessNeeded = NEO::AddressingModeHelper::forceToStatelessNeeded(translationUnit->options, BuildOptions::smallerThan4GbBuffersOnly.str(), device->getHwInfo());
auto isUserKernel = (type == ModuleType::User);
if (containsStatefulAccess && isForceToStatelessNeeded && isUserKernel) {
success = false;
}
if (false == success) {
return false;
}

View File

@@ -10,6 +10,8 @@
#include "shared/source/device_binary_format/debug_zebin.h"
#include "shared/source/gmm_helper/gmm.h"
#include "shared/source/gmm_helper/gmm_helper.h"
#include "shared/source/helpers/addressing_mode_helper.h"
#include "shared/source/helpers/compiler_hw_info_config.h"
#include "shared/source/kernel/implicit_args.h"
#include "shared/source/program/kernel_info.h"
#include "shared/test/common/helpers/debug_manager_state_restore.h"
@@ -1783,28 +1785,19 @@ HWTEST_F(ModuleTranslationUnitTest, WhenBuildOptionsAreNullThenReuseExistingOpti
EXPECT_NE(pMockCompilerInterface->inputInternalOptions.find("cl-intel-greater-than-4GB-buffer-required"), std::string::npos);
}
HWTEST_F(ModuleTranslationUnitTest, givenSystemSharedAllocationAllowedWhenBuildingModuleThen4GbBuffersAreRequired) {
HWTEST_F(ModuleTranslationUnitTest, givenForceToStatelessRequiredWhenBuildingModuleThen4GbBuffersAreRequired) {
auto mockCompilerInterface = new MockCompilerInterface;
auto &rootDeviceEnvironment = neoDevice->executionEnvironment->rootDeviceEnvironments[neoDevice->getRootDeviceIndex()];
rootDeviceEnvironment->compilerInterface.reset(mockCompilerInterface);
{
neoDevice->getRootDeviceEnvironment().getMutableHardwareInfo()->capabilityTable.sharedSystemMemCapabilities = 1;
MockModuleTranslationUnit moduleTu(device);
auto ret = moduleTu.buildFromSpirV("", 0U, nullptr, "", nullptr);
EXPECT_TRUE(ret);
MockModuleTranslationUnit moduleTu(device);
auto ret = moduleTu.buildFromSpirV("", 0U, nullptr, "", nullptr);
EXPECT_TRUE(ret);
const auto &compilerHwInfoConfig = *CompilerHwInfoConfig::get(defaultHwInfo->platform.eProductFamily);
if (compilerHwInfoConfig.isForceToStatelessRequired()) {
EXPECT_NE(mockCompilerInterface->inputInternalOptions.find("cl-intel-greater-than-4GB-buffer-required"), std::string::npos);
}
{
neoDevice->getRootDeviceEnvironment().getMutableHardwareInfo()->capabilityTable.sharedSystemMemCapabilities = 0;
MockModuleTranslationUnit moduleTu(device);
auto ret = moduleTu.buildFromSpirV("", 0U, nullptr, "", nullptr);
EXPECT_TRUE(ret);
} else {
EXPECT_EQ(mockCompilerInterface->inputInternalOptions.find("cl-intel-greater-than-4GB-buffer-required"), std::string::npos);
}
}
@@ -2012,6 +2005,155 @@ TEST_F(ModuleTest, GivenInjectInternalBuildOptionsWhenBuildingBuiltinModuleThenI
EXPECT_FALSE(CompilerOptions::contains(cip->buildInternalOptions, "-abc"));
};
// Checks, for every combination of the UseSmallerThan4gbBuffersOnly debug key
// (-1, 0, 1) and the presence of the "-ze-opt-smaller-than-4GB-buffers-only"
// user build option, whether generateCompilerOptions() emits the
// "-cl-intel-greater-than-4GB-buffer-required" internal option.
// The test is skipped on products that do not require forcing stateless mode.
TEST_F(ModuleTest, givenForceToStatelessRequiredWhenGenerateCompilerOptionsThenOptionsAreCorrect) {
    if (!CompilerHwInfoConfig::get(defaultHwInfo->platform.eProductFamily)->isForceToStatelessRequired()) {
        GTEST_SKIP();
    }

    // Restores every debug flag touched below when the test ends.
    DebugManagerStateRestore restorer;

    auto module = std::make_unique<ModuleImp>(device, nullptr, ModuleType::User);
    ASSERT_NE(nullptr, module);

    auto moduleTranslationUnit = module->getTranslationUnit();
    ASSERT_NE(nullptr, moduleTranslationUnit);

    const char *noUserOptions = "";
    const char *smallBuffersOnly = "-ze-opt-smaller-than-4GB-buffers-only";
    const std::string greaterThan4GbFlag = "-cl-intel-greater-than-4GB-buffer-required";

    // Applies the debug key, then generates the internal options for the given
    // user build options (the incoming internal options are always empty).
    const auto compileWith = [&](int32_t debugKey, const char *userOptions) {
        NEO::DebugManager.flags.UseSmallerThan4gbBuffersOnly.set(debugKey);
        return moduleTranslationUnit->generateCompilerOptions(userOptions, "");
    };

    // Debug key -1: the user build option alone decides.
    EXPECT_THAT(compileWith(-1, noUserOptions), testing::HasSubstr(greaterThan4GbFlag));
    EXPECT_THAT(compileWith(-1, smallBuffersOnly), testing::Not(testing::HasSubstr(greaterThan4GbFlag)));

    // Debug key 0: the flag is emitted regardless of the user build option.
    EXPECT_THAT(compileWith(0, noUserOptions), testing::HasSubstr(greaterThan4GbFlag));
    EXPECT_THAT(compileWith(0, smallBuffersOnly), testing::HasSubstr(greaterThan4GbFlag));

    // Debug key 1: the flag is never emitted.
    EXPECT_THAT(compileWith(1, noUserOptions), testing::Not(testing::HasSubstr(greaterThan4GbFlag)));
    EXPECT_THAT(compileWith(1, smallBuffersOnly), testing::Not(testing::HasSubstr(greaterThan4GbFlag)));
}
// Verifies AddressingModeHelper::containsStatefulAccess() against a single
// pointer argument: the result is expected to be true as soon as either the
// bindful or the bindless offset of that argument is defined.
TEST_F(ModuleTest, whenContainsStatefulAccessIsCalledThenResultIsCorrect) {
    struct AccessCase {
        bool expectStateful;
        SurfaceStateHeapOffset bindfulOffset;
        CrossThreadDataOffset bindlessOffset;
    };

    const AccessCase accessCases[] = {
        {false, undefined<SurfaceStateHeapOffset>, undefined<CrossThreadDataOffset>},
        {true, 0x40, undefined<CrossThreadDataOffset>},
        {true, undefined<SurfaceStateHeapOffset>, 0x40},
        {true, 0x40, 0x40},
    };

    for (const auto &accessCase : accessCases) {
        auto module = std::make_unique<ModuleImp>(device, nullptr, ModuleType::User);
        ASSERT_NE(nullptr, module);

        auto translationUnit = module->getTranslationUnit();
        ASSERT_NE(nullptr, translationUnit);

        // One pointer argument carrying the offsets under test.
        ArgDescriptor pointerArg(ArgDescriptor::ArgTPointer);
        auto &pointerDesc = pointerArg.as<ArgDescPointer>();
        pointerDesc.bindful = accessCase.bindfulOffset;
        pointerDesc.bindless = accessCase.bindlessOffset;

        auto kernelInfo = std::make_unique<KernelInfo>();
        auto &explicitArgs = kernelInfo->kernelDescriptor.payloadMappings.explicitArgs;
        explicitArgs.clear();
        explicitArgs.push_back(pointerArg);

        // The raw pointer is handed over to the kernelInfos container.
        auto &kernelInfos = translationUnit->programInfo.kernelInfos;
        kernelInfos.clear();
        kernelInfos.push_back(kernelInfo.release());

        EXPECT_EQ(accessCase.expectStateful, NEO::AddressingModeHelper::containsStatefulAccess(kernelInfos));
    }
}
using ModuleInitializeTest = Test<DeviceFixture>;
// Exercises ModuleImp::initialize() with stateful and stateless kernels for
// different module types and UseSmallerThan4gbBuffersOnly debug-key values,
// and checks the boolean result against a table of expectations.
// Skipped on products whose compiler config does not require forced stateless.
TEST_F(ModuleInitializeTest, whenModuleInitializeIsCalledThenCorrectResultIsReturned) {
// Test double that exposes the constructors and the translationUnit member so
// the test can swap the translation unit and inject kernel infos.
class MockModuleImp : public ModuleImp {
public:
using ModuleImp::ModuleImp;
using ModuleImp::translationUnit;
// Replaces the translation unit's kernel infos with a single kernel that has
// one pointer argument; isStateful selects defined (0x40) or undefined
// bindful/bindless offsets for that argument.
void setAddressingMode(bool isStateful) {
auto kernelInfo = std::make_unique<KernelInfo>();
kernelInfo->kernelDescriptor.payloadMappings.explicitArgs.clear();
auto argDescriptor = ArgDescriptor(ArgDescriptor::ArgTPointer);
if (isStateful) {
argDescriptor.as<ArgDescPointer>().bindful = 0x40;
argDescriptor.as<ArgDescPointer>().bindless = 0x40;
} else {
argDescriptor.as<ArgDescPointer>().bindful = undefined<SurfaceStateHeapOffset>;
argDescriptor.as<ArgDescPointer>().bindless = undefined<CrossThreadDataOffset>;
}
kernelInfo->kernelDescriptor.payloadMappings.explicitArgs.push_back(argDescriptor);
// Placeholder heap: non-zero size and a fake, non-null address.
// NOTE(review): presumably needed so initialize() treats the kernel as having
// an ISA without dereferencing the pointer — confirm against initialize().
kernelInfo->heapInfo.KernelHeapSize = 0x1;
kernelInfo->heapInfo.pKernelHeap = reinterpret_cast<void *>(0xffff);
this->translationUnit->programInfo.kernelInfos.clear();
// Ownership of the raw pointer is handed to the kernelInfos container.
this->translationUnit->programInfo.kernelInfos.push_back(kernelInfo.release());
}
};
// Translation unit whose native-binary path reports success without touching
// the input, so the kernel infos injected by setAddressingMode() are
// presumably the ones initialize() inspects.
class MyMockModuleTU : public MockModuleTU {
public:
using MockModuleTU::MockModuleTU;
bool createFromNativeBinary(const char *input, size_t inputSize) override { return true; }
};
const auto &compilerHwInfoConfig = *CompilerHwInfoConfig::get(defaultHwInfo->platform.eProductFamily);
if (!compilerHwInfoConfig.isForceToStatelessRequired()) {
GTEST_SKIP();
}
// Restores the debug flags modified in the loop below when the test ends.
DebugManagerStateRestore restorer;
std::string testFile;
retrieveBinaryKernelFilenameNoRevision(testFile, "test_kernel_", ".bin");
size_t size = 0;
auto src = loadDataFromFile(testFile.c_str(), size);
ASSERT_NE(0u, size);
ASSERT_NE(nullptr, src);
// Describe the loaded file as a native-format module input.
ze_module_desc_t moduleDesc = {};
moduleDesc.format = ZE_MODULE_FORMAT_NATIVE;
moduleDesc.pInputModule = reinterpret_cast<const uint8_t *>(src.get());
moduleDesc.inputSize = size;
// Columns: expected initialize() result, kernel is stateful, module type,
// UseSmallerThan4gbBuffersOnly debug-key value.
// Only the user module with a stateful kernel and debug key 0 is expected to fail.
std::array<std::tuple<bool, bool, ModuleType, int32_t>, 4> testParams = {{{true, false, ModuleType::Builtin, -1},
{true, true, ModuleType::Builtin, 1},
{true, true, ModuleType::Builtin, 0},
{false, true, ModuleType::User, 0}}};
for (auto &[expectedResult, isStateful, moduleType, debugKey] : testParams) {
MockModuleImp module(device, nullptr, moduleType);
// The translation unit must be replaced before setAddressingMode(), which
// writes into it.
module.translationUnit = std::make_unique<MyMockModuleTU>(device);
DebugManager.flags.UseSmallerThan4gbBuffersOnly.set(debugKey);
module.setAddressingMode(isStateful);
EXPECT_EQ(expectedResult, module.initialize(&moduleDesc, device->getNEODevice()));
}
}
using ModuleDebugDataTest = Test<DeviceFixture>;
TEST_F(ModuleDebugDataTest, GivenDebugDataWithRelocationsWhenCreatingRelocatedDebugDataThenRelocationsAreApplied) {
auto cip = new NEO::MockCompilerInterfaceCaptureBuildOptions();