Add debug flag to fail build program with stateful access

I've added the debug flag FailBuildProgramWithStatefulAccess, which makes it
possible to fail program build/module creation
when stateful access is used (except for builtins) on
PVC and later platforms.

Related-To: NEO-6075
Signed-off-by: Kamil Kopryk <kamil.kopryk@intel.com>
This commit is contained in:
Kamil Kopryk
2022-05-13 11:49:25 +00:00
committed by Compute-Runtime-Automation
parent b24635b0c0
commit 99db73c034
12 changed files with 335 additions and 50 deletions

View File

@ -10,6 +10,7 @@
#include "shared/source/device/device.h"
#include "shared/source/device_binary_format/device_binary_formats.h"
#include "shared/source/execution_environment/execution_environment.h"
#include "shared/source/helpers/addressing_mode_helper.h"
#include "shared/source/helpers/compiler_options_parser.h"
#include "shared/source/program/kernel_info.h"
#include "shared/source/source_level_debugger/source_level_debugger.h"
@ -177,12 +178,22 @@ cl_int Program::build(
phaseReached[clDevice->getRootDeviceIndex()] = BuildPhase::BinaryProcessing;
}
auto containsStatefulAccess = AddressingModeHelper::containsStatefulAccess(buildInfos[clDevices[0]->getRootDeviceIndex()].kernelInfoArray);
auto isUserKernel = !isBuiltIn;
auto failBuildProgram = (containsStatefulAccess &&
isUserKernel &&
AddressingModeHelper::failBuildProgramWithStatefulAccess(clDevices[0]->getHardwareInfo()));
if (failBuildProgram) {
retVal = CL_BUILD_PROGRAM_FAILURE;
}
if (retVal != CL_SUCCESS) {
break;
}
if (isKernelDebugEnabled() || gtpinIsGTPinInitialized()) {
for (auto &clDevice : deviceVector) {
auto rootDeviceIndex = clDevice->getRootDeviceIndex();
if (BuildPhase::DebugDataNotification == phaseReached[rootDeviceIndex]) {

View File

@ -73,8 +73,12 @@ std::string Program::getInternalOptions() const {
CompilerOptions::concatenateAppend(internalOptions, CompilerOptions::arch32bit);
}
if ((isBuiltIn && is32bit) || pClDevice->areSharedSystemAllocationsAllowed() ||
DebugManager.flags.DisableStatelessToStatefulOptimization.get()) {
auto &hwInfo = pClDevice->getHardwareInfo();
const auto &compilerHwInfoConfig = *CompilerHwInfoConfig::get(hwInfo.platform.eProductFamily);
auto forceToStatelessRequired = compilerHwInfoConfig.isForceToStatelessRequired();
auto disableStatelessToStatefulOptimization = DebugManager.flags.DisableStatelessToStatefulOptimization.get();
if ((isBuiltIn && is32bit) || forceToStatelessRequired || disableStatelessToStatefulOptimization) {
CompilerOptions::concatenateAppend(internalOptions, CompilerOptions::greaterThan4gbBuffersRequired);
}
@ -91,7 +95,6 @@ std::string Program::getInternalOptions() const {
CompilerOptions::concatenateAppend(internalOptions, CompilerOptions::hasBufferOffsetArg);
}
auto &hwInfo = pClDevice->getHardwareInfo();
const auto &hwInfoConfig = *HwInfoConfig::get(hwInfo.platform.eProductFamily);
if (hwInfoConfig.isForceEmuInt32DivRemSPWARequired(hwInfo)) {
CompilerOptions::concatenateAppend(internalOptions, CompilerOptions::forceEmuInt32DivRemSP);

View File

@ -257,10 +257,10 @@ TEST_F(BuiltInTests, WhenBuildingListOfBuiltinsThenBuiltinsHaveBeenGenerated) {
uint64_t hash = Hash::hash(allBuiltIns.c_str(), allBuiltIns.length());
auto hashName = getBuiltInHashFileName(hash, supportsImages);
//First fail, if we are inconsistent
// First fail, if we are inconsistent
EXPECT_EQ(true, fileExists(hashName)) << "**********\nBuilt in kernels need to be regenerated for the mock compilers!\n**********";
//then write to file if needed
// then write to file if needed
#define GENERATE_NEW_HASH_FOR_BUILT_INS 0
#if GENERATE_NEW_HASH_FOR_BUILT_INS
std::cout << "writing builtins to file: " << hashName << std::endl;
@ -1527,17 +1527,21 @@ TEST_F(BuiltInTests, GivenTypeSourceWhenCreatingProgramFromCodeThenValidPointerI
EXPECT_NE(nullptr, program.get());
}
TEST_F(BuiltInTests, givenCreateProgramFromSourceWhenDeviceSupportSharedSystemAllocationThenInternalOptionsDisableStosoFlag) {
TEST_F(BuiltInTests, givenCreateProgramFromSourceWhenForceToStatelessRequiredOr32BitThenInternalOptionsHasGreaterThan4gbBuffersRequired) {
auto builtinsLib = std::unique_ptr<BuiltinsLib>(new BuiltinsLib());
pClDevice->deviceInfo.sharedSystemMemCapabilities = CL_UNIFIED_SHARED_MEMORY_ACCESS_INTEL | CL_UNIFIED_SHARED_MEMORY_ATOMIC_ACCESS_INTEL | CL_UNIFIED_SHARED_MEMORY_CONCURRENT_ACCESS_INTEL | CL_UNIFIED_SHARED_MEMORY_CONCURRENT_ATOMIC_ACCESS_INTEL;
pDevice->getRootDeviceEnvironment().getMutableHardwareInfo()->capabilityTable.sharedSystemMemCapabilities = 1;
const BuiltinCode bc = builtinsLib->getBuiltinCode(EBuiltInOps::CopyBufferToBuffer, BuiltinCode::ECodeType::Source, *pDevice);
EXPECT_NE(0u, bc.resource.size());
auto program = std::unique_ptr<Program>(BuiltinDispatchInfoBuilder::createProgramFromCode(bc, toClDeviceVector(*pClDevice)));
EXPECT_NE(nullptr, program.get());
auto builtinInternalOptions = program->getInternalOptions();
EXPECT_TRUE(hasSubstr(builtinInternalOptions, std::string(CompilerOptions::greaterThan4gbBuffersRequired)));
const auto &compilerHwInfoConfig = *CompilerHwInfoConfig::get(defaultHwInfo->platform.eProductFamily);
if (compilerHwInfoConfig.isForceToStatelessRequired() || is32bit) {
EXPECT_THAT(builtinInternalOptions, testing::HasSubstr(std::string(CompilerOptions::greaterThan4gbBuffersRequired)));
} else {
EXPECT_THAT(builtinInternalOptions, testing::Not(testing::HasSubstr(std::string(CompilerOptions::greaterThan4gbBuffersRequired))));
}
}
TEST_F(BuiltInTests, GivenTypeIntermediateWhenCreatingProgramFromCodeThenNullPointerIsReturned) {
@ -1586,7 +1590,9 @@ TEST_F(BuiltInTests, GivenForce32bitWhenCreatingProgramThenCorrectKernelIsCreate
EXPECT_EQ(std::string::npos, it);
it = builtinInternalOptions.find(NEO::CompilerOptions::greaterThan4gbBuffersRequired.data());
if (is32bit || pDevice->areSharedSystemAllocationsAllowed()) {
const auto &compilerHwInfoConfig = *CompilerHwInfoConfig::get(defaultHwInfo->platform.eProductFamily);
if (is32bit || compilerHwInfoConfig.isForceToStatelessRequired()) {
EXPECT_NE(std::string::npos, it);
} else {
EXPECT_EQ(std::string::npos, it);

View File

@ -14,7 +14,9 @@
#include "shared/source/device_binary_format/elf/ocl_elf.h"
#include "shared/source/device_binary_format/patchtokens_decoder.h"
#include "shared/source/gmm_helper/gmm_helper.h"
#include "shared/source/helpers/addressing_mode_helper.h"
#include "shared/source/helpers/aligned_memory.h"
#include "shared/source/helpers/compiler_hw_info_config.h"
#include "shared/source/helpers/hash.h"
#include "shared/source/helpers/hw_helper.h"
#include "shared/source/helpers/ptr_math.h"
@ -1140,7 +1142,9 @@ TEST_F(ProgramFromSourceTest, GivenFlagsWhenCompilingProgramThenBuildOptionsHave
// Check build options that were applied
EXPECT_TRUE(CompilerOptions::contains(cip->buildOptions, CompilerOptions::fastRelaxedMath)) << cip->buildOptions;
EXPECT_FALSE(CompilerOptions::contains(cip->buildInternalOptions, CompilerOptions::gtpinRera)) << cip->buildInternalOptions;
if (!pDevice->areSharedSystemAllocationsAllowed()) {
const auto &compilerHwInfoConfig = *CompilerHwInfoConfig::get(defaultHwInfo->platform.eProductFamily);
if (!compilerHwInfoConfig.isForceToStatelessRequired()) {
EXPECT_FALSE(CompilerOptions::contains(cip->buildInternalOptions, CompilerOptions::greaterThan4gbBuffersRequired)) << cip->buildInternalOptions;
}
EXPECT_TRUE(CompilerOptions::contains(cip->buildInternalOptions, pPlatform->getClDevice(0)->peekCompilerExtensions())) << cip->buildInternalOptions;
@ -1730,14 +1734,6 @@ TEST_F(ProgramTests, WhenCreatingProgramThenBindlessIsEnabledOnlyIfDebugFlagIsEn
}
}
TEST_F(ProgramTests, givenDeviceThatSupportsSharedSystemMemoryAllocationWhenProgramIsCompiledThenItForcesStatelessCompilation) {
pClDevice->deviceInfo.sharedSystemMemCapabilities = CL_UNIFIED_SHARED_MEMORY_ACCESS_INTEL | CL_UNIFIED_SHARED_MEMORY_ATOMIC_ACCESS_INTEL | CL_UNIFIED_SHARED_MEMORY_CONCURRENT_ACCESS_INTEL | CL_UNIFIED_SHARED_MEMORY_CONCURRENT_ATOMIC_ACCESS_INTEL;
pClDevice->getRootDeviceEnvironment().getMutableHardwareInfo()->capabilityTable.sharedSystemMemCapabilities = 1;
MockProgram program(pContext, false, toClDeviceVector(*pClDevice));
auto internalOptions = program.getInternalOptions();
EXPECT_TRUE(CompilerOptions::contains(internalOptions.c_str(), CompilerOptions::greaterThan4gbBuffersRequired)) << internalOptions;
}
TEST_F(ProgramTests, GivenForce32BitAddressessWhenProgramIsCreatedThenGreaterThan4gbBuffersRequiredIsCorrectlySet) {
DebugManagerStateRestore dbgRestorer;
cl_int retVal = CL_DEVICE_NOT_FOUND;
@ -1746,7 +1742,8 @@ TEST_F(ProgramTests, GivenForce32BitAddressessWhenProgramIsCreatedThenGreaterTha
const_cast<DeviceInfo *>(&pDevice->getDeviceInfo())->force32BitAddressess = true;
MockProgram program(pContext, false, toClDeviceVector(*pClDevice));
auto internalOptions = program.getInternalOptions();
if (pDevice->areSharedSystemAllocationsAllowed()) {
const auto &compilerHwInfoConfig = *CompilerHwInfoConfig::get(defaultHwInfo->platform.eProductFamily);
if (compilerHwInfoConfig.isForceToStatelessRequired()) {
EXPECT_TRUE(CompilerOptions::contains(internalOptions, CompilerOptions::greaterThan4gbBuffersRequired)) << internalOptions;
} else {
EXPECT_FALSE(CompilerOptions::contains(internalOptions, NEO::CompilerOptions::greaterThan4gbBuffersRequired)) << internalOptions;
@ -1761,10 +1758,12 @@ TEST_F(ProgramTests, Given32bitSupportWhenProgramIsCreatedThenGreaterThan4gbBuff
DebugManager.flags.DisableStatelessToStatefulOptimization.set(false);
std::unique_ptr<MockProgram> program{Program::createBuiltInFromSource<MockProgram>("", pContext, pContext->getDevices(), nullptr)};
auto internalOptions = program->getInternalOptions();
if ((false == pDevice->areSharedSystemAllocationsAllowed()) && (false == is32bit)) {
EXPECT_FALSE(CompilerOptions::contains(internalOptions, NEO::CompilerOptions::greaterThan4gbBuffersRequired)) << internalOptions;
} else {
const auto &compilerHwInfoConfig = *CompilerHwInfoConfig::get(defaultHwInfo->platform.eProductFamily);
if (compilerHwInfoConfig.isForceToStatelessRequired() || is32bit) {
EXPECT_TRUE(CompilerOptions::contains(internalOptions, NEO::CompilerOptions::greaterThan4gbBuffersRequired)) << internalOptions;
} else {
EXPECT_FALSE(CompilerOptions::contains(internalOptions, NEO::CompilerOptions::greaterThan4gbBuffersRequired)) << internalOptions;
}
}
@ -1788,14 +1787,101 @@ TEST_F(ProgramTests, Force32BitAddressessWhenProgramIsCreatedThenGreaterThan4gbB
const_cast<DeviceInfo *>(&pDevice->getDeviceInfo())->force32BitAddressess = true;
std::unique_ptr<MockProgram> program{Program::createBuiltInFromSource<MockProgram>("", pContext, pContext->getDevices(), nullptr)};
auto internalOptions = program->getInternalOptions();
if (is32bit) {
const auto &compilerHwInfoConfig = *CompilerHwInfoConfig::get(defaultHwInfo->platform.eProductFamily);
if (is32bit || compilerHwInfoConfig.isForceToStatelessRequired()) {
EXPECT_TRUE(CompilerOptions::contains(internalOptions, CompilerOptions::greaterThan4gbBuffersRequired)) << internalOptions;
} else {
if (false == pDevice->areSharedSystemAllocationsAllowed()) {
EXPECT_FALSE(CompilerOptions::contains(internalOptions, NEO::CompilerOptions::greaterThan4gbBuffersRequired)) << internalOptions;
} else {
EXPECT_TRUE(CompilerOptions::contains(internalOptions, NEO::CompilerOptions::greaterThan4gbBuffersRequired)) << internalOptions;
EXPECT_FALSE(CompilerOptions::contains(internalOptions, NEO::CompilerOptions::greaterThan4gbBuffersRequired)) << internalOptions;
}
}
// Checks AddressingModeHelper::containsStatefulAccess against a single pointer
// argument: the helper is expected to report stateful access whenever the
// bindful or the bindless offset of that argument is defined, and to report
// none when both are left undefined.
TEST_F(ProgramTests, whenContainsStatefulAccessIsCalledThenReturnCorrectResult) {
    using TestCase = std::tuple<bool, SurfaceStateHeapOffset, CrossThreadDataOffset>;
    // Columns: expected result, bindful offset, bindless offset.
    std::vector<TestCase> testCases = {
        {false, undefined<SurfaceStateHeapOffset>, undefined<CrossThreadDataOffset>},
        {true, 0x40, undefined<CrossThreadDataOffset>},
        {true, undefined<SurfaceStateHeapOffset>, 0x40},
        {true, 0x40, 0x40},
    };

    for (const auto &[expected, bindfulOffset, bindlessOffset] : testCases) {
        // Describe one pointer argument with the offsets under test.
        ArgDescriptor pointerArgDesc(ArgDescriptor::ArgTPointer);
        auto &pointerArg = pointerArgDesc.as<ArgDescPointer>();
        pointerArg.bindful = bindfulOffset;
        pointerArg.bindless = bindlessOffset;

        auto info = std::make_unique<KernelInfo>();
        auto &explicitArgs = info->kernelDescriptor.payloadMappings.explicitArgs;
        explicitArgs.clear();
        explicitArgs.push_back(pointerArgDesc);

        // A fresh program per case, so each check sees exactly one kernel.
        MockProgram program(pContext, false, toClDeviceVector(*pClDevice));
        program.addKernelInfo(info.release(), 0);

        const auto &kernelInfos = program.buildInfos[0].kernelInfoArray;
        EXPECT_EQ(expected, AddressingModeHelper::containsStatefulAccess(kernelInfos));
    }
}
// Exercises Program::build together with the FailBuildProgramWithStatefulAccess
// debug flag. A user program is expected to fail with CL_BUILD_PROGRAM_FAILURE
// only when its kernel has stateful access, the flag is set to 1 and the
// platform reports isForceToStatelessRequired(); in every other case, and for
// built-in programs, the build is expected to succeed.
TEST_F(ProgramTests, givenStatefulAndStatelessAccessesWhenProgramBuildIsCalledThenCorrectResultIsReturned) {
    DebugManagerStateRestore restorer;
    const auto &compilerHwInfoConfig = *CompilerHwInfoConfig::get(pClDevice->getHardwareInfo().platform.eProductFamily);

    // Program mock exposing the members the test has to set up directly.
    class MyMockProgram : public Program {
      public:
        using Program::buildInfos;
        using Program::createdFrom;
        using Program::irBinary;
        using Program::irBinarySize;
        using Program::isBuiltIn;
        using Program::options;
        using Program::Program;
        using Program::sourceCode;

        // Replaces the kernel list of buildInfos[0] with one kernel that has a
        // single pointer argument, either stateful (both offsets defined) or
        // stateless (both offsets undefined).
        void setAddressingMode(bool isStateful) {
            ArgDescriptor pointerArgDesc(ArgDescriptor::ArgTPointer);
            auto &pointerArg = pointerArgDesc.as<ArgDescPointer>();
            if (isStateful) {
                pointerArg.bindful = 0x40;
                pointerArg.bindless = 0x40;
            } else {
                pointerArg.bindful = undefined<SurfaceStateHeapOffset>;
                pointerArg.bindless = undefined<CrossThreadDataOffset>;
            }

            auto info = std::make_unique<KernelInfo>();
            auto &explicitArgs = info->kernelDescriptor.payloadMappings.explicitArgs;
            explicitArgs.clear();
            explicitArgs.push_back(pointerArgDesc);

            auto &kernelInfos = this->buildInfos[0].kernelInfoArray;
            kernelInfos.clear();
            kernelInfos.push_back(info.release());
        }

        // Always reports success; presumably stubbed so that build() does not
        // depend on a real device binary - TODO confirm.
        cl_int processGenBinary(const ClDevice &clDevice) override {
            return CL_SUCCESS;
        }
    };

    // Columns: build result on platforms that require force-to-stateless,
    // whether the kernel uses stateful access, debug flag value.
    std::array<std::tuple<int, bool, int32_t>, 3> testCases = {{{CL_SUCCESS, false, -1},
                                                                {CL_SUCCESS, true, 0},
                                                                {CL_BUILD_PROGRAM_FAILURE, true, 1}}};

    for (const auto &[resultWhenForced, isStatefulAccess, debugKey] : testCases) {
        // Where force-to-stateless is not required, every build must succeed.
        const auto expectedResult = compilerHwInfoConfig.isForceToStatelessRequired() ? resultWhenForced : CL_SUCCESS;

        MyMockProgram program(pContext, false, toClDeviceVector(*pClDevice));
        program.isBuiltIn = false;
        program.sourceCode = "test_kernel";
        program.createdFrom = Program::CreatedFrom::SOURCE;
        program.setAddressingMode(isStatefulAccess);
        DebugManager.flags.FailBuildProgramWithStatefulAccess.set(debugKey);

        EXPECT_EQ(expectedResult, program.build(toClDeviceVector(*pClDevice), nullptr, false));
    }

    {
        // Built-in programs must keep building even with the flag enabled.
        MyMockProgram builtInProgram(pContext, true, toClDeviceVector(*pClDevice));
        builtInProgram.isBuiltIn = true;
        builtInProgram.irBinary.reset(new char[16]);
        builtInProgram.irBinarySize = 16;
        builtInProgram.setAddressingMode(true);
        DebugManager.flags.FailBuildProgramWithStatefulAccess.set(1);

        EXPECT_EQ(CL_SUCCESS, builtInProgram.build(toClDeviceVector(*pClDevice), nullptr, false));
    }
}

View File

@ -422,3 +422,4 @@ DirectSubmissionInsertSfenceInstructionPriorToSubmission = -1
EnableTimestampWaitForEvents = -1
ForceWddmLowPriorityContextValue = -1
EnableDebuggerMmapMemoryAccess = 0
FailBuildProgramWithStatefulAccess = -1