mirror of
https://github.com/intel/compute-runtime.git
synced 2025-09-15 13:01:45 +08:00
Revert "Fail build program on PVC with stateful accesses"
This reverts commit 9466113cef.
Signed-off-by: Compute-Runtime-Validation <compute-runtime-validation@intel.com>
This commit is contained in:

committed by
Compute-Runtime-Automation

parent
cec0ea2809
commit
c5c3e865f0
@ -16,7 +16,6 @@
|
||||
#include "shared/source/device_binary_format/elf/elf.h"
|
||||
#include "shared/source/device_binary_format/elf/elf_encoder.h"
|
||||
#include "shared/source/device_binary_format/elf/ocl_elf.h"
|
||||
#include "shared/source/helpers/addressing_mode_helper.h"
|
||||
#include "shared/source/helpers/api_specific_config.h"
|
||||
#include "shared/source/helpers/constants.h"
|
||||
#include "shared/source/helpers/kernel_helpers.h"
|
||||
@ -44,7 +43,6 @@ namespace BuildOptions {
|
||||
NEO::ConstStringRef optDisable = "-ze-opt-disable";
|
||||
NEO::ConstStringRef optLevel = "-ze-opt-level";
|
||||
NEO::ConstStringRef greaterThan4GbRequired = "-ze-opt-greater-than-4GB-buffer-required";
|
||||
NEO::ConstStringRef smallerThan4GbBuffersOnly = "-ze-opt-smaller-than-4GB-buffers-only";
|
||||
NEO::ConstStringRef hasBufferOffsetArg = "-ze-intel-has-buffer-offset-arg";
|
||||
NEO::ConstStringRef debugKernelEnable = "-ze-kernel-debug-enable";
|
||||
} // namespace BuildOptions
|
||||
@ -126,10 +124,8 @@ std::string ModuleTranslationUnit::generateCompilerOptions(const char *buildOpti
|
||||
internalOptions = NEO::CompilerOptions::concatenate(internalOptions, BuildOptions::debugKernelEnable);
|
||||
}
|
||||
|
||||
auto disableStatelessToStatefulOptimization = NEO::DebugManager.flags.DisableStatelessToStatefulOptimization.get();
|
||||
auto isForceToStatelessNeeded = NEO::AddressingModeHelper::forceToStatelessNeeded(options, BuildOptions::smallerThan4GbBuffersOnly.str(), device->getHwInfo());
|
||||
|
||||
if (disableStatelessToStatefulOptimization || isForceToStatelessNeeded) {
|
||||
if (NEO::DebugManager.flags.DisableStatelessToStatefulOptimization.get() ||
|
||||
device->getNEODevice()->areSharedSystemAllocationsAllowed()) {
|
||||
internalOptions = NEO::CompilerOptions::concatenate(internalOptions, NEO::CompilerOptions::greaterThan4gbBuffersRequired);
|
||||
}
|
||||
|
||||
@ -527,14 +523,6 @@ bool ModuleImp::initialize(const ze_module_desc_t *desc, NEO::Device *neoDevice)
|
||||
this->updateBuildLog(neoDevice);
|
||||
verifyDebugCapabilities();
|
||||
|
||||
auto containsStatefulAccess = NEO::AddressingModeHelper::containsStatefulAccess(translationUnit->programInfo.kernelInfos);
|
||||
auto isForceToStatelessNeeded = NEO::AddressingModeHelper::forceToStatelessNeeded(translationUnit->options, BuildOptions::smallerThan4GbBuffersOnly.str(), device->getHwInfo());
|
||||
auto isUserKernel = (type == ModuleType::User);
|
||||
|
||||
if (containsStatefulAccess && isForceToStatelessNeeded && isUserKernel) {
|
||||
success = false;
|
||||
}
|
||||
|
||||
if (false == success) {
|
||||
return false;
|
||||
}
|
||||
|
@ -10,8 +10,6 @@
|
||||
#include "shared/source/device_binary_format/debug_zebin.h"
|
||||
#include "shared/source/gmm_helper/gmm.h"
|
||||
#include "shared/source/gmm_helper/gmm_helper.h"
|
||||
#include "shared/source/helpers/addressing_mode_helper.h"
|
||||
#include "shared/source/helpers/compiler_hw_info_config.h"
|
||||
#include "shared/source/kernel/implicit_args.h"
|
||||
#include "shared/source/program/kernel_info.h"
|
||||
#include "shared/test/common/helpers/debug_manager_state_restore.h"
|
||||
@ -1785,19 +1783,28 @@ HWTEST_F(ModuleTranslationUnitTest, WhenBuildOptionsAreNullThenReuseExistingOpti
|
||||
EXPECT_NE(pMockCompilerInterface->inputInternalOptions.find("cl-intel-greater-than-4GB-buffer-required"), std::string::npos);
|
||||
}
|
||||
|
||||
HWTEST_F(ModuleTranslationUnitTest, givenForceToStatelessRequiredWhenBuildingModuleThen4GbBuffersAreRequired) {
|
||||
HWTEST_F(ModuleTranslationUnitTest, givenSystemSharedAllocationAllowedWhenBuildingModuleThen4GbBuffersAreRequired) {
|
||||
auto mockCompilerInterface = new MockCompilerInterface;
|
||||
auto &rootDeviceEnvironment = neoDevice->executionEnvironment->rootDeviceEnvironments[neoDevice->getRootDeviceIndex()];
|
||||
rootDeviceEnvironment->compilerInterface.reset(mockCompilerInterface);
|
||||
|
||||
MockModuleTranslationUnit moduleTu(device);
|
||||
auto ret = moduleTu.buildFromSpirV("", 0U, nullptr, "", nullptr);
|
||||
EXPECT_TRUE(ret);
|
||||
{
|
||||
neoDevice->getRootDeviceEnvironment().getMutableHardwareInfo()->capabilityTable.sharedSystemMemCapabilities = 1;
|
||||
|
||||
MockModuleTranslationUnit moduleTu(device);
|
||||
auto ret = moduleTu.buildFromSpirV("", 0U, nullptr, "", nullptr);
|
||||
EXPECT_TRUE(ret);
|
||||
|
||||
const auto &compilerHwInfoConfig = *CompilerHwInfoConfig::get(defaultHwInfo->platform.eProductFamily);
|
||||
if (compilerHwInfoConfig.isForceToStatelessRequired()) {
|
||||
EXPECT_NE(mockCompilerInterface->inputInternalOptions.find("cl-intel-greater-than-4GB-buffer-required"), std::string::npos);
|
||||
} else {
|
||||
}
|
||||
|
||||
{
|
||||
neoDevice->getRootDeviceEnvironment().getMutableHardwareInfo()->capabilityTable.sharedSystemMemCapabilities = 0;
|
||||
|
||||
MockModuleTranslationUnit moduleTu(device);
|
||||
auto ret = moduleTu.buildFromSpirV("", 0U, nullptr, "", nullptr);
|
||||
EXPECT_TRUE(ret);
|
||||
|
||||
EXPECT_EQ(mockCompilerInterface->inputInternalOptions.find("cl-intel-greater-than-4GB-buffer-required"), std::string::npos);
|
||||
}
|
||||
}
|
||||
@ -2005,155 +2012,6 @@ TEST_F(ModuleTest, GivenInjectInternalBuildOptionsWhenBuildingBuiltinModuleThenI
|
||||
EXPECT_FALSE(CompilerOptions::contains(cip->buildInternalOptions, "-abc"));
|
||||
};
|
||||
|
||||
// Checks the internal options produced by ModuleTranslationUnit::generateCompilerOptions()
// for every combination of the UseSmallerThan4gbBuffersOnly debug flag (-1, 0, 1) and the
// presence of the "-ze-opt-smaller-than-4GB-buffers-only" build option.
// The test is skipped when isForceToStatelessRequired() is false for the default platform.
TEST_F(ModuleTest, givenForceToStatelessRequiredWhenGenerateCompilerOptionsThenOptionsAreCorrect) {
|
||||
|
||||
const auto &compilerHwInfoConfig = *CompilerHwInfoConfig::get(defaultHwInfo->platform.eProductFamily);
|
||||
if (!compilerHwInfoConfig.isForceToStatelessRequired()) {
|
||||
GTEST_SKIP();
|
||||
}
|
||||
|
||||
// Declared before the flag is modified below; NOTE(review): presumably restores debug flags on scope exit - confirm.
DebugManagerStateRestore restorer;
|
||||
auto module = std::make_unique<ModuleImp>(device, nullptr, ModuleType::User);
|
||||
ASSERT_NE(nullptr, module);
|
||||
auto moduleTranslationUnit = module->getTranslationUnit();
|
||||
ASSERT_NE(nullptr, moduleTranslationUnit);
|
||||
std::string buildOptions;
|
||||
std::string internalBuildOptions;
|
||||
|
||||
// Flag = -1, no build option: the greater-than-4GB option is expected.
{
|
||||
NEO::DebugManager.flags.UseSmallerThan4gbBuffersOnly.set(-1);
|
||||
buildOptions = "";
|
||||
auto internalOptions = moduleTranslationUnit->generateCompilerOptions(buildOptions.c_str(), internalBuildOptions.c_str());
|
||||
EXPECT_THAT(internalOptions, testing::HasSubstr("-cl-intel-greater-than-4GB-buffer-required"));
|
||||
}
|
||||
// Flag = -1, smaller-than-4GB build option given: the greater-than-4GB option must be absent.
{
|
||||
NEO::DebugManager.flags.UseSmallerThan4gbBuffersOnly.set(-1);
|
||||
buildOptions = "-ze-opt-smaller-than-4GB-buffers-only";
|
||||
auto internalOptions = moduleTranslationUnit->generateCompilerOptions(buildOptions.c_str(), internalBuildOptions.c_str());
|
||||
EXPECT_THAT(internalOptions, testing::Not(testing::HasSubstr("-cl-intel-greater-than-4GB-buffer-required")));
|
||||
}
|
||||
// Flag = 0, no build option: the greater-than-4GB option is expected.
{
|
||||
NEO::DebugManager.flags.UseSmallerThan4gbBuffersOnly.set(0);
|
||||
buildOptions = "";
|
||||
auto internalOptions = moduleTranslationUnit->generateCompilerOptions(buildOptions.c_str(), internalBuildOptions.c_str());
|
||||
EXPECT_THAT(internalOptions, testing::HasSubstr("-cl-intel-greater-than-4GB-buffer-required"));
|
||||
}
|
||||
// Flag = 0, smaller-than-4GB build option given: the greater-than-4GB option is still expected.
{
|
||||
NEO::DebugManager.flags.UseSmallerThan4gbBuffersOnly.set(0);
|
||||
buildOptions = "-ze-opt-smaller-than-4GB-buffers-only";
|
||||
auto internalOptions = moduleTranslationUnit->generateCompilerOptions(buildOptions.c_str(), internalBuildOptions.c_str());
|
||||
EXPECT_THAT(internalOptions, testing::HasSubstr("-cl-intel-greater-than-4GB-buffer-required"));
|
||||
}
|
||||
// Flag = 1, no build option: the greater-than-4GB option must be absent.
{
|
||||
NEO::DebugManager.flags.UseSmallerThan4gbBuffersOnly.set(1);
|
||||
buildOptions = "";
|
||||
auto internalOptions = moduleTranslationUnit->generateCompilerOptions(buildOptions.c_str(), internalBuildOptions.c_str());
|
||||
EXPECT_THAT(internalOptions, testing::Not(testing::HasSubstr("-cl-intel-greater-than-4GB-buffer-required")));
|
||||
}
|
||||
// Flag = 1, smaller-than-4GB build option given: the greater-than-4GB option must be absent.
{
|
||||
NEO::DebugManager.flags.UseSmallerThan4gbBuffersOnly.set(1);
|
||||
buildOptions = "-ze-opt-smaller-than-4GB-buffers-only";
|
||||
auto internalOptions = moduleTranslationUnit->generateCompilerOptions(buildOptions.c_str(), internalBuildOptions.c_str());
|
||||
EXPECT_THAT(internalOptions, testing::Not(testing::HasSubstr("-cl-intel-greater-than-4GB-buffer-required")));
|
||||
}
|
||||
}
|
||||
|
||||
// Table-driven check of NEO::AddressingModeHelper::containsStatefulAccess() on a module's
// kernelInfos: a single pointer argument is expected to count as stateful when its bindful
// offset, its bindless offset, or both are set to 0x40, and as not stateful when both are undefined.
TEST_F(ModuleTest, whenContainsStatefulAccessIsCalledThenResultIsCorrect) {
|
||||
class MyModuleImpl : public ModuleImp {
|
||||
public:
|
||||
using ModuleImp::ModuleImp;
|
||||
};
|
||||
|
||||
// Each entry: (expected result, bindful offset, bindless offset).
std::vector<std::tuple<bool, SurfaceStateHeapOffset, CrossThreadDataOffset>> testParams = {
|
||||
{false, undefined<SurfaceStateHeapOffset>, undefined<CrossThreadDataOffset>},
|
||||
{true, 0x40, undefined<CrossThreadDataOffset>},
|
||||
{true, undefined<SurfaceStateHeapOffset>, 0x40},
|
||||
{true, 0x40, 0x40},
|
||||
};
|
||||
|
||||
for (auto &[expectedResult, surfaceStateHeapOffset, crossThreadDataOffset] : testParams) {
|
||||
auto module = std::make_unique<MyModuleImpl>(device, nullptr, ModuleType::User);
|
||||
ASSERT_NE(nullptr, module);
|
||||
auto moduleTranslationUnit = module->getTranslationUnit();
|
||||
ASSERT_NE(nullptr, moduleTranslationUnit);
|
||||
// Build a KernelInfo holding exactly one pointer argument with the offsets under test.
auto kernelInfo = std::make_unique<KernelInfo>();
|
||||
kernelInfo->kernelDescriptor.payloadMappings.explicitArgs.clear();
|
||||
auto argDescriptor = ArgDescriptor(ArgDescriptor::ArgTPointer);
|
||||
argDescriptor.as<ArgDescPointer>().bindful = surfaceStateHeapOffset;
|
||||
argDescriptor.as<ArgDescPointer>().bindless = crossThreadDataOffset;
|
||||
kernelInfo->kernelDescriptor.payloadMappings.explicitArgs.push_back(argDescriptor);
|
||||
moduleTranslationUnit->programInfo.kernelInfos.clear();
|
||||
// The raw pointer is released from the unique_ptr and stored in kernelInfos.
moduleTranslationUnit->programInfo.kernelInfos.push_back(kernelInfo.release());
|
||||
|
||||
EXPECT_EQ(expectedResult, NEO::AddressingModeHelper::containsStatefulAccess(moduleTranslationUnit->programInfo.kernelInfos));
|
||||
}
|
||||
}
|
||||
|
||||
using ModuleInitializeTest = Test<DeviceFixture>;
|
||||
|
||||
// Table-driven check of ModuleImp::initialize() for combinations of kernel addressing mode,
// module type and the UseSmallerThan4gbBuffersOnly debug flag. Only the User module with a
// stateful kernel and the flag set to 0 is expected to fail initialization.
// The test is skipped when isForceToStatelessRequired() is false for the default platform.
TEST_F(ModuleInitializeTest, whenModuleInitializeIsCalledThenCorrectResultIsReturned) {
|
||||
// Exposes translationUnit and adds a helper that installs one synthetic kernel.
class MockModuleImp : public ModuleImp {
|
||||
public:
|
||||
using ModuleImp::ModuleImp;
|
||||
using ModuleImp::translationUnit;
|
||||
|
||||
// Replaces the translation unit's kernelInfos with a single KernelInfo that has one
// pointer argument: bindful/bindless are 0x40 when isStateful, otherwise undefined.
void setAddressingMode(bool isStateful) {
|
||||
auto kernelInfo = std::make_unique<KernelInfo>();
|
||||
kernelInfo->kernelDescriptor.payloadMappings.explicitArgs.clear();
|
||||
auto argDescriptor = ArgDescriptor(ArgDescriptor::ArgTPointer);
|
||||
if (isStateful) {
|
||||
argDescriptor.as<ArgDescPointer>().bindful = 0x40;
|
||||
argDescriptor.as<ArgDescPointer>().bindless = 0x40;
|
||||
} else {
|
||||
argDescriptor.as<ArgDescPointer>().bindful = undefined<SurfaceStateHeapOffset>;
|
||||
argDescriptor.as<ArgDescPointer>().bindless = undefined<CrossThreadDataOffset>;
|
||||
}
|
||||
kernelInfo->kernelDescriptor.payloadMappings.explicitArgs.push_back(argDescriptor);
|
||||
// Placeholder kernel heap: size 1 and a fixed non-null address (0xffff).
kernelInfo->heapInfo.KernelHeapSize = 0x1;
|
||||
kernelInfo->heapInfo.pKernelHeap = reinterpret_cast<void *>(0xffff);
|
||||
|
||||
this->translationUnit->programInfo.kernelInfos.clear();
|
||||
this->translationUnit->programInfo.kernelInfos.push_back(kernelInfo.release());
|
||||
}
|
||||
};
|
||||
|
||||
// Translation unit whose createFromNativeBinary() always reports success without reading the input.
class MyMockModuleTU : public MockModuleTU {
|
||||
public:
|
||||
using MockModuleTU::MockModuleTU;
|
||||
bool createFromNativeBinary(const char *input, size_t inputSize) override { return true; }
|
||||
};
|
||||
|
||||
const auto &compilerHwInfoConfig = *CompilerHwInfoConfig::get(defaultHwInfo->platform.eProductFamily);
|
||||
if (!compilerHwInfoConfig.isForceToStatelessRequired()) {
|
||||
GTEST_SKIP();
|
||||
}
|
||||
|
||||
DebugManagerStateRestore restorer;
|
||||
// Load the test kernel binary from disk and describe it as a native-format module.
std::string testFile;
|
||||
retrieveBinaryKernelFilenameNoRevision(testFile, "test_kernel_", ".bin");
|
||||
size_t size = 0;
|
||||
auto src = loadDataFromFile(testFile.c_str(), size);
|
||||
ASSERT_NE(0u, size);
|
||||
ASSERT_NE(nullptr, src);
|
||||
ze_module_desc_t moduleDesc = {};
|
||||
moduleDesc.format = ZE_MODULE_FORMAT_NATIVE;
|
||||
moduleDesc.pInputModule = reinterpret_cast<const uint8_t *>(src.get());
|
||||
moduleDesc.inputSize = size;
|
||||
|
||||
// Each entry: (expected initialize() result, stateful kernel argument, module type,
// value for the UseSmallerThan4gbBuffersOnly debug flag).
std::array<std::tuple<bool, bool, ModuleType, int32_t>, 4> testParams = {{{true, false, ModuleType::Builtin, -1},
|
||||
{true, true, ModuleType::Builtin, 1},
|
||||
{true, true, ModuleType::Builtin, 0},
|
||||
{false, true, ModuleType::User, 0}}};
|
||||
|
||||
for (auto &[expectedResult, isStateful, moduleType, debugKey] : testParams) {
|
||||
MockModuleImp module(device, nullptr, moduleType);
|
||||
module.translationUnit = std::make_unique<MyMockModuleTU>(device);
|
||||
DebugManager.flags.UseSmallerThan4gbBuffersOnly.set(debugKey);
|
||||
module.setAddressingMode(isStateful);
|
||||
EXPECT_EQ(expectedResult, module.initialize(&moduleDesc, device->getNEODevice()));
|
||||
}
|
||||
}
|
||||
|
||||
using ModuleDebugDataTest = Test<DeviceFixture>;
|
||||
TEST_F(ModuleDebugDataTest, GivenDebugDataWithRelocationsWhenCreatingRelocatedDebugDataThenRelocationsAreApplied) {
|
||||
auto cip = new NEO::MockCompilerInterfaceCaptureBuildOptions();
|
||||
|
@ -10,8 +10,6 @@
|
||||
#include "shared/source/device/device.h"
|
||||
#include "shared/source/device_binary_format/device_binary_formats.h"
|
||||
#include "shared/source/execution_environment/execution_environment.h"
|
||||
#include "shared/source/helpers/addressing_mode_helper.h"
|
||||
#include "shared/source/helpers/compiler_hw_info_config.h"
|
||||
#include "shared/source/helpers/compiler_options_parser.h"
|
||||
#include "shared/source/program/kernel_info.h"
|
||||
#include "shared/source/source_level_debugger/source_level_debugger.h"
|
||||
@ -177,14 +175,6 @@ cl_int Program::build(
|
||||
phaseReached[clDevice->getRootDeviceIndex()] = BuildPhase::BinaryProcessing;
|
||||
}
|
||||
|
||||
auto containsStatefulAccess = AddressingModeHelper::containsStatefulAccess(buildInfos[clDevices[0]->getRootDeviceIndex()].kernelInfoArray);
|
||||
auto forceToStatelessNeeded = AddressingModeHelper::forceToStatelessNeeded(options, CompilerOptions::smallerThan4gbBuffersOnly.str(), clDevices[0]->getHardwareInfo());
|
||||
auto isUserKernel = !isBuiltIn;
|
||||
|
||||
if (containsStatefulAccess && forceToStatelessNeeded && isUserKernel) {
|
||||
retVal = CL_BUILD_PROGRAM_FAILURE;
|
||||
}
|
||||
|
||||
if (retVal != CL_SUCCESS) {
|
||||
break;
|
||||
}
|
||||
|
@ -15,7 +15,6 @@
|
||||
#include "shared/source/device_binary_format/elf/elf_encoder.h"
|
||||
#include "shared/source/device_binary_format/elf/ocl_elf.h"
|
||||
#include "shared/source/device_binary_format/patchtokens_decoder.h"
|
||||
#include "shared/source/helpers/addressing_mode_helper.h"
|
||||
#include "shared/source/helpers/api_specific_config.h"
|
||||
#include "shared/source/helpers/compiler_options_parser.h"
|
||||
#include "shared/source/helpers/debug_helpers.h"
|
||||
@ -72,11 +71,9 @@ std::string Program::getInternalOptions() const {
|
||||
if (force32BitAddressess && !isBuiltIn) {
|
||||
CompilerOptions::concatenateAppend(internalOptions, CompilerOptions::arch32bit);
|
||||
}
|
||||
auto &hwInfo = pClDevice->getHardwareInfo();
|
||||
auto disableStatelessToStatefulOptimization = DebugManager.flags.DisableStatelessToStatefulOptimization.get();
|
||||
auto forceToStatelessNeeded = AddressingModeHelper::forceToStatelessNeeded(options, CompilerOptions::smallerThan4gbBuffersOnly.str(), hwInfo);
|
||||
|
||||
if ((isBuiltIn && is32bit) || forceToStatelessNeeded || disableStatelessToStatefulOptimization) {
|
||||
if ((isBuiltIn && is32bit) || pClDevice->areSharedSystemAllocationsAllowed() ||
|
||||
DebugManager.flags.DisableStatelessToStatefulOptimization.get()) {
|
||||
CompilerOptions::concatenateAppend(internalOptions, CompilerOptions::greaterThan4gbBuffersRequired);
|
||||
}
|
||||
|
||||
@ -93,6 +90,7 @@ std::string Program::getInternalOptions() const {
|
||||
CompilerOptions::concatenateAppend(internalOptions, CompilerOptions::hasBufferOffsetArg);
|
||||
}
|
||||
|
||||
auto &hwInfo = pClDevice->getHardwareInfo();
|
||||
const auto &hwInfoConfig = *HwInfoConfig::get(hwInfo.platform.eProductFamily);
|
||||
if (hwInfoConfig.isForceEmuInt32DivRemSPWARequired(hwInfo)) {
|
||||
CompilerOptions::concatenateAppend(internalOptions, CompilerOptions::forceEmuInt32DivRemSP);
|
||||
|
@ -139,7 +139,6 @@ class UmStatelessCompressionWithStatefulAccess : public ProgramFixture,
|
||||
void SetUp() override {
|
||||
DebugManager.flags.EnableStatelessCompressionWithUnifiedMemory.set(1);
|
||||
DebugManager.flags.EnableSharedSystemUsmSupport.set(0);
|
||||
DebugManager.flags.UseSmallerThan4gbBuffersOnly.set(1);
|
||||
compareCompressedMemory = GetParam();
|
||||
|
||||
ProgramFixture::SetUp();
|
||||
|
@ -1459,21 +1459,17 @@ TEST_F(BuiltInTests, GivenTypeSourceWhenCreatingProgramFromCodeThenValidPointerI
|
||||
EXPECT_NE(nullptr, program.get());
|
||||
}
|
||||
|
||||
TEST_F(BuiltInTests, givenCreateProgramFromSourceWhenDeviceHasForceToStatelessRequiredOr32BitThenInternalOptionsHasGreaterThan4gbBuffersRequiredFlag) {
|
||||
TEST_F(BuiltInTests, givenCreateProgramFromSourceWhenDeviceSupportSharedSystemAllocationThenInternalOptionsDisableStosoFlag) {
|
||||
auto builtinsLib = std::unique_ptr<BuiltinsLib>(new BuiltinsLib());
|
||||
pClDevice->deviceInfo.sharedSystemMemCapabilities = CL_UNIFIED_SHARED_MEMORY_ACCESS_INTEL | CL_UNIFIED_SHARED_MEMORY_ATOMIC_ACCESS_INTEL | CL_UNIFIED_SHARED_MEMORY_CONCURRENT_ACCESS_INTEL | CL_UNIFIED_SHARED_MEMORY_CONCURRENT_ATOMIC_ACCESS_INTEL;
|
||||
pDevice->getRootDeviceEnvironment().getMutableHardwareInfo()->capabilityTable.sharedSystemMemCapabilities = 1;
|
||||
|
||||
const BuiltinCode bc = builtinsLib->getBuiltinCode(EBuiltInOps::CopyBufferToBuffer, BuiltinCode::ECodeType::Source, *pDevice);
|
||||
EXPECT_NE(0u, bc.resource.size());
|
||||
auto program = std::unique_ptr<Program>(BuiltinDispatchInfoBuilder::createProgramFromCode(bc, toClDeviceVector(*pClDevice)));
|
||||
EXPECT_NE(nullptr, program.get());
|
||||
auto builtinInternalOptions = program->getInternalOptions();
|
||||
|
||||
const auto &compilerHwInfoConfig = *CompilerHwInfoConfig::get(defaultHwInfo->platform.eProductFamily);
|
||||
if (compilerHwInfoConfig.isForceToStatelessRequired() || is32bit) {
|
||||
EXPECT_THAT(builtinInternalOptions, testing::HasSubstr(std::string(CompilerOptions::greaterThan4gbBuffersRequired)));
|
||||
} else {
|
||||
EXPECT_THAT(builtinInternalOptions, testing::Not(testing::HasSubstr(std::string(CompilerOptions::greaterThan4gbBuffersRequired))));
|
||||
}
|
||||
EXPECT_THAT(builtinInternalOptions, testing::HasSubstr(std::string(CompilerOptions::greaterThan4gbBuffersRequired)));
|
||||
}
|
||||
|
||||
TEST_F(BuiltInTests, GivenTypeIntermediateWhenCreatingProgramFromCodeThenNullPointerIsReturned) {
|
||||
@ -1521,9 +1517,8 @@ TEST_F(BuiltInTests, GivenForce32bitWhenCreatingProgramThenCorrectKernelIsCreate
|
||||
auto it = builtinInternalOptions.find(NEO::CompilerOptions::arch32bit.data());
|
||||
EXPECT_EQ(std::string::npos, it);
|
||||
|
||||
const auto &compilerHwInfoConfig = *CompilerHwInfoConfig::get(defaultHwInfo->platform.eProductFamily);
|
||||
it = builtinInternalOptions.find(NEO::CompilerOptions::greaterThan4gbBuffersRequired.data());
|
||||
if (is32bit || compilerHwInfoConfig.isForceToStatelessRequired()) {
|
||||
if (is32bit || pDevice->areSharedSystemAllocationsAllowed()) {
|
||||
EXPECT_NE(std::string::npos, it);
|
||||
} else {
|
||||
EXPECT_EQ(std::string::npos, it);
|
||||
|
@ -14,9 +14,7 @@
|
||||
#include "shared/source/device_binary_format/elf/ocl_elf.h"
|
||||
#include "shared/source/device_binary_format/patchtokens_decoder.h"
|
||||
#include "shared/source/gmm_helper/gmm_helper.h"
|
||||
#include "shared/source/helpers/addressing_mode_helper.h"
|
||||
#include "shared/source/helpers/aligned_memory.h"
|
||||
#include "shared/source/helpers/compiler_hw_info_config.h"
|
||||
#include "shared/source/helpers/hash.h"
|
||||
#include "shared/source/helpers/hw_helper.h"
|
||||
#include "shared/source/helpers/ptr_math.h"
|
||||
@ -1106,10 +1104,7 @@ TEST_F(ProgramFromSourceTest, GivenFlagsWhenCompilingProgramThenBuildOptionsHave
|
||||
// Check build options that were applied
|
||||
EXPECT_TRUE(CompilerOptions::contains(cip->buildOptions, CompilerOptions::fastRelaxedMath)) << cip->buildOptions;
|
||||
EXPECT_FALSE(CompilerOptions::contains(cip->buildInternalOptions, CompilerOptions::gtpinRera)) << cip->buildInternalOptions;
|
||||
|
||||
const auto &compilerHwInfoConfig = *CompilerHwInfoConfig::get(defaultHwInfo->platform.eProductFamily);
|
||||
|
||||
if (!compilerHwInfoConfig.isForceToStatelessRequired()) {
|
||||
if (!pDevice->areSharedSystemAllocationsAllowed()) {
|
||||
EXPECT_FALSE(CompilerOptions::contains(cip->buildInternalOptions, CompilerOptions::greaterThan4gbBuffersRequired)) << cip->buildInternalOptions;
|
||||
}
|
||||
EXPECT_TRUE(CompilerOptions::contains(cip->buildInternalOptions, pPlatform->getClDevice(0)->peekCompilerExtensions())) << cip->buildInternalOptions;
|
||||
@ -1655,140 +1650,6 @@ TEST_F(ProgramTests, WhenProgramIsCreatedThenCorrectOclVersionIsInOptions) {
|
||||
}
|
||||
}
|
||||
|
||||
// Checks AddressingModeHelper::forceToStatelessNeeded() for every combination of the
// UseSmallerThan4gbBuffersOnly debug flag (-1, 0, 1) and the presence of the
// "-cl-opt-smaller-than-4GB-buffers-only" option in the program options. Where the helper
// is expected to defer to the platform, the result is compared with isForceToStatelessRequired().
TEST_F(ProgramTests, whenForceToStatelessNeededIsCalledThenCorrectResultIsReturned) {
|
||||
DebugManagerStateRestore restorer;
|
||||
|
||||
// Exposes the otherwise inaccessible Program::options member to the test.
class MyMockProgram : public Program {
|
||||
public:
|
||||
using Program::options;
|
||||
using Program::Program;
|
||||
};
|
||||
|
||||
MyMockProgram program(pContext, false, toClDeviceVector(*pClDevice));
|
||||
const auto &compilerHwInfoConfig = *CompilerHwInfoConfig::get(pClDevice->getHardwareInfo().platform.eProductFamily);
|
||||
|
||||
// Flag = -1, no option: result must equal the platform requirement.
{
|
||||
DebugManager.flags.UseSmallerThan4gbBuffersOnly.set(-1);
|
||||
program.options = "";
|
||||
EXPECT_EQ(AddressingModeHelper::forceToStatelessNeeded(program.options, NEO::CompilerOptions::smallerThan4gbBuffersOnly.str(), pClDevice->getHardwareInfo()), compilerHwInfoConfig.isForceToStatelessRequired());
|
||||
}
|
||||
// Flag = -1, smaller-than-4GB option given: forcing stateless is not needed.
{
|
||||
DebugManager.flags.UseSmallerThan4gbBuffersOnly.set(-1);
|
||||
program.options = "-cl-opt-smaller-than-4GB-buffers-only";
|
||||
EXPECT_FALSE(AddressingModeHelper::forceToStatelessNeeded(program.options, NEO::CompilerOptions::smallerThan4gbBuffersOnly.str(), pClDevice->getHardwareInfo()));
|
||||
}
|
||||
// Flag = 0, no option: result must equal the platform requirement.
{
|
||||
DebugManager.flags.UseSmallerThan4gbBuffersOnly.set(0);
|
||||
program.options = "";
|
||||
EXPECT_EQ(AddressingModeHelper::forceToStatelessNeeded(program.options, NEO::CompilerOptions::smallerThan4gbBuffersOnly.str(), pClDevice->getHardwareInfo()), compilerHwInfoConfig.isForceToStatelessRequired());
|
||||
}
|
||||
// Flag = 0, smaller-than-4GB option given: result must still equal the platform requirement.
{
|
||||
DebugManager.flags.UseSmallerThan4gbBuffersOnly.set(0);
|
||||
program.options = "-cl-opt-smaller-than-4GB-buffers-only";
|
||||
EXPECT_EQ(AddressingModeHelper::forceToStatelessNeeded(program.options, NEO::CompilerOptions::smallerThan4gbBuffersOnly.str(), pClDevice->getHardwareInfo()), compilerHwInfoConfig.isForceToStatelessRequired());
|
||||
}
|
||||
// Flag = 1, no option: forcing stateless is not needed.
{
|
||||
DebugManager.flags.UseSmallerThan4gbBuffersOnly.set(1);
|
||||
program.options = "";
|
||||
EXPECT_FALSE(AddressingModeHelper::forceToStatelessNeeded(program.options, NEO::CompilerOptions::smallerThan4gbBuffersOnly.str(), pClDevice->getHardwareInfo()));
|
||||
}
|
||||
// Flag = 1, smaller-than-4GB option given: forcing stateless is not needed.
{
|
||||
DebugManager.flags.UseSmallerThan4gbBuffersOnly.set(1);
|
||||
program.options = "-cl-opt-smaller-than-4GB-buffers-only";
|
||||
EXPECT_FALSE(AddressingModeHelper::forceToStatelessNeeded(program.options, NEO::CompilerOptions::smallerThan4gbBuffersOnly.str(), pClDevice->getHardwareInfo()));
|
||||
}
|
||||
}
|
||||
|
||||
// Table-driven check of AddressingModeHelper::containsStatefulAccess() on a program's
// kernelInfoArray: a single pointer argument is expected to count as stateful when its bindful
// offset, its bindless offset, or both are set to 0x40, and as not stateful when both are undefined.
TEST_F(ProgramTests, whenContainsStatefulAccessIsCalledThenReturnCorrectResult) {
|
||||
// Each entry: (expected result, bindful offset, bindless offset).
std::vector<std::tuple<bool, SurfaceStateHeapOffset, CrossThreadDataOffset>> testParams = {
|
||||
{false, undefined<SurfaceStateHeapOffset>, undefined<CrossThreadDataOffset>},
|
||||
{true, 0x40, undefined<CrossThreadDataOffset>},
|
||||
{true, undefined<SurfaceStateHeapOffset>, 0x40},
|
||||
{true, 0x40, 0x40},
|
||||
|
||||
};
|
||||
|
||||
for (auto &[expectedResult, surfaceStateHeapOffset, crossThreadDataOffset] : testParams) {
|
||||
// A fresh program per case, so each one holds only the kernel added below.
MockProgram program(pContext, false, toClDeviceVector(*pClDevice));
|
||||
auto kernelInfo = std::make_unique<KernelInfo>();
|
||||
kernelInfo->kernelDescriptor.payloadMappings.explicitArgs.clear();
|
||||
auto argDescriptor = ArgDescriptor(ArgDescriptor::ArgTPointer);
|
||||
argDescriptor.as<ArgDescPointer>().bindful = surfaceStateHeapOffset;
|
||||
argDescriptor.as<ArgDescPointer>().bindless = crossThreadDataOffset;
|
||||
|
||||
kernelInfo->kernelDescriptor.payloadMappings.explicitArgs.push_back(argDescriptor);
|
||||
// The raw pointer is released from the unique_ptr and handed to the program (index 0).
program.addKernelInfo(kernelInfo.release(), 0);
|
||||
|
||||
EXPECT_EQ(expectedResult, AddressingModeHelper::containsStatefulAccess(program.buildInfos[0].kernelInfoArray));
|
||||
}
|
||||
}
|
||||
|
||||
// Table-driven check of Program::build() for user programs with stateful or stateless kernel
// arguments under different UseSmallerThan4gbBuffersOnly values, followed by a built-in
// program case. CL_BUILD_PROGRAM_FAILURE is expected only for a user program with stateful
// access and the flag set to 0, and only when isForceToStatelessRequired() is true.
TEST_F(ProgramTests, givenStatefulAndStatelessAccessesWhenProgramBuildIsCalledThenCorrectResultIsReturned) {
|
||||
DebugManagerStateRestore restorer;
|
||||
const auto &compilerHwInfoConfig = *CompilerHwInfoConfig::get(pClDevice->getHardwareInfo().platform.eProductFamily);
|
||||
|
||||
// Exposes Program internals needed by the test and stubs out binary processing.
class MyMockProgram : public Program {
|
||||
public:
|
||||
using Program::buildInfos;
|
||||
using Program::createdFrom;
|
||||
using Program::irBinary;
|
||||
using Program::irBinarySize;
|
||||
using Program::isBuiltIn;
|
||||
using Program::options;
|
||||
using Program::Program;
|
||||
using Program::sourceCode;
|
||||
|
||||
// Replaces buildInfos[0].kernelInfoArray with a single KernelInfo that has one pointer
// argument: bindful/bindless are 0x40 when isStateful, otherwise undefined.
void setAddressingMode(bool isStateful) {
|
||||
auto kernelInfo = std::make_unique<KernelInfo>();
|
||||
kernelInfo->kernelDescriptor.payloadMappings.explicitArgs.clear();
|
||||
auto argDescriptor = ArgDescriptor(ArgDescriptor::ArgTPointer);
|
||||
if (isStateful) {
|
||||
argDescriptor.as<ArgDescPointer>().bindful = 0x40;
|
||||
argDescriptor.as<ArgDescPointer>().bindless = 0x40;
|
||||
} else {
|
||||
argDescriptor.as<ArgDescPointer>().bindful = undefined<SurfaceStateHeapOffset>;
|
||||
argDescriptor.as<ArgDescPointer>().bindless = undefined<CrossThreadDataOffset>;
|
||||
}
|
||||
|
||||
kernelInfo->kernelDescriptor.payloadMappings.explicitArgs.push_back(argDescriptor);
|
||||
this->buildInfos[0].kernelInfoArray.clear();
|
||||
this->buildInfos[0].kernelInfoArray.push_back(kernelInfo.release());
|
||||
}
|
||||
|
||||
// Always reports success without processing any binary.
cl_int processGenBinary(const ClDevice &clDevice) override {
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
};
|
||||
|
||||
// Each entry: (expected build() result, stateful kernel argument,
// value for the UseSmallerThan4gbBuffersOnly debug flag).
std::array<std::tuple<int, bool, int32_t>, 3> testParams = {{{CL_SUCCESS, false, -1},
|
||||
{CL_SUCCESS, true, 1},
|
||||
{CL_BUILD_PROGRAM_FAILURE, true, 0}}};
|
||||
|
||||
for (auto &[result, isStatefulAccess, debuyKey] : testParams) {
|
||||
|
||||
// On platforms that do not require forcing stateless, every case is expected to succeed.
if (!compilerHwInfoConfig.isForceToStatelessRequired()) {
|
||||
result = CL_SUCCESS;
|
||||
}
|
||||
MyMockProgram program(pContext, false, toClDeviceVector(*pClDevice));
|
||||
program.isBuiltIn = false;
|
||||
program.sourceCode = "test_kernel";
|
||||
program.createdFrom = Program::CreatedFrom::SOURCE;
|
||||
program.setAddressingMode(isStatefulAccess);
|
||||
DebugManager.flags.UseSmallerThan4gbBuffersOnly.set(debuyKey);
|
||||
EXPECT_EQ(result, program.build(toClDeviceVector(*pClDevice), nullptr, false));
|
||||
}
|
||||
|
||||
// Built-in program with stateful access and the flag set to 0: build is expected to succeed.
{
|
||||
MyMockProgram programWithBuiltIn(pContext, true, toClDeviceVector(*pClDevice));
|
||||
programWithBuiltIn.isBuiltIn = true;
|
||||
// 16-byte IR buffer whose contents are left uninitialized.
programWithBuiltIn.irBinary.reset(new char[16]);
|
||||
programWithBuiltIn.irBinarySize = 16;
|
||||
programWithBuiltIn.setAddressingMode(true);
|
||||
DebugManager.flags.UseSmallerThan4gbBuffersOnly.set(0);
|
||||
EXPECT_EQ(CL_SUCCESS, programWithBuiltIn.build(toClDeviceVector(*pClDevice), nullptr, false));
|
||||
}
|
||||
}
|
||||
|
||||
TEST_F(ProgramTests, GivenForcedClVersionWhenProgramIsCreatedThenCorrectOclOptionIsPresent) {
|
||||
std::pair<unsigned int, std::string> testedValues[] = {
|
||||
{0, "-ocl-version=120"},
|
||||
@ -1833,6 +1694,14 @@ TEST_F(ProgramTests, WhenCreatingProgramThenBindlessIsEnabledOnlyIfDebugFlagIsEn
|
||||
}
|
||||
}
|
||||
|
||||
// Marks the device as supporting shared system memory (both in the CL device info and in the
// hardware capability table) and expects Program::getInternalOptions() to contain
// CompilerOptions::greaterThan4gbBuffersRequired.
TEST_F(ProgramTests, givenDeviceThatSupportsSharedSystemMemoryAllocationWhenProgramIsCompiledThenItForcesStatelessCompilation) {
|
||||
pClDevice->deviceInfo.sharedSystemMemCapabilities = CL_UNIFIED_SHARED_MEMORY_ACCESS_INTEL | CL_UNIFIED_SHARED_MEMORY_ATOMIC_ACCESS_INTEL | CL_UNIFIED_SHARED_MEMORY_CONCURRENT_ACCESS_INTEL | CL_UNIFIED_SHARED_MEMORY_CONCURRENT_ATOMIC_ACCESS_INTEL;
|
||||
pClDevice->getRootDeviceEnvironment().getMutableHardwareInfo()->capabilityTable.sharedSystemMemCapabilities = 1;
|
||||
MockProgram program(pContext, false, toClDeviceVector(*pClDevice));
|
||||
auto internalOptions = program.getInternalOptions();
|
||||
// On failure, the full options string is streamed into the assertion message.
EXPECT_TRUE(CompilerOptions::contains(internalOptions.c_str(), CompilerOptions::greaterThan4gbBuffersRequired)) << internalOptions;
|
||||
}
|
||||
|
||||
TEST_F(ProgramTests, GivenForce32BitAddressessWhenProgramIsCreatedThenGreaterThan4gbBuffersRequiredIsCorrectlySet) {
|
||||
DebugManagerStateRestore dbgRestorer;
|
||||
cl_int retVal = CL_DEVICE_NOT_FOUND;
|
||||
@ -1841,8 +1710,7 @@ TEST_F(ProgramTests, GivenForce32BitAddressessWhenProgramIsCreatedThenGreaterTha
|
||||
const_cast<DeviceInfo *>(&pDevice->getDeviceInfo())->force32BitAddressess = true;
|
||||
MockProgram program(pContext, false, toClDeviceVector(*pClDevice));
|
||||
auto internalOptions = program.getInternalOptions();
|
||||
const auto &compilerHwInfoConfig = *CompilerHwInfoConfig::get(defaultHwInfo->platform.eProductFamily);
|
||||
if (compilerHwInfoConfig.isForceToStatelessRequired()) {
|
||||
if (pDevice->areSharedSystemAllocationsAllowed()) {
|
||||
EXPECT_TRUE(CompilerOptions::contains(internalOptions, CompilerOptions::greaterThan4gbBuffersRequired)) << internalOptions;
|
||||
} else {
|
||||
EXPECT_FALSE(CompilerOptions::contains(internalOptions, NEO::CompilerOptions::greaterThan4gbBuffersRequired)) << internalOptions;
|
||||
@ -1857,8 +1725,7 @@ TEST_F(ProgramTests, Given32bitSupportWhenProgramIsCreatedThenGreaterThan4gbBuff
|
||||
DebugManager.flags.DisableStatelessToStatefulOptimization.set(false);
|
||||
std::unique_ptr<MockProgram> program{Program::createBuiltInFromSource<MockProgram>("", pContext, pContext->getDevices(), nullptr)};
|
||||
auto internalOptions = program->getInternalOptions();
|
||||
const auto &compilerHwInfoConfig = *CompilerHwInfoConfig::get(defaultHwInfo->platform.eProductFamily);
|
||||
if ((false == compilerHwInfoConfig.isForceToStatelessRequired()) && (false == is32bit)) {
|
||||
if ((false == pDevice->areSharedSystemAllocationsAllowed()) && (false == is32bit)) {
|
||||
EXPECT_FALSE(CompilerOptions::contains(internalOptions, NEO::CompilerOptions::greaterThan4gbBuffersRequired)) << internalOptions;
|
||||
} else {
|
||||
EXPECT_TRUE(CompilerOptions::contains(internalOptions, NEO::CompilerOptions::greaterThan4gbBuffersRequired)) << internalOptions;
|
||||
@ -1885,15 +1752,13 @@ TEST_F(ProgramTests, Force32BitAddressessWhenProgramIsCreatedThenGreaterThan4gbB
|
||||
const_cast<DeviceInfo *>(&pDevice->getDeviceInfo())->force32BitAddressess = true;
|
||||
std::unique_ptr<MockProgram> program{Program::createBuiltInFromSource<MockProgram>("", pContext, pContext->getDevices(), nullptr)};
|
||||
auto internalOptions = program->getInternalOptions();
|
||||
const auto &compilerHwInfoConfig = *CompilerHwInfoConfig::get(defaultHwInfo->platform.eProductFamily);
|
||||
|
||||
if (is32bit) {
|
||||
EXPECT_TRUE(CompilerOptions::contains(internalOptions, CompilerOptions::greaterThan4gbBuffersRequired)) << internalOptions;
|
||||
} else {
|
||||
if (compilerHwInfoConfig.isForceToStatelessRequired()) {
|
||||
EXPECT_TRUE(CompilerOptions::contains(internalOptions, NEO::CompilerOptions::greaterThan4gbBuffersRequired)) << internalOptions;
|
||||
} else {
|
||||
if (false == pDevice->areSharedSystemAllocationsAllowed()) {
|
||||
EXPECT_FALSE(CompilerOptions::contains(internalOptions, NEO::CompilerOptions::greaterThan4gbBuffersRequired)) << internalOptions;
|
||||
} else {
|
||||
EXPECT_TRUE(CompilerOptions::contains(internalOptions, NEO::CompilerOptions::greaterThan4gbBuffersRequired)) << internalOptions;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -111,7 +111,6 @@ ForceCsrFlushing = 0
|
||||
ForceCsrReprogramming = 0
|
||||
OmitTimestampPacketDependencies = 0
|
||||
DisableStatelessToStatefulOptimization = 0
|
||||
UseSmallerThan4gbBuffersOnly = -1
|
||||
DisableConcurrentBlockExecution = 0
|
||||
UseNoRingFlushesKmdMode = 1
|
||||
DisableZeroCopyForUseHostPtr = 0
|
||||
|
@ -16,7 +16,6 @@
|
||||
namespace NEO {
|
||||
namespace CompilerOptions {
|
||||
static constexpr ConstStringRef greaterThan4gbBuffersRequired = "-cl-intel-greater-than-4GB-buffer-required";
|
||||
static constexpr ConstStringRef smallerThan4gbBuffersOnly = "-cl-opt-smaller-than-4GB-buffers-only";
|
||||
static constexpr ConstStringRef hasBufferOffsetArg = "-cl-intel-has-buffer-offset-arg";
|
||||
static constexpr ConstStringRef kernelDebugEnable = "-cl-kernel-debug-enable";
|
||||
static constexpr ConstStringRef arch32bit = "-m32";
|
||||
|
@ -238,7 +238,6 @@ DECLARE_DEBUG_VARIABLE(bool, DisableStatelessToStatefulOptimization, false, "Dis
|
||||
DECLARE_DEBUG_VARIABLE(bool, DisableConcurrentBlockExecution, false, "disables concurrent block kernel execution")
|
||||
DECLARE_DEBUG_VARIABLE(bool, UseNoRingFlushesKmdMode, true, "Windows only, passes flag to KMD that informs KMD to not emit any ring buffer flushes.")
|
||||
DECLARE_DEBUG_VARIABLE(bool, DisableZeroCopyForUseHostPtr, false, "When active all buffer allocations created with CL_MEM_USE_HOST_PTR flag will not share memory with CPU.")
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, UseSmallerThan4gbBuffersOnly, -1, " -1: default, 0: disabled, 1: enabled. When enabled, the driver will not force stateless accesses on devices with default stateless addressing mode")
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, EnableHostPtrTracking, -1, "Enable host ptr tracking: -1 - default platform setting, 0 - disabled, 1 - enabled")
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, MaxHwThreadsPercent, 0, "If not zero then maximum number of used HW threads is capped to max * MaxHwThreadsPercent / 100")
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, MinHwThreadsUnoccupied, 0, "If not zero then maximum number of used HW threads is reduced by MinHwThreadsUnoccupied")
|
||||
|
@ -7,8 +7,6 @@
|
||||
set(NEO_CORE_HELPERS
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/abort.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/addressing_mode_helper.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/addressing_mode_helper.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/address_patch.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/affinity_mask.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/aligned_memory.h
|
||||
|
@ -1,45 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2021-2022 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "shared/source/helpers/addressing_mode_helper.h"
|
||||
|
||||
#include "shared/source/compiler_interface/compiler_options/compiler_options_base.h"
|
||||
#include "shared/source/debug_settings/debug_settings_manager.h"
|
||||
#include "shared/source/helpers/compiler_hw_info_config.h"
|
||||
#include "shared/source/program/kernel_info.h"
|
||||
|
||||
namespace NEO::AddressingModeHelper {
|
||||
|
||||
bool forceToStatelessNeeded(const std::string &options, const std::string &smallerThan4GbBuffersOnlyOption, const HardwareInfo &hwInfo) {
|
||||
auto preferStateful = false;
|
||||
if (NEO::CompilerOptions::contains(options, smallerThan4GbBuffersOnlyOption)) {
|
||||
preferStateful = true;
|
||||
}
|
||||
if (NEO::DebugManager.flags.UseSmallerThan4gbBuffersOnly.get() != -1) {
|
||||
preferStateful = static_cast<bool>(NEO::DebugManager.flags.UseSmallerThan4gbBuffersOnly.get());
|
||||
}
|
||||
|
||||
const auto &compilerHwInfoConfig = *CompilerHwInfoConfig::get(hwInfo.platform.eProductFamily);
|
||||
auto forceStateless = !preferStateful && compilerHwInfoConfig.isForceToStatelessRequired();
|
||||
return forceStateless;
|
||||
}
|
||||
|
||||
// Scans the explicit arguments of every kernel in `kernelInfos` and reports
// whether at least one pointer argument carries a valid bindless or bindful
// offset. Returns false when no such argument exists (including when the
// vector is empty).
bool containsStatefulAccess(const std::vector<KernelInfo *> &kernelInfos) {
    for (auto *currentKernel : kernelInfos) {
        const auto &explicitArgs = currentKernel->kernelDescriptor.payloadMappings.explicitArgs;
        for (const auto &explicitArg : explicitArgs) {
            // Only pointer arguments are inspected.
            if (!explicitArg.is<NEO::ArgDescriptor::ArgTPointer>()) {
                continue;
            }

            const auto &pointerArg = explicitArg.as<NEO::ArgDescPointer>();
            if (NEO::isValidOffset(pointerArg.bindless) || NEO::isValidOffset(pointerArg.bindful)) {
                return true;
            }
        }
    }

    return false;
}
|
||||
|
||||
} // namespace NEO::AddressingModeHelper
|
@ -1,22 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2021-2022 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
namespace NEO {
|
||||
// Forward declarations: the signatures below only take these types by
// reference or by pointer.
struct KernelInfo;
|
||||
struct HardwareInfo;
|
||||
|
||||
// Helpers for deciding between stateful and stateless addressing.
namespace AddressingModeHelper {
|
||||
// Returns true when stateless addressing has to be forced: stateful accesses
// are not preferred and the CompilerHwInfoConfig of hwInfo's product family
// reports isForceToStatelessRequired(). Stateful is preferred when `options`
// contains `smallerThan4GbBuffersOnlyOption`; a UseSmallerThan4gbBuffersOnly
// debug flag value other than -1 overrides that preference.
bool forceToStatelessNeeded(const std::string &options, const std::string &smallerThan4GbBuffersOnlyOption, const HardwareInfo &hwInfo);
|
||||
// Returns true when any explicit pointer argument of any kernel in
// `kernelInfos` has a valid bindless or bindful offset; false otherwise.
bool containsStatefulAccess(const std::vector<KernelInfo *> &kernelInfos);
|
||||
|
||||
} // namespace AddressingModeHelper
|
||||
} // namespace NEO
|
Reference in New Issue
Block a user