Disable the stateless-to-stateful with offset optimization on BDW.

- BDW doesn't support this optimization

Change-Id: Ic88556214c8d9a14ddb093b7c25587575e616f83
This commit is contained in:
Mrozek, Michal
2018-07-10 13:22:11 +02:00
committed by sys_ocldev
parent 2589286d42
commit 92266e4ad1
11 changed files with 54 additions and 26 deletions

View File

@@ -42,6 +42,7 @@ void HwHelperHw<Family>::setupHardwareCapabilities(HardwareCapabilities *caps) {
caps->image3DMaxHeight = 2048;
caps->image3DMaxWidth = 2048;
caps->maxMemAllocSize = 2 * MemoryConstants::gigaByte - 8 * MemoryConstants::megaByte;
caps->isStatelesToStatefullWithOffsetSupported = false;
}
template class HwHelperHw<Family>;

View File

@@ -41,6 +41,7 @@ void HwHelperHw<Family>::setupHardwareCapabilities(HardwareCapabilities *caps) {
//With stateful messages we have an allocation cap of 4GB
//Reason to subtract 8KB is that the driver may pad the buffer with additional pages for over-fetching.
caps->maxMemAllocSize = (4ULL * MemoryConstants::gigaByte) - (8ULL * MemoryConstants::kiloByte);
caps->isStatelesToStatefullWithOffsetSupported = true;
}
template <typename Family>
@@ -56,4 +57,5 @@ SipKernelType HwHelperHw<Family>::getSipKernelType(bool debuggingActive) {
}
return SipKernelType::DbgCsr;
}
} // namespace OCLRT

View File

@@ -81,6 +81,7 @@ struct HardwareCapabilities {
size_t image3DMaxWidth;
size_t image3DMaxHeight;
uint64_t maxMemAllocSize;
bool isStatelesToStatefullWithOffsetSupported;
};
struct HardwareInfo {

View File

@@ -80,7 +80,6 @@ DECLARE_DEBUG_VARIABLE(bool, EnableNV12, true, "Enables NV12 extension")
DECLARE_DEBUG_VARIABLE(bool, EnablePackedYuv, true, "Enables cl_packed_yuv extension")
DECLARE_DEBUG_VARIABLE(bool, EnableIntelVme, true, "Enables cl_intel_motion_estimation extension")
DECLARE_DEBUG_VARIABLE(bool, EnableIntelAdvancedVme, true, "Enables cl_intel_advanced_motion_estimation extension")
DECLARE_DEBUG_VARIABLE(bool, EnableStatelessToStatefulBufferOffsetOpt, true, "Temporary debug variable to help in enabling buffer-offset improvement of the stateless to stateful optimization")
DECLARE_DEBUG_VARIABLE(bool, EnableDeferredDeleter, true, "Enables async deleter")
DECLARE_DEBUG_VARIABLE(bool, EnableAsyncDestroyAllocations, true, "Enables async destroying graphics allocations in mem obj destructor")
DECLARE_DEBUG_VARIABLE(bool, EnableAsyncEventsHandler, true, "Enables async events handler")
@@ -88,6 +87,7 @@ DECLARE_DEBUG_VARIABLE(bool, EnableForcePin, true, "Enables early pinning for me
DECLARE_DEBUG_VARIABLE(bool, EnableComputeWorkSizeND, true, "Enables diffrent algorithm to compute local work size")
DECLARE_DEBUG_VARIABLE(bool, EnableComputeWorkSizeSquared, false, "Enables algorithm to compute the most squared work group as possible")
DECLARE_DEBUG_VARIABLE(bool, EnableVaLibCalls, true, "Enable cl-va sharing lib calls")
DECLARE_DEBUG_VARIABLE(int32_t, EnableStatelessToStatefulBufferOffsetOpt, -1, "-1: dont override, 0: disable, 1: enable, Enables buffer-offset improvement of the stateless to stateful optimization")
DECLARE_DEBUG_VARIABLE(int32_t, CreateMultipleDevices, 0, "0: default - disable, 1+: Driver will create multiple (N) devices during initialization.")
DECLARE_DEBUG_VARIABLE(int32_t, Enable64kbpages, -1, "-1: default behaviour, 0 Disables, 1 Enables support for 64KB pages for driver allocated fine grain svm buffers")
DECLARE_DEBUG_VARIABLE(int32_t, OverrideEnableKmdNotify, -1, "-1: dont override, 0: disable, 1: enable")

View File

@@ -25,6 +25,7 @@
#include "runtime/context/context.h"
#include "runtime/helpers/debug_helpers.h"
#include "runtime/helpers/string.h"
#include "runtime/helpers/hw_helper.h"
#include "runtime/memory_manager/memory_manager.h"
#include "runtime/compiler_interface/compiler_interface.h"
@@ -88,10 +89,17 @@ Program::Program(Context *context, bool isBuiltIn) : context(context), isBuiltIn
internalOptions += "-cl-intel-greater-than-4GB-buffer-required ";
}
kernelDebugEnabled = pDevice->isSourceLevelDebuggerActive();
}
if (DebugManager.flags.EnableStatelessToStatefulBufferOffsetOpt.get()) {
internalOptions += "-cl-intel-has-buffer-offset-arg ";
HardwareCapabilities hwCaps = {0};
HwHelper::get(pDevice->getHardwareInfo().pPlatform->eRenderCoreFamily).setupHardwareCapabilities(&hwCaps);
auto enableStatelessToStatefullWithOffset = hwCaps.isStatelesToStatefullWithOffsetSupported;
if (DebugManager.flags.EnableStatelessToStatefulBufferOffsetOpt.get() != -1) {
enableStatelessToStatefullWithOffset = DebugManager.flags.EnableStatelessToStatefulBufferOffsetOpt.get() != 0;
}
if (enableStatelessToStatefullWithOffset) {
internalOptions += "-cl-intel-has-buffer-offset-arg ";
}
}
internalOptions += "-fpreserve-vec3-type ";

View File

@@ -66,4 +66,5 @@ GEN8TEST_F(HwHelperTestBdw, givenGen8PlatformWhenSetupHardwareCapabilitiesIsCall
EXPECT_EQ(2048u, hwCaps.image3DMaxHeight);
EXPECT_EQ(2048u, hwCaps.image3DMaxWidth);
EXPECT_EQ(2 * MemoryConstants::gigaByte - 8 * MemoryConstants::megaByte, hwCaps.maxMemAllocSize);
}
EXPECT_FALSE(hwCaps.isStatelesToStatefullWithOffsetSupported);
}

View File

@@ -70,4 +70,4 @@ GEN9TEST_F(HwHelperTestSkl, givenDebuggingActiveWhenSipKernelTypeIsQueriedThenDb
auto sipType = helper.getSipKernelType(true);
EXPECT_EQ(SipKernelType::DbgCsrLocal, sipType);
}
}

View File

@@ -30,4 +30,5 @@ void testDefaultImplementationOfSetupHardwareCapabilities(HwHelper &hwHelper) {
EXPECT_EQ(16384u, hwCaps.image3DMaxHeight);
EXPECT_EQ(16384u, hwCaps.image3DMaxWidth);
EXPECT_TRUE(hwCaps.isStatelesToStatefullWithOffsetSupported);
}

View File

@@ -27,6 +27,7 @@
#include "runtime/helpers/file_io.h"
#include "runtime/helpers/options.h"
#include "runtime/os_interface/debug_settings_manager.h"
#include "unit_tests/helpers/debug_manager_state_restore.h"
#include "gmock/gmock.h"
#include <algorithm>
@@ -362,22 +363,16 @@ TEST(OfflineCompilerTest, parseCmdLine) {
delete mockOfflineCompiler;
}
TEST(OfflineCompilerTest, parseDebugSettings) {
MockOfflineCompiler *mockOfflineCompiler = new MockOfflineCompiler();
ASSERT_NE(nullptr, mockOfflineCompiler);
TEST(OfflineCompilerTest, givenStatelessToStatefullOptimizationEnabledWhenDebugSettingsAreParsedThenOptimizationStringIsPresent) {
DebugManagerStateRestore stateRestore;
MockOfflineCompiler mockOfflineCompiler;
DebugManager.flags.EnableStatelessToStatefulBufferOffsetOpt.set(1);
bool isBufferOffsetOpt = DebugManager.flags.EnableStatelessToStatefulBufferOffsetOpt.get();
DebugManager.flags.EnableStatelessToStatefulBufferOffsetOpt.set(true);
mockOfflineCompiler.parseDebugSettings();
mockOfflineCompiler->parseDebugSettings();
DebugManager.flags.EnableStatelessToStatefulBufferOffsetOpt.set(isBufferOffsetOpt);
std::string internalOptions = mockOfflineCompiler->getInternalOptions();
std::string internalOptions = mockOfflineCompiler.getInternalOptions();
size_t found = internalOptions.find("-cl-intel-has-buffer-offset-arg");
EXPECT_NE(std::string::npos, found);
delete mockOfflineCompiler;
}
TEST(OfflineCompilerTest, getStringWithinDelimiters) {

View File

@@ -26,6 +26,7 @@
#include "runtime/indirect_heap/indirect_heap.h"
#include "runtime/helpers/aligned_memory.h"
#include "runtime/helpers/hash.h"
#include "runtime/helpers/hw_helper.h"
#include "runtime/helpers/kernel_commands.h"
#include "runtime/helpers/ptr_math.h"
#include "runtime/helpers/string.h"
@@ -1866,15 +1867,26 @@ TEST_F(ProgramTests, BuiltinProgramCreateSetsProperInternalOptionsWhenForcing32B
TEST_F(ProgramTests, BuiltinProgramCreateSetsProperInternalOptionsEnablingStatelessToStatefulBufferOffsetOptimization) {
DebugManagerStateRestore dbgRestorer;
DebugManager.flags.EnableStatelessToStatefulBufferOffsetOpt.set(true);
std::unique_ptr<MockProgram> pProgram(Program::create<MockProgram>("", pContext, *pDevice, true, nullptr));
DebugManager.flags.EnableStatelessToStatefulBufferOffsetOpt.set(1);
cl_int errorCode = CL_SUCCESS;
const char programSource[] = "program";
const char *programPointer = programSource;
const char **programSources = reinterpret_cast<const char **>(&programPointer);
size_t length = sizeof(programSource);
std::unique_ptr<MockProgram> pProgram(Program::create<MockProgram>(pContext, 1u, programSources, &length, errorCode));
EXPECT_THAT(pProgram->getInternalOptions(), testing::HasSubstr(std::string("-cl-intel-has-buffer-offset-arg ")));
}
TEST_F(ProgramTests, givenStatelessToStatefullOptimizationOffWHenProgramIsCreatedThenOptimizationStringIsNotPresent) {
DebugManagerStateRestore dbgRestorer;
DebugManager.flags.EnableStatelessToStatefulBufferOffsetOpt.set(false);
std::unique_ptr<MockProgram> pProgram(Program::create<MockProgram>("", pContext, *pDevice, true, nullptr));
DebugManager.flags.EnableStatelessToStatefulBufferOffsetOpt.set(0);
cl_int errorCode = CL_SUCCESS;
const char programSource[] = "program";
const char *programPointer = programSource;
const char **programSources = reinterpret_cast<const char **>(&programPointer);
size_t length = sizeof(programSource);
std::unique_ptr<MockProgram> pProgram(Program::create<MockProgram>(pContext, 1u, programSources, &length, errorCode));
EXPECT_THAT(pProgram->getInternalOptions(), Not(testing::HasSubstr(std::string("-cl-intel-has-buffer-offset-arg "))));
}
@@ -2659,11 +2671,18 @@ TEST_F(Program32BitTests, givenDeviceWhenProgramIsCreatedThenProgramCountInDevic
EXPECT_EQ(1u, device->getProgramCount());
}
TEST_F(ProgramTests, givenNewProgramTheStatelessToStatefulBufferOffsetOtimizationIsDisabled) {
MockProgram prog;
TEST_F(ProgramTests, givenNewProgramTheStatelessToStatefulBufferOffsetOtimizationIsMatchingThePlatformEnablingStatus) {
MockProgram prog(pContext, false);
auto &internalOpts = prog.getInternalOptions();
auto it = internalOpts.find("-cl-intel-has-buffer-offset-arg ");
EXPECT_NE(std::string::npos, it);
HardwareCapabilities hwCaps = {0};
HwHelper::get(prog.getDevice(0).getHardwareInfo().pPlatform->eRenderCoreFamily).setupHardwareCapabilities(&hwCaps);
if (hwCaps.isStatelesToStatefullWithOffsetSupported) {
EXPECT_NE(std::string::npos, it);
} else {
EXPECT_EQ(std::string::npos, it);
}
}
template <int32_t ErrCodeToReturn, bool spirv = true>

View File

@@ -31,7 +31,7 @@ SchedulerSimulationReturnInstance = 0
DisableConcurrentBlockExecution = 0
ResidencyDebugEnable = 0
ForcePreemptionMode = -1
EnableStatelessToStatefulBufferOffsetOpt = 1
EnableStatelessToStatefulBufferOffsetOpt = -1
TbxPort = 4321
TbxServer = 127.0.0.1
EnableDeferredDeleter = 1