Disable stateless-to-stateful with offset optimization on BDW.

- BDW (Gen8) doesn't support this optimization.

Change-Id: Ic88556214c8d9a14ddb093b7c25587575e616f83
This commit is contained in:
Mrozek, Michal
2018-07-10 13:22:11 +02:00
committed by sys_ocldev
parent 2589286d42
commit 92266e4ad1
11 changed files with 54 additions and 26 deletions

View File

@@ -42,6 +42,7 @@ void HwHelperHw<Family>::setupHardwareCapabilities(HardwareCapabilities *caps) {
caps->image3DMaxHeight = 2048; caps->image3DMaxHeight = 2048;
caps->image3DMaxWidth = 2048; caps->image3DMaxWidth = 2048;
caps->maxMemAllocSize = 2 * MemoryConstants::gigaByte - 8 * MemoryConstants::megaByte; caps->maxMemAllocSize = 2 * MemoryConstants::gigaByte - 8 * MemoryConstants::megaByte;
caps->isStatelesToStatefullWithOffsetSupported = false;
} }
template class HwHelperHw<Family>; template class HwHelperHw<Family>;

View File

@@ -41,6 +41,7 @@ void HwHelperHw<Family>::setupHardwareCapabilities(HardwareCapabilities *caps) {
//With statefull messages we have an allocation cap of 4GB //With statefull messages we have an allocation cap of 4GB
//Reason to subtract 8KB is that driver may pad the buffer with addition pages for over fetching.. //Reason to subtract 8KB is that driver may pad the buffer with addition pages for over fetching..
caps->maxMemAllocSize = (4ULL * MemoryConstants::gigaByte) - (8ULL * MemoryConstants::kiloByte); caps->maxMemAllocSize = (4ULL * MemoryConstants::gigaByte) - (8ULL * MemoryConstants::kiloByte);
caps->isStatelesToStatefullWithOffsetSupported = true;
} }
template <typename Family> template <typename Family>
@@ -56,4 +57,5 @@ SipKernelType HwHelperHw<Family>::getSipKernelType(bool debuggingActive) {
} }
return SipKernelType::DbgCsr; return SipKernelType::DbgCsr;
} }
} // namespace OCLRT } // namespace OCLRT

View File

@@ -81,6 +81,7 @@ struct HardwareCapabilities {
size_t image3DMaxWidth; size_t image3DMaxWidth;
size_t image3DMaxHeight; size_t image3DMaxHeight;
uint64_t maxMemAllocSize; uint64_t maxMemAllocSize;
bool isStatelesToStatefullWithOffsetSupported;
}; };
struct HardwareInfo { struct HardwareInfo {

View File

@@ -80,7 +80,6 @@ DECLARE_DEBUG_VARIABLE(bool, EnableNV12, true, "Enables NV12 extension")
DECLARE_DEBUG_VARIABLE(bool, EnablePackedYuv, true, "Enables cl_packed_yuv extension") DECLARE_DEBUG_VARIABLE(bool, EnablePackedYuv, true, "Enables cl_packed_yuv extension")
DECLARE_DEBUG_VARIABLE(bool, EnableIntelVme, true, "Enables cl_intel_motion_estimation extension") DECLARE_DEBUG_VARIABLE(bool, EnableIntelVme, true, "Enables cl_intel_motion_estimation extension")
DECLARE_DEBUG_VARIABLE(bool, EnableIntelAdvancedVme, true, "Enables cl_intel_advanced_motion_estimation extension") DECLARE_DEBUG_VARIABLE(bool, EnableIntelAdvancedVme, true, "Enables cl_intel_advanced_motion_estimation extension")
DECLARE_DEBUG_VARIABLE(bool, EnableStatelessToStatefulBufferOffsetOpt, true, "Temporary debug variable to help in enabling buffer-offset improvement of the stateless to stateful optimization")
DECLARE_DEBUG_VARIABLE(bool, EnableDeferredDeleter, true, "Enables async deleter") DECLARE_DEBUG_VARIABLE(bool, EnableDeferredDeleter, true, "Enables async deleter")
DECLARE_DEBUG_VARIABLE(bool, EnableAsyncDestroyAllocations, true, "Enables async destroying graphics allocations in mem obj destructor") DECLARE_DEBUG_VARIABLE(bool, EnableAsyncDestroyAllocations, true, "Enables async destroying graphics allocations in mem obj destructor")
DECLARE_DEBUG_VARIABLE(bool, EnableAsyncEventsHandler, true, "Enables async events handler") DECLARE_DEBUG_VARIABLE(bool, EnableAsyncEventsHandler, true, "Enables async events handler")
@@ -88,6 +87,7 @@ DECLARE_DEBUG_VARIABLE(bool, EnableForcePin, true, "Enables early pinning for me
DECLARE_DEBUG_VARIABLE(bool, EnableComputeWorkSizeND, true, "Enables diffrent algorithm to compute local work size") DECLARE_DEBUG_VARIABLE(bool, EnableComputeWorkSizeND, true, "Enables diffrent algorithm to compute local work size")
DECLARE_DEBUG_VARIABLE(bool, EnableComputeWorkSizeSquared, false, "Enables algorithm to compute the most squared work group as possible") DECLARE_DEBUG_VARIABLE(bool, EnableComputeWorkSizeSquared, false, "Enables algorithm to compute the most squared work group as possible")
DECLARE_DEBUG_VARIABLE(bool, EnableVaLibCalls, true, "Enable cl-va sharing lib calls") DECLARE_DEBUG_VARIABLE(bool, EnableVaLibCalls, true, "Enable cl-va sharing lib calls")
DECLARE_DEBUG_VARIABLE(int32_t, EnableStatelessToStatefulBufferOffsetOpt, -1, "-1: dont override, 0: disable, 1: enable, Enables buffer-offset improvement of the stateless to stateful optimization")
DECLARE_DEBUG_VARIABLE(int32_t, CreateMultipleDevices, 0, "0: default - disable, 1+: Driver will create multiple (N) devices during initialization.") DECLARE_DEBUG_VARIABLE(int32_t, CreateMultipleDevices, 0, "0: default - disable, 1+: Driver will create multiple (N) devices during initialization.")
DECLARE_DEBUG_VARIABLE(int32_t, Enable64kbpages, -1, "-1: default behaviour, 0 Disables, 1 Enables support for 64KB pages for driver allocated fine grain svm buffers") DECLARE_DEBUG_VARIABLE(int32_t, Enable64kbpages, -1, "-1: default behaviour, 0 Disables, 1 Enables support for 64KB pages for driver allocated fine grain svm buffers")
DECLARE_DEBUG_VARIABLE(int32_t, OverrideEnableKmdNotify, -1, "-1: dont override, 0: disable, 1: enable") DECLARE_DEBUG_VARIABLE(int32_t, OverrideEnableKmdNotify, -1, "-1: dont override, 0: disable, 1: enable")

View File

@@ -25,6 +25,7 @@
#include "runtime/context/context.h" #include "runtime/context/context.h"
#include "runtime/helpers/debug_helpers.h" #include "runtime/helpers/debug_helpers.h"
#include "runtime/helpers/string.h" #include "runtime/helpers/string.h"
#include "runtime/helpers/hw_helper.h"
#include "runtime/memory_manager/memory_manager.h" #include "runtime/memory_manager/memory_manager.h"
#include "runtime/compiler_interface/compiler_interface.h" #include "runtime/compiler_interface/compiler_interface.h"
@@ -88,10 +89,17 @@ Program::Program(Context *context, bool isBuiltIn) : context(context), isBuiltIn
internalOptions += "-cl-intel-greater-than-4GB-buffer-required "; internalOptions += "-cl-intel-greater-than-4GB-buffer-required ";
} }
kernelDebugEnabled = pDevice->isSourceLevelDebuggerActive(); kernelDebugEnabled = pDevice->isSourceLevelDebuggerActive();
}
if (DebugManager.flags.EnableStatelessToStatefulBufferOffsetOpt.get()) { HardwareCapabilities hwCaps = {0};
internalOptions += "-cl-intel-has-buffer-offset-arg "; HwHelper::get(pDevice->getHardwareInfo().pPlatform->eRenderCoreFamily).setupHardwareCapabilities(&hwCaps);
auto enableStatelessToStatefullWithOffset = hwCaps.isStatelesToStatefullWithOffsetSupported;
if (DebugManager.flags.EnableStatelessToStatefulBufferOffsetOpt.get() != -1) {
enableStatelessToStatefullWithOffset = DebugManager.flags.EnableStatelessToStatefulBufferOffsetOpt.get() != 0;
}
if (enableStatelessToStatefullWithOffset) {
internalOptions += "-cl-intel-has-buffer-offset-arg ";
}
} }
internalOptions += "-fpreserve-vec3-type "; internalOptions += "-fpreserve-vec3-type ";

View File

@@ -66,4 +66,5 @@ GEN8TEST_F(HwHelperTestBdw, givenGen8PlatformWhenSetupHardwareCapabilitiesIsCall
EXPECT_EQ(2048u, hwCaps.image3DMaxHeight); EXPECT_EQ(2048u, hwCaps.image3DMaxHeight);
EXPECT_EQ(2048u, hwCaps.image3DMaxWidth); EXPECT_EQ(2048u, hwCaps.image3DMaxWidth);
EXPECT_EQ(2 * MemoryConstants::gigaByte - 8 * MemoryConstants::megaByte, hwCaps.maxMemAllocSize); EXPECT_EQ(2 * MemoryConstants::gigaByte - 8 * MemoryConstants::megaByte, hwCaps.maxMemAllocSize);
} EXPECT_FALSE(hwCaps.isStatelesToStatefullWithOffsetSupported);
}

View File

@@ -70,4 +70,4 @@ GEN9TEST_F(HwHelperTestSkl, givenDebuggingActiveWhenSipKernelTypeIsQueriedThenDb
auto sipType = helper.getSipKernelType(true); auto sipType = helper.getSipKernelType(true);
EXPECT_EQ(SipKernelType::DbgCsrLocal, sipType); EXPECT_EQ(SipKernelType::DbgCsrLocal, sipType);
} }

View File

@@ -30,4 +30,5 @@ void testDefaultImplementationOfSetupHardwareCapabilities(HwHelper &hwHelper) {
EXPECT_EQ(16384u, hwCaps.image3DMaxHeight); EXPECT_EQ(16384u, hwCaps.image3DMaxHeight);
EXPECT_EQ(16384u, hwCaps.image3DMaxWidth); EXPECT_EQ(16384u, hwCaps.image3DMaxWidth);
EXPECT_TRUE(hwCaps.isStatelesToStatefullWithOffsetSupported);
} }

View File

@@ -27,6 +27,7 @@
#include "runtime/helpers/file_io.h" #include "runtime/helpers/file_io.h"
#include "runtime/helpers/options.h" #include "runtime/helpers/options.h"
#include "runtime/os_interface/debug_settings_manager.h" #include "runtime/os_interface/debug_settings_manager.h"
#include "unit_tests/helpers/debug_manager_state_restore.h"
#include "gmock/gmock.h" #include "gmock/gmock.h"
#include <algorithm> #include <algorithm>
@@ -362,22 +363,16 @@ TEST(OfflineCompilerTest, parseCmdLine) {
delete mockOfflineCompiler; delete mockOfflineCompiler;
} }
TEST(OfflineCompilerTest, parseDebugSettings) { TEST(OfflineCompilerTest, givenStatelessToStatefullOptimizationEnabledWhenDebugSettingsAreParsedThenOptimizationStringIsPresent) {
MockOfflineCompiler *mockOfflineCompiler = new MockOfflineCompiler(); DebugManagerStateRestore stateRestore;
ASSERT_NE(nullptr, mockOfflineCompiler); MockOfflineCompiler mockOfflineCompiler;
DebugManager.flags.EnableStatelessToStatefulBufferOffsetOpt.set(1);
bool isBufferOffsetOpt = DebugManager.flags.EnableStatelessToStatefulBufferOffsetOpt.get(); mockOfflineCompiler.parseDebugSettings();
DebugManager.flags.EnableStatelessToStatefulBufferOffsetOpt.set(true);
mockOfflineCompiler->parseDebugSettings(); std::string internalOptions = mockOfflineCompiler.getInternalOptions();
DebugManager.flags.EnableStatelessToStatefulBufferOffsetOpt.set(isBufferOffsetOpt);
std::string internalOptions = mockOfflineCompiler->getInternalOptions();
size_t found = internalOptions.find("-cl-intel-has-buffer-offset-arg"); size_t found = internalOptions.find("-cl-intel-has-buffer-offset-arg");
EXPECT_NE(std::string::npos, found); EXPECT_NE(std::string::npos, found);
delete mockOfflineCompiler;
} }
TEST(OfflineCompilerTest, getStringWithinDelimiters) { TEST(OfflineCompilerTest, getStringWithinDelimiters) {

View File

@@ -26,6 +26,7 @@
#include "runtime/indirect_heap/indirect_heap.h" #include "runtime/indirect_heap/indirect_heap.h"
#include "runtime/helpers/aligned_memory.h" #include "runtime/helpers/aligned_memory.h"
#include "runtime/helpers/hash.h" #include "runtime/helpers/hash.h"
#include "runtime/helpers/hw_helper.h"
#include "runtime/helpers/kernel_commands.h" #include "runtime/helpers/kernel_commands.h"
#include "runtime/helpers/ptr_math.h" #include "runtime/helpers/ptr_math.h"
#include "runtime/helpers/string.h" #include "runtime/helpers/string.h"
@@ -1866,15 +1867,26 @@ TEST_F(ProgramTests, BuiltinProgramCreateSetsProperInternalOptionsWhenForcing32B
TEST_F(ProgramTests, BuiltinProgramCreateSetsProperInternalOptionsEnablingStatelessToStatefulBufferOffsetOptimization) { TEST_F(ProgramTests, BuiltinProgramCreateSetsProperInternalOptionsEnablingStatelessToStatefulBufferOffsetOptimization) {
DebugManagerStateRestore dbgRestorer; DebugManagerStateRestore dbgRestorer;
DebugManager.flags.EnableStatelessToStatefulBufferOffsetOpt.set(true); DebugManager.flags.EnableStatelessToStatefulBufferOffsetOpt.set(1);
std::unique_ptr<MockProgram> pProgram(Program::create<MockProgram>("", pContext, *pDevice, true, nullptr)); cl_int errorCode = CL_SUCCESS;
const char programSource[] = "program";
const char *programPointer = programSource;
const char **programSources = reinterpret_cast<const char **>(&programPointer);
size_t length = sizeof(programSource);
std::unique_ptr<MockProgram> pProgram(Program::create<MockProgram>(pContext, 1u, programSources, &length, errorCode));
EXPECT_THAT(pProgram->getInternalOptions(), testing::HasSubstr(std::string("-cl-intel-has-buffer-offset-arg "))); EXPECT_THAT(pProgram->getInternalOptions(), testing::HasSubstr(std::string("-cl-intel-has-buffer-offset-arg ")));
} }
TEST_F(ProgramTests, givenStatelessToStatefullOptimizationOffWHenProgramIsCreatedThenOptimizationStringIsNotPresent) { TEST_F(ProgramTests, givenStatelessToStatefullOptimizationOffWHenProgramIsCreatedThenOptimizationStringIsNotPresent) {
DebugManagerStateRestore dbgRestorer; DebugManagerStateRestore dbgRestorer;
DebugManager.flags.EnableStatelessToStatefulBufferOffsetOpt.set(false); DebugManager.flags.EnableStatelessToStatefulBufferOffsetOpt.set(0);
std::unique_ptr<MockProgram> pProgram(Program::create<MockProgram>("", pContext, *pDevice, true, nullptr)); cl_int errorCode = CL_SUCCESS;
const char programSource[] = "program";
const char *programPointer = programSource;
const char **programSources = reinterpret_cast<const char **>(&programPointer);
size_t length = sizeof(programSource);
std::unique_ptr<MockProgram> pProgram(Program::create<MockProgram>(pContext, 1u, programSources, &length, errorCode));
EXPECT_THAT(pProgram->getInternalOptions(), Not(testing::HasSubstr(std::string("-cl-intel-has-buffer-offset-arg ")))); EXPECT_THAT(pProgram->getInternalOptions(), Not(testing::HasSubstr(std::string("-cl-intel-has-buffer-offset-arg "))));
} }
@@ -2659,11 +2671,18 @@ TEST_F(Program32BitTests, givenDeviceWhenProgramIsCreatedThenProgramCountInDevic
EXPECT_EQ(1u, device->getProgramCount()); EXPECT_EQ(1u, device->getProgramCount());
} }
TEST_F(ProgramTests, givenNewProgramTheStatelessToStatefulBufferOffsetOtimizationIsDisabled) { TEST_F(ProgramTests, givenNewProgramTheStatelessToStatefulBufferOffsetOtimizationIsMatchingThePlatformEnablingStatus) {
MockProgram prog; MockProgram prog(pContext, false);
auto &internalOpts = prog.getInternalOptions(); auto &internalOpts = prog.getInternalOptions();
auto it = internalOpts.find("-cl-intel-has-buffer-offset-arg "); auto it = internalOpts.find("-cl-intel-has-buffer-offset-arg ");
EXPECT_NE(std::string::npos, it);
HardwareCapabilities hwCaps = {0};
HwHelper::get(prog.getDevice(0).getHardwareInfo().pPlatform->eRenderCoreFamily).setupHardwareCapabilities(&hwCaps);
if (hwCaps.isStatelesToStatefullWithOffsetSupported) {
EXPECT_NE(std::string::npos, it);
} else {
EXPECT_EQ(std::string::npos, it);
}
} }
template <int32_t ErrCodeToReturn, bool spirv = true> template <int32_t ErrCodeToReturn, bool spirv = true>

View File

@@ -31,7 +31,7 @@ SchedulerSimulationReturnInstance = 0
DisableConcurrentBlockExecution = 0 DisableConcurrentBlockExecution = 0
ResidencyDebugEnable = 0 ResidencyDebugEnable = 0
ForcePreemptionMode = -1 ForcePreemptionMode = -1
EnableStatelessToStatefulBufferOffsetOpt = 1 EnableStatelessToStatefulBufferOffsetOpt = -1
TbxPort = 4321 TbxPort = 4321
TbxServer = 127.0.0.1 TbxServer = 127.0.0.1
EnableDeferredDeleter = 1 EnableDeferredDeleter = 1