mirror of
https://github.com/intel/compute-runtime.git
synced 2025-12-19 16:24:18 +08:00
Disable stateless to stateful with offset optimization on BDW.
- BDW doesn't support this optimization
Change-Id: Ic88556214c8d9a14ddb093b7c25587575e616f83
This commit is contained in:
committed by
sys_ocldev
parent
2589286d42
commit
92266e4ad1
@@ -42,6 +42,7 @@ void HwHelperHw<Family>::setupHardwareCapabilities(HardwareCapabilities *caps) {
|
|||||||
caps->image3DMaxHeight = 2048;
|
caps->image3DMaxHeight = 2048;
|
||||||
caps->image3DMaxWidth = 2048;
|
caps->image3DMaxWidth = 2048;
|
||||||
caps->maxMemAllocSize = 2 * MemoryConstants::gigaByte - 8 * MemoryConstants::megaByte;
|
caps->maxMemAllocSize = 2 * MemoryConstants::gigaByte - 8 * MemoryConstants::megaByte;
|
||||||
|
caps->isStatelesToStatefullWithOffsetSupported = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
template class HwHelperHw<Family>;
|
template class HwHelperHw<Family>;
|
||||||
|
|||||||
@@ -41,6 +41,7 @@ void HwHelperHw<Family>::setupHardwareCapabilities(HardwareCapabilities *caps) {
|
|||||||
//With statefull messages we have an allocation cap of 4GB
|
//With statefull messages we have an allocation cap of 4GB
|
||||||
//Reason to subtract 8KB is that driver may pad the buffer with addition pages for over fetching..
|
//Reason to subtract 8KB is that driver may pad the buffer with addition pages for over fetching..
|
||||||
caps->maxMemAllocSize = (4ULL * MemoryConstants::gigaByte) - (8ULL * MemoryConstants::kiloByte);
|
caps->maxMemAllocSize = (4ULL * MemoryConstants::gigaByte) - (8ULL * MemoryConstants::kiloByte);
|
||||||
|
caps->isStatelesToStatefullWithOffsetSupported = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename Family>
|
template <typename Family>
|
||||||
@@ -56,4 +57,5 @@ SipKernelType HwHelperHw<Family>::getSipKernelType(bool debuggingActive) {
|
|||||||
}
|
}
|
||||||
return SipKernelType::DbgCsr;
|
return SipKernelType::DbgCsr;
|
||||||
}
|
}
|
||||||
|
|
||||||
} // namespace OCLRT
|
} // namespace OCLRT
|
||||||
|
|||||||
@@ -81,6 +81,7 @@ struct HardwareCapabilities {
|
|||||||
size_t image3DMaxWidth;
|
size_t image3DMaxWidth;
|
||||||
size_t image3DMaxHeight;
|
size_t image3DMaxHeight;
|
||||||
uint64_t maxMemAllocSize;
|
uint64_t maxMemAllocSize;
|
||||||
|
bool isStatelesToStatefullWithOffsetSupported;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct HardwareInfo {
|
struct HardwareInfo {
|
||||||
|
|||||||
@@ -80,7 +80,6 @@ DECLARE_DEBUG_VARIABLE(bool, EnableNV12, true, "Enables NV12 extension")
|
|||||||
DECLARE_DEBUG_VARIABLE(bool, EnablePackedYuv, true, "Enables cl_packed_yuv extension")
|
DECLARE_DEBUG_VARIABLE(bool, EnablePackedYuv, true, "Enables cl_packed_yuv extension")
|
||||||
DECLARE_DEBUG_VARIABLE(bool, EnableIntelVme, true, "Enables cl_intel_motion_estimation extension")
|
DECLARE_DEBUG_VARIABLE(bool, EnableIntelVme, true, "Enables cl_intel_motion_estimation extension")
|
||||||
DECLARE_DEBUG_VARIABLE(bool, EnableIntelAdvancedVme, true, "Enables cl_intel_advanced_motion_estimation extension")
|
DECLARE_DEBUG_VARIABLE(bool, EnableIntelAdvancedVme, true, "Enables cl_intel_advanced_motion_estimation extension")
|
||||||
DECLARE_DEBUG_VARIABLE(bool, EnableStatelessToStatefulBufferOffsetOpt, true, "Temporary debug variable to help in enabling buffer-offset improvement of the stateless to stateful optimization")
|
|
||||||
DECLARE_DEBUG_VARIABLE(bool, EnableDeferredDeleter, true, "Enables async deleter")
|
DECLARE_DEBUG_VARIABLE(bool, EnableDeferredDeleter, true, "Enables async deleter")
|
||||||
DECLARE_DEBUG_VARIABLE(bool, EnableAsyncDestroyAllocations, true, "Enables async destroying graphics allocations in mem obj destructor")
|
DECLARE_DEBUG_VARIABLE(bool, EnableAsyncDestroyAllocations, true, "Enables async destroying graphics allocations in mem obj destructor")
|
||||||
DECLARE_DEBUG_VARIABLE(bool, EnableAsyncEventsHandler, true, "Enables async events handler")
|
DECLARE_DEBUG_VARIABLE(bool, EnableAsyncEventsHandler, true, "Enables async events handler")
|
||||||
@@ -88,6 +87,7 @@ DECLARE_DEBUG_VARIABLE(bool, EnableForcePin, true, "Enables early pinning for me
|
|||||||
DECLARE_DEBUG_VARIABLE(bool, EnableComputeWorkSizeND, true, "Enables diffrent algorithm to compute local work size")
|
DECLARE_DEBUG_VARIABLE(bool, EnableComputeWorkSizeND, true, "Enables diffrent algorithm to compute local work size")
|
||||||
DECLARE_DEBUG_VARIABLE(bool, EnableComputeWorkSizeSquared, false, "Enables algorithm to compute the most squared work group as possible")
|
DECLARE_DEBUG_VARIABLE(bool, EnableComputeWorkSizeSquared, false, "Enables algorithm to compute the most squared work group as possible")
|
||||||
DECLARE_DEBUG_VARIABLE(bool, EnableVaLibCalls, true, "Enable cl-va sharing lib calls")
|
DECLARE_DEBUG_VARIABLE(bool, EnableVaLibCalls, true, "Enable cl-va sharing lib calls")
|
||||||
|
DECLARE_DEBUG_VARIABLE(int32_t, EnableStatelessToStatefulBufferOffsetOpt, -1, "-1: dont override, 0: disable, 1: enable, Enables buffer-offset improvement of the stateless to stateful optimization")
|
||||||
DECLARE_DEBUG_VARIABLE(int32_t, CreateMultipleDevices, 0, "0: default - disable, 1+: Driver will create multiple (N) devices during initialization.")
|
DECLARE_DEBUG_VARIABLE(int32_t, CreateMultipleDevices, 0, "0: default - disable, 1+: Driver will create multiple (N) devices during initialization.")
|
||||||
DECLARE_DEBUG_VARIABLE(int32_t, Enable64kbpages, -1, "-1: default behaviour, 0 Disables, 1 Enables support for 64KB pages for driver allocated fine grain svm buffers")
|
DECLARE_DEBUG_VARIABLE(int32_t, Enable64kbpages, -1, "-1: default behaviour, 0 Disables, 1 Enables support for 64KB pages for driver allocated fine grain svm buffers")
|
||||||
DECLARE_DEBUG_VARIABLE(int32_t, OverrideEnableKmdNotify, -1, "-1: dont override, 0: disable, 1: enable")
|
DECLARE_DEBUG_VARIABLE(int32_t, OverrideEnableKmdNotify, -1, "-1: dont override, 0: disable, 1: enable")
|
||||||
|
|||||||
@@ -25,6 +25,7 @@
|
|||||||
#include "runtime/context/context.h"
|
#include "runtime/context/context.h"
|
||||||
#include "runtime/helpers/debug_helpers.h"
|
#include "runtime/helpers/debug_helpers.h"
|
||||||
#include "runtime/helpers/string.h"
|
#include "runtime/helpers/string.h"
|
||||||
|
#include "runtime/helpers/hw_helper.h"
|
||||||
#include "runtime/memory_manager/memory_manager.h"
|
#include "runtime/memory_manager/memory_manager.h"
|
||||||
#include "runtime/compiler_interface/compiler_interface.h"
|
#include "runtime/compiler_interface/compiler_interface.h"
|
||||||
|
|
||||||
@@ -88,10 +89,17 @@ Program::Program(Context *context, bool isBuiltIn) : context(context), isBuiltIn
|
|||||||
internalOptions += "-cl-intel-greater-than-4GB-buffer-required ";
|
internalOptions += "-cl-intel-greater-than-4GB-buffer-required ";
|
||||||
}
|
}
|
||||||
kernelDebugEnabled = pDevice->isSourceLevelDebuggerActive();
|
kernelDebugEnabled = pDevice->isSourceLevelDebuggerActive();
|
||||||
}
|
|
||||||
|
|
||||||
if (DebugManager.flags.EnableStatelessToStatefulBufferOffsetOpt.get()) {
|
HardwareCapabilities hwCaps = {0};
|
||||||
internalOptions += "-cl-intel-has-buffer-offset-arg ";
|
HwHelper::get(pDevice->getHardwareInfo().pPlatform->eRenderCoreFamily).setupHardwareCapabilities(&hwCaps);
|
||||||
|
auto enableStatelessToStatefullWithOffset = hwCaps.isStatelesToStatefullWithOffsetSupported;
|
||||||
|
if (DebugManager.flags.EnableStatelessToStatefulBufferOffsetOpt.get() != -1) {
|
||||||
|
enableStatelessToStatefullWithOffset = DebugManager.flags.EnableStatelessToStatefulBufferOffsetOpt.get() != 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (enableStatelessToStatefullWithOffset) {
|
||||||
|
internalOptions += "-cl-intel-has-buffer-offset-arg ";
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
internalOptions += "-fpreserve-vec3-type ";
|
internalOptions += "-fpreserve-vec3-type ";
|
||||||
|
|||||||
@@ -66,4 +66,5 @@ GEN8TEST_F(HwHelperTestBdw, givenGen8PlatformWhenSetupHardwareCapabilitiesIsCall
|
|||||||
EXPECT_EQ(2048u, hwCaps.image3DMaxHeight);
|
EXPECT_EQ(2048u, hwCaps.image3DMaxHeight);
|
||||||
EXPECT_EQ(2048u, hwCaps.image3DMaxWidth);
|
EXPECT_EQ(2048u, hwCaps.image3DMaxWidth);
|
||||||
EXPECT_EQ(2 * MemoryConstants::gigaByte - 8 * MemoryConstants::megaByte, hwCaps.maxMemAllocSize);
|
EXPECT_EQ(2 * MemoryConstants::gigaByte - 8 * MemoryConstants::megaByte, hwCaps.maxMemAllocSize);
|
||||||
}
|
EXPECT_FALSE(hwCaps.isStatelesToStatefullWithOffsetSupported);
|
||||||
|
}
|
||||||
@@ -70,4 +70,4 @@ GEN9TEST_F(HwHelperTestSkl, givenDebuggingActiveWhenSipKernelTypeIsQueriedThenDb
|
|||||||
|
|
||||||
auto sipType = helper.getSipKernelType(true);
|
auto sipType = helper.getSipKernelType(true);
|
||||||
EXPECT_EQ(SipKernelType::DbgCsrLocal, sipType);
|
EXPECT_EQ(SipKernelType::DbgCsrLocal, sipType);
|
||||||
}
|
}
|
||||||
@@ -30,4 +30,5 @@ void testDefaultImplementationOfSetupHardwareCapabilities(HwHelper &hwHelper) {
|
|||||||
|
|
||||||
EXPECT_EQ(16384u, hwCaps.image3DMaxHeight);
|
EXPECT_EQ(16384u, hwCaps.image3DMaxHeight);
|
||||||
EXPECT_EQ(16384u, hwCaps.image3DMaxWidth);
|
EXPECT_EQ(16384u, hwCaps.image3DMaxWidth);
|
||||||
|
EXPECT_TRUE(hwCaps.isStatelesToStatefullWithOffsetSupported);
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -27,6 +27,7 @@
|
|||||||
#include "runtime/helpers/file_io.h"
|
#include "runtime/helpers/file_io.h"
|
||||||
#include "runtime/helpers/options.h"
|
#include "runtime/helpers/options.h"
|
||||||
#include "runtime/os_interface/debug_settings_manager.h"
|
#include "runtime/os_interface/debug_settings_manager.h"
|
||||||
|
#include "unit_tests/helpers/debug_manager_state_restore.h"
|
||||||
#include "gmock/gmock.h"
|
#include "gmock/gmock.h"
|
||||||
|
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
@@ -362,22 +363,16 @@ TEST(OfflineCompilerTest, parseCmdLine) {
|
|||||||
delete mockOfflineCompiler;
|
delete mockOfflineCompiler;
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST(OfflineCompilerTest, parseDebugSettings) {
|
TEST(OfflineCompilerTest, givenStatelessToStatefullOptimizationEnabledWhenDebugSettingsAreParsedThenOptimizationStringIsPresent) {
|
||||||
MockOfflineCompiler *mockOfflineCompiler = new MockOfflineCompiler();
|
DebugManagerStateRestore stateRestore;
|
||||||
ASSERT_NE(nullptr, mockOfflineCompiler);
|
MockOfflineCompiler mockOfflineCompiler;
|
||||||
|
DebugManager.flags.EnableStatelessToStatefulBufferOffsetOpt.set(1);
|
||||||
|
|
||||||
bool isBufferOffsetOpt = DebugManager.flags.EnableStatelessToStatefulBufferOffsetOpt.get();
|
mockOfflineCompiler.parseDebugSettings();
|
||||||
DebugManager.flags.EnableStatelessToStatefulBufferOffsetOpt.set(true);
|
|
||||||
|
|
||||||
mockOfflineCompiler->parseDebugSettings();
|
std::string internalOptions = mockOfflineCompiler.getInternalOptions();
|
||||||
|
|
||||||
DebugManager.flags.EnableStatelessToStatefulBufferOffsetOpt.set(isBufferOffsetOpt);
|
|
||||||
|
|
||||||
std::string internalOptions = mockOfflineCompiler->getInternalOptions();
|
|
||||||
size_t found = internalOptions.find("-cl-intel-has-buffer-offset-arg");
|
size_t found = internalOptions.find("-cl-intel-has-buffer-offset-arg");
|
||||||
EXPECT_NE(std::string::npos, found);
|
EXPECT_NE(std::string::npos, found);
|
||||||
|
|
||||||
delete mockOfflineCompiler;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST(OfflineCompilerTest, getStringWithinDelimiters) {
|
TEST(OfflineCompilerTest, getStringWithinDelimiters) {
|
||||||
|
|||||||
@@ -26,6 +26,7 @@
|
|||||||
#include "runtime/indirect_heap/indirect_heap.h"
|
#include "runtime/indirect_heap/indirect_heap.h"
|
||||||
#include "runtime/helpers/aligned_memory.h"
|
#include "runtime/helpers/aligned_memory.h"
|
||||||
#include "runtime/helpers/hash.h"
|
#include "runtime/helpers/hash.h"
|
||||||
|
#include "runtime/helpers/hw_helper.h"
|
||||||
#include "runtime/helpers/kernel_commands.h"
|
#include "runtime/helpers/kernel_commands.h"
|
||||||
#include "runtime/helpers/ptr_math.h"
|
#include "runtime/helpers/ptr_math.h"
|
||||||
#include "runtime/helpers/string.h"
|
#include "runtime/helpers/string.h"
|
||||||
@@ -1866,15 +1867,26 @@ TEST_F(ProgramTests, BuiltinProgramCreateSetsProperInternalOptionsWhenForcing32B
|
|||||||
|
|
||||||
TEST_F(ProgramTests, BuiltinProgramCreateSetsProperInternalOptionsEnablingStatelessToStatefulBufferOffsetOptimization) {
|
TEST_F(ProgramTests, BuiltinProgramCreateSetsProperInternalOptionsEnablingStatelessToStatefulBufferOffsetOptimization) {
|
||||||
DebugManagerStateRestore dbgRestorer;
|
DebugManagerStateRestore dbgRestorer;
|
||||||
DebugManager.flags.EnableStatelessToStatefulBufferOffsetOpt.set(true);
|
DebugManager.flags.EnableStatelessToStatefulBufferOffsetOpt.set(1);
|
||||||
std::unique_ptr<MockProgram> pProgram(Program::create<MockProgram>("", pContext, *pDevice, true, nullptr));
|
cl_int errorCode = CL_SUCCESS;
|
||||||
|
const char programSource[] = "program";
|
||||||
|
const char *programPointer = programSource;
|
||||||
|
const char **programSources = reinterpret_cast<const char **>(&programPointer);
|
||||||
|
size_t length = sizeof(programSource);
|
||||||
|
std::unique_ptr<MockProgram> pProgram(Program::create<MockProgram>(pContext, 1u, programSources, &length, errorCode));
|
||||||
|
|
||||||
EXPECT_THAT(pProgram->getInternalOptions(), testing::HasSubstr(std::string("-cl-intel-has-buffer-offset-arg ")));
|
EXPECT_THAT(pProgram->getInternalOptions(), testing::HasSubstr(std::string("-cl-intel-has-buffer-offset-arg ")));
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST_F(ProgramTests, givenStatelessToStatefullOptimizationOffWHenProgramIsCreatedThenOptimizationStringIsNotPresent) {
|
TEST_F(ProgramTests, givenStatelessToStatefullOptimizationOffWHenProgramIsCreatedThenOptimizationStringIsNotPresent) {
|
||||||
DebugManagerStateRestore dbgRestorer;
|
DebugManagerStateRestore dbgRestorer;
|
||||||
DebugManager.flags.EnableStatelessToStatefulBufferOffsetOpt.set(false);
|
DebugManager.flags.EnableStatelessToStatefulBufferOffsetOpt.set(0);
|
||||||
std::unique_ptr<MockProgram> pProgram(Program::create<MockProgram>("", pContext, *pDevice, true, nullptr));
|
cl_int errorCode = CL_SUCCESS;
|
||||||
|
const char programSource[] = "program";
|
||||||
|
const char *programPointer = programSource;
|
||||||
|
const char **programSources = reinterpret_cast<const char **>(&programPointer);
|
||||||
|
size_t length = sizeof(programSource);
|
||||||
|
std::unique_ptr<MockProgram> pProgram(Program::create<MockProgram>(pContext, 1u, programSources, &length, errorCode));
|
||||||
EXPECT_THAT(pProgram->getInternalOptions(), Not(testing::HasSubstr(std::string("-cl-intel-has-buffer-offset-arg "))));
|
EXPECT_THAT(pProgram->getInternalOptions(), Not(testing::HasSubstr(std::string("-cl-intel-has-buffer-offset-arg "))));
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -2659,11 +2671,18 @@ TEST_F(Program32BitTests, givenDeviceWhenProgramIsCreatedThenProgramCountInDevic
|
|||||||
EXPECT_EQ(1u, device->getProgramCount());
|
EXPECT_EQ(1u, device->getProgramCount());
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST_F(ProgramTests, givenNewProgramTheStatelessToStatefulBufferOffsetOtimizationIsDisabled) {
|
TEST_F(ProgramTests, givenNewProgramTheStatelessToStatefulBufferOffsetOtimizationIsMatchingThePlatformEnablingStatus) {
|
||||||
MockProgram prog;
|
MockProgram prog(pContext, false);
|
||||||
auto &internalOpts = prog.getInternalOptions();
|
auto &internalOpts = prog.getInternalOptions();
|
||||||
auto it = internalOpts.find("-cl-intel-has-buffer-offset-arg ");
|
auto it = internalOpts.find("-cl-intel-has-buffer-offset-arg ");
|
||||||
EXPECT_NE(std::string::npos, it);
|
|
||||||
|
HardwareCapabilities hwCaps = {0};
|
||||||
|
HwHelper::get(prog.getDevice(0).getHardwareInfo().pPlatform->eRenderCoreFamily).setupHardwareCapabilities(&hwCaps);
|
||||||
|
if (hwCaps.isStatelesToStatefullWithOffsetSupported) {
|
||||||
|
EXPECT_NE(std::string::npos, it);
|
||||||
|
} else {
|
||||||
|
EXPECT_EQ(std::string::npos, it);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
template <int32_t ErrCodeToReturn, bool spirv = true>
|
template <int32_t ErrCodeToReturn, bool spirv = true>
|
||||||
|
|||||||
@@ -31,7 +31,7 @@ SchedulerSimulationReturnInstance = 0
|
|||||||
DisableConcurrentBlockExecution = 0
|
DisableConcurrentBlockExecution = 0
|
||||||
ResidencyDebugEnable = 0
|
ResidencyDebugEnable = 0
|
||||||
ForcePreemptionMode = -1
|
ForcePreemptionMode = -1
|
||||||
EnableStatelessToStatefulBufferOffsetOpt = 1
|
EnableStatelessToStatefulBufferOffsetOpt = -1
|
||||||
TbxPort = 4321
|
TbxPort = 4321
|
||||||
TbxServer = 127.0.0.1
|
TbxServer = 127.0.0.1
|
||||||
EnableDeferredDeleter = 1
|
EnableDeferredDeleter = 1
|
||||||
|
|||||||
Reference in New Issue
Block a user