Enable new algorithm computing local work sizes
Change-Id: If0addb5f36ee0b667370489b11837be716d70782
This commit is contained in:
parent
1e78649540
commit
b503597ffa
|
@ -66,8 +66,8 @@ DECLARE_DEBUG_VARIABLE(bool, EnableAsyncDestroyAllocations, true, "Enables async
|
|||
DECLARE_DEBUG_VARIABLE(bool, EnableAsyncEventsHandler, true, "Enables async events handler")
|
||||
DECLARE_DEBUG_VARIABLE(bool, EnableForcePin, true, "Enables early pinning for memory object")
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, Enable64kbpages, -1, "-1: default behaviour, 0 Disables, 1 Enables support for 64KB pages for driver allocated fine grain svm buffers")
|
||||
DECLARE_DEBUG_VARIABLE(bool, EnableComputeWorkSizeND, false, "Enables diffrent algorithm to compute locla work size")
|
||||
DECLARE_DEBUG_VARIABLE(bool, EnableComputeWorkSizeSquared, true, "Enables algorithm to compute the most squared work gropu as passible")
|
||||
DECLARE_DEBUG_VARIABLE(bool, EnableComputeWorkSizeND, true, "Enables diffrent algorithm to compute local work size")
|
||||
DECLARE_DEBUG_VARIABLE(bool, EnableComputeWorkSizeSquared, false, "Enables algorithm to compute the most squared work group as possible")
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, OverrideEnableKmdNotify, -1, "-1: dont override, 0: disable, 1: enable")
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, OverrideKmdNotifyDelayMs, -1, "-1: dont override, 0: infinite timeout, >0: timeout in ms")
|
||||
DECLARE_DEBUG_VARIABLE(bool, EnableVaLibCalls, true, "Enable cl-va sharing lib calls")
|
||||
|
|
|
@ -202,7 +202,90 @@ HWTEST_F(DispatchWalkerTest, noLocalIdsShouldntCrash) {
|
|||
EXPECT_EQ(sizeDispatchWalkerNeeds, commandStream.getUsed() - commandStreamStart);
|
||||
}
|
||||
|
||||
// Dispatches the walker for 1, 2 and 3 dimensions without touching any debug
// flag and checks that *kernel.workDim equals the dimension that was passed in.
HWTEST_F(DispatchWalkerTest, dataParameterWorkDimensionswithDefaultLwsAlgorithm) {
    MockKernel kernel(&program, kernelInfo, *pDevice);
    kernelInfo.workloadInfo.workDimOffset = 0;
    ASSERT_EQ(CL_SUCCESS, kernel.initialize());

    size_t globalOffsets[3] = {0, 0, 0};
    size_t workItems[3] = {1, 1, 1};
    for (uint32_t dimension = 1; dimension <= 3; ++dimension) {
        // Each iteration enlarges one more entry, so the sizes become
        // {256,1,1}, {256,256,1} and {256,256,256}.
        workItems[dimension - 1] = 256;
        dispatchWalker<FamilyType>(
            *pCmdQ,
            kernel,
            dimension,
            globalOffsets,
            workItems,
            nullptr,
            0,
            nullptr,
            nullptr,
            nullptr,
            nullptr);
        EXPECT_EQ(dimension, *kernel.workDim);
    }
}
|
||||
|
||||
// With EnableComputeWorkSizeND forced off and EnableComputeWorkSizeSquared
// forced on, dispatches the walker for 1, 2 and 3 dimensions and checks that
// *kernel.workDim equals the dimension that was passed in.
HWTEST_F(DispatchWalkerTest, dataParameterWorkDimensionswithSquaredLwsAlgorithm) {
    DebugManagerStateRestore dbgRestore;
    DebugManager.flags.EnableComputeWorkSizeND.set(false);
    DebugManager.flags.EnableComputeWorkSizeSquared.set(true);

    MockKernel kernel(&program, kernelInfo, *pDevice);
    kernelInfo.workloadInfo.workDimOffset = 0;
    ASSERT_EQ(CL_SUCCESS, kernel.initialize());

    size_t offsets[3] = {0, 0, 0};
    size_t globalSizes[3] = {1, 1, 1};
    for (uint32_t axis = 0; axis < 3; ++axis) {
        // One more entry is raised to 256 on every pass.
        uint32_t dimension = axis + 1;
        globalSizes[axis] = 256;
        dispatchWalker<FamilyType>(*pCmdQ, kernel, dimension, offsets, globalSizes,
                                   nullptr, 0, nullptr, nullptr, nullptr, nullptr);
        EXPECT_EQ(dimension, *kernel.workDim);
    }
}
|
||||
|
||||
// With EnableComputeWorkSizeND forced on, dispatches the walker for 1, 2 and 3
// dimensions and checks that *kernel.workDim equals the dimension passed in.
HWTEST_F(DispatchWalkerTest, dataParameterWorkDimensionswithNDLwsAlgorithm) {
    DebugManagerStateRestore dbgRestore;
    DebugManager.flags.EnableComputeWorkSizeND.set(true);

    MockKernel kernel(&program, kernelInfo, *pDevice);
    kernelInfo.workloadInfo.workDimOffset = 0;
    ASSERT_EQ(CL_SUCCESS, kernel.initialize());

    size_t offsets[3] = {0, 0, 0};
    size_t globalSizes[3] = {1, 1, 1};
    for (uint32_t axis = 0; axis < 3; ++axis) {
        // One more entry is raised to 256 on every pass.
        uint32_t dimension = axis + 1;
        globalSizes[axis] = 256;
        dispatchWalker<FamilyType>(*pCmdQ, kernel, dimension, offsets, globalSizes,
                                   nullptr, 0, nullptr, nullptr, nullptr, nullptr);
        EXPECT_EQ(dimension, *kernel.workDim);
    }
}
|
||||
|
||||
HWTEST_F(DispatchWalkerTest, dataParameterWorkDimensionswithOldLwsAlgorithm) {
|
||||
DebugManagerStateRestore dbgRestore;
|
||||
DebugManager.flags.EnableComputeWorkSizeND.set(false);
|
||||
DebugManager.flags.EnableComputeWorkSizeSquared.set(false);
|
||||
MockKernel kernel(&program, kernelInfo, *pDevice);
|
||||
kernelInfo.workloadInfo.workDimOffset = 0;
|
||||
ASSERT_EQ(CL_SUCCESS, kernel.initialize());
|
||||
|
@ -316,6 +399,7 @@ HWTEST_F(DispatchWalkerTest, dataParameterNoLocalWorkSizeWithComputeND) {
|
|||
HWTEST_F(DispatchWalkerTest, dataParameterNoLocalWorkSizeWithComputeSquared) {
|
||||
DebugManagerStateRestore dbgRestore;
|
||||
DebugManager.flags.EnableComputeWorkSizeSquared.set(true);
|
||||
DebugManager.flags.EnableComputeWorkSizeND.set(false);
|
||||
MockKernel kernel(&program, kernelInfo, *pDevice);
|
||||
kernelInfo.workloadInfo.localWorkSizeOffsets[0] = 0;
|
||||
kernelInfo.workloadInfo.localWorkSizeOffsets[1] = 4;
|
||||
|
@ -342,9 +426,10 @@ HWTEST_F(DispatchWalkerTest, dataParameterNoLocalWorkSizeWithComputeSquared) {
|
|||
EXPECT_EQ(1u, *kernel.localWorkSizeZ);
|
||||
}
|
||||
|
||||
HWTEST_F(DispatchWalkerTest, dataParameterNoLocalWorkSizeWithOutComputeSquared) {
|
||||
HWTEST_F(DispatchWalkerTest, dataParameterNoLocalWorkSizeWithOutComputeSquaredAndND) {
|
||||
DebugManagerStateRestore dbgRestore;
|
||||
DebugManager.flags.EnableComputeWorkSizeSquared.set(false);
|
||||
DebugManager.flags.EnableComputeWorkSizeND.set(false);
|
||||
MockKernel kernel(&program, kernelInfo, *pDevice);
|
||||
kernelInfo.workloadInfo.localWorkSizeOffsets[0] = 0;
|
||||
kernelInfo.workloadInfo.localWorkSizeOffsets[1] = 4;
|
||||
|
|
|
@ -524,6 +524,5 @@ TEST(localWorkSizeTest, given2DimWorkWhenComputeSquaredCalledThenLocalGroupCompu
|
|||
}
|
||||
|
||||
// Pins the value of the EnableComputeWorkSizeND debug flag as read here: it
// must be true. The former "== false" expectation contradicted both this check
// and the test name, so only one assertion remains.
TEST(localWorkSizeTest, givenDebugVariableEnableComputeWorkSizeNDWhenCheckValueExpectTrue) {
    EXPECT_TRUE(DebugManager.flags.EnableComputeWorkSizeND.get());
}
|
|
@ -24,6 +24,7 @@
|
|||
#include "runtime/command_queue/dispatch_walker.h"
|
||||
#include "unit_tests/fixtures/device_fixture.h"
|
||||
#include "unit_tests/fixtures/memory_management_fixture.h"
|
||||
#include "unit_tests/helpers/debug_manager_state_restore.h"
|
||||
#include "test.h"
|
||||
|
||||
using namespace OCLRT;
|
||||
|
@ -185,27 +186,25 @@ HWTEST_P(WorkGroupSizeChannels, allChannelsWithEnableComputeWorkSizeSquaredDefau
|
|||
}
|
||||
|
||||
// Runs verify() for the parameterized (simdSize, workDim) pair with the squared
// algorithm forced on and the ND algorithm forced off.
HWTEST_P(WorkGroupSizeChannels, allChannelsWithEnableComputeWorkSizeSquaredEnabled) {
    // NOTE(review): dbgRestore is assumed to put the debug flags back when it
    // leaves scope (confirm in debug_manager_state_restore.h); that is why no
    // manual save/restore of the flag is kept here.
    DebugManagerStateRestore dbgRestore;
    DebugManager.flags.EnableComputeWorkSizeSquared.set(true);
    DebugManager.flags.EnableComputeWorkSizeND.set(false);

    uint32_t simdSize;
    size_t workDim;
    std::tie(simdSize, workDim) = GetParam();

    verify<FamilyType>(simdSize, workDim, workDim, workDim);
}
|
||||
|
||||
// Runs verify() for the parameterized (simdSize, workDim) pair with both the
// squared and the ND algorithm forced off.
HWTEST_P(WorkGroupSizeChannels, allChannelsWithEnableComputeWorkSizeSquaredDisabled) {
    // NOTE(review): dbgRestore is assumed to put the debug flags back when it
    // leaves scope (confirm in debug_manager_state_restore.h); that is why no
    // manual save/restore of the flag is kept here.
    DebugManagerStateRestore dbgRestore;
    DebugManager.flags.EnableComputeWorkSizeSquared.set(false);
    DebugManager.flags.EnableComputeWorkSizeND.set(false);

    uint32_t simdSize;
    size_t workDim;
    std::tie(simdSize, workDim) = GetParam();

    verify<FamilyType>(simdSize, workDim, workDim, workDim);
}
|
||||
|
||||
HWTEST_P(WorkGroupSizeChannels, justXWithEnableComputeWorkSizeNDDefault) {
|
||||
|
|
|
@ -22,6 +22,7 @@
|
|||
|
||||
#include "runtime/memory_manager/svm_memory_manager.h"
|
||||
#include "driver_diagnostics_tests.h"
|
||||
#include "unit_tests/helpers/debug_manager_state_restore.h"
|
||||
|
||||
using namespace OCLRT;
|
||||
|
||||
|
@ -385,28 +386,28 @@ TEST_F(PerformanceHintEnqueueKernelTest, GivenNullLocalSizeAndEnableComputeWorkS
|
|||
|
||||
// Enqueues the kernel with a null local work size while the squared algorithm
// is forced on and ND is forced off, then checks that the
// NULL_LOCAL_WORKGROUP_SIZE hint built from the kernel's local work sizes was
// reported.
TEST_F(PerformanceHintEnqueueKernelTest, GivenNullLocalSizeAndEnableComputeWorkSizeSquaredIsTrueWhenEnqueueKernelIsCallingThenContextProvidesProperHint) {
    // NOTE(review): dbgRestore is assumed to put the debug flags back when it
    // leaves scope (confirm in debug_manager_state_restore.h); that is why no
    // manual save/restore of the flag is kept here.
    DebugManagerStateRestore dbgRestore;
    DebugManager.flags.EnableComputeWorkSizeSquared.set(true);
    DebugManager.flags.EnableComputeWorkSizeND.set(false);

    retVal = pCmdQ->enqueueKernel(kernel, 1, nullptr, globalWorkGroupSize, nullptr, 0, nullptr, nullptr);
    EXPECT_EQ(CL_SUCCESS, retVal);

    snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[NULL_LOCAL_WORKGROUP_SIZE], kernel->getKernelInfo().name.c_str(),
             *kernel->localWorkSizeX, *kernel->localWorkSizeY, *kernel->localWorkSizeZ);
    EXPECT_TRUE(containsHint(expectedHint, userData));
}
|
||||
|
||||
// Enqueues the kernel with a null local work size while both the squared and
// the ND algorithm are forced off, then checks that the
// NULL_LOCAL_WORKGROUP_SIZE hint built from the kernel's local work sizes was
// reported.
TEST_F(PerformanceHintEnqueueKernelTest, GivenNullLocalSizeAndEnableComputeWorkSizeSquaredIsFalseWhenEnqueueKernelIsCallingThenContextProvidesProperHint) {
    // NOTE(review): dbgRestore is assumed to put the debug flags back when it
    // leaves scope (confirm in debug_manager_state_restore.h); that is why no
    // manual save/restore of the flag is kept here.
    DebugManagerStateRestore dbgRestore;
    DebugManager.flags.EnableComputeWorkSizeSquared.set(false);
    DebugManager.flags.EnableComputeWorkSizeND.set(false);

    retVal = pCmdQ->enqueueKernel(kernel, 1, nullptr, globalWorkGroupSize, nullptr, 0, nullptr, nullptr);
    EXPECT_EQ(CL_SUCCESS, retVal);

    snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[NULL_LOCAL_WORKGROUP_SIZE], kernel->getKernelInfo().name.c_str(),
             *kernel->localWorkSizeX, *kernel->localWorkSizeY, *kernel->localWorkSizeZ);
    EXPECT_TRUE(containsHint(expectedHint, userData));
}
|
||||
|
||||
TEST_P(PerformanceHintEnqueueKernelBadSizeTest, GivenBadLocalWorkGroupSizeWhenEnqueueKernelIsCallingThenContextProvidesProperHint) {
|
||||
|
|
|
@ -21,6 +21,7 @@
|
|||
*/
|
||||
|
||||
#include "driver_diagnostics_tests.h"
|
||||
#include "unit_tests/helpers/debug_manager_state_restore.h"
|
||||
|
||||
using namespace OCLRT;
|
||||
|
||||
|
@ -227,19 +228,19 @@ TEST_F(PerformanceHintTest, GivenNullContextAndEmptyDispatchinfoAndEnableCompute
|
|||
}
|
||||
// Calls provideLocalWorkGroupSizeHints with a null context and a
// default-constructed DispatchInfo while the squared algorithm is forced on and
// ND is forced off. There are no assertions; the call completing is the check.
TEST_F(PerformanceHintTest, GivenNullContextAndEmptyDispatchinfoAndEnableComputeWorkSizeSquaredIsTrueWhenProvideLocalWorkGroupSizeIsCalledThenItDoesntCrash) {
    // NOTE(review): dbgRestore is assumed to put the debug flags back when it
    // leaves scope (confirm in debug_manager_state_restore.h); that is why no
    // manual save/restore of the flag is kept here.
    DebugManagerStateRestore dbgRestore;
    DebugManager.flags.EnableComputeWorkSizeSquared.set(true);
    DebugManager.flags.EnableComputeWorkSizeND.set(false);

    DispatchInfo emptyDispatchInfo;
    provideLocalWorkGroupSizeHints(nullptr, 0, emptyDispatchInfo);
}
|
||||
// Calls provideLocalWorkGroupSizeHints with a null context and a
// default-constructed DispatchInfo while both the squared and the ND algorithm
// are forced off. There are no assertions; the call completing is the check.
TEST_F(PerformanceHintTest, GivenNullContextAndEmptyDispatchinfoAndEnableComputeWorkSizeSquaredIsFalseWhenProvideLocalWorkGroupSizeIsCalledThenItDoesntCrash) {
    // NOTE(review): dbgRestore is assumed to put the debug flags back when it
    // leaves scope (confirm in debug_manager_state_restore.h); that is why no
    // manual save/restore of the flag is kept here.
    DebugManagerStateRestore dbgRestore;
    DebugManager.flags.EnableComputeWorkSizeSquared.set(false);
    DebugManager.flags.EnableComputeWorkSizeND.set(false);

    DispatchInfo emptyDispatchInfo;
    provideLocalWorkGroupSizeHints(nullptr, 0, emptyDispatchInfo);
}
|
||||
|
||||
TEST_F(PerformanceHintTest, GivenNullContextAndInvalidDispatchinfoAndEnableComputeWorkSizeNDIsDefaultWhenProvideLocalWorkGroupSizeIsCalledThenItDoesntCrash) {
|
||||
|
@ -279,23 +280,23 @@ TEST_F(PerformanceHintTest, GivenNullContextAndInvalidDispatchinfoAndEnableCompu
|
|||
}
|
||||
// Calls provideLocalWorkGroupSizeHints with a DispatchInfo constructed with a
// dimension argument of 100 while the squared algorithm is forced on and ND is
// forced off. There are no assertions; the call completing is the check.
TEST_F(PerformanceHintTest, GivenNullContextAndInvalidDispatchinfoAndEnableComputeWorkSizeSquaredIsTrueWhenProvideLocalWorkGroupSizeIsCalledThenItDoesntCrash) {
    // NOTE(review): dbgRestore is assumed to put the debug flags back when it
    // leaves scope (confirm in debug_manager_state_restore.h); that is why no
    // manual save/restore of the flag is kept here.
    DebugManagerStateRestore dbgRestore;
    DebugManager.flags.EnableComputeWorkSizeSquared.set(true);
    DebugManager.flags.EnableComputeWorkSizeND.set(false);

    auto pDevice = castToObject<Device>(devices[0]);
    MockKernelWithInternals mockKernel(*pDevice, context);
    DispatchInfo invalidDispatchInfo(mockKernel, 100, {32, 32, 32}, {1, 1, 1}, {0, 0, 0});
    provideLocalWorkGroupSizeHints(context, 0, invalidDispatchInfo);
}
|
||||
// Calls provideLocalWorkGroupSizeHints with a DispatchInfo constructed with a
// dimension argument of 100 while both the squared and the ND algorithm are
// forced off. There are no assertions; the call completing is the check.
TEST_F(PerformanceHintTest, GivenNullContextAndInvalidDispatchinfoAndEnableComputeWorkSizeSquaredIsFalseWhenProvideLocalWorkGroupSizeIsCalledThenItDoesntCrash) {
    // NOTE(review): dbgRestore is assumed to put the debug flags back when it
    // leaves scope (confirm in debug_manager_state_restore.h); that is why no
    // manual save/restore of the flag is kept here.
    DebugManagerStateRestore dbgRestore;
    DebugManager.flags.EnableComputeWorkSizeSquared.set(false);
    DebugManager.flags.EnableComputeWorkSizeND.set(false);

    auto pDevice = castToObject<Device>(devices[0]);
    MockKernelWithInternals mockKernel(*pDevice, context);
    DispatchInfo invalidDispatchInfo(mockKernel, 100, {32, 32, 32}, {1, 1, 1}, {0, 0, 0});
    provideLocalWorkGroupSizeHints(context, 0, invalidDispatchInfo);
}
|
||||
|
||||
TEST_F(PerformanceHintTest, GivenContextAndDispatchinfoAndEnableComputeWorkSizeSquaredIsDefaultWhenProvideLocalWorkGroupSizeIsCalledReturnValue) {
|
||||
|
@ -307,23 +308,23 @@ TEST_F(PerformanceHintTest, GivenContextAndDispatchinfoAndEnableComputeWorkSizeS
|
|||
}
|
||||
// Calls provideLocalWorkGroupSizeHints with a context and a two-dimensional
// {32, 32, 1} DispatchInfo while the squared algorithm is forced on and ND is
// forced off. There are no assertions; the call completing is the check.
TEST_F(PerformanceHintTest, GivenContextAndDispatchinfoAndEnableComputeWorkSizeSquaredIsTrueWhenProvideLocalWorkGroupSizeIsCalledReturnValue) {
    // NOTE(review): dbgRestore is assumed to put the debug flags back when it
    // leaves scope (confirm in debug_manager_state_restore.h); that is why no
    // manual save/restore of the flag is kept here.
    DebugManagerStateRestore dbgRestore;
    DebugManager.flags.EnableComputeWorkSizeSquared.set(true);
    DebugManager.flags.EnableComputeWorkSizeND.set(false);

    auto pDevice = castToObject<Device>(devices[0]);
    MockKernelWithInternals mockKernel(*pDevice, context);
    DispatchInfo invalidDispatchInfo(mockKernel, 2, {32, 32, 1}, {1, 1, 1}, {0, 0, 0});
    provideLocalWorkGroupSizeHints(context, 0, invalidDispatchInfo);
}
|
||||
// Calls provideLocalWorkGroupSizeHints with a context and a two-dimensional
// {32, 32, 1} DispatchInfo while both the squared and the ND algorithm are
// forced off. There are no assertions; the call completing is the check.
TEST_F(PerformanceHintTest, GivenContextAndDispatchinfoAndEnableComputeWorkSizeSquaredIsFalseWhenProvideLocalWorkGroupSizeIsCalledReturnValue) {
    // NOTE(review): dbgRestore is assumed to put the debug flags back when it
    // leaves scope (confirm in debug_manager_state_restore.h); that is why no
    // manual save/restore of the flag is kept here.
    DebugManagerStateRestore dbgRestore;
    DebugManager.flags.EnableComputeWorkSizeSquared.set(false);
    DebugManager.flags.EnableComputeWorkSizeND.set(false);

    auto pDevice = castToObject<Device>(devices[0]);
    MockKernelWithInternals mockKernel(*pDevice, context);
    DispatchInfo invalidDispatchInfo(mockKernel, 2, {32, 32, 1}, {1, 1, 1}, {0, 0, 0});
    provideLocalWorkGroupSizeHints(context, 0, invalidDispatchInfo);
}
|
||||
|
||||
TEST_P(PerformanceHintKernelTest, GivenSpillFillWhenKernelIsInitializedThenContextProvidesProperHint) {
|
||||
|
|
|
@ -45,8 +45,8 @@ Enable64kbpages = -1
|
|||
NodeOrdinal = 0
|
||||
ProductFamilyOverride = unk
|
||||
EnableDebugBreak = false
|
||||
EnableComputeWorkSizeND = false
|
||||
EnableComputeWorkSizeND = true
|
||||
EventsDebugEnable = false
|
||||
UseMaxSimdSizeToDeduceMaxWorkgroupSize = false
|
||||
EnableComputeWorkSizeSquared = true
|
||||
EnableComputeWorkSizeSquared = false
|
||||
TrackParentEvents = false
|
||||
|
|
Loading…
Reference in New Issue