Include dynamic SLM in clGetKernelWorkGroupInfo
The current implementation only takes the static slmInlineSize into account. With this change we also include dynamic SLM passed as kernel arguments. Related-To: NEO-5761 Signed-off-by: Fabian Zwolinski <fabian.zwolinski@intel.com>
This commit is contained in:
parent
d3f30b4046
commit
320b020dd6
|
@ -530,7 +530,7 @@ cl_int Kernel::getWorkGroupInfo(cl_kernel_work_group_info paramName,
|
|||
struct size_t3 {
|
||||
size_t val[3];
|
||||
} requiredWorkGroupSize;
|
||||
cl_ulong localMemorySize;
|
||||
size_t localMemorySize;
|
||||
const auto &kernelDescriptor = kernelInfo.kernelDescriptor;
|
||||
size_t preferredWorkGroupSizeMultiple = 0;
|
||||
cl_ulong scratchSize;
|
||||
|
@ -561,7 +561,7 @@ cl_int Kernel::getWorkGroupInfo(cl_kernel_work_group_info paramName,
|
|||
break;
|
||||
|
||||
case CL_KERNEL_LOCAL_MEM_SIZE:
|
||||
localMemorySize = kernelInfo.kernelDescriptor.kernelAttributes.slmInlineSize;
|
||||
localMemorySize = static_cast<size_t>(this->getSlmTotalSize());
|
||||
srcSize = sizeof(localMemorySize);
|
||||
pSrc = &localMemorySize;
|
||||
break;
|
||||
|
|
|
@ -312,7 +312,7 @@ TEST_F(KernelTests, GivenKernelCompileWorkGroupSizeWhenGettingWorkGroupInfoThenC
|
|||
EXPECT_EQ(paramValueSize, paramValueSizeRet);
|
||||
}
|
||||
|
||||
TEST_F(KernelTests, GivenRequiredDisabledEUFusionFlagWhenGettingPrefferedWorkGroupSizeMultipleThenCorectValueIsReturned) {
|
||||
TEST_F(KernelTests, GivenRequiredDisabledEUFusionFlagWhenGettingPrefferedWorkGroupSizeMultipleThenCorrectValueIsReturned) {
|
||||
KernelInfo kernelInfo = {};
|
||||
kernelInfo.kernelDescriptor.kernelAttributes.flags.requiresDisabledEUFusion = true;
|
||||
MockKernel kernel(pProgram, kernelInfo, *pClDevice);
|
||||
|
@ -337,7 +337,44 @@ TEST_F(KernelTests, GivenRequiredDisabledEUFusionFlagWhenGettingPrefferedWorkGro
|
|||
EXPECT_EQ(expectedValue, paramValue);
|
||||
}
|
||||
|
||||
TEST_F(KernelTests, GivenCFEFusedEUDispatchEnabledAndRequiredDisabledUEFusionWhenGettingPrefferedWorkGroupSizeMultipleThenCorectValueIsReturned) {
|
||||
TEST_F(KernelTests, GivenSlmInlineSizeAndSlmOffsetWhenGettingWorkGroupInfoThenCorrectValueIsReturned) {
|
||||
MockKernelInfo kernelInfo = {};
|
||||
kernelInfo.kernelDescriptor.kernelAttributes.slmInlineSize = 100u;
|
||||
|
||||
kernelInfo.addArgLocal(0, 0x10, 0x1);
|
||||
kernelInfo.addArgBuffer(1, 0x20, sizeof(void *));
|
||||
kernelInfo.addArgBuffer(2, 0x20, sizeof(void *));
|
||||
kernelInfo.addArgLocal(3, 0x30, 0x10);
|
||||
|
||||
MockKernel kernel(pProgram, kernelInfo, *pClDevice);
|
||||
kernel.kernelArguments.resize(4);
|
||||
kernel.slmSizes.resize(4);
|
||||
|
||||
uint32_t crossThreadData[0x40]{};
|
||||
crossThreadData[0x20 / sizeof(uint32_t)] = 0x12344321;
|
||||
kernel.setCrossThreadData(crossThreadData, sizeof(crossThreadData));
|
||||
|
||||
kernel.setArgLocal(0, 4096, nullptr);
|
||||
kernel.setArgLocal(3, 0, nullptr);
|
||||
|
||||
cl_kernel_info paramName = CL_KERNEL_LOCAL_MEM_SIZE;
|
||||
size_t paramValue;
|
||||
size_t paramValueSize = sizeof(paramValue);
|
||||
size_t paramValueSizeRet = 0;
|
||||
size_t expectedValue = 4096 + 0 + 100;
|
||||
|
||||
retVal = kernel.getWorkGroupInfo(
|
||||
paramName,
|
||||
paramValueSize,
|
||||
&paramValue,
|
||||
&paramValueSizeRet);
|
||||
|
||||
EXPECT_EQ(CL_SUCCESS, retVal);
|
||||
EXPECT_EQ(paramValueSize, paramValueSizeRet);
|
||||
EXPECT_EQ(expectedValue, paramValue);
|
||||
}
|
||||
|
||||
TEST_F(KernelTests, GivenCFEFusedEUDispatchEnabledAndRequiredDisabledUEFusionWhenGettingPrefferedWorkGroupSizeMultipleThenCorrectValueIsReturned) {
|
||||
DebugManagerStateRestore dbgRestorer;
|
||||
DebugManager.flags.CFEFusedEUDispatch.set(0);
|
||||
|
||||
|
|
Loading…
Reference in New Issue