mirror of
https://github.com/intel/compute-runtime.git
synced 2026-01-04 23:56:39 +08:00
Include dynamic SLM in clGetKernelWorkGroupInfo
The current implementation only takes the static slmInlineSize into account. With this change we also include dynamic SLM passed as kernel arguments. Related-To: NEO-5761 Signed-off-by: Fabian Zwolinski <fabian.zwolinski@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
67b670c5b9
commit
ccdb5aaa2a
@@ -75,7 +75,8 @@ Kernel::Kernel(Program *programArg, const KernelInfo &kernelInfoArg, ClDevice &c
|
|||||||
} else {
|
} else {
|
||||||
maxKernelWorkGroupSize = static_cast<uint32_t>(deviceInfo.maxWorkGroupSize);
|
maxKernelWorkGroupSize = static_cast<uint32_t>(deviceInfo.maxWorkGroupSize);
|
||||||
}
|
}
|
||||||
slmTotalSize = kernelInfoArg.kernelDescriptor.kernelAttributes.slmInlineSize;
|
|
||||||
|
slmTotalSize = slmTotalSum = kernelInfo.kernelDescriptor.kernelAttributes.slmInlineSize;
|
||||||
}
|
}
|
||||||
|
|
||||||
Kernel::~Kernel() {
|
Kernel::~Kernel() {
|
||||||
@@ -527,7 +528,7 @@ cl_int Kernel::getWorkGroupInfo(cl_kernel_work_group_info paramName,
|
|||||||
struct size_t3 {
|
struct size_t3 {
|
||||||
size_t val[3];
|
size_t val[3];
|
||||||
} requiredWorkGroupSize;
|
} requiredWorkGroupSize;
|
||||||
cl_ulong localMemorySize;
|
size_t totalLocalMemorySize = static_cast<size_t>(slmTotalSum);
|
||||||
const auto &kernelDescriptor = kernelInfo.kernelDescriptor;
|
const auto &kernelDescriptor = kernelInfo.kernelDescriptor;
|
||||||
size_t preferredWorkGroupSizeMultiple = 0;
|
size_t preferredWorkGroupSizeMultiple = 0;
|
||||||
cl_ulong scratchSize;
|
cl_ulong scratchSize;
|
||||||
@@ -558,9 +559,8 @@ cl_int Kernel::getWorkGroupInfo(cl_kernel_work_group_info paramName,
|
|||||||
break;
|
break;
|
||||||
|
|
||||||
case CL_KERNEL_LOCAL_MEM_SIZE:
|
case CL_KERNEL_LOCAL_MEM_SIZE:
|
||||||
localMemorySize = kernelInfo.kernelDescriptor.kernelAttributes.slmInlineSize;
|
srcSize = sizeof(totalLocalMemorySize);
|
||||||
srcSize = sizeof(localMemorySize);
|
pSrc = &totalLocalMemorySize;
|
||||||
pSrc = &localMemorySize;
|
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE:
|
case CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE:
|
||||||
@@ -1376,6 +1376,14 @@ cl_int Kernel::setArgLocal(uint32_t argIndexIn,
|
|||||||
|
|
||||||
slmTotalSize = kernelInfo.kernelDescriptor.kernelAttributes.slmInlineSize + alignUp(slmOffset, KB);
|
slmTotalSize = kernelInfo.kernelDescriptor.kernelAttributes.slmInlineSize + alignUp(slmOffset, KB);
|
||||||
|
|
||||||
|
uint32_t slmSum = 0;
|
||||||
|
for (const auto &kernelArg : kernelArguments) {
|
||||||
|
if (kernelArg.type == SLM_OBJ) {
|
||||||
|
slmSum += static_cast<uint32_t>(kernelArg.size);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
slmTotalSum = kernelInfo.kernelDescriptor.kernelAttributes.slmInlineSize + alignUp(slmSum, KB);
|
||||||
|
|
||||||
return CL_SUCCESS;
|
return CL_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -519,6 +519,7 @@ class Kernel : public ReferenceTrackedObject<Kernel> {
|
|||||||
uint32_t additionalKernelExecInfo = AdditionalKernelExecInfo::DisableOverdispatch;
|
uint32_t additionalKernelExecInfo = AdditionalKernelExecInfo::DisableOverdispatch;
|
||||||
uint32_t maxKernelWorkGroupSize = 0;
|
uint32_t maxKernelWorkGroupSize = 0;
|
||||||
uint32_t slmTotalSize = 0u;
|
uint32_t slmTotalSize = 0u;
|
||||||
|
uint32_t slmTotalSum = 0u;
|
||||||
uint32_t sshLocalSize = 0u;
|
uint32_t sshLocalSize = 0u;
|
||||||
uint32_t crossThreadDataSize = 0u;
|
uint32_t crossThreadDataSize = 0u;
|
||||||
|
|
||||||
|
|||||||
@@ -312,7 +312,7 @@ TEST_F(KernelTests, GivenKernelCompileWorkGroupSizeWhenGettingWorkGroupInfoThenC
|
|||||||
EXPECT_EQ(paramValueSize, paramValueSizeRet);
|
EXPECT_EQ(paramValueSize, paramValueSizeRet);
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST_F(KernelTests, GivenRequiredDisabledEUFusionFlagWhenGettingPrefferedWorkGroupSizeMultipleThenCorectValueIsReturned) {
|
TEST_F(KernelTests, GivenRequiredDisabledEUFusionFlagWhenGettingPrefferedWorkGroupSizeMultipleThenCorrectValueIsReturned) {
|
||||||
KernelInfo kernelInfo = {};
|
KernelInfo kernelInfo = {};
|
||||||
kernelInfo.kernelDescriptor.kernelAttributes.flags.requiresDisabledEUFusion = true;
|
kernelInfo.kernelDescriptor.kernelAttributes.flags.requiresDisabledEUFusion = true;
|
||||||
MockKernel kernel(pProgram, kernelInfo, *pClDevice);
|
MockKernel kernel(pProgram, kernelInfo, *pClDevice);
|
||||||
@@ -337,7 +337,44 @@ TEST_F(KernelTests, GivenRequiredDisabledEUFusionFlagWhenGettingPrefferedWorkGro
|
|||||||
EXPECT_EQ(expectedValue, paramValue);
|
EXPECT_EQ(expectedValue, paramValue);
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST_F(KernelTests, GivenCFEFusedEUDispatchEnabledAndRequiredDisabledUEFusionWhenGettingPrefferedWorkGroupSizeMultipleThenCorectValueIsReturned) {
|
TEST_F(KernelTests, GivenSlmInlineSizeAndSlmOffsetWhenGettingWorkGroupInfoThenCorrectValueIsReturned) {
|
||||||
|
MockKernelInfo kernelInfo = {};
|
||||||
|
kernelInfo.kernelDescriptor.kernelAttributes.slmInlineSize = 100u;
|
||||||
|
|
||||||
|
kernelInfo.addArgLocal(0, 0x10, 0x1);
|
||||||
|
kernelInfo.addArgBuffer(1, 0x20, sizeof(void *));
|
||||||
|
kernelInfo.addArgBuffer(2, 0x20, sizeof(void *));
|
||||||
|
kernelInfo.addArgLocal(3, 0x30, 0x10);
|
||||||
|
|
||||||
|
MockKernel kernel(pProgram, kernelInfo, *pClDevice);
|
||||||
|
kernel.kernelArguments.resize(4);
|
||||||
|
kernel.slmSizes.resize(4);
|
||||||
|
|
||||||
|
uint32_t crossThreadData[0x40]{};
|
||||||
|
crossThreadData[0x20 / sizeof(uint32_t)] = 0x12344321;
|
||||||
|
kernel.setCrossThreadData(crossThreadData, sizeof(crossThreadData));
|
||||||
|
|
||||||
|
kernel.setArgLocal(0, 4096, nullptr);
|
||||||
|
kernel.setArgLocal(3, 0, nullptr);
|
||||||
|
|
||||||
|
cl_kernel_info paramName = CL_KERNEL_LOCAL_MEM_SIZE;
|
||||||
|
size_t paramValue;
|
||||||
|
size_t paramValueSize = sizeof(paramValue);
|
||||||
|
size_t paramValueSizeRet = 0;
|
||||||
|
size_t expectedValue = 4096 + 0 + 100;
|
||||||
|
|
||||||
|
retVal = kernel.getWorkGroupInfo(
|
||||||
|
paramName,
|
||||||
|
paramValueSize,
|
||||||
|
&paramValue,
|
||||||
|
&paramValueSizeRet);
|
||||||
|
|
||||||
|
EXPECT_EQ(CL_SUCCESS, retVal);
|
||||||
|
EXPECT_EQ(paramValueSize, paramValueSizeRet);
|
||||||
|
EXPECT_EQ(expectedValue, paramValue);
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST_F(KernelTests, GivenCFEFusedEUDispatchEnabledAndRequiredDisabledUEFusionWhenGettingPrefferedWorkGroupSizeMultipleThenCorrectValueIsReturned) {
|
||||||
DebugManagerStateRestore dbgRestorer;
|
DebugManagerStateRestore dbgRestorer;
|
||||||
DebugManager.flags.CFEFusedEUDispatch.set(0);
|
DebugManager.flags.CFEFusedEUDispatch.set(0);
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user