Set arg local for all devices in Kernel

Related-To: NEO-5001
Signed-off-by: Mateusz Jablonski <mateusz.jablonski@intel.com>
This commit is contained in:
Mateusz Jablonski
2020-12-22 17:29:56 +00:00
committed by Compute-Runtime-Automation
parent 2e346b58cd
commit 50841bfd0d
2 changed files with 93 additions and 72 deletions

View File

@@ -1271,47 +1271,57 @@ bool Kernel::requiresCoherency() {
return false;
}
// Kernel::setArgLocal -- body of the diff hunk for this commit.
//
// NOTE(review): the hunk appears here without its +/- markers and without
// indentation, so lines removed by the commit and lines added by it are
// interleaved (see the two signature lines directly below). The span is not
// compilable as written. Presumably the statements that use
// getDevice()/defaultKernelInfo belong to the pre-change version and the
// statements inside the getDevices() loop that use kernelInfo/kernelDeviceInfo
// belong to the post-change version -- confirm against the repository.
//
// What the visible statements do:
//   - pass the argument to storeKernelArg() tagged SLM_OBJ with a null object
//     pointer;
//   - store argSize in slmSizes[argIndex] for the root device;
//   - read the argument's current SLM offset from the cross-thread data, add
//     argSize, and write re-aligned offsets for every later argument whose
//     slmAlignment is non-zero;
//   - set slmTotalSize to slmStaticSize plus the final offset aligned up to KB.
// The only return statement visible returns CL_SUCCESS.
cl_int Kernel::setArgLocal(uint32_t argIndex,
cl_int Kernel::setArgLocal(uint32_t argIndexIn,
size_t argSize,
const void *argVal) {
auto rootDeviceIndex = getDevice().getRootDeviceIndex();
auto crossThreadData = reinterpret_cast<uint32_t *>(getCrossThreadData(rootDeviceIndex));
auto &defaultKernelInfo = getDefaultKernelInfo();
// One bit per root device index; a set bit means that index was already
// processed by the loop below, so it is skipped on later iterations.
// NOTE(review): std::bitset<64>::test() throws std::out_of_range for an index
// >= 64 -- assumes root device indices stay below 64; confirm.
std::bitset<64> isArgSet{};
storeKernelArg(argIndexIn, SLM_OBJ, nullptr, argVal, argSize);
storeKernelArg(argIndex, SLM_OBJ, nullptr, argVal, argSize);
// Visit every device returned by getDevices(), handling each root device index once.
for (auto &pClDevice : getDevices()) {
auto rootDeviceIndex = pClDevice->getRootDeviceIndex();
if (isArgSet.test(rootDeviceIndex)) {
continue;
}
auto crossThreadData = reinterpret_cast<uint32_t *>(getCrossThreadData(rootDeviceIndex));
auto &kernelInfo = *kernelInfos[rootDeviceIndex];
auto &kernelDeviceInfo = kernelDeviceInfos[rootDeviceIndex];
kernelDeviceInfos[rootDeviceIndex].slmSizes[argIndex] = argSize;
// Working copy of the index: it is incremented below while walking the later
// arguments, and is re-initialised from argIndexIn on every loop iteration.
uint32_t argIndex = argIndexIn;
// Extract our current slmOffset
auto slmOffset = *ptrOffset(crossThreadData,
defaultKernelInfo.kernelArgInfo[argIndex].kernelArgPatchInfoVector[0].crossthreadOffset);
kernelDeviceInfo.slmSizes[argIndex] = argSize;
// Add our size
slmOffset += static_cast<uint32_t>(argSize);
// Extract our current slmOffset
auto slmOffset = *ptrOffset(crossThreadData,
kernelInfo.kernelArgInfo[argIndex].kernelArgPatchInfoVector[0].crossthreadOffset);
// Update all slm offsets after this argIndex
++argIndex;
while (argIndex < kernelDeviceInfos[rootDeviceIndex].slmSizes.size()) {
const auto &kernelArgInfo = defaultKernelInfo.kernelArgInfo[argIndex];
auto slmAlignment = kernelArgInfo.slmAlignment;
// Add our size
slmOffset += static_cast<uint32_t>(argSize);
// If a local argument, alignment should be non-zero
if (slmAlignment) {
// Align to specified alignment
slmOffset = alignUp(slmOffset, slmAlignment);
// Update all slm offsets after this argIndex
++argIndex;
while (argIndex < kernelDeviceInfo.slmSizes.size()) {
const auto &kernelArgInfo = kernelInfo.kernelArgInfo[argIndex];
auto slmAlignment = kernelArgInfo.slmAlignment;
// Patch our new offset into cross thread data
auto patchLocation = ptrOffset(crossThreadData,
kernelArgInfo.kernelArgPatchInfoVector[0].crossthreadOffset);
*patchLocation = slmOffset;
// If a local argument, alignment should be non-zero
if (slmAlignment) {
// Align to specified alignment
slmOffset = alignUp(slmOffset, slmAlignment);
// Patch our new offset into cross thread data
auto patchLocation = ptrOffset(crossThreadData,
kernelArgInfo.kernelArgPatchInfoVector[0].crossthreadOffset);
*patchLocation = slmOffset;
}
// Advance past this argument using its stored slm size (added whether or not
// the alignment branch above was taken).
slmOffset += static_cast<uint32_t>(kernelDeviceInfo.slmSizes[argIndex]);
++argIndex;
}
slmOffset += static_cast<uint32_t>(kernelDeviceInfos[rootDeviceIndex].slmSizes[argIndex]);
++argIndex;
// Total = static SLM size from workloadInfo + the final offset aligned up to KB.
kernelDeviceInfo.slmTotalSize = kernelInfo.workloadInfo.slmStaticSize + alignUp(slmOffset, KB);
// Mark this root device index as processed.
isArgSet.set(rootDeviceIndex);
}
kernelDeviceInfos[rootDeviceIndex].slmTotalSize = defaultKernelInfo.workloadInfo.slmStaticSize + alignUp(slmOffset, KB);
return CL_SUCCESS;
}