diff --git a/opencl/source/built_ins/vme_dispatch_builder.h b/opencl/source/built_ins/vme_dispatch_builder.h index 33fc321cbc..9019c2ae21 100644 --- a/opencl/source/built_ins/vme_dispatch_builder.h +++ b/opencl/source/built_ins/vme_dispatch_builder.h @@ -173,7 +173,7 @@ class VmeBuiltinDispatchInfoBuilder : public BuiltinDispatchInfoBuilder { DEBUG_BREAK_IF(kernelArgInfo.kernelArgPatchInfoVector.size() != 1); const KernelArgPatchInfo &patchInfo = kernelArgInfo.kernelArgPatchInfoVector[0]; DEBUG_BREAK_IF(sizeof(RetType) > patchInfo.size); - return *(RetType *)(vmeKernel->getCrossThreadData(clDevice.getRootDeviceIndex()) + patchInfo.crossthreadOffset); + return *(RetType *)(vmeKernel->getCrossThreadData() + patchInfo.crossthreadOffset); } cl_int validateImages(Vec3 inputRegion, Vec3 offset) const { diff --git a/opencl/source/command_queue/enqueue_common.h b/opencl/source/command_queue/enqueue_common.h index 8b24954279..010882d18e 100644 --- a/opencl/source/command_queue/enqueue_common.h +++ b/opencl/source/command_queue/enqueue_common.h @@ -921,7 +921,7 @@ void CommandQueueHw::enqueueBlocked( } else { continue; } - kernel->getResidency(allSurfaces, device->getRootDeviceIndex()); + kernel->getResidency(allSurfaces); } for (auto &surface : CreateRange(surfaces, surfaceCount)) { allSurfaces.push_back(surface->duplicate()); diff --git a/opencl/source/command_queue/enqueue_kernel.h b/opencl/source/command_queue/enqueue_kernel.h index e14bdecbe3..9339b1899f 100644 --- a/opencl/source/command_queue/enqueue_kernel.h +++ b/opencl/source/command_queue/enqueue_kernel.h @@ -132,7 +132,7 @@ cl_int CommandQueueHw::enqueueKernel( ",", globalWorkSizeIn[2], ",SIMD:, ", kernelInfo.getMaxSimdSize()); - if (totalWorkItems > kernel.getMaxKernelWorkGroupSize(rootDeviceIndex)) { + if (totalWorkItems > kernel.getMaxKernelWorkGroupSize()) { return CL_INVALID_WORK_GROUP_SIZE; } diff --git a/opencl/source/command_queue/gpgpu_walker_bdw_plus.inl b/opencl/source/command_queue/gpgpu_walker_bdw_plus.inl index 02fa0d6fa2..9d703e98cc 100644 --- a/opencl/source/command_queue/gpgpu_walker_bdw_plus.inl +++ b/opencl/source/command_queue/gpgpu_walker_bdw_plus.inl @@ -96,13 +96,13 @@ void GpgpuWalkerHelper::dispatchScheduler( DEBUG_BREAK_IF(simd != PARALLEL_SCHEDULER_COMPILATION_SIZE_20); // Patch our kernel constants - scheduler.setGlobalWorkOffsetValues(rootDeviceIndex, 0, 0, 0); - scheduler.setGlobalWorkSizeValues(rootDeviceIndex, static_cast(scheduler.getGws()), 1, 1); - scheduler.setLocalWorkSizeValues(rootDeviceIndex, static_cast(scheduler.getLws()), 1, 1); - scheduler.setLocalWorkSize2Values(rootDeviceIndex, static_cast(scheduler.getLws()), 1, 1); - scheduler.setEnqueuedLocalWorkSizeValues(rootDeviceIndex, static_cast(scheduler.getLws()), 1, 1); - scheduler.setNumWorkGroupsValues(rootDeviceIndex, static_cast(scheduler.getGws() / scheduler.getLws()), 0, 0); - scheduler.setWorkDim(rootDeviceIndex, 1); + scheduler.setGlobalWorkOffsetValues(0, 0, 0); + scheduler.setGlobalWorkSizeValues(static_cast(scheduler.getGws()), 1, 1); + scheduler.setLocalWorkSizeValues(static_cast(scheduler.getLws()), 1, 1); + scheduler.setLocalWorkSize2Values(static_cast(scheduler.getLws()), 1, 1); + scheduler.setEnqueuedLocalWorkSizeValues(static_cast(scheduler.getLws()), 1, 1); + scheduler.setNumWorkGroupsValues(static_cast(scheduler.getGws() / scheduler.getLws()), 0, 0); + scheduler.setWorkDim(1); // Send our indirect object data size_t localWorkSizes[3] = {scheduler.getLws(), 1, 1}; diff --git a/opencl/source/command_queue/hardware_interface_base.inl b/opencl/source/command_queue/hardware_interface_base.inl index a42787e616..96e4d1b918 100644 --- a/opencl/source/command_queue/hardware_interface_base.inl +++ b/opencl/source/command_queue/hardware_interface_base.inl @@ -211,23 +211,22 @@ void HardwareInterface::dispatchKernelCommands(CommandQueue &commandQ size_t globalWorkSizes[3] = {gws.x, gws.y, gws.z}; - auto rootDeviceIndex = commandQueue.getDevice().getRootDeviceIndex(); // Patch our kernel constants - kernel.setGlobalWorkOffsetValues(rootDeviceIndex, static_cast(offset.x), static_cast(offset.y), static_cast(offset.z)); - kernel.setGlobalWorkSizeValues(rootDeviceIndex, static_cast(gws.x), static_cast(gws.y), static_cast(gws.z)); + kernel.setGlobalWorkOffsetValues(static_cast(offset.x), static_cast(offset.y), static_cast(offset.z)); + kernel.setGlobalWorkSizeValues(static_cast(gws.x), static_cast(gws.y), static_cast(gws.z)); - if (isMainKernel || (!kernel.isLocalWorkSize2Patched(rootDeviceIndex))) { - kernel.setLocalWorkSizeValues(rootDeviceIndex, static_cast(lws.x), static_cast(lws.y), static_cast(lws.z)); + if (isMainKernel || (!kernel.isLocalWorkSize2Patched())) { + kernel.setLocalWorkSizeValues(static_cast(lws.x), static_cast(lws.y), static_cast(lws.z)); } - kernel.setLocalWorkSize2Values(rootDeviceIndex, static_cast(lws.x), static_cast(lws.y), static_cast(lws.z)); - kernel.setEnqueuedLocalWorkSizeValues(rootDeviceIndex, static_cast(elws.x), static_cast(elws.y), static_cast(elws.z)); + kernel.setLocalWorkSize2Values(static_cast(lws.x), static_cast(lws.y), static_cast(lws.z)); + kernel.setEnqueuedLocalWorkSizeValues(static_cast(elws.x), static_cast(elws.y), static_cast(elws.z)); if (isMainKernel) { - kernel.setNumWorkGroupsValues(rootDeviceIndex, static_cast(totalNumberOfWorkgroups.x), static_cast(totalNumberOfWorkgroups.y), static_cast(totalNumberOfWorkgroups.z)); + kernel.setNumWorkGroupsValues(static_cast(totalNumberOfWorkgroups.x), static_cast(totalNumberOfWorkgroups.y), static_cast(totalNumberOfWorkgroups.z)); } - kernel.setWorkDim(rootDeviceIndex, dim); + kernel.setWorkDim(dim); // Send our indirect object data size_t localWorkSizes[3] = {lws.x, lws.y, lws.z}; diff --git a/opencl/source/command_queue/local_work_size.cpp b/opencl/source/command_queue/local_work_size.cpp index 5e7f1b2f05..9c45bc9302 100644 --- a/opencl/source/command_queue/local_work_size.cpp +++ b/opencl/source/command_queue/local_work_size.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2017-2020 Intel Corporation + * Copyright (C) 2017-2021 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -427,7 +427,7 @@ Vec3 computeWorkgroupSize(const DispatchInfo &dispatchInfo) { size_t workItems[3] = {dispatchInfo.getGWS().x, dispatchInfo.getGWS().y, dispatchInfo.getGWS().z}; computeWorkgroupSizeND(wsInfo, workGroupSize, workItems, dispatchInfo.getDim()); } else { - auto maxWorkGroupSize = kernel->getMaxKernelWorkGroupSize(rootDeviceIndex); + auto maxWorkGroupSize = kernel->getMaxKernelWorkGroupSize(); auto simd = kernel->getKernelInfo(rootDeviceIndex).getMaxSimdSize(); size_t workItems[3] = {dispatchInfo.getGWS().x, dispatchInfo.getGWS().y, dispatchInfo.getGWS().z}; if (dispatchInfo.getDim() == 1) { diff --git a/opencl/source/gtpin/gtpin_callbacks.cpp b/opencl/source/gtpin/gtpin_callbacks.cpp index 8008f25d7a..710655c71d 100644 --- a/opencl/source/gtpin/gtpin_callbacks.cpp +++ b/opencl/source/gtpin/gtpin_callbacks.cpp @@ -67,7 +67,7 @@ void gtpinNotifyKernelCreate(cl_kernel kernel) { auto pKernel = pMultiDeviceKernel->getDefaultKernel(); auto &device = pKernel->getDevices()[0]->getDevice(); auto rootDeviceIndex = device.getRootDeviceIndex(); - size_t gtpinBTI = pKernel->getNumberOfBindingTableStates(rootDeviceIndex); + size_t gtpinBTI = pKernel->getNumberOfBindingTableStates(); // Enlarge local copy of SSH by 1 SS GFXCORE_FAMILY genFamily = device.getHardwareInfo().platform.eRenderCoreFamily; GTPinHwHelper >pinHelper = GTPinHwHelper::get(genFamily); @@ -141,7 +141,7 @@ void gtpinNotifyKernelSubmit(cl_kernel kernel, void *pCmdQueue) { } GFXCORE_FAMILY genFamily = device.getHardwareInfo().platform.eRenderCoreFamily; GTPinHwHelper >pinHelper = GTPinHwHelper::get(genFamily); - size_t gtpinBTI = pKernel->getNumberOfBindingTableStates(rootDeviceIndex) - 1; + size_t gtpinBTI = pKernel->getNumberOfBindingTableStates() - 1; void *pSurfaceState = gtpinHelper.getSurfaceState(pKernel, gtpinBTI, rootDeviceIndex); cl_mem buffer = (cl_mem)resource; auto pBuffer = castToObjectOrAbort(buffer); diff --git a/opencl/source/gtpin/gtpin_hw_helper.inl b/opencl/source/gtpin/gtpin_hw_helper.inl index a8a9579585..2e6283478d 100644 --- a/opencl/source/gtpin/gtpin_hw_helper.inl +++ b/opencl/source/gtpin/gtpin_hw_helper.inl @@ -1,5 +1,5 @@ /* - * Copyright (C) 2018-2020 Intel Corporation + * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -27,7 +27,7 @@ bool GTPinHwHelperHw::addSurfaceState(Kernel *pKernel, uint32_t rootD size_t ssSize = sizeof(RENDER_SURFACE_STATE); size_t btsSize = sizeof(BINDING_TABLE_STATE); size_t sizeToEnlarge = ssSize + btsSize; - size_t currBTOffset = pKernel->getBindingTableOffset(rootDeviceIndex); + size_t currBTOffset = pKernel->getBindingTableOffset(); size_t currSurfaceStateSize = currBTOffset; char *pSsh = static_cast(pKernel->getSurfaceStateHeap(rootDeviceIndex)); char *pNewSsh = new char[sshSize + sizeToEnlarge]; @@ -35,12 +35,12 @@ bool GTPinHwHelperHw::addSurfaceState(Kernel *pKernel, uint32_t rootD RENDER_SURFACE_STATE *pSS = reinterpret_cast(pNewSsh + currSurfaceStateSize); *pSS = GfxFamily::cmdInitRenderSurfaceState; size_t newSurfaceStateSize = currSurfaceStateSize + ssSize; - size_t currBTCount = pKernel->getNumberOfBindingTableStates(rootDeviceIndex); + size_t currBTCount = pKernel->getNumberOfBindingTableStates(); memcpy_s(pNewSsh + newSurfaceStateSize, sshSize + sizeToEnlarge - newSurfaceStateSize, pSsh + currBTOffset, currBTCount * btsSize); BINDING_TABLE_STATE *pNewBTS = reinterpret_cast(pNewSsh + newSurfaceStateSize + currBTCount * btsSize); *pNewBTS = GfxFamily::cmdInitBindingTableState; pNewBTS->setSurfaceStatePointer((uint64_t)currBTOffset); - pKernel->resizeSurfaceStateHeap(rootDeviceIndex, pNewSsh, sshSize + sizeToEnlarge, currBTCount + 1, newSurfaceStateSize); + pKernel->resizeSurfaceStateHeap(pNewSsh, sshSize + sizeToEnlarge, currBTCount + 1, newSurfaceStateSize); return true; } @@ -48,10 +48,10 @@ template void *GTPinHwHelperHw::getSurfaceState(Kernel *pKernel, size_t bti, uint32_t rootDeviceIndex) { using BINDING_TABLE_STATE = typename GfxFamily::BINDING_TABLE_STATE; - if ((nullptr == pKernel->getSurfaceStateHeap(rootDeviceIndex)) || (bti >= pKernel->getNumberOfBindingTableStates(rootDeviceIndex))) { + if ((nullptr == pKernel->getSurfaceStateHeap(rootDeviceIndex)) || (bti >= pKernel->getNumberOfBindingTableStates())) { return nullptr; } - auto *pBts = reinterpret_cast(ptrOffset(pKernel->getSurfaceStateHeap(rootDeviceIndex), (pKernel->getBindingTableOffset(rootDeviceIndex) + bti * sizeof(BINDING_TABLE_STATE)))); + auto *pBts = reinterpret_cast(ptrOffset(pKernel->getSurfaceStateHeap(rootDeviceIndex), (pKernel->getBindingTableOffset() + bti * sizeof(BINDING_TABLE_STATE)))); auto pSurfaceState = ptrOffset(pKernel->getSurfaceStateHeap(rootDeviceIndex), pBts->getSurfaceStatePointer()); return pSurfaceState; } diff --git a/opencl/source/helpers/dispatch_info.cpp b/opencl/source/helpers/dispatch_info.cpp index 6e4dc93177..32801a9ade 100644 --- a/opencl/source/helpers/dispatch_info.cpp +++ b/opencl/source/helpers/dispatch_info.cpp @@ -11,7 +11,7 @@ namespace NEO { bool DispatchInfo::usesSlm() const { - return (kernel == nullptr) ? false : kernel->getSlmTotalSize(pClDevice->getRootDeviceIndex()) > 0; + return (kernel == nullptr) ? false : kernel->getSlmTotalSize() > 0; } bool DispatchInfo::usesStatelessPrintfSurface() const { diff --git a/opencl/source/helpers/hardware_commands_helper.h b/opencl/source/helpers/hardware_commands_helper.h index c932761498..a0e7e83592 100644 --- a/opencl/source/helpers/hardware_commands_helper.h +++ b/opencl/source/helpers/hardware_commands_helper.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2017-2020 Intel Corporation + * Copyright (C) 2017-2021 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -77,8 +77,7 @@ struct HardwareCommandsHelper : public PerThreadDataHelper { Kernel &kernel, bool inlineDataProgrammingRequired, WALKER_TYPE *walkerCmd, - uint32_t &sizeCrossThreadData, - uint32_t rootDeviceIndex); + uint32_t &sizeCrossThreadData); static size_t sendIndirectState( LinearStream &commandStream, diff --git a/opencl/source/helpers/hardware_commands_helper_base.inl b/opencl/source/helpers/hardware_commands_helper_base.inl index 13c9381ae6..a5a19f5a63 100644 --- a/opencl/source/helpers/hardware_commands_helper_base.inl +++ b/opencl/source/helpers/hardware_commands_helper_base.inl @@ -63,7 +63,7 @@ size_t HardwareCommandsHelper::getSizeRequiredIOH( auto numChannels = kernelInfo.kernelDescriptor.kernelAttributes.numLocalIdChannels; uint32_t grfSize = sizeof(typename GfxFamily::GRF); - return alignUp((kernel.getCrossThreadDataSize(rootDeviceIndex) + + return alignUp((kernel.getCrossThreadDataSize() + getPerThreadDataSizeTotal(kernelInfo.getMaxSimdSize(), grfSize, numChannels, localWorkSize)), WALKER_TYPE::INDIRECTDATASTARTADDRESS_ALIGN_SIZE); } @@ -174,7 +174,7 @@ size_t HardwareCommandsHelper::sendInterfaceDescriptorData( interfaceDescriptor.setDenormMode(INTERFACE_DESCRIPTOR_DATA::DENORM_MODE_SETBYKERNEL); - auto slmTotalSize = kernel.getSlmTotalSize(rootDeviceIndex); + auto slmTotalSize = kernel.getSlmTotalSize(); setGrfInfo(&interfaceDescriptor, kernel, sizeCrossThreadData, sizePerThreadData, rootDeviceIndex); EncodeDispatchKernel::appendAdditionalIDDFields(&interfaceDescriptor, hardwareInfo, threadsPerThreadGroup, slmTotalSize, SlmPolicy::SlmPolicyNone); @@ -237,7 +237,7 @@ size_t HardwareCommandsHelper::sendIndirectState( auto dstBindingTablePointer = EncodeSurfaceState::pushBindingTableAndSurfaceStates(ssh, kernelInfo.kernelDescriptor.payloadMappings.bindingTable.numEntries, kernel.getSurfaceStateHeap(rootDeviceIndex), kernel.getSurfaceStateHeapSize(rootDeviceIndex), - kernel.getNumberOfBindingTableStates(rootDeviceIndex), kernel.getBindingTableOffset(rootDeviceIndex)); + kernel.getNumberOfBindingTableStates(), kernel.getBindingTableOffset()); // Copy our sampler state if it exists const auto &samplerTable = kernelInfo.kernelDescriptor.payloadMappings.samplerTable; @@ -254,11 +254,11 @@ size_t HardwareCommandsHelper::sendIndirectState( auto threadsPerThreadGroup = static_cast(getThreadsPerWG(simd, localWorkItems)); auto numChannels = static_cast(kernelInfo.kernelDescriptor.kernelAttributes.numLocalIdChannels); - uint32_t sizeCrossThreadData = kernel.getCrossThreadDataSize(rootDeviceIndex); + uint32_t sizeCrossThreadData = kernel.getCrossThreadDataSize(); size_t offsetCrossThreadData = HardwareCommandsHelper::sendCrossThreadData( ioh, kernel, inlineDataProgrammingRequired, - walkerCmd, sizeCrossThreadData, rootDeviceIndex); + walkerCmd, sizeCrossThreadData); size_t sizePerThreadDataTotal = 0; size_t sizePerThreadData = 0; @@ -277,7 +277,7 @@ size_t HardwareCommandsHelper::sendIndirectState( uint64_t offsetInterfaceDescriptor = offsetInterfaceDescriptorTable + interfaceDescriptorIndex * sizeof(INTERFACE_DESCRIPTOR_DATA); - auto bindingTablePrefetchSize = std::min(31u, static_cast(kernel.getNumberOfBindingTableStates(rootDeviceIndex))); + auto bindingTablePrefetchSize = std::min(31u, static_cast(kernel.getNumberOfBindingTableStates())); if (resetBindingTablePrefetch(kernel)) { bindingTablePrefetchSize = 0; } diff --git a/opencl/source/helpers/hardware_commands_helper_bdw_plus.inl b/opencl/source/helpers/hardware_commands_helper_bdw_plus.inl index fbda56410b..dc6d13fd47 100644 --- a/opencl/source/helpers/hardware_commands_helper_bdw_plus.inl +++ b/opencl/source/helpers/hardware_commands_helper_bdw_plus.inl @@ -1,5 +1,5 @@ /* - * Copyright (C) 2018-2020 Intel Corporation + * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -127,13 +127,12 @@ size_t HardwareCommandsHelper::sendCrossThreadData( Kernel &kernel, bool inlineDataProgrammingRequired, WALKER_TYPE *walkerCmd, - uint32_t &sizeCrossThreadData, - uint32_t rootDeviceIndex) { + uint32_t &sizeCrossThreadData) { indirectHeap.align(WALKER_TYPE::INDIRECTDATASTARTADDRESS_ALIGN_SIZE); auto offsetCrossThreadData = indirectHeap.getUsed(); char *pDest = static_cast(indirectHeap.getSpace(sizeCrossThreadData)); - memcpy_s(pDest, sizeCrossThreadData, kernel.getCrossThreadData(rootDeviceIndex), sizeCrossThreadData); + memcpy_s(pDest, sizeCrossThreadData, kernel.getCrossThreadData(), sizeCrossThreadData); if (DebugManager.flags.AddPatchInfoCommentsForAUBDump.get()) { FlatBatchBufferHelper::fixCrossThreadDataInfo(kernel.getPatchInfoDataList(), offsetCrossThreadData, indirectHeap.getGraphicsAllocation()->getGpuAddress()); diff --git a/opencl/source/helpers/task_information.cpp b/opencl/source/helpers/task_information.cpp index 8a14719391..39c3d05235 100644 --- a/opencl/source/helpers/task_information.cpp +++ b/opencl/source/helpers/task_information.cpp @@ -198,7 +198,7 @@ CompletionStamp &CommandComputeKernel::submit(uint32_t taskLevel, bool terminate scheduler.makeResident(commandStreamReceiver); // Update SLM usage - slmUsed |= scheduler.getSlmTotalSize(rootDeviceIndex) > 0; + slmUsed |= scheduler.getSlmTotalSize() > 0; this->kernel->getProgram()->getBlockKernelManager()->makeInternalAllocationsResident(commandStreamReceiver); } diff --git a/opencl/source/kernel/kernel.cpp b/opencl/source/kernel/kernel.cpp index 58f648d165..19af26aab5 100644 --- a/opencl/source/kernel/kernel.cpp +++ b/opencl/source/kernel/kernel.cpp @@ -73,25 +73,22 @@ Kernel::Kernel(Program *programArg, const KernelInfoContainer &kernelInfosArg, C deviceVector(programArg->getDevices()), kernelInfos(kernelInfosArg), defaultRootDeviceIndex(clDeviceArg.getRootDeviceIndex()) { - kernelDeviceInfos.resize(program->getMaxRootDeviceIndex() + 1); program->retain(); program->retainForKernel(); imageTransformer.reset(new ImageTransformer); auto rootDeviceIndex = defaultRootDeviceIndex; - kernelDeviceInfos[rootDeviceIndex].maxKernelWorkGroupSize = static_cast(clDevice.getSharedDeviceInfo().maxWorkGroupSize); - kernelDeviceInfos[rootDeviceIndex].slmTotalSize = kernelInfosArg[rootDeviceIndex]->workloadInfo.slmStaticSize; + maxKernelWorkGroupSize = static_cast(clDevice.getSharedDeviceInfo().maxWorkGroupSize); + slmTotalSize = kernelInfosArg[rootDeviceIndex]->workloadInfo.slmStaticSize; } Kernel::~Kernel() { - for (auto &kernelDeviceInfo : kernelDeviceInfos) { - delete[] kernelDeviceInfo.crossThreadData; - kernelDeviceInfo.crossThreadData = nullptr; - kernelDeviceInfo.crossThreadDataSize = 0; + delete[] crossThreadData; + crossThreadData = nullptr; + crossThreadDataSize = 0; - if (kernelDeviceInfo.privateSurface) { - program->peekExecutionEnvironment().memoryManager->checkGpuUsageAndDestroyGraphicsAllocations(kernelDeviceInfo.privateSurface); - kernelDeviceInfo.privateSurface = nullptr; - } + if (privateSurface) { + program->peekExecutionEnvironment().memoryManager->checkGpuUsageAndDestroyGraphicsAllocations(privateSurface); + privateSurface = nullptr; } if (kernelReflectionSurface) { @@ -138,7 +135,6 @@ inline void patch(const SrcT &src, void *dst, uint32_t dstOffsetBytes) { void Kernel::patchWithImplicitSurface(void *ptrToPatchInCrossThreadData, GraphicsAllocation &allocation, const Device &device, const ArgDescPointer &arg) { auto rootDeviceIndex = device.getRootDeviceIndex(); - void *crossThreadData = getCrossThreadData(rootDeviceIndex); if ((nullptr != crossThreadData) && isValidOffset(arg.stateless)) { auto pp = ptrOffset(crossThreadData, arg.stateless); uintptr_t addressToPatch = reinterpret_cast(ptrToPatchInCrossThreadData); @@ -164,7 +160,6 @@ void Kernel::patchWithImplicitSurface(void *ptrToPatchInCrossThreadData, Graphic auto rootDeviceIndex = device.getRootDeviceIndex(); uint32_t pointerSize = patch.DataParamSize; - void *crossThreadData = getCrossThreadData(rootDeviceIndex); if (crossThreadData != nullptr) { uint32_t crossThreadDataOffset = patch.DataParamOffset; auto pp = ptrOffset(crossThreadData, crossThreadDataOffset); @@ -200,7 +195,6 @@ cl_int Kernel::initialize() { reconfigureKernel(rootDeviceIndex); auto &hwInfo = pClDevice->getHardwareInfo(); auto &hwHelper = HwHelper::get(hwInfo.platform.eRenderCoreFamily); - auto &kernelDeviceInfo = kernelDeviceInfos[rootDeviceIndex]; auto &kernelInfo = *kernelInfos[rootDeviceIndex]; auto &kernelDescriptor = kernelInfo.kernelDescriptor; auto maxSimdSize = kernelInfo.getMaxSimdSize(); @@ -211,133 +205,133 @@ cl_int Kernel::initialize() { return CL_INVALID_KERNEL; } - kernelDeviceInfo.crossThreadDataSize = kernelDescriptor.kernelAttributes.crossThreadDataSize; + crossThreadDataSize = kernelDescriptor.kernelAttributes.crossThreadDataSize; // now allocate our own cross-thread data, if necessary - if (kernelDeviceInfo.crossThreadDataSize) { - kernelDeviceInfo.crossThreadData = new char[kernelDeviceInfo.crossThreadDataSize]; + if (crossThreadDataSize) { + crossThreadData = new char[crossThreadDataSize]; if (kernelInfo.crossThreadData) { - memcpy_s(kernelDeviceInfo.crossThreadData, kernelDeviceInfo.crossThreadDataSize, - kernelInfo.crossThreadData, kernelDeviceInfo.crossThreadDataSize); + memcpy_s(crossThreadData, crossThreadDataSize, + kernelInfo.crossThreadData, crossThreadDataSize); } else { - memset(kernelDeviceInfo.crossThreadData, 0x00, kernelDeviceInfo.crossThreadDataSize); + memset(crossThreadData, 0x00, crossThreadDataSize); } - auto crossThread = reinterpret_cast(kernelDeviceInfo.crossThreadData); - kernelDeviceInfo.globalWorkOffsetX = workloadInfo.globalWorkOffsetOffsets[0] != WorkloadInfo::undefinedOffset - ? ptrOffset(crossThread, workloadInfo.globalWorkOffsetOffsets[0]) - : kernelDeviceInfo.globalWorkOffsetX; - kernelDeviceInfo.globalWorkOffsetY = workloadInfo.globalWorkOffsetOffsets[1] != WorkloadInfo::undefinedOffset - ? ptrOffset(crossThread, workloadInfo.globalWorkOffsetOffsets[1]) - : kernelDeviceInfo.globalWorkOffsetY; - kernelDeviceInfo.globalWorkOffsetZ = workloadInfo.globalWorkOffsetOffsets[2] != WorkloadInfo::undefinedOffset - ? ptrOffset(crossThread, workloadInfo.globalWorkOffsetOffsets[2]) - : kernelDeviceInfo.globalWorkOffsetZ; + auto crossThread = reinterpret_cast(crossThreadData); + globalWorkOffsetX = workloadInfo.globalWorkOffsetOffsets[0] != WorkloadInfo::undefinedOffset + ? ptrOffset(crossThread, workloadInfo.globalWorkOffsetOffsets[0]) + : globalWorkOffsetX; + globalWorkOffsetY = workloadInfo.globalWorkOffsetOffsets[1] != WorkloadInfo::undefinedOffset + ? ptrOffset(crossThread, workloadInfo.globalWorkOffsetOffsets[1]) + : globalWorkOffsetY; + globalWorkOffsetZ = workloadInfo.globalWorkOffsetOffsets[2] != WorkloadInfo::undefinedOffset + ? ptrOffset(crossThread, workloadInfo.globalWorkOffsetOffsets[2]) + : globalWorkOffsetZ; - kernelDeviceInfo.localWorkSizeX = workloadInfo.localWorkSizeOffsets[0] != WorkloadInfo::undefinedOffset - ? ptrOffset(crossThread, workloadInfo.localWorkSizeOffsets[0]) - : kernelDeviceInfo.localWorkSizeX; - kernelDeviceInfo.localWorkSizeY = workloadInfo.localWorkSizeOffsets[1] != WorkloadInfo::undefinedOffset - ? ptrOffset(crossThread, workloadInfo.localWorkSizeOffsets[1]) - : kernelDeviceInfo.localWorkSizeY; - kernelDeviceInfo.localWorkSizeZ = workloadInfo.localWorkSizeOffsets[2] != WorkloadInfo::undefinedOffset - ? ptrOffset(crossThread, workloadInfo.localWorkSizeOffsets[2]) - : kernelDeviceInfo.localWorkSizeZ; + localWorkSizeX = workloadInfo.localWorkSizeOffsets[0] != WorkloadInfo::undefinedOffset + ? ptrOffset(crossThread, workloadInfo.localWorkSizeOffsets[0]) + : localWorkSizeX; + localWorkSizeY = workloadInfo.localWorkSizeOffsets[1] != WorkloadInfo::undefinedOffset + ? ptrOffset(crossThread, workloadInfo.localWorkSizeOffsets[1]) + : localWorkSizeY; + localWorkSizeZ = workloadInfo.localWorkSizeOffsets[2] != WorkloadInfo::undefinedOffset + ? ptrOffset(crossThread, workloadInfo.localWorkSizeOffsets[2]) + : localWorkSizeZ; - kernelDeviceInfo.localWorkSizeX2 = workloadInfo.localWorkSizeOffsets2[0] != WorkloadInfo::undefinedOffset - ? ptrOffset(crossThread, workloadInfo.localWorkSizeOffsets2[0]) - : kernelDeviceInfo.localWorkSizeX2; - kernelDeviceInfo.localWorkSizeY2 = workloadInfo.localWorkSizeOffsets2[1] != WorkloadInfo::undefinedOffset - ? ptrOffset(crossThread, workloadInfo.localWorkSizeOffsets2[1]) - : kernelDeviceInfo.localWorkSizeY2; - kernelDeviceInfo.localWorkSizeZ2 = workloadInfo.localWorkSizeOffsets2[2] != WorkloadInfo::undefinedOffset - ? ptrOffset(crossThread, workloadInfo.localWorkSizeOffsets2[2]) - : kernelDeviceInfo.localWorkSizeZ2; + localWorkSizeX2 = workloadInfo.localWorkSizeOffsets2[0] != WorkloadInfo::undefinedOffset + ? ptrOffset(crossThread, workloadInfo.localWorkSizeOffsets2[0]) + : localWorkSizeX2; + localWorkSizeY2 = workloadInfo.localWorkSizeOffsets2[1] != WorkloadInfo::undefinedOffset + ? ptrOffset(crossThread, workloadInfo.localWorkSizeOffsets2[1]) + : localWorkSizeY2; + localWorkSizeZ2 = workloadInfo.localWorkSizeOffsets2[2] != WorkloadInfo::undefinedOffset + ? ptrOffset(crossThread, workloadInfo.localWorkSizeOffsets2[2]) + : localWorkSizeZ2; - kernelDeviceInfo.globalWorkSizeX = workloadInfo.globalWorkSizeOffsets[0] != WorkloadInfo::undefinedOffset - ? ptrOffset(crossThread, workloadInfo.globalWorkSizeOffsets[0]) - : kernelDeviceInfo.globalWorkSizeX; - kernelDeviceInfo.globalWorkSizeY = workloadInfo.globalWorkSizeOffsets[1] != WorkloadInfo::undefinedOffset - ? ptrOffset(crossThread, workloadInfo.globalWorkSizeOffsets[1]) - : kernelDeviceInfo.globalWorkSizeY; - kernelDeviceInfo.globalWorkSizeZ = workloadInfo.globalWorkSizeOffsets[2] != WorkloadInfo::undefinedOffset - ? ptrOffset(crossThread, workloadInfo.globalWorkSizeOffsets[2]) - : kernelDeviceInfo.globalWorkSizeZ; + globalWorkSizeX = workloadInfo.globalWorkSizeOffsets[0] != WorkloadInfo::undefinedOffset + ? ptrOffset(crossThread, workloadInfo.globalWorkSizeOffsets[0]) + : globalWorkSizeX; + globalWorkSizeY = workloadInfo.globalWorkSizeOffsets[1] != WorkloadInfo::undefinedOffset + ? ptrOffset(crossThread, workloadInfo.globalWorkSizeOffsets[1]) + : globalWorkSizeY; + globalWorkSizeZ = workloadInfo.globalWorkSizeOffsets[2] != WorkloadInfo::undefinedOffset + ? ptrOffset(crossThread, workloadInfo.globalWorkSizeOffsets[2]) + : globalWorkSizeZ; - kernelDeviceInfo.enqueuedLocalWorkSizeX = workloadInfo.enqueuedLocalWorkSizeOffsets[0] != WorkloadInfo::undefinedOffset - ? ptrOffset(crossThread, workloadInfo.enqueuedLocalWorkSizeOffsets[0]) - : kernelDeviceInfo.enqueuedLocalWorkSizeX; - kernelDeviceInfo.enqueuedLocalWorkSizeY = workloadInfo.enqueuedLocalWorkSizeOffsets[1] != WorkloadInfo::undefinedOffset - ? ptrOffset(crossThread, workloadInfo.enqueuedLocalWorkSizeOffsets[1]) - : kernelDeviceInfo.enqueuedLocalWorkSizeY; - kernelDeviceInfo.enqueuedLocalWorkSizeZ = workloadInfo.enqueuedLocalWorkSizeOffsets[2] != WorkloadInfo::undefinedOffset - ? ptrOffset(crossThread, workloadInfo.enqueuedLocalWorkSizeOffsets[2]) - : kernelDeviceInfo.enqueuedLocalWorkSizeZ; + enqueuedLocalWorkSizeX = workloadInfo.enqueuedLocalWorkSizeOffsets[0] != WorkloadInfo::undefinedOffset + ? ptrOffset(crossThread, workloadInfo.enqueuedLocalWorkSizeOffsets[0]) + : enqueuedLocalWorkSizeX; + enqueuedLocalWorkSizeY = workloadInfo.enqueuedLocalWorkSizeOffsets[1] != WorkloadInfo::undefinedOffset + ? ptrOffset(crossThread, workloadInfo.enqueuedLocalWorkSizeOffsets[1]) + : enqueuedLocalWorkSizeY; + enqueuedLocalWorkSizeZ = workloadInfo.enqueuedLocalWorkSizeOffsets[2] != WorkloadInfo::undefinedOffset + ? ptrOffset(crossThread, workloadInfo.enqueuedLocalWorkSizeOffsets[2]) + : enqueuedLocalWorkSizeZ; - kernelDeviceInfo.numWorkGroupsX = workloadInfo.numWorkGroupsOffset[0] != WorkloadInfo::undefinedOffset - ? ptrOffset(crossThread, workloadInfo.numWorkGroupsOffset[0]) - : kernelDeviceInfo.numWorkGroupsX; - kernelDeviceInfo.numWorkGroupsY = workloadInfo.numWorkGroupsOffset[1] != WorkloadInfo::undefinedOffset - ? ptrOffset(crossThread, workloadInfo.numWorkGroupsOffset[1]) - : kernelDeviceInfo.numWorkGroupsY; - kernelDeviceInfo.numWorkGroupsZ = workloadInfo.numWorkGroupsOffset[2] != WorkloadInfo::undefinedOffset - ? ptrOffset(crossThread, workloadInfo.numWorkGroupsOffset[2]) - : kernelDeviceInfo.numWorkGroupsZ; + numWorkGroupsX = workloadInfo.numWorkGroupsOffset[0] != WorkloadInfo::undefinedOffset + ? ptrOffset(crossThread, workloadInfo.numWorkGroupsOffset[0]) + : numWorkGroupsX; + numWorkGroupsY = workloadInfo.numWorkGroupsOffset[1] != WorkloadInfo::undefinedOffset + ? ptrOffset(crossThread, workloadInfo.numWorkGroupsOffset[1]) + : numWorkGroupsY; + numWorkGroupsZ = workloadInfo.numWorkGroupsOffset[2] != WorkloadInfo::undefinedOffset + ? ptrOffset(crossThread, workloadInfo.numWorkGroupsOffset[2]) + : numWorkGroupsZ; - kernelDeviceInfo.maxWorkGroupSizeForCrossThreadData = workloadInfo.maxWorkGroupSizeOffset != WorkloadInfo::undefinedOffset - ? ptrOffset(crossThread, workloadInfo.maxWorkGroupSizeOffset) - : kernelDeviceInfo.maxWorkGroupSizeForCrossThreadData; - kernelDeviceInfo.workDim = workloadInfo.workDimOffset != WorkloadInfo::undefinedOffset - ? ptrOffset(crossThread, workloadInfo.workDimOffset) - : kernelDeviceInfo.workDim; - kernelDeviceInfo.dataParameterSimdSize = workloadInfo.simdSizeOffset != WorkloadInfo::undefinedOffset ? ptrOffset(crossThread, workloadInfo.simdSizeOffset) : kernelDeviceInfo.dataParameterSimdSize; - kernelDeviceInfo.parentEventOffset = workloadInfo.parentEventOffset != WorkloadInfo::undefinedOffset - ? ptrOffset(crossThread, workloadInfo.parentEventOffset) - : kernelDeviceInfo.parentEventOffset; - kernelDeviceInfo.preferredWkgMultipleOffset = workloadInfo.preferredWkgMultipleOffset != WorkloadInfo::undefinedOffset - ? ptrOffset(crossThread, workloadInfo.preferredWkgMultipleOffset) - : kernelDeviceInfo.preferredWkgMultipleOffset; + maxWorkGroupSizeForCrossThreadData = workloadInfo.maxWorkGroupSizeOffset != WorkloadInfo::undefinedOffset + ? ptrOffset(crossThread, workloadInfo.maxWorkGroupSizeOffset) + : maxWorkGroupSizeForCrossThreadData; + workDim = workloadInfo.workDimOffset != WorkloadInfo::undefinedOffset + ? ptrOffset(crossThread, workloadInfo.workDimOffset) + : workDim; + dataParameterSimdSize = workloadInfo.simdSizeOffset != WorkloadInfo::undefinedOffset ? ptrOffset(crossThread, workloadInfo.simdSizeOffset) : dataParameterSimdSize; + parentEventOffset = workloadInfo.parentEventOffset != WorkloadInfo::undefinedOffset + ? ptrOffset(crossThread, workloadInfo.parentEventOffset) + : parentEventOffset; + preferredWkgMultipleOffset = workloadInfo.preferredWkgMultipleOffset != WorkloadInfo::undefinedOffset + ? ptrOffset(crossThread, workloadInfo.preferredWkgMultipleOffset) + : preferredWkgMultipleOffset; - *kernelDeviceInfo.maxWorkGroupSizeForCrossThreadData = kernelDeviceInfo.maxKernelWorkGroupSize; - *kernelDeviceInfo.dataParameterSimdSize = maxSimdSize; - *kernelDeviceInfo.preferredWkgMultipleOffset = maxSimdSize; - *kernelDeviceInfo.parentEventOffset = WorkloadInfo::invalidParentEvent; + *maxWorkGroupSizeForCrossThreadData = maxKernelWorkGroupSize; + *dataParameterSimdSize = maxSimdSize; + *preferredWkgMultipleOffset = maxSimdSize; + *parentEventOffset = WorkloadInfo::invalidParentEvent; } // allocate our own SSH, if necessary - kernelDeviceInfo.sshLocalSize = heapInfo.SurfaceStateHeapSize; + sshLocalSize = heapInfo.SurfaceStateHeapSize; - if (kernelDeviceInfo.sshLocalSize) { - kernelDeviceInfo.pSshLocal = std::make_unique(kernelDeviceInfo.sshLocalSize); + if (sshLocalSize) { + pSshLocal = std::make_unique(sshLocalSize); // copy the ssh into our local copy - memcpy_s(kernelDeviceInfo.pSshLocal.get(), kernelDeviceInfo.sshLocalSize, - heapInfo.pSsh, kernelDeviceInfo.sshLocalSize); + memcpy_s(pSshLocal.get(), sshLocalSize, + heapInfo.pSsh, sshLocalSize); } - kernelDeviceInfo.numberOfBindingTableStates = kernelDescriptor.payloadMappings.bindingTable.numEntries; - kernelDeviceInfo.localBindingTableOffset = kernelDescriptor.payloadMappings.bindingTable.tableOffset; + numberOfBindingTableStates = kernelDescriptor.payloadMappings.bindingTable.numEntries; + localBindingTableOffset = kernelDescriptor.payloadMappings.bindingTable.tableOffset; // patch crossthread data and ssh with inline surfaces, if necessary auto perHwThreadPrivateMemorySize = kernelDescriptor.kernelAttributes.perHwThreadPrivateMemorySize; if (perHwThreadPrivateMemorySize) { - kernelDeviceInfo.privateSurfaceSize = KernelHelper::getPrivateSurfaceSize(perHwThreadPrivateMemorySize, pClDevice->getSharedDeviceInfo().computeUnitsUsedForScratch); + privateSurfaceSize = KernelHelper::getPrivateSurfaceSize(perHwThreadPrivateMemorySize, pClDevice->getSharedDeviceInfo().computeUnitsUsedForScratch); - DEBUG_BREAK_IF(kernelDeviceInfo.privateSurfaceSize == 0); - if (kernelDeviceInfo.privateSurfaceSize > std::numeric_limits::max()) { + DEBUG_BREAK_IF(privateSurfaceSize == 0); + if (privateSurfaceSize > std::numeric_limits::max()) { return CL_OUT_OF_RESOURCES; } - kernelDeviceInfo.privateSurface = executionEnvironment.memoryManager->allocateGraphicsMemoryWithProperties( + privateSurface = executionEnvironment.memoryManager->allocateGraphicsMemoryWithProperties( {rootDeviceIndex, - static_cast(kernelDeviceInfo.privateSurfaceSize), + static_cast(privateSurfaceSize), GraphicsAllocation::AllocationType::PRIVATE_SURFACE, pClDevice->getDeviceBitfield()}); - if (kernelDeviceInfo.privateSurface == nullptr) { + if (privateSurface == nullptr) { return CL_OUT_OF_RESOURCES; } const auto &patch = kernelDescriptor.payloadMappings.implicitArgs.privateMemoryAddress; - patchWithImplicitSurface(reinterpret_cast(kernelDeviceInfo.privateSurface->getGpuAddressToPatch()), *kernelDeviceInfo.privateSurface, pClDevice->getDevice(), patch); + patchWithImplicitSurface(reinterpret_cast(privateSurface->getGpuAddressToPatch()), *privateSurface, pClDevice->getDevice(), patch); } if (isValidOffset(kernelDescriptor.payloadMappings.implicitArgs.globalConstantsSurfaceAddress.stateless)) { DEBUG_BREAK_IF(program->getConstantSurface(rootDeviceIndex) == nullptr); @@ -391,7 +385,7 @@ cl_int Kernel::initialize() { debugEnabled = true; } auto numArgs = kernelInfo.kernelArgInfo.size(); - kernelDeviceInfo.slmSizes.resize(numArgs); + slmSizes.resize(numArgs); this->kernelHasIndirectAccess |= kernelInfo.kernelDescriptor.kernelAttributes.hasNonKernelArgLoad || kernelInfo.kernelDescriptor.kernelAttributes.hasNonKernelArgStore || @@ -449,10 +443,9 @@ cl_int Kernel::initialize() { cl_int Kernel::cloneKernel(Kernel *pSourceKernel) { // copy cross thread data to store arguments set to source kernel with clSetKernelArg on immediate data (non-pointer types) - auto rootDeviceIndex = defaultRootDeviceIndex; - memcpy_s(kernelDeviceInfos[rootDeviceIndex].crossThreadData, kernelDeviceInfos[rootDeviceIndex].crossThreadDataSize, - pSourceKernel->kernelDeviceInfos[rootDeviceIndex].crossThreadData, pSourceKernel->kernelDeviceInfos[rootDeviceIndex].crossThreadDataSize); - DEBUG_BREAK_IF(pSourceKernel->kernelDeviceInfos[rootDeviceIndex].crossThreadDataSize != kernelDeviceInfos[rootDeviceIndex].crossThreadDataSize); + memcpy_s(crossThreadData, crossThreadDataSize, + pSourceKernel->crossThreadData, pSourceKernel->crossThreadDataSize); + DEBUG_BREAK_IF(pSourceKernel->crossThreadDataSize != crossThreadDataSize); // copy arguments set to source kernel with clSetKernelArg or clSetKernelArgSVMPointer for (uint32_t i = 0; i < pSourceKernel->kernelArguments.size(); i++) { @@ -641,7 +634,7 @@ cl_int Kernel::getWorkGroupInfo(ClDevice &device, cl_kernel_work_group_info para switch (paramName) { case CL_KERNEL_WORK_GROUP_SIZE: - maxWorkgroupSize = kernelDeviceInfos[rootDeviceIndex].maxKernelWorkGroupSize; + maxWorkgroupSize = maxKernelWorkGroupSize; if (DebugManager.flags.UseMaxSimdSizeToDeduceMaxWorkgroupSize.get()) { auto divisionSize = CommonConstants::maximalSimdSize / kernelInfo.getMaxSimdSize(); maxWorkgroupSize /= divisionSize; @@ -704,7 +697,7 @@ cl_int Kernel::getSubGroupInfo(ClDevice &clDevice, cl_kernel_sub_group_info para auto rootDeviceIndex = clDevice.getRootDeviceIndex(); const auto &kernelInfo = getKernelInfo(rootDeviceIndex); auto maxSimdSize = static_cast(kernelInfo.getMaxSimdSize()); - auto maxRequiredWorkGroupSize = static_cast(kernelInfo.getMaxRequiredWorkGroupSize(getMaxKernelWorkGroupSize(rootDeviceIndex))); + auto maxRequiredWorkGroupSize = static_cast(kernelInfo.getMaxRequiredWorkGroupSize(getMaxKernelWorkGroupSize())); auto largestCompiledSIMDSize = static_cast(kernelInfo.getMaxSimdSize()); GetInfoHelper info(paramValue, paramValueSize, paramValueSizeRet); @@ -850,7 +843,7 @@ void Kernel::setStartOffset(uint32_t offset) { } void *Kernel::getSurfaceStateHeap(uint32_t rootDeviceIndex) const { - return kernelInfos[rootDeviceIndex]->usesSsh ? kernelDeviceInfos[rootDeviceIndex].pSshLocal.get() : nullptr; + return kernelInfos[rootDeviceIndex]->usesSsh ? pSshLocal.get() : nullptr; } size_t Kernel::getDynamicStateHeapSize(uint32_t rootDeviceIndex) const { @@ -863,19 +856,19 @@ const void *Kernel::getDynamicStateHeap(uint32_t rootDeviceIndex) const { size_t Kernel::getSurfaceStateHeapSize(uint32_t rootDeviceIndex) const { return kernelInfos[rootDeviceIndex]->usesSsh - ? kernelDeviceInfos[rootDeviceIndex].sshLocalSize + ? sshLocalSize : 0; } -size_t Kernel::getNumberOfBindingTableStates(uint32_t rootDeviceIndex) const { - return kernelDeviceInfos[rootDeviceIndex].numberOfBindingTableStates; +size_t Kernel::getNumberOfBindingTableStates() const { + return numberOfBindingTableStates; } -void Kernel::resizeSurfaceStateHeap(uint32_t rootDeviceIndex, void *pNewSsh, size_t newSshSize, size_t newBindingTableCount, size_t newBindingTableOffset) { - kernelDeviceInfos[rootDeviceIndex].pSshLocal.reset(static_cast(pNewSsh)); - kernelDeviceInfos[rootDeviceIndex].sshLocalSize = static_cast(newSshSize); - kernelDeviceInfos[rootDeviceIndex].numberOfBindingTableStates = newBindingTableCount; - kernelDeviceInfos[rootDeviceIndex].localBindingTableOffset = newBindingTableOffset; +void Kernel::resizeSurfaceStateHeap(void *pNewSsh, size_t newSshSize, size_t newBindingTableCount, size_t newBindingTableOffset) { + pSshLocal.reset(static_cast(pNewSsh)); + sshLocalSize = static_cast(newSshSize); + numberOfBindingTableStates = newBindingTableCount; + localBindingTableOffset = newBindingTableOffset; } cl_int Kernel::setArg(uint32_t argIndex, size_t argSize, const void *argVal) { @@ -936,7 +929,7 @@ void *Kernel::patchBufferOffset(const KernelArgInfo &argInfo, void *svmPtr, Grap DEBUG_BREAK_IF(ptrDiff(svmPtr, ptrToPatch) != static_cast(ptrDiff(svmPtr, ptrToPatch))); uint32_t offsetToPatch = static_cast(ptrDiff(svmPtr, ptrToPatch)); - patch(offsetToPatch, getCrossThreadData(rootDeviceIndex), argInfo.offsetBufferOffset); + patch(offsetToPatch, crossThreadData, argInfo.offsetBufferOffset); return ptrToPatch; } @@ -974,7 +967,7 @@ cl_int Kernel::setArgSvmAlloc(uint32_t argIndex, void *svmPtr, GraphicsAllocatio void *ptrToPatch = patchBufferOffset(kernelArgInfo, svmPtr, svmAlloc, rootDeviceIndex); - auto patchLocation = ptrOffset(getCrossThreadData(rootDeviceIndex), + auto patchLocation = ptrOffset(crossThreadData, kernelArgInfo.kernelArgPatchInfoVector[0].crossthreadOffset); auto patchSize = kernelArgInfo.kernelArgPatchInfoVector[0].size; @@ -1143,7 +1136,7 @@ uint32_t Kernel::getMaxWorkGroupCount(const cl_uint workDim, const size_t *local availableThreadCount, dssCount, dssCount * KB * hardwareInfo.capabilityTable.slmSize, - hwHelper.alignSlmSize(kernelDeviceInfos[rootDeviceIndex].slmTotalSize), + hwHelper.alignSlmSize(slmTotalSize), static_cast(hwHelper.getMaxBarrierRegisterPerSlice()), hwHelper.getBarriersCountFromHasBarriers(barrierCount), workDim, @@ -1264,8 +1257,8 @@ bool Kernel::isSingleSubdevicePreferred() const { void Kernel::makeResident(CommandStreamReceiver &commandStreamReceiver) { auto rootDeviceIndex = commandStreamReceiver.getRootDeviceIndex(); - if (kernelDeviceInfos[rootDeviceIndex].privateSurface) { - commandStreamReceiver.makeResident(*kernelDeviceInfos[rootDeviceIndex].privateSurface); + if (privateSurface) { + commandStreamReceiver.makeResident(*privateSurface); } if (program->getConstantSurface(rootDeviceIndex)) { @@ -1312,12 +1305,13 @@ void Kernel::makeResident(CommandStreamReceiver &commandStreamReceiver) { } } -void Kernel::getResidency(std::vector &dst, uint32_t rootDeviceIndex) { - if (kernelDeviceInfos[rootDeviceIndex].privateSurface) { - GeneralSurface *surface = new GeneralSurface(kernelDeviceInfos[rootDeviceIndex].privateSurface); +void Kernel::getResidency(std::vector &dst) { + if (privateSurface) { + GeneralSurface *surface = new GeneralSurface(privateSurface); dst.push_back(surface); } + auto rootDeviceIndex = getDevice().getRootDeviceIndex(); if (program->getConstantSurface(rootDeviceIndex)) { GeneralSurface *surface = new GeneralSurface(program->getConstantSurface(rootDeviceIndex)); dst.push_back(surface); @@ -1391,13 +1385,12 @@ cl_int Kernel::setArgLocal(uint32_t argIndexIn, storeKernelArg(argIndexIn, SLM_OBJ, nullptr, argVal, argSize); auto pClDevice = &getDevice(); auto rootDeviceIndex = pClDevice->getRootDeviceIndex(); - auto crossThreadData = reinterpret_cast(getCrossThreadData(rootDeviceIndex)); auto &kernelInfo = *kernelInfos[rootDeviceIndex]; - auto &kernelDeviceInfo = kernelDeviceInfos[rootDeviceIndex]; + uint32_t *crossThreadData = reinterpret_cast(this->crossThreadData); uint32_t argIndex = argIndexIn; - kernelDeviceInfo.slmSizes[argIndex] = argSize; + slmSizes[argIndex] = argSize; // Extract our current slmOffset auto slmOffset = *ptrOffset(crossThreadData, @@ -1408,7 +1401,7 @@ cl_int Kernel::setArgLocal(uint32_t argIndexIn, // Update all slm offsets after this argIndex ++argIndex; - while (argIndex < kernelDeviceInfo.slmSizes.size()) { + while (argIndex < slmSizes.size()) { const auto &kernelArgInfo = kernelInfo.kernelArgInfo[argIndex]; auto slmAlignment = kernelArgInfo.slmAlignment; @@ -1423,11 +1416,11 @@ cl_int Kernel::setArgLocal(uint32_t argIndexIn, *patchLocation = slmOffset; } - slmOffset += static_cast(kernelDeviceInfo.slmSizes[argIndex]); + slmOffset += static_cast(slmSizes[argIndex]); ++argIndex; } - kernelDeviceInfo.slmTotalSize = kernelInfo.workloadInfo.slmStaticSize + alignUp(slmOffset, KB); + slmTotalSize = kernelInfo.workloadInfo.slmStaticSize + alignUp(slmOffset, KB); return CL_SUCCESS; } @@ -1460,7 +1453,7 @@ cl_int Kernel::setArgBuffer(uint32_t argIndex, patchBufferOffset(kernelArgInfo, nullptr, nullptr, rootDeviceIndex); auto graphicsAllocation = buffer->getGraphicsAllocation(rootDeviceIndex); - auto patchLocation = ptrOffset(getCrossThreadData(rootDeviceIndex), + auto patchLocation = ptrOffset(crossThreadData, kernelArgInfo.kernelArgPatchInfoVector[0].crossthreadOffset); auto patchSize = kernelArgInfo.kernelArgPatchInfoVector[0].size; @@ -1469,7 +1462,7 @@ cl_int Kernel::setArgBuffer(uint32_t argIndex, if (DebugManager.flags.AddPatchInfoCommentsForAUBDump.get()) { PatchInfoData patchInfoData(addressToPatch - buffer->getOffset(), static_cast(buffer->getOffset()), - PatchInfoAllocationType::KernelArg, reinterpret_cast(getCrossThreadData(rootDeviceIndex)), + PatchInfoAllocationType::KernelArg, reinterpret_cast(crossThreadData), static_cast(kernelArgInfo.kernelArgPatchInfoVector[0].crossthreadOffset), PatchInfoAllocationType::IndirectObjectHeap, patchSize); this->patchInfoDataList.push_back(patchInfoData); @@ -1512,7 +1505,7 @@ cl_int Kernel::setArgBuffer(uint32_t argIndex, storeKernelArg(argIndex, BUFFER_OBJ, nullptr, argVal, argSize); const auto &kernelArgInfo = getKernelInfo(rootDeviceIndex).kernelArgInfo[argIndex]; patchBufferOffset(kernelArgInfo, nullptr, nullptr, rootDeviceIndex); - auto patchLocation = ptrOffset(getCrossThreadData(rootDeviceIndex), + auto patchLocation = ptrOffset(crossThreadData, kernelArgInfo.kernelArgPatchInfoVector[0].crossthreadOffset); patchWithRequiredSize(patchLocation, kernelArgInfo.kernelArgPatchInfoVector[0].size, 0u); @@ -1558,7 +1551,7 @@ cl_int Kernel::setArgPipe(uint32_t argIndex, } auto rootDeviceIndex = getDevice().getRootDeviceIndex(); const auto &kernelArgInfo = getKernelInfo(rootDeviceIndex).kernelArgInfo[argIndex]; - auto patchLocation = ptrOffset(getCrossThreadData(rootDeviceIndex), + auto patchLocation = ptrOffset(crossThreadData, kernelArgInfo.kernelArgPatchInfoVector[0].crossthreadOffset); auto patchSize = kernelArgInfo.kernelArgPatchInfoVector[0].size; @@ -1595,6 +1588,7 @@ cl_int Kernel::setArgImageWithMipLevel(uint32_t argIndex, auto &kernelInfo = getKernelInfo(rootDeviceIndex); patchBufferOffset(kernelInfo.kernelArgInfo[argIndex], nullptr, nullptr, rootDeviceIndex); + uint32_t *crossThreadData = reinterpret_cast(this->crossThreadData); auto clMemObj = *(static_cast(argVal)); auto pImage = castToObject(clMemObj); @@ -1619,7 +1613,6 @@ cl_int Kernel::setArgImageWithMipLevel(uint32_t argIndex, pImage->setImageArg(surfaceState, kernelArgInfo.isMediaBlockImage, mipLevel, rootDeviceIndex); } - auto crossThreadData = reinterpret_cast(getCrossThreadData(rootDeviceIndex)); auto &imageDesc = pImage->getImageDesc(); auto &imageFormat = pImage->getImageFormat(); auto graphicsAllocation = pImage->getGraphicsAllocation(rootDeviceIndex); @@ -1665,8 +1658,7 @@ cl_int Kernel::setArgImmediate(uint32_t argIndex, const auto &kernelArgInfo = getKernelInfo(rootDeviceIndex).kernelArgInfo[argIndex]; DEBUG_BREAK_IF(kernelArgInfo.kernelArgPatchInfoVector.size() <= 0); - auto crossThreadData = getCrossThreadData(rootDeviceIndex); - auto crossThreadDataEnd = ptrOffset(crossThreadData, getCrossThreadDataSize(rootDeviceIndex)); + auto crossThreadDataEnd = ptrOffset(crossThreadData, crossThreadDataSize); for (const auto &kernelArgPatchInfo : kernelArgInfo.kernelArgPatchInfoVector) { DEBUG_BREAK_IF(kernelArgPatchInfo.size <= 0); @@ -1698,6 +1690,7 @@ cl_int Kernel::setArgSampler(uint32_t argIndex, return retVal; } + uint32_t *crossThreadData = reinterpret_cast(this->crossThreadData); auto clSamplerObj = *(static_cast(argVal)); auto pSampler = castToObject(clSamplerObj); auto rootDeviceIndex = getDevice().getRootDeviceIndex(); @@ -1722,7 +1715,6 @@ cl_int Kernel::setArgSampler(uint32_t argIndex, pSampler->setArg(const_cast(samplerState), getProgram()->getDevices()[0]->getHardwareInfo()); - auto crossThreadData = reinterpret_cast(getCrossThreadData(rootDeviceIndex)); patch(pSampler->getSnapWaValue(), crossThreadData, kernelArgInfo.offsetSamplerSnapWa); patch(GetAddrModeEnum(pSampler->addressingMode), crossThreadData, kernelArgInfo.offsetSamplerAddressingMode); patch(GetNormCoordsEnum(pSampler->normalizedCoordinates), crossThreadData, kernelArgInfo.offsetSamplerNormalizedCoords); @@ -1759,7 +1751,6 @@ cl_int Kernel::setArgAccelerator(uint32_t argIndex, const auto &kernelArgInfo = getKernelInfo(rootDeviceIndex).kernelArgInfo[argIndex]; if (kernelArgInfo.samplerArgumentType == iOpenCL::SAMPLER_OBJECT_VME) { - auto crossThreadData = getCrossThreadData(rootDeviceIndex); const auto pVmeAccelerator = castToObjectOrAbort(pAccelerator); auto pDesc = static_cast(pVmeAccelerator->getDescriptor()); @@ -1808,7 +1799,7 @@ cl_int Kernel::setArgDevQueue(uint32_t argIndex, storeKernelArg(argIndex, DEVICE_QUEUE_OBJ, clDeviceQueue, argVal, argSize); const auto &kernelArgPatchInfo = kernelInfos[rootDeviceIndex]->kernelArgInfo[argIndex].kernelArgPatchInfoVector[0]; - auto patchLocation = ptrOffset(reinterpret_cast(getCrossThreadData(rootDeviceIndex)), + auto patchLocation = ptrOffset(reinterpret_cast(crossThreadData), kernelArgPatchInfo.crossthreadOffset); patchWithRequiredSize(patchLocation, kernelArgPatchInfo.size, @@ -2403,10 +2394,10 @@ void Kernel::provideInitializationHints() { auto pClDevice = &getDevice(); auto rootDeviceIndex = pClDevice->getRootDeviceIndex(); - if (kernelDeviceInfos[rootDeviceIndex].privateSurfaceSize) { + if (privateSurfaceSize) { context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_BAD_INTEL, PRIVATE_MEMORY_USAGE_TOO_HIGH, kernelInfos[rootDeviceIndex]->kernelDescriptor.kernelMetadata.kernelName.c_str(), - kernelDeviceInfos[rootDeviceIndex].privateSurfaceSize); + privateSurfaceSize); } auto scratchSize = kernelInfos[rootDeviceIndex]->kernelDescriptor.kernelAttributes.perThreadScratchSize[0] * pClDevice->getSharedDeviceInfo().computeUnitsUsedForScratch * getKernelInfo(rootDeviceIndex).getMaxSimdSize(); @@ -2419,8 +2410,8 @@ void Kernel::provideInitializationHints() { void Kernel::patchDefaultDeviceQueue(DeviceQueue *devQueue) { auto rootDeviceIndex = devQueue->getDevice().getRootDeviceIndex(); const auto &defaultQueueSurfaceAddress = kernelInfos[rootDeviceIndex]->kernelDescriptor.payloadMappings.implicitArgs.deviceSideEnqueueDefaultQueueSurfaceAddress; - if (isValidOffset(defaultQueueSurfaceAddress.stateless) && kernelDeviceInfos[rootDeviceIndex].crossThreadData) { - auto patchLocation = ptrOffset(reinterpret_cast(getCrossThreadData(rootDeviceIndex)), defaultQueueSurfaceAddress.stateless); + if (isValidOffset(defaultQueueSurfaceAddress.stateless) && crossThreadData) { + auto patchLocation = ptrOffset(reinterpret_cast(crossThreadData), defaultQueueSurfaceAddress.stateless); patchWithRequiredSize(patchLocation, defaultQueueSurfaceAddress.pointerSize, static_cast(devQueue->getQueueBuffer()->getGpuAddressToPatch())); } @@ -2436,8 +2427,8 @@ void Kernel::patchEventPool(DeviceQueue *devQueue) { auto rootDeviceIndex = devQueue->getDevice().getRootDeviceIndex(); const auto &eventPoolSurfaceAddress = kernelInfos[rootDeviceIndex]->kernelDescriptor.payloadMappings.implicitArgs.deviceSideEnqueueEventPoolSurfaceAddress; - if (isValidOffset(eventPoolSurfaceAddress.stateless) && kernelDeviceInfos[rootDeviceIndex].crossThreadData) { - auto patchLocation = ptrOffset(reinterpret_cast(getCrossThreadData(rootDeviceIndex)), eventPoolSurfaceAddress.stateless); + if (isValidOffset(eventPoolSurfaceAddress.stateless) && crossThreadData) { + auto patchLocation = ptrOffset(reinterpret_cast(crossThreadData), eventPoolSurfaceAddress.stateless); patchWithRequiredSize(patchLocation, eventPoolSurfaceAddress.pointerSize, static_cast(devQueue->getEventPoolBuffer()->getGpuAddressToPatch())); } @@ -2459,7 +2450,7 @@ void Kernel::patchBlocksSimdSize(uint32_t rootDeviceIndex) { DEBUG_BREAK_IF(!(idOffset.first < static_cast(blockManager->getCount()))); const KernelInfo *blockInfo = blockManager->getBlockKernelInfo(idOffset.first); - uint32_t *simdSize = reinterpret_cast(&kernelDeviceInfos[rootDeviceIndex].crossThreadData[idOffset.second]); + uint32_t *simdSize = reinterpret_cast(&crossThreadData[idOffset.second]); *simdSize = blockInfo->getMaxSimdSize(); } } @@ -2471,7 +2462,7 @@ bool Kernel::usesSyncBuffer(uint32_t rootDeviceIndex) { void Kernel::patchSyncBuffer(Device &device, GraphicsAllocation *gfxAllocation, size_t bufferOffset) { auto rootDeviceIndex = device.getRootDeviceIndex(); const auto &syncBuffer = kernelInfos[rootDeviceIndex]->kernelDescriptor.payloadMappings.implicitArgs.syncBufferAddress; - auto bufferPatchAddress = ptrOffset(getCrossThreadData(rootDeviceIndex), syncBuffer.stateless); + auto bufferPatchAddress = ptrOffset(crossThreadData, syncBuffer.stateless); patchWithRequiredSize(bufferPatchAddress, syncBuffer.pointerSize, ptrOffset(gfxAllocation->getGpuAddressToPatch(), bufferOffset)); @@ -2699,7 +2690,7 @@ void Kernel::patchBindlessSurfaceStateOffsets(const Device &device, const size_t if ((kernelInfo.kernelArgInfo[i].isBuffer) || (kernelInfo.kernelArgInfo[i].isImage)) { - auto patchLocation = ptrOffset(getCrossThreadData(device.getRootDeviceIndex()), + auto patchLocation = ptrOffset(crossThreadData, kernelInfo.kernelArgInfo[i].kernelArgPatchInfoVector[0].crossthreadOffset); auto bindlessOffset = static_cast(sshOffset) + kernelInfo.kernelArgInfo[i].offsetHeap; @@ -2746,56 +2737,56 @@ const HardwareInfo &Kernel::getHardwareInfo(uint32_t rootDeviceIndex) const { const KernelInfo &Kernel::getDefaultKernelInfo() const { return *kernelInfos[defaultRootDeviceIndex]; } -void Kernel::setGlobalWorkOffsetValues(uint32_t rootDeviceIndex, uint32_t globalWorkOffsetX, uint32_t globalWorkOffsetY, uint32_t globalWorkOffsetZ) { - *kernelDeviceInfos[rootDeviceIndex].globalWorkOffsetX = globalWorkOffsetX; - *kernelDeviceInfos[rootDeviceIndex].globalWorkOffsetY = globalWorkOffsetY; - *kernelDeviceInfos[rootDeviceIndex].globalWorkOffsetZ = globalWorkOffsetZ; +void Kernel::setGlobalWorkOffsetValues(uint32_t globalWorkOffsetX, uint32_t globalWorkOffsetY, uint32_t globalWorkOffsetZ) { + *this->globalWorkOffsetX = globalWorkOffsetX; + *this->globalWorkOffsetY = globalWorkOffsetY; + *this->globalWorkOffsetZ = globalWorkOffsetZ; } -void Kernel::setGlobalWorkSizeValues(uint32_t rootDeviceIndex, uint32_t globalWorkSizeX, uint32_t globalWorkSizeY, uint32_t globalWorkSizeZ) { - *kernelDeviceInfos[rootDeviceIndex].globalWorkSizeX = globalWorkSizeX; - *kernelDeviceInfos[rootDeviceIndex].globalWorkSizeY = globalWorkSizeY; - *kernelDeviceInfos[rootDeviceIndex].globalWorkSizeZ = globalWorkSizeZ; +void Kernel::setGlobalWorkSizeValues(uint32_t globalWorkSizeX, uint32_t globalWorkSizeY, uint32_t globalWorkSizeZ) { + *this->globalWorkSizeX = globalWorkSizeX; + *this->globalWorkSizeY = globalWorkSizeY; + *this->globalWorkSizeZ = globalWorkSizeZ; } -void Kernel::setLocalWorkSizeValues(uint32_t rootDeviceIndex, uint32_t localWorkSizeX, uint32_t localWorkSizeY, uint32_t localWorkSizeZ) { - *kernelDeviceInfos[rootDeviceIndex].localWorkSizeX = localWorkSizeX; - *kernelDeviceInfos[rootDeviceIndex].localWorkSizeY = localWorkSizeY; - *kernelDeviceInfos[rootDeviceIndex].localWorkSizeZ = localWorkSizeZ; +void Kernel::setLocalWorkSizeValues(uint32_t localWorkSizeX, uint32_t localWorkSizeY, uint32_t localWorkSizeZ) { + *this->localWorkSizeX = localWorkSizeX; + *this->localWorkSizeY = localWorkSizeY; + *this->localWorkSizeZ = localWorkSizeZ; } -void Kernel::setLocalWorkSize2Values(uint32_t rootDeviceIndex, uint32_t localWorkSizeX, uint32_t localWorkSizeY, uint32_t localWorkSizeZ) { - *kernelDeviceInfos[rootDeviceIndex].localWorkSizeX2 = localWorkSizeX; - *kernelDeviceInfos[rootDeviceIndex].localWorkSizeY2 = localWorkSizeY; - *kernelDeviceInfos[rootDeviceIndex].localWorkSizeZ2 = localWorkSizeZ; +void Kernel::setLocalWorkSize2Values(uint32_t localWorkSizeX, uint32_t localWorkSizeY, uint32_t localWorkSizeZ) { + *this->localWorkSizeX2 = localWorkSizeX; + *this->localWorkSizeY2 = localWorkSizeY; + *this->localWorkSizeZ2 = localWorkSizeZ; } -void Kernel::setEnqueuedLocalWorkSizeValues(uint32_t rootDeviceIndex, uint32_t localWorkSizeX, uint32_t localWorkSizeY, uint32_t localWorkSizeZ) { - *kernelDeviceInfos[rootDeviceIndex].enqueuedLocalWorkSizeX = localWorkSizeX; - *kernelDeviceInfos[rootDeviceIndex].enqueuedLocalWorkSizeY = localWorkSizeY; - *kernelDeviceInfos[rootDeviceIndex].enqueuedLocalWorkSizeZ = localWorkSizeZ; +void Kernel::setEnqueuedLocalWorkSizeValues(uint32_t localWorkSizeX, uint32_t localWorkSizeY, uint32_t localWorkSizeZ) { + *this->enqueuedLocalWorkSizeX = localWorkSizeX; + *this->enqueuedLocalWorkSizeY = localWorkSizeY; + *this->enqueuedLocalWorkSizeZ = localWorkSizeZ; } -bool Kernel::isLocalWorkSize2Patched(uint32_t rootDeviceIndex) { - return kernelDeviceInfos[rootDeviceIndex].localWorkSizeX2 != &dummyPatchLocation; +bool Kernel::isLocalWorkSize2Patched() { + return localWorkSizeX2 != &dummyPatchLocation; } -void Kernel::setNumWorkGroupsValues(uint32_t rootDeviceIndex, uint32_t numWorkGroupsX, uint32_t numWorkGroupsY, uint32_t numWorkGroupsZ) { - *kernelDeviceInfos[rootDeviceIndex].numWorkGroupsX = numWorkGroupsX; - *kernelDeviceInfos[rootDeviceIndex].numWorkGroupsY = numWorkGroupsY; - *kernelDeviceInfos[rootDeviceIndex].numWorkGroupsZ = numWorkGroupsZ; +void Kernel::setNumWorkGroupsValues(uint32_t numWorkGroupsX, uint32_t numWorkGroupsY, uint32_t numWorkGroupsZ) { + *this->numWorkGroupsX = numWorkGroupsX; + *this->numWorkGroupsY = numWorkGroupsY; + *this->numWorkGroupsZ = numWorkGroupsZ; } -void Kernel::setWorkDim(uint32_t rootDeviceIndex, uint32_t workDim) { - *kernelDeviceInfos[rootDeviceIndex].workDim = workDim; +void Kernel::setWorkDim(uint32_t workDim) { + *this->workDim = workDim; } -uint32_t Kernel::getMaxKernelWorkGroupSize(uint32_t rootDeviceIndex) const { - return kernelDeviceInfos[rootDeviceIndex].maxKernelWorkGroupSize; +uint32_t Kernel::getMaxKernelWorkGroupSize() const { + return maxKernelWorkGroupSize; } -uint32_t Kernel::getSlmTotalSize(uint32_t rootDeviceIndex) const { - return kernelDeviceInfos[rootDeviceIndex].slmTotalSize; +uint32_t Kernel::getSlmTotalSize() const { + return slmTotalSize; } size_t Kernel::getTotalNumDevicesInContext() const { diff --git a/opencl/source/kernel/kernel.h b/opencl/source/kernel/kernel.h index 54c49ed485..92b6c48b5f 100644 --- a/opencl/source/kernel/kernel.h +++ b/opencl/source/kernel/kernel.h @@ -126,12 +126,12 @@ class Kernel : public ReferenceTrackedObject { void setAuxTranslationRequired(bool onOff) { auxTranslationRequired = onOff; } void updateAuxTranslationRequired(); - char *getCrossThreadData(uint32_t rootDeviceIndex) const { - return kernelDeviceInfos[rootDeviceIndex].crossThreadData; + char *getCrossThreadData() const { + return crossThreadData; } - uint32_t getCrossThreadDataSize(uint32_t rootDeviceIndex) const { - return kernelDeviceInfos[rootDeviceIndex].crossThreadDataSize; + uint32_t getCrossThreadDataSize() const { + return crossThreadDataSize; } cl_int initialize(); @@ -172,12 +172,12 @@ class Kernel : public ReferenceTrackedObject { size_t getKernelHeapSize(uint32_t rootDeviceIndex) const; size_t getSurfaceStateHeapSize(uint32_t rootDeviceIndex) const; size_t getDynamicStateHeapSize(uint32_t rootDeviceIndex) const; - size_t getNumberOfBindingTableStates(uint32_t rootDeviceIndex) const; - size_t getBindingTableOffset(uint32_t rootDeviceIndex) const { - return kernelDeviceInfos[rootDeviceIndex].localBindingTableOffset; + size_t getNumberOfBindingTableStates() const; + size_t getBindingTableOffset() const { + return localBindingTableOffset; } - void resizeSurfaceStateHeap(uint32_t rootDeviceIndex, void *pNewSsh, size_t newSshSize, size_t newBindingTableCount, size_t newBindingTableOffset); + void resizeSurfaceStateHeap(void *pNewSsh, size_t newSshSize, size_t newBindingTableCount, size_t newBindingTableOffset); void substituteKernelHeap(const Device &device, void *newKernelHeap, size_t newKernelHeapSize); bool isKernelHeapSubstituted(uint32_t rootDeviceIndex) const; @@ -303,7 +303,7 @@ class Kernel : public ReferenceTrackedObject { //residency for kernel surfaces MOCKABLE_VIRTUAL void makeResident(CommandStreamReceiver &commandStreamReceiver); - MOCKABLE_VIRTUAL void getResidency(std::vector &dst, uint32_t rootDeviceIndex); + MOCKABLE_VIRTUAL void getResidency(std::vector &dst); bool requiresCoherency(); void resetSharedObjectsPatchAddresses(); bool isUsingSharedObjArgs() const { return usingSharedObjArgs; } @@ -392,16 +392,16 @@ class Kernel : public ReferenceTrackedObject { } const KernelInfo &getDefaultKernelInfo() const; - void setGlobalWorkOffsetValues(uint32_t rootDeviceIndex, uint32_t globalWorkOffsetX, uint32_t globalWorkOffsetY, uint32_t globalWorkOffsetZ); - void setGlobalWorkSizeValues(uint32_t rootDeviceIndex, uint32_t globalWorkSizeX, uint32_t globalWorkSizeY, uint32_t globalWorkSizeZ); - void setLocalWorkSizeValues(uint32_t rootDeviceIndex, uint32_t localWorkSizeX, uint32_t localWorkSizeY, uint32_t localWorkSizeZ); - void setLocalWorkSize2Values(uint32_t rootDeviceIndex, uint32_t localWorkSizeX, uint32_t localWorkSizeY, uint32_t localWorkSizeZ); - void setEnqueuedLocalWorkSizeValues(uint32_t rootDeviceIndex, uint32_t localWorkSizeX, uint32_t localWorkSizeY, uint32_t localWorkSizeZ); - bool isLocalWorkSize2Patched(uint32_t rootDeviceIndex); - void setNumWorkGroupsValues(uint32_t rootDeviceIndex, uint32_t numWorkGroupsX, uint32_t numWorkGroupsY, uint32_t numWorkGroupsZ); - void setWorkDim(uint32_t rootDeviceIndex, uint32_t workDim); - uint32_t getMaxKernelWorkGroupSize(uint32_t rootDeviceIndex) const; - uint32_t getSlmTotalSize(uint32_t rootDeviceIndex) const; + void setGlobalWorkOffsetValues(uint32_t globalWorkOffsetX, uint32_t globalWorkOffsetY, uint32_t globalWorkOffsetZ); + void setGlobalWorkSizeValues(uint32_t globalWorkSizeX, uint32_t globalWorkSizeY, uint32_t globalWorkSizeZ); + void setLocalWorkSizeValues(uint32_t localWorkSizeX, uint32_t localWorkSizeY, uint32_t localWorkSizeZ); + void setLocalWorkSize2Values(uint32_t localWorkSizeX, uint32_t localWorkSizeY, uint32_t localWorkSizeZ); + void setEnqueuedLocalWorkSizeValues(uint32_t localWorkSizeX, uint32_t localWorkSizeY, uint32_t localWorkSizeZ); + bool isLocalWorkSize2Patched(); + void setNumWorkGroupsValues(uint32_t numWorkGroupsX, uint32_t numWorkGroupsY, uint32_t numWorkGroupsZ); + void setWorkDim(uint32_t workDim); + uint32_t getMaxKernelWorkGroupSize() const; + uint32_t getSlmTotalSize() const; bool getHasIndirectAccess() const { return this->kernelHasIndirectAccess; } @@ -546,53 +546,50 @@ class Kernel : public ReferenceTrackedObject { bool debugEnabled = false; uint32_t additionalKernelExecInfo = AdditionalKernelExecInfo::NotSet; - struct KernelDeviceInfo : public NonCopyableClass { - uint32_t *globalWorkOffsetX = &Kernel::dummyPatchLocation; - uint32_t *globalWorkOffsetY = &Kernel::dummyPatchLocation; - uint32_t *globalWorkOffsetZ = &Kernel::dummyPatchLocation; + uint32_t *globalWorkOffsetX = &Kernel::dummyPatchLocation; + uint32_t *globalWorkOffsetY = &Kernel::dummyPatchLocation; + uint32_t *globalWorkOffsetZ = &Kernel::dummyPatchLocation; - uint32_t *localWorkSizeX = &Kernel::dummyPatchLocation; - uint32_t *localWorkSizeY = &Kernel::dummyPatchLocation; - uint32_t *localWorkSizeZ = &Kernel::dummyPatchLocation; + uint32_t *localWorkSizeX = &Kernel::dummyPatchLocation; + uint32_t *localWorkSizeY = &Kernel::dummyPatchLocation; + uint32_t *localWorkSizeZ = &Kernel::dummyPatchLocation; - uint32_t *localWorkSizeX2 = &Kernel::dummyPatchLocation; - uint32_t *localWorkSizeY2 = &Kernel::dummyPatchLocation; - uint32_t *localWorkSizeZ2 = &Kernel::dummyPatchLocation; + uint32_t *localWorkSizeX2 = &Kernel::dummyPatchLocation; + uint32_t *localWorkSizeY2 = &Kernel::dummyPatchLocation; + uint32_t *localWorkSizeZ2 = &Kernel::dummyPatchLocation; - uint32_t *globalWorkSizeX = &Kernel::dummyPatchLocation; - uint32_t *globalWorkSizeY = &Kernel::dummyPatchLocation; - uint32_t *globalWorkSizeZ = &Kernel::dummyPatchLocation; + uint32_t *globalWorkSizeX = &Kernel::dummyPatchLocation; + uint32_t *globalWorkSizeY = &Kernel::dummyPatchLocation; + uint32_t *globalWorkSizeZ = &Kernel::dummyPatchLocation; - uint32_t *enqueuedLocalWorkSizeX = &Kernel::dummyPatchLocation; - uint32_t *enqueuedLocalWorkSizeY = &Kernel::dummyPatchLocation; - uint32_t *enqueuedLocalWorkSizeZ = &Kernel::dummyPatchLocation; + uint32_t *enqueuedLocalWorkSizeX = &Kernel::dummyPatchLocation; + uint32_t *enqueuedLocalWorkSizeY = &Kernel::dummyPatchLocation; + uint32_t *enqueuedLocalWorkSizeZ = &Kernel::dummyPatchLocation; - uint32_t *numWorkGroupsX = &Kernel::dummyPatchLocation; - uint32_t *numWorkGroupsY = &Kernel::dummyPatchLocation; - uint32_t *numWorkGroupsZ = &Kernel::dummyPatchLocation; + uint32_t *numWorkGroupsX = &Kernel::dummyPatchLocation; + uint32_t *numWorkGroupsY = &Kernel::dummyPatchLocation; + uint32_t *numWorkGroupsZ = &Kernel::dummyPatchLocation; - uint32_t *maxWorkGroupSizeForCrossThreadData = &Kernel::dummyPatchLocation; - uint32_t maxKernelWorkGroupSize = 0; - uint32_t *workDim = &Kernel::dummyPatchLocation; - uint32_t *dataParameterSimdSize = &Kernel::dummyPatchLocation; - uint32_t *parentEventOffset = &Kernel::dummyPatchLocation; - uint32_t *preferredWkgMultipleOffset = &Kernel::dummyPatchLocation; + uint32_t *maxWorkGroupSizeForCrossThreadData = &Kernel::dummyPatchLocation; + uint32_t maxKernelWorkGroupSize = 0; + uint32_t *workDim = &Kernel::dummyPatchLocation; + uint32_t *dataParameterSimdSize = &Kernel::dummyPatchLocation; + uint32_t *parentEventOffset = &Kernel::dummyPatchLocation; + uint32_t *preferredWkgMultipleOffset = &Kernel::dummyPatchLocation; - size_t numberOfBindingTableStates = 0u; - size_t localBindingTableOffset = 0u; + size_t numberOfBindingTableStates = 0u; + size_t localBindingTableOffset = 0u; - std::vector slmSizes; - uint32_t slmTotalSize = 0u; + std::vector slmSizes; + uint32_t slmTotalSize = 0u; - std::unique_ptr pSshLocal; - uint32_t sshLocalSize = 0u; - char *crossThreadData = nullptr; - uint32_t crossThreadDataSize = 0u; + std::unique_ptr pSshLocal; + uint32_t sshLocalSize = 0u; + char *crossThreadData = nullptr; + uint32_t crossThreadDataSize = 0u; - GraphicsAllocation *privateSurface = nullptr; - uint64_t privateSurfaceSize = 0u; - }; - std::vector kernelDeviceInfos; + GraphicsAllocation *privateSurface = nullptr; + uint64_t privateSurfaceSize = 0u; const uint32_t defaultRootDeviceIndex; struct KernelConfig { diff --git a/opencl/source/program/kernel_info.cpp b/opencl/source/program/kernel_info.cpp index fc1a7e32bb..8708996a87 100644 --- a/opencl/source/program/kernel_info.cpp +++ b/opencl/source/program/kernel_info.cpp @@ -135,10 +135,10 @@ WorkSizeInfo::WorkSizeInfo(const DispatchInfo &dispatchInfo) { auto &device = dispatchInfo.getClDevice(); auto rootDeviceIndex = device.getRootDeviceIndex(); const auto &kernelInfo = dispatchInfo.getKernel()->getKernelInfo(rootDeviceIndex); - this->maxWorkGroupSize = dispatchInfo.getKernel()->getMaxKernelWorkGroupSize(rootDeviceIndex); + this->maxWorkGroupSize = dispatchInfo.getKernel()->getMaxKernelWorkGroupSize(); this->hasBarriers = kernelInfo.kernelDescriptor.kernelAttributes.usesBarriers(); this->simdSize = static_cast(kernelInfo.getMaxSimdSize()); - this->slmTotalSize = static_cast(dispatchInfo.getKernel()->getSlmTotalSize(rootDeviceIndex)); + this->slmTotalSize = static_cast(dispatchInfo.getKernel()->getSlmTotalSize()); this->coreFamily = device.getHardwareInfo().platform.eRenderCoreFamily; this->numThreadsPerSubSlice = static_cast(device.getSharedDeviceInfo().maxNumEUsPerSubSlice) * device.getSharedDeviceInfo().numThreadsPerEU; diff --git a/opencl/source/program/printf_handler.cpp b/opencl/source/program/printf_handler.cpp index a21c297ec9..ce3d45b0f8 100644 --- a/opencl/source/program/printf_handler.cpp +++ b/opencl/source/program/printf_handler.cpp @@ -59,7 +59,7 @@ void PrintfHandler::prepareDispatch(const MultiDispatchInfo &multiDispatchInfo) sizeof(printfSurfaceInitialDataSize)); const auto &printfSurfaceArg = kernel->getKernelInfo(rootDeviceIndex).kernelDescriptor.payloadMappings.implicitArgs.printfSurfaceAddress; - auto printfPatchAddress = ptrOffset(reinterpret_cast(kernel->getCrossThreadData(rootDeviceIndex)), printfSurfaceArg.stateless); + auto printfPatchAddress = ptrOffset(reinterpret_cast(kernel->getCrossThreadData()), printfSurfaceArg.stateless); patchWithRequiredSize(printfPatchAddress, printfSurfaceArg.pointerSize, (uintptr_t)printfSurface->getGpuAddressToPatch()); if (isValidOffset(printfSurfaceArg.bindful)) { auto surfaceState = ptrOffset(reinterpret_cast(kernel->getSurfaceStateHeap(rootDeviceIndex)), printfSurfaceArg.bindful); diff --git a/opencl/source/utilities/logger.cpp b/opencl/source/utilities/logger.cpp index 97f8c88c46..a4469e8c21 100644 --- a/opencl/source/utilities/logger.cpp +++ b/opencl/source/utilities/logger.cpp @@ -206,9 +206,8 @@ void FileLogger::dumpKernelArgs(const Kernel *kernel) { } } else { type = "immediate"; - auto rootDeviceIndex = kernel->getDevices()[0]->getRootDeviceIndex(); - auto crossThreadData = kernel->getCrossThreadData(rootDeviceIndex); - auto crossThreadDataSize = kernel->getCrossThreadDataSize(rootDeviceIndex); + auto crossThreadData = kernel->getCrossThreadData(); + auto crossThreadDataSize = kernel->getCrossThreadDataSize(); argVal = std::unique_ptr(new char[crossThreadDataSize]); size_t totalArgSize = 0; diff --git a/opencl/test/unit_test/accelerators/media_image_arg_tests.cpp b/opencl/test/unit_test/accelerators/media_image_arg_tests.cpp index a947ccd6a5..e6d9f5c1a3 100644 --- a/opencl/test/unit_test/accelerators/media_image_arg_tests.cpp +++ b/opencl/test/unit_test/accelerators/media_image_arg_tests.cpp @@ -98,7 +98,7 @@ HWTEST_F(MediaImageSetArgTest, WhenSettingMediaImageArgThenArgsSetCorrectly) { pSurfaceState->getSurfaceBaseAddress()); std::vector surfaces; - pKernel->getResidency(surfaces, rootDeviceIndex); + pKernel->getResidency(surfaces); EXPECT_EQ(0u, surfaces.size()); } @@ -136,7 +136,7 @@ HWTEST_F(MediaImageSetArgTest, WhenSettingKernelArgImageThenArgsSetCorrectly) { EXPECT_EQ(MEDIA_SURFACE_STATE::PICTURE_STRUCTURE_FRAME_PICTURE, pSurfaceState->getPictureStructure()); std::vector surfaces; - pKernel->getResidency(surfaces, rootDeviceIndex); + pKernel->getResidency(surfaces); for (auto &surface : surfaces) { delete surface; diff --git a/opencl/test/unit_test/api/cl_get_kernel_sub_group_info_tests.inl b/opencl/test/unit_test/api/cl_get_kernel_sub_group_info_tests.inl index f0ea6b20ca..897a633794 100644 --- a/opencl/test/unit_test/api/cl_get_kernel_sub_group_info_tests.inl +++ b/opencl/test/unit_test/api/cl_get_kernel_sub_group_info_tests.inl @@ -15,12 +15,12 @@ struct KernelSubGroupInfoFixture : HelloWorldFixture { void SetUp() override { ParentClass::SetUp(); - pKernel->kernelDeviceInfos[rootDeviceIndex].maxKernelWorkGroupSize = static_cast(pDevice->getDeviceInfo().maxWorkGroupSize / 2); + pKernel->maxKernelWorkGroupSize = static_cast(pDevice->getDeviceInfo().maxWorkGroupSize / 2); maxSimdSize = static_cast(pKernel->getKernelInfo(rootDeviceIndex).getMaxSimdSize()); ASSERT_LE(8u, maxSimdSize); maxWorkDim = static_cast(pClDevice->getDeviceInfo().maxWorkItemDimensions); ASSERT_EQ(3u, maxWorkDim); - maxWorkGroupSize = static_cast(pKernel->kernelDeviceInfos[rootDeviceIndex].maxKernelWorkGroupSize); + maxWorkGroupSize = static_cast(pKernel->maxKernelWorkGroupSize); ASSERT_GE(1024u, maxWorkGroupSize); largestCompiledSIMDSize = static_cast(pKernel->getKernelInfo(rootDeviceIndex).getMaxSimdSize()); ASSERT_EQ(32u, largestCompiledSIMDSize); @@ -30,8 +30,8 @@ struct KernelSubGroupInfoFixture : HelloWorldFixture { auto requiredWorkGroupSizeZ = static_cast(pKernel->getKernelInfo(rootDeviceIndex).kernelDescriptor.kernelAttributes.requiredWorkgroupSize[2]); calculatedMaxWorkgroupSize = requiredWorkGroupSizeX * requiredWorkGroupSizeY * requiredWorkGroupSizeZ; - if ((calculatedMaxWorkgroupSize == 0) || (calculatedMaxWorkgroupSize > static_cast(pKernel->kernelDeviceInfos[rootDeviceIndex].maxKernelWorkGroupSize))) { - calculatedMaxWorkgroupSize = static_cast(pKernel->kernelDeviceInfos[rootDeviceIndex].maxKernelWorkGroupSize); + if ((calculatedMaxWorkgroupSize == 0) || (calculatedMaxWorkgroupSize > static_cast(pKernel->maxKernelWorkGroupSize))) { + calculatedMaxWorkgroupSize = static_cast(pKernel->maxKernelWorkGroupSize); } } diff --git a/opencl/test/unit_test/api/cl_unified_shared_memory_tests.inl b/opencl/test/unit_test/api/cl_unified_shared_memory_tests.inl index 96fdd33d6e..852d02eb58 100644 --- a/opencl/test/unit_test/api/cl_unified_shared_memory_tests.inl +++ b/opencl/test/unit_test/api/cl_unified_shared_memory_tests.inl @@ -656,7 +656,7 @@ TEST(clUnifiedSharedMemoryTests, whenDeviceSupportSharedMemoryAllocationsAndSyst EXPECT_EQ(retVal, CL_SUCCESS); //check if cross thread is updated - auto crossThreadLocation = reinterpret_cast(ptrOffset(mockKernel.mockKernel->getCrossThreadData(device->getRootDeviceIndex()), mockKernel.kernelInfo.kernelArgInfo[0].kernelArgPatchInfoVector[0].crossthreadOffset)); + auto crossThreadLocation = reinterpret_cast(ptrOffset(mockKernel.mockKernel->getCrossThreadData(), mockKernel.kernelInfo.kernelArgInfo[0].kernelArgPatchInfoVector[0].crossthreadOffset)); auto systemAddress = reinterpret_cast(systemPointer); EXPECT_EQ(*crossThreadLocation, systemAddress); diff --git a/opencl/test/unit_test/built_ins/built_in_tests.cpp b/opencl/test/unit_test/built_ins/built_in_tests.cpp index a7ca790981..f0af4a4d4b 100644 --- a/opencl/test/unit_test/built_ins/built_in_tests.cpp +++ b/opencl/test/unit_test/built_ins/built_in_tests.cpp @@ -919,7 +919,7 @@ TEST_F(BuiltInTests, GivenUnalignedCopyBufferToBufferWhenDispatchInfoIsCreatedTh EXPECT_EQ(kernel->getKernelInfo(rootDeviceIndex).kernelDescriptor.kernelMetadata.kernelName, "CopyBufferToBufferMiddleMisaligned"); - const auto crossThreadData = kernel->getCrossThreadData(rootDeviceIndex); + const auto crossThreadData = kernel->getCrossThreadData(); const auto crossThreadOffset = kernel->getKernelInfo(rootDeviceIndex).kernelArgInfo[4].kernelArgPatchInfoVector[0].crossthreadOffset; EXPECT_EQ(8u, *reinterpret_cast(ptrOffset(crossThreadData, crossThreadOffset))); @@ -1147,7 +1147,7 @@ TEST_F(VmeBuiltInTests, GivenVmeBuilderWhenGettingDispatchInfoThenParamsAreCorre auto &argInfo = outDi->getKernel()->getKernelInfo(rootDeviceIndex).kernelArgInfo[vmeImplicitArgsBase + i]; ASSERT_EQ(1U, argInfo.kernelArgPatchInfoVector.size()); auto off = argInfo.kernelArgPatchInfoVector[0].crossthreadOffset; - EXPECT_EQ(vmeExtraArgsExpectedVals[i], *((uint32_t *)(outDi->getKernel()->getCrossThreadData(rootDeviceIndex) + off))); + EXPECT_EQ(vmeExtraArgsExpectedVals[i], *((uint32_t *)(outDi->getKernel()->getCrossThreadData() + off))); } } @@ -1209,7 +1209,7 @@ TEST_F(VmeBuiltInTests, GivenAdvancedVmeBuilderWhenGettingDispatchInfoThenParams auto &argInfo = outDi->getKernel()->getKernelInfo(rootDeviceIndex).kernelArgInfo[vmeImplicitArgsBase + i]; ASSERT_EQ(1U, argInfo.kernelArgPatchInfoVector.size()); auto off = argInfo.kernelArgPatchInfoVector[0].crossthreadOffset; - EXPECT_EQ(vmeExtraArgsExpectedVals[i], *((uint32_t *)(outDi->getKernel()->getCrossThreadData(rootDeviceIndex) + off))); + EXPECT_EQ(vmeExtraArgsExpectedVals[i], *((uint32_t *)(outDi->getKernel()->getCrossThreadData() + off))); } } } diff --git a/opencl/test/unit_test/command_queue/command_queue_tests.cpp b/opencl/test/unit_test/command_queue/command_queue_tests.cpp index e291fbca91..d398f817ed 100644 --- a/opencl/test/unit_test/command_queue/command_queue_tests.cpp +++ b/opencl/test/unit_test/command_queue/command_queue_tests.cpp @@ -1043,7 +1043,7 @@ HWTEST_F(CommandQueueCommandStreamTest, givenDebugKernelWhenSetupDebugSurfaceIsC const auto &systemThreadSurfaceAddress = kernel->getAllocatedKernelInfo()->kernelDescriptor.payloadMappings.implicitArgs.systemThreadSurfaceAddress.bindful; kernel->getAllocatedKernelInfo()->usesSsh = true; - kernel->setSshLocal(nullptr, sizeof(RENDER_SURFACE_STATE) + systemThreadSurfaceAddress, rootDeviceIndex); + kernel->setSshLocal(nullptr, sizeof(RENDER_SURFACE_STATE) + systemThreadSurfaceAddress); auto &commandStreamReceiver = cmdQ.getGpgpuCommandStreamReceiver(); cmdQ.getGpgpuCommandStreamReceiver().allocateDebugSurface(SipKernel::maxDbgSurfaceSize); @@ -1064,7 +1064,7 @@ HWTEST_F(CommandQueueCommandStreamTest, givenCsrWithDebugSurfaceAllocatedWhenSet const auto &systemThreadSurfaceAddress = kernel->getAllocatedKernelInfo()->kernelDescriptor.payloadMappings.implicitArgs.systemThreadSurfaceAddress.bindful; kernel->getAllocatedKernelInfo()->usesSsh = true; - kernel->setSshLocal(nullptr, sizeof(RENDER_SURFACE_STATE) + systemThreadSurfaceAddress, rootDeviceIndex); + kernel->setSshLocal(nullptr, sizeof(RENDER_SURFACE_STATE) + systemThreadSurfaceAddress); auto &commandStreamReceiver = cmdQ.getGpgpuCommandStreamReceiver(); commandStreamReceiver.allocateDebugSurface(SipKernel::maxDbgSurfaceSize); auto debugSurface = commandStreamReceiver.getDebugSurfaceAllocation(); diff --git a/opencl/test/unit_test/command_queue/dispatch_walker_tests.cpp b/opencl/test/unit_test/command_queue/dispatch_walker_tests.cpp index c1d5d9c064..0b79c73bc1 100644 --- a/opencl/test/unit_test/command_queue/dispatch_walker_tests.cpp +++ b/opencl/test/unit_test/command_queue/dispatch_walker_tests.cpp @@ -273,7 +273,7 @@ HWTEST_F(DispatchWalkerTest, GivenDefaultLwsAlgorithmWhenDispatchingWalkerThenDi nullptr, CL_COMMAND_NDRANGE_KERNEL); - EXPECT_EQ(dimension, *kernel.kernelDeviceInfos[rootDeviceIndex].workDim); + EXPECT_EQ(dimension, *kernel.workDim); } } @@ -304,7 +304,7 @@ HWTEST_F(DispatchWalkerTest, GivenSquaredLwsAlgorithmWhenDispatchingWalkerThenDi nullptr, nullptr, CL_COMMAND_NDRANGE_KERNEL); - EXPECT_EQ(dimension, *kernel.kernelDeviceInfos[rootDeviceIndex].workDim); + EXPECT_EQ(dimension, *kernel.workDim); } } @@ -334,7 +334,7 @@ HWTEST_F(DispatchWalkerTest, GivenNdLwsAlgorithmWhenDispatchingWalkerThenDimensi nullptr, nullptr, CL_COMMAND_NDRANGE_KERNEL); - EXPECT_EQ(dimension, *kernel.kernelDeviceInfos[rootDeviceIndex].workDim); + EXPECT_EQ(dimension, *kernel.workDim); } } @@ -365,7 +365,7 @@ HWTEST_F(DispatchWalkerTest, GivenOldLwsAlgorithmWhenDispatchingWalkerThenDimens nullptr, nullptr, CL_COMMAND_NDRANGE_KERNEL); - EXPECT_EQ(dimension, *kernel.kernelDeviceInfos[rootDeviceIndex].workDim); + EXPECT_EQ(dimension, *kernel.workDim); } } @@ -397,9 +397,9 @@ HWTEST_F(DispatchWalkerTest, GivenNumWorkGroupsWhenDispatchingWalkerThenNumWorkG nullptr, CL_COMMAND_NDRANGE_KERNEL); - EXPECT_EQ(2u, *kernel.kernelDeviceInfos[rootDeviceIndex].numWorkGroupsX); - EXPECT_EQ(5u, *kernel.kernelDeviceInfos[rootDeviceIndex].numWorkGroupsY); - EXPECT_EQ(10u, *kernel.kernelDeviceInfos[rootDeviceIndex].numWorkGroupsZ); + EXPECT_EQ(2u, *kernel.numWorkGroupsX); + EXPECT_EQ(5u, *kernel.numWorkGroupsY); + EXPECT_EQ(10u, *kernel.numWorkGroupsZ); } HWTEST_F(DispatchWalkerTest, GivenGlobalWorkOffsetWhenDispatchingWalkerThenGlobalWorkOffsetIsCorrectlySet) { @@ -430,9 +430,9 @@ HWTEST_F(DispatchWalkerTest, GivenGlobalWorkOffsetWhenDispatchingWalkerThenGloba nullptr, CL_COMMAND_NDRANGE_KERNEL); - EXPECT_EQ(1u, *kernel.kernelDeviceInfos[rootDeviceIndex].globalWorkOffsetX); - EXPECT_EQ(2u, *kernel.kernelDeviceInfos[rootDeviceIndex].globalWorkOffsetY); - EXPECT_EQ(3u, *kernel.kernelDeviceInfos[rootDeviceIndex].globalWorkOffsetZ); + EXPECT_EQ(1u, *kernel.globalWorkOffsetX); + EXPECT_EQ(2u, *kernel.globalWorkOffsetY); + EXPECT_EQ(3u, *kernel.globalWorkOffsetZ); } HWTEST_F(DispatchWalkerTest, GivenNoLocalWorkSizeAndDefaultAlgorithmWhenDispatchingWalkerThenLwsIsCorrect) { @@ -462,9 +462,9 @@ HWTEST_F(DispatchWalkerTest, GivenNoLocalWorkSizeAndDefaultAlgorithmWhenDispatch nullptr, nullptr, CL_COMMAND_NDRANGE_KERNEL); - EXPECT_EQ(2u, *kernel.kernelDeviceInfos[rootDeviceIndex].localWorkSizeX); - EXPECT_EQ(5u, *kernel.kernelDeviceInfos[rootDeviceIndex].localWorkSizeY); - EXPECT_EQ(1u, *kernel.kernelDeviceInfos[rootDeviceIndex].localWorkSizeZ); + EXPECT_EQ(2u, *kernel.localWorkSizeX); + EXPECT_EQ(5u, *kernel.localWorkSizeY); + EXPECT_EQ(1u, *kernel.localWorkSizeZ); } HWTEST_F(DispatchWalkerTest, GivenNoLocalWorkSizeAndNdOnWhenDispatchingWalkerThenLwsIsCorrect) { @@ -494,9 +494,9 @@ HWTEST_F(DispatchWalkerTest, GivenNoLocalWorkSizeAndNdOnWhenDispatchingWalkerThe nullptr, nullptr, CL_COMMAND_NDRANGE_KERNEL); - EXPECT_EQ(2u, *kernel.kernelDeviceInfos[rootDeviceIndex].localWorkSizeX); - EXPECT_EQ(3u, *kernel.kernelDeviceInfos[rootDeviceIndex].localWorkSizeY); - EXPECT_EQ(5u, *kernel.kernelDeviceInfos[rootDeviceIndex].localWorkSizeZ); + EXPECT_EQ(2u, *kernel.localWorkSizeX); + EXPECT_EQ(3u, *kernel.localWorkSizeY); + EXPECT_EQ(5u, *kernel.localWorkSizeZ); } HWTEST_F(DispatchWalkerTest, GivenNoLocalWorkSizeAndSquaredAlgorithmWhenDispatchingWalkerThenLwsIsCorrect) { @@ -527,9 +527,9 @@ HWTEST_F(DispatchWalkerTest, GivenNoLocalWorkSizeAndSquaredAlgorithmWhenDispatch nullptr, nullptr, CL_COMMAND_NDRANGE_KERNEL); - EXPECT_EQ(2u, *kernel.kernelDeviceInfos[rootDeviceIndex].localWorkSizeX); - EXPECT_EQ(5u, *kernel.kernelDeviceInfos[rootDeviceIndex].localWorkSizeY); - EXPECT_EQ(1u, *kernel.kernelDeviceInfos[rootDeviceIndex].localWorkSizeZ); + EXPECT_EQ(2u, *kernel.localWorkSizeX); + EXPECT_EQ(5u, *kernel.localWorkSizeY); + EXPECT_EQ(1u, *kernel.localWorkSizeZ); } HWTEST_F(DispatchWalkerTest, GivenNoLocalWorkSizeAndSquaredAlgorithmOffAndNdOffWhenDispatchingWalkerThenLwsIsCorrect) { @@ -560,9 +560,9 @@ HWTEST_F(DispatchWalkerTest, GivenNoLocalWorkSizeAndSquaredAlgorithmOffAndNdOffW nullptr, nullptr, CL_COMMAND_NDRANGE_KERNEL); - EXPECT_EQ(2u, *kernel.kernelDeviceInfos[rootDeviceIndex].localWorkSizeX); - EXPECT_EQ(5u, *kernel.kernelDeviceInfos[rootDeviceIndex].localWorkSizeY); - EXPECT_EQ(1u, *kernel.kernelDeviceInfos[rootDeviceIndex].localWorkSizeZ); + EXPECT_EQ(2u, *kernel.localWorkSizeX); + EXPECT_EQ(5u, *kernel.localWorkSizeY); + EXPECT_EQ(1u, *kernel.localWorkSizeZ); } HWTEST_F(DispatchWalkerTest, GivenNoLocalWorkSizeWhenDispatchingWalkerThenLwsIsCorrect) { @@ -591,9 +591,9 @@ HWTEST_F(DispatchWalkerTest, GivenNoLocalWorkSizeWhenDispatchingWalkerThenLwsIsC nullptr, nullptr, CL_COMMAND_NDRANGE_KERNEL); - EXPECT_EQ(1u, *kernel.kernelDeviceInfos[rootDeviceIndex].localWorkSizeX); - EXPECT_EQ(2u, *kernel.kernelDeviceInfos[rootDeviceIndex].localWorkSizeY); - EXPECT_EQ(3u, *kernel.kernelDeviceInfos[rootDeviceIndex].localWorkSizeZ); + EXPECT_EQ(1u, *kernel.localWorkSizeX); + EXPECT_EQ(2u, *kernel.localWorkSizeY); + EXPECT_EQ(3u, *kernel.localWorkSizeZ); } HWTEST_F(DispatchWalkerTest, GivenTwoSetsOfLwsOffsetsWhenDispatchingWalkerThenLwsIsCorrect) { @@ -625,12 +625,12 @@ HWTEST_F(DispatchWalkerTest, GivenTwoSetsOfLwsOffsetsWhenDispatchingWalkerThenLw nullptr, nullptr, CL_COMMAND_NDRANGE_KERNEL); - EXPECT_EQ(1u, *kernel.kernelDeviceInfos[rootDeviceIndex].localWorkSizeX); - EXPECT_EQ(2u, *kernel.kernelDeviceInfos[rootDeviceIndex].localWorkSizeY); - EXPECT_EQ(3u, *kernel.kernelDeviceInfos[rootDeviceIndex].localWorkSizeZ); - EXPECT_EQ(1u, *kernel.kernelDeviceInfos[rootDeviceIndex].localWorkSizeX2); - EXPECT_EQ(2u, *kernel.kernelDeviceInfos[rootDeviceIndex].localWorkSizeY2); - EXPECT_EQ(3u, *kernel.kernelDeviceInfos[rootDeviceIndex].localWorkSizeZ2); + EXPECT_EQ(1u, *kernel.localWorkSizeX); + EXPECT_EQ(2u, *kernel.localWorkSizeY); + EXPECT_EQ(3u, *kernel.localWorkSizeZ); + EXPECT_EQ(1u, *kernel.localWorkSizeX2); + EXPECT_EQ(2u, *kernel.localWorkSizeY2); + EXPECT_EQ(3u, *kernel.localWorkSizeZ2); } HWTEST_F(DispatchWalkerTest, GivenSplitKernelWhenDispatchingWalkerThenLwsIsCorrect) { @@ -670,14 +670,14 @@ HWTEST_F(DispatchWalkerTest, GivenSplitKernelWhenDispatchingWalkerThenLwsIsCorre for (auto &dispatchInfo : multiDispatchInfo) { auto &kernel = static_cast(*dispatchInfo.getKernel()); if (dispatchId == 0) { - EXPECT_EQ(1u, *kernel.kernelDeviceInfos[rootDeviceIndex].localWorkSizeX); - EXPECT_EQ(2u, *kernel.kernelDeviceInfos[rootDeviceIndex].localWorkSizeY); - EXPECT_EQ(3u, *kernel.kernelDeviceInfos[rootDeviceIndex].localWorkSizeZ); + EXPECT_EQ(1u, *kernel.localWorkSizeX); + EXPECT_EQ(2u, *kernel.localWorkSizeY); + EXPECT_EQ(3u, *kernel.localWorkSizeZ); } if (dispatchId == 1) { - EXPECT_EQ(4u, *kernel.kernelDeviceInfos[rootDeviceIndex].localWorkSizeX); - EXPECT_EQ(5u, *kernel.kernelDeviceInfos[rootDeviceIndex].localWorkSizeY); - EXPECT_EQ(6u, *kernel.kernelDeviceInfos[rootDeviceIndex].localWorkSizeZ); + EXPECT_EQ(4u, *kernel.localWorkSizeX); + EXPECT_EQ(5u, *kernel.localWorkSizeY); + EXPECT_EQ(6u, *kernel.localWorkSizeZ); } dispatchId++; } @@ -723,25 +723,25 @@ HWTEST_F(DispatchWalkerTest, GivenSplitWalkerWhenDispatchingWalkerThenLwsIsCorre for (auto &dispatchInfo : multiDispatchInfo) { auto &kernel = static_cast(*dispatchInfo.getKernel()); if (&kernel == &mainKernel) { - EXPECT_EQ(4u, *kernel.kernelDeviceInfos[rootDeviceIndex].localWorkSizeX); - EXPECT_EQ(5u, *kernel.kernelDeviceInfos[rootDeviceIndex].localWorkSizeY); - EXPECT_EQ(6u, *kernel.kernelDeviceInfos[rootDeviceIndex].localWorkSizeZ); - EXPECT_EQ(4u, *kernel.kernelDeviceInfos[rootDeviceIndex].localWorkSizeX2); - EXPECT_EQ(5u, *kernel.kernelDeviceInfos[rootDeviceIndex].localWorkSizeY2); - EXPECT_EQ(6u, *kernel.kernelDeviceInfos[rootDeviceIndex].localWorkSizeZ2); - EXPECT_EQ(3u, *kernel.kernelDeviceInfos[rootDeviceIndex].numWorkGroupsX); - EXPECT_EQ(2u, *kernel.kernelDeviceInfos[rootDeviceIndex].numWorkGroupsY); - EXPECT_EQ(2u, *kernel.kernelDeviceInfos[rootDeviceIndex].numWorkGroupsZ); + EXPECT_EQ(4u, *kernel.localWorkSizeX); + EXPECT_EQ(5u, *kernel.localWorkSizeY); + EXPECT_EQ(6u, *kernel.localWorkSizeZ); + EXPECT_EQ(4u, *kernel.localWorkSizeX2); + EXPECT_EQ(5u, *kernel.localWorkSizeY2); + EXPECT_EQ(6u, *kernel.localWorkSizeZ2); + EXPECT_EQ(3u, *kernel.numWorkGroupsX); + EXPECT_EQ(2u, *kernel.numWorkGroupsY); + EXPECT_EQ(2u, *kernel.numWorkGroupsZ); } else { - EXPECT_EQ(0u, *kernel.kernelDeviceInfos[rootDeviceIndex].localWorkSizeX); - EXPECT_EQ(0u, *kernel.kernelDeviceInfos[rootDeviceIndex].localWorkSizeY); - EXPECT_EQ(0u, *kernel.kernelDeviceInfos[rootDeviceIndex].localWorkSizeZ); - EXPECT_EQ(1u, *kernel.kernelDeviceInfos[rootDeviceIndex].localWorkSizeX2); - EXPECT_EQ(2u, *kernel.kernelDeviceInfos[rootDeviceIndex].localWorkSizeY2); - EXPECT_EQ(3u, *kernel.kernelDeviceInfos[rootDeviceIndex].localWorkSizeZ2); - EXPECT_EQ(0u, *kernel.kernelDeviceInfos[rootDeviceIndex].numWorkGroupsX); - EXPECT_EQ(0u, *kernel.kernelDeviceInfos[rootDeviceIndex].numWorkGroupsY); - EXPECT_EQ(0u, *kernel.kernelDeviceInfos[rootDeviceIndex].numWorkGroupsZ); + EXPECT_EQ(0u, *kernel.localWorkSizeX); + EXPECT_EQ(0u, *kernel.localWorkSizeY); + EXPECT_EQ(0u, *kernel.localWorkSizeZ); + EXPECT_EQ(1u, *kernel.localWorkSizeX2); + EXPECT_EQ(2u, *kernel.localWorkSizeY2); + EXPECT_EQ(3u, *kernel.localWorkSizeZ2); + EXPECT_EQ(0u, *kernel.numWorkGroupsX); + EXPECT_EQ(0u, *kernel.numWorkGroupsY); + EXPECT_EQ(0u, *kernel.numWorkGroupsZ); } } } @@ -939,7 +939,7 @@ HWTEST_F(DispatchWalkerTest, GivenMultipleKernelsWhenDispatchingWalkerThenWorkDi for (auto &dispatchInfo : multiDispatchInfo) { auto &kernel = static_cast(*dispatchInfo.getKernel()); - EXPECT_EQ(*kernel.kernelDeviceInfos[rootDeviceIndex].workDim, dispatchInfo.getDim()); + EXPECT_EQ(*kernel.workDim, dispatchInfo.getDim()); } } diff --git a/opencl/test/unit_test/command_queue/enqueue_debug_kernel_tests.cpp b/opencl/test/unit_test/command_queue/enqueue_debug_kernel_tests.cpp index 5decccc349..598086d171 100644 --- a/opencl/test/unit_test/command_queue/enqueue_debug_kernel_tests.cpp +++ b/opencl/test/unit_test/command_queue/enqueue_debug_kernel_tests.cpp @@ -101,7 +101,7 @@ HWTEST_F(EnqueueDebugKernelTest, givenDebugKernelWhenEnqueuedThenSSHAndBtiAreCor mockCmdQ->enqueueKernel(debugKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr); - auto *dstBtiTableBase = reinterpret_cast(ptrOffset(surfaceStates, debugKernel->getBindingTableOffset(rootDeviceIndex))); + auto *dstBtiTableBase = reinterpret_cast(ptrOffset(surfaceStates, debugKernel->getBindingTableOffset())); uint32_t surfaceStateOffset = dstBtiTableBase[0].getSurfaceStatePointer(); auto debugSurfaceState = reinterpret_cast(ptrOffset(ssh.getCpuBase(), surfaceStateOffset)); diff --git a/opencl/test/unit_test/command_queue/enqueue_kernel_1_tests.cpp b/opencl/test/unit_test/command_queue/enqueue_kernel_1_tests.cpp index c3e3baf6d7..e73edaf3c1 100644 --- a/opencl/test/unit_test/command_queue/enqueue_kernel_1_tests.cpp +++ b/opencl/test/unit_test/command_queue/enqueue_kernel_1_tests.cpp @@ -1290,9 +1290,9 @@ TEST_F(EnqueueKernelTest, givenKernelWhenAllArgsAreNotAndEventExistSetThenClEnqu TEST_F(EnqueueKernelTest, givenEnqueueCommandThatLwsExceedsDeviceCapabilitiesWhenEnqueueNDRangeKernelIsCalledThenErrorIsReturned) { MockKernelWithInternals mockKernel(*pClDevice); - mockKernel.mockKernel->kernelDeviceInfos[rootDeviceIndex].maxKernelWorkGroupSize = static_cast(pDevice->getDeviceInfo().maxWorkGroupSize / 2); + mockKernel.mockKernel->maxKernelWorkGroupSize = static_cast(pDevice->getDeviceInfo().maxWorkGroupSize / 2); - auto maxKernelWorkgroupSize = mockKernel.mockKernel->kernelDeviceInfos[rootDeviceIndex].maxKernelWorkGroupSize; + auto maxKernelWorkgroupSize = mockKernel.mockKernel->maxKernelWorkGroupSize; size_t globalWorkSize[3] = {maxKernelWorkgroupSize + 1, 1, 1}; size_t localWorkSize[3] = {maxKernelWorkgroupSize + 1, 1, 1}; diff --git a/opencl/test/unit_test/command_queue/enqueue_kernel_2_tests.cpp b/opencl/test/unit_test/command_queue/enqueue_kernel_2_tests.cpp index 82a3f87432..919a400f10 100644 --- a/opencl/test/unit_test/command_queue/enqueue_kernel_2_tests.cpp +++ b/opencl/test/unit_test/command_queue/enqueue_kernel_2_tests.cpp @@ -667,7 +667,7 @@ HWTEST_P(EnqueueKernelPrintfTest, GivenKernelWithPrintfBlockedByEventWhenEventUn patchData.DataParamOffset = 0; populateKernelDescriptor(mockKernel.kernelInfo.kernelDescriptor, patchData); - auto crossThreadData = reinterpret_cast(mockKernel.mockKernel->getCrossThreadData(rootDeviceIndex)); + auto crossThreadData = reinterpret_cast(mockKernel.mockKernel->getCrossThreadData()); std::string testString = "test"; diff --git a/opencl/test/unit_test/command_queue/enqueue_kernel_local_work_size_tests.cpp b/opencl/test/unit_test/command_queue/enqueue_kernel_local_work_size_tests.cpp index d8c6700115..3afabe82bb 100644 --- a/opencl/test/unit_test/command_queue/enqueue_kernel_local_work_size_tests.cpp +++ b/opencl/test/unit_test/command_queue/enqueue_kernel_local_work_size_tests.cpp @@ -64,13 +64,13 @@ TEST_F(EnqueueKernelRequiredWorkSize, GivenUnspecifiedWorkGroupSizeWhenEnqeueing EXPECT_EQ(CL_SUCCESS, retVal); - EXPECT_EQ(*pKernel->kernelDeviceInfos[rootDeviceIndex].localWorkSizeX, 8u); - EXPECT_EQ(*pKernel->kernelDeviceInfos[rootDeviceIndex].localWorkSizeY, 2u); - EXPECT_EQ(*pKernel->kernelDeviceInfos[rootDeviceIndex].localWorkSizeZ, 2u); + EXPECT_EQ(*pKernel->localWorkSizeX, 8u); + EXPECT_EQ(*pKernel->localWorkSizeY, 2u); + EXPECT_EQ(*pKernel->localWorkSizeZ, 2u); - EXPECT_EQ(*pKernel->kernelDeviceInfos[rootDeviceIndex].enqueuedLocalWorkSizeX, 8u); - EXPECT_EQ(*pKernel->kernelDeviceInfos[rootDeviceIndex].enqueuedLocalWorkSizeY, 2u); - EXPECT_EQ(*pKernel->kernelDeviceInfos[rootDeviceIndex].enqueuedLocalWorkSizeZ, 2u); + EXPECT_EQ(*pKernel->enqueuedLocalWorkSizeX, 8u); + EXPECT_EQ(*pKernel->enqueuedLocalWorkSizeY, 2u); + EXPECT_EQ(*pKernel->enqueuedLocalWorkSizeZ, 2u); } // Fully specified @@ -91,13 +91,13 @@ TEST_F(EnqueueKernelRequiredWorkSize, GivenRequiredWorkGroupSizeWhenEnqeueingKer EXPECT_EQ(CL_SUCCESS, retVal); - EXPECT_EQ(*pKernel->kernelDeviceInfos[rootDeviceIndex].enqueuedLocalWorkSizeX, 8u); - EXPECT_EQ(*pKernel->kernelDeviceInfos[rootDeviceIndex].enqueuedLocalWorkSizeY, 2u); - EXPECT_EQ(*pKernel->kernelDeviceInfos[rootDeviceIndex].enqueuedLocalWorkSizeZ, 2u); + EXPECT_EQ(*pKernel->enqueuedLocalWorkSizeX, 8u); + EXPECT_EQ(*pKernel->enqueuedLocalWorkSizeY, 2u); + EXPECT_EQ(*pKernel->enqueuedLocalWorkSizeZ, 2u); - EXPECT_EQ(*pKernel->kernelDeviceInfos[rootDeviceIndex].localWorkSizeX, 8u); - EXPECT_EQ(*pKernel->kernelDeviceInfos[rootDeviceIndex].localWorkSizeY, 2u); - EXPECT_EQ(*pKernel->kernelDeviceInfos[rootDeviceIndex].localWorkSizeZ, 2u); + EXPECT_EQ(*pKernel->localWorkSizeX, 8u); + EXPECT_EQ(*pKernel->localWorkSizeY, 2u); + EXPECT_EQ(*pKernel->localWorkSizeZ, 2u); } // Underspecified. Won't permit. diff --git a/opencl/test/unit_test/command_queue/enqueue_read_buffer_rect_tests.cpp b/opencl/test/unit_test/command_queue/enqueue_read_buffer_rect_tests.cpp index 2f3fe25b15..dd88bfce4a 100644 --- a/opencl/test/unit_test/command_queue/enqueue_read_buffer_rect_tests.cpp +++ b/opencl/test/unit_test/command_queue/enqueue_read_buffer_rect_tests.cpp @@ -576,13 +576,13 @@ HWTEST_F(EnqueueReadWriteBufferRectDispatch, givenOffsetResultingInMisalignedPtr const auto &surfaceStateDst = getSurfaceState(&cmdQ->getIndirectHeap(IndirectHeap::SURFACE_STATE, 0), 1); if (kernelInfo.kernelArgInfo[1].kernelArgPatchInfoVector[0].size == sizeof(uint64_t)) { - auto pKernelArg = (uint64_t *)(kernel->getCrossThreadData(device->getRootDeviceIndex()) + + auto pKernelArg = (uint64_t *)(kernel->getCrossThreadData() + kernelInfo.kernelArgInfo[1].kernelArgPatchInfoVector[0].crossthreadOffset); EXPECT_EQ(reinterpret_cast(alignDown(misalignedDstPtr, 4)), *pKernelArg); EXPECT_EQ(*pKernelArg, surfaceStateDst.getSurfaceBaseAddress()); } else if (kernelInfo.kernelArgInfo[1].kernelArgPatchInfoVector[0].size == sizeof(uint32_t)) { - auto pKernelArg = (uint32_t *)(kernel->getCrossThreadData(device->getRootDeviceIndex()) + + auto pKernelArg = (uint32_t *)(kernel->getCrossThreadData() + kernelInfo.kernelArgInfo[1].kernelArgPatchInfoVector[0].crossthreadOffset); EXPECT_EQ(reinterpret_cast(alignDown(misalignedDstPtr, 4)), static_cast(*pKernelArg)); EXPECT_EQ(static_cast(*pKernelArg), surfaceStateDst.getSurfaceBaseAddress()); @@ -590,7 +590,7 @@ HWTEST_F(EnqueueReadWriteBufferRectDispatch, givenOffsetResultingInMisalignedPtr } if (kernelInfo.kernelArgInfo[3].kernelArgPatchInfoVector[0].size == 4 * sizeof(uint32_t)) { // size of uint4 DstOrigin - auto dstOffset = (uint32_t *)(kernel->getCrossThreadData(device->getRootDeviceIndex()) + + auto dstOffset = (uint32_t *)(kernel->getCrossThreadData() + kernelInfo.kernelArgInfo[3].kernelArgPatchInfoVector[0].crossthreadOffset); EXPECT_EQ(hostOffset.x + ptrDiff(misalignedDstPtr, alignDown(misalignedDstPtr, 4)), *dstOffset); } else { diff --git a/opencl/test/unit_test/command_queue/enqueue_svm_tests.cpp b/opencl/test/unit_test/command_queue/enqueue_svm_tests.cpp index 74a7f6d6bc..43f8295e12 100644 --- a/opencl/test/unit_test/command_queue/enqueue_svm_tests.cpp +++ b/opencl/test/unit_test/command_queue/enqueue_svm_tests.cpp @@ -769,7 +769,7 @@ TEST_F(EnqueueSvmTest, givenEnqueueTaskBlockedOnUserEventWhenItIsEnqueuedThenSur auto pMultiDeviceKernel = clUniquePtr(MultiDeviceKernel::create(program.get(), program->getKernelInfosForKernel("FillBufferBytes"), &retVal)); auto kernel = static_cast(pMultiDeviceKernel->getKernel(rootDeviceIndex)); std::vector allSurfaces; - kernel->getResidency(allSurfaces, rootDeviceIndex); + kernel->getResidency(allSurfaces); EXPECT_EQ(1u, allSurfaces.size()); kernel->setSvmKernelExecInfo(pSvmAlloc); @@ -789,7 +789,7 @@ TEST_F(EnqueueSvmTest, givenEnqueueTaskBlockedOnUserEventWhenItIsEnqueuedThenSur nullptr); EXPECT_EQ(CL_SUCCESS, retVal); - kernel->getResidency(allSurfaces, rootDeviceIndex); + kernel->getResidency(allSurfaces); EXPECT_EQ(3u, allSurfaces.size()); for (auto &surface : allSurfaces) diff --git a/opencl/test/unit_test/command_queue/enqueue_write_buffer_rect_tests.cpp b/opencl/test/unit_test/command_queue/enqueue_write_buffer_rect_tests.cpp index 2da32b4d6e..cd9909923a 100644 --- a/opencl/test/unit_test/command_queue/enqueue_write_buffer_rect_tests.cpp +++ b/opencl/test/unit_test/command_queue/enqueue_write_buffer_rect_tests.cpp @@ -574,13 +574,13 @@ HWTEST_F(EnqueueReadWriteBufferRectDispatch, givenOffsetResultingInMisalignedPtr const auto &surfaceState = getSurfaceState(&cmdQ->getIndirectHeap(IndirectHeap::SURFACE_STATE, 0), 0); if (kernelInfo.kernelArgInfo[0].kernelArgPatchInfoVector[0].size == sizeof(uint64_t)) { - auto pKernelArg = (uint64_t *)(kernel->getCrossThreadData(device->getRootDeviceIndex()) + + auto pKernelArg = (uint64_t *)(kernel->getCrossThreadData() + kernelInfo.kernelArgInfo[0].kernelArgPatchInfoVector[0].crossthreadOffset); EXPECT_EQ(reinterpret_cast(alignDown(misalignedHostPtr, 4)), *pKernelArg); EXPECT_EQ(*pKernelArg, surfaceState.getSurfaceBaseAddress()); } else if (kernelInfo.kernelArgInfo[0].kernelArgPatchInfoVector[0].size == sizeof(uint32_t)) { - auto pKernelArg = (uint32_t *)(kernel->getCrossThreadData(device->getRootDeviceIndex()) + + auto pKernelArg = (uint32_t *)(kernel->getCrossThreadData() + kernelInfo.kernelArgInfo[0].kernelArgPatchInfoVector[0].crossthreadOffset); EXPECT_EQ(reinterpret_cast(alignDown(misalignedHostPtr, 4)), static_cast(*pKernelArg)); EXPECT_EQ(static_cast(*pKernelArg), surfaceState.getSurfaceBaseAddress()); @@ -588,7 +588,7 @@ HWTEST_F(EnqueueReadWriteBufferRectDispatch, givenOffsetResultingInMisalignedPtr } if (kernelInfo.kernelArgInfo[2].kernelArgPatchInfoVector[0].size == 4 * sizeof(uint32_t)) { // size of uint4 SrcOrigin - auto dstOffset = (uint32_t *)(kernel->getCrossThreadData(device->getRootDeviceIndex()) + + auto dstOffset = (uint32_t *)(kernel->getCrossThreadData() + kernelInfo.kernelArgInfo[2].kernelArgPatchInfoVector[0].crossthreadOffset); EXPECT_EQ(hostOffset.x + ptrDiff(misalignedHostPtr, alignDown(misalignedHostPtr, 4)), *dstOffset); } else { diff --git a/opencl/test/unit_test/command_queue/gl/windows/enqueue_kernel_gl_tests_windows.cpp b/opencl/test/unit_test/command_queue/gl/windows/enqueue_kernel_gl_tests_windows.cpp index 28c8210b40..1f764d67fb 100644 --- a/opencl/test/unit_test/command_queue/gl/windows/enqueue_kernel_gl_tests_windows.cpp +++ b/opencl/test/unit_test/command_queue/gl/windows/enqueue_kernel_gl_tests_windows.cpp @@ -45,7 +45,7 @@ TEST_F(EnqueueKernelTest, givenKernelWithSharedObjArgsWhenEnqueueIsCalledThenRes auto &kernelInfo = pKernel->getKernelInfo(rootDeviceIndex); auto pKernelArg = - (uint32_t *)(pKernel->getCrossThreadData(rootDeviceIndex) + kernelInfo.kernelArgInfo[0].kernelArgPatchInfoVector[0].crossthreadOffset); + (uint32_t *)(pKernel->getCrossThreadData() + kernelInfo.kernelArgInfo[0].kernelArgPatchInfoVector[0].crossthreadOffset); auto address1 = static_cast(*pKernelArg); auto sharedBufferGpuAddress = diff --git a/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_2_tests.cpp b/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_2_tests.cpp index 4c1c8d31ea..46f1c06933 100644 --- a/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_2_tests.cpp +++ b/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_2_tests.cpp @@ -380,7 +380,7 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, GivenKernelWithSlmWhenPreviousSLML commandStreamReceiver->lastSentL3Config = L3Config; commandStreamReceiver->lastSentThreadArbitrationPolicy = kernel.mockKernel->getThreadArbitrationPolicy(); - ((MockKernel *)kernel)->setTotalSLMSize(rootDeviceIndex, 1024); + ((MockKernel *)kernel)->setTotalSLMSize(1024); cmdList.clear(); commandQueue.enqueueKernel(kernel, 1, nullptr, &GWS, nullptr, 0, nullptr, nullptr); diff --git a/opencl/test/unit_test/command_stream/command_stream_receiver_hw_tests.inl b/opencl/test/unit_test/command_stream/command_stream_receiver_hw_tests.inl index 4cd254d479..994e398eb3 100644 --- a/opencl/test/unit_test/command_stream/command_stream_receiver_hw_tests.inl +++ b/opencl/test/unit_test/command_stream/command_stream_receiver_hw_tests.inl @@ -1,5 +1,5 @@ /* - * Copyright (C) 2018-2020 Intel Corporation + * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -43,7 +43,7 @@ void CommandStreamReceiverHwTest::givenKernelWithSlmWhenPreviousNOSLM commandStreamReceiver->isPreambleSent = true; commandStreamReceiver->lastSentL3Config = 0; - static_cast(kernel)->setTotalSLMSize(rootDeviceIndex, 1024); + static_cast(kernel)->setTotalSLMSize(1024); cmdList.clear(); commandQueue.enqueueKernel(kernel, 1, nullptr, &GWS, nullptr, 0, nullptr, nullptr); @@ -89,7 +89,7 @@ void CommandStreamReceiverHwTest::givenBlockedKernelWithSlmWhenPrevio commandStreamReceiver->isPreambleSent = true; commandStreamReceiver->lastSentL3Config = 0; - static_cast(kernel)->setTotalSLMSize(rootDeviceIndex, 1024); + static_cast(kernel)->setTotalSLMSize(1024); commandQueue.enqueueKernel(kernel, 1, nullptr, &GWS, nullptr, 1, &blockingEvent, nullptr); diff --git a/opencl/test/unit_test/context/driver_diagnostics_enqueue_tests.cpp b/opencl/test/unit_test/context/driver_diagnostics_enqueue_tests.cpp index 106dba3139..88afa15d0f 100644 --- a/opencl/test/unit_test/context/driver_diagnostics_enqueue_tests.cpp +++ b/opencl/test/unit_test/context/driver_diagnostics_enqueue_tests.cpp @@ -682,9 +682,9 @@ TEST_F(PerformanceHintEnqueueKernelTest, GivenNullLocalSizeAndEnableComputeWorkS snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[NULL_LOCAL_WORKGROUP_SIZE], kernel->getKernelInfo(rootDeviceIndex).kernelDescriptor.kernelMetadata.kernelName.c_str(), - *kernel->kernelDeviceInfos[rootDeviceIndex].localWorkSizeX, - *kernel->kernelDeviceInfos[rootDeviceIndex].localWorkSizeY, - *kernel->kernelDeviceInfos[rootDeviceIndex].localWorkSizeZ); + *kernel->localWorkSizeX, + *kernel->localWorkSizeY, + *kernel->localWorkSizeZ); EXPECT_TRUE(containsHint(expectedHint, userData)); } @@ -697,9 +697,9 @@ TEST_F(PerformanceHintEnqueueKernelTest, GivenNullLocalSizeAndEnableComputeWorkS snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[NULL_LOCAL_WORKGROUP_SIZE], kernel->getKernelInfo(rootDeviceIndex).kernelDescriptor.kernelMetadata.kernelName.c_str(), - *kernel->kernelDeviceInfos[rootDeviceIndex].localWorkSizeX, - *kernel->kernelDeviceInfos[rootDeviceIndex].localWorkSizeY, - *kernel->kernelDeviceInfos[rootDeviceIndex].localWorkSizeZ); + *kernel->localWorkSizeX, + *kernel->localWorkSizeY, + *kernel->localWorkSizeZ); EXPECT_TRUE(containsHint(expectedHint, userData)); DebugManager.flags.EnableComputeWorkSizeND.set(isWorkGroupSizeEnabled); } @@ -713,9 +713,9 @@ TEST_F(PerformanceHintEnqueueKernelTest, GivenNullLocalSizeAndEnableComputeWorkS snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[NULL_LOCAL_WORKGROUP_SIZE], kernel->getKernelInfo(rootDeviceIndex).kernelDescriptor.kernelMetadata.kernelName.c_str(), - *kernel->kernelDeviceInfos[rootDeviceIndex].localWorkSizeX, - *kernel->kernelDeviceInfos[rootDeviceIndex].localWorkSizeY, - *kernel->kernelDeviceInfos[rootDeviceIndex].localWorkSizeZ); + *kernel->localWorkSizeX, + *kernel->localWorkSizeY, + *kernel->localWorkSizeZ); EXPECT_TRUE(containsHint(expectedHint, userData)); DebugManager.flags.EnableComputeWorkSizeND.set(isWorkGroupSizeEnabled); } @@ -727,9 +727,9 @@ TEST_F(PerformanceHintEnqueueKernelTest, GivenNullLocalSizeAndEnableComputeWorkS snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[NULL_LOCAL_WORKGROUP_SIZE], kernel->getKernelInfo(rootDeviceIndex).kernelDescriptor.kernelMetadata.kernelName.c_str(), - *kernel->kernelDeviceInfos[rootDeviceIndex].localWorkSizeX, - *kernel->kernelDeviceInfos[rootDeviceIndex].localWorkSizeY, - *kernel->kernelDeviceInfos[rootDeviceIndex].localWorkSizeZ); + *kernel->localWorkSizeX, + *kernel->localWorkSizeY, + *kernel->localWorkSizeZ); EXPECT_TRUE(containsHint(expectedHint, userData)); } @@ -743,9 +743,9 @@ TEST_F(PerformanceHintEnqueueKernelTest, GivenNullLocalSizeAndEnableComputeWorkS snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[NULL_LOCAL_WORKGROUP_SIZE], kernel->getKernelInfo(rootDeviceIndex).kernelDescriptor.kernelMetadata.kernelName.c_str(), - *kernel->kernelDeviceInfos[rootDeviceIndex].localWorkSizeX, - *kernel->kernelDeviceInfos[rootDeviceIndex].localWorkSizeY, - *kernel->kernelDeviceInfos[rootDeviceIndex].localWorkSizeZ); + *kernel->localWorkSizeX, + *kernel->localWorkSizeY, + *kernel->localWorkSizeZ); EXPECT_TRUE(containsHint(expectedHint, userData)); } @@ -759,9 +759,9 @@ TEST_F(PerformanceHintEnqueueKernelTest, GivenNullLocalSizeAndEnableComputeWorkS snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[NULL_LOCAL_WORKGROUP_SIZE], kernel->getKernelInfo(rootDeviceIndex).kernelDescriptor.kernelMetadata.kernelName.c_str(), - *kernel->kernelDeviceInfos[rootDeviceIndex].localWorkSizeX, - *kernel->kernelDeviceInfos[rootDeviceIndex].localWorkSizeY, - *kernel->kernelDeviceInfos[rootDeviceIndex].localWorkSizeZ); + *kernel->localWorkSizeX, + *kernel->localWorkSizeY, + *kernel->localWorkSizeZ); EXPECT_TRUE(containsHint(expectedHint, userData)); } diff --git a/opencl/test/unit_test/execution_model/enqueue_execution_model_kernel_tests.cpp b/opencl/test/unit_test/execution_model/enqueue_execution_model_kernel_tests.cpp index a0fd671562..4bcecf59fe 100644 --- a/opencl/test/unit_test/execution_model/enqueue_execution_model_kernel_tests.cpp +++ b/opencl/test/unit_test/execution_model/enqueue_execution_model_kernel_tests.cpp @@ -322,7 +322,7 @@ HWCMDTEST_P(IGFX_GEN8_CORE, ParentKernelEnqueueTest, givenParentKernelWhenEnqueu Kernel *blockKernel = Kernel::create(pKernel->getProgram(), MockKernel::toKernelInfoContainer(*pBlockInfo, rootDeviceIndex), *pClDevice, nullptr); blockSSH = alignUp(blockSSH, BINDING_TABLE_STATE::SURFACESTATEPOINTER_ALIGN_SIZE); - if (blockKernel->getNumberOfBindingTableStates(rootDeviceIndex) > 0) { + if (blockKernel->getNumberOfBindingTableStates() > 0) { ASSERT_TRUE(isValidOffset(pBlockInfo->kernelDescriptor.payloadMappings.bindingTable.tableOffset)); auto dstBlockBti = ptrOffset(blockSSH, pBlockInfo->kernelDescriptor.payloadMappings.bindingTable.tableOffset); EXPECT_EQ(0U, reinterpret_cast(dstBlockBti) % INTERFACE_DESCRIPTOR_DATA::BINDINGTABLEPOINTER_ALIGN_SIZE); @@ -330,7 +330,7 @@ HWCMDTEST_P(IGFX_GEN8_CORE, ParentKernelEnqueueTest, givenParentKernelWhenEnqueu auto srcBlockBti = ptrOffset(pBlockInfo->heapInfo.pSsh, pBlockInfo->kernelDescriptor.payloadMappings.bindingTable.tableOffset); auto srcBindingTable = reinterpret_cast(srcBlockBti); - for (uint32_t i = 0; i < blockKernel->getNumberOfBindingTableStates(rootDeviceIndex); ++i) { + for (uint32_t i = 0; i < blockKernel->getNumberOfBindingTableStates(); ++i) { uint32_t dstSurfaceStatePointer = dstBindingTable[i].getSurfaceStatePointer(); uint32_t srcSurfaceStatePointer = srcBindingTable[i].getSurfaceStatePointer(); auto *dstSurfaceState = reinterpret_cast(ptrOffset(ssh->getCpuBase(), dstSurfaceStatePointer)); @@ -454,13 +454,13 @@ HWCMDTEST_F(IGFX_GEN8_CORE, ParentKernelEnqueueFixture, GivenParentKernelWhenEnq const auto &defaultQueueSurfaceAddress = implicitArgs.deviceSideEnqueueDefaultQueueSurfaceAddress; if (isValidOffset(defaultQueueSurfaceAddress.stateless)) { - auto patchLocation = ptrOffset(reinterpret_cast(parentKernel->getCrossThreadData(rootDeviceIndex)), defaultQueueSurfaceAddress.stateless); + auto patchLocation = ptrOffset(reinterpret_cast(parentKernel->getCrossThreadData()), defaultQueueSurfaceAddress.stateless); EXPECT_EQ(pDevQueue->getQueueBuffer()->getGpuAddressToPatch(), *patchLocation); } const auto &eventPoolSurfaceAddress = implicitArgs.deviceSideEnqueueEventPoolSurfaceAddress; if (isValidOffset(eventPoolSurfaceAddress.stateless)) { - auto patchLocation = ptrOffset(reinterpret_cast(parentKernel->getCrossThreadData(rootDeviceIndex)), eventPoolSurfaceAddress.stateless); + auto patchLocation = ptrOffset(reinterpret_cast(parentKernel->getCrossThreadData()), eventPoolSurfaceAddress.stateless); EXPECT_EQ(pDevQueue->getEventPoolBuffer()->getGpuAddressToPatch(), *patchLocation); } } diff --git a/opencl/test/unit_test/execution_model/parent_kernel_dispatch_tests.cpp b/opencl/test/unit_test/execution_model/parent_kernel_dispatch_tests.cpp index 1ec480899d..09f8cc48f8 100644 --- a/opencl/test/unit_test/execution_model/parent_kernel_dispatch_tests.cpp +++ b/opencl/test/unit_test/execution_model/parent_kernel_dispatch_tests.cpp @@ -174,7 +174,7 @@ HWCMDTEST_P(IGFX_GEN8_CORE, ParentKernelDispatchTest, givenParentKernelWhenQueue size_t sshUsed = blockedCommandsData->ssh->getUsed(); - size_t expectedSizeSSH = pKernel->getNumberOfBindingTableStates(rootDeviceIndex) * sizeof(RENDER_SURFACE_STATE) + + size_t expectedSizeSSH = pKernel->getNumberOfBindingTableStates() * sizeof(RENDER_SURFACE_STATE) + pKernel->getKernelInfo(rootDeviceIndex).kernelDescriptor.payloadMappings.bindingTable.numEntries * sizeof(BINDING_TABLE_STATE) + UnitTestHelper::getDefaultSshUsage(); diff --git a/opencl/test/unit_test/execution_model/scheduler_dispatch_tests.cpp b/opencl/test/unit_test/execution_model/scheduler_dispatch_tests.cpp index 00d1790dcd..dfefe9699d 100644 --- a/opencl/test/unit_test/execution_model/scheduler_dispatch_tests.cpp +++ b/opencl/test/unit_test/execution_model/scheduler_dispatch_tests.cpp @@ -70,27 +70,27 @@ HWCMDTEST_F(IGFX_GEN8_CORE, ExecutionModelSchedulerFixture, WhenDispatchingSched pDevQueueHw->getIndirectHeap(IndirectHeap::DYNAMIC_STATE), false); - EXPECT_EQ(0u, *scheduler.kernelDeviceInfos[rootDeviceIndex].globalWorkOffsetX); - EXPECT_EQ(0u, *scheduler.kernelDeviceInfos[rootDeviceIndex].globalWorkOffsetY); - EXPECT_EQ(0u, *scheduler.kernelDeviceInfos[rootDeviceIndex].globalWorkOffsetZ); + EXPECT_EQ(0u, *scheduler.globalWorkOffsetX); + EXPECT_EQ(0u, *scheduler.globalWorkOffsetY); + EXPECT_EQ(0u, *scheduler.globalWorkOffsetZ); - EXPECT_EQ((uint32_t)scheduler.getLws(), *scheduler.kernelDeviceInfos[rootDeviceIndex].localWorkSizeX); - EXPECT_EQ(1u, *scheduler.kernelDeviceInfos[rootDeviceIndex].localWorkSizeY); - EXPECT_EQ(1u, *scheduler.kernelDeviceInfos[rootDeviceIndex].localWorkSizeZ); + EXPECT_EQ((uint32_t)scheduler.getLws(), *scheduler.localWorkSizeX); + EXPECT_EQ(1u, *scheduler.localWorkSizeY); + EXPECT_EQ(1u, *scheduler.localWorkSizeZ); - EXPECT_EQ((uint32_t)scheduler.getLws(), *scheduler.kernelDeviceInfos[rootDeviceIndex].localWorkSizeX2); - EXPECT_EQ(1u, *scheduler.kernelDeviceInfos[rootDeviceIndex].localWorkSizeY2); - EXPECT_EQ(1u, *scheduler.kernelDeviceInfos[rootDeviceIndex].localWorkSizeZ2); + EXPECT_EQ((uint32_t)scheduler.getLws(), *scheduler.localWorkSizeX2); + EXPECT_EQ(1u, *scheduler.localWorkSizeY2); + EXPECT_EQ(1u, *scheduler.localWorkSizeZ2); - if (scheduler.kernelDeviceInfos[rootDeviceIndex].enqueuedLocalWorkSizeX != &Kernel::dummyPatchLocation) { - EXPECT_EQ((uint32_t)scheduler.getLws(), *scheduler.kernelDeviceInfos[rootDeviceIndex].enqueuedLocalWorkSizeX); + if (scheduler.enqueuedLocalWorkSizeX != &Kernel::dummyPatchLocation) { + EXPECT_EQ((uint32_t)scheduler.getLws(), *scheduler.enqueuedLocalWorkSizeX); } - EXPECT_EQ(1u, *scheduler.kernelDeviceInfos[rootDeviceIndex].enqueuedLocalWorkSizeY); - EXPECT_EQ(1u, *scheduler.kernelDeviceInfos[rootDeviceIndex].enqueuedLocalWorkSizeZ); + EXPECT_EQ(1u, *scheduler.enqueuedLocalWorkSizeY); + EXPECT_EQ(1u, *scheduler.enqueuedLocalWorkSizeZ); - EXPECT_EQ((uint32_t)(scheduler.getGws() / scheduler.getLws()), *scheduler.kernelDeviceInfos[rootDeviceIndex].numWorkGroupsX); - EXPECT_EQ(0u, *scheduler.kernelDeviceInfos[rootDeviceIndex].numWorkGroupsY); - EXPECT_EQ(0u, *scheduler.kernelDeviceInfos[rootDeviceIndex].numWorkGroupsZ); + EXPECT_EQ((uint32_t)(scheduler.getGws() / scheduler.getLws()), *scheduler.numWorkGroupsX); + EXPECT_EQ(0u, *scheduler.numWorkGroupsY); + EXPECT_EQ(0u, *scheduler.numWorkGroupsZ); HardwareParse hwParser; hwParser.parseCommands(commandStream, 0); @@ -151,7 +151,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, ExecutionModelSchedulerFixture, WhenDispatchingSched auto grfSize = pDevice->getHardwareInfo().capabilityTable.grfSize; auto sizePerThreadDataTotal = PerThreadDataHelper::getPerThreadDataSizeTotal(scheduler.getKernelInfo(rootDeviceIndex).getMaxSimdSize(), grfSize, numChannels, scheduler.getLws()); - auto sizeCrossThreadData = scheduler.getCrossThreadDataSize(rootDeviceIndex); + auto sizeCrossThreadData = scheduler.getCrossThreadDataSize(); auto IndirectDataLength = alignUp((uint32_t)(sizeCrossThreadData + sizePerThreadDataTotal), GPGPU_WALKER::INDIRECTDATASTARTADDRESS_ALIGN_SIZE); EXPECT_EQ(IndirectDataLength, walker->getIndirectDataLength()); diff --git a/opencl/test/unit_test/gtpin/gtpin_tests.cpp b/opencl/test/unit_test/gtpin/gtpin_tests.cpp index 42627bbb37..7782af24fa 100644 --- a/opencl/test/unit_test/gtpin/gtpin_tests.cpp +++ b/opencl/test/unit_test/gtpin/gtpin_tests.cpp @@ -1282,7 +1282,7 @@ TEST_F(GTPinTests, givenInitializedGTPinInterfaceWhenKernelWithoutSSHIsUsedThenG // Verify that when SSH is removed then during kernel execution // GT-Pin Kernel Submit, Command Buffer Create and Command Buffer Complete callbacks are not called. - pKernel->resizeSurfaceStateHeap(rootDeviceIndex, nullptr, 0, 0, 0); + pKernel->resizeSurfaceStateHeap(nullptr, 0, 0, 0); int prevCount2 = KernelSubmitCallbackCount; int prevCount3 = CommandBufferCreateCallbackCount; @@ -1396,7 +1396,7 @@ TEST_F(GTPinTests, givenInitializedGTPinInterfaceWhenBlockedKernelWithoutSSHIsUs // Verify that when SSH is removed then during kernel execution // GT-Pin Kernel Submit, Command Buffer Create and Command Buffer Complete callbacks are not called. - pKernel->resizeSurfaceStateHeap(rootDeviceIndex, nullptr, 0, 0, 0); + pKernel->resizeSurfaceStateHeap(nullptr, 0, 0, 0); cl_event userEvent = clCreateUserEvent(context, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); @@ -2177,8 +2177,8 @@ TEST_F(GTPinTests, givenParentKernelWhenGtPinAddingSurfaceStateThenItIsNotAddedA std::unique_ptr parentKernel(MockParentKernel::create(*pContext)); parentKernel->mockKernelInfo->usesSsh = true; - parentKernel->kernelDeviceInfos[rootDeviceIndex].sshLocalSize = 64; - parentKernel->kernelDeviceInfos[rootDeviceIndex].pSshLocal.reset(new char[64]); + parentKernel->sshLocalSize = 64; + parentKernel->pSshLocal.reset(new char[64]); size_t sizeSurfaceStates1 = parentKernel->getSurfaceStateHeapSize(rootDeviceIndex); @@ -2234,11 +2234,11 @@ TEST_F(GTPinTests, givenKernelWithSSHThenVerifyThatSSHResizeWorksWell) { auto pKernel = pMultiDeviceKernel->getKernel(rootDeviceIndex); ASSERT_NE(nullptr, pKernel); - size_t numBTS1 = pKernel->getNumberOfBindingTableStates(rootDeviceIndex); + size_t numBTS1 = pKernel->getNumberOfBindingTableStates(); EXPECT_EQ(2u, numBTS1); size_t sizeSurfaceStates1 = pKernel->getSurfaceStateHeapSize(rootDeviceIndex); EXPECT_NE(0u, sizeSurfaceStates1); - size_t offsetBTS1 = pKernel->getBindingTableOffset(rootDeviceIndex); + size_t offsetBTS1 = pKernel->getBindingTableOffset(); EXPECT_NE(0u, offsetBTS1); GFXCORE_FAMILY genFamily = pDevice->getHardwareInfo().platform.eRenderCoreFamily; @@ -2250,11 +2250,11 @@ TEST_F(GTPinTests, givenKernelWithSSHThenVerifyThatSSHResizeWorksWell) { bool surfaceAdded = gtpinHelper.addSurfaceState(pKernel, rootDeviceIndex); EXPECT_TRUE(surfaceAdded); - size_t numBTS2 = pKernel->getNumberOfBindingTableStates(rootDeviceIndex); + size_t numBTS2 = pKernel->getNumberOfBindingTableStates(); EXPECT_EQ(numBTS1 + 1, numBTS2); size_t sizeSurfaceStates2 = pKernel->getSurfaceStateHeapSize(rootDeviceIndex); EXPECT_GT(sizeSurfaceStates2, sizeSurfaceStates1); - size_t offsetBTS2 = pKernel->getBindingTableOffset(rootDeviceIndex); + size_t offsetBTS2 = pKernel->getBindingTableOffset(); EXPECT_GT(offsetBTS2, offsetBTS1); void *pSS2 = gtpinHelper.getSurfaceState(pKernel, 0, rootDeviceIndex); @@ -2264,17 +2264,17 @@ TEST_F(GTPinTests, givenKernelWithSSHThenVerifyThatSSHResizeWorksWell) { EXPECT_EQ(nullptr, pSS2); // Remove kernel's SSH - pKernel->resizeSurfaceStateHeap(rootDeviceIndex, nullptr, 0, 0, 0); + pKernel->resizeSurfaceStateHeap(nullptr, 0, 0, 0); // Try to enlarge SSH once again, this time the operation must fail surfaceAdded = gtpinHelper.addSurfaceState(pKernel, rootDeviceIndex); EXPECT_FALSE(surfaceAdded); - size_t numBTS3 = pKernel->getNumberOfBindingTableStates(rootDeviceIndex); + size_t numBTS3 = pKernel->getNumberOfBindingTableStates(); EXPECT_EQ(0u, numBTS3); size_t sizeSurfaceStates3 = pKernel->getSurfaceStateHeapSize(rootDeviceIndex); EXPECT_EQ(0u, sizeSurfaceStates3); - size_t offsetBTS3 = pKernel->getBindingTableOffset(rootDeviceIndex); + size_t offsetBTS3 = pKernel->getBindingTableOffset(); EXPECT_EQ(0u, offsetBTS3); void *pSS3 = gtpinHelper.getSurfaceState(pKernel, 0, rootDeviceIndex); EXPECT_EQ(nullptr, pSS3); @@ -2409,7 +2409,7 @@ TEST_F(GTPinTests, givenInitializedGTPinInterfaceWhenOnKernelSubitIsCalledThenCo std::unique_ptr pMultiDeviceKernel(MockMultiDeviceKernel::create(pProgramm.get(), MockKernel::toKernelInfoContainer(*pKernelInfo, rootDeviceIndex))); auto pKernel = static_cast(pMultiDeviceKernel->getKernel(rootDeviceIndex)); - pKernel->setSshLocal(nullptr, sizeof(surfaceStateHeap), rootDeviceIndex); + pKernel->setSshLocal(nullptr, sizeof(surfaceStateHeap)); kernelOffset = 0x1234; EXPECT_NE(pKernel->getStartOffset(), kernelOffset); diff --git a/opencl/test/unit_test/helpers/dispatch_info_builder_tests.cpp b/opencl/test/unit_test/helpers/dispatch_info_builder_tests.cpp index e16d2c472f..dee0aa9530 100644 --- a/opencl/test/unit_test/helpers/dispatch_info_builder_tests.cpp +++ b/opencl/test/unit_test/helpers/dispatch_info_builder_tests.cpp @@ -69,7 +69,7 @@ class DispatchInfoBuilderFixture : public ContextFixture, public ClDeviceFixture pKernel->setCrossThreadData(pCrossThreadData, sizeof(pCrossThreadData)); pKernel->setKernelArgHandler(0, &Kernel::setArgBuffer); - pKernel->kernelDeviceInfos[rootDeviceIndex].slmTotalSize = 128; + pKernel->slmTotalSize = 128; pKernel->isBuiltIn = true; } @@ -874,11 +874,11 @@ TEST_F(DispatchInfoBuilderTest, WhenSettingKernelArgThenAddressesAreCorrect) { for (auto &dispatchInfo : multiDispatchInfo) { auto crossthreadOffset0 = pKernelInfo->kernelArgInfo[0].kernelArgPatchInfoVector[0].crossthreadOffset; - EXPECT_EQ(buffer->getCpuAddress(), *reinterpret_cast((dispatchInfo.getKernel()->getCrossThreadData(rootDeviceIndex) + crossthreadOffset0))); + EXPECT_EQ(buffer->getCpuAddress(), *reinterpret_cast((dispatchInfo.getKernel()->getCrossThreadData() + crossthreadOffset0))); auto crossthreadOffset1 = pKernelInfo->kernelArgInfo[1].kernelArgPatchInfoVector[0].crossthreadOffset; - EXPECT_EQ(svmPtr, *(reinterpret_cast(dispatchInfo.getKernel()->getCrossThreadData(rootDeviceIndex) + crossthreadOffset1))); + EXPECT_EQ(svmPtr, *(reinterpret_cast(dispatchInfo.getKernel()->getCrossThreadData() + crossthreadOffset1))); auto crossthreadOffset2 = pKernelInfo->kernelArgInfo[2].kernelArgPatchInfoVector[0].crossthreadOffset; - EXPECT_EQ(svmPtr, *(reinterpret_cast(dispatchInfo.getKernel()->getCrossThreadData(rootDeviceIndex) + crossthreadOffset2))); + EXPECT_EQ(svmPtr, *(reinterpret_cast(dispatchInfo.getKernel()->getCrossThreadData() + crossthreadOffset2))); } delete buffer; @@ -920,34 +920,34 @@ TEST_F(DispatchInfoBuilderTest, GivenSplitWhenSettingKernelArgThenAddressesAreCo clearCrossThreadData(); builder1D.setArg(SplitDispatch::RegionCoordX::Left, static_cast(0), sizeof(cl_mem *), pVal); for (auto &dispatchInfo : mdi1D) { - EXPECT_EQ(buffer->getCpuAddress(), *reinterpret_cast((dispatchInfo.getKernel()->getCrossThreadData(rootDeviceIndex) + 0x10))); + EXPECT_EQ(buffer->getCpuAddress(), *reinterpret_cast((dispatchInfo.getKernel()->getCrossThreadData() + 0x10))); } clearCrossThreadData(); builder2D.setArg(SplitDispatch::RegionCoordX::Left, SplitDispatch::RegionCoordY::Top, static_cast(0), sizeof(cl_mem *), pVal); for (auto &dispatchInfo : mdi2D) { - EXPECT_EQ(buffer->getCpuAddress(), *reinterpret_cast((dispatchInfo.getKernel()->getCrossThreadData(rootDeviceIndex) + 0x10))); + EXPECT_EQ(buffer->getCpuAddress(), *reinterpret_cast((dispatchInfo.getKernel()->getCrossThreadData() + 0x10))); } clearCrossThreadData(); builder3D.setArg(SplitDispatch::RegionCoordX::Left, SplitDispatch::RegionCoordY::Top, SplitDispatch::RegionCoordZ::Front, static_cast(0), sizeof(cl_mem *), pVal); for (auto &dispatchInfo : mdi3D) { - EXPECT_EQ(buffer->getCpuAddress(), *reinterpret_cast((dispatchInfo.getKernel()->getCrossThreadData(rootDeviceIndex) + 0x10))); + EXPECT_EQ(buffer->getCpuAddress(), *reinterpret_cast((dispatchInfo.getKernel()->getCrossThreadData() + 0x10))); } //Set arg SVM clearCrossThreadData(); builder1D.setArgSvm(SplitDispatch::RegionCoordX::Left, 1, sizeof(svmPtr), svmPtr, nullptr, 0u); for (auto &dispatchInfo : mdi1D) { - EXPECT_EQ(svmPtr, *(reinterpret_cast(dispatchInfo.getKernel()->getCrossThreadData(rootDeviceIndex) + 0x30))); + EXPECT_EQ(svmPtr, *(reinterpret_cast(dispatchInfo.getKernel()->getCrossThreadData() + 0x30))); } clearCrossThreadData(); builder2D.setArgSvm(SplitDispatch::RegionCoordX::Left, SplitDispatch::RegionCoordY::Top, 1, sizeof(svmPtr), svmPtr, nullptr, 0u); for (auto &dispatchInfo : mdi2D) { - EXPECT_EQ(svmPtr, *(reinterpret_cast(dispatchInfo.getKernel()->getCrossThreadData(rootDeviceIndex) + 0x30))); + EXPECT_EQ(svmPtr, *(reinterpret_cast(dispatchInfo.getKernel()->getCrossThreadData() + 0x30))); } clearCrossThreadData(); builder3D.setArgSvm(SplitDispatch::RegionCoordX::Left, SplitDispatch::RegionCoordY::Top, SplitDispatch::RegionCoordZ::Front, 1, sizeof(svmPtr), svmPtr, nullptr, 0u); for (auto &dispatchInfo : mdi3D) { - EXPECT_EQ(svmPtr, *(reinterpret_cast(dispatchInfo.getKernel()->getCrossThreadData(rootDeviceIndex) + 0x30))); + EXPECT_EQ(svmPtr, *(reinterpret_cast(dispatchInfo.getKernel()->getCrossThreadData() + 0x30))); } delete buffer; diff --git a/opencl/test/unit_test/helpers/dispatch_info_tests.cpp b/opencl/test/unit_test/helpers/dispatch_info_tests.cpp index 7ce73a25b7..5a46d91792 100644 --- a/opencl/test/unit_test/helpers/dispatch_info_tests.cpp +++ b/opencl/test/unit_test/helpers/dispatch_info_tests.cpp @@ -44,7 +44,7 @@ class DispatchInfoFixture : public ContextFixture, public ClDeviceFixture { pProgram = new MockProgram(pContext, false, toClDeviceVector(*pClDevice)); pKernel = new MockKernel(pProgram, MockKernel::toKernelInfoContainer(*pKernelInfo, rootDeviceIndex), *pClDevice); - pKernel->kernelDeviceInfos[rootDeviceIndex].slmTotalSize = 128; + pKernel->slmTotalSize = 128; } void TearDown() override { delete pKernel; diff --git a/opencl/test/unit_test/helpers/hardware_commands_helper_tests.cpp b/opencl/test/unit_test/helpers/hardware_commands_helper_tests.cpp index be5babb87c..c8c929291c 100644 --- a/opencl/test/unit_test/helpers/hardware_commands_helper_tests.cpp +++ b/opencl/test/unit_test/helpers/hardware_commands_helper_tests.cpp @@ -91,7 +91,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, WhenProgramInterfaceDescriptor auto usedIndirectHeapBefore = indirectHeap.getUsed(); indirectHeap.getSpace(sizeof(INTERFACE_DESCRIPTOR_DATA)); - size_t crossThreadDataSize = kernel->getCrossThreadDataSize(rootDeviceIndex); + size_t crossThreadDataSize = kernel->getCrossThreadDataSize(); HardwareCommandsHelper::sendInterfaceDescriptorData( indirectHeap, 0, 0, crossThreadDataSize, 64, 0, 0, 0, 1, *kernel, 0, pDevice->getPreemptionMode(), nullptr, *pDevice); @@ -163,17 +163,16 @@ HWTEST_F(HardwareCommandsTest, WhenCrossThreadDataIsCreatedThenOnlyRequiredSpace auto &indirectHeap = cmdQ.getIndirectHeap(IndirectHeap::DYNAMIC_STATE, 8192); auto usedBefore = indirectHeap.getUsed(); - auto sizeCrossThreadData = kernel->getCrossThreadDataSize(rootDeviceIndex); + auto sizeCrossThreadData = kernel->getCrossThreadDataSize(); HardwareCommandsHelper::sendCrossThreadData( indirectHeap, *kernel, false, nullptr, - sizeCrossThreadData, - rootDeviceIndex); + sizeCrossThreadData); auto usedAfter = indirectHeap.getUsed(); - EXPECT_EQ(kernel->getCrossThreadDataSize(rootDeviceIndex), usedAfter - usedBefore); + EXPECT_EQ(kernel->getCrossThreadDataSize(), usedAfter - usedBefore); } HWTEST_F(HardwareCommandsTest, givenSendCrossThreadDataWhenWhenAddPatchInfoCommentsForAUBDumpIsNotSetThenAddPatchInfoDataOffsetsAreNotMoved) { @@ -190,14 +189,13 @@ HWTEST_F(HardwareCommandsTest, givenSendCrossThreadDataWhenWhenAddPatchInfoComme PatchInfoData patchInfoData = {0xaaaaaaaa, 0, PatchInfoAllocationType::KernelArg, 0xbbbbbbbb, 0, PatchInfoAllocationType::IndirectObjectHeap}; kernel->getPatchInfoDataList().push_back(patchInfoData); - auto sizeCrossThreadData = kernel->getCrossThreadDataSize(rootDeviceIndex); + auto sizeCrossThreadData = kernel->getCrossThreadDataSize(); HardwareCommandsHelper::sendCrossThreadData( indirectHeap, *kernel, false, nullptr, - sizeCrossThreadData, - rootDeviceIndex); + sizeCrossThreadData); ASSERT_EQ(1u, kernel->getPatchInfoDataList().size()); EXPECT_EQ(0xaaaaaaaa, kernel->getPatchInfoDataList()[0].sourceAllocation); @@ -212,14 +210,13 @@ HWTEST_F(HardwareCommandsTest, givenIndirectHeapNotAllocatedFromInternalPoolWhen auto nonInternalAllocation = pDevice->getMemoryManager()->allocateGraphicsMemoryWithProperties(MockAllocationProperties{pDevice->getRootDeviceIndex(), MemoryConstants::pageSize}); IndirectHeap indirectHeap(nonInternalAllocation, false); - auto sizeCrossThreadData = mockKernelWithInternal->mockKernel->getCrossThreadDataSize(rootDeviceIndex); + auto sizeCrossThreadData = mockKernelWithInternal->mockKernel->getCrossThreadDataSize(); auto offset = HardwareCommandsHelper::sendCrossThreadData( indirectHeap, *mockKernelWithInternal->mockKernel, false, nullptr, - sizeCrossThreadData, - rootDeviceIndex); + sizeCrossThreadData); EXPECT_EQ(0u, offset); pDevice->getMemoryManager()->freeGraphicsMemory(nonInternalAllocation); } @@ -229,14 +226,13 @@ HWTEST_F(HardwareCommandsTest, givenIndirectHeapAllocatedFromInternalPoolWhenSen IndirectHeap indirectHeap(internalAllocation, true); auto expectedOffset = internalAllocation->getGpuAddressToPatch(); - auto sizeCrossThreadData = mockKernelWithInternal->mockKernel->getCrossThreadDataSize(rootDeviceIndex); + auto sizeCrossThreadData = mockKernelWithInternal->mockKernel->getCrossThreadDataSize(); auto offset = HardwareCommandsHelper::sendCrossThreadData( indirectHeap, *mockKernelWithInternal->mockKernel, false, nullptr, - sizeCrossThreadData, - rootDeviceIndex); + sizeCrossThreadData); EXPECT_EQ(expectedOffset, offset); pDevice->getMemoryManager()->freeGraphicsMemory(internalAllocation); @@ -263,14 +259,13 @@ HWTEST_F(HardwareCommandsTest, givenSendCrossThreadDataWhenWhenAddPatchInfoComme kernel->getPatchInfoDataList().push_back(patchInfoData1); kernel->getPatchInfoDataList().push_back(patchInfoData2); - auto sizeCrossThreadData = kernel->getCrossThreadDataSize(rootDeviceIndex); + auto sizeCrossThreadData = kernel->getCrossThreadDataSize(); auto offsetCrossThreadData = HardwareCommandsHelper::sendCrossThreadData( indirectHeap, *kernel, false, nullptr, - sizeCrossThreadData, - rootDeviceIndex); + sizeCrossThreadData); ASSERT_NE(0u, offsetCrossThreadData); EXPECT_EQ(128u, offsetCrossThreadData); @@ -386,7 +381,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, givenKernelWithFourBindingTabl *pWalkerCmd = FamilyType::cmdInitGpgpuWalker; auto expectedBindingTableCount = 3u; - mockKernelWithInternal->mockKernel->kernelDeviceInfos[rootDeviceIndex].numberOfBindingTableStates = expectedBindingTableCount; + mockKernelWithInternal->mockKernel->numberOfBindingTableStates = expectedBindingTableCount; auto &dsh = cmdQ.getIndirectHeap(IndirectHeap::DYNAMIC_STATE, 8192); auto &ioh = cmdQ.getIndirectHeap(IndirectHeap::INDIRECT_OBJECT, 8192); @@ -432,7 +427,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, givenKernelThatIsSchedulerWhen *pWalkerCmd = FamilyType::cmdInitGpgpuWalker; auto expectedBindingTableCount = 3u; - mockKernelWithInternal->mockKernel->kernelDeviceInfos[rootDeviceIndex].numberOfBindingTableStates = expectedBindingTableCount; + mockKernelWithInternal->mockKernel->numberOfBindingTableStates = expectedBindingTableCount; auto isScheduler = const_cast(&mockKernelWithInternal->mockKernel->isSchedulerKernel); *isScheduler = true; @@ -476,7 +471,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, givenKernelWith100BindingTable *pWalkerCmd = FamilyType::cmdInitGpgpuWalker; auto expectedBindingTableCount = 100u; - mockKernelWithInternal->mockKernel->kernelDeviceInfos[rootDeviceIndex].numberOfBindingTableStates = expectedBindingTableCount; + mockKernelWithInternal->mockKernel->numberOfBindingTableStates = expectedBindingTableCount; auto &dsh = cmdQ.getIndirectHeap(IndirectHeap::DYNAMIC_STATE, 8192); auto &ioh = cmdQ.getIndirectHeap(IndirectHeap::INDIRECT_OBJECT, 8192); @@ -799,7 +794,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, WhenGettingBindingTableStateTh auto &ssh = cmdQ.getIndirectHeap(IndirectHeap::SURFACE_STATE, 8192); // Initialize binding table state pointers with pattern - EXPECT_EQ(numSurfaces, pKernel->getNumberOfBindingTableStates(rootDeviceIndex)); + EXPECT_EQ(numSurfaces, pKernel->getNumberOfBindingTableStates()); const size_t localWorkSizes[3]{256, 1, 1}; @@ -888,7 +883,7 @@ HWTEST_F(HardwareCommandsTest, GivenBuffersNotRequiringSshWhenSettingBindingTabl auto usedBefore = ssh.getUsed(); // Initialize binding table state pointers with pattern - auto numSurfaceStates = pKernel->getNumberOfBindingTableStates(rootDeviceIndex); + auto numSurfaceStates = pKernel->getNumberOfBindingTableStates(); EXPECT_EQ(0u, numSurfaceStates); // set binding table states @@ -932,7 +927,7 @@ HWTEST_F(HardwareCommandsTest, GivenZeroSurfaceStatesWhenSettingBindingTableStat auto &ssh = cmdQ.getIndirectHeap(IndirectHeap::SURFACE_STATE, 8192); // Initialize binding table state pointers with pattern - auto numSurfaceStates = pKernel->getNumberOfBindingTableStates(rootDeviceIndex); + auto numSurfaceStates = pKernel->getNumberOfBindingTableStates(); EXPECT_EQ(0u, numSurfaceStates); auto dstBindingTablePointer = pushBindingTableAndSurfaceStates(ssh, *pKernel); @@ -1078,7 +1073,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, GivenKernelWithSamplersWhenInd } mockKernelWithInternal->mockKernel->setCrossThreadData(mockKernelWithInternal->crossThreadData, sizeof(mockKernelWithInternal->crossThreadData)); - mockKernelWithInternal->mockKernel->setSshLocal(mockKernelWithInternal->sshLocal, sizeof(mockKernelWithInternal->sshLocal), rootDeviceIndex); + mockKernelWithInternal->mockKernel->setSshLocal(mockKernelWithInternal->sshLocal, sizeof(mockKernelWithInternal->sshLocal)); uint32_t interfaceDescriptorIndex = 0; auto isCcsUsed = EngineHelpers::isCcs(cmdQ.getGpgpuEngine().osContext->getEngineType()); auto kernelUsesLocalIds = HardwareCommandsHelper::kernelUsesLocalIds(*mockKernelWithInternal->mockKernel, rootDeviceIndex); diff --git a/opencl/test/unit_test/helpers/hardware_commands_helper_tests.h b/opencl/test/unit_test/helpers/hardware_commands_helper_tests.h index 543f0c605f..e68dcb4cbf 100644 --- a/opencl/test/unit_test/helpers/hardware_commands_helper_tests.h +++ b/opencl/test/unit_test/helpers/hardware_commands_helper_tests.h @@ -47,6 +47,6 @@ struct HardwareCommandsTest : ClDeviceFixture, size_t pushBindingTableAndSurfaceStates(IndirectHeap &dstHeap, const Kernel &srcKernel) { return EncodeSurfaceState::pushBindingTableAndSurfaceStates(dstHeap, srcKernel.getKernelInfo(rootDeviceIndex).kernelDescriptor.payloadMappings.bindingTable.numEntries, srcKernel.getSurfaceStateHeap(rootDeviceIndex), srcKernel.getSurfaceStateHeapSize(rootDeviceIndex), - srcKernel.getNumberOfBindingTableStates(rootDeviceIndex), srcKernel.getBindingTableOffset(rootDeviceIndex)); + srcKernel.getNumberOfBindingTableStates(), srcKernel.getBindingTableOffset()); } }; diff --git a/opencl/test/unit_test/kernel/clone_kernel_tests.cpp b/opencl/test/unit_test/kernel/clone_kernel_tests.cpp index 25850d9b97..2b5fd98d78 100644 --- a/opencl/test/unit_test/kernel/clone_kernel_tests.cpp +++ b/opencl/test/unit_test/kernel/clone_kernel_tests.cpp @@ -183,7 +183,7 @@ TEST_F(CloneKernelTest, GivenArgLocalWhenCloningKernelThenKernelInfoIsCorrect) { EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getPatchedArgumentsNum(), pClonedKernel[rootDeviceIndex]->getPatchedArgumentsNum()); EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).isPatched, pClonedKernel[rootDeviceIndex]->getKernelArgInfo(0).isPatched); - EXPECT_EQ(alignUp(slmSize, 1024), pClonedKernel[rootDeviceIndex]->kernelDeviceInfos[rootDeviceIndex].slmTotalSize); + EXPECT_EQ(alignUp(slmSize, 1024), pClonedKernel[rootDeviceIndex]->slmTotalSize); } } @@ -219,7 +219,7 @@ TEST_F(CloneKernelTest, GivenArgBufferWhenCloningKernelThenKernelInfoIsCorrect) EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getPatchedArgumentsNum(), pClonedKernel[rootDeviceIndex]->getPatchedArgumentsNum()); EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).isPatched, pClonedKernel[rootDeviceIndex]->getKernelArgInfo(0).isPatched); - auto pKernelArg = (cl_mem *)(pClonedKernel[rootDeviceIndex]->getCrossThreadData(rootDeviceIndex) + + auto pKernelArg = (cl_mem *)(pClonedKernel[rootDeviceIndex]->getCrossThreadData() + pClonedKernel[rootDeviceIndex]->getKernelInfo(rootDeviceIndex).kernelArgInfo[0].kernelArgPatchInfoVector[0].crossthreadOffset); EXPECT_EQ(buffer->getGraphicsAllocation(rootDeviceIndex)->getGpuAddressToPatch(), reinterpret_cast(*pKernelArg)); } @@ -256,7 +256,7 @@ TEST_F(CloneKernelTest, GivenArgPipeWhenCloningKernelThenKernelInfoIsCorrect) { EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getPatchedArgumentsNum(), pClonedKernel[rootDeviceIndex]->getPatchedArgumentsNum()); EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).isPatched, pClonedKernel[rootDeviceIndex]->getKernelArgInfo(0).isPatched); - auto pKernelArg = (cl_mem *)(pClonedKernel[rootDeviceIndex]->getCrossThreadData(rootDeviceIndex) + + auto pKernelArg = (cl_mem *)(pClonedKernel[rootDeviceIndex]->getCrossThreadData() + pClonedKernel[rootDeviceIndex]->getKernelInfo(rootDeviceIndex).kernelArgInfo[0].kernelArgPatchInfoVector[0].crossthreadOffset); EXPECT_EQ(pipe->getGraphicsAllocation(rootDeviceIndex)->getGpuAddressToPatch(), reinterpret_cast(*pKernelArg)); } @@ -296,7 +296,7 @@ TEST_F(CloneKernelTest, GivenArgImageWhenCloningKernelThenKernelInfoIsCorrect) { EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getPatchedArgumentsNum(), pClonedKernel[rootDeviceIndex]->getPatchedArgumentsNum()); EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).isPatched, pClonedKernel[rootDeviceIndex]->getKernelArgInfo(0).isPatched); - auto crossThreadData = reinterpret_cast(pClonedKernel[rootDeviceIndex]->getCrossThreadData(rootDeviceIndex)); + auto crossThreadData = reinterpret_cast(pClonedKernel[rootDeviceIndex]->getCrossThreadData()); EXPECT_EQ(objectId, *crossThreadData); const auto &argInfo = pClonedKernel[rootDeviceIndex]->getKernelInfo(rootDeviceIndex).kernelArgInfo[0]; @@ -349,7 +349,7 @@ TEST_F(CloneKernelTest, GivenArgAcceleratorWhenCloningKernelThenKernelInfoIsCorr EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getPatchedArgumentsNum(), pClonedKernel[rootDeviceIndex]->getPatchedArgumentsNum()); EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).isPatched, pClonedKernel[rootDeviceIndex]->getKernelArgInfo(0).isPatched); - auto crossThreadData = reinterpret_cast(pClonedKernel[rootDeviceIndex]->getCrossThreadData(rootDeviceIndex)); + auto crossThreadData = reinterpret_cast(pClonedKernel[rootDeviceIndex]->getCrossThreadData()); const auto &argInfo = pClonedKernel[rootDeviceIndex]->getKernelInfo(rootDeviceIndex).kernelArgInfo[0]; @@ -403,7 +403,7 @@ TEST_F(CloneKernelTest, GivenArgSamplerWhenCloningKernelThenKernelInfoIsCorrect) EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getPatchedArgumentsNum(), pClonedKernel[rootDeviceIndex]->getPatchedArgumentsNum()); EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).isPatched, pClonedKernel[rootDeviceIndex]->getKernelArgInfo(0).isPatched); - auto crossThreadData = reinterpret_cast(pClonedKernel[rootDeviceIndex]->getCrossThreadData(rootDeviceIndex)); + auto crossThreadData = reinterpret_cast(pClonedKernel[rootDeviceIndex]->getCrossThreadData()); EXPECT_EQ(objectId, *crossThreadData); const auto &argInfo = pClonedKernel[rootDeviceIndex]->getKernelInfo(rootDeviceIndex).kernelArgInfo[0]; @@ -454,7 +454,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, CloneKernelTest, GivenArgDeviceQueueWhenCloningKerne EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getPatchedArgumentsNum(), pClonedKernel[rootDeviceIndex]->getPatchedArgumentsNum()); EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).isPatched, pClonedKernel[rootDeviceIndex]->getKernelArgInfo(0).isPatched); - auto pKernelArg = (uintptr_t *)(pClonedKernel[rootDeviceIndex]->getCrossThreadData(rootDeviceIndex) + + auto pKernelArg = (uintptr_t *)(pClonedKernel[rootDeviceIndex]->getCrossThreadData() + pClonedKernel[rootDeviceIndex]->getKernelInfo(rootDeviceIndex).kernelArgInfo[0].kernelArgPatchInfoVector[0].crossthreadOffset); EXPECT_EQ(static_cast(mockDevQueue.getQueueBuffer()->getGpuAddressToPatch()), *pKernelArg); } @@ -485,7 +485,7 @@ TEST_F(CloneKernelTest, GivenArgSvmWhenCloningKernelThenKernelInfoIsCorrect) { EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getPatchedArgumentsNum(), pClonedKernel[rootDeviceIndex]->getPatchedArgumentsNum()); EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).isPatched, pClonedKernel[rootDeviceIndex]->getKernelArgInfo(0).isPatched); - auto pKernelArg = (void **)(pClonedKernel[rootDeviceIndex]->getCrossThreadData(rootDeviceIndex) + + auto pKernelArg = (void **)(pClonedKernel[rootDeviceIndex]->getCrossThreadData() + pClonedKernel[rootDeviceIndex]->getKernelInfo(rootDeviceIndex).kernelArgInfo[0].kernelArgPatchInfoVector[0].crossthreadOffset); EXPECT_EQ(svmPtr, *pKernelArg); } @@ -518,7 +518,7 @@ TEST_F(CloneKernelTest, GivenArgSvmAllocWhenCloningKernelThenKernelInfoIsCorrect EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getPatchedArgumentsNum(), pClonedKernel[rootDeviceIndex]->getPatchedArgumentsNum()); EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).isPatched, pClonedKernel[rootDeviceIndex]->getKernelArgInfo(0).isPatched); - auto pKernelArg = (void **)(pClonedKernel[rootDeviceIndex]->getCrossThreadData(rootDeviceIndex) + + auto pKernelArg = (void **)(pClonedKernel[rootDeviceIndex]->getCrossThreadData() + pClonedKernel[rootDeviceIndex]->getKernelInfo(rootDeviceIndex).kernelArgInfo[0].kernelArgPatchInfoVector[0].crossthreadOffset); EXPECT_EQ(svmPtr, *pKernelArg); @@ -552,7 +552,7 @@ TEST_F(CloneKernelTest, GivenArgImmediateWhenCloningKernelThenKernelInfoIsCorrec EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getPatchedArgumentsNum(), pClonedKernel[rootDeviceIndex]->getPatchedArgumentsNum()); EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).isPatched, pClonedKernel[rootDeviceIndex]->getKernelArgInfo(0).isPatched); - auto pKernelArg = (TypeParam *)(pClonedKernel[rootDeviceIndex]->getCrossThreadData(rootDeviceIndex) + + auto pKernelArg = (TypeParam *)(pClonedKernel[rootDeviceIndex]->getCrossThreadData() + pClonedKernel[rootDeviceIndex]->getKernelInfo(rootDeviceIndex).kernelArgInfo[0].kernelArgPatchInfoVector[0].crossthreadOffset); EXPECT_EQ(value, *pKernelArg); } diff --git a/opencl/test/unit_test/kernel/kernel_accelerator_arg_tests.cpp b/opencl/test/unit_test/kernel/kernel_accelerator_arg_tests.cpp index 8a4846a86f..fda014c310 100644 --- a/opencl/test/unit_test/kernel/kernel_accelerator_arg_tests.cpp +++ b/opencl/test/unit_test/kernel/kernel_accelerator_arg_tests.cpp @@ -105,7 +105,7 @@ TEST_F(KernelArgAcceleratorTest, WhenCreatingVmeAcceleratorThenCorrectKernelArgs status = this->pKernel->setArg(0, sizeof(cl_accelerator_intel), &accelerator); ASSERT_EQ(CL_SUCCESS, status); - char *crossThreadData = pKernel->getCrossThreadData(rootDeviceIndex); + char *crossThreadData = pKernel->getCrossThreadData(); const auto &arginfo = pKernelInfo->kernelArgInfo[0]; diff --git a/opencl/test/unit_test/kernel/kernel_arg_buffer_tests.cpp b/opencl/test/unit_test/kernel/kernel_arg_buffer_tests.cpp index 3ae1b3a7c4..42a738ebed 100644 --- a/opencl/test/unit_test/kernel/kernel_arg_buffer_tests.cpp +++ b/opencl/test/unit_test/kernel/kernel_arg_buffer_tests.cpp @@ -41,7 +41,7 @@ TEST_F(KernelArgBufferTest, GivenValidBufferWhenSettingKernelArgThenBufferAddres auto retVal = this->pKernel->setArg(0, sizeof(cl_mem *), pVal); EXPECT_EQ(CL_SUCCESS, retVal); - auto pKernelArg = (cl_mem **)(this->pKernel->getCrossThreadData(rootDeviceIndex) + + auto pKernelArg = (cl_mem **)(this->pKernel->getCrossThreadData() + this->pKernelInfo->kernelArgInfo[0].kernelArgPatchInfoVector[0].crossthreadOffset); EXPECT_EQ(buffer->getCpuAddress(), *pKernelArg); @@ -127,7 +127,7 @@ TEST_F(MultiDeviceKernelArgBufferTest, GivenValidBufferWhenSettingKernelArgThenB for (auto &rootDeviceIndex : pContext->getRootDeviceIndices()) { auto pKernel = static_cast(pMultiDeviceKernel->getKernel(rootDeviceIndex)); - auto pKernelArg = reinterpret_cast(pKernel->getCrossThreadData(rootDeviceIndex) + + auto pKernelArg = reinterpret_cast(pKernel->getCrossThreadData() + kernelInfos[rootDeviceIndex]->kernelArgInfo[0].kernelArgPatchInfoVector[0].crossthreadOffset); EXPECT_EQ(pBuffer->getGraphicsAllocation(rootDeviceIndex)->getGpuAddressToPatch(), *pKernelArg); } @@ -266,7 +266,7 @@ TEST_F(KernelArgBufferTest, GivenNullPtrWhenSettingKernelArgThenKernelArgIsNull) auto pVal = &val; this->pKernel->setArg(0, sizeof(cl_mem *), pVal); - auto pKernelArg = (cl_mem **)(this->pKernel->getCrossThreadData(rootDeviceIndex) + + auto pKernelArg = (cl_mem **)(this->pKernel->getCrossThreadData() + this->pKernelInfo->kernelArgInfo[0].kernelArgPatchInfoVector[0].crossthreadOffset); EXPECT_EQ(nullptr, *pKernelArg); @@ -283,7 +283,7 @@ TEST_F(MultiDeviceKernelArgBufferTest, GivenNullPtrWhenSettingKernelArgThenKerne pMultiDeviceKernel->setArg(0, sizeof(cl_mem *), pVal); for (auto &rootDeviceIndex : pContext->getRootDeviceIndices()) { auto pKernel = static_cast(pMultiDeviceKernel->getKernel(rootDeviceIndex)); - auto pKernelArg = reinterpret_cast(pKernel->getCrossThreadData(rootDeviceIndex) + + auto pKernelArg = reinterpret_cast(pKernel->getCrossThreadData() + kernelInfos[rootDeviceIndex]->kernelArgInfo[0].kernelArgPatchInfoVector[0].crossthreadOffset); EXPECT_EQ(nullptr, *pKernelArg); } @@ -295,7 +295,7 @@ TEST_F(KernelArgBufferTest, given32BitDeviceWhenArgPtrPassedIsNullThenOnly4Bytes this->pKernelInfo->kernelArgInfo[0].kernelArgPatchInfoVector[0].size = 4; - auto pKernelArg64bit = (uint64_t *)(this->pKernel->getCrossThreadData(rootDeviceIndex) + + auto pKernelArg64bit = (uint64_t *)(this->pKernel->getCrossThreadData() + this->pKernelInfo->kernelArgInfo[0].kernelArgPatchInfoVector[0].crossthreadOffset); uint32_t *pKernelArg32bit = (uint32_t *)pKernelArg64bit; @@ -312,7 +312,7 @@ TEST_F(KernelArgBufferTest, given32BitDeviceWhenArgPtrPassedIsNullThenOnly4Bytes TEST_F(KernelArgBufferTest, given32BitDeviceWhenArgPassedIsNullThenOnly4BytesAreBeingPatched) { auto pVal = nullptr; this->pKernelInfo->kernelArgInfo[0].kernelArgPatchInfoVector[0].size = 4; - auto pKernelArg64bit = (uint64_t *)(this->pKernel->getCrossThreadData(rootDeviceIndex) + + auto pKernelArg64bit = (uint64_t *)(this->pKernel->getCrossThreadData() + this->pKernelInfo->kernelArgInfo[0].kernelArgPatchInfoVector[0].crossthreadOffset); *pKernelArg64bit = 0xffffffffffffffff; @@ -537,7 +537,7 @@ HWTEST_F(KernelArgBufferTestBindless, givenUsedBindlessBuffersWhenPatchingSurfac pKernelInfo->kernelArgInfo[0].offsetHeap = 64; pKernelInfo->kernelArgInfo[0].isBuffer = true; - auto patchLocation = reinterpret_cast(ptrOffset(pKernel->getCrossThreadData(rootDeviceIndex), crossThreadDataOffset)); + auto patchLocation = reinterpret_cast(ptrOffset(pKernel->getCrossThreadData(), crossThreadDataOffset)); *patchLocation = 0xdead; uint32_t sshOffset = 0x1000; @@ -565,7 +565,7 @@ TEST_F(KernelArgBufferTest, givenUsedBindlessBuffersAndNonBufferArgWhenPatchingS pKernelInfo->kernelArgInfo[0].offsetHeap = 64; pKernelInfo->kernelArgInfo[0].isBuffer = false; - auto patchLocation = reinterpret_cast(ptrOffset(pKernel->getCrossThreadData(rootDeviceIndex), crossThreadDataOffset)); + auto patchLocation = reinterpret_cast(ptrOffset(pKernel->getCrossThreadData(), crossThreadDataOffset)); *patchLocation = 0xdead; uint32_t sshOffset = 4000; @@ -584,7 +584,7 @@ TEST_F(KernelArgBufferTest, givenNotUsedBindlessBuffersAndBufferArgWhenPatchingS pKernelInfo->kernelArgInfo[0].offsetHeap = 64; pKernelInfo->kernelArgInfo[0].isBuffer = true; - auto patchLocation = reinterpret_cast(ptrOffset(pKernel->getCrossThreadData(rootDeviceIndex), crossThreadDataOffset)); + auto patchLocation = reinterpret_cast(ptrOffset(pKernel->getCrossThreadData(), crossThreadDataOffset)); *patchLocation = 0xdead; uint32_t sshOffset = 4000; @@ -602,7 +602,7 @@ HWTEST_F(KernelArgBufferTestBindless, givenUsedBindlessBuffersAndBuiltinKernelWh pKernelInfo->kernelArgInfo[0].offsetHeap = 64; pKernelInfo->kernelArgInfo[0].isBuffer = true; - auto patchLocation = reinterpret_cast(ptrOffset(pKernel->getCrossThreadData(rootDeviceIndex), crossThreadDataOffset)); + auto patchLocation = reinterpret_cast(ptrOffset(pKernel->getCrossThreadData(), crossThreadDataOffset)); *patchLocation = 0xdead; pKernel->isBuiltIn = true; diff --git a/opencl/test/unit_test/kernel/kernel_arg_pipe_tests.cpp b/opencl/test/unit_test/kernel/kernel_arg_pipe_tests.cpp index 2272daaf87..af6758601a 100644 --- a/opencl/test/unit_test/kernel/kernel_arg_pipe_tests.cpp +++ b/opencl/test/unit_test/kernel/kernel_arg_pipe_tests.cpp @@ -92,7 +92,7 @@ TEST_F(KernelArgPipeTest, GivenValidPipeWhenSettingKernelArgThenPipeAddressIsCor auto retVal = this->pKernel->setArg(0, sizeof(cl_mem *), pVal); EXPECT_EQ(CL_SUCCESS, retVal); - auto pKernelArg = (cl_mem **)(this->pKernel->getCrossThreadData(rootDeviceIndex) + + auto pKernelArg = (cl_mem **)(this->pKernel->getCrossThreadData() + this->pKernelInfo->kernelArgInfo[0].kernelArgPatchInfoVector[0].crossthreadOffset); EXPECT_EQ(pipe->getCpuAddress(), *pKernelArg); diff --git a/opencl/test/unit_test/kernel/kernel_arg_svm_tests.cpp b/opencl/test/unit_test/kernel/kernel_arg_svm_tests.cpp index 37973babbd..1b8cd662c4 100644 --- a/opencl/test/unit_test/kernel/kernel_arg_svm_tests.cpp +++ b/opencl/test/unit_test/kernel/kernel_arg_svm_tests.cpp @@ -86,7 +86,7 @@ TEST_F(KernelArgSvmTest, GivenValidSvmPtrWhenSettingKernelArgThenSvmPtrIsCorrect auto retVal = pKernel->setArgSvm(0, 256, svmPtr, nullptr, 0u); EXPECT_EQ(CL_SUCCESS, retVal); - auto pKernelArg = (void **)(pKernel->getCrossThreadData(rootDeviceIndex) + + auto pKernelArg = (void **)(pKernel->getCrossThreadData() + pKernelInfo->kernelArgInfo[0].kernelArgPatchInfoVector[0].crossthreadOffset); EXPECT_EQ(svmPtr, *pKernelArg); @@ -137,7 +137,7 @@ TEST_F(KernelArgSvmTest, GivenValidSvmAllocWhenSettingKernelArgThenArgumentsAreS auto retVal = pKernel->setArgSvmAlloc(0, svmPtr, &svmAlloc); EXPECT_EQ(CL_SUCCESS, retVal); - auto pKernelArg = (void **)(pKernel->getCrossThreadData(rootDeviceIndex) + + auto pKernelArg = (void **)(pKernel->getCrossThreadData() + pKernelInfo->kernelArgInfo[0].kernelArgPatchInfoVector[0].crossthreadOffset); EXPECT_EQ(svmPtr, *pKernelArg); @@ -238,7 +238,7 @@ HWTEST_F(KernelArgSvmTest, WhenPatchingWithImplicitSurfaceThenPatchIsApplied) { svmPtr.resize(256); pKernel->setCrossThreadData(nullptr, sizeof(void *)); - pKernel->setSshLocal(nullptr, rendSurfSize, rootDeviceIndex); + pKernel->setSshLocal(nullptr, rendSurfSize); pKernelInfo->requiresSshForBuffers = true; pKernelInfo->usesSsh = true; { @@ -252,8 +252,8 @@ HWTEST_F(KernelArgSvmTest, WhenPatchingWithImplicitSurfaceThenPatchIsApplied) { constexpr size_t patchOffset = 16; void *ptrToPatch = svmPtr.data() + patchOffset; - ASSERT_GE(pKernel->getCrossThreadDataSize(rootDeviceIndex), sizeof(void *)); - *reinterpret_cast(pKernel->getCrossThreadData(rootDeviceIndex)) = 0U; + ASSERT_GE(pKernel->getCrossThreadDataSize(), sizeof(void *)); + *reinterpret_cast(pKernel->getCrossThreadData()) = 0U; ASSERT_GE(pKernel->getSurfaceStateHeapSize(rootDeviceIndex), rendSurfSize); RENDER_SURFACE_STATE *surfState = reinterpret_cast(pKernel->getSurfaceStateHeap(rootDeviceIndex)); @@ -262,7 +262,7 @@ HWTEST_F(KernelArgSvmTest, WhenPatchingWithImplicitSurfaceThenPatchIsApplied) { pKernel->patchWithImplicitSurface(ptrToPatch, svmAlloc, *pDevice, patch); // verify cross thread data was properly patched - EXPECT_EQ(ptrToPatch, *reinterpret_cast(pKernel->getCrossThreadData(rootDeviceIndex))); + EXPECT_EQ(ptrToPatch, *reinterpret_cast(pKernel->getCrossThreadData())); // create surface state for comparison RENDER_SURFACE_STATE expectedSurfaceState; @@ -279,7 +279,7 @@ HWTEST_F(KernelArgSvmTest, WhenPatchingWithImplicitSurfaceThenPatchIsApplied) { // when cross thread and ssh data is not available then should not do anything pKernel->setCrossThreadData(nullptr, 0); - pKernel->setSshLocal(nullptr, 0, rootDeviceIndex); + pKernel->setSshLocal(nullptr, 0); pKernel->patchWithImplicitSurface(ptrToPatch, svmAlloc, *pDevice, patch); } } @@ -294,7 +294,7 @@ TEST_F(KernelArgSvmTest, WhenPatchingBufferOffsetThenPatchIsApplied) { constexpr uint32_t svmOffset = 13U; MockGraphicsAllocation svmAlloc(svmPtr.data(), 256); - uint32_t *expectedPatchPtr = reinterpret_cast(pKernel->getCrossThreadData(rootDeviceIndex)); + uint32_t *expectedPatchPtr = reinterpret_cast(pKernel->getCrossThreadData()); KernelArgInfo kai; void *returnedPtr = nullptr; @@ -390,7 +390,7 @@ HWTEST_TYPED_TEST(KernelArgSvmTestTyped, GivenBufferKernelArgWhenBufferOffsetIsN kai.offsetBufferOffset = kai.kernelArgPatchInfoVector[0].size; this->pKernel->setCrossThreadData(nullptr, kai.offsetBufferOffset + sizeof(uint32_t)); - this->pKernel->setSshLocal(nullptr, rendSurfSize, rootDeviceIndex); + this->pKernel->setSshLocal(nullptr, rendSurfSize); this->pKernelInfo->requiresSshForBuffers = true; this->pKernelInfo->usesSsh = true; { @@ -399,10 +399,10 @@ HWTEST_TYPED_TEST(KernelArgSvmTestTyped, GivenBufferKernelArgWhenBufferOffsetIsN constexpr size_t patchOffset = 16; void *ptrToPatch = svmPtr + patchOffset; size_t sizeToPatch = svmSize - patchOffset; - ASSERT_GE(this->pKernel->getCrossThreadDataSize(rootDeviceIndex), kai.offsetBufferOffset + sizeof(uint32_t)); + ASSERT_GE(this->pKernel->getCrossThreadDataSize(), kai.offsetBufferOffset + sizeof(uint32_t)); - void **expectedPointerPatchPtr = reinterpret_cast(this->pKernel->getCrossThreadData(rootDeviceIndex)); - uint32_t *expectedOffsetPatchPtr = reinterpret_cast(ptrOffset(this->pKernel->getCrossThreadData(rootDeviceIndex), kai.offsetBufferOffset)); + void **expectedPointerPatchPtr = reinterpret_cast(this->pKernel->getCrossThreadData()); + uint32_t *expectedOffsetPatchPtr = reinterpret_cast(ptrOffset(this->pKernel->getCrossThreadData(), kai.offsetBufferOffset)); *expectedPointerPatchPtr = reinterpret_cast(0U); *expectedOffsetPatchPtr = 0U; @@ -534,7 +534,7 @@ TEST_F(KernelArgSvmTest, givenCpuAddressIsNullWhenGpuAddressIsValidThenExpectSvm auto retVal = pKernel->setArgSvmAlloc(0, svmPtr, &svmAlloc); EXPECT_EQ(CL_SUCCESS, retVal); - auto pKernelArg = (void **)(pKernel->getCrossThreadData(rootDeviceIndex) + + auto pKernelArg = (void **)(pKernel->getCrossThreadData() + pKernelInfo->kernelArgInfo[0].kernelArgPatchInfoVector[0].crossthreadOffset); EXPECT_EQ(svmPtr, *pKernelArg); } @@ -548,7 +548,7 @@ TEST_F(KernelArgSvmTest, givenCpuAddressIsNullWhenGpuAddressIsValidThenPatchBuff constexpr uint32_t initVal = 7U; MockGraphicsAllocation svmAlloc(nullptr, reinterpret_cast(svmPtr.data()), 256); - uint32_t *expectedPatchPtr = reinterpret_cast(pKernel->getCrossThreadData(rootDeviceIndex)); + uint32_t *expectedPatchPtr = reinterpret_cast(pKernel->getCrossThreadData()); KernelArgInfo kai; void *returnedPtr = nullptr; diff --git a/opencl/test/unit_test/kernel/kernel_image_arg_tests.cpp b/opencl/test/unit_test/kernel/kernel_image_arg_tests.cpp index 181053819c..e3bb6bf4a7 100644 --- a/opencl/test/unit_test/kernel/kernel_image_arg_tests.cpp +++ b/opencl/test/unit_test/kernel/kernel_image_arg_tests.cpp @@ -38,7 +38,7 @@ TEST_F(KernelImageArgTest, GivenKernelWithImageArgsWhenCheckingDifferentScenario pKernel->setArg(3, sizeof(memObj), &memObj); pKernel->setArg(4, sizeof(memObj), &memObj); - auto crossThreadData = reinterpret_cast(pKernel->getCrossThreadData(rootDeviceIndex)); + auto crossThreadData = reinterpret_cast(pKernel->getCrossThreadData()); auto imgWidthOffset = ptrOffset(crossThreadData, 0x4); EXPECT_EQ(imageWidth, *imgWidthOffset); @@ -63,7 +63,7 @@ TEST_F(KernelImageArgTest, givenKernelWithFlatImageTokensWhenArgIsSetThenPatchAl cl_mem memObj = image.get(); pKernel->setArg(0, sizeof(memObj), &memObj); - auto crossThreadData = reinterpret_cast(pKernel->getCrossThreadData(rootDeviceIndex)); + auto crossThreadData = reinterpret_cast(pKernel->getCrossThreadData()); auto pixelSize = image->getSurfaceFormatInfo().surfaceFormat.ImageElementSizeInBytes; auto offsetFlatBaseOffset = ptrOffset(crossThreadData, pKernel->getKernelInfo(rootDeviceIndex).kernelArgInfo[0].offsetFlatBaseOffset); @@ -85,7 +85,7 @@ TEST_F(KernelImageArgTest, givenKernelWithValidOffsetNumMipLevelsWhenImageArgIsS cl_mem imageObj = ℑ pKernel->setArg(0, sizeof(imageObj), &imageObj); - auto crossThreadData = reinterpret_cast(pKernel->getCrossThreadData(rootDeviceIndex)); + auto crossThreadData = reinterpret_cast(pKernel->getCrossThreadData()); auto patchedNumMipLevels = ptrOffset(crossThreadData, offsetNumMipLevelsImage0); EXPECT_EQ(7U, *patchedNumMipLevels); } @@ -107,7 +107,7 @@ TEST_F(KernelImageArgTest, givenImageWithNumSamplesWhenSetArgIsCalledThenPatchNu pKernel->setArg(0, sizeof(memObj), &memObj); - auto crossThreadData = reinterpret_cast(pKernel->getCrossThreadData(rootDeviceIndex)); + auto crossThreadData = reinterpret_cast(pKernel->getCrossThreadData()); auto patchedNumSamples = ptrOffset(crossThreadData, 0x3c); EXPECT_EQ(16u, *patchedNumSamples); @@ -367,7 +367,7 @@ HWTEST_F(KernelImageArgTestBindless, givenUsedBindlessImagesWhenPatchingSurfaceS for (size_t i = 0; i < pKernelInfo->kernelArgInfo.size(); i++) { pKernelInfo->kernelArgInfo[i].kernelArgPatchInfoVector[0].crossthreadOffset = 0x20 + static_cast(0x20 * i); auto crossThreadDataOffset = pKernelInfo->kernelArgInfo[i].kernelArgPatchInfoVector[0].crossthreadOffset; - auto patchLocation = reinterpret_cast(ptrOffset(pKernel->getCrossThreadData(rootDeviceIndex), crossThreadDataOffset)); + auto patchLocation = reinterpret_cast(ptrOffset(pKernel->getCrossThreadData(), crossThreadDataOffset)); *patchLocation = 0xdead; } @@ -378,7 +378,7 @@ HWTEST_F(KernelImageArgTestBindless, givenUsedBindlessImagesWhenPatchingSurfaceS for (size_t i = 0; i < pKernelInfo->kernelArgInfo.size(); i++) { auto crossThreadDataOffset = pKernelInfo->kernelArgInfo[i].kernelArgPatchInfoVector[0].crossthreadOffset; - auto patchLocation = reinterpret_cast(ptrOffset(pKernel->getCrossThreadData(rootDeviceIndex), crossThreadDataOffset)); + auto patchLocation = reinterpret_cast(ptrOffset(pKernel->getCrossThreadData(), crossThreadDataOffset)); if (pKernelInfo->kernelArgInfo[i].isImage) { DataPortBindlessSurfaceExtendedMessageDescriptor extMessageDesc; @@ -400,7 +400,7 @@ TEST_F(KernelImageArgTest, givenUsedBindlessImagesAndNonImageArgWhenPatchingSurf for (size_t i = 0; i < pKernelInfo->kernelArgInfo.size(); i++) { pKernelInfo->kernelArgInfo[i].kernelArgPatchInfoVector[0].crossthreadOffset = 0x20 + static_cast(0x20 * i); auto crossThreadDataOffset = pKernelInfo->kernelArgInfo[i].kernelArgPatchInfoVector[0].crossthreadOffset; - auto patchLocation = reinterpret_cast(ptrOffset(pKernel->getCrossThreadData(rootDeviceIndex), crossThreadDataOffset)); + auto patchLocation = reinterpret_cast(ptrOffset(pKernel->getCrossThreadData(), crossThreadDataOffset)); *patchLocation = 0xdead; } @@ -411,7 +411,7 @@ TEST_F(KernelImageArgTest, givenUsedBindlessImagesAndNonImageArgWhenPatchingSurf pKernel->patchBindlessSurfaceStateOffsets(*pDevice, sshOffset); auto crossThreadDataOffset = pKernelInfo->kernelArgInfo[nonImageIndex].kernelArgPatchInfoVector[0].crossthreadOffset; - auto patchLocation = reinterpret_cast(ptrOffset(pKernel->getCrossThreadData(rootDeviceIndex), crossThreadDataOffset)); + auto patchLocation = reinterpret_cast(ptrOffset(pKernel->getCrossThreadData(), crossThreadDataOffset)); EXPECT_EQ(0xdeadu, *patchLocation); } @@ -425,7 +425,7 @@ TEST_F(KernelImageArgTest, givenNotUsedBindlessImagesAndImageArgWhenPatchingSurf for (size_t i = 0; i < pKernelInfo->kernelArgInfo.size(); i++) { pKernelInfo->kernelArgInfo[i].kernelArgPatchInfoVector[0].crossthreadOffset = 0x20 + static_cast(0x20 * i); auto crossThreadDataOffset = pKernelInfo->kernelArgInfo[i].kernelArgPatchInfoVector[0].crossthreadOffset; - auto patchLocation = reinterpret_cast(ptrOffset(pKernel->getCrossThreadData(rootDeviceIndex), crossThreadDataOffset)); + auto patchLocation = reinterpret_cast(ptrOffset(pKernel->getCrossThreadData(), crossThreadDataOffset)); *patchLocation = 0xdead; } @@ -436,7 +436,7 @@ TEST_F(KernelImageArgTest, givenNotUsedBindlessImagesAndImageArgWhenPatchingSurf pKernel->patchBindlessSurfaceStateOffsets(*pDevice, sshOffset); auto crossThreadDataOffset = pKernelInfo->kernelArgInfo[nonImageIndex].kernelArgPatchInfoVector[0].crossthreadOffset; - auto patchLocation = reinterpret_cast(ptrOffset(pKernel->getCrossThreadData(rootDeviceIndex), crossThreadDataOffset)); + auto patchLocation = reinterpret_cast(ptrOffset(pKernel->getCrossThreadData(), crossThreadDataOffset)); EXPECT_EQ(0xdeadu, *patchLocation); } diff --git a/opencl/test/unit_test/kernel/kernel_immediate_arg_tests.cpp b/opencl/test/unit_test/kernel/kernel_immediate_arg_tests.cpp index 518e0560ea..812745792e 100644 --- a/opencl/test/unit_test/kernel/kernel_immediate_arg_tests.cpp +++ b/opencl/test/unit_test/kernel/kernel_immediate_arg_tests.cpp @@ -109,7 +109,7 @@ TYPED_TEST(KernelArgImmediateTest, WhenSettingKernelArgThenArgIsSetCorrectly) { for (auto &rootDeviceIndex : this->context->getRootDeviceIndices()) { auto pKernel = this->pMultiDeviceKernel->getKernel(rootDeviceIndex); - auto pKernelArg = (TypeParam *)(pKernel->getCrossThreadData(rootDeviceIndex) + + auto pKernelArg = (TypeParam *)(pKernel->getCrossThreadData() + this->pKernelInfo[rootDeviceIndex]->kernelArgInfo[0].kernelArgPatchInfoVector[0].crossthreadOffset); EXPECT_EQ(val, *pKernelArg); @@ -132,7 +132,7 @@ TYPED_TEST(KernelArgImmediateTest, GivenMultipleArgumentsWhenSettingKernelArgThe for (auto &rootDeviceIndex : this->context->getRootDeviceIndices()) { auto pKernel = this->pMultiDeviceKernel->getKernel(rootDeviceIndex); - auto pKernelArg = (TypeParam *)(pKernel->getCrossThreadData(rootDeviceIndex) + + auto pKernelArg = (TypeParam *)(pKernel->getCrossThreadData() + this->pKernelInfo[rootDeviceIndex]->kernelArgInfo[0].kernelArgPatchInfoVector[0].crossthreadOffset); EXPECT_EQ(val, *pKernelArg); @@ -142,7 +142,7 @@ TYPED_TEST(KernelArgImmediateTest, GivenMultipleArgumentsWhenSettingKernelArgThe for (auto &rootDeviceIndex : this->context->getRootDeviceIndices()) { auto pKernel = this->pMultiDeviceKernel->getKernel(rootDeviceIndex); - auto pKernelArg = (TypeParam *)(pKernel->getCrossThreadData(rootDeviceIndex) + + auto pKernelArg = (TypeParam *)(pKernel->getCrossThreadData() + this->pKernelInfo[rootDeviceIndex]->kernelArgInfo[1].kernelArgPatchInfoVector[0].crossthreadOffset); EXPECT_EQ(val, *pKernelArg); @@ -152,7 +152,7 @@ TYPED_TEST(KernelArgImmediateTest, GivenMultipleArgumentsWhenSettingKernelArgThe for (auto &rootDeviceIndex : this->context->getRootDeviceIndices()) { auto pKernel = this->pMultiDeviceKernel->getKernel(rootDeviceIndex); - auto pKernelArg = (TypeParam *)(pKernel->getCrossThreadData(rootDeviceIndex) + + auto pKernelArg = (TypeParam *)(pKernel->getCrossThreadData() + this->pKernelInfo[rootDeviceIndex]->kernelArgInfo[2].kernelArgPatchInfoVector[0].crossthreadOffset); EXPECT_EQ(val, *pKernelArg); @@ -166,7 +166,7 @@ TYPED_TEST(KernelArgImmediateTest, GivenCrossThreadDataOverwritesWhenSettingKern for (auto &rootDeviceIndex : this->context->getRootDeviceIndices()) { auto pKernel = this->pMultiDeviceKernel->getKernel(rootDeviceIndex); - TypeParam *pKernelArg = (TypeParam *)(pKernel->getCrossThreadData(rootDeviceIndex) + + TypeParam *pKernelArg = (TypeParam *)(pKernel->getCrossThreadData() + this->pKernelInfo[rootDeviceIndex]->kernelArgInfo[0].kernelArgPatchInfoVector[0].crossthreadOffset); EXPECT_EQ(val, *pKernelArg); @@ -177,7 +177,7 @@ TYPED_TEST(KernelArgImmediateTest, GivenCrossThreadDataOverwritesWhenSettingKern for (auto &rootDeviceIndex : this->context->getRootDeviceIndices()) { auto pKernel = this->pMultiDeviceKernel->getKernel(rootDeviceIndex); - auto pKernelArg = (TypeParam *)(pKernel->getCrossThreadData(rootDeviceIndex) + + auto pKernelArg = (TypeParam *)(pKernel->getCrossThreadData() + this->pKernelInfo[rootDeviceIndex]->kernelArgInfo[1].kernelArgPatchInfoVector[0].crossthreadOffset); EXPECT_EQ(val, *pKernelArg); @@ -187,7 +187,7 @@ TYPED_TEST(KernelArgImmediateTest, GivenCrossThreadDataOverwritesWhenSettingKern for (auto &rootDeviceIndex : this->context->getRootDeviceIndices()) { auto pKernel = this->pMultiDeviceKernel->getKernel(rootDeviceIndex); - auto pKernelArg = (TypeParam *)(pKernel->getCrossThreadData(rootDeviceIndex) + + auto pKernelArg = (TypeParam *)(pKernel->getCrossThreadData() + this->pKernelInfo[rootDeviceIndex]->kernelArgInfo[0].kernelArgPatchInfoVector[0].crossthreadOffset); EXPECT_EQ(val, *pKernelArg); @@ -215,11 +215,11 @@ TYPED_TEST(KernelArgImmediateTest, GivenMultipleStructElementsWhenSettingKernelA for (auto &rootDeviceIndex : this->context->getRootDeviceIndices()) { auto pKernel = this->pMultiDeviceKernel->getKernel(rootDeviceIndex); - auto pCrossthreadA = (TypeParam *)(pKernel->getCrossThreadData(rootDeviceIndex) + + auto pCrossthreadA = (TypeParam *)(pKernel->getCrossThreadData() + this->pKernelInfo[rootDeviceIndex]->kernelArgInfo[3].kernelArgPatchInfoVector[0].crossthreadOffset); EXPECT_EQ(immediateStruct.a, *pCrossthreadA); - auto pCrossthreadB = (TypeParam *)(pKernel->getCrossThreadData(rootDeviceIndex) + + auto pCrossthreadB = (TypeParam *)(pKernel->getCrossThreadData() + this->pKernelInfo[rootDeviceIndex]->kernelArgInfo[3].kernelArgPatchInfoVector[1].crossthreadOffset); EXPECT_EQ(immediateStruct.b, *pCrossthreadB); } @@ -233,7 +233,7 @@ TYPED_TEST(KernelArgImmediateTest, givenTooLargePatchSizeWhenSettingArgThenDontR std::memset(&memory[0], 0xaa, sizeof(TypeParam)); std::memset(&memory[1], 0xbb, sizeof(TypeParam)); - const auto destinationMemoryAddress = pKernel->getCrossThreadData(rootDeviceIndex) + + const auto destinationMemoryAddress = pKernel->getCrossThreadData() + this->pKernelInfo[rootDeviceIndex]->kernelArgInfo[0].kernelArgPatchInfoVector[0].crossthreadOffset; const auto memoryBeyondLimitAddress = destinationMemoryAddress + sizeof(TypeParam); @@ -258,7 +258,7 @@ TYPED_TEST(KernelArgImmediateTest, givenNotTooLargePatchSizeWhenSettingArgThenDo std::memset(&memory[0], 0xaa, sizeof(TypeParam)); std::memset(&memory[1], 0xbb, sizeof(TypeParam)); - const auto destinationMemoryAddress = pKernel->getCrossThreadData(rootDeviceIndex) + + const auto destinationMemoryAddress = pKernel->getCrossThreadData() + this->pKernelInfo[rootDeviceIndex]->kernelArgInfo[0].kernelArgPatchInfoVector[0].crossthreadOffset; const auto memoryBeyondLimitAddress = destinationMemoryAddress + sizeof(TypeParam); @@ -285,9 +285,9 @@ TYPED_TEST(KernelArgImmediateTest, givenMulitplePatchesAndFirstPatchSizeTooLarge std::memset(&memory[0], 0xaa, sizeof(TypeParam)); std::memset(&memory[1], 0xbb, sizeof(TypeParam)); - const auto destinationMemoryAddress1 = pKernel->getCrossThreadData(rootDeviceIndex) + + const auto destinationMemoryAddress1 = pKernel->getCrossThreadData() + this->pKernelInfo[rootDeviceIndex]->kernelArgInfo[3].kernelArgPatchInfoVector[2].crossthreadOffset; - const auto destinationMemoryAddress2 = pKernel->getCrossThreadData(rootDeviceIndex) + + const auto destinationMemoryAddress2 = pKernel->getCrossThreadData() + this->pKernelInfo[rootDeviceIndex]->kernelArgInfo[3].kernelArgPatchInfoVector[1].crossthreadOffset; const auto memoryBeyondLimitAddress1 = destinationMemoryAddress1 + sizeof(TypeParam); const auto memoryBeyondLimitAddress2 = destinationMemoryAddress2 + sizeof(TypeParam) / 2; @@ -321,9 +321,9 @@ TYPED_TEST(KernelArgImmediateTest, givenMulitplePatchesAndSecondPatchSizeTooLarg std::memset(&memory[0], 0xaa, sizeof(TypeParam)); std::memset(&memory[1], 0xbb, sizeof(TypeParam)); - const auto destinationMemoryAddress1 = pKernel->getCrossThreadData(rootDeviceIndex) + + const auto destinationMemoryAddress1 = pKernel->getCrossThreadData() + this->pKernelInfo[rootDeviceIndex]->kernelArgInfo[3].kernelArgPatchInfoVector[2].crossthreadOffset; - const auto destinationMemoryAddress2 = pKernel->getCrossThreadData(rootDeviceIndex) + + const auto destinationMemoryAddress2 = pKernel->getCrossThreadData() + this->pKernelInfo[rootDeviceIndex]->kernelArgInfo[3].kernelArgPatchInfoVector[1].crossthreadOffset; const auto memoryBeyondLimitAddress1 = destinationMemoryAddress1 + sizeof(TypeParam) / 2; const auto memoryBeyondLimitAddress2 = destinationMemoryAddress2 + sizeof(TypeParam) / 2; @@ -355,9 +355,9 @@ TYPED_TEST(KernelArgImmediateTest, givenMultiplePatchesAndOneSourceOffsetBeyondA std::memset(&memory[0], 0xaa, sizeof(TypeParam)); std::memset(&memory[1], 0xbb, sizeof(TypeParam)); - const auto destinationMemoryAddress1 = pKernel->getCrossThreadData(rootDeviceIndex) + + const auto destinationMemoryAddress1 = pKernel->getCrossThreadData() + this->pKernelInfo[rootDeviceIndex]->kernelArgInfo[3].kernelArgPatchInfoVector[1].crossthreadOffset; - const auto destinationMemoryAddress2 = pKernel->getCrossThreadData(rootDeviceIndex) + + const auto destinationMemoryAddress2 = pKernel->getCrossThreadData() + this->pKernelInfo[rootDeviceIndex]->kernelArgInfo[3].kernelArgPatchInfoVector[2].crossthreadOffset; const auto memoryBeyondLimitAddress1 = destinationMemoryAddress1 + sizeof(TypeParam); const auto memoryBeyondLimitAddress2 = destinationMemoryAddress2; diff --git a/opencl/test/unit_test/kernel/kernel_slm_arg_tests.cpp b/opencl/test/unit_test/kernel/kernel_slm_arg_tests.cpp index 32db959dc0..bcf0f9fafa 100644 --- a/opencl/test/unit_test/kernel/kernel_slm_arg_tests.cpp +++ b/opencl/test/unit_test/kernel/kernel_slm_arg_tests.cpp @@ -87,7 +87,7 @@ TEST_F(KernelSlmArgTest, WhenSettingSizeThenAlignmentOfHigherSlmArgsIsUpdated) { pMultiDeviceKernel->setArg(2, slmSize2, nullptr); for (auto &rootDeviceIndex : this->context->getRootDeviceIndices()) { - auto crossThreadData = reinterpret_cast(pKernel[rootDeviceIndex]->getCrossThreadData(rootDeviceIndex)); + auto crossThreadData = reinterpret_cast(pKernel[rootDeviceIndex]->getCrossThreadData()); auto slmOffset = ptrOffset(crossThreadData, 0x10); EXPECT_EQ(0u, *slmOffset); @@ -97,7 +97,7 @@ TEST_F(KernelSlmArgTest, WhenSettingSizeThenAlignmentOfHigherSlmArgsIsUpdated) { slmOffset = ptrOffset(crossThreadData, 0x30); EXPECT_EQ(0x400u, *slmOffset); - EXPECT_EQ(5 * KB, pKernel[rootDeviceIndex]->kernelDeviceInfos[rootDeviceIndex].slmTotalSize); + EXPECT_EQ(5 * KB, pKernel[rootDeviceIndex]->slmTotalSize); } } @@ -106,7 +106,7 @@ TEST_F(KernelSlmArgTest, GivenReverseOrderWhenSettingSizeThenAlignmentOfHigherSl pMultiDeviceKernel->setArg(0, slmSize0, nullptr); for (auto &rootDeviceIndex : this->context->getRootDeviceIndices()) { - auto crossThreadData = reinterpret_cast(pKernel[rootDeviceIndex]->getCrossThreadData(rootDeviceIndex)); + auto crossThreadData = reinterpret_cast(pKernel[rootDeviceIndex]->getCrossThreadData()); auto slmOffset = ptrOffset(crossThreadData, 0x10); EXPECT_EQ(0u, *slmOffset); @@ -116,6 +116,6 @@ TEST_F(KernelSlmArgTest, GivenReverseOrderWhenSettingSizeThenAlignmentOfHigherSl slmOffset = ptrOffset(crossThreadData, 0x30); EXPECT_EQ(0x400u, *slmOffset); - EXPECT_EQ(5 * KB, pKernel[rootDeviceIndex]->kernelDeviceInfos[rootDeviceIndex].slmTotalSize); + EXPECT_EQ(5 * KB, pKernel[rootDeviceIndex]->slmTotalSize); } } diff --git a/opencl/test/unit_test/kernel/kernel_tests.cpp b/opencl/test/unit_test/kernel/kernel_tests.cpp index 053806bb51..fc1ada99a7 100644 --- a/opencl/test/unit_test/kernel/kernel_tests.cpp +++ b/opencl/test/unit_test/kernel/kernel_tests.cpp @@ -282,7 +282,7 @@ TEST_F(KernelTests, GivenKernelWorkGroupSizeWhenGettingWorkGroupInfoThenWorkGrou size_t paramValueSizeRet = 0; auto kernelMaxWorkGroupSize = pDevice->getDeviceInfo().maxWorkGroupSize - 1; - pKernel->kernelDeviceInfos[rootDeviceIndex].maxKernelWorkGroupSize = static_cast(kernelMaxWorkGroupSize); + pKernel->maxKernelWorkGroupSize = static_cast(kernelMaxWorkGroupSize); retVal = pKernel->getWorkGroupInfo( *pClDevice, @@ -587,7 +587,7 @@ TEST_F(KernelPrivateSurfaceTest, givenKernelWithPrivateSurfaceThatIsInUseByGpuWh auto &csr = pDevice->getGpgpuCommandStreamReceiver(); - auto privateSurface = pKernel->kernelDeviceInfos[pDevice->getRootDeviceIndex()].privateSurface; + auto privateSurface = pKernel->privateSurface; auto tagAddress = csr.getTagAddress(); privateSurface->updateTaskCount(*tagAddress + 1, csr.getOsContext().getContextId()); @@ -667,7 +667,7 @@ TEST_F(KernelPrivateSurfaceTest, given32BitDeviceWhenKernelIsCreatedThenPrivateS ASSERT_EQ(CL_SUCCESS, pKernel->initialize()); - EXPECT_TRUE(pKernel->kernelDeviceInfos[pDevice->getRootDeviceIndex()].privateSurface->is32BitAllocation()); + EXPECT_TRUE(pKernel->privateSurface->is32BitAllocation()); delete pKernel; } @@ -707,7 +707,7 @@ HWTEST_F(KernelPrivateSurfaceTest, givenStatefulKernelWhenKernelIsCreatedThenPri EXPECT_NE(0u, pKernel->getSurfaceStateHeapSize(rootDeviceIndex)); - auto bufferAddress = pKernel->kernelDeviceInfos[pDevice->getRootDeviceIndex()].privateSurface->getGpuAddress(); + auto bufferAddress = pKernel->privateSurface->getGpuAddress(); typedef typename FamilyType::RENDER_SURFACE_STATE RENDER_SURFACE_STATE; auto surfaceState = reinterpret_cast( @@ -856,7 +856,7 @@ TEST_F(KernelGlobalSurfaceTest, givenBuiltInKernelWhenKernelIsCreatedThenGlobalS ASSERT_EQ(CL_SUCCESS, pKernel->initialize()); - EXPECT_EQ(bufferAddress, *(uint64_t *)pKernel->getCrossThreadData(rootDeviceIndex)); + EXPECT_EQ(bufferAddress, *(uint64_t *)pKernel->getCrossThreadData()); program.setGlobalSurface(nullptr); delete pKernel; @@ -891,7 +891,7 @@ TEST_F(KernelGlobalSurfaceTest, givenNDRangeKernelWhenKernelIsCreatedThenGlobalS ASSERT_EQ(CL_SUCCESS, pKernel->initialize()); - EXPECT_EQ(bufferAddress, *(uint64_t *)pKernel->getCrossThreadData(rootDeviceIndex)); + EXPECT_EQ(bufferAddress, *(uint64_t *)pKernel->getCrossThreadData()); program.setGlobalSurface(nullptr); @@ -1008,7 +1008,7 @@ TEST_F(KernelConstantSurfaceTest, givenBuiltInKernelWhenKernelIsCreatedThenConst ASSERT_EQ(CL_SUCCESS, pKernel->initialize()); - EXPECT_EQ(bufferAddress, *(uint64_t *)pKernel->getCrossThreadData(rootDeviceIndex)); + EXPECT_EQ(bufferAddress, *(uint64_t *)pKernel->getCrossThreadData()); program.setConstantSurface(nullptr); delete pKernel; @@ -1043,7 +1043,7 @@ TEST_F(KernelConstantSurfaceTest, givenNDRangeKernelWhenKernelIsCreatedThenConst ASSERT_EQ(CL_SUCCESS, pKernel->initialize()); - EXPECT_EQ(bufferAddress, *(uint64_t *)pKernel->getCrossThreadData(rootDeviceIndex)); + EXPECT_EQ(bufferAddress, *(uint64_t *)pKernel->getCrossThreadData()); program.setConstantSurface(nullptr); @@ -1234,7 +1234,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, KernelEventPoolSurfaceTest, givenKernelWithNullEvent pKernel->patchEventPool(pDevQueue); - EXPECT_EQ(123u, *(uint64_t *)pKernel->getCrossThreadData(rootDeviceIndex)); + EXPECT_EQ(123u, *(uint64_t *)pKernel->getCrossThreadData()); delete pKernel; } @@ -1296,7 +1296,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, KernelEventPoolSurfaceTest, givenStatelessKernelWhen pKernel->patchEventPool(pDevQueue); - EXPECT_EQ(pDevQueue->getEventPoolBuffer()->getGpuAddressToPatch(), *(uint64_t *)pKernel->getCrossThreadData(rootDeviceIndex)); + EXPECT_EQ(pDevQueue->getEventPoolBuffer()->getGpuAddressToPatch(), *(uint64_t *)pKernel->getCrossThreadData()); delete pKernel; } @@ -1438,7 +1438,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, KernelDefaultDeviceQueueSurfaceTest, givenKernelWith pKernel->patchDefaultDeviceQueue(pDevQueue); - EXPECT_EQ(123u, *(uint64_t *)pKernel->getCrossThreadData(rootDeviceIndex)); + EXPECT_EQ(123u, *(uint64_t *)pKernel->getCrossThreadData()); delete pKernel; } @@ -1470,7 +1470,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, KernelDefaultDeviceQueueSurfaceTest, givenStatelessK pKernel->patchDefaultDeviceQueue(pDevQueue); - EXPECT_EQ(pDevQueue->getQueueBuffer()->getGpuAddressToPatch(), *(uint64_t *)pKernel->getCrossThreadData(rootDeviceIndex)); + EXPECT_EQ(pDevQueue->getQueueBuffer()->getGpuAddressToPatch(), *(uint64_t *)pKernel->getCrossThreadData()); delete pKernel; } @@ -1542,7 +1542,7 @@ HWTEST_F(KernelResidencyTest, givenKernelWhenMakeResidentIsCalledThenExportedFun // check getResidency as well std::vector residencySurfaces; - pKernel->getResidency(residencySurfaces, rootDeviceIndex); + pKernel->getResidency(residencySurfaces); std::unique_ptr mockCsrExecEnv; { CommandStreamReceiverMock csrMock; @@ -1580,7 +1580,7 @@ HWTEST_F(KernelResidencyTest, givenKernelWhenMakeResidentIsCalledThenGlobalBuffe EXPECT_TRUE(commandStreamReceiver.isMadeResident(program.buildInfos[pDevice->getRootDeviceIndex()].globalSurface)); std::vector residencySurfaces; - pKernel->getResidency(residencySurfaces, rootDeviceIndex); + pKernel->getResidency(residencySurfaces); std::unique_ptr mockCsrExecEnv; { CommandStreamReceiverMock csrMock; @@ -2468,10 +2468,10 @@ TEST_F(KernelCrossThreadTests, WhenKernelIsInitializedThenGlobalWorkOffsetIsCorr MockKernel kernel(program.get(), MockKernel::toKernelInfoContainer(*pKernelInfo, rootDeviceIndex), *pClDevice); ASSERT_EQ(CL_SUCCESS, kernel.initialize()); - EXPECT_EQ(&Kernel::dummyPatchLocation, kernel.kernelDeviceInfos[rootDeviceIndex].globalWorkOffsetX); - EXPECT_NE(nullptr, kernel.kernelDeviceInfos[rootDeviceIndex].globalWorkOffsetY); - EXPECT_NE(&Kernel::dummyPatchLocation, kernel.kernelDeviceInfos[rootDeviceIndex].globalWorkOffsetY); - EXPECT_EQ(&Kernel::dummyPatchLocation, kernel.kernelDeviceInfos[rootDeviceIndex].globalWorkOffsetZ); + EXPECT_EQ(&Kernel::dummyPatchLocation, kernel.globalWorkOffsetX); + EXPECT_NE(nullptr, kernel.globalWorkOffsetY); + EXPECT_NE(&Kernel::dummyPatchLocation, kernel.globalWorkOffsetY); + EXPECT_EQ(&Kernel::dummyPatchLocation, kernel.globalWorkOffsetZ); } TEST_F(KernelCrossThreadTests, WhenKernelIsInitializedThenLocalWorkSizeIsCorrect) { @@ -2481,10 +2481,10 @@ TEST_F(KernelCrossThreadTests, WhenKernelIsInitializedThenLocalWorkSizeIsCorrect MockKernel kernel(program.get(), MockKernel::toKernelInfoContainer(*pKernelInfo, rootDeviceIndex), *pClDevice); ASSERT_EQ(CL_SUCCESS, kernel.initialize()); - EXPECT_NE(nullptr, kernel.kernelDeviceInfos[rootDeviceIndex].localWorkSizeX); - EXPECT_NE(&Kernel::dummyPatchLocation, kernel.kernelDeviceInfos[rootDeviceIndex].localWorkSizeX); - EXPECT_EQ(&Kernel::dummyPatchLocation, kernel.kernelDeviceInfos[rootDeviceIndex].localWorkSizeY); - EXPECT_EQ(&Kernel::dummyPatchLocation, kernel.kernelDeviceInfos[rootDeviceIndex].localWorkSizeZ); + EXPECT_NE(nullptr, kernel.localWorkSizeX); + EXPECT_NE(&Kernel::dummyPatchLocation, kernel.localWorkSizeX); + EXPECT_EQ(&Kernel::dummyPatchLocation, kernel.localWorkSizeY); + EXPECT_EQ(&Kernel::dummyPatchLocation, kernel.localWorkSizeZ); } TEST_F(KernelCrossThreadTests, WhenKernelIsInitializedThenLocalWorkSize2IsCorrect) { @@ -2494,10 +2494,10 @@ TEST_F(KernelCrossThreadTests, WhenKernelIsInitializedThenLocalWorkSize2IsCorrec MockKernel kernel(program.get(), MockKernel::toKernelInfoContainer(*pKernelInfo, rootDeviceIndex), *pClDevice); ASSERT_EQ(CL_SUCCESS, kernel.initialize()); - EXPECT_EQ(&Kernel::dummyPatchLocation, kernel.kernelDeviceInfos[rootDeviceIndex].localWorkSizeX2); - EXPECT_NE(nullptr, kernel.kernelDeviceInfos[rootDeviceIndex].localWorkSizeY2); - EXPECT_NE(&Kernel::dummyPatchLocation, kernel.kernelDeviceInfos[rootDeviceIndex].localWorkSizeY2); - EXPECT_EQ(&Kernel::dummyPatchLocation, kernel.kernelDeviceInfos[rootDeviceIndex].localWorkSizeZ2); + EXPECT_EQ(&Kernel::dummyPatchLocation, kernel.localWorkSizeX2); + EXPECT_NE(nullptr, kernel.localWorkSizeY2); + EXPECT_NE(&Kernel::dummyPatchLocation, kernel.localWorkSizeY2); + EXPECT_EQ(&Kernel::dummyPatchLocation, kernel.localWorkSizeZ2); } TEST_F(KernelCrossThreadTests, WhenKernelIsInitializedThenGlobalWorkSizeIsCorrect) { @@ -2507,10 +2507,10 @@ TEST_F(KernelCrossThreadTests, WhenKernelIsInitializedThenGlobalWorkSizeIsCorrec MockKernel kernel(program.get(), MockKernel::toKernelInfoContainer(*pKernelInfo, rootDeviceIndex), *pClDevice); ASSERT_EQ(CL_SUCCESS, kernel.initialize()); - EXPECT_EQ(&Kernel::dummyPatchLocation, kernel.kernelDeviceInfos[rootDeviceIndex].globalWorkSizeX); - EXPECT_EQ(&Kernel::dummyPatchLocation, kernel.kernelDeviceInfos[rootDeviceIndex].globalWorkSizeY); - EXPECT_NE(nullptr, kernel.kernelDeviceInfos[rootDeviceIndex].globalWorkSizeZ); - EXPECT_NE(&Kernel::dummyPatchLocation, kernel.kernelDeviceInfos[rootDeviceIndex].globalWorkSizeZ); + EXPECT_EQ(&Kernel::dummyPatchLocation, kernel.globalWorkSizeX); + EXPECT_EQ(&Kernel::dummyPatchLocation, kernel.globalWorkSizeY); + EXPECT_NE(nullptr, kernel.globalWorkSizeZ); + EXPECT_NE(&Kernel::dummyPatchLocation, kernel.globalWorkSizeZ); } TEST_F(KernelCrossThreadTests, WhenKernelIsInitializedThenLocalWorkDimIsCorrect) { @@ -2520,8 +2520,8 @@ TEST_F(KernelCrossThreadTests, WhenKernelIsInitializedThenLocalWorkDimIsCorrect) MockKernel kernel(program.get(), MockKernel::toKernelInfoContainer(*pKernelInfo, rootDeviceIndex), *pClDevice); ASSERT_EQ(CL_SUCCESS, kernel.initialize()); - EXPECT_NE(nullptr, kernel.kernelDeviceInfos[rootDeviceIndex].workDim); - EXPECT_NE(&Kernel::dummyPatchLocation, kernel.kernelDeviceInfos[rootDeviceIndex].workDim); + EXPECT_NE(nullptr, kernel.workDim); + EXPECT_NE(&Kernel::dummyPatchLocation, kernel.workDim); } TEST_F(KernelCrossThreadTests, WhenKernelIsInitializedThenNumWorkGroupsIsCorrect) { @@ -2533,12 +2533,12 @@ TEST_F(KernelCrossThreadTests, WhenKernelIsInitializedThenNumWorkGroupsIsCorrect MockKernel kernel(program.get(), MockKernel::toKernelInfoContainer(*pKernelInfo, rootDeviceIndex), *pClDevice); ASSERT_EQ(CL_SUCCESS, kernel.initialize()); - EXPECT_NE(nullptr, kernel.kernelDeviceInfos[rootDeviceIndex].numWorkGroupsX); - EXPECT_NE(nullptr, kernel.kernelDeviceInfos[rootDeviceIndex].numWorkGroupsY); - EXPECT_NE(nullptr, kernel.kernelDeviceInfos[rootDeviceIndex].numWorkGroupsZ); - EXPECT_NE(&Kernel::dummyPatchLocation, kernel.kernelDeviceInfos[rootDeviceIndex].numWorkGroupsX); - EXPECT_NE(&Kernel::dummyPatchLocation, kernel.kernelDeviceInfos[rootDeviceIndex].numWorkGroupsY); - EXPECT_NE(&Kernel::dummyPatchLocation, kernel.kernelDeviceInfos[rootDeviceIndex].numWorkGroupsZ); + EXPECT_NE(nullptr, kernel.numWorkGroupsX); + EXPECT_NE(nullptr, kernel.numWorkGroupsY); + EXPECT_NE(nullptr, kernel.numWorkGroupsZ); + EXPECT_NE(&Kernel::dummyPatchLocation, kernel.numWorkGroupsX); + EXPECT_NE(&Kernel::dummyPatchLocation, kernel.numWorkGroupsY); + EXPECT_NE(&Kernel::dummyPatchLocation, kernel.numWorkGroupsZ); } TEST_F(KernelCrossThreadTests, WhenKernelIsInitializedThenEnqueuedLocalWorkSizeIsCorrect) { @@ -2548,10 +2548,10 @@ TEST_F(KernelCrossThreadTests, WhenKernelIsInitializedThenEnqueuedLocalWorkSizeI MockKernel kernel(program.get(), MockKernel::toKernelInfoContainer(*pKernelInfo, rootDeviceIndex), *pClDevice); ASSERT_EQ(CL_SUCCESS, kernel.initialize()); - EXPECT_NE(nullptr, kernel.kernelDeviceInfos[rootDeviceIndex].enqueuedLocalWorkSizeX); - EXPECT_NE(&Kernel::dummyPatchLocation, kernel.kernelDeviceInfos[rootDeviceIndex].enqueuedLocalWorkSizeX); - EXPECT_EQ(&Kernel::dummyPatchLocation, kernel.kernelDeviceInfos[rootDeviceIndex].enqueuedLocalWorkSizeY); - EXPECT_EQ(&Kernel::dummyPatchLocation, kernel.kernelDeviceInfos[rootDeviceIndex].enqueuedLocalWorkSizeZ); + EXPECT_NE(nullptr, kernel.enqueuedLocalWorkSizeX); + EXPECT_NE(&Kernel::dummyPatchLocation, kernel.enqueuedLocalWorkSizeX); + EXPECT_EQ(&Kernel::dummyPatchLocation, kernel.enqueuedLocalWorkSizeY); + EXPECT_EQ(&Kernel::dummyPatchLocation, kernel.enqueuedLocalWorkSizeZ); } TEST_F(KernelCrossThreadTests, WhenKernelIsInitializedThenEnqueuedMaxWorkGroupSizeIsCorrect) { @@ -2560,11 +2560,11 @@ TEST_F(KernelCrossThreadTests, WhenKernelIsInitializedThenEnqueuedMaxWorkGroupSi MockKernel kernel(program.get(), MockKernel::toKernelInfoContainer(*pKernelInfo, rootDeviceIndex), *pClDevice); ASSERT_EQ(CL_SUCCESS, kernel.initialize()); - EXPECT_NE(nullptr, kernel.kernelDeviceInfos[rootDeviceIndex].maxWorkGroupSizeForCrossThreadData); - EXPECT_NE(&Kernel::dummyPatchLocation, kernel.kernelDeviceInfos[rootDeviceIndex].maxWorkGroupSizeForCrossThreadData); - EXPECT_EQ(static_cast(kernel.getCrossThreadData(rootDeviceIndex) + pKernelInfo->workloadInfo.maxWorkGroupSizeOffset), static_cast(kernel.kernelDeviceInfos[rootDeviceIndex].maxWorkGroupSizeForCrossThreadData)); - EXPECT_EQ(pDevice->getDeviceInfo().maxWorkGroupSize, *kernel.kernelDeviceInfos[rootDeviceIndex].maxWorkGroupSizeForCrossThreadData); - EXPECT_EQ(pDevice->getDeviceInfo().maxWorkGroupSize, kernel.kernelDeviceInfos[rootDeviceIndex].maxKernelWorkGroupSize); + EXPECT_NE(nullptr, kernel.maxWorkGroupSizeForCrossThreadData); + EXPECT_NE(&Kernel::dummyPatchLocation, kernel.maxWorkGroupSizeForCrossThreadData); + EXPECT_EQ(static_cast(kernel.getCrossThreadData() + pKernelInfo->workloadInfo.maxWorkGroupSizeOffset), static_cast(kernel.maxWorkGroupSizeForCrossThreadData)); + EXPECT_EQ(pDevice->getDeviceInfo().maxWorkGroupSize, *kernel.maxWorkGroupSizeForCrossThreadData); + EXPECT_EQ(pDevice->getDeviceInfo().maxWorkGroupSize, kernel.maxKernelWorkGroupSize); } TEST_F(KernelCrossThreadTests, WhenKernelIsInitializedThenDataParameterSimdSizeIsCorrect) { @@ -2573,10 +2573,10 @@ TEST_F(KernelCrossThreadTests, WhenKernelIsInitializedThenDataParameterSimdSizeI MockKernel kernel(program.get(), MockKernel::toKernelInfoContainer(*pKernelInfo, rootDeviceIndex), *pClDevice); ASSERT_EQ(CL_SUCCESS, kernel.initialize()); - EXPECT_NE(nullptr, kernel.kernelDeviceInfos[rootDeviceIndex].dataParameterSimdSize); - EXPECT_NE(&Kernel::dummyPatchLocation, kernel.kernelDeviceInfos[rootDeviceIndex].dataParameterSimdSize); - EXPECT_EQ(static_cast(kernel.getCrossThreadData(rootDeviceIndex) + pKernelInfo->workloadInfo.simdSizeOffset), static_cast(kernel.kernelDeviceInfos[rootDeviceIndex].dataParameterSimdSize)); - EXPECT_EQ_VAL(pKernelInfo->getMaxSimdSize(), *kernel.kernelDeviceInfos[rootDeviceIndex].dataParameterSimdSize); + EXPECT_NE(nullptr, kernel.dataParameterSimdSize); + EXPECT_NE(&Kernel::dummyPatchLocation, kernel.dataParameterSimdSize); + EXPECT_EQ(static_cast(kernel.getCrossThreadData() + pKernelInfo->workloadInfo.simdSizeOffset), static_cast(kernel.dataParameterSimdSize)); + EXPECT_EQ_VAL(pKernelInfo->getMaxSimdSize(), *kernel.dataParameterSimdSize); } TEST_F(KernelCrossThreadTests, GivenParentEventOffsetWhenKernelIsInitializedThenParentEventIsInitiatedWithInvalid) { @@ -2584,10 +2584,10 @@ TEST_F(KernelCrossThreadTests, GivenParentEventOffsetWhenKernelIsInitializedThen MockKernel kernel(program.get(), MockKernel::toKernelInfoContainer(*pKernelInfo, rootDeviceIndex), *pClDevice); ASSERT_EQ(CL_SUCCESS, kernel.initialize()); - EXPECT_NE(nullptr, kernel.kernelDeviceInfos[rootDeviceIndex].parentEventOffset); - EXPECT_NE(&Kernel::dummyPatchLocation, kernel.kernelDeviceInfos[rootDeviceIndex].parentEventOffset); - EXPECT_EQ(static_cast(kernel.getCrossThreadData(rootDeviceIndex) + pKernelInfo->workloadInfo.parentEventOffset), static_cast(kernel.kernelDeviceInfos[rootDeviceIndex].parentEventOffset)); - EXPECT_EQ(WorkloadInfo::invalidParentEvent, *kernel.kernelDeviceInfos[rootDeviceIndex].parentEventOffset); + EXPECT_NE(nullptr, kernel.parentEventOffset); + EXPECT_NE(&Kernel::dummyPatchLocation, kernel.parentEventOffset); + EXPECT_EQ(static_cast(kernel.getCrossThreadData() + pKernelInfo->workloadInfo.parentEventOffset), static_cast(kernel.parentEventOffset)); + EXPECT_EQ(WorkloadInfo::invalidParentEvent, *kernel.parentEventOffset); } TEST_F(KernelCrossThreadTests, WhenAddingKernelThenProgramRefCountIsIncremented) { @@ -2608,7 +2608,7 @@ TEST_F(KernelCrossThreadTests, GivenSlmStatisSizeWhenCreatingKernelThenSlmTotalS MockKernel *kernel = new MockKernel(program.get(), MockKernel::toKernelInfoContainer(*pKernelInfo, rootDeviceIndex), *pClDevice); - EXPECT_EQ(1024u, kernel->kernelDeviceInfos[rootDeviceIndex].slmTotalSize); + EXPECT_EQ(1024u, kernel->slmTotalSize); delete kernel; } @@ -2623,9 +2623,9 @@ TEST_F(KernelCrossThreadTests, givenKernelWithPrivateMemoryWhenItIsCreatedThenCu kernel->initialize(); - auto privateSurface = kernel->kernelDeviceInfos[pDevice->getRootDeviceIndex()].privateSurface; + auto privateSurface = kernel->privateSurface; - auto constantBuffer = kernel->getCrossThreadData(rootDeviceIndex); + auto constantBuffer = kernel->getCrossThreadData(); auto privateAddress = (uintptr_t)privateSurface->getGpuAddressToPatch(); auto ptrCurbe = (uint64_t *)constantBuffer; auto privateAddressFromCurbe = (uintptr_t)*ptrCurbe; @@ -2642,7 +2642,7 @@ TEST_F(KernelCrossThreadTests, givenKernelWithPreferredWkgMultipleWhenItIsCreate kernel->initialize(); - auto *crossThread = kernel->getCrossThreadData(rootDeviceIndex); + auto *crossThread = kernel->getCrossThreadData(); uint32_t *preferredWkgMultipleOffset = (uint32_t *)ptrOffset(crossThread, 8); @@ -2667,7 +2667,7 @@ TEST_F(KernelCrossThreadTests, WhenPatchingBlocksSimdSizeThenSimdSizeIsPatchedCo kernel->mockKernel->patchBlocksSimdSize(rootDeviceIndex); // obtain block's simd size from cross thread data - void *blockSimdSize = ptrOffset(kernel->mockKernel->getCrossThreadData(rootDeviceIndex), kernel->kernelInfo.childrenKernelsIdOffset[0].second); + void *blockSimdSize = ptrOffset(kernel->mockKernel->getCrossThreadData(), kernel->kernelInfo.childrenKernelsIdOffset[0].second); uint32_t *simdSize = reinterpret_cast(blockSimdSize); // check of block's simd size has been patched correctly @@ -3419,7 +3419,7 @@ TEST_F(KernelMultiRootDeviceTest, givenKernelWithPrivateSurfaceWhenInitializeThe for (auto &rootDeviceIndex : context->getRootDeviceIndices()) { auto kernel = static_cast(pMultiDeviceKernel->getKernel(rootDeviceIndex)); - auto privateSurface = kernel->kernelDeviceInfos[rootDeviceIndex].privateSurface; + auto privateSurface = kernel->privateSurface; ASSERT_NE(nullptr, privateSurface); EXPECT_EQ(rootDeviceIndex, privateSurface->getRootDeviceIndex()); } diff --git a/opencl/test/unit_test/kernel/parent_kernel_tests.cpp b/opencl/test/unit_test/kernel/parent_kernel_tests.cpp index 888d875102..c6fa88e595 100644 --- a/opencl/test/unit_test/kernel/parent_kernel_tests.cpp +++ b/opencl/test/unit_test/kernel/parent_kernel_tests.cpp @@ -68,7 +68,7 @@ TEST(ParentKernelTest, WhenPatchingBlocksSimdSizeThenPatchIsAppliedCorrectly) { parentKernel->patchBlocksSimdSize(rootDeviceIndex); - void *blockSimdSize = ptrOffset(parentKernel->getCrossThreadData(rootDeviceIndex), parentKernel->getKernelInfo(rootDeviceIndex).childrenKernelsIdOffset[0].second); + void *blockSimdSize = ptrOffset(parentKernel->getCrossThreadData(), parentKernel->getKernelInfo(rootDeviceIndex).childrenKernelsIdOffset[0].second); uint32_t *simdSize = reinterpret_cast(blockSimdSize); EXPECT_EQ(program->blockKernelManager->getBlockKernelInfo(0)->getMaxSimdSize(), *simdSize); @@ -99,7 +99,7 @@ TEST(ParentKernelTest, WhenInitializingParentKernelThenBlocksSimdSizeIsPatched) parentKernel->initialize(); - void *blockSimdSize = ptrOffset(parentKernel->getCrossThreadData(rootDeviceIndex), parentKernel->getKernelInfo(rootDeviceIndex).childrenKernelsIdOffset[0].second); + void *blockSimdSize = ptrOffset(parentKernel->getCrossThreadData(), parentKernel->getKernelInfo(rootDeviceIndex).childrenKernelsIdOffset[0].second); uint32_t *simdSize = reinterpret_cast(blockSimdSize); EXPECT_EQ(program->blockKernelManager->getBlockKernelInfo(0)->getMaxSimdSize(), *simdSize); diff --git a/opencl/test/unit_test/mem_obj/buffer_set_arg_tests.cpp b/opencl/test/unit_test/mem_obj/buffer_set_arg_tests.cpp index 1c8b149e7d..19a3f0c02b 100644 --- a/opencl/test/unit_test/mem_obj/buffer_set_arg_tests.cpp +++ b/opencl/test/unit_test/mem_obj/buffer_set_arg_tests.cpp @@ -104,7 +104,7 @@ class BufferSetArgTest : public ContextFixture, }; TEST_F(BufferSetArgTest, WhenSettingKernelArgBufferThenGpuAddressIsSet) { - auto pKernelArg = (void **)(pKernel->getCrossThreadData(rootDeviceIndex) + + auto pKernelArg = (void **)(pKernel->getCrossThreadData() + pKernelInfo->kernelArgInfo[0].kernelArgPatchInfoVector[0].crossthreadOffset); auto tokenSize = pKernelInfo->kernelArgInfo[0].kernelArgPatchInfoVector[0].size; @@ -206,7 +206,7 @@ HWTEST_F(BufferSetArgTest, givenNonPureStatefulArgWhenRenderCompressedBufferIsSe } TEST_F(BufferSetArgTest, Given32BitAddressingWhenSettingArgStatelessThenGpuAddressIsSetCorrectly) { - auto pKernelArg = (void **)(pKernel->getCrossThreadData(rootDeviceIndex) + + auto pKernelArg = (void **)(pKernel->getCrossThreadData() + pKernelInfo->kernelArgInfo[0].kernelArgPatchInfoVector[0].crossthreadOffset); auto tokenSize = pKernelInfo->kernelArgInfo[0].kernelArgPatchInfoVector[0].size; @@ -229,7 +229,7 @@ TEST_F(BufferSetArgTest, givenBufferWhenOffsetedSubbufferIsPassedToSetKernelArgT EXPECT_EQ(ptrOffset(buffer->getCpuAddress(), region.origin), subBuffer->getCpuAddress()); - auto pKernelArg = (void **)(pKernel->getCrossThreadData(rootDeviceIndex) + + auto pKernelArg = (void **)(pKernel->getCrossThreadData() + pKernelInfo->kernelArgInfo[0].kernelArgPatchInfoVector[0].crossthreadOffset); auto tokenSize = pKernelInfo->kernelArgInfo[0].kernelArgPatchInfoVector[0].size; @@ -241,7 +241,7 @@ TEST_F(BufferSetArgTest, givenBufferWhenOffsetedSubbufferIsPassedToSetKernelArgT } TEST_F(BufferSetArgTest, givenCurbeTokenThatSizeIs4BytesWhenStatelessArgIsPatchedThenOnly4BytesArePatchedInCurbe) { - auto pKernelArg = (void **)(pKernel->getCrossThreadData(rootDeviceIndex) + + auto pKernelArg = (void **)(pKernel->getCrossThreadData() + pKernelInfo->kernelArgInfo[0].kernelArgPatchInfoVector[0].crossthreadOffset); //fill 8 bytes with 0xffffffffffffffff; @@ -275,13 +275,13 @@ TEST_F(BufferSetArgTest, WhenSettingKernelArgThenAddressToPatchIsSetCorrectlyAnd &memObj); ASSERT_EQ(CL_SUCCESS, retVal); - auto pKernelArg = (void **)(pKernel->getCrossThreadData(rootDeviceIndex) + + auto pKernelArg = (void **)(pKernel->getCrossThreadData() + pKernelInfo->kernelArgInfo[0].kernelArgPatchInfoVector[0].crossthreadOffset); EXPECT_EQ(reinterpret_cast(buffer->getGraphicsAllocation(pClDevice->getRootDeviceIndex())->getGpuAddressToPatch()), *pKernelArg); std::vector surfaces; - pKernel->getResidency(surfaces, rootDeviceIndex); + pKernel->getResidency(surfaces); EXPECT_EQ(1u, surfaces.size()); for (auto &surface : surfaces) { @@ -305,13 +305,13 @@ TEST_F(BufferSetArgTest, GivenSvmPointerWhenSettingKernelArgThenAddressToPatchIs pSvmAlloc); ASSERT_EQ(CL_SUCCESS, retVal); - auto pKernelArg = (void **)(pKernel->getCrossThreadData(rootDeviceIndex) + + auto pKernelArg = (void **)(pKernel->getCrossThreadData() + pKernelInfo->kernelArgInfo[0].kernelArgPatchInfoVector[0].crossthreadOffset); EXPECT_EQ(ptrSVM, *pKernelArg); std::vector surfaces; - pKernel->getResidency(surfaces, rootDeviceIndex); + pKernel->getResidency(surfaces); EXPECT_EQ(1u, surfaces.size()); for (auto &surface : surfaces) { delete surface; @@ -348,7 +348,7 @@ TEST_F(BufferSetArgTest, givenKernelArgBufferWhenAddPathInfoDataIsSetThenPatchIn EXPECT_EQ(PatchInfoAllocationType::KernelArg, pKernel->getPatchInfoDataList()[0].sourceType); EXPECT_EQ(PatchInfoAllocationType::IndirectObjectHeap, pKernel->getPatchInfoDataList()[0].targetType); EXPECT_EQ(buffer->getGraphicsAllocation(pClDevice->getRootDeviceIndex())->getGpuAddressToPatch(), pKernel->getPatchInfoDataList()[0].sourceAllocation); - EXPECT_EQ(reinterpret_cast(pKernel->getCrossThreadData(rootDeviceIndex)), pKernel->getPatchInfoDataList()[0].targetAllocation); + EXPECT_EQ(reinterpret_cast(pKernel->getCrossThreadData()), pKernel->getPatchInfoDataList()[0].targetAllocation); EXPECT_EQ(0u, pKernel->getPatchInfoDataList()[0].sourceAllocationOffset); } diff --git a/opencl/test/unit_test/mem_obj/image_set_arg_tests.cpp b/opencl/test/unit_test/mem_obj/image_set_arg_tests.cpp index c8835cc648..890c526275 100644 --- a/opencl/test/unit_test/mem_obj/image_set_arg_tests.cpp +++ b/opencl/test/unit_test/mem_obj/image_set_arg_tests.cpp @@ -133,7 +133,7 @@ HWTEST_F(ImageSetArgTest, WhenSettingKernelArgImageThenSurfaceBaseAddressIsSetCo EXPECT_EQ(srcAllocation->getGpuAddress(), surfaceAddress); std::vector surfaces; - pKernel->getResidency(surfaces, rootDeviceIndex); + pKernel->getResidency(surfaces); EXPECT_EQ(0u, surfaces.size()); } @@ -343,7 +343,7 @@ HWTEST_F(ImageSetArgTest, givenOffsetedBufferWhenSetKernelArgImageIscalledThenFu EXPECT_EQ(srcAllocation->getGpuAddress(), surfaceAddress); std::vector surfaces; - pKernel->getResidency(surfaces, rootDeviceIndex); + pKernel->getResidency(surfaces); EXPECT_EQ(0u, surfaces.size()); } @@ -386,7 +386,7 @@ HWTEST_F(ImageSetArgTest, WhenSettingKernelArgThenPropertiesAreSetCorrectly) { EXPECT_EQ(0u, surfaceState->getCoherencyType()); std::vector surfaces; - pKernel->getResidency(surfaces, rootDeviceIndex); + pKernel->getResidency(surfaces); EXPECT_EQ(1u, surfaces.size()); for (auto &surface : surfaces) { @@ -458,7 +458,7 @@ HWTEST_F(ImageSetArgTest, Given2dArrayWhenSettingKernelArgThenPropertiesAreSetCo EXPECT_EQ(RENDER_SURFACE_STATE::SHADER_CHANNEL_SELECT_ALPHA, surfaceState->getShaderChannelSelectAlpha()); std::vector surfaces; - pKernel->getResidency(surfaces, rootDeviceIndex); + pKernel->getResidency(surfaces); EXPECT_EQ(1u, surfaces.size()); for (auto &surface : surfaces) { delete surface; @@ -506,7 +506,7 @@ HWTEST_F(ImageSetArgTest, Given1dArrayWhenSettingKernelArgThenPropertiesAreSetCo EXPECT_EQ(RENDER_SURFACE_STATE::SHADER_CHANNEL_SELECT_ALPHA, surfaceState->getShaderChannelSelectAlpha()); std::vector surfaces; - pKernel->getResidency(surfaces, rootDeviceIndex); + pKernel->getResidency(surfaces); EXPECT_EQ(1u, surfaces.size()); for (auto &surface : surfaces) { delete surface; @@ -852,7 +852,7 @@ HWTEST_F(ImageSetArgTest, GivenImageWithClLuminanceFormatWhenSettingKernelArgThe EXPECT_EQ(RENDER_SURFACE_STATE::SHADER_CHANNEL_SELECT_ALPHA, surfaceState->getShaderChannelSelectAlpha()); std::vector surfaces; - pKernel->getResidency(surfaces, rootDeviceIndex); + pKernel->getResidency(surfaces); EXPECT_EQ(1u, surfaces.size()); for (auto &surface : surfaces) { delete surface; @@ -872,7 +872,7 @@ HWTEST_F(ImageSetArgTest, WhenSettingArgThenImageIsReturned) { EXPECT_EQ(memObj, pKernel->getKernelArg(0)); std::vector surfaces; - pKernel->getResidency(surfaces, rootDeviceIndex); + pKernel->getResidency(surfaces); EXPECT_EQ(1u, surfaces.size()); for (auto &surface : surfaces) { @@ -999,7 +999,7 @@ HWTEST_F(ImageMediaBlockSetArgTest, WhenSettingKernelArgImageThenPropertiesAreCo EXPECT_EQ(imageMocs, surfaceState->getMemoryObjectControlState()); std::vector surfaces; - pKernel->getResidency(surfaces, rootDeviceIndex); + pKernel->getResidency(surfaces); EXPECT_EQ(1u, surfaces.size()); for (auto &surface : surfaces) { diff --git a/opencl/test/unit_test/memory_manager/memory_manager_tests.cpp b/opencl/test/unit_test/memory_manager/memory_manager_tests.cpp index 94b412ebaf..6f67712929 100644 --- a/opencl/test/unit_test/memory_manager/memory_manager_tests.cpp +++ b/opencl/test/unit_test/memory_manager/memory_manager_tests.cpp @@ -541,7 +541,7 @@ TEST_F(MemoryAllocatorTest, givenStatelessKernelWithPrintfWhenPrintfSurfaceIsCre auto printfAllocation = printfHandler->getSurface(); auto allocationAddress = printfAllocation->getGpuAddressToPatch(); - auto printfPatchAddress = ptrOffset(reinterpret_cast(kernel.mockKernel->getCrossThreadData(rootDeviceIndex)), + auto printfPatchAddress = ptrOffset(reinterpret_cast(kernel.mockKernel->getCrossThreadData()), kernel.mockKernel->getKernelInfo(rootDeviceIndex).kernelDescriptor.payloadMappings.implicitArgs.printfSurfaceAddress.stateless); EXPECT_EQ(allocationAddress, *(uintptr_t *)printfPatchAddress); diff --git a/opencl/test/unit_test/mocks/mock_kernel.cpp b/opencl/test/unit_test/mocks/mock_kernel.cpp index b243dc1a83..43a0ea6cd1 100644 --- a/opencl/test/unit_test/mocks/mock_kernel.cpp +++ b/opencl/test/unit_test/mocks/mock_kernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2017-2020 Intel Corporation + * Copyright (C) 2017-2021 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -56,9 +56,9 @@ void MockKernel::makeResident(CommandStreamReceiver &commandStreamReceiver) { Kernel::makeResident(commandStreamReceiver); } -void MockKernel::getResidency(std::vector &dst, uint32_t rootDeviceIndex) { +void MockKernel::getResidency(std::vector &dst) { getResidencyCalls++; - Kernel::getResidency(dst, rootDeviceIndex); + Kernel::getResidency(dst); } bool MockKernel::requiresCacheFlushCommand(const CommandQueue &commandQueue) const { if (DebugManager.flags.EnableCacheFlushAfterWalker.get() != -1) { diff --git a/opencl/test/unit_test/mocks/mock_kernel.h b/opencl/test/unit_test/mocks/mock_kernel.h index 8285ad6fa1..efd10b03f8 100644 --- a/opencl/test/unit_test/mocks/mock_kernel.h +++ b/opencl/test/unit_test/mocks/mock_kernel.h @@ -116,8 +116,18 @@ class MockKernel : public Kernel { using Kernel::allBufferArgsStateful; using Kernel::auxTranslationRequired; using Kernel::containsStatelessWrites; + using Kernel::dataParameterSimdSize; + using Kernel::enqueuedLocalWorkSizeX; + using Kernel::enqueuedLocalWorkSizeY; + using Kernel::enqueuedLocalWorkSizeZ; using Kernel::executionType; using Kernel::getDevice; + using Kernel::globalWorkOffsetX; + using Kernel::globalWorkOffsetY; + using Kernel::globalWorkOffsetZ; + using Kernel::globalWorkSizeX; + using Kernel::globalWorkSizeY; + using Kernel::globalWorkSizeZ; using Kernel::hasDirectStatelessAccessToHostMemory; using Kernel::hasIndirectStatelessAccessToHostMemory; using Kernel::isSchedulerKernel; @@ -125,17 +135,35 @@ class MockKernel : public Kernel { using Kernel::kernelArgRequiresCacheFlush; using Kernel::kernelArguments; using Kernel::KernelConfig; - using Kernel::kernelDeviceInfos; using Kernel::kernelHasIndirectAccess; using Kernel::kernelSubmissionMap; using Kernel::kernelSvmGfxAllocations; using Kernel::kernelUnifiedMemoryGfxAllocations; + using Kernel::localWorkSizeX; + using Kernel::localWorkSizeX2; + using Kernel::localWorkSizeY; + using Kernel::localWorkSizeY2; + using Kernel::localWorkSizeZ; + using Kernel::localWorkSizeZ2; + using Kernel::maxKernelWorkGroupSize; + using Kernel::maxWorkGroupSizeForCrossThreadData; + using Kernel::numberOfBindingTableStates; + using Kernel::numWorkGroupsX; + using Kernel::numWorkGroupsY; + using Kernel::numWorkGroupsZ; + using Kernel::parentEventOffset; using Kernel::patchBufferOffset; using Kernel::patchWithImplicitSurface; + using Kernel::preferredWkgMultipleOffset; + using Kernel::privateSurface; using Kernel::singleSubdevicePreferedInCurrentEnqueue; using Kernel::svmAllocationsRequireCacheFlush; using Kernel::threadArbitrationPolicy; using Kernel::unifiedMemoryControls; + using Kernel::workDim; + + using Kernel::slmSizes; + using Kernel::slmTotalSize; struct BlockPatchValues { uint64_t offset; @@ -190,10 +218,8 @@ class MockKernel : public Kernel { ~MockKernel() override { // prevent double deletion - for (auto rootDeviceIndex = 0u; rootDeviceIndex < kernelDeviceInfos.size(); rootDeviceIndex++) { - if (kernelDeviceInfos[rootDeviceIndex].crossThreadData == mockCrossThreadData.data()) { - kernelDeviceInfos[rootDeviceIndex].crossThreadData = nullptr; - } + if (crossThreadData == mockCrossThreadData.data()) { + crossThreadData = nullptr; } if (kernelInfoAllocated) { @@ -230,9 +256,9 @@ class MockKernel : public Kernel { kernelInfos.resize(rootDeviceIndex + 1); kernelInfos[rootDeviceIndex] = info; auto kernel = new KernelType(program, kernelInfos, *device.getSpecializedDevice()); - kernel->kernelDeviceInfos[rootDeviceIndex].crossThreadData = new char[crossThreadSize]; - memset(kernel->kernelDeviceInfos[rootDeviceIndex].crossThreadData, 0, crossThreadSize); - kernel->kernelDeviceInfos[rootDeviceIndex].crossThreadDataSize = crossThreadSize; + kernel->crossThreadData = new char[crossThreadSize]; + memset(kernel->crossThreadData, 0, crossThreadSize); + kernel->crossThreadDataSize = crossThreadSize; kernel->kernelInfoAllocated = info; @@ -249,11 +275,10 @@ class MockKernel : public Kernel { //////////////////////////////////////////////////////////////////////////////// void setCrossThreadData(const void *crossThreadDataPattern, uint32_t newCrossThreadDataSize) { - auto rootDeviceIndex = defaultRootDeviceIndex; - if ((kernelDeviceInfos[rootDeviceIndex].crossThreadData != nullptr) && (kernelDeviceInfos[rootDeviceIndex].crossThreadData != mockCrossThreadData.data())) { - delete[] kernelDeviceInfos[rootDeviceIndex].crossThreadData; - kernelDeviceInfos[rootDeviceIndex].crossThreadData = nullptr; - kernelDeviceInfos[rootDeviceIndex].crossThreadDataSize = 0; + if ((crossThreadData != nullptr) && (crossThreadData != mockCrossThreadData.data())) { + delete[] crossThreadData; + crossThreadData = nullptr; + crossThreadDataSize = 0; } if (crossThreadDataPattern && (newCrossThreadDataSize > 0)) { mockCrossThreadData.clear(); @@ -263,41 +288,34 @@ class MockKernel : public Kernel { } if (newCrossThreadDataSize == 0) { - kernelDeviceInfos[rootDeviceIndex].crossThreadData = nullptr; - kernelDeviceInfos[rootDeviceIndex].crossThreadDataSize = 0; + crossThreadData = nullptr; + crossThreadDataSize = 0; return; } - kernelDeviceInfos[rootDeviceIndex].crossThreadData = mockCrossThreadData.data(); - kernelDeviceInfos[rootDeviceIndex].crossThreadDataSize = static_cast(mockCrossThreadData.size()); + crossThreadData = mockCrossThreadData.data(); + crossThreadDataSize = static_cast(mockCrossThreadData.size()); } - void setSshLocal(const void *sshPattern, uint32_t newSshSize, uint32_t rootDeviceIndex) { - kernelDeviceInfos[rootDeviceIndex].sshLocalSize = newSshSize; + void setSshLocal(const void *sshPattern, uint32_t newSshSize) { + sshLocalSize = newSshSize; if (newSshSize == 0) { - kernelDeviceInfos[rootDeviceIndex].pSshLocal.reset(nullptr); + pSshLocal.reset(nullptr); } else { - kernelDeviceInfos[rootDeviceIndex].pSshLocal = std::make_unique(newSshSize); + pSshLocal = std::make_unique(newSshSize); if (sshPattern) { - memcpy_s(kernelDeviceInfos[rootDeviceIndex].pSshLocal.get(), newSshSize, sshPattern, newSshSize); + memcpy_s(pSshLocal.get(), newSshSize, sshPattern, newSshSize); } } } void setPrivateSurface(GraphicsAllocation *gfxAllocation, uint32_t size) { - if (gfxAllocation) { - kernelDeviceInfos[gfxAllocation->getRootDeviceIndex()].privateSurface = gfxAllocation; - kernelDeviceInfos[gfxAllocation->getRootDeviceIndex()].privateSurfaceSize = size; - } else { - for (auto &kernelDeviceInfo : kernelDeviceInfos) { - kernelDeviceInfo.privateSurface = gfxAllocation; - kernelDeviceInfo.privateSurfaceSize = size; - } - } + privateSurface = gfxAllocation; + privateSurfaceSize = size; } - void setTotalSLMSize(uint32_t rootDeviceIndex, uint32_t size) { - kernelDeviceInfos[rootDeviceIndex].slmTotalSize = size; + void setTotalSLMSize(uint32_t size) { + slmTotalSize = size; } void setKernelArguments(std::vector kernelArguments) { @@ -314,7 +332,7 @@ class MockKernel : public Kernel { void setUsingSharedArgs(bool usingSharedArgValue) { this->usingSharedObjArgs = usingSharedArgValue; } void makeResident(CommandStreamReceiver &commandStreamReceiver) override; - void getResidency(std::vector &dst, uint32_t rootDeviceIndex) override; + void getResidency(std::vector &dst) override; void setSpecialPipelineSelectMode(bool value) { specialPipelineSelectMode = value; } @@ -391,9 +409,7 @@ class MockKernelWithInternals { } mockMultiDeviceKernel = new MockMultiDeviceKernel(std::move(mockKernels)); - for (const auto &pClDevice : deviceVector) { - mockKernel->setSshLocal(&sshLocal, sizeof(sshLocal), pClDevice->getRootDeviceIndex()); - } + mockKernel->setSshLocal(&sshLocal, sizeof(sshLocal)); if (addDefaultArg) { defaultKernelArguments.resize(2); @@ -462,9 +478,10 @@ class MockKernelWithInternals { class MockParentKernel : public Kernel { public: using Kernel::auxTranslationRequired; - using Kernel::kernelDeviceInfos; using Kernel::kernelInfos; using Kernel::patchBlocksCurbeWithConstantValues; + using Kernel::pSshLocal; + using Kernel::sshLocalSize; static MockParentKernel *create(Context &context, bool addChildSimdSize = false, bool addChildGlobalMemory = false, bool addChildConstantMemory = false, bool addPrintfForParent = true, bool addPrintfForBlock = true) { auto clDevice = context.getDevice(0); @@ -531,9 +548,9 @@ class MockParentKernel : public Kernel { info->crossThreadData = new char[crossThreadSize]; auto parent = new MockParentKernel(mockProgram, kernelInfos); - parent->kernelDeviceInfos[rootDeviceIndex].crossThreadData = new char[crossThreadSize]; - memset(parent->kernelDeviceInfos[rootDeviceIndex].crossThreadData, 0, crossThreadSize); - parent->kernelDeviceInfos[rootDeviceIndex].crossThreadDataSize = crossThreadSize; + parent->crossThreadData = new char[crossThreadSize]; + memset(parent->crossThreadData, 0, crossThreadSize); + parent->crossThreadDataSize = crossThreadSize; parent->mockKernelInfo = info; auto infoBlock = new KernelInfo(); @@ -665,7 +682,24 @@ class MockParentKernel : public Kernel { class MockSchedulerKernel : public SchedulerKernel { public: - using SchedulerKernel::kernelDeviceInfos; + using Kernel::enqueuedLocalWorkSizeX; + using Kernel::enqueuedLocalWorkSizeY; + using Kernel::enqueuedLocalWorkSizeZ; + using Kernel::globalWorkOffsetX; + using Kernel::globalWorkOffsetY; + using Kernel::globalWorkOffsetZ; + using Kernel::globalWorkSizeX; + using Kernel::globalWorkSizeY; + using Kernel::globalWorkSizeZ; + using Kernel::localWorkSizeX; + using Kernel::localWorkSizeX2; + using Kernel::localWorkSizeY; + using Kernel::localWorkSizeY2; + using Kernel::localWorkSizeZ; + using Kernel::localWorkSizeZ2; + using Kernel::numWorkGroupsX; + using Kernel::numWorkGroupsY; + using Kernel::numWorkGroupsZ; MockSchedulerKernel(Program *programArg, const KernelInfoContainer &kernelInfoArg, ClDevice &clDeviceArg) : SchedulerKernel(programArg, kernelInfoArg, clDeviceArg){}; }; diff --git a/opencl/test/unit_test/program/program_tests.cpp b/opencl/test/unit_test/program/program_tests.cpp index e394c11b3d..5875ba9008 100644 --- a/opencl/test/unit_test/program/program_tests.cpp +++ b/opencl/test/unit_test/program/program_tests.cpp @@ -1374,7 +1374,7 @@ HWTEST_F(PatchTokenTests, givenKernelRequiringConstantAllocationWhenMakeResident element = std::find(residencyVector.begin(), residencyVector.end(), constantAllocation); EXPECT_NE(residencyVector.end(), element); - auto crossThreadData = pKernel->getCrossThreadData(rootDeviceIndex); + auto crossThreadData = pKernel->getCrossThreadData(); uint32_t *constBuffGpuAddr = reinterpret_cast(pProgram->getConstantSurface(pContext->getDevice(0)->getRootDeviceIndex())->getGpuAddressToPatch()); uintptr_t *pDst = reinterpret_cast(crossThreadData + pKernelInfo->kernelDescriptor.payloadMappings.implicitArgs.globalConstantsSurfaceAddress.stateless); @@ -1384,7 +1384,7 @@ HWTEST_F(PatchTokenTests, givenKernelRequiringConstantAllocationWhenMakeResident EXPECT_EQ(0u, pCommandStreamReceiver->residency.size()); std::vector surfaces; - pKernel->getResidency(surfaces, rootDeviceIndex); + pKernel->getResidency(surfaces); EXPECT_EQ(2u, surfaces.size()); for (Surface *surface : surfaces) { diff --git a/opencl/test/unit_test/sampler/sampler_set_arg_tests.cpp b/opencl/test/unit_test/sampler/sampler_set_arg_tests.cpp index 75d7aedd53..854200b78d 100644 --- a/opencl/test/unit_test/sampler/sampler_set_arg_tests.cpp +++ b/opencl/test/unit_test/sampler/sampler_set_arg_tests.cpp @@ -137,7 +137,7 @@ HWTEST_F(SamplerSetArgTest, WhenSettingKernelArgSamplerThenSamplerStatesAreCorre EXPECT_EQ(SAMPLER_STATE::MIP_MODE_FILTER_NEAREST, samplerState->getMipModeFilter()); std::vector surfaces; - pKernel->getResidency(surfaces, rootDeviceIndex); + pKernel->getResidency(surfaces); EXPECT_EQ(0u, surfaces.size()); } @@ -314,7 +314,7 @@ HWTEST_F(SamplerSetArgTest, GivenFilteringNearestAndAddressingClampWhenSettingKe EXPECT_EQ(samplerObj, pKernel->getKernelArg(0)); - auto crossThreadData = reinterpret_cast(pKernel->getCrossThreadData(rootDeviceIndex)); + auto crossThreadData = reinterpret_cast(pKernel->getCrossThreadData()); auto snapWaCrossThreadData = ptrOffset(crossThreadData, 0x4); unsigned int snapWaValue = 0xffffffff; @@ -435,7 +435,7 @@ HWTEST_P(NormalizedTest, WhenSettingKernelArgSamplerThenCoordsAreCorrect) { EXPECT_EQ(normalizedCoordinates, static_cast(!samplerState->getNonNormalizedCoordinateEnable())); - auto crossThreadData = reinterpret_cast(pKernel->getCrossThreadData(rootDeviceIndex)); + auto crossThreadData = reinterpret_cast(pKernel->getCrossThreadData()); auto normalizedCoordsAddress = ptrOffset(crossThreadData, 0x10); unsigned int normalizedCoordsValue = GetNormCoordsEnum(normalizedCoordinates); @@ -518,7 +518,7 @@ HWTEST_P(AddressingModeTest, WhenSettingKernelArgSamplerThenModesAreCorrect) { EXPECT_EQ(expectedModeY, samplerState->getTcyAddressControlMode()); EXPECT_EQ(expectedModeZ, samplerState->getTczAddressControlMode()); - auto crossThreadData = reinterpret_cast(pKernel->getCrossThreadData(rootDeviceIndex)); + auto crossThreadData = reinterpret_cast(pKernel->getCrossThreadData()); auto addressingModeAddress = ptrOffset(crossThreadData, 0x8); unsigned int addresingValue = GetAddrModeEnum(addressingMode); diff --git a/opencl/test/unit_test/scenarios/windows/enqueue_read_write_buffer_scenarios_windows_tests.cpp b/opencl/test/unit_test/scenarios/windows/enqueue_read_write_buffer_scenarios_windows_tests.cpp index 171338e3a3..8c8da60fdf 100644 --- a/opencl/test/unit_test/scenarios/windows/enqueue_read_write_buffer_scenarios_windows_tests.cpp +++ b/opencl/test/unit_test/scenarios/windows/enqueue_read_write_buffer_scenarios_windows_tests.cpp @@ -110,13 +110,13 @@ HWTEST_F(EnqueueBufferWindowsTest, givenMisalignedHostPtrWhenEnqueueReadBufferCa const auto &surfaceStateDst = getSurfaceState(&cmdQ->getIndirectHeap(IndirectHeap::SURFACE_STATE, 0), 1); if (kernelInfo.kernelArgInfo[1].kernelArgPatchInfoVector[0].size == sizeof(uint64_t)) { - auto pKernelArg = (uint64_t *)(kernel->getCrossThreadData(rootDeviceIndex) + + auto pKernelArg = (uint64_t *)(kernel->getCrossThreadData() + kernelInfo.kernelArgInfo[1].kernelArgPatchInfoVector[0].crossthreadOffset); EXPECT_EQ(alignDown(gpuVa, 4), static_cast(*pKernelArg)); EXPECT_EQ(*pKernelArg, surfaceStateDst.getSurfaceBaseAddress()); } else if (kernelInfo.kernelArgInfo[1].kernelArgPatchInfoVector[0].size == sizeof(uint32_t)) { - auto pKernelArg = (uint32_t *)(kernel->getCrossThreadData(rootDeviceIndex) + + auto pKernelArg = (uint32_t *)(kernel->getCrossThreadData() + kernelInfo.kernelArgInfo[1].kernelArgPatchInfoVector[0].crossthreadOffset); EXPECT_EQ(alignDown(gpuVa, 4), static_cast(*pKernelArg)); EXPECT_EQ(static_cast(*pKernelArg), surfaceStateDst.getSurfaceBaseAddress()); @@ -124,7 +124,7 @@ HWTEST_F(EnqueueBufferWindowsTest, givenMisalignedHostPtrWhenEnqueueReadBufferCa } if (kernelInfo.kernelArgInfo[3].kernelArgPatchInfoVector[0].size == sizeof(uint32_t)) { - auto dstOffset = (uint32_t *)(kernel->getCrossThreadData(rootDeviceIndex) + + auto dstOffset = (uint32_t *)(kernel->getCrossThreadData() + kernelInfo.kernelArgInfo[3].kernelArgPatchInfoVector[0].crossthreadOffset); EXPECT_EQ(ptrDiff(misalignedPtr, alignDown(misalignedPtr, 4)), *dstOffset); } else {