diff --git a/opencl/source/api/api.cpp b/opencl/source/api/api.cpp index 0de77414a8..03190a9825 100644 --- a/opencl/source/api/api.cpp +++ b/opencl/source/api/api.cpp @@ -3454,7 +3454,7 @@ cl_int CL_API_CALL clEnqueueNDRangeKernel(cl_command_queue commandQueue, Kernel *pKernel = pMultiDeviceKernel->getKernel(pCommandQueue->getDevice().getRootDeviceIndex()); if ((pKernel->getExecutionType() != KernelExecutionType::Default) || - pKernel->usesSyncBuffer(pCommandQueue->getDevice().getRootDeviceIndex())) { + pKernel->usesSyncBuffer()) { retVal = CL_INVALID_KERNEL; TRACING_EXIT(clEnqueueNDRangeKernel, &retVal); return retVal; @@ -4820,7 +4820,7 @@ cl_int CL_API_CALL clSetKernelArgSVMPointer(cl_kernel kernel, for (const auto &pDevice : pMultiDeviceKernel->getDevices()) { auto pKernel = pMultiDeviceKernel->getKernel(pDevice->getRootDeviceIndex()); - cl_int kernelArgAddressQualifier = asClKernelArgAddressQualifier(pKernel->getKernelInfo(pDevice->getRootDeviceIndex()).kernelArgInfo[argIndex].metadata.getAddressQualifier()); + cl_int kernelArgAddressQualifier = asClKernelArgAddressQualifier(pKernel->getKernelInfo().kernelArgInfo[argIndex].metadata.getAddressQualifier()); if ((kernelArgAddressQualifier != CL_KERNEL_ARG_ADDRESS_GLOBAL) && (kernelArgAddressQualifier != CL_KERNEL_ARG_ADDRESS_CONSTANT)) { retVal = CL_INVALID_ARG_VALUE; @@ -5920,7 +5920,7 @@ cl_int CL_API_CALL clEnqueueNDCountKernelINTEL(cl_command_queue commandQueue, } } - if (pKernel->usesSyncBuffer(rootDeviceIndex)) { + if (pKernel->usesSyncBuffer()) { if (pKernel->getExecutionType() != KernelExecutionType::Concurrent) { retVal = CL_INVALID_KERNEL; return retVal; diff --git a/opencl/source/built_ins/built_ins.inl b/opencl/source/built_ins/built_ins.inl index 463f4cc63b..feb39c6735 100644 --- a/opencl/source/built_ins/built_ins.inl +++ b/opencl/source/built_ins/built_ins.inl @@ -22,10 +22,10 @@ void BuiltInOp::resizeKernelInstances(size_t size) convertToAuxKernel.reserve(size); for (size_t i = convertToNonAuxKernel.size(); i < size; i++) { - auto clonedNonAuxToAuxKernel = Kernel::create(baseKernel->getProgram(), baseKernel->getKernelInfos(), clDevice, nullptr); + auto clonedNonAuxToAuxKernel = Kernel::create(baseKernel->getProgram(), baseKernel->getKernelInfo(), clDevice, nullptr); clonedNonAuxToAuxKernel->setAuxTranslationDirection(AuxTranslationDirection::NonAuxToAux); - auto clonedAuxToNonAuxKernel = Kernel::create(baseKernel->getProgram(), baseKernel->getKernelInfos(), clDevice, nullptr); + auto clonedAuxToNonAuxKernel = Kernel::create(baseKernel->getProgram(), baseKernel->getKernelInfo(), clDevice, nullptr); clonedAuxToNonAuxKernel->setAuxTranslationDirection(AuxTranslationDirection::AuxToNonAux); clonedNonAuxToAuxKernel->cloneKernel(baseKernel); diff --git a/opencl/source/built_ins/vme_dispatch_builder.h b/opencl/source/built_ins/vme_dispatch_builder.h index 9019c2ae21..949568bbbf 100644 --- a/opencl/source/built_ins/vme_dispatch_builder.h +++ b/opencl/source/built_ins/vme_dispatch_builder.h @@ -27,17 +27,17 @@ class VmeBuiltinDispatchInfoBuilder : public BuiltinDispatchInfoBuilder { populate(builtinOp, mediaKernelsBuildOptions, kernelName, multiDeviceVmeKernel); - auto rootDeviceIndex = clDevice.getRootDeviceIndex(); + auto rootDeviceIndex = device.getRootDeviceIndex(); vmeKernel = multiDeviceVmeKernel->getKernel(rootDeviceIndex); - widthArgNum = vmeKernel->getKernelInfo(rootDeviceIndex).getArgNumByName("width"); - heightArgNum = vmeKernel->getKernelInfo(rootDeviceIndex).getArgNumByName("height"); - strideArgNum = vmeKernel->getKernelInfo(rootDeviceIndex).getArgNumByName("stride"); - acceleratorArgNum = vmeKernel->getKernelInfo(rootDeviceIndex).getArgNumByName("accelerator"); - srcImgArgNum = vmeKernel->getKernelInfo(rootDeviceIndex).getArgNumByName("srcImg"); - refImgArgNum = vmeKernel->getKernelInfo(rootDeviceIndex).getArgNumByName("refImg"); - motionVectorBufferArgNum = vmeKernel->getKernelInfo(rootDeviceIndex).getArgNumByName("motion_vector_buffer"); - predictionMotionVectorBufferArgNum = vmeKernel->getKernelInfo(rootDeviceIndex).getArgNumByName("prediction_motion_vector_buffer"); - residualsArgNum = vmeKernel->getKernelInfo(rootDeviceIndex).getArgNumByName("residuals"); + widthArgNum = vmeKernel->getKernelInfo().getArgNumByName("width"); + heightArgNum = vmeKernel->getKernelInfo().getArgNumByName("height"); + strideArgNum = vmeKernel->getKernelInfo().getArgNumByName("stride"); + acceleratorArgNum = vmeKernel->getKernelInfo().getArgNumByName("accelerator"); + srcImgArgNum = vmeKernel->getKernelInfo().getArgNumByName("srcImg"); + refImgArgNum = vmeKernel->getKernelInfo().getArgNumByName("refImg"); + motionVectorBufferArgNum = vmeKernel->getKernelInfo().getArgNumByName("motion_vector_buffer"); + predictionMotionVectorBufferArgNum = vmeKernel->getKernelInfo().getArgNumByName("prediction_motion_vector_buffer"); + residualsArgNum = vmeKernel->getKernelInfo().getArgNumByName("residuals"); } void getBlkTraits(const Vec3 &inGws, size_t &gwWidthInBlk, size_t &gwHeightInBlk) const { @@ -53,8 +53,6 @@ class VmeBuiltinDispatchInfoBuilder : public BuiltinDispatchInfoBuilder { return false; } - auto rootDeviceIndex = clDevice.getRootDeviceIndex(); - size_t gwWidthInBlk = 0; size_t gwHeightInBlk = 0; getBlkTraits(inGws, gwWidthInBlk, gwHeightInBlk); @@ -63,7 +61,7 @@ class VmeBuiltinDispatchInfoBuilder : public BuiltinDispatchInfoBuilder { cl_int width = (cl_int)gwWidthInBlk; cl_int stride = height; size_t numThreadsX = gwWidthInBlk; - const size_t simdWidth = vmeKernel->getKernelInfo(rootDeviceIndex).getMaxSimdSize(); + const size_t simdWidth = vmeKernel->getKernelInfo().getMaxSimdSize(); stride = static_cast(Math::divideAndRoundUp(height * width, numThreadsX)); // update implicit args @@ -73,7 +71,7 @@ class VmeBuiltinDispatchInfoBuilder : public BuiltinDispatchInfoBuilder { // Update global work size to force macro-block to HW thread execution model Vec3 gws = {numThreadsX * simdWidth, 1, 1}; - Vec3 lws = {vmeKernel->getKernelInfo(rootDeviceIndex).kernelDescriptor.kernelAttributes.requiredWorkgroupSize[0], 1, 1}; + Vec3 lws = {vmeKernel->getKernelInfo().kernelDescriptor.kernelAttributes.requiredWorkgroupSize[0], 1, 1}; DispatchInfoBuilder builder(clDevice); builder.setDispatchGeometry(gws, lws, inOffset, gws, lws); @@ -168,8 +166,7 @@ class VmeBuiltinDispatchInfoBuilder : public BuiltinDispatchInfoBuilder { template RetType getKernelArgByValValue(uint32_t argNum) const { - auto rootDeviceIndex = clDevice.getRootDeviceIndex(); - auto &kernelArgInfo = vmeKernel->getKernelInfo(rootDeviceIndex).kernelArgInfo[argNum]; + auto &kernelArgInfo = vmeKernel->getKernelInfo().kernelArgInfo[argNum]; DEBUG_BREAK_IF(kernelArgInfo.kernelArgPatchInfoVector.size() != 1); const KernelArgPatchInfo &patchInfo = kernelArgInfo.kernelArgPatchInfoVector[0]; DEBUG_BREAK_IF(sizeof(RetType) > patchInfo.size); @@ -261,19 +258,18 @@ class AdvancedVmeBuiltinDispatchInfoBuilder : public VmeBuiltinDispatchInfoBuild const char *kernelName) : VmeBuiltinDispatchInfoBuilder(kernelsLib, device, builtinOp, kernelName) { - auto rootDeviceIndex = clDevice.getRootDeviceIndex(); - flagsArgNum = this->vmeKernel->getKernelInfo(rootDeviceIndex).getArgNumByName("flags"); - intraSrcImgArgNum = this->vmeKernel->getKernelInfo(rootDeviceIndex).getArgNumByName("intraSrcImg"); - skipBlockTypeArgNum = this->vmeKernel->getKernelInfo(rootDeviceIndex).getArgNumByName("skip_block_type"); - searchCostPenaltyArgNum = this->vmeKernel->getKernelInfo(rootDeviceIndex).getArgNumByName("search_cost_penalty"); - searchCostPrecisionArgNum = this->vmeKernel->getKernelInfo(rootDeviceIndex).getArgNumByName("search_cost_precision"); - bidirWeightArgNum = this->vmeKernel->getKernelInfo(rootDeviceIndex).getArgNumByName("bidir_weight"); - predictorsBufferArgNum = this->vmeKernel->getKernelInfo(rootDeviceIndex).getArgNumByName("predictors_buffer"); - countMotionVectorBufferArgNum = this->vmeKernel->getKernelInfo(rootDeviceIndex).getArgNumByName("count_motion_vector_buffer"); - skipMotionVectorBufferArgNum = this->vmeKernel->getKernelInfo(rootDeviceIndex).getArgNumByName("skip_motion_vector_buffer"); - intraSearchPredictorModesArgNum = this->vmeKernel->getKernelInfo(rootDeviceIndex).getArgNumByName("intra_search_predictor_modes"); - skipResidualsArgNum = this->vmeKernel->getKernelInfo(rootDeviceIndex).getArgNumByName("skip_residuals"); - intraResidualsArgNum = this->vmeKernel->getKernelInfo(rootDeviceIndex).getArgNumByName("intra_residuals"); + flagsArgNum = this->vmeKernel->getKernelInfo().getArgNumByName("flags"); + intraSrcImgArgNum = this->vmeKernel->getKernelInfo().getArgNumByName("intraSrcImg"); + skipBlockTypeArgNum = this->vmeKernel->getKernelInfo().getArgNumByName("skip_block_type"); + searchCostPenaltyArgNum = this->vmeKernel->getKernelInfo().getArgNumByName("search_cost_penalty"); + searchCostPrecisionArgNum = this->vmeKernel->getKernelInfo().getArgNumByName("search_cost_precision"); + bidirWeightArgNum = this->vmeKernel->getKernelInfo().getArgNumByName("bidir_weight"); + predictorsBufferArgNum = this->vmeKernel->getKernelInfo().getArgNumByName("predictors_buffer"); + countMotionVectorBufferArgNum = this->vmeKernel->getKernelInfo().getArgNumByName("count_motion_vector_buffer"); + skipMotionVectorBufferArgNum = this->vmeKernel->getKernelInfo().getArgNumByName("skip_motion_vector_buffer"); + intraSearchPredictorModesArgNum = this->vmeKernel->getKernelInfo().getArgNumByName("intra_search_predictor_modes"); + skipResidualsArgNum = this->vmeKernel->getKernelInfo().getArgNumByName("skip_residuals"); + intraResidualsArgNum = this->vmeKernel->getKernelInfo().getArgNumByName("intra_residuals"); } bool setExplicitArg(uint32_t argIndex, size_t argSize, const void *argVal, cl_int &err) const override { diff --git a/opencl/source/command_queue/command_queue.cpp b/opencl/source/command_queue/command_queue.cpp index 8367d2b3a0..54d755655f 100644 --- a/opencl/source/command_queue/command_queue.cpp +++ b/opencl/source/command_queue/command_queue.cpp @@ -536,15 +536,14 @@ void CommandQueue::enqueueBlockedMapUnmapOperation(const cl_event *eventWaitList bool CommandQueue::setupDebugSurface(Kernel *kernel) { auto debugSurface = getGpgpuCommandStreamReceiver().getDebugSurfaceAllocation(); - auto rootDeviceIndex = device->getRootDeviceIndex(); - DEBUG_BREAK_IF(!kernel->requiresSshForBuffers(rootDeviceIndex)); - auto surfaceState = ptrOffset(reinterpret_cast(kernel->getSurfaceStateHeap(rootDeviceIndex)), - kernel->getKernelInfo(rootDeviceIndex).kernelDescriptor.payloadMappings.implicitArgs.systemThreadSurfaceAddress.bindful); + DEBUG_BREAK_IF(!kernel->requiresSshForBuffers()); + auto surfaceState = ptrOffset(reinterpret_cast(kernel->getSurfaceStateHeap()), + kernel->getKernelInfo().kernelDescriptor.payloadMappings.implicitArgs.systemThreadSurfaceAddress.bindful); void *addressToPatch = reinterpret_cast(debugSurface->getGpuAddress()); size_t sizeToPatch = debugSurface->getUnderlyingBufferSize(); Buffer::setSurfaceState(&device->getDevice(), surfaceState, false, false, sizeToPatch, addressToPatch, 0, debugSurface, 0, 0, - kernel->getDefaultKernelInfo().kernelDescriptor.kernelAttributes.flags.useGlobalAtomics, + kernel->getKernelInfo().kernelDescriptor.kernelAttributes.flags.useGlobalAtomics, kernel->getTotalNumDevicesInContext()); return true; } @@ -894,7 +893,7 @@ void CommandQueue::aubCaptureHook(bool &blocking, bool &clearAllDependencies, co if (getGpgpuCommandStreamReceiver().getType() > CommandStreamReceiverType::CSR_HW) { for (auto &dispatchInfo : multiDispatchInfo) { - auto kernelName = dispatchInfo.getKernel()->getKernelInfo(device->getRootDeviceIndex()).kernelDescriptor.kernelMetadata.kernelName; + auto kernelName = dispatchInfo.getKernel()->getKernelInfo().kernelDescriptor.kernelMetadata.kernelName; getGpgpuCommandStreamReceiver().addAubComment(kernelName.c_str()); } } diff --git a/opencl/source/command_queue/enqueue_common.h b/opencl/source/command_queue/enqueue_common.h index c2a0a7ddef..8054ce059b 100644 --- a/opencl/source/command_queue/enqueue_common.h +++ b/opencl/source/command_queue/enqueue_common.h @@ -67,11 +67,10 @@ void CommandQueueHw::enqueueHandler(Surface *(&surfaces)[surfaceCount if (DebugManager.flags.ForceDispatchScheduler.get()) { forceDispatchScheduler(multiDispatchInfo); } else { - auto rootDeviceIndex = device->getRootDeviceIndex(); kernel->updateAuxTranslationRequired(); if (kernel->isAuxTranslationRequired()) { - kernel->fillWithKernelObjsForAuxTranslation(kernelObjsForAuxTranslation, rootDeviceIndex); + kernel->fillWithKernelObjsForAuxTranslation(kernelObjsForAuxTranslation); multiDispatchInfo.setKernelObjsForAuxTranslation(kernelObjsForAuxTranslation); if (!kernelObjsForAuxTranslation.empty()) { @@ -86,13 +85,13 @@ void CommandQueueHw::enqueueHandler(Surface *(&surfaces)[surfaceCount dispatchAuxTranslationBuiltin(multiDispatchInfo, AuxTranslationDirection::AuxToNonAux); } - if (kernel->getKernelInfo(rootDeviceIndex).builtinDispatchBuilder == nullptr) { + if (kernel->getKernelInfo().builtinDispatchBuilder == nullptr) { DispatchInfoBuilder builder(getClDevice()); builder.setDispatchGeometry(workDim, workItems, enqueuedWorkSizes, globalOffsets, Vec3{0, 0, 0}, localWorkSizesIn); builder.setKernel(kernel); builder.bake(multiDispatchInfo); } else { - auto builder = kernel->getKernelInfo(rootDeviceIndex).builtinDispatchBuilder; + auto builder = kernel->getKernelInfo().builtinDispatchBuilder; builder->buildDispatchInfos(multiDispatchInfo, kernel, workDim, workItems, enqueuedWorkSizes, globalOffsets); if (multiDispatchInfo.size() == 0) { @@ -357,7 +356,7 @@ void CommandQueueHw::enqueueHandler(Surface **surfacesForResidency, if (blockQueue) { if (parentKernel) { - size_t minSizeSSHForEM = HardwareCommandsHelper::getSshSizeForExecutionModel(*parentKernel, device->getRootDeviceIndex()); + size_t minSizeSSHForEM = HardwareCommandsHelper::getSshSizeForExecutionModel(*parentKernel); blockedCommandsData->surfaceStateHeapSizeEM = minSizeSSHForEM; } @@ -400,7 +399,7 @@ void CommandQueueHw::processDispatchForKernels(const MultiDispatchInf printfHandler->prepareDispatch(multiDispatchInfo); } - if (multiDispatchInfo.peekMainKernel()->usesSyncBuffer(device->getRootDeviceIndex())) { + if (multiDispatchInfo.peekMainKernel()->usesSyncBuffer()) { auto &gws = multiDispatchInfo.begin()->getGWS(); auto &lws = multiDispatchInfo.begin()->getLocalWorkgroupSize(); size_t workGroupsCount = (gws.x * gws.y * gws.z) / @@ -569,8 +568,7 @@ void CommandQueueHw::processDeviceEnqueue(DeviceQueueHw *d TagNode *hwTimeStamps, bool &blocking) { auto parentKernel = multiDispatchInfo.peekParentKernel(); - auto rootDeviceIndex = devQueueHw->getDevice().getRootDeviceIndex(); - size_t minSizeSSHForEM = HardwareCommandsHelper::getSshSizeForExecutionModel(*parentKernel, rootDeviceIndex); + size_t minSizeSSHForEM = HardwareCommandsHelper::getSshSizeForExecutionModel(*parentKernel); bool isCcsUsed = EngineHelpers::isCcs(gpgpuEngine->osContext->getEngineType()); uint32_t taskCount = getGpgpuCommandStreamReceiver().peekTaskCount() + 1; @@ -684,8 +682,7 @@ CompletionStamp CommandQueueHw::enqueueNonBlocked( printfHandler->makeResident(getGpgpuCommandStreamReceiver()); } - auto rootDeviceIndex = device->getRootDeviceIndex(); - if (multiDispatchInfo.peekMainKernel()->usesSyncBuffer(rootDeviceIndex)) { + if (multiDispatchInfo.peekMainKernel()->usesSyncBuffer()) { device->getDevice().syncBufferHandler->makeResident(getGpgpuCommandStreamReceiver()); } @@ -722,7 +719,7 @@ CompletionStamp CommandQueueHw::enqueueNonBlocked( kernel->makeResident(getGpgpuCommandStreamReceiver()); requiresCoherency |= kernel->requiresCoherency(); mediaSamplerRequired |= kernel->isVmeKernel(); - auto numGrfRequiredByKernel = static_cast(kernel->getKernelInfo(rootDeviceIndex).kernelDescriptor.kernelAttributes.numGrfRequired); + auto numGrfRequiredByKernel = static_cast(kernel->getKernelInfo().kernelDescriptor.kernelAttributes.numGrfRequired); numGrfRequired = std::max(numGrfRequired, numGrfRequiredByKernel); specialPipelineSelectMode |= kernel->requiresSpecialPipelineSelectMode(); auxTranslationRequired |= kernel->isAuxTranslationRequired(); @@ -730,11 +727,11 @@ CompletionStamp CommandQueueHw::enqueueNonBlocked( anyUncacheableArgs = true; } - if (kernel->requiresPerDssBackedBuffer(rootDeviceIndex)) { + if (kernel->requiresPerDssBackedBuffer()) { usePerDssBackedBuffer = true; } - if (kernel->getDefaultKernelInfo().kernelDescriptor.kernelAttributes.flags.useGlobalAtomics) { + if (kernel->getKernelInfo().kernelDescriptor.kernelAttributes.flags.useGlobalAtomics) { useGlobalAtomics = true; } } diff --git a/opencl/source/command_queue/enqueue_kernel.h b/opencl/source/command_queue/enqueue_kernel.h index 9339b1899f..76935f41c7 100644 --- a/opencl/source/command_queue/enqueue_kernel.h +++ b/opencl/source/command_queue/enqueue_kernel.h @@ -37,8 +37,7 @@ cl_int CommandQueueHw::enqueueKernel( size_t enqueuedLocalWorkSize[3] = {0, 0, 0}; auto &kernel = *pKernel; - auto rootDeviceIndex = device->getRootDeviceIndex(); - const auto &kernelInfo = kernel.getKernelInfo(rootDeviceIndex); + const auto &kernelInfo = kernel.getKernelInfo(); if (kernel.isParentKernel && !this->context->getDefaultDeviceQueue()) { return CL_INVALID_OPERATION; @@ -109,7 +108,7 @@ cl_int CommandQueueHw::enqueueKernel( Surface *surfaces[] = {&s}; if (context->isProvidingPerformanceHints()) { - if (kernel.hasPrintfOutput(rootDeviceIndex)) { + if (kernel.hasPrintfOutput()) { context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_NEUTRAL_INTEL, PRINTF_DETECTED_IN_KERNEL, kernelInfo.kernelDescriptor.kernelMetadata.kernelName.c_str()); } if (kernel.requiresCoherency()) { diff --git a/opencl/source/command_queue/gpgpu_walker.h b/opencl/source/command_queue/gpgpu_walker.h index eb700c1ace..fecce241fe 100644 --- a/opencl/source/command_queue/gpgpu_walker.h +++ b/opencl/source/command_queue/gpgpu_walker.h @@ -102,7 +102,7 @@ class GpgpuWalkerHelper { bool disablePerfMode); static size_t getSizeForWADisableLSQCROPERFforOCL(const Kernel *pKernel); - static size_t getSizeForWaDisableRccRhwoOptimization(const Kernel *pKernel, uint32_t rootDeviceIndex); + static size_t getSizeForWaDisableRccRhwoOptimization(const Kernel *pKernel); static size_t setGpgpuWalkerThreadData( WALKER_TYPE *walkerCmd, @@ -200,7 +200,7 @@ IndirectHeap &getIndirectHeap(CommandQueue &commandQueue, const MultiDispatchInf if (Kernel *parentKernel = multiDispatchInfo.peekParentKernel()) { if (heapType == IndirectHeap::SURFACE_STATE) { - expectedSize += HardwareCommandsHelper::getSshSizeForExecutionModel(*parentKernel, commandQueue.getDevice().getRootDeviceIndex()); + expectedSize += HardwareCommandsHelper::getSshSizeForExecutionModel(*parentKernel); } else //if (heapType == IndirectHeap::DYNAMIC_STATE || heapType == IndirectHeap::INDIRECT_OBJECT) { DeviceQueueHw *pDevQueue = castToObject>(commandQueue.getContext().getDefaultDeviceQueue()); diff --git a/opencl/source/command_queue/gpgpu_walker_base.inl b/opencl/source/command_queue/gpgpu_walker_base.inl index d006b3452b..769448fcf9 100644 --- a/opencl/source/command_queue/gpgpu_walker_base.inl +++ b/opencl/source/command_queue/gpgpu_walker_base.inl @@ -172,7 +172,7 @@ size_t GpgpuWalkerHelper::getSizeForWADisableLSQCROPERFforOCL(const K } template -size_t GpgpuWalkerHelper::getSizeForWaDisableRccRhwoOptimization(const Kernel *pKernel, uint32_t rootDeviceIndex) { +size_t GpgpuWalkerHelper::getSizeForWaDisableRccRhwoOptimization(const Kernel *pKernel) { return 0u; } diff --git a/opencl/source/command_queue/gpgpu_walker_bdw_plus.inl b/opencl/source/command_queue/gpgpu_walker_bdw_plus.inl index 9d703e98cc..0522bf19cc 100644 --- a/opencl/source/command_queue/gpgpu_walker_bdw_plus.inl +++ b/opencl/source/command_queue/gpgpu_walker_bdw_plus.inl @@ -68,8 +68,7 @@ void GpgpuWalkerHelper::dispatchScheduler( IndirectHeap *dsh, bool isCcsUsed) { - auto rootDeviceIndex = devQueueHw.getDevice().getRootDeviceIndex(); - const auto &kernelInfo = scheduler.getKernelInfo(rootDeviceIndex); + const auto &kernelInfo = scheduler.getKernelInfo(); using INTERFACE_DESCRIPTOR_DATA = typename GfxFamily::INTERFACE_DESCRIPTOR_DATA; using GPGPU_WALKER = typename GfxFamily::GPGPU_WALKER; @@ -117,8 +116,8 @@ void GpgpuWalkerHelper::dispatchScheduler( auto pGpGpuWalkerCmd = commandStream.getSpaceForCmd(); GPGPU_WALKER cmdWalker = GfxFamily::cmdInitGpgpuWalker; - bool inlineDataProgrammingRequired = HardwareCommandsHelper::inlineDataProgrammingRequired(scheduler, rootDeviceIndex); - auto kernelUsesLocalIds = HardwareCommandsHelper::kernelUsesLocalIds(scheduler, rootDeviceIndex); + bool inlineDataProgrammingRequired = HardwareCommandsHelper::inlineDataProgrammingRequired(scheduler); + auto kernelUsesLocalIds = HardwareCommandsHelper::kernelUsesLocalIds(scheduler); HardwareCommandsHelper::sendIndirectState( commandStream, @@ -126,7 +125,7 @@ void GpgpuWalkerHelper::dispatchScheduler( *ioh, *ssh, scheduler, - scheduler.getKernelStartOffset(true, kernelUsesLocalIds, isCcsUsed, rootDeviceIndex), + scheduler.getKernelStartOffset(true, kernelUsesLocalIds, isCcsUsed), simd, localWorkSizes, offsetInterfaceDescriptorTable, @@ -195,7 +194,7 @@ size_t EnqueueOperation::getSizeRequiredCSKernel(bool reserveProfilin } size += PerformanceCounters::getGpuCommandsSize(commandQueue, reservePerfCounters); size += GpgpuWalkerHelper::getSizeForWADisableLSQCROPERFforOCL(pKernel); - size += GpgpuWalkerHelper::getSizeForWaDisableRccRhwoOptimization(pKernel, commandQueue.getDevice().getRootDeviceIndex()); + size += GpgpuWalkerHelper::getSizeForWaDisableRccRhwoOptimization(pKernel); return size; } diff --git a/opencl/source/command_queue/hardware_interface_base.inl b/opencl/source/command_queue/hardware_interface_base.inl index 96e4d1b918..400bc83c97 100644 --- a/opencl/source/command_queue/hardware_interface_base.inl +++ b/opencl/source/command_queue/hardware_interface_base.inl @@ -102,7 +102,7 @@ void HardwareInterface::dispatchWalker( size_t sizeToPatch = debugSurface->getUnderlyingBufferSize(); Buffer::setSurfaceState(&commandQueue.getDevice(), commandQueue.getDevice().getDebugger()->getDebugSurfaceReservedSurfaceState(*ssh), false, false, sizeToPatch, addressToPatch, 0, debugSurface, 0, 0, - mainKernel->getDefaultKernelInfo().kernelDescriptor.kernelAttributes.flags.useGlobalAtomics, + mainKernel->getKernelInfo().kernelDescriptor.kernelAttributes.flags.useGlobalAtomics, mainKernel->getTotalNumDevicesInContext()); } @@ -244,7 +244,6 @@ template void HardwareInterface::obtainIndirectHeaps(CommandQueue &commandQueue, const MultiDispatchInfo &multiDispatchInfo, bool blockedQueue, IndirectHeap *&dsh, IndirectHeap *&ioh, IndirectHeap *&ssh) { auto parentKernel = multiDispatchInfo.peekParentKernel(); - auto rootDeviceIndex = commandQueue.getDevice().getRootDeviceIndex(); if (blockedQueue) { size_t dshSize = 0; @@ -254,7 +253,7 @@ void HardwareInterface::obtainIndirectHeaps(CommandQueue &commandQueu if (parentKernel) { dshSize = commandQueue.getContext().getDefaultDeviceQueue()->getDshBuffer()->getUnderlyingBufferSize(); - sshSize += HardwareCommandsHelper::getSshSizeForExecutionModel(*parentKernel, rootDeviceIndex); + sshSize += HardwareCommandsHelper::getSshSizeForExecutionModel(*parentKernel); iohEqualsDsh = true; colorCalcSize = static_cast(commandQueue.getContext().getDefaultDeviceQueue()->colorCalcStateSize); } else { diff --git a/opencl/source/command_queue/hardware_interface_bdw_plus.inl b/opencl/source/command_queue/hardware_interface_bdw_plus.inl index 8c71aa0a9b..08c9183291 100644 --- a/opencl/source/command_queue/hardware_interface_bdw_plus.inl +++ b/opencl/source/command_queue/hardware_interface_bdw_plus.inl @@ -70,11 +70,10 @@ inline void HardwareInterface::programWalker( Vec3 &numberOfWorkgroups, Vec3 &startOfWorkgroups) { - auto rootDeviceIndex = commandQueue.getDevice().getRootDeviceIndex(); auto walkerCmdBuf = allocateWalkerSpace(commandStream, kernel); WALKER_TYPE walkerCmd = GfxFamily::cmdInitGpgpuWalker; uint32_t dim = dispatchInfo.getDim(); - uint32_t simd = kernel.getKernelInfo(rootDeviceIndex).getMaxSimdSize(); + uint32_t simd = kernel.getKernelInfo().getMaxSimdSize(); size_t globalOffsets[3] = {dispatchInfo.getOffset().x, dispatchInfo.getOffset().y, dispatchInfo.getOffset().z}; size_t startWorkGroups[3] = {startOfWorkgroups.x, startOfWorkgroups.y, startOfWorkgroups.z}; @@ -86,7 +85,7 @@ inline void HardwareInterface::programWalker( } auto isCcsUsed = EngineHelpers::isCcs(commandQueue.getGpgpuEngine().osContext->getEngineType()); - auto kernelUsesLocalIds = HardwareCommandsHelper::kernelUsesLocalIds(kernel, rootDeviceIndex); + auto kernelUsesLocalIds = HardwareCommandsHelper::kernelUsesLocalIds(kernel); HardwareCommandsHelper::sendIndirectState( commandStream, @@ -94,7 +93,7 @@ inline void HardwareInterface::programWalker( ioh, ssh, kernel, - kernel.getKernelStartOffset(true, kernelUsesLocalIds, isCcsUsed, rootDeviceIndex), + kernel.getKernelStartOffset(true, kernelUsesLocalIds, isCcsUsed), simd, localWorkSizes, offsetInterfaceDescriptorTable, @@ -105,7 +104,7 @@ inline void HardwareInterface::programWalker( true, commandQueue.getDevice()); - GpgpuWalkerHelper::setGpgpuWalkerThreadData(&walkerCmd, kernel.getKernelInfo(rootDeviceIndex).kernelDescriptor, + GpgpuWalkerHelper::setGpgpuWalkerThreadData(&walkerCmd, kernel.getKernelInfo().kernelDescriptor, globalOffsets, startWorkGroups, numWorkGroups, localWorkSizes, simd, dim, false, false, 0u); diff --git a/opencl/source/command_queue/local_work_size.cpp b/opencl/source/command_queue/local_work_size.cpp index 9c45bc9302..89b9ad21aa 100644 --- a/opencl/source/command_queue/local_work_size.cpp +++ b/opencl/source/command_queue/local_work_size.cpp @@ -416,11 +416,10 @@ Vec3 computeWorkgroupSize(const DispatchInfo &dispatchInfo) { if (kernel != nullptr) { auto &device = dispatchInfo.getClDevice(); - auto rootDeviceIndex = device.getRootDeviceIndex(); const auto &hwInfo = device.getHardwareInfo(); auto &hwHelper = HwHelper::get(hwInfo.platform.eRenderCoreFamily); auto isSimulation = device.isSimulation(); - if (kernel->requiresLimitedWorkgroupSize(rootDeviceIndex) && hwHelper.isSpecialWorkgroupSizeRequired(hwInfo, isSimulation)) { + if (kernel->requiresLimitedWorkgroupSize() && hwHelper.isSpecialWorkgroupSizeRequired(hwInfo, isSimulation)) { setSpecialWorkgroupSize(workGroupSize); } else if (DebugManager.flags.EnableComputeWorkSizeND.get()) { WorkSizeInfo wsInfo(dispatchInfo); @@ -428,7 +427,7 @@ Vec3 computeWorkgroupSize(const DispatchInfo &dispatchInfo) { computeWorkgroupSizeND(wsInfo, workGroupSize, workItems, dispatchInfo.getDim()); } else { auto maxWorkGroupSize = kernel->getMaxKernelWorkGroupSize(); - auto simd = kernel->getKernelInfo(rootDeviceIndex).getMaxSimdSize(); + auto simd = kernel->getKernelInfo().getMaxSimdSize(); size_t workItems[3] = {dispatchInfo.getGWS().x, dispatchInfo.getGWS().y, dispatchInfo.getGWS().z}; if (dispatchInfo.getDim() == 1) { computeWorkgroupSize1D(maxWorkGroupSize, workGroupSize, workItems, simd); @@ -476,7 +475,7 @@ void provideLocalWorkGroupSizeHints(Context *context, DispatchInfo dispatchInfo) preferredWorkGroupSize[1] = lws.y; preferredWorkGroupSize[2] = lws.z; - const auto &kernelInfo = dispatchInfo.getKernel()->getKernelInfo(dispatchInfo.getClDevice().getRootDeviceIndex()); + const auto &kernelInfo = dispatchInfo.getKernel()->getKernelInfo(); if (dispatchInfo.getEnqueuedWorkgroupSize().x == 0) { context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_NEUTRAL_INTEL, NULL_LOCAL_WORKGROUP_SIZE, kernelInfo.kernelDescriptor.kernelMetadata.kernelName.c_str(), preferredWorkGroupSize[0], preferredWorkGroupSize[1], preferredWorkGroupSize[2]); diff --git a/opencl/source/command_stream/aub_command_stream_receiver_hw_base.inl b/opencl/source/command_stream/aub_command_stream_receiver_hw_base.inl index 9c565fbe28..058b339c88 100644 --- a/opencl/source/command_stream/aub_command_stream_receiver_hw_base.inl +++ b/opencl/source/command_stream/aub_command_stream_receiver_hw_base.inl @@ -783,7 +783,7 @@ void AUBCommandStreamReceiverHw::dumpAllocation(GraphicsAllocation &g template AubSubCaptureStatus AUBCommandStreamReceiverHw::checkAndActivateAubSubCapture(const MultiDispatchInfo &dispatchInfo) { - std::string kernelName = dispatchInfo.peekMainKernel()->getKernelInfo(this->rootDeviceIndex).kernelDescriptor.kernelMetadata.kernelName; + std::string kernelName = dispatchInfo.peekMainKernel()->getKernelInfo().kernelDescriptor.kernelMetadata.kernelName; auto status = subCaptureManager->checkAndActivateSubCapture(kernelName); if (status.isActive) { std::string subCaptureFile = subCaptureManager->getSubCaptureFileName(kernelName); diff --git a/opencl/source/command_stream/tbx_command_stream_receiver_hw.inl b/opencl/source/command_stream/tbx_command_stream_receiver_hw.inl index 0b899a822b..a83528784f 100644 --- a/opencl/source/command_stream/tbx_command_stream_receiver_hw.inl +++ b/opencl/source/command_stream/tbx_command_stream_receiver_hw.inl @@ -560,7 +560,7 @@ AubSubCaptureStatus TbxCommandStreamReceiverHw::checkAndActivateAubSu return {false, false}; } - std::string kernelName = (dispatchInfo.empty() ? "" : dispatchInfo.peekMainKernel()->getKernelInfo(this->rootDeviceIndex).kernelDescriptor.kernelMetadata.kernelName); + std::string kernelName = (dispatchInfo.empty() ? "" : dispatchInfo.peekMainKernel()->getKernelInfo().kernelDescriptor.kernelMetadata.kernelName); auto status = subCaptureManager->checkAndActivateSubCapture(kernelName); if (status.isActive && !status.wasActiveInPreviousEnqueue) { dumpTbxNonWritable = true; diff --git a/opencl/source/context/context.cpp b/opencl/source/context/context.cpp index 6c16a76ecd..b65546dbcc 100644 --- a/opencl/source/context/context.cpp +++ b/opencl/source/context/context.cpp @@ -393,21 +393,16 @@ SchedulerKernel &Context::getSchedulerKernel() { schedulerBuiltIn->pProgram = program; - KernelInfoContainer kernelInfos; - kernelInfos.resize(getMaxRootDeviceIndex() + 1); - for (auto rootDeviceIndex : rootDeviceIndices) { - auto kernelInfo = schedulerBuiltIn->pProgram->getKernelInfo(SchedulerKernel::schedulerName, rootDeviceIndex); - DEBUG_BREAK_IF(!kernelInfo); - kernelInfos[rootDeviceIndex] = kernelInfo; - } + auto kernelInfo = schedulerBuiltIn->pProgram->getKernelInfo(SchedulerKernel::schedulerName, clDevice->getRootDeviceIndex()); + DEBUG_BREAK_IF(!kernelInfo); schedulerBuiltIn->pKernel = Kernel::create( schedulerBuiltIn->pProgram, - kernelInfos, + *kernelInfo, *clDevice, &retVal); - UNRECOVERABLE_IF(schedulerBuiltIn->pKernel->getScratchSize(clDevice->getRootDeviceIndex()) != 0); + UNRECOVERABLE_IF(schedulerBuiltIn->pKernel->getScratchSize() != 0); DEBUG_BREAK_IF(retVal != CL_SUCCESS); }; diff --git a/opencl/source/device_queue/device_queue_hw_bdw_plus.inl b/opencl/source/device_queue/device_queue_hw_bdw_plus.inl index b874e80320..d4e96162d2 100644 --- a/opencl/source/device_queue/device_queue_hw_bdw_plus.inl +++ b/opencl/source/device_queue/device_queue_hw_bdw_plus.inl @@ -151,13 +151,12 @@ template void DeviceQueueHw::setupIndirectState(IndirectHeap &surfaceStateHeap, IndirectHeap &dynamicStateHeap, Kernel *parentKernel, uint32_t parentIDCount, bool isCcsUsed) { using GPGPU_WALKER = typename GfxFamily::GPGPU_WALKER; void *pDSH = dynamicStateHeap.getCpuBase(); - auto rootDeviceIndex = device->getRootDeviceIndex(); // Set scheduler ID to last entry in first table, it will have ID == 0, blocks will have following entries. auto igilCmdQueue = reinterpret_cast(queueBuffer->getUnderlyingBuffer()); igilCmdQueue->m_controls.m_IDTstart = colorCalcStateSize + sizeof(INTERFACE_DESCRIPTOR_DATA) * (interfaceDescriptorEntries - 2); // Parent's dsh is located after ColorCalcState and 2 ID tables - igilCmdQueue->m_controls.m_DynamicHeapStart = offsetDsh + alignUp(static_cast(parentKernel->getDynamicStateHeapSize(rootDeviceIndex)), GPGPU_WALKER::INDIRECTDATASTARTADDRESS_ALIGN_SIZE); + igilCmdQueue->m_controls.m_DynamicHeapStart = offsetDsh + alignUp(static_cast(parentKernel->getDynamicStateHeapSize()), GPGPU_WALKER::INDIRECTDATASTARTADDRESS_ALIGN_SIZE); igilCmdQueue->m_controls.m_DynamicHeapSizeInBytes = (uint32_t)dshBuffer->getUnderlyingBufferSize(); igilCmdQueue->m_controls.m_CurrentDSHoffset = igilCmdQueue->m_controls.m_DynamicHeapStart; diff --git a/opencl/source/gen12lp/gpgpu_walker_gen12lp.cpp b/opencl/source/gen12lp/gpgpu_walker_gen12lp.cpp index 4781b2d355..fa5c78faa3 100644 --- a/opencl/source/gen12lp/gpgpu_walker_gen12lp.cpp +++ b/opencl/source/gen12lp/gpgpu_walker_gen12lp.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2019-2020 Intel Corporation + * Copyright (C) 2019-2021 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -30,7 +30,7 @@ void HardwareInterface::dispatchWorkarounds( using MI_LOAD_REGISTER_IMM = typename TGLLPFamily::MI_LOAD_REGISTER_IMM; using PIPE_CONTROL = typename TGLLPFamily::PIPE_CONTROL; - if (kernel.requiresWaDisableRccRhwoOptimization(commandQueue.getDevice().getRootDeviceIndex())) { + if (kernel.requiresWaDisableRccRhwoOptimization()) { PIPE_CONTROL cmdPipeControl = TGLLPFamily::cmdInitPipeControl; cmdPipeControl.setCommandStreamerStallEnable(true); @@ -46,8 +46,8 @@ void HardwareInterface::dispatchWorkarounds( } template <> -size_t GpgpuWalkerHelper::getSizeForWaDisableRccRhwoOptimization(const Kernel *pKernel, uint32_t rootDeviceIndex) { - if (pKernel->requiresWaDisableRccRhwoOptimization(rootDeviceIndex)) { +size_t GpgpuWalkerHelper::getSizeForWaDisableRccRhwoOptimization(const Kernel *pKernel) { + if (pKernel->requiresWaDisableRccRhwoOptimization()) { return (2 * (sizeof(TGLLPFamily::PIPE_CONTROL) + sizeof(TGLLPFamily::MI_LOAD_REGISTER_IMM))); } return 0u; diff --git a/opencl/source/gen8/gpgpu_walker_gen8.cpp b/opencl/source/gen8/gpgpu_walker_gen8.cpp index f873f9833f..7e1c7966aa 100644 --- a/opencl/source/gen8/gpgpu_walker_gen8.cpp +++ b/opencl/source/gen8/gpgpu_walker_gen8.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2018-2020 Intel Corporation + * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -15,12 +15,12 @@ namespace NEO { template <> void GpgpuWalkerHelper::applyWADisableLSQCROPERFforOCL(NEO::LinearStream *pCommandStream, const Kernel &kernel, bool disablePerfMode) { if (disablePerfMode) { - if (kernel.getDefaultKernelInfo().kernelDescriptor.kernelAttributes.flags.usesFencesForReadWriteImages) { + if (kernel.getKernelInfo().kernelDescriptor.kernelAttributes.flags.usesFencesForReadWriteImages) { // Set bit L3SQC_BIT_LQSC_RO_PERF_DIS in L3SQC_REG4 GpgpuWalkerHelper::addAluReadModifyWriteRegister(pCommandStream, L3SQC_REG4, AluRegisters::OPCODE_OR, L3SQC_BIT_LQSC_RO_PERF_DIS); } } else { - if (kernel.getDefaultKernelInfo().kernelDescriptor.kernelAttributes.flags.usesFencesForReadWriteImages) { + if (kernel.getKernelInfo().kernelDescriptor.kernelAttributes.flags.usesFencesForReadWriteImages) { // Add PIPE_CONTROL with CS_Stall to wait till GPU finishes its work typedef typename BDWFamily::PIPE_CONTROL PIPE_CONTROL; auto pCmd = reinterpret_cast(pCommandStream->getSpace(sizeof(PIPE_CONTROL))); @@ -40,7 +40,7 @@ size_t GpgpuWalkerHelper::getSizeForWADisableLSQCROPERFforOCL(const K typedef typename BDWFamily::MI_MATH MI_MATH; typedef typename BDWFamily::MI_MATH_ALU_INST_INLINE MI_MATH_ALU_INST_INLINE; size_t n = 0; - if (pKernel->getDefaultKernelInfo().kernelDescriptor.kernelAttributes.flags.usesFencesForReadWriteImages) { + if (pKernel->getKernelInfo().kernelDescriptor.kernelAttributes.flags.usesFencesForReadWriteImages) { n += sizeof(PIPE_CONTROL) + (2 * sizeof(MI_LOAD_REGISTER_REG) + sizeof(MI_LOAD_REGISTER_IMM) + diff --git a/opencl/source/gen9/gpgpu_walker_gen9.cpp b/opencl/source/gen9/gpgpu_walker_gen9.cpp index 8d8509611c..6447bcdfc4 100644 --- a/opencl/source/gen9/gpgpu_walker_gen9.cpp +++ b/opencl/source/gen9/gpgpu_walker_gen9.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2018-2020 Intel Corporation + * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -15,12 +15,12 @@ namespace NEO { template <> void GpgpuWalkerHelper::applyWADisableLSQCROPERFforOCL(NEO::LinearStream *pCommandStream, const Kernel &kernel, bool disablePerfMode) { if (disablePerfMode) { - if (kernel.getDefaultKernelInfo().kernelDescriptor.kernelAttributes.flags.usesFencesForReadWriteImages) { + if (kernel.getKernelInfo().kernelDescriptor.kernelAttributes.flags.usesFencesForReadWriteImages) { // Set bit L3SQC_BIT_LQSC_RO_PERF_DIS in L3SQC_REG4 GpgpuWalkerHelper::addAluReadModifyWriteRegister(pCommandStream, L3SQC_REG4, AluRegisters::OPCODE_OR, L3SQC_BIT_LQSC_RO_PERF_DIS); } } else { - if (kernel.getDefaultKernelInfo().kernelDescriptor.kernelAttributes.flags.usesFencesForReadWriteImages) { + if (kernel.getKernelInfo().kernelDescriptor.kernelAttributes.flags.usesFencesForReadWriteImages) { // Add PIPE_CONTROL with CS_Stall to wait till GPU finishes its work typedef typename SKLFamily::PIPE_CONTROL PIPE_CONTROL; auto pCmd = reinterpret_cast(pCommandStream->getSpace(sizeof(PIPE_CONTROL))); @@ -40,7 +40,7 @@ size_t GpgpuWalkerHelper::getSizeForWADisableLSQCROPERFforOCL(const K typedef typename SKLFamily::MI_MATH MI_MATH; typedef typename SKLFamily::MI_MATH_ALU_INST_INLINE MI_MATH_ALU_INST_INLINE; size_t n = 0; - if (pKernel->getDefaultKernelInfo().kernelDescriptor.kernelAttributes.flags.usesFencesForReadWriteImages) { + if (pKernel->getKernelInfo().kernelDescriptor.kernelAttributes.flags.usesFencesForReadWriteImages) { n += sizeof(PIPE_CONTROL) + (2 * sizeof(MI_LOAD_REGISTER_REG) + sizeof(MI_LOAD_REGISTER_IMM) + diff --git a/opencl/source/gtpin/gtpin_callbacks.cpp b/opencl/source/gtpin/gtpin_callbacks.cpp index 710655c71d..e20d17b481 100644 --- a/opencl/source/gtpin/gtpin_callbacks.cpp +++ b/opencl/source/gtpin/gtpin_callbacks.cpp @@ -66,29 +66,28 @@ void gtpinNotifyKernelCreate(cl_kernel kernel) { auto pMultiDeviceKernel = castToObjectOrAbort(kernel); auto pKernel = pMultiDeviceKernel->getDefaultKernel(); auto &device = pKernel->getDevices()[0]->getDevice(); - auto rootDeviceIndex = device.getRootDeviceIndex(); size_t gtpinBTI = pKernel->getNumberOfBindingTableStates(); // Enlarge local copy of SSH by 1 SS GFXCORE_FAMILY genFamily = device.getHardwareInfo().platform.eRenderCoreFamily; GTPinHwHelper >pinHelper = GTPinHwHelper::get(genFamily); - if (pKernel->isParentKernel || !gtpinHelper.addSurfaceState(pKernel, rootDeviceIndex)) { + if (pKernel->isParentKernel || !gtpinHelper.addSurfaceState(pKernel)) { // Kernel with no SSH or Kernel EM, not supported return; } - if (pKernel->isKernelHeapSubstituted(rootDeviceIndex)) { + if (pKernel->isKernelHeapSubstituted()) { // ISA for this kernel was already substituted return; } // Notify GT-Pin that new kernel was created Context *pContext = &(pKernel->getContext()); cl_context context = pContext; - auto &kernelInfo = pKernel->getKernelInfo(rootDeviceIndex); + auto &kernelInfo = pKernel->getKernelInfo(); instrument_params_in_t paramsIn = {}; paramsIn.kernel_type = GTPIN_KERNEL_TYPE_CS; paramsIn.simd = (GTPIN_SIMD_WIDTH)kernelInfo.getMaxSimdSize(); - paramsIn.orig_kernel_binary = (uint8_t *)pKernel->getKernelHeap(rootDeviceIndex); - paramsIn.orig_kernel_size = static_cast(pKernel->getKernelHeapSize(rootDeviceIndex)); + paramsIn.orig_kernel_binary = (uint8_t *)pKernel->getKernelHeap(); + paramsIn.orig_kernel_size = static_cast(pKernel->getKernelHeapSize()); paramsIn.buffer_type = GTPIN_BUFFER_BINDFULL; paramsIn.buffer_desc.BTI = static_cast(gtpinBTI); paramsIn.igc_hash_id = kernelInfo.shaderHashCode; @@ -100,7 +99,7 @@ void gtpinNotifyKernelCreate(cl_kernel kernel) { (*GTPinCallbacks.onKernelCreate)((context_handle_t)(cl_context)context, ¶msIn, ¶msOut); // Substitute ISA of created kernel with instrumented code pKernel->substituteKernelHeap(device, paramsOut.inst_kernel_binary, paramsOut.inst_kernel_size); - pKernel->setKernelId(rootDeviceIndex, paramsOut.kernel_id); + pKernel->setKernelId(paramsOut.kernel_id); } } @@ -111,13 +110,13 @@ void gtpinNotifyKernelSubmit(cl_kernel kernel, void *pCmdQueue) { auto rootDeviceIndex = device.getRootDeviceIndex(); auto pMultiDeviceKernel = castToObjectOrAbort(kernel); auto pKernel = pMultiDeviceKernel->getKernel(rootDeviceIndex); - if (pKernel->isParentKernel || pKernel->getSurfaceStateHeapSize(rootDeviceIndex) == 0) { + if (pKernel->isParentKernel || pKernel->getSurfaceStateHeapSize() == 0) { // Kernel with no SSH, not supported return; } Context *pContext = &(pKernel->getContext()); cl_context context = (cl_context)pContext; - uint64_t kernelId = pKernel->getKernelId(rootDeviceIndex); + uint64_t kernelId = pKernel->getKernelId(); command_buffer_handle_t commandBuffer = (command_buffer_handle_t)((uintptr_t)(sequenceCount++)); uint32_t kernelOffset = 0; resource_handle_t resource = 0; @@ -142,11 +141,11 @@ void gtpinNotifyKernelSubmit(cl_kernel kernel, void *pCmdQueue) { GFXCORE_FAMILY genFamily = device.getHardwareInfo().platform.eRenderCoreFamily; GTPinHwHelper >pinHelper = GTPinHwHelper::get(genFamily); size_t gtpinBTI = pKernel->getNumberOfBindingTableStates() - 1; - void *pSurfaceState = gtpinHelper.getSurfaceState(pKernel, gtpinBTI, rootDeviceIndex); + void *pSurfaceState = gtpinHelper.getSurfaceState(pKernel, gtpinBTI); cl_mem buffer = (cl_mem)resource; auto pBuffer = castToObjectOrAbort(buffer); pBuffer->setArgStateful(pSurfaceState, false, false, false, false, device, - pKernel->getDefaultKernelInfo().kernelDescriptor.kernelAttributes.flags.useGlobalAtomics, pContext->getNumDevices()); + pKernel->getKernelInfo().kernelDescriptor.kernelAttributes.flags.useGlobalAtomics, pContext->getNumDevices()); } } diff --git a/opencl/source/gtpin/gtpin_hw_helper.h b/opencl/source/gtpin/gtpin_hw_helper.h index bcf49ed333..fcac86cc21 100644 --- a/opencl/source/gtpin/gtpin_hw_helper.h +++ b/opencl/source/gtpin/gtpin_hw_helper.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2018-2020 Intel Corporation + * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -15,8 +15,8 @@ class GTPinHwHelper { public: static GTPinHwHelper &get(GFXCORE_FAMILY gfxCore); virtual uint32_t getGenVersion() = 0; - virtual bool addSurfaceState(Kernel *pKernel, uint32_t rootDeviceIndex) = 0; - virtual void *getSurfaceState(Kernel *pKernel, size_t bti, uint32_t rootDeviceIndex) = 0; + virtual bool addSurfaceState(Kernel *pKernel) = 0; + virtual void *getSurfaceState(Kernel *pKernel, size_t bti) = 0; protected: GTPinHwHelper(){}; @@ -30,8 +30,8 @@ class GTPinHwHelperHw : public GTPinHwHelper { return gtpinHwHelper; } uint32_t getGenVersion() override; - bool addSurfaceState(Kernel *pKernel, uint32_t rootDeviceIndex) override; - void *getSurfaceState(Kernel *pKernel, size_t bti, uint32_t rootDeviceIndex) override; + bool addSurfaceState(Kernel *pKernel) override; + void *getSurfaceState(Kernel *pKernel, size_t bti) override; private: GTPinHwHelperHw(){}; diff --git a/opencl/source/gtpin/gtpin_hw_helper.inl b/opencl/source/gtpin/gtpin_hw_helper.inl index 2e6283478d..c794794a81 100644 --- a/opencl/source/gtpin/gtpin_hw_helper.inl +++ b/opencl/source/gtpin/gtpin_hw_helper.inl @@ -15,11 +15,11 @@ namespace NEO { template -bool GTPinHwHelperHw::addSurfaceState(Kernel *pKernel, uint32_t rootDeviceIndex) { +bool GTPinHwHelperHw::addSurfaceState(Kernel *pKernel) { using RENDER_SURFACE_STATE = typename GfxFamily::RENDER_SURFACE_STATE; using BINDING_TABLE_STATE = typename GfxFamily::BINDING_TABLE_STATE; - size_t sshSize = pKernel->getSurfaceStateHeapSize(rootDeviceIndex); + size_t sshSize = pKernel->getSurfaceStateHeapSize(); if ((sshSize == 0) || pKernel->isParentKernel) { // Kernels which do not use SSH or use Execution Model are not supported (yet) return false; @@ -29,7 +29,7 @@ bool GTPinHwHelperHw::addSurfaceState(Kernel *pKernel, uint32_t rootD size_t sizeToEnlarge = ssSize + btsSize; size_t currBTOffset = pKernel->getBindingTableOffset(); size_t currSurfaceStateSize = currBTOffset; - char *pSsh = static_cast(pKernel->getSurfaceStateHeap(rootDeviceIndex)); + char *pSsh = static_cast(pKernel->getSurfaceStateHeap()); char *pNewSsh = new char[sshSize + sizeToEnlarge]; memcpy_s(pNewSsh, sshSize + sizeToEnlarge, pSsh, currSurfaceStateSize); RENDER_SURFACE_STATE *pSS = reinterpret_cast(pNewSsh + currSurfaceStateSize); @@ -45,14 +45,14 @@ bool GTPinHwHelperHw::addSurfaceState(Kernel *pKernel, uint32_t rootD } template -void *GTPinHwHelperHw::getSurfaceState(Kernel *pKernel, size_t bti, uint32_t rootDeviceIndex) { +void *GTPinHwHelperHw::getSurfaceState(Kernel *pKernel, size_t bti) { using BINDING_TABLE_STATE = typename GfxFamily::BINDING_TABLE_STATE; - if ((nullptr == pKernel->getSurfaceStateHeap(rootDeviceIndex)) || (bti >= pKernel->getNumberOfBindingTableStates())) { + if ((nullptr == pKernel->getSurfaceStateHeap()) || (bti >= pKernel->getNumberOfBindingTableStates())) { return nullptr; } - auto *pBts = reinterpret_cast(ptrOffset(pKernel->getSurfaceStateHeap(rootDeviceIndex), (pKernel->getBindingTableOffset() + bti * sizeof(BINDING_TABLE_STATE)))); - auto pSurfaceState = ptrOffset(pKernel->getSurfaceStateHeap(rootDeviceIndex), pBts->getSurfaceStatePointer()); + auto *pBts = reinterpret_cast(ptrOffset(pKernel->getSurfaceStateHeap(), (pKernel->getBindingTableOffset() + bti * sizeof(BINDING_TABLE_STATE)))); + auto pSurfaceState = ptrOffset(pKernel->getSurfaceStateHeap(), pBts->getSurfaceStatePointer()); return pSurfaceState; } diff --git a/opencl/source/helpers/dispatch_info.cpp b/opencl/source/helpers/dispatch_info.cpp index 32801a9ade..e5e06c6322 100644 --- a/opencl/source/helpers/dispatch_info.cpp +++ b/opencl/source/helpers/dispatch_info.cpp @@ -15,15 +15,15 @@ bool DispatchInfo::usesSlm() const { } bool DispatchInfo::usesStatelessPrintfSurface() const { - return (kernel == nullptr) ? false : kernel->hasPrintfOutput(pClDevice->getRootDeviceIndex()); + return (kernel == nullptr) ? false : kernel->hasPrintfOutput(); } uint32_t DispatchInfo::getRequiredScratchSize() const { - return (kernel == nullptr) ? 0 : kernel->getScratchSize(pClDevice->getRootDeviceIndex()); + return (kernel == nullptr) ? 0 : kernel->getScratchSize(); } uint32_t DispatchInfo::getRequiredPrivateScratchSize() const { - return (kernel == nullptr) ? 0 : kernel->getPrivateScratchSize(pClDevice->getRootDeviceIndex()); + return (kernel == nullptr) ? 0 : kernel->getPrivateScratchSize(); } Kernel *MultiDispatchInfo::peekMainKernel() const { diff --git a/opencl/source/helpers/hardware_commands_helper.h b/opencl/source/helpers/hardware_commands_helper.h index a0e7e83592..42cc69aa50 100644 --- a/opencl/source/helpers/hardware_commands_helper.h +++ b/opencl/source/helpers/hardware_commands_helper.h @@ -42,8 +42,7 @@ struct HardwareCommandsHelper : public PerThreadDataHelper { INTERFACE_DESCRIPTOR_DATA *pInterfaceDescriptor, const Kernel &kernel, const size_t &sizeCrossThreadData, - const size_t &sizePerThreadData, - uint32_t rootDeviceIndex); + const size_t &sizePerThreadData); inline static uint32_t additionalSizeRequiredDsh(); @@ -121,14 +120,12 @@ struct HardwareCommandsHelper : public PerThreadDataHelper { static size_t getSizeRequiredForCacheFlush(const CommandQueue &commandQueue, const Kernel *kernel, uint64_t postSyncAddress); static size_t getSizeRequiredDSH( - uint32_t rootDeviceIndex, const Kernel &kernel); static size_t getSizeRequiredIOH( - uint32_t rootDeviceIndex, const Kernel &kernel, size_t localWorkSize = 256); static size_t getSizeRequiredSSH( - const Kernel &kernel, uint32_t rootDeviceIndex); + const Kernel &kernel); static size_t getTotalSizeRequiredDSH( const MultiDispatchInfo &multiDispatchInfo); @@ -137,14 +134,14 @@ struct HardwareCommandsHelper : public PerThreadDataHelper { static size_t getTotalSizeRequiredSSH( const MultiDispatchInfo &multiDispatchInfo); - static size_t getSshSizeForExecutionModel(const Kernel &kernel, uint32_t rootDeviceIndex); + static size_t getSshSizeForExecutionModel(const Kernel &kernel); static void setInterfaceDescriptorOffset( WALKER_TYPE *walkerCmd, uint32_t &interfaceDescriptorIndex); static void programCacheFlushAfterWalkerCommand(LinearStream *commandStream, const CommandQueue &commandQueue, const Kernel *kernel, uint64_t postSyncAddress); - static bool inlineDataProgrammingRequired(const Kernel &kernel, uint32_t rootDeviceIndex); - static bool kernelUsesLocalIds(const Kernel &kernel, uint32_t rootDeviceIndex); + static bool inlineDataProgrammingRequired(const Kernel &kernel); + static bool kernelUsesLocalIds(const Kernel &kernel); }; } // namespace NEO diff --git a/opencl/source/helpers/hardware_commands_helper_base.inl b/opencl/source/helpers/hardware_commands_helper_base.inl index a5a19f5a63..6faf4a71ba 100644 --- a/opencl/source/helpers/hardware_commands_helper_base.inl +++ b/opencl/source/helpers/hardware_commands_helper_base.inl @@ -30,12 +30,10 @@ namespace NEO { template -size_t HardwareCommandsHelper::getSizeRequiredDSH( - uint32_t rootDeviceIndex, - const Kernel &kernel) { +size_t HardwareCommandsHelper::getSizeRequiredDSH(const Kernel &kernel) { using INTERFACE_DESCRIPTOR_DATA = typename GfxFamily::INTERFACE_DESCRIPTOR_DATA; using SAMPLER_STATE = typename GfxFamily::SAMPLER_STATE; - const auto &samplerTable = kernel.getKernelInfo(rootDeviceIndex).kernelDescriptor.payloadMappings.samplerTable; + const auto &samplerTable = kernel.getKernelInfo().kernelDescriptor.payloadMappings.samplerTable; auto samplerCount = samplerTable.numSamplers; auto totalSize = samplerCount @@ -48,18 +46,16 @@ size_t HardwareCommandsHelper::getSizeRequiredDSH( totalSize += borderColorSize + additionalSizeRequiredDsh(); - DEBUG_BREAK_IF(!(totalSize >= kernel.getDynamicStateHeapSize(rootDeviceIndex) || kernel.getKernelInfo(rootDeviceIndex).isVmeWorkload)); + DEBUG_BREAK_IF(!(totalSize >= kernel.getDynamicStateHeapSize() || kernel.getKernelInfo().isVmeWorkload)); return alignUp(totalSize, EncodeStates::alignInterfaceDescriptorData); } template -size_t HardwareCommandsHelper::getSizeRequiredIOH( - uint32_t rootDeviceIndex, - const Kernel &kernel, - size_t localWorkSize) { +size_t HardwareCommandsHelper::getSizeRequiredIOH(const Kernel &kernel, + size_t localWorkSize) { typedef typename GfxFamily::WALKER_TYPE WALKER_TYPE; - const auto &kernelInfo = kernel.getKernelInfo(rootDeviceIndex); + const auto &kernelInfo = kernel.getKernelInfo(); auto numChannels = kernelInfo.kernelDescriptor.kernelAttributes.numLocalIdChannels; uint32_t grfSize = sizeof(typename GfxFamily::GRF); @@ -69,10 +65,9 @@ size_t HardwareCommandsHelper::getSizeRequiredIOH( } template -size_t HardwareCommandsHelper::getSizeRequiredSSH( - const Kernel &kernel, uint32_t rootDeviceIndex) { +size_t HardwareCommandsHelper::getSizeRequiredSSH(const Kernel &kernel) { typedef typename GfxFamily::BINDING_TABLE_STATE BINDING_TABLE_STATE; - auto sizeSSH = kernel.getSurfaceStateHeapSize(rootDeviceIndex); + auto sizeSSH = kernel.getSurfaceStateHeapSize(); sizeSSH += sizeSSH ? BINDING_TABLE_STATE::SURFACESTATEPOINTER_ALIGN_SIZE : 0; return sizeSSH; } @@ -92,14 +87,13 @@ size_t getSizeRequired(const MultiDispatchInfo &multiDispatchInfo, SizeGetterT & template size_t HardwareCommandsHelper::getTotalSizeRequiredDSH( const MultiDispatchInfo &multiDispatchInfo) { - return getSizeRequired(multiDispatchInfo, [](const DispatchInfo &dispatchInfo) { return getSizeRequiredDSH(dispatchInfo.getClDevice().getRootDeviceIndex(), *dispatchInfo.getKernel()); }); + return getSizeRequired(multiDispatchInfo, [](const DispatchInfo &dispatchInfo) { return getSizeRequiredDSH(*dispatchInfo.getKernel()); }); } template size_t HardwareCommandsHelper::getTotalSizeRequiredIOH( const MultiDispatchInfo &multiDispatchInfo) { return getSizeRequired(multiDispatchInfo, [](const DispatchInfo &dispatchInfo) { return getSizeRequiredIOH( - dispatchInfo.getClDevice().getRootDeviceIndex(), *dispatchInfo.getKernel(), Math::computeTotalElementsCount(dispatchInfo.getLocalWorkgroupSize())); }); } @@ -107,11 +101,11 @@ size_t HardwareCommandsHelper::getTotalSizeRequiredIOH( template size_t HardwareCommandsHelper::getTotalSizeRequiredSSH( const MultiDispatchInfo &multiDispatchInfo) { - return getSizeRequired(multiDispatchInfo, [](const DispatchInfo &dispatchInfo) { return getSizeRequiredSSH(*dispatchInfo.getKernel(), dispatchInfo.getClDevice().getRootDeviceIndex()); }); + return getSizeRequired(multiDispatchInfo, [](const DispatchInfo &dispatchInfo) { return getSizeRequiredSSH(*dispatchInfo.getKernel()); }); } template -size_t HardwareCommandsHelper::getSshSizeForExecutionModel(const Kernel &kernel, uint32_t rootDeviceIndex) { +size_t HardwareCommandsHelper::getSshSizeForExecutionModel(const Kernel &kernel) { typedef typename GfxFamily::BINDING_TABLE_STATE BINDING_TABLE_STATE; size_t totalSize = 0; @@ -131,7 +125,7 @@ size_t HardwareCommandsHelper::getSshSizeForExecutionModel(const Kern SchedulerKernel &scheduler = kernel.getContext().getSchedulerKernel(); - totalSize += getSizeRequiredSSH(scheduler, rootDeviceIndex); + totalSize += getSizeRequiredSSH(scheduler); totalSize += maxBindingTableCount * sizeof(BINDING_TABLE_STATE) * DeviceQueue::interfaceDescriptorEntries; totalSize = alignUp(totalSize, BINDING_TABLE_STATE::SURFACESTATEPOINTER_ALIGN_SIZE); @@ -159,7 +153,6 @@ size_t HardwareCommandsHelper::sendInterfaceDescriptorData( using SHARED_LOCAL_MEMORY_SIZE = typename INTERFACE_DESCRIPTOR_DATA::SHARED_LOCAL_MEMORY_SIZE; const auto &hardwareInfo = device.getHardwareInfo(); - auto rootDeviceIndex = device.getRootDeviceIndex(); // Allocate some memory for the interface descriptor auto pInterfaceDescriptor = getInterfaceDescriptor(indirectHeap, offsetInterfaceDescriptor, inlineInterfaceDescriptor); @@ -176,7 +169,7 @@ size_t HardwareCommandsHelper::sendInterfaceDescriptorData( auto slmTotalSize = kernel.getSlmTotalSize(); - setGrfInfo(&interfaceDescriptor, kernel, sizeCrossThreadData, sizePerThreadData, rootDeviceIndex); + setGrfInfo(&interfaceDescriptor, kernel, sizeCrossThreadData, sizePerThreadData); EncodeDispatchKernel::appendAdditionalIDDFields(&interfaceDescriptor, hardwareInfo, threadsPerThreadGroup, slmTotalSize, SlmPolicy::SlmPolicyNone); interfaceDescriptor.setBindingTablePointer(static_cast(bindingTablePointer)); @@ -194,7 +187,7 @@ size_t HardwareCommandsHelper::sendInterfaceDescriptorData( interfaceDescriptor.setSharedLocalMemorySize(programmableIDSLMSize); EncodeDispatchKernel::programBarrierEnable(interfaceDescriptor, - kernel.getKernelInfo(rootDeviceIndex).kernelDescriptor.kernelAttributes.barrierCount, + kernel.getKernelInfo().kernelDescriptor.kernelAttributes.barrierCount, hardwareInfo); PreemptionHelper::programInterfaceDescriptorDataPreemption(&interfaceDescriptor, preemptionMode); @@ -227,16 +220,16 @@ size_t HardwareCommandsHelper::sendIndirectState( auto rootDeviceIndex = device.getRootDeviceIndex(); DEBUG_BREAK_IF(simd != 1 && simd != 8 && simd != 16 && simd != 32); - auto inlineDataProgrammingRequired = HardwareCommandsHelper::inlineDataProgrammingRequired(kernel, rootDeviceIndex); + auto inlineDataProgrammingRequired = HardwareCommandsHelper::inlineDataProgrammingRequired(kernel); // Copy the kernel over to the ISH - const auto &kernelInfo = kernel.getKernelInfo(rootDeviceIndex); + const auto &kernelInfo = kernel.getKernelInfo(); ssh.align(BINDING_TABLE_STATE::SURFACESTATEPOINTER_ALIGN_SIZE); kernel.patchBindlessSurfaceStateOffsets(device, ssh.getUsed()); auto dstBindingTablePointer = EncodeSurfaceState::pushBindingTableAndSurfaceStates(ssh, kernelInfo.kernelDescriptor.payloadMappings.bindingTable.numEntries, - kernel.getSurfaceStateHeap(rootDeviceIndex), kernel.getSurfaceStateHeapSize(rootDeviceIndex), + kernel.getSurfaceStateHeap(), kernel.getSurfaceStateHeapSize(), kernel.getNumberOfBindingTableStates(), kernel.getBindingTableOffset()); // Copy our sampler state if it exists @@ -247,7 +240,7 @@ size_t HardwareCommandsHelper::sendIndirectState( samplerCount = samplerTable.numSamplers; samplerStateOffset = EncodeStates::copySamplerState(&dsh, samplerTable.tableOffset, samplerCount, samplerTable.borderColor, - kernel.getDynamicStateHeap(rootDeviceIndex), device.getBindlessHeapsHelper()); + kernel.getDynamicStateHeap(), device.getBindlessHeapsHelper()); } auto localWorkItems = localWorkSize[0] * localWorkSize[1] * localWorkSize[2]; @@ -337,20 +330,20 @@ void HardwareCommandsHelper::updatePerThreadDataTotal( } template -bool HardwareCommandsHelper::inlineDataProgrammingRequired(const Kernel &kernel, uint32_t rootDeviceIndex) { +bool HardwareCommandsHelper::inlineDataProgrammingRequired(const Kernel &kernel) { auto checkKernelForInlineData = true; if (DebugManager.flags.EnablePassInlineData.get() != -1) { checkKernelForInlineData = !!DebugManager.flags.EnablePassInlineData.get(); } if (checkKernelForInlineData) { - return kernel.getKernelInfo(rootDeviceIndex).kernelDescriptor.kernelAttributes.flags.passInlineData; + return kernel.getKernelInfo().kernelDescriptor.kernelAttributes.flags.passInlineData; } return false; } template -bool HardwareCommandsHelper::kernelUsesLocalIds(const Kernel &kernel, uint32_t rootDeviceIndex) { - return kernel.getKernelInfo(rootDeviceIndex).kernelDescriptor.kernelAttributes.numLocalIdChannels > 0; +bool HardwareCommandsHelper::kernelUsesLocalIds(const Kernel &kernel) { + return kernel.getKernelInfo().kernelDescriptor.kernelAttributes.numLocalIdChannels > 0; } } // namespace NEO diff --git a/opencl/source/helpers/hardware_commands_helper_bdw_plus.inl b/opencl/source/helpers/hardware_commands_helper_bdw_plus.inl index dc6d13fd47..4ef3249ffb 100644 --- a/opencl/source/helpers/hardware_commands_helper_bdw_plus.inl +++ b/opencl/source/helpers/hardware_commands_helper_bdw_plus.inl @@ -25,8 +25,7 @@ typename HardwareCommandsHelper::INTERFACE_DESCRIPTOR_DATA *HardwareC template void HardwareCommandsHelper::setGrfInfo(INTERFACE_DESCRIPTOR_DATA *pInterfaceDescriptor, const Kernel &kernel, - const size_t &sizeCrossThreadData, const size_t &sizePerThreadData, - uint32_t rootDeviceIndex) { + const size_t &sizeCrossThreadData, const size_t &sizePerThreadData) { auto grfSize = sizeof(typename GfxFamily::GRF); DEBUG_BREAK_IF((sizeCrossThreadData % grfSize) != 0); auto numGrfCrossThreadData = static_cast(sizeCrossThreadData / grfSize); @@ -113,9 +112,9 @@ void HardwareCommandsHelper::programPerThreadData( grfSize, numChannels, std::array{{static_cast(localWorkSize[0]), static_cast(localWorkSize[1]), static_cast(localWorkSize[2])}}, - std::array{{kernel.getKernelInfo(rootDeviceIndex).kernelDescriptor.kernelAttributes.workgroupDimensionsOrder[0], - kernel.getKernelInfo(rootDeviceIndex).kernelDescriptor.kernelAttributes.workgroupDimensionsOrder[1], - kernel.getKernelInfo(rootDeviceIndex).kernelDescriptor.kernelAttributes.workgroupDimensionsOrder[2]}}, + std::array{{kernel.getKernelInfo().kernelDescriptor.kernelAttributes.workgroupDimensionsOrder[0], + kernel.getKernelInfo().kernelDescriptor.kernelAttributes.workgroupDimensionsOrder[1], + kernel.getKernelInfo().kernelDescriptor.kernelAttributes.workgroupDimensionsOrder[2]}}, kernel.usesOnlyImages()); updatePerThreadDataTotal(sizePerThreadData, simd, numChannels, sizePerThreadDataTotal, localWorkItems); diff --git a/opencl/source/helpers/task_information.cpp b/opencl/source/helpers/task_information.cpp index 39c3d05235..0577c61347 100644 --- a/opencl/source/helpers/task_information.cpp +++ b/opencl/source/helpers/task_information.cpp @@ -168,7 +168,6 @@ CompletionStamp &CommandComputeKernel::submit(uint32_t taskLevel, bool terminate printfHandler.get()->makeResident(commandStreamReceiver); } makeTimestampPacketsResident(commandStreamReceiver); - auto rootDeviceIndex = commandQueue.getDevice().getRootDeviceIndex(); if (executionModelKernel) { uint32_t taskCount = commandStreamReceiver.peekTaskCount() + 1; @@ -213,38 +212,38 @@ CompletionStamp &CommandComputeKernel::submit(uint32_t taskLevel, bool terminate commandQueue.getGpgpuCommandStreamReceiver(), bcsCsr); } - const auto &kernelDescriptor = kernel->getKernelInfo(rootDeviceIndex).kernelDescriptor; + const auto &kernelDescriptor = kernel->getKernelInfo().kernelDescriptor; auto memoryCompressionState = commandStreamReceiver.getMemoryCompressionState(kernel->isAuxTranslationRequired()); DispatchFlags dispatchFlags( - {}, //csrDependencies - nullptr, //barrierTimestampPacketNodes - {false, kernel->isVmeKernel()}, //pipelineSelectArgs - commandQueue.flushStamp->getStampReference(), //flushStampReference - commandQueue.getThrottle(), //throttle - preemptionMode, //preemptionMode - kernelDescriptor.kernelAttributes.numGrfRequired, //numGrfRequired - L3CachingSettings::l3CacheOn, //l3CacheSettings - kernel->getThreadArbitrationPolicy(), //threadArbitrationPolicy - kernel->getAdditionalKernelExecInfo(), //additionalKernelExecInfo - kernel->getExecutionType(), //kernelExecutionType - memoryCompressionState, //memoryCompressionState - commandQueue.getSliceCount(), //sliceCount - true, //blocking - flushDC, //dcFlush - slmUsed, //useSLM - true, //guardCommandBufferWithPipeControl - NDRangeKernel, //GSBA32BitRequired - requiresCoherency, //requiresCoherency - commandQueue.getPriority() == QueuePriority::LOW, //lowPriority - false, //implicitFlush - commandQueue.getGpgpuCommandStreamReceiver().isNTo1SubmissionModelEnabled(), //outOfOrderExecutionAllowed - false, //epilogueRequired - kernel->requiresPerDssBackedBuffer(rootDeviceIndex), //usePerDssBackedBuffer - kernel->isSingleSubdevicePreferred(), //useSingleSubdevice - kernel->getDefaultKernelInfo().kernelDescriptor.kernelAttributes.flags.useGlobalAtomics, //useGlobalAtomics - kernel->getTotalNumDevicesInContext()); //numDevicesInContext + {}, //csrDependencies + nullptr, //barrierTimestampPacketNodes + {false, kernel->isVmeKernel()}, //pipelineSelectArgs + commandQueue.flushStamp->getStampReference(), //flushStampReference + commandQueue.getThrottle(), //throttle + preemptionMode, //preemptionMode + kernelDescriptor.kernelAttributes.numGrfRequired, //numGrfRequired + L3CachingSettings::l3CacheOn, //l3CacheSettings + kernel->getThreadArbitrationPolicy(), //threadArbitrationPolicy + kernel->getAdditionalKernelExecInfo(), //additionalKernelExecInfo + kernel->getExecutionType(), //kernelExecutionType + memoryCompressionState, //memoryCompressionState + commandQueue.getSliceCount(), //sliceCount + true, //blocking + flushDC, //dcFlush + slmUsed, //useSLM + true, //guardCommandBufferWithPipeControl + NDRangeKernel, //GSBA32BitRequired + requiresCoherency, //requiresCoherency + commandQueue.getPriority() == QueuePriority::LOW, //lowPriority + false, //implicitFlush + commandQueue.getGpgpuCommandStreamReceiver().isNTo1SubmissionModelEnabled(), //outOfOrderExecutionAllowed + false, //epilogueRequired + kernel->requiresPerDssBackedBuffer(), //usePerDssBackedBuffer + kernel->isSingleSubdevicePreferred(), //useSingleSubdevice + kernel->getKernelInfo().kernelDescriptor.kernelAttributes.flags.useGlobalAtomics, //useGlobalAtomics + kernel->getTotalNumDevicesInContext()); //numDevicesInContext if (timestampPacketDependencies) { eventsRequest.fillCsrDependencies(dispatchFlags.csrDependencies, commandStreamReceiver, CsrDependencies::DependenciesType::OutOfCsr); diff --git a/opencl/source/kernel/get_additional_kernel_info.cpp b/opencl/source/kernel/get_additional_kernel_info.cpp index c6a8753e11..342f139ca1 100644 --- a/opencl/source/kernel/get_additional_kernel_info.cpp +++ b/opencl/source/kernel/get_additional_kernel_info.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2019-2020 Intel Corporation + * Copyright (C) 2019-2021 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -11,6 +11,6 @@ namespace NEO { void Kernel::getAdditionalInfo(cl_kernel_info paramName, const void *¶mValue, size_t ¶mValueSizeRet) const { } -void Kernel::getAdditionalWorkGroupInfo(cl_kernel_work_group_info paramName, const void *¶mValue, size_t ¶mValueSizeRet, uint32_t rootDeviceIndex) const { +void Kernel::getAdditionalWorkGroupInfo(cl_kernel_work_group_info paramName, const void *¶mValue, size_t ¶mValueSizeRet) const { } } // namespace NEO diff --git a/opencl/source/kernel/kernel.cpp b/opencl/source/kernel/kernel.cpp index ad8b2ecc9e..6582abb21e 100644 --- a/opencl/source/kernel/kernel.cpp +++ b/opencl/source/kernel/kernel.cpp @@ -64,21 +64,20 @@ class Surface; uint32_t Kernel::dummyPatchLocation = 0xbaddf00d; -Kernel::Kernel(Program *programArg, const KernelInfoContainer &kernelInfosArg, ClDevice &clDeviceArg, bool schedulerKernel) - : isParentKernel(kernelInfosArg[programArg->getDevices()[0]->getRootDeviceIndex()]->kernelDescriptor.kernelAttributes.flags.usesDeviceSideEnqueue), +Kernel::Kernel(Program *programArg, const KernelInfo &kernelInfoArg, ClDevice &clDeviceArg, bool schedulerKernel) + : isParentKernel(kernelInfoArg.kernelDescriptor.kernelAttributes.flags.usesDeviceSideEnqueue), isSchedulerKernel(schedulerKernel), executionEnvironment(programArg->getExecutionEnvironment()), program(programArg), clDevice(clDeviceArg), deviceVector(programArg->getDevices()), - kernelInfos(kernelInfosArg), + kernelInfo(kernelInfoArg), defaultRootDeviceIndex(clDeviceArg.getRootDeviceIndex()) { program->retain(); program->retainForKernel(); imageTransformer.reset(new ImageTransformer); - auto rootDeviceIndex = defaultRootDeviceIndex; maxKernelWorkGroupSize = static_cast(clDevice.getSharedDeviceInfo().maxWorkGroupSize); - slmTotalSize = kernelInfosArg[rootDeviceIndex]->workloadInfo.slmStaticSize; + slmTotalSize = kernelInfoArg.workloadInfo.slmStaticSize; } Kernel::~Kernel() { @@ -133,8 +132,6 @@ inline void patch(const SrcT &src, void *dst, uint32_t dstOffsetBytes) { } void Kernel::patchWithImplicitSurface(void *ptrToPatchInCrossThreadData, GraphicsAllocation &allocation, const Device &device, const ArgDescPointer &arg) { - auto rootDeviceIndex = device.getRootDeviceIndex(); - if ((nullptr != crossThreadData) && isValidOffset(arg.stateless)) { auto pp = ptrOffset(crossThreadData, arg.stateless); uintptr_t addressToPatch = reinterpret_cast(ptrToPatchInCrossThreadData); @@ -145,19 +142,18 @@ void Kernel::patchWithImplicitSurface(void *ptrToPatchInCrossThreadData, Graphic } } - void *ssh = getSurfaceStateHeap(rootDeviceIndex); + void *ssh = getSurfaceStateHeap(); if ((nullptr != ssh) & isValidOffset(arg.bindful)) { auto surfaceState = ptrOffset(ssh, arg.bindful); void *addressToPatch = reinterpret_cast(allocation.getGpuAddressToPatch()); size_t sizeToPatch = allocation.getUnderlyingBufferSize(); Buffer::setSurfaceState(&device, surfaceState, false, false, sizeToPatch, addressToPatch, 0, &allocation, 0, 0, - getDefaultKernelInfo().kernelDescriptor.kernelAttributes.flags.useGlobalAtomics, getTotalNumDevicesInContext()); + kernelInfo.kernelDescriptor.kernelAttributes.flags.useGlobalAtomics, getTotalNumDevicesInContext()); } } template void Kernel::patchWithImplicitSurface(void *ptrToPatchInCrossThreadData, GraphicsAllocation &allocation, const Device &device, const PatchTokenT &patch) { - auto rootDeviceIndex = device.getRootDeviceIndex(); uint32_t pointerSize = patch.DataParamSize; if (crossThreadData != nullptr) { @@ -171,14 +167,14 @@ void Kernel::patchWithImplicitSurface(void *ptrToPatchInCrossThreadData, Graphic } } - void *ssh = getSurfaceStateHeap(rootDeviceIndex); + void *ssh = getSurfaceStateHeap(); if (ssh) { uint32_t sshOffset = patch.SurfaceStateHeapOffset; auto surfaceState = ptrOffset(ssh, sshOffset); void *addressToPatch = reinterpret_cast(allocation.getGpuAddressToPatch()); size_t sizeToPatch = allocation.getUnderlyingBufferSize(); Buffer::setSurfaceState(&device, surfaceState, false, false, sizeToPatch, addressToPatch, 0, &allocation, 0, 0, - getDefaultKernelInfo().kernelDescriptor.kernelAttributes.flags.useGlobalAtomics, getTotalNumDevicesInContext()); + kernelInfo.kernelDescriptor.kernelAttributes.flags.useGlobalAtomics, getTotalNumDevicesInContext()); } } @@ -192,10 +188,9 @@ cl_int Kernel::initialize() { this->kernelHasIndirectAccess = false; auto pClDevice = &getDevice(); auto rootDeviceIndex = pClDevice->getRootDeviceIndex(); - reconfigureKernel(rootDeviceIndex); + reconfigureKernel(); auto &hwInfo = pClDevice->getHardwareInfo(); auto &hwHelper = HwHelper::get(hwInfo.platform.eRenderCoreFamily); - auto &kernelInfo = *kernelInfos[rootDeviceIndex]; auto &kernelDescriptor = kernelInfo.kernelDescriptor; auto maxSimdSize = kernelInfo.getMaxSimdSize(); const auto &workloadInfo = kernelInfo.workloadInfo; @@ -349,16 +344,16 @@ cl_int Kernel::initialize() { patchWithImplicitSurface(reinterpret_cast(globalMemory), *program->getGlobalSurface(rootDeviceIndex), pClDevice->getDevice(), arg); } - bool useGlobalAtomics = getDefaultKernelInfo().kernelDescriptor.kernelAttributes.flags.useGlobalAtomics; + bool useGlobalAtomics = kernelInfo.kernelDescriptor.kernelAttributes.flags.useGlobalAtomics; if (isValidOffset(kernelDescriptor.payloadMappings.implicitArgs.deviceSideEnqueueEventPoolSurfaceAddress.bindful)) { - auto surfaceState = ptrOffset(reinterpret_cast(getSurfaceStateHeap(rootDeviceIndex)), + auto surfaceState = ptrOffset(reinterpret_cast(getSurfaceStateHeap()), kernelDescriptor.payloadMappings.implicitArgs.deviceSideEnqueueEventPoolSurfaceAddress.bindful); Buffer::setSurfaceState(&pClDevice->getDevice(), surfaceState, false, false, 0, nullptr, 0, nullptr, 0, 0, useGlobalAtomics, getTotalNumDevicesInContext()); } if (isValidOffset(kernelDescriptor.payloadMappings.implicitArgs.deviceSideEnqueueDefaultQueueSurfaceAddress.bindful)) { - auto surfaceState = ptrOffset(reinterpret_cast(getSurfaceStateHeap(rootDeviceIndex)), + auto surfaceState = ptrOffset(reinterpret_cast(getSurfaceStateHeap()), kernelDescriptor.payloadMappings.implicitArgs.deviceSideEnqueueDefaultQueueSurfaceAddress.bindful); Buffer::setSurfaceState(&pClDevice->getDevice(), surfaceState, false, false, 0, nullptr, 0, nullptr, 0, 0, useGlobalAtomics, getTotalNumDevicesInContext()); } @@ -367,7 +362,7 @@ cl_int Kernel::initialize() { if (false == kernelInfo.kernelDescriptor.kernelAttributes.flags.requiresSubgroupIndependentForwardProgress) { setThreadArbitrationPolicy(ThreadArbitrationPolicy::AgeBased); } - patchBlocksSimdSize(rootDeviceIndex); + patchBlocksSimdSize(); auto &clHwHelper = ClHwHelper::get(hwInfo.platform.eRenderCoreFamily); auxTranslationRequired = HwHelper::renderCompressedBuffersSupported(hwInfo) && clHwHelper.requiresAuxResolves(kernelInfo); @@ -397,7 +392,7 @@ cl_int Kernel::initialize() { // double check this assumption bool usingBuffers = false; bool usingImages = false; - auto &defaultKernelInfo = getDefaultKernelInfo(); + auto &defaultKernelInfo = kernelInfo; kernelArguments.resize(numArgs); kernelArgHandlers.resize(numArgs); kernelArgRequiresCacheFlush.resize(numArgs); @@ -494,17 +489,16 @@ cl_int Kernel::getInfo(cl_kernel_info paramName, size_t paramValueSize, const _cl_context *ctxt; cl_uint refCount = 0; uint64_t nonCannonizedGpuAddress = 0llu; - auto &defaultKernelInfo = getKernelInfo(defaultRootDeviceIndex); switch (paramName) { case CL_KERNEL_FUNCTION_NAME: - pSrc = defaultKernelInfo.kernelDescriptor.kernelMetadata.kernelName.c_str(); - srcSize = defaultKernelInfo.kernelDescriptor.kernelMetadata.kernelName.length() + 1; + pSrc = kernelInfo.kernelDescriptor.kernelMetadata.kernelName.c_str(); + srcSize = kernelInfo.kernelDescriptor.kernelMetadata.kernelName.length() + 1; break; case CL_KERNEL_NUM_ARGS: srcSize = sizeof(cl_uint); - numArgs = static_cast(defaultKernelInfo.kernelArgInfo.size()); + numArgs = static_cast(kernelInfo.kernelArgInfo.size()); pSrc = &numArgs; break; @@ -527,16 +521,16 @@ cl_int Kernel::getInfo(cl_kernel_info paramName, size_t paramValueSize, break; case CL_KERNEL_ATTRIBUTES: - pSrc = defaultKernelInfo.kernelDescriptor.kernelMetadata.kernelLanguageAttributes.c_str(); - srcSize = defaultKernelInfo.kernelDescriptor.kernelMetadata.kernelLanguageAttributes.length() + 1; + pSrc = kernelInfo.kernelDescriptor.kernelMetadata.kernelLanguageAttributes.c_str(); + srcSize = kernelInfo.kernelDescriptor.kernelMetadata.kernelLanguageAttributes.length() + 1; break; case CL_KERNEL_BINARY_PROGRAM_INTEL: - pSrc = getKernelHeap(defaultRootDeviceIndex); - srcSize = getKernelHeapSize(defaultRootDeviceIndex); + pSrc = getKernelHeap(); + srcSize = getKernelHeapSize(); break; case CL_KERNEL_BINARY_GPU_ADDRESS_INTEL: - nonCannonizedGpuAddress = GmmHelper::decanonize(defaultKernelInfo.kernelAllocation->getGpuAddress()); + nonCannonizedGpuAddress = GmmHelper::decanonize(kernelInfo.kernelAllocation->getGpuAddress()); pSrc = &nonCannonizedGpuAddress; srcSize = sizeof(nonCannonizedGpuAddress); break; @@ -557,7 +551,7 @@ cl_int Kernel::getArgInfo(cl_uint argIndx, cl_kernel_arg_info paramName, size_t cl_int retVal; const void *pSrc = nullptr; size_t srcSize = GetInfo::invalidSourceSize; - auto &defaultKernelInfo = getDefaultKernelInfo(); + auto &defaultKernelInfo = kernelInfo; auto numArgs = static_cast(defaultKernelInfo.kernelArgInfo.size()); const auto &argInfo = defaultKernelInfo.kernelArgInfo[argIndx]; @@ -620,8 +614,6 @@ cl_int Kernel::getWorkGroupInfo(ClDevice &device, cl_kernel_work_group_info para size_t val[3]; } requiredWorkGroupSize; cl_ulong localMemorySize; - auto rootDeviceIndex = device.getRootDeviceIndex(); - auto &kernelInfo = *kernelInfos[rootDeviceIndex]; const auto &kernelDescriptor = kernelInfo.kernelDescriptor; size_t preferredWorkGroupSizeMultiple = 0; cl_ulong scratchSize; @@ -677,7 +669,7 @@ cl_int Kernel::getWorkGroupInfo(ClDevice &device, cl_kernel_work_group_info para pSrc = &privateMemSize; break; default: - getAdditionalWorkGroupInfo(paramName, pSrc, srcSize, rootDeviceIndex); + getAdditionalWorkGroupInfo(paramName, pSrc, srcSize); break; } @@ -694,8 +686,6 @@ cl_int Kernel::getSubGroupInfo(ClDevice &clDevice, cl_kernel_sub_group_info para size_t *paramValueSizeRet) const { size_t numDimensions = 0; size_t WGS = 1; - auto rootDeviceIndex = clDevice.getRootDeviceIndex(); - const auto &kernelInfo = getKernelInfo(rootDeviceIndex); auto maxSimdSize = static_cast(kernelInfo.getMaxSimdSize()); auto maxRequiredWorkGroupSize = static_cast(kernelInfo.getMaxRequiredWorkGroupSize(getMaxKernelWorkGroupSize())); auto largestCompiledSIMDSize = static_cast(kernelInfo.getMaxSimdSize()); @@ -793,17 +783,16 @@ cl_int Kernel::getSubGroupInfo(ClDevice &clDevice, cl_kernel_sub_group_info para } } -const void *Kernel::getKernelHeap(uint32_t rootDeviceIndex) const { - return getKernelInfo(rootDeviceIndex).heapInfo.pKernelHeap; +const void *Kernel::getKernelHeap() const { + return kernelInfo.heapInfo.pKernelHeap; } -size_t Kernel::getKernelHeapSize(uint32_t rootDeviceIndex) const { - return getKernelInfo(rootDeviceIndex).heapInfo.KernelHeapSize; +size_t Kernel::getKernelHeapSize() const { + return kernelInfo.heapInfo.KernelHeapSize; } void Kernel::substituteKernelHeap(const Device &device, void *newKernelHeap, size_t newKernelHeapSize) { - auto rootDeviceIndex = device.getRootDeviceIndex(); - KernelInfo *pKernelInfo = const_cast(&getKernelInfo(rootDeviceIndex)); + KernelInfo *pKernelInfo = const_cast(&kernelInfo); void **pKernelHeap = const_cast(&pKernelInfo->heapInfo.pKernelHeap); *pKernelHeap = newKernelHeap; auto &heapInfo = pKernelInfo->heapInfo; @@ -823,16 +812,16 @@ void Kernel::substituteKernelHeap(const Device &device, void *newKernelHeap, siz UNRECOVERABLE_IF(!status); } -bool Kernel::isKernelHeapSubstituted(uint32_t rootDeviceIndex) const { - return getKernelInfo(rootDeviceIndex).isKernelHeapSubstituted; +bool Kernel::isKernelHeapSubstituted() const { + return kernelInfo.isKernelHeapSubstituted; } -uint64_t Kernel::getKernelId(uint32_t rootDeviceIndex) const { - return getKernelInfo(rootDeviceIndex).kernelId; +uint64_t Kernel::getKernelId() const { + return kernelInfo.kernelId; } -void Kernel::setKernelId(uint32_t rootDeviceIndex, uint64_t newKernelId) { - KernelInfo *pKernelInfo = const_cast(&getKernelInfo(rootDeviceIndex)); +void Kernel::setKernelId(uint64_t newKernelId) { + KernelInfo *pKernelInfo = const_cast(&kernelInfo); pKernelInfo->kernelId = newKernelId; } uint32_t Kernel::getStartOffset() const { @@ -842,20 +831,20 @@ void Kernel::setStartOffset(uint32_t offset) { this->startOffset = offset; } -void *Kernel::getSurfaceStateHeap(uint32_t rootDeviceIndex) const { - return kernelInfos[rootDeviceIndex]->usesSsh ? pSshLocal.get() : nullptr; +void *Kernel::getSurfaceStateHeap() const { + return kernelInfo.usesSsh ? pSshLocal.get() : nullptr; } -size_t Kernel::getDynamicStateHeapSize(uint32_t rootDeviceIndex) const { - return getKernelInfo(rootDeviceIndex).heapInfo.DynamicStateHeapSize; +size_t Kernel::getDynamicStateHeapSize() const { + return kernelInfo.heapInfo.DynamicStateHeapSize; } -const void *Kernel::getDynamicStateHeap(uint32_t rootDeviceIndex) const { - return getKernelInfo(rootDeviceIndex).heapInfo.pDsh; +const void *Kernel::getDynamicStateHeap() const { + return kernelInfo.heapInfo.pDsh; } -size_t Kernel::getSurfaceStateHeapSize(uint32_t rootDeviceIndex) const { - return kernelInfos[rootDeviceIndex]->usesSsh +size_t Kernel::getSurfaceStateHeapSize() const { + return kernelInfo.usesSsh ? sshLocalSize : 0; } @@ -875,7 +864,7 @@ cl_int Kernel::setArg(uint32_t argIndex, size_t argSize, const void *argVal) { cl_int retVal = CL_SUCCESS; bool updateExposedKernel = true; auto argWasUncacheable = false; - auto &defaultKernelInfo = getDefaultKernelInfo(); + auto &defaultKernelInfo = kernelInfo; if (defaultKernelInfo.builtinDispatchBuilder != nullptr) { updateExposedKernel = defaultKernelInfo.builtinDispatchBuilder->setExplicitArg(argIndex, argSize, argVal, retVal); } @@ -915,7 +904,7 @@ cl_int Kernel::setArg(uint32_t argIndex, cl_mem argVal, uint32_t mipLevel) { return setArgImageWithMipLevel(argIndex, sizeof(argVal), &argVal, mipLevel); } -void *Kernel::patchBufferOffset(const KernelArgInfo &argInfo, void *svmPtr, GraphicsAllocation *svmAlloc, uint32_t rootDeviceIndex) { +void *Kernel::patchBufferOffset(const KernelArgInfo &argInfo, void *svmPtr, GraphicsAllocation *svmAlloc) { if (isInvalidOffset(argInfo.offsetBufferOffset)) { return svmPtr; } @@ -934,18 +923,16 @@ void *Kernel::patchBufferOffset(const KernelArgInfo &argInfo, void *svmPtr, Grap } cl_int Kernel::setArgSvm(uint32_t argIndex, size_t svmAllocSize, void *svmPtr, GraphicsAllocation *svmAlloc, cl_mem_flags svmFlags) { - auto rootDeviceIndex = getDevice().getRootDeviceIndex(); - auto &kernelInfo = getKernelInfo(rootDeviceIndex); - void *ptrToPatch = patchBufferOffset(kernelInfo.kernelArgInfo[argIndex], svmPtr, svmAlloc, rootDeviceIndex); + void *ptrToPatch = patchBufferOffset(kernelInfo.kernelArgInfo[argIndex], svmPtr, svmAlloc); setArgImmediate(argIndex, sizeof(void *), &svmPtr); storeKernelArg(argIndex, SVM_OBJ, nullptr, svmPtr, sizeof(void *), svmAlloc, svmFlags); - if (requiresSshForBuffers(rootDeviceIndex)) { + if (requiresSshForBuffers()) { const auto &kernelArgInfo = kernelInfo.kernelArgInfo[argIndex]; - auto surfaceState = ptrOffset(getSurfaceStateHeap(rootDeviceIndex), kernelArgInfo.offsetHeap); + auto surfaceState = ptrOffset(getSurfaceStateHeap(), kernelArgInfo.offsetHeap); Buffer::setSurfaceState(&getDevice().getDevice(), surfaceState, false, false, svmAllocSize + ptrDiff(svmPtr, ptrToPatch), ptrToPatch, 0, svmAlloc, svmFlags, 0, - getDefaultKernelInfo().kernelDescriptor.kernelAttributes.flags.useGlobalAtomics, getTotalNumDevicesInContext()); + kernelInfo.kernelDescriptor.kernelAttributes.flags.useGlobalAtomics, getTotalNumDevicesInContext()); } if (!kernelArguments[argIndex].isPatched) { patchedArgumentsNum++; @@ -959,13 +946,11 @@ cl_int Kernel::setArgSvm(uint32_t argIndex, size_t svmAllocSize, void *svmPtr, G cl_int Kernel::setArgSvmAlloc(uint32_t argIndex, void *svmPtr, GraphicsAllocation *svmAlloc) { DBG_LOG_INPUTS("setArgBuffer svm_alloc", svmAlloc); - auto rootDeviceIndex = getDevice().getRootDeviceIndex(); - auto &kernelInfo = getKernelInfo(rootDeviceIndex); const auto &kernelArgInfo = kernelInfo.kernelArgInfo[argIndex]; storeKernelArg(argIndex, SVM_ALLOC_OBJ, svmAlloc, svmPtr, sizeof(uintptr_t)); - void *ptrToPatch = patchBufferOffset(kernelArgInfo, svmPtr, svmAlloc, rootDeviceIndex); + void *ptrToPatch = patchBufferOffset(kernelArgInfo, svmPtr, svmAlloc); auto patchLocation = ptrOffset(crossThreadData, kernelArgInfo.kernelArgPatchInfoVector[0].crossthreadOffset); @@ -989,9 +974,9 @@ cl_int Kernel::setArgSvmAlloc(uint32_t argIndex, void *svmPtr, GraphicsAllocatio forceNonAuxMode = true; } - if (requiresSshForBuffers(rootDeviceIndex)) { + if (requiresSshForBuffers()) { const auto &kernelArgInfo = kernelInfo.kernelArgInfo[argIndex]; - auto surfaceState = ptrOffset(getSurfaceStateHeap(rootDeviceIndex), kernelArgInfo.offsetHeap); + auto surfaceState = ptrOffset(getSurfaceStateHeap(), kernelArgInfo.offsetHeap); size_t allocSize = 0; size_t offset = 0; if (svmAlloc != nullptr) { @@ -1000,7 +985,7 @@ cl_int Kernel::setArgSvmAlloc(uint32_t argIndex, void *svmPtr, GraphicsAllocatio allocSize -= offset; } Buffer::setSurfaceState(&getDevice().getDevice(), surfaceState, forceNonAuxMode, disableL3, allocSize, ptrToPatch, offset, svmAlloc, 0, 0, - getDefaultKernelInfo().kernelDescriptor.kernelAttributes.flags.useGlobalAtomics, getTotalNumDevicesInContext()); + kernelInfo.kernelDescriptor.kernelAttributes.flags.useGlobalAtomics, getTotalNumDevicesInContext()); } if (!kernelArguments[argIndex].isPatched) { @@ -1113,8 +1098,7 @@ void Kernel::getSuggestedLocalWorkSize(const cl_uint workDim, const size_t *glob } uint32_t Kernel::getMaxWorkGroupCount(const cl_uint workDim, const size_t *localWorkSize, const CommandQueue *commandQueue) const { - auto rootDeviceIndex = commandQueue->getDevice().getRootDeviceIndex(); - auto &hardwareInfo = getHardwareInfo(rootDeviceIndex); + auto &hardwareInfo = getHardwareInfo(); auto &hwHelper = HwHelper::get(hardwareInfo.platform.eRenderCoreFamily); auto engineGroupType = hwHelper.getEngineGroupType(commandQueue->getGpgpuEngine().getEngineType(), hardwareInfo); @@ -1122,7 +1106,7 @@ uint32_t Kernel::getMaxWorkGroupCount(const cl_uint workDim, const size_t *local return 0; } - const auto &kernelDescriptor = getKernelInfo(rootDeviceIndex).kernelDescriptor; + const auto &kernelDescriptor = kernelInfo.kernelDescriptor; auto dssCount = hardwareInfo.gtSystemInfo.DualSubSliceCount; if (dssCount == 0) { dssCount = hardwareInfo.gtSystemInfo.SubSliceCount; @@ -1133,7 +1117,7 @@ uint32_t Kernel::getMaxWorkGroupCount(const cl_uint workDim, const size_t *local hardwareInfo.gtSystemInfo.EUCount, hardwareInfo.gtSystemInfo.ThreadCount / hardwareInfo.gtSystemInfo.EUCount); auto barrierCount = kernelDescriptor.kernelAttributes.barrierCount; - return KernelHelper::getMaxWorkGroupCount(kernelInfos[rootDeviceIndex]->getMaxSimdSize(), + return KernelHelper::getMaxWorkGroupCount(kernelInfo.getMaxSimdSize(), availableThreadCount, dssCount, dssCount * KB * hardwareInfo.capabilityTable.slmSize, @@ -1145,7 +1129,7 @@ uint32_t Kernel::getMaxWorkGroupCount(const cl_uint workDim, const size_t *local } inline void Kernel::makeArgsResident(CommandStreamReceiver &commandStreamReceiver) { - auto numArgs = kernelInfos[commandStreamReceiver.getRootDeviceIndex()]->kernelArgInfo.size(); + auto numArgs = kernelInfo.kernelArgInfo.size(); for (decltype(numArgs) argIndex = 0; argIndex < numArgs; argIndex++) { if (kernelArguments[argIndex].object) { if (kernelArguments[argIndex].type == SVM_ALLOC_OBJ) { @@ -1181,7 +1165,7 @@ void Kernel::performKernelTunning(CommandStreamReceiver &commandStreamReceiver, } if (performTunning == TunningType::SIMPLE) { - this->singleSubdevicePreferedInCurrentEnqueue = !this->getKernelInfo(commandStreamReceiver.getRootDeviceIndex()).kernelDescriptor.kernelAttributes.flags.useGlobalAtomics; + this->singleSubdevicePreferedInCurrentEnqueue = !this->kernelInfo.kernelDescriptor.kernelAttributes.flags.useGlobalAtomics; } else if (performTunning == TunningType::FULL) { KernelConfig config{gws, lws, offsets}; @@ -1292,7 +1276,7 @@ void Kernel::makeResident(CommandStreamReceiver &commandStreamReceiver) { } makeArgsResident(commandStreamReceiver); - auto kernelIsaAllocation = this->kernelInfos[rootDeviceIndex]->kernelAllocation; + auto kernelIsaAllocation = this->kernelInfo.kernelAllocation; if (kernelIsaAllocation) { commandStreamReceiver.makeResident(*kernelIsaAllocation); } @@ -1333,7 +1317,7 @@ void Kernel::getResidency(std::vector &dst) { dst.push_back(surface); } - auto numArgs = kernelInfos[rootDeviceIndex]->kernelArgInfo.size(); + auto numArgs = kernelInfo.kernelArgInfo.size(); for (decltype(numArgs) argIndex = 0; argIndex < numArgs; argIndex++) { if (kernelArguments[argIndex].object) { if (kernelArguments[argIndex].type == SVM_ALLOC_OBJ) { @@ -1348,7 +1332,7 @@ void Kernel::getResidency(std::vector &dst) { } } - auto kernelIsaAllocation = this->kernelInfos[rootDeviceIndex]->kernelAllocation; + auto kernelIsaAllocation = this->kernelInfo.kernelAllocation; if (kernelIsaAllocation) { GeneralSurface *surface = new GeneralSurface(kernelIsaAllocation); dst.push_back(surface); @@ -1358,7 +1342,7 @@ void Kernel::getResidency(std::vector &dst) { } bool Kernel::requiresCoherency() { - auto numArgs = getDefaultKernelInfo().kernelArgInfo.size(); + auto numArgs = kernelInfo.kernelArgInfo.size(); for (decltype(numArgs) argIndex = 0; argIndex < numArgs; argIndex++) { if (kernelArguments[argIndex].object) { if (kernelArguments[argIndex].type == SVM_ALLOC_OBJ) { @@ -1384,9 +1368,6 @@ cl_int Kernel::setArgLocal(uint32_t argIndexIn, size_t argSize, const void *argVal) { storeKernelArg(argIndexIn, SLM_OBJ, nullptr, argVal, argSize); - auto pClDevice = &getDevice(); - auto rootDeviceIndex = pClDevice->getRootDeviceIndex(); - auto &kernelInfo = *kernelInfos[rootDeviceIndex]; uint32_t *crossThreadData = reinterpret_cast(this->crossThreadData); uint32_t argIndex = argIndexIn; @@ -1450,8 +1431,8 @@ cl_int Kernel::setArgBuffer(uint32_t argIndex, if (buffer->peekSharingHandler()) { usingSharedObjArgs = true; } - const auto &kernelArgInfo = getKernelInfo(rootDeviceIndex).kernelArgInfo[argIndex]; - patchBufferOffset(kernelArgInfo, nullptr, nullptr, rootDeviceIndex); + const auto &kernelArgInfo = kernelInfo.kernelArgInfo[argIndex]; + patchBufferOffset(kernelArgInfo, nullptr, nullptr); auto graphicsAllocation = buffer->getGraphicsAllocation(rootDeviceIndex); auto patchLocation = ptrOffset(crossThreadData, @@ -1484,10 +1465,10 @@ cl_int Kernel::setArgBuffer(uint32_t argIndex, forceNonAuxMode = true; } - if (requiresSshForBuffers(rootDeviceIndex)) { - auto surfaceState = ptrOffset(getSurfaceStateHeap(rootDeviceIndex), kernelArgInfo.offsetHeap); + if (requiresSshForBuffers()) { + auto surfaceState = ptrOffset(getSurfaceStateHeap(), kernelArgInfo.offsetHeap); buffer->setArgStateful(surfaceState, forceNonAuxMode, disableL3, isAuxTranslationKernel, kernelArgInfo.isReadOnly, pClDevice->getDevice(), - getDefaultKernelInfo().kernelDescriptor.kernelAttributes.flags.useGlobalAtomics, getTotalNumDevicesInContext()); + kernelInfo.kernelDescriptor.kernelAttributes.flags.useGlobalAtomics, getTotalNumDevicesInContext()); } kernelArguments[argIndex].isStatelessUncacheable = kernelArgInfo.pureStatefulBufferAccess ? false : buffer->isMemObjUncacheable(); @@ -1504,17 +1485,17 @@ cl_int Kernel::setArgBuffer(uint32_t argIndex, return CL_SUCCESS; } else { storeKernelArg(argIndex, BUFFER_OBJ, nullptr, argVal, argSize); - const auto &kernelArgInfo = getKernelInfo(rootDeviceIndex).kernelArgInfo[argIndex]; - patchBufferOffset(kernelArgInfo, nullptr, nullptr, rootDeviceIndex); + const auto &kernelArgInfo = kernelInfo.kernelArgInfo[argIndex]; + patchBufferOffset(kernelArgInfo, nullptr, nullptr); auto patchLocation = ptrOffset(crossThreadData, kernelArgInfo.kernelArgPatchInfoVector[0].crossthreadOffset); patchWithRequiredSize(patchLocation, kernelArgInfo.kernelArgPatchInfoVector[0].size, 0u); - if (requiresSshForBuffers(rootDeviceIndex)) { - auto surfaceState = ptrOffset(getSurfaceStateHeap(rootDeviceIndex), kernelArgInfo.offsetHeap); + if (requiresSshForBuffers()) { + auto surfaceState = ptrOffset(getSurfaceStateHeap(), kernelArgInfo.offsetHeap); Buffer::setSurfaceState(&pClDevice->getDevice(), surfaceState, false, false, 0, nullptr, 0, nullptr, 0, 0, - getDefaultKernelInfo().kernelDescriptor.kernelAttributes.flags.useGlobalAtomics, getTotalNumDevicesInContext()); + kernelInfo.kernelDescriptor.kernelAttributes.flags.useGlobalAtomics, getTotalNumDevicesInContext()); } return CL_SUCCESS; @@ -1551,7 +1532,7 @@ cl_int Kernel::setArgPipe(uint32_t argIndex, return CL_INVALID_MEM_OBJECT; } auto rootDeviceIndex = getDevice().getRootDeviceIndex(); - const auto &kernelArgInfo = getKernelInfo(rootDeviceIndex).kernelArgInfo[argIndex]; + const auto &kernelArgInfo = kernelInfo.kernelArgInfo[argIndex]; auto patchLocation = ptrOffset(crossThreadData, kernelArgInfo.kernelArgPatchInfoVector[0].crossthreadOffset); @@ -1561,12 +1542,12 @@ cl_int Kernel::setArgPipe(uint32_t argIndex, auto graphicsAllocation = pipe->getGraphicsAllocation(rootDeviceIndex); - if (requiresSshForBuffers(rootDeviceIndex)) { - auto surfaceState = ptrOffset(getSurfaceStateHeap(rootDeviceIndex), kernelArgInfo.offsetHeap); + if (requiresSshForBuffers()) { + auto surfaceState = ptrOffset(getSurfaceStateHeap(), kernelArgInfo.offsetHeap); Buffer::setSurfaceState(&getDevice().getDevice(), surfaceState, false, false, pipe->getSize(), pipe->getCpuAddress(), 0, graphicsAllocation, 0, 0, - getDefaultKernelInfo().kernelDescriptor.kernelAttributes.flags.useGlobalAtomics, getTotalNumDevicesInContext()); + kernelInfo.kernelDescriptor.kernelAttributes.flags.useGlobalAtomics, getTotalNumDevicesInContext()); } return CL_SUCCESS; @@ -1586,8 +1567,7 @@ cl_int Kernel::setArgImageWithMipLevel(uint32_t argIndex, const void *argVal, uint32_t mipLevel) { auto retVal = CL_INVALID_ARG_VALUE; auto rootDeviceIndex = getDevice().getRootDeviceIndex(); - auto &kernelInfo = getKernelInfo(rootDeviceIndex); - patchBufferOffset(kernelInfo.kernelArgInfo[argIndex], nullptr, nullptr, rootDeviceIndex); + patchBufferOffset(kernelInfo.kernelArgInfo[argIndex], nullptr, nullptr); uint32_t *crossThreadData = reinterpret_cast(this->crossThreadData); auto clMemObj = *(static_cast(argVal)); @@ -1603,7 +1583,7 @@ cl_int Kernel::setArgImageWithMipLevel(uint32_t argIndex, storeKernelArg(argIndex, IMAGE_OBJ, clMemObj, argVal, argSize); - auto surfaceState = ptrOffset(getSurfaceStateHeap(rootDeviceIndex), kernelArgInfo.offsetHeap); + auto surfaceState = ptrOffset(getSurfaceStateHeap(), kernelArgInfo.offsetHeap); DEBUG_BREAK_IF(!kernelArgInfo.isImage); // Sets SS structure @@ -1654,9 +1634,7 @@ cl_int Kernel::setArgImmediate(uint32_t argIndex, if (argVal) { storeKernelArg(argIndex, NONE_OBJ, nullptr, nullptr, argSize); - auto pClDevice = &getDevice(); - auto rootDeviceIndex = pClDevice->getRootDeviceIndex(); - const auto &kernelArgInfo = getKernelInfo(rootDeviceIndex).kernelArgInfo[argIndex]; + const auto &kernelArgInfo = kernelInfo.kernelArgInfo[argIndex]; DEBUG_BREAK_IF(kernelArgInfo.kernelArgPatchInfoVector.size() <= 0); auto crossThreadDataEnd = ptrOffset(crossThreadData, crossThreadDataSize); @@ -1694,7 +1672,6 @@ cl_int Kernel::setArgSampler(uint32_t argIndex, uint32_t *crossThreadData = reinterpret_cast(this->crossThreadData); auto clSamplerObj = *(static_cast(argVal)); auto pSampler = castToObject(clSamplerObj); - auto rootDeviceIndex = getDevice().getRootDeviceIndex(); if (pSampler) { pSampler->incRefInternal(); @@ -1707,11 +1684,11 @@ cl_int Kernel::setArgSampler(uint32_t argIndex, } if (pSampler && argSize == sizeof(cl_sampler *)) { - const auto &kernelArgInfo = getKernelInfo(rootDeviceIndex).kernelArgInfo[argIndex]; + const auto &kernelArgInfo = kernelInfo.kernelArgInfo[argIndex]; storeKernelArg(argIndex, SAMPLER_OBJ, clSamplerObj, argVal, argSize); - auto dsh = getDynamicStateHeap(rootDeviceIndex); + auto dsh = getDynamicStateHeap(); auto samplerState = ptrOffset(dsh, kernelArgInfo.offsetHeap); pSampler->setArg(const_cast(samplerState), getProgram()->getDevices()[0]->getHardwareInfo()); @@ -1744,12 +1721,11 @@ cl_int Kernel::setArgAccelerator(uint32_t argIndex, DBG_LOG_INPUTS("setArgAccelerator cl_mem", clAcceleratorObj); const auto pAccelerator = castToObject(clAcceleratorObj); - auto rootDeviceIndex = getDevice().getRootDeviceIndex(); if (pAccelerator) { storeKernelArg(argIndex, ACCELERATOR_OBJ, clAcceleratorObj, argVal, argSize); - const auto &kernelArgInfo = getKernelInfo(rootDeviceIndex).kernelArgInfo[argIndex]; + const auto &kernelArgInfo = kernelInfo.kernelArgInfo[argIndex]; if (kernelArgInfo.samplerArgumentType == iOpenCL::SAMPLER_OBJECT_VME) { @@ -1795,11 +1771,10 @@ cl_int Kernel::setArgDevQueue(uint32_t argIndex, if (pDeviceQueue == nullptr) { return CL_INVALID_DEVICE_QUEUE; } - auto rootDeviceIndex = pDeviceQueue->getDevice().getRootDeviceIndex(); storeKernelArg(argIndex, DEVICE_QUEUE_OBJ, clDeviceQueue, argVal, argSize); - const auto &kernelArgPatchInfo = kernelInfos[rootDeviceIndex]->kernelArgInfo[argIndex].kernelArgPatchInfoVector[0]; + const auto &kernelArgPatchInfo = kernelInfo.kernelArgInfo[argIndex].kernelArgPatchInfoVector[0]; auto patchLocation = ptrOffset(reinterpret_cast(crossThreadData), kernelArgPatchInfo.crossthreadOffset); @@ -1830,7 +1805,6 @@ void Kernel::unsetArg(uint32_t argIndex) { void Kernel::createReflectionSurface() { auto pClDevice = program->getDevices()[0]; - auto rootDeviceIndex = pClDevice->getRootDeviceIndex(); if (this->isParentKernel && kernelReflectionSurface == nullptr) { auto &hwInfo = pClDevice->getHardwareInfo(); auto &hwHelper = HwHelper::get(hwInfo.platform.eRenderCoreFamily); @@ -1850,7 +1824,7 @@ void Kernel::createReflectionSurface() { size_t kernelReflectionSize = alignUp(sizeof(IGIL_KernelDataHeader) + blockCount * sizeof(IGIL_KernelAddressData), sizeof(void *)); uint32_t kernelDataOffset = static_cast(kernelReflectionSize); - uint32_t parentSSHAlignedSize = alignUp(this->kernelInfos[rootDeviceIndex]->heapInfo.SurfaceStateHeapSize, hwHelper.getBindingTableStateAlignement()); + uint32_t parentSSHAlignedSize = alignUp(this->kernelInfo.heapInfo.SurfaceStateHeapSize, hwHelper.getBindingTableStateAlignement()); uint32_t btOffset = parentSSHAlignedSize; for (uint32_t i = 0; i < blockCount; i++) { @@ -1977,8 +1951,8 @@ void Kernel::getParentObjectCounts(ObjectCounts &objectCount) { } } -bool Kernel::hasPrintfOutput(uint32_t rootDeviceIndex) const { - return getKernelInfo(rootDeviceIndex).kernelDescriptor.kernelAttributes.flags.usesPrintf; +bool Kernel::hasPrintfOutput() const { + return kernelInfo.kernelDescriptor.kernelAttributes.flags.usesPrintf; } size_t Kernel::getInstructionHeapSizeForExecutionModel() const { @@ -2394,39 +2368,36 @@ void Kernel::provideInitializationHints() { return; auto pClDevice = &getDevice(); - auto rootDeviceIndex = pClDevice->getRootDeviceIndex(); if (privateSurfaceSize) { context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_BAD_INTEL, PRIVATE_MEMORY_USAGE_TOO_HIGH, - kernelInfos[rootDeviceIndex]->kernelDescriptor.kernelMetadata.kernelName.c_str(), + kernelInfo.kernelDescriptor.kernelMetadata.kernelName.c_str(), privateSurfaceSize); } - auto scratchSize = kernelInfos[rootDeviceIndex]->kernelDescriptor.kernelAttributes.perThreadScratchSize[0] * - pClDevice->getSharedDeviceInfo().computeUnitsUsedForScratch * getKernelInfo(rootDeviceIndex).getMaxSimdSize(); + auto scratchSize = kernelInfo.kernelDescriptor.kernelAttributes.perThreadScratchSize[0] * + pClDevice->getSharedDeviceInfo().computeUnitsUsedForScratch * kernelInfo.getMaxSimdSize(); if (scratchSize > 0) { context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_BAD_INTEL, REGISTER_PRESSURE_TOO_HIGH, - kernelInfos[rootDeviceIndex]->kernelDescriptor.kernelMetadata.kernelName.c_str(), scratchSize); + kernelInfo.kernelDescriptor.kernelMetadata.kernelName.c_str(), scratchSize); } } void Kernel::patchDefaultDeviceQueue(DeviceQueue *devQueue) { - auto rootDeviceIndex = devQueue->getDevice().getRootDeviceIndex(); - const auto &defaultQueueSurfaceAddress = kernelInfos[rootDeviceIndex]->kernelDescriptor.payloadMappings.implicitArgs.deviceSideEnqueueDefaultQueueSurfaceAddress; + const auto &defaultQueueSurfaceAddress = kernelInfo.kernelDescriptor.payloadMappings.implicitArgs.deviceSideEnqueueDefaultQueueSurfaceAddress; if (isValidOffset(defaultQueueSurfaceAddress.stateless) && crossThreadData) { auto patchLocation = ptrOffset(reinterpret_cast(crossThreadData), defaultQueueSurfaceAddress.stateless); patchWithRequiredSize(patchLocation, defaultQueueSurfaceAddress.pointerSize, static_cast(devQueue->getQueueBuffer()->getGpuAddressToPatch())); } if (isValidOffset(defaultQueueSurfaceAddress.bindful)) { - auto surfaceState = ptrOffset(reinterpret_cast(getSurfaceStateHeap(rootDeviceIndex)), defaultQueueSurfaceAddress.bindful); + auto surfaceState = ptrOffset(reinterpret_cast(getSurfaceStateHeap()), defaultQueueSurfaceAddress.bindful); Buffer::setSurfaceState(&devQueue->getDevice(), surfaceState, false, false, devQueue->getQueueBuffer()->getUnderlyingBufferSize(), (void *)devQueue->getQueueBuffer()->getGpuAddress(), 0, devQueue->getQueueBuffer(), 0, 0, - getDefaultKernelInfo().kernelDescriptor.kernelAttributes.flags.useGlobalAtomics, getTotalNumDevicesInContext()); + kernelInfo.kernelDescriptor.kernelAttributes.flags.useGlobalAtomics, getTotalNumDevicesInContext()); } } void Kernel::patchEventPool(DeviceQueue *devQueue) { - auto rootDeviceIndex = devQueue->getDevice().getRootDeviceIndex(); - const auto &eventPoolSurfaceAddress = kernelInfos[rootDeviceIndex]->kernelDescriptor.payloadMappings.implicitArgs.deviceSideEnqueueEventPoolSurfaceAddress; + const auto &eventPoolSurfaceAddress = kernelInfo.kernelDescriptor.payloadMappings.implicitArgs.deviceSideEnqueueEventPoolSurfaceAddress; if (isValidOffset(eventPoolSurfaceAddress.stateless) && crossThreadData) { auto patchLocation = ptrOffset(reinterpret_cast(crossThreadData), eventPoolSurfaceAddress.stateless); @@ -2435,18 +2406,18 @@ void Kernel::patchEventPool(DeviceQueue *devQueue) { } if (isValidOffset(eventPoolSurfaceAddress.bindful)) { - auto surfaceState = ptrOffset(reinterpret_cast(getSurfaceStateHeap(rootDeviceIndex)), eventPoolSurfaceAddress.bindful); + auto surfaceState = ptrOffset(reinterpret_cast(getSurfaceStateHeap()), eventPoolSurfaceAddress.bindful); auto eventPoolBuffer = devQueue->getEventPoolBuffer(); Buffer::setSurfaceState(&devQueue->getDevice(), surfaceState, false, false, eventPoolBuffer->getUnderlyingBufferSize(), (void *)eventPoolBuffer->getGpuAddress(), 0, eventPoolBuffer, 0, 0, - getDefaultKernelInfo().kernelDescriptor.kernelAttributes.flags.useGlobalAtomics, getTotalNumDevicesInContext()); + kernelInfo.kernelDescriptor.kernelAttributes.flags.useGlobalAtomics, getTotalNumDevicesInContext()); } } -void Kernel::patchBlocksSimdSize(uint32_t rootDeviceIndex) { +void Kernel::patchBlocksSimdSize() { BlockKernelManager *blockManager = program->getBlockKernelManager(); - for (auto &idOffset : kernelInfos[rootDeviceIndex]->childrenKernelsIdOffset) { + for (auto &idOffset : kernelInfo.childrenKernelsIdOffset) { DEBUG_BREAK_IF(!(idOffset.first < static_cast(blockManager->getCount()))); @@ -2456,35 +2427,34 @@ void Kernel::patchBlocksSimdSize(uint32_t rootDeviceIndex) { } } -bool Kernel::usesSyncBuffer(uint32_t rootDeviceIndex) { - return getKernelInfo(rootDeviceIndex).kernelDescriptor.kernelAttributes.flags.usesSyncBuffer; +bool Kernel::usesSyncBuffer() { + return kernelInfo.kernelDescriptor.kernelAttributes.flags.usesSyncBuffer; } void Kernel::patchSyncBuffer(Device &device, GraphicsAllocation *gfxAllocation, size_t bufferOffset) { - auto rootDeviceIndex = device.getRootDeviceIndex(); - const auto &syncBuffer = kernelInfos[rootDeviceIndex]->kernelDescriptor.payloadMappings.implicitArgs.syncBufferAddress; + const auto &syncBuffer = kernelInfo.kernelDescriptor.payloadMappings.implicitArgs.syncBufferAddress; auto bufferPatchAddress = ptrOffset(crossThreadData, syncBuffer.stateless); patchWithRequiredSize(bufferPatchAddress, syncBuffer.pointerSize, ptrOffset(gfxAllocation->getGpuAddressToPatch(), bufferOffset)); if (isValidOffset(syncBuffer.bindful)) { - auto surfaceState = ptrOffset(reinterpret_cast(getSurfaceStateHeap(rootDeviceIndex)), syncBuffer.bindful); + auto surfaceState = ptrOffset(reinterpret_cast(getSurfaceStateHeap()), syncBuffer.bindful); auto addressToPatch = gfxAllocation->getUnderlyingBuffer(); auto sizeToPatch = gfxAllocation->getUnderlyingBufferSize(); Buffer::setSurfaceState(&device, surfaceState, false, false, sizeToPatch, addressToPatch, 0, gfxAllocation, 0, 0, - getDefaultKernelInfo().kernelDescriptor.kernelAttributes.flags.useGlobalAtomics, getTotalNumDevicesInContext()); + kernelInfo.kernelDescriptor.kernelAttributes.flags.useGlobalAtomics, getTotalNumDevicesInContext()); } } template void Kernel::patchReflectionSurface(DeviceQueue *, PrintfHandler *); bool Kernel::isPatched() const { - return patchedArgumentsNum == getDefaultKernelInfo().argumentsToPatchNum; + return patchedArgumentsNum == kernelInfo.argumentsToPatchNum; } cl_int Kernel::checkCorrectImageAccessQualifier(cl_uint argIndex, size_t argSize, const void *argValue) const { - auto &defaultKernelInfo = getDefaultKernelInfo(); + auto &defaultKernelInfo = kernelInfo; if (defaultKernelInfo.kernelArgInfo[argIndex].isImage) { cl_mem mem = *(static_cast(argValue)); MemObj *pMemObj = nullptr; @@ -2508,7 +2478,7 @@ void Kernel::resolveArgs() { return; bool canTransformImageTo2dArray = true; for (uint32_t i = 0; i < patchedArgumentsNum; i++) { - if (getDefaultKernelInfo().kernelArgInfo.at(i).isSampler) { + if (kernelInfo.kernelArgInfo.at(i).isSampler) { auto sampler = castToObject(kernelArguments.at(i).object); if (sampler->isTransformable()) { canTransformImageTo2dArray = true; @@ -2519,14 +2489,10 @@ void Kernel::resolveArgs() { } } - auto pClDevice = &getDevice(); - auto rootDeviceIndex = pClDevice->getRootDeviceIndex(); - - auto pKernelInfo = kernelInfos[rootDeviceIndex]; if (canTransformImageTo2dArray) { - imageTransformer->transformImagesTo2dArray(*pKernelInfo, kernelArguments, getSurfaceStateHeap(rootDeviceIndex)); + imageTransformer->transformImagesTo2dArray(kernelInfo, kernelArguments, getSurfaceStateHeap()); } else if (imageTransformer->didTransform()) { - imageTransformer->transformImagesTo3d(*pKernelInfo, kernelArguments, getSurfaceStateHeap(rootDeviceIndex)); + imageTransformer->transformImagesTo3d(kernelInfo, kernelArguments, getSurfaceStateHeap()); } } @@ -2535,9 +2501,8 @@ bool Kernel::canTransformImages() const { return renderCoreFamily >= IGFX_GEN9_CORE && renderCoreFamily <= IGFX_GEN11LP_CORE; } -void Kernel::fillWithKernelObjsForAuxTranslation(KernelObjsForAuxTranslation &kernelObjsForAuxTranslation, uint32_t rootDeviceIndex) { +void Kernel::fillWithKernelObjsForAuxTranslation(KernelObjsForAuxTranslation &kernelObjsForAuxTranslation) { kernelObjsForAuxTranslation.reserve(getKernelArgsNumber()); - auto &kernelInfo = getKernelInfo(rootDeviceIndex); for (uint32_t i = 0; i < getKernelArgsNumber(); i++) { if (BUFFER_OBJ == kernelArguments.at(i).type && !kernelInfo.kernelArgInfo.at(i).pureStatefulBufferAccess) { auto buffer = castToObject(getKernelArg(i)); @@ -2566,13 +2531,13 @@ void Kernel::fillWithKernelObjsForAuxTranslation(KernelObjsForAuxTranslation &ke bool Kernel::hasDirectStatelessAccessToHostMemory() const { for (uint32_t i = 0; i < getKernelArgsNumber(); i++) { - if (BUFFER_OBJ == kernelArguments.at(i).type && !getDefaultKernelInfo().kernelArgInfo.at(i).pureStatefulBufferAccess) { + if (BUFFER_OBJ == kernelArguments.at(i).type && !kernelInfo.kernelArgInfo.at(i).pureStatefulBufferAccess) { auto buffer = castToObject(getKernelArg(i)); if (buffer && buffer->getMultiGraphicsAllocation().getAllocationType() == GraphicsAllocation::AllocationType::BUFFER_HOST_MEMORY) { return true; } } - if (SVM_ALLOC_OBJ == kernelArguments.at(i).type && !getDefaultKernelInfo().kernelArgInfo.at(i).pureStatefulBufferAccess) { + if (SVM_ALLOC_OBJ == kernelArguments.at(i).type && !kernelInfo.kernelArgInfo.at(i).pureStatefulBufferAccess) { auto svmAlloc = reinterpret_cast(getKernelArg(i)); if (svmAlloc && svmAlloc->getAllocationType() == GraphicsAllocation::AllocationType::BUFFER_HOST_MEMORY) { return true; @@ -2583,7 +2548,7 @@ bool Kernel::hasDirectStatelessAccessToHostMemory() const { } bool Kernel::hasIndirectStatelessAccessToHostMemory() const { - if (!getDefaultKernelInfo().hasIndirectStatelessAccess) { + if (!kernelInfo.hasIndirectStatelessAccess) { return false; } @@ -2600,8 +2565,8 @@ bool Kernel::hasIndirectStatelessAccessToHostMemory() const { return false; } -void Kernel::getAllocationsForCacheFlush(CacheFlushAllocationsVec &out, uint32_t rootDeviceIndex) const { - if (false == HwHelper::cacheFlushAfterWalkerSupported(getHardwareInfo(rootDeviceIndex))) { +void Kernel::getAllocationsForCacheFlush(CacheFlushAllocationsVec &out) const { + if (false == HwHelper::cacheFlushAfterWalkerSupported(getHardwareInfo())) { return; } for (GraphicsAllocation *alloc : this->kernelArgRequiresCacheFlush) { @@ -2612,6 +2577,7 @@ void Kernel::getAllocationsForCacheFlush(CacheFlushAllocationsVec &out, uint32_t out.push_back(alloc); } + auto rootDeviceIndex = getDevice().getRootDeviceIndex(); auto global = getProgram()->getGlobalSurface(rootDeviceIndex); if (global != nullptr) { out.push_back(global); @@ -2654,33 +2620,30 @@ bool Kernel::checkIfIsParentKernelAndBlocksUsesPrintf() { uint64_t Kernel::getKernelStartOffset( const bool localIdsGenerationByRuntime, const bool kernelUsesLocalIds, - const bool isCssUsed, - uint32_t rootDeviceIndex) const { + const bool isCssUsed) const { uint64_t kernelStartOffset = 0; - if (kernelInfos[rootDeviceIndex]->getGraphicsAllocation()) { - kernelStartOffset = kernelInfos[rootDeviceIndex]->getGraphicsAllocation()->getGpuAddressToPatch(); + if (kernelInfo.getGraphicsAllocation()) { + kernelStartOffset = kernelInfo.getGraphicsAllocation()->getGpuAddressToPatch(); if (localIdsGenerationByRuntime == false && kernelUsesLocalIds == true) { - kernelStartOffset += kernelInfos[rootDeviceIndex]->kernelDescriptor.entryPoints.skipPerThreadDataLoad; + kernelStartOffset += kernelInfo.kernelDescriptor.entryPoints.skipPerThreadDataLoad; } } kernelStartOffset += getStartOffset(); - auto &hardwareInfo = getHardwareInfo(rootDeviceIndex); + auto &hardwareInfo = getHardwareInfo(); auto &hwHelper = HwHelper::get(hardwareInfo.platform.eRenderCoreFamily); if (isCssUsed && hwHelper.isOffsetToSkipSetFFIDGPWARequired(hardwareInfo)) { - kernelStartOffset += kernelInfos[rootDeviceIndex]->kernelDescriptor.entryPoints.skipSetFFIDGP; + kernelStartOffset += kernelInfo.kernelDescriptor.entryPoints.skipSetFFIDGP; } return kernelStartOffset; } void Kernel::patchBindlessSurfaceStateOffsets(const Device &device, const size_t sshOffset) { - auto rootDeviceIndex = device.getRootDeviceIndex(); - auto &kernelInfo = *kernelInfos[rootDeviceIndex]; const bool bindlessUsed = kernelInfo.kernelDescriptor.kernelAttributes.bufferAddressingMode == KernelDescriptor::AddressingMode::BindlessAndStateless; if (bindlessUsed) { @@ -2710,9 +2673,10 @@ uint32_t Kernel::getAdditionalKernelExecInfo() const { return this->additionalKernelExecInfo; } -bool Kernel::requiresWaDisableRccRhwoOptimization(uint32_t rootDeviceIndex) const { - auto &hardwareInfo = getHardwareInfo(rootDeviceIndex); +bool Kernel::requiresWaDisableRccRhwoOptimization() const { + auto &hardwareInfo = getHardwareInfo(); auto &hwHelper = HwHelper::get(hardwareInfo.platform.eRenderCoreFamily); + auto rootDeviceIndex = getDevice().getRootDeviceIndex(); if (hwHelper.isWaDisableRccRhwoOptimizationRequired() && isUsingSharedObjArgs()) { for (auto &arg : getKernelArguments()) { @@ -2731,13 +2695,10 @@ bool Kernel::requiresWaDisableRccRhwoOptimization(uint32_t rootDeviceIndex) cons return false; } -const HardwareInfo &Kernel::getHardwareInfo(uint32_t rootDeviceIndex) const { - return *executionEnvironment.rootDeviceEnvironments[rootDeviceIndex]->getHardwareInfo(); +const HardwareInfo &Kernel::getHardwareInfo() const { + return getDevice().getHardwareInfo(); } -const KernelInfo &Kernel::getDefaultKernelInfo() const { - return *kernelInfos[defaultRootDeviceIndex]; -} void Kernel::setGlobalWorkOffsetValues(uint32_t globalWorkOffsetX, uint32_t globalWorkOffsetY, uint32_t globalWorkOffsetZ) { *this->globalWorkOffsetX = globalWorkOffsetX; *this->globalWorkOffsetY = globalWorkOffsetY; diff --git a/opencl/source/kernel/kernel.h b/opencl/source/kernel/kernel.h index 92b6c48b5f..1186d49e4f 100644 --- a/opencl/source/kernel/kernel.h +++ b/opencl/source/kernel/kernel.h @@ -88,11 +88,11 @@ class Kernel : public ReferenceTrackedObject { const void *argVal); template - static kernel_t *create(program_t *program, const KernelInfoContainer &kernelInfos, ClDevice &clDevice, cl_int *errcodeRet) { + static kernel_t *create(program_t *program, const KernelInfo &kernelInfo, ClDevice &clDevice, cl_int *errcodeRet) { cl_int retVal; kernel_t *pKernel = nullptr; - pKernel = new kernel_t(program, kernelInfos, clDevice); + pKernel = new kernel_t(program, kernelInfo, clDevice); retVal = pKernel->initialize(); if (retVal != CL_SUCCESS) { @@ -107,7 +107,7 @@ class Kernel : public ReferenceTrackedObject { if (FileLoggerInstance().enabled()) { std::string source; program->getSource(source); - FileLoggerInstance().dumpKernel(kernelInfos[program->getDevices()[0]->getRootDeviceIndex()]->kernelDescriptor.kernelMetadata.kernelName, source); + FileLoggerInstance().dumpKernel(kernelInfo.kernelDescriptor.kernelMetadata.kernelName, source); } return pKernel; @@ -152,7 +152,7 @@ class Kernel : public ReferenceTrackedObject { cl_int getInfo(cl_kernel_info paramName, size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet) const; void getAdditionalInfo(cl_kernel_info paramName, const void *¶mValue, size_t ¶mValueSizeRet) const; - void getAdditionalWorkGroupInfo(cl_kernel_work_group_info paramName, const void *¶mValue, size_t ¶mValueSizeRet, uint32_t rootDeviceIndex) const; + void getAdditionalWorkGroupInfo(cl_kernel_work_group_info paramName, const void *¶mValue, size_t ¶mValueSizeRet) const; cl_int getArgInfo(cl_uint argIndx, cl_kernel_arg_info paramName, size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet) const; @@ -165,13 +165,13 @@ class Kernel : public ReferenceTrackedObject { size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet) const; - const void *getKernelHeap(uint32_t rootDeviceIndex) const; - void *getSurfaceStateHeap(uint32_t rootDeviceIndex) const; - const void *getDynamicStateHeap(uint32_t rootDeviceIndex) const; + const void *getKernelHeap() const; + void *getSurfaceStateHeap() const; + const void *getDynamicStateHeap() const; - size_t getKernelHeapSize(uint32_t rootDeviceIndex) const; - size_t getSurfaceStateHeapSize(uint32_t rootDeviceIndex) const; - size_t getDynamicStateHeapSize(uint32_t rootDeviceIndex) const; + size_t getKernelHeapSize() const; + size_t getSurfaceStateHeapSize() const; + size_t getDynamicStateHeapSize() const; size_t getNumberOfBindingTableStates() const; size_t getBindingTableOffset() const { return localBindingTableOffset; @@ -180,9 +180,9 @@ class Kernel : public ReferenceTrackedObject { void resizeSurfaceStateHeap(void *pNewSsh, size_t newSshSize, size_t newBindingTableCount, size_t newBindingTableOffset); void substituteKernelHeap(const Device &device, void *newKernelHeap, size_t newKernelHeapSize); - bool isKernelHeapSubstituted(uint32_t rootDeviceIndex) const; - uint64_t getKernelId(uint32_t rootDeviceIndex) const; - void setKernelId(uint32_t rootDeviceIndex, uint64_t newKernelId); + bool isKernelHeapSubstituted() const; + uint64_t getKernelId() const; + void setKernelId(uint64_t newKernelId); uint32_t getStartOffset() const; void setStartOffset(uint32_t offset); @@ -194,15 +194,12 @@ class Kernel : public ReferenceTrackedObject { return kernelArguments.size(); } - bool requiresSshForBuffers(uint32_t rootDeviceIndex) const { - return getKernelInfo(rootDeviceIndex).requiresSshForBuffers; + bool requiresSshForBuffers() const { + return kernelInfo.requiresSshForBuffers; } - const KernelInfo &getKernelInfo(uint32_t rootDeviceIndex) const { - return *kernelInfos[rootDeviceIndex]; - } - const KernelInfoContainer &getKernelInfos() const { - return kernelInfos; + const KernelInfo &getKernelInfo() const { + return kernelInfo; } Context &getContext() const { @@ -211,12 +208,12 @@ class Kernel : public ReferenceTrackedObject { Program *getProgram() const { return program; } - uint32_t getScratchSize(uint32_t rootDeviceIndex) { - return getKernelInfo(rootDeviceIndex).kernelDescriptor.kernelAttributes.perThreadScratchSize[0]; + uint32_t getScratchSize() { + return kernelInfo.kernelDescriptor.kernelAttributes.perThreadScratchSize[0]; } - uint32_t getPrivateScratchSize(uint32_t rootDeviceIndex) { - return getKernelInfo(rootDeviceIndex).kernelDescriptor.kernelAttributes.perThreadScratchSize[1]; + uint32_t getPrivateScratchSize() { + return kernelInfo.kernelDescriptor.kernelAttributes.perThreadScratchSize[1]; } void createReflectionSurface(); @@ -225,8 +222,8 @@ class Kernel : public ReferenceTrackedObject { void patchDefaultDeviceQueue(DeviceQueue *devQueue); void patchEventPool(DeviceQueue *devQueue); - void patchBlocksSimdSize(uint32_t rootDeviceIndex); - bool usesSyncBuffer(uint32_t rootDeviceIndex); + void patchBlocksSimdSize(); + bool usesSyncBuffer(); void patchSyncBuffer(Device &device, GraphicsAllocation *gfxAllocation, size_t bufferOffset); void patchBindlessSurfaceStateOffsets(const Device &device, const size_t sshOffset); @@ -295,7 +292,7 @@ class Kernel : public ReferenceTrackedObject { const SimpleKernelArgInfo &getKernelArgInfo(uint32_t argIndex) const; bool getAllowNonUniform() const { return program->getAllowNonUniform(); } - bool isVmeKernel() const { return getDefaultKernelInfo().isVmeWorkload; } + bool isVmeKernel() const { return kernelInfo.isVmeWorkload; } bool requiresSpecialPipelineSelectMode() const { return specialPipelineSelectMode; } void performKernelTunning(CommandStreamReceiver &commandStreamReceiver, const Vec3 &lws, const Vec3 &gws, const Vec3 &offsets, TimestampPacketContainer *timestampContainer); @@ -309,7 +306,7 @@ class Kernel : public ReferenceTrackedObject { bool isUsingSharedObjArgs() const { return usingSharedObjArgs; } bool hasUncacheableStatelessArgs() const { return statelessUncacheableArgsCount > 0; } - bool hasPrintfOutput(uint32_t rootDeviceIndex) const; + bool hasPrintfOutput() const; void setReflectionSurfaceBlockBtOffset(uint32_t blockID, uint32_t offset); @@ -334,12 +331,12 @@ class Kernel : public ReferenceTrackedObject { bool checkIfIsParentKernelAndBlocksUsesPrintf(); - bool is32Bit(uint32_t rootDeviceIndex) const { - return getKernelInfo(rootDeviceIndex).gpuPointerSize == 4; + bool is32Bit() const { + return kernelInfo.gpuPointerSize == 4; } - size_t getPerThreadSystemThreadSurfaceSize(uint32_t rootDeviceIndex) const { - return getKernelInfo(rootDeviceIndex).kernelDescriptor.kernelAttributes.perThreadSystemThreadSurfaceSize; + size_t getPerThreadSystemThreadSurfaceSize() const { + return kernelInfo.kernelDescriptor.kernelAttributes.perThreadSystemThreadSurfaceSize; } std::vector &getPatchInfoDataList() { return patchInfoDataList; }; @@ -347,12 +344,12 @@ class Kernel : public ReferenceTrackedObject { return usingImagesOnly; } - void fillWithKernelObjsForAuxTranslation(KernelObjsForAuxTranslation &kernelObjsForAuxTranslation, uint32_t rootDeviceIndex); + void fillWithKernelObjsForAuxTranslation(KernelObjsForAuxTranslation &kernelObjsForAuxTranslation); MOCKABLE_VIRTUAL bool requiresCacheFlushCommand(const CommandQueue &commandQueue) const; using CacheFlushAllocationsVec = StackVec; - void getAllocationsForCacheFlush(CacheFlushAllocationsVec &out, uint32_t rootDeviceIndex) const; + void getAllocationsForCacheFlush(CacheFlushAllocationsVec &out) const; void setAuxTranslationDirection(AuxTranslationDirection auxTranslationDirection) { this->auxTranslationDirection = auxTranslationDirection; @@ -377,20 +374,18 @@ class Kernel : public ReferenceTrackedObject { uint64_t getKernelStartOffset( const bool localIdsGenerationByRuntime, const bool kernelUsesLocalIds, - const bool isCssUsed, - uint32_t rootDeviceIndex) const; + const bool isCssUsed) const; - bool requiresPerDssBackedBuffer(uint32_t rootDeviceIndex) const; - bool requiresLimitedWorkgroupSize(uint32_t rootDeviceIndex) const; + bool requiresPerDssBackedBuffer() const; + bool requiresLimitedWorkgroupSize() const; bool isKernelDebugEnabled() const { return debugEnabled; } int32_t setAdditionalKernelExecInfoWithParam(uint32_t paramName, size_t paramValueSize, const void *paramValue); void setAdditionalKernelExecInfo(uint32_t additionalKernelExecInfo); uint32_t getAdditionalKernelExecInfo() const; - MOCKABLE_VIRTUAL bool requiresWaDisableRccRhwoOptimization(uint32_t rootDeviceIndex) const; + MOCKABLE_VIRTUAL bool requiresWaDisableRccRhwoOptimization() const; const ClDeviceVector &getDevices() const { return program->getDevices(); } - const KernelInfo &getDefaultKernelInfo() const; void setGlobalWorkOffsetValues(uint32_t globalWorkOffsetX, uint32_t globalWorkOffsetY, uint32_t globalWorkOffsetZ); void setGlobalWorkSizeValues(uint32_t globalWorkSizeX, uint32_t globalWorkSizeY, uint32_t globalWorkSizeZ); @@ -482,7 +477,7 @@ class Kernel : public ReferenceTrackedObject { void makeArgsResident(CommandStreamReceiver &commandStreamReceiver); - void *patchBufferOffset(const KernelArgInfo &argInfo, void *svmPtr, GraphicsAllocation *svmAlloc, uint32_t rootDeviceIndex); + void *patchBufferOffset(const KernelArgInfo &argInfo, void *svmPtr, GraphicsAllocation *svmAlloc); void patchWithImplicitSurface(void *ptrToPatchInCrossThreadData, GraphicsAllocation &allocation, const Device &device, const ArgDescPointer &arg); // Sets-up both crossThreadData and ssh for given implicit (private/constant, etc.) allocation @@ -490,21 +485,21 @@ class Kernel : public ReferenceTrackedObject { void patchWithImplicitSurface(void *ptrToPatchInCrossThreadData, GraphicsAllocation &allocation, const Device &device, const PatchTokenT &patch); void getParentObjectCounts(ObjectCounts &objectCount); - Kernel(Program *programArg, const KernelInfoContainer &kernelInfsoArg, ClDevice &clDevice, bool schedulerKernel = false); + Kernel(Program *programArg, const KernelInfo &kernelInfo, ClDevice &clDevice, bool schedulerKernel = false); void provideInitializationHints(); void patchBlocksCurbeWithConstantValues(); void resolveArgs(); - void reconfigureKernel(uint32_t rootDeviceIndex); + void reconfigureKernel(); bool hasDirectStatelessAccessToHostMemory() const; bool hasIndirectStatelessAccessToHostMemory() const; void addAllocationToCacheFlushVector(uint32_t argIndex, GraphicsAllocation *argAllocation); bool allocationForCacheFlush(GraphicsAllocation *argAllocation) const; - const HardwareInfo &getHardwareInfo(uint32_t rootDeviceIndex) const; + const HardwareInfo &getHardwareInfo() const; const ClDevice &getDevice() const { return clDevice; @@ -514,7 +509,7 @@ class Kernel : public ReferenceTrackedObject { Program *program; ClDevice &clDevice; const ClDeviceVector &deviceVector; - const KernelInfoContainer kernelInfos; + const KernelInfo &kernelInfo; std::vector kernelArguments; std::vector kernelArgHandlers; diff --git a/opencl/source/kernel/kernel.inl b/opencl/source/kernel/kernel.inl index dd6ea0dede..5796ebab51 100644 --- a/opencl/source/kernel/kernel.inl +++ b/opencl/source/kernel/kernel.inl @@ -18,8 +18,6 @@ void Kernel::patchReflectionSurface(DeviceQueue *devQueue, PrintfHandler *printf BlockKernelManager *blockManager = program->getBlockKernelManager(); uint32_t blockCount = static_cast(blockManager->getCount()); - auto rootDeviceIndex = devQueue->getDevice().getRootDeviceIndex(); - auto &kernelInfo = *kernelInfos[rootDeviceIndex]; for (uint32_t i = 0; i < blockCount; i++) { const KernelInfo *pBlockInfo = blockManager->getBlockKernelInfo(i); diff --git a/opencl/source/kernel/kernel_extra.cpp b/opencl/source/kernel/kernel_extra.cpp index 686d7552f6..eaaf2adfb7 100644 --- a/opencl/source/kernel/kernel_extra.cpp +++ b/opencl/source/kernel/kernel_extra.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2019-2020 Intel Corporation + * Copyright (C) 2019-2021 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -13,7 +13,7 @@ namespace NEO { bool Kernel::requiresCacheFlushCommand(const CommandQueue &commandQueue) const { return false; } -void Kernel::reconfigureKernel(uint32_t rootDeviceIndex) { +void Kernel::reconfigureKernel() { } int Kernel::setKernelThreadArbitrationPolicy(uint32_t policy) { if (policy == CL_KERNEL_EXEC_INFO_THREAD_ARBITRATION_POLICY_ROUND_ROBIN_INTEL) { @@ -29,11 +29,11 @@ int Kernel::setKernelThreadArbitrationPolicy(uint32_t policy) { return CL_SUCCESS; } -bool Kernel::requiresPerDssBackedBuffer(uint32_t rootDeviceIndex) const { +bool Kernel::requiresPerDssBackedBuffer() const { return DebugManager.flags.ForcePerDssBackedBufferProgramming.get(); } -bool Kernel::requiresLimitedWorkgroupSize(uint32_t rootDeviceIndex) const { +bool Kernel::requiresLimitedWorkgroupSize() const { return this->isBuiltIn; } diff --git a/opencl/source/kernel/multi_device_kernel.cpp b/opencl/source/kernel/multi_device_kernel.cpp index 75c5dd051d..af60e4a695 100644 --- a/opencl/source/kernel/multi_device_kernel.cpp +++ b/opencl/source/kernel/multi_device_kernel.cpp @@ -24,10 +24,10 @@ Kernel *MultiDeviceKernel::determineDefaultKernel(KernelVectorType &kernelVector } return nullptr; } -MultiDeviceKernel::MultiDeviceKernel(KernelVectorType kernelVector) : kernels(std::move(kernelVector)), - defaultKernel(MultiDeviceKernel::determineDefaultKernel(kernels)), - program(defaultKernel->getProgram()), - kernelInfos(defaultKernel->getKernelInfos()) { +MultiDeviceKernel::MultiDeviceKernel(KernelVectorType kernelVector, const KernelInfoContainer kernelInfosArg) : kernels(std::move(kernelVector)), + defaultKernel(MultiDeviceKernel::determineDefaultKernel(kernels)), + program(defaultKernel->getProgram()), + kernelInfos(kernelInfosArg) { for (auto &pKernel : kernels) { if (pKernel) { pKernel->incRefInternal(); diff --git a/opencl/source/kernel/multi_device_kernel.h b/opencl/source/kernel/multi_device_kernel.h index 9376aefa9d..f2469abc60 100644 --- a/opencl/source/kernel/multi_device_kernel.h +++ b/opencl/source/kernel/multi_device_kernel.h @@ -21,7 +21,7 @@ class MultiDeviceKernel : public BaseObject<_cl_kernel> { static const cl_ulong objectMagic = 0x3284ADC8EA0AFE25LL; ~MultiDeviceKernel() override; - MultiDeviceKernel(KernelVectorType kernelVector); + MultiDeviceKernel(KernelVectorType kernelVector, const KernelInfoContainer kernelInfosArg); Kernel *getKernel(uint32_t rootDeviceIndex) const { return kernels[rootDeviceIndex]; } Kernel *getDefaultKernel() const { return defaultKernel; } @@ -36,9 +36,9 @@ class MultiDeviceKernel : public BaseObject<_cl_kernel> { if (kernels[rootDeviceIndex]) { continue; } - kernels[rootDeviceIndex] = Kernel::create(program, kernelInfos, *pDevice, errcodeRet); + kernels[rootDeviceIndex] = Kernel::create(program, *kernelInfos[rootDeviceIndex], *pDevice, errcodeRet); } - auto pMultiDeviceKernel = new multi_device_kernel_t(std::move(kernels)); + auto pMultiDeviceKernel = new multi_device_kernel_t(std::move(kernels), kernelInfos); return pMultiDeviceKernel; } @@ -93,7 +93,7 @@ class MultiDeviceKernel : public BaseObject<_cl_kernel> { KernelVectorType kernels; Kernel *defaultKernel = nullptr; Program *program = nullptr; - const KernelInfoContainer &kernelInfos; + const KernelInfoContainer kernelInfos; }; } // namespace NEO diff --git a/opencl/source/program/kernel_info.cpp b/opencl/source/program/kernel_info.cpp index 8708996a87..d735de3e02 100644 --- a/opencl/source/program/kernel_info.cpp +++ b/opencl/source/program/kernel_info.cpp @@ -133,8 +133,7 @@ WorkSizeInfo::WorkSizeInfo(uint32_t maxWorkGroupSize, bool hasBarriers, uint32_t } WorkSizeInfo::WorkSizeInfo(const DispatchInfo &dispatchInfo) { auto &device = dispatchInfo.getClDevice(); - auto rootDeviceIndex = device.getRootDeviceIndex(); - const auto &kernelInfo = dispatchInfo.getKernel()->getKernelInfo(rootDeviceIndex); + const auto &kernelInfo = dispatchInfo.getKernel()->getKernelInfo(); this->maxWorkGroupSize = dispatchInfo.getKernel()->getMaxKernelWorkGroupSize(); this->hasBarriers = kernelInfo.kernelDescriptor.kernelAttributes.usesBarriers(); this->simdSize = static_cast(kernelInfo.getMaxSimdSize()); diff --git a/opencl/source/program/printf_handler.cpp b/opencl/source/program/printf_handler.cpp index ce3d45b0f8..c643c1b662 100644 --- a/opencl/source/program/printf_handler.cpp +++ b/opencl/source/program/printf_handler.cpp @@ -58,15 +58,15 @@ void PrintfHandler::prepareDispatch(const MultiDispatchInfo &multiDispatchInfo) device.getDevice(), printfSurface, 0, &printfSurfaceInitialDataSize, sizeof(printfSurfaceInitialDataSize)); - const auto &printfSurfaceArg = kernel->getKernelInfo(rootDeviceIndex).kernelDescriptor.payloadMappings.implicitArgs.printfSurfaceAddress; + const auto &printfSurfaceArg = kernel->getKernelInfo().kernelDescriptor.payloadMappings.implicitArgs.printfSurfaceAddress; auto printfPatchAddress = ptrOffset(reinterpret_cast(kernel->getCrossThreadData()), printfSurfaceArg.stateless); patchWithRequiredSize(printfPatchAddress, printfSurfaceArg.pointerSize, (uintptr_t)printfSurface->getGpuAddressToPatch()); if (isValidOffset(printfSurfaceArg.bindful)) { - auto surfaceState = ptrOffset(reinterpret_cast(kernel->getSurfaceStateHeap(rootDeviceIndex)), printfSurfaceArg.bindful); + auto surfaceState = ptrOffset(reinterpret_cast(kernel->getSurfaceStateHeap()), printfSurfaceArg.bindful); void *addressToPatch = printfSurface->getUnderlyingBuffer(); size_t sizeToPatch = printfSurface->getUnderlyingBufferSize(); Buffer::setSurfaceState(&device.getDevice(), surfaceState, false, false, sizeToPatch, addressToPatch, 0, printfSurface, 0, 0, - kernel->getDefaultKernelInfo().kernelDescriptor.kernelAttributes.flags.useGlobalAtomics, + kernel->getKernelInfo().kernelDescriptor.kernelAttributes.flags.useGlobalAtomics, kernel->getTotalNumDevicesInContext()); } } @@ -76,9 +76,8 @@ void PrintfHandler::makeResident(CommandStreamReceiver &commandStreamReceiver) { } void PrintfHandler::printEnqueueOutput() { - auto rootDeviceIndex = device.getRootDeviceIndex(); PrintFormatter printFormatter(reinterpret_cast(printfSurface->getUnderlyingBuffer()), static_cast(printfSurface->getUnderlyingBufferSize()), - kernel->is32Bit(rootDeviceIndex), kernel->getKernelInfo(rootDeviceIndex).kernelDescriptor.kernelMetadata.printfStringsMap); + kernel->is32Bit(), kernel->getKernelInfo().kernelDescriptor.kernelMetadata.printfStringsMap); printFormatter.printKernelOutput(); } } // namespace NEO diff --git a/opencl/source/scheduler/scheduler_kernel.h b/opencl/source/scheduler/scheduler_kernel.h index 5524d4cf23..6de0470ff0 100644 --- a/opencl/source/scheduler/scheduler_kernel.h +++ b/opencl/source/scheduler/scheduler_kernel.h @@ -34,14 +34,13 @@ class SchedulerKernel : public Kernel { } size_t getCurbeSize() { - auto &defaultKernelInfo = getDefaultKernelInfo(); - size_t crossTrheadDataSize = defaultKernelInfo.kernelDescriptor.kernelAttributes.crossThreadDataSize; - size_t dshSize = defaultKernelInfo.heapInfo.DynamicStateHeapSize; + size_t crossThreadDataSize = kernelInfo.kernelDescriptor.kernelAttributes.crossThreadDataSize; + size_t dshSize = kernelInfo.heapInfo.DynamicStateHeapSize; - crossTrheadDataSize = alignUp(crossTrheadDataSize, 64); + crossThreadDataSize = alignUp(crossThreadDataSize, 64); dshSize = alignUp(dshSize, 64); - return alignUp(SCHEDULER_DYNAMIC_PAYLOAD_SIZE, 64) + crossTrheadDataSize + dshSize; + return alignUp(SCHEDULER_DYNAMIC_PAYLOAD_SIZE, 64) + crossThreadDataSize + dshSize; } void setArgs(GraphicsAllocation *queue, @@ -56,7 +55,7 @@ class SchedulerKernel : public Kernel { static BuiltinCode loadSchedulerKernel(Device *device); protected: - SchedulerKernel(Program *programArg, const KernelInfoContainer &kernelInfosArg, ClDevice &clDeviceArg) : Kernel(programArg, kernelInfosArg, clDeviceArg, true) { + SchedulerKernel(Program *programArg, const KernelInfo &kernelInfoArg, ClDevice &clDeviceArg) : Kernel(programArg, kernelInfoArg, clDeviceArg, true) { computeGws(); }; diff --git a/opencl/source/utilities/logger.cpp b/opencl/source/utilities/logger.cpp index a4469e8c21..ac3177f00f 100644 --- a/opencl/source/utilities/logger.cpp +++ b/opencl/source/utilities/logger.cpp @@ -171,7 +171,7 @@ void FileLogger::dumpKernelArgs(const Kernel *kernel) { if (dumpKernelArgsEnabled && kernel != nullptr) { std::unique_lock theLock(mtx); std::ofstream outFile; - const auto &kernelInfo = kernel->getDefaultKernelInfo(); + const auto &kernelInfo = kernel->getKernelInfo(); for (unsigned int i = 0; i < kernelInfo.kernelArgInfo.size(); i++) { std::string type; std::string fileName; diff --git a/opencl/test/unit_test/accelerators/media_image_arg_tests.cpp b/opencl/test/unit_test/accelerators/media_image_arg_tests.cpp index e6d9f5c1a3..e0643b143a 100644 --- a/opencl/test/unit_test/accelerators/media_image_arg_tests.cpp +++ b/opencl/test/unit_test/accelerators/media_image_arg_tests.cpp @@ -86,7 +86,7 @@ HWTEST_F(MediaImageSetArgTest, WhenSettingMediaImageArgThenArgsSetCorrectly) { typedef typename FamilyType::MEDIA_SURFACE_STATE MEDIA_SURFACE_STATE; auto pSurfaceState = reinterpret_cast( - ptrOffset(pKernel->getSurfaceStateHeap(rootDeviceIndex), + ptrOffset(pKernel->getSurfaceStateHeap(), pKernelInfo->kernelArgInfo[0].offsetHeap)); srcImage->setMediaImageArg(const_cast(pSurfaceState), pClDevice->getRootDeviceIndex()); @@ -114,7 +114,7 @@ HWTEST_F(MediaImageSetArgTest, WhenSettingKernelArgImageThenArgsSetCorrectly) { ASSERT_EQ(CL_SUCCESS, retVal); auto pSurfaceState = reinterpret_cast( - ptrOffset(pKernel->getSurfaceStateHeap(rootDeviceIndex), + ptrOffset(pKernel->getSurfaceStateHeap(), pKernelInfo->kernelArgInfo[0].offsetHeap)); uint64_t surfaceAddress = pSurfaceState->getSurfaceBaseAddress(); diff --git a/opencl/test/unit_test/api/cl_api_tests.cpp b/opencl/test/unit_test/api/cl_api_tests.cpp index 2dd7c633dc..e88bbe7bcb 100644 --- a/opencl/test/unit_test/api/cl_api_tests.cpp +++ b/opencl/test/unit_test/api/cl_api_tests.cpp @@ -35,7 +35,7 @@ void api_fixture_using_aligned_memory_manager::SetUp() { program = new MockProgram(ctxPtr, false, toClDeviceVector(*device)); Program *prgPtr = reinterpret_cast(program); - kernel = new MockKernel(prgPtr, MockKernel::toKernelInfoContainer(program->mockKernelInfo, 0), *device); + kernel = new MockKernel(prgPtr, program->mockKernelInfo, *device); ASSERT_NE(nullptr, kernel); } diff --git a/opencl/test/unit_test/api/cl_create_program_with_built_in_kernels_tests.cpp b/opencl/test/unit_test/api/cl_create_program_with_built_in_kernels_tests.cpp index 8112246f02..2b99ea51d8 100644 --- a/opencl/test/unit_test/api/cl_create_program_with_built_in_kernels_tests.cpp +++ b/opencl/test/unit_test/api/cl_create_program_with_built_in_kernels_tests.cpp @@ -214,11 +214,11 @@ TEST_F(clCreateProgramWithBuiltInVmeKernelsTests, GivenVmeBlockMotionEstimateKer auto pMultiDeviceKernel = castToObject(kernel); auto kernNeo = pMultiDeviceKernel->getKernel(testedRootDeviceIndex); - EXPECT_NE(nullptr, kernNeo->getKernelInfo(testedRootDeviceIndex).builtinDispatchBuilder); + EXPECT_NE(nullptr, kernNeo->getKernelInfo().builtinDispatchBuilder); EXPECT_EQ(6U, kernNeo->getKernelArgsNumber()); auto &vmeBuilder = Vme::getBuiltinDispatchInfoBuilder(EBuiltInOps::VmeBlockMotionEstimateIntel, *pClDevice); - EXPECT_EQ(&vmeBuilder, kernNeo->getKernelInfo(testedRootDeviceIndex).builtinDispatchBuilder); + EXPECT_EQ(&vmeBuilder, kernNeo->getKernelInfo().builtinDispatchBuilder); clReleaseKernel(kernel); clReleaseProgram(program); @@ -251,11 +251,11 @@ TEST_F(clCreateProgramWithBuiltInVmeKernelsTests, GivenVmeBlockAdvancedMotionEst auto pMultiDeviceKernel = castToObject(kernel); auto kernNeo = pMultiDeviceKernel->getKernel(testedRootDeviceIndex); - EXPECT_NE(nullptr, kernNeo->getKernelInfo(testedRootDeviceIndex).builtinDispatchBuilder); + EXPECT_NE(nullptr, kernNeo->getKernelInfo().builtinDispatchBuilder); EXPECT_EQ(15U, kernNeo->getKernelArgsNumber()); auto &vmeBuilder = Vme::getBuiltinDispatchInfoBuilder(EBuiltInOps::VmeBlockAdvancedMotionEstimateCheckIntel, *pClDevice); - EXPECT_EQ(&vmeBuilder, kernNeo->getKernelInfo(testedRootDeviceIndex).builtinDispatchBuilder); + EXPECT_EQ(&vmeBuilder, kernNeo->getKernelInfo().builtinDispatchBuilder); clReleaseKernel(kernel); clReleaseProgram(program); @@ -288,12 +288,12 @@ TEST_F(clCreateProgramWithBuiltInVmeKernelsTests, GivenVmeBlockAdvancedMotionEst auto pMultiDeviceKernel = castToObject(kernel); auto kernNeo = pMultiDeviceKernel->getKernel(testedRootDeviceIndex); - EXPECT_NE(nullptr, kernNeo->getKernelInfo(testedRootDeviceIndex).builtinDispatchBuilder); + EXPECT_NE(nullptr, kernNeo->getKernelInfo().builtinDispatchBuilder); EXPECT_EQ(20U, kernNeo->getKernelArgsNumber()); auto ctxNeo = castToObject(pContext); auto &vmeBuilder = Vme::getBuiltinDispatchInfoBuilder(EBuiltInOps::VmeBlockAdvancedMotionEstimateBidirectionalCheckIntel, *ctxNeo->getDevice(0)); - EXPECT_EQ(&vmeBuilder, kernNeo->getKernelInfo(testedRootDeviceIndex).builtinDispatchBuilder); + EXPECT_EQ(&vmeBuilder, kernNeo->getKernelInfo().builtinDispatchBuilder); clReleaseKernel(kernel); clReleaseProgram(program); diff --git a/opencl/test/unit_test/api/cl_enqueue_nd_range_kernel_tests.inl b/opencl/test/unit_test/api/cl_enqueue_nd_range_kernel_tests.inl index 29476b2643..25a7116bff 100644 --- a/opencl/test/unit_test/api/cl_enqueue_nd_range_kernel_tests.inl +++ b/opencl/test/unit_test/api/cl_enqueue_nd_range_kernel_tests.inl @@ -137,7 +137,7 @@ TEST_F(clEnqueueNDRangeKernelTests, GivenKernelWithAllocateSyncBufferPatchWhenEx SPatchAllocateSyncBuffer patchAllocateSyncBuffer; populateKernelDescriptor(pProgram->mockKernelInfo.kernelDescriptor, patchAllocateSyncBuffer); - EXPECT_TRUE(pKernel->usesSyncBuffer(testedRootDeviceIndex)); + EXPECT_TRUE(pKernel->usesSyncBuffer()); retVal = clEnqueueNDRangeKernel( pCommandQueue, diff --git a/opencl/test/unit_test/api/cl_get_kernel_max_concurrent_work_group_count_intel_tests.inl b/opencl/test/unit_test/api/cl_get_kernel_max_concurrent_work_group_count_intel_tests.inl index 5dfdda165a..e163cd2adb 100644 --- a/opencl/test/unit_test/api/cl_get_kernel_max_concurrent_work_group_count_intel_tests.inl +++ b/opencl/test/unit_test/api/cl_get_kernel_max_concurrent_work_group_count_intel_tests.inl @@ -63,7 +63,7 @@ TEST_F(clGetKernelMaxConcurrentWorkGroupCountTests, GivenVariousInputWhenGetting size_t globalWorkOffset[] = {0, 0, 0}; size_t localWorkSize[] = {8, 8, 8}; size_t maxConcurrentWorkGroupCount = 0; - const_cast(pKernel->getKernelInfo(pDevice->getRootDeviceIndex())).kernelDescriptor.kernelAttributes.numGrfRequired = GrfConfig::DefaultGrfNumber; + const_cast(pKernel->getKernelInfo()).kernelDescriptor.kernelAttributes.numGrfRequired = GrfConfig::DefaultGrfNumber; retVal = clGetKernelMaxConcurrentWorkGroupCountINTEL(pCommandQueue, pMultiDeviceKernel, workDim, globalWorkOffset, localWorkSize, &maxConcurrentWorkGroupCount); @@ -72,7 +72,8 @@ TEST_F(clGetKernelMaxConcurrentWorkGroupCountTests, GivenVariousInputWhenGetting EXPECT_EQ(expectedMaxConcurrentWorkGroupCount, maxConcurrentWorkGroupCount); auto pKernelWithExecutionEnvironmentPatch = MockKernel::create(pCommandQueue->getDevice(), pProgram); - MultiDeviceKernel multiDeviceKernelWithExecutionEnvironmentPatch(MockMultiDeviceKernel::toKernelVector(pKernelWithExecutionEnvironmentPatch)); + auto kernelInfos = MockKernel::toKernelInfoContainer(pKernelWithExecutionEnvironmentPatch->getKernelInfo(), testedRootDeviceIndex); + MultiDeviceKernel multiDeviceKernelWithExecutionEnvironmentPatch(MockMultiDeviceKernel::toKernelVector(pKernelWithExecutionEnvironmentPatch), kernelInfos); retVal = clGetKernelMaxConcurrentWorkGroupCountINTEL(pCommandQueue, &multiDeviceKernelWithExecutionEnvironmentPatch, workDim, globalWorkOffset, localWorkSize, &maxConcurrentWorkGroupCount); diff --git a/opencl/test/unit_test/api/cl_get_kernel_sub_group_info_khr_tests.inl b/opencl/test/unit_test/api/cl_get_kernel_sub_group_info_khr_tests.inl index 04745e800e..7beebaa067 100644 --- a/opencl/test/unit_test/api/cl_get_kernel_sub_group_info_khr_tests.inl +++ b/opencl/test/unit_test/api/cl_get_kernel_sub_group_info_khr_tests.inl @@ -14,7 +14,7 @@ struct KernelSubGroupInfoKhrFixture : HelloWorldFixture(pKernel->getKernelInfo(rootDeviceIndex).getMaxSimdSize()); + MaxSimdSize = static_cast(pKernel->getKernelInfo().getMaxSimdSize()); ASSERT_GE(MaxSimdSize, 8u); MaxWorkDim = static_cast(pClDevice->getDeviceInfo().maxWorkItemDimensions); ASSERT_EQ(MaxWorkDim, 3u); @@ -141,11 +141,11 @@ TEST_F(KernelSubGroupInfoKhrReturnCompileSizeTest, GivenKernelWhenGettingRequire EXPECT_EQ(paramValueSizeRet, sizeof(size_t)); size_t requiredSubGroupSize = 0; - auto start = pKernel->getKernelInfo(rootDeviceIndex).kernelDescriptor.kernelMetadata.kernelLanguageAttributes.find("intel_reqd_sub_group_size("); + auto start = pKernel->getKernelInfo().kernelDescriptor.kernelMetadata.kernelLanguageAttributes.find("intel_reqd_sub_group_size("); if (start != std::string::npos) { start += strlen("intel_reqd_sub_group_size("); - auto stop = pKernel->getKernelInfo(rootDeviceIndex).kernelDescriptor.kernelMetadata.kernelLanguageAttributes.find(")", start); - requiredSubGroupSize = stoi(pKernel->getKernelInfo(rootDeviceIndex).kernelDescriptor.kernelMetadata.kernelLanguageAttributes.substr(start, stop - start)); + auto stop = pKernel->getKernelInfo().kernelDescriptor.kernelMetadata.kernelLanguageAttributes.find(")", start); + requiredSubGroupSize = stoi(pKernel->getKernelInfo().kernelDescriptor.kernelMetadata.kernelLanguageAttributes.substr(start, stop - start)); } EXPECT_EQ(paramValue, requiredSubGroupSize); @@ -200,7 +200,7 @@ TEST_F(KernelSubGroupInfoKhrTest, GivenNullDeviceWhenGettingSubGroupInfoFromMult MockUnrestrictiveContext context; auto mockProgram = std::make_unique(&context, false, context.getDevices()); std::unique_ptr pMultiDeviceKernel( - MultiDeviceKernel::create(mockProgram.get(), pKernel->getKernelInfos(), nullptr)); + MultiDeviceKernel::create(mockProgram.get(), this->pMultiDeviceKernel->getKernelInfos(), nullptr)); retVal = clGetKernelSubGroupInfoKHR( pMultiDeviceKernel.get(), diff --git a/opencl/test/unit_test/api/cl_get_kernel_sub_group_info_tests.inl b/opencl/test/unit_test/api/cl_get_kernel_sub_group_info_tests.inl index 897a633794..d15e2259d6 100644 --- a/opencl/test/unit_test/api/cl_get_kernel_sub_group_info_tests.inl +++ b/opencl/test/unit_test/api/cl_get_kernel_sub_group_info_tests.inl @@ -16,18 +16,18 @@ struct KernelSubGroupInfoFixture : HelloWorldFixture { void SetUp() override { ParentClass::SetUp(); pKernel->maxKernelWorkGroupSize = static_cast(pDevice->getDeviceInfo().maxWorkGroupSize / 2); - maxSimdSize = static_cast(pKernel->getKernelInfo(rootDeviceIndex).getMaxSimdSize()); + maxSimdSize = static_cast(pKernel->getKernelInfo().getMaxSimdSize()); ASSERT_LE(8u, maxSimdSize); maxWorkDim = static_cast(pClDevice->getDeviceInfo().maxWorkItemDimensions); ASSERT_EQ(3u, maxWorkDim); maxWorkGroupSize = static_cast(pKernel->maxKernelWorkGroupSize); ASSERT_GE(1024u, maxWorkGroupSize); - largestCompiledSIMDSize = static_cast(pKernel->getKernelInfo(rootDeviceIndex).getMaxSimdSize()); + largestCompiledSIMDSize = static_cast(pKernel->getKernelInfo().getMaxSimdSize()); ASSERT_EQ(32u, largestCompiledSIMDSize); - auto requiredWorkGroupSizeX = static_cast(pKernel->getKernelInfo(rootDeviceIndex).kernelDescriptor.kernelAttributes.requiredWorkgroupSize[0]); - auto requiredWorkGroupSizeY = static_cast(pKernel->getKernelInfo(rootDeviceIndex).kernelDescriptor.kernelAttributes.requiredWorkgroupSize[1]); - auto requiredWorkGroupSizeZ = static_cast(pKernel->getKernelInfo(rootDeviceIndex).kernelDescriptor.kernelAttributes.requiredWorkgroupSize[2]); + auto requiredWorkGroupSizeX = static_cast(pKernel->getKernelInfo().kernelDescriptor.kernelAttributes.requiredWorkgroupSize[0]); + auto requiredWorkGroupSizeY = static_cast(pKernel->getKernelInfo().kernelDescriptor.kernelAttributes.requiredWorkgroupSize[1]); + auto requiredWorkGroupSizeZ = static_cast(pKernel->getKernelInfo().kernelDescriptor.kernelAttributes.requiredWorkgroupSize[2]); calculatedMaxWorkgroupSize = requiredWorkGroupSizeX * requiredWorkGroupSizeY * requiredWorkGroupSizeZ; if ((calculatedMaxWorkgroupSize == 0) || (calculatedMaxWorkgroupSize > static_cast(pKernel->maxKernelWorkGroupSize))) { @@ -263,7 +263,7 @@ TEST_F(KernelSubGroupInfoReturnCompileNumberTest, GivenKernelWhenGettingCompileN EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(paramValueSizeRet, sizeof(size_t)); - EXPECT_EQ(paramValue[0], static_cast(pKernel->getKernelInfo(rootDeviceIndex).kernelDescriptor.kernelMetadata.compiledSubGroupsNumber)); + EXPECT_EQ(paramValue[0], static_cast(pKernel->getKernelInfo().kernelDescriptor.kernelMetadata.compiledSubGroupsNumber)); } typedef KernelSubGroupInfoParamFixture KernelSubGroupInfoReturnCompileSizeTest; @@ -286,11 +286,11 @@ TEST_F(KernelSubGroupInfoReturnCompileSizeTest, GivenKernelWhenGettingCompileSub EXPECT_EQ(paramValueSizeRet, sizeof(size_t)); size_t requiredSubGroupSize = 0; - auto start = pKernel->getKernelInfo(rootDeviceIndex).kernelDescriptor.kernelMetadata.kernelLanguageAttributes.find("intel_reqd_sub_group_size("); + auto start = pKernel->getKernelInfo().kernelDescriptor.kernelMetadata.kernelLanguageAttributes.find("intel_reqd_sub_group_size("); if (start != std::string::npos) { start += strlen("intel_reqd_sub_group_size("); - auto stop = pKernel->getKernelInfo(rootDeviceIndex).kernelDescriptor.kernelMetadata.kernelLanguageAttributes.find(")", start); - requiredSubGroupSize = stoi(pKernel->getKernelInfo(rootDeviceIndex).kernelDescriptor.kernelMetadata.kernelLanguageAttributes.substr(start, stop - start)); + auto stop = pKernel->getKernelInfo().kernelDescriptor.kernelMetadata.kernelLanguageAttributes.find(")", start); + requiredSubGroupSize = stoi(pKernel->getKernelInfo().kernelDescriptor.kernelMetadata.kernelLanguageAttributes.substr(start, stop - start)); } EXPECT_EQ(paramValue[0], requiredSubGroupSize); @@ -348,7 +348,7 @@ TEST_F(KernelSubGroupInfoTest, GivenNullDeviceWhenGettingSubGroupInfoFromMultiDe MockUnrestrictiveContext context; auto mockProgram = std::make_unique(&context, false, context.getDevices()); - std::unique_ptr pMultiDeviceKernel(MultiDeviceKernel::create(mockProgram.get(), pKernel->getKernelInfos(), nullptr)); + std::unique_ptr pMultiDeviceKernel(MultiDeviceKernel::create(mockProgram.get(), this->pMultiDeviceKernel->getKernelInfos(), nullptr)); retVal = clGetKernelSubGroupInfo( pMultiDeviceKernel.get(), diff --git a/opencl/test/unit_test/api/cl_get_kernel_suggested_local_work_size_intel_tests.inl b/opencl/test/unit_test/api/cl_get_kernel_suggested_local_work_size_intel_tests.inl index 76328fc96a..de0b4f4ffa 100644 --- a/opencl/test/unit_test/api/cl_get_kernel_suggested_local_work_size_intel_tests.inl +++ b/opencl/test/unit_test/api/cl_get_kernel_suggested_local_work_size_intel_tests.inl @@ -98,7 +98,8 @@ TEST_F(clGetKernelSuggestedLocalWorkSizeTests, GivenVariousInputWhenGettingSugge TEST_F(clGetKernelSuggestedLocalWorkSizeTests, GivenKernelWithExecutionEnvironmentPatchedWhenGettingSuggestedLocalWorkSizeThenCorrectValuesAreReturned) { auto pKernelWithExecutionEnvironmentPatch = MockKernel::create(pCommandQueue->getDevice(), pProgram); - MultiDeviceKernel multiDeviceKernelWithExecutionEnvironmentPatch(MockMultiDeviceKernel::toKernelVector(pKernelWithExecutionEnvironmentPatch)); + auto kernelInfos = MockKernel::toKernelInfoContainer(pKernelWithExecutionEnvironmentPatch->getKernelInfo(), testedRootDeviceIndex); + MultiDeviceKernel multiDeviceKernelWithExecutionEnvironmentPatch(MockMultiDeviceKernel::toKernelVector(pKernelWithExecutionEnvironmentPatch), kernelInfos); size_t globalWorkOffset[] = {0, 0, 0}; size_t globalWorkSize[] = {128, 128, 128}; diff --git a/opencl/test/unit_test/api/cl_get_kernel_work_group_info_tests.inl b/opencl/test/unit_test/api/cl_get_kernel_work_group_info_tests.inl index 7d485f5080..1b64421b8e 100644 --- a/opencl/test/unit_test/api/cl_get_kernel_work_group_info_tests.inl +++ b/opencl/test/unit_test/api/cl_get_kernel_work_group_info_tests.inl @@ -62,7 +62,7 @@ TEST_F(clGetKernelWorkGroupInfoTest, GivenNullDeviceWhenGettingWorkGroupInfoFrom MockUnrestrictiveContext context; auto mockProgram = std::make_unique(&context, false, context.getDevices()); std::unique_ptr pMultiDeviceKernel( - MockMultiDeviceKernel::create(mockProgram.get(), MockKernel::toKernelInfoContainer(pKernel->getKernelInfo(testedRootDeviceIndex), context.getDevice(0)->getRootDeviceIndex()))); + MockMultiDeviceKernel::create(mockProgram.get(), MockKernel::toKernelInfoContainer(pKernel->getKernelInfo(), context.getDevice(0)->getRootDeviceIndex()))); retVal = clGetKernelWorkGroupInfo( pMultiDeviceKernel.get(), @@ -84,7 +84,7 @@ TEST_F(clGetKernelWorkGroupInfoTests, GivenKernelRequiringScratchSpaceWhenGettin mediaVFEstate.PerThreadScratchSpace = 1024; //whatever greater than 0 populateKernelDescriptor(mockKernel.kernelInfo.kernelDescriptor, mediaVFEstate, 0); - cl_ulong scratchSpaceSize = static_cast(mockKernel.mockKernel->getScratchSize(testedRootDeviceIndex)); + cl_ulong scratchSpaceSize = static_cast(mockKernel.mockKernel->getScratchSize()); EXPECT_EQ(scratchSpaceSize, 1024u); retVal = clGetKernelWorkGroupInfo( diff --git a/opencl/test/unit_test/api/cl_mem_locally_uncached_resource_tests.cpp b/opencl/test/unit_test/api/cl_mem_locally_uncached_resource_tests.cpp index 22b6b14fc4..08e0affd7a 100644 --- a/opencl/test/unit_test/api/cl_mem_locally_uncached_resource_tests.cpp +++ b/opencl/test/unit_test/api/cl_mem_locally_uncached_resource_tests.cpp @@ -27,10 +27,9 @@ namespace clMemLocallyUncachedResourceTests { template uint32_t argMocs(Kernel &kernel, size_t argIndex) { - auto rootDeviceIndex = kernel.getDevices()[0]->getRootDeviceIndex(); using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; - auto surfaceStateHeapAddress = kernel.getSurfaceStateHeap(rootDeviceIndex); - auto surfaceStateHeapAddressOffset = kernel.getKernelInfo(rootDeviceIndex).kernelArgInfo[argIndex].offsetHeap; + auto surfaceStateHeapAddress = kernel.getSurfaceStateHeap(); + auto surfaceStateHeapAddressOffset = kernel.getKernelInfo().kernelArgInfo[argIndex].offsetHeap; auto surfaceState = reinterpret_cast(ptrOffset(surfaceStateHeapAddress, surfaceStateHeapAddressOffset)); return surfaceState->getMemoryObjectControlState(); } diff --git a/opencl/test/unit_test/aub_tests/command_queue/enqueue_kernel_aub_tests.cpp b/opencl/test/unit_test/aub_tests/command_queue/enqueue_kernel_aub_tests.cpp index 6f869d2e33..64f222e184 100644 --- a/opencl/test/unit_test/aub_tests/command_queue/enqueue_kernel_aub_tests.cpp +++ b/opencl/test/unit_test/aub_tests/command_queue/enqueue_kernel_aub_tests.cpp @@ -109,12 +109,12 @@ HWCMDTEST_F(IGFX_GEN8_CORE, AUBHelloWorld, WhenEnqueuingKernelThenAdressesAreAli EXPECT_EQ(0u, addrIDD % alignmentIDD); // Check kernel start pointer matches hard-coded kernel. - auto pExpectedISA = pKernel->getKernelHeap(rootDeviceIndex); - auto expectedSize = pKernel->getKernelHeapSize(rootDeviceIndex); + auto pExpectedISA = pKernel->getKernelHeap(); + auto expectedSize = pKernel->getKernelHeapSize(); auto pSBA = reinterpret_cast(cmdStateBaseAddress); ASSERT_NE(nullptr, pSBA); - auto pISA = pKernel->getKernelInfo(rootDeviceIndex).getGraphicsAllocation()->getUnderlyingBuffer(); + auto pISA = pKernel->getKernelInfo().getGraphicsAllocation()->getUnderlyingBuffer(); EXPECT_EQ(0, memcmp(pISA, pExpectedISA, expectedSize)); } @@ -268,12 +268,12 @@ HWCMDTEST_F(IGFX_GEN8_CORE, AUBSimpleArg, WhenEnqueingKernelThenAdressesAreAlign EXPECT_EQ(0u, addrIDD % alignmentIDD); // Check kernel start pointer matches hard-coded kernel. - auto pExpectedISA = pKernel->getKernelHeap(rootDeviceIndex); - auto expectedSize = pKernel->getKernelHeapSize(rootDeviceIndex); + auto pExpectedISA = pKernel->getKernelHeap(); + auto expectedSize = pKernel->getKernelHeapSize(); auto pSBA = reinterpret_cast(cmdStateBaseAddress); ASSERT_NE(nullptr, pSBA); - auto pISA = pKernel->getKernelInfo(rootDeviceIndex).getGraphicsAllocation()->getUnderlyingBuffer(); + auto pISA = pKernel->getKernelInfo().getGraphicsAllocation()->getUnderlyingBuffer(); EXPECT_EQ(0, memcmp(pISA, pExpectedISA, expectedSize)); } @@ -502,8 +502,8 @@ HWTEST_F(AUBSimpleKernelStatelessTest, givenSimpleKernelWhenStatelessPathIsUsedT ASSERT_EQ(CL_SUCCESS, retVal); - EXPECT_FALSE(this->kernel->getKernelInfo(rootDeviceIndex).kernelArgInfo[0].pureStatefulBufferAccess); - EXPECT_TRUE(this->kernel->getKernelInfo(rootDeviceIndex).kernelDescriptor.kernelAttributes.supportsBuffersBiggerThan4Gb()); + EXPECT_FALSE(this->kernel->getKernelInfo().kernelArgInfo[0].pureStatefulBufferAccess); + EXPECT_TRUE(this->kernel->getKernelInfo().kernelDescriptor.kernelAttributes.supportsBuffersBiggerThan4Gb()); this->pCmdQ->flush(); expectMemory(reinterpret_cast(pBuffer->getGraphicsAllocation(device->getRootDeviceIndex())->getGpuAddress()), @@ -937,7 +937,7 @@ HWTEST2_F(AUBBindlessKernel, DISABLED_givenBindlessCopyKernelWhenEnqueuedThenRes ASSERT_EQ(CL_SUCCESS, retVal); - EXPECT_TRUE(this->kernel->getKernelInfo(rootDeviceIndex).kernelArgInfo[0].pureStatefulBufferAccess); + EXPECT_TRUE(this->kernel->getKernelInfo().kernelArgInfo[0].pureStatefulBufferAccess); this->pCmdQ->finish(); expectMemory(reinterpret_cast(pBufferDst->getGraphicsAllocation(device->getRootDeviceIndex())->getGpuAddress()), diff --git a/opencl/test/unit_test/built_ins/built_in_tests.cpp b/opencl/test/unit_test/built_ins/built_in_tests.cpp index f0af4a4d4b..95fa06ba1e 100644 --- a/opencl/test/unit_test/built_ins/built_in_tests.cpp +++ b/opencl/test/unit_test/built_ins/built_in_tests.cpp @@ -481,12 +481,12 @@ TEST_F(BuiltInTests, givenkAuxBuiltInWhenResizeIsCalledThenCloneAllNewInstancesF EXPECT_EQ(newSize, mockAuxBuiltInOp.convertToAuxKernel.size()); for (auto &convertToAuxKernel : mockAuxBuiltInOp.convertToAuxKernel) { - EXPECT_EQ(&mockAuxBuiltInOp.baseKernel->getKernelInfo(rootDeviceIndex), &convertToAuxKernel->getKernelInfo(rootDeviceIndex)); + EXPECT_EQ(&mockAuxBuiltInOp.baseKernel->getKernelInfo(), &convertToAuxKernel->getKernelInfo()); } EXPECT_EQ(newSize, mockAuxBuiltInOp.convertToNonAuxKernel.size()); for (auto &convertToNonAuxKernel : mockAuxBuiltInOp.convertToNonAuxKernel) { - EXPECT_EQ(&mockAuxBuiltInOp.baseKernel->getKernelInfo(rootDeviceIndex), &convertToNonAuxKernel->getKernelInfo(rootDeviceIndex)); + EXPECT_EQ(&mockAuxBuiltInOp.baseKernel->getKernelInfo(), &convertToNonAuxKernel->getKernelInfo()); } } @@ -497,7 +497,8 @@ HWTEST2_P(AuxBuiltInTests, givenKernelWithAuxTranslationRequiredWhenEnqueueCalle auto mockProgram = clUniquePtr(new MockProgram(toClDeviceVector(*pClDevice))); auto mockBuiltinKernel = MockKernel::create(*pDevice, mockProgram.get()); - auto pMultiDeviceKernel = new MockMultiDeviceKernel(MockMultiDeviceKernel::toKernelVector(mockBuiltinKernel)); + auto kernelInfos = MockKernel::toKernelInfoContainer(mockBuiltinKernel->getKernelInfo(), rootDeviceIndex); + auto pMultiDeviceKernel = new MockMultiDeviceKernel(MockMultiDeviceKernel::toKernelVector(mockBuiltinKernel), kernelInfos); mockAuxBuiltInOp->usedKernels.at(0).reset(pMultiDeviceKernel); MockKernelWithInternals mockKernel(*pClDevice, pContext); @@ -569,13 +570,13 @@ HWCMDTEST_P(IGFX_GEN8_CORE, AuxBuiltInTests, givenAuxTranslationKernelWhenSettin // read args auto argNum = 0; auto expectedMocs = pDevice->getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CACHELINE_MISALIGNED); - auto sshBase = mockAuxBuiltInOp.convertToAuxKernel[0]->getSurfaceStateHeap(rootDeviceIndex); - auto sshOffset = mockAuxBuiltInOp.convertToAuxKernel[0]->getKernelInfo(rootDeviceIndex).kernelArgInfo[argNum].offsetHeap; + auto sshBase = mockAuxBuiltInOp.convertToAuxKernel[0]->getSurfaceStateHeap(); + auto sshOffset = mockAuxBuiltInOp.convertToAuxKernel[0]->getKernelInfo().kernelArgInfo[argNum].offsetHeap; auto surfaceState = reinterpret_cast(ptrOffset(sshBase, sshOffset)); EXPECT_EQ(expectedMocs, surfaceState->getMemoryObjectControlState()); - sshBase = mockAuxBuiltInOp.convertToNonAuxKernel[0]->getSurfaceStateHeap(rootDeviceIndex); - sshOffset = mockAuxBuiltInOp.convertToNonAuxKernel[0]->getKernelInfo(rootDeviceIndex).kernelArgInfo[argNum].offsetHeap; + sshBase = mockAuxBuiltInOp.convertToNonAuxKernel[0]->getSurfaceStateHeap(); + sshOffset = mockAuxBuiltInOp.convertToNonAuxKernel[0]->getKernelInfo().kernelArgInfo[argNum].offsetHeap; surfaceState = reinterpret_cast(ptrOffset(sshBase, sshOffset)); EXPECT_EQ(expectedMocs, surfaceState->getMemoryObjectControlState()); } @@ -584,13 +585,13 @@ HWCMDTEST_P(IGFX_GEN8_CORE, AuxBuiltInTests, givenAuxTranslationKernelWhenSettin // write args auto argNum = 1; auto expectedMocs = pDevice->getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER); - auto sshBase = mockAuxBuiltInOp.convertToAuxKernel[0]->getSurfaceStateHeap(rootDeviceIndex); - auto sshOffset = mockAuxBuiltInOp.convertToAuxKernel[0]->getKernelInfo(rootDeviceIndex).kernelArgInfo[argNum].offsetHeap; + auto sshBase = mockAuxBuiltInOp.convertToAuxKernel[0]->getSurfaceStateHeap(); + auto sshOffset = mockAuxBuiltInOp.convertToAuxKernel[0]->getKernelInfo().kernelArgInfo[argNum].offsetHeap; auto surfaceState = reinterpret_cast(ptrOffset(sshBase, sshOffset)); EXPECT_EQ(expectedMocs, surfaceState->getMemoryObjectControlState()); - sshBase = mockAuxBuiltInOp.convertToNonAuxKernel[0]->getSurfaceStateHeap(rootDeviceIndex); - sshOffset = mockAuxBuiltInOp.convertToNonAuxKernel[0]->getKernelInfo(rootDeviceIndex).kernelArgInfo[argNum].offsetHeap; + sshBase = mockAuxBuiltInOp.convertToNonAuxKernel[0]->getSurfaceStateHeap(); + sshOffset = mockAuxBuiltInOp.convertToNonAuxKernel[0]->getKernelInfo().kernelArgInfo[argNum].offsetHeap; surfaceState = reinterpret_cast(ptrOffset(sshBase, sshOffset)); EXPECT_EQ(expectedMocs, surfaceState->getMemoryObjectControlState()); } @@ -638,8 +639,8 @@ HWTEST2_P(AuxBuiltInTests, givenAuxToNonAuxTranslationWhenSettingSurfaceStateThe { // read arg auto argNum = 0; - auto sshBase = mockAuxBuiltInOp.convertToNonAuxKernel[0]->getSurfaceStateHeap(rootDeviceIndex); - auto sshOffset = mockAuxBuiltInOp.convertToNonAuxKernel[0]->getKernelInfo(rootDeviceIndex).kernelArgInfo[argNum].offsetHeap; + auto sshBase = mockAuxBuiltInOp.convertToNonAuxKernel[0]->getSurfaceStateHeap(); + auto sshOffset = mockAuxBuiltInOp.convertToNonAuxKernel[0]->getKernelInfo().kernelArgInfo[argNum].offsetHeap; auto surfaceState = reinterpret_cast(ptrOffset(sshBase, sshOffset)); EXPECT_EQ(AUXILIARY_SURFACE_MODE::AUXILIARY_SURFACE_MODE_AUX_CCS_E, surfaceState->getAuxiliarySurfaceMode()); } @@ -647,8 +648,8 @@ HWTEST2_P(AuxBuiltInTests, givenAuxToNonAuxTranslationWhenSettingSurfaceStateThe { // write arg auto argNum = 1; - auto sshBase = mockAuxBuiltInOp.convertToNonAuxKernel[0]->getSurfaceStateHeap(rootDeviceIndex); - auto sshOffset = mockAuxBuiltInOp.convertToNonAuxKernel[0]->getKernelInfo(rootDeviceIndex).kernelArgInfo[argNum].offsetHeap; + auto sshBase = mockAuxBuiltInOp.convertToNonAuxKernel[0]->getSurfaceStateHeap(); + auto sshOffset = mockAuxBuiltInOp.convertToNonAuxKernel[0]->getKernelInfo().kernelArgInfo[argNum].offsetHeap; auto surfaceState = reinterpret_cast(ptrOffset(sshBase, sshOffset)); EXPECT_EQ(AUXILIARY_SURFACE_MODE::AUXILIARY_SURFACE_MODE_AUX_NONE, surfaceState->getAuxiliarySurfaceMode()); } @@ -685,8 +686,8 @@ HWTEST2_P(AuxBuiltInTests, givenNonAuxToAuxTranslationWhenSettingSurfaceStateThe { // read arg auto argNum = 0; - auto sshBase = mockAuxBuiltInOp.convertToAuxKernel[0]->getSurfaceStateHeap(rootDeviceIndex); - auto sshOffset = mockAuxBuiltInOp.convertToAuxKernel[0]->getKernelInfo(rootDeviceIndex).kernelArgInfo[argNum].offsetHeap; + auto sshBase = mockAuxBuiltInOp.convertToAuxKernel[0]->getSurfaceStateHeap(); + auto sshOffset = mockAuxBuiltInOp.convertToAuxKernel[0]->getKernelInfo().kernelArgInfo[argNum].offsetHeap; auto surfaceState = reinterpret_cast(ptrOffset(sshBase, sshOffset)); EXPECT_EQ(AUXILIARY_SURFACE_MODE::AUXILIARY_SURFACE_MODE_AUX_NONE, surfaceState->getAuxiliarySurfaceMode()); } @@ -694,8 +695,8 @@ HWTEST2_P(AuxBuiltInTests, givenNonAuxToAuxTranslationWhenSettingSurfaceStateThe { // write arg auto argNum = 1; - auto sshBase = mockAuxBuiltInOp.convertToAuxKernel[0]->getSurfaceStateHeap(rootDeviceIndex); - auto sshOffset = mockAuxBuiltInOp.convertToAuxKernel[0]->getKernelInfo(rootDeviceIndex).kernelArgInfo[argNum].offsetHeap; + auto sshBase = mockAuxBuiltInOp.convertToAuxKernel[0]->getSurfaceStateHeap(); + auto sshOffset = mockAuxBuiltInOp.convertToAuxKernel[0]->getKernelInfo().kernelArgInfo[argNum].offsetHeap; auto surfaceState = reinterpret_cast(ptrOffset(sshBase, sshOffset)); EXPECT_EQ(AUXILIARY_SURFACE_MODE::AUXILIARY_SURFACE_MODE_AUX_CCS_E, surfaceState->getAuxiliarySurfaceMode()); } @@ -859,8 +860,8 @@ HWTEST_F(BuiltInTests, givenBigOffsetAndSizeWhenBuilderCopyBufferToImageStateles auto kernel = multiDispatchInfo.begin()->getKernel(); ASSERT_NE(nullptr, kernel); - EXPECT_TRUE(kernel->getKernelInfo(rootDeviceIndex).kernelDescriptor.kernelAttributes.supportsBuffersBiggerThan4Gb()); - EXPECT_FALSE(kernel->getKernelInfo(rootDeviceIndex).kernelArgInfo[0].pureStatefulBufferAccess); + EXPECT_TRUE(kernel->getKernelInfo().kernelDescriptor.kernelAttributes.supportsBuffersBiggerThan4Gb()); + EXPECT_FALSE(kernel->getKernelInfo().kernelArgInfo[0].pureStatefulBufferAccess); } HWTEST_F(BuiltInTests, givenBigOffsetAndSizeWhenBuilderCopyImageToBufferStatelessIsUsedThenParamsAreCorrect) { @@ -893,8 +894,8 @@ HWTEST_F(BuiltInTests, givenBigOffsetAndSizeWhenBuilderCopyImageToBufferStateles auto kernel = multiDispatchInfo.begin()->getKernel(); ASSERT_NE(nullptr, kernel); - EXPECT_TRUE(kernel->getKernelInfo(rootDeviceIndex).kernelDescriptor.kernelAttributes.supportsBuffersBiggerThan4Gb()); - EXPECT_FALSE(kernel->getKernelInfo(rootDeviceIndex).kernelArgInfo[0].pureStatefulBufferAccess); + EXPECT_TRUE(kernel->getKernelInfo().kernelDescriptor.kernelAttributes.supportsBuffersBiggerThan4Gb()); + EXPECT_FALSE(kernel->getKernelInfo().kernelArgInfo[0].pureStatefulBufferAccess); } TEST_F(BuiltInTests, GivenUnalignedCopyBufferToBufferWhenDispatchInfoIsCreatedThenParamsAreCorrect) { @@ -917,10 +918,10 @@ TEST_F(BuiltInTests, GivenUnalignedCopyBufferToBufferWhenDispatchInfoIsCreatedTh const Kernel *kernel = multiDispatchInfo.begin()->getKernel(); - EXPECT_EQ(kernel->getKernelInfo(rootDeviceIndex).kernelDescriptor.kernelMetadata.kernelName, "CopyBufferToBufferMiddleMisaligned"); + EXPECT_EQ(kernel->getKernelInfo().kernelDescriptor.kernelMetadata.kernelName, "CopyBufferToBufferMiddleMisaligned"); const auto crossThreadData = kernel->getCrossThreadData(); - const auto crossThreadOffset = kernel->getKernelInfo(rootDeviceIndex).kernelArgInfo[4].kernelArgPatchInfoVector[0].crossthreadOffset; + const auto crossThreadOffset = kernel->getKernelInfo().kernelArgInfo[4].kernelArgPatchInfoVector[0].crossthreadOffset; EXPECT_EQ(8u, *reinterpret_cast(ptrOffset(crossThreadData, crossThreadOffset))); EXPECT_TRUE(compareBuiltinOpParams(multiDispatchInfo.peekBuiltinOpParams(), builtinOpsParams)); @@ -1017,7 +1018,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, BuiltInTests, WhenGettingSchedulerKernelThenCorrectK REQUIRE_OCL_21_OR_SKIP(defaultHwInfo); SchedulerKernel &schedulerKernel = pContext->getSchedulerKernel(); std::string name = SchedulerKernel::schedulerName; - EXPECT_EQ(name, schedulerKernel.getKernelInfo(rootDeviceIndex).kernelDescriptor.kernelMetadata.kernelName); + EXPECT_EQ(name, schedulerKernel.getKernelInfo().kernelDescriptor.kernelMetadata.kernelName); } HWCMDTEST_F(IGFX_GEN8_CORE, BuiltInTests, WhenGetttingSchedulerKernelForSecondTimeThenReuseKernel) { @@ -1141,10 +1142,10 @@ TEST_F(VmeBuiltInTests, GivenVmeBuilderWhenGettingDispatchInfoThenParamsAreCorre constexpr uint32_t vmeImplicitArgsBase = 6; constexpr uint32_t vmeImplicitArgs = 3; - ASSERT_EQ(vmeImplicitArgsBase + vmeImplicitArgs, outDi->getKernel()->getKernelInfo(rootDeviceIndex).kernelArgInfo.size()); + ASSERT_EQ(vmeImplicitArgsBase + vmeImplicitArgs, outDi->getKernel()->getKernelInfo().kernelArgInfo.size()); uint32_t vmeExtraArgsExpectedVals[] = {18, 22, 18}; // height, width, stride for (uint32_t i = 0; i < vmeImplicitArgs; ++i) { - auto &argInfo = outDi->getKernel()->getKernelInfo(rootDeviceIndex).kernelArgInfo[vmeImplicitArgsBase + i]; + auto &argInfo = outDi->getKernel()->getKernelInfo().kernelArgInfo[vmeImplicitArgsBase + i]; ASSERT_EQ(1U, argInfo.kernelArgPatchInfoVector.size()); auto off = argInfo.kernelArgPatchInfoVector[0].crossthreadOffset; EXPECT_EQ(vmeExtraArgsExpectedVals[i], *((uint32_t *)(outDi->getKernel()->getCrossThreadData() + off))); @@ -1198,15 +1199,15 @@ TEST_F(VmeBuiltInTests, GivenAdvancedVmeBuilderWhenGettingDispatchInfoThenParams EXPECT_EQ(srcImageArg, outDi->getKernel()->getKernelArg(srcImageArgNum)); - uint32_t vmeImplicitArgsBase = outDi->getKernel()->getKernelInfo(rootDeviceIndex).getArgNumByName("intraSrcImg"); + uint32_t vmeImplicitArgsBase = outDi->getKernel()->getKernelInfo().getArgNumByName("intraSrcImg"); uint32_t vmeImplicitArgs = 4; - ASSERT_EQ(vmeImplicitArgsBase + vmeImplicitArgs, outDi->getKernel()->getKernelInfo(rootDeviceIndex).kernelArgInfo.size()); + ASSERT_EQ(vmeImplicitArgsBase + vmeImplicitArgs, outDi->getKernel()->getKernelInfo().kernelArgInfo.size()); EXPECT_EQ(srcImageArg, outDi->getKernel()->getKernelArg(vmeImplicitArgsBase)); ++vmeImplicitArgsBase; --vmeImplicitArgs; uint32_t vmeExtraArgsExpectedVals[] = {18, 22, 18}; // height, width, stride for (uint32_t i = 0; i < vmeImplicitArgs; ++i) { - auto &argInfo = outDi->getKernel()->getKernelInfo(rootDeviceIndex).kernelArgInfo[vmeImplicitArgsBase + i]; + auto &argInfo = outDi->getKernel()->getKernelInfo().kernelArgInfo[vmeImplicitArgsBase + i]; ASSERT_EQ(1U, argInfo.kernelArgPatchInfoVector.size()); auto off = argInfo.kernelArgPatchInfoVector[0].crossthreadOffset; EXPECT_EQ(vmeExtraArgsExpectedVals[i], *((uint32_t *)(outDi->getKernel()->getCrossThreadData() + off))); diff --git a/opencl/test/unit_test/command_queue/command_enqueue_fixture.h b/opencl/test/unit_test/command_queue/command_enqueue_fixture.h index 4e05ee3ce7..1d83fd4f31 100644 --- a/opencl/test/unit_test/command_queue/command_enqueue_fixture.h +++ b/opencl/test/unit_test/command_queue/command_enqueue_fixture.h @@ -100,10 +100,9 @@ struct CommandQueueStateless : public CommandQueueHw { void enqueueHandlerHook(const unsigned int commandType, const MultiDispatchInfo &dispatchInfo) override { auto kernel = dispatchInfo.begin()->getKernel(); - auto rootDeviceIndex = this->device->getRootDeviceIndex(); - EXPECT_TRUE(kernel->getKernelInfo(rootDeviceIndex).kernelDescriptor.kernelAttributes.supportsBuffersBiggerThan4Gb()); - EXPECT_FALSE(kernel->getKernelInfo(rootDeviceIndex).kernelArgInfo[0].pureStatefulBufferAccess); + EXPECT_TRUE(kernel->getKernelInfo().kernelDescriptor.kernelAttributes.supportsBuffersBiggerThan4Gb()); + EXPECT_FALSE(kernel->getKernelInfo().kernelArgInfo[0].pureStatefulBufferAccess); } }; @@ -114,15 +113,14 @@ struct CommandQueueStateful : public CommandQueueHw { void enqueueHandlerHook(const unsigned int commandType, const MultiDispatchInfo &dispatchInfo) override { auto kernel = dispatchInfo.begin()->getKernel(); auto &device = dispatchInfo.begin()->getClDevice(); - auto rootDeviceIndex = device.getRootDeviceIndex(); if (!device.areSharedSystemAllocationsAllowed()) { - EXPECT_FALSE(kernel->getKernelInfo(rootDeviceIndex).kernelDescriptor.kernelAttributes.supportsBuffersBiggerThan4Gb()); + EXPECT_FALSE(kernel->getKernelInfo().kernelDescriptor.kernelAttributes.supportsBuffersBiggerThan4Gb()); if (device.getHardwareCapabilities().isStatelesToStatefullWithOffsetSupported) { EXPECT_TRUE(kernel->allBufferArgsStateful); } } else { - EXPECT_TRUE(kernel->getKernelInfo(rootDeviceIndex).kernelDescriptor.kernelAttributes.supportsBuffersBiggerThan4Gb()); - EXPECT_FALSE(kernel->getKernelInfo(rootDeviceIndex).kernelArgInfo[0].pureStatefulBufferAccess); + EXPECT_TRUE(kernel->getKernelInfo().kernelDescriptor.kernelAttributes.supportsBuffersBiggerThan4Gb()); + EXPECT_FALSE(kernel->getKernelInfo().kernelArgInfo[0].pureStatefulBufferAccess); } } }; diff --git a/opencl/test/unit_test/command_queue/command_queue_tests.cpp b/opencl/test/unit_test/command_queue/command_queue_tests.cpp index d398f817ed..4f0a13c386 100644 --- a/opencl/test/unit_test/command_queue/command_queue_tests.cpp +++ b/opencl/test/unit_test/command_queue/command_queue_tests.cpp @@ -1051,7 +1051,7 @@ HWTEST_F(CommandQueueCommandStreamTest, givenDebugKernelWhenSetupDebugSurfaceIsC auto debugSurface = commandStreamReceiver.getDebugSurfaceAllocation(); ASSERT_NE(nullptr, debugSurface); - RENDER_SURFACE_STATE *surfaceState = (RENDER_SURFACE_STATE *)kernel->getSurfaceStateHeap(rootDeviceIndex); + RENDER_SURFACE_STATE *surfaceState = (RENDER_SURFACE_STATE *)kernel->getSurfaceStateHeap(); EXPECT_EQ(debugSurface->getGpuAddress(), surfaceState->getSurfaceBaseAddress()); } @@ -1073,7 +1073,7 @@ HWTEST_F(CommandQueueCommandStreamTest, givenCsrWithDebugSurfaceAllocatedWhenSet cmdQ.setupDebugSurface(kernel.get()); EXPECT_EQ(debugSurface, commandStreamReceiver.getDebugSurfaceAllocation()); - RENDER_SURFACE_STATE *surfaceState = (RENDER_SURFACE_STATE *)kernel->getSurfaceStateHeap(rootDeviceIndex); + RENDER_SURFACE_STATE *surfaceState = (RENDER_SURFACE_STATE *)kernel->getSurfaceStateHeap(); EXPECT_EQ(debugSurface->getGpuAddress(), surfaceState->getSurfaceBaseAddress()); } diff --git a/opencl/test/unit_test/command_queue/dispatch_walker_tests.cpp b/opencl/test/unit_test/command_queue/dispatch_walker_tests.cpp index 0b79c73bc1..735fc8ab3e 100644 --- a/opencl/test/unit_test/command_queue/dispatch_walker_tests.cpp +++ b/opencl/test/unit_test/command_queue/dispatch_walker_tests.cpp @@ -153,7 +153,7 @@ HWTEST_F(DispatchWalkerTest, givenSimd1WhenSetGpgpuWalkerThreadDataThenSimdInWal } HWTEST_F(DispatchWalkerTest, WhenDispatchingWalkerThenCommandStreamMemoryIsntChanged) { - MockKernel kernel(program.get(), MockKernel::toKernelInfoContainer(kernelInfo, rootDeviceIndex), *pClDevice); + MockKernel kernel(program.get(), kernelInfo, *pClDevice); ASSERT_EQ(CL_SUCCESS, kernel.initialize()); auto &commandStream = pCmdQ->getCS(4096); @@ -204,7 +204,7 @@ HWTEST_F(DispatchWalkerTest, GivenNoLocalIdsWhenDispatchingWalkerThenWalkerIsDis threadPayload.UnusedPerThreadConstantPresent = 1; populateKernelDescriptor(kernelInfo.kernelDescriptor, threadPayload); - MockKernel kernel(program.get(), MockKernel::toKernelInfoContainer(kernelInfo, rootDeviceIndex), *pClDevice); + MockKernel kernel(program.get(), kernelInfo, *pClDevice); ASSERT_EQ(CL_SUCCESS, kernel.initialize()); auto &commandStream = pCmdQ->getCS(4096); @@ -248,7 +248,7 @@ HWTEST_F(DispatchWalkerTest, GivenNoLocalIdsWhenDispatchingWalkerThenWalkerIsDis } HWTEST_F(DispatchWalkerTest, GivenDefaultLwsAlgorithmWhenDispatchingWalkerThenDimensionsAreCorrect) { - MockKernel kernel(program.get(), MockKernel::toKernelInfoContainer(kernelInfo, rootDeviceIndex), *pClDevice); + MockKernel kernel(program.get(), kernelInfo, *pClDevice); kernelInfo.workloadInfo.workDimOffset = 0; ASSERT_EQ(CL_SUCCESS, kernel.initialize()); @@ -281,7 +281,7 @@ HWTEST_F(DispatchWalkerTest, GivenSquaredLwsAlgorithmWhenDispatchingWalkerThenDi DebugManagerStateRestore dbgRestore; DebugManager.flags.EnableComputeWorkSizeND.set(false); DebugManager.flags.EnableComputeWorkSizeSquared.set(true); - MockKernel kernel(program.get(), MockKernel::toKernelInfoContainer(kernelInfo, rootDeviceIndex), *pClDevice); + MockKernel kernel(program.get(), kernelInfo, *pClDevice); kernelInfo.workloadInfo.workDimOffset = 0; ASSERT_EQ(CL_SUCCESS, kernel.initialize()); @@ -311,7 +311,7 @@ HWTEST_F(DispatchWalkerTest, GivenSquaredLwsAlgorithmWhenDispatchingWalkerThenDi HWTEST_F(DispatchWalkerTest, GivenNdLwsAlgorithmWhenDispatchingWalkerThenDimensionsAreCorrect) { DebugManagerStateRestore dbgRestore; DebugManager.flags.EnableComputeWorkSizeND.set(true); - MockKernel kernel(program.get(), MockKernel::toKernelInfoContainer(kernelInfo, rootDeviceIndex), *pClDevice); + MockKernel kernel(program.get(), kernelInfo, *pClDevice); kernelInfo.workloadInfo.workDimOffset = 0; ASSERT_EQ(CL_SUCCESS, kernel.initialize()); @@ -342,7 +342,7 @@ HWTEST_F(DispatchWalkerTest, GivenOldLwsAlgorithmWhenDispatchingWalkerThenDimens DebugManagerStateRestore dbgRestore; DebugManager.flags.EnableComputeWorkSizeND.set(false); DebugManager.flags.EnableComputeWorkSizeSquared.set(false); - MockKernel kernel(program.get(), MockKernel::toKernelInfoContainer(kernelInfo, rootDeviceIndex), *pClDevice); + MockKernel kernel(program.get(), kernelInfo, *pClDevice); kernelInfo.workloadInfo.workDimOffset = 0; ASSERT_EQ(CL_SUCCESS, kernel.initialize()); @@ -370,7 +370,7 @@ HWTEST_F(DispatchWalkerTest, GivenOldLwsAlgorithmWhenDispatchingWalkerThenDimens } HWTEST_F(DispatchWalkerTest, GivenNumWorkGroupsWhenDispatchingWalkerThenNumWorkGroupsIsCorrectlySet) { - MockKernel kernel(program.get(), MockKernel::toKernelInfoContainer(kernelInfo, rootDeviceIndex), *pClDevice); + MockKernel kernel(program.get(), kernelInfo, *pClDevice); kernelInfo.workloadInfo.numWorkGroupsOffset[0] = 0; kernelInfo.workloadInfo.numWorkGroupsOffset[1] = 4; kernelInfo.workloadInfo.numWorkGroupsOffset[2] = 8; @@ -406,7 +406,7 @@ HWTEST_F(DispatchWalkerTest, GivenGlobalWorkOffsetWhenDispatchingWalkerThenGloba kernelInfo.workloadInfo.globalWorkOffsetOffsets[0] = 0u; kernelInfo.workloadInfo.globalWorkOffsetOffsets[1] = 4u; kernelInfo.workloadInfo.globalWorkOffsetOffsets[2] = 8u; - MockKernel kernel(program.get(), MockKernel::toKernelInfoContainer(kernelInfo, rootDeviceIndex), *pClDevice); + MockKernel kernel(program.get(), kernelInfo, *pClDevice); ASSERT_EQ(CL_SUCCESS, kernel.initialize()); size_t globalOffsets[3] = {1, 2, 3}; @@ -438,7 +438,7 @@ HWTEST_F(DispatchWalkerTest, GivenGlobalWorkOffsetWhenDispatchingWalkerThenGloba HWTEST_F(DispatchWalkerTest, GivenNoLocalWorkSizeAndDefaultAlgorithmWhenDispatchingWalkerThenLwsIsCorrect) { DebugManagerStateRestore dbgRestore; DebugManager.flags.EnableComputeWorkSizeND.set(false); - MockKernel kernel(program.get(), MockKernel::toKernelInfoContainer(kernelInfo, rootDeviceIndex), *pClDevice); + MockKernel kernel(program.get(), kernelInfo, *pClDevice); kernelInfo.workloadInfo.localWorkSizeOffsets[0] = 0; kernelInfo.workloadInfo.localWorkSizeOffsets[1] = 4; kernelInfo.workloadInfo.localWorkSizeOffsets[2] = 8; @@ -470,7 +470,7 @@ HWTEST_F(DispatchWalkerTest, GivenNoLocalWorkSizeAndDefaultAlgorithmWhenDispatch HWTEST_F(DispatchWalkerTest, GivenNoLocalWorkSizeAndNdOnWhenDispatchingWalkerThenLwsIsCorrect) { DebugManagerStateRestore dbgRestore; DebugManager.flags.EnableComputeWorkSizeND.set(true); - MockKernel kernel(program.get(), MockKernel::toKernelInfoContainer(kernelInfo, rootDeviceIndex), *pClDevice); + MockKernel kernel(program.get(), kernelInfo, *pClDevice); kernelInfo.workloadInfo.localWorkSizeOffsets[0] = 0; kernelInfo.workloadInfo.localWorkSizeOffsets[1] = 4; kernelInfo.workloadInfo.localWorkSizeOffsets[2] = 8; @@ -503,7 +503,7 @@ HWTEST_F(DispatchWalkerTest, GivenNoLocalWorkSizeAndSquaredAlgorithmWhenDispatch DebugManagerStateRestore dbgRestore; DebugManager.flags.EnableComputeWorkSizeSquared.set(true); DebugManager.flags.EnableComputeWorkSizeND.set(false); - MockKernel kernel(program.get(), MockKernel::toKernelInfoContainer(kernelInfo, rootDeviceIndex), *pClDevice); + MockKernel kernel(program.get(), kernelInfo, *pClDevice); kernelInfo.workloadInfo.localWorkSizeOffsets[0] = 0; kernelInfo.workloadInfo.localWorkSizeOffsets[1] = 4; kernelInfo.workloadInfo.localWorkSizeOffsets[2] = 8; @@ -536,7 +536,7 @@ HWTEST_F(DispatchWalkerTest, GivenNoLocalWorkSizeAndSquaredAlgorithmOffAndNdOffW DebugManagerStateRestore dbgRestore; DebugManager.flags.EnableComputeWorkSizeSquared.set(false); DebugManager.flags.EnableComputeWorkSizeND.set(false); - MockKernel kernel(program.get(), MockKernel::toKernelInfoContainer(kernelInfo, rootDeviceIndex), *pClDevice); + MockKernel kernel(program.get(), kernelInfo, *pClDevice); kernelInfo.workloadInfo.localWorkSizeOffsets[0] = 0; kernelInfo.workloadInfo.localWorkSizeOffsets[1] = 4; kernelInfo.workloadInfo.localWorkSizeOffsets[2] = 8; @@ -566,7 +566,7 @@ HWTEST_F(DispatchWalkerTest, GivenNoLocalWorkSizeAndSquaredAlgorithmOffAndNdOffW } HWTEST_F(DispatchWalkerTest, GivenNoLocalWorkSizeWhenDispatchingWalkerThenLwsIsCorrect) { - MockKernel kernel(program.get(), MockKernel::toKernelInfoContainer(kernelInfo, rootDeviceIndex), *pClDevice); + MockKernel kernel(program.get(), kernelInfo, *pClDevice); kernelInfo.workloadInfo.localWorkSizeOffsets[0] = 0; kernelInfo.workloadInfo.localWorkSizeOffsets[1] = 4; kernelInfo.workloadInfo.localWorkSizeOffsets[2] = 8; @@ -597,7 +597,7 @@ HWTEST_F(DispatchWalkerTest, GivenNoLocalWorkSizeWhenDispatchingWalkerThenLwsIsC } HWTEST_F(DispatchWalkerTest, GivenTwoSetsOfLwsOffsetsWhenDispatchingWalkerThenLwsIsCorrect) { - MockKernel kernel(program.get(), MockKernel::toKernelInfoContainer(kernelInfo, rootDeviceIndex), *pClDevice); + MockKernel kernel(program.get(), kernelInfo, *pClDevice); kernelInfo.workloadInfo.localWorkSizeOffsets[0] = 0; kernelInfo.workloadInfo.localWorkSizeOffsets[1] = 4; kernelInfo.workloadInfo.localWorkSizeOffsets[2] = 8; @@ -634,13 +634,13 @@ HWTEST_F(DispatchWalkerTest, GivenTwoSetsOfLwsOffsetsWhenDispatchingWalkerThenLw } HWTEST_F(DispatchWalkerTest, GivenSplitKernelWhenDispatchingWalkerThenLwsIsCorrect) { - MockKernel kernel1(program.get(), MockKernel::toKernelInfoContainer(kernelInfo, rootDeviceIndex), *pClDevice); + MockKernel kernel1(program.get(), kernelInfo, *pClDevice); kernelInfo.workloadInfo.localWorkSizeOffsets[0] = 0; kernelInfo.workloadInfo.localWorkSizeOffsets[1] = 4; kernelInfo.workloadInfo.localWorkSizeOffsets[2] = 8; ASSERT_EQ(CL_SUCCESS, kernel1.initialize()); - MockKernel kernel2(program.get(), MockKernel::toKernelInfoContainer(kernelInfoWithSampler, rootDeviceIndex), *pClDevice); + MockKernel kernel2(program.get(), kernelInfoWithSampler, *pClDevice); kernelInfoWithSampler.workloadInfo.localWorkSizeOffsets[0] = 12; kernelInfoWithSampler.workloadInfo.localWorkSizeOffsets[1] = 16; kernelInfoWithSampler.workloadInfo.localWorkSizeOffsets[2] = 20; @@ -684,8 +684,8 @@ HWTEST_F(DispatchWalkerTest, GivenSplitKernelWhenDispatchingWalkerThenLwsIsCorre } HWTEST_F(DispatchWalkerTest, GivenSplitWalkerWhenDispatchingWalkerThenLwsIsCorrect) { - MockKernel kernel1(program.get(), MockKernel::toKernelInfoContainer(kernelInfo, rootDeviceIndex), *pClDevice); - MockKernel mainKernel(program.get(), MockKernel::toKernelInfoContainer(kernelInfo, rootDeviceIndex), *pClDevice); + MockKernel kernel1(program.get(), kernelInfo, *pClDevice); + MockKernel mainKernel(program.get(), kernelInfo, *pClDevice); kernelInfo.workloadInfo.localWorkSizeOffsets[0] = 0; kernelInfo.workloadInfo.localWorkSizeOffsets[1] = 4; kernelInfo.workloadInfo.localWorkSizeOffsets[2] = 8; @@ -747,7 +747,7 @@ HWTEST_F(DispatchWalkerTest, GivenSplitWalkerWhenDispatchingWalkerThenLwsIsCorre } HWTEST_F(DispatchWalkerTest, GivenBlockedQueueWhenDispatchingWalkerThenCommandSteamIsNotConsumed) { - MockKernel kernel(program.get(), MockKernel::toKernelInfoContainer(kernelInfo, rootDeviceIndex), *pClDevice); + MockKernel kernel(program.get(), kernelInfo, *pClDevice); ASSERT_EQ(CL_SUCCESS, kernel.initialize()); size_t globalOffsets[3] = {0, 0, 0}; @@ -783,7 +783,7 @@ HWTEST_F(DispatchWalkerTest, GivenBlockedQueueWhenDispatchingWalkerThenCommandSt } HWTEST_F(DispatchWalkerTest, GivenBlockedQueueWhenDispatchingWalkerThenRequiredHeaSizesAreTakenFromKernel) { - MockKernel kernel(program.get(), MockKernel::toKernelInfoContainer(kernelInfo, rootDeviceIndex), *pClDevice); + MockKernel kernel(program.get(), kernelInfo, *pClDevice); ASSERT_EQ(CL_SUCCESS, kernel.initialize()); size_t globalOffsets[3] = {0, 0, 0}; @@ -810,9 +810,9 @@ HWTEST_F(DispatchWalkerTest, GivenBlockedQueueWhenDispatchingWalkerThenRequiredH Vec3 localWorkgroupSize(workGroupSize); - auto expectedSizeDSH = HardwareCommandsHelper::getSizeRequiredDSH(rootDeviceIndex, kernel); - auto expectedSizeIOH = HardwareCommandsHelper::getSizeRequiredIOH(rootDeviceIndex, kernel, Math::computeTotalElementsCount(localWorkgroupSize)); - auto expectedSizeSSH = HardwareCommandsHelper::getSizeRequiredSSH(kernel, rootDeviceIndex); + auto expectedSizeDSH = HardwareCommandsHelper::getSizeRequiredDSH(kernel); + auto expectedSizeIOH = HardwareCommandsHelper::getSizeRequiredIOH(kernel, Math::computeTotalElementsCount(localWorkgroupSize)); + auto expectedSizeSSH = HardwareCommandsHelper::getSizeRequiredSSH(kernel); EXPECT_LE(expectedSizeDSH, blockedCommandsData->dsh->getMaxAvailableSpace()); EXPECT_LE(expectedSizeIOH, blockedCommandsData->ioh->getMaxAvailableSpace()); @@ -843,7 +843,7 @@ HWTEST_F(DispatchWalkerTest, givenBlockedEnqueueWhenObtainingCommandStreamThenAl } HWTEST_F(DispatchWalkerTest, GivenBlockedQueueWhenDispatchingWalkerThenRequiredHeapSizesAreTakenFromMdi) { - MockKernel kernel(program.get(), MockKernel::toKernelInfoContainer(kernelInfo, rootDeviceIndex), *pClDevice); + MockKernel kernel(program.get(), kernelInfo, *pClDevice); ASSERT_EQ(CL_SUCCESS, kernel.initialize()); MockMultiDispatchInfo multiDispatchInfo(pClDevice, &kernel); @@ -871,7 +871,7 @@ HWTEST_F(DispatchWalkerTest, GivenBlockedQueueWhenDispatchingWalkerThenRequiredH } HWTEST_F(DispatchWalkerTest, givenBlockedQueueWhenDispatchWalkerIsCalledThenCommandStreamHasGpuAddress) { - MockKernel kernel(program.get(), MockKernel::toKernelInfoContainer(kernelInfo, rootDeviceIndex), *pClDevice); + MockKernel kernel(program.get(), kernelInfo, *pClDevice); ASSERT_EQ(CL_SUCCESS, kernel.initialize()); MockMultiDispatchInfo multiDispatchInfo(pClDevice, &kernel); @@ -892,7 +892,7 @@ HWTEST_F(DispatchWalkerTest, givenBlockedQueueWhenDispatchWalkerIsCalledThenComm } HWTEST_F(DispatchWalkerTest, givenThereAreAllocationsForReuseWhenDispatchWalkerIsCalledThenCommandStreamObtainsReusableAllocation) { - MockKernel kernel(program.get(), MockKernel::toKernelInfoContainer(kernelInfo, rootDeviceIndex), *pClDevice); + MockKernel kernel(program.get(), kernelInfo, *pClDevice); ASSERT_EQ(CL_SUCCESS, kernel.initialize()); MockMultiDispatchInfo multiDispatchInfo(pClDevice, &kernel); @@ -919,9 +919,9 @@ HWTEST_F(DispatchWalkerTest, givenThereAreAllocationsForReuseWhenDispatchWalkerI } HWTEST_F(DispatchWalkerTest, GivenMultipleKernelsWhenDispatchingWalkerThenWorkDimensionsAreCorrect) { - MockKernel kernel1(program.get(), MockKernel::toKernelInfoContainer(kernelInfo, rootDeviceIndex), *pClDevice); + MockKernel kernel1(program.get(), kernelInfo, *pClDevice); ASSERT_EQ(CL_SUCCESS, kernel1.initialize()); - MockKernel kernel2(program.get(), MockKernel::toKernelInfoContainer(kernelInfo, rootDeviceIndex), *pClDevice); + MockKernel kernel2(program.get(), kernelInfo, *pClDevice); ASSERT_EQ(CL_SUCCESS, kernel2.initialize()); MockMultiDispatchInfo multiDispatchInfo(pClDevice, std::vector({&kernel1, &kernel2})); @@ -954,9 +954,9 @@ HWCMDTEST_F(IGFX_GEN8_CORE, DispatchWalkerTest, GivenMultipleKernelsWhenDispatch auto gpuAddress1 = kernelIsaAllocation->getGpuAddressToPatch(); auto gpuAddress2 = kernelIsaWithSamplerAllocation->getGpuAddressToPatch(); - MockKernel kernel1(program.get(), MockKernel::toKernelInfoContainer(kernelInfo, rootDeviceIndex), *pClDevice); + MockKernel kernel1(program.get(), kernelInfo, *pClDevice); ASSERT_EQ(CL_SUCCESS, kernel1.initialize()); - MockKernel kernel2(program.get(), MockKernel::toKernelInfoContainer(kernelInfoWithSampler, rootDeviceIndex), *pClDevice); + MockKernel kernel2(program.get(), kernelInfoWithSampler, *pClDevice); ASSERT_EQ(CL_SUCCESS, kernel2.initialize()); MockMultiDispatchInfo multiDispatchInfo(pClDevice, std::vector({&kernel1, &kernel2})); @@ -1045,9 +1045,9 @@ HWCMDTEST_F(IGFX_GEN8_CORE, DispatchWalkerTest, GivenMultipleKernelsWhenDispatch HWCMDTEST_F(IGFX_GEN8_CORE, DispatchWalkerTest, GivenMultipleKernelsWhenDispatchingWalkerThenGpgpuWalkerIdOffsetIsProgrammedCorrectly) { using GPGPU_WALKER = typename FamilyType::GPGPU_WALKER; - MockKernel kernel1(program.get(), MockKernel::toKernelInfoContainer(kernelInfo, rootDeviceIndex), *pClDevice); + MockKernel kernel1(program.get(), kernelInfo, *pClDevice); ASSERT_EQ(CL_SUCCESS, kernel1.initialize()); - MockKernel kernel2(program.get(), MockKernel::toKernelInfoContainer(kernelInfoWithSampler, rootDeviceIndex), *pClDevice); + MockKernel kernel2(program.get(), kernelInfoWithSampler, *pClDevice); ASSERT_EQ(CL_SUCCESS, kernel2.initialize()); MockMultiDispatchInfo multiDispatchInfo(pClDevice, std::vector({&kernel1, &kernel2})); @@ -1090,9 +1090,9 @@ HWCMDTEST_F(IGFX_GEN8_CORE, DispatchWalkerTest, GivenMultipleKernelsWhenDispatch HWCMDTEST_F(IGFX_GEN8_CORE, DispatchWalkerTest, GivenMultipleKernelsWhenDispatchingWalkerThenThreadGroupIdStartingCoordinatesAreProgrammedCorrectly) { using GPGPU_WALKER = typename FamilyType::GPGPU_WALKER; - MockKernel kernel1(program.get(), MockKernel::toKernelInfoContainer(kernelInfo, rootDeviceIndex), *pClDevice); + MockKernel kernel1(program.get(), kernelInfo, *pClDevice); ASSERT_EQ(CL_SUCCESS, kernel1.initialize()); - MockKernel kernel2(program.get(), MockKernel::toKernelInfoContainer(kernelInfoWithSampler, rootDeviceIndex), *pClDevice); + MockKernel kernel2(program.get(), kernelInfoWithSampler, *pClDevice); ASSERT_EQ(CL_SUCCESS, kernel2.initialize()); MockMultiDispatchInfo multiDispatchInfo(pClDevice, std::vector({&kernel1, &kernel2})); @@ -1139,7 +1139,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, DispatchWalkerTest, GivenMultipleKernelsWhenDispatch HWCMDTEST_F(IGFX_GEN8_CORE, DispatchWalkerTest, GivenMultipleDispatchInfoAndSameKernelWhenDispatchingWalkerThenGpgpuWalkerThreadGroupIdStartingCoordinatesAreCorrectlyProgrammed) { using GPGPU_WALKER = typename FamilyType::GPGPU_WALKER; - MockKernel kernel(program.get(), MockKernel::toKernelInfoContainer(kernelInfo, rootDeviceIndex), *pClDevice); + MockKernel kernel(program.get(), kernelInfo, *pClDevice); ASSERT_EQ(CL_SUCCESS, kernel.initialize()); DispatchInfo di1(pClDevice, &kernel, 1, {100, 1, 1}, {10, 1, 1}, {0, 0, 0}, {100, 1, 1}, {10, 1, 1}, {10, 1, 1}, {10, 1, 1}, {0, 0, 0}); @@ -1192,7 +1192,7 @@ HWTEST_F(DispatchWalkerTest, GivenCacheFlushAfterWalkerDisabledWhenAllocationReq DebugManagerStateRestore dbgRestore; DebugManager.flags.EnableCacheFlushAfterWalker.set(0); - MockKernel kernel1(program.get(), MockKernel::toKernelInfoContainer(kernelInfo, rootDeviceIndex), *pClDevice); + MockKernel kernel1(program.get(), kernelInfo, *pClDevice); ASSERT_EQ(CL_SUCCESS, kernel1.initialize()); kernel1.kernelArgRequiresCacheFlush.resize(1); MockGraphicsAllocation cacheRequiringAllocation; @@ -1225,9 +1225,9 @@ HWTEST_F(DispatchWalkerTest, GivenCacheFlushAfterWalkerEnabledWhenWalkerWithTwoK DebugManagerStateRestore dbgRestore; DebugManager.flags.EnableCacheFlushAfterWalker.set(1); - MockKernel kernel1(program.get(), MockKernel::toKernelInfoContainer(kernelInfo, rootDeviceIndex), *pClDevice); + MockKernel kernel1(program.get(), kernelInfo, *pClDevice); ASSERT_EQ(CL_SUCCESS, kernel1.initialize()); - MockKernel kernel2(program.get(), MockKernel::toKernelInfoContainer(kernelInfoWithSampler, rootDeviceIndex), *pClDevice); + MockKernel kernel2(program.get(), kernelInfoWithSampler, *pClDevice); ASSERT_EQ(CL_SUCCESS, kernel2.initialize()); kernel1.kernelArgRequiresCacheFlush.resize(1); @@ -1263,9 +1263,9 @@ HWTEST_F(DispatchWalkerTest, GivenCacheFlushAfterWalkerEnabledWhenTwoWalkersForQ DebugManagerStateRestore dbgRestore; DebugManager.flags.EnableCacheFlushAfterWalker.set(1); - MockKernel kernel1(program.get(), MockKernel::toKernelInfoContainer(kernelInfo, rootDeviceIndex), *pClDevice); + MockKernel kernel1(program.get(), kernelInfo, *pClDevice); ASSERT_EQ(CL_SUCCESS, kernel1.initialize()); - MockKernel kernel2(program.get(), MockKernel::toKernelInfoContainer(kernelInfoWithSampler, rootDeviceIndex), *pClDevice); + MockKernel kernel2(program.get(), kernelInfoWithSampler, *pClDevice); ASSERT_EQ(CL_SUCCESS, kernel2.initialize()); kernel1.kernelArgRequiresCacheFlush.resize(1); @@ -1328,7 +1328,7 @@ HWTEST_P(DispatchWalkerTestForAuxTranslation, givenKernelWhenAuxToNonAuxWhenTran BuiltinDispatchInfoBuilder &baseBuilder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(EBuiltInOps::AuxTranslation, *pClDevice); auto &builder = static_cast &>(baseBuilder); - MockKernel kernel(program.get(), MockKernel::toKernelInfoContainer(kernelInfo, rootDeviceIndex), *pClDevice); + MockKernel kernel(program.get(), kernelInfo, *pClDevice); kernelInfo.workloadInfo.workDimOffset = 0; ASSERT_EQ(CL_SUCCESS, kernel.initialize()); @@ -1382,7 +1382,7 @@ HWTEST_P(DispatchWalkerTestForAuxTranslation, givenKernelWhenNonAuxToAuxWhenTran BuiltinDispatchInfoBuilder &baseBuilder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(EBuiltInOps::AuxTranslation, *pClDevice); auto &builder = static_cast &>(baseBuilder); - MockKernel kernel(program.get(), MockKernel::toKernelInfoContainer(kernelInfo, rootDeviceIndex), *pClDevice); + MockKernel kernel(program.get(), kernelInfo, *pClDevice); kernelInfo.workloadInfo.workDimOffset = 0; ASSERT_EQ(CL_SUCCESS, kernel.initialize()); diff --git a/opencl/test/unit_test/command_queue/enqueue_copy_buffer_rect_tests.cpp b/opencl/test/unit_test/command_queue/enqueue_copy_buffer_rect_tests.cpp index 02ca9b4a2e..2b7032f09c 100644 --- a/opencl/test/unit_test/command_queue/enqueue_copy_buffer_rect_tests.cpp +++ b/opencl/test/unit_test/command_queue/enqueue_copy_buffer_rect_tests.cpp @@ -184,7 +184,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueCopyBufferRectTest, WhenCopyingBufferRect2DTh EXPECT_NE(dshBefore, pDSH->getUsed()); EXPECT_NE(iohBefore, pIOH->getUsed()); - if (kernel->requiresSshForBuffers(rootDeviceIndex)) { + if (kernel->requiresSshForBuffers()) { EXPECT_NE(sshBefore, pSSH->getUsed()); } } @@ -212,8 +212,8 @@ HWTEST_F(EnqueueCopyBufferRectTest, WhenCopyingBufferRectStatelessThenStatelessK auto kernel = multiDispatchInfo.begin()->getKernel(); ASSERT_NE(nullptr, kernel); - EXPECT_TRUE(kernel->getKernelInfo(rootDeviceIndex).kernelDescriptor.kernelAttributes.supportsBuffersBiggerThan4Gb()); - EXPECT_FALSE(kernel->getKernelInfo(rootDeviceIndex).kernelArgInfo[0].pureStatefulBufferAccess); + EXPECT_TRUE(kernel->getKernelInfo().kernelDescriptor.kernelAttributes.supportsBuffersBiggerThan4Gb()); + EXPECT_FALSE(kernel->getKernelInfo().kernelArgInfo[0].pureStatefulBufferAccess); } HWTEST_F(EnqueueCopyBufferRectTest, WhenCopyingBufferRect2DThenL3ProgrammingIsCorrect) { diff --git a/opencl/test/unit_test/command_queue/enqueue_copy_buffer_tests.cpp b/opencl/test/unit_test/command_queue/enqueue_copy_buffer_tests.cpp index a9d02fd282..f2fada936a 100644 --- a/opencl/test/unit_test/command_queue/enqueue_copy_buffer_tests.cpp +++ b/opencl/test/unit_test/command_queue/enqueue_copy_buffer_tests.cpp @@ -205,7 +205,7 @@ HWTEST_F(EnqueueCopyBufferTest, WhenCopyingBufferThenIndirectDataGetsAdded) { EXPECT_TRUE(UnitTestHelper::evaluateDshUsage(dshBefore, pDSH->getUsed(), kernel, rootDeviceIndex)); EXPECT_NE(iohBefore, pIOH->getUsed()); - if (kernel->requiresSshForBuffers(rootDeviceIndex)) { + if (kernel->requiresSshForBuffers()) { EXPECT_NE(sshBefore, pSSH->getUsed()); } } @@ -231,8 +231,8 @@ HWTEST_F(EnqueueCopyBufferTest, WhenCopyingBufferStatelessThenStatelessKernelIsU EXPECT_NE(0u, multiDispatchInfo.size()); auto kernel = multiDispatchInfo.begin()->getKernel(); - EXPECT_TRUE(kernel->getKernelInfo(rootDeviceIndex).kernelDescriptor.kernelAttributes.supportsBuffersBiggerThan4Gb()); - EXPECT_FALSE(kernel->getKernelInfo(rootDeviceIndex).kernelArgInfo[0].pureStatefulBufferAccess); + EXPECT_TRUE(kernel->getKernelInfo().kernelDescriptor.kernelAttributes.supportsBuffersBiggerThan4Gb()); + EXPECT_FALSE(kernel->getKernelInfo().kernelArgInfo[0].pureStatefulBufferAccess); } HWTEST_F(EnqueueCopyBufferTest, WhenCopyingBufferThenL3ProgrammingIsCorrect) { diff --git a/opencl/test/unit_test/command_queue/enqueue_copy_buffer_to_image_tests.cpp b/opencl/test/unit_test/command_queue/enqueue_copy_buffer_to_image_tests.cpp index 9eb4066b6a..1ce2de4a3c 100644 --- a/opencl/test/unit_test/command_queue/enqueue_copy_buffer_to_image_tests.cpp +++ b/opencl/test/unit_test/command_queue/enqueue_copy_buffer_to_image_tests.cpp @@ -170,7 +170,7 @@ HWTEST_F(EnqueueCopyBufferToImageTest, WhenCopyingBufferToImageThenSurfaceStateI mockCmdQ->storeMultiDispatchInfo = true; enqueueCopyBufferToImage(); - auto index = mockCmdQ->storedMultiDispatchInfo.begin()->getKernel()->getKernelInfo(rootDeviceIndex).kernelArgInfo[1].offsetHeap / sizeof(RENDER_SURFACE_STATE); + auto index = mockCmdQ->storedMultiDispatchInfo.begin()->getKernel()->getKernelInfo().kernelArgInfo[1].offsetHeap / sizeof(RENDER_SURFACE_STATE); const auto &surfaceState = getSurfaceState(&pCmdQ->getIndirectHeap(IndirectHeap::SURFACE_STATE, 0), static_cast(index)); const auto &imageDesc = dstImage->getImageDesc(); diff --git a/opencl/test/unit_test/command_queue/enqueue_copy_image_tests.cpp b/opencl/test/unit_test/command_queue/enqueue_copy_image_tests.cpp index f92bc124dd..eecb0620b5 100644 --- a/opencl/test/unit_test/command_queue/enqueue_copy_image_tests.cpp +++ b/opencl/test/unit_test/command_queue/enqueue_copy_image_tests.cpp @@ -172,7 +172,7 @@ HWTEST_F(EnqueueCopyImageTest, WhenCopyingImageThenSurfaceStateIsCorrect) { enqueueCopyImage(); for (uint32_t i = 0; i < 2; ++i) { - auto index = mockCmdQ->storedMultiDispatchInfo.begin()->getKernel()->getKernelInfo(rootDeviceIndex).kernelArgInfo[i].offsetHeap / sizeof(RENDER_SURFACE_STATE); + auto index = mockCmdQ->storedMultiDispatchInfo.begin()->getKernel()->getKernelInfo().kernelArgInfo[i].offsetHeap / sizeof(RENDER_SURFACE_STATE); const auto &surfaceState = getSurfaceState(&pCmdQ->getIndirectHeap(IndirectHeap::SURFACE_STATE, 0), static_cast(index)); const auto &imageDesc = dstImage->getImageDesc(); EXPECT_EQ(imageDesc.image_width, surfaceState.getWidth()); @@ -191,11 +191,11 @@ HWTEST_F(EnqueueCopyImageTest, WhenCopyingImageThenSurfaceStateIsCorrect) { EXPECT_EQ(RENDER_SURFACE_STATE::SURFACE_VERTICAL_ALIGNMENT_VALIGN_4, surfaceState.getSurfaceVerticalAlignment()); } - auto srcIndex = mockCmdQ->storedMultiDispatchInfo.begin()->getKernel()->getKernelInfo(rootDeviceIndex).kernelArgInfo[0].offsetHeap / sizeof(RENDER_SURFACE_STATE); + auto srcIndex = mockCmdQ->storedMultiDispatchInfo.begin()->getKernel()->getKernelInfo().kernelArgInfo[0].offsetHeap / sizeof(RENDER_SURFACE_STATE); const auto &srcSurfaceState = getSurfaceState(&pCmdQ->getIndirectHeap(IndirectHeap::SURFACE_STATE, 0), static_cast(srcIndex)); EXPECT_EQ(srcImage->getGraphicsAllocation(pClDevice->getRootDeviceIndex())->getGpuAddress(), srcSurfaceState.getSurfaceBaseAddress()); - auto dstIndex = mockCmdQ->storedMultiDispatchInfo.begin()->getKernel()->getKernelInfo(rootDeviceIndex).kernelArgInfo[1].offsetHeap / sizeof(RENDER_SURFACE_STATE); + auto dstIndex = mockCmdQ->storedMultiDispatchInfo.begin()->getKernel()->getKernelInfo().kernelArgInfo[1].offsetHeap / sizeof(RENDER_SURFACE_STATE); const auto &dstSurfaceState = getSurfaceState(&pCmdQ->getIndirectHeap(IndirectHeap::SURFACE_STATE, 0), static_cast(dstIndex)); EXPECT_EQ(dstImage->getGraphicsAllocation(pClDevice->getRootDeviceIndex())->getGpuAddress(), dstSurfaceState.getSurfaceBaseAddress()); } diff --git a/opencl/test/unit_test/command_queue/enqueue_debug_kernel_tests.cpp b/opencl/test/unit_test/command_queue/enqueue_debug_kernel_tests.cpp index 598086d171..fe2fb3f3ca 100644 --- a/opencl/test/unit_test/command_queue/enqueue_debug_kernel_tests.cpp +++ b/opencl/test/unit_test/command_queue/enqueue_debug_kernel_tests.cpp @@ -161,7 +161,7 @@ HWTEST_F(EnqueueDebugKernelSimpleTest, givenKernelFromProgramWithDebugEnabledWhe std::unique_ptr> mockCmdQ(new GMockCommandQueueHw(context, pClDevice, 0)); mockCmdQ->getGpgpuCommandStreamReceiver().allocateDebugSurface(SipKernel::maxDbgSurfaceSize); - EXPECT_TRUE(isValidOffset(kernel->getDefaultKernelInfo().kernelDescriptor.payloadMappings.implicitArgs.systemThreadSurfaceAddress.bindful)); + EXPECT_TRUE(isValidOffset(kernel->getKernelInfo().kernelDescriptor.payloadMappings.implicitArgs.systemThreadSurfaceAddress.bindful)); EXPECT_CALL(*mockCmdQ.get(), setupDebugSurface(kernel.get())).Times(1).RetiresOnSaturation(); @@ -177,7 +177,7 @@ HWTEST_F(EnqueueDebugKernelSimpleTest, givenKernelWithoutSystemThreadSurfaceWhen std::unique_ptr kernel(MockKernel::create(*pDevice, &program)); kernel->initialize(); - EXPECT_FALSE(isValidOffset(kernel->getDefaultKernelInfo().kernelDescriptor.payloadMappings.implicitArgs.systemThreadSurfaceAddress.bindful)); + EXPECT_FALSE(isValidOffset(kernel->getKernelInfo().kernelDescriptor.payloadMappings.implicitArgs.systemThreadSurfaceAddress.bindful)); std::unique_ptr> mockCmdQ(new GMockCommandQueueHw(context, pClDevice, 0)); diff --git a/opencl/test/unit_test/command_queue/enqueue_fill_buffer_tests.cpp b/opencl/test/unit_test/command_queue/enqueue_fill_buffer_tests.cpp index e65311c135..1eba9b67b1 100644 --- a/opencl/test/unit_test/command_queue/enqueue_fill_buffer_tests.cpp +++ b/opencl/test/unit_test/command_queue/enqueue_fill_buffer_tests.cpp @@ -118,7 +118,7 @@ HWTEST_F(EnqueueFillBufferCmdTests, WhenFillingBufferThenIndirectDataGetsAdded) EXPECT_TRUE(UnitTestHelper::evaluateDshUsage(dshBefore, pDSH->getUsed(), kernel, rootDeviceIndex)); EXPECT_NE(iohBefore, pIOH->getUsed()); - if (kernel->requiresSshForBuffers(rootDeviceIndex)) { + if (kernel->requiresSshForBuffers()) { EXPECT_NE(sshBefore, pSSH->getUsed()); } @@ -147,7 +147,7 @@ HWTEST_F(EnqueueFillBufferCmdTests, FillBufferRightLeftover) { EXPECT_EQ(1u, mdi.size()); auto kernel = mdi.begin()->getKernel(); - EXPECT_STREQ("FillBufferRightLeftover", kernel->getKernelInfo(rootDeviceIndex).kernelDescriptor.kernelMetadata.kernelName.c_str()); + EXPECT_STREQ("FillBufferRightLeftover", kernel->getKernelInfo().kernelDescriptor.kernelMetadata.kernelName.c_str()); context.getMemoryManager()->freeGraphicsMemory(patternAllocation); } @@ -174,7 +174,7 @@ HWTEST_F(EnqueueFillBufferCmdTests, FillBufferMiddle) { EXPECT_EQ(1u, mdi.size()); auto kernel = mdi.begin()->getKernel(); - EXPECT_STREQ("FillBufferMiddle", kernel->getKernelInfo(rootDeviceIndex).kernelDescriptor.kernelMetadata.kernelName.c_str()); + EXPECT_STREQ("FillBufferMiddle", kernel->getKernelInfo().kernelDescriptor.kernelMetadata.kernelName.c_str()); context.getMemoryManager()->freeGraphicsMemory(patternAllocation); } @@ -201,7 +201,7 @@ HWTEST_F(EnqueueFillBufferCmdTests, FillBufferLeftLeftover) { EXPECT_EQ(1u, mdi.size()); auto kernel = mdi.begin()->getKernel(); - EXPECT_STREQ("FillBufferLeftLeftover", kernel->getKernelInfo(rootDeviceIndex).kernelDescriptor.kernelMetadata.kernelName.c_str()); + EXPECT_STREQ("FillBufferLeftLeftover", kernel->getKernelInfo().kernelDescriptor.kernelMetadata.kernelName.c_str()); context.getMemoryManager()->freeGraphicsMemory(patternAllocation); } @@ -364,8 +364,8 @@ HWTEST_F(EnqueueFillBufferCmdTests, WhenFillingBufferStatelessThenStatelessKerne auto kernel = multiDispatchInfo.begin()->getKernel(); ASSERT_NE(nullptr, kernel); - EXPECT_TRUE(kernel->getKernelInfo(rootDeviceIndex).kernelDescriptor.kernelAttributes.supportsBuffersBiggerThan4Gb()); - EXPECT_FALSE(kernel->getKernelInfo(rootDeviceIndex).kernelArgInfo[0].pureStatefulBufferAccess); + EXPECT_TRUE(kernel->getKernelInfo().kernelDescriptor.kernelAttributes.supportsBuffersBiggerThan4Gb()); + EXPECT_FALSE(kernel->getKernelInfo().kernelArgInfo[0].pureStatefulBufferAccess); context.getMemoryManager()->freeGraphicsMemory(patternAllocation); } diff --git a/opencl/test/unit_test/command_queue/enqueue_fill_image_tests.cpp b/opencl/test/unit_test/command_queue/enqueue_fill_image_tests.cpp index a642a75664..2e3bea2d20 100644 --- a/opencl/test/unit_test/command_queue/enqueue_fill_image_tests.cpp +++ b/opencl/test/unit_test/command_queue/enqueue_fill_image_tests.cpp @@ -176,7 +176,7 @@ HWTEST_F(EnqueueFillImageTest, WhenFillingImageThenSurfaceStateIsCorrect) { mockCmdQ->storeMultiDispatchInfo = true; enqueueFillImage(); - auto index = mockCmdQ->storedMultiDispatchInfo.begin()->getKernel()->getKernelInfo(rootDeviceIndex).kernelArgInfo[0].offsetHeap / sizeof(RENDER_SURFACE_STATE); + auto index = mockCmdQ->storedMultiDispatchInfo.begin()->getKernel()->getKernelInfo().kernelArgInfo[0].offsetHeap / sizeof(RENDER_SURFACE_STATE); const auto &surfaceState = getSurfaceState(&pCmdQ->getIndirectHeap(IndirectHeap::SURFACE_STATE, 0), static_cast(index)); const auto &imageDesc = image->getImageDesc(); diff --git a/opencl/test/unit_test/command_queue/enqueue_handler_tests.cpp b/opencl/test/unit_test/command_queue/enqueue_handler_tests.cpp index 53304de7ca..38bb94aac1 100644 --- a/opencl/test/unit_test/command_queue/enqueue_handler_tests.cpp +++ b/opencl/test/unit_test/command_queue/enqueue_handler_tests.cpp @@ -578,7 +578,7 @@ HWTEST_F(EnqueueHandlerTest, givenKernelUsingSyncBufferWhenEnqueuingKernelThenSs kernel->initialize(); auto bindingTableState = reinterpret_cast( - ptrOffset(kernel->getSurfaceStateHeap(rootDeviceIndex), sPatchBindingTableState.Offset)); + ptrOffset(kernel->getSurfaceStateHeap(), sPatchBindingTableState.Offset)); bindingTableState->setSurfaceStatePointer(0); auto mockCmdQ = clUniquePtr(new MockCommandQueueHw(context, pClDevice, 0)); diff --git a/opencl/test/unit_test/command_queue/enqueue_kernel_1_tests.cpp b/opencl/test/unit_test/command_queue/enqueue_kernel_1_tests.cpp index faeae8cf8a..7204d0830a 100644 --- a/opencl/test/unit_test/command_queue/enqueue_kernel_1_tests.cpp +++ b/opencl/test/unit_test/command_queue/enqueue_kernel_1_tests.cpp @@ -433,7 +433,7 @@ HWTEST_F(EnqueueKernelTest, addsIndirectData) { callOneWorkItemNDRKernel(); EXPECT_TRUE(UnitTestHelper::evaluateDshUsage(dshBefore, pDSH->getUsed(), pKernel, rootDeviceIndex)); EXPECT_NE(iohBefore, pIOH->getUsed()); - if (pKernel->requiresSshForBuffers(rootDeviceIndex) || (pKernel->getKernelInfo(rootDeviceIndex).patchInfo.imageMemObjKernelArgs.size() > 0)) { + if (pKernel->requiresSshForBuffers() || (pKernel->getKernelInfo().patchInfo.imageMemObjKernelArgs.size() > 0)) { EXPECT_NE(sshBefore, pSSH->getUsed()); } } diff --git a/opencl/test/unit_test/command_queue/enqueue_kernel_2_tests.cpp b/opencl/test/unit_test/command_queue/enqueue_kernel_2_tests.cpp index 919a400f10..80846de7e4 100644 --- a/opencl/test/unit_test/command_queue/enqueue_kernel_2_tests.cpp +++ b/opencl/test/unit_test/command_queue/enqueue_kernel_2_tests.cpp @@ -918,13 +918,13 @@ HWTEST_F(EnqueueAuxKernelTests, givenKernelWithRequiredAuxTranslationWhenEnqueue // before kernel EXPECT_EQ(1u, std::get(cmdQ.dispatchAuxTranslationInputs.at(0))); // aux before NDR auto kernelBefore = std::get(cmdQ.dispatchAuxTranslationInputs.at(0)); - EXPECT_EQ("fullCopy", kernelBefore->getKernelInfo(rootDeviceIndex).kernelDescriptor.kernelMetadata.kernelName); + EXPECT_EQ("fullCopy", kernelBefore->getKernelInfo().kernelDescriptor.kernelMetadata.kernelName); EXPECT_TRUE(kernelBefore->isBuiltIn); // after kernel EXPECT_EQ(3u, std::get(cmdQ.dispatchAuxTranslationInputs.at(1))); // aux + NDR + aux auto kernelAfter = std::get(cmdQ.dispatchAuxTranslationInputs.at(1)); - EXPECT_EQ("fullCopy", kernelAfter->getKernelInfo(rootDeviceIndex).kernelDescriptor.kernelMetadata.kernelName); + EXPECT_EQ("fullCopy", kernelAfter->getKernelInfo().kernelDescriptor.kernelMetadata.kernelName); EXPECT_TRUE(kernelAfter->isBuiltIn); } diff --git a/opencl/test/unit_test/command_queue/enqueue_read_buffer_rect_tests.cpp b/opencl/test/unit_test/command_queue/enqueue_read_buffer_rect_tests.cpp index dd88bfce4a..261206e67a 100644 --- a/opencl/test/unit_test/command_queue/enqueue_read_buffer_rect_tests.cpp +++ b/opencl/test/unit_test/command_queue/enqueue_read_buffer_rect_tests.cpp @@ -197,7 +197,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueReadBufferRectTest, WhenReadingBufferThenIndi EXPECT_NE(dshBefore, pDSH->getUsed()); EXPECT_NE(iohBefore, pIOH->getUsed()); - if (kernel->requiresSshForBuffers(rootDeviceIndex)) { + if (kernel->requiresSshForBuffers()) { EXPECT_NE(sshBefore, pSSH->getUsed()); } } @@ -570,7 +570,7 @@ HWTEST_F(EnqueueReadWriteBufferRectDispatch, givenOffsetResultingInMisalignedPtr parseCommands(*cmdQ); - auto &kernelInfo = kernel->getKernelInfo(device->getRootDeviceIndex()); + auto &kernelInfo = kernel->getKernelInfo(); if (hwInfo->capabilityTable.gpuAddressSpace == MemoryConstants::max48BitAddress) { const auto &surfaceStateDst = getSurfaceState(&cmdQ->getIndirectHeap(IndirectHeap::SURFACE_STATE, 0), 1); diff --git a/opencl/test/unit_test/command_queue/enqueue_read_buffer_tests.cpp b/opencl/test/unit_test/command_queue/enqueue_read_buffer_tests.cpp index 01aca975ac..e355494a4b 100644 --- a/opencl/test/unit_test/command_queue/enqueue_read_buffer_tests.cpp +++ b/opencl/test/unit_test/command_queue/enqueue_read_buffer_tests.cpp @@ -159,7 +159,7 @@ HWTEST_F(EnqueueReadBufferTypeTest, addsIndirectData) { EXPECT_TRUE(UnitTestHelper::evaluateDshUsage(dshBefore, pDSH->getUsed(), kernel, rootDeviceIndex)); EXPECT_NE(iohBefore, pIOH->getUsed()); - if (kernel->requiresSshForBuffers(rootDeviceIndex)) { + if (kernel->requiresSshForBuffers()) { EXPECT_NE(sshBefore, pSSH->getUsed()); } } diff --git a/opencl/test/unit_test/command_queue/enqueue_svm_mem_copy_tests.cpp b/opencl/test/unit_test/command_queue/enqueue_svm_mem_copy_tests.cpp index e285886ee8..329e6ceda9 100644 --- a/opencl/test/unit_test/command_queue/enqueue_svm_mem_copy_tests.cpp +++ b/opencl/test/unit_test/command_queue/enqueue_svm_mem_copy_tests.cpp @@ -137,7 +137,7 @@ HWTEST_F(EnqueueSvmMemCopyTest, givenEnqueueSVMMemcpyWhenUsingCopyBufferToBuffer EXPECT_EQ(Vec3(256 / middleElSize, 1, 1), di->getGWS()); auto kernel = mdi->begin()->getKernel(); - EXPECT_EQ("CopyBufferToBufferMiddle", kernel->getKernelInfo(rootDeviceIndex).kernelDescriptor.kernelMetadata.kernelName); + EXPECT_EQ("CopyBufferToBufferMiddle", kernel->getKernelInfo().kernelDescriptor.kernelMetadata.kernelName); } HWTEST_F(EnqueueSvmMemCopyTest, givenEnqueueSVMMemcpyWhenUsingCopyBufferToBufferBuilderAndSrcHostPtrThenItConfiguredWithBuiltinOpsAndProducesDispatchInfo) { diff --git a/opencl/test/unit_test/command_queue/enqueue_svm_mem_fill_tests.cpp b/opencl/test/unit_test/command_queue/enqueue_svm_mem_fill_tests.cpp index 4e85c9c6c3..a3c5e9cf40 100644 --- a/opencl/test/unit_test/command_queue/enqueue_svm_mem_fill_tests.cpp +++ b/opencl/test/unit_test/command_queue/enqueue_svm_mem_fill_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2017-2020 Intel Corporation + * Copyright (C) 2017-2021 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -139,7 +139,7 @@ HWTEST_P(EnqueueSvmMemFillTest, givenEnqueueSVMMemFillWhenUsingFillBufferBuilder EXPECT_EQ(Vec3(256 / middleElSize, 1, 1), di->getGWS()); auto kernel = di->getKernel(); - EXPECT_STREQ("FillBufferMiddle", kernel->getKernelInfo(rootDeviceIndex).kernelDescriptor.kernelMetadata.kernelName.c_str()); + EXPECT_STREQ("FillBufferMiddle", kernel->getKernelInfo().kernelDescriptor.kernelMetadata.kernelName.c_str()); } INSTANTIATE_TEST_CASE_P(size_t, diff --git a/opencl/test/unit_test/command_queue/enqueue_svm_tests.cpp b/opencl/test/unit_test/command_queue/enqueue_svm_tests.cpp index 43f8295e12..07a147ff42 100644 --- a/opencl/test/unit_test/command_queue/enqueue_svm_tests.cpp +++ b/opencl/test/unit_test/command_queue/enqueue_svm_tests.cpp @@ -738,7 +738,7 @@ TEST_F(EnqueueSvmTest, GivenSvmAllocationWhenEnqueingKernelThenSuccessIsReturned std::unique_ptr program(Program::createBuiltInFromSource("FillBufferBytes", context, context->getDevices(), &retVal)); program->build(program->getDevices(), nullptr, false); - std::unique_ptr kernel(Kernel::create(program.get(), program->getKernelInfosForKernel("FillBufferBytes"), *context->getDevice(0), &retVal)); + std::unique_ptr kernel(Kernel::create(program.get(), program->getKernelInfoForKernel("FillBufferBytes"), *context->getDevice(0), &retVal)); kernel->setSvmKernelExecInfo(pSvmAlloc); diff --git a/opencl/test/unit_test/command_queue/enqueue_write_buffer_rect_tests.cpp b/opencl/test/unit_test/command_queue/enqueue_write_buffer_rect_tests.cpp index cd9909923a..02316cfc69 100644 --- a/opencl/test/unit_test/command_queue/enqueue_write_buffer_rect_tests.cpp +++ b/opencl/test/unit_test/command_queue/enqueue_write_buffer_rect_tests.cpp @@ -171,7 +171,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueWriteBufferRectTest, WhenWritingBufferThenInd EXPECT_NE(dshBefore, pDSH->getUsed()); EXPECT_NE(iohBefore, pIOH->getUsed()); - if (kernel->requiresSshForBuffers(rootDeviceIndex)) { + if (kernel->requiresSshForBuffers()) { EXPECT_NE(sshBefore, pSSH->getUsed()); } } @@ -568,7 +568,7 @@ HWTEST_F(EnqueueReadWriteBufferRectDispatch, givenOffsetResultingInMisalignedPtr cmdQ->finish(); parseCommands(*cmdQ); - auto &kernelInfo = kernel->getKernelInfo(device->getRootDeviceIndex()); + auto &kernelInfo = kernel->getKernelInfo(); if (hwInfo->capabilityTable.gpuAddressSpace == MemoryConstants::max48BitAddress) { const auto &surfaceState = getSurfaceState(&cmdQ->getIndirectHeap(IndirectHeap::SURFACE_STATE, 0), 0); diff --git a/opencl/test/unit_test/command_queue/enqueue_write_buffer_tests.cpp b/opencl/test/unit_test/command_queue/enqueue_write_buffer_tests.cpp index f3a51b2270..baadd47f7e 100644 --- a/opencl/test/unit_test/command_queue/enqueue_write_buffer_tests.cpp +++ b/opencl/test/unit_test/command_queue/enqueue_write_buffer_tests.cpp @@ -157,7 +157,7 @@ HWTEST_F(EnqueueWriteBufferTypeTest, WhenWritingBufferThenIndirectDataIsAdded) { EXPECT_TRUE(UnitTestHelper::evaluateDshUsage(dshBefore, pDSH->getUsed(), kernel, rootDeviceIndex)); EXPECT_NE(iohBefore, pIOH->getUsed()); - if (kernel->requiresSshForBuffers(rootDeviceIndex)) { + if (kernel->requiresSshForBuffers()) { EXPECT_NE(sshBefore, pSSH->getUsed()); } } diff --git a/opencl/test/unit_test/command_queue/enqueue_write_image_tests.cpp b/opencl/test/unit_test/command_queue/enqueue_write_image_tests.cpp index 333005805c..c65b53aca1 100644 --- a/opencl/test/unit_test/command_queue/enqueue_write_image_tests.cpp +++ b/opencl/test/unit_test/command_queue/enqueue_write_image_tests.cpp @@ -180,7 +180,7 @@ HWTEST_F(EnqueueWriteImageTest, WhenWritingImageThenSurfaceStateIsProgrammedCorr mockCmdQ->storeMultiDispatchInfo = true; enqueueWriteImage(); - auto index = mockCmdQ->storedMultiDispatchInfo.begin()->getKernel()->getKernelInfo(rootDeviceIndex).kernelArgInfo[1].offsetHeap / sizeof(RENDER_SURFACE_STATE); + auto index = mockCmdQ->storedMultiDispatchInfo.begin()->getKernel()->getKernelInfo().kernelArgInfo[1].offsetHeap / sizeof(RENDER_SURFACE_STATE); const auto &surfaceState = getSurfaceState(&pCmdQ->getIndirectHeap(IndirectHeap::SURFACE_STATE, 0), static_cast(index)); diff --git a/opencl/test/unit_test/command_queue/get_size_required_buffer_tests.cpp b/opencl/test/unit_test/command_queue/get_size_required_buffer_tests.cpp index 02edfc2a22..9912cc7a27 100644 --- a/opencl/test/unit_test/command_queue/get_size_required_buffer_tests.cpp +++ b/opencl/test/unit_test/command_queue/get_size_required_buffer_tests.cpp @@ -400,7 +400,7 @@ HWTEST_F(GetSizeRequiredBufferTest, givenMultipleKernelRequiringSshWhenTotalSize builder.buildDispatchInfos(multiDispatchInfo); builder.buildDispatchInfos(multiDispatchInfo); - auto sizeSSH = multiDispatchInfo.begin()->getKernel()->getSurfaceStateHeapSize(rootDeviceIndex); + auto sizeSSH = multiDispatchInfo.begin()->getKernel()->getSurfaceStateHeapSize(); sizeSSH += sizeSSH ? FamilyType::BINDING_TABLE_STATE::SURFACESTATEPOINTER_ALIGN_SIZE : 0; sizeSSH = alignUp(sizeSSH, MemoryConstants::cacheLineSize); @@ -437,9 +437,9 @@ HWTEST_F(GetSizeRequiredBufferTest, GivenHelloWorldKernelWhenEnqueingKernelThenH auto sshAfter = pSSH->getUsed(); auto expectedSizeCS = EnqueueOperation::getSizeRequiredCS(CL_COMMAND_NDRANGE_KERNEL, false, false, *pCmdQ, KernelFixture::pKernel, {}); - auto expectedSizeDSH = HardwareCommandsHelper::getSizeRequiredDSH(rootDeviceIndex, *KernelFixture::pKernel); - auto expectedSizeIOH = HardwareCommandsHelper::getSizeRequiredIOH(rootDeviceIndex, *KernelFixture::pKernel, workSize[0]); - auto expectedSizeSSH = HardwareCommandsHelper::getSizeRequiredSSH(*KernelFixture::pKernel, rootDeviceIndex); + auto expectedSizeDSH = HardwareCommandsHelper::getSizeRequiredDSH(*KernelFixture::pKernel); + auto expectedSizeIOH = HardwareCommandsHelper::getSizeRequiredIOH(*KernelFixture::pKernel, workSize[0]); + auto expectedSizeSSH = HardwareCommandsHelper::getSizeRequiredSSH(*KernelFixture::pKernel); // Since each enqueue* may flush, we may see a MI_BATCH_BUFFER_END appended. expectedSizeCS += sizeof(typename FamilyType::MI_BATCH_BUFFER_END); @@ -476,9 +476,9 @@ HWTEST_F(GetSizeRequiredBufferTest, GivenKernelWithSimpleArgWhenEnqueingKernelTh auto sshAfter = pSSH->getUsed(); auto expectedSizeCS = EnqueueOperation::getSizeRequiredCS(CL_COMMAND_NDRANGE_KERNEL, false, false, *pCmdQ, KernelFixture::pKernel, {}); - auto expectedSizeDSH = HardwareCommandsHelper::getSizeRequiredDSH(rootDeviceIndex, *KernelFixture::pKernel); - auto expectedSizeIOH = HardwareCommandsHelper::getSizeRequiredIOH(rootDeviceIndex, *KernelFixture::pKernel, workSize[0]); - auto expectedSizeSSH = HardwareCommandsHelper::getSizeRequiredSSH(*KernelFixture::pKernel, rootDeviceIndex); + auto expectedSizeDSH = HardwareCommandsHelper::getSizeRequiredDSH(*KernelFixture::pKernel); + auto expectedSizeIOH = HardwareCommandsHelper::getSizeRequiredIOH(*KernelFixture::pKernel, workSize[0]); + auto expectedSizeSSH = HardwareCommandsHelper::getSizeRequiredSSH(*KernelFixture::pKernel); EXPECT_EQ(0u, expectedSizeIOH % GPGPU_WALKER::INDIRECTDATASTARTADDRESS_ALIGN_SIZE); EXPECT_EQ(0u, expectedSizeDSH % 64); diff --git a/opencl/test/unit_test/command_queue/get_size_required_image_tests.cpp b/opencl/test/unit_test/command_queue/get_size_required_image_tests.cpp index ecf2059a17..c5f39e0b6f 100644 --- a/opencl/test/unit_test/command_queue/get_size_required_image_tests.cpp +++ b/opencl/test/unit_test/command_queue/get_size_required_image_tests.cpp @@ -95,9 +95,9 @@ HWTEST_F(GetSizeRequiredImageTest, WhenCopyingImageThenHeapsAndCommandBufferCons auto usedAfterSSH = ssh.getUsed(); auto expectedSizeCS = EnqueueOperation::getSizeRequiredCS(CL_COMMAND_COPY_IMAGE, false, false, *pCmdQ, kernel, {}); - auto expectedSizeDSH = HardwareCommandsHelper::getSizeRequiredDSH(rootDeviceIndex, *kernel); - auto expectedSizeIOH = HardwareCommandsHelper::getSizeRequiredIOH(rootDeviceIndex, *kernel); - auto expectedSizeSSH = HardwareCommandsHelper::getSizeRequiredSSH(*kernel, rootDeviceIndex); + auto expectedSizeDSH = HardwareCommandsHelper::getSizeRequiredDSH(*kernel); + auto expectedSizeIOH = HardwareCommandsHelper::getSizeRequiredIOH(*kernel); + auto expectedSizeSSH = HardwareCommandsHelper::getSizeRequiredSSH(*kernel); // Since each enqueue* may flush, we may see a MI_BATCH_BUFFER_END appended. expectedSizeCS += sizeof(typename FamilyType::MI_BATCH_BUFFER_END); @@ -121,14 +121,14 @@ HWTEST_F(GetSizeRequiredImageTest, WhenCopyingReadWriteImageThenHeapsAndCommandB std::unique_ptr program(Program::createBuiltInFromSource("CopyImageToImage3d", context, context->getDevices(), nullptr)); program->build(program->getDevices(), nullptr, false); - std::unique_ptr kernel(Kernel::create(program.get(), program->getKernelInfosForKernel("CopyImageToImage3d"), *context->getDevice(0), nullptr)); + std::unique_ptr kernel(Kernel::create(program.get(), program->getKernelInfoForKernel("CopyImageToImage3d"), *context->getDevice(0), nullptr)); EXPECT_NE(nullptr, kernel); // This kernel does not operate on OpenCL 2.0 Read and Write images - EXPECT_FALSE(kernel->getKernelInfo(rootDeviceIndex).kernelDescriptor.kernelAttributes.flags.usesFencesForReadWriteImages); + EXPECT_FALSE(kernel->getKernelInfo().kernelDescriptor.kernelAttributes.flags.usesFencesForReadWriteImages); // Simulate that the kernel actually operates on OpenCL 2.0 Read and Write images. // Such kernel may require special WA DisableLSQCROPERFforOCL during construction of Command Buffer - const_cast(kernel->getKernelInfo(rootDeviceIndex).kernelDescriptor).kernelAttributes.flags.usesFencesForReadWriteImages = true; + const_cast(kernel->getKernelInfo().kernelDescriptor).kernelAttributes.flags.usesFencesForReadWriteImages = true; // Enqueue kernel that may require special WA DisableLSQCROPERFforOCL auto retVal = EnqueueKernelHelper<>::enqueueKernel(pCmdQ, kernel.get()); @@ -140,15 +140,15 @@ HWTEST_F(GetSizeRequiredImageTest, WhenCopyingReadWriteImageThenHeapsAndCommandB auto usedAfterSSH = ssh.getUsed(); auto expectedSizeCS = EnqueueOperation::getSizeRequiredCS(CL_COMMAND_COPY_IMAGE, false, false, *pCmdQ, kernel.get(), {}); - auto expectedSizeDSH = HardwareCommandsHelper::getSizeRequiredDSH(rootDeviceIndex, *kernel.get()); - auto expectedSizeIOH = HardwareCommandsHelper::getSizeRequiredIOH(rootDeviceIndex, *kernel.get()); - auto expectedSizeSSH = HardwareCommandsHelper::getSizeRequiredSSH(*kernel.get(), rootDeviceIndex); + auto expectedSizeDSH = HardwareCommandsHelper::getSizeRequiredDSH(*kernel.get()); + auto expectedSizeIOH = HardwareCommandsHelper::getSizeRequiredIOH(*kernel.get()); + auto expectedSizeSSH = HardwareCommandsHelper::getSizeRequiredSSH(*kernel.get()); // Since each enqueue* may flush, we may see a MI_BATCH_BUFFER_END appended. expectedSizeCS += sizeof(typename FamilyType::MI_BATCH_BUFFER_END); expectedSizeCS = alignUp(expectedSizeCS, MemoryConstants::cacheLineSize); - const_cast(kernel->getKernelInfo(rootDeviceIndex).kernelDescriptor).kernelAttributes.flags.usesFencesForReadWriteImages = false; + const_cast(kernel->getKernelInfo().kernelDescriptor).kernelAttributes.flags.usesFencesForReadWriteImages = false; EXPECT_GE(expectedSizeCS, usedAfterCS - usedBeforeCS); EXPECT_GE(expectedSizeDSH, usedAfterDSH - usedBeforeDSH); @@ -197,9 +197,9 @@ HWTEST_F(GetSizeRequiredImageTest, WhenReadingImageNonBlockingThenHeapsAndComman auto usedAfterSSH = ssh.getUsed(); auto expectedSizeCS = EnqueueOperation::getSizeRequiredCS(CL_COMMAND_READ_IMAGE, false, false, *pCmdQ, kernel, {}); - auto expectedSizeDSH = HardwareCommandsHelper::getSizeRequiredDSH(rootDeviceIndex, *kernel); - auto expectedSizeIOH = HardwareCommandsHelper::getSizeRequiredIOH(rootDeviceIndex, *kernel); - auto expectedSizeSSH = HardwareCommandsHelper::getSizeRequiredSSH(*kernel, rootDeviceIndex); + auto expectedSizeDSH = HardwareCommandsHelper::getSizeRequiredDSH(*kernel); + auto expectedSizeIOH = HardwareCommandsHelper::getSizeRequiredIOH(*kernel); + auto expectedSizeSSH = HardwareCommandsHelper::getSizeRequiredSSH(*kernel); // Since each enqueue* may flush, we may see a MI_BATCH_BUFFER_END appended. expectedSizeCS += sizeof(typename FamilyType::MI_BATCH_BUFFER_END); @@ -252,9 +252,9 @@ HWTEST_F(GetSizeRequiredImageTest, WhenReadingImageBlockingThenHeapsAndCommandBu auto usedAfterSSH = ssh.getUsed(); auto expectedSizeCS = EnqueueOperation::getSizeRequiredCS(CL_COMMAND_READ_IMAGE, false, false, *pCmdQ, kernel, {}); - auto expectedSizeDSH = HardwareCommandsHelper::getSizeRequiredDSH(rootDeviceIndex, *kernel); - auto expectedSizeIOH = HardwareCommandsHelper::getSizeRequiredIOH(rootDeviceIndex, *kernel); - auto expectedSizeSSH = HardwareCommandsHelper::getSizeRequiredSSH(*kernel, rootDeviceIndex); + auto expectedSizeDSH = HardwareCommandsHelper::getSizeRequiredDSH(*kernel); + auto expectedSizeIOH = HardwareCommandsHelper::getSizeRequiredIOH(*kernel); + auto expectedSizeSSH = HardwareCommandsHelper::getSizeRequiredSSH(*kernel); // Since each enqueue* may flush, we may see a MI_BATCH_BUFFER_END appended. expectedSizeCS += sizeof(typename FamilyType::MI_BATCH_BUFFER_END); @@ -307,9 +307,9 @@ HWTEST_F(GetSizeRequiredImageTest, WhenWritingImageNonBlockingThenHeapsAndComman auto usedAfterSSH = ssh.getUsed(); auto expectedSizeCS = EnqueueOperation::getSizeRequiredCS(CL_COMMAND_WRITE_IMAGE, false, false, *pCmdQ, kernel, {}); - auto expectedSizeDSH = HardwareCommandsHelper::getSizeRequiredDSH(rootDeviceIndex, *kernel); - auto expectedSizeIOH = HardwareCommandsHelper::getSizeRequiredIOH(rootDeviceIndex, *kernel); - auto expectedSizeSSH = HardwareCommandsHelper::getSizeRequiredSSH(*kernel, rootDeviceIndex); + auto expectedSizeDSH = HardwareCommandsHelper::getSizeRequiredDSH(*kernel); + auto expectedSizeIOH = HardwareCommandsHelper::getSizeRequiredIOH(*kernel); + auto expectedSizeSSH = HardwareCommandsHelper::getSizeRequiredSSH(*kernel); // Since each enqueue* may flush, we may see a MI_BATCH_BUFFER_END appended. expectedSizeCS += sizeof(typename FamilyType::MI_BATCH_BUFFER_END); @@ -362,9 +362,9 @@ HWTEST_F(GetSizeRequiredImageTest, WhenWritingImageBlockingThenHeapsAndCommandBu auto usedAfterSSH = ssh.getUsed(); auto expectedSizeCS = EnqueueOperation::getSizeRequiredCS(CL_COMMAND_WRITE_IMAGE, false, false, *pCmdQ, kernel, {}); - auto expectedSizeDSH = HardwareCommandsHelper::getSizeRequiredDSH(rootDeviceIndex, *kernel); - auto expectedSizeIOH = HardwareCommandsHelper::getSizeRequiredIOH(rootDeviceIndex, *kernel); - auto expectedSizeSSH = HardwareCommandsHelper::getSizeRequiredSSH(*kernel, rootDeviceIndex); + auto expectedSizeDSH = HardwareCommandsHelper::getSizeRequiredDSH(*kernel); + auto expectedSizeIOH = HardwareCommandsHelper::getSizeRequiredIOH(*kernel); + auto expectedSizeSSH = HardwareCommandsHelper::getSizeRequiredSSH(*kernel); // Since each enqueue* may flush, we may see a MI_BATCH_BUFFER_END appended. expectedSizeCS += sizeof(typename FamilyType::MI_BATCH_BUFFER_END); diff --git a/opencl/test/unit_test/command_queue/gl/windows/enqueue_kernel_gl_tests_windows.cpp b/opencl/test/unit_test/command_queue/gl/windows/enqueue_kernel_gl_tests_windows.cpp index 1f764d67fb..58f5050230 100644 --- a/opencl/test/unit_test/command_queue/gl/windows/enqueue_kernel_gl_tests_windows.cpp +++ b/opencl/test/unit_test/command_queue/gl/windows/enqueue_kernel_gl_tests_windows.cpp @@ -42,7 +42,7 @@ TEST_F(EnqueueKernelTest, givenKernelWithSharedObjArgsWhenEnqueueIsCalledThenRes pKernel->setArg(0, sizeof(cl_mem *), &sharedMem); pKernel->setArg(1, sizeof(cl_mem *), &nonSharedMem); EXPECT_TRUE(pKernel->isUsingSharedObjArgs()); - auto &kernelInfo = pKernel->getKernelInfo(rootDeviceIndex); + auto &kernelInfo = pKernel->getKernelInfo(); auto pKernelArg = (uint32_t *)(pKernel->getCrossThreadData() + kernelInfo.kernelArgInfo[0].kernelArgPatchInfoVector[0].crossthreadOffset); diff --git a/opencl/test/unit_test/command_queue/sync_buffer_handler_tests.cpp b/opencl/test/unit_test/command_queue/sync_buffer_handler_tests.cpp index 1c0102812b..e2796738e6 100644 --- a/opencl/test/unit_test/command_queue/sync_buffer_handler_tests.cpp +++ b/opencl/test/unit_test/command_queue/sync_buffer_handler_tests.cpp @@ -183,7 +183,7 @@ HWTEST_TEMPLATED_F(SyncBufferHandlerTest, GivenSshRequiredWhenPatchingSyncBuffer pDevice->allocateSyncBufferHandler(); auto syncBufferHandler = getSyncBufferHandler(); - auto surfaceState = reinterpret_cast(ptrOffset(kernel->getSurfaceStateHeap(rootDeviceIndex), + auto surfaceState = reinterpret_cast(ptrOffset(kernel->getSurfaceStateHeap(), sPatchAllocateSyncBuffer.SurfaceStateHeapOffset)); auto bufferAddress = syncBufferHandler->graphicsAllocation->getGpuAddress(); surfaceState->setSurfaceBaseAddress(bufferAddress + 1); diff --git a/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_3_tests.cpp b/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_3_tests.cpp index c469ea7a92..db8baf5655 100644 --- a/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_3_tests.cpp +++ b/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_3_tests.cpp @@ -1711,7 +1711,8 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, GivenBlockedKernelWhenItIsUnblocke auto mockProgram = std::make_unique(&mockContext, false, toClDeviceVector(*pClDevice)); auto pKernel = MockKernel::create(*pDevice, mockProgram.get(), numGrfRequired); - MultiDeviceKernel multiDeviceKernel(MockMultiDeviceKernel::toKernelVector(pKernel)); + auto kernelInfos = MockKernel::toKernelInfoContainer(pKernel->getKernelInfo(), rootDeviceIndex); + MultiDeviceKernel multiDeviceKernel(MockMultiDeviceKernel::toKernelVector(pKernel), kernelInfos); auto event = std::make_unique>(pCmdQ.get(), CL_COMMAND_MARKER, 0, 0); auto cmdStream = new LinearStream(pDevice->getMemoryManager()->allocateGraphicsMemoryWithProperties({pDevice->getRootDeviceIndex(), 4096, GraphicsAllocation::AllocationType::COMMAND_BUFFER, pDevice->getDeviceBitfield()})); diff --git a/opencl/test/unit_test/context/driver_diagnostics_enqueue_tests.cpp b/opencl/test/unit_test/context/driver_diagnostics_enqueue_tests.cpp index 88afa15d0f..455258cd81 100644 --- a/opencl/test/unit_test/context/driver_diagnostics_enqueue_tests.cpp +++ b/opencl/test/unit_test/context/driver_diagnostics_enqueue_tests.cpp @@ -681,7 +681,7 @@ TEST_F(PerformanceHintEnqueueKernelTest, GivenNullLocalSizeAndEnableComputeWorkS EXPECT_EQ(CL_SUCCESS, retVal); snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[NULL_LOCAL_WORKGROUP_SIZE], - kernel->getKernelInfo(rootDeviceIndex).kernelDescriptor.kernelMetadata.kernelName.c_str(), + kernel->getKernelInfo().kernelDescriptor.kernelMetadata.kernelName.c_str(), *kernel->localWorkSizeX, *kernel->localWorkSizeY, *kernel->localWorkSizeZ); @@ -696,7 +696,7 @@ TEST_F(PerformanceHintEnqueueKernelTest, GivenNullLocalSizeAndEnableComputeWorkS EXPECT_EQ(CL_SUCCESS, retVal); snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[NULL_LOCAL_WORKGROUP_SIZE], - kernel->getKernelInfo(rootDeviceIndex).kernelDescriptor.kernelMetadata.kernelName.c_str(), + kernel->getKernelInfo().kernelDescriptor.kernelMetadata.kernelName.c_str(), *kernel->localWorkSizeX, *kernel->localWorkSizeY, *kernel->localWorkSizeZ); @@ -712,7 +712,7 @@ TEST_F(PerformanceHintEnqueueKernelTest, GivenNullLocalSizeAndEnableComputeWorkS EXPECT_EQ(CL_SUCCESS, retVal); snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[NULL_LOCAL_WORKGROUP_SIZE], - kernel->getKernelInfo(rootDeviceIndex).kernelDescriptor.kernelMetadata.kernelName.c_str(), + kernel->getKernelInfo().kernelDescriptor.kernelMetadata.kernelName.c_str(), *kernel->localWorkSizeX, *kernel->localWorkSizeY, *kernel->localWorkSizeZ); @@ -726,7 +726,7 @@ TEST_F(PerformanceHintEnqueueKernelTest, GivenNullLocalSizeAndEnableComputeWorkS EXPECT_EQ(CL_SUCCESS, retVal); snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[NULL_LOCAL_WORKGROUP_SIZE], - kernel->getKernelInfo(rootDeviceIndex).kernelDescriptor.kernelMetadata.kernelName.c_str(), + kernel->getKernelInfo().kernelDescriptor.kernelMetadata.kernelName.c_str(), *kernel->localWorkSizeX, *kernel->localWorkSizeY, *kernel->localWorkSizeZ); @@ -742,7 +742,7 @@ TEST_F(PerformanceHintEnqueueKernelTest, GivenNullLocalSizeAndEnableComputeWorkS EXPECT_EQ(CL_SUCCESS, retVal); snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[NULL_LOCAL_WORKGROUP_SIZE], - kernel->getKernelInfo(rootDeviceIndex).kernelDescriptor.kernelMetadata.kernelName.c_str(), + kernel->getKernelInfo().kernelDescriptor.kernelMetadata.kernelName.c_str(), *kernel->localWorkSizeX, *kernel->localWorkSizeY, *kernel->localWorkSizeZ); @@ -758,7 +758,7 @@ TEST_F(PerformanceHintEnqueueKernelTest, GivenNullLocalSizeAndEnableComputeWorkS EXPECT_EQ(CL_SUCCESS, retVal); snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[NULL_LOCAL_WORKGROUP_SIZE], - kernel->getKernelInfo(rootDeviceIndex).kernelDescriptor.kernelMetadata.kernelName.c_str(), + kernel->getKernelInfo().kernelDescriptor.kernelMetadata.kernelName.c_str(), *kernel->localWorkSizeX, *kernel->localWorkSizeY, *kernel->localWorkSizeZ); @@ -790,7 +790,7 @@ TEST_P(PerformanceHintEnqueueKernelBadSizeTest, GivenBadLocalWorkGroupSizeWhenEn EXPECT_EQ(CL_SUCCESS, retVal); snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[BAD_LOCAL_WORKGROUP_SIZE], - localWorkGroupSize[0], localWorkGroupSize[1], localWorkGroupSize[2], kernel->getKernelInfo(rootDeviceIndex).kernelDescriptor.kernelMetadata.kernelName.c_str(), + localWorkGroupSize[0], localWorkGroupSize[1], localWorkGroupSize[2], kernel->getKernelInfo().kernelDescriptor.kernelMetadata.kernelName.c_str(), computedLocalWorkgroupSize.x, computedLocalWorkgroupSize.y, computedLocalWorkgroupSize.z); EXPECT_TRUE(containsHint(expectedHint, userData)); } @@ -806,7 +806,7 @@ TEST_F(PerformanceHintEnqueueKernelPrintfTest, GivenKernelWithPrintfWhenEnqueueK retVal = pCmdQ->enqueueKernel(kernel, 3, nullptr, globalWorkGroupSize, preferredWorkGroupSize, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); - snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[PRINTF_DETECTED_IN_KERNEL], kernel->getDefaultKernelInfo().kernelDescriptor.kernelMetadata.kernelName.c_str()); + snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[PRINTF_DETECTED_IN_KERNEL], kernel->getKernelInfo().kernelDescriptor.kernelMetadata.kernelName.c_str()); EXPECT_TRUE(containsHint(expectedHint, userData)); } @@ -840,7 +840,7 @@ TEST_F(PerformanceHintEnqueueTest, GivenKernelWithCoherentPtrWhenEnqueueKernelIs EXPECT_EQ(CL_SUCCESS, retVal); - snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[KERNEL_REQUIRES_COHERENCY], mockKernel.mockKernel->getDefaultKernelInfo().kernelDescriptor.kernelMetadata.kernelName.c_str()); + snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[KERNEL_REQUIRES_COHERENCY], mockKernel.mockKernel->getKernelInfo().kernelDescriptor.kernelMetadata.kernelName.c_str()); EXPECT_TRUE(containsHint(expectedHint, userData)); delete buffer; } diff --git a/opencl/test/unit_test/context/driver_diagnostics_tests.cpp b/opencl/test/unit_test/context/driver_diagnostics_tests.cpp index d730e9792b..35271ab6c1 100644 --- a/opencl/test/unit_test/context/driver_diagnostics_tests.cpp +++ b/opencl/test/unit_test/context/driver_diagnostics_tests.cpp @@ -434,7 +434,6 @@ TEST_F(PerformanceHintTest, givenPrintDriverDiagnosticsDebugModeEnabledWhenCallF DebugManager.flags.PrintDriverDiagnostics.set(1); auto pDevice = castToObject(devices[0]); - auto rootDeviceIndex = pDevice->getRootDeviceIndex(); MockKernelWithInternals mockKernel(*pDevice, context); MockBuffer buffer; cl_mem clMem = &buffer; @@ -451,10 +450,10 @@ TEST_F(PerformanceHintTest, givenPrintDriverDiagnosticsDebugModeEnabledWhenCallF testing::internal::CaptureStdout(); KernelObjsForAuxTranslation kernelObjects; - mockKernel.mockKernel->fillWithKernelObjsForAuxTranslation(kernelObjects, rootDeviceIndex); + mockKernel.mockKernel->fillWithKernelObjsForAuxTranslation(kernelObjects); snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[KERNEL_ARGUMENT_AUX_TRANSLATION], - mockKernel.mockKernel->getKernelInfo(rootDeviceIndex).kernelDescriptor.kernelMetadata.kernelName.c_str(), 0, mockKernel.mockKernel->getKernelInfo(rootDeviceIndex).kernelArgInfo.at(0).metadataExtended->argName.c_str()); + mockKernel.mockKernel->getKernelInfo().kernelDescriptor.kernelMetadata.kernelName.c_str(), 0, mockKernel.mockKernel->getKernelInfo().kernelArgInfo.at(0).metadataExtended->argName.c_str()); std::string output = testing::internal::GetCapturedStdout(); EXPECT_NE(0u, output.size()); @@ -466,7 +465,6 @@ TEST_F(PerformanceHintTest, givenPrintDriverDiagnosticsDebugModeEnabledWhenCallF DebugManager.flags.PrintDriverDiagnostics.set(1); auto pDevice = castToObject(devices[0]); - auto rootDeviceIndex = pDevice->getRootDeviceIndex(); MockKernelWithInternals mockKernel(*pDevice, context); char data[128]; void *ptr = &data; @@ -484,10 +482,10 @@ TEST_F(PerformanceHintTest, givenPrintDriverDiagnosticsDebugModeEnabledWhenCallF testing::internal::CaptureStdout(); KernelObjsForAuxTranslation kernelObjects; - mockKernel.mockKernel->fillWithKernelObjsForAuxTranslation(kernelObjects, rootDeviceIndex); + mockKernel.mockKernel->fillWithKernelObjsForAuxTranslation(kernelObjects); snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[KERNEL_ARGUMENT_AUX_TRANSLATION], - mockKernel.mockKernel->getKernelInfo(rootDeviceIndex).kernelDescriptor.kernelMetadata.kernelName.c_str(), 0, mockKernel.mockKernel->getKernelInfo(rootDeviceIndex).kernelArgInfo.at(0).metadataExtended->argName.c_str()); + mockKernel.mockKernel->getKernelInfo().kernelDescriptor.kernelMetadata.kernelName.c_str(), 0, mockKernel.mockKernel->getKernelInfo().kernelArgInfo.at(0).metadataExtended->argName.c_str()); std::string output = testing::internal::GetCapturedStdout(); EXPECT_NE(0u, output.size()); @@ -499,7 +497,6 @@ TEST_F(PerformanceHintTest, givenPrintDriverDiagnosticsDebugModeEnabledWhenKerne DebugManager.flags.PrintDriverDiagnostics.set(1); auto pDevice = castToObject(devices[0]); - auto rootDeviceIndex = pDevice->getRootDeviceIndex(); MockKernelWithInternals mockKernel(*pDevice, context); char data[128]; void *ptr = &data; @@ -518,7 +515,7 @@ TEST_F(PerformanceHintTest, givenPrintDriverDiagnosticsDebugModeEnabledWhenKerne testing::internal::CaptureStdout(); KernelObjsForAuxTranslation kernelObjects; - mockKernel.mockKernel->fillWithKernelObjsForAuxTranslation(kernelObjects, rootDeviceIndex); + mockKernel.mockKernel->fillWithKernelObjsForAuxTranslation(kernelObjects); std::string output = testing::internal::GetCapturedStdout(); EXPECT_EQ(0u, output.size()); @@ -528,7 +525,6 @@ TEST_F(PerformanceHintTest, givenPrintDriverDiagnosticsDebugModeDisabledWhenCall auto pDevice = castToObject(devices[0]); cl_device_id clDevice = pDevice; auto context = Context::create(nullptr, ClDeviceVector(&clDevice, 1), nullptr, nullptr, retVal); - auto rootDeviceIndex = pDevice->getRootDeviceIndex(); MockKernelWithInternals mockKernel(*pDevice, context); char data[128]; void *ptr = &data; @@ -547,7 +543,7 @@ TEST_F(PerformanceHintTest, givenPrintDriverDiagnosticsDebugModeDisabledWhenCall testing::internal::CaptureStdout(); KernelObjsForAuxTranslation kernelObjects; - mockKernel.mockKernel->fillWithKernelObjsForAuxTranslation(kernelObjects, rootDeviceIndex); + mockKernel.mockKernel->fillWithKernelObjsForAuxTranslation(kernelObjects); std::string output = testing::internal::GetCapturedStdout(); EXPECT_EQ(0u, output.size()); @@ -557,7 +553,6 @@ TEST_F(PerformanceHintTest, givenPrintDriverDiagnosticsDebugModeDisabledWhenCall TEST_F(PerformanceHintTest, whenCallingFillWithKernelObjsForAuxTranslationOnNullGfxAllocationThenDontReportAnyHint) { auto pDevice = castToObject(devices[0]); - auto rootDeviceIndex = pDevice->getRootDeviceIndex(); MockKernelWithInternals mockKernel(*pDevice, context); mockKernel.kernelInfo.kernelArgInfo.resize(1); @@ -571,7 +566,7 @@ TEST_F(PerformanceHintTest, whenCallingFillWithKernelObjsForAuxTranslationOnNull testing::internal::CaptureStdout(); KernelObjsForAuxTranslation kernelObjects; - mockKernel.mockKernel->fillWithKernelObjsForAuxTranslation(kernelObjects, rootDeviceIndex); + mockKernel.mockKernel->fillWithKernelObjsForAuxTranslation(kernelObjects); std::string output = testing::internal::GetCapturedStdout(); EXPECT_EQ(0u, output.size()); @@ -836,16 +831,14 @@ TEST_P(PerformanceHintKernelTest, GivenSpillFillWhenKernelIsInitializedThenConte mockKernel.mockKernel->initialize(); - auto rootDeviceIndex = pClDevice->getRootDeviceIndex(); - auto expectedSize = size * pClDevice->getSharedDeviceInfo().computeUnitsUsedForScratch * mockKernel.mockKernel->getKernelInfo(rootDeviceIndex).getMaxSimdSize(); + auto expectedSize = size * pClDevice->getSharedDeviceInfo().computeUnitsUsedForScratch * mockKernel.mockKernel->getKernelInfo().getMaxSimdSize(); snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[REGISTER_PRESSURE_TOO_HIGH], - mockKernel.mockKernel->getKernelInfo(rootDeviceIndex).kernelDescriptor.kernelMetadata.kernelName.c_str(), expectedSize); + mockKernel.mockKernel->getKernelInfo().kernelDescriptor.kernelMetadata.kernelName.c_str(), expectedSize); EXPECT_EQ(!zeroSized, containsHint(expectedHint, userData)); } TEST_P(PerformanceHintKernelTest, GivenPrivateSurfaceWhenKernelIsInitializedThenContextProvidesProperHint) { auto pDevice = castToObject(devices[1]); - auto rootDeviceIndex = pDevice->getRootDeviceIndex(); static_cast(pDevice->getMemoryManager())->turnOnFakingBigAllocations(); for (auto isSmitThread : {false, true}) { @@ -862,12 +855,12 @@ TEST_P(PerformanceHintKernelTest, GivenPrivateSurfaceWhenKernelIsInitializedThen populateKernelDescriptor(mockKernel.kernelInfo.kernelDescriptor, allocateStatelessPrivateMemorySurface); size *= pDevice->getSharedDeviceInfo().computeUnitsUsedForScratch; - size *= isSmitThread ? mockKernel.mockKernel->getKernelInfo(rootDeviceIndex).getMaxSimdSize() : 1; + size *= isSmitThread ? mockKernel.mockKernel->getKernelInfo().getMaxSimdSize() : 1; mockKernel.mockKernel->initialize(); snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[PRIVATE_MEMORY_USAGE_TOO_HIGH], - mockKernel.mockKernel->getKernelInfo(rootDeviceIndex).kernelDescriptor.kernelMetadata.kernelName.c_str(), size); + mockKernel.mockKernel->getKernelInfo().kernelDescriptor.kernelMetadata.kernelName.c_str(), size); EXPECT_EQ(!zeroSized, containsHint(expectedHint, userData)); } } diff --git a/opencl/test/unit_test/context/driver_diagnostics_tests.h b/opencl/test/unit_test/context/driver_diagnostics_tests.h index 65f71bf58d..8a873323d8 100644 --- a/opencl/test/unit_test/context/driver_diagnostics_tests.h +++ b/opencl/test/unit_test/context/driver_diagnostics_tests.h @@ -230,7 +230,7 @@ struct PerformanceHintEnqueueKernelTest : public PerformanceHintEnqueueTest, CreateProgramFromBinary(context, context->getDevices(), "CopyBuffer_simd32"); retVal = pProgram->build(pProgram->getDevices(), nullptr, false); ASSERT_EQ(CL_SUCCESS, retVal); - kernel = Kernel::create(pProgram, pProgram->getKernelInfosForKernel("CopyBuffer"), *context->getDevice(0), &retVal); + kernel = Kernel::create(pProgram, pProgram->getKernelInfoForKernel("CopyBuffer"), *context->getDevice(0), &retVal); globalWorkGroupSize[0] = globalWorkGroupSize[1] = globalWorkGroupSize[2] = 1; rootDeviceIndex = context->getDevice(0)->getRootDeviceIndex(); @@ -267,7 +267,7 @@ struct PerformanceHintEnqueueKernelPrintfTest : public PerformanceHintEnqueueTes CreateProgramFromBinary(context, context->getDevices(), "printf"); retVal = pProgram->build(pProgram->getDevices(), nullptr, false); ASSERT_EQ(CL_SUCCESS, retVal); - kernel = Kernel::create(pProgram, pProgram->getKernelInfosForKernel("test"), *context->getDevice(0), &retVal); + kernel = Kernel::create(pProgram, pProgram->getKernelInfoForKernel("test"), *context->getDevice(0), &retVal); globalWorkGroupSize[0] = globalWorkGroupSize[1] = globalWorkGroupSize[2] = 1; } diff --git a/opencl/test/unit_test/device_queue/device_queue_hw_tests.cpp b/opencl/test/unit_test/device_queue/device_queue_hw_tests.cpp index bed4a5ccd2..7055981cc9 100644 --- a/opencl/test/unit_test/device_queue/device_queue_hw_tests.cpp +++ b/opencl/test/unit_test/device_queue/device_queue_hw_tests.cpp @@ -326,7 +326,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, DeviceQueueSlb, WhenBuildingSlbThenCleanupSectionIsC // 4 pages padding expected after cleanup section EXPECT_LE(4 * MemoryConstants::pageSize, slbMax - slbUsed); - if (mockParentKernel->getKernelInfo(testedRootDeviceIndex).kernelDescriptor.kernelAttributes.flags.usesFencesForReadWriteImages) { + if (mockParentKernel->getKernelInfo().kernelDescriptor.kernelAttributes.flags.usesFencesForReadWriteImages) { cleanupSectionOffsetToParse += GpgpuWalkerHelper::getSizeForWADisableLSQCROPERFforOCL(mockParentKernel) / 2; } @@ -401,7 +401,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, DeviceQueueSlb, GivenProfilingWhenBuildingSlbThenEmC auto pipeControlItor = find(hwParser.cmdList.begin(), hwParser.cmdList.end()); - if (mockParentKernel->getKernelInfo(testedRootDeviceIndex).kernelDescriptor.kernelAttributes.flags.usesFencesForReadWriteImages && GpgpuWalkerHelper::getSizeForWADisableLSQCROPERFforOCL(mockParentKernel) > 0) { + if (mockParentKernel->getKernelInfo().kernelDescriptor.kernelAttributes.flags.usesFencesForReadWriteImages && GpgpuWalkerHelper::getSizeForWADisableLSQCROPERFforOCL(mockParentKernel) > 0) { auto loadRegImmItor = find(hwParser.cmdList.begin(), hwParser.cmdList.end()); EXPECT_NE(hwParser.cmdList.end(), loadRegImmItor); @@ -537,7 +537,7 @@ HWCMDTEST_P(IGFX_GEN8_CORE, DeviceQueueHwWithKernel, WhenSetiingIUpIndirectState auto dsh = devQueueHw->getIndirectHeap(IndirectHeap::DYNAMIC_STATE); ASSERT_NE(nullptr, dsh); - size_t surfaceStateHeapSize = HardwareCommandsHelper::getSshSizeForExecutionModel(const_cast(*pKernel), rootDeviceIndex); + size_t surfaceStateHeapSize = HardwareCommandsHelper::getSshSizeForExecutionModel(const_cast(*pKernel)); auto ssh = new IndirectHeap(alignedMalloc(surfaceStateHeapSize, MemoryConstants::pageSize), surfaceStateHeapSize); auto usedBeforeSSH = ssh->getUsed(); @@ -565,7 +565,7 @@ HWCMDTEST_P(IGFX_GEN8_CORE, DeviceQueueHwWithKernel, WhenSettingUpIndirectStateT auto dsh = devQueueHw->getIndirectHeap(IndirectHeap::DYNAMIC_STATE); ASSERT_NE(nullptr, dsh); - size_t surfaceStateHeapSize = HardwareCommandsHelper::getSshSizeForExecutionModel(const_cast(*pKernel), rootDeviceIndex); + size_t surfaceStateHeapSize = HardwareCommandsHelper::getSshSizeForExecutionModel(const_cast(*pKernel)); auto ssh = new IndirectHeap(alignedMalloc(surfaceStateHeapSize, MemoryConstants::pageSize), surfaceStateHeapSize); @@ -593,7 +593,7 @@ HWCMDTEST_P(IGFX_GEN8_CORE, DeviceQueueHwWithKernel, WhenSettingUpIndirectStateT auto dsh = devQueueHw->getIndirectHeap(IndirectHeap::DYNAMIC_STATE); ASSERT_NE(nullptr, dsh); - size_t surfaceStateHeapSize = HardwareCommandsHelper::getSshSizeForExecutionModel(const_cast(*pKernel), rootDeviceIndex); + size_t surfaceStateHeapSize = HardwareCommandsHelper::getSshSizeForExecutionModel(const_cast(*pKernel)); auto ssh = new IndirectHeap(alignedMalloc(surfaceStateHeapSize, MemoryConstants::pageSize), surfaceStateHeapSize); @@ -602,9 +602,9 @@ HWCMDTEST_P(IGFX_GEN8_CORE, DeviceQueueHwWithKernel, WhenSettingUpIndirectStateT devQueueHw->setupIndirectState(*ssh, *dsh, pKernel, parentCount, false); auto *igilQueue = reinterpret_cast(devQueueHw->getQueueBuffer()->getUnderlyingBuffer()); - EXPECT_EQ(igilQueue->m_controls.m_DynamicHeapStart, devQueueHw->offsetDsh + alignUp((uint32_t)pKernel->getDynamicStateHeapSize(rootDeviceIndex), GPGPU_WALKER::INDIRECTDATASTARTADDRESS_ALIGN_SIZE)); + EXPECT_EQ(igilQueue->m_controls.m_DynamicHeapStart, devQueueHw->offsetDsh + alignUp((uint32_t)pKernel->getDynamicStateHeapSize(), GPGPU_WALKER::INDIRECTDATASTARTADDRESS_ALIGN_SIZE)); EXPECT_EQ(igilQueue->m_controls.m_DynamicHeapSizeInBytes, (uint32_t)devQueueHw->getDshBuffer()->getUnderlyingBufferSize()); - EXPECT_EQ(igilQueue->m_controls.m_CurrentDSHoffset, devQueueHw->offsetDsh + alignUp((uint32_t)pKernel->getDynamicStateHeapSize(rootDeviceIndex), GPGPU_WALKER::INDIRECTDATASTARTADDRESS_ALIGN_SIZE)); + EXPECT_EQ(igilQueue->m_controls.m_CurrentDSHoffset, devQueueHw->offsetDsh + alignUp((uint32_t)pKernel->getDynamicStateHeapSize(), GPGPU_WALKER::INDIRECTDATASTARTADDRESS_ALIGN_SIZE)); EXPECT_EQ(igilQueue->m_controls.m_ParentDSHOffset, devQueueHw->offsetDsh); alignedFree(ssh->getCpuBase()); @@ -631,7 +631,7 @@ HWCMDTEST_P(IGFX_GEN8_CORE, DeviceQueueHwWithKernel, GivenHasBarriersSetWhenCall } auto surfaceStateHeapSize = - HardwareCommandsHelper::getSshSizeForExecutionModel(const_cast(*pKernel), rootDeviceIndex); + HardwareCommandsHelper::getSshSizeForExecutionModel(const_cast(*pKernel)); auto ssh = std::make_unique(alignedMalloc(surfaceStateHeapSize, MemoryConstants::pageSize), surfaceStateHeapSize); devQueueHw->setupIndirectState(*ssh, *dsh, pKernel, parentCount, false); diff --git a/opencl/test/unit_test/device_queue/device_queue_tests.cpp b/opencl/test/unit_test/device_queue/device_queue_tests.cpp index a174840727..dc28d0103f 100644 --- a/opencl/test/unit_test/device_queue/device_queue_tests.cpp +++ b/opencl/test/unit_test/device_queue/device_queue_tests.cpp @@ -290,11 +290,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, DeviceQueueTest, WhenDispatchingSchedulerThenNoAsser MockProgram program(toClDeviceVector(*device)); MockCommandQueue cmdQ(nullptr, nullptr, 0); KernelInfo info; - KernelInfoContainer kernelInfos; - auto rootDeviceIndex = device->getRootDeviceIndex(); - kernelInfos.resize(rootDeviceIndex + 1); - kernelInfos[rootDeviceIndex] = &info; - MockSchedulerKernel *kernel = new MockSchedulerKernel(&program, kernelInfos, *device); + MockSchedulerKernel *kernel = new MockSchedulerKernel(&program, info, *device); LinearStream cmdStream; devQueue.dispatchScheduler(cmdStream, *kernel, device->getPreemptionMode(), nullptr, nullptr, false); diff --git a/opencl/test/unit_test/event/event_tests.cpp b/opencl/test/unit_test/event/event_tests.cpp index b4812ed1d5..a77e3b73fe 100644 --- a/opencl/test/unit_test/event/event_tests.cpp +++ b/opencl/test/unit_test/event/event_tests.cpp @@ -564,7 +564,7 @@ TEST_F(InternalsEventTest, givenBlockedKernelWithPrintfWhenSubmittedThenPrintOut MockKernelWithInternals mockKernelWithInternals(*pClDevice); auto pKernel = mockKernelWithInternals.mockKernel; - KernelInfo *kernelInfo = const_cast(&pKernel->getKernelInfo(rootDeviceIndex)); + KernelInfo *kernelInfo = const_cast(&pKernel->getKernelInfo()); kernelInfo->kernelDescriptor.kernelAttributes.bufferAddressingMode = KernelDescriptor::Stateless; SPatchAllocateStatelessPrintfSurface sPatchPrintfSurface = {}; diff --git a/opencl/test/unit_test/execution_model/enqueue_execution_model_kernel_tests.cpp b/opencl/test/unit_test/execution_model/enqueue_execution_model_kernel_tests.cpp index 4bcecf59fe..2a2b699265 100644 --- a/opencl/test/unit_test/execution_model/enqueue_execution_model_kernel_tests.cpp +++ b/opencl/test/unit_test/execution_model/enqueue_execution_model_kernel_tests.cpp @@ -61,19 +61,19 @@ HWCMDTEST_P(IGFX_GEN8_CORE, ParentKernelEnqueueTest, givenParentKernelWhenEnqueu MockMultiDispatchInfo multiDispatchInfo(pClDevice, pKernel); - auto graphicsAllocation = pKernel->getKernelInfo(rootDeviceIndex).getGraphicsAllocation(); + auto graphicsAllocation = pKernel->getKernelInfo().getGraphicsAllocation(); auto kernelIsaAddress = graphicsAllocation->getGpuAddressToPatch(); auto &hardwareInfo = pClDevice->getHardwareInfo(); auto &hwHelper = HwHelper::get(hardwareInfo.platform.eRenderCoreFamily); if (EngineHelpers::isCcs(pCmdQ->getGpgpuEngine().osContext->getEngineType()) && hwHelper.isOffsetToSkipSetFFIDGPWARequired(hardwareInfo)) { - kernelIsaAddress += pKernel->getKernelInfo(rootDeviceIndex).kernelDescriptor.entryPoints.skipSetFFIDGP; + kernelIsaAddress += pKernel->getKernelInfo().kernelDescriptor.entryPoints.skipSetFFIDGP; } pCmdQ->enqueueKernel(pKernel, 1, globalOffsets, workItems, workItems, 0, nullptr, nullptr); - if (pKernel->getKernelInfo(rootDeviceIndex).kernelDescriptor.kernelMetadata.kernelName == "kernel_reflection") { + if (pKernel->getKernelInfo().kernelDescriptor.kernelMetadata.kernelName == "kernel_reflection") { if (EncodeSurfaceState::doBindingTablePrefetch()) { EXPECT_NE(0u, idData[0].getSamplerCount()); } else { @@ -298,7 +298,7 @@ HWCMDTEST_P(IGFX_GEN8_CORE, ParentKernelEnqueueTest, givenParentKernelWhenEnqueu BlockKernelManager *blockManager = pProgram->getBlockKernelManager(); uint32_t blockCount = static_cast(blockManager->getCount()); - size_t parentKernelSSHSize = pKernel->getSurfaceStateHeapSize(rootDeviceIndex); + size_t parentKernelSSHSize = pKernel->getSurfaceStateHeapSize(); MockMultiDispatchInfo multiDispatchInfo(pClDevice, pKernel); @@ -320,7 +320,7 @@ HWCMDTEST_P(IGFX_GEN8_CORE, ParentKernelEnqueueTest, givenParentKernelWhenEnqueu ASSERT_NE(nullptr, pBlockInfo); - Kernel *blockKernel = Kernel::create(pKernel->getProgram(), MockKernel::toKernelInfoContainer(*pBlockInfo, rootDeviceIndex), *pClDevice, nullptr); + Kernel *blockKernel = Kernel::create(pKernel->getProgram(), *pBlockInfo, *pClDevice, nullptr); blockSSH = alignUp(blockSSH, BINDING_TABLE_STATE::SURFACESTATEPOINTER_ALIGN_SIZE); if (blockKernel->getNumberOfBindingTableStates() > 0) { ASSERT_TRUE(isValidOffset(pBlockInfo->kernelDescriptor.payloadMappings.bindingTable.tableOffset)); @@ -338,7 +338,7 @@ HWCMDTEST_P(IGFX_GEN8_CORE, ParentKernelEnqueueTest, givenParentKernelWhenEnqueu EXPECT_EQ(0, memcmp(srcSurfaceState, dstSurfaceState, sizeof(RENDER_SURFACE_STATE))); } - blockSSH = ptrOffset(blockSSH, blockKernel->getSurfaceStateHeapSize(rootDeviceIndex)); + blockSSH = ptrOffset(blockSSH, blockKernel->getSurfaceStateHeapSize()); } delete blockKernel; @@ -450,7 +450,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, ParentKernelEnqueueFixture, GivenParentKernelWhenEnq pCmdQ->enqueueKernel(parentKernel, 1, offset, gws, gws, 0, nullptr, nullptr); - const auto &implicitArgs = parentKernel->getKernelInfo(rootDeviceIndex).kernelDescriptor.payloadMappings.implicitArgs; + const auto &implicitArgs = parentKernel->getKernelInfo().kernelDescriptor.payloadMappings.implicitArgs; const auto &defaultQueueSurfaceAddress = implicitArgs.deviceSideEnqueueDefaultQueueSurfaceAddress; if (isValidOffset(defaultQueueSurfaceAddress.stateless)) { diff --git a/opencl/test/unit_test/execution_model/parent_kernel_dispatch_tests.cpp b/opencl/test/unit_test/execution_model/parent_kernel_dispatch_tests.cpp index 09f8cc48f8..55a9216c46 100644 --- a/opencl/test/unit_test/execution_model/parent_kernel_dispatch_tests.cpp +++ b/opencl/test/unit_test/execution_model/parent_kernel_dispatch_tests.cpp @@ -135,10 +135,10 @@ HWCMDTEST_P(IGFX_GEN8_CORE, ParentKernelDispatchTest, givenParentKernelWhenQueue auto &ssh = pCmdQ->getIndirectHeap(IndirectHeap::SURFACE_STATE, 0u); - EXPECT_LE(pKernel->getKernelInfo(rootDeviceIndex).heapInfo.SurfaceStateHeapSize, ssh.getMaxAvailableSpace()); + EXPECT_LE(pKernel->getKernelInfo().heapInfo.SurfaceStateHeapSize, ssh.getMaxAvailableSpace()); size_t minRequiredSize = HardwareCommandsHelper::getTotalSizeRequiredSSH(multiDispatchInfo); - size_t minRequiredSizeForEM = HardwareCommandsHelper::getSshSizeForExecutionModel(*pKernel, rootDeviceIndex); + size_t minRequiredSizeForEM = HardwareCommandsHelper::getSshSizeForExecutionModel(*pKernel); EXPECT_LE(minRequiredSize + minRequiredSizeForEM, ssh.getMaxAvailableSpace()); } @@ -170,15 +170,15 @@ HWCMDTEST_P(IGFX_GEN8_CORE, ParentKernelDispatchTest, givenParentKernelWhenQueue ASSERT_NE(nullptr, blockedCommandsData); size_t minRequiredSize = HardwareCommandsHelper::getTotalSizeRequiredSSH(multiDispatchInfo) + UnitTestHelper::getDefaultSshUsage(); - size_t minRequiredSizeForEM = HardwareCommandsHelper::getSshSizeForExecutionModel(*pKernel, rootDeviceIndex); + size_t minRequiredSizeForEM = HardwareCommandsHelper::getSshSizeForExecutionModel(*pKernel); size_t sshUsed = blockedCommandsData->ssh->getUsed(); size_t expectedSizeSSH = pKernel->getNumberOfBindingTableStates() * sizeof(RENDER_SURFACE_STATE) + - pKernel->getKernelInfo(rootDeviceIndex).kernelDescriptor.payloadMappings.bindingTable.numEntries * sizeof(BINDING_TABLE_STATE) + + pKernel->getKernelInfo().kernelDescriptor.payloadMappings.bindingTable.numEntries * sizeof(BINDING_TABLE_STATE) + UnitTestHelper::getDefaultSshUsage(); - if ((pKernel->requiresSshForBuffers(rootDeviceIndex)) || (pKernel->getKernelInfo(rootDeviceIndex).patchInfo.imageMemObjKernelArgs.size() > 0)) { + if ((pKernel->requiresSshForBuffers()) || (pKernel->getKernelInfo().patchInfo.imageMemObjKernelArgs.size() > 0)) { EXPECT_EQ(expectedSizeSSH, sshUsed); } @@ -354,7 +354,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, MockParentKernelDispatch, GivenUsedSSHHeapWhenParent // Assuming parent is not using SSH, this is becuase storing allocation on reuse list and allocating // new one by obtaining from reuse list returns the same allocation and heap buffer does not differ // If parent is not using SSH, then heap obtained has zero usage and the same buffer - ASSERT_EQ(0u, mockParentKernel->getKernelInfo(rootDeviceIndex).heapInfo.SurfaceStateHeapSize); + ASSERT_EQ(0u, mockParentKernel->getKernelInfo().heapInfo.SurfaceStateHeapSize); DispatchInfo dispatchInfo(pClDevice, mockParentKernel, 1, workItems, nullptr, globalOffsets); dispatchInfo.setNumberOfWorkgroups({1, 1, 1}); diff --git a/opencl/test/unit_test/execution_model/scheduler_dispatch_tests.cpp b/opencl/test/unit_test/execution_model/scheduler_dispatch_tests.cpp index dfefe9699d..538fb3d5fd 100644 --- a/opencl/test/unit_test/execution_model/scheduler_dispatch_tests.cpp +++ b/opencl/test/unit_test/execution_model/scheduler_dispatch_tests.cpp @@ -53,7 +53,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, ExecutionModelSchedulerFixture, WhenDispatchingSched EXPECT_NE(nullptr, executionModelDsh); - size_t minRequiredSizeForSchedulerSSH = HardwareCommandsHelper::getSshSizeForExecutionModel(*parentKernel, rootDeviceIndex); + size_t minRequiredSizeForSchedulerSSH = HardwareCommandsHelper::getSshSizeForExecutionModel(*parentKernel); // Setup heaps in pCmdQ MultiDispatchInfo multiDispatchinfo(&scheduler); LinearStream &commandStream = getCommandStream(*pCmdQ, CsrDependencies(), @@ -122,7 +122,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, ExecutionModelSchedulerFixture, WhenDispatchingSched uint32_t threadsPerWorkGroup = walker->getThreadWidthCounterMaximum(); - EXPECT_EQ(scheduler.getLws() / scheduler.getKernelInfo(rootDeviceIndex).getMaxSimdSize(), threadsPerWorkGroup); + EXPECT_EQ(scheduler.getLws() / scheduler.getKernelInfo().getMaxSimdSize(), threadsPerWorkGroup); numWorkgroupsProgrammed[0] = walker->getThreadGroupIdXDimension(); numWorkgroupsProgrammed[1] = walker->getThreadGroupIdYDimension(); @@ -149,7 +149,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, ExecutionModelSchedulerFixture, WhenDispatchingSched auto numChannels = 3; auto grfSize = pDevice->getHardwareInfo().capabilityTable.grfSize; - auto sizePerThreadDataTotal = PerThreadDataHelper::getPerThreadDataSizeTotal(scheduler.getKernelInfo(rootDeviceIndex).getMaxSimdSize(), grfSize, numChannels, scheduler.getLws()); + auto sizePerThreadDataTotal = PerThreadDataHelper::getPerThreadDataSizeTotal(scheduler.getKernelInfo().getMaxSimdSize(), grfSize, numChannels, scheduler.getLws()); auto sizeCrossThreadData = scheduler.getCrossThreadDataSize(); auto IndirectDataLength = alignUp((uint32_t)(sizeCrossThreadData + sizePerThreadDataTotal), GPGPU_WALKER::INDIRECTDATASTARTADDRESS_ALIGN_SIZE); @@ -174,7 +174,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, ExecutionModelSchedulerFixture, WhenDispatchingSched DeviceQueueHw *pDevQueueHw = castToObject>(pDevQueue); SchedulerKernel &scheduler = context->getSchedulerKernel(); - size_t minRequiredSizeForSchedulerSSH = HardwareCommandsHelper::getSshSizeForExecutionModel(*parentKernel, rootDeviceIndex); + size_t minRequiredSizeForSchedulerSSH = HardwareCommandsHelper::getSshSizeForExecutionModel(*parentKernel); // Setup heaps in pCmdQ MultiDispatchInfo multiDispatchinfo(&scheduler); @@ -209,7 +209,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, ParentKernelCommandQueueFixture, GivenEarlyReturnSet SchedulerKernel &scheduler = context->getSchedulerKernel(); - size_t minRequiredSizeForSchedulerSSH = HardwareCommandsHelper::getSizeRequiredSSH(scheduler, rootDeviceIndex); + size_t minRequiredSizeForSchedulerSSH = HardwareCommandsHelper::getSizeRequiredSSH(scheduler); // Setup heaps in pCmdQ MultiDispatchInfo multiDispatchinfo(&scheduler); LinearStream &commandStream = getCommandStream(*pCmdQ, CsrDependencies(), diff --git a/opencl/test/unit_test/execution_model/submit_blocked_parent_kernel_tests.cpp b/opencl/test/unit_test/execution_model/submit_blocked_parent_kernel_tests.cpp index d34831db42..62f6c5214d 100644 --- a/opencl/test/unit_test/execution_model/submit_blocked_parent_kernel_tests.cpp +++ b/opencl/test/unit_test/execution_model/submit_blocked_parent_kernel_tests.cpp @@ -81,7 +81,8 @@ HWCMDTEST_F(IGFX_GEN8_CORE, ParentKernelCommandQueueFixture, givenLockedEMcritca cl_queue_properties properties[3] = {0}; MockParentKernel *parentKernel = MockParentKernel::create(*context); - MultiDeviceKernel multiDeviceKernel(MockMultiDeviceKernel::toKernelVector(parentKernel)); + auto kernelInfos = MockKernel::toKernelInfoContainer(parentKernel->getKernelInfo(), rootDeviceIndex); + MultiDeviceKernel multiDeviceKernel(MockMultiDeviceKernel::toKernelVector(parentKernel), kernelInfos); MockDeviceQueueHwWithCriticalSectionRelease mockDevQueue(context, device, properties[0]); parentKernel->createReflectionSurface(); context->setDefaultDeviceQueue(&mockDevQueue); @@ -98,7 +99,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, ParentKernelCommandQueueFixture, givenLockedEMcritca dsh->getSpace(mockDevQueue.getDshOffset()); - size_t minSizeSSHForEM = HardwareCommandsHelper::getSshSizeForExecutionModel(*parentKernel, rootDeviceIndex); + size_t minSizeSSHForEM = HardwareCommandsHelper::getSshSizeForExecutionModel(*parentKernel); auto cmdStreamAllocation = device->getMemoryManager()->allocateGraphicsMemoryWithProperties({device->getRootDeviceIndex(), 4096, GraphicsAllocation::AllocationType::COMMAND_BUFFER, device->getDeviceBitfield()}); auto blockedCommandData = std::make_unique(new LinearStream(cmdStreamAllocation), @@ -121,7 +122,8 @@ HWCMDTEST_F(IGFX_GEN8_CORE, ParentKernelCommandQueueFixture, givenParentKernelWh cl_queue_properties properties[3] = {0}; MockParentKernel *parentKernel = MockParentKernel::create(*context); - MultiDeviceKernel multiDeviceKernel(MockMultiDeviceKernel::toKernelVector(parentKernel)); + auto kernelInfos = MockKernel::toKernelInfoContainer(parentKernel->getKernelInfo(), rootDeviceIndex); + MultiDeviceKernel multiDeviceKernel(MockMultiDeviceKernel::toKernelVector(parentKernel), kernelInfos); MockDeviceQueueHwWithCriticalSectionRelease mockDevQueue(context, device, properties[0]); parentKernel->createReflectionSurface(); context->setDefaultDeviceQueue(&mockDevQueue); @@ -163,7 +165,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, ParentKernelCommandQueueFixture, givenParentKernelWh *pCmdQ->getGpgpuCommandStreamReceiver().getInternalAllocationStorage()); blockedCommandData->setHeaps(dsh, ioh, ssh); - size_t minSizeSSHForEM = HardwareCommandsHelper::getSshSizeForExecutionModel(*parentKernel, rootDeviceIndex); + size_t minSizeSSHForEM = HardwareCommandsHelper::getSshSizeForExecutionModel(*parentKernel); blockedCommandData->surfaceStateHeapSizeEM = minSizeSSHForEM; PreemptionMode preemptionMode = device->getPreemptionMode(); @@ -184,7 +186,8 @@ HWCMDTEST_F(IGFX_GEN8_CORE, ParentKernelCommandQueueFixture, givenParentKernelWh cl_queue_properties properties[3] = {0}; MockParentKernel *parentKernel = MockParentKernel::create(*context); - MultiDeviceKernel multiDeviceKernel(MockMultiDeviceKernel::toKernelVector(parentKernel)); + auto kernelInfos = MockKernel::toKernelInfoContainer(parentKernel->getKernelInfo(), rootDeviceIndex); + MultiDeviceKernel multiDeviceKernel(MockMultiDeviceKernel::toKernelVector(parentKernel), kernelInfos); MockDeviceQueueHwWithCriticalSectionRelease mockDevQueue(context, device, properties[0]); parentKernel->createReflectionSurface(); context->setDefaultDeviceQueue(&mockDevQueue); @@ -204,7 +207,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, ParentKernelCommandQueueFixture, givenParentKernelWh *pCmdQ->getGpgpuCommandStreamReceiver().getInternalAllocationStorage()); blockedCommandData->setHeaps(dsh, ioh, ssh); - size_t minSizeSSHForEM = HardwareCommandsHelper::getSshSizeForExecutionModel(*parentKernel, rootDeviceIndex); + size_t minSizeSSHForEM = HardwareCommandsHelper::getSshSizeForExecutionModel(*parentKernel); blockedCommandData->surfaceStateHeapSizeEM = minSizeSSHForEM; PreemptionMode preemptionMode = device->getPreemptionMode(); @@ -224,7 +227,8 @@ HWCMDTEST_F(IGFX_GEN8_CORE, ParentKernelCommandQueueFixture, givenBlockedParentK cl_queue_properties properties[3] = {0}; MockParentKernel *parentKernel = MockParentKernel::create(*context); - MultiDeviceKernel multiDeviceKernel(MockMultiDeviceKernel::toKernelVector(parentKernel)); + auto kernelInfos = MockKernel::toKernelInfoContainer(parentKernel->getKernelInfo(), rootDeviceIndex); + MultiDeviceKernel multiDeviceKernel(MockMultiDeviceKernel::toKernelVector(parentKernel), kernelInfos); MockDeviceQueueHwWithCriticalSectionRelease mockDevQueue(context, device, properties[0]); parentKernel->createReflectionSurface(); context->setDefaultDeviceQueue(&mockDevQueue); @@ -242,7 +246,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, ParentKernelCommandQueueFixture, givenBlockedParentK *pCmdQ->getGpgpuCommandStreamReceiver().getInternalAllocationStorage()); blockedCommandData->setHeaps(dsh, ioh, ssh); - size_t minSizeSSHForEM = HardwareCommandsHelper::getSshSizeForExecutionModel(*parentKernel, rootDeviceIndex); + size_t minSizeSSHForEM = HardwareCommandsHelper::getSshSizeForExecutionModel(*parentKernel); blockedCommandData->surfaceStateHeapSizeEM = minSizeSSHForEM; PreemptionMode preemptionMode = device->getPreemptionMode(); @@ -264,7 +268,8 @@ HWCMDTEST_F(IGFX_GEN8_CORE, ParentKernelCommandQueueFixture, givenParentKernelWh cl_queue_properties properties[3] = {0}; MockParentKernel *parentKernel = MockParentKernel::create(*context); - MultiDeviceKernel multiDeviceKernel(MockMultiDeviceKernel::toKernelVector(parentKernel)); + auto kernelInfos = MockKernel::toKernelInfoContainer(parentKernel->getKernelInfo(), rootDeviceIndex); + MultiDeviceKernel multiDeviceKernel(MockMultiDeviceKernel::toKernelVector(parentKernel), kernelInfos); MockDeviceQueueHwWithCriticalSectionRelease mockDevQueue(context, device, properties[0]); parentKernel->createReflectionSurface(); context->setDefaultDeviceQueue(&mockDevQueue); @@ -283,7 +288,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, ParentKernelCommandQueueFixture, givenParentKernelWh *pCmdQ->getGpgpuCommandStreamReceiver().getInternalAllocationStorage()); blockedCommandData->setHeaps(dsh, ioh, ssh); - size_t minSizeSSHForEM = HardwareCommandsHelper::getSshSizeForExecutionModel(*parentKernel, rootDeviceIndex); + size_t minSizeSSHForEM = HardwareCommandsHelper::getSshSizeForExecutionModel(*parentKernel); blockedCommandData->surfaceStateHeapSizeEM = minSizeSSHForEM; PreemptionMode preemptionMode = device->getPreemptionMode(); @@ -302,14 +307,15 @@ HWCMDTEST_F(IGFX_GEN8_CORE, ParentKernelCommandQueueFixture, givenUsedCommandQue cl_queue_properties properties[3] = {0}; MockParentKernel *parentKernel = MockParentKernel::create(*context); - MultiDeviceKernel multiDeviceKernel(MockMultiDeviceKernel::toKernelVector(parentKernel)); + auto kernelInfos = MockKernel::toKernelInfoContainer(parentKernel->getKernelInfo(), rootDeviceIndex); + MultiDeviceKernel multiDeviceKernel(MockMultiDeviceKernel::toKernelVector(parentKernel), kernelInfos); MockDeviceQueueHw mockDevQueue(context, device, properties[0]); parentKernel->createReflectionSurface(); context->setDefaultDeviceQueue(&mockDevQueue); MockCommandQueue cmdQ(context, device, properties); - size_t minSizeSSHForEM = HardwareCommandsHelper::getSshSizeForExecutionModel(*parentKernel, rootDeviceIndex); + size_t minSizeSSHForEM = HardwareCommandsHelper::getSshSizeForExecutionModel(*parentKernel); size_t heapSize = 20; @@ -358,12 +364,13 @@ HWCMDTEST_F(IGFX_GEN8_CORE, ParentKernelCommandQueueFixture, givenNotUsedSSHWhen cl_queue_properties properties[3] = {0}; MockParentKernel *parentKernel = MockParentKernel::create(*context); - MultiDeviceKernel multiDeviceKernel(MockMultiDeviceKernel::toKernelVector(parentKernel)); + auto kernelInfos = MockKernel::toKernelInfoContainer(parentKernel->getKernelInfo(), rootDeviceIndex); + MultiDeviceKernel multiDeviceKernel(MockMultiDeviceKernel::toKernelVector(parentKernel), kernelInfos); MockDeviceQueueHw mockDevQueue(context, device, properties[0]); parentKernel->createReflectionSurface(); context->setDefaultDeviceQueue(&mockDevQueue); - size_t minSizeSSHForEM = HardwareCommandsHelper::getSshSizeForExecutionModel(*parentKernel, rootDeviceIndex); + size_t minSizeSSHForEM = HardwareCommandsHelper::getSshSizeForExecutionModel(*parentKernel); size_t heapSize = 20; @@ -403,7 +410,8 @@ HWCMDTEST_F(IGFX_GEN8_CORE, ParentKernelCommandQueueFixture, givenBlockedCommand cl_queue_properties properties[3] = {0}; auto parentKernel = MockParentKernel::create(*context); - MultiDeviceKernel multiDeviceKernel(MockMultiDeviceKernel::toKernelVector(parentKernel)); + auto kernelInfos = MockKernel::toKernelInfoContainer(parentKernel->getKernelInfo(), rootDeviceIndex); + MultiDeviceKernel multiDeviceKernel(MockMultiDeviceKernel::toKernelVector(parentKernel), kernelInfos); MockDeviceQueueHw mockDevQueue(context, device, properties[0]); parentKernel->createReflectionSurface(); diff --git a/opencl/test/unit_test/fixtures/cl_preemption_fixture.cpp b/opencl/test/unit_test/fixtures/cl_preemption_fixture.cpp index 2bccd0f460..366104a88d 100644 --- a/opencl/test/unit_test/fixtures/cl_preemption_fixture.cpp +++ b/opencl/test/unit_test/fixtures/cl_preemption_fixture.cpp @@ -40,7 +40,7 @@ void DevicePreemptionTests::SetUp() { context.reset(new MockContext(device.get())); cmdQ.reset(new MockCommandQueue(context.get(), device.get(), properties)); program = std::make_unique(toClDeviceVector(*device)); - kernel.reset(new MockKernel(program.get(), MockKernel::toKernelInfoContainer(*kernelInfo, rootDeviceIndex), *device)); + kernel.reset(new MockKernel(program.get(), *kernelInfo, *device)); dispatchInfo.reset(new DispatchInfo(device.get(), kernel.get(), 1, Vec3(1, 1, 1), Vec3(1, 1, 1), Vec3(0, 0, 0))); ASSERT_NE(nullptr, device); diff --git a/opencl/test/unit_test/fixtures/execution_model_fixture.h b/opencl/test/unit_test/fixtures/execution_model_fixture.h index 91175f94cc..88e18a058c 100644 --- a/opencl/test/unit_test/fixtures/execution_model_fixture.h +++ b/opencl/test/unit_test/fixtures/execution_model_fixture.h @@ -93,7 +93,8 @@ class ExecutionModelSchedulerTest : public ClDeviceFixture, parentKernel = MockParentKernel::create(*context); ASSERT_NE(nullptr, parentKernel); - pMultiDeviceKernel = new MockMultiDeviceKernel(MockMultiDeviceKernel::toKernelVector(parentKernel)); + kernelInfos = MockKernel::toKernelInfoContainer(parentKernel->getKernelInfo(), rootDeviceIndex); + pMultiDeviceKernel = new MockMultiDeviceKernel(MockMultiDeviceKernel::toKernelVector(parentKernel), kernelInfos); } void TearDown() override { @@ -103,7 +104,7 @@ class ExecutionModelSchedulerTest : public ClDeviceFixture, CommandQueueHwFixture::TearDown(); ClDeviceFixture::TearDown(); } - + KernelInfoContainer kernelInfos; MockMultiDeviceKernel *pMultiDeviceKernel = nullptr; MockParentKernel *parentKernel = nullptr; }; diff --git a/opencl/test/unit_test/fixtures/simple_arg_kernel_fixture.h b/opencl/test/unit_test/fixtures/simple_arg_kernel_fixture.h index efe2d029bf..29f38f121b 100644 --- a/opencl/test/unit_test/fixtures/simple_arg_kernel_fixture.h +++ b/opencl/test/unit_test/fixtures/simple_arg_kernel_fixture.h @@ -110,7 +110,7 @@ class SimpleArgKernelFixture : public ProgramFixture { // create a kernel pKernel = Kernel::create( pProgram, - pProgram->getKernelInfosForKernel("SimpleArg"), + pProgram->getKernelInfoForKernel("SimpleArg"), *pDevice, &retVal); @@ -157,7 +157,7 @@ class SimpleArgNonUniformKernelFixture : public ProgramFixture { kernel = Kernel::create( pProgram, - pProgram->getKernelInfosForKernel("simpleNonUniform"), + pProgram->getKernelInfoForKernel("simpleNonUniform"), *device, &retVal); ASSERT_NE(nullptr, kernel); @@ -204,7 +204,7 @@ class SimpleKernelFixture : public ProgramFixture { kernelName.append(std::to_string(i)); kernels[i].reset(Kernel::create( pProgram, - pProgram->getKernelInfosForKernel(kernelName.c_str()), + pProgram->getKernelInfoForKernel(kernelName.c_str()), *device, &retVal)); ASSERT_NE(nullptr, kernels[i]); @@ -254,7 +254,7 @@ class SimpleKernelStatelessFixture : public ProgramFixture { kernel.reset(Kernel::create( pProgram, - pProgram->getKernelInfosForKernel("statelessKernel"), + pProgram->getKernelInfoForKernel("statelessKernel"), *device, &retVal)); ASSERT_NE(nullptr, kernel); @@ -378,7 +378,7 @@ class BindlessKernelFixture : public ProgramFixture { kernel.reset(Kernel::create( pProgram, - pProgram->getKernelInfosForKernel(kernelName.c_str()), + pProgram->getKernelInfoForKernel(kernelName.c_str()), *deviceCl, &retVal)); ASSERT_NE(nullptr, kernel); diff --git a/opencl/test/unit_test/gen11/kernel_tests_gen11.cpp b/opencl/test/unit_test/gen11/kernel_tests_gen11.cpp index 01e50274d5..493748d3c8 100644 --- a/opencl/test/unit_test/gen11/kernel_tests_gen11.cpp +++ b/opencl/test/unit_test/gen11/kernel_tests_gen11.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2018-2020 Intel Corporation + * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -21,5 +21,5 @@ GEN11TEST_F(Gen11KernelTest, givenKernelWhenCanTransformImagesIsCalledThenReturn GEN11TEST_F(Gen11KernelTest, GivenKernelWhenNotRunningOnGen12lpThenWaDisableRccRhwoOptimizationIsNotRequired) { MockKernelWithInternals kernel(*pClDevice); - EXPECT_FALSE(kernel.mockKernel->requiresWaDisableRccRhwoOptimization(rootDeviceIndex)); + EXPECT_FALSE(kernel.mockKernel->requiresWaDisableRccRhwoOptimization()); } diff --git a/opencl/test/unit_test/gen12lp/gpgpu_walker_tests_gen12lp.cpp b/opencl/test/unit_test/gen12lp/gpgpu_walker_tests_gen12lp.cpp index 190f351d95..80c1004dfd 100644 --- a/opencl/test/unit_test/gen12lp/gpgpu_walker_tests_gen12lp.cpp +++ b/opencl/test/unit_test/gen12lp/gpgpu_walker_tests_gen12lp.cpp @@ -37,8 +37,8 @@ GEN12LPTEST_F(GpgpuWalkerTests, givenMiStoreRegMemWhenAdjustMiStoreRegMemModeThe class MockKernelWithApplicableWa : public MockKernel { public: - MockKernelWithApplicableWa(Program *program, const KernelInfoContainer &kernelInfos, ClDevice &clDeviceArg) : MockKernel(program, kernelInfos, clDeviceArg) {} - bool requiresWaDisableRccRhwoOptimization(uint32_t rootDeviceIndex) const override { + MockKernelWithApplicableWa(Program *program, const KernelInfo &kernelInfos, ClDevice &clDeviceArg) : MockKernel(program, kernelInfos, clDeviceArg) {} + bool requiresWaDisableRccRhwoOptimization() const override { return waApplicable; } bool waApplicable = false; @@ -138,14 +138,14 @@ GEN12LPTEST_F(HardwareInterfaceTests, GivenKernelWithApplicableWaDisableRccRhwoO using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM; pKernel->waApplicable = true; - auto cmdSize = GpgpuWalkerHelper::getSizeForWaDisableRccRhwoOptimization(pKernel, rootDeviceIndex); + auto cmdSize = GpgpuWalkerHelper::getSizeForWaDisableRccRhwoOptimization(pKernel); size_t expectedSize = 2 * (sizeof(PIPE_CONTROL) + sizeof(MI_LOAD_REGISTER_IMM)); EXPECT_EQ(expectedSize, cmdSize); } GEN12LPTEST_F(HardwareInterfaceTests, GivenKernelWithoutApplicableWaDisableRccRhwoOptimizationWhenCalculatingCommandsSizeThenZeroIsReturned) { pKernel->waApplicable = false; - auto cmdSize = GpgpuWalkerHelper::getSizeForWaDisableRccRhwoOptimization(pKernel, rootDeviceIndex); + auto cmdSize = GpgpuWalkerHelper::getSizeForWaDisableRccRhwoOptimization(pKernel); EXPECT_EQ(0u, cmdSize); } diff --git a/opencl/test/unit_test/gen12lp/kernel_tests_gen12lp.inl b/opencl/test/unit_test/gen12lp/kernel_tests_gen12lp.inl index 84f8e562ac..fced8e0576 100644 --- a/opencl/test/unit_test/gen12lp/kernel_tests_gen12lp.inl +++ b/opencl/test/unit_test/gen12lp/kernel_tests_gen12lp.inl @@ -1,5 +1,5 @@ /* - * Copyright (C) 2019-2020 Intel Corporation + * Copyright (C) 2019-2021 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -22,7 +22,7 @@ GEN12LPTEST_F(Gen12LpKernelTest, givenKernelWhenCanTransformImagesIsCalledThenRe GEN12LPTEST_F(Gen12LpKernelTest, GivenKernelWhenNotUsingSharedObjArgsThenWaDisableRccRhwoOptimizationIsNotRequired) { MockKernelWithInternals kernel(*pClDevice); - EXPECT_FALSE(kernel.mockKernel->requiresWaDisableRccRhwoOptimization(rootDeviceIndex)); + EXPECT_FALSE(kernel.mockKernel->requiresWaDisableRccRhwoOptimization()); } GEN12LPTEST_F(Gen12LpKernelTest, GivenKernelWhenAtLeastOneArgIsMediaCompressedThenWaDisableRccRhwoOptimizationIsRequired) { @@ -56,5 +56,5 @@ GEN12LPTEST_F(Gen12LpKernelTest, GivenKernelWhenAtLeastOneArgIsMediaCompressedTh cl_mem clMem2 = &bufferMediaCompressed; kernel.mockKernel->setArgBuffer(2, sizeof(cl_mem *), &clMem2); - EXPECT_TRUE(kernel.mockKernel->requiresWaDisableRccRhwoOptimization(rootDeviceIndex)); + EXPECT_TRUE(kernel.mockKernel->requiresWaDisableRccRhwoOptimization()); } diff --git a/opencl/test/unit_test/gen12lp/tgllp/kernel_tests_tgllp.cpp b/opencl/test/unit_test/gen12lp/tgllp/kernel_tests_tgllp.cpp index 7b27da595a..aadd415f86 100644 --- a/opencl/test/unit_test/gen12lp/tgllp/kernel_tests_tgllp.cpp +++ b/opencl/test/unit_test/gen12lp/tgllp/kernel_tests_tgllp.cpp @@ -29,12 +29,11 @@ TGLLPTEST_F(KernelTgllpTests, GivenUseOffsetToSkipSetFFIDGPWorkaroundActiveWhenS hwInfo.platform.usRevId = hwHelper.getHwRevIdFromStepping(stepping, hwInfo); auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(&hwInfo)); - auto rootDeviceIndex = device->getRootDeviceIndex(); MockKernelWithInternals mockKernelWithInternals{*device}; populateKernelDescriptor(mockKernelWithInternals.kernelInfo.kernelDescriptor, threadPayload); for (auto isCcsUsed : ::testing::Bool()) { - uint64_t kernelStartOffset = mockKernelWithInternals.mockKernel->getKernelStartOffset(false, false, isCcsUsed, rootDeviceIndex); + uint64_t kernelStartOffset = mockKernelWithInternals.mockKernel->getKernelStartOffset(false, false, isCcsUsed); if (stepping == REVISION_A0 && isCcsUsed) { EXPECT_EQ(defaultKernelStartOffset + additionalOffsetDueToFfid, kernelStartOffset); diff --git a/opencl/test/unit_test/gen8/scheduler_dispatch_tests_gen8.cpp b/opencl/test/unit_test/gen8/scheduler_dispatch_tests_gen8.cpp index fa6f8e8642..8c4593e4f8 100644 --- a/opencl/test/unit_test/gen8/scheduler_dispatch_tests_gen8.cpp +++ b/opencl/test/unit_test/gen8/scheduler_dispatch_tests_gen8.cpp @@ -31,7 +31,7 @@ BDWTEST_F(BdwSchedulerTest, givenCallToDispatchSchedulerWhenPipeControlWithCSSta DeviceQueueHw *pDevQueueHw = castToObject>(pDevQueue); SchedulerKernel &scheduler = context->getSchedulerKernel(); - size_t minRequiredSizeForSchedulerSSH = HardwareCommandsHelper::getSshSizeForExecutionModel(*parentKernel, rootDeviceIndex); + size_t minRequiredSizeForSchedulerSSH = HardwareCommandsHelper::getSshSizeForExecutionModel(*parentKernel); // Setup heaps in pCmdQ MultiDispatchInfo multiDispatchinfo(&scheduler); diff --git a/opencl/test/unit_test/gtpin/gtpin_tests.cpp b/opencl/test/unit_test/gtpin/gtpin_tests.cpp index 777a32cc83..2a82096169 100644 --- a/opencl/test/unit_test/gtpin/gtpin_tests.cpp +++ b/opencl/test/unit_test/gtpin/gtpin_tests.cpp @@ -763,8 +763,8 @@ TEST_F(GTPinTests, givenInitializedGTPinInterfaceWhenKernelIsExecutedThenGTPinCa MultiDeviceKernel *pMultiDeviceKernel1 = static_cast(kernel1); Kernel *pKernel1 = pMultiDeviceKernel1->getKernel(rootDeviceIndex); - const KernelInfo &kInfo1 = pKernel1->getKernelInfo(rootDeviceIndex); - uint64_t gtpinKernelId1 = pKernel1->getKernelId(rootDeviceIndex); + const KernelInfo &kInfo1 = pKernel1->getKernelInfo(); + uint64_t gtpinKernelId1 = pKernel1->getKernelId(); EXPECT_EQ(kInfo1.shaderHashCode, gtpinKernelId1); constexpr size_t n = 256; @@ -798,8 +798,8 @@ TEST_F(GTPinTests, givenInitializedGTPinInterfaceWhenKernelIsExecutedThenGTPinCa MultiDeviceKernel *pMultiDeviceKernel2 = static_cast(kernel2); Kernel *pKernel2 = pMultiDeviceKernel2->getKernel(rootDeviceIndex); - const KernelInfo &kInfo2 = pKernel2->getKernelInfo(rootDeviceIndex); - uint64_t gtpinKernelId2 = pKernel2->getKernelId(rootDeviceIndex); + const KernelInfo &kInfo2 = pKernel2->getKernelInfo(); + uint64_t gtpinKernelId2 = pKernel2->getKernelId(); EXPECT_EQ(kInfo2.shaderHashCode, gtpinKernelId2); auto buff20 = clCreateBuffer(context, 0, n * sizeof(unsigned int), nullptr, nullptr); @@ -913,8 +913,8 @@ TEST_F(GTPinTests, givenInitializedGTPinInterfaceWhenKernelINTELIsExecutedThenGT MultiDeviceKernel *pMultiDeviceKernel1 = static_cast(kernel1); Kernel *pKernel1 = pMultiDeviceKernel1->getKernel(rootDeviceIndex); - const KernelInfo &kInfo1 = pKernel1->getKernelInfo(rootDeviceIndex); - uint64_t gtpinKernelId1 = pKernel1->getKernelId(rootDeviceIndex); + const KernelInfo &kInfo1 = pKernel1->getKernelInfo(); + uint64_t gtpinKernelId1 = pKernel1->getKernelId(); EXPECT_EQ(kInfo1.shaderHashCode, gtpinKernelId1); cl_uint workDim = 1; @@ -956,8 +956,8 @@ TEST_F(GTPinTests, givenInitializedGTPinInterfaceWhenKernelINTELIsExecutedThenGT MultiDeviceKernel *pMultiDeviceKernel2 = static_cast(kernel2); Kernel *pKernel2 = pMultiDeviceKernel2->getKernel(rootDeviceIndex); - const KernelInfo &kInfo2 = pKernel2->getKernelInfo(rootDeviceIndex); - uint64_t gtpinKernelId2 = pKernel2->getKernelId(rootDeviceIndex); + const KernelInfo &kInfo2 = pKernel2->getKernelInfo(); + uint64_t gtpinKernelId2 = pKernel2->getKernelId(); EXPECT_EQ(kInfo2.shaderHashCode, gtpinKernelId2); auto buff20 = clCreateBuffer(context, 0, n * sizeof(unsigned int), nullptr, nullptr); @@ -1189,7 +1189,8 @@ HWCMDTEST_F(IGFX_GEN8_CORE, GTPinTests, givenInitializedGTPinInterfaceWhenKernel size_t localWorkSize[3] = {1, 1, 1}; MockParentKernel *parentKernel = MockParentKernel::create(*pContext); - auto pMultiDeviceKernel = std::make_unique(MockMultiDeviceKernel::toKernelVector(parentKernel)); + auto kernelInfos = MockKernel::toKernelInfoContainer(parentKernel->getKernelInfo(), rootDeviceIndex); + auto pMultiDeviceKernel = std::make_unique(MockMultiDeviceKernel::toKernelVector(parentKernel), kernelInfos); retVal = clEnqueueNDRangeKernel(cmdQ, pMultiDeviceKernel.get(), workDim, globalWorkOffset, globalWorkSize, localWorkSize, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); @@ -1269,8 +1270,8 @@ TEST_F(GTPinTests, givenInitializedGTPinInterfaceWhenKernelWithoutSSHIsUsedThenG MultiDeviceKernel *pMultiDeviceKernel = static_cast(kernel); Kernel *pKernel = pMultiDeviceKernel->getKernel(rootDeviceIndex); - const KernelInfo &kInfo = pKernel->getKernelInfo(rootDeviceIndex); - uint64_t gtpinKernelId = pKernel->getKernelId(rootDeviceIndex); + const KernelInfo &kInfo = pKernel->getKernelInfo(); + uint64_t gtpinKernelId = pKernel->getKernelId(); EXPECT_EQ(kInfo.shaderHashCode, gtpinKernelId); constexpr size_t n = 256; @@ -1383,8 +1384,8 @@ TEST_F(GTPinTests, givenInitializedGTPinInterfaceWhenBlockedKernelWithoutSSHIsUs MultiDeviceKernel *pMultiDeviceKernel = static_cast(kernel); Kernel *pKernel = pMultiDeviceKernel->getKernel(rootDeviceIndex); - const KernelInfo &kInfo = pKernel->getKernelInfo(rootDeviceIndex); - uint64_t gtpinKernelId = pKernel->getKernelId(rootDeviceIndex); + const KernelInfo &kInfo = pKernel->getKernelInfo(); + uint64_t gtpinKernelId = pKernel->getKernelId(); EXPECT_EQ(kInfo.shaderHashCode, gtpinKernelId); constexpr size_t n = 256; @@ -1508,8 +1509,8 @@ TEST_F(GTPinTests, givenInitializedGTPinInterfaceWhenTheSameKerneIsExecutedTwice MultiDeviceKernel *pMultiDeviceKernel1 = static_cast(kernel1); Kernel *pKernel1 = pMultiDeviceKernel1->getKernel(rootDeviceIndex); - const KernelInfo &kInfo1 = pKernel1->getKernelInfo(rootDeviceIndex); - uint64_t gtpinKernelId1 = pKernel1->getKernelId(rootDeviceIndex); + const KernelInfo &kInfo1 = pKernel1->getKernelInfo(); + uint64_t gtpinKernelId1 = pKernel1->getKernelId(); EXPECT_EQ(kInfo1.shaderHashCode, gtpinKernelId1); constexpr size_t n = 256; @@ -1547,8 +1548,8 @@ TEST_F(GTPinTests, givenInitializedGTPinInterfaceWhenTheSameKerneIsExecutedTwice MultiDeviceKernel *pMultiDeviceKernel2 = static_cast(kernel2); Kernel *pKernel2 = pMultiDeviceKernel2->getKernel(rootDeviceIndex); - const KernelInfo &kInfo2 = pKernel2->getKernelInfo(rootDeviceIndex); - uint64_t gtpinKernelId2 = pKernel2->getKernelId(rootDeviceIndex); + const KernelInfo &kInfo2 = pKernel2->getKernelInfo(); + uint64_t gtpinKernelId2 = pKernel2->getKernelId(); EXPECT_EQ(kInfo2.shaderHashCode, gtpinKernelId2); auto buff20 = clCreateBuffer(context, 0, n * sizeof(unsigned int), nullptr, nullptr); @@ -2182,12 +2183,12 @@ TEST_F(GTPinTests, givenParentKernelWhenGtPinAddingSurfaceStateThenItIsNotAddedA parentKernel->sshLocalSize = 64; parentKernel->pSshLocal.reset(new char[64]); - size_t sizeSurfaceStates1 = parentKernel->getSurfaceStateHeapSize(rootDeviceIndex); + size_t sizeSurfaceStates1 = parentKernel->getSurfaceStateHeapSize(); - bool surfaceAdded = gtpinHelper.addSurfaceState(parentKernel.get(), rootDeviceIndex); + bool surfaceAdded = gtpinHelper.addSurfaceState(parentKernel.get()); EXPECT_FALSE(surfaceAdded); - size_t sizeSurfaceStates2 = parentKernel->getSurfaceStateHeapSize(rootDeviceIndex); + size_t sizeSurfaceStates2 = parentKernel->getSurfaceStateHeapSize(); EXPECT_EQ(sizeSurfaceStates2, sizeSurfaceStates1); } @@ -2238,47 +2239,47 @@ TEST_F(GTPinTests, givenKernelWithSSHThenVerifyThatSSHResizeWorksWell) { size_t numBTS1 = pKernel->getNumberOfBindingTableStates(); EXPECT_EQ(2u, numBTS1); - size_t sizeSurfaceStates1 = pKernel->getSurfaceStateHeapSize(rootDeviceIndex); + size_t sizeSurfaceStates1 = pKernel->getSurfaceStateHeapSize(); EXPECT_NE(0u, sizeSurfaceStates1); size_t offsetBTS1 = pKernel->getBindingTableOffset(); EXPECT_NE(0u, offsetBTS1); GFXCORE_FAMILY genFamily = pDevice->getHardwareInfo().platform.eRenderCoreFamily; GTPinHwHelper >pinHelper = GTPinHwHelper::get(genFamily); - void *pSS1 = gtpinHelper.getSurfaceState(pKernel, 0, rootDeviceIndex); + void *pSS1 = gtpinHelper.getSurfaceState(pKernel, 0); EXPECT_NE(nullptr, pSS1); // Enlarge SSH by one SURFACE STATE element - bool surfaceAdded = gtpinHelper.addSurfaceState(pKernel, rootDeviceIndex); + bool surfaceAdded = gtpinHelper.addSurfaceState(pKernel); EXPECT_TRUE(surfaceAdded); size_t numBTS2 = pKernel->getNumberOfBindingTableStates(); EXPECT_EQ(numBTS1 + 1, numBTS2); - size_t sizeSurfaceStates2 = pKernel->getSurfaceStateHeapSize(rootDeviceIndex); + size_t sizeSurfaceStates2 = pKernel->getSurfaceStateHeapSize(); EXPECT_GT(sizeSurfaceStates2, sizeSurfaceStates1); size_t offsetBTS2 = pKernel->getBindingTableOffset(); EXPECT_GT(offsetBTS2, offsetBTS1); - void *pSS2 = gtpinHelper.getSurfaceState(pKernel, 0, rootDeviceIndex); + void *pSS2 = gtpinHelper.getSurfaceState(pKernel, 0); EXPECT_NE(pSS2, pSS1); - pSS2 = gtpinHelper.getSurfaceState(pKernel, numBTS2, rootDeviceIndex); + pSS2 = gtpinHelper.getSurfaceState(pKernel, numBTS2); EXPECT_EQ(nullptr, pSS2); // Remove kernel's SSH pKernel->resizeSurfaceStateHeap(nullptr, 0, 0, 0); // Try to enlarge SSH once again, this time the operation must fail - surfaceAdded = gtpinHelper.addSurfaceState(pKernel, rootDeviceIndex); + surfaceAdded = gtpinHelper.addSurfaceState(pKernel); EXPECT_FALSE(surfaceAdded); size_t numBTS3 = pKernel->getNumberOfBindingTableStates(); EXPECT_EQ(0u, numBTS3); - size_t sizeSurfaceStates3 = pKernel->getSurfaceStateHeapSize(rootDeviceIndex); + size_t sizeSurfaceStates3 = pKernel->getSurfaceStateHeapSize(); EXPECT_EQ(0u, sizeSurfaceStates3); size_t offsetBTS3 = pKernel->getBindingTableOffset(); EXPECT_EQ(0u, offsetBTS3); - void *pSS3 = gtpinHelper.getSurfaceState(pKernel, 0, rootDeviceIndex); + void *pSS3 = gtpinHelper.getSurfaceState(pKernel, 0); EXPECT_EQ(nullptr, pSS3); // Cleanup @@ -2337,7 +2338,7 @@ TEST_F(GTPinTests, givenKernelThenVerifyThatKernelCodeSubstitutionWorksWell) { auto pKernel = pMultiDeviceKernel->getKernel(rootDeviceIndex); ASSERT_NE(nullptr, pKernel); - bool isKernelCodeSubstituted = pKernel->isKernelHeapSubstituted(rootDeviceIndex); + bool isKernelCodeSubstituted = pKernel->isKernelHeapSubstituted(); EXPECT_FALSE(isKernelCodeSubstituted); // Substitute new kernel code @@ -2346,12 +2347,12 @@ TEST_F(GTPinTests, givenKernelThenVerifyThatKernelCodeSubstitutionWorksWell) { pKernel->substituteKernelHeap(pDevice->getDevice(), &newCode[0], newCodeSize); // Verify that substitution went properly - isKernelCodeSubstituted = pKernel->isKernelHeapSubstituted(rootDeviceIndex); + isKernelCodeSubstituted = pKernel->isKernelHeapSubstituted(); EXPECT_TRUE(isKernelCodeSubstituted); - uint8_t *pBin2 = reinterpret_cast(const_cast(pKernel->getKernelHeap(rootDeviceIndex))); + uint8_t *pBin2 = reinterpret_cast(const_cast(pKernel->getKernelHeap())); EXPECT_EQ(pBin2, &newCode[0]); - auto kernelIsa = pKernel->getKernelInfo(rootDeviceIndex).kernelAllocation->getUnderlyingBuffer(); + auto kernelIsa = pKernel->getKernelInfo().kernelAllocation->getUnderlyingBuffer(); EXPECT_EQ(0, memcmp(kernelIsa, newCode, newCodeSize)); diff --git a/opencl/test/unit_test/helpers/dispatch_info_builder_tests.cpp b/opencl/test/unit_test/helpers/dispatch_info_builder_tests.cpp index dee0aa9530..7e76a71bba 100644 --- a/opencl/test/unit_test/helpers/dispatch_info_builder_tests.cpp +++ b/opencl/test/unit_test/helpers/dispatch_info_builder_tests.cpp @@ -64,7 +64,7 @@ class DispatchInfoBuilderFixture : public ContextFixture, public ClDeviceFixture pProgram = new MockProgram(pContext, false, toClDeviceVector(*pClDevice)); - pKernel = new MockKernel(pProgram, MockKernel::toKernelInfoContainer(*pKernelInfo, rootDeviceIndex), *pClDevice); + pKernel = new MockKernel(pProgram, *pKernelInfo, *pClDevice); ASSERT_EQ(CL_SUCCESS, pKernel->initialize()); pKernel->setCrossThreadData(pCrossThreadData, sizeof(pCrossThreadData)); pKernel->setKernelArgHandler(0, &Kernel::setArgBuffer); diff --git a/opencl/test/unit_test/helpers/dispatch_info_tests.cpp b/opencl/test/unit_test/helpers/dispatch_info_tests.cpp index 5a46d91792..f3e91bd538 100644 --- a/opencl/test/unit_test/helpers/dispatch_info_tests.cpp +++ b/opencl/test/unit_test/helpers/dispatch_info_tests.cpp @@ -43,7 +43,7 @@ class DispatchInfoFixture : public ContextFixture, public ClDeviceFixture { pProgram = new MockProgram(pContext, false, toClDeviceVector(*pClDevice)); - pKernel = new MockKernel(pProgram, MockKernel::toKernelInfoContainer(*pKernelInfo, rootDeviceIndex), *pClDevice); + pKernel = new MockKernel(pProgram, *pKernelInfo, *pClDevice); pKernel->slmTotalSize = 128; } void TearDown() override { diff --git a/opencl/test/unit_test/helpers/hardware_commands_helper_tests.cpp b/opencl/test/unit_test/helpers/hardware_commands_helper_tests.cpp index c8c929291c..b41aafbccb 100644 --- a/opencl/test/unit_test/helpers/hardware_commands_helper_tests.cpp +++ b/opencl/test/unit_test/helpers/hardware_commands_helper_tests.cpp @@ -183,7 +183,7 @@ HWTEST_F(HardwareCommandsTest, givenSendCrossThreadDataWhenWhenAddPatchInfoComme MockProgram program(&context, false, toClDeviceVector(*pClDevice)); auto kernelInfo = std::make_unique(); - std::unique_ptr kernel(new MockKernel(&program, MockKernel::toKernelInfoContainer(*kernelInfo, rootDeviceIndex), *pClDevice)); + std::unique_ptr kernel(new MockKernel(&program, *kernelInfo, *pClDevice)); auto &indirectHeap = cmdQ.getIndirectHeap(IndirectHeap::INDIRECT_OBJECT, 8192); @@ -249,7 +249,7 @@ HWTEST_F(HardwareCommandsTest, givenSendCrossThreadDataWhenWhenAddPatchInfoComme MockProgram program(&context, false, toClDeviceVector(*pClDevice)); auto kernelInfo = std::make_unique(); - std::unique_ptr kernel(new MockKernel(&program, MockKernel::toKernelInfoContainer(*kernelInfo, rootDeviceIndex), *pClDevice)); + std::unique_ptr kernel(new MockKernel(&program, *kernelInfo, *pClDevice)); auto &indirectHeap = cmdQ.getIndirectHeap(IndirectHeap::INDIRECT_OBJECT, 8192); indirectHeap.getSpace(128u); @@ -335,7 +335,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, WhenAllocatingIndirectStateRes sizeof(INTERFACE_DESCRIPTOR_DATA)); uint32_t interfaceDescriptorIndex = 0; auto isCcsUsed = EngineHelpers::isCcs(cmdQ.getGpgpuEngine().osContext->getEngineType()); - auto kernelUsesLocalIds = HardwareCommandsHelper::kernelUsesLocalIds(*kernel, rootDeviceIndex); + auto kernelUsesLocalIds = HardwareCommandsHelper::kernelUsesLocalIds(*kernel); HardwareCommandsHelper::sendIndirectState( commandStream, @@ -343,8 +343,8 @@ HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, WhenAllocatingIndirectStateRes ioh, ssh, *kernel, - kernel->getKernelStartOffset(true, kernelUsesLocalIds, isCcsUsed, rootDeviceIndex), - kernel->getKernelInfo(rootDeviceIndex).getMaxSimdSize(), + kernel->getKernelStartOffset(true, kernelUsesLocalIds, isCcsUsed), + kernel->getKernelInfo().getMaxSimdSize(), localWorkSizes, IDToffset, interfaceDescriptorIndex, @@ -359,9 +359,9 @@ HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, WhenAllocatingIndirectStateRes auto usedAfterDSH = dsh.getUsed(); auto usedAfterIOH = ioh.getUsed(); auto usedAfterSSH = ssh.getUsed(); - auto sizeRequiredDSH = HardwareCommandsHelper::getSizeRequiredDSH(rootDeviceIndex, *kernel); - auto sizeRequiredIOH = HardwareCommandsHelper::getSizeRequiredIOH(rootDeviceIndex, *kernel, localWorkSize); - auto sizeRequiredSSH = HardwareCommandsHelper::getSizeRequiredSSH(*kernel, rootDeviceIndex); + auto sizeRequiredDSH = HardwareCommandsHelper::getSizeRequiredDSH(*kernel); + auto sizeRequiredIOH = HardwareCommandsHelper::getSizeRequiredIOH(*kernel, localWorkSize); + auto sizeRequiredSSH = HardwareCommandsHelper::getSizeRequiredSSH(*kernel); EXPECT_GE(sizeRequiredDSH, usedAfterDSH - usedBeforeDSH); EXPECT_GE(sizeRequiredIOH, usedAfterIOH - usedBeforeIOH); @@ -390,7 +390,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, givenKernelWithFourBindingTabl const size_t localWorkSizes[3]{localWorkSize, 1, 1}; uint32_t interfaceDescriptorIndex = 0; auto isCcsUsed = EngineHelpers::isCcs(cmdQ.getGpgpuEngine().osContext->getEngineType()); - auto kernelUsesLocalIds = HardwareCommandsHelper::kernelUsesLocalIds(*mockKernelWithInternal->mockKernel, rootDeviceIndex); + auto kernelUsesLocalIds = HardwareCommandsHelper::kernelUsesLocalIds(*mockKernelWithInternal->mockKernel); HardwareCommandsHelper::sendIndirectState( commandStream, @@ -398,8 +398,8 @@ HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, givenKernelWithFourBindingTabl ioh, ssh, *mockKernelWithInternal->mockKernel, - mockKernelWithInternal->mockKernel->getKernelStartOffset(true, kernelUsesLocalIds, isCcsUsed, rootDeviceIndex), - mockKernelWithInternal->mockKernel->getKernelInfo(rootDeviceIndex).getMaxSimdSize(), + mockKernelWithInternal->mockKernel->getKernelStartOffset(true, kernelUsesLocalIds, isCcsUsed), + mockKernelWithInternal->mockKernel->getKernelInfo().getMaxSimdSize(), localWorkSizes, 0, interfaceDescriptorIndex, @@ -438,7 +438,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, givenKernelThatIsSchedulerWhen const size_t localWorkSizes[3]{localWorkSize, 1, 1}; uint32_t interfaceDescriptorIndex = 0; auto isCcsUsed = EngineHelpers::isCcs(cmdQ.getGpgpuEngine().osContext->getEngineType()); - auto kernelUsesLocalIds = HardwareCommandsHelper::kernelUsesLocalIds(*mockKernelWithInternal->mockKernel, rootDeviceIndex); + auto kernelUsesLocalIds = HardwareCommandsHelper::kernelUsesLocalIds(*mockKernelWithInternal->mockKernel); HardwareCommandsHelper::sendIndirectState( commandStream, @@ -446,8 +446,8 @@ HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, givenKernelThatIsSchedulerWhen ioh, ssh, *mockKernelWithInternal->mockKernel, - mockKernelWithInternal->mockKernel->getKernelStartOffset(true, kernelUsesLocalIds, isCcsUsed, rootDeviceIndex), - mockKernelWithInternal->mockKernel->getKernelInfo(rootDeviceIndex).getMaxSimdSize(), + mockKernelWithInternal->mockKernel->getKernelStartOffset(true, kernelUsesLocalIds, isCcsUsed), + mockKernelWithInternal->mockKernel->getKernelInfo().getMaxSimdSize(), localWorkSizes, 0, interfaceDescriptorIndex, @@ -480,7 +480,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, givenKernelWith100BindingTable const size_t localWorkSizes[3]{localWorkSize, 1, 1}; uint32_t interfaceDescriptorIndex = 0; auto isCcsUsed = EngineHelpers::isCcs(cmdQ.getGpgpuEngine().osContext->getEngineType()); - auto kernelUsesLocalIds = HardwareCommandsHelper::kernelUsesLocalIds(*mockKernelWithInternal->mockKernel, rootDeviceIndex); + auto kernelUsesLocalIds = HardwareCommandsHelper::kernelUsesLocalIds(*mockKernelWithInternal->mockKernel); HardwareCommandsHelper::sendIndirectState( commandStream, @@ -488,8 +488,8 @@ HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, givenKernelWith100BindingTable ioh, ssh, *mockKernelWithInternal->mockKernel, - mockKernelWithInternal->mockKernel->getKernelStartOffset(true, kernelUsesLocalIds, isCcsUsed, rootDeviceIndex), - mockKernelWithInternal->mockKernel->getKernelInfo(rootDeviceIndex).getMaxSimdSize(), + mockKernelWithInternal->mockKernel->getKernelStartOffset(true, kernelUsesLocalIds, isCcsUsed), + mockKernelWithInternal->mockKernel->getKernelInfo().getMaxSimdSize(), localWorkSizes, 0, interfaceDescriptorIndex, @@ -548,20 +548,18 @@ HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, whenSendingIndirectStateThenKe dsh.getSpace(sizeof(INTERFACE_DESCRIPTOR_DATA)); KernelInfo modifiedKernelInfo = {}; - modifiedKernelInfo.patchInfo = kernel->getKernelInfo(rootDeviceIndex).patchInfo; + modifiedKernelInfo.patchInfo = kernel->getKernelInfo().patchInfo; modifiedKernelInfo.kernelDescriptor.kernelAttributes.workgroupWalkOrder[0] = 2; modifiedKernelInfo.kernelDescriptor.kernelAttributes.workgroupWalkOrder[1] = 1; modifiedKernelInfo.kernelDescriptor.kernelAttributes.workgroupWalkOrder[2] = 0; modifiedKernelInfo.kernelDescriptor.kernelAttributes.workgroupDimensionsOrder[0] = 2; modifiedKernelInfo.kernelDescriptor.kernelAttributes.workgroupDimensionsOrder[1] = 1; modifiedKernelInfo.kernelDescriptor.kernelAttributes.workgroupDimensionsOrder[2] = 0; - KernelInfoContainer kernelInfos; modifiedKernelInfo.kernelDescriptor.kernelAttributes.simdSize = 16; - kernelInfos.push_back(&modifiedKernelInfo); - MockKernel mockKernel(kernel->getProgram(), kernelInfos, *pClDevice, false); + MockKernel mockKernel(kernel->getProgram(), modifiedKernelInfo, *pClDevice, false); uint32_t interfaceDescriptorIndex = 0; auto isCcsUsed = EngineHelpers::isCcs(cmdQ.getGpgpuEngine().osContext->getEngineType()); - auto kernelUsesLocalIds = HardwareCommandsHelper::kernelUsesLocalIds(mockKernel, rootDeviceIndex); + auto kernelUsesLocalIds = HardwareCommandsHelper::kernelUsesLocalIds(mockKernel); HardwareCommandsHelper::sendIndirectState( commandStream, @@ -569,7 +567,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, whenSendingIndirectStateThenKe ioh, ssh, mockKernel, - mockKernel.getKernelStartOffset(true, kernelUsesLocalIds, isCcsUsed, rootDeviceIndex), + mockKernel.getKernelStartOffset(true, kernelUsesLocalIds, isCcsUsed), modifiedKernelInfo.getMaxSimdSize(), localWorkSizes, IDToffset, @@ -639,7 +637,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, WhenSendingIndirectStateThenBi auto sshUsed = ssh.getUsed(); // Obtain where the pointers will be stored - const auto &kernelInfo = kernel->getKernelInfo(rootDeviceIndex); + const auto &kernelInfo = kernel->getKernelInfo(); auto numSurfaceStates = kernelInfo.patchInfo.statelessGlobalMemObjKernelArgs.size() + kernelInfo.patchInfo.imageMemObjKernelArgs.size(); EXPECT_EQ(2u, numSurfaceStates); @@ -654,7 +652,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, WhenSendingIndirectStateThenBi const_cast(kernelInfo).requiresSshForBuffers = true; uint32_t interfaceDescriptorIndex = 0; auto isCcsUsed = EngineHelpers::isCcs(cmdQ.getGpgpuEngine().osContext->getEngineType()); - auto kernelUsesLocalIds = HardwareCommandsHelper::kernelUsesLocalIds(*kernel, rootDeviceIndex); + auto kernelUsesLocalIds = HardwareCommandsHelper::kernelUsesLocalIds(*kernel); HardwareCommandsHelper::sendIndirectState( commandStream, @@ -662,8 +660,8 @@ HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, WhenSendingIndirectStateThenBi ioh, ssh, *kernel, - kernel->getKernelStartOffset(true, kernelUsesLocalIds, isCcsUsed, rootDeviceIndex), - kernel->getKernelInfo(rootDeviceIndex).getMaxSimdSize(), + kernel->getKernelStartOffset(true, kernelUsesLocalIds, isCcsUsed), + kernel->getKernelInfo().getMaxSimdSize(), localWorkSizes, 0, interfaceDescriptorIndex, @@ -734,7 +732,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, WhenGettingBindingTableStateTh program.setConstantSurface(&gfxConstAlloc); // create kernel - MockKernel *pKernel = new MockKernel(&program, MockKernel::toKernelInfoContainer(*pKernelInfo, rootDeviceIndex), *pClDevice); + MockKernel *pKernel = new MockKernel(&program, *pKernelInfo, *pClDevice); // setup surface state heap constexpr uint32_t numSurfaces = 5; @@ -807,7 +805,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, WhenGettingBindingTableStateTh // push surfaces states and binding table to given ssh heap uint32_t interfaceDescriptorIndex = 0; auto isCcsUsed = EngineHelpers::isCcs(cmdQ.getGpgpuEngine().osContext->getEngineType()); - auto kernelUsesLocalIds = HardwareCommandsHelper::kernelUsesLocalIds(*pKernel, rootDeviceIndex); + auto kernelUsesLocalIds = HardwareCommandsHelper::kernelUsesLocalIds(*pKernel); HardwareCommandsHelper::sendIndirectState( commandStream, @@ -815,8 +813,8 @@ HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, WhenGettingBindingTableStateTh ioh, ssh, *pKernel, - pKernel->getKernelStartOffset(true, kernelUsesLocalIds, isCcsUsed, rootDeviceIndex), - pKernel->getKernelInfo(rootDeviceIndex).getMaxSimdSize(), + pKernel->getKernelStartOffset(true, kernelUsesLocalIds, isCcsUsed), + pKernel->getKernelInfo().getMaxSimdSize(), localWorkSizes, 0, interfaceDescriptorIndex, @@ -853,7 +851,7 @@ HWTEST_F(HardwareCommandsTest, GivenBuffersNotRequiringSshWhenSettingBindingTabl MockProgram program(&context, false, toClDeviceVector(*pClDevice)); // create kernel - MockKernel *pKernel = new MockKernel(&program, MockKernel::toKernelInfoContainer(*pKernelInfo, rootDeviceIndex), *pClDevice); + MockKernel *pKernel = new MockKernel(&program, *pKernelInfo, *pClDevice); // setup surface state heap char surfaceStateHeap[256]; @@ -909,7 +907,7 @@ HWTEST_F(HardwareCommandsTest, GivenZeroSurfaceStatesWhenSettingBindingTableStat MockProgram program(&context, false, toClDeviceVector(*pClDevice)); // create kernel - MockKernel *pKernel = new MockKernel(&program, MockKernel::toKernelInfoContainer(*pKernelInfo, rootDeviceIndex), *pClDevice); + MockKernel *pKernel = new MockKernel(&program, *pKernelInfo, *pClDevice); // setup surface state heap char surfaceStateHeap[256]; @@ -965,7 +963,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, GivenKernelWithInvalidSamplerS const size_t localWorkSizes[3]{localWorkSize, 1, 1}; uint32_t interfaceDescriptorIndex = 0; auto isCcsUsed = EngineHelpers::isCcs(cmdQ.getGpgpuEngine().osContext->getEngineType()); - auto kernelUsesLocalIds = HardwareCommandsHelper::kernelUsesLocalIds(*mockKernelWithInternal->mockKernel, rootDeviceIndex); + auto kernelUsesLocalIds = HardwareCommandsHelper::kernelUsesLocalIds(*mockKernelWithInternal->mockKernel); //Undefined Offset, Defined BorderColorOffset SPatchSamplerStateArray samplerStateArray = {}; @@ -982,8 +980,8 @@ HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, GivenKernelWithInvalidSamplerS ioh, ssh, *mockKernelWithInternal->mockKernel, - mockKernelWithInternal->mockKernel->getKernelStartOffset(true, kernelUsesLocalIds, isCcsUsed, rootDeviceIndex), - mockKernelWithInternal->mockKernel->getKernelInfo(rootDeviceIndex).getMaxSimdSize(), + mockKernelWithInternal->mockKernel->getKernelStartOffset(true, kernelUsesLocalIds, isCcsUsed), + mockKernelWithInternal->mockKernel->getKernelInfo().getMaxSimdSize(), localWorkSizes, 0, interfaceDescriptorIndex, @@ -1008,8 +1006,8 @@ HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, GivenKernelWithInvalidSamplerS ioh, ssh, *mockKernelWithInternal->mockKernel, - mockKernelWithInternal->mockKernel->getKernelStartOffset(true, kernelUsesLocalIds, isCcsUsed, rootDeviceIndex), - mockKernelWithInternal->mockKernel->getKernelInfo(rootDeviceIndex).getMaxSimdSize(), + mockKernelWithInternal->mockKernel->getKernelStartOffset(true, kernelUsesLocalIds, isCcsUsed), + mockKernelWithInternal->mockKernel->getKernelInfo().getMaxSimdSize(), localWorkSizes, 0, interfaceDescriptorIndex, @@ -1076,7 +1074,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, GivenKernelWithSamplersWhenInd mockKernelWithInternal->mockKernel->setSshLocal(mockKernelWithInternal->sshLocal, sizeof(mockKernelWithInternal->sshLocal)); uint32_t interfaceDescriptorIndex = 0; auto isCcsUsed = EngineHelpers::isCcs(cmdQ.getGpgpuEngine().osContext->getEngineType()); - auto kernelUsesLocalIds = HardwareCommandsHelper::kernelUsesLocalIds(*mockKernelWithInternal->mockKernel, rootDeviceIndex); + auto kernelUsesLocalIds = HardwareCommandsHelper::kernelUsesLocalIds(*mockKernelWithInternal->mockKernel); HardwareCommandsHelper::sendIndirectState( commandStream, @@ -1084,7 +1082,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, GivenKernelWithSamplersWhenInd ioh, ssh, *mockKernelWithInternal->mockKernel, - mockKernelWithInternal->mockKernel->getKernelStartOffset(true, kernelUsesLocalIds, isCcsUsed, rootDeviceIndex), + mockKernelWithInternal->mockKernel->getKernelStartOffset(true, kernelUsesLocalIds, isCcsUsed), 8, localWorkSizes, interfaceDescriptorTableOffset, @@ -1170,12 +1168,12 @@ HWCMDTEST_P(IGFX_GEN8_CORE, ParentKernelCommandsFromBinaryTest, WhenGettingSizeR totalSize += maxBindingTableCount * sizeof(BINDING_TABLE_STATE) * DeviceQueue::interfaceDescriptorEntries; auto &scheduler = pContext->getSchedulerKernel(); - auto schedulerSshSize = scheduler.getSurfaceStateHeapSize(rootDeviceIndex); + auto schedulerSshSize = scheduler.getSurfaceStateHeapSize(); totalSize += schedulerSshSize + ((schedulerSshSize != 0) ? BINDING_TABLE_STATE::SURFACESTATEPOINTER_ALIGN_SIZE : 0); totalSize = alignUp(totalSize, BINDING_TABLE_STATE::SURFACESTATEPOINTER_ALIGN_SIZE); - EXPECT_EQ(totalSize, HardwareCommandsHelper::getSshSizeForExecutionModel(*pKernel, rootDeviceIndex)); + EXPECT_EQ(totalSize, HardwareCommandsHelper::getSshSizeForExecutionModel(*pKernel)); } static const char *binaryFile = "simple_block_kernel"; @@ -1195,19 +1193,19 @@ HWTEST_F(HardwareCommandsTest, givenEnabledPassInlineDataWhenKernelAllowsInlineT mockKernelWithInternal->kernelInfo.kernelDescriptor.kernelAttributes.flags.passInlineData = true; mockKernelWithInternal->mockKernel->setCrossThreadData(crossThreadData, sizeof(crossThreadData)); - EXPECT_TRUE(HardwareCommandsHelper::inlineDataProgrammingRequired(*mockKernelWithInternal->mockKernel, rootDeviceIndex)); + EXPECT_TRUE(HardwareCommandsHelper::inlineDataProgrammingRequired(*mockKernelWithInternal->mockKernel)); } HWTEST_F(HardwareCommandsTest, givenNoDebugSettingsWhenDefaultModeIsExcercisedThenWeFollowKernelSettingForInlineProgramming) { mockKernelWithInternal->kernelInfo.kernelDescriptor.kernelAttributes.flags.passInlineData = true; - EXPECT_TRUE(HardwareCommandsHelper::inlineDataProgrammingRequired(*mockKernelWithInternal->mockKernel, rootDeviceIndex)); + EXPECT_TRUE(HardwareCommandsHelper::inlineDataProgrammingRequired(*mockKernelWithInternal->mockKernel)); } HWTEST_F(HardwareCommandsTest, givenDisabledPassInlineDataWhenKernelAllowsInlineThenReturnFalse) { DebugManagerStateRestore restore; DebugManager.flags.EnablePassInlineData.set(0u); mockKernelWithInternal->kernelInfo.kernelDescriptor.kernelAttributes.flags.passInlineData = true; - EXPECT_FALSE(HardwareCommandsHelper::inlineDataProgrammingRequired(*mockKernelWithInternal->mockKernel, rootDeviceIndex)); + EXPECT_FALSE(HardwareCommandsHelper::inlineDataProgrammingRequired(*mockKernelWithInternal->mockKernel)); } HWTEST_F(HardwareCommandsTest, givenEnabledPassInlineDataWhenKernelDisallowsInlineThenReturnFalse) { @@ -1219,17 +1217,17 @@ HWTEST_F(HardwareCommandsTest, givenEnabledPassInlineDataWhenKernelDisallowsInli mockKernelWithInternal->kernelInfo.kernelDescriptor.kernelAttributes.flags.passInlineData = false; mockKernelWithInternal->mockKernel->setCrossThreadData(crossThreadData, sizeof(crossThreadData)); - EXPECT_FALSE(HardwareCommandsHelper::inlineDataProgrammingRequired(*mockKernelWithInternal->mockKernel, rootDeviceIndex)); + EXPECT_FALSE(HardwareCommandsHelper::inlineDataProgrammingRequired(*mockKernelWithInternal->mockKernel)); } HWTEST_F(HardwareCommandsTest, whenNumLocalIdsIsBiggerThanZeroThenExpectLocalIdsInUseIsTrue) { mockKernelWithInternal->kernelInfo.kernelDescriptor.kernelAttributes.numLocalIdChannels = 1; - EXPECT_TRUE(HardwareCommandsHelper::kernelUsesLocalIds(*mockKernelWithInternal->mockKernel, rootDeviceIndex)); + EXPECT_TRUE(HardwareCommandsHelper::kernelUsesLocalIds(*mockKernelWithInternal->mockKernel)); } HWTEST_F(HardwareCommandsTest, whenNumLocalIdsIsZeroThenExpectLocalIdsInUseIsFalse) { mockKernelWithInternal->kernelInfo.kernelDescriptor.kernelAttributes.numLocalIdChannels = 0; - EXPECT_FALSE(HardwareCommandsHelper::kernelUsesLocalIds(*mockKernelWithInternal->mockKernel, rootDeviceIndex)); + EXPECT_FALSE(HardwareCommandsHelper::kernelUsesLocalIds(*mockKernelWithInternal->mockKernel)); } HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, givenCacheFlushAfterWalkerEnabledWhenProgramGlobalSurfacePresentThenExpectCacheFlushCommand) { @@ -1247,7 +1245,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, givenCacheFlushAfterWalkerEnab mockKernelWithInternal->mockProgram->setGlobalSurface(&globalAllocation); Kernel::CacheFlushAllocationsVec allocs; - mockKernelWithInternal->mockKernel->getAllocationsForCacheFlush(allocs, rootDeviceIndex); + mockKernelWithInternal->mockKernel->getAllocationsForCacheFlush(allocs); EXPECT_NE(allocs.end(), std::find(allocs.begin(), allocs.end(), &globalAllocation)); size_t expectedSize = sizeof(PIPE_CONTROL); @@ -1286,7 +1284,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, givenCacheFlushAfterWalkerEnab mockKernelWithInternal->mockKernel->svmAllocationsRequireCacheFlush = true; Kernel::CacheFlushAllocationsVec allocs; - mockKernelWithInternal->mockKernel->getAllocationsForCacheFlush(allocs, rootDeviceIndex); + mockKernelWithInternal->mockKernel->getAllocationsForCacheFlush(allocs); EXPECT_NE(allocs.end(), std::find(allocs.begin(), allocs.end(), &svmAllocation1)); EXPECT_EQ(allocs.end(), std::find(allocs.begin(), allocs.end(), &svmAllocation2)); @@ -1321,7 +1319,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, givenCacheFlushAfterWalkerEnab mockKernelWithInternal->mockKernel->kernelArgRequiresCacheFlush[0] = &cacheRequiringAllocation; Kernel::CacheFlushAllocationsVec allocs; - mockKernelWithInternal->mockKernel->getAllocationsForCacheFlush(allocs, rootDeviceIndex); + mockKernelWithInternal->mockKernel->getAllocationsForCacheFlush(allocs); EXPECT_NE(allocs.end(), std::find(allocs.begin(), allocs.end(), &cacheRequiringAllocation)); size_t expectedSize = sizeof(PIPE_CONTROL); @@ -1357,7 +1355,7 @@ TEST_F(HardwareCommandsTest, givenCacheFlushAfterWalkerEnabledWhenPlatformNotSup hardwareInfo.capabilityTable.supportCacheFlushAfterWalker = false; StackVec allocationsForCacheFlush; - mockKernelWithInternal->mockKernel->getAllocationsForCacheFlush(allocationsForCacheFlush, rootDeviceIndex); + mockKernelWithInternal->mockKernel->getAllocationsForCacheFlush(allocationsForCacheFlush); EXPECT_EQ(0U, allocationsForCacheFlush.size()); } @@ -1367,7 +1365,7 @@ HWTEST_F(KernelCacheFlushTests, givenLocallyUncachedBufferWhenGettingAllocations DebugManagerStateRestore dbgRestore; DebugManager.flags.EnableCacheFlushAfterWalker.set(-1); - auto kernel = std::unique_ptr(Kernel::create(pProgram, pProgram->getKernelInfosForKernel("CopyBuffer"), *pClDevice, &retVal)); + auto kernel = std::unique_ptr(Kernel::create(pProgram, pProgram->getKernelInfoForKernel("CopyBuffer"), *pClDevice, &retVal)); cl_mem_properties_intel bufferPropertiesUncachedResource[] = {CL_MEM_FLAGS_INTEL, CL_MEM_LOCALLY_UNCACHED_RESOURCE, 0}; auto bufferLocallyUncached = clCreateBufferWithPropertiesINTEL(context, bufferPropertiesUncachedResource, 0, 1, nullptr, nullptr); @@ -1375,13 +1373,13 @@ HWTEST_F(KernelCacheFlushTests, givenLocallyUncachedBufferWhenGettingAllocations using CacheFlushAllocationsVec = StackVec; CacheFlushAllocationsVec cacheFlushVec; - kernel->getAllocationsForCacheFlush(cacheFlushVec, rootDeviceIndex); + kernel->getAllocationsForCacheFlush(cacheFlushVec); EXPECT_EQ(0u, cacheFlushVec.size()); auto bufferRegular = clCreateBufferWithPropertiesINTEL(context, nullptr, 0, 1, nullptr, nullptr); kernel->setArg(1, sizeof(bufferRegular), &bufferRegular); - kernel->getAllocationsForCacheFlush(cacheFlushVec, rootDeviceIndex); + kernel->getAllocationsForCacheFlush(cacheFlushVec); size_t expectedCacheFlushVecSize = (hardwareInfo.capabilityTable.supportCacheFlushAfterWalker ? 1u : 0u); EXPECT_EQ(expectedCacheFlushVecSize, cacheFlushVec.size()); diff --git a/opencl/test/unit_test/helpers/hardware_commands_helper_tests.h b/opencl/test/unit_test/helpers/hardware_commands_helper_tests.h index e68dcb4cbf..6cc2fef792 100644 --- a/opencl/test/unit_test/helpers/hardware_commands_helper_tests.h +++ b/opencl/test/unit_test/helpers/hardware_commands_helper_tests.h @@ -45,8 +45,8 @@ struct HardwareCommandsTest : ClDeviceFixture, template size_t pushBindingTableAndSurfaceStates(IndirectHeap &dstHeap, const Kernel &srcKernel) { - return EncodeSurfaceState::pushBindingTableAndSurfaceStates(dstHeap, srcKernel.getKernelInfo(rootDeviceIndex).kernelDescriptor.payloadMappings.bindingTable.numEntries, - srcKernel.getSurfaceStateHeap(rootDeviceIndex), srcKernel.getSurfaceStateHeapSize(rootDeviceIndex), + return EncodeSurfaceState::pushBindingTableAndSurfaceStates(dstHeap, srcKernel.getKernelInfo().kernelDescriptor.payloadMappings.bindingTable.numEntries, + srcKernel.getSurfaceStateHeap(), srcKernel.getSurfaceStateHeapSize(), srcKernel.getNumberOfBindingTableStates(), srcKernel.getBindingTableOffset()); } }; diff --git a/opencl/test/unit_test/helpers/task_information_tests.cpp b/opencl/test/unit_test/helpers/task_information_tests.cpp index 466c504a5d..bd19412fdb 100644 --- a/opencl/test/unit_test/helpers/task_information_tests.cpp +++ b/opencl/test/unit_test/helpers/task_information_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2017-2020 Intel Corporation + * Copyright (C) 2017-2021 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -233,14 +233,13 @@ HWTEST_F(DispatchFlagsTests, givenCommandComputeKernelWhenSubmitThenPassCorrectD } std::unique_ptr command(new CommandComputeKernel(*mockCmdQ, kernelOperation, surfaces, flushDC, slmUsed, ndRangeKernel, nullptr, preemptionMode, kernel, 1)); command->submit(20, false); - auto rootDeviceIndex = mockCsr->getRootDeviceIndex(); EXPECT_FALSE(mockCsr->passedDispatchFlags.pipelineSelectArgs.specialPipelineSelectMode); EXPECT_EQ(kernel.mockKernel->isVmeKernel(), mockCsr->passedDispatchFlags.pipelineSelectArgs.mediaSamplerRequired); EXPECT_EQ(mockCmdQ->flushStamp->getStampReference(), mockCsr->passedDispatchFlags.flushStampReference); EXPECT_EQ(mockCmdQ->getThrottle(), mockCsr->passedDispatchFlags.throttle); EXPECT_EQ(preemptionMode, mockCsr->passedDispatchFlags.preemptionMode); - EXPECT_EQ(kernel.mockKernel->getKernelInfo(rootDeviceIndex).kernelDescriptor.kernelAttributes.numGrfRequired, mockCsr->passedDispatchFlags.numGrfRequired); + EXPECT_EQ(kernel.mockKernel->getKernelInfo().kernelDescriptor.kernelAttributes.numGrfRequired, mockCsr->passedDispatchFlags.numGrfRequired); EXPECT_EQ(L3CachingSettings::l3CacheOn, mockCsr->passedDispatchFlags.l3CacheSettings); EXPECT_TRUE(mockCsr->passedDispatchFlags.blocking); EXPECT_EQ(flushDC, mockCsr->passedDispatchFlags.dcFlush); diff --git a/opencl/test/unit_test/kernel/clone_kernel_tests.cpp b/opencl/test/unit_test/kernel/clone_kernel_tests.cpp index 2b5fd98d78..4e40560e5e 100644 --- a/opencl/test/unit_test/kernel/clone_kernel_tests.cpp +++ b/opencl/test/unit_test/kernel/clone_kernel_tests.cpp @@ -95,12 +95,12 @@ class CloneKernelTest : public MultiRootDeviceWithSubDevicesFixture { for (auto &rootDeviceIndex : this->context->getRootDeviceIndices()) { - pSourceKernel[rootDeviceIndex] = new MockKernel(pProgram.get(), kernelInfos, *deviceFactory->rootDevices[rootDeviceIndex]); + pSourceKernel[rootDeviceIndex] = new MockKernel(pProgram.get(), *pKernelInfo[rootDeviceIndex], *deviceFactory->rootDevices[rootDeviceIndex]); ASSERT_EQ(CL_SUCCESS, pSourceKernel[rootDeviceIndex]->initialize()); char pSourceCrossThreadData[64] = {}; sourceKernels[rootDeviceIndex] = pSourceKernel[rootDeviceIndex]; - pClonedKernel[rootDeviceIndex] = new MockKernel(pProgram.get(), kernelInfos, *deviceFactory->rootDevices[rootDeviceIndex]); + pClonedKernel[rootDeviceIndex] = new MockKernel(pProgram.get(), *pKernelInfo[rootDeviceIndex], *deviceFactory->rootDevices[rootDeviceIndex]); ASSERT_EQ(CL_SUCCESS, pClonedKernel[rootDeviceIndex]->initialize()); char pClonedCrossThreadData[64] = {}; clonedKernels[rootDeviceIndex] = pClonedKernel[rootDeviceIndex]; @@ -109,8 +109,8 @@ class CloneKernelTest : public MultiRootDeviceWithSubDevicesFixture { pClonedKernel[rootDeviceIndex]->setCrossThreadData(pClonedCrossThreadData, sizeof(pClonedCrossThreadData)); } - pSourceMultiDeviceKernel = std::make_unique(sourceKernels); - pClonedMultiDeviceKernel = std::make_unique(clonedKernels); + pSourceMultiDeviceKernel = std::make_unique(sourceKernels, kernelInfos); + pClonedMultiDeviceKernel = std::make_unique(clonedKernels, kernelInfos); } void TearDown() override { @@ -220,7 +220,7 @@ TEST_F(CloneKernelTest, GivenArgBufferWhenCloningKernelThenKernelInfoIsCorrect) EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).isPatched, pClonedKernel[rootDeviceIndex]->getKernelArgInfo(0).isPatched); auto pKernelArg = (cl_mem *)(pClonedKernel[rootDeviceIndex]->getCrossThreadData() + - pClonedKernel[rootDeviceIndex]->getKernelInfo(rootDeviceIndex).kernelArgInfo[0].kernelArgPatchInfoVector[0].crossthreadOffset); + pClonedKernel[rootDeviceIndex]->getKernelInfo().kernelArgInfo[0].kernelArgPatchInfoVector[0].crossthreadOffset); EXPECT_EQ(buffer->getGraphicsAllocation(rootDeviceIndex)->getGpuAddressToPatch(), reinterpret_cast(*pKernelArg)); } } @@ -257,7 +257,7 @@ TEST_F(CloneKernelTest, GivenArgPipeWhenCloningKernelThenKernelInfoIsCorrect) { EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).isPatched, pClonedKernel[rootDeviceIndex]->getKernelArgInfo(0).isPatched); auto pKernelArg = (cl_mem *)(pClonedKernel[rootDeviceIndex]->getCrossThreadData() + - pClonedKernel[rootDeviceIndex]->getKernelInfo(rootDeviceIndex).kernelArgInfo[0].kernelArgPatchInfoVector[0].crossthreadOffset); + pClonedKernel[rootDeviceIndex]->getKernelInfo().kernelArgInfo[0].kernelArgPatchInfoVector[0].crossthreadOffset); EXPECT_EQ(pipe->getGraphicsAllocation(rootDeviceIndex)->getGpuAddressToPatch(), reinterpret_cast(*pKernelArg)); } @@ -299,7 +299,7 @@ TEST_F(CloneKernelTest, GivenArgImageWhenCloningKernelThenKernelInfoIsCorrect) { auto crossThreadData = reinterpret_cast(pClonedKernel[rootDeviceIndex]->getCrossThreadData()); EXPECT_EQ(objectId, *crossThreadData); - const auto &argInfo = pClonedKernel[rootDeviceIndex]->getKernelInfo(rootDeviceIndex).kernelArgInfo[0]; + const auto &argInfo = pClonedKernel[rootDeviceIndex]->getKernelInfo().kernelArgInfo[0]; auto pImgWidth = ptrOffset(crossThreadData, argInfo.offsetImgWidth); EXPECT_EQ(imageWidth, *pImgWidth); @@ -351,7 +351,7 @@ TEST_F(CloneKernelTest, GivenArgAcceleratorWhenCloningKernelThenKernelInfoIsCorr auto crossThreadData = reinterpret_cast(pClonedKernel[rootDeviceIndex]->getCrossThreadData()); - const auto &argInfo = pClonedKernel[rootDeviceIndex]->getKernelInfo(rootDeviceIndex).kernelArgInfo[0]; + const auto &argInfo = pClonedKernel[rootDeviceIndex]->getKernelInfo().kernelArgInfo[0]; uint32_t *pMbBlockType = ptrOffset(crossThreadData, argInfo.offsetVmeMbBlockType); EXPECT_EQ(desc.mb_block_type, *pMbBlockType); @@ -406,7 +406,7 @@ TEST_F(CloneKernelTest, GivenArgSamplerWhenCloningKernelThenKernelInfoIsCorrect) auto crossThreadData = reinterpret_cast(pClonedKernel[rootDeviceIndex]->getCrossThreadData()); EXPECT_EQ(objectId, *crossThreadData); - const auto &argInfo = pClonedKernel[rootDeviceIndex]->getKernelInfo(rootDeviceIndex).kernelArgInfo[0]; + const auto &argInfo = pClonedKernel[rootDeviceIndex]->getKernelInfo().kernelArgInfo[0]; auto pSnapWa = ptrOffset(crossThreadData, argInfo.offsetSamplerSnapWa); EXPECT_EQ(sampler->getSnapWaValue(), *pSnapWa); @@ -455,7 +455,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, CloneKernelTest, GivenArgDeviceQueueWhenCloningKerne EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).isPatched, pClonedKernel[rootDeviceIndex]->getKernelArgInfo(0).isPatched); auto pKernelArg = (uintptr_t *)(pClonedKernel[rootDeviceIndex]->getCrossThreadData() + - pClonedKernel[rootDeviceIndex]->getKernelInfo(rootDeviceIndex).kernelArgInfo[0].kernelArgPatchInfoVector[0].crossthreadOffset); + pClonedKernel[rootDeviceIndex]->getKernelInfo().kernelArgInfo[0].kernelArgPatchInfoVector[0].crossthreadOffset); EXPECT_EQ(static_cast(mockDevQueue.getQueueBuffer()->getGpuAddressToPatch()), *pKernelArg); } @@ -486,7 +486,7 @@ TEST_F(CloneKernelTest, GivenArgSvmWhenCloningKernelThenKernelInfoIsCorrect) { EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).isPatched, pClonedKernel[rootDeviceIndex]->getKernelArgInfo(0).isPatched); auto pKernelArg = (void **)(pClonedKernel[rootDeviceIndex]->getCrossThreadData() + - pClonedKernel[rootDeviceIndex]->getKernelInfo(rootDeviceIndex).kernelArgInfo[0].kernelArgPatchInfoVector[0].crossthreadOffset); + pClonedKernel[rootDeviceIndex]->getKernelInfo().kernelArgInfo[0].kernelArgPatchInfoVector[0].crossthreadOffset); EXPECT_EQ(svmPtr, *pKernelArg); } @@ -519,7 +519,7 @@ TEST_F(CloneKernelTest, GivenArgSvmAllocWhenCloningKernelThenKernelInfoIsCorrect EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).isPatched, pClonedKernel[rootDeviceIndex]->getKernelArgInfo(0).isPatched); auto pKernelArg = (void **)(pClonedKernel[rootDeviceIndex]->getCrossThreadData() + - pClonedKernel[rootDeviceIndex]->getKernelInfo(rootDeviceIndex).kernelArgInfo[0].kernelArgPatchInfoVector[0].crossthreadOffset); + pClonedKernel[rootDeviceIndex]->getKernelInfo().kernelArgInfo[0].kernelArgPatchInfoVector[0].crossthreadOffset); EXPECT_EQ(svmPtr, *pKernelArg); delete[] svmPtr; @@ -553,7 +553,7 @@ TEST_F(CloneKernelTest, GivenArgImmediateWhenCloningKernelThenKernelInfoIsCorrec EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).isPatched, pClonedKernel[rootDeviceIndex]->getKernelArgInfo(0).isPatched); auto pKernelArg = (TypeParam *)(pClonedKernel[rootDeviceIndex]->getCrossThreadData() + - pClonedKernel[rootDeviceIndex]->getKernelInfo(rootDeviceIndex).kernelArgInfo[0].kernelArgPatchInfoVector[0].crossthreadOffset); + pClonedKernel[rootDeviceIndex]->getKernelInfo().kernelArgInfo[0].kernelArgPatchInfoVector[0].crossthreadOffset); EXPECT_EQ(value, *pKernelArg); } } diff --git a/opencl/test/unit_test/kernel/debug_kernel_tests.cpp b/opencl/test/unit_test/kernel/debug_kernel_tests.cpp index c29eec7d2f..509628444e 100644 --- a/opencl/test/unit_test/kernel/debug_kernel_tests.cpp +++ b/opencl/test/unit_test/kernel/debug_kernel_tests.cpp @@ -22,7 +22,7 @@ TEST(DebugKernelTest, givenKernelCompiledForDebuggingWhenGetPerThreadSystemThrea program.enableKernelDebug(); std::unique_ptr kernel(MockKernel::create(device->getDevice(), &program)); - EXPECT_EQ(MockDebugKernel::perThreadSystemThreadSurfaceSize, kernel->getPerThreadSystemThreadSurfaceSize(device->getRootDeviceIndex())); + EXPECT_EQ(MockDebugKernel::perThreadSystemThreadSurfaceSize, kernel->getPerThreadSystemThreadSurfaceSize()); } TEST(DebugKernelTest, givenKernelCompiledForDebuggingWhenQueryingIsKernelDebugEnabledThenTrueIsReturned) { @@ -51,5 +51,5 @@ TEST(DebugKernelTest, givenKernelWithoutDebugFlagWhenGetPerThreadSystemThreadSur program.enableKernelDebug(); std::unique_ptr kernel(MockKernel::create(device->getDevice(), &program)); - EXPECT_EQ(0u, kernel->getPerThreadSystemThreadSurfaceSize(device->getRootDeviceIndex())); + EXPECT_EQ(0u, kernel->getPerThreadSystemThreadSurfaceSize()); } diff --git a/opencl/test/unit_test/kernel/kernel_accelerator_arg_tests.cpp b/opencl/test/unit_test/kernel/kernel_accelerator_arg_tests.cpp index fda014c310..62aefba62c 100644 --- a/opencl/test/unit_test/kernel/kernel_accelerator_arg_tests.cpp +++ b/opencl/test/unit_test/kernel/kernel_accelerator_arg_tests.cpp @@ -63,7 +63,7 @@ class KernelArgAcceleratorFixture : public ContextFixture, public ClDeviceFixtur pKernelInfo->kernelArgInfo[0].offsetVmeSearchPathType = 0x1c; pProgram = new MockProgram(pContext, false, toClDeviceVector(*pClDevice)); - pKernel = new MockKernel(pProgram, MockKernel::toKernelInfoContainer(*pKernelInfo, rootDeviceIndex), *pClDevice); + pKernel = new MockKernel(pProgram, *pKernelInfo, *pClDevice); ASSERT_EQ(CL_SUCCESS, pKernel->initialize()); pKernel->setKernelArgHandler(0, &Kernel::setArgAccelerator); diff --git a/opencl/test/unit_test/kernel/kernel_arg_buffer_fixture.cpp b/opencl/test/unit_test/kernel/kernel_arg_buffer_fixture.cpp index 6a200490a0..ad75600865 100644 --- a/opencl/test/unit_test/kernel/kernel_arg_buffer_fixture.cpp +++ b/opencl/test/unit_test/kernel/kernel_arg_buffer_fixture.cpp @@ -53,7 +53,7 @@ void KernelArgBufferFixture::SetUp() { pProgram = new MockProgram(pContext, false, toClDeviceVector(*pClDevice)); - pKernel = new MockKernel(pProgram, MockKernel::toKernelInfoContainer(*pKernelInfo, rootDeviceIndex), *pClDevice); + pKernel = new MockKernel(pProgram, *pKernelInfo, *pClDevice); ASSERT_EQ(CL_SUCCESS, pKernel->initialize()); pKernel->setCrossThreadData(pCrossThreadData, sizeof(pCrossThreadData)); diff --git a/opencl/test/unit_test/kernel/kernel_arg_buffer_tests.cpp b/opencl/test/unit_test/kernel/kernel_arg_buffer_tests.cpp index 42a738ebed..b79d70bc9f 100644 --- a/opencl/test/unit_test/kernel/kernel_arg_buffer_tests.cpp +++ b/opencl/test/unit_test/kernel/kernel_arg_buffer_tests.cpp @@ -174,7 +174,7 @@ TEST_F(KernelArgBufferTest, GivenSvmPtrStatelessWhenSettingKernelArgThenArgument EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_FALSE(pKernel->requiresCoherency()); - EXPECT_EQ(0u, pKernel->getSurfaceStateHeapSize(rootDeviceIndex)); + EXPECT_EQ(0u, pKernel->getSurfaceStateHeapSize()); delete buffer; } @@ -192,11 +192,11 @@ HWTEST_F(KernelArgBufferTest, GivenSvmPtrStatefulWhenSettingKernelArgThenArgumen EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_FALSE(pKernel->requiresCoherency()); - EXPECT_NE(0u, pKernel->getSurfaceStateHeapSize(rootDeviceIndex)); + EXPECT_NE(0u, pKernel->getSurfaceStateHeapSize()); typedef typename FamilyType::RENDER_SURFACE_STATE RENDER_SURFACE_STATE; auto surfaceState = reinterpret_cast( - ptrOffset(pKernel->getSurfaceStateHeap(rootDeviceIndex), pKernelInfo->kernelArgInfo[0].offsetHeap)); + ptrOffset(pKernel->getSurfaceStateHeap(), pKernelInfo->kernelArgInfo[0].offsetHeap)); auto surfaceAddress = surfaceState->getSurfaceBaseAddress(); EXPECT_EQ(buffer->getGraphicsAllocation(mockRootDeviceIndex)->getGpuAddress(), surfaceAddress); @@ -224,11 +224,11 @@ HWTEST_F(MultiDeviceKernelArgBufferTest, GivenSvmPtrStatefulWhenSettingKernelArg for (auto &rootDeviceIndex : pContext->getRootDeviceIndices()) { auto pKernel = pMultiDeviceKernel->getKernel(rootDeviceIndex); EXPECT_FALSE(pKernel->requiresCoherency()); - EXPECT_NE(0u, pKernel->getSurfaceStateHeapSize(rootDeviceIndex)); + EXPECT_NE(0u, pKernel->getSurfaceStateHeapSize()); typedef typename FamilyType::RENDER_SURFACE_STATE RENDER_SURFACE_STATE; auto surfaceState = reinterpret_cast( - ptrOffset(pKernel->getSurfaceStateHeap(rootDeviceIndex), kernelInfos[rootDeviceIndex]->kernelArgInfo[0].offsetHeap)); + ptrOffset(pKernel->getSurfaceStateHeap(), kernelInfos[rootDeviceIndex]->kernelArgInfo[0].offsetHeap)); auto surfaceAddress = surfaceState->getSurfaceBaseAddress(); EXPECT_EQ(pBuffer->getGraphicsAllocation(rootDeviceIndex)->getGpuAddress(), surfaceAddress); @@ -433,7 +433,7 @@ TEST_F(KernelArgBufferTest, givenGfxAllocationInHostMemoryWhenHasDirectStateless TEST_F(KernelArgBufferTest, givenInvalidKernelObjWhenHasDirectStatelessAccessToHostMemoryIsCalledThenReturnFalse) { KernelInfo kernelInfo; - MockKernel emptyKernel(pProgram, MockKernel::toKernelInfoContainer(kernelInfo, 0), *pClDevice); + MockKernel emptyKernel(pProgram, kernelInfo, *pClDevice); EXPECT_FALSE(emptyKernel.hasDirectStatelessAccessToHostMemory()); pKernel->kernelArguments.at(0).type = Kernel::NONE_OBJ; @@ -450,19 +450,19 @@ TEST_F(KernelArgBufferTest, givenKernelWithIndirectStatelessAccessWhenHasIndirec KernelInfo kernelInfo; EXPECT_FALSE(kernelInfo.hasIndirectStatelessAccess); - MockKernel kernelWithNoIndirectStatelessAccess(pProgram, MockKernel::toKernelInfoContainer(kernelInfo, 0), *pClDevice); + MockKernel kernelWithNoIndirectStatelessAccess(pProgram, kernelInfo, *pClDevice); EXPECT_FALSE(kernelWithNoIndirectStatelessAccess.hasIndirectStatelessAccessToHostMemory()); kernelInfo.hasIndirectStatelessAccess = true; - MockKernel kernelWithNoIndirectHostAllocations(pProgram, MockKernel::toKernelInfoContainer(kernelInfo, 0), *pClDevice); + MockKernel kernelWithNoIndirectHostAllocations(pProgram, kernelInfo, *pClDevice); EXPECT_FALSE(kernelWithNoIndirectHostAllocations.hasIndirectStatelessAccessToHostMemory()); const auto allocationTypes = {GraphicsAllocation::AllocationType::BUFFER, GraphicsAllocation::AllocationType::BUFFER_COMPRESSED, GraphicsAllocation::AllocationType::BUFFER_HOST_MEMORY}; - MockKernel kernelWithIndirectUnifiedMemoryAllocation(pProgram, MockKernel::toKernelInfoContainer(kernelInfo, 0), *pClDevice); + MockKernel kernelWithIndirectUnifiedMemoryAllocation(pProgram, kernelInfo, *pClDevice); MockGraphicsAllocation gfxAllocation; for (const auto type : allocationTypes) { gfxAllocation.setAllocationType(type); @@ -480,7 +480,7 @@ TEST_F(KernelArgBufferTest, givenKernelExecInfoWithIndirectStatelessAccessWhenHa KernelInfo kernelInfo; kernelInfo.hasIndirectStatelessAccess = true; - MockKernel mockKernel(pProgram, MockKernel::toKernelInfoContainer(kernelInfo, 0), *pClDevice); + MockKernel mockKernel(pProgram, kernelInfo, *pClDevice); EXPECT_FALSE(mockKernel.unifiedMemoryControls.indirectHostAllocationsAllowed); EXPECT_FALSE(mockKernel.hasIndirectStatelessAccessToHostMemory()); diff --git a/opencl/test/unit_test/kernel/kernel_arg_dev_queue_tests.cpp b/opencl/test/unit_test/kernel/kernel_arg_dev_queue_tests.cpp index 1f5050ba4f..1f02e9d5e0 100644 --- a/opencl/test/unit_test/kernel/kernel_arg_dev_queue_tests.cpp +++ b/opencl/test/unit_test/kernel/kernel_arg_dev_queue_tests.cpp @@ -35,7 +35,7 @@ struct KernelArgDevQueueTest : public DeviceHostQueueFixture { pKernelInfo->kernelArgInfo[0].kernelArgPatchInfoVector.push_back(kernelArgPatchInfo); program = std::make_unique(toClDeviceVector(*pDevice)); - pKernel = new MockKernel(program.get(), MockKernel::toKernelInfoContainer(*pKernelInfo, testedRootDeviceIndex), *pDevice); + pKernel = new MockKernel(program.get(), *pKernelInfo, *pDevice); ASSERT_EQ(CL_SUCCESS, pKernel->initialize()); uint8_t pCrossThreadData[crossThreadDataSize]; diff --git a/opencl/test/unit_test/kernel/kernel_arg_info_tests.cpp b/opencl/test/unit_test/kernel/kernel_arg_info_tests.cpp index 4e45c48cc0..2bd365f147 100644 --- a/opencl/test/unit_test/kernel/kernel_arg_info_tests.cpp +++ b/opencl/test/unit_test/kernel/kernel_arg_info_tests.cpp @@ -40,7 +40,7 @@ class KernelArgInfoTest : public ProgramFromSourceTest { // create a kernel pKernel = Kernel::create( pProgram, - pProgram->getKernelInfosForKernel(kernelName), + pProgram->getKernelInfoForKernel(kernelName), *pPlatform->getClDevice(0), &retVal); diff --git a/opencl/test/unit_test/kernel/kernel_arg_pipe_tests.cpp b/opencl/test/unit_test/kernel/kernel_arg_pipe_tests.cpp index af6758601a..f2ef8c7ac6 100644 --- a/opencl/test/unit_test/kernel/kernel_arg_pipe_tests.cpp +++ b/opencl/test/unit_test/kernel/kernel_arg_pipe_tests.cpp @@ -57,7 +57,7 @@ class KernelArgPipeFixture : public ContextFixture, public ClDeviceFixture { pProgram = new MockProgram(pContext, false, toClDeviceVector(*pClDevice)); - pKernel = new MockKernel(pProgram, MockKernel::toKernelInfoContainer(*pKernelInfo, rootDeviceIndex), *pClDevice); + pKernel = new MockKernel(pProgram, *pKernelInfo, *pClDevice); ASSERT_EQ(CL_SUCCESS, pKernel->initialize()); pKernel->setCrossThreadData(pCrossThreadData, sizeof(pCrossThreadData)); @@ -111,7 +111,7 @@ TEST_F(KernelArgPipeTest, GivenSvmPtrStatelessWhenSettingKernelArgThenArgumentsA auto retVal = this->pKernel->setArg(0, sizeof(cl_mem *), pVal); EXPECT_EQ(CL_SUCCESS, retVal); - EXPECT_EQ(0u, pKernel->getSurfaceStateHeapSize(rootDeviceIndex)); + EXPECT_EQ(0u, pKernel->getSurfaceStateHeapSize()); delete pipe; } @@ -128,11 +128,11 @@ HWTEST_F(KernelArgPipeTest, GivenSvmPtrStatefulWhenSettingKernelArgThenArguments auto retVal = this->pKernel->setArg(0, sizeof(cl_mem *), pVal); EXPECT_EQ(CL_SUCCESS, retVal); - EXPECT_NE(0u, pKernel->getSurfaceStateHeapSize(rootDeviceIndex)); + EXPECT_NE(0u, pKernel->getSurfaceStateHeapSize()); typedef typename FamilyType::RENDER_SURFACE_STATE RENDER_SURFACE_STATE; auto surfaceState = reinterpret_cast( - ptrOffset(pKernel->getSurfaceStateHeap(rootDeviceIndex), + ptrOffset(pKernel->getSurfaceStateHeap(), pKernelInfo->kernelArgInfo[0].offsetHeap)); void *surfaceAddress = reinterpret_cast(surfaceState->getSurfaceBaseAddress()); diff --git a/opencl/test/unit_test/kernel/kernel_arg_svm_tests.cpp b/opencl/test/unit_test/kernel/kernel_arg_svm_tests.cpp index 1b8cd662c4..2b0f279bbb 100644 --- a/opencl/test/unit_test/kernel/kernel_arg_svm_tests.cpp +++ b/opencl/test/unit_test/kernel/kernel_arg_svm_tests.cpp @@ -56,7 +56,7 @@ class KernelArgSvmFixture_ : public ContextFixture, public ClDeviceFixture { pProgram = new MockProgram(pContext, false, toClDeviceVector(*pClDevice)); - pKernel = new MockKernel(pProgram, MockKernel::toKernelInfoContainer(*pKernelInfo, rootDeviceIndex), *pClDevice); + pKernel = new MockKernel(pProgram, *pKernelInfo, *pClDevice); ASSERT_EQ(CL_SUCCESS, pKernel->initialize()); pKernel->setCrossThreadData(pCrossThreadData, sizeof(pCrossThreadData)); } @@ -102,7 +102,7 @@ TEST_F(KernelArgSvmTest, GivenSvmPtrStatelessWhenSettingKernelArgThenArgumentsAr auto retVal = pKernel->setArgSvm(0, 256, svmPtr, nullptr, 0u); EXPECT_EQ(CL_SUCCESS, retVal); - EXPECT_EQ(0u, pKernel->getSurfaceStateHeapSize(rootDeviceIndex)); + EXPECT_EQ(0u, pKernel->getSurfaceStateHeapSize()); delete[] svmPtr; } @@ -116,11 +116,11 @@ HWTEST_F(KernelArgSvmTest, GivenSvmPtrStatefulWhenSettingKernelArgThenArgumentsA auto retVal = pKernel->setArgSvm(0, 256, svmPtr, nullptr, 0u); EXPECT_EQ(CL_SUCCESS, retVal); - EXPECT_NE(0u, pKernel->getSurfaceStateHeapSize(rootDeviceIndex)); + EXPECT_NE(0u, pKernel->getSurfaceStateHeapSize()); typedef typename FamilyType::RENDER_SURFACE_STATE RENDER_SURFACE_STATE; auto surfaceState = reinterpret_cast( - ptrOffset(pKernel->getSurfaceStateHeap(rootDeviceIndex), + ptrOffset(pKernel->getSurfaceStateHeap(), pKernelInfo->kernelArgInfo[0].offsetHeap)); void *surfaceAddress = reinterpret_cast(surfaceState->getSurfaceBaseAddress()); @@ -155,7 +155,7 @@ TEST_F(KernelArgSvmTest, GivenValidSvmAllocStatelessWhenSettingKernelArgThenArgu auto retVal = pKernel->setArgSvmAlloc(0, svmPtr, &svmAlloc); EXPECT_EQ(CL_SUCCESS, retVal); - EXPECT_EQ(0u, pKernel->getSurfaceStateHeapSize(rootDeviceIndex)); + EXPECT_EQ(0u, pKernel->getSurfaceStateHeapSize()); delete[] svmPtr; } @@ -171,11 +171,11 @@ HWTEST_F(KernelArgSvmTest, GivenValidSvmAllocStatefulWhenSettingKernelArgThenArg auto retVal = pKernel->setArgSvmAlloc(0, svmPtr, &svmAlloc); EXPECT_EQ(CL_SUCCESS, retVal); - EXPECT_NE(0u, pKernel->getSurfaceStateHeapSize(rootDeviceIndex)); + EXPECT_NE(0u, pKernel->getSurfaceStateHeapSize()); typedef typename FamilyType::RENDER_SURFACE_STATE RENDER_SURFACE_STATE; auto surfaceState = reinterpret_cast( - ptrOffset(pKernel->getSurfaceStateHeap(rootDeviceIndex), + ptrOffset(pKernel->getSurfaceStateHeap(), pKernelInfo->kernelArgInfo[0].offsetHeap)); void *surfaceAddress = reinterpret_cast(surfaceState->getSurfaceBaseAddress()); @@ -197,7 +197,7 @@ HWTEST_F(KernelArgSvmTest, givenOffsetedSvmPointerWhenSetArgSvmAllocIsCalledThen typedef typename FamilyType::RENDER_SURFACE_STATE RENDER_SURFACE_STATE; auto surfaceState = reinterpret_cast( - ptrOffset(pKernel->getSurfaceStateHeap(rootDeviceIndex), + ptrOffset(pKernel->getSurfaceStateHeap(), pKernelInfo->kernelArgInfo[0].offsetHeap)); void *surfaceAddress = reinterpret_cast(surfaceState->getSurfaceBaseAddress()); @@ -215,7 +215,7 @@ HWTEST_F(KernelArgSvmTest, givenDeviceSupportingSharedSystemAllocationsWhenSetAr typedef typename FamilyType::RENDER_SURFACE_STATE RENDER_SURFACE_STATE; auto surfaceState = reinterpret_cast( - ptrOffset(pKernel->getSurfaceStateHeap(rootDeviceIndex), + ptrOffset(pKernel->getSurfaceStateHeap(), pKernelInfo->kernelArgInfo[0].offsetHeap)); void *surfaceAddress = reinterpret_cast(surfaceState->getSurfaceBaseAddress()); @@ -255,8 +255,8 @@ HWTEST_F(KernelArgSvmTest, WhenPatchingWithImplicitSurfaceThenPatchIsApplied) { ASSERT_GE(pKernel->getCrossThreadDataSize(), sizeof(void *)); *reinterpret_cast(pKernel->getCrossThreadData()) = 0U; - ASSERT_GE(pKernel->getSurfaceStateHeapSize(rootDeviceIndex), rendSurfSize); - RENDER_SURFACE_STATE *surfState = reinterpret_cast(pKernel->getSurfaceStateHeap(rootDeviceIndex)); + ASSERT_GE(pKernel->getSurfaceStateHeapSize(), rendSurfSize); + RENDER_SURFACE_STATE *surfState = reinterpret_cast(pKernel->getSurfaceStateHeap()); memset(surfState, 0, rendSurfSize); pKernel->patchWithImplicitSurface(ptrToPatch, svmAlloc, *pDevice, patch); @@ -301,25 +301,25 @@ TEST_F(KernelArgSvmTest, WhenPatchingBufferOffsetThenPatchIsApplied) { kai.offsetBufferOffset = static_cast(-1); *expectedPatchPtr = initVal; - returnedPtr = pKernel->patchBufferOffset(kai, svmPtr.data(), &svmAlloc, rootDeviceIndex); + returnedPtr = pKernel->patchBufferOffset(kai, svmPtr.data(), &svmAlloc); EXPECT_EQ(svmPtr.data(), returnedPtr); EXPECT_EQ(initVal, *expectedPatchPtr); kai.offsetBufferOffset = static_cast(-1); *expectedPatchPtr = initVal; - returnedPtr = pKernel->patchBufferOffset(kai, svmPtr.data(), nullptr, rootDeviceIndex); + returnedPtr = pKernel->patchBufferOffset(kai, svmPtr.data(), nullptr); EXPECT_EQ(svmPtr.data(), returnedPtr); EXPECT_EQ(initVal, *expectedPatchPtr); kai.offsetBufferOffset = 0U; *expectedPatchPtr = initVal; - returnedPtr = pKernel->patchBufferOffset(kai, svmPtr.data(), &svmAlloc, rootDeviceIndex); + returnedPtr = pKernel->patchBufferOffset(kai, svmPtr.data(), &svmAlloc); EXPECT_EQ(svmPtr.data(), returnedPtr); EXPECT_EQ(0U, *expectedPatchPtr); kai.offsetBufferOffset = 0U; *expectedPatchPtr = initVal; - returnedPtr = pKernel->patchBufferOffset(kai, svmPtr.data() + svmOffset, nullptr, rootDeviceIndex); + returnedPtr = pKernel->patchBufferOffset(kai, svmPtr.data() + svmOffset, nullptr); void *expectedPtr = alignDown(svmPtr.data() + svmOffset, 4); // expecting to see DWORD alignment restriction in offset uint32_t expectedOffset = static_cast(ptrDiff(svmPtr.data() + svmOffset, expectedPtr)); @@ -328,7 +328,7 @@ TEST_F(KernelArgSvmTest, WhenPatchingBufferOffsetThenPatchIsApplied) { kai.offsetBufferOffset = 0U; *expectedPatchPtr = initVal; - returnedPtr = pKernel->patchBufferOffset(kai, svmPtr.data() + svmOffset, &svmAlloc, rootDeviceIndex); + returnedPtr = pKernel->patchBufferOffset(kai, svmPtr.data() + svmOffset, &svmAlloc); EXPECT_EQ(svmPtr.data(), returnedPtr); EXPECT_EQ(svmOffset, *expectedPatchPtr); } @@ -378,7 +378,6 @@ HWTEST_TYPED_TEST(KernelArgSvmTestTyped, GivenBufferKernelArgWhenBufferOffsetIsN constexpr size_t rendSurfSize = sizeof(RENDER_SURFACE_STATE); auto device = std::unique_ptr(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); - auto rootDeviceIndex = device->getRootDeviceIndex(); uint32_t svmSize = MemoryConstants::pageSize; char *svmPtr = reinterpret_cast(alignedMalloc(svmSize, MemoryConstants::pageSize)); @@ -406,8 +405,8 @@ HWTEST_TYPED_TEST(KernelArgSvmTestTyped, GivenBufferKernelArgWhenBufferOffsetIsN *expectedPointerPatchPtr = reinterpret_cast(0U); *expectedOffsetPatchPtr = 0U; - ASSERT_GE(this->pKernel->getSurfaceStateHeapSize(rootDeviceIndex), rendSurfSize); - RENDER_SURFACE_STATE *surfState = reinterpret_cast(this->pKernel->getSurfaceStateHeap(rootDeviceIndex)); + ASSERT_GE(this->pKernel->getSurfaceStateHeapSize(), rendSurfSize); + RENDER_SURFACE_STATE *surfState = reinterpret_cast(this->pKernel->getSurfaceStateHeap()); memset(surfState, 0, rendSurfSize); TypeParam::setArg(*this->pKernel, 0U, ptrToPatch, sizeToPatch, svmAlloc); @@ -555,7 +554,7 @@ TEST_F(KernelArgSvmTest, givenCpuAddressIsNullWhenGpuAddressIsValidThenPatchBuff kai.offsetBufferOffset = 0U; *expectedPatchPtr = initVal; - returnedPtr = pKernel->patchBufferOffset(kai, svmPtr.data(), &svmAlloc, rootDeviceIndex); + returnedPtr = pKernel->patchBufferOffset(kai, svmPtr.data(), &svmAlloc); EXPECT_EQ(svmPtr.data(), returnedPtr); EXPECT_EQ(0U, *expectedPatchPtr); } diff --git a/opencl/test/unit_test/kernel/kernel_image_arg_tests.cpp b/opencl/test/unit_test/kernel/kernel_image_arg_tests.cpp index e3bb6bf4a7..49506eeec1 100644 --- a/opencl/test/unit_test/kernel/kernel_image_arg_tests.cpp +++ b/opencl/test/unit_test/kernel/kernel_image_arg_tests.cpp @@ -66,16 +66,16 @@ TEST_F(KernelImageArgTest, givenKernelWithFlatImageTokensWhenArgIsSetThenPatchAl auto crossThreadData = reinterpret_cast(pKernel->getCrossThreadData()); auto pixelSize = image->getSurfaceFormatInfo().surfaceFormat.ImageElementSizeInBytes; - auto offsetFlatBaseOffset = ptrOffset(crossThreadData, pKernel->getKernelInfo(rootDeviceIndex).kernelArgInfo[0].offsetFlatBaseOffset); + auto offsetFlatBaseOffset = ptrOffset(crossThreadData, pKernel->getKernelInfo().kernelArgInfo[0].offsetFlatBaseOffset); EXPECT_EQ(imageBaseAddress, *reinterpret_cast(offsetFlatBaseOffset)); - auto offsetFlatWidth = ptrOffset(crossThreadData, pKernel->getKernelInfo(rootDeviceIndex).kernelArgInfo[0].offsetFlatWidth); + auto offsetFlatWidth = ptrOffset(crossThreadData, pKernel->getKernelInfo().kernelArgInfo[0].offsetFlatWidth); EXPECT_EQ(static_cast((imageWidth * pixelSize) - 1), *offsetFlatWidth); - auto offsetFlatHeight = ptrOffset(crossThreadData, pKernel->getKernelInfo(rootDeviceIndex).kernelArgInfo[0].offsetFlatHeight); + auto offsetFlatHeight = ptrOffset(crossThreadData, pKernel->getKernelInfo().kernelArgInfo[0].offsetFlatHeight); EXPECT_EQ(static_cast((imageHeight * pixelSize) - 1), *offsetFlatHeight); - auto offsetFlatPitch = ptrOffset(crossThreadData, pKernel->getKernelInfo(rootDeviceIndex).kernelArgInfo[0].offsetFlatPitch); + auto offsetFlatPitch = ptrOffset(crossThreadData, pKernel->getKernelInfo().kernelArgInfo[0].offsetFlatPitch); EXPECT_EQ(imageRowPitch - 1, *offsetFlatPitch); } diff --git a/opencl/test/unit_test/kernel/kernel_immediate_arg_tests.cpp b/opencl/test/unit_test/kernel/kernel_immediate_arg_tests.cpp index 812745792e..2f574e64c9 100644 --- a/opencl/test/unit_test/kernel/kernel_immediate_arg_tests.cpp +++ b/opencl/test/unit_test/kernel/kernel_immediate_arg_tests.cpp @@ -64,12 +64,12 @@ class KernelArgImmediateTest : public MultiRootDeviceWithSubDevicesFixture { } for (auto &rootDeviceIndex : this->context->getRootDeviceIndices()) { - pKernel[rootDeviceIndex] = new MockKernel(program.get(), kernelInfos, *deviceFactory->rootDevices[rootDeviceIndex]); + pKernel[rootDeviceIndex] = new MockKernel(program.get(), *pKernelInfo[rootDeviceIndex], *deviceFactory->rootDevices[rootDeviceIndex]); kernels[rootDeviceIndex] = pKernel[rootDeviceIndex]; ASSERT_EQ(CL_SUCCESS, pKernel[rootDeviceIndex]->initialize()); } - pMultiDeviceKernel = std::make_unique(kernels); + pMultiDeviceKernel = std::make_unique(kernels, kernelInfos); for (auto &rootDeviceIndex : this->context->getRootDeviceIndices()) { pKernel[rootDeviceIndex]->setCrossThreadData(&pCrossThreadData[rootDeviceIndex], sizeof(pCrossThreadData[rootDeviceIndex])); diff --git a/opencl/test/unit_test/kernel/kernel_is_patched_tests.cpp b/opencl/test/unit_test/kernel/kernel_is_patched_tests.cpp index 44e947af5e..79f792fc4a 100644 --- a/opencl/test/unit_test/kernel/kernel_is_patched_tests.cpp +++ b/opencl/test/unit_test/kernel/kernel_is_patched_tests.cpp @@ -23,7 +23,7 @@ class PatchedKernelTest : public ::testing::Test { program.reset(Program::createBuiltInFromSource("FillBufferBytes", context.get(), context->getDevices(), &retVal)); EXPECT_EQ(CL_SUCCESS, retVal); program->build(program->getDevices(), nullptr, false); - kernel.reset(Kernel::create(program.get(), program->getKernelInfosForKernel("FillBufferBytes"), *device, &retVal)); + kernel.reset(Kernel::create(program.get(), program->getKernelInfoForKernel("FillBufferBytes"), *device, &retVal)); EXPECT_EQ(CL_SUCCESS, retVal); } void TearDown() override { diff --git a/opencl/test/unit_test/kernel/kernel_reflection_surface_tests.cpp b/opencl/test/unit_test/kernel/kernel_reflection_surface_tests.cpp index 241de31750..973e91212e 100644 --- a/opencl/test/unit_test/kernel/kernel_reflection_surface_tests.cpp +++ b/opencl/test/unit_test/kernel/kernel_reflection_surface_tests.cpp @@ -612,7 +612,7 @@ TEST(KernelReflectionSurfaceTestSingle, GivenNonParentKernelWhenCreatingKernelRe MockClDevice device{new MockDevice}; MockProgram program(toClDeviceVector(device)); KernelInfo info; - MockKernel kernel(&program, MockKernel::toKernelInfoContainer(info, device.getRootDeviceIndex()), device); + MockKernel kernel(&program, info, device); EXPECT_FALSE(kernel.isParentKernel); @@ -629,10 +629,8 @@ TEST(KernelReflectionSurfaceTestSingle, GivenNonSchedulerKernelWithForcedSchedul MockClDevice device{new MockDevice}; MockProgram program(toClDeviceVector(device)); - KernelInfoContainer kernelInfos; KernelInfo info; - kernelInfos.push_back(&info); - MockKernel kernel(&program, kernelInfos, device); + MockKernel kernel(&program, info, device); EXPECT_FALSE(kernel.isParentKernel); @@ -667,9 +665,7 @@ TEST(KernelReflectionSurfaceTestSingle, GivenNoKernelArgsWhenObtainingKernelRefl bindingTableState.SurfaceStateOffset = 0; populateKernelDescriptor(info.kernelDescriptor, bindingTableState); - KernelInfoContainer kernelInfos; - kernelInfos.push_back(&info); - MockKernel kernel(&program, kernelInfos, *device); + MockKernel kernel(&program, info, *device); EXPECT_TRUE(kernel.isParentKernel); @@ -730,9 +726,7 @@ TEST(KernelReflectionSurfaceTestSingle, GivenDeviceQueueKernelArgWhenObtainingKe info.kernelArgInfo[0].kernelArgPatchInfoVector[0].crossthreadOffset = devQueueCurbeOffset; info.kernelArgInfo[0].kernelArgPatchInfoVector[0].size = devQueueCurbeSize; - KernelInfoContainer kernelInfos; - kernelInfos.push_back(&info); - MockKernel kernel(&program, kernelInfos, *device); + MockKernel kernel(&program, info, *device); EXPECT_TRUE(kernel.isParentKernel); @@ -766,7 +760,7 @@ TEST_P(KernelReflectionSurfaceTest, WhenCreatingKernelReflectionSurfaceThenKerne size_t parentImageCount = 0; size_t parentSamplerCount = 0; - if (pKernel->getKernelInfo(rootDeviceIndex).kernelDescriptor.kernelMetadata.kernelName == "kernel_reflection") { + if (pKernel->getKernelInfo().kernelDescriptor.kernelMetadata.kernelName == "kernel_reflection") { parentImageCount = 1; parentSamplerCount = 1; } @@ -819,7 +813,7 @@ TEST_P(KernelReflectionSurfaceTest, WhenCreatingKernelReflectionSurfaceThenKerne uint32_t parentImages = 0; uint32_t parentSamplers = 0; - if (pKernel->getKernelInfo(rootDeviceIndex).kernelDescriptor.kernelMetadata.kernelName == "kernel_reflection") { + if (pKernel->getKernelInfo().kernelDescriptor.kernelMetadata.kernelName == "kernel_reflection") { parentImages = 1; parentSamplers = 1; EXPECT_LT(sizeof(IGIL_KernelDataHeader), pKernelHeader->m_ParentSamplerParamsOffset); @@ -1111,7 +1105,7 @@ HWCMDTEST_P(IGFX_GEN8_CORE, KernelReflectionSurfaceWithQueueTest, WhenObtainingK cl_sampler samplerCl = sampler.get(); cl_mem imageCl = image3d.get(); - if (pKernel->getKernelInfo(rootDeviceIndex).kernelDescriptor.kernelMetadata.kernelName == "kernel_reflection") { + if (pKernel->getKernelInfo().kernelDescriptor.kernelMetadata.kernelName == "kernel_reflection") { pKernel->setArgSampler(0, sizeof(cl_sampler), &samplerCl); pKernel->setArgImage(1, sizeof(cl_mem), &imageCl); } @@ -1136,7 +1130,7 @@ HWCMDTEST_P(IGFX_GEN8_CORE, KernelReflectionSurfaceWithQueueTest, WhenObtainingK if (pKernelHeader->m_ParentKernelImageCount > 0) { uint32_t imageIndex = 0; - for (const auto &arg : pKernel->getKernelInfo(rootDeviceIndex).kernelArgInfo) { + for (const auto &arg : pKernel->getKernelInfo().kernelArgInfo) { if (arg.isImage) { EXPECT_EQ(arg.offsetHeap, pParentImageParams[imageIndex].m_ObjectID); imageIndex++; @@ -1146,7 +1140,7 @@ HWCMDTEST_P(IGFX_GEN8_CORE, KernelReflectionSurfaceWithQueueTest, WhenObtainingK if (pKernelHeader->m_ParentSamplerCount > 0) { uint32_t samplerIndex = 0; - for (const auto &arg : pKernel->getKernelInfo(rootDeviceIndex).kernelArgInfo) { + for (const auto &arg : pKernel->getKernelInfo().kernelArgInfo) { if (arg.isSampler) { EXPECT_EQ(OCLRT_ARG_OFFSET_TO_SAMPLER_OBJECT_ID(arg.offsetHeap), pParentSamplerParams[samplerIndex].m_ObjectID); samplerIndex++; @@ -2095,11 +2089,7 @@ TEST_F(KernelReflectionMultiDeviceTest, GivenNoKernelArgsWhenObtainingKernelRefl bindingTableState.SurfaceStateOffset = 0; populateKernelDescriptor(info.kernelDescriptor, bindingTableState); - auto rootDeviceIndex = device1->getRootDeviceIndex(); - KernelInfoContainer kernelInfos; - kernelInfos.resize(rootDeviceIndex + 1); - kernelInfos[rootDeviceIndex] = &info; - MockKernel kernel(&program, kernelInfos, *device1); + MockKernel kernel(&program, info, *device1); EXPECT_TRUE(kernel.isParentKernel); @@ -2160,11 +2150,7 @@ TEST_F(KernelReflectionMultiDeviceTest, GivenDeviceQueueKernelArgWhenObtainingKe info.kernelArgInfo[0].kernelArgPatchInfoVector[0].crossthreadOffset = devQueueCurbeOffset; info.kernelArgInfo[0].kernelArgPatchInfoVector[0].size = devQueueCurbeSize; - auto rootDeviceIndex = device1->getRootDeviceIndex(); - KernelInfoContainer kernelInfos; - kernelInfos.resize(rootDeviceIndex + 1); - kernelInfos[rootDeviceIndex] = &info; - MockKernel kernel(&program, kernelInfos, *device1); + MockKernel kernel(&program, info, *device1); EXPECT_TRUE(kernel.isParentKernel); diff --git a/opencl/test/unit_test/kernel/kernel_slm_arg_tests.cpp b/opencl/test/unit_test/kernel/kernel_slm_arg_tests.cpp index bcf0f9fafa..177d0027b5 100644 --- a/opencl/test/unit_test/kernel/kernel_slm_arg_tests.cpp +++ b/opencl/test/unit_test/kernel/kernel_slm_arg_tests.cpp @@ -54,12 +54,12 @@ class KernelSlmArgTest : public MultiRootDeviceWithSubDevicesFixture { } for (auto &rootDeviceIndex : this->context->getRootDeviceIndices()) { - pKernel[rootDeviceIndex] = new MockKernel(program.get(), kernelInfos, *deviceFactory->rootDevices[rootDeviceIndex]); + pKernel[rootDeviceIndex] = new MockKernel(program.get(), *pKernelInfo[rootDeviceIndex], *deviceFactory->rootDevices[rootDeviceIndex]); kernels[rootDeviceIndex] = pKernel[rootDeviceIndex]; ASSERT_EQ(CL_SUCCESS, pKernel[rootDeviceIndex]->initialize()); } - pMultiDeviceKernel = std::make_unique(kernels); + pMultiDeviceKernel = std::make_unique(kernels, kernelInfos); for (auto &rootDeviceIndex : this->context->getRootDeviceIndices()) { crossThreadData[rootDeviceIndex][0x20 / sizeof(uint32_t)] = 0x12344321; diff --git a/opencl/test/unit_test/kernel/kernel_slm_tests.cpp b/opencl/test/unit_test/kernel/kernel_slm_tests.cpp index b4d59ba001..e663e7a21e 100644 --- a/opencl/test/unit_test/kernel/kernel_slm_tests.cpp +++ b/opencl/test/unit_test/kernel/kernel_slm_tests.cpp @@ -69,7 +69,7 @@ HWCMDTEST_P(IGFX_GEN8_CORE, KernelSLMAndBarrierTest, GivenStaticSlmSizeWhenProgr kernelInfo.kernelDescriptor.kernelAttributes.barrierCount = 1; kernelInfo.workloadInfo.slmStaticSize = GetParam() * KB; - MockKernel kernel(program.get(), MockKernel::toKernelInfoContainer(kernelInfo, rootDeviceIndex), *pClDevice); + MockKernel kernel(program.get(), kernelInfo, *pClDevice); ASSERT_EQ(CL_SUCCESS, kernel.initialize()); // After creating Mock Kernel now create Indirect Heap @@ -156,7 +156,7 @@ HWTEST_F(KernelSLMAndBarrierTest, GivenInterfaceDescriptorProgrammedWhenOverride kernelInfo.workloadInfo.slmStaticSize = 0; - MockKernel kernel(program.get(), MockKernel::toKernelInfoContainer(kernelInfo, rootDeviceIndex), *pClDevice); + MockKernel kernel(program.get(), kernelInfo, *pClDevice); ASSERT_EQ(CL_SUCCESS, kernel.initialize()); CommandQueueHw cmdQ(nullptr, pClDevice, 0, false); diff --git a/opencl/test/unit_test/kernel/kernel_tests.cpp b/opencl/test/unit_test/kernel/kernel_tests.cpp index 24ea505c51..ef0aa46980 100644 --- a/opencl/test/unit_test/kernel/kernel_tests.cpp +++ b/opencl/test/unit_test/kernel/kernel_tests.cpp @@ -70,7 +70,7 @@ class KernelTests : public ProgramFromBinaryFixture { // create a kernel pKernel = Kernel::create( pProgram, - pProgram->getKernelInfosForKernel(kernelName), + pProgram->getKernelInfoForKernel(kernelName), *pClDevice, &retVal); @@ -101,8 +101,8 @@ TEST(KernelTest, WhenKernelIsCreatedThenCorrectMembersAreMemObjects) { } TEST_F(KernelTests, WhenKernelIsCreatedThenKernelHeapIsCorrect) { - EXPECT_EQ(pKernel->getKernelInfo(rootDeviceIndex).heapInfo.pKernelHeap, pKernel->getKernelHeap(rootDeviceIndex)); - EXPECT_EQ(pKernel->getKernelInfo(rootDeviceIndex).heapInfo.KernelHeapSize, pKernel->getKernelHeapSize(rootDeviceIndex)); + EXPECT_EQ(pKernel->getKernelInfo().heapInfo.pKernelHeap, pKernel->getKernelHeap()); + EXPECT_EQ(pKernel->getKernelInfo().heapInfo.KernelHeapSize, pKernel->getKernelHeapSize()); } TEST_F(KernelTests, GivenInvalidParamNameWhenGettingInfoThenInvalidValueErrorIsReturned) { @@ -171,7 +171,7 @@ TEST_F(KernelTests, GivenKernelBinaryProgramIntelWhenGettingInfoThenKernelBinary size_t paramValueSize = 0; char *paramValue = nullptr; size_t paramValueSizeRet = 0; - const char *pKernelData = reinterpret_cast(pKernel->getKernelHeap(rootDeviceIndex)); + const char *pKernelData = reinterpret_cast(pKernel->getKernelHeap()); EXPECT_NE(nullptr, pKernelData); // get size of kernel binary @@ -214,7 +214,7 @@ TEST_F(KernelTests, givenBinaryWhenItIsQueriedForGpuAddressThenAbsoluteAddressIs ¶mValueSizeRet); EXPECT_EQ(CL_SUCCESS, retVal); - auto expectedGpuAddress = GmmHelper::decanonize(pKernel->getKernelInfo(rootDeviceIndex).kernelAllocation->getGpuAddress()); + auto expectedGpuAddress = GmmHelper::decanonize(pKernel->getKernelInfo().kernelAllocation->getGpuAddress()); EXPECT_EQ(expectedGpuAddress, paramValue); EXPECT_EQ(paramValueSize, paramValueSizeRet); } @@ -349,12 +349,12 @@ TEST_F(KernelFromBinaryTests, GivenKernelNumArgsWhenGettingInfoThenNumberOfKerne ASSERT_EQ(CL_SUCCESS, retVal); - auto kernelInfos = pProgram->getKernelInfosForKernel("test"); + auto &kernelInfo = pProgram->getKernelInfoForKernel("test"); // create a kernel auto pKernel = Kernel::create( pProgram, - kernelInfos, + kernelInfo, *pClDevice, &retVal); @@ -388,12 +388,12 @@ TEST_F(KernelFromBinaryTests, WhenRegularKernelIsCreatedThenItIsNotBuiltIn) { ASSERT_EQ(CL_SUCCESS, retVal); - auto kernelInfos = pProgram->getKernelInfosForKernel("simple_kernel_0"); + auto &kernelInfo = pProgram->getKernelInfoForKernel("simple_kernel_0"); // create a kernel auto pKernel = Kernel::create( pProgram, - kernelInfos, + kernelInfo, *pClDevice, &retVal); @@ -546,7 +546,7 @@ TEST_F(KernelPrivateSurfaceTest, WhenChangingResidencyThenCsrResidencySizeIsUpda // create kernel MockContext context; MockProgram program(&context, false, toClDeviceVector(*pClDevice)); - MockKernel *pKernel = new MockKernel(&program, MockKernel::toKernelInfoContainer(*pKernelInfo, rootDeviceIndex), *pClDevice); + MockKernel *pKernel = new MockKernel(&program, *pKernelInfo, *pClDevice); ASSERT_EQ(CL_SUCCESS, pKernel->initialize()); // Test it @@ -582,7 +582,7 @@ TEST_F(KernelPrivateSurfaceTest, givenKernelWithPrivateSurfaceThatIsInUseByGpuWh MockContext context; MockProgram program(&context, false, toClDeviceVector(*pClDevice)); - std::unique_ptr pKernel(new MockKernel(&program, MockKernel::toKernelInfoContainer(*pKernelInfo, rootDeviceIndex), *pClDevice)); + std::unique_ptr pKernel(new MockKernel(&program, *pKernelInfo, *pClDevice)); pKernel->initialize(); auto &csr = pDevice->getGpgpuCommandStreamReceiver(); @@ -623,7 +623,7 @@ TEST_F(KernelPrivateSurfaceTest, WhenPrivateSurfaceAllocationFailsThenOutOfResou MockContext context; MockProgram program(&context, false, toClDeviceVector(*pClDevice)); MemoryManagementFixture::InjectedFunction method = [&](size_t failureIndex) { - MockKernel *pKernel = new MockKernel(&program, MockKernel::toKernelInfoContainer(*pKernelInfo, rootDeviceIndex), *pClDevice); + MockKernel *pKernel = new MockKernel(&program, *pKernelInfo, *pClDevice); if (MemoryManagement::nonfailingAllocation == failureIndex) { EXPECT_EQ(CL_SUCCESS, pKernel->initialize()); @@ -663,7 +663,7 @@ TEST_F(KernelPrivateSurfaceTest, given32BitDeviceWhenKernelIsCreatedThenPrivateS // create kernel MockContext context; MockProgram program(&context, false, toClDeviceVector(*pClDevice)); - MockKernel *pKernel = new MockKernel(&program, MockKernel::toKernelInfoContainer(*pKernelInfo, rootDeviceIndex), *pClDevice); + MockKernel *pKernel = new MockKernel(&program, *pKernelInfo, *pClDevice); ASSERT_EQ(CL_SUCCESS, pKernel->initialize()); @@ -692,7 +692,7 @@ HWTEST_F(KernelPrivateSurfaceTest, givenStatefulKernelWhenKernelIsCreatedThenPri MockProgram program(&context, false, toClDeviceVector(*pClDevice)); // create kernel - MockKernel *pKernel = new MockKernel(&program, MockKernel::toKernelInfoContainer(*pKernelInfo, rootDeviceIndex), *pClDevice); + MockKernel *pKernel = new MockKernel(&program, *pKernelInfo, *pClDevice); // setup surface state heap char surfaceStateHeap[0x80]; @@ -705,13 +705,13 @@ HWTEST_F(KernelPrivateSurfaceTest, givenStatefulKernelWhenKernelIsCreatedThenPri ASSERT_EQ(CL_SUCCESS, pKernel->initialize()); - EXPECT_NE(0u, pKernel->getSurfaceStateHeapSize(rootDeviceIndex)); + EXPECT_NE(0u, pKernel->getSurfaceStateHeapSize()); auto bufferAddress = pKernel->privateSurface->getGpuAddress(); typedef typename FamilyType::RENDER_SURFACE_STATE RENDER_SURFACE_STATE; auto surfaceState = reinterpret_cast( - ptrOffset(pKernel->getSurfaceStateHeap(rootDeviceIndex), + ptrOffset(pKernel->getSurfaceStateHeap(), pKernelInfo->kernelDescriptor.payloadMappings.implicitArgs.privateMemoryAddress.bindful)); auto surfaceAddress = surfaceState->getSurfaceBaseAddress(); @@ -736,7 +736,7 @@ TEST_F(KernelPrivateSurfaceTest, givenStatelessKernelWhenKernelIsCreatedThenPriv program.setConstantSurface(&gfxAlloc); // create kernel - MockKernel *pKernel = new MockKernel(&program, MockKernel::toKernelInfoContainer(*pKernelInfo, rootDeviceIndex), *pClDevice); + MockKernel *pKernel = new MockKernel(&program, *pKernelInfo, *pClDevice); // define stateful path pKernelInfo->usesSsh = false; @@ -744,8 +744,8 @@ TEST_F(KernelPrivateSurfaceTest, givenStatelessKernelWhenKernelIsCreatedThenPriv ASSERT_EQ(CL_SUCCESS, pKernel->initialize()); - EXPECT_EQ(0u, pKernel->getSurfaceStateHeapSize(rootDeviceIndex)); - EXPECT_EQ(nullptr, pKernel->getSurfaceStateHeap(rootDeviceIndex)); + EXPECT_EQ(0u, pKernel->getSurfaceStateHeapSize()); + EXPECT_EQ(nullptr, pKernel->getSurfaceStateHeap()); program.setConstantSurface(nullptr); delete pKernel; @@ -778,7 +778,7 @@ TEST_F(KernelPrivateSurfaceTest, GivenKernelWhenPrivateSurfaceTooBigAndGpuPointe MockContext context; MockProgram program(&context, false, toClDeviceVector(*pClDevice)); - std::unique_ptr pKernel(new MockKernel(&program, MockKernel::toKernelInfoContainer(*pKernelInfo, rootDeviceIndex), *pClDevice)); + std::unique_ptr pKernel(new MockKernel(&program, *pKernelInfo, *pClDevice)); pKernelInfo->gpuPointerSize = 4; pDevice->getMemoryManager()->setForce32BitAllocations(false); if (pDevice->getDeviceInfo().computeUnitsUsedForScratch == 0) @@ -797,7 +797,7 @@ TEST_F(KernelPrivateSurfaceTest, GivenKernelWhenPrivateSurfaceTooBigAndGpuPointe MockContext context; MockProgram program(&context, false, toClDeviceVector(*pClDevice)); - std::unique_ptr pKernel(new MockKernel(&program, MockKernel::toKernelInfoContainer(*pKernelInfo, rootDeviceIndex), *pClDevice)); + std::unique_ptr pKernel(new MockKernel(&program, *pKernelInfo, *pClDevice)); pKernelInfo->gpuPointerSize = 4; pDevice->getMemoryManager()->setForce32BitAllocations(true); if (pDevice->getDeviceInfo().computeUnitsUsedForScratch == 0) @@ -816,7 +816,7 @@ TEST_F(KernelPrivateSurfaceTest, GivenKernelWhenPrivateSurfaceTooBigAndGpuPointe MockContext context; MockProgram program(&context, false, toClDeviceVector(*pClDevice)); - std::unique_ptr pKernel(new MockKernel(&program, MockKernel::toKernelInfoContainer(*pKernelInfo, rootDeviceIndex), *pClDevice)); + std::unique_ptr pKernel(new MockKernel(&program, *pKernelInfo, *pClDevice)); pKernelInfo->gpuPointerSize = 8; pDevice->getMemoryManager()->setForce32BitAllocations(true); if (pDevice->getDeviceInfo().computeUnitsUsedForScratch == 0) @@ -850,7 +850,7 @@ TEST_F(KernelGlobalSurfaceTest, givenBuiltInKernelWhenKernelIsCreatedThenGlobalS MockContext context; MockProgram program(&context, false, toClDeviceVector(*pClDevice)); program.setGlobalSurface(&gfxAlloc); - MockKernel *pKernel = new MockKernel(&program, MockKernel::toKernelInfoContainer(*pKernelInfo, rootDeviceIndex), *pClDevice); + MockKernel *pKernel = new MockKernel(&program, *pKernelInfo, *pClDevice); pKernel->isBuiltIn = true; @@ -887,7 +887,7 @@ TEST_F(KernelGlobalSurfaceTest, givenNDRangeKernelWhenKernelIsCreatedThenGlobalS // create kernel MockProgram program(toClDeviceVector(*pClDevice)); program.setGlobalSurface(&gfxAlloc); - MockKernel *pKernel = new MockKernel(&program, MockKernel::toKernelInfoContainer(*pKernelInfo, rootDeviceIndex), *pClDevice); + MockKernel *pKernel = new MockKernel(&program, *pKernelInfo, *pClDevice); ASSERT_EQ(CL_SUCCESS, pKernel->initialize()); @@ -921,7 +921,7 @@ HWTEST_F(KernelGlobalSurfaceTest, givenStatefulKernelWhenKernelIsCreatedThenGlob program.setGlobalSurface(&gfxAlloc); // create kernel - MockKernel *pKernel = new MockKernel(&program, MockKernel::toKernelInfoContainer(*pKernelInfo, rootDeviceIndex), *pClDevice); + MockKernel *pKernel = new MockKernel(&program, *pKernelInfo, *pClDevice); // setup surface state heap char surfaceStateHeap[0x80]; @@ -934,11 +934,11 @@ HWTEST_F(KernelGlobalSurfaceTest, givenStatefulKernelWhenKernelIsCreatedThenGlob ASSERT_EQ(CL_SUCCESS, pKernel->initialize()); - EXPECT_NE(0u, pKernel->getSurfaceStateHeapSize(rootDeviceIndex)); + EXPECT_NE(0u, pKernel->getSurfaceStateHeapSize()); typedef typename FamilyType::RENDER_SURFACE_STATE RENDER_SURFACE_STATE; auto surfaceState = reinterpret_cast( - ptrOffset(pKernel->getSurfaceStateHeap(rootDeviceIndex), + ptrOffset(pKernel->getSurfaceStateHeap(), pKernelInfo->kernelDescriptor.payloadMappings.implicitArgs.globalVariablesSurfaceAddress.bindful)); auto surfaceAddress = surfaceState->getSurfaceBaseAddress(); @@ -962,7 +962,7 @@ TEST_F(KernelGlobalSurfaceTest, givenStatelessKernelWhenKernelIsCreatedThenGloba program.setGlobalSurface(&gfxAlloc); // create kernel - MockKernel *pKernel = new MockKernel(&program, MockKernel::toKernelInfoContainer(*pKernelInfo, rootDeviceIndex), *pClDevice); + MockKernel *pKernel = new MockKernel(&program, *pKernelInfo, *pClDevice); // define stateful path pKernelInfo->usesSsh = false; @@ -970,8 +970,8 @@ TEST_F(KernelGlobalSurfaceTest, givenStatelessKernelWhenKernelIsCreatedThenGloba ASSERT_EQ(CL_SUCCESS, pKernel->initialize()); - EXPECT_EQ(0u, pKernel->getSurfaceStateHeapSize(rootDeviceIndex)); - EXPECT_EQ(nullptr, pKernel->getSurfaceStateHeap(rootDeviceIndex)); + EXPECT_EQ(0u, pKernel->getSurfaceStateHeapSize()); + EXPECT_EQ(nullptr, pKernel->getSurfaceStateHeap()); program.setGlobalSurface(nullptr); delete pKernel; @@ -1002,7 +1002,7 @@ TEST_F(KernelConstantSurfaceTest, givenBuiltInKernelWhenKernelIsCreatedThenConst // create kernel MockProgram program(toClDeviceVector(*pClDevice)); program.setConstantSurface(&gfxAlloc); - MockKernel *pKernel = new MockKernel(&program, MockKernel::toKernelInfoContainer(*pKernelInfo, rootDeviceIndex), *pClDevice); + MockKernel *pKernel = new MockKernel(&program, *pKernelInfo, *pClDevice); pKernel->isBuiltIn = true; @@ -1039,7 +1039,7 @@ TEST_F(KernelConstantSurfaceTest, givenNDRangeKernelWhenKernelIsCreatedThenConst // create kernel MockProgram program(toClDeviceVector(*pClDevice)); program.setConstantSurface(&gfxAlloc); - MockKernel *pKernel = new MockKernel(&program, MockKernel::toKernelInfoContainer(*pKernelInfo, rootDeviceIndex), *pClDevice); + MockKernel *pKernel = new MockKernel(&program, *pKernelInfo, *pClDevice); ASSERT_EQ(CL_SUCCESS, pKernel->initialize()); @@ -1072,7 +1072,7 @@ HWTEST_F(KernelConstantSurfaceTest, givenStatefulKernelWhenKernelIsCreatedThenCo program.setConstantSurface(&gfxAlloc); // create kernel - MockKernel *pKernel = new MockKernel(&program, MockKernel::toKernelInfoContainer(*pKernelInfo, rootDeviceIndex), *pClDevice); + MockKernel *pKernel = new MockKernel(&program, *pKernelInfo, *pClDevice); // setup surface state heap char surfaceStateHeap[0x80]; @@ -1085,11 +1085,11 @@ HWTEST_F(KernelConstantSurfaceTest, givenStatefulKernelWhenKernelIsCreatedThenCo ASSERT_EQ(CL_SUCCESS, pKernel->initialize()); - EXPECT_NE(0u, pKernel->getSurfaceStateHeapSize(rootDeviceIndex)); + EXPECT_NE(0u, pKernel->getSurfaceStateHeapSize()); typedef typename FamilyType::RENDER_SURFACE_STATE RENDER_SURFACE_STATE; auto surfaceState = reinterpret_cast( - ptrOffset(pKernel->getSurfaceStateHeap(rootDeviceIndex), + ptrOffset(pKernel->getSurfaceStateHeap(), pKernelInfo->kernelDescriptor.payloadMappings.implicitArgs.globalConstantsSurfaceAddress.bindful)); auto surfaceAddress = surfaceState->getSurfaceBaseAddress(); @@ -1113,7 +1113,7 @@ TEST_F(KernelConstantSurfaceTest, givenStatelessKernelWhenKernelIsCreatedThenCon program.setConstantSurface(&gfxAlloc); // create kernel - MockKernel *pKernel = new MockKernel(&program, MockKernel::toKernelInfoContainer(*pKernelInfo, rootDeviceIndex), *pClDevice); + MockKernel *pKernel = new MockKernel(&program, *pKernelInfo, *pClDevice); // define stateful path pKernelInfo->usesSsh = false; @@ -1121,8 +1121,8 @@ TEST_F(KernelConstantSurfaceTest, givenStatelessKernelWhenKernelIsCreatedThenCon ASSERT_EQ(CL_SUCCESS, pKernel->initialize()); - EXPECT_EQ(0u, pKernel->getSurfaceStateHeapSize(rootDeviceIndex)); - EXPECT_EQ(nullptr, pKernel->getSurfaceStateHeap(rootDeviceIndex)); + EXPECT_EQ(0u, pKernel->getSurfaceStateHeapSize()); + EXPECT_EQ(nullptr, pKernel->getSurfaceStateHeap()); program.setConstantSurface(nullptr); delete pKernel; @@ -1143,7 +1143,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, KernelEventPoolSurfaceTest, givenStatefulKernelWhenK // create kernel MockProgram program(&context, false, toClDeviceVector(*pClDevice)); - MockKernel *pKernel = new MockKernel(&program, MockKernel::toKernelInfoContainer(*pKernelInfo, rootDeviceIndex), *pClDevice); + MockKernel *pKernel = new MockKernel(&program, *pKernelInfo, *pClDevice); // setup surface state heap char surfaceStateHeap[0x80]; @@ -1156,11 +1156,11 @@ HWCMDTEST_F(IGFX_GEN8_CORE, KernelEventPoolSurfaceTest, givenStatefulKernelWhenK ASSERT_EQ(CL_SUCCESS, pKernel->initialize()); - EXPECT_NE(0u, pKernel->getSurfaceStateHeapSize(rootDeviceIndex)); + EXPECT_NE(0u, pKernel->getSurfaceStateHeapSize()); typedef typename FamilyType::RENDER_SURFACE_STATE RENDER_SURFACE_STATE; auto surfaceState = reinterpret_cast( - ptrOffset(pKernel->getSurfaceStateHeap(rootDeviceIndex), + ptrOffset(pKernel->getSurfaceStateHeap(), pKernelInfo->kernelDescriptor.payloadMappings.implicitArgs.deviceSideEnqueueEventPoolSurfaceAddress.bindful)); auto surfaceAddress = surfaceState->getSurfaceBaseAddress(); @@ -1186,7 +1186,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, KernelEventPoolSurfaceTest, givenStatefulKernelWhenE // create kernel MockProgram program(&context, false, toClDeviceVector(*pClDevice)); - MockKernel *pKernel = new MockKernel(&program, MockKernel::toKernelInfoContainer(*pKernelInfo, rootDeviceIndex), *pClDevice); + MockKernel *pKernel = new MockKernel(&program, *pKernelInfo, *pClDevice); // setup surface state heap char surfaceStateHeap[0x80]; @@ -1203,7 +1203,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, KernelEventPoolSurfaceTest, givenStatefulKernelWhenE typedef typename FamilyType::RENDER_SURFACE_STATE RENDER_SURFACE_STATE; auto surfaceState = reinterpret_cast( - ptrOffset(pKernel->getSurfaceStateHeap(rootDeviceIndex), + ptrOffset(pKernel->getSurfaceStateHeap(), pKernelInfo->kernelDescriptor.payloadMappings.implicitArgs.deviceSideEnqueueEventPoolSurfaceAddress.bindful)); auto surfaceAddress = surfaceState->getSurfaceBaseAddress(); @@ -1222,7 +1222,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, KernelEventPoolSurfaceTest, givenKernelWithNullEvent // create kernel MockProgram program(toClDeviceVector(*pClDevice)); - MockKernel *pKernel = new MockKernel(&program, MockKernel::toKernelInfoContainer(*pKernelInfo, rootDeviceIndex), *pClDevice); + MockKernel *pKernel = new MockKernel(&program, *pKernelInfo, *pClDevice); // define stateful path pKernelInfo->usesSsh = false; @@ -1254,7 +1254,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, KernelEventPoolSurfaceTest, givenStatelessKernelWhen // create kernel MockProgram program(toClDeviceVector(*pClDevice)); - MockKernel *pKernel = new MockKernel(&program, MockKernel::toKernelInfoContainer(*pKernelInfo, rootDeviceIndex), *pClDevice); + MockKernel *pKernel = new MockKernel(&program, *pKernelInfo, *pClDevice); // define stateful path pKernelInfo->usesSsh = false; @@ -1262,7 +1262,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, KernelEventPoolSurfaceTest, givenStatelessKernelWhen ASSERT_EQ(CL_SUCCESS, pKernel->initialize()); if (pClDevice->areOcl21FeaturesSupported() == false) { - EXPECT_EQ(0u, pKernel->getSurfaceStateHeapSize(rootDeviceIndex)); + EXPECT_EQ(0u, pKernel->getSurfaceStateHeapSize()); } else { } @@ -1284,7 +1284,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, KernelEventPoolSurfaceTest, givenStatelessKernelWhen // create kernel MockProgram program(toClDeviceVector(*pClDevice)); - MockKernel *pKernel = new MockKernel(&program, MockKernel::toKernelInfoContainer(*pKernelInfo, rootDeviceIndex), *pClDevice); + MockKernel *pKernel = new MockKernel(&program, *pKernelInfo, *pClDevice); // define stateful path pKernelInfo->usesSsh = false; @@ -1316,7 +1316,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, KernelDefaultDeviceQueueSurfaceTest, givenStatefulKe // create kernel MockProgram program(&context, false, toClDeviceVector(*pClDevice)); - MockKernel *pKernel = new MockKernel(&program, MockKernel::toKernelInfoContainer(*pKernelInfo, rootDeviceIndex), *pClDevice); + MockKernel *pKernel = new MockKernel(&program, *pKernelInfo, *pClDevice); // setup surface state heap char surfaceStateHeap[0x80]; @@ -1329,11 +1329,11 @@ HWCMDTEST_F(IGFX_GEN8_CORE, KernelDefaultDeviceQueueSurfaceTest, givenStatefulKe ASSERT_EQ(CL_SUCCESS, pKernel->initialize()); - EXPECT_NE(0u, pKernel->getSurfaceStateHeapSize(rootDeviceIndex)); + EXPECT_NE(0u, pKernel->getSurfaceStateHeapSize()); typedef typename FamilyType::RENDER_SURFACE_STATE RENDER_SURFACE_STATE; auto surfaceState = reinterpret_cast( - ptrOffset(pKernel->getSurfaceStateHeap(rootDeviceIndex), + ptrOffset(pKernel->getSurfaceStateHeap(), pKernelInfo->kernelDescriptor.payloadMappings.implicitArgs.deviceSideEnqueueDefaultQueueSurfaceAddress.bindful)); auto surfaceAddress = surfaceState->getSurfaceBaseAddress(); @@ -1359,7 +1359,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, KernelDefaultDeviceQueueSurfaceTest, givenStatefulKe // create kernel MockProgram program(&context, false, toClDeviceVector(*pClDevice)); - MockKernel *pKernel = new MockKernel(&program, MockKernel::toKernelInfoContainer(*pKernelInfo, rootDeviceIndex), *pClDevice); + MockKernel *pKernel = new MockKernel(&program, *pKernelInfo, *pClDevice); // setup surface state heap char surfaceStateHeap[0x80]; @@ -1374,11 +1374,11 @@ HWCMDTEST_F(IGFX_GEN8_CORE, KernelDefaultDeviceQueueSurfaceTest, givenStatefulKe pKernel->patchDefaultDeviceQueue(pDevQueue); - EXPECT_NE(0u, pKernel->getSurfaceStateHeapSize(rootDeviceIndex)); + EXPECT_NE(0u, pKernel->getSurfaceStateHeapSize()); typedef typename FamilyType::RENDER_SURFACE_STATE RENDER_SURFACE_STATE; auto surfaceState = reinterpret_cast( - ptrOffset(pKernel->getSurfaceStateHeap(rootDeviceIndex), + ptrOffset(pKernel->getSurfaceStateHeap(), pKernelInfo->kernelDescriptor.payloadMappings.implicitArgs.deviceSideEnqueueDefaultQueueSurfaceAddress.bindful)); auto surfaceAddress = surfaceState->getSurfaceBaseAddress(); @@ -1404,7 +1404,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, KernelDefaultDeviceQueueSurfaceTest, givenStatelessK // create kernel MockProgram program(toClDeviceVector(*pClDevice)); - MockKernel *pKernel = new MockKernel(&program, MockKernel::toKernelInfoContainer(*pKernelInfo, rootDeviceIndex), *pClDevice); + MockKernel *pKernel = new MockKernel(&program, *pKernelInfo, *pClDevice); // define stateless path pKernelInfo->usesSsh = false; @@ -1412,7 +1412,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, KernelDefaultDeviceQueueSurfaceTest, givenStatelessK ASSERT_EQ(CL_SUCCESS, pKernel->initialize()); - EXPECT_EQ(0u, pKernel->getSurfaceStateHeapSize(rootDeviceIndex)); + EXPECT_EQ(0u, pKernel->getSurfaceStateHeapSize()); delete pKernel; } @@ -1426,7 +1426,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, KernelDefaultDeviceQueueSurfaceTest, givenKernelWith // create kernel MockProgram program(toClDeviceVector(*pClDevice)); - MockKernel *pKernel = new MockKernel(&program, MockKernel::toKernelInfoContainer(*pKernelInfo, rootDeviceIndex), *pClDevice); + MockKernel *pKernel = new MockKernel(&program, *pKernelInfo, *pClDevice); // define stateless path pKernelInfo->usesSsh = false; @@ -1458,7 +1458,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, KernelDefaultDeviceQueueSurfaceTest, givenStatelessK // create kernel MockProgram program(toClDeviceVector(*pClDevice)); - MockKernel *pKernel = new MockKernel(&program, MockKernel::toKernelInfoContainer(*pKernelInfo, rootDeviceIndex), *pClDevice); + MockKernel *pKernel = new MockKernel(&program, *pKernelInfo, *pClDevice); // define stateless path pKernelInfo->usesSsh = false; @@ -1506,14 +1506,14 @@ HWTEST_F(KernelResidencyTest, givenKernelWhenMakeResidentIsCalledThenKernelIsaIs MockProgram program(toClDeviceVector(*pClDevice)); MockContext ctx; program.setContext(&ctx); - std::unique_ptr pKernel(new MockKernel(&program, MockKernel::toKernelInfoContainer(*pKernelInfo, rootDeviceIndex), *pClDevice)); + std::unique_ptr pKernel(new MockKernel(&program, *pKernelInfo, *pClDevice)); ASSERT_EQ(CL_SUCCESS, pKernel->initialize()); pKernel->setCrossThreadData(pCrossThreadData, sizeof(pCrossThreadData)); EXPECT_EQ(0u, commandStreamReceiver.makeResidentAllocations.size()); pKernel->makeResident(pDevice->getGpgpuCommandStreamReceiver()); EXPECT_EQ(1u, commandStreamReceiver.makeResidentAllocations.size()); - EXPECT_TRUE(commandStreamReceiver.isMadeResident(pKernel->getKernelInfo(rootDeviceIndex).getGraphicsAllocation())); + EXPECT_TRUE(commandStreamReceiver.isMadeResident(pKernel->getKernelInfo().getGraphicsAllocation())); memoryManager->freeGraphicsMemory(pKernelInfo->kernelAllocation); } @@ -1533,7 +1533,7 @@ HWTEST_F(KernelResidencyTest, givenKernelWhenMakeResidentIsCalledThenExportedFun program.buildInfos[pDevice->getRootDeviceIndex()].exportedFunctionsSurface = exportedFunctionsSurface.get(); MockContext ctx; program.setContext(&ctx); - std::unique_ptr pKernel(new MockKernel(&program, MockKernel::toKernelInfoContainer(*pKernelInfo, rootDeviceIndex), *pClDevice)); + std::unique_ptr pKernel(new MockKernel(&program, *pKernelInfo, *pClDevice)); ASSERT_EQ(CL_SUCCESS, pKernel->initialize()); EXPECT_EQ(0u, commandStreamReceiver.makeResidentAllocations.size()); @@ -1572,7 +1572,7 @@ HWTEST_F(KernelResidencyTest, givenKernelWhenMakeResidentIsCalledThenGlobalBuffe MockContext ctx; program.setContext(&ctx); program.buildInfos[pDevice->getRootDeviceIndex()].globalSurface = new MockGraphicsAllocation(); - std::unique_ptr pKernel(new MockKernel(&program, MockKernel::toKernelInfoContainer(*pKernelInfo, rootDeviceIndex), *pClDevice)); + std::unique_ptr pKernel(new MockKernel(&program, *pKernelInfo, *pClDevice)); ASSERT_EQ(CL_SUCCESS, pKernel->initialize()); EXPECT_EQ(0u, commandStreamReceiver.makeResidentAllocations.size()); @@ -2041,7 +2041,7 @@ HWTEST_F(KernelResidencyTest, givenKernelWithNoKernelArgLoadNorKernelArgStoreNor MockContext ctx; program.setContext(&ctx); program.buildInfos[pDevice->getRootDeviceIndex()].globalSurface = new MockGraphicsAllocation(); - std::unique_ptr pKernel(new MockKernel(&program, MockKernel::toKernelInfoContainer(*pKernelInfo, rootDeviceIndex), *pClDevice)); + std::unique_ptr pKernel(new MockKernel(&program, *pKernelInfo, *pClDevice)); ASSERT_EQ(CL_SUCCESS, pKernel->initialize()); EXPECT_FALSE(pKernel->getHasIndirectAccess()); @@ -2066,7 +2066,7 @@ HWTEST_F(KernelResidencyTest, givenKernelWithNoKernelArgLoadThenKernelHasIndirec MockContext ctx; program.setContext(&ctx); program.buildInfos[pDevice->getRootDeviceIndex()].globalSurface = new MockGraphicsAllocation(); - std::unique_ptr pKernel(new MockKernel(&program, MockKernel::toKernelInfoContainer(*pKernelInfo, rootDeviceIndex), *pClDevice)); + std::unique_ptr pKernel(new MockKernel(&program, *pKernelInfo, *pClDevice)); ASSERT_EQ(CL_SUCCESS, pKernel->initialize()); EXPECT_TRUE(pKernel->getHasIndirectAccess()); @@ -2091,7 +2091,7 @@ HWTEST_F(KernelResidencyTest, givenKernelWithNoKernelArgStoreThenKernelHasIndire MockContext ctx; program.setContext(&ctx); program.buildInfos[pDevice->getRootDeviceIndex()].globalSurface = new MockGraphicsAllocation(); - std::unique_ptr pKernel(new MockKernel(&program, MockKernel::toKernelInfoContainer(*pKernelInfo, rootDeviceIndex), *pClDevice)); + std::unique_ptr pKernel(new MockKernel(&program, *pKernelInfo, *pClDevice)); ASSERT_EQ(CL_SUCCESS, pKernel->initialize()); EXPECT_TRUE(pKernel->getHasIndirectAccess()); @@ -2116,7 +2116,7 @@ HWTEST_F(KernelResidencyTest, givenKernelWithNoKernelArgAtomicThenKernelHasIndir MockContext ctx; program.setContext(&ctx); program.buildInfos[pDevice->getRootDeviceIndex()].globalSurface = new MockGraphicsAllocation(); - std::unique_ptr pKernel(new MockKernel(&program, MockKernel::toKernelInfoContainer(*pKernelInfo, rootDeviceIndex), *pClDevice)); + std::unique_ptr pKernel(new MockKernel(&program, *pKernelInfo, *pClDevice)); ASSERT_EQ(CL_SUCCESS, pKernel->initialize()); EXPECT_TRUE(pKernel->getHasIndirectAccess()); @@ -2256,7 +2256,7 @@ HWTEST_F(KernelResidencyTest, givenSimpleKernelTunningAndNoAtomicsWhenPerformTun result = mockKernel.mockKernel->kernelSubmissionMap.find(config); EXPECT_EQ(result, mockKernel.mockKernel->kernelSubmissionMap.end()); - EXPECT_NE(mockKernel.mockKernel->isSingleSubdevicePreferred(), mockKernel.mockKernel->getKernelInfo(0u).kernelDescriptor.kernelAttributes.flags.useGlobalAtomics); + EXPECT_NE(mockKernel.mockKernel->isSingleSubdevicePreferred(), mockKernel.mockKernel->getKernelInfo().kernelDescriptor.kernelAttributes.flags.useGlobalAtomics); } TEST(KernelImageDetectionTests, givenKernelWithImagesOnlyWhenItIsAskedIfItHasImagesOnlyThenTrueIsReturned) { @@ -2271,7 +2271,7 @@ TEST(KernelImageDetectionTests, givenKernelWithImagesOnlyWhenItIsAskedIfItHasIma auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get(), rootDeviceIndex)); auto context = clUniquePtr(new MockContext(device.get())); auto program = clUniquePtr(new MockProgram(context.get(), false, toClDeviceVector(*device))); - auto kernel = std::make_unique(program.get(), MockKernel::toKernelInfoContainer(*pKernelInfo, rootDeviceIndex), *device); + auto kernel = std::make_unique(program.get(), *pKernelInfo, *device); EXPECT_FALSE(kernel->usesOnlyImages()); kernel->initialize(); EXPECT_TRUE(kernel->usesOnlyImages()); @@ -2289,7 +2289,7 @@ TEST(KernelImageDetectionTests, givenKernelWithImagesAndBuffersWhenItIsAskedIfIt auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get(), rootDeviceIndex)); auto context = clUniquePtr(new MockContext(device.get())); auto program = clUniquePtr(new MockProgram(context.get(), false, toClDeviceVector(*device))); - auto kernel = std::make_unique(program.get(), MockKernel::toKernelInfoContainer(*pKernelInfo, rootDeviceIndex), *device); + auto kernel = std::make_unique(program.get(), *pKernelInfo, *device); EXPECT_FALSE(kernel->usesOnlyImages()); kernel->initialize(); EXPECT_FALSE(kernel->usesOnlyImages()); @@ -2305,7 +2305,7 @@ TEST(KernelImageDetectionTests, givenKernelWithNoImagesWhenItIsAskedIfItHasImage auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get(), rootDeviceIndex)); auto context = clUniquePtr(new MockContext(device.get())); auto program = clUniquePtr(new MockProgram(context.get(), false, toClDeviceVector(*device))); - auto kernel = std::make_unique(program.get(), MockKernel::toKernelInfoContainer(*pKernelInfo, rootDeviceIndex), *device); + auto kernel = std::make_unique(program.get(), *pKernelInfo, *device); EXPECT_FALSE(kernel->usesOnlyImages()); kernel->initialize(); EXPECT_FALSE(kernel->usesOnlyImages()); @@ -2361,7 +2361,7 @@ HWTEST_F(KernelResidencyTest, WhenMakingArgsResidentThenImageFromImageCheckIsCor auto program = std::make_unique(toClDeviceVector(*pClDevice)); program->setContext(&context); - std::unique_ptr pKernel(new MockKernel(program.get(), MockKernel::toKernelInfoContainer(*pKernelInfo, rootDeviceIndex), *pClDevice)); + std::unique_ptr pKernel(new MockKernel(program.get(), *pKernelInfo, *pClDevice)); ASSERT_EQ(CL_SUCCESS, pKernel->initialize()); pKernel->storeKernelArg(0, Kernel::IMAGE_OBJ, (cl_mem)imageY.get(), NULL, 0); @@ -2382,7 +2382,7 @@ struct KernelExecutionEnvironmentTest : public Test { pKernelInfo = std::make_unique(); pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 32; - pKernel = new MockKernel(program.get(), MockKernel::toKernelInfoContainer(*pKernelInfo, rootDeviceIndex), *pClDevice); + pKernel = new MockKernel(program.get(), *pKernelInfo, *pClDevice); ASSERT_EQ(CL_SUCCESS, pKernel->initialize()); } @@ -2479,7 +2479,7 @@ TEST_F(KernelCrossThreadTests, WhenKernelIsInitializedThenGlobalWorkOffsetIsCorr pKernelInfo->workloadInfo.globalWorkOffsetOffsets[1] = 4; - MockKernel kernel(program.get(), MockKernel::toKernelInfoContainer(*pKernelInfo, rootDeviceIndex), *pClDevice); + MockKernel kernel(program.get(), *pKernelInfo, *pClDevice); ASSERT_EQ(CL_SUCCESS, kernel.initialize()); EXPECT_EQ(&Kernel::dummyPatchLocation, kernel.globalWorkOffsetX); @@ -2492,7 +2492,7 @@ TEST_F(KernelCrossThreadTests, WhenKernelIsInitializedThenLocalWorkSizeIsCorrect pKernelInfo->workloadInfo.localWorkSizeOffsets[0] = 0xc; - MockKernel kernel(program.get(), MockKernel::toKernelInfoContainer(*pKernelInfo, rootDeviceIndex), *pClDevice); + MockKernel kernel(program.get(), *pKernelInfo, *pClDevice); ASSERT_EQ(CL_SUCCESS, kernel.initialize()); EXPECT_NE(nullptr, kernel.localWorkSizeX); @@ -2505,7 +2505,7 @@ TEST_F(KernelCrossThreadTests, WhenKernelIsInitializedThenLocalWorkSize2IsCorrec pKernelInfo->workloadInfo.localWorkSizeOffsets2[1] = 0xd; - MockKernel kernel(program.get(), MockKernel::toKernelInfoContainer(*pKernelInfo, rootDeviceIndex), *pClDevice); + MockKernel kernel(program.get(), *pKernelInfo, *pClDevice); ASSERT_EQ(CL_SUCCESS, kernel.initialize()); EXPECT_EQ(&Kernel::dummyPatchLocation, kernel.localWorkSizeX2); @@ -2518,7 +2518,7 @@ TEST_F(KernelCrossThreadTests, WhenKernelIsInitializedThenGlobalWorkSizeIsCorrec pKernelInfo->workloadInfo.globalWorkSizeOffsets[2] = 8; - MockKernel kernel(program.get(), MockKernel::toKernelInfoContainer(*pKernelInfo, rootDeviceIndex), *pClDevice); + MockKernel kernel(program.get(), *pKernelInfo, *pClDevice); ASSERT_EQ(CL_SUCCESS, kernel.initialize()); EXPECT_EQ(&Kernel::dummyPatchLocation, kernel.globalWorkSizeX); @@ -2531,7 +2531,7 @@ TEST_F(KernelCrossThreadTests, WhenKernelIsInitializedThenLocalWorkDimIsCorrect) pKernelInfo->workloadInfo.workDimOffset = 12; - MockKernel kernel(program.get(), MockKernel::toKernelInfoContainer(*pKernelInfo, rootDeviceIndex), *pClDevice); + MockKernel kernel(program.get(), *pKernelInfo, *pClDevice); ASSERT_EQ(CL_SUCCESS, kernel.initialize()); EXPECT_NE(nullptr, kernel.workDim); @@ -2544,7 +2544,7 @@ TEST_F(KernelCrossThreadTests, WhenKernelIsInitializedThenNumWorkGroupsIsCorrect pKernelInfo->workloadInfo.numWorkGroupsOffset[1] = 1 * sizeof(uint32_t); pKernelInfo->workloadInfo.numWorkGroupsOffset[2] = 2 * sizeof(uint32_t); - MockKernel kernel(program.get(), MockKernel::toKernelInfoContainer(*pKernelInfo, rootDeviceIndex), *pClDevice); + MockKernel kernel(program.get(), *pKernelInfo, *pClDevice); ASSERT_EQ(CL_SUCCESS, kernel.initialize()); EXPECT_NE(nullptr, kernel.numWorkGroupsX); @@ -2559,7 +2559,7 @@ TEST_F(KernelCrossThreadTests, WhenKernelIsInitializedThenEnqueuedLocalWorkSizeI pKernelInfo->workloadInfo.enqueuedLocalWorkSizeOffsets[0] = 0; - MockKernel kernel(program.get(), MockKernel::toKernelInfoContainer(*pKernelInfo, rootDeviceIndex), *pClDevice); + MockKernel kernel(program.get(), *pKernelInfo, *pClDevice); ASSERT_EQ(CL_SUCCESS, kernel.initialize()); EXPECT_NE(nullptr, kernel.enqueuedLocalWorkSizeX); @@ -2571,7 +2571,7 @@ TEST_F(KernelCrossThreadTests, WhenKernelIsInitializedThenEnqueuedLocalWorkSizeI TEST_F(KernelCrossThreadTests, WhenKernelIsInitializedThenEnqueuedMaxWorkGroupSizeIsCorrect) { pKernelInfo->workloadInfo.maxWorkGroupSizeOffset = 12; - MockKernel kernel(program.get(), MockKernel::toKernelInfoContainer(*pKernelInfo, rootDeviceIndex), *pClDevice); + MockKernel kernel(program.get(), *pKernelInfo, *pClDevice); ASSERT_EQ(CL_SUCCESS, kernel.initialize()); EXPECT_NE(nullptr, kernel.maxWorkGroupSizeForCrossThreadData); @@ -2584,7 +2584,7 @@ TEST_F(KernelCrossThreadTests, WhenKernelIsInitializedThenEnqueuedMaxWorkGroupSi TEST_F(KernelCrossThreadTests, WhenKernelIsInitializedThenDataParameterSimdSizeIsCorrect) { pKernelInfo->workloadInfo.simdSizeOffset = 16; pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 16; - MockKernel kernel(program.get(), MockKernel::toKernelInfoContainer(*pKernelInfo, rootDeviceIndex), *pClDevice); + MockKernel kernel(program.get(), *pKernelInfo, *pClDevice); ASSERT_EQ(CL_SUCCESS, kernel.initialize()); EXPECT_NE(nullptr, kernel.dataParameterSimdSize); @@ -2595,7 +2595,7 @@ TEST_F(KernelCrossThreadTests, WhenKernelIsInitializedThenDataParameterSimdSizeI TEST_F(KernelCrossThreadTests, GivenParentEventOffsetWhenKernelIsInitializedThenParentEventIsInitiatedWithInvalid) { pKernelInfo->workloadInfo.parentEventOffset = 16; - MockKernel kernel(program.get(), MockKernel::toKernelInfoContainer(*pKernelInfo, rootDeviceIndex), *pClDevice); + MockKernel kernel(program.get(), *pKernelInfo, *pClDevice); ASSERT_EQ(CL_SUCCESS, kernel.initialize()); EXPECT_NE(nullptr, kernel.parentEventOffset); @@ -2607,7 +2607,7 @@ TEST_F(KernelCrossThreadTests, GivenParentEventOffsetWhenKernelIsInitializedThen TEST_F(KernelCrossThreadTests, WhenAddingKernelThenProgramRefCountIsIncremented) { auto refCount = program->getReference(); - MockKernel *kernel = new MockKernel(program.get(), MockKernel::toKernelInfoContainer(*pKernelInfo, rootDeviceIndex), *pClDevice); + MockKernel *kernel = new MockKernel(program.get(), *pKernelInfo, *pClDevice); auto refCount2 = program->getReference(); EXPECT_EQ(refCount2, refCount + 1); @@ -2620,7 +2620,7 @@ TEST_F(KernelCrossThreadTests, GivenSlmStatisSizeWhenCreatingKernelThenSlmTotalS pKernelInfo->workloadInfo.slmStaticSize = 1024; - MockKernel *kernel = new MockKernel(program.get(), MockKernel::toKernelInfoContainer(*pKernelInfo, rootDeviceIndex), *pClDevice); + MockKernel *kernel = new MockKernel(program.get(), *pKernelInfo, *pClDevice); EXPECT_EQ(1024u, kernel->slmTotalSize); @@ -2633,7 +2633,7 @@ TEST_F(KernelCrossThreadTests, givenKernelWithPrivateMemoryWhenItIsCreatedThenCu allocatePrivate.PerThreadPrivateMemorySize = 1; populateKernelDescriptor(pKernelInfo->kernelDescriptor, allocatePrivate); - MockKernel *kernel = new MockKernel(program.get(), MockKernel::toKernelInfoContainer(*pKernelInfo, rootDeviceIndex), *pClDevice); + MockKernel *kernel = new MockKernel(program.get(), *pKernelInfo, *pClDevice); kernel->initialize(); @@ -2652,7 +2652,7 @@ TEST_F(KernelCrossThreadTests, givenKernelWithPrivateMemoryWhenItIsCreatedThenCu TEST_F(KernelCrossThreadTests, givenKernelWithPreferredWkgMultipleWhenItIsCreatedThenCurbeIsPatchedProperly) { pKernelInfo->workloadInfo.preferredWkgMultipleOffset = 8; - MockKernel *kernel = new MockKernel(program.get(), MockKernel::toKernelInfoContainer(*pKernelInfo, rootDeviceIndex), *pClDevice); + MockKernel *kernel = new MockKernel(program.get(), *pKernelInfo, *pClDevice); kernel->initialize(); @@ -2678,7 +2678,7 @@ TEST_F(KernelCrossThreadTests, WhenPatchingBlocksSimdSizeThenSimdSizeIsPatchedCo kernel->mockProgram->blockKernelManager->addBlockKernelInfo(infoBlock); // patch block's simd size - kernel->mockKernel->patchBlocksSimdSize(rootDeviceIndex); + kernel->mockKernel->patchBlocksSimdSize(); // obtain block's simd size from cross thread data void *blockSimdSize = ptrOffset(kernel->mockKernel->getCrossThreadData(), kernel->kernelInfo.childrenKernelsIdOffset[0].second); @@ -2838,9 +2838,9 @@ TEST(KernelTest, givenKernelWithKernelInfoWith32bitPointerSizeThenReport32bit) { auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr, rootDeviceIndex)); MockContext context; MockProgram program(&context, false, toClDeviceVector(*device)); - std::unique_ptr kernel(new MockKernel(&program, MockKernel::toKernelInfoContainer(info, rootDeviceIndex), *device)); + std::unique_ptr kernel(new MockKernel(&program, info, *device)); - EXPECT_TRUE(kernel->is32Bit(rootDeviceIndex)); + EXPECT_TRUE(kernel->is32Bit()); } TEST(KernelTest, givenKernelWithKernelInfoWith64bitPointerSizeThenReport64bit) { @@ -2851,9 +2851,9 @@ TEST(KernelTest, givenKernelWithKernelInfoWith64bitPointerSizeThenReport64bit) { auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr, rootDeviceIndex)); MockContext context; MockProgram program(&context, false, toClDeviceVector(*device)); - std::unique_ptr kernel(new MockKernel(&program, MockKernel::toKernelInfoContainer(info, rootDeviceIndex), *device)); + std::unique_ptr kernel(new MockKernel(&program, info, *device)); - EXPECT_FALSE(kernel->is32Bit(rootDeviceIndex)); + EXPECT_FALSE(kernel->is32Bit()); } TEST(KernelTest, givenFtrRenderCompressedBuffersWhenInitializingArgsWithNonStatefulAccessThenMarkKernelForAuxTranslation) { @@ -3145,7 +3145,7 @@ TEST(KernelTest, givenKernelRequiringPrivateScratchSpaceWhenGettingSizeForPrivat mediaVFEstateSlot1.PerThreadScratchSpace = 1024u; populateKernelDescriptor(mockKernel.kernelInfo.kernelDescriptor, mediaVFEstateSlot1, 1); - EXPECT_EQ(1024u, mockKernel.mockKernel->getPrivateScratchSize(device->getRootDeviceIndex())); + EXPECT_EQ(1024u, mockKernel.mockKernel->getPrivateScratchSize()); } TEST(KernelTest, givenKernelWithoutMediaVfeStateSlot1WhenGettingSizeForPrivateScratchSpaceThenCorrectSizeIsReturned) { @@ -3153,7 +3153,7 @@ TEST(KernelTest, givenKernelWithoutMediaVfeStateSlot1WhenGettingSizeForPrivateSc MockKernelWithInternals mockKernel(*device); - EXPECT_EQ(0u, mockKernel.mockKernel->getPrivateScratchSize(device->getRootDeviceIndex())); + EXPECT_EQ(0u, mockKernel.mockKernel->getPrivateScratchSize()); } TEST(KernelTest, givenKernelWithPatchInfoCollectionEnabledWhenPatchWithImplicitSurfaceCalledThenPatchInfoDataIsCollected) { @@ -3249,7 +3249,6 @@ TEST(KernelTest, GivenDifferentValuesWhenSetKernelExecutionTypeIsCalledThenCorre TEST(KernelTest, givenKernelLocalIdGenerationByRuntimeFalseWhenGettingStartOffsetThenOffsetToSkipPerThreadDataLoadIsAdded) { auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); - auto rootDeviceIndex = device->getRootDeviceIndex(); MockKernelWithInternals mockKernel(*device); SPatchThreadPayload threadPayload = {}; @@ -3261,14 +3260,13 @@ TEST(KernelTest, givenKernelLocalIdGenerationByRuntimeFalseWhenGettingStartOffse auto allocationOffset = mockKernel.kernelInfo.getGraphicsAllocation()->getGpuAddressToPatch(); mockKernel.mockKernel->setStartOffset(128); - auto offset = mockKernel.mockKernel->getKernelStartOffset(false, true, false, rootDeviceIndex); + auto offset = mockKernel.mockKernel->getKernelStartOffset(false, true, false); EXPECT_EQ(allocationOffset + 256u, offset); device->getMemoryManager()->freeGraphicsMemory(mockKernel.kernelInfo.getGraphicsAllocation()); } TEST(KernelTest, givenKernelLocalIdGenerationByRuntimeTrueAndLocalIdsUsedWhenGettingStartOffsetThenOffsetToSkipPerThreadDataLoadIsNotAdded) { auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); - auto rootDeviceIndex = device->getRootDeviceIndex(); MockKernelWithInternals mockKernel(*device); SPatchThreadPayload threadPayload = {}; @@ -3280,14 +3278,13 @@ TEST(KernelTest, givenKernelLocalIdGenerationByRuntimeTrueAndLocalIdsUsedWhenGet auto allocationOffset = mockKernel.kernelInfo.getGraphicsAllocation()->getGpuAddressToPatch(); mockKernel.mockKernel->setStartOffset(128); - auto offset = mockKernel.mockKernel->getKernelStartOffset(true, true, false, rootDeviceIndex); + auto offset = mockKernel.mockKernel->getKernelStartOffset(true, true, false); EXPECT_EQ(allocationOffset + 128u, offset); device->getMemoryManager()->freeGraphicsMemory(mockKernel.kernelInfo.getGraphicsAllocation()); } TEST(KernelTest, givenKernelLocalIdGenerationByRuntimeFalseAndLocalIdsNotUsedWhenGettingStartOffsetThenOffsetToSkipPerThreadDataLoadIsNotAdded) { auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); - auto rootDeviceIndex = device->getRootDeviceIndex(); MockKernelWithInternals mockKernel(*device); SPatchThreadPayload threadPayload = {}; @@ -3299,7 +3296,7 @@ TEST(KernelTest, givenKernelLocalIdGenerationByRuntimeFalseAndLocalIdsNotUsedWhe auto allocationOffset = mockKernel.kernelInfo.getGraphicsAllocation()->getGpuAddressToPatch(); mockKernel.mockKernel->setStartOffset(128); - auto offset = mockKernel.mockKernel->getKernelStartOffset(false, false, false, rootDeviceIndex); + auto offset = mockKernel.mockKernel->getKernelStartOffset(false, false, false); EXPECT_EQ(allocationOffset + 128u, offset); device->getMemoryManager()->freeGraphicsMemory(mockKernel.kernelInfo.getGraphicsAllocation()); } @@ -3311,14 +3308,14 @@ TEST(KernelTest, givenKernelWhenForcePerDssBackedBufferProgrammingIsSetThenKerne auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); MockKernelWithInternals kernel(*device); - EXPECT_TRUE(kernel.mockKernel->requiresPerDssBackedBuffer(device->getRootDeviceIndex())); + EXPECT_TRUE(kernel.mockKernel->requiresPerDssBackedBuffer()); } TEST(KernelTest, givenKernelWhenForcePerDssBackedBufferProgrammingIsNotSetThenKernelDoesntRequirePerDssBackedBuffer) { auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); MockKernelWithInternals kernel(*device); - EXPECT_FALSE(kernel.mockKernel->requiresPerDssBackedBuffer(device->getRootDeviceIndex())); + EXPECT_FALSE(kernel.mockKernel->requiresPerDssBackedBuffer()); } TEST(KernelTest, whenKernelIsInitializedThenThreadArbitrationPolicyIsSetToDefaultValue) { @@ -3450,16 +3447,14 @@ TEST(KernelCreateTest, whenInitFailedThenReturnNull) { MockClDevice mDevice{new MockDevice}; } mockProgram; struct MockKernel { - MockKernel(MockProgram *, const KernelInfoContainer &, ClDevice &) {} + MockKernel(MockProgram *, const KernelInfo &, ClDevice &) {} int initialize() { return -1; }; }; - KernelInfoContainer kernelInfos; KernelInfo info; info.gpuPointerSize = 8; - kernelInfos.push_back(&info); - auto ret = Kernel::create(&mockProgram, kernelInfos, mockProgram.mDevice, nullptr); + auto ret = Kernel::create(&mockProgram, info, mockProgram.mDevice, nullptr); EXPECT_EQ(nullptr, ret); } diff --git a/opencl/test/unit_test/kernel/kernel_transformable_tests.cpp b/opencl/test/unit_test/kernel/kernel_transformable_tests.cpp index 44a4c85217..a3dba02436 100644 --- a/opencl/test/unit_test/kernel/kernel_transformable_tests.cpp +++ b/opencl/test/unit_test/kernel/kernel_transformable_tests.cpp @@ -48,7 +48,7 @@ class KernelTransformableTest : public ::testing::Test { pKernelInfo->argumentsToPatchNum = 4; program = std::make_unique(context.get(), false, toClDeviceVector(*context->getDevice(0))); - pKernel.reset(new MockKernel(program.get(), MockKernel::toKernelInfoContainer(*pKernelInfo, rootDeviceIndex), *deviceFactory.rootDevices[rootDeviceIndex])); + pKernel.reset(new MockKernel(program.get(), *pKernelInfo, *deviceFactory.rootDevices[rootDeviceIndex])); ASSERT_EQ(CL_SUCCESS, pKernel->initialize()); pKernel->setKernelArgHandler(0, &Kernel::setArgSampler); @@ -98,7 +98,7 @@ HWTEST_F(KernelTransformableTest, givenKernelThatCannotTranformImagesWithTwoTran pKernel->setArg(2, sizeof(clImage), &clImage); pKernel->setArg(3, sizeof(clImage), &clImage); - auto ssh = pKernel->getSurfaceStateHeap(rootDeviceIndex); + auto ssh = pKernel->getSurfaceStateHeap(); auto firstSurfaceState = reinterpret_cast(ptrOffset(ssh, firstImageOffset)); EXPECT_EQ(SURFACE_TYPE::SURFACE_TYPE_SURFTYPE_3D, firstSurfaceState->getSurfaceType()); @@ -125,7 +125,7 @@ HWTEST_F(KernelTransformableTest, givenKernelWithTwoTransformableImagesAndTwoTra pKernel->setArg(2, sizeof(clImage), &clImage); pKernel->setArg(3, sizeof(clImage), &clImage); - auto ssh = pKernel->getSurfaceStateHeap(rootDeviceIndex); + auto ssh = pKernel->getSurfaceStateHeap(); auto firstSurfaceState = reinterpret_cast(ptrOffset(ssh, firstImageOffset)); EXPECT_EQ(SURFACE_TYPE::SURFACE_TYPE_SURFTYPE_2D, firstSurfaceState->getSurfaceType()); @@ -152,7 +152,7 @@ HWTEST_F(KernelTransformableTest, givenKernelWithTwoTransformableImagesAndTwoTra pKernel->setArg(2, sizeof(clImage), &clImage); pKernel->setArg(3, sizeof(clImage), &clImage); - auto ssh = pKernel->getSurfaceStateHeap(rootDeviceIndex); + auto ssh = pKernel->getSurfaceStateHeap(); auto firstSurfaceState = reinterpret_cast(ptrOffset(ssh, firstImageOffset)); auto secondSurfaceState = reinterpret_cast(ptrOffset(ssh, secondImageOffset)); @@ -184,7 +184,7 @@ HWTEST_F(KernelTransformableTest, givenKernelWithOneTransformableImageAndTwoTran pKernel->setArg(2, sizeof(clImage), &clImage); pKernel->setArg(3, sizeof(clImage), &clImage); - auto ssh = pKernel->getSurfaceStateHeap(rootDeviceIndex); + auto ssh = pKernel->getSurfaceStateHeap(); auto firstSurfaceState = reinterpret_cast(ptrOffset(ssh, firstImageOffset)); auto secondSurfaceState = reinterpret_cast(ptrOffset(ssh, secondImageOffset)); @@ -206,7 +206,7 @@ HWTEST_F(KernelTransformableTest, givenKernelWithImages2dAndTwoTransformableSamp pKernelInfo->kernelArgInfo[2].isTransformable = true; pKernelInfo->kernelArgInfo[3].isTransformable = true; - auto ssh = pKernel->getSurfaceStateHeap(rootDeviceIndex); + auto ssh = pKernel->getSurfaceStateHeap(); auto firstSurfaceState = reinterpret_cast(ptrOffset(ssh, firstImageOffset)); auto secondSurfaceState = reinterpret_cast(ptrOffset(ssh, secondImageOffset)); @@ -238,7 +238,7 @@ HWTEST_F(KernelTransformableTest, givenKernelWithTwoTransformableImagesAndTwoTra pKernel->setArg(2, sizeof(clImage), &clImage); pKernel->setArg(3, sizeof(clImage), &clImage); - auto ssh = pKernel->getSurfaceStateHeap(rootDeviceIndex); + auto ssh = pKernel->getSurfaceStateHeap(); auto firstSurfaceState = reinterpret_cast(ptrOffset(ssh, firstImageOffset)); auto secondSurfaceState = reinterpret_cast(ptrOffset(ssh, secondImageOffset)); @@ -270,7 +270,7 @@ HWTEST_F(KernelTransformableTest, givenKernelWithNonTransformableSamplersWhenRes pKernel->setArg(2, sizeof(clImage), &clImage); pKernel->setArg(3, sizeof(clImage), &clImage); - auto ssh = pKernel->getSurfaceStateHeap(rootDeviceIndex); + auto ssh = pKernel->getSurfaceStateHeap(); auto firstSurfaceState = reinterpret_cast(ptrOffset(ssh, firstImageOffset)); auto secondSurfaceState = reinterpret_cast(ptrOffset(ssh, secondImageOffset)); @@ -308,7 +308,7 @@ HWTEST_F(KernelTransformableTest, givenKernelWithoutSamplersAndTransformableImag pKernel->setArg(2, sizeof(clImage), &clImage); pKernel->setArg(3, sizeof(clImage), &clImage); - auto ssh = pKernel->getSurfaceStateHeap(rootDeviceIndex); + auto ssh = pKernel->getSurfaceStateHeap(); auto firstSurfaceState = reinterpret_cast(ptrOffset(ssh, firstImageOffset)); auto secondSurfaceState = reinterpret_cast(ptrOffset(ssh, secondImageOffset)); diff --git a/opencl/test/unit_test/kernel/parent_kernel_tests.cpp b/opencl/test/unit_test/kernel/parent_kernel_tests.cpp index c6fa88e595..3b67959e64 100644 --- a/opencl/test/unit_test/kernel/parent_kernel_tests.cpp +++ b/opencl/test/unit_test/kernel/parent_kernel_tests.cpp @@ -25,7 +25,7 @@ class MockKernelWithArgumentAccess : public Kernel { class ObjectCountsPublic : public Kernel::ObjectCounts { }; - MockKernelWithArgumentAccess(Program *programArg, const KernelInfoContainer &kernelInfoArg, ClDevice &clDeviceArg) : Kernel(programArg, kernelInfoArg, clDeviceArg, false) { + MockKernelWithArgumentAccess(Program *programArg, KernelInfo &kernelInfoArg, ClDevice &clDeviceArg) : Kernel(programArg, kernelInfoArg, clDeviceArg, false) { } void getParentObjectCountsPublic(MockKernelWithArgumentAccess::ObjectCountsPublic &objectCount) { @@ -39,7 +39,7 @@ TEST(ParentKernelTest, WhenArgsAddedThenObjectCountsAreIncremented) { KernelInfo info; info.kernelDescriptor.kernelAttributes.flags.usesDeviceSideEnqueue = true; - MockKernelWithArgumentAccess kernel(&program, MockKernel::toKernelInfoContainer(info, device->getRootDeviceIndex()), *device); + MockKernelWithArgumentAccess kernel(&program, info, *device); std::vector &args = kernel.getKernelArguments(); @@ -61,14 +61,13 @@ TEST(ParentKernelTest, WhenArgsAddedThenObjectCountsAreIncremented) { TEST(ParentKernelTest, WhenPatchingBlocksSimdSizeThenPatchIsAppliedCorrectly) { MockClDevice device{new MockDevice}; - auto rootDeviceIndex = device.getRootDeviceIndex(); MockContext context(&device); std::unique_ptr parentKernel(MockParentKernel::create(context, true)); MockProgram *program = (MockProgram *)parentKernel->mockProgram; - parentKernel->patchBlocksSimdSize(rootDeviceIndex); + parentKernel->patchBlocksSimdSize(); - void *blockSimdSize = ptrOffset(parentKernel->getCrossThreadData(), parentKernel->getKernelInfo(rootDeviceIndex).childrenKernelsIdOffset[0].second); + void *blockSimdSize = ptrOffset(parentKernel->getCrossThreadData(), parentKernel->getKernelInfo().childrenKernelsIdOffset[0].second); uint32_t *simdSize = reinterpret_cast(blockSimdSize); EXPECT_EQ(program->blockKernelManager->getBlockKernelInfo(0)->getMaxSimdSize(), *simdSize); @@ -79,8 +78,7 @@ TEST(ParentKernelTest, GivenParentKernelWhenCheckingForDeviceEnqueueThenTrueIsRe MockContext context(&device); std::unique_ptr parentKernel(MockParentKernel::create(context)); - auto rootDeviceIndex = device.getRootDeviceIndex(); - EXPECT_TRUE(parentKernel->getKernelInfo(rootDeviceIndex).hasDeviceEnqueue()); + EXPECT_TRUE(parentKernel->getKernelInfo().hasDeviceEnqueue()); } TEST(ParentKernelTest, GivenNormalKernelWhenCheckingForDeviceEnqueueThenFalseIsReturned) { @@ -92,14 +90,13 @@ TEST(ParentKernelTest, GivenNormalKernelWhenCheckingForDeviceEnqueueThenFalseIsR TEST(ParentKernelTest, WhenInitializingParentKernelThenBlocksSimdSizeIsPatched) { MockClDevice device{new MockDevice}; - auto rootDeviceIndex = device.getRootDeviceIndex(); MockContext context(&device); std::unique_ptr parentKernel(MockParentKernel::create(context, true)); MockProgram *program = (MockProgram *)parentKernel->mockProgram; parentKernel->initialize(); - void *blockSimdSize = ptrOffset(parentKernel->getCrossThreadData(), parentKernel->getKernelInfo(rootDeviceIndex).childrenKernelsIdOffset[0].second); + void *blockSimdSize = ptrOffset(parentKernel->getCrossThreadData(), parentKernel->getKernelInfo().childrenKernelsIdOffset[0].second); uint32_t *simdSize = reinterpret_cast(blockSimdSize); EXPECT_EQ(program->blockKernelManager->getBlockKernelInfo(0)->getMaxSimdSize(), *simdSize); diff --git a/opencl/test/unit_test/mem_obj/buffer_set_arg_tests.cpp b/opencl/test/unit_test/mem_obj/buffer_set_arg_tests.cpp index 19a3f0c02b..932597e0ff 100644 --- a/opencl/test/unit_test/mem_obj/buffer_set_arg_tests.cpp +++ b/opencl/test/unit_test/mem_obj/buffer_set_arg_tests.cpp @@ -125,7 +125,7 @@ HWTEST_F(BufferSetArgTest, givenSetArgBufferWhenNullArgStatefulThenProgramNullSu using SURFACE_FORMAT = typename RENDER_SURFACE_STATE::SURFACE_FORMAT; auto surfaceState = reinterpret_cast( - ptrOffset(pKernel->getSurfaceStateHeap(rootDeviceIndex), + ptrOffset(pKernel->getSurfaceStateHeap(), pKernelInfo->kernelArgInfo[0].offsetHeap)); pKernelInfo->requiresSshForBuffers = true; @@ -145,7 +145,7 @@ HWTEST_F(BufferSetArgTest, givenSetKernelArgOnReadOnlyBufferThatIsMisalingedWhen using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; auto surfaceState = reinterpret_cast( - ptrOffset(pKernel->getSurfaceStateHeap(rootDeviceIndex), + ptrOffset(pKernel->getSurfaceStateHeap(), pKernelInfo->kernelArgInfo[0].offsetHeap)); pKernelInfo->requiresSshForBuffers = true; @@ -186,7 +186,7 @@ HWTEST_F(BufferSetArgTest, givenSetArgBufferWithNullArgStatelessThenDontProgramN HWTEST_F(BufferSetArgTest, givenNonPureStatefulArgWhenRenderCompressedBufferIsSetThenSetNonAuxMode) { using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; - auto surfaceState = reinterpret_cast(ptrOffset(pKernel->getSurfaceStateHeap(rootDeviceIndex), pKernelInfo->kernelArgInfo[0].offsetHeap)); + auto surfaceState = reinterpret_cast(ptrOffset(pKernel->getSurfaceStateHeap(), pKernelInfo->kernelArgInfo[0].offsetHeap)); auto graphicsAllocation = buffer->getGraphicsAllocation(pClDevice->getRootDeviceIndex()); graphicsAllocation->setAllocationType(GraphicsAllocation::AllocationType::BUFFER_COMPRESSED); graphicsAllocation->setDefaultGmm(new Gmm(pDevice->getGmmClientContext(), graphicsAllocation->getUnderlyingBuffer(), buffer->getSize(), false)); diff --git a/opencl/test/unit_test/mem_obj/image_set_arg_tests.cpp b/opencl/test/unit_test/mem_obj/image_set_arg_tests.cpp index 890c526275..0538beea95 100644 --- a/opencl/test/unit_test/mem_obj/image_set_arg_tests.cpp +++ b/opencl/test/unit_test/mem_obj/image_set_arg_tests.cpp @@ -124,7 +124,7 @@ HWTEST_F(ImageSetArgTest, WhenSettingKernelArgImageThenSurfaceBaseAddressIsSetCo typedef typename FamilyType::RENDER_SURFACE_STATE RENDER_SURFACE_STATE; auto surfaceState = reinterpret_cast( - ptrOffset(pKernel->getSurfaceStateHeap(rootDeviceIndex), + ptrOffset(pKernel->getSurfaceStateHeap(), pKernelInfo->kernelArgInfo[0].offsetHeap)); srcImage->setImageArg(const_cast(surfaceState), false, 0, pClDevice->getRootDeviceIndex()); @@ -199,7 +199,7 @@ HWTEST_F(ImageSetArgTest, givenCubeMapIndexWhenSetKernelArgImageIsCalledThenModi src2dImage->setCubeFaceIndex(cubeFaceIndex); auto surfaceState = reinterpret_cast( - ptrOffset(pKernel->getSurfaceStateHeap(rootDeviceIndex), + ptrOffset(pKernel->getSurfaceStateHeap(), pKernelInfo->kernelArgInfo[0].offsetHeap)); src2dImage->setImageArg(const_cast(surfaceState), false, 0, pClDevice->getRootDeviceIndex()); @@ -302,7 +302,7 @@ HWTEST_F(ImageSetArgTest, givenNonCubeMapIndexWhenSetKernelArgImageIsCalledThenD typedef typename FamilyType::RENDER_SURFACE_STATE RENDER_SURFACE_STATE; auto surfaceState = reinterpret_cast( - ptrOffset(pKernel->getSurfaceStateHeap(rootDeviceIndex), + ptrOffset(pKernel->getSurfaceStateHeap(), pKernelInfo->kernelArgInfo[0].offsetHeap)); EXPECT_EQ(srcImage->getCubeFaceIndex(), __GMM_NO_CUBE_MAP); @@ -331,7 +331,7 @@ HWTEST_F(ImageSetArgTest, givenOffsetedBufferWhenSetKernelArgImageIscalledThenFu typedef typename FamilyType::RENDER_SURFACE_STATE RENDER_SURFACE_STATE; auto surfaceState = reinterpret_cast( - ptrOffset(pKernel->getSurfaceStateHeap(rootDeviceIndex), + ptrOffset(pKernel->getSurfaceStateHeap(), pKernelInfo->kernelArgInfo[0].offsetHeap)); auto graphicsAllocation = srcAllocation; @@ -361,7 +361,7 @@ HWTEST_F(ImageSetArgTest, WhenSettingKernelArgThenPropertiesAreSetCorrectly) { ASSERT_EQ(CL_SUCCESS, retVal); auto surfaceState = reinterpret_cast( - ptrOffset(pKernel->getSurfaceStateHeap(rootDeviceIndex), + ptrOffset(pKernel->getSurfaceStateHeap(), pKernelInfo->kernelArgInfo[0].offsetHeap)); size_t rPitch = srcImage->getImageDesc().image_row_pitch; @@ -412,7 +412,7 @@ HWTEST_F(ImageSetArgTest, givenImage2DWithMipMapsWhenSetKernelArgIsCalledThenMip ASSERT_EQ(CL_SUCCESS, retVal); auto surfaceState = reinterpret_cast( - ptrOffset(pKernel->getSurfaceStateHeap(rootDeviceIndex), + ptrOffset(pKernel->getSurfaceStateHeap(), pKernelInfo->kernelArgInfo[0].offsetHeap)); EXPECT_EQ((uint32_t)mipLevel, surfaceState->getSurfaceMinLod()); EXPECT_EQ((uint32_t)mipCount, surfaceState->getMipCountLod() + 1); @@ -433,7 +433,7 @@ HWTEST_F(ImageSetArgTest, Given2dArrayWhenSettingKernelArgThenPropertiesAreSetCo ASSERT_EQ(CL_SUCCESS, retVal); auto surfaceState = reinterpret_cast( - ptrOffset(pKernel->getSurfaceStateHeap(rootDeviceIndex), + ptrOffset(pKernel->getSurfaceStateHeap(), pKernelInfo->kernelArgInfo[0].offsetHeap)); auto surfaceAddress = surfaceState->getSurfaceBaseAddress(); @@ -481,7 +481,7 @@ HWTEST_F(ImageSetArgTest, Given1dArrayWhenSettingKernelArgThenPropertiesAreSetCo ASSERT_EQ(CL_SUCCESS, retVal); auto surfaceState = reinterpret_cast( - ptrOffset(pKernel->getSurfaceStateHeap(rootDeviceIndex), + ptrOffset(pKernel->getSurfaceStateHeap(), pKernelInfo->kernelArgInfo[0].offsetHeap)); auto surfaceAddress = surfaceState->getSurfaceBaseAddress(); @@ -537,7 +537,7 @@ HWTEST_F(ImageSetArgTest, givenMcsAllocationWhenSetArgIsCalledWithoutUnifiedAuxC ASSERT_EQ(CL_SUCCESS, retVal); auto surfaceState = reinterpret_cast( - ptrOffset(pKernel->getSurfaceStateHeap(rootDeviceIndex), + ptrOffset(pKernel->getSurfaceStateHeap(), pKernelInfo->kernelArgInfo[0].offsetHeap)); EXPECT_FALSE(Image::isDepthFormat(image->getImageFormat())); @@ -573,7 +573,7 @@ HWTEST_F(ImageSetArgTest, givenDepthFormatWhenSetArgIsCalledThenProgramAuxFields ASSERT_EQ(CL_SUCCESS, retVal); auto surfaceState = reinterpret_cast( - ptrOffset(pKernel->getSurfaceStateHeap(rootDeviceIndex), + ptrOffset(pKernel->getSurfaceStateHeap(), pKernelInfo->kernelArgInfo[0].offsetHeap)); EXPECT_TRUE(Image::isDepthFormat(image->getImageFormat())); @@ -604,7 +604,7 @@ HWTEST_F(ImageSetArgTest, givenMultisampledR32Floatx8x24DepthStencilFormatWhenSe retVal = clSetKernelArg(pMultiDeviceKernel, 0, sizeof(memObj), &memObj); ASSERT_EQ(CL_SUCCESS, retVal); - auto surfaceState = reinterpret_cast(ptrOffset(pKernel->getSurfaceStateHeap(rootDeviceIndex), + auto surfaceState = reinterpret_cast(ptrOffset(pKernel->getSurfaceStateHeap(), pKernelInfo->kernelArgInfo[0].offsetHeap)); EXPECT_TRUE(Image::isDepthFormat(image->getImageFormat())); @@ -631,7 +631,7 @@ HWTEST_F(ImageSetArgTest, givenMcsAllocationAndRenderCompressionWhenSetArgOnMult retVal = clSetKernelArg(pMultiDeviceKernel, 0, sizeof(memObj), &memObj); ASSERT_EQ(CL_SUCCESS, retVal); - auto surfaceState = reinterpret_cast(ptrOffset(pKernel->getSurfaceStateHeap(rootDeviceIndex), + auto surfaceState = reinterpret_cast(ptrOffset(pKernel->getSurfaceStateHeap(), pKernelInfo->kernelArgInfo[0].offsetHeap)); EXPECT_TRUE(surfaceState->getMultisampledSurfaceStorageFormat() == @@ -661,7 +661,7 @@ HWTEST_F(ImageSetArgTest, givenDepthFormatAndRenderCompressionWhenSetArgOnMultis retVal = clSetKernelArg(pMultiDeviceKernel, 0, sizeof(memObj), &memObj); ASSERT_EQ(CL_SUCCESS, retVal); - auto surfaceState = reinterpret_cast(ptrOffset(pKernel->getSurfaceStateHeap(rootDeviceIndex), + auto surfaceState = reinterpret_cast(ptrOffset(pKernel->getSurfaceStateHeap(), pKernelInfo->kernelArgInfo[0].offsetHeap)); EXPECT_TRUE(Image::isDepthFormat(image->getImageFormat())); @@ -698,7 +698,7 @@ HWTEST_F(ImageSetArgTest, givenMcsAllocationWhenSetArgIsCalledWithUnifiedAuxCapa retVal = clSetKernelArg(pMultiDeviceKernel, 0, sizeof(memObj), &memObj); ASSERT_EQ(CL_SUCCESS, retVal); - auto surfaceState = reinterpret_cast(ptrOffset(pKernel->getSurfaceStateHeap(rootDeviceIndex), + auto surfaceState = reinterpret_cast(ptrOffset(pKernel->getSurfaceStateHeap(), pKernelInfo->kernelArgInfo[0].offsetHeap)); EXPECT_TRUE(EncodeSurfaceState::isAuxModeEnabled(surfaceState, mcsAlloc->getDefaultGmm())); @@ -729,7 +729,7 @@ HWTEST_F(ImageSetArgTest, givenMcsAllocationWhenSetArgIsCalledWithUnifiedAuxCapa retVal = clSetKernelArg(pMultiDeviceKernel, 0, sizeof(memObj), &memObj); ASSERT_EQ(CL_SUCCESS, retVal); - auto surfaceState = reinterpret_cast(ptrOffset(pKernel->getSurfaceStateHeap(rootDeviceIndex), + auto surfaceState = reinterpret_cast(ptrOffset(pKernel->getSurfaceStateHeap(), pKernelInfo->kernelArgInfo[0].offsetHeap)); EXPECT_NE(0u, surfaceState->getAuxiliarySurfaceBaseAddress()); @@ -764,7 +764,7 @@ HWTEST_F(ImageSetArgTest, givenMcsAllocationWhenSetArgIsCalledWithUnifiedAuxCapa retVal = clSetKernelArg(pMultiDeviceKernel, 0, sizeof(memObj), &memObj); ASSERT_EQ(CL_SUCCESS, retVal); - auto surfaceState = reinterpret_cast(ptrOffset(pKernel->getSurfaceStateHeap(rootDeviceIndex), + auto surfaceState = reinterpret_cast(ptrOffset(pKernel->getSurfaceStateHeap(), pKernelInfo->kernelArgInfo[0].offsetHeap)); EXPECT_EQ(pitchValue, surfaceState->getAuxiliarySurfacePitch()); @@ -799,7 +799,7 @@ HWTEST_F(ImageSetArgTest, GivenImageFrom1dBufferWhenSettingKernelArgThenProperti ASSERT_EQ(CL_SUCCESS, retVal); auto surfaceState = reinterpret_cast( - ptrOffset(pKernel->getSurfaceStateHeap(rootDeviceIndex), + ptrOffset(pKernel->getSurfaceStateHeap(), pKernelInfo->kernelArgInfo[0].offsetHeap)); auto surfaceAddress = surfaceState->getSurfaceBaseAddress(); auto image = castToObject(imageFromBuffer); @@ -843,7 +843,7 @@ HWTEST_F(ImageSetArgTest, GivenImageWithClLuminanceFormatWhenSettingKernelArgThe ASSERT_EQ(CL_SUCCESS, retVal); auto surfaceState = reinterpret_cast( - ptrOffset(pKernel->getSurfaceStateHeap(rootDeviceIndex), + ptrOffset(pKernel->getSurfaceStateHeap(), pKernelInfo->kernelArgInfo[0].offsetHeap)); //for CL_LUMINANCE format we override channels to RED to be spec complaint. EXPECT_EQ(RENDER_SURFACE_STATE::SHADER_CHANNEL_SELECT_RED, surfaceState->getShaderChannelSelectRed()); @@ -972,7 +972,7 @@ HWTEST_F(ImageMediaBlockSetArgTest, WhenSettingKernelArgImageThenPropertiesAreCo ASSERT_EQ(CL_SUCCESS, retVal); auto surfaceState = reinterpret_cast( - ptrOffset(pKernel->getSurfaceStateHeap(rootDeviceIndex), + ptrOffset(pKernel->getSurfaceStateHeap(), pKernelInfo->kernelArgInfo[0].offsetHeap)); size_t rPitch = srcImage->getImageDesc().image_row_pitch; diff --git a/opencl/test/unit_test/memory_manager/memory_manager_tests.cpp b/opencl/test/unit_test/memory_manager/memory_manager_tests.cpp index 6f67712929..bb6fcef485 100644 --- a/opencl/test/unit_test/memory_manager/memory_manager_tests.cpp +++ b/opencl/test/unit_test/memory_manager/memory_manager_tests.cpp @@ -517,7 +517,6 @@ class MockPrintfHandler : public PrintfHandler { TEST_F(MemoryAllocatorTest, givenStatelessKernelWithPrintfWhenPrintfSurfaceIsCreatedThenPrintfSurfaceIsPatchedWithBaseAddressOffset) { auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); - auto rootDeviceIndex = device->getRootDeviceIndex(); MockKernelWithInternals kernel(*device); MockMultiDispatchInfo multiDispatchInfo(device.get(), kernel.mockKernel); @@ -542,11 +541,11 @@ TEST_F(MemoryAllocatorTest, givenStatelessKernelWithPrintfWhenPrintfSurfaceIsCre auto allocationAddress = printfAllocation->getGpuAddressToPatch(); auto printfPatchAddress = ptrOffset(reinterpret_cast(kernel.mockKernel->getCrossThreadData()), - kernel.mockKernel->getKernelInfo(rootDeviceIndex).kernelDescriptor.payloadMappings.implicitArgs.printfSurfaceAddress.stateless); + kernel.mockKernel->getKernelInfo().kernelDescriptor.payloadMappings.implicitArgs.printfSurfaceAddress.stateless); EXPECT_EQ(allocationAddress, *(uintptr_t *)printfPatchAddress); - EXPECT_EQ(0u, kernel.mockKernel->getSurfaceStateHeapSize(rootDeviceIndex)); + EXPECT_EQ(0u, kernel.mockKernel->getSurfaceStateHeapSize()); delete printfHandler; } @@ -577,12 +576,12 @@ HWTEST_F(MemoryAllocatorTest, givenStatefulKernelWithPrintfWhenPrintfSurfaceIsCr auto printfAllocation = printfHandler->getSurface(); auto allocationAddress = printfAllocation->getGpuAddress(); - EXPECT_NE(0u, kernel.mockKernel->getSurfaceStateHeapSize(device->getRootDeviceIndex())); + EXPECT_NE(0u, kernel.mockKernel->getSurfaceStateHeapSize()); typedef typename FamilyType::RENDER_SURFACE_STATE RENDER_SURFACE_STATE; auto surfaceState = reinterpret_cast( - ptrOffset(kernel.mockKernel->getSurfaceStateHeap(device->getRootDeviceIndex()), - kernel.mockKernel->getKernelInfo(rootDeviceIndex).kernelDescriptor.payloadMappings.implicitArgs.printfSurfaceAddress.bindful)); + ptrOffset(kernel.mockKernel->getSurfaceStateHeap(), + kernel.mockKernel->getKernelInfo().kernelDescriptor.payloadMappings.implicitArgs.printfSurfaceAddress.bindful)); auto surfaceAddress = surfaceState->getSurfaceBaseAddress(); EXPECT_EQ(allocationAddress, surfaceAddress); diff --git a/opencl/test/unit_test/mocks/mock_context.cpp b/opencl/test/unit_test/mocks/mock_context.cpp index 97e5323586..7559c41974 100644 --- a/opencl/test/unit_test/mocks/mock_context.cpp +++ b/opencl/test/unit_test/mocks/mock_context.cpp @@ -154,21 +154,16 @@ SchedulerKernel &MockContext::getSchedulerKernel() { schedulerBuiltIn->pProgram = program; - KernelInfoContainer kernelInfos; - kernelInfos.resize(getMaxRootDeviceIndex() + 1); - for (auto rootDeviceIndex : rootDeviceIndices) { - auto kernelInfo = schedulerBuiltIn->pProgram->getKernelInfo(SchedulerKernel::schedulerName, rootDeviceIndex); - DEBUG_BREAK_IF(!kernelInfo); - kernelInfos[rootDeviceIndex] = kernelInfo; - } + auto kernelInfo = schedulerBuiltIn->pProgram->getKernelInfo(SchedulerKernel::schedulerName, clDevice->getRootDeviceIndex()); + DEBUG_BREAK_IF(!kernelInfo); schedulerBuiltIn->pKernel = Kernel::create( schedulerBuiltIn->pProgram, - kernelInfos, + *kernelInfo, *clDevice, &retVal); - UNRECOVERABLE_IF(schedulerBuiltIn->pKernel->getScratchSize(clDevice->getRootDeviceIndex()) != 0); + UNRECOVERABLE_IF(schedulerBuiltIn->pKernel->getScratchSize() != 0); DEBUG_BREAK_IF(retVal != CL_SUCCESS); }; diff --git a/opencl/test/unit_test/mocks/mock_kernel.h b/opencl/test/unit_test/mocks/mock_kernel.h index efd10b03f8..f26e0afbdc 100644 --- a/opencl/test/unit_test/mocks/mock_kernel.h +++ b/opencl/test/unit_test/mocks/mock_kernel.h @@ -89,9 +89,9 @@ class MockMultiDeviceKernel : public MultiDeviceKernel { if (kernelVector[rootDeviceIndex]) { continue; } - kernelVector[rootDeviceIndex] = new kernel_t(programArg, kernelInfoArg, *pDevice); + kernelVector[rootDeviceIndex] = new kernel_t(programArg, *kernelInfoArg[rootDeviceIndex], *pDevice); } - return new MockMultiDeviceKernel(std::move(kernelVector)); + return new MockMultiDeviceKernel(std::move(kernelVector), kernelInfoArg); } void takeOwnership() const override { MultiDeviceKernel::takeOwnership(); @@ -212,7 +212,7 @@ class MockKernel : public Kernel { } }; - MockKernel(Program *programArg, const KernelInfoContainer &kernelInfoArg, ClDevice &clDeviceArg, bool scheduler = false) + MockKernel(Program *programArg, const KernelInfo &kernelInfoArg, ClDevice &clDeviceArg, bool scheduler = false) : Kernel(programArg, kernelInfoArg, clDeviceArg, scheduler) { } @@ -251,11 +251,7 @@ class MockKernel : public Kernel { info->crossThreadData = new char[crossThreadSize]; - auto rootDeviceIndex = device.getRootDeviceIndex(); - KernelInfoContainer kernelInfos; - kernelInfos.resize(rootDeviceIndex + 1); - kernelInfos[rootDeviceIndex] = info; - auto kernel = new KernelType(program, kernelInfos, *device.getSpecializedDevice()); + auto kernel = new KernelType(program, *info, *device.getSpecializedDevice()); kernel->crossThreadData = new char[crossThreadSize]; memset(kernel->crossThreadData, 0, crossThreadSize); kernel->crossThreadDataSize = crossThreadSize; @@ -397,7 +393,7 @@ class MockKernelWithInternals { } mockProgram = new MockProgram(context, false, deviceVector); - mockKernel = new MockKernel(mockProgram, kernelInfos, *deviceVector[0]); + mockKernel = new MockKernel(mockProgram, kernelInfo, *deviceVector[0]); mockKernel->setCrossThreadData(&crossThreadData, sizeof(crossThreadData)); KernelVectorType mockKernels; mockKernels.resize(mockProgram->getMaxRootDeviceIndex() + 1); @@ -407,7 +403,7 @@ class MockKernelWithInternals { mockKernels[rootDeviceIndex] = mockKernel; } } - mockMultiDeviceKernel = new MockMultiDeviceKernel(std::move(mockKernels)); + mockMultiDeviceKernel = new MockMultiDeviceKernel(std::move(mockKernels), kernelInfos); mockKernel->setSshLocal(&sshLocal, sizeof(sshLocal)); @@ -478,19 +474,15 @@ class MockKernelWithInternals { class MockParentKernel : public Kernel { public: using Kernel::auxTranslationRequired; - using Kernel::kernelInfos; + using Kernel::kernelInfo; using Kernel::patchBlocksCurbeWithConstantValues; using Kernel::pSshLocal; using Kernel::sshLocalSize; static MockParentKernel *create(Context &context, bool addChildSimdSize = false, bool addChildGlobalMemory = false, bool addChildConstantMemory = false, bool addPrintfForParent = true, bool addPrintfForBlock = true) { auto clDevice = context.getDevice(0); - auto rootDeviceIndex = clDevice->getRootDeviceIndex(); - KernelInfoContainer kernelInfos; - kernelInfos.resize(rootDeviceIndex + 1); auto info = new KernelInfo(); - kernelInfos[rootDeviceIndex] = info; const size_t crossThreadSize = 160; uint32_t crossThreadOffset = 0; uint32_t crossThreadOffsetBlock = 0; @@ -547,7 +539,7 @@ class MockParentKernel : public Kernel { UNRECOVERABLE_IF(crossThreadSize < crossThreadOffset + 8); info->crossThreadData = new char[crossThreadSize]; - auto parent = new MockParentKernel(mockProgram, kernelInfos); + auto parent = new MockParentKernel(mockProgram, *info); parent->crossThreadData = new char[crossThreadSize]; memset(parent->crossThreadData, 0, crossThreadSize); parent->crossThreadDataSize = crossThreadSize; @@ -646,23 +638,18 @@ class MockParentKernel : public Kernel { return parent; } - MockParentKernel(Program *programArg, const KernelInfoContainer &kernelInfoArg) : Kernel(programArg, kernelInfoArg, *programArg->getDevices()[0], false) { + MockParentKernel(Program *programArg, const KernelInfo &kernelInfoArg) : Kernel(programArg, kernelInfoArg, *programArg->getDevices()[0], false) { } ~MockParentKernel() override { - for (auto &pKernelInfo : kernelInfos) { - if (!pKernelInfo) { - continue; - } - auto &kernelInfo = *pKernelInfo; - delete &kernelInfo; - BlockKernelManager *blockManager = program->getBlockKernelManager(); + delete &kernelInfo; + BlockKernelManager *blockManager = program->getBlockKernelManager(); - for (uint32_t i = 0; i < blockManager->getCount(); i++) { - const KernelInfo *blockInfo = blockManager->getBlockKernelInfo(i); - delete[](uint64_t *) blockInfo->heapInfo.pDsh; - } + for (uint32_t i = 0; i < blockManager->getCount(); i++) { + const KernelInfo *blockInfo = blockManager->getBlockKernelInfo(i); + delete[](uint64_t *) blockInfo->heapInfo.pDsh; } + if (mockProgram) { mockProgram->decRefInternal(); } @@ -700,17 +687,17 @@ class MockSchedulerKernel : public SchedulerKernel { using Kernel::numWorkGroupsX; using Kernel::numWorkGroupsY; using Kernel::numWorkGroupsZ; - MockSchedulerKernel(Program *programArg, const KernelInfoContainer &kernelInfoArg, ClDevice &clDeviceArg) : SchedulerKernel(programArg, kernelInfoArg, clDeviceArg){}; + MockSchedulerKernel(Program *programArg, const KernelInfo &kernelInfoArg, ClDevice &clDeviceArg) : SchedulerKernel(programArg, kernelInfoArg, clDeviceArg){}; }; class MockDebugKernel : public MockKernel { public: - MockDebugKernel(Program *program, KernelInfoContainer &kernelInfos, ClDevice &clDeviceArg) : MockKernel(program, kernelInfos, clDeviceArg) { - if (!isValidOffset(kernelInfos[0]->kernelDescriptor.payloadMappings.implicitArgs.systemThreadSurfaceAddress.bindful)) { + MockDebugKernel(Program *program, const KernelInfo &kernelInfo, ClDevice &clDeviceArg) : MockKernel(program, kernelInfo, clDeviceArg) { + if (!isValidOffset(kernelInfo.kernelDescriptor.payloadMappings.implicitArgs.systemThreadSurfaceAddress.bindful)) { SPatchAllocateSystemThreadSurface allocateSystemThreadSurface = {}; allocateSystemThreadSurface.Offset = 0; allocateSystemThreadSurface.PerThreadSystemThreadSurfaceSize = MockDebugKernel::perThreadSystemThreadSurfaceSize; - populateKernelDescriptor(const_cast(kernelInfos[0]->kernelDescriptor), allocateSystemThreadSurface); + populateKernelDescriptor(const_cast(kernelInfo.kernelDescriptor), allocateSystemThreadSurface); } } diff --git a/opencl/test/unit_test/mocks/mock_program.h b/opencl/test/unit_test/mocks/mock_program.h index b2adbccdc5..558e9e3be4 100644 --- a/opencl/test/unit_test/mocks/mock_program.h +++ b/opencl/test/unit_test/mocks/mock_program.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2017-2020 Intel Corporation + * Copyright (C) 2017-2021 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -171,6 +171,10 @@ class MockProgram : public Program { Program::initInternalOptions(internalOptions); }; + const KernelInfo &getKernelInfoForKernel(const char *kernelName) const { + return *getKernelInfo(kernelName, getDevices()[0]->getRootDeviceIndex()); + } + const KernelInfoContainer getKernelInfosForKernel(const char *kernelName) const { KernelInfoContainer kernelInfos; kernelInfos.resize(getMaxRootDeviceIndex() + 1); diff --git a/opencl/test/unit_test/preemption/preemption_tests.cpp b/opencl/test/unit_test/preemption/preemption_tests.cpp index 3099b8fca1..b53fce1fc8 100644 --- a/opencl/test/unit_test/preemption/preemption_tests.cpp +++ b/opencl/test/unit_test/preemption/preemption_tests.cpp @@ -59,8 +59,7 @@ TEST_F(ThreadGroupPreemptionTests, GivenDisallowByReadWriteFencesWaThenThreadGro TEST_F(ThreadGroupPreemptionTests, GivenDisallowBySchedulerKernelThenThreadGroupPreemptionIsDisabled) { PreemptionFlags flags = {}; - kernel.reset(new MockKernel(program.get(), - MockKernel::toKernelInfoContainer(*kernelInfo, rootDeviceIndex), *device, true)); + kernel.reset(new MockKernel(program.get(), *kernelInfo, *device, true)); PreemptionHelper::setPreemptionLevelFlags(flags, device->getDevice(), kernel.get()); EXPECT_FALSE(PreemptionHelper::allowThreadGroupPreemption(flags)); EXPECT_EQ(PreemptionMode::MidBatch, PreemptionHelper::taskPreemptionMode(device->getPreemptionMode(), flags)); @@ -69,7 +68,7 @@ TEST_F(ThreadGroupPreemptionTests, GivenDisallowBySchedulerKernelThenThreadGroup TEST_F(ThreadGroupPreemptionTests, GivenDisallowByVmeKernelThenThreadGroupPreemptionIsDisabled) { PreemptionFlags flags = {}; kernelInfo->isVmeWorkload = true; - kernel.reset(new MockKernel(program.get(), MockKernel::toKernelInfoContainer(*kernelInfo, rootDeviceIndex), *device)); + kernel.reset(new MockKernel(program.get(), *kernelInfo, *device)); PreemptionHelper::setPreemptionLevelFlags(flags, device->getDevice(), kernel.get()); EXPECT_FALSE(PreemptionHelper::allowThreadGroupPreemption(flags)); EXPECT_EQ(PreemptionMode::MidBatch, PreemptionHelper::taskPreemptionMode(device->getPreemptionMode(), flags)); @@ -140,7 +139,7 @@ TEST_F(ThreadGroupPreemptionTests, GivenValidKernelsInMdiAndDisabledPremptionThe } TEST_F(ThreadGroupPreemptionTests, GivenAtLeastOneInvalidKernelInMdiThenPreemptionIsDisabled) { - MockKernel schedulerKernel(program.get(), MockKernel::toKernelInfoContainer(*kernelInfo, rootDeviceIndex), *device, true); + MockKernel schedulerKernel(program.get(), *kernelInfo, *device, true); DispatchInfo schedulerDispatchInfo(device.get(), &schedulerKernel, 1, Vec3(1, 1, 1), Vec3(1, 1, 1), Vec3(0, 0, 0)); PreemptionFlags flags = {}; @@ -175,7 +174,7 @@ TEST_F(MidThreadPreemptionTests, GivenMidThreadPreemptionDeviceSupportPreemption device->setPreemptionMode(PreemptionMode::MidThread); device->sharedDeviceInfo.vmeAvcSupportsPreemption = true; kernelInfo->isVmeWorkload = true; - kernel.reset(new MockKernel(program.get(), MockKernel::toKernelInfoContainer(*kernelInfo, rootDeviceIndex), *device)); + kernel.reset(new MockKernel(program.get(), *kernelInfo, *device)); PreemptionHelper::setPreemptionLevelFlags(flags, device->getDevice(), kernel.get()); EXPECT_TRUE(PreemptionHelper::allowMidThreadPreemption(flags)); } @@ -202,7 +201,7 @@ TEST_F(MidThreadPreemptionTests, GivenDisallowMidThreadPreemptionByVmeKernelThen device->setPreemptionMode(PreemptionMode::MidThread); device->sharedDeviceInfo.vmeAvcSupportsPreemption = false; kernelInfo->isVmeWorkload = true; - kernel.reset(new MockKernel(program.get(), MockKernel::toKernelInfoContainer(*kernelInfo, rootDeviceIndex), *device)); + kernel.reset(new MockKernel(program.get(), *kernelInfo, *device)); PreemptionHelper::setPreemptionLevelFlags(flags, device->getDevice(), kernel.get()); EXPECT_FALSE(PreemptionHelper::allowMidThreadPreemption(flags)); } @@ -229,7 +228,7 @@ TEST_F(MidThreadPreemptionTests, GivenTaskPreemptionDisallowMidThreadByVmeKernel PreemptionFlags flags = {}; kernelInfo->isVmeWorkload = true; device->sharedDeviceInfo.vmeAvcSupportsPreemption = false; - kernel.reset(new MockKernel(program.get(), MockKernel::toKernelInfoContainer(*kernelInfo, rootDeviceIndex), *device)); + kernel.reset(new MockKernel(program.get(), *kernelInfo, *device)); device->setPreemptionMode(PreemptionMode::MidThread); PreemptionHelper::setPreemptionLevelFlags(flags, device->getDevice(), kernel.get()); PreemptionMode outMode = PreemptionHelper::taskPreemptionMode(device->getPreemptionMode(), flags); @@ -250,7 +249,7 @@ TEST_F(MidThreadPreemptionTests, GivenTaskPreemptionAllowDeviceSupportsPreemptio PreemptionFlags flags = {}; kernelInfo->kernelDescriptor.kernelAttributes.flags.requiresDisabledMidThreadPreemption = false; kernelInfo->isVmeWorkload = true; - kernel.reset(new MockKernel(program.get(), MockKernel::toKernelInfoContainer(*kernelInfo, rootDeviceIndex), *device)); + kernel.reset(new MockKernel(program.get(), *kernelInfo, *device)); device->sharedDeviceInfo.vmeAvcSupportsPreemption = true; device->setPreemptionMode(PreemptionMode::MidThread); PreemptionHelper::setPreemptionLevelFlags(flags, device->getDevice(), kernel.get()); @@ -264,7 +263,7 @@ TEST_F(ThreadGroupPreemptionTests, GivenDebugKernelPreemptionWhenDeviceSupportsT EXPECT_EQ(PreemptionMode::ThreadGroup, device->getPreemptionMode()); PreemptionFlags flags = {}; - kernel.reset(new MockKernel(program.get(), MockKernel::toKernelInfoContainer(*kernelInfo, rootDeviceIndex), *device)); + kernel.reset(new MockKernel(program.get(), *kernelInfo, *device)); PreemptionHelper::setPreemptionLevelFlags(flags, device->getDevice(), kernel.get()); PreemptionMode outMode = PreemptionHelper::taskPreemptionMode(device->getPreemptionMode(), flags); EXPECT_EQ(PreemptionMode::MidThread, outMode); @@ -276,7 +275,7 @@ TEST_F(MidThreadPreemptionTests, GivenDebugKernelPreemptionWhenDeviceSupportsMid EXPECT_EQ(PreemptionMode::MidThread, device->getPreemptionMode()); PreemptionFlags flags = {}; - kernel.reset(new MockKernel(program.get(), MockKernel::toKernelInfoContainer(*kernelInfo, rootDeviceIndex), *device)); + kernel.reset(new MockKernel(program.get(), *kernelInfo, *device)); PreemptionHelper::setPreemptionLevelFlags(flags, device->getDevice(), kernel.get()); PreemptionMode outMode = PreemptionHelper::taskPreemptionMode(device->getPreemptionMode(), flags); EXPECT_EQ(PreemptionMode::MidBatch, outMode); diff --git a/opencl/test/unit_test/profiling/profiling_tests.cpp b/opencl/test/unit_test/profiling/profiling_tests.cpp index 4ed3e958c7..a7b09d1677 100644 --- a/opencl/test/unit_test/profiling/profiling_tests.cpp +++ b/opencl/test/unit_test/profiling/profiling_tests.cpp @@ -73,7 +73,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, ProfilingTests, GivenCommandQueueWithProfilingAndFor typedef typename FamilyType::PIPE_CONTROL PIPE_CONTROL; typedef typename FamilyType::GPGPU_WALKER GPGPU_WALKER; - MockKernel kernel(program.get(), MockKernel::toKernelInfoContainer(kernelInfo, rootDeviceIndex), *pClDevice); + MockKernel kernel(program.get(), kernelInfo, *pClDevice); uint64_t requiredSize = 2 * sizeof(PIPE_CONTROL) + 2 * sizeof(MI_STORE_REGISTER_MEM) + sizeof(GPGPU_WALKER) + HardwareCommandsHelper::getSizeRequiredCS(&kernel); @@ -118,7 +118,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, ProfilingTests, GivenCommandQueueWithProfilingAndFor typedef typename FamilyType::PIPE_CONTROL PIPE_CONTROL; typedef typename FamilyType::GPGPU_WALKER GPGPU_WALKER; - MockKernel kernel(program.get(), MockKernel::toKernelInfoContainer(kernelInfo, rootDeviceIndex), *pClDevice); + MockKernel kernel(program.get(), kernelInfo, *pClDevice); uint64_t requiredSize = 2 * sizeof(PIPE_CONTROL) + 4 * sizeof(MI_STORE_REGISTER_MEM) + HardwareCommandsHelper::getSizeRequiredCS(&kernel); requiredSize += 2 * sizeof(GPGPU_WALKER); @@ -144,7 +144,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, ProfilingTests, GivenCommandQueueWithProfolingWhenWa typedef typename FamilyType::PIPE_CONTROL PIPE_CONTROL; typedef typename FamilyType::GPGPU_WALKER GPGPU_WALKER; - MockKernel kernel(program.get(), MockKernel::toKernelInfoContainer(kernelInfo, rootDeviceIndex), *pClDevice); + MockKernel kernel(program.get(), kernelInfo, *pClDevice); ASSERT_EQ(CL_SUCCESS, kernel.initialize()); size_t globalOffsets[3] = {0, 0, 0}; @@ -190,7 +190,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, ProfilingTests, GivenCommandQueueWithProfolingWhenWa } HWCMDTEST_F(IGFX_GEN8_CORE, ProfilingTests, GivenCommandQueueWithProfilingWhenNonBlockedEnqueueIsExecutedThenSubmittedTimestampDoesntHaveGPUTime) { - MockKernel kernel(program.get(), MockKernel::toKernelInfoContainer(kernelInfo, rootDeviceIndex), *pClDevice); + MockKernel kernel(program.get(), kernelInfo, *pClDevice); ASSERT_EQ(CL_SUCCESS, kernel.initialize()); size_t globalOffsets[3] = {0, 0, 0}; @@ -225,7 +225,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, ProfilingTests, GivenCommandQueueWithProflingWhenWal typedef typename FamilyType::MI_STORE_REGISTER_MEM MI_STORE_REGISTER_MEM; typedef typename FamilyType::GPGPU_WALKER GPGPU_WALKER; - MockKernel kernel(program.get(), MockKernel::toKernelInfoContainer(kernelInfo, rootDeviceIndex), *pClDevice); + MockKernel kernel(program.get(), kernelInfo, *pClDevice); ASSERT_EQ(CL_SUCCESS, kernel.initialize()); size_t globalOffsets[3] = {0, 0, 0}; @@ -279,7 +279,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, ProfilingTests, GivenCommandQueueBlockedWithProfilin typedef typename FamilyType::PIPE_CONTROL PIPE_CONTROL; typedef typename FamilyType::GPGPU_WALKER GPGPU_WALKER; - MockKernel kernel(program.get(), MockKernel::toKernelInfoContainer(kernelInfo, rootDeviceIndex), *pClDevice); + MockKernel kernel(program.get(), kernelInfo, *pClDevice); ASSERT_EQ(CL_SUCCESS, kernel.initialize()); kernel.incRefInternal(); @@ -337,7 +337,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, ProfilingTests, GivenCommandQueueBlockedWithProfilin typedef typename FamilyType::MI_STORE_REGISTER_MEM MI_STORE_REGISTER_MEM; typedef typename FamilyType::GPGPU_WALKER GPGPU_WALKER; - MockKernel kernel(program.get(), MockKernel::toKernelInfoContainer(kernelInfo, rootDeviceIndex), *pClDevice); + MockKernel kernel(program.get(), kernelInfo, *pClDevice); ASSERT_EQ(CL_SUCCESS, kernel.initialize()); kernel.incRefInternal(); diff --git a/opencl/test/unit_test/program/printf_handler_tests.cpp b/opencl/test/unit_test/program/printf_handler_tests.cpp index 829562d3d2..e98f907f0b 100644 --- a/opencl/test/unit_test/program/printf_handler_tests.cpp +++ b/opencl/test/unit_test/program/printf_handler_tests.cpp @@ -33,7 +33,7 @@ TEST(PrintfHandlerTest, givenNotPreparedPrintfHandlerWhenGetSurfaceIsCalledThenR populateKernelDescriptor(pKernelInfo->kernelDescriptor, printfSurface); MockProgram *pProgram = new MockProgram(&context, false, toClDeviceVector(*device)); - MockKernel *pKernel = new MockKernel(pProgram, MockKernel::toKernelInfoContainer(*pKernelInfo, device->getRootDeviceIndex()), *device); + MockKernel *pKernel = new MockKernel(pProgram, *pKernelInfo, *device); MockMultiDispatchInfo multiDispatchInfo(device, pKernel); PrintfHandler *printfHandler = PrintfHandler::create(multiDispatchInfo, *device); @@ -62,8 +62,7 @@ TEST(PrintfHandlerTest, givenPreparedPrintfHandlerWhenGetSurfaceIsCalledThenResu MockProgram *pProgram = new MockProgram(&context, false, toClDeviceVector(*device)); uint64_t crossThread[10]; - MockKernel *pKernel = new MockKernel(pProgram, - MockKernel::toKernelInfoContainer(*pKernelInfo, device->getRootDeviceIndex()), *device); + MockKernel *pKernel = new MockKernel(pProgram, *pKernelInfo, *device); pKernel->setCrossThreadData(&crossThread, sizeof(uint64_t) * 8); MockMultiDispatchInfo multiDispatchInfo(device, pKernel); @@ -132,9 +131,9 @@ TEST(PrintfHandlerTest, givenMultiDispatchInfoWithMultipleKernelsWhenCreatingAnd printfSurface.DataParamSize = 8; populateKernelDescriptor(pMainKernelInfo->kernelDescriptor, printfSurface); - auto mainKernel = std::make_unique(program.get(), MockKernel::toKernelInfoContainer(*pMainKernelInfo, device->getRootDeviceIndex()), *device); - auto kernel1 = std::make_unique(program.get(), MockKernel::toKernelInfoContainer(*pKernelInfo, device->getRootDeviceIndex()), *device); - auto kernel2 = std::make_unique(program.get(), MockKernel::toKernelInfoContainer(*pKernelInfo, device->getRootDeviceIndex()), *device); + auto mainKernel = std::make_unique(program.get(), *pMainKernelInfo, *device); + auto kernel1 = std::make_unique(program.get(), *pKernelInfo, *device); + auto kernel2 = std::make_unique(program.get(), *pKernelInfo, *device); uint64_t crossThread[8]; mainKernel->setCrossThreadData(&crossThread, sizeof(uint64_t) * 8); @@ -206,7 +205,7 @@ TEST(PrintfHandlerTest, GivenAllocationInLocalMemoryWhichRequiresBlitterWhenPrep auto program = std::make_unique(&context, false, toClDeviceVector(*pClDevice)); uint64_t crossThread[10]; - auto kernel = std::make_unique(program.get(), MockKernel::toKernelInfoContainer(*pKernelInfo, pClDevice->getRootDeviceIndex()), *pClDevice); + auto kernel = std::make_unique(program.get(), *pKernelInfo, *pClDevice); kernel->setCrossThreadData(&crossThread, sizeof(uint64_t) * 8); MockMultiDispatchInfo multiDispatchInfo(pClDevice.get(), kernel.get()); @@ -236,7 +235,7 @@ TEST_F(PrintfHandlerMultiRootDeviceTests, GivenPrintfSurfaceThenItHasCorrectRoot auto program = std::make_unique(context.get(), false, toClDeviceVector(*device1)); uint64_t crossThread[10]; - auto kernel = std::make_unique(program.get(), MockKernel::toKernelInfoContainer(*pKernelInfo, device1->getRootDeviceIndex()), *device1); + auto kernel = std::make_unique(program.get(), *pKernelInfo, *device1); kernel->setCrossThreadData(&crossThread, sizeof(uint64_t) * 8); MockMultiDispatchInfo multiDispatchInfo(device1, kernel.get()); diff --git a/opencl/test/unit_test/program/printf_helper_tests.cpp b/opencl/test/unit_test/program/printf_helper_tests.cpp index bf21b9258a..d19cc6f25b 100644 --- a/opencl/test/unit_test/program/printf_helper_tests.cpp +++ b/opencl/test/unit_test/program/printf_helper_tests.cpp @@ -51,9 +51,8 @@ class PrintFormatterTest : public testing::Test { kernelInfo = std::make_unique(); device = new MockClDevice{MockDevice::createWithNewExecutionEnvironment(nullptr)}; - auto rootDeviceIndex = device->getRootDeviceIndex(); program = std::make_unique(toClDeviceVector(*device)); - kernel = new MockKernel(program.get(), MockKernel::toKernelInfoContainer(*kernelInfo, rootDeviceIndex), *device); + kernel = new MockKernel(program.get(), *kernelInfo, *device); printFormatter = std::unique_ptr(new PrintFormatter(static_cast(data->getUnderlyingBuffer()), printfBufferSize, is32bit, kernelInfo->kernelDescriptor.kernelMetadata.printfStringsMap)); diff --git a/opencl/test/unit_test/program/program_nonuniform.cpp b/opencl/test/unit_test/program/program_nonuniform.cpp index 90b838bc9a..87356c280f 100644 --- a/opencl/test/unit_test/program/program_nonuniform.cpp +++ b/opencl/test/unit_test/program/program_nonuniform.cpp @@ -106,12 +106,11 @@ TEST(KernelNonUniform, WhenSettingAllowNonUniformThenGettingAllowNonUniformRetur MockClDevice device{new MockDevice()}; MockProgram program(toClDeviceVector(device)); struct KernelMock : Kernel { - KernelMock(Program *program, KernelInfoContainer &kernelInfos, ClDevice &clDeviceArg) + KernelMock(Program *program, KernelInfo &kernelInfos, ClDevice &clDeviceArg) : Kernel(program, kernelInfos, clDeviceArg, false) { } }; - auto kernelInfos = MockKernel::toKernelInfoContainer(kernelInfo, device.getRootDeviceIndex()); - KernelMock k{&program, kernelInfos, device}; + KernelMock k{&program, kernelInfo, device}; program.setAllowNonUniform(false); EXPECT_FALSE(k.getAllowNonUniform()); program.setAllowNonUniform(true); @@ -202,7 +201,7 @@ TEST_F(ProgramNonUniformTest, GivenCl21WhenExecutingKernelWithNonUniformThenEnqu // create a kernel auto pKernel = Kernel::create(mockProgram, - MockKernel::toKernelInfoContainer(*pKernelInfo, rootDeviceIndex), + *pKernelInfo, *pPlatform->getClDevice(0), &retVal); ASSERT_EQ(CL_SUCCESS, retVal); @@ -244,7 +243,7 @@ TEST_F(ProgramNonUniformTest, GivenCl20WhenExecutingKernelWithNonUniformThenEnqu // create a kernel auto pKernel = Kernel::create(mockProgram, - MockKernel::toKernelInfoContainer(*pKernelInfo, rootDeviceIndex), + *pKernelInfo, *pPlatform->getClDevice(0), &retVal); ASSERT_EQ(CL_SUCCESS, retVal); @@ -284,7 +283,7 @@ TEST_F(ProgramNonUniformTest, GivenCl12WhenExecutingKernelWithNonUniformThenInva // create a kernel auto pKernel = Kernel::create(mockProgram, - MockKernel::toKernelInfoContainer(*pKernelInfo, rootDeviceIndex), + *pKernelInfo, *pPlatform->getClDevice(0), &retVal); ASSERT_EQ(CL_SUCCESS, retVal); diff --git a/opencl/test/unit_test/program/program_tests.cpp b/opencl/test/unit_test/program/program_tests.cpp index 5875ba9008..b77cf5f8ea 100644 --- a/opencl/test/unit_test/program/program_tests.cpp +++ b/opencl/test/unit_test/program/program_tests.cpp @@ -1348,8 +1348,7 @@ HWTEST_F(PatchTokenTests, givenKernelRequiringConstantAllocationWhenMakeResident EXPECT_EQ(expected_values[0], constBuff[0]); EXPECT_EQ(expected_values[1], constBuff[1]); - std::unique_ptr pKernel(Kernel::create(pProgram, - MockKernel::toKernelInfoContainer(*pKernelInfo, rootDeviceIndex), *pClDevice, &retVal)); + std::unique_ptr pKernel(Kernel::create(pProgram, *pKernelInfo, *pClDevice, &retVal)); ASSERT_EQ(CL_SUCCESS, retVal); ASSERT_NE(nullptr, pKernel); @@ -1366,7 +1365,7 @@ HWTEST_F(PatchTokenTests, givenKernelRequiringConstantAllocationWhenMakeResident auto &residencyVector = pCommandStreamReceiver->getResidencyAllocations(); //we expect kernel ISA here and constant allocation - auto kernelIsa = pKernel->getKernelInfo(rootDeviceIndex).getGraphicsAllocation(); + auto kernelIsa = pKernel->getKernelInfo().getGraphicsAllocation(); auto constantAllocation = pProgram->getConstantSurface(pDevice->getRootDeviceIndex()); auto element = std::find(residencyVector.begin(), residencyVector.end(), kernelIsa); @@ -1455,7 +1454,7 @@ TEST_F(PatchTokenTests, WhenBuildingProgramThenConstantKernelArgsAreAvailable) { auto pKernel = Kernel::create( pProgram, - MockKernel::toKernelInfoContainer(*pKernelInfo, rootDeviceIndex), + *pKernelInfo, *pClDevice, &retVal); @@ -1496,7 +1495,7 @@ TEST_F(PatchTokenTests, GivenVmeKernelWhenBuildingKernelThenArgAvailable) { auto pKernel = Kernel::create( pProgram, - MockKernel::toKernelInfoContainer(*pKernelInfo, rootDeviceIndex), + *pKernelInfo, *pClDevice, &retVal); diff --git a/opencl/test/unit_test/sampler/sampler_set_arg_tests.cpp b/opencl/test/unit_test/sampler/sampler_set_arg_tests.cpp index 854200b78d..1675cac67a 100644 --- a/opencl/test/unit_test/sampler/sampler_set_arg_tests.cpp +++ b/opencl/test/unit_test/sampler/sampler_set_arg_tests.cpp @@ -126,7 +126,7 @@ HWTEST_F(SamplerSetArgTest, WhenSettingKernelArgSamplerThenSamplerStatesAreCorre ASSERT_EQ(CL_SUCCESS, retVal); auto samplerState = reinterpret_cast( - ptrOffset(pKernel->getDynamicStateHeap(rootDeviceIndex), + ptrOffset(pKernel->getDynamicStateHeap(), pKernelInfo->kernelArgInfo[0].offsetHeap)); EXPECT_EQ(static_cast(CL_TRUE), static_cast(!samplerState->getNonNormalizedCoordinateEnable())); EXPECT_EQ(SAMPLER_STATE::TEXTURE_COORDINATE_MODE_MIRROR, samplerState->getTcxAddressControlMode()); @@ -204,7 +204,7 @@ HWTEST_F(SamplerSetArgTest, GivenSamplerObjectWhenSetKernelArgIsCalledThenSample } HWTEST_F(SamplerSetArgTest, GivenSamplerObjectWhenSetKernelArgIsCalledAndKernelIsDeletedThenRefCountIsUnchanged) { - auto myKernel = std::make_unique(program.get(), MockKernel::toKernelInfoContainer(*pKernelInfo, rootDeviceIndex), *pClDevice); + auto myKernel = std::make_unique(program.get(), *pKernelInfo, *pClDevice); ASSERT_NE(nullptr, myKernel.get()); ASSERT_EQ(CL_SUCCESS, myKernel->initialize()); @@ -378,7 +378,7 @@ TEST_F(SamplerSetArgTest, givenSamplerTypeStrAndIsSamplerTrueWhenInitializeKerne pKernelInfo->kernelArgInfo[1].metadataExtended->type = "sampler"; pKernelInfo->kernelArgInfo[1].isSampler = true; - auto pMockKernell = std::make_unique(program.get(), MockKernel::toKernelInfoContainer(*pKernelInfo, rootDeviceIndex), *pClDevice); + auto pMockKernell = std::make_unique(program.get(), *pKernelInfo, *pClDevice); ASSERT_EQ(CL_SUCCESS, pMockKernell->initialize()); EXPECT_EQ(pMockKernell->getKernelArguments()[0].type, MockKernel::SAMPLER_OBJ); EXPECT_EQ(pMockKernell->getKernelArguments()[1].type, MockKernel::SAMPLER_OBJ); @@ -393,7 +393,7 @@ TEST_F(SamplerSetArgTest, givenSamplerTypeStrAndAndIsSamplerFalseWhenInitializeK pKernelInfo->kernelArgInfo[1].metadataExtended->type = "sampler"; pKernelInfo->kernelArgInfo[1].isSampler = false; - auto pMockKernell = std::make_unique(program.get(), MockKernel::toKernelInfoContainer(*pKernelInfo, rootDeviceIndex), *pClDevice); + auto pMockKernell = std::make_unique(program.get(), *pKernelInfo, *pClDevice); ASSERT_EQ(CL_SUCCESS, pMockKernell->initialize()); EXPECT_NE(pMockKernell->getKernelArguments()[0].type, MockKernel::SAMPLER_OBJ); EXPECT_NE(pMockKernell->getKernelArguments()[1].type, MockKernel::SAMPLER_OBJ); @@ -430,7 +430,7 @@ HWTEST_P(NormalizedTest, WhenSettingKernelArgSamplerThenCoordsAreCorrect) { ASSERT_EQ(CL_SUCCESS, retVal); auto samplerState = reinterpret_cast( - ptrOffset(pKernel->getDynamicStateHeap(rootDeviceIndex), + ptrOffset(pKernel->getDynamicStateHeap(), pKernelInfo->kernelArgInfo[0].offsetHeap)); EXPECT_EQ(normalizedCoordinates, static_cast(!samplerState->getNonNormalizedCoordinateEnable())); @@ -481,7 +481,7 @@ HWTEST_P(AddressingModeTest, WhenSettingKernelArgSamplerThenModesAreCorrect) { ASSERT_EQ(CL_SUCCESS, retVal); auto samplerState = reinterpret_cast( - ptrOffset(pKernel->getDynamicStateHeap(rootDeviceIndex), + ptrOffset(pKernel->getDynamicStateHeap(), pKernelInfo->kernelArgInfo[0].offsetHeap)); auto expectedModeX = SAMPLER_STATE::TEXTURE_COORDINATE_MODE_MIRROR; @@ -561,7 +561,7 @@ HWTEST_F(SamplerSetArgTest, GivenMipmapsWhenSettingKernelArgSamplerThenLodAreCor ASSERT_EQ(CL_SUCCESS, retVal); auto samplerState = reinterpret_cast( - ptrOffset(pKernel->getDynamicStateHeap(rootDeviceIndex), + ptrOffset(pKernel->getDynamicStateHeap(), pKernelInfo->kernelArgInfo[0].offsetHeap)); EXPECT_EQ(FamilyType::SAMPLER_STATE::MIP_MODE_FILTER_LINEAR, samplerState->getMipModeFilter()); @@ -592,7 +592,7 @@ HWTEST_P(FilterModeTest, WhenSettingKernelArgSamplerThenFiltersAreCorrect) { retVal); auto samplerState = reinterpret_cast( - ptrOffset(pKernel->getDynamicStateHeap(rootDeviceIndex), + ptrOffset(pKernel->getDynamicStateHeap(), pKernelInfo->kernelArgInfo[0].offsetHeap)); sampler->setArg(const_cast(samplerState), *defaultHwInfo); diff --git a/opencl/test/unit_test/scenarios/windows/enqueue_read_write_buffer_scenarios_windows_tests.cpp b/opencl/test/unit_test/scenarios/windows/enqueue_read_write_buffer_scenarios_windows_tests.cpp index 8c8da60fdf..55f5c36e46 100644 --- a/opencl/test/unit_test/scenarios/windows/enqueue_read_write_buffer_scenarios_windows_tests.cpp +++ b/opencl/test/unit_test/scenarios/windows/enqueue_read_write_buffer_scenarios_windows_tests.cpp @@ -104,7 +104,7 @@ HWTEST_F(EnqueueBufferWindowsTest, givenMisalignedHostPtrWhenEnqueueReadBufferCa cmdQ->finish(); parseCommands(*cmdQ); - auto &kernelInfo = kernel->getKernelInfo(rootDeviceIndex); + auto &kernelInfo = kernel->getKernelInfo(); if (hwInfo->capabilityTable.gpuAddressSpace == MemoryConstants::max48BitAddress) { const auto &surfaceStateDst = getSurfaceState(&cmdQ->getIndirectHeap(IndirectHeap::SURFACE_STATE, 0), 1); diff --git a/opencl/test/unit_test/scheduler/scheduler_kernel_tests.cpp b/opencl/test/unit_test/scheduler/scheduler_kernel_tests.cpp index a43314f411..35b08d6bbf 100644 --- a/opencl/test/unit_test/scheduler/scheduler_kernel_tests.cpp +++ b/opencl/test/unit_test/scheduler/scheduler_kernel_tests.cpp @@ -29,7 +29,7 @@ void populateKernelDescriptor(KernelDescriptor &dst, const SPatchDataParameterSt class MockSchedulerKernel : public SchedulerKernel { public: - MockSchedulerKernel(Program *program, const KernelInfoContainer &info, ClDevice &clDeviceArg) : SchedulerKernel(program, info, clDeviceArg) { + MockSchedulerKernel(Program *program, const KernelInfo &info, ClDevice &clDeviceArg) : SchedulerKernel(program, info, clDeviceArg) { } static MockSchedulerKernel *create(Program &program, KernelInfo *&info) { @@ -53,12 +53,7 @@ class MockSchedulerKernel : public SchedulerKernel { info->kernelArgInfo.push_back(std::move(bufferArg)); } - KernelInfoContainer kernelInfos; - auto rootDeviceIndex = program.getDevices()[0]->getRootDeviceIndex(); - kernelInfos.resize(rootDeviceIndex + 1); - kernelInfos[rootDeviceIndex] = info; - - MockSchedulerKernel *mock = Kernel::create(&program, kernelInfos, *program.getDevices()[0], nullptr); + MockSchedulerKernel *mock = Kernel::create(&program, *info, *program.getDevices()[0], nullptr); return mock; } }; @@ -67,9 +62,7 @@ TEST(SchedulerKernelTest, WhenSchedulerKernelIsCreatedThenLwsIs24) { auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); MockProgram program(toClDeviceVector(*device)); KernelInfo info; - KernelInfoContainer kernelInfos; - kernelInfos.push_back(&info); - MockSchedulerKernel kernel(&program, kernelInfos, *device); + MockSchedulerKernel kernel(&program, info, *device); size_t lws = kernel.getLws(); EXPECT_EQ((size_t)24u, lws); @@ -79,9 +72,7 @@ TEST(SchedulerKernelTest, WhenSchedulerKernelIsCreatedThenGwsIs24) { auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); MockProgram program(toClDeviceVector(*device)); KernelInfo info; - KernelInfoContainer kernelInfos; - kernelInfos.push_back(&info); - MockSchedulerKernel kernel(&program, kernelInfos, *device); + MockSchedulerKernel kernel(&program, info, *device); const size_t hwThreads = 3; const size_t simdSize = 8; @@ -97,9 +88,7 @@ TEST(SchedulerKernelTest, WhenSettingGwsThenGetGwsReturnedSetValue) { auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); MockProgram program(toClDeviceVector(*device)); KernelInfo info; - KernelInfoContainer kernelInfos; - kernelInfos.push_back(&info); - MockSchedulerKernel kernel(&program, kernelInfos, *device); + MockSchedulerKernel kernel(&program, info, *device); kernel.setGws(24); @@ -121,9 +110,7 @@ TEST(SchedulerKernelTest, WhenSchedulerKernelIsCreatedThenCurbeSizeIsCorrect) { info.heapInfo.DynamicStateHeapSize = dshSize; - KernelInfoContainer kernelInfos; - kernelInfos.push_back(&info); - MockSchedulerKernel kernel(&program, kernelInfos, *device); + MockSchedulerKernel kernel(&program, info, *device); uint32_t expectedCurbeSize = alignUp(crossTrheadDataSize, 64) + alignUp(dshSize, 64) + alignUp(SCHEDULER_DYNAMIC_PAYLOAD_SIZE, 64); EXPECT_GE((size_t)expectedCurbeSize, kernel.getCurbeSize()); @@ -282,9 +269,7 @@ TEST(SchedulerKernelTest, GivenNullKernelInfoWhenGettingCurbeSizeThenSizeIsCorre MockProgram program(toClDeviceVector(*device)); KernelInfo info; - KernelInfoContainer kernelInfos; - kernelInfos.push_back(&info); - MockSchedulerKernel kernel(&program, kernelInfos, *device); + MockSchedulerKernel kernel(&program, info, *device); uint32_t expectedCurbeSize = alignUp(SCHEDULER_DYNAMIC_PAYLOAD_SIZE, 64); EXPECT_GE((size_t)expectedCurbeSize, kernel.getCurbeSize()); @@ -297,9 +282,7 @@ TEST(SchedulerKernelTest, givenForcedSchedulerGwsByDebugVariableWhenSchedulerKer auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); MockProgram program(toClDeviceVector(*device)); KernelInfo info; - KernelInfoContainer kernelInfos; - kernelInfos.push_back(&info); - MockSchedulerKernel kernel(&program, kernelInfos, *device); + MockSchedulerKernel kernel(&program, info, *device); size_t gws = kernel.getGws(); EXPECT_EQ(static_cast(48u), gws); @@ -313,9 +296,7 @@ TEST(SchedulerKernelTest, givenSimulationModeWhenSchedulerKernelIsCreatedThenGws MockProgram program(toClDeviceVector(*device)); KernelInfo info; - KernelInfoContainer kernelInfos; - kernelInfos.push_back(&info); - MockSchedulerKernel kernel(&program, kernelInfos, *device); + MockSchedulerKernel kernel(&program, info, *device); size_t gws = kernel.getGws(); EXPECT_EQ(static_cast(24u), gws); } @@ -331,9 +312,7 @@ TEST(SchedulerKernelTest, givenForcedSchedulerGwsByDebugVariableAndSimulationMod MockProgram program(toClDeviceVector(*device)); KernelInfo info; - KernelInfoContainer kernelInfos; - kernelInfos.push_back(&info); - MockSchedulerKernel kernel(&program, kernelInfos, *device); + MockSchedulerKernel kernel(&program, info, *device); size_t gws = kernel.getGws(); EXPECT_EQ(static_cast(48u), gws); } diff --git a/opencl/test/unit_test/utilities/file_logger_tests.cpp b/opencl/test/unit_test/utilities/file_logger_tests.cpp index 935338c8fc..bc51a33954 100644 --- a/opencl/test/unit_test/utilities/file_logger_tests.cpp +++ b/opencl/test/unit_test/utilities/file_logger_tests.cpp @@ -349,7 +349,7 @@ TEST(FileLogger, GivenDebugFunctionalityWhenDebugFlagIsDisabledThenDoNotDumpKern kernelInfo->kernelDescriptor.kernelAttributes.simdSize = 1; auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); MockProgram program(toClDeviceVector(*device)); - auto kernel = std::unique_ptr(new MockKernel(&program, MockKernel::toKernelInfoContainer(*kernelInfo, mockRootDeviceIndex), *device)); + auto kernel = std::unique_ptr(new MockKernel(&program, *kernelInfo, *device)); auto multiDispatchInfo = std::unique_ptr(new MockMultiDispatchInfo(device.get(), kernel.get())); KernelArgPatchInfo kernelArgPatchInfo; @@ -385,7 +385,7 @@ TEST(FileLogger, GivenMdiWhenDumpingKernelArgsThenFileIsCreated) { kernelInfo->kernelDescriptor.kernelAttributes.simdSize = 1; auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); MockProgram program(toClDeviceVector(*device)); - auto kernel = std::unique_ptr(new MockKernel(&program, MockKernel::toKernelInfoContainer(*kernelInfo, mockRootDeviceIndex), *device)); + auto kernel = std::unique_ptr(new MockKernel(&program, *kernelInfo, *device)); auto multiDispatchInfo = std::unique_ptr(new MockMultiDispatchInfo(device.get(), kernel.get())); KernelArgPatchInfo kernelArgPatchInfo; @@ -431,7 +431,7 @@ TEST(FileLogger, GivenEmptyKernelWhenDumpingKernelArgsThenFileIsNotCreated) { kernelInfo->kernelDescriptor.kernelAttributes.simdSize = 1; auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); MockProgram program(toClDeviceVector(*device)); - auto kernel = std::unique_ptr(new MockKernel(&program, MockKernel::toKernelInfoContainer(*kernelInfo, mockRootDeviceIndex), *device)); + auto kernel = std::unique_ptr(new MockKernel(&program, *kernelInfo, *device)); std::string testFile = "testfile"; DebugVariables flags; @@ -449,7 +449,7 @@ TEST(FileLogger, GivenImmediateWhenDumpingKernelArgsThenFileIsCreated) { auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); MockProgram program(toClDeviceVector(*device)); - auto kernel = std::unique_ptr(new MockKernel(&program, MockKernel::toKernelInfoContainer(*kernelInfo, mockRootDeviceIndex), *device)); + auto kernel = std::unique_ptr(new MockKernel(&program, *kernelInfo, *device)); KernelArgPatchInfo kernelArgPatchInfo; @@ -483,7 +483,7 @@ TEST(FileLogger, GivenImmediateZeroSizeWhenDumpingKernelArgsThenFileIsNotCreated kernelInfo->kernelDescriptor.kernelAttributes.simdSize = 1; auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); MockProgram program(toClDeviceVector(*device)); - auto kernel = std::unique_ptr(new MockKernel(&program, MockKernel::toKernelInfoContainer(*kernelInfo, mockRootDeviceIndex), *device)); + auto kernel = std::unique_ptr(new MockKernel(&program, *kernelInfo, *device)); KernelArgPatchInfo kernelArgPatchInfo; @@ -513,7 +513,7 @@ TEST(FileLogger, GivenLocalBufferWhenDumpingKernelArgsThenFileIsNotCreated) { kernelInfo->kernelDescriptor.kernelAttributes.simdSize = 1; auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); MockProgram program(toClDeviceVector(*device)); - auto kernel = std::unique_ptr(new MockKernel(&program, MockKernel::toKernelInfoContainer(*kernelInfo, mockRootDeviceIndex), *device)); + auto kernel = std::unique_ptr(new MockKernel(&program, *kernelInfo, *device)); KernelArgPatchInfo kernelArgPatchInfo; @@ -537,7 +537,7 @@ TEST(FileLogger, GivenBufferNotSetWhenDumpingKernelArgsThenFileIsNotCreated) { auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); auto context = clUniquePtr(new MockContext(device.get())); auto program = clUniquePtr(new MockProgram(context.get(), false, toClDeviceVector(*device))); - auto kernel = std::make_unique(program.get(), MockKernel::toKernelInfoContainer(*kernelInfo, mockRootDeviceIndex), *device); + auto kernel = std::make_unique(program.get(), *kernelInfo, *device); KernelArgPatchInfo kernelArgPatchInfo; @@ -572,7 +572,7 @@ TEST(FileLogger, GivenBufferWhenDumpingKernelArgsThenFileIsCreated) { auto kernelInfo = std::make_unique(); kernelInfo->kernelDescriptor.kernelAttributes.simdSize = 1; auto program = clUniquePtr(new MockProgram(context.get(), false, toClDeviceVector(*device))); - auto kernel = std::make_unique(program.get(), MockKernel::toKernelInfoContainer(*kernelInfo, mockRootDeviceIndex), *device); + auto kernel = std::make_unique(program.get(), *kernelInfo, *device); KernelArgPatchInfo kernelArgPatchInfo; @@ -612,7 +612,7 @@ TEST(FileLogger, GivenSamplerWhenDumpingKernelArgsThenFileIsNotCreated) { auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); auto context = clUniquePtr(new MockContext(device.get())); auto program = clUniquePtr(new MockProgram(context.get(), false, toClDeviceVector(*device))); - auto kernel = std::make_unique(program.get(), MockKernel::toKernelInfoContainer(*kernelInfo, mockRootDeviceIndex), *device); + auto kernel = std::make_unique(program.get(), *kernelInfo, *device); KernelArgPatchInfo kernelArgPatchInfo; @@ -640,7 +640,7 @@ TEST(FileLogger, GivenImageNotSetWhenDumpingKernelArgsThenFileIsNotCreated) { auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); auto context = clUniquePtr(new MockContext(device.get())); auto program = clUniquePtr(new MockProgram(context.get(), false, toClDeviceVector(*device))); - auto kernel = std::make_unique(program.get(), MockKernel::toKernelInfoContainer(*kernelInfo, mockRootDeviceIndex), *device); + auto kernel = std::make_unique(program.get(), *kernelInfo, *device); char surfaceStateHeap[0x80]; kernelInfo->heapInfo.pSsh = surfaceStateHeap; diff --git a/shared/source/command_stream/preemption.cpp b/shared/source/command_stream/preemption.cpp index 094addbf44..80513c5a63 100644 --- a/shared/source/command_stream/preemption.cpp +++ b/shared/source/command_stream/preemption.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2018-2020 Intel Corporation + * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -60,7 +60,7 @@ PreemptionMode PreemptionHelper::taskPreemptionMode(PreemptionMode devicePreempt void PreemptionHelper::setPreemptionLevelFlags(PreemptionFlags &flags, Device &device, Kernel *kernel) { if (kernel) { - const auto &kernelDescriptor = kernel->getKernelInfo(device.getRootDeviceIndex()).kernelDescriptor; + const auto &kernelDescriptor = kernel->getKernelInfo().kernelDescriptor; flags.flags.disabledMidThreadPreemptionKernel = kernelDescriptor.kernelAttributes.flags.requiresDisabledMidThreadPreemption; flags.flags.vmeKernel = kernel->isVmeKernel(); flags.flags.usesFencesForReadWriteImages = kernelDescriptor.kernelAttributes.flags.usesFencesForReadWriteImages; diff --git a/shared/test/common/cmd_parse/hw_parse.inl b/shared/test/common/cmd_parse/hw_parse.inl index f7698f94c7..b59d688fc3 100644 --- a/shared/test/common/cmd_parse/hw_parse.inl +++ b/shared/test/common/cmd_parse/hw_parse.inl @@ -115,7 +115,7 @@ const void *HardwareParse::getStatelessArgumentPointer(const Kernel &kernel, uin offsetCrossThreadData); // Determine where the argument is - auto &patchInfo = kernel.getKernelInfo(rootDeviceIndex).patchInfo; + auto &patchInfo = kernel.getKernelInfo().patchInfo; for (auto &arg : patchInfo.statelessGlobalMemObjKernelArgs) { if (arg->ArgumentNumber == indexArg) { return ptrOffset(pCrossThreadData, arg->DataParamOffset);